diff options
468 files changed, 18099 insertions, 7686 deletions
diff --git a/Documentation/DocBook/kernel-locking.tmpl b/Documentation/DocBook/kernel-locking.tmpl index a0d479d1e1dd..f66f4df18690 100644 --- a/Documentation/DocBook/kernel-locking.tmpl +++ b/Documentation/DocBook/kernel-locking.tmpl | |||
@@ -1645,7 +1645,9 @@ the amount of locking which needs to be done. | |||
1645 | all the readers who were traversing the list when we deleted the | 1645 | all the readers who were traversing the list when we deleted the |
1646 | element are finished. We use <function>call_rcu()</function> to | 1646 | element are finished. We use <function>call_rcu()</function> to |
1647 | register a callback which will actually destroy the object once | 1647 | register a callback which will actually destroy the object once |
1648 | the readers are finished. | 1648 | all pre-existing readers are finished. Alternatively, |
1649 | <function>synchronize_rcu()</function> may be used to block until | ||
1650 | all pre-existing readers are finished. | ||
1649 | </para> | 1651 | </para> |
1650 | <para> | 1652 | <para> |
1651 | But how does Read Copy Update know when the readers are | 1653 | But how does Read Copy Update know when the readers are |
@@ -1714,7 +1716,7 @@ the amount of locking which needs to be done. | |||
1714 | - object_put(obj); | 1716 | - object_put(obj); |
1715 | + list_del_rcu(&obj->list); | 1717 | + list_del_rcu(&obj->list); |
1716 | cache_num--; | 1718 | cache_num--; |
1717 | + call_rcu(&obj->rcu, cache_delete_rcu, obj); | 1719 | + call_rcu(&obj->rcu, cache_delete_rcu); |
1718 | } | 1720 | } |
1719 | 1721 | ||
1720 | /* Must be holding cache_lock */ | 1722 | /* Must be holding cache_lock */ |
@@ -1725,14 +1727,6 @@ the amount of locking which needs to be done. | |||
1725 | if (++cache_num > MAX_CACHE_SIZE) { | 1727 | if (++cache_num > MAX_CACHE_SIZE) { |
1726 | struct object *i, *outcast = NULL; | 1728 | struct object *i, *outcast = NULL; |
1727 | list_for_each_entry(i, &cache, list) { | 1729 | list_for_each_entry(i, &cache, list) { |
1728 | @@ -85,6 +94,7 @@ | ||
1729 | obj->popularity = 0; | ||
1730 | atomic_set(&obj->refcnt, 1); /* The cache holds a reference */ | ||
1731 | spin_lock_init(&obj->lock); | ||
1732 | + INIT_RCU_HEAD(&obj->rcu); | ||
1733 | |||
1734 | spin_lock_irqsave(&cache_lock, flags); | ||
1735 | __cache_add(obj); | ||
1736 | @@ -104,12 +114,11 @@ | 1730 | @@ -104,12 +114,11 @@ |
1737 | struct object *cache_find(int id) | 1731 | struct object *cache_find(int id) |
1738 | { | 1732 | { |
diff --git a/Documentation/RCU/checklist.txt b/Documentation/RCU/checklist.txt index 790d1a812376..0c134f8afc6f 100644 --- a/Documentation/RCU/checklist.txt +++ b/Documentation/RCU/checklist.txt | |||
@@ -218,13 +218,22 @@ over a rather long period of time, but improvements are always welcome! | |||
218 | include: | 218 | include: |
219 | 219 | ||
220 | a. Keeping a count of the number of data-structure elements | 220 | a. Keeping a count of the number of data-structure elements |
221 | used by the RCU-protected data structure, including those | 221 | used by the RCU-protected data structure, including |
222 | waiting for a grace period to elapse. Enforce a limit | 222 | those waiting for a grace period to elapse. Enforce a |
223 | on this number, stalling updates as needed to allow | 223 | limit on this number, stalling updates as needed to allow |
224 | previously deferred frees to complete. | 224 | previously deferred frees to complete. Alternatively, |
225 | 225 | limit only the number awaiting deferred free rather than | |
226 | Alternatively, limit only the number awaiting deferred | 226 | the total number of elements. |
227 | free rather than the total number of elements. | 227 | |
228 | One way to stall the updates is to acquire the update-side | ||
229 | mutex. (Don't try this with a spinlock -- other CPUs | ||
230 | spinning on the lock could prevent the grace period | ||
231 | from ever ending.) Another way to stall the updates | ||
232 | is for the updates to use a wrapper function around | ||
233 | the memory allocator, so that this wrapper function | ||
234 | simulates OOM when there is too much memory awaiting an | ||
235 | RCU grace period. There are of course many other | ||
236 | variations on this theme. | ||
228 | 237 | ||
229 | b. Limiting update rate. For example, if updates occur only | 238 | b. Limiting update rate. For example, if updates occur only |
230 | once per hour, then no explicit rate limiting is required, | 239 | once per hour, then no explicit rate limiting is required, |
@@ -365,3 +374,26 @@ over a rather long period of time, but improvements are always welcome! | |||
365 | and the compiler to freely reorder code into and out of RCU | 374 | and the compiler to freely reorder code into and out of RCU |
366 | read-side critical sections. It is the responsibility of the | 375 | read-side critical sections. It is the responsibility of the |
367 | RCU update-side primitives to deal with this. | 376 | RCU update-side primitives to deal with this. |
377 | |||
378 | 17. Use CONFIG_PROVE_RCU, CONFIG_DEBUG_OBJECTS_RCU_HEAD, and | ||
379 | the __rcu sparse checks to validate your RCU code. These | ||
380 | can help find problems as follows: | ||
381 | |||
382 | CONFIG_PROVE_RCU: check that accesses to RCU-protected data | ||
383 | structures are carried out under the proper RCU | ||
384 | read-side critical section, while holding the right | ||
385 | combination of locks, or whatever other conditions | ||
386 | are appropriate. | ||
387 | |||
388 | CONFIG_DEBUG_OBJECTS_RCU_HEAD: check that you don't pass the | ||
389 | same object to call_rcu() (or friends) before an RCU | ||
390 | grace period has elapsed since the last time that you | ||
391 | passed that same object to call_rcu() (or friends). | ||
392 | |||
393 | __rcu sparse checks: tag the pointer to the RCU-protected data | ||
394 | structure with __rcu, and sparse will warn you if you | ||
395 | access that pointer without the services of one of the | ||
396 | variants of rcu_dereference(). | ||
397 | |||
398 | These debugging aids can help you find problems that are | ||
399 | otherwise extremely difficult to spot. | ||
diff --git a/Documentation/RCU/stallwarn.txt b/Documentation/RCU/stallwarn.txt index 44c6dcc93d6d..862c08ef1fde 100644 --- a/Documentation/RCU/stallwarn.txt +++ b/Documentation/RCU/stallwarn.txt | |||
@@ -80,6 +80,24 @@ o A CPU looping with bottom halves disabled. This condition can | |||
80 | o For !CONFIG_PREEMPT kernels, a CPU looping anywhere in the kernel | 80 | o For !CONFIG_PREEMPT kernels, a CPU looping anywhere in the kernel |
81 | without invoking schedule(). | 81 | without invoking schedule(). |
82 | 82 | ||
83 | o A CPU-bound real-time task in a CONFIG_PREEMPT kernel, which might | ||
84 | happen to preempt a low-priority task in the middle of an RCU | ||
85 | read-side critical section. This is especially damaging if | ||
86 | that low-priority task is not permitted to run on any other CPU, | ||
87 | in which case the next RCU grace period can never complete, which | ||
88 | will eventually cause the system to run out of memory and hang. | ||
89 | While the system is in the process of running itself out of | ||
90 | memory, you might see stall-warning messages. | ||
91 | |||
92 | o A CPU-bound real-time task in a CONFIG_PREEMPT_RT kernel that | ||
93 | is running at a higher priority than the RCU softirq threads. | ||
94 | This will prevent RCU callbacks from ever being invoked, | ||
95 | and in a CONFIG_TREE_PREEMPT_RCU kernel will further prevent | ||
96 | RCU grace periods from ever completing. Either way, the | ||
97 | system will eventually run out of memory and hang. In the | ||
98 | CONFIG_TREE_PREEMPT_RCU case, you might see stall-warning | ||
99 | messages. | ||
100 | |||
83 | o A bug in the RCU implementation. | 101 | o A bug in the RCU implementation. |
84 | 102 | ||
85 | o A hardware failure. This is quite unlikely, but has occurred | 103 | o A hardware failure. This is quite unlikely, but has occurred |
diff --git a/Documentation/RCU/trace.txt b/Documentation/RCU/trace.txt index efd8cc95c06b..a851118775d8 100644 --- a/Documentation/RCU/trace.txt +++ b/Documentation/RCU/trace.txt | |||
@@ -125,6 +125,17 @@ o "b" is the batch limit for this CPU. If more than this number | |||
125 | of RCU callbacks is ready to invoke, then the remainder will | 125 | of RCU callbacks is ready to invoke, then the remainder will |
126 | be deferred. | 126 | be deferred. |
127 | 127 | ||
128 | o "ci" is the number of RCU callbacks that have been invoked for | ||
129 | this CPU. Note that ci+ql is the number of callbacks that have | ||
130 | been registered in the absence of CPU-hotplug activity. | ||
131 | |||
132 | o "co" is the number of RCU callbacks that have been orphaned due to | ||
133 | this CPU going offline. | ||
134 | |||
135 | o "ca" is the number of RCU callbacks that have been adopted due to | ||
136 | other CPUs going offline. Note that ci+co-ca+ql is the number of | ||
137 | RCU callbacks registered on this CPU. | ||
138 | |||
128 | There is also an rcu/rcudata.csv file with the same information in | 139 | There is also an rcu/rcudata.csv file with the same information in |
129 | comma-separated-variable spreadsheet format. | 140 | comma-separated-variable spreadsheet format. |
130 | 141 | ||
@@ -180,7 +191,7 @@ o "s" is the "signaled" state that drives force_quiescent_state()'s | |||
180 | 191 | ||
181 | o "jfq" is the number of jiffies remaining for this grace period | 192 | o "jfq" is the number of jiffies remaining for this grace period |
182 | before force_quiescent_state() is invoked to help push things | 193 | before force_quiescent_state() is invoked to help push things |
183 | along. Note that CPUs in dyntick-idle mode thoughout the grace | 194 | along. Note that CPUs in dyntick-idle mode throughout the grace |
184 | period will not report on their own, but rather must be check by | 195 | period will not report on their own, but rather must be checked by |
185 | some other CPU via force_quiescent_state(). | 196 | some other CPU via force_quiescent_state(). |
186 | 197 | ||
diff --git a/Documentation/kprobes.txt b/Documentation/kprobes.txt index 1762b81fcdf2..741fe66d6eca 100644 --- a/Documentation/kprobes.txt +++ b/Documentation/kprobes.txt | |||
@@ -542,9 +542,11 @@ Kprobes does not use mutexes or allocate memory except during | |||
542 | registration and unregistration. | 542 | registration and unregistration. |
543 | 543 | ||
544 | Probe handlers are run with preemption disabled. Depending on the | 544 | Probe handlers are run with preemption disabled. Depending on the |
545 | architecture, handlers may also run with interrupts disabled. In any | 545 | architecture and optimization state, handlers may also run with |
546 | case, your handler should not yield the CPU (e.g., by attempting to | 546 | interrupts disabled (e.g., kretprobe handlers and optimized kprobe |
547 | acquire a semaphore). | 547 | handlers run without interrupts disabled on x86/x86-64). In any case, |
548 | your handler should not yield the CPU (e.g., by attempting to acquire | ||
549 | a semaphore). | ||
548 | 550 | ||
549 | Since a return probe is implemented by replacing the return | 551 | Since a return probe is implemented by replacing the return |
550 | address with the trampoline's address, stack backtraces and calls | 552 | address with the trampoline's address, stack backtraces and calls |
diff --git a/MAINTAINERS b/MAINTAINERS index 7679bf32f7bb..3d4179fbc526 100644 --- a/MAINTAINERS +++ b/MAINTAINERS | |||
@@ -1527,6 +1527,8 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client.git | |||
1527 | S: Supported | 1527 | S: Supported |
1528 | F: Documentation/filesystems/ceph.txt | 1528 | F: Documentation/filesystems/ceph.txt |
1529 | F: fs/ceph | 1529 | F: fs/ceph |
1530 | F: net/ceph | ||
1531 | F: include/linux/ceph | ||
1530 | 1532 | ||
1531 | CERTIFIED WIRELESS USB (WUSB) SUBSYSTEM: | 1533 | CERTIFIED WIRELESS USB (WUSB) SUBSYSTEM: |
1532 | M: David Vrabel <david.vrabel@csr.com> | 1534 | M: David Vrabel <david.vrabel@csr.com> |
@@ -3162,7 +3164,7 @@ F: drivers/net/ioc3-eth.c | |||
3162 | 3164 | ||
3163 | IOC3 SERIAL DRIVER | 3165 | IOC3 SERIAL DRIVER |
3164 | M: Pat Gefre <pfg@sgi.com> | 3166 | M: Pat Gefre <pfg@sgi.com> |
3165 | L: linux-mips@linux-mips.org | 3167 | L: linux-serial@vger.kernel.org |
3166 | S: Maintained | 3168 | S: Maintained |
3167 | F: drivers/serial/ioc3_serial.c | 3169 | F: drivers/serial/ioc3_serial.c |
3168 | 3170 | ||
@@ -4805,6 +4807,15 @@ F: fs/qnx4/ | |||
4805 | F: include/linux/qnx4_fs.h | 4807 | F: include/linux/qnx4_fs.h |
4806 | F: include/linux/qnxtypes.h | 4808 | F: include/linux/qnxtypes.h |
4807 | 4809 | ||
4810 | RADOS BLOCK DEVICE (RBD) | ||
4811 | F: include/linux/qnxtypes.h | ||
4812 | M: Yehuda Sadeh <yehuda@hq.newdream.net> | ||
4813 | M: Sage Weil <sage@newdream.net> | ||
4814 | L: ceph-devel@vger.kernel.org | ||
4815 | S: Supported | ||
4816 | F: drivers/block/rbd.c | ||
4817 | F: drivers/block/rbd_types.h | ||
4818 | |||
4808 | RADEON FRAMEBUFFER DISPLAY DRIVER | 4819 | RADEON FRAMEBUFFER DISPLAY DRIVER |
4809 | M: Benjamin Herrenschmidt <benh@kernel.crashing.org> | 4820 | M: Benjamin Herrenschmidt <benh@kernel.crashing.org> |
4810 | L: linux-fbdev@vger.kernel.org | 4821 | L: linux-fbdev@vger.kernel.org |
@@ -1,8 +1,8 @@ | |||
1 | VERSION = 2 | 1 | VERSION = 2 |
2 | PATCHLEVEL = 6 | 2 | PATCHLEVEL = 6 |
3 | SUBLEVEL = 36 | 3 | SUBLEVEL = 36 |
4 | EXTRAVERSION = -rc7 | 4 | EXTRAVERSION = |
5 | NAME = Sheep on Meth | 5 | NAME = Flesh-Eating Bats with Fangs |
6 | 6 | ||
7 | # *DOCUMENTATION* | 7 | # *DOCUMENTATION* |
8 | # To see a list of typical targets execute "make help" | 8 | # To see a list of typical targets execute "make help" |
@@ -568,6 +568,12 @@ endif | |||
568 | 568 | ||
569 | ifdef CONFIG_FUNCTION_TRACER | 569 | ifdef CONFIG_FUNCTION_TRACER |
570 | KBUILD_CFLAGS += -pg | 570 | KBUILD_CFLAGS += -pg |
571 | ifdef CONFIG_DYNAMIC_FTRACE | ||
572 | ifdef CONFIG_HAVE_C_RECORDMCOUNT | ||
573 | BUILD_C_RECORDMCOUNT := y | ||
574 | export BUILD_C_RECORDMCOUNT | ||
575 | endif | ||
576 | endif | ||
571 | endif | 577 | endif |
572 | 578 | ||
573 | # We trigger additional mismatches with less inlining | 579 | # We trigger additional mismatches with less inlining |
@@ -591,6 +597,11 @@ KBUILD_CFLAGS += $(call cc-option,-fno-strict-overflow) | |||
591 | # conserve stack if available | 597 | # conserve stack if available |
592 | KBUILD_CFLAGS += $(call cc-option,-fconserve-stack) | 598 | KBUILD_CFLAGS += $(call cc-option,-fconserve-stack) |
593 | 599 | ||
600 | # check for 'asm goto' | ||
601 | ifeq ($(shell $(CONFIG_SHELL) $(srctree)/scripts/gcc-goto.sh $(CC)), y) | ||
602 | KBUILD_CFLAGS += -DCC_HAVE_ASM_GOTO | ||
603 | endif | ||
604 | |||
594 | # Add user supplied CPPFLAGS, AFLAGS and CFLAGS as the last assignments | 605 | # Add user supplied CPPFLAGS, AFLAGS and CFLAGS as the last assignments |
595 | # But warn user when we do so | 606 | # But warn user when we do so |
596 | warn-assign = \ | 607 | warn-assign = \ |
diff --git a/arch/Kconfig b/arch/Kconfig index fe48fc7a3eba..53d7f619a1b9 100644 --- a/arch/Kconfig +++ b/arch/Kconfig | |||
@@ -158,4 +158,7 @@ config HAVE_PERF_EVENTS_NMI | |||
158 | subsystem. Also has support for calculating CPU cycle events | 158 | subsystem. Also has support for calculating CPU cycle events |
159 | to determine how many clock cycles in a given period. | 159 | to determine how many clock cycles in a given period. |
160 | 160 | ||
161 | config HAVE_ARCH_JUMP_LABEL | ||
162 | bool | ||
163 | |||
161 | source "kernel/gcov/Kconfig" | 164 | source "kernel/gcov/Kconfig" |
diff --git a/arch/alpha/Kconfig b/arch/alpha/Kconfig index b9647bb66d13..d04ccd73af45 100644 --- a/arch/alpha/Kconfig +++ b/arch/alpha/Kconfig | |||
@@ -9,6 +9,7 @@ config ALPHA | |||
9 | select HAVE_IDE | 9 | select HAVE_IDE |
10 | select HAVE_OPROFILE | 10 | select HAVE_OPROFILE |
11 | select HAVE_SYSCALL_WRAPPERS | 11 | select HAVE_SYSCALL_WRAPPERS |
12 | select HAVE_IRQ_WORK | ||
12 | select HAVE_PERF_EVENTS | 13 | select HAVE_PERF_EVENTS |
13 | select HAVE_DMA_ATTRS | 14 | select HAVE_DMA_ATTRS |
14 | help | 15 | help |
diff --git a/arch/alpha/include/asm/perf_event.h b/arch/alpha/include/asm/perf_event.h index 4157cd3c44a9..fe792ca818f6 100644 --- a/arch/alpha/include/asm/perf_event.h +++ b/arch/alpha/include/asm/perf_event.h | |||
@@ -1,11 +1,6 @@ | |||
1 | #ifndef __ASM_ALPHA_PERF_EVENT_H | 1 | #ifndef __ASM_ALPHA_PERF_EVENT_H |
2 | #define __ASM_ALPHA_PERF_EVENT_H | 2 | #define __ASM_ALPHA_PERF_EVENT_H |
3 | 3 | ||
4 | /* Alpha only supports software events through this interface. */ | ||
5 | extern void set_perf_event_pending(void); | ||
6 | |||
7 | #define PERF_EVENT_INDEX_OFFSET 0 | ||
8 | |||
9 | #ifdef CONFIG_PERF_EVENTS | 4 | #ifdef CONFIG_PERF_EVENTS |
10 | extern void init_hw_perf_events(void); | 5 | extern void init_hw_perf_events(void); |
11 | #else | 6 | #else |
diff --git a/arch/alpha/kernel/perf_event.c b/arch/alpha/kernel/perf_event.c index 85d8e4f58c83..1cc49683fb69 100644 --- a/arch/alpha/kernel/perf_event.c +++ b/arch/alpha/kernel/perf_event.c | |||
@@ -307,7 +307,7 @@ again: | |||
307 | new_raw_count) != prev_raw_count) | 307 | new_raw_count) != prev_raw_count) |
308 | goto again; | 308 | goto again; |
309 | 309 | ||
310 | delta = (new_raw_count - (prev_raw_count & alpha_pmu->pmc_count_mask[idx])) + ovf; | 310 | delta = (new_raw_count - (prev_raw_count & alpha_pmu->pmc_count_mask[idx])) + ovf; |
311 | 311 | ||
312 | /* It is possible on very rare occasions that the PMC has overflowed | 312 | /* It is possible on very rare occasions that the PMC has overflowed |
313 | * but the interrupt is yet to come. Detect and fix this situation. | 313 | * but the interrupt is yet to come. Detect and fix this situation. |
@@ -402,14 +402,13 @@ static void maybe_change_configuration(struct cpu_hw_events *cpuc) | |||
402 | struct hw_perf_event *hwc = &pe->hw; | 402 | struct hw_perf_event *hwc = &pe->hw; |
403 | int idx = hwc->idx; | 403 | int idx = hwc->idx; |
404 | 404 | ||
405 | if (cpuc->current_idx[j] != PMC_NO_INDEX) { | 405 | if (cpuc->current_idx[j] == PMC_NO_INDEX) { |
406 | cpuc->idx_mask |= (1<<cpuc->current_idx[j]); | 406 | alpha_perf_event_set_period(pe, hwc, idx); |
407 | continue; | 407 | cpuc->current_idx[j] = idx; |
408 | } | 408 | } |
409 | 409 | ||
410 | alpha_perf_event_set_period(pe, hwc, idx); | 410 | if (!(hwc->state & PERF_HES_STOPPED)) |
411 | cpuc->current_idx[j] = idx; | 411 | cpuc->idx_mask |= (1<<cpuc->current_idx[j]); |
412 | cpuc->idx_mask |= (1<<cpuc->current_idx[j]); | ||
413 | } | 412 | } |
414 | cpuc->config = cpuc->event[0]->hw.config_base; | 413 | cpuc->config = cpuc->event[0]->hw.config_base; |
415 | } | 414 | } |
@@ -420,12 +419,13 @@ static void maybe_change_configuration(struct cpu_hw_events *cpuc) | |||
420 | * - this function is called from outside this module via the pmu struct | 419 | * - this function is called from outside this module via the pmu struct |
421 | * returned from perf event initialisation. | 420 | * returned from perf event initialisation. |
422 | */ | 421 | */ |
423 | static int alpha_pmu_enable(struct perf_event *event) | 422 | static int alpha_pmu_add(struct perf_event *event, int flags) |
424 | { | 423 | { |
425 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | 424 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); |
425 | struct hw_perf_event *hwc = &event->hw; | ||
426 | int n0; | 426 | int n0; |
427 | int ret; | 427 | int ret; |
428 | unsigned long flags; | 428 | unsigned long irq_flags; |
429 | 429 | ||
430 | /* | 430 | /* |
431 | * The Sparc code has the IRQ disable first followed by the perf | 431 | * The Sparc code has the IRQ disable first followed by the perf |
@@ -435,8 +435,8 @@ static int alpha_pmu_enable(struct perf_event *event) | |||
435 | * nevertheless we disable the PMCs first to enable a potential | 435 | * nevertheless we disable the PMCs first to enable a potential |
436 | * final PMI to occur before we disable interrupts. | 436 | * final PMI to occur before we disable interrupts. |
437 | */ | 437 | */ |
438 | perf_disable(); | 438 | perf_pmu_disable(event->pmu); |
439 | local_irq_save(flags); | 439 | local_irq_save(irq_flags); |
440 | 440 | ||
441 | /* Default to error to be returned */ | 441 | /* Default to error to be returned */ |
442 | ret = -EAGAIN; | 442 | ret = -EAGAIN; |
@@ -455,8 +455,12 @@ static int alpha_pmu_enable(struct perf_event *event) | |||
455 | } | 455 | } |
456 | } | 456 | } |
457 | 457 | ||
458 | local_irq_restore(flags); | 458 | hwc->state = PERF_HES_UPTODATE; |
459 | perf_enable(); | 459 | if (!(flags & PERF_EF_START)) |
460 | hwc->state |= PERF_HES_STOPPED; | ||
461 | |||
462 | local_irq_restore(irq_flags); | ||
463 | perf_pmu_enable(event->pmu); | ||
460 | 464 | ||
461 | return ret; | 465 | return ret; |
462 | } | 466 | } |
@@ -467,15 +471,15 @@ static int alpha_pmu_enable(struct perf_event *event) | |||
467 | * - this function is called from outside this module via the pmu struct | 471 | * - this function is called from outside this module via the pmu struct |
468 | * returned from perf event initialisation. | 472 | * returned from perf event initialisation. |
469 | */ | 473 | */ |
470 | static void alpha_pmu_disable(struct perf_event *event) | 474 | static void alpha_pmu_del(struct perf_event *event, int flags) |
471 | { | 475 | { |
472 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | 476 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); |
473 | struct hw_perf_event *hwc = &event->hw; | 477 | struct hw_perf_event *hwc = &event->hw; |
474 | unsigned long flags; | 478 | unsigned long irq_flags; |
475 | int j; | 479 | int j; |
476 | 480 | ||
477 | perf_disable(); | 481 | perf_pmu_disable(event->pmu); |
478 | local_irq_save(flags); | 482 | local_irq_save(irq_flags); |
479 | 483 | ||
480 | for (j = 0; j < cpuc->n_events; j++) { | 484 | for (j = 0; j < cpuc->n_events; j++) { |
481 | if (event == cpuc->event[j]) { | 485 | if (event == cpuc->event[j]) { |
@@ -501,8 +505,8 @@ static void alpha_pmu_disable(struct perf_event *event) | |||
501 | } | 505 | } |
502 | } | 506 | } |
503 | 507 | ||
504 | local_irq_restore(flags); | 508 | local_irq_restore(irq_flags); |
505 | perf_enable(); | 509 | perf_pmu_enable(event->pmu); |
506 | } | 510 | } |
507 | 511 | ||
508 | 512 | ||
@@ -514,13 +518,44 @@ static void alpha_pmu_read(struct perf_event *event) | |||
514 | } | 518 | } |
515 | 519 | ||
516 | 520 | ||
517 | static void alpha_pmu_unthrottle(struct perf_event *event) | 521 | static void alpha_pmu_stop(struct perf_event *event, int flags) |
522 | { | ||
523 | struct hw_perf_event *hwc = &event->hw; | ||
524 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
525 | |||
526 | if (!(hwc->state & PERF_HES_STOPPED)) { | ||
527 | cpuc->idx_mask &= ~(1UL<<hwc->idx); | ||
528 | hwc->state |= PERF_HES_STOPPED; | ||
529 | } | ||
530 | |||
531 | if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) { | ||
532 | alpha_perf_event_update(event, hwc, hwc->idx, 0); | ||
533 | hwc->state |= PERF_HES_UPTODATE; | ||
534 | } | ||
535 | |||
536 | if (cpuc->enabled) | ||
537 | wrperfmon(PERFMON_CMD_DISABLE, (1UL<<hwc->idx)); | ||
538 | } | ||
539 | |||
540 | |||
541 | static void alpha_pmu_start(struct perf_event *event, int flags) | ||
518 | { | 542 | { |
519 | struct hw_perf_event *hwc = &event->hw; | 543 | struct hw_perf_event *hwc = &event->hw; |
520 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | 544 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); |
521 | 545 | ||
546 | if (WARN_ON_ONCE(!(hwc->state & PERF_HES_STOPPED))) | ||
547 | return; | ||
548 | |||
549 | if (flags & PERF_EF_RELOAD) { | ||
550 | WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE)); | ||
551 | alpha_perf_event_set_period(event, hwc, hwc->idx); | ||
552 | } | ||
553 | |||
554 | hwc->state = 0; | ||
555 | |||
522 | cpuc->idx_mask |= 1UL<<hwc->idx; | 556 | cpuc->idx_mask |= 1UL<<hwc->idx; |
523 | wrperfmon(PERFMON_CMD_ENABLE, (1UL<<hwc->idx)); | 557 | if (cpuc->enabled) |
558 | wrperfmon(PERFMON_CMD_ENABLE, (1UL<<hwc->idx)); | ||
524 | } | 559 | } |
525 | 560 | ||
526 | 561 | ||
@@ -642,39 +677,36 @@ static int __hw_perf_event_init(struct perf_event *event) | |||
642 | return 0; | 677 | return 0; |
643 | } | 678 | } |
644 | 679 | ||
645 | static const struct pmu pmu = { | ||
646 | .enable = alpha_pmu_enable, | ||
647 | .disable = alpha_pmu_disable, | ||
648 | .read = alpha_pmu_read, | ||
649 | .unthrottle = alpha_pmu_unthrottle, | ||
650 | }; | ||
651 | |||
652 | |||
653 | /* | 680 | /* |
654 | * Main entry point to initialise a HW performance event. | 681 | * Main entry point to initialise a HW performance event. |
655 | */ | 682 | */ |
656 | const struct pmu *hw_perf_event_init(struct perf_event *event) | 683 | static int alpha_pmu_event_init(struct perf_event *event) |
657 | { | 684 | { |
658 | int err; | 685 | int err; |
659 | 686 | ||
687 | switch (event->attr.type) { | ||
688 | case PERF_TYPE_RAW: | ||
689 | case PERF_TYPE_HARDWARE: | ||
690 | case PERF_TYPE_HW_CACHE: | ||
691 | break; | ||
692 | |||
693 | default: | ||
694 | return -ENOENT; | ||
695 | } | ||
696 | |||
660 | if (!alpha_pmu) | 697 | if (!alpha_pmu) |
661 | return ERR_PTR(-ENODEV); | 698 | return -ENODEV; |
662 | 699 | ||
663 | /* Do the real initialisation work. */ | 700 | /* Do the real initialisation work. */ |
664 | err = __hw_perf_event_init(event); | 701 | err = __hw_perf_event_init(event); |
665 | 702 | ||
666 | if (err) | 703 | return err; |
667 | return ERR_PTR(err); | ||
668 | |||
669 | return &pmu; | ||
670 | } | 704 | } |
671 | 705 | ||
672 | |||
673 | |||
674 | /* | 706 | /* |
675 | * Main entry point - enable HW performance counters. | 707 | * Main entry point - enable HW performance counters. |
676 | */ | 708 | */ |
677 | void hw_perf_enable(void) | 709 | static void alpha_pmu_enable(struct pmu *pmu) |
678 | { | 710 | { |
679 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | 711 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); |
680 | 712 | ||
@@ -700,7 +732,7 @@ void hw_perf_enable(void) | |||
700 | * Main entry point - disable HW performance counters. | 732 | * Main entry point - disable HW performance counters. |
701 | */ | 733 | */ |
702 | 734 | ||
703 | void hw_perf_disable(void) | 735 | static void alpha_pmu_disable(struct pmu *pmu) |
704 | { | 736 | { |
705 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | 737 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); |
706 | 738 | ||
@@ -713,6 +745,17 @@ void hw_perf_disable(void) | |||
713 | wrperfmon(PERFMON_CMD_DISABLE, cpuc->idx_mask); | 745 | wrperfmon(PERFMON_CMD_DISABLE, cpuc->idx_mask); |
714 | } | 746 | } |
715 | 747 | ||
748 | static struct pmu pmu = { | ||
749 | .pmu_enable = alpha_pmu_enable, | ||
750 | .pmu_disable = alpha_pmu_disable, | ||
751 | .event_init = alpha_pmu_event_init, | ||
752 | .add = alpha_pmu_add, | ||
753 | .del = alpha_pmu_del, | ||
754 | .start = alpha_pmu_start, | ||
755 | .stop = alpha_pmu_stop, | ||
756 | .read = alpha_pmu_read, | ||
757 | }; | ||
758 | |||
716 | 759 | ||
717 | /* | 760 | /* |
718 | * Main entry point - don't know when this is called but it | 761 | * Main entry point - don't know when this is called but it |
@@ -766,7 +809,7 @@ static void alpha_perf_event_irq_handler(unsigned long la_ptr, | |||
766 | wrperfmon(PERFMON_CMD_DISABLE, cpuc->idx_mask); | 809 | wrperfmon(PERFMON_CMD_DISABLE, cpuc->idx_mask); |
767 | 810 | ||
768 | /* la_ptr is the counter that overflowed. */ | 811 | /* la_ptr is the counter that overflowed. */ |
769 | if (unlikely(la_ptr >= perf_max_events)) { | 812 | if (unlikely(la_ptr >= alpha_pmu->num_pmcs)) { |
770 | /* This should never occur! */ | 813 | /* This should never occur! */ |
771 | irq_err_count++; | 814 | irq_err_count++; |
772 | pr_warning("PMI: silly index %ld\n", la_ptr); | 815 | pr_warning("PMI: silly index %ld\n", la_ptr); |
@@ -807,7 +850,7 @@ static void alpha_perf_event_irq_handler(unsigned long la_ptr, | |||
807 | /* Interrupts coming too quickly; "throttle" the | 850 | /* Interrupts coming too quickly; "throttle" the |
808 | * counter, i.e., disable it for a little while. | 851 | * counter, i.e., disable it for a little while. |
809 | */ | 852 | */ |
810 | cpuc->idx_mask &= ~(1UL<<idx); | 853 | alpha_pmu_stop(event, 0); |
811 | } | 854 | } |
812 | } | 855 | } |
813 | wrperfmon(PERFMON_CMD_ENABLE, cpuc->idx_mask); | 856 | wrperfmon(PERFMON_CMD_ENABLE, cpuc->idx_mask); |
@@ -837,6 +880,7 @@ void __init init_hw_perf_events(void) | |||
837 | 880 | ||
838 | /* And set up PMU specification */ | 881 | /* And set up PMU specification */ |
839 | alpha_pmu = &ev67_pmu; | 882 | alpha_pmu = &ev67_pmu; |
840 | perf_max_events = alpha_pmu->num_pmcs; | 883 | |
884 | perf_pmu_register(&pmu); | ||
841 | } | 885 | } |
842 | 886 | ||
diff --git a/arch/alpha/kernel/time.c b/arch/alpha/kernel/time.c index 396af1799ea4..0f1d8493cfca 100644 --- a/arch/alpha/kernel/time.c +++ b/arch/alpha/kernel/time.c | |||
@@ -41,7 +41,7 @@ | |||
41 | #include <linux/init.h> | 41 | #include <linux/init.h> |
42 | #include <linux/bcd.h> | 42 | #include <linux/bcd.h> |
43 | #include <linux/profile.h> | 43 | #include <linux/profile.h> |
44 | #include <linux/perf_event.h> | 44 | #include <linux/irq_work.h> |
45 | 45 | ||
46 | #include <asm/uaccess.h> | 46 | #include <asm/uaccess.h> |
47 | #include <asm/io.h> | 47 | #include <asm/io.h> |
@@ -83,25 +83,25 @@ static struct { | |||
83 | 83 | ||
84 | unsigned long est_cycle_freq; | 84 | unsigned long est_cycle_freq; |
85 | 85 | ||
86 | #ifdef CONFIG_PERF_EVENTS | 86 | #ifdef CONFIG_IRQ_WORK |
87 | 87 | ||
88 | DEFINE_PER_CPU(u8, perf_event_pending); | 88 | DEFINE_PER_CPU(u8, irq_work_pending); |
89 | 89 | ||
90 | #define set_perf_event_pending_flag() __get_cpu_var(perf_event_pending) = 1 | 90 | #define set_irq_work_pending_flag() __get_cpu_var(irq_work_pending) = 1 |
91 | #define test_perf_event_pending() __get_cpu_var(perf_event_pending) | 91 | #define test_irq_work_pending() __get_cpu_var(irq_work_pending) |
92 | #define clear_perf_event_pending() __get_cpu_var(perf_event_pending) = 0 | 92 | #define clear_irq_work_pending() __get_cpu_var(irq_work_pending) = 0 |
93 | 93 | ||
94 | void set_perf_event_pending(void) | 94 | void set_irq_work_pending(void) |
95 | { | 95 | { |
96 | set_perf_event_pending_flag(); | 96 | set_irq_work_pending_flag(); |
97 | } | 97 | } |
98 | 98 | ||
99 | #else /* CONFIG_PERF_EVENTS */ | 99 | #else /* CONFIG_IRQ_WORK */ |
100 | 100 | ||
101 | #define test_perf_event_pending() 0 | 101 | #define test_irq_work_pending() 0 |
102 | #define clear_perf_event_pending() | 102 | #define clear_irq_work_pending() |
103 | 103 | ||
104 | #endif /* CONFIG_PERF_EVENTS */ | 104 | #endif /* CONFIG_IRQ_WORK */ |
105 | 105 | ||
106 | 106 | ||
107 | static inline __u32 rpcc(void) | 107 | static inline __u32 rpcc(void) |
@@ -191,9 +191,9 @@ irqreturn_t timer_interrupt(int irq, void *dev) | |||
191 | 191 | ||
192 | write_sequnlock(&xtime_lock); | 192 | write_sequnlock(&xtime_lock); |
193 | 193 | ||
194 | if (test_perf_event_pending()) { | 194 | if (test_irq_work_pending()) { |
195 | clear_perf_event_pending(); | 195 | clear_irq_work_pending(); |
196 | perf_event_do_pending(); | 196 | irq_work_run(); |
197 | } | 197 | } |
198 | 198 | ||
199 | #ifndef CONFIG_SMP | 199 | #ifndef CONFIG_SMP |
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 9c26ba7244fb..9103904b3dab 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig | |||
@@ -23,6 +23,7 @@ config ARM | |||
23 | select HAVE_KERNEL_GZIP | 23 | select HAVE_KERNEL_GZIP |
24 | select HAVE_KERNEL_LZO | 24 | select HAVE_KERNEL_LZO |
25 | select HAVE_KERNEL_LZMA | 25 | select HAVE_KERNEL_LZMA |
26 | select HAVE_IRQ_WORK | ||
26 | select HAVE_PERF_EVENTS | 27 | select HAVE_PERF_EVENTS |
27 | select PERF_USE_VMALLOC | 28 | select PERF_USE_VMALLOC |
28 | select HAVE_REGS_AND_STACK_ACCESS_API | 29 | select HAVE_REGS_AND_STACK_ACCESS_API |
diff --git a/arch/arm/include/asm/perf_event.h b/arch/arm/include/asm/perf_event.h index b5799a3b7117..c4aa4e8c6af9 100644 --- a/arch/arm/include/asm/perf_event.h +++ b/arch/arm/include/asm/perf_event.h | |||
@@ -12,18 +12,6 @@ | |||
12 | #ifndef __ARM_PERF_EVENT_H__ | 12 | #ifndef __ARM_PERF_EVENT_H__ |
13 | #define __ARM_PERF_EVENT_H__ | 13 | #define __ARM_PERF_EVENT_H__ |
14 | 14 | ||
15 | /* | ||
16 | * NOP: on *most* (read: all supported) ARM platforms, the performance | ||
17 | * counter interrupts are regular interrupts and not an NMI. This | ||
18 | * means that when we receive the interrupt we can call | ||
19 | * perf_event_do_pending() that handles all of the work with | ||
20 | * interrupts disabled. | ||
21 | */ | ||
22 | static inline void | ||
23 | set_perf_event_pending(void) | ||
24 | { | ||
25 | } | ||
26 | |||
27 | /* ARM performance counters start from 1 (in the cp15 accesses) so use the | 15 | /* ARM performance counters start from 1 (in the cp15 accesses) so use the |
28 | * same indexes here for consistency. */ | 16 | * same indexes here for consistency. */ |
29 | #define PERF_EVENT_INDEX_OFFSET 1 | 17 | #define PERF_EVENT_INDEX_OFFSET 1 |
diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c index ecbb0288e5dd..49643b1467e6 100644 --- a/arch/arm/kernel/perf_event.c +++ b/arch/arm/kernel/perf_event.c | |||
@@ -123,6 +123,12 @@ armpmu_get_max_events(void) | |||
123 | } | 123 | } |
124 | EXPORT_SYMBOL_GPL(armpmu_get_max_events); | 124 | EXPORT_SYMBOL_GPL(armpmu_get_max_events); |
125 | 125 | ||
126 | int perf_num_counters(void) | ||
127 | { | ||
128 | return armpmu_get_max_events(); | ||
129 | } | ||
130 | EXPORT_SYMBOL_GPL(perf_num_counters); | ||
131 | |||
126 | #define HW_OP_UNSUPPORTED 0xFFFF | 132 | #define HW_OP_UNSUPPORTED 0xFFFF |
127 | 133 | ||
128 | #define C(_x) \ | 134 | #define C(_x) \ |
@@ -221,46 +227,56 @@ again: | |||
221 | } | 227 | } |
222 | 228 | ||
223 | static void | 229 | static void |
224 | armpmu_disable(struct perf_event *event) | 230 | armpmu_read(struct perf_event *event) |
225 | { | 231 | { |
226 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
227 | struct hw_perf_event *hwc = &event->hw; | 232 | struct hw_perf_event *hwc = &event->hw; |
228 | int idx = hwc->idx; | ||
229 | |||
230 | WARN_ON(idx < 0); | ||
231 | |||
232 | clear_bit(idx, cpuc->active_mask); | ||
233 | armpmu->disable(hwc, idx); | ||
234 | |||
235 | barrier(); | ||
236 | 233 | ||
237 | armpmu_event_update(event, hwc, idx); | 234 | /* Don't read disabled counters! */ |
238 | cpuc->events[idx] = NULL; | 235 | if (hwc->idx < 0) |
239 | clear_bit(idx, cpuc->used_mask); | 236 | return; |
240 | 237 | ||
241 | perf_event_update_userpage(event); | 238 | armpmu_event_update(event, hwc, hwc->idx); |
242 | } | 239 | } |
243 | 240 | ||
244 | static void | 241 | static void |
245 | armpmu_read(struct perf_event *event) | 242 | armpmu_stop(struct perf_event *event, int flags) |
246 | { | 243 | { |
247 | struct hw_perf_event *hwc = &event->hw; | 244 | struct hw_perf_event *hwc = &event->hw; |
248 | 245 | ||
249 | /* Don't read disabled counters! */ | 246 | if (!armpmu) |
250 | if (hwc->idx < 0) | ||
251 | return; | 247 | return; |
252 | 248 | ||
253 | armpmu_event_update(event, hwc, hwc->idx); | 249 | /* |
250 | * ARM pmu always has to update the counter, so ignore | ||
251 | * PERF_EF_UPDATE, see comments in armpmu_start(). | ||
252 | */ | ||
253 | if (!(hwc->state & PERF_HES_STOPPED)) { | ||
254 | armpmu->disable(hwc, hwc->idx); | ||
255 | barrier(); /* why? */ | ||
256 | armpmu_event_update(event, hwc, hwc->idx); | ||
257 | hwc->state |= PERF_HES_STOPPED | PERF_HES_UPTODATE; | ||
258 | } | ||
254 | } | 259 | } |
255 | 260 | ||
256 | static void | 261 | static void |
257 | armpmu_unthrottle(struct perf_event *event) | 262 | armpmu_start(struct perf_event *event, int flags) |
258 | { | 263 | { |
259 | struct hw_perf_event *hwc = &event->hw; | 264 | struct hw_perf_event *hwc = &event->hw; |
260 | 265 | ||
266 | if (!armpmu) | ||
267 | return; | ||
268 | |||
269 | /* | ||
270 | * ARM pmu always has to reprogram the period, so ignore | ||
271 | * PERF_EF_RELOAD, see the comment below. | ||
272 | */ | ||
273 | if (flags & PERF_EF_RELOAD) | ||
274 | WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE)); | ||
275 | |||
276 | hwc->state = 0; | ||
261 | /* | 277 | /* |
262 | * Set the period again. Some counters can't be stopped, so when we | 278 | * Set the period again. Some counters can't be stopped, so when we |
263 | * were throttled we simply disabled the IRQ source and the counter | 279 | * were stopped we simply disabled the IRQ source and the counter |
264 | * may have been left counting. If we don't do this step then we may | 280 | * may have been left counting. If we don't do this step then we may |
265 | * get an interrupt too soon or *way* too late if the overflow has | 281 | * get an interrupt too soon or *way* too late if the overflow has |
266 | * happened since disabling. | 282 | * happened since disabling. |
@@ -269,14 +285,33 @@ armpmu_unthrottle(struct perf_event *event) | |||
269 | armpmu->enable(hwc, hwc->idx); | 285 | armpmu->enable(hwc, hwc->idx); |
270 | } | 286 | } |
271 | 287 | ||
288 | static void | ||
289 | armpmu_del(struct perf_event *event, int flags) | ||
290 | { | ||
291 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
292 | struct hw_perf_event *hwc = &event->hw; | ||
293 | int idx = hwc->idx; | ||
294 | |||
295 | WARN_ON(idx < 0); | ||
296 | |||
297 | clear_bit(idx, cpuc->active_mask); | ||
298 | armpmu_stop(event, PERF_EF_UPDATE); | ||
299 | cpuc->events[idx] = NULL; | ||
300 | clear_bit(idx, cpuc->used_mask); | ||
301 | |||
302 | perf_event_update_userpage(event); | ||
303 | } | ||
304 | |||
272 | static int | 305 | static int |
273 | armpmu_enable(struct perf_event *event) | 306 | armpmu_add(struct perf_event *event, int flags) |
274 | { | 307 | { |
275 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | 308 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); |
276 | struct hw_perf_event *hwc = &event->hw; | 309 | struct hw_perf_event *hwc = &event->hw; |
277 | int idx; | 310 | int idx; |
278 | int err = 0; | 311 | int err = 0; |
279 | 312 | ||
313 | perf_pmu_disable(event->pmu); | ||
314 | |||
280 | /* If we don't have a space for the counter then finish early. */ | 315 | /* If we don't have a space for the counter then finish early. */ |
281 | idx = armpmu->get_event_idx(cpuc, hwc); | 316 | idx = armpmu->get_event_idx(cpuc, hwc); |
282 | if (idx < 0) { | 317 | if (idx < 0) { |
@@ -293,25 +328,19 @@ armpmu_enable(struct perf_event *event) | |||
293 | cpuc->events[idx] = event; | 328 | cpuc->events[idx] = event; |
294 | set_bit(idx, cpuc->active_mask); | 329 | set_bit(idx, cpuc->active_mask); |
295 | 330 | ||
296 | /* Set the period for the event. */ | 331 | hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE; |
297 | armpmu_event_set_period(event, hwc, idx); | 332 | if (flags & PERF_EF_START) |
298 | 333 | armpmu_start(event, PERF_EF_RELOAD); | |
299 | /* Enable the event. */ | ||
300 | armpmu->enable(hwc, idx); | ||
301 | 334 | ||
302 | /* Propagate our changes to the userspace mapping. */ | 335 | /* Propagate our changes to the userspace mapping. */ |
303 | perf_event_update_userpage(event); | 336 | perf_event_update_userpage(event); |
304 | 337 | ||
305 | out: | 338 | out: |
339 | perf_pmu_enable(event->pmu); | ||
306 | return err; | 340 | return err; |
307 | } | 341 | } |
308 | 342 | ||
309 | static struct pmu pmu = { | 343 | static struct pmu pmu; |
310 | .enable = armpmu_enable, | ||
311 | .disable = armpmu_disable, | ||
312 | .unthrottle = armpmu_unthrottle, | ||
313 | .read = armpmu_read, | ||
314 | }; | ||
315 | 344 | ||
316 | static int | 345 | static int |
317 | validate_event(struct cpu_hw_events *cpuc, | 346 | validate_event(struct cpu_hw_events *cpuc, |
@@ -491,20 +520,29 @@ __hw_perf_event_init(struct perf_event *event) | |||
491 | return err; | 520 | return err; |
492 | } | 521 | } |
493 | 522 | ||
494 | const struct pmu * | 523 | static int armpmu_event_init(struct perf_event *event) |
495 | hw_perf_event_init(struct perf_event *event) | ||
496 | { | 524 | { |
497 | int err = 0; | 525 | int err = 0; |
498 | 526 | ||
527 | switch (event->attr.type) { | ||
528 | case PERF_TYPE_RAW: | ||
529 | case PERF_TYPE_HARDWARE: | ||
530 | case PERF_TYPE_HW_CACHE: | ||
531 | break; | ||
532 | |||
533 | default: | ||
534 | return -ENOENT; | ||
535 | } | ||
536 | |||
499 | if (!armpmu) | 537 | if (!armpmu) |
500 | return ERR_PTR(-ENODEV); | 538 | return -ENODEV; |
501 | 539 | ||
502 | event->destroy = hw_perf_event_destroy; | 540 | event->destroy = hw_perf_event_destroy; |
503 | 541 | ||
504 | if (!atomic_inc_not_zero(&active_events)) { | 542 | if (!atomic_inc_not_zero(&active_events)) { |
505 | if (atomic_read(&active_events) > perf_max_events) { | 543 | if (atomic_read(&active_events) > armpmu->num_events) { |
506 | atomic_dec(&active_events); | 544 | atomic_dec(&active_events); |
507 | return ERR_PTR(-ENOSPC); | 545 | return -ENOSPC; |
508 | } | 546 | } |
509 | 547 | ||
510 | mutex_lock(&pmu_reserve_mutex); | 548 | mutex_lock(&pmu_reserve_mutex); |
@@ -518,17 +556,16 @@ hw_perf_event_init(struct perf_event *event) | |||
518 | } | 556 | } |
519 | 557 | ||
520 | if (err) | 558 | if (err) |
521 | return ERR_PTR(err); | 559 | return err; |
522 | 560 | ||
523 | err = __hw_perf_event_init(event); | 561 | err = __hw_perf_event_init(event); |
524 | if (err) | 562 | if (err) |
525 | hw_perf_event_destroy(event); | 563 | hw_perf_event_destroy(event); |
526 | 564 | ||
527 | return err ? ERR_PTR(err) : &pmu; | 565 | return err; |
528 | } | 566 | } |
529 | 567 | ||
530 | void | 568 | static void armpmu_enable(struct pmu *pmu) |
531 | hw_perf_enable(void) | ||
532 | { | 569 | { |
533 | /* Enable all of the perf events on hardware. */ | 570 | /* Enable all of the perf events on hardware. */ |
534 | int idx; | 571 | int idx; |
@@ -549,13 +586,23 @@ hw_perf_enable(void) | |||
549 | armpmu->start(); | 586 | armpmu->start(); |
550 | } | 587 | } |
551 | 588 | ||
552 | void | 589 | static void armpmu_disable(struct pmu *pmu) |
553 | hw_perf_disable(void) | ||
554 | { | 590 | { |
555 | if (armpmu) | 591 | if (armpmu) |
556 | armpmu->stop(); | 592 | armpmu->stop(); |
557 | } | 593 | } |
558 | 594 | ||
595 | static struct pmu pmu = { | ||
596 | .pmu_enable = armpmu_enable, | ||
597 | .pmu_disable = armpmu_disable, | ||
598 | .event_init = armpmu_event_init, | ||
599 | .add = armpmu_add, | ||
600 | .del = armpmu_del, | ||
601 | .start = armpmu_start, | ||
602 | .stop = armpmu_stop, | ||
603 | .read = armpmu_read, | ||
604 | }; | ||
605 | |||
559 | /* | 606 | /* |
560 | * ARMv6 Performance counter handling code. | 607 | * ARMv6 Performance counter handling code. |
561 | * | 608 | * |
@@ -1045,7 +1092,7 @@ armv6pmu_handle_irq(int irq_num, | |||
1045 | * platforms that can have the PMU interrupts raised as an NMI, this | 1092 | * platforms that can have the PMU interrupts raised as an NMI, this |
1046 | * will not work. | 1093 | * will not work. |
1047 | */ | 1094 | */ |
1048 | perf_event_do_pending(); | 1095 | irq_work_run(); |
1049 | 1096 | ||
1050 | return IRQ_HANDLED; | 1097 | return IRQ_HANDLED; |
1051 | } | 1098 | } |
@@ -2021,7 +2068,7 @@ static irqreturn_t armv7pmu_handle_irq(int irq_num, void *dev) | |||
2021 | * platforms that can have the PMU interrupts raised as an NMI, this | 2068 | * platforms that can have the PMU interrupts raised as an NMI, this |
2022 | * will not work. | 2069 | * will not work. |
2023 | */ | 2070 | */ |
2024 | perf_event_do_pending(); | 2071 | irq_work_run(); |
2025 | 2072 | ||
2026 | return IRQ_HANDLED; | 2073 | return IRQ_HANDLED; |
2027 | } | 2074 | } |
@@ -2389,7 +2436,7 @@ xscale1pmu_handle_irq(int irq_num, void *dev) | |||
2389 | armpmu->disable(hwc, idx); | 2436 | armpmu->disable(hwc, idx); |
2390 | } | 2437 | } |
2391 | 2438 | ||
2392 | perf_event_do_pending(); | 2439 | irq_work_run(); |
2393 | 2440 | ||
2394 | /* | 2441 | /* |
2395 | * Re-enable the PMU. | 2442 | * Re-enable the PMU. |
@@ -2716,7 +2763,7 @@ xscale2pmu_handle_irq(int irq_num, void *dev) | |||
2716 | armpmu->disable(hwc, idx); | 2763 | armpmu->disable(hwc, idx); |
2717 | } | 2764 | } |
2718 | 2765 | ||
2719 | perf_event_do_pending(); | 2766 | irq_work_run(); |
2720 | 2767 | ||
2721 | /* | 2768 | /* |
2722 | * Re-enable the PMU. | 2769 | * Re-enable the PMU. |
@@ -2933,14 +2980,12 @@ init_hw_perf_events(void) | |||
2933 | armpmu = &armv6pmu; | 2980 | armpmu = &armv6pmu; |
2934 | memcpy(armpmu_perf_cache_map, armv6_perf_cache_map, | 2981 | memcpy(armpmu_perf_cache_map, armv6_perf_cache_map, |
2935 | sizeof(armv6_perf_cache_map)); | 2982 | sizeof(armv6_perf_cache_map)); |
2936 | perf_max_events = armv6pmu.num_events; | ||
2937 | break; | 2983 | break; |
2938 | case 0xB020: /* ARM11mpcore */ | 2984 | case 0xB020: /* ARM11mpcore */ |
2939 | armpmu = &armv6mpcore_pmu; | 2985 | armpmu = &armv6mpcore_pmu; |
2940 | memcpy(armpmu_perf_cache_map, | 2986 | memcpy(armpmu_perf_cache_map, |
2941 | armv6mpcore_perf_cache_map, | 2987 | armv6mpcore_perf_cache_map, |
2942 | sizeof(armv6mpcore_perf_cache_map)); | 2988 | sizeof(armv6mpcore_perf_cache_map)); |
2943 | perf_max_events = armv6mpcore_pmu.num_events; | ||
2944 | break; | 2989 | break; |
2945 | case 0xC080: /* Cortex-A8 */ | 2990 | case 0xC080: /* Cortex-A8 */ |
2946 | armv7pmu.id = ARM_PERF_PMU_ID_CA8; | 2991 | armv7pmu.id = ARM_PERF_PMU_ID_CA8; |
@@ -2952,7 +2997,6 @@ init_hw_perf_events(void) | |||
2952 | /* Reset PMNC and read the nb of CNTx counters | 2997 | /* Reset PMNC and read the nb of CNTx counters |
2953 | supported */ | 2998 | supported */ |
2954 | armv7pmu.num_events = armv7_reset_read_pmnc(); | 2999 | armv7pmu.num_events = armv7_reset_read_pmnc(); |
2955 | perf_max_events = armv7pmu.num_events; | ||
2956 | break; | 3000 | break; |
2957 | case 0xC090: /* Cortex-A9 */ | 3001 | case 0xC090: /* Cortex-A9 */ |
2958 | armv7pmu.id = ARM_PERF_PMU_ID_CA9; | 3002 | armv7pmu.id = ARM_PERF_PMU_ID_CA9; |
@@ -2964,7 +3008,6 @@ init_hw_perf_events(void) | |||
2964 | /* Reset PMNC and read the nb of CNTx counters | 3008 | /* Reset PMNC and read the nb of CNTx counters |
2965 | supported */ | 3009 | supported */ |
2966 | armv7pmu.num_events = armv7_reset_read_pmnc(); | 3010 | armv7pmu.num_events = armv7_reset_read_pmnc(); |
2967 | perf_max_events = armv7pmu.num_events; | ||
2968 | break; | 3011 | break; |
2969 | } | 3012 | } |
2970 | /* Intel CPUs [xscale]. */ | 3013 | /* Intel CPUs [xscale]. */ |
@@ -2975,13 +3018,11 @@ init_hw_perf_events(void) | |||
2975 | armpmu = &xscale1pmu; | 3018 | armpmu = &xscale1pmu; |
2976 | memcpy(armpmu_perf_cache_map, xscale_perf_cache_map, | 3019 | memcpy(armpmu_perf_cache_map, xscale_perf_cache_map, |
2977 | sizeof(xscale_perf_cache_map)); | 3020 | sizeof(xscale_perf_cache_map)); |
2978 | perf_max_events = xscale1pmu.num_events; | ||
2979 | break; | 3021 | break; |
2980 | case 2: | 3022 | case 2: |
2981 | armpmu = &xscale2pmu; | 3023 | armpmu = &xscale2pmu; |
2982 | memcpy(armpmu_perf_cache_map, xscale_perf_cache_map, | 3024 | memcpy(armpmu_perf_cache_map, xscale_perf_cache_map, |
2983 | sizeof(xscale_perf_cache_map)); | 3025 | sizeof(xscale_perf_cache_map)); |
2984 | perf_max_events = xscale2pmu.num_events; | ||
2985 | break; | 3026 | break; |
2986 | } | 3027 | } |
2987 | } | 3028 | } |
@@ -2991,9 +3032,10 @@ init_hw_perf_events(void) | |||
2991 | arm_pmu_names[armpmu->id], armpmu->num_events); | 3032 | arm_pmu_names[armpmu->id], armpmu->num_events); |
2992 | } else { | 3033 | } else { |
2993 | pr_info("no hardware support available\n"); | 3034 | pr_info("no hardware support available\n"); |
2994 | perf_max_events = -1; | ||
2995 | } | 3035 | } |
2996 | 3036 | ||
3037 | perf_pmu_register(&pmu); | ||
3038 | |||
2997 | return 0; | 3039 | return 0; |
2998 | } | 3040 | } |
2999 | arch_initcall(init_hw_perf_events); | 3041 | arch_initcall(init_hw_perf_events); |
@@ -3001,13 +3043,6 @@ arch_initcall(init_hw_perf_events); | |||
3001 | /* | 3043 | /* |
3002 | * Callchain handling code. | 3044 | * Callchain handling code. |
3003 | */ | 3045 | */ |
3004 | static inline void | ||
3005 | callchain_store(struct perf_callchain_entry *entry, | ||
3006 | u64 ip) | ||
3007 | { | ||
3008 | if (entry->nr < PERF_MAX_STACK_DEPTH) | ||
3009 | entry->ip[entry->nr++] = ip; | ||
3010 | } | ||
3011 | 3046 | ||
3012 | /* | 3047 | /* |
3013 | * The registers we're interested in are at the end of the variable | 3048 | * The registers we're interested in are at the end of the variable |
@@ -3039,7 +3074,7 @@ user_backtrace(struct frame_tail *tail, | |||
3039 | if (__copy_from_user_inatomic(&buftail, tail, sizeof(buftail))) | 3074 | if (__copy_from_user_inatomic(&buftail, tail, sizeof(buftail))) |
3040 | return NULL; | 3075 | return NULL; |
3041 | 3076 | ||
3042 | callchain_store(entry, buftail.lr); | 3077 | perf_callchain_store(entry, buftail.lr); |
3043 | 3078 | ||
3044 | /* | 3079 | /* |
3045 | * Frame pointers should strictly progress back up the stack | 3080 | * Frame pointers should strictly progress back up the stack |
@@ -3051,16 +3086,11 @@ user_backtrace(struct frame_tail *tail, | |||
3051 | return buftail.fp - 1; | 3086 | return buftail.fp - 1; |
3052 | } | 3087 | } |
3053 | 3088 | ||
3054 | static void | 3089 | void |
3055 | perf_callchain_user(struct pt_regs *regs, | 3090 | perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs) |
3056 | struct perf_callchain_entry *entry) | ||
3057 | { | 3091 | { |
3058 | struct frame_tail *tail; | 3092 | struct frame_tail *tail; |
3059 | 3093 | ||
3060 | callchain_store(entry, PERF_CONTEXT_USER); | ||
3061 | |||
3062 | if (!user_mode(regs)) | ||
3063 | regs = task_pt_regs(current); | ||
3064 | 3094 | ||
3065 | tail = (struct frame_tail *)regs->ARM_fp - 1; | 3095 | tail = (struct frame_tail *)regs->ARM_fp - 1; |
3066 | 3096 | ||
@@ -3078,56 +3108,18 @@ callchain_trace(struct stackframe *fr, | |||
3078 | void *data) | 3108 | void *data) |
3079 | { | 3109 | { |
3080 | struct perf_callchain_entry *entry = data; | 3110 | struct perf_callchain_entry *entry = data; |
3081 | callchain_store(entry, fr->pc); | 3111 | perf_callchain_store(entry, fr->pc); |
3082 | return 0; | 3112 | return 0; |
3083 | } | 3113 | } |
3084 | 3114 | ||
3085 | static void | 3115 | void |
3086 | perf_callchain_kernel(struct pt_regs *regs, | 3116 | perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs) |
3087 | struct perf_callchain_entry *entry) | ||
3088 | { | 3117 | { |
3089 | struct stackframe fr; | 3118 | struct stackframe fr; |
3090 | 3119 | ||
3091 | callchain_store(entry, PERF_CONTEXT_KERNEL); | ||
3092 | fr.fp = regs->ARM_fp; | 3120 | fr.fp = regs->ARM_fp; |
3093 | fr.sp = regs->ARM_sp; | 3121 | fr.sp = regs->ARM_sp; |
3094 | fr.lr = regs->ARM_lr; | 3122 | fr.lr = regs->ARM_lr; |
3095 | fr.pc = regs->ARM_pc; | 3123 | fr.pc = regs->ARM_pc; |
3096 | walk_stackframe(&fr, callchain_trace, entry); | 3124 | walk_stackframe(&fr, callchain_trace, entry); |
3097 | } | 3125 | } |
3098 | |||
3099 | static void | ||
3100 | perf_do_callchain(struct pt_regs *regs, | ||
3101 | struct perf_callchain_entry *entry) | ||
3102 | { | ||
3103 | int is_user; | ||
3104 | |||
3105 | if (!regs) | ||
3106 | return; | ||
3107 | |||
3108 | is_user = user_mode(regs); | ||
3109 | |||
3110 | if (!current || !current->pid) | ||
3111 | return; | ||
3112 | |||
3113 | if (is_user && current->state != TASK_RUNNING) | ||
3114 | return; | ||
3115 | |||
3116 | if (!is_user) | ||
3117 | perf_callchain_kernel(regs, entry); | ||
3118 | |||
3119 | if (current->mm) | ||
3120 | perf_callchain_user(regs, entry); | ||
3121 | } | ||
3122 | |||
3123 | static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_irq_entry); | ||
3124 | |||
3125 | struct perf_callchain_entry * | ||
3126 | perf_callchain(struct pt_regs *regs) | ||
3127 | { | ||
3128 | struct perf_callchain_entry *entry = &__get_cpu_var(pmc_irq_entry); | ||
3129 | |||
3130 | entry->nr = 0; | ||
3131 | perf_do_callchain(regs, entry); | ||
3132 | return entry; | ||
3133 | } | ||
diff --git a/arch/arm/mach-bcmring/dma.c b/arch/arm/mach-bcmring/dma.c index 29c0a911df26..77eb35c89cd0 100644 --- a/arch/arm/mach-bcmring/dma.c +++ b/arch/arm/mach-bcmring/dma.c | |||
@@ -691,7 +691,7 @@ int dma_init(void) | |||
691 | 691 | ||
692 | memset(&gDMA, 0, sizeof(gDMA)); | 692 | memset(&gDMA, 0, sizeof(gDMA)); |
693 | 693 | ||
694 | init_MUTEX_LOCKED(&gDMA.lock); | 694 | sema_init(&gDMA.lock, 0); |
695 | init_waitqueue_head(&gDMA.freeChannelQ); | 695 | init_waitqueue_head(&gDMA.freeChannelQ); |
696 | 696 | ||
697 | /* Initialize the Hardware */ | 697 | /* Initialize the Hardware */ |
@@ -1574,7 +1574,7 @@ int dma_init_mem_map(DMA_MemMap_t *memMap) | |||
1574 | { | 1574 | { |
1575 | memset(memMap, 0, sizeof(*memMap)); | 1575 | memset(memMap, 0, sizeof(*memMap)); |
1576 | 1576 | ||
1577 | init_MUTEX(&memMap->lock); | 1577 | sema_init(&memMap->lock, 1); |
1578 | 1578 | ||
1579 | return 0; | 1579 | return 0; |
1580 | } | 1580 | } |
diff --git a/arch/arm/oprofile/Makefile b/arch/arm/oprofile/Makefile index e666eafed152..b2215c61cdf0 100644 --- a/arch/arm/oprofile/Makefile +++ b/arch/arm/oprofile/Makefile | |||
@@ -6,4 +6,8 @@ DRIVER_OBJS = $(addprefix ../../../drivers/oprofile/, \ | |||
6 | oprofilefs.o oprofile_stats.o \ | 6 | oprofilefs.o oprofile_stats.o \ |
7 | timer_int.o ) | 7 | timer_int.o ) |
8 | 8 | ||
9 | ifeq ($(CONFIG_HW_PERF_EVENTS),y) | ||
10 | DRIVER_OBJS += $(addprefix ../../../drivers/oprofile/, oprofile_perf.o) | ||
11 | endif | ||
12 | |||
9 | oprofile-y := $(DRIVER_OBJS) common.o | 13 | oprofile-y := $(DRIVER_OBJS) common.o |
diff --git a/arch/arm/oprofile/common.c b/arch/arm/oprofile/common.c index 72e09eb642dd..8aa974491dfc 100644 --- a/arch/arm/oprofile/common.c +++ b/arch/arm/oprofile/common.c | |||
@@ -25,139 +25,10 @@ | |||
25 | #include <asm/ptrace.h> | 25 | #include <asm/ptrace.h> |
26 | 26 | ||
27 | #ifdef CONFIG_HW_PERF_EVENTS | 27 | #ifdef CONFIG_HW_PERF_EVENTS |
28 | /* | 28 | char *op_name_from_perf_id(void) |
29 | * Per performance monitor configuration as set via oprofilefs. | ||
30 | */ | ||
31 | struct op_counter_config { | ||
32 | unsigned long count; | ||
33 | unsigned long enabled; | ||
34 | unsigned long event; | ||
35 | unsigned long unit_mask; | ||
36 | unsigned long kernel; | ||
37 | unsigned long user; | ||
38 | struct perf_event_attr attr; | ||
39 | }; | ||
40 | |||
41 | static int op_arm_enabled; | ||
42 | static DEFINE_MUTEX(op_arm_mutex); | ||
43 | |||
44 | static struct op_counter_config *counter_config; | ||
45 | static struct perf_event **perf_events[nr_cpumask_bits]; | ||
46 | static int perf_num_counters; | ||
47 | |||
48 | /* | ||
49 | * Overflow callback for oprofile. | ||
50 | */ | ||
51 | static void op_overflow_handler(struct perf_event *event, int unused, | ||
52 | struct perf_sample_data *data, struct pt_regs *regs) | ||
53 | { | 29 | { |
54 | int id; | 30 | enum arm_perf_pmu_ids id = armpmu_get_pmu_id(); |
55 | u32 cpu = smp_processor_id(); | ||
56 | |||
57 | for (id = 0; id < perf_num_counters; ++id) | ||
58 | if (perf_events[cpu][id] == event) | ||
59 | break; | ||
60 | |||
61 | if (id != perf_num_counters) | ||
62 | oprofile_add_sample(regs, id); | ||
63 | else | ||
64 | pr_warning("oprofile: ignoring spurious overflow " | ||
65 | "on cpu %u\n", cpu); | ||
66 | } | ||
67 | |||
68 | /* | ||
69 | * Called by op_arm_setup to create perf attributes to mirror the oprofile | ||
70 | * settings in counter_config. Attributes are created as `pinned' events and | ||
71 | * so are permanently scheduled on the PMU. | ||
72 | */ | ||
73 | static void op_perf_setup(void) | ||
74 | { | ||
75 | int i; | ||
76 | u32 size = sizeof(struct perf_event_attr); | ||
77 | struct perf_event_attr *attr; | ||
78 | |||
79 | for (i = 0; i < perf_num_counters; ++i) { | ||
80 | attr = &counter_config[i].attr; | ||
81 | memset(attr, 0, size); | ||
82 | attr->type = PERF_TYPE_RAW; | ||
83 | attr->size = size; | ||
84 | attr->config = counter_config[i].event; | ||
85 | attr->sample_period = counter_config[i].count; | ||
86 | attr->pinned = 1; | ||
87 | } | ||
88 | } | ||
89 | |||
90 | static int op_create_counter(int cpu, int event) | ||
91 | { | ||
92 | int ret = 0; | ||
93 | struct perf_event *pevent; | ||
94 | |||
95 | if (!counter_config[event].enabled || (perf_events[cpu][event] != NULL)) | ||
96 | return ret; | ||
97 | |||
98 | pevent = perf_event_create_kernel_counter(&counter_config[event].attr, | ||
99 | cpu, -1, | ||
100 | op_overflow_handler); | ||
101 | |||
102 | if (IS_ERR(pevent)) { | ||
103 | ret = PTR_ERR(pevent); | ||
104 | } else if (pevent->state != PERF_EVENT_STATE_ACTIVE) { | ||
105 | perf_event_release_kernel(pevent); | ||
106 | pr_warning("oprofile: failed to enable event %d " | ||
107 | "on CPU %d\n", event, cpu); | ||
108 | ret = -EBUSY; | ||
109 | } else { | ||
110 | perf_events[cpu][event] = pevent; | ||
111 | } | ||
112 | |||
113 | return ret; | ||
114 | } | ||
115 | 31 | ||
116 | static void op_destroy_counter(int cpu, int event) | ||
117 | { | ||
118 | struct perf_event *pevent = perf_events[cpu][event]; | ||
119 | |||
120 | if (pevent) { | ||
121 | perf_event_release_kernel(pevent); | ||
122 | perf_events[cpu][event] = NULL; | ||
123 | } | ||
124 | } | ||
125 | |||
126 | /* | ||
127 | * Called by op_arm_start to create active perf events based on the | ||
128 | * perviously configured attributes. | ||
129 | */ | ||
130 | static int op_perf_start(void) | ||
131 | { | ||
132 | int cpu, event, ret = 0; | ||
133 | |||
134 | for_each_online_cpu(cpu) { | ||
135 | for (event = 0; event < perf_num_counters; ++event) { | ||
136 | ret = op_create_counter(cpu, event); | ||
137 | if (ret) | ||
138 | goto out; | ||
139 | } | ||
140 | } | ||
141 | |||
142 | out: | ||
143 | return ret; | ||
144 | } | ||
145 | |||
146 | /* | ||
147 | * Called by op_arm_stop at the end of a profiling run. | ||
148 | */ | ||
149 | static void op_perf_stop(void) | ||
150 | { | ||
151 | int cpu, event; | ||
152 | |||
153 | for_each_online_cpu(cpu) | ||
154 | for (event = 0; event < perf_num_counters; ++event) | ||
155 | op_destroy_counter(cpu, event); | ||
156 | } | ||
157 | |||
158 | |||
159 | static char *op_name_from_perf_id(enum arm_perf_pmu_ids id) | ||
160 | { | ||
161 | switch (id) { | 32 | switch (id) { |
162 | case ARM_PERF_PMU_ID_XSCALE1: | 33 | case ARM_PERF_PMU_ID_XSCALE1: |
163 | return "arm/xscale1"; | 34 | return "arm/xscale1"; |
@@ -176,116 +47,6 @@ static char *op_name_from_perf_id(enum arm_perf_pmu_ids id) | |||
176 | } | 47 | } |
177 | } | 48 | } |
178 | 49 | ||
179 | static int op_arm_create_files(struct super_block *sb, struct dentry *root) | ||
180 | { | ||
181 | unsigned int i; | ||
182 | |||
183 | for (i = 0; i < perf_num_counters; i++) { | ||
184 | struct dentry *dir; | ||
185 | char buf[4]; | ||
186 | |||
187 | snprintf(buf, sizeof buf, "%d", i); | ||
188 | dir = oprofilefs_mkdir(sb, root, buf); | ||
189 | oprofilefs_create_ulong(sb, dir, "enabled", &counter_config[i].enabled); | ||
190 | oprofilefs_create_ulong(sb, dir, "event", &counter_config[i].event); | ||
191 | oprofilefs_create_ulong(sb, dir, "count", &counter_config[i].count); | ||
192 | oprofilefs_create_ulong(sb, dir, "unit_mask", &counter_config[i].unit_mask); | ||
193 | oprofilefs_create_ulong(sb, dir, "kernel", &counter_config[i].kernel); | ||
194 | oprofilefs_create_ulong(sb, dir, "user", &counter_config[i].user); | ||
195 | } | ||
196 | |||
197 | return 0; | ||
198 | } | ||
199 | |||
200 | static int op_arm_setup(void) | ||
201 | { | ||
202 | spin_lock(&oprofilefs_lock); | ||
203 | op_perf_setup(); | ||
204 | spin_unlock(&oprofilefs_lock); | ||
205 | return 0; | ||
206 | } | ||
207 | |||
208 | static int op_arm_start(void) | ||
209 | { | ||
210 | int ret = -EBUSY; | ||
211 | |||
212 | mutex_lock(&op_arm_mutex); | ||
213 | if (!op_arm_enabled) { | ||
214 | ret = 0; | ||
215 | op_perf_start(); | ||
216 | op_arm_enabled = 1; | ||
217 | } | ||
218 | mutex_unlock(&op_arm_mutex); | ||
219 | return ret; | ||
220 | } | ||
221 | |||
222 | static void op_arm_stop(void) | ||
223 | { | ||
224 | mutex_lock(&op_arm_mutex); | ||
225 | if (op_arm_enabled) | ||
226 | op_perf_stop(); | ||
227 | op_arm_enabled = 0; | ||
228 | mutex_unlock(&op_arm_mutex); | ||
229 | } | ||
230 | |||
231 | #ifdef CONFIG_PM | ||
232 | static int op_arm_suspend(struct platform_device *dev, pm_message_t state) | ||
233 | { | ||
234 | mutex_lock(&op_arm_mutex); | ||
235 | if (op_arm_enabled) | ||
236 | op_perf_stop(); | ||
237 | mutex_unlock(&op_arm_mutex); | ||
238 | return 0; | ||
239 | } | ||
240 | |||
241 | static int op_arm_resume(struct platform_device *dev) | ||
242 | { | ||
243 | mutex_lock(&op_arm_mutex); | ||
244 | if (op_arm_enabled && op_perf_start()) | ||
245 | op_arm_enabled = 0; | ||
246 | mutex_unlock(&op_arm_mutex); | ||
247 | return 0; | ||
248 | } | ||
249 | |||
250 | static struct platform_driver oprofile_driver = { | ||
251 | .driver = { | ||
252 | .name = "arm-oprofile", | ||
253 | }, | ||
254 | .resume = op_arm_resume, | ||
255 | .suspend = op_arm_suspend, | ||
256 | }; | ||
257 | |||
258 | static struct platform_device *oprofile_pdev; | ||
259 | |||
260 | static int __init init_driverfs(void) | ||
261 | { | ||
262 | int ret; | ||
263 | |||
264 | ret = platform_driver_register(&oprofile_driver); | ||
265 | if (ret) | ||
266 | goto out; | ||
267 | |||
268 | oprofile_pdev = platform_device_register_simple( | ||
269 | oprofile_driver.driver.name, 0, NULL, 0); | ||
270 | if (IS_ERR(oprofile_pdev)) { | ||
271 | ret = PTR_ERR(oprofile_pdev); | ||
272 | platform_driver_unregister(&oprofile_driver); | ||
273 | } | ||
274 | |||
275 | out: | ||
276 | return ret; | ||
277 | } | ||
278 | |||
279 | static void exit_driverfs(void) | ||
280 | { | ||
281 | platform_device_unregister(oprofile_pdev); | ||
282 | platform_driver_unregister(&oprofile_driver); | ||
283 | } | ||
284 | #else | ||
285 | static int __init init_driverfs(void) { return 0; } | ||
286 | #define exit_driverfs() do { } while (0) | ||
287 | #endif /* CONFIG_PM */ | ||
288 | |||
289 | static int report_trace(struct stackframe *frame, void *d) | 50 | static int report_trace(struct stackframe *frame, void *d) |
290 | { | 51 | { |
291 | unsigned int *depth = d; | 52 | unsigned int *depth = d; |
@@ -350,74 +111,14 @@ static void arm_backtrace(struct pt_regs * const regs, unsigned int depth) | |||
350 | 111 | ||
351 | int __init oprofile_arch_init(struct oprofile_operations *ops) | 112 | int __init oprofile_arch_init(struct oprofile_operations *ops) |
352 | { | 113 | { |
353 | int cpu, ret = 0; | ||
354 | |||
355 | perf_num_counters = armpmu_get_max_events(); | ||
356 | |||
357 | counter_config = kcalloc(perf_num_counters, | ||
358 | sizeof(struct op_counter_config), GFP_KERNEL); | ||
359 | |||
360 | if (!counter_config) { | ||
361 | pr_info("oprofile: failed to allocate %d " | ||
362 | "counters\n", perf_num_counters); | ||
363 | return -ENOMEM; | ||
364 | } | ||
365 | |||
366 | ret = init_driverfs(); | ||
367 | if (ret) { | ||
368 | kfree(counter_config); | ||
369 | counter_config = NULL; | ||
370 | return ret; | ||
371 | } | ||
372 | |||
373 | for_each_possible_cpu(cpu) { | ||
374 | perf_events[cpu] = kcalloc(perf_num_counters, | ||
375 | sizeof(struct perf_event *), GFP_KERNEL); | ||
376 | if (!perf_events[cpu]) { | ||
377 | pr_info("oprofile: failed to allocate %d perf events " | ||
378 | "for cpu %d\n", perf_num_counters, cpu); | ||
379 | while (--cpu >= 0) | ||
380 | kfree(perf_events[cpu]); | ||
381 | return -ENOMEM; | ||
382 | } | ||
383 | } | ||
384 | |||
385 | ops->backtrace = arm_backtrace; | 114 | ops->backtrace = arm_backtrace; |
386 | ops->create_files = op_arm_create_files; | ||
387 | ops->setup = op_arm_setup; | ||
388 | ops->start = op_arm_start; | ||
389 | ops->stop = op_arm_stop; | ||
390 | ops->shutdown = op_arm_stop; | ||
391 | ops->cpu_type = op_name_from_perf_id(armpmu_get_pmu_id()); | ||
392 | |||
393 | if (!ops->cpu_type) | ||
394 | ret = -ENODEV; | ||
395 | else | ||
396 | pr_info("oprofile: using %s\n", ops->cpu_type); | ||
397 | 115 | ||
398 | return ret; | 116 | return oprofile_perf_init(ops); |
399 | } | 117 | } |
400 | 118 | ||
401 | void oprofile_arch_exit(void) | 119 | void __exit oprofile_arch_exit(void) |
402 | { | 120 | { |
403 | int cpu, id; | 121 | oprofile_perf_exit(); |
404 | struct perf_event *event; | ||
405 | |||
406 | if (*perf_events) { | ||
407 | for_each_possible_cpu(cpu) { | ||
408 | for (id = 0; id < perf_num_counters; ++id) { | ||
409 | event = perf_events[cpu][id]; | ||
410 | if (event != NULL) | ||
411 | perf_event_release_kernel(event); | ||
412 | } | ||
413 | kfree(perf_events[cpu]); | ||
414 | } | ||
415 | } | ||
416 | |||
417 | if (counter_config) { | ||
418 | kfree(counter_config); | ||
419 | exit_driverfs(); | ||
420 | } | ||
421 | } | 122 | } |
422 | #else | 123 | #else |
423 | int __init oprofile_arch_init(struct oprofile_operations *ops) | 124 | int __init oprofile_arch_init(struct oprofile_operations *ops) |
@@ -425,5 +126,5 @@ int __init oprofile_arch_init(struct oprofile_operations *ops) | |||
425 | pr_info("oprofile: hardware counters not available\n"); | 126 | pr_info("oprofile: hardware counters not available\n"); |
426 | return -ENODEV; | 127 | return -ENODEV; |
427 | } | 128 | } |
428 | void oprofile_arch_exit(void) {} | 129 | void __exit oprofile_arch_exit(void) {} |
429 | #endif /* CONFIG_HW_PERF_EVENTS */ | 130 | #endif /* CONFIG_HW_PERF_EVENTS */ |
diff --git a/arch/frv/Kconfig b/arch/frv/Kconfig index 16399bd24993..0f2417df6323 100644 --- a/arch/frv/Kconfig +++ b/arch/frv/Kconfig | |||
@@ -7,6 +7,7 @@ config FRV | |||
7 | default y | 7 | default y |
8 | select HAVE_IDE | 8 | select HAVE_IDE |
9 | select HAVE_ARCH_TRACEHOOK | 9 | select HAVE_ARCH_TRACEHOOK |
10 | select HAVE_IRQ_WORK | ||
10 | select HAVE_PERF_EVENTS | 11 | select HAVE_PERF_EVENTS |
11 | 12 | ||
12 | config ZONE_DMA | 13 | config ZONE_DMA |
diff --git a/arch/frv/lib/Makefile b/arch/frv/lib/Makefile index f4709756d0d9..4ff2fb1e6b16 100644 --- a/arch/frv/lib/Makefile +++ b/arch/frv/lib/Makefile | |||
@@ -5,4 +5,4 @@ | |||
5 | lib-y := \ | 5 | lib-y := \ |
6 | __ashldi3.o __lshrdi3.o __muldi3.o __ashrdi3.o __negdi2.o __ucmpdi2.o \ | 6 | __ashldi3.o __lshrdi3.o __muldi3.o __ashrdi3.o __negdi2.o __ucmpdi2.o \ |
7 | checksum.o memcpy.o memset.o atomic-ops.o atomic64-ops.o \ | 7 | checksum.o memcpy.o memset.o atomic-ops.o atomic64-ops.o \ |
8 | outsl_ns.o outsl_sw.o insl_ns.o insl_sw.o cache.o perf_event.o | 8 | outsl_ns.o outsl_sw.o insl_ns.o insl_sw.o cache.o |
diff --git a/arch/frv/lib/perf_event.c b/arch/frv/lib/perf_event.c deleted file mode 100644 index 9ac5acfd2e91..000000000000 --- a/arch/frv/lib/perf_event.c +++ /dev/null | |||
@@ -1,19 +0,0 @@ | |||
1 | /* Performance event handling | ||
2 | * | ||
3 | * Copyright (C) 2009 Red Hat, Inc. All Rights Reserved. | ||
4 | * Written by David Howells (dhowells@redhat.com) | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or | ||
7 | * modify it under the terms of the GNU General Public Licence | ||
8 | * as published by the Free Software Foundation; either version | ||
9 | * 2 of the Licence, or (at your option) any later version. | ||
10 | */ | ||
11 | |||
12 | #include <linux/perf_event.h> | ||
13 | |||
14 | /* | ||
15 | * mark the performance event as pending | ||
16 | */ | ||
17 | void set_perf_event_pending(void) | ||
18 | { | ||
19 | } | ||
diff --git a/arch/ia64/include/asm/hardirq.h b/arch/ia64/include/asm/hardirq.h index d514cd9edb49..8fb7d33a661f 100644 --- a/arch/ia64/include/asm/hardirq.h +++ b/arch/ia64/include/asm/hardirq.h | |||
@@ -6,12 +6,6 @@ | |||
6 | * David Mosberger-Tang <davidm@hpl.hp.com> | 6 | * David Mosberger-Tang <davidm@hpl.hp.com> |
7 | */ | 7 | */ |
8 | 8 | ||
9 | |||
10 | #include <linux/threads.h> | ||
11 | #include <linux/irq.h> | ||
12 | |||
13 | #include <asm/processor.h> | ||
14 | |||
15 | /* | 9 | /* |
16 | * No irq_cpustat_t for IA-64. The data is held in the per-CPU data structure. | 10 | * No irq_cpustat_t for IA-64. The data is held in the per-CPU data structure. |
17 | */ | 11 | */ |
@@ -20,6 +14,11 @@ | |||
20 | 14 | ||
21 | #define local_softirq_pending() (local_cpu_data->softirq_pending) | 15 | #define local_softirq_pending() (local_cpu_data->softirq_pending) |
22 | 16 | ||
17 | #include <linux/threads.h> | ||
18 | #include <linux/irq.h> | ||
19 | |||
20 | #include <asm/processor.h> | ||
21 | |||
23 | extern void __iomem *ipi_base_addr; | 22 | extern void __iomem *ipi_base_addr; |
24 | 23 | ||
25 | void ack_bad_irq(unsigned int irq); | 24 | void ack_bad_irq(unsigned int irq); |
diff --git a/arch/m32r/include/asm/elf.h b/arch/m32r/include/asm/elf.h index 2f85412ef730..b8da7d0574d2 100644 --- a/arch/m32r/include/asm/elf.h +++ b/arch/m32r/include/asm/elf.h | |||
@@ -82,9 +82,9 @@ typedef elf_fpreg_t elf_fpregset_t; | |||
82 | * These are used to set parameters in the core dumps. | 82 | * These are used to set parameters in the core dumps. |
83 | */ | 83 | */ |
84 | #define ELF_CLASS ELFCLASS32 | 84 | #define ELF_CLASS ELFCLASS32 |
85 | #if defined(__LITTLE_ENDIAN) | 85 | #if defined(__LITTLE_ENDIAN__) |
86 | #define ELF_DATA ELFDATA2LSB | 86 | #define ELF_DATA ELFDATA2LSB |
87 | #elif defined(__BIG_ENDIAN) | 87 | #elif defined(__BIG_ENDIAN__) |
88 | #define ELF_DATA ELFDATA2MSB | 88 | #define ELF_DATA ELFDATA2MSB |
89 | #else | 89 | #else |
90 | #error no endian defined | 90 | #error no endian defined |
diff --git a/arch/m32r/kernel/.gitignore b/arch/m32r/kernel/.gitignore new file mode 100644 index 000000000000..c5f676c3c224 --- /dev/null +++ b/arch/m32r/kernel/.gitignore | |||
@@ -0,0 +1 @@ | |||
vmlinux.lds | |||
diff --git a/arch/m32r/kernel/signal.c b/arch/m32r/kernel/signal.c index 7bbe38645ed5..a08697f0886d 100644 --- a/arch/m32r/kernel/signal.c +++ b/arch/m32r/kernel/signal.c | |||
@@ -28,6 +28,8 @@ | |||
28 | 28 | ||
29 | #define DEBUG_SIG 0 | 29 | #define DEBUG_SIG 0 |
30 | 30 | ||
31 | #define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP))) | ||
32 | |||
31 | asmlinkage int | 33 | asmlinkage int |
32 | sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss, | 34 | sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss, |
33 | unsigned long r2, unsigned long r3, unsigned long r4, | 35 | unsigned long r2, unsigned long r3, unsigned long r4, |
@@ -254,7 +256,7 @@ give_sigsegv: | |||
254 | static int prev_insn(struct pt_regs *regs) | 256 | static int prev_insn(struct pt_regs *regs) |
255 | { | 257 | { |
256 | u16 inst; | 258 | u16 inst; |
257 | if (get_user(&inst, (u16 __user *)(regs->bpc - 2))) | 259 | if (get_user(inst, (u16 __user *)(regs->bpc - 2))) |
258 | return -EFAULT; | 260 | return -EFAULT; |
259 | if ((inst & 0xfff0) == 0x10f0) /* trap ? */ | 261 | if ((inst & 0xfff0) == 0x10f0) /* trap ? */ |
260 | regs->bpc -= 2; | 262 | regs->bpc -= 2; |
diff --git a/arch/mips/Kbuild b/arch/mips/Kbuild index e322d65f33a4..7dd65cfae837 100644 --- a/arch/mips/Kbuild +++ b/arch/mips/Kbuild | |||
@@ -7,6 +7,10 @@ subdir-ccflags-y := -Werror | |||
7 | include arch/mips/Kbuild.platforms | 7 | include arch/mips/Kbuild.platforms |
8 | obj-y := $(platform-y) | 8 | obj-y := $(platform-y) |
9 | 9 | ||
10 | # make clean traverses $(obj-) without having included .config, so | ||
11 | # everything ends up here | ||
12 | obj- := $(platform-) | ||
13 | |||
10 | # mips object files | 14 | # mips object files |
11 | # The object files are linked as core-y files would be linked | 15 | # The object files are linked as core-y files would be linked |
12 | 16 | ||
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index 5526faabfc21..4c9f402295dd 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig | |||
@@ -881,11 +881,15 @@ config NO_IOPORT | |||
881 | config GENERIC_ISA_DMA | 881 | config GENERIC_ISA_DMA |
882 | bool | 882 | bool |
883 | select ZONE_DMA if GENERIC_ISA_DMA_SUPPORT_BROKEN=n | 883 | select ZONE_DMA if GENERIC_ISA_DMA_SUPPORT_BROKEN=n |
884 | select ISA_DMA_API | ||
884 | 885 | ||
885 | config GENERIC_ISA_DMA_SUPPORT_BROKEN | 886 | config GENERIC_ISA_DMA_SUPPORT_BROKEN |
886 | bool | 887 | bool |
887 | select GENERIC_ISA_DMA | 888 | select GENERIC_ISA_DMA |
888 | 889 | ||
890 | config ISA_DMA_API | ||
891 | bool | ||
892 | |||
889 | config GENERIC_GPIO | 893 | config GENERIC_GPIO |
890 | bool | 894 | bool |
891 | 895 | ||
diff --git a/arch/mips/boot/compressed/Makefile b/arch/mips/boot/compressed/Makefile index 5fd7f7a58b7e..5042d51b0512 100644 --- a/arch/mips/boot/compressed/Makefile +++ b/arch/mips/boot/compressed/Makefile | |||
@@ -105,4 +105,4 @@ OBJCOPYFLAGS_vmlinuz.srec := $(OBJCOPYFLAGS) -S -O srec | |||
105 | vmlinuz.srec: vmlinuz | 105 | vmlinuz.srec: vmlinuz |
106 | $(call cmd,objcopy) | 106 | $(call cmd,objcopy) |
107 | 107 | ||
108 | clean-files := $(objtree)/vmlinuz.* | 108 | clean-files := $(objtree)/vmlinuz $(objtree)/vmlinuz.{32,ecoff,bin,srec} |
diff --git a/arch/mips/dec/Platform b/arch/mips/dec/Platform index 3adbcbd95db1..cf55a6f4e720 100644 --- a/arch/mips/dec/Platform +++ b/arch/mips/dec/Platform | |||
@@ -1,7 +1,7 @@ | |||
1 | # | 1 | # |
2 | # DECstation family | 2 | # DECstation family |
3 | # | 3 | # |
4 | platform-$(CONFIG_MACH_DECSTATION) = dec/ | 4 | platform-$(CONFIG_MACH_DECSTATION) += dec/ |
5 | cflags-$(CONFIG_MACH_DECSTATION) += \ | 5 | cflags-$(CONFIG_MACH_DECSTATION) += \ |
6 | -I$(srctree)/arch/mips/include/asm/mach-dec | 6 | -I$(srctree)/arch/mips/include/asm/mach-dec |
7 | libs-$(CONFIG_MACH_DECSTATION) += arch/mips/dec/prom/ | 7 | libs-$(CONFIG_MACH_DECSTATION) += arch/mips/dec/prom/ |
diff --git a/arch/mips/include/asm/fcntl.h b/arch/mips/include/asm/fcntl.h index e482fe90fe88..75eddedcfc3e 100644 --- a/arch/mips/include/asm/fcntl.h +++ b/arch/mips/include/asm/fcntl.h | |||
@@ -56,6 +56,7 @@ | |||
56 | */ | 56 | */ |
57 | 57 | ||
58 | #ifdef CONFIG_32BIT | 58 | #ifdef CONFIG_32BIT |
59 | #include <linux/types.h> | ||
59 | 60 | ||
60 | struct flock { | 61 | struct flock { |
61 | short l_type; | 62 | short l_type; |
diff --git a/arch/mips/jz4740/Platform b/arch/mips/jz4740/Platform index 6a97230e3d05..ba91be9c21ef 100644 --- a/arch/mips/jz4740/Platform +++ b/arch/mips/jz4740/Platform | |||
@@ -1,3 +1,3 @@ | |||
1 | core-$(CONFIG_MACH_JZ4740) += arch/mips/jz4740/ | 1 | platform-$(CONFIG_MACH_JZ4740) += jz4740/ |
2 | cflags-$(CONFIG_MACH_JZ4740) += -I$(srctree)/arch/mips/include/asm/mach-jz4740 | 2 | cflags-$(CONFIG_MACH_JZ4740) += -I$(srctree)/arch/mips/include/asm/mach-jz4740 |
3 | load-$(CONFIG_MACH_JZ4740) += 0xffffffff80010000 | 3 | load-$(CONFIG_MACH_JZ4740) += 0xffffffff80010000 |
diff --git a/arch/mips/kernel/branch.c b/arch/mips/kernel/branch.c index 0176ed015c89..32103cc2a257 100644 --- a/arch/mips/kernel/branch.c +++ b/arch/mips/kernel/branch.c | |||
@@ -40,7 +40,6 @@ int __compute_return_epc(struct pt_regs *regs) | |||
40 | return -EFAULT; | 40 | return -EFAULT; |
41 | } | 41 | } |
42 | 42 | ||
43 | regs->regs[0] = 0; | ||
44 | switch (insn.i_format.opcode) { | 43 | switch (insn.i_format.opcode) { |
45 | /* | 44 | /* |
46 | * jr and jalr are in r_format format. | 45 | * jr and jalr are in r_format format. |
diff --git a/arch/mips/kernel/mips-mt-fpaff.c b/arch/mips/kernel/mips-mt-fpaff.c index 2340f11dc29c..9a526ba6f257 100644 --- a/arch/mips/kernel/mips-mt-fpaff.c +++ b/arch/mips/kernel/mips-mt-fpaff.c | |||
@@ -103,7 +103,7 @@ asmlinkage long mipsmt_sys_sched_setaffinity(pid_t pid, unsigned int len, | |||
103 | if (!check_same_owner(p) && !capable(CAP_SYS_NICE)) | 103 | if (!check_same_owner(p) && !capable(CAP_SYS_NICE)) |
104 | goto out_unlock; | 104 | goto out_unlock; |
105 | 105 | ||
106 | retval = security_task_setscheduler(p, 0, NULL); | 106 | retval = security_task_setscheduler(p); |
107 | if (retval) | 107 | if (retval) |
108 | goto out_unlock; | 108 | goto out_unlock; |
109 | 109 | ||
diff --git a/arch/mips/kernel/ptrace.c b/arch/mips/kernel/ptrace.c index c51b95ff8644..c8777333e198 100644 --- a/arch/mips/kernel/ptrace.c +++ b/arch/mips/kernel/ptrace.c | |||
@@ -536,7 +536,7 @@ asmlinkage void do_syscall_trace(struct pt_regs *regs, int entryexit) | |||
536 | { | 536 | { |
537 | /* do the secure computing check first */ | 537 | /* do the secure computing check first */ |
538 | if (!entryexit) | 538 | if (!entryexit) |
539 | secure_computing(regs->regs[0]); | 539 | secure_computing(regs->regs[2]); |
540 | 540 | ||
541 | if (unlikely(current->audit_context) && entryexit) | 541 | if (unlikely(current->audit_context) && entryexit) |
542 | audit_syscall_exit(AUDITSC_RESULT(regs->regs[2]), | 542 | audit_syscall_exit(AUDITSC_RESULT(regs->regs[2]), |
@@ -565,7 +565,7 @@ asmlinkage void do_syscall_trace(struct pt_regs *regs, int entryexit) | |||
565 | 565 | ||
566 | out: | 566 | out: |
567 | if (unlikely(current->audit_context) && !entryexit) | 567 | if (unlikely(current->audit_context) && !entryexit) |
568 | audit_syscall_entry(audit_arch(), regs->regs[0], | 568 | audit_syscall_entry(audit_arch(), regs->regs[2], |
569 | regs->regs[4], regs->regs[5], | 569 | regs->regs[4], regs->regs[5], |
570 | regs->regs[6], regs->regs[7]); | 570 | regs->regs[6], regs->regs[7]); |
571 | } | 571 | } |
diff --git a/arch/mips/kernel/scall32-o32.S b/arch/mips/kernel/scall32-o32.S index 584415eef8c9..fbaabad0e6e2 100644 --- a/arch/mips/kernel/scall32-o32.S +++ b/arch/mips/kernel/scall32-o32.S | |||
@@ -63,9 +63,9 @@ stack_done: | |||
63 | sw t0, PT_R7(sp) # set error flag | 63 | sw t0, PT_R7(sp) # set error flag |
64 | beqz t0, 1f | 64 | beqz t0, 1f |
65 | 65 | ||
66 | lw t1, PT_R2(sp) # syscall number | ||
66 | negu v0 # error | 67 | negu v0 # error |
67 | sw v0, PT_R0(sp) # set flag for syscall | 68 | sw t1, PT_R0(sp) # save it for syscall restarting |
68 | # restarting | ||
69 | 1: sw v0, PT_R2(sp) # result | 69 | 1: sw v0, PT_R2(sp) # result |
70 | 70 | ||
71 | o32_syscall_exit: | 71 | o32_syscall_exit: |
@@ -104,9 +104,9 @@ syscall_trace_entry: | |||
104 | sw t0, PT_R7(sp) # set error flag | 104 | sw t0, PT_R7(sp) # set error flag |
105 | beqz t0, 1f | 105 | beqz t0, 1f |
106 | 106 | ||
107 | lw t1, PT_R2(sp) # syscall number | ||
107 | negu v0 # error | 108 | negu v0 # error |
108 | sw v0, PT_R0(sp) # set flag for syscall | 109 | sw t1, PT_R0(sp) # save it for syscall restarting |
109 | # restarting | ||
110 | 1: sw v0, PT_R2(sp) # result | 110 | 1: sw v0, PT_R2(sp) # result |
111 | 111 | ||
112 | j syscall_exit | 112 | j syscall_exit |
@@ -169,8 +169,7 @@ stackargs: | |||
169 | * We probably should handle this case a bit more drastic. | 169 | * We probably should handle this case a bit more drastic. |
170 | */ | 170 | */ |
171 | bad_stack: | 171 | bad_stack: |
172 | negu v0 # error | 172 | li v0, EFAULT |
173 | sw v0, PT_R0(sp) | ||
174 | sw v0, PT_R2(sp) | 173 | sw v0, PT_R2(sp) |
175 | li t0, 1 # set error flag | 174 | li t0, 1 # set error flag |
176 | sw t0, PT_R7(sp) | 175 | sw t0, PT_R7(sp) |
diff --git a/arch/mips/kernel/scall64-64.S b/arch/mips/kernel/scall64-64.S index 5573f8e4e326..3f4179283207 100644 --- a/arch/mips/kernel/scall64-64.S +++ b/arch/mips/kernel/scall64-64.S | |||
@@ -66,9 +66,9 @@ NESTED(handle_sys64, PT_SIZE, sp) | |||
66 | sd t0, PT_R7(sp) # set error flag | 66 | sd t0, PT_R7(sp) # set error flag |
67 | beqz t0, 1f | 67 | beqz t0, 1f |
68 | 68 | ||
69 | ld t1, PT_R2(sp) # syscall number | ||
69 | dnegu v0 # error | 70 | dnegu v0 # error |
70 | sd v0, PT_R0(sp) # set flag for syscall | 71 | sd t1, PT_R0(sp) # save it for syscall restarting |
71 | # restarting | ||
72 | 1: sd v0, PT_R2(sp) # result | 72 | 1: sd v0, PT_R2(sp) # result |
73 | 73 | ||
74 | n64_syscall_exit: | 74 | n64_syscall_exit: |
@@ -109,8 +109,9 @@ syscall_trace_entry: | |||
109 | sd t0, PT_R7(sp) # set error flag | 109 | sd t0, PT_R7(sp) # set error flag |
110 | beqz t0, 1f | 110 | beqz t0, 1f |
111 | 111 | ||
112 | ld t1, PT_R2(sp) # syscall number | ||
112 | dnegu v0 # error | 113 | dnegu v0 # error |
113 | sd v0, PT_R0(sp) # set flag for syscall restarting | 114 | sd t1, PT_R0(sp) # save it for syscall restarting |
114 | 1: sd v0, PT_R2(sp) # result | 115 | 1: sd v0, PT_R2(sp) # result |
115 | 116 | ||
116 | j syscall_exit | 117 | j syscall_exit |
diff --git a/arch/mips/kernel/scall64-n32.S b/arch/mips/kernel/scall64-n32.S index 1e38ec97672e..f08ece6d8acc 100644 --- a/arch/mips/kernel/scall64-n32.S +++ b/arch/mips/kernel/scall64-n32.S | |||
@@ -65,8 +65,9 @@ NESTED(handle_sysn32, PT_SIZE, sp) | |||
65 | sd t0, PT_R7(sp) # set error flag | 65 | sd t0, PT_R7(sp) # set error flag |
66 | beqz t0, 1f | 66 | beqz t0, 1f |
67 | 67 | ||
68 | ld t1, PT_R2(sp) # syscall number | ||
68 | dnegu v0 # error | 69 | dnegu v0 # error |
69 | sd v0, PT_R0(sp) # set flag for syscall restarting | 70 | sd t1, PT_R0(sp) # save it for syscall restarting |
70 | 1: sd v0, PT_R2(sp) # result | 71 | 1: sd v0, PT_R2(sp) # result |
71 | 72 | ||
72 | local_irq_disable # make sure need_resched and | 73 | local_irq_disable # make sure need_resched and |
@@ -106,8 +107,9 @@ n32_syscall_trace_entry: | |||
106 | sd t0, PT_R7(sp) # set error flag | 107 | sd t0, PT_R7(sp) # set error flag |
107 | beqz t0, 1f | 108 | beqz t0, 1f |
108 | 109 | ||
110 | ld t1, PT_R2(sp) # syscall number | ||
109 | dnegu v0 # error | 111 | dnegu v0 # error |
110 | sd v0, PT_R0(sp) # set flag for syscall restarting | 112 | sd t1, PT_R0(sp) # save it for syscall restarting |
111 | 1: sd v0, PT_R2(sp) # result | 113 | 1: sd v0, PT_R2(sp) # result |
112 | 114 | ||
113 | j syscall_exit | 115 | j syscall_exit |
@@ -320,10 +322,10 @@ EXPORT(sysn32_call_table) | |||
320 | PTR sys_cacheflush | 322 | PTR sys_cacheflush |
321 | PTR sys_cachectl | 323 | PTR sys_cachectl |
322 | PTR sys_sysmips | 324 | PTR sys_sysmips |
323 | PTR sys_io_setup /* 6200 */ | 325 | PTR compat_sys_io_setup /* 6200 */ |
324 | PTR sys_io_destroy | 326 | PTR sys_io_destroy |
325 | PTR sys_io_getevents | 327 | PTR compat_sys_io_getevents |
326 | PTR sys_io_submit | 328 | PTR compat_sys_io_submit |
327 | PTR sys_io_cancel | 329 | PTR sys_io_cancel |
328 | PTR sys_exit_group /* 6205 */ | 330 | PTR sys_exit_group /* 6205 */ |
329 | PTR sys_lookup_dcookie | 331 | PTR sys_lookup_dcookie |
diff --git a/arch/mips/kernel/scall64-o32.S b/arch/mips/kernel/scall64-o32.S index 171979fc98e5..78d768a3e19d 100644 --- a/arch/mips/kernel/scall64-o32.S +++ b/arch/mips/kernel/scall64-o32.S | |||
@@ -93,8 +93,9 @@ NESTED(handle_sys, PT_SIZE, sp) | |||
93 | sd t0, PT_R7(sp) # set error flag | 93 | sd t0, PT_R7(sp) # set error flag |
94 | beqz t0, 1f | 94 | beqz t0, 1f |
95 | 95 | ||
96 | ld t1, PT_R2(sp) # syscall number | ||
96 | dnegu v0 # error | 97 | dnegu v0 # error |
97 | sd v0, PT_R0(sp) # flag for syscall restarting | 98 | sd t1, PT_R0(sp) # save it for syscall restarting |
98 | 1: sd v0, PT_R2(sp) # result | 99 | 1: sd v0, PT_R2(sp) # result |
99 | 100 | ||
100 | o32_syscall_exit: | 101 | o32_syscall_exit: |
@@ -142,8 +143,9 @@ trace_a_syscall: | |||
142 | sd t0, PT_R7(sp) # set error flag | 143 | sd t0, PT_R7(sp) # set error flag |
143 | beqz t0, 1f | 144 | beqz t0, 1f |
144 | 145 | ||
146 | ld t1, PT_R2(sp) # syscall number | ||
145 | dnegu v0 # error | 147 | dnegu v0 # error |
146 | sd v0, PT_R0(sp) # set flag for syscall restarting | 148 | sd t1, PT_R0(sp) # save it for syscall restarting |
147 | 1: sd v0, PT_R2(sp) # result | 149 | 1: sd v0, PT_R2(sp) # result |
148 | 150 | ||
149 | j syscall_exit | 151 | j syscall_exit |
@@ -154,8 +156,7 @@ trace_a_syscall: | |||
154 | * The stackpointer for a call with more than 4 arguments is bad. | 156 | * The stackpointer for a call with more than 4 arguments is bad. |
155 | */ | 157 | */ |
156 | bad_stack: | 158 | bad_stack: |
157 | dnegu v0 # error | 159 | li v0, EFAULT |
158 | sd v0, PT_R0(sp) | ||
159 | sd v0, PT_R2(sp) | 160 | sd v0, PT_R2(sp) |
160 | li t0, 1 # set error flag | 161 | li t0, 1 # set error flag |
161 | sd t0, PT_R7(sp) | 162 | sd t0, PT_R7(sp) |
@@ -444,10 +445,10 @@ sys_call_table: | |||
444 | PTR compat_sys_futex | 445 | PTR compat_sys_futex |
445 | PTR compat_sys_sched_setaffinity | 446 | PTR compat_sys_sched_setaffinity |
446 | PTR compat_sys_sched_getaffinity /* 4240 */ | 447 | PTR compat_sys_sched_getaffinity /* 4240 */ |
447 | PTR sys_io_setup | 448 | PTR compat_sys_io_setup |
448 | PTR sys_io_destroy | 449 | PTR sys_io_destroy |
449 | PTR sys_io_getevents | 450 | PTR compat_sys_io_getevents |
450 | PTR sys_io_submit | 451 | PTR compat_sys_io_submit |
451 | PTR sys_io_cancel /* 4245 */ | 452 | PTR sys_io_cancel /* 4245 */ |
452 | PTR sys_exit_group | 453 | PTR sys_exit_group |
453 | PTR sys32_lookup_dcookie | 454 | PTR sys32_lookup_dcookie |
diff --git a/arch/mips/kernel/signal.c b/arch/mips/kernel/signal.c index 2099d5a4c4b7..5922342bca39 100644 --- a/arch/mips/kernel/signal.c +++ b/arch/mips/kernel/signal.c | |||
@@ -390,7 +390,6 @@ asmlinkage void sys_rt_sigreturn(nabi_no_regargs struct pt_regs regs) | |||
390 | { | 390 | { |
391 | struct rt_sigframe __user *frame; | 391 | struct rt_sigframe __user *frame; |
392 | sigset_t set; | 392 | sigset_t set; |
393 | stack_t st; | ||
394 | int sig; | 393 | int sig; |
395 | 394 | ||
396 | frame = (struct rt_sigframe __user *) regs.regs[29]; | 395 | frame = (struct rt_sigframe __user *) regs.regs[29]; |
@@ -411,11 +410,9 @@ asmlinkage void sys_rt_sigreturn(nabi_no_regargs struct pt_regs regs) | |||
411 | else if (sig) | 410 | else if (sig) |
412 | force_sig(sig, current); | 411 | force_sig(sig, current); |
413 | 412 | ||
414 | if (__copy_from_user(&st, &frame->rs_uc.uc_stack, sizeof(st))) | ||
415 | goto badframe; | ||
416 | /* It is more difficult to avoid calling this function than to | 413 | /* It is more difficult to avoid calling this function than to |
417 | call it and ignore errors. */ | 414 | call it and ignore errors. */ |
418 | do_sigaltstack((stack_t __user *)&st, NULL, regs.regs[29]); | 415 | do_sigaltstack(&frame->rs_uc.uc_stack, NULL, regs.regs[29]); |
419 | 416 | ||
420 | /* | 417 | /* |
421 | * Don't let your children do this ... | 418 | * Don't let your children do this ... |
@@ -550,23 +547,26 @@ static int handle_signal(unsigned long sig, siginfo_t *info, | |||
550 | struct mips_abi *abi = current->thread.abi; | 547 | struct mips_abi *abi = current->thread.abi; |
551 | void *vdso = current->mm->context.vdso; | 548 | void *vdso = current->mm->context.vdso; |
552 | 549 | ||
553 | switch(regs->regs[0]) { | 550 | if (regs->regs[0]) { |
554 | case ERESTART_RESTARTBLOCK: | 551 | switch(regs->regs[2]) { |
555 | case ERESTARTNOHAND: | 552 | case ERESTART_RESTARTBLOCK: |
556 | regs->regs[2] = EINTR; | 553 | case ERESTARTNOHAND: |
557 | break; | ||
558 | case ERESTARTSYS: | ||
559 | if (!(ka->sa.sa_flags & SA_RESTART)) { | ||
560 | regs->regs[2] = EINTR; | 554 | regs->regs[2] = EINTR; |
561 | break; | 555 | break; |
556 | case ERESTARTSYS: | ||
557 | if (!(ka->sa.sa_flags & SA_RESTART)) { | ||
558 | regs->regs[2] = EINTR; | ||
559 | break; | ||
560 | } | ||
561 | /* fallthrough */ | ||
562 | case ERESTARTNOINTR: | ||
563 | regs->regs[7] = regs->regs[26]; | ||
564 | regs->regs[2] = regs->regs[0]; | ||
565 | regs->cp0_epc -= 4; | ||
562 | } | 566 | } |
563 | /* fallthrough */ | ||
564 | case ERESTARTNOINTR: /* Userland will reload $v0. */ | ||
565 | regs->regs[7] = regs->regs[26]; | ||
566 | regs->cp0_epc -= 8; | ||
567 | } | ||
568 | 567 | ||
569 | regs->regs[0] = 0; /* Don't deal with this again. */ | 568 | regs->regs[0] = 0; /* Don't deal with this again. */ |
569 | } | ||
570 | 570 | ||
571 | if (sig_uses_siginfo(ka)) | 571 | if (sig_uses_siginfo(ka)) |
572 | ret = abi->setup_rt_frame(vdso + abi->rt_signal_return_offset, | 572 | ret = abi->setup_rt_frame(vdso + abi->rt_signal_return_offset, |
@@ -575,6 +575,9 @@ static int handle_signal(unsigned long sig, siginfo_t *info, | |||
575 | ret = abi->setup_frame(vdso + abi->signal_return_offset, | 575 | ret = abi->setup_frame(vdso + abi->signal_return_offset, |
576 | ka, regs, sig, oldset); | 576 | ka, regs, sig, oldset); |
577 | 577 | ||
578 | if (ret) | ||
579 | return ret; | ||
580 | |||
578 | spin_lock_irq(&current->sighand->siglock); | 581 | spin_lock_irq(&current->sighand->siglock); |
579 | sigorsets(&current->blocked, &current->blocked, &ka->sa.sa_mask); | 582 | sigorsets(&current->blocked, &current->blocked, &ka->sa.sa_mask); |
580 | if (!(ka->sa.sa_flags & SA_NODEFER)) | 583 | if (!(ka->sa.sa_flags & SA_NODEFER)) |
@@ -622,17 +625,13 @@ static void do_signal(struct pt_regs *regs) | |||
622 | return; | 625 | return; |
623 | } | 626 | } |
624 | 627 | ||
625 | /* | ||
626 | * Who's code doesn't conform to the restartable syscall convention | ||
627 | * dies here!!! The li instruction, a single machine instruction, | ||
628 | * must directly be followed by the syscall instruction. | ||
629 | */ | ||
630 | if (regs->regs[0]) { | 628 | if (regs->regs[0]) { |
631 | if (regs->regs[2] == ERESTARTNOHAND || | 629 | if (regs->regs[2] == ERESTARTNOHAND || |
632 | regs->regs[2] == ERESTARTSYS || | 630 | regs->regs[2] == ERESTARTSYS || |
633 | regs->regs[2] == ERESTARTNOINTR) { | 631 | regs->regs[2] == ERESTARTNOINTR) { |
632 | regs->regs[2] = regs->regs[0]; | ||
634 | regs->regs[7] = regs->regs[26]; | 633 | regs->regs[7] = regs->regs[26]; |
635 | regs->cp0_epc -= 8; | 634 | regs->cp0_epc -= 4; |
636 | } | 635 | } |
637 | if (regs->regs[2] == ERESTART_RESTARTBLOCK) { | 636 | if (regs->regs[2] == ERESTART_RESTARTBLOCK) { |
638 | regs->regs[2] = current->thread.abi->restart; | 637 | regs->regs[2] = current->thread.abi->restart; |
diff --git a/arch/mips/kernel/signal_n32.c b/arch/mips/kernel/signal_n32.c index 2c5df818c65a..ee24d814d5b9 100644 --- a/arch/mips/kernel/signal_n32.c +++ b/arch/mips/kernel/signal_n32.c | |||
@@ -109,6 +109,7 @@ asmlinkage int sysn32_rt_sigsuspend(nabi_no_regargs struct pt_regs regs) | |||
109 | asmlinkage void sysn32_rt_sigreturn(nabi_no_regargs struct pt_regs regs) | 109 | asmlinkage void sysn32_rt_sigreturn(nabi_no_regargs struct pt_regs regs) |
110 | { | 110 | { |
111 | struct rt_sigframe_n32 __user *frame; | 111 | struct rt_sigframe_n32 __user *frame; |
112 | mm_segment_t old_fs; | ||
112 | sigset_t set; | 113 | sigset_t set; |
113 | stack_t st; | 114 | stack_t st; |
114 | s32 sp; | 115 | s32 sp; |
@@ -143,7 +144,11 @@ asmlinkage void sysn32_rt_sigreturn(nabi_no_regargs struct pt_regs regs) | |||
143 | 144 | ||
144 | /* It is more difficult to avoid calling this function than to | 145 | /* It is more difficult to avoid calling this function than to |
145 | call it and ignore errors. */ | 146 | call it and ignore errors. */ |
147 | old_fs = get_fs(); | ||
148 | set_fs(KERNEL_DS); | ||
146 | do_sigaltstack((stack_t __user *)&st, NULL, regs.regs[29]); | 149 | do_sigaltstack((stack_t __user *)&st, NULL, regs.regs[29]); |
150 | set_fs(old_fs); | ||
151 | |||
147 | 152 | ||
148 | /* | 153 | /* |
149 | * Don't let your children do this ... | 154 | * Don't let your children do this ... |
diff --git a/arch/mips/kernel/unaligned.c b/arch/mips/kernel/unaligned.c index 69b039ca8d83..33d5a5ce4a29 100644 --- a/arch/mips/kernel/unaligned.c +++ b/arch/mips/kernel/unaligned.c | |||
@@ -109,8 +109,6 @@ static void emulate_load_store_insn(struct pt_regs *regs, | |||
109 | unsigned long value; | 109 | unsigned long value; |
110 | unsigned int res; | 110 | unsigned int res; |
111 | 111 | ||
112 | regs->regs[0] = 0; | ||
113 | |||
114 | /* | 112 | /* |
115 | * This load never faults. | 113 | * This load never faults. |
116 | */ | 114 | */ |
diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig index 907417d187e1..79a04a9394d5 100644 --- a/arch/parisc/Kconfig +++ b/arch/parisc/Kconfig | |||
@@ -16,6 +16,7 @@ config PARISC | |||
16 | select RTC_DRV_GENERIC | 16 | select RTC_DRV_GENERIC |
17 | select INIT_ALL_POSSIBLE | 17 | select INIT_ALL_POSSIBLE |
18 | select BUG | 18 | select BUG |
19 | select HAVE_IRQ_WORK | ||
19 | select HAVE_PERF_EVENTS | 20 | select HAVE_PERF_EVENTS |
20 | select GENERIC_ATOMIC64 if !64BIT | 21 | select GENERIC_ATOMIC64 if !64BIT |
21 | help | 22 | help |
diff --git a/arch/parisc/include/asm/perf_event.h b/arch/parisc/include/asm/perf_event.h index cc146427d8f9..1e0fd8ba6c03 100644 --- a/arch/parisc/include/asm/perf_event.h +++ b/arch/parisc/include/asm/perf_event.h | |||
@@ -1,7 +1,6 @@ | |||
1 | #ifndef __ASM_PARISC_PERF_EVENT_H | 1 | #ifndef __ASM_PARISC_PERF_EVENT_H |
2 | #define __ASM_PARISC_PERF_EVENT_H | 2 | #define __ASM_PARISC_PERF_EVENT_H |
3 | 3 | ||
4 | /* parisc only supports software events through this interface. */ | 4 | /* Empty, just to avoid compiling error */ |
5 | static inline void set_perf_event_pending(void) { } | ||
6 | 5 | ||
7 | #endif /* __ASM_PARISC_PERF_EVENT_H */ | 6 | #endif /* __ASM_PARISC_PERF_EVENT_H */ |
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 631e5a0fb6ab..4b1e521d966f 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig | |||
@@ -138,6 +138,7 @@ config PPC | |||
138 | select HAVE_OPROFILE | 138 | select HAVE_OPROFILE |
139 | select HAVE_SYSCALL_WRAPPERS if PPC64 | 139 | select HAVE_SYSCALL_WRAPPERS if PPC64 |
140 | select GENERIC_ATOMIC64 if PPC32 | 140 | select GENERIC_ATOMIC64 if PPC32 |
141 | select HAVE_IRQ_WORK | ||
141 | select HAVE_PERF_EVENTS | 142 | select HAVE_PERF_EVENTS |
142 | select HAVE_REGS_AND_STACK_ACCESS_API | 143 | select HAVE_REGS_AND_STACK_ACCESS_API |
143 | select HAVE_HW_BREAKPOINT if PERF_EVENTS && PPC_BOOK3S_64 | 144 | select HAVE_HW_BREAKPOINT if PERF_EVENTS && PPC_BOOK3S_64 |
diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h index 1ff6662f7faf..9b287fdd8ea3 100644 --- a/arch/powerpc/include/asm/paca.h +++ b/arch/powerpc/include/asm/paca.h | |||
@@ -129,7 +129,7 @@ struct paca_struct { | |||
129 | u8 soft_enabled; /* irq soft-enable flag */ | 129 | u8 soft_enabled; /* irq soft-enable flag */ |
130 | u8 hard_enabled; /* set if irqs are enabled in MSR */ | 130 | u8 hard_enabled; /* set if irqs are enabled in MSR */ |
131 | u8 io_sync; /* writel() needs spin_unlock sync */ | 131 | u8 io_sync; /* writel() needs spin_unlock sync */ |
132 | u8 perf_event_pending; /* PM interrupt while soft-disabled */ | 132 | u8 irq_work_pending; /* IRQ_WORK interrupt while soft-disable */ |
133 | 133 | ||
134 | /* Stuff for accurate time accounting */ | 134 | /* Stuff for accurate time accounting */ |
135 | u64 user_time; /* accumulated usermode TB ticks */ | 135 | u64 user_time; /* accumulated usermode TB ticks */ |
diff --git a/arch/powerpc/kernel/perf_callchain.c b/arch/powerpc/kernel/perf_callchain.c index 95ad9dad298e..d05ae4204bbf 100644 --- a/arch/powerpc/kernel/perf_callchain.c +++ b/arch/powerpc/kernel/perf_callchain.c | |||
@@ -23,18 +23,6 @@ | |||
23 | #include "ppc32.h" | 23 | #include "ppc32.h" |
24 | #endif | 24 | #endif |
25 | 25 | ||
26 | /* | ||
27 | * Store another value in a callchain_entry. | ||
28 | */ | ||
29 | static inline void callchain_store(struct perf_callchain_entry *entry, u64 ip) | ||
30 | { | ||
31 | unsigned int nr = entry->nr; | ||
32 | |||
33 | if (nr < PERF_MAX_STACK_DEPTH) { | ||
34 | entry->ip[nr] = ip; | ||
35 | entry->nr = nr + 1; | ||
36 | } | ||
37 | } | ||
38 | 26 | ||
39 | /* | 27 | /* |
40 | * Is sp valid as the address of the next kernel stack frame after prev_sp? | 28 | * Is sp valid as the address of the next kernel stack frame after prev_sp? |
@@ -58,8 +46,8 @@ static int valid_next_sp(unsigned long sp, unsigned long prev_sp) | |||
58 | return 0; | 46 | return 0; |
59 | } | 47 | } |
60 | 48 | ||
61 | static void perf_callchain_kernel(struct pt_regs *regs, | 49 | void |
62 | struct perf_callchain_entry *entry) | 50 | perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs) |
63 | { | 51 | { |
64 | unsigned long sp, next_sp; | 52 | unsigned long sp, next_sp; |
65 | unsigned long next_ip; | 53 | unsigned long next_ip; |
@@ -69,8 +57,7 @@ static void perf_callchain_kernel(struct pt_regs *regs, | |||
69 | 57 | ||
70 | lr = regs->link; | 58 | lr = regs->link; |
71 | sp = regs->gpr[1]; | 59 | sp = regs->gpr[1]; |
72 | callchain_store(entry, PERF_CONTEXT_KERNEL); | 60 | perf_callchain_store(entry, regs->nip); |
73 | callchain_store(entry, regs->nip); | ||
74 | 61 | ||
75 | if (!validate_sp(sp, current, STACK_FRAME_OVERHEAD)) | 62 | if (!validate_sp(sp, current, STACK_FRAME_OVERHEAD)) |
76 | return; | 63 | return; |
@@ -89,7 +76,7 @@ static void perf_callchain_kernel(struct pt_regs *regs, | |||
89 | next_ip = regs->nip; | 76 | next_ip = regs->nip; |
90 | lr = regs->link; | 77 | lr = regs->link; |
91 | level = 0; | 78 | level = 0; |
92 | callchain_store(entry, PERF_CONTEXT_KERNEL); | 79 | perf_callchain_store(entry, PERF_CONTEXT_KERNEL); |
93 | 80 | ||
94 | } else { | 81 | } else { |
95 | if (level == 0) | 82 | if (level == 0) |
@@ -111,7 +98,7 @@ static void perf_callchain_kernel(struct pt_regs *regs, | |||
111 | ++level; | 98 | ++level; |
112 | } | 99 | } |
113 | 100 | ||
114 | callchain_store(entry, next_ip); | 101 | perf_callchain_store(entry, next_ip); |
115 | if (!valid_next_sp(next_sp, sp)) | 102 | if (!valid_next_sp(next_sp, sp)) |
116 | return; | 103 | return; |
117 | sp = next_sp; | 104 | sp = next_sp; |
@@ -233,8 +220,8 @@ static int sane_signal_64_frame(unsigned long sp) | |||
233 | puc == (unsigned long) &sf->uc; | 220 | puc == (unsigned long) &sf->uc; |
234 | } | 221 | } |
235 | 222 | ||
236 | static void perf_callchain_user_64(struct pt_regs *regs, | 223 | static void perf_callchain_user_64(struct perf_callchain_entry *entry, |
237 | struct perf_callchain_entry *entry) | 224 | struct pt_regs *regs) |
238 | { | 225 | { |
239 | unsigned long sp, next_sp; | 226 | unsigned long sp, next_sp; |
240 | unsigned long next_ip; | 227 | unsigned long next_ip; |
@@ -246,8 +233,7 @@ static void perf_callchain_user_64(struct pt_regs *regs, | |||
246 | next_ip = regs->nip; | 233 | next_ip = regs->nip; |
247 | lr = regs->link; | 234 | lr = regs->link; |
248 | sp = regs->gpr[1]; | 235 | sp = regs->gpr[1]; |
249 | callchain_store(entry, PERF_CONTEXT_USER); | 236 | perf_callchain_store(entry, next_ip); |
250 | callchain_store(entry, next_ip); | ||
251 | 237 | ||
252 | for (;;) { | 238 | for (;;) { |
253 | fp = (unsigned long __user *) sp; | 239 | fp = (unsigned long __user *) sp; |
@@ -276,14 +262,14 @@ static void perf_callchain_user_64(struct pt_regs *regs, | |||
276 | read_user_stack_64(&uregs[PT_R1], &sp)) | 262 | read_user_stack_64(&uregs[PT_R1], &sp)) |
277 | return; | 263 | return; |
278 | level = 0; | 264 | level = 0; |
279 | callchain_store(entry, PERF_CONTEXT_USER); | 265 | perf_callchain_store(entry, PERF_CONTEXT_USER); |
280 | callchain_store(entry, next_ip); | 266 | perf_callchain_store(entry, next_ip); |
281 | continue; | 267 | continue; |
282 | } | 268 | } |
283 | 269 | ||
284 | if (level == 0) | 270 | if (level == 0) |
285 | next_ip = lr; | 271 | next_ip = lr; |
286 | callchain_store(entry, next_ip); | 272 | perf_callchain_store(entry, next_ip); |
287 | ++level; | 273 | ++level; |
288 | sp = next_sp; | 274 | sp = next_sp; |
289 | } | 275 | } |
@@ -315,8 +301,8 @@ static int read_user_stack_32(unsigned int __user *ptr, unsigned int *ret) | |||
315 | return __get_user_inatomic(*ret, ptr); | 301 | return __get_user_inatomic(*ret, ptr); |
316 | } | 302 | } |
317 | 303 | ||
318 | static inline void perf_callchain_user_64(struct pt_regs *regs, | 304 | static inline void perf_callchain_user_64(struct perf_callchain_entry *entry, |
319 | struct perf_callchain_entry *entry) | 305 | struct pt_regs *regs) |
320 | { | 306 | { |
321 | } | 307 | } |
322 | 308 | ||
@@ -435,8 +421,8 @@ static unsigned int __user *signal_frame_32_regs(unsigned int sp, | |||
435 | return mctx->mc_gregs; | 421 | return mctx->mc_gregs; |
436 | } | 422 | } |
437 | 423 | ||
438 | static void perf_callchain_user_32(struct pt_regs *regs, | 424 | static void perf_callchain_user_32(struct perf_callchain_entry *entry, |
439 | struct perf_callchain_entry *entry) | 425 | struct pt_regs *regs) |
440 | { | 426 | { |
441 | unsigned int sp, next_sp; | 427 | unsigned int sp, next_sp; |
442 | unsigned int next_ip; | 428 | unsigned int next_ip; |
@@ -447,8 +433,7 @@ static void perf_callchain_user_32(struct pt_regs *regs, | |||
447 | next_ip = regs->nip; | 433 | next_ip = regs->nip; |
448 | lr = regs->link; | 434 | lr = regs->link; |
449 | sp = regs->gpr[1]; | 435 | sp = regs->gpr[1]; |
450 | callchain_store(entry, PERF_CONTEXT_USER); | 436 | perf_callchain_store(entry, next_ip); |
451 | callchain_store(entry, next_ip); | ||
452 | 437 | ||
453 | while (entry->nr < PERF_MAX_STACK_DEPTH) { | 438 | while (entry->nr < PERF_MAX_STACK_DEPTH) { |
454 | fp = (unsigned int __user *) (unsigned long) sp; | 439 | fp = (unsigned int __user *) (unsigned long) sp; |
@@ -470,45 +455,24 @@ static void perf_callchain_user_32(struct pt_regs *regs, | |||
470 | read_user_stack_32(&uregs[PT_R1], &sp)) | 455 | read_user_stack_32(&uregs[PT_R1], &sp)) |
471 | return; | 456 | return; |
472 | level = 0; | 457 | level = 0; |
473 | callchain_store(entry, PERF_CONTEXT_USER); | 458 | perf_callchain_store(entry, PERF_CONTEXT_USER); |
474 | callchain_store(entry, next_ip); | 459 | perf_callchain_store(entry, next_ip); |
475 | continue; | 460 | continue; |
476 | } | 461 | } |
477 | 462 | ||
478 | if (level == 0) | 463 | if (level == 0) |
479 | next_ip = lr; | 464 | next_ip = lr; |
480 | callchain_store(entry, next_ip); | 465 | perf_callchain_store(entry, next_ip); |
481 | ++level; | 466 | ++level; |
482 | sp = next_sp; | 467 | sp = next_sp; |
483 | } | 468 | } |
484 | } | 469 | } |
485 | 470 | ||
486 | /* | 471 | void |
487 | * Since we can't get PMU interrupts inside a PMU interrupt handler, | 472 | perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs) |
488 | * we don't need separate irq and nmi entries here. | ||
489 | */ | ||
490 | static DEFINE_PER_CPU(struct perf_callchain_entry, cpu_perf_callchain); | ||
491 | |||
492 | struct perf_callchain_entry *perf_callchain(struct pt_regs *regs) | ||
493 | { | 473 | { |
494 | struct perf_callchain_entry *entry = &__get_cpu_var(cpu_perf_callchain); | 474 | if (current_is_64bit()) |
495 | 475 | perf_callchain_user_64(entry, regs); | |
496 | entry->nr = 0; | 476 | else |
497 | 477 | perf_callchain_user_32(entry, regs); | |
498 | if (!user_mode(regs)) { | ||
499 | perf_callchain_kernel(regs, entry); | ||
500 | if (current->mm) | ||
501 | regs = task_pt_regs(current); | ||
502 | else | ||
503 | regs = NULL; | ||
504 | } | ||
505 | |||
506 | if (regs) { | ||
507 | if (current_is_64bit()) | ||
508 | perf_callchain_user_64(regs, entry); | ||
509 | else | ||
510 | perf_callchain_user_32(regs, entry); | ||
511 | } | ||
512 | |||
513 | return entry; | ||
514 | } | 478 | } |
diff --git a/arch/powerpc/kernel/perf_event.c b/arch/powerpc/kernel/perf_event.c index d301a30445e0..3129c855933c 100644 --- a/arch/powerpc/kernel/perf_event.c +++ b/arch/powerpc/kernel/perf_event.c | |||
@@ -402,6 +402,9 @@ static void power_pmu_read(struct perf_event *event) | |||
402 | { | 402 | { |
403 | s64 val, delta, prev; | 403 | s64 val, delta, prev; |
404 | 404 | ||
405 | if (event->hw.state & PERF_HES_STOPPED) | ||
406 | return; | ||
407 | |||
405 | if (!event->hw.idx) | 408 | if (!event->hw.idx) |
406 | return; | 409 | return; |
407 | /* | 410 | /* |
@@ -517,7 +520,7 @@ static void write_mmcr0(struct cpu_hw_events *cpuhw, unsigned long mmcr0) | |||
517 | * Disable all events to prevent PMU interrupts and to allow | 520 | * Disable all events to prevent PMU interrupts and to allow |
518 | * events to be added or removed. | 521 | * events to be added or removed. |
519 | */ | 522 | */ |
520 | void hw_perf_disable(void) | 523 | static void power_pmu_disable(struct pmu *pmu) |
521 | { | 524 | { |
522 | struct cpu_hw_events *cpuhw; | 525 | struct cpu_hw_events *cpuhw; |
523 | unsigned long flags; | 526 | unsigned long flags; |
@@ -565,7 +568,7 @@ void hw_perf_disable(void) | |||
565 | * If we were previously disabled and events were added, then | 568 | * If we were previously disabled and events were added, then |
566 | * put the new config on the PMU. | 569 | * put the new config on the PMU. |
567 | */ | 570 | */ |
568 | void hw_perf_enable(void) | 571 | static void power_pmu_enable(struct pmu *pmu) |
569 | { | 572 | { |
570 | struct perf_event *event; | 573 | struct perf_event *event; |
571 | struct cpu_hw_events *cpuhw; | 574 | struct cpu_hw_events *cpuhw; |
@@ -672,6 +675,8 @@ void hw_perf_enable(void) | |||
672 | } | 675 | } |
673 | local64_set(&event->hw.prev_count, val); | 676 | local64_set(&event->hw.prev_count, val); |
674 | event->hw.idx = idx; | 677 | event->hw.idx = idx; |
678 | if (event->hw.state & PERF_HES_STOPPED) | ||
679 | val = 0; | ||
675 | write_pmc(idx, val); | 680 | write_pmc(idx, val); |
676 | perf_event_update_userpage(event); | 681 | perf_event_update_userpage(event); |
677 | } | 682 | } |
@@ -727,7 +732,7 @@ static int collect_events(struct perf_event *group, int max_count, | |||
727 | * re-enable the PMU in order to get hw_perf_enable to do the | 732 | * re-enable the PMU in order to get hw_perf_enable to do the |
728 | * actual work of reconfiguring the PMU. | 733 | * actual work of reconfiguring the PMU. |
729 | */ | 734 | */ |
730 | static int power_pmu_enable(struct perf_event *event) | 735 | static int power_pmu_add(struct perf_event *event, int ef_flags) |
731 | { | 736 | { |
732 | struct cpu_hw_events *cpuhw; | 737 | struct cpu_hw_events *cpuhw; |
733 | unsigned long flags; | 738 | unsigned long flags; |
@@ -735,7 +740,7 @@ static int power_pmu_enable(struct perf_event *event) | |||
735 | int ret = -EAGAIN; | 740 | int ret = -EAGAIN; |
736 | 741 | ||
737 | local_irq_save(flags); | 742 | local_irq_save(flags); |
738 | perf_disable(); | 743 | perf_pmu_disable(event->pmu); |
739 | 744 | ||
740 | /* | 745 | /* |
741 | * Add the event to the list (if there is room) | 746 | * Add the event to the list (if there is room) |
@@ -749,6 +754,9 @@ static int power_pmu_enable(struct perf_event *event) | |||
749 | cpuhw->events[n0] = event->hw.config; | 754 | cpuhw->events[n0] = event->hw.config; |
750 | cpuhw->flags[n0] = event->hw.event_base; | 755 | cpuhw->flags[n0] = event->hw.event_base; |
751 | 756 | ||
757 | if (!(ef_flags & PERF_EF_START)) | ||
758 | event->hw.state = PERF_HES_STOPPED | PERF_HES_UPTODATE; | ||
759 | |||
752 | /* | 760 | /* |
753 | * If group events scheduling transaction was started, | 761 | * If group events scheduling transaction was started, |
754 | * skip the schedulability test here, it will be peformed | 762 | * skip the schedulability test here, it will be peformed |
@@ -769,7 +777,7 @@ nocheck: | |||
769 | 777 | ||
770 | ret = 0; | 778 | ret = 0; |
771 | out: | 779 | out: |
772 | perf_enable(); | 780 | perf_pmu_enable(event->pmu); |
773 | local_irq_restore(flags); | 781 | local_irq_restore(flags); |
774 | return ret; | 782 | return ret; |
775 | } | 783 | } |
@@ -777,14 +785,14 @@ nocheck: | |||
777 | /* | 785 | /* |
778 | * Remove a event from the PMU. | 786 | * Remove a event from the PMU. |
779 | */ | 787 | */ |
780 | static void power_pmu_disable(struct perf_event *event) | 788 | static void power_pmu_del(struct perf_event *event, int ef_flags) |
781 | { | 789 | { |
782 | struct cpu_hw_events *cpuhw; | 790 | struct cpu_hw_events *cpuhw; |
783 | long i; | 791 | long i; |
784 | unsigned long flags; | 792 | unsigned long flags; |
785 | 793 | ||
786 | local_irq_save(flags); | 794 | local_irq_save(flags); |
787 | perf_disable(); | 795 | perf_pmu_disable(event->pmu); |
788 | 796 | ||
789 | power_pmu_read(event); | 797 | power_pmu_read(event); |
790 | 798 | ||
@@ -821,34 +829,60 @@ static void power_pmu_disable(struct perf_event *event) | |||
821 | cpuhw->mmcr[0] &= ~(MMCR0_PMXE | MMCR0_FCECE); | 829 | cpuhw->mmcr[0] &= ~(MMCR0_PMXE | MMCR0_FCECE); |
822 | } | 830 | } |
823 | 831 | ||
824 | perf_enable(); | 832 | perf_pmu_enable(event->pmu); |
825 | local_irq_restore(flags); | 833 | local_irq_restore(flags); |
826 | } | 834 | } |
827 | 835 | ||
828 | /* | 836 | /* |
829 | * Re-enable interrupts on a event after they were throttled | 837 | * POWER-PMU does not support disabling individual counters, hence |
830 | * because they were coming too fast. | 838 | * program their cycle counter to their max value and ignore the interrupts. |
831 | */ | 839 | */ |
832 | static void power_pmu_unthrottle(struct perf_event *event) | 840 | |
841 | static void power_pmu_start(struct perf_event *event, int ef_flags) | ||
842 | { | ||
843 | unsigned long flags; | ||
844 | s64 left; | ||
845 | |||
846 | if (!event->hw.idx || !event->hw.sample_period) | ||
847 | return; | ||
848 | |||
849 | if (!(event->hw.state & PERF_HES_STOPPED)) | ||
850 | return; | ||
851 | |||
852 | if (ef_flags & PERF_EF_RELOAD) | ||
853 | WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE)); | ||
854 | |||
855 | local_irq_save(flags); | ||
856 | perf_pmu_disable(event->pmu); | ||
857 | |||
858 | event->hw.state = 0; | ||
859 | left = local64_read(&event->hw.period_left); | ||
860 | write_pmc(event->hw.idx, left); | ||
861 | |||
862 | perf_event_update_userpage(event); | ||
863 | perf_pmu_enable(event->pmu); | ||
864 | local_irq_restore(flags); | ||
865 | } | ||
866 | |||
867 | static void power_pmu_stop(struct perf_event *event, int ef_flags) | ||
833 | { | 868 | { |
834 | s64 val, left; | ||
835 | unsigned long flags; | 869 | unsigned long flags; |
836 | 870 | ||
837 | if (!event->hw.idx || !event->hw.sample_period) | 871 | if (!event->hw.idx || !event->hw.sample_period) |
838 | return; | 872 | return; |
873 | |||
874 | if (event->hw.state & PERF_HES_STOPPED) | ||
875 | return; | ||
876 | |||
839 | local_irq_save(flags); | 877 | local_irq_save(flags); |
840 | perf_disable(); | 878 | perf_pmu_disable(event->pmu); |
879 | |||
841 | power_pmu_read(event); | 880 | power_pmu_read(event); |
842 | left = event->hw.sample_period; | 881 | event->hw.state |= PERF_HES_STOPPED | PERF_HES_UPTODATE; |
843 | event->hw.last_period = left; | 882 | write_pmc(event->hw.idx, 0); |
844 | val = 0; | 883 | |
845 | if (left < 0x80000000L) | ||
846 | val = 0x80000000L - left; | ||
847 | write_pmc(event->hw.idx, val); | ||
848 | local64_set(&event->hw.prev_count, val); | ||
849 | local64_set(&event->hw.period_left, left); | ||
850 | perf_event_update_userpage(event); | 884 | perf_event_update_userpage(event); |
851 | perf_enable(); | 885 | perf_pmu_enable(event->pmu); |
852 | local_irq_restore(flags); | 886 | local_irq_restore(flags); |
853 | } | 887 | } |
854 | 888 | ||
@@ -857,10 +891,11 @@ static void power_pmu_unthrottle(struct perf_event *event) | |||
857 | * Set the flag to make pmu::enable() not perform the | 891 | * Set the flag to make pmu::enable() not perform the |
858 | * schedulability test, it will be performed at commit time | 892 | * schedulability test, it will be performed at commit time |
859 | */ | 893 | */ |
860 | void power_pmu_start_txn(const struct pmu *pmu) | 894 | void power_pmu_start_txn(struct pmu *pmu) |
861 | { | 895 | { |
862 | struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); | 896 | struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); |
863 | 897 | ||
898 | perf_pmu_disable(pmu); | ||
864 | cpuhw->group_flag |= PERF_EVENT_TXN; | 899 | cpuhw->group_flag |= PERF_EVENT_TXN; |
865 | cpuhw->n_txn_start = cpuhw->n_events; | 900 | cpuhw->n_txn_start = cpuhw->n_events; |
866 | } | 901 | } |
@@ -870,11 +905,12 @@ void power_pmu_start_txn(const struct pmu *pmu) | |||
870 | * Clear the flag and pmu::enable() will perform the | 905 | * Clear the flag and pmu::enable() will perform the |
871 | * schedulability test. | 906 | * schedulability test. |
872 | */ | 907 | */ |
873 | void power_pmu_cancel_txn(const struct pmu *pmu) | 908 | void power_pmu_cancel_txn(struct pmu *pmu) |
874 | { | 909 | { |
875 | struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); | 910 | struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); |
876 | 911 | ||
877 | cpuhw->group_flag &= ~PERF_EVENT_TXN; | 912 | cpuhw->group_flag &= ~PERF_EVENT_TXN; |
913 | perf_pmu_enable(pmu); | ||
878 | } | 914 | } |
879 | 915 | ||
880 | /* | 916 | /* |
@@ -882,7 +918,7 @@ void power_pmu_cancel_txn(const struct pmu *pmu) | |||
882 | * Perform the group schedulability test as a whole | 918 | * Perform the group schedulability test as a whole |
883 | * Return 0 if success | 919 | * Return 0 if success |
884 | */ | 920 | */ |
885 | int power_pmu_commit_txn(const struct pmu *pmu) | 921 | int power_pmu_commit_txn(struct pmu *pmu) |
886 | { | 922 | { |
887 | struct cpu_hw_events *cpuhw; | 923 | struct cpu_hw_events *cpuhw; |
888 | long i, n; | 924 | long i, n; |
@@ -901,19 +937,10 @@ int power_pmu_commit_txn(const struct pmu *pmu) | |||
901 | cpuhw->event[i]->hw.config = cpuhw->events[i]; | 937 | cpuhw->event[i]->hw.config = cpuhw->events[i]; |
902 | 938 | ||
903 | cpuhw->group_flag &= ~PERF_EVENT_TXN; | 939 | cpuhw->group_flag &= ~PERF_EVENT_TXN; |
940 | perf_pmu_enable(pmu); | ||
904 | return 0; | 941 | return 0; |
905 | } | 942 | } |
906 | 943 | ||
907 | struct pmu power_pmu = { | ||
908 | .enable = power_pmu_enable, | ||
909 | .disable = power_pmu_disable, | ||
910 | .read = power_pmu_read, | ||
911 | .unthrottle = power_pmu_unthrottle, | ||
912 | .start_txn = power_pmu_start_txn, | ||
913 | .cancel_txn = power_pmu_cancel_txn, | ||
914 | .commit_txn = power_pmu_commit_txn, | ||
915 | }; | ||
916 | |||
917 | /* | 944 | /* |
918 | * Return 1 if we might be able to put event on a limited PMC, | 945 | * Return 1 if we might be able to put event on a limited PMC, |
919 | * or 0 if not. | 946 | * or 0 if not. |
@@ -1014,7 +1041,7 @@ static int hw_perf_cache_event(u64 config, u64 *eventp) | |||
1014 | return 0; | 1041 | return 0; |
1015 | } | 1042 | } |
1016 | 1043 | ||
1017 | const struct pmu *hw_perf_event_init(struct perf_event *event) | 1044 | static int power_pmu_event_init(struct perf_event *event) |
1018 | { | 1045 | { |
1019 | u64 ev; | 1046 | u64 ev; |
1020 | unsigned long flags; | 1047 | unsigned long flags; |
@@ -1026,25 +1053,27 @@ const struct pmu *hw_perf_event_init(struct perf_event *event) | |||
1026 | struct cpu_hw_events *cpuhw; | 1053 | struct cpu_hw_events *cpuhw; |
1027 | 1054 | ||
1028 | if (!ppmu) | 1055 | if (!ppmu) |
1029 | return ERR_PTR(-ENXIO); | 1056 | return -ENOENT; |
1057 | |||
1030 | switch (event->attr.type) { | 1058 | switch (event->attr.type) { |
1031 | case PERF_TYPE_HARDWARE: | 1059 | case PERF_TYPE_HARDWARE: |
1032 | ev = event->attr.config; | 1060 | ev = event->attr.config; |
1033 | if (ev >= ppmu->n_generic || ppmu->generic_events[ev] == 0) | 1061 | if (ev >= ppmu->n_generic || ppmu->generic_events[ev] == 0) |
1034 | return ERR_PTR(-EOPNOTSUPP); | 1062 | return -EOPNOTSUPP; |
1035 | ev = ppmu->generic_events[ev]; | 1063 | ev = ppmu->generic_events[ev]; |
1036 | break; | 1064 | break; |
1037 | case PERF_TYPE_HW_CACHE: | 1065 | case PERF_TYPE_HW_CACHE: |
1038 | err = hw_perf_cache_event(event->attr.config, &ev); | 1066 | err = hw_perf_cache_event(event->attr.config, &ev); |
1039 | if (err) | 1067 | if (err) |
1040 | return ERR_PTR(err); | 1068 | return err; |
1041 | break; | 1069 | break; |
1042 | case PERF_TYPE_RAW: | 1070 | case PERF_TYPE_RAW: |
1043 | ev = event->attr.config; | 1071 | ev = event->attr.config; |
1044 | break; | 1072 | break; |
1045 | default: | 1073 | default: |
1046 | return ERR_PTR(-EINVAL); | 1074 | return -ENOENT; |
1047 | } | 1075 | } |
1076 | |||
1048 | event->hw.config_base = ev; | 1077 | event->hw.config_base = ev; |
1049 | event->hw.idx = 0; | 1078 | event->hw.idx = 0; |
1050 | 1079 | ||
@@ -1063,7 +1092,7 @@ const struct pmu *hw_perf_event_init(struct perf_event *event) | |||
1063 | * XXX we should check if the task is an idle task. | 1092 | * XXX we should check if the task is an idle task. |
1064 | */ | 1093 | */ |
1065 | flags = 0; | 1094 | flags = 0; |
1066 | if (event->ctx->task) | 1095 | if (event->attach_state & PERF_ATTACH_TASK) |
1067 | flags |= PPMU_ONLY_COUNT_RUN; | 1096 | flags |= PPMU_ONLY_COUNT_RUN; |
1068 | 1097 | ||
1069 | /* | 1098 | /* |
@@ -1081,7 +1110,7 @@ const struct pmu *hw_perf_event_init(struct perf_event *event) | |||
1081 | */ | 1110 | */ |
1082 | ev = normal_pmc_alternative(ev, flags); | 1111 | ev = normal_pmc_alternative(ev, flags); |
1083 | if (!ev) | 1112 | if (!ev) |
1084 | return ERR_PTR(-EINVAL); | 1113 | return -EINVAL; |
1085 | } | 1114 | } |
1086 | } | 1115 | } |
1087 | 1116 | ||
@@ -1095,19 +1124,19 @@ const struct pmu *hw_perf_event_init(struct perf_event *event) | |||
1095 | n = collect_events(event->group_leader, ppmu->n_counter - 1, | 1124 | n = collect_events(event->group_leader, ppmu->n_counter - 1, |
1096 | ctrs, events, cflags); | 1125 | ctrs, events, cflags); |
1097 | if (n < 0) | 1126 | if (n < 0) |
1098 | return ERR_PTR(-EINVAL); | 1127 | return -EINVAL; |
1099 | } | 1128 | } |
1100 | events[n] = ev; | 1129 | events[n] = ev; |
1101 | ctrs[n] = event; | 1130 | ctrs[n] = event; |
1102 | cflags[n] = flags; | 1131 | cflags[n] = flags; |
1103 | if (check_excludes(ctrs, cflags, n, 1)) | 1132 | if (check_excludes(ctrs, cflags, n, 1)) |
1104 | return ERR_PTR(-EINVAL); | 1133 | return -EINVAL; |
1105 | 1134 | ||
1106 | cpuhw = &get_cpu_var(cpu_hw_events); | 1135 | cpuhw = &get_cpu_var(cpu_hw_events); |
1107 | err = power_check_constraints(cpuhw, events, cflags, n + 1); | 1136 | err = power_check_constraints(cpuhw, events, cflags, n + 1); |
1108 | put_cpu_var(cpu_hw_events); | 1137 | put_cpu_var(cpu_hw_events); |
1109 | if (err) | 1138 | if (err) |
1110 | return ERR_PTR(-EINVAL); | 1139 | return -EINVAL; |
1111 | 1140 | ||
1112 | event->hw.config = events[n]; | 1141 | event->hw.config = events[n]; |
1113 | event->hw.event_base = cflags[n]; | 1142 | event->hw.event_base = cflags[n]; |
@@ -1132,11 +1161,23 @@ const struct pmu *hw_perf_event_init(struct perf_event *event) | |||
1132 | } | 1161 | } |
1133 | event->destroy = hw_perf_event_destroy; | 1162 | event->destroy = hw_perf_event_destroy; |
1134 | 1163 | ||
1135 | if (err) | 1164 | return err; |
1136 | return ERR_PTR(err); | ||
1137 | return &power_pmu; | ||
1138 | } | 1165 | } |
1139 | 1166 | ||
1167 | struct pmu power_pmu = { | ||
1168 | .pmu_enable = power_pmu_enable, | ||
1169 | .pmu_disable = power_pmu_disable, | ||
1170 | .event_init = power_pmu_event_init, | ||
1171 | .add = power_pmu_add, | ||
1172 | .del = power_pmu_del, | ||
1173 | .start = power_pmu_start, | ||
1174 | .stop = power_pmu_stop, | ||
1175 | .read = power_pmu_read, | ||
1176 | .start_txn = power_pmu_start_txn, | ||
1177 | .cancel_txn = power_pmu_cancel_txn, | ||
1178 | .commit_txn = power_pmu_commit_txn, | ||
1179 | }; | ||
1180 | |||
1140 | /* | 1181 | /* |
1141 | * A counter has overflowed; update its count and record | 1182 | * A counter has overflowed; update its count and record |
1142 | * things if requested. Note that interrupts are hard-disabled | 1183 | * things if requested. Note that interrupts are hard-disabled |
@@ -1149,6 +1190,11 @@ static void record_and_restart(struct perf_event *event, unsigned long val, | |||
1149 | s64 prev, delta, left; | 1190 | s64 prev, delta, left; |
1150 | int record = 0; | 1191 | int record = 0; |
1151 | 1192 | ||
1193 | if (event->hw.state & PERF_HES_STOPPED) { | ||
1194 | write_pmc(event->hw.idx, 0); | ||
1195 | return; | ||
1196 | } | ||
1197 | |||
1152 | /* we don't have to worry about interrupts here */ | 1198 | /* we don't have to worry about interrupts here */ |
1153 | prev = local64_read(&event->hw.prev_count); | 1199 | prev = local64_read(&event->hw.prev_count); |
1154 | delta = (val - prev) & 0xfffffffful; | 1200 | delta = (val - prev) & 0xfffffffful; |
@@ -1171,6 +1217,11 @@ static void record_and_restart(struct perf_event *event, unsigned long val, | |||
1171 | val = 0x80000000LL - left; | 1217 | val = 0x80000000LL - left; |
1172 | } | 1218 | } |
1173 | 1219 | ||
1220 | write_pmc(event->hw.idx, val); | ||
1221 | local64_set(&event->hw.prev_count, val); | ||
1222 | local64_set(&event->hw.period_left, left); | ||
1223 | perf_event_update_userpage(event); | ||
1224 | |||
1174 | /* | 1225 | /* |
1175 | * Finally record data if requested. | 1226 | * Finally record data if requested. |
1176 | */ | 1227 | */ |
@@ -1183,23 +1234,9 @@ static void record_and_restart(struct perf_event *event, unsigned long val, | |||
1183 | if (event->attr.sample_type & PERF_SAMPLE_ADDR) | 1234 | if (event->attr.sample_type & PERF_SAMPLE_ADDR) |
1184 | perf_get_data_addr(regs, &data.addr); | 1235 | perf_get_data_addr(regs, &data.addr); |
1185 | 1236 | ||
1186 | if (perf_event_overflow(event, nmi, &data, regs)) { | 1237 | if (perf_event_overflow(event, nmi, &data, regs)) |
1187 | /* | 1238 | power_pmu_stop(event, 0); |
1188 | * Interrupts are coming too fast - throttle them | ||
1189 | * by setting the event to 0, so it will be | ||
1190 | * at least 2^30 cycles until the next interrupt | ||
1191 | * (assuming each event counts at most 2 counts | ||
1192 | * per cycle). | ||
1193 | */ | ||
1194 | val = 0; | ||
1195 | left = ~0ULL >> 1; | ||
1196 | } | ||
1197 | } | 1239 | } |
1198 | |||
1199 | write_pmc(event->hw.idx, val); | ||
1200 | local64_set(&event->hw.prev_count, val); | ||
1201 | local64_set(&event->hw.period_left, left); | ||
1202 | perf_event_update_userpage(event); | ||
1203 | } | 1240 | } |
1204 | 1241 | ||
1205 | /* | 1242 | /* |
@@ -1342,6 +1379,7 @@ int register_power_pmu(struct power_pmu *pmu) | |||
1342 | freeze_events_kernel = MMCR0_FCHV; | 1379 | freeze_events_kernel = MMCR0_FCHV; |
1343 | #endif /* CONFIG_PPC64 */ | 1380 | #endif /* CONFIG_PPC64 */ |
1344 | 1381 | ||
1382 | perf_pmu_register(&power_pmu); | ||
1345 | perf_cpu_notifier(power_pmu_notifier); | 1383 | perf_cpu_notifier(power_pmu_notifier); |
1346 | 1384 | ||
1347 | return 0; | 1385 | return 0; |
diff --git a/arch/powerpc/kernel/perf_event_fsl_emb.c b/arch/powerpc/kernel/perf_event_fsl_emb.c index 1ba45471ae43..7ecca59ddf77 100644 --- a/arch/powerpc/kernel/perf_event_fsl_emb.c +++ b/arch/powerpc/kernel/perf_event_fsl_emb.c | |||
@@ -156,6 +156,9 @@ static void fsl_emb_pmu_read(struct perf_event *event) | |||
156 | { | 156 | { |
157 | s64 val, delta, prev; | 157 | s64 val, delta, prev; |
158 | 158 | ||
159 | if (event->hw.state & PERF_HES_STOPPED) | ||
160 | return; | ||
161 | |||
159 | /* | 162 | /* |
160 | * Performance monitor interrupts come even when interrupts | 163 | * Performance monitor interrupts come even when interrupts |
161 | * are soft-disabled, as long as interrupts are hard-enabled. | 164 | * are soft-disabled, as long as interrupts are hard-enabled. |
@@ -177,7 +180,7 @@ static void fsl_emb_pmu_read(struct perf_event *event) | |||
177 | * Disable all events to prevent PMU interrupts and to allow | 180 | * Disable all events to prevent PMU interrupts and to allow |
178 | * events to be added or removed. | 181 | * events to be added or removed. |
179 | */ | 182 | */ |
180 | void hw_perf_disable(void) | 183 | static void fsl_emb_pmu_disable(struct pmu *pmu) |
181 | { | 184 | { |
182 | struct cpu_hw_events *cpuhw; | 185 | struct cpu_hw_events *cpuhw; |
183 | unsigned long flags; | 186 | unsigned long flags; |
@@ -216,7 +219,7 @@ void hw_perf_disable(void) | |||
216 | * If we were previously disabled and events were added, then | 219 | * If we were previously disabled and events were added, then |
217 | * put the new config on the PMU. | 220 | * put the new config on the PMU. |
218 | */ | 221 | */ |
219 | void hw_perf_enable(void) | 222 | static void fsl_emb_pmu_enable(struct pmu *pmu) |
220 | { | 223 | { |
221 | struct cpu_hw_events *cpuhw; | 224 | struct cpu_hw_events *cpuhw; |
222 | unsigned long flags; | 225 | unsigned long flags; |
@@ -262,8 +265,8 @@ static int collect_events(struct perf_event *group, int max_count, | |||
262 | return n; | 265 | return n; |
263 | } | 266 | } |
264 | 267 | ||
265 | /* perf must be disabled, context locked on entry */ | 268 | /* context locked on entry */ |
266 | static int fsl_emb_pmu_enable(struct perf_event *event) | 269 | static int fsl_emb_pmu_add(struct perf_event *event, int flags) |
267 | { | 270 | { |
268 | struct cpu_hw_events *cpuhw; | 271 | struct cpu_hw_events *cpuhw; |
269 | int ret = -EAGAIN; | 272 | int ret = -EAGAIN; |
@@ -271,6 +274,7 @@ static int fsl_emb_pmu_enable(struct perf_event *event) | |||
271 | u64 val; | 274 | u64 val; |
272 | int i; | 275 | int i; |
273 | 276 | ||
277 | perf_pmu_disable(event->pmu); | ||
274 | cpuhw = &get_cpu_var(cpu_hw_events); | 278 | cpuhw = &get_cpu_var(cpu_hw_events); |
275 | 279 | ||
276 | if (event->hw.config & FSL_EMB_EVENT_RESTRICTED) | 280 | if (event->hw.config & FSL_EMB_EVENT_RESTRICTED) |
@@ -301,6 +305,12 @@ static int fsl_emb_pmu_enable(struct perf_event *event) | |||
301 | val = 0x80000000L - left; | 305 | val = 0x80000000L - left; |
302 | } | 306 | } |
303 | local64_set(&event->hw.prev_count, val); | 307 | local64_set(&event->hw.prev_count, val); |
308 | |||
309 | if (!(flags & PERF_EF_START)) { | ||
310 | event->hw.state = PERF_HES_STOPPED | PERF_HES_UPTODATE; | ||
311 | val = 0; | ||
312 | } | ||
313 | |||
304 | write_pmc(i, val); | 314 | write_pmc(i, val); |
305 | perf_event_update_userpage(event); | 315 | perf_event_update_userpage(event); |
306 | 316 | ||
@@ -310,15 +320,17 @@ static int fsl_emb_pmu_enable(struct perf_event *event) | |||
310 | ret = 0; | 320 | ret = 0; |
311 | out: | 321 | out: |
312 | put_cpu_var(cpu_hw_events); | 322 | put_cpu_var(cpu_hw_events); |
323 | perf_pmu_enable(event->pmu); | ||
313 | return ret; | 324 | return ret; |
314 | } | 325 | } |
315 | 326 | ||
316 | /* perf must be disabled, context locked on entry */ | 327 | /* context locked on entry */ |
317 | static void fsl_emb_pmu_disable(struct perf_event *event) | 328 | static void fsl_emb_pmu_del(struct perf_event *event, int flags) |
318 | { | 329 | { |
319 | struct cpu_hw_events *cpuhw; | 330 | struct cpu_hw_events *cpuhw; |
320 | int i = event->hw.idx; | 331 | int i = event->hw.idx; |
321 | 332 | ||
333 | perf_pmu_disable(event->pmu); | ||
322 | if (i < 0) | 334 | if (i < 0) |
323 | goto out; | 335 | goto out; |
324 | 336 | ||
@@ -346,44 +358,57 @@ static void fsl_emb_pmu_disable(struct perf_event *event) | |||
346 | cpuhw->n_events--; | 358 | cpuhw->n_events--; |
347 | 359 | ||
348 | out: | 360 | out: |
361 | perf_pmu_enable(event->pmu); | ||
349 | put_cpu_var(cpu_hw_events); | 362 | put_cpu_var(cpu_hw_events); |
350 | } | 363 | } |
351 | 364 | ||
352 | /* | 365 | static void fsl_emb_pmu_start(struct perf_event *event, int ef_flags) |
353 | * Re-enable interrupts on a event after they were throttled | ||
354 | * because they were coming too fast. | ||
355 | * | ||
356 | * Context is locked on entry, but perf is not disabled. | ||
357 | */ | ||
358 | static void fsl_emb_pmu_unthrottle(struct perf_event *event) | ||
359 | { | 366 | { |
360 | s64 val, left; | ||
361 | unsigned long flags; | 367 | unsigned long flags; |
368 | s64 left; | ||
362 | 369 | ||
363 | if (event->hw.idx < 0 || !event->hw.sample_period) | 370 | if (event->hw.idx < 0 || !event->hw.sample_period) |
364 | return; | 371 | return; |
372 | |||
373 | if (!(event->hw.state & PERF_HES_STOPPED)) | ||
374 | return; | ||
375 | |||
376 | if (ef_flags & PERF_EF_RELOAD) | ||
377 | WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE)); | ||
378 | |||
365 | local_irq_save(flags); | 379 | local_irq_save(flags); |
366 | perf_disable(); | 380 | perf_pmu_disable(event->pmu); |
367 | fsl_emb_pmu_read(event); | 381 | |
368 | left = event->hw.sample_period; | 382 | event->hw.state = 0; |
369 | event->hw.last_period = left; | 383 | left = local64_read(&event->hw.period_left); |
370 | val = 0; | 384 | write_pmc(event->hw.idx, left); |
371 | if (left < 0x80000000L) | 385 | |
372 | val = 0x80000000L - left; | ||
373 | write_pmc(event->hw.idx, val); | ||
374 | local64_set(&event->hw.prev_count, val); | ||
375 | local64_set(&event->hw.period_left, left); | ||
376 | perf_event_update_userpage(event); | 386 | perf_event_update_userpage(event); |
377 | perf_enable(); | 387 | perf_pmu_enable(event->pmu); |
378 | local_irq_restore(flags); | 388 | local_irq_restore(flags); |
379 | } | 389 | } |
380 | 390 | ||
381 | static struct pmu fsl_emb_pmu = { | 391 | static void fsl_emb_pmu_stop(struct perf_event *event, int ef_flags) |
382 | .enable = fsl_emb_pmu_enable, | 392 | { |
383 | .disable = fsl_emb_pmu_disable, | 393 | unsigned long flags; |
384 | .read = fsl_emb_pmu_read, | 394 | |
385 | .unthrottle = fsl_emb_pmu_unthrottle, | 395 | if (event->hw.idx < 0 || !event->hw.sample_period) |
386 | }; | 396 | return; |
397 | |||
398 | if (event->hw.state & PERF_HES_STOPPED) | ||
399 | return; | ||
400 | |||
401 | local_irq_save(flags); | ||
402 | perf_pmu_disable(event->pmu); | ||
403 | |||
404 | fsl_emb_pmu_read(event); | ||
405 | event->hw.state |= PERF_HES_STOPPED | PERF_HES_UPTODATE; | ||
406 | write_pmc(event->hw.idx, 0); | ||
407 | |||
408 | perf_event_update_userpage(event); | ||
409 | perf_pmu_enable(event->pmu); | ||
410 | local_irq_restore(flags); | ||
411 | } | ||
387 | 412 | ||
388 | /* | 413 | /* |
389 | * Release the PMU if this is the last perf_event. | 414 | * Release the PMU if this is the last perf_event. |
@@ -428,7 +453,7 @@ static int hw_perf_cache_event(u64 config, u64 *eventp) | |||
428 | return 0; | 453 | return 0; |
429 | } | 454 | } |
430 | 455 | ||
431 | const struct pmu *hw_perf_event_init(struct perf_event *event) | 456 | static int fsl_emb_pmu_event_init(struct perf_event *event) |
432 | { | 457 | { |
433 | u64 ev; | 458 | u64 ev; |
434 | struct perf_event *events[MAX_HWEVENTS]; | 459 | struct perf_event *events[MAX_HWEVENTS]; |
@@ -441,14 +466,14 @@ const struct pmu *hw_perf_event_init(struct perf_event *event) | |||
441 | case PERF_TYPE_HARDWARE: | 466 | case PERF_TYPE_HARDWARE: |
442 | ev = event->attr.config; | 467 | ev = event->attr.config; |
443 | if (ev >= ppmu->n_generic || ppmu->generic_events[ev] == 0) | 468 | if (ev >= ppmu->n_generic || ppmu->generic_events[ev] == 0) |
444 | return ERR_PTR(-EOPNOTSUPP); | 469 | return -EOPNOTSUPP; |
445 | ev = ppmu->generic_events[ev]; | 470 | ev = ppmu->generic_events[ev]; |
446 | break; | 471 | break; |
447 | 472 | ||
448 | case PERF_TYPE_HW_CACHE: | 473 | case PERF_TYPE_HW_CACHE: |
449 | err = hw_perf_cache_event(event->attr.config, &ev); | 474 | err = hw_perf_cache_event(event->attr.config, &ev); |
450 | if (err) | 475 | if (err) |
451 | return ERR_PTR(err); | 476 | return err; |
452 | break; | 477 | break; |
453 | 478 | ||
454 | case PERF_TYPE_RAW: | 479 | case PERF_TYPE_RAW: |
@@ -456,12 +481,12 @@ const struct pmu *hw_perf_event_init(struct perf_event *event) | |||
456 | break; | 481 | break; |
457 | 482 | ||
458 | default: | 483 | default: |
459 | return ERR_PTR(-EINVAL); | 484 | return -ENOENT; |
460 | } | 485 | } |
461 | 486 | ||
462 | event->hw.config = ppmu->xlate_event(ev); | 487 | event->hw.config = ppmu->xlate_event(ev); |
463 | if (!(event->hw.config & FSL_EMB_EVENT_VALID)) | 488 | if (!(event->hw.config & FSL_EMB_EVENT_VALID)) |
464 | return ERR_PTR(-EINVAL); | 489 | return -EINVAL; |
465 | 490 | ||
466 | /* | 491 | /* |
467 | * If this is in a group, check if it can go on with all the | 492 | * If this is in a group, check if it can go on with all the |
@@ -473,7 +498,7 @@ const struct pmu *hw_perf_event_init(struct perf_event *event) | |||
473 | n = collect_events(event->group_leader, | 498 | n = collect_events(event->group_leader, |
474 | ppmu->n_counter - 1, events); | 499 | ppmu->n_counter - 1, events); |
475 | if (n < 0) | 500 | if (n < 0) |
476 | return ERR_PTR(-EINVAL); | 501 | return -EINVAL; |
477 | } | 502 | } |
478 | 503 | ||
479 | if (event->hw.config & FSL_EMB_EVENT_RESTRICTED) { | 504 | if (event->hw.config & FSL_EMB_EVENT_RESTRICTED) { |
@@ -484,7 +509,7 @@ const struct pmu *hw_perf_event_init(struct perf_event *event) | |||
484 | } | 509 | } |
485 | 510 | ||
486 | if (num_restricted >= ppmu->n_restricted) | 511 | if (num_restricted >= ppmu->n_restricted) |
487 | return ERR_PTR(-EINVAL); | 512 | return -EINVAL; |
488 | } | 513 | } |
489 | 514 | ||
490 | event->hw.idx = -1; | 515 | event->hw.idx = -1; |
@@ -497,7 +522,7 @@ const struct pmu *hw_perf_event_init(struct perf_event *event) | |||
497 | if (event->attr.exclude_kernel) | 522 | if (event->attr.exclude_kernel) |
498 | event->hw.config_base |= PMLCA_FCS; | 523 | event->hw.config_base |= PMLCA_FCS; |
499 | if (event->attr.exclude_idle) | 524 | if (event->attr.exclude_idle) |
500 | return ERR_PTR(-ENOTSUPP); | 525 | return -ENOTSUPP; |
501 | 526 | ||
502 | event->hw.last_period = event->hw.sample_period; | 527 | event->hw.last_period = event->hw.sample_period; |
503 | local64_set(&event->hw.period_left, event->hw.last_period); | 528 | local64_set(&event->hw.period_left, event->hw.last_period); |
@@ -523,11 +548,20 @@ const struct pmu *hw_perf_event_init(struct perf_event *event) | |||
523 | } | 548 | } |
524 | event->destroy = hw_perf_event_destroy; | 549 | event->destroy = hw_perf_event_destroy; |
525 | 550 | ||
526 | if (err) | 551 | return err; |
527 | return ERR_PTR(err); | ||
528 | return &fsl_emb_pmu; | ||
529 | } | 552 | } |
530 | 553 | ||
554 | static struct pmu fsl_emb_pmu = { | ||
555 | .pmu_enable = fsl_emb_pmu_enable, | ||
556 | .pmu_disable = fsl_emb_pmu_disable, | ||
557 | .event_init = fsl_emb_pmu_event_init, | ||
558 | .add = fsl_emb_pmu_add, | ||
559 | .del = fsl_emb_pmu_del, | ||
560 | .start = fsl_emb_pmu_start, | ||
561 | .stop = fsl_emb_pmu_stop, | ||
562 | .read = fsl_emb_pmu_read, | ||
563 | }; | ||
564 | |||
531 | /* | 565 | /* |
532 | * A counter has overflowed; update its count and record | 566 | * A counter has overflowed; update its count and record |
533 | * things if requested. Note that interrupts are hard-disabled | 567 | * things if requested. Note that interrupts are hard-disabled |
@@ -540,6 +574,11 @@ static void record_and_restart(struct perf_event *event, unsigned long val, | |||
540 | s64 prev, delta, left; | 574 | s64 prev, delta, left; |
541 | int record = 0; | 575 | int record = 0; |
542 | 576 | ||
577 | if (event->hw.state & PERF_HES_STOPPED) { | ||
578 | write_pmc(event->hw.idx, 0); | ||
579 | return; | ||
580 | } | ||
581 | |||
543 | /* we don't have to worry about interrupts here */ | 582 | /* we don't have to worry about interrupts here */ |
544 | prev = local64_read(&event->hw.prev_count); | 583 | prev = local64_read(&event->hw.prev_count); |
545 | delta = (val - prev) & 0xfffffffful; | 584 | delta = (val - prev) & 0xfffffffful; |
@@ -562,6 +601,11 @@ static void record_and_restart(struct perf_event *event, unsigned long val, | |||
562 | val = 0x80000000LL - left; | 601 | val = 0x80000000LL - left; |
563 | } | 602 | } |
564 | 603 | ||
604 | write_pmc(event->hw.idx, val); | ||
605 | local64_set(&event->hw.prev_count, val); | ||
606 | local64_set(&event->hw.period_left, left); | ||
607 | perf_event_update_userpage(event); | ||
608 | |||
565 | /* | 609 | /* |
566 | * Finally record data if requested. | 610 | * Finally record data if requested. |
567 | */ | 611 | */ |
@@ -571,23 +615,9 @@ static void record_and_restart(struct perf_event *event, unsigned long val, | |||
571 | perf_sample_data_init(&data, 0); | 615 | perf_sample_data_init(&data, 0); |
572 | data.period = event->hw.last_period; | 616 | data.period = event->hw.last_period; |
573 | 617 | ||
574 | if (perf_event_overflow(event, nmi, &data, regs)) { | 618 | if (perf_event_overflow(event, nmi, &data, regs)) |
575 | /* | 619 | fsl_emb_pmu_stop(event, 0); |
576 | * Interrupts are coming too fast - throttle them | ||
577 | * by setting the event to 0, so it will be | ||
578 | * at least 2^30 cycles until the next interrupt | ||
579 | * (assuming each event counts at most 2 counts | ||
580 | * per cycle). | ||
581 | */ | ||
582 | val = 0; | ||
583 | left = ~0ULL >> 1; | ||
584 | } | ||
585 | } | 620 | } |
586 | |||
587 | write_pmc(event->hw.idx, val); | ||
588 | local64_set(&event->hw.prev_count, val); | ||
589 | local64_set(&event->hw.period_left, left); | ||
590 | perf_event_update_userpage(event); | ||
591 | } | 621 | } |
592 | 622 | ||
593 | static void perf_event_interrupt(struct pt_regs *regs) | 623 | static void perf_event_interrupt(struct pt_regs *regs) |
@@ -651,5 +681,7 @@ int register_fsl_emb_pmu(struct fsl_emb_pmu *pmu) | |||
651 | pr_info("%s performance monitor hardware support registered\n", | 681 | pr_info("%s performance monitor hardware support registered\n", |
652 | pmu->name); | 682 | pmu->name); |
653 | 683 | ||
684 | perf_pmu_register(&fsl_emb_pmu); | ||
685 | |||
654 | return 0; | 686 | return 0; |
655 | } | 687 | } |
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index 8533b3b83f5d..54888eb10c3b 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c | |||
@@ -53,7 +53,7 @@ | |||
53 | #include <linux/posix-timers.h> | 53 | #include <linux/posix-timers.h> |
54 | #include <linux/irq.h> | 54 | #include <linux/irq.h> |
55 | #include <linux/delay.h> | 55 | #include <linux/delay.h> |
56 | #include <linux/perf_event.h> | 56 | #include <linux/irq_work.h> |
57 | #include <asm/trace.h> | 57 | #include <asm/trace.h> |
58 | 58 | ||
59 | #include <asm/io.h> | 59 | #include <asm/io.h> |
@@ -493,60 +493,60 @@ void __init iSeries_time_init_early(void) | |||
493 | } | 493 | } |
494 | #endif /* CONFIG_PPC_ISERIES */ | 494 | #endif /* CONFIG_PPC_ISERIES */ |
495 | 495 | ||
496 | #ifdef CONFIG_PERF_EVENTS | 496 | #ifdef CONFIG_IRQ_WORK |
497 | 497 | ||
498 | /* | 498 | /* |
499 | * 64-bit uses a byte in the PACA, 32-bit uses a per-cpu variable... | 499 | * 64-bit uses a byte in the PACA, 32-bit uses a per-cpu variable... |
500 | */ | 500 | */ |
501 | #ifdef CONFIG_PPC64 | 501 | #ifdef CONFIG_PPC64 |
502 | static inline unsigned long test_perf_event_pending(void) | 502 | static inline unsigned long test_irq_work_pending(void) |
503 | { | 503 | { |
504 | unsigned long x; | 504 | unsigned long x; |
505 | 505 | ||
506 | asm volatile("lbz %0,%1(13)" | 506 | asm volatile("lbz %0,%1(13)" |
507 | : "=r" (x) | 507 | : "=r" (x) |
508 | : "i" (offsetof(struct paca_struct, perf_event_pending))); | 508 | : "i" (offsetof(struct paca_struct, irq_work_pending))); |
509 | return x; | 509 | return x; |
510 | } | 510 | } |
511 | 511 | ||
512 | static inline void set_perf_event_pending_flag(void) | 512 | static inline void set_irq_work_pending_flag(void) |
513 | { | 513 | { |
514 | asm volatile("stb %0,%1(13)" : : | 514 | asm volatile("stb %0,%1(13)" : : |
515 | "r" (1), | 515 | "r" (1), |
516 | "i" (offsetof(struct paca_struct, perf_event_pending))); | 516 | "i" (offsetof(struct paca_struct, irq_work_pending))); |
517 | } | 517 | } |
518 | 518 | ||
519 | static inline void clear_perf_event_pending(void) | 519 | static inline void clear_irq_work_pending(void) |
520 | { | 520 | { |
521 | asm volatile("stb %0,%1(13)" : : | 521 | asm volatile("stb %0,%1(13)" : : |
522 | "r" (0), | 522 | "r" (0), |
523 | "i" (offsetof(struct paca_struct, perf_event_pending))); | 523 | "i" (offsetof(struct paca_struct, irq_work_pending))); |
524 | } | 524 | } |
525 | 525 | ||
526 | #else /* 32-bit */ | 526 | #else /* 32-bit */ |
527 | 527 | ||
528 | DEFINE_PER_CPU(u8, perf_event_pending); | 528 | DEFINE_PER_CPU(u8, irq_work_pending); |
529 | 529 | ||
530 | #define set_perf_event_pending_flag() __get_cpu_var(perf_event_pending) = 1 | 530 | #define set_irq_work_pending_flag() __get_cpu_var(irq_work_pending) = 1 |
531 | #define test_perf_event_pending() __get_cpu_var(perf_event_pending) | 531 | #define test_irq_work_pending() __get_cpu_var(irq_work_pending) |
532 | #define clear_perf_event_pending() __get_cpu_var(perf_event_pending) = 0 | 532 | #define clear_irq_work_pending() __get_cpu_var(irq_work_pending) = 0 |
533 | 533 | ||
534 | #endif /* 32 vs 64 bit */ | 534 | #endif /* 32 vs 64 bit */ |
535 | 535 | ||
536 | void set_perf_event_pending(void) | 536 | void set_irq_work_pending(void) |
537 | { | 537 | { |
538 | preempt_disable(); | 538 | preempt_disable(); |
539 | set_perf_event_pending_flag(); | 539 | set_irq_work_pending_flag(); |
540 | set_dec(1); | 540 | set_dec(1); |
541 | preempt_enable(); | 541 | preempt_enable(); |
542 | } | 542 | } |
543 | 543 | ||
544 | #else /* CONFIG_PERF_EVENTS */ | 544 | #else /* CONFIG_IRQ_WORK */ |
545 | 545 | ||
546 | #define test_perf_event_pending() 0 | 546 | #define test_irq_work_pending() 0 |
547 | #define clear_perf_event_pending() | 547 | #define clear_irq_work_pending() |
548 | 548 | ||
549 | #endif /* CONFIG_PERF_EVENTS */ | 549 | #endif /* CONFIG_IRQ_WORK */ |
550 | 550 | ||
551 | /* | 551 | /* |
552 | * For iSeries shared processors, we have to let the hypervisor | 552 | * For iSeries shared processors, we have to let the hypervisor |
@@ -587,9 +587,9 @@ void timer_interrupt(struct pt_regs * regs) | |||
587 | 587 | ||
588 | calculate_steal_time(); | 588 | calculate_steal_time(); |
589 | 589 | ||
590 | if (test_perf_event_pending()) { | 590 | if (test_irq_work_pending()) { |
591 | clear_perf_event_pending(); | 591 | clear_irq_work_pending(); |
592 | perf_event_do_pending(); | 592 | irq_work_run(); |
593 | } | 593 | } |
594 | 594 | ||
595 | #ifdef CONFIG_PPC_ISERIES | 595 | #ifdef CONFIG_PPC_ISERIES |
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 74a2f1b607a4..75976a141947 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig | |||
@@ -95,6 +95,7 @@ config S390 | |||
95 | select HAVE_KVM if 64BIT | 95 | select HAVE_KVM if 64BIT |
96 | select HAVE_ARCH_TRACEHOOK | 96 | select HAVE_ARCH_TRACEHOOK |
97 | select INIT_ALL_POSSIBLE | 97 | select INIT_ALL_POSSIBLE |
98 | select HAVE_IRQ_WORK | ||
98 | select HAVE_PERF_EVENTS | 99 | select HAVE_PERF_EVENTS |
99 | select HAVE_KERNEL_GZIP | 100 | select HAVE_KERNEL_GZIP |
100 | select HAVE_KERNEL_BZIP2 | 101 | select HAVE_KERNEL_BZIP2 |
diff --git a/arch/s390/include/asm/hardirq.h b/arch/s390/include/asm/hardirq.h index 498bc3892385..881d94590aeb 100644 --- a/arch/s390/include/asm/hardirq.h +++ b/arch/s390/include/asm/hardirq.h | |||
@@ -12,10 +12,6 @@ | |||
12 | #ifndef __ASM_HARDIRQ_H | 12 | #ifndef __ASM_HARDIRQ_H |
13 | #define __ASM_HARDIRQ_H | 13 | #define __ASM_HARDIRQ_H |
14 | 14 | ||
15 | #include <linux/threads.h> | ||
16 | #include <linux/sched.h> | ||
17 | #include <linux/cache.h> | ||
18 | #include <linux/interrupt.h> | ||
19 | #include <asm/lowcore.h> | 15 | #include <asm/lowcore.h> |
20 | 16 | ||
21 | #define local_softirq_pending() (S390_lowcore.softirq_pending) | 17 | #define local_softirq_pending() (S390_lowcore.softirq_pending) |
diff --git a/arch/s390/include/asm/perf_event.h b/arch/s390/include/asm/perf_event.h index 3840cbe77637..a75f168d2718 100644 --- a/arch/s390/include/asm/perf_event.h +++ b/arch/s390/include/asm/perf_event.h | |||
@@ -4,7 +4,6 @@ | |||
4 | * Copyright 2009 Martin Schwidefsky, IBM Corporation. | 4 | * Copyright 2009 Martin Schwidefsky, IBM Corporation. |
5 | */ | 5 | */ |
6 | 6 | ||
7 | static inline void set_perf_event_pending(void) {} | 7 | /* Empty, just to avoid compiling error */ |
8 | static inline void clear_perf_event_pending(void) {} | ||
9 | 8 | ||
10 | #define PERF_EVENT_INDEX_OFFSET 0 | 9 | #define PERF_EVENT_INDEX_OFFSET 0 |
diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig index 33990fa95af0..35b6879628a0 100644 --- a/arch/sh/Kconfig +++ b/arch/sh/Kconfig | |||
@@ -16,6 +16,7 @@ config SUPERH | |||
16 | select HAVE_ARCH_TRACEHOOK | 16 | select HAVE_ARCH_TRACEHOOK |
17 | select HAVE_DMA_API_DEBUG | 17 | select HAVE_DMA_API_DEBUG |
18 | select HAVE_DMA_ATTRS | 18 | select HAVE_DMA_ATTRS |
19 | select HAVE_IRQ_WORK | ||
19 | select HAVE_PERF_EVENTS | 20 | select HAVE_PERF_EVENTS |
20 | select PERF_USE_VMALLOC | 21 | select PERF_USE_VMALLOC |
21 | select HAVE_KERNEL_GZIP | 22 | select HAVE_KERNEL_GZIP |
@@ -249,6 +250,11 @@ config ARCH_SHMOBILE | |||
249 | select PM | 250 | select PM |
250 | select PM_RUNTIME | 251 | select PM_RUNTIME |
251 | 252 | ||
253 | config CPU_HAS_PMU | ||
254 | depends on CPU_SH4 || CPU_SH4A | ||
255 | default y | ||
256 | bool | ||
257 | |||
252 | if SUPERH32 | 258 | if SUPERH32 |
253 | 259 | ||
254 | choice | 260 | choice |
@@ -738,6 +744,14 @@ config GUSA_RB | |||
738 | LLSC, this should be more efficient than the other alternative of | 744 | LLSC, this should be more efficient than the other alternative of |
739 | disabling interrupts around the atomic sequence. | 745 | disabling interrupts around the atomic sequence. |
740 | 746 | ||
747 | config HW_PERF_EVENTS | ||
748 | bool "Enable hardware performance counter support for perf events" | ||
749 | depends on PERF_EVENTS && CPU_HAS_PMU | ||
750 | default y | ||
751 | help | ||
752 | Enable hardware performance counter support for perf events. If | ||
753 | disabled, perf events will use software events only. | ||
754 | |||
741 | source "drivers/sh/Kconfig" | 755 | source "drivers/sh/Kconfig" |
742 | 756 | ||
743 | endmenu | 757 | endmenu |
diff --git a/arch/sh/include/asm/perf_event.h b/arch/sh/include/asm/perf_event.h index 3d0c9f36d150..14308bed7ea5 100644 --- a/arch/sh/include/asm/perf_event.h +++ b/arch/sh/include/asm/perf_event.h | |||
@@ -26,11 +26,4 @@ extern int register_sh_pmu(struct sh_pmu *); | |||
26 | extern int reserve_pmc_hardware(void); | 26 | extern int reserve_pmc_hardware(void); |
27 | extern void release_pmc_hardware(void); | 27 | extern void release_pmc_hardware(void); |
28 | 28 | ||
29 | static inline void set_perf_event_pending(void) | ||
30 | { | ||
31 | /* Nothing to see here, move along. */ | ||
32 | } | ||
33 | |||
34 | #define PERF_EVENT_INDEX_OFFSET 0 | ||
35 | |||
36 | #endif /* __ASM_SH_PERF_EVENT_H */ | 29 | #endif /* __ASM_SH_PERF_EVENT_H */ |
diff --git a/arch/sh/kernel/perf_callchain.c b/arch/sh/kernel/perf_callchain.c index a9dd3abde28e..d5ca1ef50fa9 100644 --- a/arch/sh/kernel/perf_callchain.c +++ b/arch/sh/kernel/perf_callchain.c | |||
@@ -14,11 +14,6 @@ | |||
14 | #include <asm/unwinder.h> | 14 | #include <asm/unwinder.h> |
15 | #include <asm/ptrace.h> | 15 | #include <asm/ptrace.h> |
16 | 16 | ||
17 | static inline void callchain_store(struct perf_callchain_entry *entry, u64 ip) | ||
18 | { | ||
19 | if (entry->nr < PERF_MAX_STACK_DEPTH) | ||
20 | entry->ip[entry->nr++] = ip; | ||
21 | } | ||
22 | 17 | ||
23 | static void callchain_warning(void *data, char *msg) | 18 | static void callchain_warning(void *data, char *msg) |
24 | { | 19 | { |
@@ -39,7 +34,7 @@ static void callchain_address(void *data, unsigned long addr, int reliable) | |||
39 | struct perf_callchain_entry *entry = data; | 34 | struct perf_callchain_entry *entry = data; |
40 | 35 | ||
41 | if (reliable) | 36 | if (reliable) |
42 | callchain_store(entry, addr); | 37 | perf_callchain_store(entry, addr); |
43 | } | 38 | } |
44 | 39 | ||
45 | static const struct stacktrace_ops callchain_ops = { | 40 | static const struct stacktrace_ops callchain_ops = { |
@@ -49,47 +44,10 @@ static const struct stacktrace_ops callchain_ops = { | |||
49 | .address = callchain_address, | 44 | .address = callchain_address, |
50 | }; | 45 | }; |
51 | 46 | ||
52 | static void | 47 | void |
53 | perf_callchain_kernel(struct pt_regs *regs, struct perf_callchain_entry *entry) | 48 | perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs) |
54 | { | 49 | { |
55 | callchain_store(entry, PERF_CONTEXT_KERNEL); | 50 | perf_callchain_store(entry, regs->pc); |
56 | callchain_store(entry, regs->pc); | ||
57 | 51 | ||
58 | unwind_stack(NULL, regs, NULL, &callchain_ops, entry); | 52 | unwind_stack(NULL, regs, NULL, &callchain_ops, entry); |
59 | } | 53 | } |
60 | |||
61 | static void | ||
62 | perf_do_callchain(struct pt_regs *regs, struct perf_callchain_entry *entry) | ||
63 | { | ||
64 | int is_user; | ||
65 | |||
66 | if (!regs) | ||
67 | return; | ||
68 | |||
69 | is_user = user_mode(regs); | ||
70 | |||
71 | if (is_user && current->state != TASK_RUNNING) | ||
72 | return; | ||
73 | |||
74 | /* | ||
75 | * Only the kernel side is implemented for now. | ||
76 | */ | ||
77 | if (!is_user) | ||
78 | perf_callchain_kernel(regs, entry); | ||
79 | } | ||
80 | |||
81 | /* | ||
82 | * No need for separate IRQ and NMI entries. | ||
83 | */ | ||
84 | static DEFINE_PER_CPU(struct perf_callchain_entry, callchain); | ||
85 | |||
86 | struct perf_callchain_entry *perf_callchain(struct pt_regs *regs) | ||
87 | { | ||
88 | struct perf_callchain_entry *entry = &__get_cpu_var(callchain); | ||
89 | |||
90 | entry->nr = 0; | ||
91 | |||
92 | perf_do_callchain(regs, entry); | ||
93 | |||
94 | return entry; | ||
95 | } | ||
diff --git a/arch/sh/kernel/perf_event.c b/arch/sh/kernel/perf_event.c index 7a3dc3567258..5a4b33435650 100644 --- a/arch/sh/kernel/perf_event.c +++ b/arch/sh/kernel/perf_event.c | |||
@@ -59,6 +59,24 @@ static inline int sh_pmu_initialized(void) | |||
59 | return !!sh_pmu; | 59 | return !!sh_pmu; |
60 | } | 60 | } |
61 | 61 | ||
62 | const char *perf_pmu_name(void) | ||
63 | { | ||
64 | if (!sh_pmu) | ||
65 | return NULL; | ||
66 | |||
67 | return sh_pmu->name; | ||
68 | } | ||
69 | EXPORT_SYMBOL_GPL(perf_pmu_name); | ||
70 | |||
71 | int perf_num_counters(void) | ||
72 | { | ||
73 | if (!sh_pmu) | ||
74 | return 0; | ||
75 | |||
76 | return sh_pmu->num_events; | ||
77 | } | ||
78 | EXPORT_SYMBOL_GPL(perf_num_counters); | ||
79 | |||
62 | /* | 80 | /* |
63 | * Release the PMU if this is the last perf_event. | 81 | * Release the PMU if this is the last perf_event. |
64 | */ | 82 | */ |
@@ -206,50 +224,80 @@ again: | |||
206 | local64_add(delta, &event->count); | 224 | local64_add(delta, &event->count); |
207 | } | 225 | } |
208 | 226 | ||
209 | static void sh_pmu_disable(struct perf_event *event) | 227 | static void sh_pmu_stop(struct perf_event *event, int flags) |
210 | { | 228 | { |
211 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | 229 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); |
212 | struct hw_perf_event *hwc = &event->hw; | 230 | struct hw_perf_event *hwc = &event->hw; |
213 | int idx = hwc->idx; | 231 | int idx = hwc->idx; |
214 | 232 | ||
215 | clear_bit(idx, cpuc->active_mask); | 233 | if (!(event->hw.state & PERF_HES_STOPPED)) { |
216 | sh_pmu->disable(hwc, idx); | 234 | sh_pmu->disable(hwc, idx); |
235 | cpuc->events[idx] = NULL; | ||
236 | event->hw.state |= PERF_HES_STOPPED; | ||
237 | } | ||
238 | |||
239 | if ((flags & PERF_EF_UPDATE) && !(event->hw.state & PERF_HES_UPTODATE)) { | ||
240 | sh_perf_event_update(event, &event->hw, idx); | ||
241 | event->hw.state |= PERF_HES_UPTODATE; | ||
242 | } | ||
243 | } | ||
244 | |||
245 | static void sh_pmu_start(struct perf_event *event, int flags) | ||
246 | { | ||
247 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
248 | struct hw_perf_event *hwc = &event->hw; | ||
249 | int idx = hwc->idx; | ||
250 | |||
251 | if (WARN_ON_ONCE(idx == -1)) | ||
252 | return; | ||
253 | |||
254 | if (flags & PERF_EF_RELOAD) | ||
255 | WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE)); | ||
217 | 256 | ||
218 | barrier(); | 257 | cpuc->events[idx] = event; |
258 | event->hw.state = 0; | ||
259 | sh_pmu->enable(hwc, idx); | ||
260 | } | ||
219 | 261 | ||
220 | sh_perf_event_update(event, &event->hw, idx); | 262 | static void sh_pmu_del(struct perf_event *event, int flags) |
263 | { | ||
264 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
221 | 265 | ||
222 | cpuc->events[idx] = NULL; | 266 | sh_pmu_stop(event, PERF_EF_UPDATE); |
223 | clear_bit(idx, cpuc->used_mask); | 267 | __clear_bit(event->hw.idx, cpuc->used_mask); |
224 | 268 | ||
225 | perf_event_update_userpage(event); | 269 | perf_event_update_userpage(event); |
226 | } | 270 | } |
227 | 271 | ||
228 | static int sh_pmu_enable(struct perf_event *event) | 272 | static int sh_pmu_add(struct perf_event *event, int flags) |
229 | { | 273 | { |
230 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | 274 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); |
231 | struct hw_perf_event *hwc = &event->hw; | 275 | struct hw_perf_event *hwc = &event->hw; |
232 | int idx = hwc->idx; | 276 | int idx = hwc->idx; |
277 | int ret = -EAGAIN; | ||
278 | |||
279 | perf_pmu_disable(event->pmu); | ||
233 | 280 | ||
234 | if (test_and_set_bit(idx, cpuc->used_mask)) { | 281 | if (__test_and_set_bit(idx, cpuc->used_mask)) { |
235 | idx = find_first_zero_bit(cpuc->used_mask, sh_pmu->num_events); | 282 | idx = find_first_zero_bit(cpuc->used_mask, sh_pmu->num_events); |
236 | if (idx == sh_pmu->num_events) | 283 | if (idx == sh_pmu->num_events) |
237 | return -EAGAIN; | 284 | goto out; |
238 | 285 | ||
239 | set_bit(idx, cpuc->used_mask); | 286 | __set_bit(idx, cpuc->used_mask); |
240 | hwc->idx = idx; | 287 | hwc->idx = idx; |
241 | } | 288 | } |
242 | 289 | ||
243 | sh_pmu->disable(hwc, idx); | 290 | sh_pmu->disable(hwc, idx); |
244 | 291 | ||
245 | cpuc->events[idx] = event; | 292 | event->hw.state = PERF_HES_UPTODATE | PERF_HES_STOPPED; |
246 | set_bit(idx, cpuc->active_mask); | 293 | if (flags & PERF_EF_START) |
247 | 294 | sh_pmu_start(event, PERF_EF_RELOAD); | |
248 | sh_pmu->enable(hwc, idx); | ||
249 | 295 | ||
250 | perf_event_update_userpage(event); | 296 | perf_event_update_userpage(event); |
251 | 297 | ret = 0; | |
252 | return 0; | 298 | out: |
299 | perf_pmu_enable(event->pmu); | ||
300 | return ret; | ||
253 | } | 301 | } |
254 | 302 | ||
255 | static void sh_pmu_read(struct perf_event *event) | 303 | static void sh_pmu_read(struct perf_event *event) |
@@ -257,24 +305,56 @@ static void sh_pmu_read(struct perf_event *event) | |||
257 | sh_perf_event_update(event, &event->hw, event->hw.idx); | 305 | sh_perf_event_update(event, &event->hw, event->hw.idx); |
258 | } | 306 | } |
259 | 307 | ||
260 | static const struct pmu pmu = { | 308 | static int sh_pmu_event_init(struct perf_event *event) |
261 | .enable = sh_pmu_enable, | ||
262 | .disable = sh_pmu_disable, | ||
263 | .read = sh_pmu_read, | ||
264 | }; | ||
265 | |||
266 | const struct pmu *hw_perf_event_init(struct perf_event *event) | ||
267 | { | 309 | { |
268 | int err = __hw_perf_event_init(event); | 310 | int err; |
311 | |||
312 | switch (event->attr.type) { | ||
313 | case PERF_TYPE_RAW: | ||
314 | case PERF_TYPE_HW_CACHE: | ||
315 | case PERF_TYPE_HARDWARE: | ||
316 | err = __hw_perf_event_init(event); | ||
317 | break; | ||
318 | |||
319 | default: | ||
320 | return -ENOENT; | ||
321 | } | ||
322 | |||
269 | if (unlikely(err)) { | 323 | if (unlikely(err)) { |
270 | if (event->destroy) | 324 | if (event->destroy) |
271 | event->destroy(event); | 325 | event->destroy(event); |
272 | return ERR_PTR(err); | ||
273 | } | 326 | } |
274 | 327 | ||
275 | return &pmu; | 328 | return err; |
329 | } | ||
330 | |||
331 | static void sh_pmu_enable(struct pmu *pmu) | ||
332 | { | ||
333 | if (!sh_pmu_initialized()) | ||
334 | return; | ||
335 | |||
336 | sh_pmu->enable_all(); | ||
337 | } | ||
338 | |||
339 | static void sh_pmu_disable(struct pmu *pmu) | ||
340 | { | ||
341 | if (!sh_pmu_initialized()) | ||
342 | return; | ||
343 | |||
344 | sh_pmu->disable_all(); | ||
276 | } | 345 | } |
277 | 346 | ||
347 | static struct pmu pmu = { | ||
348 | .pmu_enable = sh_pmu_enable, | ||
349 | .pmu_disable = sh_pmu_disable, | ||
350 | .event_init = sh_pmu_event_init, | ||
351 | .add = sh_pmu_add, | ||
352 | .del = sh_pmu_del, | ||
353 | .start = sh_pmu_start, | ||
354 | .stop = sh_pmu_stop, | ||
355 | .read = sh_pmu_read, | ||
356 | }; | ||
357 | |||
278 | static void sh_pmu_setup(int cpu) | 358 | static void sh_pmu_setup(int cpu) |
279 | { | 359 | { |
280 | struct cpu_hw_events *cpuhw = &per_cpu(cpu_hw_events, cpu); | 360 | struct cpu_hw_events *cpuhw = &per_cpu(cpu_hw_events, cpu); |
@@ -299,32 +379,17 @@ sh_pmu_notifier(struct notifier_block *self, unsigned long action, void *hcpu) | |||
299 | return NOTIFY_OK; | 379 | return NOTIFY_OK; |
300 | } | 380 | } |
301 | 381 | ||
302 | void hw_perf_enable(void) | 382 | int __cpuinit register_sh_pmu(struct sh_pmu *_pmu) |
303 | { | ||
304 | if (!sh_pmu_initialized()) | ||
305 | return; | ||
306 | |||
307 | sh_pmu->enable_all(); | ||
308 | } | ||
309 | |||
310 | void hw_perf_disable(void) | ||
311 | { | ||
312 | if (!sh_pmu_initialized()) | ||
313 | return; | ||
314 | |||
315 | sh_pmu->disable_all(); | ||
316 | } | ||
317 | |||
318 | int __cpuinit register_sh_pmu(struct sh_pmu *pmu) | ||
319 | { | 383 | { |
320 | if (sh_pmu) | 384 | if (sh_pmu) |
321 | return -EBUSY; | 385 | return -EBUSY; |
322 | sh_pmu = pmu; | 386 | sh_pmu = _pmu; |
323 | 387 | ||
324 | pr_info("Performance Events: %s support registered\n", pmu->name); | 388 | pr_info("Performance Events: %s support registered\n", _pmu->name); |
325 | 389 | ||
326 | WARN_ON(pmu->num_events > MAX_HWEVENTS); | 390 | WARN_ON(_pmu->num_events > MAX_HWEVENTS); |
327 | 391 | ||
392 | perf_pmu_register(&pmu); | ||
328 | perf_cpu_notifier(sh_pmu_notifier); | 393 | perf_cpu_notifier(sh_pmu_notifier); |
329 | return 0; | 394 | return 0; |
330 | } | 395 | } |
diff --git a/arch/sh/oprofile/Makefile b/arch/sh/oprofile/Makefile index 4886c5c1786c..e85aae73e3dc 100644 --- a/arch/sh/oprofile/Makefile +++ b/arch/sh/oprofile/Makefile | |||
@@ -6,4 +6,8 @@ DRIVER_OBJS = $(addprefix ../../../drivers/oprofile/, \ | |||
6 | oprofilefs.o oprofile_stats.o \ | 6 | oprofilefs.o oprofile_stats.o \ |
7 | timer_int.o ) | 7 | timer_int.o ) |
8 | 8 | ||
9 | ifeq ($(CONFIG_HW_PERF_EVENTS),y) | ||
10 | DRIVER_OBJS += $(addprefix ../../../drivers/oprofile/, oprofile_perf.o) | ||
11 | endif | ||
12 | |||
9 | oprofile-y := $(DRIVER_OBJS) common.o backtrace.o | 13 | oprofile-y := $(DRIVER_OBJS) common.o backtrace.o |
diff --git a/arch/sh/oprofile/common.c b/arch/sh/oprofile/common.c index ac604937f3ee..e10d89376f9b 100644 --- a/arch/sh/oprofile/common.c +++ b/arch/sh/oprofile/common.c | |||
@@ -17,114 +17,45 @@ | |||
17 | #include <linux/init.h> | 17 | #include <linux/init.h> |
18 | #include <linux/errno.h> | 18 | #include <linux/errno.h> |
19 | #include <linux/smp.h> | 19 | #include <linux/smp.h> |
20 | #include <linux/perf_event.h> | ||
20 | #include <asm/processor.h> | 21 | #include <asm/processor.h> |
21 | #include "op_impl.h" | ||
22 | |||
23 | static struct op_sh_model *model; | ||
24 | |||
25 | static struct op_counter_config ctr[20]; | ||
26 | 22 | ||
23 | #ifdef CONFIG_HW_PERF_EVENTS | ||
27 | extern void sh_backtrace(struct pt_regs * const regs, unsigned int depth); | 24 | extern void sh_backtrace(struct pt_regs * const regs, unsigned int depth); |
28 | 25 | ||
29 | static int op_sh_setup(void) | 26 | char *op_name_from_perf_id(void) |
30 | { | ||
31 | /* Pre-compute the values to stuff in the hardware registers. */ | ||
32 | model->reg_setup(ctr); | ||
33 | |||
34 | /* Configure the registers on all cpus. */ | ||
35 | on_each_cpu(model->cpu_setup, NULL, 1); | ||
36 | |||
37 | return 0; | ||
38 | } | ||
39 | |||
40 | static int op_sh_create_files(struct super_block *sb, struct dentry *root) | ||
41 | { | 27 | { |
42 | int i, ret = 0; | 28 | const char *pmu; |
29 | char buf[20]; | ||
30 | int size; | ||
43 | 31 | ||
44 | for (i = 0; i < model->num_counters; i++) { | 32 | pmu = perf_pmu_name(); |
45 | struct dentry *dir; | 33 | if (!pmu) |
46 | char buf[4]; | 34 | return NULL; |
47 | 35 | ||
48 | snprintf(buf, sizeof(buf), "%d", i); | 36 | size = snprintf(buf, sizeof(buf), "sh/%s", pmu); |
49 | dir = oprofilefs_mkdir(sb, root, buf); | 37 | if (size > -1 && size < sizeof(buf)) |
38 | return buf; | ||
50 | 39 | ||
51 | ret |= oprofilefs_create_ulong(sb, dir, "enabled", &ctr[i].enabled); | 40 | return NULL; |
52 | ret |= oprofilefs_create_ulong(sb, dir, "event", &ctr[i].event); | ||
53 | ret |= oprofilefs_create_ulong(sb, dir, "kernel", &ctr[i].kernel); | ||
54 | ret |= oprofilefs_create_ulong(sb, dir, "user", &ctr[i].user); | ||
55 | |||
56 | if (model->create_files) | ||
57 | ret |= model->create_files(sb, dir); | ||
58 | else | ||
59 | ret |= oprofilefs_create_ulong(sb, dir, "count", &ctr[i].count); | ||
60 | |||
61 | /* Dummy entries */ | ||
62 | ret |= oprofilefs_create_ulong(sb, dir, "unit_mask", &ctr[i].unit_mask); | ||
63 | } | ||
64 | |||
65 | return ret; | ||
66 | } | 41 | } |
67 | 42 | ||
68 | static int op_sh_start(void) | 43 | int __init oprofile_arch_init(struct oprofile_operations *ops) |
69 | { | 44 | { |
70 | /* Enable performance monitoring for all counters. */ | 45 | ops->backtrace = sh_backtrace; |
71 | on_each_cpu(model->cpu_start, NULL, 1); | ||
72 | 46 | ||
73 | return 0; | 47 | return oprofile_perf_init(ops); |
74 | } | 48 | } |
75 | 49 | ||
76 | static void op_sh_stop(void) | 50 | void __exit oprofile_arch_exit(void) |
77 | { | 51 | { |
78 | /* Disable performance monitoring for all counters. */ | 52 | oprofile_perf_exit(); |
79 | on_each_cpu(model->cpu_stop, NULL, 1); | ||
80 | } | 53 | } |
81 | 54 | #else | |
82 | int __init oprofile_arch_init(struct oprofile_operations *ops) | 55 | int __init oprofile_arch_init(struct oprofile_operations *ops) |
83 | { | 56 | { |
84 | struct op_sh_model *lmodel = NULL; | 57 | pr_info("oprofile: hardware counters not available\n"); |
85 | int ret; | 58 | return -ENODEV; |
86 | |||
87 | /* | ||
88 | * Always assign the backtrace op. If the counter initialization | ||
89 | * fails, we fall back to the timer which will still make use of | ||
90 | * this. | ||
91 | */ | ||
92 | ops->backtrace = sh_backtrace; | ||
93 | |||
94 | /* | ||
95 | * XXX | ||
96 | * | ||
97 | * All of the SH7750/SH-4A counters have been converted to perf, | ||
98 | * this infrastructure hook is left for other users until they've | ||
99 | * had a chance to convert over, at which point all of this | ||
100 | * will be deleted. | ||
101 | */ | ||
102 | |||
103 | if (!lmodel) | ||
104 | return -ENODEV; | ||
105 | if (!(current_cpu_data.flags & CPU_HAS_PERF_COUNTER)) | ||
106 | return -ENODEV; | ||
107 | |||
108 | ret = lmodel->init(); | ||
109 | if (unlikely(ret != 0)) | ||
110 | return ret; | ||
111 | |||
112 | model = lmodel; | ||
113 | |||
114 | ops->setup = op_sh_setup; | ||
115 | ops->create_files = op_sh_create_files; | ||
116 | ops->start = op_sh_start; | ||
117 | ops->stop = op_sh_stop; | ||
118 | ops->cpu_type = lmodel->cpu_type; | ||
119 | |||
120 | printk(KERN_INFO "oprofile: using %s performance monitoring.\n", | ||
121 | lmodel->cpu_type); | ||
122 | |||
123 | return 0; | ||
124 | } | ||
125 | |||
126 | void oprofile_arch_exit(void) | ||
127 | { | ||
128 | if (model && model->exit) | ||
129 | model->exit(); | ||
130 | } | 59 | } |
60 | void __exit oprofile_arch_exit(void) {} | ||
61 | #endif /* CONFIG_HW_PERF_EVENTS */ | ||
diff --git a/arch/sh/oprofile/op_impl.h b/arch/sh/oprofile/op_impl.h deleted file mode 100644 index 1244479ceb29..000000000000 --- a/arch/sh/oprofile/op_impl.h +++ /dev/null | |||
@@ -1,33 +0,0 @@ | |||
1 | #ifndef __OP_IMPL_H | ||
2 | #define __OP_IMPL_H | ||
3 | |||
4 | /* Per-counter configuration as set via oprofilefs. */ | ||
5 | struct op_counter_config { | ||
6 | unsigned long enabled; | ||
7 | unsigned long event; | ||
8 | |||
9 | unsigned long count; | ||
10 | |||
11 | /* Dummy values for userspace tool compliance */ | ||
12 | unsigned long kernel; | ||
13 | unsigned long user; | ||
14 | unsigned long unit_mask; | ||
15 | }; | ||
16 | |||
17 | /* Per-architecture configury and hooks. */ | ||
18 | struct op_sh_model { | ||
19 | void (*reg_setup)(struct op_counter_config *); | ||
20 | int (*create_files)(struct super_block *sb, struct dentry *dir); | ||
21 | void (*cpu_setup)(void *dummy); | ||
22 | int (*init)(void); | ||
23 | void (*exit)(void); | ||
24 | void (*cpu_start)(void *args); | ||
25 | void (*cpu_stop)(void *args); | ||
26 | char *cpu_type; | ||
27 | unsigned char num_counters; | ||
28 | }; | ||
29 | |||
30 | /* arch/sh/oprofile/common.c */ | ||
31 | extern void sh_backtrace(struct pt_regs * const regs, unsigned int depth); | ||
32 | |||
33 | #endif /* __OP_IMPL_H */ | ||
diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig index 491e9d6de191..3e9d31401fb2 100644 --- a/arch/sparc/Kconfig +++ b/arch/sparc/Kconfig | |||
@@ -26,10 +26,12 @@ config SPARC | |||
26 | select ARCH_WANT_OPTIONAL_GPIOLIB | 26 | select ARCH_WANT_OPTIONAL_GPIOLIB |
27 | select RTC_CLASS | 27 | select RTC_CLASS |
28 | select RTC_DRV_M48T59 | 28 | select RTC_DRV_M48T59 |
29 | select HAVE_IRQ_WORK | ||
29 | select HAVE_PERF_EVENTS | 30 | select HAVE_PERF_EVENTS |
30 | select PERF_USE_VMALLOC | 31 | select PERF_USE_VMALLOC |
31 | select HAVE_DMA_ATTRS | 32 | select HAVE_DMA_ATTRS |
32 | select HAVE_DMA_API_DEBUG | 33 | select HAVE_DMA_API_DEBUG |
34 | select HAVE_ARCH_JUMP_LABEL | ||
33 | 35 | ||
34 | config SPARC32 | 36 | config SPARC32 |
35 | def_bool !64BIT | 37 | def_bool !64BIT |
@@ -53,6 +55,7 @@ config SPARC64 | |||
53 | select RTC_DRV_BQ4802 | 55 | select RTC_DRV_BQ4802 |
54 | select RTC_DRV_SUN4V | 56 | select RTC_DRV_SUN4V |
55 | select RTC_DRV_STARFIRE | 57 | select RTC_DRV_STARFIRE |
58 | select HAVE_IRQ_WORK | ||
56 | select HAVE_PERF_EVENTS | 59 | select HAVE_PERF_EVENTS |
57 | select PERF_USE_VMALLOC | 60 | select PERF_USE_VMALLOC |
58 | 61 | ||
diff --git a/arch/sparc/include/asm/jump_label.h b/arch/sparc/include/asm/jump_label.h new file mode 100644 index 000000000000..62e66d7b2fb6 --- /dev/null +++ b/arch/sparc/include/asm/jump_label.h | |||
@@ -0,0 +1,32 @@ | |||
1 | #ifndef _ASM_SPARC_JUMP_LABEL_H | ||
2 | #define _ASM_SPARC_JUMP_LABEL_H | ||
3 | |||
4 | #ifdef __KERNEL__ | ||
5 | |||
6 | #include <linux/types.h> | ||
7 | #include <asm/system.h> | ||
8 | |||
9 | #define JUMP_LABEL_NOP_SIZE 4 | ||
10 | |||
11 | #define JUMP_LABEL(key, label) \ | ||
12 | do { \ | ||
13 | asm goto("1:\n\t" \ | ||
14 | "nop\n\t" \ | ||
15 | "nop\n\t" \ | ||
16 | ".pushsection __jump_table, \"a\"\n\t"\ | ||
17 | ".word 1b, %l[" #label "], %c0\n\t" \ | ||
18 | ".popsection \n\t" \ | ||
19 | : : "i" (key) : : label);\ | ||
20 | } while (0) | ||
21 | |||
22 | #endif /* __KERNEL__ */ | ||
23 | |||
24 | typedef u32 jump_label_t; | ||
25 | |||
26 | struct jump_entry { | ||
27 | jump_label_t code; | ||
28 | jump_label_t target; | ||
29 | jump_label_t key; | ||
30 | }; | ||
31 | |||
32 | #endif | ||
diff --git a/arch/sparc/include/asm/perf_event.h b/arch/sparc/include/asm/perf_event.h index 727af70646cb..6e8bfa1786da 100644 --- a/arch/sparc/include/asm/perf_event.h +++ b/arch/sparc/include/asm/perf_event.h | |||
@@ -1,10 +1,6 @@ | |||
1 | #ifndef __ASM_SPARC_PERF_EVENT_H | 1 | #ifndef __ASM_SPARC_PERF_EVENT_H |
2 | #define __ASM_SPARC_PERF_EVENT_H | 2 | #define __ASM_SPARC_PERF_EVENT_H |
3 | 3 | ||
4 | extern void set_perf_event_pending(void); | ||
5 | |||
6 | #define PERF_EVENT_INDEX_OFFSET 0 | ||
7 | |||
8 | #ifdef CONFIG_PERF_EVENTS | 4 | #ifdef CONFIG_PERF_EVENTS |
9 | #include <asm/ptrace.h> | 5 | #include <asm/ptrace.h> |
10 | 6 | ||
diff --git a/arch/sparc/kernel/Makefile b/arch/sparc/kernel/Makefile index 0c2dc1f24a9a..599398fbbc7c 100644 --- a/arch/sparc/kernel/Makefile +++ b/arch/sparc/kernel/Makefile | |||
@@ -119,3 +119,5 @@ obj-$(CONFIG_COMPAT) += $(audit--y) | |||
119 | 119 | ||
120 | pc--$(CONFIG_PERF_EVENTS) := perf_event.o | 120 | pc--$(CONFIG_PERF_EVENTS) := perf_event.o |
121 | obj-$(CONFIG_SPARC64) += $(pc--y) | 121 | obj-$(CONFIG_SPARC64) += $(pc--y) |
122 | |||
123 | obj-$(CONFIG_SPARC64) += jump_label.o | ||
diff --git a/arch/sparc/kernel/jump_label.c b/arch/sparc/kernel/jump_label.c new file mode 100644 index 000000000000..ea2dafc93d78 --- /dev/null +++ b/arch/sparc/kernel/jump_label.c | |||
@@ -0,0 +1,47 @@ | |||
1 | #include <linux/kernel.h> | ||
2 | #include <linux/types.h> | ||
3 | #include <linux/mutex.h> | ||
4 | #include <linux/cpu.h> | ||
5 | |||
6 | #include <linux/jump_label.h> | ||
7 | #include <linux/memory.h> | ||
8 | |||
9 | #ifdef HAVE_JUMP_LABEL | ||
10 | |||
11 | void arch_jump_label_transform(struct jump_entry *entry, | ||
12 | enum jump_label_type type) | ||
13 | { | ||
14 | u32 val; | ||
15 | u32 *insn = (u32 *) (unsigned long) entry->code; | ||
16 | |||
17 | if (type == JUMP_LABEL_ENABLE) { | ||
18 | s32 off = (s32)entry->target - (s32)entry->code; | ||
19 | |||
20 | #ifdef CONFIG_SPARC64 | ||
21 | /* ba,pt %xcc, . + (off << 2) */ | ||
22 | val = 0x10680000 | ((u32) off >> 2); | ||
23 | #else | ||
24 | /* ba . + (off << 2) */ | ||
25 | val = 0x10800000 | ((u32) off >> 2); | ||
26 | #endif | ||
27 | } else { | ||
28 | val = 0x01000000; | ||
29 | } | ||
30 | |||
31 | get_online_cpus(); | ||
32 | mutex_lock(&text_mutex); | ||
33 | *insn = val; | ||
34 | flushi(insn); | ||
35 | mutex_unlock(&text_mutex); | ||
36 | put_online_cpus(); | ||
37 | } | ||
38 | |||
39 | void arch_jump_label_text_poke_early(jump_label_t addr) | ||
40 | { | ||
41 | u32 *insn_p = (u32 *) (unsigned long) addr; | ||
42 | |||
43 | *insn_p = 0x01000000; | ||
44 | flushi(insn_p); | ||
45 | } | ||
46 | |||
47 | #endif | ||
diff --git a/arch/sparc/kernel/module.c b/arch/sparc/kernel/module.c index f848aadf54dc..ee3c7dde8d9f 100644 --- a/arch/sparc/kernel/module.c +++ b/arch/sparc/kernel/module.c | |||
@@ -18,6 +18,9 @@ | |||
18 | #include <asm/spitfire.h> | 18 | #include <asm/spitfire.h> |
19 | 19 | ||
20 | #ifdef CONFIG_SPARC64 | 20 | #ifdef CONFIG_SPARC64 |
21 | |||
22 | #include <linux/jump_label.h> | ||
23 | |||
21 | static void *module_map(unsigned long size) | 24 | static void *module_map(unsigned long size) |
22 | { | 25 | { |
23 | struct vm_struct *area; | 26 | struct vm_struct *area; |
@@ -227,6 +230,9 @@ int module_finalize(const Elf_Ehdr *hdr, | |||
227 | const Elf_Shdr *sechdrs, | 230 | const Elf_Shdr *sechdrs, |
228 | struct module *me) | 231 | struct module *me) |
229 | { | 232 | { |
233 | /* make jump label nops */ | ||
234 | jump_label_apply_nops(me); | ||
235 | |||
230 | /* Cheetah's I-cache is fully coherent. */ | 236 | /* Cheetah's I-cache is fully coherent. */ |
231 | if (tlb_type == spitfire) { | 237 | if (tlb_type == spitfire) { |
232 | unsigned long va; | 238 | unsigned long va; |
diff --git a/arch/sparc/kernel/pcr.c b/arch/sparc/kernel/pcr.c index c4a6a50b4849..b87873c0e8ea 100644 --- a/arch/sparc/kernel/pcr.c +++ b/arch/sparc/kernel/pcr.c | |||
@@ -7,7 +7,7 @@ | |||
7 | #include <linux/init.h> | 7 | #include <linux/init.h> |
8 | #include <linux/irq.h> | 8 | #include <linux/irq.h> |
9 | 9 | ||
10 | #include <linux/perf_event.h> | 10 | #include <linux/irq_work.h> |
11 | #include <linux/ftrace.h> | 11 | #include <linux/ftrace.h> |
12 | 12 | ||
13 | #include <asm/pil.h> | 13 | #include <asm/pil.h> |
@@ -43,14 +43,14 @@ void __irq_entry deferred_pcr_work_irq(int irq, struct pt_regs *regs) | |||
43 | 43 | ||
44 | old_regs = set_irq_regs(regs); | 44 | old_regs = set_irq_regs(regs); |
45 | irq_enter(); | 45 | irq_enter(); |
46 | #ifdef CONFIG_PERF_EVENTS | 46 | #ifdef CONFIG_IRQ_WORK |
47 | perf_event_do_pending(); | 47 | irq_work_run(); |
48 | #endif | 48 | #endif |
49 | irq_exit(); | 49 | irq_exit(); |
50 | set_irq_regs(old_regs); | 50 | set_irq_regs(old_regs); |
51 | } | 51 | } |
52 | 52 | ||
53 | void set_perf_event_pending(void) | 53 | void arch_irq_work_raise(void) |
54 | { | 54 | { |
55 | set_softint(1 << PIL_DEFERRED_PCR_WORK); | 55 | set_softint(1 << PIL_DEFERRED_PCR_WORK); |
56 | } | 56 | } |
diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c index 6318e622cfb0..0d6deb55a2ae 100644 --- a/arch/sparc/kernel/perf_event.c +++ b/arch/sparc/kernel/perf_event.c | |||
@@ -658,13 +658,16 @@ static u64 maybe_change_configuration(struct cpu_hw_events *cpuc, u64 pcr) | |||
658 | 658 | ||
659 | enc = perf_event_get_enc(cpuc->events[i]); | 659 | enc = perf_event_get_enc(cpuc->events[i]); |
660 | pcr &= ~mask_for_index(idx); | 660 | pcr &= ~mask_for_index(idx); |
661 | pcr |= event_encoding(enc, idx); | 661 | if (hwc->state & PERF_HES_STOPPED) |
662 | pcr |= nop_for_index(idx); | ||
663 | else | ||
664 | pcr |= event_encoding(enc, idx); | ||
662 | } | 665 | } |
663 | out: | 666 | out: |
664 | return pcr; | 667 | return pcr; |
665 | } | 668 | } |
666 | 669 | ||
667 | void hw_perf_enable(void) | 670 | static void sparc_pmu_enable(struct pmu *pmu) |
668 | { | 671 | { |
669 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | 672 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); |
670 | u64 pcr; | 673 | u64 pcr; |
@@ -691,7 +694,7 @@ void hw_perf_enable(void) | |||
691 | pcr_ops->write(cpuc->pcr); | 694 | pcr_ops->write(cpuc->pcr); |
692 | } | 695 | } |
693 | 696 | ||
694 | void hw_perf_disable(void) | 697 | static void sparc_pmu_disable(struct pmu *pmu) |
695 | { | 698 | { |
696 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | 699 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); |
697 | u64 val; | 700 | u64 val; |
@@ -710,19 +713,65 @@ void hw_perf_disable(void) | |||
710 | pcr_ops->write(cpuc->pcr); | 713 | pcr_ops->write(cpuc->pcr); |
711 | } | 714 | } |
712 | 715 | ||
713 | static void sparc_pmu_disable(struct perf_event *event) | 716 | static int active_event_index(struct cpu_hw_events *cpuc, |
717 | struct perf_event *event) | ||
718 | { | ||
719 | int i; | ||
720 | |||
721 | for (i = 0; i < cpuc->n_events; i++) { | ||
722 | if (cpuc->event[i] == event) | ||
723 | break; | ||
724 | } | ||
725 | BUG_ON(i == cpuc->n_events); | ||
726 | return cpuc->current_idx[i]; | ||
727 | } | ||
728 | |||
729 | static void sparc_pmu_start(struct perf_event *event, int flags) | ||
730 | { | ||
731 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
732 | int idx = active_event_index(cpuc, event); | ||
733 | |||
734 | if (flags & PERF_EF_RELOAD) { | ||
735 | WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE)); | ||
736 | sparc_perf_event_set_period(event, &event->hw, idx); | ||
737 | } | ||
738 | |||
739 | event->hw.state = 0; | ||
740 | |||
741 | sparc_pmu_enable_event(cpuc, &event->hw, idx); | ||
742 | } | ||
743 | |||
744 | static void sparc_pmu_stop(struct perf_event *event, int flags) | ||
745 | { | ||
746 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
747 | int idx = active_event_index(cpuc, event); | ||
748 | |||
749 | if (!(event->hw.state & PERF_HES_STOPPED)) { | ||
750 | sparc_pmu_disable_event(cpuc, &event->hw, idx); | ||
751 | event->hw.state |= PERF_HES_STOPPED; | ||
752 | } | ||
753 | |||
754 | if (!(event->hw.state & PERF_HES_UPTODATE) && (flags & PERF_EF_UPDATE)) { | ||
755 | sparc_perf_event_update(event, &event->hw, idx); | ||
756 | event->hw.state |= PERF_HES_UPTODATE; | ||
757 | } | ||
758 | } | ||
759 | |||
760 | static void sparc_pmu_del(struct perf_event *event, int _flags) | ||
714 | { | 761 | { |
715 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | 762 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); |
716 | struct hw_perf_event *hwc = &event->hw; | ||
717 | unsigned long flags; | 763 | unsigned long flags; |
718 | int i; | 764 | int i; |
719 | 765 | ||
720 | local_irq_save(flags); | 766 | local_irq_save(flags); |
721 | perf_disable(); | 767 | perf_pmu_disable(event->pmu); |
722 | 768 | ||
723 | for (i = 0; i < cpuc->n_events; i++) { | 769 | for (i = 0; i < cpuc->n_events; i++) { |
724 | if (event == cpuc->event[i]) { | 770 | if (event == cpuc->event[i]) { |
725 | int idx = cpuc->current_idx[i]; | 771 | /* Absorb the final count and turn off the |
772 | * event. | ||
773 | */ | ||
774 | sparc_pmu_stop(event, PERF_EF_UPDATE); | ||
726 | 775 | ||
727 | /* Shift remaining entries down into | 776 | /* Shift remaining entries down into |
728 | * the existing slot. | 777 | * the existing slot. |
@@ -734,13 +783,6 @@ static void sparc_pmu_disable(struct perf_event *event) | |||
734 | cpuc->current_idx[i]; | 783 | cpuc->current_idx[i]; |
735 | } | 784 | } |
736 | 785 | ||
737 | /* Absorb the final count and turn off the | ||
738 | * event. | ||
739 | */ | ||
740 | sparc_pmu_disable_event(cpuc, hwc, idx); | ||
741 | barrier(); | ||
742 | sparc_perf_event_update(event, hwc, idx); | ||
743 | |||
744 | perf_event_update_userpage(event); | 786 | perf_event_update_userpage(event); |
745 | 787 | ||
746 | cpuc->n_events--; | 788 | cpuc->n_events--; |
@@ -748,23 +790,10 @@ static void sparc_pmu_disable(struct perf_event *event) | |||
748 | } | 790 | } |
749 | } | 791 | } |
750 | 792 | ||
751 | perf_enable(); | 793 | perf_pmu_enable(event->pmu); |
752 | local_irq_restore(flags); | 794 | local_irq_restore(flags); |
753 | } | 795 | } |
754 | 796 | ||
755 | static int active_event_index(struct cpu_hw_events *cpuc, | ||
756 | struct perf_event *event) | ||
757 | { | ||
758 | int i; | ||
759 | |||
760 | for (i = 0; i < cpuc->n_events; i++) { | ||
761 | if (cpuc->event[i] == event) | ||
762 | break; | ||
763 | } | ||
764 | BUG_ON(i == cpuc->n_events); | ||
765 | return cpuc->current_idx[i]; | ||
766 | } | ||
767 | |||
768 | static void sparc_pmu_read(struct perf_event *event) | 797 | static void sparc_pmu_read(struct perf_event *event) |
769 | { | 798 | { |
770 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | 799 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); |
@@ -774,15 +803,6 @@ static void sparc_pmu_read(struct perf_event *event) | |||
774 | sparc_perf_event_update(event, hwc, idx); | 803 | sparc_perf_event_update(event, hwc, idx); |
775 | } | 804 | } |
776 | 805 | ||
777 | static void sparc_pmu_unthrottle(struct perf_event *event) | ||
778 | { | ||
779 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
780 | int idx = active_event_index(cpuc, event); | ||
781 | struct hw_perf_event *hwc = &event->hw; | ||
782 | |||
783 | sparc_pmu_enable_event(cpuc, hwc, idx); | ||
784 | } | ||
785 | |||
786 | static atomic_t active_events = ATOMIC_INIT(0); | 806 | static atomic_t active_events = ATOMIC_INIT(0); |
787 | static DEFINE_MUTEX(pmc_grab_mutex); | 807 | static DEFINE_MUTEX(pmc_grab_mutex); |
788 | 808 | ||
@@ -877,7 +897,7 @@ static int sparc_check_constraints(struct perf_event **evts, | |||
877 | if (!n_ev) | 897 | if (!n_ev) |
878 | return 0; | 898 | return 0; |
879 | 899 | ||
880 | if (n_ev > perf_max_events) | 900 | if (n_ev > MAX_HWEVENTS) |
881 | return -1; | 901 | return -1; |
882 | 902 | ||
883 | msk0 = perf_event_get_msk(events[0]); | 903 | msk0 = perf_event_get_msk(events[0]); |
@@ -984,23 +1004,27 @@ static int collect_events(struct perf_event *group, int max_count, | |||
984 | return n; | 1004 | return n; |
985 | } | 1005 | } |
986 | 1006 | ||
987 | static int sparc_pmu_enable(struct perf_event *event) | 1007 | static int sparc_pmu_add(struct perf_event *event, int ef_flags) |
988 | { | 1008 | { |
989 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | 1009 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); |
990 | int n0, ret = -EAGAIN; | 1010 | int n0, ret = -EAGAIN; |
991 | unsigned long flags; | 1011 | unsigned long flags; |
992 | 1012 | ||
993 | local_irq_save(flags); | 1013 | local_irq_save(flags); |
994 | perf_disable(); | 1014 | perf_pmu_disable(event->pmu); |
995 | 1015 | ||
996 | n0 = cpuc->n_events; | 1016 | n0 = cpuc->n_events; |
997 | if (n0 >= perf_max_events) | 1017 | if (n0 >= MAX_HWEVENTS) |
998 | goto out; | 1018 | goto out; |
999 | 1019 | ||
1000 | cpuc->event[n0] = event; | 1020 | cpuc->event[n0] = event; |
1001 | cpuc->events[n0] = event->hw.event_base; | 1021 | cpuc->events[n0] = event->hw.event_base; |
1002 | cpuc->current_idx[n0] = PIC_NO_INDEX; | 1022 | cpuc->current_idx[n0] = PIC_NO_INDEX; |
1003 | 1023 | ||
1024 | event->hw.state = PERF_HES_UPTODATE; | ||
1025 | if (!(ef_flags & PERF_EF_START)) | ||
1026 | event->hw.state |= PERF_HES_STOPPED; | ||
1027 | |||
1004 | /* | 1028 | /* |
1005 | * If group events scheduling transaction was started, | 1029 | * If group events scheduling transaction was started, |
1006 | * skip the schedulability test here, it will be peformed | 1030 | * skip the schedulability test here, it will be peformed |
@@ -1020,12 +1044,12 @@ nocheck: | |||
1020 | 1044 | ||
1021 | ret = 0; | 1045 | ret = 0; |
1022 | out: | 1046 | out: |
1023 | perf_enable(); | 1047 | perf_pmu_enable(event->pmu); |
1024 | local_irq_restore(flags); | 1048 | local_irq_restore(flags); |
1025 | return ret; | 1049 | return ret; |
1026 | } | 1050 | } |
1027 | 1051 | ||
1028 | static int __hw_perf_event_init(struct perf_event *event) | 1052 | static int sparc_pmu_event_init(struct perf_event *event) |
1029 | { | 1053 | { |
1030 | struct perf_event_attr *attr = &event->attr; | 1054 | struct perf_event_attr *attr = &event->attr; |
1031 | struct perf_event *evts[MAX_HWEVENTS]; | 1055 | struct perf_event *evts[MAX_HWEVENTS]; |
@@ -1038,22 +1062,33 @@ static int __hw_perf_event_init(struct perf_event *event) | |||
1038 | if (atomic_read(&nmi_active) < 0) | 1062 | if (atomic_read(&nmi_active) < 0) |
1039 | return -ENODEV; | 1063 | return -ENODEV; |
1040 | 1064 | ||
1041 | pmap = NULL; | 1065 | switch (attr->type) { |
1042 | if (attr->type == PERF_TYPE_HARDWARE) { | 1066 | case PERF_TYPE_HARDWARE: |
1043 | if (attr->config >= sparc_pmu->max_events) | 1067 | if (attr->config >= sparc_pmu->max_events) |
1044 | return -EINVAL; | 1068 | return -EINVAL; |
1045 | pmap = sparc_pmu->event_map(attr->config); | 1069 | pmap = sparc_pmu->event_map(attr->config); |
1046 | } else if (attr->type == PERF_TYPE_HW_CACHE) { | 1070 | break; |
1071 | |||
1072 | case PERF_TYPE_HW_CACHE: | ||
1047 | pmap = sparc_map_cache_event(attr->config); | 1073 | pmap = sparc_map_cache_event(attr->config); |
1048 | if (IS_ERR(pmap)) | 1074 | if (IS_ERR(pmap)) |
1049 | return PTR_ERR(pmap); | 1075 | return PTR_ERR(pmap); |
1050 | } else if (attr->type != PERF_TYPE_RAW) | 1076 | break; |
1051 | return -EOPNOTSUPP; | 1077 | |
1078 | case PERF_TYPE_RAW: | ||
1079 | pmap = NULL; | ||
1080 | break; | ||
1081 | |||
1082 | default: | ||
1083 | return -ENOENT; | ||
1084 | |||
1085 | } | ||
1052 | 1086 | ||
1053 | if (pmap) { | 1087 | if (pmap) { |
1054 | hwc->event_base = perf_event_encode(pmap); | 1088 | hwc->event_base = perf_event_encode(pmap); |
1055 | } else { | 1089 | } else { |
1056 | /* User gives us "(encoding << 16) | pic_mask" for | 1090 | /* |
1091 | * User gives us "(encoding << 16) | pic_mask" for | ||
1057 | * PERF_TYPE_RAW events. | 1092 | * PERF_TYPE_RAW events. |
1058 | */ | 1093 | */ |
1059 | hwc->event_base = attr->config; | 1094 | hwc->event_base = attr->config; |
@@ -1071,7 +1106,7 @@ static int __hw_perf_event_init(struct perf_event *event) | |||
1071 | n = 0; | 1106 | n = 0; |
1072 | if (event->group_leader != event) { | 1107 | if (event->group_leader != event) { |
1073 | n = collect_events(event->group_leader, | 1108 | n = collect_events(event->group_leader, |
1074 | perf_max_events - 1, | 1109 | MAX_HWEVENTS - 1, |
1075 | evts, events, current_idx_dmy); | 1110 | evts, events, current_idx_dmy); |
1076 | if (n < 0) | 1111 | if (n < 0) |
1077 | return -EINVAL; | 1112 | return -EINVAL; |
@@ -1107,10 +1142,11 @@ static int __hw_perf_event_init(struct perf_event *event) | |||
1107 | * Set the flag to make pmu::enable() not perform the | 1142 | * Set the flag to make pmu::enable() not perform the |
1108 | * schedulability test, it will be performed at commit time | 1143 | * schedulability test, it will be performed at commit time |
1109 | */ | 1144 | */ |
1110 | static void sparc_pmu_start_txn(const struct pmu *pmu) | 1145 | static void sparc_pmu_start_txn(struct pmu *pmu) |
1111 | { | 1146 | { |
1112 | struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); | 1147 | struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); |
1113 | 1148 | ||
1149 | perf_pmu_disable(pmu); | ||
1114 | cpuhw->group_flag |= PERF_EVENT_TXN; | 1150 | cpuhw->group_flag |= PERF_EVENT_TXN; |
1115 | } | 1151 | } |
1116 | 1152 | ||
@@ -1119,11 +1155,12 @@ static void sparc_pmu_start_txn(const struct pmu *pmu) | |||
1119 | * Clear the flag and pmu::enable() will perform the | 1155 | * Clear the flag and pmu::enable() will perform the |
1120 | * schedulability test. | 1156 | * schedulability test. |
1121 | */ | 1157 | */ |
1122 | static void sparc_pmu_cancel_txn(const struct pmu *pmu) | 1158 | static void sparc_pmu_cancel_txn(struct pmu *pmu) |
1123 | { | 1159 | { |
1124 | struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); | 1160 | struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); |
1125 | 1161 | ||
1126 | cpuhw->group_flag &= ~PERF_EVENT_TXN; | 1162 | cpuhw->group_flag &= ~PERF_EVENT_TXN; |
1163 | perf_pmu_enable(pmu); | ||
1127 | } | 1164 | } |
1128 | 1165 | ||
1129 | /* | 1166 | /* |
@@ -1131,7 +1168,7 @@ static void sparc_pmu_cancel_txn(const struct pmu *pmu) | |||
1131 | * Perform the group schedulability test as a whole | 1168 | * Perform the group schedulability test as a whole |
1132 | * Return 0 if success | 1169 | * Return 0 if success |
1133 | */ | 1170 | */ |
1134 | static int sparc_pmu_commit_txn(const struct pmu *pmu) | 1171 | static int sparc_pmu_commit_txn(struct pmu *pmu) |
1135 | { | 1172 | { |
1136 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | 1173 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); |
1137 | int n; | 1174 | int n; |
@@ -1147,28 +1184,24 @@ static int sparc_pmu_commit_txn(const struct pmu *pmu) | |||
1147 | return -EAGAIN; | 1184 | return -EAGAIN; |
1148 | 1185 | ||
1149 | cpuc->group_flag &= ~PERF_EVENT_TXN; | 1186 | cpuc->group_flag &= ~PERF_EVENT_TXN; |
1187 | perf_pmu_enable(pmu); | ||
1150 | return 0; | 1188 | return 0; |
1151 | } | 1189 | } |
1152 | 1190 | ||
1153 | static const struct pmu pmu = { | 1191 | static struct pmu pmu = { |
1154 | .enable = sparc_pmu_enable, | 1192 | .pmu_enable = sparc_pmu_enable, |
1155 | .disable = sparc_pmu_disable, | 1193 | .pmu_disable = sparc_pmu_disable, |
1194 | .event_init = sparc_pmu_event_init, | ||
1195 | .add = sparc_pmu_add, | ||
1196 | .del = sparc_pmu_del, | ||
1197 | .start = sparc_pmu_start, | ||
1198 | .stop = sparc_pmu_stop, | ||
1156 | .read = sparc_pmu_read, | 1199 | .read = sparc_pmu_read, |
1157 | .unthrottle = sparc_pmu_unthrottle, | ||
1158 | .start_txn = sparc_pmu_start_txn, | 1200 | .start_txn = sparc_pmu_start_txn, |
1159 | .cancel_txn = sparc_pmu_cancel_txn, | 1201 | .cancel_txn = sparc_pmu_cancel_txn, |
1160 | .commit_txn = sparc_pmu_commit_txn, | 1202 | .commit_txn = sparc_pmu_commit_txn, |
1161 | }; | 1203 | }; |
1162 | 1204 | ||
1163 | const struct pmu *hw_perf_event_init(struct perf_event *event) | ||
1164 | { | ||
1165 | int err = __hw_perf_event_init(event); | ||
1166 | |||
1167 | if (err) | ||
1168 | return ERR_PTR(err); | ||
1169 | return &pmu; | ||
1170 | } | ||
1171 | |||
1172 | void perf_event_print_debug(void) | 1205 | void perf_event_print_debug(void) |
1173 | { | 1206 | { |
1174 | unsigned long flags; | 1207 | unsigned long flags; |
@@ -1244,7 +1277,7 @@ static int __kprobes perf_event_nmi_handler(struct notifier_block *self, | |||
1244 | continue; | 1277 | continue; |
1245 | 1278 | ||
1246 | if (perf_event_overflow(event, 1, &data, regs)) | 1279 | if (perf_event_overflow(event, 1, &data, regs)) |
1247 | sparc_pmu_disable_event(cpuc, hwc, idx); | 1280 | sparc_pmu_stop(event, 0); |
1248 | } | 1281 | } |
1249 | 1282 | ||
1250 | return NOTIFY_STOP; | 1283 | return NOTIFY_STOP; |
@@ -1285,28 +1318,21 @@ void __init init_hw_perf_events(void) | |||
1285 | 1318 | ||
1286 | pr_cont("Supported PMU type is '%s'\n", sparc_pmu_type); | 1319 | pr_cont("Supported PMU type is '%s'\n", sparc_pmu_type); |
1287 | 1320 | ||
1288 | /* All sparc64 PMUs currently have 2 events. */ | 1321 | perf_pmu_register(&pmu); |
1289 | perf_max_events = 2; | ||
1290 | |||
1291 | register_die_notifier(&perf_event_nmi_notifier); | 1322 | register_die_notifier(&perf_event_nmi_notifier); |
1292 | } | 1323 | } |
1293 | 1324 | ||
1294 | static inline void callchain_store(struct perf_callchain_entry *entry, u64 ip) | 1325 | void perf_callchain_kernel(struct perf_callchain_entry *entry, |
1295 | { | 1326 | struct pt_regs *regs) |
1296 | if (entry->nr < PERF_MAX_STACK_DEPTH) | ||
1297 | entry->ip[entry->nr++] = ip; | ||
1298 | } | ||
1299 | |||
1300 | static void perf_callchain_kernel(struct pt_regs *regs, | ||
1301 | struct perf_callchain_entry *entry) | ||
1302 | { | 1327 | { |
1303 | unsigned long ksp, fp; | 1328 | unsigned long ksp, fp; |
1304 | #ifdef CONFIG_FUNCTION_GRAPH_TRACER | 1329 | #ifdef CONFIG_FUNCTION_GRAPH_TRACER |
1305 | int graph = 0; | 1330 | int graph = 0; |
1306 | #endif | 1331 | #endif |
1307 | 1332 | ||
1308 | callchain_store(entry, PERF_CONTEXT_KERNEL); | 1333 | stack_trace_flush(); |
1309 | callchain_store(entry, regs->tpc); | 1334 | |
1335 | perf_callchain_store(entry, regs->tpc); | ||
1310 | 1336 | ||
1311 | ksp = regs->u_regs[UREG_I6]; | 1337 | ksp = regs->u_regs[UREG_I6]; |
1312 | fp = ksp + STACK_BIAS; | 1338 | fp = ksp + STACK_BIAS; |
@@ -1330,13 +1356,13 @@ static void perf_callchain_kernel(struct pt_regs *regs, | |||
1330 | pc = sf->callers_pc; | 1356 | pc = sf->callers_pc; |
1331 | fp = (unsigned long)sf->fp + STACK_BIAS; | 1357 | fp = (unsigned long)sf->fp + STACK_BIAS; |
1332 | } | 1358 | } |
1333 | callchain_store(entry, pc); | 1359 | perf_callchain_store(entry, pc); |
1334 | #ifdef CONFIG_FUNCTION_GRAPH_TRACER | 1360 | #ifdef CONFIG_FUNCTION_GRAPH_TRACER |
1335 | if ((pc + 8UL) == (unsigned long) &return_to_handler) { | 1361 | if ((pc + 8UL) == (unsigned long) &return_to_handler) { |
1336 | int index = current->curr_ret_stack; | 1362 | int index = current->curr_ret_stack; |
1337 | if (current->ret_stack && index >= graph) { | 1363 | if (current->ret_stack && index >= graph) { |
1338 | pc = current->ret_stack[index - graph].ret; | 1364 | pc = current->ret_stack[index - graph].ret; |
1339 | callchain_store(entry, pc); | 1365 | perf_callchain_store(entry, pc); |
1340 | graph++; | 1366 | graph++; |
1341 | } | 1367 | } |
1342 | } | 1368 | } |
@@ -1344,13 +1370,12 @@ static void perf_callchain_kernel(struct pt_regs *regs, | |||
1344 | } while (entry->nr < PERF_MAX_STACK_DEPTH); | 1370 | } while (entry->nr < PERF_MAX_STACK_DEPTH); |
1345 | } | 1371 | } |
1346 | 1372 | ||
1347 | static void perf_callchain_user_64(struct pt_regs *regs, | 1373 | static void perf_callchain_user_64(struct perf_callchain_entry *entry, |
1348 | struct perf_callchain_entry *entry) | 1374 | struct pt_regs *regs) |
1349 | { | 1375 | { |
1350 | unsigned long ufp; | 1376 | unsigned long ufp; |
1351 | 1377 | ||
1352 | callchain_store(entry, PERF_CONTEXT_USER); | 1378 | perf_callchain_store(entry, regs->tpc); |
1353 | callchain_store(entry, regs->tpc); | ||
1354 | 1379 | ||
1355 | ufp = regs->u_regs[UREG_I6] + STACK_BIAS; | 1380 | ufp = regs->u_regs[UREG_I6] + STACK_BIAS; |
1356 | do { | 1381 | do { |
@@ -1363,17 +1388,16 @@ static void perf_callchain_user_64(struct pt_regs *regs, | |||
1363 | 1388 | ||
1364 | pc = sf.callers_pc; | 1389 | pc = sf.callers_pc; |
1365 | ufp = (unsigned long)sf.fp + STACK_BIAS; | 1390 | ufp = (unsigned long)sf.fp + STACK_BIAS; |
1366 | callchain_store(entry, pc); | 1391 | perf_callchain_store(entry, pc); |
1367 | } while (entry->nr < PERF_MAX_STACK_DEPTH); | 1392 | } while (entry->nr < PERF_MAX_STACK_DEPTH); |
1368 | } | 1393 | } |
1369 | 1394 | ||
1370 | static void perf_callchain_user_32(struct pt_regs *regs, | 1395 | static void perf_callchain_user_32(struct perf_callchain_entry *entry, |
1371 | struct perf_callchain_entry *entry) | 1396 | struct pt_regs *regs) |
1372 | { | 1397 | { |
1373 | unsigned long ufp; | 1398 | unsigned long ufp; |
1374 | 1399 | ||
1375 | callchain_store(entry, PERF_CONTEXT_USER); | 1400 | perf_callchain_store(entry, regs->tpc); |
1376 | callchain_store(entry, regs->tpc); | ||
1377 | 1401 | ||
1378 | ufp = regs->u_regs[UREG_I6] & 0xffffffffUL; | 1402 | ufp = regs->u_regs[UREG_I6] & 0xffffffffUL; |
1379 | do { | 1403 | do { |
@@ -1386,34 +1410,16 @@ static void perf_callchain_user_32(struct pt_regs *regs, | |||
1386 | 1410 | ||
1387 | pc = sf.callers_pc; | 1411 | pc = sf.callers_pc; |
1388 | ufp = (unsigned long)sf.fp; | 1412 | ufp = (unsigned long)sf.fp; |
1389 | callchain_store(entry, pc); | 1413 | perf_callchain_store(entry, pc); |
1390 | } while (entry->nr < PERF_MAX_STACK_DEPTH); | 1414 | } while (entry->nr < PERF_MAX_STACK_DEPTH); |
1391 | } | 1415 | } |
1392 | 1416 | ||
1393 | /* Like powerpc we can't get PMU interrupts within the PMU handler, | 1417 | void |
1394 | * so no need for separate NMI and IRQ chains as on x86. | 1418 | perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs) |
1395 | */ | ||
1396 | static DEFINE_PER_CPU(struct perf_callchain_entry, callchain); | ||
1397 | |||
1398 | struct perf_callchain_entry *perf_callchain(struct pt_regs *regs) | ||
1399 | { | 1419 | { |
1400 | struct perf_callchain_entry *entry = &__get_cpu_var(callchain); | 1420 | flushw_user(); |
1401 | 1421 | if (test_thread_flag(TIF_32BIT)) | |
1402 | entry->nr = 0; | 1422 | perf_callchain_user_32(entry, regs); |
1403 | if (!user_mode(regs)) { | 1423 | else |
1404 | stack_trace_flush(); | 1424 | perf_callchain_user_64(entry, regs); |
1405 | perf_callchain_kernel(regs, entry); | ||
1406 | if (current->mm) | ||
1407 | regs = task_pt_regs(current); | ||
1408 | else | ||
1409 | regs = NULL; | ||
1410 | } | ||
1411 | if (regs) { | ||
1412 | flushw_user(); | ||
1413 | if (test_thread_flag(TIF_32BIT)) | ||
1414 | perf_callchain_user_32(regs, entry); | ||
1415 | else | ||
1416 | perf_callchain_user_64(regs, entry); | ||
1417 | } | ||
1418 | return entry; | ||
1419 | } | 1425 | } |
diff --git a/arch/um/drivers/hostaudio_kern.c b/arch/um/drivers/hostaudio_kern.c index 0c46e398cd8f..63c740a85b4c 100644 --- a/arch/um/drivers/hostaudio_kern.c +++ b/arch/um/drivers/hostaudio_kern.c | |||
@@ -40,6 +40,11 @@ static char *mixer = HOSTAUDIO_DEV_MIXER; | |||
40 | " This is used to specify the host mixer device to the hostaudio driver.\n"\ | 40 | " This is used to specify the host mixer device to the hostaudio driver.\n"\ |
41 | " The default is \"" HOSTAUDIO_DEV_MIXER "\".\n\n" | 41 | " The default is \"" HOSTAUDIO_DEV_MIXER "\".\n\n" |
42 | 42 | ||
43 | module_param(dsp, charp, 0644); | ||
44 | MODULE_PARM_DESC(dsp, DSP_HELP); | ||
45 | module_param(mixer, charp, 0644); | ||
46 | MODULE_PARM_DESC(mixer, MIXER_HELP); | ||
47 | |||
43 | #ifndef MODULE | 48 | #ifndef MODULE |
44 | static int set_dsp(char *name, int *add) | 49 | static int set_dsp(char *name, int *add) |
45 | { | 50 | { |
@@ -56,15 +61,6 @@ static int set_mixer(char *name, int *add) | |||
56 | } | 61 | } |
57 | 62 | ||
58 | __uml_setup("mixer=", set_mixer, "mixer=<mixer device>\n" MIXER_HELP); | 63 | __uml_setup("mixer=", set_mixer, "mixer=<mixer device>\n" MIXER_HELP); |
59 | |||
60 | #else /*MODULE*/ | ||
61 | |||
62 | module_param(dsp, charp, 0644); | ||
63 | MODULE_PARM_DESC(dsp, DSP_HELP); | ||
64 | |||
65 | module_param(mixer, charp, 0644); | ||
66 | MODULE_PARM_DESC(mixer, MIXER_HELP); | ||
67 | |||
68 | #endif | 64 | #endif |
69 | 65 | ||
70 | /* /dev/dsp file operations */ | 66 | /* /dev/dsp file operations */ |
diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c index 1bcd208c459f..9734994cba1e 100644 --- a/arch/um/drivers/ubd_kern.c +++ b/arch/um/drivers/ubd_kern.c | |||
@@ -163,6 +163,7 @@ struct ubd { | |||
163 | struct scatterlist sg[MAX_SG]; | 163 | struct scatterlist sg[MAX_SG]; |
164 | struct request *request; | 164 | struct request *request; |
165 | int start_sg, end_sg; | 165 | int start_sg, end_sg; |
166 | sector_t rq_pos; | ||
166 | }; | 167 | }; |
167 | 168 | ||
168 | #define DEFAULT_COW { \ | 169 | #define DEFAULT_COW { \ |
@@ -187,6 +188,7 @@ struct ubd { | |||
187 | .request = NULL, \ | 188 | .request = NULL, \ |
188 | .start_sg = 0, \ | 189 | .start_sg = 0, \ |
189 | .end_sg = 0, \ | 190 | .end_sg = 0, \ |
191 | .rq_pos = 0, \ | ||
190 | } | 192 | } |
191 | 193 | ||
192 | /* Protected by ubd_lock */ | 194 | /* Protected by ubd_lock */ |
@@ -1228,7 +1230,6 @@ static void do_ubd_request(struct request_queue *q) | |||
1228 | { | 1230 | { |
1229 | struct io_thread_req *io_req; | 1231 | struct io_thread_req *io_req; |
1230 | struct request *req; | 1232 | struct request *req; |
1231 | sector_t sector; | ||
1232 | int n; | 1233 | int n; |
1233 | 1234 | ||
1234 | while(1){ | 1235 | while(1){ |
@@ -1239,12 +1240,12 @@ static void do_ubd_request(struct request_queue *q) | |||
1239 | return; | 1240 | return; |
1240 | 1241 | ||
1241 | dev->request = req; | 1242 | dev->request = req; |
1243 | dev->rq_pos = blk_rq_pos(req); | ||
1242 | dev->start_sg = 0; | 1244 | dev->start_sg = 0; |
1243 | dev->end_sg = blk_rq_map_sg(q, req, dev->sg); | 1245 | dev->end_sg = blk_rq_map_sg(q, req, dev->sg); |
1244 | } | 1246 | } |
1245 | 1247 | ||
1246 | req = dev->request; | 1248 | req = dev->request; |
1247 | sector = blk_rq_pos(req); | ||
1248 | while(dev->start_sg < dev->end_sg){ | 1249 | while(dev->start_sg < dev->end_sg){ |
1249 | struct scatterlist *sg = &dev->sg[dev->start_sg]; | 1250 | struct scatterlist *sg = &dev->sg[dev->start_sg]; |
1250 | 1251 | ||
@@ -1256,10 +1257,9 @@ static void do_ubd_request(struct request_queue *q) | |||
1256 | return; | 1257 | return; |
1257 | } | 1258 | } |
1258 | prepare_request(req, io_req, | 1259 | prepare_request(req, io_req, |
1259 | (unsigned long long)sector << 9, | 1260 | (unsigned long long)dev->rq_pos << 9, |
1260 | sg->offset, sg->length, sg_page(sg)); | 1261 | sg->offset, sg->length, sg_page(sg)); |
1261 | 1262 | ||
1262 | sector += sg->length >> 9; | ||
1263 | n = os_write_file(thread_fd, &io_req, | 1263 | n = os_write_file(thread_fd, &io_req, |
1264 | sizeof(struct io_thread_req *)); | 1264 | sizeof(struct io_thread_req *)); |
1265 | if(n != sizeof(struct io_thread_req *)){ | 1265 | if(n != sizeof(struct io_thread_req *)){ |
@@ -1272,6 +1272,7 @@ static void do_ubd_request(struct request_queue *q) | |||
1272 | return; | 1272 | return; |
1273 | } | 1273 | } |
1274 | 1274 | ||
1275 | dev->rq_pos += sg->length >> 9; | ||
1275 | dev->start_sg++; | 1276 | dev->start_sg++; |
1276 | } | 1277 | } |
1277 | dev->end_sg = 0; | 1278 | dev->end_sg = 0; |
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index f4c70c246ffe..89b88e3a56e9 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig | |||
@@ -25,6 +25,7 @@ config X86 | |||
25 | select HAVE_IDE | 25 | select HAVE_IDE |
26 | select HAVE_OPROFILE | 26 | select HAVE_OPROFILE |
27 | select HAVE_PERF_EVENTS if (!M386 && !M486) | 27 | select HAVE_PERF_EVENTS if (!M386 && !M486) |
28 | select HAVE_IRQ_WORK | ||
28 | select HAVE_IOREMAP_PROT | 29 | select HAVE_IOREMAP_PROT |
29 | select HAVE_KPROBES | 30 | select HAVE_KPROBES |
30 | select ARCH_WANT_OPTIONAL_GPIOLIB | 31 | select ARCH_WANT_OPTIONAL_GPIOLIB |
@@ -33,6 +34,7 @@ config X86 | |||
33 | select HAVE_KRETPROBES | 34 | select HAVE_KRETPROBES |
34 | select HAVE_OPTPROBES | 35 | select HAVE_OPTPROBES |
35 | select HAVE_FTRACE_MCOUNT_RECORD | 36 | select HAVE_FTRACE_MCOUNT_RECORD |
37 | select HAVE_C_RECORDMCOUNT | ||
36 | select HAVE_DYNAMIC_FTRACE | 38 | select HAVE_DYNAMIC_FTRACE |
37 | select HAVE_FUNCTION_TRACER | 39 | select HAVE_FUNCTION_TRACER |
38 | select HAVE_FUNCTION_GRAPH_TRACER | 40 | select HAVE_FUNCTION_GRAPH_TRACER |
@@ -59,6 +61,8 @@ config X86 | |||
59 | select ANON_INODES | 61 | select ANON_INODES |
60 | select HAVE_ARCH_KMEMCHECK | 62 | select HAVE_ARCH_KMEMCHECK |
61 | select HAVE_USER_RETURN_NOTIFIER | 63 | select HAVE_USER_RETURN_NOTIFIER |
64 | select HAVE_ARCH_JUMP_LABEL | ||
65 | select HAVE_TEXT_POKE_SMP | ||
62 | 66 | ||
63 | config INSTRUCTION_DECODER | 67 | config INSTRUCTION_DECODER |
64 | def_bool (KPROBES || PERF_EVENTS) | 68 | def_bool (KPROBES || PERF_EVENTS) |
@@ -2136,6 +2140,10 @@ config HAVE_ATOMIC_IOMAP | |||
2136 | def_bool y | 2140 | def_bool y |
2137 | depends on X86_32 | 2141 | depends on X86_32 |
2138 | 2142 | ||
2143 | config HAVE_TEXT_POKE_SMP | ||
2144 | bool | ||
2145 | select STOP_MACHINE if SMP | ||
2146 | |||
2139 | source "net/Kconfig" | 2147 | source "net/Kconfig" |
2140 | 2148 | ||
2141 | source "drivers/Kconfig" | 2149 | source "drivers/Kconfig" |
diff --git a/arch/x86/ia32/ia32_aout.c b/arch/x86/ia32/ia32_aout.c index 0350311906ae..2d93bdbc9ac0 100644 --- a/arch/x86/ia32/ia32_aout.c +++ b/arch/x86/ia32/ia32_aout.c | |||
@@ -34,7 +34,7 @@ | |||
34 | #include <asm/ia32.h> | 34 | #include <asm/ia32.h> |
35 | 35 | ||
36 | #undef WARN_OLD | 36 | #undef WARN_OLD |
37 | #undef CORE_DUMP /* probably broken */ | 37 | #undef CORE_DUMP /* definitely broken */ |
38 | 38 | ||
39 | static int load_aout_binary(struct linux_binprm *, struct pt_regs *regs); | 39 | static int load_aout_binary(struct linux_binprm *, struct pt_regs *regs); |
40 | static int load_aout_library(struct file *); | 40 | static int load_aout_library(struct file *); |
@@ -131,21 +131,15 @@ static void set_brk(unsigned long start, unsigned long end) | |||
131 | * macros to write out all the necessary info. | 131 | * macros to write out all the necessary info. |
132 | */ | 132 | */ |
133 | 133 | ||
134 | static int dump_write(struct file *file, const void *addr, int nr) | 134 | #include <linux/coredump.h> |
135 | { | ||
136 | return file->f_op->write(file, addr, nr, &file->f_pos) == nr; | ||
137 | } | ||
138 | 135 | ||
139 | #define DUMP_WRITE(addr, nr) \ | 136 | #define DUMP_WRITE(addr, nr) \ |
140 | if (!dump_write(file, (void *)(addr), (nr))) \ | 137 | if (!dump_write(file, (void *)(addr), (nr))) \ |
141 | goto end_coredump; | 138 | goto end_coredump; |
142 | 139 | ||
143 | #define DUMP_SEEK(offset) \ | 140 | #define DUMP_SEEK(offset) \ |
144 | if (file->f_op->llseek) { \ | 141 | if (!dump_seek(file, offset)) \ |
145 | if (file->f_op->llseek(file, (offset), 0) != (offset)) \ | 142 | goto end_coredump; |
146 | goto end_coredump; \ | ||
147 | } else \ | ||
148 | file->f_pos = (offset) | ||
149 | 143 | ||
150 | #define START_DATA() (u.u_tsize << PAGE_SHIFT) | 144 | #define START_DATA() (u.u_tsize << PAGE_SHIFT) |
151 | #define START_STACK(u) (u.start_stack) | 145 | #define START_STACK(u) (u.start_stack) |
@@ -217,12 +211,6 @@ static int aout_core_dump(long signr, struct pt_regs *regs, struct file *file, | |||
217 | dump_size = dump.u_ssize << PAGE_SHIFT; | 211 | dump_size = dump.u_ssize << PAGE_SHIFT; |
218 | DUMP_WRITE(dump_start, dump_size); | 212 | DUMP_WRITE(dump_start, dump_size); |
219 | } | 213 | } |
220 | /* | ||
221 | * Finally dump the task struct. Not be used by gdb, but | ||
222 | * could be useful | ||
223 | */ | ||
224 | set_fs(KERNEL_DS); | ||
225 | DUMP_WRITE(current, sizeof(*current)); | ||
226 | end_coredump: | 214 | end_coredump: |
227 | set_fs(fs); | 215 | set_fs(fs); |
228 | return has_dumped; | 216 | return has_dumped; |
diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h index bc6abb7bc7ee..76561d20ea2f 100644 --- a/arch/x86/include/asm/alternative.h +++ b/arch/x86/include/asm/alternative.h | |||
@@ -4,6 +4,7 @@ | |||
4 | #include <linux/types.h> | 4 | #include <linux/types.h> |
5 | #include <linux/stddef.h> | 5 | #include <linux/stddef.h> |
6 | #include <linux/stringify.h> | 6 | #include <linux/stringify.h> |
7 | #include <linux/jump_label.h> | ||
7 | #include <asm/asm.h> | 8 | #include <asm/asm.h> |
8 | 9 | ||
9 | /* | 10 | /* |
@@ -160,6 +161,8 @@ static inline void apply_paravirt(struct paravirt_patch_site *start, | |||
160 | #define __parainstructions_end NULL | 161 | #define __parainstructions_end NULL |
161 | #endif | 162 | #endif |
162 | 163 | ||
164 | extern void *text_poke_early(void *addr, const void *opcode, size_t len); | ||
165 | |||
163 | /* | 166 | /* |
164 | * Clear and restore the kernel write-protection flag on the local CPU. | 167 | * Clear and restore the kernel write-protection flag on the local CPU. |
165 | * Allows the kernel to edit read-only pages. | 168 | * Allows the kernel to edit read-only pages. |
@@ -180,4 +183,12 @@ static inline void apply_paravirt(struct paravirt_patch_site *start, | |||
180 | extern void *text_poke(void *addr, const void *opcode, size_t len); | 183 | extern void *text_poke(void *addr, const void *opcode, size_t len); |
181 | extern void *text_poke_smp(void *addr, const void *opcode, size_t len); | 184 | extern void *text_poke_smp(void *addr, const void *opcode, size_t len); |
182 | 185 | ||
186 | #if defined(CONFIG_DYNAMIC_FTRACE) || defined(HAVE_JUMP_LABEL) | ||
187 | #define IDEAL_NOP_SIZE_5 5 | ||
188 | extern unsigned char ideal_nop5[IDEAL_NOP_SIZE_5]; | ||
189 | extern void arch_init_ideal_nop5(void); | ||
190 | #else | ||
191 | static inline void arch_init_ideal_nop5(void) {} | ||
192 | #endif | ||
193 | |||
183 | #endif /* _ASM_X86_ALTERNATIVE_H */ | 194 | #endif /* _ASM_X86_ALTERNATIVE_H */ |
diff --git a/arch/x86/include/asm/amd_iommu.h b/arch/x86/include/asm/amd_iommu.h index 5af2982133b5..f16a2caca1e0 100644 --- a/arch/x86/include/asm/amd_iommu.h +++ b/arch/x86/include/asm/amd_iommu.h | |||
@@ -1,5 +1,5 @@ | |||
1 | /* | 1 | /* |
2 | * Copyright (C) 2007-2009 Advanced Micro Devices, Inc. | 2 | * Copyright (C) 2007-2010 Advanced Micro Devices, Inc. |
3 | * Author: Joerg Roedel <joerg.roedel@amd.com> | 3 | * Author: Joerg Roedel <joerg.roedel@amd.com> |
4 | * Leo Duran <leo.duran@amd.com> | 4 | * Leo Duran <leo.duran@amd.com> |
5 | * | 5 | * |
diff --git a/arch/x86/include/asm/amd_iommu_proto.h b/arch/x86/include/asm/amd_iommu_proto.h index cb030374b90a..916bc8111a01 100644 --- a/arch/x86/include/asm/amd_iommu_proto.h +++ b/arch/x86/include/asm/amd_iommu_proto.h | |||
@@ -1,5 +1,5 @@ | |||
1 | /* | 1 | /* |
2 | * Copyright (C) 2009 Advanced Micro Devices, Inc. | 2 | * Copyright (C) 2009-2010 Advanced Micro Devices, Inc. |
3 | * Author: Joerg Roedel <joerg.roedel@amd.com> | 3 | * Author: Joerg Roedel <joerg.roedel@amd.com> |
4 | * | 4 | * |
5 | * This program is free software; you can redistribute it and/or modify it | 5 | * This program is free software; you can redistribute it and/or modify it |
diff --git a/arch/x86/include/asm/amd_iommu_types.h b/arch/x86/include/asm/amd_iommu_types.h index 08616180deaf..e3509fc303bf 100644 --- a/arch/x86/include/asm/amd_iommu_types.h +++ b/arch/x86/include/asm/amd_iommu_types.h | |||
@@ -1,5 +1,5 @@ | |||
1 | /* | 1 | /* |
2 | * Copyright (C) 2007-2009 Advanced Micro Devices, Inc. | 2 | * Copyright (C) 2007-2010 Advanced Micro Devices, Inc. |
3 | * Author: Joerg Roedel <joerg.roedel@amd.com> | 3 | * Author: Joerg Roedel <joerg.roedel@amd.com> |
4 | * Leo Duran <leo.duran@amd.com> | 4 | * Leo Duran <leo.duran@amd.com> |
5 | * | 5 | * |
@@ -416,13 +416,22 @@ struct amd_iommu { | |||
416 | struct dma_ops_domain *default_dom; | 416 | struct dma_ops_domain *default_dom; |
417 | 417 | ||
418 | /* | 418 | /* |
419 | * This array is required to work around a potential BIOS bug. | 419 | * We can't rely on the BIOS to restore all values on reinit, so we |
420 | * The BIOS may miss to restore parts of the PCI configuration | 420 | * need to stash them |
421 | * space when the system resumes from S3. The result is that the | ||
422 | * IOMMU does not execute commands anymore which leads to system | ||
423 | * failure. | ||
424 | */ | 421 | */ |
425 | u32 cache_cfg[4]; | 422 | |
423 | /* The iommu BAR */ | ||
424 | u32 stored_addr_lo; | ||
425 | u32 stored_addr_hi; | ||
426 | |||
427 | /* | ||
428 | * Each iommu has 6 l1s, each of which is documented as having 0x12 | ||
429 | * registers | ||
430 | */ | ||
431 | u32 stored_l1[6][0x12]; | ||
432 | |||
433 | /* The l2 indirect registers */ | ||
434 | u32 stored_l2[0x83]; | ||
426 | }; | 435 | }; |
427 | 436 | ||
428 | /* | 437 | /* |
diff --git a/arch/x86/include/asm/entry_arch.h b/arch/x86/include/asm/entry_arch.h index 8e8ec663a98f..b8e96a18676b 100644 --- a/arch/x86/include/asm/entry_arch.h +++ b/arch/x86/include/asm/entry_arch.h | |||
@@ -49,8 +49,8 @@ BUILD_INTERRUPT(apic_timer_interrupt,LOCAL_TIMER_VECTOR) | |||
49 | BUILD_INTERRUPT(error_interrupt,ERROR_APIC_VECTOR) | 49 | BUILD_INTERRUPT(error_interrupt,ERROR_APIC_VECTOR) |
50 | BUILD_INTERRUPT(spurious_interrupt,SPURIOUS_APIC_VECTOR) | 50 | BUILD_INTERRUPT(spurious_interrupt,SPURIOUS_APIC_VECTOR) |
51 | 51 | ||
52 | #ifdef CONFIG_PERF_EVENTS | 52 | #ifdef CONFIG_IRQ_WORK |
53 | BUILD_INTERRUPT(perf_pending_interrupt, LOCAL_PENDING_VECTOR) | 53 | BUILD_INTERRUPT(irq_work_interrupt, IRQ_WORK_VECTOR) |
54 | #endif | 54 | #endif |
55 | 55 | ||
56 | #ifdef CONFIG_X86_THERMAL_VECTOR | 56 | #ifdef CONFIG_X86_THERMAL_VECTOR |
diff --git a/arch/x86/include/asm/gart.h b/arch/x86/include/asm/gart.h index 4ac5b0f33fc1..bf357f9b25f0 100644 --- a/arch/x86/include/asm/gart.h +++ b/arch/x86/include/asm/gart.h | |||
@@ -17,6 +17,7 @@ extern int fix_aperture; | |||
17 | #define GARTEN (1<<0) | 17 | #define GARTEN (1<<0) |
18 | #define DISGARTCPU (1<<4) | 18 | #define DISGARTCPU (1<<4) |
19 | #define DISGARTIO (1<<5) | 19 | #define DISGARTIO (1<<5) |
20 | #define DISTLBWALKPRB (1<<6) | ||
20 | 21 | ||
21 | /* GART cache control register bits. */ | 22 | /* GART cache control register bits. */ |
22 | #define INVGART (1<<0) | 23 | #define INVGART (1<<0) |
@@ -27,7 +28,6 @@ extern int fix_aperture; | |||
27 | #define AMD64_GARTAPERTUREBASE 0x94 | 28 | #define AMD64_GARTAPERTUREBASE 0x94 |
28 | #define AMD64_GARTTABLEBASE 0x98 | 29 | #define AMD64_GARTTABLEBASE 0x98 |
29 | #define AMD64_GARTCACHECTL 0x9c | 30 | #define AMD64_GARTCACHECTL 0x9c |
30 | #define AMD64_GARTEN (1<<0) | ||
31 | 31 | ||
32 | #ifdef CONFIG_GART_IOMMU | 32 | #ifdef CONFIG_GART_IOMMU |
33 | extern int gart_iommu_aperture; | 33 | extern int gart_iommu_aperture; |
@@ -57,6 +57,19 @@ static inline void gart_iommu_hole_init(void) | |||
57 | 57 | ||
58 | extern int agp_amd64_init(void); | 58 | extern int agp_amd64_init(void); |
59 | 59 | ||
60 | static inline void gart_set_size_and_enable(struct pci_dev *dev, u32 order) | ||
61 | { | ||
62 | u32 ctl; | ||
63 | |||
64 | /* | ||
65 | * Don't enable translation but enable GART IO and CPU accesses. | ||
66 | * Also, set DISTLBWALKPRB since GART tables memory is UC. | ||
67 | */ | ||
68 | ctl = DISTLBWALKPRB | order << 1; | ||
69 | |||
70 | pci_write_config_dword(dev, AMD64_GARTAPERTURECTL, ctl); | ||
71 | } | ||
72 | |||
60 | static inline void enable_gart_translation(struct pci_dev *dev, u64 addr) | 73 | static inline void enable_gart_translation(struct pci_dev *dev, u64 addr) |
61 | { | 74 | { |
62 | u32 tmp, ctl; | 75 | u32 tmp, ctl; |
diff --git a/arch/x86/include/asm/hardirq.h b/arch/x86/include/asm/hardirq.h index aeab29aee617..55e4de613f0e 100644 --- a/arch/x86/include/asm/hardirq.h +++ b/arch/x86/include/asm/hardirq.h | |||
@@ -14,7 +14,7 @@ typedef struct { | |||
14 | #endif | 14 | #endif |
15 | unsigned int x86_platform_ipis; /* arch dependent */ | 15 | unsigned int x86_platform_ipis; /* arch dependent */ |
16 | unsigned int apic_perf_irqs; | 16 | unsigned int apic_perf_irqs; |
17 | unsigned int apic_pending_irqs; | 17 | unsigned int apic_irq_work_irqs; |
18 | #ifdef CONFIG_SMP | 18 | #ifdef CONFIG_SMP |
19 | unsigned int irq_resched_count; | 19 | unsigned int irq_resched_count; |
20 | unsigned int irq_call_count; | 20 | unsigned int irq_call_count; |
diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h index 46c0fe05f230..3a54a1ca1a02 100644 --- a/arch/x86/include/asm/hw_irq.h +++ b/arch/x86/include/asm/hw_irq.h | |||
@@ -29,7 +29,7 @@ | |||
29 | extern void apic_timer_interrupt(void); | 29 | extern void apic_timer_interrupt(void); |
30 | extern void x86_platform_ipi(void); | 30 | extern void x86_platform_ipi(void); |
31 | extern void error_interrupt(void); | 31 | extern void error_interrupt(void); |
32 | extern void perf_pending_interrupt(void); | 32 | extern void irq_work_interrupt(void); |
33 | 33 | ||
34 | extern void spurious_interrupt(void); | 34 | extern void spurious_interrupt(void); |
35 | extern void thermal_interrupt(void); | 35 | extern void thermal_interrupt(void); |
diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h index e2ca30092557..6af0894dafb4 100644 --- a/arch/x86/include/asm/irq_vectors.h +++ b/arch/x86/include/asm/irq_vectors.h | |||
@@ -114,9 +114,9 @@ | |||
114 | #define X86_PLATFORM_IPI_VECTOR 0xed | 114 | #define X86_PLATFORM_IPI_VECTOR 0xed |
115 | 115 | ||
116 | /* | 116 | /* |
117 | * Performance monitoring pending work vector: | 117 | * IRQ work vector: |
118 | */ | 118 | */ |
119 | #define LOCAL_PENDING_VECTOR 0xec | 119 | #define IRQ_WORK_VECTOR 0xec |
120 | 120 | ||
121 | #define UV_BAU_MESSAGE 0xea | 121 | #define UV_BAU_MESSAGE 0xea |
122 | 122 | ||
diff --git a/arch/x86/include/asm/jump_label.h b/arch/x86/include/asm/jump_label.h new file mode 100644 index 000000000000..f52d42e80585 --- /dev/null +++ b/arch/x86/include/asm/jump_label.h | |||
@@ -0,0 +1,37 @@ | |||
1 | #ifndef _ASM_X86_JUMP_LABEL_H | ||
2 | #define _ASM_X86_JUMP_LABEL_H | ||
3 | |||
4 | #ifdef __KERNEL__ | ||
5 | |||
6 | #include <linux/types.h> | ||
7 | #include <asm/nops.h> | ||
8 | |||
9 | #define JUMP_LABEL_NOP_SIZE 5 | ||
10 | |||
11 | # define JUMP_LABEL_INITIAL_NOP ".byte 0xe9 \n\t .long 0\n\t" | ||
12 | |||
13 | # define JUMP_LABEL(key, label) \ | ||
14 | do { \ | ||
15 | asm goto("1:" \ | ||
16 | JUMP_LABEL_INITIAL_NOP \ | ||
17 | ".pushsection __jump_table, \"a\" \n\t"\ | ||
18 | _ASM_PTR "1b, %l[" #label "], %c0 \n\t" \ | ||
19 | ".popsection \n\t" \ | ||
20 | : : "i" (key) : : label); \ | ||
21 | } while (0) | ||
22 | |||
23 | #endif /* __KERNEL__ */ | ||
24 | |||
25 | #ifdef CONFIG_X86_64 | ||
26 | typedef u64 jump_label_t; | ||
27 | #else | ||
28 | typedef u32 jump_label_t; | ||
29 | #endif | ||
30 | |||
31 | struct jump_entry { | ||
32 | jump_label_t code; | ||
33 | jump_label_t target; | ||
34 | jump_label_t key; | ||
35 | }; | ||
36 | |||
37 | #endif | ||
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 502e53f999cf..c52e2eb40a1e 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h | |||
@@ -652,20 +652,6 @@ static inline struct kvm_mmu_page *page_header(hpa_t shadow_page) | |||
652 | return (struct kvm_mmu_page *)page_private(page); | 652 | return (struct kvm_mmu_page *)page_private(page); |
653 | } | 653 | } |
654 | 654 | ||
655 | static inline u16 kvm_read_fs(void) | ||
656 | { | ||
657 | u16 seg; | ||
658 | asm("mov %%fs, %0" : "=g"(seg)); | ||
659 | return seg; | ||
660 | } | ||
661 | |||
662 | static inline u16 kvm_read_gs(void) | ||
663 | { | ||
664 | u16 seg; | ||
665 | asm("mov %%gs, %0" : "=g"(seg)); | ||
666 | return seg; | ||
667 | } | ||
668 | |||
669 | static inline u16 kvm_read_ldt(void) | 655 | static inline u16 kvm_read_ldt(void) |
670 | { | 656 | { |
671 | u16 ldt; | 657 | u16 ldt; |
@@ -673,16 +659,6 @@ static inline u16 kvm_read_ldt(void) | |||
673 | return ldt; | 659 | return ldt; |
674 | } | 660 | } |
675 | 661 | ||
676 | static inline void kvm_load_fs(u16 sel) | ||
677 | { | ||
678 | asm("mov %0, %%fs" : : "rm"(sel)); | ||
679 | } | ||
680 | |||
681 | static inline void kvm_load_gs(u16 sel) | ||
682 | { | ||
683 | asm("mov %0, %%gs" : : "rm"(sel)); | ||
684 | } | ||
685 | |||
686 | static inline void kvm_load_ldt(u16 sel) | 662 | static inline void kvm_load_ldt(u16 sel) |
687 | { | 663 | { |
688 | asm("lldt %0" : : "rm"(sel)); | 664 | asm("lldt %0" : : "rm"(sel)); |
diff --git a/arch/x86/include/asm/perf_event_p4.h b/arch/x86/include/asm/perf_event_p4.h index def500776b16..a70cd216be5d 100644 --- a/arch/x86/include/asm/perf_event_p4.h +++ b/arch/x86/include/asm/perf_event_p4.h | |||
@@ -36,19 +36,6 @@ | |||
36 | #define P4_ESCR_EMASK(v) ((v) << P4_ESCR_EVENTMASK_SHIFT) | 36 | #define P4_ESCR_EMASK(v) ((v) << P4_ESCR_EVENTMASK_SHIFT) |
37 | #define P4_ESCR_TAG(v) ((v) << P4_ESCR_TAG_SHIFT) | 37 | #define P4_ESCR_TAG(v) ((v) << P4_ESCR_TAG_SHIFT) |
38 | 38 | ||
39 | /* Non HT mask */ | ||
40 | #define P4_ESCR_MASK \ | ||
41 | (P4_ESCR_EVENT_MASK | \ | ||
42 | P4_ESCR_EVENTMASK_MASK | \ | ||
43 | P4_ESCR_TAG_MASK | \ | ||
44 | P4_ESCR_TAG_ENABLE | \ | ||
45 | P4_ESCR_T0_OS | \ | ||
46 | P4_ESCR_T0_USR) | ||
47 | |||
48 | /* HT mask */ | ||
49 | #define P4_ESCR_MASK_HT \ | ||
50 | (P4_ESCR_MASK | P4_ESCR_T1_OS | P4_ESCR_T1_USR) | ||
51 | |||
52 | #define P4_CCCR_OVF 0x80000000U | 39 | #define P4_CCCR_OVF 0x80000000U |
53 | #define P4_CCCR_CASCADE 0x40000000U | 40 | #define P4_CCCR_CASCADE 0x40000000U |
54 | #define P4_CCCR_OVF_PMI_T0 0x04000000U | 41 | #define P4_CCCR_OVF_PMI_T0 0x04000000U |
@@ -70,23 +57,6 @@ | |||
70 | #define P4_CCCR_THRESHOLD(v) ((v) << P4_CCCR_THRESHOLD_SHIFT) | 57 | #define P4_CCCR_THRESHOLD(v) ((v) << P4_CCCR_THRESHOLD_SHIFT) |
71 | #define P4_CCCR_ESEL(v) ((v) << P4_CCCR_ESCR_SELECT_SHIFT) | 58 | #define P4_CCCR_ESEL(v) ((v) << P4_CCCR_ESCR_SELECT_SHIFT) |
72 | 59 | ||
73 | /* Non HT mask */ | ||
74 | #define P4_CCCR_MASK \ | ||
75 | (P4_CCCR_OVF | \ | ||
76 | P4_CCCR_CASCADE | \ | ||
77 | P4_CCCR_OVF_PMI_T0 | \ | ||
78 | P4_CCCR_FORCE_OVF | \ | ||
79 | P4_CCCR_EDGE | \ | ||
80 | P4_CCCR_THRESHOLD_MASK | \ | ||
81 | P4_CCCR_COMPLEMENT | \ | ||
82 | P4_CCCR_COMPARE | \ | ||
83 | P4_CCCR_ESCR_SELECT_MASK | \ | ||
84 | P4_CCCR_ENABLE) | ||
85 | |||
86 | /* HT mask */ | ||
87 | #define P4_CCCR_MASK_HT \ | ||
88 | (P4_CCCR_MASK | P4_CCCR_OVF_PMI_T1 | P4_CCCR_THREAD_ANY) | ||
89 | |||
90 | #define P4_GEN_ESCR_EMASK(class, name, bit) \ | 60 | #define P4_GEN_ESCR_EMASK(class, name, bit) \ |
91 | class##__##name = ((1 << bit) << P4_ESCR_EVENTMASK_SHIFT) | 61 | class##__##name = ((1 << bit) << P4_ESCR_EVENTMASK_SHIFT) |
92 | #define P4_ESCR_EMASK_BIT(class, name) class##__##name | 62 | #define P4_ESCR_EMASK_BIT(class, name) class##__##name |
@@ -127,6 +97,28 @@ | |||
127 | #define P4_CONFIG_HT_SHIFT 63 | 97 | #define P4_CONFIG_HT_SHIFT 63 |
128 | #define P4_CONFIG_HT (1ULL << P4_CONFIG_HT_SHIFT) | 98 | #define P4_CONFIG_HT (1ULL << P4_CONFIG_HT_SHIFT) |
129 | 99 | ||
100 | /* | ||
101 | * The bits we allow to pass for RAW events | ||
102 | */ | ||
103 | #define P4_CONFIG_MASK_ESCR \ | ||
104 | P4_ESCR_EVENT_MASK | \ | ||
105 | P4_ESCR_EVENTMASK_MASK | \ | ||
106 | P4_ESCR_TAG_MASK | \ | ||
107 | P4_ESCR_TAG_ENABLE | ||
108 | |||
109 | #define P4_CONFIG_MASK_CCCR \ | ||
110 | P4_CCCR_EDGE | \ | ||
111 | P4_CCCR_THRESHOLD_MASK | \ | ||
112 | P4_CCCR_COMPLEMENT | \ | ||
113 | P4_CCCR_COMPARE | \ | ||
114 | P4_CCCR_THREAD_ANY | \ | ||
115 | P4_CCCR_RESERVED | ||
116 | |||
117 | /* some dangerous bits are reserved for kernel internals */ | ||
118 | #define P4_CONFIG_MASK \ | ||
119 | (p4_config_pack_escr(P4_CONFIG_MASK_ESCR)) | \ | ||
120 | (p4_config_pack_cccr(P4_CONFIG_MASK_CCCR)) | ||
121 | |||
130 | static inline bool p4_is_event_cascaded(u64 config) | 122 | static inline bool p4_is_event_cascaded(u64 config) |
131 | { | 123 | { |
132 | u32 cccr = p4_config_unpack_cccr(config); | 124 | u32 cccr = p4_config_unpack_cccr(config); |
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index fedf32a8c3ec..7490bf8d1459 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile | |||
@@ -34,7 +34,8 @@ GCOV_PROFILE_paravirt.o := n | |||
34 | obj-y := process_$(BITS).o signal.o entry_$(BITS).o | 34 | obj-y := process_$(BITS).o signal.o entry_$(BITS).o |
35 | obj-y += traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o | 35 | obj-y += traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o |
36 | obj-y += time.o ioport.o ldt.o dumpstack.o | 36 | obj-y += time.o ioport.o ldt.o dumpstack.o |
37 | obj-y += setup.o x86_init.o i8259.o irqinit.o | 37 | obj-y += setup.o x86_init.o i8259.o irqinit.o jump_label.o |
38 | obj-$(CONFIG_IRQ_WORK) += irq_work.o | ||
38 | obj-$(CONFIG_X86_VISWS) += visws_quirks.o | 39 | obj-$(CONFIG_X86_VISWS) += visws_quirks.o |
39 | obj-$(CONFIG_X86_32) += probe_roms_32.o | 40 | obj-$(CONFIG_X86_32) += probe_roms_32.o |
40 | obj-$(CONFIG_X86_32) += sys_i386_32.o i386_ksyms_32.o | 41 | obj-$(CONFIG_X86_32) += sys_i386_32.o i386_ksyms_32.o |
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index f65ab8b014c4..a36bb90aef53 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c | |||
@@ -195,7 +195,7 @@ static void __init_or_module add_nops(void *insns, unsigned int len) | |||
195 | 195 | ||
196 | extern struct alt_instr __alt_instructions[], __alt_instructions_end[]; | 196 | extern struct alt_instr __alt_instructions[], __alt_instructions_end[]; |
197 | extern s32 __smp_locks[], __smp_locks_end[]; | 197 | extern s32 __smp_locks[], __smp_locks_end[]; |
198 | static void *text_poke_early(void *addr, const void *opcode, size_t len); | 198 | void *text_poke_early(void *addr, const void *opcode, size_t len); |
199 | 199 | ||
200 | /* Replace instructions with better alternatives for this CPU type. | 200 | /* Replace instructions with better alternatives for this CPU type. |
201 | This runs before SMP is initialized to avoid SMP problems with | 201 | This runs before SMP is initialized to avoid SMP problems with |
@@ -522,7 +522,7 @@ void __init alternative_instructions(void) | |||
522 | * instructions. And on the local CPU you need to be protected again NMI or MCE | 522 | * instructions. And on the local CPU you need to be protected again NMI or MCE |
523 | * handlers seeing an inconsistent instruction while you patch. | 523 | * handlers seeing an inconsistent instruction while you patch. |
524 | */ | 524 | */ |
525 | static void *__init_or_module text_poke_early(void *addr, const void *opcode, | 525 | void *__init_or_module text_poke_early(void *addr, const void *opcode, |
526 | size_t len) | 526 | size_t len) |
527 | { | 527 | { |
528 | unsigned long flags; | 528 | unsigned long flags; |
@@ -637,7 +637,72 @@ void *__kprobes text_poke_smp(void *addr, const void *opcode, size_t len) | |||
637 | tpp.len = len; | 637 | tpp.len = len; |
638 | atomic_set(&stop_machine_first, 1); | 638 | atomic_set(&stop_machine_first, 1); |
639 | wrote_text = 0; | 639 | wrote_text = 0; |
640 | stop_machine(stop_machine_text_poke, (void *)&tpp, NULL); | 640 | /* Use __stop_machine() because the caller already got online_cpus. */ |
641 | __stop_machine(stop_machine_text_poke, (void *)&tpp, NULL); | ||
641 | return addr; | 642 | return addr; |
642 | } | 643 | } |
643 | 644 | ||
645 | #if defined(CONFIG_DYNAMIC_FTRACE) || defined(HAVE_JUMP_LABEL) | ||
646 | |||
647 | unsigned char ideal_nop5[IDEAL_NOP_SIZE_5]; | ||
648 | |||
649 | void __init arch_init_ideal_nop5(void) | ||
650 | { | ||
651 | extern const unsigned char ftrace_test_p6nop[]; | ||
652 | extern const unsigned char ftrace_test_nop5[]; | ||
653 | extern const unsigned char ftrace_test_jmp[]; | ||
654 | int faulted = 0; | ||
655 | |||
656 | /* | ||
657 | * There is no good nop for all x86 archs. | ||
658 | * We will default to using the P6_NOP5, but first we | ||
659 | * will test to make sure that the nop will actually | ||
660 | * work on this CPU. If it faults, we will then | ||
661 | * go to a lesser efficient 5 byte nop. If that fails | ||
662 | * we then just use a jmp as our nop. This isn't the most | ||
663 | * efficient nop, but we can not use a multi part nop | ||
664 | * since we would then risk being preempted in the middle | ||
665 | * of that nop, and if we enabled tracing then, it might | ||
666 | * cause a system crash. | ||
667 | * | ||
668 | * TODO: check the cpuid to determine the best nop. | ||
669 | */ | ||
670 | asm volatile ( | ||
671 | "ftrace_test_jmp:" | ||
672 | "jmp ftrace_test_p6nop\n" | ||
673 | "nop\n" | ||
674 | "nop\n" | ||
675 | "nop\n" /* 2 byte jmp + 3 bytes */ | ||
676 | "ftrace_test_p6nop:" | ||
677 | P6_NOP5 | ||
678 | "jmp 1f\n" | ||
679 | "ftrace_test_nop5:" | ||
680 | ".byte 0x66,0x66,0x66,0x66,0x90\n" | ||
681 | "1:" | ||
682 | ".section .fixup, \"ax\"\n" | ||
683 | "2: movl $1, %0\n" | ||
684 | " jmp ftrace_test_nop5\n" | ||
685 | "3: movl $2, %0\n" | ||
686 | " jmp 1b\n" | ||
687 | ".previous\n" | ||
688 | _ASM_EXTABLE(ftrace_test_p6nop, 2b) | ||
689 | _ASM_EXTABLE(ftrace_test_nop5, 3b) | ||
690 | : "=r"(faulted) : "0" (faulted)); | ||
691 | |||
692 | switch (faulted) { | ||
693 | case 0: | ||
694 | pr_info("converting mcount calls to 0f 1f 44 00 00\n"); | ||
695 | memcpy(ideal_nop5, ftrace_test_p6nop, IDEAL_NOP_SIZE_5); | ||
696 | break; | ||
697 | case 1: | ||
698 | pr_info("converting mcount calls to 66 66 66 66 90\n"); | ||
699 | memcpy(ideal_nop5, ftrace_test_nop5, IDEAL_NOP_SIZE_5); | ||
700 | break; | ||
701 | case 2: | ||
702 | pr_info("converting mcount calls to jmp . + 5\n"); | ||
703 | memcpy(ideal_nop5, ftrace_test_jmp, IDEAL_NOP_SIZE_5); | ||
704 | break; | ||
705 | } | ||
706 | |||
707 | } | ||
708 | #endif | ||
diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 679b6450382b..d2fdb0826df2 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c | |||
@@ -1,5 +1,5 @@ | |||
1 | /* | 1 | /* |
2 | * Copyright (C) 2007-2009 Advanced Micro Devices, Inc. | 2 | * Copyright (C) 2007-2010 Advanced Micro Devices, Inc. |
3 | * Author: Joerg Roedel <joerg.roedel@amd.com> | 3 | * Author: Joerg Roedel <joerg.roedel@amd.com> |
4 | * Leo Duran <leo.duran@amd.com> | 4 | * Leo Duran <leo.duran@amd.com> |
5 | * | 5 | * |
diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c index 5a170cbbbed8..3cb482e123de 100644 --- a/arch/x86/kernel/amd_iommu_init.c +++ b/arch/x86/kernel/amd_iommu_init.c | |||
@@ -1,5 +1,5 @@ | |||
1 | /* | 1 | /* |
2 | * Copyright (C) 2007-2009 Advanced Micro Devices, Inc. | 2 | * Copyright (C) 2007-2010 Advanced Micro Devices, Inc. |
3 | * Author: Joerg Roedel <joerg.roedel@amd.com> | 3 | * Author: Joerg Roedel <joerg.roedel@amd.com> |
4 | * Leo Duran <leo.duran@amd.com> | 4 | * Leo Duran <leo.duran@amd.com> |
5 | * | 5 | * |
@@ -194,6 +194,39 @@ static inline unsigned long tbl_size(int entry_size) | |||
194 | return 1UL << shift; | 194 | return 1UL << shift; |
195 | } | 195 | } |
196 | 196 | ||
197 | /* Access to l1 and l2 indexed register spaces */ | ||
198 | |||
199 | static u32 iommu_read_l1(struct amd_iommu *iommu, u16 l1, u8 address) | ||
200 | { | ||
201 | u32 val; | ||
202 | |||
203 | pci_write_config_dword(iommu->dev, 0xf8, (address | l1 << 16)); | ||
204 | pci_read_config_dword(iommu->dev, 0xfc, &val); | ||
205 | return val; | ||
206 | } | ||
207 | |||
208 | static void iommu_write_l1(struct amd_iommu *iommu, u16 l1, u8 address, u32 val) | ||
209 | { | ||
210 | pci_write_config_dword(iommu->dev, 0xf8, (address | l1 << 16 | 1 << 31)); | ||
211 | pci_write_config_dword(iommu->dev, 0xfc, val); | ||
212 | pci_write_config_dword(iommu->dev, 0xf8, (address | l1 << 16)); | ||
213 | } | ||
214 | |||
215 | static u32 iommu_read_l2(struct amd_iommu *iommu, u8 address) | ||
216 | { | ||
217 | u32 val; | ||
218 | |||
219 | pci_write_config_dword(iommu->dev, 0xf0, address); | ||
220 | pci_read_config_dword(iommu->dev, 0xf4, &val); | ||
221 | return val; | ||
222 | } | ||
223 | |||
224 | static void iommu_write_l2(struct amd_iommu *iommu, u8 address, u32 val) | ||
225 | { | ||
226 | pci_write_config_dword(iommu->dev, 0xf0, (address | 1 << 8)); | ||
227 | pci_write_config_dword(iommu->dev, 0xf4, val); | ||
228 | } | ||
229 | |||
197 | /**************************************************************************** | 230 | /**************************************************************************** |
198 | * | 231 | * |
199 | * AMD IOMMU MMIO register space handling functions | 232 | * AMD IOMMU MMIO register space handling functions |
@@ -619,6 +652,7 @@ static void __init init_iommu_from_pci(struct amd_iommu *iommu) | |||
619 | { | 652 | { |
620 | int cap_ptr = iommu->cap_ptr; | 653 | int cap_ptr = iommu->cap_ptr; |
621 | u32 range, misc; | 654 | u32 range, misc; |
655 | int i, j; | ||
622 | 656 | ||
623 | pci_read_config_dword(iommu->dev, cap_ptr + MMIO_CAP_HDR_OFFSET, | 657 | pci_read_config_dword(iommu->dev, cap_ptr + MMIO_CAP_HDR_OFFSET, |
624 | &iommu->cap); | 658 | &iommu->cap); |
@@ -633,12 +667,29 @@ static void __init init_iommu_from_pci(struct amd_iommu *iommu) | |||
633 | MMIO_GET_LD(range)); | 667 | MMIO_GET_LD(range)); |
634 | iommu->evt_msi_num = MMIO_MSI_NUM(misc); | 668 | iommu->evt_msi_num = MMIO_MSI_NUM(misc); |
635 | 669 | ||
636 | if (is_rd890_iommu(iommu->dev)) { | 670 | if (!is_rd890_iommu(iommu->dev)) |
637 | pci_read_config_dword(iommu->dev, 0xf0, &iommu->cache_cfg[0]); | 671 | return; |
638 | pci_read_config_dword(iommu->dev, 0xf4, &iommu->cache_cfg[1]); | 672 | |
639 | pci_read_config_dword(iommu->dev, 0xf8, &iommu->cache_cfg[2]); | 673 | /* |
640 | pci_read_config_dword(iommu->dev, 0xfc, &iommu->cache_cfg[3]); | 674 | * Some rd890 systems may not be fully reconfigured by the BIOS, so |
641 | } | 675 | * it's necessary for us to store this information so it can be |
676 | * reprogrammed on resume | ||
677 | */ | ||
678 | |||
679 | pci_read_config_dword(iommu->dev, iommu->cap_ptr + 4, | ||
680 | &iommu->stored_addr_lo); | ||
681 | pci_read_config_dword(iommu->dev, iommu->cap_ptr + 8, | ||
682 | &iommu->stored_addr_hi); | ||
683 | |||
684 | /* Low bit locks writes to configuration space */ | ||
685 | iommu->stored_addr_lo &= ~1; | ||
686 | |||
687 | for (i = 0; i < 6; i++) | ||
688 | for (j = 0; j < 0x12; j++) | ||
689 | iommu->stored_l1[i][j] = iommu_read_l1(iommu, i, j); | ||
690 | |||
691 | for (i = 0; i < 0x83; i++) | ||
692 | iommu->stored_l2[i] = iommu_read_l2(iommu, i); | ||
642 | } | 693 | } |
643 | 694 | ||
644 | /* | 695 | /* |
@@ -1127,14 +1178,53 @@ static void iommu_init_flags(struct amd_iommu *iommu) | |||
1127 | iommu_feature_enable(iommu, CONTROL_COHERENT_EN); | 1178 | iommu_feature_enable(iommu, CONTROL_COHERENT_EN); |
1128 | } | 1179 | } |
1129 | 1180 | ||
1130 | static void iommu_apply_quirks(struct amd_iommu *iommu) | 1181 | static void iommu_apply_resume_quirks(struct amd_iommu *iommu) |
1131 | { | 1182 | { |
1132 | if (is_rd890_iommu(iommu->dev)) { | 1183 | int i, j; |
1133 | pci_write_config_dword(iommu->dev, 0xf0, iommu->cache_cfg[0]); | 1184 | u32 ioc_feature_control; |
1134 | pci_write_config_dword(iommu->dev, 0xf4, iommu->cache_cfg[1]); | 1185 | struct pci_dev *pdev = NULL; |
1135 | pci_write_config_dword(iommu->dev, 0xf8, iommu->cache_cfg[2]); | 1186 | |
1136 | pci_write_config_dword(iommu->dev, 0xfc, iommu->cache_cfg[3]); | 1187 | /* RD890 BIOSes may not have completely reconfigured the iommu */ |
1137 | } | 1188 | if (!is_rd890_iommu(iommu->dev)) |
1189 | return; | ||
1190 | |||
1191 | /* | ||
1192 | * First, we need to ensure that the iommu is enabled. This is | ||
1193 | * controlled by a register in the northbridge | ||
1194 | */ | ||
1195 | pdev = pci_get_bus_and_slot(iommu->dev->bus->number, PCI_DEVFN(0, 0)); | ||
1196 | |||
1197 | if (!pdev) | ||
1198 | return; | ||
1199 | |||
1200 | /* Select Northbridge indirect register 0x75 and enable writing */ | ||
1201 | pci_write_config_dword(pdev, 0x60, 0x75 | (1 << 7)); | ||
1202 | pci_read_config_dword(pdev, 0x64, &ioc_feature_control); | ||
1203 | |||
1204 | /* Enable the iommu */ | ||
1205 | if (!(ioc_feature_control & 0x1)) | ||
1206 | pci_write_config_dword(pdev, 0x64, ioc_feature_control | 1); | ||
1207 | |||
1208 | pci_dev_put(pdev); | ||
1209 | |||
1210 | /* Restore the iommu BAR */ | ||
1211 | pci_write_config_dword(iommu->dev, iommu->cap_ptr + 4, | ||
1212 | iommu->stored_addr_lo); | ||
1213 | pci_write_config_dword(iommu->dev, iommu->cap_ptr + 8, | ||
1214 | iommu->stored_addr_hi); | ||
1215 | |||
1216 | /* Restore the l1 indirect regs for each of the 6 l1s */ | ||
1217 | for (i = 0; i < 6; i++) | ||
1218 | for (j = 0; j < 0x12; j++) | ||
1219 | iommu_write_l1(iommu, i, j, iommu->stored_l1[i][j]); | ||
1220 | |||
1221 | /* Restore the l2 indirect regs */ | ||
1222 | for (i = 0; i < 0x83; i++) | ||
1223 | iommu_write_l2(iommu, i, iommu->stored_l2[i]); | ||
1224 | |||
1225 | /* Lock PCI setup registers */ | ||
1226 | pci_write_config_dword(iommu->dev, iommu->cap_ptr + 4, | ||
1227 | iommu->stored_addr_lo | 1); | ||
1138 | } | 1228 | } |
1139 | 1229 | ||
1140 | /* | 1230 | /* |
@@ -1147,7 +1237,6 @@ static void enable_iommus(void) | |||
1147 | 1237 | ||
1148 | for_each_iommu(iommu) { | 1238 | for_each_iommu(iommu) { |
1149 | iommu_disable(iommu); | 1239 | iommu_disable(iommu); |
1150 | iommu_apply_quirks(iommu); | ||
1151 | iommu_init_flags(iommu); | 1240 | iommu_init_flags(iommu); |
1152 | iommu_set_device_table(iommu); | 1241 | iommu_set_device_table(iommu); |
1153 | iommu_enable_command_buffer(iommu); | 1242 | iommu_enable_command_buffer(iommu); |
@@ -1173,6 +1262,11 @@ static void disable_iommus(void) | |||
1173 | 1262 | ||
1174 | static int amd_iommu_resume(struct sys_device *dev) | 1263 | static int amd_iommu_resume(struct sys_device *dev) |
1175 | { | 1264 | { |
1265 | struct amd_iommu *iommu; | ||
1266 | |||
1267 | for_each_iommu(iommu) | ||
1268 | iommu_apply_resume_quirks(iommu); | ||
1269 | |||
1176 | /* re-load the hardware */ | 1270 | /* re-load the hardware */ |
1177 | enable_iommus(); | 1271 | enable_iommus(); |
1178 | 1272 | ||
diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c index a2e0caf26e17..c9cb17368448 100644 --- a/arch/x86/kernel/aperture_64.c +++ b/arch/x86/kernel/aperture_64.c | |||
@@ -307,7 +307,7 @@ void __init early_gart_iommu_check(void) | |||
307 | continue; | 307 | continue; |
308 | 308 | ||
309 | ctl = read_pci_config(bus, slot, 3, AMD64_GARTAPERTURECTL); | 309 | ctl = read_pci_config(bus, slot, 3, AMD64_GARTAPERTURECTL); |
310 | aper_enabled = ctl & AMD64_GARTEN; | 310 | aper_enabled = ctl & GARTEN; |
311 | aper_order = (ctl >> 1) & 7; | 311 | aper_order = (ctl >> 1) & 7; |
312 | aper_size = (32 * 1024 * 1024) << aper_order; | 312 | aper_size = (32 * 1024 * 1024) << aper_order; |
313 | aper_base = read_pci_config(bus, slot, 3, AMD64_GARTAPERTUREBASE) & 0x7fff; | 313 | aper_base = read_pci_config(bus, slot, 3, AMD64_GARTAPERTUREBASE) & 0x7fff; |
@@ -362,7 +362,7 @@ void __init early_gart_iommu_check(void) | |||
362 | continue; | 362 | continue; |
363 | 363 | ||
364 | ctl = read_pci_config(bus, slot, 3, AMD64_GARTAPERTURECTL); | 364 | ctl = read_pci_config(bus, slot, 3, AMD64_GARTAPERTURECTL); |
365 | ctl &= ~AMD64_GARTEN; | 365 | ctl &= ~GARTEN; |
366 | write_pci_config(bus, slot, 3, AMD64_GARTAPERTURECTL, ctl); | 366 | write_pci_config(bus, slot, 3, AMD64_GARTAPERTURECTL, ctl); |
367 | } | 367 | } |
368 | } | 368 | } |
@@ -505,8 +505,13 @@ out: | |||
505 | 505 | ||
506 | /* Fix up the north bridges */ | 506 | /* Fix up the north bridges */ |
507 | for (i = 0; i < ARRAY_SIZE(bus_dev_ranges); i++) { | 507 | for (i = 0; i < ARRAY_SIZE(bus_dev_ranges); i++) { |
508 | int bus; | 508 | int bus, dev_base, dev_limit; |
509 | int dev_base, dev_limit; | 509 | |
510 | /* | ||
511 | * Don't enable translation yet but enable GART IO and CPU | ||
512 | * accesses and set DISTLBWALKPRB since GART table memory is UC. | ||
513 | */ | ||
514 | u32 ctl = DISTLBWALKPRB | aper_order << 1; | ||
510 | 515 | ||
511 | bus = bus_dev_ranges[i].bus; | 516 | bus = bus_dev_ranges[i].bus; |
512 | dev_base = bus_dev_ranges[i].dev_base; | 517 | dev_base = bus_dev_ranges[i].dev_base; |
@@ -515,10 +520,7 @@ out: | |||
515 | if (!early_is_k8_nb(read_pci_config(bus, slot, 3, 0x00))) | 520 | if (!early_is_k8_nb(read_pci_config(bus, slot, 3, 0x00))) |
516 | continue; | 521 | continue; |
517 | 522 | ||
518 | /* Don't enable translation yet. That is done later. | 523 | write_pci_config(bus, slot, 3, AMD64_GARTAPERTURECTL, ctl); |
519 | Assume this BIOS didn't initialise the GART so | ||
520 | just overwrite all previous bits */ | ||
521 | write_pci_config(bus, slot, 3, AMD64_GARTAPERTURECTL, aper_order << 1); | ||
522 | write_pci_config(bus, slot, 3, AMD64_GARTAPERTUREBASE, aper_alloc >> 25); | 524 | write_pci_config(bus, slot, 3, AMD64_GARTAPERTUREBASE, aper_alloc >> 25); |
523 | } | 525 | } |
524 | } | 526 | } |
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 03a5b0385ad6..fe73c1844a9a 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c | |||
@@ -531,7 +531,7 @@ static int x86_pmu_hw_config(struct perf_event *event) | |||
531 | /* | 531 | /* |
532 | * Setup the hardware configuration for a given attr_type | 532 | * Setup the hardware configuration for a given attr_type |
533 | */ | 533 | */ |
534 | static int __hw_perf_event_init(struct perf_event *event) | 534 | static int __x86_pmu_event_init(struct perf_event *event) |
535 | { | 535 | { |
536 | int err; | 536 | int err; |
537 | 537 | ||
@@ -584,7 +584,7 @@ static void x86_pmu_disable_all(void) | |||
584 | } | 584 | } |
585 | } | 585 | } |
586 | 586 | ||
587 | void hw_perf_disable(void) | 587 | static void x86_pmu_disable(struct pmu *pmu) |
588 | { | 588 | { |
589 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | 589 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); |
590 | 590 | ||
@@ -619,7 +619,7 @@ static void x86_pmu_enable_all(int added) | |||
619 | } | 619 | } |
620 | } | 620 | } |
621 | 621 | ||
622 | static const struct pmu pmu; | 622 | static struct pmu pmu; |
623 | 623 | ||
624 | static inline int is_x86_event(struct perf_event *event) | 624 | static inline int is_x86_event(struct perf_event *event) |
625 | { | 625 | { |
@@ -801,10 +801,10 @@ static inline int match_prev_assignment(struct hw_perf_event *hwc, | |||
801 | hwc->last_tag == cpuc->tags[i]; | 801 | hwc->last_tag == cpuc->tags[i]; |
802 | } | 802 | } |
803 | 803 | ||
804 | static int x86_pmu_start(struct perf_event *event); | 804 | static void x86_pmu_start(struct perf_event *event, int flags); |
805 | static void x86_pmu_stop(struct perf_event *event); | 805 | static void x86_pmu_stop(struct perf_event *event, int flags); |
806 | 806 | ||
807 | void hw_perf_enable(void) | 807 | static void x86_pmu_enable(struct pmu *pmu) |
808 | { | 808 | { |
809 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | 809 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); |
810 | struct perf_event *event; | 810 | struct perf_event *event; |
@@ -840,7 +840,14 @@ void hw_perf_enable(void) | |||
840 | match_prev_assignment(hwc, cpuc, i)) | 840 | match_prev_assignment(hwc, cpuc, i)) |
841 | continue; | 841 | continue; |
842 | 842 | ||
843 | x86_pmu_stop(event); | 843 | /* |
844 | * Ensure we don't accidentally enable a stopped | ||
845 | * counter simply because we rescheduled. | ||
846 | */ | ||
847 | if (hwc->state & PERF_HES_STOPPED) | ||
848 | hwc->state |= PERF_HES_ARCH; | ||
849 | |||
850 | x86_pmu_stop(event, PERF_EF_UPDATE); | ||
844 | } | 851 | } |
845 | 852 | ||
846 | for (i = 0; i < cpuc->n_events; i++) { | 853 | for (i = 0; i < cpuc->n_events; i++) { |
@@ -852,7 +859,10 @@ void hw_perf_enable(void) | |||
852 | else if (i < n_running) | 859 | else if (i < n_running) |
853 | continue; | 860 | continue; |
854 | 861 | ||
855 | x86_pmu_start(event); | 862 | if (hwc->state & PERF_HES_ARCH) |
863 | continue; | ||
864 | |||
865 | x86_pmu_start(event, PERF_EF_RELOAD); | ||
856 | } | 866 | } |
857 | cpuc->n_added = 0; | 867 | cpuc->n_added = 0; |
858 | perf_events_lapic_init(); | 868 | perf_events_lapic_init(); |
@@ -953,15 +963,12 @@ static void x86_pmu_enable_event(struct perf_event *event) | |||
953 | } | 963 | } |
954 | 964 | ||
955 | /* | 965 | /* |
956 | * activate a single event | 966 | * Add a single event to the PMU. |
957 | * | 967 | * |
958 | * The event is added to the group of enabled events | 968 | * The event is added to the group of enabled events |
959 | * but only if it can be scheduled with existing events. | 969 | * but only if it can be scheduled with existing events. |
960 | * | ||
961 | * Called with PMU disabled. If successful and return value 1, | ||
962 | * then guaranteed to call perf_enable() and hw_perf_enable() | ||
963 | */ | 970 | */ |
964 | static int x86_pmu_enable(struct perf_event *event) | 971 | static int x86_pmu_add(struct perf_event *event, int flags) |
965 | { | 972 | { |
966 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | 973 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); |
967 | struct hw_perf_event *hwc; | 974 | struct hw_perf_event *hwc; |
@@ -970,58 +977,67 @@ static int x86_pmu_enable(struct perf_event *event) | |||
970 | 977 | ||
971 | hwc = &event->hw; | 978 | hwc = &event->hw; |
972 | 979 | ||
980 | perf_pmu_disable(event->pmu); | ||
973 | n0 = cpuc->n_events; | 981 | n0 = cpuc->n_events; |
974 | n = collect_events(cpuc, event, false); | 982 | ret = n = collect_events(cpuc, event, false); |
975 | if (n < 0) | 983 | if (ret < 0) |
976 | return n; | 984 | goto out; |
985 | |||
986 | hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED; | ||
987 | if (!(flags & PERF_EF_START)) | ||
988 | hwc->state |= PERF_HES_ARCH; | ||
977 | 989 | ||
978 | /* | 990 | /* |
979 | * If group events scheduling transaction was started, | 991 | * If group events scheduling transaction was started, |
980 | * skip the schedulability test here, it will be performed | 992 | * skip the schedulability test here, it will be performed |
981 | * at commit time(->commit_txn) as a whole | 993 | * at commit time (->commit_txn) as a whole |
982 | */ | 994 | */ |
983 | if (cpuc->group_flag & PERF_EVENT_TXN) | 995 | if (cpuc->group_flag & PERF_EVENT_TXN) |
984 | goto out; | 996 | goto done_collect; |
985 | 997 | ||
986 | ret = x86_pmu.schedule_events(cpuc, n, assign); | 998 | ret = x86_pmu.schedule_events(cpuc, n, assign); |
987 | if (ret) | 999 | if (ret) |
988 | return ret; | 1000 | goto out; |
989 | /* | 1001 | /* |
990 | * copy new assignment, now we know it is possible | 1002 | * copy new assignment, now we know it is possible |
991 | * will be used by hw_perf_enable() | 1003 | * will be used by hw_perf_enable() |
992 | */ | 1004 | */ |
993 | memcpy(cpuc->assign, assign, n*sizeof(int)); | 1005 | memcpy(cpuc->assign, assign, n*sizeof(int)); |
994 | 1006 | ||
995 | out: | 1007 | done_collect: |
996 | cpuc->n_events = n; | 1008 | cpuc->n_events = n; |
997 | cpuc->n_added += n - n0; | 1009 | cpuc->n_added += n - n0; |
998 | cpuc->n_txn += n - n0; | 1010 | cpuc->n_txn += n - n0; |
999 | 1011 | ||
1000 | return 0; | 1012 | ret = 0; |
1013 | out: | ||
1014 | perf_pmu_enable(event->pmu); | ||
1015 | return ret; | ||
1001 | } | 1016 | } |
1002 | 1017 | ||
1003 | static int x86_pmu_start(struct perf_event *event) | 1018 | static void x86_pmu_start(struct perf_event *event, int flags) |
1004 | { | 1019 | { |
1005 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | 1020 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); |
1006 | int idx = event->hw.idx; | 1021 | int idx = event->hw.idx; |
1007 | 1022 | ||
1008 | if (idx == -1) | 1023 | if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED))) |
1009 | return -EAGAIN; | 1024 | return; |
1025 | |||
1026 | if (WARN_ON_ONCE(idx == -1)) | ||
1027 | return; | ||
1028 | |||
1029 | if (flags & PERF_EF_RELOAD) { | ||
1030 | WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE)); | ||
1031 | x86_perf_event_set_period(event); | ||
1032 | } | ||
1033 | |||
1034 | event->hw.state = 0; | ||
1010 | 1035 | ||
1011 | x86_perf_event_set_period(event); | ||
1012 | cpuc->events[idx] = event; | 1036 | cpuc->events[idx] = event; |
1013 | __set_bit(idx, cpuc->active_mask); | 1037 | __set_bit(idx, cpuc->active_mask); |
1014 | __set_bit(idx, cpuc->running); | 1038 | __set_bit(idx, cpuc->running); |
1015 | x86_pmu.enable(event); | 1039 | x86_pmu.enable(event); |
1016 | perf_event_update_userpage(event); | 1040 | perf_event_update_userpage(event); |
1017 | |||
1018 | return 0; | ||
1019 | } | ||
1020 | |||
1021 | static void x86_pmu_unthrottle(struct perf_event *event) | ||
1022 | { | ||
1023 | int ret = x86_pmu_start(event); | ||
1024 | WARN_ON_ONCE(ret); | ||
1025 | } | 1041 | } |
1026 | 1042 | ||
1027 | void perf_event_print_debug(void) | 1043 | void perf_event_print_debug(void) |
@@ -1078,27 +1094,29 @@ void perf_event_print_debug(void) | |||
1078 | local_irq_restore(flags); | 1094 | local_irq_restore(flags); |
1079 | } | 1095 | } |
1080 | 1096 | ||
1081 | static void x86_pmu_stop(struct perf_event *event) | 1097 | static void x86_pmu_stop(struct perf_event *event, int flags) |
1082 | { | 1098 | { |
1083 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | 1099 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); |
1084 | struct hw_perf_event *hwc = &event->hw; | 1100 | struct hw_perf_event *hwc = &event->hw; |
1085 | int idx = hwc->idx; | ||
1086 | 1101 | ||
1087 | if (!__test_and_clear_bit(idx, cpuc->active_mask)) | 1102 | if (__test_and_clear_bit(hwc->idx, cpuc->active_mask)) { |
1088 | return; | 1103 | x86_pmu.disable(event); |
1089 | 1104 | cpuc->events[hwc->idx] = NULL; | |
1090 | x86_pmu.disable(event); | 1105 | WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED); |
1091 | 1106 | hwc->state |= PERF_HES_STOPPED; | |
1092 | /* | 1107 | } |
1093 | * Drain the remaining delta count out of a event | ||
1094 | * that we are disabling: | ||
1095 | */ | ||
1096 | x86_perf_event_update(event); | ||
1097 | 1108 | ||
1098 | cpuc->events[idx] = NULL; | 1109 | if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) { |
1110 | /* | ||
1111 | * Drain the remaining delta count out of an event | ||
1112 | * that we are disabling: | ||
1113 | */ | ||
1114 | x86_perf_event_update(event); | ||
1115 | hwc->state |= PERF_HES_UPTODATE; | ||
1116 | } | ||
1099 | } | 1117 | } |
1100 | 1118 | ||
1101 | static void x86_pmu_disable(struct perf_event *event) | 1119 | static void x86_pmu_del(struct perf_event *event, int flags) |
1102 | { | 1120 | { |
1103 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | 1121 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); |
1104 | int i; | 1122 | int i; |
@@ -1111,7 +1129,7 @@ static void x86_pmu_disable(struct perf_event *event) | |||
1111 | if (cpuc->group_flag & PERF_EVENT_TXN) | 1129 | if (cpuc->group_flag & PERF_EVENT_TXN) |
1112 | return; | 1130 | return; |
1113 | 1131 | ||
1114 | x86_pmu_stop(event); | 1132 | x86_pmu_stop(event, PERF_EF_UPDATE); |
1115 | 1133 | ||
1116 | for (i = 0; i < cpuc->n_events; i++) { | 1134 | for (i = 0; i < cpuc->n_events; i++) { |
1117 | if (event == cpuc->event_list[i]) { | 1135 | if (event == cpuc->event_list[i]) { |
@@ -1134,7 +1152,6 @@ static int x86_pmu_handle_irq(struct pt_regs *regs) | |||
1134 | struct perf_sample_data data; | 1152 | struct perf_sample_data data; |
1135 | struct cpu_hw_events *cpuc; | 1153 | struct cpu_hw_events *cpuc; |
1136 | struct perf_event *event; | 1154 | struct perf_event *event; |
1137 | struct hw_perf_event *hwc; | ||
1138 | int idx, handled = 0; | 1155 | int idx, handled = 0; |
1139 | u64 val; | 1156 | u64 val; |
1140 | 1157 | ||
@@ -1155,7 +1172,6 @@ static int x86_pmu_handle_irq(struct pt_regs *regs) | |||
1155 | } | 1172 | } |
1156 | 1173 | ||
1157 | event = cpuc->events[idx]; | 1174 | event = cpuc->events[idx]; |
1158 | hwc = &event->hw; | ||
1159 | 1175 | ||
1160 | val = x86_perf_event_update(event); | 1176 | val = x86_perf_event_update(event); |
1161 | if (val & (1ULL << (x86_pmu.cntval_bits - 1))) | 1177 | if (val & (1ULL << (x86_pmu.cntval_bits - 1))) |
@@ -1171,7 +1187,7 @@ static int x86_pmu_handle_irq(struct pt_regs *regs) | |||
1171 | continue; | 1187 | continue; |
1172 | 1188 | ||
1173 | if (perf_event_overflow(event, 1, &data, regs)) | 1189 | if (perf_event_overflow(event, 1, &data, regs)) |
1174 | x86_pmu_stop(event); | 1190 | x86_pmu_stop(event, 0); |
1175 | } | 1191 | } |
1176 | 1192 | ||
1177 | if (handled) | 1193 | if (handled) |
@@ -1180,25 +1196,6 @@ static int x86_pmu_handle_irq(struct pt_regs *regs) | |||
1180 | return handled; | 1196 | return handled; |
1181 | } | 1197 | } |
1182 | 1198 | ||
1183 | void smp_perf_pending_interrupt(struct pt_regs *regs) | ||
1184 | { | ||
1185 | irq_enter(); | ||
1186 | ack_APIC_irq(); | ||
1187 | inc_irq_stat(apic_pending_irqs); | ||
1188 | perf_event_do_pending(); | ||
1189 | irq_exit(); | ||
1190 | } | ||
1191 | |||
1192 | void set_perf_event_pending(void) | ||
1193 | { | ||
1194 | #ifdef CONFIG_X86_LOCAL_APIC | ||
1195 | if (!x86_pmu.apic || !x86_pmu_initialized()) | ||
1196 | return; | ||
1197 | |||
1198 | apic->send_IPI_self(LOCAL_PENDING_VECTOR); | ||
1199 | #endif | ||
1200 | } | ||
1201 | |||
1202 | void perf_events_lapic_init(void) | 1199 | void perf_events_lapic_init(void) |
1203 | { | 1200 | { |
1204 | if (!x86_pmu.apic || !x86_pmu_initialized()) | 1201 | if (!x86_pmu.apic || !x86_pmu_initialized()) |
@@ -1388,7 +1385,6 @@ void __init init_hw_perf_events(void) | |||
1388 | x86_pmu.num_counters = X86_PMC_MAX_GENERIC; | 1385 | x86_pmu.num_counters = X86_PMC_MAX_GENERIC; |
1389 | } | 1386 | } |
1390 | x86_pmu.intel_ctrl = (1 << x86_pmu.num_counters) - 1; | 1387 | x86_pmu.intel_ctrl = (1 << x86_pmu.num_counters) - 1; |
1391 | perf_max_events = x86_pmu.num_counters; | ||
1392 | 1388 | ||
1393 | if (x86_pmu.num_counters_fixed > X86_PMC_MAX_FIXED) { | 1389 | if (x86_pmu.num_counters_fixed > X86_PMC_MAX_FIXED) { |
1394 | WARN(1, KERN_ERR "hw perf events fixed %d > max(%d), clipping!", | 1390 | WARN(1, KERN_ERR "hw perf events fixed %d > max(%d), clipping!", |
@@ -1424,6 +1420,7 @@ void __init init_hw_perf_events(void) | |||
1424 | pr_info("... fixed-purpose events: %d\n", x86_pmu.num_counters_fixed); | 1420 | pr_info("... fixed-purpose events: %d\n", x86_pmu.num_counters_fixed); |
1425 | pr_info("... event mask: %016Lx\n", x86_pmu.intel_ctrl); | 1421 | pr_info("... event mask: %016Lx\n", x86_pmu.intel_ctrl); |
1426 | 1422 | ||
1423 | perf_pmu_register(&pmu); | ||
1427 | perf_cpu_notifier(x86_pmu_notifier); | 1424 | perf_cpu_notifier(x86_pmu_notifier); |
1428 | } | 1425 | } |
1429 | 1426 | ||
@@ -1437,10 +1434,11 @@ static inline void x86_pmu_read(struct perf_event *event) | |||
1437 | * Set the flag to make pmu::enable() not perform the | 1434 | * Set the flag to make pmu::enable() not perform the |
1438 | * schedulability test, it will be performed at commit time | 1435 | * schedulability test, it will be performed at commit time |
1439 | */ | 1436 | */ |
1440 | static void x86_pmu_start_txn(const struct pmu *pmu) | 1437 | static void x86_pmu_start_txn(struct pmu *pmu) |
1441 | { | 1438 | { |
1442 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | 1439 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); |
1443 | 1440 | ||
1441 | perf_pmu_disable(pmu); | ||
1444 | cpuc->group_flag |= PERF_EVENT_TXN; | 1442 | cpuc->group_flag |= PERF_EVENT_TXN; |
1445 | cpuc->n_txn = 0; | 1443 | cpuc->n_txn = 0; |
1446 | } | 1444 | } |
@@ -1450,7 +1448,7 @@ static void x86_pmu_start_txn(const struct pmu *pmu) | |||
1450 | * Clear the flag and pmu::enable() will perform the | 1448 | * Clear the flag and pmu::enable() will perform the |
1451 | * schedulability test. | 1449 | * schedulability test. |
1452 | */ | 1450 | */ |
1453 | static void x86_pmu_cancel_txn(const struct pmu *pmu) | 1451 | static void x86_pmu_cancel_txn(struct pmu *pmu) |
1454 | { | 1452 | { |
1455 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | 1453 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); |
1456 | 1454 | ||
@@ -1460,6 +1458,7 @@ static void x86_pmu_cancel_txn(const struct pmu *pmu) | |||
1460 | */ | 1458 | */ |
1461 | cpuc->n_added -= cpuc->n_txn; | 1459 | cpuc->n_added -= cpuc->n_txn; |
1462 | cpuc->n_events -= cpuc->n_txn; | 1460 | cpuc->n_events -= cpuc->n_txn; |
1461 | perf_pmu_enable(pmu); | ||
1463 | } | 1462 | } |
1464 | 1463 | ||
1465 | /* | 1464 | /* |
@@ -1467,7 +1466,7 @@ static void x86_pmu_cancel_txn(const struct pmu *pmu) | |||
1467 | * Perform the group schedulability test as a whole | 1466 | * Perform the group schedulability test as a whole |
1468 | * Return 0 if success | 1467 | * Return 0 if success |
1469 | */ | 1468 | */ |
1470 | static int x86_pmu_commit_txn(const struct pmu *pmu) | 1469 | static int x86_pmu_commit_txn(struct pmu *pmu) |
1471 | { | 1470 | { |
1472 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | 1471 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); |
1473 | int assign[X86_PMC_IDX_MAX]; | 1472 | int assign[X86_PMC_IDX_MAX]; |
@@ -1489,22 +1488,10 @@ static int x86_pmu_commit_txn(const struct pmu *pmu) | |||
1489 | memcpy(cpuc->assign, assign, n*sizeof(int)); | 1488 | memcpy(cpuc->assign, assign, n*sizeof(int)); |
1490 | 1489 | ||
1491 | cpuc->group_flag &= ~PERF_EVENT_TXN; | 1490 | cpuc->group_flag &= ~PERF_EVENT_TXN; |
1492 | 1491 | perf_pmu_enable(pmu); | |
1493 | return 0; | 1492 | return 0; |
1494 | } | 1493 | } |
1495 | 1494 | ||
1496 | static const struct pmu pmu = { | ||
1497 | .enable = x86_pmu_enable, | ||
1498 | .disable = x86_pmu_disable, | ||
1499 | .start = x86_pmu_start, | ||
1500 | .stop = x86_pmu_stop, | ||
1501 | .read = x86_pmu_read, | ||
1502 | .unthrottle = x86_pmu_unthrottle, | ||
1503 | .start_txn = x86_pmu_start_txn, | ||
1504 | .cancel_txn = x86_pmu_cancel_txn, | ||
1505 | .commit_txn = x86_pmu_commit_txn, | ||
1506 | }; | ||
1507 | |||
1508 | /* | 1495 | /* |
1509 | * validate that we can schedule this event | 1496 | * validate that we can schedule this event |
1510 | */ | 1497 | */ |
@@ -1579,12 +1566,22 @@ out: | |||
1579 | return ret; | 1566 | return ret; |
1580 | } | 1567 | } |
1581 | 1568 | ||
1582 | const struct pmu *hw_perf_event_init(struct perf_event *event) | 1569 | int x86_pmu_event_init(struct perf_event *event) |
1583 | { | 1570 | { |
1584 | const struct pmu *tmp; | 1571 | struct pmu *tmp; |
1585 | int err; | 1572 | int err; |
1586 | 1573 | ||
1587 | err = __hw_perf_event_init(event); | 1574 | switch (event->attr.type) { |
1575 | case PERF_TYPE_RAW: | ||
1576 | case PERF_TYPE_HARDWARE: | ||
1577 | case PERF_TYPE_HW_CACHE: | ||
1578 | break; | ||
1579 | |||
1580 | default: | ||
1581 | return -ENOENT; | ||
1582 | } | ||
1583 | |||
1584 | err = __x86_pmu_event_init(event); | ||
1588 | if (!err) { | 1585 | if (!err) { |
1589 | /* | 1586 | /* |
1590 | * we temporarily connect event to its pmu | 1587 | * we temporarily connect event to its pmu |
@@ -1604,26 +1601,31 @@ const struct pmu *hw_perf_event_init(struct perf_event *event) | |||
1604 | if (err) { | 1601 | if (err) { |
1605 | if (event->destroy) | 1602 | if (event->destroy) |
1606 | event->destroy(event); | 1603 | event->destroy(event); |
1607 | return ERR_PTR(err); | ||
1608 | } | 1604 | } |
1609 | 1605 | ||
1610 | return &pmu; | 1606 | return err; |
1611 | } | 1607 | } |
1612 | 1608 | ||
1613 | /* | 1609 | static struct pmu pmu = { |
1614 | * callchain support | 1610 | .pmu_enable = x86_pmu_enable, |
1615 | */ | 1611 | .pmu_disable = x86_pmu_disable, |
1616 | 1612 | ||
1617 | static inline | 1613 | .event_init = x86_pmu_event_init, |
1618 | void callchain_store(struct perf_callchain_entry *entry, u64 ip) | ||
1619 | { | ||
1620 | if (entry->nr < PERF_MAX_STACK_DEPTH) | ||
1621 | entry->ip[entry->nr++] = ip; | ||
1622 | } | ||
1623 | 1614 | ||
1624 | static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_irq_entry); | 1615 | .add = x86_pmu_add, |
1625 | static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_nmi_entry); | 1616 | .del = x86_pmu_del, |
1617 | .start = x86_pmu_start, | ||
1618 | .stop = x86_pmu_stop, | ||
1619 | .read = x86_pmu_read, | ||
1626 | 1620 | ||
1621 | .start_txn = x86_pmu_start_txn, | ||
1622 | .cancel_txn = x86_pmu_cancel_txn, | ||
1623 | .commit_txn = x86_pmu_commit_txn, | ||
1624 | }; | ||
1625 | |||
1626 | /* | ||
1627 | * callchain support | ||
1628 | */ | ||
1627 | 1629 | ||
1628 | static void | 1630 | static void |
1629 | backtrace_warning_symbol(void *data, char *msg, unsigned long symbol) | 1631 | backtrace_warning_symbol(void *data, char *msg, unsigned long symbol) |
@@ -1645,7 +1647,7 @@ static void backtrace_address(void *data, unsigned long addr, int reliable) | |||
1645 | { | 1647 | { |
1646 | struct perf_callchain_entry *entry = data; | 1648 | struct perf_callchain_entry *entry = data; |
1647 | 1649 | ||
1648 | callchain_store(entry, addr); | 1650 | perf_callchain_store(entry, addr); |
1649 | } | 1651 | } |
1650 | 1652 | ||
1651 | static const struct stacktrace_ops backtrace_ops = { | 1653 | static const struct stacktrace_ops backtrace_ops = { |
@@ -1656,11 +1658,15 @@ static const struct stacktrace_ops backtrace_ops = { | |||
1656 | .walk_stack = print_context_stack_bp, | 1658 | .walk_stack = print_context_stack_bp, |
1657 | }; | 1659 | }; |
1658 | 1660 | ||
1659 | static void | 1661 | void |
1660 | perf_callchain_kernel(struct pt_regs *regs, struct perf_callchain_entry *entry) | 1662 | perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs) |
1661 | { | 1663 | { |
1662 | callchain_store(entry, PERF_CONTEXT_KERNEL); | 1664 | if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) { |
1663 | callchain_store(entry, regs->ip); | 1665 | /* TODO: We don't support guest os callchain now */ |
1666 | return; | ||
1667 | } | ||
1668 | |||
1669 | perf_callchain_store(entry, regs->ip); | ||
1664 | 1670 | ||
1665 | dump_trace(NULL, regs, NULL, regs->bp, &backtrace_ops, entry); | 1671 | dump_trace(NULL, regs, NULL, regs->bp, &backtrace_ops, entry); |
1666 | } | 1672 | } |
@@ -1689,7 +1695,7 @@ perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry *entry) | |||
1689 | if (fp < compat_ptr(regs->sp)) | 1695 | if (fp < compat_ptr(regs->sp)) |
1690 | break; | 1696 | break; |
1691 | 1697 | ||
1692 | callchain_store(entry, frame.return_address); | 1698 | perf_callchain_store(entry, frame.return_address); |
1693 | fp = compat_ptr(frame.next_frame); | 1699 | fp = compat_ptr(frame.next_frame); |
1694 | } | 1700 | } |
1695 | return 1; | 1701 | return 1; |
@@ -1702,19 +1708,20 @@ perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry *entry) | |||
1702 | } | 1708 | } |
1703 | #endif | 1709 | #endif |
1704 | 1710 | ||
1705 | static void | 1711 | void |
1706 | perf_callchain_user(struct pt_regs *regs, struct perf_callchain_entry *entry) | 1712 | perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs) |
1707 | { | 1713 | { |
1708 | struct stack_frame frame; | 1714 | struct stack_frame frame; |
1709 | const void __user *fp; | 1715 | const void __user *fp; |
1710 | 1716 | ||
1711 | if (!user_mode(regs)) | 1717 | if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) { |
1712 | regs = task_pt_regs(current); | 1718 | /* TODO: We don't support guest os callchain now */ |
1719 | return; | ||
1720 | } | ||
1713 | 1721 | ||
1714 | fp = (void __user *)regs->bp; | 1722 | fp = (void __user *)regs->bp; |
1715 | 1723 | ||
1716 | callchain_store(entry, PERF_CONTEXT_USER); | 1724 | perf_callchain_store(entry, regs->ip); |
1717 | callchain_store(entry, regs->ip); | ||
1718 | 1725 | ||
1719 | if (perf_callchain_user32(regs, entry)) | 1726 | if (perf_callchain_user32(regs, entry)) |
1720 | return; | 1727 | return; |
@@ -1731,52 +1738,11 @@ perf_callchain_user(struct pt_regs *regs, struct perf_callchain_entry *entry) | |||
1731 | if ((unsigned long)fp < regs->sp) | 1738 | if ((unsigned long)fp < regs->sp) |
1732 | break; | 1739 | break; |
1733 | 1740 | ||
1734 | callchain_store(entry, frame.return_address); | 1741 | perf_callchain_store(entry, frame.return_address); |
1735 | fp = frame.next_frame; | 1742 | fp = frame.next_frame; |
1736 | } | 1743 | } |
1737 | } | 1744 | } |
1738 | 1745 | ||
1739 | static void | ||
1740 | perf_do_callchain(struct pt_regs *regs, struct perf_callchain_entry *entry) | ||
1741 | { | ||
1742 | int is_user; | ||
1743 | |||
1744 | if (!regs) | ||
1745 | return; | ||
1746 | |||
1747 | is_user = user_mode(regs); | ||
1748 | |||
1749 | if (is_user && current->state != TASK_RUNNING) | ||
1750 | return; | ||
1751 | |||
1752 | if (!is_user) | ||
1753 | perf_callchain_kernel(regs, entry); | ||
1754 | |||
1755 | if (current->mm) | ||
1756 | perf_callchain_user(regs, entry); | ||
1757 | } | ||
1758 | |||
1759 | struct perf_callchain_entry *perf_callchain(struct pt_regs *regs) | ||
1760 | { | ||
1761 | struct perf_callchain_entry *entry; | ||
1762 | |||
1763 | if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) { | ||
1764 | /* TODO: We don't support guest os callchain now */ | ||
1765 | return NULL; | ||
1766 | } | ||
1767 | |||
1768 | if (in_nmi()) | ||
1769 | entry = &__get_cpu_var(pmc_nmi_entry); | ||
1770 | else | ||
1771 | entry = &__get_cpu_var(pmc_irq_entry); | ||
1772 | |||
1773 | entry->nr = 0; | ||
1774 | |||
1775 | perf_do_callchain(regs, entry); | ||
1776 | |||
1777 | return entry; | ||
1778 | } | ||
1779 | |||
1780 | unsigned long perf_instruction_pointer(struct pt_regs *regs) | 1746 | unsigned long perf_instruction_pointer(struct pt_regs *regs) |
1781 | { | 1747 | { |
1782 | unsigned long ip; | 1748 | unsigned long ip; |
diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c index c2897b7b4a3b..46d58448c3af 100644 --- a/arch/x86/kernel/cpu/perf_event_amd.c +++ b/arch/x86/kernel/cpu/perf_event_amd.c | |||
@@ -52,7 +52,7 @@ static __initconst const u64 amd_hw_cache_event_ids | |||
52 | [ C(DTLB) ] = { | 52 | [ C(DTLB) ] = { |
53 | [ C(OP_READ) ] = { | 53 | [ C(OP_READ) ] = { |
54 | [ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses */ | 54 | [ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses */ |
55 | [ C(RESULT_MISS) ] = 0x0046, /* L1 DTLB and L2 DLTB Miss */ | 55 | [ C(RESULT_MISS) ] = 0x0746, /* L1_DTLB_AND_L2_DLTB_MISS.ALL */ |
56 | }, | 56 | }, |
57 | [ C(OP_WRITE) ] = { | 57 | [ C(OP_WRITE) ] = { |
58 | [ C(RESULT_ACCESS) ] = 0, | 58 | [ C(RESULT_ACCESS) ] = 0, |
@@ -66,7 +66,7 @@ static __initconst const u64 amd_hw_cache_event_ids | |||
66 | [ C(ITLB) ] = { | 66 | [ C(ITLB) ] = { |
67 | [ C(OP_READ) ] = { | 67 | [ C(OP_READ) ] = { |
68 | [ C(RESULT_ACCESS) ] = 0x0080, /* Instruction fecthes */ | 68 | [ C(RESULT_ACCESS) ] = 0x0080, /* Instruction fecthes */ |
69 | [ C(RESULT_MISS) ] = 0x0085, /* Instr. fetch ITLB misses */ | 69 | [ C(RESULT_MISS) ] = 0x0385, /* L1_ITLB_AND_L2_ITLB_MISS.ALL */ |
70 | }, | 70 | }, |
71 | [ C(OP_WRITE) ] = { | 71 | [ C(OP_WRITE) ] = { |
72 | [ C(RESULT_ACCESS) ] = -1, | 72 | [ C(RESULT_ACCESS) ] = -1, |
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index ee05c90012d2..c8f5c088cad1 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c | |||
@@ -713,18 +713,18 @@ static int intel_pmu_handle_irq(struct pt_regs *regs) | |||
713 | struct cpu_hw_events *cpuc; | 713 | struct cpu_hw_events *cpuc; |
714 | int bit, loops; | 714 | int bit, loops; |
715 | u64 status; | 715 | u64 status; |
716 | int handled = 0; | 716 | int handled; |
717 | 717 | ||
718 | perf_sample_data_init(&data, 0); | 718 | perf_sample_data_init(&data, 0); |
719 | 719 | ||
720 | cpuc = &__get_cpu_var(cpu_hw_events); | 720 | cpuc = &__get_cpu_var(cpu_hw_events); |
721 | 721 | ||
722 | intel_pmu_disable_all(); | 722 | intel_pmu_disable_all(); |
723 | intel_pmu_drain_bts_buffer(); | 723 | handled = intel_pmu_drain_bts_buffer(); |
724 | status = intel_pmu_get_status(); | 724 | status = intel_pmu_get_status(); |
725 | if (!status) { | 725 | if (!status) { |
726 | intel_pmu_enable_all(0); | 726 | intel_pmu_enable_all(0); |
727 | return 0; | 727 | return handled; |
728 | } | 728 | } |
729 | 729 | ||
730 | loops = 0; | 730 | loops = 0; |
@@ -763,7 +763,7 @@ again: | |||
763 | data.period = event->hw.last_period; | 763 | data.period = event->hw.last_period; |
764 | 764 | ||
765 | if (perf_event_overflow(event, 1, &data, regs)) | 765 | if (perf_event_overflow(event, 1, &data, regs)) |
766 | x86_pmu_stop(event); | 766 | x86_pmu_stop(event, 0); |
767 | } | 767 | } |
768 | 768 | ||
769 | /* | 769 | /* |
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c index 18018d1311cd..4977f9c400e5 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_ds.c +++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c | |||
@@ -214,7 +214,7 @@ static void intel_pmu_disable_bts(void) | |||
214 | update_debugctlmsr(debugctlmsr); | 214 | update_debugctlmsr(debugctlmsr); |
215 | } | 215 | } |
216 | 216 | ||
217 | static void intel_pmu_drain_bts_buffer(void) | 217 | static int intel_pmu_drain_bts_buffer(void) |
218 | { | 218 | { |
219 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | 219 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); |
220 | struct debug_store *ds = cpuc->ds; | 220 | struct debug_store *ds = cpuc->ds; |
@@ -231,16 +231,16 @@ static void intel_pmu_drain_bts_buffer(void) | |||
231 | struct pt_regs regs; | 231 | struct pt_regs regs; |
232 | 232 | ||
233 | if (!event) | 233 | if (!event) |
234 | return; | 234 | return 0; |
235 | 235 | ||
236 | if (!ds) | 236 | if (!ds) |
237 | return; | 237 | return 0; |
238 | 238 | ||
239 | at = (struct bts_record *)(unsigned long)ds->bts_buffer_base; | 239 | at = (struct bts_record *)(unsigned long)ds->bts_buffer_base; |
240 | top = (struct bts_record *)(unsigned long)ds->bts_index; | 240 | top = (struct bts_record *)(unsigned long)ds->bts_index; |
241 | 241 | ||
242 | if (top <= at) | 242 | if (top <= at) |
243 | return; | 243 | return 0; |
244 | 244 | ||
245 | ds->bts_index = ds->bts_buffer_base; | 245 | ds->bts_index = ds->bts_buffer_base; |
246 | 246 | ||
@@ -256,7 +256,7 @@ static void intel_pmu_drain_bts_buffer(void) | |||
256 | perf_prepare_sample(&header, &data, event, &regs); | 256 | perf_prepare_sample(&header, &data, event, &regs); |
257 | 257 | ||
258 | if (perf_output_begin(&handle, event, header.size * (top - at), 1, 1)) | 258 | if (perf_output_begin(&handle, event, header.size * (top - at), 1, 1)) |
259 | return; | 259 | return 1; |
260 | 260 | ||
261 | for (; at < top; at++) { | 261 | for (; at < top; at++) { |
262 | data.ip = at->from; | 262 | data.ip = at->from; |
@@ -270,6 +270,7 @@ static void intel_pmu_drain_bts_buffer(void) | |||
270 | /* There's new data available. */ | 270 | /* There's new data available. */ |
271 | event->hw.interrupts++; | 271 | event->hw.interrupts++; |
272 | event->pending_kill = POLL_IN; | 272 | event->pending_kill = POLL_IN; |
273 | return 1; | ||
273 | } | 274 | } |
274 | 275 | ||
275 | /* | 276 | /* |
@@ -491,7 +492,7 @@ static void __intel_pmu_pebs_event(struct perf_event *event, | |||
491 | regs.flags &= ~PERF_EFLAGS_EXACT; | 492 | regs.flags &= ~PERF_EFLAGS_EXACT; |
492 | 493 | ||
493 | if (perf_event_overflow(event, 1, &data, &regs)) | 494 | if (perf_event_overflow(event, 1, &data, &regs)) |
494 | x86_pmu_stop(event); | 495 | x86_pmu_stop(event, 0); |
495 | } | 496 | } |
496 | 497 | ||
497 | static void intel_pmu_drain_pebs_core(struct pt_regs *iregs) | 498 | static void intel_pmu_drain_pebs_core(struct pt_regs *iregs) |
diff --git a/arch/x86/kernel/cpu/perf_event_p4.c b/arch/x86/kernel/cpu/perf_event_p4.c index 249015173992..81400b93e694 100644 --- a/arch/x86/kernel/cpu/perf_event_p4.c +++ b/arch/x86/kernel/cpu/perf_event_p4.c | |||
@@ -18,6 +18,8 @@ | |||
18 | struct p4_event_bind { | 18 | struct p4_event_bind { |
19 | unsigned int opcode; /* Event code and ESCR selector */ | 19 | unsigned int opcode; /* Event code and ESCR selector */ |
20 | unsigned int escr_msr[2]; /* ESCR MSR for this event */ | 20 | unsigned int escr_msr[2]; /* ESCR MSR for this event */ |
21 | unsigned int escr_emask; /* valid ESCR EventMask bits */ | ||
22 | unsigned int shared; /* event is shared across threads */ | ||
21 | char cntr[2][P4_CNTR_LIMIT]; /* counter index (offset), -1 on abscence */ | 23 | char cntr[2][P4_CNTR_LIMIT]; /* counter index (offset), -1 on abscence */ |
22 | }; | 24 | }; |
23 | 25 | ||
@@ -66,231 +68,435 @@ static struct p4_event_bind p4_event_bind_map[] = { | |||
66 | [P4_EVENT_TC_DELIVER_MODE] = { | 68 | [P4_EVENT_TC_DELIVER_MODE] = { |
67 | .opcode = P4_OPCODE(P4_EVENT_TC_DELIVER_MODE), | 69 | .opcode = P4_OPCODE(P4_EVENT_TC_DELIVER_MODE), |
68 | .escr_msr = { MSR_P4_TC_ESCR0, MSR_P4_TC_ESCR1 }, | 70 | .escr_msr = { MSR_P4_TC_ESCR0, MSR_P4_TC_ESCR1 }, |
71 | .escr_emask = | ||
72 | P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, DD) | | ||
73 | P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, DB) | | ||
74 | P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, DI) | | ||
75 | P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, BD) | | ||
76 | P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, BB) | | ||
77 | P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, BI) | | ||
78 | P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, ID), | ||
79 | .shared = 1, | ||
69 | .cntr = { {4, 5, -1}, {6, 7, -1} }, | 80 | .cntr = { {4, 5, -1}, {6, 7, -1} }, |
70 | }, | 81 | }, |
71 | [P4_EVENT_BPU_FETCH_REQUEST] = { | 82 | [P4_EVENT_BPU_FETCH_REQUEST] = { |
72 | .opcode = P4_OPCODE(P4_EVENT_BPU_FETCH_REQUEST), | 83 | .opcode = P4_OPCODE(P4_EVENT_BPU_FETCH_REQUEST), |
73 | .escr_msr = { MSR_P4_BPU_ESCR0, MSR_P4_BPU_ESCR1 }, | 84 | .escr_msr = { MSR_P4_BPU_ESCR0, MSR_P4_BPU_ESCR1 }, |
85 | .escr_emask = | ||
86 | P4_ESCR_EMASK_BIT(P4_EVENT_BPU_FETCH_REQUEST, TCMISS), | ||
74 | .cntr = { {0, -1, -1}, {2, -1, -1} }, | 87 | .cntr = { {0, -1, -1}, {2, -1, -1} }, |
75 | }, | 88 | }, |
76 | [P4_EVENT_ITLB_REFERENCE] = { | 89 | [P4_EVENT_ITLB_REFERENCE] = { |
77 | .opcode = P4_OPCODE(P4_EVENT_ITLB_REFERENCE), | 90 | .opcode = P4_OPCODE(P4_EVENT_ITLB_REFERENCE), |
78 | .escr_msr = { MSR_P4_ITLB_ESCR0, MSR_P4_ITLB_ESCR1 }, | 91 | .escr_msr = { MSR_P4_ITLB_ESCR0, MSR_P4_ITLB_ESCR1 }, |
92 | .escr_emask = | ||
93 | P4_ESCR_EMASK_BIT(P4_EVENT_ITLB_REFERENCE, HIT) | | ||
94 | P4_ESCR_EMASK_BIT(P4_EVENT_ITLB_REFERENCE, MISS) | | ||
95 | P4_ESCR_EMASK_BIT(P4_EVENT_ITLB_REFERENCE, HIT_UK), | ||
79 | .cntr = { {0, -1, -1}, {2, -1, -1} }, | 96 | .cntr = { {0, -1, -1}, {2, -1, -1} }, |
80 | }, | 97 | }, |
81 | [P4_EVENT_MEMORY_CANCEL] = { | 98 | [P4_EVENT_MEMORY_CANCEL] = { |
82 | .opcode = P4_OPCODE(P4_EVENT_MEMORY_CANCEL), | 99 | .opcode = P4_OPCODE(P4_EVENT_MEMORY_CANCEL), |
83 | .escr_msr = { MSR_P4_DAC_ESCR0, MSR_P4_DAC_ESCR1 }, | 100 | .escr_msr = { MSR_P4_DAC_ESCR0, MSR_P4_DAC_ESCR1 }, |
101 | .escr_emask = | ||
102 | P4_ESCR_EMASK_BIT(P4_EVENT_MEMORY_CANCEL, ST_RB_FULL) | | ||
103 | P4_ESCR_EMASK_BIT(P4_EVENT_MEMORY_CANCEL, 64K_CONF), | ||
84 | .cntr = { {8, 9, -1}, {10, 11, -1} }, | 104 | .cntr = { {8, 9, -1}, {10, 11, -1} }, |
85 | }, | 105 | }, |
86 | [P4_EVENT_MEMORY_COMPLETE] = { | 106 | [P4_EVENT_MEMORY_COMPLETE] = { |
87 | .opcode = P4_OPCODE(P4_EVENT_MEMORY_COMPLETE), | 107 | .opcode = P4_OPCODE(P4_EVENT_MEMORY_COMPLETE), |
88 | .escr_msr = { MSR_P4_SAAT_ESCR0 , MSR_P4_SAAT_ESCR1 }, | 108 | .escr_msr = { MSR_P4_SAAT_ESCR0 , MSR_P4_SAAT_ESCR1 }, |
109 | .escr_emask = | ||
110 | P4_ESCR_EMASK_BIT(P4_EVENT_MEMORY_COMPLETE, LSC) | | ||
111 | P4_ESCR_EMASK_BIT(P4_EVENT_MEMORY_COMPLETE, SSC), | ||
89 | .cntr = { {8, 9, -1}, {10, 11, -1} }, | 112 | .cntr = { {8, 9, -1}, {10, 11, -1} }, |
90 | }, | 113 | }, |
91 | [P4_EVENT_LOAD_PORT_REPLAY] = { | 114 | [P4_EVENT_LOAD_PORT_REPLAY] = { |
92 | .opcode = P4_OPCODE(P4_EVENT_LOAD_PORT_REPLAY), | 115 | .opcode = P4_OPCODE(P4_EVENT_LOAD_PORT_REPLAY), |
93 | .escr_msr = { MSR_P4_SAAT_ESCR0, MSR_P4_SAAT_ESCR1 }, | 116 | .escr_msr = { MSR_P4_SAAT_ESCR0, MSR_P4_SAAT_ESCR1 }, |
117 | .escr_emask = | ||
118 | P4_ESCR_EMASK_BIT(P4_EVENT_LOAD_PORT_REPLAY, SPLIT_LD), | ||
94 | .cntr = { {8, 9, -1}, {10, 11, -1} }, | 119 | .cntr = { {8, 9, -1}, {10, 11, -1} }, |
95 | }, | 120 | }, |
96 | [P4_EVENT_STORE_PORT_REPLAY] = { | 121 | [P4_EVENT_STORE_PORT_REPLAY] = { |
97 | .opcode = P4_OPCODE(P4_EVENT_STORE_PORT_REPLAY), | 122 | .opcode = P4_OPCODE(P4_EVENT_STORE_PORT_REPLAY), |
98 | .escr_msr = { MSR_P4_SAAT_ESCR0 , MSR_P4_SAAT_ESCR1 }, | 123 | .escr_msr = { MSR_P4_SAAT_ESCR0 , MSR_P4_SAAT_ESCR1 }, |
124 | .escr_emask = | ||
125 | P4_ESCR_EMASK_BIT(P4_EVENT_STORE_PORT_REPLAY, SPLIT_ST), | ||
99 | .cntr = { {8, 9, -1}, {10, 11, -1} }, | 126 | .cntr = { {8, 9, -1}, {10, 11, -1} }, |
100 | }, | 127 | }, |
101 | [P4_EVENT_MOB_LOAD_REPLAY] = { | 128 | [P4_EVENT_MOB_LOAD_REPLAY] = { |
102 | .opcode = P4_OPCODE(P4_EVENT_MOB_LOAD_REPLAY), | 129 | .opcode = P4_OPCODE(P4_EVENT_MOB_LOAD_REPLAY), |
103 | .escr_msr = { MSR_P4_MOB_ESCR0, MSR_P4_MOB_ESCR1 }, | 130 | .escr_msr = { MSR_P4_MOB_ESCR0, MSR_P4_MOB_ESCR1 }, |
131 | .escr_emask = | ||
132 | P4_ESCR_EMASK_BIT(P4_EVENT_MOB_LOAD_REPLAY, NO_STA) | | ||
133 | P4_ESCR_EMASK_BIT(P4_EVENT_MOB_LOAD_REPLAY, NO_STD) | | ||
134 | P4_ESCR_EMASK_BIT(P4_EVENT_MOB_LOAD_REPLAY, PARTIAL_DATA) | | ||
135 | P4_ESCR_EMASK_BIT(P4_EVENT_MOB_LOAD_REPLAY, UNALGN_ADDR), | ||
104 | .cntr = { {0, -1, -1}, {2, -1, -1} }, | 136 | .cntr = { {0, -1, -1}, {2, -1, -1} }, |
105 | }, | 137 | }, |
106 | [P4_EVENT_PAGE_WALK_TYPE] = { | 138 | [P4_EVENT_PAGE_WALK_TYPE] = { |
107 | .opcode = P4_OPCODE(P4_EVENT_PAGE_WALK_TYPE), | 139 | .opcode = P4_OPCODE(P4_EVENT_PAGE_WALK_TYPE), |
108 | .escr_msr = { MSR_P4_PMH_ESCR0, MSR_P4_PMH_ESCR1 }, | 140 | .escr_msr = { MSR_P4_PMH_ESCR0, MSR_P4_PMH_ESCR1 }, |
141 | .escr_emask = | ||
142 | P4_ESCR_EMASK_BIT(P4_EVENT_PAGE_WALK_TYPE, DTMISS) | | ||
143 | P4_ESCR_EMASK_BIT(P4_EVENT_PAGE_WALK_TYPE, ITMISS), | ||
144 | .shared = 1, | ||
109 | .cntr = { {0, -1, -1}, {2, -1, -1} }, | 145 | .cntr = { {0, -1, -1}, {2, -1, -1} }, |
110 | }, | 146 | }, |
111 | [P4_EVENT_BSQ_CACHE_REFERENCE] = { | 147 | [P4_EVENT_BSQ_CACHE_REFERENCE] = { |
112 | .opcode = P4_OPCODE(P4_EVENT_BSQ_CACHE_REFERENCE), | 148 | .opcode = P4_OPCODE(P4_EVENT_BSQ_CACHE_REFERENCE), |
113 | .escr_msr = { MSR_P4_BSU_ESCR0, MSR_P4_BSU_ESCR1 }, | 149 | .escr_msr = { MSR_P4_BSU_ESCR0, MSR_P4_BSU_ESCR1 }, |
150 | .escr_emask = | ||
151 | P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_HITS) | | ||
152 | P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_HITE) | | ||
153 | P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_HITM) | | ||
154 | P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_HITS) | | ||
155 | P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_HITE) | | ||
156 | P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_HITM) | | ||
157 | P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_MISS) | | ||
158 | P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_MISS) | | ||
159 | P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, WR_2ndL_MISS), | ||
114 | .cntr = { {0, -1, -1}, {2, -1, -1} }, | 160 | .cntr = { {0, -1, -1}, {2, -1, -1} }, |
115 | }, | 161 | }, |
116 | [P4_EVENT_IOQ_ALLOCATION] = { | 162 | [P4_EVENT_IOQ_ALLOCATION] = { |
117 | .opcode = P4_OPCODE(P4_EVENT_IOQ_ALLOCATION), | 163 | .opcode = P4_OPCODE(P4_EVENT_IOQ_ALLOCATION), |
118 | .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 }, | 164 | .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 }, |
165 | .escr_emask = | ||
166 | P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, DEFAULT) | | ||
167 | P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, ALL_READ) | | ||
168 | P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, ALL_WRITE) | | ||
169 | P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, MEM_UC) | | ||
170 | P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, MEM_WC) | | ||
171 | P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, MEM_WT) | | ||
172 | P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, MEM_WP) | | ||
173 | P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, MEM_WB) | | ||
174 | P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, OWN) | | ||
175 | P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, OTHER) | | ||
176 | P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, PREFETCH), | ||
119 | .cntr = { {0, -1, -1}, {2, -1, -1} }, | 177 | .cntr = { {0, -1, -1}, {2, -1, -1} }, |
120 | }, | 178 | }, |
121 | [P4_EVENT_IOQ_ACTIVE_ENTRIES] = { /* shared ESCR */ | 179 | [P4_EVENT_IOQ_ACTIVE_ENTRIES] = { /* shared ESCR */ |
122 | .opcode = P4_OPCODE(P4_EVENT_IOQ_ACTIVE_ENTRIES), | 180 | .opcode = P4_OPCODE(P4_EVENT_IOQ_ACTIVE_ENTRIES), |
123 | .escr_msr = { MSR_P4_FSB_ESCR1, MSR_P4_FSB_ESCR1 }, | 181 | .escr_msr = { MSR_P4_FSB_ESCR1, MSR_P4_FSB_ESCR1 }, |
182 | .escr_emask = | ||
183 | P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, DEFAULT) | | ||
184 | P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, ALL_READ) | | ||
185 | P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, ALL_WRITE) | | ||
186 | P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, MEM_UC) | | ||
187 | P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, MEM_WC) | | ||
188 | P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, MEM_WT) | | ||
189 | P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, MEM_WP) | | ||
190 | P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, MEM_WB) | | ||
191 | P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, OWN) | | ||
192 | P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, OTHER) | | ||
193 | P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, PREFETCH), | ||
124 | .cntr = { {2, -1, -1}, {3, -1, -1} }, | 194 | .cntr = { {2, -1, -1}, {3, -1, -1} }, |
125 | }, | 195 | }, |
126 | [P4_EVENT_FSB_DATA_ACTIVITY] = { | 196 | [P4_EVENT_FSB_DATA_ACTIVITY] = { |
127 | .opcode = P4_OPCODE(P4_EVENT_FSB_DATA_ACTIVITY), | 197 | .opcode = P4_OPCODE(P4_EVENT_FSB_DATA_ACTIVITY), |
128 | .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 }, | 198 | .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 }, |
199 | .escr_emask = | ||
200 | P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DRDY_DRV) | | ||
201 | P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DRDY_OWN) | | ||
202 | P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DRDY_OTHER) | | ||
203 | P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DBSY_DRV) | | ||
204 | P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DBSY_OWN) | | ||
205 | P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DBSY_OTHER), | ||
206 | .shared = 1, | ||
129 | .cntr = { {0, -1, -1}, {2, -1, -1} }, | 207 | .cntr = { {0, -1, -1}, {2, -1, -1} }, |
130 | }, | 208 | }, |
131 | [P4_EVENT_BSQ_ALLOCATION] = { /* shared ESCR, broken CCCR1 */ | 209 | [P4_EVENT_BSQ_ALLOCATION] = { /* shared ESCR, broken CCCR1 */ |
132 | .opcode = P4_OPCODE(P4_EVENT_BSQ_ALLOCATION), | 210 | .opcode = P4_OPCODE(P4_EVENT_BSQ_ALLOCATION), |
133 | .escr_msr = { MSR_P4_BSU_ESCR0, MSR_P4_BSU_ESCR0 }, | 211 | .escr_msr = { MSR_P4_BSU_ESCR0, MSR_P4_BSU_ESCR0 }, |
212 | .escr_emask = | ||
213 | P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_TYPE0) | | ||
214 | P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_TYPE1) | | ||
215 | P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_LEN0) | | ||
216 | P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_LEN1) | | ||
217 | P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_IO_TYPE) | | ||
218 | P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_LOCK_TYPE) | | ||
219 | P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_CACHE_TYPE) | | ||
220 | P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_SPLIT_TYPE) | | ||
221 | P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_DEM_TYPE) | | ||
222 | P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_ORD_TYPE) | | ||
223 | P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, MEM_TYPE0) | | ||
224 | P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, MEM_TYPE1) | | ||
225 | P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, MEM_TYPE2), | ||
134 | .cntr = { {0, -1, -1}, {1, -1, -1} }, | 226 | .cntr = { {0, -1, -1}, {1, -1, -1} }, |
135 | }, | 227 | }, |
136 | [P4_EVENT_BSQ_ACTIVE_ENTRIES] = { /* shared ESCR */ | 228 | [P4_EVENT_BSQ_ACTIVE_ENTRIES] = { /* shared ESCR */ |
137 | .opcode = P4_OPCODE(P4_EVENT_BSQ_ACTIVE_ENTRIES), | 229 | .opcode = P4_OPCODE(P4_EVENT_BSQ_ACTIVE_ENTRIES), |
138 | .escr_msr = { MSR_P4_BSU_ESCR1 , MSR_P4_BSU_ESCR1 }, | 230 | .escr_msr = { MSR_P4_BSU_ESCR1 , MSR_P4_BSU_ESCR1 }, |
231 | .escr_emask = | ||
232 | P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_TYPE0) | | ||
233 | P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_TYPE1) | | ||
234 | P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_LEN0) | | ||
235 | P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_LEN1) | | ||
236 | P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_IO_TYPE) | | ||
237 | P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_LOCK_TYPE) | | ||
238 | P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_CACHE_TYPE) | | ||
239 | P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_SPLIT_TYPE) | | ||
240 | P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_DEM_TYPE) | | ||
241 | P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_ORD_TYPE) | | ||
242 | P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, MEM_TYPE0) | | ||
243 | P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, MEM_TYPE1) | | ||
244 | P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, MEM_TYPE2), | ||
139 | .cntr = { {2, -1, -1}, {3, -1, -1} }, | 245 | .cntr = { {2, -1, -1}, {3, -1, -1} }, |
140 | }, | 246 | }, |
141 | [P4_EVENT_SSE_INPUT_ASSIST] = { | 247 | [P4_EVENT_SSE_INPUT_ASSIST] = { |
142 | .opcode = P4_OPCODE(P4_EVENT_SSE_INPUT_ASSIST), | 248 | .opcode = P4_OPCODE(P4_EVENT_SSE_INPUT_ASSIST), |
143 | .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 }, | 249 | .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 }, |
250 | .escr_emask = | ||
251 | P4_ESCR_EMASK_BIT(P4_EVENT_SSE_INPUT_ASSIST, ALL), | ||
252 | .shared = 1, | ||
144 | .cntr = { {8, 9, -1}, {10, 11, -1} }, | 253 | .cntr = { {8, 9, -1}, {10, 11, -1} }, |
145 | }, | 254 | }, |
146 | [P4_EVENT_PACKED_SP_UOP] = { | 255 | [P4_EVENT_PACKED_SP_UOP] = { |
147 | .opcode = P4_OPCODE(P4_EVENT_PACKED_SP_UOP), | 256 | .opcode = P4_OPCODE(P4_EVENT_PACKED_SP_UOP), |
148 | .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 }, | 257 | .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 }, |
258 | .escr_emask = | ||
259 | P4_ESCR_EMASK_BIT(P4_EVENT_PACKED_SP_UOP, ALL), | ||
260 | .shared = 1, | ||
149 | .cntr = { {8, 9, -1}, {10, 11, -1} }, | 261 | .cntr = { {8, 9, -1}, {10, 11, -1} }, |
150 | }, | 262 | }, |
151 | [P4_EVENT_PACKED_DP_UOP] = { | 263 | [P4_EVENT_PACKED_DP_UOP] = { |
152 | .opcode = P4_OPCODE(P4_EVENT_PACKED_DP_UOP), | 264 | .opcode = P4_OPCODE(P4_EVENT_PACKED_DP_UOP), |
153 | .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 }, | 265 | .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 }, |
266 | .escr_emask = | ||
267 | P4_ESCR_EMASK_BIT(P4_EVENT_PACKED_DP_UOP, ALL), | ||
268 | .shared = 1, | ||
154 | .cntr = { {8, 9, -1}, {10, 11, -1} }, | 269 | .cntr = { {8, 9, -1}, {10, 11, -1} }, |
155 | }, | 270 | }, |
156 | [P4_EVENT_SCALAR_SP_UOP] = { | 271 | [P4_EVENT_SCALAR_SP_UOP] = { |
157 | .opcode = P4_OPCODE(P4_EVENT_SCALAR_SP_UOP), | 272 | .opcode = P4_OPCODE(P4_EVENT_SCALAR_SP_UOP), |
158 | .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 }, | 273 | .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 }, |
274 | .escr_emask = | ||
275 | P4_ESCR_EMASK_BIT(P4_EVENT_SCALAR_SP_UOP, ALL), | ||
276 | .shared = 1, | ||
159 | .cntr = { {8, 9, -1}, {10, 11, -1} }, | 277 | .cntr = { {8, 9, -1}, {10, 11, -1} }, |
160 | }, | 278 | }, |
161 | [P4_EVENT_SCALAR_DP_UOP] = { | 279 | [P4_EVENT_SCALAR_DP_UOP] = { |
162 | .opcode = P4_OPCODE(P4_EVENT_SCALAR_DP_UOP), | 280 | .opcode = P4_OPCODE(P4_EVENT_SCALAR_DP_UOP), |
163 | .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 }, | 281 | .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 }, |
282 | .escr_emask = | ||
283 | P4_ESCR_EMASK_BIT(P4_EVENT_SCALAR_DP_UOP, ALL), | ||
284 | .shared = 1, | ||
164 | .cntr = { {8, 9, -1}, {10, 11, -1} }, | 285 | .cntr = { {8, 9, -1}, {10, 11, -1} }, |
165 | }, | 286 | }, |
166 | [P4_EVENT_64BIT_MMX_UOP] = { | 287 | [P4_EVENT_64BIT_MMX_UOP] = { |
167 | .opcode = P4_OPCODE(P4_EVENT_64BIT_MMX_UOP), | 288 | .opcode = P4_OPCODE(P4_EVENT_64BIT_MMX_UOP), |
168 | .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 }, | 289 | .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 }, |
290 | .escr_emask = | ||
291 | P4_ESCR_EMASK_BIT(P4_EVENT_64BIT_MMX_UOP, ALL), | ||
292 | .shared = 1, | ||
169 | .cntr = { {8, 9, -1}, {10, 11, -1} }, | 293 | .cntr = { {8, 9, -1}, {10, 11, -1} }, |
170 | }, | 294 | }, |
171 | [P4_EVENT_128BIT_MMX_UOP] = { | 295 | [P4_EVENT_128BIT_MMX_UOP] = { |
172 | .opcode = P4_OPCODE(P4_EVENT_128BIT_MMX_UOP), | 296 | .opcode = P4_OPCODE(P4_EVENT_128BIT_MMX_UOP), |
173 | .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 }, | 297 | .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 }, |
298 | .escr_emask = | ||
299 | P4_ESCR_EMASK_BIT(P4_EVENT_128BIT_MMX_UOP, ALL), | ||
300 | .shared = 1, | ||
174 | .cntr = { {8, 9, -1}, {10, 11, -1} }, | 301 | .cntr = { {8, 9, -1}, {10, 11, -1} }, |
175 | }, | 302 | }, |
176 | [P4_EVENT_X87_FP_UOP] = { | 303 | [P4_EVENT_X87_FP_UOP] = { |
177 | .opcode = P4_OPCODE(P4_EVENT_X87_FP_UOP), | 304 | .opcode = P4_OPCODE(P4_EVENT_X87_FP_UOP), |
178 | .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 }, | 305 | .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 }, |
306 | .escr_emask = | ||
307 | P4_ESCR_EMASK_BIT(P4_EVENT_X87_FP_UOP, ALL), | ||
308 | .shared = 1, | ||
179 | .cntr = { {8, 9, -1}, {10, 11, -1} }, | 309 | .cntr = { {8, 9, -1}, {10, 11, -1} }, |
180 | }, | 310 | }, |
181 | [P4_EVENT_TC_MISC] = { | 311 | [P4_EVENT_TC_MISC] = { |
182 | .opcode = P4_OPCODE(P4_EVENT_TC_MISC), | 312 | .opcode = P4_OPCODE(P4_EVENT_TC_MISC), |
183 | .escr_msr = { MSR_P4_TC_ESCR0, MSR_P4_TC_ESCR1 }, | 313 | .escr_msr = { MSR_P4_TC_ESCR0, MSR_P4_TC_ESCR1 }, |
314 | .escr_emask = | ||
315 | P4_ESCR_EMASK_BIT(P4_EVENT_TC_MISC, FLUSH), | ||
184 | .cntr = { {4, 5, -1}, {6, 7, -1} }, | 316 | .cntr = { {4, 5, -1}, {6, 7, -1} }, |
185 | }, | 317 | }, |
186 | [P4_EVENT_GLOBAL_POWER_EVENTS] = { | 318 | [P4_EVENT_GLOBAL_POWER_EVENTS] = { |
187 | .opcode = P4_OPCODE(P4_EVENT_GLOBAL_POWER_EVENTS), | 319 | .opcode = P4_OPCODE(P4_EVENT_GLOBAL_POWER_EVENTS), |
188 | .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 }, | 320 | .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 }, |
321 | .escr_emask = | ||
322 | P4_ESCR_EMASK_BIT(P4_EVENT_GLOBAL_POWER_EVENTS, RUNNING), | ||
189 | .cntr = { {0, -1, -1}, {2, -1, -1} }, | 323 | .cntr = { {0, -1, -1}, {2, -1, -1} }, |
190 | }, | 324 | }, |
191 | [P4_EVENT_TC_MS_XFER] = { | 325 | [P4_EVENT_TC_MS_XFER] = { |
192 | .opcode = P4_OPCODE(P4_EVENT_TC_MS_XFER), | 326 | .opcode = P4_OPCODE(P4_EVENT_TC_MS_XFER), |
193 | .escr_msr = { MSR_P4_MS_ESCR0, MSR_P4_MS_ESCR1 }, | 327 | .escr_msr = { MSR_P4_MS_ESCR0, MSR_P4_MS_ESCR1 }, |
328 | .escr_emask = | ||
329 | P4_ESCR_EMASK_BIT(P4_EVENT_TC_MS_XFER, CISC), | ||
194 | .cntr = { {4, 5, -1}, {6, 7, -1} }, | 330 | .cntr = { {4, 5, -1}, {6, 7, -1} }, |
195 | }, | 331 | }, |
196 | [P4_EVENT_UOP_QUEUE_WRITES] = { | 332 | [P4_EVENT_UOP_QUEUE_WRITES] = { |
197 | .opcode = P4_OPCODE(P4_EVENT_UOP_QUEUE_WRITES), | 333 | .opcode = P4_OPCODE(P4_EVENT_UOP_QUEUE_WRITES), |
198 | .escr_msr = { MSR_P4_MS_ESCR0, MSR_P4_MS_ESCR1 }, | 334 | .escr_msr = { MSR_P4_MS_ESCR0, MSR_P4_MS_ESCR1 }, |
335 | .escr_emask = | ||
336 | P4_ESCR_EMASK_BIT(P4_EVENT_UOP_QUEUE_WRITES, FROM_TC_BUILD) | | ||
337 | P4_ESCR_EMASK_BIT(P4_EVENT_UOP_QUEUE_WRITES, FROM_TC_DELIVER) | | ||
338 | P4_ESCR_EMASK_BIT(P4_EVENT_UOP_QUEUE_WRITES, FROM_ROM), | ||
199 | .cntr = { {4, 5, -1}, {6, 7, -1} }, | 339 | .cntr = { {4, 5, -1}, {6, 7, -1} }, |
200 | }, | 340 | }, |
201 | [P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE] = { | 341 | [P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE] = { |
202 | .opcode = P4_OPCODE(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE), | 342 | .opcode = P4_OPCODE(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE), |
203 | .escr_msr = { MSR_P4_TBPU_ESCR0 , MSR_P4_TBPU_ESCR0 }, | 343 | .escr_msr = { MSR_P4_TBPU_ESCR0 , MSR_P4_TBPU_ESCR0 }, |
344 | .escr_emask = | ||
345 | P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE, CONDITIONAL) | | ||
346 | P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE, CALL) | | ||
347 | P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE, RETURN) | | ||
348 | P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE, INDIRECT), | ||
204 | .cntr = { {4, 5, -1}, {6, 7, -1} }, | 349 | .cntr = { {4, 5, -1}, {6, 7, -1} }, |
205 | }, | 350 | }, |
206 | [P4_EVENT_RETIRED_BRANCH_TYPE] = { | 351 | [P4_EVENT_RETIRED_BRANCH_TYPE] = { |
207 | .opcode = P4_OPCODE(P4_EVENT_RETIRED_BRANCH_TYPE), | 352 | .opcode = P4_OPCODE(P4_EVENT_RETIRED_BRANCH_TYPE), |
208 | .escr_msr = { MSR_P4_TBPU_ESCR0 , MSR_P4_TBPU_ESCR1 }, | 353 | .escr_msr = { MSR_P4_TBPU_ESCR0 , MSR_P4_TBPU_ESCR1 }, |
354 | .escr_emask = | ||
355 | P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, CONDITIONAL) | | ||
356 | P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, CALL) | | ||
357 | P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, RETURN) | | ||
358 | P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, INDIRECT), | ||
209 | .cntr = { {4, 5, -1}, {6, 7, -1} }, | 359 | .cntr = { {4, 5, -1}, {6, 7, -1} }, |
210 | }, | 360 | }, |
211 | [P4_EVENT_RESOURCE_STALL] = { | 361 | [P4_EVENT_RESOURCE_STALL] = { |
212 | .opcode = P4_OPCODE(P4_EVENT_RESOURCE_STALL), | 362 | .opcode = P4_OPCODE(P4_EVENT_RESOURCE_STALL), |
213 | .escr_msr = { MSR_P4_ALF_ESCR0, MSR_P4_ALF_ESCR1 }, | 363 | .escr_msr = { MSR_P4_ALF_ESCR0, MSR_P4_ALF_ESCR1 }, |
364 | .escr_emask = | ||
365 | P4_ESCR_EMASK_BIT(P4_EVENT_RESOURCE_STALL, SBFULL), | ||
214 | .cntr = { {12, 13, 16}, {14, 15, 17} }, | 366 | .cntr = { {12, 13, 16}, {14, 15, 17} }, |
215 | }, | 367 | }, |
216 | [P4_EVENT_WC_BUFFER] = { | 368 | [P4_EVENT_WC_BUFFER] = { |
217 | .opcode = P4_OPCODE(P4_EVENT_WC_BUFFER), | 369 | .opcode = P4_OPCODE(P4_EVENT_WC_BUFFER), |
218 | .escr_msr = { MSR_P4_DAC_ESCR0, MSR_P4_DAC_ESCR1 }, | 370 | .escr_msr = { MSR_P4_DAC_ESCR0, MSR_P4_DAC_ESCR1 }, |
371 | .escr_emask = | ||
372 | P4_ESCR_EMASK_BIT(P4_EVENT_WC_BUFFER, WCB_EVICTS) | | ||
373 | P4_ESCR_EMASK_BIT(P4_EVENT_WC_BUFFER, WCB_FULL_EVICTS), | ||
374 | .shared = 1, | ||
219 | .cntr = { {8, 9, -1}, {10, 11, -1} }, | 375 | .cntr = { {8, 9, -1}, {10, 11, -1} }, |
220 | }, | 376 | }, |
221 | [P4_EVENT_B2B_CYCLES] = { | 377 | [P4_EVENT_B2B_CYCLES] = { |
222 | .opcode = P4_OPCODE(P4_EVENT_B2B_CYCLES), | 378 | .opcode = P4_OPCODE(P4_EVENT_B2B_CYCLES), |
223 | .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 }, | 379 | .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 }, |
380 | .escr_emask = 0, | ||
224 | .cntr = { {0, -1, -1}, {2, -1, -1} }, | 381 | .cntr = { {0, -1, -1}, {2, -1, -1} }, |
225 | }, | 382 | }, |
226 | [P4_EVENT_BNR] = { | 383 | [P4_EVENT_BNR] = { |
227 | .opcode = P4_OPCODE(P4_EVENT_BNR), | 384 | .opcode = P4_OPCODE(P4_EVENT_BNR), |
228 | .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 }, | 385 | .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 }, |
386 | .escr_emask = 0, | ||
229 | .cntr = { {0, -1, -1}, {2, -1, -1} }, | 387 | .cntr = { {0, -1, -1}, {2, -1, -1} }, |
230 | }, | 388 | }, |
231 | [P4_EVENT_SNOOP] = { | 389 | [P4_EVENT_SNOOP] = { |
232 | .opcode = P4_OPCODE(P4_EVENT_SNOOP), | 390 | .opcode = P4_OPCODE(P4_EVENT_SNOOP), |
233 | .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 }, | 391 | .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 }, |
392 | .escr_emask = 0, | ||
234 | .cntr = { {0, -1, -1}, {2, -1, -1} }, | 393 | .cntr = { {0, -1, -1}, {2, -1, -1} }, |
235 | }, | 394 | }, |
236 | [P4_EVENT_RESPONSE] = { | 395 | [P4_EVENT_RESPONSE] = { |
237 | .opcode = P4_OPCODE(P4_EVENT_RESPONSE), | 396 | .opcode = P4_OPCODE(P4_EVENT_RESPONSE), |
238 | .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 }, | 397 | .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 }, |
398 | .escr_emask = 0, | ||
239 | .cntr = { {0, -1, -1}, {2, -1, -1} }, | 399 | .cntr = { {0, -1, -1}, {2, -1, -1} }, |
240 | }, | 400 | }, |
241 | [P4_EVENT_FRONT_END_EVENT] = { | 401 | [P4_EVENT_FRONT_END_EVENT] = { |
242 | .opcode = P4_OPCODE(P4_EVENT_FRONT_END_EVENT), | 402 | .opcode = P4_OPCODE(P4_EVENT_FRONT_END_EVENT), |
243 | .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 }, | 403 | .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 }, |
404 | .escr_emask = | ||
405 | P4_ESCR_EMASK_BIT(P4_EVENT_FRONT_END_EVENT, NBOGUS) | | ||
406 | P4_ESCR_EMASK_BIT(P4_EVENT_FRONT_END_EVENT, BOGUS), | ||
244 | .cntr = { {12, 13, 16}, {14, 15, 17} }, | 407 | .cntr = { {12, 13, 16}, {14, 15, 17} }, |
245 | }, | 408 | }, |
246 | [P4_EVENT_EXECUTION_EVENT] = { | 409 | [P4_EVENT_EXECUTION_EVENT] = { |
247 | .opcode = P4_OPCODE(P4_EVENT_EXECUTION_EVENT), | 410 | .opcode = P4_OPCODE(P4_EVENT_EXECUTION_EVENT), |
248 | .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 }, | 411 | .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 }, |
412 | .escr_emask = | ||
413 | P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS0) | | ||
414 | P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS1) | | ||
415 | P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS2) | | ||
416 | P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS3) | | ||
417 | P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS0) | | ||
418 | P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS1) | | ||
419 | P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS2) | | ||
420 | P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS3), | ||
249 | .cntr = { {12, 13, 16}, {14, 15, 17} }, | 421 | .cntr = { {12, 13, 16}, {14, 15, 17} }, |
250 | }, | 422 | }, |
251 | [P4_EVENT_REPLAY_EVENT] = { | 423 | [P4_EVENT_REPLAY_EVENT] = { |
252 | .opcode = P4_OPCODE(P4_EVENT_REPLAY_EVENT), | 424 | .opcode = P4_OPCODE(P4_EVENT_REPLAY_EVENT), |
253 | .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 }, | 425 | .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 }, |
426 | .escr_emask = | ||
427 | P4_ESCR_EMASK_BIT(P4_EVENT_REPLAY_EVENT, NBOGUS) | | ||
428 | P4_ESCR_EMASK_BIT(P4_EVENT_REPLAY_EVENT, BOGUS), | ||
254 | .cntr = { {12, 13, 16}, {14, 15, 17} }, | 429 | .cntr = { {12, 13, 16}, {14, 15, 17} }, |
255 | }, | 430 | }, |
256 | [P4_EVENT_INSTR_RETIRED] = { | 431 | [P4_EVENT_INSTR_RETIRED] = { |
257 | .opcode = P4_OPCODE(P4_EVENT_INSTR_RETIRED), | 432 | .opcode = P4_OPCODE(P4_EVENT_INSTR_RETIRED), |
258 | .escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 }, | 433 | .escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 }, |
434 | .escr_emask = | ||
435 | P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_RETIRED, NBOGUSNTAG) | | ||
436 | P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_RETIRED, NBOGUSTAG) | | ||
437 | P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_RETIRED, BOGUSNTAG) | | ||
438 | P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_RETIRED, BOGUSTAG), | ||
259 | .cntr = { {12, 13, 16}, {14, 15, 17} }, | 439 | .cntr = { {12, 13, 16}, {14, 15, 17} }, |
260 | }, | 440 | }, |
261 | [P4_EVENT_UOPS_RETIRED] = { | 441 | [P4_EVENT_UOPS_RETIRED] = { |
262 | .opcode = P4_OPCODE(P4_EVENT_UOPS_RETIRED), | 442 | .opcode = P4_OPCODE(P4_EVENT_UOPS_RETIRED), |
263 | .escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 }, | 443 | .escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 }, |
444 | .escr_emask = | ||
445 | P4_ESCR_EMASK_BIT(P4_EVENT_UOPS_RETIRED, NBOGUS) | | ||
446 | P4_ESCR_EMASK_BIT(P4_EVENT_UOPS_RETIRED, BOGUS), | ||
264 | .cntr = { {12, 13, 16}, {14, 15, 17} }, | 447 | .cntr = { {12, 13, 16}, {14, 15, 17} }, |
265 | }, | 448 | }, |
266 | [P4_EVENT_UOP_TYPE] = { | 449 | [P4_EVENT_UOP_TYPE] = { |
267 | .opcode = P4_OPCODE(P4_EVENT_UOP_TYPE), | 450 | .opcode = P4_OPCODE(P4_EVENT_UOP_TYPE), |
268 | .escr_msr = { MSR_P4_RAT_ESCR0, MSR_P4_RAT_ESCR1 }, | 451 | .escr_msr = { MSR_P4_RAT_ESCR0, MSR_P4_RAT_ESCR1 }, |
452 | .escr_emask = | ||
453 | P4_ESCR_EMASK_BIT(P4_EVENT_UOP_TYPE, TAGLOADS) | | ||
454 | P4_ESCR_EMASK_BIT(P4_EVENT_UOP_TYPE, TAGSTORES), | ||
269 | .cntr = { {12, 13, 16}, {14, 15, 17} }, | 455 | .cntr = { {12, 13, 16}, {14, 15, 17} }, |
270 | }, | 456 | }, |
271 | [P4_EVENT_BRANCH_RETIRED] = { | 457 | [P4_EVENT_BRANCH_RETIRED] = { |
272 | .opcode = P4_OPCODE(P4_EVENT_BRANCH_RETIRED), | 458 | .opcode = P4_OPCODE(P4_EVENT_BRANCH_RETIRED), |
273 | .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 }, | 459 | .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 }, |
460 | .escr_emask = | ||
461 | P4_ESCR_EMASK_BIT(P4_EVENT_BRANCH_RETIRED, MMNP) | | ||
462 | P4_ESCR_EMASK_BIT(P4_EVENT_BRANCH_RETIRED, MMNM) | | ||
463 | P4_ESCR_EMASK_BIT(P4_EVENT_BRANCH_RETIRED, MMTP) | | ||
464 | P4_ESCR_EMASK_BIT(P4_EVENT_BRANCH_RETIRED, MMTM), | ||
274 | .cntr = { {12, 13, 16}, {14, 15, 17} }, | 465 | .cntr = { {12, 13, 16}, {14, 15, 17} }, |
275 | }, | 466 | }, |
276 | [P4_EVENT_MISPRED_BRANCH_RETIRED] = { | 467 | [P4_EVENT_MISPRED_BRANCH_RETIRED] = { |
277 | .opcode = P4_OPCODE(P4_EVENT_MISPRED_BRANCH_RETIRED), | 468 | .opcode = P4_OPCODE(P4_EVENT_MISPRED_BRANCH_RETIRED), |
278 | .escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 }, | 469 | .escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 }, |
470 | .escr_emask = | ||
471 | P4_ESCR_EMASK_BIT(P4_EVENT_MISPRED_BRANCH_RETIRED, NBOGUS), | ||
279 | .cntr = { {12, 13, 16}, {14, 15, 17} }, | 472 | .cntr = { {12, 13, 16}, {14, 15, 17} }, |
280 | }, | 473 | }, |
281 | [P4_EVENT_X87_ASSIST] = { | 474 | [P4_EVENT_X87_ASSIST] = { |
282 | .opcode = P4_OPCODE(P4_EVENT_X87_ASSIST), | 475 | .opcode = P4_OPCODE(P4_EVENT_X87_ASSIST), |
283 | .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 }, | 476 | .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 }, |
477 | .escr_emask = | ||
478 | P4_ESCR_EMASK_BIT(P4_EVENT_X87_ASSIST, FPSU) | | ||
479 | P4_ESCR_EMASK_BIT(P4_EVENT_X87_ASSIST, FPSO) | | ||
480 | P4_ESCR_EMASK_BIT(P4_EVENT_X87_ASSIST, POAO) | | ||
481 | P4_ESCR_EMASK_BIT(P4_EVENT_X87_ASSIST, POAU) | | ||
482 | P4_ESCR_EMASK_BIT(P4_EVENT_X87_ASSIST, PREA), | ||
284 | .cntr = { {12, 13, 16}, {14, 15, 17} }, | 483 | .cntr = { {12, 13, 16}, {14, 15, 17} }, |
285 | }, | 484 | }, |
286 | [P4_EVENT_MACHINE_CLEAR] = { | 485 | [P4_EVENT_MACHINE_CLEAR] = { |
287 | .opcode = P4_OPCODE(P4_EVENT_MACHINE_CLEAR), | 486 | .opcode = P4_OPCODE(P4_EVENT_MACHINE_CLEAR), |
288 | .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 }, | 487 | .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 }, |
488 | .escr_emask = | ||
489 | P4_ESCR_EMASK_BIT(P4_EVENT_MACHINE_CLEAR, CLEAR) | | ||
490 | P4_ESCR_EMASK_BIT(P4_EVENT_MACHINE_CLEAR, MOCLEAR) | | ||
491 | P4_ESCR_EMASK_BIT(P4_EVENT_MACHINE_CLEAR, SMCLEAR), | ||
289 | .cntr = { {12, 13, 16}, {14, 15, 17} }, | 492 | .cntr = { {12, 13, 16}, {14, 15, 17} }, |
290 | }, | 493 | }, |
291 | [P4_EVENT_INSTR_COMPLETED] = { | 494 | [P4_EVENT_INSTR_COMPLETED] = { |
292 | .opcode = P4_OPCODE(P4_EVENT_INSTR_COMPLETED), | 495 | .opcode = P4_OPCODE(P4_EVENT_INSTR_COMPLETED), |
293 | .escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 }, | 496 | .escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 }, |
497 | .escr_emask = | ||
498 | P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_COMPLETED, NBOGUS) | | ||
499 | P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_COMPLETED, BOGUS), | ||
294 | .cntr = { {12, 13, 16}, {14, 15, 17} }, | 500 | .cntr = { {12, 13, 16}, {14, 15, 17} }, |
295 | }, | 501 | }, |
296 | }; | 502 | }; |
@@ -428,29 +634,73 @@ static u64 p4_pmu_event_map(int hw_event) | |||
428 | return config; | 634 | return config; |
429 | } | 635 | } |
430 | 636 | ||
637 | /* check cpu model specifics */ | ||
638 | static bool p4_event_match_cpu_model(unsigned int event_idx) | ||
639 | { | ||
640 | /* INSTR_COMPLETED event only exists for models 3, 4, 6 (Prescott) */ | ||
641 | if (event_idx == P4_EVENT_INSTR_COMPLETED) { | ||
642 | if (boot_cpu_data.x86_model != 3 && | ||
643 | boot_cpu_data.x86_model != 4 && | ||
644 | boot_cpu_data.x86_model != 6) | ||
645 | return false; | ||
646 | } | ||
647 | |||
648 | /* | ||
649 | * For info | ||
650 | * - IQ_ESCR0, IQ_ESCR1 only for models 1 and 2 | ||
651 | */ | ||
652 | |||
653 | return true; | ||
654 | } | ||
655 | |||
431 | static int p4_validate_raw_event(struct perf_event *event) | 656 | static int p4_validate_raw_event(struct perf_event *event) |
432 | { | 657 | { |
433 | unsigned int v; | 658 | unsigned int v, emask; |
434 | 659 | ||
435 | /* user data may have out-of-bound event index */ | 660 | /* User data may have out-of-bound event index */ |
436 | v = p4_config_unpack_event(event->attr.config); | 661 | v = p4_config_unpack_event(event->attr.config); |
437 | if (v >= ARRAY_SIZE(p4_event_bind_map)) { | 662 | if (v >= ARRAY_SIZE(p4_event_bind_map)) |
438 | pr_warning("P4 PMU: Unknown event code: %d\n", v); | 663 | return -EINVAL; |
664 | |||
665 | /* It may be unsupported: */ | ||
666 | if (!p4_event_match_cpu_model(v)) | ||
439 | return -EINVAL; | 667 | return -EINVAL; |
668 | |||
669 | /* | ||
670 | * NOTE: P4_CCCR_THREAD_ANY does not have the same meaning as | ||
671 | * in Architectural Performance Monitoring, it means not | ||
672 | * on _which_ logical cpu to count but rather _when_, ie it | ||
673 | * depends on logical cpu state -- count event if one cpu active, | ||
674 | * none, both or any, so we just allow user to pass any value | ||
675 | * desired. | ||
676 | * | ||
677 | * In turn we always set Tx_OS/Tx_USR bits bound to logical | ||
678 | * cpu without their propagation to another cpu | ||
679 | */ | ||
680 | |||
681 | /* | ||
682 | * if an event is shared across the logical threads | ||
683 | * the user needs special permissions to be able to use it | ||
684 | */ | ||
685 | if (p4_event_bind_map[v].shared) { | ||
686 | if (perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN)) | ||
687 | return -EACCES; | ||
440 | } | 688 | } |
441 | 689 | ||
690 | /* ESCR EventMask bits may be invalid */ | ||
691 | emask = p4_config_unpack_escr(event->attr.config) & P4_ESCR_EVENTMASK_MASK; | ||
692 | if (emask & ~p4_event_bind_map[v].escr_emask) | ||
693 | return -EINVAL; | ||
694 | |||
442 | /* | 695 | /* |
443 | * it may have some screwed PEBS bits | 696 | * it may have some invalid PEBS bits |
444 | */ | 697 | */ |
445 | if (p4_config_pebs_has(event->attr.config, P4_PEBS_CONFIG_ENABLE)) { | 698 | if (p4_config_pebs_has(event->attr.config, P4_PEBS_CONFIG_ENABLE)) |
446 | pr_warning("P4 PMU: PEBS are not supported yet\n"); | ||
447 | return -EINVAL; | 699 | return -EINVAL; |
448 | } | 700 | |
449 | v = p4_config_unpack_metric(event->attr.config); | 701 | v = p4_config_unpack_metric(event->attr.config); |
450 | if (v >= ARRAY_SIZE(p4_pebs_bind_map)) { | 702 | if (v >= ARRAY_SIZE(p4_pebs_bind_map)) |
451 | pr_warning("P4 PMU: Unknown metric code: %d\n", v); | ||
452 | return -EINVAL; | 703 | return -EINVAL; |
453 | } | ||
454 | 704 | ||
455 | return 0; | 705 | return 0; |
456 | } | 706 | } |
@@ -478,27 +728,21 @@ static int p4_hw_config(struct perf_event *event) | |||
478 | 728 | ||
479 | if (event->attr.type == PERF_TYPE_RAW) { | 729 | if (event->attr.type == PERF_TYPE_RAW) { |
480 | 730 | ||
731 | /* | ||
732 | * Clear bits we reserve to be managed by kernel itself | ||
733 | * and never allowed from a user space | ||
734 | */ | ||
735 | event->attr.config &= P4_CONFIG_MASK; | ||
736 | |||
481 | rc = p4_validate_raw_event(event); | 737 | rc = p4_validate_raw_event(event); |
482 | if (rc) | 738 | if (rc) |
483 | goto out; | 739 | goto out; |
484 | 740 | ||
485 | /* | 741 | /* |
486 | * We don't control raw events so it's up to the caller | ||
487 | * to pass sane values (and we don't count the thread number | ||
488 | * on HT machine but allow HT-compatible specifics to be | ||
489 | * passed on) | ||
490 | * | ||
491 | * Note that for RAW events we allow user to use P4_CCCR_RESERVED | 742 | * Note that for RAW events we allow user to use P4_CCCR_RESERVED |
492 | * bits since we keep additional info here (for cache events and etc) | 743 | * bits since we keep additional info here (for cache events and etc) |
493 | * | ||
494 | * XXX: HT wide things should check perf_paranoid_cpu() && | ||
495 | * CAP_SYS_ADMIN | ||
496 | */ | 744 | */ |
497 | event->hw.config |= event->attr.config & | 745 | event->hw.config |= event->attr.config; |
498 | (p4_config_pack_escr(P4_ESCR_MASK_HT) | | ||
499 | p4_config_pack_cccr(P4_CCCR_MASK_HT | P4_CCCR_RESERVED)); | ||
500 | |||
501 | event->hw.config &= ~P4_CCCR_FORCE_OVF; | ||
502 | } | 746 | } |
503 | 747 | ||
504 | rc = x86_setup_perfctr(event); | 748 | rc = x86_setup_perfctr(event); |
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 17be5ec7cbba..c375c79065f8 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S | |||
@@ -1023,9 +1023,9 @@ apicinterrupt ERROR_APIC_VECTOR \ | |||
1023 | apicinterrupt SPURIOUS_APIC_VECTOR \ | 1023 | apicinterrupt SPURIOUS_APIC_VECTOR \ |
1024 | spurious_interrupt smp_spurious_interrupt | 1024 | spurious_interrupt smp_spurious_interrupt |
1025 | 1025 | ||
1026 | #ifdef CONFIG_PERF_EVENTS | 1026 | #ifdef CONFIG_IRQ_WORK |
1027 | apicinterrupt LOCAL_PENDING_VECTOR \ | 1027 | apicinterrupt IRQ_WORK_VECTOR \ |
1028 | perf_pending_interrupt smp_perf_pending_interrupt | 1028 | irq_work_interrupt smp_irq_work_interrupt |
1029 | #endif | 1029 | #endif |
1030 | 1030 | ||
1031 | /* | 1031 | /* |
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index cd37469b54ee..3afb33f14d2d 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c | |||
@@ -257,14 +257,9 @@ do_ftrace_mod_code(unsigned long ip, void *new_code) | |||
257 | return mod_code_status; | 257 | return mod_code_status; |
258 | } | 258 | } |
259 | 259 | ||
260 | |||
261 | |||
262 | |||
263 | static unsigned char ftrace_nop[MCOUNT_INSN_SIZE]; | ||
264 | |||
265 | static unsigned char *ftrace_nop_replace(void) | 260 | static unsigned char *ftrace_nop_replace(void) |
266 | { | 261 | { |
267 | return ftrace_nop; | 262 | return ideal_nop5; |
268 | } | 263 | } |
269 | 264 | ||
270 | static int | 265 | static int |
@@ -338,62 +333,6 @@ int ftrace_update_ftrace_func(ftrace_func_t func) | |||
338 | 333 | ||
339 | int __init ftrace_dyn_arch_init(void *data) | 334 | int __init ftrace_dyn_arch_init(void *data) |
340 | { | 335 | { |
341 | extern const unsigned char ftrace_test_p6nop[]; | ||
342 | extern const unsigned char ftrace_test_nop5[]; | ||
343 | extern const unsigned char ftrace_test_jmp[]; | ||
344 | int faulted = 0; | ||
345 | |||
346 | /* | ||
347 | * There is no good nop for all x86 archs. | ||
348 | * We will default to using the P6_NOP5, but first we | ||
349 | * will test to make sure that the nop will actually | ||
350 | * work on this CPU. If it faults, we will then | ||
351 | * go to a lesser efficient 5 byte nop. If that fails | ||
352 | * we then just use a jmp as our nop. This isn't the most | ||
353 | * efficient nop, but we can not use a multi part nop | ||
354 | * since we would then risk being preempted in the middle | ||
355 | * of that nop, and if we enabled tracing then, it might | ||
356 | * cause a system crash. | ||
357 | * | ||
358 | * TODO: check the cpuid to determine the best nop. | ||
359 | */ | ||
360 | asm volatile ( | ||
361 | "ftrace_test_jmp:" | ||
362 | "jmp ftrace_test_p6nop\n" | ||
363 | "nop\n" | ||
364 | "nop\n" | ||
365 | "nop\n" /* 2 byte jmp + 3 bytes */ | ||
366 | "ftrace_test_p6nop:" | ||
367 | P6_NOP5 | ||
368 | "jmp 1f\n" | ||
369 | "ftrace_test_nop5:" | ||
370 | ".byte 0x66,0x66,0x66,0x66,0x90\n" | ||
371 | "1:" | ||
372 | ".section .fixup, \"ax\"\n" | ||
373 | "2: movl $1, %0\n" | ||
374 | " jmp ftrace_test_nop5\n" | ||
375 | "3: movl $2, %0\n" | ||
376 | " jmp 1b\n" | ||
377 | ".previous\n" | ||
378 | _ASM_EXTABLE(ftrace_test_p6nop, 2b) | ||
379 | _ASM_EXTABLE(ftrace_test_nop5, 3b) | ||
380 | : "=r"(faulted) : "0" (faulted)); | ||
381 | |||
382 | switch (faulted) { | ||
383 | case 0: | ||
384 | pr_info("converting mcount calls to 0f 1f 44 00 00\n"); | ||
385 | memcpy(ftrace_nop, ftrace_test_p6nop, MCOUNT_INSN_SIZE); | ||
386 | break; | ||
387 | case 1: | ||
388 | pr_info("converting mcount calls to 66 66 66 66 90\n"); | ||
389 | memcpy(ftrace_nop, ftrace_test_nop5, MCOUNT_INSN_SIZE); | ||
390 | break; | ||
391 | case 2: | ||
392 | pr_info("converting mcount calls to jmp . + 5\n"); | ||
393 | memcpy(ftrace_nop, ftrace_test_jmp, MCOUNT_INSN_SIZE); | ||
394 | break; | ||
395 | } | ||
396 | |||
397 | /* The return code is returned via data */ | 336 | /* The return code is returned via data */ |
398 | *(unsigned long *)data = 0; | 337 | *(unsigned long *)data = 0; |
399 | 338 | ||
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index 91fd0c70a18a..44edb03fc9ec 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c | |||
@@ -67,10 +67,10 @@ static int show_other_interrupts(struct seq_file *p, int prec) | |||
67 | for_each_online_cpu(j) | 67 | for_each_online_cpu(j) |
68 | seq_printf(p, "%10u ", irq_stats(j)->apic_perf_irqs); | 68 | seq_printf(p, "%10u ", irq_stats(j)->apic_perf_irqs); |
69 | seq_printf(p, " Performance monitoring interrupts\n"); | 69 | seq_printf(p, " Performance monitoring interrupts\n"); |
70 | seq_printf(p, "%*s: ", prec, "PND"); | 70 | seq_printf(p, "%*s: ", prec, "IWI"); |
71 | for_each_online_cpu(j) | 71 | for_each_online_cpu(j) |
72 | seq_printf(p, "%10u ", irq_stats(j)->apic_pending_irqs); | 72 | seq_printf(p, "%10u ", irq_stats(j)->apic_irq_work_irqs); |
73 | seq_printf(p, " Performance pending work\n"); | 73 | seq_printf(p, " IRQ work interrupts\n"); |
74 | #endif | 74 | #endif |
75 | if (x86_platform_ipi_callback) { | 75 | if (x86_platform_ipi_callback) { |
76 | seq_printf(p, "%*s: ", prec, "PLT"); | 76 | seq_printf(p, "%*s: ", prec, "PLT"); |
@@ -185,7 +185,7 @@ u64 arch_irq_stat_cpu(unsigned int cpu) | |||
185 | sum += irq_stats(cpu)->apic_timer_irqs; | 185 | sum += irq_stats(cpu)->apic_timer_irqs; |
186 | sum += irq_stats(cpu)->irq_spurious_count; | 186 | sum += irq_stats(cpu)->irq_spurious_count; |
187 | sum += irq_stats(cpu)->apic_perf_irqs; | 187 | sum += irq_stats(cpu)->apic_perf_irqs; |
188 | sum += irq_stats(cpu)->apic_pending_irqs; | 188 | sum += irq_stats(cpu)->apic_irq_work_irqs; |
189 | #endif | 189 | #endif |
190 | if (x86_platform_ipi_callback) | 190 | if (x86_platform_ipi_callback) |
191 | sum += irq_stats(cpu)->x86_platform_ipis; | 191 | sum += irq_stats(cpu)->x86_platform_ipis; |
diff --git a/arch/x86/kernel/irq_work.c b/arch/x86/kernel/irq_work.c new file mode 100644 index 000000000000..ca8f703a1e70 --- /dev/null +++ b/arch/x86/kernel/irq_work.c | |||
@@ -0,0 +1,30 @@ | |||
1 | /* | ||
2 | * x86 specific code for irq_work | ||
3 | * | ||
4 | * Copyright (C) 2010 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com> | ||
5 | */ | ||
6 | |||
7 | #include <linux/kernel.h> | ||
8 | #include <linux/irq_work.h> | ||
9 | #include <linux/hardirq.h> | ||
10 | #include <asm/apic.h> | ||
11 | |||
12 | void smp_irq_work_interrupt(struct pt_regs *regs) | ||
13 | { | ||
14 | irq_enter(); | ||
15 | ack_APIC_irq(); | ||
16 | inc_irq_stat(apic_irq_work_irqs); | ||
17 | irq_work_run(); | ||
18 | irq_exit(); | ||
19 | } | ||
20 | |||
21 | void arch_irq_work_raise(void) | ||
22 | { | ||
23 | #ifdef CONFIG_X86_LOCAL_APIC | ||
24 | if (!cpu_has_apic) | ||
25 | return; | ||
26 | |||
27 | apic->send_IPI_self(IRQ_WORK_VECTOR); | ||
28 | apic_wait_icr_idle(); | ||
29 | #endif | ||
30 | } | ||
diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c index 990ae7cfc578..713969b9266b 100644 --- a/arch/x86/kernel/irqinit.c +++ b/arch/x86/kernel/irqinit.c | |||
@@ -224,9 +224,9 @@ static void __init apic_intr_init(void) | |||
224 | alloc_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt); | 224 | alloc_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt); |
225 | alloc_intr_gate(ERROR_APIC_VECTOR, error_interrupt); | 225 | alloc_intr_gate(ERROR_APIC_VECTOR, error_interrupt); |
226 | 226 | ||
227 | /* Performance monitoring interrupts: */ | 227 | /* IRQ work interrupts: */ |
228 | # ifdef CONFIG_PERF_EVENTS | 228 | # ifdef CONFIG_IRQ_WORK |
229 | alloc_intr_gate(LOCAL_PENDING_VECTOR, perf_pending_interrupt); | 229 | alloc_intr_gate(IRQ_WORK_VECTOR, irq_work_interrupt); |
230 | # endif | 230 | # endif |
231 | 231 | ||
232 | #endif | 232 | #endif |
diff --git a/arch/x86/kernel/jump_label.c b/arch/x86/kernel/jump_label.c new file mode 100644 index 000000000000..961b6b30ba90 --- /dev/null +++ b/arch/x86/kernel/jump_label.c | |||
@@ -0,0 +1,50 @@ | |||
1 | /* | ||
2 | * jump label x86 support | ||
3 | * | ||
4 | * Copyright (C) 2009 Jason Baron <jbaron@redhat.com> | ||
5 | * | ||
6 | */ | ||
7 | #include <linux/jump_label.h> | ||
8 | #include <linux/memory.h> | ||
9 | #include <linux/uaccess.h> | ||
10 | #include <linux/module.h> | ||
11 | #include <linux/list.h> | ||
12 | #include <linux/jhash.h> | ||
13 | #include <linux/cpu.h> | ||
14 | #include <asm/kprobes.h> | ||
15 | #include <asm/alternative.h> | ||
16 | |||
17 | #ifdef HAVE_JUMP_LABEL | ||
18 | |||
19 | union jump_code_union { | ||
20 | char code[JUMP_LABEL_NOP_SIZE]; | ||
21 | struct { | ||
22 | char jump; | ||
23 | int offset; | ||
24 | } __attribute__((packed)); | ||
25 | }; | ||
26 | |||
27 | void arch_jump_label_transform(struct jump_entry *entry, | ||
28 | enum jump_label_type type) | ||
29 | { | ||
30 | union jump_code_union code; | ||
31 | |||
32 | if (type == JUMP_LABEL_ENABLE) { | ||
33 | code.jump = 0xe9; | ||
34 | code.offset = entry->target - | ||
35 | (entry->code + JUMP_LABEL_NOP_SIZE); | ||
36 | } else | ||
37 | memcpy(&code, ideal_nop5, JUMP_LABEL_NOP_SIZE); | ||
38 | get_online_cpus(); | ||
39 | mutex_lock(&text_mutex); | ||
40 | text_poke_smp((void *)entry->code, &code, JUMP_LABEL_NOP_SIZE); | ||
41 | mutex_unlock(&text_mutex); | ||
42 | put_online_cpus(); | ||
43 | } | ||
44 | |||
45 | void arch_jump_label_text_poke_early(jump_label_t addr) | ||
46 | { | ||
47 | text_poke_early((void *)addr, ideal_nop5, JUMP_LABEL_NOP_SIZE); | ||
48 | } | ||
49 | |||
50 | #endif | ||
diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c index 770ebfb349e9..1cbd54c0df99 100644 --- a/arch/x86/kernel/kprobes.c +++ b/arch/x86/kernel/kprobes.c | |||
@@ -230,9 +230,6 @@ static int recover_probed_instruction(kprobe_opcode_t *buf, unsigned long addr) | |||
230 | return 0; | 230 | return 0; |
231 | } | 231 | } |
232 | 232 | ||
233 | /* Dummy buffers for kallsyms_lookup */ | ||
234 | static char __dummy_buf[KSYM_NAME_LEN]; | ||
235 | |||
236 | /* Check if paddr is at an instruction boundary */ | 233 | /* Check if paddr is at an instruction boundary */ |
237 | static int __kprobes can_probe(unsigned long paddr) | 234 | static int __kprobes can_probe(unsigned long paddr) |
238 | { | 235 | { |
@@ -241,7 +238,7 @@ static int __kprobes can_probe(unsigned long paddr) | |||
241 | struct insn insn; | 238 | struct insn insn; |
242 | kprobe_opcode_t buf[MAX_INSN_SIZE]; | 239 | kprobe_opcode_t buf[MAX_INSN_SIZE]; |
243 | 240 | ||
244 | if (!kallsyms_lookup(paddr, NULL, &offset, NULL, __dummy_buf)) | 241 | if (!kallsyms_lookup_size_offset(paddr, NULL, &offset)) |
245 | return 0; | 242 | return 0; |
246 | 243 | ||
247 | /* Decode instructions */ | 244 | /* Decode instructions */ |
@@ -1129,7 +1126,7 @@ static void __kprobes synthesize_set_arg1(kprobe_opcode_t *addr, | |||
1129 | *(unsigned long *)addr = val; | 1126 | *(unsigned long *)addr = val; |
1130 | } | 1127 | } |
1131 | 1128 | ||
1132 | void __kprobes kprobes_optinsn_template_holder(void) | 1129 | static void __used __kprobes kprobes_optinsn_template_holder(void) |
1133 | { | 1130 | { |
1134 | asm volatile ( | 1131 | asm volatile ( |
1135 | ".global optprobe_template_entry\n" | 1132 | ".global optprobe_template_entry\n" |
@@ -1221,7 +1218,8 @@ static int __kprobes copy_optimized_instructions(u8 *dest, u8 *src) | |||
1221 | } | 1218 | } |
1222 | /* Check whether the address range is reserved */ | 1219 | /* Check whether the address range is reserved */ |
1223 | if (ftrace_text_reserved(src, src + len - 1) || | 1220 | if (ftrace_text_reserved(src, src + len - 1) || |
1224 | alternatives_text_reserved(src, src + len - 1)) | 1221 | alternatives_text_reserved(src, src + len - 1) || |
1222 | jump_label_text_reserved(src, src + len - 1)) | ||
1225 | return -EBUSY; | 1223 | return -EBUSY; |
1226 | 1224 | ||
1227 | return len; | 1225 | return len; |
@@ -1269,11 +1267,9 @@ static int __kprobes can_optimize(unsigned long paddr) | |||
1269 | unsigned long addr, size = 0, offset = 0; | 1267 | unsigned long addr, size = 0, offset = 0; |
1270 | struct insn insn; | 1268 | struct insn insn; |
1271 | kprobe_opcode_t buf[MAX_INSN_SIZE]; | 1269 | kprobe_opcode_t buf[MAX_INSN_SIZE]; |
1272 | /* Dummy buffers for lookup_symbol_attrs */ | ||
1273 | static char __dummy_buf[KSYM_NAME_LEN]; | ||
1274 | 1270 | ||
1275 | /* Lookup symbol including addr */ | 1271 | /* Lookup symbol including addr */ |
1276 | if (!kallsyms_lookup(paddr, &size, &offset, NULL, __dummy_buf)) | 1272 | if (!kallsyms_lookup_size_offset(paddr, &size, &offset)) |
1277 | return 0; | 1273 | return 0; |
1278 | 1274 | ||
1279 | /* Check there is enough space for a relative jump. */ | 1275 | /* Check there is enough space for a relative jump. */ |
diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c index 1c355c550960..8f2956091735 100644 --- a/arch/x86/kernel/module.c +++ b/arch/x86/kernel/module.c | |||
@@ -239,6 +239,9 @@ int module_finalize(const Elf_Ehdr *hdr, | |||
239 | apply_paravirt(pseg, pseg + para->sh_size); | 239 | apply_paravirt(pseg, pseg + para->sh_size); |
240 | } | 240 | } |
241 | 241 | ||
242 | /* make jump label nops */ | ||
243 | jump_label_apply_nops(me); | ||
244 | |||
242 | return 0; | 245 | return 0; |
243 | } | 246 | } |
244 | 247 | ||
diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c index 0f7f130caa67..6015ee13e22b 100644 --- a/arch/x86/kernel/pci-gart_64.c +++ b/arch/x86/kernel/pci-gart_64.c | |||
@@ -601,7 +601,7 @@ static void gart_fixup_northbridges(struct sys_device *dev) | |||
601 | * Don't enable translations just yet. That is the next | 601 | * Don't enable translations just yet. That is the next |
602 | * step. Restore the pre-suspend aperture settings. | 602 | * step. Restore the pre-suspend aperture settings. |
603 | */ | 603 | */ |
604 | pci_write_config_dword(dev, AMD64_GARTAPERTURECTL, aperture_order << 1); | 604 | gart_set_size_and_enable(dev, aperture_order); |
605 | pci_write_config_dword(dev, AMD64_GARTAPERTUREBASE, aperture_alloc >> 25); | 605 | pci_write_config_dword(dev, AMD64_GARTAPERTUREBASE, aperture_alloc >> 25); |
606 | } | 606 | } |
607 | } | 607 | } |
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index c3a4fbb2b996..00e167870f71 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c | |||
@@ -112,6 +112,7 @@ | |||
112 | #include <asm/numa_64.h> | 112 | #include <asm/numa_64.h> |
113 | #endif | 113 | #endif |
114 | #include <asm/mce.h> | 114 | #include <asm/mce.h> |
115 | #include <asm/alternative.h> | ||
115 | 116 | ||
116 | /* | 117 | /* |
117 | * end_pfn only includes RAM, while max_pfn_mapped includes all e820 entries. | 118 | * end_pfn only includes RAM, while max_pfn_mapped includes all e820 entries. |
@@ -726,6 +727,7 @@ void __init setup_arch(char **cmdline_p) | |||
726 | { | 727 | { |
727 | int acpi = 0; | 728 | int acpi = 0; |
728 | int k8 = 0; | 729 | int k8 = 0; |
730 | unsigned long flags; | ||
729 | 731 | ||
730 | #ifdef CONFIG_X86_32 | 732 | #ifdef CONFIG_X86_32 |
731 | memcpy(&boot_cpu_data, &new_cpu_data, sizeof(new_cpu_data)); | 733 | memcpy(&boot_cpu_data, &new_cpu_data, sizeof(new_cpu_data)); |
@@ -1071,6 +1073,10 @@ void __init setup_arch(char **cmdline_p) | |||
1071 | x86_init.oem.banner(); | 1073 | x86_init.oem.banner(); |
1072 | 1074 | ||
1073 | mcheck_init(); | 1075 | mcheck_init(); |
1076 | |||
1077 | local_irq_save(flags); | ||
1078 | arch_init_ideal_nop5(); | ||
1079 | local_irq_restore(flags); | ||
1074 | } | 1080 | } |
1075 | 1081 | ||
1076 | #ifdef CONFIG_X86_32 | 1082 | #ifdef CONFIG_X86_32 |
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 81ed28cb36e6..8a3f9f64f86f 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c | |||
@@ -3163,8 +3163,8 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) | |||
3163 | sync_lapic_to_cr8(vcpu); | 3163 | sync_lapic_to_cr8(vcpu); |
3164 | 3164 | ||
3165 | save_host_msrs(vcpu); | 3165 | save_host_msrs(vcpu); |
3166 | fs_selector = kvm_read_fs(); | 3166 | savesegment(fs, fs_selector); |
3167 | gs_selector = kvm_read_gs(); | 3167 | savesegment(gs, gs_selector); |
3168 | ldt_selector = kvm_read_ldt(); | 3168 | ldt_selector = kvm_read_ldt(); |
3169 | svm->vmcb->save.cr2 = vcpu->arch.cr2; | 3169 | svm->vmcb->save.cr2 = vcpu->arch.cr2; |
3170 | /* required for live migration with NPT */ | 3170 | /* required for live migration with NPT */ |
@@ -3251,10 +3251,15 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) | |||
3251 | vcpu->arch.regs[VCPU_REGS_RSP] = svm->vmcb->save.rsp; | 3251 | vcpu->arch.regs[VCPU_REGS_RSP] = svm->vmcb->save.rsp; |
3252 | vcpu->arch.regs[VCPU_REGS_RIP] = svm->vmcb->save.rip; | 3252 | vcpu->arch.regs[VCPU_REGS_RIP] = svm->vmcb->save.rip; |
3253 | 3253 | ||
3254 | kvm_load_fs(fs_selector); | ||
3255 | kvm_load_gs(gs_selector); | ||
3256 | kvm_load_ldt(ldt_selector); | ||
3257 | load_host_msrs(vcpu); | 3254 | load_host_msrs(vcpu); |
3255 | loadsegment(fs, fs_selector); | ||
3256 | #ifdef CONFIG_X86_64 | ||
3257 | load_gs_index(gs_selector); | ||
3258 | wrmsrl(MSR_KERNEL_GS_BASE, current->thread.gs); | ||
3259 | #else | ||
3260 | loadsegment(gs, gs_selector); | ||
3261 | #endif | ||
3262 | kvm_load_ldt(ldt_selector); | ||
3258 | 3263 | ||
3259 | reload_tss(vcpu); | 3264 | reload_tss(vcpu); |
3260 | 3265 | ||
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 49b25eee25ac..7bddfab12013 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c | |||
@@ -803,7 +803,7 @@ static void vmx_save_host_state(struct kvm_vcpu *vcpu) | |||
803 | */ | 803 | */ |
804 | vmx->host_state.ldt_sel = kvm_read_ldt(); | 804 | vmx->host_state.ldt_sel = kvm_read_ldt(); |
805 | vmx->host_state.gs_ldt_reload_needed = vmx->host_state.ldt_sel; | 805 | vmx->host_state.gs_ldt_reload_needed = vmx->host_state.ldt_sel; |
806 | vmx->host_state.fs_sel = kvm_read_fs(); | 806 | savesegment(fs, vmx->host_state.fs_sel); |
807 | if (!(vmx->host_state.fs_sel & 7)) { | 807 | if (!(vmx->host_state.fs_sel & 7)) { |
808 | vmcs_write16(HOST_FS_SELECTOR, vmx->host_state.fs_sel); | 808 | vmcs_write16(HOST_FS_SELECTOR, vmx->host_state.fs_sel); |
809 | vmx->host_state.fs_reload_needed = 0; | 809 | vmx->host_state.fs_reload_needed = 0; |
@@ -811,7 +811,7 @@ static void vmx_save_host_state(struct kvm_vcpu *vcpu) | |||
811 | vmcs_write16(HOST_FS_SELECTOR, 0); | 811 | vmcs_write16(HOST_FS_SELECTOR, 0); |
812 | vmx->host_state.fs_reload_needed = 1; | 812 | vmx->host_state.fs_reload_needed = 1; |
813 | } | 813 | } |
814 | vmx->host_state.gs_sel = kvm_read_gs(); | 814 | savesegment(gs, vmx->host_state.gs_sel); |
815 | if (!(vmx->host_state.gs_sel & 7)) | 815 | if (!(vmx->host_state.gs_sel & 7)) |
816 | vmcs_write16(HOST_GS_SELECTOR, vmx->host_state.gs_sel); | 816 | vmcs_write16(HOST_GS_SELECTOR, vmx->host_state.gs_sel); |
817 | else { | 817 | else { |
@@ -841,27 +841,21 @@ static void vmx_save_host_state(struct kvm_vcpu *vcpu) | |||
841 | 841 | ||
842 | static void __vmx_load_host_state(struct vcpu_vmx *vmx) | 842 | static void __vmx_load_host_state(struct vcpu_vmx *vmx) |
843 | { | 843 | { |
844 | unsigned long flags; | ||
845 | |||
846 | if (!vmx->host_state.loaded) | 844 | if (!vmx->host_state.loaded) |
847 | return; | 845 | return; |
848 | 846 | ||
849 | ++vmx->vcpu.stat.host_state_reload; | 847 | ++vmx->vcpu.stat.host_state_reload; |
850 | vmx->host_state.loaded = 0; | 848 | vmx->host_state.loaded = 0; |
851 | if (vmx->host_state.fs_reload_needed) | 849 | if (vmx->host_state.fs_reload_needed) |
852 | kvm_load_fs(vmx->host_state.fs_sel); | 850 | loadsegment(fs, vmx->host_state.fs_sel); |
853 | if (vmx->host_state.gs_ldt_reload_needed) { | 851 | if (vmx->host_state.gs_ldt_reload_needed) { |
854 | kvm_load_ldt(vmx->host_state.ldt_sel); | 852 | kvm_load_ldt(vmx->host_state.ldt_sel); |
855 | /* | ||
856 | * If we have to reload gs, we must take care to | ||
857 | * preserve our gs base. | ||
858 | */ | ||
859 | local_irq_save(flags); | ||
860 | kvm_load_gs(vmx->host_state.gs_sel); | ||
861 | #ifdef CONFIG_X86_64 | 853 | #ifdef CONFIG_X86_64 |
862 | wrmsrl(MSR_GS_BASE, vmcs_readl(HOST_GS_BASE)); | 854 | load_gs_index(vmx->host_state.gs_sel); |
855 | wrmsrl(MSR_KERNEL_GS_BASE, current->thread.gs); | ||
856 | #else | ||
857 | loadsegment(gs, vmx->host_state.gs_sel); | ||
863 | #endif | 858 | #endif |
864 | local_irq_restore(flags); | ||
865 | } | 859 | } |
866 | reload_tss(); | 860 | reload_tss(); |
867 | #ifdef CONFIG_X86_64 | 861 | #ifdef CONFIG_X86_64 |
@@ -2589,8 +2583,8 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) | |||
2589 | vmcs_write16(HOST_CS_SELECTOR, __KERNEL_CS); /* 22.2.4 */ | 2583 | vmcs_write16(HOST_CS_SELECTOR, __KERNEL_CS); /* 22.2.4 */ |
2590 | vmcs_write16(HOST_DS_SELECTOR, __KERNEL_DS); /* 22.2.4 */ | 2584 | vmcs_write16(HOST_DS_SELECTOR, __KERNEL_DS); /* 22.2.4 */ |
2591 | vmcs_write16(HOST_ES_SELECTOR, __KERNEL_DS); /* 22.2.4 */ | 2585 | vmcs_write16(HOST_ES_SELECTOR, __KERNEL_DS); /* 22.2.4 */ |
2592 | vmcs_write16(HOST_FS_SELECTOR, kvm_read_fs()); /* 22.2.4 */ | 2586 | vmcs_write16(HOST_FS_SELECTOR, 0); /* 22.2.4 */ |
2593 | vmcs_write16(HOST_GS_SELECTOR, kvm_read_gs()); /* 22.2.4 */ | 2587 | vmcs_write16(HOST_GS_SELECTOR, 0); /* 22.2.4 */ |
2594 | vmcs_write16(HOST_SS_SELECTOR, __KERNEL_DS); /* 22.2.4 */ | 2588 | vmcs_write16(HOST_SS_SELECTOR, __KERNEL_DS); /* 22.2.4 */ |
2595 | #ifdef CONFIG_X86_64 | 2589 | #ifdef CONFIG_X86_64 |
2596 | rdmsrl(MSR_FS_BASE, a); | 2590 | rdmsrl(MSR_FS_BASE, a); |
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 4c4508e8a204..a24c6cfdccc4 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c | |||
@@ -251,6 +251,8 @@ static noinline __kprobes int vmalloc_fault(unsigned long address) | |||
251 | if (!(address >= VMALLOC_START && address < VMALLOC_END)) | 251 | if (!(address >= VMALLOC_START && address < VMALLOC_END)) |
252 | return -1; | 252 | return -1; |
253 | 253 | ||
254 | WARN_ON_ONCE(in_nmi()); | ||
255 | |||
254 | /* | 256 | /* |
255 | * Synchronize this task's top level page-table | 257 | * Synchronize this task's top level page-table |
256 | * with the 'reference' page table. | 258 | * with the 'reference' page table. |
@@ -369,6 +371,8 @@ static noinline __kprobes int vmalloc_fault(unsigned long address) | |||
369 | if (!(address >= VMALLOC_START && address < VMALLOC_END)) | 371 | if (!(address >= VMALLOC_START && address < VMALLOC_END)) |
370 | return -1; | 372 | return -1; |
371 | 373 | ||
374 | WARN_ON_ONCE(in_nmi()); | ||
375 | |||
372 | /* | 376 | /* |
373 | * Copy kernel mappings over when needed. This can also | 377 | * Copy kernel mappings over when needed. This can also |
374 | * happen within a race in page table update. In the later | 378 | * happen within a race in page table update. In the later |
diff --git a/arch/x86/mm/kmemcheck/kmemcheck.c b/arch/x86/mm/kmemcheck/kmemcheck.c index b3b531a4f8e5..d87dd6d042d6 100644 --- a/arch/x86/mm/kmemcheck/kmemcheck.c +++ b/arch/x86/mm/kmemcheck/kmemcheck.c | |||
@@ -631,6 +631,8 @@ bool kmemcheck_fault(struct pt_regs *regs, unsigned long address, | |||
631 | if (!pte) | 631 | if (!pte) |
632 | return false; | 632 | return false; |
633 | 633 | ||
634 | WARN_ON_ONCE(in_nmi()); | ||
635 | |||
634 | if (error_code & 2) | 636 | if (error_code & 2) |
635 | kmemcheck_access(regs, address, KMEMCHECK_WRITE); | 637 | kmemcheck_access(regs, address, KMEMCHECK_WRITE); |
636 | else | 638 | else |
diff --git a/arch/x86/oprofile/backtrace.c b/arch/x86/oprofile/backtrace.c index 3855096c59b8..2d49d4e19a36 100644 --- a/arch/x86/oprofile/backtrace.c +++ b/arch/x86/oprofile/backtrace.c | |||
@@ -14,6 +14,7 @@ | |||
14 | #include <asm/ptrace.h> | 14 | #include <asm/ptrace.h> |
15 | #include <asm/uaccess.h> | 15 | #include <asm/uaccess.h> |
16 | #include <asm/stacktrace.h> | 16 | #include <asm/stacktrace.h> |
17 | #include <linux/compat.h> | ||
17 | 18 | ||
18 | static void backtrace_warning_symbol(void *data, char *msg, | 19 | static void backtrace_warning_symbol(void *data, char *msg, |
19 | unsigned long symbol) | 20 | unsigned long symbol) |
@@ -48,14 +49,12 @@ static struct stacktrace_ops backtrace_ops = { | |||
48 | .walk_stack = print_context_stack, | 49 | .walk_stack = print_context_stack, |
49 | }; | 50 | }; |
50 | 51 | ||
51 | struct frame_head { | 52 | #ifdef CONFIG_COMPAT |
52 | struct frame_head *bp; | 53 | static struct stack_frame_ia32 * |
53 | unsigned long ret; | 54 | dump_user_backtrace_32(struct stack_frame_ia32 *head) |
54 | } __attribute__((packed)); | ||
55 | |||
56 | static struct frame_head *dump_user_backtrace(struct frame_head *head) | ||
57 | { | 55 | { |
58 | struct frame_head bufhead[2]; | 56 | struct stack_frame_ia32 bufhead[2]; |
57 | struct stack_frame_ia32 *fp; | ||
59 | 58 | ||
60 | /* Also check accessibility of one struct frame_head beyond */ | 59 | /* Also check accessibility of one struct frame_head beyond */ |
61 | if (!access_ok(VERIFY_READ, head, sizeof(bufhead))) | 60 | if (!access_ok(VERIFY_READ, head, sizeof(bufhead))) |
@@ -63,20 +62,66 @@ static struct frame_head *dump_user_backtrace(struct frame_head *head) | |||
63 | if (__copy_from_user_inatomic(bufhead, head, sizeof(bufhead))) | 62 | if (__copy_from_user_inatomic(bufhead, head, sizeof(bufhead))) |
64 | return NULL; | 63 | return NULL; |
65 | 64 | ||
66 | oprofile_add_trace(bufhead[0].ret); | 65 | fp = (struct stack_frame_ia32 *) compat_ptr(bufhead[0].next_frame); |
66 | |||
67 | oprofile_add_trace(bufhead[0].return_address); | ||
68 | |||
69 | /* frame pointers should strictly progress back up the stack | ||
70 | * (towards higher addresses) */ | ||
71 | if (head >= fp) | ||
72 | return NULL; | ||
73 | |||
74 | return fp; | ||
75 | } | ||
76 | |||
77 | static inline int | ||
78 | x86_backtrace_32(struct pt_regs * const regs, unsigned int depth) | ||
79 | { | ||
80 | struct stack_frame_ia32 *head; | ||
81 | |||
82 | /* User process is 32-bit */ | ||
83 | if (!current || !test_thread_flag(TIF_IA32)) | ||
84 | return 0; | ||
85 | |||
86 | head = (struct stack_frame_ia32 *) regs->bp; | ||
87 | while (depth-- && head) | ||
88 | head = dump_user_backtrace_32(head); | ||
89 | |||
90 | return 1; | ||
91 | } | ||
92 | |||
93 | #else | ||
94 | static inline int | ||
95 | x86_backtrace_32(struct pt_regs * const regs, unsigned int depth) | ||
96 | { | ||
97 | return 0; | ||
98 | } | ||
99 | #endif /* CONFIG_COMPAT */ | ||
100 | |||
101 | static struct stack_frame *dump_user_backtrace(struct stack_frame *head) | ||
102 | { | ||
103 | struct stack_frame bufhead[2]; | ||
104 | |||
105 | /* Also check accessibility of one struct stack_frame beyond */ | ||
106 | if (!access_ok(VERIFY_READ, head, sizeof(bufhead))) | ||
107 | return NULL; | ||
108 | if (__copy_from_user_inatomic(bufhead, head, sizeof(bufhead))) | ||
109 | return NULL; | ||
110 | |||
111 | oprofile_add_trace(bufhead[0].return_address); | ||
67 | 112 | ||
68 | /* frame pointers should strictly progress back up the stack | 113 | /* frame pointers should strictly progress back up the stack |
69 | * (towards higher addresses) */ | 114 | * (towards higher addresses) */ |
70 | if (head >= bufhead[0].bp) | 115 | if (head >= bufhead[0].next_frame) |
71 | return NULL; | 116 | return NULL; |
72 | 117 | ||
73 | return bufhead[0].bp; | 118 | return bufhead[0].next_frame; |
74 | } | 119 | } |
75 | 120 | ||
76 | void | 121 | void |
77 | x86_backtrace(struct pt_regs * const regs, unsigned int depth) | 122 | x86_backtrace(struct pt_regs * const regs, unsigned int depth) |
78 | { | 123 | { |
79 | struct frame_head *head = (struct frame_head *)frame_pointer(regs); | 124 | struct stack_frame *head = (struct stack_frame *)frame_pointer(regs); |
80 | 125 | ||
81 | if (!user_mode_vm(regs)) { | 126 | if (!user_mode_vm(regs)) { |
82 | unsigned long stack = kernel_stack_pointer(regs); | 127 | unsigned long stack = kernel_stack_pointer(regs); |
@@ -86,6 +131,9 @@ x86_backtrace(struct pt_regs * const regs, unsigned int depth) | |||
86 | return; | 131 | return; |
87 | } | 132 | } |
88 | 133 | ||
134 | if (x86_backtrace_32(regs, depth)) | ||
135 | return; | ||
136 | |||
89 | while (depth-- && head) | 137 | while (depth-- && head) |
90 | head = dump_user_backtrace(head); | 138 | head = dump_user_backtrace(head); |
91 | } | 139 | } |
diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c index f1575c9a2572..bd1489c3ce09 100644 --- a/arch/x86/oprofile/nmi_int.c +++ b/arch/x86/oprofile/nmi_int.c | |||
@@ -695,9 +695,6 @@ static int __init ppro_init(char **cpu_type) | |||
695 | return 1; | 695 | return 1; |
696 | } | 696 | } |
697 | 697 | ||
698 | /* in order to get sysfs right */ | ||
699 | static int using_nmi; | ||
700 | |||
701 | int __init op_nmi_init(struct oprofile_operations *ops) | 698 | int __init op_nmi_init(struct oprofile_operations *ops) |
702 | { | 699 | { |
703 | __u8 vendor = boot_cpu_data.x86_vendor; | 700 | __u8 vendor = boot_cpu_data.x86_vendor; |
@@ -705,8 +702,6 @@ int __init op_nmi_init(struct oprofile_operations *ops) | |||
705 | char *cpu_type = NULL; | 702 | char *cpu_type = NULL; |
706 | int ret = 0; | 703 | int ret = 0; |
707 | 704 | ||
708 | using_nmi = 0; | ||
709 | |||
710 | if (!cpu_has_apic) | 705 | if (!cpu_has_apic) |
711 | return -ENODEV; | 706 | return -ENODEV; |
712 | 707 | ||
@@ -790,13 +785,11 @@ int __init op_nmi_init(struct oprofile_operations *ops) | |||
790 | if (ret) | 785 | if (ret) |
791 | return ret; | 786 | return ret; |
792 | 787 | ||
793 | using_nmi = 1; | ||
794 | printk(KERN_INFO "oprofile: using NMI interrupt.\n"); | 788 | printk(KERN_INFO "oprofile: using NMI interrupt.\n"); |
795 | return 0; | 789 | return 0; |
796 | } | 790 | } |
797 | 791 | ||
798 | void op_nmi_exit(void) | 792 | void op_nmi_exit(void) |
799 | { | 793 | { |
800 | if (using_nmi) | 794 | exit_sysfs(); |
801 | exit_sysfs(); | ||
802 | } | 795 | } |
diff --git a/block/bsg.c b/block/bsg.c index 82d58829ba59..0c00870553a3 100644 --- a/block/bsg.c +++ b/block/bsg.c | |||
@@ -426,7 +426,7 @@ static int blk_complete_sgv4_hdr_rq(struct request *rq, struct sg_io_v4 *hdr, | |||
426 | /* | 426 | /* |
427 | * fill in all the output members | 427 | * fill in all the output members |
428 | */ | 428 | */ |
429 | hdr->device_status = status_byte(rq->errors); | 429 | hdr->device_status = rq->errors & 0xff; |
430 | hdr->transport_status = host_byte(rq->errors); | 430 | hdr->transport_status = host_byte(rq->errors); |
431 | hdr->driver_status = driver_byte(rq->errors); | 431 | hdr->driver_status = driver_byte(rq->errors); |
432 | hdr->info = 0; | 432 | hdr->info = 0; |
diff --git a/drivers/atm/iphase.c b/drivers/atm/iphase.c index ee9ddeb53417..8cb0347dec28 100644 --- a/drivers/atm/iphase.c +++ b/drivers/atm/iphase.c | |||
@@ -3156,7 +3156,6 @@ static int __devinit ia_init_one(struct pci_dev *pdev, | |||
3156 | { | 3156 | { |
3157 | struct atm_dev *dev; | 3157 | struct atm_dev *dev; |
3158 | IADEV *iadev; | 3158 | IADEV *iadev; |
3159 | unsigned long flags; | ||
3160 | int ret; | 3159 | int ret; |
3161 | 3160 | ||
3162 | iadev = kzalloc(sizeof(*iadev), GFP_KERNEL); | 3161 | iadev = kzalloc(sizeof(*iadev), GFP_KERNEL); |
@@ -3188,19 +3187,14 @@ static int __devinit ia_init_one(struct pci_dev *pdev, | |||
3188 | ia_dev[iadev_count] = iadev; | 3187 | ia_dev[iadev_count] = iadev; |
3189 | _ia_dev[iadev_count] = dev; | 3188 | _ia_dev[iadev_count] = dev; |
3190 | iadev_count++; | 3189 | iadev_count++; |
3191 | spin_lock_init(&iadev->misc_lock); | ||
3192 | /* First fixes first. I don't want to think about this now. */ | ||
3193 | spin_lock_irqsave(&iadev->misc_lock, flags); | ||
3194 | if (ia_init(dev) || ia_start(dev)) { | 3190 | if (ia_init(dev) || ia_start(dev)) { |
3195 | IF_INIT(printk("IA register failed!\n");) | 3191 | IF_INIT(printk("IA register failed!\n");) |
3196 | iadev_count--; | 3192 | iadev_count--; |
3197 | ia_dev[iadev_count] = NULL; | 3193 | ia_dev[iadev_count] = NULL; |
3198 | _ia_dev[iadev_count] = NULL; | 3194 | _ia_dev[iadev_count] = NULL; |
3199 | spin_unlock_irqrestore(&iadev->misc_lock, flags); | ||
3200 | ret = -EINVAL; | 3195 | ret = -EINVAL; |
3201 | goto err_out_deregister_dev; | 3196 | goto err_out_deregister_dev; |
3202 | } | 3197 | } |
3203 | spin_unlock_irqrestore(&iadev->misc_lock, flags); | ||
3204 | IF_EVENT(printk("iadev_count = %d\n", iadev_count);) | 3198 | IF_EVENT(printk("iadev_count = %d\n", iadev_count);) |
3205 | 3199 | ||
3206 | iadev->next_board = ia_boards; | 3200 | iadev->next_board = ia_boards; |
diff --git a/drivers/atm/iphase.h b/drivers/atm/iphase.h index b2cd20f549cb..077735e0e04b 100644 --- a/drivers/atm/iphase.h +++ b/drivers/atm/iphase.h | |||
@@ -1022,7 +1022,7 @@ typedef struct iadev_t { | |||
1022 | struct dle_q rx_dle_q; | 1022 | struct dle_q rx_dle_q; |
1023 | struct free_desc_q *rx_free_desc_qhead; | 1023 | struct free_desc_q *rx_free_desc_qhead; |
1024 | struct sk_buff_head rx_dma_q; | 1024 | struct sk_buff_head rx_dma_q; |
1025 | spinlock_t rx_lock, misc_lock; | 1025 | spinlock_t rx_lock; |
1026 | struct atm_vcc **rx_open; /* list of all open VCs */ | 1026 | struct atm_vcc **rx_open; /* list of all open VCs */ |
1027 | u16 num_rx_desc, rx_buf_sz, rxing; | 1027 | u16 num_rx_desc, rx_buf_sz, rxing; |
1028 | u32 rx_pkt_ram, rx_tmp_cnt; | 1028 | u32 rx_pkt_ram, rx_tmp_cnt; |
diff --git a/drivers/atm/solos-pci.c b/drivers/atm/solos-pci.c index f916ddf63938..f46138ab38b6 100644 --- a/drivers/atm/solos-pci.c +++ b/drivers/atm/solos-pci.c | |||
@@ -444,6 +444,7 @@ static ssize_t console_show(struct device *dev, struct device_attribute *attr, | |||
444 | struct atm_dev *atmdev = container_of(dev, struct atm_dev, class_dev); | 444 | struct atm_dev *atmdev = container_of(dev, struct atm_dev, class_dev); |
445 | struct solos_card *card = atmdev->dev_data; | 445 | struct solos_card *card = atmdev->dev_data; |
446 | struct sk_buff *skb; | 446 | struct sk_buff *skb; |
447 | unsigned int len; | ||
447 | 448 | ||
448 | spin_lock(&card->cli_queue_lock); | 449 | spin_lock(&card->cli_queue_lock); |
449 | skb = skb_dequeue(&card->cli_queue[SOLOS_CHAN(atmdev)]); | 450 | skb = skb_dequeue(&card->cli_queue[SOLOS_CHAN(atmdev)]); |
@@ -451,11 +452,12 @@ static ssize_t console_show(struct device *dev, struct device_attribute *attr, | |||
451 | if(skb == NULL) | 452 | if(skb == NULL) |
452 | return sprintf(buf, "No data.\n"); | 453 | return sprintf(buf, "No data.\n"); |
453 | 454 | ||
454 | memcpy(buf, skb->data, skb->len); | 455 | len = skb->len; |
455 | dev_dbg(&card->dev->dev, "len: %d\n", skb->len); | 456 | memcpy(buf, skb->data, len); |
457 | dev_dbg(&card->dev->dev, "len: %d\n", len); | ||
456 | 458 | ||
457 | kfree_skb(skb); | 459 | kfree_skb(skb); |
458 | return skb->len; | 460 | return len; |
459 | } | 461 | } |
460 | 462 | ||
461 | static int send_command(struct solos_card *card, int dev, const char *buf, size_t size) | 463 | static int send_command(struct solos_card *card, int dev, const char *buf, size_t size) |
diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig index de277689da61..4b9359a6f6ca 100644 --- a/drivers/block/Kconfig +++ b/drivers/block/Kconfig | |||
@@ -488,4 +488,21 @@ config BLK_DEV_HD | |||
488 | 488 | ||
489 | If unsure, say N. | 489 | If unsure, say N. |
490 | 490 | ||
491 | config BLK_DEV_RBD | ||
492 | tristate "Rados block device (RBD)" | ||
493 | depends on INET && EXPERIMENTAL && BLOCK | ||
494 | select CEPH_LIB | ||
495 | select LIBCRC32C | ||
496 | select CRYPTO_AES | ||
497 | select CRYPTO | ||
498 | default n | ||
499 | help | ||
500 | Say Y here if you want to include the Rados block device, which stripes | ||
501 | a block device over objects stored in the Ceph distributed object | ||
502 | store. | ||
503 | |||
504 | More information at http://ceph.newdream.net/. | ||
505 | |||
506 | If unsure, say N. | ||
507 | |||
491 | endif # BLK_DEV | 508 | endif # BLK_DEV |
diff --git a/drivers/block/Makefile b/drivers/block/Makefile index aff5ac925c34..d7f463d6312d 100644 --- a/drivers/block/Makefile +++ b/drivers/block/Makefile | |||
@@ -37,5 +37,6 @@ obj-$(CONFIG_BLK_DEV_HD) += hd.o | |||
37 | 37 | ||
38 | obj-$(CONFIG_XEN_BLKDEV_FRONTEND) += xen-blkfront.o | 38 | obj-$(CONFIG_XEN_BLKDEV_FRONTEND) += xen-blkfront.o |
39 | obj-$(CONFIG_BLK_DEV_DRBD) += drbd/ | 39 | obj-$(CONFIG_BLK_DEV_DRBD) += drbd/ |
40 | obj-$(CONFIG_BLK_DEV_RBD) += rbd.o | ||
40 | 41 | ||
41 | swim_mod-objs := swim.o swim_asm.o | 42 | swim_mod-objs := swim.o swim_asm.o |
diff --git a/drivers/block/ps3disk.c b/drivers/block/ps3disk.c index e9da874d0419..03688c2da319 100644 --- a/drivers/block/ps3disk.c +++ b/drivers/block/ps3disk.c | |||
@@ -113,7 +113,7 @@ static void ps3disk_scatter_gather(struct ps3_storage_device *dev, | |||
113 | memcpy(buf, dev->bounce_buf+offset, size); | 113 | memcpy(buf, dev->bounce_buf+offset, size); |
114 | offset += size; | 114 | offset += size; |
115 | flush_kernel_dcache_page(bvec->bv_page); | 115 | flush_kernel_dcache_page(bvec->bv_page); |
116 | bvec_kunmap_irq(bvec, &flags); | 116 | bvec_kunmap_irq(buf, &flags); |
117 | i++; | 117 | i++; |
118 | } | 118 | } |
119 | } | 119 | } |
diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c new file mode 100644 index 000000000000..6ec9d53806c5 --- /dev/null +++ b/drivers/block/rbd.c | |||
@@ -0,0 +1,1841 @@ | |||
1 | /* | ||
2 | rbd.c -- Export ceph rados objects as a Linux block device | ||
3 | |||
4 | |||
5 | based on drivers/block/osdblk.c: | ||
6 | |||
7 | Copyright 2009 Red Hat, Inc. | ||
8 | |||
9 | This program is free software; you can redistribute it and/or modify | ||
10 | it under the terms of the GNU General Public License as published by | ||
11 | the Free Software Foundation. | ||
12 | |||
13 | This program is distributed in the hope that it will be useful, | ||
14 | but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
15 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
16 | GNU General Public License for more details. | ||
17 | |||
18 | You should have received a copy of the GNU General Public License | ||
19 | along with this program; see the file COPYING. If not, write to | ||
20 | the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. | ||
21 | |||
22 | |||
23 | |||
24 | Instructions for use | ||
25 | -------------------- | ||
26 | |||
27 | 1) Map a Linux block device to an existing rbd image. | ||
28 | |||
29 | Usage: <mon ip addr> <options> <pool name> <rbd image name> [snap name] | ||
30 | |||
31 | $ echo "192.168.0.1 name=admin rbd foo" > /sys/class/rbd/add | ||
32 | |||
33 | The snapshot name can be "-" or omitted to map the image read/write. | ||
34 | |||
35 | 2) List all active blkdev<->object mappings. | ||
36 | |||
37 | In this example, we have performed step #1 twice, creating two blkdevs, | ||
38 | mapped to two separate rados objects in the rados rbd pool | ||
39 | |||
40 | $ cat /sys/class/rbd/list | ||
41 | #id major client_name pool name snap KB | ||
42 | 0 254 client4143 rbd foo - 1024000 | ||
43 | |||
44 | The columns, in order, are: | ||
45 | - blkdev unique id | ||
46 | - blkdev assigned major | ||
47 | - rados client id | ||
48 | - rados pool name | ||
49 | - rados block device name | ||
50 | - mapped snapshot ("-" if none) | ||
51 | - device size in KB | ||
52 | |||
53 | |||
54 | 3) Create a snapshot. | ||
55 | |||
56 | Usage: <blkdev id> <snapname> | ||
57 | |||
58 | $ echo "0 mysnap" > /sys/class/rbd/snap_create | ||
59 | |||
60 | |||
61 | 4) List snapshots. | ||
62 | |||
63 | $ cat /sys/class/rbd/snaps_list | ||
64 | #id snap KB | ||
65 | 0 - 1024000 (*) | ||
66 | 0 foo 1024000 | ||
67 | |||
68 | The columns, in order, are: | ||
69 | - blkdev unique id | ||
70 | - snapshot name, '-' means none (active read/write version) | ||
71 | - size of device at time of snapshot | ||
72 | - the (*) indicates this is the active version | ||
73 | |||
74 | 5) Roll back to a snapshot. | ||
75 | |||
76 | Usage: <blkdev id> <snapname> | ||
77 | |||
78 | $ echo "0 mysnap" > /sys/class/rbd/snap_rollback | ||
79 | |||
80 | |||
81 | 6) Map an image using a snapshot. | ||
82 | |||
83 | A snapshot mapping is read-only. This is done by passing | ||
84 | snap=<snapname> in the options when adding a device. | ||
85 | |||
86 | $ echo "192.168.0.1 name=admin,snap=mysnap rbd foo" > /sys/class/rbd/add | ||
87 | |||
88 | |||
89 | 7) Remove an active blkdev<->rbd image mapping. | ||
90 | |||
91 | In this example, we remove the mapping with blkdev unique id 1. | ||
92 | |||
93 | $ echo 1 > /sys/class/rbd/remove | ||
94 | |||
95 | |||
96 | NOTE: The actual creation and deletion of rados objects is outside the scope | ||
97 | of this driver. | ||
98 | |||
99 | */ | ||
100 | |||
101 | #include <linux/ceph/libceph.h> | ||
102 | #include <linux/ceph/osd_client.h> | ||
103 | #include <linux/ceph/mon_client.h> | ||
104 | #include <linux/ceph/decode.h> | ||
105 | |||
106 | #include <linux/kernel.h> | ||
107 | #include <linux/device.h> | ||
108 | #include <linux/module.h> | ||
109 | #include <linux/fs.h> | ||
110 | #include <linux/blkdev.h> | ||
111 | |||
112 | #include "rbd_types.h" | ||
113 | |||
114 | #define DRV_NAME "rbd" | ||
115 | #define DRV_NAME_LONG "rbd (rados block device)" | ||
116 | |||
117 | #define RBD_MINORS_PER_MAJOR 256 /* max minors per blkdev */ | ||
118 | |||
119 | #define RBD_MAX_MD_NAME_LEN (96 + sizeof(RBD_SUFFIX)) | ||
120 | #define RBD_MAX_POOL_NAME_LEN 64 | ||
121 | #define RBD_MAX_SNAP_NAME_LEN 32 | ||
122 | #define RBD_MAX_OPT_LEN 1024 | ||
123 | |||
124 | #define RBD_SNAP_HEAD_NAME "-" | ||
125 | |||
126 | #define DEV_NAME_LEN 32 | ||
127 | |||
128 | /* | ||
129 | * block device image metadata (in-memory version) | ||
130 | */ | ||
131 | struct rbd_image_header { | ||
132 | u64 image_size; | ||
133 | char block_name[32]; | ||
134 | __u8 obj_order; | ||
135 | __u8 crypt_type; | ||
136 | __u8 comp_type; | ||
137 | struct rw_semaphore snap_rwsem; | ||
138 | struct ceph_snap_context *snapc; | ||
139 | size_t snap_names_len; | ||
140 | u64 snap_seq; | ||
141 | u32 total_snaps; | ||
142 | |||
143 | char *snap_names; | ||
144 | u64 *snap_sizes; | ||
145 | }; | ||
146 | |||
147 | /* | ||
148 | * an instance of the client. multiple devices may share a client. | ||
149 | */ | ||
150 | struct rbd_client { | ||
151 | struct ceph_client *client; | ||
152 | struct kref kref; | ||
153 | struct list_head node; | ||
154 | }; | ||
155 | |||
156 | /* | ||
157 | * a single io request | ||
158 | */ | ||
159 | struct rbd_request { | ||
160 | struct request *rq; /* blk layer request */ | ||
161 | struct bio *bio; /* cloned bio */ | ||
162 | struct page **pages; /* list of used pages */ | ||
163 | u64 len; | ||
164 | }; | ||
165 | |||
166 | /* | ||
167 | * a single device | ||
168 | */ | ||
169 | struct rbd_device { | ||
170 | int id; /* blkdev unique id */ | ||
171 | |||
172 | int major; /* blkdev assigned major */ | ||
173 | struct gendisk *disk; /* blkdev's gendisk and rq */ | ||
174 | struct request_queue *q; | ||
175 | |||
176 | struct ceph_client *client; | ||
177 | struct rbd_client *rbd_client; | ||
178 | |||
179 | char name[DEV_NAME_LEN]; /* blkdev name, e.g. rbd3 */ | ||
180 | |||
181 | spinlock_t lock; /* queue lock */ | ||
182 | |||
183 | struct rbd_image_header header; | ||
184 | char obj[RBD_MAX_OBJ_NAME_LEN]; /* rbd image name */ | ||
185 | int obj_len; | ||
186 | char obj_md_name[RBD_MAX_MD_NAME_LEN]; /* hdr nm. */ | ||
187 | char pool_name[RBD_MAX_POOL_NAME_LEN]; | ||
188 | int poolid; | ||
189 | |||
190 | char snap_name[RBD_MAX_SNAP_NAME_LEN]; | ||
191 | u32 cur_snap; /* index+1 of current snapshot within snap context | ||
192 | 0 - for the head */ | ||
193 | int read_only; | ||
194 | |||
195 | struct list_head node; | ||
196 | }; | ||
197 | |||
198 | static spinlock_t node_lock; /* protects client get/put */ | ||
199 | |||
200 | static struct class *class_rbd; /* /sys/class/rbd */ | ||
201 | static DEFINE_MUTEX(ctl_mutex); /* Serialize open/close/setup/teardown */ | ||
202 | static LIST_HEAD(rbd_dev_list); /* devices */ | ||
203 | static LIST_HEAD(rbd_client_list); /* clients */ | ||
204 | |||
205 | |||
206 | static int rbd_open(struct block_device *bdev, fmode_t mode) | ||
207 | { | ||
208 | struct gendisk *disk = bdev->bd_disk; | ||
209 | struct rbd_device *rbd_dev = disk->private_data; | ||
210 | |||
211 | set_device_ro(bdev, rbd_dev->read_only); | ||
212 | |||
213 | if ((mode & FMODE_WRITE) && rbd_dev->read_only) | ||
214 | return -EROFS; | ||
215 | |||
216 | return 0; | ||
217 | } | ||
218 | |||
219 | static const struct block_device_operations rbd_bd_ops = { | ||
220 | .owner = THIS_MODULE, | ||
221 | .open = rbd_open, | ||
222 | }; | ||
223 | |||
224 | /* | ||
225 | * Initialize an rbd client instance. | ||
226 | * We own *opt. | ||
227 | */ | ||
228 | static struct rbd_client *rbd_client_create(struct ceph_options *opt) | ||
229 | { | ||
230 | struct rbd_client *rbdc; | ||
231 | int ret = -ENOMEM; | ||
232 | |||
233 | dout("rbd_client_create\n"); | ||
234 | rbdc = kmalloc(sizeof(struct rbd_client), GFP_KERNEL); | ||
235 | if (!rbdc) | ||
236 | goto out_opt; | ||
237 | |||
238 | kref_init(&rbdc->kref); | ||
239 | INIT_LIST_HEAD(&rbdc->node); | ||
240 | |||
241 | rbdc->client = ceph_create_client(opt, rbdc); | ||
242 | if (IS_ERR(rbdc->client)) | ||
243 | goto out_rbdc; | ||
244 | opt = NULL; /* Now rbdc->client is responsible for opt */ | ||
245 | |||
246 | ret = ceph_open_session(rbdc->client); | ||
247 | if (ret < 0) | ||
248 | goto out_err; | ||
249 | |||
250 | spin_lock(&node_lock); | ||
251 | list_add_tail(&rbdc->node, &rbd_client_list); | ||
252 | spin_unlock(&node_lock); | ||
253 | |||
254 | dout("rbd_client_create created %p\n", rbdc); | ||
255 | return rbdc; | ||
256 | |||
257 | out_err: | ||
258 | ceph_destroy_client(rbdc->client); | ||
259 | out_rbdc: | ||
260 | kfree(rbdc); | ||
261 | out_opt: | ||
262 | if (opt) | ||
263 | ceph_destroy_options(opt); | ||
264 | return ERR_PTR(ret); | ||
265 | } | ||
266 | |||
267 | /* | ||
268 | * Find a ceph client with specific addr and configuration. | ||
269 | */ | ||
270 | static struct rbd_client *__rbd_client_find(struct ceph_options *opt) | ||
271 | { | ||
272 | struct rbd_client *client_node; | ||
273 | |||
274 | if (opt->flags & CEPH_OPT_NOSHARE) | ||
275 | return NULL; | ||
276 | |||
277 | list_for_each_entry(client_node, &rbd_client_list, node) | ||
278 | if (ceph_compare_options(opt, client_node->client) == 0) | ||
279 | return client_node; | ||
280 | return NULL; | ||
281 | } | ||
282 | |||
283 | /* | ||
284 | * Get a ceph client with specific addr and configuration; if one does | ||
285 | * not exist, create it. | ||
286 | */ | ||
287 | static int rbd_get_client(struct rbd_device *rbd_dev, const char *mon_addr, | ||
288 | char *options) | ||
289 | { | ||
290 | struct rbd_client *rbdc; | ||
291 | struct ceph_options *opt; | ||
292 | int ret; | ||
293 | |||
294 | ret = ceph_parse_options(&opt, options, mon_addr, | ||
295 | mon_addr + strlen(mon_addr), NULL, NULL); | ||
296 | if (ret < 0) | ||
297 | return ret; | ||
298 | |||
299 | spin_lock(&node_lock); | ||
300 | rbdc = __rbd_client_find(opt); | ||
301 | if (rbdc) { | ||
302 | ceph_destroy_options(opt); | ||
303 | |||
304 | /* using an existing client */ | ||
305 | kref_get(&rbdc->kref); | ||
306 | rbd_dev->rbd_client = rbdc; | ||
307 | rbd_dev->client = rbdc->client; | ||
308 | spin_unlock(&node_lock); | ||
309 | return 0; | ||
310 | } | ||
311 | spin_unlock(&node_lock); | ||
312 | |||
313 | rbdc = rbd_client_create(opt); | ||
314 | if (IS_ERR(rbdc)) | ||
315 | return PTR_ERR(rbdc); | ||
316 | |||
317 | rbd_dev->rbd_client = rbdc; | ||
318 | rbd_dev->client = rbdc->client; | ||
319 | return 0; | ||
320 | } | ||
321 | |||
322 | /* | ||
323 | * Destroy ceph client | ||
324 | */ | ||
325 | static void rbd_client_release(struct kref *kref) | ||
326 | { | ||
327 | struct rbd_client *rbdc = container_of(kref, struct rbd_client, kref); | ||
328 | |||
329 | dout("rbd_release_client %p\n", rbdc); | ||
330 | spin_lock(&node_lock); | ||
331 | list_del(&rbdc->node); | ||
332 | spin_unlock(&node_lock); | ||
333 | |||
334 | ceph_destroy_client(rbdc->client); | ||
335 | kfree(rbdc); | ||
336 | } | ||
337 | |||
338 | /* | ||
339 | * Drop reference to ceph client node. If it's not referenced anymore, release | ||
340 | * it. | ||
341 | */ | ||
342 | static void rbd_put_client(struct rbd_device *rbd_dev) | ||
343 | { | ||
344 | kref_put(&rbd_dev->rbd_client->kref, rbd_client_release); | ||
345 | rbd_dev->rbd_client = NULL; | ||
346 | rbd_dev->client = NULL; | ||
347 | } | ||
348 | |||
349 | |||
350 | /* | ||
351 | * Create a new header structure, translate header format from the on-disk | ||
352 | * header. | ||
353 | */ | ||
354 | static int rbd_header_from_disk(struct rbd_image_header *header, | ||
355 | struct rbd_image_header_ondisk *ondisk, | ||
356 | int allocated_snaps, | ||
357 | gfp_t gfp_flags) | ||
358 | { | ||
359 | int i; | ||
360 | u32 snap_count = le32_to_cpu(ondisk->snap_count); | ||
361 | int ret = -ENOMEM; | ||
362 | |||
363 | init_rwsem(&header->snap_rwsem); | ||
364 | |||
365 | header->snap_names_len = le64_to_cpu(ondisk->snap_names_len); | ||
366 | header->snapc = kmalloc(sizeof(struct ceph_snap_context) + | ||
367 | snap_count * | ||
368 | sizeof(struct rbd_image_snap_ondisk), | ||
369 | gfp_flags); | ||
370 | if (!header->snapc) | ||
371 | return -ENOMEM; | ||
372 | if (snap_count) { | ||
373 | header->snap_names = kmalloc(header->snap_names_len, | ||
374 | GFP_KERNEL); | ||
375 | if (!header->snap_names) | ||
376 | goto err_snapc; | ||
377 | header->snap_sizes = kmalloc(snap_count * sizeof(u64), | ||
378 | GFP_KERNEL); | ||
379 | if (!header->snap_sizes) | ||
380 | goto err_names; | ||
381 | } else { | ||
382 | header->snap_names = NULL; | ||
383 | header->snap_sizes = NULL; | ||
384 | } | ||
385 | memcpy(header->block_name, ondisk->block_name, | ||
386 | sizeof(ondisk->block_name)); | ||
387 | |||
388 | header->image_size = le64_to_cpu(ondisk->image_size); | ||
389 | header->obj_order = ondisk->options.order; | ||
390 | header->crypt_type = ondisk->options.crypt_type; | ||
391 | header->comp_type = ondisk->options.comp_type; | ||
392 | |||
393 | atomic_set(&header->snapc->nref, 1); | ||
394 | header->snap_seq = le64_to_cpu(ondisk->snap_seq); | ||
395 | header->snapc->num_snaps = snap_count; | ||
396 | header->total_snaps = snap_count; | ||
397 | |||
398 | if (snap_count && | ||
399 | allocated_snaps == snap_count) { | ||
400 | for (i = 0; i < snap_count; i++) { | ||
401 | header->snapc->snaps[i] = | ||
402 | le64_to_cpu(ondisk->snaps[i].id); | ||
403 | header->snap_sizes[i] = | ||
404 | le64_to_cpu(ondisk->snaps[i].image_size); | ||
405 | } | ||
406 | |||
407 | /* copy snapshot names */ | ||
408 | memcpy(header->snap_names, &ondisk->snaps[i], | ||
409 | header->snap_names_len); | ||
410 | } | ||
411 | |||
412 | return 0; | ||
413 | |||
414 | err_names: | ||
415 | kfree(header->snap_names); | ||
416 | err_snapc: | ||
417 | kfree(header->snapc); | ||
418 | return ret; | ||
419 | } | ||
420 | |||
421 | static int snap_index(struct rbd_image_header *header, int snap_num) | ||
422 | { | ||
423 | return header->total_snaps - snap_num; | ||
424 | } | ||
425 | |||
426 | static u64 cur_snap_id(struct rbd_device *rbd_dev) | ||
427 | { | ||
428 | struct rbd_image_header *header = &rbd_dev->header; | ||
429 | |||
430 | if (!rbd_dev->cur_snap) | ||
431 | return 0; | ||
432 | |||
433 | return header->snapc->snaps[snap_index(header, rbd_dev->cur_snap)]; | ||
434 | } | ||
435 | |||
436 | static int snap_by_name(struct rbd_image_header *header, const char *snap_name, | ||
437 | u64 *seq, u64 *size) | ||
438 | { | ||
439 | int i; | ||
440 | char *p = header->snap_names; | ||
441 | |||
442 | for (i = 0; i < header->total_snaps; i++, p += strlen(p) + 1) { | ||
443 | if (strcmp(snap_name, p) == 0) | ||
444 | break; | ||
445 | } | ||
446 | if (i == header->total_snaps) | ||
447 | return -ENOENT; | ||
448 | if (seq) | ||
449 | *seq = header->snapc->snaps[i]; | ||
450 | |||
451 | if (size) | ||
452 | *size = header->snap_sizes[i]; | ||
453 | |||
454 | return i; | ||
455 | } | ||
456 | |||
457 | static int rbd_header_set_snap(struct rbd_device *dev, | ||
458 | const char *snap_name, | ||
459 | u64 *size) | ||
460 | { | ||
461 | struct rbd_image_header *header = &dev->header; | ||
462 | struct ceph_snap_context *snapc = header->snapc; | ||
463 | int ret = -ENOENT; | ||
464 | |||
465 | down_write(&header->snap_rwsem); | ||
466 | |||
467 | if (!snap_name || | ||
468 | !*snap_name || | ||
469 | strcmp(snap_name, "-") == 0 || | ||
470 | strcmp(snap_name, RBD_SNAP_HEAD_NAME) == 0) { | ||
471 | if (header->total_snaps) | ||
472 | snapc->seq = header->snap_seq; | ||
473 | else | ||
474 | snapc->seq = 0; | ||
475 | dev->cur_snap = 0; | ||
476 | dev->read_only = 0; | ||
477 | if (size) | ||
478 | *size = header->image_size; | ||
479 | } else { | ||
480 | ret = snap_by_name(header, snap_name, &snapc->seq, size); | ||
481 | if (ret < 0) | ||
482 | goto done; | ||
483 | |||
484 | dev->cur_snap = header->total_snaps - ret; | ||
485 | dev->read_only = 1; | ||
486 | } | ||
487 | |||
488 | ret = 0; | ||
489 | done: | ||
490 | up_write(&header->snap_rwsem); | ||
491 | return ret; | ||
492 | } | ||
493 | |||
494 | static void rbd_header_free(struct rbd_image_header *header) | ||
495 | { | ||
496 | kfree(header->snapc); | ||
497 | kfree(header->snap_names); | ||
498 | kfree(header->snap_sizes); | ||
499 | } | ||
500 | |||
501 | /* | ||
502 | * get the actual striped segment name, offset and length | ||
503 | */ | ||
504 | static u64 rbd_get_segment(struct rbd_image_header *header, | ||
505 | const char *block_name, | ||
506 | u64 ofs, u64 len, | ||
507 | char *seg_name, u64 *segofs) | ||
508 | { | ||
509 | u64 seg = ofs >> header->obj_order; | ||
510 | |||
511 | if (seg_name) | ||
512 | snprintf(seg_name, RBD_MAX_SEG_NAME_LEN, | ||
513 | "%s.%012llx", block_name, seg); | ||
514 | |||
515 | ofs = ofs & ((1 << header->obj_order) - 1); | ||
516 | len = min_t(u64, len, (1 << header->obj_order) - ofs); | ||
517 | |||
518 | if (segofs) | ||
519 | *segofs = ofs; | ||
520 | |||
521 | return len; | ||
522 | } | ||
523 | |||
524 | /* | ||
525 | * bio helpers | ||
526 | */ | ||
527 | |||
528 | static void bio_chain_put(struct bio *chain) | ||
529 | { | ||
530 | struct bio *tmp; | ||
531 | |||
532 | while (chain) { | ||
533 | tmp = chain; | ||
534 | chain = chain->bi_next; | ||
535 | bio_put(tmp); | ||
536 | } | ||
537 | } | ||
538 | |||
539 | /* | ||
540 | * zeros a bio chain, starting at specific offset | ||
541 | */ | ||
542 | static void zero_bio_chain(struct bio *chain, int start_ofs) | ||
543 | { | ||
544 | struct bio_vec *bv; | ||
545 | unsigned long flags; | ||
546 | void *buf; | ||
547 | int i; | ||
548 | int pos = 0; | ||
549 | |||
550 | while (chain) { | ||
551 | bio_for_each_segment(bv, chain, i) { | ||
552 | if (pos + bv->bv_len > start_ofs) { | ||
553 | int remainder = max(start_ofs - pos, 0); | ||
554 | buf = bvec_kmap_irq(bv, &flags); | ||
555 | memset(buf + remainder, 0, | ||
556 | bv->bv_len - remainder); | ||
557 | bvec_kunmap_irq(buf, &flags); | ||
558 | } | ||
559 | pos += bv->bv_len; | ||
560 | } | ||
561 | |||
562 | chain = chain->bi_next; | ||
563 | } | ||
564 | } | ||
565 | |||
566 | /* | ||
567 | * bio_chain_clone - clone a chain of bios up to a certain length. | ||
568 | * might return a bio_pair that will need to be released. | ||
569 | */ | ||
570 | static struct bio *bio_chain_clone(struct bio **old, struct bio **next, | ||
571 | struct bio_pair **bp, | ||
572 | int len, gfp_t gfpmask) | ||
573 | { | ||
574 | struct bio *tmp, *old_chain = *old, *new_chain = NULL, *tail = NULL; | ||
575 | int total = 0; | ||
576 | |||
577 | if (*bp) { | ||
578 | bio_pair_release(*bp); | ||
579 | *bp = NULL; | ||
580 | } | ||
581 | |||
582 | while (old_chain && (total < len)) { | ||
583 | tmp = bio_kmalloc(gfpmask, old_chain->bi_max_vecs); | ||
584 | if (!tmp) | ||
585 | goto err_out; | ||
586 | |||
587 | if (total + old_chain->bi_size > len) { | ||
588 | struct bio_pair *bp; | ||
589 | |||
590 | /* | ||
591 | * this split can only happen with a single paged bio, | ||
592 | * split_bio will BUG_ON if this is not the case | ||
593 | */ | ||
594 | dout("bio_chain_clone split! total=%d remaining=%d" | ||
595 | "bi_size=%d\n", | ||
596 | (int)total, (int)len-total, | ||
597 | (int)old_chain->bi_size); | ||
598 | |||
599 | /* split the bio. We'll release it either in the next | ||
600 | call, or it will have to be released outside */ | ||
601 | bp = bio_split(old_chain, (len - total) / 512ULL); | ||
602 | if (!bp) | ||
603 | goto err_out; | ||
604 | |||
605 | __bio_clone(tmp, &bp->bio1); | ||
606 | |||
607 | *next = &bp->bio2; | ||
608 | } else { | ||
609 | __bio_clone(tmp, old_chain); | ||
610 | *next = old_chain->bi_next; | ||
611 | } | ||
612 | |||
613 | tmp->bi_bdev = NULL; | ||
614 | gfpmask &= ~__GFP_WAIT; | ||
615 | tmp->bi_next = NULL; | ||
616 | |||
617 | if (!new_chain) { | ||
618 | new_chain = tail = tmp; | ||
619 | } else { | ||
620 | tail->bi_next = tmp; | ||
621 | tail = tmp; | ||
622 | } | ||
623 | old_chain = old_chain->bi_next; | ||
624 | |||
625 | total += tmp->bi_size; | ||
626 | } | ||
627 | |||
628 | BUG_ON(total < len); | ||
629 | |||
630 | if (tail) | ||
631 | tail->bi_next = NULL; | ||
632 | |||
633 | *old = old_chain; | ||
634 | |||
635 | return new_chain; | ||
636 | |||
637 | err_out: | ||
638 | dout("bio_chain_clone with err\n"); | ||
639 | bio_chain_put(new_chain); | ||
640 | return NULL; | ||
641 | } | ||
642 | |||
643 | /* | ||
644 | * helpers for osd request op vectors. | ||
645 | */ | ||
646 | static int rbd_create_rw_ops(struct ceph_osd_req_op **ops, | ||
647 | int num_ops, | ||
648 | int opcode, | ||
649 | u32 payload_len) | ||
650 | { | ||
651 | *ops = kzalloc(sizeof(struct ceph_osd_req_op) * (num_ops + 1), | ||
652 | GFP_NOIO); | ||
653 | if (!*ops) | ||
654 | return -ENOMEM; | ||
655 | (*ops)[0].op = opcode; | ||
656 | /* | ||
657 | * op extent offset and length will be set later on | ||
658 | * in calc_raw_layout() | ||
659 | */ | ||
660 | (*ops)[0].payload_len = payload_len; | ||
661 | return 0; | ||
662 | } | ||
663 | |||
/*
 * Free an op vector obtained from rbd_create_rw_ops().
 */
static void rbd_destroy_ops(struct ceph_osd_req_op *ops)
{
	kfree(ops);
}
668 | |||
669 | /* | ||
670 | * Send ceph osd request | ||
671 | */ | ||
672 | static int rbd_do_request(struct request *rq, | ||
673 | struct rbd_device *dev, | ||
674 | struct ceph_snap_context *snapc, | ||
675 | u64 snapid, | ||
676 | const char *obj, u64 ofs, u64 len, | ||
677 | struct bio *bio, | ||
678 | struct page **pages, | ||
679 | int num_pages, | ||
680 | int flags, | ||
681 | struct ceph_osd_req_op *ops, | ||
682 | int num_reply, | ||
683 | void (*rbd_cb)(struct ceph_osd_request *req, | ||
684 | struct ceph_msg *msg)) | ||
685 | { | ||
686 | struct ceph_osd_request *req; | ||
687 | struct ceph_file_layout *layout; | ||
688 | int ret; | ||
689 | u64 bno; | ||
690 | struct timespec mtime = CURRENT_TIME; | ||
691 | struct rbd_request *req_data; | ||
692 | struct ceph_osd_request_head *reqhead; | ||
693 | struct rbd_image_header *header = &dev->header; | ||
694 | |||
695 | ret = -ENOMEM; | ||
696 | req_data = kzalloc(sizeof(*req_data), GFP_NOIO); | ||
697 | if (!req_data) | ||
698 | goto done; | ||
699 | |||
700 | dout("rbd_do_request len=%lld ofs=%lld\n", len, ofs); | ||
701 | |||
702 | down_read(&header->snap_rwsem); | ||
703 | |||
704 | req = ceph_osdc_alloc_request(&dev->client->osdc, flags, | ||
705 | snapc, | ||
706 | ops, | ||
707 | false, | ||
708 | GFP_NOIO, pages, bio); | ||
709 | if (IS_ERR(req)) { | ||
710 | up_read(&header->snap_rwsem); | ||
711 | ret = PTR_ERR(req); | ||
712 | goto done_pages; | ||
713 | } | ||
714 | |||
715 | req->r_callback = rbd_cb; | ||
716 | |||
717 | req_data->rq = rq; | ||
718 | req_data->bio = bio; | ||
719 | req_data->pages = pages; | ||
720 | req_data->len = len; | ||
721 | |||
722 | req->r_priv = req_data; | ||
723 | |||
724 | reqhead = req->r_request->front.iov_base; | ||
725 | reqhead->snapid = cpu_to_le64(CEPH_NOSNAP); | ||
726 | |||
727 | strncpy(req->r_oid, obj, sizeof(req->r_oid)); | ||
728 | req->r_oid_len = strlen(req->r_oid); | ||
729 | |||
730 | layout = &req->r_file_layout; | ||
731 | memset(layout, 0, sizeof(*layout)); | ||
732 | layout->fl_stripe_unit = cpu_to_le32(1 << RBD_MAX_OBJ_ORDER); | ||
733 | layout->fl_stripe_count = cpu_to_le32(1); | ||
734 | layout->fl_object_size = cpu_to_le32(1 << RBD_MAX_OBJ_ORDER); | ||
735 | layout->fl_pg_preferred = cpu_to_le32(-1); | ||
736 | layout->fl_pg_pool = cpu_to_le32(dev->poolid); | ||
737 | ceph_calc_raw_layout(&dev->client->osdc, layout, snapid, | ||
738 | ofs, &len, &bno, req, ops); | ||
739 | |||
740 | ceph_osdc_build_request(req, ofs, &len, | ||
741 | ops, | ||
742 | snapc, | ||
743 | &mtime, | ||
744 | req->r_oid, req->r_oid_len); | ||
745 | up_read(&header->snap_rwsem); | ||
746 | |||
747 | ret = ceph_osdc_start_request(&dev->client->osdc, req, false); | ||
748 | if (ret < 0) | ||
749 | goto done_err; | ||
750 | |||
751 | if (!rbd_cb) { | ||
752 | ret = ceph_osdc_wait_request(&dev->client->osdc, req); | ||
753 | ceph_osdc_put_request(req); | ||
754 | } | ||
755 | return ret; | ||
756 | |||
757 | done_err: | ||
758 | bio_chain_put(req_data->bio); | ||
759 | ceph_osdc_put_request(req); | ||
760 | done_pages: | ||
761 | kfree(req_data); | ||
762 | done: | ||
763 | if (rq) | ||
764 | blk_end_request(rq, ret, len); | ||
765 | return ret; | ||
766 | } | ||
767 | |||
768 | /* | ||
769 | * Ceph osd op callback | ||
770 | */ | ||
771 | static void rbd_req_cb(struct ceph_osd_request *req, struct ceph_msg *msg) | ||
772 | { | ||
773 | struct rbd_request *req_data = req->r_priv; | ||
774 | struct ceph_osd_reply_head *replyhead; | ||
775 | struct ceph_osd_op *op; | ||
776 | __s32 rc; | ||
777 | u64 bytes; | ||
778 | int read_op; | ||
779 | |||
780 | /* parse reply */ | ||
781 | replyhead = msg->front.iov_base; | ||
782 | WARN_ON(le32_to_cpu(replyhead->num_ops) == 0); | ||
783 | op = (void *)(replyhead + 1); | ||
784 | rc = le32_to_cpu(replyhead->result); | ||
785 | bytes = le64_to_cpu(op->extent.length); | ||
786 | read_op = (le32_to_cpu(op->op) == CEPH_OSD_OP_READ); | ||
787 | |||
788 | dout("rbd_req_cb bytes=%lld readop=%d rc=%d\n", bytes, read_op, rc); | ||
789 | |||
790 | if (rc == -ENOENT && read_op) { | ||
791 | zero_bio_chain(req_data->bio, 0); | ||
792 | rc = 0; | ||
793 | } else if (rc == 0 && read_op && bytes < req_data->len) { | ||
794 | zero_bio_chain(req_data->bio, bytes); | ||
795 | bytes = req_data->len; | ||
796 | } | ||
797 | |||
798 | blk_end_request(req_data->rq, rc, bytes); | ||
799 | |||
800 | if (req_data->bio) | ||
801 | bio_chain_put(req_data->bio); | ||
802 | |||
803 | ceph_osdc_put_request(req); | ||
804 | kfree(req_data); | ||
805 | } | ||
806 | |||
807 | /* | ||
808 | * Do a synchronous ceph osd operation | ||
809 | */ | ||
810 | static int rbd_req_sync_op(struct rbd_device *dev, | ||
811 | struct ceph_snap_context *snapc, | ||
812 | u64 snapid, | ||
813 | int opcode, | ||
814 | int flags, | ||
815 | struct ceph_osd_req_op *orig_ops, | ||
816 | int num_reply, | ||
817 | const char *obj, | ||
818 | u64 ofs, u64 len, | ||
819 | char *buf) | ||
820 | { | ||
821 | int ret; | ||
822 | struct page **pages; | ||
823 | int num_pages; | ||
824 | struct ceph_osd_req_op *ops = orig_ops; | ||
825 | u32 payload_len; | ||
826 | |||
827 | num_pages = calc_pages_for(ofs , len); | ||
828 | pages = ceph_alloc_page_vector(num_pages, GFP_KERNEL); | ||
829 | if (IS_ERR(pages)) | ||
830 | return PTR_ERR(pages); | ||
831 | |||
832 | if (!orig_ops) { | ||
833 | payload_len = (flags & CEPH_OSD_FLAG_WRITE ? len : 0); | ||
834 | ret = rbd_create_rw_ops(&ops, 1, opcode, payload_len); | ||
835 | if (ret < 0) | ||
836 | goto done; | ||
837 | |||
838 | if ((flags & CEPH_OSD_FLAG_WRITE) && buf) { | ||
839 | ret = ceph_copy_to_page_vector(pages, buf, ofs, len); | ||
840 | if (ret < 0) | ||
841 | goto done_ops; | ||
842 | } | ||
843 | } | ||
844 | |||
845 | ret = rbd_do_request(NULL, dev, snapc, snapid, | ||
846 | obj, ofs, len, NULL, | ||
847 | pages, num_pages, | ||
848 | flags, | ||
849 | ops, | ||
850 | 2, | ||
851 | NULL); | ||
852 | if (ret < 0) | ||
853 | goto done_ops; | ||
854 | |||
855 | if ((flags & CEPH_OSD_FLAG_READ) && buf) | ||
856 | ret = ceph_copy_from_page_vector(pages, buf, ofs, ret); | ||
857 | |||
858 | done_ops: | ||
859 | if (!orig_ops) | ||
860 | rbd_destroy_ops(ops); | ||
861 | done: | ||
862 | ceph_release_page_vector(pages, num_pages); | ||
863 | return ret; | ||
864 | } | ||
865 | |||
866 | /* | ||
867 | * Do an asynchronous ceph osd operation | ||
868 | */ | ||
869 | static int rbd_do_op(struct request *rq, | ||
870 | struct rbd_device *rbd_dev , | ||
871 | struct ceph_snap_context *snapc, | ||
872 | u64 snapid, | ||
873 | int opcode, int flags, int num_reply, | ||
874 | u64 ofs, u64 len, | ||
875 | struct bio *bio) | ||
876 | { | ||
877 | char *seg_name; | ||
878 | u64 seg_ofs; | ||
879 | u64 seg_len; | ||
880 | int ret; | ||
881 | struct ceph_osd_req_op *ops; | ||
882 | u32 payload_len; | ||
883 | |||
884 | seg_name = kmalloc(RBD_MAX_SEG_NAME_LEN + 1, GFP_NOIO); | ||
885 | if (!seg_name) | ||
886 | return -ENOMEM; | ||
887 | |||
888 | seg_len = rbd_get_segment(&rbd_dev->header, | ||
889 | rbd_dev->header.block_name, | ||
890 | ofs, len, | ||
891 | seg_name, &seg_ofs); | ||
892 | |||
893 | payload_len = (flags & CEPH_OSD_FLAG_WRITE ? seg_len : 0); | ||
894 | |||
895 | ret = rbd_create_rw_ops(&ops, 1, opcode, payload_len); | ||
896 | if (ret < 0) | ||
897 | goto done; | ||
898 | |||
899 | /* we've taken care of segment sizes earlier when we | ||
900 | cloned the bios. We should never have a segment | ||
901 | truncated at this point */ | ||
902 | BUG_ON(seg_len < len); | ||
903 | |||
904 | ret = rbd_do_request(rq, rbd_dev, snapc, snapid, | ||
905 | seg_name, seg_ofs, seg_len, | ||
906 | bio, | ||
907 | NULL, 0, | ||
908 | flags, | ||
909 | ops, | ||
910 | num_reply, | ||
911 | rbd_req_cb); | ||
912 | done: | ||
913 | kfree(seg_name); | ||
914 | return ret; | ||
915 | } | ||
916 | |||
917 | /* | ||
918 | * Request async osd write | ||
919 | */ | ||
920 | static int rbd_req_write(struct request *rq, | ||
921 | struct rbd_device *rbd_dev, | ||
922 | struct ceph_snap_context *snapc, | ||
923 | u64 ofs, u64 len, | ||
924 | struct bio *bio) | ||
925 | { | ||
926 | return rbd_do_op(rq, rbd_dev, snapc, CEPH_NOSNAP, | ||
927 | CEPH_OSD_OP_WRITE, | ||
928 | CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK, | ||
929 | 2, | ||
930 | ofs, len, bio); | ||
931 | } | ||
932 | |||
933 | /* | ||
934 | * Request async osd read | ||
935 | */ | ||
936 | static int rbd_req_read(struct request *rq, | ||
937 | struct rbd_device *rbd_dev, | ||
938 | u64 snapid, | ||
939 | u64 ofs, u64 len, | ||
940 | struct bio *bio) | ||
941 | { | ||
942 | return rbd_do_op(rq, rbd_dev, NULL, | ||
943 | (snapid ? snapid : CEPH_NOSNAP), | ||
944 | CEPH_OSD_OP_READ, | ||
945 | CEPH_OSD_FLAG_READ, | ||
946 | 2, | ||
947 | ofs, len, bio); | ||
948 | } | ||
949 | |||
950 | /* | ||
951 | * Request sync osd read | ||
952 | */ | ||
953 | static int rbd_req_sync_read(struct rbd_device *dev, | ||
954 | struct ceph_snap_context *snapc, | ||
955 | u64 snapid, | ||
956 | const char *obj, | ||
957 | u64 ofs, u64 len, | ||
958 | char *buf) | ||
959 | { | ||
960 | return rbd_req_sync_op(dev, NULL, | ||
961 | (snapid ? snapid : CEPH_NOSNAP), | ||
962 | CEPH_OSD_OP_READ, | ||
963 | CEPH_OSD_FLAG_READ, | ||
964 | NULL, | ||
965 | 1, obj, ofs, len, buf); | ||
966 | } | ||
967 | |||
968 | /* | ||
969 | * Request sync osd read | ||
970 | */ | ||
971 | static int rbd_req_sync_rollback_obj(struct rbd_device *dev, | ||
972 | u64 snapid, | ||
973 | const char *obj) | ||
974 | { | ||
975 | struct ceph_osd_req_op *ops; | ||
976 | int ret = rbd_create_rw_ops(&ops, 1, CEPH_OSD_OP_ROLLBACK, 0); | ||
977 | if (ret < 0) | ||
978 | return ret; | ||
979 | |||
980 | ops[0].snap.snapid = snapid; | ||
981 | |||
982 | ret = rbd_req_sync_op(dev, NULL, | ||
983 | CEPH_NOSNAP, | ||
984 | 0, | ||
985 | CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK, | ||
986 | ops, | ||
987 | 1, obj, 0, 0, NULL); | ||
988 | |||
989 | rbd_destroy_ops(ops); | ||
990 | |||
991 | if (ret < 0) | ||
992 | return ret; | ||
993 | |||
994 | return ret; | ||
995 | } | ||
996 | |||
997 | /* | ||
998 | * Request sync osd read | ||
999 | */ | ||
1000 | static int rbd_req_sync_exec(struct rbd_device *dev, | ||
1001 | const char *obj, | ||
1002 | const char *cls, | ||
1003 | const char *method, | ||
1004 | const char *data, | ||
1005 | int len) | ||
1006 | { | ||
1007 | struct ceph_osd_req_op *ops; | ||
1008 | int cls_len = strlen(cls); | ||
1009 | int method_len = strlen(method); | ||
1010 | int ret = rbd_create_rw_ops(&ops, 1, CEPH_OSD_OP_CALL, | ||
1011 | cls_len + method_len + len); | ||
1012 | if (ret < 0) | ||
1013 | return ret; | ||
1014 | |||
1015 | ops[0].cls.class_name = cls; | ||
1016 | ops[0].cls.class_len = (__u8)cls_len; | ||
1017 | ops[0].cls.method_name = method; | ||
1018 | ops[0].cls.method_len = (__u8)method_len; | ||
1019 | ops[0].cls.argc = 0; | ||
1020 | ops[0].cls.indata = data; | ||
1021 | ops[0].cls.indata_len = len; | ||
1022 | |||
1023 | ret = rbd_req_sync_op(dev, NULL, | ||
1024 | CEPH_NOSNAP, | ||
1025 | 0, | ||
1026 | CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK, | ||
1027 | ops, | ||
1028 | 1, obj, 0, 0, NULL); | ||
1029 | |||
1030 | rbd_destroy_ops(ops); | ||
1031 | |||
1032 | dout("cls_exec returned %d\n", ret); | ||
1033 | return ret; | ||
1034 | } | ||
1035 | |||
/*
 * block device queue callback
 *
 * Drains the request queue.  Each filesystem request is cut into pieces
 * that each fit inside one segment (as computed by rbd_get_segment());
 * for every piece a clone of the matching part of the bio chain is made
 * and handed to rbd_req_write() or rbd_req_read().
 *
 * q->queue_lock is dropped while the OSD requests are being built and
 * re-taken before the next request is fetched.
 * NOTE(review): presumably entered with q->queue_lock held, as the first
 * lock operation here is an unlock -- confirm against the block layer.
 */
static void rbd_rq_fn(struct request_queue *q)
{
	struct rbd_device *rbd_dev = q->queuedata;
	struct request *rq;
	struct bio_pair *bp = NULL;

	rq = blk_fetch_request(q);

	while (1) {
		struct bio *bio;
		struct bio *rq_bio, *next_bio = NULL;
		bool do_write;
		int size, op_size = 0;
		u64 ofs;

		/* peek at request from block layer */
		if (!rq)
			break;

		dout("fetched request\n");

		/* filter out block requests we don't understand */
		if ((rq->cmd_type != REQ_TYPE_FS)) {
			__blk_end_request_all(rq, 0);
			goto next;
		}

		/* deduce our operation (read, write) */
		do_write = (rq_data_dir(rq) == WRITE);

		/* request length in bytes and starting byte offset */
		size = blk_rq_bytes(rq);
		ofs = blk_rq_pos(rq) * 512ULL;
		rq_bio = rq->bio;
		/* writes are refused while the device is marked read-only */
		if (do_write && rbd_dev->read_only) {
			__blk_end_request_all(rq, -EROFS);
			goto next;
		}

		/* the lock is not held while requests are built and sent */
		spin_unlock_irq(q->queue_lock);

		dout("%s 0x%x bytes at 0x%llx\n",
		     do_write ? "write" : "read",
		     size, blk_rq_pos(rq) * 512ULL);

		do {
			/* a bio clone to be passed down to OSD req */
			dout("rq->bio->bi_vcnt=%d\n", rq->bio->bi_vcnt);
			/* how much of the remainder fits in this segment */
			op_size = rbd_get_segment(&rbd_dev->header,
						  rbd_dev->header.block_name,
						  ofs, size,
						  NULL, NULL);
			bio = bio_chain_clone(&rq_bio, &next_bio, &bp,
					      op_size, GFP_ATOMIC);
			if (!bio) {
				/* re-take the lock before completing the request */
				spin_lock_irq(q->queue_lock);
				__blk_end_request_all(rq, -ENOMEM);
				goto next;
			}

			/* init OSD command: write or read */
			/* the return values of both helpers are ignored here */
			if (do_write)
				rbd_req_write(rq, rbd_dev,
					      rbd_dev->header.snapc,
					      ofs,
					      op_size, bio);
			else
				rbd_req_read(rq, rbd_dev,
					     cur_snap_id(rbd_dev),
					     ofs,
					     op_size, bio);

			size -= op_size;
			ofs += op_size;

			/* continue from where bio_chain_clone() stopped */
			rq_bio = next_bio;
		} while (size > 0);

		if (bp)
			bio_pair_release(bp);

		spin_lock_irq(q->queue_lock);
next:
		rq = blk_fetch_request(q);
	}
}
1124 | |||
1125 | /* | ||
1126 | * a queue callback. Makes sure that we don't create a bio that spans across | ||
1127 | * multiple osd objects. One exception would be with a single page bios, | ||
1128 | * which we handle later at bio_chain_clone | ||
1129 | */ | ||
1130 | static int rbd_merge_bvec(struct request_queue *q, struct bvec_merge_data *bmd, | ||
1131 | struct bio_vec *bvec) | ||
1132 | { | ||
1133 | struct rbd_device *rbd_dev = q->queuedata; | ||
1134 | unsigned int chunk_sectors = 1 << (rbd_dev->header.obj_order - 9); | ||
1135 | sector_t sector = bmd->bi_sector + get_start_sect(bmd->bi_bdev); | ||
1136 | unsigned int bio_sectors = bmd->bi_size >> 9; | ||
1137 | int max; | ||
1138 | |||
1139 | max = (chunk_sectors - ((sector & (chunk_sectors - 1)) | ||
1140 | + bio_sectors)) << 9; | ||
1141 | if (max < 0) | ||
1142 | max = 0; /* bio_add cannot handle a negative return */ | ||
1143 | if (max <= bvec->bv_len && bio_sectors == 0) | ||
1144 | return bvec->bv_len; | ||
1145 | return max; | ||
1146 | } | ||
1147 | |||
1148 | static void rbd_free_disk(struct rbd_device *rbd_dev) | ||
1149 | { | ||
1150 | struct gendisk *disk = rbd_dev->disk; | ||
1151 | |||
1152 | if (!disk) | ||
1153 | return; | ||
1154 | |||
1155 | rbd_header_free(&rbd_dev->header); | ||
1156 | |||
1157 | if (disk->flags & GENHD_FL_UP) | ||
1158 | del_gendisk(disk); | ||
1159 | if (disk->queue) | ||
1160 | blk_cleanup_queue(disk->queue); | ||
1161 | put_disk(disk); | ||
1162 | } | ||
1163 | |||
1164 | /* | ||
1165 | * reload the ondisk the header | ||
1166 | */ | ||
1167 | static int rbd_read_header(struct rbd_device *rbd_dev, | ||
1168 | struct rbd_image_header *header) | ||
1169 | { | ||
1170 | ssize_t rc; | ||
1171 | struct rbd_image_header_ondisk *dh; | ||
1172 | int snap_count = 0; | ||
1173 | u64 snap_names_len = 0; | ||
1174 | |||
1175 | while (1) { | ||
1176 | int len = sizeof(*dh) + | ||
1177 | snap_count * sizeof(struct rbd_image_snap_ondisk) + | ||
1178 | snap_names_len; | ||
1179 | |||
1180 | rc = -ENOMEM; | ||
1181 | dh = kmalloc(len, GFP_KERNEL); | ||
1182 | if (!dh) | ||
1183 | return -ENOMEM; | ||
1184 | |||
1185 | rc = rbd_req_sync_read(rbd_dev, | ||
1186 | NULL, CEPH_NOSNAP, | ||
1187 | rbd_dev->obj_md_name, | ||
1188 | 0, len, | ||
1189 | (char *)dh); | ||
1190 | if (rc < 0) | ||
1191 | goto out_dh; | ||
1192 | |||
1193 | rc = rbd_header_from_disk(header, dh, snap_count, GFP_KERNEL); | ||
1194 | if (rc < 0) | ||
1195 | goto out_dh; | ||
1196 | |||
1197 | if (snap_count != header->total_snaps) { | ||
1198 | snap_count = header->total_snaps; | ||
1199 | snap_names_len = header->snap_names_len; | ||
1200 | rbd_header_free(header); | ||
1201 | kfree(dh); | ||
1202 | continue; | ||
1203 | } | ||
1204 | break; | ||
1205 | } | ||
1206 | |||
1207 | out_dh: | ||
1208 | kfree(dh); | ||
1209 | return rc; | ||
1210 | } | ||
1211 | |||
1212 | /* | ||
1213 | * create a snapshot | ||
1214 | */ | ||
1215 | static int rbd_header_add_snap(struct rbd_device *dev, | ||
1216 | const char *snap_name, | ||
1217 | gfp_t gfp_flags) | ||
1218 | { | ||
1219 | int name_len = strlen(snap_name); | ||
1220 | u64 new_snapid; | ||
1221 | int ret; | ||
1222 | void *data, *data_start, *data_end; | ||
1223 | |||
1224 | /* we should create a snapshot only if we're pointing at the head */ | ||
1225 | if (dev->cur_snap) | ||
1226 | return -EINVAL; | ||
1227 | |||
1228 | ret = ceph_monc_create_snapid(&dev->client->monc, dev->poolid, | ||
1229 | &new_snapid); | ||
1230 | dout("created snapid=%lld\n", new_snapid); | ||
1231 | if (ret < 0) | ||
1232 | return ret; | ||
1233 | |||
1234 | data = kmalloc(name_len + 16, gfp_flags); | ||
1235 | if (!data) | ||
1236 | return -ENOMEM; | ||
1237 | |||
1238 | data_start = data; | ||
1239 | data_end = data + name_len + 16; | ||
1240 | |||
1241 | ceph_encode_string_safe(&data, data_end, snap_name, name_len, bad); | ||
1242 | ceph_encode_64_safe(&data, data_end, new_snapid, bad); | ||
1243 | |||
1244 | ret = rbd_req_sync_exec(dev, dev->obj_md_name, "rbd", "snap_add", | ||
1245 | data_start, data - data_start); | ||
1246 | |||
1247 | kfree(data_start); | ||
1248 | |||
1249 | if (ret < 0) | ||
1250 | return ret; | ||
1251 | |||
1252 | dev->header.snapc->seq = new_snapid; | ||
1253 | |||
1254 | return 0; | ||
1255 | bad: | ||
1256 | return -ERANGE; | ||
1257 | } | ||
1258 | |||
1259 | /* | ||
1260 | * only read the first part of the ondisk header, without the snaps info | ||
1261 | */ | ||
1262 | static int rbd_update_snaps(struct rbd_device *rbd_dev) | ||
1263 | { | ||
1264 | int ret; | ||
1265 | struct rbd_image_header h; | ||
1266 | u64 snap_seq; | ||
1267 | |||
1268 | ret = rbd_read_header(rbd_dev, &h); | ||
1269 | if (ret < 0) | ||
1270 | return ret; | ||
1271 | |||
1272 | down_write(&rbd_dev->header.snap_rwsem); | ||
1273 | |||
1274 | snap_seq = rbd_dev->header.snapc->seq; | ||
1275 | |||
1276 | kfree(rbd_dev->header.snapc); | ||
1277 | kfree(rbd_dev->header.snap_names); | ||
1278 | kfree(rbd_dev->header.snap_sizes); | ||
1279 | |||
1280 | rbd_dev->header.total_snaps = h.total_snaps; | ||
1281 | rbd_dev->header.snapc = h.snapc; | ||
1282 | rbd_dev->header.snap_names = h.snap_names; | ||
1283 | rbd_dev->header.snap_sizes = h.snap_sizes; | ||
1284 | rbd_dev->header.snapc->seq = snap_seq; | ||
1285 | |||
1286 | up_write(&rbd_dev->header.snap_rwsem); | ||
1287 | |||
1288 | return 0; | ||
1289 | } | ||
1290 | |||
1291 | static int rbd_init_disk(struct rbd_device *rbd_dev) | ||
1292 | { | ||
1293 | struct gendisk *disk; | ||
1294 | struct request_queue *q; | ||
1295 | int rc; | ||
1296 | u64 total_size = 0; | ||
1297 | |||
1298 | /* contact OSD, request size info about the object being mapped */ | ||
1299 | rc = rbd_read_header(rbd_dev, &rbd_dev->header); | ||
1300 | if (rc) | ||
1301 | return rc; | ||
1302 | |||
1303 | rc = rbd_header_set_snap(rbd_dev, rbd_dev->snap_name, &total_size); | ||
1304 | if (rc) | ||
1305 | return rc; | ||
1306 | |||
1307 | /* create gendisk info */ | ||
1308 | rc = -ENOMEM; | ||
1309 | disk = alloc_disk(RBD_MINORS_PER_MAJOR); | ||
1310 | if (!disk) | ||
1311 | goto out; | ||
1312 | |||
1313 | sprintf(disk->disk_name, DRV_NAME "%d", rbd_dev->id); | ||
1314 | disk->major = rbd_dev->major; | ||
1315 | disk->first_minor = 0; | ||
1316 | disk->fops = &rbd_bd_ops; | ||
1317 | disk->private_data = rbd_dev; | ||
1318 | |||
1319 | /* init rq */ | ||
1320 | rc = -ENOMEM; | ||
1321 | q = blk_init_queue(rbd_rq_fn, &rbd_dev->lock); | ||
1322 | if (!q) | ||
1323 | goto out_disk; | ||
1324 | blk_queue_merge_bvec(q, rbd_merge_bvec); | ||
1325 | disk->queue = q; | ||
1326 | |||
1327 | q->queuedata = rbd_dev; | ||
1328 | |||
1329 | rbd_dev->disk = disk; | ||
1330 | rbd_dev->q = q; | ||
1331 | |||
1332 | /* finally, announce the disk to the world */ | ||
1333 | set_capacity(disk, total_size / 512ULL); | ||
1334 | add_disk(disk); | ||
1335 | |||
1336 | pr_info("%s: added with size 0x%llx\n", | ||
1337 | disk->disk_name, (unsigned long long)total_size); | ||
1338 | return 0; | ||
1339 | |||
1340 | out_disk: | ||
1341 | put_disk(disk); | ||
1342 | out: | ||
1343 | return rc; | ||
1344 | } | ||
1345 | |||
1346 | /******************************************************************** | ||
1347 | * /sys/class/rbd/ | ||
1348 | * add map rados objects to blkdev | ||
1349 | * remove unmap rados objects | ||
1350 | * list show mappings | ||
1351 | *******************************************************************/ | ||
1352 | |||
/*
 * Release callback for the rbd class: frees the class structure.
 */
static void class_rbd_release(struct class *cls)
{
	kfree(cls);
}
1357 | |||
1358 | static ssize_t class_rbd_list(struct class *c, | ||
1359 | struct class_attribute *attr, | ||
1360 | char *data) | ||
1361 | { | ||
1362 | int n = 0; | ||
1363 | struct list_head *tmp; | ||
1364 | int max = PAGE_SIZE; | ||
1365 | |||
1366 | mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); | ||
1367 | |||
1368 | n += snprintf(data, max, | ||
1369 | "#id\tmajor\tclient_name\tpool\tname\tsnap\tKB\n"); | ||
1370 | |||
1371 | list_for_each(tmp, &rbd_dev_list) { | ||
1372 | struct rbd_device *rbd_dev; | ||
1373 | |||
1374 | rbd_dev = list_entry(tmp, struct rbd_device, node); | ||
1375 | n += snprintf(data+n, max-n, | ||
1376 | "%d\t%d\tclient%lld\t%s\t%s\t%s\t%lld\n", | ||
1377 | rbd_dev->id, | ||
1378 | rbd_dev->major, | ||
1379 | ceph_client_id(rbd_dev->client), | ||
1380 | rbd_dev->pool_name, | ||
1381 | rbd_dev->obj, rbd_dev->snap_name, | ||
1382 | rbd_dev->header.image_size >> 10); | ||
1383 | if (n == max) | ||
1384 | break; | ||
1385 | } | ||
1386 | |||
1387 | mutex_unlock(&ctl_mutex); | ||
1388 | return n; | ||
1389 | } | ||
1390 | |||
1391 | static ssize_t class_rbd_add(struct class *c, | ||
1392 | struct class_attribute *attr, | ||
1393 | const char *buf, size_t count) | ||
1394 | { | ||
1395 | struct ceph_osd_client *osdc; | ||
1396 | struct rbd_device *rbd_dev; | ||
1397 | ssize_t rc = -ENOMEM; | ||
1398 | int irc, new_id = 0; | ||
1399 | struct list_head *tmp; | ||
1400 | char *mon_dev_name; | ||
1401 | char *options; | ||
1402 | |||
1403 | if (!try_module_get(THIS_MODULE)) | ||
1404 | return -ENODEV; | ||
1405 | |||
1406 | mon_dev_name = kmalloc(RBD_MAX_OPT_LEN, GFP_KERNEL); | ||
1407 | if (!mon_dev_name) | ||
1408 | goto err_out_mod; | ||
1409 | |||
1410 | options = kmalloc(RBD_MAX_OPT_LEN, GFP_KERNEL); | ||
1411 | if (!options) | ||
1412 | goto err_mon_dev; | ||
1413 | |||
1414 | /* new rbd_device object */ | ||
1415 | rbd_dev = kzalloc(sizeof(*rbd_dev), GFP_KERNEL); | ||
1416 | if (!rbd_dev) | ||
1417 | goto err_out_opt; | ||
1418 | |||
1419 | /* static rbd_device initialization */ | ||
1420 | spin_lock_init(&rbd_dev->lock); | ||
1421 | INIT_LIST_HEAD(&rbd_dev->node); | ||
1422 | |||
1423 | /* generate unique id: find highest unique id, add one */ | ||
1424 | mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); | ||
1425 | |||
1426 | list_for_each(tmp, &rbd_dev_list) { | ||
1427 | struct rbd_device *rbd_dev; | ||
1428 | |||
1429 | rbd_dev = list_entry(tmp, struct rbd_device, node); | ||
1430 | if (rbd_dev->id >= new_id) | ||
1431 | new_id = rbd_dev->id + 1; | ||
1432 | } | ||
1433 | |||
1434 | rbd_dev->id = new_id; | ||
1435 | |||
1436 | /* add to global list */ | ||
1437 | list_add_tail(&rbd_dev->node, &rbd_dev_list); | ||
1438 | |||
1439 | /* parse add command */ | ||
1440 | if (sscanf(buf, "%" __stringify(RBD_MAX_OPT_LEN) "s " | ||
1441 | "%" __stringify(RBD_MAX_OPT_LEN) "s " | ||
1442 | "%" __stringify(RBD_MAX_POOL_NAME_LEN) "s " | ||
1443 | "%" __stringify(RBD_MAX_OBJ_NAME_LEN) "s" | ||
1444 | "%" __stringify(RBD_MAX_SNAP_NAME_LEN) "s", | ||
1445 | mon_dev_name, options, rbd_dev->pool_name, | ||
1446 | rbd_dev->obj, rbd_dev->snap_name) < 4) { | ||
1447 | rc = -EINVAL; | ||
1448 | goto err_out_slot; | ||
1449 | } | ||
1450 | |||
1451 | if (rbd_dev->snap_name[0] == 0) | ||
1452 | rbd_dev->snap_name[0] = '-'; | ||
1453 | |||
1454 | rbd_dev->obj_len = strlen(rbd_dev->obj); | ||
1455 | snprintf(rbd_dev->obj_md_name, sizeof(rbd_dev->obj_md_name), "%s%s", | ||
1456 | rbd_dev->obj, RBD_SUFFIX); | ||
1457 | |||
1458 | /* initialize rest of new object */ | ||
1459 | snprintf(rbd_dev->name, DEV_NAME_LEN, DRV_NAME "%d", rbd_dev->id); | ||
1460 | rc = rbd_get_client(rbd_dev, mon_dev_name, options); | ||
1461 | if (rc < 0) | ||
1462 | goto err_out_slot; | ||
1463 | |||
1464 | mutex_unlock(&ctl_mutex); | ||
1465 | |||
1466 | /* pick the pool */ | ||
1467 | osdc = &rbd_dev->client->osdc; | ||
1468 | rc = ceph_pg_poolid_by_name(osdc->osdmap, rbd_dev->pool_name); | ||
1469 | if (rc < 0) | ||
1470 | goto err_out_client; | ||
1471 | rbd_dev->poolid = rc; | ||
1472 | |||
1473 | /* register our block device */ | ||
1474 | irc = register_blkdev(0, rbd_dev->name); | ||
1475 | if (irc < 0) { | ||
1476 | rc = irc; | ||
1477 | goto err_out_client; | ||
1478 | } | ||
1479 | rbd_dev->major = irc; | ||
1480 | |||
1481 | /* set up and announce blkdev mapping */ | ||
1482 | rc = rbd_init_disk(rbd_dev); | ||
1483 | if (rc) | ||
1484 | goto err_out_blkdev; | ||
1485 | |||
1486 | return count; | ||
1487 | |||
1488 | err_out_blkdev: | ||
1489 | unregister_blkdev(rbd_dev->major, rbd_dev->name); | ||
1490 | err_out_client: | ||
1491 | rbd_put_client(rbd_dev); | ||
1492 | mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); | ||
1493 | err_out_slot: | ||
1494 | list_del_init(&rbd_dev->node); | ||
1495 | mutex_unlock(&ctl_mutex); | ||
1496 | |||
1497 | kfree(rbd_dev); | ||
1498 | err_out_opt: | ||
1499 | kfree(options); | ||
1500 | err_mon_dev: | ||
1501 | kfree(mon_dev_name); | ||
1502 | err_out_mod: | ||
1503 | dout("Error adding device %s\n", buf); | ||
1504 | module_put(THIS_MODULE); | ||
1505 | return rc; | ||
1506 | } | ||
1507 | |||
1508 | static struct rbd_device *__rbd_get_dev(unsigned long id) | ||
1509 | { | ||
1510 | struct list_head *tmp; | ||
1511 | struct rbd_device *rbd_dev; | ||
1512 | |||
1513 | list_for_each(tmp, &rbd_dev_list) { | ||
1514 | rbd_dev = list_entry(tmp, struct rbd_device, node); | ||
1515 | if (rbd_dev->id == id) | ||
1516 | return rbd_dev; | ||
1517 | } | ||
1518 | return NULL; | ||
1519 | } | ||
1520 | |||
1521 | static ssize_t class_rbd_remove(struct class *c, | ||
1522 | struct class_attribute *attr, | ||
1523 | const char *buf, | ||
1524 | size_t count) | ||
1525 | { | ||
1526 | struct rbd_device *rbd_dev = NULL; | ||
1527 | int target_id, rc; | ||
1528 | unsigned long ul; | ||
1529 | |||
1530 | rc = strict_strtoul(buf, 10, &ul); | ||
1531 | if (rc) | ||
1532 | return rc; | ||
1533 | |||
1534 | /* convert to int; abort if we lost anything in the conversion */ | ||
1535 | target_id = (int) ul; | ||
1536 | if (target_id != ul) | ||
1537 | return -EINVAL; | ||
1538 | |||
1539 | /* remove object from list immediately */ | ||
1540 | mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); | ||
1541 | |||
1542 | rbd_dev = __rbd_get_dev(target_id); | ||
1543 | if (rbd_dev) | ||
1544 | list_del_init(&rbd_dev->node); | ||
1545 | |||
1546 | mutex_unlock(&ctl_mutex); | ||
1547 | |||
1548 | if (!rbd_dev) | ||
1549 | return -ENOENT; | ||
1550 | |||
1551 | rbd_put_client(rbd_dev); | ||
1552 | |||
1553 | /* clean up and free blkdev */ | ||
1554 | rbd_free_disk(rbd_dev); | ||
1555 | unregister_blkdev(rbd_dev->major, rbd_dev->name); | ||
1556 | kfree(rbd_dev); | ||
1557 | |||
1558 | /* release module ref */ | ||
1559 | module_put(THIS_MODULE); | ||
1560 | |||
1561 | return count; | ||
1562 | } | ||
1563 | |||
1564 | static ssize_t class_rbd_snaps_list(struct class *c, | ||
1565 | struct class_attribute *attr, | ||
1566 | char *data) | ||
1567 | { | ||
1568 | struct rbd_device *rbd_dev = NULL; | ||
1569 | struct list_head *tmp; | ||
1570 | struct rbd_image_header *header; | ||
1571 | int i, n = 0, max = PAGE_SIZE; | ||
1572 | int ret; | ||
1573 | |||
1574 | mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); | ||
1575 | |||
1576 | n += snprintf(data, max, "#id\tsnap\tKB\n"); | ||
1577 | |||
1578 | list_for_each(tmp, &rbd_dev_list) { | ||
1579 | char *names, *p; | ||
1580 | struct ceph_snap_context *snapc; | ||
1581 | |||
1582 | rbd_dev = list_entry(tmp, struct rbd_device, node); | ||
1583 | header = &rbd_dev->header; | ||
1584 | |||
1585 | down_read(&header->snap_rwsem); | ||
1586 | |||
1587 | names = header->snap_names; | ||
1588 | snapc = header->snapc; | ||
1589 | |||
1590 | n += snprintf(data + n, max - n, "%d\t%s\t%lld%s\n", | ||
1591 | rbd_dev->id, RBD_SNAP_HEAD_NAME, | ||
1592 | header->image_size >> 10, | ||
1593 | (!rbd_dev->cur_snap ? " (*)" : "")); | ||
1594 | if (n == max) | ||
1595 | break; | ||
1596 | |||
1597 | p = names; | ||
1598 | for (i = 0; i < header->total_snaps; i++, p += strlen(p) + 1) { | ||
1599 | n += snprintf(data + n, max - n, "%d\t%s\t%lld%s\n", | ||
1600 | rbd_dev->id, p, header->snap_sizes[i] >> 10, | ||
1601 | (rbd_dev->cur_snap && | ||
1602 | (snap_index(header, i) == rbd_dev->cur_snap) ? | ||
1603 | " (*)" : "")); | ||
1604 | if (n == max) | ||
1605 | break; | ||
1606 | } | ||
1607 | |||
1608 | up_read(&header->snap_rwsem); | ||
1609 | } | ||
1610 | |||
1611 | |||
1612 | ret = n; | ||
1613 | mutex_unlock(&ctl_mutex); | ||
1614 | return ret; | ||
1615 | } | ||
1616 | |||
1617 | static ssize_t class_rbd_snaps_refresh(struct class *c, | ||
1618 | struct class_attribute *attr, | ||
1619 | const char *buf, | ||
1620 | size_t count) | ||
1621 | { | ||
1622 | struct rbd_device *rbd_dev = NULL; | ||
1623 | int target_id, rc; | ||
1624 | unsigned long ul; | ||
1625 | int ret = count; | ||
1626 | |||
1627 | rc = strict_strtoul(buf, 10, &ul); | ||
1628 | if (rc) | ||
1629 | return rc; | ||
1630 | |||
1631 | /* convert to int; abort if we lost anything in the conversion */ | ||
1632 | target_id = (int) ul; | ||
1633 | if (target_id != ul) | ||
1634 | return -EINVAL; | ||
1635 | |||
1636 | mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); | ||
1637 | |||
1638 | rbd_dev = __rbd_get_dev(target_id); | ||
1639 | if (!rbd_dev) { | ||
1640 | ret = -ENOENT; | ||
1641 | goto done; | ||
1642 | } | ||
1643 | |||
1644 | rc = rbd_update_snaps(rbd_dev); | ||
1645 | if (rc < 0) | ||
1646 | ret = rc; | ||
1647 | |||
1648 | done: | ||
1649 | mutex_unlock(&ctl_mutex); | ||
1650 | return ret; | ||
1651 | } | ||
1652 | |||
1653 | static ssize_t class_rbd_snap_create(struct class *c, | ||
1654 | struct class_attribute *attr, | ||
1655 | const char *buf, | ||
1656 | size_t count) | ||
1657 | { | ||
1658 | struct rbd_device *rbd_dev = NULL; | ||
1659 | int target_id, ret; | ||
1660 | char *name; | ||
1661 | |||
1662 | name = kmalloc(RBD_MAX_SNAP_NAME_LEN + 1, GFP_KERNEL); | ||
1663 | if (!name) | ||
1664 | return -ENOMEM; | ||
1665 | |||
1666 | /* parse snaps add command */ | ||
1667 | if (sscanf(buf, "%d " | ||
1668 | "%" __stringify(RBD_MAX_SNAP_NAME_LEN) "s", | ||
1669 | &target_id, | ||
1670 | name) != 2) { | ||
1671 | ret = -EINVAL; | ||
1672 | goto done; | ||
1673 | } | ||
1674 | |||
1675 | mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); | ||
1676 | |||
1677 | rbd_dev = __rbd_get_dev(target_id); | ||
1678 | if (!rbd_dev) { | ||
1679 | ret = -ENOENT; | ||
1680 | goto done_unlock; | ||
1681 | } | ||
1682 | |||
1683 | ret = rbd_header_add_snap(rbd_dev, | ||
1684 | name, GFP_KERNEL); | ||
1685 | if (ret < 0) | ||
1686 | goto done_unlock; | ||
1687 | |||
1688 | ret = rbd_update_snaps(rbd_dev); | ||
1689 | if (ret < 0) | ||
1690 | goto done_unlock; | ||
1691 | |||
1692 | ret = count; | ||
1693 | done_unlock: | ||
1694 | mutex_unlock(&ctl_mutex); | ||
1695 | done: | ||
1696 | kfree(name); | ||
1697 | return ret; | ||
1698 | } | ||
1699 | |||
1700 | static ssize_t class_rbd_rollback(struct class *c, | ||
1701 | struct class_attribute *attr, | ||
1702 | const char *buf, | ||
1703 | size_t count) | ||
1704 | { | ||
1705 | struct rbd_device *rbd_dev = NULL; | ||
1706 | int target_id, ret; | ||
1707 | u64 snapid; | ||
1708 | char snap_name[RBD_MAX_SNAP_NAME_LEN]; | ||
1709 | u64 cur_ofs; | ||
1710 | char *seg_name; | ||
1711 | |||
1712 | /* parse snaps add command */ | ||
1713 | if (sscanf(buf, "%d " | ||
1714 | "%" __stringify(RBD_MAX_SNAP_NAME_LEN) "s", | ||
1715 | &target_id, | ||
1716 | snap_name) != 2) { | ||
1717 | return -EINVAL; | ||
1718 | } | ||
1719 | |||
1720 | ret = -ENOMEM; | ||
1721 | seg_name = kmalloc(RBD_MAX_SEG_NAME_LEN + 1, GFP_NOIO); | ||
1722 | if (!seg_name) | ||
1723 | return ret; | ||
1724 | |||
1725 | mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); | ||
1726 | |||
1727 | rbd_dev = __rbd_get_dev(target_id); | ||
1728 | if (!rbd_dev) { | ||
1729 | ret = -ENOENT; | ||
1730 | goto done_unlock; | ||
1731 | } | ||
1732 | |||
1733 | ret = snap_by_name(&rbd_dev->header, snap_name, &snapid, NULL); | ||
1734 | if (ret < 0) | ||
1735 | goto done_unlock; | ||
1736 | |||
1737 | dout("snapid=%lld\n", snapid); | ||
1738 | |||
1739 | cur_ofs = 0; | ||
1740 | while (cur_ofs < rbd_dev->header.image_size) { | ||
1741 | cur_ofs += rbd_get_segment(&rbd_dev->header, | ||
1742 | rbd_dev->obj, | ||
1743 | cur_ofs, (u64)-1, | ||
1744 | seg_name, NULL); | ||
1745 | dout("seg_name=%s\n", seg_name); | ||
1746 | |||
1747 | ret = rbd_req_sync_rollback_obj(rbd_dev, snapid, seg_name); | ||
1748 | if (ret < 0) | ||
1749 | pr_warning("could not roll back obj %s err=%d\n", | ||
1750 | seg_name, ret); | ||
1751 | } | ||
1752 | |||
1753 | ret = rbd_update_snaps(rbd_dev); | ||
1754 | if (ret < 0) | ||
1755 | goto done_unlock; | ||
1756 | |||
1757 | ret = count; | ||
1758 | |||
1759 | done_unlock: | ||
1760 | mutex_unlock(&ctl_mutex); | ||
1761 | kfree(seg_name); | ||
1762 | |||
1763 | return ret; | ||
1764 | } | ||
1765 | |||
/*
 * Control files exposed on the rbd class.  Mode 0200 entries are write-only
 * commands (store handler only); mode 0444 entries are read-only listings
 * (show handler only).  The table is terminated by __ATTR_NULL.
 */
static struct class_attribute class_rbd_attrs[] = {
	__ATTR(add,		0200, NULL, class_rbd_add),
	__ATTR(remove,		0200, NULL, class_rbd_remove),
	__ATTR(list,		0444, class_rbd_list, NULL),
	__ATTR(snaps_refresh,	0200, NULL, class_rbd_snaps_refresh),
	__ATTR(snap_create,	0200, NULL, class_rbd_snap_create),
	__ATTR(snaps_list,	0444, class_rbd_snaps_list, NULL),
	__ATTR(snap_rollback,	0200, NULL, class_rbd_rollback),
	__ATTR_NULL
};
1776 | |||
1777 | /* | ||
1778 | * create control files in sysfs | ||
1779 | * /sys/class/rbd/... | ||
1780 | */ | ||
1781 | static int rbd_sysfs_init(void) | ||
1782 | { | ||
1783 | int ret = -ENOMEM; | ||
1784 | |||
1785 | class_rbd = kzalloc(sizeof(*class_rbd), GFP_KERNEL); | ||
1786 | if (!class_rbd) | ||
1787 | goto out; | ||
1788 | |||
1789 | class_rbd->name = DRV_NAME; | ||
1790 | class_rbd->owner = THIS_MODULE; | ||
1791 | class_rbd->class_release = class_rbd_release; | ||
1792 | class_rbd->class_attrs = class_rbd_attrs; | ||
1793 | |||
1794 | ret = class_register(class_rbd); | ||
1795 | if (ret) | ||
1796 | goto out_class; | ||
1797 | return 0; | ||
1798 | |||
1799 | out_class: | ||
1800 | kfree(class_rbd); | ||
1801 | class_rbd = NULL; | ||
1802 | pr_err(DRV_NAME ": failed to create class rbd\n"); | ||
1803 | out: | ||
1804 | return ret; | ||
1805 | } | ||
1806 | |||
1807 | static void rbd_sysfs_cleanup(void) | ||
1808 | { | ||
1809 | if (class_rbd) | ||
1810 | class_destroy(class_rbd); | ||
1811 | class_rbd = NULL; | ||
1812 | } | ||
1813 | |||
1814 | int __init rbd_init(void) | ||
1815 | { | ||
1816 | int rc; | ||
1817 | |||
1818 | rc = rbd_sysfs_init(); | ||
1819 | if (rc) | ||
1820 | return rc; | ||
1821 | spin_lock_init(&node_lock); | ||
1822 | pr_info("loaded " DRV_NAME_LONG "\n"); | ||
1823 | return 0; | ||
1824 | } | ||
1825 | |||
/* Module exit point: removes the sysfs class via rbd_sysfs_cleanup(). */
void __exit rbd_exit(void)
{
	rbd_sysfs_cleanup();
}
1830 | |||
1831 | module_init(rbd_init); | ||
1832 | module_exit(rbd_exit); | ||
1833 | |||
1834 | MODULE_AUTHOR("Sage Weil <sage@newdream.net>"); | ||
1835 | MODULE_AUTHOR("Yehuda Sadeh <yehuda@hq.newdream.net>"); | ||
1836 | MODULE_DESCRIPTION("rados block device"); | ||
1837 | |||
1838 | /* following authorship retained from original osdblk.c */ | ||
1839 | MODULE_AUTHOR("Jeff Garzik <jeff@garzik.org>"); | ||
1840 | |||
1841 | MODULE_LICENSE("GPL"); | ||
diff --git a/drivers/block/rbd_types.h b/drivers/block/rbd_types.h new file mode 100644 index 000000000000..fc6c678aa2cb --- /dev/null +++ b/drivers/block/rbd_types.h | |||
@@ -0,0 +1,73 @@ | |||
1 | /* | ||
2 | * Ceph - scalable distributed file system | ||
3 | * | ||
4 | * Copyright (C) 2004-2010 Sage Weil <sage@newdream.net> | ||
5 | * | ||
6 | * This is free software; you can redistribute it and/or | ||
7 | * modify it under the terms of the GNU Lesser General Public | ||
8 | * License version 2.1, as published by the Free Software | ||
9 | * Foundation. See file COPYING. | ||
10 | * | ||
11 | */ | ||
12 | |||
13 | #ifndef CEPH_RBD_TYPES_H | ||
14 | #define CEPH_RBD_TYPES_H | ||
15 | |||
16 | #include <linux/types.h> | ||
17 | |||
18 | /* | ||
19 | * rbd image 'foo' consists of objects | ||
20 | * foo.rbd - image metadata | ||
21 | * foo.00000000 | ||
22 | * foo.00000001 | ||
23 | * ... - data | ||
24 | */ | ||
25 | |||
26 | #define RBD_SUFFIX ".rbd" | ||
27 | #define RBD_DIRECTORY "rbd_directory" | ||
28 | #define RBD_INFO "rbd_info" | ||
29 | |||
30 | #define RBD_DEFAULT_OBJ_ORDER 22 /* 4MB */ | ||
31 | #define RBD_MIN_OBJ_ORDER 16 | ||
32 | #define RBD_MAX_OBJ_ORDER 30 | ||
33 | |||
34 | #define RBD_MAX_OBJ_NAME_LEN 96 | ||
35 | #define RBD_MAX_SEG_NAME_LEN 128 | ||
36 | |||
37 | #define RBD_COMP_NONE 0 | ||
38 | #define RBD_CRYPT_NONE 0 | ||
39 | |||
40 | #define RBD_HEADER_TEXT "<<< Rados Block Device Image >>>\n" | ||
41 | #define RBD_HEADER_SIGNATURE "RBD" | ||
42 | #define RBD_HEADER_VERSION "001.005" | ||
43 | |||
/*
 * Packed record holding a single little-endian 64-bit counter.
 * NOTE(review): presumably the payload of the RBD_INFO object -- confirm.
 */
struct rbd_info {
	__le64 max_id;
} __attribute__ ((packed));
47 | |||
/*
 * Packed per-snapshot record; an array of these trails
 * struct rbd_image_header_ondisk.  Both fields are little-endian.
 */
struct rbd_image_snap_ondisk {
	__le64 id;
	__le64 image_size;
} __attribute__((packed));
52 | |||
/*
 * Packed image header layout.  Multi-byte integers are little-endian.
 * The fixed part is followed by a variable number of snapshot records.
 * NOTE(review): text/signature/version presumably carry RBD_HEADER_TEXT,
 * RBD_HEADER_SIGNATURE and RBD_HEADER_VERSION, and snap_count presumably
 * gives the number of entries in snaps[] -- confirm against the users.
 */
struct rbd_image_header_ondisk {
	char text[40];
	char block_name[24];
	char signature[4];
	char version[8];
	struct {
		__u8 order;
		__u8 crypt_type;
		__u8 comp_type;
		__u8 unused;
	} __attribute__((packed)) options;
	__le64 image_size;
	__le64 snap_seq;
	__le32 snap_count;
	__le32 reserved;
	__le64 snap_names_len;
	/* zero-length trailing array: records follow the fixed header */
	struct rbd_image_snap_ondisk snaps[0];
} __attribute__((packed));
71 | |||
72 | |||
73 | #endif | ||
diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index 1101e251a629..8320490226b7 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c | |||
@@ -2,7 +2,6 @@ | |||
2 | #include <linux/spinlock.h> | 2 | #include <linux/spinlock.h> |
3 | #include <linux/slab.h> | 3 | #include <linux/slab.h> |
4 | #include <linux/blkdev.h> | 4 | #include <linux/blkdev.h> |
5 | #include <linux/smp_lock.h> | ||
6 | #include <linux/hdreg.h> | 5 | #include <linux/hdreg.h> |
7 | #include <linux/virtio.h> | 6 | #include <linux/virtio.h> |
8 | #include <linux/virtio_blk.h> | 7 | #include <linux/virtio_blk.h> |
@@ -222,8 +221,8 @@ static int virtblk_get_id(struct gendisk *disk, char *id_str) | |||
222 | return err; | 221 | return err; |
223 | } | 222 | } |
224 | 223 | ||
225 | static int virtblk_locked_ioctl(struct block_device *bdev, fmode_t mode, | 224 | static int virtblk_ioctl(struct block_device *bdev, fmode_t mode, |
226 | unsigned cmd, unsigned long data) | 225 | unsigned int cmd, unsigned long data) |
227 | { | 226 | { |
228 | struct gendisk *disk = bdev->bd_disk; | 227 | struct gendisk *disk = bdev->bd_disk; |
229 | struct virtio_blk *vblk = disk->private_data; | 228 | struct virtio_blk *vblk = disk->private_data; |
@@ -238,18 +237,6 @@ static int virtblk_locked_ioctl(struct block_device *bdev, fmode_t mode, | |||
238 | (void __user *)data); | 237 | (void __user *)data); |
239 | } | 238 | } |
240 | 239 | ||
241 | static int virtblk_ioctl(struct block_device *bdev, fmode_t mode, | ||
242 | unsigned int cmd, unsigned long param) | ||
243 | { | ||
244 | int ret; | ||
245 | |||
246 | lock_kernel(); | ||
247 | ret = virtblk_locked_ioctl(bdev, mode, cmd, param); | ||
248 | unlock_kernel(); | ||
249 | |||
250 | return ret; | ||
251 | } | ||
252 | |||
253 | /* We provide getgeo only to please some old bootloader/partitioning tools */ | 240 | /* We provide getgeo only to please some old bootloader/partitioning tools */ |
254 | static int virtblk_getgeo(struct block_device *bd, struct hd_geometry *geo) | 241 | static int virtblk_getgeo(struct block_device *bd, struct hd_geometry *geo) |
255 | { | 242 | { |
diff --git a/drivers/char/agp/amd64-agp.c b/drivers/char/agp/amd64-agp.c index 70312da4c968..564808a5c3c0 100644 --- a/drivers/char/agp/amd64-agp.c +++ b/drivers/char/agp/amd64-agp.c | |||
@@ -199,7 +199,7 @@ static void amd64_cleanup(void) | |||
199 | struct pci_dev *dev = k8_northbridges[i]; | 199 | struct pci_dev *dev = k8_northbridges[i]; |
200 | /* disable gart translation */ | 200 | /* disable gart translation */ |
201 | pci_read_config_dword(dev, AMD64_GARTAPERTURECTL, &tmp); | 201 | pci_read_config_dword(dev, AMD64_GARTAPERTURECTL, &tmp); |
202 | tmp &= ~AMD64_GARTEN; | 202 | tmp &= ~GARTEN; |
203 | pci_write_config_dword(dev, AMD64_GARTAPERTURECTL, tmp); | 203 | pci_write_config_dword(dev, AMD64_GARTAPERTURECTL, tmp); |
204 | } | 204 | } |
205 | } | 205 | } |
@@ -313,7 +313,7 @@ static __devinit int fix_northbridge(struct pci_dev *nb, struct pci_dev *agp, | |||
313 | if (order < 0 || !agp_aperture_valid(aper, (32*1024*1024)<<order)) | 313 | if (order < 0 || !agp_aperture_valid(aper, (32*1024*1024)<<order)) |
314 | return -1; | 314 | return -1; |
315 | 315 | ||
316 | pci_write_config_dword(nb, AMD64_GARTAPERTURECTL, order << 1); | 316 | gart_set_size_and_enable(nb, order); |
317 | pci_write_config_dword(nb, AMD64_GARTAPERTUREBASE, aper >> 25); | 317 | pci_write_config_dword(nb, AMD64_GARTAPERTUREBASE, aper >> 25); |
318 | 318 | ||
319 | return 0; | 319 | return 0; |
diff --git a/drivers/char/agp/generic.c b/drivers/char/agp/generic.c index d2abf5143983..64255cef8a7d 100644 --- a/drivers/char/agp/generic.c +++ b/drivers/char/agp/generic.c | |||
@@ -984,7 +984,9 @@ int agp_generic_create_gatt_table(struct agp_bridge_data *bridge) | |||
984 | 984 | ||
985 | bridge->driver->cache_flush(); | 985 | bridge->driver->cache_flush(); |
986 | #ifdef CONFIG_X86 | 986 | #ifdef CONFIG_X86 |
987 | set_memory_uc((unsigned long)table, 1 << page_order); | 987 | if (set_memory_uc((unsigned long)table, 1 << page_order)) |
988 | printk(KERN_WARNING "Could not set GATT table memory to UC!"); | ||
989 | |||
988 | bridge->gatt_table = (void *)table; | 990 | bridge->gatt_table = (void *)table; |
989 | #else | 991 | #else |
990 | bridge->gatt_table = ioremap_nocache(virt_to_phys(table), | 992 | bridge->gatt_table = ioremap_nocache(virt_to_phys(table), |
diff --git a/drivers/char/tpm/tpm.c b/drivers/char/tpm/tpm.c index 05ad4a17a28f..7c4133582dba 100644 --- a/drivers/char/tpm/tpm.c +++ b/drivers/char/tpm/tpm.c | |||
@@ -47,6 +47,16 @@ enum tpm_duration { | |||
47 | #define TPM_MAX_PROTECTED_ORDINAL 12 | 47 | #define TPM_MAX_PROTECTED_ORDINAL 12 |
48 | #define TPM_PROTECTED_ORDINAL_MASK 0xFF | 48 | #define TPM_PROTECTED_ORDINAL_MASK 0xFF |
49 | 49 | ||
50 | /* | ||
51 | * Bug workaround - some TPM's don't flush the most | ||
52 | * recently changed pcr on suspend, so force the flush | ||
53 | * with an extend to the selected _unused_ non-volatile pcr. | ||
54 | */ | ||
55 | static int tpm_suspend_pcr; | ||
56 | module_param_named(suspend_pcr, tpm_suspend_pcr, uint, 0644); | ||
57 | MODULE_PARM_DESC(suspend_pcr, | ||
58 | "PCR to use for dummy writes to faciltate flush on suspend."); | ||
59 | |||
50 | static LIST_HEAD(tpm_chip_list); | 60 | static LIST_HEAD(tpm_chip_list); |
51 | static DEFINE_SPINLOCK(driver_lock); | 61 | static DEFINE_SPINLOCK(driver_lock); |
52 | static DECLARE_BITMAP(dev_mask, TPM_NUM_DEVICES); | 62 | static DECLARE_BITMAP(dev_mask, TPM_NUM_DEVICES); |
@@ -1077,18 +1087,6 @@ static struct tpm_input_header savestate_header = { | |||
1077 | .ordinal = TPM_ORD_SAVESTATE | 1087 | .ordinal = TPM_ORD_SAVESTATE |
1078 | }; | 1088 | }; |
1079 | 1089 | ||
1080 | /* Bug workaround - some TPM's don't flush the most | ||
1081 | * recently changed pcr on suspend, so force the flush | ||
1082 | * with an extend to the selected _unused_ non-volatile pcr. | ||
1083 | */ | ||
1084 | static int tpm_suspend_pcr; | ||
1085 | static int __init tpm_suspend_setup(char *str) | ||
1086 | { | ||
1087 | get_option(&str, &tpm_suspend_pcr); | ||
1088 | return 1; | ||
1089 | } | ||
1090 | __setup("tpm_suspend_pcr=", tpm_suspend_setup); | ||
1091 | |||
1092 | /* | 1090 | /* |
1093 | * We are about to suspend. Save the TPM state | 1091 | * We are about to suspend. Save the TPM state |
1094 | * so that it can be restored. | 1092 | * so that it can be restored. |
diff --git a/drivers/char/virtio_console.c b/drivers/char/virtio_console.c index c810481a5bc2..6c1b676643a9 100644 --- a/drivers/char/virtio_console.c +++ b/drivers/char/virtio_console.c | |||
@@ -48,6 +48,9 @@ struct ports_driver_data { | |||
48 | /* Used for exporting per-port information to debugfs */ | 48 | /* Used for exporting per-port information to debugfs */ |
49 | struct dentry *debugfs_dir; | 49 | struct dentry *debugfs_dir; |
50 | 50 | ||
51 | /* List of all the devices we're handling */ | ||
52 | struct list_head portdevs; | ||
53 | |||
51 | /* Number of devices this driver is handling */ | 54 | /* Number of devices this driver is handling */ |
52 | unsigned int index; | 55 | unsigned int index; |
53 | 56 | ||
@@ -108,6 +111,9 @@ struct port_buffer { | |||
108 | * ports for that device (vdev->priv). | 111 | * ports for that device (vdev->priv). |
109 | */ | 112 | */ |
110 | struct ports_device { | 113 | struct ports_device { |
114 | /* Next portdev in the list, head is in the pdrvdata struct */ | ||
115 | struct list_head list; | ||
116 | |||
111 | /* | 117 | /* |
112 | * Workqueue handlers where we process deferred work after | 118 | * Workqueue handlers where we process deferred work after |
113 | * notification | 119 | * notification |
@@ -178,15 +184,21 @@ struct port { | |||
178 | struct console cons; | 184 | struct console cons; |
179 | 185 | ||
180 | /* Each port associates with a separate char device */ | 186 | /* Each port associates with a separate char device */ |
181 | struct cdev cdev; | 187 | struct cdev *cdev; |
182 | struct device *dev; | 188 | struct device *dev; |
183 | 189 | ||
190 | /* Reference-counting to handle port hot-unplugs and file operations */ | ||
191 | struct kref kref; | ||
192 | |||
184 | /* A waitqueue for poll() or blocking read operations */ | 193 | /* A waitqueue for poll() or blocking read operations */ |
185 | wait_queue_head_t waitqueue; | 194 | wait_queue_head_t waitqueue; |
186 | 195 | ||
187 | /* The 'name' of the port that we expose via sysfs properties */ | 196 | /* The 'name' of the port that we expose via sysfs properties */ |
188 | char *name; | 197 | char *name; |
189 | 198 | ||
199 | /* We can notify apps of host connect / disconnect events via SIGIO */ | ||
200 | struct fasync_struct *async_queue; | ||
201 | |||
190 | /* The 'id' to identify the port with the Host */ | 202 | /* The 'id' to identify the port with the Host */ |
191 | u32 id; | 203 | u32 id; |
192 | 204 | ||
@@ -221,6 +233,41 @@ out: | |||
221 | return port; | 233 | return port; |
222 | } | 234 | } |
223 | 235 | ||
236 | static struct port *find_port_by_devt_in_portdev(struct ports_device *portdev, | ||
237 | dev_t dev) | ||
238 | { | ||
239 | struct port *port; | ||
240 | unsigned long flags; | ||
241 | |||
242 | spin_lock_irqsave(&portdev->ports_lock, flags); | ||
243 | list_for_each_entry(port, &portdev->ports, list) | ||
244 | if (port->cdev->dev == dev) | ||
245 | goto out; | ||
246 | port = NULL; | ||
247 | out: | ||
248 | spin_unlock_irqrestore(&portdev->ports_lock, flags); | ||
249 | |||
250 | return port; | ||
251 | } | ||
252 | |||
253 | static struct port *find_port_by_devt(dev_t dev) | ||
254 | { | ||
255 | struct ports_device *portdev; | ||
256 | struct port *port; | ||
257 | unsigned long flags; | ||
258 | |||
259 | spin_lock_irqsave(&pdrvdata_lock, flags); | ||
260 | list_for_each_entry(portdev, &pdrvdata.portdevs, list) { | ||
261 | port = find_port_by_devt_in_portdev(portdev, dev); | ||
262 | if (port) | ||
263 | goto out; | ||
264 | } | ||
265 | port = NULL; | ||
266 | out: | ||
267 | spin_unlock_irqrestore(&pdrvdata_lock, flags); | ||
268 | return port; | ||
269 | } | ||
270 | |||
224 | static struct port *find_port_by_id(struct ports_device *portdev, u32 id) | 271 | static struct port *find_port_by_id(struct ports_device *portdev, u32 id) |
225 | { | 272 | { |
226 | struct port *port; | 273 | struct port *port; |
@@ -410,7 +457,10 @@ static ssize_t __send_control_msg(struct ports_device *portdev, u32 port_id, | |||
410 | static ssize_t send_control_msg(struct port *port, unsigned int event, | 457 | static ssize_t send_control_msg(struct port *port, unsigned int event, |
411 | unsigned int value) | 458 | unsigned int value) |
412 | { | 459 | { |
413 | return __send_control_msg(port->portdev, port->id, event, value); | 460 | /* Did the port get unplugged before userspace closed it? */ |
461 | if (port->portdev) | ||
462 | return __send_control_msg(port->portdev, port->id, event, value); | ||
463 | return 0; | ||
414 | } | 464 | } |
415 | 465 | ||
416 | /* Callers must take the port->outvq_lock */ | 466 | /* Callers must take the port->outvq_lock */ |
@@ -459,9 +509,12 @@ static ssize_t send_buf(struct port *port, void *in_buf, size_t in_count, | |||
459 | 509 | ||
460 | /* | 510 | /* |
461 | * Wait till the host acknowledges it pushed out the data we | 511 | * Wait till the host acknowledges it pushed out the data we |
462 | * sent. This is done for ports in blocking mode or for data | 512 | * sent. This is done for data from the hvc_console; the tty |
463 | * from the hvc_console; the tty operations are performed with | 513 | * operations are performed with spinlocks held so we can't |
464 | * spinlocks held so we can't sleep here. | 514 | * sleep here. An alternative would be to copy the data to a |
515 | * buffer and relax the spinning requirement. The downside is | ||
516 | * we need to kmalloc a GFP_ATOMIC buffer each time the | ||
517 | * console driver writes something out. | ||
465 | */ | 518 | */ |
466 | while (!virtqueue_get_buf(out_vq, &len)) | 519 | while (!virtqueue_get_buf(out_vq, &len)) |
467 | cpu_relax(); | 520 | cpu_relax(); |
@@ -522,6 +575,10 @@ static ssize_t fill_readbuf(struct port *port, char *out_buf, size_t out_count, | |||
522 | /* The condition that must be true for polling to end */ | 575 | /* The condition that must be true for polling to end */ |
523 | static bool will_read_block(struct port *port) | 576 | static bool will_read_block(struct port *port) |
524 | { | 577 | { |
578 | if (!port->guest_connected) { | ||
579 | /* Port got hot-unplugged. Let's exit. */ | ||
580 | return false; | ||
581 | } | ||
525 | return !port_has_data(port) && port->host_connected; | 582 | return !port_has_data(port) && port->host_connected; |
526 | } | 583 | } |
527 | 584 | ||
@@ -572,6 +629,9 @@ static ssize_t port_fops_read(struct file *filp, char __user *ubuf, | |||
572 | if (ret < 0) | 629 | if (ret < 0) |
573 | return ret; | 630 | return ret; |
574 | } | 631 | } |
632 | /* Port got hot-unplugged. */ | ||
633 | if (!port->guest_connected) | ||
634 | return -ENODEV; | ||
575 | /* | 635 | /* |
576 | * We could've received a disconnection message while we were | 636 | * We could've received a disconnection message while we were |
577 | * waiting for more data. | 637 | * waiting for more data. |
@@ -613,6 +673,9 @@ static ssize_t port_fops_write(struct file *filp, const char __user *ubuf, | |||
613 | if (ret < 0) | 673 | if (ret < 0) |
614 | return ret; | 674 | return ret; |
615 | } | 675 | } |
676 | /* Port got hot-unplugged. */ | ||
677 | if (!port->guest_connected) | ||
678 | return -ENODEV; | ||
616 | 679 | ||
617 | count = min((size_t)(32 * 1024), count); | 680 | count = min((size_t)(32 * 1024), count); |
618 | 681 | ||
@@ -626,6 +689,14 @@ static ssize_t port_fops_write(struct file *filp, const char __user *ubuf, | |||
626 | goto free_buf; | 689 | goto free_buf; |
627 | } | 690 | } |
628 | 691 | ||
692 | /* | ||
693 | * We now ask send_buf() to not spin for generic ports -- we | ||
694 | * can re-use the same code path that non-blocking file | ||
695 | * descriptors take for blocking file descriptors since the | ||
696 | * wait is already done and we're certain the write will go | ||
697 | * through to the host. | ||
698 | */ | ||
699 | nonblock = true; | ||
629 | ret = send_buf(port, buf, count, nonblock); | 700 | ret = send_buf(port, buf, count, nonblock); |
630 | 701 | ||
631 | if (nonblock && ret > 0) | 702 | if (nonblock && ret > 0) |
@@ -645,6 +716,10 @@ static unsigned int port_fops_poll(struct file *filp, poll_table *wait) | |||
645 | port = filp->private_data; | 716 | port = filp->private_data; |
646 | poll_wait(filp, &port->waitqueue, wait); | 717 | poll_wait(filp, &port->waitqueue, wait); |
647 | 718 | ||
719 | if (!port->guest_connected) { | ||
720 | /* Port got unplugged */ | ||
721 | return POLLHUP; | ||
722 | } | ||
648 | ret = 0; | 723 | ret = 0; |
649 | if (!will_read_block(port)) | 724 | if (!will_read_block(port)) |
650 | ret |= POLLIN | POLLRDNORM; | 725 | ret |= POLLIN | POLLRDNORM; |
@@ -656,6 +731,8 @@ static unsigned int port_fops_poll(struct file *filp, poll_table *wait) | |||
656 | return ret; | 731 | return ret; |
657 | } | 732 | } |
658 | 733 | ||
734 | static void remove_port(struct kref *kref); | ||
735 | |||
659 | static int port_fops_release(struct inode *inode, struct file *filp) | 736 | static int port_fops_release(struct inode *inode, struct file *filp) |
660 | { | 737 | { |
661 | struct port *port; | 738 | struct port *port; |
@@ -676,6 +753,16 @@ static int port_fops_release(struct inode *inode, struct file *filp) | |||
676 | reclaim_consumed_buffers(port); | 753 | reclaim_consumed_buffers(port); |
677 | spin_unlock_irq(&port->outvq_lock); | 754 | spin_unlock_irq(&port->outvq_lock); |
678 | 755 | ||
756 | /* | ||
757 | * Locks aren't necessary here as a port can't be opened after | ||
758 | * unplug, and if a port isn't unplugged, a kref would already | ||
759 | * exist for the port. Plus, taking ports_lock here would | ||
760 | * create a dependency on other locks taken by functions | ||
761 | * inside remove_port if we're the last holder of the port, | ||
762 | * creating many problems. | ||
763 | */ | ||
764 | kref_put(&port->kref, remove_port); | ||
765 | |||
679 | return 0; | 766 | return 0; |
680 | } | 767 | } |
681 | 768 | ||
@@ -683,22 +770,31 @@ static int port_fops_open(struct inode *inode, struct file *filp) | |||
683 | { | 770 | { |
684 | struct cdev *cdev = inode->i_cdev; | 771 | struct cdev *cdev = inode->i_cdev; |
685 | struct port *port; | 772 | struct port *port; |
773 | int ret; | ||
686 | 774 | ||
687 | port = container_of(cdev, struct port, cdev); | 775 | port = find_port_by_devt(cdev->dev); |
688 | filp->private_data = port; | 776 | filp->private_data = port; |
689 | 777 | ||
778 | /* Prevent against a port getting hot-unplugged at the same time */ | ||
779 | spin_lock_irq(&port->portdev->ports_lock); | ||
780 | kref_get(&port->kref); | ||
781 | spin_unlock_irq(&port->portdev->ports_lock); | ||
782 | |||
690 | /* | 783 | /* |
691 | * Don't allow opening of console port devices -- that's done | 784 | * Don't allow opening of console port devices -- that's done |
692 | * via /dev/hvc | 785 | * via /dev/hvc |
693 | */ | 786 | */ |
694 | if (is_console_port(port)) | 787 | if (is_console_port(port)) { |
695 | return -ENXIO; | 788 | ret = -ENXIO; |
789 | goto out; | ||
790 | } | ||
696 | 791 | ||
697 | /* Allow only one process to open a particular port at a time */ | 792 | /* Allow only one process to open a particular port at a time */ |
698 | spin_lock_irq(&port->inbuf_lock); | 793 | spin_lock_irq(&port->inbuf_lock); |
699 | if (port->guest_connected) { | 794 | if (port->guest_connected) { |
700 | spin_unlock_irq(&port->inbuf_lock); | 795 | spin_unlock_irq(&port->inbuf_lock); |
701 | return -EMFILE; | 796 | ret = -EMFILE; |
797 | goto out; | ||
702 | } | 798 | } |
703 | 799 | ||
704 | port->guest_connected = true; | 800 | port->guest_connected = true; |
@@ -713,10 +809,23 @@ static int port_fops_open(struct inode *inode, struct file *filp) | |||
713 | reclaim_consumed_buffers(port); | 809 | reclaim_consumed_buffers(port); |
714 | spin_unlock_irq(&port->outvq_lock); | 810 | spin_unlock_irq(&port->outvq_lock); |
715 | 811 | ||
812 | nonseekable_open(inode, filp); | ||
813 | |||
716 | /* Notify host of port being opened */ | 814 | /* Notify host of port being opened */ |
717 | send_control_msg(filp->private_data, VIRTIO_CONSOLE_PORT_OPEN, 1); | 815 | send_control_msg(filp->private_data, VIRTIO_CONSOLE_PORT_OPEN, 1); |
718 | 816 | ||
719 | return 0; | 817 | return 0; |
818 | out: | ||
819 | kref_put(&port->kref, remove_port); | ||
820 | return ret; | ||
821 | } | ||
822 | |||
823 | static int port_fops_fasync(int fd, struct file *filp, int mode) | ||
824 | { | ||
825 | struct port *port; | ||
826 | |||
827 | port = filp->private_data; | ||
828 | return fasync_helper(fd, filp, mode, &port->async_queue); | ||
720 | } | 829 | } |
721 | 830 | ||
722 | /* | 831 | /* |
@@ -732,6 +841,8 @@ static const struct file_operations port_fops = { | |||
732 | .write = port_fops_write, | 841 | .write = port_fops_write, |
733 | .poll = port_fops_poll, | 842 | .poll = port_fops_poll, |
734 | .release = port_fops_release, | 843 | .release = port_fops_release, |
844 | .fasync = port_fops_fasync, | ||
845 | .llseek = no_llseek, | ||
735 | }; | 846 | }; |
736 | 847 | ||
737 | /* | 848 | /* |
@@ -990,6 +1101,12 @@ static unsigned int fill_queue(struct virtqueue *vq, spinlock_t *lock) | |||
990 | return nr_added_bufs; | 1101 | return nr_added_bufs; |
991 | } | 1102 | } |
992 | 1103 | ||
1104 | static void send_sigio_to_port(struct port *port) | ||
1105 | { | ||
1106 | if (port->async_queue && port->guest_connected) | ||
1107 | kill_fasync(&port->async_queue, SIGIO, POLL_OUT); | ||
1108 | } | ||
1109 | |||
993 | static int add_port(struct ports_device *portdev, u32 id) | 1110 | static int add_port(struct ports_device *portdev, u32 id) |
994 | { | 1111 | { |
995 | char debugfs_name[16]; | 1112 | char debugfs_name[16]; |
@@ -1004,6 +1121,7 @@ static int add_port(struct ports_device *portdev, u32 id) | |||
1004 | err = -ENOMEM; | 1121 | err = -ENOMEM; |
1005 | goto fail; | 1122 | goto fail; |
1006 | } | 1123 | } |
1124 | kref_init(&port->kref); | ||
1007 | 1125 | ||
1008 | port->portdev = portdev; | 1126 | port->portdev = portdev; |
1009 | port->id = id; | 1127 | port->id = id; |
@@ -1011,6 +1129,7 @@ static int add_port(struct ports_device *portdev, u32 id) | |||
1011 | port->name = NULL; | 1129 | port->name = NULL; |
1012 | port->inbuf = NULL; | 1130 | port->inbuf = NULL; |
1013 | port->cons.hvc = NULL; | 1131 | port->cons.hvc = NULL; |
1132 | port->async_queue = NULL; | ||
1014 | 1133 | ||
1015 | port->cons.ws.ws_row = port->cons.ws.ws_col = 0; | 1134 | port->cons.ws.ws_row = port->cons.ws.ws_col = 0; |
1016 | 1135 | ||
@@ -1021,14 +1140,20 @@ static int add_port(struct ports_device *portdev, u32 id) | |||
1021 | port->in_vq = portdev->in_vqs[port->id]; | 1140 | port->in_vq = portdev->in_vqs[port->id]; |
1022 | port->out_vq = portdev->out_vqs[port->id]; | 1141 | port->out_vq = portdev->out_vqs[port->id]; |
1023 | 1142 | ||
1024 | cdev_init(&port->cdev, &port_fops); | 1143 | port->cdev = cdev_alloc(); |
1144 | if (!port->cdev) { | ||
1145 | dev_err(&port->portdev->vdev->dev, "Error allocating cdev\n"); | ||
1146 | err = -ENOMEM; | ||
1147 | goto free_port; | ||
1148 | } | ||
1149 | port->cdev->ops = &port_fops; | ||
1025 | 1150 | ||
1026 | devt = MKDEV(portdev->chr_major, id); | 1151 | devt = MKDEV(portdev->chr_major, id); |
1027 | err = cdev_add(&port->cdev, devt, 1); | 1152 | err = cdev_add(port->cdev, devt, 1); |
1028 | if (err < 0) { | 1153 | if (err < 0) { |
1029 | dev_err(&port->portdev->vdev->dev, | 1154 | dev_err(&port->portdev->vdev->dev, |
1030 | "Error %d adding cdev for port %u\n", err, id); | 1155 | "Error %d adding cdev for port %u\n", err, id); |
1031 | goto free_port; | 1156 | goto free_cdev; |
1032 | } | 1157 | } |
1033 | port->dev = device_create(pdrvdata.class, &port->portdev->vdev->dev, | 1158 | port->dev = device_create(pdrvdata.class, &port->portdev->vdev->dev, |
1034 | devt, port, "vport%up%u", | 1159 | devt, port, "vport%up%u", |
@@ -1093,7 +1218,7 @@ free_inbufs: | |||
1093 | free_device: | 1218 | free_device: |
1094 | device_destroy(pdrvdata.class, port->dev->devt); | 1219 | device_destroy(pdrvdata.class, port->dev->devt); |
1095 | free_cdev: | 1220 | free_cdev: |
1096 | cdev_del(&port->cdev); | 1221 | cdev_del(port->cdev); |
1097 | free_port: | 1222 | free_port: |
1098 | kfree(port); | 1223 | kfree(port); |
1099 | fail: | 1224 | fail: |
@@ -1102,21 +1227,45 @@ fail: | |||
1102 | return err; | 1227 | return err; |
1103 | } | 1228 | } |
1104 | 1229 | ||
1105 | /* Remove all port-specific data. */ | 1230 | /* No users remain, remove all port-specific data. */ |
1106 | static int remove_port(struct port *port) | 1231 | static void remove_port(struct kref *kref) |
1232 | { | ||
1233 | struct port *port; | ||
1234 | |||
1235 | port = container_of(kref, struct port, kref); | ||
1236 | |||
1237 | sysfs_remove_group(&port->dev->kobj, &port_attribute_group); | ||
1238 | device_destroy(pdrvdata.class, port->dev->devt); | ||
1239 | cdev_del(port->cdev); | ||
1240 | |||
1241 | kfree(port->name); | ||
1242 | |||
1243 | debugfs_remove(port->debugfs_file); | ||
1244 | |||
1245 | kfree(port); | ||
1246 | } | ||
1247 | |||
1248 | /* | ||
1249 | * Port got unplugged. Remove port from portdev's list and drop the | ||
1250 | * kref reference. If no userspace has this port opened, it will | ||
1251 | * result in immediate removal the port. | ||
1252 | */ | ||
1253 | static void unplug_port(struct port *port) | ||
1107 | { | 1254 | { |
1108 | struct port_buffer *buf; | 1255 | struct port_buffer *buf; |
1109 | 1256 | ||
1257 | spin_lock_irq(&port->portdev->ports_lock); | ||
1258 | list_del(&port->list); | ||
1259 | spin_unlock_irq(&port->portdev->ports_lock); | ||
1260 | |||
1110 | if (port->guest_connected) { | 1261 | if (port->guest_connected) { |
1111 | port->guest_connected = false; | 1262 | port->guest_connected = false; |
1112 | port->host_connected = false; | 1263 | port->host_connected = false; |
1113 | wake_up_interruptible(&port->waitqueue); | 1264 | wake_up_interruptible(&port->waitqueue); |
1114 | send_control_msg(port, VIRTIO_CONSOLE_PORT_OPEN, 0); | ||
1115 | } | ||
1116 | 1265 | ||
1117 | spin_lock_irq(&port->portdev->ports_lock); | 1266 | /* Let the app know the port is going down. */ |
1118 | list_del(&port->list); | 1267 | send_sigio_to_port(port); |
1119 | spin_unlock_irq(&port->portdev->ports_lock); | 1268 | } |
1120 | 1269 | ||
1121 | if (is_console_port(port)) { | 1270 | if (is_console_port(port)) { |
1122 | spin_lock_irq(&pdrvdata_lock); | 1271 | spin_lock_irq(&pdrvdata_lock); |
@@ -1135,9 +1284,6 @@ static int remove_port(struct port *port) | |||
1135 | hvc_remove(port->cons.hvc); | 1284 | hvc_remove(port->cons.hvc); |
1136 | #endif | 1285 | #endif |
1137 | } | 1286 | } |
1138 | sysfs_remove_group(&port->dev->kobj, &port_attribute_group); | ||
1139 | device_destroy(pdrvdata.class, port->dev->devt); | ||
1140 | cdev_del(&port->cdev); | ||
1141 | 1287 | ||
1142 | /* Remove unused data this port might have received. */ | 1288 | /* Remove unused data this port might have received. */ |
1143 | discard_port_data(port); | 1289 | discard_port_data(port); |
@@ -1148,12 +1294,19 @@ static int remove_port(struct port *port) | |||
1148 | while ((buf = virtqueue_detach_unused_buf(port->in_vq))) | 1294 | while ((buf = virtqueue_detach_unused_buf(port->in_vq))) |
1149 | free_buf(buf); | 1295 | free_buf(buf); |
1150 | 1296 | ||
1151 | kfree(port->name); | 1297 | /* |
1152 | 1298 | * We should just assume the device itself has gone off -- | |
1153 | debugfs_remove(port->debugfs_file); | 1299 | * else a close on an open port later will try to send out a |
1300 | * control message. | ||
1301 | */ | ||
1302 | port->portdev = NULL; | ||
1154 | 1303 | ||
1155 | kfree(port); | 1304 | /* |
1156 | return 0; | 1305 | * Locks around here are not necessary - a port can't be |
1306 | * opened after we removed the port struct from ports_list | ||
1307 | * above. | ||
1308 | */ | ||
1309 | kref_put(&port->kref, remove_port); | ||
1157 | } | 1310 | } |
1158 | 1311 | ||
1159 | /* Any private messages that the Host and Guest want to share */ | 1312 | /* Any private messages that the Host and Guest want to share */ |
@@ -1192,7 +1345,7 @@ static void handle_control_message(struct ports_device *portdev, | |||
1192 | add_port(portdev, cpkt->id); | 1345 | add_port(portdev, cpkt->id); |
1193 | break; | 1346 | break; |
1194 | case VIRTIO_CONSOLE_PORT_REMOVE: | 1347 | case VIRTIO_CONSOLE_PORT_REMOVE: |
1195 | remove_port(port); | 1348 | unplug_port(port); |
1196 | break; | 1349 | break; |
1197 | case VIRTIO_CONSOLE_CONSOLE_PORT: | 1350 | case VIRTIO_CONSOLE_CONSOLE_PORT: |
1198 | if (!cpkt->value) | 1351 | if (!cpkt->value) |
@@ -1234,6 +1387,12 @@ static void handle_control_message(struct ports_device *portdev, | |||
1234 | spin_lock_irq(&port->outvq_lock); | 1387 | spin_lock_irq(&port->outvq_lock); |
1235 | reclaim_consumed_buffers(port); | 1388 | reclaim_consumed_buffers(port); |
1236 | spin_unlock_irq(&port->outvq_lock); | 1389 | spin_unlock_irq(&port->outvq_lock); |
1390 | |||
1391 | /* | ||
1392 | * If the guest is connected, it'll be interested in | ||
1393 | * knowing the host connection state changed. | ||
1394 | */ | ||
1395 | send_sigio_to_port(port); | ||
1237 | break; | 1396 | break; |
1238 | case VIRTIO_CONSOLE_PORT_NAME: | 1397 | case VIRTIO_CONSOLE_PORT_NAME: |
1239 | /* | 1398 | /* |
@@ -1330,6 +1489,9 @@ static void in_intr(struct virtqueue *vq) | |||
1330 | 1489 | ||
1331 | wake_up_interruptible(&port->waitqueue); | 1490 | wake_up_interruptible(&port->waitqueue); |
1332 | 1491 | ||
1492 | /* Send a SIGIO indicating new data in case the process asked for it */ | ||
1493 | send_sigio_to_port(port); | ||
1494 | |||
1333 | if (is_console_port(port) && hvc_poll(port->cons.hvc)) | 1495 | if (is_console_port(port) && hvc_poll(port->cons.hvc)) |
1334 | hvc_kick(); | 1496 | hvc_kick(); |
1335 | } | 1497 | } |
@@ -1566,6 +1728,10 @@ static int __devinit virtcons_probe(struct virtio_device *vdev) | |||
1566 | add_port(portdev, 0); | 1728 | add_port(portdev, 0); |
1567 | } | 1729 | } |
1568 | 1730 | ||
1731 | spin_lock_irq(&pdrvdata_lock); | ||
1732 | list_add_tail(&portdev->list, &pdrvdata.portdevs); | ||
1733 | spin_unlock_irq(&pdrvdata_lock); | ||
1734 | |||
1569 | __send_control_msg(portdev, VIRTIO_CONSOLE_BAD_ID, | 1735 | __send_control_msg(portdev, VIRTIO_CONSOLE_BAD_ID, |
1570 | VIRTIO_CONSOLE_DEVICE_READY, 1); | 1736 | VIRTIO_CONSOLE_DEVICE_READY, 1); |
1571 | return 0; | 1737 | return 0; |
@@ -1589,23 +1755,41 @@ static void virtcons_remove(struct virtio_device *vdev) | |||
1589 | { | 1755 | { |
1590 | struct ports_device *portdev; | 1756 | struct ports_device *portdev; |
1591 | struct port *port, *port2; | 1757 | struct port *port, *port2; |
1592 | struct port_buffer *buf; | ||
1593 | unsigned int len; | ||
1594 | 1758 | ||
1595 | portdev = vdev->priv; | 1759 | portdev = vdev->priv; |
1596 | 1760 | ||
1761 | spin_lock_irq(&pdrvdata_lock); | ||
1762 | list_del(&portdev->list); | ||
1763 | spin_unlock_irq(&pdrvdata_lock); | ||
1764 | |||
1765 | /* Disable interrupts for vqs */ | ||
1766 | vdev->config->reset(vdev); | ||
1767 | /* Finish up work that's lined up */ | ||
1597 | cancel_work_sync(&portdev->control_work); | 1768 | cancel_work_sync(&portdev->control_work); |
1598 | 1769 | ||
1599 | list_for_each_entry_safe(port, port2, &portdev->ports, list) | 1770 | list_for_each_entry_safe(port, port2, &portdev->ports, list) |
1600 | remove_port(port); | 1771 | unplug_port(port); |
1601 | 1772 | ||
1602 | unregister_chrdev(portdev->chr_major, "virtio-portsdev"); | 1773 | unregister_chrdev(portdev->chr_major, "virtio-portsdev"); |
1603 | 1774 | ||
1604 | while ((buf = virtqueue_get_buf(portdev->c_ivq, &len))) | 1775 | /* |
1605 | free_buf(buf); | 1776 | * When yanking out a device, we immediately lose the |
1777 | * (device-side) queues. So there's no point in keeping the | ||
1778 | * guest side around till we drop our final reference. This | ||
1779 | * also means that any ports which are in an open state will | ||
1780 | * have to just stop using the port, as the vqs are going | ||
1781 | * away. | ||
1782 | */ | ||
1783 | if (use_multiport(portdev)) { | ||
1784 | struct port_buffer *buf; | ||
1785 | unsigned int len; | ||
1606 | 1786 | ||
1607 | while ((buf = virtqueue_detach_unused_buf(portdev->c_ivq))) | 1787 | while ((buf = virtqueue_get_buf(portdev->c_ivq, &len))) |
1608 | free_buf(buf); | 1788 | free_buf(buf); |
1789 | |||
1790 | while ((buf = virtqueue_detach_unused_buf(portdev->c_ivq))) | ||
1791 | free_buf(buf); | ||
1792 | } | ||
1609 | 1793 | ||
1610 | vdev->config->del_vqs(vdev); | 1794 | vdev->config->del_vqs(vdev); |
1611 | kfree(portdev->in_vqs); | 1795 | kfree(portdev->in_vqs); |
@@ -1652,6 +1836,7 @@ static int __init init(void) | |||
1652 | PTR_ERR(pdrvdata.debugfs_dir)); | 1836 | PTR_ERR(pdrvdata.debugfs_dir)); |
1653 | } | 1837 | } |
1654 | INIT_LIST_HEAD(&pdrvdata.consoles); | 1838 | INIT_LIST_HEAD(&pdrvdata.consoles); |
1839 | INIT_LIST_HEAD(&pdrvdata.portdevs); | ||
1655 | 1840 | ||
1656 | return register_virtio_driver(&virtio_console); | 1841 | return register_virtio_driver(&virtio_console); |
1657 | } | 1842 | } |
diff --git a/drivers/firewire/ohci.c b/drivers/firewire/ohci.c index 1b05896648bc..9dcb17d51aee 100644 --- a/drivers/firewire/ohci.c +++ b/drivers/firewire/ohci.c | |||
@@ -2840,7 +2840,7 @@ static int __devinit pci_probe(struct pci_dev *dev, | |||
2840 | const struct pci_device_id *ent) | 2840 | const struct pci_device_id *ent) |
2841 | { | 2841 | { |
2842 | struct fw_ohci *ohci; | 2842 | struct fw_ohci *ohci; |
2843 | u32 bus_options, max_receive, link_speed, version, link_enh; | 2843 | u32 bus_options, max_receive, link_speed, version; |
2844 | u64 guid; | 2844 | u64 guid; |
2845 | int i, err, n_ir, n_it; | 2845 | int i, err, n_ir, n_it; |
2846 | size_t size; | 2846 | size_t size; |
@@ -2894,23 +2894,6 @@ static int __devinit pci_probe(struct pci_dev *dev, | |||
2894 | if (param_quirks) | 2894 | if (param_quirks) |
2895 | ohci->quirks = param_quirks; | 2895 | ohci->quirks = param_quirks; |
2896 | 2896 | ||
2897 | /* TI OHCI-Lynx and compatible: set recommended configuration bits. */ | ||
2898 | if (dev->vendor == PCI_VENDOR_ID_TI) { | ||
2899 | pci_read_config_dword(dev, PCI_CFG_TI_LinkEnh, &link_enh); | ||
2900 | |||
2901 | /* adjust latency of ATx FIFO: use 1.7 KB threshold */ | ||
2902 | link_enh &= ~TI_LinkEnh_atx_thresh_mask; | ||
2903 | link_enh |= TI_LinkEnh_atx_thresh_1_7K; | ||
2904 | |||
2905 | /* use priority arbitration for asynchronous responses */ | ||
2906 | link_enh |= TI_LinkEnh_enab_unfair; | ||
2907 | |||
2908 | /* required for aPhyEnhanceEnable to work */ | ||
2909 | link_enh |= TI_LinkEnh_enab_accel; | ||
2910 | |||
2911 | pci_write_config_dword(dev, PCI_CFG_TI_LinkEnh, link_enh); | ||
2912 | } | ||
2913 | |||
2914 | ar_context_init(&ohci->ar_request_ctx, ohci, | 2897 | ar_context_init(&ohci->ar_request_ctx, ohci, |
2915 | OHCI1394_AsReqRcvContextControlSet); | 2898 | OHCI1394_AsReqRcvContextControlSet); |
2916 | 2899 | ||
diff --git a/drivers/firewire/ohci.h b/drivers/firewire/ohci.h index 0e6c5a466908..ef5e7336da68 100644 --- a/drivers/firewire/ohci.h +++ b/drivers/firewire/ohci.h | |||
@@ -155,12 +155,4 @@ | |||
155 | 155 | ||
156 | #define OHCI1394_phy_tcode 0xe | 156 | #define OHCI1394_phy_tcode 0xe |
157 | 157 | ||
158 | /* TI extensions */ | ||
159 | |||
160 | #define PCI_CFG_TI_LinkEnh 0xf4 | ||
161 | #define TI_LinkEnh_enab_accel 0x00000002 | ||
162 | #define TI_LinkEnh_enab_unfair 0x00000080 | ||
163 | #define TI_LinkEnh_atx_thresh_mask 0x00003000 | ||
164 | #define TI_LinkEnh_atx_thresh_1_7K 0x00001000 | ||
165 | |||
166 | #endif /* _FIREWIRE_OHCI_H */ | 158 | #endif /* _FIREWIRE_OHCI_H */ |
diff --git a/drivers/gpu/drm/radeon/radeon_cursor.c b/drivers/gpu/drm/radeon/radeon_cursor.c index 5731fc9b1ae3..3eef567b0421 100644 --- a/drivers/gpu/drm/radeon/radeon_cursor.c +++ b/drivers/gpu/drm/radeon/radeon_cursor.c | |||
@@ -203,6 +203,7 @@ int radeon_crtc_cursor_move(struct drm_crtc *crtc, | |||
203 | struct radeon_crtc *radeon_crtc = to_radeon_crtc(crtc); | 203 | struct radeon_crtc *radeon_crtc = to_radeon_crtc(crtc); |
204 | struct radeon_device *rdev = crtc->dev->dev_private; | 204 | struct radeon_device *rdev = crtc->dev->dev_private; |
205 | int xorigin = 0, yorigin = 0; | 205 | int xorigin = 0, yorigin = 0; |
206 | int w = radeon_crtc->cursor_width; | ||
206 | 207 | ||
207 | if (x < 0) | 208 | if (x < 0) |
208 | xorigin = -x + 1; | 209 | xorigin = -x + 1; |
@@ -213,22 +214,7 @@ int radeon_crtc_cursor_move(struct drm_crtc *crtc, | |||
213 | if (yorigin >= CURSOR_HEIGHT) | 214 | if (yorigin >= CURSOR_HEIGHT) |
214 | yorigin = CURSOR_HEIGHT - 1; | 215 | yorigin = CURSOR_HEIGHT - 1; |
215 | 216 | ||
216 | radeon_lock_cursor(crtc, true); | 217 | if (ASIC_IS_AVIVO(rdev)) { |
217 | if (ASIC_IS_DCE4(rdev)) { | ||
218 | /* cursors are offset into the total surface */ | ||
219 | x += crtc->x; | ||
220 | y += crtc->y; | ||
221 | DRM_DEBUG("x %d y %d c->x %d c->y %d\n", x, y, crtc->x, crtc->y); | ||
222 | |||
223 | /* XXX: check if evergreen has the same issues as avivo chips */ | ||
224 | WREG32(EVERGREEN_CUR_POSITION + radeon_crtc->crtc_offset, | ||
225 | ((xorigin ? 0 : x) << 16) | | ||
226 | (yorigin ? 0 : y)); | ||
227 | WREG32(EVERGREEN_CUR_HOT_SPOT + radeon_crtc->crtc_offset, (xorigin << 16) | yorigin); | ||
228 | WREG32(EVERGREEN_CUR_SIZE + radeon_crtc->crtc_offset, | ||
229 | ((radeon_crtc->cursor_width - 1) << 16) | (radeon_crtc->cursor_height - 1)); | ||
230 | } else if (ASIC_IS_AVIVO(rdev)) { | ||
231 | int w = radeon_crtc->cursor_width; | ||
232 | int i = 0; | 218 | int i = 0; |
233 | struct drm_crtc *crtc_p; | 219 | struct drm_crtc *crtc_p; |
234 | 220 | ||
@@ -260,7 +246,17 @@ int radeon_crtc_cursor_move(struct drm_crtc *crtc, | |||
260 | if (w <= 0) | 246 | if (w <= 0) |
261 | w = 1; | 247 | w = 1; |
262 | } | 248 | } |
249 | } | ||
263 | 250 | ||
251 | radeon_lock_cursor(crtc, true); | ||
252 | if (ASIC_IS_DCE4(rdev)) { | ||
253 | WREG32(EVERGREEN_CUR_POSITION + radeon_crtc->crtc_offset, | ||
254 | ((xorigin ? 0 : x) << 16) | | ||
255 | (yorigin ? 0 : y)); | ||
256 | WREG32(EVERGREEN_CUR_HOT_SPOT + radeon_crtc->crtc_offset, (xorigin << 16) | yorigin); | ||
257 | WREG32(EVERGREEN_CUR_SIZE + radeon_crtc->crtc_offset, | ||
258 | ((w - 1) << 16) | (radeon_crtc->cursor_height - 1)); | ||
259 | } else if (ASIC_IS_AVIVO(rdev)) { | ||
264 | WREG32(AVIVO_D1CUR_POSITION + radeon_crtc->crtc_offset, | 260 | WREG32(AVIVO_D1CUR_POSITION + radeon_crtc->crtc_offset, |
265 | ((xorigin ? 0 : x) << 16) | | 261 | ((xorigin ? 0 : x) << 16) | |
266 | (yorigin ? 0 : y)); | 262 | (yorigin ? 0 : y)); |
diff --git a/drivers/hid/hid-cando.c b/drivers/hid/hid-cando.c index 4267a6fdc277..5925bdcd417d 100644 --- a/drivers/hid/hid-cando.c +++ b/drivers/hid/hid-cando.c | |||
@@ -237,6 +237,8 @@ static const struct hid_device_id cando_devices[] = { | |||
237 | USB_DEVICE_ID_CANDO_MULTI_TOUCH) }, | 237 | USB_DEVICE_ID_CANDO_MULTI_TOUCH) }, |
238 | { HID_USB_DEVICE(USB_VENDOR_ID_CANDO, | 238 | { HID_USB_DEVICE(USB_VENDOR_ID_CANDO, |
239 | USB_DEVICE_ID_CANDO_MULTI_TOUCH_11_6) }, | 239 | USB_DEVICE_ID_CANDO_MULTI_TOUCH_11_6) }, |
240 | { HID_USB_DEVICE(USB_VENDOR_ID_CANDO, | ||
241 | USB_DEVICE_ID_CANDO_MULTI_TOUCH_15_6) }, | ||
240 | { } | 242 | { } |
241 | }; | 243 | }; |
242 | MODULE_DEVICE_TABLE(hid, cando_devices); | 244 | MODULE_DEVICE_TABLE(hid, cando_devices); |
diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c index 3f7292486024..a0dea3d1296e 100644 --- a/drivers/hid/hid-core.c +++ b/drivers/hid/hid-core.c | |||
@@ -1292,6 +1292,7 @@ static const struct hid_device_id hid_blacklist[] = { | |||
1292 | { HID_USB_DEVICE(USB_VENDOR_ID_BTC, USB_DEVICE_ID_BTC_EMPREX_REMOTE_2) }, | 1292 | { HID_USB_DEVICE(USB_VENDOR_ID_BTC, USB_DEVICE_ID_BTC_EMPREX_REMOTE_2) }, |
1293 | { HID_USB_DEVICE(USB_VENDOR_ID_CANDO, USB_DEVICE_ID_CANDO_MULTI_TOUCH) }, | 1293 | { HID_USB_DEVICE(USB_VENDOR_ID_CANDO, USB_DEVICE_ID_CANDO_MULTI_TOUCH) }, |
1294 | { HID_USB_DEVICE(USB_VENDOR_ID_CANDO, USB_DEVICE_ID_CANDO_MULTI_TOUCH_11_6) }, | 1294 | { HID_USB_DEVICE(USB_VENDOR_ID_CANDO, USB_DEVICE_ID_CANDO_MULTI_TOUCH_11_6) }, |
1295 | { HID_USB_DEVICE(USB_VENDOR_ID_CANDO, USB_DEVICE_ID_CANDO_MULTI_TOUCH_15_6) }, | ||
1295 | { HID_USB_DEVICE(USB_VENDOR_ID_CHERRY, USB_DEVICE_ID_CHERRY_CYMOTION) }, | 1296 | { HID_USB_DEVICE(USB_VENDOR_ID_CHERRY, USB_DEVICE_ID_CHERRY_CYMOTION) }, |
1296 | { HID_USB_DEVICE(USB_VENDOR_ID_CHERRY, USB_DEVICE_ID_CHERRY_CYMOTION_SOLAR) }, | 1297 | { HID_USB_DEVICE(USB_VENDOR_ID_CHERRY, USB_DEVICE_ID_CHERRY_CYMOTION_SOLAR) }, |
1297 | { HID_USB_DEVICE(USB_VENDOR_ID_CHICONY, USB_DEVICE_ID_CHICONY_TACTICAL_PAD) }, | 1298 | { HID_USB_DEVICE(USB_VENDOR_ID_CHICONY, USB_DEVICE_ID_CHICONY_TACTICAL_PAD) }, |
diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index 765a4f53eb5c..c5ae5f1545bd 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h | |||
@@ -134,6 +134,7 @@ | |||
134 | #define USB_VENDOR_ID_CANDO 0x2087 | 134 | #define USB_VENDOR_ID_CANDO 0x2087 |
135 | #define USB_DEVICE_ID_CANDO_MULTI_TOUCH 0x0a01 | 135 | #define USB_DEVICE_ID_CANDO_MULTI_TOUCH 0x0a01 |
136 | #define USB_DEVICE_ID_CANDO_MULTI_TOUCH_11_6 0x0b03 | 136 | #define USB_DEVICE_ID_CANDO_MULTI_TOUCH_11_6 0x0b03 |
137 | #define USB_DEVICE_ID_CANDO_MULTI_TOUCH_15_6 0x0f01 | ||
137 | 138 | ||
138 | #define USB_VENDOR_ID_CH 0x068e | 139 | #define USB_VENDOR_ID_CH 0x068e |
139 | #define USB_DEVICE_ID_CH_PRO_PEDALS 0x00f2 | 140 | #define USB_DEVICE_ID_CH_PRO_PEDALS 0x00f2 |
@@ -503,6 +504,7 @@ | |||
503 | 504 | ||
504 | #define USB_VENDOR_ID_TURBOX 0x062a | 505 | #define USB_VENDOR_ID_TURBOX 0x062a |
505 | #define USB_DEVICE_ID_TURBOX_KEYBOARD 0x0201 | 506 | #define USB_DEVICE_ID_TURBOX_KEYBOARD 0x0201 |
507 | #define USB_DEVICE_ID_TURBOX_TOUCHSCREEN_MOSART 0x7100 | ||
506 | 508 | ||
507 | #define USB_VENDOR_ID_TWINHAN 0x6253 | 509 | #define USB_VENDOR_ID_TWINHAN 0x6253 |
508 | #define USB_DEVICE_ID_TWINHAN_IR_REMOTE 0x0100 | 510 | #define USB_DEVICE_ID_TWINHAN_IR_REMOTE 0x0100 |
diff --git a/drivers/hid/hidraw.c b/drivers/hid/hidraw.c index 47d70c523d93..a3866b5c0c43 100644 --- a/drivers/hid/hidraw.c +++ b/drivers/hid/hidraw.c | |||
@@ -109,6 +109,12 @@ static ssize_t hidraw_write(struct file *file, const char __user *buffer, size_t | |||
109 | int ret = 0; | 109 | int ret = 0; |
110 | 110 | ||
111 | mutex_lock(&minors_lock); | 111 | mutex_lock(&minors_lock); |
112 | |||
113 | if (!hidraw_table[minor]) { | ||
114 | ret = -ENODEV; | ||
115 | goto out; | ||
116 | } | ||
117 | |||
112 | dev = hidraw_table[minor]->hid; | 118 | dev = hidraw_table[minor]->hid; |
113 | 119 | ||
114 | if (!dev->hid_output_raw_report) { | 120 | if (!dev->hid_output_raw_report) { |
@@ -244,6 +250,10 @@ static long hidraw_ioctl(struct file *file, unsigned int cmd, | |||
244 | 250 | ||
245 | mutex_lock(&minors_lock); | 251 | mutex_lock(&minors_lock); |
246 | dev = hidraw_table[minor]; | 252 | dev = hidraw_table[minor]; |
253 | if (!dev) { | ||
254 | ret = -ENODEV; | ||
255 | goto out; | ||
256 | } | ||
247 | 257 | ||
248 | switch (cmd) { | 258 | switch (cmd) { |
249 | case HIDIOCGRDESCSIZE: | 259 | case HIDIOCGRDESCSIZE: |
@@ -317,6 +327,7 @@ static long hidraw_ioctl(struct file *file, unsigned int cmd, | |||
317 | 327 | ||
318 | ret = -ENOTTY; | 328 | ret = -ENOTTY; |
319 | } | 329 | } |
330 | out: | ||
320 | mutex_unlock(&minors_lock); | 331 | mutex_unlock(&minors_lock); |
321 | return ret; | 332 | return ret; |
322 | } | 333 | } |
diff --git a/drivers/hid/usbhid/hid-quirks.c b/drivers/hid/usbhid/hid-quirks.c index 70da3181c8a0..f0260c699adb 100644 --- a/drivers/hid/usbhid/hid-quirks.c +++ b/drivers/hid/usbhid/hid-quirks.c | |||
@@ -36,6 +36,7 @@ static const struct hid_blacklist { | |||
36 | { USB_VENDOR_ID_DWAV, USB_DEVICE_ID_EGALAX_TOUCHCONTROLLER, HID_QUIRK_MULTI_INPUT | HID_QUIRK_NOGET }, | 36 | { USB_VENDOR_ID_DWAV, USB_DEVICE_ID_EGALAX_TOUCHCONTROLLER, HID_QUIRK_MULTI_INPUT | HID_QUIRK_NOGET }, |
37 | { USB_VENDOR_ID_DWAV, USB_DEVICE_ID_DWAV_EGALAX_MULTITOUCH, HID_QUIRK_MULTI_INPUT }, | 37 | { USB_VENDOR_ID_DWAV, USB_DEVICE_ID_DWAV_EGALAX_MULTITOUCH, HID_QUIRK_MULTI_INPUT }, |
38 | { USB_VENDOR_ID_MOJO, USB_DEVICE_ID_RETRO_ADAPTER, HID_QUIRK_MULTI_INPUT }, | 38 | { USB_VENDOR_ID_MOJO, USB_DEVICE_ID_RETRO_ADAPTER, HID_QUIRK_MULTI_INPUT }, |
39 | { USB_VENDOR_ID_TURBOX, USB_DEVICE_ID_TURBOX_TOUCHSCREEN_MOSART, HID_QUIRK_MULTI_INPUT }, | ||
39 | { USB_VENDOR_ID_HAPP, USB_DEVICE_ID_UGCI_DRIVING, HID_QUIRK_BADPAD | HID_QUIRK_MULTI_INPUT }, | 40 | { USB_VENDOR_ID_HAPP, USB_DEVICE_ID_UGCI_DRIVING, HID_QUIRK_BADPAD | HID_QUIRK_MULTI_INPUT }, |
40 | { USB_VENDOR_ID_HAPP, USB_DEVICE_ID_UGCI_FLYING, HID_QUIRK_BADPAD | HID_QUIRK_MULTI_INPUT }, | 41 | { USB_VENDOR_ID_HAPP, USB_DEVICE_ID_UGCI_FLYING, HID_QUIRK_BADPAD | HID_QUIRK_MULTI_INPUT }, |
41 | { USB_VENDOR_ID_HAPP, USB_DEVICE_ID_UGCI_FIGHTING, HID_QUIRK_BADPAD | HID_QUIRK_MULTI_INPUT }, | 42 | { USB_VENDOR_ID_HAPP, USB_DEVICE_ID_UGCI_FIGHTING, HID_QUIRK_BADPAD | HID_QUIRK_MULTI_INPUT }, |
diff --git a/drivers/i2c/busses/i2c-davinci.c b/drivers/i2c/busses/i2c-davinci.c index b8feac5f2ef4..5795c8398c7c 100644 --- a/drivers/i2c/busses/i2c-davinci.c +++ b/drivers/i2c/busses/i2c-davinci.c | |||
@@ -331,21 +331,16 @@ i2c_davinci_xfer_msg(struct i2c_adapter *adap, struct i2c_msg *msg, int stop) | |||
331 | INIT_COMPLETION(dev->cmd_complete); | 331 | INIT_COMPLETION(dev->cmd_complete); |
332 | dev->cmd_err = 0; | 332 | dev->cmd_err = 0; |
333 | 333 | ||
334 | /* Take I2C out of reset, configure it as master and set the | 334 | /* Take I2C out of reset and configure it as master */ |
335 | * start bit */ | 335 | flag = DAVINCI_I2C_MDR_IRS | DAVINCI_I2C_MDR_MST; |
336 | flag = DAVINCI_I2C_MDR_IRS | DAVINCI_I2C_MDR_MST | DAVINCI_I2C_MDR_STT; | ||
337 | 336 | ||
338 | /* if the slave address is ten bit address, enable XA bit */ | 337 | /* if the slave address is ten bit address, enable XA bit */ |
339 | if (msg->flags & I2C_M_TEN) | 338 | if (msg->flags & I2C_M_TEN) |
340 | flag |= DAVINCI_I2C_MDR_XA; | 339 | flag |= DAVINCI_I2C_MDR_XA; |
341 | if (!(msg->flags & I2C_M_RD)) | 340 | if (!(msg->flags & I2C_M_RD)) |
342 | flag |= DAVINCI_I2C_MDR_TRX; | 341 | flag |= DAVINCI_I2C_MDR_TRX; |
343 | if (stop) | 342 | if (msg->len == 0) |
344 | flag |= DAVINCI_I2C_MDR_STP; | ||
345 | if (msg->len == 0) { | ||
346 | flag |= DAVINCI_I2C_MDR_RM; | 343 | flag |= DAVINCI_I2C_MDR_RM; |
347 | flag &= ~DAVINCI_I2C_MDR_STP; | ||
348 | } | ||
349 | 344 | ||
350 | /* Enable receive or transmit interrupts */ | 345 | /* Enable receive or transmit interrupts */ |
351 | w = davinci_i2c_read_reg(dev, DAVINCI_I2C_IMR_REG); | 346 | w = davinci_i2c_read_reg(dev, DAVINCI_I2C_IMR_REG); |
@@ -358,17 +353,28 @@ i2c_davinci_xfer_msg(struct i2c_adapter *adap, struct i2c_msg *msg, int stop) | |||
358 | dev->terminate = 0; | 353 | dev->terminate = 0; |
359 | 354 | ||
360 | /* | 355 | /* |
356 | * Write mode register first as needed for correct behaviour | ||
357 | * on OMAP-L138, but don't set STT yet to avoid a race with XRDY | ||
358 | * occuring before we have loaded DXR | ||
359 | */ | ||
360 | davinci_i2c_write_reg(dev, DAVINCI_I2C_MDR_REG, flag); | ||
361 | |||
362 | /* | ||
361 | * First byte should be set here, not after interrupt, | 363 | * First byte should be set here, not after interrupt, |
362 | * because transmit-data-ready interrupt can come before | 364 | * because transmit-data-ready interrupt can come before |
363 | * NACK-interrupt during sending of previous message and | 365 | * NACK-interrupt during sending of previous message and |
364 | * ICDXR may have wrong data | 366 | * ICDXR may have wrong data |
367 | * It also saves us one interrupt, slightly faster | ||
365 | */ | 368 | */ |
366 | if ((!(msg->flags & I2C_M_RD)) && dev->buf_len) { | 369 | if ((!(msg->flags & I2C_M_RD)) && dev->buf_len) { |
367 | davinci_i2c_write_reg(dev, DAVINCI_I2C_DXR_REG, *dev->buf++); | 370 | davinci_i2c_write_reg(dev, DAVINCI_I2C_DXR_REG, *dev->buf++); |
368 | dev->buf_len--; | 371 | dev->buf_len--; |
369 | } | 372 | } |
370 | 373 | ||
371 | /* write the data into mode register; start transmitting */ | 374 | /* Set STT to begin transmit now DXR is loaded */ |
375 | flag |= DAVINCI_I2C_MDR_STT; | ||
376 | if (stop && msg->len != 0) | ||
377 | flag |= DAVINCI_I2C_MDR_STP; | ||
372 | davinci_i2c_write_reg(dev, DAVINCI_I2C_MDR_REG, flag); | 378 | davinci_i2c_write_reg(dev, DAVINCI_I2C_MDR_REG, flag); |
373 | 379 | ||
374 | r = wait_for_completion_interruptible_timeout(&dev->cmd_complete, | 380 | r = wait_for_completion_interruptible_timeout(&dev->cmd_complete, |
diff --git a/drivers/i2c/busses/i2c-imx.c b/drivers/i2c/busses/i2c-imx.c index d1ff9408dc1f..4c2a62b75b5c 100644 --- a/drivers/i2c/busses/i2c-imx.c +++ b/drivers/i2c/busses/i2c-imx.c | |||
@@ -159,15 +159,9 @@ static int i2c_imx_bus_busy(struct imx_i2c_struct *i2c_imx, int for_busy) | |||
159 | 159 | ||
160 | static int i2c_imx_trx_complete(struct imx_i2c_struct *i2c_imx) | 160 | static int i2c_imx_trx_complete(struct imx_i2c_struct *i2c_imx) |
161 | { | 161 | { |
162 | int result; | 162 | wait_event_timeout(i2c_imx->queue, i2c_imx->i2csr & I2SR_IIF, HZ / 10); |
163 | |||
164 | result = wait_event_interruptible_timeout(i2c_imx->queue, | ||
165 | i2c_imx->i2csr & I2SR_IIF, HZ / 10); | ||
166 | 163 | ||
167 | if (unlikely(result < 0)) { | 164 | if (unlikely(!(i2c_imx->i2csr & I2SR_IIF))) { |
168 | dev_dbg(&i2c_imx->adapter.dev, "<%s> result < 0\n", __func__); | ||
169 | return result; | ||
170 | } else if (unlikely(!(i2c_imx->i2csr & I2SR_IIF))) { | ||
171 | dev_dbg(&i2c_imx->adapter.dev, "<%s> Timeout\n", __func__); | 165 | dev_dbg(&i2c_imx->adapter.dev, "<%s> Timeout\n", __func__); |
172 | return -ETIMEDOUT; | 166 | return -ETIMEDOUT; |
173 | } | 167 | } |
@@ -295,7 +289,7 @@ static irqreturn_t i2c_imx_isr(int irq, void *dev_id) | |||
295 | i2c_imx->i2csr = temp; | 289 | i2c_imx->i2csr = temp; |
296 | temp &= ~I2SR_IIF; | 290 | temp &= ~I2SR_IIF; |
297 | writeb(temp, i2c_imx->base + IMX_I2C_I2SR); | 291 | writeb(temp, i2c_imx->base + IMX_I2C_I2SR); |
298 | wake_up_interruptible(&i2c_imx->queue); | 292 | wake_up(&i2c_imx->queue); |
299 | return IRQ_HANDLED; | 293 | return IRQ_HANDLED; |
300 | } | 294 | } |
301 | 295 | ||
diff --git a/drivers/input/evdev.c b/drivers/input/evdev.c index c908c5f83645..af9ee313c10b 100644 --- a/drivers/input/evdev.c +++ b/drivers/input/evdev.c | |||
@@ -28,7 +28,7 @@ struct evdev { | |||
28 | int minor; | 28 | int minor; |
29 | struct input_handle handle; | 29 | struct input_handle handle; |
30 | wait_queue_head_t wait; | 30 | wait_queue_head_t wait; |
31 | struct evdev_client *grab; | 31 | struct evdev_client __rcu *grab; |
32 | struct list_head client_list; | 32 | struct list_head client_list; |
33 | spinlock_t client_lock; /* protects client_list */ | 33 | spinlock_t client_lock; /* protects client_list */ |
34 | struct mutex mutex; | 34 | struct mutex mutex; |
@@ -669,6 +669,9 @@ static long evdev_do_ioctl(struct file *file, unsigned int cmd, | |||
669 | 669 | ||
670 | if ((_IOC_NR(cmd) & ~ABS_MAX) == _IOC_NR(EVIOCGABS(0))) { | 670 | if ((_IOC_NR(cmd) & ~ABS_MAX) == _IOC_NR(EVIOCGABS(0))) { |
671 | 671 | ||
672 | if (!dev->absinfo) | ||
673 | return -EINVAL; | ||
674 | |||
672 | t = _IOC_NR(cmd) & ABS_MAX; | 675 | t = _IOC_NR(cmd) & ABS_MAX; |
673 | abs = dev->absinfo[t]; | 676 | abs = dev->absinfo[t]; |
674 | 677 | ||
@@ -680,10 +683,13 @@ static long evdev_do_ioctl(struct file *file, unsigned int cmd, | |||
680 | } | 683 | } |
681 | } | 684 | } |
682 | 685 | ||
683 | if (_IOC_DIR(cmd) == _IOC_READ) { | 686 | if (_IOC_DIR(cmd) == _IOC_WRITE) { |
684 | 687 | ||
685 | if ((_IOC_NR(cmd) & ~ABS_MAX) == _IOC_NR(EVIOCSABS(0))) { | 688 | if ((_IOC_NR(cmd) & ~ABS_MAX) == _IOC_NR(EVIOCSABS(0))) { |
686 | 689 | ||
690 | if (!dev->absinfo) | ||
691 | return -EINVAL; | ||
692 | |||
687 | t = _IOC_NR(cmd) & ABS_MAX; | 693 | t = _IOC_NR(cmd) & ABS_MAX; |
688 | 694 | ||
689 | if (copy_from_user(&abs, p, min_t(size_t, | 695 | if (copy_from_user(&abs, p, min_t(size_t, |
diff --git a/drivers/input/misc/hp_sdc_rtc.c b/drivers/input/misc/hp_sdc_rtc.c index c19066479057..7e2c12a5b839 100644 --- a/drivers/input/misc/hp_sdc_rtc.c +++ b/drivers/input/misc/hp_sdc_rtc.c | |||
@@ -104,7 +104,7 @@ static int hp_sdc_rtc_do_read_bbrtc (struct rtc_time *rtctm) | |||
104 | t.endidx = 91; | 104 | t.endidx = 91; |
105 | t.seq = tseq; | 105 | t.seq = tseq; |
106 | t.act.semaphore = &tsem; | 106 | t.act.semaphore = &tsem; |
107 | init_MUTEX_LOCKED(&tsem); | 107 | sema_init(&tsem, 0); |
108 | 108 | ||
109 | if (hp_sdc_enqueue_transaction(&t)) return -1; | 109 | if (hp_sdc_enqueue_transaction(&t)) return -1; |
110 | 110 | ||
@@ -698,7 +698,7 @@ static int __init hp_sdc_rtc_init(void) | |||
698 | return -ENODEV; | 698 | return -ENODEV; |
699 | #endif | 699 | #endif |
700 | 700 | ||
701 | init_MUTEX(&i8042tregs); | 701 | sema_init(&i8042tregs, 1); |
702 | 702 | ||
703 | if ((ret = hp_sdc_request_timer_irq(&hp_sdc_rtc_isr))) | 703 | if ((ret = hp_sdc_request_timer_irq(&hp_sdc_rtc_isr))) |
704 | return ret; | 704 | return ret; |
diff --git a/drivers/input/serio/hil_mlc.c b/drivers/input/serio/hil_mlc.c index c92f4edfee7b..e5624d8f1709 100644 --- a/drivers/input/serio/hil_mlc.c +++ b/drivers/input/serio/hil_mlc.c | |||
@@ -915,15 +915,15 @@ int hil_mlc_register(hil_mlc *mlc) | |||
915 | mlc->ostarted = 0; | 915 | mlc->ostarted = 0; |
916 | 916 | ||
917 | rwlock_init(&mlc->lock); | 917 | rwlock_init(&mlc->lock); |
918 | init_MUTEX(&mlc->osem); | 918 | sema_init(&mlc->osem, 1); |
919 | 919 | ||
920 | init_MUTEX(&mlc->isem); | 920 | sema_init(&mlc->isem, 1); |
921 | mlc->icount = -1; | 921 | mlc->icount = -1; |
922 | mlc->imatch = 0; | 922 | mlc->imatch = 0; |
923 | 923 | ||
924 | mlc->opercnt = 0; | 924 | mlc->opercnt = 0; |
925 | 925 | ||
926 | init_MUTEX_LOCKED(&(mlc->csem)); | 926 | sema_init(&(mlc->csem), 0); |
927 | 927 | ||
928 | hil_mlc_clear_di_scratch(mlc); | 928 | hil_mlc_clear_di_scratch(mlc); |
929 | hil_mlc_clear_di_map(mlc, 0); | 929 | hil_mlc_clear_di_map(mlc, 0); |
diff --git a/drivers/input/serio/hp_sdc.c b/drivers/input/serio/hp_sdc.c index bcc2d30ec245..8c0b51c31424 100644 --- a/drivers/input/serio/hp_sdc.c +++ b/drivers/input/serio/hp_sdc.c | |||
@@ -905,7 +905,7 @@ static int __init hp_sdc_init(void) | |||
905 | ts_sync[1] = 0x0f; | 905 | ts_sync[1] = 0x0f; |
906 | ts_sync[2] = ts_sync[3] = ts_sync[4] = ts_sync[5] = 0; | 906 | ts_sync[2] = ts_sync[3] = ts_sync[4] = ts_sync[5] = 0; |
907 | t_sync.act.semaphore = &s_sync; | 907 | t_sync.act.semaphore = &s_sync; |
908 | init_MUTEX_LOCKED(&s_sync); | 908 | sema_init(&s_sync, 0); |
909 | hp_sdc_enqueue_transaction(&t_sync); | 909 | hp_sdc_enqueue_transaction(&t_sync); |
910 | down(&s_sync); /* Wait for t_sync to complete */ | 910 | down(&s_sync); /* Wait for t_sync to complete */ |
911 | 911 | ||
@@ -1039,7 +1039,7 @@ static int __init hp_sdc_register(void) | |||
1039 | return hp_sdc.dev_err; | 1039 | return hp_sdc.dev_err; |
1040 | } | 1040 | } |
1041 | 1041 | ||
1042 | init_MUTEX_LOCKED(&tq_init_sem); | 1042 | sema_init(&tq_init_sem, 0); |
1043 | 1043 | ||
1044 | tq_init.actidx = 0; | 1044 | tq_init.actidx = 0; |
1045 | tq_init.idx = 1; | 1045 | tq_init.idx = 1; |
diff --git a/drivers/macintosh/adb.c b/drivers/macintosh/adb.c index 1c4ee6e77937..bf64e49d996a 100644 --- a/drivers/macintosh/adb.c +++ b/drivers/macintosh/adb.c | |||
@@ -83,7 +83,7 @@ static struct adb_driver *adb_controller; | |||
83 | BLOCKING_NOTIFIER_HEAD(adb_client_list); | 83 | BLOCKING_NOTIFIER_HEAD(adb_client_list); |
84 | static int adb_got_sleep; | 84 | static int adb_got_sleep; |
85 | static int adb_inited; | 85 | static int adb_inited; |
86 | static DECLARE_MUTEX(adb_probe_mutex); | 86 | static DEFINE_SEMAPHORE(adb_probe_mutex); |
87 | static int sleepy_trackpad; | 87 | static int sleepy_trackpad; |
88 | static int autopoll_devs; | 88 | static int autopoll_devs; |
89 | int __adb_probe_sync; | 89 | int __adb_probe_sync; |
diff --git a/drivers/media/video/v4l2-compat-ioctl32.c b/drivers/media/video/v4l2-compat-ioctl32.c index 073f01390cdd..86294ed35c9b 100644 --- a/drivers/media/video/v4l2-compat-ioctl32.c +++ b/drivers/media/video/v4l2-compat-ioctl32.c | |||
@@ -193,17 +193,24 @@ static int put_video_window32(struct video_window *kp, struct video_window32 __u | |||
193 | struct video_code32 { | 193 | struct video_code32 { |
194 | char loadwhat[16]; /* name or tag of file being passed */ | 194 | char loadwhat[16]; /* name or tag of file being passed */ |
195 | compat_int_t datasize; | 195 | compat_int_t datasize; |
196 | unsigned char *data; | 196 | compat_uptr_t data; |
197 | }; | 197 | }; |
198 | 198 | ||
199 | static int get_microcode32(struct video_code *kp, struct video_code32 __user *up) | 199 | static struct video_code __user *get_microcode32(struct video_code32 *kp) |
200 | { | 200 | { |
201 | if (!access_ok(VERIFY_READ, up, sizeof(struct video_code32)) || | 201 | struct video_code __user *up; |
202 | copy_from_user(kp->loadwhat, up->loadwhat, sizeof(up->loadwhat)) || | 202 | |
203 | get_user(kp->datasize, &up->datasize) || | 203 | up = compat_alloc_user_space(sizeof(*up)); |
204 | copy_from_user(kp->data, up->data, up->datasize)) | 204 | |
205 | return -EFAULT; | 205 | /* |
206 | return 0; | 206 | * NOTE! We don't actually care if these fail. If the |
207 | * user address is invalid, the native ioctl will do | ||
208 | * the error handling for us | ||
209 | */ | ||
210 | (void) copy_to_user(up->loadwhat, kp->loadwhat, sizeof(up->loadwhat)); | ||
211 | (void) put_user(kp->datasize, &up->datasize); | ||
212 | (void) put_user(compat_ptr(kp->data), &up->data); | ||
213 | return up; | ||
207 | } | 214 | } |
208 | 215 | ||
209 | #define VIDIOCGTUNER32 _IOWR('v', 4, struct video_tuner32) | 216 | #define VIDIOCGTUNER32 _IOWR('v', 4, struct video_tuner32) |
@@ -739,7 +746,7 @@ static long do_video_ioctl(struct file *file, unsigned int cmd, unsigned long ar | |||
739 | struct video_tuner vt; | 746 | struct video_tuner vt; |
740 | struct video_buffer vb; | 747 | struct video_buffer vb; |
741 | struct video_window vw; | 748 | struct video_window vw; |
742 | struct video_code vc; | 749 | struct video_code32 vc; |
743 | struct video_audio va; | 750 | struct video_audio va; |
744 | #endif | 751 | #endif |
745 | struct v4l2_format v2f; | 752 | struct v4l2_format v2f; |
@@ -818,8 +825,11 @@ static long do_video_ioctl(struct file *file, unsigned int cmd, unsigned long ar | |||
818 | break; | 825 | break; |
819 | 826 | ||
820 | case VIDIOCSMICROCODE: | 827 | case VIDIOCSMICROCODE: |
821 | err = get_microcode32(&karg.vc, up); | 828 | /* Copy the 32-bit "video_code32" to kernel space */ |
822 | compatible_arg = 0; | 829 | if (copy_from_user(&karg.vc, up, sizeof(karg.vc))) |
830 | return -EFAULT; | ||
831 | /* Convert the 32-bit version to a 64-bit version in user space */ | ||
832 | up = get_microcode32(&karg.vc); | ||
823 | break; | 833 | break; |
824 | 834 | ||
825 | case VIDIOCSFREQ: | 835 | case VIDIOCSFREQ: |
diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c index 5db49b124ffa..09eee6df0653 100644 --- a/drivers/mmc/core/core.c +++ b/drivers/mmc/core/core.c | |||
@@ -1631,6 +1631,19 @@ int mmc_suspend_host(struct mmc_host *host) | |||
1631 | if (host->bus_ops && !host->bus_dead) { | 1631 | if (host->bus_ops && !host->bus_dead) { |
1632 | if (host->bus_ops->suspend) | 1632 | if (host->bus_ops->suspend) |
1633 | err = host->bus_ops->suspend(host); | 1633 | err = host->bus_ops->suspend(host); |
1634 | if (err == -ENOSYS || !host->bus_ops->resume) { | ||
1635 | /* | ||
1636 | * We simply "remove" the card in this case. | ||
1637 | * It will be redetected on resume. | ||
1638 | */ | ||
1639 | if (host->bus_ops->remove) | ||
1640 | host->bus_ops->remove(host); | ||
1641 | mmc_claim_host(host); | ||
1642 | mmc_detach_bus(host); | ||
1643 | mmc_release_host(host); | ||
1644 | host->pm_flags = 0; | ||
1645 | err = 0; | ||
1646 | } | ||
1634 | } | 1647 | } |
1635 | mmc_bus_put(host); | 1648 | mmc_bus_put(host); |
1636 | 1649 | ||
diff --git a/drivers/mtd/nand/mxc_nand.c b/drivers/mtd/nand/mxc_nand.c index b2828e84d243..214b03afdd48 100644 --- a/drivers/mtd/nand/mxc_nand.c +++ b/drivers/mtd/nand/mxc_nand.c | |||
@@ -30,6 +30,8 @@ | |||
30 | #include <linux/clk.h> | 30 | #include <linux/clk.h> |
31 | #include <linux/err.h> | 31 | #include <linux/err.h> |
32 | #include <linux/io.h> | 32 | #include <linux/io.h> |
33 | #include <linux/irq.h> | ||
34 | #include <linux/completion.h> | ||
33 | 35 | ||
34 | #include <asm/mach/flash.h> | 36 | #include <asm/mach/flash.h> |
35 | #include <mach/mxc_nand.h> | 37 | #include <mach/mxc_nand.h> |
@@ -151,7 +153,7 @@ struct mxc_nand_host { | |||
151 | int irq; | 153 | int irq; |
152 | int eccsize; | 154 | int eccsize; |
153 | 155 | ||
154 | wait_queue_head_t irq_waitq; | 156 | struct completion op_completion; |
155 | 157 | ||
156 | uint8_t *data_buf; | 158 | uint8_t *data_buf; |
157 | unsigned int buf_start; | 159 | unsigned int buf_start; |
@@ -164,6 +166,7 @@ struct mxc_nand_host { | |||
164 | void (*send_read_id)(struct mxc_nand_host *); | 166 | void (*send_read_id)(struct mxc_nand_host *); |
165 | uint16_t (*get_dev_status)(struct mxc_nand_host *); | 167 | uint16_t (*get_dev_status)(struct mxc_nand_host *); |
166 | int (*check_int)(struct mxc_nand_host *); | 168 | int (*check_int)(struct mxc_nand_host *); |
169 | void (*irq_control)(struct mxc_nand_host *, int); | ||
167 | }; | 170 | }; |
168 | 171 | ||
169 | /* OOB placement block for use with hardware ecc generation */ | 172 | /* OOB placement block for use with hardware ecc generation */ |
@@ -216,9 +219,12 @@ static irqreturn_t mxc_nfc_irq(int irq, void *dev_id) | |||
216 | { | 219 | { |
217 | struct mxc_nand_host *host = dev_id; | 220 | struct mxc_nand_host *host = dev_id; |
218 | 221 | ||
219 | disable_irq_nosync(irq); | 222 | if (!host->check_int(host)) |
223 | return IRQ_NONE; | ||
220 | 224 | ||
221 | wake_up(&host->irq_waitq); | 225 | host->irq_control(host, 0); |
226 | |||
227 | complete(&host->op_completion); | ||
222 | 228 | ||
223 | return IRQ_HANDLED; | 229 | return IRQ_HANDLED; |
224 | } | 230 | } |
@@ -245,11 +251,54 @@ static int check_int_v1_v2(struct mxc_nand_host *host) | |||
245 | if (!(tmp & NFC_V1_V2_CONFIG2_INT)) | 251 | if (!(tmp & NFC_V1_V2_CONFIG2_INT)) |
246 | return 0; | 252 | return 0; |
247 | 253 | ||
248 | writew(tmp & ~NFC_V1_V2_CONFIG2_INT, NFC_V1_V2_CONFIG2); | 254 | if (!cpu_is_mx21()) |
255 | writew(tmp & ~NFC_V1_V2_CONFIG2_INT, NFC_V1_V2_CONFIG2); | ||
249 | 256 | ||
250 | return 1; | 257 | return 1; |
251 | } | 258 | } |
252 | 259 | ||
260 | /* | ||
261 | * It has been observed that the i.MX21 cannot read the CONFIG2:INT bit | ||
262 | * if interrupts are masked (CONFIG1:INT_MSK is set). To handle this, the | ||
263 | * driver can enable/disable the irq line rather than simply masking the | ||
264 | * interrupts. | ||
265 | */ | ||
266 | static void irq_control_mx21(struct mxc_nand_host *host, int activate) | ||
267 | { | ||
268 | if (activate) | ||
269 | enable_irq(host->irq); | ||
270 | else | ||
271 | disable_irq_nosync(host->irq); | ||
272 | } | ||
273 | |||
274 | static void irq_control_v1_v2(struct mxc_nand_host *host, int activate) | ||
275 | { | ||
276 | uint16_t tmp; | ||
277 | |||
278 | tmp = readw(NFC_V1_V2_CONFIG1); | ||
279 | |||
280 | if (activate) | ||
281 | tmp &= ~NFC_V1_V2_CONFIG1_INT_MSK; | ||
282 | else | ||
283 | tmp |= NFC_V1_V2_CONFIG1_INT_MSK; | ||
284 | |||
285 | writew(tmp, NFC_V1_V2_CONFIG1); | ||
286 | } | ||
287 | |||
288 | static void irq_control_v3(struct mxc_nand_host *host, int activate) | ||
289 | { | ||
290 | uint32_t tmp; | ||
291 | |||
292 | tmp = readl(NFC_V3_CONFIG2); | ||
293 | |||
294 | if (activate) | ||
295 | tmp &= ~NFC_V3_CONFIG2_INT_MSK; | ||
296 | else | ||
297 | tmp |= NFC_V3_CONFIG2_INT_MSK; | ||
298 | |||
299 | writel(tmp, NFC_V3_CONFIG2); | ||
300 | } | ||
301 | |||
253 | /* This function polls the NANDFC to wait for the basic operation to | 302 | /* This function polls the NANDFC to wait for the basic operation to |
254 | * complete by checking the INT bit of config2 register. | 303 | * complete by checking the INT bit of config2 register. |
255 | */ | 304 | */ |
@@ -259,10 +308,9 @@ static void wait_op_done(struct mxc_nand_host *host, int useirq) | |||
259 | 308 | ||
260 | if (useirq) { | 309 | if (useirq) { |
261 | if (!host->check_int(host)) { | 310 | if (!host->check_int(host)) { |
262 | 311 | INIT_COMPLETION(host->op_completion); | |
263 | enable_irq(host->irq); | 312 | host->irq_control(host, 1); |
264 | 313 | wait_for_completion(&host->op_completion); | |
265 | wait_event(host->irq_waitq, host->check_int(host)); | ||
266 | } | 314 | } |
267 | } else { | 315 | } else { |
268 | while (max_retries-- > 0) { | 316 | while (max_retries-- > 0) { |
@@ -799,6 +847,7 @@ static void preset_v3(struct mtd_info *mtd) | |||
799 | NFC_V3_CONFIG2_2CMD_PHASES | | 847 | NFC_V3_CONFIG2_2CMD_PHASES | |
800 | NFC_V3_CONFIG2_SPAS(mtd->oobsize >> 1) | | 848 | NFC_V3_CONFIG2_SPAS(mtd->oobsize >> 1) | |
801 | NFC_V3_CONFIG2_ST_CMD(0x70) | | 849 | NFC_V3_CONFIG2_ST_CMD(0x70) | |
850 | NFC_V3_CONFIG2_INT_MSK | | ||
802 | NFC_V3_CONFIG2_NUM_ADDR_PHASE0; | 851 | NFC_V3_CONFIG2_NUM_ADDR_PHASE0; |
803 | 852 | ||
804 | if (chip->ecc.mode == NAND_ECC_HW) | 853 | if (chip->ecc.mode == NAND_ECC_HW) |
@@ -1024,6 +1073,10 @@ static int __init mxcnd_probe(struct platform_device *pdev) | |||
1024 | host->send_read_id = send_read_id_v1_v2; | 1073 | host->send_read_id = send_read_id_v1_v2; |
1025 | host->get_dev_status = get_dev_status_v1_v2; | 1074 | host->get_dev_status = get_dev_status_v1_v2; |
1026 | host->check_int = check_int_v1_v2; | 1075 | host->check_int = check_int_v1_v2; |
1076 | if (cpu_is_mx21()) | ||
1077 | host->irq_control = irq_control_mx21; | ||
1078 | else | ||
1079 | host->irq_control = irq_control_v1_v2; | ||
1027 | } | 1080 | } |
1028 | 1081 | ||
1029 | if (nfc_is_v21()) { | 1082 | if (nfc_is_v21()) { |
@@ -1062,6 +1115,7 @@ static int __init mxcnd_probe(struct platform_device *pdev) | |||
1062 | host->send_read_id = send_read_id_v3; | 1115 | host->send_read_id = send_read_id_v3; |
1063 | host->check_int = check_int_v3; | 1116 | host->check_int = check_int_v3; |
1064 | host->get_dev_status = get_dev_status_v3; | 1117 | host->get_dev_status = get_dev_status_v3; |
1118 | host->irq_control = irq_control_v3; | ||
1065 | oob_smallpage = &nandv2_hw_eccoob_smallpage; | 1119 | oob_smallpage = &nandv2_hw_eccoob_smallpage; |
1066 | oob_largepage = &nandv2_hw_eccoob_largepage; | 1120 | oob_largepage = &nandv2_hw_eccoob_largepage; |
1067 | } else | 1121 | } else |
@@ -1093,14 +1147,34 @@ static int __init mxcnd_probe(struct platform_device *pdev) | |||
1093 | this->options |= NAND_USE_FLASH_BBT; | 1147 | this->options |= NAND_USE_FLASH_BBT; |
1094 | } | 1148 | } |
1095 | 1149 | ||
1096 | init_waitqueue_head(&host->irq_waitq); | 1150 | init_completion(&host->op_completion); |
1097 | 1151 | ||
1098 | host->irq = platform_get_irq(pdev, 0); | 1152 | host->irq = platform_get_irq(pdev, 0); |
1099 | 1153 | ||
1154 | /* | ||
1155 | * mask the interrupt. For i.MX21 explicitely call | ||
1156 | * irq_control_v1_v2 to use the mask bit. We can't call | ||
1157 | * disable_irq_nosync() for an interrupt we do not own yet. | ||
1158 | */ | ||
1159 | if (cpu_is_mx21()) | ||
1160 | irq_control_v1_v2(host, 0); | ||
1161 | else | ||
1162 | host->irq_control(host, 0); | ||
1163 | |||
1100 | err = request_irq(host->irq, mxc_nfc_irq, IRQF_DISABLED, DRIVER_NAME, host); | 1164 | err = request_irq(host->irq, mxc_nfc_irq, IRQF_DISABLED, DRIVER_NAME, host); |
1101 | if (err) | 1165 | if (err) |
1102 | goto eirq; | 1166 | goto eirq; |
1103 | 1167 | ||
1168 | host->irq_control(host, 0); | ||
1169 | |||
1170 | /* | ||
1171 | * Now that the interrupt is disabled make sure the interrupt | ||
1172 | * mask bit is cleared on i.MX21. Otherwise we can't read | ||
1173 | * the interrupt status bit on this machine. | ||
1174 | */ | ||
1175 | if (cpu_is_mx21()) | ||
1176 | irq_control_v1_v2(host, 1); | ||
1177 | |||
1104 | /* first scan to find the device and get the page size */ | 1178 | /* first scan to find the device and get the page size */ |
1105 | if (nand_scan_ident(mtd, 1, NULL)) { | 1179 | if (nand_scan_ident(mtd, 1, NULL)) { |
1106 | err = -ENXIO; | 1180 | err = -ENXIO; |
diff --git a/drivers/net/3c527.c b/drivers/net/3c527.c index 70705d1306b9..eca55c52bdfd 100644 --- a/drivers/net/3c527.c +++ b/drivers/net/3c527.c | |||
@@ -522,7 +522,7 @@ static int __init mc32_probe1(struct net_device *dev, int slot) | |||
522 | lp->tx_len = lp->exec_box->data[9]; /* Transmit list count */ | 522 | lp->tx_len = lp->exec_box->data[9]; /* Transmit list count */ |
523 | lp->rx_len = lp->exec_box->data[11]; /* Receive list count */ | 523 | lp->rx_len = lp->exec_box->data[11]; /* Receive list count */ |
524 | 524 | ||
525 | init_MUTEX_LOCKED(&lp->cmd_mutex); | 525 | sema_init(&lp->cmd_mutex, 0); |
526 | init_completion(&lp->execution_cmd); | 526 | init_completion(&lp->execution_cmd); |
527 | init_completion(&lp->xceiver_cmd); | 527 | init_completion(&lp->xceiver_cmd); |
528 | 528 | ||
diff --git a/drivers/net/b44.c b/drivers/net/b44.c index 1e620e287ae0..efeffdf9e5fa 100644 --- a/drivers/net/b44.c +++ b/drivers/net/b44.c | |||
@@ -2170,8 +2170,6 @@ static int __devinit b44_init_one(struct ssb_device *sdev, | |||
2170 | dev->irq = sdev->irq; | 2170 | dev->irq = sdev->irq; |
2171 | SET_ETHTOOL_OPS(dev, &b44_ethtool_ops); | 2171 | SET_ETHTOOL_OPS(dev, &b44_ethtool_ops); |
2172 | 2172 | ||
2173 | netif_carrier_off(dev); | ||
2174 | |||
2175 | err = ssb_bus_powerup(sdev->bus, 0); | 2173 | err = ssb_bus_powerup(sdev->bus, 0); |
2176 | if (err) { | 2174 | if (err) { |
2177 | dev_err(sdev->dev, | 2175 | dev_err(sdev->dev, |
@@ -2213,6 +2211,8 @@ static int __devinit b44_init_one(struct ssb_device *sdev, | |||
2213 | goto err_out_powerdown; | 2211 | goto err_out_powerdown; |
2214 | } | 2212 | } |
2215 | 2213 | ||
2214 | netif_carrier_off(dev); | ||
2215 | |||
2216 | ssb_set_drvdata(sdev, dev); | 2216 | ssb_set_drvdata(sdev, dev); |
2217 | 2217 | ||
2218 | /* Chip reset provides power to the b44 MAC & PCI cores, which | 2218 | /* Chip reset provides power to the b44 MAC & PCI cores, which |
diff --git a/drivers/net/ehea/ehea_main.c b/drivers/net/ehea/ehea_main.c index a333b42111b8..6372610ed240 100644 --- a/drivers/net/ehea/ehea_main.c +++ b/drivers/net/ehea/ehea_main.c | |||
@@ -533,8 +533,15 @@ static inline void ehea_fill_skb(struct net_device *dev, | |||
533 | int length = cqe->num_bytes_transfered - 4; /*remove CRC */ | 533 | int length = cqe->num_bytes_transfered - 4; /*remove CRC */ |
534 | 534 | ||
535 | skb_put(skb, length); | 535 | skb_put(skb, length); |
536 | skb->ip_summed = CHECKSUM_UNNECESSARY; | ||
537 | skb->protocol = eth_type_trans(skb, dev); | 536 | skb->protocol = eth_type_trans(skb, dev); |
537 | |||
538 | /* The packet was not an IPV4 packet so a complemented checksum was | ||
539 | calculated. The value is found in the Internet Checksum field. */ | ||
540 | if (cqe->status & EHEA_CQE_BLIND_CKSUM) { | ||
541 | skb->ip_summed = CHECKSUM_COMPLETE; | ||
542 | skb->csum = csum_unfold(~cqe->inet_checksum_value); | ||
543 | } else | ||
544 | skb->ip_summed = CHECKSUM_UNNECESSARY; | ||
538 | } | 545 | } |
539 | 546 | ||
540 | static inline struct sk_buff *get_skb_by_index(struct sk_buff **skb_array, | 547 | static inline struct sk_buff *get_skb_by_index(struct sk_buff **skb_array, |
diff --git a/drivers/net/ehea/ehea_qmr.h b/drivers/net/ehea/ehea_qmr.h index f608a6c54af5..38104734a3be 100644 --- a/drivers/net/ehea/ehea_qmr.h +++ b/drivers/net/ehea/ehea_qmr.h | |||
@@ -150,6 +150,7 @@ struct ehea_rwqe { | |||
150 | #define EHEA_CQE_TYPE_RQ 0x60 | 150 | #define EHEA_CQE_TYPE_RQ 0x60 |
151 | #define EHEA_CQE_STAT_ERR_MASK 0x700F | 151 | #define EHEA_CQE_STAT_ERR_MASK 0x700F |
152 | #define EHEA_CQE_STAT_FAT_ERR_MASK 0xF | 152 | #define EHEA_CQE_STAT_FAT_ERR_MASK 0xF |
153 | #define EHEA_CQE_BLIND_CKSUM 0x8000 | ||
153 | #define EHEA_CQE_STAT_ERR_TCP 0x4000 | 154 | #define EHEA_CQE_STAT_ERR_TCP 0x4000 |
154 | #define EHEA_CQE_STAT_ERR_IP 0x2000 | 155 | #define EHEA_CQE_STAT_ERR_IP 0x2000 |
155 | #define EHEA_CQE_STAT_ERR_CRC 0x1000 | 156 | #define EHEA_CQE_STAT_ERR_CRC 0x1000 |
diff --git a/drivers/net/fec.c b/drivers/net/fec.c index 768b840aeb6b..cce32d43175f 100644 --- a/drivers/net/fec.c +++ b/drivers/net/fec.c | |||
@@ -678,24 +678,37 @@ static int fec_enet_mii_probe(struct net_device *dev) | |||
678 | { | 678 | { |
679 | struct fec_enet_private *fep = netdev_priv(dev); | 679 | struct fec_enet_private *fep = netdev_priv(dev); |
680 | struct phy_device *phy_dev = NULL; | 680 | struct phy_device *phy_dev = NULL; |
681 | int ret; | 681 | char mdio_bus_id[MII_BUS_ID_SIZE]; |
682 | char phy_name[MII_BUS_ID_SIZE + 3]; | ||
683 | int phy_id; | ||
682 | 684 | ||
683 | fep->phy_dev = NULL; | 685 | fep->phy_dev = NULL; |
684 | 686 | ||
685 | /* find the first phy */ | 687 | /* check for attached phy */ |
686 | phy_dev = phy_find_first(fep->mii_bus); | 688 | for (phy_id = 0; (phy_id < PHY_MAX_ADDR); phy_id++) { |
687 | if (!phy_dev) { | 689 | if ((fep->mii_bus->phy_mask & (1 << phy_id))) |
688 | printk(KERN_ERR "%s: no PHY found\n", dev->name); | 690 | continue; |
689 | return -ENODEV; | 691 | if (fep->mii_bus->phy_map[phy_id] == NULL) |
692 | continue; | ||
693 | if (fep->mii_bus->phy_map[phy_id]->phy_id == 0) | ||
694 | continue; | ||
695 | strncpy(mdio_bus_id, fep->mii_bus->id, MII_BUS_ID_SIZE); | ||
696 | break; | ||
690 | } | 697 | } |
691 | 698 | ||
692 | /* attach the mac to the phy */ | 699 | if (phy_id >= PHY_MAX_ADDR) { |
693 | ret = phy_connect_direct(dev, phy_dev, | 700 | printk(KERN_INFO "%s: no PHY, assuming direct connection " |
694 | &fec_enet_adjust_link, 0, | 701 | "to switch\n", dev->name); |
695 | PHY_INTERFACE_MODE_MII); | 702 | strncpy(mdio_bus_id, "0", MII_BUS_ID_SIZE); |
696 | if (ret) { | 703 | phy_id = 0; |
697 | printk(KERN_ERR "%s: Could not attach to PHY\n", dev->name); | 704 | } |
698 | return ret; | 705 | |
706 | snprintf(phy_name, MII_BUS_ID_SIZE, PHY_ID_FMT, mdio_bus_id, phy_id); | ||
707 | phy_dev = phy_connect(dev, phy_name, &fec_enet_adjust_link, 0, | ||
708 | PHY_INTERFACE_MODE_MII); | ||
709 | if (IS_ERR(phy_dev)) { | ||
710 | printk(KERN_ERR "%s: could not attach to PHY\n", dev->name); | ||
711 | return PTR_ERR(phy_dev); | ||
699 | } | 712 | } |
700 | 713 | ||
701 | /* mask with MAC supported features */ | 714 | /* mask with MAC supported features */ |
@@ -738,7 +751,7 @@ static int fec_enet_mii_init(struct platform_device *pdev) | |||
738 | fep->mii_bus->read = fec_enet_mdio_read; | 751 | fep->mii_bus->read = fec_enet_mdio_read; |
739 | fep->mii_bus->write = fec_enet_mdio_write; | 752 | fep->mii_bus->write = fec_enet_mdio_write; |
740 | fep->mii_bus->reset = fec_enet_mdio_reset; | 753 | fep->mii_bus->reset = fec_enet_mdio_reset; |
741 | snprintf(fep->mii_bus->id, MII_BUS_ID_SIZE, "%x", pdev->id); | 754 | snprintf(fep->mii_bus->id, MII_BUS_ID_SIZE, "%x", pdev->id + 1); |
742 | fep->mii_bus->priv = fep; | 755 | fep->mii_bus->priv = fep; |
743 | fep->mii_bus->parent = &pdev->dev; | 756 | fep->mii_bus->parent = &pdev->dev; |
744 | 757 | ||
@@ -1311,6 +1324,9 @@ fec_probe(struct platform_device *pdev) | |||
1311 | if (ret) | 1324 | if (ret) |
1312 | goto failed_mii_init; | 1325 | goto failed_mii_init; |
1313 | 1326 | ||
1327 | /* Carrier starts down, phylib will bring it up */ | ||
1328 | netif_carrier_off(ndev); | ||
1329 | |||
1314 | ret = register_netdev(ndev); | 1330 | ret = register_netdev(ndev); |
1315 | if (ret) | 1331 | if (ret) |
1316 | goto failed_register; | 1332 | goto failed_register; |
diff --git a/drivers/net/hamradio/6pack.c b/drivers/net/hamradio/6pack.c index 4b52c767ad05..3e5d0b6b6516 100644 --- a/drivers/net/hamradio/6pack.c +++ b/drivers/net/hamradio/6pack.c | |||
@@ -608,7 +608,7 @@ static int sixpack_open(struct tty_struct *tty) | |||
608 | 608 | ||
609 | spin_lock_init(&sp->lock); | 609 | spin_lock_init(&sp->lock); |
610 | atomic_set(&sp->refcnt, 1); | 610 | atomic_set(&sp->refcnt, 1); |
611 | init_MUTEX_LOCKED(&sp->dead_sem); | 611 | sema_init(&sp->dead_sem, 0); |
612 | 612 | ||
613 | /* !!! length of the buffers. MTU is IP MTU, not PACLEN! */ | 613 | /* !!! length of the buffers. MTU is IP MTU, not PACLEN! */ |
614 | 614 | ||
diff --git a/drivers/net/hamradio/mkiss.c b/drivers/net/hamradio/mkiss.c index 66e88bd59caa..4c628393c8b1 100644 --- a/drivers/net/hamradio/mkiss.c +++ b/drivers/net/hamradio/mkiss.c | |||
@@ -747,7 +747,7 @@ static int mkiss_open(struct tty_struct *tty) | |||
747 | 747 | ||
748 | spin_lock_init(&ax->buflock); | 748 | spin_lock_init(&ax->buflock); |
749 | atomic_set(&ax->refcnt, 1); | 749 | atomic_set(&ax->refcnt, 1); |
750 | init_MUTEX_LOCKED(&ax->dead_sem); | 750 | sema_init(&ax->dead_sem, 0); |
751 | 751 | ||
752 | ax->tty = tty; | 752 | ax->tty = tty; |
753 | tty->disc_data = ax; | 753 | tty->disc_data = ax; |
diff --git a/drivers/net/irda/sir_dev.c b/drivers/net/irda/sir_dev.c index 1b051dab7b29..51d74447f8f8 100644 --- a/drivers/net/irda/sir_dev.c +++ b/drivers/net/irda/sir_dev.c | |||
@@ -909,7 +909,7 @@ struct sir_dev * sirdev_get_instance(const struct sir_driver *drv, const char *n | |||
909 | dev->tx_skb = NULL; | 909 | dev->tx_skb = NULL; |
910 | 910 | ||
911 | spin_lock_init(&dev->tx_lock); | 911 | spin_lock_init(&dev->tx_lock); |
912 | init_MUTEX(&dev->fsm.sem); | 912 | sema_init(&dev->fsm.sem, 1); |
913 | 913 | ||
914 | dev->drv = drv; | 914 | dev->drv = drv; |
915 | dev->netdev = ndev; | 915 | dev->netdev = ndev; |
diff --git a/drivers/net/ppp_async.c b/drivers/net/ppp_async.c index af50a530daee..78d70a6481bf 100644 --- a/drivers/net/ppp_async.c +++ b/drivers/net/ppp_async.c | |||
@@ -184,7 +184,7 @@ ppp_asynctty_open(struct tty_struct *tty) | |||
184 | tasklet_init(&ap->tsk, ppp_async_process, (unsigned long) ap); | 184 | tasklet_init(&ap->tsk, ppp_async_process, (unsigned long) ap); |
185 | 185 | ||
186 | atomic_set(&ap->refcnt, 1); | 186 | atomic_set(&ap->refcnt, 1); |
187 | init_MUTEX_LOCKED(&ap->dead_sem); | 187 | sema_init(&ap->dead_sem, 0); |
188 | 188 | ||
189 | ap->chan.private = ap; | 189 | ap->chan.private = ap; |
190 | ap->chan.ops = &async_ops; | 190 | ap->chan.ops = &async_ops; |
diff --git a/drivers/net/r8169.c b/drivers/net/r8169.c index a0da4a17b025..992db2fa136e 100644 --- a/drivers/net/r8169.c +++ b/drivers/net/r8169.c | |||
@@ -1212,7 +1212,8 @@ static void rtl8169_update_counters(struct net_device *dev) | |||
1212 | if ((RTL_R8(ChipCmd) & CmdRxEnb) == 0) | 1212 | if ((RTL_R8(ChipCmd) & CmdRxEnb) == 0) |
1213 | return; | 1213 | return; |
1214 | 1214 | ||
1215 | counters = pci_alloc_consistent(tp->pci_dev, sizeof(*counters), &paddr); | 1215 | counters = dma_alloc_coherent(&tp->pci_dev->dev, sizeof(*counters), |
1216 | &paddr, GFP_KERNEL); | ||
1216 | if (!counters) | 1217 | if (!counters) |
1217 | return; | 1218 | return; |
1218 | 1219 | ||
@@ -1233,7 +1234,8 @@ static void rtl8169_update_counters(struct net_device *dev) | |||
1233 | RTL_W32(CounterAddrLow, 0); | 1234 | RTL_W32(CounterAddrLow, 0); |
1234 | RTL_W32(CounterAddrHigh, 0); | 1235 | RTL_W32(CounterAddrHigh, 0); |
1235 | 1236 | ||
1236 | pci_free_consistent(tp->pci_dev, sizeof(*counters), counters, paddr); | 1237 | dma_free_coherent(&tp->pci_dev->dev, sizeof(*counters), counters, |
1238 | paddr); | ||
1237 | } | 1239 | } |
1238 | 1240 | ||
1239 | static void rtl8169_get_ethtool_stats(struct net_device *dev, | 1241 | static void rtl8169_get_ethtool_stats(struct net_device *dev, |
@@ -3292,15 +3294,15 @@ static int rtl8169_open(struct net_device *dev) | |||
3292 | 3294 | ||
3293 | /* | 3295 | /* |
3294 | * Rx and Tx desscriptors needs 256 bytes alignment. | 3296 | * Rx and Tx desscriptors needs 256 bytes alignment. |
3295 | * pci_alloc_consistent provides more. | 3297 | * dma_alloc_coherent provides more. |
3296 | */ | 3298 | */ |
3297 | tp->TxDescArray = pci_alloc_consistent(pdev, R8169_TX_RING_BYTES, | 3299 | tp->TxDescArray = dma_alloc_coherent(&pdev->dev, R8169_TX_RING_BYTES, |
3298 | &tp->TxPhyAddr); | 3300 | &tp->TxPhyAddr, GFP_KERNEL); |
3299 | if (!tp->TxDescArray) | 3301 | if (!tp->TxDescArray) |
3300 | goto err_pm_runtime_put; | 3302 | goto err_pm_runtime_put; |
3301 | 3303 | ||
3302 | tp->RxDescArray = pci_alloc_consistent(pdev, R8169_RX_RING_BYTES, | 3304 | tp->RxDescArray = dma_alloc_coherent(&pdev->dev, R8169_RX_RING_BYTES, |
3303 | &tp->RxPhyAddr); | 3305 | &tp->RxPhyAddr, GFP_KERNEL); |
3304 | if (!tp->RxDescArray) | 3306 | if (!tp->RxDescArray) |
3305 | goto err_free_tx_0; | 3307 | goto err_free_tx_0; |
3306 | 3308 | ||
@@ -3334,12 +3336,12 @@ out: | |||
3334 | err_release_ring_2: | 3336 | err_release_ring_2: |
3335 | rtl8169_rx_clear(tp); | 3337 | rtl8169_rx_clear(tp); |
3336 | err_free_rx_1: | 3338 | err_free_rx_1: |
3337 | pci_free_consistent(pdev, R8169_RX_RING_BYTES, tp->RxDescArray, | 3339 | dma_free_coherent(&pdev->dev, R8169_RX_RING_BYTES, tp->RxDescArray, |
3338 | tp->RxPhyAddr); | 3340 | tp->RxPhyAddr); |
3339 | tp->RxDescArray = NULL; | 3341 | tp->RxDescArray = NULL; |
3340 | err_free_tx_0: | 3342 | err_free_tx_0: |
3341 | pci_free_consistent(pdev, R8169_TX_RING_BYTES, tp->TxDescArray, | 3343 | dma_free_coherent(&pdev->dev, R8169_TX_RING_BYTES, tp->TxDescArray, |
3342 | tp->TxPhyAddr); | 3344 | tp->TxPhyAddr); |
3343 | tp->TxDescArray = NULL; | 3345 | tp->TxDescArray = NULL; |
3344 | err_pm_runtime_put: | 3346 | err_pm_runtime_put: |
3345 | pm_runtime_put_noidle(&pdev->dev); | 3347 | pm_runtime_put_noidle(&pdev->dev); |
@@ -3975,7 +3977,7 @@ static void rtl8169_free_rx_skb(struct rtl8169_private *tp, | |||
3975 | { | 3977 | { |
3976 | struct pci_dev *pdev = tp->pci_dev; | 3978 | struct pci_dev *pdev = tp->pci_dev; |
3977 | 3979 | ||
3978 | pci_unmap_single(pdev, le64_to_cpu(desc->addr), tp->rx_buf_sz, | 3980 | dma_unmap_single(&pdev->dev, le64_to_cpu(desc->addr), tp->rx_buf_sz, |
3979 | PCI_DMA_FROMDEVICE); | 3981 | PCI_DMA_FROMDEVICE); |
3980 | dev_kfree_skb(*sk_buff); | 3982 | dev_kfree_skb(*sk_buff); |
3981 | *sk_buff = NULL; | 3983 | *sk_buff = NULL; |
@@ -4000,7 +4002,7 @@ static inline void rtl8169_map_to_asic(struct RxDesc *desc, dma_addr_t mapping, | |||
4000 | static struct sk_buff *rtl8169_alloc_rx_skb(struct pci_dev *pdev, | 4002 | static struct sk_buff *rtl8169_alloc_rx_skb(struct pci_dev *pdev, |
4001 | struct net_device *dev, | 4003 | struct net_device *dev, |
4002 | struct RxDesc *desc, int rx_buf_sz, | 4004 | struct RxDesc *desc, int rx_buf_sz, |
4003 | unsigned int align) | 4005 | unsigned int align, gfp_t gfp) |
4004 | { | 4006 | { |
4005 | struct sk_buff *skb; | 4007 | struct sk_buff *skb; |
4006 | dma_addr_t mapping; | 4008 | dma_addr_t mapping; |
@@ -4008,13 +4010,13 @@ static struct sk_buff *rtl8169_alloc_rx_skb(struct pci_dev *pdev, | |||
4008 | 4010 | ||
4009 | pad = align ? align : NET_IP_ALIGN; | 4011 | pad = align ? align : NET_IP_ALIGN; |
4010 | 4012 | ||
4011 | skb = netdev_alloc_skb(dev, rx_buf_sz + pad); | 4013 | skb = __netdev_alloc_skb(dev, rx_buf_sz + pad, gfp); |
4012 | if (!skb) | 4014 | if (!skb) |
4013 | goto err_out; | 4015 | goto err_out; |
4014 | 4016 | ||
4015 | skb_reserve(skb, align ? ((pad - 1) & (unsigned long)skb->data) : pad); | 4017 | skb_reserve(skb, align ? ((pad - 1) & (unsigned long)skb->data) : pad); |
4016 | 4018 | ||
4017 | mapping = pci_map_single(pdev, skb->data, rx_buf_sz, | 4019 | mapping = dma_map_single(&pdev->dev, skb->data, rx_buf_sz, |
4018 | PCI_DMA_FROMDEVICE); | 4020 | PCI_DMA_FROMDEVICE); |
4019 | 4021 | ||
4020 | rtl8169_map_to_asic(desc, mapping, rx_buf_sz); | 4022 | rtl8169_map_to_asic(desc, mapping, rx_buf_sz); |
@@ -4039,7 +4041,7 @@ static void rtl8169_rx_clear(struct rtl8169_private *tp) | |||
4039 | } | 4041 | } |
4040 | 4042 | ||
4041 | static u32 rtl8169_rx_fill(struct rtl8169_private *tp, struct net_device *dev, | 4043 | static u32 rtl8169_rx_fill(struct rtl8169_private *tp, struct net_device *dev, |
4042 | u32 start, u32 end) | 4044 | u32 start, u32 end, gfp_t gfp) |
4043 | { | 4045 | { |
4044 | u32 cur; | 4046 | u32 cur; |
4045 | 4047 | ||
@@ -4054,7 +4056,7 @@ static u32 rtl8169_rx_fill(struct rtl8169_private *tp, struct net_device *dev, | |||
4054 | 4056 | ||
4055 | skb = rtl8169_alloc_rx_skb(tp->pci_dev, dev, | 4057 | skb = rtl8169_alloc_rx_skb(tp->pci_dev, dev, |
4056 | tp->RxDescArray + i, | 4058 | tp->RxDescArray + i, |
4057 | tp->rx_buf_sz, tp->align); | 4059 | tp->rx_buf_sz, tp->align, gfp); |
4058 | if (!skb) | 4060 | if (!skb) |
4059 | break; | 4061 | break; |
4060 | 4062 | ||
@@ -4082,7 +4084,7 @@ static int rtl8169_init_ring(struct net_device *dev) | |||
4082 | memset(tp->tx_skb, 0x0, NUM_TX_DESC * sizeof(struct ring_info)); | 4084 | memset(tp->tx_skb, 0x0, NUM_TX_DESC * sizeof(struct ring_info)); |
4083 | memset(tp->Rx_skbuff, 0x0, NUM_RX_DESC * sizeof(struct sk_buff *)); | 4085 | memset(tp->Rx_skbuff, 0x0, NUM_RX_DESC * sizeof(struct sk_buff *)); |
4084 | 4086 | ||
4085 | if (rtl8169_rx_fill(tp, dev, 0, NUM_RX_DESC) != NUM_RX_DESC) | 4087 | if (rtl8169_rx_fill(tp, dev, 0, NUM_RX_DESC, GFP_KERNEL) != NUM_RX_DESC) |
4086 | goto err_out; | 4088 | goto err_out; |
4087 | 4089 | ||
4088 | rtl8169_mark_as_last_descriptor(tp->RxDescArray + NUM_RX_DESC - 1); | 4090 | rtl8169_mark_as_last_descriptor(tp->RxDescArray + NUM_RX_DESC - 1); |
@@ -4099,7 +4101,8 @@ static void rtl8169_unmap_tx_skb(struct pci_dev *pdev, struct ring_info *tx_skb, | |||
4099 | { | 4101 | { |
4100 | unsigned int len = tx_skb->len; | 4102 | unsigned int len = tx_skb->len; |
4101 | 4103 | ||
4102 | pci_unmap_single(pdev, le64_to_cpu(desc->addr), len, PCI_DMA_TODEVICE); | 4104 | dma_unmap_single(&pdev->dev, le64_to_cpu(desc->addr), len, |
4105 | PCI_DMA_TODEVICE); | ||
4103 | desc->opts1 = 0x00; | 4106 | desc->opts1 = 0x00; |
4104 | desc->opts2 = 0x00; | 4107 | desc->opts2 = 0x00; |
4105 | desc->addr = 0x00; | 4108 | desc->addr = 0x00; |
@@ -4243,7 +4246,8 @@ static int rtl8169_xmit_frags(struct rtl8169_private *tp, struct sk_buff *skb, | |||
4243 | txd = tp->TxDescArray + entry; | 4246 | txd = tp->TxDescArray + entry; |
4244 | len = frag->size; | 4247 | len = frag->size; |
4245 | addr = ((void *) page_address(frag->page)) + frag->page_offset; | 4248 | addr = ((void *) page_address(frag->page)) + frag->page_offset; |
4246 | mapping = pci_map_single(tp->pci_dev, addr, len, PCI_DMA_TODEVICE); | 4249 | mapping = dma_map_single(&tp->pci_dev->dev, addr, len, |
4250 | PCI_DMA_TODEVICE); | ||
4247 | 4251 | ||
4248 | /* anti gcc 2.95.3 bugware (sic) */ | 4252 | /* anti gcc 2.95.3 bugware (sic) */ |
4249 | status = opts1 | len | (RingEnd * !((entry + 1) % NUM_TX_DESC)); | 4253 | status = opts1 | len | (RingEnd * !((entry + 1) % NUM_TX_DESC)); |
@@ -4313,7 +4317,8 @@ static netdev_tx_t rtl8169_start_xmit(struct sk_buff *skb, | |||
4313 | tp->tx_skb[entry].skb = skb; | 4317 | tp->tx_skb[entry].skb = skb; |
4314 | } | 4318 | } |
4315 | 4319 | ||
4316 | mapping = pci_map_single(tp->pci_dev, skb->data, len, PCI_DMA_TODEVICE); | 4320 | mapping = dma_map_single(&tp->pci_dev->dev, skb->data, len, |
4321 | PCI_DMA_TODEVICE); | ||
4317 | 4322 | ||
4318 | tp->tx_skb[entry].len = len; | 4323 | tp->tx_skb[entry].len = len; |
4319 | txd->addr = cpu_to_le64(mapping); | 4324 | txd->addr = cpu_to_le64(mapping); |
@@ -4477,8 +4482,8 @@ static inline bool rtl8169_try_rx_copy(struct sk_buff **sk_buff, | |||
4477 | if (!skb) | 4482 | if (!skb) |
4478 | goto out; | 4483 | goto out; |
4479 | 4484 | ||
4480 | pci_dma_sync_single_for_cpu(tp->pci_dev, addr, pkt_size, | 4485 | dma_sync_single_for_cpu(&tp->pci_dev->dev, addr, pkt_size, |
4481 | PCI_DMA_FROMDEVICE); | 4486 | PCI_DMA_FROMDEVICE); |
4482 | skb_copy_from_linear_data(*sk_buff, skb->data, pkt_size); | 4487 | skb_copy_from_linear_data(*sk_buff, skb->data, pkt_size); |
4483 | *sk_buff = skb; | 4488 | *sk_buff = skb; |
4484 | done = true; | 4489 | done = true; |
@@ -4549,11 +4554,11 @@ static int rtl8169_rx_interrupt(struct net_device *dev, | |||
4549 | rtl8169_rx_csum(skb, desc); | 4554 | rtl8169_rx_csum(skb, desc); |
4550 | 4555 | ||
4551 | if (rtl8169_try_rx_copy(&skb, tp, pkt_size, addr)) { | 4556 | if (rtl8169_try_rx_copy(&skb, tp, pkt_size, addr)) { |
4552 | pci_dma_sync_single_for_device(pdev, addr, | 4557 | dma_sync_single_for_device(&pdev->dev, addr, |
4553 | pkt_size, PCI_DMA_FROMDEVICE); | 4558 | pkt_size, PCI_DMA_FROMDEVICE); |
4554 | rtl8169_mark_to_asic(desc, tp->rx_buf_sz); | 4559 | rtl8169_mark_to_asic(desc, tp->rx_buf_sz); |
4555 | } else { | 4560 | } else { |
4556 | pci_unmap_single(pdev, addr, tp->rx_buf_sz, | 4561 | dma_unmap_single(&pdev->dev, addr, tp->rx_buf_sz, |
4557 | PCI_DMA_FROMDEVICE); | 4562 | PCI_DMA_FROMDEVICE); |
4558 | tp->Rx_skbuff[entry] = NULL; | 4563 | tp->Rx_skbuff[entry] = NULL; |
4559 | } | 4564 | } |
@@ -4583,7 +4588,7 @@ static int rtl8169_rx_interrupt(struct net_device *dev, | |||
4583 | count = cur_rx - tp->cur_rx; | 4588 | count = cur_rx - tp->cur_rx; |
4584 | tp->cur_rx = cur_rx; | 4589 | tp->cur_rx = cur_rx; |
4585 | 4590 | ||
4586 | delta = rtl8169_rx_fill(tp, dev, tp->dirty_rx, tp->cur_rx); | 4591 | delta = rtl8169_rx_fill(tp, dev, tp->dirty_rx, tp->cur_rx, GFP_ATOMIC); |
4587 | if (!delta && count) | 4592 | if (!delta && count) |
4588 | netif_info(tp, intr, dev, "no Rx buffer allocated\n"); | 4593 | netif_info(tp, intr, dev, "no Rx buffer allocated\n"); |
4589 | tp->dirty_rx += delta; | 4594 | tp->dirty_rx += delta; |
@@ -4769,10 +4774,10 @@ static int rtl8169_close(struct net_device *dev) | |||
4769 | 4774 | ||
4770 | free_irq(dev->irq, dev); | 4775 | free_irq(dev->irq, dev); |
4771 | 4776 | ||
4772 | pci_free_consistent(pdev, R8169_RX_RING_BYTES, tp->RxDescArray, | 4777 | dma_free_coherent(&pdev->dev, R8169_RX_RING_BYTES, tp->RxDescArray, |
4773 | tp->RxPhyAddr); | 4778 | tp->RxPhyAddr); |
4774 | pci_free_consistent(pdev, R8169_TX_RING_BYTES, tp->TxDescArray, | 4779 | dma_free_coherent(&pdev->dev, R8169_TX_RING_BYTES, tp->TxDescArray, |
4775 | tp->TxPhyAddr); | 4780 | tp->TxPhyAddr); |
4776 | tp->TxDescArray = NULL; | 4781 | tp->TxDescArray = NULL; |
4777 | tp->RxDescArray = NULL; | 4782 | tp->RxDescArray = NULL; |
4778 | 4783 | ||
diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c index bc3af78a869f..1ec4b9e0239a 100644 --- a/drivers/net/tg3.c +++ b/drivers/net/tg3.c | |||
@@ -4666,7 +4666,7 @@ static int tg3_rx(struct tg3_napi *tnapi, int budget) | |||
4666 | desc_idx, *post_ptr); | 4666 | desc_idx, *post_ptr); |
4667 | drop_it_no_recycle: | 4667 | drop_it_no_recycle: |
4668 | /* Other statistics kept track of by card. */ | 4668 | /* Other statistics kept track of by card. */ |
4669 | tp->net_stats.rx_dropped++; | 4669 | tp->rx_dropped++; |
4670 | goto next_pkt; | 4670 | goto next_pkt; |
4671 | } | 4671 | } |
4672 | 4672 | ||
@@ -4726,7 +4726,7 @@ static int tg3_rx(struct tg3_napi *tnapi, int budget) | |||
4726 | if (len > (tp->dev->mtu + ETH_HLEN) && | 4726 | if (len > (tp->dev->mtu + ETH_HLEN) && |
4727 | skb->protocol != htons(ETH_P_8021Q)) { | 4727 | skb->protocol != htons(ETH_P_8021Q)) { |
4728 | dev_kfree_skb(skb); | 4728 | dev_kfree_skb(skb); |
4729 | goto next_pkt; | 4729 | goto drop_it_no_recycle; |
4730 | } | 4730 | } |
4731 | 4731 | ||
4732 | if (desc->type_flags & RXD_FLAG_VLAN && | 4732 | if (desc->type_flags & RXD_FLAG_VLAN && |
@@ -9240,6 +9240,8 @@ static struct rtnl_link_stats64 *tg3_get_stats64(struct net_device *dev, | |||
9240 | stats->rx_missed_errors = old_stats->rx_missed_errors + | 9240 | stats->rx_missed_errors = old_stats->rx_missed_errors + |
9241 | get_stat64(&hw_stats->rx_discards); | 9241 | get_stat64(&hw_stats->rx_discards); |
9242 | 9242 | ||
9243 | stats->rx_dropped = tp->rx_dropped; | ||
9244 | |||
9243 | return stats; | 9245 | return stats; |
9244 | } | 9246 | } |
9245 | 9247 | ||
diff --git a/drivers/net/tg3.h b/drivers/net/tg3.h index 4937bd190964..be7ff138a7f9 100644 --- a/drivers/net/tg3.h +++ b/drivers/net/tg3.h | |||
@@ -2759,7 +2759,7 @@ struct tg3 { | |||
2759 | 2759 | ||
2760 | 2760 | ||
2761 | /* begin "everything else" cacheline(s) section */ | 2761 | /* begin "everything else" cacheline(s) section */ |
2762 | struct rtnl_link_stats64 net_stats; | 2762 | unsigned long rx_dropped; |
2763 | struct rtnl_link_stats64 net_stats_prev; | 2763 | struct rtnl_link_stats64 net_stats_prev; |
2764 | struct tg3_ethtool_stats estats; | 2764 | struct tg3_ethtool_stats estats; |
2765 | struct tg3_ethtool_stats estats_prev; | 2765 | struct tg3_ethtool_stats estats_prev; |
diff --git a/drivers/net/wan/cosa.c b/drivers/net/wan/cosa.c index 04c6cd4333f1..10bafd59f9c3 100644 --- a/drivers/net/wan/cosa.c +++ b/drivers/net/wan/cosa.c | |||
@@ -575,7 +575,7 @@ static int cosa_probe(int base, int irq, int dma) | |||
575 | 575 | ||
576 | /* Initialize the chardev data structures */ | 576 | /* Initialize the chardev data structures */ |
577 | mutex_init(&chan->rlock); | 577 | mutex_init(&chan->rlock); |
578 | init_MUTEX(&chan->wsem); | 578 | sema_init(&chan->wsem, 1); |
579 | 579 | ||
580 | /* Register the network interface */ | 580 | /* Register the network interface */ |
581 | if (!(chan->netdev = alloc_hdlcdev(chan))) { | 581 | if (!(chan->netdev = alloc_hdlcdev(chan))) { |
diff --git a/drivers/net/wimax/i2400m/rx.c b/drivers/net/wimax/i2400m/rx.c index 8cc9e319f435..1737d1488b35 100644 --- a/drivers/net/wimax/i2400m/rx.c +++ b/drivers/net/wimax/i2400m/rx.c | |||
@@ -1244,16 +1244,16 @@ int i2400m_rx(struct i2400m *i2400m, struct sk_buff *skb) | |||
1244 | int i, result; | 1244 | int i, result; |
1245 | struct device *dev = i2400m_dev(i2400m); | 1245 | struct device *dev = i2400m_dev(i2400m); |
1246 | const struct i2400m_msg_hdr *msg_hdr; | 1246 | const struct i2400m_msg_hdr *msg_hdr; |
1247 | size_t pl_itr, pl_size, skb_len; | 1247 | size_t pl_itr, pl_size; |
1248 | unsigned long flags; | 1248 | unsigned long flags; |
1249 | unsigned num_pls, single_last; | 1249 | unsigned num_pls, single_last, skb_len; |
1250 | 1250 | ||
1251 | skb_len = skb->len; | 1251 | skb_len = skb->len; |
1252 | d_fnstart(4, dev, "(i2400m %p skb %p [size %zu])\n", | 1252 | d_fnstart(4, dev, "(i2400m %p skb %p [size %u])\n", |
1253 | i2400m, skb, skb_len); | 1253 | i2400m, skb, skb_len); |
1254 | result = -EIO; | 1254 | result = -EIO; |
1255 | msg_hdr = (void *) skb->data; | 1255 | msg_hdr = (void *) skb->data; |
1256 | result = i2400m_rx_msg_hdr_check(i2400m, msg_hdr, skb->len); | 1256 | result = i2400m_rx_msg_hdr_check(i2400m, msg_hdr, skb_len); |
1257 | if (result < 0) | 1257 | if (result < 0) |
1258 | goto error_msg_hdr_check; | 1258 | goto error_msg_hdr_check; |
1259 | result = -EIO; | 1259 | result = -EIO; |
@@ -1261,10 +1261,10 @@ int i2400m_rx(struct i2400m *i2400m, struct sk_buff *skb) | |||
1261 | pl_itr = sizeof(*msg_hdr) + /* Check payload descriptor(s) */ | 1261 | pl_itr = sizeof(*msg_hdr) + /* Check payload descriptor(s) */ |
1262 | num_pls * sizeof(msg_hdr->pld[0]); | 1262 | num_pls * sizeof(msg_hdr->pld[0]); |
1263 | pl_itr = ALIGN(pl_itr, I2400M_PL_ALIGN); | 1263 | pl_itr = ALIGN(pl_itr, I2400M_PL_ALIGN); |
1264 | if (pl_itr > skb->len) { /* got all the payload descriptors? */ | 1264 | if (pl_itr > skb_len) { /* got all the payload descriptors? */ |
1265 | dev_err(dev, "RX: HW BUG? message too short (%u bytes) for " | 1265 | dev_err(dev, "RX: HW BUG? message too short (%u bytes) for " |
1266 | "%u payload descriptors (%zu each, total %zu)\n", | 1266 | "%u payload descriptors (%zu each, total %zu)\n", |
1267 | skb->len, num_pls, sizeof(msg_hdr->pld[0]), pl_itr); | 1267 | skb_len, num_pls, sizeof(msg_hdr->pld[0]), pl_itr); |
1268 | goto error_pl_descr_short; | 1268 | goto error_pl_descr_short; |
1269 | } | 1269 | } |
1270 | /* Walk each payload payload--check we really got it */ | 1270 | /* Walk each payload payload--check we really got it */ |
@@ -1272,7 +1272,7 @@ int i2400m_rx(struct i2400m *i2400m, struct sk_buff *skb) | |||
1272 | /* work around old gcc warnings */ | 1272 | /* work around old gcc warnings */ |
1273 | pl_size = i2400m_pld_size(&msg_hdr->pld[i]); | 1273 | pl_size = i2400m_pld_size(&msg_hdr->pld[i]); |
1274 | result = i2400m_rx_pl_descr_check(i2400m, &msg_hdr->pld[i], | 1274 | result = i2400m_rx_pl_descr_check(i2400m, &msg_hdr->pld[i], |
1275 | pl_itr, skb->len); | 1275 | pl_itr, skb_len); |
1276 | if (result < 0) | 1276 | if (result < 0) |
1277 | goto error_pl_descr_check; | 1277 | goto error_pl_descr_check; |
1278 | single_last = num_pls == 1 || i == num_pls - 1; | 1278 | single_last = num_pls == 1 || i == num_pls - 1; |
@@ -1290,16 +1290,16 @@ int i2400m_rx(struct i2400m *i2400m, struct sk_buff *skb) | |||
1290 | if (i < i2400m->rx_pl_min) | 1290 | if (i < i2400m->rx_pl_min) |
1291 | i2400m->rx_pl_min = i; | 1291 | i2400m->rx_pl_min = i; |
1292 | i2400m->rx_num++; | 1292 | i2400m->rx_num++; |
1293 | i2400m->rx_size_acc += skb->len; | 1293 | i2400m->rx_size_acc += skb_len; |
1294 | if (skb->len < i2400m->rx_size_min) | 1294 | if (skb_len < i2400m->rx_size_min) |
1295 | i2400m->rx_size_min = skb->len; | 1295 | i2400m->rx_size_min = skb_len; |
1296 | if (skb->len > i2400m->rx_size_max) | 1296 | if (skb_len > i2400m->rx_size_max) |
1297 | i2400m->rx_size_max = skb->len; | 1297 | i2400m->rx_size_max = skb_len; |
1298 | spin_unlock_irqrestore(&i2400m->rx_lock, flags); | 1298 | spin_unlock_irqrestore(&i2400m->rx_lock, flags); |
1299 | error_pl_descr_check: | 1299 | error_pl_descr_check: |
1300 | error_pl_descr_short: | 1300 | error_pl_descr_short: |
1301 | error_msg_hdr_check: | 1301 | error_msg_hdr_check: |
1302 | d_fnend(4, dev, "(i2400m %p skb %p [size %zu]) = %d\n", | 1302 | d_fnend(4, dev, "(i2400m %p skb %p [size %u]) = %d\n", |
1303 | i2400m, skb, skb_len, result); | 1303 | i2400m, skb, skb_len, result); |
1304 | return result; | 1304 | return result; |
1305 | } | 1305 | } |
diff --git a/drivers/oprofile/oprof.c b/drivers/oprofile/oprof.c index b336cd9ee7a1..f9bda64fcd1b 100644 --- a/drivers/oprofile/oprof.c +++ b/drivers/oprofile/oprof.c | |||
@@ -225,26 +225,17 @@ post_sync: | |||
225 | mutex_unlock(&start_mutex); | 225 | mutex_unlock(&start_mutex); |
226 | } | 226 | } |
227 | 227 | ||
228 | int oprofile_set_backtrace(unsigned long val) | 228 | int oprofile_set_ulong(unsigned long *addr, unsigned long val) |
229 | { | 229 | { |
230 | int err = 0; | 230 | int err = -EBUSY; |
231 | 231 | ||
232 | mutex_lock(&start_mutex); | 232 | mutex_lock(&start_mutex); |
233 | 233 | if (!oprofile_started) { | |
234 | if (oprofile_started) { | 234 | *addr = val; |
235 | err = -EBUSY; | 235 | err = 0; |
236 | goto out; | ||
237 | } | ||
238 | |||
239 | if (!oprofile_ops.backtrace) { | ||
240 | err = -EINVAL; | ||
241 | goto out; | ||
242 | } | 236 | } |
243 | |||
244 | oprofile_backtrace_depth = val; | ||
245 | |||
246 | out: | ||
247 | mutex_unlock(&start_mutex); | 237 | mutex_unlock(&start_mutex); |
238 | |||
248 | return err; | 239 | return err; |
249 | } | 240 | } |
250 | 241 | ||
@@ -257,16 +248,9 @@ static int __init oprofile_init(void) | |||
257 | printk(KERN_INFO "oprofile: using timer interrupt.\n"); | 248 | printk(KERN_INFO "oprofile: using timer interrupt.\n"); |
258 | err = oprofile_timer_init(&oprofile_ops); | 249 | err = oprofile_timer_init(&oprofile_ops); |
259 | if (err) | 250 | if (err) |
260 | goto out_arch; | 251 | return err; |
261 | } | 252 | } |
262 | err = oprofilefs_register(); | 253 | return oprofilefs_register(); |
263 | if (err) | ||
264 | goto out_arch; | ||
265 | return 0; | ||
266 | |||
267 | out_arch: | ||
268 | oprofile_arch_exit(); | ||
269 | return err; | ||
270 | } | 254 | } |
271 | 255 | ||
272 | 256 | ||
diff --git a/drivers/oprofile/oprof.h b/drivers/oprofile/oprof.h index 47e12cb4ee8b..177b73de5e5f 100644 --- a/drivers/oprofile/oprof.h +++ b/drivers/oprofile/oprof.h | |||
@@ -37,7 +37,7 @@ void oprofile_create_files(struct super_block *sb, struct dentry *root); | |||
37 | int oprofile_timer_init(struct oprofile_operations *ops); | 37 | int oprofile_timer_init(struct oprofile_operations *ops); |
38 | void oprofile_timer_exit(void); | 38 | void oprofile_timer_exit(void); |
39 | 39 | ||
40 | int oprofile_set_backtrace(unsigned long depth); | 40 | int oprofile_set_ulong(unsigned long *addr, unsigned long val); |
41 | int oprofile_set_timeout(unsigned long time); | 41 | int oprofile_set_timeout(unsigned long time); |
42 | 42 | ||
43 | #endif /* OPROF_H */ | 43 | #endif /* OPROF_H */ |
diff --git a/drivers/oprofile/oprofile_files.c b/drivers/oprofile/oprofile_files.c index bbd7516e0869..ccf099e684a4 100644 --- a/drivers/oprofile/oprofile_files.c +++ b/drivers/oprofile/oprofile_files.c | |||
@@ -79,14 +79,17 @@ static ssize_t depth_write(struct file *file, char const __user *buf, size_t cou | |||
79 | if (*offset) | 79 | if (*offset) |
80 | return -EINVAL; | 80 | return -EINVAL; |
81 | 81 | ||
82 | if (!oprofile_ops.backtrace) | ||
83 | return -EINVAL; | ||
84 | |||
82 | retval = oprofilefs_ulong_from_user(&val, buf, count); | 85 | retval = oprofilefs_ulong_from_user(&val, buf, count); |
83 | if (retval) | 86 | if (retval) |
84 | return retval; | 87 | return retval; |
85 | 88 | ||
86 | retval = oprofile_set_backtrace(val); | 89 | retval = oprofile_set_ulong(&oprofile_backtrace_depth, val); |
87 | |||
88 | if (retval) | 90 | if (retval) |
89 | return retval; | 91 | return retval; |
92 | |||
90 | return count; | 93 | return count; |
91 | } | 94 | } |
92 | 95 | ||
diff --git a/drivers/oprofile/oprofile_perf.c b/drivers/oprofile/oprofile_perf.c new file mode 100644 index 000000000000..9046f7b2ed79 --- /dev/null +++ b/drivers/oprofile/oprofile_perf.c | |||
@@ -0,0 +1,328 @@ | |||
1 | /* | ||
2 | * Copyright 2010 ARM Ltd. | ||
3 | * | ||
4 | * Perf-events backend for OProfile. | ||
5 | */ | ||
6 | #include <linux/perf_event.h> | ||
7 | #include <linux/platform_device.h> | ||
8 | #include <linux/oprofile.h> | ||
9 | #include <linux/slab.h> | ||
10 | |||
11 | /* | ||
12 | * Per performance monitor configuration as set via oprofilefs. | ||
13 | */ | ||
14 | struct op_counter_config { | ||
15 | unsigned long count; | ||
16 | unsigned long enabled; | ||
17 | unsigned long event; | ||
18 | unsigned long unit_mask; | ||
19 | unsigned long kernel; | ||
20 | unsigned long user; | ||
21 | struct perf_event_attr attr; | ||
22 | }; | ||
23 | |||
24 | static int oprofile_perf_enabled; | ||
25 | static DEFINE_MUTEX(oprofile_perf_mutex); | ||
26 | |||
27 | static struct op_counter_config *counter_config; | ||
28 | static struct perf_event **perf_events[nr_cpumask_bits]; | ||
29 | static int num_counters; | ||
30 | |||
31 | /* | ||
32 | * Overflow callback for oprofile. | ||
33 | */ | ||
34 | static void op_overflow_handler(struct perf_event *event, int unused, | ||
35 | struct perf_sample_data *data, struct pt_regs *regs) | ||
36 | { | ||
37 | int id; | ||
38 | u32 cpu = smp_processor_id(); | ||
39 | |||
40 | for (id = 0; id < num_counters; ++id) | ||
41 | if (perf_events[cpu][id] == event) | ||
42 | break; | ||
43 | |||
44 | if (id != num_counters) | ||
45 | oprofile_add_sample(regs, id); | ||
46 | else | ||
47 | pr_warning("oprofile: ignoring spurious overflow " | ||
48 | "on cpu %u\n", cpu); | ||
49 | } | ||
50 | |||
51 | /* | ||
52 | * Called by oprofile_perf_setup to create perf attributes to mirror the oprofile | ||
53 | * settings in counter_config. Attributes are created as `pinned' events and | ||
54 | * so are permanently scheduled on the PMU. | ||
55 | */ | ||
56 | static void op_perf_setup(void) | ||
57 | { | ||
58 | int i; | ||
59 | u32 size = sizeof(struct perf_event_attr); | ||
60 | struct perf_event_attr *attr; | ||
61 | |||
62 | for (i = 0; i < num_counters; ++i) { | ||
63 | attr = &counter_config[i].attr; | ||
64 | memset(attr, 0, size); | ||
65 | attr->type = PERF_TYPE_RAW; | ||
66 | attr->size = size; | ||
67 | attr->config = counter_config[i].event; | ||
68 | attr->sample_period = counter_config[i].count; | ||
69 | attr->pinned = 1; | ||
70 | } | ||
71 | } | ||
72 | |||
73 | static int op_create_counter(int cpu, int event) | ||
74 | { | ||
75 | struct perf_event *pevent; | ||
76 | |||
77 | if (!counter_config[event].enabled || perf_events[cpu][event]) | ||
78 | return 0; | ||
79 | |||
80 | pevent = perf_event_create_kernel_counter(&counter_config[event].attr, | ||
81 | cpu, NULL, | ||
82 | op_overflow_handler); | ||
83 | |||
84 | if (IS_ERR(pevent)) | ||
85 | return PTR_ERR(pevent); | ||
86 | |||
87 | if (pevent->state != PERF_EVENT_STATE_ACTIVE) { | ||
88 | perf_event_release_kernel(pevent); | ||
89 | pr_warning("oprofile: failed to enable event %d " | ||
90 | "on CPU %d\n", event, cpu); | ||
91 | return -EBUSY; | ||
92 | } | ||
93 | |||
94 | perf_events[cpu][event] = pevent; | ||
95 | |||
96 | return 0; | ||
97 | } | ||
98 | |||
99 | static void op_destroy_counter(int cpu, int event) | ||
100 | { | ||
101 | struct perf_event *pevent = perf_events[cpu][event]; | ||
102 | |||
103 | if (pevent) { | ||
104 | perf_event_release_kernel(pevent); | ||
105 | perf_events[cpu][event] = NULL; | ||
106 | } | ||
107 | } | ||
108 | |||
109 | /* | ||
110 | * Called by oprofile_perf_start to create active perf events based on the | ||
111 | * perviously configured attributes. | ||
112 | */ | ||
113 | static int op_perf_start(void) | ||
114 | { | ||
115 | int cpu, event, ret = 0; | ||
116 | |||
117 | for_each_online_cpu(cpu) { | ||
118 | for (event = 0; event < num_counters; ++event) { | ||
119 | ret = op_create_counter(cpu, event); | ||
120 | if (ret) | ||
121 | return ret; | ||
122 | } | ||
123 | } | ||
124 | |||
125 | return ret; | ||
126 | } | ||
127 | |||
128 | /* | ||
129 | * Called by oprofile_perf_stop at the end of a profiling run. | ||
130 | */ | ||
131 | static void op_perf_stop(void) | ||
132 | { | ||
133 | int cpu, event; | ||
134 | |||
135 | for_each_online_cpu(cpu) | ||
136 | for (event = 0; event < num_counters; ++event) | ||
137 | op_destroy_counter(cpu, event); | ||
138 | } | ||
139 | |||
140 | static int oprofile_perf_create_files(struct super_block *sb, struct dentry *root) | ||
141 | { | ||
142 | unsigned int i; | ||
143 | |||
144 | for (i = 0; i < num_counters; i++) { | ||
145 | struct dentry *dir; | ||
146 | char buf[4]; | ||
147 | |||
148 | snprintf(buf, sizeof buf, "%d", i); | ||
149 | dir = oprofilefs_mkdir(sb, root, buf); | ||
150 | oprofilefs_create_ulong(sb, dir, "enabled", &counter_config[i].enabled); | ||
151 | oprofilefs_create_ulong(sb, dir, "event", &counter_config[i].event); | ||
152 | oprofilefs_create_ulong(sb, dir, "count", &counter_config[i].count); | ||
153 | oprofilefs_create_ulong(sb, dir, "unit_mask", &counter_config[i].unit_mask); | ||
154 | oprofilefs_create_ulong(sb, dir, "kernel", &counter_config[i].kernel); | ||
155 | oprofilefs_create_ulong(sb, dir, "user", &counter_config[i].user); | ||
156 | } | ||
157 | |||
158 | return 0; | ||
159 | } | ||
160 | |||
161 | static int oprofile_perf_setup(void) | ||
162 | { | ||
163 | spin_lock(&oprofilefs_lock); | ||
164 | op_perf_setup(); | ||
165 | spin_unlock(&oprofilefs_lock); | ||
166 | return 0; | ||
167 | } | ||
168 | |||
169 | static int oprofile_perf_start(void) | ||
170 | { | ||
171 | int ret = -EBUSY; | ||
172 | |||
173 | mutex_lock(&oprofile_perf_mutex); | ||
174 | if (!oprofile_perf_enabled) { | ||
175 | ret = 0; | ||
176 | op_perf_start(); | ||
177 | oprofile_perf_enabled = 1; | ||
178 | } | ||
179 | mutex_unlock(&oprofile_perf_mutex); | ||
180 | return ret; | ||
181 | } | ||
182 | |||
183 | static void oprofile_perf_stop(void) | ||
184 | { | ||
185 | mutex_lock(&oprofile_perf_mutex); | ||
186 | if (oprofile_perf_enabled) | ||
187 | op_perf_stop(); | ||
188 | oprofile_perf_enabled = 0; | ||
189 | mutex_unlock(&oprofile_perf_mutex); | ||
190 | } | ||
191 | |||
192 | #ifdef CONFIG_PM | ||
193 | |||
194 | static int oprofile_perf_suspend(struct platform_device *dev, pm_message_t state) | ||
195 | { | ||
196 | mutex_lock(&oprofile_perf_mutex); | ||
197 | if (oprofile_perf_enabled) | ||
198 | op_perf_stop(); | ||
199 | mutex_unlock(&oprofile_perf_mutex); | ||
200 | return 0; | ||
201 | } | ||
202 | |||
203 | static int oprofile_perf_resume(struct platform_device *dev) | ||
204 | { | ||
205 | mutex_lock(&oprofile_perf_mutex); | ||
206 | if (oprofile_perf_enabled && op_perf_start()) | ||
207 | oprofile_perf_enabled = 0; | ||
208 | mutex_unlock(&oprofile_perf_mutex); | ||
209 | return 0; | ||
210 | } | ||
211 | |||
212 | static struct platform_driver oprofile_driver = { | ||
213 | .driver = { | ||
214 | .name = "oprofile-perf", | ||
215 | }, | ||
216 | .resume = oprofile_perf_resume, | ||
217 | .suspend = oprofile_perf_suspend, | ||
218 | }; | ||
219 | |||
220 | static struct platform_device *oprofile_pdev; | ||
221 | |||
222 | static int __init init_driverfs(void) | ||
223 | { | ||
224 | int ret; | ||
225 | |||
226 | ret = platform_driver_register(&oprofile_driver); | ||
227 | if (ret) | ||
228 | return ret; | ||
229 | |||
230 | oprofile_pdev = platform_device_register_simple( | ||
231 | oprofile_driver.driver.name, 0, NULL, 0); | ||
232 | if (IS_ERR(oprofile_pdev)) { | ||
233 | ret = PTR_ERR(oprofile_pdev); | ||
234 | platform_driver_unregister(&oprofile_driver); | ||
235 | } | ||
236 | |||
237 | return ret; | ||
238 | } | ||
239 | |||
240 | static void exit_driverfs(void) | ||
241 | { | ||
242 | platform_device_unregister(oprofile_pdev); | ||
243 | platform_driver_unregister(&oprofile_driver); | ||
244 | } | ||
245 | |||
246 | #else | ||
247 | |||
248 | static inline int init_driverfs(void) { return 0; } | ||
249 | static inline void exit_driverfs(void) { } | ||
250 | |||
251 | #endif /* CONFIG_PM */ | ||
252 | |||
253 | void oprofile_perf_exit(void) | ||
254 | { | ||
255 | int cpu, id; | ||
256 | struct perf_event *event; | ||
257 | |||
258 | for_each_possible_cpu(cpu) { | ||
259 | for (id = 0; id < num_counters; ++id) { | ||
260 | event = perf_events[cpu][id]; | ||
261 | if (event) | ||
262 | perf_event_release_kernel(event); | ||
263 | } | ||
264 | |||
265 | kfree(perf_events[cpu]); | ||
266 | } | ||
267 | |||
268 | kfree(counter_config); | ||
269 | exit_driverfs(); | ||
270 | } | ||
271 | |||
272 | int __init oprofile_perf_init(struct oprofile_operations *ops) | ||
273 | { | ||
274 | int cpu, ret = 0; | ||
275 | |||
276 | ret = init_driverfs(); | ||
277 | if (ret) | ||
278 | return ret; | ||
279 | |||
280 | memset(&perf_events, 0, sizeof(perf_events)); | ||
281 | |||
282 | num_counters = perf_num_counters(); | ||
283 | if (num_counters <= 0) { | ||
284 | pr_info("oprofile: no performance counters\n"); | ||
285 | ret = -ENODEV; | ||
286 | goto out; | ||
287 | } | ||
288 | |||
289 | counter_config = kcalloc(num_counters, | ||
290 | sizeof(struct op_counter_config), GFP_KERNEL); | ||
291 | |||
292 | if (!counter_config) { | ||
293 | pr_info("oprofile: failed to allocate %d " | ||
294 | "counters\n", num_counters); | ||
295 | ret = -ENOMEM; | ||
296 | num_counters = 0; | ||
297 | goto out; | ||
298 | } | ||
299 | |||
300 | for_each_possible_cpu(cpu) { | ||
301 | perf_events[cpu] = kcalloc(num_counters, | ||
302 | sizeof(struct perf_event *), GFP_KERNEL); | ||
303 | if (!perf_events[cpu]) { | ||
304 | pr_info("oprofile: failed to allocate %d perf events " | ||
305 | "for cpu %d\n", num_counters, cpu); | ||
306 | ret = -ENOMEM; | ||
307 | goto out; | ||
308 | } | ||
309 | } | ||
310 | |||
311 | ops->create_files = oprofile_perf_create_files; | ||
312 | ops->setup = oprofile_perf_setup; | ||
313 | ops->start = oprofile_perf_start; | ||
314 | ops->stop = oprofile_perf_stop; | ||
315 | ops->shutdown = oprofile_perf_stop; | ||
316 | ops->cpu_type = op_name_from_perf_id(); | ||
317 | |||
318 | if (!ops->cpu_type) | ||
319 | ret = -ENODEV; | ||
320 | else | ||
321 | pr_info("oprofile: using %s\n", ops->cpu_type); | ||
322 | |||
323 | out: | ||
324 | if (ret) | ||
325 | oprofile_perf_exit(); | ||
326 | |||
327 | return ret; | ||
328 | } | ||
diff --git a/drivers/oprofile/oprofilefs.c b/drivers/oprofile/oprofilefs.c index 2766a6d3c2e9..1944621930d9 100644 --- a/drivers/oprofile/oprofilefs.c +++ b/drivers/oprofile/oprofilefs.c | |||
@@ -91,16 +91,20 @@ static ssize_t ulong_read_file(struct file *file, char __user *buf, size_t count | |||
91 | 91 | ||
92 | static ssize_t ulong_write_file(struct file *file, char const __user *buf, size_t count, loff_t *offset) | 92 | static ssize_t ulong_write_file(struct file *file, char const __user *buf, size_t count, loff_t *offset) |
93 | { | 93 | { |
94 | unsigned long *value = file->private_data; | 94 | unsigned long value; |
95 | int retval; | 95 | int retval; |
96 | 96 | ||
97 | if (*offset) | 97 | if (*offset) |
98 | return -EINVAL; | 98 | return -EINVAL; |
99 | 99 | ||
100 | retval = oprofilefs_ulong_from_user(value, buf, count); | 100 | retval = oprofilefs_ulong_from_user(&value, buf, count); |
101 | if (retval) | ||
102 | return retval; | ||
101 | 103 | ||
104 | retval = oprofile_set_ulong(file->private_data, value); | ||
102 | if (retval) | 105 | if (retval) |
103 | return retval; | 106 | return retval; |
107 | |||
104 | return count; | 108 | return count; |
105 | } | 109 | } |
106 | 110 | ||
@@ -126,50 +130,41 @@ static const struct file_operations ulong_ro_fops = { | |||
126 | }; | 130 | }; |
127 | 131 | ||
128 | 132 | ||
129 | static struct dentry *__oprofilefs_create_file(struct super_block *sb, | 133 | static int __oprofilefs_create_file(struct super_block *sb, |
130 | struct dentry *root, char const *name, const struct file_operations *fops, | 134 | struct dentry *root, char const *name, const struct file_operations *fops, |
131 | int perm) | 135 | int perm, void *priv) |
132 | { | 136 | { |
133 | struct dentry *dentry; | 137 | struct dentry *dentry; |
134 | struct inode *inode; | 138 | struct inode *inode; |
135 | 139 | ||
136 | dentry = d_alloc_name(root, name); | 140 | dentry = d_alloc_name(root, name); |
137 | if (!dentry) | 141 | if (!dentry) |
138 | return NULL; | 142 | return -ENOMEM; |
139 | inode = oprofilefs_get_inode(sb, S_IFREG | perm); | 143 | inode = oprofilefs_get_inode(sb, S_IFREG | perm); |
140 | if (!inode) { | 144 | if (!inode) { |
141 | dput(dentry); | 145 | dput(dentry); |
142 | return NULL; | 146 | return -ENOMEM; |
143 | } | 147 | } |
144 | inode->i_fop = fops; | 148 | inode->i_fop = fops; |
145 | d_add(dentry, inode); | 149 | d_add(dentry, inode); |
146 | return dentry; | 150 | dentry->d_inode->i_private = priv; |
151 | return 0; | ||
147 | } | 152 | } |
148 | 153 | ||
149 | 154 | ||
150 | int oprofilefs_create_ulong(struct super_block *sb, struct dentry *root, | 155 | int oprofilefs_create_ulong(struct super_block *sb, struct dentry *root, |
151 | char const *name, unsigned long *val) | 156 | char const *name, unsigned long *val) |
152 | { | 157 | { |
153 | struct dentry *d = __oprofilefs_create_file(sb, root, name, | 158 | return __oprofilefs_create_file(sb, root, name, |
154 | &ulong_fops, 0644); | 159 | &ulong_fops, 0644, val); |
155 | if (!d) | ||
156 | return -EFAULT; | ||
157 | |||
158 | d->d_inode->i_private = val; | ||
159 | return 0; | ||
160 | } | 160 | } |
161 | 161 | ||
162 | 162 | ||
163 | int oprofilefs_create_ro_ulong(struct super_block *sb, struct dentry *root, | 163 | int oprofilefs_create_ro_ulong(struct super_block *sb, struct dentry *root, |
164 | char const *name, unsigned long *val) | 164 | char const *name, unsigned long *val) |
165 | { | 165 | { |
166 | struct dentry *d = __oprofilefs_create_file(sb, root, name, | 166 | return __oprofilefs_create_file(sb, root, name, |
167 | &ulong_ro_fops, 0444); | 167 | &ulong_ro_fops, 0444, val); |
168 | if (!d) | ||
169 | return -EFAULT; | ||
170 | |||
171 | d->d_inode->i_private = val; | ||
172 | return 0; | ||
173 | } | 168 | } |
174 | 169 | ||
175 | 170 | ||
@@ -189,31 +184,22 @@ static const struct file_operations atomic_ro_fops = { | |||
189 | int oprofilefs_create_ro_atomic(struct super_block *sb, struct dentry *root, | 184 | int oprofilefs_create_ro_atomic(struct super_block *sb, struct dentry *root, |
190 | char const *name, atomic_t *val) | 185 | char const *name, atomic_t *val) |
191 | { | 186 | { |
192 | struct dentry *d = __oprofilefs_create_file(sb, root, name, | 187 | return __oprofilefs_create_file(sb, root, name, |
193 | &atomic_ro_fops, 0444); | 188 | &atomic_ro_fops, 0444, val); |
194 | if (!d) | ||
195 | return -EFAULT; | ||
196 | |||
197 | d->d_inode->i_private = val; | ||
198 | return 0; | ||
199 | } | 189 | } |
200 | 190 | ||
201 | 191 | ||
202 | int oprofilefs_create_file(struct super_block *sb, struct dentry *root, | 192 | int oprofilefs_create_file(struct super_block *sb, struct dentry *root, |
203 | char const *name, const struct file_operations *fops) | 193 | char const *name, const struct file_operations *fops) |
204 | { | 194 | { |
205 | if (!__oprofilefs_create_file(sb, root, name, fops, 0644)) | 195 | return __oprofilefs_create_file(sb, root, name, fops, 0644, NULL); |
206 | return -EFAULT; | ||
207 | return 0; | ||
208 | } | 196 | } |
209 | 197 | ||
210 | 198 | ||
211 | int oprofilefs_create_file_perm(struct super_block *sb, struct dentry *root, | 199 | int oprofilefs_create_file_perm(struct super_block *sb, struct dentry *root, |
212 | char const *name, const struct file_operations *fops, int perm) | 200 | char const *name, const struct file_operations *fops, int perm) |
213 | { | 201 | { |
214 | if (!__oprofilefs_create_file(sb, root, name, fops, perm)) | 202 | return __oprofilefs_create_file(sb, root, name, fops, perm, NULL); |
215 | return -EFAULT; | ||
216 | return 0; | ||
217 | } | 203 | } |
218 | 204 | ||
219 | 205 | ||
diff --git a/drivers/parport/share.c b/drivers/parport/share.c index dffa5d4fb298..a2d9d1e59260 100644 --- a/drivers/parport/share.c +++ b/drivers/parport/share.c | |||
@@ -306,7 +306,7 @@ struct parport *parport_register_port(unsigned long base, int irq, int dma, | |||
306 | spin_lock_init(&tmp->pardevice_lock); | 306 | spin_lock_init(&tmp->pardevice_lock); |
307 | tmp->ieee1284.mode = IEEE1284_MODE_COMPAT; | 307 | tmp->ieee1284.mode = IEEE1284_MODE_COMPAT; |
308 | tmp->ieee1284.phase = IEEE1284_PH_FWD_IDLE; | 308 | tmp->ieee1284.phase = IEEE1284_PH_FWD_IDLE; |
309 | init_MUTEX_LOCKED (&tmp->ieee1284.irq); /* actually a semaphore at 0 */ | 309 | sema_init(&tmp->ieee1284.irq, 0); |
310 | tmp->spintime = parport_default_spintime; | 310 | tmp->spintime = parport_default_spintime; |
311 | atomic_set (&tmp->ref_count, 1); | 311 | atomic_set (&tmp->ref_count, 1); |
312 | INIT_LIST_HEAD(&tmp->full_list); | 312 | INIT_LIST_HEAD(&tmp->full_list); |
diff --git a/drivers/scsi/scsi.c b/drivers/scsi/scsi.c index ad0ed212db4a..348fba0a8976 100644 --- a/drivers/scsi/scsi.c +++ b/drivers/scsi/scsi.c | |||
@@ -1046,13 +1046,13 @@ int scsi_get_vpd_page(struct scsi_device *sdev, u8 page, unsigned char *buf, | |||
1046 | 1046 | ||
1047 | /* If the user actually wanted this page, we can skip the rest */ | 1047 | /* If the user actually wanted this page, we can skip the rest */ |
1048 | if (page == 0) | 1048 | if (page == 0) |
1049 | return -EINVAL; | 1049 | return 0; |
1050 | 1050 | ||
1051 | for (i = 0; i < min((int)buf[3], buf_len - 4); i++) | 1051 | for (i = 0; i < min((int)buf[3], buf_len - 4); i++) |
1052 | if (buf[i + 4] == page) | 1052 | if (buf[i + 4] == page) |
1053 | goto found; | 1053 | goto found; |
1054 | 1054 | ||
1055 | if (i < buf[3] && i > buf_len) | 1055 | if (i < buf[3] && i >= buf_len - 4) |
1056 | /* ran off the end of the buffer, give us benefit of doubt */ | 1056 | /* ran off the end of the buffer, give us benefit of doubt */ |
1057 | goto found; | 1057 | goto found; |
1058 | /* The device claims it doesn't support the requested page */ | 1058 | /* The device claims it doesn't support the requested page */ |
diff --git a/drivers/serial/ioc3_serial.c b/drivers/serial/ioc3_serial.c index 93de907b1208..800c54602339 100644 --- a/drivers/serial/ioc3_serial.c +++ b/drivers/serial/ioc3_serial.c | |||
@@ -2044,6 +2044,7 @@ ioc3uart_probe(struct ioc3_submodule *is, struct ioc3_driver_data *idd) | |||
2044 | if (!port) { | 2044 | if (!port) { |
2045 | printk(KERN_WARNING | 2045 | printk(KERN_WARNING |
2046 | "IOC3 serial memory not available for port\n"); | 2046 | "IOC3 serial memory not available for port\n"); |
2047 | ret = -ENOMEM; | ||
2047 | goto out4; | 2048 | goto out4; |
2048 | } | 2049 | } |
2049 | spin_lock_init(&port->ip_lock); | 2050 | spin_lock_init(&port->ip_lock); |
diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index 7c8008225ee3..17927b1f9334 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c | |||
@@ -127,7 +127,10 @@ static void handle_tx(struct vhost_net *net) | |||
127 | size_t len, total_len = 0; | 127 | size_t len, total_len = 0; |
128 | int err, wmem; | 128 | int err, wmem; |
129 | size_t hdr_size; | 129 | size_t hdr_size; |
130 | struct socket *sock = rcu_dereference(vq->private_data); | 130 | struct socket *sock; |
131 | |||
132 | sock = rcu_dereference_check(vq->private_data, | ||
133 | lockdep_is_held(&vq->mutex)); | ||
131 | if (!sock) | 134 | if (!sock) |
132 | return; | 135 | return; |
133 | 136 | ||
@@ -582,7 +585,10 @@ static void vhost_net_disable_vq(struct vhost_net *n, | |||
582 | static void vhost_net_enable_vq(struct vhost_net *n, | 585 | static void vhost_net_enable_vq(struct vhost_net *n, |
583 | struct vhost_virtqueue *vq) | 586 | struct vhost_virtqueue *vq) |
584 | { | 587 | { |
585 | struct socket *sock = vq->private_data; | 588 | struct socket *sock; |
589 | |||
590 | sock = rcu_dereference_protected(vq->private_data, | ||
591 | lockdep_is_held(&vq->mutex)); | ||
586 | if (!sock) | 592 | if (!sock) |
587 | return; | 593 | return; |
588 | if (vq == n->vqs + VHOST_NET_VQ_TX) { | 594 | if (vq == n->vqs + VHOST_NET_VQ_TX) { |
@@ -598,7 +604,8 @@ static struct socket *vhost_net_stop_vq(struct vhost_net *n, | |||
598 | struct socket *sock; | 604 | struct socket *sock; |
599 | 605 | ||
600 | mutex_lock(&vq->mutex); | 606 | mutex_lock(&vq->mutex); |
601 | sock = vq->private_data; | 607 | sock = rcu_dereference_protected(vq->private_data, |
608 | lockdep_is_held(&vq->mutex)); | ||
602 | vhost_net_disable_vq(n, vq); | 609 | vhost_net_disable_vq(n, vq); |
603 | rcu_assign_pointer(vq->private_data, NULL); | 610 | rcu_assign_pointer(vq->private_data, NULL); |
604 | mutex_unlock(&vq->mutex); | 611 | mutex_unlock(&vq->mutex); |
@@ -736,7 +743,8 @@ static long vhost_net_set_backend(struct vhost_net *n, unsigned index, int fd) | |||
736 | } | 743 | } |
737 | 744 | ||
738 | /* start polling new socket */ | 745 | /* start polling new socket */ |
739 | oldsock = vq->private_data; | 746 | oldsock = rcu_dereference_protected(vq->private_data, |
747 | lockdep_is_held(&vq->mutex)); | ||
740 | if (sock != oldsock) { | 748 | if (sock != oldsock) { |
741 | vhost_net_disable_vq(n, vq); | 749 | vhost_net_disable_vq(n, vq); |
742 | rcu_assign_pointer(vq->private_data, sock); | 750 | rcu_assign_pointer(vq->private_data, sock); |
diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index dd3d6f7406f8..8b5a1b33d0fe 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c | |||
@@ -320,7 +320,7 @@ long vhost_dev_reset_owner(struct vhost_dev *dev) | |||
320 | vhost_dev_cleanup(dev); | 320 | vhost_dev_cleanup(dev); |
321 | 321 | ||
322 | memory->nregions = 0; | 322 | memory->nregions = 0; |
323 | dev->memory = memory; | 323 | RCU_INIT_POINTER(dev->memory, memory); |
324 | return 0; | 324 | return 0; |
325 | } | 325 | } |
326 | 326 | ||
@@ -352,8 +352,9 @@ void vhost_dev_cleanup(struct vhost_dev *dev) | |||
352 | fput(dev->log_file); | 352 | fput(dev->log_file); |
353 | dev->log_file = NULL; | 353 | dev->log_file = NULL; |
354 | /* No one will access memory at this point */ | 354 | /* No one will access memory at this point */ |
355 | kfree(dev->memory); | 355 | kfree(rcu_dereference_protected(dev->memory, |
356 | dev->memory = NULL; | 356 | lockdep_is_held(&dev->mutex))); |
357 | RCU_INIT_POINTER(dev->memory, NULL); | ||
357 | if (dev->mm) | 358 | if (dev->mm) |
358 | mmput(dev->mm); | 359 | mmput(dev->mm); |
359 | dev->mm = NULL; | 360 | dev->mm = NULL; |
@@ -440,14 +441,22 @@ static int vq_access_ok(unsigned int num, | |||
440 | /* Caller should have device mutex but not vq mutex */ | 441 | /* Caller should have device mutex but not vq mutex */ |
441 | int vhost_log_access_ok(struct vhost_dev *dev) | 442 | int vhost_log_access_ok(struct vhost_dev *dev) |
442 | { | 443 | { |
443 | return memory_access_ok(dev, dev->memory, 1); | 444 | struct vhost_memory *mp; |
445 | |||
446 | mp = rcu_dereference_protected(dev->memory, | ||
447 | lockdep_is_held(&dev->mutex)); | ||
448 | return memory_access_ok(dev, mp, 1); | ||
444 | } | 449 | } |
445 | 450 | ||
446 | /* Verify access for write logging. */ | 451 | /* Verify access for write logging. */ |
447 | /* Caller should have vq mutex and device mutex */ | 452 | /* Caller should have vq mutex and device mutex */ |
448 | static int vq_log_access_ok(struct vhost_virtqueue *vq, void __user *log_base) | 453 | static int vq_log_access_ok(struct vhost_virtqueue *vq, void __user *log_base) |
449 | { | 454 | { |
450 | return vq_memory_access_ok(log_base, vq->dev->memory, | 455 | struct vhost_memory *mp; |
456 | |||
457 | mp = rcu_dereference_protected(vq->dev->memory, | ||
458 | lockdep_is_held(&vq->mutex)); | ||
459 | return vq_memory_access_ok(log_base, mp, | ||
451 | vhost_has_feature(vq->dev, VHOST_F_LOG_ALL)) && | 460 | vhost_has_feature(vq->dev, VHOST_F_LOG_ALL)) && |
452 | (!vq->log_used || log_access_ok(log_base, vq->log_addr, | 461 | (!vq->log_used || log_access_ok(log_base, vq->log_addr, |
453 | sizeof *vq->used + | 462 | sizeof *vq->used + |
@@ -487,7 +496,8 @@ static long vhost_set_memory(struct vhost_dev *d, struct vhost_memory __user *m) | |||
487 | kfree(newmem); | 496 | kfree(newmem); |
488 | return -EFAULT; | 497 | return -EFAULT; |
489 | } | 498 | } |
490 | oldmem = d->memory; | 499 | oldmem = rcu_dereference_protected(d->memory, |
500 | lockdep_is_held(&d->mutex)); | ||
491 | rcu_assign_pointer(d->memory, newmem); | 501 | rcu_assign_pointer(d->memory, newmem); |
492 | synchronize_rcu(); | 502 | synchronize_rcu(); |
493 | kfree(oldmem); | 503 | kfree(oldmem); |
diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h index afd77295971c..af3c11ded5fd 100644 --- a/drivers/vhost/vhost.h +++ b/drivers/vhost/vhost.h | |||
@@ -106,7 +106,7 @@ struct vhost_virtqueue { | |||
106 | * vhost_work execution acts instead of rcu_read_lock() and the end of | 106 | * vhost_work execution acts instead of rcu_read_lock() and the end of |
107 | * vhost_work execution acts instead of rcu_read_lock(). | 107 | * vhost_work execution acts instead of rcu_read_lock(). |
108 | * Writers use virtqueue mutex. */ | 108 | * Writers use virtqueue mutex. */ |
109 | void *private_data; | 109 | void __rcu *private_data; |
110 | /* Log write descriptors */ | 110 | /* Log write descriptors */ |
111 | void __user *log_base; | 111 | void __user *log_base; |
112 | struct vhost_log log[VHOST_NET_MAX_SG]; | 112 | struct vhost_log log[VHOST_NET_MAX_SG]; |
@@ -116,7 +116,7 @@ struct vhost_dev { | |||
116 | /* Readers use RCU to access memory table pointer | 116 | /* Readers use RCU to access memory table pointer |
117 | * log base pointer and features. | 117 | * log base pointer and features. |
118 | * Writers use mutex below.*/ | 118 | * Writers use mutex below.*/ |
119 | struct vhost_memory *memory; | 119 | struct vhost_memory __rcu *memory; |
120 | struct mm_struct *mm; | 120 | struct mm_struct *mm; |
121 | struct mutex mutex; | 121 | struct mutex mutex; |
122 | unsigned acked_features; | 122 | unsigned acked_features; |
@@ -173,7 +173,11 @@ enum { | |||
173 | 173 | ||
174 | static inline int vhost_has_feature(struct vhost_dev *dev, int bit) | 174 | static inline int vhost_has_feature(struct vhost_dev *dev, int bit) |
175 | { | 175 | { |
176 | unsigned acked_features = rcu_dereference(dev->acked_features); | 176 | unsigned acked_features; |
177 | |||
178 | acked_features = | ||
179 | rcu_dereference_index_check(dev->acked_features, | ||
180 | lockdep_is_held(&dev->mutex)); | ||
177 | return acked_features & (1 << bit); | 181 | return acked_features & (1 << bit); |
178 | } | 182 | } |
179 | 183 | ||
diff --git a/fs/affs/super.c b/fs/affs/super.c index 33c4e7eef470..9581ea94d5a1 100644 --- a/fs/affs/super.c +++ b/fs/affs/super.c | |||
@@ -109,8 +109,8 @@ static void init_once(void *foo) | |||
109 | { | 109 | { |
110 | struct affs_inode_info *ei = (struct affs_inode_info *) foo; | 110 | struct affs_inode_info *ei = (struct affs_inode_info *) foo; |
111 | 111 | ||
112 | init_MUTEX(&ei->i_link_lock); | 112 | sema_init(&ei->i_link_lock, 1); |
113 | init_MUTEX(&ei->i_ext_lock); | 113 | sema_init(&ei->i_ext_lock, 1); |
114 | inode_init_once(&ei->vfs_inode); | 114 | inode_init_once(&ei->vfs_inode); |
115 | } | 115 | } |
116 | 116 | ||
diff --git a/fs/binfmt_aout.c b/fs/binfmt_aout.c index f96eff04e11a..a6395bdb26ae 100644 --- a/fs/binfmt_aout.c +++ b/fs/binfmt_aout.c | |||
@@ -134,10 +134,6 @@ static int aout_core_dump(struct coredump_params *cprm) | |||
134 | if (!dump_write(file, dump_start, dump_size)) | 134 | if (!dump_write(file, dump_start, dump_size)) |
135 | goto end_coredump; | 135 | goto end_coredump; |
136 | } | 136 | } |
137 | /* Finally dump the task struct. Not be used by gdb, but could be useful */ | ||
138 | set_fs(KERNEL_DS); | ||
139 | if (!dump_write(file, current, sizeof(*current))) | ||
140 | goto end_coredump; | ||
141 | end_coredump: | 137 | end_coredump: |
142 | set_fs(fs); | 138 | set_fs(fs); |
143 | return has_dumped; | 139 | return has_dumped; |
diff --git a/fs/ceph/Kconfig b/fs/ceph/Kconfig index 0fcd2640c23f..9eb134ea6eb2 100644 --- a/fs/ceph/Kconfig +++ b/fs/ceph/Kconfig | |||
@@ -1,9 +1,11 @@ | |||
1 | config CEPH_FS | 1 | config CEPH_FS |
2 | tristate "Ceph distributed file system (EXPERIMENTAL)" | 2 | tristate "Ceph distributed file system (EXPERIMENTAL)" |
3 | depends on INET && EXPERIMENTAL | 3 | depends on INET && EXPERIMENTAL |
4 | select CEPH_LIB | ||
4 | select LIBCRC32C | 5 | select LIBCRC32C |
5 | select CRYPTO_AES | 6 | select CRYPTO_AES |
6 | select CRYPTO | 7 | select CRYPTO |
8 | default n | ||
7 | help | 9 | help |
8 | Choose Y or M here to include support for mounting the | 10 | Choose Y or M here to include support for mounting the |
9 | experimental Ceph distributed file system. Ceph is an extremely | 11 | experimental Ceph distributed file system. Ceph is an extremely |
@@ -14,15 +16,3 @@ config CEPH_FS | |||
14 | 16 | ||
15 | If unsure, say N. | 17 | If unsure, say N. |
16 | 18 | ||
17 | config CEPH_FS_PRETTYDEBUG | ||
18 | bool "Include file:line in ceph debug output" | ||
19 | depends on CEPH_FS | ||
20 | default n | ||
21 | help | ||
22 | If you say Y here, debug output will include a filename and | ||
23 | line to aid debugging. This icnreases kernel size and slows | ||
24 | execution slightly when debug call sites are enabled (e.g., | ||
25 | via CONFIG_DYNAMIC_DEBUG). | ||
26 | |||
27 | If unsure, say N. | ||
28 | |||
diff --git a/fs/ceph/Makefile b/fs/ceph/Makefile index 278e1172600d..9e6c4f2e8ff1 100644 --- a/fs/ceph/Makefile +++ b/fs/ceph/Makefile | |||
@@ -8,15 +8,8 @@ obj-$(CONFIG_CEPH_FS) += ceph.o | |||
8 | 8 | ||
9 | ceph-objs := super.o inode.o dir.o file.o locks.o addr.o ioctl.o \ | 9 | ceph-objs := super.o inode.o dir.o file.o locks.o addr.o ioctl.o \ |
10 | export.o caps.o snap.o xattr.o \ | 10 | export.o caps.o snap.o xattr.o \ |
11 | messenger.o msgpool.o buffer.o pagelist.o \ | 11 | mds_client.o mdsmap.o strings.o ceph_frag.o \ |
12 | mds_client.o mdsmap.o \ | 12 | debugfs.o |
13 | mon_client.o \ | ||
14 | osd_client.o osdmap.o crush/crush.o crush/mapper.o crush/hash.o \ | ||
15 | debugfs.o \ | ||
16 | auth.o auth_none.o \ | ||
17 | crypto.o armor.o \ | ||
18 | auth_x.o \ | ||
19 | ceph_fs.o ceph_strings.o ceph_hash.o ceph_frag.o | ||
20 | 13 | ||
21 | else | 14 | else |
22 | #Otherwise we were called directly from the command | 15 | #Otherwise we were called directly from the command |
diff --git a/fs/ceph/README b/fs/ceph/README deleted file mode 100644 index 18352fab37c0..000000000000 --- a/fs/ceph/README +++ /dev/null | |||
@@ -1,20 +0,0 @@ | |||
1 | # | ||
2 | # The following files are shared by (and manually synchronized | ||
3 | # between) the Ceph userland and kernel client. | ||
4 | # | ||
5 | # userland kernel | ||
6 | src/include/ceph_fs.h fs/ceph/ceph_fs.h | ||
7 | src/include/ceph_fs.cc fs/ceph/ceph_fs.c | ||
8 | src/include/msgr.h fs/ceph/msgr.h | ||
9 | src/include/rados.h fs/ceph/rados.h | ||
10 | src/include/ceph_strings.cc fs/ceph/ceph_strings.c | ||
11 | src/include/ceph_frag.h fs/ceph/ceph_frag.h | ||
12 | src/include/ceph_frag.cc fs/ceph/ceph_frag.c | ||
13 | src/include/ceph_hash.h fs/ceph/ceph_hash.h | ||
14 | src/include/ceph_hash.cc fs/ceph/ceph_hash.c | ||
15 | src/crush/crush.c fs/ceph/crush/crush.c | ||
16 | src/crush/crush.h fs/ceph/crush/crush.h | ||
17 | src/crush/mapper.c fs/ceph/crush/mapper.c | ||
18 | src/crush/mapper.h fs/ceph/crush/mapper.h | ||
19 | src/crush/hash.h fs/ceph/crush/hash.h | ||
20 | src/crush/hash.c fs/ceph/crush/hash.c | ||
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index efbc604001c8..51bcc5ce3230 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c | |||
@@ -1,4 +1,4 @@ | |||
1 | #include "ceph_debug.h" | 1 | #include <linux/ceph/ceph_debug.h> |
2 | 2 | ||
3 | #include <linux/backing-dev.h> | 3 | #include <linux/backing-dev.h> |
4 | #include <linux/fs.h> | 4 | #include <linux/fs.h> |
@@ -10,7 +10,8 @@ | |||
10 | #include <linux/task_io_accounting_ops.h> | 10 | #include <linux/task_io_accounting_ops.h> |
11 | 11 | ||
12 | #include "super.h" | 12 | #include "super.h" |
13 | #include "osd_client.h" | 13 | #include "mds_client.h" |
14 | #include <linux/ceph/osd_client.h> | ||
14 | 15 | ||
15 | /* | 16 | /* |
16 | * Ceph address space ops. | 17 | * Ceph address space ops. |
@@ -193,7 +194,8 @@ static int readpage_nounlock(struct file *filp, struct page *page) | |||
193 | { | 194 | { |
194 | struct inode *inode = filp->f_dentry->d_inode; | 195 | struct inode *inode = filp->f_dentry->d_inode; |
195 | struct ceph_inode_info *ci = ceph_inode(inode); | 196 | struct ceph_inode_info *ci = ceph_inode(inode); |
196 | struct ceph_osd_client *osdc = &ceph_inode_to_client(inode)->osdc; | 197 | struct ceph_osd_client *osdc = |
198 | &ceph_inode_to_client(inode)->client->osdc; | ||
197 | int err = 0; | 199 | int err = 0; |
198 | u64 len = PAGE_CACHE_SIZE; | 200 | u64 len = PAGE_CACHE_SIZE; |
199 | 201 | ||
@@ -265,7 +267,8 @@ static int ceph_readpages(struct file *file, struct address_space *mapping, | |||
265 | { | 267 | { |
266 | struct inode *inode = file->f_dentry->d_inode; | 268 | struct inode *inode = file->f_dentry->d_inode; |
267 | struct ceph_inode_info *ci = ceph_inode(inode); | 269 | struct ceph_inode_info *ci = ceph_inode(inode); |
268 | struct ceph_osd_client *osdc = &ceph_inode_to_client(inode)->osdc; | 270 | struct ceph_osd_client *osdc = |
271 | &ceph_inode_to_client(inode)->client->osdc; | ||
269 | int rc = 0; | 272 | int rc = 0; |
270 | struct page **pages; | 273 | struct page **pages; |
271 | loff_t offset; | 274 | loff_t offset; |
@@ -365,7 +368,7 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc) | |||
365 | { | 368 | { |
366 | struct inode *inode; | 369 | struct inode *inode; |
367 | struct ceph_inode_info *ci; | 370 | struct ceph_inode_info *ci; |
368 | struct ceph_client *client; | 371 | struct ceph_fs_client *fsc; |
369 | struct ceph_osd_client *osdc; | 372 | struct ceph_osd_client *osdc; |
370 | loff_t page_off = page->index << PAGE_CACHE_SHIFT; | 373 | loff_t page_off = page->index << PAGE_CACHE_SHIFT; |
371 | int len = PAGE_CACHE_SIZE; | 374 | int len = PAGE_CACHE_SIZE; |
@@ -383,8 +386,8 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc) | |||
383 | } | 386 | } |
384 | inode = page->mapping->host; | 387 | inode = page->mapping->host; |
385 | ci = ceph_inode(inode); | 388 | ci = ceph_inode(inode); |
386 | client = ceph_inode_to_client(inode); | 389 | fsc = ceph_inode_to_client(inode); |
387 | osdc = &client->osdc; | 390 | osdc = &fsc->client->osdc; |
388 | 391 | ||
389 | /* verify this is a writeable snap context */ | 392 | /* verify this is a writeable snap context */ |
390 | snapc = (void *)page->private; | 393 | snapc = (void *)page->private; |
@@ -414,10 +417,10 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc) | |||
414 | dout("writepage %p page %p index %lu on %llu~%u snapc %p\n", | 417 | dout("writepage %p page %p index %lu on %llu~%u snapc %p\n", |
415 | inode, page, page->index, page_off, len, snapc); | 418 | inode, page, page->index, page_off, len, snapc); |
416 | 419 | ||
417 | writeback_stat = atomic_long_inc_return(&client->writeback_count); | 420 | writeback_stat = atomic_long_inc_return(&fsc->writeback_count); |
418 | if (writeback_stat > | 421 | if (writeback_stat > |
419 | CONGESTION_ON_THRESH(client->mount_args->congestion_kb)) | 422 | CONGESTION_ON_THRESH(fsc->mount_options->congestion_kb)) |
420 | set_bdi_congested(&client->backing_dev_info, BLK_RW_ASYNC); | 423 | set_bdi_congested(&fsc->backing_dev_info, BLK_RW_ASYNC); |
421 | 424 | ||
422 | set_page_writeback(page); | 425 | set_page_writeback(page); |
423 | err = ceph_osdc_writepages(osdc, ceph_vino(inode), | 426 | err = ceph_osdc_writepages(osdc, ceph_vino(inode), |
@@ -496,7 +499,7 @@ static void writepages_finish(struct ceph_osd_request *req, | |||
496 | struct address_space *mapping = inode->i_mapping; | 499 | struct address_space *mapping = inode->i_mapping; |
497 | __s32 rc = -EIO; | 500 | __s32 rc = -EIO; |
498 | u64 bytes = 0; | 501 | u64 bytes = 0; |
499 | struct ceph_client *client = ceph_inode_to_client(inode); | 502 | struct ceph_fs_client *fsc = ceph_inode_to_client(inode); |
500 | long writeback_stat; | 503 | long writeback_stat; |
501 | unsigned issued = ceph_caps_issued(ci); | 504 | unsigned issued = ceph_caps_issued(ci); |
502 | 505 | ||
@@ -529,10 +532,10 @@ static void writepages_finish(struct ceph_osd_request *req, | |||
529 | WARN_ON(!PageUptodate(page)); | 532 | WARN_ON(!PageUptodate(page)); |
530 | 533 | ||
531 | writeback_stat = | 534 | writeback_stat = |
532 | atomic_long_dec_return(&client->writeback_count); | 535 | atomic_long_dec_return(&fsc->writeback_count); |
533 | if (writeback_stat < | 536 | if (writeback_stat < |
534 | CONGESTION_OFF_THRESH(client->mount_args->congestion_kb)) | 537 | CONGESTION_OFF_THRESH(fsc->mount_options->congestion_kb)) |
535 | clear_bdi_congested(&client->backing_dev_info, | 538 | clear_bdi_congested(&fsc->backing_dev_info, |
536 | BLK_RW_ASYNC); | 539 | BLK_RW_ASYNC); |
537 | 540 | ||
538 | ceph_put_snap_context((void *)page->private); | 541 | ceph_put_snap_context((void *)page->private); |
@@ -569,13 +572,13 @@ static void writepages_finish(struct ceph_osd_request *req, | |||
569 | * mempool. we avoid the mempool if we can because req->r_num_pages | 572 | * mempool. we avoid the mempool if we can because req->r_num_pages |
570 | * may be less than the maximum write size. | 573 | * may be less than the maximum write size. |
571 | */ | 574 | */ |
572 | static void alloc_page_vec(struct ceph_client *client, | 575 | static void alloc_page_vec(struct ceph_fs_client *fsc, |
573 | struct ceph_osd_request *req) | 576 | struct ceph_osd_request *req) |
574 | { | 577 | { |
575 | req->r_pages = kmalloc(sizeof(struct page *) * req->r_num_pages, | 578 | req->r_pages = kmalloc(sizeof(struct page *) * req->r_num_pages, |
576 | GFP_NOFS); | 579 | GFP_NOFS); |
577 | if (!req->r_pages) { | 580 | if (!req->r_pages) { |
578 | req->r_pages = mempool_alloc(client->wb_pagevec_pool, GFP_NOFS); | 581 | req->r_pages = mempool_alloc(fsc->wb_pagevec_pool, GFP_NOFS); |
579 | req->r_pages_from_pool = 1; | 582 | req->r_pages_from_pool = 1; |
580 | WARN_ON(!req->r_pages); | 583 | WARN_ON(!req->r_pages); |
581 | } | 584 | } |
@@ -590,7 +593,7 @@ static int ceph_writepages_start(struct address_space *mapping, | |||
590 | struct inode *inode = mapping->host; | 593 | struct inode *inode = mapping->host; |
591 | struct backing_dev_info *bdi = mapping->backing_dev_info; | 594 | struct backing_dev_info *bdi = mapping->backing_dev_info; |
592 | struct ceph_inode_info *ci = ceph_inode(inode); | 595 | struct ceph_inode_info *ci = ceph_inode(inode); |
593 | struct ceph_client *client; | 596 | struct ceph_fs_client *fsc; |
594 | pgoff_t index, start, end; | 597 | pgoff_t index, start, end; |
595 | int range_whole = 0; | 598 | int range_whole = 0; |
596 | int should_loop = 1; | 599 | int should_loop = 1; |
@@ -617,13 +620,13 @@ static int ceph_writepages_start(struct address_space *mapping, | |||
617 | wbc->sync_mode == WB_SYNC_NONE ? "NONE" : | 620 | wbc->sync_mode == WB_SYNC_NONE ? "NONE" : |
618 | (wbc->sync_mode == WB_SYNC_ALL ? "ALL" : "HOLD")); | 621 | (wbc->sync_mode == WB_SYNC_ALL ? "ALL" : "HOLD")); |
619 | 622 | ||
620 | client = ceph_inode_to_client(inode); | 623 | fsc = ceph_inode_to_client(inode); |
621 | if (client->mount_state == CEPH_MOUNT_SHUTDOWN) { | 624 | if (fsc->mount_state == CEPH_MOUNT_SHUTDOWN) { |
622 | pr_warning("writepage_start %p on forced umount\n", inode); | 625 | pr_warning("writepage_start %p on forced umount\n", inode); |
623 | return -EIO; /* we're in a forced umount, don't write! */ | 626 | return -EIO; /* we're in a forced umount, don't write! */ |
624 | } | 627 | } |
625 | if (client->mount_args->wsize && client->mount_args->wsize < wsize) | 628 | if (fsc->mount_options->wsize && fsc->mount_options->wsize < wsize) |
626 | wsize = client->mount_args->wsize; | 629 | wsize = fsc->mount_options->wsize; |
627 | if (wsize < PAGE_CACHE_SIZE) | 630 | if (wsize < PAGE_CACHE_SIZE) |
628 | wsize = PAGE_CACHE_SIZE; | 631 | wsize = PAGE_CACHE_SIZE; |
629 | max_pages_ever = wsize >> PAGE_CACHE_SHIFT; | 632 | max_pages_ever = wsize >> PAGE_CACHE_SHIFT; |
@@ -769,7 +772,7 @@ get_more_pages: | |||
769 | offset = (unsigned long long)page->index | 772 | offset = (unsigned long long)page->index |
770 | << PAGE_CACHE_SHIFT; | 773 | << PAGE_CACHE_SHIFT; |
771 | len = wsize; | 774 | len = wsize; |
772 | req = ceph_osdc_new_request(&client->osdc, | 775 | req = ceph_osdc_new_request(&fsc->client->osdc, |
773 | &ci->i_layout, | 776 | &ci->i_layout, |
774 | ceph_vino(inode), | 777 | ceph_vino(inode), |
775 | offset, &len, | 778 | offset, &len, |
@@ -782,7 +785,7 @@ get_more_pages: | |||
782 | &inode->i_mtime, true, 1); | 785 | &inode->i_mtime, true, 1); |
783 | max_pages = req->r_num_pages; | 786 | max_pages = req->r_num_pages; |
784 | 787 | ||
785 | alloc_page_vec(client, req); | 788 | alloc_page_vec(fsc, req); |
786 | req->r_callback = writepages_finish; | 789 | req->r_callback = writepages_finish; |
787 | req->r_inode = inode; | 790 | req->r_inode = inode; |
788 | } | 791 | } |
@@ -794,10 +797,10 @@ get_more_pages: | |||
794 | inode, page, page->index); | 797 | inode, page, page->index); |
795 | 798 | ||
796 | writeback_stat = | 799 | writeback_stat = |
797 | atomic_long_inc_return(&client->writeback_count); | 800 | atomic_long_inc_return(&fsc->writeback_count); |
798 | if (writeback_stat > CONGESTION_ON_THRESH( | 801 | if (writeback_stat > CONGESTION_ON_THRESH( |
799 | client->mount_args->congestion_kb)) { | 802 | fsc->mount_options->congestion_kb)) { |
800 | set_bdi_congested(&client->backing_dev_info, | 803 | set_bdi_congested(&fsc->backing_dev_info, |
801 | BLK_RW_ASYNC); | 804 | BLK_RW_ASYNC); |
802 | } | 805 | } |
803 | 806 | ||
@@ -846,7 +849,7 @@ get_more_pages: | |||
846 | op->payload_len = cpu_to_le32(len); | 849 | op->payload_len = cpu_to_le32(len); |
847 | req->r_request->hdr.data_len = cpu_to_le32(len); | 850 | req->r_request->hdr.data_len = cpu_to_le32(len); |
848 | 851 | ||
849 | ceph_osdc_start_request(&client->osdc, req, true); | 852 | ceph_osdc_start_request(&fsc->client->osdc, req, true); |
850 | req = NULL; | 853 | req = NULL; |
851 | 854 | ||
852 | /* continue? */ | 855 | /* continue? */ |
@@ -915,7 +918,7 @@ static int ceph_update_writeable_page(struct file *file, | |||
915 | { | 918 | { |
916 | struct inode *inode = file->f_dentry->d_inode; | 919 | struct inode *inode = file->f_dentry->d_inode; |
917 | struct ceph_inode_info *ci = ceph_inode(inode); | 920 | struct ceph_inode_info *ci = ceph_inode(inode); |
918 | struct ceph_mds_client *mdsc = &ceph_inode_to_client(inode)->mdsc; | 921 | struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc; |
919 | loff_t page_off = pos & PAGE_CACHE_MASK; | 922 | loff_t page_off = pos & PAGE_CACHE_MASK; |
920 | int pos_in_page = pos & ~PAGE_CACHE_MASK; | 923 | int pos_in_page = pos & ~PAGE_CACHE_MASK; |
921 | int end_in_page = pos_in_page + len; | 924 | int end_in_page = pos_in_page + len; |
@@ -1053,8 +1056,8 @@ static int ceph_write_end(struct file *file, struct address_space *mapping, | |||
1053 | struct page *page, void *fsdata) | 1056 | struct page *page, void *fsdata) |
1054 | { | 1057 | { |
1055 | struct inode *inode = file->f_dentry->d_inode; | 1058 | struct inode *inode = file->f_dentry->d_inode; |
1056 | struct ceph_client *client = ceph_inode_to_client(inode); | 1059 | struct ceph_fs_client *fsc = ceph_inode_to_client(inode); |
1057 | struct ceph_mds_client *mdsc = &client->mdsc; | 1060 | struct ceph_mds_client *mdsc = fsc->mdsc; |
1058 | unsigned from = pos & (PAGE_CACHE_SIZE - 1); | 1061 | unsigned from = pos & (PAGE_CACHE_SIZE - 1); |
1059 | int check_cap = 0; | 1062 | int check_cap = 0; |
1060 | 1063 | ||
@@ -1123,7 +1126,7 @@ static int ceph_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) | |||
1123 | { | 1126 | { |
1124 | struct inode *inode = vma->vm_file->f_dentry->d_inode; | 1127 | struct inode *inode = vma->vm_file->f_dentry->d_inode; |
1125 | struct page *page = vmf->page; | 1128 | struct page *page = vmf->page; |
1126 | struct ceph_mds_client *mdsc = &ceph_inode_to_client(inode)->mdsc; | 1129 | struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc; |
1127 | loff_t off = page->index << PAGE_CACHE_SHIFT; | 1130 | loff_t off = page->index << PAGE_CACHE_SHIFT; |
1128 | loff_t size, len; | 1131 | loff_t size, len; |
1129 | int ret; | 1132 | int ret; |
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index 5e9da996a151..98ab13e2b71d 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c | |||
@@ -1,4 +1,4 @@ | |||
1 | #include "ceph_debug.h" | 1 | #include <linux/ceph/ceph_debug.h> |
2 | 2 | ||
3 | #include <linux/fs.h> | 3 | #include <linux/fs.h> |
4 | #include <linux/kernel.h> | 4 | #include <linux/kernel.h> |
@@ -9,8 +9,9 @@ | |||
9 | #include <linux/writeback.h> | 9 | #include <linux/writeback.h> |
10 | 10 | ||
11 | #include "super.h" | 11 | #include "super.h" |
12 | #include "decode.h" | 12 | #include "mds_client.h" |
13 | #include "messenger.h" | 13 | #include <linux/ceph/decode.h> |
14 | #include <linux/ceph/messenger.h> | ||
14 | 15 | ||
15 | /* | 16 | /* |
16 | * Capability management | 17 | * Capability management |
@@ -287,11 +288,11 @@ void ceph_put_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap) | |||
287 | spin_unlock(&mdsc->caps_list_lock); | 288 | spin_unlock(&mdsc->caps_list_lock); |
288 | } | 289 | } |
289 | 290 | ||
290 | void ceph_reservation_status(struct ceph_client *client, | 291 | void ceph_reservation_status(struct ceph_fs_client *fsc, |
291 | int *total, int *avail, int *used, int *reserved, | 292 | int *total, int *avail, int *used, int *reserved, |
292 | int *min) | 293 | int *min) |
293 | { | 294 | { |
294 | struct ceph_mds_client *mdsc = &client->mdsc; | 295 | struct ceph_mds_client *mdsc = fsc->mdsc; |
295 | 296 | ||
296 | if (total) | 297 | if (total) |
297 | *total = mdsc->caps_total_count; | 298 | *total = mdsc->caps_total_count; |
@@ -399,7 +400,7 @@ static void __insert_cap_node(struct ceph_inode_info *ci, | |||
399 | static void __cap_set_timeouts(struct ceph_mds_client *mdsc, | 400 | static void __cap_set_timeouts(struct ceph_mds_client *mdsc, |
400 | struct ceph_inode_info *ci) | 401 | struct ceph_inode_info *ci) |
401 | { | 402 | { |
402 | struct ceph_mount_args *ma = mdsc->client->mount_args; | 403 | struct ceph_mount_options *ma = mdsc->fsc->mount_options; |
403 | 404 | ||
404 | ci->i_hold_caps_min = round_jiffies(jiffies + | 405 | ci->i_hold_caps_min = round_jiffies(jiffies + |
405 | ma->caps_wanted_delay_min * HZ); | 406 | ma->caps_wanted_delay_min * HZ); |
@@ -515,7 +516,7 @@ int ceph_add_cap(struct inode *inode, | |||
515 | unsigned seq, unsigned mseq, u64 realmino, int flags, | 516 | unsigned seq, unsigned mseq, u64 realmino, int flags, |
516 | struct ceph_cap_reservation *caps_reservation) | 517 | struct ceph_cap_reservation *caps_reservation) |
517 | { | 518 | { |
518 | struct ceph_mds_client *mdsc = &ceph_inode_to_client(inode)->mdsc; | 519 | struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc; |
519 | struct ceph_inode_info *ci = ceph_inode(inode); | 520 | struct ceph_inode_info *ci = ceph_inode(inode); |
520 | struct ceph_cap *new_cap = NULL; | 521 | struct ceph_cap *new_cap = NULL; |
521 | struct ceph_cap *cap; | 522 | struct ceph_cap *cap; |
@@ -873,7 +874,7 @@ void __ceph_remove_cap(struct ceph_cap *cap) | |||
873 | struct ceph_mds_session *session = cap->session; | 874 | struct ceph_mds_session *session = cap->session; |
874 | struct ceph_inode_info *ci = cap->ci; | 875 | struct ceph_inode_info *ci = cap->ci; |
875 | struct ceph_mds_client *mdsc = | 876 | struct ceph_mds_client *mdsc = |
876 | &ceph_sb_to_client(ci->vfs_inode.i_sb)->mdsc; | 877 | ceph_sb_to_client(ci->vfs_inode.i_sb)->mdsc; |
877 | int removed = 0; | 878 | int removed = 0; |
878 | 879 | ||
879 | dout("__ceph_remove_cap %p from %p\n", cap, &ci->vfs_inode); | 880 | dout("__ceph_remove_cap %p from %p\n", cap, &ci->vfs_inode); |
@@ -1210,7 +1211,7 @@ void __ceph_flush_snaps(struct ceph_inode_info *ci, | |||
1210 | int mds; | 1211 | int mds; |
1211 | struct ceph_cap_snap *capsnap; | 1212 | struct ceph_cap_snap *capsnap; |
1212 | u32 mseq; | 1213 | u32 mseq; |
1213 | struct ceph_mds_client *mdsc = &ceph_inode_to_client(inode)->mdsc; | 1214 | struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc; |
1214 | struct ceph_mds_session *session = NULL; /* if session != NULL, we hold | 1215 | struct ceph_mds_session *session = NULL; /* if session != NULL, we hold |
1215 | session->s_mutex */ | 1216 | session->s_mutex */ |
1216 | u64 next_follows = 0; /* keep track of how far we've gotten through the | 1217 | u64 next_follows = 0; /* keep track of how far we've gotten through the |
@@ -1336,7 +1337,7 @@ static void ceph_flush_snaps(struct ceph_inode_info *ci) | |||
1336 | void __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask) | 1337 | void __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask) |
1337 | { | 1338 | { |
1338 | struct ceph_mds_client *mdsc = | 1339 | struct ceph_mds_client *mdsc = |
1339 | &ceph_sb_to_client(ci->vfs_inode.i_sb)->mdsc; | 1340 | ceph_sb_to_client(ci->vfs_inode.i_sb)->mdsc; |
1340 | struct inode *inode = &ci->vfs_inode; | 1341 | struct inode *inode = &ci->vfs_inode; |
1341 | int was = ci->i_dirty_caps; | 1342 | int was = ci->i_dirty_caps; |
1342 | int dirty = 0; | 1343 | int dirty = 0; |
@@ -1378,7 +1379,7 @@ void __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask) | |||
1378 | static int __mark_caps_flushing(struct inode *inode, | 1379 | static int __mark_caps_flushing(struct inode *inode, |
1379 | struct ceph_mds_session *session) | 1380 | struct ceph_mds_session *session) |
1380 | { | 1381 | { |
1381 | struct ceph_mds_client *mdsc = &ceph_sb_to_client(inode->i_sb)->mdsc; | 1382 | struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc; |
1382 | struct ceph_inode_info *ci = ceph_inode(inode); | 1383 | struct ceph_inode_info *ci = ceph_inode(inode); |
1383 | int flushing; | 1384 | int flushing; |
1384 | 1385 | ||
@@ -1416,17 +1417,6 @@ static int __mark_caps_flushing(struct inode *inode, | |||
1416 | /* | 1417 | /* |
1417 | * try to invalidate mapping pages without blocking. | 1418 | * try to invalidate mapping pages without blocking. |
1418 | */ | 1419 | */ |
1419 | static int mapping_is_empty(struct address_space *mapping) | ||
1420 | { | ||
1421 | struct page *page = find_get_page(mapping, 0); | ||
1422 | |||
1423 | if (!page) | ||
1424 | return 1; | ||
1425 | |||
1426 | put_page(page); | ||
1427 | return 0; | ||
1428 | } | ||
1429 | |||
1430 | static int try_nonblocking_invalidate(struct inode *inode) | 1420 | static int try_nonblocking_invalidate(struct inode *inode) |
1431 | { | 1421 | { |
1432 | struct ceph_inode_info *ci = ceph_inode(inode); | 1422 | struct ceph_inode_info *ci = ceph_inode(inode); |
@@ -1436,7 +1426,7 @@ static int try_nonblocking_invalidate(struct inode *inode) | |||
1436 | invalidate_mapping_pages(&inode->i_data, 0, -1); | 1426 | invalidate_mapping_pages(&inode->i_data, 0, -1); |
1437 | spin_lock(&inode->i_lock); | 1427 | spin_lock(&inode->i_lock); |
1438 | 1428 | ||
1439 | if (mapping_is_empty(&inode->i_data) && | 1429 | if (inode->i_data.nrpages == 0 && |
1440 | invalidating_gen == ci->i_rdcache_gen) { | 1430 | invalidating_gen == ci->i_rdcache_gen) { |
1441 | /* success. */ | 1431 | /* success. */ |
1442 | dout("try_nonblocking_invalidate %p success\n", inode); | 1432 | dout("try_nonblocking_invalidate %p success\n", inode); |
@@ -1462,8 +1452,8 @@ static int try_nonblocking_invalidate(struct inode *inode) | |||
1462 | void ceph_check_caps(struct ceph_inode_info *ci, int flags, | 1452 | void ceph_check_caps(struct ceph_inode_info *ci, int flags, |
1463 | struct ceph_mds_session *session) | 1453 | struct ceph_mds_session *session) |
1464 | { | 1454 | { |
1465 | struct ceph_client *client = ceph_inode_to_client(&ci->vfs_inode); | 1455 | struct ceph_fs_client *fsc = ceph_inode_to_client(&ci->vfs_inode); |
1466 | struct ceph_mds_client *mdsc = &client->mdsc; | 1456 | struct ceph_mds_client *mdsc = fsc->mdsc; |
1467 | struct inode *inode = &ci->vfs_inode; | 1457 | struct inode *inode = &ci->vfs_inode; |
1468 | struct ceph_cap *cap; | 1458 | struct ceph_cap *cap; |
1469 | int file_wanted, used; | 1459 | int file_wanted, used; |
@@ -1533,7 +1523,7 @@ retry_locked: | |||
1533 | */ | 1523 | */ |
1534 | if ((!is_delayed || mdsc->stopping) && | 1524 | if ((!is_delayed || mdsc->stopping) && |
1535 | ci->i_wrbuffer_ref == 0 && /* no dirty pages... */ | 1525 | ci->i_wrbuffer_ref == 0 && /* no dirty pages... */ |
1536 | ci->i_rdcache_gen && /* may have cached pages */ | 1526 | inode->i_data.nrpages && /* have cached pages */ |
1537 | (file_wanted == 0 || /* no open files */ | 1527 | (file_wanted == 0 || /* no open files */ |
1538 | (revoking & (CEPH_CAP_FILE_CACHE| | 1528 | (revoking & (CEPH_CAP_FILE_CACHE| |
1539 | CEPH_CAP_FILE_LAZYIO))) && /* or revoking cache */ | 1529 | CEPH_CAP_FILE_LAZYIO))) && /* or revoking cache */ |
@@ -1706,7 +1696,7 @@ ack: | |||
1706 | static int try_flush_caps(struct inode *inode, struct ceph_mds_session *session, | 1696 | static int try_flush_caps(struct inode *inode, struct ceph_mds_session *session, |
1707 | unsigned *flush_tid) | 1697 | unsigned *flush_tid) |
1708 | { | 1698 | { |
1709 | struct ceph_mds_client *mdsc = &ceph_sb_to_client(inode->i_sb)->mdsc; | 1699 | struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc; |
1710 | struct ceph_inode_info *ci = ceph_inode(inode); | 1700 | struct ceph_inode_info *ci = ceph_inode(inode); |
1711 | int unlock_session = session ? 0 : 1; | 1701 | int unlock_session = session ? 0 : 1; |
1712 | int flushing = 0; | 1702 | int flushing = 0; |
@@ -1872,7 +1862,7 @@ int ceph_write_inode(struct inode *inode, struct writeback_control *wbc) | |||
1872 | caps_are_flushed(inode, flush_tid)); | 1862 | caps_are_flushed(inode, flush_tid)); |
1873 | } else { | 1863 | } else { |
1874 | struct ceph_mds_client *mdsc = | 1864 | struct ceph_mds_client *mdsc = |
1875 | &ceph_sb_to_client(inode->i_sb)->mdsc; | 1865 | ceph_sb_to_client(inode->i_sb)->mdsc; |
1876 | 1866 | ||
1877 | spin_lock(&inode->i_lock); | 1867 | spin_lock(&inode->i_lock); |
1878 | if (__ceph_caps_dirty(ci)) | 1868 | if (__ceph_caps_dirty(ci)) |
@@ -2465,7 +2455,7 @@ static void handle_cap_flush_ack(struct inode *inode, u64 flush_tid, | |||
2465 | __releases(inode->i_lock) | 2455 | __releases(inode->i_lock) |
2466 | { | 2456 | { |
2467 | struct ceph_inode_info *ci = ceph_inode(inode); | 2457 | struct ceph_inode_info *ci = ceph_inode(inode); |
2468 | struct ceph_mds_client *mdsc = &ceph_sb_to_client(inode->i_sb)->mdsc; | 2458 | struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc; |
2469 | unsigned seq = le32_to_cpu(m->seq); | 2459 | unsigned seq = le32_to_cpu(m->seq); |
2470 | int dirty = le32_to_cpu(m->dirty); | 2460 | int dirty = le32_to_cpu(m->dirty); |
2471 | int cleaned = 0; | 2461 | int cleaned = 0; |
@@ -2713,7 +2703,7 @@ void ceph_handle_caps(struct ceph_mds_session *session, | |||
2713 | struct ceph_msg *msg) | 2703 | struct ceph_msg *msg) |
2714 | { | 2704 | { |
2715 | struct ceph_mds_client *mdsc = session->s_mdsc; | 2705 | struct ceph_mds_client *mdsc = session->s_mdsc; |
2716 | struct super_block *sb = mdsc->client->sb; | 2706 | struct super_block *sb = mdsc->fsc->sb; |
2717 | struct inode *inode; | 2707 | struct inode *inode; |
2718 | struct ceph_cap *cap; | 2708 | struct ceph_cap *cap; |
2719 | struct ceph_mds_caps *h; | 2709 | struct ceph_mds_caps *h; |
diff --git a/fs/ceph/ceph_frag.c b/fs/ceph/ceph_frag.c index ab6cf35c4091..bdce8b1fbd06 100644 --- a/fs/ceph/ceph_frag.c +++ b/fs/ceph/ceph_frag.c | |||
@@ -1,7 +1,8 @@ | |||
1 | /* | 1 | /* |
2 | * Ceph 'frag' type | 2 | * Ceph 'frag' type |
3 | */ | 3 | */ |
4 | #include "types.h" | 4 | #include <linux/module.h> |
5 | #include <linux/ceph/types.h> | ||
5 | 6 | ||
6 | int ceph_frag_compare(__u32 a, __u32 b) | 7 | int ceph_frag_compare(__u32 a, __u32 b) |
7 | { | 8 | { |
diff --git a/fs/ceph/debugfs.c b/fs/ceph/debugfs.c index 6fd8b20a8611..7ae1b3d55b58 100644 --- a/fs/ceph/debugfs.c +++ b/fs/ceph/debugfs.c | |||
@@ -1,4 +1,4 @@ | |||
1 | #include "ceph_debug.h" | 1 | #include <linux/ceph/ceph_debug.h> |
2 | 2 | ||
3 | #include <linux/device.h> | 3 | #include <linux/device.h> |
4 | #include <linux/slab.h> | 4 | #include <linux/slab.h> |
@@ -7,143 +7,49 @@ | |||
7 | #include <linux/debugfs.h> | 7 | #include <linux/debugfs.h> |
8 | #include <linux/seq_file.h> | 8 | #include <linux/seq_file.h> |
9 | 9 | ||
10 | #include <linux/ceph/libceph.h> | ||
11 | #include <linux/ceph/mon_client.h> | ||
12 | #include <linux/ceph/auth.h> | ||
13 | #include <linux/ceph/debugfs.h> | ||
14 | |||
10 | #include "super.h" | 15 | #include "super.h" |
11 | #include "mds_client.h" | ||
12 | #include "mon_client.h" | ||
13 | #include "auth.h" | ||
14 | 16 | ||
15 | #ifdef CONFIG_DEBUG_FS | 17 | #ifdef CONFIG_DEBUG_FS |
16 | 18 | ||
17 | /* | 19 | #include "mds_client.h" |
18 | * Implement /sys/kernel/debug/ceph fun | ||
19 | * | ||
20 | * /sys/kernel/debug/ceph/client* - an instance of the ceph client | ||
21 | * .../osdmap - current osdmap | ||
22 | * .../mdsmap - current mdsmap | ||
23 | * .../monmap - current monmap | ||
24 | * .../osdc - active osd requests | ||
25 | * .../mdsc - active mds requests | ||
26 | * .../monc - mon client state | ||
27 | * .../dentry_lru - dump contents of dentry lru | ||
28 | * .../caps - expose cap (reservation) stats | ||
29 | * .../bdi - symlink to ../../bdi/something | ||
30 | */ | ||
31 | |||
32 | static struct dentry *ceph_debugfs_dir; | ||
33 | |||
34 | static int monmap_show(struct seq_file *s, void *p) | ||
35 | { | ||
36 | int i; | ||
37 | struct ceph_client *client = s->private; | ||
38 | |||
39 | if (client->monc.monmap == NULL) | ||
40 | return 0; | ||
41 | |||
42 | seq_printf(s, "epoch %d\n", client->monc.monmap->epoch); | ||
43 | for (i = 0; i < client->monc.monmap->num_mon; i++) { | ||
44 | struct ceph_entity_inst *inst = | ||
45 | &client->monc.monmap->mon_inst[i]; | ||
46 | |||
47 | seq_printf(s, "\t%s%lld\t%s\n", | ||
48 | ENTITY_NAME(inst->name), | ||
49 | pr_addr(&inst->addr.in_addr)); | ||
50 | } | ||
51 | return 0; | ||
52 | } | ||
53 | 20 | ||
54 | static int mdsmap_show(struct seq_file *s, void *p) | 21 | static int mdsmap_show(struct seq_file *s, void *p) |
55 | { | 22 | { |
56 | int i; | 23 | int i; |
57 | struct ceph_client *client = s->private; | 24 | struct ceph_fs_client *fsc = s->private; |
58 | 25 | ||
59 | if (client->mdsc.mdsmap == NULL) | 26 | if (fsc->mdsc == NULL || fsc->mdsc->mdsmap == NULL) |
60 | return 0; | 27 | return 0; |
61 | seq_printf(s, "epoch %d\n", client->mdsc.mdsmap->m_epoch); | 28 | seq_printf(s, "epoch %d\n", fsc->mdsc->mdsmap->m_epoch); |
62 | seq_printf(s, "root %d\n", client->mdsc.mdsmap->m_root); | 29 | seq_printf(s, "root %d\n", fsc->mdsc->mdsmap->m_root); |
63 | seq_printf(s, "session_timeout %d\n", | 30 | seq_printf(s, "session_timeout %d\n", |
64 | client->mdsc.mdsmap->m_session_timeout); | 31 | fsc->mdsc->mdsmap->m_session_timeout); |
65 | seq_printf(s, "session_autoclose %d\n", | 32 | seq_printf(s, "session_autoclose %d\n", |
66 | client->mdsc.mdsmap->m_session_autoclose); | 33 | fsc->mdsc->mdsmap->m_session_autoclose); |
67 | for (i = 0; i < client->mdsc.mdsmap->m_max_mds; i++) { | 34 | for (i = 0; i < fsc->mdsc->mdsmap->m_max_mds; i++) { |
68 | struct ceph_entity_addr *addr = | 35 | struct ceph_entity_addr *addr = |
69 | &client->mdsc.mdsmap->m_info[i].addr; | 36 | &fsc->mdsc->mdsmap->m_info[i].addr; |
70 | int state = client->mdsc.mdsmap->m_info[i].state; | 37 | int state = fsc->mdsc->mdsmap->m_info[i].state; |
71 | 38 | ||
72 | seq_printf(s, "\tmds%d\t%s\t(%s)\n", i, pr_addr(&addr->in_addr), | 39 | seq_printf(s, "\tmds%d\t%s\t(%s)\n", i, |
40 | ceph_pr_addr(&addr->in_addr), | ||
73 | ceph_mds_state_name(state)); | 41 | ceph_mds_state_name(state)); |
74 | } | 42 | } |
75 | return 0; | 43 | return 0; |
76 | } | 44 | } |
77 | 45 | ||
78 | static int osdmap_show(struct seq_file *s, void *p) | 46 | /* |
79 | { | 47 | * mdsc debugfs |
80 | int i; | 48 | */ |
81 | struct ceph_client *client = s->private; | ||
82 | struct rb_node *n; | ||
83 | |||
84 | if (client->osdc.osdmap == NULL) | ||
85 | return 0; | ||
86 | seq_printf(s, "epoch %d\n", client->osdc.osdmap->epoch); | ||
87 | seq_printf(s, "flags%s%s\n", | ||
88 | (client->osdc.osdmap->flags & CEPH_OSDMAP_NEARFULL) ? | ||
89 | " NEARFULL" : "", | ||
90 | (client->osdc.osdmap->flags & CEPH_OSDMAP_FULL) ? | ||
91 | " FULL" : ""); | ||
92 | for (n = rb_first(&client->osdc.osdmap->pg_pools); n; n = rb_next(n)) { | ||
93 | struct ceph_pg_pool_info *pool = | ||
94 | rb_entry(n, struct ceph_pg_pool_info, node); | ||
95 | seq_printf(s, "pg_pool %d pg_num %d / %d, lpg_num %d / %d\n", | ||
96 | pool->id, pool->v.pg_num, pool->pg_num_mask, | ||
97 | pool->v.lpg_num, pool->lpg_num_mask); | ||
98 | } | ||
99 | for (i = 0; i < client->osdc.osdmap->max_osd; i++) { | ||
100 | struct ceph_entity_addr *addr = | ||
101 | &client->osdc.osdmap->osd_addr[i]; | ||
102 | int state = client->osdc.osdmap->osd_state[i]; | ||
103 | char sb[64]; | ||
104 | |||
105 | seq_printf(s, "\tosd%d\t%s\t%3d%%\t(%s)\n", | ||
106 | i, pr_addr(&addr->in_addr), | ||
107 | ((client->osdc.osdmap->osd_weight[i]*100) >> 16), | ||
108 | ceph_osdmap_state_str(sb, sizeof(sb), state)); | ||
109 | } | ||
110 | return 0; | ||
111 | } | ||
112 | |||
113 | static int monc_show(struct seq_file *s, void *p) | ||
114 | { | ||
115 | struct ceph_client *client = s->private; | ||
116 | struct ceph_mon_generic_request *req; | ||
117 | struct ceph_mon_client *monc = &client->monc; | ||
118 | struct rb_node *rp; | ||
119 | |||
120 | mutex_lock(&monc->mutex); | ||
121 | |||
122 | if (monc->have_mdsmap) | ||
123 | seq_printf(s, "have mdsmap %u\n", (unsigned)monc->have_mdsmap); | ||
124 | if (monc->have_osdmap) | ||
125 | seq_printf(s, "have osdmap %u\n", (unsigned)monc->have_osdmap); | ||
126 | if (monc->want_next_osdmap) | ||
127 | seq_printf(s, "want next osdmap\n"); | ||
128 | |||
129 | for (rp = rb_first(&monc->generic_request_tree); rp; rp = rb_next(rp)) { | ||
130 | __u16 op; | ||
131 | req = rb_entry(rp, struct ceph_mon_generic_request, node); | ||
132 | op = le16_to_cpu(req->request->hdr.type); | ||
133 | if (op == CEPH_MSG_STATFS) | ||
134 | seq_printf(s, "%lld statfs\n", req->tid); | ||
135 | else | ||
136 | seq_printf(s, "%lld unknown\n", req->tid); | ||
137 | } | ||
138 | |||
139 | mutex_unlock(&monc->mutex); | ||
140 | return 0; | ||
141 | } | ||
142 | |||
143 | static int mdsc_show(struct seq_file *s, void *p) | 49 | static int mdsc_show(struct seq_file *s, void *p) |
144 | { | 50 | { |
145 | struct ceph_client *client = s->private; | 51 | struct ceph_fs_client *fsc = s->private; |
146 | struct ceph_mds_client *mdsc = &client->mdsc; | 52 | struct ceph_mds_client *mdsc = fsc->mdsc; |
147 | struct ceph_mds_request *req; | 53 | struct ceph_mds_request *req; |
148 | struct rb_node *rp; | 54 | struct rb_node *rp; |
149 | int pathlen; | 55 | int pathlen; |
@@ -214,61 +120,12 @@ static int mdsc_show(struct seq_file *s, void *p) | |||
214 | return 0; | 120 | return 0; |
215 | } | 121 | } |
216 | 122 | ||
217 | static int osdc_show(struct seq_file *s, void *pp) | ||
218 | { | ||
219 | struct ceph_client *client = s->private; | ||
220 | struct ceph_osd_client *osdc = &client->osdc; | ||
221 | struct rb_node *p; | ||
222 | |||
223 | mutex_lock(&osdc->request_mutex); | ||
224 | for (p = rb_first(&osdc->requests); p; p = rb_next(p)) { | ||
225 | struct ceph_osd_request *req; | ||
226 | struct ceph_osd_request_head *head; | ||
227 | struct ceph_osd_op *op; | ||
228 | int num_ops; | ||
229 | int opcode, olen; | ||
230 | int i; | ||
231 | |||
232 | req = rb_entry(p, struct ceph_osd_request, r_node); | ||
233 | |||
234 | seq_printf(s, "%lld\tosd%d\t%d.%x\t", req->r_tid, | ||
235 | req->r_osd ? req->r_osd->o_osd : -1, | ||
236 | le32_to_cpu(req->r_pgid.pool), | ||
237 | le16_to_cpu(req->r_pgid.ps)); | ||
238 | |||
239 | head = req->r_request->front.iov_base; | ||
240 | op = (void *)(head + 1); | ||
241 | |||
242 | num_ops = le16_to_cpu(head->num_ops); | ||
243 | olen = le32_to_cpu(head->object_len); | ||
244 | seq_printf(s, "%.*s", olen, | ||
245 | (const char *)(head->ops + num_ops)); | ||
246 | |||
247 | if (req->r_reassert_version.epoch) | ||
248 | seq_printf(s, "\t%u'%llu", | ||
249 | (unsigned)le32_to_cpu(req->r_reassert_version.epoch), | ||
250 | le64_to_cpu(req->r_reassert_version.version)); | ||
251 | else | ||
252 | seq_printf(s, "\t"); | ||
253 | |||
254 | for (i = 0; i < num_ops; i++) { | ||
255 | opcode = le16_to_cpu(op->op); | ||
256 | seq_printf(s, "\t%s", ceph_osd_op_name(opcode)); | ||
257 | op++; | ||
258 | } | ||
259 | |||
260 | seq_printf(s, "\n"); | ||
261 | } | ||
262 | mutex_unlock(&osdc->request_mutex); | ||
263 | return 0; | ||
264 | } | ||
265 | |||
266 | static int caps_show(struct seq_file *s, void *p) | 123 | static int caps_show(struct seq_file *s, void *p) |
267 | { | 124 | { |
268 | struct ceph_client *client = s->private; | 125 | struct ceph_fs_client *fsc = s->private; |
269 | int total, avail, used, reserved, min; | 126 | int total, avail, used, reserved, min; |
270 | 127 | ||
271 | ceph_reservation_status(client, &total, &avail, &used, &reserved, &min); | 128 | ceph_reservation_status(fsc, &total, &avail, &used, &reserved, &min); |
272 | seq_printf(s, "total\t\t%d\n" | 129 | seq_printf(s, "total\t\t%d\n" |
273 | "avail\t\t%d\n" | 130 | "avail\t\t%d\n" |
274 | "used\t\t%d\n" | 131 | "used\t\t%d\n" |
@@ -280,8 +137,8 @@ static int caps_show(struct seq_file *s, void *p) | |||
280 | 137 | ||
281 | static int dentry_lru_show(struct seq_file *s, void *ptr) | 138 | static int dentry_lru_show(struct seq_file *s, void *ptr) |
282 | { | 139 | { |
283 | struct ceph_client *client = s->private; | 140 | struct ceph_fs_client *fsc = s->private; |
284 | struct ceph_mds_client *mdsc = &client->mdsc; | 141 | struct ceph_mds_client *mdsc = fsc->mdsc; |
285 | struct ceph_dentry_info *di; | 142 | struct ceph_dentry_info *di; |
286 | 143 | ||
287 | spin_lock(&mdsc->dentry_lru_lock); | 144 | spin_lock(&mdsc->dentry_lru_lock); |
@@ -295,199 +152,124 @@ static int dentry_lru_show(struct seq_file *s, void *ptr) | |||
295 | return 0; | 152 | return 0; |
296 | } | 153 | } |
297 | 154 | ||
298 | #define DEFINE_SHOW_FUNC(name) \ | 155 | CEPH_DEFINE_SHOW_FUNC(mdsmap_show) |
299 | static int name##_open(struct inode *inode, struct file *file) \ | 156 | CEPH_DEFINE_SHOW_FUNC(mdsc_show) |
300 | { \ | 157 | CEPH_DEFINE_SHOW_FUNC(caps_show) |
301 | struct seq_file *sf; \ | 158 | CEPH_DEFINE_SHOW_FUNC(dentry_lru_show) |
302 | int ret; \ | 159 | |
303 | \ | ||
304 | ret = single_open(file, name, NULL); \ | ||
305 | sf = file->private_data; \ | ||
306 | sf->private = inode->i_private; \ | ||
307 | return ret; \ | ||
308 | } \ | ||
309 | \ | ||
310 | static const struct file_operations name##_fops = { \ | ||
311 | .open = name##_open, \ | ||
312 | .read = seq_read, \ | ||
313 | .llseek = seq_lseek, \ | ||
314 | .release = single_release, \ | ||
315 | }; | ||
316 | |||
317 | DEFINE_SHOW_FUNC(monmap_show) | ||
318 | DEFINE_SHOW_FUNC(mdsmap_show) | ||
319 | DEFINE_SHOW_FUNC(osdmap_show) | ||
320 | DEFINE_SHOW_FUNC(monc_show) | ||
321 | DEFINE_SHOW_FUNC(mdsc_show) | ||
322 | DEFINE_SHOW_FUNC(osdc_show) | ||
323 | DEFINE_SHOW_FUNC(dentry_lru_show) | ||
324 | DEFINE_SHOW_FUNC(caps_show) | ||
325 | 160 | ||
161 | /* | ||
162 | * debugfs | ||
163 | */ | ||
326 | static int congestion_kb_set(void *data, u64 val) | 164 | static int congestion_kb_set(void *data, u64 val) |
327 | { | 165 | { |
328 | struct ceph_client *client = (struct ceph_client *)data; | 166 | struct ceph_fs_client *fsc = (struct ceph_fs_client *)data; |
329 | |||
330 | if (client) | ||
331 | client->mount_args->congestion_kb = (int)val; | ||
332 | 167 | ||
168 | fsc->mount_options->congestion_kb = (int)val; | ||
333 | return 0; | 169 | return 0; |
334 | } | 170 | } |
335 | 171 | ||
336 | static int congestion_kb_get(void *data, u64 *val) | 172 | static int congestion_kb_get(void *data, u64 *val) |
337 | { | 173 | { |
338 | struct ceph_client *client = (struct ceph_client *)data; | 174 | struct ceph_fs_client *fsc = (struct ceph_fs_client *)data; |
339 | |||
340 | if (client) | ||
341 | *val = (u64)client->mount_args->congestion_kb; | ||
342 | 175 | ||
176 | *val = (u64)fsc->mount_options->congestion_kb; | ||
343 | return 0; | 177 | return 0; |
344 | } | 178 | } |
345 | 179 | ||
346 | |||
347 | DEFINE_SIMPLE_ATTRIBUTE(congestion_kb_fops, congestion_kb_get, | 180 | DEFINE_SIMPLE_ATTRIBUTE(congestion_kb_fops, congestion_kb_get, |
348 | congestion_kb_set, "%llu\n"); | 181 | congestion_kb_set, "%llu\n"); |
349 | 182 | ||
350 | int __init ceph_debugfs_init(void) | ||
351 | { | ||
352 | ceph_debugfs_dir = debugfs_create_dir("ceph", NULL); | ||
353 | if (!ceph_debugfs_dir) | ||
354 | return -ENOMEM; | ||
355 | return 0; | ||
356 | } | ||
357 | 183 | ||
358 | void ceph_debugfs_cleanup(void) | 184 | void ceph_fs_debugfs_cleanup(struct ceph_fs_client *fsc) |
359 | { | 185 | { |
360 | debugfs_remove(ceph_debugfs_dir); | 186 | dout("ceph_fs_debugfs_cleanup\n"); |
187 | debugfs_remove(fsc->debugfs_bdi); | ||
188 | debugfs_remove(fsc->debugfs_congestion_kb); | ||
189 | debugfs_remove(fsc->debugfs_mdsmap); | ||
190 | debugfs_remove(fsc->debugfs_caps); | ||
191 | debugfs_remove(fsc->debugfs_mdsc); | ||
192 | debugfs_remove(fsc->debugfs_dentry_lru); | ||
361 | } | 193 | } |
362 | 194 | ||
363 | int ceph_debugfs_client_init(struct ceph_client *client) | 195 | int ceph_fs_debugfs_init(struct ceph_fs_client *fsc) |
364 | { | 196 | { |
365 | int ret = 0; | 197 | char name[100]; |
366 | char name[80]; | 198 | int err = -ENOMEM; |
367 | |||
368 | snprintf(name, sizeof(name), "%pU.client%lld", &client->fsid, | ||
369 | client->monc.auth->global_id); | ||
370 | 199 | ||
371 | client->debugfs_dir = debugfs_create_dir(name, ceph_debugfs_dir); | 200 | dout("ceph_fs_debugfs_init\n"); |
372 | if (!client->debugfs_dir) | 201 | fsc->debugfs_congestion_kb = |
373 | goto out; | 202 | debugfs_create_file("writeback_congestion_kb", |
374 | 203 | 0600, | |
375 | client->monc.debugfs_file = debugfs_create_file("monc", | 204 | fsc->client->debugfs_dir, |
376 | 0600, | 205 | fsc, |
377 | client->debugfs_dir, | 206 | &congestion_kb_fops); |
378 | client, | 207 | if (!fsc->debugfs_congestion_kb) |
379 | &monc_show_fops); | ||
380 | if (!client->monc.debugfs_file) | ||
381 | goto out; | 208 | goto out; |
382 | 209 | ||
383 | client->mdsc.debugfs_file = debugfs_create_file("mdsc", | 210 | dout("a\n"); |
384 | 0600, | ||
385 | client->debugfs_dir, | ||
386 | client, | ||
387 | &mdsc_show_fops); | ||
388 | if (!client->mdsc.debugfs_file) | ||
389 | goto out; | ||
390 | 211 | ||
391 | client->osdc.debugfs_file = debugfs_create_file("osdc", | 212 | snprintf(name, sizeof(name), "../../bdi/%s", |
392 | 0600, | 213 | dev_name(fsc->backing_dev_info.dev)); |
393 | client->debugfs_dir, | 214 | fsc->debugfs_bdi = |
394 | client, | 215 | debugfs_create_symlink("bdi", |
395 | &osdc_show_fops); | 216 | fsc->client->debugfs_dir, |
396 | if (!client->osdc.debugfs_file) | 217 | name); |
218 | if (!fsc->debugfs_bdi) | ||
397 | goto out; | 219 | goto out; |
398 | 220 | ||
399 | client->debugfs_monmap = debugfs_create_file("monmap", | 221 | dout("b\n"); |
222 | fsc->debugfs_mdsmap = debugfs_create_file("mdsmap", | ||
400 | 0600, | 223 | 0600, |
401 | client->debugfs_dir, | 224 | fsc->client->debugfs_dir, |
402 | client, | 225 | fsc, |
403 | &monmap_show_fops); | ||
404 | if (!client->debugfs_monmap) | ||
405 | goto out; | ||
406 | |||
407 | client->debugfs_mdsmap = debugfs_create_file("mdsmap", | ||
408 | 0600, | ||
409 | client->debugfs_dir, | ||
410 | client, | ||
411 | &mdsmap_show_fops); | 226 | &mdsmap_show_fops); |
412 | if (!client->debugfs_mdsmap) | 227 | if (!fsc->debugfs_mdsmap) |
413 | goto out; | ||
414 | |||
415 | client->debugfs_osdmap = debugfs_create_file("osdmap", | ||
416 | 0600, | ||
417 | client->debugfs_dir, | ||
418 | client, | ||
419 | &osdmap_show_fops); | ||
420 | if (!client->debugfs_osdmap) | ||
421 | goto out; | 228 | goto out; |
422 | 229 | ||
423 | client->debugfs_dentry_lru = debugfs_create_file("dentry_lru", | 230 | dout("ca\n"); |
424 | 0600, | 231 | fsc->debugfs_mdsc = debugfs_create_file("mdsc", |
425 | client->debugfs_dir, | 232 | 0600, |
426 | client, | 233 | fsc->client->debugfs_dir, |
427 | &dentry_lru_show_fops); | 234 | fsc, |
428 | if (!client->debugfs_dentry_lru) | 235 | &mdsc_show_fops); |
236 | if (!fsc->debugfs_mdsc) | ||
429 | goto out; | 237 | goto out; |
430 | 238 | ||
431 | client->debugfs_caps = debugfs_create_file("caps", | 239 | dout("da\n"); |
240 | fsc->debugfs_caps = debugfs_create_file("caps", | ||
432 | 0400, | 241 | 0400, |
433 | client->debugfs_dir, | 242 | fsc->client->debugfs_dir, |
434 | client, | 243 | fsc, |
435 | &caps_show_fops); | 244 | &caps_show_fops); |
436 | if (!client->debugfs_caps) | 245 | if (!fsc->debugfs_caps) |
437 | goto out; | 246 | goto out; |
438 | 247 | ||
439 | client->debugfs_congestion_kb = | 248 | dout("ea\n"); |
440 | debugfs_create_file("writeback_congestion_kb", | 249 | fsc->debugfs_dentry_lru = debugfs_create_file("dentry_lru", |
441 | 0600, | 250 | 0600, |
442 | client->debugfs_dir, | 251 | fsc->client->debugfs_dir, |
443 | client, | 252 | fsc, |
444 | &congestion_kb_fops); | 253 | &dentry_lru_show_fops); |
445 | if (!client->debugfs_congestion_kb) | 254 | if (!fsc->debugfs_dentry_lru) |
446 | goto out; | 255 | goto out; |
447 | 256 | ||
448 | sprintf(name, "../../bdi/%s", dev_name(client->sb->s_bdi->dev)); | ||
449 | client->debugfs_bdi = debugfs_create_symlink("bdi", client->debugfs_dir, | ||
450 | name); | ||
451 | |||
452 | return 0; | 257 | return 0; |
453 | 258 | ||
454 | out: | 259 | out: |
455 | ceph_debugfs_client_cleanup(client); | 260 | ceph_fs_debugfs_cleanup(fsc); |
456 | return ret; | 261 | return err; |
457 | } | 262 | } |
458 | 263 | ||
459 | void ceph_debugfs_client_cleanup(struct ceph_client *client) | ||
460 | { | ||
461 | debugfs_remove(client->debugfs_bdi); | ||
462 | debugfs_remove(client->debugfs_caps); | ||
463 | debugfs_remove(client->debugfs_dentry_lru); | ||
464 | debugfs_remove(client->debugfs_osdmap); | ||
465 | debugfs_remove(client->debugfs_mdsmap); | ||
466 | debugfs_remove(client->debugfs_monmap); | ||
467 | debugfs_remove(client->osdc.debugfs_file); | ||
468 | debugfs_remove(client->mdsc.debugfs_file); | ||
469 | debugfs_remove(client->monc.debugfs_file); | ||
470 | debugfs_remove(client->debugfs_congestion_kb); | ||
471 | debugfs_remove(client->debugfs_dir); | ||
472 | } | ||
473 | 264 | ||
474 | #else /* CONFIG_DEBUG_FS */ | 265 | #else /* CONFIG_DEBUG_FS */ |
475 | 266 | ||
476 | int __init ceph_debugfs_init(void) | 267 | int ceph_fs_debugfs_init(struct ceph_fs_client *fsc) |
477 | { | ||
478 | return 0; | ||
479 | } | ||
480 | |||
481 | void ceph_debugfs_cleanup(void) | ||
482 | { | ||
483 | } | ||
484 | |||
485 | int ceph_debugfs_client_init(struct ceph_client *client) | ||
486 | { | 268 | { |
487 | return 0; | 269 | return 0; |
488 | } | 270 | } |
489 | 271 | ||
490 | void ceph_debugfs_client_cleanup(struct ceph_client *client) | 272 | void ceph_fs_debugfs_cleanup(struct ceph_fs_client *fsc) |
491 | { | 273 | { |
492 | } | 274 | } |
493 | 275 | ||
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index a1986eb52045..e0a2dc6fcafc 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c | |||
@@ -1,4 +1,4 @@ | |||
1 | #include "ceph_debug.h" | 1 | #include <linux/ceph/ceph_debug.h> |
2 | 2 | ||
3 | #include <linux/spinlock.h> | 3 | #include <linux/spinlock.h> |
4 | #include <linux/fs_struct.h> | 4 | #include <linux/fs_struct.h> |
@@ -7,6 +7,7 @@ | |||
7 | #include <linux/sched.h> | 7 | #include <linux/sched.h> |
8 | 8 | ||
9 | #include "super.h" | 9 | #include "super.h" |
10 | #include "mds_client.h" | ||
10 | 11 | ||
11 | /* | 12 | /* |
12 | * Directory operations: readdir, lookup, create, link, unlink, | 13 | * Directory operations: readdir, lookup, create, link, unlink, |
@@ -94,10 +95,7 @@ static unsigned fpos_off(loff_t p) | |||
94 | */ | 95 | */ |
95 | static int __dcache_readdir(struct file *filp, | 96 | static int __dcache_readdir(struct file *filp, |
96 | void *dirent, filldir_t filldir) | 97 | void *dirent, filldir_t filldir) |
97 | __releases(inode->i_lock) | ||
98 | __acquires(inode->i_lock) | ||
99 | { | 98 | { |
100 | struct inode *inode = filp->f_dentry->d_inode; | ||
101 | struct ceph_file_info *fi = filp->private_data; | 99 | struct ceph_file_info *fi = filp->private_data; |
102 | struct dentry *parent = filp->f_dentry; | 100 | struct dentry *parent = filp->f_dentry; |
103 | struct inode *dir = parent->d_inode; | 101 | struct inode *dir = parent->d_inode; |
@@ -153,7 +151,6 @@ more: | |||
153 | 151 | ||
154 | atomic_inc(&dentry->d_count); | 152 | atomic_inc(&dentry->d_count); |
155 | spin_unlock(&dcache_lock); | 153 | spin_unlock(&dcache_lock); |
156 | spin_unlock(&inode->i_lock); | ||
157 | 154 | ||
158 | dout(" %llu (%llu) dentry %p %.*s %p\n", di->offset, filp->f_pos, | 155 | dout(" %llu (%llu) dentry %p %.*s %p\n", di->offset, filp->f_pos, |
159 | dentry, dentry->d_name.len, dentry->d_name.name, dentry->d_inode); | 156 | dentry, dentry->d_name.len, dentry->d_name.name, dentry->d_inode); |
@@ -171,35 +168,30 @@ more: | |||
171 | } else { | 168 | } else { |
172 | dput(last); | 169 | dput(last); |
173 | } | 170 | } |
174 | last = NULL; | ||
175 | } | 171 | } |
176 | |||
177 | spin_lock(&inode->i_lock); | ||
178 | spin_lock(&dcache_lock); | ||
179 | |||
180 | last = dentry; | 172 | last = dentry; |
181 | 173 | ||
182 | if (err < 0) | 174 | if (err < 0) |
183 | goto out_unlock; | 175 | goto out; |
184 | 176 | ||
185 | p = p->prev; | ||
186 | filp->f_pos++; | 177 | filp->f_pos++; |
187 | 178 | ||
188 | /* make sure a dentry wasn't dropped while we didn't have dcache_lock */ | 179 | /* make sure a dentry wasn't dropped while we didn't have dcache_lock */ |
189 | if ((ceph_inode(dir)->i_ceph_flags & CEPH_I_COMPLETE)) | 180 | if (!ceph_i_test(dir, CEPH_I_COMPLETE)) { |
190 | goto more; | 181 | dout(" lost I_COMPLETE on %p; falling back to mds\n", dir); |
191 | dout(" lost I_COMPLETE on %p; falling back to mds\n", dir); | 182 | err = -EAGAIN; |
192 | err = -EAGAIN; | 183 | goto out; |
184 | } | ||
185 | |||
186 | spin_lock(&dcache_lock); | ||
187 | p = p->prev; /* advance to next dentry */ | ||
188 | goto more; | ||
193 | 189 | ||
194 | out_unlock: | 190 | out_unlock: |
195 | spin_unlock(&dcache_lock); | 191 | spin_unlock(&dcache_lock); |
196 | 192 | out: | |
197 | if (last) { | 193 | if (last) |
198 | spin_unlock(&inode->i_lock); | ||
199 | dput(last); | 194 | dput(last); |
200 | spin_lock(&inode->i_lock); | ||
201 | } | ||
202 | |||
203 | return err; | 195 | return err; |
204 | } | 196 | } |
205 | 197 | ||
@@ -227,15 +219,15 @@ static int ceph_readdir(struct file *filp, void *dirent, filldir_t filldir) | |||
227 | struct ceph_file_info *fi = filp->private_data; | 219 | struct ceph_file_info *fi = filp->private_data; |
228 | struct inode *inode = filp->f_dentry->d_inode; | 220 | struct inode *inode = filp->f_dentry->d_inode; |
229 | struct ceph_inode_info *ci = ceph_inode(inode); | 221 | struct ceph_inode_info *ci = ceph_inode(inode); |
230 | struct ceph_client *client = ceph_inode_to_client(inode); | 222 | struct ceph_fs_client *fsc = ceph_inode_to_client(inode); |
231 | struct ceph_mds_client *mdsc = &client->mdsc; | 223 | struct ceph_mds_client *mdsc = fsc->mdsc; |
232 | unsigned frag = fpos_frag(filp->f_pos); | 224 | unsigned frag = fpos_frag(filp->f_pos); |
233 | int off = fpos_off(filp->f_pos); | 225 | int off = fpos_off(filp->f_pos); |
234 | int err; | 226 | int err; |
235 | u32 ftype; | 227 | u32 ftype; |
236 | struct ceph_mds_reply_info_parsed *rinfo; | 228 | struct ceph_mds_reply_info_parsed *rinfo; |
237 | const int max_entries = client->mount_args->max_readdir; | 229 | const int max_entries = fsc->mount_options->max_readdir; |
238 | const int max_bytes = client->mount_args->max_readdir_bytes; | 230 | const int max_bytes = fsc->mount_options->max_readdir_bytes; |
239 | 231 | ||
240 | dout("readdir %p filp %p frag %u off %u\n", inode, filp, frag, off); | 232 | dout("readdir %p filp %p frag %u off %u\n", inode, filp, frag, off); |
241 | if (fi->at_end) | 233 | if (fi->at_end) |
@@ -267,17 +259,17 @@ static int ceph_readdir(struct file *filp, void *dirent, filldir_t filldir) | |||
267 | /* can we use the dcache? */ | 259 | /* can we use the dcache? */ |
268 | spin_lock(&inode->i_lock); | 260 | spin_lock(&inode->i_lock); |
269 | if ((filp->f_pos == 2 || fi->dentry) && | 261 | if ((filp->f_pos == 2 || fi->dentry) && |
270 | !ceph_test_opt(client, NOASYNCREADDIR) && | 262 | !ceph_test_mount_opt(fsc, NOASYNCREADDIR) && |
271 | ceph_snap(inode) != CEPH_SNAPDIR && | 263 | ceph_snap(inode) != CEPH_SNAPDIR && |
272 | (ci->i_ceph_flags & CEPH_I_COMPLETE) && | 264 | (ci->i_ceph_flags & CEPH_I_COMPLETE) && |
273 | __ceph_caps_issued_mask(ci, CEPH_CAP_FILE_SHARED, 1)) { | 265 | __ceph_caps_issued_mask(ci, CEPH_CAP_FILE_SHARED, 1)) { |
266 | spin_unlock(&inode->i_lock); | ||
274 | err = __dcache_readdir(filp, dirent, filldir); | 267 | err = __dcache_readdir(filp, dirent, filldir); |
275 | if (err != -EAGAIN) { | 268 | if (err != -EAGAIN) |
276 | spin_unlock(&inode->i_lock); | ||
277 | return err; | 269 | return err; |
278 | } | 270 | } else { |
271 | spin_unlock(&inode->i_lock); | ||
279 | } | 272 | } |
280 | spin_unlock(&inode->i_lock); | ||
281 | if (fi->dentry) { | 273 | if (fi->dentry) { |
282 | err = note_last_dentry(fi, fi->dentry->d_name.name, | 274 | err = note_last_dentry(fi, fi->dentry->d_name.name, |
283 | fi->dentry->d_name.len); | 275 | fi->dentry->d_name.len); |
@@ -487,14 +479,13 @@ static loff_t ceph_dir_llseek(struct file *file, loff_t offset, int origin) | |||
487 | struct dentry *ceph_finish_lookup(struct ceph_mds_request *req, | 479 | struct dentry *ceph_finish_lookup(struct ceph_mds_request *req, |
488 | struct dentry *dentry, int err) | 480 | struct dentry *dentry, int err) |
489 | { | 481 | { |
490 | struct ceph_client *client = ceph_sb_to_client(dentry->d_sb); | 482 | struct ceph_fs_client *fsc = ceph_sb_to_client(dentry->d_sb); |
491 | struct inode *parent = dentry->d_parent->d_inode; | 483 | struct inode *parent = dentry->d_parent->d_inode; |
492 | 484 | ||
493 | /* .snap dir? */ | 485 | /* .snap dir? */ |
494 | if (err == -ENOENT && | 486 | if (err == -ENOENT && |
495 | ceph_vino(parent).ino != CEPH_INO_ROOT && /* no .snap in root dir */ | ||
496 | strcmp(dentry->d_name.name, | 487 | strcmp(dentry->d_name.name, |
497 | client->mount_args->snapdir_name) == 0) { | 488 | fsc->mount_options->snapdir_name) == 0) { |
498 | struct inode *inode = ceph_get_snapdir(parent); | 489 | struct inode *inode = ceph_get_snapdir(parent); |
499 | dout("ENOENT on snapdir %p '%.*s', linking to snapdir %p\n", | 490 | dout("ENOENT on snapdir %p '%.*s', linking to snapdir %p\n", |
500 | dentry, dentry->d_name.len, dentry->d_name.name, inode); | 491 | dentry, dentry->d_name.len, dentry->d_name.name, inode); |
@@ -539,8 +530,8 @@ static int is_root_ceph_dentry(struct inode *inode, struct dentry *dentry) | |||
539 | static struct dentry *ceph_lookup(struct inode *dir, struct dentry *dentry, | 530 | static struct dentry *ceph_lookup(struct inode *dir, struct dentry *dentry, |
540 | struct nameidata *nd) | 531 | struct nameidata *nd) |
541 | { | 532 | { |
542 | struct ceph_client *client = ceph_sb_to_client(dir->i_sb); | 533 | struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb); |
543 | struct ceph_mds_client *mdsc = &client->mdsc; | 534 | struct ceph_mds_client *mdsc = fsc->mdsc; |
544 | struct ceph_mds_request *req; | 535 | struct ceph_mds_request *req; |
545 | int op; | 536 | int op; |
546 | int err; | 537 | int err; |
@@ -572,7 +563,7 @@ static struct dentry *ceph_lookup(struct inode *dir, struct dentry *dentry, | |||
572 | spin_lock(&dir->i_lock); | 563 | spin_lock(&dir->i_lock); |
573 | dout(" dir %p flags are %d\n", dir, ci->i_ceph_flags); | 564 | dout(" dir %p flags are %d\n", dir, ci->i_ceph_flags); |
574 | if (strncmp(dentry->d_name.name, | 565 | if (strncmp(dentry->d_name.name, |
575 | client->mount_args->snapdir_name, | 566 | fsc->mount_options->snapdir_name, |
576 | dentry->d_name.len) && | 567 | dentry->d_name.len) && |
577 | !is_root_ceph_dentry(dir, dentry) && | 568 | !is_root_ceph_dentry(dir, dentry) && |
578 | (ci->i_ceph_flags & CEPH_I_COMPLETE) && | 569 | (ci->i_ceph_flags & CEPH_I_COMPLETE) && |
@@ -629,8 +620,8 @@ int ceph_handle_notrace_create(struct inode *dir, struct dentry *dentry) | |||
629 | static int ceph_mknod(struct inode *dir, struct dentry *dentry, | 620 | static int ceph_mknod(struct inode *dir, struct dentry *dentry, |
630 | int mode, dev_t rdev) | 621 | int mode, dev_t rdev) |
631 | { | 622 | { |
632 | struct ceph_client *client = ceph_sb_to_client(dir->i_sb); | 623 | struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb); |
633 | struct ceph_mds_client *mdsc = &client->mdsc; | 624 | struct ceph_mds_client *mdsc = fsc->mdsc; |
634 | struct ceph_mds_request *req; | 625 | struct ceph_mds_request *req; |
635 | int err; | 626 | int err; |
636 | 627 | ||
@@ -685,8 +676,8 @@ static int ceph_create(struct inode *dir, struct dentry *dentry, int mode, | |||
685 | static int ceph_symlink(struct inode *dir, struct dentry *dentry, | 676 | static int ceph_symlink(struct inode *dir, struct dentry *dentry, |
686 | const char *dest) | 677 | const char *dest) |
687 | { | 678 | { |
688 | struct ceph_client *client = ceph_sb_to_client(dir->i_sb); | 679 | struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb); |
689 | struct ceph_mds_client *mdsc = &client->mdsc; | 680 | struct ceph_mds_client *mdsc = fsc->mdsc; |
690 | struct ceph_mds_request *req; | 681 | struct ceph_mds_request *req; |
691 | int err; | 682 | int err; |
692 | 683 | ||
@@ -716,8 +707,8 @@ static int ceph_symlink(struct inode *dir, struct dentry *dentry, | |||
716 | 707 | ||
717 | static int ceph_mkdir(struct inode *dir, struct dentry *dentry, int mode) | 708 | static int ceph_mkdir(struct inode *dir, struct dentry *dentry, int mode) |
718 | { | 709 | { |
719 | struct ceph_client *client = ceph_sb_to_client(dir->i_sb); | 710 | struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb); |
720 | struct ceph_mds_client *mdsc = &client->mdsc; | 711 | struct ceph_mds_client *mdsc = fsc->mdsc; |
721 | struct ceph_mds_request *req; | 712 | struct ceph_mds_request *req; |
722 | int err = -EROFS; | 713 | int err = -EROFS; |
723 | int op; | 714 | int op; |
@@ -758,8 +749,8 @@ out: | |||
758 | static int ceph_link(struct dentry *old_dentry, struct inode *dir, | 749 | static int ceph_link(struct dentry *old_dentry, struct inode *dir, |
759 | struct dentry *dentry) | 750 | struct dentry *dentry) |
760 | { | 751 | { |
761 | struct ceph_client *client = ceph_sb_to_client(dir->i_sb); | 752 | struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb); |
762 | struct ceph_mds_client *mdsc = &client->mdsc; | 753 | struct ceph_mds_client *mdsc = fsc->mdsc; |
763 | struct ceph_mds_request *req; | 754 | struct ceph_mds_request *req; |
764 | int err; | 755 | int err; |
765 | 756 | ||
@@ -813,8 +804,8 @@ static int drop_caps_for_unlink(struct inode *inode) | |||
813 | */ | 804 | */ |
814 | static int ceph_unlink(struct inode *dir, struct dentry *dentry) | 805 | static int ceph_unlink(struct inode *dir, struct dentry *dentry) |
815 | { | 806 | { |
816 | struct ceph_client *client = ceph_sb_to_client(dir->i_sb); | 807 | struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb); |
817 | struct ceph_mds_client *mdsc = &client->mdsc; | 808 | struct ceph_mds_client *mdsc = fsc->mdsc; |
818 | struct inode *inode = dentry->d_inode; | 809 | struct inode *inode = dentry->d_inode; |
819 | struct ceph_mds_request *req; | 810 | struct ceph_mds_request *req; |
820 | int err = -EROFS; | 811 | int err = -EROFS; |
@@ -854,8 +845,8 @@ out: | |||
854 | static int ceph_rename(struct inode *old_dir, struct dentry *old_dentry, | 845 | static int ceph_rename(struct inode *old_dir, struct dentry *old_dentry, |
855 | struct inode *new_dir, struct dentry *new_dentry) | 846 | struct inode *new_dir, struct dentry *new_dentry) |
856 | { | 847 | { |
857 | struct ceph_client *client = ceph_sb_to_client(old_dir->i_sb); | 848 | struct ceph_fs_client *fsc = ceph_sb_to_client(old_dir->i_sb); |
858 | struct ceph_mds_client *mdsc = &client->mdsc; | 849 | struct ceph_mds_client *mdsc = fsc->mdsc; |
859 | struct ceph_mds_request *req; | 850 | struct ceph_mds_request *req; |
860 | int err; | 851 | int err; |
861 | 852 | ||
@@ -1076,7 +1067,7 @@ static ssize_t ceph_read_dir(struct file *file, char __user *buf, size_t size, | |||
1076 | struct ceph_inode_info *ci = ceph_inode(inode); | 1067 | struct ceph_inode_info *ci = ceph_inode(inode); |
1077 | int left; | 1068 | int left; |
1078 | 1069 | ||
1079 | if (!ceph_test_opt(ceph_sb_to_client(inode->i_sb), DIRSTAT)) | 1070 | if (!ceph_test_mount_opt(ceph_sb_to_client(inode->i_sb), DIRSTAT)) |
1080 | return -EISDIR; | 1071 | return -EISDIR; |
1081 | 1072 | ||
1082 | if (!cf->dir_info) { | 1073 | if (!cf->dir_info) { |
@@ -1177,7 +1168,7 @@ void ceph_dentry_lru_add(struct dentry *dn) | |||
1177 | dout("dentry_lru_add %p %p '%.*s'\n", di, dn, | 1168 | dout("dentry_lru_add %p %p '%.*s'\n", di, dn, |
1178 | dn->d_name.len, dn->d_name.name); | 1169 | dn->d_name.len, dn->d_name.name); |
1179 | if (di) { | 1170 | if (di) { |
1180 | mdsc = &ceph_sb_to_client(dn->d_sb)->mdsc; | 1171 | mdsc = ceph_sb_to_client(dn->d_sb)->mdsc; |
1181 | spin_lock(&mdsc->dentry_lru_lock); | 1172 | spin_lock(&mdsc->dentry_lru_lock); |
1182 | list_add_tail(&di->lru, &mdsc->dentry_lru); | 1173 | list_add_tail(&di->lru, &mdsc->dentry_lru); |
1183 | mdsc->num_dentry++; | 1174 | mdsc->num_dentry++; |
@@ -1193,7 +1184,7 @@ void ceph_dentry_lru_touch(struct dentry *dn) | |||
1193 | dout("dentry_lru_touch %p %p '%.*s' (offset %lld)\n", di, dn, | 1184 | dout("dentry_lru_touch %p %p '%.*s' (offset %lld)\n", di, dn, |
1194 | dn->d_name.len, dn->d_name.name, di->offset); | 1185 | dn->d_name.len, dn->d_name.name, di->offset); |
1195 | if (di) { | 1186 | if (di) { |
1196 | mdsc = &ceph_sb_to_client(dn->d_sb)->mdsc; | 1187 | mdsc = ceph_sb_to_client(dn->d_sb)->mdsc; |
1197 | spin_lock(&mdsc->dentry_lru_lock); | 1188 | spin_lock(&mdsc->dentry_lru_lock); |
1198 | list_move_tail(&di->lru, &mdsc->dentry_lru); | 1189 | list_move_tail(&di->lru, &mdsc->dentry_lru); |
1199 | spin_unlock(&mdsc->dentry_lru_lock); | 1190 | spin_unlock(&mdsc->dentry_lru_lock); |
@@ -1208,7 +1199,7 @@ void ceph_dentry_lru_del(struct dentry *dn) | |||
1208 | dout("dentry_lru_del %p %p '%.*s'\n", di, dn, | 1199 | dout("dentry_lru_del %p %p '%.*s'\n", di, dn, |
1209 | dn->d_name.len, dn->d_name.name); | 1200 | dn->d_name.len, dn->d_name.name); |
1210 | if (di) { | 1201 | if (di) { |
1211 | mdsc = &ceph_sb_to_client(dn->d_sb)->mdsc; | 1202 | mdsc = ceph_sb_to_client(dn->d_sb)->mdsc; |
1212 | spin_lock(&mdsc->dentry_lru_lock); | 1203 | spin_lock(&mdsc->dentry_lru_lock); |
1213 | list_del_init(&di->lru); | 1204 | list_del_init(&di->lru); |
1214 | mdsc->num_dentry--; | 1205 | mdsc->num_dentry--; |
diff --git a/fs/ceph/export.c b/fs/ceph/export.c index e38423e82f2e..2297d9426992 100644 --- a/fs/ceph/export.c +++ b/fs/ceph/export.c | |||
@@ -1,10 +1,11 @@ | |||
1 | #include "ceph_debug.h" | 1 | #include <linux/ceph/ceph_debug.h> |
2 | 2 | ||
3 | #include <linux/exportfs.h> | 3 | #include <linux/exportfs.h> |
4 | #include <linux/slab.h> | 4 | #include <linux/slab.h> |
5 | #include <asm/unaligned.h> | 5 | #include <asm/unaligned.h> |
6 | 6 | ||
7 | #include "super.h" | 7 | #include "super.h" |
8 | #include "mds_client.h" | ||
8 | 9 | ||
9 | /* | 10 | /* |
10 | * NFS export support | 11 | * NFS export support |
@@ -120,7 +121,7 @@ static struct dentry *__fh_to_dentry(struct super_block *sb, | |||
120 | static struct dentry *__cfh_to_dentry(struct super_block *sb, | 121 | static struct dentry *__cfh_to_dentry(struct super_block *sb, |
121 | struct ceph_nfs_confh *cfh) | 122 | struct ceph_nfs_confh *cfh) |
122 | { | 123 | { |
123 | struct ceph_mds_client *mdsc = &ceph_sb_to_client(sb)->mdsc; | 124 | struct ceph_mds_client *mdsc = ceph_sb_to_client(sb)->mdsc; |
124 | struct inode *inode; | 125 | struct inode *inode; |
125 | struct dentry *dentry; | 126 | struct dentry *dentry; |
126 | struct ceph_vino vino; | 127 | struct ceph_vino vino; |
diff --git a/fs/ceph/file.c b/fs/ceph/file.c index 66e4da6dba22..e77c28cf3690 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c | |||
@@ -1,5 +1,6 @@ | |||
1 | #include "ceph_debug.h" | 1 | #include <linux/ceph/ceph_debug.h> |
2 | 2 | ||
3 | #include <linux/module.h> | ||
3 | #include <linux/sched.h> | 4 | #include <linux/sched.h> |
4 | #include <linux/slab.h> | 5 | #include <linux/slab.h> |
5 | #include <linux/file.h> | 6 | #include <linux/file.h> |
@@ -38,8 +39,8 @@ | |||
38 | static struct ceph_mds_request * | 39 | static struct ceph_mds_request * |
39 | prepare_open_request(struct super_block *sb, int flags, int create_mode) | 40 | prepare_open_request(struct super_block *sb, int flags, int create_mode) |
40 | { | 41 | { |
41 | struct ceph_client *client = ceph_sb_to_client(sb); | 42 | struct ceph_fs_client *fsc = ceph_sb_to_client(sb); |
42 | struct ceph_mds_client *mdsc = &client->mdsc; | 43 | struct ceph_mds_client *mdsc = fsc->mdsc; |
43 | struct ceph_mds_request *req; | 44 | struct ceph_mds_request *req; |
44 | int want_auth = USE_ANY_MDS; | 45 | int want_auth = USE_ANY_MDS; |
45 | int op = (flags & O_CREAT) ? CEPH_MDS_OP_CREATE : CEPH_MDS_OP_OPEN; | 46 | int op = (flags & O_CREAT) ? CEPH_MDS_OP_CREATE : CEPH_MDS_OP_OPEN; |
@@ -117,8 +118,8 @@ static int ceph_init_file(struct inode *inode, struct file *file, int fmode) | |||
117 | int ceph_open(struct inode *inode, struct file *file) | 118 | int ceph_open(struct inode *inode, struct file *file) |
118 | { | 119 | { |
119 | struct ceph_inode_info *ci = ceph_inode(inode); | 120 | struct ceph_inode_info *ci = ceph_inode(inode); |
120 | struct ceph_client *client = ceph_sb_to_client(inode->i_sb); | 121 | struct ceph_fs_client *fsc = ceph_sb_to_client(inode->i_sb); |
121 | struct ceph_mds_client *mdsc = &client->mdsc; | 122 | struct ceph_mds_client *mdsc = fsc->mdsc; |
122 | struct ceph_mds_request *req; | 123 | struct ceph_mds_request *req; |
123 | struct ceph_file_info *cf = file->private_data; | 124 | struct ceph_file_info *cf = file->private_data; |
124 | struct inode *parent_inode = file->f_dentry->d_parent->d_inode; | 125 | struct inode *parent_inode = file->f_dentry->d_parent->d_inode; |
@@ -216,8 +217,8 @@ struct dentry *ceph_lookup_open(struct inode *dir, struct dentry *dentry, | |||
216 | struct nameidata *nd, int mode, | 217 | struct nameidata *nd, int mode, |
217 | int locked_dir) | 218 | int locked_dir) |
218 | { | 219 | { |
219 | struct ceph_client *client = ceph_sb_to_client(dir->i_sb); | 220 | struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb); |
220 | struct ceph_mds_client *mdsc = &client->mdsc; | 221 | struct ceph_mds_client *mdsc = fsc->mdsc; |
221 | struct file *file = nd->intent.open.file; | 222 | struct file *file = nd->intent.open.file; |
222 | struct inode *parent_inode = get_dentry_parent_inode(file->f_dentry); | 223 | struct inode *parent_inode = get_dentry_parent_inode(file->f_dentry); |
223 | struct ceph_mds_request *req; | 224 | struct ceph_mds_request *req; |
@@ -270,163 +271,6 @@ int ceph_release(struct inode *inode, struct file *file) | |||
270 | } | 271 | } |
271 | 272 | ||
272 | /* | 273 | /* |
273 | * build a vector of user pages | ||
274 | */ | ||
275 | static struct page **get_direct_page_vector(const char __user *data, | ||
276 | int num_pages, | ||
277 | loff_t off, size_t len) | ||
278 | { | ||
279 | struct page **pages; | ||
280 | int rc; | ||
281 | |||
282 | pages = kmalloc(sizeof(*pages) * num_pages, GFP_NOFS); | ||
283 | if (!pages) | ||
284 | return ERR_PTR(-ENOMEM); | ||
285 | |||
286 | down_read(&current->mm->mmap_sem); | ||
287 | rc = get_user_pages(current, current->mm, (unsigned long)data, | ||
288 | num_pages, 0, 0, pages, NULL); | ||
289 | up_read(&current->mm->mmap_sem); | ||
290 | if (rc < 0) | ||
291 | goto fail; | ||
292 | return pages; | ||
293 | |||
294 | fail: | ||
295 | kfree(pages); | ||
296 | return ERR_PTR(rc); | ||
297 | } | ||
298 | |||
299 | static void put_page_vector(struct page **pages, int num_pages) | ||
300 | { | ||
301 | int i; | ||
302 | |||
303 | for (i = 0; i < num_pages; i++) | ||
304 | put_page(pages[i]); | ||
305 | kfree(pages); | ||
306 | } | ||
307 | |||
308 | void ceph_release_page_vector(struct page **pages, int num_pages) | ||
309 | { | ||
310 | int i; | ||
311 | |||
312 | for (i = 0; i < num_pages; i++) | ||
313 | __free_pages(pages[i], 0); | ||
314 | kfree(pages); | ||
315 | } | ||
316 | |||
317 | /* | ||
318 | * allocate a vector new pages | ||
319 | */ | ||
320 | static struct page **ceph_alloc_page_vector(int num_pages, gfp_t flags) | ||
321 | { | ||
322 | struct page **pages; | ||
323 | int i; | ||
324 | |||
325 | pages = kmalloc(sizeof(*pages) * num_pages, flags); | ||
326 | if (!pages) | ||
327 | return ERR_PTR(-ENOMEM); | ||
328 | for (i = 0; i < num_pages; i++) { | ||
329 | pages[i] = __page_cache_alloc(flags); | ||
330 | if (pages[i] == NULL) { | ||
331 | ceph_release_page_vector(pages, i); | ||
332 | return ERR_PTR(-ENOMEM); | ||
333 | } | ||
334 | } | ||
335 | return pages; | ||
336 | } | ||
337 | |||
338 | /* | ||
339 | * copy user data into a page vector | ||
340 | */ | ||
341 | static int copy_user_to_page_vector(struct page **pages, | ||
342 | const char __user *data, | ||
343 | loff_t off, size_t len) | ||
344 | { | ||
345 | int i = 0; | ||
346 | int po = off & ~PAGE_CACHE_MASK; | ||
347 | int left = len; | ||
348 | int l, bad; | ||
349 | |||
350 | while (left > 0) { | ||
351 | l = min_t(int, PAGE_CACHE_SIZE-po, left); | ||
352 | bad = copy_from_user(page_address(pages[i]) + po, data, l); | ||
353 | if (bad == l) | ||
354 | return -EFAULT; | ||
355 | data += l - bad; | ||
356 | left -= l - bad; | ||
357 | po += l - bad; | ||
358 | if (po == PAGE_CACHE_SIZE) { | ||
359 | po = 0; | ||
360 | i++; | ||
361 | } | ||
362 | } | ||
363 | return len; | ||
364 | } | ||
365 | |||
366 | /* | ||
367 | * copy user data from a page vector into a user pointer | ||
368 | */ | ||
369 | static int copy_page_vector_to_user(struct page **pages, char __user *data, | ||
370 | loff_t off, size_t len) | ||
371 | { | ||
372 | int i = 0; | ||
373 | int po = off & ~PAGE_CACHE_MASK; | ||
374 | int left = len; | ||
375 | int l, bad; | ||
376 | |||
377 | while (left > 0) { | ||
378 | l = min_t(int, left, PAGE_CACHE_SIZE-po); | ||
379 | bad = copy_to_user(data, page_address(pages[i]) + po, l); | ||
380 | if (bad == l) | ||
381 | return -EFAULT; | ||
382 | data += l - bad; | ||
383 | left -= l - bad; | ||
384 | if (po) { | ||
385 | po += l - bad; | ||
386 | if (po == PAGE_CACHE_SIZE) | ||
387 | po = 0; | ||
388 | } | ||
389 | i++; | ||
390 | } | ||
391 | return len; | ||
392 | } | ||
393 | |||
394 | /* | ||
395 | * Zero an extent within a page vector. Offset is relative to the | ||
396 | * start of the first page. | ||
397 | */ | ||
398 | static void zero_page_vector_range(int off, int len, struct page **pages) | ||
399 | { | ||
400 | int i = off >> PAGE_CACHE_SHIFT; | ||
401 | |||
402 | off &= ~PAGE_CACHE_MASK; | ||
403 | |||
404 | dout("zero_page_vector_page %u~%u\n", off, len); | ||
405 | |||
406 | /* leading partial page? */ | ||
407 | if (off) { | ||
408 | int end = min((int)PAGE_CACHE_SIZE, off + len); | ||
409 | dout("zeroing %d %p head from %d\n", i, pages[i], | ||
410 | (int)off); | ||
411 | zero_user_segment(pages[i], off, end); | ||
412 | len -= (end - off); | ||
413 | i++; | ||
414 | } | ||
415 | while (len >= PAGE_CACHE_SIZE) { | ||
416 | dout("zeroing %d %p len=%d\n", i, pages[i], len); | ||
417 | zero_user_segment(pages[i], 0, PAGE_CACHE_SIZE); | ||
418 | len -= PAGE_CACHE_SIZE; | ||
419 | i++; | ||
420 | } | ||
421 | /* trailing partial page? */ | ||
422 | if (len) { | ||
423 | dout("zeroing %d %p tail to %d\n", i, pages[i], (int)len); | ||
424 | zero_user_segment(pages[i], 0, len); | ||
425 | } | ||
426 | } | ||
427 | |||
428 | |||
429 | /* | ||
430 | * Read a range of bytes striped over one or more objects. Iterate over | 274 | * Read a range of bytes striped over one or more objects. Iterate over |
431 | * objects we stripe over. (That's not atomic, but good enough for now.) | 275 | * objects we stripe over. (That's not atomic, but good enough for now.) |
432 | * | 276 | * |
@@ -438,7 +282,7 @@ static int striped_read(struct inode *inode, | |||
438 | struct page **pages, int num_pages, | 282 | struct page **pages, int num_pages, |
439 | int *checkeof) | 283 | int *checkeof) |
440 | { | 284 | { |
441 | struct ceph_client *client = ceph_inode_to_client(inode); | 285 | struct ceph_fs_client *fsc = ceph_inode_to_client(inode); |
442 | struct ceph_inode_info *ci = ceph_inode(inode); | 286 | struct ceph_inode_info *ci = ceph_inode(inode); |
443 | u64 pos, this_len; | 287 | u64 pos, this_len; |
444 | int page_off = off & ~PAGE_CACHE_MASK; /* first byte's offset in page */ | 288 | int page_off = off & ~PAGE_CACHE_MASK; /* first byte's offset in page */ |
@@ -459,7 +303,7 @@ static int striped_read(struct inode *inode, | |||
459 | 303 | ||
460 | more: | 304 | more: |
461 | this_len = left; | 305 | this_len = left; |
462 | ret = ceph_osdc_readpages(&client->osdc, ceph_vino(inode), | 306 | ret = ceph_osdc_readpages(&fsc->client->osdc, ceph_vino(inode), |
463 | &ci->i_layout, pos, &this_len, | 307 | &ci->i_layout, pos, &this_len, |
464 | ci->i_truncate_seq, | 308 | ci->i_truncate_seq, |
465 | ci->i_truncate_size, | 309 | ci->i_truncate_size, |
@@ -477,8 +321,8 @@ more: | |||
477 | 321 | ||
478 | if (read < pos - off) { | 322 | if (read < pos - off) { |
479 | dout(" zero gap %llu to %llu\n", off + read, pos); | 323 | dout(" zero gap %llu to %llu\n", off + read, pos); |
480 | zero_page_vector_range(page_off + read, | 324 | ceph_zero_page_vector_range(page_off + read, |
481 | pos - off - read, pages); | 325 | pos - off - read, pages); |
482 | } | 326 | } |
483 | pos += ret; | 327 | pos += ret; |
484 | read = pos - off; | 328 | read = pos - off; |
@@ -495,8 +339,8 @@ more: | |||
495 | /* was original extent fully inside i_size? */ | 339 | /* was original extent fully inside i_size? */ |
496 | if (pos + left <= inode->i_size) { | 340 | if (pos + left <= inode->i_size) { |
497 | dout("zero tail\n"); | 341 | dout("zero tail\n"); |
498 | zero_page_vector_range(page_off + read, len - read, | 342 | ceph_zero_page_vector_range(page_off + read, len - read, |
499 | pages); | 343 | pages); |
500 | read = len; | 344 | read = len; |
501 | goto out; | 345 | goto out; |
502 | } | 346 | } |
@@ -531,7 +375,7 @@ static ssize_t ceph_sync_read(struct file *file, char __user *data, | |||
531 | (file->f_flags & O_DIRECT) ? "O_DIRECT" : ""); | 375 | (file->f_flags & O_DIRECT) ? "O_DIRECT" : ""); |
532 | 376 | ||
533 | if (file->f_flags & O_DIRECT) { | 377 | if (file->f_flags & O_DIRECT) { |
534 | pages = get_direct_page_vector(data, num_pages, off, len); | 378 | pages = ceph_get_direct_page_vector(data, num_pages, off, len); |
535 | 379 | ||
536 | /* | 380 | /* |
537 | * flush any page cache pages in this range. this | 381 | * flush any page cache pages in this range. this |
@@ -552,13 +396,13 @@ static ssize_t ceph_sync_read(struct file *file, char __user *data, | |||
552 | ret = striped_read(inode, off, len, pages, num_pages, checkeof); | 396 | ret = striped_read(inode, off, len, pages, num_pages, checkeof); |
553 | 397 | ||
554 | if (ret >= 0 && (file->f_flags & O_DIRECT) == 0) | 398 | if (ret >= 0 && (file->f_flags & O_DIRECT) == 0) |
555 | ret = copy_page_vector_to_user(pages, data, off, ret); | 399 | ret = ceph_copy_page_vector_to_user(pages, data, off, ret); |
556 | if (ret >= 0) | 400 | if (ret >= 0) |
557 | *poff = off + ret; | 401 | *poff = off + ret; |
558 | 402 | ||
559 | done: | 403 | done: |
560 | if (file->f_flags & O_DIRECT) | 404 | if (file->f_flags & O_DIRECT) |
561 | put_page_vector(pages, num_pages); | 405 | ceph_put_page_vector(pages, num_pages); |
562 | else | 406 | else |
563 | ceph_release_page_vector(pages, num_pages); | 407 | ceph_release_page_vector(pages, num_pages); |
564 | dout("sync_read result %d\n", ret); | 408 | dout("sync_read result %d\n", ret); |
@@ -594,7 +438,7 @@ static ssize_t ceph_sync_write(struct file *file, const char __user *data, | |||
594 | { | 438 | { |
595 | struct inode *inode = file->f_dentry->d_inode; | 439 | struct inode *inode = file->f_dentry->d_inode; |
596 | struct ceph_inode_info *ci = ceph_inode(inode); | 440 | struct ceph_inode_info *ci = ceph_inode(inode); |
597 | struct ceph_client *client = ceph_inode_to_client(inode); | 441 | struct ceph_fs_client *fsc = ceph_inode_to_client(inode); |
598 | struct ceph_osd_request *req; | 442 | struct ceph_osd_request *req; |
599 | struct page **pages; | 443 | struct page **pages; |
600 | int num_pages; | 444 | int num_pages; |
@@ -642,7 +486,7 @@ static ssize_t ceph_sync_write(struct file *file, const char __user *data, | |||
642 | */ | 486 | */ |
643 | more: | 487 | more: |
644 | len = left; | 488 | len = left; |
645 | req = ceph_osdc_new_request(&client->osdc, &ci->i_layout, | 489 | req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout, |
646 | ceph_vino(inode), pos, &len, | 490 | ceph_vino(inode), pos, &len, |
647 | CEPH_OSD_OP_WRITE, flags, | 491 | CEPH_OSD_OP_WRITE, flags, |
648 | ci->i_snap_realm->cached_context, | 492 | ci->i_snap_realm->cached_context, |
@@ -655,7 +499,7 @@ more: | |||
655 | num_pages = calc_pages_for(pos, len); | 499 | num_pages = calc_pages_for(pos, len); |
656 | 500 | ||
657 | if (file->f_flags & O_DIRECT) { | 501 | if (file->f_flags & O_DIRECT) { |
658 | pages = get_direct_page_vector(data, num_pages, pos, len); | 502 | pages = ceph_get_direct_page_vector(data, num_pages, pos, len); |
659 | if (IS_ERR(pages)) { | 503 | if (IS_ERR(pages)) { |
660 | ret = PTR_ERR(pages); | 504 | ret = PTR_ERR(pages); |
661 | goto out; | 505 | goto out; |
@@ -673,7 +517,7 @@ more: | |||
673 | ret = PTR_ERR(pages); | 517 | ret = PTR_ERR(pages); |
674 | goto out; | 518 | goto out; |
675 | } | 519 | } |
676 | ret = copy_user_to_page_vector(pages, data, pos, len); | 520 | ret = ceph_copy_user_to_page_vector(pages, data, pos, len); |
677 | if (ret < 0) { | 521 | if (ret < 0) { |
678 | ceph_release_page_vector(pages, num_pages); | 522 | ceph_release_page_vector(pages, num_pages); |
679 | goto out; | 523 | goto out; |
@@ -689,7 +533,7 @@ more: | |||
689 | req->r_num_pages = num_pages; | 533 | req->r_num_pages = num_pages; |
690 | req->r_inode = inode; | 534 | req->r_inode = inode; |
691 | 535 | ||
692 | ret = ceph_osdc_start_request(&client->osdc, req, false); | 536 | ret = ceph_osdc_start_request(&fsc->client->osdc, req, false); |
693 | if (!ret) { | 537 | if (!ret) { |
694 | if (req->r_safe_callback) { | 538 | if (req->r_safe_callback) { |
695 | /* | 539 | /* |
@@ -701,11 +545,11 @@ more: | |||
701 | spin_unlock(&ci->i_unsafe_lock); | 545 | spin_unlock(&ci->i_unsafe_lock); |
702 | ceph_get_cap_refs(ci, CEPH_CAP_FILE_WR); | 546 | ceph_get_cap_refs(ci, CEPH_CAP_FILE_WR); |
703 | } | 547 | } |
704 | ret = ceph_osdc_wait_request(&client->osdc, req); | 548 | ret = ceph_osdc_wait_request(&fsc->client->osdc, req); |
705 | } | 549 | } |
706 | 550 | ||
707 | if (file->f_flags & O_DIRECT) | 551 | if (file->f_flags & O_DIRECT) |
708 | put_page_vector(pages, num_pages); | 552 | ceph_put_page_vector(pages, num_pages); |
709 | else if (file->f_flags & O_SYNC) | 553 | else if (file->f_flags & O_SYNC) |
710 | ceph_release_page_vector(pages, num_pages); | 554 | ceph_release_page_vector(pages, num_pages); |
711 | 555 | ||
@@ -814,7 +658,8 @@ static ssize_t ceph_aio_write(struct kiocb *iocb, const struct iovec *iov, | |||
814 | struct ceph_file_info *fi = file->private_data; | 658 | struct ceph_file_info *fi = file->private_data; |
815 | struct inode *inode = file->f_dentry->d_inode; | 659 | struct inode *inode = file->f_dentry->d_inode; |
816 | struct ceph_inode_info *ci = ceph_inode(inode); | 660 | struct ceph_inode_info *ci = ceph_inode(inode); |
817 | struct ceph_osd_client *osdc = &ceph_sb_to_client(inode->i_sb)->osdc; | 661 | struct ceph_osd_client *osdc = |
662 | &ceph_sb_to_client(inode->i_sb)->client->osdc; | ||
818 | loff_t endoff = pos + iov->iov_len; | 663 | loff_t endoff = pos + iov->iov_len; |
819 | int want, got = 0; | 664 | int want, got = 0; |
820 | int ret, err; | 665 | int ret, err; |
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index 62377ec37edf..1d6a45b5a04c 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c | |||
@@ -1,4 +1,4 @@ | |||
1 | #include "ceph_debug.h" | 1 | #include <linux/ceph/ceph_debug.h> |
2 | 2 | ||
3 | #include <linux/module.h> | 3 | #include <linux/module.h> |
4 | #include <linux/fs.h> | 4 | #include <linux/fs.h> |
@@ -13,7 +13,8 @@ | |||
13 | #include <linux/pagevec.h> | 13 | #include <linux/pagevec.h> |
14 | 14 | ||
15 | #include "super.h" | 15 | #include "super.h" |
16 | #include "decode.h" | 16 | #include "mds_client.h" |
17 | #include <linux/ceph/decode.h> | ||
17 | 18 | ||
18 | /* | 19 | /* |
19 | * Ceph inode operations | 20 | * Ceph inode operations |
@@ -384,7 +385,7 @@ void ceph_destroy_inode(struct inode *inode) | |||
384 | */ | 385 | */ |
385 | if (ci->i_snap_realm) { | 386 | if (ci->i_snap_realm) { |
386 | struct ceph_mds_client *mdsc = | 387 | struct ceph_mds_client *mdsc = |
387 | &ceph_sb_to_client(ci->vfs_inode.i_sb)->mdsc; | 388 | ceph_sb_to_client(ci->vfs_inode.i_sb)->mdsc; |
388 | struct ceph_snap_realm *realm = ci->i_snap_realm; | 389 | struct ceph_snap_realm *realm = ci->i_snap_realm; |
389 | 390 | ||
390 | dout(" dropping residual ref to snap realm %p\n", realm); | 391 | dout(" dropping residual ref to snap realm %p\n", realm); |
@@ -685,7 +686,7 @@ static int fill_inode(struct inode *inode, | |||
685 | } | 686 | } |
686 | 687 | ||
687 | /* it may be better to set st_size in getattr instead? */ | 688 | /* it may be better to set st_size in getattr instead? */ |
688 | if (ceph_test_opt(ceph_sb_to_client(inode->i_sb), RBYTES)) | 689 | if (ceph_test_mount_opt(ceph_sb_to_client(inode->i_sb), RBYTES)) |
689 | inode->i_size = ci->i_rbytes; | 690 | inode->i_size = ci->i_rbytes; |
690 | break; | 691 | break; |
691 | default: | 692 | default: |
@@ -901,7 +902,7 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req, | |||
901 | struct inode *in = NULL; | 902 | struct inode *in = NULL; |
902 | struct ceph_mds_reply_inode *ininfo; | 903 | struct ceph_mds_reply_inode *ininfo; |
903 | struct ceph_vino vino; | 904 | struct ceph_vino vino; |
904 | struct ceph_client *client = ceph_sb_to_client(sb); | 905 | struct ceph_fs_client *fsc = ceph_sb_to_client(sb); |
905 | int i = 0; | 906 | int i = 0; |
906 | int err = 0; | 907 | int err = 0; |
907 | 908 | ||
@@ -965,7 +966,7 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req, | |||
965 | */ | 966 | */ |
966 | if (rinfo->head->is_dentry && !req->r_aborted && | 967 | if (rinfo->head->is_dentry && !req->r_aborted && |
967 | (rinfo->head->is_target || strncmp(req->r_dentry->d_name.name, | 968 | (rinfo->head->is_target || strncmp(req->r_dentry->d_name.name, |
968 | client->mount_args->snapdir_name, | 969 | fsc->mount_options->snapdir_name, |
969 | req->r_dentry->d_name.len))) { | 970 | req->r_dentry->d_name.len))) { |
970 | /* | 971 | /* |
971 | * lookup link rename : null -> possibly existing inode | 972 | * lookup link rename : null -> possibly existing inode |
@@ -1533,7 +1534,7 @@ int ceph_setattr(struct dentry *dentry, struct iattr *attr) | |||
1533 | struct inode *parent_inode = dentry->d_parent->d_inode; | 1534 | struct inode *parent_inode = dentry->d_parent->d_inode; |
1534 | const unsigned int ia_valid = attr->ia_valid; | 1535 | const unsigned int ia_valid = attr->ia_valid; |
1535 | struct ceph_mds_request *req; | 1536 | struct ceph_mds_request *req; |
1536 | struct ceph_mds_client *mdsc = &ceph_sb_to_client(dentry->d_sb)->mdsc; | 1537 | struct ceph_mds_client *mdsc = ceph_sb_to_client(dentry->d_sb)->mdsc; |
1537 | int issued; | 1538 | int issued; |
1538 | int release = 0, dirtied = 0; | 1539 | int release = 0, dirtied = 0; |
1539 | int mask = 0; | 1540 | int mask = 0; |
@@ -1728,8 +1729,8 @@ out: | |||
1728 | */ | 1729 | */ |
1729 | int ceph_do_getattr(struct inode *inode, int mask) | 1730 | int ceph_do_getattr(struct inode *inode, int mask) |
1730 | { | 1731 | { |
1731 | struct ceph_client *client = ceph_sb_to_client(inode->i_sb); | 1732 | struct ceph_fs_client *fsc = ceph_sb_to_client(inode->i_sb); |
1732 | struct ceph_mds_client *mdsc = &client->mdsc; | 1733 | struct ceph_mds_client *mdsc = fsc->mdsc; |
1733 | struct ceph_mds_request *req; | 1734 | struct ceph_mds_request *req; |
1734 | int err; | 1735 | int err; |
1735 | 1736 | ||
diff --git a/fs/ceph/ioctl.c b/fs/ceph/ioctl.c index 76e307d2aba1..8888c9ba68db 100644 --- a/fs/ceph/ioctl.c +++ b/fs/ceph/ioctl.c | |||
@@ -1,8 +1,10 @@ | |||
1 | #include <linux/in.h> | 1 | #include <linux/in.h> |
2 | 2 | ||
3 | #include "ioctl.h" | ||
4 | #include "super.h" | 3 | #include "super.h" |
5 | #include "ceph_debug.h" | 4 | #include "mds_client.h" |
5 | #include <linux/ceph/ceph_debug.h> | ||
6 | |||
7 | #include "ioctl.h" | ||
6 | 8 | ||
7 | 9 | ||
8 | /* | 10 | /* |
@@ -37,7 +39,7 @@ static long ceph_ioctl_set_layout(struct file *file, void __user *arg) | |||
37 | { | 39 | { |
38 | struct inode *inode = file->f_dentry->d_inode; | 40 | struct inode *inode = file->f_dentry->d_inode; |
39 | struct inode *parent_inode = file->f_dentry->d_parent->d_inode; | 41 | struct inode *parent_inode = file->f_dentry->d_parent->d_inode; |
40 | struct ceph_mds_client *mdsc = &ceph_sb_to_client(inode->i_sb)->mdsc; | 42 | struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc; |
41 | struct ceph_mds_request *req; | 43 | struct ceph_mds_request *req; |
42 | struct ceph_ioctl_layout l; | 44 | struct ceph_ioctl_layout l; |
43 | int err, i; | 45 | int err, i; |
@@ -90,6 +92,68 @@ static long ceph_ioctl_set_layout(struct file *file, void __user *arg) | |||
90 | } | 92 | } |
91 | 93 | ||
92 | /* | 94 | /* |
95 | * Set a layout policy on a directory inode. All items in the tree | ||
96 | * rooted at this inode will inherit this layout on creation, | ||
97 | * (It doesn't apply retroactively ) | ||
98 | * unless a subdirectory has its own layout policy. | ||
99 | */ | ||
100 | static long ceph_ioctl_set_layout_policy (struct file *file, void __user *arg) | ||
101 | { | ||
102 | struct inode *inode = file->f_dentry->d_inode; | ||
103 | struct ceph_mds_request *req; | ||
104 | struct ceph_ioctl_layout l; | ||
105 | int err, i; | ||
106 | struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc; | ||
107 | |||
108 | /* copy and validate */ | ||
109 | if (copy_from_user(&l, arg, sizeof(l))) | ||
110 | return -EFAULT; | ||
111 | |||
112 | if ((l.object_size & ~PAGE_MASK) || | ||
113 | (l.stripe_unit & ~PAGE_MASK) || | ||
114 | !l.stripe_unit || | ||
115 | (l.object_size && | ||
116 | (unsigned)l.object_size % (unsigned)l.stripe_unit)) | ||
117 | return -EINVAL; | ||
118 | |||
119 | /* make sure it's a valid data pool */ | ||
120 | if (l.data_pool > 0) { | ||
121 | mutex_lock(&mdsc->mutex); | ||
122 | err = -EINVAL; | ||
123 | for (i = 0; i < mdsc->mdsmap->m_num_data_pg_pools; i++) | ||
124 | if (mdsc->mdsmap->m_data_pg_pools[i] == l.data_pool) { | ||
125 | err = 0; | ||
126 | break; | ||
127 | } | ||
128 | mutex_unlock(&mdsc->mutex); | ||
129 | if (err) | ||
130 | return err; | ||
131 | } | ||
132 | |||
133 | req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_SETDIRLAYOUT, | ||
134 | USE_AUTH_MDS); | ||
135 | |||
136 | if (IS_ERR(req)) | ||
137 | return PTR_ERR(req); | ||
138 | req->r_inode = igrab(inode); | ||
139 | |||
140 | req->r_args.setlayout.layout.fl_stripe_unit = | ||
141 | cpu_to_le32(l.stripe_unit); | ||
142 | req->r_args.setlayout.layout.fl_stripe_count = | ||
143 | cpu_to_le32(l.stripe_count); | ||
144 | req->r_args.setlayout.layout.fl_object_size = | ||
145 | cpu_to_le32(l.object_size); | ||
146 | req->r_args.setlayout.layout.fl_pg_pool = | ||
147 | cpu_to_le32(l.data_pool); | ||
148 | req->r_args.setlayout.layout.fl_pg_preferred = | ||
149 | cpu_to_le32(l.preferred_osd); | ||
150 | |||
151 | err = ceph_mdsc_do_request(mdsc, inode, req); | ||
152 | ceph_mdsc_put_request(req); | ||
153 | return err; | ||
154 | } | ||
155 | |||
156 | /* | ||
93 | * Return object name, size/offset information, and location (OSD | 157 | * Return object name, size/offset information, and location (OSD |
94 | * number, network address) for a given file offset. | 158 | * number, network address) for a given file offset. |
95 | */ | 159 | */ |
@@ -98,7 +162,8 @@ static long ceph_ioctl_get_dataloc(struct file *file, void __user *arg) | |||
98 | struct ceph_ioctl_dataloc dl; | 162 | struct ceph_ioctl_dataloc dl; |
99 | struct inode *inode = file->f_dentry->d_inode; | 163 | struct inode *inode = file->f_dentry->d_inode; |
100 | struct ceph_inode_info *ci = ceph_inode(inode); | 164 | struct ceph_inode_info *ci = ceph_inode(inode); |
101 | struct ceph_osd_client *osdc = &ceph_sb_to_client(inode->i_sb)->osdc; | 165 | struct ceph_osd_client *osdc = |
166 | &ceph_sb_to_client(inode->i_sb)->client->osdc; | ||
102 | u64 len = 1, olen; | 167 | u64 len = 1, olen; |
103 | u64 tmp; | 168 | u64 tmp; |
104 | struct ceph_object_layout ol; | 169 | struct ceph_object_layout ol; |
@@ -174,11 +239,15 @@ long ceph_ioctl(struct file *file, unsigned int cmd, unsigned long arg) | |||
174 | case CEPH_IOC_SET_LAYOUT: | 239 | case CEPH_IOC_SET_LAYOUT: |
175 | return ceph_ioctl_set_layout(file, (void __user *)arg); | 240 | return ceph_ioctl_set_layout(file, (void __user *)arg); |
176 | 241 | ||
242 | case CEPH_IOC_SET_LAYOUT_POLICY: | ||
243 | return ceph_ioctl_set_layout_policy(file, (void __user *)arg); | ||
244 | |||
177 | case CEPH_IOC_GET_DATALOC: | 245 | case CEPH_IOC_GET_DATALOC: |
178 | return ceph_ioctl_get_dataloc(file, (void __user *)arg); | 246 | return ceph_ioctl_get_dataloc(file, (void __user *)arg); |
179 | 247 | ||
180 | case CEPH_IOC_LAZYIO: | 248 | case CEPH_IOC_LAZYIO: |
181 | return ceph_ioctl_lazyio(file); | 249 | return ceph_ioctl_lazyio(file); |
182 | } | 250 | } |
251 | |||
183 | return -ENOTTY; | 252 | return -ENOTTY; |
184 | } | 253 | } |
diff --git a/fs/ceph/ioctl.h b/fs/ceph/ioctl.h index 88451a3b6857..a6ce54e94eb5 100644 --- a/fs/ceph/ioctl.h +++ b/fs/ceph/ioctl.h | |||
@@ -4,7 +4,7 @@ | |||
4 | #include <linux/ioctl.h> | 4 | #include <linux/ioctl.h> |
5 | #include <linux/types.h> | 5 | #include <linux/types.h> |
6 | 6 | ||
7 | #define CEPH_IOCTL_MAGIC 0x97 | 7 | #define CEPH_IOCTL_MAGIC 0x98 |
8 | 8 | ||
9 | /* just use u64 to align sanely on all archs */ | 9 | /* just use u64 to align sanely on all archs */ |
10 | struct ceph_ioctl_layout { | 10 | struct ceph_ioctl_layout { |
@@ -17,6 +17,8 @@ struct ceph_ioctl_layout { | |||
17 | struct ceph_ioctl_layout) | 17 | struct ceph_ioctl_layout) |
18 | #define CEPH_IOC_SET_LAYOUT _IOW(CEPH_IOCTL_MAGIC, 2, \ | 18 | #define CEPH_IOC_SET_LAYOUT _IOW(CEPH_IOCTL_MAGIC, 2, \ |
19 | struct ceph_ioctl_layout) | 19 | struct ceph_ioctl_layout) |
20 | #define CEPH_IOC_SET_LAYOUT_POLICY _IOW(CEPH_IOCTL_MAGIC, 5, \ | ||
21 | struct ceph_ioctl_layout) | ||
20 | 22 | ||
21 | /* | 23 | /* |
22 | * Extract identity, address of the OSD and object storing a given | 24 | * Extract identity, address of the OSD and object storing a given |
diff --git a/fs/ceph/locks.c b/fs/ceph/locks.c index ff4e753aae92..40abde93c345 100644 --- a/fs/ceph/locks.c +++ b/fs/ceph/locks.c | |||
@@ -1,11 +1,11 @@ | |||
1 | #include "ceph_debug.h" | 1 | #include <linux/ceph/ceph_debug.h> |
2 | 2 | ||
3 | #include <linux/file.h> | 3 | #include <linux/file.h> |
4 | #include <linux/namei.h> | 4 | #include <linux/namei.h> |
5 | 5 | ||
6 | #include "super.h" | 6 | #include "super.h" |
7 | #include "mds_client.h" | 7 | #include "mds_client.h" |
8 | #include "pagelist.h" | 8 | #include <linux/ceph/pagelist.h> |
9 | 9 | ||
10 | /** | 10 | /** |
11 | * Implement fcntl and flock locking functions. | 11 | * Implement fcntl and flock locking functions. |
@@ -16,7 +16,7 @@ static int ceph_lock_message(u8 lock_type, u16 operation, struct file *file, | |||
16 | { | 16 | { |
17 | struct inode *inode = file->f_dentry->d_inode; | 17 | struct inode *inode = file->f_dentry->d_inode; |
18 | struct ceph_mds_client *mdsc = | 18 | struct ceph_mds_client *mdsc = |
19 | &ceph_sb_to_client(inode->i_sb)->mdsc; | 19 | ceph_sb_to_client(inode->i_sb)->mdsc; |
20 | struct ceph_mds_request *req; | 20 | struct ceph_mds_request *req; |
21 | int err; | 21 | int err; |
22 | 22 | ||
@@ -181,8 +181,9 @@ void ceph_count_locks(struct inode *inode, int *fcntl_count, int *flock_count) | |||
181 | * Encode the flock and fcntl locks for the given inode into the pagelist. | 181 | * Encode the flock and fcntl locks for the given inode into the pagelist. |
182 | * Format is: #fcntl locks, sequential fcntl locks, #flock locks, | 182 | * Format is: #fcntl locks, sequential fcntl locks, #flock locks, |
183 | * sequential flock locks. | 183 | * sequential flock locks. |
184 | * Must be called with BLK already held, and the lock numbers should have | 184 | * Must be called with lock_flocks() already held. |
185 | * been gathered under the same lock holding window. | 185 | * If we encounter more of a specific lock type than expected, |
186 | * we return the value 1. | ||
186 | */ | 187 | */ |
187 | int ceph_encode_locks(struct inode *inode, struct ceph_pagelist *pagelist, | 188 | int ceph_encode_locks(struct inode *inode, struct ceph_pagelist *pagelist, |
188 | int num_fcntl_locks, int num_flock_locks) | 189 | int num_fcntl_locks, int num_flock_locks) |
@@ -190,6 +191,8 @@ int ceph_encode_locks(struct inode *inode, struct ceph_pagelist *pagelist, | |||
190 | struct file_lock *lock; | 191 | struct file_lock *lock; |
191 | struct ceph_filelock cephlock; | 192 | struct ceph_filelock cephlock; |
192 | int err = 0; | 193 | int err = 0; |
194 | int seen_fcntl = 0; | ||
195 | int seen_flock = 0; | ||
193 | 196 | ||
194 | dout("encoding %d flock and %d fcntl locks", num_flock_locks, | 197 | dout("encoding %d flock and %d fcntl locks", num_flock_locks, |
195 | num_fcntl_locks); | 198 | num_fcntl_locks); |
@@ -198,6 +201,11 @@ int ceph_encode_locks(struct inode *inode, struct ceph_pagelist *pagelist, | |||
198 | goto fail; | 201 | goto fail; |
199 | for (lock = inode->i_flock; lock != NULL; lock = lock->fl_next) { | 202 | for (lock = inode->i_flock; lock != NULL; lock = lock->fl_next) { |
200 | if (lock->fl_flags & FL_POSIX) { | 203 | if (lock->fl_flags & FL_POSIX) { |
204 | ++seen_fcntl; | ||
205 | if (seen_fcntl > num_fcntl_locks) { | ||
206 | err = -ENOSPC; | ||
207 | goto fail; | ||
208 | } | ||
201 | err = lock_to_ceph_filelock(lock, &cephlock); | 209 | err = lock_to_ceph_filelock(lock, &cephlock); |
202 | if (err) | 210 | if (err) |
203 | goto fail; | 211 | goto fail; |
@@ -213,6 +221,11 @@ int ceph_encode_locks(struct inode *inode, struct ceph_pagelist *pagelist, | |||
213 | goto fail; | 221 | goto fail; |
214 | for (lock = inode->i_flock; lock != NULL; lock = lock->fl_next) { | 222 | for (lock = inode->i_flock; lock != NULL; lock = lock->fl_next) { |
215 | if (lock->fl_flags & FL_FLOCK) { | 223 | if (lock->fl_flags & FL_FLOCK) { |
224 | ++seen_flock; | ||
225 | if (seen_flock > num_flock_locks) { | ||
226 | err = -ENOSPC; | ||
227 | goto fail; | ||
228 | } | ||
216 | err = lock_to_ceph_filelock(lock, &cephlock); | 229 | err = lock_to_ceph_filelock(lock, &cephlock); |
217 | if (err) | 230 | if (err) |
218 | goto fail; | 231 | goto fail; |
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index fad95f8f2608..3142b15940c2 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c | |||
@@ -1,17 +1,21 @@ | |||
1 | #include "ceph_debug.h" | 1 | #include <linux/ceph/ceph_debug.h> |
2 | 2 | ||
3 | #include <linux/fs.h> | ||
3 | #include <linux/wait.h> | 4 | #include <linux/wait.h> |
4 | #include <linux/slab.h> | 5 | #include <linux/slab.h> |
5 | #include <linux/sched.h> | 6 | #include <linux/sched.h> |
7 | #include <linux/debugfs.h> | ||
8 | #include <linux/seq_file.h> | ||
6 | #include <linux/smp_lock.h> | 9 | #include <linux/smp_lock.h> |
7 | 10 | ||
8 | #include "mds_client.h" | ||
9 | #include "mon_client.h" | ||
10 | #include "super.h" | 11 | #include "super.h" |
11 | #include "messenger.h" | 12 | #include "mds_client.h" |
12 | #include "decode.h" | 13 | |
13 | #include "auth.h" | 14 | #include <linux/ceph/messenger.h> |
14 | #include "pagelist.h" | 15 | #include <linux/ceph/decode.h> |
16 | #include <linux/ceph/pagelist.h> | ||
17 | #include <linux/ceph/auth.h> | ||
18 | #include <linux/ceph/debugfs.h> | ||
15 | 19 | ||
16 | /* | 20 | /* |
17 | * A cluster of MDS (metadata server) daemons is responsible for | 21 | * A cluster of MDS (metadata server) daemons is responsible for |
@@ -286,8 +290,9 @@ void ceph_put_mds_session(struct ceph_mds_session *s) | |||
286 | atomic_read(&s->s_ref), atomic_read(&s->s_ref)-1); | 290 | atomic_read(&s->s_ref), atomic_read(&s->s_ref)-1); |
287 | if (atomic_dec_and_test(&s->s_ref)) { | 291 | if (atomic_dec_and_test(&s->s_ref)) { |
288 | if (s->s_authorizer) | 292 | if (s->s_authorizer) |
289 | s->s_mdsc->client->monc.auth->ops->destroy_authorizer( | 293 | s->s_mdsc->fsc->client->monc.auth->ops->destroy_authorizer( |
290 | s->s_mdsc->client->monc.auth, s->s_authorizer); | 294 | s->s_mdsc->fsc->client->monc.auth, |
295 | s->s_authorizer); | ||
291 | kfree(s); | 296 | kfree(s); |
292 | } | 297 | } |
293 | } | 298 | } |
@@ -344,7 +349,7 @@ static struct ceph_mds_session *register_session(struct ceph_mds_client *mdsc, | |||
344 | s->s_seq = 0; | 349 | s->s_seq = 0; |
345 | mutex_init(&s->s_mutex); | 350 | mutex_init(&s->s_mutex); |
346 | 351 | ||
347 | ceph_con_init(mdsc->client->msgr, &s->s_con); | 352 | ceph_con_init(mdsc->fsc->client->msgr, &s->s_con); |
348 | s->s_con.private = s; | 353 | s->s_con.private = s; |
349 | s->s_con.ops = &mds_con_ops; | 354 | s->s_con.ops = &mds_con_ops; |
350 | s->s_con.peer_name.type = CEPH_ENTITY_TYPE_MDS; | 355 | s->s_con.peer_name.type = CEPH_ENTITY_TYPE_MDS; |
@@ -599,7 +604,7 @@ static int __choose_mds(struct ceph_mds_client *mdsc, | |||
599 | } else if (req->r_dentry) { | 604 | } else if (req->r_dentry) { |
600 | struct inode *dir = req->r_dentry->d_parent->d_inode; | 605 | struct inode *dir = req->r_dentry->d_parent->d_inode; |
601 | 606 | ||
602 | if (dir->i_sb != mdsc->client->sb) { | 607 | if (dir->i_sb != mdsc->fsc->sb) { |
603 | /* not this fs! */ | 608 | /* not this fs! */ |
604 | inode = req->r_dentry->d_inode; | 609 | inode = req->r_dentry->d_inode; |
605 | } else if (ceph_snap(dir) != CEPH_NOSNAP) { | 610 | } else if (ceph_snap(dir) != CEPH_NOSNAP) { |
@@ -884,7 +889,7 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap, | |||
884 | __ceph_remove_cap(cap); | 889 | __ceph_remove_cap(cap); |
885 | if (!__ceph_is_any_real_caps(ci)) { | 890 | if (!__ceph_is_any_real_caps(ci)) { |
886 | struct ceph_mds_client *mdsc = | 891 | struct ceph_mds_client *mdsc = |
887 | &ceph_sb_to_client(inode->i_sb)->mdsc; | 892 | ceph_sb_to_client(inode->i_sb)->mdsc; |
888 | 893 | ||
889 | spin_lock(&mdsc->cap_dirty_lock); | 894 | spin_lock(&mdsc->cap_dirty_lock); |
890 | if (!list_empty(&ci->i_dirty_item)) { | 895 | if (!list_empty(&ci->i_dirty_item)) { |
@@ -1146,7 +1151,7 @@ int ceph_add_cap_releases(struct ceph_mds_client *mdsc, | |||
1146 | struct ceph_msg *msg, *partial = NULL; | 1151 | struct ceph_msg *msg, *partial = NULL; |
1147 | struct ceph_mds_cap_release *head; | 1152 | struct ceph_mds_cap_release *head; |
1148 | int err = -ENOMEM; | 1153 | int err = -ENOMEM; |
1149 | int extra = mdsc->client->mount_args->cap_release_safety; | 1154 | int extra = mdsc->fsc->mount_options->cap_release_safety; |
1150 | int num; | 1155 | int num; |
1151 | 1156 | ||
1152 | dout("add_cap_releases %p mds%d extra %d\n", session, session->s_mds, | 1157 | dout("add_cap_releases %p mds%d extra %d\n", session, session->s_mds, |
@@ -2085,7 +2090,7 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg) | |||
2085 | 2090 | ||
2086 | /* insert trace into our cache */ | 2091 | /* insert trace into our cache */ |
2087 | mutex_lock(&req->r_fill_mutex); | 2092 | mutex_lock(&req->r_fill_mutex); |
2088 | err = ceph_fill_trace(mdsc->client->sb, req, req->r_session); | 2093 | err = ceph_fill_trace(mdsc->fsc->sb, req, req->r_session); |
2089 | if (err == 0) { | 2094 | if (err == 0) { |
2090 | if (result == 0 && rinfo->dir_nr) | 2095 | if (result == 0 && rinfo->dir_nr) |
2091 | ceph_readdir_prepopulate(req, req->r_session); | 2096 | ceph_readdir_prepopulate(req, req->r_session); |
@@ -2361,19 +2366,35 @@ static int encode_caps_cb(struct inode *inode, struct ceph_cap *cap, | |||
2361 | 2366 | ||
2362 | if (recon_state->flock) { | 2367 | if (recon_state->flock) { |
2363 | int num_fcntl_locks, num_flock_locks; | 2368 | int num_fcntl_locks, num_flock_locks; |
2364 | 2369 | struct ceph_pagelist_cursor trunc_point; | |
2365 | lock_kernel(); | 2370 | |
2366 | ceph_count_locks(inode, &num_fcntl_locks, &num_flock_locks); | 2371 | ceph_pagelist_set_cursor(pagelist, &trunc_point); |
2367 | rec.v2.flock_len = (2*sizeof(u32) + | 2372 | do { |
2368 | (num_fcntl_locks+num_flock_locks) * | 2373 | lock_flocks(); |
2369 | sizeof(struct ceph_filelock)); | 2374 | ceph_count_locks(inode, &num_fcntl_locks, |
2370 | 2375 | &num_flock_locks); | |
2371 | err = ceph_pagelist_append(pagelist, &rec, reclen); | 2376 | rec.v2.flock_len = (2*sizeof(u32) + |
2372 | if (!err) | 2377 | (num_fcntl_locks+num_flock_locks) * |
2373 | err = ceph_encode_locks(inode, pagelist, | 2378 | sizeof(struct ceph_filelock)); |
2374 | num_fcntl_locks, | 2379 | unlock_flocks(); |
2375 | num_flock_locks); | 2380 | |
2376 | unlock_kernel(); | 2381 | /* pre-alloc pagelist */ |
2382 | ceph_pagelist_truncate(pagelist, &trunc_point); | ||
2383 | err = ceph_pagelist_append(pagelist, &rec, reclen); | ||
2384 | if (!err) | ||
2385 | err = ceph_pagelist_reserve(pagelist, | ||
2386 | rec.v2.flock_len); | ||
2387 | |||
2388 | /* encode locks */ | ||
2389 | if (!err) { | ||
2390 | lock_flocks(); | ||
2391 | err = ceph_encode_locks(inode, | ||
2392 | pagelist, | ||
2393 | num_fcntl_locks, | ||
2394 | num_flock_locks); | ||
2395 | unlock_flocks(); | ||
2396 | } | ||
2397 | } while (err == -ENOSPC); | ||
2377 | } else { | 2398 | } else { |
2378 | err = ceph_pagelist_append(pagelist, &rec, reclen); | 2399 | err = ceph_pagelist_append(pagelist, &rec, reclen); |
2379 | } | 2400 | } |
@@ -2613,7 +2634,7 @@ static void handle_lease(struct ceph_mds_client *mdsc, | |||
2613 | struct ceph_mds_session *session, | 2634 | struct ceph_mds_session *session, |
2614 | struct ceph_msg *msg) | 2635 | struct ceph_msg *msg) |
2615 | { | 2636 | { |
2616 | struct super_block *sb = mdsc->client->sb; | 2637 | struct super_block *sb = mdsc->fsc->sb; |
2617 | struct inode *inode; | 2638 | struct inode *inode; |
2618 | struct ceph_inode_info *ci; | 2639 | struct ceph_inode_info *ci; |
2619 | struct dentry *parent, *dentry; | 2640 | struct dentry *parent, *dentry; |
@@ -2891,10 +2912,16 @@ static void delayed_work(struct work_struct *work) | |||
2891 | schedule_delayed(mdsc); | 2912 | schedule_delayed(mdsc); |
2892 | } | 2913 | } |
2893 | 2914 | ||
2915 | int ceph_mdsc_init(struct ceph_fs_client *fsc) | ||
2894 | 2916 | ||
2895 | int ceph_mdsc_init(struct ceph_mds_client *mdsc, struct ceph_client *client) | ||
2896 | { | 2917 | { |
2897 | mdsc->client = client; | 2918 | struct ceph_mds_client *mdsc; |
2919 | |||
2920 | mdsc = kzalloc(sizeof(struct ceph_mds_client), GFP_NOFS); | ||
2921 | if (!mdsc) | ||
2922 | return -ENOMEM; | ||
2923 | mdsc->fsc = fsc; | ||
2924 | fsc->mdsc = mdsc; | ||
2898 | mutex_init(&mdsc->mutex); | 2925 | mutex_init(&mdsc->mutex); |
2899 | mdsc->mdsmap = kzalloc(sizeof(*mdsc->mdsmap), GFP_NOFS); | 2926 | mdsc->mdsmap = kzalloc(sizeof(*mdsc->mdsmap), GFP_NOFS); |
2900 | if (mdsc->mdsmap == NULL) | 2927 | if (mdsc->mdsmap == NULL) |
@@ -2927,7 +2954,7 @@ int ceph_mdsc_init(struct ceph_mds_client *mdsc, struct ceph_client *client) | |||
2927 | INIT_LIST_HEAD(&mdsc->dentry_lru); | 2954 | INIT_LIST_HEAD(&mdsc->dentry_lru); |
2928 | 2955 | ||
2929 | ceph_caps_init(mdsc); | 2956 | ceph_caps_init(mdsc); |
2930 | ceph_adjust_min_caps(mdsc, client->min_caps); | 2957 | ceph_adjust_min_caps(mdsc, fsc->min_caps); |
2931 | 2958 | ||
2932 | return 0; | 2959 | return 0; |
2933 | } | 2960 | } |
@@ -2939,7 +2966,7 @@ int ceph_mdsc_init(struct ceph_mds_client *mdsc, struct ceph_client *client) | |||
2939 | static void wait_requests(struct ceph_mds_client *mdsc) | 2966 | static void wait_requests(struct ceph_mds_client *mdsc) |
2940 | { | 2967 | { |
2941 | struct ceph_mds_request *req; | 2968 | struct ceph_mds_request *req; |
2942 | struct ceph_client *client = mdsc->client; | 2969 | struct ceph_fs_client *fsc = mdsc->fsc; |
2943 | 2970 | ||
2944 | mutex_lock(&mdsc->mutex); | 2971 | mutex_lock(&mdsc->mutex); |
2945 | if (__get_oldest_req(mdsc)) { | 2972 | if (__get_oldest_req(mdsc)) { |
@@ -2947,7 +2974,7 @@ static void wait_requests(struct ceph_mds_client *mdsc) | |||
2947 | 2974 | ||
2948 | dout("wait_requests waiting for requests\n"); | 2975 | dout("wait_requests waiting for requests\n"); |
2949 | wait_for_completion_timeout(&mdsc->safe_umount_waiters, | 2976 | wait_for_completion_timeout(&mdsc->safe_umount_waiters, |
2950 | client->mount_args->mount_timeout * HZ); | 2977 | fsc->client->options->mount_timeout * HZ); |
2951 | 2978 | ||
2952 | /* tear down remaining requests */ | 2979 | /* tear down remaining requests */ |
2953 | mutex_lock(&mdsc->mutex); | 2980 | mutex_lock(&mdsc->mutex); |
@@ -3030,7 +3057,7 @@ void ceph_mdsc_sync(struct ceph_mds_client *mdsc) | |||
3030 | { | 3057 | { |
3031 | u64 want_tid, want_flush; | 3058 | u64 want_tid, want_flush; |
3032 | 3059 | ||
3033 | if (mdsc->client->mount_state == CEPH_MOUNT_SHUTDOWN) | 3060 | if (mdsc->fsc->mount_state == CEPH_MOUNT_SHUTDOWN) |
3034 | return; | 3061 | return; |
3035 | 3062 | ||
3036 | dout("sync\n"); | 3063 | dout("sync\n"); |
@@ -3053,7 +3080,7 @@ bool done_closing_sessions(struct ceph_mds_client *mdsc) | |||
3053 | { | 3080 | { |
3054 | int i, n = 0; | 3081 | int i, n = 0; |
3055 | 3082 | ||
3056 | if (mdsc->client->mount_state == CEPH_MOUNT_SHUTDOWN) | 3083 | if (mdsc->fsc->mount_state == CEPH_MOUNT_SHUTDOWN) |
3057 | return true; | 3084 | return true; |
3058 | 3085 | ||
3059 | mutex_lock(&mdsc->mutex); | 3086 | mutex_lock(&mdsc->mutex); |
@@ -3071,8 +3098,8 @@ void ceph_mdsc_close_sessions(struct ceph_mds_client *mdsc) | |||
3071 | { | 3098 | { |
3072 | struct ceph_mds_session *session; | 3099 | struct ceph_mds_session *session; |
3073 | int i; | 3100 | int i; |
3074 | struct ceph_client *client = mdsc->client; | 3101 | struct ceph_fs_client *fsc = mdsc->fsc; |
3075 | unsigned long timeout = client->mount_args->mount_timeout * HZ; | 3102 | unsigned long timeout = fsc->client->options->mount_timeout * HZ; |
3076 | 3103 | ||
3077 | dout("close_sessions\n"); | 3104 | dout("close_sessions\n"); |
3078 | 3105 | ||
@@ -3119,7 +3146,7 @@ void ceph_mdsc_close_sessions(struct ceph_mds_client *mdsc) | |||
3119 | dout("stopped\n"); | 3146 | dout("stopped\n"); |
3120 | } | 3147 | } |
3121 | 3148 | ||
3122 | void ceph_mdsc_stop(struct ceph_mds_client *mdsc) | 3149 | static void ceph_mdsc_stop(struct ceph_mds_client *mdsc) |
3123 | { | 3150 | { |
3124 | dout("stop\n"); | 3151 | dout("stop\n"); |
3125 | cancel_delayed_work_sync(&mdsc->delayed_work); /* cancel timer */ | 3152 | cancel_delayed_work_sync(&mdsc->delayed_work); /* cancel timer */ |
@@ -3129,6 +3156,15 @@ void ceph_mdsc_stop(struct ceph_mds_client *mdsc) | |||
3129 | ceph_caps_finalize(mdsc); | 3156 | ceph_caps_finalize(mdsc); |
3130 | } | 3157 | } |
3131 | 3158 | ||
3159 | void ceph_mdsc_destroy(struct ceph_fs_client *fsc) | ||
3160 | { | ||
3161 | struct ceph_mds_client *mdsc = fsc->mdsc; | ||
3162 | |||
3163 | ceph_mdsc_stop(mdsc); | ||
3164 | fsc->mdsc = NULL; | ||
3165 | kfree(mdsc); | ||
3166 | } | ||
3167 | |||
3132 | 3168 | ||
3133 | /* | 3169 | /* |
3134 | * handle mds map update. | 3170 | * handle mds map update. |
@@ -3145,14 +3181,14 @@ void ceph_mdsc_handle_map(struct ceph_mds_client *mdsc, struct ceph_msg *msg) | |||
3145 | 3181 | ||
3146 | ceph_decode_need(&p, end, sizeof(fsid)+2*sizeof(u32), bad); | 3182 | ceph_decode_need(&p, end, sizeof(fsid)+2*sizeof(u32), bad); |
3147 | ceph_decode_copy(&p, &fsid, sizeof(fsid)); | 3183 | ceph_decode_copy(&p, &fsid, sizeof(fsid)); |
3148 | if (ceph_check_fsid(mdsc->client, &fsid) < 0) | 3184 | if (ceph_check_fsid(mdsc->fsc->client, &fsid) < 0) |
3149 | return; | 3185 | return; |
3150 | epoch = ceph_decode_32(&p); | 3186 | epoch = ceph_decode_32(&p); |
3151 | maplen = ceph_decode_32(&p); | 3187 | maplen = ceph_decode_32(&p); |
3152 | dout("handle_map epoch %u len %d\n", epoch, (int)maplen); | 3188 | dout("handle_map epoch %u len %d\n", epoch, (int)maplen); |
3153 | 3189 | ||
3154 | /* do we need it? */ | 3190 | /* do we need it? */ |
3155 | ceph_monc_got_mdsmap(&mdsc->client->monc, epoch); | 3191 | ceph_monc_got_mdsmap(&mdsc->fsc->client->monc, epoch); |
3156 | mutex_lock(&mdsc->mutex); | 3192 | mutex_lock(&mdsc->mutex); |
3157 | if (mdsc->mdsmap && epoch <= mdsc->mdsmap->m_epoch) { | 3193 | if (mdsc->mdsmap && epoch <= mdsc->mdsmap->m_epoch) { |
3158 | dout("handle_map epoch %u <= our %u\n", | 3194 | dout("handle_map epoch %u <= our %u\n", |
@@ -3176,7 +3212,7 @@ void ceph_mdsc_handle_map(struct ceph_mds_client *mdsc, struct ceph_msg *msg) | |||
3176 | } else { | 3212 | } else { |
3177 | mdsc->mdsmap = newmap; /* first mds map */ | 3213 | mdsc->mdsmap = newmap; /* first mds map */ |
3178 | } | 3214 | } |
3179 | mdsc->client->sb->s_maxbytes = mdsc->mdsmap->m_max_file_size; | 3215 | mdsc->fsc->sb->s_maxbytes = mdsc->mdsmap->m_max_file_size; |
3180 | 3216 | ||
3181 | __wake_requests(mdsc, &mdsc->waiting_for_map); | 3217 | __wake_requests(mdsc, &mdsc->waiting_for_map); |
3182 | 3218 | ||
@@ -3277,7 +3313,7 @@ static int get_authorizer(struct ceph_connection *con, | |||
3277 | { | 3313 | { |
3278 | struct ceph_mds_session *s = con->private; | 3314 | struct ceph_mds_session *s = con->private; |
3279 | struct ceph_mds_client *mdsc = s->s_mdsc; | 3315 | struct ceph_mds_client *mdsc = s->s_mdsc; |
3280 | struct ceph_auth_client *ac = mdsc->client->monc.auth; | 3316 | struct ceph_auth_client *ac = mdsc->fsc->client->monc.auth; |
3281 | int ret = 0; | 3317 | int ret = 0; |
3282 | 3318 | ||
3283 | if (force_new && s->s_authorizer) { | 3319 | if (force_new && s->s_authorizer) { |
@@ -3311,7 +3347,7 @@ static int verify_authorizer_reply(struct ceph_connection *con, int len) | |||
3311 | { | 3347 | { |
3312 | struct ceph_mds_session *s = con->private; | 3348 | struct ceph_mds_session *s = con->private; |
3313 | struct ceph_mds_client *mdsc = s->s_mdsc; | 3349 | struct ceph_mds_client *mdsc = s->s_mdsc; |
3314 | struct ceph_auth_client *ac = mdsc->client->monc.auth; | 3350 | struct ceph_auth_client *ac = mdsc->fsc->client->monc.auth; |
3315 | 3351 | ||
3316 | return ac->ops->verify_authorizer_reply(ac, s->s_authorizer, len); | 3352 | return ac->ops->verify_authorizer_reply(ac, s->s_authorizer, len); |
3317 | } | 3353 | } |
@@ -3320,12 +3356,12 @@ static int invalidate_authorizer(struct ceph_connection *con) | |||
3320 | { | 3356 | { |
3321 | struct ceph_mds_session *s = con->private; | 3357 | struct ceph_mds_session *s = con->private; |
3322 | struct ceph_mds_client *mdsc = s->s_mdsc; | 3358 | struct ceph_mds_client *mdsc = s->s_mdsc; |
3323 | struct ceph_auth_client *ac = mdsc->client->monc.auth; | 3359 | struct ceph_auth_client *ac = mdsc->fsc->client->monc.auth; |
3324 | 3360 | ||
3325 | if (ac->ops->invalidate_authorizer) | 3361 | if (ac->ops->invalidate_authorizer) |
3326 | ac->ops->invalidate_authorizer(ac, CEPH_ENTITY_TYPE_MDS); | 3362 | ac->ops->invalidate_authorizer(ac, CEPH_ENTITY_TYPE_MDS); |
3327 | 3363 | ||
3328 | return ceph_monc_validate_auth(&mdsc->client->monc); | 3364 | return ceph_monc_validate_auth(&mdsc->fsc->client->monc); |
3329 | } | 3365 | } |
3330 | 3366 | ||
3331 | static const struct ceph_connection_operations mds_con_ops = { | 3367 | static const struct ceph_connection_operations mds_con_ops = { |
@@ -3338,7 +3374,4 @@ static const struct ceph_connection_operations mds_con_ops = { | |||
3338 | .peer_reset = peer_reset, | 3374 | .peer_reset = peer_reset, |
3339 | }; | 3375 | }; |
3340 | 3376 | ||
3341 | |||
3342 | |||
3343 | |||
3344 | /* eof */ | 3377 | /* eof */ |
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h index c98267ce6d2a..d66d63c72355 100644 --- a/fs/ceph/mds_client.h +++ b/fs/ceph/mds_client.h | |||
@@ -8,9 +8,9 @@ | |||
8 | #include <linux/rbtree.h> | 8 | #include <linux/rbtree.h> |
9 | #include <linux/spinlock.h> | 9 | #include <linux/spinlock.h> |
10 | 10 | ||
11 | #include "types.h" | 11 | #include <linux/ceph/types.h> |
12 | #include "messenger.h" | 12 | #include <linux/ceph/messenger.h> |
13 | #include "mdsmap.h" | 13 | #include <linux/ceph/mdsmap.h> |
14 | 14 | ||
15 | /* | 15 | /* |
16 | * Some lock dependencies: | 16 | * Some lock dependencies: |
@@ -26,7 +26,7 @@ | |||
26 | * | 26 | * |
27 | */ | 27 | */ |
28 | 28 | ||
29 | struct ceph_client; | 29 | struct ceph_fs_client; |
30 | struct ceph_cap; | 30 | struct ceph_cap; |
31 | 31 | ||
32 | /* | 32 | /* |
@@ -230,7 +230,7 @@ struct ceph_mds_request { | |||
230 | * mds client state | 230 | * mds client state |
231 | */ | 231 | */ |
232 | struct ceph_mds_client { | 232 | struct ceph_mds_client { |
233 | struct ceph_client *client; | 233 | struct ceph_fs_client *fsc; |
234 | struct mutex mutex; /* all nested structures */ | 234 | struct mutex mutex; /* all nested structures */ |
235 | 235 | ||
236 | struct ceph_mdsmap *mdsmap; | 236 | struct ceph_mdsmap *mdsmap; |
@@ -289,11 +289,6 @@ struct ceph_mds_client { | |||
289 | int caps_avail_count; /* unused, unreserved */ | 289 | int caps_avail_count; /* unused, unreserved */ |
290 | int caps_min_count; /* keep at least this many | 290 | int caps_min_count; /* keep at least this many |
291 | (unreserved) */ | 291 | (unreserved) */ |
292 | |||
293 | #ifdef CONFIG_DEBUG_FS | ||
294 | struct dentry *debugfs_file; | ||
295 | #endif | ||
296 | |||
297 | spinlock_t dentry_lru_lock; | 292 | spinlock_t dentry_lru_lock; |
298 | struct list_head dentry_lru; | 293 | struct list_head dentry_lru; |
299 | int num_dentry; | 294 | int num_dentry; |
@@ -316,10 +311,9 @@ extern void ceph_put_mds_session(struct ceph_mds_session *s); | |||
316 | extern int ceph_send_msg_mds(struct ceph_mds_client *mdsc, | 311 | extern int ceph_send_msg_mds(struct ceph_mds_client *mdsc, |
317 | struct ceph_msg *msg, int mds); | 312 | struct ceph_msg *msg, int mds); |
318 | 313 | ||
319 | extern int ceph_mdsc_init(struct ceph_mds_client *mdsc, | 314 | extern int ceph_mdsc_init(struct ceph_fs_client *fsc); |
320 | struct ceph_client *client); | ||
321 | extern void ceph_mdsc_close_sessions(struct ceph_mds_client *mdsc); | 315 | extern void ceph_mdsc_close_sessions(struct ceph_mds_client *mdsc); |
322 | extern void ceph_mdsc_stop(struct ceph_mds_client *mdsc); | 316 | extern void ceph_mdsc_destroy(struct ceph_fs_client *fsc); |
323 | 317 | ||
324 | extern void ceph_mdsc_sync(struct ceph_mds_client *mdsc); | 318 | extern void ceph_mdsc_sync(struct ceph_mds_client *mdsc); |
325 | 319 | ||
diff --git a/fs/ceph/mdsmap.c b/fs/ceph/mdsmap.c index 040be6d1150b..73b7d44e8a35 100644 --- a/fs/ceph/mdsmap.c +++ b/fs/ceph/mdsmap.c | |||
@@ -1,4 +1,4 @@ | |||
1 | #include "ceph_debug.h" | 1 | #include <linux/ceph/ceph_debug.h> |
2 | 2 | ||
3 | #include <linux/bug.h> | 3 | #include <linux/bug.h> |
4 | #include <linux/err.h> | 4 | #include <linux/err.h> |
@@ -6,9 +6,9 @@ | |||
6 | #include <linux/slab.h> | 6 | #include <linux/slab.h> |
7 | #include <linux/types.h> | 7 | #include <linux/types.h> |
8 | 8 | ||
9 | #include "mdsmap.h" | 9 | #include <linux/ceph/mdsmap.h> |
10 | #include "messenger.h" | 10 | #include <linux/ceph/messenger.h> |
11 | #include "decode.h" | 11 | #include <linux/ceph/decode.h> |
12 | 12 | ||
13 | #include "super.h" | 13 | #include "super.h" |
14 | 14 | ||
@@ -117,7 +117,8 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end) | |||
117 | } | 117 | } |
118 | 118 | ||
119 | dout("mdsmap_decode %d/%d %lld mds%d.%d %s %s\n", | 119 | dout("mdsmap_decode %d/%d %lld mds%d.%d %s %s\n", |
120 | i+1, n, global_id, mds, inc, pr_addr(&addr.in_addr), | 120 | i+1, n, global_id, mds, inc, |
121 | ceph_pr_addr(&addr.in_addr), | ||
121 | ceph_mds_state_name(state)); | 122 | ceph_mds_state_name(state)); |
122 | if (mds >= 0 && mds < m->m_max_mds && state > 0) { | 123 | if (mds >= 0 && mds < m->m_max_mds && state > 0) { |
123 | m->m_info[mds].global_id = global_id; | 124 | m->m_info[mds].global_id = global_id; |
diff --git a/fs/ceph/pagelist.c b/fs/ceph/pagelist.c deleted file mode 100644 index 46a368b6dce5..000000000000 --- a/fs/ceph/pagelist.c +++ /dev/null | |||
@@ -1,63 +0,0 @@ | |||
1 | |||
2 | #include <linux/gfp.h> | ||
3 | #include <linux/pagemap.h> | ||
4 | #include <linux/highmem.h> | ||
5 | |||
6 | #include "pagelist.h" | ||
7 | |||
8 | static void ceph_pagelist_unmap_tail(struct ceph_pagelist *pl) | ||
9 | { | ||
10 | struct page *page = list_entry(pl->head.prev, struct page, | ||
11 | lru); | ||
12 | kunmap(page); | ||
13 | } | ||
14 | |||
15 | int ceph_pagelist_release(struct ceph_pagelist *pl) | ||
16 | { | ||
17 | if (pl->mapped_tail) | ||
18 | ceph_pagelist_unmap_tail(pl); | ||
19 | |||
20 | while (!list_empty(&pl->head)) { | ||
21 | struct page *page = list_first_entry(&pl->head, struct page, | ||
22 | lru); | ||
23 | list_del(&page->lru); | ||
24 | __free_page(page); | ||
25 | } | ||
26 | return 0; | ||
27 | } | ||
28 | |||
29 | static int ceph_pagelist_addpage(struct ceph_pagelist *pl) | ||
30 | { | ||
31 | struct page *page = __page_cache_alloc(GFP_NOFS); | ||
32 | if (!page) | ||
33 | return -ENOMEM; | ||
34 | pl->room += PAGE_SIZE; | ||
35 | list_add_tail(&page->lru, &pl->head); | ||
36 | if (pl->mapped_tail) | ||
37 | ceph_pagelist_unmap_tail(pl); | ||
38 | pl->mapped_tail = kmap(page); | ||
39 | return 0; | ||
40 | } | ||
41 | |||
42 | int ceph_pagelist_append(struct ceph_pagelist *pl, void *buf, size_t len) | ||
43 | { | ||
44 | while (pl->room < len) { | ||
45 | size_t bit = pl->room; | ||
46 | int ret; | ||
47 | |||
48 | memcpy(pl->mapped_tail + (pl->length & ~PAGE_CACHE_MASK), | ||
49 | buf, bit); | ||
50 | pl->length += bit; | ||
51 | pl->room -= bit; | ||
52 | buf += bit; | ||
53 | len -= bit; | ||
54 | ret = ceph_pagelist_addpage(pl); | ||
55 | if (ret) | ||
56 | return ret; | ||
57 | } | ||
58 | |||
59 | memcpy(pl->mapped_tail + (pl->length & ~PAGE_CACHE_MASK), buf, len); | ||
60 | pl->length += len; | ||
61 | pl->room -= len; | ||
62 | return 0; | ||
63 | } | ||
diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c index 190b6c4a6f2b..39c243acd062 100644 --- a/fs/ceph/snap.c +++ b/fs/ceph/snap.c | |||
@@ -1,10 +1,12 @@ | |||
1 | #include "ceph_debug.h" | 1 | #include <linux/ceph/ceph_debug.h> |
2 | 2 | ||
3 | #include <linux/sort.h> | 3 | #include <linux/sort.h> |
4 | #include <linux/slab.h> | 4 | #include <linux/slab.h> |
5 | 5 | ||
6 | #include "super.h" | 6 | #include "super.h" |
7 | #include "decode.h" | 7 | #include "mds_client.h" |
8 | |||
9 | #include <linux/ceph/decode.h> | ||
8 | 10 | ||
9 | /* | 11 | /* |
10 | * Snapshots in ceph are driven in large part by cooperation from the | 12 | * Snapshots in ceph are driven in large part by cooperation from the |
@@ -526,7 +528,7 @@ int __ceph_finish_cap_snap(struct ceph_inode_info *ci, | |||
526 | struct ceph_cap_snap *capsnap) | 528 | struct ceph_cap_snap *capsnap) |
527 | { | 529 | { |
528 | struct inode *inode = &ci->vfs_inode; | 530 | struct inode *inode = &ci->vfs_inode; |
529 | struct ceph_mds_client *mdsc = &ceph_sb_to_client(inode->i_sb)->mdsc; | 531 | struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc; |
530 | 532 | ||
531 | BUG_ON(capsnap->writing); | 533 | BUG_ON(capsnap->writing); |
532 | capsnap->size = inode->i_size; | 534 | capsnap->size = inode->i_size; |
@@ -747,7 +749,7 @@ void ceph_handle_snap(struct ceph_mds_client *mdsc, | |||
747 | struct ceph_mds_session *session, | 749 | struct ceph_mds_session *session, |
748 | struct ceph_msg *msg) | 750 | struct ceph_msg *msg) |
749 | { | 751 | { |
750 | struct super_block *sb = mdsc->client->sb; | 752 | struct super_block *sb = mdsc->fsc->sb; |
751 | int mds = session->s_mds; | 753 | int mds = session->s_mds; |
752 | u64 split; | 754 | u64 split; |
753 | int op; | 755 | int op; |
diff --git a/fs/ceph/ceph_strings.c b/fs/ceph/strings.c index c6179d3a26a2..cd5097d7c804 100644 --- a/fs/ceph/ceph_strings.c +++ b/fs/ceph/strings.c | |||
@@ -1,71 +1,9 @@ | |||
1 | /* | 1 | /* |
2 | * Ceph string constants | 2 | * Ceph fs string constants |
3 | */ | 3 | */ |
4 | #include "types.h" | 4 | #include <linux/module.h> |
5 | #include <linux/ceph/types.h> | ||
5 | 6 | ||
6 | const char *ceph_entity_type_name(int type) | ||
7 | { | ||
8 | switch (type) { | ||
9 | case CEPH_ENTITY_TYPE_MDS: return "mds"; | ||
10 | case CEPH_ENTITY_TYPE_OSD: return "osd"; | ||
11 | case CEPH_ENTITY_TYPE_MON: return "mon"; | ||
12 | case CEPH_ENTITY_TYPE_CLIENT: return "client"; | ||
13 | case CEPH_ENTITY_TYPE_AUTH: return "auth"; | ||
14 | default: return "unknown"; | ||
15 | } | ||
16 | } | ||
17 | |||
18 | const char *ceph_osd_op_name(int op) | ||
19 | { | ||
20 | switch (op) { | ||
21 | case CEPH_OSD_OP_READ: return "read"; | ||
22 | case CEPH_OSD_OP_STAT: return "stat"; | ||
23 | |||
24 | case CEPH_OSD_OP_MASKTRUNC: return "masktrunc"; | ||
25 | |||
26 | case CEPH_OSD_OP_WRITE: return "write"; | ||
27 | case CEPH_OSD_OP_DELETE: return "delete"; | ||
28 | case CEPH_OSD_OP_TRUNCATE: return "truncate"; | ||
29 | case CEPH_OSD_OP_ZERO: return "zero"; | ||
30 | case CEPH_OSD_OP_WRITEFULL: return "writefull"; | ||
31 | case CEPH_OSD_OP_ROLLBACK: return "rollback"; | ||
32 | |||
33 | case CEPH_OSD_OP_APPEND: return "append"; | ||
34 | case CEPH_OSD_OP_STARTSYNC: return "startsync"; | ||
35 | case CEPH_OSD_OP_SETTRUNC: return "settrunc"; | ||
36 | case CEPH_OSD_OP_TRIMTRUNC: return "trimtrunc"; | ||
37 | |||
38 | case CEPH_OSD_OP_TMAPUP: return "tmapup"; | ||
39 | case CEPH_OSD_OP_TMAPGET: return "tmapget"; | ||
40 | case CEPH_OSD_OP_TMAPPUT: return "tmapput"; | ||
41 | |||
42 | case CEPH_OSD_OP_GETXATTR: return "getxattr"; | ||
43 | case CEPH_OSD_OP_GETXATTRS: return "getxattrs"; | ||
44 | case CEPH_OSD_OP_SETXATTR: return "setxattr"; | ||
45 | case CEPH_OSD_OP_SETXATTRS: return "setxattrs"; | ||
46 | case CEPH_OSD_OP_RESETXATTRS: return "resetxattrs"; | ||
47 | case CEPH_OSD_OP_RMXATTR: return "rmxattr"; | ||
48 | case CEPH_OSD_OP_CMPXATTR: return "cmpxattr"; | ||
49 | |||
50 | case CEPH_OSD_OP_PULL: return "pull"; | ||
51 | case CEPH_OSD_OP_PUSH: return "push"; | ||
52 | case CEPH_OSD_OP_BALANCEREADS: return "balance-reads"; | ||
53 | case CEPH_OSD_OP_UNBALANCEREADS: return "unbalance-reads"; | ||
54 | case CEPH_OSD_OP_SCRUB: return "scrub"; | ||
55 | |||
56 | case CEPH_OSD_OP_WRLOCK: return "wrlock"; | ||
57 | case CEPH_OSD_OP_WRUNLOCK: return "wrunlock"; | ||
58 | case CEPH_OSD_OP_RDLOCK: return "rdlock"; | ||
59 | case CEPH_OSD_OP_RDUNLOCK: return "rdunlock"; | ||
60 | case CEPH_OSD_OP_UPLOCK: return "uplock"; | ||
61 | case CEPH_OSD_OP_DNLOCK: return "dnlock"; | ||
62 | |||
63 | case CEPH_OSD_OP_CALL: return "call"; | ||
64 | |||
65 | case CEPH_OSD_OP_PGLS: return "pgls"; | ||
66 | } | ||
67 | return "???"; | ||
68 | } | ||
69 | 7 | ||
70 | const char *ceph_mds_state_name(int s) | 8 | const char *ceph_mds_state_name(int s) |
71 | { | 9 | { |
@@ -177,17 +115,3 @@ const char *ceph_snap_op_name(int o) | |||
177 | } | 115 | } |
178 | return "???"; | 116 | return "???"; |
179 | } | 117 | } |
180 | |||
181 | const char *ceph_pool_op_name(int op) | ||
182 | { | ||
183 | switch (op) { | ||
184 | case POOL_OP_CREATE: return "create"; | ||
185 | case POOL_OP_DELETE: return "delete"; | ||
186 | case POOL_OP_AUID_CHANGE: return "auid change"; | ||
187 | case POOL_OP_CREATE_SNAP: return "create snap"; | ||
188 | case POOL_OP_DELETE_SNAP: return "delete snap"; | ||
189 | case POOL_OP_CREATE_UNMANAGED_SNAP: return "create unmanaged snap"; | ||
190 | case POOL_OP_DELETE_UNMANAGED_SNAP: return "delete unmanaged snap"; | ||
191 | } | ||
192 | return "???"; | ||
193 | } | ||
diff --git a/fs/ceph/super.c b/fs/ceph/super.c index 9922628532b2..d6e0e0421891 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c | |||
@@ -1,5 +1,5 @@ | |||
1 | 1 | ||
2 | #include "ceph_debug.h" | 2 | #include <linux/ceph/ceph_debug.h> |
3 | 3 | ||
4 | #include <linux/backing-dev.h> | 4 | #include <linux/backing-dev.h> |
5 | #include <linux/ctype.h> | 5 | #include <linux/ctype.h> |
@@ -15,10 +15,13 @@ | |||
15 | #include <linux/statfs.h> | 15 | #include <linux/statfs.h> |
16 | #include <linux/string.h> | 16 | #include <linux/string.h> |
17 | 17 | ||
18 | #include "decode.h" | ||
19 | #include "super.h" | 18 | #include "super.h" |
20 | #include "mon_client.h" | 19 | #include "mds_client.h" |
21 | #include "auth.h" | 20 | |
21 | #include <linux/ceph/decode.h> | ||
22 | #include <linux/ceph/mon_client.h> | ||
23 | #include <linux/ceph/auth.h> | ||
24 | #include <linux/ceph/debugfs.h> | ||
22 | 25 | ||
23 | /* | 26 | /* |
24 | * Ceph superblock operations | 27 | * Ceph superblock operations |
@@ -26,36 +29,22 @@ | |||
26 | * Handle the basics of mounting, unmounting. | 29 | * Handle the basics of mounting, unmounting. |
27 | */ | 30 | */ |
28 | 31 | ||
29 | |||
30 | /* | ||
31 | * find filename portion of a path (/foo/bar/baz -> baz) | ||
32 | */ | ||
33 | const char *ceph_file_part(const char *s, int len) | ||
34 | { | ||
35 | const char *e = s + len; | ||
36 | |||
37 | while (e != s && *(e-1) != '/') | ||
38 | e--; | ||
39 | return e; | ||
40 | } | ||
41 | |||
42 | |||
43 | /* | 32 | /* |
44 | * super ops | 33 | * super ops |
45 | */ | 34 | */ |
46 | static void ceph_put_super(struct super_block *s) | 35 | static void ceph_put_super(struct super_block *s) |
47 | { | 36 | { |
48 | struct ceph_client *client = ceph_sb_to_client(s); | 37 | struct ceph_fs_client *fsc = ceph_sb_to_client(s); |
49 | 38 | ||
50 | dout("put_super\n"); | 39 | dout("put_super\n"); |
51 | ceph_mdsc_close_sessions(&client->mdsc); | 40 | ceph_mdsc_close_sessions(fsc->mdsc); |
52 | 41 | ||
53 | /* | 42 | /* |
54 | * ensure we release the bdi before put_anon_super releases | 43 | * ensure we release the bdi before put_anon_super releases |
55 | * the device name. | 44 | * the device name. |
56 | */ | 45 | */ |
57 | if (s->s_bdi == &client->backing_dev_info) { | 46 | if (s->s_bdi == &fsc->backing_dev_info) { |
58 | bdi_unregister(&client->backing_dev_info); | 47 | bdi_unregister(&fsc->backing_dev_info); |
59 | s->s_bdi = NULL; | 48 | s->s_bdi = NULL; |
60 | } | 49 | } |
61 | 50 | ||
@@ -64,14 +53,14 @@ static void ceph_put_super(struct super_block *s) | |||
64 | 53 | ||
65 | static int ceph_statfs(struct dentry *dentry, struct kstatfs *buf) | 54 | static int ceph_statfs(struct dentry *dentry, struct kstatfs *buf) |
66 | { | 55 | { |
67 | struct ceph_client *client = ceph_inode_to_client(dentry->d_inode); | 56 | struct ceph_fs_client *fsc = ceph_inode_to_client(dentry->d_inode); |
68 | struct ceph_monmap *monmap = client->monc.monmap; | 57 | struct ceph_monmap *monmap = fsc->client->monc.monmap; |
69 | struct ceph_statfs st; | 58 | struct ceph_statfs st; |
70 | u64 fsid; | 59 | u64 fsid; |
71 | int err; | 60 | int err; |
72 | 61 | ||
73 | dout("statfs\n"); | 62 | dout("statfs\n"); |
74 | err = ceph_monc_do_statfs(&client->monc, &st); | 63 | err = ceph_monc_do_statfs(&fsc->client->monc, &st); |
75 | if (err < 0) | 64 | if (err < 0) |
76 | return err; | 65 | return err; |
77 | 66 | ||
@@ -104,238 +93,28 @@ static int ceph_statfs(struct dentry *dentry, struct kstatfs *buf) | |||
104 | 93 | ||
105 | static int ceph_sync_fs(struct super_block *sb, int wait) | 94 | static int ceph_sync_fs(struct super_block *sb, int wait) |
106 | { | 95 | { |
107 | struct ceph_client *client = ceph_sb_to_client(sb); | 96 | struct ceph_fs_client *fsc = ceph_sb_to_client(sb); |
108 | 97 | ||
109 | if (!wait) { | 98 | if (!wait) { |
110 | dout("sync_fs (non-blocking)\n"); | 99 | dout("sync_fs (non-blocking)\n"); |
111 | ceph_flush_dirty_caps(&client->mdsc); | 100 | ceph_flush_dirty_caps(fsc->mdsc); |
112 | dout("sync_fs (non-blocking) done\n"); | 101 | dout("sync_fs (non-blocking) done\n"); |
113 | return 0; | 102 | return 0; |
114 | } | 103 | } |
115 | 104 | ||
116 | dout("sync_fs (blocking)\n"); | 105 | dout("sync_fs (blocking)\n"); |
117 | ceph_osdc_sync(&ceph_sb_to_client(sb)->osdc); | 106 | ceph_osdc_sync(&fsc->client->osdc); |
118 | ceph_mdsc_sync(&ceph_sb_to_client(sb)->mdsc); | 107 | ceph_mdsc_sync(fsc->mdsc); |
119 | dout("sync_fs (blocking) done\n"); | 108 | dout("sync_fs (blocking) done\n"); |
120 | return 0; | 109 | return 0; |
121 | } | 110 | } |
122 | 111 | ||
123 | static int default_congestion_kb(void) | ||
124 | { | ||
125 | int congestion_kb; | ||
126 | |||
127 | /* | ||
128 | * Copied from NFS | ||
129 | * | ||
130 | * congestion size, scale with available memory. | ||
131 | * | ||
132 | * 64MB: 8192k | ||
133 | * 128MB: 11585k | ||
134 | * 256MB: 16384k | ||
135 | * 512MB: 23170k | ||
136 | * 1GB: 32768k | ||
137 | * 2GB: 46340k | ||
138 | * 4GB: 65536k | ||
139 | * 8GB: 92681k | ||
140 | * 16GB: 131072k | ||
141 | * | ||
142 | * This allows larger machines to have larger/more transfers. | ||
143 | * Limit the default to 256M | ||
144 | */ | ||
145 | congestion_kb = (16*int_sqrt(totalram_pages)) << (PAGE_SHIFT-10); | ||
146 | if (congestion_kb > 256*1024) | ||
147 | congestion_kb = 256*1024; | ||
148 | |||
149 | return congestion_kb; | ||
150 | } | ||
151 | |||
152 | /** | ||
153 | * ceph_show_options - Show mount options in /proc/mounts | ||
154 | * @m: seq_file to write to | ||
155 | * @mnt: mount descriptor | ||
156 | */ | ||
157 | static int ceph_show_options(struct seq_file *m, struct vfsmount *mnt) | ||
158 | { | ||
159 | struct ceph_client *client = ceph_sb_to_client(mnt->mnt_sb); | ||
160 | struct ceph_mount_args *args = client->mount_args; | ||
161 | |||
162 | if (args->flags & CEPH_OPT_FSID) | ||
163 | seq_printf(m, ",fsid=%pU", &args->fsid); | ||
164 | if (args->flags & CEPH_OPT_NOSHARE) | ||
165 | seq_puts(m, ",noshare"); | ||
166 | if (args->flags & CEPH_OPT_DIRSTAT) | ||
167 | seq_puts(m, ",dirstat"); | ||
168 | if ((args->flags & CEPH_OPT_RBYTES) == 0) | ||
169 | seq_puts(m, ",norbytes"); | ||
170 | if (args->flags & CEPH_OPT_NOCRC) | ||
171 | seq_puts(m, ",nocrc"); | ||
172 | if (args->flags & CEPH_OPT_NOASYNCREADDIR) | ||
173 | seq_puts(m, ",noasyncreaddir"); | ||
174 | |||
175 | if (args->mount_timeout != CEPH_MOUNT_TIMEOUT_DEFAULT) | ||
176 | seq_printf(m, ",mount_timeout=%d", args->mount_timeout); | ||
177 | if (args->osd_idle_ttl != CEPH_OSD_IDLE_TTL_DEFAULT) | ||
178 | seq_printf(m, ",osd_idle_ttl=%d", args->osd_idle_ttl); | ||
179 | if (args->osd_timeout != CEPH_OSD_TIMEOUT_DEFAULT) | ||
180 | seq_printf(m, ",osdtimeout=%d", args->osd_timeout); | ||
181 | if (args->osd_keepalive_timeout != CEPH_OSD_KEEPALIVE_DEFAULT) | ||
182 | seq_printf(m, ",osdkeepalivetimeout=%d", | ||
183 | args->osd_keepalive_timeout); | ||
184 | if (args->wsize) | ||
185 | seq_printf(m, ",wsize=%d", args->wsize); | ||
186 | if (args->rsize != CEPH_MOUNT_RSIZE_DEFAULT) | ||
187 | seq_printf(m, ",rsize=%d", args->rsize); | ||
188 | if (args->congestion_kb != default_congestion_kb()) | ||
189 | seq_printf(m, ",write_congestion_kb=%d", args->congestion_kb); | ||
190 | if (args->caps_wanted_delay_min != CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT) | ||
191 | seq_printf(m, ",caps_wanted_delay_min=%d", | ||
192 | args->caps_wanted_delay_min); | ||
193 | if (args->caps_wanted_delay_max != CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT) | ||
194 | seq_printf(m, ",caps_wanted_delay_max=%d", | ||
195 | args->caps_wanted_delay_max); | ||
196 | if (args->cap_release_safety != CEPH_CAP_RELEASE_SAFETY_DEFAULT) | ||
197 | seq_printf(m, ",cap_release_safety=%d", | ||
198 | args->cap_release_safety); | ||
199 | if (args->max_readdir != CEPH_MAX_READDIR_DEFAULT) | ||
200 | seq_printf(m, ",readdir_max_entries=%d", args->max_readdir); | ||
201 | if (args->max_readdir_bytes != CEPH_MAX_READDIR_BYTES_DEFAULT) | ||
202 | seq_printf(m, ",readdir_max_bytes=%d", args->max_readdir_bytes); | ||
203 | if (strcmp(args->snapdir_name, CEPH_SNAPDIRNAME_DEFAULT)) | ||
204 | seq_printf(m, ",snapdirname=%s", args->snapdir_name); | ||
205 | if (args->name) | ||
206 | seq_printf(m, ",name=%s", args->name); | ||
207 | if (args->secret) | ||
208 | seq_puts(m, ",secret=<hidden>"); | ||
209 | return 0; | ||
210 | } | ||
211 | |||
212 | /* | ||
213 | * caches | ||
214 | */ | ||
215 | struct kmem_cache *ceph_inode_cachep; | ||
216 | struct kmem_cache *ceph_cap_cachep; | ||
217 | struct kmem_cache *ceph_dentry_cachep; | ||
218 | struct kmem_cache *ceph_file_cachep; | ||
219 | |||
220 | static void ceph_inode_init_once(void *foo) | ||
221 | { | ||
222 | struct ceph_inode_info *ci = foo; | ||
223 | inode_init_once(&ci->vfs_inode); | ||
224 | } | ||
225 | |||
226 | static int __init init_caches(void) | ||
227 | { | ||
228 | ceph_inode_cachep = kmem_cache_create("ceph_inode_info", | ||
229 | sizeof(struct ceph_inode_info), | ||
230 | __alignof__(struct ceph_inode_info), | ||
231 | (SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD), | ||
232 | ceph_inode_init_once); | ||
233 | if (ceph_inode_cachep == NULL) | ||
234 | return -ENOMEM; | ||
235 | |||
236 | ceph_cap_cachep = KMEM_CACHE(ceph_cap, | ||
237 | SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD); | ||
238 | if (ceph_cap_cachep == NULL) | ||
239 | goto bad_cap; | ||
240 | |||
241 | ceph_dentry_cachep = KMEM_CACHE(ceph_dentry_info, | ||
242 | SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD); | ||
243 | if (ceph_dentry_cachep == NULL) | ||
244 | goto bad_dentry; | ||
245 | |||
246 | ceph_file_cachep = KMEM_CACHE(ceph_file_info, | ||
247 | SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD); | ||
248 | if (ceph_file_cachep == NULL) | ||
249 | goto bad_file; | ||
250 | |||
251 | return 0; | ||
252 | |||
253 | bad_file: | ||
254 | kmem_cache_destroy(ceph_dentry_cachep); | ||
255 | bad_dentry: | ||
256 | kmem_cache_destroy(ceph_cap_cachep); | ||
257 | bad_cap: | ||
258 | kmem_cache_destroy(ceph_inode_cachep); | ||
259 | return -ENOMEM; | ||
260 | } | ||
261 | |||
262 | static void destroy_caches(void) | ||
263 | { | ||
264 | kmem_cache_destroy(ceph_inode_cachep); | ||
265 | kmem_cache_destroy(ceph_cap_cachep); | ||
266 | kmem_cache_destroy(ceph_dentry_cachep); | ||
267 | kmem_cache_destroy(ceph_file_cachep); | ||
268 | } | ||
269 | |||
270 | |||
271 | /* | ||
272 | * ceph_umount_begin - initiate forced umount. Tear down down the | ||
273 | * mount, skipping steps that may hang while waiting for server(s). | ||
274 | */ | ||
275 | static void ceph_umount_begin(struct super_block *sb) | ||
276 | { | ||
277 | struct ceph_client *client = ceph_sb_to_client(sb); | ||
278 | |||
279 | dout("ceph_umount_begin - starting forced umount\n"); | ||
280 | if (!client) | ||
281 | return; | ||
282 | client->mount_state = CEPH_MOUNT_SHUTDOWN; | ||
283 | return; | ||
284 | } | ||
285 | |||
286 | static const struct super_operations ceph_super_ops = { | ||
287 | .alloc_inode = ceph_alloc_inode, | ||
288 | .destroy_inode = ceph_destroy_inode, | ||
289 | .write_inode = ceph_write_inode, | ||
290 | .sync_fs = ceph_sync_fs, | ||
291 | .put_super = ceph_put_super, | ||
292 | .show_options = ceph_show_options, | ||
293 | .statfs = ceph_statfs, | ||
294 | .umount_begin = ceph_umount_begin, | ||
295 | }; | ||
296 | |||
297 | |||
298 | const char *ceph_msg_type_name(int type) | ||
299 | { | ||
300 | switch (type) { | ||
301 | case CEPH_MSG_SHUTDOWN: return "shutdown"; | ||
302 | case CEPH_MSG_PING: return "ping"; | ||
303 | case CEPH_MSG_AUTH: return "auth"; | ||
304 | case CEPH_MSG_AUTH_REPLY: return "auth_reply"; | ||
305 | case CEPH_MSG_MON_MAP: return "mon_map"; | ||
306 | case CEPH_MSG_MON_GET_MAP: return "mon_get_map"; | ||
307 | case CEPH_MSG_MON_SUBSCRIBE: return "mon_subscribe"; | ||
308 | case CEPH_MSG_MON_SUBSCRIBE_ACK: return "mon_subscribe_ack"; | ||
309 | case CEPH_MSG_STATFS: return "statfs"; | ||
310 | case CEPH_MSG_STATFS_REPLY: return "statfs_reply"; | ||
311 | case CEPH_MSG_MDS_MAP: return "mds_map"; | ||
312 | case CEPH_MSG_CLIENT_SESSION: return "client_session"; | ||
313 | case CEPH_MSG_CLIENT_RECONNECT: return "client_reconnect"; | ||
314 | case CEPH_MSG_CLIENT_REQUEST: return "client_request"; | ||
315 | case CEPH_MSG_CLIENT_REQUEST_FORWARD: return "client_request_forward"; | ||
316 | case CEPH_MSG_CLIENT_REPLY: return "client_reply"; | ||
317 | case CEPH_MSG_CLIENT_CAPS: return "client_caps"; | ||
318 | case CEPH_MSG_CLIENT_CAPRELEASE: return "client_cap_release"; | ||
319 | case CEPH_MSG_CLIENT_SNAP: return "client_snap"; | ||
320 | case CEPH_MSG_CLIENT_LEASE: return "client_lease"; | ||
321 | case CEPH_MSG_OSD_MAP: return "osd_map"; | ||
322 | case CEPH_MSG_OSD_OP: return "osd_op"; | ||
323 | case CEPH_MSG_OSD_OPREPLY: return "osd_opreply"; | ||
324 | default: return "unknown"; | ||
325 | } | ||
326 | } | ||
327 | |||
328 | |||
329 | /* | 112 | /* |
330 | * mount options | 113 | * mount options |
331 | */ | 114 | */ |
332 | enum { | 115 | enum { |
333 | Opt_wsize, | 116 | Opt_wsize, |
334 | Opt_rsize, | 117 | Opt_rsize, |
335 | Opt_osdtimeout, | ||
336 | Opt_osdkeepalivetimeout, | ||
337 | Opt_mount_timeout, | ||
338 | Opt_osd_idle_ttl, | ||
339 | Opt_caps_wanted_delay_min, | 118 | Opt_caps_wanted_delay_min, |
340 | Opt_caps_wanted_delay_max, | 119 | Opt_caps_wanted_delay_max, |
341 | Opt_cap_release_safety, | 120 | Opt_cap_release_safety, |
@@ -344,29 +123,19 @@ enum { | |||
344 | Opt_congestion_kb, | 123 | Opt_congestion_kb, |
345 | Opt_last_int, | 124 | Opt_last_int, |
346 | /* int args above */ | 125 | /* int args above */ |
347 | Opt_fsid, | ||
348 | Opt_snapdirname, | 126 | Opt_snapdirname, |
349 | Opt_name, | ||
350 | Opt_secret, | ||
351 | Opt_last_string, | 127 | Opt_last_string, |
352 | /* string args above */ | 128 | /* string args above */ |
353 | Opt_ip, | ||
354 | Opt_noshare, | ||
355 | Opt_dirstat, | 129 | Opt_dirstat, |
356 | Opt_nodirstat, | 130 | Opt_nodirstat, |
357 | Opt_rbytes, | 131 | Opt_rbytes, |
358 | Opt_norbytes, | 132 | Opt_norbytes, |
359 | Opt_nocrc, | ||
360 | Opt_noasyncreaddir, | 133 | Opt_noasyncreaddir, |
361 | }; | 134 | }; |
362 | 135 | ||
363 | static match_table_t arg_tokens = { | 136 | static match_table_t fsopt_tokens = { |
364 | {Opt_wsize, "wsize=%d"}, | 137 | {Opt_wsize, "wsize=%d"}, |
365 | {Opt_rsize, "rsize=%d"}, | 138 | {Opt_rsize, "rsize=%d"}, |
366 | {Opt_osdtimeout, "osdtimeout=%d"}, | ||
367 | {Opt_osdkeepalivetimeout, "osdkeepalive=%d"}, | ||
368 | {Opt_mount_timeout, "mount_timeout=%d"}, | ||
369 | {Opt_osd_idle_ttl, "osd_idle_ttl=%d"}, | ||
370 | {Opt_caps_wanted_delay_min, "caps_wanted_delay_min=%d"}, | 139 | {Opt_caps_wanted_delay_min, "caps_wanted_delay_min=%d"}, |
371 | {Opt_caps_wanted_delay_max, "caps_wanted_delay_max=%d"}, | 140 | {Opt_caps_wanted_delay_max, "caps_wanted_delay_max=%d"}, |
372 | {Opt_cap_release_safety, "cap_release_safety=%d"}, | 141 | {Opt_cap_release_safety, "cap_release_safety=%d"}, |
@@ -374,403 +143,459 @@ static match_table_t arg_tokens = { | |||
374 | {Opt_readdir_max_bytes, "readdir_max_bytes=%d"}, | 143 | {Opt_readdir_max_bytes, "readdir_max_bytes=%d"}, |
375 | {Opt_congestion_kb, "write_congestion_kb=%d"}, | 144 | {Opt_congestion_kb, "write_congestion_kb=%d"}, |
376 | /* int args above */ | 145 | /* int args above */ |
377 | {Opt_fsid, "fsid=%s"}, | ||
378 | {Opt_snapdirname, "snapdirname=%s"}, | 146 | {Opt_snapdirname, "snapdirname=%s"}, |
379 | {Opt_name, "name=%s"}, | ||
380 | {Opt_secret, "secret=%s"}, | ||
381 | /* string args above */ | 147 | /* string args above */ |
382 | {Opt_ip, "ip=%s"}, | ||
383 | {Opt_noshare, "noshare"}, | ||
384 | {Opt_dirstat, "dirstat"}, | 148 | {Opt_dirstat, "dirstat"}, |
385 | {Opt_nodirstat, "nodirstat"}, | 149 | {Opt_nodirstat, "nodirstat"}, |
386 | {Opt_rbytes, "rbytes"}, | 150 | {Opt_rbytes, "rbytes"}, |
387 | {Opt_norbytes, "norbytes"}, | 151 | {Opt_norbytes, "norbytes"}, |
388 | {Opt_nocrc, "nocrc"}, | ||
389 | {Opt_noasyncreaddir, "noasyncreaddir"}, | 152 | {Opt_noasyncreaddir, "noasyncreaddir"}, |
390 | {-1, NULL} | 153 | {-1, NULL} |
391 | }; | 154 | }; |
392 | 155 | ||
393 | static int parse_fsid(const char *str, struct ceph_fsid *fsid) | 156 | static int parse_fsopt_token(char *c, void *private) |
394 | { | 157 | { |
395 | int i = 0; | 158 | struct ceph_mount_options *fsopt = private; |
396 | char tmp[3]; | 159 | substring_t argstr[MAX_OPT_ARGS]; |
397 | int err = -EINVAL; | 160 | int token, intval, ret; |
398 | int d; | 161 | |
399 | 162 | token = match_token((char *)c, fsopt_tokens, argstr); | |
400 | dout("parse_fsid '%s'\n", str); | 163 | if (token < 0) |
401 | tmp[2] = 0; | 164 | return -EINVAL; |
402 | while (*str && i < 16) { | 165 | |
403 | if (ispunct(*str)) { | 166 | if (token < Opt_last_int) { |
404 | str++; | 167 | ret = match_int(&argstr[0], &intval); |
405 | continue; | 168 | if (ret < 0) { |
169 | pr_err("bad mount option arg (not int) " | ||
170 | "at '%s'\n", c); | ||
171 | return ret; | ||
406 | } | 172 | } |
407 | if (!isxdigit(str[0]) || !isxdigit(str[1])) | 173 | dout("got int token %d val %d\n", token, intval); |
408 | break; | 174 | } else if (token > Opt_last_int && token < Opt_last_string) { |
409 | tmp[0] = str[0]; | 175 | dout("got string token %d val %s\n", token, |
410 | tmp[1] = str[1]; | 176 | argstr[0].from); |
411 | if (sscanf(tmp, "%x", &d) < 1) | 177 | } else { |
412 | break; | 178 | dout("got token %d\n", token); |
413 | fsid->fsid[i] = d & 0xff; | ||
414 | i++; | ||
415 | str += 2; | ||
416 | } | 179 | } |
417 | 180 | ||
418 | if (i == 16) | 181 | switch (token) { |
419 | err = 0; | 182 | case Opt_snapdirname: |
420 | dout("parse_fsid ret %d got fsid %pU", err, fsid); | 183 | kfree(fsopt->snapdir_name); |
421 | return err; | 184 | fsopt->snapdir_name = kstrndup(argstr[0].from, |
185 | argstr[0].to-argstr[0].from, | ||
186 | GFP_KERNEL); | ||
187 | if (!fsopt->snapdir_name) | ||
188 | return -ENOMEM; | ||
189 | break; | ||
190 | |||
191 | /* misc */ | ||
192 | case Opt_wsize: | ||
193 | fsopt->wsize = intval; | ||
194 | break; | ||
195 | case Opt_rsize: | ||
196 | fsopt->rsize = intval; | ||
197 | break; | ||
198 | case Opt_caps_wanted_delay_min: | ||
199 | fsopt->caps_wanted_delay_min = intval; | ||
200 | break; | ||
201 | case Opt_caps_wanted_delay_max: | ||
202 | fsopt->caps_wanted_delay_max = intval; | ||
203 | break; | ||
204 | case Opt_readdir_max_entries: | ||
205 | fsopt->max_readdir = intval; | ||
206 | break; | ||
207 | case Opt_readdir_max_bytes: | ||
208 | fsopt->max_readdir_bytes = intval; | ||
209 | break; | ||
210 | case Opt_congestion_kb: | ||
211 | fsopt->congestion_kb = intval; | ||
212 | break; | ||
213 | case Opt_dirstat: | ||
214 | fsopt->flags |= CEPH_MOUNT_OPT_DIRSTAT; | ||
215 | break; | ||
216 | case Opt_nodirstat: | ||
217 | fsopt->flags &= ~CEPH_MOUNT_OPT_DIRSTAT; | ||
218 | break; | ||
219 | case Opt_rbytes: | ||
220 | fsopt->flags |= CEPH_MOUNT_OPT_RBYTES; | ||
221 | break; | ||
222 | case Opt_norbytes: | ||
223 | fsopt->flags &= ~CEPH_MOUNT_OPT_RBYTES; | ||
224 | break; | ||
225 | case Opt_noasyncreaddir: | ||
226 | fsopt->flags |= CEPH_MOUNT_OPT_NOASYNCREADDIR; | ||
227 | break; | ||
228 | default: | ||
229 | BUG_ON(token); | ||
230 | } | ||
231 | return 0; | ||
422 | } | 232 | } |
423 | 233 | ||
424 | static struct ceph_mount_args *parse_mount_args(int flags, char *options, | 234 | static void destroy_mount_options(struct ceph_mount_options *args) |
425 | const char *dev_name, | ||
426 | const char **path) | ||
427 | { | 235 | { |
428 | struct ceph_mount_args *args; | 236 | dout("destroy_mount_options %p\n", args); |
429 | const char *c; | 237 | kfree(args->snapdir_name); |
430 | int err = -ENOMEM; | 238 | kfree(args); |
431 | substring_t argstr[MAX_OPT_ARGS]; | 239 | } |
432 | 240 | ||
433 | args = kzalloc(sizeof(*args), GFP_KERNEL); | 241 | static int strcmp_null(const char *s1, const char *s2) |
434 | if (!args) | 242 | { |
435 | return ERR_PTR(-ENOMEM); | 243 | if (!s1 && !s2) |
436 | args->mon_addr = kcalloc(CEPH_MAX_MON, sizeof(*args->mon_addr), | 244 | return 0; |
437 | GFP_KERNEL); | 245 | if (s1 && !s2) |
438 | if (!args->mon_addr) | 246 | return -1; |
439 | goto out; | 247 | if (!s1 && s2) |
248 | return 1; | ||
249 | return strcmp(s1, s2); | ||
250 | } | ||
440 | 251 | ||
441 | dout("parse_mount_args %p, dev_name '%s'\n", args, dev_name); | 252 | static int compare_mount_options(struct ceph_mount_options *new_fsopt, |
442 | 253 | struct ceph_options *new_opt, | |
443 | /* start with defaults */ | 254 | struct ceph_fs_client *fsc) |
444 | args->sb_flags = flags; | 255 | { |
445 | args->flags = CEPH_OPT_DEFAULT; | 256 | struct ceph_mount_options *fsopt1 = new_fsopt; |
446 | args->osd_timeout = CEPH_OSD_TIMEOUT_DEFAULT; | 257 | struct ceph_mount_options *fsopt2 = fsc->mount_options; |
447 | args->osd_keepalive_timeout = CEPH_OSD_KEEPALIVE_DEFAULT; | 258 | int ofs = offsetof(struct ceph_mount_options, snapdir_name); |
448 | args->mount_timeout = CEPH_MOUNT_TIMEOUT_DEFAULT; /* seconds */ | 259 | int ret; |
449 | args->osd_idle_ttl = CEPH_OSD_IDLE_TTL_DEFAULT; /* seconds */ | ||
450 | args->caps_wanted_delay_min = CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT; | ||
451 | args->caps_wanted_delay_max = CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT; | ||
452 | args->rsize = CEPH_MOUNT_RSIZE_DEFAULT; | ||
453 | args->snapdir_name = kstrdup(CEPH_SNAPDIRNAME_DEFAULT, GFP_KERNEL); | ||
454 | args->cap_release_safety = CEPH_CAP_RELEASE_SAFETY_DEFAULT; | ||
455 | args->max_readdir = CEPH_MAX_READDIR_DEFAULT; | ||
456 | args->max_readdir_bytes = CEPH_MAX_READDIR_BYTES_DEFAULT; | ||
457 | args->congestion_kb = default_congestion_kb(); | ||
458 | |||
459 | /* ip1[:port1][,ip2[:port2]...]:/subdir/in/fs */ | ||
460 | err = -EINVAL; | ||
461 | if (!dev_name) | ||
462 | goto out; | ||
463 | *path = strstr(dev_name, ":/"); | ||
464 | if (*path == NULL) { | ||
465 | pr_err("device name is missing path (no :/ in %s)\n", | ||
466 | dev_name); | ||
467 | goto out; | ||
468 | } | ||
469 | 260 | ||
470 | /* get mon ip(s) */ | 261 | ret = memcmp(fsopt1, fsopt2, ofs); |
471 | err = ceph_parse_ips(dev_name, *path, args->mon_addr, | 262 | if (ret) |
472 | CEPH_MAX_MON, &args->num_mon); | 263 | return ret; |
473 | if (err < 0) | 264 | |
474 | goto out; | 265 | ret = strcmp_null(fsopt1->snapdir_name, fsopt2->snapdir_name); |
266 | if (ret) | ||
267 | return ret; | ||
268 | |||
269 | return ceph_compare_options(new_opt, fsc->client); | ||
270 | } | ||
271 | |||
272 | static int parse_mount_options(struct ceph_mount_options **pfsopt, | ||
273 | struct ceph_options **popt, | ||
274 | int flags, char *options, | ||
275 | const char *dev_name, | ||
276 | const char **path) | ||
277 | { | ||
278 | struct ceph_mount_options *fsopt; | ||
279 | const char *dev_name_end; | ||
280 | int err = -ENOMEM; | ||
281 | |||
282 | fsopt = kzalloc(sizeof(*fsopt), GFP_KERNEL); | ||
283 | if (!fsopt) | ||
284 | return -ENOMEM; | ||
285 | |||
286 | dout("parse_mount_options %p, dev_name '%s'\n", fsopt, dev_name); | ||
287 | |||
288 | fsopt->sb_flags = flags; | ||
289 | fsopt->flags = CEPH_MOUNT_OPT_DEFAULT; | ||
290 | |||
291 | fsopt->rsize = CEPH_MOUNT_RSIZE_DEFAULT; | ||
292 | fsopt->snapdir_name = kstrdup(CEPH_SNAPDIRNAME_DEFAULT, GFP_KERNEL); | ||
293 | fsopt->cap_release_safety = CEPH_CAP_RELEASE_SAFETY_DEFAULT; | ||
294 | fsopt->max_readdir = CEPH_MAX_READDIR_DEFAULT; | ||
295 | fsopt->max_readdir_bytes = CEPH_MAX_READDIR_BYTES_DEFAULT; | ||
296 | fsopt->congestion_kb = default_congestion_kb(); | ||
297 | |||
298 | /* ip1[:port1][,ip2[:port2]...]:/subdir/in/fs */ | ||
299 | err = -EINVAL; | ||
300 | if (!dev_name) | ||
301 | goto out; | ||
302 | *path = strstr(dev_name, ":/"); | ||
303 | if (*path == NULL) { | ||
304 | pr_err("device name is missing path (no :/ in %s)\n", | ||
305 | dev_name); | ||
306 | goto out; | ||
307 | } | ||
308 | dev_name_end = *path; | ||
309 | dout("device name '%.*s'\n", (int)(dev_name_end - dev_name), dev_name); | ||
475 | 310 | ||
476 | /* path on server */ | 311 | /* path on server */ |
477 | *path += 2; | 312 | *path += 2; |
478 | dout("server path '%s'\n", *path); | 313 | dout("server path '%s'\n", *path); |
479 | 314 | ||
480 | /* parse mount options */ | 315 | err = ceph_parse_options(popt, options, dev_name, dev_name_end, |
481 | while ((c = strsep(&options, ",")) != NULL) { | 316 | parse_fsopt_token, (void *)fsopt); |
482 | int token, intval, ret; | 317 | if (err) |
483 | if (!*c) | 318 | goto out; |
484 | continue; | 319 | |
485 | err = -EINVAL; | 320 | /* success */ |
486 | token = match_token((char *)c, arg_tokens, argstr); | 321 | *pfsopt = fsopt; |
487 | if (token < 0) { | 322 | return 0; |
488 | pr_err("bad mount option at '%s'\n", c); | ||
489 | goto out; | ||
490 | } | ||
491 | if (token < Opt_last_int) { | ||
492 | ret = match_int(&argstr[0], &intval); | ||
493 | if (ret < 0) { | ||
494 | pr_err("bad mount option arg (not int) " | ||
495 | "at '%s'\n", c); | ||
496 | continue; | ||
497 | } | ||
498 | dout("got int token %d val %d\n", token, intval); | ||
499 | } else if (token > Opt_last_int && token < Opt_last_string) { | ||
500 | dout("got string token %d val %s\n", token, | ||
501 | argstr[0].from); | ||
502 | } else { | ||
503 | dout("got token %d\n", token); | ||
504 | } | ||
505 | switch (token) { | ||
506 | case Opt_ip: | ||
507 | err = ceph_parse_ips(argstr[0].from, | ||
508 | argstr[0].to, | ||
509 | &args->my_addr, | ||
510 | 1, NULL); | ||
511 | if (err < 0) | ||
512 | goto out; | ||
513 | args->flags |= CEPH_OPT_MYIP; | ||
514 | break; | ||
515 | |||
516 | case Opt_fsid: | ||
517 | err = parse_fsid(argstr[0].from, &args->fsid); | ||
518 | if (err == 0) | ||
519 | args->flags |= CEPH_OPT_FSID; | ||
520 | break; | ||
521 | case Opt_snapdirname: | ||
522 | kfree(args->snapdir_name); | ||
523 | args->snapdir_name = kstrndup(argstr[0].from, | ||
524 | argstr[0].to-argstr[0].from, | ||
525 | GFP_KERNEL); | ||
526 | break; | ||
527 | case Opt_name: | ||
528 | args->name = kstrndup(argstr[0].from, | ||
529 | argstr[0].to-argstr[0].from, | ||
530 | GFP_KERNEL); | ||
531 | break; | ||
532 | case Opt_secret: | ||
533 | args->secret = kstrndup(argstr[0].from, | ||
534 | argstr[0].to-argstr[0].from, | ||
535 | GFP_KERNEL); | ||
536 | break; | ||
537 | |||
538 | /* misc */ | ||
539 | case Opt_wsize: | ||
540 | args->wsize = intval; | ||
541 | break; | ||
542 | case Opt_rsize: | ||
543 | args->rsize = intval; | ||
544 | break; | ||
545 | case Opt_osdtimeout: | ||
546 | args->osd_timeout = intval; | ||
547 | break; | ||
548 | case Opt_osdkeepalivetimeout: | ||
549 | args->osd_keepalive_timeout = intval; | ||
550 | break; | ||
551 | case Opt_osd_idle_ttl: | ||
552 | args->osd_idle_ttl = intval; | ||
553 | break; | ||
554 | case Opt_mount_timeout: | ||
555 | args->mount_timeout = intval; | ||
556 | break; | ||
557 | case Opt_caps_wanted_delay_min: | ||
558 | args->caps_wanted_delay_min = intval; | ||
559 | break; | ||
560 | case Opt_caps_wanted_delay_max: | ||
561 | args->caps_wanted_delay_max = intval; | ||
562 | break; | ||
563 | case Opt_readdir_max_entries: | ||
564 | args->max_readdir = intval; | ||
565 | break; | ||
566 | case Opt_readdir_max_bytes: | ||
567 | args->max_readdir_bytes = intval; | ||
568 | break; | ||
569 | case Opt_congestion_kb: | ||
570 | args->congestion_kb = intval; | ||
571 | break; | ||
572 | |||
573 | case Opt_noshare: | ||
574 | args->flags |= CEPH_OPT_NOSHARE; | ||
575 | break; | ||
576 | |||
577 | case Opt_dirstat: | ||
578 | args->flags |= CEPH_OPT_DIRSTAT; | ||
579 | break; | ||
580 | case Opt_nodirstat: | ||
581 | args->flags &= ~CEPH_OPT_DIRSTAT; | ||
582 | break; | ||
583 | case Opt_rbytes: | ||
584 | args->flags |= CEPH_OPT_RBYTES; | ||
585 | break; | ||
586 | case Opt_norbytes: | ||
587 | args->flags &= ~CEPH_OPT_RBYTES; | ||
588 | break; | ||
589 | case Opt_nocrc: | ||
590 | args->flags |= CEPH_OPT_NOCRC; | ||
591 | break; | ||
592 | case Opt_noasyncreaddir: | ||
593 | args->flags |= CEPH_OPT_NOASYNCREADDIR; | ||
594 | break; | ||
595 | |||
596 | default: | ||
597 | BUG_ON(token); | ||
598 | } | ||
599 | } | ||
600 | return args; | ||
601 | 323 | ||
602 | out: | 324 | out: |
603 | kfree(args->mon_addr); | 325 | destroy_mount_options(fsopt); |
604 | kfree(args); | 326 | return err; |
605 | return ERR_PTR(err); | ||
606 | } | 327 | } |
607 | 328 | ||
608 | static void destroy_mount_args(struct ceph_mount_args *args) | 329 | /** |
330 | * ceph_show_options - Show mount options in /proc/mounts | ||
331 | * @m: seq_file to write to | ||
332 | * @mnt: mount descriptor | ||
333 | */ | ||
334 | static int ceph_show_options(struct seq_file *m, struct vfsmount *mnt) | ||
609 | { | 335 | { |
610 | dout("destroy_mount_args %p\n", args); | 336 | struct ceph_fs_client *fsc = ceph_sb_to_client(mnt->mnt_sb); |
611 | kfree(args->snapdir_name); | 337 | struct ceph_mount_options *fsopt = fsc->mount_options; |
612 | args->snapdir_name = NULL; | 338 | struct ceph_options *opt = fsc->client->options; |
613 | kfree(args->name); | 339 | |
614 | args->name = NULL; | 340 | if (opt->flags & CEPH_OPT_FSID) |
615 | kfree(args->secret); | 341 | seq_printf(m, ",fsid=%pU", &opt->fsid); |
616 | args->secret = NULL; | 342 | if (opt->flags & CEPH_OPT_NOSHARE) |
617 | kfree(args); | 343 | seq_puts(m, ",noshare"); |
344 | if (opt->flags & CEPH_OPT_NOCRC) | ||
345 | seq_puts(m, ",nocrc"); | ||
346 | |||
347 | if (opt->name) | ||
348 | seq_printf(m, ",name=%s", opt->name); | ||
349 | if (opt->secret) | ||
350 | seq_puts(m, ",secret=<hidden>"); | ||
351 | |||
352 | if (opt->mount_timeout != CEPH_MOUNT_TIMEOUT_DEFAULT) | ||
353 | seq_printf(m, ",mount_timeout=%d", opt->mount_timeout); | ||
354 | if (opt->osd_idle_ttl != CEPH_OSD_IDLE_TTL_DEFAULT) | ||
355 | seq_printf(m, ",osd_idle_ttl=%d", opt->osd_idle_ttl); | ||
356 | if (opt->osd_timeout != CEPH_OSD_TIMEOUT_DEFAULT) | ||
357 | seq_printf(m, ",osdtimeout=%d", opt->osd_timeout); | ||
358 | if (opt->osd_keepalive_timeout != CEPH_OSD_KEEPALIVE_DEFAULT) | ||
359 | seq_printf(m, ",osdkeepalivetimeout=%d", | ||
360 | opt->osd_keepalive_timeout); | ||
361 | |||
362 | if (fsopt->flags & CEPH_MOUNT_OPT_DIRSTAT) | ||
363 | seq_puts(m, ",dirstat"); | ||
364 | if ((fsopt->flags & CEPH_MOUNT_OPT_RBYTES) == 0) | ||
365 | seq_puts(m, ",norbytes"); | ||
366 | if (fsopt->flags & CEPH_MOUNT_OPT_NOASYNCREADDIR) | ||
367 | seq_puts(m, ",noasyncreaddir"); | ||
368 | |||
369 | if (fsopt->wsize) | ||
370 | seq_printf(m, ",wsize=%d", fsopt->wsize); | ||
371 | if (fsopt->rsize != CEPH_MOUNT_RSIZE_DEFAULT) | ||
372 | seq_printf(m, ",rsize=%d", fsopt->rsize); | ||
373 | if (fsopt->congestion_kb != default_congestion_kb()) | ||
374 | seq_printf(m, ",write_congestion_kb=%d", fsopt->congestion_kb); | ||
375 | if (fsopt->caps_wanted_delay_min != CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT) | ||
376 | seq_printf(m, ",caps_wanted_delay_min=%d", | ||
377 | fsopt->caps_wanted_delay_min); | ||
378 | if (fsopt->caps_wanted_delay_max != CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT) | ||
379 | seq_printf(m, ",caps_wanted_delay_max=%d", | ||
380 | fsopt->caps_wanted_delay_max); | ||
381 | if (fsopt->cap_release_safety != CEPH_CAP_RELEASE_SAFETY_DEFAULT) | ||
382 | seq_printf(m, ",cap_release_safety=%d", | ||
383 | fsopt->cap_release_safety); | ||
384 | if (fsopt->max_readdir != CEPH_MAX_READDIR_DEFAULT) | ||
385 | seq_printf(m, ",readdir_max_entries=%d", fsopt->max_readdir); | ||
386 | if (fsopt->max_readdir_bytes != CEPH_MAX_READDIR_BYTES_DEFAULT) | ||
387 | seq_printf(m, ",readdir_max_bytes=%d", fsopt->max_readdir_bytes); | ||
388 | if (strcmp(fsopt->snapdir_name, CEPH_SNAPDIRNAME_DEFAULT)) | ||
389 | seq_printf(m, ",snapdirname=%s", fsopt->snapdir_name); | ||
390 | return 0; | ||
618 | } | 391 | } |
619 | 392 | ||
620 | /* | 393 | /* |
621 | * create a fresh client instance | 394 | * handle any mon messages the standard library doesn't understand. |
395 | * return error if we don't either. | ||
622 | */ | 396 | */ |
623 | static struct ceph_client *ceph_create_client(struct ceph_mount_args *args) | 397 | static int extra_mon_dispatch(struct ceph_client *client, struct ceph_msg *msg) |
624 | { | 398 | { |
625 | struct ceph_client *client; | 399 | struct ceph_fs_client *fsc = client->private; |
400 | int type = le16_to_cpu(msg->hdr.type); | ||
401 | |||
402 | switch (type) { | ||
403 | case CEPH_MSG_MDS_MAP: | ||
404 | ceph_mdsc_handle_map(fsc->mdsc, msg); | ||
405 | return 0; | ||
406 | |||
407 | default: | ||
408 | return -1; | ||
409 | } | ||
410 | } | ||
411 | |||
412 | /* | ||
413 | * create a new fs client | ||
414 | */ | ||
415 | struct ceph_fs_client *create_fs_client(struct ceph_mount_options *fsopt, | ||
416 | struct ceph_options *opt) | ||
417 | { | ||
418 | struct ceph_fs_client *fsc; | ||
626 | int err = -ENOMEM; | 419 | int err = -ENOMEM; |
627 | 420 | ||
628 | client = kzalloc(sizeof(*client), GFP_KERNEL); | 421 | fsc = kzalloc(sizeof(*fsc), GFP_KERNEL); |
629 | if (client == NULL) | 422 | if (!fsc) |
630 | return ERR_PTR(-ENOMEM); | 423 | return ERR_PTR(-ENOMEM); |
631 | 424 | ||
632 | mutex_init(&client->mount_mutex); | 425 | fsc->client = ceph_create_client(opt, fsc); |
633 | 426 | if (IS_ERR(fsc->client)) { | |
634 | init_waitqueue_head(&client->auth_wq); | 427 | err = PTR_ERR(fsc->client); |
428 | goto fail; | ||
429 | } | ||
430 | fsc->client->extra_mon_dispatch = extra_mon_dispatch; | ||
431 | fsc->client->supported_features |= CEPH_FEATURE_FLOCK; | ||
432 | fsc->client->monc.want_mdsmap = 1; | ||
635 | 433 | ||
636 | client->sb = NULL; | 434 | fsc->mount_options = fsopt; |
637 | client->mount_state = CEPH_MOUNT_MOUNTING; | ||
638 | client->mount_args = args; | ||
639 | 435 | ||
640 | client->msgr = NULL; | 436 | fsc->sb = NULL; |
437 | fsc->mount_state = CEPH_MOUNT_MOUNTING; | ||
641 | 438 | ||
642 | client->auth_err = 0; | 439 | atomic_long_set(&fsc->writeback_count, 0); |
643 | atomic_long_set(&client->writeback_count, 0); | ||
644 | 440 | ||
645 | err = bdi_init(&client->backing_dev_info); | 441 | err = bdi_init(&fsc->backing_dev_info); |
646 | if (err < 0) | 442 | if (err < 0) |
647 | goto fail; | 443 | goto fail_client; |
648 | 444 | ||
649 | err = -ENOMEM; | 445 | err = -ENOMEM; |
650 | client->wb_wq = create_workqueue("ceph-writeback"); | 446 | fsc->wb_wq = create_workqueue("ceph-writeback"); |
651 | if (client->wb_wq == NULL) | 447 | if (fsc->wb_wq == NULL) |
652 | goto fail_bdi; | 448 | goto fail_bdi; |
653 | client->pg_inv_wq = create_singlethread_workqueue("ceph-pg-invalid"); | 449 | fsc->pg_inv_wq = create_singlethread_workqueue("ceph-pg-invalid"); |
654 | if (client->pg_inv_wq == NULL) | 450 | if (fsc->pg_inv_wq == NULL) |
655 | goto fail_wb_wq; | 451 | goto fail_wb_wq; |
656 | client->trunc_wq = create_singlethread_workqueue("ceph-trunc"); | 452 | fsc->trunc_wq = create_singlethread_workqueue("ceph-trunc"); |
657 | if (client->trunc_wq == NULL) | 453 | if (fsc->trunc_wq == NULL) |
658 | goto fail_pg_inv_wq; | 454 | goto fail_pg_inv_wq; |
659 | 455 | ||
660 | /* set up mempools */ | 456 | /* set up mempools */ |
661 | err = -ENOMEM; | 457 | err = -ENOMEM; |
662 | client->wb_pagevec_pool = mempool_create_kmalloc_pool(10, | 458 | fsc->wb_pagevec_pool = mempool_create_kmalloc_pool(10, |
663 | client->mount_args->wsize >> PAGE_CACHE_SHIFT); | 459 | fsc->mount_options->wsize >> PAGE_CACHE_SHIFT); |
664 | if (!client->wb_pagevec_pool) | 460 | if (!fsc->wb_pagevec_pool) |
665 | goto fail_trunc_wq; | 461 | goto fail_trunc_wq; |
666 | 462 | ||
667 | /* caps */ | 463 | /* caps */ |
668 | client->min_caps = args->max_readdir; | 464 | fsc->min_caps = fsopt->max_readdir; |
465 | |||
466 | return fsc; | ||
669 | 467 | ||
670 | /* subsystems */ | ||
671 | err = ceph_monc_init(&client->monc, client); | ||
672 | if (err < 0) | ||
673 | goto fail_mempool; | ||
674 | err = ceph_osdc_init(&client->osdc, client); | ||
675 | if (err < 0) | ||
676 | goto fail_monc; | ||
677 | err = ceph_mdsc_init(&client->mdsc, client); | ||
678 | if (err < 0) | ||
679 | goto fail_osdc; | ||
680 | return client; | ||
681 | |||
682 | fail_osdc: | ||
683 | ceph_osdc_stop(&client->osdc); | ||
684 | fail_monc: | ||
685 | ceph_monc_stop(&client->monc); | ||
686 | fail_mempool: | ||
687 | mempool_destroy(client->wb_pagevec_pool); | ||
688 | fail_trunc_wq: | 468 | fail_trunc_wq: |
689 | destroy_workqueue(client->trunc_wq); | 469 | destroy_workqueue(fsc->trunc_wq); |
690 | fail_pg_inv_wq: | 470 | fail_pg_inv_wq: |
691 | destroy_workqueue(client->pg_inv_wq); | 471 | destroy_workqueue(fsc->pg_inv_wq); |
692 | fail_wb_wq: | 472 | fail_wb_wq: |
693 | destroy_workqueue(client->wb_wq); | 473 | destroy_workqueue(fsc->wb_wq); |
694 | fail_bdi: | 474 | fail_bdi: |
695 | bdi_destroy(&client->backing_dev_info); | 475 | bdi_destroy(&fsc->backing_dev_info); |
476 | fail_client: | ||
477 | ceph_destroy_client(fsc->client); | ||
696 | fail: | 478 | fail: |
697 | kfree(client); | 479 | kfree(fsc); |
698 | return ERR_PTR(err); | 480 | return ERR_PTR(err); |
699 | } | 481 | } |
700 | 482 | ||
701 | static void ceph_destroy_client(struct ceph_client *client) | 483 | void destroy_fs_client(struct ceph_fs_client *fsc) |
702 | { | 484 | { |
703 | dout("destroy_client %p\n", client); | 485 | dout("destroy_fs_client %p\n", fsc); |
704 | 486 | ||
705 | /* unmount */ | 487 | destroy_workqueue(fsc->wb_wq); |
706 | ceph_mdsc_stop(&client->mdsc); | 488 | destroy_workqueue(fsc->pg_inv_wq); |
707 | ceph_osdc_stop(&client->osdc); | 489 | destroy_workqueue(fsc->trunc_wq); |
708 | 490 | ||
709 | /* | 491 | bdi_destroy(&fsc->backing_dev_info); |
710 | * make sure mds and osd connections close out before destroying | ||
711 | * the auth module, which is needed to free those connections' | ||
712 | * ceph_authorizers. | ||
713 | */ | ||
714 | ceph_msgr_flush(); | ||
715 | |||
716 | ceph_monc_stop(&client->monc); | ||
717 | 492 | ||
718 | ceph_debugfs_client_cleanup(client); | 493 | mempool_destroy(fsc->wb_pagevec_pool); |
719 | destroy_workqueue(client->wb_wq); | ||
720 | destroy_workqueue(client->pg_inv_wq); | ||
721 | destroy_workqueue(client->trunc_wq); | ||
722 | 494 | ||
723 | bdi_destroy(&client->backing_dev_info); | 495 | destroy_mount_options(fsc->mount_options); |
724 | 496 | ||
725 | if (client->msgr) | 497 | ceph_fs_debugfs_cleanup(fsc); |
726 | ceph_messenger_destroy(client->msgr); | ||
727 | mempool_destroy(client->wb_pagevec_pool); | ||
728 | 498 | ||
729 | destroy_mount_args(client->mount_args); | 499 | ceph_destroy_client(fsc->client); |
730 | 500 | ||
731 | kfree(client); | 501 | kfree(fsc); |
732 | dout("destroy_client %p done\n", client); | 502 | dout("destroy_fs_client %p done\n", fsc); |
733 | } | 503 | } |
734 | 504 | ||
735 | /* | 505 | /* |
736 | * Initially learn our fsid, or verify an fsid matches. | 506 | * caches |
737 | */ | 507 | */ |
738 | int ceph_check_fsid(struct ceph_client *client, struct ceph_fsid *fsid) | 508 | struct kmem_cache *ceph_inode_cachep; |
509 | struct kmem_cache *ceph_cap_cachep; | ||
510 | struct kmem_cache *ceph_dentry_cachep; | ||
511 | struct kmem_cache *ceph_file_cachep; | ||
512 | |||
513 | static void ceph_inode_init_once(void *foo) | ||
739 | { | 514 | { |
740 | if (client->have_fsid) { | 515 | struct ceph_inode_info *ci = foo; |
741 | if (ceph_fsid_compare(&client->fsid, fsid)) { | 516 | inode_init_once(&ci->vfs_inode); |
742 | pr_err("bad fsid, had %pU got %pU", | 517 | } |
743 | &client->fsid, fsid); | 518 | |
744 | return -1; | 519 | static int __init init_caches(void) |
745 | } | 520 | { |
746 | } else { | 521 | ceph_inode_cachep = kmem_cache_create("ceph_inode_info", |
747 | pr_info("client%lld fsid %pU\n", client->monc.auth->global_id, | 522 | sizeof(struct ceph_inode_info), |
748 | fsid); | 523 | __alignof__(struct ceph_inode_info), |
749 | memcpy(&client->fsid, fsid, sizeof(*fsid)); | 524 | (SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD), |
750 | ceph_debugfs_client_init(client); | 525 | ceph_inode_init_once); |
751 | client->have_fsid = true; | 526 | if (ceph_inode_cachep == NULL) |
752 | } | 527 | return -ENOMEM; |
528 | |||
529 | ceph_cap_cachep = KMEM_CACHE(ceph_cap, | ||
530 | SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD); | ||
531 | if (ceph_cap_cachep == NULL) | ||
532 | goto bad_cap; | ||
533 | |||
534 | ceph_dentry_cachep = KMEM_CACHE(ceph_dentry_info, | ||
535 | SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD); | ||
536 | if (ceph_dentry_cachep == NULL) | ||
537 | goto bad_dentry; | ||
538 | |||
539 | ceph_file_cachep = KMEM_CACHE(ceph_file_info, | ||
540 | SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD); | ||
541 | if (ceph_file_cachep == NULL) | ||
542 | goto bad_file; | ||
543 | |||
753 | return 0; | 544 | return 0; |
545 | |||
546 | bad_file: | ||
547 | kmem_cache_destroy(ceph_dentry_cachep); | ||
548 | bad_dentry: | ||
549 | kmem_cache_destroy(ceph_cap_cachep); | ||
550 | bad_cap: | ||
551 | kmem_cache_destroy(ceph_inode_cachep); | ||
552 | return -ENOMEM; | ||
754 | } | 553 | } |
755 | 554 | ||
555 | static void destroy_caches(void) | ||
556 | { | ||
557 | kmem_cache_destroy(ceph_inode_cachep); | ||
558 | kmem_cache_destroy(ceph_cap_cachep); | ||
559 | kmem_cache_destroy(ceph_dentry_cachep); | ||
560 | kmem_cache_destroy(ceph_file_cachep); | ||
561 | } | ||
562 | |||
563 | |||
756 | /* | 564 | /* |
757 | * true if we have the mon map (and have thus joined the cluster) | 565 | * ceph_umount_begin - initiate forced umount. Tear down down the |
566 | * mount, skipping steps that may hang while waiting for server(s). | ||
758 | */ | 567 | */ |
759 | static int have_mon_and_osd_map(struct ceph_client *client) | 568 | static void ceph_umount_begin(struct super_block *sb) |
760 | { | 569 | { |
761 | return client->monc.monmap && client->monc.monmap->epoch && | 570 | struct ceph_fs_client *fsc = ceph_sb_to_client(sb); |
762 | client->osdc.osdmap && client->osdc.osdmap->epoch; | 571 | |
572 | dout("ceph_umount_begin - starting forced umount\n"); | ||
573 | if (!fsc) | ||
574 | return; | ||
575 | fsc->mount_state = CEPH_MOUNT_SHUTDOWN; | ||
576 | return; | ||
763 | } | 577 | } |
764 | 578 | ||
579 | static const struct super_operations ceph_super_ops = { | ||
580 | .alloc_inode = ceph_alloc_inode, | ||
581 | .destroy_inode = ceph_destroy_inode, | ||
582 | .write_inode = ceph_write_inode, | ||
583 | .sync_fs = ceph_sync_fs, | ||
584 | .put_super = ceph_put_super, | ||
585 | .show_options = ceph_show_options, | ||
586 | .statfs = ceph_statfs, | ||
587 | .umount_begin = ceph_umount_begin, | ||
588 | }; | ||
589 | |||
765 | /* | 590 | /* |
766 | * Bootstrap mount by opening the root directory. Note the mount | 591 | * Bootstrap mount by opening the root directory. Note the mount |
767 | * @started time from caller, and time out if this takes too long. | 592 | * @started time from caller, and time out if this takes too long. |
768 | */ | 593 | */ |
769 | static struct dentry *open_root_dentry(struct ceph_client *client, | 594 | static struct dentry *open_root_dentry(struct ceph_fs_client *fsc, |
770 | const char *path, | 595 | const char *path, |
771 | unsigned long started) | 596 | unsigned long started) |
772 | { | 597 | { |
773 | struct ceph_mds_client *mdsc = &client->mdsc; | 598 | struct ceph_mds_client *mdsc = fsc->mdsc; |
774 | struct ceph_mds_request *req = NULL; | 599 | struct ceph_mds_request *req = NULL; |
775 | int err; | 600 | int err; |
776 | struct dentry *root; | 601 | struct dentry *root; |
@@ -784,14 +609,14 @@ static struct dentry *open_root_dentry(struct ceph_client *client, | |||
784 | req->r_ino1.ino = CEPH_INO_ROOT; | 609 | req->r_ino1.ino = CEPH_INO_ROOT; |
785 | req->r_ino1.snap = CEPH_NOSNAP; | 610 | req->r_ino1.snap = CEPH_NOSNAP; |
786 | req->r_started = started; | 611 | req->r_started = started; |
787 | req->r_timeout = client->mount_args->mount_timeout * HZ; | 612 | req->r_timeout = fsc->client->options->mount_timeout * HZ; |
788 | req->r_args.getattr.mask = cpu_to_le32(CEPH_STAT_CAP_INODE); | 613 | req->r_args.getattr.mask = cpu_to_le32(CEPH_STAT_CAP_INODE); |
789 | req->r_num_caps = 2; | 614 | req->r_num_caps = 2; |
790 | err = ceph_mdsc_do_request(mdsc, NULL, req); | 615 | err = ceph_mdsc_do_request(mdsc, NULL, req); |
791 | if (err == 0) { | 616 | if (err == 0) { |
792 | dout("open_root_inode success\n"); | 617 | dout("open_root_inode success\n"); |
793 | if (ceph_ino(req->r_target_inode) == CEPH_INO_ROOT && | 618 | if (ceph_ino(req->r_target_inode) == CEPH_INO_ROOT && |
794 | client->sb->s_root == NULL) | 619 | fsc->sb->s_root == NULL) |
795 | root = d_alloc_root(req->r_target_inode); | 620 | root = d_alloc_root(req->r_target_inode); |
796 | else | 621 | else |
797 | root = d_obtain_alias(req->r_target_inode); | 622 | root = d_obtain_alias(req->r_target_inode); |
@@ -804,105 +629,86 @@ static struct dentry *open_root_dentry(struct ceph_client *client, | |||
804 | return root; | 629 | return root; |
805 | } | 630 | } |
806 | 631 | ||
632 | |||
633 | |||
634 | |||
807 | /* | 635 | /* |
808 | * mount: join the ceph cluster, and open root directory. | 636 | * mount: join the ceph cluster, and open root directory. |
809 | */ | 637 | */ |
810 | static int ceph_mount(struct ceph_client *client, struct vfsmount *mnt, | 638 | static int ceph_mount(struct ceph_fs_client *fsc, struct vfsmount *mnt, |
811 | const char *path) | 639 | const char *path) |
812 | { | 640 | { |
813 | struct ceph_entity_addr *myaddr = NULL; | ||
814 | int err; | 641 | int err; |
815 | unsigned long timeout = client->mount_args->mount_timeout * HZ; | ||
816 | unsigned long started = jiffies; /* note the start time */ | 642 | unsigned long started = jiffies; /* note the start time */ |
817 | struct dentry *root; | 643 | struct dentry *root; |
644 | int first = 0; /* first vfsmount for this super_block */ | ||
818 | 645 | ||
819 | dout("mount start\n"); | 646 | dout("mount start\n"); |
820 | mutex_lock(&client->mount_mutex); | 647 | mutex_lock(&fsc->client->mount_mutex); |
821 | |||
822 | /* initialize the messenger */ | ||
823 | if (client->msgr == NULL) { | ||
824 | if (ceph_test_opt(client, MYIP)) | ||
825 | myaddr = &client->mount_args->my_addr; | ||
826 | client->msgr = ceph_messenger_create(myaddr); | ||
827 | if (IS_ERR(client->msgr)) { | ||
828 | err = PTR_ERR(client->msgr); | ||
829 | client->msgr = NULL; | ||
830 | goto out; | ||
831 | } | ||
832 | client->msgr->nocrc = ceph_test_opt(client, NOCRC); | ||
833 | } | ||
834 | 648 | ||
835 | /* open session, and wait for mon, mds, and osd maps */ | 649 | err = __ceph_open_session(fsc->client, started); |
836 | err = ceph_monc_open_session(&client->monc); | ||
837 | if (err < 0) | 650 | if (err < 0) |
838 | goto out; | 651 | goto out; |
839 | 652 | ||
840 | while (!have_mon_and_osd_map(client)) { | ||
841 | err = -EIO; | ||
842 | if (timeout && time_after_eq(jiffies, started + timeout)) | ||
843 | goto out; | ||
844 | |||
845 | /* wait */ | ||
846 | dout("mount waiting for mon_map\n"); | ||
847 | err = wait_event_interruptible_timeout(client->auth_wq, | ||
848 | have_mon_and_osd_map(client) || (client->auth_err < 0), | ||
849 | timeout); | ||
850 | if (err == -EINTR || err == -ERESTARTSYS) | ||
851 | goto out; | ||
852 | if (client->auth_err < 0) { | ||
853 | err = client->auth_err; | ||
854 | goto out; | ||
855 | } | ||
856 | } | ||
857 | |||
858 | dout("mount opening root\n"); | 653 | dout("mount opening root\n"); |
859 | root = open_root_dentry(client, "", started); | 654 | root = open_root_dentry(fsc, "", started); |
860 | if (IS_ERR(root)) { | 655 | if (IS_ERR(root)) { |
861 | err = PTR_ERR(root); | 656 | err = PTR_ERR(root); |
862 | goto out; | 657 | goto out; |
863 | } | 658 | } |
864 | if (client->sb->s_root) | 659 | if (fsc->sb->s_root) { |
865 | dput(root); | 660 | dput(root); |
866 | else | 661 | } else { |
867 | client->sb->s_root = root; | 662 | fsc->sb->s_root = root; |
663 | first = 1; | ||
664 | |||
665 | err = ceph_fs_debugfs_init(fsc); | ||
666 | if (err < 0) | ||
667 | goto fail; | ||
668 | } | ||
868 | 669 | ||
869 | if (path[0] == 0) { | 670 | if (path[0] == 0) { |
870 | dget(root); | 671 | dget(root); |
871 | } else { | 672 | } else { |
872 | dout("mount opening base mountpoint\n"); | 673 | dout("mount opening base mountpoint\n"); |
873 | root = open_root_dentry(client, path, started); | 674 | root = open_root_dentry(fsc, path, started); |
874 | if (IS_ERR(root)) { | 675 | if (IS_ERR(root)) { |
875 | err = PTR_ERR(root); | 676 | err = PTR_ERR(root); |
876 | dput(client->sb->s_root); | 677 | goto fail; |
877 | client->sb->s_root = NULL; | ||
878 | goto out; | ||
879 | } | 678 | } |
880 | } | 679 | } |
881 | 680 | ||
882 | mnt->mnt_root = root; | 681 | mnt->mnt_root = root; |
883 | mnt->mnt_sb = client->sb; | 682 | mnt->mnt_sb = fsc->sb; |
884 | 683 | ||
885 | client->mount_state = CEPH_MOUNT_MOUNTED; | 684 | fsc->mount_state = CEPH_MOUNT_MOUNTED; |
886 | dout("mount success\n"); | 685 | dout("mount success\n"); |
887 | err = 0; | 686 | err = 0; |
888 | 687 | ||
889 | out: | 688 | out: |
890 | mutex_unlock(&client->mount_mutex); | 689 | mutex_unlock(&fsc->client->mount_mutex); |
891 | return err; | 690 | return err; |
691 | |||
692 | fail: | ||
693 | if (first) { | ||
694 | dput(fsc->sb->s_root); | ||
695 | fsc->sb->s_root = NULL; | ||
696 | } | ||
697 | goto out; | ||
892 | } | 698 | } |
893 | 699 | ||
894 | static int ceph_set_super(struct super_block *s, void *data) | 700 | static int ceph_set_super(struct super_block *s, void *data) |
895 | { | 701 | { |
896 | struct ceph_client *client = data; | 702 | struct ceph_fs_client *fsc = data; |
897 | int ret; | 703 | int ret; |
898 | 704 | ||
899 | dout("set_super %p data %p\n", s, data); | 705 | dout("set_super %p data %p\n", s, data); |
900 | 706 | ||
901 | s->s_flags = client->mount_args->sb_flags; | 707 | s->s_flags = fsc->mount_options->sb_flags; |
902 | s->s_maxbytes = 1ULL << 40; /* temp value until we get mdsmap */ | 708 | s->s_maxbytes = 1ULL << 40; /* temp value until we get mdsmap */ |
903 | 709 | ||
904 | s->s_fs_info = client; | 710 | s->s_fs_info = fsc; |
905 | client->sb = s; | 711 | fsc->sb = s; |
906 | 712 | ||
907 | s->s_op = &ceph_super_ops; | 713 | s->s_op = &ceph_super_ops; |
908 | s->s_export_op = &ceph_export_ops; | 714 | s->s_export_op = &ceph_export_ops; |
@@ -917,7 +723,7 @@ static int ceph_set_super(struct super_block *s, void *data) | |||
917 | 723 | ||
918 | fail: | 724 | fail: |
919 | s->s_fs_info = NULL; | 725 | s->s_fs_info = NULL; |
920 | client->sb = NULL; | 726 | fsc->sb = NULL; |
921 | return ret; | 727 | return ret; |
922 | } | 728 | } |
923 | 729 | ||
@@ -926,30 +732,23 @@ fail: | |||
926 | */ | 732 | */ |
927 | static int ceph_compare_super(struct super_block *sb, void *data) | 733 | static int ceph_compare_super(struct super_block *sb, void *data) |
928 | { | 734 | { |
929 | struct ceph_client *new = data; | 735 | struct ceph_fs_client *new = data; |
930 | struct ceph_mount_args *args = new->mount_args; | 736 | struct ceph_mount_options *fsopt = new->mount_options; |
931 | struct ceph_client *other = ceph_sb_to_client(sb); | 737 | struct ceph_options *opt = new->client->options; |
932 | int i; | 738 | struct ceph_fs_client *other = ceph_sb_to_client(sb); |
933 | 739 | ||
934 | dout("ceph_compare_super %p\n", sb); | 740 | dout("ceph_compare_super %p\n", sb); |
935 | if (args->flags & CEPH_OPT_FSID) { | 741 | |
936 | if (ceph_fsid_compare(&args->fsid, &other->fsid)) { | 742 | if (compare_mount_options(fsopt, opt, other)) { |
937 | dout("fsid doesn't match\n"); | 743 | dout("monitor(s)/mount options don't match\n"); |
938 | return 0; | 744 | return 0; |
939 | } | ||
940 | } else { | ||
941 | /* do we share (a) monitor? */ | ||
942 | for (i = 0; i < new->monc.monmap->num_mon; i++) | ||
943 | if (ceph_monmap_contains(other->monc.monmap, | ||
944 | &new->monc.monmap->mon_inst[i].addr)) | ||
945 | break; | ||
946 | if (i == new->monc.monmap->num_mon) { | ||
947 | dout("mon ip not part of monmap\n"); | ||
948 | return 0; | ||
949 | } | ||
950 | dout("mon ip matches existing sb %p\n", sb); | ||
951 | } | 745 | } |
952 | if (args->sb_flags != other->mount_args->sb_flags) { | 746 | if ((opt->flags & CEPH_OPT_FSID) && |
747 | ceph_fsid_compare(&opt->fsid, &other->client->fsid)) { | ||
748 | dout("fsid doesn't match\n"); | ||
749 | return 0; | ||
750 | } | ||
751 | if (fsopt->sb_flags != other->mount_options->sb_flags) { | ||
953 | dout("flags differ\n"); | 752 | dout("flags differ\n"); |
954 | return 0; | 753 | return 0; |
955 | } | 754 | } |
@@ -961,19 +760,20 @@ static int ceph_compare_super(struct super_block *sb, void *data) | |||
961 | */ | 760 | */ |
962 | static atomic_long_t bdi_seq = ATOMIC_LONG_INIT(0); | 761 | static atomic_long_t bdi_seq = ATOMIC_LONG_INIT(0); |
963 | 762 | ||
964 | static int ceph_register_bdi(struct super_block *sb, struct ceph_client *client) | 763 | static int ceph_register_bdi(struct super_block *sb, |
764 | struct ceph_fs_client *fsc) | ||
965 | { | 765 | { |
966 | int err; | 766 | int err; |
967 | 767 | ||
968 | /* set ra_pages based on rsize mount option? */ | 768 | /* set ra_pages based on rsize mount option? */ |
969 | if (client->mount_args->rsize >= PAGE_CACHE_SIZE) | 769 | if (fsc->mount_options->rsize >= PAGE_CACHE_SIZE) |
970 | client->backing_dev_info.ra_pages = | 770 | fsc->backing_dev_info.ra_pages = |
971 | (client->mount_args->rsize + PAGE_CACHE_SIZE - 1) | 771 | (fsc->mount_options->rsize + PAGE_CACHE_SIZE - 1) |
972 | >> PAGE_SHIFT; | 772 | >> PAGE_SHIFT; |
973 | err = bdi_register(&client->backing_dev_info, NULL, "ceph-%d", | 773 | err = bdi_register(&fsc->backing_dev_info, NULL, "ceph-%d", |
974 | atomic_long_inc_return(&bdi_seq)); | 774 | atomic_long_inc_return(&bdi_seq)); |
975 | if (!err) | 775 | if (!err) |
976 | sb->s_bdi = &client->backing_dev_info; | 776 | sb->s_bdi = &fsc->backing_dev_info; |
977 | return err; | 777 | return err; |
978 | } | 778 | } |
979 | 779 | ||
@@ -982,46 +782,52 @@ static int ceph_get_sb(struct file_system_type *fs_type, | |||
982 | struct vfsmount *mnt) | 782 | struct vfsmount *mnt) |
983 | { | 783 | { |
984 | struct super_block *sb; | 784 | struct super_block *sb; |
985 | struct ceph_client *client; | 785 | struct ceph_fs_client *fsc; |
986 | int err; | 786 | int err; |
987 | int (*compare_super)(struct super_block *, void *) = ceph_compare_super; | 787 | int (*compare_super)(struct super_block *, void *) = ceph_compare_super; |
988 | const char *path = NULL; | 788 | const char *path = NULL; |
989 | struct ceph_mount_args *args; | 789 | struct ceph_mount_options *fsopt = NULL; |
790 | struct ceph_options *opt = NULL; | ||
990 | 791 | ||
991 | dout("ceph_get_sb\n"); | 792 | dout("ceph_get_sb\n"); |
992 | args = parse_mount_args(flags, data, dev_name, &path); | 793 | err = parse_mount_options(&fsopt, &opt, flags, data, dev_name, &path); |
993 | if (IS_ERR(args)) { | 794 | if (err < 0) |
994 | err = PTR_ERR(args); | ||
995 | goto out_final; | 795 | goto out_final; |
996 | } | ||
997 | 796 | ||
998 | /* create client (which we may/may not use) */ | 797 | /* create client (which we may/may not use) */ |
999 | client = ceph_create_client(args); | 798 | fsc = create_fs_client(fsopt, opt); |
1000 | if (IS_ERR(client)) { | 799 | if (IS_ERR(fsc)) { |
1001 | err = PTR_ERR(client); | 800 | err = PTR_ERR(fsc); |
801 | kfree(fsopt); | ||
802 | kfree(opt); | ||
1002 | goto out_final; | 803 | goto out_final; |
1003 | } | 804 | } |
1004 | 805 | ||
1005 | if (client->mount_args->flags & CEPH_OPT_NOSHARE) | 806 | err = ceph_mdsc_init(fsc); |
807 | if (err < 0) | ||
808 | goto out; | ||
809 | |||
810 | if (ceph_test_opt(fsc->client, NOSHARE)) | ||
1006 | compare_super = NULL; | 811 | compare_super = NULL; |
1007 | sb = sget(fs_type, compare_super, ceph_set_super, client); | 812 | sb = sget(fs_type, compare_super, ceph_set_super, fsc); |
1008 | if (IS_ERR(sb)) { | 813 | if (IS_ERR(sb)) { |
1009 | err = PTR_ERR(sb); | 814 | err = PTR_ERR(sb); |
1010 | goto out; | 815 | goto out; |
1011 | } | 816 | } |
1012 | 817 | ||
1013 | if (ceph_sb_to_client(sb) != client) { | 818 | if (ceph_sb_to_client(sb) != fsc) { |
1014 | ceph_destroy_client(client); | 819 | ceph_mdsc_destroy(fsc); |
1015 | client = ceph_sb_to_client(sb); | 820 | destroy_fs_client(fsc); |
1016 | dout("get_sb got existing client %p\n", client); | 821 | fsc = ceph_sb_to_client(sb); |
822 | dout("get_sb got existing client %p\n", fsc); | ||
1017 | } else { | 823 | } else { |
1018 | dout("get_sb using new client %p\n", client); | 824 | dout("get_sb using new client %p\n", fsc); |
1019 | err = ceph_register_bdi(sb, client); | 825 | err = ceph_register_bdi(sb, fsc); |
1020 | if (err < 0) | 826 | if (err < 0) |
1021 | goto out_splat; | 827 | goto out_splat; |
1022 | } | 828 | } |
1023 | 829 | ||
1024 | err = ceph_mount(client, mnt, path); | 830 | err = ceph_mount(fsc, mnt, path); |
1025 | if (err < 0) | 831 | if (err < 0) |
1026 | goto out_splat; | 832 | goto out_splat; |
1027 | dout("root %p inode %p ino %llx.%llx\n", mnt->mnt_root, | 833 | dout("root %p inode %p ino %llx.%llx\n", mnt->mnt_root, |
@@ -1029,12 +835,13 @@ static int ceph_get_sb(struct file_system_type *fs_type, | |||
1029 | return 0; | 835 | return 0; |
1030 | 836 | ||
1031 | out_splat: | 837 | out_splat: |
1032 | ceph_mdsc_close_sessions(&client->mdsc); | 838 | ceph_mdsc_close_sessions(fsc->mdsc); |
1033 | deactivate_locked_super(sb); | 839 | deactivate_locked_super(sb); |
1034 | goto out_final; | 840 | goto out_final; |
1035 | 841 | ||
1036 | out: | 842 | out: |
1037 | ceph_destroy_client(client); | 843 | ceph_mdsc_destroy(fsc); |
844 | destroy_fs_client(fsc); | ||
1038 | out_final: | 845 | out_final: |
1039 | dout("ceph_get_sb fail %d\n", err); | 846 | dout("ceph_get_sb fail %d\n", err); |
1040 | return err; | 847 | return err; |
@@ -1042,11 +849,12 @@ out_final: | |||
1042 | 849 | ||
1043 | static void ceph_kill_sb(struct super_block *s) | 850 | static void ceph_kill_sb(struct super_block *s) |
1044 | { | 851 | { |
1045 | struct ceph_client *client = ceph_sb_to_client(s); | 852 | struct ceph_fs_client *fsc = ceph_sb_to_client(s); |
1046 | dout("kill_sb %p\n", s); | 853 | dout("kill_sb %p\n", s); |
1047 | ceph_mdsc_pre_umount(&client->mdsc); | 854 | ceph_mdsc_pre_umount(fsc->mdsc); |
1048 | kill_anon_super(s); /* will call put_super after sb is r/o */ | 855 | kill_anon_super(s); /* will call put_super after sb is r/o */ |
1049 | ceph_destroy_client(client); | 856 | ceph_mdsc_destroy(fsc); |
857 | destroy_fs_client(fsc); | ||
1050 | } | 858 | } |
1051 | 859 | ||
1052 | static struct file_system_type ceph_fs_type = { | 860 | static struct file_system_type ceph_fs_type = { |
@@ -1062,36 +870,20 @@ static struct file_system_type ceph_fs_type = { | |||
1062 | 870 | ||
1063 | static int __init init_ceph(void) | 871 | static int __init init_ceph(void) |
1064 | { | 872 | { |
1065 | int ret = 0; | 873 | int ret = init_caches(); |
1066 | |||
1067 | ret = ceph_debugfs_init(); | ||
1068 | if (ret < 0) | ||
1069 | goto out; | ||
1070 | |||
1071 | ret = ceph_msgr_init(); | ||
1072 | if (ret < 0) | ||
1073 | goto out_debugfs; | ||
1074 | |||
1075 | ret = init_caches(); | ||
1076 | if (ret) | 874 | if (ret) |
1077 | goto out_msgr; | 875 | goto out; |
1078 | 876 | ||
1079 | ret = register_filesystem(&ceph_fs_type); | 877 | ret = register_filesystem(&ceph_fs_type); |
1080 | if (ret) | 878 | if (ret) |
1081 | goto out_icache; | 879 | goto out_icache; |
1082 | 880 | ||
1083 | pr_info("loaded (mon/mds/osd proto %d/%d/%d, osdmap %d/%d %d/%d)\n", | 881 | pr_info("loaded (mds proto %d)\n", CEPH_MDSC_PROTOCOL); |
1084 | CEPH_MONC_PROTOCOL, CEPH_MDSC_PROTOCOL, CEPH_OSDC_PROTOCOL, | 882 | |
1085 | CEPH_OSDMAP_VERSION, CEPH_OSDMAP_VERSION_EXT, | ||
1086 | CEPH_OSDMAP_INC_VERSION, CEPH_OSDMAP_INC_VERSION_EXT); | ||
1087 | return 0; | 883 | return 0; |
1088 | 884 | ||
1089 | out_icache: | 885 | out_icache: |
1090 | destroy_caches(); | 886 | destroy_caches(); |
1091 | out_msgr: | ||
1092 | ceph_msgr_exit(); | ||
1093 | out_debugfs: | ||
1094 | ceph_debugfs_cleanup(); | ||
1095 | out: | 887 | out: |
1096 | return ret; | 888 | return ret; |
1097 | } | 889 | } |
@@ -1101,8 +893,6 @@ static void __exit exit_ceph(void) | |||
1101 | dout("exit_ceph\n"); | 893 | dout("exit_ceph\n"); |
1102 | unregister_filesystem(&ceph_fs_type); | 894 | unregister_filesystem(&ceph_fs_type); |
1103 | destroy_caches(); | 895 | destroy_caches(); |
1104 | ceph_msgr_exit(); | ||
1105 | ceph_debugfs_cleanup(); | ||
1106 | } | 896 | } |
1107 | 897 | ||
1108 | module_init(init_ceph); | 898 | module_init(init_ceph); |
diff --git a/fs/ceph/super.h b/fs/ceph/super.h index b87638e84c4b..1886294e12f7 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h | |||
@@ -1,7 +1,7 @@ | |||
1 | #ifndef _FS_CEPH_SUPER_H | 1 | #ifndef _FS_CEPH_SUPER_H |
2 | #define _FS_CEPH_SUPER_H | 2 | #define _FS_CEPH_SUPER_H |
3 | 3 | ||
4 | #include "ceph_debug.h" | 4 | #include <linux/ceph/ceph_debug.h> |
5 | 5 | ||
6 | #include <asm/unaligned.h> | 6 | #include <asm/unaligned.h> |
7 | #include <linux/backing-dev.h> | 7 | #include <linux/backing-dev.h> |
@@ -14,13 +14,7 @@ | |||
14 | #include <linux/writeback.h> | 14 | #include <linux/writeback.h> |
15 | #include <linux/slab.h> | 15 | #include <linux/slab.h> |
16 | 16 | ||
17 | #include "types.h" | 17 | #include <linux/ceph/libceph.h> |
18 | #include "messenger.h" | ||
19 | #include "msgpool.h" | ||
20 | #include "mon_client.h" | ||
21 | #include "mds_client.h" | ||
22 | #include "osd_client.h" | ||
23 | #include "ceph_fs.h" | ||
24 | 18 | ||
25 | /* f_type in struct statfs */ | 19 | /* f_type in struct statfs */ |
26 | #define CEPH_SUPER_MAGIC 0x00c36400 | 20 | #define CEPH_SUPER_MAGIC 0x00c36400 |
@@ -30,42 +24,25 @@ | |||
30 | #define CEPH_BLOCK_SHIFT 20 /* 1 MB */ | 24 | #define CEPH_BLOCK_SHIFT 20 /* 1 MB */ |
31 | #define CEPH_BLOCK (1 << CEPH_BLOCK_SHIFT) | 25 | #define CEPH_BLOCK (1 << CEPH_BLOCK_SHIFT) |
32 | 26 | ||
33 | /* | 27 | #define CEPH_MOUNT_OPT_DIRSTAT (1<<4) /* `cat dirname` for stats */ |
34 | * Supported features | 28 | #define CEPH_MOUNT_OPT_RBYTES (1<<5) /* dir st_bytes = rbytes */ |
35 | */ | 29 | #define CEPH_MOUNT_OPT_NOASYNCREADDIR (1<<7) /* no dcache readdir */ |
36 | #define CEPH_FEATURE_SUPPORTED CEPH_FEATURE_NOSRCADDR | CEPH_FEATURE_FLOCK | ||
37 | #define CEPH_FEATURE_REQUIRED CEPH_FEATURE_NOSRCADDR | ||
38 | 30 | ||
39 | /* | 31 | #define CEPH_MOUNT_OPT_DEFAULT (CEPH_MOUNT_OPT_RBYTES) |
40 | * mount options | ||
41 | */ | ||
42 | #define CEPH_OPT_FSID (1<<0) | ||
43 | #define CEPH_OPT_NOSHARE (1<<1) /* don't share client with other sbs */ | ||
44 | #define CEPH_OPT_MYIP (1<<2) /* specified my ip */ | ||
45 | #define CEPH_OPT_DIRSTAT (1<<4) /* funky `cat dirname` for stats */ | ||
46 | #define CEPH_OPT_RBYTES (1<<5) /* dir st_bytes = rbytes */ | ||
47 | #define CEPH_OPT_NOCRC (1<<6) /* no data crc on writes */ | ||
48 | #define CEPH_OPT_NOASYNCREADDIR (1<<7) /* no dcache readdir */ | ||
49 | 32 | ||
50 | #define CEPH_OPT_DEFAULT (CEPH_OPT_RBYTES) | 33 | #define ceph_set_mount_opt(fsc, opt) \ |
34 | (fsc)->mount_options->flags |= CEPH_MOUNT_OPT_##opt; | ||
35 | #define ceph_test_mount_opt(fsc, opt) \ | ||
36 | (!!((fsc)->mount_options->flags & CEPH_MOUNT_OPT_##opt)) | ||
51 | 37 | ||
52 | #define ceph_set_opt(client, opt) \ | 38 | #define CEPH_MAX_READDIR_DEFAULT 1024 |
53 | (client)->mount_args->flags |= CEPH_OPT_##opt; | 39 | #define CEPH_MAX_READDIR_BYTES_DEFAULT (512*1024) |
54 | #define ceph_test_opt(client, opt) \ | 40 | #define CEPH_SNAPDIRNAME_DEFAULT ".snap" |
55 | (!!((client)->mount_args->flags & CEPH_OPT_##opt)) | ||
56 | 41 | ||
57 | 42 | struct ceph_mount_options { | |
58 | struct ceph_mount_args { | ||
59 | int sb_flags; | ||
60 | int flags; | 43 | int flags; |
61 | struct ceph_fsid fsid; | 44 | int sb_flags; |
62 | struct ceph_entity_addr my_addr; | 45 | |
63 | int num_mon; | ||
64 | struct ceph_entity_addr *mon_addr; | ||
65 | int mount_timeout; | ||
66 | int osd_idle_ttl; | ||
67 | int osd_timeout; | ||
68 | int osd_keepalive_timeout; | ||
69 | int wsize; | 46 | int wsize; |
70 | int rsize; /* max readahead */ | 47 | int rsize; /* max readahead */ |
71 | int congestion_kb; /* max writeback in flight */ | 48 | int congestion_kb; /* max writeback in flight */ |
@@ -73,82 +50,25 @@ struct ceph_mount_args { | |||
73 | int cap_release_safety; | 50 | int cap_release_safety; |
74 | int max_readdir; /* max readdir result (entires) */ | 51 | int max_readdir; /* max readdir result (entires) */ |
75 | int max_readdir_bytes; /* max readdir result (bytes) */ | 52 | int max_readdir_bytes; /* max readdir result (bytes) */ |
76 | char *snapdir_name; /* default ".snap" */ | ||
77 | char *name; | ||
78 | char *secret; | ||
79 | }; | ||
80 | 53 | ||
81 | /* | 54 | /* |
82 | * defaults | 55 | * everything above this point can be memcmp'd; everything below |
83 | */ | 56 | * is handled in compare_mount_options() |
84 | #define CEPH_MOUNT_TIMEOUT_DEFAULT 60 | 57 | */ |
85 | #define CEPH_OSD_TIMEOUT_DEFAULT 60 /* seconds */ | ||
86 | #define CEPH_OSD_KEEPALIVE_DEFAULT 5 | ||
87 | #define CEPH_OSD_IDLE_TTL_DEFAULT 60 | ||
88 | #define CEPH_MOUNT_RSIZE_DEFAULT (512*1024) /* readahead */ | ||
89 | #define CEPH_MAX_READDIR_DEFAULT 1024 | ||
90 | #define CEPH_MAX_READDIR_BYTES_DEFAULT (512*1024) | ||
91 | |||
92 | #define CEPH_MSG_MAX_FRONT_LEN (16*1024*1024) | ||
93 | #define CEPH_MSG_MAX_DATA_LEN (16*1024*1024) | ||
94 | |||
95 | #define CEPH_SNAPDIRNAME_DEFAULT ".snap" | ||
96 | #define CEPH_AUTH_NAME_DEFAULT "guest" | ||
97 | /* | ||
98 | * Delay telling the MDS we no longer want caps, in case we reopen | ||
99 | * the file. Delay a minimum amount of time, even if we send a cap | ||
100 | * message for some other reason. Otherwise, take the oppotunity to | ||
101 | * update the mds to avoid sending another message later. | ||
102 | */ | ||
103 | #define CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT 5 /* cap release delay */ | ||
104 | #define CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT 60 /* cap release delay */ | ||
105 | |||
106 | #define CEPH_CAP_RELEASE_SAFETY_DEFAULT (CEPH_CAPS_PER_RELEASE * 4) | ||
107 | |||
108 | /* mount state */ | ||
109 | enum { | ||
110 | CEPH_MOUNT_MOUNTING, | ||
111 | CEPH_MOUNT_MOUNTED, | ||
112 | CEPH_MOUNT_UNMOUNTING, | ||
113 | CEPH_MOUNT_UNMOUNTED, | ||
114 | CEPH_MOUNT_SHUTDOWN, | ||
115 | }; | ||
116 | |||
117 | /* | ||
118 | * subtract jiffies | ||
119 | */ | ||
120 | static inline unsigned long time_sub(unsigned long a, unsigned long b) | ||
121 | { | ||
122 | BUG_ON(time_after(b, a)); | ||
123 | return (long)a - (long)b; | ||
124 | } | ||
125 | |||
126 | /* | ||
127 | * per-filesystem client state | ||
128 | * | ||
129 | * possibly shared by multiple mount points, if they are | ||
130 | * mounting the same ceph filesystem/cluster. | ||
131 | */ | ||
132 | struct ceph_client { | ||
133 | struct ceph_fsid fsid; | ||
134 | bool have_fsid; | ||
135 | 58 | ||
136 | struct mutex mount_mutex; /* serialize mount attempts */ | 59 | char *snapdir_name; /* default ".snap" */ |
137 | struct ceph_mount_args *mount_args; | 60 | }; |
138 | 61 | ||
62 | struct ceph_fs_client { | ||
139 | struct super_block *sb; | 63 | struct super_block *sb; |
140 | 64 | ||
141 | unsigned long mount_state; | 65 | struct ceph_mount_options *mount_options; |
142 | wait_queue_head_t auth_wq; | 66 | struct ceph_client *client; |
143 | |||
144 | int auth_err; | ||
145 | 67 | ||
68 | unsigned long mount_state; | ||
146 | int min_caps; /* min caps i added */ | 69 | int min_caps; /* min caps i added */ |
147 | 70 | ||
148 | struct ceph_messenger *msgr; /* messenger instance */ | 71 | struct ceph_mds_client *mdsc; |
149 | struct ceph_mon_client monc; | ||
150 | struct ceph_mds_client mdsc; | ||
151 | struct ceph_osd_client osdc; | ||
152 | 72 | ||
153 | /* writeback */ | 73 | /* writeback */ |
154 | mempool_t *wb_pagevec_pool; | 74 | mempool_t *wb_pagevec_pool; |
@@ -160,14 +80,14 @@ struct ceph_client { | |||
160 | struct backing_dev_info backing_dev_info; | 80 | struct backing_dev_info backing_dev_info; |
161 | 81 | ||
162 | #ifdef CONFIG_DEBUG_FS | 82 | #ifdef CONFIG_DEBUG_FS |
163 | struct dentry *debugfs_monmap; | 83 | struct dentry *debugfs_dentry_lru, *debugfs_caps; |
164 | struct dentry *debugfs_mdsmap, *debugfs_osdmap; | ||
165 | struct dentry *debugfs_dir, *debugfs_dentry_lru, *debugfs_caps; | ||
166 | struct dentry *debugfs_congestion_kb; | 84 | struct dentry *debugfs_congestion_kb; |
167 | struct dentry *debugfs_bdi; | 85 | struct dentry *debugfs_bdi; |
86 | struct dentry *debugfs_mdsc, *debugfs_mdsmap; | ||
168 | #endif | 87 | #endif |
169 | }; | 88 | }; |
170 | 89 | ||
90 | |||
171 | /* | 91 | /* |
172 | * File i/o capability. This tracks shared state with the metadata | 92 | * File i/o capability. This tracks shared state with the metadata |
173 | * server that allows us to cache or writeback attributes or to read | 93 | * server that allows us to cache or writeback attributes or to read |
@@ -275,6 +195,20 @@ struct ceph_inode_xattr { | |||
275 | int should_free_val; | 195 | int should_free_val; |
276 | }; | 196 | }; |
277 | 197 | ||
198 | /* | ||
199 | * Ceph dentry state | ||
200 | */ | ||
201 | struct ceph_dentry_info { | ||
202 | struct ceph_mds_session *lease_session; | ||
203 | u32 lease_gen, lease_shared_gen; | ||
204 | u32 lease_seq; | ||
205 | unsigned long lease_renew_after, lease_renew_from; | ||
206 | struct list_head lru; | ||
207 | struct dentry *dentry; | ||
208 | u64 time; | ||
209 | u64 offset; | ||
210 | }; | ||
211 | |||
278 | struct ceph_inode_xattrs_info { | 212 | struct ceph_inode_xattrs_info { |
279 | /* | 213 | /* |
280 | * (still encoded) xattr blob. we avoid the overhead of parsing | 214 | * (still encoded) xattr blob. we avoid the overhead of parsing |
@@ -296,11 +230,6 @@ struct ceph_inode_xattrs_info { | |||
296 | /* | 230 | /* |
297 | * Ceph inode. | 231 | * Ceph inode. |
298 | */ | 232 | */ |
299 | #define CEPH_I_COMPLETE 1 /* we have complete directory cached */ | ||
300 | #define CEPH_I_NODELAY 4 /* do not delay cap release */ | ||
301 | #define CEPH_I_FLUSH 8 /* do not delay flush of dirty metadata */ | ||
302 | #define CEPH_I_NOFLUSH 16 /* do not flush dirty caps */ | ||
303 | |||
304 | struct ceph_inode_info { | 233 | struct ceph_inode_info { |
305 | struct ceph_vino i_vino; /* ceph ino + snap */ | 234 | struct ceph_vino i_vino; /* ceph ino + snap */ |
306 | 235 | ||
@@ -391,6 +320,63 @@ static inline struct ceph_inode_info *ceph_inode(struct inode *inode) | |||
391 | return container_of(inode, struct ceph_inode_info, vfs_inode); | 320 | return container_of(inode, struct ceph_inode_info, vfs_inode); |
392 | } | 321 | } |
393 | 322 | ||
323 | static inline struct ceph_vino ceph_vino(struct inode *inode) | ||
324 | { | ||
325 | return ceph_inode(inode)->i_vino; | ||
326 | } | ||
327 | |||
328 | /* | ||
329 | * ino_t is <64 bits on many architectures, blech. | ||
330 | * | ||
331 | * don't include snap in ino hash, at least for now. | ||
332 | */ | ||
333 | static inline ino_t ceph_vino_to_ino(struct ceph_vino vino) | ||
334 | { | ||
335 | ino_t ino = (ino_t)vino.ino; /* ^ (vino.snap << 20); */ | ||
336 | #if BITS_PER_LONG == 32 | ||
337 | ino ^= vino.ino >> (sizeof(u64)-sizeof(ino_t)) * 8; | ||
338 | if (!ino) | ||
339 | ino = 1; | ||
340 | #endif | ||
341 | return ino; | ||
342 | } | ||
343 | |||
344 | /* for printf-style formatting */ | ||
345 | #define ceph_vinop(i) ceph_inode(i)->i_vino.ino, ceph_inode(i)->i_vino.snap | ||
346 | |||
347 | static inline u64 ceph_ino(struct inode *inode) | ||
348 | { | ||
349 | return ceph_inode(inode)->i_vino.ino; | ||
350 | } | ||
351 | static inline u64 ceph_snap(struct inode *inode) | ||
352 | { | ||
353 | return ceph_inode(inode)->i_vino.snap; | ||
354 | } | ||
355 | |||
356 | static inline int ceph_ino_compare(struct inode *inode, void *data) | ||
357 | { | ||
358 | struct ceph_vino *pvino = (struct ceph_vino *)data; | ||
359 | struct ceph_inode_info *ci = ceph_inode(inode); | ||
360 | return ci->i_vino.ino == pvino->ino && | ||
361 | ci->i_vino.snap == pvino->snap; | ||
362 | } | ||
363 | |||
364 | static inline struct inode *ceph_find_inode(struct super_block *sb, | ||
365 | struct ceph_vino vino) | ||
366 | { | ||
367 | ino_t t = ceph_vino_to_ino(vino); | ||
368 | return ilookup5(sb, t, ceph_ino_compare, &vino); | ||
369 | } | ||
370 | |||
371 | |||
372 | /* | ||
373 | * Ceph inode. | ||
374 | */ | ||
375 | #define CEPH_I_COMPLETE 1 /* we have complete directory cached */ | ||
376 | #define CEPH_I_NODELAY 4 /* do not delay cap release */ | ||
377 | #define CEPH_I_FLUSH 8 /* do not delay flush of dirty metadata */ | ||
378 | #define CEPH_I_NOFLUSH 16 /* do not flush dirty caps */ | ||
379 | |||
394 | static inline void ceph_i_clear(struct inode *inode, unsigned mask) | 380 | static inline void ceph_i_clear(struct inode *inode, unsigned mask) |
395 | { | 381 | { |
396 | struct ceph_inode_info *ci = ceph_inode(inode); | 382 | struct ceph_inode_info *ci = ceph_inode(inode); |
@@ -414,8 +400,9 @@ static inline bool ceph_i_test(struct inode *inode, unsigned mask) | |||
414 | struct ceph_inode_info *ci = ceph_inode(inode); | 400 | struct ceph_inode_info *ci = ceph_inode(inode); |
415 | bool r; | 401 | bool r; |
416 | 402 | ||
417 | smp_mb(); | 403 | spin_lock(&inode->i_lock); |
418 | r = (ci->i_ceph_flags & mask) == mask; | 404 | r = (ci->i_ceph_flags & mask) == mask; |
405 | spin_unlock(&inode->i_lock); | ||
419 | return r; | 406 | return r; |
420 | } | 407 | } |
421 | 408 | ||
@@ -432,20 +419,6 @@ extern u32 ceph_choose_frag(struct ceph_inode_info *ci, u32 v, | |||
432 | struct ceph_inode_frag *pfrag, | 419 | struct ceph_inode_frag *pfrag, |
433 | int *found); | 420 | int *found); |
434 | 421 | ||
435 | /* | ||
436 | * Ceph dentry state | ||
437 | */ | ||
438 | struct ceph_dentry_info { | ||
439 | struct ceph_mds_session *lease_session; | ||
440 | u32 lease_gen, lease_shared_gen; | ||
441 | u32 lease_seq; | ||
442 | unsigned long lease_renew_after, lease_renew_from; | ||
443 | struct list_head lru; | ||
444 | struct dentry *dentry; | ||
445 | u64 time; | ||
446 | u64 offset; | ||
447 | }; | ||
448 | |||
449 | static inline struct ceph_dentry_info *ceph_dentry(struct dentry *dentry) | 422 | static inline struct ceph_dentry_info *ceph_dentry(struct dentry *dentry) |
450 | { | 423 | { |
451 | return (struct ceph_dentry_info *)dentry->d_fsdata; | 424 | return (struct ceph_dentry_info *)dentry->d_fsdata; |
@@ -456,22 +429,6 @@ static inline loff_t ceph_make_fpos(unsigned frag, unsigned off) | |||
456 | return ((loff_t)frag << 32) | (loff_t)off; | 429 | return ((loff_t)frag << 32) | (loff_t)off; |
457 | } | 430 | } |
458 | 431 | ||
459 | /* | ||
460 | * ino_t is <64 bits on many architectures, blech. | ||
461 | * | ||
462 | * don't include snap in ino hash, at least for now. | ||
463 | */ | ||
464 | static inline ino_t ceph_vino_to_ino(struct ceph_vino vino) | ||
465 | { | ||
466 | ino_t ino = (ino_t)vino.ino; /* ^ (vino.snap << 20); */ | ||
467 | #if BITS_PER_LONG == 32 | ||
468 | ino ^= vino.ino >> (sizeof(u64)-sizeof(ino_t)) * 8; | ||
469 | if (!ino) | ||
470 | ino = 1; | ||
471 | #endif | ||
472 | return ino; | ||
473 | } | ||
474 | |||
475 | static inline int ceph_set_ino_cb(struct inode *inode, void *data) | 432 | static inline int ceph_set_ino_cb(struct inode *inode, void *data) |
476 | { | 433 | { |
477 | ceph_inode(inode)->i_vino = *(struct ceph_vino *)data; | 434 | ceph_inode(inode)->i_vino = *(struct ceph_vino *)data; |
@@ -479,39 +436,6 @@ static inline int ceph_set_ino_cb(struct inode *inode, void *data) | |||
479 | return 0; | 436 | return 0; |
480 | } | 437 | } |
481 | 438 | ||
482 | static inline struct ceph_vino ceph_vino(struct inode *inode) | ||
483 | { | ||
484 | return ceph_inode(inode)->i_vino; | ||
485 | } | ||
486 | |||
487 | /* for printf-style formatting */ | ||
488 | #define ceph_vinop(i) ceph_inode(i)->i_vino.ino, ceph_inode(i)->i_vino.snap | ||
489 | |||
490 | static inline u64 ceph_ino(struct inode *inode) | ||
491 | { | ||
492 | return ceph_inode(inode)->i_vino.ino; | ||
493 | } | ||
494 | static inline u64 ceph_snap(struct inode *inode) | ||
495 | { | ||
496 | return ceph_inode(inode)->i_vino.snap; | ||
497 | } | ||
498 | |||
499 | static inline int ceph_ino_compare(struct inode *inode, void *data) | ||
500 | { | ||
501 | struct ceph_vino *pvino = (struct ceph_vino *)data; | ||
502 | struct ceph_inode_info *ci = ceph_inode(inode); | ||
503 | return ci->i_vino.ino == pvino->ino && | ||
504 | ci->i_vino.snap == pvino->snap; | ||
505 | } | ||
506 | |||
507 | static inline struct inode *ceph_find_inode(struct super_block *sb, | ||
508 | struct ceph_vino vino) | ||
509 | { | ||
510 | ino_t t = ceph_vino_to_ino(vino); | ||
511 | return ilookup5(sb, t, ceph_ino_compare, &vino); | ||
512 | } | ||
513 | |||
514 | |||
515 | /* | 439 | /* |
516 | * caps helpers | 440 | * caps helpers |
517 | */ | 441 | */ |
@@ -576,18 +500,18 @@ extern int ceph_reserve_caps(struct ceph_mds_client *mdsc, | |||
576 | struct ceph_cap_reservation *ctx, int need); | 500 | struct ceph_cap_reservation *ctx, int need); |
577 | extern int ceph_unreserve_caps(struct ceph_mds_client *mdsc, | 501 | extern int ceph_unreserve_caps(struct ceph_mds_client *mdsc, |
578 | struct ceph_cap_reservation *ctx); | 502 | struct ceph_cap_reservation *ctx); |
579 | extern void ceph_reservation_status(struct ceph_client *client, | 503 | extern void ceph_reservation_status(struct ceph_fs_client *client, |
580 | int *total, int *avail, int *used, | 504 | int *total, int *avail, int *used, |
581 | int *reserved, int *min); | 505 | int *reserved, int *min); |
582 | 506 | ||
583 | static inline struct ceph_client *ceph_inode_to_client(struct inode *inode) | 507 | static inline struct ceph_fs_client *ceph_inode_to_client(struct inode *inode) |
584 | { | 508 | { |
585 | return (struct ceph_client *)inode->i_sb->s_fs_info; | 509 | return (struct ceph_fs_client *)inode->i_sb->s_fs_info; |
586 | } | 510 | } |
587 | 511 | ||
588 | static inline struct ceph_client *ceph_sb_to_client(struct super_block *sb) | 512 | static inline struct ceph_fs_client *ceph_sb_to_client(struct super_block *sb) |
589 | { | 513 | { |
590 | return (struct ceph_client *)sb->s_fs_info; | 514 | return (struct ceph_fs_client *)sb->s_fs_info; |
591 | } | 515 | } |
592 | 516 | ||
593 | 517 | ||
@@ -617,51 +541,6 @@ struct ceph_file_info { | |||
617 | 541 | ||
618 | 542 | ||
619 | /* | 543 | /* |
620 | * snapshots | ||
621 | */ | ||
622 | |||
623 | /* | ||
624 | * A "snap context" is the set of existing snapshots when we | ||
625 | * write data. It is used by the OSD to guide its COW behavior. | ||
626 | * | ||
627 | * The ceph_snap_context is refcounted, and attached to each dirty | ||
628 | * page, indicating which context the dirty data belonged when it was | ||
629 | * dirtied. | ||
630 | */ | ||
631 | struct ceph_snap_context { | ||
632 | atomic_t nref; | ||
633 | u64 seq; | ||
634 | int num_snaps; | ||
635 | u64 snaps[]; | ||
636 | }; | ||
637 | |||
638 | static inline struct ceph_snap_context * | ||
639 | ceph_get_snap_context(struct ceph_snap_context *sc) | ||
640 | { | ||
641 | /* | ||
642 | printk("get_snap_context %p %d -> %d\n", sc, atomic_read(&sc->nref), | ||
643 | atomic_read(&sc->nref)+1); | ||
644 | */ | ||
645 | if (sc) | ||
646 | atomic_inc(&sc->nref); | ||
647 | return sc; | ||
648 | } | ||
649 | |||
650 | static inline void ceph_put_snap_context(struct ceph_snap_context *sc) | ||
651 | { | ||
652 | if (!sc) | ||
653 | return; | ||
654 | /* | ||
655 | printk("put_snap_context %p %d -> %d\n", sc, atomic_read(&sc->nref), | ||
656 | atomic_read(&sc->nref)-1); | ||
657 | */ | ||
658 | if (atomic_dec_and_test(&sc->nref)) { | ||
659 | /*printk(" deleting snap_context %p\n", sc);*/ | ||
660 | kfree(sc); | ||
661 | } | ||
662 | } | ||
663 | |||
664 | /* | ||
665 | * A "snap realm" describes a subset of the file hierarchy sharing | 544 | * A "snap realm" describes a subset of the file hierarchy sharing |
666 | * the same set of snapshots that apply to it. The realms themselves | 545 | * the same set of snapshots that apply to it. The realms themselves |
667 | * are organized into a hierarchy, such that children inherit (some of) | 546 | * are organized into a hierarchy, such that children inherit (some of) |
@@ -699,16 +578,33 @@ struct ceph_snap_realm { | |||
699 | spinlock_t inodes_with_caps_lock; | 578 | spinlock_t inodes_with_caps_lock; |
700 | }; | 579 | }; |
701 | 580 | ||
702 | 581 | static inline int default_congestion_kb(void) | |
703 | |||
704 | /* | ||
705 | * calculate the number of pages a given length and offset map onto, | ||
706 | * if we align the data. | ||
707 | */ | ||
708 | static inline int calc_pages_for(u64 off, u64 len) | ||
709 | { | 582 | { |
710 | return ((off+len+PAGE_CACHE_SIZE-1) >> PAGE_CACHE_SHIFT) - | 583 | int congestion_kb; |
711 | (off >> PAGE_CACHE_SHIFT); | 584 | |
585 | /* | ||
586 | * Copied from NFS | ||
587 | * | ||
588 | * congestion size, scale with available memory. | ||
589 | * | ||
590 | * 64MB: 8192k | ||
591 | * 128MB: 11585k | ||
592 | * 256MB: 16384k | ||
593 | * 512MB: 23170k | ||
594 | * 1GB: 32768k | ||
595 | * 2GB: 46340k | ||
596 | * 4GB: 65536k | ||
597 | * 8GB: 92681k | ||
598 | * 16GB: 131072k | ||
599 | * | ||
600 | * This allows larger machines to have larger/more transfers. | ||
601 | * Limit the default to 256M | ||
602 | */ | ||
603 | congestion_kb = (16*int_sqrt(totalram_pages)) << (PAGE_SHIFT-10); | ||
604 | if (congestion_kb > 256*1024) | ||
605 | congestion_kb = 256*1024; | ||
606 | |||
607 | return congestion_kb; | ||
712 | } | 608 | } |
713 | 609 | ||
714 | 610 | ||
@@ -741,16 +637,6 @@ static inline bool __ceph_have_pending_cap_snap(struct ceph_inode_info *ci) | |||
741 | ci_item)->writing; | 637 | ci_item)->writing; |
742 | } | 638 | } |
743 | 639 | ||
744 | |||
745 | /* super.c */ | ||
746 | extern struct kmem_cache *ceph_inode_cachep; | ||
747 | extern struct kmem_cache *ceph_cap_cachep; | ||
748 | extern struct kmem_cache *ceph_dentry_cachep; | ||
749 | extern struct kmem_cache *ceph_file_cachep; | ||
750 | |||
751 | extern const char *ceph_msg_type_name(int type); | ||
752 | extern int ceph_check_fsid(struct ceph_client *client, struct ceph_fsid *fsid); | ||
753 | |||
754 | /* inode.c */ | 640 | /* inode.c */ |
755 | extern const struct inode_operations ceph_file_iops; | 641 | extern const struct inode_operations ceph_file_iops; |
756 | 642 | ||
@@ -857,12 +743,18 @@ extern int ceph_mmap(struct file *file, struct vm_area_struct *vma); | |||
857 | /* file.c */ | 743 | /* file.c */ |
858 | extern const struct file_operations ceph_file_fops; | 744 | extern const struct file_operations ceph_file_fops; |
859 | extern const struct address_space_operations ceph_aops; | 745 | extern const struct address_space_operations ceph_aops; |
746 | extern int ceph_copy_to_page_vector(struct page **pages, | ||
747 | const char *data, | ||
748 | loff_t off, size_t len); | ||
749 | extern int ceph_copy_from_page_vector(struct page **pages, | ||
750 | char *data, | ||
751 | loff_t off, size_t len); | ||
752 | extern struct page **ceph_alloc_page_vector(int num_pages, gfp_t flags); | ||
860 | extern int ceph_open(struct inode *inode, struct file *file); | 753 | extern int ceph_open(struct inode *inode, struct file *file); |
861 | extern struct dentry *ceph_lookup_open(struct inode *dir, struct dentry *dentry, | 754 | extern struct dentry *ceph_lookup_open(struct inode *dir, struct dentry *dentry, |
862 | struct nameidata *nd, int mode, | 755 | struct nameidata *nd, int mode, |
863 | int locked_dir); | 756 | int locked_dir); |
864 | extern int ceph_release(struct inode *inode, struct file *filp); | 757 | extern int ceph_release(struct inode *inode, struct file *filp); |
865 | extern void ceph_release_page_vector(struct page **pages, int num_pages); | ||
866 | 758 | ||
867 | /* dir.c */ | 759 | /* dir.c */ |
868 | extern const struct file_operations ceph_dir_fops; | 760 | extern const struct file_operations ceph_dir_fops; |
@@ -892,12 +784,6 @@ extern long ceph_ioctl(struct file *file, unsigned int cmd, unsigned long arg); | |||
892 | /* export.c */ | 784 | /* export.c */ |
893 | extern const struct export_operations ceph_export_ops; | 785 | extern const struct export_operations ceph_export_ops; |
894 | 786 | ||
895 | /* debugfs.c */ | ||
896 | extern int ceph_debugfs_init(void); | ||
897 | extern void ceph_debugfs_cleanup(void); | ||
898 | extern int ceph_debugfs_client_init(struct ceph_client *client); | ||
899 | extern void ceph_debugfs_client_cleanup(struct ceph_client *client); | ||
900 | |||
901 | /* locks.c */ | 787 | /* locks.c */ |
902 | extern int ceph_lock(struct file *file, int cmd, struct file_lock *fl); | 788 | extern int ceph_lock(struct file *file, int cmd, struct file_lock *fl); |
903 | extern int ceph_flock(struct file *file, int cmd, struct file_lock *fl); | 789 | extern int ceph_flock(struct file *file, int cmd, struct file_lock *fl); |
@@ -914,4 +800,8 @@ static inline struct inode *get_dentry_parent_inode(struct dentry *dentry) | |||
914 | return NULL; | 800 | return NULL; |
915 | } | 801 | } |
916 | 802 | ||
803 | /* debugfs.c */ | ||
804 | extern int ceph_fs_debugfs_init(struct ceph_fs_client *client); | ||
805 | extern void ceph_fs_debugfs_cleanup(struct ceph_fs_client *client); | ||
806 | |||
917 | #endif /* _FS_CEPH_SUPER_H */ | 807 | #endif /* _FS_CEPH_SUPER_H */ |
diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c index 9578af610b73..6e12a6ba5f79 100644 --- a/fs/ceph/xattr.c +++ b/fs/ceph/xattr.c | |||
@@ -1,6 +1,9 @@ | |||
1 | #include "ceph_debug.h" | 1 | #include <linux/ceph/ceph_debug.h> |
2 | |||
2 | #include "super.h" | 3 | #include "super.h" |
3 | #include "decode.h" | 4 | #include "mds_client.h" |
5 | |||
6 | #include <linux/ceph/decode.h> | ||
4 | 7 | ||
5 | #include <linux/xattr.h> | 8 | #include <linux/xattr.h> |
6 | #include <linux/slab.h> | 9 | #include <linux/slab.h> |
@@ -620,12 +623,12 @@ out: | |||
620 | static int ceph_sync_setxattr(struct dentry *dentry, const char *name, | 623 | static int ceph_sync_setxattr(struct dentry *dentry, const char *name, |
621 | const char *value, size_t size, int flags) | 624 | const char *value, size_t size, int flags) |
622 | { | 625 | { |
623 | struct ceph_client *client = ceph_sb_to_client(dentry->d_sb); | 626 | struct ceph_fs_client *fsc = ceph_sb_to_client(dentry->d_sb); |
624 | struct inode *inode = dentry->d_inode; | 627 | struct inode *inode = dentry->d_inode; |
625 | struct ceph_inode_info *ci = ceph_inode(inode); | 628 | struct ceph_inode_info *ci = ceph_inode(inode); |
626 | struct inode *parent_inode = dentry->d_parent->d_inode; | 629 | struct inode *parent_inode = dentry->d_parent->d_inode; |
627 | struct ceph_mds_request *req; | 630 | struct ceph_mds_request *req; |
628 | struct ceph_mds_client *mdsc = &client->mdsc; | 631 | struct ceph_mds_client *mdsc = fsc->mdsc; |
629 | int err; | 632 | int err; |
630 | int i, nr_pages; | 633 | int i, nr_pages; |
631 | struct page **pages = NULL; | 634 | struct page **pages = NULL; |
@@ -713,10 +716,9 @@ int ceph_setxattr(struct dentry *dentry, const char *name, | |||
713 | 716 | ||
714 | /* preallocate memory for xattr name, value, index node */ | 717 | /* preallocate memory for xattr name, value, index node */ |
715 | err = -ENOMEM; | 718 | err = -ENOMEM; |
716 | newname = kmalloc(name_len + 1, GFP_NOFS); | 719 | newname = kmemdup(name, name_len + 1, GFP_NOFS); |
717 | if (!newname) | 720 | if (!newname) |
718 | goto out; | 721 | goto out; |
719 | memcpy(newname, name, name_len + 1); | ||
720 | 722 | ||
721 | if (val_len) { | 723 | if (val_len) { |
722 | newval = kmalloc(val_len + 1, GFP_NOFS); | 724 | newval = kmalloc(val_len + 1, GFP_NOFS); |
@@ -777,8 +779,8 @@ out: | |||
777 | 779 | ||
778 | static int ceph_send_removexattr(struct dentry *dentry, const char *name) | 780 | static int ceph_send_removexattr(struct dentry *dentry, const char *name) |
779 | { | 781 | { |
780 | struct ceph_client *client = ceph_sb_to_client(dentry->d_sb); | 782 | struct ceph_fs_client *fsc = ceph_sb_to_client(dentry->d_sb); |
781 | struct ceph_mds_client *mdsc = &client->mdsc; | 783 | struct ceph_mds_client *mdsc = fsc->mdsc; |
782 | struct inode *inode = dentry->d_inode; | 784 | struct inode *inode = dentry->d_inode; |
783 | struct inode *parent_inode = dentry->d_parent->d_inode; | 785 | struct inode *parent_inode = dentry->d_parent->d_inode; |
784 | struct ceph_mds_request *req; | 786 | struct ceph_mds_request *req; |
@@ -2014,3 +2014,43 @@ fail_creds: | |||
2014 | fail: | 2014 | fail: |
2015 | return; | 2015 | return; |
2016 | } | 2016 | } |
2017 | |||
2018 | /* | ||
2019 | * Core dumping helper functions. These are the only things you should | ||
2020 | * do on a core-file: use only these functions to write out all the | ||
2021 | * necessary info. | ||
2022 | */ | ||
2023 | int dump_write(struct file *file, const void *addr, int nr) | ||
2024 | { | ||
2025 | return access_ok(VERIFY_READ, addr, nr) && file->f_op->write(file, addr, nr, &file->f_pos) == nr; | ||
2026 | } | ||
2027 | EXPORT_SYMBOL(dump_write); | ||
2028 | |||
2029 | int dump_seek(struct file *file, loff_t off) | ||
2030 | { | ||
2031 | int ret = 1; | ||
2032 | |||
2033 | if (file->f_op->llseek && file->f_op->llseek != no_llseek) { | ||
2034 | if (file->f_op->llseek(file, off, SEEK_CUR) < 0) | ||
2035 | return 0; | ||
2036 | } else { | ||
2037 | char *buf = (char *)get_zeroed_page(GFP_KERNEL); | ||
2038 | |||
2039 | if (!buf) | ||
2040 | return 0; | ||
2041 | while (off > 0) { | ||
2042 | unsigned long n = off; | ||
2043 | |||
2044 | if (n > PAGE_SIZE) | ||
2045 | n = PAGE_SIZE; | ||
2046 | if (!dump_write(file, buf, n)) { | ||
2047 | ret = 0; | ||
2048 | break; | ||
2049 | } | ||
2050 | off -= n; | ||
2051 | } | ||
2052 | free_page((unsigned long)buf); | ||
2053 | } | ||
2054 | return ret; | ||
2055 | } | ||
2056 | EXPORT_SYMBOL(dump_seek); | ||
diff --git a/fs/gfs2/Kconfig b/fs/gfs2/Kconfig index cc9665522148..c465ae066c62 100644 --- a/fs/gfs2/Kconfig +++ b/fs/gfs2/Kconfig | |||
@@ -1,6 +1,6 @@ | |||
1 | config GFS2_FS | 1 | config GFS2_FS |
2 | tristate "GFS2 file system support" | 2 | tristate "GFS2 file system support" |
3 | depends on EXPERIMENTAL && (64BIT || LBDAF) | 3 | depends on (64BIT || LBDAF) |
4 | select DLM if GFS2_FS_LOCKING_DLM | 4 | select DLM if GFS2_FS_LOCKING_DLM |
5 | select CONFIGFS_FS if GFS2_FS_LOCKING_DLM | 5 | select CONFIGFS_FS if GFS2_FS_LOCKING_DLM |
6 | select SYSFS if GFS2_FS_LOCKING_DLM | 6 | select SYSFS if GFS2_FS_LOCKING_DLM |
diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c index 194fe16d8418..6b24afb96aae 100644 --- a/fs/gfs2/aops.c +++ b/fs/gfs2/aops.c | |||
@@ -36,8 +36,8 @@ | |||
36 | #include "glops.h" | 36 | #include "glops.h" |
37 | 37 | ||
38 | 38 | ||
39 | static void gfs2_page_add_databufs(struct gfs2_inode *ip, struct page *page, | 39 | void gfs2_page_add_databufs(struct gfs2_inode *ip, struct page *page, |
40 | unsigned int from, unsigned int to) | 40 | unsigned int from, unsigned int to) |
41 | { | 41 | { |
42 | struct buffer_head *head = page_buffers(page); | 42 | struct buffer_head *head = page_buffers(page); |
43 | unsigned int bsize = head->b_size; | 43 | unsigned int bsize = head->b_size; |
@@ -615,7 +615,7 @@ static int gfs2_write_begin(struct file *file, struct address_space *mapping, | |||
615 | unsigned int data_blocks = 0, ind_blocks = 0, rblocks; | 615 | unsigned int data_blocks = 0, ind_blocks = 0, rblocks; |
616 | int alloc_required; | 616 | int alloc_required; |
617 | int error = 0; | 617 | int error = 0; |
618 | struct gfs2_alloc *al; | 618 | struct gfs2_alloc *al = NULL; |
619 | pgoff_t index = pos >> PAGE_CACHE_SHIFT; | 619 | pgoff_t index = pos >> PAGE_CACHE_SHIFT; |
620 | unsigned from = pos & (PAGE_CACHE_SIZE - 1); | 620 | unsigned from = pos & (PAGE_CACHE_SIZE - 1); |
621 | unsigned to = from + len; | 621 | unsigned to = from + len; |
@@ -663,6 +663,8 @@ static int gfs2_write_begin(struct file *file, struct address_space *mapping, | |||
663 | rblocks += RES_STATFS + RES_QUOTA; | 663 | rblocks += RES_STATFS + RES_QUOTA; |
664 | if (&ip->i_inode == sdp->sd_rindex) | 664 | if (&ip->i_inode == sdp->sd_rindex) |
665 | rblocks += 2 * RES_STATFS; | 665 | rblocks += 2 * RES_STATFS; |
666 | if (alloc_required) | ||
667 | rblocks += gfs2_rg_blocks(al); | ||
666 | 668 | ||
667 | error = gfs2_trans_begin(sdp, rblocks, | 669 | error = gfs2_trans_begin(sdp, rblocks, |
668 | PAGE_CACHE_SIZE/sdp->sd_sb.sb_bsize); | 670 | PAGE_CACHE_SIZE/sdp->sd_sb.sb_bsize); |
@@ -696,13 +698,11 @@ out: | |||
696 | 698 | ||
697 | page_cache_release(page); | 699 | page_cache_release(page); |
698 | 700 | ||
699 | /* | 701 | gfs2_trans_end(sdp); |
700 | * XXX(truncate): the call below should probably be replaced with | ||
701 | * a call to the gfs2-specific truncate blocks helper to actually | ||
702 | * release disk blocks.. | ||
703 | */ | ||
704 | if (pos + len > ip->i_inode.i_size) | 702 | if (pos + len > ip->i_inode.i_size) |
705 | truncate_setsize(&ip->i_inode, ip->i_inode.i_size); | 703 | gfs2_trim_blocks(&ip->i_inode); |
704 | goto out_trans_fail; | ||
705 | |||
706 | out_endtrans: | 706 | out_endtrans: |
707 | gfs2_trans_end(sdp); | 707 | gfs2_trans_end(sdp); |
708 | out_trans_fail: | 708 | out_trans_fail: |
@@ -802,10 +802,8 @@ static int gfs2_stuffed_write_end(struct inode *inode, struct buffer_head *dibh, | |||
802 | page_cache_release(page); | 802 | page_cache_release(page); |
803 | 803 | ||
804 | if (copied) { | 804 | if (copied) { |
805 | if (inode->i_size < to) { | 805 | if (inode->i_size < to) |
806 | i_size_write(inode, to); | 806 | i_size_write(inode, to); |
807 | ip->i_disksize = inode->i_size; | ||
808 | } | ||
809 | gfs2_dinode_out(ip, di); | 807 | gfs2_dinode_out(ip, di); |
810 | mark_inode_dirty(inode); | 808 | mark_inode_dirty(inode); |
811 | } | 809 | } |
@@ -876,8 +874,6 @@ static int gfs2_write_end(struct file *file, struct address_space *mapping, | |||
876 | 874 | ||
877 | ret = generic_write_end(file, mapping, pos, len, copied, page, fsdata); | 875 | ret = generic_write_end(file, mapping, pos, len, copied, page, fsdata); |
878 | if (ret > 0) { | 876 | if (ret > 0) { |
879 | if (inode->i_size > ip->i_disksize) | ||
880 | ip->i_disksize = inode->i_size; | ||
881 | gfs2_dinode_out(ip, dibh->b_data); | 877 | gfs2_dinode_out(ip, dibh->b_data); |
882 | mark_inode_dirty(inode); | 878 | mark_inode_dirty(inode); |
883 | } | 879 | } |
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c index 6f482809d1a3..5476c066d4ee 100644 --- a/fs/gfs2/bmap.c +++ b/fs/gfs2/bmap.c | |||
@@ -50,7 +50,7 @@ struct strip_mine { | |||
50 | * @ip: the inode | 50 | * @ip: the inode |
51 | * @dibh: the dinode buffer | 51 | * @dibh: the dinode buffer |
52 | * @block: the block number that was allocated | 52 | * @block: the block number that was allocated |
53 | * @private: any locked page held by the caller process | 53 | * @page: The (optional) page. This is looked up if @page is NULL |
54 | * | 54 | * |
55 | * Returns: errno | 55 | * Returns: errno |
56 | */ | 56 | */ |
@@ -109,8 +109,7 @@ static int gfs2_unstuffer_page(struct gfs2_inode *ip, struct buffer_head *dibh, | |||
109 | /** | 109 | /** |
110 | * gfs2_unstuff_dinode - Unstuff a dinode when the data has grown too big | 110 | * gfs2_unstuff_dinode - Unstuff a dinode when the data has grown too big |
111 | * @ip: The GFS2 inode to unstuff | 111 | * @ip: The GFS2 inode to unstuff |
112 | * @unstuffer: the routine that handles unstuffing a non-zero length file | 112 | * @page: The (optional) page. This is looked up if the @page is NULL |
113 | * @private: private data for the unstuffer | ||
114 | * | 113 | * |
115 | * This routine unstuffs a dinode and returns it to a "normal" state such | 114 | * This routine unstuffs a dinode and returns it to a "normal" state such |
116 | * that the height can be grown in the traditional way. | 115 | * that the height can be grown in the traditional way. |
@@ -132,7 +131,7 @@ int gfs2_unstuff_dinode(struct gfs2_inode *ip, struct page *page) | |||
132 | if (error) | 131 | if (error) |
133 | goto out; | 132 | goto out; |
134 | 133 | ||
135 | if (ip->i_disksize) { | 134 | if (i_size_read(&ip->i_inode)) { |
136 | /* Get a free block, fill it with the stuffed data, | 135 | /* Get a free block, fill it with the stuffed data, |
137 | and write it out to disk */ | 136 | and write it out to disk */ |
138 | 137 | ||
@@ -161,7 +160,7 @@ int gfs2_unstuff_dinode(struct gfs2_inode *ip, struct page *page) | |||
161 | di = (struct gfs2_dinode *)dibh->b_data; | 160 | di = (struct gfs2_dinode *)dibh->b_data; |
162 | gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode)); | 161 | gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode)); |
163 | 162 | ||
164 | if (ip->i_disksize) { | 163 | if (i_size_read(&ip->i_inode)) { |
165 | *(__be64 *)(di + 1) = cpu_to_be64(block); | 164 | *(__be64 *)(di + 1) = cpu_to_be64(block); |
166 | gfs2_add_inode_blocks(&ip->i_inode, 1); | 165 | gfs2_add_inode_blocks(&ip->i_inode, 1); |
167 | di->di_blocks = cpu_to_be64(gfs2_get_inode_blocks(&ip->i_inode)); | 166 | di->di_blocks = cpu_to_be64(gfs2_get_inode_blocks(&ip->i_inode)); |
@@ -885,83 +884,14 @@ out: | |||
885 | } | 884 | } |
886 | 885 | ||
887 | /** | 886 | /** |
888 | * do_grow - Make a file look bigger than it is | ||
889 | * @ip: the inode | ||
890 | * @size: the size to set the file to | ||
891 | * | ||
892 | * Called with an exclusive lock on @ip. | ||
893 | * | ||
894 | * Returns: errno | ||
895 | */ | ||
896 | |||
897 | static int do_grow(struct gfs2_inode *ip, u64 size) | ||
898 | { | ||
899 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); | ||
900 | struct gfs2_alloc *al; | ||
901 | struct buffer_head *dibh; | ||
902 | int error; | ||
903 | |||
904 | al = gfs2_alloc_get(ip); | ||
905 | if (!al) | ||
906 | return -ENOMEM; | ||
907 | |||
908 | error = gfs2_quota_lock_check(ip); | ||
909 | if (error) | ||
910 | goto out; | ||
911 | |||
912 | al->al_requested = sdp->sd_max_height + RES_DATA; | ||
913 | |||
914 | error = gfs2_inplace_reserve(ip); | ||
915 | if (error) | ||
916 | goto out_gunlock_q; | ||
917 | |||
918 | error = gfs2_trans_begin(sdp, | ||
919 | sdp->sd_max_height + al->al_rgd->rd_length + | ||
920 | RES_JDATA + RES_DINODE + RES_STATFS + RES_QUOTA, 0); | ||
921 | if (error) | ||
922 | goto out_ipres; | ||
923 | |||
924 | error = gfs2_meta_inode_buffer(ip, &dibh); | ||
925 | if (error) | ||
926 | goto out_end_trans; | ||
927 | |||
928 | if (size > sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode)) { | ||
929 | if (gfs2_is_stuffed(ip)) { | ||
930 | error = gfs2_unstuff_dinode(ip, NULL); | ||
931 | if (error) | ||
932 | goto out_brelse; | ||
933 | } | ||
934 | } | ||
935 | |||
936 | ip->i_disksize = size; | ||
937 | ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME; | ||
938 | gfs2_trans_add_bh(ip->i_gl, dibh, 1); | ||
939 | gfs2_dinode_out(ip, dibh->b_data); | ||
940 | |||
941 | out_brelse: | ||
942 | brelse(dibh); | ||
943 | out_end_trans: | ||
944 | gfs2_trans_end(sdp); | ||
945 | out_ipres: | ||
946 | gfs2_inplace_release(ip); | ||
947 | out_gunlock_q: | ||
948 | gfs2_quota_unlock(ip); | ||
949 | out: | ||
950 | gfs2_alloc_put(ip); | ||
951 | return error; | ||
952 | } | ||
953 | |||
954 | |||
955 | /** | ||
956 | * gfs2_block_truncate_page - Deal with zeroing out data for truncate | 887 | * gfs2_block_truncate_page - Deal with zeroing out data for truncate |
957 | * | 888 | * |
958 | * This is partly borrowed from ext3. | 889 | * This is partly borrowed from ext3. |
959 | */ | 890 | */ |
960 | static int gfs2_block_truncate_page(struct address_space *mapping) | 891 | static int gfs2_block_truncate_page(struct address_space *mapping, loff_t from) |
961 | { | 892 | { |
962 | struct inode *inode = mapping->host; | 893 | struct inode *inode = mapping->host; |
963 | struct gfs2_inode *ip = GFS2_I(inode); | 894 | struct gfs2_inode *ip = GFS2_I(inode); |
964 | loff_t from = inode->i_size; | ||
965 | unsigned long index = from >> PAGE_CACHE_SHIFT; | 895 | unsigned long index = from >> PAGE_CACHE_SHIFT; |
966 | unsigned offset = from & (PAGE_CACHE_SIZE-1); | 896 | unsigned offset = from & (PAGE_CACHE_SIZE-1); |
967 | unsigned blocksize, iblock, length, pos; | 897 | unsigned blocksize, iblock, length, pos; |
@@ -1023,9 +953,11 @@ unlock: | |||
1023 | return err; | 953 | return err; |
1024 | } | 954 | } |
1025 | 955 | ||
1026 | static int trunc_start(struct gfs2_inode *ip, u64 size) | 956 | static int trunc_start(struct inode *inode, u64 oldsize, u64 newsize) |
1027 | { | 957 | { |
1028 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); | 958 | struct gfs2_inode *ip = GFS2_I(inode); |
959 | struct gfs2_sbd *sdp = GFS2_SB(inode); | ||
960 | struct address_space *mapping = inode->i_mapping; | ||
1029 | struct buffer_head *dibh; | 961 | struct buffer_head *dibh; |
1030 | int journaled = gfs2_is_jdata(ip); | 962 | int journaled = gfs2_is_jdata(ip); |
1031 | int error; | 963 | int error; |
@@ -1039,31 +971,26 @@ static int trunc_start(struct gfs2_inode *ip, u64 size) | |||
1039 | if (error) | 971 | if (error) |
1040 | goto out; | 972 | goto out; |
1041 | 973 | ||
974 | gfs2_trans_add_bh(ip->i_gl, dibh, 1); | ||
975 | |||
1042 | if (gfs2_is_stuffed(ip)) { | 976 | if (gfs2_is_stuffed(ip)) { |
1043 | u64 dsize = size + sizeof(struct gfs2_dinode); | 977 | gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode) + newsize); |
1044 | ip->i_disksize = size; | ||
1045 | ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME; | ||
1046 | gfs2_trans_add_bh(ip->i_gl, dibh, 1); | ||
1047 | gfs2_dinode_out(ip, dibh->b_data); | ||
1048 | if (dsize > dibh->b_size) | ||
1049 | dsize = dibh->b_size; | ||
1050 | gfs2_buffer_clear_tail(dibh, dsize); | ||
1051 | error = 1; | ||
1052 | } else { | 978 | } else { |
1053 | if (size & (u64)(sdp->sd_sb.sb_bsize - 1)) | 979 | if (newsize & (u64)(sdp->sd_sb.sb_bsize - 1)) { |
1054 | error = gfs2_block_truncate_page(ip->i_inode.i_mapping); | 980 | error = gfs2_block_truncate_page(mapping, newsize); |
1055 | 981 | if (error) | |
1056 | if (!error) { | 982 | goto out_brelse; |
1057 | ip->i_disksize = size; | ||
1058 | ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME; | ||
1059 | ip->i_diskflags |= GFS2_DIF_TRUNC_IN_PROG; | ||
1060 | gfs2_trans_add_bh(ip->i_gl, dibh, 1); | ||
1061 | gfs2_dinode_out(ip, dibh->b_data); | ||
1062 | } | 983 | } |
984 | ip->i_diskflags |= GFS2_DIF_TRUNC_IN_PROG; | ||
1063 | } | 985 | } |
1064 | 986 | ||
1065 | brelse(dibh); | 987 | i_size_write(inode, newsize); |
988 | ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME; | ||
989 | gfs2_dinode_out(ip, dibh->b_data); | ||
1066 | 990 | ||
991 | truncate_pagecache(inode, oldsize, newsize); | ||
992 | out_brelse: | ||
993 | brelse(dibh); | ||
1067 | out: | 994 | out: |
1068 | gfs2_trans_end(sdp); | 995 | gfs2_trans_end(sdp); |
1069 | return error; | 996 | return error; |
@@ -1123,7 +1050,7 @@ static int trunc_end(struct gfs2_inode *ip) | |||
1123 | if (error) | 1050 | if (error) |
1124 | goto out; | 1051 | goto out; |
1125 | 1052 | ||
1126 | if (!ip->i_disksize) { | 1053 | if (!i_size_read(&ip->i_inode)) { |
1127 | ip->i_height = 0; | 1054 | ip->i_height = 0; |
1128 | ip->i_goal = ip->i_no_addr; | 1055 | ip->i_goal = ip->i_no_addr; |
1129 | gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode)); | 1056 | gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode)); |
@@ -1143,92 +1070,154 @@ out: | |||
1143 | 1070 | ||
1144 | /** | 1071 | /** |
1145 | * do_shrink - make a file smaller | 1072 | * do_shrink - make a file smaller |
1146 | * @ip: the inode | 1073 | * @inode: the inode |
1147 | * @size: the size to make the file | 1074 | * @oldsize: the current inode size |
1148 | * @truncator: function to truncate the last partial block | 1075 | * @newsize: the size to make the file |
1149 | * | 1076 | * |
1150 | * Called with an exclusive lock on @ip. | 1077 | * Called with an exclusive lock on @inode. The @newsize must |
1078 | * be equal to or smaller than the current inode size. | ||
1151 | * | 1079 | * |
1152 | * Returns: errno | 1080 | * Returns: errno |
1153 | */ | 1081 | */ |
1154 | 1082 | ||
1155 | static int do_shrink(struct gfs2_inode *ip, u64 size) | 1083 | static int do_shrink(struct inode *inode, u64 oldsize, u64 newsize) |
1156 | { | 1084 | { |
1085 | struct gfs2_inode *ip = GFS2_I(inode); | ||
1157 | int error; | 1086 | int error; |
1158 | 1087 | ||
1159 | error = trunc_start(ip, size); | 1088 | error = trunc_start(inode, oldsize, newsize); |
1160 | if (error < 0) | 1089 | if (error < 0) |
1161 | return error; | 1090 | return error; |
1162 | if (error > 0) | 1091 | if (gfs2_is_stuffed(ip)) |
1163 | return 0; | 1092 | return 0; |
1164 | 1093 | ||
1165 | error = trunc_dealloc(ip, size); | 1094 | error = trunc_dealloc(ip, newsize); |
1166 | if (!error) | 1095 | if (error == 0) |
1167 | error = trunc_end(ip); | 1096 | error = trunc_end(ip); |
1168 | 1097 | ||
1169 | return error; | 1098 | return error; |
1170 | } | 1099 | } |
1171 | 1100 | ||
1172 | static int do_touch(struct gfs2_inode *ip, u64 size) | 1101 | void gfs2_trim_blocks(struct inode *inode) |
1173 | { | 1102 | { |
1174 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); | 1103 | u64 size = inode->i_size; |
1104 | int ret; | ||
1105 | |||
1106 | ret = do_shrink(inode, size, size); | ||
1107 | WARN_ON(ret != 0); | ||
1108 | } | ||
1109 | |||
1110 | /** | ||
1111 | * do_grow - Touch and update inode size | ||
1112 | * @inode: The inode | ||
1113 | * @size: The new size | ||
1114 | * | ||
1115 | * This function updates the timestamps on the inode and | ||
1116 | * may also increase the size of the inode. This function | ||
1117 | * must not be called with @size any smaller than the current | ||
1118 | * inode size. | ||
1119 | * | ||
1120 | * Although it is not strictly required to unstuff files here, | ||
1121 | * earlier versions of GFS2 have a bug in the stuffed file reading | ||
1122 | * code which will result in a buffer overrun if the size is larger | ||
1123 | * than the max stuffed file size. In order to prevent this from | ||
1124 | * occurring, such files are unstuffed, but in other cases we can | ||
1125 | * just update the inode size directly. | ||
1126 | * | ||
1127 | * Returns: 0 on success, or -ve on error | ||
1128 | */ | ||
1129 | |||
1130 | static int do_grow(struct inode *inode, u64 size) | ||
1131 | { | ||
1132 | struct gfs2_inode *ip = GFS2_I(inode); | ||
1133 | struct gfs2_sbd *sdp = GFS2_SB(inode); | ||
1175 | struct buffer_head *dibh; | 1134 | struct buffer_head *dibh; |
1135 | struct gfs2_alloc *al = NULL; | ||
1176 | int error; | 1136 | int error; |
1177 | 1137 | ||
1178 | error = gfs2_trans_begin(sdp, RES_DINODE, 0); | 1138 | if (gfs2_is_stuffed(ip) && |
1139 | (size > (sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode)))) { | ||
1140 | al = gfs2_alloc_get(ip); | ||
1141 | if (al == NULL) | ||
1142 | return -ENOMEM; | ||
1143 | |||
1144 | error = gfs2_quota_lock_check(ip); | ||
1145 | if (error) | ||
1146 | goto do_grow_alloc_put; | ||
1147 | |||
1148 | al->al_requested = 1; | ||
1149 | error = gfs2_inplace_reserve(ip); | ||
1150 | if (error) | ||
1151 | goto do_grow_qunlock; | ||
1152 | } | ||
1153 | |||
1154 | error = gfs2_trans_begin(sdp, RES_DINODE + RES_STATFS + RES_RG_BIT, 0); | ||
1179 | if (error) | 1155 | if (error) |
1180 | return error; | 1156 | goto do_grow_release; |
1181 | 1157 | ||
1182 | down_write(&ip->i_rw_mutex); | 1158 | if (al) { |
1159 | error = gfs2_unstuff_dinode(ip, NULL); | ||
1160 | if (error) | ||
1161 | goto do_end_trans; | ||
1162 | } | ||
1183 | 1163 | ||
1184 | error = gfs2_meta_inode_buffer(ip, &dibh); | 1164 | error = gfs2_meta_inode_buffer(ip, &dibh); |
1185 | if (error) | 1165 | if (error) |
1186 | goto do_touch_out; | 1166 | goto do_end_trans; |
1187 | 1167 | ||
1168 | i_size_write(inode, size); | ||
1188 | ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME; | 1169 | ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME; |
1189 | gfs2_trans_add_bh(ip->i_gl, dibh, 1); | 1170 | gfs2_trans_add_bh(ip->i_gl, dibh, 1); |
1190 | gfs2_dinode_out(ip, dibh->b_data); | 1171 | gfs2_dinode_out(ip, dibh->b_data); |
1191 | brelse(dibh); | 1172 | brelse(dibh); |
1192 | 1173 | ||
1193 | do_touch_out: | 1174 | do_end_trans: |
1194 | up_write(&ip->i_rw_mutex); | ||
1195 | gfs2_trans_end(sdp); | 1175 | gfs2_trans_end(sdp); |
1176 | do_grow_release: | ||
1177 | if (al) { | ||
1178 | gfs2_inplace_release(ip); | ||
1179 | do_grow_qunlock: | ||
1180 | gfs2_quota_unlock(ip); | ||
1181 | do_grow_alloc_put: | ||
1182 | gfs2_alloc_put(ip); | ||
1183 | } | ||
1196 | return error; | 1184 | return error; |
1197 | } | 1185 | } |
1198 | 1186 | ||
1199 | /** | 1187 | /** |
1200 | * gfs2_truncatei - make a file a given size | 1188 | * gfs2_setattr_size - make a file a given size |
1201 | * @ip: the inode | 1189 | * @inode: the inode |
1202 | * @size: the size to make the file | 1190 | * @newsize: the size to make the file |
1203 | * @truncator: function to truncate the last partial block | ||
1204 | * | 1191 | * |
1205 | * The file size can grow, shrink, or stay the same size. | 1192 | * The file size can grow, shrink, or stay the same size. This |
1193 | * is called holding i_mutex and an exclusive glock on the inode | ||
1194 | * in question. | ||
1206 | * | 1195 | * |
1207 | * Returns: errno | 1196 | * Returns: errno |
1208 | */ | 1197 | */ |
1209 | 1198 | ||
1210 | int gfs2_truncatei(struct gfs2_inode *ip, u64 size) | 1199 | int gfs2_setattr_size(struct inode *inode, u64 newsize) |
1211 | { | 1200 | { |
1212 | int error; | 1201 | int ret; |
1202 | u64 oldsize; | ||
1213 | 1203 | ||
1214 | if (gfs2_assert_warn(GFS2_SB(&ip->i_inode), S_ISREG(ip->i_inode.i_mode))) | 1204 | BUG_ON(!S_ISREG(inode->i_mode)); |
1215 | return -EINVAL; | ||
1216 | 1205 | ||
1217 | if (size > ip->i_disksize) | 1206 | ret = inode_newsize_ok(inode, newsize); |
1218 | error = do_grow(ip, size); | 1207 | if (ret) |
1219 | else if (size < ip->i_disksize) | 1208 | return ret; |
1220 | error = do_shrink(ip, size); | ||
1221 | else | ||
1222 | /* update time stamps */ | ||
1223 | error = do_touch(ip, size); | ||
1224 | 1209 | ||
1225 | return error; | 1210 | oldsize = inode->i_size; |
1211 | if (newsize >= oldsize) | ||
1212 | return do_grow(inode, newsize); | ||
1213 | |||
1214 | return do_shrink(inode, oldsize, newsize); | ||
1226 | } | 1215 | } |
1227 | 1216 | ||
1228 | int gfs2_truncatei_resume(struct gfs2_inode *ip) | 1217 | int gfs2_truncatei_resume(struct gfs2_inode *ip) |
1229 | { | 1218 | { |
1230 | int error; | 1219 | int error; |
1231 | error = trunc_dealloc(ip, ip->i_disksize); | 1220 | error = trunc_dealloc(ip, i_size_read(&ip->i_inode)); |
1232 | if (!error) | 1221 | if (!error) |
1233 | error = trunc_end(ip); | 1222 | error = trunc_end(ip); |
1234 | return error; | 1223 | return error; |
@@ -1269,7 +1258,7 @@ int gfs2_write_alloc_required(struct gfs2_inode *ip, u64 offset, | |||
1269 | 1258 | ||
1270 | shift = sdp->sd_sb.sb_bsize_shift; | 1259 | shift = sdp->sd_sb.sb_bsize_shift; |
1271 | BUG_ON(gfs2_is_dir(ip)); | 1260 | BUG_ON(gfs2_is_dir(ip)); |
1272 | end_of_file = (ip->i_disksize + sdp->sd_sb.sb_bsize - 1) >> shift; | 1261 | end_of_file = (i_size_read(&ip->i_inode) + sdp->sd_sb.sb_bsize - 1) >> shift; |
1273 | lblock = offset >> shift; | 1262 | lblock = offset >> shift; |
1274 | lblock_stop = (offset + len + sdp->sd_sb.sb_bsize - 1) >> shift; | 1263 | lblock_stop = (offset + len + sdp->sd_sb.sb_bsize - 1) >> shift; |
1275 | if (lblock_stop > end_of_file) | 1264 | if (lblock_stop > end_of_file) |
diff --git a/fs/gfs2/bmap.h b/fs/gfs2/bmap.h index a20a5213135a..42fea03e2bd9 100644 --- a/fs/gfs2/bmap.h +++ b/fs/gfs2/bmap.h | |||
@@ -44,14 +44,16 @@ static inline void gfs2_write_calc_reserv(const struct gfs2_inode *ip, | |||
44 | } | 44 | } |
45 | } | 45 | } |
46 | 46 | ||
47 | int gfs2_unstuff_dinode(struct gfs2_inode *ip, struct page *page); | 47 | extern int gfs2_unstuff_dinode(struct gfs2_inode *ip, struct page *page); |
48 | int gfs2_block_map(struct inode *inode, sector_t lblock, struct buffer_head *bh, int create); | 48 | extern int gfs2_block_map(struct inode *inode, sector_t lblock, |
49 | int gfs2_extent_map(struct inode *inode, u64 lblock, int *new, u64 *dblock, unsigned *extlen); | 49 | struct buffer_head *bh, int create); |
50 | 50 | extern int gfs2_extent_map(struct inode *inode, u64 lblock, int *new, | |
51 | int gfs2_truncatei(struct gfs2_inode *ip, u64 size); | 51 | u64 *dblock, unsigned *extlen); |
52 | int gfs2_truncatei_resume(struct gfs2_inode *ip); | 52 | extern int gfs2_setattr_size(struct inode *inode, u64 size); |
53 | int gfs2_file_dealloc(struct gfs2_inode *ip); | 53 | extern void gfs2_trim_blocks(struct inode *inode); |
54 | int gfs2_write_alloc_required(struct gfs2_inode *ip, u64 offset, | 54 | extern int gfs2_truncatei_resume(struct gfs2_inode *ip); |
55 | unsigned int len); | 55 | extern int gfs2_file_dealloc(struct gfs2_inode *ip); |
56 | extern int gfs2_write_alloc_required(struct gfs2_inode *ip, u64 offset, | ||
57 | unsigned int len); | ||
56 | 58 | ||
57 | #endif /* __BMAP_DOT_H__ */ | 59 | #endif /* __BMAP_DOT_H__ */ |
diff --git a/fs/gfs2/dentry.c b/fs/gfs2/dentry.c index bb7907bde3d8..6798755b3858 100644 --- a/fs/gfs2/dentry.c +++ b/fs/gfs2/dentry.c | |||
@@ -49,7 +49,7 @@ static int gfs2_drevalidate(struct dentry *dentry, struct nameidata *nd) | |||
49 | ip = GFS2_I(inode); | 49 | ip = GFS2_I(inode); |
50 | } | 50 | } |
51 | 51 | ||
52 | if (sdp->sd_args.ar_localcaching) | 52 | if (sdp->sd_lockstruct.ls_ops->lm_mount == NULL) |
53 | goto valid; | 53 | goto valid; |
54 | 54 | ||
55 | had_lock = (gfs2_glock_is_locked_by_me(dip->i_gl) != NULL); | 55 | had_lock = (gfs2_glock_is_locked_by_me(dip->i_gl) != NULL); |
diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c index b9dd88a78dd4..5c356d09c321 100644 --- a/fs/gfs2/dir.c +++ b/fs/gfs2/dir.c | |||
@@ -79,6 +79,9 @@ | |||
79 | #define gfs2_disk_hash2offset(h) (((u64)(h)) >> 1) | 79 | #define gfs2_disk_hash2offset(h) (((u64)(h)) >> 1) |
80 | #define gfs2_dir_offset2hash(p) ((u32)(((u64)(p)) << 1)) | 80 | #define gfs2_dir_offset2hash(p) ((u32)(((u64)(p)) << 1)) |
81 | 81 | ||
82 | struct qstr gfs2_qdot __read_mostly; | ||
83 | struct qstr gfs2_qdotdot __read_mostly; | ||
84 | |||
82 | typedef int (*leaf_call_t) (struct gfs2_inode *dip, u32 index, u32 len, | 85 | typedef int (*leaf_call_t) (struct gfs2_inode *dip, u32 index, u32 len, |
83 | u64 leaf_no, void *data); | 86 | u64 leaf_no, void *data); |
84 | typedef int (*gfs2_dscan_t)(const struct gfs2_dirent *dent, | 87 | typedef int (*gfs2_dscan_t)(const struct gfs2_dirent *dent, |
@@ -127,8 +130,8 @@ static int gfs2_dir_write_stuffed(struct gfs2_inode *ip, const char *buf, | |||
127 | 130 | ||
128 | gfs2_trans_add_bh(ip->i_gl, dibh, 1); | 131 | gfs2_trans_add_bh(ip->i_gl, dibh, 1); |
129 | memcpy(dibh->b_data + offset + sizeof(struct gfs2_dinode), buf, size); | 132 | memcpy(dibh->b_data + offset + sizeof(struct gfs2_dinode), buf, size); |
130 | if (ip->i_disksize < offset + size) | 133 | if (ip->i_inode.i_size < offset + size) |
131 | ip->i_disksize = offset + size; | 134 | i_size_write(&ip->i_inode, offset + size); |
132 | ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME; | 135 | ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME; |
133 | gfs2_dinode_out(ip, dibh->b_data); | 136 | gfs2_dinode_out(ip, dibh->b_data); |
134 | 137 | ||
@@ -225,8 +228,8 @@ out: | |||
225 | if (error) | 228 | if (error) |
226 | return error; | 229 | return error; |
227 | 230 | ||
228 | if (ip->i_disksize < offset + copied) | 231 | if (ip->i_inode.i_size < offset + copied) |
229 | ip->i_disksize = offset + copied; | 232 | i_size_write(&ip->i_inode, offset + copied); |
230 | ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME; | 233 | ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME; |
231 | 234 | ||
232 | gfs2_trans_add_bh(ip->i_gl, dibh, 1); | 235 | gfs2_trans_add_bh(ip->i_gl, dibh, 1); |
@@ -275,12 +278,13 @@ static int gfs2_dir_read_data(struct gfs2_inode *ip, char *buf, u64 offset, | |||
275 | unsigned int o; | 278 | unsigned int o; |
276 | int copied = 0; | 279 | int copied = 0; |
277 | int error = 0; | 280 | int error = 0; |
281 | u64 disksize = i_size_read(&ip->i_inode); | ||
278 | 282 | ||
279 | if (offset >= ip->i_disksize) | 283 | if (offset >= disksize) |
280 | return 0; | 284 | return 0; |
281 | 285 | ||
282 | if (offset + size > ip->i_disksize) | 286 | if (offset + size > disksize) |
283 | size = ip->i_disksize - offset; | 287 | size = disksize - offset; |
284 | 288 | ||
285 | if (!size) | 289 | if (!size) |
286 | return 0; | 290 | return 0; |
@@ -727,7 +731,7 @@ static struct gfs2_dirent *gfs2_dirent_search(struct inode *inode, | |||
727 | unsigned hsize = 1 << ip->i_depth; | 731 | unsigned hsize = 1 << ip->i_depth; |
728 | unsigned index; | 732 | unsigned index; |
729 | u64 ln; | 733 | u64 ln; |
730 | if (hsize * sizeof(u64) != ip->i_disksize) { | 734 | if (hsize * sizeof(u64) != i_size_read(inode)) { |
731 | gfs2_consist_inode(ip); | 735 | gfs2_consist_inode(ip); |
732 | return ERR_PTR(-EIO); | 736 | return ERR_PTR(-EIO); |
733 | } | 737 | } |
@@ -879,7 +883,7 @@ static int dir_make_exhash(struct inode *inode) | |||
879 | for (x = sdp->sd_hash_ptrs; x--; lp++) | 883 | for (x = sdp->sd_hash_ptrs; x--; lp++) |
880 | *lp = cpu_to_be64(bn); | 884 | *lp = cpu_to_be64(bn); |
881 | 885 | ||
882 | dip->i_disksize = sdp->sd_sb.sb_bsize / 2; | 886 | i_size_write(inode, sdp->sd_sb.sb_bsize / 2); |
883 | gfs2_add_inode_blocks(&dip->i_inode, 1); | 887 | gfs2_add_inode_blocks(&dip->i_inode, 1); |
884 | dip->i_diskflags |= GFS2_DIF_EXHASH; | 888 | dip->i_diskflags |= GFS2_DIF_EXHASH; |
885 | 889 | ||
@@ -1057,11 +1061,12 @@ static int dir_double_exhash(struct gfs2_inode *dip) | |||
1057 | u64 *buf; | 1061 | u64 *buf; |
1058 | u64 *from, *to; | 1062 | u64 *from, *to; |
1059 | u64 block; | 1063 | u64 block; |
1064 | u64 disksize = i_size_read(&dip->i_inode); | ||
1060 | int x; | 1065 | int x; |
1061 | int error = 0; | 1066 | int error = 0; |
1062 | 1067 | ||
1063 | hsize = 1 << dip->i_depth; | 1068 | hsize = 1 << dip->i_depth; |
1064 | if (hsize * sizeof(u64) != dip->i_disksize) { | 1069 | if (hsize * sizeof(u64) != disksize) { |
1065 | gfs2_consist_inode(dip); | 1070 | gfs2_consist_inode(dip); |
1066 | return -EIO; | 1071 | return -EIO; |
1067 | } | 1072 | } |
@@ -1072,7 +1077,7 @@ static int dir_double_exhash(struct gfs2_inode *dip) | |||
1072 | if (!buf) | 1077 | if (!buf) |
1073 | return -ENOMEM; | 1078 | return -ENOMEM; |
1074 | 1079 | ||
1075 | for (block = dip->i_disksize >> sdp->sd_hash_bsize_shift; block--;) { | 1080 | for (block = disksize >> sdp->sd_hash_bsize_shift; block--;) { |
1076 | error = gfs2_dir_read_data(dip, (char *)buf, | 1081 | error = gfs2_dir_read_data(dip, (char *)buf, |
1077 | block * sdp->sd_hash_bsize, | 1082 | block * sdp->sd_hash_bsize, |
1078 | sdp->sd_hash_bsize, 1); | 1083 | sdp->sd_hash_bsize, 1); |
@@ -1370,7 +1375,7 @@ static int dir_e_read(struct inode *inode, u64 *offset, void *opaque, | |||
1370 | unsigned depth = 0; | 1375 | unsigned depth = 0; |
1371 | 1376 | ||
1372 | hsize = 1 << dip->i_depth; | 1377 | hsize = 1 << dip->i_depth; |
1373 | if (hsize * sizeof(u64) != dip->i_disksize) { | 1378 | if (hsize * sizeof(u64) != i_size_read(inode)) { |
1374 | gfs2_consist_inode(dip); | 1379 | gfs2_consist_inode(dip); |
1375 | return -EIO; | 1380 | return -EIO; |
1376 | } | 1381 | } |
@@ -1784,7 +1789,7 @@ static int foreach_leaf(struct gfs2_inode *dip, leaf_call_t lc, void *data) | |||
1784 | int error = 0; | 1789 | int error = 0; |
1785 | 1790 | ||
1786 | hsize = 1 << dip->i_depth; | 1791 | hsize = 1 << dip->i_depth; |
1787 | if (hsize * sizeof(u64) != dip->i_disksize) { | 1792 | if (hsize * sizeof(u64) != i_size_read(&dip->i_inode)) { |
1788 | gfs2_consist_inode(dip); | 1793 | gfs2_consist_inode(dip); |
1789 | return -EIO; | 1794 | return -EIO; |
1790 | } | 1795 | } |
diff --git a/fs/gfs2/dir.h b/fs/gfs2/dir.h index 4f919440c3be..a98f644bd3df 100644 --- a/fs/gfs2/dir.h +++ b/fs/gfs2/dir.h | |||
@@ -17,23 +17,24 @@ struct inode; | |||
17 | struct gfs2_inode; | 17 | struct gfs2_inode; |
18 | struct gfs2_inum; | 18 | struct gfs2_inum; |
19 | 19 | ||
20 | struct inode *gfs2_dir_search(struct inode *dir, const struct qstr *filename); | 20 | extern struct inode *gfs2_dir_search(struct inode *dir, |
21 | int gfs2_dir_check(struct inode *dir, const struct qstr *filename, | 21 | const struct qstr *filename); |
22 | const struct gfs2_inode *ip); | 22 | extern int gfs2_dir_check(struct inode *dir, const struct qstr *filename, |
23 | int gfs2_dir_add(struct inode *inode, const struct qstr *filename, | 23 | const struct gfs2_inode *ip); |
24 | const struct gfs2_inode *ip, unsigned int type); | 24 | extern int gfs2_dir_add(struct inode *inode, const struct qstr *filename, |
25 | int gfs2_dir_del(struct gfs2_inode *dip, const struct qstr *filename); | 25 | const struct gfs2_inode *ip, unsigned int type); |
26 | int gfs2_dir_read(struct inode *inode, u64 *offset, void *opaque, | 26 | extern int gfs2_dir_del(struct gfs2_inode *dip, const struct qstr *filename); |
27 | filldir_t filldir); | 27 | extern int gfs2_dir_read(struct inode *inode, u64 *offset, void *opaque, |
28 | int gfs2_dir_mvino(struct gfs2_inode *dip, const struct qstr *filename, | 28 | filldir_t filldir); |
29 | const struct gfs2_inode *nip, unsigned int new_type); | 29 | extern int gfs2_dir_mvino(struct gfs2_inode *dip, const struct qstr *filename, |
30 | const struct gfs2_inode *nip, unsigned int new_type); | ||
30 | 31 | ||
31 | int gfs2_dir_exhash_dealloc(struct gfs2_inode *dip); | 32 | extern int gfs2_dir_exhash_dealloc(struct gfs2_inode *dip); |
32 | 33 | ||
33 | int gfs2_diradd_alloc_required(struct inode *dir, | 34 | extern int gfs2_diradd_alloc_required(struct inode *dir, |
34 | const struct qstr *filename); | 35 | const struct qstr *filename); |
35 | int gfs2_dir_get_new_buffer(struct gfs2_inode *ip, u64 block, | 36 | extern int gfs2_dir_get_new_buffer(struct gfs2_inode *ip, u64 block, |
36 | struct buffer_head **bhp); | 37 | struct buffer_head **bhp); |
37 | 38 | ||
38 | static inline u32 gfs2_disk_hash(const char *data, int len) | 39 | static inline u32 gfs2_disk_hash(const char *data, int len) |
39 | { | 40 | { |
@@ -61,4 +62,7 @@ static inline void gfs2_qstr2dirent(const struct qstr *name, u16 reclen, struct | |||
61 | memcpy(dent + 1, name->name, name->len); | 62 | memcpy(dent + 1, name->name, name->len); |
62 | } | 63 | } |
63 | 64 | ||
65 | extern struct qstr gfs2_qdot; | ||
66 | extern struct qstr gfs2_qdotdot; | ||
67 | |||
64 | #endif /* __DIR_DOT_H__ */ | 68 | #endif /* __DIR_DOT_H__ */ |
diff --git a/fs/gfs2/export.c b/fs/gfs2/export.c index dfe237a3f8ad..06d582732d34 100644 --- a/fs/gfs2/export.c +++ b/fs/gfs2/export.c | |||
@@ -126,16 +126,9 @@ static int gfs2_get_name(struct dentry *parent, char *name, | |||
126 | 126 | ||
127 | static struct dentry *gfs2_get_parent(struct dentry *child) | 127 | static struct dentry *gfs2_get_parent(struct dentry *child) |
128 | { | 128 | { |
129 | struct qstr dotdot; | ||
130 | struct dentry *dentry; | 129 | struct dentry *dentry; |
131 | 130 | ||
132 | /* | 131 | dentry = d_obtain_alias(gfs2_lookupi(child->d_inode, &gfs2_qdotdot, 1)); |
133 | * XXX(hch): it would be a good idea to keep this around as a | ||
134 | * static variable. | ||
135 | */ | ||
136 | gfs2_str2qstr(&dotdot, ".."); | ||
137 | |||
138 | dentry = d_obtain_alias(gfs2_lookupi(child->d_inode, &dotdot, 1)); | ||
139 | if (!IS_ERR(dentry)) | 132 | if (!IS_ERR(dentry)) |
140 | dentry->d_op = &gfs2_dops; | 133 | dentry->d_op = &gfs2_dops; |
141 | return dentry; | 134 | return dentry; |
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index 4edd662c8232..237ee6a940df 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c | |||
@@ -382,8 +382,10 @@ static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) | |||
382 | rblocks = RES_DINODE + ind_blocks; | 382 | rblocks = RES_DINODE + ind_blocks; |
383 | if (gfs2_is_jdata(ip)) | 383 | if (gfs2_is_jdata(ip)) |
384 | rblocks += data_blocks ? data_blocks : 1; | 384 | rblocks += data_blocks ? data_blocks : 1; |
385 | if (ind_blocks || data_blocks) | 385 | if (ind_blocks || data_blocks) { |
386 | rblocks += RES_STATFS + RES_QUOTA; | 386 | rblocks += RES_STATFS + RES_QUOTA; |
387 | rblocks += gfs2_rg_blocks(al); | ||
388 | } | ||
387 | ret = gfs2_trans_begin(sdp, rblocks, 0); | 389 | ret = gfs2_trans_begin(sdp, rblocks, 0); |
388 | if (ret) | 390 | if (ret) |
389 | goto out_trans_fail; | 391 | goto out_trans_fail; |
@@ -491,7 +493,7 @@ static int gfs2_open(struct inode *inode, struct file *file) | |||
491 | goto fail; | 493 | goto fail; |
492 | 494 | ||
493 | if (!(file->f_flags & O_LARGEFILE) && | 495 | if (!(file->f_flags & O_LARGEFILE) && |
494 | ip->i_disksize > MAX_NON_LFS) { | 496 | i_size_read(inode) > MAX_NON_LFS) { |
495 | error = -EOVERFLOW; | 497 | error = -EOVERFLOW; |
496 | goto fail_gunlock; | 498 | goto fail_gunlock; |
497 | } | 499 | } |
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index 9adf8f924e08..87778857f099 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c | |||
@@ -441,6 +441,8 @@ static void state_change(struct gfs2_glock *gl, unsigned int new_state) | |||
441 | else | 441 | else |
442 | gfs2_glock_put_nolock(gl); | 442 | gfs2_glock_put_nolock(gl); |
443 | } | 443 | } |
444 | if (held1 && held2 && list_empty(&gl->gl_holders)) | ||
445 | clear_bit(GLF_QUEUED, &gl->gl_flags); | ||
444 | 446 | ||
445 | gl->gl_state = new_state; | 447 | gl->gl_state = new_state; |
446 | gl->gl_tchange = jiffies; | 448 | gl->gl_tchange = jiffies; |
@@ -1012,6 +1014,7 @@ fail: | |||
1012 | if (unlikely((gh->gh_flags & LM_FLAG_PRIORITY) && !insert_pt)) | 1014 | if (unlikely((gh->gh_flags & LM_FLAG_PRIORITY) && !insert_pt)) |
1013 | insert_pt = &gh2->gh_list; | 1015 | insert_pt = &gh2->gh_list; |
1014 | } | 1016 | } |
1017 | set_bit(GLF_QUEUED, &gl->gl_flags); | ||
1015 | if (likely(insert_pt == NULL)) { | 1018 | if (likely(insert_pt == NULL)) { |
1016 | list_add_tail(&gh->gh_list, &gl->gl_holders); | 1019 | list_add_tail(&gh->gh_list, &gl->gl_holders); |
1017 | if (unlikely(gh->gh_flags & LM_FLAG_PRIORITY)) | 1020 | if (unlikely(gh->gh_flags & LM_FLAG_PRIORITY)) |
@@ -1310,10 +1313,12 @@ void gfs2_glock_cb(struct gfs2_glock *gl, unsigned int state) | |||
1310 | 1313 | ||
1311 | gfs2_glock_hold(gl); | 1314 | gfs2_glock_hold(gl); |
1312 | holdtime = gl->gl_tchange + gl->gl_ops->go_min_hold_time; | 1315 | holdtime = gl->gl_tchange + gl->gl_ops->go_min_hold_time; |
1313 | if (time_before(now, holdtime)) | 1316 | if (test_bit(GLF_QUEUED, &gl->gl_flags)) { |
1314 | delay = holdtime - now; | 1317 | if (time_before(now, holdtime)) |
1315 | if (test_bit(GLF_REPLY_PENDING, &gl->gl_flags)) | 1318 | delay = holdtime - now; |
1316 | delay = gl->gl_ops->go_min_hold_time; | 1319 | if (test_bit(GLF_REPLY_PENDING, &gl->gl_flags)) |
1320 | delay = gl->gl_ops->go_min_hold_time; | ||
1321 | } | ||
1317 | 1322 | ||
1318 | spin_lock(&gl->gl_spin); | 1323 | spin_lock(&gl->gl_spin); |
1319 | handle_callback(gl, state, delay); | 1324 | handle_callback(gl, state, delay); |
@@ -1512,7 +1517,7 @@ static void clear_glock(struct gfs2_glock *gl) | |||
1512 | spin_unlock(&lru_lock); | 1517 | spin_unlock(&lru_lock); |
1513 | 1518 | ||
1514 | spin_lock(&gl->gl_spin); | 1519 | spin_lock(&gl->gl_spin); |
1515 | if (find_first_holder(gl) == NULL && gl->gl_state != LM_ST_UNLOCKED) | 1520 | if (gl->gl_state != LM_ST_UNLOCKED) |
1516 | handle_callback(gl, LM_ST_UNLOCKED, 0); | 1521 | handle_callback(gl, LM_ST_UNLOCKED, 0); |
1517 | spin_unlock(&gl->gl_spin); | 1522 | spin_unlock(&gl->gl_spin); |
1518 | gfs2_glock_hold(gl); | 1523 | gfs2_glock_hold(gl); |
@@ -1660,6 +1665,8 @@ static const char *gflags2str(char *buf, const unsigned long *gflags) | |||
1660 | *p++ = 'I'; | 1665 | *p++ = 'I'; |
1661 | if (test_bit(GLF_FROZEN, gflags)) | 1666 | if (test_bit(GLF_FROZEN, gflags)) |
1662 | *p++ = 'F'; | 1667 | *p++ = 'F'; |
1668 | if (test_bit(GLF_QUEUED, gflags)) | ||
1669 | *p++ = 'q'; | ||
1663 | *p = 0; | 1670 | *p = 0; |
1664 | return buf; | 1671 | return buf; |
1665 | } | 1672 | } |
@@ -1776,10 +1783,12 @@ int __init gfs2_glock_init(void) | |||
1776 | } | 1783 | } |
1777 | #endif | 1784 | #endif |
1778 | 1785 | ||
1779 | glock_workqueue = create_workqueue("glock_workqueue"); | 1786 | glock_workqueue = alloc_workqueue("glock_workqueue", WQ_RESCUER | |
1787 | WQ_HIGHPRI | WQ_FREEZEABLE, 0); | ||
1780 | if (IS_ERR(glock_workqueue)) | 1788 | if (IS_ERR(glock_workqueue)) |
1781 | return PTR_ERR(glock_workqueue); | 1789 | return PTR_ERR(glock_workqueue); |
1782 | gfs2_delete_workqueue = create_workqueue("delete_workqueue"); | 1790 | gfs2_delete_workqueue = alloc_workqueue("delete_workqueue", WQ_RESCUER | |
1791 | WQ_FREEZEABLE, 0); | ||
1783 | if (IS_ERR(gfs2_delete_workqueue)) { | 1792 | if (IS_ERR(gfs2_delete_workqueue)) { |
1784 | destroy_workqueue(glock_workqueue); | 1793 | destroy_workqueue(glock_workqueue); |
1785 | return PTR_ERR(gfs2_delete_workqueue); | 1794 | return PTR_ERR(gfs2_delete_workqueue); |
diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h index 2bda1911b156..db1c26d6d220 100644 --- a/fs/gfs2/glock.h +++ b/fs/gfs2/glock.h | |||
@@ -215,7 +215,7 @@ void gfs2_glock_dq_uninit_m(unsigned int num_gh, struct gfs2_holder *ghs); | |||
215 | void gfs2_print_dbg(struct seq_file *seq, const char *fmt, ...); | 215 | void gfs2_print_dbg(struct seq_file *seq, const char *fmt, ...); |
216 | 216 | ||
217 | /** | 217 | /** |
218 | * gfs2_glock_nq_init - intialize a holder and enqueue it on a glock | 218 | * gfs2_glock_nq_init - initialize a holder and enqueue it on a glock |
219 | * @gl: the glock | 219 | * @gl: the glock |
220 | * @state: the state we're requesting | 220 | * @state: the state we're requesting |
221 | * @flags: the modifier flags | 221 | * @flags: the modifier flags |
diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c index 49f97d3bb690..0d149dcc04e5 100644 --- a/fs/gfs2/glops.c +++ b/fs/gfs2/glops.c | |||
@@ -262,13 +262,12 @@ static int inode_go_dump(struct seq_file *seq, const struct gfs2_glock *gl) | |||
262 | const struct gfs2_inode *ip = gl->gl_object; | 262 | const struct gfs2_inode *ip = gl->gl_object; |
263 | if (ip == NULL) | 263 | if (ip == NULL) |
264 | return 0; | 264 | return 0; |
265 | gfs2_print_dbg(seq, " I: n:%llu/%llu t:%u f:0x%02lx d:0x%08x s:%llu/%llu\n", | 265 | gfs2_print_dbg(seq, " I: n:%llu/%llu t:%u f:0x%02lx d:0x%08x s:%llu\n", |
266 | (unsigned long long)ip->i_no_formal_ino, | 266 | (unsigned long long)ip->i_no_formal_ino, |
267 | (unsigned long long)ip->i_no_addr, | 267 | (unsigned long long)ip->i_no_addr, |
268 | IF2DT(ip->i_inode.i_mode), ip->i_flags, | 268 | IF2DT(ip->i_inode.i_mode), ip->i_flags, |
269 | (unsigned int)ip->i_diskflags, | 269 | (unsigned int)ip->i_diskflags, |
270 | (unsigned long long)ip->i_inode.i_size, | 270 | (unsigned long long)i_size_read(&ip->i_inode)); |
271 | (unsigned long long)ip->i_disksize); | ||
272 | return 0; | 271 | return 0; |
273 | } | 272 | } |
274 | 273 | ||
@@ -453,7 +452,6 @@ const struct gfs2_glock_operations *gfs2_glops_list[] = { | |||
453 | [LM_TYPE_META] = &gfs2_meta_glops, | 452 | [LM_TYPE_META] = &gfs2_meta_glops, |
454 | [LM_TYPE_INODE] = &gfs2_inode_glops, | 453 | [LM_TYPE_INODE] = &gfs2_inode_glops, |
455 | [LM_TYPE_RGRP] = &gfs2_rgrp_glops, | 454 | [LM_TYPE_RGRP] = &gfs2_rgrp_glops, |
456 | [LM_TYPE_NONDISK] = &gfs2_trans_glops, | ||
457 | [LM_TYPE_IOPEN] = &gfs2_iopen_glops, | 455 | [LM_TYPE_IOPEN] = &gfs2_iopen_glops, |
458 | [LM_TYPE_FLOCK] = &gfs2_flock_glops, | 456 | [LM_TYPE_FLOCK] = &gfs2_flock_glops, |
459 | [LM_TYPE_NONDISK] = &gfs2_nondisk_glops, | 457 | [LM_TYPE_NONDISK] = &gfs2_nondisk_glops, |
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index fdbf4b366fa5..764fbb49efc8 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h | |||
@@ -196,6 +196,7 @@ enum { | |||
196 | GLF_REPLY_PENDING = 9, | 196 | GLF_REPLY_PENDING = 9, |
197 | GLF_INITIAL = 10, | 197 | GLF_INITIAL = 10, |
198 | GLF_FROZEN = 11, | 198 | GLF_FROZEN = 11, |
199 | GLF_QUEUED = 12, | ||
199 | }; | 200 | }; |
200 | 201 | ||
201 | struct gfs2_glock { | 202 | struct gfs2_glock { |
@@ -267,7 +268,6 @@ struct gfs2_inode { | |||
267 | u64 i_no_formal_ino; | 268 | u64 i_no_formal_ino; |
268 | u64 i_generation; | 269 | u64 i_generation; |
269 | u64 i_eattr; | 270 | u64 i_eattr; |
270 | loff_t i_disksize; | ||
271 | unsigned long i_flags; /* GIF_... */ | 271 | unsigned long i_flags; /* GIF_... */ |
272 | struct gfs2_glock *i_gl; /* Move into i_gh? */ | 272 | struct gfs2_glock *i_gl; /* Move into i_gh? */ |
273 | struct gfs2_holder i_iopen_gh; | 273 | struct gfs2_holder i_iopen_gh; |
@@ -416,11 +416,8 @@ struct gfs2_args { | |||
416 | char ar_locktable[GFS2_LOCKNAME_LEN]; /* Name of the Lock Table */ | 416 | char ar_locktable[GFS2_LOCKNAME_LEN]; /* Name of the Lock Table */ |
417 | char ar_hostdata[GFS2_LOCKNAME_LEN]; /* Host specific data */ | 417 | char ar_hostdata[GFS2_LOCKNAME_LEN]; /* Host specific data */ |
418 | unsigned int ar_spectator:1; /* Don't get a journal */ | 418 | unsigned int ar_spectator:1; /* Don't get a journal */ |
419 | unsigned int ar_ignore_local_fs:1; /* Ignore optimisations */ | ||
420 | unsigned int ar_localflocks:1; /* Let the VFS do flock|fcntl */ | 419 | unsigned int ar_localflocks:1; /* Let the VFS do flock|fcntl */ |
421 | unsigned int ar_localcaching:1; /* Local caching */ | ||
422 | unsigned int ar_debug:1; /* Oops on errors */ | 420 | unsigned int ar_debug:1; /* Oops on errors */ |
423 | unsigned int ar_upgrade:1; /* Upgrade ondisk format */ | ||
424 | unsigned int ar_posix_acl:1; /* Enable posix acls */ | 421 | unsigned int ar_posix_acl:1; /* Enable posix acls */ |
425 | unsigned int ar_quota:2; /* off/account/on */ | 422 | unsigned int ar_quota:2; /* off/account/on */ |
426 | unsigned int ar_suiddir:1; /* suiddir support */ | 423 | unsigned int ar_suiddir:1; /* suiddir support */ |
@@ -497,7 +494,7 @@ struct gfs2_sb_host { | |||
497 | */ | 494 | */ |
498 | 495 | ||
499 | struct lm_lockstruct { | 496 | struct lm_lockstruct { |
500 | unsigned int ls_jid; | 497 | int ls_jid; |
501 | unsigned int ls_first; | 498 | unsigned int ls_first; |
502 | unsigned int ls_first_done; | 499 | unsigned int ls_first_done; |
503 | unsigned int ls_nodir; | 500 | unsigned int ls_nodir; |
@@ -572,6 +569,7 @@ struct gfs2_sbd { | |||
572 | struct list_head sd_rindex_mru_list; | 569 | struct list_head sd_rindex_mru_list; |
573 | struct gfs2_rgrpd *sd_rindex_forward; | 570 | struct gfs2_rgrpd *sd_rindex_forward; |
574 | unsigned int sd_rgrps; | 571 | unsigned int sd_rgrps; |
572 | unsigned int sd_max_rg_data; | ||
575 | 573 | ||
576 | /* Journal index stuff */ | 574 | /* Journal index stuff */ |
577 | 575 | ||
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index 08140f185a37..06370f8bd8cf 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c | |||
@@ -359,8 +359,7 @@ static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf) | |||
359 | * to do that. | 359 | * to do that. |
360 | */ | 360 | */ |
361 | ip->i_inode.i_nlink = be32_to_cpu(str->di_nlink); | 361 | ip->i_inode.i_nlink = be32_to_cpu(str->di_nlink); |
362 | ip->i_disksize = be64_to_cpu(str->di_size); | 362 | i_size_write(&ip->i_inode, be64_to_cpu(str->di_size)); |
363 | i_size_write(&ip->i_inode, ip->i_disksize); | ||
364 | gfs2_set_inode_blocks(&ip->i_inode, be64_to_cpu(str->di_blocks)); | 363 | gfs2_set_inode_blocks(&ip->i_inode, be64_to_cpu(str->di_blocks)); |
365 | atime.tv_sec = be64_to_cpu(str->di_atime); | 364 | atime.tv_sec = be64_to_cpu(str->di_atime); |
366 | atime.tv_nsec = be32_to_cpu(str->di_atime_nsec); | 365 | atime.tv_nsec = be32_to_cpu(str->di_atime_nsec); |
@@ -1055,7 +1054,7 @@ void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf) | |||
1055 | str->di_uid = cpu_to_be32(ip->i_inode.i_uid); | 1054 | str->di_uid = cpu_to_be32(ip->i_inode.i_uid); |
1056 | str->di_gid = cpu_to_be32(ip->i_inode.i_gid); | 1055 | str->di_gid = cpu_to_be32(ip->i_inode.i_gid); |
1057 | str->di_nlink = cpu_to_be32(ip->i_inode.i_nlink); | 1056 | str->di_nlink = cpu_to_be32(ip->i_inode.i_nlink); |
1058 | str->di_size = cpu_to_be64(ip->i_disksize); | 1057 | str->di_size = cpu_to_be64(i_size_read(&ip->i_inode)); |
1059 | str->di_blocks = cpu_to_be64(gfs2_get_inode_blocks(&ip->i_inode)); | 1058 | str->di_blocks = cpu_to_be64(gfs2_get_inode_blocks(&ip->i_inode)); |
1060 | str->di_atime = cpu_to_be64(ip->i_inode.i_atime.tv_sec); | 1059 | str->di_atime = cpu_to_be64(ip->i_inode.i_atime.tv_sec); |
1061 | str->di_mtime = cpu_to_be64(ip->i_inode.i_mtime.tv_sec); | 1060 | str->di_mtime = cpu_to_be64(ip->i_inode.i_mtime.tv_sec); |
@@ -1085,8 +1084,8 @@ void gfs2_dinode_print(const struct gfs2_inode *ip) | |||
1085 | (unsigned long long)ip->i_no_formal_ino); | 1084 | (unsigned long long)ip->i_no_formal_ino); |
1086 | printk(KERN_INFO " no_addr = %llu\n", | 1085 | printk(KERN_INFO " no_addr = %llu\n", |
1087 | (unsigned long long)ip->i_no_addr); | 1086 | (unsigned long long)ip->i_no_addr); |
1088 | printk(KERN_INFO " i_disksize = %llu\n", | 1087 | printk(KERN_INFO " i_size = %llu\n", |
1089 | (unsigned long long)ip->i_disksize); | 1088 | (unsigned long long)i_size_read(&ip->i_inode)); |
1090 | printk(KERN_INFO " blocks = %llu\n", | 1089 | printk(KERN_INFO " blocks = %llu\n", |
1091 | (unsigned long long)gfs2_get_inode_blocks(&ip->i_inode)); | 1090 | (unsigned long long)gfs2_get_inode_blocks(&ip->i_inode)); |
1092 | printk(KERN_INFO " i_goal = %llu\n", | 1091 | printk(KERN_INFO " i_goal = %llu\n", |
diff --git a/fs/gfs2/inode.h b/fs/gfs2/inode.h index 300ada3f21de..6720d7d5fbc6 100644 --- a/fs/gfs2/inode.h +++ b/fs/gfs2/inode.h | |||
@@ -19,6 +19,8 @@ extern int gfs2_releasepage(struct page *page, gfp_t gfp_mask); | |||
19 | extern int gfs2_internal_read(struct gfs2_inode *ip, | 19 | extern int gfs2_internal_read(struct gfs2_inode *ip, |
20 | struct file_ra_state *ra_state, | 20 | struct file_ra_state *ra_state, |
21 | char *buf, loff_t *pos, unsigned size); | 21 | char *buf, loff_t *pos, unsigned size); |
22 | extern void gfs2_page_add_databufs(struct gfs2_inode *ip, struct page *page, | ||
23 | unsigned int from, unsigned int to); | ||
22 | extern void gfs2_set_aops(struct inode *inode); | 24 | extern void gfs2_set_aops(struct inode *inode); |
23 | 25 | ||
24 | static inline int gfs2_is_stuffed(const struct gfs2_inode *ip) | 26 | static inline int gfs2_is_stuffed(const struct gfs2_inode *ip) |
@@ -80,6 +82,19 @@ static inline void gfs2_inum_out(const struct gfs2_inode *ip, | |||
80 | dent->de_inum.no_addr = cpu_to_be64(ip->i_no_addr); | 82 | dent->de_inum.no_addr = cpu_to_be64(ip->i_no_addr); |
81 | } | 83 | } |
82 | 84 | ||
85 | static inline int gfs2_check_internal_file_size(struct inode *inode, | ||
86 | u64 minsize, u64 maxsize) | ||
87 | { | ||
88 | u64 size = i_size_read(inode); | ||
89 | if (size < minsize || size > maxsize) | ||
90 | goto err; | ||
91 | if (size & ((1 << inode->i_blkbits) - 1)) | ||
92 | goto err; | ||
93 | return 0; | ||
94 | err: | ||
95 | gfs2_consist_inode(GFS2_I(inode)); | ||
96 | return -EIO; | ||
97 | } | ||
83 | 98 | ||
84 | extern void gfs2_set_iop(struct inode *inode); | 99 | extern void gfs2_set_iop(struct inode *inode); |
85 | extern struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned type, | 100 | extern struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned type, |
diff --git a/fs/gfs2/lock_dlm.c b/fs/gfs2/lock_dlm.c index 0e0470ed34c2..1c09425b45fd 100644 --- a/fs/gfs2/lock_dlm.c +++ b/fs/gfs2/lock_dlm.c | |||
@@ -42,9 +42,9 @@ static void gdlm_ast(void *arg) | |||
42 | ret |= LM_OUT_CANCELED; | 42 | ret |= LM_OUT_CANCELED; |
43 | goto out; | 43 | goto out; |
44 | case -EAGAIN: /* Try lock fails */ | 44 | case -EAGAIN: /* Try lock fails */ |
45 | case -EDEADLK: /* Deadlock detected */ | ||
45 | goto out; | 46 | goto out; |
46 | case -EINVAL: /* Invalid */ | 47 | case -ETIMEDOUT: /* Canceled due to timeout */ |
47 | case -ENOMEM: /* Out of memory */ | ||
48 | ret |= LM_OUT_ERROR; | 48 | ret |= LM_OUT_ERROR; |
49 | goto out; | 49 | goto out; |
50 | case 0: /* Success */ | 50 | case 0: /* Success */ |
diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c index b1e9630eb46a..d7eb1e209aa8 100644 --- a/fs/gfs2/main.c +++ b/fs/gfs2/main.c | |||
@@ -24,6 +24,7 @@ | |||
24 | #include "glock.h" | 24 | #include "glock.h" |
25 | #include "quota.h" | 25 | #include "quota.h" |
26 | #include "recovery.h" | 26 | #include "recovery.h" |
27 | #include "dir.h" | ||
27 | 28 | ||
28 | static struct shrinker qd_shrinker = { | 29 | static struct shrinker qd_shrinker = { |
29 | .shrink = gfs2_shrink_qd_memory, | 30 | .shrink = gfs2_shrink_qd_memory, |
@@ -78,6 +79,9 @@ static int __init init_gfs2_fs(void) | |||
78 | { | 79 | { |
79 | int error; | 80 | int error; |
80 | 81 | ||
82 | gfs2_str2qstr(&gfs2_qdot, "."); | ||
83 | gfs2_str2qstr(&gfs2_qdotdot, ".."); | ||
84 | |||
81 | error = gfs2_sys_init(); | 85 | error = gfs2_sys_init(); |
82 | if (error) | 86 | if (error) |
83 | return error; | 87 | return error; |
@@ -140,7 +144,7 @@ static int __init init_gfs2_fs(void) | |||
140 | 144 | ||
141 | error = -ENOMEM; | 145 | error = -ENOMEM; |
142 | gfs_recovery_wq = alloc_workqueue("gfs_recovery", | 146 | gfs_recovery_wq = alloc_workqueue("gfs_recovery", |
143 | WQ_NON_REENTRANT | WQ_RESCUER, 0); | 147 | WQ_RESCUER | WQ_FREEZEABLE, 0); |
144 | if (!gfs_recovery_wq) | 148 | if (!gfs_recovery_wq) |
145 | goto fail_wq; | 149 | goto fail_wq; |
146 | 150 | ||
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index 4d4b1e8ac64c..aeafc233dc89 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c | |||
@@ -38,14 +38,6 @@ | |||
38 | #define DO 0 | 38 | #define DO 0 |
39 | #define UNDO 1 | 39 | #define UNDO 1 |
40 | 40 | ||
41 | static const u32 gfs2_old_fs_formats[] = { | ||
42 | 0 | ||
43 | }; | ||
44 | |||
45 | static const u32 gfs2_old_multihost_formats[] = { | ||
46 | 0 | ||
47 | }; | ||
48 | |||
49 | /** | 41 | /** |
50 | * gfs2_tune_init - Fill a gfs2_tune structure with default values | 42 | * gfs2_tune_init - Fill a gfs2_tune structure with default values |
51 | * @gt: tune | 43 | * @gt: tune |
@@ -135,8 +127,6 @@ static struct gfs2_sbd *init_sbd(struct super_block *sb) | |||
135 | 127 | ||
136 | static int gfs2_check_sb(struct gfs2_sbd *sdp, struct gfs2_sb_host *sb, int silent) | 128 | static int gfs2_check_sb(struct gfs2_sbd *sdp, struct gfs2_sb_host *sb, int silent) |
137 | { | 129 | { |
138 | unsigned int x; | ||
139 | |||
140 | if (sb->sb_magic != GFS2_MAGIC || | 130 | if (sb->sb_magic != GFS2_MAGIC || |
141 | sb->sb_type != GFS2_METATYPE_SB) { | 131 | sb->sb_type != GFS2_METATYPE_SB) { |
142 | if (!silent) | 132 | if (!silent) |
@@ -150,55 +140,9 @@ static int gfs2_check_sb(struct gfs2_sbd *sdp, struct gfs2_sb_host *sb, int sile | |||
150 | sb->sb_multihost_format == GFS2_FORMAT_MULTI) | 140 | sb->sb_multihost_format == GFS2_FORMAT_MULTI) |
151 | return 0; | 141 | return 0; |
152 | 142 | ||
153 | if (sb->sb_fs_format != GFS2_FORMAT_FS) { | 143 | fs_warn(sdp, "Unknown on-disk format, unable to mount\n"); |
154 | for (x = 0; gfs2_old_fs_formats[x]; x++) | ||
155 | if (gfs2_old_fs_formats[x] == sb->sb_fs_format) | ||
156 | break; | ||
157 | 144 | ||
158 | if (!gfs2_old_fs_formats[x]) { | 145 | return -EINVAL; |
159 | printk(KERN_WARNING | ||
160 | "GFS2: code version (%u, %u) is incompatible " | ||
161 | "with ondisk format (%u, %u)\n", | ||
162 | GFS2_FORMAT_FS, GFS2_FORMAT_MULTI, | ||
163 | sb->sb_fs_format, sb->sb_multihost_format); | ||
164 | printk(KERN_WARNING | ||
165 | "GFS2: I don't know how to upgrade this FS\n"); | ||
166 | return -EINVAL; | ||
167 | } | ||
168 | } | ||
169 | |||
170 | if (sb->sb_multihost_format != GFS2_FORMAT_MULTI) { | ||
171 | for (x = 0; gfs2_old_multihost_formats[x]; x++) | ||
172 | if (gfs2_old_multihost_formats[x] == | ||
173 | sb->sb_multihost_format) | ||
174 | break; | ||
175 | |||
176 | if (!gfs2_old_multihost_formats[x]) { | ||
177 | printk(KERN_WARNING | ||
178 | "GFS2: code version (%u, %u) is incompatible " | ||
179 | "with ondisk format (%u, %u)\n", | ||
180 | GFS2_FORMAT_FS, GFS2_FORMAT_MULTI, | ||
181 | sb->sb_fs_format, sb->sb_multihost_format); | ||
182 | printk(KERN_WARNING | ||
183 | "GFS2: I don't know how to upgrade this FS\n"); | ||
184 | return -EINVAL; | ||
185 | } | ||
186 | } | ||
187 | |||
188 | if (!sdp->sd_args.ar_upgrade) { | ||
189 | printk(KERN_WARNING | ||
190 | "GFS2: code version (%u, %u) is incompatible " | ||
191 | "with ondisk format (%u, %u)\n", | ||
192 | GFS2_FORMAT_FS, GFS2_FORMAT_MULTI, | ||
193 | sb->sb_fs_format, sb->sb_multihost_format); | ||
194 | printk(KERN_INFO | ||
195 | "GFS2: Use the \"upgrade\" mount option to upgrade " | ||
196 | "the FS\n"); | ||
197 | printk(KERN_INFO "GFS2: See the manual for more details\n"); | ||
198 | return -EINVAL; | ||
199 | } | ||
200 | |||
201 | return 0; | ||
202 | } | 146 | } |
203 | 147 | ||
204 | static void end_bio_io_page(struct bio *bio, int error) | 148 | static void end_bio_io_page(struct bio *bio, int error) |
@@ -586,7 +530,7 @@ static int map_journal_extents(struct gfs2_sbd *sdp) | |||
586 | 530 | ||
587 | prev_db = 0; | 531 | prev_db = 0; |
588 | 532 | ||
589 | for (lb = 0; lb < ip->i_disksize >> sdp->sd_sb.sb_bsize_shift; lb++) { | 533 | for (lb = 0; lb < i_size_read(jd->jd_inode) >> sdp->sd_sb.sb_bsize_shift; lb++) { |
590 | bh.b_state = 0; | 534 | bh.b_state = 0; |
591 | bh.b_blocknr = 0; | 535 | bh.b_blocknr = 0; |
592 | bh.b_size = 1 << ip->i_inode.i_blkbits; | 536 | bh.b_size = 1 << ip->i_inode.i_blkbits; |
@@ -1022,7 +966,6 @@ static int gfs2_lm_mount(struct gfs2_sbd *sdp, int silent) | |||
1022 | if (!strcmp("lock_nolock", proto)) { | 966 | if (!strcmp("lock_nolock", proto)) { |
1023 | lm = &nolock_ops; | 967 | lm = &nolock_ops; |
1024 | sdp->sd_args.ar_localflocks = 1; | 968 | sdp->sd_args.ar_localflocks = 1; |
1025 | sdp->sd_args.ar_localcaching = 1; | ||
1026 | #ifdef CONFIG_GFS2_FS_LOCKING_DLM | 969 | #ifdef CONFIG_GFS2_FS_LOCKING_DLM |
1027 | } else if (!strcmp("lock_dlm", proto)) { | 970 | } else if (!strcmp("lock_dlm", proto)) { |
1028 | lm = &gfs2_dlm_ops; | 971 | lm = &gfs2_dlm_ops; |
@@ -1113,8 +1056,6 @@ static int gfs2_journalid_wait(void *word) | |||
1113 | 1056 | ||
1114 | static int wait_on_journal(struct gfs2_sbd *sdp) | 1057 | static int wait_on_journal(struct gfs2_sbd *sdp) |
1115 | { | 1058 | { |
1116 | if (sdp->sd_args.ar_spectator) | ||
1117 | return 0; | ||
1118 | if (sdp->sd_lockstruct.ls_ops->lm_mount == NULL) | 1059 | if (sdp->sd_lockstruct.ls_ops->lm_mount == NULL) |
1119 | return 0; | 1060 | return 0; |
1120 | 1061 | ||
@@ -1217,6 +1158,20 @@ static int fill_super(struct super_block *sb, struct gfs2_args *args, int silent | |||
1217 | if (error) | 1158 | if (error) |
1218 | goto fail_sb; | 1159 | goto fail_sb; |
1219 | 1160 | ||
1161 | /* | ||
1162 | * If user space has failed to join the cluster or some similar | ||
1163 | * failure has occurred, then the journal id will contain a | ||
1164 | * negative (error) number. This will then be returned to the | ||
1165 | * caller (of the mount syscall). We do this even for spectator | ||
1166 | * mounts (which just write a jid of 0 to indicate "ok" even though | ||
1167 | * the jid is unused in the spectator case) | ||
1168 | */ | ||
1169 | if (sdp->sd_lockstruct.ls_jid < 0) { | ||
1170 | error = sdp->sd_lockstruct.ls_jid; | ||
1171 | sdp->sd_lockstruct.ls_jid = 0; | ||
1172 | goto fail_sb; | ||
1173 | } | ||
1174 | |||
1220 | error = init_inodes(sdp, DO); | 1175 | error = init_inodes(sdp, DO); |
1221 | if (error) | 1176 | if (error) |
1222 | goto fail_sb; | 1177 | goto fail_sb; |
diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c index 1009be2c9737..0534510200d5 100644 --- a/fs/gfs2/ops_inode.c +++ b/fs/gfs2/ops_inode.c | |||
@@ -18,6 +18,8 @@ | |||
18 | #include <linux/gfs2_ondisk.h> | 18 | #include <linux/gfs2_ondisk.h> |
19 | #include <linux/crc32.h> | 19 | #include <linux/crc32.h> |
20 | #include <linux/fiemap.h> | 20 | #include <linux/fiemap.h> |
21 | #include <linux/swap.h> | ||
22 | #include <linux/falloc.h> | ||
21 | #include <asm/uaccess.h> | 23 | #include <asm/uaccess.h> |
22 | 24 | ||
23 | #include "gfs2.h" | 25 | #include "gfs2.h" |
@@ -217,7 +219,7 @@ static int gfs2_link(struct dentry *old_dentry, struct inode *dir, | |||
217 | goto out_gunlock_q; | 219 | goto out_gunlock_q; |
218 | 220 | ||
219 | error = gfs2_trans_begin(sdp, sdp->sd_max_dirres + | 221 | error = gfs2_trans_begin(sdp, sdp->sd_max_dirres + |
220 | al->al_rgd->rd_length + | 222 | gfs2_rg_blocks(al) + |
221 | 2 * RES_DINODE + RES_STATFS + | 223 | 2 * RES_DINODE + RES_STATFS + |
222 | RES_QUOTA, 0); | 224 | RES_QUOTA, 0); |
223 | if (error) | 225 | if (error) |
@@ -406,7 +408,6 @@ static int gfs2_symlink(struct inode *dir, struct dentry *dentry, | |||
406 | 408 | ||
407 | ip = ghs[1].gh_gl->gl_object; | 409 | ip = ghs[1].gh_gl->gl_object; |
408 | 410 | ||
409 | ip->i_disksize = size; | ||
410 | i_size_write(inode, size); | 411 | i_size_write(inode, size); |
411 | 412 | ||
412 | error = gfs2_meta_inode_buffer(ip, &dibh); | 413 | error = gfs2_meta_inode_buffer(ip, &dibh); |
@@ -461,7 +462,7 @@ static int gfs2_mkdir(struct inode *dir, struct dentry *dentry, int mode) | |||
461 | ip = ghs[1].gh_gl->gl_object; | 462 | ip = ghs[1].gh_gl->gl_object; |
462 | 463 | ||
463 | ip->i_inode.i_nlink = 2; | 464 | ip->i_inode.i_nlink = 2; |
464 | ip->i_disksize = sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode); | 465 | i_size_write(inode, sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode)); |
465 | ip->i_diskflags |= GFS2_DIF_JDATA; | 466 | ip->i_diskflags |= GFS2_DIF_JDATA; |
466 | ip->i_entries = 2; | 467 | ip->i_entries = 2; |
467 | 468 | ||
@@ -470,18 +471,15 @@ static int gfs2_mkdir(struct inode *dir, struct dentry *dentry, int mode) | |||
470 | if (!gfs2_assert_withdraw(sdp, !error)) { | 471 | if (!gfs2_assert_withdraw(sdp, !error)) { |
471 | struct gfs2_dinode *di = (struct gfs2_dinode *)dibh->b_data; | 472 | struct gfs2_dinode *di = (struct gfs2_dinode *)dibh->b_data; |
472 | struct gfs2_dirent *dent = (struct gfs2_dirent *)(di+1); | 473 | struct gfs2_dirent *dent = (struct gfs2_dirent *)(di+1); |
473 | struct qstr str; | ||
474 | 474 | ||
475 | gfs2_str2qstr(&str, "."); | ||
476 | gfs2_trans_add_bh(ip->i_gl, dibh, 1); | 475 | gfs2_trans_add_bh(ip->i_gl, dibh, 1); |
477 | gfs2_qstr2dirent(&str, GFS2_DIRENT_SIZE(str.len), dent); | 476 | gfs2_qstr2dirent(&gfs2_qdot, GFS2_DIRENT_SIZE(gfs2_qdot.len), dent); |
478 | dent->de_inum = di->di_num; /* already GFS2 endian */ | 477 | dent->de_inum = di->di_num; /* already GFS2 endian */ |
479 | dent->de_type = cpu_to_be16(DT_DIR); | 478 | dent->de_type = cpu_to_be16(DT_DIR); |
480 | di->di_entries = cpu_to_be32(1); | 479 | di->di_entries = cpu_to_be32(1); |
481 | 480 | ||
482 | gfs2_str2qstr(&str, ".."); | ||
483 | dent = (struct gfs2_dirent *)((char*)dent + GFS2_DIRENT_SIZE(1)); | 481 | dent = (struct gfs2_dirent *)((char*)dent + GFS2_DIRENT_SIZE(1)); |
484 | gfs2_qstr2dirent(&str, dibh->b_size - GFS2_DIRENT_SIZE(1) - sizeof(struct gfs2_dinode), dent); | 482 | gfs2_qstr2dirent(&gfs2_qdotdot, dibh->b_size - GFS2_DIRENT_SIZE(1) - sizeof(struct gfs2_dinode), dent); |
485 | 483 | ||
486 | gfs2_inum_out(dip, dent); | 484 | gfs2_inum_out(dip, dent); |
487 | dent->de_type = cpu_to_be16(DT_DIR); | 485 | dent->de_type = cpu_to_be16(DT_DIR); |
@@ -522,7 +520,6 @@ static int gfs2_mkdir(struct inode *dir, struct dentry *dentry, int mode) | |||
522 | static int gfs2_rmdiri(struct gfs2_inode *dip, const struct qstr *name, | 520 | static int gfs2_rmdiri(struct gfs2_inode *dip, const struct qstr *name, |
523 | struct gfs2_inode *ip) | 521 | struct gfs2_inode *ip) |
524 | { | 522 | { |
525 | struct qstr dotname; | ||
526 | int error; | 523 | int error; |
527 | 524 | ||
528 | if (ip->i_entries != 2) { | 525 | if (ip->i_entries != 2) { |
@@ -539,13 +536,11 @@ static int gfs2_rmdiri(struct gfs2_inode *dip, const struct qstr *name, | |||
539 | if (error) | 536 | if (error) |
540 | return error; | 537 | return error; |
541 | 538 | ||
542 | gfs2_str2qstr(&dotname, "."); | 539 | error = gfs2_dir_del(ip, &gfs2_qdot); |
543 | error = gfs2_dir_del(ip, &dotname); | ||
544 | if (error) | 540 | if (error) |
545 | return error; | 541 | return error; |
546 | 542 | ||
547 | gfs2_str2qstr(&dotname, ".."); | 543 | error = gfs2_dir_del(ip, &gfs2_qdotdot); |
548 | error = gfs2_dir_del(ip, &dotname); | ||
549 | if (error) | 544 | if (error) |
550 | return error; | 545 | return error; |
551 | 546 | ||
@@ -694,11 +689,8 @@ static int gfs2_ok_to_move(struct gfs2_inode *this, struct gfs2_inode *to) | |||
694 | struct inode *dir = &to->i_inode; | 689 | struct inode *dir = &to->i_inode; |
695 | struct super_block *sb = dir->i_sb; | 690 | struct super_block *sb = dir->i_sb; |
696 | struct inode *tmp; | 691 | struct inode *tmp; |
697 | struct qstr dotdot; | ||
698 | int error = 0; | 692 | int error = 0; |
699 | 693 | ||
700 | gfs2_str2qstr(&dotdot, ".."); | ||
701 | |||
702 | igrab(dir); | 694 | igrab(dir); |
703 | 695 | ||
704 | for (;;) { | 696 | for (;;) { |
@@ -711,7 +703,7 @@ static int gfs2_ok_to_move(struct gfs2_inode *this, struct gfs2_inode *to) | |||
711 | break; | 703 | break; |
712 | } | 704 | } |
713 | 705 | ||
714 | tmp = gfs2_lookupi(dir, &dotdot, 1); | 706 | tmp = gfs2_lookupi(dir, &gfs2_qdotdot, 1); |
715 | if (IS_ERR(tmp)) { | 707 | if (IS_ERR(tmp)) { |
716 | error = PTR_ERR(tmp); | 708 | error = PTR_ERR(tmp); |
717 | break; | 709 | break; |
@@ -744,7 +736,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry, | |||
744 | struct gfs2_inode *ip = GFS2_I(odentry->d_inode); | 736 | struct gfs2_inode *ip = GFS2_I(odentry->d_inode); |
745 | struct gfs2_inode *nip = NULL; | 737 | struct gfs2_inode *nip = NULL; |
746 | struct gfs2_sbd *sdp = GFS2_SB(odir); | 738 | struct gfs2_sbd *sdp = GFS2_SB(odir); |
747 | struct gfs2_holder ghs[5], r_gh = { .gh_gl = NULL, }; | 739 | struct gfs2_holder ghs[5], r_gh = { .gh_gl = NULL, }, ri_gh; |
748 | struct gfs2_rgrpd *nrgd; | 740 | struct gfs2_rgrpd *nrgd; |
749 | unsigned int num_gh; | 741 | unsigned int num_gh; |
750 | int dir_rename = 0; | 742 | int dir_rename = 0; |
@@ -758,6 +750,9 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry, | |||
758 | return 0; | 750 | return 0; |
759 | } | 751 | } |
760 | 752 | ||
753 | error = gfs2_rindex_hold(sdp, &ri_gh); | ||
754 | if (error) | ||
755 | return error; | ||
761 | 756 | ||
762 | if (odip != ndip) { | 757 | if (odip != ndip) { |
763 | error = gfs2_glock_nq_init(sdp->sd_rename_gl, LM_ST_EXCLUSIVE, | 758 | error = gfs2_glock_nq_init(sdp->sd_rename_gl, LM_ST_EXCLUSIVE, |
@@ -887,12 +882,12 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry, | |||
887 | 882 | ||
888 | al->al_requested = sdp->sd_max_dirres; | 883 | al->al_requested = sdp->sd_max_dirres; |
889 | 884 | ||
890 | error = gfs2_inplace_reserve(ndip); | 885 | error = gfs2_inplace_reserve_ri(ndip); |
891 | if (error) | 886 | if (error) |
892 | goto out_gunlock_q; | 887 | goto out_gunlock_q; |
893 | 888 | ||
894 | error = gfs2_trans_begin(sdp, sdp->sd_max_dirres + | 889 | error = gfs2_trans_begin(sdp, sdp->sd_max_dirres + |
895 | al->al_rgd->rd_length + | 890 | gfs2_rg_blocks(al) + |
896 | 4 * RES_DINODE + 4 * RES_LEAF + | 891 | 4 * RES_DINODE + 4 * RES_LEAF + |
897 | RES_STATFS + RES_QUOTA + 4, 0); | 892 | RES_STATFS + RES_QUOTA + 4, 0); |
898 | if (error) | 893 | if (error) |
@@ -920,9 +915,6 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry, | |||
920 | } | 915 | } |
921 | 916 | ||
922 | if (dir_rename) { | 917 | if (dir_rename) { |
923 | struct qstr name; | ||
924 | gfs2_str2qstr(&name, ".."); | ||
925 | |||
926 | error = gfs2_change_nlink(ndip, +1); | 918 | error = gfs2_change_nlink(ndip, +1); |
927 | if (error) | 919 | if (error) |
928 | goto out_end_trans; | 920 | goto out_end_trans; |
@@ -930,7 +922,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry, | |||
930 | if (error) | 922 | if (error) |
931 | goto out_end_trans; | 923 | goto out_end_trans; |
932 | 924 | ||
933 | error = gfs2_dir_mvino(ip, &name, ndip, DT_DIR); | 925 | error = gfs2_dir_mvino(ip, &gfs2_qdotdot, ndip, DT_DIR); |
934 | if (error) | 926 | if (error) |
935 | goto out_end_trans; | 927 | goto out_end_trans; |
936 | } else { | 928 | } else { |
@@ -972,6 +964,7 @@ out_gunlock_r: | |||
972 | if (r_gh.gh_gl) | 964 | if (r_gh.gh_gl) |
973 | gfs2_glock_dq_uninit(&r_gh); | 965 | gfs2_glock_dq_uninit(&r_gh); |
974 | out: | 966 | out: |
967 | gfs2_glock_dq_uninit(&ri_gh); | ||
975 | return error; | 968 | return error; |
976 | } | 969 | } |
977 | 970 | ||
@@ -990,7 +983,7 @@ static void *gfs2_follow_link(struct dentry *dentry, struct nameidata *nd) | |||
990 | struct gfs2_inode *ip = GFS2_I(dentry->d_inode); | 983 | struct gfs2_inode *ip = GFS2_I(dentry->d_inode); |
991 | struct gfs2_holder i_gh; | 984 | struct gfs2_holder i_gh; |
992 | struct buffer_head *dibh; | 985 | struct buffer_head *dibh; |
993 | unsigned int x; | 986 | unsigned int x, size; |
994 | char *buf; | 987 | char *buf; |
995 | int error; | 988 | int error; |
996 | 989 | ||
@@ -1002,7 +995,8 @@ static void *gfs2_follow_link(struct dentry *dentry, struct nameidata *nd) | |||
1002 | return NULL; | 995 | return NULL; |
1003 | } | 996 | } |
1004 | 997 | ||
1005 | if (!ip->i_disksize) { | 998 | size = (unsigned int)i_size_read(&ip->i_inode); |
999 | if (size == 0) { | ||
1006 | gfs2_consist_inode(ip); | 1000 | gfs2_consist_inode(ip); |
1007 | buf = ERR_PTR(-EIO); | 1001 | buf = ERR_PTR(-EIO); |
1008 | goto out; | 1002 | goto out; |
@@ -1014,7 +1008,7 @@ static void *gfs2_follow_link(struct dentry *dentry, struct nameidata *nd) | |||
1014 | goto out; | 1008 | goto out; |
1015 | } | 1009 | } |
1016 | 1010 | ||
1017 | x = ip->i_disksize + 1; | 1011 | x = size + 1; |
1018 | buf = kmalloc(x, GFP_NOFS); | 1012 | buf = kmalloc(x, GFP_NOFS); |
1019 | if (!buf) | 1013 | if (!buf) |
1020 | buf = ERR_PTR(-ENOMEM); | 1014 | buf = ERR_PTR(-ENOMEM); |
@@ -1071,30 +1065,6 @@ int gfs2_permission(struct inode *inode, int mask) | |||
1071 | return error; | 1065 | return error; |
1072 | } | 1066 | } |
1073 | 1067 | ||
1074 | /* | ||
1075 | * XXX(truncate): the truncate_setsize calls should be moved to the end. | ||
1076 | */ | ||
1077 | static int setattr_size(struct inode *inode, struct iattr *attr) | ||
1078 | { | ||
1079 | struct gfs2_inode *ip = GFS2_I(inode); | ||
1080 | struct gfs2_sbd *sdp = GFS2_SB(inode); | ||
1081 | int error; | ||
1082 | |||
1083 | if (attr->ia_size != ip->i_disksize) { | ||
1084 | error = gfs2_trans_begin(sdp, 0, sdp->sd_jdesc->jd_blocks); | ||
1085 | if (error) | ||
1086 | return error; | ||
1087 | truncate_setsize(inode, attr->ia_size); | ||
1088 | gfs2_trans_end(sdp); | ||
1089 | } | ||
1090 | |||
1091 | error = gfs2_truncatei(ip, attr->ia_size); | ||
1092 | if (error && (inode->i_size != ip->i_disksize)) | ||
1093 | i_size_write(inode, ip->i_disksize); | ||
1094 | |||
1095 | return error; | ||
1096 | } | ||
1097 | |||
1098 | static int setattr_chown(struct inode *inode, struct iattr *attr) | 1068 | static int setattr_chown(struct inode *inode, struct iattr *attr) |
1099 | { | 1069 | { |
1100 | struct gfs2_inode *ip = GFS2_I(inode); | 1070 | struct gfs2_inode *ip = GFS2_I(inode); |
@@ -1195,7 +1165,7 @@ static int gfs2_setattr(struct dentry *dentry, struct iattr *attr) | |||
1195 | goto out; | 1165 | goto out; |
1196 | 1166 | ||
1197 | if (attr->ia_valid & ATTR_SIZE) | 1167 | if (attr->ia_valid & ATTR_SIZE) |
1198 | error = setattr_size(inode, attr); | 1168 | error = gfs2_setattr_size(inode, attr->ia_size); |
1199 | else if (attr->ia_valid & (ATTR_UID | ATTR_GID)) | 1169 | else if (attr->ia_valid & (ATTR_UID | ATTR_GID)) |
1200 | error = setattr_chown(inode, attr); | 1170 | error = setattr_chown(inode, attr); |
1201 | else if ((attr->ia_valid & ATTR_MODE) && IS_POSIXACL(inode)) | 1171 | else if ((attr->ia_valid & ATTR_MODE) && IS_POSIXACL(inode)) |
@@ -1301,6 +1271,257 @@ static int gfs2_removexattr(struct dentry *dentry, const char *name) | |||
1301 | return ret; | 1271 | return ret; |
1302 | } | 1272 | } |
1303 | 1273 | ||
1274 | static void empty_write_end(struct page *page, unsigned from, | ||
1275 | unsigned to) | ||
1276 | { | ||
1277 | struct gfs2_inode *ip = GFS2_I(page->mapping->host); | ||
1278 | |||
1279 | page_zero_new_buffers(page, from, to); | ||
1280 | flush_dcache_page(page); | ||
1281 | mark_page_accessed(page); | ||
1282 | |||
1283 | if (!gfs2_is_writeback(ip)) | ||
1284 | gfs2_page_add_databufs(ip, page, from, to); | ||
1285 | |||
1286 | block_commit_write(page, from, to); | ||
1287 | } | ||
1288 | |||
1289 | |||
1290 | static int write_empty_blocks(struct page *page, unsigned from, unsigned to) | ||
1291 | { | ||
1292 | unsigned start, end, next; | ||
1293 | struct buffer_head *bh, *head; | ||
1294 | int error; | ||
1295 | |||
1296 | if (!page_has_buffers(page)) { | ||
1297 | error = block_prepare_write(page, from, to, gfs2_block_map); | ||
1298 | if (unlikely(error)) | ||
1299 | return error; | ||
1300 | |||
1301 | empty_write_end(page, from, to); | ||
1302 | return 0; | ||
1303 | } | ||
1304 | |||
1305 | bh = head = page_buffers(page); | ||
1306 | next = end = 0; | ||
1307 | while (next < from) { | ||
1308 | next += bh->b_size; | ||
1309 | bh = bh->b_this_page; | ||
1310 | } | ||
1311 | start = next; | ||
1312 | do { | ||
1313 | next += bh->b_size; | ||
1314 | if (buffer_mapped(bh)) { | ||
1315 | if (end) { | ||
1316 | error = block_prepare_write(page, start, end, | ||
1317 | gfs2_block_map); | ||
1318 | if (unlikely(error)) | ||
1319 | return error; | ||
1320 | empty_write_end(page, start, end); | ||
1321 | end = 0; | ||
1322 | } | ||
1323 | start = next; | ||
1324 | } | ||
1325 | else | ||
1326 | end = next; | ||
1327 | bh = bh->b_this_page; | ||
1328 | } while (next < to); | ||
1329 | |||
1330 | if (end) { | ||
1331 | error = block_prepare_write(page, start, end, gfs2_block_map); | ||
1332 | if (unlikely(error)) | ||
1333 | return error; | ||
1334 | empty_write_end(page, start, end); | ||
1335 | } | ||
1336 | |||
1337 | return 0; | ||
1338 | } | ||
1339 | |||
1340 | static int fallocate_chunk(struct inode *inode, loff_t offset, loff_t len, | ||
1341 | int mode) | ||
1342 | { | ||
1343 | struct gfs2_inode *ip = GFS2_I(inode); | ||
1344 | struct buffer_head *dibh; | ||
1345 | int error; | ||
1346 | u64 start = offset >> PAGE_CACHE_SHIFT; | ||
1347 | unsigned int start_offset = offset & ~PAGE_CACHE_MASK; | ||
1348 | u64 end = (offset + len - 1) >> PAGE_CACHE_SHIFT; | ||
1349 | pgoff_t curr; | ||
1350 | struct page *page; | ||
1351 | unsigned int end_offset = (offset + len) & ~PAGE_CACHE_MASK; | ||
1352 | unsigned int from, to; | ||
1353 | |||
1354 | if (!end_offset) | ||
1355 | end_offset = PAGE_CACHE_SIZE; | ||
1356 | |||
1357 | error = gfs2_meta_inode_buffer(ip, &dibh); | ||
1358 | if (unlikely(error)) | ||
1359 | goto out; | ||
1360 | |||
1361 | gfs2_trans_add_bh(ip->i_gl, dibh, 1); | ||
1362 | |||
1363 | if (gfs2_is_stuffed(ip)) { | ||
1364 | error = gfs2_unstuff_dinode(ip, NULL); | ||
1365 | if (unlikely(error)) | ||
1366 | goto out; | ||
1367 | } | ||
1368 | |||
1369 | curr = start; | ||
1370 | offset = start << PAGE_CACHE_SHIFT; | ||
1371 | from = start_offset; | ||
1372 | to = PAGE_CACHE_SIZE; | ||
1373 | while (curr <= end) { | ||
1374 | page = grab_cache_page_write_begin(inode->i_mapping, curr, | ||
1375 | AOP_FLAG_NOFS); | ||
1376 | if (unlikely(!page)) { | ||
1377 | error = -ENOMEM; | ||
1378 | goto out; | ||
1379 | } | ||
1380 | |||
1381 | if (curr == end) | ||
1382 | to = end_offset; | ||
1383 | error = write_empty_blocks(page, from, to); | ||
1384 | if (!error && offset + to > inode->i_size && | ||
1385 | !(mode & FALLOC_FL_KEEP_SIZE)) { | ||
1386 | i_size_write(inode, offset + to); | ||
1387 | } | ||
1388 | unlock_page(page); | ||
1389 | page_cache_release(page); | ||
1390 | if (error) | ||
1391 | goto out; | ||
1392 | curr++; | ||
1393 | offset += PAGE_CACHE_SIZE; | ||
1394 | from = 0; | ||
1395 | } | ||
1396 | |||
1397 | gfs2_dinode_out(ip, dibh->b_data); | ||
1398 | mark_inode_dirty(inode); | ||
1399 | |||
1400 | brelse(dibh); | ||
1401 | |||
1402 | out: | ||
1403 | return error; | ||
1404 | } | ||
1405 | |||
1406 | static void calc_max_reserv(struct gfs2_inode *ip, loff_t max, loff_t *len, | ||
1407 | unsigned int *data_blocks, unsigned int *ind_blocks) | ||
1408 | { | ||
1409 | const struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); | ||
1410 | unsigned int max_blocks = ip->i_alloc->al_rgd->rd_free_clone; | ||
1411 | unsigned int tmp, max_data = max_blocks - 3 * (sdp->sd_max_height - 1); | ||
1412 | |||
1413 | for (tmp = max_data; tmp > sdp->sd_diptrs;) { | ||
1414 | tmp = DIV_ROUND_UP(tmp, sdp->sd_inptrs); | ||
1415 | max_data -= tmp; | ||
1416 | } | ||
1417 | /* This calculation isn't the exact reverse of gfs2_write_calc_reserve, | ||
1418 | so it might end up with fewer data blocks */ | ||
1419 | if (max_data <= *data_blocks) | ||
1420 | return; | ||
1421 | *data_blocks = max_data; | ||
1422 | *ind_blocks = max_blocks - max_data; | ||
1423 | *len = ((loff_t)max_data - 3) << sdp->sd_sb.sb_bsize_shift; | ||
1424 | if (*len > max) { | ||
1425 | *len = max; | ||
1426 | gfs2_write_calc_reserv(ip, max, data_blocks, ind_blocks); | ||
1427 | } | ||
1428 | } | ||
1429 | |||
1430 | static long gfs2_fallocate(struct inode *inode, int mode, loff_t offset, | ||
1431 | loff_t len) | ||
1432 | { | ||
1433 | struct gfs2_sbd *sdp = GFS2_SB(inode); | ||
1434 | struct gfs2_inode *ip = GFS2_I(inode); | ||
1435 | unsigned int data_blocks = 0, ind_blocks = 0, rblocks; | ||
1436 | loff_t bytes, max_bytes; | ||
1437 | struct gfs2_alloc *al; | ||
1438 | int error; | ||
1439 | loff_t next = (offset + len - 1) >> sdp->sd_sb.sb_bsize_shift; | ||
1440 | next = (next + 1) << sdp->sd_sb.sb_bsize_shift; | ||
1441 | |||
1442 | offset = (offset >> sdp->sd_sb.sb_bsize_shift) << | ||
1443 | sdp->sd_sb.sb_bsize_shift; | ||
1444 | |||
1445 | len = next - offset; | ||
1446 | bytes = sdp->sd_max_rg_data * sdp->sd_sb.sb_bsize / 2; | ||
1447 | if (!bytes) | ||
1448 | bytes = UINT_MAX; | ||
1449 | |||
1450 | gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &ip->i_gh); | ||
1451 | error = gfs2_glock_nq(&ip->i_gh); | ||
1452 | if (unlikely(error)) | ||
1453 | goto out_uninit; | ||
1454 | |||
1455 | if (!gfs2_write_alloc_required(ip, offset, len)) | ||
1456 | goto out_unlock; | ||
1457 | |||
1458 | while (len > 0) { | ||
1459 | if (len < bytes) | ||
1460 | bytes = len; | ||
1461 | al = gfs2_alloc_get(ip); | ||
1462 | if (!al) { | ||
1463 | error = -ENOMEM; | ||
1464 | goto out_unlock; | ||
1465 | } | ||
1466 | |||
1467 | error = gfs2_quota_lock_check(ip); | ||
1468 | if (error) | ||
1469 | goto out_alloc_put; | ||
1470 | |||
1471 | retry: | ||
1472 | gfs2_write_calc_reserv(ip, bytes, &data_blocks, &ind_blocks); | ||
1473 | |||
1474 | al->al_requested = data_blocks + ind_blocks; | ||
1475 | error = gfs2_inplace_reserve(ip); | ||
1476 | if (error) { | ||
1477 | if (error == -ENOSPC && bytes > sdp->sd_sb.sb_bsize) { | ||
1478 | bytes >>= 1; | ||
1479 | goto retry; | ||
1480 | } | ||
1481 | goto out_qunlock; | ||
1482 | } | ||
1483 | max_bytes = bytes; | ||
1484 | calc_max_reserv(ip, len, &max_bytes, &data_blocks, &ind_blocks); | ||
1485 | al->al_requested = data_blocks + ind_blocks; | ||
1486 | |||
1487 | rblocks = RES_DINODE + ind_blocks + RES_STATFS + RES_QUOTA + | ||
1488 | RES_RG_HDR + gfs2_rg_blocks(al); | ||
1489 | if (gfs2_is_jdata(ip)) | ||
1490 | rblocks += data_blocks ? data_blocks : 1; | ||
1491 | |||
1492 | error = gfs2_trans_begin(sdp, rblocks, | ||
1493 | PAGE_CACHE_SIZE/sdp->sd_sb.sb_bsize); | ||
1494 | if (error) | ||
1495 | goto out_trans_fail; | ||
1496 | |||
1497 | error = fallocate_chunk(inode, offset, max_bytes, mode); | ||
1498 | gfs2_trans_end(sdp); | ||
1499 | |||
1500 | if (error) | ||
1501 | goto out_trans_fail; | ||
1502 | |||
1503 | len -= max_bytes; | ||
1504 | offset += max_bytes; | ||
1505 | gfs2_inplace_release(ip); | ||
1506 | gfs2_quota_unlock(ip); | ||
1507 | gfs2_alloc_put(ip); | ||
1508 | } | ||
1509 | goto out_unlock; | ||
1510 | |||
1511 | out_trans_fail: | ||
1512 | gfs2_inplace_release(ip); | ||
1513 | out_qunlock: | ||
1514 | gfs2_quota_unlock(ip); | ||
1515 | out_alloc_put: | ||
1516 | gfs2_alloc_put(ip); | ||
1517 | out_unlock: | ||
1518 | gfs2_glock_dq(&ip->i_gh); | ||
1519 | out_uninit: | ||
1520 | gfs2_holder_uninit(&ip->i_gh); | ||
1521 | return error; | ||
1522 | } | ||
1523 | |||
1524 | |||
1304 | static int gfs2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, | 1525 | static int gfs2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, |
1305 | u64 start, u64 len) | 1526 | u64 start, u64 len) |
1306 | { | 1527 | { |
@@ -1351,6 +1572,7 @@ const struct inode_operations gfs2_file_iops = { | |||
1351 | .getxattr = gfs2_getxattr, | 1572 | .getxattr = gfs2_getxattr, |
1352 | .listxattr = gfs2_listxattr, | 1573 | .listxattr = gfs2_listxattr, |
1353 | .removexattr = gfs2_removexattr, | 1574 | .removexattr = gfs2_removexattr, |
1575 | .fallocate = gfs2_fallocate, | ||
1354 | .fiemap = gfs2_fiemap, | 1576 | .fiemap = gfs2_fiemap, |
1355 | }; | 1577 | }; |
1356 | 1578 | ||
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c index 1bc6b5695e6d..58a9b9998b42 100644 --- a/fs/gfs2/quota.c +++ b/fs/gfs2/quota.c | |||
@@ -735,10 +735,8 @@ get_a_page: | |||
735 | goto out; | 735 | goto out; |
736 | 736 | ||
737 | size = loc + sizeof(struct gfs2_quota); | 737 | size = loc + sizeof(struct gfs2_quota); |
738 | if (size > inode->i_size) { | 738 | if (size > inode->i_size) |
739 | ip->i_disksize = size; | ||
740 | i_size_write(inode, size); | 739 | i_size_write(inode, size); |
741 | } | ||
742 | inode->i_mtime = inode->i_atime = CURRENT_TIME; | 740 | inode->i_mtime = inode->i_atime = CURRENT_TIME; |
743 | gfs2_trans_add_bh(ip->i_gl, dibh, 1); | 741 | gfs2_trans_add_bh(ip->i_gl, dibh, 1); |
744 | gfs2_dinode_out(ip, dibh->b_data); | 742 | gfs2_dinode_out(ip, dibh->b_data); |
@@ -817,7 +815,7 @@ static int do_sync(unsigned int num_qd, struct gfs2_quota_data **qda) | |||
817 | goto out_alloc; | 815 | goto out_alloc; |
818 | 816 | ||
819 | if (nalloc) | 817 | if (nalloc) |
820 | blocks += al->al_rgd->rd_length + nalloc * ind_blocks + RES_STATFS; | 818 | blocks += gfs2_rg_blocks(al) + nalloc * ind_blocks + RES_STATFS; |
821 | 819 | ||
822 | error = gfs2_trans_begin(sdp, blocks, 0); | 820 | error = gfs2_trans_begin(sdp, blocks, 0); |
823 | if (error) | 821 | if (error) |
@@ -1190,18 +1188,17 @@ static void gfs2_quota_change_in(struct gfs2_quota_change_host *qc, const void * | |||
1190 | int gfs2_quota_init(struct gfs2_sbd *sdp) | 1188 | int gfs2_quota_init(struct gfs2_sbd *sdp) |
1191 | { | 1189 | { |
1192 | struct gfs2_inode *ip = GFS2_I(sdp->sd_qc_inode); | 1190 | struct gfs2_inode *ip = GFS2_I(sdp->sd_qc_inode); |
1193 | unsigned int blocks = ip->i_disksize >> sdp->sd_sb.sb_bsize_shift; | 1191 | u64 size = i_size_read(sdp->sd_qc_inode); |
1192 | unsigned int blocks = size >> sdp->sd_sb.sb_bsize_shift; | ||
1194 | unsigned int x, slot = 0; | 1193 | unsigned int x, slot = 0; |
1195 | unsigned int found = 0; | 1194 | unsigned int found = 0; |
1196 | u64 dblock; | 1195 | u64 dblock; |
1197 | u32 extlen = 0; | 1196 | u32 extlen = 0; |
1198 | int error; | 1197 | int error; |
1199 | 1198 | ||
1200 | if (!ip->i_disksize || ip->i_disksize > (64 << 20) || | 1199 | if (gfs2_check_internal_file_size(sdp->sd_qc_inode, 1, 64 << 20)) |
1201 | ip->i_disksize & (sdp->sd_sb.sb_bsize - 1)) { | ||
1202 | gfs2_consist_inode(ip); | ||
1203 | return -EIO; | 1200 | return -EIO; |
1204 | } | 1201 | |
1205 | sdp->sd_quota_slots = blocks * sdp->sd_qc_per_block; | 1202 | sdp->sd_quota_slots = blocks * sdp->sd_qc_per_block; |
1206 | sdp->sd_quota_chunks = DIV_ROUND_UP(sdp->sd_quota_slots, 8 * PAGE_SIZE); | 1203 | sdp->sd_quota_chunks = DIV_ROUND_UP(sdp->sd_quota_slots, 8 * PAGE_SIZE); |
1207 | 1204 | ||
@@ -1589,6 +1586,7 @@ static int gfs2_set_dqblk(struct super_block *sb, int type, qid_t id, | |||
1589 | error = gfs2_inplace_reserve(ip); | 1586 | error = gfs2_inplace_reserve(ip); |
1590 | if (error) | 1587 | if (error) |
1591 | goto out_alloc; | 1588 | goto out_alloc; |
1589 | blocks += gfs2_rg_blocks(al); | ||
1592 | } | 1590 | } |
1593 | 1591 | ||
1594 | error = gfs2_trans_begin(sdp, blocks + RES_DINODE + 1, 0); | 1592 | error = gfs2_trans_begin(sdp, blocks + RES_DINODE + 1, 0); |
diff --git a/fs/gfs2/recovery.c b/fs/gfs2/recovery.c index f7f89a94a5a4..f2a02edcac8f 100644 --- a/fs/gfs2/recovery.c +++ b/fs/gfs2/recovery.c | |||
@@ -455,11 +455,13 @@ void gfs2_recover_func(struct work_struct *work) | |||
455 | int ro = 0; | 455 | int ro = 0; |
456 | unsigned int pass; | 456 | unsigned int pass; |
457 | int error; | 457 | int error; |
458 | int jlocked = 0; | ||
458 | 459 | ||
459 | if (jd->jd_jid != sdp->sd_lockstruct.ls_jid) { | 460 | if (sdp->sd_args.ar_spectator || |
461 | (jd->jd_jid != sdp->sd_lockstruct.ls_jid)) { | ||
460 | fs_info(sdp, "jid=%u: Trying to acquire journal lock...\n", | 462 | fs_info(sdp, "jid=%u: Trying to acquire journal lock...\n", |
461 | jd->jd_jid); | 463 | jd->jd_jid); |
462 | 464 | jlocked = 1; | |
463 | /* Acquire the journal lock so we can do recovery */ | 465 | /* Acquire the journal lock so we can do recovery */ |
464 | 466 | ||
465 | error = gfs2_glock_nq_num(sdp, jd->jd_jid, &gfs2_journal_glops, | 467 | error = gfs2_glock_nq_num(sdp, jd->jd_jid, &gfs2_journal_glops, |
@@ -554,13 +556,12 @@ void gfs2_recover_func(struct work_struct *work) | |||
554 | jd->jd_jid, t); | 556 | jd->jd_jid, t); |
555 | } | 557 | } |
556 | 558 | ||
557 | if (jd->jd_jid != sdp->sd_lockstruct.ls_jid) | ||
558 | gfs2_glock_dq_uninit(&ji_gh); | ||
559 | |||
560 | gfs2_recovery_done(sdp, jd->jd_jid, LM_RD_SUCCESS); | 559 | gfs2_recovery_done(sdp, jd->jd_jid, LM_RD_SUCCESS); |
561 | 560 | ||
562 | if (jd->jd_jid != sdp->sd_lockstruct.ls_jid) | 561 | if (jlocked) { |
562 | gfs2_glock_dq_uninit(&ji_gh); | ||
563 | gfs2_glock_dq_uninit(&j_gh); | 563 | gfs2_glock_dq_uninit(&j_gh); |
564 | } | ||
564 | 565 | ||
565 | fs_info(sdp, "jid=%u: Done\n", jd->jd_jid); | 566 | fs_info(sdp, "jid=%u: Done\n", jd->jd_jid); |
566 | goto done; | 567 | goto done; |
@@ -568,7 +569,7 @@ void gfs2_recover_func(struct work_struct *work) | |||
568 | fail_gunlock_tr: | 569 | fail_gunlock_tr: |
569 | gfs2_glock_dq_uninit(&t_gh); | 570 | gfs2_glock_dq_uninit(&t_gh); |
570 | fail_gunlock_ji: | 571 | fail_gunlock_ji: |
571 | if (jd->jd_jid != sdp->sd_lockstruct.ls_jid) { | 572 | if (jlocked) { |
572 | gfs2_glock_dq_uninit(&ji_gh); | 573 | gfs2_glock_dq_uninit(&ji_gh); |
573 | fail_gunlock_j: | 574 | fail_gunlock_j: |
574 | gfs2_glock_dq_uninit(&j_gh); | 575 | gfs2_glock_dq_uninit(&j_gh); |
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index 171a744f8e45..fb67f593f408 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c | |||
@@ -500,7 +500,7 @@ u64 gfs2_ri_total(struct gfs2_sbd *sdp) | |||
500 | for (rgrps = 0;; rgrps++) { | 500 | for (rgrps = 0;; rgrps++) { |
501 | loff_t pos = rgrps * sizeof(struct gfs2_rindex); | 501 | loff_t pos = rgrps * sizeof(struct gfs2_rindex); |
502 | 502 | ||
503 | if (pos + sizeof(struct gfs2_rindex) >= ip->i_disksize) | 503 | if (pos + sizeof(struct gfs2_rindex) >= i_size_read(inode)) |
504 | break; | 504 | break; |
505 | error = gfs2_internal_read(ip, &ra_state, buf, &pos, | 505 | error = gfs2_internal_read(ip, &ra_state, buf, &pos, |
506 | sizeof(struct gfs2_rindex)); | 506 | sizeof(struct gfs2_rindex)); |
@@ -588,7 +588,9 @@ static int gfs2_ri_update(struct gfs2_inode *ip) | |||
588 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); | 588 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); |
589 | struct inode *inode = &ip->i_inode; | 589 | struct inode *inode = &ip->i_inode; |
590 | struct file_ra_state ra_state; | 590 | struct file_ra_state ra_state; |
591 | u64 rgrp_count = ip->i_disksize; | 591 | u64 rgrp_count = i_size_read(inode); |
592 | struct gfs2_rgrpd *rgd; | ||
593 | unsigned int max_data = 0; | ||
592 | int error; | 594 | int error; |
593 | 595 | ||
594 | do_div(rgrp_count, sizeof(struct gfs2_rindex)); | 596 | do_div(rgrp_count, sizeof(struct gfs2_rindex)); |
@@ -603,6 +605,10 @@ static int gfs2_ri_update(struct gfs2_inode *ip) | |||
603 | } | 605 | } |
604 | } | 606 | } |
605 | 607 | ||
608 | list_for_each_entry(rgd, &sdp->sd_rindex_list, rd_list) | ||
609 | if (rgd->rd_data > max_data) | ||
610 | max_data = rgd->rd_data; | ||
611 | sdp->sd_max_rg_data = max_data; | ||
606 | sdp->sd_rindex_uptodate = 1; | 612 | sdp->sd_rindex_uptodate = 1; |
607 | return 0; | 613 | return 0; |
608 | } | 614 | } |
@@ -622,13 +628,15 @@ static int gfs2_ri_update_special(struct gfs2_inode *ip) | |||
622 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); | 628 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); |
623 | struct inode *inode = &ip->i_inode; | 629 | struct inode *inode = &ip->i_inode; |
624 | struct file_ra_state ra_state; | 630 | struct file_ra_state ra_state; |
631 | struct gfs2_rgrpd *rgd; | ||
632 | unsigned int max_data = 0; | ||
625 | int error; | 633 | int error; |
626 | 634 | ||
627 | file_ra_state_init(&ra_state, inode->i_mapping); | 635 | file_ra_state_init(&ra_state, inode->i_mapping); |
628 | for (sdp->sd_rgrps = 0;; sdp->sd_rgrps++) { | 636 | for (sdp->sd_rgrps = 0;; sdp->sd_rgrps++) { |
629 | /* Ignore partials */ | 637 | /* Ignore partials */ |
630 | if ((sdp->sd_rgrps + 1) * sizeof(struct gfs2_rindex) > | 638 | if ((sdp->sd_rgrps + 1) * sizeof(struct gfs2_rindex) > |
631 | ip->i_disksize) | 639 | i_size_read(inode)) |
632 | break; | 640 | break; |
633 | error = read_rindex_entry(ip, &ra_state); | 641 | error = read_rindex_entry(ip, &ra_state); |
634 | if (error) { | 642 | if (error) { |
@@ -636,6 +644,10 @@ static int gfs2_ri_update_special(struct gfs2_inode *ip) | |||
636 | return error; | 644 | return error; |
637 | } | 645 | } |
638 | } | 646 | } |
647 | list_for_each_entry(rgd, &sdp->sd_rindex_list, rd_list) | ||
648 | if (rgd->rd_data > max_data) | ||
649 | max_data = rgd->rd_data; | ||
650 | sdp->sd_max_rg_data = max_data; | ||
639 | 651 | ||
640 | sdp->sd_rindex_uptodate = 1; | 652 | sdp->sd_rindex_uptodate = 1; |
641 | return 0; | 653 | return 0; |
@@ -1188,7 +1200,8 @@ out: | |||
1188 | * Returns: errno | 1200 | * Returns: errno |
1189 | */ | 1201 | */ |
1190 | 1202 | ||
1191 | int gfs2_inplace_reserve_i(struct gfs2_inode *ip, char *file, unsigned int line) | 1203 | int gfs2_inplace_reserve_i(struct gfs2_inode *ip, int hold_rindex, |
1204 | char *file, unsigned int line) | ||
1192 | { | 1205 | { |
1193 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); | 1206 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); |
1194 | struct gfs2_alloc *al = ip->i_alloc; | 1207 | struct gfs2_alloc *al = ip->i_alloc; |
@@ -1199,12 +1212,15 @@ int gfs2_inplace_reserve_i(struct gfs2_inode *ip, char *file, unsigned int line) | |||
1199 | return -EINVAL; | 1212 | return -EINVAL; |
1200 | 1213 | ||
1201 | try_again: | 1214 | try_again: |
1202 | /* We need to hold the rindex unless the inode we're using is | 1215 | if (hold_rindex) { |
1203 | the rindex itself, in which case it's already held. */ | 1216 | /* We need to hold the rindex unless the inode we're using is |
1204 | if (ip != GFS2_I(sdp->sd_rindex)) | 1217 | the rindex itself, in which case it's already held. */ |
1205 | error = gfs2_rindex_hold(sdp, &al->al_ri_gh); | 1218 | if (ip != GFS2_I(sdp->sd_rindex)) |
1206 | else if (!sdp->sd_rgrps) /* We may not have the rindex read in, so: */ | 1219 | error = gfs2_rindex_hold(sdp, &al->al_ri_gh); |
1207 | error = gfs2_ri_update_special(ip); | 1220 | else if (!sdp->sd_rgrps) /* We may not have the rindex read |
1221 | in, so: */ | ||
1222 | error = gfs2_ri_update_special(ip); | ||
1223 | } | ||
1208 | 1224 | ||
1209 | if (error) | 1225 | if (error) |
1210 | return error; | 1226 | return error; |
@@ -1215,7 +1231,7 @@ try_again: | |||
1215 | try to free it, and try the allocation again. */ | 1231 | try to free it, and try the allocation again. */ |
1216 | error = get_local_rgrp(ip, &unlinked, &last_unlinked); | 1232 | error = get_local_rgrp(ip, &unlinked, &last_unlinked); |
1217 | if (error) { | 1233 | if (error) { |
1218 | if (ip != GFS2_I(sdp->sd_rindex)) | 1234 | if (hold_rindex && ip != GFS2_I(sdp->sd_rindex)) |
1219 | gfs2_glock_dq_uninit(&al->al_ri_gh); | 1235 | gfs2_glock_dq_uninit(&al->al_ri_gh); |
1220 | if (error != -EAGAIN) | 1236 | if (error != -EAGAIN) |
1221 | return error; | 1237 | return error; |
@@ -1257,7 +1273,7 @@ void gfs2_inplace_release(struct gfs2_inode *ip) | |||
1257 | al->al_rgd = NULL; | 1273 | al->al_rgd = NULL; |
1258 | if (al->al_rgd_gh.gh_gl) | 1274 | if (al->al_rgd_gh.gh_gl) |
1259 | gfs2_glock_dq_uninit(&al->al_rgd_gh); | 1275 | gfs2_glock_dq_uninit(&al->al_rgd_gh); |
1260 | if (ip != GFS2_I(sdp->sd_rindex)) | 1276 | if (ip != GFS2_I(sdp->sd_rindex) && al->al_ri_gh.gh_gl) |
1261 | gfs2_glock_dq_uninit(&al->al_ri_gh); | 1277 | gfs2_glock_dq_uninit(&al->al_ri_gh); |
1262 | } | 1278 | } |
1263 | 1279 | ||
@@ -1496,11 +1512,19 @@ int gfs2_alloc_block(struct gfs2_inode *ip, u64 *bn, unsigned int *n) | |||
1496 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); | 1512 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); |
1497 | struct buffer_head *dibh; | 1513 | struct buffer_head *dibh; |
1498 | struct gfs2_alloc *al = ip->i_alloc; | 1514 | struct gfs2_alloc *al = ip->i_alloc; |
1499 | struct gfs2_rgrpd *rgd = al->al_rgd; | 1515 | struct gfs2_rgrpd *rgd; |
1500 | u32 goal, blk; | 1516 | u32 goal, blk; |
1501 | u64 block; | 1517 | u64 block; |
1502 | int error; | 1518 | int error; |
1503 | 1519 | ||
1520 | /* Only happens if there is a bug in gfs2, return something distinctive | ||
1521 | * to ensure that it is noticed. | ||
1522 | */ | ||
1523 | if (al == NULL) | ||
1524 | return -ECANCELED; | ||
1525 | |||
1526 | rgd = al->al_rgd; | ||
1527 | |||
1504 | if (rgrp_contains_block(rgd, ip->i_goal)) | 1528 | if (rgrp_contains_block(rgd, ip->i_goal)) |
1505 | goal = ip->i_goal - rgd->rd_data0; | 1529 | goal = ip->i_goal - rgd->rd_data0; |
1506 | else | 1530 | else |
diff --git a/fs/gfs2/rgrp.h b/fs/gfs2/rgrp.h index f07119d89557..0e35c0466f9a 100644 --- a/fs/gfs2/rgrp.h +++ b/fs/gfs2/rgrp.h | |||
@@ -39,10 +39,12 @@ static inline void gfs2_alloc_put(struct gfs2_inode *ip) | |||
39 | ip->i_alloc = NULL; | 39 | ip->i_alloc = NULL; |
40 | } | 40 | } |
41 | 41 | ||
42 | extern int gfs2_inplace_reserve_i(struct gfs2_inode *ip, char *file, | 42 | extern int gfs2_inplace_reserve_i(struct gfs2_inode *ip, int hold_rindex, |
43 | unsigned int line); | 43 | char *file, unsigned int line); |
44 | #define gfs2_inplace_reserve(ip) \ | 44 | #define gfs2_inplace_reserve(ip) \ |
45 | gfs2_inplace_reserve_i((ip), __FILE__, __LINE__) | 45 | gfs2_inplace_reserve_i((ip), 1, __FILE__, __LINE__) |
46 | #define gfs2_inplace_reserve_ri(ip) \ | ||
47 | gfs2_inplace_reserve_i((ip), 0, __FILE__, __LINE__) | ||
46 | 48 | ||
47 | extern void gfs2_inplace_release(struct gfs2_inode *ip); | 49 | extern void gfs2_inplace_release(struct gfs2_inode *ip); |
48 | 50 | ||
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index 77cb9f830ee4..047d1176096c 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c | |||
@@ -85,6 +85,7 @@ static const match_table_t tokens = { | |||
85 | {Opt_locktable, "locktable=%s"}, | 85 | {Opt_locktable, "locktable=%s"}, |
86 | {Opt_hostdata, "hostdata=%s"}, | 86 | {Opt_hostdata, "hostdata=%s"}, |
87 | {Opt_spectator, "spectator"}, | 87 | {Opt_spectator, "spectator"}, |
88 | {Opt_spectator, "norecovery"}, | ||
88 | {Opt_ignore_local_fs, "ignore_local_fs"}, | 89 | {Opt_ignore_local_fs, "ignore_local_fs"}, |
89 | {Opt_localflocks, "localflocks"}, | 90 | {Opt_localflocks, "localflocks"}, |
90 | {Opt_localcaching, "localcaching"}, | 91 | {Opt_localcaching, "localcaching"}, |
@@ -159,13 +160,13 @@ int gfs2_mount_args(struct gfs2_args *args, char *options) | |||
159 | args->ar_spectator = 1; | 160 | args->ar_spectator = 1; |
160 | break; | 161 | break; |
161 | case Opt_ignore_local_fs: | 162 | case Opt_ignore_local_fs: |
162 | args->ar_ignore_local_fs = 1; | 163 | /* Retained for backwards compat only */ |
163 | break; | 164 | break; |
164 | case Opt_localflocks: | 165 | case Opt_localflocks: |
165 | args->ar_localflocks = 1; | 166 | args->ar_localflocks = 1; |
166 | break; | 167 | break; |
167 | case Opt_localcaching: | 168 | case Opt_localcaching: |
168 | args->ar_localcaching = 1; | 169 | /* Retained for backwards compat only */ |
169 | break; | 170 | break; |
170 | case Opt_debug: | 171 | case Opt_debug: |
171 | if (args->ar_errors == GFS2_ERRORS_PANIC) { | 172 | if (args->ar_errors == GFS2_ERRORS_PANIC) { |
@@ -179,7 +180,7 @@ int gfs2_mount_args(struct gfs2_args *args, char *options) | |||
179 | args->ar_debug = 0; | 180 | args->ar_debug = 0; |
180 | break; | 181 | break; |
181 | case Opt_upgrade: | 182 | case Opt_upgrade: |
182 | args->ar_upgrade = 1; | 183 | /* Retained for backwards compat only */ |
183 | break; | 184 | break; |
184 | case Opt_acl: | 185 | case Opt_acl: |
185 | args->ar_posix_acl = 1; | 186 | args->ar_posix_acl = 1; |
@@ -342,15 +343,14 @@ int gfs2_jdesc_check(struct gfs2_jdesc *jd) | |||
342 | { | 343 | { |
343 | struct gfs2_inode *ip = GFS2_I(jd->jd_inode); | 344 | struct gfs2_inode *ip = GFS2_I(jd->jd_inode); |
344 | struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode); | 345 | struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode); |
346 | u64 size = i_size_read(jd->jd_inode); | ||
345 | 347 | ||
346 | if (ip->i_disksize < (8 << 20) || ip->i_disksize > (1 << 30) || | 348 | if (gfs2_check_internal_file_size(jd->jd_inode, 8 << 20, 1 << 30)) |
347 | (ip->i_disksize & (sdp->sd_sb.sb_bsize - 1))) { | ||
348 | gfs2_consist_inode(ip); | ||
349 | return -EIO; | 349 | return -EIO; |
350 | } | ||
351 | jd->jd_blocks = ip->i_disksize >> sdp->sd_sb.sb_bsize_shift; | ||
352 | 350 | ||
353 | if (gfs2_write_alloc_required(ip, 0, ip->i_disksize)) { | 351 | jd->jd_blocks = size >> sdp->sd_sb.sb_bsize_shift; |
352 | |||
353 | if (gfs2_write_alloc_required(ip, 0, size)) { | ||
354 | gfs2_consist_inode(ip); | 354 | gfs2_consist_inode(ip); |
355 | return -EIO; | 355 | return -EIO; |
356 | } | 356 | } |
@@ -1129,9 +1129,7 @@ static int gfs2_remount_fs(struct super_block *sb, int *flags, char *data) | |||
1129 | 1129 | ||
1130 | /* Some flags must not be changed */ | 1130 | /* Some flags must not be changed */ |
1131 | if (args_neq(&args, &sdp->sd_args, spectator) || | 1131 | if (args_neq(&args, &sdp->sd_args, spectator) || |
1132 | args_neq(&args, &sdp->sd_args, ignore_local_fs) || | ||
1133 | args_neq(&args, &sdp->sd_args, localflocks) || | 1132 | args_neq(&args, &sdp->sd_args, localflocks) || |
1134 | args_neq(&args, &sdp->sd_args, localcaching) || | ||
1135 | args_neq(&args, &sdp->sd_args, meta)) | 1133 | args_neq(&args, &sdp->sd_args, meta)) |
1136 | return -EINVAL; | 1134 | return -EINVAL; |
1137 | 1135 | ||
@@ -1234,16 +1232,10 @@ static int gfs2_show_options(struct seq_file *s, struct vfsmount *mnt) | |||
1234 | seq_printf(s, ",hostdata=%s", args->ar_hostdata); | 1232 | seq_printf(s, ",hostdata=%s", args->ar_hostdata); |
1235 | if (args->ar_spectator) | 1233 | if (args->ar_spectator) |
1236 | seq_printf(s, ",spectator"); | 1234 | seq_printf(s, ",spectator"); |
1237 | if (args->ar_ignore_local_fs) | ||
1238 | seq_printf(s, ",ignore_local_fs"); | ||
1239 | if (args->ar_localflocks) | 1235 | if (args->ar_localflocks) |
1240 | seq_printf(s, ",localflocks"); | 1236 | seq_printf(s, ",localflocks"); |
1241 | if (args->ar_localcaching) | ||
1242 | seq_printf(s, ",localcaching"); | ||
1243 | if (args->ar_debug) | 1237 | if (args->ar_debug) |
1244 | seq_printf(s, ",debug"); | 1238 | seq_printf(s, ",debug"); |
1245 | if (args->ar_upgrade) | ||
1246 | seq_printf(s, ",upgrade"); | ||
1247 | if (args->ar_posix_acl) | 1239 | if (args->ar_posix_acl) |
1248 | seq_printf(s, ",acl"); | 1240 | seq_printf(s, ",acl"); |
1249 | if (args->ar_quota != GFS2_QUOTA_DEFAULT) { | 1241 | if (args->ar_quota != GFS2_QUOTA_DEFAULT) { |
diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c index ccacffd2faaa..748ccb557c18 100644 --- a/fs/gfs2/sys.c +++ b/fs/gfs2/sys.c | |||
@@ -230,7 +230,10 @@ static ssize_t demote_rq_store(struct gfs2_sbd *sdp, const char *buf, size_t len | |||
230 | 230 | ||
231 | if (gltype > LM_TYPE_JOURNAL) | 231 | if (gltype > LM_TYPE_JOURNAL) |
232 | return -EINVAL; | 232 | return -EINVAL; |
233 | glops = gfs2_glops_list[gltype]; | 233 | if (gltype == LM_TYPE_NONDISK && glnum == GFS2_TRANS_LOCK) |
234 | glops = &gfs2_trans_glops; | ||
235 | else | ||
236 | glops = gfs2_glops_list[gltype]; | ||
234 | if (glops == NULL) | 237 | if (glops == NULL) |
235 | return -EINVAL; | 238 | return -EINVAL; |
236 | if (!test_and_set_bit(SDF_DEMOTE, &sdp->sd_flags)) | 239 | if (!test_and_set_bit(SDF_DEMOTE, &sdp->sd_flags)) |
@@ -399,31 +402,32 @@ static ssize_t recover_status_show(struct gfs2_sbd *sdp, char *buf) | |||
399 | 402 | ||
400 | static ssize_t jid_show(struct gfs2_sbd *sdp, char *buf) | 403 | static ssize_t jid_show(struct gfs2_sbd *sdp, char *buf) |
401 | { | 404 | { |
402 | return sprintf(buf, "%u\n", sdp->sd_lockstruct.ls_jid); | 405 | return sprintf(buf, "%d\n", sdp->sd_lockstruct.ls_jid); |
403 | } | 406 | } |
404 | 407 | ||
405 | static ssize_t jid_store(struct gfs2_sbd *sdp, const char *buf, size_t len) | 408 | static ssize_t jid_store(struct gfs2_sbd *sdp, const char *buf, size_t len) |
406 | { | 409 | { |
407 | unsigned jid; | 410 | int jid; |
408 | int rv; | 411 | int rv; |
409 | 412 | ||
410 | rv = sscanf(buf, "%u", &jid); | 413 | rv = sscanf(buf, "%d", &jid); |
411 | if (rv != 1) | 414 | if (rv != 1) |
412 | return -EINVAL; | 415 | return -EINVAL; |
413 | 416 | ||
414 | spin_lock(&sdp->sd_jindex_spin); | 417 | spin_lock(&sdp->sd_jindex_spin); |
415 | rv = -EINVAL; | 418 | rv = -EINVAL; |
416 | if (sdp->sd_args.ar_spectator) | ||
417 | goto out; | ||
418 | if (sdp->sd_lockstruct.ls_ops->lm_mount == NULL) | 419 | if (sdp->sd_lockstruct.ls_ops->lm_mount == NULL) |
419 | goto out; | 420 | goto out; |
420 | rv = -EBUSY; | 421 | rv = -EBUSY; |
421 | if (test_and_clear_bit(SDF_NOJOURNALID, &sdp->sd_flags) == 0) | 422 | if (test_bit(SDF_NOJOURNALID, &sdp->sd_flags) == 0) |
422 | goto out; | 423 | goto out; |
424 | rv = 0; | ||
425 | if (sdp->sd_args.ar_spectator && jid > 0) | ||
426 | rv = jid = -EINVAL; | ||
423 | sdp->sd_lockstruct.ls_jid = jid; | 427 | sdp->sd_lockstruct.ls_jid = jid; |
428 | clear_bit(SDF_NOJOURNALID, &sdp->sd_flags); | ||
424 | smp_mb__after_clear_bit(); | 429 | smp_mb__after_clear_bit(); |
425 | wake_up_bit(&sdp->sd_flags, SDF_NOJOURNALID); | 430 | wake_up_bit(&sdp->sd_flags, SDF_NOJOURNALID); |
426 | rv = 0; | ||
427 | out: | 431 | out: |
428 | spin_unlock(&sdp->sd_jindex_spin); | 432 | spin_unlock(&sdp->sd_jindex_spin); |
429 | return rv ? rv : len; | 433 | return rv ? rv : len; |
@@ -617,7 +621,7 @@ static int gfs2_uevent(struct kset *kset, struct kobject *kobj, | |||
617 | add_uevent_var(env, "LOCKTABLE=%s", sdp->sd_table_name); | 621 | add_uevent_var(env, "LOCKTABLE=%s", sdp->sd_table_name); |
618 | add_uevent_var(env, "LOCKPROTO=%s", sdp->sd_proto_name); | 622 | add_uevent_var(env, "LOCKPROTO=%s", sdp->sd_proto_name); |
619 | if (!test_bit(SDF_NOJOURNALID, &sdp->sd_flags)) | 623 | if (!test_bit(SDF_NOJOURNALID, &sdp->sd_flags)) |
620 | add_uevent_var(env, "JOURNALID=%u", sdp->sd_lockstruct.ls_jid); | 624 | add_uevent_var(env, "JOURNALID=%d", sdp->sd_lockstruct.ls_jid); |
621 | if (gfs2_uuid_valid(uuid)) | 625 | if (gfs2_uuid_valid(uuid)) |
622 | add_uevent_var(env, "UUID=%pUB", uuid); | 626 | add_uevent_var(env, "UUID=%pUB", uuid); |
623 | return 0; | 627 | return 0; |
diff --git a/fs/gfs2/trace_gfs2.h b/fs/gfs2/trace_gfs2.h index 148d55c14171..cedb0bb96d96 100644 --- a/fs/gfs2/trace_gfs2.h +++ b/fs/gfs2/trace_gfs2.h | |||
@@ -39,7 +39,8 @@ | |||
39 | {(1UL << GLF_INVALIDATE_IN_PROGRESS), "i" }, \ | 39 | {(1UL << GLF_INVALIDATE_IN_PROGRESS), "i" }, \ |
40 | {(1UL << GLF_REPLY_PENDING), "r" }, \ | 40 | {(1UL << GLF_REPLY_PENDING), "r" }, \ |
41 | {(1UL << GLF_INITIAL), "I" }, \ | 41 | {(1UL << GLF_INITIAL), "I" }, \ |
42 | {(1UL << GLF_FROZEN), "F" }) | 42 | {(1UL << GLF_FROZEN), "F" }, \ |
43 | {(1UL << GLF_QUEUED), "q" }) | ||
43 | 44 | ||
44 | #ifndef NUMPTY | 45 | #ifndef NUMPTY |
45 | #define NUMPTY | 46 | #define NUMPTY |
diff --git a/fs/gfs2/trans.h b/fs/gfs2/trans.h index edf9d4bd908e..fb56b783e028 100644 --- a/fs/gfs2/trans.h +++ b/fs/gfs2/trans.h | |||
@@ -20,11 +20,20 @@ struct gfs2_glock; | |||
20 | #define RES_JDATA 1 | 20 | #define RES_JDATA 1 |
21 | #define RES_DATA 1 | 21 | #define RES_DATA 1 |
22 | #define RES_LEAF 1 | 22 | #define RES_LEAF 1 |
23 | #define RES_RG_HDR 1 | ||
23 | #define RES_RG_BIT 2 | 24 | #define RES_RG_BIT 2 |
24 | #define RES_EATTR 1 | 25 | #define RES_EATTR 1 |
25 | #define RES_STATFS 1 | 26 | #define RES_STATFS 1 |
26 | #define RES_QUOTA 2 | 27 | #define RES_QUOTA 2 |
27 | 28 | ||
29 | /* reserve either the number of blocks to be allocated plus the rg header | ||
30 | * block, or all of the blocks in the rg, whichever is smaller */ | ||
31 | static inline unsigned int gfs2_rg_blocks(const struct gfs2_alloc *al) | ||
32 | { | ||
33 | return (al->al_requested < al->al_rgd->rd_length)? | ||
34 | al->al_requested + 1 : al->al_rgd->rd_length; | ||
35 | } | ||
36 | |||
28 | int gfs2_trans_begin(struct gfs2_sbd *sdp, unsigned int blocks, | 37 | int gfs2_trans_begin(struct gfs2_sbd *sdp, unsigned int blocks, |
29 | unsigned int revokes); | 38 | unsigned int revokes); |
30 | 39 | ||
diff --git a/fs/gfs2/xattr.c b/fs/gfs2/xattr.c index 776af6eb4bcb..30b58f07c8a6 100644 --- a/fs/gfs2/xattr.c +++ b/fs/gfs2/xattr.c | |||
@@ -734,7 +734,7 @@ static int ea_alloc_skeleton(struct gfs2_inode *ip, struct gfs2_ea_request *er, | |||
734 | goto out_gunlock_q; | 734 | goto out_gunlock_q; |
735 | 735 | ||
736 | error = gfs2_trans_begin(GFS2_SB(&ip->i_inode), | 736 | error = gfs2_trans_begin(GFS2_SB(&ip->i_inode), |
737 | blks + al->al_rgd->rd_length + | 737 | blks + gfs2_rg_blocks(al) + |
738 | RES_DINODE + RES_STATFS + RES_QUOTA, 0); | 738 | RES_DINODE + RES_STATFS + RES_QUOTA, 0); |
739 | if (error) | 739 | if (error) |
740 | goto out_ipres; | 740 | goto out_ipres; |
diff --git a/fs/hfs/bfind.c b/fs/hfs/bfind.c index 4129cdb3f0d8..571abe97b42a 100644 --- a/fs/hfs/bfind.c +++ b/fs/hfs/bfind.c | |||
@@ -23,7 +23,7 @@ int hfs_find_init(struct hfs_btree *tree, struct hfs_find_data *fd) | |||
23 | fd->search_key = ptr; | 23 | fd->search_key = ptr; |
24 | fd->key = ptr + tree->max_key_len + 2; | 24 | fd->key = ptr + tree->max_key_len + 2; |
25 | dprint(DBG_BNODE_REFS, "find_init: %d (%p)\n", tree->cnid, __builtin_return_address(0)); | 25 | dprint(DBG_BNODE_REFS, "find_init: %d (%p)\n", tree->cnid, __builtin_return_address(0)); |
26 | down(&tree->tree_lock); | 26 | mutex_lock(&tree->tree_lock); |
27 | return 0; | 27 | return 0; |
28 | } | 28 | } |
29 | 29 | ||
@@ -32,7 +32,7 @@ void hfs_find_exit(struct hfs_find_data *fd) | |||
32 | hfs_bnode_put(fd->bnode); | 32 | hfs_bnode_put(fd->bnode); |
33 | kfree(fd->search_key); | 33 | kfree(fd->search_key); |
34 | dprint(DBG_BNODE_REFS, "find_exit: %d (%p)\n", fd->tree->cnid, __builtin_return_address(0)); | 34 | dprint(DBG_BNODE_REFS, "find_exit: %d (%p)\n", fd->tree->cnid, __builtin_return_address(0)); |
35 | up(&fd->tree->tree_lock); | 35 | mutex_unlock(&fd->tree->tree_lock); |
36 | fd->tree = NULL; | 36 | fd->tree = NULL; |
37 | } | 37 | } |
38 | 38 | ||
diff --git a/fs/hfs/btree.c b/fs/hfs/btree.c index 38a0a9917d7f..3ebc437736fe 100644 --- a/fs/hfs/btree.c +++ b/fs/hfs/btree.c | |||
@@ -27,7 +27,7 @@ struct hfs_btree *hfs_btree_open(struct super_block *sb, u32 id, btree_keycmp ke | |||
27 | if (!tree) | 27 | if (!tree) |
28 | return NULL; | 28 | return NULL; |
29 | 29 | ||
30 | init_MUTEX(&tree->tree_lock); | 30 | mutex_init(&tree->tree_lock); |
31 | spin_lock_init(&tree->hash_lock); | 31 | spin_lock_init(&tree->hash_lock); |
32 | /* Set the correct compare function */ | 32 | /* Set the correct compare function */ |
33 | tree->sb = sb; | 33 | tree->sb = sb; |
diff --git a/fs/hfs/btree.h b/fs/hfs/btree.h index cc51905ac21d..2a1d712f85dc 100644 --- a/fs/hfs/btree.h +++ b/fs/hfs/btree.h | |||
@@ -33,7 +33,7 @@ struct hfs_btree { | |||
33 | unsigned int depth; | 33 | unsigned int depth; |
34 | 34 | ||
35 | //unsigned int map1_size, map_size; | 35 | //unsigned int map1_size, map_size; |
36 | struct semaphore tree_lock; | 36 | struct mutex tree_lock; |
37 | 37 | ||
38 | unsigned int pages_per_bnode; | 38 | unsigned int pages_per_bnode; |
39 | spinlock_t hash_lock; | 39 | spinlock_t hash_lock; |
diff --git a/fs/hfsplus/bfind.c b/fs/hfsplus/bfind.c index 5007a41f1be9..d182438c7ae4 100644 --- a/fs/hfsplus/bfind.c +++ b/fs/hfsplus/bfind.c | |||
@@ -23,7 +23,7 @@ int hfs_find_init(struct hfs_btree *tree, struct hfs_find_data *fd) | |||
23 | fd->search_key = ptr; | 23 | fd->search_key = ptr; |
24 | fd->key = ptr + tree->max_key_len + 2; | 24 | fd->key = ptr + tree->max_key_len + 2; |
25 | dprint(DBG_BNODE_REFS, "find_init: %d (%p)\n", tree->cnid, __builtin_return_address(0)); | 25 | dprint(DBG_BNODE_REFS, "find_init: %d (%p)\n", tree->cnid, __builtin_return_address(0)); |
26 | down(&tree->tree_lock); | 26 | mutex_lock(&tree->tree_lock); |
27 | return 0; | 27 | return 0; |
28 | } | 28 | } |
29 | 29 | ||
@@ -32,7 +32,7 @@ void hfs_find_exit(struct hfs_find_data *fd) | |||
32 | hfs_bnode_put(fd->bnode); | 32 | hfs_bnode_put(fd->bnode); |
33 | kfree(fd->search_key); | 33 | kfree(fd->search_key); |
34 | dprint(DBG_BNODE_REFS, "find_exit: %d (%p)\n", fd->tree->cnid, __builtin_return_address(0)); | 34 | dprint(DBG_BNODE_REFS, "find_exit: %d (%p)\n", fd->tree->cnid, __builtin_return_address(0)); |
35 | up(&fd->tree->tree_lock); | 35 | mutex_unlock(&fd->tree->tree_lock); |
36 | fd->tree = NULL; | 36 | fd->tree = NULL; |
37 | } | 37 | } |
38 | 38 | ||
@@ -52,6 +52,10 @@ int __hfs_brec_find(struct hfs_bnode *bnode, struct hfs_find_data *fd) | |||
52 | rec = (e + b) / 2; | 52 | rec = (e + b) / 2; |
53 | len = hfs_brec_lenoff(bnode, rec, &off); | 53 | len = hfs_brec_lenoff(bnode, rec, &off); |
54 | keylen = hfs_brec_keylen(bnode, rec); | 54 | keylen = hfs_brec_keylen(bnode, rec); |
55 | if (keylen == 0) { | ||
56 | res = -EINVAL; | ||
57 | goto fail; | ||
58 | } | ||
55 | hfs_bnode_read(bnode, fd->key, off, keylen); | 59 | hfs_bnode_read(bnode, fd->key, off, keylen); |
56 | cmpval = bnode->tree->keycmp(fd->key, fd->search_key); | 60 | cmpval = bnode->tree->keycmp(fd->key, fd->search_key); |
57 | if (!cmpval) { | 61 | if (!cmpval) { |
@@ -67,6 +71,10 @@ int __hfs_brec_find(struct hfs_bnode *bnode, struct hfs_find_data *fd) | |||
67 | if (rec != e && e >= 0) { | 71 | if (rec != e && e >= 0) { |
68 | len = hfs_brec_lenoff(bnode, e, &off); | 72 | len = hfs_brec_lenoff(bnode, e, &off); |
69 | keylen = hfs_brec_keylen(bnode, e); | 73 | keylen = hfs_brec_keylen(bnode, e); |
74 | if (keylen == 0) { | ||
75 | res = -EINVAL; | ||
76 | goto fail; | ||
77 | } | ||
70 | hfs_bnode_read(bnode, fd->key, off, keylen); | 78 | hfs_bnode_read(bnode, fd->key, off, keylen); |
71 | } | 79 | } |
72 | done: | 80 | done: |
@@ -75,6 +83,7 @@ done: | |||
75 | fd->keylength = keylen; | 83 | fd->keylength = keylen; |
76 | fd->entryoffset = off + keylen; | 84 | fd->entryoffset = off + keylen; |
77 | fd->entrylength = len - keylen; | 85 | fd->entrylength = len - keylen; |
86 | fail: | ||
78 | return res; | 87 | return res; |
79 | } | 88 | } |
80 | 89 | ||
@@ -198,6 +207,10 @@ int hfs_brec_goto(struct hfs_find_data *fd, int cnt) | |||
198 | 207 | ||
199 | len = hfs_brec_lenoff(bnode, fd->record, &off); | 208 | len = hfs_brec_lenoff(bnode, fd->record, &off); |
200 | keylen = hfs_brec_keylen(bnode, fd->record); | 209 | keylen = hfs_brec_keylen(bnode, fd->record); |
210 | if (keylen == 0) { | ||
211 | res = -EINVAL; | ||
212 | goto out; | ||
213 | } | ||
201 | fd->keyoffset = off; | 214 | fd->keyoffset = off; |
202 | fd->keylength = keylen; | 215 | fd->keylength = keylen; |
203 | fd->entryoffset = off + keylen; | 216 | fd->entryoffset = off + keylen; |
diff --git a/fs/hfsplus/bitmap.c b/fs/hfsplus/bitmap.c index ea30afc2a03c..ad57f5991eb1 100644 --- a/fs/hfsplus/bitmap.c +++ b/fs/hfsplus/bitmap.c | |||
@@ -17,6 +17,7 @@ | |||
17 | 17 | ||
18 | int hfsplus_block_allocate(struct super_block *sb, u32 size, u32 offset, u32 *max) | 18 | int hfsplus_block_allocate(struct super_block *sb, u32 size, u32 offset, u32 *max) |
19 | { | 19 | { |
20 | struct hfsplus_sb_info *sbi = HFSPLUS_SB(sb); | ||
20 | struct page *page; | 21 | struct page *page; |
21 | struct address_space *mapping; | 22 | struct address_space *mapping; |
22 | __be32 *pptr, *curr, *end; | 23 | __be32 *pptr, *curr, *end; |
@@ -29,8 +30,8 @@ int hfsplus_block_allocate(struct super_block *sb, u32 size, u32 offset, u32 *ma | |||
29 | return size; | 30 | return size; |
30 | 31 | ||
31 | dprint(DBG_BITMAP, "block_allocate: %u,%u,%u\n", size, offset, len); | 32 | dprint(DBG_BITMAP, "block_allocate: %u,%u,%u\n", size, offset, len); |
32 | mutex_lock(&HFSPLUS_SB(sb).alloc_file->i_mutex); | 33 | mutex_lock(&sbi->alloc_mutex); |
33 | mapping = HFSPLUS_SB(sb).alloc_file->i_mapping; | 34 | mapping = sbi->alloc_file->i_mapping; |
34 | page = read_mapping_page(mapping, offset / PAGE_CACHE_BITS, NULL); | 35 | page = read_mapping_page(mapping, offset / PAGE_CACHE_BITS, NULL); |
35 | if (IS_ERR(page)) { | 36 | if (IS_ERR(page)) { |
36 | start = size; | 37 | start = size; |
@@ -150,16 +151,17 @@ done: | |||
150 | set_page_dirty(page); | 151 | set_page_dirty(page); |
151 | kunmap(page); | 152 | kunmap(page); |
152 | *max = offset + (curr - pptr) * 32 + i - start; | 153 | *max = offset + (curr - pptr) * 32 + i - start; |
153 | HFSPLUS_SB(sb).free_blocks -= *max; | 154 | sbi->free_blocks -= *max; |
154 | sb->s_dirt = 1; | 155 | sb->s_dirt = 1; |
155 | dprint(DBG_BITMAP, "-> %u,%u\n", start, *max); | 156 | dprint(DBG_BITMAP, "-> %u,%u\n", start, *max); |
156 | out: | 157 | out: |
157 | mutex_unlock(&HFSPLUS_SB(sb).alloc_file->i_mutex); | 158 | mutex_unlock(&sbi->alloc_mutex); |
158 | return start; | 159 | return start; |
159 | } | 160 | } |
160 | 161 | ||
161 | int hfsplus_block_free(struct super_block *sb, u32 offset, u32 count) | 162 | int hfsplus_block_free(struct super_block *sb, u32 offset, u32 count) |
162 | { | 163 | { |
164 | struct hfsplus_sb_info *sbi = HFSPLUS_SB(sb); | ||
163 | struct page *page; | 165 | struct page *page; |
164 | struct address_space *mapping; | 166 | struct address_space *mapping; |
165 | __be32 *pptr, *curr, *end; | 167 | __be32 *pptr, *curr, *end; |
@@ -172,11 +174,11 @@ int hfsplus_block_free(struct super_block *sb, u32 offset, u32 count) | |||
172 | 174 | ||
173 | dprint(DBG_BITMAP, "block_free: %u,%u\n", offset, count); | 175 | dprint(DBG_BITMAP, "block_free: %u,%u\n", offset, count); |
174 | /* are all of the bits in range? */ | 176 | /* are all of the bits in range? */ |
175 | if ((offset + count) > HFSPLUS_SB(sb).total_blocks) | 177 | if ((offset + count) > sbi->total_blocks) |
176 | return -2; | 178 | return -2; |
177 | 179 | ||
178 | mutex_lock(&HFSPLUS_SB(sb).alloc_file->i_mutex); | 180 | mutex_lock(&sbi->alloc_mutex); |
179 | mapping = HFSPLUS_SB(sb).alloc_file->i_mapping; | 181 | mapping = sbi->alloc_file->i_mapping; |
180 | pnr = offset / PAGE_CACHE_BITS; | 182 | pnr = offset / PAGE_CACHE_BITS; |
181 | page = read_mapping_page(mapping, pnr, NULL); | 183 | page = read_mapping_page(mapping, pnr, NULL); |
182 | pptr = kmap(page); | 184 | pptr = kmap(page); |
@@ -224,9 +226,9 @@ done: | |||
224 | out: | 226 | out: |
225 | set_page_dirty(page); | 227 | set_page_dirty(page); |
226 | kunmap(page); | 228 | kunmap(page); |
227 | HFSPLUS_SB(sb).free_blocks += len; | 229 | sbi->free_blocks += len; |
228 | sb->s_dirt = 1; | 230 | sb->s_dirt = 1; |
229 | mutex_unlock(&HFSPLUS_SB(sb).alloc_file->i_mutex); | 231 | mutex_unlock(&sbi->alloc_mutex); |
230 | 232 | ||
231 | return 0; | 233 | return 0; |
232 | } | 234 | } |
diff --git a/fs/hfsplus/brec.c b/fs/hfsplus/brec.c index c88e5d72a402..2f39d05443e1 100644 --- a/fs/hfsplus/brec.c +++ b/fs/hfsplus/brec.c | |||
@@ -42,10 +42,13 @@ u16 hfs_brec_keylen(struct hfs_bnode *node, u16 rec) | |||
42 | recoff = hfs_bnode_read_u16(node, node->tree->node_size - (rec + 1) * 2); | 42 | recoff = hfs_bnode_read_u16(node, node->tree->node_size - (rec + 1) * 2); |
43 | if (!recoff) | 43 | if (!recoff) |
44 | return 0; | 44 | return 0; |
45 | if (node->tree->attributes & HFS_TREE_BIGKEYS) | 45 | |
46 | retval = hfs_bnode_read_u16(node, recoff) + 2; | 46 | retval = hfs_bnode_read_u16(node, recoff) + 2; |
47 | else | 47 | if (retval > node->tree->max_key_len + 2) { |
48 | retval = (hfs_bnode_read_u8(node, recoff) | 1) + 1; | 48 | printk(KERN_ERR "hfs: keylen %d too large\n", |
49 | retval); | ||
50 | retval = 0; | ||
51 | } | ||
49 | } | 52 | } |
50 | return retval; | 53 | return retval; |
51 | } | 54 | } |
@@ -216,7 +219,7 @@ skip: | |||
216 | static struct hfs_bnode *hfs_bnode_split(struct hfs_find_data *fd) | 219 | static struct hfs_bnode *hfs_bnode_split(struct hfs_find_data *fd) |
217 | { | 220 | { |
218 | struct hfs_btree *tree; | 221 | struct hfs_btree *tree; |
219 | struct hfs_bnode *node, *new_node; | 222 | struct hfs_bnode *node, *new_node, *next_node; |
220 | struct hfs_bnode_desc node_desc; | 223 | struct hfs_bnode_desc node_desc; |
221 | int num_recs, new_rec_off, new_off, old_rec_off; | 224 | int num_recs, new_rec_off, new_off, old_rec_off; |
222 | int data_start, data_end, size; | 225 | int data_start, data_end, size; |
@@ -235,6 +238,17 @@ static struct hfs_bnode *hfs_bnode_split(struct hfs_find_data *fd) | |||
235 | new_node->type = node->type; | 238 | new_node->type = node->type; |
236 | new_node->height = node->height; | 239 | new_node->height = node->height; |
237 | 240 | ||
241 | if (node->next) | ||
242 | next_node = hfs_bnode_find(tree, node->next); | ||
243 | else | ||
244 | next_node = NULL; | ||
245 | |||
246 | if (IS_ERR(next_node)) { | ||
247 | hfs_bnode_put(node); | ||
248 | hfs_bnode_put(new_node); | ||
249 | return next_node; | ||
250 | } | ||
251 | |||
238 | size = tree->node_size / 2 - node->num_recs * 2 - 14; | 252 | size = tree->node_size / 2 - node->num_recs * 2 - 14; |
239 | old_rec_off = tree->node_size - 4; | 253 | old_rec_off = tree->node_size - 4; |
240 | num_recs = 1; | 254 | num_recs = 1; |
@@ -248,6 +262,8 @@ static struct hfs_bnode *hfs_bnode_split(struct hfs_find_data *fd) | |||
248 | /* panic? */ | 262 | /* panic? */ |
249 | hfs_bnode_put(node); | 263 | hfs_bnode_put(node); |
250 | hfs_bnode_put(new_node); | 264 | hfs_bnode_put(new_node); |
265 | if (next_node) | ||
266 | hfs_bnode_put(next_node); | ||
251 | return ERR_PTR(-ENOSPC); | 267 | return ERR_PTR(-ENOSPC); |
252 | } | 268 | } |
253 | 269 | ||
@@ -302,8 +318,7 @@ static struct hfs_bnode *hfs_bnode_split(struct hfs_find_data *fd) | |||
302 | hfs_bnode_write(node, &node_desc, 0, sizeof(node_desc)); | 318 | hfs_bnode_write(node, &node_desc, 0, sizeof(node_desc)); |
303 | 319 | ||
304 | /* update next bnode header */ | 320 | /* update next bnode header */ |
305 | if (new_node->next) { | 321 | if (next_node) { |
306 | struct hfs_bnode *next_node = hfs_bnode_find(tree, new_node->next); | ||
307 | next_node->prev = new_node->this; | 322 | next_node->prev = new_node->this; |
308 | hfs_bnode_read(next_node, &node_desc, 0, sizeof(node_desc)); | 323 | hfs_bnode_read(next_node, &node_desc, 0, sizeof(node_desc)); |
309 | node_desc.prev = cpu_to_be32(next_node->prev); | 324 | node_desc.prev = cpu_to_be32(next_node->prev); |
diff --git a/fs/hfsplus/btree.c b/fs/hfsplus/btree.c index e49fcee1e293..22e4d4e32999 100644 --- a/fs/hfsplus/btree.c +++ b/fs/hfsplus/btree.c | |||
@@ -30,7 +30,7 @@ struct hfs_btree *hfs_btree_open(struct super_block *sb, u32 id) | |||
30 | if (!tree) | 30 | if (!tree) |
31 | return NULL; | 31 | return NULL; |
32 | 32 | ||
33 | init_MUTEX(&tree->tree_lock); | 33 | mutex_init(&tree->tree_lock); |
34 | spin_lock_init(&tree->hash_lock); | 34 | spin_lock_init(&tree->hash_lock); |
35 | tree->sb = sb; | 35 | tree->sb = sb; |
36 | tree->cnid = id; | 36 | tree->cnid = id; |
@@ -39,10 +39,16 @@ struct hfs_btree *hfs_btree_open(struct super_block *sb, u32 id) | |||
39 | goto free_tree; | 39 | goto free_tree; |
40 | tree->inode = inode; | 40 | tree->inode = inode; |
41 | 41 | ||
42 | if (!HFSPLUS_I(tree->inode)->first_blocks) { | ||
43 | printk(KERN_ERR | ||
44 | "hfs: invalid btree extent records (0 size).\n"); | ||
45 | goto free_inode; | ||
46 | } | ||
47 | |||
42 | mapping = tree->inode->i_mapping; | 48 | mapping = tree->inode->i_mapping; |
43 | page = read_mapping_page(mapping, 0, NULL); | 49 | page = read_mapping_page(mapping, 0, NULL); |
44 | if (IS_ERR(page)) | 50 | if (IS_ERR(page)) |
45 | goto free_tree; | 51 | goto free_inode; |
46 | 52 | ||
47 | /* Load the header */ | 53 | /* Load the header */ |
48 | head = (struct hfs_btree_header_rec *)(kmap(page) + sizeof(struct hfs_bnode_desc)); | 54 | head = (struct hfs_btree_header_rec *)(kmap(page) + sizeof(struct hfs_bnode_desc)); |
@@ -57,27 +63,56 @@ struct hfs_btree *hfs_btree_open(struct super_block *sb, u32 id) | |||
57 | tree->max_key_len = be16_to_cpu(head->max_key_len); | 63 | tree->max_key_len = be16_to_cpu(head->max_key_len); |
58 | tree->depth = be16_to_cpu(head->depth); | 64 | tree->depth = be16_to_cpu(head->depth); |
59 | 65 | ||
60 | /* Set the correct compare function */ | 66 | /* Verify the tree and set the correct compare function */ |
61 | if (id == HFSPLUS_EXT_CNID) { | 67 | switch (id) { |
68 | case HFSPLUS_EXT_CNID: | ||
69 | if (tree->max_key_len != HFSPLUS_EXT_KEYLEN - sizeof(u16)) { | ||
70 | printk(KERN_ERR "hfs: invalid extent max_key_len %d\n", | ||
71 | tree->max_key_len); | ||
72 | goto fail_page; | ||
73 | } | ||
74 | if (tree->attributes & HFS_TREE_VARIDXKEYS) { | ||
75 | printk(KERN_ERR "hfs: invalid extent btree flag\n"); | ||
76 | goto fail_page; | ||
77 | } | ||
78 | |||
62 | tree->keycmp = hfsplus_ext_cmp_key; | 79 | tree->keycmp = hfsplus_ext_cmp_key; |
63 | } else if (id == HFSPLUS_CAT_CNID) { | 80 | break; |
64 | if ((HFSPLUS_SB(sb).flags & HFSPLUS_SB_HFSX) && | 81 | case HFSPLUS_CAT_CNID: |
82 | if (tree->max_key_len != HFSPLUS_CAT_KEYLEN - sizeof(u16)) { | ||
83 | printk(KERN_ERR "hfs: invalid catalog max_key_len %d\n", | ||
84 | tree->max_key_len); | ||
85 | goto fail_page; | ||
86 | } | ||
87 | if (!(tree->attributes & HFS_TREE_VARIDXKEYS)) { | ||
88 | printk(KERN_ERR "hfs: invalid catalog btree flag\n"); | ||
89 | goto fail_page; | ||
90 | } | ||
91 | |||
92 | if (test_bit(HFSPLUS_SB_HFSX, &HFSPLUS_SB(sb)->flags) && | ||
65 | (head->key_type == HFSPLUS_KEY_BINARY)) | 93 | (head->key_type == HFSPLUS_KEY_BINARY)) |
66 | tree->keycmp = hfsplus_cat_bin_cmp_key; | 94 | tree->keycmp = hfsplus_cat_bin_cmp_key; |
67 | else { | 95 | else { |
68 | tree->keycmp = hfsplus_cat_case_cmp_key; | 96 | tree->keycmp = hfsplus_cat_case_cmp_key; |
69 | HFSPLUS_SB(sb).flags |= HFSPLUS_SB_CASEFOLD; | 97 | set_bit(HFSPLUS_SB_CASEFOLD, &HFSPLUS_SB(sb)->flags); |
70 | } | 98 | } |
71 | } else { | 99 | break; |
100 | default: | ||
72 | printk(KERN_ERR "hfs: unknown B*Tree requested\n"); | 101 | printk(KERN_ERR "hfs: unknown B*Tree requested\n"); |
73 | goto fail_page; | 102 | goto fail_page; |
74 | } | 103 | } |
75 | 104 | ||
105 | if (!(tree->attributes & HFS_TREE_BIGKEYS)) { | ||
106 | printk(KERN_ERR "hfs: invalid btree flag\n"); | ||
107 | goto fail_page; | ||
108 | } | ||
109 | |||
76 | size = tree->node_size; | 110 | size = tree->node_size; |
77 | if (!is_power_of_2(size)) | 111 | if (!is_power_of_2(size)) |
78 | goto fail_page; | 112 | goto fail_page; |
79 | if (!tree->node_count) | 113 | if (!tree->node_count) |
80 | goto fail_page; | 114 | goto fail_page; |
115 | |||
81 | tree->node_size_shift = ffs(size) - 1; | 116 | tree->node_size_shift = ffs(size) - 1; |
82 | 117 | ||
83 | tree->pages_per_bnode = (tree->node_size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; | 118 | tree->pages_per_bnode = (tree->node_size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; |
@@ -87,10 +122,11 @@ struct hfs_btree *hfs_btree_open(struct super_block *sb, u32 id) | |||
87 | return tree; | 122 | return tree; |
88 | 123 | ||
89 | fail_page: | 124 | fail_page: |
90 | tree->inode->i_mapping->a_ops = &hfsplus_aops; | ||
91 | page_cache_release(page); | 125 | page_cache_release(page); |
92 | free_tree: | 126 | free_inode: |
127 | tree->inode->i_mapping->a_ops = &hfsplus_aops; | ||
93 | iput(tree->inode); | 128 | iput(tree->inode); |
129 | free_tree: | ||
94 | kfree(tree); | 130 | kfree(tree); |
95 | return NULL; | 131 | return NULL; |
96 | } | 132 | } |
@@ -192,17 +228,18 @@ struct hfs_bnode *hfs_bmap_alloc(struct hfs_btree *tree) | |||
192 | 228 | ||
193 | while (!tree->free_nodes) { | 229 | while (!tree->free_nodes) { |
194 | struct inode *inode = tree->inode; | 230 | struct inode *inode = tree->inode; |
231 | struct hfsplus_inode_info *hip = HFSPLUS_I(inode); | ||
195 | u32 count; | 232 | u32 count; |
196 | int res; | 233 | int res; |
197 | 234 | ||
198 | res = hfsplus_file_extend(inode); | 235 | res = hfsplus_file_extend(inode); |
199 | if (res) | 236 | if (res) |
200 | return ERR_PTR(res); | 237 | return ERR_PTR(res); |
201 | HFSPLUS_I(inode).phys_size = inode->i_size = | 238 | hip->phys_size = inode->i_size = |
202 | (loff_t)HFSPLUS_I(inode).alloc_blocks << | 239 | (loff_t)hip->alloc_blocks << |
203 | HFSPLUS_SB(tree->sb).alloc_blksz_shift; | 240 | HFSPLUS_SB(tree->sb)->alloc_blksz_shift; |
204 | HFSPLUS_I(inode).fs_blocks = HFSPLUS_I(inode).alloc_blocks << | 241 | hip->fs_blocks = |
205 | HFSPLUS_SB(tree->sb).fs_shift; | 242 | hip->alloc_blocks << HFSPLUS_SB(tree->sb)->fs_shift; |
206 | inode_set_bytes(inode, inode->i_size); | 243 | inode_set_bytes(inode, inode->i_size); |
207 | count = inode->i_size >> tree->node_size_shift; | 244 | count = inode->i_size >> tree->node_size_shift; |
208 | tree->free_nodes = count - tree->node_count; | 245 | tree->free_nodes = count - tree->node_count; |
diff --git a/fs/hfsplus/catalog.c b/fs/hfsplus/catalog.c index f6874acb2cf2..8af45fc5b051 100644 --- a/fs/hfsplus/catalog.c +++ b/fs/hfsplus/catalog.c | |||
@@ -67,7 +67,7 @@ static void hfsplus_cat_build_key_uni(hfsplus_btree_key *key, u32 parent, | |||
67 | key->key_len = cpu_to_be16(6 + ustrlen); | 67 | key->key_len = cpu_to_be16(6 + ustrlen); |
68 | } | 68 | } |
69 | 69 | ||
70 | static void hfsplus_set_perms(struct inode *inode, struct hfsplus_perm *perms) | 70 | void hfsplus_cat_set_perms(struct inode *inode, struct hfsplus_perm *perms) |
71 | { | 71 | { |
72 | if (inode->i_flags & S_IMMUTABLE) | 72 | if (inode->i_flags & S_IMMUTABLE) |
73 | perms->rootflags |= HFSPLUS_FLG_IMMUTABLE; | 73 | perms->rootflags |= HFSPLUS_FLG_IMMUTABLE; |
@@ -77,15 +77,24 @@ static void hfsplus_set_perms(struct inode *inode, struct hfsplus_perm *perms) | |||
77 | perms->rootflags |= HFSPLUS_FLG_APPEND; | 77 | perms->rootflags |= HFSPLUS_FLG_APPEND; |
78 | else | 78 | else |
79 | perms->rootflags &= ~HFSPLUS_FLG_APPEND; | 79 | perms->rootflags &= ~HFSPLUS_FLG_APPEND; |
80 | HFSPLUS_I(inode).rootflags = perms->rootflags; | 80 | |
81 | HFSPLUS_I(inode).userflags = perms->userflags; | 81 | perms->userflags = HFSPLUS_I(inode)->userflags; |
82 | perms->mode = cpu_to_be16(inode->i_mode); | 82 | perms->mode = cpu_to_be16(inode->i_mode); |
83 | perms->owner = cpu_to_be32(inode->i_uid); | 83 | perms->owner = cpu_to_be32(inode->i_uid); |
84 | perms->group = cpu_to_be32(inode->i_gid); | 84 | perms->group = cpu_to_be32(inode->i_gid); |
85 | |||
86 | if (S_ISREG(inode->i_mode)) | ||
87 | perms->dev = cpu_to_be32(inode->i_nlink); | ||
88 | else if (S_ISBLK(inode->i_mode) || S_ISCHR(inode->i_mode)) | ||
89 | perms->dev = cpu_to_be32(inode->i_rdev); | ||
90 | else | ||
91 | perms->dev = 0; | ||
85 | } | 92 | } |
86 | 93 | ||
87 | static int hfsplus_cat_build_record(hfsplus_cat_entry *entry, u32 cnid, struct inode *inode) | 94 | static int hfsplus_cat_build_record(hfsplus_cat_entry *entry, u32 cnid, struct inode *inode) |
88 | { | 95 | { |
96 | struct hfsplus_sb_info *sbi = HFSPLUS_SB(inode->i_sb); | ||
97 | |||
89 | if (S_ISDIR(inode->i_mode)) { | 98 | if (S_ISDIR(inode->i_mode)) { |
90 | struct hfsplus_cat_folder *folder; | 99 | struct hfsplus_cat_folder *folder; |
91 | 100 | ||
@@ -93,13 +102,13 @@ static int hfsplus_cat_build_record(hfsplus_cat_entry *entry, u32 cnid, struct i | |||
93 | memset(folder, 0, sizeof(*folder)); | 102 | memset(folder, 0, sizeof(*folder)); |
94 | folder->type = cpu_to_be16(HFSPLUS_FOLDER); | 103 | folder->type = cpu_to_be16(HFSPLUS_FOLDER); |
95 | folder->id = cpu_to_be32(inode->i_ino); | 104 | folder->id = cpu_to_be32(inode->i_ino); |
96 | HFSPLUS_I(inode).create_date = | 105 | HFSPLUS_I(inode)->create_date = |
97 | folder->create_date = | 106 | folder->create_date = |
98 | folder->content_mod_date = | 107 | folder->content_mod_date = |
99 | folder->attribute_mod_date = | 108 | folder->attribute_mod_date = |
100 | folder->access_date = hfsp_now2mt(); | 109 | folder->access_date = hfsp_now2mt(); |
101 | hfsplus_set_perms(inode, &folder->permissions); | 110 | hfsplus_cat_set_perms(inode, &folder->permissions); |
102 | if (inode == HFSPLUS_SB(inode->i_sb).hidden_dir) | 111 | if (inode == sbi->hidden_dir) |
103 | /* invisible and namelocked */ | 112 | /* invisible and namelocked */ |
104 | folder->user_info.frFlags = cpu_to_be16(0x5000); | 113 | folder->user_info.frFlags = cpu_to_be16(0x5000); |
105 | return sizeof(*folder); | 114 | return sizeof(*folder); |
@@ -111,19 +120,19 @@ static int hfsplus_cat_build_record(hfsplus_cat_entry *entry, u32 cnid, struct i | |||
111 | file->type = cpu_to_be16(HFSPLUS_FILE); | 120 | file->type = cpu_to_be16(HFSPLUS_FILE); |
112 | file->flags = cpu_to_be16(HFSPLUS_FILE_THREAD_EXISTS); | 121 | file->flags = cpu_to_be16(HFSPLUS_FILE_THREAD_EXISTS); |
113 | file->id = cpu_to_be32(cnid); | 122 | file->id = cpu_to_be32(cnid); |
114 | HFSPLUS_I(inode).create_date = | 123 | HFSPLUS_I(inode)->create_date = |
115 | file->create_date = | 124 | file->create_date = |
116 | file->content_mod_date = | 125 | file->content_mod_date = |
117 | file->attribute_mod_date = | 126 | file->attribute_mod_date = |
118 | file->access_date = hfsp_now2mt(); | 127 | file->access_date = hfsp_now2mt(); |
119 | if (cnid == inode->i_ino) { | 128 | if (cnid == inode->i_ino) { |
120 | hfsplus_set_perms(inode, &file->permissions); | 129 | hfsplus_cat_set_perms(inode, &file->permissions); |
121 | if (S_ISLNK(inode->i_mode)) { | 130 | if (S_ISLNK(inode->i_mode)) { |
122 | file->user_info.fdType = cpu_to_be32(HFSP_SYMLINK_TYPE); | 131 | file->user_info.fdType = cpu_to_be32(HFSP_SYMLINK_TYPE); |
123 | file->user_info.fdCreator = cpu_to_be32(HFSP_SYMLINK_CREATOR); | 132 | file->user_info.fdCreator = cpu_to_be32(HFSP_SYMLINK_CREATOR); |
124 | } else { | 133 | } else { |
125 | file->user_info.fdType = cpu_to_be32(HFSPLUS_SB(inode->i_sb).type); | 134 | file->user_info.fdType = cpu_to_be32(sbi->type); |
126 | file->user_info.fdCreator = cpu_to_be32(HFSPLUS_SB(inode->i_sb).creator); | 135 | file->user_info.fdCreator = cpu_to_be32(sbi->creator); |
127 | } | 136 | } |
128 | if ((file->permissions.rootflags | file->permissions.userflags) & HFSPLUS_FLG_IMMUTABLE) | 137 | if ((file->permissions.rootflags | file->permissions.userflags) & HFSPLUS_FLG_IMMUTABLE) |
129 | file->flags |= cpu_to_be16(HFSPLUS_FILE_LOCKED); | 138 | file->flags |= cpu_to_be16(HFSPLUS_FILE_LOCKED); |
@@ -131,8 +140,8 @@ static int hfsplus_cat_build_record(hfsplus_cat_entry *entry, u32 cnid, struct i | |||
131 | file->user_info.fdType = cpu_to_be32(HFSP_HARDLINK_TYPE); | 140 | file->user_info.fdType = cpu_to_be32(HFSP_HARDLINK_TYPE); |
132 | file->user_info.fdCreator = cpu_to_be32(HFSP_HFSPLUS_CREATOR); | 141 | file->user_info.fdCreator = cpu_to_be32(HFSP_HFSPLUS_CREATOR); |
133 | file->user_info.fdFlags = cpu_to_be16(0x100); | 142 | file->user_info.fdFlags = cpu_to_be16(0x100); |
134 | file->create_date = HFSPLUS_I(HFSPLUS_SB(inode->i_sb).hidden_dir).create_date; | 143 | file->create_date = HFSPLUS_I(sbi->hidden_dir)->create_date; |
135 | file->permissions.dev = cpu_to_be32(HFSPLUS_I(inode).dev); | 144 | file->permissions.dev = cpu_to_be32(HFSPLUS_I(inode)->linkid); |
136 | } | 145 | } |
137 | return sizeof(*file); | 146 | return sizeof(*file); |
138 | } | 147 | } |
@@ -180,15 +189,14 @@ int hfsplus_find_cat(struct super_block *sb, u32 cnid, | |||
180 | 189 | ||
181 | int hfsplus_create_cat(u32 cnid, struct inode *dir, struct qstr *str, struct inode *inode) | 190 | int hfsplus_create_cat(u32 cnid, struct inode *dir, struct qstr *str, struct inode *inode) |
182 | { | 191 | { |
192 | struct super_block *sb = dir->i_sb; | ||
183 | struct hfs_find_data fd; | 193 | struct hfs_find_data fd; |
184 | struct super_block *sb; | ||
185 | hfsplus_cat_entry entry; | 194 | hfsplus_cat_entry entry; |
186 | int entry_size; | 195 | int entry_size; |
187 | int err; | 196 | int err; |
188 | 197 | ||
189 | dprint(DBG_CAT_MOD, "create_cat: %s,%u(%d)\n", str->name, cnid, inode->i_nlink); | 198 | dprint(DBG_CAT_MOD, "create_cat: %s,%u(%d)\n", str->name, cnid, inode->i_nlink); |
190 | sb = dir->i_sb; | 199 | hfs_find_init(HFSPLUS_SB(sb)->cat_tree, &fd); |
191 | hfs_find_init(HFSPLUS_SB(sb).cat_tree, &fd); | ||
192 | 200 | ||
193 | hfsplus_cat_build_key(sb, fd.search_key, cnid, NULL); | 201 | hfsplus_cat_build_key(sb, fd.search_key, cnid, NULL); |
194 | entry_size = hfsplus_fill_cat_thread(sb, &entry, S_ISDIR(inode->i_mode) ? | 202 | entry_size = hfsplus_fill_cat_thread(sb, &entry, S_ISDIR(inode->i_mode) ? |
@@ -234,7 +242,7 @@ err2: | |||
234 | 242 | ||
235 | int hfsplus_delete_cat(u32 cnid, struct inode *dir, struct qstr *str) | 243 | int hfsplus_delete_cat(u32 cnid, struct inode *dir, struct qstr *str) |
236 | { | 244 | { |
237 | struct super_block *sb; | 245 | struct super_block *sb = dir->i_sb; |
238 | struct hfs_find_data fd; | 246 | struct hfs_find_data fd; |
239 | struct hfsplus_fork_raw fork; | 247 | struct hfsplus_fork_raw fork; |
240 | struct list_head *pos; | 248 | struct list_head *pos; |
@@ -242,8 +250,7 @@ int hfsplus_delete_cat(u32 cnid, struct inode *dir, struct qstr *str) | |||
242 | u16 type; | 250 | u16 type; |
243 | 251 | ||
244 | dprint(DBG_CAT_MOD, "delete_cat: %s,%u\n", str ? str->name : NULL, cnid); | 252 | dprint(DBG_CAT_MOD, "delete_cat: %s,%u\n", str ? str->name : NULL, cnid); |
245 | sb = dir->i_sb; | 253 | hfs_find_init(HFSPLUS_SB(sb)->cat_tree, &fd); |
246 | hfs_find_init(HFSPLUS_SB(sb).cat_tree, &fd); | ||
247 | 254 | ||
248 | if (!str) { | 255 | if (!str) { |
249 | int len; | 256 | int len; |
@@ -279,7 +286,7 @@ int hfsplus_delete_cat(u32 cnid, struct inode *dir, struct qstr *str) | |||
279 | hfsplus_free_fork(sb, cnid, &fork, HFSPLUS_TYPE_RSRC); | 286 | hfsplus_free_fork(sb, cnid, &fork, HFSPLUS_TYPE_RSRC); |
280 | } | 287 | } |
281 | 288 | ||
282 | list_for_each(pos, &HFSPLUS_I(dir).open_dir_list) { | 289 | list_for_each(pos, &HFSPLUS_I(dir)->open_dir_list) { |
283 | struct hfsplus_readdir_data *rd = | 290 | struct hfsplus_readdir_data *rd = |
284 | list_entry(pos, struct hfsplus_readdir_data, list); | 291 | list_entry(pos, struct hfsplus_readdir_data, list); |
285 | if (fd.tree->keycmp(fd.search_key, (void *)&rd->key) < 0) | 292 | if (fd.tree->keycmp(fd.search_key, (void *)&rd->key) < 0) |
@@ -312,7 +319,7 @@ int hfsplus_rename_cat(u32 cnid, | |||
312 | struct inode *src_dir, struct qstr *src_name, | 319 | struct inode *src_dir, struct qstr *src_name, |
313 | struct inode *dst_dir, struct qstr *dst_name) | 320 | struct inode *dst_dir, struct qstr *dst_name) |
314 | { | 321 | { |
315 | struct super_block *sb; | 322 | struct super_block *sb = src_dir->i_sb; |
316 | struct hfs_find_data src_fd, dst_fd; | 323 | struct hfs_find_data src_fd, dst_fd; |
317 | hfsplus_cat_entry entry; | 324 | hfsplus_cat_entry entry; |
318 | int entry_size, type; | 325 | int entry_size, type; |
@@ -320,8 +327,7 @@ int hfsplus_rename_cat(u32 cnid, | |||
320 | 327 | ||
321 | dprint(DBG_CAT_MOD, "rename_cat: %u - %lu,%s - %lu,%s\n", cnid, src_dir->i_ino, src_name->name, | 328 | dprint(DBG_CAT_MOD, "rename_cat: %u - %lu,%s - %lu,%s\n", cnid, src_dir->i_ino, src_name->name, |
322 | dst_dir->i_ino, dst_name->name); | 329 | dst_dir->i_ino, dst_name->name); |
323 | sb = src_dir->i_sb; | 330 | hfs_find_init(HFSPLUS_SB(sb)->cat_tree, &src_fd); |
324 | hfs_find_init(HFSPLUS_SB(sb).cat_tree, &src_fd); | ||
325 | dst_fd = src_fd; | 331 | dst_fd = src_fd; |
326 | 332 | ||
327 | /* find the old dir entry and read the data */ | 333 | /* find the old dir entry and read the data */ |
diff --git a/fs/hfsplus/dir.c b/fs/hfsplus/dir.c index 764fd1bdca88..d236d85ec9d7 100644 --- a/fs/hfsplus/dir.c +++ b/fs/hfsplus/dir.c | |||
@@ -39,7 +39,7 @@ static struct dentry *hfsplus_lookup(struct inode *dir, struct dentry *dentry, | |||
39 | 39 | ||
40 | dentry->d_op = &hfsplus_dentry_operations; | 40 | dentry->d_op = &hfsplus_dentry_operations; |
41 | dentry->d_fsdata = NULL; | 41 | dentry->d_fsdata = NULL; |
42 | hfs_find_init(HFSPLUS_SB(sb).cat_tree, &fd); | 42 | hfs_find_init(HFSPLUS_SB(sb)->cat_tree, &fd); |
43 | hfsplus_cat_build_key(sb, fd.search_key, dir->i_ino, &dentry->d_name); | 43 | hfsplus_cat_build_key(sb, fd.search_key, dir->i_ino, &dentry->d_name); |
44 | again: | 44 | again: |
45 | err = hfs_brec_read(&fd, &entry, sizeof(entry)); | 45 | err = hfs_brec_read(&fd, &entry, sizeof(entry)); |
@@ -68,9 +68,9 @@ again: | |||
68 | cnid = be32_to_cpu(entry.file.id); | 68 | cnid = be32_to_cpu(entry.file.id); |
69 | if (entry.file.user_info.fdType == cpu_to_be32(HFSP_HARDLINK_TYPE) && | 69 | if (entry.file.user_info.fdType == cpu_to_be32(HFSP_HARDLINK_TYPE) && |
70 | entry.file.user_info.fdCreator == cpu_to_be32(HFSP_HFSPLUS_CREATOR) && | 70 | entry.file.user_info.fdCreator == cpu_to_be32(HFSP_HFSPLUS_CREATOR) && |
71 | (entry.file.create_date == HFSPLUS_I(HFSPLUS_SB(sb).hidden_dir).create_date || | 71 | (entry.file.create_date == HFSPLUS_I(HFSPLUS_SB(sb)->hidden_dir)->create_date || |
72 | entry.file.create_date == HFSPLUS_I(sb->s_root->d_inode).create_date) && | 72 | entry.file.create_date == HFSPLUS_I(sb->s_root->d_inode)->create_date) && |
73 | HFSPLUS_SB(sb).hidden_dir) { | 73 | HFSPLUS_SB(sb)->hidden_dir) { |
74 | struct qstr str; | 74 | struct qstr str; |
75 | char name[32]; | 75 | char name[32]; |
76 | 76 | ||
@@ -86,7 +86,8 @@ again: | |||
86 | linkid = be32_to_cpu(entry.file.permissions.dev); | 86 | linkid = be32_to_cpu(entry.file.permissions.dev); |
87 | str.len = sprintf(name, "iNode%d", linkid); | 87 | str.len = sprintf(name, "iNode%d", linkid); |
88 | str.name = name; | 88 | str.name = name; |
89 | hfsplus_cat_build_key(sb, fd.search_key, HFSPLUS_SB(sb).hidden_dir->i_ino, &str); | 89 | hfsplus_cat_build_key(sb, fd.search_key, |
90 | HFSPLUS_SB(sb)->hidden_dir->i_ino, &str); | ||
90 | goto again; | 91 | goto again; |
91 | } | 92 | } |
92 | } else if (!dentry->d_fsdata) | 93 | } else if (!dentry->d_fsdata) |
@@ -101,7 +102,7 @@ again: | |||
101 | if (IS_ERR(inode)) | 102 | if (IS_ERR(inode)) |
102 | return ERR_CAST(inode); | 103 | return ERR_CAST(inode); |
103 | if (S_ISREG(inode->i_mode)) | 104 | if (S_ISREG(inode->i_mode)) |
104 | HFSPLUS_I(inode).dev = linkid; | 105 | HFSPLUS_I(inode)->linkid = linkid; |
105 | out: | 106 | out: |
106 | d_add(dentry, inode); | 107 | d_add(dentry, inode); |
107 | return NULL; | 108 | return NULL; |
@@ -124,7 +125,7 @@ static int hfsplus_readdir(struct file *filp, void *dirent, filldir_t filldir) | |||
124 | if (filp->f_pos >= inode->i_size) | 125 | if (filp->f_pos >= inode->i_size) |
125 | return 0; | 126 | return 0; |
126 | 127 | ||
127 | hfs_find_init(HFSPLUS_SB(sb).cat_tree, &fd); | 128 | hfs_find_init(HFSPLUS_SB(sb)->cat_tree, &fd); |
128 | hfsplus_cat_build_key(sb, fd.search_key, inode->i_ino, NULL); | 129 | hfsplus_cat_build_key(sb, fd.search_key, inode->i_ino, NULL); |
129 | err = hfs_brec_find(&fd); | 130 | err = hfs_brec_find(&fd); |
130 | if (err) | 131 | if (err) |
@@ -180,8 +181,9 @@ static int hfsplus_readdir(struct file *filp, void *dirent, filldir_t filldir) | |||
180 | err = -EIO; | 181 | err = -EIO; |
181 | goto out; | 182 | goto out; |
182 | } | 183 | } |
183 | if (HFSPLUS_SB(sb).hidden_dir && | 184 | if (HFSPLUS_SB(sb)->hidden_dir && |
184 | HFSPLUS_SB(sb).hidden_dir->i_ino == be32_to_cpu(entry.folder.id)) | 185 | HFSPLUS_SB(sb)->hidden_dir->i_ino == |
186 | be32_to_cpu(entry.folder.id)) | ||
185 | goto next; | 187 | goto next; |
186 | if (filldir(dirent, strbuf, len, filp->f_pos, | 188 | if (filldir(dirent, strbuf, len, filp->f_pos, |
187 | be32_to_cpu(entry.folder.id), DT_DIR)) | 189 | be32_to_cpu(entry.folder.id), DT_DIR)) |
@@ -217,7 +219,7 @@ static int hfsplus_readdir(struct file *filp, void *dirent, filldir_t filldir) | |||
217 | } | 219 | } |
218 | filp->private_data = rd; | 220 | filp->private_data = rd; |
219 | rd->file = filp; | 221 | rd->file = filp; |
220 | list_add(&rd->list, &HFSPLUS_I(inode).open_dir_list); | 222 | list_add(&rd->list, &HFSPLUS_I(inode)->open_dir_list); |
221 | } | 223 | } |
222 | memcpy(&rd->key, fd.key, sizeof(struct hfsplus_cat_key)); | 224 | memcpy(&rd->key, fd.key, sizeof(struct hfsplus_cat_key)); |
223 | out: | 225 | out: |
@@ -229,38 +231,18 @@ static int hfsplus_dir_release(struct inode *inode, struct file *file) | |||
229 | { | 231 | { |
230 | struct hfsplus_readdir_data *rd = file->private_data; | 232 | struct hfsplus_readdir_data *rd = file->private_data; |
231 | if (rd) { | 233 | if (rd) { |
234 | mutex_lock(&inode->i_mutex); | ||
232 | list_del(&rd->list); | 235 | list_del(&rd->list); |
236 | mutex_unlock(&inode->i_mutex); | ||
233 | kfree(rd); | 237 | kfree(rd); |
234 | } | 238 | } |
235 | return 0; | 239 | return 0; |
236 | } | 240 | } |
237 | 241 | ||
238 | static int hfsplus_create(struct inode *dir, struct dentry *dentry, int mode, | ||
239 | struct nameidata *nd) | ||
240 | { | ||
241 | struct inode *inode; | ||
242 | int res; | ||
243 | |||
244 | inode = hfsplus_new_inode(dir->i_sb, mode); | ||
245 | if (!inode) | ||
246 | return -ENOSPC; | ||
247 | |||
248 | res = hfsplus_create_cat(inode->i_ino, dir, &dentry->d_name, inode); | ||
249 | if (res) { | ||
250 | inode->i_nlink = 0; | ||
251 | hfsplus_delete_inode(inode); | ||
252 | iput(inode); | ||
253 | return res; | ||
254 | } | ||
255 | hfsplus_instantiate(dentry, inode, inode->i_ino); | ||
256 | mark_inode_dirty(inode); | ||
257 | return 0; | ||
258 | } | ||
259 | |||
260 | static int hfsplus_link(struct dentry *src_dentry, struct inode *dst_dir, | 242 | static int hfsplus_link(struct dentry *src_dentry, struct inode *dst_dir, |
261 | struct dentry *dst_dentry) | 243 | struct dentry *dst_dentry) |
262 | { | 244 | { |
263 | struct super_block *sb = dst_dir->i_sb; | 245 | struct hfsplus_sb_info *sbi = HFSPLUS_SB(dst_dir->i_sb); |
264 | struct inode *inode = src_dentry->d_inode; | 246 | struct inode *inode = src_dentry->d_inode; |
265 | struct inode *src_dir = src_dentry->d_parent->d_inode; | 247 | struct inode *src_dir = src_dentry->d_parent->d_inode; |
266 | struct qstr str; | 248 | struct qstr str; |
@@ -270,7 +252,10 @@ static int hfsplus_link(struct dentry *src_dentry, struct inode *dst_dir, | |||
270 | 252 | ||
271 | if (HFSPLUS_IS_RSRC(inode)) | 253 | if (HFSPLUS_IS_RSRC(inode)) |
272 | return -EPERM; | 254 | return -EPERM; |
255 | if (!S_ISREG(inode->i_mode)) | ||
256 | return -EPERM; | ||
273 | 257 | ||
258 | mutex_lock(&sbi->vh_mutex); | ||
274 | if (inode->i_ino == (u32)(unsigned long)src_dentry->d_fsdata) { | 259 | if (inode->i_ino == (u32)(unsigned long)src_dentry->d_fsdata) { |
275 | for (;;) { | 260 | for (;;) { |
276 | get_random_bytes(&id, sizeof(cnid)); | 261 | get_random_bytes(&id, sizeof(cnid)); |
@@ -279,40 +264,41 @@ static int hfsplus_link(struct dentry *src_dentry, struct inode *dst_dir, | |||
279 | str.len = sprintf(name, "iNode%d", id); | 264 | str.len = sprintf(name, "iNode%d", id); |
280 | res = hfsplus_rename_cat(inode->i_ino, | 265 | res = hfsplus_rename_cat(inode->i_ino, |
281 | src_dir, &src_dentry->d_name, | 266 | src_dir, &src_dentry->d_name, |
282 | HFSPLUS_SB(sb).hidden_dir, &str); | 267 | sbi->hidden_dir, &str); |
283 | if (!res) | 268 | if (!res) |
284 | break; | 269 | break; |
285 | if (res != -EEXIST) | 270 | if (res != -EEXIST) |
286 | return res; | 271 | goto out; |
287 | } | 272 | } |
288 | HFSPLUS_I(inode).dev = id; | 273 | HFSPLUS_I(inode)->linkid = id; |
289 | cnid = HFSPLUS_SB(sb).next_cnid++; | 274 | cnid = sbi->next_cnid++; |
290 | src_dentry->d_fsdata = (void *)(unsigned long)cnid; | 275 | src_dentry->d_fsdata = (void *)(unsigned long)cnid; |
291 | res = hfsplus_create_cat(cnid, src_dir, &src_dentry->d_name, inode); | 276 | res = hfsplus_create_cat(cnid, src_dir, &src_dentry->d_name, inode); |
292 | if (res) | 277 | if (res) |
293 | /* panic? */ | 278 | /* panic? */ |
294 | return res; | 279 | goto out; |
295 | HFSPLUS_SB(sb).file_count++; | 280 | sbi->file_count++; |
296 | } | 281 | } |
297 | cnid = HFSPLUS_SB(sb).next_cnid++; | 282 | cnid = sbi->next_cnid++; |
298 | res = hfsplus_create_cat(cnid, dst_dir, &dst_dentry->d_name, inode); | 283 | res = hfsplus_create_cat(cnid, dst_dir, &dst_dentry->d_name, inode); |
299 | if (res) | 284 | if (res) |
300 | return res; | 285 | goto out; |
301 | 286 | ||
302 | inc_nlink(inode); | 287 | inc_nlink(inode); |
303 | hfsplus_instantiate(dst_dentry, inode, cnid); | 288 | hfsplus_instantiate(dst_dentry, inode, cnid); |
304 | atomic_inc(&inode->i_count); | 289 | atomic_inc(&inode->i_count); |
305 | inode->i_ctime = CURRENT_TIME_SEC; | 290 | inode->i_ctime = CURRENT_TIME_SEC; |
306 | mark_inode_dirty(inode); | 291 | mark_inode_dirty(inode); |
307 | HFSPLUS_SB(sb).file_count++; | 292 | sbi->file_count++; |
308 | sb->s_dirt = 1; | 293 | dst_dir->i_sb->s_dirt = 1; |
309 | 294 | out: | |
310 | return 0; | 295 | mutex_unlock(&sbi->vh_mutex); |
296 | return res; | ||
311 | } | 297 | } |
312 | 298 | ||
313 | static int hfsplus_unlink(struct inode *dir, struct dentry *dentry) | 299 | static int hfsplus_unlink(struct inode *dir, struct dentry *dentry) |
314 | { | 300 | { |
315 | struct super_block *sb = dir->i_sb; | 301 | struct hfsplus_sb_info *sbi = HFSPLUS_SB(dir->i_sb); |
316 | struct inode *inode = dentry->d_inode; | 302 | struct inode *inode = dentry->d_inode; |
317 | struct qstr str; | 303 | struct qstr str; |
318 | char name[32]; | 304 | char name[32]; |
@@ -322,21 +308,22 @@ static int hfsplus_unlink(struct inode *dir, struct dentry *dentry) | |||
322 | if (HFSPLUS_IS_RSRC(inode)) | 308 | if (HFSPLUS_IS_RSRC(inode)) |
323 | return -EPERM; | 309 | return -EPERM; |
324 | 310 | ||
311 | mutex_lock(&sbi->vh_mutex); | ||
325 | cnid = (u32)(unsigned long)dentry->d_fsdata; | 312 | cnid = (u32)(unsigned long)dentry->d_fsdata; |
326 | if (inode->i_ino == cnid && | 313 | if (inode->i_ino == cnid && |
327 | atomic_read(&HFSPLUS_I(inode).opencnt)) { | 314 | atomic_read(&HFSPLUS_I(inode)->opencnt)) { |
328 | str.name = name; | 315 | str.name = name; |
329 | str.len = sprintf(name, "temp%lu", inode->i_ino); | 316 | str.len = sprintf(name, "temp%lu", inode->i_ino); |
330 | res = hfsplus_rename_cat(inode->i_ino, | 317 | res = hfsplus_rename_cat(inode->i_ino, |
331 | dir, &dentry->d_name, | 318 | dir, &dentry->d_name, |
332 | HFSPLUS_SB(sb).hidden_dir, &str); | 319 | sbi->hidden_dir, &str); |
333 | if (!res) | 320 | if (!res) |
334 | inode->i_flags |= S_DEAD; | 321 | inode->i_flags |= S_DEAD; |
335 | return res; | 322 | goto out; |
336 | } | 323 | } |
337 | res = hfsplus_delete_cat(cnid, dir, &dentry->d_name); | 324 | res = hfsplus_delete_cat(cnid, dir, &dentry->d_name); |
338 | if (res) | 325 | if (res) |
339 | return res; | 326 | goto out; |
340 | 327 | ||
341 | if (inode->i_nlink > 0) | 328 | if (inode->i_nlink > 0) |
342 | drop_nlink(inode); | 329 | drop_nlink(inode); |
@@ -344,10 +331,10 @@ static int hfsplus_unlink(struct inode *dir, struct dentry *dentry) | |||
344 | clear_nlink(inode); | 331 | clear_nlink(inode); |
345 | if (!inode->i_nlink) { | 332 | if (!inode->i_nlink) { |
346 | if (inode->i_ino != cnid) { | 333 | if (inode->i_ino != cnid) { |
347 | HFSPLUS_SB(sb).file_count--; | 334 | sbi->file_count--; |
348 | if (!atomic_read(&HFSPLUS_I(inode).opencnt)) { | 335 | if (!atomic_read(&HFSPLUS_I(inode)->opencnt)) { |
349 | res = hfsplus_delete_cat(inode->i_ino, | 336 | res = hfsplus_delete_cat(inode->i_ino, |
350 | HFSPLUS_SB(sb).hidden_dir, | 337 | sbi->hidden_dir, |
351 | NULL); | 338 | NULL); |
352 | if (!res) | 339 | if (!res) |
353 | hfsplus_delete_inode(inode); | 340 | hfsplus_delete_inode(inode); |
@@ -356,107 +343,108 @@ static int hfsplus_unlink(struct inode *dir, struct dentry *dentry) | |||
356 | } else | 343 | } else |
357 | hfsplus_delete_inode(inode); | 344 | hfsplus_delete_inode(inode); |
358 | } else | 345 | } else |
359 | HFSPLUS_SB(sb).file_count--; | 346 | sbi->file_count--; |
360 | inode->i_ctime = CURRENT_TIME_SEC; | 347 | inode->i_ctime = CURRENT_TIME_SEC; |
361 | mark_inode_dirty(inode); | 348 | mark_inode_dirty(inode); |
362 | 349 | out: | |
350 | mutex_unlock(&sbi->vh_mutex); | ||
363 | return res; | 351 | return res; |
364 | } | 352 | } |
365 | 353 | ||
366 | static int hfsplus_mkdir(struct inode *dir, struct dentry *dentry, int mode) | ||
367 | { | ||
368 | struct inode *inode; | ||
369 | int res; | ||
370 | |||
371 | inode = hfsplus_new_inode(dir->i_sb, S_IFDIR | mode); | ||
372 | if (!inode) | ||
373 | return -ENOSPC; | ||
374 | |||
375 | res = hfsplus_create_cat(inode->i_ino, dir, &dentry->d_name, inode); | ||
376 | if (res) { | ||
377 | inode->i_nlink = 0; | ||
378 | hfsplus_delete_inode(inode); | ||
379 | iput(inode); | ||
380 | return res; | ||
381 | } | ||
382 | hfsplus_instantiate(dentry, inode, inode->i_ino); | ||
383 | mark_inode_dirty(inode); | ||
384 | return 0; | ||
385 | } | ||
386 | |||
387 | static int hfsplus_rmdir(struct inode *dir, struct dentry *dentry) | 354 | static int hfsplus_rmdir(struct inode *dir, struct dentry *dentry) |
388 | { | 355 | { |
389 | struct inode *inode; | 356 | struct hfsplus_sb_info *sbi = HFSPLUS_SB(dir->i_sb); |
357 | struct inode *inode = dentry->d_inode; | ||
390 | int res; | 358 | int res; |
391 | 359 | ||
392 | inode = dentry->d_inode; | ||
393 | if (inode->i_size != 2) | 360 | if (inode->i_size != 2) |
394 | return -ENOTEMPTY; | 361 | return -ENOTEMPTY; |
362 | |||
363 | mutex_lock(&sbi->vh_mutex); | ||
395 | res = hfsplus_delete_cat(inode->i_ino, dir, &dentry->d_name); | 364 | res = hfsplus_delete_cat(inode->i_ino, dir, &dentry->d_name); |
396 | if (res) | 365 | if (res) |
397 | return res; | 366 | goto out; |
398 | clear_nlink(inode); | 367 | clear_nlink(inode); |
399 | inode->i_ctime = CURRENT_TIME_SEC; | 368 | inode->i_ctime = CURRENT_TIME_SEC; |
400 | hfsplus_delete_inode(inode); | 369 | hfsplus_delete_inode(inode); |
401 | mark_inode_dirty(inode); | 370 | mark_inode_dirty(inode); |
402 | return 0; | 371 | out: |
372 | mutex_unlock(&sbi->vh_mutex); | ||
373 | return res; | ||
403 | } | 374 | } |
404 | 375 | ||
405 | static int hfsplus_symlink(struct inode *dir, struct dentry *dentry, | 376 | static int hfsplus_symlink(struct inode *dir, struct dentry *dentry, |
406 | const char *symname) | 377 | const char *symname) |
407 | { | 378 | { |
408 | struct super_block *sb; | 379 | struct hfsplus_sb_info *sbi = HFSPLUS_SB(dir->i_sb); |
409 | struct inode *inode; | 380 | struct inode *inode; |
410 | int res; | 381 | int res = -ENOSPC; |
411 | 382 | ||
412 | sb = dir->i_sb; | 383 | mutex_lock(&sbi->vh_mutex); |
413 | inode = hfsplus_new_inode(sb, S_IFLNK | S_IRWXUGO); | 384 | inode = hfsplus_new_inode(dir->i_sb, S_IFLNK | S_IRWXUGO); |
414 | if (!inode) | 385 | if (!inode) |
415 | return -ENOSPC; | 386 | goto out; |
416 | 387 | ||
417 | res = page_symlink(inode, symname, strlen(symname) + 1); | 388 | res = page_symlink(inode, symname, strlen(symname) + 1); |
418 | if (res) { | 389 | if (res) |
419 | inode->i_nlink = 0; | 390 | goto out_err; |
420 | hfsplus_delete_inode(inode); | ||
421 | iput(inode); | ||
422 | return res; | ||
423 | } | ||
424 | 391 | ||
425 | mark_inode_dirty(inode); | ||
426 | res = hfsplus_create_cat(inode->i_ino, dir, &dentry->d_name, inode); | 392 | res = hfsplus_create_cat(inode->i_ino, dir, &dentry->d_name, inode); |
393 | if (res) | ||
394 | goto out_err; | ||
427 | 395 | ||
428 | if (!res) { | 396 | hfsplus_instantiate(dentry, inode, inode->i_ino); |
429 | hfsplus_instantiate(dentry, inode, inode->i_ino); | 397 | mark_inode_dirty(inode); |
430 | mark_inode_dirty(inode); | 398 | goto out; |
431 | } | ||
432 | 399 | ||
400 | out_err: | ||
401 | inode->i_nlink = 0; | ||
402 | hfsplus_delete_inode(inode); | ||
403 | iput(inode); | ||
404 | out: | ||
405 | mutex_unlock(&sbi->vh_mutex); | ||
433 | return res; | 406 | return res; |
434 | } | 407 | } |
435 | 408 | ||
436 | static int hfsplus_mknod(struct inode *dir, struct dentry *dentry, | 409 | static int hfsplus_mknod(struct inode *dir, struct dentry *dentry, |
437 | int mode, dev_t rdev) | 410 | int mode, dev_t rdev) |
438 | { | 411 | { |
439 | struct super_block *sb; | 412 | struct hfsplus_sb_info *sbi = HFSPLUS_SB(dir->i_sb); |
440 | struct inode *inode; | 413 | struct inode *inode; |
441 | int res; | 414 | int res = -ENOSPC; |
442 | 415 | ||
443 | sb = dir->i_sb; | 416 | mutex_lock(&sbi->vh_mutex); |
444 | inode = hfsplus_new_inode(sb, mode); | 417 | inode = hfsplus_new_inode(dir->i_sb, mode); |
445 | if (!inode) | 418 | if (!inode) |
446 | return -ENOSPC; | 419 | goto out; |
420 | |||
421 | if (S_ISBLK(mode) || S_ISCHR(mode) || S_ISFIFO(mode) || S_ISSOCK(mode)) | ||
422 | init_special_inode(inode, mode, rdev); | ||
447 | 423 | ||
448 | res = hfsplus_create_cat(inode->i_ino, dir, &dentry->d_name, inode); | 424 | res = hfsplus_create_cat(inode->i_ino, dir, &dentry->d_name, inode); |
449 | if (res) { | 425 | if (res) { |
450 | inode->i_nlink = 0; | 426 | inode->i_nlink = 0; |
451 | hfsplus_delete_inode(inode); | 427 | hfsplus_delete_inode(inode); |
452 | iput(inode); | 428 | iput(inode); |
453 | return res; | 429 | goto out; |
454 | } | 430 | } |
455 | init_special_inode(inode, mode, rdev); | 431 | |
456 | hfsplus_instantiate(dentry, inode, inode->i_ino); | 432 | hfsplus_instantiate(dentry, inode, inode->i_ino); |
457 | mark_inode_dirty(inode); | 433 | mark_inode_dirty(inode); |
434 | out: | ||
435 | mutex_unlock(&sbi->vh_mutex); | ||
436 | return res; | ||
437 | } | ||
458 | 438 | ||
459 | return 0; | 439 | static int hfsplus_create(struct inode *dir, struct dentry *dentry, int mode, |
440 | struct nameidata *nd) | ||
441 | { | ||
442 | return hfsplus_mknod(dir, dentry, mode, 0); | ||
443 | } | ||
444 | |||
445 | static int hfsplus_mkdir(struct inode *dir, struct dentry *dentry, int mode) | ||
446 | { | ||
447 | return hfsplus_mknod(dir, dentry, mode | S_IFDIR, 0); | ||
460 | } | 448 | } |
461 | 449 | ||
462 | static int hfsplus_rename(struct inode *old_dir, struct dentry *old_dentry, | 450 | static int hfsplus_rename(struct inode *old_dir, struct dentry *old_dentry, |
@@ -466,7 +454,10 @@ static int hfsplus_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
466 | 454 | ||
467 | /* Unlink destination if it already exists */ | 455 | /* Unlink destination if it already exists */ |
468 | if (new_dentry->d_inode) { | 456 | if (new_dentry->d_inode) { |
469 | res = hfsplus_unlink(new_dir, new_dentry); | 457 | if (S_ISDIR(new_dentry->d_inode->i_mode)) |
458 | res = hfsplus_rmdir(new_dir, new_dentry); | ||
459 | else | ||
460 | res = hfsplus_unlink(new_dir, new_dentry); | ||
470 | if (res) | 461 | if (res) |
471 | return res; | 462 | return res; |
472 | } | 463 | } |
diff --git a/fs/hfsplus/extents.c b/fs/hfsplus/extents.c index 0022eec63cda..0c9cb1820a52 100644 --- a/fs/hfsplus/extents.c +++ b/fs/hfsplus/extents.c | |||
@@ -85,35 +85,49 @@ static u32 hfsplus_ext_lastblock(struct hfsplus_extent *ext) | |||
85 | 85 | ||
86 | static void __hfsplus_ext_write_extent(struct inode *inode, struct hfs_find_data *fd) | 86 | static void __hfsplus_ext_write_extent(struct inode *inode, struct hfs_find_data *fd) |
87 | { | 87 | { |
88 | struct hfsplus_inode_info *hip = HFSPLUS_I(inode); | ||
88 | int res; | 89 | int res; |
89 | 90 | ||
90 | hfsplus_ext_build_key(fd->search_key, inode->i_ino, HFSPLUS_I(inode).cached_start, | 91 | WARN_ON(!mutex_is_locked(&hip->extents_lock)); |
91 | HFSPLUS_IS_RSRC(inode) ? HFSPLUS_TYPE_RSRC : HFSPLUS_TYPE_DATA); | 92 | |
93 | hfsplus_ext_build_key(fd->search_key, inode->i_ino, hip->cached_start, | ||
94 | HFSPLUS_IS_RSRC(inode) ? | ||
95 | HFSPLUS_TYPE_RSRC : HFSPLUS_TYPE_DATA); | ||
96 | |||
92 | res = hfs_brec_find(fd); | 97 | res = hfs_brec_find(fd); |
93 | if (HFSPLUS_I(inode).flags & HFSPLUS_FLG_EXT_NEW) { | 98 | if (hip->flags & HFSPLUS_FLG_EXT_NEW) { |
94 | if (res != -ENOENT) | 99 | if (res != -ENOENT) |
95 | return; | 100 | return; |
96 | hfs_brec_insert(fd, HFSPLUS_I(inode).cached_extents, sizeof(hfsplus_extent_rec)); | 101 | hfs_brec_insert(fd, hip->cached_extents, |
97 | HFSPLUS_I(inode).flags &= ~(HFSPLUS_FLG_EXT_DIRTY | HFSPLUS_FLG_EXT_NEW); | 102 | sizeof(hfsplus_extent_rec)); |
103 | hip->flags &= ~(HFSPLUS_FLG_EXT_DIRTY | HFSPLUS_FLG_EXT_NEW); | ||
98 | } else { | 104 | } else { |
99 | if (res) | 105 | if (res) |
100 | return; | 106 | return; |
101 | hfs_bnode_write(fd->bnode, HFSPLUS_I(inode).cached_extents, fd->entryoffset, fd->entrylength); | 107 | hfs_bnode_write(fd->bnode, hip->cached_extents, |
102 | HFSPLUS_I(inode).flags &= ~HFSPLUS_FLG_EXT_DIRTY; | 108 | fd->entryoffset, fd->entrylength); |
109 | hip->flags &= ~HFSPLUS_FLG_EXT_DIRTY; | ||
103 | } | 110 | } |
104 | } | 111 | } |
105 | 112 | ||
106 | void hfsplus_ext_write_extent(struct inode *inode) | 113 | static void hfsplus_ext_write_extent_locked(struct inode *inode) |
107 | { | 114 | { |
108 | if (HFSPLUS_I(inode).flags & HFSPLUS_FLG_EXT_DIRTY) { | 115 | if (HFSPLUS_I(inode)->flags & HFSPLUS_FLG_EXT_DIRTY) { |
109 | struct hfs_find_data fd; | 116 | struct hfs_find_data fd; |
110 | 117 | ||
111 | hfs_find_init(HFSPLUS_SB(inode->i_sb).ext_tree, &fd); | 118 | hfs_find_init(HFSPLUS_SB(inode->i_sb)->ext_tree, &fd); |
112 | __hfsplus_ext_write_extent(inode, &fd); | 119 | __hfsplus_ext_write_extent(inode, &fd); |
113 | hfs_find_exit(&fd); | 120 | hfs_find_exit(&fd); |
114 | } | 121 | } |
115 | } | 122 | } |
116 | 123 | ||
124 | void hfsplus_ext_write_extent(struct inode *inode) | ||
125 | { | ||
126 | mutex_lock(&HFSPLUS_I(inode)->extents_lock); | ||
127 | hfsplus_ext_write_extent_locked(inode); | ||
128 | mutex_unlock(&HFSPLUS_I(inode)->extents_lock); | ||
129 | } | ||
130 | |||
117 | static inline int __hfsplus_ext_read_extent(struct hfs_find_data *fd, | 131 | static inline int __hfsplus_ext_read_extent(struct hfs_find_data *fd, |
118 | struct hfsplus_extent *extent, | 132 | struct hfsplus_extent *extent, |
119 | u32 cnid, u32 block, u8 type) | 133 | u32 cnid, u32 block, u8 type) |
@@ -136,33 +150,39 @@ static inline int __hfsplus_ext_read_extent(struct hfs_find_data *fd, | |||
136 | 150 | ||
137 | static inline int __hfsplus_ext_cache_extent(struct hfs_find_data *fd, struct inode *inode, u32 block) | 151 | static inline int __hfsplus_ext_cache_extent(struct hfs_find_data *fd, struct inode *inode, u32 block) |
138 | { | 152 | { |
153 | struct hfsplus_inode_info *hip = HFSPLUS_I(inode); | ||
139 | int res; | 154 | int res; |
140 | 155 | ||
141 | if (HFSPLUS_I(inode).flags & HFSPLUS_FLG_EXT_DIRTY) | 156 | WARN_ON(!mutex_is_locked(&hip->extents_lock)); |
157 | |||
158 | if (hip->flags & HFSPLUS_FLG_EXT_DIRTY) | ||
142 | __hfsplus_ext_write_extent(inode, fd); | 159 | __hfsplus_ext_write_extent(inode, fd); |
143 | 160 | ||
144 | res = __hfsplus_ext_read_extent(fd, HFSPLUS_I(inode).cached_extents, inode->i_ino, | 161 | res = __hfsplus_ext_read_extent(fd, hip->cached_extents, inode->i_ino, |
145 | block, HFSPLUS_IS_RSRC(inode) ? HFSPLUS_TYPE_RSRC : HFSPLUS_TYPE_DATA); | 162 | block, HFSPLUS_IS_RSRC(inode) ? |
163 | HFSPLUS_TYPE_RSRC : | ||
164 | HFSPLUS_TYPE_DATA); | ||
146 | if (!res) { | 165 | if (!res) { |
147 | HFSPLUS_I(inode).cached_start = be32_to_cpu(fd->key->ext.start_block); | 166 | hip->cached_start = be32_to_cpu(fd->key->ext.start_block); |
148 | HFSPLUS_I(inode).cached_blocks = hfsplus_ext_block_count(HFSPLUS_I(inode).cached_extents); | 167 | hip->cached_blocks = hfsplus_ext_block_count(hip->cached_extents); |
149 | } else { | 168 | } else { |
150 | HFSPLUS_I(inode).cached_start = HFSPLUS_I(inode).cached_blocks = 0; | 169 | hip->cached_start = hip->cached_blocks = 0; |
151 | HFSPLUS_I(inode).flags &= ~(HFSPLUS_FLG_EXT_DIRTY | HFSPLUS_FLG_EXT_NEW); | 170 | hip->flags &= ~(HFSPLUS_FLG_EXT_DIRTY | HFSPLUS_FLG_EXT_NEW); |
152 | } | 171 | } |
153 | return res; | 172 | return res; |
154 | } | 173 | } |
155 | 174 | ||
156 | static int hfsplus_ext_read_extent(struct inode *inode, u32 block) | 175 | static int hfsplus_ext_read_extent(struct inode *inode, u32 block) |
157 | { | 176 | { |
177 | struct hfsplus_inode_info *hip = HFSPLUS_I(inode); | ||
158 | struct hfs_find_data fd; | 178 | struct hfs_find_data fd; |
159 | int res; | 179 | int res; |
160 | 180 | ||
161 | if (block >= HFSPLUS_I(inode).cached_start && | 181 | if (block >= hip->cached_start && |
162 | block < HFSPLUS_I(inode).cached_start + HFSPLUS_I(inode).cached_blocks) | 182 | block < hip->cached_start + hip->cached_blocks) |
163 | return 0; | 183 | return 0; |
164 | 184 | ||
165 | hfs_find_init(HFSPLUS_SB(inode->i_sb).ext_tree, &fd); | 185 | hfs_find_init(HFSPLUS_SB(inode->i_sb)->ext_tree, &fd); |
166 | res = __hfsplus_ext_cache_extent(&fd, inode, block); | 186 | res = __hfsplus_ext_cache_extent(&fd, inode, block); |
167 | hfs_find_exit(&fd); | 187 | hfs_find_exit(&fd); |
168 | return res; | 188 | return res; |
@@ -172,21 +192,21 @@ static int hfsplus_ext_read_extent(struct inode *inode, u32 block) | |||
172 | int hfsplus_get_block(struct inode *inode, sector_t iblock, | 192 | int hfsplus_get_block(struct inode *inode, sector_t iblock, |
173 | struct buffer_head *bh_result, int create) | 193 | struct buffer_head *bh_result, int create) |
174 | { | 194 | { |
175 | struct super_block *sb; | 195 | struct super_block *sb = inode->i_sb; |
196 | struct hfsplus_sb_info *sbi = HFSPLUS_SB(sb); | ||
197 | struct hfsplus_inode_info *hip = HFSPLUS_I(inode); | ||
176 | int res = -EIO; | 198 | int res = -EIO; |
177 | u32 ablock, dblock, mask; | 199 | u32 ablock, dblock, mask; |
178 | int shift; | 200 | int shift; |
179 | 201 | ||
180 | sb = inode->i_sb; | ||
181 | |||
182 | /* Convert inode block to disk allocation block */ | 202 | /* Convert inode block to disk allocation block */ |
183 | shift = HFSPLUS_SB(sb).alloc_blksz_shift - sb->s_blocksize_bits; | 203 | shift = sbi->alloc_blksz_shift - sb->s_blocksize_bits; |
184 | ablock = iblock >> HFSPLUS_SB(sb).fs_shift; | 204 | ablock = iblock >> sbi->fs_shift; |
185 | 205 | ||
186 | if (iblock >= HFSPLUS_I(inode).fs_blocks) { | 206 | if (iblock >= hip->fs_blocks) { |
187 | if (iblock > HFSPLUS_I(inode).fs_blocks || !create) | 207 | if (iblock > hip->fs_blocks || !create) |
188 | return -EIO; | 208 | return -EIO; |
189 | if (ablock >= HFSPLUS_I(inode).alloc_blocks) { | 209 | if (ablock >= hip->alloc_blocks) { |
190 | res = hfsplus_file_extend(inode); | 210 | res = hfsplus_file_extend(inode); |
191 | if (res) | 211 | if (res) |
192 | return res; | 212 | return res; |
@@ -194,33 +214,33 @@ int hfsplus_get_block(struct inode *inode, sector_t iblock, | |||
194 | } else | 214 | } else |
195 | create = 0; | 215 | create = 0; |
196 | 216 | ||
197 | if (ablock < HFSPLUS_I(inode).first_blocks) { | 217 | if (ablock < hip->first_blocks) { |
198 | dblock = hfsplus_ext_find_block(HFSPLUS_I(inode).first_extents, ablock); | 218 | dblock = hfsplus_ext_find_block(hip->first_extents, ablock); |
199 | goto done; | 219 | goto done; |
200 | } | 220 | } |
201 | 221 | ||
202 | if (inode->i_ino == HFSPLUS_EXT_CNID) | 222 | if (inode->i_ino == HFSPLUS_EXT_CNID) |
203 | return -EIO; | 223 | return -EIO; |
204 | 224 | ||
205 | mutex_lock(&HFSPLUS_I(inode).extents_lock); | 225 | mutex_lock(&hip->extents_lock); |
206 | res = hfsplus_ext_read_extent(inode, ablock); | 226 | res = hfsplus_ext_read_extent(inode, ablock); |
207 | if (!res) { | 227 | if (!res) { |
208 | dblock = hfsplus_ext_find_block(HFSPLUS_I(inode).cached_extents, ablock - | 228 | dblock = hfsplus_ext_find_block(hip->cached_extents, |
209 | HFSPLUS_I(inode).cached_start); | 229 | ablock - hip->cached_start); |
210 | } else { | 230 | } else { |
211 | mutex_unlock(&HFSPLUS_I(inode).extents_lock); | 231 | mutex_unlock(&hip->extents_lock); |
212 | return -EIO; | 232 | return -EIO; |
213 | } | 233 | } |
214 | mutex_unlock(&HFSPLUS_I(inode).extents_lock); | 234 | mutex_unlock(&hip->extents_lock); |
215 | 235 | ||
216 | done: | 236 | done: |
217 | dprint(DBG_EXTENT, "get_block(%lu): %llu - %u\n", inode->i_ino, (long long)iblock, dblock); | 237 | dprint(DBG_EXTENT, "get_block(%lu): %llu - %u\n", inode->i_ino, (long long)iblock, dblock); |
218 | mask = (1 << HFSPLUS_SB(sb).fs_shift) - 1; | 238 | mask = (1 << sbi->fs_shift) - 1; |
219 | map_bh(bh_result, sb, (dblock << HFSPLUS_SB(sb).fs_shift) + HFSPLUS_SB(sb).blockoffset + (iblock & mask)); | 239 | map_bh(bh_result, sb, (dblock << sbi->fs_shift) + sbi->blockoffset + (iblock & mask)); |
220 | if (create) { | 240 | if (create) { |
221 | set_buffer_new(bh_result); | 241 | set_buffer_new(bh_result); |
222 | HFSPLUS_I(inode).phys_size += sb->s_blocksize; | 242 | hip->phys_size += sb->s_blocksize; |
223 | HFSPLUS_I(inode).fs_blocks++; | 243 | hip->fs_blocks++; |
224 | inode_add_bytes(inode, sb->s_blocksize); | 244 | inode_add_bytes(inode, sb->s_blocksize); |
225 | mark_inode_dirty(inode); | 245 | mark_inode_dirty(inode); |
226 | } | 246 | } |
@@ -327,7 +347,7 @@ int hfsplus_free_fork(struct super_block *sb, u32 cnid, struct hfsplus_fork_raw | |||
327 | if (total_blocks == blocks) | 347 | if (total_blocks == blocks) |
328 | return 0; | 348 | return 0; |
329 | 349 | ||
330 | hfs_find_init(HFSPLUS_SB(sb).ext_tree, &fd); | 350 | hfs_find_init(HFSPLUS_SB(sb)->ext_tree, &fd); |
331 | do { | 351 | do { |
332 | res = __hfsplus_ext_read_extent(&fd, ext_entry, cnid, | 352 | res = __hfsplus_ext_read_extent(&fd, ext_entry, cnid, |
333 | total_blocks, type); | 353 | total_blocks, type); |
@@ -348,29 +368,33 @@ int hfsplus_free_fork(struct super_block *sb, u32 cnid, struct hfsplus_fork_raw | |||
348 | int hfsplus_file_extend(struct inode *inode) | 368 | int hfsplus_file_extend(struct inode *inode) |
349 | { | 369 | { |
350 | struct super_block *sb = inode->i_sb; | 370 | struct super_block *sb = inode->i_sb; |
371 | struct hfsplus_sb_info *sbi = HFSPLUS_SB(sb); | ||
372 | struct hfsplus_inode_info *hip = HFSPLUS_I(inode); | ||
351 | u32 start, len, goal; | 373 | u32 start, len, goal; |
352 | int res; | 374 | int res; |
353 | 375 | ||
354 | if (HFSPLUS_SB(sb).alloc_file->i_size * 8 < HFSPLUS_SB(sb).total_blocks - HFSPLUS_SB(sb).free_blocks + 8) { | 376 | if (sbi->alloc_file->i_size * 8 < |
377 | sbi->total_blocks - sbi->free_blocks + 8) { | ||
355 | // extend alloc file | 378 | // extend alloc file |
356 | printk(KERN_ERR "hfs: extend alloc file! (%Lu,%u,%u)\n", HFSPLUS_SB(sb).alloc_file->i_size * 8, | 379 | printk(KERN_ERR "hfs: extend alloc file! (%Lu,%u,%u)\n", |
357 | HFSPLUS_SB(sb).total_blocks, HFSPLUS_SB(sb).free_blocks); | 380 | sbi->alloc_file->i_size * 8, |
381 | sbi->total_blocks, sbi->free_blocks); | ||
358 | return -ENOSPC; | 382 | return -ENOSPC; |
359 | } | 383 | } |
360 | 384 | ||
361 | mutex_lock(&HFSPLUS_I(inode).extents_lock); | 385 | mutex_lock(&hip->extents_lock); |
362 | if (HFSPLUS_I(inode).alloc_blocks == HFSPLUS_I(inode).first_blocks) | 386 | if (hip->alloc_blocks == hip->first_blocks) |
363 | goal = hfsplus_ext_lastblock(HFSPLUS_I(inode).first_extents); | 387 | goal = hfsplus_ext_lastblock(hip->first_extents); |
364 | else { | 388 | else { |
365 | res = hfsplus_ext_read_extent(inode, HFSPLUS_I(inode).alloc_blocks); | 389 | res = hfsplus_ext_read_extent(inode, hip->alloc_blocks); |
366 | if (res) | 390 | if (res) |
367 | goto out; | 391 | goto out; |
368 | goal = hfsplus_ext_lastblock(HFSPLUS_I(inode).cached_extents); | 392 | goal = hfsplus_ext_lastblock(hip->cached_extents); |
369 | } | 393 | } |
370 | 394 | ||
371 | len = HFSPLUS_I(inode).clump_blocks; | 395 | len = hip->clump_blocks; |
372 | start = hfsplus_block_allocate(sb, HFSPLUS_SB(sb).total_blocks, goal, &len); | 396 | start = hfsplus_block_allocate(sb, sbi->total_blocks, goal, &len); |
373 | if (start >= HFSPLUS_SB(sb).total_blocks) { | 397 | if (start >= sbi->total_blocks) { |
374 | start = hfsplus_block_allocate(sb, goal, 0, &len); | 398 | start = hfsplus_block_allocate(sb, goal, 0, &len); |
375 | if (start >= goal) { | 399 | if (start >= goal) { |
376 | res = -ENOSPC; | 400 | res = -ENOSPC; |
@@ -379,56 +403,56 @@ int hfsplus_file_extend(struct inode *inode) | |||
379 | } | 403 | } |
380 | 404 | ||
381 | dprint(DBG_EXTENT, "extend %lu: %u,%u\n", inode->i_ino, start, len); | 405 | dprint(DBG_EXTENT, "extend %lu: %u,%u\n", inode->i_ino, start, len); |
382 | if (HFSPLUS_I(inode).alloc_blocks <= HFSPLUS_I(inode).first_blocks) { | 406 | |
383 | if (!HFSPLUS_I(inode).first_blocks) { | 407 | if (hip->alloc_blocks <= hip->first_blocks) { |
408 | if (!hip->first_blocks) { | ||
384 | dprint(DBG_EXTENT, "first extents\n"); | 409 | dprint(DBG_EXTENT, "first extents\n"); |
385 | /* no extents yet */ | 410 | /* no extents yet */ |
386 | HFSPLUS_I(inode).first_extents[0].start_block = cpu_to_be32(start); | 411 | hip->first_extents[0].start_block = cpu_to_be32(start); |
387 | HFSPLUS_I(inode).first_extents[0].block_count = cpu_to_be32(len); | 412 | hip->first_extents[0].block_count = cpu_to_be32(len); |
388 | res = 0; | 413 | res = 0; |
389 | } else { | 414 | } else { |
390 | /* try to append to extents in inode */ | 415 | /* try to append to extents in inode */ |
391 | res = hfsplus_add_extent(HFSPLUS_I(inode).first_extents, | 416 | res = hfsplus_add_extent(hip->first_extents, |
392 | HFSPLUS_I(inode).alloc_blocks, | 417 | hip->alloc_blocks, |
393 | start, len); | 418 | start, len); |
394 | if (res == -ENOSPC) | 419 | if (res == -ENOSPC) |
395 | goto insert_extent; | 420 | goto insert_extent; |
396 | } | 421 | } |
397 | if (!res) { | 422 | if (!res) { |
398 | hfsplus_dump_extent(HFSPLUS_I(inode).first_extents); | 423 | hfsplus_dump_extent(hip->first_extents); |
399 | HFSPLUS_I(inode).first_blocks += len; | 424 | hip->first_blocks += len; |
400 | } | 425 | } |
401 | } else { | 426 | } else { |
402 | res = hfsplus_add_extent(HFSPLUS_I(inode).cached_extents, | 427 | res = hfsplus_add_extent(hip->cached_extents, |
403 | HFSPLUS_I(inode).alloc_blocks - | 428 | hip->alloc_blocks - hip->cached_start, |
404 | HFSPLUS_I(inode).cached_start, | ||
405 | start, len); | 429 | start, len); |
406 | if (!res) { | 430 | if (!res) { |
407 | hfsplus_dump_extent(HFSPLUS_I(inode).cached_extents); | 431 | hfsplus_dump_extent(hip->cached_extents); |
408 | HFSPLUS_I(inode).flags |= HFSPLUS_FLG_EXT_DIRTY; | 432 | hip->flags |= HFSPLUS_FLG_EXT_DIRTY; |
409 | HFSPLUS_I(inode).cached_blocks += len; | 433 | hip->cached_blocks += len; |
410 | } else if (res == -ENOSPC) | 434 | } else if (res == -ENOSPC) |
411 | goto insert_extent; | 435 | goto insert_extent; |
412 | } | 436 | } |
413 | out: | 437 | out: |
414 | mutex_unlock(&HFSPLUS_I(inode).extents_lock); | 438 | mutex_unlock(&hip->extents_lock); |
415 | if (!res) { | 439 | if (!res) { |
416 | HFSPLUS_I(inode).alloc_blocks += len; | 440 | hip->alloc_blocks += len; |
417 | mark_inode_dirty(inode); | 441 | mark_inode_dirty(inode); |
418 | } | 442 | } |
419 | return res; | 443 | return res; |
420 | 444 | ||
421 | insert_extent: | 445 | insert_extent: |
422 | dprint(DBG_EXTENT, "insert new extent\n"); | 446 | dprint(DBG_EXTENT, "insert new extent\n"); |
423 | hfsplus_ext_write_extent(inode); | 447 | hfsplus_ext_write_extent_locked(inode); |
424 | 448 | ||
425 | memset(HFSPLUS_I(inode).cached_extents, 0, sizeof(hfsplus_extent_rec)); | 449 | memset(hip->cached_extents, 0, sizeof(hfsplus_extent_rec)); |
426 | HFSPLUS_I(inode).cached_extents[0].start_block = cpu_to_be32(start); | 450 | hip->cached_extents[0].start_block = cpu_to_be32(start); |
427 | HFSPLUS_I(inode).cached_extents[0].block_count = cpu_to_be32(len); | 451 | hip->cached_extents[0].block_count = cpu_to_be32(len); |
428 | hfsplus_dump_extent(HFSPLUS_I(inode).cached_extents); | 452 | hfsplus_dump_extent(hip->cached_extents); |
429 | HFSPLUS_I(inode).flags |= HFSPLUS_FLG_EXT_DIRTY | HFSPLUS_FLG_EXT_NEW; | 453 | hip->flags |= HFSPLUS_FLG_EXT_DIRTY | HFSPLUS_FLG_EXT_NEW; |
430 | HFSPLUS_I(inode).cached_start = HFSPLUS_I(inode).alloc_blocks; | 454 | hip->cached_start = hip->alloc_blocks; |
431 | HFSPLUS_I(inode).cached_blocks = len; | 455 | hip->cached_blocks = len; |
432 | 456 | ||
433 | res = 0; | 457 | res = 0; |
434 | goto out; | 458 | goto out; |
@@ -437,13 +461,15 @@ insert_extent: | |||
437 | void hfsplus_file_truncate(struct inode *inode) | 461 | void hfsplus_file_truncate(struct inode *inode) |
438 | { | 462 | { |
439 | struct super_block *sb = inode->i_sb; | 463 | struct super_block *sb = inode->i_sb; |
464 | struct hfsplus_inode_info *hip = HFSPLUS_I(inode); | ||
440 | struct hfs_find_data fd; | 465 | struct hfs_find_data fd; |
441 | u32 alloc_cnt, blk_cnt, start; | 466 | u32 alloc_cnt, blk_cnt, start; |
442 | int res; | 467 | int res; |
443 | 468 | ||
444 | dprint(DBG_INODE, "truncate: %lu, %Lu -> %Lu\n", inode->i_ino, | 469 | dprint(DBG_INODE, "truncate: %lu, %Lu -> %Lu\n", |
445 | (long long)HFSPLUS_I(inode).phys_size, inode->i_size); | 470 | inode->i_ino, (long long)hip->phys_size, inode->i_size); |
446 | if (inode->i_size > HFSPLUS_I(inode).phys_size) { | 471 | |
472 | if (inode->i_size > hip->phys_size) { | ||
447 | struct address_space *mapping = inode->i_mapping; | 473 | struct address_space *mapping = inode->i_mapping; |
448 | struct page *page; | 474 | struct page *page; |
449 | void *fsdata; | 475 | void *fsdata; |
@@ -460,47 +486,48 @@ void hfsplus_file_truncate(struct inode *inode) | |||
460 | return; | 486 | return; |
461 | mark_inode_dirty(inode); | 487 | mark_inode_dirty(inode); |
462 | return; | 488 | return; |
463 | } else if (inode->i_size == HFSPLUS_I(inode).phys_size) | 489 | } else if (inode->i_size == hip->phys_size) |
464 | return; | 490 | return; |
465 | 491 | ||
466 | blk_cnt = (inode->i_size + HFSPLUS_SB(sb).alloc_blksz - 1) >> HFSPLUS_SB(sb).alloc_blksz_shift; | 492 | blk_cnt = (inode->i_size + HFSPLUS_SB(sb)->alloc_blksz - 1) >> |
467 | alloc_cnt = HFSPLUS_I(inode).alloc_blocks; | 493 | HFSPLUS_SB(sb)->alloc_blksz_shift; |
494 | alloc_cnt = hip->alloc_blocks; | ||
468 | if (blk_cnt == alloc_cnt) | 495 | if (blk_cnt == alloc_cnt) |
469 | goto out; | 496 | goto out; |
470 | 497 | ||
471 | mutex_lock(&HFSPLUS_I(inode).extents_lock); | 498 | mutex_lock(&hip->extents_lock); |
472 | hfs_find_init(HFSPLUS_SB(sb).ext_tree, &fd); | 499 | hfs_find_init(HFSPLUS_SB(sb)->ext_tree, &fd); |
473 | while (1) { | 500 | while (1) { |
474 | if (alloc_cnt == HFSPLUS_I(inode).first_blocks) { | 501 | if (alloc_cnt == hip->first_blocks) { |
475 | hfsplus_free_extents(sb, HFSPLUS_I(inode).first_extents, | 502 | hfsplus_free_extents(sb, hip->first_extents, |
476 | alloc_cnt, alloc_cnt - blk_cnt); | 503 | alloc_cnt, alloc_cnt - blk_cnt); |
477 | hfsplus_dump_extent(HFSPLUS_I(inode).first_extents); | 504 | hfsplus_dump_extent(hip->first_extents); |
478 | HFSPLUS_I(inode).first_blocks = blk_cnt; | 505 | hip->first_blocks = blk_cnt; |
479 | break; | 506 | break; |
480 | } | 507 | } |
481 | res = __hfsplus_ext_cache_extent(&fd, inode, alloc_cnt); | 508 | res = __hfsplus_ext_cache_extent(&fd, inode, alloc_cnt); |
482 | if (res) | 509 | if (res) |
483 | break; | 510 | break; |
484 | start = HFSPLUS_I(inode).cached_start; | 511 | start = hip->cached_start; |
485 | hfsplus_free_extents(sb, HFSPLUS_I(inode).cached_extents, | 512 | hfsplus_free_extents(sb, hip->cached_extents, |
486 | alloc_cnt - start, alloc_cnt - blk_cnt); | 513 | alloc_cnt - start, alloc_cnt - blk_cnt); |
487 | hfsplus_dump_extent(HFSPLUS_I(inode).cached_extents); | 514 | hfsplus_dump_extent(hip->cached_extents); |
488 | if (blk_cnt > start) { | 515 | if (blk_cnt > start) { |
489 | HFSPLUS_I(inode).flags |= HFSPLUS_FLG_EXT_DIRTY; | 516 | hip->flags |= HFSPLUS_FLG_EXT_DIRTY; |
490 | break; | 517 | break; |
491 | } | 518 | } |
492 | alloc_cnt = start; | 519 | alloc_cnt = start; |
493 | HFSPLUS_I(inode).cached_start = HFSPLUS_I(inode).cached_blocks = 0; | 520 | hip->cached_start = hip->cached_blocks = 0; |
494 | HFSPLUS_I(inode).flags &= ~(HFSPLUS_FLG_EXT_DIRTY | HFSPLUS_FLG_EXT_NEW); | 521 | hip->flags &= ~(HFSPLUS_FLG_EXT_DIRTY | HFSPLUS_FLG_EXT_NEW); |
495 | hfs_brec_remove(&fd); | 522 | hfs_brec_remove(&fd); |
496 | } | 523 | } |
497 | hfs_find_exit(&fd); | 524 | hfs_find_exit(&fd); |
498 | mutex_unlock(&HFSPLUS_I(inode).extents_lock); | 525 | mutex_unlock(&hip->extents_lock); |
499 | 526 | ||
500 | HFSPLUS_I(inode).alloc_blocks = blk_cnt; | 527 | hip->alloc_blocks = blk_cnt; |
501 | out: | 528 | out: |
502 | HFSPLUS_I(inode).phys_size = inode->i_size; | 529 | hip->phys_size = inode->i_size; |
503 | HFSPLUS_I(inode).fs_blocks = (inode->i_size + sb->s_blocksize - 1) >> sb->s_blocksize_bits; | 530 | hip->fs_blocks = (inode->i_size + sb->s_blocksize - 1) >> sb->s_blocksize_bits; |
504 | inode_set_bytes(inode, HFSPLUS_I(inode).fs_blocks << sb->s_blocksize_bits); | 531 | inode_set_bytes(inode, hip->fs_blocks << sb->s_blocksize_bits); |
505 | mark_inode_dirty(inode); | 532 | mark_inode_dirty(inode); |
506 | } | 533 | } |
diff --git a/fs/hfsplus/hfsplus_fs.h b/fs/hfsplus/hfsplus_fs.h index dc856be3c2b0..cb3653efb57a 100644 --- a/fs/hfsplus/hfsplus_fs.h +++ b/fs/hfsplus/hfsplus_fs.h | |||
@@ -62,7 +62,7 @@ struct hfs_btree { | |||
62 | unsigned int depth; | 62 | unsigned int depth; |
63 | 63 | ||
64 | //unsigned int map1_size, map_size; | 64 | //unsigned int map1_size, map_size; |
65 | struct semaphore tree_lock; | 65 | struct mutex tree_lock; |
66 | 66 | ||
67 | unsigned int pages_per_bnode; | 67 | unsigned int pages_per_bnode; |
68 | spinlock_t hash_lock; | 68 | spinlock_t hash_lock; |
@@ -121,16 +121,21 @@ struct hfsplus_sb_info { | |||
121 | u32 sect_count; | 121 | u32 sect_count; |
122 | int fs_shift; | 122 | int fs_shift; |
123 | 123 | ||
124 | /* Stuff in host order from Vol Header */ | 124 | /* immutable data from the volume header */ |
125 | u32 alloc_blksz; | 125 | u32 alloc_blksz; |
126 | int alloc_blksz_shift; | 126 | int alloc_blksz_shift; |
127 | u32 total_blocks; | 127 | u32 total_blocks; |
128 | u32 data_clump_blocks, rsrc_clump_blocks; | ||
129 | |||
130 | /* mutable data from the volume header, protected by alloc_mutex */ | ||
128 | u32 free_blocks; | 131 | u32 free_blocks; |
129 | u32 next_alloc; | 132 | struct mutex alloc_mutex; |
133 | |||
134 | /* mutable data from the volume header, protected by vh_mutex */ | ||
130 | u32 next_cnid; | 135 | u32 next_cnid; |
131 | u32 file_count; | 136 | u32 file_count; |
132 | u32 folder_count; | 137 | u32 folder_count; |
133 | u32 data_clump_blocks, rsrc_clump_blocks; | 138 | struct mutex vh_mutex; |
134 | 139 | ||
135 | /* Config options */ | 140 | /* Config options */ |
136 | u32 creator; | 141 | u32 creator; |
@@ -143,40 +148,50 @@ struct hfsplus_sb_info { | |||
143 | int part, session; | 148 | int part, session; |
144 | 149 | ||
145 | unsigned long flags; | 150 | unsigned long flags; |
146 | |||
147 | struct hlist_head rsrc_inodes; | ||
148 | }; | 151 | }; |
149 | 152 | ||
150 | #define HFSPLUS_SB_WRITEBACKUP 0x0001 | 153 | #define HFSPLUS_SB_WRITEBACKUP 0 |
151 | #define HFSPLUS_SB_NODECOMPOSE 0x0002 | 154 | #define HFSPLUS_SB_NODECOMPOSE 1 |
152 | #define HFSPLUS_SB_FORCE 0x0004 | 155 | #define HFSPLUS_SB_FORCE 2 |
153 | #define HFSPLUS_SB_HFSX 0x0008 | 156 | #define HFSPLUS_SB_HFSX 3 |
154 | #define HFSPLUS_SB_CASEFOLD 0x0010 | 157 | #define HFSPLUS_SB_CASEFOLD 4 |
155 | 158 | ||
156 | 159 | ||
157 | struct hfsplus_inode_info { | 160 | struct hfsplus_inode_info { |
158 | struct mutex extents_lock; | ||
159 | u32 clump_blocks, alloc_blocks; | ||
160 | sector_t fs_blocks; | ||
161 | /* Allocation extents from catalog record or volume header */ | ||
162 | hfsplus_extent_rec first_extents; | ||
163 | u32 first_blocks; | ||
164 | hfsplus_extent_rec cached_extents; | ||
165 | u32 cached_start, cached_blocks; | ||
166 | atomic_t opencnt; | 161 | atomic_t opencnt; |
167 | 162 | ||
168 | struct inode *rsrc_inode; | 163 | /* |
164 | * Extent allocation information, protected by extents_lock. | ||
165 | */ | ||
166 | u32 first_blocks; | ||
167 | u32 clump_blocks; | ||
168 | u32 alloc_blocks; | ||
169 | u32 cached_start; | ||
170 | u32 cached_blocks; | ||
171 | hfsplus_extent_rec first_extents; | ||
172 | hfsplus_extent_rec cached_extents; | ||
169 | unsigned long flags; | 173 | unsigned long flags; |
174 | struct mutex extents_lock; | ||
170 | 175 | ||
176 | /* | ||
177 | * Immutable data. | ||
178 | */ | ||
179 | struct inode *rsrc_inode; | ||
171 | __be32 create_date; | 180 | __be32 create_date; |
172 | /* Device number in hfsplus_permissions in catalog */ | ||
173 | u32 dev; | ||
174 | /* BSD system and user file flags */ | ||
175 | u8 rootflags; | ||
176 | u8 userflags; | ||
177 | 181 | ||
182 | /* | ||
183 | * Protected by sbi->vh_mutex. | ||
184 | */ | ||
185 | u32 linkid; | ||
186 | |||
187 | /* | ||
188 | * Protected by i_mutex. | ||
189 | */ | ||
190 | sector_t fs_blocks; | ||
191 | u8 userflags; /* BSD user file flags */ | ||
178 | struct list_head open_dir_list; | 192 | struct list_head open_dir_list; |
179 | loff_t phys_size; | 193 | loff_t phys_size; |
194 | |||
180 | struct inode vfs_inode; | 195 | struct inode vfs_inode; |
181 | }; | 196 | }; |
182 | 197 | ||
@@ -184,8 +199,8 @@ struct hfsplus_inode_info { | |||
184 | #define HFSPLUS_FLG_EXT_DIRTY 0x0002 | 199 | #define HFSPLUS_FLG_EXT_DIRTY 0x0002 |
185 | #define HFSPLUS_FLG_EXT_NEW 0x0004 | 200 | #define HFSPLUS_FLG_EXT_NEW 0x0004 |
186 | 201 | ||
187 | #define HFSPLUS_IS_DATA(inode) (!(HFSPLUS_I(inode).flags & HFSPLUS_FLG_RSRC)) | 202 | #define HFSPLUS_IS_DATA(inode) (!(HFSPLUS_I(inode)->flags & HFSPLUS_FLG_RSRC)) |
188 | #define HFSPLUS_IS_RSRC(inode) (HFSPLUS_I(inode).flags & HFSPLUS_FLG_RSRC) | 203 | #define HFSPLUS_IS_RSRC(inode) (HFSPLUS_I(inode)->flags & HFSPLUS_FLG_RSRC) |
189 | 204 | ||
190 | struct hfs_find_data { | 205 | struct hfs_find_data { |
191 | /* filled by caller */ | 206 | /* filled by caller */ |
@@ -311,6 +326,7 @@ int hfsplus_create_cat(u32, struct inode *, struct qstr *, struct inode *); | |||
311 | int hfsplus_delete_cat(u32, struct inode *, struct qstr *); | 326 | int hfsplus_delete_cat(u32, struct inode *, struct qstr *); |
312 | int hfsplus_rename_cat(u32, struct inode *, struct qstr *, | 327 | int hfsplus_rename_cat(u32, struct inode *, struct qstr *, |
313 | struct inode *, struct qstr *); | 328 | struct inode *, struct qstr *); |
329 | void hfsplus_cat_set_perms(struct inode *inode, struct hfsplus_perm *perms); | ||
314 | 330 | ||
315 | /* dir.c */ | 331 | /* dir.c */ |
316 | extern const struct inode_operations hfsplus_dir_inode_operations; | 332 | extern const struct inode_operations hfsplus_dir_inode_operations; |
@@ -372,26 +388,15 @@ int hfsplus_read_wrapper(struct super_block *); | |||
372 | int hfs_part_find(struct super_block *, sector_t *, sector_t *); | 388 | int hfs_part_find(struct super_block *, sector_t *, sector_t *); |
373 | 389 | ||
374 | /* access macros */ | 390 | /* access macros */ |
375 | /* | ||
376 | static inline struct hfsplus_sb_info *HFSPLUS_SB(struct super_block *sb) | 391 | static inline struct hfsplus_sb_info *HFSPLUS_SB(struct super_block *sb) |
377 | { | 392 | { |
378 | return sb->s_fs_info; | 393 | return sb->s_fs_info; |
379 | } | 394 | } |
395 | |||
380 | static inline struct hfsplus_inode_info *HFSPLUS_I(struct inode *inode) | 396 | static inline struct hfsplus_inode_info *HFSPLUS_I(struct inode *inode) |
381 | { | 397 | { |
382 | return list_entry(inode, struct hfsplus_inode_info, vfs_inode); | 398 | return list_entry(inode, struct hfsplus_inode_info, vfs_inode); |
383 | } | 399 | } |
384 | */ | ||
385 | #define HFSPLUS_SB(super) (*(struct hfsplus_sb_info *)(super)->s_fs_info) | ||
386 | #define HFSPLUS_I(inode) (*list_entry(inode, struct hfsplus_inode_info, vfs_inode)) | ||
387 | |||
388 | #if 1 | ||
389 | #define hfsplus_kmap(p) ({ struct page *__p = (p); kmap(__p); }) | ||
390 | #define hfsplus_kunmap(p) ({ struct page *__p = (p); kunmap(__p); __p; }) | ||
391 | #else | ||
392 | #define hfsplus_kmap(p) kmap(p) | ||
393 | #define hfsplus_kunmap(p) kunmap(p) | ||
394 | #endif | ||
395 | 400 | ||
396 | #define sb_bread512(sb, sec, data) ({ \ | 401 | #define sb_bread512(sb, sec, data) ({ \ |
397 | struct buffer_head *__bh; \ | 402 | struct buffer_head *__bh; \ |
@@ -419,6 +424,4 @@ static inline struct hfsplus_inode_info *HFSPLUS_I(struct inode *inode) | |||
419 | #define hfsp_ut2mt(t) __hfsp_ut2mt((t).tv_sec) | 424 | #define hfsp_ut2mt(t) __hfsp_ut2mt((t).tv_sec) |
420 | #define hfsp_now2mt() __hfsp_ut2mt(get_seconds()) | 425 | #define hfsp_now2mt() __hfsp_ut2mt(get_seconds()) |
421 | 426 | ||
422 | #define kdev_t_to_nr(x) (x) | ||
423 | |||
424 | #endif | 427 | #endif |
diff --git a/fs/hfsplus/hfsplus_raw.h b/fs/hfsplus/hfsplus_raw.h index fe99fe8db61a..6892899fd6fb 100644 --- a/fs/hfsplus/hfsplus_raw.h +++ b/fs/hfsplus/hfsplus_raw.h | |||
@@ -200,6 +200,7 @@ struct hfsplus_cat_key { | |||
200 | struct hfsplus_unistr name; | 200 | struct hfsplus_unistr name; |
201 | } __packed; | 201 | } __packed; |
202 | 202 | ||
203 | #define HFSPLUS_CAT_KEYLEN (sizeof(struct hfsplus_cat_key)) | ||
203 | 204 | ||
204 | /* Structs from hfs.h */ | 205 | /* Structs from hfs.h */ |
205 | struct hfsp_point { | 206 | struct hfsp_point { |
@@ -323,7 +324,7 @@ struct hfsplus_ext_key { | |||
323 | __be32 start_block; | 324 | __be32 start_block; |
324 | } __packed; | 325 | } __packed; |
325 | 326 | ||
326 | #define HFSPLUS_EXT_KEYLEN 12 | 327 | #define HFSPLUS_EXT_KEYLEN sizeof(struct hfsplus_ext_key) |
327 | 328 | ||
328 | /* HFS+ generic BTree key */ | 329 | /* HFS+ generic BTree key */ |
329 | typedef union { | 330 | typedef union { |
diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c index c5a979d62c65..78449280dae0 100644 --- a/fs/hfsplus/inode.c +++ b/fs/hfsplus/inode.c | |||
@@ -36,7 +36,7 @@ static int hfsplus_write_begin(struct file *file, struct address_space *mapping, | |||
36 | *pagep = NULL; | 36 | *pagep = NULL; |
37 | ret = cont_write_begin(file, mapping, pos, len, flags, pagep, fsdata, | 37 | ret = cont_write_begin(file, mapping, pos, len, flags, pagep, fsdata, |
38 | hfsplus_get_block, | 38 | hfsplus_get_block, |
39 | &HFSPLUS_I(mapping->host).phys_size); | 39 | &HFSPLUS_I(mapping->host)->phys_size); |
40 | if (unlikely(ret)) { | 40 | if (unlikely(ret)) { |
41 | loff_t isize = mapping->host->i_size; | 41 | loff_t isize = mapping->host->i_size; |
42 | if (pos + len > isize) | 42 | if (pos + len > isize) |
@@ -62,13 +62,13 @@ static int hfsplus_releasepage(struct page *page, gfp_t mask) | |||
62 | 62 | ||
63 | switch (inode->i_ino) { | 63 | switch (inode->i_ino) { |
64 | case HFSPLUS_EXT_CNID: | 64 | case HFSPLUS_EXT_CNID: |
65 | tree = HFSPLUS_SB(sb).ext_tree; | 65 | tree = HFSPLUS_SB(sb)->ext_tree; |
66 | break; | 66 | break; |
67 | case HFSPLUS_CAT_CNID: | 67 | case HFSPLUS_CAT_CNID: |
68 | tree = HFSPLUS_SB(sb).cat_tree; | 68 | tree = HFSPLUS_SB(sb)->cat_tree; |
69 | break; | 69 | break; |
70 | case HFSPLUS_ATTR_CNID: | 70 | case HFSPLUS_ATTR_CNID: |
71 | tree = HFSPLUS_SB(sb).attr_tree; | 71 | tree = HFSPLUS_SB(sb)->attr_tree; |
72 | break; | 72 | break; |
73 | default: | 73 | default: |
74 | BUG(); | 74 | BUG(); |
@@ -172,12 +172,13 @@ static struct dentry *hfsplus_file_lookup(struct inode *dir, struct dentry *dent | |||
172 | struct hfs_find_data fd; | 172 | struct hfs_find_data fd; |
173 | struct super_block *sb = dir->i_sb; | 173 | struct super_block *sb = dir->i_sb; |
174 | struct inode *inode = NULL; | 174 | struct inode *inode = NULL; |
175 | struct hfsplus_inode_info *hip; | ||
175 | int err; | 176 | int err; |
176 | 177 | ||
177 | if (HFSPLUS_IS_RSRC(dir) || strcmp(dentry->d_name.name, "rsrc")) | 178 | if (HFSPLUS_IS_RSRC(dir) || strcmp(dentry->d_name.name, "rsrc")) |
178 | goto out; | 179 | goto out; |
179 | 180 | ||
180 | inode = HFSPLUS_I(dir).rsrc_inode; | 181 | inode = HFSPLUS_I(dir)->rsrc_inode; |
181 | if (inode) | 182 | if (inode) |
182 | goto out; | 183 | goto out; |
183 | 184 | ||
@@ -185,12 +186,13 @@ static struct dentry *hfsplus_file_lookup(struct inode *dir, struct dentry *dent | |||
185 | if (!inode) | 186 | if (!inode) |
186 | return ERR_PTR(-ENOMEM); | 187 | return ERR_PTR(-ENOMEM); |
187 | 188 | ||
189 | hip = HFSPLUS_I(inode); | ||
188 | inode->i_ino = dir->i_ino; | 190 | inode->i_ino = dir->i_ino; |
189 | INIT_LIST_HEAD(&HFSPLUS_I(inode).open_dir_list); | 191 | INIT_LIST_HEAD(&hip->open_dir_list); |
190 | mutex_init(&HFSPLUS_I(inode).extents_lock); | 192 | mutex_init(&hip->extents_lock); |
191 | HFSPLUS_I(inode).flags = HFSPLUS_FLG_RSRC; | 193 | hip->flags = HFSPLUS_FLG_RSRC; |
192 | 194 | ||
193 | hfs_find_init(HFSPLUS_SB(sb).cat_tree, &fd); | 195 | hfs_find_init(HFSPLUS_SB(sb)->cat_tree, &fd); |
194 | err = hfsplus_find_cat(sb, dir->i_ino, &fd); | 196 | err = hfsplus_find_cat(sb, dir->i_ino, &fd); |
195 | if (!err) | 197 | if (!err) |
196 | err = hfsplus_cat_read_inode(inode, &fd); | 198 | err = hfsplus_cat_read_inode(inode, &fd); |
@@ -199,10 +201,18 @@ static struct dentry *hfsplus_file_lookup(struct inode *dir, struct dentry *dent | |||
199 | iput(inode); | 201 | iput(inode); |
200 | return ERR_PTR(err); | 202 | return ERR_PTR(err); |
201 | } | 203 | } |
202 | HFSPLUS_I(inode).rsrc_inode = dir; | 204 | hip->rsrc_inode = dir; |
203 | HFSPLUS_I(dir).rsrc_inode = inode; | 205 | HFSPLUS_I(dir)->rsrc_inode = inode; |
204 | igrab(dir); | 206 | igrab(dir); |
205 | hlist_add_head(&inode->i_hash, &HFSPLUS_SB(sb).rsrc_inodes); | 207 | |
208 | /* | ||
209 | * __mark_inode_dirty expects inodes to be hashed. Since we don't | ||
210 | * want resource fork inodes in the regular inode space, we make them | ||
211 | * appear hashed, but do not put on any lists. hlist_del() | ||
212 | * will work fine and require no locking. | ||
213 | */ | ||
214 | inode->i_hash.pprev = &inode->i_hash.next; | ||
215 | |||
206 | mark_inode_dirty(inode); | 216 | mark_inode_dirty(inode); |
207 | out: | 217 | out: |
208 | d_add(dentry, inode); | 218 | d_add(dentry, inode); |
@@ -211,30 +221,27 @@ out: | |||
211 | 221 | ||
212 | static void hfsplus_get_perms(struct inode *inode, struct hfsplus_perm *perms, int dir) | 222 | static void hfsplus_get_perms(struct inode *inode, struct hfsplus_perm *perms, int dir) |
213 | { | 223 | { |
214 | struct super_block *sb = inode->i_sb; | 224 | struct hfsplus_sb_info *sbi = HFSPLUS_SB(inode->i_sb); |
215 | u16 mode; | 225 | u16 mode; |
216 | 226 | ||
217 | mode = be16_to_cpu(perms->mode); | 227 | mode = be16_to_cpu(perms->mode); |
218 | 228 | ||
219 | inode->i_uid = be32_to_cpu(perms->owner); | 229 | inode->i_uid = be32_to_cpu(perms->owner); |
220 | if (!inode->i_uid && !mode) | 230 | if (!inode->i_uid && !mode) |
221 | inode->i_uid = HFSPLUS_SB(sb).uid; | 231 | inode->i_uid = sbi->uid; |
222 | 232 | ||
223 | inode->i_gid = be32_to_cpu(perms->group); | 233 | inode->i_gid = be32_to_cpu(perms->group); |
224 | if (!inode->i_gid && !mode) | 234 | if (!inode->i_gid && !mode) |
225 | inode->i_gid = HFSPLUS_SB(sb).gid; | 235 | inode->i_gid = sbi->gid; |
226 | 236 | ||
227 | if (dir) { | 237 | if (dir) { |
228 | mode = mode ? (mode & S_IALLUGO) : | 238 | mode = mode ? (mode & S_IALLUGO) : (S_IRWXUGO & ~(sbi->umask)); |
229 | (S_IRWXUGO & ~(HFSPLUS_SB(sb).umask)); | ||
230 | mode |= S_IFDIR; | 239 | mode |= S_IFDIR; |
231 | } else if (!mode) | 240 | } else if (!mode) |
232 | mode = S_IFREG | ((S_IRUGO|S_IWUGO) & | 241 | mode = S_IFREG | ((S_IRUGO|S_IWUGO) & ~(sbi->umask)); |
233 | ~(HFSPLUS_SB(sb).umask)); | ||
234 | inode->i_mode = mode; | 242 | inode->i_mode = mode; |
235 | 243 | ||
236 | HFSPLUS_I(inode).rootflags = perms->rootflags; | 244 | HFSPLUS_I(inode)->userflags = perms->userflags; |
237 | HFSPLUS_I(inode).userflags = perms->userflags; | ||
238 | if (perms->rootflags & HFSPLUS_FLG_IMMUTABLE) | 245 | if (perms->rootflags & HFSPLUS_FLG_IMMUTABLE) |
239 | inode->i_flags |= S_IMMUTABLE; | 246 | inode->i_flags |= S_IMMUTABLE; |
240 | else | 247 | else |
@@ -245,30 +252,13 @@ static void hfsplus_get_perms(struct inode *inode, struct hfsplus_perm *perms, i | |||
245 | inode->i_flags &= ~S_APPEND; | 252 | inode->i_flags &= ~S_APPEND; |
246 | } | 253 | } |
247 | 254 | ||
248 | static void hfsplus_set_perms(struct inode *inode, struct hfsplus_perm *perms) | ||
249 | { | ||
250 | if (inode->i_flags & S_IMMUTABLE) | ||
251 | perms->rootflags |= HFSPLUS_FLG_IMMUTABLE; | ||
252 | else | ||
253 | perms->rootflags &= ~HFSPLUS_FLG_IMMUTABLE; | ||
254 | if (inode->i_flags & S_APPEND) | ||
255 | perms->rootflags |= HFSPLUS_FLG_APPEND; | ||
256 | else | ||
257 | perms->rootflags &= ~HFSPLUS_FLG_APPEND; | ||
258 | perms->userflags = HFSPLUS_I(inode).userflags; | ||
259 | perms->mode = cpu_to_be16(inode->i_mode); | ||
260 | perms->owner = cpu_to_be32(inode->i_uid); | ||
261 | perms->group = cpu_to_be32(inode->i_gid); | ||
262 | perms->dev = cpu_to_be32(HFSPLUS_I(inode).dev); | ||
263 | } | ||
264 | |||
265 | static int hfsplus_file_open(struct inode *inode, struct file *file) | 255 | static int hfsplus_file_open(struct inode *inode, struct file *file) |
266 | { | 256 | { |
267 | if (HFSPLUS_IS_RSRC(inode)) | 257 | if (HFSPLUS_IS_RSRC(inode)) |
268 | inode = HFSPLUS_I(inode).rsrc_inode; | 258 | inode = HFSPLUS_I(inode)->rsrc_inode; |
269 | if (!(file->f_flags & O_LARGEFILE) && i_size_read(inode) > MAX_NON_LFS) | 259 | if (!(file->f_flags & O_LARGEFILE) && i_size_read(inode) > MAX_NON_LFS) |
270 | return -EOVERFLOW; | 260 | return -EOVERFLOW; |
271 | atomic_inc(&HFSPLUS_I(inode).opencnt); | 261 | atomic_inc(&HFSPLUS_I(inode)->opencnt); |
272 | return 0; | 262 | return 0; |
273 | } | 263 | } |
274 | 264 | ||
@@ -277,12 +267,13 @@ static int hfsplus_file_release(struct inode *inode, struct file *file) | |||
277 | struct super_block *sb = inode->i_sb; | 267 | struct super_block *sb = inode->i_sb; |
278 | 268 | ||
279 | if (HFSPLUS_IS_RSRC(inode)) | 269 | if (HFSPLUS_IS_RSRC(inode)) |
280 | inode = HFSPLUS_I(inode).rsrc_inode; | 270 | inode = HFSPLUS_I(inode)->rsrc_inode; |
281 | if (atomic_dec_and_test(&HFSPLUS_I(inode).opencnt)) { | 271 | if (atomic_dec_and_test(&HFSPLUS_I(inode)->opencnt)) { |
282 | mutex_lock(&inode->i_mutex); | 272 | mutex_lock(&inode->i_mutex); |
283 | hfsplus_file_truncate(inode); | 273 | hfsplus_file_truncate(inode); |
284 | if (inode->i_flags & S_DEAD) { | 274 | if (inode->i_flags & S_DEAD) { |
285 | hfsplus_delete_cat(inode->i_ino, HFSPLUS_SB(sb).hidden_dir, NULL); | 275 | hfsplus_delete_cat(inode->i_ino, |
276 | HFSPLUS_SB(sb)->hidden_dir, NULL); | ||
286 | hfsplus_delete_inode(inode); | 277 | hfsplus_delete_inode(inode); |
287 | } | 278 | } |
288 | mutex_unlock(&inode->i_mutex); | 279 | mutex_unlock(&inode->i_mutex); |
@@ -361,47 +352,52 @@ static const struct file_operations hfsplus_file_operations = { | |||
361 | 352 | ||
362 | struct inode *hfsplus_new_inode(struct super_block *sb, int mode) | 353 | struct inode *hfsplus_new_inode(struct super_block *sb, int mode) |
363 | { | 354 | { |
355 | struct hfsplus_sb_info *sbi = HFSPLUS_SB(sb); | ||
364 | struct inode *inode = new_inode(sb); | 356 | struct inode *inode = new_inode(sb); |
357 | struct hfsplus_inode_info *hip; | ||
358 | |||
365 | if (!inode) | 359 | if (!inode) |
366 | return NULL; | 360 | return NULL; |
367 | 361 | ||
368 | inode->i_ino = HFSPLUS_SB(sb).next_cnid++; | 362 | inode->i_ino = sbi->next_cnid++; |
369 | inode->i_mode = mode; | 363 | inode->i_mode = mode; |
370 | inode->i_uid = current_fsuid(); | 364 | inode->i_uid = current_fsuid(); |
371 | inode->i_gid = current_fsgid(); | 365 | inode->i_gid = current_fsgid(); |
372 | inode->i_nlink = 1; | 366 | inode->i_nlink = 1; |
373 | inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC; | 367 | inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC; |
374 | INIT_LIST_HEAD(&HFSPLUS_I(inode).open_dir_list); | 368 | |
375 | mutex_init(&HFSPLUS_I(inode).extents_lock); | 369 | hip = HFSPLUS_I(inode); |
376 | atomic_set(&HFSPLUS_I(inode).opencnt, 0); | 370 | INIT_LIST_HEAD(&hip->open_dir_list); |
377 | HFSPLUS_I(inode).flags = 0; | 371 | mutex_init(&hip->extents_lock); |
378 | memset(HFSPLUS_I(inode).first_extents, 0, sizeof(hfsplus_extent_rec)); | 372 | atomic_set(&hip->opencnt, 0); |
379 | memset(HFSPLUS_I(inode).cached_extents, 0, sizeof(hfsplus_extent_rec)); | 373 | hip->flags = 0; |
380 | HFSPLUS_I(inode).alloc_blocks = 0; | 374 | memset(hip->first_extents, 0, sizeof(hfsplus_extent_rec)); |
381 | HFSPLUS_I(inode).first_blocks = 0; | 375 | memset(hip->cached_extents, 0, sizeof(hfsplus_extent_rec)); |
382 | HFSPLUS_I(inode).cached_start = 0; | 376 | hip->alloc_blocks = 0; |
383 | HFSPLUS_I(inode).cached_blocks = 0; | 377 | hip->first_blocks = 0; |
384 | HFSPLUS_I(inode).phys_size = 0; | 378 | hip->cached_start = 0; |
385 | HFSPLUS_I(inode).fs_blocks = 0; | 379 | hip->cached_blocks = 0; |
386 | HFSPLUS_I(inode).rsrc_inode = NULL; | 380 | hip->phys_size = 0; |
381 | hip->fs_blocks = 0; | ||
382 | hip->rsrc_inode = NULL; | ||
387 | if (S_ISDIR(inode->i_mode)) { | 383 | if (S_ISDIR(inode->i_mode)) { |
388 | inode->i_size = 2; | 384 | inode->i_size = 2; |
389 | HFSPLUS_SB(sb).folder_count++; | 385 | sbi->folder_count++; |
390 | inode->i_op = &hfsplus_dir_inode_operations; | 386 | inode->i_op = &hfsplus_dir_inode_operations; |
391 | inode->i_fop = &hfsplus_dir_operations; | 387 | inode->i_fop = &hfsplus_dir_operations; |
392 | } else if (S_ISREG(inode->i_mode)) { | 388 | } else if (S_ISREG(inode->i_mode)) { |
393 | HFSPLUS_SB(sb).file_count++; | 389 | sbi->file_count++; |
394 | inode->i_op = &hfsplus_file_inode_operations; | 390 | inode->i_op = &hfsplus_file_inode_operations; |
395 | inode->i_fop = &hfsplus_file_operations; | 391 | inode->i_fop = &hfsplus_file_operations; |
396 | inode->i_mapping->a_ops = &hfsplus_aops; | 392 | inode->i_mapping->a_ops = &hfsplus_aops; |
397 | HFSPLUS_I(inode).clump_blocks = HFSPLUS_SB(sb).data_clump_blocks; | 393 | hip->clump_blocks = sbi->data_clump_blocks; |
398 | } else if (S_ISLNK(inode->i_mode)) { | 394 | } else if (S_ISLNK(inode->i_mode)) { |
399 | HFSPLUS_SB(sb).file_count++; | 395 | sbi->file_count++; |
400 | inode->i_op = &page_symlink_inode_operations; | 396 | inode->i_op = &page_symlink_inode_operations; |
401 | inode->i_mapping->a_ops = &hfsplus_aops; | 397 | inode->i_mapping->a_ops = &hfsplus_aops; |
402 | HFSPLUS_I(inode).clump_blocks = 1; | 398 | hip->clump_blocks = 1; |
403 | } else | 399 | } else |
404 | HFSPLUS_SB(sb).file_count++; | 400 | sbi->file_count++; |
405 | insert_inode_hash(inode); | 401 | insert_inode_hash(inode); |
406 | mark_inode_dirty(inode); | 402 | mark_inode_dirty(inode); |
407 | sb->s_dirt = 1; | 403 | sb->s_dirt = 1; |
@@ -414,11 +410,11 @@ void hfsplus_delete_inode(struct inode *inode) | |||
414 | struct super_block *sb = inode->i_sb; | 410 | struct super_block *sb = inode->i_sb; |
415 | 411 | ||
416 | if (S_ISDIR(inode->i_mode)) { | 412 | if (S_ISDIR(inode->i_mode)) { |
417 | HFSPLUS_SB(sb).folder_count--; | 413 | HFSPLUS_SB(sb)->folder_count--; |
418 | sb->s_dirt = 1; | 414 | sb->s_dirt = 1; |
419 | return; | 415 | return; |
420 | } | 416 | } |
421 | HFSPLUS_SB(sb).file_count--; | 417 | HFSPLUS_SB(sb)->file_count--; |
422 | if (S_ISREG(inode->i_mode)) { | 418 | if (S_ISREG(inode->i_mode)) { |
423 | if (!inode->i_nlink) { | 419 | if (!inode->i_nlink) { |
424 | inode->i_size = 0; | 420 | inode->i_size = 0; |
@@ -434,34 +430,39 @@ void hfsplus_delete_inode(struct inode *inode) | |||
434 | void hfsplus_inode_read_fork(struct inode *inode, struct hfsplus_fork_raw *fork) | 430 | void hfsplus_inode_read_fork(struct inode *inode, struct hfsplus_fork_raw *fork) |
435 | { | 431 | { |
436 | struct super_block *sb = inode->i_sb; | 432 | struct super_block *sb = inode->i_sb; |
433 | struct hfsplus_sb_info *sbi = HFSPLUS_SB(sb); | ||
434 | struct hfsplus_inode_info *hip = HFSPLUS_I(inode); | ||
437 | u32 count; | 435 | u32 count; |
438 | int i; | 436 | int i; |
439 | 437 | ||
440 | memcpy(&HFSPLUS_I(inode).first_extents, &fork->extents, | 438 | memcpy(&hip->first_extents, &fork->extents, sizeof(hfsplus_extent_rec)); |
441 | sizeof(hfsplus_extent_rec)); | ||
442 | for (count = 0, i = 0; i < 8; i++) | 439 | for (count = 0, i = 0; i < 8; i++) |
443 | count += be32_to_cpu(fork->extents[i].block_count); | 440 | count += be32_to_cpu(fork->extents[i].block_count); |
444 | HFSPLUS_I(inode).first_blocks = count; | 441 | hip->first_blocks = count; |
445 | memset(HFSPLUS_I(inode).cached_extents, 0, sizeof(hfsplus_extent_rec)); | 442 | memset(hip->cached_extents, 0, sizeof(hfsplus_extent_rec)); |
446 | HFSPLUS_I(inode).cached_start = 0; | 443 | hip->cached_start = 0; |
447 | HFSPLUS_I(inode).cached_blocks = 0; | 444 | hip->cached_blocks = 0; |
448 | 445 | ||
449 | HFSPLUS_I(inode).alloc_blocks = be32_to_cpu(fork->total_blocks); | 446 | hip->alloc_blocks = be32_to_cpu(fork->total_blocks); |
450 | inode->i_size = HFSPLUS_I(inode).phys_size = be64_to_cpu(fork->total_size); | 447 | hip->phys_size = inode->i_size = be64_to_cpu(fork->total_size); |
451 | HFSPLUS_I(inode).fs_blocks = (inode->i_size + sb->s_blocksize - 1) >> sb->s_blocksize_bits; | 448 | hip->fs_blocks = |
452 | inode_set_bytes(inode, HFSPLUS_I(inode).fs_blocks << sb->s_blocksize_bits); | 449 | (inode->i_size + sb->s_blocksize - 1) >> sb->s_blocksize_bits; |
453 | HFSPLUS_I(inode).clump_blocks = be32_to_cpu(fork->clump_size) >> HFSPLUS_SB(sb).alloc_blksz_shift; | 450 | inode_set_bytes(inode, hip->fs_blocks << sb->s_blocksize_bits); |
454 | if (!HFSPLUS_I(inode).clump_blocks) | 451 | hip->clump_blocks = |
455 | HFSPLUS_I(inode).clump_blocks = HFSPLUS_IS_RSRC(inode) ? HFSPLUS_SB(sb).rsrc_clump_blocks : | 452 | be32_to_cpu(fork->clump_size) >> sbi->alloc_blksz_shift; |
456 | HFSPLUS_SB(sb).data_clump_blocks; | 453 | if (!hip->clump_blocks) { |
454 | hip->clump_blocks = HFSPLUS_IS_RSRC(inode) ? | ||
455 | sbi->rsrc_clump_blocks : | ||
456 | sbi->data_clump_blocks; | ||
457 | } | ||
457 | } | 458 | } |
458 | 459 | ||
459 | void hfsplus_inode_write_fork(struct inode *inode, struct hfsplus_fork_raw *fork) | 460 | void hfsplus_inode_write_fork(struct inode *inode, struct hfsplus_fork_raw *fork) |
460 | { | 461 | { |
461 | memcpy(&fork->extents, &HFSPLUS_I(inode).first_extents, | 462 | memcpy(&fork->extents, &HFSPLUS_I(inode)->first_extents, |
462 | sizeof(hfsplus_extent_rec)); | 463 | sizeof(hfsplus_extent_rec)); |
463 | fork->total_size = cpu_to_be64(inode->i_size); | 464 | fork->total_size = cpu_to_be64(inode->i_size); |
464 | fork->total_blocks = cpu_to_be32(HFSPLUS_I(inode).alloc_blocks); | 465 | fork->total_blocks = cpu_to_be32(HFSPLUS_I(inode)->alloc_blocks); |
465 | } | 466 | } |
466 | 467 | ||
467 | int hfsplus_cat_read_inode(struct inode *inode, struct hfs_find_data *fd) | 468 | int hfsplus_cat_read_inode(struct inode *inode, struct hfs_find_data *fd) |
@@ -472,7 +473,7 @@ int hfsplus_cat_read_inode(struct inode *inode, struct hfs_find_data *fd) | |||
472 | 473 | ||
473 | type = hfs_bnode_read_u16(fd->bnode, fd->entryoffset); | 474 | type = hfs_bnode_read_u16(fd->bnode, fd->entryoffset); |
474 | 475 | ||
475 | HFSPLUS_I(inode).dev = 0; | 476 | HFSPLUS_I(inode)->linkid = 0; |
476 | if (type == HFSPLUS_FOLDER) { | 477 | if (type == HFSPLUS_FOLDER) { |
477 | struct hfsplus_cat_folder *folder = &entry.folder; | 478 | struct hfsplus_cat_folder *folder = &entry.folder; |
478 | 479 | ||
@@ -486,8 +487,8 @@ int hfsplus_cat_read_inode(struct inode *inode, struct hfs_find_data *fd) | |||
486 | inode->i_atime = hfsp_mt2ut(folder->access_date); | 487 | inode->i_atime = hfsp_mt2ut(folder->access_date); |
487 | inode->i_mtime = hfsp_mt2ut(folder->content_mod_date); | 488 | inode->i_mtime = hfsp_mt2ut(folder->content_mod_date); |
488 | inode->i_ctime = hfsp_mt2ut(folder->attribute_mod_date); | 489 | inode->i_ctime = hfsp_mt2ut(folder->attribute_mod_date); |
489 | HFSPLUS_I(inode).create_date = folder->create_date; | 490 | HFSPLUS_I(inode)->create_date = folder->create_date; |
490 | HFSPLUS_I(inode).fs_blocks = 0; | 491 | HFSPLUS_I(inode)->fs_blocks = 0; |
491 | inode->i_op = &hfsplus_dir_inode_operations; | 492 | inode->i_op = &hfsplus_dir_inode_operations; |
492 | inode->i_fop = &hfsplus_dir_operations; | 493 | inode->i_fop = &hfsplus_dir_operations; |
493 | } else if (type == HFSPLUS_FILE) { | 494 | } else if (type == HFSPLUS_FILE) { |
@@ -518,7 +519,7 @@ int hfsplus_cat_read_inode(struct inode *inode, struct hfs_find_data *fd) | |||
518 | inode->i_atime = hfsp_mt2ut(file->access_date); | 519 | inode->i_atime = hfsp_mt2ut(file->access_date); |
519 | inode->i_mtime = hfsp_mt2ut(file->content_mod_date); | 520 | inode->i_mtime = hfsp_mt2ut(file->content_mod_date); |
520 | inode->i_ctime = hfsp_mt2ut(file->attribute_mod_date); | 521 | inode->i_ctime = hfsp_mt2ut(file->attribute_mod_date); |
521 | HFSPLUS_I(inode).create_date = file->create_date; | 522 | HFSPLUS_I(inode)->create_date = file->create_date; |
522 | } else { | 523 | } else { |
523 | printk(KERN_ERR "hfs: bad catalog entry used to create inode\n"); | 524 | printk(KERN_ERR "hfs: bad catalog entry used to create inode\n"); |
524 | res = -EIO; | 525 | res = -EIO; |
@@ -533,12 +534,12 @@ int hfsplus_cat_write_inode(struct inode *inode) | |||
533 | hfsplus_cat_entry entry; | 534 | hfsplus_cat_entry entry; |
534 | 535 | ||
535 | if (HFSPLUS_IS_RSRC(inode)) | 536 | if (HFSPLUS_IS_RSRC(inode)) |
536 | main_inode = HFSPLUS_I(inode).rsrc_inode; | 537 | main_inode = HFSPLUS_I(inode)->rsrc_inode; |
537 | 538 | ||
538 | if (!main_inode->i_nlink) | 539 | if (!main_inode->i_nlink) |
539 | return 0; | 540 | return 0; |
540 | 541 | ||
541 | if (hfs_find_init(HFSPLUS_SB(main_inode->i_sb).cat_tree, &fd)) | 542 | if (hfs_find_init(HFSPLUS_SB(main_inode->i_sb)->cat_tree, &fd)) |
542 | /* panic? */ | 543 | /* panic? */ |
543 | return -EIO; | 544 | return -EIO; |
544 | 545 | ||
@@ -554,7 +555,7 @@ int hfsplus_cat_write_inode(struct inode *inode) | |||
554 | hfs_bnode_read(fd.bnode, &entry, fd.entryoffset, | 555 | hfs_bnode_read(fd.bnode, &entry, fd.entryoffset, |
555 | sizeof(struct hfsplus_cat_folder)); | 556 | sizeof(struct hfsplus_cat_folder)); |
556 | /* simple node checks? */ | 557 | /* simple node checks? */ |
557 | hfsplus_set_perms(inode, &folder->permissions); | 558 | hfsplus_cat_set_perms(inode, &folder->permissions); |
558 | folder->access_date = hfsp_ut2mt(inode->i_atime); | 559 | folder->access_date = hfsp_ut2mt(inode->i_atime); |
559 | folder->content_mod_date = hfsp_ut2mt(inode->i_mtime); | 560 | folder->content_mod_date = hfsp_ut2mt(inode->i_mtime); |
560 | folder->attribute_mod_date = hfsp_ut2mt(inode->i_ctime); | 561 | folder->attribute_mod_date = hfsp_ut2mt(inode->i_ctime); |
@@ -576,11 +577,7 @@ int hfsplus_cat_write_inode(struct inode *inode) | |||
576 | hfs_bnode_read(fd.bnode, &entry, fd.entryoffset, | 577 | hfs_bnode_read(fd.bnode, &entry, fd.entryoffset, |
577 | sizeof(struct hfsplus_cat_file)); | 578 | sizeof(struct hfsplus_cat_file)); |
578 | hfsplus_inode_write_fork(inode, &file->data_fork); | 579 | hfsplus_inode_write_fork(inode, &file->data_fork); |
579 | if (S_ISREG(inode->i_mode)) | 580 | hfsplus_cat_set_perms(inode, &file->permissions); |
580 | HFSPLUS_I(inode).dev = inode->i_nlink; | ||
581 | if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) | ||
582 | HFSPLUS_I(inode).dev = kdev_t_to_nr(inode->i_rdev); | ||
583 | hfsplus_set_perms(inode, &file->permissions); | ||
584 | if ((file->permissions.rootflags | file->permissions.userflags) & HFSPLUS_FLG_IMMUTABLE) | 581 | if ((file->permissions.rootflags | file->permissions.userflags) & HFSPLUS_FLG_IMMUTABLE) |
585 | file->flags |= cpu_to_be16(HFSPLUS_FILE_LOCKED); | 582 | file->flags |= cpu_to_be16(HFSPLUS_FILE_LOCKED); |
586 | else | 583 | else |
diff --git a/fs/hfsplus/ioctl.c b/fs/hfsplus/ioctl.c index ac405f099026..5b4667e08ef7 100644 --- a/fs/hfsplus/ioctl.c +++ b/fs/hfsplus/ioctl.c | |||
@@ -17,83 +17,98 @@ | |||
17 | #include <linux/mount.h> | 17 | #include <linux/mount.h> |
18 | #include <linux/sched.h> | 18 | #include <linux/sched.h> |
19 | #include <linux/xattr.h> | 19 | #include <linux/xattr.h> |
20 | #include <linux/smp_lock.h> | ||
21 | #include <asm/uaccess.h> | 20 | #include <asm/uaccess.h> |
22 | #include "hfsplus_fs.h" | 21 | #include "hfsplus_fs.h" |
23 | 22 | ||
24 | long hfsplus_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) | 23 | static int hfsplus_ioctl_getflags(struct file *file, int __user *user_flags) |
25 | { | 24 | { |
26 | struct inode *inode = filp->f_path.dentry->d_inode; | 25 | struct inode *inode = file->f_path.dentry->d_inode; |
26 | struct hfsplus_inode_info *hip = HFSPLUS_I(inode); | ||
27 | unsigned int flags = 0; | ||
28 | |||
29 | if (inode->i_flags & S_IMMUTABLE) | ||
30 | flags |= FS_IMMUTABLE_FL; | ||
31 | if (inode->i_flags & S_APPEND) | |
32 | flags |= FS_APPEND_FL; | ||
33 | if (hip->userflags & HFSPLUS_FLG_NODUMP) | ||
34 | flags |= FS_NODUMP_FL; | ||
35 | |||
36 | return put_user(flags, user_flags); | ||
37 | } | ||
38 | |||
39 | static int hfsplus_ioctl_setflags(struct file *file, int __user *user_flags) | ||
40 | { | ||
41 | struct inode *inode = file->f_path.dentry->d_inode; | ||
42 | struct hfsplus_inode_info *hip = HFSPLUS_I(inode); | ||
27 | unsigned int flags; | 43 | unsigned int flags; |
44 | int err = 0; | ||
28 | 45 | ||
29 | lock_kernel(); | 46 | err = mnt_want_write(file->f_path.mnt); |
30 | switch (cmd) { | 47 | if (err) |
31 | case HFSPLUS_IOC_EXT2_GETFLAGS: | 48 | goto out; |
32 | flags = 0; | ||
33 | if (HFSPLUS_I(inode).rootflags & HFSPLUS_FLG_IMMUTABLE) | ||
34 | flags |= FS_IMMUTABLE_FL; /* EXT2_IMMUTABLE_FL */ | ||
35 | if (HFSPLUS_I(inode).rootflags & HFSPLUS_FLG_APPEND) | ||
36 | flags |= FS_APPEND_FL; /* EXT2_APPEND_FL */ | ||
37 | if (HFSPLUS_I(inode).userflags & HFSPLUS_FLG_NODUMP) | ||
38 | flags |= FS_NODUMP_FL; /* EXT2_NODUMP_FL */ | ||
39 | return put_user(flags, (int __user *)arg); | ||
40 | case HFSPLUS_IOC_EXT2_SETFLAGS: { | ||
41 | int err = 0; | ||
42 | err = mnt_want_write(filp->f_path.mnt); | ||
43 | if (err) { | ||
44 | unlock_kernel(); | ||
45 | return err; | ||
46 | } | ||
47 | 49 | ||
48 | if (!is_owner_or_cap(inode)) { | 50 | if (!is_owner_or_cap(inode)) { |
49 | err = -EACCES; | 51 | err = -EACCES; |
50 | goto setflags_out; | 52 | goto out_drop_write; |
51 | } | 53 | } |
52 | if (get_user(flags, (int __user *)arg)) { | ||
53 | err = -EFAULT; | ||
54 | goto setflags_out; | ||
55 | } | ||
56 | if (flags & (FS_IMMUTABLE_FL|FS_APPEND_FL) || | ||
57 | HFSPLUS_I(inode).rootflags & (HFSPLUS_FLG_IMMUTABLE|HFSPLUS_FLG_APPEND)) { | ||
58 | if (!capable(CAP_LINUX_IMMUTABLE)) { | ||
59 | err = -EPERM; | ||
60 | goto setflags_out; | ||
61 | } | ||
62 | } | ||
63 | 54 | ||
64 | /* don't silently ignore unsupported ext2 flags */ | 55 | if (get_user(flags, user_flags)) { |
65 | if (flags & ~(FS_IMMUTABLE_FL|FS_APPEND_FL|FS_NODUMP_FL)) { | 56 | err = -EFAULT; |
66 | err = -EOPNOTSUPP; | 57 | goto out_drop_write; |
67 | goto setflags_out; | 58 | } |
68 | } | 59 | |
69 | if (flags & FS_IMMUTABLE_FL) { /* EXT2_IMMUTABLE_FL */ | 60 | mutex_lock(&inode->i_mutex); |
70 | inode->i_flags |= S_IMMUTABLE; | 61 | |
71 | HFSPLUS_I(inode).rootflags |= HFSPLUS_FLG_IMMUTABLE; | 62 | if ((flags & (FS_IMMUTABLE_FL|FS_APPEND_FL)) || |
72 | } else { | 63 | inode->i_flags & (S_IMMUTABLE|S_APPEND)) { |
73 | inode->i_flags &= ~S_IMMUTABLE; | 64 | if (!capable(CAP_LINUX_IMMUTABLE)) { |
74 | HFSPLUS_I(inode).rootflags &= ~HFSPLUS_FLG_IMMUTABLE; | 65 | err = -EPERM; |
75 | } | 66 | goto out_unlock_inode; |
76 | if (flags & FS_APPEND_FL) { /* EXT2_APPEND_FL */ | ||
77 | inode->i_flags |= S_APPEND; | ||
78 | HFSPLUS_I(inode).rootflags |= HFSPLUS_FLG_APPEND; | ||
79 | } else { | ||
80 | inode->i_flags &= ~S_APPEND; | ||
81 | HFSPLUS_I(inode).rootflags &= ~HFSPLUS_FLG_APPEND; | ||
82 | } | 67 | } |
83 | if (flags & FS_NODUMP_FL) /* EXT2_NODUMP_FL */ | ||
84 | HFSPLUS_I(inode).userflags |= HFSPLUS_FLG_NODUMP; | ||
85 | else | ||
86 | HFSPLUS_I(inode).userflags &= ~HFSPLUS_FLG_NODUMP; | ||
87 | |||
88 | inode->i_ctime = CURRENT_TIME_SEC; | ||
89 | mark_inode_dirty(inode); | ||
90 | setflags_out: | ||
91 | mnt_drop_write(filp->f_path.mnt); | ||
92 | unlock_kernel(); | ||
93 | return err; | ||
94 | } | 68 | } |
69 | |||
70 | /* don't silently ignore unsupported ext2 flags */ | ||
71 | if (flags & ~(FS_IMMUTABLE_FL|FS_APPEND_FL|FS_NODUMP_FL)) { | ||
72 | err = -EOPNOTSUPP; | ||
73 | goto out_unlock_inode; | ||
74 | } | ||
75 | |||
76 | if (flags & FS_IMMUTABLE_FL) | ||
77 | inode->i_flags |= S_IMMUTABLE; | ||
78 | else | ||
79 | inode->i_flags &= ~S_IMMUTABLE; | ||
80 | |||
81 | if (flags & FS_APPEND_FL) | ||
82 | inode->i_flags |= S_APPEND; | ||
83 | else | ||
84 | inode->i_flags &= ~S_APPEND; | ||
85 | |||
86 | if (flags & FS_NODUMP_FL) | ||
87 | hip->userflags |= HFSPLUS_FLG_NODUMP; | ||
88 | else | ||
89 | hip->userflags &= ~HFSPLUS_FLG_NODUMP; | ||
90 | |||
91 | inode->i_ctime = CURRENT_TIME_SEC; | ||
92 | mark_inode_dirty(inode); | ||
93 | |||
94 | out_unlock_inode: | ||
95 | mutex_unlock(&inode->i_mutex); | |
96 | out_drop_write: | ||
97 | mnt_drop_write(file->f_path.mnt); | ||
98 | out: | ||
99 | return err; | ||
100 | } | ||
101 | |||
102 | long hfsplus_ioctl(struct file *file, unsigned int cmd, unsigned long arg) | ||
103 | { | ||
104 | void __user *argp = (void __user *)arg; | ||
105 | |||
106 | switch (cmd) { | ||
107 | case HFSPLUS_IOC_EXT2_GETFLAGS: | ||
108 | return hfsplus_ioctl_getflags(file, argp); | ||
109 | case HFSPLUS_IOC_EXT2_SETFLAGS: | ||
110 | return hfsplus_ioctl_setflags(file, argp); | ||
95 | default: | 111 | default: |
96 | unlock_kernel(); | ||
97 | return -ENOTTY; | 112 | return -ENOTTY; |
98 | } | 113 | } |
99 | } | 114 | } |
@@ -110,7 +125,7 @@ int hfsplus_setxattr(struct dentry *dentry, const char *name, | |||
110 | if (!S_ISREG(inode->i_mode) || HFSPLUS_IS_RSRC(inode)) | 125 | if (!S_ISREG(inode->i_mode) || HFSPLUS_IS_RSRC(inode)) |
111 | return -EOPNOTSUPP; | 126 | return -EOPNOTSUPP; |
112 | 127 | ||
113 | res = hfs_find_init(HFSPLUS_SB(inode->i_sb).cat_tree, &fd); | 128 | res = hfs_find_init(HFSPLUS_SB(inode->i_sb)->cat_tree, &fd); |
114 | if (res) | 129 | if (res) |
115 | return res; | 130 | return res; |
116 | res = hfsplus_find_cat(inode->i_sb, inode->i_ino, &fd); | 131 | res = hfsplus_find_cat(inode->i_sb, inode->i_ino, &fd); |
@@ -153,7 +168,7 @@ ssize_t hfsplus_getxattr(struct dentry *dentry, const char *name, | |||
153 | return -EOPNOTSUPP; | 168 | return -EOPNOTSUPP; |
154 | 169 | ||
155 | if (size) { | 170 | if (size) { |
156 | res = hfs_find_init(HFSPLUS_SB(inode->i_sb).cat_tree, &fd); | 171 | res = hfs_find_init(HFSPLUS_SB(inode->i_sb)->cat_tree, &fd); |
157 | if (res) | 172 | if (res) |
158 | return res; | 173 | return res; |
159 | res = hfsplus_find_cat(inode->i_sb, inode->i_ino, &fd); | 174 | res = hfsplus_find_cat(inode->i_sb, inode->i_ino, &fd); |
@@ -177,7 +192,7 @@ ssize_t hfsplus_getxattr(struct dentry *dentry, const char *name, | |||
177 | } else | 192 | } else |
178 | res = size ? -ERANGE : 4; | 193 | res = size ? -ERANGE : 4; |
179 | } else | 194 | } else |
180 | res = -ENODATA; | 195 | res = -EOPNOTSUPP; |
181 | out: | 196 | out: |
182 | if (size) | 197 | if (size) |
183 | hfs_find_exit(&fd); | 198 | hfs_find_exit(&fd); |
diff --git a/fs/hfsplus/options.c b/fs/hfsplus/options.c index 572628b4b07d..f9ab276a4d8d 100644 --- a/fs/hfsplus/options.c +++ b/fs/hfsplus/options.c | |||
@@ -143,13 +143,13 @@ int hfsplus_parse_options(char *input, struct hfsplus_sb_info *sbi) | |||
143 | kfree(p); | 143 | kfree(p); |
144 | break; | 144 | break; |
145 | case opt_decompose: | 145 | case opt_decompose: |
146 | sbi->flags &= ~HFSPLUS_SB_NODECOMPOSE; | 146 | clear_bit(HFSPLUS_SB_NODECOMPOSE, &sbi->flags); |
147 | break; | 147 | break; |
148 | case opt_nodecompose: | 148 | case opt_nodecompose: |
149 | sbi->flags |= HFSPLUS_SB_NODECOMPOSE; | 149 | set_bit(HFSPLUS_SB_NODECOMPOSE, &sbi->flags); |
150 | break; | 150 | break; |
151 | case opt_force: | 151 | case opt_force: |
152 | sbi->flags |= HFSPLUS_SB_FORCE; | 152 | set_bit(HFSPLUS_SB_FORCE, &sbi->flags); |
153 | break; | 153 | break; |
154 | default: | 154 | default: |
155 | return 0; | 155 | return 0; |
@@ -171,7 +171,7 @@ done: | |||
171 | 171 | ||
172 | int hfsplus_show_options(struct seq_file *seq, struct vfsmount *mnt) | 172 | int hfsplus_show_options(struct seq_file *seq, struct vfsmount *mnt) |
173 | { | 173 | { |
174 | struct hfsplus_sb_info *sbi = &HFSPLUS_SB(mnt->mnt_sb); | 174 | struct hfsplus_sb_info *sbi = HFSPLUS_SB(mnt->mnt_sb); |
175 | 175 | ||
176 | if (sbi->creator != HFSPLUS_DEF_CR_TYPE) | 176 | if (sbi->creator != HFSPLUS_DEF_CR_TYPE) |
177 | seq_printf(seq, ",creator=%.4s", (char *)&sbi->creator); | 177 | seq_printf(seq, ",creator=%.4s", (char *)&sbi->creator); |
@@ -184,7 +184,7 @@ int hfsplus_show_options(struct seq_file *seq, struct vfsmount *mnt) | |||
184 | seq_printf(seq, ",session=%u", sbi->session); | 184 | seq_printf(seq, ",session=%u", sbi->session); |
185 | if (sbi->nls) | 185 | if (sbi->nls) |
186 | seq_printf(seq, ",nls=%s", sbi->nls->charset); | 186 | seq_printf(seq, ",nls=%s", sbi->nls->charset); |
187 | if (sbi->flags & HFSPLUS_SB_NODECOMPOSE) | 187 | if (test_bit(HFSPLUS_SB_NODECOMPOSE, &sbi->flags)) |
188 | seq_printf(seq, ",nodecompose"); | 188 | seq_printf(seq, ",nodecompose"); |
189 | return 0; | 189 | return 0; |
190 | } | 190 | } |
diff --git a/fs/hfsplus/part_tbl.c b/fs/hfsplus/part_tbl.c index 1528a6fd0299..208b16c645cc 100644 --- a/fs/hfsplus/part_tbl.c +++ b/fs/hfsplus/part_tbl.c | |||
@@ -74,6 +74,7 @@ struct old_pmap { | |||
74 | int hfs_part_find(struct super_block *sb, | 74 | int hfs_part_find(struct super_block *sb, |
75 | sector_t *part_start, sector_t *part_size) | 75 | sector_t *part_start, sector_t *part_size) |
76 | { | 76 | { |
77 | struct hfsplus_sb_info *sbi = HFSPLUS_SB(sb); | ||
77 | struct buffer_head *bh; | 78 | struct buffer_head *bh; |
78 | __be16 *data; | 79 | __be16 *data; |
79 | int i, size, res; | 80 | int i, size, res; |
@@ -95,7 +96,7 @@ int hfs_part_find(struct super_block *sb, | |||
95 | for (i = 0; i < size; p++, i++) { | 96 | for (i = 0; i < size; p++, i++) { |
96 | if (p->pdStart && p->pdSize && | 97 | if (p->pdStart && p->pdSize && |
97 | p->pdFSID == cpu_to_be32(0x54465331)/*"TFS1"*/ && | 98 | p->pdFSID == cpu_to_be32(0x54465331)/*"TFS1"*/ && |
98 | (HFSPLUS_SB(sb).part < 0 || HFSPLUS_SB(sb).part == i)) { | 99 | (sbi->part < 0 || sbi->part == i)) { |
99 | *part_start += be32_to_cpu(p->pdStart); | 100 | *part_start += be32_to_cpu(p->pdStart); |
100 | *part_size = be32_to_cpu(p->pdSize); | 101 | *part_size = be32_to_cpu(p->pdSize); |
101 | res = 0; | 102 | res = 0; |
@@ -111,7 +112,7 @@ int hfs_part_find(struct super_block *sb, | |||
111 | size = be32_to_cpu(pm->pmMapBlkCnt); | 112 | size = be32_to_cpu(pm->pmMapBlkCnt); |
112 | for (i = 0; i < size;) { | 113 | for (i = 0; i < size;) { |
113 | if (!memcmp(pm->pmPartType,"Apple_HFS", 9) && | 114 | if (!memcmp(pm->pmPartType,"Apple_HFS", 9) && |
114 | (HFSPLUS_SB(sb).part < 0 || HFSPLUS_SB(sb).part == i)) { | 115 | (sbi->part < 0 || sbi->part == i)) { |
115 | *part_start += be32_to_cpu(pm->pmPyPartStart); | 116 | *part_start += be32_to_cpu(pm->pmPyPartStart); |
116 | *part_size = be32_to_cpu(pm->pmPartBlkCnt); | 117 | *part_size = be32_to_cpu(pm->pmPartBlkCnt); |
117 | res = 0; | 118 | res = 0; |
diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c index 3b55c050c742..9a88d7536103 100644 --- a/fs/hfsplus/super.c +++ b/fs/hfsplus/super.c | |||
@@ -12,7 +12,6 @@ | |||
12 | #include <linux/pagemap.h> | 12 | #include <linux/pagemap.h> |
13 | #include <linux/fs.h> | 13 | #include <linux/fs.h> |
14 | #include <linux/slab.h> | 14 | #include <linux/slab.h> |
15 | #include <linux/smp_lock.h> | ||
16 | #include <linux/vfs.h> | 15 | #include <linux/vfs.h> |
17 | #include <linux/nls.h> | 16 | #include <linux/nls.h> |
18 | 17 | ||
@@ -21,40 +20,11 @@ static void hfsplus_destroy_inode(struct inode *inode); | |||
21 | 20 | ||
22 | #include "hfsplus_fs.h" | 21 | #include "hfsplus_fs.h" |
23 | 22 | ||
24 | struct inode *hfsplus_iget(struct super_block *sb, unsigned long ino) | 23 | static int hfsplus_system_read_inode(struct inode *inode) |
25 | { | 24 | { |
26 | struct hfs_find_data fd; | 25 | struct hfsplus_vh *vhdr = HFSPLUS_SB(inode->i_sb)->s_vhdr; |
27 | struct hfsplus_vh *vhdr; | ||
28 | struct inode *inode; | ||
29 | long err = -EIO; | ||
30 | |||
31 | inode = iget_locked(sb, ino); | ||
32 | if (!inode) | ||
33 | return ERR_PTR(-ENOMEM); | ||
34 | if (!(inode->i_state & I_NEW)) | ||
35 | return inode; | ||
36 | 26 | ||
37 | INIT_LIST_HEAD(&HFSPLUS_I(inode).open_dir_list); | 27 | switch (inode->i_ino) { |
38 | mutex_init(&HFSPLUS_I(inode).extents_lock); | ||
39 | HFSPLUS_I(inode).flags = 0; | ||
40 | HFSPLUS_I(inode).rsrc_inode = NULL; | ||
41 | atomic_set(&HFSPLUS_I(inode).opencnt, 0); | ||
42 | |||
43 | if (inode->i_ino >= HFSPLUS_FIRSTUSER_CNID) { | ||
44 | read_inode: | ||
45 | hfs_find_init(HFSPLUS_SB(inode->i_sb).cat_tree, &fd); | ||
46 | err = hfsplus_find_cat(inode->i_sb, inode->i_ino, &fd); | ||
47 | if (!err) | ||
48 | err = hfsplus_cat_read_inode(inode, &fd); | ||
49 | hfs_find_exit(&fd); | ||
50 | if (err) | ||
51 | goto bad_inode; | ||
52 | goto done; | ||
53 | } | ||
54 | vhdr = HFSPLUS_SB(inode->i_sb).s_vhdr; | ||
55 | switch(inode->i_ino) { | ||
56 | case HFSPLUS_ROOT_CNID: | ||
57 | goto read_inode; | ||
58 | case HFSPLUS_EXT_CNID: | 28 | case HFSPLUS_EXT_CNID: |
59 | hfsplus_inode_read_fork(inode, &vhdr->ext_file); | 29 | hfsplus_inode_read_fork(inode, &vhdr->ext_file); |
60 | inode->i_mapping->a_ops = &hfsplus_btree_aops; | 30 | inode->i_mapping->a_ops = &hfsplus_btree_aops; |
@@ -75,74 +45,101 @@ struct inode *hfsplus_iget(struct super_block *sb, unsigned long ino) | |||
75 | inode->i_mapping->a_ops = &hfsplus_btree_aops; | 45 | inode->i_mapping->a_ops = &hfsplus_btree_aops; |
76 | break; | 46 | break; |
77 | default: | 47 | default: |
78 | goto bad_inode; | 48 | return -EIO; |
49 | } | ||
50 | |||
51 | return 0; | ||
52 | } | ||
53 | |||
54 | struct inode *hfsplus_iget(struct super_block *sb, unsigned long ino) | ||
55 | { | ||
56 | struct hfs_find_data fd; | ||
57 | struct inode *inode; | ||
58 | int err; | ||
59 | |||
60 | inode = iget_locked(sb, ino); | ||
61 | if (!inode) | ||
62 | return ERR_PTR(-ENOMEM); | ||
63 | if (!(inode->i_state & I_NEW)) | ||
64 | return inode; | ||
65 | |||
66 | INIT_LIST_HEAD(&HFSPLUS_I(inode)->open_dir_list); | ||
67 | mutex_init(&HFSPLUS_I(inode)->extents_lock); | ||
68 | HFSPLUS_I(inode)->flags = 0; | ||
69 | HFSPLUS_I(inode)->rsrc_inode = NULL; | ||
70 | atomic_set(&HFSPLUS_I(inode)->opencnt, 0); | ||
71 | |||
72 | if (inode->i_ino >= HFSPLUS_FIRSTUSER_CNID || | ||
73 | inode->i_ino == HFSPLUS_ROOT_CNID) { | ||
74 | hfs_find_init(HFSPLUS_SB(inode->i_sb)->cat_tree, &fd); | ||
75 | err = hfsplus_find_cat(inode->i_sb, inode->i_ino, &fd); | ||
76 | if (!err) | ||
77 | err = hfsplus_cat_read_inode(inode, &fd); | ||
78 | hfs_find_exit(&fd); | ||
79 | } else { | ||
80 | err = hfsplus_system_read_inode(inode); | ||
81 | } | ||
82 | |||
83 | if (err) { | ||
84 | iget_failed(inode); | ||
85 | return ERR_PTR(err); | ||
79 | } | 86 | } |
80 | 87 | ||
81 | done: | ||
82 | unlock_new_inode(inode); | 88 | unlock_new_inode(inode); |
83 | return inode; | 89 | return inode; |
84 | |||
85 | bad_inode: | ||
86 | iget_failed(inode); | ||
87 | return ERR_PTR(err); | ||
88 | } | 90 | } |
89 | 91 | ||
90 | static int hfsplus_write_inode(struct inode *inode, | 92 | static int hfsplus_system_write_inode(struct inode *inode) |
91 | struct writeback_control *wbc) | ||
92 | { | 93 | { |
93 | struct hfsplus_vh *vhdr; | 94 | struct hfsplus_sb_info *sbi = HFSPLUS_SB(inode->i_sb); |
94 | int ret = 0; | 95 | struct hfsplus_vh *vhdr = sbi->s_vhdr; |
96 | struct hfsplus_fork_raw *fork; | ||
97 | struct hfs_btree *tree = NULL; | ||
95 | 98 | ||
96 | dprint(DBG_INODE, "hfsplus_write_inode: %lu\n", inode->i_ino); | ||
97 | hfsplus_ext_write_extent(inode); | ||
98 | if (inode->i_ino >= HFSPLUS_FIRSTUSER_CNID) { | ||
99 | return hfsplus_cat_write_inode(inode); | ||
100 | } | ||
101 | vhdr = HFSPLUS_SB(inode->i_sb).s_vhdr; | ||
102 | switch (inode->i_ino) { | 99 | switch (inode->i_ino) { |
103 | case HFSPLUS_ROOT_CNID: | ||
104 | ret = hfsplus_cat_write_inode(inode); | ||
105 | break; | ||
106 | case HFSPLUS_EXT_CNID: | 100 | case HFSPLUS_EXT_CNID: |
107 | if (vhdr->ext_file.total_size != cpu_to_be64(inode->i_size)) { | 101 | fork = &vhdr->ext_file; |
108 | HFSPLUS_SB(inode->i_sb).flags |= HFSPLUS_SB_WRITEBACKUP; | 102 | tree = sbi->ext_tree; |
109 | inode->i_sb->s_dirt = 1; | ||
110 | } | ||
111 | hfsplus_inode_write_fork(inode, &vhdr->ext_file); | ||
112 | hfs_btree_write(HFSPLUS_SB(inode->i_sb).ext_tree); | ||
113 | break; | 103 | break; |
114 | case HFSPLUS_CAT_CNID: | 104 | case HFSPLUS_CAT_CNID: |
115 | if (vhdr->cat_file.total_size != cpu_to_be64(inode->i_size)) { | 105 | fork = &vhdr->cat_file; |
116 | HFSPLUS_SB(inode->i_sb).flags |= HFSPLUS_SB_WRITEBACKUP; | 106 | tree = sbi->cat_tree; |
117 | inode->i_sb->s_dirt = 1; | ||
118 | } | ||
119 | hfsplus_inode_write_fork(inode, &vhdr->cat_file); | ||
120 | hfs_btree_write(HFSPLUS_SB(inode->i_sb).cat_tree); | ||
121 | break; | 107 | break; |
122 | case HFSPLUS_ALLOC_CNID: | 108 | case HFSPLUS_ALLOC_CNID: |
123 | if (vhdr->alloc_file.total_size != cpu_to_be64(inode->i_size)) { | 109 | fork = &vhdr->alloc_file; |
124 | HFSPLUS_SB(inode->i_sb).flags |= HFSPLUS_SB_WRITEBACKUP; | ||
125 | inode->i_sb->s_dirt = 1; | ||
126 | } | ||
127 | hfsplus_inode_write_fork(inode, &vhdr->alloc_file); | ||
128 | break; | 110 | break; |
129 | case HFSPLUS_START_CNID: | 111 | case HFSPLUS_START_CNID: |
130 | if (vhdr->start_file.total_size != cpu_to_be64(inode->i_size)) { | 112 | fork = &vhdr->start_file; |
131 | HFSPLUS_SB(inode->i_sb).flags |= HFSPLUS_SB_WRITEBACKUP; | ||
132 | inode->i_sb->s_dirt = 1; | ||
133 | } | ||
134 | hfsplus_inode_write_fork(inode, &vhdr->start_file); | ||
135 | break; | 113 | break; |
136 | case HFSPLUS_ATTR_CNID: | 114 | case HFSPLUS_ATTR_CNID: |
137 | if (vhdr->attr_file.total_size != cpu_to_be64(inode->i_size)) { | 115 | fork = &vhdr->attr_file; |
138 | HFSPLUS_SB(inode->i_sb).flags |= HFSPLUS_SB_WRITEBACKUP; | 116 | tree = sbi->attr_tree; break; |
139 | inode->i_sb->s_dirt = 1; | 117 | default: |
140 | } | 118 | return -EIO; |
141 | hfsplus_inode_write_fork(inode, &vhdr->attr_file); | 119 | } |
142 | hfs_btree_write(HFSPLUS_SB(inode->i_sb).attr_tree); | 120 | |
143 | break; | 121 | if (fork->total_size != cpu_to_be64(inode->i_size)) { |
122 | set_bit(HFSPLUS_SB_WRITEBACKUP, &sbi->flags); | ||
123 | inode->i_sb->s_dirt = 1; | ||
144 | } | 124 | } |
145 | return ret; | 125 | hfsplus_inode_write_fork(inode, fork); |
126 | if (tree) | ||
127 | hfs_btree_write(tree); | ||
128 | return 0; | ||
129 | } | ||
130 | |||
131 | static int hfsplus_write_inode(struct inode *inode, | ||
132 | struct writeback_control *wbc) | ||
133 | { | ||
134 | dprint(DBG_INODE, "hfsplus_write_inode: %lu\n", inode->i_ino); | ||
135 | |||
136 | hfsplus_ext_write_extent(inode); | ||
137 | |||
138 | if (inode->i_ino >= HFSPLUS_FIRSTUSER_CNID || | ||
139 | inode->i_ino == HFSPLUS_ROOT_CNID) | ||
140 | return hfsplus_cat_write_inode(inode); | ||
141 | else | ||
142 | return hfsplus_system_write_inode(inode); | ||
146 | } | 143 | } |
147 | 144 | ||
148 | static void hfsplus_evict_inode(struct inode *inode) | 145 | static void hfsplus_evict_inode(struct inode *inode) |
@@ -151,51 +148,53 @@ static void hfsplus_evict_inode(struct inode *inode) | |||
151 | truncate_inode_pages(&inode->i_data, 0); | 148 | truncate_inode_pages(&inode->i_data, 0); |
152 | end_writeback(inode); | 149 | end_writeback(inode); |
153 | if (HFSPLUS_IS_RSRC(inode)) { | 150 | if (HFSPLUS_IS_RSRC(inode)) { |
154 | HFSPLUS_I(HFSPLUS_I(inode).rsrc_inode).rsrc_inode = NULL; | 151 | HFSPLUS_I(HFSPLUS_I(inode)->rsrc_inode)->rsrc_inode = NULL; |
155 | iput(HFSPLUS_I(inode).rsrc_inode); | 152 | iput(HFSPLUS_I(inode)->rsrc_inode); |
156 | } | 153 | } |
157 | } | 154 | } |
158 | 155 | ||
159 | int hfsplus_sync_fs(struct super_block *sb, int wait) | 156 | int hfsplus_sync_fs(struct super_block *sb, int wait) |
160 | { | 157 | { |
161 | struct hfsplus_vh *vhdr = HFSPLUS_SB(sb).s_vhdr; | 158 | struct hfsplus_sb_info *sbi = HFSPLUS_SB(sb); |
159 | struct hfsplus_vh *vhdr = sbi->s_vhdr; | ||
162 | 160 | ||
163 | dprint(DBG_SUPER, "hfsplus_write_super\n"); | 161 | dprint(DBG_SUPER, "hfsplus_write_super\n"); |
164 | 162 | ||
165 | lock_super(sb); | 163 | mutex_lock(&sbi->vh_mutex); |
164 | mutex_lock(&sbi->alloc_mutex); | ||
166 | sb->s_dirt = 0; | 165 | sb->s_dirt = 0; |
167 | 166 | ||
168 | vhdr->free_blocks = cpu_to_be32(HFSPLUS_SB(sb).free_blocks); | 167 | vhdr->free_blocks = cpu_to_be32(sbi->free_blocks); |
169 | vhdr->next_alloc = cpu_to_be32(HFSPLUS_SB(sb).next_alloc); | 168 | vhdr->next_cnid = cpu_to_be32(sbi->next_cnid); |
170 | vhdr->next_cnid = cpu_to_be32(HFSPLUS_SB(sb).next_cnid); | 169 | vhdr->folder_count = cpu_to_be32(sbi->folder_count); |
171 | vhdr->folder_count = cpu_to_be32(HFSPLUS_SB(sb).folder_count); | 170 | vhdr->file_count = cpu_to_be32(sbi->file_count); |
172 | vhdr->file_count = cpu_to_be32(HFSPLUS_SB(sb).file_count); | ||
173 | 171 | ||
174 | mark_buffer_dirty(HFSPLUS_SB(sb).s_vhbh); | 172 | mark_buffer_dirty(sbi->s_vhbh); |
175 | if (HFSPLUS_SB(sb).flags & HFSPLUS_SB_WRITEBACKUP) { | 173 | if (test_and_clear_bit(HFSPLUS_SB_WRITEBACKUP, &sbi->flags)) { |
176 | if (HFSPLUS_SB(sb).sect_count) { | 174 | if (sbi->sect_count) { |
177 | struct buffer_head *bh; | 175 | struct buffer_head *bh; |
178 | u32 block, offset; | 176 | u32 block, offset; |
179 | 177 | ||
180 | block = HFSPLUS_SB(sb).blockoffset; | 178 | block = sbi->blockoffset; |
181 | block += (HFSPLUS_SB(sb).sect_count - 2) >> (sb->s_blocksize_bits - 9); | 179 | block += (sbi->sect_count - 2) >> (sb->s_blocksize_bits - 9); |
182 | offset = ((HFSPLUS_SB(sb).sect_count - 2) << 9) & (sb->s_blocksize - 1); | 180 | offset = ((sbi->sect_count - 2) << 9) & (sb->s_blocksize - 1); |
183 | printk(KERN_DEBUG "hfs: backup: %u,%u,%u,%u\n", HFSPLUS_SB(sb).blockoffset, | 181 | printk(KERN_DEBUG "hfs: backup: %u,%u,%u,%u\n", |
184 | HFSPLUS_SB(sb).sect_count, block, offset); | 182 | sbi->blockoffset, sbi->sect_count, |
183 | block, offset); | ||
185 | bh = sb_bread(sb, block); | 184 | bh = sb_bread(sb, block); |
186 | if (bh) { | 185 | if (bh) { |
187 | vhdr = (struct hfsplus_vh *)(bh->b_data + offset); | 186 | vhdr = (struct hfsplus_vh *)(bh->b_data + offset); |
188 | if (be16_to_cpu(vhdr->signature) == HFSPLUS_VOLHEAD_SIG) { | 187 | if (be16_to_cpu(vhdr->signature) == HFSPLUS_VOLHEAD_SIG) { |
189 | memcpy(vhdr, HFSPLUS_SB(sb).s_vhdr, sizeof(*vhdr)); | 188 | memcpy(vhdr, sbi->s_vhdr, sizeof(*vhdr)); |
190 | mark_buffer_dirty(bh); | 189 | mark_buffer_dirty(bh); |
191 | brelse(bh); | 190 | brelse(bh); |
192 | } else | 191 | } else |
193 | printk(KERN_WARNING "hfs: backup not found!\n"); | 192 | printk(KERN_WARNING "hfs: backup not found!\n"); |
194 | } | 193 | } |
195 | } | 194 | } |
196 | HFSPLUS_SB(sb).flags &= ~HFSPLUS_SB_WRITEBACKUP; | ||
197 | } | 195 | } |
198 | unlock_super(sb); | 196 | mutex_unlock(&sbi->alloc_mutex); |
197 | mutex_unlock(&sbi->vh_mutex); | ||
199 | return 0; | 198 | return 0; |
200 | } | 199 | } |
201 | 200 | ||
@@ -209,48 +208,48 @@ static void hfsplus_write_super(struct super_block *sb) | |||
209 | 208 | ||
210 | static void hfsplus_put_super(struct super_block *sb) | 209 | static void hfsplus_put_super(struct super_block *sb) |
211 | { | 210 | { |
211 | struct hfsplus_sb_info *sbi = HFSPLUS_SB(sb); | ||
212 | |||
212 | dprint(DBG_SUPER, "hfsplus_put_super\n"); | 213 | dprint(DBG_SUPER, "hfsplus_put_super\n"); |
214 | |||
213 | if (!sb->s_fs_info) | 215 | if (!sb->s_fs_info) |
214 | return; | 216 | return; |
215 | 217 | ||
216 | lock_kernel(); | ||
217 | |||
218 | if (sb->s_dirt) | 218 | if (sb->s_dirt) |
219 | hfsplus_write_super(sb); | 219 | hfsplus_write_super(sb); |
220 | if (!(sb->s_flags & MS_RDONLY) && HFSPLUS_SB(sb).s_vhdr) { | 220 | if (!(sb->s_flags & MS_RDONLY) && sbi->s_vhdr) { |
221 | struct hfsplus_vh *vhdr = HFSPLUS_SB(sb).s_vhdr; | 221 | struct hfsplus_vh *vhdr = sbi->s_vhdr; |
222 | 222 | ||
223 | vhdr->modify_date = hfsp_now2mt(); | 223 | vhdr->modify_date = hfsp_now2mt(); |
224 | vhdr->attributes |= cpu_to_be32(HFSPLUS_VOL_UNMNT); | 224 | vhdr->attributes |= cpu_to_be32(HFSPLUS_VOL_UNMNT); |
225 | vhdr->attributes &= cpu_to_be32(~HFSPLUS_VOL_INCNSTNT); | 225 | vhdr->attributes &= cpu_to_be32(~HFSPLUS_VOL_INCNSTNT); |
226 | mark_buffer_dirty(HFSPLUS_SB(sb).s_vhbh); | 226 | mark_buffer_dirty(sbi->s_vhbh); |
227 | sync_dirty_buffer(HFSPLUS_SB(sb).s_vhbh); | 227 | sync_dirty_buffer(sbi->s_vhbh); |
228 | } | 228 | } |
229 | 229 | ||
230 | hfs_btree_close(HFSPLUS_SB(sb).cat_tree); | 230 | hfs_btree_close(sbi->cat_tree); |
231 | hfs_btree_close(HFSPLUS_SB(sb).ext_tree); | 231 | hfs_btree_close(sbi->ext_tree); |
232 | iput(HFSPLUS_SB(sb).alloc_file); | 232 | iput(sbi->alloc_file); |
233 | iput(HFSPLUS_SB(sb).hidden_dir); | 233 | iput(sbi->hidden_dir); |
234 | brelse(HFSPLUS_SB(sb).s_vhbh); | 234 | brelse(sbi->s_vhbh); |
235 | unload_nls(HFSPLUS_SB(sb).nls); | 235 | unload_nls(sbi->nls); |
236 | kfree(sb->s_fs_info); | 236 | kfree(sb->s_fs_info); |
237 | sb->s_fs_info = NULL; | 237 | sb->s_fs_info = NULL; |
238 | |||
239 | unlock_kernel(); | ||
240 | } | 238 | } |
241 | 239 | ||
242 | static int hfsplus_statfs(struct dentry *dentry, struct kstatfs *buf) | 240 | static int hfsplus_statfs(struct dentry *dentry, struct kstatfs *buf) |
243 | { | 241 | { |
244 | struct super_block *sb = dentry->d_sb; | 242 | struct super_block *sb = dentry->d_sb; |
243 | struct hfsplus_sb_info *sbi = HFSPLUS_SB(sb); | ||
245 | u64 id = huge_encode_dev(sb->s_bdev->bd_dev); | 244 | u64 id = huge_encode_dev(sb->s_bdev->bd_dev); |
246 | 245 | ||
247 | buf->f_type = HFSPLUS_SUPER_MAGIC; | 246 | buf->f_type = HFSPLUS_SUPER_MAGIC; |
248 | buf->f_bsize = sb->s_blocksize; | 247 | buf->f_bsize = sb->s_blocksize; |
249 | buf->f_blocks = HFSPLUS_SB(sb).total_blocks << HFSPLUS_SB(sb).fs_shift; | 248 | buf->f_blocks = sbi->total_blocks << sbi->fs_shift; |
250 | buf->f_bfree = HFSPLUS_SB(sb).free_blocks << HFSPLUS_SB(sb).fs_shift; | 249 | buf->f_bfree = sbi->free_blocks << sbi->fs_shift; |
251 | buf->f_bavail = buf->f_bfree; | 250 | buf->f_bavail = buf->f_bfree; |
252 | buf->f_files = 0xFFFFFFFF; | 251 | buf->f_files = 0xFFFFFFFF; |
253 | buf->f_ffree = 0xFFFFFFFF - HFSPLUS_SB(sb).next_cnid; | 252 | buf->f_ffree = 0xFFFFFFFF - sbi->next_cnid; |
254 | buf->f_fsid.val[0] = (u32)id; | 253 | buf->f_fsid.val[0] = (u32)id; |
255 | buf->f_fsid.val[1] = (u32)(id >> 32); | 254 | buf->f_fsid.val[1] = (u32)(id >> 32); |
256 | buf->f_namelen = HFSPLUS_MAX_STRLEN; | 255 | buf->f_namelen = HFSPLUS_MAX_STRLEN; |
@@ -263,11 +262,11 @@ static int hfsplus_remount(struct super_block *sb, int *flags, char *data) | |||
263 | if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY)) | 262 | if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY)) |
264 | return 0; | 263 | return 0; |
265 | if (!(*flags & MS_RDONLY)) { | 264 | if (!(*flags & MS_RDONLY)) { |
266 | struct hfsplus_vh *vhdr = HFSPLUS_SB(sb).s_vhdr; | 265 | struct hfsplus_vh *vhdr = HFSPLUS_SB(sb)->s_vhdr; |
267 | struct hfsplus_sb_info sbi; | 266 | struct hfsplus_sb_info sbi; |
268 | 267 | ||
269 | memset(&sbi, 0, sizeof(struct hfsplus_sb_info)); | 268 | memset(&sbi, 0, sizeof(struct hfsplus_sb_info)); |
270 | sbi.nls = HFSPLUS_SB(sb).nls; | 269 | sbi.nls = HFSPLUS_SB(sb)->nls; |
271 | if (!hfsplus_parse_options(data, &sbi)) | 270 | if (!hfsplus_parse_options(data, &sbi)) |
272 | return -EINVAL; | 271 | return -EINVAL; |
273 | 272 | ||
@@ -276,7 +275,7 @@ static int hfsplus_remount(struct super_block *sb, int *flags, char *data) | |||
276 | "running fsck.hfsplus is recommended. leaving read-only.\n"); | 275 | "running fsck.hfsplus is recommended. leaving read-only.\n"); |
277 | sb->s_flags |= MS_RDONLY; | 276 | sb->s_flags |= MS_RDONLY; |
278 | *flags |= MS_RDONLY; | 277 | *flags |= MS_RDONLY; |
279 | } else if (sbi.flags & HFSPLUS_SB_FORCE) { | 278 | } else if (test_bit(HFSPLUS_SB_FORCE, &sbi.flags)) { |
280 | /* nothing */ | 279 | /* nothing */ |
281 | } else if (vhdr->attributes & cpu_to_be32(HFSPLUS_VOL_SOFTLOCK)) { | 280 | } else if (vhdr->attributes & cpu_to_be32(HFSPLUS_VOL_SOFTLOCK)) { |
282 | printk(KERN_WARNING "hfs: filesystem is marked locked, leaving read-only.\n"); | 281 | printk(KERN_WARNING "hfs: filesystem is marked locked, leaving read-only.\n"); |
@@ -320,7 +319,8 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent) | |||
320 | return -ENOMEM; | 319 | return -ENOMEM; |
321 | 320 | ||
322 | sb->s_fs_info = sbi; | 321 | sb->s_fs_info = sbi; |
323 | INIT_HLIST_HEAD(&sbi->rsrc_inodes); | 322 | mutex_init(&sbi->alloc_mutex); |
323 | mutex_init(&sbi->vh_mutex); | ||
324 | hfsplus_fill_defaults(sbi); | 324 | hfsplus_fill_defaults(sbi); |
325 | if (!hfsplus_parse_options(data, sbi)) { | 325 | if (!hfsplus_parse_options(data, sbi)) { |
326 | printk(KERN_ERR "hfs: unable to parse mount options\n"); | 326 | printk(KERN_ERR "hfs: unable to parse mount options\n"); |
@@ -344,7 +344,7 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent) | |||
344 | err = -EINVAL; | 344 | err = -EINVAL; |
345 | goto cleanup; | 345 | goto cleanup; |
346 | } | 346 | } |
347 | vhdr = HFSPLUS_SB(sb).s_vhdr; | 347 | vhdr = sbi->s_vhdr; |
348 | 348 | ||
349 | /* Copy parts of the volume header into the superblock */ | 349 | /* Copy parts of the volume header into the superblock */ |
350 | sb->s_magic = HFSPLUS_VOLHEAD_SIG; | 350 | sb->s_magic = HFSPLUS_VOLHEAD_SIG; |
@@ -353,18 +353,19 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent) | |||
353 | printk(KERN_ERR "hfs: wrong filesystem version\n"); | 353 | printk(KERN_ERR "hfs: wrong filesystem version\n"); |
354 | goto cleanup; | 354 | goto cleanup; |
355 | } | 355 | } |
356 | HFSPLUS_SB(sb).total_blocks = be32_to_cpu(vhdr->total_blocks); | 356 | sbi->total_blocks = be32_to_cpu(vhdr->total_blocks); |
357 | HFSPLUS_SB(sb).free_blocks = be32_to_cpu(vhdr->free_blocks); | 357 | sbi->free_blocks = be32_to_cpu(vhdr->free_blocks); |
358 | HFSPLUS_SB(sb).next_alloc = be32_to_cpu(vhdr->next_alloc); | 358 | sbi->next_cnid = be32_to_cpu(vhdr->next_cnid); |
359 | HFSPLUS_SB(sb).next_cnid = be32_to_cpu(vhdr->next_cnid); | 359 | sbi->file_count = be32_to_cpu(vhdr->file_count); |
360 | HFSPLUS_SB(sb).file_count = be32_to_cpu(vhdr->file_count); | 360 | sbi->folder_count = be32_to_cpu(vhdr->folder_count); |
361 | HFSPLUS_SB(sb).folder_count = be32_to_cpu(vhdr->folder_count); | 361 | sbi->data_clump_blocks = |
362 | HFSPLUS_SB(sb).data_clump_blocks = be32_to_cpu(vhdr->data_clump_sz) >> HFSPLUS_SB(sb).alloc_blksz_shift; | 362 | be32_to_cpu(vhdr->data_clump_sz) >> sbi->alloc_blksz_shift; |
363 | if (!HFSPLUS_SB(sb).data_clump_blocks) | 363 | if (!sbi->data_clump_blocks) |
364 | HFSPLUS_SB(sb).data_clump_blocks = 1; | 364 | sbi->data_clump_blocks = 1; |
365 | HFSPLUS_SB(sb).rsrc_clump_blocks = be32_to_cpu(vhdr->rsrc_clump_sz) >> HFSPLUS_SB(sb).alloc_blksz_shift; | 365 | sbi->rsrc_clump_blocks = |
366 | if (!HFSPLUS_SB(sb).rsrc_clump_blocks) | 366 | be32_to_cpu(vhdr->rsrc_clump_sz) >> sbi->alloc_blksz_shift; |
367 | HFSPLUS_SB(sb).rsrc_clump_blocks = 1; | 367 | if (!sbi->rsrc_clump_blocks) |
368 | sbi->rsrc_clump_blocks = 1; | ||
368 | 369 | ||
369 | /* Set up operations so we can load metadata */ | 370 | /* Set up operations so we can load metadata */ |
370 | sb->s_op = &hfsplus_sops; | 371 | sb->s_op = &hfsplus_sops; |
@@ -374,7 +375,7 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent) | |||
374 | printk(KERN_WARNING "hfs: Filesystem was not cleanly unmounted, " | 375 | printk(KERN_WARNING "hfs: Filesystem was not cleanly unmounted, " |
375 | "running fsck.hfsplus is recommended. mounting read-only.\n"); | 376 | "running fsck.hfsplus is recommended. mounting read-only.\n"); |
376 | sb->s_flags |= MS_RDONLY; | 377 | sb->s_flags |= MS_RDONLY; |
377 | } else if (sbi->flags & HFSPLUS_SB_FORCE) { | 378 | } else if (test_and_clear_bit(HFSPLUS_SB_FORCE, &sbi->flags)) { |
378 | /* nothing */ | 379 | /* nothing */ |
379 | } else if (vhdr->attributes & cpu_to_be32(HFSPLUS_VOL_SOFTLOCK)) { | 380 | } else if (vhdr->attributes & cpu_to_be32(HFSPLUS_VOL_SOFTLOCK)) { |
380 | printk(KERN_WARNING "hfs: Filesystem is marked locked, mounting read-only.\n"); | 381 | printk(KERN_WARNING "hfs: Filesystem is marked locked, mounting read-only.\n"); |
@@ -384,16 +385,15 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent) | |||
384 | "use the force option at your own risk, mounting read-only.\n"); | 385 | "use the force option at your own risk, mounting read-only.\n"); |
385 | sb->s_flags |= MS_RDONLY; | 386 | sb->s_flags |= MS_RDONLY; |
386 | } | 387 | } |
387 | sbi->flags &= ~HFSPLUS_SB_FORCE; | ||
388 | 388 | ||
389 | /* Load metadata objects (B*Trees) */ | 389 | /* Load metadata objects (B*Trees) */ |
390 | HFSPLUS_SB(sb).ext_tree = hfs_btree_open(sb, HFSPLUS_EXT_CNID); | 390 | sbi->ext_tree = hfs_btree_open(sb, HFSPLUS_EXT_CNID); |
391 | if (!HFSPLUS_SB(sb).ext_tree) { | 391 | if (!sbi->ext_tree) { |
392 | printk(KERN_ERR "hfs: failed to load extents file\n"); | 392 | printk(KERN_ERR "hfs: failed to load extents file\n"); |
393 | goto cleanup; | 393 | goto cleanup; |
394 | } | 394 | } |
395 | HFSPLUS_SB(sb).cat_tree = hfs_btree_open(sb, HFSPLUS_CAT_CNID); | 395 | sbi->cat_tree = hfs_btree_open(sb, HFSPLUS_CAT_CNID); |
396 | if (!HFSPLUS_SB(sb).cat_tree) { | 396 | if (!sbi->cat_tree) { |
397 | printk(KERN_ERR "hfs: failed to load catalog file\n"); | 397 | printk(KERN_ERR "hfs: failed to load catalog file\n"); |
398 | goto cleanup; | 398 | goto cleanup; |
399 | } | 399 | } |
@@ -404,7 +404,7 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent) | |||
404 | err = PTR_ERR(inode); | 404 | err = PTR_ERR(inode); |
405 | goto cleanup; | 405 | goto cleanup; |
406 | } | 406 | } |
407 | HFSPLUS_SB(sb).alloc_file = inode; | 407 | sbi->alloc_file = inode; |
408 | 408 | ||
409 | /* Load the root directory */ | 409 | /* Load the root directory */ |
410 | root = hfsplus_iget(sb, HFSPLUS_ROOT_CNID); | 410 | root = hfsplus_iget(sb, HFSPLUS_ROOT_CNID); |
@@ -423,7 +423,7 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent) | |||
423 | 423 | ||
424 | str.len = sizeof(HFSP_HIDDENDIR_NAME) - 1; | 424 | str.len = sizeof(HFSP_HIDDENDIR_NAME) - 1; |
425 | str.name = HFSP_HIDDENDIR_NAME; | 425 | str.name = HFSP_HIDDENDIR_NAME; |
426 | hfs_find_init(HFSPLUS_SB(sb).cat_tree, &fd); | 426 | hfs_find_init(sbi->cat_tree, &fd); |
427 | hfsplus_cat_build_key(sb, fd.search_key, HFSPLUS_ROOT_CNID, &str); | 427 | hfsplus_cat_build_key(sb, fd.search_key, HFSPLUS_ROOT_CNID, &str); |
428 | if (!hfs_brec_read(&fd, &entry, sizeof(entry))) { | 428 | if (!hfs_brec_read(&fd, &entry, sizeof(entry))) { |
429 | hfs_find_exit(&fd); | 429 | hfs_find_exit(&fd); |
@@ -434,7 +434,7 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent) | |||
434 | err = PTR_ERR(inode); | 434 | err = PTR_ERR(inode); |
435 | goto cleanup; | 435 | goto cleanup; |
436 | } | 436 | } |
437 | HFSPLUS_SB(sb).hidden_dir = inode; | 437 | sbi->hidden_dir = inode; |
438 | } else | 438 | } else |
439 | hfs_find_exit(&fd); | 439 | hfs_find_exit(&fd); |
440 | 440 | ||
@@ -449,15 +449,19 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent) | |||
449 | be32_add_cpu(&vhdr->write_count, 1); | 449 | be32_add_cpu(&vhdr->write_count, 1); |
450 | vhdr->attributes &= cpu_to_be32(~HFSPLUS_VOL_UNMNT); | 450 | vhdr->attributes &= cpu_to_be32(~HFSPLUS_VOL_UNMNT); |
451 | vhdr->attributes |= cpu_to_be32(HFSPLUS_VOL_INCNSTNT); | 451 | vhdr->attributes |= cpu_to_be32(HFSPLUS_VOL_INCNSTNT); |
452 | mark_buffer_dirty(HFSPLUS_SB(sb).s_vhbh); | 452 | mark_buffer_dirty(sbi->s_vhbh); |
453 | sync_dirty_buffer(HFSPLUS_SB(sb).s_vhbh); | 453 | sync_dirty_buffer(sbi->s_vhbh); |
454 | 454 | ||
455 | if (!HFSPLUS_SB(sb).hidden_dir) { | 455 | if (!sbi->hidden_dir) { |
456 | printk(KERN_DEBUG "hfs: create hidden dir...\n"); | 456 | printk(KERN_DEBUG "hfs: create hidden dir...\n"); |
457 | HFSPLUS_SB(sb).hidden_dir = hfsplus_new_inode(sb, S_IFDIR); | 457 | |
458 | hfsplus_create_cat(HFSPLUS_SB(sb).hidden_dir->i_ino, sb->s_root->d_inode, | 458 | mutex_lock(&sbi->vh_mutex); |
459 | &str, HFSPLUS_SB(sb).hidden_dir); | 459 | sbi->hidden_dir = hfsplus_new_inode(sb, S_IFDIR); |
460 | mark_inode_dirty(HFSPLUS_SB(sb).hidden_dir); | 460 | hfsplus_create_cat(sbi->hidden_dir->i_ino, sb->s_root->d_inode, |
461 | &str, sbi->hidden_dir); | ||
462 | mutex_unlock(&sbi->vh_mutex); | ||
463 | |||
464 | mark_inode_dirty(sbi->hidden_dir); | ||
461 | } | 465 | } |
462 | out: | 466 | out: |
463 | unload_nls(sbi->nls); | 467 | unload_nls(sbi->nls); |
@@ -486,7 +490,7 @@ static struct inode *hfsplus_alloc_inode(struct super_block *sb) | |||
486 | 490 | ||
487 | static void hfsplus_destroy_inode(struct inode *inode) | 491 | static void hfsplus_destroy_inode(struct inode *inode) |
488 | { | 492 | { |
489 | kmem_cache_free(hfsplus_inode_cachep, &HFSPLUS_I(inode)); | 493 | kmem_cache_free(hfsplus_inode_cachep, HFSPLUS_I(inode)); |
490 | } | 494 | } |
491 | 495 | ||
492 | #define HFSPLUS_INODE_SIZE sizeof(struct hfsplus_inode_info) | 496 | #define HFSPLUS_INODE_SIZE sizeof(struct hfsplus_inode_info) |
diff --git a/fs/hfsplus/unicode.c b/fs/hfsplus/unicode.c index 628ccf6fa402..b66d67de882c 100644 --- a/fs/hfsplus/unicode.c +++ b/fs/hfsplus/unicode.c | |||
@@ -121,7 +121,7 @@ static u16 *hfsplus_compose_lookup(u16 *p, u16 cc) | |||
121 | int hfsplus_uni2asc(struct super_block *sb, const struct hfsplus_unistr *ustr, char *astr, int *len_p) | 121 | int hfsplus_uni2asc(struct super_block *sb, const struct hfsplus_unistr *ustr, char *astr, int *len_p) |
122 | { | 122 | { |
123 | const hfsplus_unichr *ip; | 123 | const hfsplus_unichr *ip; |
124 | struct nls_table *nls = HFSPLUS_SB(sb).nls; | 124 | struct nls_table *nls = HFSPLUS_SB(sb)->nls; |
125 | u8 *op; | 125 | u8 *op; |
126 | u16 cc, c0, c1; | 126 | u16 cc, c0, c1; |
127 | u16 *ce1, *ce2; | 127 | u16 *ce1, *ce2; |
@@ -132,7 +132,7 @@ int hfsplus_uni2asc(struct super_block *sb, const struct hfsplus_unistr *ustr, c | |||
132 | ustrlen = be16_to_cpu(ustr->length); | 132 | ustrlen = be16_to_cpu(ustr->length); |
133 | len = *len_p; | 133 | len = *len_p; |
134 | ce1 = NULL; | 134 | ce1 = NULL; |
135 | compose = !(HFSPLUS_SB(sb).flags & HFSPLUS_SB_NODECOMPOSE); | 135 | compose = !test_bit(HFSPLUS_SB_NODECOMPOSE, &HFSPLUS_SB(sb)->flags); |
136 | 136 | ||
137 | while (ustrlen > 0) { | 137 | while (ustrlen > 0) { |
138 | c0 = be16_to_cpu(*ip++); | 138 | c0 = be16_to_cpu(*ip++); |
@@ -246,7 +246,7 @@ out: | |||
246 | static inline int asc2unichar(struct super_block *sb, const char *astr, int len, | 246 | static inline int asc2unichar(struct super_block *sb, const char *astr, int len, |
247 | wchar_t *uc) | 247 | wchar_t *uc) |
248 | { | 248 | { |
249 | int size = HFSPLUS_SB(sb).nls->char2uni(astr, len, uc); | 249 | int size = HFSPLUS_SB(sb)->nls->char2uni(astr, len, uc); |
250 | if (size <= 0) { | 250 | if (size <= 0) { |
251 | *uc = '?'; | 251 | *uc = '?'; |
252 | size = 1; | 252 | size = 1; |
@@ -293,7 +293,7 @@ int hfsplus_asc2uni(struct super_block *sb, struct hfsplus_unistr *ustr, | |||
293 | u16 *dstr, outlen = 0; | 293 | u16 *dstr, outlen = 0; |
294 | wchar_t c; | 294 | wchar_t c; |
295 | 295 | ||
296 | decompose = !(HFSPLUS_SB(sb).flags & HFSPLUS_SB_NODECOMPOSE); | 296 | decompose = !test_bit(HFSPLUS_SB_NODECOMPOSE, &HFSPLUS_SB(sb)->flags); |
297 | while (outlen < HFSPLUS_MAX_STRLEN && len > 0) { | 297 | while (outlen < HFSPLUS_MAX_STRLEN && len > 0) { |
298 | size = asc2unichar(sb, astr, len, &c); | 298 | size = asc2unichar(sb, astr, len, &c); |
299 | 299 | ||
@@ -330,8 +330,8 @@ int hfsplus_hash_dentry(struct dentry *dentry, struct qstr *str) | |||
330 | wchar_t c; | 330 | wchar_t c; |
331 | u16 c2; | 331 | u16 c2; |
332 | 332 | ||
333 | casefold = (HFSPLUS_SB(sb).flags & HFSPLUS_SB_CASEFOLD); | 333 | casefold = test_bit(HFSPLUS_SB_CASEFOLD, &HFSPLUS_SB(sb)->flags); |
334 | decompose = !(HFSPLUS_SB(sb).flags & HFSPLUS_SB_NODECOMPOSE); | 334 | decompose = !test_bit(HFSPLUS_SB_NODECOMPOSE, &HFSPLUS_SB(sb)->flags); |
335 | hash = init_name_hash(); | 335 | hash = init_name_hash(); |
336 | astr = str->name; | 336 | astr = str->name; |
337 | len = str->len; | 337 | len = str->len; |
@@ -373,8 +373,8 @@ int hfsplus_compare_dentry(struct dentry *dentry, struct qstr *s1, struct qstr * | |||
373 | u16 c1, c2; | 373 | u16 c1, c2; |
374 | wchar_t c; | 374 | wchar_t c; |
375 | 375 | ||
376 | casefold = (HFSPLUS_SB(sb).flags & HFSPLUS_SB_CASEFOLD); | 376 | casefold = test_bit(HFSPLUS_SB_CASEFOLD, &HFSPLUS_SB(sb)->flags); |
377 | decompose = !(HFSPLUS_SB(sb).flags & HFSPLUS_SB_NODECOMPOSE); | 377 | decompose = !test_bit(HFSPLUS_SB_NODECOMPOSE, &HFSPLUS_SB(sb)->flags); |
378 | astr1 = s1->name; | 378 | astr1 = s1->name; |
379 | len1 = s1->len; | 379 | len1 = s1->len; |
380 | astr2 = s2->name; | 380 | astr2 = s2->name; |
diff --git a/fs/hfsplus/wrapper.c b/fs/hfsplus/wrapper.c index bed78ac8f6d1..8972c20b3216 100644 --- a/fs/hfsplus/wrapper.c +++ b/fs/hfsplus/wrapper.c | |||
@@ -65,8 +65,8 @@ static int hfsplus_get_last_session(struct super_block *sb, | |||
65 | *start = 0; | 65 | *start = 0; |
66 | *size = sb->s_bdev->bd_inode->i_size >> 9; | 66 | *size = sb->s_bdev->bd_inode->i_size >> 9; |
67 | 67 | ||
68 | if (HFSPLUS_SB(sb).session >= 0) { | 68 | if (HFSPLUS_SB(sb)->session >= 0) { |
69 | te.cdte_track = HFSPLUS_SB(sb).session; | 69 | te.cdte_track = HFSPLUS_SB(sb)->session; |
70 | te.cdte_format = CDROM_LBA; | 70 | te.cdte_format = CDROM_LBA; |
71 | res = ioctl_by_bdev(sb->s_bdev, CDROMREADTOCENTRY, (unsigned long)&te); | 71 | res = ioctl_by_bdev(sb->s_bdev, CDROMREADTOCENTRY, (unsigned long)&te); |
72 | if (!res && (te.cdte_ctrl & CDROM_DATA_TRACK) == 4) { | 72 | if (!res && (te.cdte_ctrl & CDROM_DATA_TRACK) == 4) { |
@@ -87,6 +87,7 @@ static int hfsplus_get_last_session(struct super_block *sb, | |||
87 | /* Takes in super block, returns true if good data read */ | 87 | /* Takes in super block, returns true if good data read */ |
88 | int hfsplus_read_wrapper(struct super_block *sb) | 88 | int hfsplus_read_wrapper(struct super_block *sb) |
89 | { | 89 | { |
90 | struct hfsplus_sb_info *sbi = HFSPLUS_SB(sb); | ||
90 | struct buffer_head *bh; | 91 | struct buffer_head *bh; |
91 | struct hfsplus_vh *vhdr; | 92 | struct hfsplus_vh *vhdr; |
92 | struct hfsplus_wd wd; | 93 | struct hfsplus_wd wd; |
@@ -122,7 +123,7 @@ int hfsplus_read_wrapper(struct super_block *sb) | |||
122 | if (vhdr->signature == cpu_to_be16(HFSPLUS_VOLHEAD_SIG)) | 123 | if (vhdr->signature == cpu_to_be16(HFSPLUS_VOLHEAD_SIG)) |
123 | break; | 124 | break; |
124 | if (vhdr->signature == cpu_to_be16(HFSPLUS_VOLHEAD_SIGX)) { | 125 | if (vhdr->signature == cpu_to_be16(HFSPLUS_VOLHEAD_SIGX)) { |
125 | HFSPLUS_SB(sb).flags |= HFSPLUS_SB_HFSX; | 126 | set_bit(HFSPLUS_SB_HFSX, &sbi->flags); |
126 | break; | 127 | break; |
127 | } | 128 | } |
128 | brelse(bh); | 129 | brelse(bh); |
@@ -143,11 +144,11 @@ int hfsplus_read_wrapper(struct super_block *sb) | |||
143 | if (blocksize < HFSPLUS_SECTOR_SIZE || | 144 | if (blocksize < HFSPLUS_SECTOR_SIZE || |
144 | ((blocksize - 1) & blocksize)) | 145 | ((blocksize - 1) & blocksize)) |
145 | return -EINVAL; | 146 | return -EINVAL; |
146 | HFSPLUS_SB(sb).alloc_blksz = blocksize; | 147 | sbi->alloc_blksz = blocksize; |
147 | HFSPLUS_SB(sb).alloc_blksz_shift = 0; | 148 | sbi->alloc_blksz_shift = 0; |
148 | while ((blocksize >>= 1) != 0) | 149 | while ((blocksize >>= 1) != 0) |
149 | HFSPLUS_SB(sb).alloc_blksz_shift++; | 150 | sbi->alloc_blksz_shift++; |
150 | blocksize = min(HFSPLUS_SB(sb).alloc_blksz, (u32)PAGE_SIZE); | 151 | blocksize = min(sbi->alloc_blksz, (u32)PAGE_SIZE); |
151 | 152 | ||
152 | /* align block size to block offset */ | 153 | /* align block size to block offset */ |
153 | while (part_start & ((blocksize >> HFSPLUS_SECTOR_SHIFT) - 1)) | 154 | while (part_start & ((blocksize >> HFSPLUS_SECTOR_SHIFT) - 1)) |
@@ -158,23 +159,26 @@ int hfsplus_read_wrapper(struct super_block *sb) | |||
158 | return -EINVAL; | 159 | return -EINVAL; |
159 | } | 160 | } |
160 | 161 | ||
161 | HFSPLUS_SB(sb).blockoffset = part_start >> | 162 | sbi->blockoffset = |
162 | (sb->s_blocksize_bits - HFSPLUS_SECTOR_SHIFT); | 163 | part_start >> (sb->s_blocksize_bits - HFSPLUS_SECTOR_SHIFT); |
163 | HFSPLUS_SB(sb).sect_count = part_size; | 164 | sbi->sect_count = part_size; |
164 | HFSPLUS_SB(sb).fs_shift = HFSPLUS_SB(sb).alloc_blksz_shift - | 165 | sbi->fs_shift = sbi->alloc_blksz_shift - sb->s_blocksize_bits; |
165 | sb->s_blocksize_bits; | ||
166 | 166 | ||
167 | bh = sb_bread512(sb, part_start + HFSPLUS_VOLHEAD_SECTOR, vhdr); | 167 | bh = sb_bread512(sb, part_start + HFSPLUS_VOLHEAD_SECTOR, vhdr); |
168 | if (!bh) | 168 | if (!bh) |
169 | return -EIO; | 169 | return -EIO; |
170 | 170 | ||
171 | /* should still be the same... */ | 171 | /* should still be the same... */ |
172 | if (vhdr->signature != (HFSPLUS_SB(sb).flags & HFSPLUS_SB_HFSX ? | 172 | if (test_bit(HFSPLUS_SB_HFSX, &sbi->flags)) { |
173 | cpu_to_be16(HFSPLUS_VOLHEAD_SIGX) : | 173 | if (vhdr->signature != cpu_to_be16(HFSPLUS_VOLHEAD_SIGX)) |
174 | cpu_to_be16(HFSPLUS_VOLHEAD_SIG))) | 174 | goto error; |
175 | goto error; | 175 | } else { |
176 | HFSPLUS_SB(sb).s_vhbh = bh; | 176 | if (vhdr->signature != cpu_to_be16(HFSPLUS_VOLHEAD_SIG)) |
177 | HFSPLUS_SB(sb).s_vhdr = vhdr; | 177 | goto error; |
178 | } | ||
179 | |||
180 | sbi->s_vhbh = bh; | ||
181 | sbi->s_vhdr = vhdr; | ||
178 | 182 | ||
179 | return 0; | 183 | return 0; |
180 | error: | 184 | error: |
diff --git a/include/asm-generic/hardirq.h b/include/asm-generic/hardirq.h index 62f59080e5cc..04d0a977cd43 100644 --- a/include/asm-generic/hardirq.h +++ b/include/asm-generic/hardirq.h | |||
@@ -3,13 +3,13 @@ | |||
3 | 3 | ||
4 | #include <linux/cache.h> | 4 | #include <linux/cache.h> |
5 | #include <linux/threads.h> | 5 | #include <linux/threads.h> |
6 | #include <linux/irq.h> | ||
7 | 6 | ||
8 | typedef struct { | 7 | typedef struct { |
9 | unsigned int __softirq_pending; | 8 | unsigned int __softirq_pending; |
10 | } ____cacheline_aligned irq_cpustat_t; | 9 | } ____cacheline_aligned irq_cpustat_t; |
11 | 10 | ||
12 | #include <linux/irq_cpustat.h> /* Standard mappings for irq_cpustat_t above */ | 11 | #include <linux/irq_cpustat.h> /* Standard mappings for irq_cpustat_t above */ |
12 | #include <linux/irq.h> | ||
13 | 13 | ||
14 | #ifndef ack_bad_irq | 14 | #ifndef ack_bad_irq |
15 | static inline void ack_bad_irq(unsigned int irq) | 15 | static inline void ack_bad_irq(unsigned int irq) |
diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index 8a92a170fb7d..ef2af9948eac 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h | |||
@@ -220,6 +220,8 @@ | |||
220 | \ | 220 | \ |
221 | BUG_TABLE \ | 221 | BUG_TABLE \ |
222 | \ | 222 | \ |
223 | JUMP_TABLE \ | ||
224 | \ | ||
223 | /* PCI quirks */ \ | 225 | /* PCI quirks */ \ |
224 | .pci_fixup : AT(ADDR(.pci_fixup) - LOAD_OFFSET) { \ | 226 | .pci_fixup : AT(ADDR(.pci_fixup) - LOAD_OFFSET) { \ |
225 | VMLINUX_SYMBOL(__start_pci_fixups_early) = .; \ | 227 | VMLINUX_SYMBOL(__start_pci_fixups_early) = .; \ |
@@ -563,6 +565,14 @@ | |||
563 | #define BUG_TABLE | 565 | #define BUG_TABLE |
564 | #endif | 566 | #endif |
565 | 567 | ||
568 | #define JUMP_TABLE \ | ||
569 | . = ALIGN(8); \ | ||
570 | __jump_table : AT(ADDR(__jump_table) - LOAD_OFFSET) { \ | ||
571 | VMLINUX_SYMBOL(__start___jump_table) = .; \ | ||
572 | *(__jump_table) \ | ||
573 | VMLINUX_SYMBOL(__stop___jump_table) = .; \ | ||
574 | } | ||
575 | |||
566 | #ifdef CONFIG_PM_TRACE | 576 | #ifdef CONFIG_PM_TRACE |
567 | #define TRACEDATA \ | 577 | #define TRACEDATA \ |
568 | . = ALIGN(4); \ | 578 | . = ALIGN(4); \ |
diff --git a/fs/ceph/auth.h b/include/linux/ceph/auth.h index d38a2fb4a137..7fff521d7eb5 100644 --- a/fs/ceph/auth.h +++ b/include/linux/ceph/auth.h | |||
@@ -1,8 +1,8 @@ | |||
1 | #ifndef _FS_CEPH_AUTH_H | 1 | #ifndef _FS_CEPH_AUTH_H |
2 | #define _FS_CEPH_AUTH_H | 2 | #define _FS_CEPH_AUTH_H |
3 | 3 | ||
4 | #include "types.h" | 4 | #include <linux/ceph/types.h> |
5 | #include "buffer.h" | 5 | #include <linux/ceph/buffer.h> |
6 | 6 | ||
7 | /* | 7 | /* |
8 | * Abstract interface for communicating with the authenticate module. | 8 | * Abstract interface for communicating with the authenticate module. |
diff --git a/fs/ceph/buffer.h b/include/linux/ceph/buffer.h index 58d19014068f..58d19014068f 100644 --- a/fs/ceph/buffer.h +++ b/include/linux/ceph/buffer.h | |||
diff --git a/fs/ceph/ceph_debug.h b/include/linux/ceph/ceph_debug.h index 1818c2305610..aa2e19182d99 100644 --- a/fs/ceph/ceph_debug.h +++ b/include/linux/ceph/ceph_debug.h | |||
@@ -3,7 +3,7 @@ | |||
3 | 3 | ||
4 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt | 4 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt |
5 | 5 | ||
6 | #ifdef CONFIG_CEPH_FS_PRETTYDEBUG | 6 | #ifdef CONFIG_CEPH_LIB_PRETTYDEBUG |
7 | 7 | ||
8 | /* | 8 | /* |
9 | * wrap pr_debug to include a filename:lineno prefix on each line. | 9 | * wrap pr_debug to include a filename:lineno prefix on each line. |
@@ -14,7 +14,8 @@ | |||
14 | # if defined(DEBUG) || defined(CONFIG_DYNAMIC_DEBUG) | 14 | # if defined(DEBUG) || defined(CONFIG_DYNAMIC_DEBUG) |
15 | extern const char *ceph_file_part(const char *s, int len); | 15 | extern const char *ceph_file_part(const char *s, int len); |
16 | # define dout(fmt, ...) \ | 16 | # define dout(fmt, ...) \ |
17 | pr_debug(" %12.12s:%-4d : " fmt, \ | 17 | pr_debug("%.*s %12.12s:%-4d : " fmt, \ |
18 | 8 - (int)sizeof(KBUILD_MODNAME), " ", \ | ||
18 | ceph_file_part(__FILE__, sizeof(__FILE__)), \ | 19 | ceph_file_part(__FILE__, sizeof(__FILE__)), \ |
19 | __LINE__, ##__VA_ARGS__) | 20 | __LINE__, ##__VA_ARGS__) |
20 | # else | 21 | # else |
diff --git a/fs/ceph/ceph_frag.h b/include/linux/ceph/ceph_frag.h index 5babb8e95352..5babb8e95352 100644 --- a/fs/ceph/ceph_frag.h +++ b/include/linux/ceph/ceph_frag.h | |||
diff --git a/fs/ceph/ceph_fs.h b/include/linux/ceph/ceph_fs.h index d5619ac86711..c3c74aef289d 100644 --- a/fs/ceph/ceph_fs.h +++ b/include/linux/ceph/ceph_fs.h | |||
@@ -299,6 +299,7 @@ enum { | |||
299 | CEPH_MDS_OP_SETATTR = 0x01108, | 299 | CEPH_MDS_OP_SETATTR = 0x01108, |
300 | CEPH_MDS_OP_SETFILELOCK= 0x01109, | 300 | CEPH_MDS_OP_SETFILELOCK= 0x01109, |
301 | CEPH_MDS_OP_GETFILELOCK= 0x00110, | 301 | CEPH_MDS_OP_GETFILELOCK= 0x00110, |
302 | CEPH_MDS_OP_SETDIRLAYOUT=0x0110a, | ||
302 | 303 | ||
303 | CEPH_MDS_OP_MKNOD = 0x01201, | 304 | CEPH_MDS_OP_MKNOD = 0x01201, |
304 | CEPH_MDS_OP_LINK = 0x01202, | 305 | CEPH_MDS_OP_LINK = 0x01202, |
diff --git a/fs/ceph/ceph_hash.h b/include/linux/ceph/ceph_hash.h index d099c3f90236..d099c3f90236 100644 --- a/fs/ceph/ceph_hash.h +++ b/include/linux/ceph/ceph_hash.h | |||
diff --git a/include/linux/ceph/debugfs.h b/include/linux/ceph/debugfs.h new file mode 100644 index 000000000000..2a79702e092b --- /dev/null +++ b/include/linux/ceph/debugfs.h | |||
@@ -0,0 +1,33 @@ | |||
1 | #ifndef _FS_CEPH_DEBUGFS_H | ||
2 | #define _FS_CEPH_DEBUGFS_H | ||
3 | |||
4 | #include "ceph_debug.h" | ||
5 | #include "types.h" | ||
6 | |||
7 | #define CEPH_DEFINE_SHOW_FUNC(name) \ | ||
8 | static int name##_open(struct inode *inode, struct file *file) \ | ||
9 | { \ | ||
10 | struct seq_file *sf; \ | ||
11 | int ret; \ | ||
12 | \ | ||
13 | ret = single_open(file, name, NULL); \ | ||
14 | sf = file->private_data; \ | ||
15 | sf->private = inode->i_private; \ | ||
16 | return ret; \ | ||
17 | } \ | ||
18 | \ | ||
19 | static const struct file_operations name##_fops = { \ | ||
20 | .open = name##_open, \ | ||
21 | .read = seq_read, \ | ||
22 | .llseek = seq_lseek, \ | ||
23 | .release = single_release, \ | ||
24 | }; | ||
25 | |||
26 | /* debugfs.c */ | ||
27 | extern int ceph_debugfs_init(void); | ||
28 | extern void ceph_debugfs_cleanup(void); | ||
29 | extern int ceph_debugfs_client_init(struct ceph_client *client); | ||
30 | extern void ceph_debugfs_client_cleanup(struct ceph_client *client); | ||
31 | |||
32 | #endif | ||
33 | |||
diff --git a/fs/ceph/decode.h b/include/linux/ceph/decode.h index 3d25415afe63..c5b6939fb32a 100644 --- a/fs/ceph/decode.h +++ b/include/linux/ceph/decode.h | |||
@@ -191,6 +191,11 @@ static inline void ceph_encode_string(void **p, void *end, | |||
191 | ceph_encode_need(p, end, n, bad); \ | 191 | ceph_encode_need(p, end, n, bad); \ |
192 | ceph_encode_copy(p, pv, n); \ | 192 | ceph_encode_copy(p, pv, n); \ |
193 | } while (0) | 193 | } while (0) |
194 | #define ceph_encode_string_safe(p, end, s, n, bad) \ | ||
195 | do { \ | ||
196 | ceph_encode_need(p, end, n, bad); \ | ||
197 | ceph_encode_string(p, end, s, n); \ | ||
198 | } while (0) | ||
194 | 199 | ||
195 | 200 | ||
196 | #endif | 201 | #endif |
diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h new file mode 100644 index 000000000000..f22b2e941686 --- /dev/null +++ b/include/linux/ceph/libceph.h | |||
@@ -0,0 +1,249 @@ | |||
1 | #ifndef _FS_CEPH_LIBCEPH_H | ||
2 | #define _FS_CEPH_LIBCEPH_H | ||
3 | |||
4 | #include "ceph_debug.h" | ||
5 | |||
6 | #include <asm/unaligned.h> | ||
7 | #include <linux/backing-dev.h> | ||
8 | #include <linux/completion.h> | ||
9 | #include <linux/exportfs.h> | ||
10 | #include <linux/fs.h> | ||
11 | #include <linux/mempool.h> | ||
12 | #include <linux/pagemap.h> | ||
13 | #include <linux/wait.h> | ||
14 | #include <linux/writeback.h> | ||
15 | #include <linux/slab.h> | ||
16 | |||
17 | #include "types.h" | ||
18 | #include "messenger.h" | ||
19 | #include "msgpool.h" | ||
20 | #include "mon_client.h" | ||
21 | #include "osd_client.h" | ||
22 | #include "ceph_fs.h" | ||
23 | |||
24 | /* | ||
25 | * Supported features | ||
26 | */ | ||
27 | #define CEPH_FEATURE_SUPPORTED_DEFAULT CEPH_FEATURE_NOSRCADDR | ||
28 | #define CEPH_FEATURE_REQUIRED_DEFAULT CEPH_FEATURE_NOSRCADDR | ||
29 | |||
30 | /* | ||
31 | * mount options | ||
32 | */ | ||
33 | #define CEPH_OPT_FSID (1<<0) | ||
34 | #define CEPH_OPT_NOSHARE (1<<1) /* don't share client with other sbs */ | ||
35 | #define CEPH_OPT_MYIP (1<<2) /* specified my ip */ | ||
36 | #define CEPH_OPT_NOCRC (1<<3) /* no data crc on writes */ | ||
37 | |||
38 | #define CEPH_OPT_DEFAULT (0); | ||
39 | |||
40 | #define ceph_set_opt(client, opt) \ | ||
41 | (client)->options->flags |= CEPH_OPT_##opt; | ||
42 | #define ceph_test_opt(client, opt) \ | ||
43 | (!!((client)->options->flags & CEPH_OPT_##opt)) | ||
44 | |||
45 | struct ceph_options { | ||
46 | int flags; | ||
47 | struct ceph_fsid fsid; | ||
48 | struct ceph_entity_addr my_addr; | ||
49 | int mount_timeout; | ||
50 | int osd_idle_ttl; | ||
51 | int osd_timeout; | ||
52 | int osd_keepalive_timeout; | ||
53 | |||
54 | /* | ||
55 | * any type that can't be simply compared or doesn't need need | ||
56 | * to be compared should go beyond this point, | ||
57 | * ceph_compare_options() should be updated accordingly | ||
58 | */ | ||
59 | |||
60 | struct ceph_entity_addr *mon_addr; /* should be the first | ||
61 | pointer type of args */ | ||
62 | int num_mon; | ||
63 | char *name; | ||
64 | char *secret; | ||
65 | }; | ||
66 | |||
67 | /* | ||
68 | * defaults | ||
69 | */ | ||
70 | #define CEPH_MOUNT_TIMEOUT_DEFAULT 60 | ||
71 | #define CEPH_OSD_TIMEOUT_DEFAULT 60 /* seconds */ | ||
72 | #define CEPH_OSD_KEEPALIVE_DEFAULT 5 | ||
73 | #define CEPH_OSD_IDLE_TTL_DEFAULT 60 | ||
74 | #define CEPH_MOUNT_RSIZE_DEFAULT (512*1024) /* readahead */ | ||
75 | |||
76 | #define CEPH_MSG_MAX_FRONT_LEN (16*1024*1024) | ||
77 | #define CEPH_MSG_MAX_DATA_LEN (16*1024*1024) | ||
78 | |||
79 | #define CEPH_AUTH_NAME_DEFAULT "guest" | ||
80 | |||
81 | /* | ||
82 | * Delay telling the MDS we no longer want caps, in case we reopen | ||
83 | * the file. Delay a minimum amount of time, even if we send a cap | ||
84 | * message for some other reason. Otherwise, take the oppotunity to | ||
85 | * update the mds to avoid sending another message later. | ||
86 | */ | ||
87 | #define CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT 5 /* cap release delay */ | ||
88 | #define CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT 60 /* cap release delay */ | ||
89 | |||
90 | #define CEPH_CAP_RELEASE_SAFETY_DEFAULT (CEPH_CAPS_PER_RELEASE * 4) | ||
91 | |||
92 | /* mount state */ | ||
93 | enum { | ||
94 | CEPH_MOUNT_MOUNTING, | ||
95 | CEPH_MOUNT_MOUNTED, | ||
96 | CEPH_MOUNT_UNMOUNTING, | ||
97 | CEPH_MOUNT_UNMOUNTED, | ||
98 | CEPH_MOUNT_SHUTDOWN, | ||
99 | }; | ||
100 | |||
101 | /* | ||
102 | * subtract jiffies | ||
103 | */ | ||
104 | static inline unsigned long time_sub(unsigned long a, unsigned long b) | ||
105 | { | ||
106 | BUG_ON(time_after(b, a)); | ||
107 | return (long)a - (long)b; | ||
108 | } | ||
109 | |||
110 | struct ceph_mds_client; | ||
111 | |||
112 | /* | ||
113 | * per client state | ||
114 | * | ||
115 | * possibly shared by multiple mount points, if they are | ||
116 | * mounting the same ceph filesystem/cluster. | ||
117 | */ | ||
118 | struct ceph_client { | ||
119 | struct ceph_fsid fsid; | ||
120 | bool have_fsid; | ||
121 | |||
122 | void *private; | ||
123 | |||
124 | struct ceph_options *options; | ||
125 | |||
126 | struct mutex mount_mutex; /* serialize mount attempts */ | ||
127 | wait_queue_head_t auth_wq; | ||
128 | int auth_err; | ||
129 | |||
130 | int (*extra_mon_dispatch)(struct ceph_client *, struct ceph_msg *); | ||
131 | |||
132 | u32 supported_features; | ||
133 | u32 required_features; | ||
134 | |||
135 | struct ceph_messenger *msgr; /* messenger instance */ | ||
136 | struct ceph_mon_client monc; | ||
137 | struct ceph_osd_client osdc; | ||
138 | |||
139 | #ifdef CONFIG_DEBUG_FS | ||
140 | struct dentry *debugfs_dir; | ||
141 | struct dentry *debugfs_monmap; | ||
142 | struct dentry *debugfs_osdmap; | ||
143 | #endif | ||
144 | }; | ||
145 | |||
146 | |||
147 | |||
148 | /* | ||
149 | * snapshots | ||
150 | */ | ||
151 | |||
152 | /* | ||
153 | * A "snap context" is the set of existing snapshots when we | ||
154 | * write data. It is used by the OSD to guide its COW behavior. | ||
155 | * | ||
156 | * The ceph_snap_context is refcounted, and attached to each dirty | ||
157 | * page, indicating which context the dirty data belonged when it was | ||
158 | * dirtied. | ||
159 | */ | ||
160 | struct ceph_snap_context { | ||
161 | atomic_t nref; | ||
162 | u64 seq; | ||
163 | int num_snaps; | ||
164 | u64 snaps[]; | ||
165 | }; | ||
166 | |||
167 | static inline struct ceph_snap_context * | ||
168 | ceph_get_snap_context(struct ceph_snap_context *sc) | ||
169 | { | ||
170 | /* | ||
171 | printk("get_snap_context %p %d -> %d\n", sc, atomic_read(&sc->nref), | ||
172 | atomic_read(&sc->nref)+1); | ||
173 | */ | ||
174 | if (sc) | ||
175 | atomic_inc(&sc->nref); | ||
176 | return sc; | ||
177 | } | ||
178 | |||
179 | static inline void ceph_put_snap_context(struct ceph_snap_context *sc) | ||
180 | { | ||
181 | if (!sc) | ||
182 | return; | ||
183 | /* | ||
184 | printk("put_snap_context %p %d -> %d\n", sc, atomic_read(&sc->nref), | ||
185 | atomic_read(&sc->nref)-1); | ||
186 | */ | ||
187 | if (atomic_dec_and_test(&sc->nref)) { | ||
188 | /*printk(" deleting snap_context %p\n", sc);*/ | ||
189 | kfree(sc); | ||
190 | } | ||
191 | } | ||
192 | |||
193 | /* | ||
194 | * calculate the number of pages a given length and offset map onto, | ||
195 | * if we align the data. | ||
196 | */ | ||
197 | static inline int calc_pages_for(u64 off, u64 len) | ||
198 | { | ||
199 | return ((off+len+PAGE_CACHE_SIZE-1) >> PAGE_CACHE_SHIFT) - | ||
200 | (off >> PAGE_CACHE_SHIFT); | ||
201 | } | ||
202 | |||
203 | /* ceph_common.c */ | ||
204 | extern const char *ceph_msg_type_name(int type); | ||
205 | extern int ceph_check_fsid(struct ceph_client *client, struct ceph_fsid *fsid); | ||
206 | extern struct kmem_cache *ceph_inode_cachep; | ||
207 | extern struct kmem_cache *ceph_cap_cachep; | ||
208 | extern struct kmem_cache *ceph_dentry_cachep; | ||
209 | extern struct kmem_cache *ceph_file_cachep; | ||
210 | |||
211 | extern int ceph_parse_options(struct ceph_options **popt, char *options, | ||
212 | const char *dev_name, const char *dev_name_end, | ||
213 | int (*parse_extra_token)(char *c, void *private), | ||
214 | void *private); | ||
215 | extern void ceph_destroy_options(struct ceph_options *opt); | ||
216 | extern int ceph_compare_options(struct ceph_options *new_opt, | ||
217 | struct ceph_client *client); | ||
218 | extern struct ceph_client *ceph_create_client(struct ceph_options *opt, | ||
219 | void *private); | ||
220 | extern u64 ceph_client_id(struct ceph_client *client); | ||
221 | extern void ceph_destroy_client(struct ceph_client *client); | ||
222 | extern int __ceph_open_session(struct ceph_client *client, | ||
223 | unsigned long started); | ||
224 | extern int ceph_open_session(struct ceph_client *client); | ||
225 | |||
226 | /* pagevec.c */ | ||
227 | extern void ceph_release_page_vector(struct page **pages, int num_pages); | ||
228 | |||
229 | extern struct page **ceph_get_direct_page_vector(const char __user *data, | ||
230 | int num_pages, | ||
231 | loff_t off, size_t len); | ||
232 | extern void ceph_put_page_vector(struct page **pages, int num_pages); | ||
233 | extern void ceph_release_page_vector(struct page **pages, int num_pages); | ||
234 | extern struct page **ceph_alloc_page_vector(int num_pages, gfp_t flags); | ||
235 | extern int ceph_copy_user_to_page_vector(struct page **pages, | ||
236 | const char __user *data, | ||
237 | loff_t off, size_t len); | ||
238 | extern int ceph_copy_to_page_vector(struct page **pages, | ||
239 | const char *data, | ||
240 | loff_t off, size_t len); | ||
241 | extern int ceph_copy_from_page_vector(struct page **pages, | ||
242 | char *data, | ||
243 | loff_t off, size_t len); | ||
244 | extern int ceph_copy_page_vector_to_user(struct page **pages, char __user *data, | ||
245 | loff_t off, size_t len); | ||
246 | extern void ceph_zero_page_vector_range(int off, int len, struct page **pages); | ||
247 | |||
248 | |||
249 | #endif /* _FS_CEPH_SUPER_H */ | ||
diff --git a/fs/ceph/mdsmap.h b/include/linux/ceph/mdsmap.h index 4c5cb0880bba..4c5cb0880bba 100644 --- a/fs/ceph/mdsmap.h +++ b/include/linux/ceph/mdsmap.h | |||
diff --git a/fs/ceph/messenger.h b/include/linux/ceph/messenger.h index 76fbc957bc13..5956d62c3057 100644 --- a/fs/ceph/messenger.h +++ b/include/linux/ceph/messenger.h | |||
@@ -65,6 +65,9 @@ struct ceph_messenger { | |||
65 | */ | 65 | */ |
66 | u32 global_seq; | 66 | u32 global_seq; |
67 | spinlock_t global_seq_lock; | 67 | spinlock_t global_seq_lock; |
68 | |||
69 | u32 supported_features; | ||
70 | u32 required_features; | ||
68 | }; | 71 | }; |
69 | 72 | ||
70 | /* | 73 | /* |
@@ -82,6 +85,10 @@ struct ceph_msg { | |||
82 | struct ceph_pagelist *pagelist; /* instead of pages */ | 85 | struct ceph_pagelist *pagelist; /* instead of pages */ |
83 | struct list_head list_head; | 86 | struct list_head list_head; |
84 | struct kref kref; | 87 | struct kref kref; |
88 | struct bio *bio; /* instead of pages/pagelist */ | ||
89 | struct bio *bio_iter; /* bio iterator */ | ||
90 | int bio_seg; /* current bio segment */ | ||
91 | struct ceph_pagelist *trail; /* the trailing part of the data */ | ||
85 | bool front_is_vmalloc; | 92 | bool front_is_vmalloc; |
86 | bool more_to_follow; | 93 | bool more_to_follow; |
87 | bool needs_out_seq; | 94 | bool needs_out_seq; |
@@ -205,7 +212,7 @@ struct ceph_connection { | |||
205 | }; | 212 | }; |
206 | 213 | ||
207 | 214 | ||
208 | extern const char *pr_addr(const struct sockaddr_storage *ss); | 215 | extern const char *ceph_pr_addr(const struct sockaddr_storage *ss); |
209 | extern int ceph_parse_ips(const char *c, const char *end, | 216 | extern int ceph_parse_ips(const char *c, const char *end, |
210 | struct ceph_entity_addr *addr, | 217 | struct ceph_entity_addr *addr, |
211 | int max_count, int *count); | 218 | int max_count, int *count); |
@@ -216,7 +223,8 @@ extern void ceph_msgr_exit(void); | |||
216 | extern void ceph_msgr_flush(void); | 223 | extern void ceph_msgr_flush(void); |
217 | 224 | ||
218 | extern struct ceph_messenger *ceph_messenger_create( | 225 | extern struct ceph_messenger *ceph_messenger_create( |
219 | struct ceph_entity_addr *myaddr); | 226 | struct ceph_entity_addr *myaddr, |
227 | u32 features, u32 required); | ||
220 | extern void ceph_messenger_destroy(struct ceph_messenger *); | 228 | extern void ceph_messenger_destroy(struct ceph_messenger *); |
221 | 229 | ||
222 | extern void ceph_con_init(struct ceph_messenger *msgr, | 230 | extern void ceph_con_init(struct ceph_messenger *msgr, |
diff --git a/fs/ceph/mon_client.h b/include/linux/ceph/mon_client.h index 8e396f2c0963..545f85917780 100644 --- a/fs/ceph/mon_client.h +++ b/include/linux/ceph/mon_client.h | |||
@@ -79,6 +79,7 @@ struct ceph_mon_client { | |||
79 | u64 last_tid; | 79 | u64 last_tid; |
80 | 80 | ||
81 | /* mds/osd map */ | 81 | /* mds/osd map */ |
82 | int want_mdsmap; | ||
82 | int want_next_osdmap; /* 1 = want, 2 = want+asked */ | 83 | int want_next_osdmap; /* 1 = want, 2 = want+asked */ |
83 | u32 have_osdmap, have_mdsmap; | 84 | u32 have_osdmap, have_mdsmap; |
84 | 85 | ||
diff --git a/fs/ceph/msgpool.h b/include/linux/ceph/msgpool.h index a362605f9368..a362605f9368 100644 --- a/fs/ceph/msgpool.h +++ b/include/linux/ceph/msgpool.h | |||
diff --git a/fs/ceph/msgr.h b/include/linux/ceph/msgr.h index 680d3d648cac..680d3d648cac 100644 --- a/fs/ceph/msgr.h +++ b/include/linux/ceph/msgr.h | |||
diff --git a/fs/ceph/osd_client.h b/include/linux/ceph/osd_client.h index ce776989ef6a..6c91fb032c39 100644 --- a/fs/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h | |||
@@ -15,6 +15,7 @@ struct ceph_snap_context; | |||
15 | struct ceph_osd_request; | 15 | struct ceph_osd_request; |
16 | struct ceph_osd_client; | 16 | struct ceph_osd_client; |
17 | struct ceph_authorizer; | 17 | struct ceph_authorizer; |
18 | struct ceph_pagelist; | ||
18 | 19 | ||
19 | /* | 20 | /* |
20 | * completion callback for async writepages | 21 | * completion callback for async writepages |
@@ -68,6 +69,7 @@ struct ceph_osd_request { | |||
68 | struct list_head r_unsafe_item; | 69 | struct list_head r_unsafe_item; |
69 | 70 | ||
70 | struct inode *r_inode; /* for use by callbacks */ | 71 | struct inode *r_inode; /* for use by callbacks */ |
72 | void *r_priv; /* ditto */ | ||
71 | 73 | ||
72 | char r_oid[40]; /* object name */ | 74 | char r_oid[40]; /* object name */ |
73 | int r_oid_len; | 75 | int r_oid_len; |
@@ -80,6 +82,11 @@ struct ceph_osd_request { | |||
80 | struct page **r_pages; /* pages for data payload */ | 82 | struct page **r_pages; /* pages for data payload */ |
81 | int r_pages_from_pool; | 83 | int r_pages_from_pool; |
82 | int r_own_pages; /* if true, i own page list */ | 84 | int r_own_pages; /* if true, i own page list */ |
85 | #ifdef CONFIG_BLOCK | ||
86 | struct bio *r_bio; /* instead of pages */ | ||
87 | #endif | ||
88 | |||
89 | struct ceph_pagelist *r_trail; /* trailing part of the data */ | ||
83 | }; | 90 | }; |
84 | 91 | ||
85 | struct ceph_osd_client { | 92 | struct ceph_osd_client { |
@@ -110,6 +117,42 @@ struct ceph_osd_client { | |||
110 | struct ceph_msgpool msgpool_op_reply; | 117 | struct ceph_msgpool msgpool_op_reply; |
111 | }; | 118 | }; |
112 | 119 | ||
120 | struct ceph_osd_req_op { | ||
121 | u16 op; /* CEPH_OSD_OP_* */ | ||
122 | u32 flags; /* CEPH_OSD_FLAG_* */ | ||
123 | union { | ||
124 | struct { | ||
125 | u64 offset, length; | ||
126 | u64 truncate_size; | ||
127 | u32 truncate_seq; | ||
128 | } extent; | ||
129 | struct { | ||
130 | const char *name; | ||
131 | u32 name_len; | ||
132 | const char *val; | ||
133 | u32 value_len; | ||
134 | __u8 cmp_op; /* CEPH_OSD_CMPXATTR_OP_* */ | ||
135 | __u8 cmp_mode; /* CEPH_OSD_CMPXATTR_MODE_* */ | ||
136 | } xattr; | ||
137 | struct { | ||
138 | const char *class_name; | ||
139 | __u8 class_len; | ||
140 | const char *method_name; | ||
141 | __u8 method_len; | ||
142 | __u8 argc; | ||
143 | const char *indata; | ||
144 | u32 indata_len; | ||
145 | } cls; | ||
146 | struct { | ||
147 | u64 cookie, count; | ||
148 | } pgls; | ||
149 | struct { | ||
150 | u64 snapid; | ||
151 | } snap; | ||
152 | }; | ||
153 | u32 payload_len; | ||
154 | }; | ||
155 | |||
113 | extern int ceph_osdc_init(struct ceph_osd_client *osdc, | 156 | extern int ceph_osdc_init(struct ceph_osd_client *osdc, |
114 | struct ceph_client *client); | 157 | struct ceph_client *client); |
115 | extern void ceph_osdc_stop(struct ceph_osd_client *osdc); | 158 | extern void ceph_osdc_stop(struct ceph_osd_client *osdc); |
@@ -119,6 +162,30 @@ extern void ceph_osdc_handle_reply(struct ceph_osd_client *osdc, | |||
119 | extern void ceph_osdc_handle_map(struct ceph_osd_client *osdc, | 162 | extern void ceph_osdc_handle_map(struct ceph_osd_client *osdc, |
120 | struct ceph_msg *msg); | 163 | struct ceph_msg *msg); |
121 | 164 | ||
165 | extern void ceph_calc_raw_layout(struct ceph_osd_client *osdc, | ||
166 | struct ceph_file_layout *layout, | ||
167 | u64 snapid, | ||
168 | u64 off, u64 *plen, u64 *bno, | ||
169 | struct ceph_osd_request *req, | ||
170 | struct ceph_osd_req_op *op); | ||
171 | |||
172 | extern struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc, | ||
173 | int flags, | ||
174 | struct ceph_snap_context *snapc, | ||
175 | struct ceph_osd_req_op *ops, | ||
176 | bool use_mempool, | ||
177 | gfp_t gfp_flags, | ||
178 | struct page **pages, | ||
179 | struct bio *bio); | ||
180 | |||
181 | extern void ceph_osdc_build_request(struct ceph_osd_request *req, | ||
182 | u64 off, u64 *plen, | ||
183 | struct ceph_osd_req_op *src_ops, | ||
184 | struct ceph_snap_context *snapc, | ||
185 | struct timespec *mtime, | ||
186 | const char *oid, | ||
187 | int oid_len); | ||
188 | |||
122 | extern struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *, | 189 | extern struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *, |
123 | struct ceph_file_layout *layout, | 190 | struct ceph_file_layout *layout, |
124 | struct ceph_vino vino, | 191 | struct ceph_vino vino, |
diff --git a/fs/ceph/osdmap.h b/include/linux/ceph/osdmap.h index 970b547e510d..ba4c205cbb01 100644 --- a/fs/ceph/osdmap.h +++ b/include/linux/ceph/osdmap.h | |||
@@ -4,7 +4,7 @@ | |||
4 | #include <linux/rbtree.h> | 4 | #include <linux/rbtree.h> |
5 | #include "types.h" | 5 | #include "types.h" |
6 | #include "ceph_fs.h" | 6 | #include "ceph_fs.h" |
7 | #include "crush/crush.h" | 7 | #include <linux/crush/crush.h> |
8 | 8 | ||
9 | /* | 9 | /* |
10 | * The osd map describes the current membership of the osd cluster and | 10 | * The osd map describes the current membership of the osd cluster and |
@@ -125,4 +125,6 @@ extern int ceph_calc_pg_acting(struct ceph_osdmap *osdmap, struct ceph_pg pgid, | |||
125 | extern int ceph_calc_pg_primary(struct ceph_osdmap *osdmap, | 125 | extern int ceph_calc_pg_primary(struct ceph_osdmap *osdmap, |
126 | struct ceph_pg pgid); | 126 | struct ceph_pg pgid); |
127 | 127 | ||
128 | extern int ceph_pg_poolid_by_name(struct ceph_osdmap *map, const char *name); | ||
129 | |||
128 | #endif | 130 | #endif |
diff --git a/fs/ceph/pagelist.h b/include/linux/ceph/pagelist.h index e8a4187e1087..9660d6b0a35d 100644 --- a/fs/ceph/pagelist.h +++ b/include/linux/ceph/pagelist.h | |||
@@ -8,6 +8,14 @@ struct ceph_pagelist { | |||
8 | void *mapped_tail; | 8 | void *mapped_tail; |
9 | size_t length; | 9 | size_t length; |
10 | size_t room; | 10 | size_t room; |
11 | struct list_head free_list; | ||
12 | size_t num_pages_free; | ||
13 | }; | ||
14 | |||
15 | struct ceph_pagelist_cursor { | ||
16 | struct ceph_pagelist *pl; /* pagelist, for error checking */ | ||
17 | struct list_head *page_lru; /* page in list */ | ||
18 | size_t room; /* room remaining to reset to */ | ||
11 | }; | 19 | }; |
12 | 20 | ||
13 | static inline void ceph_pagelist_init(struct ceph_pagelist *pl) | 21 | static inline void ceph_pagelist_init(struct ceph_pagelist *pl) |
@@ -16,10 +24,23 @@ static inline void ceph_pagelist_init(struct ceph_pagelist *pl) | |||
16 | pl->mapped_tail = NULL; | 24 | pl->mapped_tail = NULL; |
17 | pl->length = 0; | 25 | pl->length = 0; |
18 | pl->room = 0; | 26 | pl->room = 0; |
27 | INIT_LIST_HEAD(&pl->free_list); | ||
28 | pl->num_pages_free = 0; | ||
19 | } | 29 | } |
30 | |||
20 | extern int ceph_pagelist_release(struct ceph_pagelist *pl); | 31 | extern int ceph_pagelist_release(struct ceph_pagelist *pl); |
21 | 32 | ||
22 | extern int ceph_pagelist_append(struct ceph_pagelist *pl, void *d, size_t l); | 33 | extern int ceph_pagelist_append(struct ceph_pagelist *pl, const void *d, size_t l); |
34 | |||
35 | extern int ceph_pagelist_reserve(struct ceph_pagelist *pl, size_t space); | ||
36 | |||
37 | extern int ceph_pagelist_free_reserve(struct ceph_pagelist *pl); | ||
38 | |||
39 | extern void ceph_pagelist_set_cursor(struct ceph_pagelist *pl, | ||
40 | struct ceph_pagelist_cursor *c); | ||
41 | |||
42 | extern int ceph_pagelist_truncate(struct ceph_pagelist *pl, | ||
43 | struct ceph_pagelist_cursor *c); | ||
23 | 44 | ||
24 | static inline int ceph_pagelist_encode_64(struct ceph_pagelist *pl, u64 v) | 45 | static inline int ceph_pagelist_encode_64(struct ceph_pagelist *pl, u64 v) |
25 | { | 46 | { |
diff --git a/fs/ceph/rados.h b/include/linux/ceph/rados.h index 6d5247f2e81b..6d5247f2e81b 100644 --- a/fs/ceph/rados.h +++ b/include/linux/ceph/rados.h | |||
diff --git a/fs/ceph/types.h b/include/linux/ceph/types.h index 28b35a005ec2..28b35a005ec2 100644 --- a/fs/ceph/types.h +++ b/include/linux/ceph/types.h | |||
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 0c991023ee47..709dfb901d11 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h | |||
@@ -75,7 +75,7 @@ struct cgroup_subsys_state { | |||
75 | 75 | ||
76 | unsigned long flags; | 76 | unsigned long flags; |
77 | /* ID for this css, if possible */ | 77 | /* ID for this css, if possible */ |
78 | struct css_id *id; | 78 | struct css_id __rcu *id; |
79 | }; | 79 | }; |
80 | 80 | ||
81 | /* bits in struct cgroup_subsys_state flags field */ | 81 | /* bits in struct cgroup_subsys_state flags field */ |
@@ -205,7 +205,7 @@ struct cgroup { | |||
205 | struct list_head children; /* my children */ | 205 | struct list_head children; /* my children */ |
206 | 206 | ||
207 | struct cgroup *parent; /* my parent */ | 207 | struct cgroup *parent; /* my parent */ |
208 | struct dentry *dentry; /* cgroup fs entry, RCU protected */ | 208 | struct dentry __rcu *dentry; /* cgroup fs entry, RCU protected */ |
209 | 209 | ||
210 | /* Private pointers for each registered subsystem */ | 210 | /* Private pointers for each registered subsystem */ |
211 | struct cgroup_subsys_state *subsys[CGROUP_SUBSYS_COUNT]; | 211 | struct cgroup_subsys_state *subsys[CGROUP_SUBSYS_COUNT]; |
diff --git a/include/linux/compiler.h b/include/linux/compiler.h index c1a62c56a660..320d6c94ff84 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h | |||
@@ -16,7 +16,11 @@ | |||
16 | # define __release(x) __context__(x,-1) | 16 | # define __release(x) __context__(x,-1) |
17 | # define __cond_lock(x,c) ((c) ? ({ __acquire(x); 1; }) : 0) | 17 | # define __cond_lock(x,c) ((c) ? ({ __acquire(x); 1; }) : 0) |
18 | # define __percpu __attribute__((noderef, address_space(3))) | 18 | # define __percpu __attribute__((noderef, address_space(3))) |
19 | #ifdef CONFIG_SPARSE_RCU_POINTER | ||
20 | # define __rcu __attribute__((noderef, address_space(4))) | ||
21 | #else | ||
19 | # define __rcu | 22 | # define __rcu |
23 | #endif | ||
20 | extern void __chk_user_ptr(const volatile void __user *); | 24 | extern void __chk_user_ptr(const volatile void __user *); |
21 | extern void __chk_io_ptr(const volatile void __iomem *); | 25 | extern void __chk_io_ptr(const volatile void __iomem *); |
22 | #else | 26 | #else |
diff --git a/include/linux/coredump.h b/include/linux/coredump.h index 8ba66a9d9022..ba4b85a6d9b8 100644 --- a/include/linux/coredump.h +++ b/include/linux/coredump.h | |||
@@ -9,37 +9,7 @@ | |||
9 | * These are the only things you should do on a core-file: use only these | 9 | * These are the only things you should do on a core-file: use only these |
10 | * functions to write out all the necessary info. | 10 | * functions to write out all the necessary info. |
11 | */ | 11 | */ |
12 | static inline int dump_write(struct file *file, const void *addr, int nr) | 12 | extern int dump_write(struct file *file, const void *addr, int nr); |
13 | { | 13 | extern int dump_seek(struct file *file, loff_t off); |
14 | return file->f_op->write(file, addr, nr, &file->f_pos) == nr; | ||
15 | } | ||
16 | |||
17 | static inline int dump_seek(struct file *file, loff_t off) | ||
18 | { | ||
19 | int ret = 1; | ||
20 | |||
21 | if (file->f_op->llseek && file->f_op->llseek != no_llseek) { | ||
22 | if (file->f_op->llseek(file, off, SEEK_CUR) < 0) | ||
23 | return 0; | ||
24 | } else { | ||
25 | char *buf = (char *)get_zeroed_page(GFP_KERNEL); | ||
26 | |||
27 | if (!buf) | ||
28 | return 0; | ||
29 | while (off > 0) { | ||
30 | unsigned long n = off; | ||
31 | |||
32 | if (n > PAGE_SIZE) | ||
33 | n = PAGE_SIZE; | ||
34 | if (!dump_write(file, buf, n)) { | ||
35 | ret = 0; | ||
36 | break; | ||
37 | } | ||
38 | off -= n; | ||
39 | } | ||
40 | free_page((unsigned long)buf); | ||
41 | } | ||
42 | return ret; | ||
43 | } | ||
44 | 14 | ||
45 | #endif /* _LINUX_COREDUMP_H */ | 15 | #endif /* _LINUX_COREDUMP_H */ |
diff --git a/include/linux/cred.h b/include/linux/cred.h index 4d2c39573f36..4aaeab376446 100644 --- a/include/linux/cred.h +++ b/include/linux/cred.h | |||
@@ -84,7 +84,7 @@ struct thread_group_cred { | |||
84 | atomic_t usage; | 84 | atomic_t usage; |
85 | pid_t tgid; /* thread group process ID */ | 85 | pid_t tgid; /* thread group process ID */ |
86 | spinlock_t lock; | 86 | spinlock_t lock; |
87 | struct key *session_keyring; /* keyring inherited over fork */ | 87 | struct key __rcu *session_keyring; /* keyring inherited over fork */ |
88 | struct key *process_keyring; /* keyring private to this process */ | 88 | struct key *process_keyring; /* keyring private to this process */ |
89 | struct rcu_head rcu; /* RCU deletion hook */ | 89 | struct rcu_head rcu; /* RCU deletion hook */ |
90 | }; | 90 | }; |
diff --git a/fs/ceph/crush/crush.h b/include/linux/crush/crush.h index 97e435b191f4..97e435b191f4 100644 --- a/fs/ceph/crush/crush.h +++ b/include/linux/crush/crush.h | |||
diff --git a/fs/ceph/crush/hash.h b/include/linux/crush/hash.h index 91e884230d5d..91e884230d5d 100644 --- a/fs/ceph/crush/hash.h +++ b/include/linux/crush/hash.h | |||
diff --git a/fs/ceph/crush/mapper.h b/include/linux/crush/mapper.h index c46b99c18bb0..c46b99c18bb0 100644 --- a/fs/ceph/crush/mapper.h +++ b/include/linux/crush/mapper.h | |||
diff --git a/include/linux/debug_locks.h b/include/linux/debug_locks.h index 29b3ce3f2a1d..2833452ea01c 100644 --- a/include/linux/debug_locks.h +++ b/include/linux/debug_locks.h | |||
@@ -49,7 +49,6 @@ struct task_struct; | |||
49 | 49 | ||
50 | #ifdef CONFIG_LOCKDEP | 50 | #ifdef CONFIG_LOCKDEP |
51 | extern void debug_show_all_locks(void); | 51 | extern void debug_show_all_locks(void); |
52 | extern void __debug_show_held_locks(struct task_struct *task); | ||
53 | extern void debug_show_held_locks(struct task_struct *task); | 52 | extern void debug_show_held_locks(struct task_struct *task); |
54 | extern void debug_check_no_locks_freed(const void *from, unsigned long len); | 53 | extern void debug_check_no_locks_freed(const void *from, unsigned long len); |
55 | extern void debug_check_no_locks_held(struct task_struct *task); | 54 | extern void debug_check_no_locks_held(struct task_struct *task); |
@@ -58,10 +57,6 @@ static inline void debug_show_all_locks(void) | |||
58 | { | 57 | { |
59 | } | 58 | } |
60 | 59 | ||
61 | static inline void __debug_show_held_locks(struct task_struct *task) | ||
62 | { | ||
63 | } | ||
64 | |||
65 | static inline void debug_show_held_locks(struct task_struct *task) | 60 | static inline void debug_show_held_locks(struct task_struct *task) |
66 | { | 61 | { |
67 | } | 62 | } |
diff --git a/include/linux/dynamic_debug.h b/include/linux/dynamic_debug.h index 52c0da4bdd18..bef3cda44c4c 100644 --- a/include/linux/dynamic_debug.h +++ b/include/linux/dynamic_debug.h | |||
@@ -1,6 +1,8 @@ | |||
1 | #ifndef _DYNAMIC_DEBUG_H | 1 | #ifndef _DYNAMIC_DEBUG_H |
2 | #define _DYNAMIC_DEBUG_H | 2 | #define _DYNAMIC_DEBUG_H |
3 | 3 | ||
4 | #include <linux/jump_label.h> | ||
5 | |||
4 | /* dynamic_printk_enabled, and dynamic_printk_enabled2 are bitmasks in which | 6 | /* dynamic_printk_enabled, and dynamic_printk_enabled2 are bitmasks in which |
5 | * bit n is set to 1 if any modname hashes into the bucket n, 0 otherwise. They | 7 | * bit n is set to 1 if any modname hashes into the bucket n, 0 otherwise. They |
6 | * use independent hash functions, to reduce the chance of false positives. | 8 | * use independent hash functions, to reduce the chance of false positives. |
@@ -22,8 +24,6 @@ struct _ddebug { | |||
22 | const char *function; | 24 | const char *function; |
23 | const char *filename; | 25 | const char *filename; |
24 | const char *format; | 26 | const char *format; |
25 | char primary_hash; | ||
26 | char secondary_hash; | ||
27 | unsigned int lineno:24; | 27 | unsigned int lineno:24; |
28 | /* | 28 | /* |
29 | * The flags field controls the behaviour at the callsite. | 29 | * The flags field controls the behaviour at the callsite. |
@@ -33,6 +33,7 @@ struct _ddebug { | |||
33 | #define _DPRINTK_FLAGS_PRINT (1<<0) /* printk() a message using the format */ | 33 | #define _DPRINTK_FLAGS_PRINT (1<<0) /* printk() a message using the format */ |
34 | #define _DPRINTK_FLAGS_DEFAULT 0 | 34 | #define _DPRINTK_FLAGS_DEFAULT 0 |
35 | unsigned int flags:8; | 35 | unsigned int flags:8; |
36 | char enabled; | ||
36 | } __attribute__((aligned(8))); | 37 | } __attribute__((aligned(8))); |
37 | 38 | ||
38 | 39 | ||
@@ -42,33 +43,35 @@ int ddebug_add_module(struct _ddebug *tab, unsigned int n, | |||
42 | #if defined(CONFIG_DYNAMIC_DEBUG) | 43 | #if defined(CONFIG_DYNAMIC_DEBUG) |
43 | extern int ddebug_remove_module(const char *mod_name); | 44 | extern int ddebug_remove_module(const char *mod_name); |
44 | 45 | ||
45 | #define __dynamic_dbg_enabled(dd) ({ \ | ||
46 | int __ret = 0; \ | ||
47 | if (unlikely((dynamic_debug_enabled & (1LL << DEBUG_HASH)) && \ | ||
48 | (dynamic_debug_enabled2 & (1LL << DEBUG_HASH2)))) \ | ||
49 | if (unlikely(dd.flags)) \ | ||
50 | __ret = 1; \ | ||
51 | __ret; }) | ||
52 | |||
53 | #define dynamic_pr_debug(fmt, ...) do { \ | 46 | #define dynamic_pr_debug(fmt, ...) do { \ |
47 | __label__ do_printk; \ | ||
48 | __label__ out; \ | ||
54 | static struct _ddebug descriptor \ | 49 | static struct _ddebug descriptor \ |
55 | __used \ | 50 | __used \ |
56 | __attribute__((section("__verbose"), aligned(8))) = \ | 51 | __attribute__((section("__verbose"), aligned(8))) = \ |
57 | { KBUILD_MODNAME, __func__, __FILE__, fmt, DEBUG_HASH, \ | 52 | { KBUILD_MODNAME, __func__, __FILE__, fmt, __LINE__, \ |
58 | DEBUG_HASH2, __LINE__, _DPRINTK_FLAGS_DEFAULT }; \ | 53 | _DPRINTK_FLAGS_DEFAULT }; \ |
59 | if (__dynamic_dbg_enabled(descriptor)) \ | 54 | JUMP_LABEL(&descriptor.enabled, do_printk); \ |
60 | printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__); \ | 55 | goto out; \ |
56 | do_printk: \ | ||
57 | printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__); \ | ||
58 | out: ; \ | ||
61 | } while (0) | 59 | } while (0) |
62 | 60 | ||
63 | 61 | ||
64 | #define dynamic_dev_dbg(dev, fmt, ...) do { \ | 62 | #define dynamic_dev_dbg(dev, fmt, ...) do { \ |
63 | __label__ do_printk; \ | ||
64 | __label__ out; \ | ||
65 | static struct _ddebug descriptor \ | 65 | static struct _ddebug descriptor \ |
66 | __used \ | 66 | __used \ |
67 | __attribute__((section("__verbose"), aligned(8))) = \ | 67 | __attribute__((section("__verbose"), aligned(8))) = \ |
68 | { KBUILD_MODNAME, __func__, __FILE__, fmt, DEBUG_HASH, \ | 68 | { KBUILD_MODNAME, __func__, __FILE__, fmt, __LINE__, \ |
69 | DEBUG_HASH2, __LINE__, _DPRINTK_FLAGS_DEFAULT }; \ | 69 | _DPRINTK_FLAGS_DEFAULT }; \ |
70 | if (__dynamic_dbg_enabled(descriptor)) \ | 70 | JUMP_LABEL(&descriptor.enabled, do_printk); \ |
71 | dev_printk(KERN_DEBUG, dev, fmt, ##__VA_ARGS__); \ | 71 | goto out; \ |
72 | do_printk: \ | ||
73 | dev_printk(KERN_DEBUG, dev, fmt, ##__VA_ARGS__); \ | ||
74 | out: ; \ | ||
72 | } while (0) | 75 | } while (0) |
73 | 76 | ||
74 | #else | 77 | #else |
diff --git a/include/linux/fdtable.h b/include/linux/fdtable.h index f59ed297b661..133c0ba25e30 100644 --- a/include/linux/fdtable.h +++ b/include/linux/fdtable.h | |||
@@ -31,7 +31,7 @@ struct embedded_fd_set { | |||
31 | 31 | ||
32 | struct fdtable { | 32 | struct fdtable { |
33 | unsigned int max_fds; | 33 | unsigned int max_fds; |
34 | struct file ** fd; /* current fd array */ | 34 | struct file __rcu **fd; /* current fd array */ |
35 | fd_set *close_on_exec; | 35 | fd_set *close_on_exec; |
36 | fd_set *open_fds; | 36 | fd_set *open_fds; |
37 | struct rcu_head rcu; | 37 | struct rcu_head rcu; |
@@ -46,7 +46,7 @@ struct files_struct { | |||
46 | * read mostly part | 46 | * read mostly part |
47 | */ | 47 | */ |
48 | atomic_t count; | 48 | atomic_t count; |
49 | struct fdtable *fdt; | 49 | struct fdtable __rcu *fdt; |
50 | struct fdtable fdtab; | 50 | struct fdtable fdtab; |
51 | /* | 51 | /* |
52 | * written part on a separate cache line in SMP | 52 | * written part on a separate cache line in SMP |
@@ -55,7 +55,7 @@ struct files_struct { | |||
55 | int next_fd; | 55 | int next_fd; |
56 | struct embedded_fd_set close_on_exec_init; | 56 | struct embedded_fd_set close_on_exec_init; |
57 | struct embedded_fd_set open_fds_init; | 57 | struct embedded_fd_set open_fds_init; |
58 | struct file * fd_array[NR_OPEN_DEFAULT]; | 58 | struct file __rcu * fd_array[NR_OPEN_DEFAULT]; |
59 | }; | 59 | }; |
60 | 60 | ||
61 | #define rcu_dereference_check_fdtable(files, fdtfd) \ | 61 | #define rcu_dereference_check_fdtable(files, fdtfd) \ |
diff --git a/include/linux/fs.h b/include/linux/fs.h index 63d069bd80b7..3168dcfb94f2 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h | |||
@@ -1384,7 +1384,7 @@ struct super_block { | |||
1384 | * Saved mount options for lazy filesystems using | 1384 | * Saved mount options for lazy filesystems using |
1385 | * generic_show_options() | 1385 | * generic_show_options() |
1386 | */ | 1386 | */ |
1387 | char *s_options; | 1387 | char __rcu *s_options; |
1388 | }; | 1388 | }; |
1389 | 1389 | ||
1390 | extern struct timespec current_fs_time(struct super_block *sb); | 1390 | extern struct timespec current_fs_time(struct super_block *sb); |
diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index 02b8b24f8f51..8beabb958f61 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h | |||
@@ -191,8 +191,8 @@ struct ftrace_event_call { | |||
191 | unsigned int flags; | 191 | unsigned int flags; |
192 | 192 | ||
193 | #ifdef CONFIG_PERF_EVENTS | 193 | #ifdef CONFIG_PERF_EVENTS |
194 | int perf_refcount; | 194 | int perf_refcount; |
195 | struct hlist_head *perf_events; | 195 | struct hlist_head __percpu *perf_events; |
196 | #endif | 196 | #endif |
197 | }; | 197 | }; |
198 | 198 | ||
@@ -252,8 +252,8 @@ DECLARE_PER_CPU(struct pt_regs, perf_trace_regs); | |||
252 | 252 | ||
253 | extern int perf_trace_init(struct perf_event *event); | 253 | extern int perf_trace_init(struct perf_event *event); |
254 | extern void perf_trace_destroy(struct perf_event *event); | 254 | extern void perf_trace_destroy(struct perf_event *event); |
255 | extern int perf_trace_enable(struct perf_event *event); | 255 | extern int perf_trace_add(struct perf_event *event, int flags); |
256 | extern void perf_trace_disable(struct perf_event *event); | 256 | extern void perf_trace_del(struct perf_event *event, int flags); |
257 | extern int ftrace_profile_set_filter(struct perf_event *event, int event_id, | 257 | extern int ftrace_profile_set_filter(struct perf_event *event, int event_id, |
258 | char *filter_str); | 258 | char *filter_str); |
259 | extern void ftrace_profile_free_filter(struct perf_event *event); | 259 | extern void ftrace_profile_free_filter(struct perf_event *event); |
diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 5f2f4c4d8fb0..af3f06b41dc1 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h | |||
@@ -129,8 +129,8 @@ struct blk_scsi_cmd_filter { | |||
129 | struct disk_part_tbl { | 129 | struct disk_part_tbl { |
130 | struct rcu_head rcu_head; | 130 | struct rcu_head rcu_head; |
131 | int len; | 131 | int len; |
132 | struct hd_struct *last_lookup; | 132 | struct hd_struct __rcu *last_lookup; |
133 | struct hd_struct *part[]; | 133 | struct hd_struct __rcu *part[]; |
134 | }; | 134 | }; |
135 | 135 | ||
136 | struct gendisk { | 136 | struct gendisk { |
@@ -149,7 +149,7 @@ struct gendisk { | |||
149 | * non-critical accesses use RCU. Always access through | 149 | * non-critical accesses use RCU. Always access through |
150 | * helpers. | 150 | * helpers. |
151 | */ | 151 | */ |
152 | struct disk_part_tbl *part_tbl; | 152 | struct disk_part_tbl __rcu *part_tbl; |
153 | struct hd_struct part0; | 153 | struct hd_struct part0; |
154 | 154 | ||
155 | const struct block_device_operations *fops; | 155 | const struct block_device_operations *fops; |
diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h index ff43e9268449..96c323ac44df 100644 --- a/include/linux/hardirq.h +++ b/include/linux/hardirq.h | |||
@@ -146,7 +146,7 @@ extern void account_system_vtime(struct task_struct *tsk); | |||
146 | #endif | 146 | #endif |
147 | 147 | ||
148 | #if defined(CONFIG_NO_HZ) | 148 | #if defined(CONFIG_NO_HZ) |
149 | #if defined(CONFIG_TINY_RCU) | 149 | #if defined(CONFIG_TINY_RCU) || defined(CONFIG_TINY_PREEMPT_RCU) |
150 | extern void rcu_enter_nohz(void); | 150 | extern void rcu_enter_nohz(void); |
151 | extern void rcu_exit_nohz(void); | 151 | extern void rcu_exit_nohz(void); |
152 | 152 | ||
diff --git a/include/linux/idr.h b/include/linux/idr.h index e968db71e33a..cdb715e58e3e 100644 --- a/include/linux/idr.h +++ b/include/linux/idr.h | |||
@@ -50,14 +50,14 @@ | |||
50 | 50 | ||
51 | struct idr_layer { | 51 | struct idr_layer { |
52 | unsigned long bitmap; /* A zero bit means "space here" */ | 52 | unsigned long bitmap; /* A zero bit means "space here" */ |
53 | struct idr_layer *ary[1<<IDR_BITS]; | 53 | struct idr_layer __rcu *ary[1<<IDR_BITS]; |
54 | int count; /* When zero, we can release it */ | 54 | int count; /* When zero, we can release it */ |
55 | int layer; /* distance from leaf */ | 55 | int layer; /* distance from leaf */ |
56 | struct rcu_head rcu_head; | 56 | struct rcu_head rcu_head; |
57 | }; | 57 | }; |
58 | 58 | ||
59 | struct idr { | 59 | struct idr { |
60 | struct idr_layer *top; | 60 | struct idr_layer __rcu *top; |
61 | struct idr_layer *id_free; | 61 | struct idr_layer *id_free; |
62 | int layers; /* only valid without concurrent changes */ | 62 | int layers; /* only valid without concurrent changes */ |
63 | int id_free_cnt; | 63 | int id_free_cnt; |
diff --git a/include/linux/init_task.h b/include/linux/init_task.h index 1f43fa56f600..2fea6c8ef6ba 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h | |||
@@ -82,11 +82,17 @@ extern struct group_info init_groups; | |||
82 | # define CAP_INIT_BSET CAP_FULL_SET | 82 | # define CAP_INIT_BSET CAP_FULL_SET |
83 | 83 | ||
84 | #ifdef CONFIG_TREE_PREEMPT_RCU | 84 | #ifdef CONFIG_TREE_PREEMPT_RCU |
85 | #define INIT_TASK_RCU_TREE_PREEMPT() \ | ||
86 | .rcu_blocked_node = NULL, | ||
87 | #else | ||
88 | #define INIT_TASK_RCU_TREE_PREEMPT(tsk) | ||
89 | #endif | ||
90 | #ifdef CONFIG_PREEMPT_RCU | ||
85 | #define INIT_TASK_RCU_PREEMPT(tsk) \ | 91 | #define INIT_TASK_RCU_PREEMPT(tsk) \ |
86 | .rcu_read_lock_nesting = 0, \ | 92 | .rcu_read_lock_nesting = 0, \ |
87 | .rcu_read_unlock_special = 0, \ | 93 | .rcu_read_unlock_special = 0, \ |
88 | .rcu_blocked_node = NULL, \ | 94 | .rcu_node_entry = LIST_HEAD_INIT(tsk.rcu_node_entry), \ |
89 | .rcu_node_entry = LIST_HEAD_INIT(tsk.rcu_node_entry), | 95 | INIT_TASK_RCU_TREE_PREEMPT() |
90 | #else | 96 | #else |
91 | #define INIT_TASK_RCU_PREEMPT(tsk) | 97 | #define INIT_TASK_RCU_PREEMPT(tsk) |
92 | #endif | 98 | #endif |
@@ -137,8 +143,8 @@ extern struct cred init_cred; | |||
137 | .children = LIST_HEAD_INIT(tsk.children), \ | 143 | .children = LIST_HEAD_INIT(tsk.children), \ |
138 | .sibling = LIST_HEAD_INIT(tsk.sibling), \ | 144 | .sibling = LIST_HEAD_INIT(tsk.sibling), \ |
139 | .group_leader = &tsk, \ | 145 | .group_leader = &tsk, \ |
140 | .real_cred = &init_cred, \ | 146 | RCU_INIT_POINTER(.real_cred, &init_cred), \ |
141 | .cred = &init_cred, \ | 147 | RCU_INIT_POINTER(.cred, &init_cred), \ |
142 | .cred_guard_mutex = \ | 148 | .cred_guard_mutex = \ |
143 | __MUTEX_INITIALIZER(tsk.cred_guard_mutex), \ | 149 | __MUTEX_INITIALIZER(tsk.cred_guard_mutex), \ |
144 | .comm = "swapper", \ | 150 | .comm = "swapper", \ |
diff --git a/include/linux/input.h b/include/linux/input.h index 896a92227bc4..d6ae1761be97 100644 --- a/include/linux/input.h +++ b/include/linux/input.h | |||
@@ -1196,7 +1196,7 @@ struct input_dev { | |||
1196 | int (*flush)(struct input_dev *dev, struct file *file); | 1196 | int (*flush)(struct input_dev *dev, struct file *file); |
1197 | int (*event)(struct input_dev *dev, unsigned int type, unsigned int code, int value); | 1197 | int (*event)(struct input_dev *dev, unsigned int type, unsigned int code, int value); |
1198 | 1198 | ||
1199 | struct input_handle *grab; | 1199 | struct input_handle __rcu *grab; |
1200 | 1200 | ||
1201 | spinlock_t event_lock; | 1201 | spinlock_t event_lock; |
1202 | struct mutex mutex; | 1202 | struct mutex mutex; |
diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index a0384a4d1e6f..531495db1708 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h | |||
@@ -18,6 +18,7 @@ | |||
18 | #include <asm/atomic.h> | 18 | #include <asm/atomic.h> |
19 | #include <asm/ptrace.h> | 19 | #include <asm/ptrace.h> |
20 | #include <asm/system.h> | 20 | #include <asm/system.h> |
21 | #include <trace/events/irq.h> | ||
21 | 22 | ||
22 | /* | 23 | /* |
23 | * These correspond to the IORESOURCE_IRQ_* defines in | 24 | * These correspond to the IORESOURCE_IRQ_* defines in |
@@ -407,7 +408,12 @@ asmlinkage void do_softirq(void); | |||
407 | asmlinkage void __do_softirq(void); | 408 | asmlinkage void __do_softirq(void); |
408 | extern void open_softirq(int nr, void (*action)(struct softirq_action *)); | 409 | extern void open_softirq(int nr, void (*action)(struct softirq_action *)); |
409 | extern void softirq_init(void); | 410 | extern void softirq_init(void); |
410 | #define __raise_softirq_irqoff(nr) do { or_softirq_pending(1UL << (nr)); } while (0) | 411 | static inline void __raise_softirq_irqoff(unsigned int nr) |
412 | { | ||
413 | trace_softirq_raise((struct softirq_action *)(unsigned long)nr, NULL); | ||
414 | or_softirq_pending(1UL << nr); | ||
415 | } | ||
416 | |||
411 | extern void raise_softirq_irqoff(unsigned int nr); | 417 | extern void raise_softirq_irqoff(unsigned int nr); |
412 | extern void raise_softirq(unsigned int nr); | 418 | extern void raise_softirq(unsigned int nr); |
413 | extern void wakeup_softirqd(void); | 419 | extern void wakeup_softirqd(void); |
diff --git a/include/linux/iocontext.h b/include/linux/iocontext.h index 64d529133031..3e70b21884a9 100644 --- a/include/linux/iocontext.h +++ b/include/linux/iocontext.h | |||
@@ -53,7 +53,7 @@ struct io_context { | |||
53 | 53 | ||
54 | struct radix_tree_root radix_root; | 54 | struct radix_tree_root radix_root; |
55 | struct hlist_head cic_list; | 55 | struct hlist_head cic_list; |
56 | void *ioc_data; | 56 | void __rcu *ioc_data; |
57 | }; | 57 | }; |
58 | 58 | ||
59 | static inline struct io_context *ioc_task_link(struct io_context *ioc) | 59 | static inline struct io_context *ioc_task_link(struct io_context *ioc) |
diff --git a/include/linux/irq_work.h b/include/linux/irq_work.h new file mode 100644 index 000000000000..4fa09d4d0b71 --- /dev/null +++ b/include/linux/irq_work.h | |||
@@ -0,0 +1,20 @@ | |||
1 | #ifndef _LINUX_IRQ_WORK_H | ||
2 | #define _LINUX_IRQ_WORK_H | ||
3 | |||
4 | struct irq_work { | ||
5 | struct irq_work *next; | ||
6 | void (*func)(struct irq_work *); | ||
7 | }; | ||
8 | |||
9 | static inline | ||
10 | void init_irq_work(struct irq_work *entry, void (*func)(struct irq_work *)) | ||
11 | { | ||
12 | entry->next = NULL; | ||
13 | entry->func = func; | ||
14 | } | ||
15 | |||
16 | bool irq_work_queue(struct irq_work *entry); | ||
17 | void irq_work_run(void); | ||
18 | void irq_work_sync(struct irq_work *entry); | ||
19 | |||
20 | #endif /* _LINUX_IRQ_WORK_H */ | ||
diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h new file mode 100644 index 000000000000..b67cb180e6e9 --- /dev/null +++ b/include/linux/jump_label.h | |||
@@ -0,0 +1,74 @@ | |||
1 | #ifndef _LINUX_JUMP_LABEL_H | ||
2 | #define _LINUX_JUMP_LABEL_H | ||
3 | |||
4 | #if defined(CC_HAVE_ASM_GOTO) && defined(CONFIG_HAVE_ARCH_JUMP_LABEL) | ||
5 | # include <asm/jump_label.h> | ||
6 | # define HAVE_JUMP_LABEL | ||
7 | #endif | ||
8 | |||
9 | enum jump_label_type { | ||
10 | JUMP_LABEL_ENABLE, | ||
11 | JUMP_LABEL_DISABLE | ||
12 | }; | ||
13 | |||
14 | struct module; | ||
15 | |||
16 | #ifdef HAVE_JUMP_LABEL | ||
17 | |||
18 | extern struct jump_entry __start___jump_table[]; | ||
19 | extern struct jump_entry __stop___jump_table[]; | ||
20 | |||
21 | extern void arch_jump_label_transform(struct jump_entry *entry, | ||
22 | enum jump_label_type type); | ||
23 | extern void arch_jump_label_text_poke_early(jump_label_t addr); | ||
24 | extern void jump_label_update(unsigned long key, enum jump_label_type type); | ||
25 | extern void jump_label_apply_nops(struct module *mod); | ||
26 | extern int jump_label_text_reserved(void *start, void *end); | ||
27 | |||
28 | #define jump_label_enable(key) \ | ||
29 | jump_label_update((unsigned long)key, JUMP_LABEL_ENABLE); | ||
30 | |||
31 | #define jump_label_disable(key) \ | ||
32 | jump_label_update((unsigned long)key, JUMP_LABEL_DISABLE); | ||
33 | |||
34 | #else | ||
35 | |||
36 | #define JUMP_LABEL(key, label) \ | ||
37 | do { \ | ||
38 | if (unlikely(*key)) \ | ||
39 | goto label; \ | ||
40 | } while (0) | ||
41 | |||
42 | #define jump_label_enable(cond_var) \ | ||
43 | do { \ | ||
44 | *(cond_var) = 1; \ | ||
45 | } while (0) | ||
46 | |||
47 | #define jump_label_disable(cond_var) \ | ||
48 | do { \ | ||
49 | *(cond_var) = 0; \ | ||
50 | } while (0) | ||
51 | |||
52 | static inline int jump_label_apply_nops(struct module *mod) | ||
53 | { | ||
54 | return 0; | ||
55 | } | ||
56 | |||
57 | static inline int jump_label_text_reserved(void *start, void *end) | ||
58 | { | ||
59 | return 0; | ||
60 | } | ||
61 | |||
62 | #endif | ||
63 | |||
64 | #define COND_STMT(key, stmt) \ | ||
65 | do { \ | ||
66 | __label__ jl_enabled; \ | ||
67 | JUMP_LABEL(key, jl_enabled); \ | ||
68 | if (0) { \ | ||
69 | jl_enabled: \ | ||
70 | stmt; \ | ||
71 | } \ | ||
72 | } while (0) | ||
73 | |||
74 | #endif | ||
diff --git a/include/linux/jump_label_ref.h b/include/linux/jump_label_ref.h new file mode 100644 index 000000000000..e5d012ad92c6 --- /dev/null +++ b/include/linux/jump_label_ref.h | |||
@@ -0,0 +1,44 @@ | |||
1 | #ifndef _LINUX_JUMP_LABEL_REF_H | ||
2 | #define _LINUX_JUMP_LABEL_REF_H | ||
3 | |||
4 | #include <linux/jump_label.h> | ||
5 | #include <asm/atomic.h> | ||
6 | |||
7 | #ifdef HAVE_JUMP_LABEL | ||
8 | |||
9 | static inline void jump_label_inc(atomic_t *key) | ||
10 | { | ||
11 | if (atomic_add_return(1, key) == 1) | ||
12 | jump_label_enable(key); | ||
13 | } | ||
14 | |||
15 | static inline void jump_label_dec(atomic_t *key) | ||
16 | { | ||
17 | if (atomic_dec_and_test(key)) | ||
18 | jump_label_disable(key); | ||
19 | } | ||
20 | |||
21 | #else /* !HAVE_JUMP_LABEL */ | ||
22 | |||
23 | static inline void jump_label_inc(atomic_t *key) | ||
24 | { | ||
25 | atomic_inc(key); | ||
26 | } | ||
27 | |||
28 | static inline void jump_label_dec(atomic_t *key) | ||
29 | { | ||
30 | atomic_dec(key); | ||
31 | } | ||
32 | |||
33 | #undef JUMP_LABEL | ||
34 | #define JUMP_LABEL(key, label) \ | ||
35 | do { \ | ||
36 | if (unlikely(__builtin_choose_expr( \ | ||
37 | __builtin_types_compatible_p(typeof(key), atomic_t *), \ | ||
38 | atomic_read((atomic_t *)(key)), *(key)))) \ | ||
39 | goto label; \ | ||
40 | } while (0) | ||
41 | |||
42 | #endif /* HAVE_JUMP_LABEL */ | ||
43 | |||
44 | #endif /* _LINUX_JUMP_LABEL_REF_H */ | ||
diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 2b0a35e6bc69..1759ba5adce8 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h | |||
@@ -58,7 +58,18 @@ extern const char linux_proc_banner[]; | |||
58 | 58 | ||
59 | #define FIELD_SIZEOF(t, f) (sizeof(((t*)0)->f)) | 59 | #define FIELD_SIZEOF(t, f) (sizeof(((t*)0)->f)) |
60 | #define DIV_ROUND_UP(n,d) (((n) + (d) - 1) / (d)) | 60 | #define DIV_ROUND_UP(n,d) (((n) + (d) - 1) / (d)) |
61 | #define roundup(x, y) ((((x) + ((y) - 1)) / (y)) * (y)) | 61 | #define roundup(x, y) ( \ |
62 | { \ | ||
63 | typeof(y) __y = y; \ | ||
64 | (((x) + (__y - 1)) / __y) * __y; \ | ||
65 | } \ | ||
66 | ) | ||
67 | #define rounddown(x, y) ( \ | ||
68 | { \ | ||
69 | typeof(x) __x = (x); \ | ||
70 | __x - (__x % (y)); \ | ||
71 | } \ | ||
72 | ) | ||
62 | #define DIV_ROUND_CLOSEST(x, divisor)( \ | 73 | #define DIV_ROUND_CLOSEST(x, divisor)( \ |
63 | { \ | 74 | { \ |
64 | typeof(divisor) __divisor = divisor; \ | 75 | typeof(divisor) __divisor = divisor; \ |
diff --git a/include/linux/key.h b/include/linux/key.h index cd50dfa1d4c2..3db0adce1fda 100644 --- a/include/linux/key.h +++ b/include/linux/key.h | |||
@@ -178,8 +178,9 @@ struct key { | |||
178 | */ | 178 | */ |
179 | union { | 179 | union { |
180 | unsigned long value; | 180 | unsigned long value; |
181 | void __rcu *rcudata; | ||
181 | void *data; | 182 | void *data; |
182 | struct keyring_list *subscriptions; | 183 | struct keyring_list __rcu *subscriptions; |
183 | } payload; | 184 | } payload; |
184 | }; | 185 | }; |
185 | 186 | ||
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index c13cc48697aa..ac740b26eb10 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h | |||
@@ -205,7 +205,7 @@ struct kvm { | |||
205 | 205 | ||
206 | struct mutex irq_lock; | 206 | struct mutex irq_lock; |
207 | #ifdef CONFIG_HAVE_KVM_IRQCHIP | 207 | #ifdef CONFIG_HAVE_KVM_IRQCHIP |
208 | struct kvm_irq_routing_table *irq_routing; | 208 | struct kvm_irq_routing_table __rcu *irq_routing; |
209 | struct hlist_head mask_notifier_list; | 209 | struct hlist_head mask_notifier_list; |
210 | struct hlist_head irq_ack_notifier_list; | 210 | struct hlist_head irq_ack_notifier_list; |
211 | #endif | 211 | #endif |
diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index 06aed8305bf3..2186a64ee4b5 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h | |||
@@ -32,6 +32,17 @@ extern int lock_stat; | |||
32 | #define MAX_LOCKDEP_SUBCLASSES 8UL | 32 | #define MAX_LOCKDEP_SUBCLASSES 8UL |
33 | 33 | ||
34 | /* | 34 | /* |
35 | * NR_LOCKDEP_CACHING_CLASSES ... Number of classes | ||
36 | * cached in the instance of lockdep_map | ||
37 | * | ||
38 | * Currently the main class (subclass == 0) and the single-depth subclass | ||
39 | * are cached in lockdep_map. This optimization mainly targets | ||
40 | * rq->lock: double_rq_lock() acquires this highly contended lock with | ||
41 | * single depth. | ||
42 | */ | ||
43 | #define NR_LOCKDEP_CACHING_CLASSES 2 | ||
44 | |||
45 | /* | ||
35 | * Lock-classes are keyed via unique addresses, by embedding the | 46 | * Lock-classes are keyed via unique addresses, by embedding the |
36 | * lockclass-key into the kernel (or module) .data section. (For | 47 | * lockclass-key into the kernel (or module) .data section. (For |
37 | * static locks we use the lock address itself as the key.) | 48 | * static locks we use the lock address itself as the key.) |
@@ -138,7 +149,7 @@ void clear_lock_stats(struct lock_class *class); | |||
138 | */ | 149 | */ |
139 | struct lockdep_map { | 150 | struct lockdep_map { |
140 | struct lock_class_key *key; | 151 | struct lock_class_key *key; |
141 | struct lock_class *class_cache; | 152 | struct lock_class *class_cache[NR_LOCKDEP_CACHING_CLASSES]; |
142 | const char *name; | 153 | const char *name; |
143 | #ifdef CONFIG_LOCK_STAT | 154 | #ifdef CONFIG_LOCK_STAT |
144 | int cpu; | 155 | int cpu; |
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index ee7e258627f9..cb57d657ce4d 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h | |||
@@ -299,7 +299,7 @@ struct mm_struct { | |||
299 | * new_owner->mm == mm | 299 | * new_owner->mm == mm |
300 | * new_owner->alloc_lock is held | 300 | * new_owner->alloc_lock is held |
301 | */ | 301 | */ |
302 | struct task_struct *owner; | 302 | struct task_struct __rcu *owner; |
303 | #endif | 303 | #endif |
304 | 304 | ||
305 | #ifdef CONFIG_PROC_FS | 305 | #ifdef CONFIG_PROC_FS |
diff --git a/include/linux/module.h b/include/linux/module.h index aace066bad8f..b29e7458b966 100644 --- a/include/linux/module.h +++ b/include/linux/module.h | |||
@@ -350,7 +350,10 @@ struct module | |||
350 | struct tracepoint *tracepoints; | 350 | struct tracepoint *tracepoints; |
351 | unsigned int num_tracepoints; | 351 | unsigned int num_tracepoints; |
352 | #endif | 352 | #endif |
353 | 353 | #ifdef HAVE_JUMP_LABEL | |
354 | struct jump_entry *jump_entries; | ||
355 | unsigned int num_jump_entries; | ||
356 | #endif | ||
354 | #ifdef CONFIG_TRACING | 357 | #ifdef CONFIG_TRACING |
355 | const char **trace_bprintk_fmt_start; | 358 | const char **trace_bprintk_fmt_start; |
356 | unsigned int num_trace_bprintk_fmt; | 359 | unsigned int num_trace_bprintk_fmt; |
diff --git a/include/linux/netfilter/nfnetlink_conntrack.h b/include/linux/netfilter/nfnetlink_conntrack.h index 9ed534c991b9..70cd0603911c 100644 --- a/include/linux/netfilter/nfnetlink_conntrack.h +++ b/include/linux/netfilter/nfnetlink_conntrack.h | |||
@@ -39,8 +39,9 @@ enum ctattr_type { | |||
39 | CTA_TUPLE_MASTER, | 39 | CTA_TUPLE_MASTER, |
40 | CTA_NAT_SEQ_ADJ_ORIG, | 40 | CTA_NAT_SEQ_ADJ_ORIG, |
41 | CTA_NAT_SEQ_ADJ_REPLY, | 41 | CTA_NAT_SEQ_ADJ_REPLY, |
42 | CTA_SECMARK, | 42 | CTA_SECMARK, /* obsolete */ |
43 | CTA_ZONE, | 43 | CTA_ZONE, |
44 | CTA_SECCTX, | ||
44 | __CTA_MAX | 45 | __CTA_MAX |
45 | }; | 46 | }; |
46 | #define CTA_MAX (__CTA_MAX - 1) | 47 | #define CTA_MAX (__CTA_MAX - 1) |
@@ -172,4 +173,11 @@ enum ctattr_help { | |||
172 | }; | 173 | }; |
173 | #define CTA_HELP_MAX (__CTA_HELP_MAX - 1) | 174 | #define CTA_HELP_MAX (__CTA_HELP_MAX - 1) |
174 | 175 | ||
176 | enum ctattr_secctx { | ||
177 | CTA_SECCTX_UNSPEC, | ||
178 | CTA_SECCTX_NAME, | ||
179 | __CTA_SECCTX_MAX | ||
180 | }; | ||
181 | #define CTA_SECCTX_MAX (__CTA_SECCTX_MAX - 1) | ||
182 | |||
175 | #endif /* _IPCONNTRACK_NETLINK_H */ | 183 | #endif /* _IPCONNTRACK_NETLINK_H */ |
diff --git a/include/linux/netfilter/xt_SECMARK.h b/include/linux/netfilter/xt_SECMARK.h index 6fcd3448b186..989092bd6274 100644 --- a/include/linux/netfilter/xt_SECMARK.h +++ b/include/linux/netfilter/xt_SECMARK.h | |||
@@ -11,18 +11,12 @@ | |||
11 | * packets are being marked for. | 11 | * packets are being marked for. |
12 | */ | 12 | */ |
13 | #define SECMARK_MODE_SEL 0x01 /* SELinux */ | 13 | #define SECMARK_MODE_SEL 0x01 /* SELinux */ |
14 | #define SECMARK_SELCTX_MAX 256 | 14 | #define SECMARK_SECCTX_MAX 256 |
15 | |||
16 | struct xt_secmark_target_selinux_info { | ||
17 | __u32 selsid; | ||
18 | char selctx[SECMARK_SELCTX_MAX]; | ||
19 | }; | ||
20 | 15 | ||
21 | struct xt_secmark_target_info { | 16 | struct xt_secmark_target_info { |
22 | __u8 mode; | 17 | __u8 mode; |
23 | union { | 18 | __u32 secid; |
24 | struct xt_secmark_target_selinux_info sel; | 19 | char secctx[SECMARK_SECCTX_MAX]; |
25 | } u; | ||
26 | }; | 20 | }; |
27 | 21 | ||
28 | #endif /*_XT_SECMARK_H_target */ | 22 | #endif /*_XT_SECMARK_H_target */ |
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 508f8cf6da37..d0edf7d823ae 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h | |||
@@ -185,7 +185,7 @@ struct nfs_inode { | |||
185 | struct nfs4_cached_acl *nfs4_acl; | 185 | struct nfs4_cached_acl *nfs4_acl; |
186 | /* NFSv4 state */ | 186 | /* NFSv4 state */ |
187 | struct list_head open_states; | 187 | struct list_head open_states; |
188 | struct nfs_delegation *delegation; | 188 | struct nfs_delegation __rcu *delegation; |
189 | fmode_t delegation_state; | 189 | fmode_t delegation_state; |
190 | struct rw_semaphore rwsem; | 190 | struct rw_semaphore rwsem; |
191 | #endif /* CONFIG_NFS_V4*/ | 191 | #endif /* CONFIG_NFS_V4*/ |
diff --git a/include/linux/notifier.h b/include/linux/notifier.h index b2f1a4d83550..2026f9e1ceb8 100644 --- a/include/linux/notifier.h +++ b/include/linux/notifier.h | |||
@@ -49,28 +49,28 @@ | |||
49 | 49 | ||
50 | struct notifier_block { | 50 | struct notifier_block { |
51 | int (*notifier_call)(struct notifier_block *, unsigned long, void *); | 51 | int (*notifier_call)(struct notifier_block *, unsigned long, void *); |
52 | struct notifier_block *next; | 52 | struct notifier_block __rcu *next; |
53 | int priority; | 53 | int priority; |
54 | }; | 54 | }; |
55 | 55 | ||
56 | struct atomic_notifier_head { | 56 | struct atomic_notifier_head { |
57 | spinlock_t lock; | 57 | spinlock_t lock; |
58 | struct notifier_block *head; | 58 | struct notifier_block __rcu *head; |
59 | }; | 59 | }; |
60 | 60 | ||
61 | struct blocking_notifier_head { | 61 | struct blocking_notifier_head { |
62 | struct rw_semaphore rwsem; | 62 | struct rw_semaphore rwsem; |
63 | struct notifier_block *head; | 63 | struct notifier_block __rcu *head; |
64 | }; | 64 | }; |
65 | 65 | ||
66 | struct raw_notifier_head { | 66 | struct raw_notifier_head { |
67 | struct notifier_block *head; | 67 | struct notifier_block __rcu *head; |
68 | }; | 68 | }; |
69 | 69 | ||
70 | struct srcu_notifier_head { | 70 | struct srcu_notifier_head { |
71 | struct mutex mutex; | 71 | struct mutex mutex; |
72 | struct srcu_struct srcu; | 72 | struct srcu_struct srcu; |
73 | struct notifier_block *head; | 73 | struct notifier_block __rcu *head; |
74 | }; | 74 | }; |
75 | 75 | ||
76 | #define ATOMIC_INIT_NOTIFIER_HEAD(name) do { \ | 76 | #define ATOMIC_INIT_NOTIFIER_HEAD(name) do { \ |
diff --git a/include/linux/oprofile.h b/include/linux/oprofile.h index 5171639ecf0f..32fb81212fd1 100644 --- a/include/linux/oprofile.h +++ b/include/linux/oprofile.h | |||
@@ -15,6 +15,7 @@ | |||
15 | 15 | ||
16 | #include <linux/types.h> | 16 | #include <linux/types.h> |
17 | #include <linux/spinlock.h> | 17 | #include <linux/spinlock.h> |
18 | #include <linux/init.h> | ||
18 | #include <asm/atomic.h> | 19 | #include <asm/atomic.h> |
19 | 20 | ||
20 | /* Each escaped entry is prefixed by ESCAPE_CODE | 21 | /* Each escaped entry is prefixed by ESCAPE_CODE |
@@ -185,4 +186,10 @@ int oprofile_add_data(struct op_entry *entry, unsigned long val); | |||
185 | int oprofile_add_data64(struct op_entry *entry, u64 val); | 186 | int oprofile_add_data64(struct op_entry *entry, u64 val); |
186 | int oprofile_write_commit(struct op_entry *entry); | 187 | int oprofile_write_commit(struct op_entry *entry); |
187 | 188 | ||
189 | #ifdef CONFIG_PERF_EVENTS | ||
190 | int __init oprofile_perf_init(struct oprofile_operations *ops); | ||
191 | void oprofile_perf_exit(void); | ||
192 | char *op_name_from_perf_id(void); | ||
193 | #endif /* CONFIG_PERF_EVENTS */ | ||
194 | |||
188 | #endif /* OPROFILE_H */ | 195 | #endif /* OPROFILE_H */ |
diff --git a/include/linux/percpu.h b/include/linux/percpu.h index 49466b13c5c6..0eb50832aa00 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h | |||
@@ -39,6 +39,15 @@ | |||
39 | preempt_enable(); \ | 39 | preempt_enable(); \ |
40 | } while (0) | 40 | } while (0) |
41 | 41 | ||
42 | #define get_cpu_ptr(var) ({ \ | ||
43 | preempt_disable(); \ | ||
44 | this_cpu_ptr(var); }) | ||
45 | |||
46 | #define put_cpu_ptr(var) do { \ | ||
47 | (void)(var); \ | ||
48 | preempt_enable(); \ | ||
49 | } while (0) | ||
50 | |||
42 | #ifdef CONFIG_SMP | 51 | #ifdef CONFIG_SMP |
43 | 52 | ||
44 | /* minimum unit size, also is the maximum supported allocation size */ | 53 | /* minimum unit size, also is the maximum supported allocation size */ |
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 716f99b682c1..057bf22a8323 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h | |||
@@ -486,6 +486,8 @@ struct perf_guest_info_callbacks { | |||
486 | #include <linux/workqueue.h> | 486 | #include <linux/workqueue.h> |
487 | #include <linux/ftrace.h> | 487 | #include <linux/ftrace.h> |
488 | #include <linux/cpu.h> | 488 | #include <linux/cpu.h> |
489 | #include <linux/irq_work.h> | ||
490 | #include <linux/jump_label_ref.h> | ||
489 | #include <asm/atomic.h> | 491 | #include <asm/atomic.h> |
490 | #include <asm/local.h> | 492 | #include <asm/local.h> |
491 | 493 | ||
@@ -529,16 +531,22 @@ struct hw_perf_event { | |||
529 | int last_cpu; | 531 | int last_cpu; |
530 | }; | 532 | }; |
531 | struct { /* software */ | 533 | struct { /* software */ |
532 | s64 remaining; | ||
533 | struct hrtimer hrtimer; | 534 | struct hrtimer hrtimer; |
534 | }; | 535 | }; |
535 | #ifdef CONFIG_HAVE_HW_BREAKPOINT | 536 | #ifdef CONFIG_HAVE_HW_BREAKPOINT |
536 | struct { /* breakpoint */ | 537 | struct { /* breakpoint */ |
537 | struct arch_hw_breakpoint info; | 538 | struct arch_hw_breakpoint info; |
538 | struct list_head bp_list; | 539 | struct list_head bp_list; |
540 | /* | ||
541 | * Crufty hack to avoid the chicken and egg | ||
542 | * problem hw_breakpoint has with context | ||
543 | * creation and event initialization. | ||
544 | */ | ||
545 | struct task_struct *bp_target; | ||
539 | }; | 546 | }; |
540 | #endif | 547 | #endif |
541 | }; | 548 | }; |
549 | int state; | ||
542 | local64_t prev_count; | 550 | local64_t prev_count; |
543 | u64 sample_period; | 551 | u64 sample_period; |
544 | u64 last_period; | 552 | u64 last_period; |
@@ -550,6 +558,13 @@ struct hw_perf_event { | |||
550 | #endif | 558 | #endif |
551 | }; | 559 | }; |
552 | 560 | ||
561 | /* | ||
562 | * hw_perf_event::state flags | ||
563 | */ | ||
564 | #define PERF_HES_STOPPED 0x01 /* the counter is stopped */ | ||
565 | #define PERF_HES_UPTODATE 0x02 /* event->count up-to-date */ | ||
566 | #define PERF_HES_ARCH 0x04 | ||
567 | |||
553 | struct perf_event; | 568 | struct perf_event; |
554 | 569 | ||
555 | /* | 570 | /* |
@@ -561,36 +576,70 @@ struct perf_event; | |||
561 | * struct pmu - generic performance monitoring unit | 576 | * struct pmu - generic performance monitoring unit |
562 | */ | 577 | */ |
563 | struct pmu { | 578 | struct pmu { |
564 | int (*enable) (struct perf_event *event); | 579 | struct list_head entry; |
565 | void (*disable) (struct perf_event *event); | 580 | |
566 | int (*start) (struct perf_event *event); | 581 | int * __percpu pmu_disable_count; |
567 | void (*stop) (struct perf_event *event); | 582 | struct perf_cpu_context * __percpu pmu_cpu_context; |
568 | void (*read) (struct perf_event *event); | 583 | int task_ctx_nr; |
569 | void (*unthrottle) (struct perf_event *event); | 584 | |
585 | /* | ||
586 | * Fully disable/enable this PMU, can be used to protect from the PMI | ||
587 | * as well as for lazy/batch writing of the MSRs. | ||
588 | */ | ||
589 | void (*pmu_enable) (struct pmu *pmu); /* optional */ | ||
590 | void (*pmu_disable) (struct pmu *pmu); /* optional */ | ||
570 | 591 | ||
571 | /* | 592 | /* |
572 | * Group events scheduling is treated as a transaction, add group | 593 | * Try and initialize the event for this PMU. |
573 | * events as a whole and perform one schedulability test. If the test | 594 | * Should return -ENOENT when the @event doesn't match this PMU. |
574 | * fails, roll back the whole group | ||
575 | */ | 595 | */ |
596 | int (*event_init) (struct perf_event *event); | ||
597 | |||
598 | #define PERF_EF_START 0x01 /* start the counter when adding */ | ||
599 | #define PERF_EF_RELOAD 0x02 /* reload the counter when starting */ | ||
600 | #define PERF_EF_UPDATE 0x04 /* update the counter when stopping */ | ||
576 | 601 | ||
577 | /* | 602 | /* |
578 | * Start the transaction, after this ->enable() doesn't need | 603 | * Adds/Removes a counter to/from the PMU, can be done inside |
579 | * to do schedulability tests. | 604 | * a transaction, see the ->*_txn() methods. |
580 | */ | 605 | */ |
581 | void (*start_txn) (const struct pmu *pmu); | 606 | int (*add) (struct perf_event *event, int flags); |
607 | void (*del) (struct perf_event *event, int flags); | ||
608 | |||
582 | /* | 609 | /* |
583 | * If ->start_txn() disabled the ->enable() schedulability test | 610 | * Starts/Stops a counter present on the PMU. The PMI handler |
611 | * should stop the counter when perf_event_overflow() returns | ||
612 | * !0. ->start() will be used to continue. | ||
613 | */ | ||
614 | void (*start) (struct perf_event *event, int flags); | ||
615 | void (*stop) (struct perf_event *event, int flags); | ||
616 | |||
617 | /* | ||
618 | * Updates the counter value of the event. | ||
619 | */ | ||
620 | void (*read) (struct perf_event *event); | ||
621 | |||
622 | /* | ||
623 | * Group events scheduling is treated as a transaction, add | ||
624 | * group events as a whole and perform one schedulability test. | ||
625 | * If the test fails, roll back the whole group | ||
626 | * | ||
627 | * Start the transaction, after this ->add() doesn't need to | ||
628 | * do schedulability tests. | ||
629 | */ | ||
630 | void (*start_txn) (struct pmu *pmu); /* optional */ | ||
631 | /* | ||
632 | * If ->start_txn() disabled the ->add() schedulability test | ||
584 | * then ->commit_txn() is required to perform one. On success | 633 | * then ->commit_txn() is required to perform one. On success |
585 | * the transaction is closed. On error the transaction is kept | 634 | * the transaction is closed. On error the transaction is kept |
586 | * open until ->cancel_txn() is called. | 635 | * open until ->cancel_txn() is called. |
587 | */ | 636 | */ |
588 | int (*commit_txn) (const struct pmu *pmu); | 637 | int (*commit_txn) (struct pmu *pmu); /* optional */ |
589 | /* | 638 | /* |
590 | * Will cancel the transaction, assumes ->disable() is called for | 639 | * Will cancel the transaction, assumes ->del() is called |
591 | * each successfull ->enable() during the transaction. | 640 | * for each successful ->add() during the transaction. |
592 | */ | 641 | */ |
593 | void (*cancel_txn) (const struct pmu *pmu); | 642 | void (*cancel_txn) (struct pmu *pmu); /* optional */ |
594 | }; | 643 | }; |
595 | 644 | ||
596 | /** | 645 | /** |
@@ -631,11 +680,6 @@ struct perf_buffer { | |||
631 | void *data_pages[0]; | 680 | void *data_pages[0]; |
632 | }; | 681 | }; |
633 | 682 | ||
634 | struct perf_pending_entry { | ||
635 | struct perf_pending_entry *next; | ||
636 | void (*func)(struct perf_pending_entry *); | ||
637 | }; | ||
638 | |||
639 | struct perf_sample_data; | 683 | struct perf_sample_data; |
640 | 684 | ||
641 | typedef void (*perf_overflow_handler_t)(struct perf_event *, int, | 685 | typedef void (*perf_overflow_handler_t)(struct perf_event *, int, |
@@ -656,6 +700,7 @@ struct swevent_hlist { | |||
656 | 700 | ||
657 | #define PERF_ATTACH_CONTEXT 0x01 | 701 | #define PERF_ATTACH_CONTEXT 0x01 |
658 | #define PERF_ATTACH_GROUP 0x02 | 702 | #define PERF_ATTACH_GROUP 0x02 |
703 | #define PERF_ATTACH_TASK 0x04 | ||
659 | 704 | ||
660 | /** | 705 | /** |
661 | * struct perf_event - performance event kernel representation: | 706 | * struct perf_event - performance event kernel representation: |
@@ -669,7 +714,7 @@ struct perf_event { | |||
669 | int nr_siblings; | 714 | int nr_siblings; |
670 | int group_flags; | 715 | int group_flags; |
671 | struct perf_event *group_leader; | 716 | struct perf_event *group_leader; |
672 | const struct pmu *pmu; | 717 | struct pmu *pmu; |
673 | 718 | ||
674 | enum perf_event_active_state state; | 719 | enum perf_event_active_state state; |
675 | unsigned int attach_state; | 720 | unsigned int attach_state; |
@@ -743,7 +788,7 @@ struct perf_event { | |||
743 | int pending_wakeup; | 788 | int pending_wakeup; |
744 | int pending_kill; | 789 | int pending_kill; |
745 | int pending_disable; | 790 | int pending_disable; |
746 | struct perf_pending_entry pending; | 791 | struct irq_work pending; |
747 | 792 | ||
748 | atomic_t event_limit; | 793 | atomic_t event_limit; |
749 | 794 | ||
@@ -763,12 +808,19 @@ struct perf_event { | |||
763 | #endif /* CONFIG_PERF_EVENTS */ | 808 | #endif /* CONFIG_PERF_EVENTS */ |
764 | }; | 809 | }; |
765 | 810 | ||
811 | enum perf_event_context_type { | ||
812 | task_context, | ||
813 | cpu_context, | ||
814 | }; | ||
815 | |||
766 | /** | 816 | /** |
767 | * struct perf_event_context - event context structure | 817 | * struct perf_event_context - event context structure |
768 | * | 818 | * |
769 | * Used as a container for task events and CPU events as well: | 819 | * Used as a container for task events and CPU events as well: |
770 | */ | 820 | */ |
771 | struct perf_event_context { | 821 | struct perf_event_context { |
822 | enum perf_event_context_type type; | ||
823 | struct pmu *pmu; | ||
772 | /* | 824 | /* |
773 | * Protect the states of the events in the list, | 825 | * Protect the states of the events in the list, |
774 | * nr_active, and the list: | 826 | * nr_active, and the list: |
@@ -808,6 +860,12 @@ struct perf_event_context { | |||
808 | struct rcu_head rcu_head; | 860 | struct rcu_head rcu_head; |
809 | }; | 861 | }; |
810 | 862 | ||
863 | /* | ||
864 | * Number of contexts where an event can trigger: | ||
865 | * task, softirq, hardirq, nmi. | ||
866 | */ | ||
867 | #define PERF_NR_CONTEXTS 4 | ||
868 | |||
811 | /** | 869 | /** |
812 | * struct perf_event_cpu_context - per cpu event context structure | 870 | * struct perf_event_cpu_context - per cpu event context structure |
813 | */ | 871 | */ |
@@ -815,18 +873,9 @@ struct perf_cpu_context { | |||
815 | struct perf_event_context ctx; | 873 | struct perf_event_context ctx; |
816 | struct perf_event_context *task_ctx; | 874 | struct perf_event_context *task_ctx; |
817 | int active_oncpu; | 875 | int active_oncpu; |
818 | int max_pertask; | ||
819 | int exclusive; | 876 | int exclusive; |
820 | struct swevent_hlist *swevent_hlist; | 877 | struct list_head rotation_list; |
821 | struct mutex hlist_mutex; | 878 | int jiffies_interval; |
822 | int hlist_refcount; | ||
823 | |||
824 | /* | ||
825 | * Recursion avoidance: | ||
826 | * | ||
827 | * task, softirq, irq, nmi context | ||
828 | */ | ||
829 | int recursion[4]; | ||
830 | }; | 879 | }; |
831 | 880 | ||
832 | struct perf_output_handle { | 881 | struct perf_output_handle { |
@@ -842,26 +891,34 @@ struct perf_output_handle { | |||
842 | 891 | ||
843 | #ifdef CONFIG_PERF_EVENTS | 892 | #ifdef CONFIG_PERF_EVENTS |
844 | 893 | ||
845 | /* | 894 | extern int perf_pmu_register(struct pmu *pmu); |
846 | * Set by architecture code: | 895 | extern void perf_pmu_unregister(struct pmu *pmu); |
847 | */ | 896 | |
848 | extern int perf_max_events; | 897 | extern int perf_num_counters(void); |
898 | extern const char *perf_pmu_name(void); | ||
899 | extern void __perf_event_task_sched_in(struct task_struct *task); | ||
900 | extern void __perf_event_task_sched_out(struct task_struct *task, struct task_struct *next); | ||
849 | 901 | ||
850 | extern const struct pmu *hw_perf_event_init(struct perf_event *event); | 902 | extern atomic_t perf_task_events; |
903 | |||
904 | static inline void perf_event_task_sched_in(struct task_struct *task) | ||
905 | { | ||
906 | COND_STMT(&perf_task_events, __perf_event_task_sched_in(task)); | ||
907 | } | ||
908 | |||
909 | static inline | ||
910 | void perf_event_task_sched_out(struct task_struct *task, struct task_struct *next) | ||
911 | { | ||
912 | COND_STMT(&perf_task_events, __perf_event_task_sched_out(task, next)); | ||
913 | } | ||
851 | 914 | ||
852 | extern void perf_event_task_sched_in(struct task_struct *task); | ||
853 | extern void perf_event_task_sched_out(struct task_struct *task, struct task_struct *next); | ||
854 | extern void perf_event_task_tick(struct task_struct *task); | ||
855 | extern int perf_event_init_task(struct task_struct *child); | 915 | extern int perf_event_init_task(struct task_struct *child); |
856 | extern void perf_event_exit_task(struct task_struct *child); | 916 | extern void perf_event_exit_task(struct task_struct *child); |
857 | extern void perf_event_free_task(struct task_struct *task); | 917 | extern void perf_event_free_task(struct task_struct *task); |
858 | extern void set_perf_event_pending(void); | 918 | extern void perf_event_delayed_put(struct task_struct *task); |
859 | extern void perf_event_do_pending(void); | ||
860 | extern void perf_event_print_debug(void); | 919 | extern void perf_event_print_debug(void); |
861 | extern void __perf_disable(void); | 920 | extern void perf_pmu_disable(struct pmu *pmu); |
862 | extern bool __perf_enable(void); | 921 | extern void perf_pmu_enable(struct pmu *pmu); |
863 | extern void perf_disable(void); | ||
864 | extern void perf_enable(void); | ||
865 | extern int perf_event_task_disable(void); | 922 | extern int perf_event_task_disable(void); |
866 | extern int perf_event_task_enable(void); | 923 | extern int perf_event_task_enable(void); |
867 | extern void perf_event_update_userpage(struct perf_event *event); | 924 | extern void perf_event_update_userpage(struct perf_event *event); |
@@ -869,7 +926,7 @@ extern int perf_event_release_kernel(struct perf_event *event); | |||
869 | extern struct perf_event * | 926 | extern struct perf_event * |
870 | perf_event_create_kernel_counter(struct perf_event_attr *attr, | 927 | perf_event_create_kernel_counter(struct perf_event_attr *attr, |
871 | int cpu, | 928 | int cpu, |
872 | pid_t pid, | 929 | struct task_struct *task, |
873 | perf_overflow_handler_t callback); | 930 | perf_overflow_handler_t callback); |
874 | extern u64 perf_event_read_value(struct perf_event *event, | 931 | extern u64 perf_event_read_value(struct perf_event *event, |
875 | u64 *enabled, u64 *running); | 932 | u64 *enabled, u64 *running); |
@@ -920,14 +977,7 @@ extern int perf_event_overflow(struct perf_event *event, int nmi, | |||
920 | */ | 977 | */ |
921 | static inline int is_software_event(struct perf_event *event) | 978 | static inline int is_software_event(struct perf_event *event) |
922 | { | 979 | { |
923 | switch (event->attr.type) { | 980 | return event->pmu->task_ctx_nr == perf_sw_context; |
924 | case PERF_TYPE_SOFTWARE: | ||
925 | case PERF_TYPE_TRACEPOINT: | ||
926 | /* for now the breakpoint stuff also works as software event */ | ||
927 | case PERF_TYPE_BREAKPOINT: | ||
928 | return 1; | ||
929 | } | ||
930 | return 0; | ||
931 | } | 981 | } |
932 | 982 | ||
933 | extern atomic_t perf_swevent_enabled[PERF_COUNT_SW_MAX]; | 983 | extern atomic_t perf_swevent_enabled[PERF_COUNT_SW_MAX]; |
@@ -954,18 +1004,20 @@ static inline void perf_fetch_caller_regs(struct pt_regs *regs) | |||
954 | perf_arch_fetch_caller_regs(regs, CALLER_ADDR0); | 1004 | perf_arch_fetch_caller_regs(regs, CALLER_ADDR0); |
955 | } | 1005 | } |
956 | 1006 | ||
957 | static inline void | 1007 | static __always_inline void |
958 | perf_sw_event(u32 event_id, u64 nr, int nmi, struct pt_regs *regs, u64 addr) | 1008 | perf_sw_event(u32 event_id, u64 nr, int nmi, struct pt_regs *regs, u64 addr) |
959 | { | 1009 | { |
960 | if (atomic_read(&perf_swevent_enabled[event_id])) { | 1010 | struct pt_regs hot_regs; |
961 | struct pt_regs hot_regs; | 1011 | |
962 | 1012 | JUMP_LABEL(&perf_swevent_enabled[event_id], have_event); | |
963 | if (!regs) { | 1013 | return; |
964 | perf_fetch_caller_regs(&hot_regs); | 1014 | |
965 | regs = &hot_regs; | 1015 | have_event: |
966 | } | 1016 | if (!regs) { |
967 | __perf_sw_event(event_id, nr, nmi, regs, addr); | 1017 | perf_fetch_caller_regs(&hot_regs); |
1018 | regs = &hot_regs; | ||
968 | } | 1019 | } |
1020 | __perf_sw_event(event_id, nr, nmi, regs, addr); | ||
969 | } | 1021 | } |
970 | 1022 | ||
971 | extern void perf_event_mmap(struct vm_area_struct *vma); | 1023 | extern void perf_event_mmap(struct vm_area_struct *vma); |
@@ -976,7 +1028,21 @@ extern int perf_unregister_guest_info_callbacks(struct perf_guest_info_callbacks | |||
976 | extern void perf_event_comm(struct task_struct *tsk); | 1028 | extern void perf_event_comm(struct task_struct *tsk); |
977 | extern void perf_event_fork(struct task_struct *tsk); | 1029 | extern void perf_event_fork(struct task_struct *tsk); |
978 | 1030 | ||
979 | extern struct perf_callchain_entry *perf_callchain(struct pt_regs *regs); | 1031 | /* Callchains */ |
1032 | DECLARE_PER_CPU(struct perf_callchain_entry, perf_callchain_entry); | ||
1033 | |||
1034 | extern void perf_callchain_user(struct perf_callchain_entry *entry, | ||
1035 | struct pt_regs *regs); | ||
1036 | extern void perf_callchain_kernel(struct perf_callchain_entry *entry, | ||
1037 | struct pt_regs *regs); | ||
1038 | |||
1039 | |||
1040 | static inline void | ||
1041 | perf_callchain_store(struct perf_callchain_entry *entry, u64 ip) | ||
1042 | { | ||
1043 | if (entry->nr < PERF_MAX_STACK_DEPTH) | ||
1044 | entry->ip[entry->nr++] = ip; | ||
1045 | } | ||
980 | 1046 | ||
981 | extern int sysctl_perf_event_paranoid; | 1047 | extern int sysctl_perf_event_paranoid; |
982 | extern int sysctl_perf_event_mlock; | 1048 | extern int sysctl_perf_event_mlock; |
@@ -1019,21 +1085,18 @@ extern int perf_swevent_get_recursion_context(void); | |||
1019 | extern void perf_swevent_put_recursion_context(int rctx); | 1085 | extern void perf_swevent_put_recursion_context(int rctx); |
1020 | extern void perf_event_enable(struct perf_event *event); | 1086 | extern void perf_event_enable(struct perf_event *event); |
1021 | extern void perf_event_disable(struct perf_event *event); | 1087 | extern void perf_event_disable(struct perf_event *event); |
1088 | extern void perf_event_task_tick(void); | ||
1022 | #else | 1089 | #else |
1023 | static inline void | 1090 | static inline void |
1024 | perf_event_task_sched_in(struct task_struct *task) { } | 1091 | perf_event_task_sched_in(struct task_struct *task) { } |
1025 | static inline void | 1092 | static inline void |
1026 | perf_event_task_sched_out(struct task_struct *task, | 1093 | perf_event_task_sched_out(struct task_struct *task, |
1027 | struct task_struct *next) { } | 1094 | struct task_struct *next) { } |
1028 | static inline void | ||
1029 | perf_event_task_tick(struct task_struct *task) { } | ||
1030 | static inline int perf_event_init_task(struct task_struct *child) { return 0; } | 1095 | static inline int perf_event_init_task(struct task_struct *child) { return 0; } |
1031 | static inline void perf_event_exit_task(struct task_struct *child) { } | 1096 | static inline void perf_event_exit_task(struct task_struct *child) { } |
1032 | static inline void perf_event_free_task(struct task_struct *task) { } | 1097 | static inline void perf_event_free_task(struct task_struct *task) { } |
1033 | static inline void perf_event_do_pending(void) { } | 1098 | static inline void perf_event_delayed_put(struct task_struct *task) { } |
1034 | static inline void perf_event_print_debug(void) { } | 1099 | static inline void perf_event_print_debug(void) { } |
1035 | static inline void perf_disable(void) { } | ||
1036 | static inline void perf_enable(void) { } | ||
1037 | static inline int perf_event_task_disable(void) { return -EINVAL; } | 1100 | static inline int perf_event_task_disable(void) { return -EINVAL; } |
1038 | static inline int perf_event_task_enable(void) { return -EINVAL; } | 1101 | static inline int perf_event_task_enable(void) { return -EINVAL; } |
1039 | 1102 | ||
@@ -1056,6 +1119,7 @@ static inline int perf_swevent_get_recursion_context(void) { return -1; } | |||
1056 | static inline void perf_swevent_put_recursion_context(int rctx) { } | 1119 | static inline void perf_swevent_put_recursion_context(int rctx) { } |
1057 | static inline void perf_event_enable(struct perf_event *event) { } | 1120 | static inline void perf_event_enable(struct perf_event *event) { } |
1058 | static inline void perf_event_disable(struct perf_event *event) { } | 1121 | static inline void perf_event_disable(struct perf_event *event) { } |
1122 | static inline void perf_event_task_tick(void) { } | ||
1059 | #endif | 1123 | #endif |
1060 | 1124 | ||
1061 | #define perf_output_put(handle, x) \ | 1125 | #define perf_output_put(handle, x) \ |
diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h index 634b8e674ac5..a39cbed9ee17 100644 --- a/include/linux/radix-tree.h +++ b/include/linux/radix-tree.h | |||
@@ -47,6 +47,8 @@ static inline void *radix_tree_indirect_to_ptr(void *ptr) | |||
47 | { | 47 | { |
48 | return (void *)((unsigned long)ptr & ~RADIX_TREE_INDIRECT_PTR); | 48 | return (void *)((unsigned long)ptr & ~RADIX_TREE_INDIRECT_PTR); |
49 | } | 49 | } |
50 | #define radix_tree_indirect_to_ptr(ptr) \ | ||
51 | radix_tree_indirect_to_ptr((void __force *)(ptr)) | ||
50 | 52 | ||
51 | static inline int radix_tree_is_indirect_ptr(void *ptr) | 53 | static inline int radix_tree_is_indirect_ptr(void *ptr) |
52 | { | 54 | { |
@@ -61,7 +63,7 @@ static inline int radix_tree_is_indirect_ptr(void *ptr) | |||
61 | struct radix_tree_root { | 63 | struct radix_tree_root { |
62 | unsigned int height; | 64 | unsigned int height; |
63 | gfp_t gfp_mask; | 65 | gfp_t gfp_mask; |
64 | struct radix_tree_node *rnode; | 66 | struct radix_tree_node __rcu *rnode; |
65 | }; | 67 | }; |
66 | 68 | ||
67 | #define RADIX_TREE_INIT(mask) { \ | 69 | #define RADIX_TREE_INIT(mask) { \ |
diff --git a/include/linux/rculist.h b/include/linux/rculist.h index 4ec3b38ce9c5..f31ef61f1c65 100644 --- a/include/linux/rculist.h +++ b/include/linux/rculist.h | |||
@@ -10,6 +10,21 @@ | |||
10 | #include <linux/rcupdate.h> | 10 | #include <linux/rcupdate.h> |
11 | 11 | ||
12 | /* | 12 | /* |
13 | * Why is there no list_empty_rcu()? Because list_empty() serves this | ||
14 | * purpose. The list_empty() function fetches the RCU-protected pointer | ||
15 | * and compares it to the address of the list head, but neither dereferences | ||
16 | * this pointer itself nor provides this pointer to the caller. Therefore, | ||
17 | * it is not necessary to use rcu_dereference(), so that list_empty() can | ||
18 | * be used anywhere you would want to use a list_empty_rcu(). | ||
19 | */ | ||
20 | |||
21 | /* | ||
22 | * Return the ->next pointer of a list_head in an RCU-safe | ||
23 | * way; the pointer must not be accessed directly. | ||
24 | */ | ||
25 | #define list_next_rcu(list) (*((struct list_head __rcu **)(&(list)->next))) | ||
26 | |||
27 | /* | ||
13 | * Insert a new entry between two known consecutive entries. | 28 | * Insert a new entry between two known consecutive entries. |
14 | * | 29 | * |
15 | * This is only for internal list manipulation where we know | 30 | * This is only for internal list manipulation where we know |
@@ -20,7 +35,7 @@ static inline void __list_add_rcu(struct list_head *new, | |||
20 | { | 35 | { |
21 | new->next = next; | 36 | new->next = next; |
22 | new->prev = prev; | 37 | new->prev = prev; |
23 | rcu_assign_pointer(prev->next, new); | 38 | rcu_assign_pointer(list_next_rcu(prev), new); |
24 | next->prev = new; | 39 | next->prev = new; |
25 | } | 40 | } |
26 | 41 | ||
@@ -138,7 +153,7 @@ static inline void list_replace_rcu(struct list_head *old, | |||
138 | { | 153 | { |
139 | new->next = old->next; | 154 | new->next = old->next; |
140 | new->prev = old->prev; | 155 | new->prev = old->prev; |
141 | rcu_assign_pointer(new->prev->next, new); | 156 | rcu_assign_pointer(list_next_rcu(new->prev), new); |
142 | new->next->prev = new; | 157 | new->next->prev = new; |
143 | old->prev = LIST_POISON2; | 158 | old->prev = LIST_POISON2; |
144 | } | 159 | } |
@@ -193,7 +208,7 @@ static inline void list_splice_init_rcu(struct list_head *list, | |||
193 | */ | 208 | */ |
194 | 209 | ||
195 | last->next = at; | 210 | last->next = at; |
196 | rcu_assign_pointer(head->next, first); | 211 | rcu_assign_pointer(list_next_rcu(head), first); |
197 | first->prev = head; | 212 | first->prev = head; |
198 | at->prev = last; | 213 | at->prev = last; |
199 | } | 214 | } |
@@ -208,7 +223,9 @@ static inline void list_splice_init_rcu(struct list_head *list, | |||
208 | * primitives such as list_add_rcu() as long as it's guarded by rcu_read_lock(). | 223 | * primitives such as list_add_rcu() as long as it's guarded by rcu_read_lock(). |
209 | */ | 224 | */ |
210 | #define list_entry_rcu(ptr, type, member) \ | 225 | #define list_entry_rcu(ptr, type, member) \ |
211 | container_of(rcu_dereference_raw(ptr), type, member) | 226 | ({typeof (*ptr) __rcu *__ptr = (typeof (*ptr) __rcu __force *)ptr; \ |
227 | container_of((typeof(ptr))rcu_dereference_raw(__ptr), type, member); \ | ||
228 | }) | ||
212 | 229 | ||
213 | /** | 230 | /** |
214 | * list_first_entry_rcu - get the first element from a list | 231 | * list_first_entry_rcu - get the first element from a list |
@@ -225,9 +242,9 @@ static inline void list_splice_init_rcu(struct list_head *list, | |||
225 | list_entry_rcu((ptr)->next, type, member) | 242 | list_entry_rcu((ptr)->next, type, member) |
226 | 243 | ||
227 | #define __list_for_each_rcu(pos, head) \ | 244 | #define __list_for_each_rcu(pos, head) \ |
228 | for (pos = rcu_dereference_raw((head)->next); \ | 245 | for (pos = rcu_dereference_raw(list_next_rcu(head)); \ |
229 | pos != (head); \ | 246 | pos != (head); \ |
230 | pos = rcu_dereference_raw(pos->next)) | 247 | pos = rcu_dereference_raw(list_next_rcu((pos))) |
231 | 248 | ||
232 | /** | 249 | /** |
233 | * list_for_each_entry_rcu - iterate over rcu list of given type | 250 | * list_for_each_entry_rcu - iterate over rcu list of given type |
@@ -257,9 +274,9 @@ static inline void list_splice_init_rcu(struct list_head *list, | |||
257 | * as long as the traversal is guarded by rcu_read_lock(). | 274 | * as long as the traversal is guarded by rcu_read_lock(). |
258 | */ | 275 | */ |
259 | #define list_for_each_continue_rcu(pos, head) \ | 276 | #define list_for_each_continue_rcu(pos, head) \ |
260 | for ((pos) = rcu_dereference_raw((pos)->next); \ | 277 | for ((pos) = rcu_dereference_raw(list_next_rcu(pos)); \ |
261 | prefetch((pos)->next), (pos) != (head); \ | 278 | prefetch((pos)->next), (pos) != (head); \ |
262 | (pos) = rcu_dereference_raw((pos)->next)) | 279 | (pos) = rcu_dereference_raw(list_next_rcu(pos))) |
263 | 280 | ||
264 | /** | 281 | /** |
265 | * list_for_each_entry_continue_rcu - continue iteration over list of given type | 282 | * list_for_each_entry_continue_rcu - continue iteration over list of given type |
@@ -314,12 +331,19 @@ static inline void hlist_replace_rcu(struct hlist_node *old, | |||
314 | 331 | ||
315 | new->next = next; | 332 | new->next = next; |
316 | new->pprev = old->pprev; | 333 | new->pprev = old->pprev; |
317 | rcu_assign_pointer(*new->pprev, new); | 334 | rcu_assign_pointer(*(struct hlist_node __rcu **)new->pprev, new); |
318 | if (next) | 335 | if (next) |
319 | new->next->pprev = &new->next; | 336 | new->next->pprev = &new->next; |
320 | old->pprev = LIST_POISON2; | 337 | old->pprev = LIST_POISON2; |
321 | } | 338 | } |
322 | 339 | ||
340 | /* | ||
341 | * return the first or the next element in an RCU protected hlist | ||
342 | */ | ||
343 | #define hlist_first_rcu(head) (*((struct hlist_node __rcu **)(&(head)->first))) | ||
344 | #define hlist_next_rcu(node) (*((struct hlist_node __rcu **)(&(node)->next))) | ||
345 | #define hlist_pprev_rcu(node) (*((struct hlist_node __rcu **)((node)->pprev))) | ||
346 | |||
323 | /** | 347 | /** |
324 | * hlist_add_head_rcu | 348 | * hlist_add_head_rcu |
325 | * @n: the element to add to the hash list. | 349 | * @n: the element to add to the hash list. |
@@ -346,7 +370,7 @@ static inline void hlist_add_head_rcu(struct hlist_node *n, | |||
346 | 370 | ||
347 | n->next = first; | 371 | n->next = first; |
348 | n->pprev = &h->first; | 372 | n->pprev = &h->first; |
349 | rcu_assign_pointer(h->first, n); | 373 | rcu_assign_pointer(hlist_first_rcu(h), n); |
350 | if (first) | 374 | if (first) |
351 | first->pprev = &n->next; | 375 | first->pprev = &n->next; |
352 | } | 376 | } |
@@ -374,7 +398,7 @@ static inline void hlist_add_before_rcu(struct hlist_node *n, | |||
374 | { | 398 | { |
375 | n->pprev = next->pprev; | 399 | n->pprev = next->pprev; |
376 | n->next = next; | 400 | n->next = next; |
377 | rcu_assign_pointer(*(n->pprev), n); | 401 | rcu_assign_pointer(hlist_pprev_rcu(n), n); |
378 | next->pprev = &n->next; | 402 | next->pprev = &n->next; |
379 | } | 403 | } |
380 | 404 | ||
@@ -401,15 +425,15 @@ static inline void hlist_add_after_rcu(struct hlist_node *prev, | |||
401 | { | 425 | { |
402 | n->next = prev->next; | 426 | n->next = prev->next; |
403 | n->pprev = &prev->next; | 427 | n->pprev = &prev->next; |
404 | rcu_assign_pointer(prev->next, n); | 428 | rcu_assign_pointer(hlist_next_rcu(prev), n); |
405 | if (n->next) | 429 | if (n->next) |
406 | n->next->pprev = &n->next; | 430 | n->next->pprev = &n->next; |
407 | } | 431 | } |
408 | 432 | ||
409 | #define __hlist_for_each_rcu(pos, head) \ | 433 | #define __hlist_for_each_rcu(pos, head) \ |
410 | for (pos = rcu_dereference((head)->first); \ | 434 | for (pos = rcu_dereference(hlist_first_rcu(head)); \ |
411 | pos && ({ prefetch(pos->next); 1; }); \ | 435 | pos && ({ prefetch(pos->next); 1; }); \ |
412 | pos = rcu_dereference(pos->next)) | 436 | pos = rcu_dereference(hlist_next_rcu(pos))) |
413 | 437 | ||
414 | /** | 438 | /** |
415 | * hlist_for_each_entry_rcu - iterate over rcu list of given type | 439 | * hlist_for_each_entry_rcu - iterate over rcu list of given type |
@@ -422,11 +446,11 @@ static inline void hlist_add_after_rcu(struct hlist_node *prev, | |||
422 | * the _rcu list-mutation primitives such as hlist_add_head_rcu() | 446 | * the _rcu list-mutation primitives such as hlist_add_head_rcu() |
423 | * as long as the traversal is guarded by rcu_read_lock(). | 447 | * as long as the traversal is guarded by rcu_read_lock(). |
424 | */ | 448 | */ |
425 | #define hlist_for_each_entry_rcu(tpos, pos, head, member) \ | 449 | #define hlist_for_each_entry_rcu(tpos, pos, head, member) \ |
426 | for (pos = rcu_dereference_raw((head)->first); \ | 450 | for (pos = rcu_dereference_raw(hlist_first_rcu(head)); \ |
427 | pos && ({ prefetch(pos->next); 1; }) && \ | 451 | pos && ({ prefetch(pos->next); 1; }) && \ |
428 | ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1; }); \ | 452 | ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1; }); \ |
429 | pos = rcu_dereference_raw(pos->next)) | 453 | pos = rcu_dereference_raw(hlist_next_rcu(pos))) |
430 | 454 | ||
431 | /** | 455 | /** |
432 | * hlist_for_each_entry_rcu_bh - iterate over rcu list of given type | 456 | * hlist_for_each_entry_rcu_bh - iterate over rcu list of given type |
diff --git a/include/linux/rculist_nulls.h b/include/linux/rculist_nulls.h index b70ffe53cb9f..2ae13714828b 100644 --- a/include/linux/rculist_nulls.h +++ b/include/linux/rculist_nulls.h | |||
@@ -37,6 +37,12 @@ static inline void hlist_nulls_del_init_rcu(struct hlist_nulls_node *n) | |||
37 | } | 37 | } |
38 | } | 38 | } |
39 | 39 | ||
40 | #define hlist_nulls_first_rcu(head) \ | ||
41 | (*((struct hlist_nulls_node __rcu __force **)&(head)->first)) | ||
42 | |||
43 | #define hlist_nulls_next_rcu(node) \ | ||
44 | (*((struct hlist_nulls_node __rcu __force **)&(node)->next)) | ||
45 | |||
40 | /** | 46 | /** |
41 | * hlist_nulls_del_rcu - deletes entry from hash list without re-initialization | 47 | * hlist_nulls_del_rcu - deletes entry from hash list without re-initialization |
42 | * @n: the element to delete from the hash list. | 48 | * @n: the element to delete from the hash list. |
@@ -88,7 +94,7 @@ static inline void hlist_nulls_add_head_rcu(struct hlist_nulls_node *n, | |||
88 | 94 | ||
89 | n->next = first; | 95 | n->next = first; |
90 | n->pprev = &h->first; | 96 | n->pprev = &h->first; |
91 | rcu_assign_pointer(h->first, n); | 97 | rcu_assign_pointer(hlist_nulls_first_rcu(h), n); |
92 | if (!is_a_nulls(first)) | 98 | if (!is_a_nulls(first)) |
93 | first->pprev = &n->next; | 99 | first->pprev = &n->next; |
94 | } | 100 | } |
@@ -100,11 +106,11 @@ static inline void hlist_nulls_add_head_rcu(struct hlist_nulls_node *n, | |||
100 | * @member: the name of the hlist_nulls_node within the struct. | 106 | * @member: the name of the hlist_nulls_node within the struct. |
101 | * | 107 | * |
102 | */ | 108 | */ |
103 | #define hlist_nulls_for_each_entry_rcu(tpos, pos, head, member) \ | 109 | #define hlist_nulls_for_each_entry_rcu(tpos, pos, head, member) \ |
104 | for (pos = rcu_dereference_raw((head)->first); \ | 110 | for (pos = rcu_dereference_raw(hlist_nulls_first_rcu(head)); \ |
105 | (!is_a_nulls(pos)) && \ | 111 | (!is_a_nulls(pos)) && \ |
106 | ({ tpos = hlist_nulls_entry(pos, typeof(*tpos), member); 1; }); \ | 112 | ({ tpos = hlist_nulls_entry(pos, typeof(*tpos), member); 1; }); \ |
107 | pos = rcu_dereference_raw(pos->next)) | 113 | pos = rcu_dereference_raw(hlist_nulls_next_rcu(pos))) |
108 | 114 | ||
109 | #endif | 115 | #endif |
110 | #endif | 116 | #endif |
diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 83af1f8d8b74..03cda7bed985 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h | |||
@@ -41,11 +41,15 @@ | |||
41 | #include <linux/lockdep.h> | 41 | #include <linux/lockdep.h> |
42 | #include <linux/completion.h> | 42 | #include <linux/completion.h> |
43 | #include <linux/debugobjects.h> | 43 | #include <linux/debugobjects.h> |
44 | #include <linux/compiler.h> | ||
44 | 45 | ||
45 | #ifdef CONFIG_RCU_TORTURE_TEST | 46 | #ifdef CONFIG_RCU_TORTURE_TEST |
46 | extern int rcutorture_runnable; /* for sysctl */ | 47 | extern int rcutorture_runnable; /* for sysctl */ |
47 | #endif /* #ifdef CONFIG_RCU_TORTURE_TEST */ | 48 | #endif /* #ifdef CONFIG_RCU_TORTURE_TEST */ |
48 | 49 | ||
50 | #define ULONG_CMP_GE(a, b) (ULONG_MAX / 2 >= (a) - (b)) | ||
51 | #define ULONG_CMP_LT(a, b) (ULONG_MAX / 2 < (a) - (b)) | ||
52 | |||
49 | /** | 53 | /** |
50 | * struct rcu_head - callback structure for use with RCU | 54 | * struct rcu_head - callback structure for use with RCU |
51 | * @next: next update requests in a list | 55 | * @next: next update requests in a list |
@@ -57,29 +61,94 @@ struct rcu_head { | |||
57 | }; | 61 | }; |
58 | 62 | ||
59 | /* Exported common interfaces */ | 63 | /* Exported common interfaces */ |
60 | extern void rcu_barrier(void); | 64 | extern void call_rcu_sched(struct rcu_head *head, |
65 | void (*func)(struct rcu_head *rcu)); | ||
66 | extern void synchronize_sched(void); | ||
61 | extern void rcu_barrier_bh(void); | 67 | extern void rcu_barrier_bh(void); |
62 | extern void rcu_barrier_sched(void); | 68 | extern void rcu_barrier_sched(void); |
63 | extern void synchronize_sched_expedited(void); | 69 | extern void synchronize_sched_expedited(void); |
64 | extern int sched_expedited_torture_stats(char *page); | 70 | extern int sched_expedited_torture_stats(char *page); |
65 | 71 | ||
72 | static inline void __rcu_read_lock_bh(void) | ||
73 | { | ||
74 | local_bh_disable(); | ||
75 | } | ||
76 | |||
77 | static inline void __rcu_read_unlock_bh(void) | ||
78 | { | ||
79 | local_bh_enable(); | ||
80 | } | ||
81 | |||
82 | #ifdef CONFIG_PREEMPT_RCU | ||
83 | |||
84 | extern void __rcu_read_lock(void); | ||
85 | extern void __rcu_read_unlock(void); | ||
86 | void synchronize_rcu(void); | ||
87 | |||
88 | /* | ||
89 | * Defined as a macro as it is a very low level header included from | ||
90 | * areas that don't even know about current. This gives the rcu_read_lock() | ||
91 | * nesting depth, but makes sense only if CONFIG_PREEMPT_RCU -- in other | ||
92 | * types of kernel builds, the rcu_read_lock() nesting depth is unknowable. | ||
93 | */ | ||
94 | #define rcu_preempt_depth() (current->rcu_read_lock_nesting) | ||
95 | |||
96 | #else /* #ifdef CONFIG_PREEMPT_RCU */ | ||
97 | |||
98 | static inline void __rcu_read_lock(void) | ||
99 | { | ||
100 | preempt_disable(); | ||
101 | } | ||
102 | |||
103 | static inline void __rcu_read_unlock(void) | ||
104 | { | ||
105 | preempt_enable(); | ||
106 | } | ||
107 | |||
108 | static inline void synchronize_rcu(void) | ||
109 | { | ||
110 | synchronize_sched(); | ||
111 | } | ||
112 | |||
113 | static inline int rcu_preempt_depth(void) | ||
114 | { | ||
115 | return 0; | ||
116 | } | ||
117 | |||
118 | #endif /* #else #ifdef CONFIG_PREEMPT_RCU */ | ||
119 | |||
66 | /* Internal to kernel */ | 120 | /* Internal to kernel */ |
67 | extern void rcu_init(void); | 121 | extern void rcu_init(void); |
122 | extern void rcu_sched_qs(int cpu); | ||
123 | extern void rcu_bh_qs(int cpu); | ||
124 | extern void rcu_check_callbacks(int cpu, int user); | ||
125 | struct notifier_block; | ||
126 | |||
127 | #ifdef CONFIG_NO_HZ | ||
128 | |||
129 | extern void rcu_enter_nohz(void); | ||
130 | extern void rcu_exit_nohz(void); | ||
131 | |||
132 | #else /* #ifdef CONFIG_NO_HZ */ | ||
133 | |||
134 | static inline void rcu_enter_nohz(void) | ||
135 | { | ||
136 | } | ||
137 | |||
138 | static inline void rcu_exit_nohz(void) | ||
139 | { | ||
140 | } | ||
141 | |||
142 | #endif /* #else #ifdef CONFIG_NO_HZ */ | ||
68 | 143 | ||
69 | #if defined(CONFIG_TREE_RCU) || defined(CONFIG_TREE_PREEMPT_RCU) | 144 | #if defined(CONFIG_TREE_RCU) || defined(CONFIG_TREE_PREEMPT_RCU) |
70 | #include <linux/rcutree.h> | 145 | #include <linux/rcutree.h> |
71 | #elif defined(CONFIG_TINY_RCU) | 146 | #elif defined(CONFIG_TINY_RCU) || defined(CONFIG_TINY_PREEMPT_RCU) |
72 | #include <linux/rcutiny.h> | 147 | #include <linux/rcutiny.h> |
73 | #else | 148 | #else |
74 | #error "Unknown RCU implementation specified to kernel configuration" | 149 | #error "Unknown RCU implementation specified to kernel configuration" |
75 | #endif | 150 | #endif |
76 | 151 | ||
77 | #define RCU_HEAD_INIT { .next = NULL, .func = NULL } | ||
78 | #define RCU_HEAD(head) struct rcu_head head = RCU_HEAD_INIT | ||
79 | #define INIT_RCU_HEAD(ptr) do { \ | ||
80 | (ptr)->next = NULL; (ptr)->func = NULL; \ | ||
81 | } while (0) | ||
82 | |||
83 | /* | 152 | /* |
84 | * init_rcu_head_on_stack()/destroy_rcu_head_on_stack() are needed for dynamic | 153 | * init_rcu_head_on_stack()/destroy_rcu_head_on_stack() are needed for dynamic |
85 | * initialization and destruction of rcu_head on the stack. rcu_head structures | 154 | * initialization and destruction of rcu_head on the stack. rcu_head structures |
@@ -120,14 +189,15 @@ extern struct lockdep_map rcu_sched_lock_map; | |||
120 | extern int debug_lockdep_rcu_enabled(void); | 189 | extern int debug_lockdep_rcu_enabled(void); |
121 | 190 | ||
122 | /** | 191 | /** |
123 | * rcu_read_lock_held - might we be in RCU read-side critical section? | 192 | * rcu_read_lock_held() - might we be in RCU read-side critical section? |
124 | * | 193 | * |
125 | * If CONFIG_DEBUG_LOCK_ALLOC is selected, returns nonzero iff in an RCU | 194 | * If CONFIG_DEBUG_LOCK_ALLOC is selected, returns nonzero iff in an RCU |
126 | * read-side critical section. In absence of CONFIG_DEBUG_LOCK_ALLOC, | 195 | * read-side critical section. In absence of CONFIG_DEBUG_LOCK_ALLOC, |
127 | * this assumes we are in an RCU read-side critical section unless it can | 196 | * this assumes we are in an RCU read-side critical section unless it can |
128 | * prove otherwise. | 197 | * prove otherwise. This is useful for debug checks in functions that |
198 | * require that they be called within an RCU read-side critical section. | ||
129 | * | 199 | * |
130 | * Check debug_lockdep_rcu_enabled() to prevent false positives during boot | 200 | * Checks debug_lockdep_rcu_enabled() to prevent false positives during boot |
131 | * and while lockdep is disabled. | 201 | * and while lockdep is disabled. |
132 | */ | 202 | */ |
133 | static inline int rcu_read_lock_held(void) | 203 | static inline int rcu_read_lock_held(void) |
@@ -144,14 +214,16 @@ static inline int rcu_read_lock_held(void) | |||
144 | extern int rcu_read_lock_bh_held(void); | 214 | extern int rcu_read_lock_bh_held(void); |
145 | 215 | ||
146 | /** | 216 | /** |
147 | * rcu_read_lock_sched_held - might we be in RCU-sched read-side critical section? | 217 | * rcu_read_lock_sched_held() - might we be in RCU-sched read-side critical section? |
148 | * | 218 | * |
149 | * If CONFIG_DEBUG_LOCK_ALLOC is selected, returns nonzero iff in an | 219 | * If CONFIG_DEBUG_LOCK_ALLOC is selected, returns nonzero iff in an |
150 | * RCU-sched read-side critical section. In absence of | 220 | * RCU-sched read-side critical section. In absence of |
151 | * CONFIG_DEBUG_LOCK_ALLOC, this assumes we are in an RCU-sched read-side | 221 | * CONFIG_DEBUG_LOCK_ALLOC, this assumes we are in an RCU-sched read-side |
152 | * critical section unless it can prove otherwise. Note that disabling | 222 | * critical section unless it can prove otherwise. Note that disabling |
153 | * of preemption (including disabling irqs) counts as an RCU-sched | 223 | * of preemption (including disabling irqs) counts as an RCU-sched |
154 | * read-side critical section. | 224 | * read-side critical section. This is useful for debug checks in functions |
225 | * that require that they be called within an RCU-sched read-side | ||
226 | * critical section. | ||
155 | * | 227 | * |
156 | * Check debug_lockdep_rcu_enabled() to prevent false positives during boot | 228 | * Check debug_lockdep_rcu_enabled() to prevent false positives during boot |
157 | * and while lockdep is disabled. | 229 | * and while lockdep is disabled. |
@@ -211,7 +283,11 @@ static inline int rcu_read_lock_sched_held(void) | |||
211 | 283 | ||
212 | extern int rcu_my_thread_group_empty(void); | 284 | extern int rcu_my_thread_group_empty(void); |
213 | 285 | ||
214 | #define __do_rcu_dereference_check(c) \ | 286 | /** |
287 | * rcu_lockdep_assert - emit lockdep splat if specified condition not met | ||
288 | * @c: condition to check | ||
289 | */ | ||
290 | #define rcu_lockdep_assert(c) \ | ||
215 | do { \ | 291 | do { \ |
216 | static bool __warned; \ | 292 | static bool __warned; \ |
217 | if (debug_lockdep_rcu_enabled() && !__warned && !(c)) { \ | 293 | if (debug_lockdep_rcu_enabled() && !__warned && !(c)) { \ |
@@ -220,41 +296,163 @@ extern int rcu_my_thread_group_empty(void); | |||
220 | } \ | 296 | } \ |
221 | } while (0) | 297 | } while (0) |
222 | 298 | ||
299 | #else /* #ifdef CONFIG_PROVE_RCU */ | ||
300 | |||
301 | #define rcu_lockdep_assert(c) do { } while (0) | ||
302 | |||
303 | #endif /* #else #ifdef CONFIG_PROVE_RCU */ | ||
304 | |||
305 | /* | ||
306 | * Helper functions for rcu_dereference_check(), rcu_dereference_protected() | ||
307 | * and rcu_assign_pointer(). Some of these could be folded into their | ||
308 | * callers, but they are left separate in order to ease introduction of | ||
309 | * multiple flavors of pointers to match the multiple flavors of RCU | ||
310 | * (e.g., __rcu_bh, __rcu_sched, and __srcu), should this make sense in | ||
311 | * the future. | ||
312 | */ | ||
313 | |||
314 | #ifdef __CHECKER__ | ||
315 | #define rcu_dereference_sparse(p, space) \ | ||
316 | ((void)(((typeof(*p) space *)p) == p)) | ||
317 | #else /* #ifdef __CHECKER__ */ | ||
318 | #define rcu_dereference_sparse(p, space) | ||
319 | #endif /* #else #ifdef __CHECKER__ */ | ||
320 | |||
321 | #define __rcu_access_pointer(p, space) \ | ||
322 | ({ \ | ||
323 | typeof(*p) *_________p1 = (typeof(*p)*__force )ACCESS_ONCE(p); \ | ||
324 | rcu_dereference_sparse(p, space); \ | ||
325 | ((typeof(*p) __force __kernel *)(_________p1)); \ | ||
326 | }) | ||
327 | #define __rcu_dereference_check(p, c, space) \ | ||
328 | ({ \ | ||
329 | typeof(*p) *_________p1 = (typeof(*p)*__force )ACCESS_ONCE(p); \ | ||
330 | rcu_lockdep_assert(c); \ | ||
331 | rcu_dereference_sparse(p, space); \ | ||
332 | smp_read_barrier_depends(); \ | ||
333 | ((typeof(*p) __force __kernel *)(_________p1)); \ | ||
334 | }) | ||
335 | #define __rcu_dereference_protected(p, c, space) \ | ||
336 | ({ \ | ||
337 | rcu_lockdep_assert(c); \ | ||
338 | rcu_dereference_sparse(p, space); \ | ||
339 | ((typeof(*p) __force __kernel *)(p)); \ | ||
340 | }) | ||
341 | |||
342 | #define __rcu_dereference_index_check(p, c) \ | ||
343 | ({ \ | ||
344 | typeof(p) _________p1 = ACCESS_ONCE(p); \ | ||
345 | rcu_lockdep_assert(c); \ | ||
346 | smp_read_barrier_depends(); \ | ||
347 | (_________p1); \ | ||
348 | }) | ||
349 | #define __rcu_assign_pointer(p, v, space) \ | ||
350 | ({ \ | ||
351 | if (!__builtin_constant_p(v) || \ | ||
352 | ((v) != NULL)) \ | ||
353 | smp_wmb(); \ | ||
354 | (p) = (typeof(*v) __force space *)(v); \ | ||
355 | }) | ||
356 | |||
357 | |||
358 | /** | ||
359 | * rcu_access_pointer() - fetch RCU pointer with no dereferencing | ||
360 | * @p: The pointer to read | ||
361 | * | ||
362 | * Return the value of the specified RCU-protected pointer, but omit the | ||
363 | * smp_read_barrier_depends() and keep the ACCESS_ONCE(). This is useful | ||
364 | * when the value of this pointer is accessed, but the pointer is not | ||
365 | * dereferenced, for example, when testing an RCU-protected pointer against | ||
366 | * NULL. Although rcu_access_pointer() may also be used in cases where | ||
367 | * update-side locks prevent the value of the pointer from changing, you | ||
368 | * should instead use rcu_dereference_protected() for this use case. | ||
369 | */ | ||
370 | #define rcu_access_pointer(p) __rcu_access_pointer((p), __rcu) | ||
371 | |||
223 | /** | 372 | /** |
224 | * rcu_dereference_check - rcu_dereference with debug checking | 373 | * rcu_dereference_check() - rcu_dereference with debug checking |
225 | * @p: The pointer to read, prior to dereferencing | 374 | * @p: The pointer to read, prior to dereferencing |
226 | * @c: The conditions under which the dereference will take place | 375 | * @c: The conditions under which the dereference will take place |
227 | * | 376 | * |
228 | * Do an rcu_dereference(), but check that the conditions under which the | 377 | * Do an rcu_dereference(), but check that the conditions under which the |
229 | * dereference will take place are correct. Typically the conditions indicate | 378 | * dereference will take place are correct. Typically the conditions |
230 | * the various locking conditions that should be held at that point. The check | 379 | * indicate the various locking conditions that should be held at that |
231 | * should return true if the conditions are satisfied. | 380 | * point. The check should return true if the conditions are satisfied. |
381 | * An implicit check for being in an RCU read-side critical section | ||
382 | * (rcu_read_lock()) is included. | ||
232 | * | 383 | * |
233 | * For example: | 384 | * For example: |
234 | * | 385 | * |
235 | * bar = rcu_dereference_check(foo->bar, rcu_read_lock_held() || | 386 | * bar = rcu_dereference_check(foo->bar, lockdep_is_held(&foo->lock)); |
236 | * lockdep_is_held(&foo->lock)); | ||
237 | * | 387 | * |
238 | * could be used to indicate to lockdep that foo->bar may only be dereferenced | 388 | * could be used to indicate to lockdep that foo->bar may only be dereferenced |
239 | * if either the RCU read lock is held, or that the lock required to replace | 389 | * if either rcu_read_lock() is held, or that the lock required to replace |
240 | * the bar struct at foo->bar is held. | 390 | * the bar struct at foo->bar is held. |
241 | * | 391 | * |
242 | * Note that the list of conditions may also include indications of when a lock | 392 | * Note that the list of conditions may also include indications of when a lock |
243 | * need not be held, for example during initialisation or destruction of the | 393 | * need not be held, for example during initialisation or destruction of the |
244 | * target struct: | 394 | * target struct: |
245 | * | 395 | * |
246 | * bar = rcu_dereference_check(foo->bar, rcu_read_lock_held() || | 396 | * bar = rcu_dereference_check(foo->bar, lockdep_is_held(&foo->lock) || |
247 | * lockdep_is_held(&foo->lock) || | ||
248 | * atomic_read(&foo->usage) == 0); | 397 | * atomic_read(&foo->usage) == 0); |
398 | * | ||
399 | * Inserts memory barriers on architectures that require them | ||
400 | * (currently only the Alpha), prevents the compiler from refetching | ||
401 | * (and from merging fetches), and, more importantly, documents exactly | ||
402 | * which pointers are protected by RCU and checks that the pointer is | ||
403 | * annotated as __rcu. | ||
249 | */ | 404 | */ |
250 | #define rcu_dereference_check(p, c) \ | 405 | #define rcu_dereference_check(p, c) \ |
251 | ({ \ | 406 | __rcu_dereference_check((p), rcu_read_lock_held() || (c), __rcu) |
252 | __do_rcu_dereference_check(c); \ | 407 | |
253 | rcu_dereference_raw(p); \ | 408 | /** |
254 | }) | 409 | * rcu_dereference_bh_check() - rcu_dereference_bh with debug checking |
410 | * @p: The pointer to read, prior to dereferencing | ||
411 | * @c: The conditions under which the dereference will take place | ||
412 | * | ||
413 | * This is the RCU-bh counterpart to rcu_dereference_check(). | ||
414 | */ | ||
415 | #define rcu_dereference_bh_check(p, c) \ | ||
416 | __rcu_dereference_check((p), rcu_read_lock_bh_held() || (c), __rcu) | ||
255 | 417 | ||
256 | /** | 418 | /** |
257 | * rcu_dereference_protected - fetch RCU pointer when updates prevented | 419 | * rcu_dereference_sched_check() - rcu_dereference_sched with debug checking |
420 | * @p: The pointer to read, prior to dereferencing | ||
421 | * @c: The conditions under which the dereference will take place | ||
422 | * | ||
423 | * This is the RCU-sched counterpart to rcu_dereference_check(). | ||
424 | */ | ||
425 | #define rcu_dereference_sched_check(p, c) \ | ||
426 | __rcu_dereference_check((p), rcu_read_lock_sched_held() || (c), \ | ||
427 | __rcu) | ||
428 | |||
429 | #define rcu_dereference_raw(p) rcu_dereference_check(p, 1) /*@@@ needed? @@@*/ | ||
430 | |||
431 | /** | ||
432 | * rcu_dereference_index_check() - rcu_dereference for indices with debug checking | ||
433 | * @p: The pointer to read, prior to dereferencing | ||
434 | * @c: The conditions under which the dereference will take place | ||
435 | * | ||
436 | * Similar to rcu_dereference_check(), but omits the sparse checking. | ||
437 | * This allows rcu_dereference_index_check() to be used on integers, | ||
438 | * which can then be used as array indices. Attempting to use | ||
439 | * rcu_dereference_check() on an integer will give compiler warnings | ||
440 | * because the sparse address-space mechanism relies on dereferencing | ||
441 | * the RCU-protected pointer. Dereferencing integers is not something | ||
442 | * that even gcc will put up with. | ||
443 | * | ||
444 | * Note that this function does not implicitly check for RCU read-side | ||
445 | * critical sections. If this function gains lots of uses, it might | ||
446 | * make sense to provide versions for each flavor of RCU, but it does | ||
447 | * not make sense as of early 2010. | ||
448 | */ | ||
449 | #define rcu_dereference_index_check(p, c) \ | ||
450 | __rcu_dereference_index_check((p), (c)) | ||
451 | |||
452 | /** | ||
453 | * rcu_dereference_protected() - fetch RCU pointer when updates prevented | ||
454 | * @p: The pointer to read, prior to dereferencing | ||
455 | * @c: The conditions under which the dereference will take place | ||
258 | * | 456 | * |
259 | * Return the value of the specified RCU-protected pointer, but omit | 457 | * Return the value of the specified RCU-protected pointer, but omit |
260 | * both the smp_read_barrier_depends() and the ACCESS_ONCE(). This | 458 | * both the smp_read_barrier_depends() and the ACCESS_ONCE(). This |
@@ -263,35 +461,61 @@ extern int rcu_my_thread_group_empty(void); | |||
263 | * prevent the compiler from repeating this reference or combining it | 461 | * prevent the compiler from repeating this reference or combining it |
264 | * with other references, so it should not be used without protection | 462 | * with other references, so it should not be used without protection |
265 | * of appropriate locks. | 463 | * of appropriate locks. |
464 | * | ||
465 | * This function is only for update-side use. Using this function | ||
466 | * when protected only by rcu_read_lock() will result in infrequent | ||
467 | * but very ugly failures. | ||
266 | */ | 468 | */ |
267 | #define rcu_dereference_protected(p, c) \ | 469 | #define rcu_dereference_protected(p, c) \ |
268 | ({ \ | 470 | __rcu_dereference_protected((p), (c), __rcu) |
269 | __do_rcu_dereference_check(c); \ | ||
270 | (p); \ | ||
271 | }) | ||
272 | 471 | ||
273 | #else /* #ifdef CONFIG_PROVE_RCU */ | 472 | /** |
473 | * rcu_dereference_bh_protected() - fetch RCU-bh pointer when updates prevented | ||
474 | * @p: The pointer to read, prior to dereferencing | ||
475 | * @c: The conditions under which the dereference will take place | ||
476 | * | ||
477 | * This is the RCU-bh counterpart to rcu_dereference_protected(). | ||
478 | */ | ||
479 | #define rcu_dereference_bh_protected(p, c) \ | ||
480 | __rcu_dereference_protected((p), (c), __rcu) | ||
274 | 481 | ||
275 | #define rcu_dereference_check(p, c) rcu_dereference_raw(p) | 482 | /** |
276 | #define rcu_dereference_protected(p, c) (p) | 483 | * rcu_dereference_sched_protected() - fetch RCU-sched pointer when updates prevented |
484 | * @p: The pointer to read, prior to dereferencing | ||
485 | * @c: The conditions under which the dereference will take place | ||
486 | * | ||
487 | * This is the RCU-sched counterpart to rcu_dereference_protected(). | ||
488 | */ | ||
489 | #define rcu_dereference_sched_protected(p, c) \ | ||
490 | __rcu_dereference_protected((p), (c), __rcu) | ||
277 | 491 | ||
278 | #endif /* #else #ifdef CONFIG_PROVE_RCU */ | ||
279 | 492 | ||
280 | /** | 493 | /** |
281 | * rcu_access_pointer - fetch RCU pointer with no dereferencing | 494 | * rcu_dereference() - fetch RCU-protected pointer for dereferencing |
495 | * @p: The pointer to read, prior to dereferencing | ||
282 | * | 496 | * |
283 | * Return the value of the specified RCU-protected pointer, but omit the | 497 | * This is a simple wrapper around rcu_dereference_check(). |
284 | * smp_read_barrier_depends() and keep the ACCESS_ONCE(). This is useful | 498 | */ |
285 | * when the value of this pointer is accessed, but the pointer is not | 499 | #define rcu_dereference(p) rcu_dereference_check(p, 0) |
286 | * dereferenced, for example, when testing an RCU-protected pointer against | 500 | |
287 | * NULL. This may also be used in cases where update-side locks prevent | 501 | /** |
288 | * the value of the pointer from changing, but rcu_dereference_protected() | 502 | * rcu_dereference_bh() - fetch an RCU-bh-protected pointer for dereferencing |
289 | * is a lighter-weight primitive for this use case. | 503 | * @p: The pointer to read, prior to dereferencing |
504 | * | ||
505 | * Makes rcu_dereference_check() do the dirty work. | ||
506 | */ | ||
507 | #define rcu_dereference_bh(p) rcu_dereference_bh_check(p, 0) | ||
508 | |||
509 | /** | ||
510 | * rcu_dereference_sched() - fetch RCU-sched-protected pointer for dereferencing | ||
511 | * @p: The pointer to read, prior to dereferencing | ||
512 | * | ||
513 | * Makes rcu_dereference_check() do the dirty work. | ||
290 | */ | 514 | */ |
291 | #define rcu_access_pointer(p) ACCESS_ONCE(p) | 515 | #define rcu_dereference_sched(p) rcu_dereference_sched_check(p, 0) |
292 | 516 | ||
293 | /** | 517 | /** |
294 | * rcu_read_lock - mark the beginning of an RCU read-side critical section. | 518 | * rcu_read_lock() - mark the beginning of an RCU read-side critical section |
295 | * | 519 | * |
296 | * When synchronize_rcu() is invoked on one CPU while other CPUs | 520 | * When synchronize_rcu() is invoked on one CPU while other CPUs |
297 | * are within RCU read-side critical sections, then the | 521 | * are within RCU read-side critical sections, then the |
@@ -302,7 +526,7 @@ extern int rcu_my_thread_group_empty(void); | |||
302 | * until after the all the other CPUs exit their critical sections. | 526 | * until after the all the other CPUs exit their critical sections. |
303 | * | 527 | * |
304 | * Note, however, that RCU callbacks are permitted to run concurrently | 528 | * Note, however, that RCU callbacks are permitted to run concurrently |
305 | * with RCU read-side critical sections. One way that this can happen | 529 | * with new RCU read-side critical sections. One way that this can happen |
306 | * is via the following sequence of events: (1) CPU 0 enters an RCU | 530 | * is via the following sequence of events: (1) CPU 0 enters an RCU |
307 | * read-side critical section, (2) CPU 1 invokes call_rcu() to register | 531 | * read-side critical section, (2) CPU 1 invokes call_rcu() to register |
308 | * an RCU callback, (3) CPU 0 exits the RCU read-side critical section, | 532 | * an RCU callback, (3) CPU 0 exits the RCU read-side critical section, |
@@ -317,7 +541,20 @@ extern int rcu_my_thread_group_empty(void); | |||
317 | * will be deferred until the outermost RCU read-side critical section | 541 | * will be deferred until the outermost RCU read-side critical section |
318 | * completes. | 542 | * completes. |
319 | * | 543 | * |
320 | * It is illegal to block while in an RCU read-side critical section. | 544 | * You can avoid reading and understanding the next paragraph by |
545 | * following this rule: don't put anything in an rcu_read_lock() RCU | ||
546 | * read-side critical section that would block in a !PREEMPT kernel. | ||
547 | * But if you want the full story, read on! | ||
548 | * | ||
549 | * In non-preemptible RCU implementations (TREE_RCU and TINY_RCU), it | ||
550 | * is illegal to block while in an RCU read-side critical section. In | ||
551 | * preemptible RCU implementations (TREE_PREEMPT_RCU and TINY_PREEMPT_RCU) | ||
552 | * in CONFIG_PREEMPT kernel builds, RCU read-side critical sections may | ||
553 | * be preempted, but explicit blocking is illegal. Finally, in preemptible | ||
554 | * RCU implementations in real-time (CONFIG_PREEMPT_RT) kernel builds, | ||
555 | * RCU read-side critical sections may be preempted and they may also | ||
556 | * block, but only when acquiring spinlocks that are subject to priority | ||
557 | * inheritance. | ||
321 | */ | 558 | */ |
322 | static inline void rcu_read_lock(void) | 559 | static inline void rcu_read_lock(void) |
323 | { | 560 | { |
@@ -337,7 +574,7 @@ static inline void rcu_read_lock(void) | |||
337 | */ | 574 | */ |
338 | 575 | ||
339 | /** | 576 | /** |
340 | * rcu_read_unlock - marks the end of an RCU read-side critical section. | 577 | * rcu_read_unlock() - marks the end of an RCU read-side critical section. |
341 | * | 578 | * |
342 | * See rcu_read_lock() for more information. | 579 | * See rcu_read_lock() for more information. |
343 | */ | 580 | */ |
@@ -349,15 +586,16 @@ static inline void rcu_read_unlock(void) | |||
349 | } | 586 | } |
350 | 587 | ||
351 | /** | 588 | /** |
352 | * rcu_read_lock_bh - mark the beginning of a softirq-only RCU critical section | 589 | * rcu_read_lock_bh() - mark the beginning of an RCU-bh critical section |
353 | * | 590 | * |
354 | * This is equivalent of rcu_read_lock(), but to be used when updates | 591 | * This is equivalent of rcu_read_lock(), but to be used when updates |
355 | * are being done using call_rcu_bh(). Since call_rcu_bh() callbacks | 592 | * are being done using call_rcu_bh() or synchronize_rcu_bh(). Since |
356 | * consider completion of a softirq handler to be a quiescent state, | 593 | * both call_rcu_bh() and synchronize_rcu_bh() consider completion of a |
357 | * a process in RCU read-side critical section must be protected by | 594 | * softirq handler to be a quiescent state, a process in RCU read-side |
358 | * disabling softirqs. Read-side critical sections in interrupt context | 595 | * critical section must be protected by disabling softirqs. Read-side |
359 | * can use just rcu_read_lock(). | 596 | * critical sections in interrupt context can use just rcu_read_lock(), |
360 | * | 597 | * though this should at least be commented to avoid confusing people |
598 | * reading the code. | ||
361 | */ | 599 | */ |
362 | static inline void rcu_read_lock_bh(void) | 600 | static inline void rcu_read_lock_bh(void) |
363 | { | 601 | { |
@@ -379,13 +617,12 @@ static inline void rcu_read_unlock_bh(void) | |||
379 | } | 617 | } |
380 | 618 | ||
381 | /** | 619 | /** |
382 | * rcu_read_lock_sched - mark the beginning of a RCU-classic critical section | 620 | * rcu_read_lock_sched() - mark the beginning of a RCU-sched critical section |
383 | * | 621 | * |
384 | * Should be used with either | 622 | * This is equivalent of rcu_read_lock(), but to be used when updates |
385 | * - synchronize_sched() | 623 | * are being done using call_rcu_sched() or synchronize_rcu_sched(). |
386 | * or | 624 | * Read-side critical sections can also be introduced by anything that |
387 | * - call_rcu_sched() and rcu_barrier_sched() | 625 | * disables preemption, including local_irq_disable() and friends. |
388 | * on the write-side to insure proper synchronization. | ||
389 | */ | 626 | */ |
390 | static inline void rcu_read_lock_sched(void) | 627 | static inline void rcu_read_lock_sched(void) |
391 | { | 628 | { |
@@ -420,54 +657,14 @@ static inline notrace void rcu_read_unlock_sched_notrace(void) | |||
420 | preempt_enable_notrace(); | 657 | preempt_enable_notrace(); |
421 | } | 658 | } |
422 | 659 | ||
423 | |||
424 | /** | 660 | /** |
425 | * rcu_dereference_raw - fetch an RCU-protected pointer | 661 | * rcu_assign_pointer() - assign to RCU-protected pointer |
662 | * @p: pointer to assign to | ||
663 | * @v: value to assign (publish) | ||
426 | * | 664 | * |
427 | * The caller must be within some flavor of RCU read-side critical | 665 | * Assigns the specified value to the specified RCU-protected |
428 | * section, or must be otherwise preventing the pointer from changing, | 666 | * pointer, ensuring that any concurrent RCU readers will see |
429 | * for example, by holding an appropriate lock. This pointer may later | 667 | * any prior initialization. Returns the value assigned. |
430 | * be safely dereferenced. It is the caller's responsibility to have | ||
431 | * done the right thing, as this primitive does no checking of any kind. | ||
432 | * | ||
433 | * Inserts memory barriers on architectures that require them | ||
434 | * (currently only the Alpha), and, more importantly, documents | ||
435 | * exactly which pointers are protected by RCU. | ||
436 | */ | ||
437 | #define rcu_dereference_raw(p) ({ \ | ||
438 | typeof(p) _________p1 = ACCESS_ONCE(p); \ | ||
439 | smp_read_barrier_depends(); \ | ||
440 | (_________p1); \ | ||
441 | }) | ||
442 | |||
443 | /** | ||
444 | * rcu_dereference - fetch an RCU-protected pointer, checking for RCU | ||
445 | * | ||
446 | * Makes rcu_dereference_check() do the dirty work. | ||
447 | */ | ||
448 | #define rcu_dereference(p) \ | ||
449 | rcu_dereference_check(p, rcu_read_lock_held()) | ||
450 | |||
451 | /** | ||
452 | * rcu_dereference_bh - fetch an RCU-protected pointer, checking for RCU-bh | ||
453 | * | ||
454 | * Makes rcu_dereference_check() do the dirty work. | ||
455 | */ | ||
456 | #define rcu_dereference_bh(p) \ | ||
457 | rcu_dereference_check(p, rcu_read_lock_bh_held() || irqs_disabled()) | ||
458 | |||
459 | /** | ||
460 | * rcu_dereference_sched - fetch RCU-protected pointer, checking for RCU-sched | ||
461 | * | ||
462 | * Makes rcu_dereference_check() do the dirty work. | ||
463 | */ | ||
464 | #define rcu_dereference_sched(p) \ | ||
465 | rcu_dereference_check(p, rcu_read_lock_sched_held()) | ||
466 | |||
467 | /** | ||
468 | * rcu_assign_pointer - assign (publicize) a pointer to a newly | ||
469 | * initialized structure that will be dereferenced by RCU read-side | ||
470 | * critical sections. Returns the value assigned. | ||
471 | * | 668 | * |
472 | * Inserts memory barriers on architectures that require them | 669 | * Inserts memory barriers on architectures that require them |
473 | * (pretty much all of them other than x86), and also prevents | 670 | * (pretty much all of them other than x86), and also prevents |
@@ -476,14 +673,17 @@ static inline notrace void rcu_read_unlock_sched_notrace(void) | |||
476 | * call documents which pointers will be dereferenced by RCU read-side | 673 | * call documents which pointers will be dereferenced by RCU read-side |
477 | * code. | 674 | * code. |
478 | */ | 675 | */ |
479 | |||
480 | #define rcu_assign_pointer(p, v) \ | 676 | #define rcu_assign_pointer(p, v) \ |
481 | ({ \ | 677 | __rcu_assign_pointer((p), (v), __rcu) |
482 | if (!__builtin_constant_p(v) || \ | 678 | |
483 | ((v) != NULL)) \ | 679 | /** |
484 | smp_wmb(); \ | 680 | * RCU_INIT_POINTER() - initialize an RCU protected pointer |
485 | (p) = (v); \ | 681 | * |
486 | }) | 682 | * Initialize an RCU-protected pointer in such a way to avoid RCU-lockdep |
683 | * splats. | ||
684 | */ | ||
685 | #define RCU_INIT_POINTER(p, v) \ | ||
686 | p = (typeof(*v) __force __rcu *)(v) | ||
487 | 687 | ||
488 | /* Infrastructure to implement the synchronize_() primitives. */ | 688 | /* Infrastructure to implement the synchronize_() primitives. */ |
489 | 689 | ||
@@ -494,26 +694,37 @@ struct rcu_synchronize { | |||
494 | 694 | ||
495 | extern void wakeme_after_rcu(struct rcu_head *head); | 695 | extern void wakeme_after_rcu(struct rcu_head *head); |
496 | 696 | ||
697 | #ifdef CONFIG_PREEMPT_RCU | ||
698 | |||
497 | /** | 699 | /** |
498 | * call_rcu - Queue an RCU callback for invocation after a grace period. | 700 | * call_rcu() - Queue an RCU callback for invocation after a grace period. |
499 | * @head: structure to be used for queueing the RCU updates. | 701 | * @head: structure to be used for queueing the RCU updates. |
500 | * @func: actual update function to be invoked after the grace period | 702 | * @func: actual callback function to be invoked after the grace period |
501 | * | 703 | * |
502 | * The update function will be invoked some time after a full grace | 704 | * The callback function will be invoked some time after a full grace |
503 | * period elapses, in other words after all currently executing RCU | 705 | * period elapses, in other words after all pre-existing RCU read-side |
504 | * read-side critical sections have completed. RCU read-side critical | 706 | * critical sections have completed. However, the callback function |
707 | * might well execute concurrently with RCU read-side critical sections | ||
708 | * that started after call_rcu() was invoked. RCU read-side critical | ||
505 | * sections are delimited by rcu_read_lock() and rcu_read_unlock(), | 709 | * sections are delimited by rcu_read_lock() and rcu_read_unlock(), |
506 | * and may be nested. | 710 | * and may be nested. |
507 | */ | 711 | */ |
508 | extern void call_rcu(struct rcu_head *head, | 712 | extern void call_rcu(struct rcu_head *head, |
509 | void (*func)(struct rcu_head *head)); | 713 | void (*func)(struct rcu_head *head)); |
510 | 714 | ||
715 | #else /* #ifdef CONFIG_PREEMPT_RCU */ | ||
716 | |||
717 | /* In classic RCU, call_rcu() is just call_rcu_sched(). */ | ||
718 | #define call_rcu call_rcu_sched | ||
719 | |||
720 | #endif /* #else #ifdef CONFIG_PREEMPT_RCU */ | ||
721 | |||
511 | /** | 722 | /** |
512 | * call_rcu_bh - Queue an RCU for invocation after a quicker grace period. | 723 | * call_rcu_bh() - Queue an RCU for invocation after a quicker grace period. |
513 | * @head: structure to be used for queueing the RCU updates. | 724 | * @head: structure to be used for queueing the RCU updates. |
514 | * @func: actual update function to be invoked after the grace period | 725 | * @func: actual callback function to be invoked after the grace period |
515 | * | 726 | * |
516 | * The update function will be invoked some time after a full grace | 727 | * The callback function will be invoked some time after a full grace |
517 | * period elapses, in other words after all currently executing RCU | 728 | * period elapses, in other words after all currently executing RCU |
518 | * read-side critical sections have completed. call_rcu_bh() assumes | 729 | * read-side critical sections have completed. call_rcu_bh() assumes |
519 | * that the read-side critical sections end on completion of a softirq | 730 | * that the read-side critical sections end on completion of a softirq |
@@ -566,37 +777,4 @@ static inline void debug_rcu_head_unqueue(struct rcu_head *head) | |||
566 | } | 777 | } |
567 | #endif /* #else !CONFIG_DEBUG_OBJECTS_RCU_HEAD */ | 778 | #endif /* #else !CONFIG_DEBUG_OBJECTS_RCU_HEAD */ |
568 | 779 | ||
569 | #ifndef CONFIG_PROVE_RCU | ||
570 | #define __do_rcu_dereference_check(c) do { } while (0) | ||
571 | #endif /* #ifdef CONFIG_PROVE_RCU */ | ||
572 | |||
573 | #define __rcu_dereference_index_check(p, c) \ | ||
574 | ({ \ | ||
575 | typeof(p) _________p1 = ACCESS_ONCE(p); \ | ||
576 | __do_rcu_dereference_check(c); \ | ||
577 | smp_read_barrier_depends(); \ | ||
578 | (_________p1); \ | ||
579 | }) | ||
580 | |||
581 | /** | ||
582 | * rcu_dereference_index_check() - rcu_dereference for indices with debug checking | ||
583 | * @p: The pointer to read, prior to dereferencing | ||
584 | * @c: The conditions under which the dereference will take place | ||
585 | * | ||
586 | * Similar to rcu_dereference_check(), but omits the sparse checking. | ||
587 | * This allows rcu_dereference_index_check() to be used on integers, | ||
588 | * which can then be used as array indices. Attempting to use | ||
589 | * rcu_dereference_check() on an integer will give compiler warnings | ||
590 | * because the sparse address-space mechanism relies on dereferencing | ||
591 | * the RCU-protected pointer. Dereferencing integers is not something | ||
592 | * that even gcc will put up with. | ||
593 | * | ||
594 | * Note that this function does not implicitly check for RCU read-side | ||
595 | * critical sections. If this function gains lots of uses, it might | ||
596 | * make sense to provide versions for each flavor of RCU, but it does | ||
597 | * not make sense as of early 2010. | ||
598 | */ | ||
599 | #define rcu_dereference_index_check(p, c) \ | ||
600 | __rcu_dereference_index_check((p), (c)) | ||
601 | |||
602 | #endif /* __LINUX_RCUPDATE_H */ | 780 | #endif /* __LINUX_RCUPDATE_H */ |
diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h index e2e893144a84..13877cb93a60 100644 --- a/include/linux/rcutiny.h +++ b/include/linux/rcutiny.h | |||
@@ -27,103 +27,101 @@ | |||
27 | 27 | ||
28 | #include <linux/cache.h> | 28 | #include <linux/cache.h> |
29 | 29 | ||
30 | void rcu_sched_qs(int cpu); | 30 | #define rcu_init_sched() do { } while (0) |
31 | void rcu_bh_qs(int cpu); | ||
32 | static inline void rcu_note_context_switch(int cpu) | ||
33 | { | ||
34 | rcu_sched_qs(cpu); | ||
35 | } | ||
36 | 31 | ||
37 | #define __rcu_read_lock() preempt_disable() | 32 | #ifdef CONFIG_TINY_RCU |
38 | #define __rcu_read_unlock() preempt_enable() | ||
39 | #define __rcu_read_lock_bh() local_bh_disable() | ||
40 | #define __rcu_read_unlock_bh() local_bh_enable() | ||
41 | #define call_rcu_sched call_rcu | ||
42 | 33 | ||
43 | #define rcu_init_sched() do { } while (0) | 34 | static inline void synchronize_rcu_expedited(void) |
44 | extern void rcu_check_callbacks(int cpu, int user); | 35 | { |
36 | synchronize_sched(); /* Only one CPU, so pretty fast anyway!!! */ | ||
37 | } | ||
45 | 38 | ||
46 | static inline int rcu_needs_cpu(int cpu) | 39 | static inline void rcu_barrier(void) |
47 | { | 40 | { |
48 | return 0; | 41 | rcu_barrier_sched(); /* Only one CPU, so only one list of callbacks! */ |
49 | } | 42 | } |
50 | 43 | ||
51 | /* | 44 | #else /* #ifdef CONFIG_TINY_RCU */ |
52 | * Return the number of grace periods. | 45 | |
53 | */ | 46 | void rcu_barrier(void); |
54 | static inline long rcu_batches_completed(void) | 47 | void synchronize_rcu_expedited(void); |
48 | |||
49 | #endif /* #else #ifdef CONFIG_TINY_RCU */ | ||
50 | |||
51 | static inline void synchronize_rcu_bh(void) | ||
55 | { | 52 | { |
56 | return 0; | 53 | synchronize_sched(); |
57 | } | 54 | } |
58 | 55 | ||
59 | /* | 56 | static inline void synchronize_rcu_bh_expedited(void) |
60 | * Return the number of bottom-half grace periods. | ||
61 | */ | ||
62 | static inline long rcu_batches_completed_bh(void) | ||
63 | { | 57 | { |
64 | return 0; | 58 | synchronize_sched(); |
65 | } | 59 | } |
66 | 60 | ||
67 | static inline void rcu_force_quiescent_state(void) | 61 | #ifdef CONFIG_TINY_RCU |
62 | |||
63 | static inline void rcu_preempt_note_context_switch(void) | ||
68 | { | 64 | { |
69 | } | 65 | } |
70 | 66 | ||
71 | static inline void rcu_bh_force_quiescent_state(void) | 67 | static inline void exit_rcu(void) |
72 | { | 68 | { |
73 | } | 69 | } |
74 | 70 | ||
75 | static inline void rcu_sched_force_quiescent_state(void) | 71 | static inline int rcu_needs_cpu(int cpu) |
76 | { | 72 | { |
73 | return 0; | ||
77 | } | 74 | } |
78 | 75 | ||
79 | extern void synchronize_sched(void); | 76 | #else /* #ifdef CONFIG_TINY_RCU */ |
77 | |||
78 | void rcu_preempt_note_context_switch(void); | ||
79 | extern void exit_rcu(void); | ||
80 | int rcu_preempt_needs_cpu(void); | ||
80 | 81 | ||
81 | static inline void synchronize_rcu(void) | 82 | static inline int rcu_needs_cpu(int cpu) |
82 | { | 83 | { |
83 | synchronize_sched(); | 84 | return rcu_preempt_needs_cpu(); |
84 | } | 85 | } |
85 | 86 | ||
86 | static inline void synchronize_rcu_bh(void) | 87 | #endif /* #else #ifdef CONFIG_TINY_RCU */ |
88 | |||
89 | static inline void rcu_note_context_switch(int cpu) | ||
87 | { | 90 | { |
88 | synchronize_sched(); | 91 | rcu_sched_qs(cpu); |
92 | rcu_preempt_note_context_switch(); | ||
89 | } | 93 | } |
90 | 94 | ||
91 | static inline void synchronize_rcu_expedited(void) | 95 | /* |
96 | * Return the number of grace periods. | ||
97 | */ | ||
98 | static inline long rcu_batches_completed(void) | ||
92 | { | 99 | { |
93 | synchronize_sched(); | 100 | return 0; |
94 | } | 101 | } |
95 | 102 | ||
96 | static inline void synchronize_rcu_bh_expedited(void) | 103 | /* |
104 | * Return the number of bottom-half grace periods. | ||
105 | */ | ||
106 | static inline long rcu_batches_completed_bh(void) | ||
97 | { | 107 | { |
98 | synchronize_sched(); | 108 | return 0; |
99 | } | 109 | } |
100 | 110 | ||
101 | struct notifier_block; | 111 | static inline void rcu_force_quiescent_state(void) |
102 | |||
103 | #ifdef CONFIG_NO_HZ | ||
104 | |||
105 | extern void rcu_enter_nohz(void); | ||
106 | extern void rcu_exit_nohz(void); | ||
107 | |||
108 | #else /* #ifdef CONFIG_NO_HZ */ | ||
109 | |||
110 | static inline void rcu_enter_nohz(void) | ||
111 | { | 112 | { |
112 | } | 113 | } |
113 | 114 | ||
114 | static inline void rcu_exit_nohz(void) | 115 | static inline void rcu_bh_force_quiescent_state(void) |
115 | { | 116 | { |
116 | } | 117 | } |
117 | 118 | ||
118 | #endif /* #else #ifdef CONFIG_NO_HZ */ | 119 | static inline void rcu_sched_force_quiescent_state(void) |
119 | |||
120 | static inline void exit_rcu(void) | ||
121 | { | 120 | { |
122 | } | 121 | } |
123 | 122 | ||
124 | static inline int rcu_preempt_depth(void) | 123 | static inline void rcu_cpu_stall_reset(void) |
125 | { | 124 | { |
126 | return 0; | ||
127 | } | 125 | } |
128 | 126 | ||
129 | #ifdef CONFIG_DEBUG_LOCK_ALLOC | 127 | #ifdef CONFIG_DEBUG_LOCK_ALLOC |
diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h index c0ed1c056f29..95518e628794 100644 --- a/include/linux/rcutree.h +++ b/include/linux/rcutree.h | |||
@@ -30,64 +30,23 @@ | |||
30 | #ifndef __LINUX_RCUTREE_H | 30 | #ifndef __LINUX_RCUTREE_H |
31 | #define __LINUX_RCUTREE_H | 31 | #define __LINUX_RCUTREE_H |
32 | 32 | ||
33 | struct notifier_block; | ||
34 | |||
35 | extern void rcu_sched_qs(int cpu); | ||
36 | extern void rcu_bh_qs(int cpu); | ||
37 | extern void rcu_note_context_switch(int cpu); | 33 | extern void rcu_note_context_switch(int cpu); |
38 | extern int rcu_needs_cpu(int cpu); | 34 | extern int rcu_needs_cpu(int cpu); |
35 | extern void rcu_cpu_stall_reset(void); | ||
39 | 36 | ||
40 | #ifdef CONFIG_TREE_PREEMPT_RCU | 37 | #ifdef CONFIG_TREE_PREEMPT_RCU |
41 | 38 | ||
42 | extern void __rcu_read_lock(void); | ||
43 | extern void __rcu_read_unlock(void); | ||
44 | extern void synchronize_rcu(void); | ||
45 | extern void exit_rcu(void); | 39 | extern void exit_rcu(void); |
46 | 40 | ||
47 | /* | ||
48 | * Defined as macro as it is a very low level header | ||
49 | * included from areas that don't even know about current | ||
50 | */ | ||
51 | #define rcu_preempt_depth() (current->rcu_read_lock_nesting) | ||
52 | |||
53 | #else /* #ifdef CONFIG_TREE_PREEMPT_RCU */ | 41 | #else /* #ifdef CONFIG_TREE_PREEMPT_RCU */ |
54 | 42 | ||
55 | static inline void __rcu_read_lock(void) | ||
56 | { | ||
57 | preempt_disable(); | ||
58 | } | ||
59 | |||
60 | static inline void __rcu_read_unlock(void) | ||
61 | { | ||
62 | preempt_enable(); | ||
63 | } | ||
64 | |||
65 | #define synchronize_rcu synchronize_sched | ||
66 | |||
67 | static inline void exit_rcu(void) | 43 | static inline void exit_rcu(void) |
68 | { | 44 | { |
69 | } | 45 | } |
70 | 46 | ||
71 | static inline int rcu_preempt_depth(void) | ||
72 | { | ||
73 | return 0; | ||
74 | } | ||
75 | |||
76 | #endif /* #else #ifdef CONFIG_TREE_PREEMPT_RCU */ | 47 | #endif /* #else #ifdef CONFIG_TREE_PREEMPT_RCU */ |
77 | 48 | ||
78 | static inline void __rcu_read_lock_bh(void) | ||
79 | { | ||
80 | local_bh_disable(); | ||
81 | } | ||
82 | static inline void __rcu_read_unlock_bh(void) | ||
83 | { | ||
84 | local_bh_enable(); | ||
85 | } | ||
86 | |||
87 | extern void call_rcu_sched(struct rcu_head *head, | ||
88 | void (*func)(struct rcu_head *rcu)); | ||
89 | extern void synchronize_rcu_bh(void); | 49 | extern void synchronize_rcu_bh(void); |
90 | extern void synchronize_sched(void); | ||
91 | extern void synchronize_rcu_expedited(void); | 50 | extern void synchronize_rcu_expedited(void); |
92 | 51 | ||
93 | static inline void synchronize_rcu_bh_expedited(void) | 52 | static inline void synchronize_rcu_bh_expedited(void) |
@@ -95,7 +54,7 @@ static inline void synchronize_rcu_bh_expedited(void) | |||
95 | synchronize_sched_expedited(); | 54 | synchronize_sched_expedited(); |
96 | } | 55 | } |
97 | 56 | ||
98 | extern void rcu_check_callbacks(int cpu, int user); | 57 | extern void rcu_barrier(void); |
99 | 58 | ||
100 | extern long rcu_batches_completed(void); | 59 | extern long rcu_batches_completed(void); |
101 | extern long rcu_batches_completed_bh(void); | 60 | extern long rcu_batches_completed_bh(void); |
@@ -104,18 +63,6 @@ extern void rcu_force_quiescent_state(void); | |||
104 | extern void rcu_bh_force_quiescent_state(void); | 63 | extern void rcu_bh_force_quiescent_state(void); |
105 | extern void rcu_sched_force_quiescent_state(void); | 64 | extern void rcu_sched_force_quiescent_state(void); |
106 | 65 | ||
107 | #ifdef CONFIG_NO_HZ | ||
108 | void rcu_enter_nohz(void); | ||
109 | void rcu_exit_nohz(void); | ||
110 | #else /* CONFIG_NO_HZ */ | ||
111 | static inline void rcu_enter_nohz(void) | ||
112 | { | ||
113 | } | ||
114 | static inline void rcu_exit_nohz(void) | ||
115 | { | ||
116 | } | ||
117 | #endif /* CONFIG_NO_HZ */ | ||
118 | |||
119 | /* A context switch is a grace period for RCU-sched and RCU-bh. */ | 66 | /* A context switch is a grace period for RCU-sched and RCU-bh. */ |
120 | static inline int rcu_blocking_is_gp(void) | 67 | static inline int rcu_blocking_is_gp(void) |
121 | { | 68 | { |
diff --git a/include/linux/sched.h b/include/linux/sched.h index 2cca9a92f5e5..0383601a927c 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h | |||
@@ -1161,6 +1161,13 @@ struct sched_rt_entity { | |||
1161 | 1161 | ||
1162 | struct rcu_node; | 1162 | struct rcu_node; |
1163 | 1163 | ||
1164 | enum perf_event_task_context { | ||
1165 | perf_invalid_context = -1, | ||
1166 | perf_hw_context = 0, | ||
1167 | perf_sw_context, | ||
1168 | perf_nr_task_contexts, | ||
1169 | }; | ||
1170 | |||
1164 | struct task_struct { | 1171 | struct task_struct { |
1165 | volatile long state; /* -1 unrunnable, 0 runnable, >0 stopped */ | 1172 | volatile long state; /* -1 unrunnable, 0 runnable, >0 stopped */ |
1166 | void *stack; | 1173 | void *stack; |
@@ -1203,11 +1210,13 @@ struct task_struct { | |||
1203 | unsigned int policy; | 1210 | unsigned int policy; |
1204 | cpumask_t cpus_allowed; | 1211 | cpumask_t cpus_allowed; |
1205 | 1212 | ||
1206 | #ifdef CONFIG_TREE_PREEMPT_RCU | 1213 | #ifdef CONFIG_PREEMPT_RCU |
1207 | int rcu_read_lock_nesting; | 1214 | int rcu_read_lock_nesting; |
1208 | char rcu_read_unlock_special; | 1215 | char rcu_read_unlock_special; |
1209 | struct rcu_node *rcu_blocked_node; | ||
1210 | struct list_head rcu_node_entry; | 1216 | struct list_head rcu_node_entry; |
1217 | #endif /* #ifdef CONFIG_PREEMPT_RCU */ | ||
1218 | #ifdef CONFIG_TREE_PREEMPT_RCU | ||
1219 | struct rcu_node *rcu_blocked_node; | ||
1211 | #endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */ | 1220 | #endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */ |
1212 | 1221 | ||
1213 | #if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT) | 1222 | #if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT) |
@@ -1289,9 +1298,9 @@ struct task_struct { | |||
1289 | struct list_head cpu_timers[3]; | 1298 | struct list_head cpu_timers[3]; |
1290 | 1299 | ||
1291 | /* process credentials */ | 1300 | /* process credentials */ |
1292 | const struct cred *real_cred; /* objective and real subjective task | 1301 | const struct cred __rcu *real_cred; /* objective and real subjective task |
1293 | * credentials (COW) */ | 1302 | * credentials (COW) */ |
1294 | const struct cred *cred; /* effective (overridable) subjective task | 1303 | const struct cred __rcu *cred; /* effective (overridable) subjective task |
1295 | * credentials (COW) */ | 1304 | * credentials (COW) */ |
1296 | struct mutex cred_guard_mutex; /* guard against foreign influences on | 1305 | struct mutex cred_guard_mutex; /* guard against foreign influences on |
1297 | * credential calculations | 1306 | * credential calculations |
@@ -1419,7 +1428,7 @@ struct task_struct { | |||
1419 | #endif | 1428 | #endif |
1420 | #ifdef CONFIG_CGROUPS | 1429 | #ifdef CONFIG_CGROUPS |
1421 | /* Control Group info protected by css_set_lock */ | 1430 | /* Control Group info protected by css_set_lock */ |
1422 | struct css_set *cgroups; | 1431 | struct css_set __rcu *cgroups; |
1423 | /* cg_list protected by css_set_lock and tsk->alloc_lock */ | 1432 | /* cg_list protected by css_set_lock and tsk->alloc_lock */ |
1424 | struct list_head cg_list; | 1433 | struct list_head cg_list; |
1425 | #endif | 1434 | #endif |
@@ -1432,7 +1441,7 @@ struct task_struct { | |||
1432 | struct futex_pi_state *pi_state_cache; | 1441 | struct futex_pi_state *pi_state_cache; |
1433 | #endif | 1442 | #endif |
1434 | #ifdef CONFIG_PERF_EVENTS | 1443 | #ifdef CONFIG_PERF_EVENTS |
1435 | struct perf_event_context *perf_event_ctxp; | 1444 | struct perf_event_context *perf_event_ctxp[perf_nr_task_contexts]; |
1436 | struct mutex perf_event_mutex; | 1445 | struct mutex perf_event_mutex; |
1437 | struct list_head perf_event_list; | 1446 | struct list_head perf_event_list; |
1438 | #endif | 1447 | #endif |
@@ -1740,7 +1749,7 @@ extern void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t * | |||
1740 | #define tsk_used_math(p) ((p)->flags & PF_USED_MATH) | 1749 | #define tsk_used_math(p) ((p)->flags & PF_USED_MATH) |
1741 | #define used_math() tsk_used_math(current) | 1750 | #define used_math() tsk_used_math(current) |
1742 | 1751 | ||
1743 | #ifdef CONFIG_TREE_PREEMPT_RCU | 1752 | #ifdef CONFIG_PREEMPT_RCU |
1744 | 1753 | ||
1745 | #define RCU_READ_UNLOCK_BLOCKED (1 << 0) /* blocked while in RCU read-side. */ | 1754 | #define RCU_READ_UNLOCK_BLOCKED (1 << 0) /* blocked while in RCU read-side. */ |
1746 | #define RCU_READ_UNLOCK_NEED_QS (1 << 1) /* RCU core needs CPU response. */ | 1755 | #define RCU_READ_UNLOCK_NEED_QS (1 << 1) /* RCU core needs CPU response. */ |
@@ -1749,7 +1758,9 @@ static inline void rcu_copy_process(struct task_struct *p) | |||
1749 | { | 1758 | { |
1750 | p->rcu_read_lock_nesting = 0; | 1759 | p->rcu_read_lock_nesting = 0; |
1751 | p->rcu_read_unlock_special = 0; | 1760 | p->rcu_read_unlock_special = 0; |
1761 | #ifdef CONFIG_TREE_PREEMPT_RCU | ||
1752 | p->rcu_blocked_node = NULL; | 1762 | p->rcu_blocked_node = NULL; |
1763 | #endif | ||
1753 | INIT_LIST_HEAD(&p->rcu_node_entry); | 1764 | INIT_LIST_HEAD(&p->rcu_node_entry); |
1754 | } | 1765 | } |
1755 | 1766 | ||
diff --git a/include/linux/security.h b/include/linux/security.h index a22219afff09..b8246a8df7d2 100644 --- a/include/linux/security.h +++ b/include/linux/security.h | |||
@@ -74,7 +74,7 @@ extern int cap_file_mmap(struct file *file, unsigned long reqprot, | |||
74 | extern int cap_task_fix_setuid(struct cred *new, const struct cred *old, int flags); | 74 | extern int cap_task_fix_setuid(struct cred *new, const struct cred *old, int flags); |
75 | extern int cap_task_prctl(int option, unsigned long arg2, unsigned long arg3, | 75 | extern int cap_task_prctl(int option, unsigned long arg2, unsigned long arg3, |
76 | unsigned long arg4, unsigned long arg5); | 76 | unsigned long arg4, unsigned long arg5); |
77 | extern int cap_task_setscheduler(struct task_struct *p, int policy, struct sched_param *lp); | 77 | extern int cap_task_setscheduler(struct task_struct *p); |
78 | extern int cap_task_setioprio(struct task_struct *p, int ioprio); | 78 | extern int cap_task_setioprio(struct task_struct *p, int ioprio); |
79 | extern int cap_task_setnice(struct task_struct *p, int nice); | 79 | extern int cap_task_setnice(struct task_struct *p, int nice); |
80 | extern int cap_syslog(int type, bool from_file); | 80 | extern int cap_syslog(int type, bool from_file); |
@@ -959,6 +959,12 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts) | |||
959 | * Sets the new child socket's sid to the openreq sid. | 959 | * Sets the new child socket's sid to the openreq sid. |
960 | * @inet_conn_established: | 960 | * @inet_conn_established: |
961 | * Sets the connection's peersid to the secmark on skb. | 961 | * Sets the connection's peersid to the secmark on skb. |
962 | * @secmark_relabel_packet: | ||
963 | * check if the process should be allowed to relabel packets to the given secid | ||
964 | * @secmark_refcount_inc: | ||
965 | * tells the LSM to increment the number of secmark labeling rules loaded | ||
966 | * @secmark_refcount_dec: | ||
967 | * tells the LSM to decrement the number of secmark labeling rules loaded | ||
962 | * @req_classify_flow: | 968 | * @req_classify_flow: |
963 | * Sets the flow's sid to the openreq sid. | 969 | * Sets the flow's sid to the openreq sid. |
964 | * @tun_dev_create: | 970 | * @tun_dev_create: |
@@ -1279,9 +1285,13 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts) | |||
1279 | * Return 0 if permission is granted. | 1285 | * Return 0 if permission is granted. |
1280 | * | 1286 | * |
1281 | * @secid_to_secctx: | 1287 | * @secid_to_secctx: |
1282 | * Convert secid to security context. | 1288 | * Convert secid to security context. If secdata is NULL the length of |
1289 | * the result will be returned in seclen, but no secdata will be returned. | ||
1290 | * This does mean that the length could change between calls to check the | ||
1291 | * length and the next call which actually allocates and returns the secdata. | ||
1283 | * @secid contains the security ID. | 1292 | * @secid contains the security ID. |
1284 | * @secdata contains the pointer that stores the converted security context. | 1293 | * @secdata contains the pointer that stores the converted security context. |
1294 | * @seclen pointer which contains the length of the data | ||
1285 | * @secctx_to_secid: | 1295 | * @secctx_to_secid: |
1286 | * Convert security context to secid. | 1296 | * Convert security context to secid. |
1287 | * @secid contains the pointer to the generated security ID. | 1297 | * @secid contains the pointer to the generated security ID. |
@@ -1501,8 +1511,7 @@ struct security_operations { | |||
1501 | int (*task_getioprio) (struct task_struct *p); | 1511 | int (*task_getioprio) (struct task_struct *p); |
1502 | int (*task_setrlimit) (struct task_struct *p, unsigned int resource, | 1512 | int (*task_setrlimit) (struct task_struct *p, unsigned int resource, |
1503 | struct rlimit *new_rlim); | 1513 | struct rlimit *new_rlim); |
1504 | int (*task_setscheduler) (struct task_struct *p, int policy, | 1514 | int (*task_setscheduler) (struct task_struct *p); |
1505 | struct sched_param *lp); | ||
1506 | int (*task_getscheduler) (struct task_struct *p); | 1515 | int (*task_getscheduler) (struct task_struct *p); |
1507 | int (*task_movememory) (struct task_struct *p); | 1516 | int (*task_movememory) (struct task_struct *p); |
1508 | int (*task_kill) (struct task_struct *p, | 1517 | int (*task_kill) (struct task_struct *p, |
@@ -1594,6 +1603,9 @@ struct security_operations { | |||
1594 | struct request_sock *req); | 1603 | struct request_sock *req); |
1595 | void (*inet_csk_clone) (struct sock *newsk, const struct request_sock *req); | 1604 | void (*inet_csk_clone) (struct sock *newsk, const struct request_sock *req); |
1596 | void (*inet_conn_established) (struct sock *sk, struct sk_buff *skb); | 1605 | void (*inet_conn_established) (struct sock *sk, struct sk_buff *skb); |
1606 | int (*secmark_relabel_packet) (u32 secid); | ||
1607 | void (*secmark_refcount_inc) (void); | ||
1608 | void (*secmark_refcount_dec) (void); | ||
1597 | void (*req_classify_flow) (const struct request_sock *req, struct flowi *fl); | 1609 | void (*req_classify_flow) (const struct request_sock *req, struct flowi *fl); |
1598 | int (*tun_dev_create)(void); | 1610 | int (*tun_dev_create)(void); |
1599 | void (*tun_dev_post_create)(struct sock *sk); | 1611 | void (*tun_dev_post_create)(struct sock *sk); |
@@ -1752,8 +1764,7 @@ int security_task_setioprio(struct task_struct *p, int ioprio); | |||
1752 | int security_task_getioprio(struct task_struct *p); | 1764 | int security_task_getioprio(struct task_struct *p); |
1753 | int security_task_setrlimit(struct task_struct *p, unsigned int resource, | 1765 | int security_task_setrlimit(struct task_struct *p, unsigned int resource, |
1754 | struct rlimit *new_rlim); | 1766 | struct rlimit *new_rlim); |
1755 | int security_task_setscheduler(struct task_struct *p, | 1767 | int security_task_setscheduler(struct task_struct *p); |
1756 | int policy, struct sched_param *lp); | ||
1757 | int security_task_getscheduler(struct task_struct *p); | 1768 | int security_task_getscheduler(struct task_struct *p); |
1758 | int security_task_movememory(struct task_struct *p); | 1769 | int security_task_movememory(struct task_struct *p); |
1759 | int security_task_kill(struct task_struct *p, struct siginfo *info, | 1770 | int security_task_kill(struct task_struct *p, struct siginfo *info, |
@@ -2320,11 +2331,9 @@ static inline int security_task_setrlimit(struct task_struct *p, | |||
2320 | return 0; | 2331 | return 0; |
2321 | } | 2332 | } |
2322 | 2333 | ||
2323 | static inline int security_task_setscheduler(struct task_struct *p, | 2334 | static inline int security_task_setscheduler(struct task_struct *p) |
2324 | int policy, | ||
2325 | struct sched_param *lp) | ||
2326 | { | 2335 | { |
2327 | return cap_task_setscheduler(p, policy, lp); | 2336 | return cap_task_setscheduler(p); |
2328 | } | 2337 | } |
2329 | 2338 | ||
2330 | static inline int security_task_getscheduler(struct task_struct *p) | 2339 | static inline int security_task_getscheduler(struct task_struct *p) |
@@ -2551,6 +2560,9 @@ void security_inet_csk_clone(struct sock *newsk, | |||
2551 | const struct request_sock *req); | 2560 | const struct request_sock *req); |
2552 | void security_inet_conn_established(struct sock *sk, | 2561 | void security_inet_conn_established(struct sock *sk, |
2553 | struct sk_buff *skb); | 2562 | struct sk_buff *skb); |
2563 | int security_secmark_relabel_packet(u32 secid); | ||
2564 | void security_secmark_refcount_inc(void); | ||
2565 | void security_secmark_refcount_dec(void); | ||
2554 | int security_tun_dev_create(void); | 2566 | int security_tun_dev_create(void); |
2555 | void security_tun_dev_post_create(struct sock *sk); | 2567 | void security_tun_dev_post_create(struct sock *sk); |
2556 | int security_tun_dev_attach(struct sock *sk); | 2568 | int security_tun_dev_attach(struct sock *sk); |
@@ -2705,6 +2717,19 @@ static inline void security_inet_conn_established(struct sock *sk, | |||
2705 | { | 2717 | { |
2706 | } | 2718 | } |
2707 | 2719 | ||
2720 | static inline int security_secmark_relabel_packet(u32 secid) | ||
2721 | { | ||
2722 | return 0; | ||
2723 | } | ||
2724 | |||
2725 | static inline void security_secmark_refcount_inc(void) | ||
2726 | { | ||
2727 | } | ||
2728 | |||
2729 | static inline void security_secmark_refcount_dec(void) | ||
2730 | { | ||
2731 | } | ||
2732 | |||
2708 | static inline int security_tun_dev_create(void) | 2733 | static inline int security_tun_dev_create(void) |
2709 | { | 2734 | { |
2710 | return 0; | 2735 | return 0; |
diff --git a/include/linux/selinux.h b/include/linux/selinux.h index 82e0f26a1299..44f459612690 100644 --- a/include/linux/selinux.h +++ b/include/linux/selinux.h | |||
@@ -21,74 +21,11 @@ struct kern_ipc_perm; | |||
21 | #ifdef CONFIG_SECURITY_SELINUX | 21 | #ifdef CONFIG_SECURITY_SELINUX |
22 | 22 | ||
23 | /** | 23 | /** |
24 | * selinux_string_to_sid - map a security context string to a security ID | ||
25 | * @str: the security context string to be mapped | ||
26 | * @sid: ID value returned via this. | ||
27 | * | ||
28 | * Returns 0 if successful, with the SID stored in sid. A value | ||
29 | * of zero for sid indicates no SID could be determined (but no error | ||
30 | * occurred). | ||
31 | */ | ||
32 | int selinux_string_to_sid(char *str, u32 *sid); | ||
33 | |||
34 | /** | ||
35 | * selinux_secmark_relabel_packet_permission - secmark permission check | ||
36 | * @sid: SECMARK ID value to be applied to network packet | ||
37 | * | ||
38 | * Returns 0 if the current task is allowed to set the SECMARK label of | ||
39 | * packets with the supplied security ID. Note that it is implicit that | ||
40 | * the packet is always being relabeled from the default unlabeled value, | ||
41 | * and that the access control decision is made in the AVC. | ||
42 | */ | ||
43 | int selinux_secmark_relabel_packet_permission(u32 sid); | ||
44 | |||
45 | /** | ||
46 | * selinux_secmark_refcount_inc - increments the secmark use counter | ||
47 | * | ||
48 | * SELinux keeps track of the current SECMARK targets in use so it knows | ||
49 | * when to apply SECMARK label access checks to network packets. This | ||
50 | * function incements this reference count to indicate that a new SECMARK | ||
51 | * target has been configured. | ||
52 | */ | ||
53 | void selinux_secmark_refcount_inc(void); | ||
54 | |||
55 | /** | ||
56 | * selinux_secmark_refcount_dec - decrements the secmark use counter | ||
57 | * | ||
58 | * SELinux keeps track of the current SECMARK targets in use so it knows | ||
59 | * when to apply SECMARK label access checks to network packets. This | ||
60 | * function decements this reference count to indicate that one of the | ||
61 | * existing SECMARK targets has been removed/flushed. | ||
62 | */ | ||
63 | void selinux_secmark_refcount_dec(void); | ||
64 | |||
65 | /** | ||
66 | * selinux_is_enabled - is SELinux enabled? | 24 | * selinux_is_enabled - is SELinux enabled? |
67 | */ | 25 | */ |
68 | bool selinux_is_enabled(void); | 26 | bool selinux_is_enabled(void); |
69 | #else | 27 | #else |
70 | 28 | ||
71 | static inline int selinux_string_to_sid(const char *str, u32 *sid) | ||
72 | { | ||
73 | *sid = 0; | ||
74 | return 0; | ||
75 | } | ||
76 | |||
77 | static inline int selinux_secmark_relabel_packet_permission(u32 sid) | ||
78 | { | ||
79 | return 0; | ||
80 | } | ||
81 | |||
82 | static inline void selinux_secmark_refcount_inc(void) | ||
83 | { | ||
84 | return; | ||
85 | } | ||
86 | |||
87 | static inline void selinux_secmark_refcount_dec(void) | ||
88 | { | ||
89 | return; | ||
90 | } | ||
91 | |||
92 | static inline bool selinux_is_enabled(void) | 29 | static inline bool selinux_is_enabled(void) |
93 | { | 30 | { |
94 | return false; | 31 | return false; |
diff --git a/include/linux/srcu.h b/include/linux/srcu.h index 4d5d2f546dbf..58971e891f48 100644 --- a/include/linux/srcu.h +++ b/include/linux/srcu.h | |||
@@ -108,19 +108,43 @@ static inline int srcu_read_lock_held(struct srcu_struct *sp) | |||
108 | #endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */ | 108 | #endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */ |
109 | 109 | ||
110 | /** | 110 | /** |
111 | * srcu_dereference - fetch SRCU-protected pointer with checking | 111 | * srcu_dereference_check - fetch SRCU-protected pointer for later dereferencing |
112 | * @p: the pointer to fetch and protect for later dereferencing | ||
113 | * @sp: pointer to the srcu_struct, which is used to check that we | ||
114 | * really are in an SRCU read-side critical section. | ||
115 | * @c: condition to check for update-side use | ||
112 | * | 116 | * |
113 | * Makes rcu_dereference_check() do the dirty work. | 117 | * If PROVE_RCU is enabled, invoking this outside of an RCU read-side |
118 | * critical section will result in an RCU-lockdep splat, unless @c evaluates | ||
119 | * to 1. The @c argument will normally be a logical expression containing | ||
120 | * lockdep_is_held() calls. | ||
114 | */ | 121 | */ |
115 | #define srcu_dereference(p, sp) \ | 122 | #define srcu_dereference_check(p, sp, c) \ |
116 | rcu_dereference_check(p, srcu_read_lock_held(sp)) | 123 | __rcu_dereference_check((p), srcu_read_lock_held(sp) || (c), __rcu) |
124 | |||
125 | /** | ||
126 | * srcu_dereference - fetch SRCU-protected pointer for later dereferencing | ||
127 | * @p: the pointer to fetch and protect for later dereferencing | ||
128 | * @sp: pointer to the srcu_struct, which is used to check that we | ||
129 | * really are in an SRCU read-side critical section. | ||
130 | * | ||
131 | * Makes rcu_dereference_check() do the dirty work. If PROVE_RCU | ||
132 | * is enabled, invoking this outside of an RCU read-side critical | ||
133 | * section will result in an RCU-lockdep splat. | ||
134 | */ | ||
135 | #define srcu_dereference(p, sp) srcu_dereference_check((p), (sp), 0) | ||
117 | 136 | ||
118 | /** | 137 | /** |
119 | * srcu_read_lock - register a new reader for an SRCU-protected structure. | 138 | * srcu_read_lock - register a new reader for an SRCU-protected structure. |
120 | * @sp: srcu_struct in which to register the new reader. | 139 | * @sp: srcu_struct in which to register the new reader. |
121 | * | 140 | * |
122 | * Enter an SRCU read-side critical section. Note that SRCU read-side | 141 | * Enter an SRCU read-side critical section. Note that SRCU read-side |
123 | * critical sections may be nested. | 142 | * critical sections may be nested. However, it is illegal to |
143 | * call anything that waits on an SRCU grace period for the same | ||
144 | * srcu_struct, whether directly or indirectly. Please note that | ||
145 | * one way to indirectly wait on an SRCU grace period is to acquire | ||
146 | * a mutex that is held elsewhere while calling synchronize_srcu() or | ||
147 | * synchronize_srcu_expedited(). | ||
124 | */ | 148 | */ |
125 | static inline int srcu_read_lock(struct srcu_struct *sp) __acquires(sp) | 149 | static inline int srcu_read_lock(struct srcu_struct *sp) __acquires(sp) |
126 | { | 150 | { |
diff --git a/include/linux/stop_machine.h b/include/linux/stop_machine.h index 6b524a0d02e4..1808960c5059 100644 --- a/include/linux/stop_machine.h +++ b/include/linux/stop_machine.h | |||
@@ -126,8 +126,8 @@ int __stop_machine(int (*fn)(void *), void *data, const struct cpumask *cpus); | |||
126 | 126 | ||
127 | #else /* CONFIG_STOP_MACHINE && CONFIG_SMP */ | 127 | #else /* CONFIG_STOP_MACHINE && CONFIG_SMP */ |
128 | 128 | ||
129 | static inline int stop_machine(int (*fn)(void *), void *data, | 129 | static inline int __stop_machine(int (*fn)(void *), void *data, |
130 | const struct cpumask *cpus) | 130 | const struct cpumask *cpus) |
131 | { | 131 | { |
132 | int ret; | 132 | int ret; |
133 | local_irq_disable(); | 133 | local_irq_disable(); |
@@ -136,5 +136,11 @@ static inline int stop_machine(int (*fn)(void *), void *data, | |||
136 | return ret; | 136 | return ret; |
137 | } | 137 | } |
138 | 138 | ||
139 | static inline int stop_machine(int (*fn)(void *), void *data, | ||
140 | const struct cpumask *cpus) | ||
141 | { | ||
142 | return __stop_machine(fn, data, cpus); | ||
143 | } | ||
144 | |||
139 | #endif /* CONFIG_STOP_MACHINE && CONFIG_SMP */ | 145 | #endif /* CONFIG_STOP_MACHINE && CONFIG_SMP */ |
140 | #endif /* _LINUX_STOP_MACHINE */ | 146 | #endif /* _LINUX_STOP_MACHINE */ |
diff --git a/include/linux/sunrpc/auth_gss.h b/include/linux/sunrpc/auth_gss.h index 671538d25bc1..8eee9dbbfe7a 100644 --- a/include/linux/sunrpc/auth_gss.h +++ b/include/linux/sunrpc/auth_gss.h | |||
@@ -69,7 +69,7 @@ struct gss_cl_ctx { | |||
69 | enum rpc_gss_proc gc_proc; | 69 | enum rpc_gss_proc gc_proc; |
70 | u32 gc_seq; | 70 | u32 gc_seq; |
71 | spinlock_t gc_seq_lock; | 71 | spinlock_t gc_seq_lock; |
72 | struct gss_ctx *gc_gss_ctx; | 72 | struct gss_ctx __rcu *gc_gss_ctx; |
73 | struct xdr_netobj gc_wire_ctx; | 73 | struct xdr_netobj gc_wire_ctx; |
74 | u32 gc_win; | 74 | u32 gc_win; |
75 | unsigned long gc_expiry; | 75 | unsigned long gc_expiry; |
@@ -80,7 +80,7 @@ struct gss_upcall_msg; | |||
80 | struct gss_cred { | 80 | struct gss_cred { |
81 | struct rpc_cred gc_base; | 81 | struct rpc_cred gc_base; |
82 | enum rpc_gss_svc gc_service; | 82 | enum rpc_gss_svc gc_service; |
83 | struct gss_cl_ctx *gc_ctx; | 83 | struct gss_cl_ctx __rcu *gc_ctx; |
84 | struct gss_upcall_msg *gc_upcall; | 84 | struct gss_upcall_msg *gc_upcall; |
85 | unsigned long gc_upcall_timestamp; | 85 | unsigned long gc_upcall_timestamp; |
86 | unsigned char gc_machine_cred : 1; | 86 | unsigned char gc_machine_cred : 1; |
diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index 103d1b61aacb..a4a90b6726ce 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h | |||
@@ -17,6 +17,7 @@ | |||
17 | #include <linux/errno.h> | 17 | #include <linux/errno.h> |
18 | #include <linux/types.h> | 18 | #include <linux/types.h> |
19 | #include <linux/rcupdate.h> | 19 | #include <linux/rcupdate.h> |
20 | #include <linux/jump_label.h> | ||
20 | 21 | ||
21 | struct module; | 22 | struct module; |
22 | struct tracepoint; | 23 | struct tracepoint; |
@@ -145,7 +146,9 @@ static inline void tracepoint_update_probe_range(struct tracepoint *begin, | |||
145 | extern struct tracepoint __tracepoint_##name; \ | 146 | extern struct tracepoint __tracepoint_##name; \ |
146 | static inline void trace_##name(proto) \ | 147 | static inline void trace_##name(proto) \ |
147 | { \ | 148 | { \ |
148 | if (unlikely(__tracepoint_##name.state)) \ | 149 | JUMP_LABEL(&__tracepoint_##name.state, do_trace); \ |
150 | return; \ | ||
151 | do_trace: \ | ||
149 | __DO_TRACE(&__tracepoint_##name, \ | 152 | __DO_TRACE(&__tracepoint_##name, \ |
150 | TP_PROTO(data_proto), \ | 153 | TP_PROTO(data_proto), \ |
151 | TP_ARGS(data_args)); \ | 154 | TP_ARGS(data_args)); \ |
diff --git a/include/linux/types.h b/include/linux/types.h index 01a082f56ef4..357dbc19606f 100644 --- a/include/linux/types.h +++ b/include/linux/types.h | |||
@@ -121,7 +121,15 @@ typedef __u64 u_int64_t; | |||
121 | typedef __s64 int64_t; | 121 | typedef __s64 int64_t; |
122 | #endif | 122 | #endif |
123 | 123 | ||
124 | /* this is a special 64bit data type that is 8-byte aligned */ | 124 | /* |
125 | * aligned_u64 should be used in defining kernel<->userspace ABIs to avoid | ||
126 | * common 32/64-bit compat problems. | ||
127 | * 64-bit values align to 4-byte boundaries on x86_32 (and possibly other | ||
128 | * architectures) and to 8-byte boundaries on 64-bit architectures. The new | ||
129 | * aligned_64 type enforces 8-byte alignment so that structs containing | ||
130 | * aligned_64 values have the same alignment on 32-bit and 64-bit architectures. | ||
131 | * No conversions are necessary between 32-bit user-space and a 64-bit kernel. | ||
132 | */ | ||
125 | #define aligned_u64 __u64 __attribute__((aligned(8))) | 133 | #define aligned_u64 __u64 __attribute__((aligned(8))) |
126 | #define aligned_be64 __be64 __attribute__((aligned(8))) | 134 | #define aligned_be64 __be64 __attribute__((aligned(8))) |
127 | #define aligned_le64 __le64 __attribute__((aligned(8))) | 135 | #define aligned_le64 __le64 __attribute__((aligned(8))) |
@@ -178,6 +186,11 @@ typedef __u64 __bitwise __be64; | |||
178 | typedef __u16 __bitwise __sum16; | 186 | typedef __u16 __bitwise __sum16; |
179 | typedef __u32 __bitwise __wsum; | 187 | typedef __u32 __bitwise __wsum; |
180 | 188 | ||
189 | /* this is a special 64bit data type that is 8-byte aligned */ | ||
190 | #define __aligned_u64 __u64 __attribute__((aligned(8))) | ||
191 | #define __aligned_be64 __be64 __attribute__((aligned(8))) | ||
192 | #define __aligned_le64 __le64 __attribute__((aligned(8))) | ||
193 | |||
181 | #ifdef __KERNEL__ | 194 | #ifdef __KERNEL__ |
182 | typedef unsigned __bitwise__ gfp_t; | 195 | typedef unsigned __bitwise__ gfp_t; |
183 | typedef unsigned __bitwise__ fmode_t; | 196 | typedef unsigned __bitwise__ fmode_t; |
diff --git a/include/net/cls_cgroup.h b/include/net/cls_cgroup.h index ef6c24a529e1..a4dc5b027bd9 100644 --- a/include/net/cls_cgroup.h +++ b/include/net/cls_cgroup.h | |||
@@ -51,7 +51,8 @@ static inline u32 task_cls_classid(struct task_struct *p) | |||
51 | return 0; | 51 | return 0; |
52 | 52 | ||
53 | rcu_read_lock(); | 53 | rcu_read_lock(); |
54 | id = rcu_dereference(net_cls_subsys_id); | 54 | id = rcu_dereference_index_check(net_cls_subsys_id, |
55 | rcu_read_lock_held()); | ||
55 | if (id >= 0) | 56 | if (id >= 0) |
56 | classid = container_of(task_subsys_state(p, id), | 57 | classid = container_of(task_subsys_state(p, id), |
57 | struct cgroup_cls_state, css)->classid; | 58 | struct cgroup_cls_state, css)->classid; |
diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index e624dae54fa4..caf17db87dbc 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h | |||
@@ -75,7 +75,7 @@ struct nf_conntrack_helper; | |||
75 | /* nf_conn feature for connections that have a helper */ | 75 | /* nf_conn feature for connections that have a helper */ |
76 | struct nf_conn_help { | 76 | struct nf_conn_help { |
77 | /* Helper. if any */ | 77 | /* Helper. if any */ |
78 | struct nf_conntrack_helper *helper; | 78 | struct nf_conntrack_helper __rcu *helper; |
79 | 79 | ||
80 | union nf_conntrack_help help; | 80 | union nf_conntrack_help help; |
81 | 81 | ||
diff --git a/include/trace/events/irq.h b/include/trace/events/irq.h index 0e4cfb694fe7..6fa7cbab7d93 100644 --- a/include/trace/events/irq.h +++ b/include/trace/events/irq.h | |||
@@ -5,7 +5,9 @@ | |||
5 | #define _TRACE_IRQ_H | 5 | #define _TRACE_IRQ_H |
6 | 6 | ||
7 | #include <linux/tracepoint.h> | 7 | #include <linux/tracepoint.h> |
8 | #include <linux/interrupt.h> | 8 | |
9 | struct irqaction; | ||
10 | struct softirq_action; | ||
9 | 11 | ||
10 | #define softirq_name(sirq) { sirq##_SOFTIRQ, #sirq } | 12 | #define softirq_name(sirq) { sirq##_SOFTIRQ, #sirq } |
11 | #define show_softirq_name(val) \ | 13 | #define show_softirq_name(val) \ |
@@ -93,7 +95,10 @@ DECLARE_EVENT_CLASS(softirq, | |||
93 | ), | 95 | ), |
94 | 96 | ||
95 | TP_fast_assign( | 97 | TP_fast_assign( |
96 | __entry->vec = (int)(h - vec); | 98 | if (vec) |
99 | __entry->vec = (int)(h - vec); | ||
100 | else | ||
101 | __entry->vec = (int)(long)h; | ||
97 | ), | 102 | ), |
98 | 103 | ||
99 | TP_printk("vec=%d [action=%s]", __entry->vec, | 104 | TP_printk("vec=%d [action=%s]", __entry->vec, |
@@ -136,6 +141,23 @@ DEFINE_EVENT(softirq, softirq_exit, | |||
136 | TP_ARGS(h, vec) | 141 | TP_ARGS(h, vec) |
137 | ); | 142 | ); |
138 | 143 | ||
144 | /** | ||
145 | * softirq_raise - called immediately when a softirq is raised | ||
146 | * @h: pointer to struct softirq_action | ||
147 | * @vec: pointer to first struct softirq_action in softirq_vec array | ||
148 | * | ||
149 | * The @h parameter carries the number of the softirq vector being raised, | ||
150 | * cast to a pointer. @vec is NULL, which means @h holds the vector number | ||
151 | * rather than a pointer to a softirq_action. When used in combination with | ||
152 | * the softirq_entry tracepoint we can determine the softirq raise latency. | ||
153 | */ | ||
154 | DEFINE_EVENT(softirq, softirq_raise, | ||
155 | |||
156 | TP_PROTO(struct softirq_action *h, struct softirq_action *vec), | ||
157 | |||
158 | TP_ARGS(h, vec) | ||
159 | ); | ||
160 | |||
139 | #endif /* _TRACE_IRQ_H */ | 161 | #endif /* _TRACE_IRQ_H */ |
140 | 162 | ||
141 | /* This part must be outside protection */ | 163 | /* This part must be outside protection */ |
diff --git a/include/trace/events/napi.h b/include/trace/events/napi.h index 188deca2f3c7..8fe1e93f531d 100644 --- a/include/trace/events/napi.h +++ b/include/trace/events/napi.h | |||
@@ -6,10 +6,31 @@ | |||
6 | 6 | ||
7 | #include <linux/netdevice.h> | 7 | #include <linux/netdevice.h> |
8 | #include <linux/tracepoint.h> | 8 | #include <linux/tracepoint.h> |
9 | #include <linux/ftrace.h> | ||
10 | |||
11 | #define NO_DEV "(no_device)" | ||
12 | |||
13 | TRACE_EVENT(napi_poll, | ||
9 | 14 | ||
10 | DECLARE_TRACE(napi_poll, | ||
11 | TP_PROTO(struct napi_struct *napi), | 15 | TP_PROTO(struct napi_struct *napi), |
12 | TP_ARGS(napi)); | 16 | |
17 | TP_ARGS(napi), | ||
18 | |||
19 | TP_STRUCT__entry( | ||
20 | __field( struct napi_struct *, napi) | ||
21 | __string( dev_name, napi->dev ? napi->dev->name : NO_DEV) | ||
22 | ), | ||
23 | |||
24 | TP_fast_assign( | ||
25 | __entry->napi = napi; | ||
26 | __assign_str(dev_name, napi->dev ? napi->dev->name : NO_DEV); | ||
27 | ), | ||
28 | |||
29 | TP_printk("napi poll on napi struct %p for device %s", | ||
30 | __entry->napi, __get_str(dev_name)) | ||
31 | ); | ||
32 | |||
33 | #undef NO_DEV | ||
13 | 34 | ||
14 | #endif /* _TRACE_NAPI_H_ */ | 35 | #endif /* _TRACE_NAPI_H_ */ |
15 | 36 | ||
diff --git a/include/trace/events/net.h b/include/trace/events/net.h new file mode 100644 index 000000000000..5f247f5ffc56 --- /dev/null +++ b/include/trace/events/net.h | |||
@@ -0,0 +1,82 @@ | |||
1 | #undef TRACE_SYSTEM | ||
2 | #define TRACE_SYSTEM net | ||
3 | |||
4 | #if !defined(_TRACE_NET_H) || defined(TRACE_HEADER_MULTI_READ) | ||
5 | #define _TRACE_NET_H | ||
6 | |||
7 | #include <linux/skbuff.h> | ||
8 | #include <linux/netdevice.h> | ||
9 | #include <linux/ip.h> | ||
10 | #include <linux/tracepoint.h> | ||
11 | |||
12 | TRACE_EVENT(net_dev_xmit, | ||
13 | |||
14 | TP_PROTO(struct sk_buff *skb, | ||
15 | int rc), | ||
16 | |||
17 | TP_ARGS(skb, rc), | ||
18 | |||
19 | TP_STRUCT__entry( | ||
20 | __field( void *, skbaddr ) | ||
21 | __field( unsigned int, len ) | ||
22 | __field( int, rc ) | ||
23 | __string( name, skb->dev->name ) | ||
24 | ), | ||
25 | |||
26 | TP_fast_assign( | ||
27 | __entry->skbaddr = skb; | ||
28 | __entry->len = skb->len; | ||
29 | __entry->rc = rc; | ||
30 | __assign_str(name, skb->dev->name); | ||
31 | ), | ||
32 | |||
33 | TP_printk("dev=%s skbaddr=%p len=%u rc=%d", | ||
34 | __get_str(name), __entry->skbaddr, __entry->len, __entry->rc) | ||
35 | ); | ||
36 | |||
37 | DECLARE_EVENT_CLASS(net_dev_template, | ||
38 | |||
39 | TP_PROTO(struct sk_buff *skb), | ||
40 | |||
41 | TP_ARGS(skb), | ||
42 | |||
43 | TP_STRUCT__entry( | ||
44 | __field( void *, skbaddr ) | ||
45 | __field( unsigned int, len ) | ||
46 | __string( name, skb->dev->name ) | ||
47 | ), | ||
48 | |||
49 | TP_fast_assign( | ||
50 | __entry->skbaddr = skb; | ||
51 | __entry->len = skb->len; | ||
52 | __assign_str(name, skb->dev->name); | ||
53 | ), | ||
54 | |||
55 | TP_printk("dev=%s skbaddr=%p len=%u", | ||
56 | __get_str(name), __entry->skbaddr, __entry->len) | ||
57 | ) | ||
58 | |||
59 | DEFINE_EVENT(net_dev_template, net_dev_queue, | ||
60 | |||
61 | TP_PROTO(struct sk_buff *skb), | ||
62 | |||
63 | TP_ARGS(skb) | ||
64 | ); | ||
65 | |||
66 | DEFINE_EVENT(net_dev_template, netif_receive_skb, | ||
67 | |||
68 | TP_PROTO(struct sk_buff *skb), | ||
69 | |||
70 | TP_ARGS(skb) | ||
71 | ); | ||
72 | |||
73 | DEFINE_EVENT(net_dev_template, netif_rx, | ||
74 | |||
75 | TP_PROTO(struct sk_buff *skb), | ||
76 | |||
77 | TP_ARGS(skb) | ||
78 | ); | ||
79 | #endif /* _TRACE_NET_H */ | ||
80 | |||
81 | /* This part must be outside protection */ | ||
82 | #include <trace/define_trace.h> | ||
diff --git a/include/trace/events/power.h b/include/trace/events/power.h index 35a2a6e7bf1e..286784d69b8f 100644 --- a/include/trace/events/power.h +++ b/include/trace/events/power.h | |||
@@ -10,12 +10,17 @@ | |||
10 | #ifndef _TRACE_POWER_ENUM_ | 10 | #ifndef _TRACE_POWER_ENUM_ |
11 | #define _TRACE_POWER_ENUM_ | 11 | #define _TRACE_POWER_ENUM_ |
12 | enum { | 12 | enum { |
13 | POWER_NONE = 0, | 13 | POWER_NONE = 0, |
14 | POWER_CSTATE = 1, | 14 | POWER_CSTATE = 1, /* C-State */ |
15 | POWER_PSTATE = 2, | 15 | POWER_PSTATE = 2, /* Frequency change or DVFS */ |
16 | POWER_SSTATE = 3, /* Suspend */ | ||
16 | }; | 17 | }; |
17 | #endif | 18 | #endif |
18 | 19 | ||
20 | /* | ||
21 | * The power events are used for cpuidle & suspend (power_start, power_end) | ||
22 | * and for cpufreq (power_frequency) | ||
23 | */ | ||
19 | DECLARE_EVENT_CLASS(power, | 24 | DECLARE_EVENT_CLASS(power, |
20 | 25 | ||
21 | TP_PROTO(unsigned int type, unsigned int state, unsigned int cpu_id), | 26 | TP_PROTO(unsigned int type, unsigned int state, unsigned int cpu_id), |
@@ -70,6 +75,85 @@ TRACE_EVENT(power_end, | |||
70 | 75 | ||
71 | ); | 76 | ); |
72 | 77 | ||
78 | /* | ||
79 | * The clock events are used for clock enable/disable and for | ||
80 | * clock rate change | ||
81 | */ | ||
82 | DECLARE_EVENT_CLASS(clock, | ||
83 | |||
84 | TP_PROTO(const char *name, unsigned int state, unsigned int cpu_id), | ||
85 | |||
86 | TP_ARGS(name, state, cpu_id), | ||
87 | |||
88 | TP_STRUCT__entry( | ||
89 | __string( name, name ) | ||
90 | __field( u64, state ) | ||
91 | __field( u64, cpu_id ) | ||
92 | ), | ||
93 | |||
94 | TP_fast_assign( | ||
95 | __assign_str(name, name); | ||
96 | __entry->state = state; | ||
97 | __entry->cpu_id = cpu_id; | ||
98 | ), | ||
99 | |||
100 | TP_printk("%s state=%lu cpu_id=%lu", __get_str(name), | ||
101 | (unsigned long)__entry->state, (unsigned long)__entry->cpu_id) | ||
102 | ); | ||
103 | |||
104 | DEFINE_EVENT(clock, clock_enable, | ||
105 | |||
106 | TP_PROTO(const char *name, unsigned int state, unsigned int cpu_id), | ||
107 | |||
108 | TP_ARGS(name, state, cpu_id) | ||
109 | ); | ||
110 | |||
111 | DEFINE_EVENT(clock, clock_disable, | ||
112 | |||
113 | TP_PROTO(const char *name, unsigned int state, unsigned int cpu_id), | ||
114 | |||
115 | TP_ARGS(name, state, cpu_id) | ||
116 | ); | ||
117 | |||
118 | DEFINE_EVENT(clock, clock_set_rate, | ||
119 | |||
120 | TP_PROTO(const char *name, unsigned int state, unsigned int cpu_id), | ||
121 | |||
122 | TP_ARGS(name, state, cpu_id) | ||
123 | ); | ||
124 | |||
125 | /* | ||
126 | * The power domain events are used for power domains transitions | ||
127 | */ | ||
128 | DECLARE_EVENT_CLASS(power_domain, | ||
129 | |||
130 | TP_PROTO(const char *name, unsigned int state, unsigned int cpu_id), | ||
131 | |||
132 | TP_ARGS(name, state, cpu_id), | ||
133 | |||
134 | TP_STRUCT__entry( | ||
135 | __string( name, name ) | ||
136 | __field( u64, state ) | ||
137 | __field( u64, cpu_id ) | ||
138 | ), | ||
139 | |||
140 | TP_fast_assign( | ||
141 | __assign_str(name, name); | ||
142 | __entry->state = state; | ||
143 | __entry->cpu_id = cpu_id; | ||
144 | ), | ||
145 | |||
146 | TP_printk("%s state=%lu cpu_id=%lu", __get_str(name), | ||
147 | (unsigned long)__entry->state, (unsigned long)__entry->cpu_id) | ||
148 | ); | ||
149 | |||
150 | DEFINE_EVENT(power_domain, power_domain_target, | ||
151 | |||
152 | TP_PROTO(const char *name, unsigned int state, unsigned int cpu_id), | ||
153 | |||
154 | TP_ARGS(name, state, cpu_id) | ||
155 | ); | ||
156 | |||
73 | #endif /* _TRACE_POWER_H */ | 157 | #endif /* _TRACE_POWER_H */ |
74 | 158 | ||
75 | /* This part must be outside protection */ | 159 | /* This part must be outside protection */ |
diff --git a/include/trace/events/skb.h b/include/trace/events/skb.h index 4b2be6dc76f0..75ce9d500d8e 100644 --- a/include/trace/events/skb.h +++ b/include/trace/events/skb.h | |||
@@ -35,6 +35,23 @@ TRACE_EVENT(kfree_skb, | |||
35 | __entry->skbaddr, __entry->protocol, __entry->location) | 35 | __entry->skbaddr, __entry->protocol, __entry->location) |
36 | ); | 36 | ); |
37 | 37 | ||
38 | TRACE_EVENT(consume_skb, | ||
39 | |||
40 | TP_PROTO(struct sk_buff *skb), | ||
41 | |||
42 | TP_ARGS(skb), | ||
43 | |||
44 | TP_STRUCT__entry( | ||
45 | __field( void *, skbaddr ) | ||
46 | ), | ||
47 | |||
48 | TP_fast_assign( | ||
49 | __entry->skbaddr = skb; | ||
50 | ), | ||
51 | |||
52 | TP_printk("skbaddr=%p", __entry->skbaddr) | ||
53 | ); | ||
54 | |||
38 | TRACE_EVENT(skb_copy_datagram_iovec, | 55 | TRACE_EVENT(skb_copy_datagram_iovec, |
39 | 56 | ||
40 | TP_PROTO(const struct sk_buff *skb, int len), | 57 | TP_PROTO(const struct sk_buff *skb, int len), |
diff --git a/init/Kconfig b/init/Kconfig index 2de5b1cbadd9..7b920aafa98a 100644 --- a/init/Kconfig +++ b/init/Kconfig | |||
@@ -21,6 +21,13 @@ config CONSTRUCTORS | |||
21 | depends on !UML | 21 | depends on !UML |
22 | default y | 22 | default y |
23 | 23 | ||
24 | config HAVE_IRQ_WORK | ||
25 | bool | ||
26 | |||
27 | config IRQ_WORK | ||
28 | bool | ||
29 | depends on HAVE_IRQ_WORK | ||
30 | |||
24 | menu "General setup" | 31 | menu "General setup" |
25 | 32 | ||
26 | config EXPERIMENTAL | 33 | config EXPERIMENTAL |
@@ -340,6 +347,7 @@ choice | |||
340 | 347 | ||
341 | config TREE_RCU | 348 | config TREE_RCU |
342 | bool "Tree-based hierarchical RCU" | 349 | bool "Tree-based hierarchical RCU" |
350 | depends on !PREEMPT && SMP | ||
343 | help | 351 | help |
344 | This option selects the RCU implementation that is | 352 | This option selects the RCU implementation that is |
345 | designed for very large SMP system with hundreds or | 353 | designed for very large SMP system with hundreds or |
@@ -347,7 +355,7 @@ config TREE_RCU | |||
347 | smaller systems. | 355 | smaller systems. |
348 | 356 | ||
349 | config TREE_PREEMPT_RCU | 357 | config TREE_PREEMPT_RCU |
350 | bool "Preemptable tree-based hierarchical RCU" | 358 | bool "Preemptible tree-based hierarchical RCU" |
351 | depends on PREEMPT | 359 | depends on PREEMPT |
352 | help | 360 | help |
353 | This option selects the RCU implementation that is | 361 | This option selects the RCU implementation that is |
@@ -365,8 +373,22 @@ config TINY_RCU | |||
365 | is not required. This option greatly reduces the | 373 | is not required. This option greatly reduces the |
366 | memory footprint of RCU. | 374 | memory footprint of RCU. |
367 | 375 | ||
376 | config TINY_PREEMPT_RCU | ||
377 | bool "Preemptible UP-only small-memory-footprint RCU" | ||
378 | depends on !SMP && PREEMPT | ||
379 | help | ||
380 | This option selects the RCU implementation that is designed | ||
381 | for real-time UP systems. This option greatly reduces the | ||
382 | memory footprint of RCU. | ||
383 | |||
368 | endchoice | 384 | endchoice |
369 | 385 | ||
386 | config PREEMPT_RCU | ||
387 | def_bool ( TREE_PREEMPT_RCU || TINY_PREEMPT_RCU ) | ||
388 | help | ||
389 | This option enables preemptible-RCU code that is common between | ||
390 | the TREE_PREEMPT_RCU and TINY_PREEMPT_RCU implementations. | ||
391 | |||
370 | config RCU_TRACE | 392 | config RCU_TRACE |
371 | bool "Enable tracing for RCU" | 393 | bool "Enable tracing for RCU" |
372 | depends on TREE_RCU || TREE_PREEMPT_RCU | 394 | depends on TREE_RCU || TREE_PREEMPT_RCU |
@@ -387,9 +409,12 @@ config RCU_FANOUT | |||
387 | help | 409 | help |
388 | This option controls the fanout of hierarchical implementations | 410 | This option controls the fanout of hierarchical implementations |
389 | of RCU, allowing RCU to work efficiently on machines with | 411 | of RCU, allowing RCU to work efficiently on machines with |
390 | large numbers of CPUs. This value must be at least the cube | 412 | large numbers of CPUs. This value must be at least the fourth |
391 | root of NR_CPUS, which allows NR_CPUS up to 32,768 for 32-bit | 413 | root of NR_CPUS, which allows NR_CPUS to be insanely large. |
392 | systems and up to 262,144 for 64-bit systems. | 414 | The default value of RCU_FANOUT should be used for production |
415 | systems, but if you are stress-testing the RCU implementation | ||
416 | itself, small RCU_FANOUT values allow you to test large-system | ||
417 | code paths on small(er) systems. | ||
393 | 418 | ||
394 | Select a specific number if testing RCU itself. | 419 | Select a specific number if testing RCU itself. |
395 | Take the default if unsure. | 420 | Take the default if unsure. |
@@ -987,6 +1012,7 @@ config PERF_EVENTS | |||
987 | default y if (PROFILING || PERF_COUNTERS) | 1012 | default y if (PROFILING || PERF_COUNTERS) |
988 | depends on HAVE_PERF_EVENTS | 1013 | depends on HAVE_PERF_EVENTS |
989 | select ANON_INODES | 1014 | select ANON_INODES |
1015 | select IRQ_WORK | ||
990 | help | 1016 | help |
991 | Enable kernel support for various performance events provided | 1017 | Enable kernel support for various performance events provided |
992 | by software and hardware. | 1018 | by software and hardware. |
diff --git a/kernel/Makefile b/kernel/Makefile index 0b72d1a74be0..e2c9d52cfe9e 100644 --- a/kernel/Makefile +++ b/kernel/Makefile | |||
@@ -10,7 +10,7 @@ obj-y = sched.o fork.o exec_domain.o panic.o printk.o \ | |||
10 | kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \ | 10 | kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \ |
11 | hrtimer.o rwsem.o nsproxy.o srcu.o semaphore.o \ | 11 | hrtimer.o rwsem.o nsproxy.o srcu.o semaphore.o \ |
12 | notifier.o ksysfs.o pm_qos_params.o sched_clock.o cred.o \ | 12 | notifier.o ksysfs.o pm_qos_params.o sched_clock.o cred.o \ |
13 | async.o range.o | 13 | async.o range.o jump_label.o |
14 | obj-$(CONFIG_HAVE_EARLY_RES) += early_res.o | 14 | obj-$(CONFIG_HAVE_EARLY_RES) += early_res.o |
15 | obj-y += groups.o | 15 | obj-y += groups.o |
16 | 16 | ||
@@ -23,6 +23,7 @@ CFLAGS_REMOVE_rtmutex-debug.o = -pg | |||
23 | CFLAGS_REMOVE_cgroup-debug.o = -pg | 23 | CFLAGS_REMOVE_cgroup-debug.o = -pg |
24 | CFLAGS_REMOVE_sched_clock.o = -pg | 24 | CFLAGS_REMOVE_sched_clock.o = -pg |
25 | CFLAGS_REMOVE_perf_event.o = -pg | 25 | CFLAGS_REMOVE_perf_event.o = -pg |
26 | CFLAGS_REMOVE_irq_work.o = -pg | ||
26 | endif | 27 | endif |
27 | 28 | ||
28 | obj-$(CONFIG_FREEZER) += freezer.o | 29 | obj-$(CONFIG_FREEZER) += freezer.o |
@@ -86,6 +87,7 @@ obj-$(CONFIG_TREE_RCU) += rcutree.o | |||
86 | obj-$(CONFIG_TREE_PREEMPT_RCU) += rcutree.o | 87 | obj-$(CONFIG_TREE_PREEMPT_RCU) += rcutree.o |
87 | obj-$(CONFIG_TREE_RCU_TRACE) += rcutree_trace.o | 88 | obj-$(CONFIG_TREE_RCU_TRACE) += rcutree_trace.o |
88 | obj-$(CONFIG_TINY_RCU) += rcutiny.o | 89 | obj-$(CONFIG_TINY_RCU) += rcutiny.o |
90 | obj-$(CONFIG_TINY_PREEMPT_RCU) += rcutiny.o | ||
89 | obj-$(CONFIG_RELAY) += relay.o | 91 | obj-$(CONFIG_RELAY) += relay.o |
90 | obj-$(CONFIG_SYSCTL) += utsname_sysctl.o | 92 | obj-$(CONFIG_SYSCTL) += utsname_sysctl.o |
91 | obj-$(CONFIG_TASK_DELAY_ACCT) += delayacct.o | 93 | obj-$(CONFIG_TASK_DELAY_ACCT) += delayacct.o |
@@ -100,6 +102,7 @@ obj-$(CONFIG_TRACING) += trace/ | |||
100 | obj-$(CONFIG_X86_DS) += trace/ | 102 | obj-$(CONFIG_X86_DS) += trace/ |
101 | obj-$(CONFIG_RING_BUFFER) += trace/ | 103 | obj-$(CONFIG_RING_BUFFER) += trace/ |
102 | obj-$(CONFIG_SMP) += sched_cpupri.o | 104 | obj-$(CONFIG_SMP) += sched_cpupri.o |
105 | obj-$(CONFIG_IRQ_WORK) += irq_work.o | ||
103 | obj-$(CONFIG_PERF_EVENTS) += perf_event.o | 106 | obj-$(CONFIG_PERF_EVENTS) += perf_event.o |
104 | obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o | 107 | obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o |
105 | obj-$(CONFIG_USER_RETURN_NOTIFIER) += user-return-notifier.o | 108 | obj-$(CONFIG_USER_RETURN_NOTIFIER) += user-return-notifier.o |
diff --git a/kernel/cgroup.c b/kernel/cgroup.c index c9483d8f6140..291ba3d04bea 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c | |||
@@ -138,7 +138,7 @@ struct css_id { | |||
138 | * is called after synchronize_rcu(). But for safe use, css_is_removed() | 138 | * is called after synchronize_rcu(). But for safe use, css_is_removed() |
139 | * css_tryget() should be used for avoiding race. | 139 | * css_tryget() should be used for avoiding race. |
140 | */ | 140 | */ |
141 | struct cgroup_subsys_state *css; | 141 | struct cgroup_subsys_state __rcu *css; |
142 | /* | 142 | /* |
143 | * ID of this css. | 143 | * ID of this css. |
144 | */ | 144 | */ |
diff --git a/kernel/cpuset.c b/kernel/cpuset.c index b23c0979bbe7..51b143e2a07a 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c | |||
@@ -1397,7 +1397,7 @@ static int cpuset_can_attach(struct cgroup_subsys *ss, struct cgroup *cont, | |||
1397 | if (tsk->flags & PF_THREAD_BOUND) | 1397 | if (tsk->flags & PF_THREAD_BOUND) |
1398 | return -EINVAL; | 1398 | return -EINVAL; |
1399 | 1399 | ||
1400 | ret = security_task_setscheduler(tsk, 0, NULL); | 1400 | ret = security_task_setscheduler(tsk); |
1401 | if (ret) | 1401 | if (ret) |
1402 | return ret; | 1402 | return ret; |
1403 | if (threadgroup) { | 1403 | if (threadgroup) { |
@@ -1405,7 +1405,7 @@ static int cpuset_can_attach(struct cgroup_subsys *ss, struct cgroup *cont, | |||
1405 | 1405 | ||
1406 | rcu_read_lock(); | 1406 | rcu_read_lock(); |
1407 | list_for_each_entry_rcu(c, &tsk->thread_group, thread_group) { | 1407 | list_for_each_entry_rcu(c, &tsk->thread_group, thread_group) { |
1408 | ret = security_task_setscheduler(c, 0, NULL); | 1408 | ret = security_task_setscheduler(c); |
1409 | if (ret) { | 1409 | if (ret) { |
1410 | rcu_read_unlock(); | 1410 | rcu_read_unlock(); |
1411 | return ret; | 1411 | return ret; |
diff --git a/kernel/exit.c b/kernel/exit.c index 03120229db28..e2bdf37f9fde 100644 --- a/kernel/exit.c +++ b/kernel/exit.c | |||
@@ -149,9 +149,7 @@ static void delayed_put_task_struct(struct rcu_head *rhp) | |||
149 | { | 149 | { |
150 | struct task_struct *tsk = container_of(rhp, struct task_struct, rcu); | 150 | struct task_struct *tsk = container_of(rhp, struct task_struct, rcu); |
151 | 151 | ||
152 | #ifdef CONFIG_PERF_EVENTS | 152 | perf_event_delayed_put(tsk); |
153 | WARN_ON_ONCE(tsk->perf_event_ctxp); | ||
154 | #endif | ||
155 | trace_sched_process_free(tsk); | 153 | trace_sched_process_free(tsk); |
156 | put_task_struct(tsk); | 154 | put_task_struct(tsk); |
157 | } | 155 | } |
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index 1decafbb6b1a..72206cf5c6cf 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c | |||
@@ -931,6 +931,7 @@ static inline int | |||
931 | remove_hrtimer(struct hrtimer *timer, struct hrtimer_clock_base *base) | 931 | remove_hrtimer(struct hrtimer *timer, struct hrtimer_clock_base *base) |
932 | { | 932 | { |
933 | if (hrtimer_is_queued(timer)) { | 933 | if (hrtimer_is_queued(timer)) { |
934 | unsigned long state; | ||
934 | int reprogram; | 935 | int reprogram; |
935 | 936 | ||
936 | /* | 937 | /* |
@@ -944,8 +945,13 @@ remove_hrtimer(struct hrtimer *timer, struct hrtimer_clock_base *base) | |||
944 | debug_deactivate(timer); | 945 | debug_deactivate(timer); |
945 | timer_stats_hrtimer_clear_start_info(timer); | 946 | timer_stats_hrtimer_clear_start_info(timer); |
946 | reprogram = base->cpu_base == &__get_cpu_var(hrtimer_bases); | 947 | reprogram = base->cpu_base == &__get_cpu_var(hrtimer_bases); |
947 | __remove_hrtimer(timer, base, HRTIMER_STATE_INACTIVE, | 948 | /* |
948 | reprogram); | 949 | * We must preserve the CALLBACK state flag here, |
950 | * otherwise we could move the timer base in | ||
951 | * switch_hrtimer_base. | ||
952 | */ | ||
953 | state = timer->state & HRTIMER_STATE_CALLBACK; | ||
954 | __remove_hrtimer(timer, base, state, reprogram); | ||
949 | return 1; | 955 | return 1; |
950 | } | 956 | } |
951 | return 0; | 957 | return 0; |
@@ -1231,6 +1237,9 @@ static void __run_hrtimer(struct hrtimer *timer, ktime_t *now) | |||
1231 | BUG_ON(timer->state != HRTIMER_STATE_CALLBACK); | 1237 | BUG_ON(timer->state != HRTIMER_STATE_CALLBACK); |
1232 | enqueue_hrtimer(timer, base); | 1238 | enqueue_hrtimer(timer, base); |
1233 | } | 1239 | } |
1240 | |||
1241 | WARN_ON_ONCE(!(timer->state & HRTIMER_STATE_CALLBACK)); | ||
1242 | |||
1234 | timer->state &= ~HRTIMER_STATE_CALLBACK; | 1243 | timer->state &= ~HRTIMER_STATE_CALLBACK; |
1235 | } | 1244 | } |
1236 | 1245 | ||
diff --git a/kernel/hung_task.c b/kernel/hung_task.c index 0c642d51aac2..53ead174da2f 100644 --- a/kernel/hung_task.c +++ b/kernel/hung_task.c | |||
@@ -98,7 +98,7 @@ static void check_hung_task(struct task_struct *t, unsigned long timeout) | |||
98 | printk(KERN_ERR "\"echo 0 > /proc/sys/kernel/hung_task_timeout_secs\"" | 98 | printk(KERN_ERR "\"echo 0 > /proc/sys/kernel/hung_task_timeout_secs\"" |
99 | " disables this message.\n"); | 99 | " disables this message.\n"); |
100 | sched_show_task(t); | 100 | sched_show_task(t); |
101 | __debug_show_held_locks(t); | 101 | debug_show_held_locks(t); |
102 | 102 | ||
103 | touch_nmi_watchdog(); | 103 | touch_nmi_watchdog(); |
104 | 104 | ||
@@ -111,7 +111,7 @@ static void check_hung_task(struct task_struct *t, unsigned long timeout) | |||
111 | * periodically exit the critical section and enter a new one. | 111 | * periodically exit the critical section and enter a new one. |
112 | * | 112 | * |
113 | * For preemptible RCU it is sufficient to call rcu_read_unlock in order | 113 | * For preemptible RCU it is sufficient to call rcu_read_unlock in order |
114 | * exit the grace period. For classic RCU, a reschedule is required. | 114 | * to exit the grace period. For classic RCU, a reschedule is required. |
115 | */ | 115 | */ |
116 | static void rcu_lock_break(struct task_struct *g, struct task_struct *t) | 116 | static void rcu_lock_break(struct task_struct *g, struct task_struct *t) |
117 | { | 117 | { |
diff --git a/kernel/hw_breakpoint.c b/kernel/hw_breakpoint.c index c7c2aed9e2dc..2c9120f0afca 100644 --- a/kernel/hw_breakpoint.c +++ b/kernel/hw_breakpoint.c | |||
@@ -113,12 +113,12 @@ static unsigned int max_task_bp_pinned(int cpu, enum bp_type_idx type) | |||
113 | */ | 113 | */ |
114 | static int task_bp_pinned(struct perf_event *bp, enum bp_type_idx type) | 114 | static int task_bp_pinned(struct perf_event *bp, enum bp_type_idx type) |
115 | { | 115 | { |
116 | struct perf_event_context *ctx = bp->ctx; | 116 | struct task_struct *tsk = bp->hw.bp_target; |
117 | struct perf_event *iter; | 117 | struct perf_event *iter; |
118 | int count = 0; | 118 | int count = 0; |
119 | 119 | ||
120 | list_for_each_entry(iter, &bp_task_head, hw.bp_list) { | 120 | list_for_each_entry(iter, &bp_task_head, hw.bp_list) { |
121 | if (iter->ctx == ctx && find_slot_idx(iter) == type) | 121 | if (iter->hw.bp_target == tsk && find_slot_idx(iter) == type) |
122 | count += hw_breakpoint_weight(iter); | 122 | count += hw_breakpoint_weight(iter); |
123 | } | 123 | } |
124 | 124 | ||
@@ -134,7 +134,7 @@ fetch_bp_busy_slots(struct bp_busy_slots *slots, struct perf_event *bp, | |||
134 | enum bp_type_idx type) | 134 | enum bp_type_idx type) |
135 | { | 135 | { |
136 | int cpu = bp->cpu; | 136 | int cpu = bp->cpu; |
137 | struct task_struct *tsk = bp->ctx->task; | 137 | struct task_struct *tsk = bp->hw.bp_target; |
138 | 138 | ||
139 | if (cpu >= 0) { | 139 | if (cpu >= 0) { |
140 | slots->pinned = per_cpu(nr_cpu_bp_pinned[type], cpu); | 140 | slots->pinned = per_cpu(nr_cpu_bp_pinned[type], cpu); |
@@ -213,7 +213,7 @@ toggle_bp_slot(struct perf_event *bp, bool enable, enum bp_type_idx type, | |||
213 | int weight) | 213 | int weight) |
214 | { | 214 | { |
215 | int cpu = bp->cpu; | 215 | int cpu = bp->cpu; |
216 | struct task_struct *tsk = bp->ctx->task; | 216 | struct task_struct *tsk = bp->hw.bp_target; |
217 | 217 | ||
218 | /* Pinned counter cpu profiling */ | 218 | /* Pinned counter cpu profiling */ |
219 | if (!tsk) { | 219 | if (!tsk) { |
@@ -433,8 +433,7 @@ register_user_hw_breakpoint(struct perf_event_attr *attr, | |||
433 | perf_overflow_handler_t triggered, | 433 | perf_overflow_handler_t triggered, |
434 | struct task_struct *tsk) | 434 | struct task_struct *tsk) |
435 | { | 435 | { |
436 | return perf_event_create_kernel_counter(attr, -1, task_pid_vnr(tsk), | 436 | return perf_event_create_kernel_counter(attr, -1, tsk, triggered); |
437 | triggered); | ||
438 | } | 437 | } |
439 | EXPORT_SYMBOL_GPL(register_user_hw_breakpoint); | 438 | EXPORT_SYMBOL_GPL(register_user_hw_breakpoint); |
440 | 439 | ||
@@ -516,7 +515,7 @@ register_wide_hw_breakpoint(struct perf_event_attr *attr, | |||
516 | get_online_cpus(); | 515 | get_online_cpus(); |
517 | for_each_online_cpu(cpu) { | 516 | for_each_online_cpu(cpu) { |
518 | pevent = per_cpu_ptr(cpu_events, cpu); | 517 | pevent = per_cpu_ptr(cpu_events, cpu); |
519 | bp = perf_event_create_kernel_counter(attr, cpu, -1, triggered); | 518 | bp = perf_event_create_kernel_counter(attr, cpu, NULL, triggered); |
520 | 519 | ||
521 | *pevent = bp; | 520 | *pevent = bp; |
522 | 521 | ||
@@ -566,6 +565,61 @@ static struct notifier_block hw_breakpoint_exceptions_nb = { | |||
566 | .priority = 0x7fffffff | 565 | .priority = 0x7fffffff |
567 | }; | 566 | }; |
568 | 567 | ||
568 | static void bp_perf_event_destroy(struct perf_event *event) | ||
569 | { | ||
570 | release_bp_slot(event); | ||
571 | } | ||
572 | |||
573 | static int hw_breakpoint_event_init(struct perf_event *bp) | ||
574 | { | ||
575 | int err; | ||
576 | |||
577 | if (bp->attr.type != PERF_TYPE_BREAKPOINT) | ||
578 | return -ENOENT; | ||
579 | |||
580 | err = register_perf_hw_breakpoint(bp); | ||
581 | if (err) | ||
582 | return err; | ||
583 | |||
584 | bp->destroy = bp_perf_event_destroy; | ||
585 | |||
586 | return 0; | ||
587 | } | ||
588 | |||
589 | static int hw_breakpoint_add(struct perf_event *bp, int flags) | ||
590 | { | ||
591 | if (!(flags & PERF_EF_START)) | ||
592 | bp->hw.state = PERF_HES_STOPPED; | ||
593 | |||
594 | return arch_install_hw_breakpoint(bp); | ||
595 | } | ||
596 | |||
597 | static void hw_breakpoint_del(struct perf_event *bp, int flags) | ||
598 | { | ||
599 | arch_uninstall_hw_breakpoint(bp); | ||
600 | } | ||
601 | |||
602 | static void hw_breakpoint_start(struct perf_event *bp, int flags) | ||
603 | { | ||
604 | bp->hw.state = 0; | ||
605 | } | ||
606 | |||
607 | static void hw_breakpoint_stop(struct perf_event *bp, int flags) | ||
608 | { | ||
609 | bp->hw.state = PERF_HES_STOPPED; | ||
610 | } | ||
611 | |||
612 | static struct pmu perf_breakpoint = { | ||
613 | .task_ctx_nr = perf_sw_context, /* could eventually get its own */ | ||
614 | |||
615 | .event_init = hw_breakpoint_event_init, | ||
616 | .add = hw_breakpoint_add, | ||
617 | .del = hw_breakpoint_del, | ||
618 | .start = hw_breakpoint_start, | ||
619 | .stop = hw_breakpoint_stop, | ||
620 | .read = hw_breakpoint_pmu_read, | ||
621 | }; | ||
622 | |||
569 | static int __init init_hw_breakpoint(void) | 623 | static int __init init_hw_breakpoint(void) |
570 | { | 624 | { |
571 | unsigned int **task_bp_pinned; | 625 | unsigned int **task_bp_pinned; |
@@ -587,6 +641,8 @@ static int __init init_hw_breakpoint(void) | |||
587 | 641 | ||
588 | constraints_initialized = 1; | 642 | constraints_initialized = 1; |
589 | 643 | ||
644 | perf_pmu_register(&perf_breakpoint); | ||
645 | |||
590 | return register_die_notifier(&hw_breakpoint_exceptions_nb); | 646 | return register_die_notifier(&hw_breakpoint_exceptions_nb); |
591 | 647 | ||
592 | err_alloc: | 648 | err_alloc: |
@@ -602,8 +658,3 @@ static int __init init_hw_breakpoint(void) | |||
602 | core_initcall(init_hw_breakpoint); | 658 | core_initcall(init_hw_breakpoint); |
603 | 659 | ||
604 | 660 | ||
605 | struct pmu perf_ops_bp = { | ||
606 | .enable = arch_install_hw_breakpoint, | ||
607 | .disable = arch_uninstall_hw_breakpoint, | ||
608 | .read = hw_breakpoint_pmu_read, | ||
609 | }; | ||
diff --git a/kernel/irq_work.c b/kernel/irq_work.c new file mode 100644 index 000000000000..f16763ff8481 --- /dev/null +++ b/kernel/irq_work.c | |||
@@ -0,0 +1,164 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2010 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com> | ||
3 | * | ||
4 | * Provides a framework for enqueueing and running callbacks from hardirq | ||
5 | * context. The enqueueing is NMI-safe. | ||
6 | */ | ||
7 | |||
8 | #include <linux/kernel.h> | ||
9 | #include <linux/module.h> | ||
10 | #include <linux/irq_work.h> | ||
11 | #include <linux/hardirq.h> | ||
12 | |||
13 | /* | ||
14 | * An entry can be in one of four states: | ||
15 | * | ||
16 | * free NULL, 0 -> {claimed} : free to be used | ||
17 | * claimed NULL, 3 -> {pending} : claimed to be enqueued | ||
18 | * pending next, 3 -> {busy} : queued, pending callback | ||
19 | * busy NULL, 2 -> {free, claimed} : callback in progress, can be claimed | ||
20 | * | ||
21 | * We use the lower two bits of the next pointer to keep PENDING and BUSY | ||
22 | * flags. | ||
23 | */ | ||
24 | |||
25 | #define IRQ_WORK_PENDING 1UL | ||
26 | #define IRQ_WORK_BUSY 2UL | ||
27 | #define IRQ_WORK_FLAGS 3UL | ||
28 | |||
29 | static inline bool irq_work_is_set(struct irq_work *entry, int flags) | ||
30 | { | ||
31 | return (unsigned long)entry->next & flags; | ||
32 | } | ||
33 | |||
34 | static inline struct irq_work *irq_work_next(struct irq_work *entry) | ||
35 | { | ||
36 | unsigned long next = (unsigned long)entry->next; | ||
37 | next &= ~IRQ_WORK_FLAGS; | ||
38 | return (struct irq_work *)next; | ||
39 | } | ||
40 | |||
41 | static inline struct irq_work *next_flags(struct irq_work *entry, int flags) | ||
42 | { | ||
43 | unsigned long next = (unsigned long)entry; | ||
44 | next |= flags; | ||
45 | return (struct irq_work *)next; | ||
46 | } | ||
47 | |||
48 | static DEFINE_PER_CPU(struct irq_work *, irq_work_list); | ||
49 | |||
50 | /* | ||
51 | * Claim the entry so that no one else will poke at it. | ||
52 | */ | ||
53 | static bool irq_work_claim(struct irq_work *entry) | ||
54 | { | ||
55 | struct irq_work *next, *nflags; | ||
56 | |||
57 | do { | ||
58 | next = entry->next; | ||
59 | if ((unsigned long)next & IRQ_WORK_PENDING) | ||
60 | return false; | ||
61 | nflags = next_flags(next, IRQ_WORK_FLAGS); | ||
62 | } while (cmpxchg(&entry->next, next, nflags) != next); | ||
63 | |||
64 | return true; | ||
65 | } | ||
66 | |||
67 | |||
68 | void __weak arch_irq_work_raise(void) | ||
69 | { | ||
70 | /* | ||
71 | * Lame architectures will get the timer tick callback | ||
72 | */ | ||
73 | } | ||
74 | |||
75 | /* | ||
76 | * Queue the entry and raise the IPI if needed. | ||
77 | */ | ||
78 | static void __irq_work_queue(struct irq_work *entry) | ||
79 | { | ||
80 | struct irq_work **head, *next; | ||
81 | |||
82 | head = &get_cpu_var(irq_work_list); | ||
83 | |||
84 | do { | ||
85 | next = *head; | ||
86 | /* Can assign non-atomic because we keep the flags set. */ | ||
87 | entry->next = next_flags(next, IRQ_WORK_FLAGS); | ||
88 | } while (cmpxchg(head, next, entry) != next); | ||
89 | |||
90 | /* The list was empty, raise self-interrupt to start processing. */ | ||
91 | if (!irq_work_next(entry)) | ||
92 | arch_irq_work_raise(); | ||
93 | |||
94 | put_cpu_var(irq_work_list); | ||
95 | } | ||
96 | |||
97 | /* | ||
98 | * Enqueue the irq_work @entry, returns true on success, false when the | ||
99 | * @entry was already enqueued by someone else. | ||
100 | * | ||
101 | * Can be re-enqueued while the callback is still in progress. | ||
102 | */ | ||
103 | bool irq_work_queue(struct irq_work *entry) | ||
104 | { | ||
105 | if (!irq_work_claim(entry)) { | ||
106 | /* | ||
107 | * Already enqueued, can't do! | ||
108 | */ | ||
109 | return false; | ||
110 | } | ||
111 | |||
112 | __irq_work_queue(entry); | ||
113 | return true; | ||
114 | } | ||
115 | EXPORT_SYMBOL_GPL(irq_work_queue); | ||
116 | |||
117 | /* | ||
118 | * Run the irq_work entries on this cpu. Must be run from hardirq | ||
119 | * context with local IRQs disabled. | ||
120 | */ | ||
121 | void irq_work_run(void) | ||
122 | { | ||
123 | struct irq_work *list, **head; | ||
124 | |||
125 | head = &__get_cpu_var(irq_work_list); | ||
126 | if (*head == NULL) | ||
127 | return; | ||
128 | |||
129 | BUG_ON(!in_irq()); | ||
130 | BUG_ON(!irqs_disabled()); | ||
131 | |||
132 | list = xchg(head, NULL); | ||
133 | while (list != NULL) { | ||
134 | struct irq_work *entry = list; | ||
135 | |||
136 | list = irq_work_next(list); | ||
137 | |||
138 | /* | ||
139 | * Clear the PENDING bit, after this point the @entry | ||
140 | * can be re-used. | ||
141 | */ | ||
142 | entry->next = next_flags(NULL, IRQ_WORK_BUSY); | ||
143 | entry->func(entry); | ||
144 | /* | ||
145 | * Clear the BUSY bit and return to the free state if | ||
146 | * no-one else claimed it meanwhile. | ||
147 | */ | ||
148 | cmpxchg(&entry->next, next_flags(NULL, IRQ_WORK_BUSY), NULL); | ||
149 | } | ||
150 | } | ||
151 | EXPORT_SYMBOL_GPL(irq_work_run); | ||
152 | |||
153 | /* | ||
154 | * Synchronize against the irq_work @entry, ensures the entry is not | ||
155 | * currently in use. | ||
156 | */ | ||
157 | void irq_work_sync(struct irq_work *entry) | ||
158 | { | ||
159 | WARN_ON_ONCE(irqs_disabled()); | ||
160 | |||
161 | while (irq_work_is_set(entry, IRQ_WORK_BUSY)) | ||
162 | cpu_relax(); | ||
163 | } | ||
164 | EXPORT_SYMBOL_GPL(irq_work_sync); | ||
diff --git a/kernel/jump_label.c b/kernel/jump_label.c new file mode 100644 index 000000000000..7be868bf25c6 --- /dev/null +++ b/kernel/jump_label.c | |||
@@ -0,0 +1,429 @@ | |||
1 | /* | ||
2 | * jump label support | ||
3 | * | ||
4 | * Copyright (C) 2009 Jason Baron <jbaron@redhat.com> | ||
5 | * | ||
6 | */ | ||
7 | #include <linux/jump_label.h> | ||
8 | #include <linux/memory.h> | ||
9 | #include <linux/uaccess.h> | ||
10 | #include <linux/module.h> | ||
11 | #include <linux/list.h> | ||
12 | #include <linux/jhash.h> | ||
13 | #include <linux/slab.h> | ||
14 | #include <linux/sort.h> | ||
15 | #include <linux/err.h> | ||
16 | |||
17 | #ifdef HAVE_JUMP_LABEL | ||
18 | |||
19 | #define JUMP_LABEL_HASH_BITS 6 | ||
20 | #define JUMP_LABEL_TABLE_SIZE (1 << JUMP_LABEL_HASH_BITS) | ||
21 | static struct hlist_head jump_label_table[JUMP_LABEL_TABLE_SIZE]; | ||
22 | |||
23 | /* mutex to protect coming/going of the jump_label table */ | ||
24 | static DEFINE_MUTEX(jump_label_mutex); | ||
25 | |||
26 | struct jump_label_entry { | ||
27 | struct hlist_node hlist; | ||
28 | struct jump_entry *table; | ||
29 | int nr_entries; | ||
30 | /* hang modules off here */ | ||
31 | struct hlist_head modules; | ||
32 | unsigned long key; | ||
33 | }; | ||
34 | |||
35 | struct jump_label_module_entry { | ||
36 | struct hlist_node hlist; | ||
37 | struct jump_entry *table; | ||
38 | int nr_entries; | ||
39 | struct module *mod; | ||
40 | }; | ||
41 | |||
42 | static int jump_label_cmp(const void *a, const void *b) | ||
43 | { | ||
44 | const struct jump_entry *jea = a; | ||
45 | const struct jump_entry *jeb = b; | ||
46 | |||
47 | if (jea->key < jeb->key) | ||
48 | return -1; | ||
49 | |||
50 | if (jea->key > jeb->key) | ||
51 | return 1; | ||
52 | |||
53 | return 0; | ||
54 | } | ||
55 | |||
56 | static void | ||
57 | sort_jump_label_entries(struct jump_entry *start, struct jump_entry *stop) | ||
58 | { | ||
59 | unsigned long size; | ||
60 | |||
61 | size = (((unsigned long)stop - (unsigned long)start) | ||
62 | / sizeof(struct jump_entry)); | ||
63 | sort(start, size, sizeof(struct jump_entry), jump_label_cmp, NULL); | ||
64 | } | ||
65 | |||
66 | static struct jump_label_entry *get_jump_label_entry(jump_label_t key) | ||
67 | { | ||
68 | struct hlist_head *head; | ||
69 | struct hlist_node *node; | ||
70 | struct jump_label_entry *e; | ||
71 | u32 hash = jhash((void *)&key, sizeof(jump_label_t), 0); | ||
72 | |||
73 | head = &jump_label_table[hash & (JUMP_LABEL_TABLE_SIZE - 1)]; | ||
74 | hlist_for_each_entry(e, node, head, hlist) { | ||
75 | if (key == e->key) | ||
76 | return e; | ||
77 | } | ||
78 | return NULL; | ||
79 | } | ||
80 | |||
81 | static struct jump_label_entry * | ||
82 | add_jump_label_entry(jump_label_t key, int nr_entries, struct jump_entry *table) | ||
83 | { | ||
84 | struct hlist_head *head; | ||
85 | struct jump_label_entry *e; | ||
86 | u32 hash; | ||
87 | |||
88 | e = get_jump_label_entry(key); | ||
89 | if (e) | ||
90 | return ERR_PTR(-EEXIST); | ||
91 | |||
92 | e = kmalloc(sizeof(struct jump_label_entry), GFP_KERNEL); | ||
93 | if (!e) | ||
94 | return ERR_PTR(-ENOMEM); | ||
95 | |||
96 | hash = jhash((void *)&key, sizeof(jump_label_t), 0); | ||
97 | head = &jump_label_table[hash & (JUMP_LABEL_TABLE_SIZE - 1)]; | ||
98 | e->key = key; | ||
99 | e->table = table; | ||
100 | e->nr_entries = nr_entries; | ||
101 | INIT_HLIST_HEAD(&(e->modules)); | ||
102 | hlist_add_head(&e->hlist, head); | ||
103 | return e; | ||
104 | } | ||
105 | |||
106 | static int | ||
107 | build_jump_label_hashtable(struct jump_entry *start, struct jump_entry *stop) | ||
108 | { | ||
109 | struct jump_entry *iter, *iter_begin; | ||
110 | struct jump_label_entry *entry; | ||
111 | int count; | ||
112 | |||
113 | sort_jump_label_entries(start, stop); | ||
114 | iter = start; | ||
115 | while (iter < stop) { | ||
116 | entry = get_jump_label_entry(iter->key); | ||
117 | if (!entry) { | ||
118 | iter_begin = iter; | ||
119 | count = 0; | ||
120 | while ((iter < stop) && | ||
121 | (iter->key == iter_begin->key)) { | ||
122 | iter++; | ||
123 | count++; | ||
124 | } | ||
125 | entry = add_jump_label_entry(iter_begin->key, | ||
126 | count, iter_begin); | ||
127 | if (IS_ERR(entry)) | ||
128 | return PTR_ERR(entry); | ||
129 | } else { | ||
130 | WARN_ONCE(1, KERN_ERR "build_jump_hashtable: unexpected entry!\n"); | ||
131 | return -1; | ||
132 | } | ||
133 | } | ||
134 | return 0; | ||
135 | } | ||
136 | |||
137 | /*** | ||
138 | * jump_label_update - update jump label text | ||
139 | * @key - key value associated with a a jump label | ||
140 | * @type - enum set to JUMP_LABEL_ENABLE or JUMP_LABEL_DISABLE | ||
141 | * | ||
142 | * Will enable/disable the jump for jump label @key, depending on the | ||
143 | * value of @type. | ||
144 | * | ||
145 | */ | ||
146 | |||
147 | void jump_label_update(unsigned long key, enum jump_label_type type) | ||
148 | { | ||
149 | struct jump_entry *iter; | ||
150 | struct jump_label_entry *entry; | ||
151 | struct hlist_node *module_node; | ||
152 | struct jump_label_module_entry *e_module; | ||
153 | int count; | ||
154 | |||
155 | mutex_lock(&jump_label_mutex); | ||
156 | entry = get_jump_label_entry((jump_label_t)key); | ||
157 | if (entry) { | ||
158 | count = entry->nr_entries; | ||
159 | iter = entry->table; | ||
160 | while (count--) { | ||
161 | if (kernel_text_address(iter->code)) | ||
162 | arch_jump_label_transform(iter, type); | ||
163 | iter++; | ||
164 | } | ||
165 | /* eanble/disable jump labels in modules */ | ||
166 | hlist_for_each_entry(e_module, module_node, &(entry->modules), | ||
167 | hlist) { | ||
168 | count = e_module->nr_entries; | ||
169 | iter = e_module->table; | ||
170 | while (count--) { | ||
171 | if (kernel_text_address(iter->code)) | ||
172 | arch_jump_label_transform(iter, type); | ||
173 | iter++; | ||
174 | } | ||
175 | } | ||
176 | } | ||
177 | mutex_unlock(&jump_label_mutex); | ||
178 | } | ||
179 | |||
180 | static int addr_conflict(struct jump_entry *entry, void *start, void *end) | ||
181 | { | ||
182 | if (entry->code <= (unsigned long)end && | ||
183 | entry->code + JUMP_LABEL_NOP_SIZE > (unsigned long)start) | ||
184 | return 1; | ||
185 | |||
186 | return 0; | ||
187 | } | ||
188 | |||
189 | #ifdef CONFIG_MODULES | ||
190 | |||
191 | static int module_conflict(void *start, void *end) | ||
192 | { | ||
193 | struct hlist_head *head; | ||
194 | struct hlist_node *node, *node_next, *module_node, *module_node_next; | ||
195 | struct jump_label_entry *e; | ||
196 | struct jump_label_module_entry *e_module; | ||
197 | struct jump_entry *iter; | ||
198 | int i, count; | ||
199 | int conflict = 0; | ||
200 | |||
201 | for (i = 0; i < JUMP_LABEL_TABLE_SIZE; i++) { | ||
202 | head = &jump_label_table[i]; | ||
203 | hlist_for_each_entry_safe(e, node, node_next, head, hlist) { | ||
204 | hlist_for_each_entry_safe(e_module, module_node, | ||
205 | module_node_next, | ||
206 | &(e->modules), hlist) { | ||
207 | count = e_module->nr_entries; | ||
208 | iter = e_module->table; | ||
209 | while (count--) { | ||
210 | if (addr_conflict(iter, start, end)) { | ||
211 | conflict = 1; | ||
212 | goto out; | ||
213 | } | ||
214 | iter++; | ||
215 | } | ||
216 | } | ||
217 | } | ||
218 | } | ||
219 | out: | ||
220 | return conflict; | ||
221 | } | ||
222 | |||
223 | #endif | ||
224 | |||
225 | /*** | ||
226 | * jump_label_text_reserved - check if addr range is reserved | ||
227 | * @start: start text addr | ||
228 | * @end: end text addr | ||
229 | * | ||
230 | * checks if the text addr located between @start and @end | ||
231 | * overlaps with any of the jump label patch addresses. Code | ||
232 | * that wants to modify kernel text should first verify that | ||
233 | * it does not overlap with any of the jump label addresses. | ||
234 | * | ||
235 | * returns 1 if there is an overlap, 0 otherwise | ||
236 | */ | ||
237 | int jump_label_text_reserved(void *start, void *end) | ||
238 | { | ||
239 | struct jump_entry *iter; | ||
240 | struct jump_entry *iter_start = __start___jump_table; | ||
241 | struct jump_entry *iter_stop = __start___jump_table; | ||
242 | int conflict = 0; | ||
243 | |||
244 | mutex_lock(&jump_label_mutex); | ||
245 | iter = iter_start; | ||
246 | while (iter < iter_stop) { | ||
247 | if (addr_conflict(iter, start, end)) { | ||
248 | conflict = 1; | ||
249 | goto out; | ||
250 | } | ||
251 | iter++; | ||
252 | } | ||
253 | |||
254 | /* now check modules */ | ||
255 | #ifdef CONFIG_MODULES | ||
256 | conflict = module_conflict(start, end); | ||
257 | #endif | ||
258 | out: | ||
259 | mutex_unlock(&jump_label_mutex); | ||
260 | return conflict; | ||
261 | } | ||
262 | |||
263 | static __init int init_jump_label(void) | ||
264 | { | ||
265 | int ret; | ||
266 | struct jump_entry *iter_start = __start___jump_table; | ||
267 | struct jump_entry *iter_stop = __stop___jump_table; | ||
268 | struct jump_entry *iter; | ||
269 | |||
270 | mutex_lock(&jump_label_mutex); | ||
271 | ret = build_jump_label_hashtable(__start___jump_table, | ||
272 | __stop___jump_table); | ||
273 | iter = iter_start; | ||
274 | while (iter < iter_stop) { | ||
275 | arch_jump_label_text_poke_early(iter->code); | ||
276 | iter++; | ||
277 | } | ||
278 | mutex_unlock(&jump_label_mutex); | ||
279 | return ret; | ||
280 | } | ||
281 | early_initcall(init_jump_label); | ||
282 | |||
283 | #ifdef CONFIG_MODULES | ||
284 | |||
285 | static struct jump_label_module_entry * | ||
286 | add_jump_label_module_entry(struct jump_label_entry *entry, | ||
287 | struct jump_entry *iter_begin, | ||
288 | int count, struct module *mod) | ||
289 | { | ||
290 | struct jump_label_module_entry *e; | ||
291 | |||
292 | e = kmalloc(sizeof(struct jump_label_module_entry), GFP_KERNEL); | ||
293 | if (!e) | ||
294 | return ERR_PTR(-ENOMEM); | ||
295 | e->mod = mod; | ||
296 | e->nr_entries = count; | ||
297 | e->table = iter_begin; | ||
298 | hlist_add_head(&e->hlist, &entry->modules); | ||
299 | return e; | ||
300 | } | ||
301 | |||
302 | static int add_jump_label_module(struct module *mod) | ||
303 | { | ||
304 | struct jump_entry *iter, *iter_begin; | ||
305 | struct jump_label_entry *entry; | ||
306 | struct jump_label_module_entry *module_entry; | ||
307 | int count; | ||
308 | |||
309 | /* if the module doesn't have jump label entries, just return */ | ||
310 | if (!mod->num_jump_entries) | ||
311 | return 0; | ||
312 | |||
313 | sort_jump_label_entries(mod->jump_entries, | ||
314 | mod->jump_entries + mod->num_jump_entries); | ||
315 | iter = mod->jump_entries; | ||
316 | while (iter < mod->jump_entries + mod->num_jump_entries) { | ||
317 | entry = get_jump_label_entry(iter->key); | ||
318 | iter_begin = iter; | ||
319 | count = 0; | ||
320 | while ((iter < mod->jump_entries + mod->num_jump_entries) && | ||
321 | (iter->key == iter_begin->key)) { | ||
322 | iter++; | ||
323 | count++; | ||
324 | } | ||
325 | if (!entry) { | ||
326 | entry = add_jump_label_entry(iter_begin->key, 0, NULL); | ||
327 | if (IS_ERR(entry)) | ||
328 | return PTR_ERR(entry); | ||
329 | } | ||
330 | module_entry = add_jump_label_module_entry(entry, iter_begin, | ||
331 | count, mod); | ||
332 | if (IS_ERR(module_entry)) | ||
333 | return PTR_ERR(module_entry); | ||
334 | } | ||
335 | return 0; | ||
336 | } | ||
337 | |||
338 | static void remove_jump_label_module(struct module *mod) | ||
339 | { | ||
340 | struct hlist_head *head; | ||
341 | struct hlist_node *node, *node_next, *module_node, *module_node_next; | ||
342 | struct jump_label_entry *e; | ||
343 | struct jump_label_module_entry *e_module; | ||
344 | int i; | ||
345 | |||
346 | /* if the module doesn't have jump label entries, just return */ | ||
347 | if (!mod->num_jump_entries) | ||
348 | return; | ||
349 | |||
350 | for (i = 0; i < JUMP_LABEL_TABLE_SIZE; i++) { | ||
351 | head = &jump_label_table[i]; | ||
352 | hlist_for_each_entry_safe(e, node, node_next, head, hlist) { | ||
353 | hlist_for_each_entry_safe(e_module, module_node, | ||
354 | module_node_next, | ||
355 | &(e->modules), hlist) { | ||
356 | if (e_module->mod == mod) { | ||
357 | hlist_del(&e_module->hlist); | ||
358 | kfree(e_module); | ||
359 | } | ||
360 | } | ||
361 | if (hlist_empty(&e->modules) && (e->nr_entries == 0)) { | ||
362 | hlist_del(&e->hlist); | ||
363 | kfree(e); | ||
364 | } | ||
365 | } | ||
366 | } | ||
367 | } | ||
368 | |||
369 | static int | ||
370 | jump_label_module_notify(struct notifier_block *self, unsigned long val, | ||
371 | void *data) | ||
372 | { | ||
373 | struct module *mod = data; | ||
374 | int ret = 0; | ||
375 | |||
376 | switch (val) { | ||
377 | case MODULE_STATE_COMING: | ||
378 | mutex_lock(&jump_label_mutex); | ||
379 | ret = add_jump_label_module(mod); | ||
380 | if (ret) | ||
381 | remove_jump_label_module(mod); | ||
382 | mutex_unlock(&jump_label_mutex); | ||
383 | break; | ||
384 | case MODULE_STATE_GOING: | ||
385 | mutex_lock(&jump_label_mutex); | ||
386 | remove_jump_label_module(mod); | ||
387 | mutex_unlock(&jump_label_mutex); | ||
388 | break; | ||
389 | } | ||
390 | return ret; | ||
391 | } | ||
392 | |||
393 | /*** | ||
394 | * apply_jump_label_nops - patch module jump labels with arch_get_jump_label_nop() | ||
395 | * @mod: module to patch | ||
396 | * | ||
397 | * Allow for run-time selection of the optimal nops. Before the module | ||
398 | * loads patch these with arch_get_jump_label_nop(), which is specified by | ||
399 | * the arch specific jump label code. | ||
400 | */ | ||
401 | void jump_label_apply_nops(struct module *mod) | ||
402 | { | ||
403 | struct jump_entry *iter; | ||
404 | |||
405 | /* if the module doesn't have jump label entries, just return */ | ||
406 | if (!mod->num_jump_entries) | ||
407 | return; | ||
408 | |||
409 | iter = mod->jump_entries; | ||
410 | while (iter < mod->jump_entries + mod->num_jump_entries) { | ||
411 | arch_jump_label_text_poke_early(iter->code); | ||
412 | iter++; | ||
413 | } | ||
414 | } | ||
415 | |||
416 | struct notifier_block jump_label_module_nb = { | ||
417 | .notifier_call = jump_label_module_notify, | ||
418 | .priority = 0, | ||
419 | }; | ||
420 | |||
421 | static __init int init_jump_label_module(void) | ||
422 | { | ||
423 | return register_module_notifier(&jump_label_module_nb); | ||
424 | } | ||
425 | early_initcall(init_jump_label_module); | ||
426 | |||
427 | #endif /* CONFIG_MODULES */ | ||
428 | |||
429 | #endif | ||
diff --git a/kernel/kprobes.c b/kernel/kprobes.c index 282035f3ae96..ec4210c6501e 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c | |||
@@ -47,6 +47,7 @@ | |||
47 | #include <linux/memory.h> | 47 | #include <linux/memory.h> |
48 | #include <linux/ftrace.h> | 48 | #include <linux/ftrace.h> |
49 | #include <linux/cpu.h> | 49 | #include <linux/cpu.h> |
50 | #include <linux/jump_label.h> | ||
50 | 51 | ||
51 | #include <asm-generic/sections.h> | 52 | #include <asm-generic/sections.h> |
52 | #include <asm/cacheflush.h> | 53 | #include <asm/cacheflush.h> |
@@ -399,7 +400,7 @@ static inline int kprobe_optready(struct kprobe *p) | |||
399 | * Return an optimized kprobe whose optimizing code replaces | 400 | * Return an optimized kprobe whose optimizing code replaces |
400 | * instructions including addr (exclude breakpoint). | 401 | * instructions including addr (exclude breakpoint). |
401 | */ | 402 | */ |
402 | struct kprobe *__kprobes get_optimized_kprobe(unsigned long addr) | 403 | static struct kprobe *__kprobes get_optimized_kprobe(unsigned long addr) |
403 | { | 404 | { |
404 | int i; | 405 | int i; |
405 | struct kprobe *p = NULL; | 406 | struct kprobe *p = NULL; |
@@ -831,6 +832,7 @@ void __kprobes recycle_rp_inst(struct kretprobe_instance *ri, | |||
831 | 832 | ||
832 | void __kprobes kretprobe_hash_lock(struct task_struct *tsk, | 833 | void __kprobes kretprobe_hash_lock(struct task_struct *tsk, |
833 | struct hlist_head **head, unsigned long *flags) | 834 | struct hlist_head **head, unsigned long *flags) |
835 | __acquires(hlist_lock) | ||
834 | { | 836 | { |
835 | unsigned long hash = hash_ptr(tsk, KPROBE_HASH_BITS); | 837 | unsigned long hash = hash_ptr(tsk, KPROBE_HASH_BITS); |
836 | spinlock_t *hlist_lock; | 838 | spinlock_t *hlist_lock; |
@@ -842,6 +844,7 @@ void __kprobes kretprobe_hash_lock(struct task_struct *tsk, | |||
842 | 844 | ||
843 | static void __kprobes kretprobe_table_lock(unsigned long hash, | 845 | static void __kprobes kretprobe_table_lock(unsigned long hash, |
844 | unsigned long *flags) | 846 | unsigned long *flags) |
847 | __acquires(hlist_lock) | ||
845 | { | 848 | { |
846 | spinlock_t *hlist_lock = kretprobe_table_lock_ptr(hash); | 849 | spinlock_t *hlist_lock = kretprobe_table_lock_ptr(hash); |
847 | spin_lock_irqsave(hlist_lock, *flags); | 850 | spin_lock_irqsave(hlist_lock, *flags); |
@@ -849,6 +852,7 @@ static void __kprobes kretprobe_table_lock(unsigned long hash, | |||
849 | 852 | ||
850 | void __kprobes kretprobe_hash_unlock(struct task_struct *tsk, | 853 | void __kprobes kretprobe_hash_unlock(struct task_struct *tsk, |
851 | unsigned long *flags) | 854 | unsigned long *flags) |
855 | __releases(hlist_lock) | ||
852 | { | 856 | { |
853 | unsigned long hash = hash_ptr(tsk, KPROBE_HASH_BITS); | 857 | unsigned long hash = hash_ptr(tsk, KPROBE_HASH_BITS); |
854 | spinlock_t *hlist_lock; | 858 | spinlock_t *hlist_lock; |
@@ -857,7 +861,9 @@ void __kprobes kretprobe_hash_unlock(struct task_struct *tsk, | |||
857 | spin_unlock_irqrestore(hlist_lock, *flags); | 861 | spin_unlock_irqrestore(hlist_lock, *flags); |
858 | } | 862 | } |
859 | 863 | ||
860 | void __kprobes kretprobe_table_unlock(unsigned long hash, unsigned long *flags) | 864 | static void __kprobes kretprobe_table_unlock(unsigned long hash, |
865 | unsigned long *flags) | ||
866 | __releases(hlist_lock) | ||
861 | { | 867 | { |
862 | spinlock_t *hlist_lock = kretprobe_table_lock_ptr(hash); | 868 | spinlock_t *hlist_lock = kretprobe_table_lock_ptr(hash); |
863 | spin_unlock_irqrestore(hlist_lock, *flags); | 869 | spin_unlock_irqrestore(hlist_lock, *flags); |
@@ -1141,7 +1147,8 @@ int __kprobes register_kprobe(struct kprobe *p) | |||
1141 | preempt_disable(); | 1147 | preempt_disable(); |
1142 | if (!kernel_text_address((unsigned long) p->addr) || | 1148 | if (!kernel_text_address((unsigned long) p->addr) || |
1143 | in_kprobes_functions((unsigned long) p->addr) || | 1149 | in_kprobes_functions((unsigned long) p->addr) || |
1144 | ftrace_text_reserved(p->addr, p->addr)) { | 1150 | ftrace_text_reserved(p->addr, p->addr) || |
1151 | jump_label_text_reserved(p->addr, p->addr)) { | ||
1145 | preempt_enable(); | 1152 | preempt_enable(); |
1146 | return -EINVAL; | 1153 | return -EINVAL; |
1147 | } | 1154 | } |
@@ -1339,18 +1346,19 @@ int __kprobes register_jprobes(struct jprobe **jps, int num) | |||
1339 | if (num <= 0) | 1346 | if (num <= 0) |
1340 | return -EINVAL; | 1347 | return -EINVAL; |
1341 | for (i = 0; i < num; i++) { | 1348 | for (i = 0; i < num; i++) { |
1342 | unsigned long addr; | 1349 | unsigned long addr, offset; |
1343 | jp = jps[i]; | 1350 | jp = jps[i]; |
1344 | addr = arch_deref_entry_point(jp->entry); | 1351 | addr = arch_deref_entry_point(jp->entry); |
1345 | 1352 | ||
1346 | if (!kernel_text_address(addr)) | 1353 | /* Verify probepoint is a function entry point */ |
1347 | ret = -EINVAL; | 1354 | if (kallsyms_lookup_size_offset(addr, NULL, &offset) && |
1348 | else { | 1355 | offset == 0) { |
1349 | /* Todo: Verify probepoint is a function entry point */ | ||
1350 | jp->kp.pre_handler = setjmp_pre_handler; | 1356 | jp->kp.pre_handler = setjmp_pre_handler; |
1351 | jp->kp.break_handler = longjmp_break_handler; | 1357 | jp->kp.break_handler = longjmp_break_handler; |
1352 | ret = register_kprobe(&jp->kp); | 1358 | ret = register_kprobe(&jp->kp); |
1353 | } | 1359 | } else |
1360 | ret = -EINVAL; | ||
1361 | |||
1354 | if (ret < 0) { | 1362 | if (ret < 0) { |
1355 | if (i > 0) | 1363 | if (i > 0) |
1356 | unregister_jprobes(jps, i); | 1364 | unregister_jprobes(jps, i); |
diff --git a/kernel/lockdep.c b/kernel/lockdep.c index f2852a510232..42ba65dff7d9 100644 --- a/kernel/lockdep.c +++ b/kernel/lockdep.c | |||
@@ -639,6 +639,16 @@ look_up_lock_class(struct lockdep_map *lock, unsigned int subclass) | |||
639 | } | 639 | } |
640 | #endif | 640 | #endif |
641 | 641 | ||
642 | if (unlikely(subclass >= MAX_LOCKDEP_SUBCLASSES)) { | ||
643 | debug_locks_off(); | ||
644 | printk(KERN_ERR | ||
645 | "BUG: looking up invalid subclass: %u\n", subclass); | ||
646 | printk(KERN_ERR | ||
647 | "turning off the locking correctness validator.\n"); | ||
648 | dump_stack(); | ||
649 | return NULL; | ||
650 | } | ||
651 | |||
642 | /* | 652 | /* |
643 | * Static locks do not have their class-keys yet - for them the key | 653 | * Static locks do not have their class-keys yet - for them the key |
644 | * is the lock object itself: | 654 | * is the lock object itself: |
@@ -774,7 +784,9 @@ out_unlock_set: | |||
774 | raw_local_irq_restore(flags); | 784 | raw_local_irq_restore(flags); |
775 | 785 | ||
776 | if (!subclass || force) | 786 | if (!subclass || force) |
777 | lock->class_cache = class; | 787 | lock->class_cache[0] = class; |
788 | else if (subclass < NR_LOCKDEP_CACHING_CLASSES) | ||
789 | lock->class_cache[subclass] = class; | ||
778 | 790 | ||
779 | if (DEBUG_LOCKS_WARN_ON(class->subclass != subclass)) | 791 | if (DEBUG_LOCKS_WARN_ON(class->subclass != subclass)) |
780 | return NULL; | 792 | return NULL; |
@@ -2679,7 +2691,11 @@ static int mark_lock(struct task_struct *curr, struct held_lock *this, | |||
2679 | void lockdep_init_map(struct lockdep_map *lock, const char *name, | 2691 | void lockdep_init_map(struct lockdep_map *lock, const char *name, |
2680 | struct lock_class_key *key, int subclass) | 2692 | struct lock_class_key *key, int subclass) |
2681 | { | 2693 | { |
2682 | lock->class_cache = NULL; | 2694 | int i; |
2695 | |||
2696 | for (i = 0; i < NR_LOCKDEP_CACHING_CLASSES; i++) | ||
2697 | lock->class_cache[i] = NULL; | ||
2698 | |||
2683 | #ifdef CONFIG_LOCK_STAT | 2699 | #ifdef CONFIG_LOCK_STAT |
2684 | lock->cpu = raw_smp_processor_id(); | 2700 | lock->cpu = raw_smp_processor_id(); |
2685 | #endif | 2701 | #endif |
@@ -2739,21 +2755,13 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass, | |||
2739 | if (DEBUG_LOCKS_WARN_ON(!irqs_disabled())) | 2755 | if (DEBUG_LOCKS_WARN_ON(!irqs_disabled())) |
2740 | return 0; | 2756 | return 0; |
2741 | 2757 | ||
2742 | if (unlikely(subclass >= MAX_LOCKDEP_SUBCLASSES)) { | ||
2743 | debug_locks_off(); | ||
2744 | printk("BUG: MAX_LOCKDEP_SUBCLASSES too low!\n"); | ||
2745 | printk("turning off the locking correctness validator.\n"); | ||
2746 | dump_stack(); | ||
2747 | return 0; | ||
2748 | } | ||
2749 | |||
2750 | if (lock->key == &__lockdep_no_validate__) | 2758 | if (lock->key == &__lockdep_no_validate__) |
2751 | check = 1; | 2759 | check = 1; |
2752 | 2760 | ||
2753 | if (!subclass) | 2761 | if (subclass < NR_LOCKDEP_CACHING_CLASSES) |
2754 | class = lock->class_cache; | 2762 | class = lock->class_cache[subclass]; |
2755 | /* | 2763 | /* |
2756 | * Not cached yet or subclass? | 2764 | * Not cached? |
2757 | */ | 2765 | */ |
2758 | if (unlikely(!class)) { | 2766 | if (unlikely(!class)) { |
2759 | class = register_lock_class(lock, subclass, 0); | 2767 | class = register_lock_class(lock, subclass, 0); |
@@ -2918,7 +2926,7 @@ static int match_held_lock(struct held_lock *hlock, struct lockdep_map *lock) | |||
2918 | return 1; | 2926 | return 1; |
2919 | 2927 | ||
2920 | if (hlock->references) { | 2928 | if (hlock->references) { |
2921 | struct lock_class *class = lock->class_cache; | 2929 | struct lock_class *class = lock->class_cache[0]; |
2922 | 2930 | ||
2923 | if (!class) | 2931 | if (!class) |
2924 | class = look_up_lock_class(lock, 0); | 2932 | class = look_up_lock_class(lock, 0); |
@@ -3559,7 +3567,12 @@ void lockdep_reset_lock(struct lockdep_map *lock) | |||
3559 | if (list_empty(head)) | 3567 | if (list_empty(head)) |
3560 | continue; | 3568 | continue; |
3561 | list_for_each_entry_safe(class, next, head, hash_entry) { | 3569 | list_for_each_entry_safe(class, next, head, hash_entry) { |
3562 | if (unlikely(class == lock->class_cache)) { | 3570 | int match = 0; |
3571 | |||
3572 | for (j = 0; j < NR_LOCKDEP_CACHING_CLASSES; j++) | ||
3573 | match |= class == lock->class_cache[j]; | ||
3574 | |||
3575 | if (unlikely(match)) { | ||
3563 | if (debug_locks_off_graph_unlock()) | 3576 | if (debug_locks_off_graph_unlock()) |
3564 | WARN_ON(1); | 3577 | WARN_ON(1); |
3565 | goto out_restore; | 3578 | goto out_restore; |
@@ -3775,7 +3788,7 @@ EXPORT_SYMBOL_GPL(debug_show_all_locks); | |||
3775 | * Careful: only use this function if you are sure that | 3788 | * Careful: only use this function if you are sure that |
3776 | * the task cannot run in parallel! | 3789 | * the task cannot run in parallel! |
3777 | */ | 3790 | */ |
3778 | void __debug_show_held_locks(struct task_struct *task) | 3791 | void debug_show_held_locks(struct task_struct *task) |
3779 | { | 3792 | { |
3780 | if (unlikely(!debug_locks)) { | 3793 | if (unlikely(!debug_locks)) { |
3781 | printk("INFO: lockdep is turned off.\n"); | 3794 | printk("INFO: lockdep is turned off.\n"); |
@@ -3783,12 +3796,6 @@ void __debug_show_held_locks(struct task_struct *task) | |||
3783 | } | 3796 | } |
3784 | lockdep_print_held_locks(task); | 3797 | lockdep_print_held_locks(task); |
3785 | } | 3798 | } |
3786 | EXPORT_SYMBOL_GPL(__debug_show_held_locks); | ||
3787 | |||
3788 | void debug_show_held_locks(struct task_struct *task) | ||
3789 | { | ||
3790 | __debug_show_held_locks(task); | ||
3791 | } | ||
3792 | EXPORT_SYMBOL_GPL(debug_show_held_locks); | 3799 | EXPORT_SYMBOL_GPL(debug_show_held_locks); |
3793 | 3800 | ||
3794 | void lockdep_sys_exit(void) | 3801 | void lockdep_sys_exit(void) |
diff --git a/kernel/module.c b/kernel/module.c index ccd641991842..2df46301a7a4 100644 --- a/kernel/module.c +++ b/kernel/module.c | |||
@@ -55,6 +55,7 @@ | |||
55 | #include <linux/async.h> | 55 | #include <linux/async.h> |
56 | #include <linux/percpu.h> | 56 | #include <linux/percpu.h> |
57 | #include <linux/kmemleak.h> | 57 | #include <linux/kmemleak.h> |
58 | #include <linux/jump_label.h> | ||
58 | 59 | ||
59 | #define CREATE_TRACE_POINTS | 60 | #define CREATE_TRACE_POINTS |
60 | #include <trace/events/module.h> | 61 | #include <trace/events/module.h> |
@@ -2309,6 +2310,11 @@ static void find_module_sections(struct module *mod, struct load_info *info) | |||
2309 | sizeof(*mod->tracepoints), | 2310 | sizeof(*mod->tracepoints), |
2310 | &mod->num_tracepoints); | 2311 | &mod->num_tracepoints); |
2311 | #endif | 2312 | #endif |
2313 | #ifdef HAVE_JUMP_LABEL | ||
2314 | mod->jump_entries = section_objs(info, "__jump_table", | ||
2315 | sizeof(*mod->jump_entries), | ||
2316 | &mod->num_jump_entries); | ||
2317 | #endif | ||
2312 | #ifdef CONFIG_EVENT_TRACING | 2318 | #ifdef CONFIG_EVENT_TRACING |
2313 | mod->trace_events = section_objs(info, "_ftrace_events", | 2319 | mod->trace_events = section_objs(info, "_ftrace_events", |
2314 | sizeof(*mod->trace_events), | 2320 | sizeof(*mod->trace_events), |
diff --git a/kernel/perf_event.c b/kernel/perf_event.c index b98bed3d8182..f309e8014c78 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c | |||
@@ -31,24 +31,18 @@ | |||
31 | #include <linux/kernel_stat.h> | 31 | #include <linux/kernel_stat.h> |
32 | #include <linux/perf_event.h> | 32 | #include <linux/perf_event.h> |
33 | #include <linux/ftrace_event.h> | 33 | #include <linux/ftrace_event.h> |
34 | #include <linux/hw_breakpoint.h> | ||
35 | 34 | ||
36 | #include <asm/irq_regs.h> | 35 | #include <asm/irq_regs.h> |
37 | 36 | ||
38 | /* | 37 | atomic_t perf_task_events __read_mostly; |
39 | * Each CPU has a list of per CPU events: | ||
40 | */ | ||
41 | static DEFINE_PER_CPU(struct perf_cpu_context, perf_cpu_context); | ||
42 | |||
43 | int perf_max_events __read_mostly = 1; | ||
44 | static int perf_reserved_percpu __read_mostly; | ||
45 | static int perf_overcommit __read_mostly = 1; | ||
46 | |||
47 | static atomic_t nr_events __read_mostly; | ||
48 | static atomic_t nr_mmap_events __read_mostly; | 38 | static atomic_t nr_mmap_events __read_mostly; |
49 | static atomic_t nr_comm_events __read_mostly; | 39 | static atomic_t nr_comm_events __read_mostly; |
50 | static atomic_t nr_task_events __read_mostly; | 40 | static atomic_t nr_task_events __read_mostly; |
51 | 41 | ||
42 | static LIST_HEAD(pmus); | ||
43 | static DEFINE_MUTEX(pmus_lock); | ||
44 | static struct srcu_struct pmus_srcu; | ||
45 | |||
52 | /* | 46 | /* |
53 | * perf event paranoia level: | 47 | * perf event paranoia level: |
54 | * -1 - not paranoid at all | 48 | * -1 - not paranoid at all |
@@ -67,36 +61,43 @@ int sysctl_perf_event_sample_rate __read_mostly = 100000; | |||
67 | 61 | ||
68 | static atomic64_t perf_event_id; | 62 | static atomic64_t perf_event_id; |
69 | 63 | ||
70 | /* | 64 | void __weak perf_event_print_debug(void) { } |
71 | * Lock for (sysadmin-configurable) event reservations: | ||
72 | */ | ||
73 | static DEFINE_SPINLOCK(perf_resource_lock); | ||
74 | 65 | ||
75 | /* | 66 | extern __weak const char *perf_pmu_name(void) |
76 | * Architecture provided APIs - weak aliases: | ||
77 | */ | ||
78 | extern __weak const struct pmu *hw_perf_event_init(struct perf_event *event) | ||
79 | { | 67 | { |
80 | return NULL; | 68 | return "pmu"; |
81 | } | 69 | } |
82 | 70 | ||
83 | void __weak hw_perf_disable(void) { barrier(); } | 71 | void perf_pmu_disable(struct pmu *pmu) |
84 | void __weak hw_perf_enable(void) { barrier(); } | 72 | { |
85 | 73 | int *count = this_cpu_ptr(pmu->pmu_disable_count); | |
86 | void __weak perf_event_print_debug(void) { } | 74 | if (!(*count)++) |
87 | 75 | pmu->pmu_disable(pmu); | |
88 | static DEFINE_PER_CPU(int, perf_disable_count); | 76 | } |
89 | 77 | ||
90 | void perf_disable(void) | 78 | void perf_pmu_enable(struct pmu *pmu) |
91 | { | 79 | { |
92 | if (!__get_cpu_var(perf_disable_count)++) | 80 | int *count = this_cpu_ptr(pmu->pmu_disable_count); |
93 | hw_perf_disable(); | 81 | if (!--(*count)) |
82 | pmu->pmu_enable(pmu); | ||
94 | } | 83 | } |
95 | 84 | ||
96 | void perf_enable(void) | 85 | static DEFINE_PER_CPU(struct list_head, rotation_list); |
86 | |||
87 | /* | ||
88 | * perf_pmu_rotate_start() and perf_rotate_context() are fully serialized | ||
89 | * because they're strictly cpu affine and rotate_start is called with IRQs | ||
90 | * disabled, while rotate_context is called from IRQ context. | ||
91 | */ | ||
92 | static void perf_pmu_rotate_start(struct pmu *pmu) | ||
97 | { | 93 | { |
98 | if (!--__get_cpu_var(perf_disable_count)) | 94 | struct perf_cpu_context *cpuctx = this_cpu_ptr(pmu->pmu_cpu_context); |
99 | hw_perf_enable(); | 95 | struct list_head *head = &__get_cpu_var(rotation_list); |
96 | |||
97 | WARN_ON(!irqs_disabled()); | ||
98 | |||
99 | if (list_empty(&cpuctx->rotation_list)) | ||
100 | list_add(&cpuctx->rotation_list, head); | ||
100 | } | 101 | } |
101 | 102 | ||
102 | static void get_ctx(struct perf_event_context *ctx) | 103 | static void get_ctx(struct perf_event_context *ctx) |
@@ -151,13 +152,13 @@ static u64 primary_event_id(struct perf_event *event) | |||
151 | * the context could get moved to another task. | 152 | * the context could get moved to another task. |
152 | */ | 153 | */ |
153 | static struct perf_event_context * | 154 | static struct perf_event_context * |
154 | perf_lock_task_context(struct task_struct *task, unsigned long *flags) | 155 | perf_lock_task_context(struct task_struct *task, int ctxn, unsigned long *flags) |
155 | { | 156 | { |
156 | struct perf_event_context *ctx; | 157 | struct perf_event_context *ctx; |
157 | 158 | ||
158 | rcu_read_lock(); | 159 | rcu_read_lock(); |
159 | retry: | 160 | retry: |
160 | ctx = rcu_dereference(task->perf_event_ctxp); | 161 | ctx = rcu_dereference(task->perf_event_ctxp[ctxn]); |
161 | if (ctx) { | 162 | if (ctx) { |
162 | /* | 163 | /* |
163 | * If this context is a clone of another, it might | 164 | * If this context is a clone of another, it might |
@@ -170,7 +171,7 @@ perf_lock_task_context(struct task_struct *task, unsigned long *flags) | |||
170 | * can't get swapped on us any more. | 171 | * can't get swapped on us any more. |
171 | */ | 172 | */ |
172 | raw_spin_lock_irqsave(&ctx->lock, *flags); | 173 | raw_spin_lock_irqsave(&ctx->lock, *flags); |
173 | if (ctx != rcu_dereference(task->perf_event_ctxp)) { | 174 | if (ctx != rcu_dereference(task->perf_event_ctxp[ctxn])) { |
174 | raw_spin_unlock_irqrestore(&ctx->lock, *flags); | 175 | raw_spin_unlock_irqrestore(&ctx->lock, *flags); |
175 | goto retry; | 176 | goto retry; |
176 | } | 177 | } |
@@ -189,12 +190,13 @@ perf_lock_task_context(struct task_struct *task, unsigned long *flags) | |||
189 | * can't get swapped to another task. This also increments its | 190 | * can't get swapped to another task. This also increments its |
190 | * reference count so that the context can't get freed. | 191 | * reference count so that the context can't get freed. |
191 | */ | 192 | */ |
192 | static struct perf_event_context *perf_pin_task_context(struct task_struct *task) | 193 | static struct perf_event_context * |
194 | perf_pin_task_context(struct task_struct *task, int ctxn) | ||
193 | { | 195 | { |
194 | struct perf_event_context *ctx; | 196 | struct perf_event_context *ctx; |
195 | unsigned long flags; | 197 | unsigned long flags; |
196 | 198 | ||
197 | ctx = perf_lock_task_context(task, &flags); | 199 | ctx = perf_lock_task_context(task, ctxn, &flags); |
198 | if (ctx) { | 200 | if (ctx) { |
199 | ++ctx->pin_count; | 201 | ++ctx->pin_count; |
200 | raw_spin_unlock_irqrestore(&ctx->lock, flags); | 202 | raw_spin_unlock_irqrestore(&ctx->lock, flags); |
@@ -302,6 +304,8 @@ list_add_event(struct perf_event *event, struct perf_event_context *ctx) | |||
302 | } | 304 | } |
303 | 305 | ||
304 | list_add_rcu(&event->event_entry, &ctx->event_list); | 306 | list_add_rcu(&event->event_entry, &ctx->event_list); |
307 | if (!ctx->nr_events) | ||
308 | perf_pmu_rotate_start(ctx->pmu); | ||
305 | ctx->nr_events++; | 309 | ctx->nr_events++; |
306 | if (event->attr.inherit_stat) | 310 | if (event->attr.inherit_stat) |
307 | ctx->nr_stat++; | 311 | ctx->nr_stat++; |
@@ -311,7 +315,12 @@ static void perf_group_attach(struct perf_event *event) | |||
311 | { | 315 | { |
312 | struct perf_event *group_leader = event->group_leader; | 316 | struct perf_event *group_leader = event->group_leader; |
313 | 317 | ||
314 | WARN_ON_ONCE(event->attach_state & PERF_ATTACH_GROUP); | 318 | /* |
319 | * We can have double attach due to group movement in perf_event_open. | ||
320 | */ | ||
321 | if (event->attach_state & PERF_ATTACH_GROUP) | ||
322 | return; | ||
323 | |||
315 | event->attach_state |= PERF_ATTACH_GROUP; | 324 | event->attach_state |= PERF_ATTACH_GROUP; |
316 | 325 | ||
317 | if (group_leader == event) | 326 | if (group_leader == event) |
@@ -408,8 +417,8 @@ event_filter_match(struct perf_event *event) | |||
408 | return event->cpu == -1 || event->cpu == smp_processor_id(); | 417 | return event->cpu == -1 || event->cpu == smp_processor_id(); |
409 | } | 418 | } |
410 | 419 | ||
411 | static void | 420 | static int |
412 | event_sched_out(struct perf_event *event, | 421 | __event_sched_out(struct perf_event *event, |
413 | struct perf_cpu_context *cpuctx, | 422 | struct perf_cpu_context *cpuctx, |
414 | struct perf_event_context *ctx) | 423 | struct perf_event_context *ctx) |
415 | { | 424 | { |
@@ -428,15 +437,14 @@ event_sched_out(struct perf_event *event, | |||
428 | } | 437 | } |
429 | 438 | ||
430 | if (event->state != PERF_EVENT_STATE_ACTIVE) | 439 | if (event->state != PERF_EVENT_STATE_ACTIVE) |
431 | return; | 440 | return 0; |
432 | 441 | ||
433 | event->state = PERF_EVENT_STATE_INACTIVE; | 442 | event->state = PERF_EVENT_STATE_INACTIVE; |
434 | if (event->pending_disable) { | 443 | if (event->pending_disable) { |
435 | event->pending_disable = 0; | 444 | event->pending_disable = 0; |
436 | event->state = PERF_EVENT_STATE_OFF; | 445 | event->state = PERF_EVENT_STATE_OFF; |
437 | } | 446 | } |
438 | event->tstamp_stopped = ctx->time; | 447 | event->pmu->del(event, 0); |
439 | event->pmu->disable(event); | ||
440 | event->oncpu = -1; | 448 | event->oncpu = -1; |
441 | 449 | ||
442 | if (!is_software_event(event)) | 450 | if (!is_software_event(event)) |
@@ -444,6 +452,19 @@ event_sched_out(struct perf_event *event, | |||
444 | ctx->nr_active--; | 452 | ctx->nr_active--; |
445 | if (event->attr.exclusive || !cpuctx->active_oncpu) | 453 | if (event->attr.exclusive || !cpuctx->active_oncpu) |
446 | cpuctx->exclusive = 0; | 454 | cpuctx->exclusive = 0; |
455 | return 1; | ||
456 | } | ||
457 | |||
458 | static void | ||
459 | event_sched_out(struct perf_event *event, | ||
460 | struct perf_cpu_context *cpuctx, | ||
461 | struct perf_event_context *ctx) | ||
462 | { | ||
463 | int ret; | ||
464 | |||
465 | ret = __event_sched_out(event, cpuctx, ctx); | ||
466 | if (ret) | ||
467 | event->tstamp_stopped = ctx->time; | ||
447 | } | 468 | } |
448 | 469 | ||
449 | static void | 470 | static void |
@@ -466,6 +487,12 @@ group_sched_out(struct perf_event *group_event, | |||
466 | cpuctx->exclusive = 0; | 487 | cpuctx->exclusive = 0; |
467 | } | 488 | } |
468 | 489 | ||
490 | static inline struct perf_cpu_context * | ||
491 | __get_cpu_context(struct perf_event_context *ctx) | ||
492 | { | ||
493 | return this_cpu_ptr(ctx->pmu->pmu_cpu_context); | ||
494 | } | ||
495 | |||
469 | /* | 496 | /* |
470 | * Cross CPU call to remove a performance event | 497 | * Cross CPU call to remove a performance event |
471 | * | 498 | * |
@@ -474,9 +501,9 @@ group_sched_out(struct perf_event *group_event, | |||
474 | */ | 501 | */ |
475 | static void __perf_event_remove_from_context(void *info) | 502 | static void __perf_event_remove_from_context(void *info) |
476 | { | 503 | { |
477 | struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); | ||
478 | struct perf_event *event = info; | 504 | struct perf_event *event = info; |
479 | struct perf_event_context *ctx = event->ctx; | 505 | struct perf_event_context *ctx = event->ctx; |
506 | struct perf_cpu_context *cpuctx = __get_cpu_context(ctx); | ||
480 | 507 | ||
481 | /* | 508 | /* |
482 | * If this is a task context, we need to check whether it is | 509 | * If this is a task context, we need to check whether it is |
@@ -487,27 +514,11 @@ static void __perf_event_remove_from_context(void *info) | |||
487 | return; | 514 | return; |
488 | 515 | ||
489 | raw_spin_lock(&ctx->lock); | 516 | raw_spin_lock(&ctx->lock); |
490 | /* | ||
491 | * Protect the list operation against NMI by disabling the | ||
492 | * events on a global level. | ||
493 | */ | ||
494 | perf_disable(); | ||
495 | 517 | ||
496 | event_sched_out(event, cpuctx, ctx); | 518 | event_sched_out(event, cpuctx, ctx); |
497 | 519 | ||
498 | list_del_event(event, ctx); | 520 | list_del_event(event, ctx); |
499 | 521 | ||
500 | if (!ctx->task) { | ||
501 | /* | ||
502 | * Allow more per task events with respect to the | ||
503 | * reservation: | ||
504 | */ | ||
505 | cpuctx->max_pertask = | ||
506 | min(perf_max_events - ctx->nr_events, | ||
507 | perf_max_events - perf_reserved_percpu); | ||
508 | } | ||
509 | |||
510 | perf_enable(); | ||
511 | raw_spin_unlock(&ctx->lock); | 522 | raw_spin_unlock(&ctx->lock); |
512 | } | 523 | } |
513 | 524 | ||
@@ -572,8 +583,8 @@ retry: | |||
572 | static void __perf_event_disable(void *info) | 583 | static void __perf_event_disable(void *info) |
573 | { | 584 | { |
574 | struct perf_event *event = info; | 585 | struct perf_event *event = info; |
575 | struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); | ||
576 | struct perf_event_context *ctx = event->ctx; | 586 | struct perf_event_context *ctx = event->ctx; |
587 | struct perf_cpu_context *cpuctx = __get_cpu_context(ctx); | ||
577 | 588 | ||
578 | /* | 589 | /* |
579 | * If this is a per-task event, need to check whether this | 590 | * If this is a per-task event, need to check whether this |
@@ -628,7 +639,7 @@ void perf_event_disable(struct perf_event *event) | |||
628 | return; | 639 | return; |
629 | } | 640 | } |
630 | 641 | ||
631 | retry: | 642 | retry: |
632 | task_oncpu_function_call(task, __perf_event_disable, event); | 643 | task_oncpu_function_call(task, __perf_event_disable, event); |
633 | 644 | ||
634 | raw_spin_lock_irq(&ctx->lock); | 645 | raw_spin_lock_irq(&ctx->lock); |
@@ -653,7 +664,7 @@ void perf_event_disable(struct perf_event *event) | |||
653 | } | 664 | } |
654 | 665 | ||
655 | static int | 666 | static int |
656 | event_sched_in(struct perf_event *event, | 667 | __event_sched_in(struct perf_event *event, |
657 | struct perf_cpu_context *cpuctx, | 668 | struct perf_cpu_context *cpuctx, |
658 | struct perf_event_context *ctx) | 669 | struct perf_event_context *ctx) |
659 | { | 670 | { |
@@ -667,14 +678,12 @@ event_sched_in(struct perf_event *event, | |||
667 | */ | 678 | */ |
668 | smp_wmb(); | 679 | smp_wmb(); |
669 | 680 | ||
670 | if (event->pmu->enable(event)) { | 681 | if (event->pmu->add(event, PERF_EF_START)) { |
671 | event->state = PERF_EVENT_STATE_INACTIVE; | 682 | event->state = PERF_EVENT_STATE_INACTIVE; |
672 | event->oncpu = -1; | 683 | event->oncpu = -1; |
673 | return -EAGAIN; | 684 | return -EAGAIN; |
674 | } | 685 | } |
675 | 686 | ||
676 | event->tstamp_running += ctx->time - event->tstamp_stopped; | ||
677 | |||
678 | if (!is_software_event(event)) | 687 | if (!is_software_event(event)) |
679 | cpuctx->active_oncpu++; | 688 | cpuctx->active_oncpu++; |
680 | ctx->nr_active++; | 689 | ctx->nr_active++; |
@@ -685,28 +694,56 @@ event_sched_in(struct perf_event *event, | |||
685 | return 0; | 694 | return 0; |
686 | } | 695 | } |
687 | 696 | ||
697 | static inline int | ||
698 | event_sched_in(struct perf_event *event, | ||
699 | struct perf_cpu_context *cpuctx, | ||
700 | struct perf_event_context *ctx) | ||
701 | { | ||
702 | int ret = __event_sched_in(event, cpuctx, ctx); | ||
703 | if (ret) | ||
704 | return ret; | ||
705 | event->tstamp_running += ctx->time - event->tstamp_stopped; | ||
706 | return 0; | ||
707 | } | ||
708 | |||
709 | static void | ||
710 | group_commit_event_sched_in(struct perf_event *group_event, | ||
711 | struct perf_cpu_context *cpuctx, | ||
712 | struct perf_event_context *ctx) | ||
713 | { | ||
714 | struct perf_event *event; | ||
715 | u64 now = ctx->time; | ||
716 | |||
717 | group_event->tstamp_running += now - group_event->tstamp_stopped; | ||
718 | /* | ||
719 | * Schedule in siblings as one group (if any): | ||
720 | */ | ||
721 | list_for_each_entry(event, &group_event->sibling_list, group_entry) { | ||
722 | event->tstamp_running += now - event->tstamp_stopped; | ||
723 | } | ||
724 | } | ||
725 | |||
688 | static int | 726 | static int |
689 | group_sched_in(struct perf_event *group_event, | 727 | group_sched_in(struct perf_event *group_event, |
690 | struct perf_cpu_context *cpuctx, | 728 | struct perf_cpu_context *cpuctx, |
691 | struct perf_event_context *ctx) | 729 | struct perf_event_context *ctx) |
692 | { | 730 | { |
693 | struct perf_event *event, *partial_group = NULL; | 731 | struct perf_event *event, *partial_group = NULL; |
694 | const struct pmu *pmu = group_event->pmu; | 732 | struct pmu *pmu = group_event->pmu; |
695 | bool txn = false; | ||
696 | 733 | ||
697 | if (group_event->state == PERF_EVENT_STATE_OFF) | 734 | if (group_event->state == PERF_EVENT_STATE_OFF) |
698 | return 0; | 735 | return 0; |
699 | 736 | ||
700 | /* Check if group transaction availabe */ | 737 | pmu->start_txn(pmu); |
701 | if (pmu->start_txn) | ||
702 | txn = true; | ||
703 | 738 | ||
704 | if (txn) | 739 | /* |
705 | pmu->start_txn(pmu); | 740 | * use __event_sched_in() to delay updating tstamp_running |
706 | 741 | * until the transaction is committed. In case of failure | |
707 | if (event_sched_in(group_event, cpuctx, ctx)) { | 742 | * we will keep an unmodified tstamp_running which is a |
708 | if (txn) | 743 | * requirement to get correct timing information |
709 | pmu->cancel_txn(pmu); | 744 | */ |
745 | if (__event_sched_in(group_event, cpuctx, ctx)) { | ||
746 | pmu->cancel_txn(pmu); | ||
710 | return -EAGAIN; | 747 | return -EAGAIN; |
711 | } | 748 | } |
712 | 749 | ||
@@ -714,29 +751,33 @@ group_sched_in(struct perf_event *group_event, | |||
714 | * Schedule in siblings as one group (if any): | 751 | * Schedule in siblings as one group (if any): |
715 | */ | 752 | */ |
716 | list_for_each_entry(event, &group_event->sibling_list, group_entry) { | 753 | list_for_each_entry(event, &group_event->sibling_list, group_entry) { |
717 | if (event_sched_in(event, cpuctx, ctx)) { | 754 | if (__event_sched_in(event, cpuctx, ctx)) { |
718 | partial_group = event; | 755 | partial_group = event; |
719 | goto group_error; | 756 | goto group_error; |
720 | } | 757 | } |
721 | } | 758 | } |
722 | 759 | ||
723 | if (!txn || !pmu->commit_txn(pmu)) | 760 | if (!pmu->commit_txn(pmu)) { |
761 | /* commit tstamp_running */ | ||
762 | group_commit_event_sched_in(group_event, cpuctx, ctx); | ||
724 | return 0; | 763 | return 0; |
725 | 764 | } | |
726 | group_error: | 765 | group_error: |
727 | /* | 766 | /* |
728 | * Groups can be scheduled in as one unit only, so undo any | 767 | * Groups can be scheduled in as one unit only, so undo any |
729 | * partial group before returning: | 768 | * partial group before returning: |
769 | * | ||
770 | * use __event_sched_out() to avoid updating tstamp_stopped | ||
771 | * because the event never actually ran | ||
730 | */ | 772 | */ |
731 | list_for_each_entry(event, &group_event->sibling_list, group_entry) { | 773 | list_for_each_entry(event, &group_event->sibling_list, group_entry) { |
732 | if (event == partial_group) | 774 | if (event == partial_group) |
733 | break; | 775 | break; |
734 | event_sched_out(event, cpuctx, ctx); | 776 | __event_sched_out(event, cpuctx, ctx); |
735 | } | 777 | } |
736 | event_sched_out(group_event, cpuctx, ctx); | 778 | __event_sched_out(group_event, cpuctx, ctx); |
737 | 779 | ||
738 | if (txn) | 780 | pmu->cancel_txn(pmu); |
739 | pmu->cancel_txn(pmu); | ||
740 | 781 | ||
741 | return -EAGAIN; | 782 | return -EAGAIN; |
742 | } | 783 | } |
@@ -789,10 +830,10 @@ static void add_event_to_ctx(struct perf_event *event, | |||
789 | */ | 830 | */ |
790 | static void __perf_install_in_context(void *info) | 831 | static void __perf_install_in_context(void *info) |
791 | { | 832 | { |
792 | struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); | ||
793 | struct perf_event *event = info; | 833 | struct perf_event *event = info; |
794 | struct perf_event_context *ctx = event->ctx; | 834 | struct perf_event_context *ctx = event->ctx; |
795 | struct perf_event *leader = event->group_leader; | 835 | struct perf_event *leader = event->group_leader; |
836 | struct perf_cpu_context *cpuctx = __get_cpu_context(ctx); | ||
796 | int err; | 837 | int err; |
797 | 838 | ||
798 | /* | 839 | /* |
@@ -812,12 +853,6 @@ static void __perf_install_in_context(void *info) | |||
812 | ctx->is_active = 1; | 853 | ctx->is_active = 1; |
813 | update_context_time(ctx); | 854 | update_context_time(ctx); |
814 | 855 | ||
815 | /* | ||
816 | * Protect the list operation against NMI by disabling the | ||
817 | * events on a global level. NOP for non NMI based events. | ||
818 | */ | ||
819 | perf_disable(); | ||
820 | |||
821 | add_event_to_ctx(event, ctx); | 856 | add_event_to_ctx(event, ctx); |
822 | 857 | ||
823 | if (event->cpu != -1 && event->cpu != smp_processor_id()) | 858 | if (event->cpu != -1 && event->cpu != smp_processor_id()) |
@@ -855,12 +890,7 @@ static void __perf_install_in_context(void *info) | |||
855 | } | 890 | } |
856 | } | 891 | } |
857 | 892 | ||
858 | if (!err && !ctx->task && cpuctx->max_pertask) | 893 | unlock: |
859 | cpuctx->max_pertask--; | ||
860 | |||
861 | unlock: | ||
862 | perf_enable(); | ||
863 | |||
864 | raw_spin_unlock(&ctx->lock); | 894 | raw_spin_unlock(&ctx->lock); |
865 | } | 895 | } |
866 | 896 | ||
@@ -883,6 +913,8 @@ perf_install_in_context(struct perf_event_context *ctx, | |||
883 | { | 913 | { |
884 | struct task_struct *task = ctx->task; | 914 | struct task_struct *task = ctx->task; |
885 | 915 | ||
916 | event->ctx = ctx; | ||
917 | |||
886 | if (!task) { | 918 | if (!task) { |
887 | /* | 919 | /* |
888 | * Per cpu events are installed via an smp call and | 920 | * Per cpu events are installed via an smp call and |
@@ -931,10 +963,12 @@ static void __perf_event_mark_enabled(struct perf_event *event, | |||
931 | 963 | ||
932 | event->state = PERF_EVENT_STATE_INACTIVE; | 964 | event->state = PERF_EVENT_STATE_INACTIVE; |
933 | event->tstamp_enabled = ctx->time - event->total_time_enabled; | 965 | event->tstamp_enabled = ctx->time - event->total_time_enabled; |
934 | list_for_each_entry(sub, &event->sibling_list, group_entry) | 966 | list_for_each_entry(sub, &event->sibling_list, group_entry) { |
935 | if (sub->state >= PERF_EVENT_STATE_INACTIVE) | 967 | if (sub->state >= PERF_EVENT_STATE_INACTIVE) { |
936 | sub->tstamp_enabled = | 968 | sub->tstamp_enabled = |
937 | ctx->time - sub->total_time_enabled; | 969 | ctx->time - sub->total_time_enabled; |
970 | } | ||
971 | } | ||
938 | } | 972 | } |
939 | 973 | ||
940 | /* | 974 | /* |
@@ -943,9 +977,9 @@ static void __perf_event_mark_enabled(struct perf_event *event, | |||
943 | static void __perf_event_enable(void *info) | 977 | static void __perf_event_enable(void *info) |
944 | { | 978 | { |
945 | struct perf_event *event = info; | 979 | struct perf_event *event = info; |
946 | struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); | ||
947 | struct perf_event_context *ctx = event->ctx; | 980 | struct perf_event_context *ctx = event->ctx; |
948 | struct perf_event *leader = event->group_leader; | 981 | struct perf_event *leader = event->group_leader; |
982 | struct perf_cpu_context *cpuctx = __get_cpu_context(ctx); | ||
949 | int err; | 983 | int err; |
950 | 984 | ||
951 | /* | 985 | /* |
@@ -979,12 +1013,10 @@ static void __perf_event_enable(void *info) | |||
979 | if (!group_can_go_on(event, cpuctx, 1)) { | 1013 | if (!group_can_go_on(event, cpuctx, 1)) { |
980 | err = -EEXIST; | 1014 | err = -EEXIST; |
981 | } else { | 1015 | } else { |
982 | perf_disable(); | ||
983 | if (event == leader) | 1016 | if (event == leader) |
984 | err = group_sched_in(event, cpuctx, ctx); | 1017 | err = group_sched_in(event, cpuctx, ctx); |
985 | else | 1018 | else |
986 | err = event_sched_in(event, cpuctx, ctx); | 1019 | err = event_sched_in(event, cpuctx, ctx); |
987 | perf_enable(); | ||
988 | } | 1020 | } |
989 | 1021 | ||
990 | if (err) { | 1022 | if (err) { |
@@ -1000,7 +1032,7 @@ static void __perf_event_enable(void *info) | |||
1000 | } | 1032 | } |
1001 | } | 1033 | } |
1002 | 1034 | ||
1003 | unlock: | 1035 | unlock: |
1004 | raw_spin_unlock(&ctx->lock); | 1036 | raw_spin_unlock(&ctx->lock); |
1005 | } | 1037 | } |
1006 | 1038 | ||
@@ -1041,7 +1073,7 @@ void perf_event_enable(struct perf_event *event) | |||
1041 | if (event->state == PERF_EVENT_STATE_ERROR) | 1073 | if (event->state == PERF_EVENT_STATE_ERROR) |
1042 | event->state = PERF_EVENT_STATE_OFF; | 1074 | event->state = PERF_EVENT_STATE_OFF; |
1043 | 1075 | ||
1044 | retry: | 1076 | retry: |
1045 | raw_spin_unlock_irq(&ctx->lock); | 1077 | raw_spin_unlock_irq(&ctx->lock); |
1046 | task_oncpu_function_call(task, __perf_event_enable, event); | 1078 | task_oncpu_function_call(task, __perf_event_enable, event); |
1047 | 1079 | ||
@@ -1061,7 +1093,7 @@ void perf_event_enable(struct perf_event *event) | |||
1061 | if (event->state == PERF_EVENT_STATE_OFF) | 1093 | if (event->state == PERF_EVENT_STATE_OFF) |
1062 | __perf_event_mark_enabled(event, ctx); | 1094 | __perf_event_mark_enabled(event, ctx); |
1063 | 1095 | ||
1064 | out: | 1096 | out: |
1065 | raw_spin_unlock_irq(&ctx->lock); | 1097 | raw_spin_unlock_irq(&ctx->lock); |
1066 | } | 1098 | } |
1067 | 1099 | ||
@@ -1092,26 +1124,26 @@ static void ctx_sched_out(struct perf_event_context *ctx, | |||
1092 | struct perf_event *event; | 1124 | struct perf_event *event; |
1093 | 1125 | ||
1094 | raw_spin_lock(&ctx->lock); | 1126 | raw_spin_lock(&ctx->lock); |
1127 | perf_pmu_disable(ctx->pmu); | ||
1095 | ctx->is_active = 0; | 1128 | ctx->is_active = 0; |
1096 | if (likely(!ctx->nr_events)) | 1129 | if (likely(!ctx->nr_events)) |
1097 | goto out; | 1130 | goto out; |
1098 | update_context_time(ctx); | 1131 | update_context_time(ctx); |
1099 | 1132 | ||
1100 | perf_disable(); | ||
1101 | if (!ctx->nr_active) | 1133 | if (!ctx->nr_active) |
1102 | goto out_enable; | 1134 | goto out; |
1103 | 1135 | ||
1104 | if (event_type & EVENT_PINNED) | 1136 | if (event_type & EVENT_PINNED) { |
1105 | list_for_each_entry(event, &ctx->pinned_groups, group_entry) | 1137 | list_for_each_entry(event, &ctx->pinned_groups, group_entry) |
1106 | group_sched_out(event, cpuctx, ctx); | 1138 | group_sched_out(event, cpuctx, ctx); |
1139 | } | ||
1107 | 1140 | ||
1108 | if (event_type & EVENT_FLEXIBLE) | 1141 | if (event_type & EVENT_FLEXIBLE) { |
1109 | list_for_each_entry(event, &ctx->flexible_groups, group_entry) | 1142 | list_for_each_entry(event, &ctx->flexible_groups, group_entry) |
1110 | group_sched_out(event, cpuctx, ctx); | 1143 | group_sched_out(event, cpuctx, ctx); |
1111 | 1144 | } | |
1112 | out_enable: | 1145 | out: |
1113 | perf_enable(); | 1146 | perf_pmu_enable(ctx->pmu); |
1114 | out: | ||
1115 | raw_spin_unlock(&ctx->lock); | 1147 | raw_spin_unlock(&ctx->lock); |
1116 | } | 1148 | } |
1117 | 1149 | ||
@@ -1209,34 +1241,25 @@ static void perf_event_sync_stat(struct perf_event_context *ctx, | |||
1209 | } | 1241 | } |
1210 | } | 1242 | } |
1211 | 1243 | ||
1212 | /* | 1244 | void perf_event_context_sched_out(struct task_struct *task, int ctxn, |
1213 | * Called from scheduler to remove the events of the current task, | 1245 | struct task_struct *next) |
1214 | * with interrupts disabled. | ||
1215 | * | ||
1216 | * We stop each event and update the event value in event->count. | ||
1217 | * | ||
1218 | * This does not protect us against NMI, but disable() | ||
1219 | * sets the disabled bit in the control field of event _before_ | ||
1220 | * accessing the event control register. If a NMI hits, then it will | ||
1221 | * not restart the event. | ||
1222 | */ | ||
1223 | void perf_event_task_sched_out(struct task_struct *task, | ||
1224 | struct task_struct *next) | ||
1225 | { | 1246 | { |
1226 | struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); | 1247 | struct perf_event_context *ctx = task->perf_event_ctxp[ctxn]; |
1227 | struct perf_event_context *ctx = task->perf_event_ctxp; | ||
1228 | struct perf_event_context *next_ctx; | 1248 | struct perf_event_context *next_ctx; |
1229 | struct perf_event_context *parent; | 1249 | struct perf_event_context *parent; |
1250 | struct perf_cpu_context *cpuctx; | ||
1230 | int do_switch = 1; | 1251 | int do_switch = 1; |
1231 | 1252 | ||
1232 | perf_sw_event(PERF_COUNT_SW_CONTEXT_SWITCHES, 1, 1, NULL, 0); | 1253 | if (likely(!ctx)) |
1254 | return; | ||
1233 | 1255 | ||
1234 | if (likely(!ctx || !cpuctx->task_ctx)) | 1256 | cpuctx = __get_cpu_context(ctx); |
1257 | if (!cpuctx->task_ctx) | ||
1235 | return; | 1258 | return; |
1236 | 1259 | ||
1237 | rcu_read_lock(); | 1260 | rcu_read_lock(); |
1238 | parent = rcu_dereference(ctx->parent_ctx); | 1261 | parent = rcu_dereference(ctx->parent_ctx); |
1239 | next_ctx = next->perf_event_ctxp; | 1262 | next_ctx = next->perf_event_ctxp[ctxn]; |
1240 | if (parent && next_ctx && | 1263 | if (parent && next_ctx && |
1241 | rcu_dereference(next_ctx->parent_ctx) == parent) { | 1264 | rcu_dereference(next_ctx->parent_ctx) == parent) { |
1242 | /* | 1265 | /* |
@@ -1255,8 +1278,8 @@ void perf_event_task_sched_out(struct task_struct *task, | |||
1255 | * XXX do we need a memory barrier of sorts | 1278 | * XXX do we need a memory barrier of sorts |
1256 | * wrt to rcu_dereference() of perf_event_ctxp | 1279 | * wrt to rcu_dereference() of perf_event_ctxp |
1257 | */ | 1280 | */ |
1258 | task->perf_event_ctxp = next_ctx; | 1281 | task->perf_event_ctxp[ctxn] = next_ctx; |
1259 | next->perf_event_ctxp = ctx; | 1282 | next->perf_event_ctxp[ctxn] = ctx; |
1260 | ctx->task = next; | 1283 | ctx->task = next; |
1261 | next_ctx->task = task; | 1284 | next_ctx->task = task; |
1262 | do_switch = 0; | 1285 | do_switch = 0; |
@@ -1274,10 +1297,35 @@ void perf_event_task_sched_out(struct task_struct *task, | |||
1274 | } | 1297 | } |
1275 | } | 1298 | } |
1276 | 1299 | ||
1300 | #define for_each_task_context_nr(ctxn) \ | ||
1301 | for ((ctxn) = 0; (ctxn) < perf_nr_task_contexts; (ctxn)++) | ||
1302 | |||
1303 | /* | ||
1304 | * Called from scheduler to remove the events of the current task, | ||
1305 | * with interrupts disabled. | ||
1306 | * | ||
1307 | * We stop each event and update the event value in event->count. | ||
1308 | * | ||
1309 | * This does not protect us against NMI, but disable() | ||
1310 | * sets the disabled bit in the control field of event _before_ | ||
1311 | * accessing the event control register. If a NMI hits, then it will | ||
1312 | * not restart the event. | ||
1313 | */ | ||
1314 | void __perf_event_task_sched_out(struct task_struct *task, | ||
1315 | struct task_struct *next) | ||
1316 | { | ||
1317 | int ctxn; | ||
1318 | |||
1319 | perf_sw_event(PERF_COUNT_SW_CONTEXT_SWITCHES, 1, 1, NULL, 0); | ||
1320 | |||
1321 | for_each_task_context_nr(ctxn) | ||
1322 | perf_event_context_sched_out(task, ctxn, next); | ||
1323 | } | ||
1324 | |||
1277 | static void task_ctx_sched_out(struct perf_event_context *ctx, | 1325 | static void task_ctx_sched_out(struct perf_event_context *ctx, |
1278 | enum event_type_t event_type) | 1326 | enum event_type_t event_type) |
1279 | { | 1327 | { |
1280 | struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); | 1328 | struct perf_cpu_context *cpuctx = __get_cpu_context(ctx); |
1281 | 1329 | ||
1282 | if (!cpuctx->task_ctx) | 1330 | if (!cpuctx->task_ctx) |
1283 | return; | 1331 | return; |
@@ -1292,14 +1340,6 @@ static void task_ctx_sched_out(struct perf_event_context *ctx, | |||
1292 | /* | 1340 | /* |
1293 | * Called with IRQs disabled | 1341 | * Called with IRQs disabled |
1294 | */ | 1342 | */ |
1295 | static void __perf_event_task_sched_out(struct perf_event_context *ctx) | ||
1296 | { | ||
1297 | task_ctx_sched_out(ctx, EVENT_ALL); | ||
1298 | } | ||
1299 | |||
1300 | /* | ||
1301 | * Called with IRQs disabled | ||
1302 | */ | ||
1303 | static void cpu_ctx_sched_out(struct perf_cpu_context *cpuctx, | 1343 | static void cpu_ctx_sched_out(struct perf_cpu_context *cpuctx, |
1304 | enum event_type_t event_type) | 1344 | enum event_type_t event_type) |
1305 | { | 1345 | { |
@@ -1350,9 +1390,10 @@ ctx_flexible_sched_in(struct perf_event_context *ctx, | |||
1350 | if (event->cpu != -1 && event->cpu != smp_processor_id()) | 1390 | if (event->cpu != -1 && event->cpu != smp_processor_id()) |
1351 | continue; | 1391 | continue; |
1352 | 1392 | ||
1353 | if (group_can_go_on(event, cpuctx, can_add_hw)) | 1393 | if (group_can_go_on(event, cpuctx, can_add_hw)) { |
1354 | if (group_sched_in(event, cpuctx, ctx)) | 1394 | if (group_sched_in(event, cpuctx, ctx)) |
1355 | can_add_hw = 0; | 1395 | can_add_hw = 0; |
1396 | } | ||
1356 | } | 1397 | } |
1357 | } | 1398 | } |
1358 | 1399 | ||
@@ -1368,8 +1409,6 @@ ctx_sched_in(struct perf_event_context *ctx, | |||
1368 | 1409 | ||
1369 | ctx->timestamp = perf_clock(); | 1410 | ctx->timestamp = perf_clock(); |
1370 | 1411 | ||
1371 | perf_disable(); | ||
1372 | |||
1373 | /* | 1412 | /* |
1374 | * First go through the list and put on any pinned groups | 1413 | * First go through the list and put on any pinned groups |
1375 | * in order to give them the best chance of going on. | 1414 | * in order to give them the best chance of going on. |
@@ -1381,8 +1420,7 @@ ctx_sched_in(struct perf_event_context *ctx, | |||
1381 | if (event_type & EVENT_FLEXIBLE) | 1420 | if (event_type & EVENT_FLEXIBLE) |
1382 | ctx_flexible_sched_in(ctx, cpuctx); | 1421 | ctx_flexible_sched_in(ctx, cpuctx); |
1383 | 1422 | ||
1384 | perf_enable(); | 1423 | out: |
1385 | out: | ||
1386 | raw_spin_unlock(&ctx->lock); | 1424 | raw_spin_unlock(&ctx->lock); |
1387 | } | 1425 | } |
1388 | 1426 | ||
@@ -1394,43 +1432,28 @@ static void cpu_ctx_sched_in(struct perf_cpu_context *cpuctx, | |||
1394 | ctx_sched_in(ctx, cpuctx, event_type); | 1432 | ctx_sched_in(ctx, cpuctx, event_type); |
1395 | } | 1433 | } |
1396 | 1434 | ||
1397 | static void task_ctx_sched_in(struct task_struct *task, | 1435 | static void task_ctx_sched_in(struct perf_event_context *ctx, |
1398 | enum event_type_t event_type) | 1436 | enum event_type_t event_type) |
1399 | { | 1437 | { |
1400 | struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); | 1438 | struct perf_cpu_context *cpuctx; |
1401 | struct perf_event_context *ctx = task->perf_event_ctxp; | ||
1402 | 1439 | ||
1403 | if (likely(!ctx)) | 1440 | cpuctx = __get_cpu_context(ctx); |
1404 | return; | ||
1405 | if (cpuctx->task_ctx == ctx) | 1441 | if (cpuctx->task_ctx == ctx) |
1406 | return; | 1442 | return; |
1443 | |||
1407 | ctx_sched_in(ctx, cpuctx, event_type); | 1444 | ctx_sched_in(ctx, cpuctx, event_type); |
1408 | cpuctx->task_ctx = ctx; | 1445 | cpuctx->task_ctx = ctx; |
1409 | } | 1446 | } |
1410 | /* | ||
1411 | * Called from scheduler to add the events of the current task | ||
1412 | * with interrupts disabled. | ||
1413 | * | ||
1414 | * We restore the event value and then enable it. | ||
1415 | * | ||
1416 | * This does not protect us against NMI, but enable() | ||
1417 | * sets the enabled bit in the control field of event _before_ | ||
1418 | * accessing the event control register. If a NMI hits, then it will | ||
1419 | * keep the event running. | ||
1420 | */ | ||
1421 | void perf_event_task_sched_in(struct task_struct *task) | ||
1422 | { | ||
1423 | struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); | ||
1424 | struct perf_event_context *ctx = task->perf_event_ctxp; | ||
1425 | 1447 | ||
1426 | if (likely(!ctx)) | 1448 | void perf_event_context_sched_in(struct perf_event_context *ctx) |
1427 | return; | 1449 | { |
1450 | struct perf_cpu_context *cpuctx; | ||
1428 | 1451 | ||
1452 | cpuctx = __get_cpu_context(ctx); | ||
1429 | if (cpuctx->task_ctx == ctx) | 1453 | if (cpuctx->task_ctx == ctx) |
1430 | return; | 1454 | return; |
1431 | 1455 | ||
1432 | perf_disable(); | 1456 | perf_pmu_disable(ctx->pmu); |
1433 | |||
1434 | /* | 1457 | /* |
1435 | * We want to keep the following priority order: | 1458 | * We want to keep the following priority order: |
1436 | * cpu pinned (that don't need to move), task pinned, | 1459 | * cpu pinned (that don't need to move), task pinned, |
@@ -1444,7 +1467,37 @@ void perf_event_task_sched_in(struct task_struct *task) | |||
1444 | 1467 | ||
1445 | cpuctx->task_ctx = ctx; | 1468 | cpuctx->task_ctx = ctx; |
1446 | 1469 | ||
1447 | perf_enable(); | 1470 | /* |
1471 | * Since these rotations are per-cpu, we need to ensure the | ||
1472 | * cpu-context we got scheduled on is actually rotating. | ||
1473 | */ | ||
1474 | perf_pmu_rotate_start(ctx->pmu); | ||
1475 | perf_pmu_enable(ctx->pmu); | ||
1476 | } | ||
1477 | |||
1478 | /* | ||
1479 | * Called from scheduler to add the events of the current task | ||
1480 | * with interrupts disabled. | ||
1481 | * | ||
1482 | * We restore the event value and then enable it. | ||
1483 | * | ||
1484 | * This does not protect us against NMI, but enable() | ||
1485 | * sets the enabled bit in the control field of event _before_ | ||
1486 | * accessing the event control register. If a NMI hits, then it will | ||
1487 | * keep the event running. | ||
1488 | */ | ||
1489 | void __perf_event_task_sched_in(struct task_struct *task) | ||
1490 | { | ||
1491 | struct perf_event_context *ctx; | ||
1492 | int ctxn; | ||
1493 | |||
1494 | for_each_task_context_nr(ctxn) { | ||
1495 | ctx = task->perf_event_ctxp[ctxn]; | ||
1496 | if (likely(!ctx)) | ||
1497 | continue; | ||
1498 | |||
1499 | perf_event_context_sched_in(ctx); | ||
1500 | } | ||
1448 | } | 1501 | } |
1449 | 1502 | ||
1450 | #define MAX_INTERRUPTS (~0ULL) | 1503 | #define MAX_INTERRUPTS (~0ULL) |
@@ -1524,22 +1577,6 @@ do { \ | |||
1524 | return div64_u64(dividend, divisor); | 1577 | return div64_u64(dividend, divisor); |
1525 | } | 1578 | } |
1526 | 1579 | ||
1527 | static void perf_event_stop(struct perf_event *event) | ||
1528 | { | ||
1529 | if (!event->pmu->stop) | ||
1530 | return event->pmu->disable(event); | ||
1531 | |||
1532 | return event->pmu->stop(event); | ||
1533 | } | ||
1534 | |||
1535 | static int perf_event_start(struct perf_event *event) | ||
1536 | { | ||
1537 | if (!event->pmu->start) | ||
1538 | return event->pmu->enable(event); | ||
1539 | |||
1540 | return event->pmu->start(event); | ||
1541 | } | ||
1542 | |||
1543 | static void perf_adjust_period(struct perf_event *event, u64 nsec, u64 count) | 1580 | static void perf_adjust_period(struct perf_event *event, u64 nsec, u64 count) |
1544 | { | 1581 | { |
1545 | struct hw_perf_event *hwc = &event->hw; | 1582 | struct hw_perf_event *hwc = &event->hw; |
@@ -1559,15 +1596,13 @@ static void perf_adjust_period(struct perf_event *event, u64 nsec, u64 count) | |||
1559 | hwc->sample_period = sample_period; | 1596 | hwc->sample_period = sample_period; |
1560 | 1597 | ||
1561 | if (local64_read(&hwc->period_left) > 8*sample_period) { | 1598 | if (local64_read(&hwc->period_left) > 8*sample_period) { |
1562 | perf_disable(); | 1599 | event->pmu->stop(event, PERF_EF_UPDATE); |
1563 | perf_event_stop(event); | ||
1564 | local64_set(&hwc->period_left, 0); | 1600 | local64_set(&hwc->period_left, 0); |
1565 | perf_event_start(event); | 1601 | event->pmu->start(event, PERF_EF_RELOAD); |
1566 | perf_enable(); | ||
1567 | } | 1602 | } |
1568 | } | 1603 | } |
1569 | 1604 | ||
1570 | static void perf_ctx_adjust_freq(struct perf_event_context *ctx) | 1605 | static void perf_ctx_adjust_freq(struct perf_event_context *ctx, u64 period) |
1571 | { | 1606 | { |
1572 | struct perf_event *event; | 1607 | struct perf_event *event; |
1573 | struct hw_perf_event *hwc; | 1608 | struct hw_perf_event *hwc; |
@@ -1592,23 +1627,19 @@ static void perf_ctx_adjust_freq(struct perf_event_context *ctx) | |||
1592 | */ | 1627 | */ |
1593 | if (interrupts == MAX_INTERRUPTS) { | 1628 | if (interrupts == MAX_INTERRUPTS) { |
1594 | perf_log_throttle(event, 1); | 1629 | perf_log_throttle(event, 1); |
1595 | perf_disable(); | 1630 | event->pmu->start(event, 0); |
1596 | event->pmu->unthrottle(event); | ||
1597 | perf_enable(); | ||
1598 | } | 1631 | } |
1599 | 1632 | ||
1600 | if (!event->attr.freq || !event->attr.sample_freq) | 1633 | if (!event->attr.freq || !event->attr.sample_freq) |
1601 | continue; | 1634 | continue; |
1602 | 1635 | ||
1603 | perf_disable(); | ||
1604 | event->pmu->read(event); | 1636 | event->pmu->read(event); |
1605 | now = local64_read(&event->count); | 1637 | now = local64_read(&event->count); |
1606 | delta = now - hwc->freq_count_stamp; | 1638 | delta = now - hwc->freq_count_stamp; |
1607 | hwc->freq_count_stamp = now; | 1639 | hwc->freq_count_stamp = now; |
1608 | 1640 | ||
1609 | if (delta > 0) | 1641 | if (delta > 0) |
1610 | perf_adjust_period(event, TICK_NSEC, delta); | 1642 | perf_adjust_period(event, period, delta); |
1611 | perf_enable(); | ||
1612 | } | 1643 | } |
1613 | raw_spin_unlock(&ctx->lock); | 1644 | raw_spin_unlock(&ctx->lock); |
1614 | } | 1645 | } |
@@ -1626,32 +1657,38 @@ static void rotate_ctx(struct perf_event_context *ctx) | |||
1626 | raw_spin_unlock(&ctx->lock); | 1657 | raw_spin_unlock(&ctx->lock); |
1627 | } | 1658 | } |
1628 | 1659 | ||
1629 | void perf_event_task_tick(struct task_struct *curr) | 1660 | /* |
1661 | * perf_pmu_rotate_start() and perf_rotate_context() are fully serialized | ||
1662 | * because they're strictly cpu affine and rotate_start is called with IRQs | ||
1663 | * disabled, while rotate_context is called from IRQ context. | ||
1664 | */ | ||
1665 | static void perf_rotate_context(struct perf_cpu_context *cpuctx) | ||
1630 | { | 1666 | { |
1631 | struct perf_cpu_context *cpuctx; | 1667 | u64 interval = (u64)cpuctx->jiffies_interval * TICK_NSEC; |
1632 | struct perf_event_context *ctx; | 1668 | struct perf_event_context *ctx = NULL; |
1633 | int rotate = 0; | 1669 | int rotate = 0, remove = 1; |
1634 | |||
1635 | if (!atomic_read(&nr_events)) | ||
1636 | return; | ||
1637 | 1670 | ||
1638 | cpuctx = &__get_cpu_var(perf_cpu_context); | 1671 | if (cpuctx->ctx.nr_events) { |
1639 | if (cpuctx->ctx.nr_events && | 1672 | remove = 0; |
1640 | cpuctx->ctx.nr_events != cpuctx->ctx.nr_active) | 1673 | if (cpuctx->ctx.nr_events != cpuctx->ctx.nr_active) |
1641 | rotate = 1; | 1674 | rotate = 1; |
1675 | } | ||
1642 | 1676 | ||
1643 | ctx = curr->perf_event_ctxp; | 1677 | ctx = cpuctx->task_ctx; |
1644 | if (ctx && ctx->nr_events && ctx->nr_events != ctx->nr_active) | 1678 | if (ctx && ctx->nr_events) { |
1645 | rotate = 1; | 1679 | remove = 0; |
1680 | if (ctx->nr_events != ctx->nr_active) | ||
1681 | rotate = 1; | ||
1682 | } | ||
1646 | 1683 | ||
1647 | perf_ctx_adjust_freq(&cpuctx->ctx); | 1684 | perf_pmu_disable(cpuctx->ctx.pmu); |
1685 | perf_ctx_adjust_freq(&cpuctx->ctx, interval); | ||
1648 | if (ctx) | 1686 | if (ctx) |
1649 | perf_ctx_adjust_freq(ctx); | 1687 | perf_ctx_adjust_freq(ctx, interval); |
1650 | 1688 | ||
1651 | if (!rotate) | 1689 | if (!rotate) |
1652 | return; | 1690 | goto done; |
1653 | 1691 | ||
1654 | perf_disable(); | ||
1655 | cpu_ctx_sched_out(cpuctx, EVENT_FLEXIBLE); | 1692 | cpu_ctx_sched_out(cpuctx, EVENT_FLEXIBLE); |
1656 | if (ctx) | 1693 | if (ctx) |
1657 | task_ctx_sched_out(ctx, EVENT_FLEXIBLE); | 1694 | task_ctx_sched_out(ctx, EVENT_FLEXIBLE); |
@@ -1662,8 +1699,27 @@ void perf_event_task_tick(struct task_struct *curr) | |||
1662 | 1699 | ||
1663 | cpu_ctx_sched_in(cpuctx, EVENT_FLEXIBLE); | 1700 | cpu_ctx_sched_in(cpuctx, EVENT_FLEXIBLE); |
1664 | if (ctx) | 1701 | if (ctx) |
1665 | task_ctx_sched_in(curr, EVENT_FLEXIBLE); | 1702 | task_ctx_sched_in(ctx, EVENT_FLEXIBLE); |
1666 | perf_enable(); | 1703 | |
1704 | done: | ||
1705 | if (remove) | ||
1706 | list_del_init(&cpuctx->rotation_list); | ||
1707 | |||
1708 | perf_pmu_enable(cpuctx->ctx.pmu); | ||
1709 | } | ||
1710 | |||
1711 | void perf_event_task_tick(void) | ||
1712 | { | ||
1713 | struct list_head *head = &__get_cpu_var(rotation_list); | ||
1714 | struct perf_cpu_context *cpuctx, *tmp; | ||
1715 | |||
1716 | WARN_ON(!irqs_disabled()); | ||
1717 | |||
1718 | list_for_each_entry_safe(cpuctx, tmp, head, rotation_list) { | ||
1719 | if (cpuctx->jiffies_interval == 1 || | ||
1720 | !(jiffies % cpuctx->jiffies_interval)) | ||
1721 | perf_rotate_context(cpuctx); | ||
1722 | } | ||
1667 | } | 1723 | } |
1668 | 1724 | ||
1669 | static int event_enable_on_exec(struct perf_event *event, | 1725 | static int event_enable_on_exec(struct perf_event *event, |
@@ -1685,20 +1741,18 @@ static int event_enable_on_exec(struct perf_event *event, | |||
1685 | * Enable all of a task's events that have been marked enable-on-exec. | 1741 | * Enable all of a task's events that have been marked enable-on-exec. |
1686 | * This expects task == current. | 1742 | * This expects task == current. |
1687 | */ | 1743 | */ |
1688 | static void perf_event_enable_on_exec(struct task_struct *task) | 1744 | static void perf_event_enable_on_exec(struct perf_event_context *ctx) |
1689 | { | 1745 | { |
1690 | struct perf_event_context *ctx; | ||
1691 | struct perf_event *event; | 1746 | struct perf_event *event; |
1692 | unsigned long flags; | 1747 | unsigned long flags; |
1693 | int enabled = 0; | 1748 | int enabled = 0; |
1694 | int ret; | 1749 | int ret; |
1695 | 1750 | ||
1696 | local_irq_save(flags); | 1751 | local_irq_save(flags); |
1697 | ctx = task->perf_event_ctxp; | ||
1698 | if (!ctx || !ctx->nr_events) | 1752 | if (!ctx || !ctx->nr_events) |
1699 | goto out; | 1753 | goto out; |
1700 | 1754 | ||
1701 | __perf_event_task_sched_out(ctx); | 1755 | task_ctx_sched_out(ctx, EVENT_ALL); |
1702 | 1756 | ||
1703 | raw_spin_lock(&ctx->lock); | 1757 | raw_spin_lock(&ctx->lock); |
1704 | 1758 | ||
@@ -1722,8 +1776,8 @@ static void perf_event_enable_on_exec(struct task_struct *task) | |||
1722 | 1776 | ||
1723 | raw_spin_unlock(&ctx->lock); | 1777 | raw_spin_unlock(&ctx->lock); |
1724 | 1778 | ||
1725 | perf_event_task_sched_in(task); | 1779 | perf_event_context_sched_in(ctx); |
1726 | out: | 1780 | out: |
1727 | local_irq_restore(flags); | 1781 | local_irq_restore(flags); |
1728 | } | 1782 | } |
1729 | 1783 | ||
@@ -1732,9 +1786,9 @@ static void perf_event_enable_on_exec(struct task_struct *task) | |||
1732 | */ | 1786 | */ |
1733 | static void __perf_event_read(void *info) | 1787 | static void __perf_event_read(void *info) |
1734 | { | 1788 | { |
1735 | struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); | ||
1736 | struct perf_event *event = info; | 1789 | struct perf_event *event = info; |
1737 | struct perf_event_context *ctx = event->ctx; | 1790 | struct perf_event_context *ctx = event->ctx; |
1791 | struct perf_cpu_context *cpuctx = __get_cpu_context(ctx); | ||
1738 | 1792 | ||
1739 | /* | 1793 | /* |
1740 | * If this is a task context, we need to check whether it is | 1794 | * If this is a task context, we need to check whether it is |
@@ -1773,7 +1827,13 @@ static u64 perf_event_read(struct perf_event *event) | |||
1773 | unsigned long flags; | 1827 | unsigned long flags; |
1774 | 1828 | ||
1775 | raw_spin_lock_irqsave(&ctx->lock, flags); | 1829 | raw_spin_lock_irqsave(&ctx->lock, flags); |
1776 | update_context_time(ctx); | 1830 | /* |
1831 | * may read while context is not active | ||
1832 | * (e.g., thread is blocked), in that case | ||
1833 | * we cannot update context time | ||
1834 | */ | ||
1835 | if (ctx->is_active) | ||
1836 | update_context_time(ctx); | ||
1777 | update_event_times(event); | 1837 | update_event_times(event); |
1778 | raw_spin_unlock_irqrestore(&ctx->lock, flags); | 1838 | raw_spin_unlock_irqrestore(&ctx->lock, flags); |
1779 | } | 1839 | } |
@@ -1782,11 +1842,219 @@ static u64 perf_event_read(struct perf_event *event) | |||
1782 | } | 1842 | } |
1783 | 1843 | ||
1784 | /* | 1844 | /* |
1785 | * Initialize the perf_event context in a task_struct: | 1845 | * Callchain support |
1786 | */ | 1846 | */ |
1847 | |||
1848 | struct callchain_cpus_entries { | ||
1849 | struct rcu_head rcu_head; | ||
1850 | struct perf_callchain_entry *cpu_entries[0]; | ||
1851 | }; | ||
1852 | |||
1853 | static DEFINE_PER_CPU(int, callchain_recursion[PERF_NR_CONTEXTS]); | ||
1854 | static atomic_t nr_callchain_events; | ||
1855 | static DEFINE_MUTEX(callchain_mutex); | ||
1856 | struct callchain_cpus_entries *callchain_cpus_entries; | ||
1857 | |||
1858 | |||
1859 | __weak void perf_callchain_kernel(struct perf_callchain_entry *entry, | ||
1860 | struct pt_regs *regs) | ||
1861 | { | ||
1862 | } | ||
1863 | |||
1864 | __weak void perf_callchain_user(struct perf_callchain_entry *entry, | ||
1865 | struct pt_regs *regs) | ||
1866 | { | ||
1867 | } | ||
1868 | |||
1869 | static void release_callchain_buffers_rcu(struct rcu_head *head) | ||
1870 | { | ||
1871 | struct callchain_cpus_entries *entries; | ||
1872 | int cpu; | ||
1873 | |||
1874 | entries = container_of(head, struct callchain_cpus_entries, rcu_head); | ||
1875 | |||
1876 | for_each_possible_cpu(cpu) | ||
1877 | kfree(entries->cpu_entries[cpu]); | ||
1878 | |||
1879 | kfree(entries); | ||
1880 | } | ||
1881 | |||
1882 | static void release_callchain_buffers(void) | ||
1883 | { | ||
1884 | struct callchain_cpus_entries *entries; | ||
1885 | |||
1886 | entries = callchain_cpus_entries; | ||
1887 | rcu_assign_pointer(callchain_cpus_entries, NULL); | ||
1888 | call_rcu(&entries->rcu_head, release_callchain_buffers_rcu); | ||
1889 | } | ||
1890 | |||
1891 | static int alloc_callchain_buffers(void) | ||
1892 | { | ||
1893 | int cpu; | ||
1894 | int size; | ||
1895 | struct callchain_cpus_entries *entries; | ||
1896 | |||
1897 | /* | ||
1898 | * We can't use the percpu allocation API for data that can be | ||
1899 | * accessed from NMI. Use a temporary manual per cpu allocation | ||
1900 | * until that gets sorted out. | ||
1901 | */ | ||
1902 | size = sizeof(*entries) + sizeof(struct perf_callchain_entry *) * | ||
1903 | num_possible_cpus(); | ||
1904 | |||
1905 | entries = kzalloc(size, GFP_KERNEL); | ||
1906 | if (!entries) | ||
1907 | return -ENOMEM; | ||
1908 | |||
1909 | size = sizeof(struct perf_callchain_entry) * PERF_NR_CONTEXTS; | ||
1910 | |||
1911 | for_each_possible_cpu(cpu) { | ||
1912 | entries->cpu_entries[cpu] = kmalloc_node(size, GFP_KERNEL, | ||
1913 | cpu_to_node(cpu)); | ||
1914 | if (!entries->cpu_entries[cpu]) | ||
1915 | goto fail; | ||
1916 | } | ||
1917 | |||
1918 | rcu_assign_pointer(callchain_cpus_entries, entries); | ||
1919 | |||
1920 | return 0; | ||
1921 | |||
1922 | fail: | ||
1923 | for_each_possible_cpu(cpu) | ||
1924 | kfree(entries->cpu_entries[cpu]); | ||
1925 | kfree(entries); | ||
1926 | |||
1927 | return -ENOMEM; | ||
1928 | } | ||
1929 | |||
1930 | static int get_callchain_buffers(void) | ||
1931 | { | ||
1932 | int err = 0; | ||
1933 | int count; | ||
1934 | |||
1935 | mutex_lock(&callchain_mutex); | ||
1936 | |||
1937 | count = atomic_inc_return(&nr_callchain_events); | ||
1938 | if (WARN_ON_ONCE(count < 1)) { | ||
1939 | err = -EINVAL; | ||
1940 | goto exit; | ||
1941 | } | ||
1942 | |||
1943 | if (count > 1) { | ||
1944 | /* If the allocation failed, give up */ | ||
1945 | if (!callchain_cpus_entries) | ||
1946 | err = -ENOMEM; | ||
1947 | goto exit; | ||
1948 | } | ||
1949 | |||
1950 | err = alloc_callchain_buffers(); | ||
1951 | if (err) | ||
1952 | release_callchain_buffers(); | ||
1953 | exit: | ||
1954 | mutex_unlock(&callchain_mutex); | ||
1955 | |||
1956 | return err; | ||
1957 | } | ||
1958 | |||
1959 | static void put_callchain_buffers(void) | ||
1960 | { | ||
1961 | if (atomic_dec_and_mutex_lock(&nr_callchain_events, &callchain_mutex)) { | ||
1962 | release_callchain_buffers(); | ||
1963 | mutex_unlock(&callchain_mutex); | ||
1964 | } | ||
1965 | } | ||
1966 | |||
1967 | static int get_recursion_context(int *recursion) | ||
1968 | { | ||
1969 | int rctx; | ||
1970 | |||
1971 | if (in_nmi()) | ||
1972 | rctx = 3; | ||
1973 | else if (in_irq()) | ||
1974 | rctx = 2; | ||
1975 | else if (in_softirq()) | ||
1976 | rctx = 1; | ||
1977 | else | ||
1978 | rctx = 0; | ||
1979 | |||
1980 | if (recursion[rctx]) | ||
1981 | return -1; | ||
1982 | |||
1983 | recursion[rctx]++; | ||
1984 | barrier(); | ||
1985 | |||
1986 | return rctx; | ||
1987 | } | ||
1988 | |||
1989 | static inline void put_recursion_context(int *recursion, int rctx) | ||
1990 | { | ||
1991 | barrier(); | ||
1992 | recursion[rctx]--; | ||
1993 | } | ||
1994 | |||
1995 | static struct perf_callchain_entry *get_callchain_entry(int *rctx) | ||
1996 | { | ||
1997 | int cpu; | ||
1998 | struct callchain_cpus_entries *entries; | ||
1999 | |||
2000 | *rctx = get_recursion_context(__get_cpu_var(callchain_recursion)); | ||
2001 | if (*rctx == -1) | ||
2002 | return NULL; | ||
2003 | |||
2004 | entries = rcu_dereference(callchain_cpus_entries); | ||
2005 | if (!entries) | ||
2006 | return NULL; | ||
2007 | |||
2008 | cpu = smp_processor_id(); | ||
2009 | |||
2010 | return &entries->cpu_entries[cpu][*rctx]; | ||
2011 | } | ||
2012 | |||
1787 | static void | 2013 | static void |
1788 | __perf_event_init_context(struct perf_event_context *ctx, | 2014 | put_callchain_entry(int rctx) |
1789 | struct task_struct *task) | 2015 | { |
2016 | put_recursion_context(__get_cpu_var(callchain_recursion), rctx); | ||
2017 | } | ||
2018 | |||
2019 | static struct perf_callchain_entry *perf_callchain(struct pt_regs *regs) | ||
2020 | { | ||
2021 | int rctx; | ||
2022 | struct perf_callchain_entry *entry; | ||
2023 | |||
2024 | |||
2025 | entry = get_callchain_entry(&rctx); | ||
2026 | if (rctx == -1) | ||
2027 | return NULL; | ||
2028 | |||
2029 | if (!entry) | ||
2030 | goto exit_put; | ||
2031 | |||
2032 | entry->nr = 0; | ||
2033 | |||
2034 | if (!user_mode(regs)) { | ||
2035 | perf_callchain_store(entry, PERF_CONTEXT_KERNEL); | ||
2036 | perf_callchain_kernel(entry, regs); | ||
2037 | if (current->mm) | ||
2038 | regs = task_pt_regs(current); | ||
2039 | else | ||
2040 | regs = NULL; | ||
2041 | } | ||
2042 | |||
2043 | if (regs) { | ||
2044 | perf_callchain_store(entry, PERF_CONTEXT_USER); | ||
2045 | perf_callchain_user(entry, regs); | ||
2046 | } | ||
2047 | |||
2048 | exit_put: | ||
2049 | put_callchain_entry(rctx); | ||
2050 | |||
2051 | return entry; | ||
2052 | } | ||
2053 | |||
2054 | /* | ||
2055 | * Initialize the perf_event context in a task_struct: | ||
2056 | */ | ||
2057 | static void __perf_event_init_context(struct perf_event_context *ctx) | ||
1790 | { | 2058 | { |
1791 | raw_spin_lock_init(&ctx->lock); | 2059 | raw_spin_lock_init(&ctx->lock); |
1792 | mutex_init(&ctx->mutex); | 2060 | mutex_init(&ctx->mutex); |
@@ -1794,45 +2062,38 @@ __perf_event_init_context(struct perf_event_context *ctx, | |||
1794 | INIT_LIST_HEAD(&ctx->flexible_groups); | 2062 | INIT_LIST_HEAD(&ctx->flexible_groups); |
1795 | INIT_LIST_HEAD(&ctx->event_list); | 2063 | INIT_LIST_HEAD(&ctx->event_list); |
1796 | atomic_set(&ctx->refcount, 1); | 2064 | atomic_set(&ctx->refcount, 1); |
1797 | ctx->task = task; | ||
1798 | } | 2065 | } |
1799 | 2066 | ||
1800 | static struct perf_event_context *find_get_context(pid_t pid, int cpu) | 2067 | static struct perf_event_context * |
2068 | alloc_perf_context(struct pmu *pmu, struct task_struct *task) | ||
1801 | { | 2069 | { |
1802 | struct perf_event_context *ctx; | 2070 | struct perf_event_context *ctx; |
1803 | struct perf_cpu_context *cpuctx; | ||
1804 | struct task_struct *task; | ||
1805 | unsigned long flags; | ||
1806 | int err; | ||
1807 | |||
1808 | if (pid == -1 && cpu != -1) { | ||
1809 | /* Must be root to operate on a CPU event: */ | ||
1810 | if (perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN)) | ||
1811 | return ERR_PTR(-EACCES); | ||
1812 | 2071 | ||
1813 | if (cpu < 0 || cpu >= nr_cpumask_bits) | 2072 | ctx = kzalloc(sizeof(struct perf_event_context), GFP_KERNEL); |
1814 | return ERR_PTR(-EINVAL); | 2073 | if (!ctx) |
2074 | return NULL; | ||
1815 | 2075 | ||
1816 | /* | 2076 | __perf_event_init_context(ctx); |
1817 | * We could be clever and allow to attach a event to an | 2077 | if (task) { |
1818 | * offline CPU and activate it when the CPU comes up, but | 2078 | ctx->task = task; |
1819 | * that's for later. | 2079 | get_task_struct(task); |
1820 | */ | 2080 | } |
1821 | if (!cpu_online(cpu)) | 2081 | ctx->pmu = pmu; |
1822 | return ERR_PTR(-ENODEV); | ||
1823 | 2082 | ||
1824 | cpuctx = &per_cpu(perf_cpu_context, cpu); | 2083 | return ctx; |
1825 | ctx = &cpuctx->ctx; | 2084 | } |
1826 | get_ctx(ctx); | ||
1827 | 2085 | ||
1828 | return ctx; | 2086 | static struct task_struct * |
1829 | } | 2087 | find_lively_task_by_vpid(pid_t vpid) |
2088 | { | ||
2089 | struct task_struct *task; | ||
2090 | int err; | ||
1830 | 2091 | ||
1831 | rcu_read_lock(); | 2092 | rcu_read_lock(); |
1832 | if (!pid) | 2093 | if (!vpid) |
1833 | task = current; | 2094 | task = current; |
1834 | else | 2095 | else |
1835 | task = find_task_by_vpid(pid); | 2096 | task = find_task_by_vpid(vpid); |
1836 | if (task) | 2097 | if (task) |
1837 | get_task_struct(task); | 2098 | get_task_struct(task); |
1838 | rcu_read_unlock(); | 2099 | rcu_read_unlock(); |
@@ -1852,36 +2113,78 @@ static struct perf_event_context *find_get_context(pid_t pid, int cpu) | |||
1852 | if (!ptrace_may_access(task, PTRACE_MODE_READ)) | 2113 | if (!ptrace_may_access(task, PTRACE_MODE_READ)) |
1853 | goto errout; | 2114 | goto errout; |
1854 | 2115 | ||
1855 | retry: | 2116 | return task; |
1856 | ctx = perf_lock_task_context(task, &flags); | 2117 | errout: |
2118 | put_task_struct(task); | ||
2119 | return ERR_PTR(err); | ||
2120 | |||
2121 | } | ||
2122 | |||
2123 | static struct perf_event_context * | ||
2124 | find_get_context(struct pmu *pmu, struct task_struct *task, int cpu) | ||
2125 | { | ||
2126 | struct perf_event_context *ctx; | ||
2127 | struct perf_cpu_context *cpuctx; | ||
2128 | unsigned long flags; | ||
2129 | int ctxn, err; | ||
2130 | |||
2131 | if (!task && cpu != -1) { | ||
2132 | /* Must be root to operate on a CPU event: */ | ||
2133 | if (perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN)) | ||
2134 | return ERR_PTR(-EACCES); | ||
2135 | |||
2136 | if (cpu < 0 || cpu >= nr_cpumask_bits) | ||
2137 | return ERR_PTR(-EINVAL); | ||
2138 | |||
2139 | /* | ||
2140 | * We could be clever and allow to attach a event to an | ||
2141 | * offline CPU and activate it when the CPU comes up, but | ||
2142 | * that's for later. | ||
2143 | */ | ||
2144 | if (!cpu_online(cpu)) | ||
2145 | return ERR_PTR(-ENODEV); | ||
2146 | |||
2147 | cpuctx = per_cpu_ptr(pmu->pmu_cpu_context, cpu); | ||
2148 | ctx = &cpuctx->ctx; | ||
2149 | get_ctx(ctx); | ||
2150 | |||
2151 | return ctx; | ||
2152 | } | ||
2153 | |||
2154 | err = -EINVAL; | ||
2155 | ctxn = pmu->task_ctx_nr; | ||
2156 | if (ctxn < 0) | ||
2157 | goto errout; | ||
2158 | |||
2159 | retry: | ||
2160 | ctx = perf_lock_task_context(task, ctxn, &flags); | ||
1857 | if (ctx) { | 2161 | if (ctx) { |
1858 | unclone_ctx(ctx); | 2162 | unclone_ctx(ctx); |
1859 | raw_spin_unlock_irqrestore(&ctx->lock, flags); | 2163 | raw_spin_unlock_irqrestore(&ctx->lock, flags); |
1860 | } | 2164 | } |
1861 | 2165 | ||
1862 | if (!ctx) { | 2166 | if (!ctx) { |
1863 | ctx = kzalloc(sizeof(struct perf_event_context), GFP_KERNEL); | 2167 | ctx = alloc_perf_context(pmu, task); |
1864 | err = -ENOMEM; | 2168 | err = -ENOMEM; |
1865 | if (!ctx) | 2169 | if (!ctx) |
1866 | goto errout; | 2170 | goto errout; |
1867 | __perf_event_init_context(ctx, task); | 2171 | |
1868 | get_ctx(ctx); | 2172 | get_ctx(ctx); |
1869 | if (cmpxchg(&task->perf_event_ctxp, NULL, ctx)) { | 2173 | |
2174 | if (cmpxchg(&task->perf_event_ctxp[ctxn], NULL, ctx)) { | ||
1870 | /* | 2175 | /* |
1871 | * We raced with some other task; use | 2176 | * We raced with some other task; use |
1872 | * the context they set. | 2177 | * the context they set. |
1873 | */ | 2178 | */ |
2179 | put_task_struct(task); | ||
1874 | kfree(ctx); | 2180 | kfree(ctx); |
1875 | goto retry; | 2181 | goto retry; |
1876 | } | 2182 | } |
1877 | get_task_struct(task); | ||
1878 | } | 2183 | } |
1879 | 2184 | ||
1880 | put_task_struct(task); | ||
1881 | return ctx; | 2185 | return ctx; |
1882 | 2186 | ||
1883 | errout: | 2187 | errout: |
1884 | put_task_struct(task); | ||
1885 | return ERR_PTR(err); | 2188 | return ERR_PTR(err); |
1886 | } | 2189 | } |
1887 | 2190 | ||
@@ -1898,21 +2201,23 @@ static void free_event_rcu(struct rcu_head *head) | |||
1898 | kfree(event); | 2201 | kfree(event); |
1899 | } | 2202 | } |
1900 | 2203 | ||
1901 | static void perf_pending_sync(struct perf_event *event); | ||
1902 | static void perf_buffer_put(struct perf_buffer *buffer); | 2204 | static void perf_buffer_put(struct perf_buffer *buffer); |
1903 | 2205 | ||
1904 | static void free_event(struct perf_event *event) | 2206 | static void free_event(struct perf_event *event) |
1905 | { | 2207 | { |
1906 | perf_pending_sync(event); | 2208 | irq_work_sync(&event->pending); |
1907 | 2209 | ||
1908 | if (!event->parent) { | 2210 | if (!event->parent) { |
1909 | atomic_dec(&nr_events); | 2211 | if (event->attach_state & PERF_ATTACH_TASK) |
2212 | jump_label_dec(&perf_task_events); | ||
1910 | if (event->attr.mmap || event->attr.mmap_data) | 2213 | if (event->attr.mmap || event->attr.mmap_data) |
1911 | atomic_dec(&nr_mmap_events); | 2214 | atomic_dec(&nr_mmap_events); |
1912 | if (event->attr.comm) | 2215 | if (event->attr.comm) |
1913 | atomic_dec(&nr_comm_events); | 2216 | atomic_dec(&nr_comm_events); |
1914 | if (event->attr.task) | 2217 | if (event->attr.task) |
1915 | atomic_dec(&nr_task_events); | 2218 | atomic_dec(&nr_task_events); |
2219 | if (event->attr.sample_type & PERF_SAMPLE_CALLCHAIN) | ||
2220 | put_callchain_buffers(); | ||
1916 | } | 2221 | } |
1917 | 2222 | ||
1918 | if (event->buffer) { | 2223 | if (event->buffer) { |
@@ -1923,7 +2228,9 @@ static void free_event(struct perf_event *event) | |||
1923 | if (event->destroy) | 2228 | if (event->destroy) |
1924 | event->destroy(event); | 2229 | event->destroy(event); |
1925 | 2230 | ||
1926 | put_ctx(event->ctx); | 2231 | if (event->ctx) |
2232 | put_ctx(event->ctx); | ||
2233 | |||
1927 | call_rcu(&event->rcu_head, free_event_rcu); | 2234 | call_rcu(&event->rcu_head, free_event_rcu); |
1928 | } | 2235 | } |
1929 | 2236 | ||
@@ -2342,6 +2649,9 @@ int perf_event_task_disable(void) | |||
2342 | 2649 | ||
2343 | static int perf_event_index(struct perf_event *event) | 2650 | static int perf_event_index(struct perf_event *event) |
2344 | { | 2651 | { |
2652 | if (event->hw.state & PERF_HES_STOPPED) | ||
2653 | return 0; | ||
2654 | |||
2345 | if (event->state != PERF_EVENT_STATE_ACTIVE) | 2655 | if (event->state != PERF_EVENT_STATE_ACTIVE) |
2346 | return 0; | 2656 | return 0; |
2347 | 2657 | ||
@@ -2845,16 +3155,7 @@ void perf_event_wakeup(struct perf_event *event) | |||
2845 | } | 3155 | } |
2846 | } | 3156 | } |
2847 | 3157 | ||
2848 | /* | 3158 | static void perf_pending_event(struct irq_work *entry) |
2849 | * Pending wakeups | ||
2850 | * | ||
2851 | * Handle the case where we need to wakeup up from NMI (or rq->lock) context. | ||
2852 | * | ||
2853 | * The NMI bit means we cannot possibly take locks. Therefore, maintain a | ||
2854 | * single linked list and use cmpxchg() to add entries lockless. | ||
2855 | */ | ||
2856 | |||
2857 | static void perf_pending_event(struct perf_pending_entry *entry) | ||
2858 | { | 3159 | { |
2859 | struct perf_event *event = container_of(entry, | 3160 | struct perf_event *event = container_of(entry, |
2860 | struct perf_event, pending); | 3161 | struct perf_event, pending); |
@@ -2870,99 +3171,6 @@ static void perf_pending_event(struct perf_pending_entry *entry) | |||
2870 | } | 3171 | } |
2871 | } | 3172 | } |
2872 | 3173 | ||
2873 | #define PENDING_TAIL ((struct perf_pending_entry *)-1UL) | ||
2874 | |||
2875 | static DEFINE_PER_CPU(struct perf_pending_entry *, perf_pending_head) = { | ||
2876 | PENDING_TAIL, | ||
2877 | }; | ||
2878 | |||
2879 | static void perf_pending_queue(struct perf_pending_entry *entry, | ||
2880 | void (*func)(struct perf_pending_entry *)) | ||
2881 | { | ||
2882 | struct perf_pending_entry **head; | ||
2883 | |||
2884 | if (cmpxchg(&entry->next, NULL, PENDING_TAIL) != NULL) | ||
2885 | return; | ||
2886 | |||
2887 | entry->func = func; | ||
2888 | |||
2889 | head = &get_cpu_var(perf_pending_head); | ||
2890 | |||
2891 | do { | ||
2892 | entry->next = *head; | ||
2893 | } while (cmpxchg(head, entry->next, entry) != entry->next); | ||
2894 | |||
2895 | set_perf_event_pending(); | ||
2896 | |||
2897 | put_cpu_var(perf_pending_head); | ||
2898 | } | ||
2899 | |||
2900 | static int __perf_pending_run(void) | ||
2901 | { | ||
2902 | struct perf_pending_entry *list; | ||
2903 | int nr = 0; | ||
2904 | |||
2905 | list = xchg(&__get_cpu_var(perf_pending_head), PENDING_TAIL); | ||
2906 | while (list != PENDING_TAIL) { | ||
2907 | void (*func)(struct perf_pending_entry *); | ||
2908 | struct perf_pending_entry *entry = list; | ||
2909 | |||
2910 | list = list->next; | ||
2911 | |||
2912 | func = entry->func; | ||
2913 | entry->next = NULL; | ||
2914 | /* | ||
2915 | * Ensure we observe the unqueue before we issue the wakeup, | ||
2916 | * so that we won't be waiting forever. | ||
2917 | * -- see perf_not_pending(). | ||
2918 | */ | ||
2919 | smp_wmb(); | ||
2920 | |||
2921 | func(entry); | ||
2922 | nr++; | ||
2923 | } | ||
2924 | |||
2925 | return nr; | ||
2926 | } | ||
2927 | |||
2928 | static inline int perf_not_pending(struct perf_event *event) | ||
2929 | { | ||
2930 | /* | ||
2931 | * If we flush on whatever cpu we run, there is a chance we don't | ||
2932 | * need to wait. | ||
2933 | */ | ||
2934 | get_cpu(); | ||
2935 | __perf_pending_run(); | ||
2936 | put_cpu(); | ||
2937 | |||
2938 | /* | ||
2939 | * Ensure we see the proper queue state before going to sleep | ||
2940 | * so that we do not miss the wakeup. -- see perf_pending_handle() | ||
2941 | */ | ||
2942 | smp_rmb(); | ||
2943 | return event->pending.next == NULL; | ||
2944 | } | ||
2945 | |||
2946 | static void perf_pending_sync(struct perf_event *event) | ||
2947 | { | ||
2948 | wait_event(event->waitq, perf_not_pending(event)); | ||
2949 | } | ||
2950 | |||
2951 | void perf_event_do_pending(void) | ||
2952 | { | ||
2953 | __perf_pending_run(); | ||
2954 | } | ||
2955 | |||
2956 | /* | ||
2957 | * Callchain support -- arch specific | ||
2958 | */ | ||
2959 | |||
2960 | __weak struct perf_callchain_entry *perf_callchain(struct pt_regs *regs) | ||
2961 | { | ||
2962 | return NULL; | ||
2963 | } | ||
2964 | |||
2965 | |||
2966 | /* | 3174 | /* |
2967 | * We assume there is only KVM supporting the callbacks. | 3175 | * We assume there is only KVM supporting the callbacks. |
2968 | * Later on, we might change it to a list if there is | 3176 | * Later on, we might change it to a list if there is |
@@ -3012,8 +3220,7 @@ static void perf_output_wakeup(struct perf_output_handle *handle) | |||
3012 | 3220 | ||
3013 | if (handle->nmi) { | 3221 | if (handle->nmi) { |
3014 | handle->event->pending_wakeup = 1; | 3222 | handle->event->pending_wakeup = 1; |
3015 | perf_pending_queue(&handle->event->pending, | 3223 | irq_work_queue(&handle->event->pending); |
3016 | perf_pending_event); | ||
3017 | } else | 3224 | } else |
3018 | perf_event_wakeup(handle->event); | 3225 | perf_event_wakeup(handle->event); |
3019 | } | 3226 | } |
@@ -3069,7 +3276,7 @@ again: | |||
3069 | if (handle->wakeup != local_read(&buffer->wakeup)) | 3276 | if (handle->wakeup != local_read(&buffer->wakeup)) |
3070 | perf_output_wakeup(handle); | 3277 | perf_output_wakeup(handle); |
3071 | 3278 | ||
3072 | out: | 3279 | out: |
3073 | preempt_enable(); | 3280 | preempt_enable(); |
3074 | } | 3281 | } |
3075 | 3282 | ||
@@ -3457,14 +3664,20 @@ static void perf_event_output(struct perf_event *event, int nmi, | |||
3457 | struct perf_output_handle handle; | 3664 | struct perf_output_handle handle; |
3458 | struct perf_event_header header; | 3665 | struct perf_event_header header; |
3459 | 3666 | ||
3667 | /* protect the callchain buffers */ | ||
3668 | rcu_read_lock(); | ||
3669 | |||
3460 | perf_prepare_sample(&header, data, event, regs); | 3670 | perf_prepare_sample(&header, data, event, regs); |
3461 | 3671 | ||
3462 | if (perf_output_begin(&handle, event, header.size, nmi, 1)) | 3672 | if (perf_output_begin(&handle, event, header.size, nmi, 1)) |
3463 | return; | 3673 | goto exit; |
3464 | 3674 | ||
3465 | perf_output_sample(&handle, &header, data, event); | 3675 | perf_output_sample(&handle, &header, data, event); |
3466 | 3676 | ||
3467 | perf_output_end(&handle); | 3677 | perf_output_end(&handle); |
3678 | |||
3679 | exit: | ||
3680 | rcu_read_unlock(); | ||
3468 | } | 3681 | } |
3469 | 3682 | ||
3470 | /* | 3683 | /* |
@@ -3578,16 +3791,27 @@ static void perf_event_task_ctx(struct perf_event_context *ctx, | |||
3578 | static void perf_event_task_event(struct perf_task_event *task_event) | 3791 | static void perf_event_task_event(struct perf_task_event *task_event) |
3579 | { | 3792 | { |
3580 | struct perf_cpu_context *cpuctx; | 3793 | struct perf_cpu_context *cpuctx; |
3581 | struct perf_event_context *ctx = task_event->task_ctx; | 3794 | struct perf_event_context *ctx; |
3795 | struct pmu *pmu; | ||
3796 | int ctxn; | ||
3582 | 3797 | ||
3583 | rcu_read_lock(); | 3798 | rcu_read_lock(); |
3584 | cpuctx = &get_cpu_var(perf_cpu_context); | 3799 | list_for_each_entry_rcu(pmu, &pmus, entry) { |
3585 | perf_event_task_ctx(&cpuctx->ctx, task_event); | 3800 | cpuctx = get_cpu_ptr(pmu->pmu_cpu_context); |
3586 | if (!ctx) | 3801 | perf_event_task_ctx(&cpuctx->ctx, task_event); |
3587 | ctx = rcu_dereference(current->perf_event_ctxp); | 3802 | |
3588 | if (ctx) | 3803 | ctx = task_event->task_ctx; |
3589 | perf_event_task_ctx(ctx, task_event); | 3804 | if (!ctx) { |
3590 | put_cpu_var(perf_cpu_context); | 3805 | ctxn = pmu->task_ctx_nr; |
3806 | if (ctxn < 0) | ||
3807 | goto next; | ||
3808 | ctx = rcu_dereference(current->perf_event_ctxp[ctxn]); | ||
3809 | } | ||
3810 | if (ctx) | ||
3811 | perf_event_task_ctx(ctx, task_event); | ||
3812 | next: | ||
3813 | put_cpu_ptr(pmu->pmu_cpu_context); | ||
3814 | } | ||
3591 | rcu_read_unlock(); | 3815 | rcu_read_unlock(); |
3592 | } | 3816 | } |
3593 | 3817 | ||
@@ -3692,8 +3916,10 @@ static void perf_event_comm_event(struct perf_comm_event *comm_event) | |||
3692 | { | 3916 | { |
3693 | struct perf_cpu_context *cpuctx; | 3917 | struct perf_cpu_context *cpuctx; |
3694 | struct perf_event_context *ctx; | 3918 | struct perf_event_context *ctx; |
3695 | unsigned int size; | ||
3696 | char comm[TASK_COMM_LEN]; | 3919 | char comm[TASK_COMM_LEN]; |
3920 | unsigned int size; | ||
3921 | struct pmu *pmu; | ||
3922 | int ctxn; | ||
3697 | 3923 | ||
3698 | memset(comm, 0, sizeof(comm)); | 3924 | memset(comm, 0, sizeof(comm)); |
3699 | strlcpy(comm, comm_event->task->comm, sizeof(comm)); | 3925 | strlcpy(comm, comm_event->task->comm, sizeof(comm)); |
@@ -3705,21 +3931,36 @@ static void perf_event_comm_event(struct perf_comm_event *comm_event) | |||
3705 | comm_event->event_id.header.size = sizeof(comm_event->event_id) + size; | 3931 | comm_event->event_id.header.size = sizeof(comm_event->event_id) + size; |
3706 | 3932 | ||
3707 | rcu_read_lock(); | 3933 | rcu_read_lock(); |
3708 | cpuctx = &get_cpu_var(perf_cpu_context); | 3934 | list_for_each_entry_rcu(pmu, &pmus, entry) { |
3709 | perf_event_comm_ctx(&cpuctx->ctx, comm_event); | 3935 | cpuctx = get_cpu_ptr(pmu->pmu_cpu_context); |
3710 | ctx = rcu_dereference(current->perf_event_ctxp); | 3936 | perf_event_comm_ctx(&cpuctx->ctx, comm_event); |
3711 | if (ctx) | 3937 | |
3712 | perf_event_comm_ctx(ctx, comm_event); | 3938 | ctxn = pmu->task_ctx_nr; |
3713 | put_cpu_var(perf_cpu_context); | 3939 | if (ctxn < 0) |
3940 | goto next; | ||
3941 | |||
3942 | ctx = rcu_dereference(current->perf_event_ctxp[ctxn]); | ||
3943 | if (ctx) | ||
3944 | perf_event_comm_ctx(ctx, comm_event); | ||
3945 | next: | ||
3946 | put_cpu_ptr(pmu->pmu_cpu_context); | ||
3947 | } | ||
3714 | rcu_read_unlock(); | 3948 | rcu_read_unlock(); |
3715 | } | 3949 | } |
3716 | 3950 | ||
3717 | void perf_event_comm(struct task_struct *task) | 3951 | void perf_event_comm(struct task_struct *task) |
3718 | { | 3952 | { |
3719 | struct perf_comm_event comm_event; | 3953 | struct perf_comm_event comm_event; |
3954 | struct perf_event_context *ctx; | ||
3955 | int ctxn; | ||
3956 | |||
3957 | for_each_task_context_nr(ctxn) { | ||
3958 | ctx = task->perf_event_ctxp[ctxn]; | ||
3959 | if (!ctx) | ||
3960 | continue; | ||
3720 | 3961 | ||
3721 | if (task->perf_event_ctxp) | 3962 | perf_event_enable_on_exec(ctx); |
3722 | perf_event_enable_on_exec(task); | 3963 | } |
3723 | 3964 | ||
3724 | if (!atomic_read(&nr_comm_events)) | 3965 | if (!atomic_read(&nr_comm_events)) |
3725 | return; | 3966 | return; |
@@ -3821,6 +4062,8 @@ static void perf_event_mmap_event(struct perf_mmap_event *mmap_event) | |||
3821 | char tmp[16]; | 4062 | char tmp[16]; |
3822 | char *buf = NULL; | 4063 | char *buf = NULL; |
3823 | const char *name; | 4064 | const char *name; |
4065 | struct pmu *pmu; | ||
4066 | int ctxn; | ||
3824 | 4067 | ||
3825 | memset(tmp, 0, sizeof(tmp)); | 4068 | memset(tmp, 0, sizeof(tmp)); |
3826 | 4069 | ||
@@ -3873,12 +4116,23 @@ got_name: | |||
3873 | mmap_event->event_id.header.size = sizeof(mmap_event->event_id) + size; | 4116 | mmap_event->event_id.header.size = sizeof(mmap_event->event_id) + size; |
3874 | 4117 | ||
3875 | rcu_read_lock(); | 4118 | rcu_read_lock(); |
3876 | cpuctx = &get_cpu_var(perf_cpu_context); | 4119 | list_for_each_entry_rcu(pmu, &pmus, entry) { |
3877 | perf_event_mmap_ctx(&cpuctx->ctx, mmap_event, vma->vm_flags & VM_EXEC); | 4120 | cpuctx = get_cpu_ptr(pmu->pmu_cpu_context); |
3878 | ctx = rcu_dereference(current->perf_event_ctxp); | 4121 | perf_event_mmap_ctx(&cpuctx->ctx, mmap_event, |
3879 | if (ctx) | 4122 | vma->vm_flags & VM_EXEC); |
3880 | perf_event_mmap_ctx(ctx, mmap_event, vma->vm_flags & VM_EXEC); | 4123 | |
3881 | put_cpu_var(perf_cpu_context); | 4124 | ctxn = pmu->task_ctx_nr; |
4125 | if (ctxn < 0) | ||
4126 | goto next; | ||
4127 | |||
4128 | ctx = rcu_dereference(current->perf_event_ctxp[ctxn]); | ||
4129 | if (ctx) { | ||
4130 | perf_event_mmap_ctx(ctx, mmap_event, | ||
4131 | vma->vm_flags & VM_EXEC); | ||
4132 | } | ||
4133 | next: | ||
4134 | put_cpu_ptr(pmu->pmu_cpu_context); | ||
4135 | } | ||
3882 | rcu_read_unlock(); | 4136 | rcu_read_unlock(); |
3883 | 4137 | ||
3884 | kfree(buf); | 4138 | kfree(buf); |
@@ -3960,8 +4214,6 @@ static int __perf_event_overflow(struct perf_event *event, int nmi, | |||
3960 | struct hw_perf_event *hwc = &event->hw; | 4214 | struct hw_perf_event *hwc = &event->hw; |
3961 | int ret = 0; | 4215 | int ret = 0; |
3962 | 4216 | ||
3963 | throttle = (throttle && event->pmu->unthrottle != NULL); | ||
3964 | |||
3965 | if (!throttle) { | 4217 | if (!throttle) { |
3966 | hwc->interrupts++; | 4218 | hwc->interrupts++; |
3967 | } else { | 4219 | } else { |
@@ -4004,8 +4256,7 @@ static int __perf_event_overflow(struct perf_event *event, int nmi, | |||
4004 | event->pending_kill = POLL_HUP; | 4256 | event->pending_kill = POLL_HUP; |
4005 | if (nmi) { | 4257 | if (nmi) { |
4006 | event->pending_disable = 1; | 4258 | event->pending_disable = 1; |
4007 | perf_pending_queue(&event->pending, | 4259 | irq_work_queue(&event->pending); |
4008 | perf_pending_event); | ||
4009 | } else | 4260 | } else |
4010 | perf_event_disable(event); | 4261 | perf_event_disable(event); |
4011 | } | 4262 | } |
@@ -4029,6 +4280,17 @@ int perf_event_overflow(struct perf_event *event, int nmi, | |||
4029 | * Generic software event infrastructure | 4280 | * Generic software event infrastructure |
4030 | */ | 4281 | */ |
4031 | 4282 | ||
4283 | struct swevent_htable { | ||
4284 | struct swevent_hlist *swevent_hlist; | ||
4285 | struct mutex hlist_mutex; | ||
4286 | int hlist_refcount; | ||
4287 | |||
4288 | /* Recursion avoidance in each contexts */ | ||
4289 | int recursion[PERF_NR_CONTEXTS]; | ||
4290 | }; | ||
4291 | |||
4292 | static DEFINE_PER_CPU(struct swevent_htable, swevent_htable); | ||
4293 | |||
4032 | /* | 4294 | /* |
4033 | * We directly increment event->count and keep a second value in | 4295 | * We directly increment event->count and keep a second value in |
4034 | * event->hw.period_left to count intervals. This period event | 4296 | * event->hw.period_left to count intervals. This period event |
@@ -4086,7 +4348,7 @@ static void perf_swevent_overflow(struct perf_event *event, u64 overflow, | |||
4086 | } | 4348 | } |
4087 | } | 4349 | } |
4088 | 4350 | ||
4089 | static void perf_swevent_add(struct perf_event *event, u64 nr, | 4351 | static void perf_swevent_event(struct perf_event *event, u64 nr, |
4090 | int nmi, struct perf_sample_data *data, | 4352 | int nmi, struct perf_sample_data *data, |
4091 | struct pt_regs *regs) | 4353 | struct pt_regs *regs) |
4092 | { | 4354 | { |
@@ -4112,6 +4374,9 @@ static void perf_swevent_add(struct perf_event *event, u64 nr, | |||
4112 | static int perf_exclude_event(struct perf_event *event, | 4374 | static int perf_exclude_event(struct perf_event *event, |
4113 | struct pt_regs *regs) | 4375 | struct pt_regs *regs) |
4114 | { | 4376 | { |
4377 | if (event->hw.state & PERF_HES_STOPPED) | ||
4378 | return 0; | ||
4379 | |||
4115 | if (regs) { | 4380 | if (regs) { |
4116 | if (event->attr.exclude_user && user_mode(regs)) | 4381 | if (event->attr.exclude_user && user_mode(regs)) |
4117 | return 1; | 4382 | return 1; |
@@ -4158,11 +4423,11 @@ __find_swevent_head(struct swevent_hlist *hlist, u64 type, u32 event_id) | |||
4158 | 4423 | ||
4159 | /* For the read side: events when they trigger */ | 4424 | /* For the read side: events when they trigger */ |
4160 | static inline struct hlist_head * | 4425 | static inline struct hlist_head * |
4161 | find_swevent_head_rcu(struct perf_cpu_context *ctx, u64 type, u32 event_id) | 4426 | find_swevent_head_rcu(struct swevent_htable *swhash, u64 type, u32 event_id) |
4162 | { | 4427 | { |
4163 | struct swevent_hlist *hlist; | 4428 | struct swevent_hlist *hlist; |
4164 | 4429 | ||
4165 | hlist = rcu_dereference(ctx->swevent_hlist); | 4430 | hlist = rcu_dereference(swhash->swevent_hlist); |
4166 | if (!hlist) | 4431 | if (!hlist) |
4167 | return NULL; | 4432 | return NULL; |
4168 | 4433 | ||
@@ -4171,7 +4436,7 @@ find_swevent_head_rcu(struct perf_cpu_context *ctx, u64 type, u32 event_id) | |||
4171 | 4436 | ||
4172 | /* For the event head insertion and removal in the hlist */ | 4437 | /* For the event head insertion and removal in the hlist */ |
4173 | static inline struct hlist_head * | 4438 | static inline struct hlist_head * |
4174 | find_swevent_head(struct perf_cpu_context *ctx, struct perf_event *event) | 4439 | find_swevent_head(struct swevent_htable *swhash, struct perf_event *event) |
4175 | { | 4440 | { |
4176 | struct swevent_hlist *hlist; | 4441 | struct swevent_hlist *hlist; |
4177 | u32 event_id = event->attr.config; | 4442 | u32 event_id = event->attr.config; |
@@ -4182,7 +4447,7 @@ find_swevent_head(struct perf_cpu_context *ctx, struct perf_event *event) | |||
4182 | * and release. Which makes the protected version suitable here. | 4447 | * and release. Which makes the protected version suitable here. |
4183 | * The context lock guarantees that. | 4448 | * The context lock guarantees that. |
4184 | */ | 4449 | */ |
4185 | hlist = rcu_dereference_protected(ctx->swevent_hlist, | 4450 | hlist = rcu_dereference_protected(swhash->swevent_hlist, |
4186 | lockdep_is_held(&event->ctx->lock)); | 4451 | lockdep_is_held(&event->ctx->lock)); |
4187 | if (!hlist) | 4452 | if (!hlist) |
4188 | return NULL; | 4453 | return NULL; |
@@ -4195,23 +4460,19 @@ static void do_perf_sw_event(enum perf_type_id type, u32 event_id, | |||
4195 | struct perf_sample_data *data, | 4460 | struct perf_sample_data *data, |
4196 | struct pt_regs *regs) | 4461 | struct pt_regs *regs) |
4197 | { | 4462 | { |
4198 | struct perf_cpu_context *cpuctx; | 4463 | struct swevent_htable *swhash = &__get_cpu_var(swevent_htable); |
4199 | struct perf_event *event; | 4464 | struct perf_event *event; |
4200 | struct hlist_node *node; | 4465 | struct hlist_node *node; |
4201 | struct hlist_head *head; | 4466 | struct hlist_head *head; |
4202 | 4467 | ||
4203 | cpuctx = &__get_cpu_var(perf_cpu_context); | ||
4204 | |||
4205 | rcu_read_lock(); | 4468 | rcu_read_lock(); |
4206 | 4469 | head = find_swevent_head_rcu(swhash, type, event_id); | |
4207 | head = find_swevent_head_rcu(cpuctx, type, event_id); | ||
4208 | |||
4209 | if (!head) | 4470 | if (!head) |
4210 | goto end; | 4471 | goto end; |
4211 | 4472 | ||
4212 | hlist_for_each_entry_rcu(event, node, head, hlist_entry) { | 4473 | hlist_for_each_entry_rcu(event, node, head, hlist_entry) { |
4213 | if (perf_swevent_match(event, type, event_id, data, regs)) | 4474 | if (perf_swevent_match(event, type, event_id, data, regs)) |
4214 | perf_swevent_add(event, nr, nmi, data, regs); | 4475 | perf_swevent_event(event, nr, nmi, data, regs); |
4215 | } | 4476 | } |
4216 | end: | 4477 | end: |
4217 | rcu_read_unlock(); | 4478 | rcu_read_unlock(); |
@@ -4219,33 +4480,17 @@ end: | |||
4219 | 4480 | ||
4220 | int perf_swevent_get_recursion_context(void) | 4481 | int perf_swevent_get_recursion_context(void) |
4221 | { | 4482 | { |
4222 | struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); | 4483 | struct swevent_htable *swhash = &__get_cpu_var(swevent_htable); |
4223 | int rctx; | ||
4224 | |||
4225 | if (in_nmi()) | ||
4226 | rctx = 3; | ||
4227 | else if (in_irq()) | ||
4228 | rctx = 2; | ||
4229 | else if (in_softirq()) | ||
4230 | rctx = 1; | ||
4231 | else | ||
4232 | rctx = 0; | ||
4233 | |||
4234 | if (cpuctx->recursion[rctx]) | ||
4235 | return -1; | ||
4236 | 4484 | ||
4237 | cpuctx->recursion[rctx]++; | 4485 | return get_recursion_context(swhash->recursion); |
4238 | barrier(); | ||
4239 | |||
4240 | return rctx; | ||
4241 | } | 4486 | } |
4242 | EXPORT_SYMBOL_GPL(perf_swevent_get_recursion_context); | 4487 | EXPORT_SYMBOL_GPL(perf_swevent_get_recursion_context); |
4243 | 4488 | ||
4244 | void inline perf_swevent_put_recursion_context(int rctx) | 4489 | void inline perf_swevent_put_recursion_context(int rctx) |
4245 | { | 4490 | { |
4246 | struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); | 4491 | struct swevent_htable *swhash = &__get_cpu_var(swevent_htable); |
4247 | barrier(); | 4492 | |
4248 | cpuctx->recursion[rctx]--; | 4493 | put_recursion_context(swhash->recursion, rctx); |
4249 | } | 4494 | } |
4250 | 4495 | ||
4251 | void __perf_sw_event(u32 event_id, u64 nr, int nmi, | 4496 | void __perf_sw_event(u32 event_id, u64 nr, int nmi, |
@@ -4271,20 +4516,20 @@ static void perf_swevent_read(struct perf_event *event) | |||
4271 | { | 4516 | { |
4272 | } | 4517 | } |
4273 | 4518 | ||
4274 | static int perf_swevent_enable(struct perf_event *event) | 4519 | static int perf_swevent_add(struct perf_event *event, int flags) |
4275 | { | 4520 | { |
4521 | struct swevent_htable *swhash = &__get_cpu_var(swevent_htable); | ||
4276 | struct hw_perf_event *hwc = &event->hw; | 4522 | struct hw_perf_event *hwc = &event->hw; |
4277 | struct perf_cpu_context *cpuctx; | ||
4278 | struct hlist_head *head; | 4523 | struct hlist_head *head; |
4279 | 4524 | ||
4280 | cpuctx = &__get_cpu_var(perf_cpu_context); | ||
4281 | |||
4282 | if (hwc->sample_period) { | 4525 | if (hwc->sample_period) { |
4283 | hwc->last_period = hwc->sample_period; | 4526 | hwc->last_period = hwc->sample_period; |
4284 | perf_swevent_set_period(event); | 4527 | perf_swevent_set_period(event); |
4285 | } | 4528 | } |
4286 | 4529 | ||
4287 | head = find_swevent_head(cpuctx, event); | 4530 | hwc->state = !(flags & PERF_EF_START); |
4531 | |||
4532 | head = find_swevent_head(swhash, event); | ||
4288 | if (WARN_ON_ONCE(!head)) | 4533 | if (WARN_ON_ONCE(!head)) |
4289 | return -EINVAL; | 4534 | return -EINVAL; |
4290 | 4535 | ||
@@ -4293,202 +4538,27 @@ static int perf_swevent_enable(struct perf_event *event) | |||
4293 | return 0; | 4538 | return 0; |
4294 | } | 4539 | } |
4295 | 4540 | ||
4296 | static void perf_swevent_disable(struct perf_event *event) | 4541 | static void perf_swevent_del(struct perf_event *event, int flags) |
4297 | { | 4542 | { |
4298 | hlist_del_rcu(&event->hlist_entry); | 4543 | hlist_del_rcu(&event->hlist_entry); |
4299 | } | 4544 | } |
4300 | 4545 | ||
4301 | static void perf_swevent_void(struct perf_event *event) | 4546 | static void perf_swevent_start(struct perf_event *event, int flags) |
4302 | { | ||
4303 | } | ||
4304 | |||
4305 | static int perf_swevent_int(struct perf_event *event) | ||
4306 | { | ||
4307 | return 0; | ||
4308 | } | ||
4309 | |||
4310 | static const struct pmu perf_ops_generic = { | ||
4311 | .enable = perf_swevent_enable, | ||
4312 | .disable = perf_swevent_disable, | ||
4313 | .start = perf_swevent_int, | ||
4314 | .stop = perf_swevent_void, | ||
4315 | .read = perf_swevent_read, | ||
4316 | .unthrottle = perf_swevent_void, /* hwc->interrupts already reset */ | ||
4317 | }; | ||
4318 | |||
4319 | /* | ||
4320 | * hrtimer based swevent callback | ||
4321 | */ | ||
4322 | |||
4323 | static enum hrtimer_restart perf_swevent_hrtimer(struct hrtimer *hrtimer) | ||
4324 | { | 4547 | { |
4325 | enum hrtimer_restart ret = HRTIMER_RESTART; | 4548 | event->hw.state = 0; |
4326 | struct perf_sample_data data; | ||
4327 | struct pt_regs *regs; | ||
4328 | struct perf_event *event; | ||
4329 | u64 period; | ||
4330 | |||
4331 | event = container_of(hrtimer, struct perf_event, hw.hrtimer); | ||
4332 | event->pmu->read(event); | ||
4333 | |||
4334 | perf_sample_data_init(&data, 0); | ||
4335 | data.period = event->hw.last_period; | ||
4336 | regs = get_irq_regs(); | ||
4337 | |||
4338 | if (regs && !perf_exclude_event(event, regs)) { | ||
4339 | if (!(event->attr.exclude_idle && current->pid == 0)) | ||
4340 | if (perf_event_overflow(event, 0, &data, regs)) | ||
4341 | ret = HRTIMER_NORESTART; | ||
4342 | } | ||
4343 | |||
4344 | period = max_t(u64, 10000, event->hw.sample_period); | ||
4345 | hrtimer_forward_now(hrtimer, ns_to_ktime(period)); | ||
4346 | |||
4347 | return ret; | ||
4348 | } | 4549 | } |
4349 | 4550 | ||
4350 | static void perf_swevent_start_hrtimer(struct perf_event *event) | 4551 | static void perf_swevent_stop(struct perf_event *event, int flags) |
4351 | { | 4552 | { |
4352 | struct hw_perf_event *hwc = &event->hw; | 4553 | event->hw.state = PERF_HES_STOPPED; |
4353 | |||
4354 | hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); | ||
4355 | hwc->hrtimer.function = perf_swevent_hrtimer; | ||
4356 | if (hwc->sample_period) { | ||
4357 | u64 period; | ||
4358 | |||
4359 | if (hwc->remaining) { | ||
4360 | if (hwc->remaining < 0) | ||
4361 | period = 10000; | ||
4362 | else | ||
4363 | period = hwc->remaining; | ||
4364 | hwc->remaining = 0; | ||
4365 | } else { | ||
4366 | period = max_t(u64, 10000, hwc->sample_period); | ||
4367 | } | ||
4368 | __hrtimer_start_range_ns(&hwc->hrtimer, | ||
4369 | ns_to_ktime(period), 0, | ||
4370 | HRTIMER_MODE_REL, 0); | ||
4371 | } | ||
4372 | } | ||
4373 | |||
4374 | static void perf_swevent_cancel_hrtimer(struct perf_event *event) | ||
4375 | { | ||
4376 | struct hw_perf_event *hwc = &event->hw; | ||
4377 | |||
4378 | if (hwc->sample_period) { | ||
4379 | ktime_t remaining = hrtimer_get_remaining(&hwc->hrtimer); | ||
4380 | hwc->remaining = ktime_to_ns(remaining); | ||
4381 | |||
4382 | hrtimer_cancel(&hwc->hrtimer); | ||
4383 | } | ||
4384 | } | ||
4385 | |||
4386 | /* | ||
4387 | * Software event: cpu wall time clock | ||
4388 | */ | ||
4389 | |||
4390 | static void cpu_clock_perf_event_update(struct perf_event *event) | ||
4391 | { | ||
4392 | int cpu = raw_smp_processor_id(); | ||
4393 | s64 prev; | ||
4394 | u64 now; | ||
4395 | |||
4396 | now = cpu_clock(cpu); | ||
4397 | prev = local64_xchg(&event->hw.prev_count, now); | ||
4398 | local64_add(now - prev, &event->count); | ||
4399 | } | ||
4400 | |||
4401 | static int cpu_clock_perf_event_enable(struct perf_event *event) | ||
4402 | { | ||
4403 | struct hw_perf_event *hwc = &event->hw; | ||
4404 | int cpu = raw_smp_processor_id(); | ||
4405 | |||
4406 | local64_set(&hwc->prev_count, cpu_clock(cpu)); | ||
4407 | perf_swevent_start_hrtimer(event); | ||
4408 | |||
4409 | return 0; | ||
4410 | } | ||
4411 | |||
4412 | static void cpu_clock_perf_event_disable(struct perf_event *event) | ||
4413 | { | ||
4414 | perf_swevent_cancel_hrtimer(event); | ||
4415 | cpu_clock_perf_event_update(event); | ||
4416 | } | ||
4417 | |||
4418 | static void cpu_clock_perf_event_read(struct perf_event *event) | ||
4419 | { | ||
4420 | cpu_clock_perf_event_update(event); | ||
4421 | } | ||
4422 | |||
4423 | static const struct pmu perf_ops_cpu_clock = { | ||
4424 | .enable = cpu_clock_perf_event_enable, | ||
4425 | .disable = cpu_clock_perf_event_disable, | ||
4426 | .read = cpu_clock_perf_event_read, | ||
4427 | }; | ||
4428 | |||
4429 | /* | ||
4430 | * Software event: task time clock | ||
4431 | */ | ||
4432 | |||
4433 | static void task_clock_perf_event_update(struct perf_event *event, u64 now) | ||
4434 | { | ||
4435 | u64 prev; | ||
4436 | s64 delta; | ||
4437 | |||
4438 | prev = local64_xchg(&event->hw.prev_count, now); | ||
4439 | delta = now - prev; | ||
4440 | local64_add(delta, &event->count); | ||
4441 | } | ||
4442 | |||
4443 | static int task_clock_perf_event_enable(struct perf_event *event) | ||
4444 | { | ||
4445 | struct hw_perf_event *hwc = &event->hw; | ||
4446 | u64 now; | ||
4447 | |||
4448 | now = event->ctx->time; | ||
4449 | |||
4450 | local64_set(&hwc->prev_count, now); | ||
4451 | |||
4452 | perf_swevent_start_hrtimer(event); | ||
4453 | |||
4454 | return 0; | ||
4455 | } | ||
4456 | |||
4457 | static void task_clock_perf_event_disable(struct perf_event *event) | ||
4458 | { | ||
4459 | perf_swevent_cancel_hrtimer(event); | ||
4460 | task_clock_perf_event_update(event, event->ctx->time); | ||
4461 | |||
4462 | } | ||
4463 | |||
4464 | static void task_clock_perf_event_read(struct perf_event *event) | ||
4465 | { | ||
4466 | u64 time; | ||
4467 | |||
4468 | if (!in_nmi()) { | ||
4469 | update_context_time(event->ctx); | ||
4470 | time = event->ctx->time; | ||
4471 | } else { | ||
4472 | u64 now = perf_clock(); | ||
4473 | u64 delta = now - event->ctx->timestamp; | ||
4474 | time = event->ctx->time + delta; | ||
4475 | } | ||
4476 | |||
4477 | task_clock_perf_event_update(event, time); | ||
4478 | } | 4554 | } |
4479 | 4555 | ||
4480 | static const struct pmu perf_ops_task_clock = { | ||
4481 | .enable = task_clock_perf_event_enable, | ||
4482 | .disable = task_clock_perf_event_disable, | ||
4483 | .read = task_clock_perf_event_read, | ||
4484 | }; | ||
4485 | |||
4486 | /* Deref the hlist from the update side */ | 4556 | /* Deref the hlist from the update side */ |
4487 | static inline struct swevent_hlist * | 4557 | static inline struct swevent_hlist * |
4488 | swevent_hlist_deref(struct perf_cpu_context *cpuctx) | 4558 | swevent_hlist_deref(struct swevent_htable *swhash) |
4489 | { | 4559 | { |
4490 | return rcu_dereference_protected(cpuctx->swevent_hlist, | 4560 | return rcu_dereference_protected(swhash->swevent_hlist, |
4491 | lockdep_is_held(&cpuctx->hlist_mutex)); | 4561 | lockdep_is_held(&swhash->hlist_mutex)); |
4492 | } | 4562 | } |
4493 | 4563 | ||
4494 | static void swevent_hlist_release_rcu(struct rcu_head *rcu_head) | 4564 | static void swevent_hlist_release_rcu(struct rcu_head *rcu_head) |
@@ -4499,27 +4569,27 @@ static void swevent_hlist_release_rcu(struct rcu_head *rcu_head) | |||
4499 | kfree(hlist); | 4569 | kfree(hlist); |
4500 | } | 4570 | } |
4501 | 4571 | ||
4502 | static void swevent_hlist_release(struct perf_cpu_context *cpuctx) | 4572 | static void swevent_hlist_release(struct swevent_htable *swhash) |
4503 | { | 4573 | { |
4504 | struct swevent_hlist *hlist = swevent_hlist_deref(cpuctx); | 4574 | struct swevent_hlist *hlist = swevent_hlist_deref(swhash); |
4505 | 4575 | ||
4506 | if (!hlist) | 4576 | if (!hlist) |
4507 | return; | 4577 | return; |
4508 | 4578 | ||
4509 | rcu_assign_pointer(cpuctx->swevent_hlist, NULL); | 4579 | rcu_assign_pointer(swhash->swevent_hlist, NULL); |
4510 | call_rcu(&hlist->rcu_head, swevent_hlist_release_rcu); | 4580 | call_rcu(&hlist->rcu_head, swevent_hlist_release_rcu); |
4511 | } | 4581 | } |
4512 | 4582 | ||
4513 | static void swevent_hlist_put_cpu(struct perf_event *event, int cpu) | 4583 | static void swevent_hlist_put_cpu(struct perf_event *event, int cpu) |
4514 | { | 4584 | { |
4515 | struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu); | 4585 | struct swevent_htable *swhash = &per_cpu(swevent_htable, cpu); |
4516 | 4586 | ||
4517 | mutex_lock(&cpuctx->hlist_mutex); | 4587 | mutex_lock(&swhash->hlist_mutex); |
4518 | 4588 | ||
4519 | if (!--cpuctx->hlist_refcount) | 4589 | if (!--swhash->hlist_refcount) |
4520 | swevent_hlist_release(cpuctx); | 4590 | swevent_hlist_release(swhash); |
4521 | 4591 | ||
4522 | mutex_unlock(&cpuctx->hlist_mutex); | 4592 | mutex_unlock(&swhash->hlist_mutex); |
4523 | } | 4593 | } |
4524 | 4594 | ||
4525 | static void swevent_hlist_put(struct perf_event *event) | 4595 | static void swevent_hlist_put(struct perf_event *event) |
@@ -4537,12 +4607,12 @@ static void swevent_hlist_put(struct perf_event *event) | |||
4537 | 4607 | ||
4538 | static int swevent_hlist_get_cpu(struct perf_event *event, int cpu) | 4608 | static int swevent_hlist_get_cpu(struct perf_event *event, int cpu) |
4539 | { | 4609 | { |
4540 | struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu); | 4610 | struct swevent_htable *swhash = &per_cpu(swevent_htable, cpu); |
4541 | int err = 0; | 4611 | int err = 0; |
4542 | 4612 | ||
4543 | mutex_lock(&cpuctx->hlist_mutex); | 4613 | mutex_lock(&swhash->hlist_mutex); |
4544 | 4614 | ||
4545 | if (!swevent_hlist_deref(cpuctx) && cpu_online(cpu)) { | 4615 | if (!swevent_hlist_deref(swhash) && cpu_online(cpu)) { |
4546 | struct swevent_hlist *hlist; | 4616 | struct swevent_hlist *hlist; |
4547 | 4617 | ||
4548 | hlist = kzalloc(sizeof(*hlist), GFP_KERNEL); | 4618 | hlist = kzalloc(sizeof(*hlist), GFP_KERNEL); |
@@ -4550,11 +4620,11 @@ static int swevent_hlist_get_cpu(struct perf_event *event, int cpu) | |||
4550 | err = -ENOMEM; | 4620 | err = -ENOMEM; |
4551 | goto exit; | 4621 | goto exit; |
4552 | } | 4622 | } |
4553 | rcu_assign_pointer(cpuctx->swevent_hlist, hlist); | 4623 | rcu_assign_pointer(swhash->swevent_hlist, hlist); |
4554 | } | 4624 | } |
4555 | cpuctx->hlist_refcount++; | 4625 | swhash->hlist_refcount++; |
4556 | exit: | 4626 | exit: |
4557 | mutex_unlock(&cpuctx->hlist_mutex); | 4627 | mutex_unlock(&swhash->hlist_mutex); |
4558 | 4628 | ||
4559 | return err; | 4629 | return err; |
4560 | } | 4630 | } |
@@ -4578,7 +4648,7 @@ static int swevent_hlist_get(struct perf_event *event) | |||
4578 | put_online_cpus(); | 4648 | put_online_cpus(); |
4579 | 4649 | ||
4580 | return 0; | 4650 | return 0; |
4581 | fail: | 4651 | fail: |
4582 | for_each_possible_cpu(cpu) { | 4652 | for_each_possible_cpu(cpu) { |
4583 | if (cpu == failed_cpu) | 4653 | if (cpu == failed_cpu) |
4584 | break; | 4654 | break; |
@@ -4589,17 +4659,64 @@ static int swevent_hlist_get(struct perf_event *event) | |||
4589 | return err; | 4659 | return err; |
4590 | } | 4660 | } |
4591 | 4661 | ||
4592 | #ifdef CONFIG_EVENT_TRACING | 4662 | atomic_t perf_swevent_enabled[PERF_COUNT_SW_MAX]; |
4663 | |||
4664 | static void sw_perf_event_destroy(struct perf_event *event) | ||
4665 | { | ||
4666 | u64 event_id = event->attr.config; | ||
4667 | |||
4668 | WARN_ON(event->parent); | ||
4669 | |||
4670 | jump_label_dec(&perf_swevent_enabled[event_id]); | ||
4671 | swevent_hlist_put(event); | ||
4672 | } | ||
4673 | |||
4674 | static int perf_swevent_init(struct perf_event *event) | ||
4675 | { | ||
4676 | int event_id = event->attr.config; | ||
4677 | |||
4678 | if (event->attr.type != PERF_TYPE_SOFTWARE) | ||
4679 | return -ENOENT; | ||
4680 | |||
4681 | switch (event_id) { | ||
4682 | case PERF_COUNT_SW_CPU_CLOCK: | ||
4683 | case PERF_COUNT_SW_TASK_CLOCK: | ||
4684 | return -ENOENT; | ||
4593 | 4685 | ||
4594 | static const struct pmu perf_ops_tracepoint = { | 4686 | default: |
4595 | .enable = perf_trace_enable, | 4687 | break; |
4596 | .disable = perf_trace_disable, | 4688 | } |
4597 | .start = perf_swevent_int, | 4689 | |
4598 | .stop = perf_swevent_void, | 4690 | if (event_id > PERF_COUNT_SW_MAX) |
4691 | return -ENOENT; | ||
4692 | |||
4693 | if (!event->parent) { | ||
4694 | int err; | ||
4695 | |||
4696 | err = swevent_hlist_get(event); | ||
4697 | if (err) | ||
4698 | return err; | ||
4699 | |||
4700 | jump_label_inc(&perf_swevent_enabled[event_id]); | ||
4701 | event->destroy = sw_perf_event_destroy; | ||
4702 | } | ||
4703 | |||
4704 | return 0; | ||
4705 | } | ||
4706 | |||
4707 | static struct pmu perf_swevent = { | ||
4708 | .task_ctx_nr = perf_sw_context, | ||
4709 | |||
4710 | .event_init = perf_swevent_init, | ||
4711 | .add = perf_swevent_add, | ||
4712 | .del = perf_swevent_del, | ||
4713 | .start = perf_swevent_start, | ||
4714 | .stop = perf_swevent_stop, | ||
4599 | .read = perf_swevent_read, | 4715 | .read = perf_swevent_read, |
4600 | .unthrottle = perf_swevent_void, | ||
4601 | }; | 4716 | }; |
4602 | 4717 | ||
4718 | #ifdef CONFIG_EVENT_TRACING | ||
4719 | |||
4603 | static int perf_tp_filter_match(struct perf_event *event, | 4720 | static int perf_tp_filter_match(struct perf_event *event, |
4604 | struct perf_sample_data *data) | 4721 | struct perf_sample_data *data) |
4605 | { | 4722 | { |
@@ -4643,7 +4760,7 @@ void perf_tp_event(u64 addr, u64 count, void *record, int entry_size, | |||
4643 | 4760 | ||
4644 | hlist_for_each_entry_rcu(event, node, head, hlist_entry) { | 4761 | hlist_for_each_entry_rcu(event, node, head, hlist_entry) { |
4645 | if (perf_tp_event_match(event, &data, regs)) | 4762 | if (perf_tp_event_match(event, &data, regs)) |
4646 | perf_swevent_add(event, count, 1, &data, regs); | 4763 | perf_swevent_event(event, count, 1, &data, regs); |
4647 | } | 4764 | } |
4648 | 4765 | ||
4649 | perf_swevent_put_recursion_context(rctx); | 4766 | perf_swevent_put_recursion_context(rctx); |
@@ -4655,10 +4772,13 @@ static void tp_perf_event_destroy(struct perf_event *event) | |||
4655 | perf_trace_destroy(event); | 4772 | perf_trace_destroy(event); |
4656 | } | 4773 | } |
4657 | 4774 | ||
4658 | static const struct pmu *tp_perf_event_init(struct perf_event *event) | 4775 | static int perf_tp_event_init(struct perf_event *event) |
4659 | { | 4776 | { |
4660 | int err; | 4777 | int err; |
4661 | 4778 | ||
4779 | if (event->attr.type != PERF_TYPE_TRACEPOINT) | ||
4780 | return -ENOENT; | ||
4781 | |||
4662 | /* | 4782 | /* |
4663 | * Raw tracepoint data is a severe data leak, only allow root to | 4783 | * Raw tracepoint data is a severe data leak, only allow root to |
4664 | * have these. | 4784 | * have these. |
@@ -4666,15 +4786,31 @@ static const struct pmu *tp_perf_event_init(struct perf_event *event) | |||
4666 | if ((event->attr.sample_type & PERF_SAMPLE_RAW) && | 4786 | if ((event->attr.sample_type & PERF_SAMPLE_RAW) && |
4667 | perf_paranoid_tracepoint_raw() && | 4787 | perf_paranoid_tracepoint_raw() && |
4668 | !capable(CAP_SYS_ADMIN)) | 4788 | !capable(CAP_SYS_ADMIN)) |
4669 | return ERR_PTR(-EPERM); | 4789 | return -EPERM; |
4670 | 4790 | ||
4671 | err = perf_trace_init(event); | 4791 | err = perf_trace_init(event); |
4672 | if (err) | 4792 | if (err) |
4673 | return NULL; | 4793 | return err; |
4674 | 4794 | ||
4675 | event->destroy = tp_perf_event_destroy; | 4795 | event->destroy = tp_perf_event_destroy; |
4676 | 4796 | ||
4677 | return &perf_ops_tracepoint; | 4797 | return 0; |
4798 | } | ||
4799 | |||
4800 | static struct pmu perf_tracepoint = { | ||
4801 | .task_ctx_nr = perf_sw_context, | ||
4802 | |||
4803 | .event_init = perf_tp_event_init, | ||
4804 | .add = perf_trace_add, | ||
4805 | .del = perf_trace_del, | ||
4806 | .start = perf_swevent_start, | ||
4807 | .stop = perf_swevent_stop, | ||
4808 | .read = perf_swevent_read, | ||
4809 | }; | ||
4810 | |||
4811 | static inline void perf_tp_register(void) | ||
4812 | { | ||
4813 | perf_pmu_register(&perf_tracepoint); | ||
4678 | } | 4814 | } |
4679 | 4815 | ||
4680 | static int perf_event_set_filter(struct perf_event *event, void __user *arg) | 4816 | static int perf_event_set_filter(struct perf_event *event, void __user *arg) |
@@ -4702,9 +4838,8 @@ static void perf_event_free_filter(struct perf_event *event) | |||
4702 | 4838 | ||
4703 | #else | 4839 | #else |
4704 | 4840 | ||
4705 | static const struct pmu *tp_perf_event_init(struct perf_event *event) | 4841 | static inline void perf_tp_register(void) |
4706 | { | 4842 | { |
4707 | return NULL; | ||
4708 | } | 4843 | } |
4709 | 4844 | ||
4710 | static int perf_event_set_filter(struct perf_event *event, void __user *arg) | 4845 | static int perf_event_set_filter(struct perf_event *event, void __user *arg) |
@@ -4719,105 +4854,389 @@ static void perf_event_free_filter(struct perf_event *event) | |||
4719 | #endif /* CONFIG_EVENT_TRACING */ | 4854 | #endif /* CONFIG_EVENT_TRACING */ |
4720 | 4855 | ||
4721 | #ifdef CONFIG_HAVE_HW_BREAKPOINT | 4856 | #ifdef CONFIG_HAVE_HW_BREAKPOINT |
4722 | static void bp_perf_event_destroy(struct perf_event *event) | 4857 | void perf_bp_event(struct perf_event *bp, void *data) |
4723 | { | 4858 | { |
4724 | release_bp_slot(event); | 4859 | struct perf_sample_data sample; |
4860 | struct pt_regs *regs = data; | ||
4861 | |||
4862 | perf_sample_data_init(&sample, bp->attr.bp_addr); | ||
4863 | |||
4864 | if (!bp->hw.state && !perf_exclude_event(bp, regs)) | ||
4865 | perf_swevent_event(bp, 1, 1, &sample, regs); | ||
4725 | } | 4866 | } |
4867 | #endif | ||
4726 | 4868 | ||
4727 | static const struct pmu *bp_perf_event_init(struct perf_event *bp) | 4869 | /* |
4870 | * hrtimer based swevent callback | ||
4871 | */ | ||
4872 | |||
4873 | static enum hrtimer_restart perf_swevent_hrtimer(struct hrtimer *hrtimer) | ||
4728 | { | 4874 | { |
4729 | int err; | 4875 | enum hrtimer_restart ret = HRTIMER_RESTART; |
4876 | struct perf_sample_data data; | ||
4877 | struct pt_regs *regs; | ||
4878 | struct perf_event *event; | ||
4879 | u64 period; | ||
4730 | 4880 | ||
4731 | err = register_perf_hw_breakpoint(bp); | 4881 | event = container_of(hrtimer, struct perf_event, hw.hrtimer); |
4732 | if (err) | 4882 | event->pmu->read(event); |
4733 | return ERR_PTR(err); | 4883 | |
4884 | perf_sample_data_init(&data, 0); | ||
4885 | data.period = event->hw.last_period; | ||
4886 | regs = get_irq_regs(); | ||
4887 | |||
4888 | if (regs && !perf_exclude_event(event, regs)) { | ||
4889 | if (!(event->attr.exclude_idle && current->pid == 0)) | ||
4890 | if (perf_event_overflow(event, 0, &data, regs)) | ||
4891 | ret = HRTIMER_NORESTART; | ||
4892 | } | ||
4893 | |||
4894 | period = max_t(u64, 10000, event->hw.sample_period); | ||
4895 | hrtimer_forward_now(hrtimer, ns_to_ktime(period)); | ||
4734 | 4896 | ||
4735 | bp->destroy = bp_perf_event_destroy; | 4897 | return ret; |
4898 | } | ||
4736 | 4899 | ||
4737 | return &perf_ops_bp; | 4900 | static void perf_swevent_start_hrtimer(struct perf_event *event) |
4901 | { | ||
4902 | struct hw_perf_event *hwc = &event->hw; | ||
4903 | |||
4904 | hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); | ||
4905 | hwc->hrtimer.function = perf_swevent_hrtimer; | ||
4906 | if (hwc->sample_period) { | ||
4907 | s64 period = local64_read(&hwc->period_left); | ||
4908 | |||
4909 | if (period) { | ||
4910 | if (period < 0) | ||
4911 | period = 10000; | ||
4912 | |||
4913 | local64_set(&hwc->period_left, 0); | ||
4914 | } else { | ||
4915 | period = max_t(u64, 10000, hwc->sample_period); | ||
4916 | } | ||
4917 | __hrtimer_start_range_ns(&hwc->hrtimer, | ||
4918 | ns_to_ktime(period), 0, | ||
4919 | HRTIMER_MODE_REL_PINNED, 0); | ||
4920 | } | ||
4738 | } | 4921 | } |
4739 | 4922 | ||
4740 | void perf_bp_event(struct perf_event *bp, void *data) | 4923 | static void perf_swevent_cancel_hrtimer(struct perf_event *event) |
4741 | { | 4924 | { |
4742 | struct perf_sample_data sample; | 4925 | struct hw_perf_event *hwc = &event->hw; |
4743 | struct pt_regs *regs = data; | ||
4744 | 4926 | ||
4745 | perf_sample_data_init(&sample, bp->attr.bp_addr); | 4927 | if (hwc->sample_period) { |
4928 | ktime_t remaining = hrtimer_get_remaining(&hwc->hrtimer); | ||
4929 | local64_set(&hwc->period_left, ktime_to_ns(remaining)); | ||
4746 | 4930 | ||
4747 | if (!perf_exclude_event(bp, regs)) | 4931 | hrtimer_cancel(&hwc->hrtimer); |
4748 | perf_swevent_add(bp, 1, 1, &sample, regs); | 4932 | } |
4749 | } | 4933 | } |
4750 | #else | 4934 | |
4751 | static const struct pmu *bp_perf_event_init(struct perf_event *bp) | 4935 | /* |
4936 | * Software event: cpu wall time clock | ||
4937 | */ | ||
4938 | |||
4939 | static void cpu_clock_event_update(struct perf_event *event) | ||
4752 | { | 4940 | { |
4753 | return NULL; | 4941 | s64 prev; |
4942 | u64 now; | ||
4943 | |||
4944 | now = local_clock(); | ||
4945 | prev = local64_xchg(&event->hw.prev_count, now); | ||
4946 | local64_add(now - prev, &event->count); | ||
4754 | } | 4947 | } |
4755 | 4948 | ||
4756 | void perf_bp_event(struct perf_event *bp, void *regs) | 4949 | static void cpu_clock_event_start(struct perf_event *event, int flags) |
4757 | { | 4950 | { |
4951 | local64_set(&event->hw.prev_count, local_clock()); | ||
4952 | perf_swevent_start_hrtimer(event); | ||
4758 | } | 4953 | } |
4759 | #endif | ||
4760 | 4954 | ||
4761 | atomic_t perf_swevent_enabled[PERF_COUNT_SW_MAX]; | 4955 | static void cpu_clock_event_stop(struct perf_event *event, int flags) |
4956 | { | ||
4957 | perf_swevent_cancel_hrtimer(event); | ||
4958 | cpu_clock_event_update(event); | ||
4959 | } | ||
4762 | 4960 | ||
4763 | static void sw_perf_event_destroy(struct perf_event *event) | 4961 | static int cpu_clock_event_add(struct perf_event *event, int flags) |
4764 | { | 4962 | { |
4765 | u64 event_id = event->attr.config; | 4963 | if (flags & PERF_EF_START) |
4964 | cpu_clock_event_start(event, flags); | ||
4766 | 4965 | ||
4767 | WARN_ON(event->parent); | 4966 | return 0; |
4967 | } | ||
4768 | 4968 | ||
4769 | atomic_dec(&perf_swevent_enabled[event_id]); | 4969 | static void cpu_clock_event_del(struct perf_event *event, int flags) |
4770 | swevent_hlist_put(event); | 4970 | { |
4971 | cpu_clock_event_stop(event, flags); | ||
4771 | } | 4972 | } |
4772 | 4973 | ||
4773 | static const struct pmu *sw_perf_event_init(struct perf_event *event) | 4974 | static void cpu_clock_event_read(struct perf_event *event) |
4774 | { | 4975 | { |
4775 | const struct pmu *pmu = NULL; | 4976 | cpu_clock_event_update(event); |
4776 | u64 event_id = event->attr.config; | 4977 | } |
4978 | |||
4979 | static int cpu_clock_event_init(struct perf_event *event) | ||
4980 | { | ||
4981 | if (event->attr.type != PERF_TYPE_SOFTWARE) | ||
4982 | return -ENOENT; | ||
4983 | |||
4984 | if (event->attr.config != PERF_COUNT_SW_CPU_CLOCK) | ||
4985 | return -ENOENT; | ||
4986 | |||
4987 | return 0; | ||
4988 | } | ||
4777 | 4989 | ||
4990 | static struct pmu perf_cpu_clock = { | ||
4991 | .task_ctx_nr = perf_sw_context, | ||
4992 | |||
4993 | .event_init = cpu_clock_event_init, | ||
4994 | .add = cpu_clock_event_add, | ||
4995 | .del = cpu_clock_event_del, | ||
4996 | .start = cpu_clock_event_start, | ||
4997 | .stop = cpu_clock_event_stop, | ||
4998 | .read = cpu_clock_event_read, | ||
4999 | }; | ||
5000 | |||
5001 | /* | ||
5002 | * Software event: task time clock | ||
5003 | */ | ||
5004 | |||
5005 | static void task_clock_event_update(struct perf_event *event, u64 now) | ||
5006 | { | ||
5007 | u64 prev; | ||
5008 | s64 delta; | ||
5009 | |||
5010 | prev = local64_xchg(&event->hw.prev_count, now); | ||
5011 | delta = now - prev; | ||
5012 | local64_add(delta, &event->count); | ||
5013 | } | ||
5014 | |||
5015 | static void task_clock_event_start(struct perf_event *event, int flags) | ||
5016 | { | ||
5017 | local64_set(&event->hw.prev_count, event->ctx->time); | ||
5018 | perf_swevent_start_hrtimer(event); | ||
5019 | } | ||
5020 | |||
5021 | static void task_clock_event_stop(struct perf_event *event, int flags) | ||
5022 | { | ||
5023 | perf_swevent_cancel_hrtimer(event); | ||
5024 | task_clock_event_update(event, event->ctx->time); | ||
5025 | } | ||
5026 | |||
5027 | static int task_clock_event_add(struct perf_event *event, int flags) | ||
5028 | { | ||
5029 | if (flags & PERF_EF_START) | ||
5030 | task_clock_event_start(event, flags); | ||
5031 | |||
5032 | return 0; | ||
5033 | } | ||
5034 | |||
5035 | static void task_clock_event_del(struct perf_event *event, int flags) | ||
5036 | { | ||
5037 | task_clock_event_stop(event, PERF_EF_UPDATE); | ||
5038 | } | ||
5039 | |||
5040 | static void task_clock_event_read(struct perf_event *event) | ||
5041 | { | ||
5042 | u64 time; | ||
5043 | |||
5044 | if (!in_nmi()) { | ||
5045 | update_context_time(event->ctx); | ||
5046 | time = event->ctx->time; | ||
5047 | } else { | ||
5048 | u64 now = perf_clock(); | ||
5049 | u64 delta = now - event->ctx->timestamp; | ||
5050 | time = event->ctx->time + delta; | ||
5051 | } | ||
5052 | |||
5053 | task_clock_event_update(event, time); | ||
5054 | } | ||
5055 | |||
5056 | static int task_clock_event_init(struct perf_event *event) | ||
5057 | { | ||
5058 | if (event->attr.type != PERF_TYPE_SOFTWARE) | ||
5059 | return -ENOENT; | ||
5060 | |||
5061 | if (event->attr.config != PERF_COUNT_SW_TASK_CLOCK) | ||
5062 | return -ENOENT; | ||
5063 | |||
5064 | return 0; | ||
5065 | } | ||
5066 | |||
5067 | static struct pmu perf_task_clock = { | ||
5068 | .task_ctx_nr = perf_sw_context, | ||
5069 | |||
5070 | .event_init = task_clock_event_init, | ||
5071 | .add = task_clock_event_add, | ||
5072 | .del = task_clock_event_del, | ||
5073 | .start = task_clock_event_start, | ||
5074 | .stop = task_clock_event_stop, | ||
5075 | .read = task_clock_event_read, | ||
5076 | }; | ||
5077 | |||
5078 | static void perf_pmu_nop_void(struct pmu *pmu) | ||
5079 | { | ||
5080 | } | ||
5081 | |||
5082 | static int perf_pmu_nop_int(struct pmu *pmu) | ||
5083 | { | ||
5084 | return 0; | ||
5085 | } | ||
5086 | |||
5087 | static void perf_pmu_start_txn(struct pmu *pmu) | ||
5088 | { | ||
5089 | perf_pmu_disable(pmu); | ||
5090 | } | ||
5091 | |||
5092 | static int perf_pmu_commit_txn(struct pmu *pmu) | ||
5093 | { | ||
5094 | perf_pmu_enable(pmu); | ||
5095 | return 0; | ||
5096 | } | ||
5097 | |||
5098 | static void perf_pmu_cancel_txn(struct pmu *pmu) | ||
5099 | { | ||
5100 | perf_pmu_enable(pmu); | ||
5101 | } | ||
5102 | |||
5103 | /* | ||
5104 | * Ensures all contexts with the same task_ctx_nr have the same | ||
5105 | * pmu_cpu_context too. | ||
5106 | */ | ||
5107 | static void *find_pmu_context(int ctxn) | ||
5108 | { | ||
5109 | struct pmu *pmu; | ||
5110 | |||
5111 | if (ctxn < 0) | ||
5112 | return NULL; | ||
5113 | |||
5114 | list_for_each_entry(pmu, &pmus, entry) { | ||
5115 | if (pmu->task_ctx_nr == ctxn) | ||
5116 | return pmu->pmu_cpu_context; | ||
5117 | } | ||
5118 | |||
5119 | return NULL; | ||
5120 | } | ||
5121 | |||
5122 | static void free_pmu_context(void * __percpu cpu_context) | ||
5123 | { | ||
5124 | struct pmu *pmu; | ||
5125 | |||
5126 | mutex_lock(&pmus_lock); | ||
4778 | /* | 5127 | /* |
4779 | * Software events (currently) can't in general distinguish | 5128 | * Like a real lame refcount. |
4780 | * between user, kernel and hypervisor events. | ||
4781 | * However, context switches and cpu migrations are considered | ||
4782 | * to be kernel events, and page faults are never hypervisor | ||
4783 | * events. | ||
4784 | */ | 5129 | */ |
4785 | switch (event_id) { | 5130 | list_for_each_entry(pmu, &pmus, entry) { |
4786 | case PERF_COUNT_SW_CPU_CLOCK: | 5131 | if (pmu->pmu_cpu_context == cpu_context) |
4787 | pmu = &perf_ops_cpu_clock; | 5132 | goto out; |
5133 | } | ||
4788 | 5134 | ||
4789 | break; | 5135 | free_percpu(cpu_context); |
4790 | case PERF_COUNT_SW_TASK_CLOCK: | 5136 | out: |
4791 | /* | 5137 | mutex_unlock(&pmus_lock); |
4792 | * If the user instantiates this as a per-cpu event, | 5138 | } |
4793 | * use the cpu_clock event instead. | ||
4794 | */ | ||
4795 | if (event->ctx->task) | ||
4796 | pmu = &perf_ops_task_clock; | ||
4797 | else | ||
4798 | pmu = &perf_ops_cpu_clock; | ||
4799 | 5139 | ||
4800 | break; | 5140 | int perf_pmu_register(struct pmu *pmu) |
4801 | case PERF_COUNT_SW_PAGE_FAULTS: | 5141 | { |
4802 | case PERF_COUNT_SW_PAGE_FAULTS_MIN: | 5142 | int cpu, ret; |
4803 | case PERF_COUNT_SW_PAGE_FAULTS_MAJ: | 5143 | |
4804 | case PERF_COUNT_SW_CONTEXT_SWITCHES: | 5144 | mutex_lock(&pmus_lock); |
4805 | case PERF_COUNT_SW_CPU_MIGRATIONS: | 5145 | ret = -ENOMEM; |
4806 | case PERF_COUNT_SW_ALIGNMENT_FAULTS: | 5146 | pmu->pmu_disable_count = alloc_percpu(int); |
4807 | case PERF_COUNT_SW_EMULATION_FAULTS: | 5147 | if (!pmu->pmu_disable_count) |
4808 | if (!event->parent) { | 5148 | goto unlock; |
4809 | int err; | ||
4810 | |||
4811 | err = swevent_hlist_get(event); | ||
4812 | if (err) | ||
4813 | return ERR_PTR(err); | ||
4814 | 5149 | ||
4815 | atomic_inc(&perf_swevent_enabled[event_id]); | 5150 | pmu->pmu_cpu_context = find_pmu_context(pmu->task_ctx_nr); |
4816 | event->destroy = sw_perf_event_destroy; | 5151 | if (pmu->pmu_cpu_context) |
5152 | goto got_cpu_context; | ||
5153 | |||
5154 | pmu->pmu_cpu_context = alloc_percpu(struct perf_cpu_context); | ||
5155 | if (!pmu->pmu_cpu_context) | ||
5156 | goto free_pdc; | ||
5157 | |||
5158 | for_each_possible_cpu(cpu) { | ||
5159 | struct perf_cpu_context *cpuctx; | ||
5160 | |||
5161 | cpuctx = per_cpu_ptr(pmu->pmu_cpu_context, cpu); | ||
5162 | __perf_event_init_context(&cpuctx->ctx); | ||
5163 | cpuctx->ctx.type = cpu_context; | ||
5164 | cpuctx->ctx.pmu = pmu; | ||
5165 | cpuctx->jiffies_interval = 1; | ||
5166 | INIT_LIST_HEAD(&cpuctx->rotation_list); | ||
5167 | } | ||
5168 | |||
5169 | got_cpu_context: | ||
5170 | if (!pmu->start_txn) { | ||
5171 | if (pmu->pmu_enable) { | ||
5172 | /* | ||
5173 | * If we have pmu_enable/pmu_disable calls, install | ||
5174 | * transaction stubs that use that to try and batch | ||
5175 | * hardware accesses. | ||
5176 | */ | ||
5177 | pmu->start_txn = perf_pmu_start_txn; | ||
5178 | pmu->commit_txn = perf_pmu_commit_txn; | ||
5179 | pmu->cancel_txn = perf_pmu_cancel_txn; | ||
5180 | } else { | ||
5181 | pmu->start_txn = perf_pmu_nop_void; | ||
5182 | pmu->commit_txn = perf_pmu_nop_int; | ||
5183 | pmu->cancel_txn = perf_pmu_nop_void; | ||
5184 | } | ||
5185 | } | ||
5186 | |||
5187 | if (!pmu->pmu_enable) { | ||
5188 | pmu->pmu_enable = perf_pmu_nop_void; | ||
5189 | pmu->pmu_disable = perf_pmu_nop_void; | ||
5190 | } | ||
5191 | |||
5192 | list_add_rcu(&pmu->entry, &pmus); | ||
5193 | ret = 0; | ||
5194 | unlock: | ||
5195 | mutex_unlock(&pmus_lock); | ||
5196 | |||
5197 | return ret; | ||
5198 | |||
5199 | free_pdc: | ||
5200 | free_percpu(pmu->pmu_disable_count); | ||
5201 | goto unlock; | ||
5202 | } | ||
5203 | |||
5204 | void perf_pmu_unregister(struct pmu *pmu) | ||
5205 | { | ||
5206 | mutex_lock(&pmus_lock); | ||
5207 | list_del_rcu(&pmu->entry); | ||
5208 | mutex_unlock(&pmus_lock); | ||
5209 | |||
5210 | /* | ||
5211 | * We dereference the pmu list under both SRCU and regular RCU, so | ||
5212 | * synchronize against both of those. | ||
5213 | */ | ||
5214 | synchronize_srcu(&pmus_srcu); | ||
5215 | synchronize_rcu(); | ||
5216 | |||
5217 | free_percpu(pmu->pmu_disable_count); | ||
5218 | free_pmu_context(pmu->pmu_cpu_context); | ||
5219 | } | ||
5220 | |||
5221 | struct pmu *perf_init_event(struct perf_event *event) | ||
5222 | { | ||
5223 | struct pmu *pmu = NULL; | ||
5224 | int idx; | ||
5225 | |||
5226 | idx = srcu_read_lock(&pmus_srcu); | ||
5227 | list_for_each_entry_rcu(pmu, &pmus, entry) { | ||
5228 | int ret = pmu->event_init(event); | ||
5229 | if (!ret) | ||
5230 | goto unlock; | ||
5231 | |||
5232 | if (ret != -ENOENT) { | ||
5233 | pmu = ERR_PTR(ret); | ||
5234 | goto unlock; | ||
4817 | } | 5235 | } |
4818 | pmu = &perf_ops_generic; | ||
4819 | break; | ||
4820 | } | 5236 | } |
5237 | pmu = ERR_PTR(-ENOENT); | ||
5238 | unlock: | ||
5239 | srcu_read_unlock(&pmus_srcu, idx); | ||
4821 | 5240 | ||
4822 | return pmu; | 5241 | return pmu; |
4823 | } | 5242 | } |
@@ -4826,20 +5245,18 @@ static const struct pmu *sw_perf_event_init(struct perf_event *event) | |||
4826 | * Allocate and initialize a event structure | 5245 | * Allocate and initialize a event structure |
4827 | */ | 5246 | */ |
4828 | static struct perf_event * | 5247 | static struct perf_event * |
4829 | perf_event_alloc(struct perf_event_attr *attr, | 5248 | perf_event_alloc(struct perf_event_attr *attr, int cpu, |
4830 | int cpu, | 5249 | struct task_struct *task, |
4831 | struct perf_event_context *ctx, | 5250 | struct perf_event *group_leader, |
4832 | struct perf_event *group_leader, | 5251 | struct perf_event *parent_event, |
4833 | struct perf_event *parent_event, | 5252 | perf_overflow_handler_t overflow_handler) |
4834 | perf_overflow_handler_t overflow_handler, | 5253 | { |
4835 | gfp_t gfpflags) | 5254 | struct pmu *pmu; |
4836 | { | ||
4837 | const struct pmu *pmu; | ||
4838 | struct perf_event *event; | 5255 | struct perf_event *event; |
4839 | struct hw_perf_event *hwc; | 5256 | struct hw_perf_event *hwc; |
4840 | long err; | 5257 | long err; |
4841 | 5258 | ||
4842 | event = kzalloc(sizeof(*event), gfpflags); | 5259 | event = kzalloc(sizeof(*event), GFP_KERNEL); |
4843 | if (!event) | 5260 | if (!event) |
4844 | return ERR_PTR(-ENOMEM); | 5261 | return ERR_PTR(-ENOMEM); |
4845 | 5262 | ||
@@ -4857,6 +5274,7 @@ perf_event_alloc(struct perf_event_attr *attr, | |||
4857 | INIT_LIST_HEAD(&event->event_entry); | 5274 | INIT_LIST_HEAD(&event->event_entry); |
4858 | INIT_LIST_HEAD(&event->sibling_list); | 5275 | INIT_LIST_HEAD(&event->sibling_list); |
4859 | init_waitqueue_head(&event->waitq); | 5276 | init_waitqueue_head(&event->waitq); |
5277 | init_irq_work(&event->pending, perf_pending_event); | ||
4860 | 5278 | ||
4861 | mutex_init(&event->mmap_mutex); | 5279 | mutex_init(&event->mmap_mutex); |
4862 | 5280 | ||
@@ -4864,7 +5282,6 @@ perf_event_alloc(struct perf_event_attr *attr, | |||
4864 | event->attr = *attr; | 5282 | event->attr = *attr; |
4865 | event->group_leader = group_leader; | 5283 | event->group_leader = group_leader; |
4866 | event->pmu = NULL; | 5284 | event->pmu = NULL; |
4867 | event->ctx = ctx; | ||
4868 | event->oncpu = -1; | 5285 | event->oncpu = -1; |
4869 | 5286 | ||
4870 | event->parent = parent_event; | 5287 | event->parent = parent_event; |
@@ -4874,6 +5291,17 @@ perf_event_alloc(struct perf_event_attr *attr, | |||
4874 | 5291 | ||
4875 | event->state = PERF_EVENT_STATE_INACTIVE; | 5292 | event->state = PERF_EVENT_STATE_INACTIVE; |
4876 | 5293 | ||
5294 | if (task) { | ||
5295 | event->attach_state = PERF_ATTACH_TASK; | ||
5296 | #ifdef CONFIG_HAVE_HW_BREAKPOINT | ||
5297 | /* | ||
5298 | * hw_breakpoint is a bit difficult here.. | ||
5299 | */ | ||
5300 | if (attr->type == PERF_TYPE_BREAKPOINT) | ||
5301 | event->hw.bp_target = task; | ||
5302 | #endif | ||
5303 | } | ||
5304 | |||
4877 | if (!overflow_handler && parent_event) | 5305 | if (!overflow_handler && parent_event) |
4878 | overflow_handler = parent_event->overflow_handler; | 5306 | overflow_handler = parent_event->overflow_handler; |
4879 | 5307 | ||
@@ -4898,29 +5326,8 @@ perf_event_alloc(struct perf_event_attr *attr, | |||
4898 | if (attr->inherit && (attr->read_format & PERF_FORMAT_GROUP)) | 5326 | if (attr->inherit && (attr->read_format & PERF_FORMAT_GROUP)) |
4899 | goto done; | 5327 | goto done; |
4900 | 5328 | ||
4901 | switch (attr->type) { | 5329 | pmu = perf_init_event(event); |
4902 | case PERF_TYPE_RAW: | ||
4903 | case PERF_TYPE_HARDWARE: | ||
4904 | case PERF_TYPE_HW_CACHE: | ||
4905 | pmu = hw_perf_event_init(event); | ||
4906 | break; | ||
4907 | |||
4908 | case PERF_TYPE_SOFTWARE: | ||
4909 | pmu = sw_perf_event_init(event); | ||
4910 | break; | ||
4911 | |||
4912 | case PERF_TYPE_TRACEPOINT: | ||
4913 | pmu = tp_perf_event_init(event); | ||
4914 | break; | ||
4915 | 5330 | ||
4916 | case PERF_TYPE_BREAKPOINT: | ||
4917 | pmu = bp_perf_event_init(event); | ||
4918 | break; | ||
4919 | |||
4920 | |||
4921 | default: | ||
4922 | break; | ||
4923 | } | ||
4924 | done: | 5331 | done: |
4925 | err = 0; | 5332 | err = 0; |
4926 | if (!pmu) | 5333 | if (!pmu) |
@@ -4938,13 +5345,21 @@ done: | |||
4938 | event->pmu = pmu; | 5345 | event->pmu = pmu; |
4939 | 5346 | ||
4940 | if (!event->parent) { | 5347 | if (!event->parent) { |
4941 | atomic_inc(&nr_events); | 5348 | if (event->attach_state & PERF_ATTACH_TASK) |
5349 | jump_label_inc(&perf_task_events); | ||
4942 | if (event->attr.mmap || event->attr.mmap_data) | 5350 | if (event->attr.mmap || event->attr.mmap_data) |
4943 | atomic_inc(&nr_mmap_events); | 5351 | atomic_inc(&nr_mmap_events); |
4944 | if (event->attr.comm) | 5352 | if (event->attr.comm) |
4945 | atomic_inc(&nr_comm_events); | 5353 | atomic_inc(&nr_comm_events); |
4946 | if (event->attr.task) | 5354 | if (event->attr.task) |
4947 | atomic_inc(&nr_task_events); | 5355 | atomic_inc(&nr_task_events); |
5356 | if (event->attr.sample_type & PERF_SAMPLE_CALLCHAIN) { | ||
5357 | err = get_callchain_buffers(); | ||
5358 | if (err) { | ||
5359 | free_event(event); | ||
5360 | return ERR_PTR(err); | ||
5361 | } | ||
5362 | } | ||
4948 | } | 5363 | } |
4949 | 5364 | ||
4950 | return event; | 5365 | return event; |
@@ -5092,12 +5507,16 @@ SYSCALL_DEFINE5(perf_event_open, | |||
5092 | struct perf_event_attr __user *, attr_uptr, | 5507 | struct perf_event_attr __user *, attr_uptr, |
5093 | pid_t, pid, int, cpu, int, group_fd, unsigned long, flags) | 5508 | pid_t, pid, int, cpu, int, group_fd, unsigned long, flags) |
5094 | { | 5509 | { |
5095 | struct perf_event *event, *group_leader = NULL, *output_event = NULL; | 5510 | struct perf_event *group_leader = NULL, *output_event = NULL; |
5511 | struct perf_event *event, *sibling; | ||
5096 | struct perf_event_attr attr; | 5512 | struct perf_event_attr attr; |
5097 | struct perf_event_context *ctx; | 5513 | struct perf_event_context *ctx; |
5098 | struct file *event_file = NULL; | 5514 | struct file *event_file = NULL; |
5099 | struct file *group_file = NULL; | 5515 | struct file *group_file = NULL; |
5516 | struct task_struct *task = NULL; | ||
5517 | struct pmu *pmu; | ||
5100 | int event_fd; | 5518 | int event_fd; |
5519 | int move_group = 0; | ||
5101 | int fput_needed = 0; | 5520 | int fput_needed = 0; |
5102 | int err; | 5521 | int err; |
5103 | 5522 | ||
@@ -5123,20 +5542,11 @@ SYSCALL_DEFINE5(perf_event_open, | |||
5123 | if (event_fd < 0) | 5542 | if (event_fd < 0) |
5124 | return event_fd; | 5543 | return event_fd; |
5125 | 5544 | ||
5126 | /* | ||
5127 | * Get the target context (task or percpu): | ||
5128 | */ | ||
5129 | ctx = find_get_context(pid, cpu); | ||
5130 | if (IS_ERR(ctx)) { | ||
5131 | err = PTR_ERR(ctx); | ||
5132 | goto err_fd; | ||
5133 | } | ||
5134 | |||
5135 | if (group_fd != -1) { | 5545 | if (group_fd != -1) { |
5136 | group_leader = perf_fget_light(group_fd, &fput_needed); | 5546 | group_leader = perf_fget_light(group_fd, &fput_needed); |
5137 | if (IS_ERR(group_leader)) { | 5547 | if (IS_ERR(group_leader)) { |
5138 | err = PTR_ERR(group_leader); | 5548 | err = PTR_ERR(group_leader); |
5139 | goto err_put_context; | 5549 | goto err_fd; |
5140 | } | 5550 | } |
5141 | group_file = group_leader->filp; | 5551 | group_file = group_leader->filp; |
5142 | if (flags & PERF_FLAG_FD_OUTPUT) | 5552 | if (flags & PERF_FLAG_FD_OUTPUT) |
@@ -5145,6 +5555,58 @@ SYSCALL_DEFINE5(perf_event_open, | |||
5145 | group_leader = NULL; | 5555 | group_leader = NULL; |
5146 | } | 5556 | } |
5147 | 5557 | ||
5558 | if (pid != -1) { | ||
5559 | task = find_lively_task_by_vpid(pid); | ||
5560 | if (IS_ERR(task)) { | ||
5561 | err = PTR_ERR(task); | ||
5562 | goto err_group_fd; | ||
5563 | } | ||
5564 | } | ||
5565 | |||
5566 | event = perf_event_alloc(&attr, cpu, task, group_leader, NULL, NULL); | ||
5567 | if (IS_ERR(event)) { | ||
5568 | err = PTR_ERR(event); | ||
5569 | goto err_task; | ||
5570 | } | ||
5571 | |||
5572 | /* | ||
5573 | * Special case software events and allow them to be part of | ||
5574 | * any hardware group. | ||
5575 | */ | ||
5576 | pmu = event->pmu; | ||
5577 | |||
5578 | if (group_leader && | ||
5579 | (is_software_event(event) != is_software_event(group_leader))) { | ||
5580 | if (is_software_event(event)) { | ||
5581 | /* | ||
5582 | * If event and group_leader are not both a software | ||
5583 | * event, and event is, then group leader is not. | ||
5584 | * | ||
5585 | * Allow the addition of software events to !software | ||
5586 | * groups, this is safe because software events never | ||
5587 | * fail to schedule. | ||
5588 | */ | ||
5589 | pmu = group_leader->pmu; | ||
5590 | } else if (is_software_event(group_leader) && | ||
5591 | (group_leader->group_flags & PERF_GROUP_SOFTWARE)) { | ||
5592 | /* | ||
5593 | * In case the group is a pure software group, and we | ||
5594 | * try to add a hardware event, move the whole group to | ||
5595 | * the hardware context. | ||
5596 | */ | ||
5597 | move_group = 1; | ||
5598 | } | ||
5599 | } | ||
5600 | |||
5601 | /* | ||
5602 | * Get the target context (task or percpu): | ||
5603 | */ | ||
5604 | ctx = find_get_context(pmu, task, cpu); | ||
5605 | if (IS_ERR(ctx)) { | ||
5606 | err = PTR_ERR(ctx); | ||
5607 | goto err_alloc; | ||
5608 | } | ||
5609 | |||
5148 | /* | 5610 | /* |
5149 | * Look up the group leader (we will attach this event to it): | 5611 | * Look up the group leader (we will attach this event to it): |
5150 | */ | 5612 | */ |
@@ -5156,42 +5618,66 @@ SYSCALL_DEFINE5(perf_event_open, | |||
5156 | * becoming part of another group-sibling): | 5618 | * becoming part of another group-sibling): |
5157 | */ | 5619 | */ |
5158 | if (group_leader->group_leader != group_leader) | 5620 | if (group_leader->group_leader != group_leader) |
5159 | goto err_put_context; | 5621 | goto err_context; |
5160 | /* | 5622 | /* |
5161 | * Do not allow to attach to a group in a different | 5623 | * Do not allow to attach to a group in a different |
5162 | * task or CPU context: | 5624 | * task or CPU context: |
5163 | */ | 5625 | */ |
5164 | if (group_leader->ctx != ctx) | 5626 | if (move_group) { |
5165 | goto err_put_context; | 5627 | if (group_leader->ctx->type != ctx->type) |
5628 | goto err_context; | ||
5629 | } else { | ||
5630 | if (group_leader->ctx != ctx) | ||
5631 | goto err_context; | ||
5632 | } | ||
5633 | |||
5166 | /* | 5634 | /* |
5167 | * Only a group leader can be exclusive or pinned | 5635 | * Only a group leader can be exclusive or pinned |
5168 | */ | 5636 | */ |
5169 | if (attr.exclusive || attr.pinned) | 5637 | if (attr.exclusive || attr.pinned) |
5170 | goto err_put_context; | 5638 | goto err_context; |
5171 | } | ||
5172 | |||
5173 | event = perf_event_alloc(&attr, cpu, ctx, group_leader, | ||
5174 | NULL, NULL, GFP_KERNEL); | ||
5175 | if (IS_ERR(event)) { | ||
5176 | err = PTR_ERR(event); | ||
5177 | goto err_put_context; | ||
5178 | } | 5639 | } |
5179 | 5640 | ||
5180 | if (output_event) { | 5641 | if (output_event) { |
5181 | err = perf_event_set_output(event, output_event); | 5642 | err = perf_event_set_output(event, output_event); |
5182 | if (err) | 5643 | if (err) |
5183 | goto err_free_put_context; | 5644 | goto err_context; |
5184 | } | 5645 | } |
5185 | 5646 | ||
5186 | event_file = anon_inode_getfile("[perf_event]", &perf_fops, event, O_RDWR); | 5647 | event_file = anon_inode_getfile("[perf_event]", &perf_fops, event, O_RDWR); |
5187 | if (IS_ERR(event_file)) { | 5648 | if (IS_ERR(event_file)) { |
5188 | err = PTR_ERR(event_file); | 5649 | err = PTR_ERR(event_file); |
5189 | goto err_free_put_context; | 5650 | goto err_context; |
5651 | } | ||
5652 | |||
5653 | if (move_group) { | ||
5654 | struct perf_event_context *gctx = group_leader->ctx; | ||
5655 | |||
5656 | mutex_lock(&gctx->mutex); | ||
5657 | perf_event_remove_from_context(group_leader); | ||
5658 | list_for_each_entry(sibling, &group_leader->sibling_list, | ||
5659 | group_entry) { | ||
5660 | perf_event_remove_from_context(sibling); | ||
5661 | put_ctx(gctx); | ||
5662 | } | ||
5663 | mutex_unlock(&gctx->mutex); | ||
5664 | put_ctx(gctx); | ||
5190 | } | 5665 | } |
5191 | 5666 | ||
5192 | event->filp = event_file; | 5667 | event->filp = event_file; |
5193 | WARN_ON_ONCE(ctx->parent_ctx); | 5668 | WARN_ON_ONCE(ctx->parent_ctx); |
5194 | mutex_lock(&ctx->mutex); | 5669 | mutex_lock(&ctx->mutex); |
5670 | |||
5671 | if (move_group) { | ||
5672 | perf_install_in_context(ctx, group_leader, cpu); | ||
5673 | get_ctx(ctx); | ||
5674 | list_for_each_entry(sibling, &group_leader->sibling_list, | ||
5675 | group_entry) { | ||
5676 | perf_install_in_context(ctx, sibling, cpu); | ||
5677 | get_ctx(ctx); | ||
5678 | } | ||
5679 | } | ||
5680 | |||
5195 | perf_install_in_context(ctx, event, cpu); | 5681 | perf_install_in_context(ctx, event, cpu); |
5196 | ++ctx->generation; | 5682 | ++ctx->generation; |
5197 | mutex_unlock(&ctx->mutex); | 5683 | mutex_unlock(&ctx->mutex); |
@@ -5212,11 +5698,15 @@ SYSCALL_DEFINE5(perf_event_open, | |||
5212 | fd_install(event_fd, event_file); | 5698 | fd_install(event_fd, event_file); |
5213 | return event_fd; | 5699 | return event_fd; |
5214 | 5700 | ||
5215 | err_free_put_context: | 5701 | err_context: |
5702 | put_ctx(ctx); | ||
5703 | err_alloc: | ||
5216 | free_event(event); | 5704 | free_event(event); |
5217 | err_put_context: | 5705 | err_task: |
5706 | if (task) | ||
5707 | put_task_struct(task); | ||
5708 | err_group_fd: | ||
5218 | fput_light(group_file, fput_needed); | 5709 | fput_light(group_file, fput_needed); |
5219 | put_ctx(ctx); | ||
5220 | err_fd: | 5710 | err_fd: |
5221 | put_unused_fd(event_fd); | 5711 | put_unused_fd(event_fd); |
5222 | return err; | 5712 | return err; |
@@ -5227,32 +5717,31 @@ err_fd: | |||
5227 | * | 5717 | * |
5228 | * @attr: attributes of the counter to create | 5718 | * @attr: attributes of the counter to create |
5229 | * @cpu: cpu in which the counter is bound | 5719 | * @cpu: cpu in which the counter is bound |
5230 | * @pid: task to profile | 5720 | * @task: task to profile (NULL for percpu) |
5231 | */ | 5721 | */ |
5232 | struct perf_event * | 5722 | struct perf_event * |
5233 | perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu, | 5723 | perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu, |
5234 | pid_t pid, | 5724 | struct task_struct *task, |
5235 | perf_overflow_handler_t overflow_handler) | 5725 | perf_overflow_handler_t overflow_handler) |
5236 | { | 5726 | { |
5237 | struct perf_event *event; | ||
5238 | struct perf_event_context *ctx; | 5727 | struct perf_event_context *ctx; |
5728 | struct perf_event *event; | ||
5239 | int err; | 5729 | int err; |
5240 | 5730 | ||
5241 | /* | 5731 | /* |
5242 | * Get the target context (task or percpu): | 5732 | * Get the target context (task or percpu): |
5243 | */ | 5733 | */ |
5244 | 5734 | ||
5245 | ctx = find_get_context(pid, cpu); | 5735 | event = perf_event_alloc(attr, cpu, task, NULL, NULL, overflow_handler); |
5246 | if (IS_ERR(ctx)) { | ||
5247 | err = PTR_ERR(ctx); | ||
5248 | goto err_exit; | ||
5249 | } | ||
5250 | |||
5251 | event = perf_event_alloc(attr, cpu, ctx, NULL, | ||
5252 | NULL, overflow_handler, GFP_KERNEL); | ||
5253 | if (IS_ERR(event)) { | 5736 | if (IS_ERR(event)) { |
5254 | err = PTR_ERR(event); | 5737 | err = PTR_ERR(event); |
5255 | goto err_put_context; | 5738 | goto err; |
5739 | } | ||
5740 | |||
5741 | ctx = find_get_context(event->pmu, task, cpu); | ||
5742 | if (IS_ERR(ctx)) { | ||
5743 | err = PTR_ERR(ctx); | ||
5744 | goto err_free; | ||
5256 | } | 5745 | } |
5257 | 5746 | ||
5258 | event->filp = NULL; | 5747 | event->filp = NULL; |
@@ -5270,112 +5759,13 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu, | |||
5270 | 5759 | ||
5271 | return event; | 5760 | return event; |
5272 | 5761 | ||
5273 | err_put_context: | 5762 | err_free: |
5274 | put_ctx(ctx); | 5763 | free_event(event); |
5275 | err_exit: | 5764 | err: |
5276 | return ERR_PTR(err); | 5765 | return ERR_PTR(err); |
5277 | } | 5766 | } |
5278 | EXPORT_SYMBOL_GPL(perf_event_create_kernel_counter); | 5767 | EXPORT_SYMBOL_GPL(perf_event_create_kernel_counter); |
5279 | 5768 | ||
5280 | /* | ||
5281 | * inherit a event from parent task to child task: | ||
5282 | */ | ||
5283 | static struct perf_event * | ||
5284 | inherit_event(struct perf_event *parent_event, | ||
5285 | struct task_struct *parent, | ||
5286 | struct perf_event_context *parent_ctx, | ||
5287 | struct task_struct *child, | ||
5288 | struct perf_event *group_leader, | ||
5289 | struct perf_event_context *child_ctx) | ||
5290 | { | ||
5291 | struct perf_event *child_event; | ||
5292 | |||
5293 | /* | ||
5294 | * Instead of creating recursive hierarchies of events, | ||
5295 | * we link inherited events back to the original parent, | ||
5296 | * which has a filp for sure, which we use as the reference | ||
5297 | * count: | ||
5298 | */ | ||
5299 | if (parent_event->parent) | ||
5300 | parent_event = parent_event->parent; | ||
5301 | |||
5302 | child_event = perf_event_alloc(&parent_event->attr, | ||
5303 | parent_event->cpu, child_ctx, | ||
5304 | group_leader, parent_event, | ||
5305 | NULL, GFP_KERNEL); | ||
5306 | if (IS_ERR(child_event)) | ||
5307 | return child_event; | ||
5308 | get_ctx(child_ctx); | ||
5309 | |||
5310 | /* | ||
5311 | * Make the child state follow the state of the parent event, | ||
5312 | * not its attr.disabled bit. We hold the parent's mutex, | ||
5313 | * so we won't race with perf_event_{en, dis}able_family. | ||
5314 | */ | ||
5315 | if (parent_event->state >= PERF_EVENT_STATE_INACTIVE) | ||
5316 | child_event->state = PERF_EVENT_STATE_INACTIVE; | ||
5317 | else | ||
5318 | child_event->state = PERF_EVENT_STATE_OFF; | ||
5319 | |||
5320 | if (parent_event->attr.freq) { | ||
5321 | u64 sample_period = parent_event->hw.sample_period; | ||
5322 | struct hw_perf_event *hwc = &child_event->hw; | ||
5323 | |||
5324 | hwc->sample_period = sample_period; | ||
5325 | hwc->last_period = sample_period; | ||
5326 | |||
5327 | local64_set(&hwc->period_left, sample_period); | ||
5328 | } | ||
5329 | |||
5330 | child_event->overflow_handler = parent_event->overflow_handler; | ||
5331 | |||
5332 | /* | ||
5333 | * Link it up in the child's context: | ||
5334 | */ | ||
5335 | add_event_to_ctx(child_event, child_ctx); | ||
5336 | |||
5337 | /* | ||
5338 | * Get a reference to the parent filp - we will fput it | ||
5339 | * when the child event exits. This is safe to do because | ||
5340 | * we are in the parent and we know that the filp still | ||
5341 | * exists and has a nonzero count: | ||
5342 | */ | ||
5343 | atomic_long_inc(&parent_event->filp->f_count); | ||
5344 | |||
5345 | /* | ||
5346 | * Link this into the parent event's child list | ||
5347 | */ | ||
5348 | WARN_ON_ONCE(parent_event->ctx->parent_ctx); | ||
5349 | mutex_lock(&parent_event->child_mutex); | ||
5350 | list_add_tail(&child_event->child_list, &parent_event->child_list); | ||
5351 | mutex_unlock(&parent_event->child_mutex); | ||
5352 | |||
5353 | return child_event; | ||
5354 | } | ||
5355 | |||
5356 | static int inherit_group(struct perf_event *parent_event, | ||
5357 | struct task_struct *parent, | ||
5358 | struct perf_event_context *parent_ctx, | ||
5359 | struct task_struct *child, | ||
5360 | struct perf_event_context *child_ctx) | ||
5361 | { | ||
5362 | struct perf_event *leader; | ||
5363 | struct perf_event *sub; | ||
5364 | struct perf_event *child_ctr; | ||
5365 | |||
5366 | leader = inherit_event(parent_event, parent, parent_ctx, | ||
5367 | child, NULL, child_ctx); | ||
5368 | if (IS_ERR(leader)) | ||
5369 | return PTR_ERR(leader); | ||
5370 | list_for_each_entry(sub, &parent_event->sibling_list, group_entry) { | ||
5371 | child_ctr = inherit_event(sub, parent, parent_ctx, | ||
5372 | child, leader, child_ctx); | ||
5373 | if (IS_ERR(child_ctr)) | ||
5374 | return PTR_ERR(child_ctr); | ||
5375 | } | ||
5376 | return 0; | ||
5377 | } | ||
5378 | |||
5379 | static void sync_child_event(struct perf_event *child_event, | 5769 | static void sync_child_event(struct perf_event *child_event, |
5380 | struct task_struct *child) | 5770 | struct task_struct *child) |
5381 | { | 5771 | { |
@@ -5432,16 +5822,13 @@ __perf_event_exit_task(struct perf_event *child_event, | |||
5432 | } | 5822 | } |
5433 | } | 5823 | } |
5434 | 5824 | ||
5435 | /* | 5825 | static void perf_event_exit_task_context(struct task_struct *child, int ctxn) |
5436 | * When a child task exits, feed back event values to parent events. | ||
5437 | */ | ||
5438 | void perf_event_exit_task(struct task_struct *child) | ||
5439 | { | 5826 | { |
5440 | struct perf_event *child_event, *tmp; | 5827 | struct perf_event *child_event, *tmp; |
5441 | struct perf_event_context *child_ctx; | 5828 | struct perf_event_context *child_ctx; |
5442 | unsigned long flags; | 5829 | unsigned long flags; |
5443 | 5830 | ||
5444 | if (likely(!child->perf_event_ctxp)) { | 5831 | if (likely(!child->perf_event_ctxp[ctxn])) { |
5445 | perf_event_task(child, NULL, 0); | 5832 | perf_event_task(child, NULL, 0); |
5446 | return; | 5833 | return; |
5447 | } | 5834 | } |
@@ -5453,8 +5840,8 @@ void perf_event_exit_task(struct task_struct *child) | |||
5453 | * scheduled, so we are now safe from rescheduling changing | 5840 | * scheduled, so we are now safe from rescheduling changing |
5454 | * our context. | 5841 | * our context. |
5455 | */ | 5842 | */ |
5456 | child_ctx = child->perf_event_ctxp; | 5843 | child_ctx = child->perf_event_ctxp[ctxn]; |
5457 | __perf_event_task_sched_out(child_ctx); | 5844 | task_ctx_sched_out(child_ctx, EVENT_ALL); |
5458 | 5845 | ||
5459 | /* | 5846 | /* |
5460 | * Take the context lock here so that if find_get_context is | 5847 | * Take the context lock here so that if find_get_context is |
@@ -5462,7 +5849,7 @@ void perf_event_exit_task(struct task_struct *child) | |||
5462 | * incremented the context's refcount before we do put_ctx below. | 5849 | * incremented the context's refcount before we do put_ctx below. |
5463 | */ | 5850 | */ |
5464 | raw_spin_lock(&child_ctx->lock); | 5851 | raw_spin_lock(&child_ctx->lock); |
5465 | child->perf_event_ctxp = NULL; | 5852 | child->perf_event_ctxp[ctxn] = NULL; |
5466 | /* | 5853 | /* |
5467 | * If this context is a clone; unclone it so it can't get | 5854 | * If this context is a clone; unclone it so it can't get |
5468 | * swapped to another process while we're removing all | 5855 | * swapped to another process while we're removing all |
@@ -5515,6 +5902,17 @@ again: | |||
5515 | put_ctx(child_ctx); | 5902 | put_ctx(child_ctx); |
5516 | } | 5903 | } |
5517 | 5904 | ||
5905 | /* | ||
5906 | * When a child task exits, feed back event values to parent events. | ||
5907 | */ | ||
5908 | void perf_event_exit_task(struct task_struct *child) | ||
5909 | { | ||
5910 | int ctxn; | ||
5911 | |||
5912 | for_each_task_context_nr(ctxn) | ||
5913 | perf_event_exit_task_context(child, ctxn); | ||
5914 | } | ||
5915 | |||
5518 | static void perf_free_event(struct perf_event *event, | 5916 | static void perf_free_event(struct perf_event *event, |
5519 | struct perf_event_context *ctx) | 5917 | struct perf_event_context *ctx) |
5520 | { | 5918 | { |
@@ -5536,48 +5934,166 @@ static void perf_free_event(struct perf_event *event, | |||
5536 | 5934 | ||
5537 | /* | 5935 | /* |
5538 | * free an unexposed, unused context as created by inheritance by | 5936 | * free an unexposed, unused context as created by inheritance by |
5539 | * init_task below, used by fork() in case of fail. | 5937 | * perf_event_init_task below, used by fork() in case of fail. |
5540 | */ | 5938 | */ |
5541 | void perf_event_free_task(struct task_struct *task) | 5939 | void perf_event_free_task(struct task_struct *task) |
5542 | { | 5940 | { |
5543 | struct perf_event_context *ctx = task->perf_event_ctxp; | 5941 | struct perf_event_context *ctx; |
5544 | struct perf_event *event, *tmp; | 5942 | struct perf_event *event, *tmp; |
5943 | int ctxn; | ||
5545 | 5944 | ||
5546 | if (!ctx) | 5945 | for_each_task_context_nr(ctxn) { |
5547 | return; | 5946 | ctx = task->perf_event_ctxp[ctxn]; |
5947 | if (!ctx) | ||
5948 | continue; | ||
5548 | 5949 | ||
5549 | mutex_lock(&ctx->mutex); | 5950 | mutex_lock(&ctx->mutex); |
5550 | again: | 5951 | again: |
5551 | list_for_each_entry_safe(event, tmp, &ctx->pinned_groups, group_entry) | 5952 | list_for_each_entry_safe(event, tmp, &ctx->pinned_groups, |
5552 | perf_free_event(event, ctx); | 5953 | group_entry) |
5954 | perf_free_event(event, ctx); | ||
5553 | 5955 | ||
5554 | list_for_each_entry_safe(event, tmp, &ctx->flexible_groups, | 5956 | list_for_each_entry_safe(event, tmp, &ctx->flexible_groups, |
5555 | group_entry) | 5957 | group_entry) |
5556 | perf_free_event(event, ctx); | 5958 | perf_free_event(event, ctx); |
5557 | 5959 | ||
5558 | if (!list_empty(&ctx->pinned_groups) || | 5960 | if (!list_empty(&ctx->pinned_groups) || |
5559 | !list_empty(&ctx->flexible_groups)) | 5961 | !list_empty(&ctx->flexible_groups)) |
5560 | goto again; | 5962 | goto again; |
5561 | 5963 | ||
5562 | mutex_unlock(&ctx->mutex); | 5964 | mutex_unlock(&ctx->mutex); |
5563 | 5965 | ||
5564 | put_ctx(ctx); | 5966 | put_ctx(ctx); |
5967 | } | ||
5968 | } | ||
5969 | |||
5970 | void perf_event_delayed_put(struct task_struct *task) | ||
5971 | { | ||
5972 | int ctxn; | ||
5973 | |||
5974 | for_each_task_context_nr(ctxn) | ||
5975 | WARN_ON_ONCE(task->perf_event_ctxp[ctxn]); | ||
5976 | } | ||
5977 | |||
5978 | /* | ||
5979 | * inherit a event from parent task to child task: | ||
5980 | */ | ||
5981 | static struct perf_event * | ||
5982 | inherit_event(struct perf_event *parent_event, | ||
5983 | struct task_struct *parent, | ||
5984 | struct perf_event_context *parent_ctx, | ||
5985 | struct task_struct *child, | ||
5986 | struct perf_event *group_leader, | ||
5987 | struct perf_event_context *child_ctx) | ||
5988 | { | ||
5989 | struct perf_event *child_event; | ||
5990 | unsigned long flags; | ||
5991 | |||
5992 | /* | ||
5993 | * Instead of creating recursive hierarchies of events, | ||
5994 | * we link inherited events back to the original parent, | ||
5995 | * which has a filp for sure, which we use as the reference | ||
5996 | * count: | ||
5997 | */ | ||
5998 | if (parent_event->parent) | ||
5999 | parent_event = parent_event->parent; | ||
6000 | |||
6001 | child_event = perf_event_alloc(&parent_event->attr, | ||
6002 | parent_event->cpu, | ||
6003 | child, | ||
6004 | group_leader, parent_event, | ||
6005 | NULL); | ||
6006 | if (IS_ERR(child_event)) | ||
6007 | return child_event; | ||
6008 | get_ctx(child_ctx); | ||
6009 | |||
6010 | /* | ||
6011 | * Make the child state follow the state of the parent event, | ||
6012 | * not its attr.disabled bit. We hold the parent's mutex, | ||
6013 | * so we won't race with perf_event_{en, dis}able_family. | ||
6014 | */ | ||
6015 | if (parent_event->state >= PERF_EVENT_STATE_INACTIVE) | ||
6016 | child_event->state = PERF_EVENT_STATE_INACTIVE; | ||
6017 | else | ||
6018 | child_event->state = PERF_EVENT_STATE_OFF; | ||
6019 | |||
6020 | if (parent_event->attr.freq) { | ||
6021 | u64 sample_period = parent_event->hw.sample_period; | ||
6022 | struct hw_perf_event *hwc = &child_event->hw; | ||
6023 | |||
6024 | hwc->sample_period = sample_period; | ||
6025 | hwc->last_period = sample_period; | ||
6026 | |||
6027 | local64_set(&hwc->period_left, sample_period); | ||
6028 | } | ||
6029 | |||
6030 | child_event->ctx = child_ctx; | ||
6031 | child_event->overflow_handler = parent_event->overflow_handler; | ||
6032 | |||
6033 | /* | ||
6034 | * Link it up in the child's context: | ||
6035 | */ | ||
6036 | raw_spin_lock_irqsave(&child_ctx->lock, flags); | ||
6037 | add_event_to_ctx(child_event, child_ctx); | ||
6038 | raw_spin_unlock_irqrestore(&child_ctx->lock, flags); | ||
6039 | |||
6040 | /* | ||
6041 | * Get a reference to the parent filp - we will fput it | ||
6042 | * when the child event exits. This is safe to do because | ||
6043 | * we are in the parent and we know that the filp still | ||
6044 | * exists and has a nonzero count: | ||
6045 | */ | ||
6046 | atomic_long_inc(&parent_event->filp->f_count); | ||
6047 | |||
6048 | /* | ||
6049 | * Link this into the parent event's child list | ||
6050 | */ | ||
6051 | WARN_ON_ONCE(parent_event->ctx->parent_ctx); | ||
6052 | mutex_lock(&parent_event->child_mutex); | ||
6053 | list_add_tail(&child_event->child_list, &parent_event->child_list); | ||
6054 | mutex_unlock(&parent_event->child_mutex); | ||
6055 | |||
6056 | return child_event; | ||
6057 | } | ||
6058 | |||
6059 | static int inherit_group(struct perf_event *parent_event, | ||
6060 | struct task_struct *parent, | ||
6061 | struct perf_event_context *parent_ctx, | ||
6062 | struct task_struct *child, | ||
6063 | struct perf_event_context *child_ctx) | ||
6064 | { | ||
6065 | struct perf_event *leader; | ||
6066 | struct perf_event *sub; | ||
6067 | struct perf_event *child_ctr; | ||
6068 | |||
6069 | leader = inherit_event(parent_event, parent, parent_ctx, | ||
6070 | child, NULL, child_ctx); | ||
6071 | if (IS_ERR(leader)) | ||
6072 | return PTR_ERR(leader); | ||
6073 | list_for_each_entry(sub, &parent_event->sibling_list, group_entry) { | ||
6074 | child_ctr = inherit_event(sub, parent, parent_ctx, | ||
6075 | child, leader, child_ctx); | ||
6076 | if (IS_ERR(child_ctr)) | ||
6077 | return PTR_ERR(child_ctr); | ||
6078 | } | ||
6079 | return 0; | ||
5565 | } | 6080 | } |
5566 | 6081 | ||
5567 | static int | 6082 | static int |
5568 | inherit_task_group(struct perf_event *event, struct task_struct *parent, | 6083 | inherit_task_group(struct perf_event *event, struct task_struct *parent, |
5569 | struct perf_event_context *parent_ctx, | 6084 | struct perf_event_context *parent_ctx, |
5570 | struct task_struct *child, | 6085 | struct task_struct *child, int ctxn, |
5571 | int *inherited_all) | 6086 | int *inherited_all) |
5572 | { | 6087 | { |
5573 | int ret; | 6088 | int ret; |
5574 | struct perf_event_context *child_ctx = child->perf_event_ctxp; | 6089 | struct perf_event_context *child_ctx; |
5575 | 6090 | ||
5576 | if (!event->attr.inherit) { | 6091 | if (!event->attr.inherit) { |
5577 | *inherited_all = 0; | 6092 | *inherited_all = 0; |
5578 | return 0; | 6093 | return 0; |
5579 | } | 6094 | } |
5580 | 6095 | ||
6096 | child_ctx = child->perf_event_ctxp[ctxn]; | ||
5581 | if (!child_ctx) { | 6097 | if (!child_ctx) { |
5582 | /* | 6098 | /* |
5583 | * This is executed from the parent task context, so | 6099 | * This is executed from the parent task context, so |
@@ -5586,14 +6102,11 @@ inherit_task_group(struct perf_event *event, struct task_struct *parent, | |||
5586 | * child. | 6102 | * child. |
5587 | */ | 6103 | */ |
5588 | 6104 | ||
5589 | child_ctx = kzalloc(sizeof(struct perf_event_context), | 6105 | child_ctx = alloc_perf_context(event->pmu, child); |
5590 | GFP_KERNEL); | ||
5591 | if (!child_ctx) | 6106 | if (!child_ctx) |
5592 | return -ENOMEM; | 6107 | return -ENOMEM; |
5593 | 6108 | ||
5594 | __perf_event_init_context(child_ctx, child); | 6109 | child->perf_event_ctxp[ctxn] = child_ctx; |
5595 | child->perf_event_ctxp = child_ctx; | ||
5596 | get_task_struct(child); | ||
5597 | } | 6110 | } |
5598 | 6111 | ||
5599 | ret = inherit_group(event, parent, parent_ctx, | 6112 | ret = inherit_group(event, parent, parent_ctx, |
@@ -5605,11 +6118,10 @@ inherit_task_group(struct perf_event *event, struct task_struct *parent, | |||
5605 | return ret; | 6118 | return ret; |
5606 | } | 6119 | } |
5607 | 6120 | ||
5608 | |||
5609 | /* | 6121 | /* |
5610 | * Initialize the perf_event context in task_struct | 6122 | * Initialize the perf_event context in task_struct |
5611 | */ | 6123 | */ |
5612 | int perf_event_init_task(struct task_struct *child) | 6124 | int perf_event_init_context(struct task_struct *child, int ctxn) |
5613 | { | 6125 | { |
5614 | struct perf_event_context *child_ctx, *parent_ctx; | 6126 | struct perf_event_context *child_ctx, *parent_ctx; |
5615 | struct perf_event_context *cloned_ctx; | 6127 | struct perf_event_context *cloned_ctx; |
@@ -5618,19 +6130,19 @@ int perf_event_init_task(struct task_struct *child) | |||
5618 | int inherited_all = 1; | 6130 | int inherited_all = 1; |
5619 | int ret = 0; | 6131 | int ret = 0; |
5620 | 6132 | ||
5621 | child->perf_event_ctxp = NULL; | 6133 | child->perf_event_ctxp[ctxn] = NULL; |
5622 | 6134 | ||
5623 | mutex_init(&child->perf_event_mutex); | 6135 | mutex_init(&child->perf_event_mutex); |
5624 | INIT_LIST_HEAD(&child->perf_event_list); | 6136 | INIT_LIST_HEAD(&child->perf_event_list); |
5625 | 6137 | ||
5626 | if (likely(!parent->perf_event_ctxp)) | 6138 | if (likely(!parent->perf_event_ctxp[ctxn])) |
5627 | return 0; | 6139 | return 0; |
5628 | 6140 | ||
5629 | /* | 6141 | /* |
5630 | * If the parent's context is a clone, pin it so it won't get | 6142 | * If the parent's context is a clone, pin it so it won't get |
5631 | * swapped under us. | 6143 | * swapped under us. |
5632 | */ | 6144 | */ |
5633 | parent_ctx = perf_pin_task_context(parent); | 6145 | parent_ctx = perf_pin_task_context(parent, ctxn); |
5634 | 6146 | ||
5635 | /* | 6147 | /* |
5636 | * No need to check if parent_ctx != NULL here; since we saw | 6148 | * No need to check if parent_ctx != NULL here; since we saw |
@@ -5650,20 +6162,20 @@ int perf_event_init_task(struct task_struct *child) | |||
5650 | * the list, not manipulating it: | 6162 | * the list, not manipulating it: |
5651 | */ | 6163 | */ |
5652 | list_for_each_entry(event, &parent_ctx->pinned_groups, group_entry) { | 6164 | list_for_each_entry(event, &parent_ctx->pinned_groups, group_entry) { |
5653 | ret = inherit_task_group(event, parent, parent_ctx, child, | 6165 | ret = inherit_task_group(event, parent, parent_ctx, |
5654 | &inherited_all); | 6166 | child, ctxn, &inherited_all); |
5655 | if (ret) | 6167 | if (ret) |
5656 | break; | 6168 | break; |
5657 | } | 6169 | } |
5658 | 6170 | ||
5659 | list_for_each_entry(event, &parent_ctx->flexible_groups, group_entry) { | 6171 | list_for_each_entry(event, &parent_ctx->flexible_groups, group_entry) { |
5660 | ret = inherit_task_group(event, parent, parent_ctx, child, | 6172 | ret = inherit_task_group(event, parent, parent_ctx, |
5661 | &inherited_all); | 6173 | child, ctxn, &inherited_all); |
5662 | if (ret) | 6174 | if (ret) |
5663 | break; | 6175 | break; |
5664 | } | 6176 | } |
5665 | 6177 | ||
5666 | child_ctx = child->perf_event_ctxp; | 6178 | child_ctx = child->perf_event_ctxp[ctxn]; |
5667 | 6179 | ||
5668 | if (child_ctx && inherited_all) { | 6180 | if (child_ctx && inherited_all) { |
5669 | /* | 6181 | /* |
@@ -5692,63 +6204,98 @@ int perf_event_init_task(struct task_struct *child) | |||
5692 | return ret; | 6204 | return ret; |
5693 | } | 6205 | } |
5694 | 6206 | ||
6207 | /* | ||
6208 | * Initialize the perf_event context in task_struct | ||
6209 | */ | ||
6210 | int perf_event_init_task(struct task_struct *child) | ||
6211 | { | ||
6212 | int ctxn, ret; | ||
6213 | |||
6214 | for_each_task_context_nr(ctxn) { | ||
6215 | ret = perf_event_init_context(child, ctxn); | ||
6216 | if (ret) | ||
6217 | return ret; | ||
6218 | } | ||
6219 | |||
6220 | return 0; | ||
6221 | } | ||
6222 | |||
5695 | static void __init perf_event_init_all_cpus(void) | 6223 | static void __init perf_event_init_all_cpus(void) |
5696 | { | 6224 | { |
6225 | struct swevent_htable *swhash; | ||
5697 | int cpu; | 6226 | int cpu; |
5698 | struct perf_cpu_context *cpuctx; | ||
5699 | 6227 | ||
5700 | for_each_possible_cpu(cpu) { | 6228 | for_each_possible_cpu(cpu) { |
5701 | cpuctx = &per_cpu(perf_cpu_context, cpu); | 6229 | swhash = &per_cpu(swevent_htable, cpu); |
5702 | mutex_init(&cpuctx->hlist_mutex); | 6230 | mutex_init(&swhash->hlist_mutex); |
5703 | __perf_event_init_context(&cpuctx->ctx, NULL); | 6231 | INIT_LIST_HEAD(&per_cpu(rotation_list, cpu)); |
5704 | } | 6232 | } |
5705 | } | 6233 | } |
5706 | 6234 | ||
5707 | static void __cpuinit perf_event_init_cpu(int cpu) | 6235 | static void __cpuinit perf_event_init_cpu(int cpu) |
5708 | { | 6236 | { |
5709 | struct perf_cpu_context *cpuctx; | 6237 | struct swevent_htable *swhash = &per_cpu(swevent_htable, cpu); |
5710 | |||
5711 | cpuctx = &per_cpu(perf_cpu_context, cpu); | ||
5712 | 6238 | ||
5713 | spin_lock(&perf_resource_lock); | 6239 | mutex_lock(&swhash->hlist_mutex); |
5714 | cpuctx->max_pertask = perf_max_events - perf_reserved_percpu; | 6240 | if (swhash->hlist_refcount > 0) { |
5715 | spin_unlock(&perf_resource_lock); | ||
5716 | |||
5717 | mutex_lock(&cpuctx->hlist_mutex); | ||
5718 | if (cpuctx->hlist_refcount > 0) { | ||
5719 | struct swevent_hlist *hlist; | 6241 | struct swevent_hlist *hlist; |
5720 | 6242 | ||
5721 | hlist = kzalloc(sizeof(*hlist), GFP_KERNEL); | 6243 | hlist = kzalloc_node(sizeof(*hlist), GFP_KERNEL, cpu_to_node(cpu)); |
5722 | WARN_ON_ONCE(!hlist); | 6244 | WARN_ON(!hlist); |
5723 | rcu_assign_pointer(cpuctx->swevent_hlist, hlist); | 6245 | rcu_assign_pointer(swhash->swevent_hlist, hlist); |
5724 | } | 6246 | } |
5725 | mutex_unlock(&cpuctx->hlist_mutex); | 6247 | mutex_unlock(&swhash->hlist_mutex); |
5726 | } | 6248 | } |
5727 | 6249 | ||
5728 | #ifdef CONFIG_HOTPLUG_CPU | 6250 | #ifdef CONFIG_HOTPLUG_CPU |
5729 | static void __perf_event_exit_cpu(void *info) | 6251 | static void perf_pmu_rotate_stop(struct pmu *pmu) |
5730 | { | 6252 | { |
5731 | struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); | 6253 | struct perf_cpu_context *cpuctx = this_cpu_ptr(pmu->pmu_cpu_context); |
5732 | struct perf_event_context *ctx = &cpuctx->ctx; | 6254 | |
6255 | WARN_ON(!irqs_disabled()); | ||
6256 | |||
6257 | list_del_init(&cpuctx->rotation_list); | ||
6258 | } | ||
6259 | |||
6260 | static void __perf_event_exit_context(void *__info) | ||
6261 | { | ||
6262 | struct perf_event_context *ctx = __info; | ||
5733 | struct perf_event *event, *tmp; | 6263 | struct perf_event *event, *tmp; |
5734 | 6264 | ||
6265 | perf_pmu_rotate_stop(ctx->pmu); | ||
6266 | |||
5735 | list_for_each_entry_safe(event, tmp, &ctx->pinned_groups, group_entry) | 6267 | list_for_each_entry_safe(event, tmp, &ctx->pinned_groups, group_entry) |
5736 | __perf_event_remove_from_context(event); | 6268 | __perf_event_remove_from_context(event); |
5737 | list_for_each_entry_safe(event, tmp, &ctx->flexible_groups, group_entry) | 6269 | list_for_each_entry_safe(event, tmp, &ctx->flexible_groups, group_entry) |
5738 | __perf_event_remove_from_context(event); | 6270 | __perf_event_remove_from_context(event); |
5739 | } | 6271 | } |
6272 | |||
6273 | static void perf_event_exit_cpu_context(int cpu) | ||
6274 | { | ||
6275 | struct perf_event_context *ctx; | ||
6276 | struct pmu *pmu; | ||
6277 | int idx; | ||
6278 | |||
6279 | idx = srcu_read_lock(&pmus_srcu); | ||
6280 | list_for_each_entry_rcu(pmu, &pmus, entry) { | ||
6281 | ctx = &per_cpu_ptr(pmu->pmu_cpu_context, cpu)->ctx; | ||
6282 | |||
6283 | mutex_lock(&ctx->mutex); | ||
6284 | smp_call_function_single(cpu, __perf_event_exit_context, ctx, 1); | ||
6285 | mutex_unlock(&ctx->mutex); | ||
6286 | } | ||
6287 | srcu_read_unlock(&pmus_srcu, idx); | ||
6288 | } | ||
6289 | |||
5740 | static void perf_event_exit_cpu(int cpu) | 6290 | static void perf_event_exit_cpu(int cpu) |
5741 | { | 6291 | { |
5742 | struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu); | 6292 | struct swevent_htable *swhash = &per_cpu(swevent_htable, cpu); |
5743 | struct perf_event_context *ctx = &cpuctx->ctx; | ||
5744 | 6293 | ||
5745 | mutex_lock(&cpuctx->hlist_mutex); | 6294 | mutex_lock(&swhash->hlist_mutex); |
5746 | swevent_hlist_release(cpuctx); | 6295 | swevent_hlist_release(swhash); |
5747 | mutex_unlock(&cpuctx->hlist_mutex); | 6296 | mutex_unlock(&swhash->hlist_mutex); |
5748 | 6297 | ||
5749 | mutex_lock(&ctx->mutex); | 6298 | perf_event_exit_cpu_context(cpu); |
5750 | smp_call_function_single(cpu, __perf_event_exit_cpu, NULL, 1); | ||
5751 | mutex_unlock(&ctx->mutex); | ||
5752 | } | 6299 | } |
5753 | #else | 6300 | #else |
5754 | static inline void perf_event_exit_cpu(int cpu) { } | 6301 | static inline void perf_event_exit_cpu(int cpu) { } |
@@ -5778,118 +6325,13 @@ perf_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu) | |||
5778 | return NOTIFY_OK; | 6325 | return NOTIFY_OK; |
5779 | } | 6326 | } |
5780 | 6327 | ||
5781 | /* | ||
5782 | * This has to have a higher priority than migration_notifier in sched.c. | ||
5783 | */ | ||
5784 | static struct notifier_block __cpuinitdata perf_cpu_nb = { | ||
5785 | .notifier_call = perf_cpu_notify, | ||
5786 | .priority = 20, | ||
5787 | }; | ||
5788 | |||
5789 | void __init perf_event_init(void) | 6328 | void __init perf_event_init(void) |
5790 | { | 6329 | { |
5791 | perf_event_init_all_cpus(); | 6330 | perf_event_init_all_cpus(); |
5792 | perf_cpu_notify(&perf_cpu_nb, (unsigned long)CPU_UP_PREPARE, | 6331 | init_srcu_struct(&pmus_srcu); |
5793 | (void *)(long)smp_processor_id()); | 6332 | perf_pmu_register(&perf_swevent); |
5794 | perf_cpu_notify(&perf_cpu_nb, (unsigned long)CPU_ONLINE, | 6333 | perf_pmu_register(&perf_cpu_clock); |
5795 | (void *)(long)smp_processor_id()); | 6334 | perf_pmu_register(&perf_task_clock); |
5796 | register_cpu_notifier(&perf_cpu_nb); | 6335 | perf_tp_register(); |
5797 | } | 6336 | perf_cpu_notifier(perf_cpu_notify); |
5798 | |||
5799 | static ssize_t perf_show_reserve_percpu(struct sysdev_class *class, | ||
5800 | struct sysdev_class_attribute *attr, | ||
5801 | char *buf) | ||
5802 | { | ||
5803 | return sprintf(buf, "%d\n", perf_reserved_percpu); | ||
5804 | } | ||
5805 | |||
5806 | static ssize_t | ||
5807 | perf_set_reserve_percpu(struct sysdev_class *class, | ||
5808 | struct sysdev_class_attribute *attr, | ||
5809 | const char *buf, | ||
5810 | size_t count) | ||
5811 | { | ||
5812 | struct perf_cpu_context *cpuctx; | ||
5813 | unsigned long val; | ||
5814 | int err, cpu, mpt; | ||
5815 | |||
5816 | err = strict_strtoul(buf, 10, &val); | ||
5817 | if (err) | ||
5818 | return err; | ||
5819 | if (val > perf_max_events) | ||
5820 | return -EINVAL; | ||
5821 | |||
5822 | spin_lock(&perf_resource_lock); | ||
5823 | perf_reserved_percpu = val; | ||
5824 | for_each_online_cpu(cpu) { | ||
5825 | cpuctx = &per_cpu(perf_cpu_context, cpu); | ||
5826 | raw_spin_lock_irq(&cpuctx->ctx.lock); | ||
5827 | mpt = min(perf_max_events - cpuctx->ctx.nr_events, | ||
5828 | perf_max_events - perf_reserved_percpu); | ||
5829 | cpuctx->max_pertask = mpt; | ||
5830 | raw_spin_unlock_irq(&cpuctx->ctx.lock); | ||
5831 | } | ||
5832 | spin_unlock(&perf_resource_lock); | ||
5833 | |||
5834 | return count; | ||
5835 | } | ||
5836 | |||
5837 | static ssize_t perf_show_overcommit(struct sysdev_class *class, | ||
5838 | struct sysdev_class_attribute *attr, | ||
5839 | char *buf) | ||
5840 | { | ||
5841 | return sprintf(buf, "%d\n", perf_overcommit); | ||
5842 | } | ||
5843 | |||
5844 | static ssize_t | ||
5845 | perf_set_overcommit(struct sysdev_class *class, | ||
5846 | struct sysdev_class_attribute *attr, | ||
5847 | const char *buf, size_t count) | ||
5848 | { | ||
5849 | unsigned long val; | ||
5850 | int err; | ||
5851 | |||
5852 | err = strict_strtoul(buf, 10, &val); | ||
5853 | if (err) | ||
5854 | return err; | ||
5855 | if (val > 1) | ||
5856 | return -EINVAL; | ||
5857 | |||
5858 | spin_lock(&perf_resource_lock); | ||
5859 | perf_overcommit = val; | ||
5860 | spin_unlock(&perf_resource_lock); | ||
5861 | |||
5862 | return count; | ||
5863 | } | ||
5864 | |||
5865 | static SYSDEV_CLASS_ATTR( | ||
5866 | reserve_percpu, | ||
5867 | 0644, | ||
5868 | perf_show_reserve_percpu, | ||
5869 | perf_set_reserve_percpu | ||
5870 | ); | ||
5871 | |||
5872 | static SYSDEV_CLASS_ATTR( | ||
5873 | overcommit, | ||
5874 | 0644, | ||
5875 | perf_show_overcommit, | ||
5876 | perf_set_overcommit | ||
5877 | ); | ||
5878 | |||
5879 | static struct attribute *perfclass_attrs[] = { | ||
5880 | &attr_reserve_percpu.attr, | ||
5881 | &attr_overcommit.attr, | ||
5882 | NULL | ||
5883 | }; | ||
5884 | |||
5885 | static struct attribute_group perfclass_attr_group = { | ||
5886 | .attrs = perfclass_attrs, | ||
5887 | .name = "perf_events", | ||
5888 | }; | ||
5889 | |||
5890 | static int __init perf_event_sysfs_init(void) | ||
5891 | { | ||
5892 | return sysfs_create_group(&cpu_sysdev_class.kset.kobj, | ||
5893 | &perfclass_attr_group); | ||
5894 | } | 6337 | } |
5895 | device_initcall(perf_event_sysfs_init); | ||
diff --git a/kernel/pid.c b/kernel/pid.c index d55c6fb8d087..39b65b69584f 100644 --- a/kernel/pid.c +++ b/kernel/pid.c | |||
@@ -401,7 +401,7 @@ struct task_struct *pid_task(struct pid *pid, enum pid_type type) | |||
401 | struct task_struct *result = NULL; | 401 | struct task_struct *result = NULL; |
402 | if (pid) { | 402 | if (pid) { |
403 | struct hlist_node *first; | 403 | struct hlist_node *first; |
404 | first = rcu_dereference_check(pid->tasks[type].first, | 404 | first = rcu_dereference_check(hlist_first_rcu(&pid->tasks[type]), |
405 | rcu_read_lock_held() || | 405 | rcu_read_lock_held() || |
406 | lockdep_tasklist_lock_is_held()); | 406 | lockdep_tasklist_lock_is_held()); |
407 | if (first) | 407 | if (first) |
@@ -416,6 +416,7 @@ EXPORT_SYMBOL(pid_task); | |||
416 | */ | 416 | */ |
417 | struct task_struct *find_task_by_pid_ns(pid_t nr, struct pid_namespace *ns) | 417 | struct task_struct *find_task_by_pid_ns(pid_t nr, struct pid_namespace *ns) |
418 | { | 418 | { |
419 | rcu_lockdep_assert(rcu_read_lock_held()); | ||
419 | return pid_task(find_pid_ns(nr, ns), PIDTYPE_PID); | 420 | return pid_task(find_pid_ns(nr, ns), PIDTYPE_PID); |
420 | } | 421 | } |
421 | 422 | ||
diff --git a/kernel/printk.c b/kernel/printk.c index 8fe465ac008a..2531017795f6 100644 --- a/kernel/printk.c +++ b/kernel/printk.c | |||
@@ -85,7 +85,7 @@ EXPORT_SYMBOL(oops_in_progress); | |||
85 | * provides serialisation for access to the entire console | 85 | * provides serialisation for access to the entire console |
86 | * driver system. | 86 | * driver system. |
87 | */ | 87 | */ |
88 | static DECLARE_MUTEX(console_sem); | 88 | static DEFINE_SEMAPHORE(console_sem); |
89 | struct console *console_drivers; | 89 | struct console *console_drivers; |
90 | EXPORT_SYMBOL_GPL(console_drivers); | 90 | EXPORT_SYMBOL_GPL(console_drivers); |
91 | 91 | ||
@@ -556,7 +556,7 @@ static void zap_locks(void) | |||
556 | /* If a crash is occurring, make sure we can't deadlock */ | 556 | /* If a crash is occurring, make sure we can't deadlock */ |
557 | spin_lock_init(&logbuf_lock); | 557 | spin_lock_init(&logbuf_lock); |
558 | /* And make sure that we print immediately */ | 558 | /* And make sure that we print immediately */ |
559 | init_MUTEX(&console_sem); | 559 | sema_init(&console_sem, 1); |
560 | } | 560 | } |
561 | 561 | ||
562 | #if defined(CONFIG_PRINTK_TIME) | 562 | #if defined(CONFIG_PRINTK_TIME) |
diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c index 4d169835fb36..a23a57a976d1 100644 --- a/kernel/rcupdate.c +++ b/kernel/rcupdate.c | |||
@@ -73,12 +73,14 @@ int debug_lockdep_rcu_enabled(void) | |||
73 | EXPORT_SYMBOL_GPL(debug_lockdep_rcu_enabled); | 73 | EXPORT_SYMBOL_GPL(debug_lockdep_rcu_enabled); |
74 | 74 | ||
75 | /** | 75 | /** |
76 | * rcu_read_lock_bh_held - might we be in RCU-bh read-side critical section? | 76 | * rcu_read_lock_bh_held() - might we be in RCU-bh read-side critical section? |
77 | * | 77 | * |
78 | * Check for bottom half being disabled, which covers both the | 78 | * Check for bottom half being disabled, which covers both the |
79 | * CONFIG_PROVE_RCU and not cases. Note that if someone uses | 79 | * CONFIG_PROVE_RCU and not cases. Note that if someone uses |
80 | * rcu_read_lock_bh(), but then later enables BH, lockdep (if enabled) | 80 | * rcu_read_lock_bh(), but then later enables BH, lockdep (if enabled) |
81 | * will show the situation. | 81 | * will show the situation. This is useful for debug checks in functions |
82 | * that require that they be called within an RCU read-side critical | ||
83 | * section. | ||
82 | * | 84 | * |
83 | * Check debug_lockdep_rcu_enabled() to prevent false positives during boot. | 85 | * Check debug_lockdep_rcu_enabled() to prevent false positives during boot. |
84 | */ | 86 | */ |
@@ -86,7 +88,7 @@ int rcu_read_lock_bh_held(void) | |||
86 | { | 88 | { |
87 | if (!debug_lockdep_rcu_enabled()) | 89 | if (!debug_lockdep_rcu_enabled()) |
88 | return 1; | 90 | return 1; |
89 | return in_softirq(); | 91 | return in_softirq() || irqs_disabled(); |
90 | } | 92 | } |
91 | EXPORT_SYMBOL_GPL(rcu_read_lock_bh_held); | 93 | EXPORT_SYMBOL_GPL(rcu_read_lock_bh_held); |
92 | 94 | ||
diff --git a/kernel/rcutiny.c b/kernel/rcutiny.c index 196ec02f8be0..d806735342ac 100644 --- a/kernel/rcutiny.c +++ b/kernel/rcutiny.c | |||
@@ -59,6 +59,14 @@ int rcu_scheduler_active __read_mostly; | |||
59 | EXPORT_SYMBOL_GPL(rcu_scheduler_active); | 59 | EXPORT_SYMBOL_GPL(rcu_scheduler_active); |
60 | #endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ | 60 | #endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ |
61 | 61 | ||
62 | /* Forward declarations for rcutiny_plugin.h. */ | ||
63 | static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp); | ||
64 | static void __call_rcu(struct rcu_head *head, | ||
65 | void (*func)(struct rcu_head *rcu), | ||
66 | struct rcu_ctrlblk *rcp); | ||
67 | |||
68 | #include "rcutiny_plugin.h" | ||
69 | |||
62 | #ifdef CONFIG_NO_HZ | 70 | #ifdef CONFIG_NO_HZ |
63 | 71 | ||
64 | static long rcu_dynticks_nesting = 1; | 72 | static long rcu_dynticks_nesting = 1; |
@@ -140,6 +148,7 @@ void rcu_check_callbacks(int cpu, int user) | |||
140 | rcu_sched_qs(cpu); | 148 | rcu_sched_qs(cpu); |
141 | else if (!in_softirq()) | 149 | else if (!in_softirq()) |
142 | rcu_bh_qs(cpu); | 150 | rcu_bh_qs(cpu); |
151 | rcu_preempt_check_callbacks(); | ||
143 | } | 152 | } |
144 | 153 | ||
145 | /* | 154 | /* |
@@ -162,6 +171,7 @@ static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp) | |||
162 | *rcp->donetail = NULL; | 171 | *rcp->donetail = NULL; |
163 | if (rcp->curtail == rcp->donetail) | 172 | if (rcp->curtail == rcp->donetail) |
164 | rcp->curtail = &rcp->rcucblist; | 173 | rcp->curtail = &rcp->rcucblist; |
174 | rcu_preempt_remove_callbacks(rcp); | ||
165 | rcp->donetail = &rcp->rcucblist; | 175 | rcp->donetail = &rcp->rcucblist; |
166 | local_irq_restore(flags); | 176 | local_irq_restore(flags); |
167 | 177 | ||
@@ -182,6 +192,7 @@ static void rcu_process_callbacks(struct softirq_action *unused) | |||
182 | { | 192 | { |
183 | __rcu_process_callbacks(&rcu_sched_ctrlblk); | 193 | __rcu_process_callbacks(&rcu_sched_ctrlblk); |
184 | __rcu_process_callbacks(&rcu_bh_ctrlblk); | 194 | __rcu_process_callbacks(&rcu_bh_ctrlblk); |
195 | rcu_preempt_process_callbacks(); | ||
185 | } | 196 | } |
186 | 197 | ||
187 | /* | 198 | /* |
@@ -223,15 +234,15 @@ static void __call_rcu(struct rcu_head *head, | |||
223 | } | 234 | } |
224 | 235 | ||
225 | /* | 236 | /* |
226 | * Post an RCU callback to be invoked after the end of an RCU grace | 237 | * Post an RCU callback to be invoked after the end of an RCU-sched grace |
227 | * period. But since we have but one CPU, that would be after any | 238 | * period. But since we have but one CPU, that would be after any |
228 | * quiescent state. | 239 | * quiescent state. |
229 | */ | 240 | */ |
230 | void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu)) | 241 | void call_rcu_sched(struct rcu_head *head, void (*func)(struct rcu_head *rcu)) |
231 | { | 242 | { |
232 | __call_rcu(head, func, &rcu_sched_ctrlblk); | 243 | __call_rcu(head, func, &rcu_sched_ctrlblk); |
233 | } | 244 | } |
234 | EXPORT_SYMBOL_GPL(call_rcu); | 245 | EXPORT_SYMBOL_GPL(call_rcu_sched); |
235 | 246 | ||
236 | /* | 247 | /* |
237 | * Post an RCU bottom-half callback to be invoked after any subsequent | 248 | * Post an RCU bottom-half callback to be invoked after any subsequent |
@@ -243,20 +254,6 @@ void call_rcu_bh(struct rcu_head *head, void (*func)(struct rcu_head *rcu)) | |||
243 | } | 254 | } |
244 | EXPORT_SYMBOL_GPL(call_rcu_bh); | 255 | EXPORT_SYMBOL_GPL(call_rcu_bh); |
245 | 256 | ||
246 | void rcu_barrier(void) | ||
247 | { | ||
248 | struct rcu_synchronize rcu; | ||
249 | |||
250 | init_rcu_head_on_stack(&rcu.head); | ||
251 | init_completion(&rcu.completion); | ||
252 | /* Will wake me after RCU finished. */ | ||
253 | call_rcu(&rcu.head, wakeme_after_rcu); | ||
254 | /* Wait for it. */ | ||
255 | wait_for_completion(&rcu.completion); | ||
256 | destroy_rcu_head_on_stack(&rcu.head); | ||
257 | } | ||
258 | EXPORT_SYMBOL_GPL(rcu_barrier); | ||
259 | |||
260 | void rcu_barrier_bh(void) | 257 | void rcu_barrier_bh(void) |
261 | { | 258 | { |
262 | struct rcu_synchronize rcu; | 259 | struct rcu_synchronize rcu; |
@@ -289,5 +286,3 @@ void __init rcu_init(void) | |||
289 | { | 286 | { |
290 | open_softirq(RCU_SOFTIRQ, rcu_process_callbacks); | 287 | open_softirq(RCU_SOFTIRQ, rcu_process_callbacks); |
291 | } | 288 | } |
292 | |||
293 | #include "rcutiny_plugin.h" | ||
diff --git a/kernel/rcutiny_plugin.h b/kernel/rcutiny_plugin.h index d223a92bc742..6ceca4f745ff 100644 --- a/kernel/rcutiny_plugin.h +++ b/kernel/rcutiny_plugin.h | |||
@@ -1,7 +1,7 @@ | |||
1 | /* | 1 | /* |
2 | * Read-Copy Update mechanism for mutual exclusion (tree-based version) | 2 | * Read-Copy Update mechanism for mutual exclusion, the Bloatwatch edition |
3 | * Internal non-public definitions that provide either classic | 3 | * Internal non-public definitions that provide either classic |
4 | * or preemptable semantics. | 4 | * or preemptible semantics. |
5 | * | 5 | * |
6 | * This program is free software; you can redistribute it and/or modify | 6 | * This program is free software; you can redistribute it and/or modify |
7 | * it under the terms of the GNU General Public License as published by | 7 | * it under the terms of the GNU General Public License as published by |
@@ -17,11 +17,587 @@ | |||
17 | * along with this program; if not, write to the Free Software | 17 | * along with this program; if not, write to the Free Software |
18 | * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. | 18 | * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. |
19 | * | 19 | * |
20 | * Copyright IBM Corporation, 2009 | 20 | * Copyright (c) 2010 Linaro |
21 | * | 21 | * |
22 | * Author: Paul E. McKenney <paulmck@linux.vnet.ibm.com> | 22 | * Author: Paul E. McKenney <paulmck@linux.vnet.ibm.com> |
23 | */ | 23 | */ |
24 | 24 | ||
25 | #ifdef CONFIG_TINY_PREEMPT_RCU | ||
26 | |||
27 | #include <linux/delay.h> | ||
28 | |||
29 | /* Global control variables for preemptible RCU. */ | ||
30 | struct rcu_preempt_ctrlblk { | ||
31 | struct rcu_ctrlblk rcb; /* curtail: ->next ptr of last CB for GP. */ | ||
32 | struct rcu_head **nexttail; | ||
33 | /* Tasks blocked in a preemptible RCU */ | ||
34 | /* read-side critical section while a */ | ||
35 | /* preemptible-RCU grace period is in */ | ||
36 | /* progress must wait for a later grace */ | ||
37 | /* period. This pointer points to the */ | ||
38 | /* ->next pointer of the last task that */ | ||
39 | /* must wait for a later grace period, or */ | ||
40 | /* to &->rcb.rcucblist if there is no */ | ||
41 | /* such task. */ | ||
42 | struct list_head blkd_tasks; | ||
43 | /* Tasks blocked in RCU read-side critical */ | ||
44 | /* section. Tasks are placed at the head */ | ||
45 | /* of this list and age towards the tail. */ | ||
46 | struct list_head *gp_tasks; | ||
47 | /* Pointer to the first task blocking the */ | ||
48 | /* current grace period, or NULL if there */ | ||
49 | /* is no such task. */ | ||
50 | struct list_head *exp_tasks; | ||
51 | /* Pointer to first task blocking the */ | ||
52 | /* current expedited grace period, or NULL */ | ||
53 | /* if there is no such task. If there */ | ||
54 | /* is no current expedited grace period, */ | ||
55 | /* then there cannot be any such task. */ | ||
56 | u8 gpnum; /* Current grace period. */ | ||
57 | u8 gpcpu; /* Last grace period blocked by the CPU. */ | ||
58 | u8 completed; /* Last grace period completed. */ | ||
59 | /* If all three are equal, RCU is idle. */ | ||
60 | }; | ||
61 | |||
62 | static struct rcu_preempt_ctrlblk rcu_preempt_ctrlblk = { | ||
63 | .rcb.donetail = &rcu_preempt_ctrlblk.rcb.rcucblist, | ||
64 | .rcb.curtail = &rcu_preempt_ctrlblk.rcb.rcucblist, | ||
65 | .nexttail = &rcu_preempt_ctrlblk.rcb.rcucblist, | ||
66 | .blkd_tasks = LIST_HEAD_INIT(rcu_preempt_ctrlblk.blkd_tasks), | ||
67 | }; | ||
68 | |||
69 | static int rcu_preempted_readers_exp(void); | ||
70 | static void rcu_report_exp_done(void); | ||
71 | |||
72 | /* | ||
73 | * Return true if the CPU has not yet responded to the current grace period. | ||
74 | */ | ||
75 | static int rcu_cpu_blocking_cur_gp(void) | ||
76 | { | ||
77 | return rcu_preempt_ctrlblk.gpcpu != rcu_preempt_ctrlblk.gpnum; | ||
78 | } | ||
79 | |||
80 | /* | ||
81 | * Check for a running RCU reader. Because there is only one CPU, | ||
82 | * there can be but one running RCU reader at a time. ;-) | ||
83 | */ | ||
84 | static int rcu_preempt_running_reader(void) | ||
85 | { | ||
86 | return current->rcu_read_lock_nesting; | ||
87 | } | ||
88 | |||
89 | /* | ||
90 | * Check for preempted RCU readers blocking any grace period. | ||
91 | * If the caller needs a reliable answer, it must disable hard irqs. | ||
92 | */ | ||
93 | static int rcu_preempt_blocked_readers_any(void) | ||
94 | { | ||
95 | return !list_empty(&rcu_preempt_ctrlblk.blkd_tasks); | ||
96 | } | ||
97 | |||
98 | /* | ||
99 | * Check for preempted RCU readers blocking the current grace period. | ||
100 | * If the caller needs a reliable answer, it must disable hard irqs. | ||
101 | */ | ||
102 | static int rcu_preempt_blocked_readers_cgp(void) | ||
103 | { | ||
104 | return rcu_preempt_ctrlblk.gp_tasks != NULL; | ||
105 | } | ||
106 | |||
107 | /* | ||
108 | * Return true if another preemptible-RCU grace period is needed. | ||
109 | */ | ||
110 | static int rcu_preempt_needs_another_gp(void) | ||
111 | { | ||
112 | return *rcu_preempt_ctrlblk.rcb.curtail != NULL; | ||
113 | } | ||
114 | |||
115 | /* | ||
116 | * Return true if a preemptible-RCU grace period is in progress. | ||
117 | * The caller must disable hardirqs. | ||
118 | */ | ||
119 | static int rcu_preempt_gp_in_progress(void) | ||
120 | { | ||
121 | return rcu_preempt_ctrlblk.completed != rcu_preempt_ctrlblk.gpnum; | ||
122 | } | ||
123 | |||
124 | /* | ||
125 | * Record a preemptible-RCU quiescent state for the specified CPU. Note | ||
126 | * that this just means that the task currently running on the CPU is | ||
127 | * in a quiescent state. There might be any number of tasks blocked | ||
128 | * while in an RCU read-side critical section. | ||
129 | * | ||
130 | * Unlike the other rcu_*_qs() functions, callers to this function | ||
131 | * must disable irqs in order to protect the assignment to | ||
132 | * ->rcu_read_unlock_special. | ||
133 | * | ||
134 | * Because this is a single-CPU implementation, the only way a grace | ||
135 | * period can end is if the CPU is in a quiescent state. The reason is | ||
136 | * that a blocked preemptible-RCU reader can exit its critical section | ||
137 | * only if the CPU is running it at the time. Therefore, when the | ||
138 | * last task blocking the current grace period exits its RCU read-side | ||
139 | * critical section, neither the CPU nor blocked tasks will be stopping | ||
140 | * the current grace period. (In contrast, SMP implementations | ||
141 | * might have CPUs running in RCU read-side critical sections that | ||
142 | * block later grace periods -- but this is not possible given only | ||
143 | * one CPU.) | ||
144 | */ | ||
145 | static void rcu_preempt_cpu_qs(void) | ||
146 | { | ||
147 | /* Record both CPU and task as having responded to current GP. */ | ||
148 | rcu_preempt_ctrlblk.gpcpu = rcu_preempt_ctrlblk.gpnum; | ||
149 | current->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_NEED_QS; | ||
150 | |||
151 | /* | ||
152 | * If there is no GP, or if blocked readers are still blocking GP, | ||
153 | * then there is nothing more to do. | ||
154 | */ | ||
155 | if (!rcu_preempt_gp_in_progress() || rcu_preempt_blocked_readers_cgp()) | ||
156 | return; | ||
157 | |||
158 | /* Advance callbacks. */ | ||
159 | rcu_preempt_ctrlblk.completed = rcu_preempt_ctrlblk.gpnum; | ||
160 | rcu_preempt_ctrlblk.rcb.donetail = rcu_preempt_ctrlblk.rcb.curtail; | ||
161 | rcu_preempt_ctrlblk.rcb.curtail = rcu_preempt_ctrlblk.nexttail; | ||
162 | |||
163 | /* If there are no blocked readers, next GP is done instantly. */ | ||
164 | if (!rcu_preempt_blocked_readers_any()) | ||
165 | rcu_preempt_ctrlblk.rcb.donetail = rcu_preempt_ctrlblk.nexttail; | ||
166 | |||
167 | /* If there are done callbacks, make RCU_SOFTIRQ process them. */ | ||
168 | if (*rcu_preempt_ctrlblk.rcb.donetail != NULL) | ||
169 | raise_softirq(RCU_SOFTIRQ); | ||
170 | } | ||
171 | |||
172 | /* | ||
173 | * Start a new RCU grace period if warranted. Hard irqs must be disabled. | ||
174 | */ | ||
175 | static void rcu_preempt_start_gp(void) | ||
176 | { | ||
177 | if (!rcu_preempt_gp_in_progress() && rcu_preempt_needs_another_gp()) { | ||
178 | |||
179 | /* Official start of GP. */ | ||
180 | rcu_preempt_ctrlblk.gpnum++; | ||
181 | |||
182 | /* Any blocked RCU readers block new GP. */ | ||
183 | if (rcu_preempt_blocked_readers_any()) | ||
184 | rcu_preempt_ctrlblk.gp_tasks = | ||
185 | rcu_preempt_ctrlblk.blkd_tasks.next; | ||
186 | |||
187 | /* If there is no running reader, CPU is done with GP. */ | ||
188 | if (!rcu_preempt_running_reader()) | ||
189 | rcu_preempt_cpu_qs(); | ||
190 | } | ||
191 | } | ||
192 | |||
193 | /* | ||
194 | * We have entered the scheduler, and the current task might soon be | ||
195 | * context-switched away from. If this task is in an RCU read-side | ||
196 | * critical section, we will no longer be able to rely on the CPU to | ||
197 | * record that fact, so we enqueue the task on the blkd_tasks list. | ||
198 | * If the task started after the current grace period began, as recorded | ||
199 | * by ->gpcpu, we enqueue at the beginning of the list. Otherwise | ||
200 | * before the element referenced by ->gp_tasks (or at the tail if | ||
201 | * ->gp_tasks is NULL) and point ->gp_tasks at the newly added element. | ||
202 | * The task will dequeue itself when it exits the outermost enclosing | ||
203 | * RCU read-side critical section. Therefore, the current grace period | ||
204 | * cannot be permitted to complete until the ->gp_tasks pointer becomes | ||
205 | * NULL. | ||
206 | * | ||
207 | * Caller must disable preemption. | ||
208 | */ | ||
209 | void rcu_preempt_note_context_switch(void) | ||
210 | { | ||
211 | struct task_struct *t = current; | ||
212 | unsigned long flags; | ||
213 | |||
214 | local_irq_save(flags); /* must exclude scheduler_tick(). */ | ||
215 | if (rcu_preempt_running_reader() && | ||
216 | (t->rcu_read_unlock_special & RCU_READ_UNLOCK_BLOCKED) == 0) { | ||
217 | |||
218 | /* Possibly blocking in an RCU read-side critical section. */ | ||
219 | t->rcu_read_unlock_special |= RCU_READ_UNLOCK_BLOCKED; | ||
220 | |||
221 | /* | ||
222 | * If this CPU has already checked in, then this task | ||
223 | * will hold up the next grace period rather than the | ||
224 | * current grace period. Queue the task accordingly. | ||
225 | * If the task is queued for the current grace period | ||
226 | * (i.e., this CPU has not yet passed through a quiescent | ||
227 | * state for the current grace period), then as long | ||
228 | * as that task remains queued, the current grace period | ||
229 | * cannot end. | ||
230 | */ | ||
231 | list_add(&t->rcu_node_entry, &rcu_preempt_ctrlblk.blkd_tasks); | ||
232 | if (rcu_cpu_blocking_cur_gp()) | ||
233 | rcu_preempt_ctrlblk.gp_tasks = &t->rcu_node_entry; | ||
234 | } | ||
235 | |||
236 | /* | ||
237 | * Either we were not in an RCU read-side critical section to | ||
238 | * begin with, or we have now recorded that critical section | ||
239 | * globally. Either way, we can now note a quiescent state | ||
240 | * for this CPU. Again, if we were in an RCU read-side critical | ||
241 | * section, and if that critical section was blocking the current | ||
242 | * grace period, then the fact that the task has been enqueued | ||
243 | * means that current grace period continues to be blocked. | ||
244 | */ | ||
245 | rcu_preempt_cpu_qs(); | ||
246 | local_irq_restore(flags); | ||
247 | } | ||
248 | |||
249 | /* | ||
250 | * Tiny-preemptible RCU implementation for rcu_read_lock(). | ||
251 | * Just increment ->rcu_read_lock_nesting, shared state will be updated | ||
252 | * if we block. | ||
253 | */ | ||
254 | void __rcu_read_lock(void) | ||
255 | { | ||
256 | current->rcu_read_lock_nesting++; | ||
257 | barrier(); /* needed if we ever invoke rcu_read_lock in rcutiny.c */ | ||
258 | } | ||
259 | EXPORT_SYMBOL_GPL(__rcu_read_lock); | ||
260 | |||
261 | /* | ||
262 | * Handle special cases during rcu_read_unlock(), such as needing to | ||
263 | * notify RCU core processing or task having blocked during the RCU | ||
264 | * read-side critical section. | ||
265 | */ | ||
266 | static void rcu_read_unlock_special(struct task_struct *t) | ||
267 | { | ||
268 | int empty; | ||
269 | int empty_exp; | ||
270 | unsigned long flags; | ||
271 | struct list_head *np; | ||
272 | int special; | ||
273 | |||
274 | /* | ||
275 | * NMI handlers cannot block and cannot safely manipulate state. | ||
276 | * They therefore cannot possibly be special, so just leave. | ||
277 | */ | ||
278 | if (in_nmi()) | ||
279 | return; | ||
280 | |||
281 | local_irq_save(flags); | ||
282 | |||
283 | /* | ||
284 | * If RCU core is waiting for this CPU to exit critical section, | ||
285 | * let it know that we have done so. | ||
286 | */ | ||
287 | special = t->rcu_read_unlock_special; | ||
288 | if (special & RCU_READ_UNLOCK_NEED_QS) | ||
289 | rcu_preempt_cpu_qs(); | ||
290 | |||
291 | /* Hardware IRQ handlers cannot block. */ | ||
292 | if (in_irq()) { | ||
293 | local_irq_restore(flags); | ||
294 | return; | ||
295 | } | ||
296 | |||
297 | /* Clean up if blocked during RCU read-side critical section. */ | ||
298 | if (special & RCU_READ_UNLOCK_BLOCKED) { | ||
299 | t->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_BLOCKED; | ||
300 | |||
301 | /* | ||
302 | * Remove this task from the ->blkd_tasks list and adjust | ||
303 | * any pointers that might have been referencing it. | ||
304 | */ | ||
305 | empty = !rcu_preempt_blocked_readers_cgp(); | ||
306 | empty_exp = rcu_preempt_ctrlblk.exp_tasks == NULL; | ||
307 | np = t->rcu_node_entry.next; | ||
308 | if (np == &rcu_preempt_ctrlblk.blkd_tasks) | ||
309 | np = NULL; | ||
310 | list_del(&t->rcu_node_entry); | ||
311 | if (&t->rcu_node_entry == rcu_preempt_ctrlblk.gp_tasks) | ||
312 | rcu_preempt_ctrlblk.gp_tasks = np; | ||
313 | if (&t->rcu_node_entry == rcu_preempt_ctrlblk.exp_tasks) | ||
314 | rcu_preempt_ctrlblk.exp_tasks = np; | ||
315 | INIT_LIST_HEAD(&t->rcu_node_entry); | ||
316 | |||
317 | /* | ||
318 | * If this was the last task on the current list, and if | ||
319 | * we aren't waiting on the CPU, report the quiescent state | ||
320 | * and start a new grace period if needed. | ||
321 | */ | ||
322 | if (!empty && !rcu_preempt_blocked_readers_cgp()) { | ||
323 | rcu_preempt_cpu_qs(); | ||
324 | rcu_preempt_start_gp(); | ||
325 | } | ||
326 | |||
327 | /* | ||
328 | * If this was the last task on the expedited lists, | ||
329 | * then we need wake up the waiting task. | ||
330 | */ | ||
331 | if (!empty_exp && rcu_preempt_ctrlblk.exp_tasks == NULL) | ||
332 | rcu_report_exp_done(); | ||
333 | } | ||
334 | local_irq_restore(flags); | ||
335 | } | ||
336 | |||
337 | /* | ||
338 | * Tiny-preemptible RCU implementation for rcu_read_unlock(). | ||
339 | * Decrement ->rcu_read_lock_nesting. If the result is zero (outermost | ||
340 | * rcu_read_unlock()) and ->rcu_read_unlock_special is non-zero, then | ||
341 | * invoke rcu_read_unlock_special() to clean up after a context switch | ||
342 | * in an RCU read-side critical section and other special cases. | ||
343 | */ | ||
344 | void __rcu_read_unlock(void) | ||
345 | { | ||
346 | struct task_struct *t = current; | ||
347 | |||
348 | barrier(); /* needed if we ever invoke rcu_read_unlock in rcutiny.c */ | ||
349 | --t->rcu_read_lock_nesting; | ||
350 | barrier(); /* decrement before load of ->rcu_read_unlock_special */ | ||
351 | if (t->rcu_read_lock_nesting == 0 && | ||
352 | unlikely(ACCESS_ONCE(t->rcu_read_unlock_special))) | ||
353 | rcu_read_unlock_special(t); | ||
354 | #ifdef CONFIG_PROVE_LOCKING | ||
355 | WARN_ON_ONCE(t->rcu_read_lock_nesting < 0); | ||
356 | #endif /* #ifdef CONFIG_PROVE_LOCKING */ | ||
357 | } | ||
358 | EXPORT_SYMBOL_GPL(__rcu_read_unlock); | ||
359 | |||
360 | /* | ||
361 | * Check for a quiescent state from the current CPU. When a task blocks, | ||
362 | * the task is recorded in the rcu_preempt_ctrlblk structure, which is | ||
363 | * checked elsewhere. This is called from the scheduling-clock interrupt. | ||
364 | * | ||
365 | * Caller must disable hard irqs. | ||
366 | */ | ||
367 | static void rcu_preempt_check_callbacks(void) | ||
368 | { | ||
369 | struct task_struct *t = current; | ||
370 | |||
371 | if (rcu_preempt_gp_in_progress() && | ||
372 | (!rcu_preempt_running_reader() || | ||
373 | !rcu_cpu_blocking_cur_gp())) | ||
374 | rcu_preempt_cpu_qs(); | ||
375 | if (&rcu_preempt_ctrlblk.rcb.rcucblist != | ||
376 | rcu_preempt_ctrlblk.rcb.donetail) | ||
377 | raise_softirq(RCU_SOFTIRQ); | ||
378 | if (rcu_preempt_gp_in_progress() && | ||
379 | rcu_cpu_blocking_cur_gp() && | ||
380 | rcu_preempt_running_reader()) | ||
381 | t->rcu_read_unlock_special |= RCU_READ_UNLOCK_NEED_QS; | ||
382 | } | ||
383 | |||
384 | /* | ||
385 | * TINY_PREEMPT_RCU has an extra callback-list tail pointer to | ||
386 | * update, so this is invoked from __rcu_process_callbacks() to | ||
387 | * handle that case. Of course, it is invoked for all flavors of | ||
388 | * RCU, but RCU callbacks can appear only on one of the lists, and | ||
389 | * neither ->nexttail nor ->donetail can possibly be NULL, so there | ||
390 | * is no need for an explicit check. | ||
391 | */ | ||
392 | static void rcu_preempt_remove_callbacks(struct rcu_ctrlblk *rcp) | ||
393 | { | ||
394 | if (rcu_preempt_ctrlblk.nexttail == rcp->donetail) | ||
395 | rcu_preempt_ctrlblk.nexttail = &rcp->rcucblist; | ||
396 | } | ||
397 | |||
398 | /* | ||
399 | * Process callbacks for preemptible RCU. | ||
400 | */ | ||
401 | static void rcu_preempt_process_callbacks(void) | ||
402 | { | ||
403 | __rcu_process_callbacks(&rcu_preempt_ctrlblk.rcb); | ||
404 | } | ||
405 | |||
406 | /* | ||
407 | * Queue a preemptible-RCU callback for invocation after a grace period. | ||
408 | */ | ||
409 | void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu)) | ||
410 | { | ||
411 | unsigned long flags; | ||
412 | |||
413 | debug_rcu_head_queue(head); | ||
414 | head->func = func; | ||
415 | head->next = NULL; | ||
416 | |||
417 | local_irq_save(flags); | ||
418 | *rcu_preempt_ctrlblk.nexttail = head; | ||
419 | rcu_preempt_ctrlblk.nexttail = &head->next; | ||
420 | rcu_preempt_start_gp(); /* checks to see if GP needed. */ | ||
421 | local_irq_restore(flags); | ||
422 | } | ||
423 | EXPORT_SYMBOL_GPL(call_rcu); | ||
424 | |||
425 | void rcu_barrier(void) | ||
426 | { | ||
427 | struct rcu_synchronize rcu; | ||
428 | |||
429 | init_rcu_head_on_stack(&rcu.head); | ||
430 | init_completion(&rcu.completion); | ||
431 | /* Will wake me after RCU finished. */ | ||
432 | call_rcu(&rcu.head, wakeme_after_rcu); | ||
433 | /* Wait for it. */ | ||
434 | wait_for_completion(&rcu.completion); | ||
435 | destroy_rcu_head_on_stack(&rcu.head); | ||
436 | } | ||
437 | EXPORT_SYMBOL_GPL(rcu_barrier); | ||
438 | |||
439 | /* | ||
440 | * synchronize_rcu - wait until a grace period has elapsed. | ||
441 | * | ||
442 | * Control will return to the caller some time after a full grace | ||
443 | * period has elapsed, in other words after all currently executing RCU | ||
444 | * read-side critical sections have completed. RCU read-side critical | ||
445 | * sections are delimited by rcu_read_lock() and rcu_read_unlock(), | ||
446 | * and may be nested. | ||
447 | */ | ||
448 | void synchronize_rcu(void) | ||
449 | { | ||
450 | #ifdef CONFIG_DEBUG_LOCK_ALLOC | ||
451 | if (!rcu_scheduler_active) | ||
452 | return; | ||
453 | #endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ | ||
454 | |||
455 | WARN_ON_ONCE(rcu_preempt_running_reader()); | ||
456 | if (!rcu_preempt_blocked_readers_any()) | ||
457 | return; | ||
458 | |||
459 | /* Once we get past the fastpath checks, same code as rcu_barrier(). */ | ||
460 | rcu_barrier(); | ||
461 | } | ||
462 | EXPORT_SYMBOL_GPL(synchronize_rcu); | ||
463 | |||
464 | static DECLARE_WAIT_QUEUE_HEAD(sync_rcu_preempt_exp_wq); | ||
465 | static unsigned long sync_rcu_preempt_exp_count; | ||
466 | static DEFINE_MUTEX(sync_rcu_preempt_exp_mutex); | ||
467 | |||
468 | /* | ||
469 | * Return non-zero if there are any tasks in RCU read-side critical | ||
470 | * sections blocking the current preemptible-RCU expedited grace period. | ||
471 | * If there is no preemptible-RCU expedited grace period currently in | ||
472 | * progress, returns zero unconditionally. | ||
473 | */ | ||
474 | static int rcu_preempted_readers_exp(void) | ||
475 | { | ||
476 | return rcu_preempt_ctrlblk.exp_tasks != NULL; | ||
477 | } | ||
478 | |||
479 | /* | ||
480 | * Report the exit from RCU read-side critical section for the last task | ||
481 | * that queued itself during or before the current expedited preemptible-RCU | ||
482 | * grace period. | ||
483 | */ | ||
484 | static void rcu_report_exp_done(void) | ||
485 | { | ||
486 | wake_up(&sync_rcu_preempt_exp_wq); | ||
487 | } | ||
488 | |||
489 | /* | ||
490 | * Wait for an rcu-preempt grace period, but expedite it. The basic idea | ||
491 | * is to rely on the fact that there is but one CPU, and that it is | ||
492 | * illegal for a task to invoke synchronize_rcu_expedited() while in a | ||
493 | * preemptible-RCU read-side critical section. Therefore, any such | ||
494 | * critical sections must correspond to blocked tasks, which must therefore | ||
495 | * be on the ->blkd_tasks list. So just record the current head of the | ||
496 | * list in the ->exp_tasks pointer, and wait for all tasks including and | ||
497 | * after the task pointed to by ->exp_tasks to drain. | ||
498 | */ | ||
499 | void synchronize_rcu_expedited(void) | ||
500 | { | ||
501 | unsigned long flags; | ||
502 | struct rcu_preempt_ctrlblk *rpcp = &rcu_preempt_ctrlblk; | ||
503 | unsigned long snap; | ||
504 | |||
505 | barrier(); /* ensure prior action seen before grace period. */ | ||
506 | |||
507 | WARN_ON_ONCE(rcu_preempt_running_reader()); | ||
508 | |||
509 | /* | ||
510 | * Acquire lock so that there is only one preemptible RCU grace | ||
511 | * period in flight. Of course, if someone does the expedited | ||
512 | * grace period for us while we are acquiring the lock, just leave. | ||
513 | */ | ||
514 | snap = sync_rcu_preempt_exp_count + 1; | ||
515 | mutex_lock(&sync_rcu_preempt_exp_mutex); | ||
516 | if (ULONG_CMP_LT(snap, sync_rcu_preempt_exp_count)) | ||
517 | goto unlock_mb_ret; /* Others did our work for us. */ | ||
518 | |||
519 | local_irq_save(flags); | ||
520 | |||
521 | /* | ||
522 | * All RCU readers have to already be on blkd_tasks because | ||
523 | * we cannot legally be executing in an RCU read-side critical | ||
524 | * section. | ||
525 | */ | ||
526 | |||
527 | /* Snapshot current head of ->blkd_tasks list. */ | ||
528 | rpcp->exp_tasks = rpcp->blkd_tasks.next; | ||
529 | if (rpcp->exp_tasks == &rpcp->blkd_tasks) | ||
530 | rpcp->exp_tasks = NULL; | ||
531 | local_irq_restore(flags); | ||
532 | |||
533 | /* Wait for tail of ->blkd_tasks list to drain. */ | ||
534 | if (rcu_preempted_readers_exp()) | ||
535 | wait_event(sync_rcu_preempt_exp_wq, | ||
536 | !rcu_preempted_readers_exp()); | ||
537 | |||
538 | /* Clean up and exit. */ | ||
539 | barrier(); /* ensure expedited GP seen before counter increment. */ | ||
540 | sync_rcu_preempt_exp_count++; | ||
541 | unlock_mb_ret: | ||
542 | mutex_unlock(&sync_rcu_preempt_exp_mutex); | ||
543 | barrier(); /* ensure subsequent action seen after grace period. */ | ||
544 | } | ||
545 | EXPORT_SYMBOL_GPL(synchronize_rcu_expedited); | ||
546 | |||
547 | /* | ||
548 | * Does preemptible RCU need the CPU to stay out of dynticks mode? | ||
549 | */ | ||
550 | int rcu_preempt_needs_cpu(void) | ||
551 | { | ||
552 | if (!rcu_preempt_running_reader()) | ||
553 | rcu_preempt_cpu_qs(); | ||
554 | return rcu_preempt_ctrlblk.rcb.rcucblist != NULL; | ||
555 | } | ||
556 | |||
557 | /* | ||
558 | * Check for a task exiting while in a preemptible-RCU read-side | ||
559 | * critical section, clean up if so. No need to issue warnings, | ||
560 | * as debug_check_no_locks_held() already does this if lockdep | ||
561 | * is enabled. | ||
562 | */ | ||
563 | void exit_rcu(void) | ||
564 | { | ||
565 | struct task_struct *t = current; | ||
566 | |||
567 | if (t->rcu_read_lock_nesting == 0) | ||
568 | return; | ||
569 | t->rcu_read_lock_nesting = 1; | ||
570 | rcu_read_unlock(); | ||
571 | } | ||
572 | |||
573 | #else /* #ifdef CONFIG_TINY_PREEMPT_RCU */ | ||
574 | |||
575 | /* | ||
576 | * Because preemptible RCU does not exist, it never has any callbacks | ||
577 | * to check. | ||
578 | */ | ||
579 | static void rcu_preempt_check_callbacks(void) | ||
580 | { | ||
581 | } | ||
582 | |||
583 | /* | ||
584 | * Because preemptible RCU does not exist, it never has any callbacks | ||
585 | * to remove. | ||
586 | */ | ||
587 | static void rcu_preempt_remove_callbacks(struct rcu_ctrlblk *rcp) | ||
588 | { | ||
589 | } | ||
590 | |||
591 | /* | ||
592 | * Because preemptible RCU does not exist, it never has any callbacks | ||
593 | * to process. | ||
594 | */ | ||
595 | static void rcu_preempt_process_callbacks(void) | ||
596 | { | ||
597 | } | ||
598 | |||
599 | #endif /* #else #ifdef CONFIG_TINY_PREEMPT_RCU */ | ||
600 | |||
25 | #ifdef CONFIG_DEBUG_LOCK_ALLOC | 601 | #ifdef CONFIG_DEBUG_LOCK_ALLOC |
26 | 602 | ||
27 | #include <linux/kernel_stat.h> | 603 | #include <linux/kernel_stat.h> |
diff --git a/kernel/rcutorture.c b/kernel/rcutorture.c index 2e2726d790b9..9d8e8fb2515f 100644 --- a/kernel/rcutorture.c +++ b/kernel/rcutorture.c | |||
@@ -120,7 +120,7 @@ struct rcu_torture { | |||
120 | }; | 120 | }; |
121 | 121 | ||
122 | static LIST_HEAD(rcu_torture_freelist); | 122 | static LIST_HEAD(rcu_torture_freelist); |
123 | static struct rcu_torture *rcu_torture_current; | 123 | static struct rcu_torture __rcu *rcu_torture_current; |
124 | static long rcu_torture_current_version; | 124 | static long rcu_torture_current_version; |
125 | static struct rcu_torture rcu_tortures[10 * RCU_TORTURE_PIPE_LEN]; | 125 | static struct rcu_torture rcu_tortures[10 * RCU_TORTURE_PIPE_LEN]; |
126 | static DEFINE_SPINLOCK(rcu_torture_lock); | 126 | static DEFINE_SPINLOCK(rcu_torture_lock); |
@@ -153,8 +153,10 @@ int rcutorture_runnable = RCUTORTURE_RUNNABLE_INIT; | |||
153 | #define FULLSTOP_SHUTDOWN 1 /* System shutdown with rcutorture running. */ | 153 | #define FULLSTOP_SHUTDOWN 1 /* System shutdown with rcutorture running. */ |
154 | #define FULLSTOP_RMMOD 2 /* Normal rmmod of rcutorture. */ | 154 | #define FULLSTOP_RMMOD 2 /* Normal rmmod of rcutorture. */ |
155 | static int fullstop = FULLSTOP_RMMOD; | 155 | static int fullstop = FULLSTOP_RMMOD; |
156 | DEFINE_MUTEX(fullstop_mutex); /* Protect fullstop transitions and spawning */ | 156 | /* |
157 | /* of kthreads. */ | 157 | * Protect fullstop transitions and spawning of kthreads. |
158 | */ | ||
159 | static DEFINE_MUTEX(fullstop_mutex); | ||
158 | 160 | ||
159 | /* | 161 | /* |
160 | * Detect and respond to a system shutdown. | 162 | * Detect and respond to a system shutdown. |
@@ -303,6 +305,10 @@ static void rcu_read_delay(struct rcu_random_state *rrsp) | |||
303 | mdelay(longdelay_ms); | 305 | mdelay(longdelay_ms); |
304 | if (!(rcu_random(rrsp) % (nrealreaders * 2 * shortdelay_us))) | 306 | if (!(rcu_random(rrsp) % (nrealreaders * 2 * shortdelay_us))) |
305 | udelay(shortdelay_us); | 307 | udelay(shortdelay_us); |
308 | #ifdef CONFIG_PREEMPT | ||
309 | if (!preempt_count() && !(rcu_random(rrsp) % (nrealreaders * 20000))) | ||
310 | preempt_schedule(); /* No QS if preempt_disable() in effect */ | ||
311 | #endif | ||
306 | } | 312 | } |
307 | 313 | ||
308 | static void rcu_torture_read_unlock(int idx) __releases(RCU) | 314 | static void rcu_torture_read_unlock(int idx) __releases(RCU) |
@@ -536,6 +542,8 @@ static void srcu_read_delay(struct rcu_random_state *rrsp) | |||
536 | delay = rcu_random(rrsp) % (nrealreaders * 2 * longdelay * uspertick); | 542 | delay = rcu_random(rrsp) % (nrealreaders * 2 * longdelay * uspertick); |
537 | if (!delay) | 543 | if (!delay) |
538 | schedule_timeout_interruptible(longdelay); | 544 | schedule_timeout_interruptible(longdelay); |
545 | else | ||
546 | rcu_read_delay(rrsp); | ||
539 | } | 547 | } |
540 | 548 | ||
541 | static void srcu_torture_read_unlock(int idx) __releases(&srcu_ctl) | 549 | static void srcu_torture_read_unlock(int idx) __releases(&srcu_ctl) |
@@ -731,7 +739,8 @@ rcu_torture_writer(void *arg) | |||
731 | continue; | 739 | continue; |
732 | rp->rtort_pipe_count = 0; | 740 | rp->rtort_pipe_count = 0; |
733 | udelay(rcu_random(&rand) & 0x3ff); | 741 | udelay(rcu_random(&rand) & 0x3ff); |
734 | old_rp = rcu_torture_current; | 742 | old_rp = rcu_dereference_check(rcu_torture_current, |
743 | current == writer_task); | ||
735 | rp->rtort_mbtest = 1; | 744 | rp->rtort_mbtest = 1; |
736 | rcu_assign_pointer(rcu_torture_current, rp); | 745 | rcu_assign_pointer(rcu_torture_current, rp); |
737 | smp_wmb(); /* Mods to old_rp must follow rcu_assign_pointer() */ | 746 | smp_wmb(); /* Mods to old_rp must follow rcu_assign_pointer() */ |
diff --git a/kernel/rcutree.c b/kernel/rcutree.c index d5bc43976c5a..ccdc04c47981 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c | |||
@@ -143,6 +143,11 @@ module_param(blimit, int, 0); | |||
143 | module_param(qhimark, int, 0); | 143 | module_param(qhimark, int, 0); |
144 | module_param(qlowmark, int, 0); | 144 | module_param(qlowmark, int, 0); |
145 | 145 | ||
146 | #ifdef CONFIG_RCU_CPU_STALL_DETECTOR | ||
147 | int rcu_cpu_stall_suppress __read_mostly = RCU_CPU_STALL_SUPPRESS_INIT; | ||
148 | module_param(rcu_cpu_stall_suppress, int, 0644); | ||
149 | #endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */ | ||
150 | |||
146 | static void force_quiescent_state(struct rcu_state *rsp, int relaxed); | 151 | static void force_quiescent_state(struct rcu_state *rsp, int relaxed); |
147 | static int rcu_pending(int cpu); | 152 | static int rcu_pending(int cpu); |
148 | 153 | ||
@@ -450,7 +455,7 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp) | |||
450 | 455 | ||
451 | #ifdef CONFIG_RCU_CPU_STALL_DETECTOR | 456 | #ifdef CONFIG_RCU_CPU_STALL_DETECTOR |
452 | 457 | ||
453 | int rcu_cpu_stall_panicking __read_mostly; | 458 | int rcu_cpu_stall_suppress __read_mostly; |
454 | 459 | ||
455 | static void record_gp_stall_check_time(struct rcu_state *rsp) | 460 | static void record_gp_stall_check_time(struct rcu_state *rsp) |
456 | { | 461 | { |
@@ -482,8 +487,11 @@ static void print_other_cpu_stall(struct rcu_state *rsp) | |||
482 | rcu_print_task_stall(rnp); | 487 | rcu_print_task_stall(rnp); |
483 | raw_spin_unlock_irqrestore(&rnp->lock, flags); | 488 | raw_spin_unlock_irqrestore(&rnp->lock, flags); |
484 | 489 | ||
485 | /* OK, time to rat on our buddy... */ | 490 | /* |
486 | 491 | * OK, time to rat on our buddy... | |
492 | * See Documentation/RCU/stallwarn.txt for info on how to debug | ||
493 | * RCU CPU stall warnings. | ||
494 | */ | ||
487 | printk(KERN_ERR "INFO: %s detected stalls on CPUs/tasks: {", | 495 | printk(KERN_ERR "INFO: %s detected stalls on CPUs/tasks: {", |
488 | rsp->name); | 496 | rsp->name); |
489 | rcu_for_each_leaf_node(rsp, rnp) { | 497 | rcu_for_each_leaf_node(rsp, rnp) { |
@@ -512,6 +520,11 @@ static void print_cpu_stall(struct rcu_state *rsp) | |||
512 | unsigned long flags; | 520 | unsigned long flags; |
513 | struct rcu_node *rnp = rcu_get_root(rsp); | 521 | struct rcu_node *rnp = rcu_get_root(rsp); |
514 | 522 | ||
523 | /* | ||
524 | * OK, time to rat on ourselves... | ||
525 | * See Documentation/RCU/stallwarn.txt for info on how to debug | ||
526 | * RCU CPU stall warnings. | ||
527 | */ | ||
515 | printk(KERN_ERR "INFO: %s detected stall on CPU %d (t=%lu jiffies)\n", | 528 | printk(KERN_ERR "INFO: %s detected stall on CPU %d (t=%lu jiffies)\n", |
516 | rsp->name, smp_processor_id(), jiffies - rsp->gp_start); | 529 | rsp->name, smp_processor_id(), jiffies - rsp->gp_start); |
517 | trigger_all_cpu_backtrace(); | 530 | trigger_all_cpu_backtrace(); |
@@ -530,11 +543,11 @@ static void check_cpu_stall(struct rcu_state *rsp, struct rcu_data *rdp) | |||
530 | long delta; | 543 | long delta; |
531 | struct rcu_node *rnp; | 544 | struct rcu_node *rnp; |
532 | 545 | ||
533 | if (rcu_cpu_stall_panicking) | 546 | if (rcu_cpu_stall_suppress) |
534 | return; | 547 | return; |
535 | delta = jiffies - rsp->jiffies_stall; | 548 | delta = jiffies - ACCESS_ONCE(rsp->jiffies_stall); |
536 | rnp = rdp->mynode; | 549 | rnp = rdp->mynode; |
537 | if ((rnp->qsmask & rdp->grpmask) && delta >= 0) { | 550 | if ((ACCESS_ONCE(rnp->qsmask) & rdp->grpmask) && delta >= 0) { |
538 | 551 | ||
539 | /* We haven't checked in, so go dump stack. */ | 552 | /* We haven't checked in, so go dump stack. */ |
540 | print_cpu_stall(rsp); | 553 | print_cpu_stall(rsp); |
@@ -548,10 +561,26 @@ static void check_cpu_stall(struct rcu_state *rsp, struct rcu_data *rdp) | |||
548 | 561 | ||
549 | static int rcu_panic(struct notifier_block *this, unsigned long ev, void *ptr) | 562 | static int rcu_panic(struct notifier_block *this, unsigned long ev, void *ptr) |
550 | { | 563 | { |
551 | rcu_cpu_stall_panicking = 1; | 564 | rcu_cpu_stall_suppress = 1; |
552 | return NOTIFY_DONE; | 565 | return NOTIFY_DONE; |
553 | } | 566 | } |
554 | 567 | ||
568 | /** | ||
569 | * rcu_cpu_stall_reset - prevent further stall warnings in current grace period | ||
570 | * | ||
571 | * Set the stall-warning timeout way off into the future, thus preventing | ||
572 | * any RCU CPU stall-warning messages from appearing in the current set of | ||
573 | * RCU grace periods. | ||
574 | * | ||
575 | * The caller must disable hard irqs. | ||
576 | */ | ||
577 | void rcu_cpu_stall_reset(void) | ||
578 | { | ||
579 | rcu_sched_state.jiffies_stall = jiffies + ULONG_MAX / 2; | ||
580 | rcu_bh_state.jiffies_stall = jiffies + ULONG_MAX / 2; | ||
581 | rcu_preempt_stall_reset(); | ||
582 | } | ||
583 | |||
555 | static struct notifier_block rcu_panic_block = { | 584 | static struct notifier_block rcu_panic_block = { |
556 | .notifier_call = rcu_panic, | 585 | .notifier_call = rcu_panic, |
557 | }; | 586 | }; |
@@ -571,6 +600,10 @@ static void check_cpu_stall(struct rcu_state *rsp, struct rcu_data *rdp) | |||
571 | { | 600 | { |
572 | } | 601 | } |
573 | 602 | ||
603 | void rcu_cpu_stall_reset(void) | ||
604 | { | ||
605 | } | ||
606 | |||
574 | static void __init check_cpu_stall_init(void) | 607 | static void __init check_cpu_stall_init(void) |
575 | { | 608 | { |
576 | } | 609 | } |
@@ -712,7 +745,7 @@ static void | |||
712 | rcu_start_gp(struct rcu_state *rsp, unsigned long flags) | 745 | rcu_start_gp(struct rcu_state *rsp, unsigned long flags) |
713 | __releases(rcu_get_root(rsp)->lock) | 746 | __releases(rcu_get_root(rsp)->lock) |
714 | { | 747 | { |
715 | struct rcu_data *rdp = rsp->rda[smp_processor_id()]; | 748 | struct rcu_data *rdp = this_cpu_ptr(rsp->rda); |
716 | struct rcu_node *rnp = rcu_get_root(rsp); | 749 | struct rcu_node *rnp = rcu_get_root(rsp); |
717 | 750 | ||
718 | if (!cpu_needs_another_gp(rsp, rdp) || rsp->fqs_active) { | 751 | if (!cpu_needs_another_gp(rsp, rdp) || rsp->fqs_active) { |
@@ -960,7 +993,7 @@ rcu_check_quiescent_state(struct rcu_state *rsp, struct rcu_data *rdp) | |||
960 | static void rcu_send_cbs_to_orphanage(struct rcu_state *rsp) | 993 | static void rcu_send_cbs_to_orphanage(struct rcu_state *rsp) |
961 | { | 994 | { |
962 | int i; | 995 | int i; |
963 | struct rcu_data *rdp = rsp->rda[smp_processor_id()]; | 996 | struct rcu_data *rdp = this_cpu_ptr(rsp->rda); |
964 | 997 | ||
965 | if (rdp->nxtlist == NULL) | 998 | if (rdp->nxtlist == NULL) |
966 | return; /* irqs disabled, so comparison is stable. */ | 999 | return; /* irqs disabled, so comparison is stable. */ |
@@ -971,6 +1004,7 @@ static void rcu_send_cbs_to_orphanage(struct rcu_state *rsp) | |||
971 | for (i = 0; i < RCU_NEXT_SIZE; i++) | 1004 | for (i = 0; i < RCU_NEXT_SIZE; i++) |
972 | rdp->nxttail[i] = &rdp->nxtlist; | 1005 | rdp->nxttail[i] = &rdp->nxtlist; |
973 | rsp->orphan_qlen += rdp->qlen; | 1006 | rsp->orphan_qlen += rdp->qlen; |
1007 | rdp->n_cbs_orphaned += rdp->qlen; | ||
974 | rdp->qlen = 0; | 1008 | rdp->qlen = 0; |
975 | raw_spin_unlock(&rsp->onofflock); /* irqs remain disabled. */ | 1009 | raw_spin_unlock(&rsp->onofflock); /* irqs remain disabled. */ |
976 | } | 1010 | } |
@@ -984,7 +1018,7 @@ static void rcu_adopt_orphan_cbs(struct rcu_state *rsp) | |||
984 | struct rcu_data *rdp; | 1018 | struct rcu_data *rdp; |
985 | 1019 | ||
986 | raw_spin_lock_irqsave(&rsp->onofflock, flags); | 1020 | raw_spin_lock_irqsave(&rsp->onofflock, flags); |
987 | rdp = rsp->rda[smp_processor_id()]; | 1021 | rdp = this_cpu_ptr(rsp->rda); |
988 | if (rsp->orphan_cbs_list == NULL) { | 1022 | if (rsp->orphan_cbs_list == NULL) { |
989 | raw_spin_unlock_irqrestore(&rsp->onofflock, flags); | 1023 | raw_spin_unlock_irqrestore(&rsp->onofflock, flags); |
990 | return; | 1024 | return; |
@@ -992,6 +1026,7 @@ static void rcu_adopt_orphan_cbs(struct rcu_state *rsp) | |||
992 | *rdp->nxttail[RCU_NEXT_TAIL] = rsp->orphan_cbs_list; | 1026 | *rdp->nxttail[RCU_NEXT_TAIL] = rsp->orphan_cbs_list; |
993 | rdp->nxttail[RCU_NEXT_TAIL] = rsp->orphan_cbs_tail; | 1027 | rdp->nxttail[RCU_NEXT_TAIL] = rsp->orphan_cbs_tail; |
994 | rdp->qlen += rsp->orphan_qlen; | 1028 | rdp->qlen += rsp->orphan_qlen; |
1029 | rdp->n_cbs_adopted += rsp->orphan_qlen; | ||
995 | rsp->orphan_cbs_list = NULL; | 1030 | rsp->orphan_cbs_list = NULL; |
996 | rsp->orphan_cbs_tail = &rsp->orphan_cbs_list; | 1031 | rsp->orphan_cbs_tail = &rsp->orphan_cbs_list; |
997 | rsp->orphan_qlen = 0; | 1032 | rsp->orphan_qlen = 0; |
@@ -1007,7 +1042,7 @@ static void __rcu_offline_cpu(int cpu, struct rcu_state *rsp) | |||
1007 | unsigned long flags; | 1042 | unsigned long flags; |
1008 | unsigned long mask; | 1043 | unsigned long mask; |
1009 | int need_report = 0; | 1044 | int need_report = 0; |
1010 | struct rcu_data *rdp = rsp->rda[cpu]; | 1045 | struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu); |
1011 | struct rcu_node *rnp; | 1046 | struct rcu_node *rnp; |
1012 | 1047 | ||
1013 | /* Exclude any attempts to start a new grace period. */ | 1048 | /* Exclude any attempts to start a new grace period. */ |
@@ -1123,6 +1158,7 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp) | |||
1123 | 1158 | ||
1124 | /* Update count, and requeue any remaining callbacks. */ | 1159 | /* Update count, and requeue any remaining callbacks. */ |
1125 | rdp->qlen -= count; | 1160 | rdp->qlen -= count; |
1161 | rdp->n_cbs_invoked += count; | ||
1126 | if (list != NULL) { | 1162 | if (list != NULL) { |
1127 | *tail = rdp->nxtlist; | 1163 | *tail = rdp->nxtlist; |
1128 | rdp->nxtlist = list; | 1164 | rdp->nxtlist = list; |
@@ -1226,7 +1262,8 @@ static void force_qs_rnp(struct rcu_state *rsp, int (*f)(struct rcu_data *)) | |||
1226 | cpu = rnp->grplo; | 1262 | cpu = rnp->grplo; |
1227 | bit = 1; | 1263 | bit = 1; |
1228 | for (; cpu <= rnp->grphi; cpu++, bit <<= 1) { | 1264 | for (; cpu <= rnp->grphi; cpu++, bit <<= 1) { |
1229 | if ((rnp->qsmask & bit) != 0 && f(rsp->rda[cpu])) | 1265 | if ((rnp->qsmask & bit) != 0 && |
1266 | f(per_cpu_ptr(rsp->rda, cpu))) | ||
1230 | mask |= bit; | 1267 | mask |= bit; |
1231 | } | 1268 | } |
1232 | if (mask != 0) { | 1269 | if (mask != 0) { |
@@ -1402,7 +1439,7 @@ __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu), | |||
1402 | * a quiescent state betweentimes. | 1439 | * a quiescent state betweentimes. |
1403 | */ | 1440 | */ |
1404 | local_irq_save(flags); | 1441 | local_irq_save(flags); |
1405 | rdp = rsp->rda[smp_processor_id()]; | 1442 | rdp = this_cpu_ptr(rsp->rda); |
1406 | rcu_process_gp_end(rsp, rdp); | 1443 | rcu_process_gp_end(rsp, rdp); |
1407 | check_for_new_grace_period(rsp, rdp); | 1444 | check_for_new_grace_period(rsp, rdp); |
1408 | 1445 | ||
@@ -1701,7 +1738,7 @@ rcu_boot_init_percpu_data(int cpu, struct rcu_state *rsp) | |||
1701 | { | 1738 | { |
1702 | unsigned long flags; | 1739 | unsigned long flags; |
1703 | int i; | 1740 | int i; |
1704 | struct rcu_data *rdp = rsp->rda[cpu]; | 1741 | struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu); |
1705 | struct rcu_node *rnp = rcu_get_root(rsp); | 1742 | struct rcu_node *rnp = rcu_get_root(rsp); |
1706 | 1743 | ||
1707 | /* Set up local state, ensuring consistent view of global state. */ | 1744 | /* Set up local state, ensuring consistent view of global state. */ |
@@ -1729,7 +1766,7 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int preemptable) | |||
1729 | { | 1766 | { |
1730 | unsigned long flags; | 1767 | unsigned long flags; |
1731 | unsigned long mask; | 1768 | unsigned long mask; |
1732 | struct rcu_data *rdp = rsp->rda[cpu]; | 1769 | struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu); |
1733 | struct rcu_node *rnp = rcu_get_root(rsp); | 1770 | struct rcu_node *rnp = rcu_get_root(rsp); |
1734 | 1771 | ||
1735 | /* Set up local state, ensuring consistent view of global state. */ | 1772 | /* Set up local state, ensuring consistent view of global state. */ |
@@ -1865,7 +1902,8 @@ static void __init rcu_init_levelspread(struct rcu_state *rsp) | |||
1865 | /* | 1902 | /* |
1866 | * Helper function for rcu_init() that initializes one rcu_state structure. | 1903 | * Helper function for rcu_init() that initializes one rcu_state structure. |
1867 | */ | 1904 | */ |
1868 | static void __init rcu_init_one(struct rcu_state *rsp) | 1905 | static void __init rcu_init_one(struct rcu_state *rsp, |
1906 | struct rcu_data __percpu *rda) | ||
1869 | { | 1907 | { |
1870 | static char *buf[] = { "rcu_node_level_0", | 1908 | static char *buf[] = { "rcu_node_level_0", |
1871 | "rcu_node_level_1", | 1909 | "rcu_node_level_1", |
@@ -1918,37 +1956,23 @@ static void __init rcu_init_one(struct rcu_state *rsp) | |||
1918 | } | 1956 | } |
1919 | } | 1957 | } |
1920 | 1958 | ||
1959 | rsp->rda = rda; | ||
1921 | rnp = rsp->level[NUM_RCU_LVLS - 1]; | 1960 | rnp = rsp->level[NUM_RCU_LVLS - 1]; |
1922 | for_each_possible_cpu(i) { | 1961 | for_each_possible_cpu(i) { |
1923 | while (i > rnp->grphi) | 1962 | while (i > rnp->grphi) |
1924 | rnp++; | 1963 | rnp++; |
1925 | rsp->rda[i]->mynode = rnp; | 1964 | per_cpu_ptr(rsp->rda, i)->mynode = rnp; |
1926 | rcu_boot_init_percpu_data(i, rsp); | 1965 | rcu_boot_init_percpu_data(i, rsp); |
1927 | } | 1966 | } |
1928 | } | 1967 | } |
1929 | 1968 | ||
1930 | /* | ||
1931 | * Helper macro for __rcu_init() and __rcu_init_preempt(). To be used | ||
1932 | * nowhere else! Assigns leaf node pointers into each CPU's rcu_data | ||
1933 | * structure. | ||
1934 | */ | ||
1935 | #define RCU_INIT_FLAVOR(rsp, rcu_data) \ | ||
1936 | do { \ | ||
1937 | int i; \ | ||
1938 | \ | ||
1939 | for_each_possible_cpu(i) { \ | ||
1940 | (rsp)->rda[i] = &per_cpu(rcu_data, i); \ | ||
1941 | } \ | ||
1942 | rcu_init_one(rsp); \ | ||
1943 | } while (0) | ||
1944 | |||
1945 | void __init rcu_init(void) | 1969 | void __init rcu_init(void) |
1946 | { | 1970 | { |
1947 | int cpu; | 1971 | int cpu; |
1948 | 1972 | ||
1949 | rcu_bootup_announce(); | 1973 | rcu_bootup_announce(); |
1950 | RCU_INIT_FLAVOR(&rcu_sched_state, rcu_sched_data); | 1974 | rcu_init_one(&rcu_sched_state, &rcu_sched_data); |
1951 | RCU_INIT_FLAVOR(&rcu_bh_state, rcu_bh_data); | 1975 | rcu_init_one(&rcu_bh_state, &rcu_bh_data); |
1952 | __rcu_init_preempt(); | 1976 | __rcu_init_preempt(); |
1953 | open_softirq(RCU_SOFTIRQ, rcu_process_callbacks); | 1977 | open_softirq(RCU_SOFTIRQ, rcu_process_callbacks); |
1954 | 1978 | ||
diff --git a/kernel/rcutree.h b/kernel/rcutree.h index 14c040b18ed0..91d4170c5c13 100644 --- a/kernel/rcutree.h +++ b/kernel/rcutree.h | |||
@@ -202,6 +202,9 @@ struct rcu_data { | |||
202 | long qlen; /* # of queued callbacks */ | 202 | long qlen; /* # of queued callbacks */ |
203 | long qlen_last_fqs_check; | 203 | long qlen_last_fqs_check; |
204 | /* qlen at last check for QS forcing */ | 204 | /* qlen at last check for QS forcing */ |
205 | unsigned long n_cbs_invoked; /* count of RCU cbs invoked. */ | ||
206 | unsigned long n_cbs_orphaned; /* RCU cbs sent to orphanage. */ | ||
207 | unsigned long n_cbs_adopted; /* RCU cbs adopted from orphanage. */ | ||
205 | unsigned long n_force_qs_snap; | 208 | unsigned long n_force_qs_snap; |
206 | /* did other CPU force QS recently? */ | 209 | /* did other CPU force QS recently? */ |
207 | long blimit; /* Upper limit on a processed batch */ | 210 | long blimit; /* Upper limit on a processed batch */ |
@@ -254,19 +257,23 @@ struct rcu_data { | |||
254 | #define RCU_STALL_DELAY_DELTA 0 | 257 | #define RCU_STALL_DELAY_DELTA 0 |
255 | #endif | 258 | #endif |
256 | 259 | ||
257 | #define RCU_SECONDS_TILL_STALL_CHECK (10 * HZ + RCU_STALL_DELAY_DELTA) | 260 | #define RCU_SECONDS_TILL_STALL_CHECK (CONFIG_RCU_CPU_STALL_TIMEOUT * HZ + \ |
261 | RCU_STALL_DELAY_DELTA) | ||
258 | /* for rsp->jiffies_stall */ | 262 | /* for rsp->jiffies_stall */ |
259 | #define RCU_SECONDS_TILL_STALL_RECHECK (30 * HZ + RCU_STALL_DELAY_DELTA) | 263 | #define RCU_SECONDS_TILL_STALL_RECHECK (3 * RCU_SECONDS_TILL_STALL_CHECK + 30) |
260 | /* for rsp->jiffies_stall */ | 264 | /* for rsp->jiffies_stall */ |
261 | #define RCU_STALL_RAT_DELAY 2 /* Allow other CPUs time */ | 265 | #define RCU_STALL_RAT_DELAY 2 /* Allow other CPUs time */ |
262 | /* to take at least one */ | 266 | /* to take at least one */ |
263 | /* scheduling clock irq */ | 267 | /* scheduling clock irq */ |
264 | /* before ratting on them. */ | 268 | /* before ratting on them. */ |
265 | 269 | ||
266 | #endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */ | 270 | #ifdef CONFIG_RCU_CPU_STALL_DETECTOR_RUNNABLE |
271 | #define RCU_CPU_STALL_SUPPRESS_INIT 0 | ||
272 | #else | ||
273 | #define RCU_CPU_STALL_SUPPRESS_INIT 1 | ||
274 | #endif | ||
267 | 275 | ||
268 | #define ULONG_CMP_GE(a, b) (ULONG_MAX / 2 >= (a) - (b)) | 276 | #endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */ |
269 | #define ULONG_CMP_LT(a, b) (ULONG_MAX / 2 < (a) - (b)) | ||
270 | 277 | ||
271 | /* | 278 | /* |
272 | * RCU global state, including node hierarchy. This hierarchy is | 279 | * RCU global state, including node hierarchy. This hierarchy is |
@@ -283,7 +290,7 @@ struct rcu_state { | |||
283 | struct rcu_node *level[NUM_RCU_LVLS]; /* Hierarchy levels. */ | 290 | struct rcu_node *level[NUM_RCU_LVLS]; /* Hierarchy levels. */ |
284 | u32 levelcnt[MAX_RCU_LVLS + 1]; /* # nodes in each level. */ | 291 | u32 levelcnt[MAX_RCU_LVLS + 1]; /* # nodes in each level. */ |
285 | u8 levelspread[NUM_RCU_LVLS]; /* kids/node in each level. */ | 292 | u8 levelspread[NUM_RCU_LVLS]; /* kids/node in each level. */ |
286 | struct rcu_data *rda[NR_CPUS]; /* array of rdp pointers. */ | 293 | struct rcu_data __percpu *rda; /* pointer to percpu rcu_data. */ |
287 | 294 | ||
288 | /* The following fields are guarded by the root rcu_node's lock. */ | 295 | /* The following fields are guarded by the root rcu_node's lock. */ |
289 | 296 | ||
@@ -365,6 +372,7 @@ static void rcu_report_unblock_qs_rnp(struct rcu_node *rnp, | |||
365 | #ifdef CONFIG_RCU_CPU_STALL_DETECTOR | 372 | #ifdef CONFIG_RCU_CPU_STALL_DETECTOR |
366 | static void rcu_print_detail_task_stall(struct rcu_state *rsp); | 373 | static void rcu_print_detail_task_stall(struct rcu_state *rsp); |
367 | static void rcu_print_task_stall(struct rcu_node *rnp); | 374 | static void rcu_print_task_stall(struct rcu_node *rnp); |
375 | static void rcu_preempt_stall_reset(void); | ||
368 | #endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */ | 376 | #endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */ |
369 | static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp); | 377 | static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp); |
370 | #ifdef CONFIG_HOTPLUG_CPU | 378 | #ifdef CONFIG_HOTPLUG_CPU |
diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h index 0e4f420245d9..71a4147473f9 100644 --- a/kernel/rcutree_plugin.h +++ b/kernel/rcutree_plugin.h | |||
@@ -57,7 +57,7 @@ static void __init rcu_bootup_announce_oddness(void) | |||
57 | printk(KERN_INFO | 57 | printk(KERN_INFO |
58 | "\tRCU-based detection of stalled CPUs is disabled.\n"); | 58 | "\tRCU-based detection of stalled CPUs is disabled.\n"); |
59 | #endif | 59 | #endif |
60 | #ifndef CONFIG_RCU_CPU_STALL_VERBOSE | 60 | #if defined(CONFIG_TREE_PREEMPT_RCU) && !defined(CONFIG_RCU_CPU_STALL_VERBOSE) |
61 | printk(KERN_INFO "\tVerbose stalled-CPUs detection is disabled.\n"); | 61 | printk(KERN_INFO "\tVerbose stalled-CPUs detection is disabled.\n"); |
62 | #endif | 62 | #endif |
63 | #if NUM_RCU_LVL_4 != 0 | 63 | #if NUM_RCU_LVL_4 != 0 |
@@ -154,7 +154,7 @@ static void rcu_preempt_note_context_switch(int cpu) | |||
154 | (t->rcu_read_unlock_special & RCU_READ_UNLOCK_BLOCKED) == 0) { | 154 | (t->rcu_read_unlock_special & RCU_READ_UNLOCK_BLOCKED) == 0) { |
155 | 155 | ||
156 | /* Possibly blocking in an RCU read-side critical section. */ | 156 | /* Possibly blocking in an RCU read-side critical section. */ |
157 | rdp = rcu_preempt_state.rda[cpu]; | 157 | rdp = per_cpu_ptr(rcu_preempt_state.rda, cpu); |
158 | rnp = rdp->mynode; | 158 | rnp = rdp->mynode; |
159 | raw_spin_lock_irqsave(&rnp->lock, flags); | 159 | raw_spin_lock_irqsave(&rnp->lock, flags); |
160 | t->rcu_read_unlock_special |= RCU_READ_UNLOCK_BLOCKED; | 160 | t->rcu_read_unlock_special |= RCU_READ_UNLOCK_BLOCKED; |
@@ -201,7 +201,7 @@ static void rcu_preempt_note_context_switch(int cpu) | |||
201 | */ | 201 | */ |
202 | void __rcu_read_lock(void) | 202 | void __rcu_read_lock(void) |
203 | { | 203 | { |
204 | ACCESS_ONCE(current->rcu_read_lock_nesting)++; | 204 | current->rcu_read_lock_nesting++; |
205 | barrier(); /* needed if we ever invoke rcu_read_lock in rcutree.c */ | 205 | barrier(); /* needed if we ever invoke rcu_read_lock in rcutree.c */ |
206 | } | 206 | } |
207 | EXPORT_SYMBOL_GPL(__rcu_read_lock); | 207 | EXPORT_SYMBOL_GPL(__rcu_read_lock); |
@@ -344,7 +344,9 @@ void __rcu_read_unlock(void) | |||
344 | struct task_struct *t = current; | 344 | struct task_struct *t = current; |
345 | 345 | ||
346 | barrier(); /* needed if we ever invoke rcu_read_unlock in rcutree.c */ | 346 | barrier(); /* needed if we ever invoke rcu_read_unlock in rcutree.c */ |
347 | if (--ACCESS_ONCE(t->rcu_read_lock_nesting) == 0 && | 347 | --t->rcu_read_lock_nesting; |
348 | barrier(); /* decrement before load of ->rcu_read_unlock_special */ | ||
349 | if (t->rcu_read_lock_nesting == 0 && | ||
348 | unlikely(ACCESS_ONCE(t->rcu_read_unlock_special))) | 350 | unlikely(ACCESS_ONCE(t->rcu_read_unlock_special))) |
349 | rcu_read_unlock_special(t); | 351 | rcu_read_unlock_special(t); |
350 | #ifdef CONFIG_PROVE_LOCKING | 352 | #ifdef CONFIG_PROVE_LOCKING |
@@ -417,6 +419,16 @@ static void rcu_print_task_stall(struct rcu_node *rnp) | |||
417 | } | 419 | } |
418 | } | 420 | } |
419 | 421 | ||
422 | /* | ||
423 | * Suppress preemptible RCU's CPU stall warnings by pushing the | ||
424 | * time of the next stall-warning message comfortably far into the | ||
425 | * future. | ||
426 | */ | ||
427 | static void rcu_preempt_stall_reset(void) | ||
428 | { | ||
429 | rcu_preempt_state.jiffies_stall = jiffies + ULONG_MAX / 2; | ||
430 | } | ||
431 | |||
420 | #endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */ | 432 | #endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */ |
421 | 433 | ||
422 | /* | 434 | /* |
@@ -546,9 +558,11 @@ EXPORT_SYMBOL_GPL(call_rcu); | |||
546 | * | 558 | * |
547 | * Control will return to the caller some time after a full grace | 559 | * Control will return to the caller some time after a full grace |
548 | * period has elapsed, in other words after all currently executing RCU | 560 | * period has elapsed, in other words after all currently executing RCU |
549 | * read-side critical sections have completed. RCU read-side critical | 561 | * read-side critical sections have completed. Note, however, that |
550 | * sections are delimited by rcu_read_lock() and rcu_read_unlock(), | 562 | * upon return from synchronize_rcu(), the caller might well be executing |
551 | * and may be nested. | 563 | * concurrently with new RCU read-side critical sections that began while |
564 | * synchronize_rcu() was waiting. RCU read-side critical sections are | ||
565 | * delimited by rcu_read_lock() and rcu_read_unlock(), and may be nested. | ||
552 | */ | 566 | */ |
553 | void synchronize_rcu(void) | 567 | void synchronize_rcu(void) |
554 | { | 568 | { |
@@ -771,7 +785,7 @@ static void rcu_preempt_send_cbs_to_orphanage(void) | |||
771 | */ | 785 | */ |
772 | static void __init __rcu_init_preempt(void) | 786 | static void __init __rcu_init_preempt(void) |
773 | { | 787 | { |
774 | RCU_INIT_FLAVOR(&rcu_preempt_state, rcu_preempt_data); | 788 | rcu_init_one(&rcu_preempt_state, &rcu_preempt_data); |
775 | } | 789 | } |
776 | 790 | ||
777 | /* | 791 | /* |
@@ -865,6 +879,14 @@ static void rcu_print_task_stall(struct rcu_node *rnp) | |||
865 | { | 879 | { |
866 | } | 880 | } |
867 | 881 | ||
882 | /* | ||
883 | * Because preemptible RCU does not exist, there is no need to suppress | ||
884 | * its CPU stall warnings. | ||
885 | */ | ||
886 | static void rcu_preempt_stall_reset(void) | ||
887 | { | ||
888 | } | ||
889 | |||
868 | #endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */ | 890 | #endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */ |
869 | 891 | ||
870 | /* | 892 | /* |
@@ -919,15 +941,6 @@ static void rcu_preempt_process_callbacks(void) | |||
919 | } | 941 | } |
920 | 942 | ||
921 | /* | 943 | /* |
922 | * In classic RCU, call_rcu() is just call_rcu_sched(). | ||
923 | */ | ||
924 | void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu)) | ||
925 | { | ||
926 | call_rcu_sched(head, func); | ||
927 | } | ||
928 | EXPORT_SYMBOL_GPL(call_rcu); | ||
929 | |||
930 | /* | ||
931 | * Wait for an rcu-preempt grace period, but make it happen quickly. | 944 | * Wait for an rcu-preempt grace period, but make it happen quickly. |
932 | * But because preemptable RCU does not exist, map to rcu-sched. | 945 | * But because preemptable RCU does not exist, map to rcu-sched. |
933 | */ | 946 | */ |
diff --git a/kernel/rcutree_trace.c b/kernel/rcutree_trace.c index 36c95b45738e..d15430b9d122 100644 --- a/kernel/rcutree_trace.c +++ b/kernel/rcutree_trace.c | |||
@@ -64,7 +64,9 @@ static void print_one_rcu_data(struct seq_file *m, struct rcu_data *rdp) | |||
64 | rdp->dynticks_fqs); | 64 | rdp->dynticks_fqs); |
65 | #endif /* #ifdef CONFIG_NO_HZ */ | 65 | #endif /* #ifdef CONFIG_NO_HZ */ |
66 | seq_printf(m, " of=%lu ri=%lu", rdp->offline_fqs, rdp->resched_ipi); | 66 | seq_printf(m, " of=%lu ri=%lu", rdp->offline_fqs, rdp->resched_ipi); |
67 | seq_printf(m, " ql=%ld b=%ld\n", rdp->qlen, rdp->blimit); | 67 | seq_printf(m, " ql=%ld b=%ld", rdp->qlen, rdp->blimit); |
68 | seq_printf(m, " ci=%lu co=%lu ca=%lu\n", | ||
69 | rdp->n_cbs_invoked, rdp->n_cbs_orphaned, rdp->n_cbs_adopted); | ||
68 | } | 70 | } |
69 | 71 | ||
70 | #define PRINT_RCU_DATA(name, func, m) \ | 72 | #define PRINT_RCU_DATA(name, func, m) \ |
@@ -119,7 +121,9 @@ static void print_one_rcu_data_csv(struct seq_file *m, struct rcu_data *rdp) | |||
119 | rdp->dynticks_fqs); | 121 | rdp->dynticks_fqs); |
120 | #endif /* #ifdef CONFIG_NO_HZ */ | 122 | #endif /* #ifdef CONFIG_NO_HZ */ |
121 | seq_printf(m, ",%lu,%lu", rdp->offline_fqs, rdp->resched_ipi); | 123 | seq_printf(m, ",%lu,%lu", rdp->offline_fqs, rdp->resched_ipi); |
122 | seq_printf(m, ",%ld,%ld\n", rdp->qlen, rdp->blimit); | 124 | seq_printf(m, ",%ld,%ld", rdp->qlen, rdp->blimit); |
125 | seq_printf(m, ",%lu,%lu,%lu\n", | ||
126 | rdp->n_cbs_invoked, rdp->n_cbs_orphaned, rdp->n_cbs_adopted); | ||
123 | } | 127 | } |
124 | 128 | ||
125 | static int show_rcudata_csv(struct seq_file *m, void *unused) | 129 | static int show_rcudata_csv(struct seq_file *m, void *unused) |
@@ -128,7 +132,7 @@ static int show_rcudata_csv(struct seq_file *m, void *unused) | |||
128 | #ifdef CONFIG_NO_HZ | 132 | #ifdef CONFIG_NO_HZ |
129 | seq_puts(m, "\"dt\",\"dt nesting\",\"dn\",\"df\","); | 133 | seq_puts(m, "\"dt\",\"dt nesting\",\"dn\",\"df\","); |
130 | #endif /* #ifdef CONFIG_NO_HZ */ | 134 | #endif /* #ifdef CONFIG_NO_HZ */ |
131 | seq_puts(m, "\"of\",\"ri\",\"ql\",\"b\"\n"); | 135 | seq_puts(m, "\"of\",\"ri\",\"ql\",\"b\",\"ci\",\"co\",\"ca\"\n"); |
132 | #ifdef CONFIG_TREE_PREEMPT_RCU | 136 | #ifdef CONFIG_TREE_PREEMPT_RCU |
133 | seq_puts(m, "\"rcu_preempt:\"\n"); | 137 | seq_puts(m, "\"rcu_preempt:\"\n"); |
134 | PRINT_RCU_DATA(rcu_preempt_data, print_one_rcu_data_csv, m); | 138 | PRINT_RCU_DATA(rcu_preempt_data, print_one_rcu_data_csv, m); |
@@ -262,7 +266,7 @@ static void print_rcu_pendings(struct seq_file *m, struct rcu_state *rsp) | |||
262 | struct rcu_data *rdp; | 266 | struct rcu_data *rdp; |
263 | 267 | ||
264 | for_each_possible_cpu(cpu) { | 268 | for_each_possible_cpu(cpu) { |
265 | rdp = rsp->rda[cpu]; | 269 | rdp = per_cpu_ptr(rsp->rda, cpu); |
266 | if (rdp->beenonline) | 270 | if (rdp->beenonline) |
267 | print_one_rcu_pending(m, rdp); | 271 | print_one_rcu_pending(m, rdp); |
268 | } | 272 | } |
diff --git a/kernel/sched.c b/kernel/sched.c index 5998222f901c..d42992bccdfa 100644 --- a/kernel/sched.c +++ b/kernel/sched.c | |||
@@ -3714,7 +3714,7 @@ void scheduler_tick(void) | |||
3714 | curr->sched_class->task_tick(rq, curr, 0); | 3714 | curr->sched_class->task_tick(rq, curr, 0); |
3715 | raw_spin_unlock(&rq->lock); | 3715 | raw_spin_unlock(&rq->lock); |
3716 | 3716 | ||
3717 | perf_event_task_tick(curr); | 3717 | perf_event_task_tick(); |
3718 | 3718 | ||
3719 | #ifdef CONFIG_SMP | 3719 | #ifdef CONFIG_SMP |
3720 | rq->idle_at_tick = idle_cpu(cpu); | 3720 | rq->idle_at_tick = idle_cpu(cpu); |
@@ -4772,7 +4772,7 @@ recheck: | |||
4772 | } | 4772 | } |
4773 | 4773 | ||
4774 | if (user) { | 4774 | if (user) { |
4775 | retval = security_task_setscheduler(p, policy, param); | 4775 | retval = security_task_setscheduler(p); |
4776 | if (retval) | 4776 | if (retval) |
4777 | return retval; | 4777 | return retval; |
4778 | } | 4778 | } |
@@ -5023,7 +5023,7 @@ long sched_setaffinity(pid_t pid, const struct cpumask *in_mask) | |||
5023 | if (!check_same_owner(p) && !capable(CAP_SYS_NICE)) | 5023 | if (!check_same_owner(p) && !capable(CAP_SYS_NICE)) |
5024 | goto out_unlock; | 5024 | goto out_unlock; |
5025 | 5025 | ||
5026 | retval = security_task_setscheduler(p, 0, NULL); | 5026 | retval = security_task_setscheduler(p); |
5027 | if (retval) | 5027 | if (retval) |
5028 | goto out_unlock; | 5028 | goto out_unlock; |
5029 | 5029 | ||
@@ -5473,7 +5473,19 @@ void __cpuinit init_idle(struct task_struct *idle, int cpu) | |||
5473 | idle->se.exec_start = sched_clock(); | 5473 | idle->se.exec_start = sched_clock(); |
5474 | 5474 | ||
5475 | cpumask_copy(&idle->cpus_allowed, cpumask_of(cpu)); | 5475 | cpumask_copy(&idle->cpus_allowed, cpumask_of(cpu)); |
5476 | /* | ||
5477 | * We're having a chicken and egg problem, even though we are | ||
5478 | * holding rq->lock, the cpu isn't yet set to this cpu so the | ||
5479 | * lockdep check in task_group() will fail. | ||
5480 | * | ||
5481 | * Similar case to sched_fork(). / Alternatively we could | ||
5482 | * use task_rq_lock() here and obtain the other rq->lock. | ||
5483 | * | ||
5484 | * Silence PROVE_RCU | ||
5485 | */ | ||
5486 | rcu_read_lock(); | ||
5476 | __set_task_cpu(idle, cpu); | 5487 | __set_task_cpu(idle, cpu); |
5488 | rcu_read_unlock(); | ||
5477 | 5489 | ||
5478 | rq->curr = rq->idle = idle; | 5490 | rq->curr = rq->idle = idle; |
5479 | #if defined(CONFIG_SMP) && defined(__ARCH_WANT_UNLOCKED_CTXSW) | 5491 | #if defined(CONFIG_SMP) && defined(__ARCH_WANT_UNLOCKED_CTXSW) |
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index 74cccfae87a8..933f3d1b62ea 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c | |||
@@ -3793,8 +3793,11 @@ static void task_fork_fair(struct task_struct *p) | |||
3793 | 3793 | ||
3794 | update_rq_clock(rq); | 3794 | update_rq_clock(rq); |
3795 | 3795 | ||
3796 | if (unlikely(task_cpu(p) != this_cpu)) | 3796 | if (unlikely(task_cpu(p) != this_cpu)) { |
3797 | rcu_read_lock(); | ||
3797 | __set_task_cpu(p, this_cpu); | 3798 | __set_task_cpu(p, this_cpu); |
3799 | rcu_read_unlock(); | ||
3800 | } | ||
3798 | 3801 | ||
3799 | update_curr(cfs_rq); | 3802 | update_curr(cfs_rq); |
3800 | 3803 | ||
diff --git a/kernel/srcu.c b/kernel/srcu.c index 2980da3fd509..c71e07500536 100644 --- a/kernel/srcu.c +++ b/kernel/srcu.c | |||
@@ -46,11 +46,9 @@ static int init_srcu_struct_fields(struct srcu_struct *sp) | |||
46 | int __init_srcu_struct(struct srcu_struct *sp, const char *name, | 46 | int __init_srcu_struct(struct srcu_struct *sp, const char *name, |
47 | struct lock_class_key *key) | 47 | struct lock_class_key *key) |
48 | { | 48 | { |
49 | #ifdef CONFIG_DEBUG_LOCK_ALLOC | ||
50 | /* Don't re-initialize a lock while it is held. */ | 49 | /* Don't re-initialize a lock while it is held. */ |
51 | debug_check_no_locks_freed((void *)sp, sizeof(*sp)); | 50 | debug_check_no_locks_freed((void *)sp, sizeof(*sp)); |
52 | lockdep_init_map(&sp->dep_map, name, key, 0); | 51 | lockdep_init_map(&sp->dep_map, name, key, 0); |
53 | #endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ | ||
54 | return init_srcu_struct_fields(sp); | 52 | return init_srcu_struct_fields(sp); |
55 | } | 53 | } |
56 | EXPORT_SYMBOL_GPL(__init_srcu_struct); | 54 | EXPORT_SYMBOL_GPL(__init_srcu_struct); |
diff --git a/kernel/sysctl_check.c b/kernel/sysctl_check.c index 04cdcf72c827..10b90d8a03c4 100644 --- a/kernel/sysctl_check.c +++ b/kernel/sysctl_check.c | |||
@@ -143,15 +143,6 @@ int sysctl_check_table(struct nsproxy *namespaces, struct ctl_table *table) | |||
143 | if (!table->maxlen) | 143 | if (!table->maxlen) |
144 | set_fail(&fail, table, "No maxlen"); | 144 | set_fail(&fail, table, "No maxlen"); |
145 | } | 145 | } |
146 | if ((table->proc_handler == proc_doulongvec_minmax) || | ||
147 | (table->proc_handler == proc_doulongvec_ms_jiffies_minmax)) { | ||
148 | if (table->maxlen > sizeof (unsigned long)) { | ||
149 | if (!table->extra1) | ||
150 | set_fail(&fail, table, "No min"); | ||
151 | if (!table->extra2) | ||
152 | set_fail(&fail, table, "No max"); | ||
153 | } | ||
154 | } | ||
155 | #ifdef CONFIG_PROC_SYSCTL | 146 | #ifdef CONFIG_PROC_SYSCTL |
156 | if (table->procname && !table->proc_handler) | 147 | if (table->procname && !table->proc_handler) |
157 | set_fail(&fail, table, "No proc_handler"); | 148 | set_fail(&fail, table, "No proc_handler"); |
diff --git a/kernel/test_kprobes.c b/kernel/test_kprobes.c index 4f104515a19b..f8b11a283171 100644 --- a/kernel/test_kprobes.c +++ b/kernel/test_kprobes.c | |||
@@ -115,7 +115,9 @@ static int test_kprobes(void) | |||
115 | int ret; | 115 | int ret; |
116 | struct kprobe *kps[2] = {&kp, &kp2}; | 116 | struct kprobe *kps[2] = {&kp, &kp2}; |
117 | 117 | ||
118 | kp.addr = 0; /* addr should be cleared for reusing kprobe. */ | 118 | /* addr and flags should be cleared for reusing kprobe. */ |
119 | kp.addr = NULL; | ||
120 | kp.flags = 0; | ||
119 | ret = register_kprobes(kps, 2); | 121 | ret = register_kprobes(kps, 2); |
120 | if (ret < 0) { | 122 | if (ret < 0) { |
121 | printk(KERN_ERR "Kprobe smoke test failed: " | 123 | printk(KERN_ERR "Kprobe smoke test failed: " |
@@ -210,7 +212,9 @@ static int test_jprobes(void) | |||
210 | int ret; | 212 | int ret; |
211 | struct jprobe *jps[2] = {&jp, &jp2}; | 213 | struct jprobe *jps[2] = {&jp, &jp2}; |
212 | 214 | ||
213 | jp.kp.addr = 0; /* addr should be cleared for reusing kprobe. */ | 215 | /* addr and flags should be cleared for reusing kprobe. */ |
216 | jp.kp.addr = NULL; | ||
217 | jp.kp.flags = 0; | ||
214 | ret = register_jprobes(jps, 2); | 218 | ret = register_jprobes(jps, 2); |
215 | if (ret < 0) { | 219 | if (ret < 0) { |
216 | printk(KERN_ERR "Kprobe smoke test failed: " | 220 | printk(KERN_ERR "Kprobe smoke test failed: " |
@@ -323,7 +327,9 @@ static int test_kretprobes(void) | |||
323 | int ret; | 327 | int ret; |
324 | struct kretprobe *rps[2] = {&rp, &rp2}; | 328 | struct kretprobe *rps[2] = {&rp, &rp2}; |
325 | 329 | ||
326 | rp.kp.addr = 0; /* addr should be cleared for reusing kprobe. */ | 330 | /* addr and flags should be cleared for reusing kprobe. */ |
331 | rp.kp.addr = NULL; | ||
332 | rp.kp.flags = 0; | ||
327 | ret = register_kretprobes(rps, 2); | 333 | ret = register_kretprobes(rps, 2); |
328 | if (ret < 0) { | 334 | if (ret < 0) { |
329 | printk(KERN_ERR "Kprobe smoke test failed: " | 335 | printk(KERN_ERR "Kprobe smoke test failed: " |
diff --git a/kernel/timer.c b/kernel/timer.c index 97bf05baade7..68a9ae7679b7 100644 --- a/kernel/timer.c +++ b/kernel/timer.c | |||
@@ -37,7 +37,7 @@ | |||
37 | #include <linux/delay.h> | 37 | #include <linux/delay.h> |
38 | #include <linux/tick.h> | 38 | #include <linux/tick.h> |
39 | #include <linux/kallsyms.h> | 39 | #include <linux/kallsyms.h> |
40 | #include <linux/perf_event.h> | 40 | #include <linux/irq_work.h> |
41 | #include <linux/sched.h> | 41 | #include <linux/sched.h> |
42 | #include <linux/slab.h> | 42 | #include <linux/slab.h> |
43 | 43 | ||
@@ -1279,7 +1279,10 @@ void update_process_times(int user_tick) | |||
1279 | run_local_timers(); | 1279 | run_local_timers(); |
1280 | rcu_check_callbacks(cpu, user_tick); | 1280 | rcu_check_callbacks(cpu, user_tick); |
1281 | printk_tick(); | 1281 | printk_tick(); |
1282 | perf_event_do_pending(); | 1282 | #ifdef CONFIG_IRQ_WORK |
1283 | if (in_irq()) | ||
1284 | irq_work_run(); | ||
1285 | #endif | ||
1283 | scheduler_tick(); | 1286 | scheduler_tick(); |
1284 | run_posix_cpu_timers(p); | 1287 | run_posix_cpu_timers(p); |
1285 | } | 1288 | } |
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index 538501c6ea50..e550d2eda1df 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig | |||
@@ -49,6 +49,11 @@ config HAVE_SYSCALL_TRACEPOINTS | |||
49 | help | 49 | help |
50 | See Documentation/trace/ftrace-design.txt | 50 | See Documentation/trace/ftrace-design.txt |
51 | 51 | ||
52 | config HAVE_C_RECORDMCOUNT | ||
53 | bool | ||
54 | help | ||
55 | C version of recordmcount available? | ||
56 | |||
52 | config TRACER_MAX_TRACE | 57 | config TRACER_MAX_TRACE |
53 | bool | 58 | bool |
54 | 59 | ||
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index fa7ece649fe1..ebd80d50c474 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c | |||
@@ -884,10 +884,8 @@ enum { | |||
884 | FTRACE_ENABLE_CALLS = (1 << 0), | 884 | FTRACE_ENABLE_CALLS = (1 << 0), |
885 | FTRACE_DISABLE_CALLS = (1 << 1), | 885 | FTRACE_DISABLE_CALLS = (1 << 1), |
886 | FTRACE_UPDATE_TRACE_FUNC = (1 << 2), | 886 | FTRACE_UPDATE_TRACE_FUNC = (1 << 2), |
887 | FTRACE_ENABLE_MCOUNT = (1 << 3), | 887 | FTRACE_START_FUNC_RET = (1 << 3), |
888 | FTRACE_DISABLE_MCOUNT = (1 << 4), | 888 | FTRACE_STOP_FUNC_RET = (1 << 4), |
889 | FTRACE_START_FUNC_RET = (1 << 5), | ||
890 | FTRACE_STOP_FUNC_RET = (1 << 6), | ||
891 | }; | 889 | }; |
892 | 890 | ||
893 | static int ftrace_filtered; | 891 | static int ftrace_filtered; |
@@ -1226,8 +1224,6 @@ static void ftrace_shutdown(int command) | |||
1226 | 1224 | ||
1227 | static void ftrace_startup_sysctl(void) | 1225 | static void ftrace_startup_sysctl(void) |
1228 | { | 1226 | { |
1229 | int command = FTRACE_ENABLE_MCOUNT; | ||
1230 | |||
1231 | if (unlikely(ftrace_disabled)) | 1227 | if (unlikely(ftrace_disabled)) |
1232 | return; | 1228 | return; |
1233 | 1229 | ||
@@ -1235,23 +1231,17 @@ static void ftrace_startup_sysctl(void) | |||
1235 | saved_ftrace_func = NULL; | 1231 | saved_ftrace_func = NULL; |
1236 | /* ftrace_start_up is true if we want ftrace running */ | 1232 | /* ftrace_start_up is true if we want ftrace running */ |
1237 | if (ftrace_start_up) | 1233 | if (ftrace_start_up) |
1238 | command |= FTRACE_ENABLE_CALLS; | 1234 | ftrace_run_update_code(FTRACE_ENABLE_CALLS); |
1239 | |||
1240 | ftrace_run_update_code(command); | ||
1241 | } | 1235 | } |
1242 | 1236 | ||
1243 | static void ftrace_shutdown_sysctl(void) | 1237 | static void ftrace_shutdown_sysctl(void) |
1244 | { | 1238 | { |
1245 | int command = FTRACE_DISABLE_MCOUNT; | ||
1246 | |||
1247 | if (unlikely(ftrace_disabled)) | 1239 | if (unlikely(ftrace_disabled)) |
1248 | return; | 1240 | return; |
1249 | 1241 | ||
1250 | /* ftrace_start_up is true if ftrace is running */ | 1242 | /* ftrace_start_up is true if ftrace is running */ |
1251 | if (ftrace_start_up) | 1243 | if (ftrace_start_up) |
1252 | command |= FTRACE_DISABLE_CALLS; | 1244 | ftrace_run_update_code(FTRACE_DISABLE_CALLS); |
1253 | |||
1254 | ftrace_run_update_code(command); | ||
1255 | } | 1245 | } |
1256 | 1246 | ||
1257 | static cycle_t ftrace_update_time; | 1247 | static cycle_t ftrace_update_time; |
@@ -1368,24 +1358,29 @@ enum { | |||
1368 | #define FTRACE_BUFF_MAX (KSYM_SYMBOL_LEN+4) /* room for wildcards */ | 1358 | #define FTRACE_BUFF_MAX (KSYM_SYMBOL_LEN+4) /* room for wildcards */ |
1369 | 1359 | ||
1370 | struct ftrace_iterator { | 1360 | struct ftrace_iterator { |
1371 | struct ftrace_page *pg; | 1361 | loff_t pos; |
1372 | int hidx; | 1362 | loff_t func_pos; |
1373 | int idx; | 1363 | struct ftrace_page *pg; |
1374 | unsigned flags; | 1364 | struct dyn_ftrace *func; |
1375 | struct trace_parser parser; | 1365 | struct ftrace_func_probe *probe; |
1366 | struct trace_parser parser; | ||
1367 | int hidx; | ||
1368 | int idx; | ||
1369 | unsigned flags; | ||
1376 | }; | 1370 | }; |
1377 | 1371 | ||
1378 | static void * | 1372 | static void * |
1379 | t_hash_next(struct seq_file *m, void *v, loff_t *pos) | 1373 | t_hash_next(struct seq_file *m, loff_t *pos) |
1380 | { | 1374 | { |
1381 | struct ftrace_iterator *iter = m->private; | 1375 | struct ftrace_iterator *iter = m->private; |
1382 | struct hlist_node *hnd = v; | 1376 | struct hlist_node *hnd = NULL; |
1383 | struct hlist_head *hhd; | 1377 | struct hlist_head *hhd; |
1384 | 1378 | ||
1385 | WARN_ON(!(iter->flags & FTRACE_ITER_HASH)); | ||
1386 | |||
1387 | (*pos)++; | 1379 | (*pos)++; |
1380 | iter->pos = *pos; | ||
1388 | 1381 | ||
1382 | if (iter->probe) | ||
1383 | hnd = &iter->probe->node; | ||
1389 | retry: | 1384 | retry: |
1390 | if (iter->hidx >= FTRACE_FUNC_HASHSIZE) | 1385 | if (iter->hidx >= FTRACE_FUNC_HASHSIZE) |
1391 | return NULL; | 1386 | return NULL; |
@@ -1408,7 +1403,12 @@ t_hash_next(struct seq_file *m, void *v, loff_t *pos) | |||
1408 | } | 1403 | } |
1409 | } | 1404 | } |
1410 | 1405 | ||
1411 | return hnd; | 1406 | if (WARN_ON_ONCE(!hnd)) |
1407 | return NULL; | ||
1408 | |||
1409 | iter->probe = hlist_entry(hnd, struct ftrace_func_probe, node); | ||
1410 | |||
1411 | return iter; | ||
1412 | } | 1412 | } |
1413 | 1413 | ||
1414 | static void *t_hash_start(struct seq_file *m, loff_t *pos) | 1414 | static void *t_hash_start(struct seq_file *m, loff_t *pos) |
@@ -1417,26 +1417,32 @@ static void *t_hash_start(struct seq_file *m, loff_t *pos) | |||
1417 | void *p = NULL; | 1417 | void *p = NULL; |
1418 | loff_t l; | 1418 | loff_t l; |
1419 | 1419 | ||
1420 | if (!(iter->flags & FTRACE_ITER_HASH)) | 1420 | if (iter->func_pos > *pos) |
1421 | *pos = 0; | 1421 | return NULL; |
1422 | |||
1423 | iter->flags |= FTRACE_ITER_HASH; | ||
1424 | 1422 | ||
1425 | iter->hidx = 0; | 1423 | iter->hidx = 0; |
1426 | for (l = 0; l <= *pos; ) { | 1424 | for (l = 0; l <= (*pos - iter->func_pos); ) { |
1427 | p = t_hash_next(m, p, &l); | 1425 | p = t_hash_next(m, &l); |
1428 | if (!p) | 1426 | if (!p) |
1429 | break; | 1427 | break; |
1430 | } | 1428 | } |
1431 | return p; | 1429 | if (!p) |
1430 | return NULL; | ||
1431 | |||
1432 | /* Only set this if we have an item */ | ||
1433 | iter->flags |= FTRACE_ITER_HASH; | ||
1434 | |||
1435 | return iter; | ||
1432 | } | 1436 | } |
1433 | 1437 | ||
1434 | static int t_hash_show(struct seq_file *m, void *v) | 1438 | static int |
1439 | t_hash_show(struct seq_file *m, struct ftrace_iterator *iter) | ||
1435 | { | 1440 | { |
1436 | struct ftrace_func_probe *rec; | 1441 | struct ftrace_func_probe *rec; |
1437 | struct hlist_node *hnd = v; | ||
1438 | 1442 | ||
1439 | rec = hlist_entry(hnd, struct ftrace_func_probe, node); | 1443 | rec = iter->probe; |
1444 | if (WARN_ON_ONCE(!rec)) | ||
1445 | return -EIO; | ||
1440 | 1446 | ||
1441 | if (rec->ops->print) | 1447 | if (rec->ops->print) |
1442 | return rec->ops->print(m, rec->ip, rec->ops, rec->data); | 1448 | return rec->ops->print(m, rec->ip, rec->ops, rec->data); |
@@ -1457,12 +1463,13 @@ t_next(struct seq_file *m, void *v, loff_t *pos) | |||
1457 | struct dyn_ftrace *rec = NULL; | 1463 | struct dyn_ftrace *rec = NULL; |
1458 | 1464 | ||
1459 | if (iter->flags & FTRACE_ITER_HASH) | 1465 | if (iter->flags & FTRACE_ITER_HASH) |
1460 | return t_hash_next(m, v, pos); | 1466 | return t_hash_next(m, pos); |
1461 | 1467 | ||
1462 | (*pos)++; | 1468 | (*pos)++; |
1469 | iter->pos = *pos; | ||
1463 | 1470 | ||
1464 | if (iter->flags & FTRACE_ITER_PRINTALL) | 1471 | if (iter->flags & FTRACE_ITER_PRINTALL) |
1465 | return NULL; | 1472 | return t_hash_start(m, pos); |
1466 | 1473 | ||
1467 | retry: | 1474 | retry: |
1468 | if (iter->idx >= iter->pg->index) { | 1475 | if (iter->idx >= iter->pg->index) { |
@@ -1491,7 +1498,20 @@ t_next(struct seq_file *m, void *v, loff_t *pos) | |||
1491 | } | 1498 | } |
1492 | } | 1499 | } |
1493 | 1500 | ||
1494 | return rec; | 1501 | if (!rec) |
1502 | return t_hash_start(m, pos); | ||
1503 | |||
1504 | iter->func_pos = *pos; | ||
1505 | iter->func = rec; | ||
1506 | |||
1507 | return iter; | ||
1508 | } | ||
1509 | |||
1510 | static void reset_iter_read(struct ftrace_iterator *iter) | ||
1511 | { | ||
1512 | iter->pos = 0; | ||
1513 | iter->func_pos = 0; | ||
1514 | iter->flags &= ~(FTRACE_ITER_PRINTALL & FTRACE_ITER_HASH); | ||
1495 | } | 1515 | } |
1496 | 1516 | ||
1497 | static void *t_start(struct seq_file *m, loff_t *pos) | 1517 | static void *t_start(struct seq_file *m, loff_t *pos) |
@@ -1502,6 +1522,12 @@ static void *t_start(struct seq_file *m, loff_t *pos) | |||
1502 | 1522 | ||
1503 | mutex_lock(&ftrace_lock); | 1523 | mutex_lock(&ftrace_lock); |
1504 | /* | 1524 | /* |
1525 | * If an lseek was done, then reset and start from beginning. | ||
1526 | */ | ||
1527 | if (*pos < iter->pos) | ||
1528 | reset_iter_read(iter); | ||
1529 | |||
1530 | /* | ||
1505 | * For set_ftrace_filter reading, if we have the filter | 1531 | * For set_ftrace_filter reading, if we have the filter |
1506 | * off, we can short cut and just print out that all | 1532 | * off, we can short cut and just print out that all |
1507 | * functions are enabled. | 1533 | * functions are enabled. |
@@ -1518,6 +1544,11 @@ static void *t_start(struct seq_file *m, loff_t *pos) | |||
1518 | if (iter->flags & FTRACE_ITER_HASH) | 1544 | if (iter->flags & FTRACE_ITER_HASH) |
1519 | return t_hash_start(m, pos); | 1545 | return t_hash_start(m, pos); |
1520 | 1546 | ||
1547 | /* | ||
1548 | * Unfortunately, we need to restart at ftrace_pages_start | ||
1549 | * every time we let go of the ftrace_mutex. This is because | ||
1550 | * those pointers can change without the lock. | ||
1551 | */ | ||
1521 | iter->pg = ftrace_pages_start; | 1552 | iter->pg = ftrace_pages_start; |
1522 | iter->idx = 0; | 1553 | iter->idx = 0; |
1523 | for (l = 0; l <= *pos; ) { | 1554 | for (l = 0; l <= *pos; ) { |
@@ -1526,10 +1557,14 @@ static void *t_start(struct seq_file *m, loff_t *pos) | |||
1526 | break; | 1557 | break; |
1527 | } | 1558 | } |
1528 | 1559 | ||
1529 | if (!p && iter->flags & FTRACE_ITER_FILTER) | 1560 | if (!p) { |
1530 | return t_hash_start(m, pos); | 1561 | if (iter->flags & FTRACE_ITER_FILTER) |
1562 | return t_hash_start(m, pos); | ||
1531 | 1563 | ||
1532 | return p; | 1564 | return NULL; |
1565 | } | ||
1566 | |||
1567 | return iter; | ||
1533 | } | 1568 | } |
1534 | 1569 | ||
1535 | static void t_stop(struct seq_file *m, void *p) | 1570 | static void t_stop(struct seq_file *m, void *p) |
@@ -1540,16 +1575,18 @@ static void t_stop(struct seq_file *m, void *p) | |||
1540 | static int t_show(struct seq_file *m, void *v) | 1575 | static int t_show(struct seq_file *m, void *v) |
1541 | { | 1576 | { |
1542 | struct ftrace_iterator *iter = m->private; | 1577 | struct ftrace_iterator *iter = m->private; |
1543 | struct dyn_ftrace *rec = v; | 1578 | struct dyn_ftrace *rec; |
1544 | 1579 | ||
1545 | if (iter->flags & FTRACE_ITER_HASH) | 1580 | if (iter->flags & FTRACE_ITER_HASH) |
1546 | return t_hash_show(m, v); | 1581 | return t_hash_show(m, iter); |
1547 | 1582 | ||
1548 | if (iter->flags & FTRACE_ITER_PRINTALL) { | 1583 | if (iter->flags & FTRACE_ITER_PRINTALL) { |
1549 | seq_printf(m, "#### all functions enabled ####\n"); | 1584 | seq_printf(m, "#### all functions enabled ####\n"); |
1550 | return 0; | 1585 | return 0; |
1551 | } | 1586 | } |
1552 | 1587 | ||
1588 | rec = iter->func; | ||
1589 | |||
1553 | if (!rec) | 1590 | if (!rec) |
1554 | return 0; | 1591 | return 0; |
1555 | 1592 | ||
@@ -1601,8 +1638,8 @@ ftrace_failures_open(struct inode *inode, struct file *file) | |||
1601 | 1638 | ||
1602 | ret = ftrace_avail_open(inode, file); | 1639 | ret = ftrace_avail_open(inode, file); |
1603 | if (!ret) { | 1640 | if (!ret) { |
1604 | m = (struct seq_file *)file->private_data; | 1641 | m = file->private_data; |
1605 | iter = (struct ftrace_iterator *)m->private; | 1642 | iter = m->private; |
1606 | iter->flags = FTRACE_ITER_FAILURES; | 1643 | iter->flags = FTRACE_ITER_FAILURES; |
1607 | } | 1644 | } |
1608 | 1645 | ||
@@ -2418,7 +2455,7 @@ static const struct file_operations ftrace_filter_fops = { | |||
2418 | .open = ftrace_filter_open, | 2455 | .open = ftrace_filter_open, |
2419 | .read = seq_read, | 2456 | .read = seq_read, |
2420 | .write = ftrace_filter_write, | 2457 | .write = ftrace_filter_write, |
2421 | .llseek = no_llseek, | 2458 | .llseek = ftrace_regex_lseek, |
2422 | .release = ftrace_filter_release, | 2459 | .release = ftrace_filter_release, |
2423 | }; | 2460 | }; |
2424 | 2461 | ||
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index bca96377fd4e..c5a632a669e1 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c | |||
@@ -2606,6 +2606,19 @@ void ring_buffer_record_enable_cpu(struct ring_buffer *buffer, int cpu) | |||
2606 | } | 2606 | } |
2607 | EXPORT_SYMBOL_GPL(ring_buffer_record_enable_cpu); | 2607 | EXPORT_SYMBOL_GPL(ring_buffer_record_enable_cpu); |
2608 | 2608 | ||
2609 | /* | ||
2610 | * The total entries in the ring buffer is the running counter | ||
2611 | * of entries entered into the ring buffer, minus the sum of | ||
2612 | * the entries read from the ring buffer and the number of | ||
2613 | * entries that were overwritten. | ||
2614 | */ | ||
2615 | static inline unsigned long | ||
2616 | rb_num_of_entries(struct ring_buffer_per_cpu *cpu_buffer) | ||
2617 | { | ||
2618 | return local_read(&cpu_buffer->entries) - | ||
2619 | (local_read(&cpu_buffer->overrun) + cpu_buffer->read); | ||
2620 | } | ||
2621 | |||
2609 | /** | 2622 | /** |
2610 | * ring_buffer_entries_cpu - get the number of entries in a cpu buffer | 2623 | * ring_buffer_entries_cpu - get the number of entries in a cpu buffer |
2611 | * @buffer: The ring buffer | 2624 | * @buffer: The ring buffer |
@@ -2614,16 +2627,13 @@ EXPORT_SYMBOL_GPL(ring_buffer_record_enable_cpu); | |||
2614 | unsigned long ring_buffer_entries_cpu(struct ring_buffer *buffer, int cpu) | 2627 | unsigned long ring_buffer_entries_cpu(struct ring_buffer *buffer, int cpu) |
2615 | { | 2628 | { |
2616 | struct ring_buffer_per_cpu *cpu_buffer; | 2629 | struct ring_buffer_per_cpu *cpu_buffer; |
2617 | unsigned long ret; | ||
2618 | 2630 | ||
2619 | if (!cpumask_test_cpu(cpu, buffer->cpumask)) | 2631 | if (!cpumask_test_cpu(cpu, buffer->cpumask)) |
2620 | return 0; | 2632 | return 0; |
2621 | 2633 | ||
2622 | cpu_buffer = buffer->buffers[cpu]; | 2634 | cpu_buffer = buffer->buffers[cpu]; |
2623 | ret = (local_read(&cpu_buffer->entries) - local_read(&cpu_buffer->overrun)) | ||
2624 | - cpu_buffer->read; | ||
2625 | 2635 | ||
2626 | return ret; | 2636 | return rb_num_of_entries(cpu_buffer); |
2627 | } | 2637 | } |
2628 | EXPORT_SYMBOL_GPL(ring_buffer_entries_cpu); | 2638 | EXPORT_SYMBOL_GPL(ring_buffer_entries_cpu); |
2629 | 2639 | ||
@@ -2684,8 +2694,7 @@ unsigned long ring_buffer_entries(struct ring_buffer *buffer) | |||
2684 | /* if you care about this being correct, lock the buffer */ | 2694 | /* if you care about this being correct, lock the buffer */ |
2685 | for_each_buffer_cpu(buffer, cpu) { | 2695 | for_each_buffer_cpu(buffer, cpu) { |
2686 | cpu_buffer = buffer->buffers[cpu]; | 2696 | cpu_buffer = buffer->buffers[cpu]; |
2687 | entries += (local_read(&cpu_buffer->entries) - | 2697 | entries += rb_num_of_entries(cpu_buffer); |
2688 | local_read(&cpu_buffer->overrun)) - cpu_buffer->read; | ||
2689 | } | 2698 | } |
2690 | 2699 | ||
2691 | return entries; | 2700 | return entries; |
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 9ec59f541156..001bcd2ccf4a 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c | |||
@@ -2196,7 +2196,7 @@ int tracing_open_generic(struct inode *inode, struct file *filp) | |||
2196 | 2196 | ||
2197 | static int tracing_release(struct inode *inode, struct file *file) | 2197 | static int tracing_release(struct inode *inode, struct file *file) |
2198 | { | 2198 | { |
2199 | struct seq_file *m = (struct seq_file *)file->private_data; | 2199 | struct seq_file *m = file->private_data; |
2200 | struct trace_iterator *iter; | 2200 | struct trace_iterator *iter; |
2201 | int cpu; | 2201 | int cpu; |
2202 | 2202 | ||
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index d39b3c5454a5..9021f8c0c0c3 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h | |||
@@ -343,6 +343,10 @@ void trace_function(struct trace_array *tr, | |||
343 | unsigned long ip, | 343 | unsigned long ip, |
344 | unsigned long parent_ip, | 344 | unsigned long parent_ip, |
345 | unsigned long flags, int pc); | 345 | unsigned long flags, int pc); |
346 | void trace_graph_function(struct trace_array *tr, | ||
347 | unsigned long ip, | ||
348 | unsigned long parent_ip, | ||
349 | unsigned long flags, int pc); | ||
346 | void trace_default_header(struct seq_file *m); | 350 | void trace_default_header(struct seq_file *m); |
347 | void print_trace_header(struct seq_file *m, struct trace_iterator *iter); | 351 | void print_trace_header(struct seq_file *m, struct trace_iterator *iter); |
348 | int trace_empty(struct trace_iterator *iter); | 352 | int trace_empty(struct trace_iterator *iter); |
diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c index 31cc4cb0dbf2..39c059ca670e 100644 --- a/kernel/trace/trace_event_perf.c +++ b/kernel/trace/trace_event_perf.c | |||
@@ -9,7 +9,7 @@ | |||
9 | #include <linux/kprobes.h> | 9 | #include <linux/kprobes.h> |
10 | #include "trace.h" | 10 | #include "trace.h" |
11 | 11 | ||
12 | static char *perf_trace_buf[4]; | 12 | static char __percpu *perf_trace_buf[PERF_NR_CONTEXTS]; |
13 | 13 | ||
14 | /* | 14 | /* |
15 | * Force it to be aligned to unsigned long to avoid misaligned accesses | 15 | * Force it to be aligned to unsigned long to avoid misaligned accesses |
@@ -24,7 +24,7 @@ static int total_ref_count; | |||
24 | static int perf_trace_event_init(struct ftrace_event_call *tp_event, | 24 | static int perf_trace_event_init(struct ftrace_event_call *tp_event, |
25 | struct perf_event *p_event) | 25 | struct perf_event *p_event) |
26 | { | 26 | { |
27 | struct hlist_head *list; | 27 | struct hlist_head __percpu *list; |
28 | int ret = -ENOMEM; | 28 | int ret = -ENOMEM; |
29 | int cpu; | 29 | int cpu; |
30 | 30 | ||
@@ -42,11 +42,11 @@ static int perf_trace_event_init(struct ftrace_event_call *tp_event, | |||
42 | tp_event->perf_events = list; | 42 | tp_event->perf_events = list; |
43 | 43 | ||
44 | if (!total_ref_count) { | 44 | if (!total_ref_count) { |
45 | char *buf; | 45 | char __percpu *buf; |
46 | int i; | 46 | int i; |
47 | 47 | ||
48 | for (i = 0; i < 4; i++) { | 48 | for (i = 0; i < PERF_NR_CONTEXTS; i++) { |
49 | buf = (char *)alloc_percpu(perf_trace_t); | 49 | buf = (char __percpu *)alloc_percpu(perf_trace_t); |
50 | if (!buf) | 50 | if (!buf) |
51 | goto fail; | 51 | goto fail; |
52 | 52 | ||
@@ -65,7 +65,7 @@ fail: | |||
65 | if (!total_ref_count) { | 65 | if (!total_ref_count) { |
66 | int i; | 66 | int i; |
67 | 67 | ||
68 | for (i = 0; i < 4; i++) { | 68 | for (i = 0; i < PERF_NR_CONTEXTS; i++) { |
69 | free_percpu(perf_trace_buf[i]); | 69 | free_percpu(perf_trace_buf[i]); |
70 | perf_trace_buf[i] = NULL; | 70 | perf_trace_buf[i] = NULL; |
71 | } | 71 | } |
@@ -101,22 +101,26 @@ int perf_trace_init(struct perf_event *p_event) | |||
101 | return ret; | 101 | return ret; |
102 | } | 102 | } |
103 | 103 | ||
104 | int perf_trace_enable(struct perf_event *p_event) | 104 | int perf_trace_add(struct perf_event *p_event, int flags) |
105 | { | 105 | { |
106 | struct ftrace_event_call *tp_event = p_event->tp_event; | 106 | struct ftrace_event_call *tp_event = p_event->tp_event; |
107 | struct hlist_head __percpu *pcpu_list; | ||
107 | struct hlist_head *list; | 108 | struct hlist_head *list; |
108 | 109 | ||
109 | list = tp_event->perf_events; | 110 | pcpu_list = tp_event->perf_events; |
110 | if (WARN_ON_ONCE(!list)) | 111 | if (WARN_ON_ONCE(!pcpu_list)) |
111 | return -EINVAL; | 112 | return -EINVAL; |
112 | 113 | ||
113 | list = this_cpu_ptr(list); | 114 | if (!(flags & PERF_EF_START)) |
115 | p_event->hw.state = PERF_HES_STOPPED; | ||
116 | |||
117 | list = this_cpu_ptr(pcpu_list); | ||
114 | hlist_add_head_rcu(&p_event->hlist_entry, list); | 118 | hlist_add_head_rcu(&p_event->hlist_entry, list); |
115 | 119 | ||
116 | return 0; | 120 | return 0; |
117 | } | 121 | } |
118 | 122 | ||
119 | void perf_trace_disable(struct perf_event *p_event) | 123 | void perf_trace_del(struct perf_event *p_event, int flags) |
120 | { | 124 | { |
121 | hlist_del_rcu(&p_event->hlist_entry); | 125 | hlist_del_rcu(&p_event->hlist_entry); |
122 | } | 126 | } |
@@ -142,7 +146,7 @@ void perf_trace_destroy(struct perf_event *p_event) | |||
142 | tp_event->perf_events = NULL; | 146 | tp_event->perf_events = NULL; |
143 | 147 | ||
144 | if (!--total_ref_count) { | 148 | if (!--total_ref_count) { |
145 | for (i = 0; i < 4; i++) { | 149 | for (i = 0; i < PERF_NR_CONTEXTS; i++) { |
146 | free_percpu(perf_trace_buf[i]); | 150 | free_percpu(perf_trace_buf[i]); |
147 | perf_trace_buf[i] = NULL; | 151 | perf_trace_buf[i] = NULL; |
148 | } | 152 | } |
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 4c758f146328..398c0e8b332c 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c | |||
@@ -600,21 +600,29 @@ out: | |||
600 | 600 | ||
601 | enum { | 601 | enum { |
602 | FORMAT_HEADER = 1, | 602 | FORMAT_HEADER = 1, |
603 | FORMAT_PRINTFMT = 2, | 603 | FORMAT_FIELD_SEPERATOR = 2, |
604 | FORMAT_PRINTFMT = 3, | ||
604 | }; | 605 | }; |
605 | 606 | ||
606 | static void *f_next(struct seq_file *m, void *v, loff_t *pos) | 607 | static void *f_next(struct seq_file *m, void *v, loff_t *pos) |
607 | { | 608 | { |
608 | struct ftrace_event_call *call = m->private; | 609 | struct ftrace_event_call *call = m->private; |
609 | struct ftrace_event_field *field; | 610 | struct ftrace_event_field *field; |
610 | struct list_head *head; | 611 | struct list_head *common_head = &ftrace_common_fields; |
612 | struct list_head *head = trace_get_fields(call); | ||
611 | 613 | ||
612 | (*pos)++; | 614 | (*pos)++; |
613 | 615 | ||
614 | switch ((unsigned long)v) { | 616 | switch ((unsigned long)v) { |
615 | case FORMAT_HEADER: | 617 | case FORMAT_HEADER: |
616 | head = &ftrace_common_fields; | 618 | if (unlikely(list_empty(common_head))) |
619 | return NULL; | ||
620 | |||
621 | field = list_entry(common_head->prev, | ||
622 | struct ftrace_event_field, link); | ||
623 | return field; | ||
617 | 624 | ||
625 | case FORMAT_FIELD_SEPERATOR: | ||
618 | if (unlikely(list_empty(head))) | 626 | if (unlikely(list_empty(head))) |
619 | return NULL; | 627 | return NULL; |
620 | 628 | ||
@@ -626,31 +634,10 @@ static void *f_next(struct seq_file *m, void *v, loff_t *pos) | |||
626 | return NULL; | 634 | return NULL; |
627 | } | 635 | } |
628 | 636 | ||
629 | head = trace_get_fields(call); | ||
630 | |||
631 | /* | ||
632 | * To separate common fields from event fields, the | ||
633 | * LSB is set on the first event field. Clear it in case. | ||
634 | */ | ||
635 | v = (void *)((unsigned long)v & ~1L); | ||
636 | |||
637 | field = v; | 637 | field = v; |
638 | /* | 638 | if (field->link.prev == common_head) |
639 | * If this is a common field, and at the end of the list, then | 639 | return (void *)FORMAT_FIELD_SEPERATOR; |
640 | * continue with main list. | 640 | else if (field->link.prev == head) |
641 | */ | ||
642 | if (field->link.prev == &ftrace_common_fields) { | ||
643 | if (unlikely(list_empty(head))) | ||
644 | return NULL; | ||
645 | field = list_entry(head->prev, struct ftrace_event_field, link); | ||
646 | /* Set the LSB to notify f_show to print an extra newline */ | ||
647 | field = (struct ftrace_event_field *) | ||
648 | ((unsigned long)field | 1); | ||
649 | return field; | ||
650 | } | ||
651 | |||
652 | /* If we are done tell f_show to print the format */ | ||
653 | if (field->link.prev == head) | ||
654 | return (void *)FORMAT_PRINTFMT; | 641 | return (void *)FORMAT_PRINTFMT; |
655 | 642 | ||
656 | field = list_entry(field->link.prev, struct ftrace_event_field, link); | 643 | field = list_entry(field->link.prev, struct ftrace_event_field, link); |
@@ -688,22 +675,16 @@ static int f_show(struct seq_file *m, void *v) | |||
688 | seq_printf(m, "format:\n"); | 675 | seq_printf(m, "format:\n"); |
689 | return 0; | 676 | return 0; |
690 | 677 | ||
678 | case FORMAT_FIELD_SEPERATOR: | ||
679 | seq_putc(m, '\n'); | ||
680 | return 0; | ||
681 | |||
691 | case FORMAT_PRINTFMT: | 682 | case FORMAT_PRINTFMT: |
692 | seq_printf(m, "\nprint fmt: %s\n", | 683 | seq_printf(m, "\nprint fmt: %s\n", |
693 | call->print_fmt); | 684 | call->print_fmt); |
694 | return 0; | 685 | return 0; |
695 | } | 686 | } |
696 | 687 | ||
697 | /* | ||
698 | * To separate common fields from event fields, the | ||
699 | * LSB is set on the first event field. Clear it and | ||
700 | * print a newline if it is set. | ||
701 | */ | ||
702 | if ((unsigned long)v & 1) { | ||
703 | seq_putc(m, '\n'); | ||
704 | v = (void *)((unsigned long)v & ~1L); | ||
705 | } | ||
706 | |||
707 | field = v; | 688 | field = v; |
708 | 689 | ||
709 | /* | 690 | /* |
diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c index 6f233698518e..76b05980225c 100644 --- a/kernel/trace/trace_functions_graph.c +++ b/kernel/trace/trace_functions_graph.c | |||
@@ -15,15 +15,19 @@ | |||
15 | #include "trace.h" | 15 | #include "trace.h" |
16 | #include "trace_output.h" | 16 | #include "trace_output.h" |
17 | 17 | ||
18 | /* When set, irq functions will be ignored */ | ||
19 | static int ftrace_graph_skip_irqs; | ||
20 | |||
18 | struct fgraph_cpu_data { | 21 | struct fgraph_cpu_data { |
19 | pid_t last_pid; | 22 | pid_t last_pid; |
20 | int depth; | 23 | int depth; |
24 | int depth_irq; | ||
21 | int ignore; | 25 | int ignore; |
22 | unsigned long enter_funcs[FTRACE_RETFUNC_DEPTH]; | 26 | unsigned long enter_funcs[FTRACE_RETFUNC_DEPTH]; |
23 | }; | 27 | }; |
24 | 28 | ||
25 | struct fgraph_data { | 29 | struct fgraph_data { |
26 | struct fgraph_cpu_data *cpu_data; | 30 | struct fgraph_cpu_data __percpu *cpu_data; |
27 | 31 | ||
28 | /* Place to preserve last processed entry. */ | 32 | /* Place to preserve last processed entry. */ |
29 | struct ftrace_graph_ent_entry ent; | 33 | struct ftrace_graph_ent_entry ent; |
@@ -41,6 +45,7 @@ struct fgraph_data { | |||
41 | #define TRACE_GRAPH_PRINT_PROC 0x8 | 45 | #define TRACE_GRAPH_PRINT_PROC 0x8 |
42 | #define TRACE_GRAPH_PRINT_DURATION 0x10 | 46 | #define TRACE_GRAPH_PRINT_DURATION 0x10 |
43 | #define TRACE_GRAPH_PRINT_ABS_TIME 0x20 | 47 | #define TRACE_GRAPH_PRINT_ABS_TIME 0x20 |
48 | #define TRACE_GRAPH_PRINT_IRQS 0x40 | ||
44 | 49 | ||
45 | static struct tracer_opt trace_opts[] = { | 50 | static struct tracer_opt trace_opts[] = { |
46 | /* Display overruns? (for self-debug purpose) */ | 51 | /* Display overruns? (for self-debug purpose) */ |
@@ -55,13 +60,15 @@ static struct tracer_opt trace_opts[] = { | |||
55 | { TRACER_OPT(funcgraph-duration, TRACE_GRAPH_PRINT_DURATION) }, | 60 | { TRACER_OPT(funcgraph-duration, TRACE_GRAPH_PRINT_DURATION) }, |
56 | /* Display absolute time of an entry */ | 61 | /* Display absolute time of an entry */ |
57 | { TRACER_OPT(funcgraph-abstime, TRACE_GRAPH_PRINT_ABS_TIME) }, | 62 | { TRACER_OPT(funcgraph-abstime, TRACE_GRAPH_PRINT_ABS_TIME) }, |
63 | /* Display interrupts */ | ||
64 | { TRACER_OPT(funcgraph-irqs, TRACE_GRAPH_PRINT_IRQS) }, | ||
58 | { } /* Empty entry */ | 65 | { } /* Empty entry */ |
59 | }; | 66 | }; |
60 | 67 | ||
61 | static struct tracer_flags tracer_flags = { | 68 | static struct tracer_flags tracer_flags = { |
62 | /* Don't display overruns and proc by default */ | 69 | /* Don't display overruns and proc by default */ |
63 | .val = TRACE_GRAPH_PRINT_CPU | TRACE_GRAPH_PRINT_OVERHEAD | | 70 | .val = TRACE_GRAPH_PRINT_CPU | TRACE_GRAPH_PRINT_OVERHEAD | |
64 | TRACE_GRAPH_PRINT_DURATION, | 71 | TRACE_GRAPH_PRINT_DURATION | TRACE_GRAPH_PRINT_IRQS, |
65 | .opts = trace_opts | 72 | .opts = trace_opts |
66 | }; | 73 | }; |
67 | 74 | ||
@@ -204,6 +211,14 @@ int __trace_graph_entry(struct trace_array *tr, | |||
204 | return 1; | 211 | return 1; |
205 | } | 212 | } |
206 | 213 | ||
214 | static inline int ftrace_graph_ignore_irqs(void) | ||
215 | { | ||
216 | if (!ftrace_graph_skip_irqs) | ||
217 | return 0; | ||
218 | |||
219 | return in_irq(); | ||
220 | } | ||
221 | |||
207 | int trace_graph_entry(struct ftrace_graph_ent *trace) | 222 | int trace_graph_entry(struct ftrace_graph_ent *trace) |
208 | { | 223 | { |
209 | struct trace_array *tr = graph_array; | 224 | struct trace_array *tr = graph_array; |
@@ -218,7 +233,8 @@ int trace_graph_entry(struct ftrace_graph_ent *trace) | |||
218 | return 0; | 233 | return 0; |
219 | 234 | ||
220 | /* trace it when it is-nested-in or is a function enabled. */ | 235 | /* trace it when it is-nested-in or is a function enabled. */ |
221 | if (!(trace->depth || ftrace_graph_addr(trace->func))) | 236 | if (!(trace->depth || ftrace_graph_addr(trace->func)) || |
237 | ftrace_graph_ignore_irqs()) | ||
222 | return 0; | 238 | return 0; |
223 | 239 | ||
224 | local_irq_save(flags); | 240 | local_irq_save(flags); |
@@ -246,6 +262,34 @@ int trace_graph_thresh_entry(struct ftrace_graph_ent *trace) | |||
246 | return trace_graph_entry(trace); | 262 | return trace_graph_entry(trace); |
247 | } | 263 | } |
248 | 264 | ||
265 | static void | ||
266 | __trace_graph_function(struct trace_array *tr, | ||
267 | unsigned long ip, unsigned long flags, int pc) | ||
268 | { | ||
269 | u64 time = trace_clock_local(); | ||
270 | struct ftrace_graph_ent ent = { | ||
271 | .func = ip, | ||
272 | .depth = 0, | ||
273 | }; | ||
274 | struct ftrace_graph_ret ret = { | ||
275 | .func = ip, | ||
276 | .depth = 0, | ||
277 | .calltime = time, | ||
278 | .rettime = time, | ||
279 | }; | ||
280 | |||
281 | __trace_graph_entry(tr, &ent, flags, pc); | ||
282 | __trace_graph_return(tr, &ret, flags, pc); | ||
283 | } | ||
284 | |||
285 | void | ||
286 | trace_graph_function(struct trace_array *tr, | ||
287 | unsigned long ip, unsigned long parent_ip, | ||
288 | unsigned long flags, int pc) | ||
289 | { | ||
290 | __trace_graph_function(tr, ip, flags, pc); | ||
291 | } | ||
292 | |||
249 | void __trace_graph_return(struct trace_array *tr, | 293 | void __trace_graph_return(struct trace_array *tr, |
250 | struct ftrace_graph_ret *trace, | 294 | struct ftrace_graph_ret *trace, |
251 | unsigned long flags, | 295 | unsigned long flags, |
@@ -649,8 +693,9 @@ trace_print_graph_duration(unsigned long long duration, struct trace_seq *s) | |||
649 | 693 | ||
650 | /* Print nsecs (we don't want to exceed 7 numbers) */ | 694 | /* Print nsecs (we don't want to exceed 7 numbers) */ |
651 | if (len < 7) { | 695 | if (len < 7) { |
652 | snprintf(nsecs_str, min(sizeof(nsecs_str), 8UL - len), "%03lu", | 696 | size_t slen = min_t(size_t, sizeof(nsecs_str), 8UL - len); |
653 | nsecs_rem); | 697 | |
698 | snprintf(nsecs_str, slen, "%03lu", nsecs_rem); | ||
654 | ret = trace_seq_printf(s, ".%s", nsecs_str); | 699 | ret = trace_seq_printf(s, ".%s", nsecs_str); |
655 | if (!ret) | 700 | if (!ret) |
656 | return TRACE_TYPE_PARTIAL_LINE; | 701 | return TRACE_TYPE_PARTIAL_LINE; |
@@ -855,6 +900,108 @@ print_graph_prologue(struct trace_iterator *iter, struct trace_seq *s, | |||
855 | return 0; | 900 | return 0; |
856 | } | 901 | } |
857 | 902 | ||
903 | /* | ||
904 | * Entry check for irq code | ||
905 | * | ||
906 | * returns 1 if | ||
907 | * - we are inside irq code | ||
908 | * - we just entered irq code | ||
909 | * | ||
910 | * returns 0 if | ||
911 | * - funcgraph-irqs option is set | ||
912 | * - we are not inside irq code | ||
913 | */ | ||
914 | static int | ||
915 | check_irq_entry(struct trace_iterator *iter, u32 flags, | ||
916 | unsigned long addr, int depth) | ||
917 | { | ||
918 | int cpu = iter->cpu; | ||
919 | int *depth_irq; | ||
920 | struct fgraph_data *data = iter->private; | ||
921 | |||
922 | /* | ||
923 | * If we are either displaying irqs, or we got called as | ||
924 | * a graph event and private data does not exist, | ||
925 | * then we bypass the irq check. | ||
926 | */ | ||
927 | if ((flags & TRACE_GRAPH_PRINT_IRQS) || | ||
928 | (!data)) | ||
929 | return 0; | ||
930 | |||
931 | depth_irq = &(per_cpu_ptr(data->cpu_data, cpu)->depth_irq); | ||
932 | |||
933 | /* | ||
934 | * We are inside the irq code | ||
935 | */ | ||
936 | if (*depth_irq >= 0) | ||
937 | return 1; | ||
938 | |||
939 | if ((addr < (unsigned long)__irqentry_text_start) || | ||
940 | (addr >= (unsigned long)__irqentry_text_end)) | ||
941 | return 0; | ||
942 | |||
943 | /* | ||
944 | * We are entering irq code. | ||
945 | */ | ||
946 | *depth_irq = depth; | ||
947 | return 1; | ||
948 | } | ||
949 | |||
950 | /* | ||
951 | * Return check for irq code | ||
952 | * | ||
953 | * returns 1 if | ||
954 | * - we are inside irq code | ||
955 | * - we just left irq code | ||
956 | * | ||
957 | * returns 0 if | ||
958 | * - funcgraph-irqs option is set | ||
959 | * - we are not inside irq code | ||
960 | */ | ||
961 | static int | ||
962 | check_irq_return(struct trace_iterator *iter, u32 flags, int depth) | ||
963 | { | ||
964 | int cpu = iter->cpu; | ||
965 | int *depth_irq; | ||
966 | struct fgraph_data *data = iter->private; | ||
967 | |||
968 | /* | ||
969 | * If we are either displaying irqs, or we got called as | ||
970 | * a graph event and private data does not exist, | ||
971 | * then we bypass the irq check. | ||
972 | */ | ||
973 | if ((flags & TRACE_GRAPH_PRINT_IRQS) || | ||
974 | (!data)) | ||
975 | return 0; | ||
976 | |||
977 | depth_irq = &(per_cpu_ptr(data->cpu_data, cpu)->depth_irq); | ||
978 | |||
979 | /* | ||
980 | * We are not inside the irq code. | ||
981 | */ | ||
982 | if (*depth_irq == -1) | ||
983 | return 0; | ||
984 | |||
985 | /* | ||
986 | * We are inside the irq code, and this is returning entry. | ||
987 | * Let's not trace it and clear the entry depth, since | ||
988 | * we are out of irq code. | ||
989 | * | ||
990 | * This condition ensures that we 'leave the irq code' once | ||
991 | * we are out of the entry depth. Thus protecting us from | ||
992 | * the RETURN entry loss. | ||
993 | */ | ||
994 | if (*depth_irq >= depth) { | ||
995 | *depth_irq = -1; | ||
996 | return 1; | ||
997 | } | ||
998 | |||
999 | /* | ||
1000 | * We are inside the irq code, and this is not the entry. | ||
1001 | */ | ||
1002 | return 1; | ||
1003 | } | ||
1004 | |||
858 | static enum print_line_t | 1005 | static enum print_line_t |
859 | print_graph_entry(struct ftrace_graph_ent_entry *field, struct trace_seq *s, | 1006 | print_graph_entry(struct ftrace_graph_ent_entry *field, struct trace_seq *s, |
860 | struct trace_iterator *iter, u32 flags) | 1007 | struct trace_iterator *iter, u32 flags) |
@@ -865,6 +1012,9 @@ print_graph_entry(struct ftrace_graph_ent_entry *field, struct trace_seq *s, | |||
865 | static enum print_line_t ret; | 1012 | static enum print_line_t ret; |
866 | int cpu = iter->cpu; | 1013 | int cpu = iter->cpu; |
867 | 1014 | ||
1015 | if (check_irq_entry(iter, flags, call->func, call->depth)) | ||
1016 | return TRACE_TYPE_HANDLED; | ||
1017 | |||
868 | if (print_graph_prologue(iter, s, TRACE_GRAPH_ENT, call->func, flags)) | 1018 | if (print_graph_prologue(iter, s, TRACE_GRAPH_ENT, call->func, flags)) |
869 | return TRACE_TYPE_PARTIAL_LINE; | 1019 | return TRACE_TYPE_PARTIAL_LINE; |
870 | 1020 | ||
@@ -902,6 +1052,9 @@ print_graph_return(struct ftrace_graph_ret *trace, struct trace_seq *s, | |||
902 | int ret; | 1052 | int ret; |
903 | int i; | 1053 | int i; |
904 | 1054 | ||
1055 | if (check_irq_return(iter, flags, trace->depth)) | ||
1056 | return TRACE_TYPE_HANDLED; | ||
1057 | |||
905 | if (data) { | 1058 | if (data) { |
906 | struct fgraph_cpu_data *cpu_data; | 1059 | struct fgraph_cpu_data *cpu_data; |
907 | int cpu = iter->cpu; | 1060 | int cpu = iter->cpu; |
@@ -1054,7 +1207,7 @@ print_graph_comment(struct trace_seq *s, struct trace_entry *ent, | |||
1054 | 1207 | ||
1055 | 1208 | ||
1056 | enum print_line_t | 1209 | enum print_line_t |
1057 | print_graph_function_flags(struct trace_iterator *iter, u32 flags) | 1210 | __print_graph_function_flags(struct trace_iterator *iter, u32 flags) |
1058 | { | 1211 | { |
1059 | struct ftrace_graph_ent_entry *field; | 1212 | struct ftrace_graph_ent_entry *field; |
1060 | struct fgraph_data *data = iter->private; | 1213 | struct fgraph_data *data = iter->private; |
@@ -1117,7 +1270,18 @@ print_graph_function_flags(struct trace_iterator *iter, u32 flags) | |||
1117 | static enum print_line_t | 1270 | static enum print_line_t |
1118 | print_graph_function(struct trace_iterator *iter) | 1271 | print_graph_function(struct trace_iterator *iter) |
1119 | { | 1272 | { |
1120 | return print_graph_function_flags(iter, tracer_flags.val); | 1273 | return __print_graph_function_flags(iter, tracer_flags.val); |
1274 | } | ||
1275 | |||
1276 | enum print_line_t print_graph_function_flags(struct trace_iterator *iter, | ||
1277 | u32 flags) | ||
1278 | { | ||
1279 | if (trace_flags & TRACE_ITER_LATENCY_FMT) | ||
1280 | flags |= TRACE_GRAPH_PRINT_DURATION; | ||
1281 | else | ||
1282 | flags |= TRACE_GRAPH_PRINT_ABS_TIME; | ||
1283 | |||
1284 | return __print_graph_function_flags(iter, flags); | ||
1121 | } | 1285 | } |
1122 | 1286 | ||
1123 | static enum print_line_t | 1287 | static enum print_line_t |
@@ -1149,7 +1313,7 @@ static void print_lat_header(struct seq_file *s, u32 flags) | |||
1149 | seq_printf(s, "#%.*s|||| / \n", size, spaces); | 1313 | seq_printf(s, "#%.*s|||| / \n", size, spaces); |
1150 | } | 1314 | } |
1151 | 1315 | ||
1152 | void print_graph_headers_flags(struct seq_file *s, u32 flags) | 1316 | static void __print_graph_headers_flags(struct seq_file *s, u32 flags) |
1153 | { | 1317 | { |
1154 | int lat = trace_flags & TRACE_ITER_LATENCY_FMT; | 1318 | int lat = trace_flags & TRACE_ITER_LATENCY_FMT; |
1155 | 1319 | ||
@@ -1190,6 +1354,23 @@ void print_graph_headers(struct seq_file *s) | |||
1190 | print_graph_headers_flags(s, tracer_flags.val); | 1354 | print_graph_headers_flags(s, tracer_flags.val); |
1191 | } | 1355 | } |
1192 | 1356 | ||
1357 | void print_graph_headers_flags(struct seq_file *s, u32 flags) | ||
1358 | { | ||
1359 | struct trace_iterator *iter = s->private; | ||
1360 | |||
1361 | if (trace_flags & TRACE_ITER_LATENCY_FMT) { | ||
1362 | /* print nothing if the buffers are empty */ | ||
1363 | if (trace_empty(iter)) | ||
1364 | return; | ||
1365 | |||
1366 | print_trace_header(s, iter); | ||
1367 | flags |= TRACE_GRAPH_PRINT_DURATION; | ||
1368 | } else | ||
1369 | flags |= TRACE_GRAPH_PRINT_ABS_TIME; | ||
1370 | |||
1371 | __print_graph_headers_flags(s, flags); | ||
1372 | } | ||
1373 | |||
1193 | void graph_trace_open(struct trace_iterator *iter) | 1374 | void graph_trace_open(struct trace_iterator *iter) |
1194 | { | 1375 | { |
1195 | /* pid and depth on the last trace processed */ | 1376 | /* pid and depth on the last trace processed */ |
@@ -1210,9 +1391,12 @@ void graph_trace_open(struct trace_iterator *iter) | |||
1210 | pid_t *pid = &(per_cpu_ptr(data->cpu_data, cpu)->last_pid); | 1391 | pid_t *pid = &(per_cpu_ptr(data->cpu_data, cpu)->last_pid); |
1211 | int *depth = &(per_cpu_ptr(data->cpu_data, cpu)->depth); | 1392 | int *depth = &(per_cpu_ptr(data->cpu_data, cpu)->depth); |
1212 | int *ignore = &(per_cpu_ptr(data->cpu_data, cpu)->ignore); | 1393 | int *ignore = &(per_cpu_ptr(data->cpu_data, cpu)->ignore); |
1394 | int *depth_irq = &(per_cpu_ptr(data->cpu_data, cpu)->depth_irq); | ||
1395 | |||
1213 | *pid = -1; | 1396 | *pid = -1; |
1214 | *depth = 0; | 1397 | *depth = 0; |
1215 | *ignore = 0; | 1398 | *ignore = 0; |
1399 | *depth_irq = -1; | ||
1216 | } | 1400 | } |
1217 | 1401 | ||
1218 | iter->private = data; | 1402 | iter->private = data; |
@@ -1235,6 +1419,14 @@ void graph_trace_close(struct trace_iterator *iter) | |||
1235 | } | 1419 | } |
1236 | } | 1420 | } |
1237 | 1421 | ||
1422 | static int func_graph_set_flag(u32 old_flags, u32 bit, int set) | ||
1423 | { | ||
1424 | if (bit == TRACE_GRAPH_PRINT_IRQS) | ||
1425 | ftrace_graph_skip_irqs = !set; | ||
1426 | |||
1427 | return 0; | ||
1428 | } | ||
1429 | |||
1238 | static struct trace_event_functions graph_functions = { | 1430 | static struct trace_event_functions graph_functions = { |
1239 | .trace = print_graph_function_event, | 1431 | .trace = print_graph_function_event, |
1240 | }; | 1432 | }; |
@@ -1261,6 +1453,7 @@ static struct tracer graph_trace __read_mostly = { | |||
1261 | .print_line = print_graph_function, | 1453 | .print_line = print_graph_function, |
1262 | .print_header = print_graph_headers, | 1454 | .print_header = print_graph_headers, |
1263 | .flags = &tracer_flags, | 1455 | .flags = &tracer_flags, |
1456 | .set_flag = func_graph_set_flag, | ||
1264 | #ifdef CONFIG_FTRACE_SELFTEST | 1457 | #ifdef CONFIG_FTRACE_SELFTEST |
1265 | .selftest = trace_selftest_startup_function_graph, | 1458 | .selftest = trace_selftest_startup_function_graph, |
1266 | #endif | 1459 | #endif |
diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c index 73a6b0601f2e..5cf8c602b880 100644 --- a/kernel/trace/trace_irqsoff.c +++ b/kernel/trace/trace_irqsoff.c | |||
@@ -87,14 +87,22 @@ static __cacheline_aligned_in_smp unsigned long max_sequence; | |||
87 | 87 | ||
88 | #ifdef CONFIG_FUNCTION_TRACER | 88 | #ifdef CONFIG_FUNCTION_TRACER |
89 | /* | 89 | /* |
90 | * irqsoff uses its own tracer function to keep the overhead down: | 90 | * Prologue for the preempt and irqs off function tracers. |
91 | * | ||
92 | * Returns 1 if it is OK to continue, and data->disabled is | ||
93 | * incremented. | ||
94 | * 0 if the trace is to be ignored, and data->disabled | ||
95 | * is kept the same. | ||
96 | * | ||
97 | * Note, this function is also used outside this ifdef but | ||
98 | * inside the #ifdef of the function graph tracer below. | ||
99 | * This is OK, since the function graph tracer is | ||
100 | * dependent on the function tracer. | ||
91 | */ | 101 | */ |
92 | static void | 102 | static int func_prolog_dec(struct trace_array *tr, |
93 | irqsoff_tracer_call(unsigned long ip, unsigned long parent_ip) | 103 | struct trace_array_cpu **data, |
104 | unsigned long *flags) | ||
94 | { | 105 | { |
95 | struct trace_array *tr = irqsoff_trace; | ||
96 | struct trace_array_cpu *data; | ||
97 | unsigned long flags; | ||
98 | long disabled; | 106 | long disabled; |
99 | int cpu; | 107 | int cpu; |
100 | 108 | ||
@@ -106,18 +114,38 @@ irqsoff_tracer_call(unsigned long ip, unsigned long parent_ip) | |||
106 | */ | 114 | */ |
107 | cpu = raw_smp_processor_id(); | 115 | cpu = raw_smp_processor_id(); |
108 | if (likely(!per_cpu(tracing_cpu, cpu))) | 116 | if (likely(!per_cpu(tracing_cpu, cpu))) |
109 | return; | 117 | return 0; |
110 | 118 | ||
111 | local_save_flags(flags); | 119 | local_save_flags(*flags); |
112 | /* slight chance to get a false positive on tracing_cpu */ | 120 | /* slight chance to get a false positive on tracing_cpu */ |
113 | if (!irqs_disabled_flags(flags)) | 121 | if (!irqs_disabled_flags(*flags)) |
114 | return; | 122 | return 0; |
115 | 123 | ||
116 | data = tr->data[cpu]; | 124 | *data = tr->data[cpu]; |
117 | disabled = atomic_inc_return(&data->disabled); | 125 | disabled = atomic_inc_return(&(*data)->disabled); |
118 | 126 | ||
119 | if (likely(disabled == 1)) | 127 | if (likely(disabled == 1)) |
120 | trace_function(tr, ip, parent_ip, flags, preempt_count()); | 128 | return 1; |
129 | |||
130 | atomic_dec(&(*data)->disabled); | ||
131 | |||
132 | return 0; | ||
133 | } | ||
134 | |||
135 | /* | ||
136 | * irqsoff uses its own tracer function to keep the overhead down: | ||
137 | */ | ||
138 | static void | ||
139 | irqsoff_tracer_call(unsigned long ip, unsigned long parent_ip) | ||
140 | { | ||
141 | struct trace_array *tr = irqsoff_trace; | ||
142 | struct trace_array_cpu *data; | ||
143 | unsigned long flags; | ||
144 | |||
145 | if (!func_prolog_dec(tr, &data, &flags)) | ||
146 | return; | ||
147 | |||
148 | trace_function(tr, ip, parent_ip, flags, preempt_count()); | ||
121 | 149 | ||
122 | atomic_dec(&data->disabled); | 150 | atomic_dec(&data->disabled); |
123 | } | 151 | } |
@@ -155,30 +183,16 @@ static int irqsoff_graph_entry(struct ftrace_graph_ent *trace) | |||
155 | struct trace_array *tr = irqsoff_trace; | 183 | struct trace_array *tr = irqsoff_trace; |
156 | struct trace_array_cpu *data; | 184 | struct trace_array_cpu *data; |
157 | unsigned long flags; | 185 | unsigned long flags; |
158 | long disabled; | ||
159 | int ret; | 186 | int ret; |
160 | int cpu; | ||
161 | int pc; | 187 | int pc; |
162 | 188 | ||
163 | cpu = raw_smp_processor_id(); | 189 | if (!func_prolog_dec(tr, &data, &flags)) |
164 | if (likely(!per_cpu(tracing_cpu, cpu))) | ||
165 | return 0; | 190 | return 0; |
166 | 191 | ||
167 | local_save_flags(flags); | 192 | pc = preempt_count(); |
168 | /* slight chance to get a false positive on tracing_cpu */ | 193 | ret = __trace_graph_entry(tr, trace, flags, pc); |
169 | if (!irqs_disabled_flags(flags)) | ||
170 | return 0; | ||
171 | |||
172 | data = tr->data[cpu]; | ||
173 | disabled = atomic_inc_return(&data->disabled); | ||
174 | |||
175 | if (likely(disabled == 1)) { | ||
176 | pc = preempt_count(); | ||
177 | ret = __trace_graph_entry(tr, trace, flags, pc); | ||
178 | } else | ||
179 | ret = 0; | ||
180 | |||
181 | atomic_dec(&data->disabled); | 194 | atomic_dec(&data->disabled); |
195 | |||
182 | return ret; | 196 | return ret; |
183 | } | 197 | } |
184 | 198 | ||
@@ -187,27 +201,13 @@ static void irqsoff_graph_return(struct ftrace_graph_ret *trace) | |||
187 | struct trace_array *tr = irqsoff_trace; | 201 | struct trace_array *tr = irqsoff_trace; |
188 | struct trace_array_cpu *data; | 202 | struct trace_array_cpu *data; |
189 | unsigned long flags; | 203 | unsigned long flags; |
190 | long disabled; | ||
191 | int cpu; | ||
192 | int pc; | 204 | int pc; |
193 | 205 | ||
194 | cpu = raw_smp_processor_id(); | 206 | if (!func_prolog_dec(tr, &data, &flags)) |
195 | if (likely(!per_cpu(tracing_cpu, cpu))) | ||
196 | return; | 207 | return; |
197 | 208 | ||
198 | local_save_flags(flags); | 209 | pc = preempt_count(); |
199 | /* slight chance to get a false positive on tracing_cpu */ | 210 | __trace_graph_return(tr, trace, flags, pc); |
200 | if (!irqs_disabled_flags(flags)) | ||
201 | return; | ||
202 | |||
203 | data = tr->data[cpu]; | ||
204 | disabled = atomic_inc_return(&data->disabled); | ||
205 | |||
206 | if (likely(disabled == 1)) { | ||
207 | pc = preempt_count(); | ||
208 | __trace_graph_return(tr, trace, flags, pc); | ||
209 | } | ||
210 | |||
211 | atomic_dec(&data->disabled); | 211 | atomic_dec(&data->disabled); |
212 | } | 212 | } |
213 | 213 | ||
@@ -229,75 +229,33 @@ static void irqsoff_trace_close(struct trace_iterator *iter) | |||
229 | 229 | ||
230 | static enum print_line_t irqsoff_print_line(struct trace_iterator *iter) | 230 | static enum print_line_t irqsoff_print_line(struct trace_iterator *iter) |
231 | { | 231 | { |
232 | u32 flags = GRAPH_TRACER_FLAGS; | ||
233 | |||
234 | if (trace_flags & TRACE_ITER_LATENCY_FMT) | ||
235 | flags |= TRACE_GRAPH_PRINT_DURATION; | ||
236 | else | ||
237 | flags |= TRACE_GRAPH_PRINT_ABS_TIME; | ||
238 | |||
239 | /* | 232 | /* |
240 | * In graph mode call the graph tracer output function, | 233 | * In graph mode call the graph tracer output function, |
241 | * otherwise go with the TRACE_FN event handler | 234 | * otherwise go with the TRACE_FN event handler |
242 | */ | 235 | */ |
243 | if (is_graph()) | 236 | if (is_graph()) |
244 | return print_graph_function_flags(iter, flags); | 237 | return print_graph_function_flags(iter, GRAPH_TRACER_FLAGS); |
245 | 238 | ||
246 | return TRACE_TYPE_UNHANDLED; | 239 | return TRACE_TYPE_UNHANDLED; |
247 | } | 240 | } |
248 | 241 | ||
249 | static void irqsoff_print_header(struct seq_file *s) | 242 | static void irqsoff_print_header(struct seq_file *s) |
250 | { | 243 | { |
251 | if (is_graph()) { | 244 | if (is_graph()) |
252 | struct trace_iterator *iter = s->private; | 245 | print_graph_headers_flags(s, GRAPH_TRACER_FLAGS); |
253 | u32 flags = GRAPH_TRACER_FLAGS; | 246 | else |
254 | |||
255 | if (trace_flags & TRACE_ITER_LATENCY_FMT) { | ||
256 | /* print nothing if the buffers are empty */ | ||
257 | if (trace_empty(iter)) | ||
258 | return; | ||
259 | |||
260 | print_trace_header(s, iter); | ||
261 | flags |= TRACE_GRAPH_PRINT_DURATION; | ||
262 | } else | ||
263 | flags |= TRACE_GRAPH_PRINT_ABS_TIME; | ||
264 | |||
265 | print_graph_headers_flags(s, flags); | ||
266 | } else | ||
267 | trace_default_header(s); | 247 | trace_default_header(s); |
268 | } | 248 | } |
269 | 249 | ||
270 | static void | 250 | static void |
271 | trace_graph_function(struct trace_array *tr, | ||
272 | unsigned long ip, unsigned long flags, int pc) | ||
273 | { | ||
274 | u64 time = trace_clock_local(); | ||
275 | struct ftrace_graph_ent ent = { | ||
276 | .func = ip, | ||
277 | .depth = 0, | ||
278 | }; | ||
279 | struct ftrace_graph_ret ret = { | ||
280 | .func = ip, | ||
281 | .depth = 0, | ||
282 | .calltime = time, | ||
283 | .rettime = time, | ||
284 | }; | ||
285 | |||
286 | __trace_graph_entry(tr, &ent, flags, pc); | ||
287 | __trace_graph_return(tr, &ret, flags, pc); | ||
288 | } | ||
289 | |||
290 | static void | ||
291 | __trace_function(struct trace_array *tr, | 251 | __trace_function(struct trace_array *tr, |
292 | unsigned long ip, unsigned long parent_ip, | 252 | unsigned long ip, unsigned long parent_ip, |
293 | unsigned long flags, int pc) | 253 | unsigned long flags, int pc) |
294 | { | 254 | { |
295 | if (!is_graph()) | 255 | if (is_graph()) |
256 | trace_graph_function(tr, ip, parent_ip, flags, pc); | ||
257 | else | ||
296 | trace_function(tr, ip, parent_ip, flags, pc); | 258 | trace_function(tr, ip, parent_ip, flags, pc); |
297 | else { | ||
298 | trace_graph_function(tr, parent_ip, flags, pc); | ||
299 | trace_graph_function(tr, ip, flags, pc); | ||
300 | } | ||
301 | } | 259 | } |
302 | 260 | ||
303 | #else | 261 | #else |
diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c index 4086eae6e81b..7319559ed59f 100644 --- a/kernel/trace/trace_sched_wakeup.c +++ b/kernel/trace/trace_sched_wakeup.c | |||
@@ -31,48 +31,98 @@ static int wakeup_rt; | |||
31 | static arch_spinlock_t wakeup_lock = | 31 | static arch_spinlock_t wakeup_lock = |
32 | (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED; | 32 | (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED; |
33 | 33 | ||
34 | static void wakeup_reset(struct trace_array *tr); | ||
34 | static void __wakeup_reset(struct trace_array *tr); | 35 | static void __wakeup_reset(struct trace_array *tr); |
36 | static int wakeup_graph_entry(struct ftrace_graph_ent *trace); | ||
37 | static void wakeup_graph_return(struct ftrace_graph_ret *trace); | ||
35 | 38 | ||
36 | static int save_lat_flag; | 39 | static int save_lat_flag; |
37 | 40 | ||
41 | #define TRACE_DISPLAY_GRAPH 1 | ||
42 | |||
43 | static struct tracer_opt trace_opts[] = { | ||
44 | #ifdef CONFIG_FUNCTION_GRAPH_TRACER | ||
45 | /* display latency trace as call graph */ | ||
46 | { TRACER_OPT(display-graph, TRACE_DISPLAY_GRAPH) }, | ||
47 | #endif | ||
48 | { } /* Empty entry */ | ||
49 | }; | ||
50 | |||
51 | static struct tracer_flags tracer_flags = { | ||
52 | .val = 0, | ||
53 | .opts = trace_opts, | ||
54 | }; | ||
55 | |||
56 | #define is_graph() (tracer_flags.val & TRACE_DISPLAY_GRAPH) | ||
57 | |||
38 | #ifdef CONFIG_FUNCTION_TRACER | 58 | #ifdef CONFIG_FUNCTION_TRACER |
59 | |||
39 | /* | 60 | /* |
40 | * irqsoff uses its own tracer function to keep the overhead down: | 61 | * Prologue for the wakeup function tracers. |
62 | * | ||
63 | * Returns 1 if it is OK to continue, and preemption | ||
64 | * is disabled and data->disabled is incremented. | ||
65 | * 0 if the trace is to be ignored, and preemption | ||
66 | * is not disabled and data->disabled is | ||
67 | * kept the same. | ||
68 | * | ||
69 | * Note, this function is also used outside this ifdef but | ||
70 | * inside the #ifdef of the function graph tracer below. | ||
71 | * This is OK, since the function graph tracer is | ||
72 | * dependent on the function tracer. | ||
41 | */ | 73 | */ |
42 | static void | 74 | static int |
43 | wakeup_tracer_call(unsigned long ip, unsigned long parent_ip) | 75 | func_prolog_preempt_disable(struct trace_array *tr, |
76 | struct trace_array_cpu **data, | ||
77 | int *pc) | ||
44 | { | 78 | { |
45 | struct trace_array *tr = wakeup_trace; | ||
46 | struct trace_array_cpu *data; | ||
47 | unsigned long flags; | ||
48 | long disabled; | 79 | long disabled; |
49 | int cpu; | 80 | int cpu; |
50 | int pc; | ||
51 | 81 | ||
52 | if (likely(!wakeup_task)) | 82 | if (likely(!wakeup_task)) |
53 | return; | 83 | return 0; |
54 | 84 | ||
55 | pc = preempt_count(); | 85 | *pc = preempt_count(); |
56 | preempt_disable_notrace(); | 86 | preempt_disable_notrace(); |
57 | 87 | ||
58 | cpu = raw_smp_processor_id(); | 88 | cpu = raw_smp_processor_id(); |
59 | if (cpu != wakeup_current_cpu) | 89 | if (cpu != wakeup_current_cpu) |
60 | goto out_enable; | 90 | goto out_enable; |
61 | 91 | ||
62 | data = tr->data[cpu]; | 92 | *data = tr->data[cpu]; |
63 | disabled = atomic_inc_return(&data->disabled); | 93 | disabled = atomic_inc_return(&(*data)->disabled); |
64 | if (unlikely(disabled != 1)) | 94 | if (unlikely(disabled != 1)) |
65 | goto out; | 95 | goto out; |
66 | 96 | ||
67 | local_irq_save(flags); | 97 | return 1; |
68 | 98 | ||
69 | trace_function(tr, ip, parent_ip, flags, pc); | 99 | out: |
100 | atomic_dec(&(*data)->disabled); | ||
101 | |||
102 | out_enable: | ||
103 | preempt_enable_notrace(); | ||
104 | return 0; | ||
105 | } | ||
70 | 106 | ||
107 | /* | ||
108 | * wakeup uses its own tracer function to keep the overhead down: | ||
109 | */ | ||
110 | static void | ||
111 | wakeup_tracer_call(unsigned long ip, unsigned long parent_ip) | ||
112 | { | ||
113 | struct trace_array *tr = wakeup_trace; | ||
114 | struct trace_array_cpu *data; | ||
115 | unsigned long flags; | ||
116 | int pc; | ||
117 | |||
118 | if (!func_prolog_preempt_disable(tr, &data, &pc)) | ||
119 | return; | ||
120 | |||
121 | local_irq_save(flags); | ||
122 | trace_function(tr, ip, parent_ip, flags, pc); | ||
71 | local_irq_restore(flags); | 123 | local_irq_restore(flags); |
72 | 124 | ||
73 | out: | ||
74 | atomic_dec(&data->disabled); | 125 | atomic_dec(&data->disabled); |
75 | out_enable: | ||
76 | preempt_enable_notrace(); | 126 | preempt_enable_notrace(); |
77 | } | 127 | } |
78 | 128 | ||
@@ -82,6 +132,156 @@ static struct ftrace_ops trace_ops __read_mostly = | |||
82 | }; | 132 | }; |
83 | #endif /* CONFIG_FUNCTION_TRACER */ | 133 | #endif /* CONFIG_FUNCTION_TRACER */ |
84 | 134 | ||
135 | static int start_func_tracer(int graph) | ||
136 | { | ||
137 | int ret; | ||
138 | |||
139 | if (!graph) | ||
140 | ret = register_ftrace_function(&trace_ops); | ||
141 | else | ||
142 | ret = register_ftrace_graph(&wakeup_graph_return, | ||
143 | &wakeup_graph_entry); | ||
144 | |||
145 | if (!ret && tracing_is_enabled()) | ||
146 | tracer_enabled = 1; | ||
147 | else | ||
148 | tracer_enabled = 0; | ||
149 | |||
150 | return ret; | ||
151 | } | ||
152 | |||
153 | static void stop_func_tracer(int graph) | ||
154 | { | ||
155 | tracer_enabled = 0; | ||
156 | |||
157 | if (!graph) | ||
158 | unregister_ftrace_function(&trace_ops); | ||
159 | else | ||
160 | unregister_ftrace_graph(); | ||
161 | } | ||
162 | |||
163 | #ifdef CONFIG_FUNCTION_GRAPH_TRACER | ||
164 | static int wakeup_set_flag(u32 old_flags, u32 bit, int set) | ||
165 | { | ||
166 | |||
167 | if (!(bit & TRACE_DISPLAY_GRAPH)) | ||
168 | return -EINVAL; | ||
169 | |||
170 | if (!(is_graph() ^ set)) | ||
171 | return 0; | ||
172 | |||
173 | stop_func_tracer(!set); | ||
174 | |||
175 | wakeup_reset(wakeup_trace); | ||
176 | tracing_max_latency = 0; | ||
177 | |||
178 | return start_func_tracer(set); | ||
179 | } | ||
180 | |||
181 | static int wakeup_graph_entry(struct ftrace_graph_ent *trace) | ||
182 | { | ||
183 | struct trace_array *tr = wakeup_trace; | ||
184 | struct trace_array_cpu *data; | ||
185 | unsigned long flags; | ||
186 | int pc, ret = 0; | ||
187 | |||
188 | if (!func_prolog_preempt_disable(tr, &data, &pc)) | ||
189 | return 0; | ||
190 | |||
191 | local_save_flags(flags); | ||
192 | ret = __trace_graph_entry(tr, trace, flags, pc); | ||
193 | atomic_dec(&data->disabled); | ||
194 | preempt_enable_notrace(); | ||
195 | |||
196 | return ret; | ||
197 | } | ||
198 | |||
199 | static void wakeup_graph_return(struct ftrace_graph_ret *trace) | ||
200 | { | ||
201 | struct trace_array *tr = wakeup_trace; | ||
202 | struct trace_array_cpu *data; | ||
203 | unsigned long flags; | ||
204 | int pc; | ||
205 | |||
206 | if (!func_prolog_preempt_disable(tr, &data, &pc)) | ||
207 | return; | ||
208 | |||
209 | local_save_flags(flags); | ||
210 | __trace_graph_return(tr, trace, flags, pc); | ||
211 | atomic_dec(&data->disabled); | ||
212 | |||
213 | preempt_enable_notrace(); | ||
214 | return; | ||
215 | } | ||
216 | |||
217 | static void wakeup_trace_open(struct trace_iterator *iter) | ||
218 | { | ||
219 | if (is_graph()) | ||
220 | graph_trace_open(iter); | ||
221 | } | ||
222 | |||
223 | static void wakeup_trace_close(struct trace_iterator *iter) | ||
224 | { | ||
225 | if (iter->private) | ||
226 | graph_trace_close(iter); | ||
227 | } | ||
228 | |||
229 | #define GRAPH_TRACER_FLAGS (TRACE_GRAPH_PRINT_PROC) | ||
230 | |||
231 | static enum print_line_t wakeup_print_line(struct trace_iterator *iter) | ||
232 | { | ||
233 | /* | ||
234 | * In graph mode call the graph tracer output function, | ||
235 | * otherwise go with the TRACE_FN event handler | ||
236 | */ | ||
237 | if (is_graph()) | ||
238 | return print_graph_function_flags(iter, GRAPH_TRACER_FLAGS); | ||
239 | |||
240 | return TRACE_TYPE_UNHANDLED; | ||
241 | } | ||
242 | |||
243 | static void wakeup_print_header(struct seq_file *s) | ||
244 | { | ||
245 | if (is_graph()) | ||
246 | print_graph_headers_flags(s, GRAPH_TRACER_FLAGS); | ||
247 | else | ||
248 | trace_default_header(s); | ||
249 | } | ||
250 | |||
251 | static void | ||
252 | __trace_function(struct trace_array *tr, | ||
253 | unsigned long ip, unsigned long parent_ip, | ||
254 | unsigned long flags, int pc) | ||
255 | { | ||
256 | if (is_graph()) | ||
257 | trace_graph_function(tr, ip, parent_ip, flags, pc); | ||
258 | else | ||
259 | trace_function(tr, ip, parent_ip, flags, pc); | ||
260 | } | ||
261 | #else | ||
262 | #define __trace_function trace_function | ||
263 | |||
264 | static int wakeup_set_flag(u32 old_flags, u32 bit, int set) | ||
265 | { | ||
266 | return -EINVAL; | ||
267 | } | ||
268 | |||
269 | static int wakeup_graph_entry(struct ftrace_graph_ent *trace) | ||
270 | { | ||
271 | return -1; | ||
272 | } | ||
273 | |||
274 | static enum print_line_t wakeup_print_line(struct trace_iterator *iter) | ||
275 | { | ||
276 | return TRACE_TYPE_UNHANDLED; | ||
277 | } | ||
278 | |||
279 | static void wakeup_graph_return(struct ftrace_graph_ret *trace) { } | ||
280 | static void wakeup_print_header(struct seq_file *s) { } | ||
281 | static void wakeup_trace_open(struct trace_iterator *iter) { } | ||
282 | static void wakeup_trace_close(struct trace_iterator *iter) { } | ||
283 | #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ | ||
284 | |||
85 | /* | 285 | /* |
86 | * Should this new latency be reported/recorded? | 286 | * Should this new latency be reported/recorded? |
87 | */ | 287 | */ |
@@ -152,7 +352,7 @@ probe_wakeup_sched_switch(void *ignore, | |||
152 | /* The task we are waiting for is waking up */ | 352 | /* The task we are waiting for is waking up */ |
153 | data = wakeup_trace->data[wakeup_cpu]; | 353 | data = wakeup_trace->data[wakeup_cpu]; |
154 | 354 | ||
155 | trace_function(wakeup_trace, CALLER_ADDR0, CALLER_ADDR1, flags, pc); | 355 | __trace_function(wakeup_trace, CALLER_ADDR0, CALLER_ADDR1, flags, pc); |
156 | tracing_sched_switch_trace(wakeup_trace, prev, next, flags, pc); | 356 | tracing_sched_switch_trace(wakeup_trace, prev, next, flags, pc); |
157 | 357 | ||
158 | T0 = data->preempt_timestamp; | 358 | T0 = data->preempt_timestamp; |
@@ -252,7 +452,7 @@ probe_wakeup(void *ignore, struct task_struct *p, int success) | |||
252 | * is not called by an assembly function (whereas schedule is) | 452 | * is not called by an assembly function (whereas schedule is) |
253 | * it should be safe to use it here. | 453 | * it should be safe to use it here. |
254 | */ | 454 | */ |
255 | trace_function(wakeup_trace, CALLER_ADDR1, CALLER_ADDR2, flags, pc); | 455 | __trace_function(wakeup_trace, CALLER_ADDR1, CALLER_ADDR2, flags, pc); |
256 | 456 | ||
257 | out_locked: | 457 | out_locked: |
258 | arch_spin_unlock(&wakeup_lock); | 458 | arch_spin_unlock(&wakeup_lock); |
@@ -303,12 +503,8 @@ static void start_wakeup_tracer(struct trace_array *tr) | |||
303 | */ | 503 | */ |
304 | smp_wmb(); | 504 | smp_wmb(); |
305 | 505 | ||
306 | register_ftrace_function(&trace_ops); | 506 | if (start_func_tracer(is_graph())) |
307 | 507 | printk(KERN_ERR "failed to start wakeup tracer\n"); | |
308 | if (tracing_is_enabled()) | ||
309 | tracer_enabled = 1; | ||
310 | else | ||
311 | tracer_enabled = 0; | ||
312 | 508 | ||
313 | return; | 509 | return; |
314 | fail_deprobe_wake_new: | 510 | fail_deprobe_wake_new: |
@@ -320,7 +516,7 @@ fail_deprobe: | |||
320 | static void stop_wakeup_tracer(struct trace_array *tr) | 516 | static void stop_wakeup_tracer(struct trace_array *tr) |
321 | { | 517 | { |
322 | tracer_enabled = 0; | 518 | tracer_enabled = 0; |
323 | unregister_ftrace_function(&trace_ops); | 519 | stop_func_tracer(is_graph()); |
324 | unregister_trace_sched_switch(probe_wakeup_sched_switch, NULL); | 520 | unregister_trace_sched_switch(probe_wakeup_sched_switch, NULL); |
325 | unregister_trace_sched_wakeup_new(probe_wakeup, NULL); | 521 | unregister_trace_sched_wakeup_new(probe_wakeup, NULL); |
326 | unregister_trace_sched_wakeup(probe_wakeup, NULL); | 522 | unregister_trace_sched_wakeup(probe_wakeup, NULL); |
@@ -379,9 +575,15 @@ static struct tracer wakeup_tracer __read_mostly = | |||
379 | .start = wakeup_tracer_start, | 575 | .start = wakeup_tracer_start, |
380 | .stop = wakeup_tracer_stop, | 576 | .stop = wakeup_tracer_stop, |
381 | .print_max = 1, | 577 | .print_max = 1, |
578 | .print_header = wakeup_print_header, | ||
579 | .print_line = wakeup_print_line, | ||
580 | .flags = &tracer_flags, | ||
581 | .set_flag = wakeup_set_flag, | ||
382 | #ifdef CONFIG_FTRACE_SELFTEST | 582 | #ifdef CONFIG_FTRACE_SELFTEST |
383 | .selftest = trace_selftest_startup_wakeup, | 583 | .selftest = trace_selftest_startup_wakeup, |
384 | #endif | 584 | #endif |
585 | .open = wakeup_trace_open, | ||
586 | .close = wakeup_trace_close, | ||
385 | .use_max_tr = 1, | 587 | .use_max_tr = 1, |
386 | }; | 588 | }; |
387 | 589 | ||
@@ -394,9 +596,15 @@ static struct tracer wakeup_rt_tracer __read_mostly = | |||
394 | .stop = wakeup_tracer_stop, | 596 | .stop = wakeup_tracer_stop, |
395 | .wait_pipe = poll_wait_pipe, | 597 | .wait_pipe = poll_wait_pipe, |
396 | .print_max = 1, | 598 | .print_max = 1, |
599 | .print_header = wakeup_print_header, | ||
600 | .print_line = wakeup_print_line, | ||
601 | .flags = &tracer_flags, | ||
602 | .set_flag = wakeup_set_flag, | ||
397 | #ifdef CONFIG_FTRACE_SELFTEST | 603 | #ifdef CONFIG_FTRACE_SELFTEST |
398 | .selftest = trace_selftest_startup_wakeup, | 604 | .selftest = trace_selftest_startup_wakeup, |
399 | #endif | 605 | #endif |
606 | .open = wakeup_trace_open, | ||
607 | .close = wakeup_trace_close, | ||
400 | .use_max_tr = 1, | 608 | .use_max_tr = 1, |
401 | }; | 609 | }; |
402 | 610 | ||
diff --git a/kernel/trace/trace_workqueue.c b/kernel/trace/trace_workqueue.c index a7cc3793baf6..209b379a4721 100644 --- a/kernel/trace/trace_workqueue.c +++ b/kernel/trace/trace_workqueue.c | |||
@@ -263,6 +263,11 @@ int __init trace_workqueue_early_init(void) | |||
263 | { | 263 | { |
264 | int ret, cpu; | 264 | int ret, cpu; |
265 | 265 | ||
266 | for_each_possible_cpu(cpu) { | ||
267 | spin_lock_init(&workqueue_cpu_stat(cpu)->lock); | ||
268 | INIT_LIST_HEAD(&workqueue_cpu_stat(cpu)->list); | ||
269 | } | ||
270 | |||
266 | ret = register_trace_workqueue_insertion(probe_workqueue_insertion, NULL); | 271 | ret = register_trace_workqueue_insertion(probe_workqueue_insertion, NULL); |
267 | if (ret) | 272 | if (ret) |
268 | goto out; | 273 | goto out; |
@@ -279,11 +284,6 @@ int __init trace_workqueue_early_init(void) | |||
279 | if (ret) | 284 | if (ret) |
280 | goto no_creation; | 285 | goto no_creation; |
281 | 286 | ||
282 | for_each_possible_cpu(cpu) { | ||
283 | spin_lock_init(&workqueue_cpu_stat(cpu)->lock); | ||
284 | INIT_LIST_HEAD(&workqueue_cpu_stat(cpu)->list); | ||
285 | } | ||
286 | |||
287 | return 0; | 287 | return 0; |
288 | 288 | ||
289 | no_creation: | 289 | no_creation: |
diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c index c77f3eceea25..e95ee7f31d43 100644 --- a/kernel/tracepoint.c +++ b/kernel/tracepoint.c | |||
@@ -25,6 +25,7 @@ | |||
25 | #include <linux/err.h> | 25 | #include <linux/err.h> |
26 | #include <linux/slab.h> | 26 | #include <linux/slab.h> |
27 | #include <linux/sched.h> | 27 | #include <linux/sched.h> |
28 | #include <linux/jump_label.h> | ||
28 | 29 | ||
29 | extern struct tracepoint __start___tracepoints[]; | 30 | extern struct tracepoint __start___tracepoints[]; |
30 | extern struct tracepoint __stop___tracepoints[]; | 31 | extern struct tracepoint __stop___tracepoints[]; |
@@ -263,7 +264,13 @@ static void set_tracepoint(struct tracepoint_entry **entry, | |||
263 | * is used. | 264 | * is used. |
264 | */ | 265 | */ |
265 | rcu_assign_pointer(elem->funcs, (*entry)->funcs); | 266 | rcu_assign_pointer(elem->funcs, (*entry)->funcs); |
266 | elem->state = active; | 267 | if (!elem->state && active) { |
268 | jump_label_enable(&elem->state); | ||
269 | elem->state = active; | ||
270 | } else if (elem->state && !active) { | ||
271 | jump_label_disable(&elem->state); | ||
272 | elem->state = active; | ||
273 | } | ||
267 | } | 274 | } |
268 | 275 | ||
269 | /* | 276 | /* |
@@ -277,7 +284,10 @@ static void disable_tracepoint(struct tracepoint *elem) | |||
277 | if (elem->unregfunc && elem->state) | 284 | if (elem->unregfunc && elem->state) |
278 | elem->unregfunc(); | 285 | elem->unregfunc(); |
279 | 286 | ||
280 | elem->state = 0; | 287 | if (elem->state) { |
288 | jump_label_disable(&elem->state); | ||
289 | elem->state = 0; | ||
290 | } | ||
281 | rcu_assign_pointer(elem->funcs, NULL); | 291 | rcu_assign_pointer(elem->funcs, NULL); |
282 | } | 292 | } |
283 | 293 | ||
diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 7f9c3c52ecc1..bafba687a6d8 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c | |||
@@ -43,7 +43,6 @@ static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts_saved); | |||
43 | static DEFINE_PER_CPU(struct perf_event *, watchdog_ev); | 43 | static DEFINE_PER_CPU(struct perf_event *, watchdog_ev); |
44 | #endif | 44 | #endif |
45 | 45 | ||
46 | static int __read_mostly did_panic; | ||
47 | static int __initdata no_watchdog; | 46 | static int __initdata no_watchdog; |
48 | 47 | ||
49 | 48 | ||
@@ -187,18 +186,6 @@ static int is_softlockup(unsigned long touch_ts) | |||
187 | return 0; | 186 | return 0; |
188 | } | 187 | } |
189 | 188 | ||
190 | static int | ||
191 | watchdog_panic(struct notifier_block *this, unsigned long event, void *ptr) | ||
192 | { | ||
193 | did_panic = 1; | ||
194 | |||
195 | return NOTIFY_DONE; | ||
196 | } | ||
197 | |||
198 | static struct notifier_block panic_block = { | ||
199 | .notifier_call = watchdog_panic, | ||
200 | }; | ||
201 | |||
202 | #ifdef CONFIG_HARDLOCKUP_DETECTOR | 189 | #ifdef CONFIG_HARDLOCKUP_DETECTOR |
203 | static struct perf_event_attr wd_hw_attr = { | 190 | static struct perf_event_attr wd_hw_attr = { |
204 | .type = PERF_TYPE_HARDWARE, | 191 | .type = PERF_TYPE_HARDWARE, |
@@ -209,7 +196,7 @@ static struct perf_event_attr wd_hw_attr = { | |||
209 | }; | 196 | }; |
210 | 197 | ||
211 | /* Callback function for perf event subsystem */ | 198 | /* Callback function for perf event subsystem */ |
212 | void watchdog_overflow_callback(struct perf_event *event, int nmi, | 199 | static void watchdog_overflow_callback(struct perf_event *event, int nmi, |
213 | struct perf_sample_data *data, | 200 | struct perf_sample_data *data, |
214 | struct pt_regs *regs) | 201 | struct pt_regs *regs) |
215 | { | 202 | { |
@@ -371,14 +358,14 @@ static int watchdog_nmi_enable(int cpu) | |||
371 | /* Try to register using hardware perf events */ | 358 | /* Try to register using hardware perf events */ |
372 | wd_attr = &wd_hw_attr; | 359 | wd_attr = &wd_hw_attr; |
373 | wd_attr->sample_period = hw_nmi_get_sample_period(); | 360 | wd_attr->sample_period = hw_nmi_get_sample_period(); |
374 | event = perf_event_create_kernel_counter(wd_attr, cpu, -1, watchdog_overflow_callback); | 361 | event = perf_event_create_kernel_counter(wd_attr, cpu, NULL, watchdog_overflow_callback); |
375 | if (!IS_ERR(event)) { | 362 | if (!IS_ERR(event)) { |
376 | printk(KERN_INFO "NMI watchdog enabled, takes one hw-pmu counter.\n"); | 363 | printk(KERN_INFO "NMI watchdog enabled, takes one hw-pmu counter.\n"); |
377 | goto out_save; | 364 | goto out_save; |
378 | } | 365 | } |
379 | 366 | ||
380 | printk(KERN_ERR "NMI watchdog failed to create perf event on cpu%i: %p\n", cpu, event); | 367 | printk(KERN_ERR "NMI watchdog failed to create perf event on cpu%i: %p\n", cpu, event); |
381 | return -1; | 368 | return PTR_ERR(event); |
382 | 369 | ||
383 | /* success path */ | 370 | /* success path */ |
384 | out_save: | 371 | out_save: |
@@ -422,17 +409,19 @@ static int watchdog_prepare_cpu(int cpu) | |||
422 | static int watchdog_enable(int cpu) | 409 | static int watchdog_enable(int cpu) |
423 | { | 410 | { |
424 | struct task_struct *p = per_cpu(softlockup_watchdog, cpu); | 411 | struct task_struct *p = per_cpu(softlockup_watchdog, cpu); |
412 | int err; | ||
425 | 413 | ||
426 | /* enable the perf event */ | 414 | /* enable the perf event */ |
427 | if (watchdog_nmi_enable(cpu) != 0) | 415 | err = watchdog_nmi_enable(cpu); |
428 | return -1; | 416 | if (err) |
417 | return err; | ||
429 | 418 | ||
430 | /* create the watchdog thread */ | 419 | /* create the watchdog thread */ |
431 | if (!p) { | 420 | if (!p) { |
432 | p = kthread_create(watchdog, (void *)(unsigned long)cpu, "watchdog/%d", cpu); | 421 | p = kthread_create(watchdog, (void *)(unsigned long)cpu, "watchdog/%d", cpu); |
433 | if (IS_ERR(p)) { | 422 | if (IS_ERR(p)) { |
434 | printk(KERN_ERR "softlockup watchdog for %i failed\n", cpu); | 423 | printk(KERN_ERR "softlockup watchdog for %i failed\n", cpu); |
435 | return -1; | 424 | return PTR_ERR(p); |
436 | } | 425 | } |
437 | kthread_bind(p, cpu); | 426 | kthread_bind(p, cpu); |
438 | per_cpu(watchdog_touch_ts, cpu) = 0; | 427 | per_cpu(watchdog_touch_ts, cpu) = 0; |
@@ -484,6 +473,9 @@ static void watchdog_disable_all_cpus(void) | |||
484 | { | 473 | { |
485 | int cpu; | 474 | int cpu; |
486 | 475 | ||
476 | if (no_watchdog) | ||
477 | return; | ||
478 | |||
487 | for_each_online_cpu(cpu) | 479 | for_each_online_cpu(cpu) |
488 | watchdog_disable(cpu); | 480 | watchdog_disable(cpu); |
489 | 481 | ||
@@ -526,17 +518,16 @@ static int __cpuinit | |||
526 | cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) | 518 | cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) |
527 | { | 519 | { |
528 | int hotcpu = (unsigned long)hcpu; | 520 | int hotcpu = (unsigned long)hcpu; |
521 | int err = 0; | ||
529 | 522 | ||
530 | switch (action) { | 523 | switch (action) { |
531 | case CPU_UP_PREPARE: | 524 | case CPU_UP_PREPARE: |
532 | case CPU_UP_PREPARE_FROZEN: | 525 | case CPU_UP_PREPARE_FROZEN: |
533 | if (watchdog_prepare_cpu(hotcpu)) | 526 | err = watchdog_prepare_cpu(hotcpu); |
534 | return NOTIFY_BAD; | ||
535 | break; | 527 | break; |
536 | case CPU_ONLINE: | 528 | case CPU_ONLINE: |
537 | case CPU_ONLINE_FROZEN: | 529 | case CPU_ONLINE_FROZEN: |
538 | if (watchdog_enable(hotcpu)) | 530 | err = watchdog_enable(hotcpu); |
539 | return NOTIFY_BAD; | ||
540 | break; | 531 | break; |
541 | #ifdef CONFIG_HOTPLUG_CPU | 532 | #ifdef CONFIG_HOTPLUG_CPU |
542 | case CPU_UP_CANCELED: | 533 | case CPU_UP_CANCELED: |
@@ -549,7 +540,7 @@ cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) | |||
549 | break; | 540 | break; |
550 | #endif /* CONFIG_HOTPLUG_CPU */ | 541 | #endif /* CONFIG_HOTPLUG_CPU */ |
551 | } | 542 | } |
552 | return NOTIFY_OK; | 543 | return notifier_from_errno(err); |
553 | } | 544 | } |
554 | 545 | ||
555 | static struct notifier_block __cpuinitdata cpu_nfb = { | 546 | static struct notifier_block __cpuinitdata cpu_nfb = { |
@@ -565,13 +556,11 @@ static int __init spawn_watchdog_task(void) | |||
565 | return 0; | 556 | return 0; |
566 | 557 | ||
567 | err = cpu_callback(&cpu_nfb, CPU_UP_PREPARE, cpu); | 558 | err = cpu_callback(&cpu_nfb, CPU_UP_PREPARE, cpu); |
568 | WARN_ON(err == NOTIFY_BAD); | 559 | WARN_ON(notifier_to_errno(err)); |
569 | 560 | ||
570 | cpu_callback(&cpu_nfb, CPU_ONLINE, cpu); | 561 | cpu_callback(&cpu_nfb, CPU_ONLINE, cpu); |
571 | register_cpu_notifier(&cpu_nfb); | 562 | register_cpu_notifier(&cpu_nfb); |
572 | 563 | ||
573 | atomic_notifier_chain_register(&panic_notifier_list, &panic_block); | ||
574 | |||
575 | return 0; | 564 | return 0; |
576 | } | 565 | } |
577 | early_initcall(spawn_watchdog_task); | 566 | early_initcall(spawn_watchdog_task); |
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 1b4afd2e6ca0..21ac83070a80 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug | |||
@@ -482,6 +482,7 @@ config PROVE_LOCKING | |||
482 | select DEBUG_SPINLOCK | 482 | select DEBUG_SPINLOCK |
483 | select DEBUG_MUTEXES | 483 | select DEBUG_MUTEXES |
484 | select DEBUG_LOCK_ALLOC | 484 | select DEBUG_LOCK_ALLOC |
485 | select TRACE_IRQFLAGS | ||
485 | default n | 486 | default n |
486 | help | 487 | help |
487 | This feature enables the kernel to prove that all locking | 488 | This feature enables the kernel to prove that all locking |
@@ -539,6 +540,23 @@ config PROVE_RCU_REPEATEDLY | |||
539 | disabling, allowing multiple RCU-lockdep warnings to be printed | 540 | disabling, allowing multiple RCU-lockdep warnings to be printed |
540 | on a single reboot. | 541 | on a single reboot. |
541 | 542 | ||
543 | Say Y to allow multiple RCU-lockdep warnings per boot. | ||
544 | |||
545 | Say N if you are unsure. | ||
546 | |||
547 | config SPARSE_RCU_POINTER | ||
548 | bool "RCU debugging: sparse-based checks for pointer usage" | ||
549 | default n | ||
550 | help | ||
551 | This feature enables the __rcu sparse annotation for | ||
552 | RCU-protected pointers. This annotation will cause sparse | ||
553 | to flag any non-RCU use of annotated pointers. This can be | ||
554 | helpful when debugging RCU usage. Please note that this feature | ||
555 | is not intended to enforce code cleanliness; it is instead merely | ||
556 | a debugging aid. | ||
557 | |||
558 | Say Y to make sparse flag questionable use of RCU-protected pointers | ||
559 | |||
542 | Say N if you are unsure. | 560 | Say N if you are unsure. |
543 | 561 | ||
544 | config LOCKDEP | 562 | config LOCKDEP |
@@ -579,11 +597,10 @@ config DEBUG_LOCKDEP | |||
579 | of more runtime overhead. | 597 | of more runtime overhead. |
580 | 598 | ||
581 | config TRACE_IRQFLAGS | 599 | config TRACE_IRQFLAGS |
582 | depends on DEBUG_KERNEL | ||
583 | bool | 600 | bool |
584 | default y | 601 | help |
585 | depends on TRACE_IRQFLAGS_SUPPORT | 602 | Enables hooks to interrupt enabling and disabling for |
586 | depends on PROVE_LOCKING | 603 | either tracing or lock debugging. |
587 | 604 | ||
588 | config DEBUG_SPINLOCK_SLEEP | 605 | config DEBUG_SPINLOCK_SLEEP |
589 | bool "Spinlock debugging: sleep-inside-spinlock checking" | 606 | bool "Spinlock debugging: sleep-inside-spinlock checking" |
@@ -832,6 +849,30 @@ config RCU_CPU_STALL_DETECTOR | |||
832 | 849 | ||
833 | Say Y if you are unsure. | 850 | Say Y if you are unsure. |
834 | 851 | ||
852 | config RCU_CPU_STALL_TIMEOUT | ||
853 | int "RCU CPU stall timeout in seconds" | ||
854 | depends on RCU_CPU_STALL_DETECTOR | ||
855 | range 3 300 | ||
856 | default 60 | ||
857 | help | ||
858 | If a given RCU grace period extends more than the specified | ||
859 | number of seconds, a CPU stall warning is printed. If the | ||
860 | RCU grace period persists, additional CPU stall warnings are | ||
861 | printed at more widely spaced intervals. | ||
862 | |||
863 | config RCU_CPU_STALL_DETECTOR_RUNNABLE | ||
864 | bool "RCU CPU stall checking starts automatically at boot" | ||
865 | depends on RCU_CPU_STALL_DETECTOR | ||
866 | default y | ||
867 | help | ||
868 | If set, start checking for RCU CPU stalls immediately on | ||
869 | boot. Otherwise, RCU CPU stall checking must be manually | ||
870 | enabled. | ||
871 | |||
872 | Say Y if you are unsure. | ||
873 | |||
874 | Say N if you wish to suppress RCU CPU stall checking during boot. | ||
875 | |||
835 | config RCU_CPU_STALL_VERBOSE | 876 | config RCU_CPU_STALL_VERBOSE |
836 | bool "Print additional per-task information for RCU_CPU_STALL_DETECTOR" | 877 | bool "Print additional per-task information for RCU_CPU_STALL_DETECTOR" |
837 | depends on RCU_CPU_STALL_DETECTOR && TREE_PREEMPT_RCU | 878 | depends on RCU_CPU_STALL_DETECTOR && TREE_PREEMPT_RCU |
diff --git a/lib/dynamic_debug.c b/lib/dynamic_debug.c index 02afc2533728..7bd6df781ce5 100644 --- a/lib/dynamic_debug.c +++ b/lib/dynamic_debug.c | |||
@@ -26,19 +26,11 @@ | |||
26 | #include <linux/dynamic_debug.h> | 26 | #include <linux/dynamic_debug.h> |
27 | #include <linux/debugfs.h> | 27 | #include <linux/debugfs.h> |
28 | #include <linux/slab.h> | 28 | #include <linux/slab.h> |
29 | #include <linux/jump_label.h> | ||
29 | 30 | ||
30 | extern struct _ddebug __start___verbose[]; | 31 | extern struct _ddebug __start___verbose[]; |
31 | extern struct _ddebug __stop___verbose[]; | 32 | extern struct _ddebug __stop___verbose[]; |
32 | 33 | ||
33 | /* dynamic_debug_enabled, and dynamic_debug_enabled2 are bitmasks in which | ||
34 | * bit n is set to 1 if any modname hashes into the bucket n, 0 otherwise. They | ||
35 | * use independent hash functions, to reduce the chance of false positives. | ||
36 | */ | ||
37 | long long dynamic_debug_enabled; | ||
38 | EXPORT_SYMBOL_GPL(dynamic_debug_enabled); | ||
39 | long long dynamic_debug_enabled2; | ||
40 | EXPORT_SYMBOL_GPL(dynamic_debug_enabled2); | ||
41 | |||
42 | struct ddebug_table { | 34 | struct ddebug_table { |
43 | struct list_head link; | 35 | struct list_head link; |
44 | char *mod_name; | 36 | char *mod_name; |
@@ -88,26 +80,6 @@ static char *ddebug_describe_flags(struct _ddebug *dp, char *buf, | |||
88 | } | 80 | } |
89 | 81 | ||
90 | /* | 82 | /* |
91 | * must be called with ddebug_lock held | ||
92 | */ | ||
93 | |||
94 | static int disabled_hash(char hash, bool first_table) | ||
95 | { | ||
96 | struct ddebug_table *dt; | ||
97 | char table_hash_value; | ||
98 | |||
99 | list_for_each_entry(dt, &ddebug_tables, link) { | ||
100 | if (first_table) | ||
101 | table_hash_value = dt->ddebugs->primary_hash; | ||
102 | else | ||
103 | table_hash_value = dt->ddebugs->secondary_hash; | ||
104 | if (dt->num_enabled && (hash == table_hash_value)) | ||
105 | return 0; | ||
106 | } | ||
107 | return 1; | ||
108 | } | ||
109 | |||
110 | /* | ||
111 | * Search the tables for _ddebug's which match the given | 83 | * Search the tables for _ddebug's which match the given |
112 | * `query' and apply the `flags' and `mask' to them. Tells | 84 | * `query' and apply the `flags' and `mask' to them. Tells |
113 | * the user which ddebug's were changed, or whether none | 85 | * the user which ddebug's were changed, or whether none |
@@ -170,17 +142,9 @@ static void ddebug_change(const struct ddebug_query *query, | |||
170 | dt->num_enabled++; | 142 | dt->num_enabled++; |
171 | dp->flags = newflags; | 143 | dp->flags = newflags; |
172 | if (newflags) { | 144 | if (newflags) { |
173 | dynamic_debug_enabled |= | 145 | jump_label_enable(&dp->enabled); |
174 | (1LL << dp->primary_hash); | ||
175 | dynamic_debug_enabled2 |= | ||
176 | (1LL << dp->secondary_hash); | ||
177 | } else { | 146 | } else { |
178 | if (disabled_hash(dp->primary_hash, true)) | 147 | jump_label_disable(&dp->enabled); |
179 | dynamic_debug_enabled &= | ||
180 | ~(1LL << dp->primary_hash); | ||
181 | if (disabled_hash(dp->secondary_hash, false)) | ||
182 | dynamic_debug_enabled2 &= | ||
183 | ~(1LL << dp->secondary_hash); | ||
184 | } | 148 | } |
185 | if (verbose) | 149 | if (verbose) |
186 | printk(KERN_INFO | 150 | printk(KERN_INFO |
diff --git a/lib/radix-tree.c b/lib/radix-tree.c index efd16fa80b1c..6f412ab4c24f 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c | |||
@@ -49,7 +49,7 @@ struct radix_tree_node { | |||
49 | unsigned int height; /* Height from the bottom */ | 49 | unsigned int height; /* Height from the bottom */ |
50 | unsigned int count; | 50 | unsigned int count; |
51 | struct rcu_head rcu_head; | 51 | struct rcu_head rcu_head; |
52 | void *slots[RADIX_TREE_MAP_SIZE]; | 52 | void __rcu *slots[RADIX_TREE_MAP_SIZE]; |
53 | unsigned long tags[RADIX_TREE_MAX_TAGS][RADIX_TREE_TAG_LONGS]; | 53 | unsigned long tags[RADIX_TREE_MAX_TAGS][RADIX_TREE_TAG_LONGS]; |
54 | }; | 54 | }; |
55 | 55 | ||
diff --git a/net/Kconfig b/net/Kconfig index e926884c1675..55fd82e9ffd9 100644 --- a/net/Kconfig +++ b/net/Kconfig | |||
@@ -293,6 +293,7 @@ source "net/wimax/Kconfig" | |||
293 | source "net/rfkill/Kconfig" | 293 | source "net/rfkill/Kconfig" |
294 | source "net/9p/Kconfig" | 294 | source "net/9p/Kconfig" |
295 | source "net/caif/Kconfig" | 295 | source "net/caif/Kconfig" |
296 | source "net/ceph/Kconfig" | ||
296 | 297 | ||
297 | 298 | ||
298 | endif # if NET | 299 | endif # if NET |
diff --git a/net/Makefile b/net/Makefile index ea60fbce9b1b..6b7bfd7f1416 100644 --- a/net/Makefile +++ b/net/Makefile | |||
@@ -68,3 +68,4 @@ obj-$(CONFIG_SYSCTL) += sysctl_net.o | |||
68 | endif | 68 | endif |
69 | obj-$(CONFIG_WIMAX) += wimax/ | 69 | obj-$(CONFIG_WIMAX) += wimax/ |
70 | obj-$(CONFIG_DNS_RESOLVER) += dns_resolver/ | 70 | obj-$(CONFIG_DNS_RESOLVER) += dns_resolver/ |
71 | obj-$(CONFIG_CEPH_LIB) += ceph/ | ||
diff --git a/net/atm/mpc.c b/net/atm/mpc.c index 622b471e14e0..74bcc662c3dd 100644 --- a/net/atm/mpc.c +++ b/net/atm/mpc.c | |||
@@ -778,7 +778,7 @@ static void mpc_push(struct atm_vcc *vcc, struct sk_buff *skb) | |||
778 | eg->packets_rcvd++; | 778 | eg->packets_rcvd++; |
779 | mpc->eg_ops->put(eg); | 779 | mpc->eg_ops->put(eg); |
780 | 780 | ||
781 | memset(ATM_SKB(skb), 0, sizeof(struct atm_skb_data)); | 781 | memset(ATM_SKB(new_skb), 0, sizeof(struct atm_skb_data)); |
782 | netif_rx(new_skb); | 782 | netif_rx(new_skb); |
783 | } | 783 | } |
784 | 784 | ||
diff --git a/net/ceph/Kconfig b/net/ceph/Kconfig new file mode 100644 index 000000000000..ad424049b0cf --- /dev/null +++ b/net/ceph/Kconfig | |||
@@ -0,0 +1,28 @@ | |||
1 | config CEPH_LIB | ||
2 | tristate "Ceph core library (EXPERIMENTAL)" | ||
3 | depends on INET && EXPERIMENTAL | ||
4 | select LIBCRC32C | ||
5 | select CRYPTO_AES | ||
6 | select CRYPTO | ||
7 | default n | ||
8 | help | ||
9 | Choose Y or M here to include cephlib, which provides the | ||
10 | common functionality to both the Ceph filesystem and | ||
11 | to the rados block device (rbd). | ||
12 | |||
13 | More information at http://ceph.newdream.net/. | ||
14 | |||
15 | If unsure, say N. | ||
16 | |||
17 | config CEPH_LIB_PRETTYDEBUG | ||
18 | bool "Include file:line in ceph debug output" | ||
19 | depends on CEPH_LIB | ||
20 | default n | ||
21 | help | ||
22 | If you say Y here, debug output will include a filename and | ||
23 | line to aid debugging. This increases kernel size and slows | ||
24 | execution slightly when debug call sites are enabled (e.g., | ||
25 | via CONFIG_DYNAMIC_DEBUG). | ||
26 | |||
27 | If unsure, say N. | ||
28 | |||
diff --git a/net/ceph/Makefile b/net/ceph/Makefile new file mode 100644 index 000000000000..aab1cabb8035 --- /dev/null +++ b/net/ceph/Makefile | |||
@@ -0,0 +1,37 @@ | |||
1 | # | ||
2 | # Makefile for the Ceph core library (libceph). | ||
3 | # | ||
4 | |||
5 | ifneq ($(KERNELRELEASE),) | ||
6 | |||
7 | obj-$(CONFIG_CEPH_LIB) += libceph.o | ||
8 | |||
9 | libceph-objs := ceph_common.o messenger.o msgpool.o buffer.o pagelist.o \ | ||
10 | mon_client.o \ | ||
11 | osd_client.o osdmap.o crush/crush.o crush/mapper.o crush/hash.o \ | ||
12 | debugfs.o \ | ||
13 | auth.o auth_none.o \ | ||
14 | crypto.o armor.o \ | ||
15 | auth_x.o \ | ||
16 | ceph_fs.o ceph_strings.o ceph_hash.o \ | ||
17 | pagevec.o | ||
18 | |||
19 | else | ||
20 | #Otherwise we were called directly from the command | ||
21 | # line; invoke the kernel build system. | ||
22 | |||
23 | KERNELDIR ?= /lib/modules/$(shell uname -r)/build | ||
24 | PWD := $(shell pwd) | ||
25 | |||
26 | default: all | ||
27 | |||
28 | all: | ||
29 | $(MAKE) -C $(KERNELDIR) M=$(PWD) CONFIG_CEPH_LIB=m modules | ||
30 | |||
31 | modules_install: | ||
32 | $(MAKE) -C $(KERNELDIR) M=$(PWD) CONFIG_CEPH_LIB=m modules_install | ||
33 | |||
34 | clean: | ||
35 | $(MAKE) -C $(KERNELDIR) M=$(PWD) clean | ||
36 | |||
37 | endif | ||
diff --git a/fs/ceph/armor.c b/net/ceph/armor.c index eb2a666b0be7..eb2a666b0be7 100644 --- a/fs/ceph/armor.c +++ b/net/ceph/armor.c | |||
diff --git a/fs/ceph/auth.c b/net/ceph/auth.c index 6d2e30600627..549c1f43e1d5 100644 --- a/fs/ceph/auth.c +++ b/net/ceph/auth.c | |||
@@ -1,16 +1,16 @@ | |||
1 | #include "ceph_debug.h" | 1 | #include <linux/ceph/ceph_debug.h> |
2 | 2 | ||
3 | #include <linux/module.h> | 3 | #include <linux/module.h> |
4 | #include <linux/err.h> | 4 | #include <linux/err.h> |
5 | #include <linux/slab.h> | 5 | #include <linux/slab.h> |
6 | 6 | ||
7 | #include "types.h" | 7 | #include <linux/ceph/types.h> |
8 | #include <linux/ceph/decode.h> | ||
9 | #include <linux/ceph/libceph.h> | ||
10 | #include <linux/ceph/messenger.h> | ||
8 | #include "auth_none.h" | 11 | #include "auth_none.h" |
9 | #include "auth_x.h" | 12 | #include "auth_x.h" |
10 | #include "decode.h" | ||
11 | #include "super.h" | ||
12 | 13 | ||
13 | #include "messenger.h" | ||
14 | 14 | ||
15 | /* | 15 | /* |
16 | * get protocol handler | 16 | * get protocol handler |
diff --git a/fs/ceph/auth_none.c b/net/ceph/auth_none.c index ad1dc21286c7..214c2bb43d62 100644 --- a/fs/ceph/auth_none.c +++ b/net/ceph/auth_none.c | |||
@@ -1,14 +1,15 @@ | |||
1 | 1 | ||
2 | #include "ceph_debug.h" | 2 | #include <linux/ceph/ceph_debug.h> |
3 | 3 | ||
4 | #include <linux/err.h> | 4 | #include <linux/err.h> |
5 | #include <linux/module.h> | 5 | #include <linux/module.h> |
6 | #include <linux/random.h> | 6 | #include <linux/random.h> |
7 | #include <linux/slab.h> | 7 | #include <linux/slab.h> |
8 | 8 | ||
9 | #include <linux/ceph/decode.h> | ||
10 | #include <linux/ceph/auth.h> | ||
11 | |||
9 | #include "auth_none.h" | 12 | #include "auth_none.h" |
10 | #include "auth.h" | ||
11 | #include "decode.h" | ||
12 | 13 | ||
13 | static void reset(struct ceph_auth_client *ac) | 14 | static void reset(struct ceph_auth_client *ac) |
14 | { | 15 | { |
diff --git a/fs/ceph/auth_none.h b/net/ceph/auth_none.h index 8164df1a08be..ed7d088b1bc9 100644 --- a/fs/ceph/auth_none.h +++ b/net/ceph/auth_none.h | |||
@@ -2,8 +2,7 @@ | |||
2 | #define _FS_CEPH_AUTH_NONE_H | 2 | #define _FS_CEPH_AUTH_NONE_H |
3 | 3 | ||
4 | #include <linux/slab.h> | 4 | #include <linux/slab.h> |
5 | 5 | #include <linux/ceph/auth.h> | |
6 | #include "auth.h" | ||
7 | 6 | ||
8 | /* | 7 | /* |
9 | * null security mode. | 8 | * null security mode. |
diff --git a/fs/ceph/auth_x.c b/net/ceph/auth_x.c index a2d002cbdec2..7fd5dfcf6e18 100644 --- a/fs/ceph/auth_x.c +++ b/net/ceph/auth_x.c | |||
@@ -1,16 +1,17 @@ | |||
1 | 1 | ||
2 | #include "ceph_debug.h" | 2 | #include <linux/ceph/ceph_debug.h> |
3 | 3 | ||
4 | #include <linux/err.h> | 4 | #include <linux/err.h> |
5 | #include <linux/module.h> | 5 | #include <linux/module.h> |
6 | #include <linux/random.h> | 6 | #include <linux/random.h> |
7 | #include <linux/slab.h> | 7 | #include <linux/slab.h> |
8 | 8 | ||
9 | #include <linux/ceph/decode.h> | ||
10 | #include <linux/ceph/auth.h> | ||
11 | |||
12 | #include "crypto.h" | ||
9 | #include "auth_x.h" | 13 | #include "auth_x.h" |
10 | #include "auth_x_protocol.h" | 14 | #include "auth_x_protocol.h" |
11 | #include "crypto.h" | ||
12 | #include "auth.h" | ||
13 | #include "decode.h" | ||
14 | 15 | ||
15 | #define TEMP_TICKET_BUF_LEN 256 | 16 | #define TEMP_TICKET_BUF_LEN 256 |
16 | 17 | ||
diff --git a/fs/ceph/auth_x.h b/net/ceph/auth_x.h index ff6f8180e681..e02da7a5c5a1 100644 --- a/fs/ceph/auth_x.h +++ b/net/ceph/auth_x.h | |||
@@ -3,8 +3,9 @@ | |||
3 | 3 | ||
4 | #include <linux/rbtree.h> | 4 | #include <linux/rbtree.h> |
5 | 5 | ||
6 | #include <linux/ceph/auth.h> | ||
7 | |||
6 | #include "crypto.h" | 8 | #include "crypto.h" |
7 | #include "auth.h" | ||
8 | #include "auth_x_protocol.h" | 9 | #include "auth_x_protocol.h" |
9 | 10 | ||
10 | /* | 11 | /* |
diff --git a/fs/ceph/auth_x_protocol.h b/net/ceph/auth_x_protocol.h index 671d30576c4f..671d30576c4f 100644 --- a/fs/ceph/auth_x_protocol.h +++ b/net/ceph/auth_x_protocol.h | |||
diff --git a/fs/ceph/buffer.c b/net/ceph/buffer.c index cd39f17021de..53d8abfa25d5 100644 --- a/fs/ceph/buffer.c +++ b/net/ceph/buffer.c | |||
@@ -1,10 +1,11 @@ | |||
1 | 1 | ||
2 | #include "ceph_debug.h" | 2 | #include <linux/ceph/ceph_debug.h> |
3 | 3 | ||
4 | #include <linux/module.h> | ||
4 | #include <linux/slab.h> | 5 | #include <linux/slab.h> |
5 | 6 | ||
6 | #include "buffer.h" | 7 | #include <linux/ceph/buffer.h> |
7 | #include "decode.h" | 8 | #include <linux/ceph/decode.h> |
8 | 9 | ||
9 | struct ceph_buffer *ceph_buffer_new(size_t len, gfp_t gfp) | 10 | struct ceph_buffer *ceph_buffer_new(size_t len, gfp_t gfp) |
10 | { | 11 | { |
@@ -32,6 +33,7 @@ struct ceph_buffer *ceph_buffer_new(size_t len, gfp_t gfp) | |||
32 | dout("buffer_new %p\n", b); | 33 | dout("buffer_new %p\n", b); |
33 | return b; | 34 | return b; |
34 | } | 35 | } |
36 | EXPORT_SYMBOL(ceph_buffer_new); | ||
35 | 37 | ||
36 | void ceph_buffer_release(struct kref *kref) | 38 | void ceph_buffer_release(struct kref *kref) |
37 | { | 39 | { |
@@ -46,6 +48,7 @@ void ceph_buffer_release(struct kref *kref) | |||
46 | } | 48 | } |
47 | kfree(b); | 49 | kfree(b); |
48 | } | 50 | } |
51 | EXPORT_SYMBOL(ceph_buffer_release); | ||
49 | 52 | ||
50 | int ceph_decode_buffer(struct ceph_buffer **b, void **p, void *end) | 53 | int ceph_decode_buffer(struct ceph_buffer **b, void **p, void *end) |
51 | { | 54 | { |
diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c new file mode 100644 index 000000000000..f3e4a13fea0c --- /dev/null +++ b/net/ceph/ceph_common.c | |||
@@ -0,0 +1,529 @@ | |||
1 | |||
2 | #include <linux/ceph/ceph_debug.h> | ||
3 | #include <linux/backing-dev.h> | ||
4 | #include <linux/ctype.h> | ||
5 | #include <linux/fs.h> | ||
6 | #include <linux/inet.h> | ||
7 | #include <linux/in6.h> | ||
8 | #include <linux/module.h> | ||
9 | #include <linux/mount.h> | ||
10 | #include <linux/parser.h> | ||
11 | #include <linux/sched.h> | ||
12 | #include <linux/seq_file.h> | ||
13 | #include <linux/slab.h> | ||
14 | #include <linux/statfs.h> | ||
15 | #include <linux/string.h> | ||
16 | |||
17 | |||
18 | #include <linux/ceph/libceph.h> | ||
19 | #include <linux/ceph/debugfs.h> | ||
20 | #include <linux/ceph/decode.h> | ||
21 | #include <linux/ceph/mon_client.h> | ||
22 | #include <linux/ceph/auth.h> | ||
23 | |||
24 | |||
25 | |||
/*
 * Locate the last path component inside the first @len bytes of @s
 * (/foo/bar/baz -> baz).
 *
 * Scans backwards from s + len until a '/' is found or the start of the
 * string is reached.  Returns a pointer into @s: just past the last '/',
 * or @s itself when the range contains no '/'.
 */
const char *ceph_file_part(const char *s, int len)
{
	int pos;

	for (pos = len; pos != 0; pos--) {
		if (s[pos - 1] == '/')
			break;
	}
	return s + pos;
}
37 | EXPORT_SYMBOL(ceph_file_part); | ||
38 | |||
39 | const char *ceph_msg_type_name(int type) | ||
40 | { | ||
41 | switch (type) { | ||
42 | case CEPH_MSG_SHUTDOWN: return "shutdown"; | ||
43 | case CEPH_MSG_PING: return "ping"; | ||
44 | case CEPH_MSG_AUTH: return "auth"; | ||
45 | case CEPH_MSG_AUTH_REPLY: return "auth_reply"; | ||
46 | case CEPH_MSG_MON_MAP: return "mon_map"; | ||
47 | case CEPH_MSG_MON_GET_MAP: return "mon_get_map"; | ||
48 | case CEPH_MSG_MON_SUBSCRIBE: return "mon_subscribe"; | ||
49 | case CEPH_MSG_MON_SUBSCRIBE_ACK: return "mon_subscribe_ack"; | ||
50 | case CEPH_MSG_STATFS: return "statfs"; | ||
51 | case CEPH_MSG_STATFS_REPLY: return "statfs_reply"; | ||
52 | case CEPH_MSG_MDS_MAP: return "mds_map"; | ||
53 | case CEPH_MSG_CLIENT_SESSION: return "client_session"; | ||
54 | case CEPH_MSG_CLIENT_RECONNECT: return "client_reconnect"; | ||
55 | case CEPH_MSG_CLIENT_REQUEST: return "client_request"; | ||
56 | case CEPH_MSG_CLIENT_REQUEST_FORWARD: return "client_request_forward"; | ||
57 | case CEPH_MSG_CLIENT_REPLY: return "client_reply"; | ||
58 | case CEPH_MSG_CLIENT_CAPS: return "client_caps"; | ||
59 | case CEPH_MSG_CLIENT_CAPRELEASE: return "client_cap_release"; | ||
60 | case CEPH_MSG_CLIENT_SNAP: return "client_snap"; | ||
61 | case CEPH_MSG_CLIENT_LEASE: return "client_lease"; | ||
62 | case CEPH_MSG_OSD_MAP: return "osd_map"; | ||
63 | case CEPH_MSG_OSD_OP: return "osd_op"; | ||
64 | case CEPH_MSG_OSD_OPREPLY: return "osd_opreply"; | ||
65 | default: return "unknown"; | ||
66 | } | ||
67 | } | ||
68 | EXPORT_SYMBOL(ceph_msg_type_name); | ||
69 | |||
70 | /* | ||
71 | * Initially learn our fsid, or verify an fsid matches. | ||
72 | */ | ||
73 | int ceph_check_fsid(struct ceph_client *client, struct ceph_fsid *fsid) | ||
74 | { | ||
75 | if (client->have_fsid) { | ||
76 | if (ceph_fsid_compare(&client->fsid, fsid)) { | ||
77 | pr_err("bad fsid, had %pU got %pU", | ||
78 | &client->fsid, fsid); | ||
79 | return -1; | ||
80 | } | ||
81 | } else { | ||
82 | pr_info("client%lld fsid %pU\n", ceph_client_id(client), fsid); | ||
83 | memcpy(&client->fsid, fsid, sizeof(*fsid)); | ||
84 | ceph_debugfs_client_init(client); | ||
85 | client->have_fsid = true; | ||
86 | } | ||
87 | return 0; | ||
88 | } | ||
89 | EXPORT_SYMBOL(ceph_check_fsid); | ||
90 | |||
/*
 * strcmp() that tolerates NULL arguments.
 *
 * Two NULLs compare equal; a non-NULL @s1 against a NULL @s2 yields -1 and
 * the reverse yields 1.  When both are non-NULL the strcmp() result is
 * returned unchanged.
 */
static int strcmp_null(const char *s1, const char *s2)
{
	if (s1 && s2)
		return strcmp(s1, s2);
	if (s1)
		return -1;
	return s2 ? 1 : 0;
}
101 | |||
102 | int ceph_compare_options(struct ceph_options *new_opt, | ||
103 | struct ceph_client *client) | ||
104 | { | ||
105 | struct ceph_options *opt1 = new_opt; | ||
106 | struct ceph_options *opt2 = client->options; | ||
107 | int ofs = offsetof(struct ceph_options, mon_addr); | ||
108 | int i; | ||
109 | int ret; | ||
110 | |||
111 | ret = memcmp(opt1, opt2, ofs); | ||
112 | if (ret) | ||
113 | return ret; | ||
114 | |||
115 | ret = strcmp_null(opt1->name, opt2->name); | ||
116 | if (ret) | ||
117 | return ret; | ||
118 | |||
119 | ret = strcmp_null(opt1->secret, opt2->secret); | ||
120 | if (ret) | ||
121 | return ret; | ||
122 | |||
123 | /* any matching mon ip implies a match */ | ||
124 | for (i = 0; i < opt1->num_mon; i++) { | ||
125 | if (ceph_monmap_contains(client->monc.monmap, | ||
126 | &opt1->mon_addr[i])) | ||
127 | return 0; | ||
128 | } | ||
129 | return -1; | ||
130 | } | ||
131 | EXPORT_SYMBOL(ceph_compare_options); | ||
132 | |||
133 | |||
134 | static int parse_fsid(const char *str, struct ceph_fsid *fsid) | ||
135 | { | ||
136 | int i = 0; | ||
137 | char tmp[3]; | ||
138 | int err = -EINVAL; | ||
139 | int d; | ||
140 | |||
141 | dout("parse_fsid '%s'\n", str); | ||
142 | tmp[2] = 0; | ||
143 | while (*str && i < 16) { | ||
144 | if (ispunct(*str)) { | ||
145 | str++; | ||
146 | continue; | ||
147 | } | ||
148 | if (!isxdigit(str[0]) || !isxdigit(str[1])) | ||
149 | break; | ||
150 | tmp[0] = str[0]; | ||
151 | tmp[1] = str[1]; | ||
152 | if (sscanf(tmp, "%x", &d) < 1) | ||
153 | break; | ||
154 | fsid->fsid[i] = d & 0xff; | ||
155 | i++; | ||
156 | str += 2; | ||
157 | } | ||
158 | |||
159 | if (i == 16) | ||
160 | err = 0; | ||
161 | dout("parse_fsid ret %d got fsid %pU", err, fsid); | ||
162 | return err; | ||
163 | } | ||
164 | |||
165 | /* | ||
166 | * ceph options | ||
167 | */ | ||
168 | enum { | ||
169 | Opt_osdtimeout, | ||
170 | Opt_osdkeepalivetimeout, | ||
171 | Opt_mount_timeout, | ||
172 | Opt_osd_idle_ttl, | ||
173 | Opt_last_int, | ||
174 | /* int args above */ | ||
175 | Opt_fsid, | ||
176 | Opt_name, | ||
177 | Opt_secret, | ||
178 | Opt_ip, | ||
179 | Opt_last_string, | ||
180 | /* string args above */ | ||
181 | Opt_noshare, | ||
182 | Opt_nocrc, | ||
183 | }; | ||
184 | |||
185 | static match_table_t opt_tokens = { | ||
186 | {Opt_osdtimeout, "osdtimeout=%d"}, | ||
187 | {Opt_osdkeepalivetimeout, "osdkeepalive=%d"}, | ||
188 | {Opt_mount_timeout, "mount_timeout=%d"}, | ||
189 | {Opt_osd_idle_ttl, "osd_idle_ttl=%d"}, | ||
190 | /* int args above */ | ||
191 | {Opt_fsid, "fsid=%s"}, | ||
192 | {Opt_name, "name=%s"}, | ||
193 | {Opt_secret, "secret=%s"}, | ||
194 | {Opt_ip, "ip=%s"}, | ||
195 | /* string args above */ | ||
196 | {Opt_noshare, "noshare"}, | ||
197 | {Opt_nocrc, "nocrc"}, | ||
198 | {-1, NULL} | ||
199 | }; | ||
200 | |||
201 | void ceph_destroy_options(struct ceph_options *opt) | ||
202 | { | ||
203 | dout("destroy_options %p\n", opt); | ||
204 | kfree(opt->name); | ||
205 | kfree(opt->secret); | ||
206 | kfree(opt); | ||
207 | } | ||
208 | EXPORT_SYMBOL(ceph_destroy_options); | ||
209 | |||
210 | int ceph_parse_options(struct ceph_options **popt, char *options, | ||
211 | const char *dev_name, const char *dev_name_end, | ||
212 | int (*parse_extra_token)(char *c, void *private), | ||
213 | void *private) | ||
214 | { | ||
215 | struct ceph_options *opt; | ||
216 | const char *c; | ||
217 | int err = -ENOMEM; | ||
218 | substring_t argstr[MAX_OPT_ARGS]; | ||
219 | |||
220 | opt = kzalloc(sizeof(*opt), GFP_KERNEL); | ||
221 | if (!opt) | ||
222 | return err; | ||
223 | opt->mon_addr = kcalloc(CEPH_MAX_MON, sizeof(*opt->mon_addr), | ||
224 | GFP_KERNEL); | ||
225 | if (!opt->mon_addr) | ||
226 | goto out; | ||
227 | |||
228 | dout("parse_options %p options '%s' dev_name '%s'\n", opt, options, | ||
229 | dev_name); | ||
230 | |||
231 | /* start with defaults */ | ||
232 | opt->flags = CEPH_OPT_DEFAULT; | ||
233 | opt->osd_timeout = CEPH_OSD_TIMEOUT_DEFAULT; | ||
234 | opt->osd_keepalive_timeout = CEPH_OSD_KEEPALIVE_DEFAULT; | ||
235 | opt->mount_timeout = CEPH_MOUNT_TIMEOUT_DEFAULT; /* seconds */ | ||
236 | opt->osd_idle_ttl = CEPH_OSD_IDLE_TTL_DEFAULT; /* seconds */ | ||
237 | |||
238 | /* get mon ip(s) */ | ||
239 | /* ip1[:port1][,ip2[:port2]...] */ | ||
240 | err = ceph_parse_ips(dev_name, dev_name_end, opt->mon_addr, | ||
241 | CEPH_MAX_MON, &opt->num_mon); | ||
242 | if (err < 0) | ||
243 | goto out; | ||
244 | |||
245 | /* parse mount options */ | ||
246 | while ((c = strsep(&options, ",")) != NULL) { | ||
247 | int token, intval, ret; | ||
248 | if (!*c) | ||
249 | continue; | ||
250 | err = -EINVAL; | ||
251 | token = match_token((char *)c, opt_tokens, argstr); | ||
252 | if (token < 0 && parse_extra_token) { | ||
253 | /* extra? */ | ||
254 | err = parse_extra_token((char *)c, private); | ||
255 | if (err < 0) { | ||
256 | pr_err("bad option at '%s'\n", c); | ||
257 | goto out; | ||
258 | } | ||
259 | continue; | ||
260 | } | ||
261 | if (token < Opt_last_int) { | ||
262 | ret = match_int(&argstr[0], &intval); | ||
263 | if (ret < 0) { | ||
264 | pr_err("bad mount option arg (not int) " | ||
265 | "at '%s'\n", c); | ||
266 | continue; | ||
267 | } | ||
268 | dout("got int token %d val %d\n", token, intval); | ||
269 | } else if (token > Opt_last_int && token < Opt_last_string) { | ||
270 | dout("got string token %d val %s\n", token, | ||
271 | argstr[0].from); | ||
272 | } else { | ||
273 | dout("got token %d\n", token); | ||
274 | } | ||
275 | switch (token) { | ||
276 | case Opt_ip: | ||
277 | err = ceph_parse_ips(argstr[0].from, | ||
278 | argstr[0].to, | ||
279 | &opt->my_addr, | ||
280 | 1, NULL); | ||
281 | if (err < 0) | ||
282 | goto out; | ||
283 | opt->flags |= CEPH_OPT_MYIP; | ||
284 | break; | ||
285 | |||
286 | case Opt_fsid: | ||
287 | err = parse_fsid(argstr[0].from, &opt->fsid); | ||
288 | if (err == 0) | ||
289 | opt->flags |= CEPH_OPT_FSID; | ||
290 | break; | ||
291 | case Opt_name: | ||
292 | opt->name = kstrndup(argstr[0].from, | ||
293 | argstr[0].to-argstr[0].from, | ||
294 | GFP_KERNEL); | ||
295 | break; | ||
296 | case Opt_secret: | ||
297 | opt->secret = kstrndup(argstr[0].from, | ||
298 | argstr[0].to-argstr[0].from, | ||
299 | GFP_KERNEL); | ||
300 | break; | ||
301 | |||
302 | /* misc */ | ||
303 | case Opt_osdtimeout: | ||
304 | opt->osd_timeout = intval; | ||
305 | break; | ||
306 | case Opt_osdkeepalivetimeout: | ||
307 | opt->osd_keepalive_timeout = intval; | ||
308 | break; | ||
309 | case Opt_osd_idle_ttl: | ||
310 | opt->osd_idle_ttl = intval; | ||
311 | break; | ||
312 | case Opt_mount_timeout: | ||
313 | opt->mount_timeout = intval; | ||
314 | break; | ||
315 | |||
316 | case Opt_noshare: | ||
317 | opt->flags |= CEPH_OPT_NOSHARE; | ||
318 | break; | ||
319 | |||
320 | case Opt_nocrc: | ||
321 | opt->flags |= CEPH_OPT_NOCRC; | ||
322 | break; | ||
323 | |||
324 | default: | ||
325 | BUG_ON(token); | ||
326 | } | ||
327 | } | ||
328 | |||
329 | /* success */ | ||
330 | *popt = opt; | ||
331 | return 0; | ||
332 | |||
333 | out: | ||
334 | ceph_destroy_options(opt); | ||
335 | return err; | ||
336 | } | ||
337 | EXPORT_SYMBOL(ceph_parse_options); | ||
338 | |||
339 | u64 ceph_client_id(struct ceph_client *client) | ||
340 | { | ||
341 | return client->monc.auth->global_id; | ||
342 | } | ||
343 | EXPORT_SYMBOL(ceph_client_id); | ||
344 | |||
345 | /* | ||
346 | * create a fresh client instance | ||
347 | */ | ||
348 | struct ceph_client *ceph_create_client(struct ceph_options *opt, void *private) | ||
349 | { | ||
350 | struct ceph_client *client; | ||
351 | int err = -ENOMEM; | ||
352 | |||
353 | client = kzalloc(sizeof(*client), GFP_KERNEL); | ||
354 | if (client == NULL) | ||
355 | return ERR_PTR(-ENOMEM); | ||
356 | |||
357 | client->private = private; | ||
358 | client->options = opt; | ||
359 | |||
360 | mutex_init(&client->mount_mutex); | ||
361 | init_waitqueue_head(&client->auth_wq); | ||
362 | client->auth_err = 0; | ||
363 | |||
364 | client->extra_mon_dispatch = NULL; | ||
365 | client->supported_features = CEPH_FEATURE_SUPPORTED_DEFAULT; | ||
366 | client->required_features = CEPH_FEATURE_REQUIRED_DEFAULT; | ||
367 | |||
368 | client->msgr = NULL; | ||
369 | |||
370 | /* subsystems */ | ||
371 | err = ceph_monc_init(&client->monc, client); | ||
372 | if (err < 0) | ||
373 | goto fail; | ||
374 | err = ceph_osdc_init(&client->osdc, client); | ||
375 | if (err < 0) | ||
376 | goto fail_monc; | ||
377 | |||
378 | return client; | ||
379 | |||
380 | fail_monc: | ||
381 | ceph_monc_stop(&client->monc); | ||
382 | fail: | ||
383 | kfree(client); | ||
384 | return ERR_PTR(err); | ||
385 | } | ||
386 | EXPORT_SYMBOL(ceph_create_client); | ||
387 | |||
388 | void ceph_destroy_client(struct ceph_client *client) | ||
389 | { | ||
390 | dout("destroy_client %p\n", client); | ||
391 | |||
392 | /* unmount */ | ||
393 | ceph_osdc_stop(&client->osdc); | ||
394 | |||
395 | /* | ||
396 | * make sure mds and osd connections close out before destroying | ||
397 | * the auth module, which is needed to free those connections' | ||
398 | * ceph_authorizers. | ||
399 | */ | ||
400 | ceph_msgr_flush(); | ||
401 | |||
402 | ceph_monc_stop(&client->monc); | ||
403 | |||
404 | ceph_debugfs_client_cleanup(client); | ||
405 | |||
406 | if (client->msgr) | ||
407 | ceph_messenger_destroy(client->msgr); | ||
408 | |||
409 | ceph_destroy_options(client->options); | ||
410 | |||
411 | kfree(client); | ||
412 | dout("destroy_client %p done\n", client); | ||
413 | } | ||
414 | EXPORT_SYMBOL(ceph_destroy_client); | ||
415 | |||
416 | /* | ||
417 | * true if we have the mon map (and have thus joined the cluster) | ||
418 | */ | ||
419 | static int have_mon_and_osd_map(struct ceph_client *client) | ||
420 | { | ||
421 | return client->monc.monmap && client->monc.monmap->epoch && | ||
422 | client->osdc.osdmap && client->osdc.osdmap->epoch; | ||
423 | } | ||
424 | |||
425 | /* | ||
426 | * mount: join the ceph cluster, and open root directory. | ||
427 | */ | ||
428 | int __ceph_open_session(struct ceph_client *client, unsigned long started) | ||
429 | { | ||
430 | struct ceph_entity_addr *myaddr = NULL; | ||
431 | int err; | ||
432 | unsigned long timeout = client->options->mount_timeout * HZ; | ||
433 | |||
434 | /* initialize the messenger */ | ||
435 | if (client->msgr == NULL) { | ||
436 | if (ceph_test_opt(client, MYIP)) | ||
437 | myaddr = &client->options->my_addr; | ||
438 | client->msgr = ceph_messenger_create(myaddr, | ||
439 | client->supported_features, | ||
440 | client->required_features); | ||
441 | if (IS_ERR(client->msgr)) { | ||
442 | client->msgr = NULL; | ||
443 | return PTR_ERR(client->msgr); | ||
444 | } | ||
445 | client->msgr->nocrc = ceph_test_opt(client, NOCRC); | ||
446 | } | ||
447 | |||
448 | /* open session, and wait for mon and osd maps */ | ||
449 | err = ceph_monc_open_session(&client->monc); | ||
450 | if (err < 0) | ||
451 | return err; | ||
452 | |||
453 | while (!have_mon_and_osd_map(client)) { | ||
454 | err = -EIO; | ||
455 | if (timeout && time_after_eq(jiffies, started + timeout)) | ||
456 | return err; | ||
457 | |||
458 | /* wait */ | ||
459 | dout("mount waiting for mon_map\n"); | ||
460 | err = wait_event_interruptible_timeout(client->auth_wq, | ||
461 | have_mon_and_osd_map(client) || (client->auth_err < 0), | ||
462 | timeout); | ||
463 | if (err == -EINTR || err == -ERESTARTSYS) | ||
464 | return err; | ||
465 | if (client->auth_err < 0) | ||
466 | return client->auth_err; | ||
467 | } | ||
468 | |||
469 | return 0; | ||
470 | } | ||
471 | EXPORT_SYMBOL(__ceph_open_session); | ||
472 | |||
473 | |||
474 | int ceph_open_session(struct ceph_client *client) | ||
475 | { | ||
476 | int ret; | ||
477 | unsigned long started = jiffies; /* note the start time */ | ||
478 | |||
479 | dout("open_session start\n"); | ||
480 | mutex_lock(&client->mount_mutex); | ||
481 | |||
482 | ret = __ceph_open_session(client, started); | ||
483 | |||
484 | mutex_unlock(&client->mount_mutex); | ||
485 | return ret; | ||
486 | } | ||
487 | EXPORT_SYMBOL(ceph_open_session); | ||
488 | |||
489 | |||
490 | static int __init init_ceph_lib(void) | ||
491 | { | ||
492 | int ret = 0; | ||
493 | |||
494 | ret = ceph_debugfs_init(); | ||
495 | if (ret < 0) | ||
496 | goto out; | ||
497 | |||
498 | ret = ceph_msgr_init(); | ||
499 | if (ret < 0) | ||
500 | goto out_debugfs; | ||
501 | |||
502 | pr_info("loaded (mon/osd proto %d/%d, osdmap %d/%d %d/%d)\n", | ||
503 | CEPH_MONC_PROTOCOL, CEPH_OSDC_PROTOCOL, | ||
504 | CEPH_OSDMAP_VERSION, CEPH_OSDMAP_VERSION_EXT, | ||
505 | CEPH_OSDMAP_INC_VERSION, CEPH_OSDMAP_INC_VERSION_EXT); | ||
506 | |||
507 | return 0; | ||
508 | |||
509 | out_debugfs: | ||
510 | ceph_debugfs_cleanup(); | ||
511 | out: | ||
512 | return ret; | ||
513 | } | ||
514 | |||
515 | static void __exit exit_ceph_lib(void) | ||
516 | { | ||
517 | dout("exit_ceph_lib\n"); | ||
518 | ceph_msgr_exit(); | ||
519 | ceph_debugfs_cleanup(); | ||
520 | } | ||
521 | |||
522 | module_init(init_ceph_lib); | ||
523 | module_exit(exit_ceph_lib); | ||
524 | |||
525 | MODULE_AUTHOR("Sage Weil <sage@newdream.net>"); | ||
526 | MODULE_AUTHOR("Yehuda Sadeh <yehuda@hq.newdream.net>"); | ||
527 | MODULE_AUTHOR("Patience Warnick <patience@newdream.net>"); | ||
528 | MODULE_DESCRIPTION("Ceph filesystem for Linux"); | ||
529 | MODULE_LICENSE("GPL"); | ||
diff --git a/fs/ceph/ceph_fs.c b/net/ceph/ceph_fs.c index 3ac6cc7c1156..a3a3a31d3c37 100644 --- a/fs/ceph/ceph_fs.c +++ b/net/ceph/ceph_fs.c | |||
@@ -1,7 +1,8 @@ | |||
1 | /* | 1 | /* |
2 | * Some non-inline ceph helpers | 2 | * Some non-inline ceph helpers |
3 | */ | 3 | */ |
4 | #include "types.h" | 4 | #include <linux/module.h> |
5 | #include <linux/ceph/types.h> | ||
5 | 6 | ||
6 | /* | 7 | /* |
7 | * return true if @layout appears to be valid | 8 | * return true if @layout appears to be valid |
@@ -52,6 +53,7 @@ int ceph_flags_to_mode(int flags) | |||
52 | 53 | ||
53 | return mode; | 54 | return mode; |
54 | } | 55 | } |
56 | EXPORT_SYMBOL(ceph_flags_to_mode); | ||
55 | 57 | ||
56 | int ceph_caps_for_mode(int mode) | 58 | int ceph_caps_for_mode(int mode) |
57 | { | 59 | { |
@@ -70,3 +72,4 @@ int ceph_caps_for_mode(int mode) | |||
70 | 72 | ||
71 | return caps; | 73 | return caps; |
72 | } | 74 | } |
75 | EXPORT_SYMBOL(ceph_caps_for_mode); | ||
diff --git a/fs/ceph/ceph_hash.c b/net/ceph/ceph_hash.c index bd570015d147..815ef8826796 100644 --- a/fs/ceph/ceph_hash.c +++ b/net/ceph/ceph_hash.c | |||
@@ -1,5 +1,5 @@ | |||
1 | 1 | ||
2 | #include "types.h" | 2 | #include <linux/ceph/types.h> |
3 | 3 | ||
4 | /* | 4 | /* |
5 | * Robert Jenkin's hash function. | 5 | * Robert Jenkin's hash function. |
diff --git a/net/ceph/ceph_strings.c b/net/ceph/ceph_strings.c new file mode 100644 index 000000000000..3fbda04de29c --- /dev/null +++ b/net/ceph/ceph_strings.c | |||
@@ -0,0 +1,84 @@ | |||
1 | /* | ||
2 | * Ceph string constants | ||
3 | */ | ||
4 | #include <linux/module.h> | ||
5 | #include <linux/ceph/types.h> | ||
6 | |||
7 | const char *ceph_entity_type_name(int type) | ||
8 | { | ||
9 | switch (type) { | ||
10 | case CEPH_ENTITY_TYPE_MDS: return "mds"; | ||
11 | case CEPH_ENTITY_TYPE_OSD: return "osd"; | ||
12 | case CEPH_ENTITY_TYPE_MON: return "mon"; | ||
13 | case CEPH_ENTITY_TYPE_CLIENT: return "client"; | ||
14 | case CEPH_ENTITY_TYPE_AUTH: return "auth"; | ||
15 | default: return "unknown"; | ||
16 | } | ||
17 | } | ||
18 | |||
19 | const char *ceph_osd_op_name(int op) | ||
20 | { | ||
21 | switch (op) { | ||
22 | case CEPH_OSD_OP_READ: return "read"; | ||
23 | case CEPH_OSD_OP_STAT: return "stat"; | ||
24 | |||
25 | case CEPH_OSD_OP_MASKTRUNC: return "masktrunc"; | ||
26 | |||
27 | case CEPH_OSD_OP_WRITE: return "write"; | ||
28 | case CEPH_OSD_OP_DELETE: return "delete"; | ||
29 | case CEPH_OSD_OP_TRUNCATE: return "truncate"; | ||
30 | case CEPH_OSD_OP_ZERO: return "zero"; | ||
31 | case CEPH_OSD_OP_WRITEFULL: return "writefull"; | ||
32 | case CEPH_OSD_OP_ROLLBACK: return "rollback"; | ||
33 | |||
34 | case CEPH_OSD_OP_APPEND: return "append"; | ||
35 | case CEPH_OSD_OP_STARTSYNC: return "startsync"; | ||
36 | case CEPH_OSD_OP_SETTRUNC: return "settrunc"; | ||
37 | case CEPH_OSD_OP_TRIMTRUNC: return "trimtrunc"; | ||
38 | |||
39 | case CEPH_OSD_OP_TMAPUP: return "tmapup"; | ||
40 | case CEPH_OSD_OP_TMAPGET: return "tmapget"; | ||
41 | case CEPH_OSD_OP_TMAPPUT: return "tmapput"; | ||
42 | |||
43 | case CEPH_OSD_OP_GETXATTR: return "getxattr"; | ||
44 | case CEPH_OSD_OP_GETXATTRS: return "getxattrs"; | ||
45 | case CEPH_OSD_OP_SETXATTR: return "setxattr"; | ||
46 | case CEPH_OSD_OP_SETXATTRS: return "setxattrs"; | ||
47 | case CEPH_OSD_OP_RESETXATTRS: return "resetxattrs"; | ||
48 | case CEPH_OSD_OP_RMXATTR: return "rmxattr"; | ||
49 | case CEPH_OSD_OP_CMPXATTR: return "cmpxattr"; | ||
50 | |||
51 | case CEPH_OSD_OP_PULL: return "pull"; | ||
52 | case CEPH_OSD_OP_PUSH: return "push"; | ||
53 | case CEPH_OSD_OP_BALANCEREADS: return "balance-reads"; | ||
54 | case CEPH_OSD_OP_UNBALANCEREADS: return "unbalance-reads"; | ||
55 | case CEPH_OSD_OP_SCRUB: return "scrub"; | ||
56 | |||
57 | case CEPH_OSD_OP_WRLOCK: return "wrlock"; | ||
58 | case CEPH_OSD_OP_WRUNLOCK: return "wrunlock"; | ||
59 | case CEPH_OSD_OP_RDLOCK: return "rdlock"; | ||
60 | case CEPH_OSD_OP_RDUNLOCK: return "rdunlock"; | ||
61 | case CEPH_OSD_OP_UPLOCK: return "uplock"; | ||
62 | case CEPH_OSD_OP_DNLOCK: return "dnlock"; | ||
63 | |||
64 | case CEPH_OSD_OP_CALL: return "call"; | ||
65 | |||
66 | case CEPH_OSD_OP_PGLS: return "pgls"; | ||
67 | } | ||
68 | return "???"; | ||
69 | } | ||
70 | |||
71 | |||
72 | const char *ceph_pool_op_name(int op) | ||
73 | { | ||
74 | switch (op) { | ||
75 | case POOL_OP_CREATE: return "create"; | ||
76 | case POOL_OP_DELETE: return "delete"; | ||
77 | case POOL_OP_AUID_CHANGE: return "auid change"; | ||
78 | case POOL_OP_CREATE_SNAP: return "create snap"; | ||
79 | case POOL_OP_DELETE_SNAP: return "delete snap"; | ||
80 | case POOL_OP_CREATE_UNMANAGED_SNAP: return "create unmanaged snap"; | ||
81 | case POOL_OP_DELETE_UNMANAGED_SNAP: return "delete unmanaged snap"; | ||
82 | } | ||
83 | return "???"; | ||
84 | } | ||
diff --git a/fs/ceph/crush/crush.c b/net/ceph/crush/crush.c index fabd302e5779..d6ebb13a18a4 100644 --- a/fs/ceph/crush/crush.c +++ b/net/ceph/crush/crush.c | |||
@@ -8,7 +8,7 @@ | |||
8 | # define BUG_ON(x) assert(!(x)) | 8 | # define BUG_ON(x) assert(!(x)) |
9 | #endif | 9 | #endif |
10 | 10 | ||
11 | #include "crush.h" | 11 | #include <linux/crush/crush.h> |
12 | 12 | ||
13 | const char *crush_bucket_alg_name(int alg) | 13 | const char *crush_bucket_alg_name(int alg) |
14 | { | 14 | { |
diff --git a/fs/ceph/crush/hash.c b/net/ceph/crush/hash.c index 5873aed694bf..5bb63e37a8a1 100644 --- a/fs/ceph/crush/hash.c +++ b/net/ceph/crush/hash.c | |||
@@ -1,6 +1,6 @@ | |||
1 | 1 | ||
2 | #include <linux/types.h> | 2 | #include <linux/types.h> |
3 | #include "hash.h" | 3 | #include <linux/crush/hash.h> |
4 | 4 | ||
5 | /* | 5 | /* |
6 | * Robert Jenkins' function for mixing 32-bit values | 6 | * Robert Jenkins' function for mixing 32-bit values |
diff --git a/fs/ceph/crush/mapper.c b/net/ceph/crush/mapper.c index a4eec133258e..42599e31dcad 100644 --- a/fs/ceph/crush/mapper.c +++ b/net/ceph/crush/mapper.c | |||
@@ -18,8 +18,8 @@ | |||
18 | # define kfree(x) free(x) | 18 | # define kfree(x) free(x) |
19 | #endif | 19 | #endif |
20 | 20 | ||
21 | #include "crush.h" | 21 | #include <linux/crush/crush.h> |
22 | #include "hash.h" | 22 | #include <linux/crush/hash.h> |
23 | 23 | ||
24 | /* | 24 | /* |
25 | * Implement the core CRUSH mapping algorithm. | 25 | * Implement the core CRUSH mapping algorithm. |
diff --git a/fs/ceph/crypto.c b/net/ceph/crypto.c index a3e627f63293..7b505b0c983f 100644 --- a/fs/ceph/crypto.c +++ b/net/ceph/crypto.c | |||
@@ -1,13 +1,13 @@ | |||
1 | 1 | ||
2 | #include "ceph_debug.h" | 2 | #include <linux/ceph/ceph_debug.h> |
3 | 3 | ||
4 | #include <linux/err.h> | 4 | #include <linux/err.h> |
5 | #include <linux/scatterlist.h> | 5 | #include <linux/scatterlist.h> |
6 | #include <linux/slab.h> | 6 | #include <linux/slab.h> |
7 | #include <crypto/hash.h> | 7 | #include <crypto/hash.h> |
8 | 8 | ||
9 | #include <linux/ceph/decode.h> | ||
9 | #include "crypto.h" | 10 | #include "crypto.h" |
10 | #include "decode.h" | ||
11 | 11 | ||
12 | int ceph_crypto_key_encode(struct ceph_crypto_key *key, void **p, void *end) | 12 | int ceph_crypto_key_encode(struct ceph_crypto_key *key, void **p, void *end) |
13 | { | 13 | { |
diff --git a/fs/ceph/crypto.h b/net/ceph/crypto.h index bdf38607323c..f9eccace592b 100644 --- a/fs/ceph/crypto.h +++ b/net/ceph/crypto.h | |||
@@ -1,8 +1,8 @@ | |||
1 | #ifndef _FS_CEPH_CRYPTO_H | 1 | #ifndef _FS_CEPH_CRYPTO_H |
2 | #define _FS_CEPH_CRYPTO_H | 2 | #define _FS_CEPH_CRYPTO_H |
3 | 3 | ||
4 | #include "types.h" | 4 | #include <linux/ceph/types.h> |
5 | #include "buffer.h" | 5 | #include <linux/ceph/buffer.h> |
6 | 6 | ||
7 | /* | 7 | /* |
8 | * cryptographic secret | 8 | * cryptographic secret |
diff --git a/net/ceph/debugfs.c b/net/ceph/debugfs.c new file mode 100644 index 000000000000..27d4ea315d12 --- /dev/null +++ b/net/ceph/debugfs.c | |||
@@ -0,0 +1,267 @@ | |||
1 | #include <linux/ceph/ceph_debug.h> | ||
2 | |||
3 | #include <linux/device.h> | ||
4 | #include <linux/slab.h> | ||
5 | #include <linux/module.h> | ||
6 | #include <linux/ctype.h> | ||
7 | #include <linux/debugfs.h> | ||
8 | #include <linux/seq_file.h> | ||
9 | |||
10 | #include <linux/ceph/libceph.h> | ||
11 | #include <linux/ceph/mon_client.h> | ||
12 | #include <linux/ceph/auth.h> | ||
13 | #include <linux/ceph/debugfs.h> | ||
14 | |||
15 | #ifdef CONFIG_DEBUG_FS | ||
16 | |||
17 | /* | ||
18 | * Implement /sys/kernel/debug/ceph fun | ||
19 | * | ||
20 | * /sys/kernel/debug/ceph/client* - an instance of the ceph client | ||
21 | * .../osdmap - current osdmap | ||
22 | * .../monmap - current monmap | ||
23 | * .../osdc - active osd requests | ||
24 | * .../monc - mon client state | ||
25 | * .../dentry_lru - dump contents of dentry lru | ||
26 | * .../caps - expose cap (reservation) stats | ||
27 | * .../bdi - symlink to ../../bdi/something | ||
28 | */ | ||
29 | |||
30 | static struct dentry *ceph_debugfs_dir; | ||
31 | |||
32 | static int monmap_show(struct seq_file *s, void *p) | ||
33 | { | ||
34 | int i; | ||
35 | struct ceph_client *client = s->private; | ||
36 | |||
37 | if (client->monc.monmap == NULL) | ||
38 | return 0; | ||
39 | |||
40 | seq_printf(s, "epoch %d\n", client->monc.monmap->epoch); | ||
41 | for (i = 0; i < client->monc.monmap->num_mon; i++) { | ||
42 | struct ceph_entity_inst *inst = | ||
43 | &client->monc.monmap->mon_inst[i]; | ||
44 | |||
45 | seq_printf(s, "\t%s%lld\t%s\n", | ||
46 | ENTITY_NAME(inst->name), | ||
47 | ceph_pr_addr(&inst->addr.in_addr)); | ||
48 | } | ||
49 | return 0; | ||
50 | } | ||
51 | |||
52 | static int osdmap_show(struct seq_file *s, void *p) | ||
53 | { | ||
54 | int i; | ||
55 | struct ceph_client *client = s->private; | ||
56 | struct rb_node *n; | ||
57 | |||
58 | if (client->osdc.osdmap == NULL) | ||
59 | return 0; | ||
60 | seq_printf(s, "epoch %d\n", client->osdc.osdmap->epoch); | ||
61 | seq_printf(s, "flags%s%s\n", | ||
62 | (client->osdc.osdmap->flags & CEPH_OSDMAP_NEARFULL) ? | ||
63 | " NEARFULL" : "", | ||
64 | (client->osdc.osdmap->flags & CEPH_OSDMAP_FULL) ? | ||
65 | " FULL" : ""); | ||
66 | for (n = rb_first(&client->osdc.osdmap->pg_pools); n; n = rb_next(n)) { | ||
67 | struct ceph_pg_pool_info *pool = | ||
68 | rb_entry(n, struct ceph_pg_pool_info, node); | ||
69 | seq_printf(s, "pg_pool %d pg_num %d / %d, lpg_num %d / %d\n", | ||
70 | pool->id, pool->v.pg_num, pool->pg_num_mask, | ||
71 | pool->v.lpg_num, pool->lpg_num_mask); | ||
72 | } | ||
73 | for (i = 0; i < client->osdc.osdmap->max_osd; i++) { | ||
74 | struct ceph_entity_addr *addr = | ||
75 | &client->osdc.osdmap->osd_addr[i]; | ||
76 | int state = client->osdc.osdmap->osd_state[i]; | ||
77 | char sb[64]; | ||
78 | |||
79 | seq_printf(s, "\tosd%d\t%s\t%3d%%\t(%s)\n", | ||
80 | i, ceph_pr_addr(&addr->in_addr), | ||
81 | ((client->osdc.osdmap->osd_weight[i]*100) >> 16), | ||
82 | ceph_osdmap_state_str(sb, sizeof(sb), state)); | ||
83 | } | ||
84 | return 0; | ||
85 | } | ||
86 | |||
87 | static int monc_show(struct seq_file *s, void *p) | ||
88 | { | ||
89 | struct ceph_client *client = s->private; | ||
90 | struct ceph_mon_generic_request *req; | ||
91 | struct ceph_mon_client *monc = &client->monc; | ||
92 | struct rb_node *rp; | ||
93 | |||
94 | mutex_lock(&monc->mutex); | ||
95 | |||
96 | if (monc->have_mdsmap) | ||
97 | seq_printf(s, "have mdsmap %u\n", (unsigned)monc->have_mdsmap); | ||
98 | if (monc->have_osdmap) | ||
99 | seq_printf(s, "have osdmap %u\n", (unsigned)monc->have_osdmap); | ||
100 | if (monc->want_next_osdmap) | ||
101 | seq_printf(s, "want next osdmap\n"); | ||
102 | |||
103 | for (rp = rb_first(&monc->generic_request_tree); rp; rp = rb_next(rp)) { | ||
104 | __u16 op; | ||
105 | req = rb_entry(rp, struct ceph_mon_generic_request, node); | ||
106 | op = le16_to_cpu(req->request->hdr.type); | ||
107 | if (op == CEPH_MSG_STATFS) | ||
108 | seq_printf(s, "%lld statfs\n", req->tid); | ||
109 | else | ||
110 | seq_printf(s, "%lld unknown\n", req->tid); | ||
111 | } | ||
112 | |||
113 | mutex_unlock(&monc->mutex); | ||
114 | return 0; | ||
115 | } | ||
116 | |||
117 | static int osdc_show(struct seq_file *s, void *pp) | ||
118 | { | ||
119 | struct ceph_client *client = s->private; | ||
120 | struct ceph_osd_client *osdc = &client->osdc; | ||
121 | struct rb_node *p; | ||
122 | |||
123 | mutex_lock(&osdc->request_mutex); | ||
124 | for (p = rb_first(&osdc->requests); p; p = rb_next(p)) { | ||
125 | struct ceph_osd_request *req; | ||
126 | struct ceph_osd_request_head *head; | ||
127 | struct ceph_osd_op *op; | ||
128 | int num_ops; | ||
129 | int opcode, olen; | ||
130 | int i; | ||
131 | |||
132 | req = rb_entry(p, struct ceph_osd_request, r_node); | ||
133 | |||
134 | seq_printf(s, "%lld\tosd%d\t%d.%x\t", req->r_tid, | ||
135 | req->r_osd ? req->r_osd->o_osd : -1, | ||
136 | le32_to_cpu(req->r_pgid.pool), | ||
137 | le16_to_cpu(req->r_pgid.ps)); | ||
138 | |||
139 | head = req->r_request->front.iov_base; | ||
140 | op = (void *)(head + 1); | ||
141 | |||
142 | num_ops = le16_to_cpu(head->num_ops); | ||
143 | olen = le32_to_cpu(head->object_len); | ||
144 | seq_printf(s, "%.*s", olen, | ||
145 | (const char *)(head->ops + num_ops)); | ||
146 | |||
147 | if (req->r_reassert_version.epoch) | ||
148 | seq_printf(s, "\t%u'%llu", | ||
149 | (unsigned)le32_to_cpu(req->r_reassert_version.epoch), | ||
150 | le64_to_cpu(req->r_reassert_version.version)); | ||
151 | else | ||
152 | seq_printf(s, "\t"); | ||
153 | |||
154 | for (i = 0; i < num_ops; i++) { | ||
155 | opcode = le16_to_cpu(op->op); | ||
156 | seq_printf(s, "\t%s", ceph_osd_op_name(opcode)); | ||
157 | op++; | ||
158 | } | ||
159 | |||
160 | seq_printf(s, "\n"); | ||
161 | } | ||
162 | mutex_unlock(&osdc->request_mutex); | ||
163 | return 0; | ||
164 | } | ||
165 | |||
166 | CEPH_DEFINE_SHOW_FUNC(monmap_show) | ||
167 | CEPH_DEFINE_SHOW_FUNC(osdmap_show) | ||
168 | CEPH_DEFINE_SHOW_FUNC(monc_show) | ||
169 | CEPH_DEFINE_SHOW_FUNC(osdc_show) | ||
170 | |||
171 | int ceph_debugfs_init(void) | ||
172 | { | ||
173 | ceph_debugfs_dir = debugfs_create_dir("ceph", NULL); | ||
174 | if (!ceph_debugfs_dir) | ||
175 | return -ENOMEM; | ||
176 | return 0; | ||
177 | } | ||
178 | |||
179 | void ceph_debugfs_cleanup(void) | ||
180 | { | ||
181 | debugfs_remove(ceph_debugfs_dir); | ||
182 | } | ||
183 | |||
184 | int ceph_debugfs_client_init(struct ceph_client *client) | ||
185 | { | ||
186 | int ret = -ENOMEM; | ||
187 | char name[80]; | ||
188 | |||
189 | snprintf(name, sizeof(name), "%pU.client%lld", &client->fsid, | ||
190 | client->monc.auth->global_id); | ||
191 | |||
192 | client->debugfs_dir = debugfs_create_dir(name, ceph_debugfs_dir); | ||
193 | if (!client->debugfs_dir) | ||
194 | goto out; | ||
195 | |||
196 | client->monc.debugfs_file = debugfs_create_file("monc", | ||
197 | 0600, | ||
198 | client->debugfs_dir, | ||
199 | client, | ||
200 | &monc_show_fops); | ||
201 | if (!client->monc.debugfs_file) | ||
202 | goto out; | ||
203 | |||
204 | client->osdc.debugfs_file = debugfs_create_file("osdc", | ||
205 | 0600, | ||
206 | client->debugfs_dir, | ||
207 | client, | ||
208 | &osdc_show_fops); | ||
209 | if (!client->osdc.debugfs_file) | ||
210 | goto out; | ||
211 | |||
212 | client->debugfs_monmap = debugfs_create_file("monmap", | ||
213 | 0600, | ||
214 | client->debugfs_dir, | ||
215 | client, | ||
216 | &monmap_show_fops); | ||
217 | if (!client->debugfs_monmap) | ||
218 | goto out; | ||
219 | |||
220 | client->debugfs_osdmap = debugfs_create_file("osdmap", | ||
221 | 0600, | ||
222 | client->debugfs_dir, | ||
223 | client, | ||
224 | &osdmap_show_fops); | ||
225 | if (!client->debugfs_osdmap) | ||
226 | goto out; | ||
227 | |||
228 | return 0; | ||
229 | |||
230 | out: | ||
231 | ceph_debugfs_client_cleanup(client); | ||
232 | return ret; | ||
233 | } | ||
234 | |||
235 | void ceph_debugfs_client_cleanup(struct ceph_client *client) | ||
236 | { | ||
237 | debugfs_remove(client->debugfs_osdmap); | ||
238 | debugfs_remove(client->debugfs_monmap); | ||
239 | debugfs_remove(client->osdc.debugfs_file); | ||
240 | debugfs_remove(client->monc.debugfs_file); | ||
241 | debugfs_remove(client->debugfs_dir); | ||
242 | } | ||
243 | |||
244 | #else /* CONFIG_DEBUG_FS */ | ||
245 | |||
/*
 * Stubs used when CONFIG_DEBUG_FS is not set: the init functions report
 * success and the cleanup functions do nothing.
 */

/* nothing to create; always succeeds */
int ceph_debugfs_init(void)
{
	return 0;
}

/* nothing to remove */
void ceph_debugfs_cleanup(void)
{
}

/* nothing to create for @client; always succeeds */
int ceph_debugfs_client_init(struct ceph_client *client)
{
	return 0;
}

/* nothing to remove for @client */
void ceph_debugfs_client_cleanup(struct ceph_client *client)
{
}
263 | |||
264 | #endif /* CONFIG_DEBUG_FS */ | ||
265 | |||
266 | EXPORT_SYMBOL(ceph_debugfs_init); | ||
267 | EXPORT_SYMBOL(ceph_debugfs_cleanup); | ||
diff --git a/fs/ceph/messenger.c b/net/ceph/messenger.c index 2502d76fcec1..0e8157ee5d43 100644 --- a/fs/ceph/messenger.c +++ b/net/ceph/messenger.c | |||
@@ -1,4 +1,4 @@ | |||
1 | #include "ceph_debug.h" | 1 | #include <linux/ceph/ceph_debug.h> |
2 | 2 | ||
3 | #include <linux/crc32c.h> | 3 | #include <linux/crc32c.h> |
4 | #include <linux/ctype.h> | 4 | #include <linux/ctype.h> |
@@ -9,12 +9,14 @@ | |||
9 | #include <linux/slab.h> | 9 | #include <linux/slab.h> |
10 | #include <linux/socket.h> | 10 | #include <linux/socket.h> |
11 | #include <linux/string.h> | 11 | #include <linux/string.h> |
12 | #include <linux/bio.h> | ||
13 | #include <linux/blkdev.h> | ||
12 | #include <net/tcp.h> | 14 | #include <net/tcp.h> |
13 | 15 | ||
14 | #include "super.h" | 16 | #include <linux/ceph/libceph.h> |
15 | #include "messenger.h" | 17 | #include <linux/ceph/messenger.h> |
16 | #include "decode.h" | 18 | #include <linux/ceph/decode.h> |
17 | #include "pagelist.h" | 19 | #include <linux/ceph/pagelist.h> |
18 | 20 | ||
19 | /* | 21 | /* |
20 | * Ceph uses the messenger to exchange ceph_msg messages with other | 22 | * Ceph uses the messenger to exchange ceph_msg messages with other |
@@ -48,7 +50,7 @@ static char addr_str[MAX_ADDR_STR][MAX_ADDR_STR_LEN]; | |||
48 | static DEFINE_SPINLOCK(addr_str_lock); | 50 | static DEFINE_SPINLOCK(addr_str_lock); |
49 | static int last_addr_str; | 51 | static int last_addr_str; |
50 | 52 | ||
51 | const char *pr_addr(const struct sockaddr_storage *ss) | 53 | const char *ceph_pr_addr(const struct sockaddr_storage *ss) |
52 | { | 54 | { |
53 | int i; | 55 | int i; |
54 | char *s; | 56 | char *s; |
@@ -79,6 +81,7 @@ const char *pr_addr(const struct sockaddr_storage *ss) | |||
79 | 81 | ||
80 | return s; | 82 | return s; |
81 | } | 83 | } |
84 | EXPORT_SYMBOL(ceph_pr_addr); | ||
82 | 85 | ||
83 | static void encode_my_addr(struct ceph_messenger *msgr) | 86 | static void encode_my_addr(struct ceph_messenger *msgr) |
84 | { | 87 | { |
@@ -91,7 +94,7 @@ static void encode_my_addr(struct ceph_messenger *msgr) | |||
91 | */ | 94 | */ |
92 | struct workqueue_struct *ceph_msgr_wq; | 95 | struct workqueue_struct *ceph_msgr_wq; |
93 | 96 | ||
94 | int __init ceph_msgr_init(void) | 97 | int ceph_msgr_init(void) |
95 | { | 98 | { |
96 | ceph_msgr_wq = create_workqueue("ceph-msgr"); | 99 | ceph_msgr_wq = create_workqueue("ceph-msgr"); |
97 | if (IS_ERR(ceph_msgr_wq)) { | 100 | if (IS_ERR(ceph_msgr_wq)) { |
@@ -102,16 +105,19 @@ int __init ceph_msgr_init(void) | |||
102 | } | 105 | } |
103 | return 0; | 106 | return 0; |
104 | } | 107 | } |
108 | EXPORT_SYMBOL(ceph_msgr_init); | ||
105 | 109 | ||
106 | void ceph_msgr_exit(void) | 110 | void ceph_msgr_exit(void) |
107 | { | 111 | { |
108 | destroy_workqueue(ceph_msgr_wq); | 112 | destroy_workqueue(ceph_msgr_wq); |
109 | } | 113 | } |
114 | EXPORT_SYMBOL(ceph_msgr_exit); | ||
110 | 115 | ||
111 | void ceph_msgr_flush(void) | 116 | void ceph_msgr_flush(void) |
112 | { | 117 | { |
113 | flush_workqueue(ceph_msgr_wq); | 118 | flush_workqueue(ceph_msgr_wq); |
114 | } | 119 | } |
120 | EXPORT_SYMBOL(ceph_msgr_flush); | ||
115 | 121 | ||
116 | 122 | ||
117 | /* | 123 | /* |
@@ -221,19 +227,19 @@ static struct socket *ceph_tcp_connect(struct ceph_connection *con) | |||
221 | 227 | ||
222 | set_sock_callbacks(sock, con); | 228 | set_sock_callbacks(sock, con); |
223 | 229 | ||
224 | dout("connect %s\n", pr_addr(&con->peer_addr.in_addr)); | 230 | dout("connect %s\n", ceph_pr_addr(&con->peer_addr.in_addr)); |
225 | 231 | ||
226 | ret = sock->ops->connect(sock, (struct sockaddr *)paddr, sizeof(*paddr), | 232 | ret = sock->ops->connect(sock, (struct sockaddr *)paddr, sizeof(*paddr), |
227 | O_NONBLOCK); | 233 | O_NONBLOCK); |
228 | if (ret == -EINPROGRESS) { | 234 | if (ret == -EINPROGRESS) { |
229 | dout("connect %s EINPROGRESS sk_state = %u\n", | 235 | dout("connect %s EINPROGRESS sk_state = %u\n", |
230 | pr_addr(&con->peer_addr.in_addr), | 236 | ceph_pr_addr(&con->peer_addr.in_addr), |
231 | sock->sk->sk_state); | 237 | sock->sk->sk_state); |
232 | ret = 0; | 238 | ret = 0; |
233 | } | 239 | } |
234 | if (ret < 0) { | 240 | if (ret < 0) { |
235 | pr_err("connect %s error %d\n", | 241 | pr_err("connect %s error %d\n", |
236 | pr_addr(&con->peer_addr.in_addr), ret); | 242 | ceph_pr_addr(&con->peer_addr.in_addr), ret); |
237 | sock_release(sock); | 243 | sock_release(sock); |
238 | con->sock = NULL; | 244 | con->sock = NULL; |
239 | con->error_msg = "connect error"; | 245 | con->error_msg = "connect error"; |
@@ -334,7 +340,8 @@ static void reset_connection(struct ceph_connection *con) | |||
334 | */ | 340 | */ |
335 | void ceph_con_close(struct ceph_connection *con) | 341 | void ceph_con_close(struct ceph_connection *con) |
336 | { | 342 | { |
337 | dout("con_close %p peer %s\n", con, pr_addr(&con->peer_addr.in_addr)); | 343 | dout("con_close %p peer %s\n", con, |
344 | ceph_pr_addr(&con->peer_addr.in_addr)); | ||
338 | set_bit(CLOSED, &con->state); /* in case there's queued work */ | 345 | set_bit(CLOSED, &con->state); /* in case there's queued work */ |
339 | clear_bit(STANDBY, &con->state); /* avoid connect_seq bump */ | 346 | clear_bit(STANDBY, &con->state); /* avoid connect_seq bump */ |
340 | clear_bit(LOSSYTX, &con->state); /* so we retry next connect */ | 347 | clear_bit(LOSSYTX, &con->state); /* so we retry next connect */ |
@@ -347,19 +354,21 @@ void ceph_con_close(struct ceph_connection *con) | |||
347 | mutex_unlock(&con->mutex); | 354 | mutex_unlock(&con->mutex); |
348 | queue_con(con); | 355 | queue_con(con); |
349 | } | 356 | } |
357 | EXPORT_SYMBOL(ceph_con_close); | ||
350 | 358 | ||
351 | /* | 359 | /* |
352 | * Reopen a closed connection, with a new peer address. | 360 | * Reopen a closed connection, with a new peer address. |
353 | */ | 361 | */ |
354 | void ceph_con_open(struct ceph_connection *con, struct ceph_entity_addr *addr) | 362 | void ceph_con_open(struct ceph_connection *con, struct ceph_entity_addr *addr) |
355 | { | 363 | { |
356 | dout("con_open %p %s\n", con, pr_addr(&addr->in_addr)); | 364 | dout("con_open %p %s\n", con, ceph_pr_addr(&addr->in_addr)); |
357 | set_bit(OPENING, &con->state); | 365 | set_bit(OPENING, &con->state); |
358 | clear_bit(CLOSED, &con->state); | 366 | clear_bit(CLOSED, &con->state); |
359 | memcpy(&con->peer_addr, addr, sizeof(*addr)); | 367 | memcpy(&con->peer_addr, addr, sizeof(*addr)); |
360 | con->delay = 0; /* reset backoff memory */ | 368 | con->delay = 0; /* reset backoff memory */ |
361 | queue_con(con); | 369 | queue_con(con); |
362 | } | 370 | } |
371 | EXPORT_SYMBOL(ceph_con_open); | ||
363 | 372 | ||
364 | /* | 373 | /* |
365 | * return true if this connection ever successfully opened | 374 | * return true if this connection ever successfully opened |
@@ -406,6 +415,7 @@ void ceph_con_init(struct ceph_messenger *msgr, struct ceph_connection *con) | |||
406 | INIT_LIST_HEAD(&con->out_sent); | 415 | INIT_LIST_HEAD(&con->out_sent); |
407 | INIT_DELAYED_WORK(&con->work, con_work); | 416 | INIT_DELAYED_WORK(&con->work, con_work); |
408 | } | 417 | } |
418 | EXPORT_SYMBOL(ceph_con_init); | ||
409 | 419 | ||
410 | 420 | ||
411 | /* | 421 | /* |
@@ -529,8 +539,11 @@ static void prepare_write_message(struct ceph_connection *con) | |||
529 | if (le32_to_cpu(m->hdr.data_len) > 0) { | 539 | if (le32_to_cpu(m->hdr.data_len) > 0) { |
530 | /* initialize page iterator */ | 540 | /* initialize page iterator */ |
531 | con->out_msg_pos.page = 0; | 541 | con->out_msg_pos.page = 0; |
532 | con->out_msg_pos.page_pos = | 542 | if (m->pages) |
533 | le16_to_cpu(m->hdr.data_off) & ~PAGE_MASK; | 543 | con->out_msg_pos.page_pos = |
544 | le16_to_cpu(m->hdr.data_off) & ~PAGE_MASK; | ||
545 | else | ||
546 | con->out_msg_pos.page_pos = 0; | ||
534 | con->out_msg_pos.data_pos = 0; | 547 | con->out_msg_pos.data_pos = 0; |
535 | con->out_msg_pos.did_page_crc = 0; | 548 | con->out_msg_pos.did_page_crc = 0; |
536 | con->out_more = 1; /* data + footer will follow */ | 549 | con->out_more = 1; /* data + footer will follow */ |
@@ -647,7 +660,7 @@ static void prepare_write_connect(struct ceph_messenger *msgr, | |||
647 | dout("prepare_write_connect %p cseq=%d gseq=%d proto=%d\n", con, | 660 | dout("prepare_write_connect %p cseq=%d gseq=%d proto=%d\n", con, |
648 | con->connect_seq, global_seq, proto); | 661 | con->connect_seq, global_seq, proto); |
649 | 662 | ||
650 | con->out_connect.features = cpu_to_le64(CEPH_FEATURE_SUPPORTED); | 663 | con->out_connect.features = cpu_to_le64(msgr->supported_features); |
651 | con->out_connect.host_type = cpu_to_le32(CEPH_ENTITY_TYPE_CLIENT); | 664 | con->out_connect.host_type = cpu_to_le32(CEPH_ENTITY_TYPE_CLIENT); |
652 | con->out_connect.connect_seq = cpu_to_le32(con->connect_seq); | 665 | con->out_connect.connect_seq = cpu_to_le32(con->connect_seq); |
653 | con->out_connect.global_seq = cpu_to_le32(global_seq); | 666 | con->out_connect.global_seq = cpu_to_le32(global_seq); |
@@ -712,6 +725,31 @@ out: | |||
712 | return ret; /* done! */ | 725 | return ret; /* done! */ |
713 | } | 726 | } |
714 | 727 | ||
728 | #ifdef CONFIG_BLOCK | ||
729 | static void init_bio_iter(struct bio *bio, struct bio **iter, int *seg) | ||
730 | { | ||
731 | if (!bio) { | ||
732 | *iter = NULL; | ||
733 | *seg = 0; | ||
734 | return; | ||
735 | } | ||
736 | *iter = bio; | ||
737 | *seg = bio->bi_idx; | ||
738 | } | ||
739 | |||
740 | static void iter_bio_next(struct bio **bio_iter, int *seg) | ||
741 | { | ||
742 | if (*bio_iter == NULL) | ||
743 | return; | ||
744 | |||
745 | BUG_ON(*seg >= (*bio_iter)->bi_vcnt); | ||
746 | |||
747 | (*seg)++; | ||
748 | if (*seg == (*bio_iter)->bi_vcnt) | ||
749 | init_bio_iter((*bio_iter)->bi_next, bio_iter, seg); | ||
750 | } | ||
751 | #endif | ||
752 | |||
715 | /* | 753 | /* |
716 | * Write as much message data payload as we can. If we finish, queue | 754 | * Write as much message data payload as we can. If we finish, queue |
717 | * up the footer. | 755 | * up the footer. |
@@ -726,21 +764,46 @@ static int write_partial_msg_pages(struct ceph_connection *con) | |||
726 | size_t len; | 764 | size_t len; |
727 | int crc = con->msgr->nocrc; | 765 | int crc = con->msgr->nocrc; |
728 | int ret; | 766 | int ret; |
767 | int total_max_write; | ||
768 | int in_trail = 0; | ||
769 | size_t trail_len = (msg->trail ? msg->trail->length : 0); | ||
729 | 770 | ||
730 | dout("write_partial_msg_pages %p msg %p page %d/%d offset %d\n", | 771 | dout("write_partial_msg_pages %p msg %p page %d/%d offset %d\n", |
731 | con, con->out_msg, con->out_msg_pos.page, con->out_msg->nr_pages, | 772 | con, con->out_msg, con->out_msg_pos.page, con->out_msg->nr_pages, |
732 | con->out_msg_pos.page_pos); | 773 | con->out_msg_pos.page_pos); |
733 | 774 | ||
734 | while (con->out_msg_pos.page < con->out_msg->nr_pages) { | 775 | #ifdef CONFIG_BLOCK |
776 | if (msg->bio && !msg->bio_iter) | ||
777 | init_bio_iter(msg->bio, &msg->bio_iter, &msg->bio_seg); | ||
778 | #endif | ||
779 | |||
780 | while (data_len > con->out_msg_pos.data_pos) { | ||
735 | struct page *page = NULL; | 781 | struct page *page = NULL; |
736 | void *kaddr = NULL; | 782 | void *kaddr = NULL; |
783 | int max_write = PAGE_SIZE; | ||
784 | int page_shift = 0; | ||
785 | |||
786 | total_max_write = data_len - trail_len - | ||
787 | con->out_msg_pos.data_pos; | ||
737 | 788 | ||
738 | /* | 789 | /* |
739 | * if we are calculating the data crc (the default), we need | 790 | * if we are calculating the data crc (the default), we need |
740 | * to map the page. if our pages[] has been revoked, use the | 791 | * to map the page. if our pages[] has been revoked, use the |
741 | * zero page. | 792 | * zero page. |
742 | */ | 793 | */ |
743 | if (msg->pages) { | 794 | |
795 | /* have we reached the trail part of the data? */ | ||
796 | if (con->out_msg_pos.data_pos >= data_len - trail_len) { | ||
797 | in_trail = 1; | ||
798 | |||
799 | total_max_write = data_len - con->out_msg_pos.data_pos; | ||
800 | |||
801 | page = list_first_entry(&msg->trail->head, | ||
802 | struct page, lru); | ||
803 | if (crc) | ||
804 | kaddr = kmap(page); | ||
805 | max_write = PAGE_SIZE; | ||
806 | } else if (msg->pages) { | ||
744 | page = msg->pages[con->out_msg_pos.page]; | 807 | page = msg->pages[con->out_msg_pos.page]; |
745 | if (crc) | 808 | if (crc) |
746 | kaddr = kmap(page); | 809 | kaddr = kmap(page); |
@@ -749,13 +812,25 @@ static int write_partial_msg_pages(struct ceph_connection *con) | |||
749 | struct page, lru); | 812 | struct page, lru); |
750 | if (crc) | 813 | if (crc) |
751 | kaddr = kmap(page); | 814 | kaddr = kmap(page); |
815 | #ifdef CONFIG_BLOCK | ||
816 | } else if (msg->bio) { | ||
817 | struct bio_vec *bv; | ||
818 | |||
819 | bv = bio_iovec_idx(msg->bio_iter, msg->bio_seg); | ||
820 | page = bv->bv_page; | ||
821 | page_shift = bv->bv_offset; | ||
822 | if (crc) | ||
823 | kaddr = kmap(page) + page_shift; | ||
824 | max_write = bv->bv_len; | ||
825 | #endif | ||
752 | } else { | 826 | } else { |
753 | page = con->msgr->zero_page; | 827 | page = con->msgr->zero_page; |
754 | if (crc) | 828 | if (crc) |
755 | kaddr = page_address(con->msgr->zero_page); | 829 | kaddr = page_address(con->msgr->zero_page); |
756 | } | 830 | } |
757 | len = min((int)(PAGE_SIZE - con->out_msg_pos.page_pos), | 831 | len = min_t(int, max_write - con->out_msg_pos.page_pos, |
758 | (int)(data_len - con->out_msg_pos.data_pos)); | 832 | total_max_write); |
833 | |||
759 | if (crc && !con->out_msg_pos.did_page_crc) { | 834 | if (crc && !con->out_msg_pos.did_page_crc) { |
760 | void *base = kaddr + con->out_msg_pos.page_pos; | 835 | void *base = kaddr + con->out_msg_pos.page_pos; |
761 | u32 tmpcrc = le32_to_cpu(con->out_msg->footer.data_crc); | 836 | u32 tmpcrc = le32_to_cpu(con->out_msg->footer.data_crc); |
@@ -765,13 +840,14 @@ static int write_partial_msg_pages(struct ceph_connection *con) | |||
765 | cpu_to_le32(crc32c(tmpcrc, base, len)); | 840 | cpu_to_le32(crc32c(tmpcrc, base, len)); |
766 | con->out_msg_pos.did_page_crc = 1; | 841 | con->out_msg_pos.did_page_crc = 1; |
767 | } | 842 | } |
768 | |||
769 | ret = kernel_sendpage(con->sock, page, | 843 | ret = kernel_sendpage(con->sock, page, |
770 | con->out_msg_pos.page_pos, len, | 844 | con->out_msg_pos.page_pos + page_shift, |
845 | len, | ||
771 | MSG_DONTWAIT | MSG_NOSIGNAL | | 846 | MSG_DONTWAIT | MSG_NOSIGNAL | |
772 | MSG_MORE); | 847 | MSG_MORE); |
773 | 848 | ||
774 | if (crc && (msg->pages || msg->pagelist)) | 849 | if (crc && |
850 | (msg->pages || msg->pagelist || msg->bio || in_trail)) | ||
775 | kunmap(page); | 851 | kunmap(page); |
776 | 852 | ||
777 | if (ret <= 0) | 853 | if (ret <= 0) |
@@ -783,9 +859,16 @@ static int write_partial_msg_pages(struct ceph_connection *con) | |||
783 | con->out_msg_pos.page_pos = 0; | 859 | con->out_msg_pos.page_pos = 0; |
784 | con->out_msg_pos.page++; | 860 | con->out_msg_pos.page++; |
785 | con->out_msg_pos.did_page_crc = 0; | 861 | con->out_msg_pos.did_page_crc = 0; |
786 | if (msg->pagelist) | 862 | if (in_trail) |
863 | list_move_tail(&page->lru, | ||
864 | &msg->trail->head); | ||
865 | else if (msg->pagelist) | ||
787 | list_move_tail(&page->lru, | 866 | list_move_tail(&page->lru, |
788 | &msg->pagelist->head); | 867 | &msg->pagelist->head); |
868 | #ifdef CONFIG_BLOCK | ||
869 | else if (msg->bio) | ||
870 | iter_bio_next(&msg->bio_iter, &msg->bio_seg); | ||
871 | #endif | ||
789 | } | 872 | } |
790 | } | 873 | } |
791 | 874 | ||
@@ -938,7 +1021,7 @@ static int verify_hello(struct ceph_connection *con) | |||
938 | { | 1021 | { |
939 | if (memcmp(con->in_banner, CEPH_BANNER, strlen(CEPH_BANNER))) { | 1022 | if (memcmp(con->in_banner, CEPH_BANNER, strlen(CEPH_BANNER))) { |
940 | pr_err("connect to %s got bad banner\n", | 1023 | pr_err("connect to %s got bad banner\n", |
941 | pr_addr(&con->peer_addr.in_addr)); | 1024 | ceph_pr_addr(&con->peer_addr.in_addr)); |
942 | con->error_msg = "protocol error, bad banner"; | 1025 | con->error_msg = "protocol error, bad banner"; |
943 | return -1; | 1026 | return -1; |
944 | } | 1027 | } |
@@ -1041,7 +1124,7 @@ int ceph_parse_ips(const char *c, const char *end, | |||
1041 | 1124 | ||
1042 | addr_set_port(ss, port); | 1125 | addr_set_port(ss, port); |
1043 | 1126 | ||
1044 | dout("parse_ips got %s\n", pr_addr(ss)); | 1127 | dout("parse_ips got %s\n", ceph_pr_addr(ss)); |
1045 | 1128 | ||
1046 | if (p == end) | 1129 | if (p == end) |
1047 | break; | 1130 | break; |
@@ -1061,6 +1144,7 @@ bad: | |||
1061 | pr_err("parse_ips bad ip '%.*s'\n", (int)(end - c), c); | 1144 | pr_err("parse_ips bad ip '%.*s'\n", (int)(end - c), c); |
1062 | return -EINVAL; | 1145 | return -EINVAL; |
1063 | } | 1146 | } |
1147 | EXPORT_SYMBOL(ceph_parse_ips); | ||
1064 | 1148 | ||
1065 | static int process_banner(struct ceph_connection *con) | 1149 | static int process_banner(struct ceph_connection *con) |
1066 | { | 1150 | { |
@@ -1082,9 +1166,9 @@ static int process_banner(struct ceph_connection *con) | |||
1082 | !(addr_is_blank(&con->actual_peer_addr.in_addr) && | 1166 | !(addr_is_blank(&con->actual_peer_addr.in_addr) && |
1083 | con->actual_peer_addr.nonce == con->peer_addr.nonce)) { | 1167 | con->actual_peer_addr.nonce == con->peer_addr.nonce)) { |
1084 | pr_warning("wrong peer, want %s/%d, got %s/%d\n", | 1168 | pr_warning("wrong peer, want %s/%d, got %s/%d\n", |
1085 | pr_addr(&con->peer_addr.in_addr), | 1169 | ceph_pr_addr(&con->peer_addr.in_addr), |
1086 | (int)le32_to_cpu(con->peer_addr.nonce), | 1170 | (int)le32_to_cpu(con->peer_addr.nonce), |
1087 | pr_addr(&con->actual_peer_addr.in_addr), | 1171 | ceph_pr_addr(&con->actual_peer_addr.in_addr), |
1088 | (int)le32_to_cpu(con->actual_peer_addr.nonce)); | 1172 | (int)le32_to_cpu(con->actual_peer_addr.nonce)); |
1089 | con->error_msg = "wrong peer at address"; | 1173 | con->error_msg = "wrong peer at address"; |
1090 | return -1; | 1174 | return -1; |
@@ -1102,7 +1186,7 @@ static int process_banner(struct ceph_connection *con) | |||
1102 | addr_set_port(&con->msgr->inst.addr.in_addr, port); | 1186 | addr_set_port(&con->msgr->inst.addr.in_addr, port); |
1103 | encode_my_addr(con->msgr); | 1187 | encode_my_addr(con->msgr); |
1104 | dout("process_banner learned my addr is %s\n", | 1188 | dout("process_banner learned my addr is %s\n", |
1105 | pr_addr(&con->msgr->inst.addr.in_addr)); | 1189 | ceph_pr_addr(&con->msgr->inst.addr.in_addr)); |
1106 | } | 1190 | } |
1107 | 1191 | ||
1108 | set_bit(NEGOTIATING, &con->state); | 1192 | set_bit(NEGOTIATING, &con->state); |
@@ -1123,8 +1207,8 @@ static void fail_protocol(struct ceph_connection *con) | |||
1123 | 1207 | ||
1124 | static int process_connect(struct ceph_connection *con) | 1208 | static int process_connect(struct ceph_connection *con) |
1125 | { | 1209 | { |
1126 | u64 sup_feat = CEPH_FEATURE_SUPPORTED; | 1210 | u64 sup_feat = con->msgr->supported_features; |
1127 | u64 req_feat = CEPH_FEATURE_REQUIRED; | 1211 | u64 req_feat = con->msgr->required_features; |
1128 | u64 server_feat = le64_to_cpu(con->in_reply.features); | 1212 | u64 server_feat = le64_to_cpu(con->in_reply.features); |
1129 | 1213 | ||
1130 | dout("process_connect on %p tag %d\n", con, (int)con->in_tag); | 1214 | dout("process_connect on %p tag %d\n", con, (int)con->in_tag); |
@@ -1134,7 +1218,7 @@ static int process_connect(struct ceph_connection *con) | |||
1134 | pr_err("%s%lld %s feature set mismatch," | 1218 | pr_err("%s%lld %s feature set mismatch," |
1135 | " my %llx < server's %llx, missing %llx\n", | 1219 | " my %llx < server's %llx, missing %llx\n", |
1136 | ENTITY_NAME(con->peer_name), | 1220 | ENTITY_NAME(con->peer_name), |
1137 | pr_addr(&con->peer_addr.in_addr), | 1221 | ceph_pr_addr(&con->peer_addr.in_addr), |
1138 | sup_feat, server_feat, server_feat & ~sup_feat); | 1222 | sup_feat, server_feat, server_feat & ~sup_feat); |
1139 | con->error_msg = "missing required protocol features"; | 1223 | con->error_msg = "missing required protocol features"; |
1140 | fail_protocol(con); | 1224 | fail_protocol(con); |
@@ -1144,7 +1228,7 @@ static int process_connect(struct ceph_connection *con) | |||
1144 | pr_err("%s%lld %s protocol version mismatch," | 1228 | pr_err("%s%lld %s protocol version mismatch," |
1145 | " my %d != server's %d\n", | 1229 | " my %d != server's %d\n", |
1146 | ENTITY_NAME(con->peer_name), | 1230 | ENTITY_NAME(con->peer_name), |
1147 | pr_addr(&con->peer_addr.in_addr), | 1231 | ceph_pr_addr(&con->peer_addr.in_addr), |
1148 | le32_to_cpu(con->out_connect.protocol_version), | 1232 | le32_to_cpu(con->out_connect.protocol_version), |
1149 | le32_to_cpu(con->in_reply.protocol_version)); | 1233 | le32_to_cpu(con->in_reply.protocol_version)); |
1150 | con->error_msg = "protocol version mismatch"; | 1234 | con->error_msg = "protocol version mismatch"; |
@@ -1178,7 +1262,7 @@ static int process_connect(struct ceph_connection *con) | |||
1178 | le32_to_cpu(con->in_connect.connect_seq)); | 1262 | le32_to_cpu(con->in_connect.connect_seq)); |
1179 | pr_err("%s%lld %s connection reset\n", | 1263 | pr_err("%s%lld %s connection reset\n", |
1180 | ENTITY_NAME(con->peer_name), | 1264 | ENTITY_NAME(con->peer_name), |
1181 | pr_addr(&con->peer_addr.in_addr)); | 1265 | ceph_pr_addr(&con->peer_addr.in_addr)); |
1182 | reset_connection(con); | 1266 | reset_connection(con); |
1183 | prepare_write_connect(con->msgr, con, 0); | 1267 | prepare_write_connect(con->msgr, con, 0); |
1184 | prepare_read_connect(con); | 1268 | prepare_read_connect(con); |
@@ -1223,7 +1307,7 @@ static int process_connect(struct ceph_connection *con) | |||
1223 | pr_err("%s%lld %s protocol feature mismatch," | 1307 | pr_err("%s%lld %s protocol feature mismatch," |
1224 | " my required %llx > server's %llx, need %llx\n", | 1308 | " my required %llx > server's %llx, need %llx\n", |
1225 | ENTITY_NAME(con->peer_name), | 1309 | ENTITY_NAME(con->peer_name), |
1226 | pr_addr(&con->peer_addr.in_addr), | 1310 | ceph_pr_addr(&con->peer_addr.in_addr), |
1227 | req_feat, server_feat, req_feat & ~server_feat); | 1311 | req_feat, server_feat, req_feat & ~server_feat); |
1228 | con->error_msg = "missing required protocol features"; | 1312 | con->error_msg = "missing required protocol features"; |
1229 | fail_protocol(con); | 1313 | fail_protocol(con); |
@@ -1305,8 +1389,7 @@ static int read_partial_message_section(struct ceph_connection *con, | |||
1305 | struct kvec *section, | 1389 | struct kvec *section, |
1306 | unsigned int sec_len, u32 *crc) | 1390 | unsigned int sec_len, u32 *crc) |
1307 | { | 1391 | { |
1308 | int left; | 1392 | int ret, left; |
1309 | int ret; | ||
1310 | 1393 | ||
1311 | BUG_ON(!section); | 1394 | BUG_ON(!section); |
1312 | 1395 | ||
@@ -1329,13 +1412,83 @@ static int read_partial_message_section(struct ceph_connection *con, | |||
1329 | static struct ceph_msg *ceph_alloc_msg(struct ceph_connection *con, | 1412 | static struct ceph_msg *ceph_alloc_msg(struct ceph_connection *con, |
1330 | struct ceph_msg_header *hdr, | 1413 | struct ceph_msg_header *hdr, |
1331 | int *skip); | 1414 | int *skip); |
1415 | |||
1416 | |||
1417 | static int read_partial_message_pages(struct ceph_connection *con, | ||
1418 | struct page **pages, | ||
1419 | unsigned data_len, int datacrc) | ||
1420 | { | ||
1421 | void *p; | ||
1422 | int ret; | ||
1423 | int left; | ||
1424 | |||
1425 | left = min((int)(data_len - con->in_msg_pos.data_pos), | ||
1426 | (int)(PAGE_SIZE - con->in_msg_pos.page_pos)); | ||
1427 | /* (page) data */ | ||
1428 | BUG_ON(pages == NULL); | ||
1429 | p = kmap(pages[con->in_msg_pos.page]); | ||
1430 | ret = ceph_tcp_recvmsg(con->sock, p + con->in_msg_pos.page_pos, | ||
1431 | left); | ||
1432 | if (ret > 0 && datacrc) | ||
1433 | con->in_data_crc = | ||
1434 | crc32c(con->in_data_crc, | ||
1435 | p + con->in_msg_pos.page_pos, ret); | ||
1436 | kunmap(pages[con->in_msg_pos.page]); | ||
1437 | if (ret <= 0) | ||
1438 | return ret; | ||
1439 | con->in_msg_pos.data_pos += ret; | ||
1440 | con->in_msg_pos.page_pos += ret; | ||
1441 | if (con->in_msg_pos.page_pos == PAGE_SIZE) { | ||
1442 | con->in_msg_pos.page_pos = 0; | ||
1443 | con->in_msg_pos.page++; | ||
1444 | } | ||
1445 | |||
1446 | return ret; | ||
1447 | } | ||
1448 | |||
1449 | #ifdef CONFIG_BLOCK | ||
1450 | static int read_partial_message_bio(struct ceph_connection *con, | ||
1451 | struct bio **bio_iter, int *bio_seg, | ||
1452 | unsigned data_len, int datacrc) | ||
1453 | { | ||
1454 | struct bio_vec *bv = bio_iovec_idx(*bio_iter, *bio_seg); | ||
1455 | void *p; | ||
1456 | int ret, left; | ||
1457 | |||
1458 | if (IS_ERR(bv)) | ||
1459 | return PTR_ERR(bv); | ||
1460 | |||
1461 | left = min((int)(data_len - con->in_msg_pos.data_pos), | ||
1462 | (int)(bv->bv_len - con->in_msg_pos.page_pos)); | ||
1463 | |||
1464 | p = kmap(bv->bv_page) + bv->bv_offset; | ||
1465 | |||
1466 | ret = ceph_tcp_recvmsg(con->sock, p + con->in_msg_pos.page_pos, | ||
1467 | left); | ||
1468 | if (ret > 0 && datacrc) | ||
1469 | con->in_data_crc = | ||
1470 | crc32c(con->in_data_crc, | ||
1471 | p + con->in_msg_pos.page_pos, ret); | ||
1472 | kunmap(bv->bv_page); | ||
1473 | if (ret <= 0) | ||
1474 | return ret; | ||
1475 | con->in_msg_pos.data_pos += ret; | ||
1476 | con->in_msg_pos.page_pos += ret; | ||
1477 | if (con->in_msg_pos.page_pos == bv->bv_len) { | ||
1478 | con->in_msg_pos.page_pos = 0; | ||
1479 | iter_bio_next(bio_iter, bio_seg); | ||
1480 | } | ||
1481 | |||
1482 | return ret; | ||
1483 | } | ||
1484 | #endif | ||
1485 | |||
1332 | /* | 1486 | /* |
1333 | * read (part of) a message. | 1487 | * read (part of) a message. |
1334 | */ | 1488 | */ |
1335 | static int read_partial_message(struct ceph_connection *con) | 1489 | static int read_partial_message(struct ceph_connection *con) |
1336 | { | 1490 | { |
1337 | struct ceph_msg *m = con->in_msg; | 1491 | struct ceph_msg *m = con->in_msg; |
1338 | void *p; | ||
1339 | int ret; | 1492 | int ret; |
1340 | int to, left; | 1493 | int to, left; |
1341 | unsigned front_len, middle_len, data_len, data_off; | 1494 | unsigned front_len, middle_len, data_len, data_off; |
@@ -1381,7 +1534,7 @@ static int read_partial_message(struct ceph_connection *con) | |||
1381 | if ((s64)seq - (s64)con->in_seq < 1) { | 1534 | if ((s64)seq - (s64)con->in_seq < 1) { |
1382 | pr_info("skipping %s%lld %s seq %lld, expected %lld\n", | 1535 | pr_info("skipping %s%lld %s seq %lld, expected %lld\n", |
1383 | ENTITY_NAME(con->peer_name), | 1536 | ENTITY_NAME(con->peer_name), |
1384 | pr_addr(&con->peer_addr.in_addr), | 1537 | ceph_pr_addr(&con->peer_addr.in_addr), |
1385 | seq, con->in_seq + 1); | 1538 | seq, con->in_seq + 1); |
1386 | con->in_base_pos = -front_len - middle_len - data_len - | 1539 | con->in_base_pos = -front_len - middle_len - data_len - |
1387 | sizeof(m->footer); | 1540 | sizeof(m->footer); |
@@ -1422,7 +1575,10 @@ static int read_partial_message(struct ceph_connection *con) | |||
1422 | m->middle->vec.iov_len = 0; | 1575 | m->middle->vec.iov_len = 0; |
1423 | 1576 | ||
1424 | con->in_msg_pos.page = 0; | 1577 | con->in_msg_pos.page = 0; |
1425 | con->in_msg_pos.page_pos = data_off & ~PAGE_MASK; | 1578 | if (m->pages) |
1579 | con->in_msg_pos.page_pos = data_off & ~PAGE_MASK; | ||
1580 | else | ||
1581 | con->in_msg_pos.page_pos = 0; | ||
1426 | con->in_msg_pos.data_pos = 0; | 1582 | con->in_msg_pos.data_pos = 0; |
1427 | } | 1583 | } |
1428 | 1584 | ||
@@ -1440,27 +1596,29 @@ static int read_partial_message(struct ceph_connection *con) | |||
1440 | if (ret <= 0) | 1596 | if (ret <= 0) |
1441 | return ret; | 1597 | return ret; |
1442 | } | 1598 | } |
1599 | #ifdef CONFIG_BLOCK | ||
1600 | if (m->bio && !m->bio_iter) | ||
1601 | init_bio_iter(m->bio, &m->bio_iter, &m->bio_seg); | ||
1602 | #endif | ||
1443 | 1603 | ||
1444 | /* (page) data */ | 1604 | /* (page) data */ |
1445 | while (con->in_msg_pos.data_pos < data_len) { | 1605 | while (con->in_msg_pos.data_pos < data_len) { |
1446 | left = min((int)(data_len - con->in_msg_pos.data_pos), | 1606 | if (m->pages) { |
1447 | (int)(PAGE_SIZE - con->in_msg_pos.page_pos)); | 1607 | ret = read_partial_message_pages(con, m->pages, |
1448 | BUG_ON(m->pages == NULL); | 1608 | data_len, datacrc); |
1449 | p = kmap(m->pages[con->in_msg_pos.page]); | 1609 | if (ret <= 0) |
1450 | ret = ceph_tcp_recvmsg(con->sock, p + con->in_msg_pos.page_pos, | 1610 | return ret; |
1451 | left); | 1611 | #ifdef CONFIG_BLOCK |
1452 | if (ret > 0 && datacrc) | 1612 | } else if (m->bio) { |
1453 | con->in_data_crc = | 1613 | |
1454 | crc32c(con->in_data_crc, | 1614 | ret = read_partial_message_bio(con, |
1455 | p + con->in_msg_pos.page_pos, ret); | 1615 | &m->bio_iter, &m->bio_seg, |
1456 | kunmap(m->pages[con->in_msg_pos.page]); | 1616 | data_len, datacrc); |
1457 | if (ret <= 0) | 1617 | if (ret <= 0) |
1458 | return ret; | 1618 | return ret; |
1459 | con->in_msg_pos.data_pos += ret; | 1619 | #endif |
1460 | con->in_msg_pos.page_pos += ret; | 1620 | } else { |
1461 | if (con->in_msg_pos.page_pos == PAGE_SIZE) { | 1621 | BUG_ON(1); |
1462 | con->in_msg_pos.page_pos = 0; | ||
1463 | con->in_msg_pos.page++; | ||
1464 | } | 1622 | } |
1465 | } | 1623 | } |
1466 | 1624 | ||
@@ -1874,9 +2032,9 @@ out: | |||
1874 | static void ceph_fault(struct ceph_connection *con) | 2032 | static void ceph_fault(struct ceph_connection *con) |
1875 | { | 2033 | { |
1876 | pr_err("%s%lld %s %s\n", ENTITY_NAME(con->peer_name), | 2034 | pr_err("%s%lld %s %s\n", ENTITY_NAME(con->peer_name), |
1877 | pr_addr(&con->peer_addr.in_addr), con->error_msg); | 2035 | ceph_pr_addr(&con->peer_addr.in_addr), con->error_msg); |
1878 | dout("fault %p state %lu to peer %s\n", | 2036 | dout("fault %p state %lu to peer %s\n", |
1879 | con, con->state, pr_addr(&con->peer_addr.in_addr)); | 2037 | con, con->state, ceph_pr_addr(&con->peer_addr.in_addr)); |
1880 | 2038 | ||
1881 | if (test_bit(LOSSYTX, &con->state)) { | 2039 | if (test_bit(LOSSYTX, &con->state)) { |
1882 | dout("fault on LOSSYTX channel\n"); | 2040 | dout("fault on LOSSYTX channel\n"); |
@@ -1936,7 +2094,9 @@ out: | |||
1936 | /* | 2094 | /* |
1937 | * create a new messenger instance | 2095 | * create a new messenger instance |
1938 | */ | 2096 | */ |
1939 | struct ceph_messenger *ceph_messenger_create(struct ceph_entity_addr *myaddr) | 2097 | struct ceph_messenger *ceph_messenger_create(struct ceph_entity_addr *myaddr, |
2098 | u32 supported_features, | ||
2099 | u32 required_features) | ||
1940 | { | 2100 | { |
1941 | struct ceph_messenger *msgr; | 2101 | struct ceph_messenger *msgr; |
1942 | 2102 | ||
@@ -1944,6 +2104,9 @@ struct ceph_messenger *ceph_messenger_create(struct ceph_entity_addr *myaddr) | |||
1944 | if (msgr == NULL) | 2104 | if (msgr == NULL) |
1945 | return ERR_PTR(-ENOMEM); | 2105 | return ERR_PTR(-ENOMEM); |
1946 | 2106 | ||
2107 | msgr->supported_features = supported_features; | ||
2108 | msgr->required_features = required_features; | ||
2109 | |||
1947 | spin_lock_init(&msgr->global_seq_lock); | 2110 | spin_lock_init(&msgr->global_seq_lock); |
1948 | 2111 | ||
1949 | /* the zero page is needed if a request is "canceled" while the message | 2112 | /* the zero page is needed if a request is "canceled" while the message |
@@ -1966,6 +2129,7 @@ struct ceph_messenger *ceph_messenger_create(struct ceph_entity_addr *myaddr) | |||
1966 | dout("messenger_create %p\n", msgr); | 2129 | dout("messenger_create %p\n", msgr); |
1967 | return msgr; | 2130 | return msgr; |
1968 | } | 2131 | } |
2132 | EXPORT_SYMBOL(ceph_messenger_create); | ||
1969 | 2133 | ||
1970 | void ceph_messenger_destroy(struct ceph_messenger *msgr) | 2134 | void ceph_messenger_destroy(struct ceph_messenger *msgr) |
1971 | { | 2135 | { |
@@ -1975,6 +2139,7 @@ void ceph_messenger_destroy(struct ceph_messenger *msgr) | |||
1975 | kfree(msgr); | 2139 | kfree(msgr); |
1976 | dout("destroyed messenger %p\n", msgr); | 2140 | dout("destroyed messenger %p\n", msgr); |
1977 | } | 2141 | } |
2142 | EXPORT_SYMBOL(ceph_messenger_destroy); | ||
1978 | 2143 | ||
1979 | /* | 2144 | /* |
1980 | * Queue up an outgoing message on the given connection. | 2145 | * Queue up an outgoing message on the given connection. |
@@ -2011,6 +2176,7 @@ void ceph_con_send(struct ceph_connection *con, struct ceph_msg *msg) | |||
2011 | if (test_and_set_bit(WRITE_PENDING, &con->state) == 0) | 2176 | if (test_and_set_bit(WRITE_PENDING, &con->state) == 0) |
2012 | queue_con(con); | 2177 | queue_con(con); |
2013 | } | 2178 | } |
2179 | EXPORT_SYMBOL(ceph_con_send); | ||
2014 | 2180 | ||
2015 | /* | 2181 | /* |
2016 | * Revoke a message that was previously queued for send | 2182 | * Revoke a message that was previously queued for send |
@@ -2076,6 +2242,7 @@ void ceph_con_keepalive(struct ceph_connection *con) | |||
2076 | test_and_set_bit(WRITE_PENDING, &con->state) == 0) | 2242 | test_and_set_bit(WRITE_PENDING, &con->state) == 0) |
2077 | queue_con(con); | 2243 | queue_con(con); |
2078 | } | 2244 | } |
2245 | EXPORT_SYMBOL(ceph_con_keepalive); | ||
2079 | 2246 | ||
2080 | 2247 | ||
2081 | /* | 2248 | /* |
@@ -2136,6 +2303,10 @@ struct ceph_msg *ceph_msg_new(int type, int front_len, gfp_t flags) | |||
2136 | m->nr_pages = 0; | 2303 | m->nr_pages = 0; |
2137 | m->pages = NULL; | 2304 | m->pages = NULL; |
2138 | m->pagelist = NULL; | 2305 | m->pagelist = NULL; |
2306 | m->bio = NULL; | ||
2307 | m->bio_iter = NULL; | ||
2308 | m->bio_seg = 0; | ||
2309 | m->trail = NULL; | ||
2139 | 2310 | ||
2140 | dout("ceph_msg_new %p front %d\n", m, front_len); | 2311 | dout("ceph_msg_new %p front %d\n", m, front_len); |
2141 | return m; | 2312 | return m; |
@@ -2146,6 +2317,7 @@ out: | |||
2146 | pr_err("msg_new can't create type %d front %d\n", type, front_len); | 2317 | pr_err("msg_new can't create type %d front %d\n", type, front_len); |
2147 | return NULL; | 2318 | return NULL; |
2148 | } | 2319 | } |
2320 | EXPORT_SYMBOL(ceph_msg_new); | ||
2149 | 2321 | ||
2150 | /* | 2322 | /* |
2151 | * Allocate "middle" portion of a message, if it is needed and wasn't | 2323 | * Allocate "middle" portion of a message, if it is needed and wasn't |
@@ -2250,11 +2422,14 @@ void ceph_msg_last_put(struct kref *kref) | |||
2250 | m->pagelist = NULL; | 2422 | m->pagelist = NULL; |
2251 | } | 2423 | } |
2252 | 2424 | ||
2425 | m->trail = NULL; | ||
2426 | |||
2253 | if (m->pool) | 2427 | if (m->pool) |
2254 | ceph_msgpool_put(m->pool, m); | 2428 | ceph_msgpool_put(m->pool, m); |
2255 | else | 2429 | else |
2256 | ceph_msg_kfree(m); | 2430 | ceph_msg_kfree(m); |
2257 | } | 2431 | } |
2432 | EXPORT_SYMBOL(ceph_msg_last_put); | ||
2258 | 2433 | ||
2259 | void ceph_msg_dump(struct ceph_msg *msg) | 2434 | void ceph_msg_dump(struct ceph_msg *msg) |
2260 | { | 2435 | { |
@@ -2275,3 +2450,4 @@ void ceph_msg_dump(struct ceph_msg *msg) | |||
2275 | DUMP_PREFIX_OFFSET, 16, 1, | 2450 | DUMP_PREFIX_OFFSET, 16, 1, |
2276 | &msg->footer, sizeof(msg->footer), true); | 2451 | &msg->footer, sizeof(msg->footer), true); |
2277 | } | 2452 | } |
2453 | EXPORT_SYMBOL(ceph_msg_dump); | ||
diff --git a/fs/ceph/mon_client.c b/net/ceph/mon_client.c index b2a5a3e4a671..8a079399174a 100644 --- a/fs/ceph/mon_client.c +++ b/net/ceph/mon_client.c | |||
@@ -1,14 +1,16 @@ | |||
1 | #include "ceph_debug.h" | 1 | #include <linux/ceph/ceph_debug.h> |
2 | 2 | ||
3 | #include <linux/module.h> | ||
3 | #include <linux/types.h> | 4 | #include <linux/types.h> |
4 | #include <linux/slab.h> | 5 | #include <linux/slab.h> |
5 | #include <linux/random.h> | 6 | #include <linux/random.h> |
6 | #include <linux/sched.h> | 7 | #include <linux/sched.h> |
7 | 8 | ||
8 | #include "mon_client.h" | 9 | #include <linux/ceph/mon_client.h> |
9 | #include "super.h" | 10 | #include <linux/ceph/libceph.h> |
10 | #include "auth.h" | 11 | #include <linux/ceph/decode.h> |
11 | #include "decode.h" | 12 | |
13 | #include <linux/ceph/auth.h> | ||
12 | 14 | ||
13 | /* | 15 | /* |
14 | * Interact with Ceph monitor cluster. Handle requests for new map | 16 | * Interact with Ceph monitor cluster. Handle requests for new map |
@@ -74,7 +76,7 @@ struct ceph_monmap *ceph_monmap_decode(void *p, void *end) | |||
74 | m->num_mon); | 76 | m->num_mon); |
75 | for (i = 0; i < m->num_mon; i++) | 77 | for (i = 0; i < m->num_mon; i++) |
76 | dout("monmap_decode mon%d is %s\n", i, | 78 | dout("monmap_decode mon%d is %s\n", i, |
77 | pr_addr(&m->mon_inst[i].addr.in_addr)); | 79 | ceph_pr_addr(&m->mon_inst[i].addr.in_addr)); |
78 | return m; | 80 | return m; |
79 | 81 | ||
80 | bad: | 82 | bad: |
@@ -191,30 +193,33 @@ static void __send_subscribe(struct ceph_mon_client *monc) | |||
191 | struct ceph_msg *msg = monc->m_subscribe; | 193 | struct ceph_msg *msg = monc->m_subscribe; |
192 | struct ceph_mon_subscribe_item *i; | 194 | struct ceph_mon_subscribe_item *i; |
193 | void *p, *end; | 195 | void *p, *end; |
196 | int num; | ||
194 | 197 | ||
195 | p = msg->front.iov_base; | 198 | p = msg->front.iov_base; |
196 | end = p + msg->front_max; | 199 | end = p + msg->front_max; |
197 | 200 | ||
198 | dout("__send_subscribe to 'mdsmap' %u+\n", | 201 | num = 1 + !!monc->want_next_osdmap + !!monc->want_mdsmap; |
199 | (unsigned)monc->have_mdsmap); | 202 | ceph_encode_32(&p, num); |
203 | |||
200 | if (monc->want_next_osdmap) { | 204 | if (monc->want_next_osdmap) { |
201 | dout("__send_subscribe to 'osdmap' %u\n", | 205 | dout("__send_subscribe to 'osdmap' %u\n", |
202 | (unsigned)monc->have_osdmap); | 206 | (unsigned)monc->have_osdmap); |
203 | ceph_encode_32(&p, 3); | ||
204 | ceph_encode_string(&p, end, "osdmap", 6); | 207 | ceph_encode_string(&p, end, "osdmap", 6); |
205 | i = p; | 208 | i = p; |
206 | i->have = cpu_to_le64(monc->have_osdmap); | 209 | i->have = cpu_to_le64(monc->have_osdmap); |
207 | i->onetime = 1; | 210 | i->onetime = 1; |
208 | p += sizeof(*i); | 211 | p += sizeof(*i); |
209 | monc->want_next_osdmap = 2; /* requested */ | 212 | monc->want_next_osdmap = 2; /* requested */ |
210 | } else { | ||
211 | ceph_encode_32(&p, 2); | ||
212 | } | 213 | } |
213 | ceph_encode_string(&p, end, "mdsmap", 6); | 214 | if (monc->want_mdsmap) { |
214 | i = p; | 215 | dout("__send_subscribe to 'mdsmap' %u+\n", |
215 | i->have = cpu_to_le64(monc->have_mdsmap); | 216 | (unsigned)monc->have_mdsmap); |
216 | i->onetime = 0; | 217 | ceph_encode_string(&p, end, "mdsmap", 6); |
217 | p += sizeof(*i); | 218 | i = p; |
219 | i->have = cpu_to_le64(monc->have_mdsmap); | ||
220 | i->onetime = 0; | ||
221 | p += sizeof(*i); | ||
222 | } | ||
218 | ceph_encode_string(&p, end, "monmap", 6); | 223 | ceph_encode_string(&p, end, "monmap", 6); |
219 | i = p; | 224 | i = p; |
220 | i->have = 0; | 225 | i->have = 0; |
@@ -243,7 +248,8 @@ static void handle_subscribe_ack(struct ceph_mon_client *monc, | |||
243 | mutex_lock(&monc->mutex); | 248 | mutex_lock(&monc->mutex); |
244 | if (monc->hunting) { | 249 | if (monc->hunting) { |
245 | pr_info("mon%d %s session established\n", | 250 | pr_info("mon%d %s session established\n", |
246 | monc->cur_mon, pr_addr(&monc->con->peer_addr.in_addr)); | 251 | monc->cur_mon, |
252 | ceph_pr_addr(&monc->con->peer_addr.in_addr)); | ||
247 | monc->hunting = false; | 253 | monc->hunting = false; |
248 | } | 254 | } |
249 | dout("handle_subscribe_ack after %d seconds\n", seconds); | 255 | dout("handle_subscribe_ack after %d seconds\n", seconds); |
@@ -266,6 +272,7 @@ int ceph_monc_got_mdsmap(struct ceph_mon_client *monc, u32 got) | |||
266 | mutex_unlock(&monc->mutex); | 272 | mutex_unlock(&monc->mutex); |
267 | return 0; | 273 | return 0; |
268 | } | 274 | } |
275 | EXPORT_SYMBOL(ceph_monc_got_mdsmap); | ||
269 | 276 | ||
270 | int ceph_monc_got_osdmap(struct ceph_mon_client *monc, u32 got) | 277 | int ceph_monc_got_osdmap(struct ceph_mon_client *monc, u32 got) |
271 | { | 278 | { |
@@ -310,6 +317,7 @@ int ceph_monc_open_session(struct ceph_mon_client *monc) | |||
310 | mutex_unlock(&monc->mutex); | 317 | mutex_unlock(&monc->mutex); |
311 | return 0; | 318 | return 0; |
312 | } | 319 | } |
320 | EXPORT_SYMBOL(ceph_monc_open_session); | ||
313 | 321 | ||
314 | /* | 322 | /* |
315 | * The monitor responds with mount ack indicate mount success. The | 323 | * The monitor responds with mount ack indicate mount success. The |
@@ -540,6 +548,7 @@ out: | |||
540 | kref_put(&req->kref, release_generic_request); | 548 | kref_put(&req->kref, release_generic_request); |
541 | return err; | 549 | return err; |
542 | } | 550 | } |
551 | EXPORT_SYMBOL(ceph_monc_do_statfs); | ||
543 | 552 | ||
544 | /* | 553 | /* |
545 | * pool ops | 554 | * pool ops |
@@ -651,6 +660,7 @@ int ceph_monc_create_snapid(struct ceph_mon_client *monc, | |||
651 | pool, 0, (char *)snapid, sizeof(*snapid)); | 660 | pool, 0, (char *)snapid, sizeof(*snapid)); |
652 | 661 | ||
653 | } | 662 | } |
663 | EXPORT_SYMBOL(ceph_monc_create_snapid); | ||
654 | 664 | ||
655 | int ceph_monc_delete_snapid(struct ceph_mon_client *monc, | 665 | int ceph_monc_delete_snapid(struct ceph_mon_client *monc, |
656 | u32 pool, u64 snapid) | 666 | u32 pool, u64 snapid) |
@@ -708,9 +718,9 @@ static void delayed_work(struct work_struct *work) | |||
708 | */ | 718 | */ |
709 | static int build_initial_monmap(struct ceph_mon_client *monc) | 719 | static int build_initial_monmap(struct ceph_mon_client *monc) |
710 | { | 720 | { |
711 | struct ceph_mount_args *args = monc->client->mount_args; | 721 | struct ceph_options *opt = monc->client->options; |
712 | struct ceph_entity_addr *mon_addr = args->mon_addr; | 722 | struct ceph_entity_addr *mon_addr = opt->mon_addr; |
713 | int num_mon = args->num_mon; | 723 | int num_mon = opt->num_mon; |
714 | int i; | 724 | int i; |
715 | 725 | ||
716 | /* build initial monmap */ | 726 | /* build initial monmap */ |
@@ -728,11 +738,6 @@ static int build_initial_monmap(struct ceph_mon_client *monc) | |||
728 | } | 738 | } |
729 | monc->monmap->num_mon = num_mon; | 739 | monc->monmap->num_mon = num_mon; |
730 | monc->have_fsid = false; | 740 | monc->have_fsid = false; |
731 | |||
732 | /* release addr memory */ | ||
733 | kfree(args->mon_addr); | ||
734 | args->mon_addr = NULL; | ||
735 | args->num_mon = 0; | ||
736 | return 0; | 741 | return 0; |
737 | } | 742 | } |
738 | 743 | ||
@@ -753,8 +758,8 @@ int ceph_monc_init(struct ceph_mon_client *monc, struct ceph_client *cl) | |||
753 | monc->con = NULL; | 758 | monc->con = NULL; |
754 | 759 | ||
755 | /* authentication */ | 760 | /* authentication */ |
756 | monc->auth = ceph_auth_init(cl->mount_args->name, | 761 | monc->auth = ceph_auth_init(cl->options->name, |
757 | cl->mount_args->secret); | 762 | cl->options->secret); |
758 | if (IS_ERR(monc->auth)) | 763 | if (IS_ERR(monc->auth)) |
759 | return PTR_ERR(monc->auth); | 764 | return PTR_ERR(monc->auth); |
760 | monc->auth->want_keys = | 765 | monc->auth->want_keys = |
@@ -808,6 +813,7 @@ out_monmap: | |||
808 | out: | 813 | out: |
809 | return err; | 814 | return err; |
810 | } | 815 | } |
816 | EXPORT_SYMBOL(ceph_monc_init); | ||
811 | 817 | ||
812 | void ceph_monc_stop(struct ceph_mon_client *monc) | 818 | void ceph_monc_stop(struct ceph_mon_client *monc) |
813 | { | 819 | { |
@@ -832,6 +838,7 @@ void ceph_monc_stop(struct ceph_mon_client *monc) | |||
832 | 838 | ||
833 | kfree(monc->monmap); | 839 | kfree(monc->monmap); |
834 | } | 840 | } |
841 | EXPORT_SYMBOL(ceph_monc_stop); | ||
835 | 842 | ||
836 | static void handle_auth_reply(struct ceph_mon_client *monc, | 843 | static void handle_auth_reply(struct ceph_mon_client *monc, |
837 | struct ceph_msg *msg) | 844 | struct ceph_msg *msg) |
@@ -889,6 +896,7 @@ int ceph_monc_validate_auth(struct ceph_mon_client *monc) | |||
889 | mutex_unlock(&monc->mutex); | 896 | mutex_unlock(&monc->mutex); |
890 | return ret; | 897 | return ret; |
891 | } | 898 | } |
899 | EXPORT_SYMBOL(ceph_monc_validate_auth); | ||
892 | 900 | ||
893 | /* | 901 | /* |
894 | * handle incoming message | 902 | * handle incoming message |
@@ -922,15 +930,16 @@ static void dispatch(struct ceph_connection *con, struct ceph_msg *msg) | |||
922 | ceph_monc_handle_map(monc, msg); | 930 | ceph_monc_handle_map(monc, msg); |
923 | break; | 931 | break; |
924 | 932 | ||
925 | case CEPH_MSG_MDS_MAP: | ||
926 | ceph_mdsc_handle_map(&monc->client->mdsc, msg); | ||
927 | break; | ||
928 | |||
929 | case CEPH_MSG_OSD_MAP: | 933 | case CEPH_MSG_OSD_MAP: |
930 | ceph_osdc_handle_map(&monc->client->osdc, msg); | 934 | ceph_osdc_handle_map(&monc->client->osdc, msg); |
931 | break; | 935 | break; |
932 | 936 | ||
933 | default: | 937 | default: |
938 | /* can the chained handler handle it? */ | ||
939 | if (monc->client->extra_mon_dispatch && | ||
940 | monc->client->extra_mon_dispatch(monc->client, msg) == 0) | ||
941 | break; | ||
942 | |||
934 | pr_err("received unknown message type %d %s\n", type, | 943 | pr_err("received unknown message type %d %s\n", type, |
935 | ceph_msg_type_name(type)); | 944 | ceph_msg_type_name(type)); |
936 | } | 945 | } |
@@ -994,7 +1003,7 @@ static void mon_fault(struct ceph_connection *con) | |||
994 | if (monc->con && !monc->hunting) | 1003 | if (monc->con && !monc->hunting) |
995 | pr_info("mon%d %s session lost, " | 1004 | pr_info("mon%d %s session lost, " |
996 | "hunting for new mon\n", monc->cur_mon, | 1005 | "hunting for new mon\n", monc->cur_mon, |
997 | pr_addr(&monc->con->peer_addr.in_addr)); | 1006 | ceph_pr_addr(&monc->con->peer_addr.in_addr)); |
998 | 1007 | ||
999 | __close_session(monc); | 1008 | __close_session(monc); |
1000 | if (!monc->hunting) { | 1009 | if (!monc->hunting) { |
diff --git a/fs/ceph/msgpool.c b/net/ceph/msgpool.c index dd65a6438131..d5f2d97ac05c 100644 --- a/fs/ceph/msgpool.c +++ b/net/ceph/msgpool.c | |||
@@ -1,11 +1,11 @@ | |||
1 | #include "ceph_debug.h" | 1 | #include <linux/ceph/ceph_debug.h> |
2 | 2 | ||
3 | #include <linux/err.h> | 3 | #include <linux/err.h> |
4 | #include <linux/sched.h> | 4 | #include <linux/sched.h> |
5 | #include <linux/types.h> | 5 | #include <linux/types.h> |
6 | #include <linux/vmalloc.h> | 6 | #include <linux/vmalloc.h> |
7 | 7 | ||
8 | #include "msgpool.h" | 8 | #include <linux/ceph/msgpool.h> |
9 | 9 | ||
10 | static void *alloc_fn(gfp_t gfp_mask, void *arg) | 10 | static void *alloc_fn(gfp_t gfp_mask, void *arg) |
11 | { | 11 | { |
diff --git a/fs/ceph/osd_client.c b/net/ceph/osd_client.c index 3b5571b8ce22..79391994b3ed 100644 --- a/fs/ceph/osd_client.c +++ b/net/ceph/osd_client.c | |||
@@ -1,17 +1,22 @@ | |||
1 | #include "ceph_debug.h" | 1 | #include <linux/ceph/ceph_debug.h> |
2 | 2 | ||
3 | #include <linux/module.h> | ||
3 | #include <linux/err.h> | 4 | #include <linux/err.h> |
4 | #include <linux/highmem.h> | 5 | #include <linux/highmem.h> |
5 | #include <linux/mm.h> | 6 | #include <linux/mm.h> |
6 | #include <linux/pagemap.h> | 7 | #include <linux/pagemap.h> |
7 | #include <linux/slab.h> | 8 | #include <linux/slab.h> |
8 | #include <linux/uaccess.h> | 9 | #include <linux/uaccess.h> |
10 | #ifdef CONFIG_BLOCK | ||
11 | #include <linux/bio.h> | ||
12 | #endif | ||
9 | 13 | ||
10 | #include "super.h" | 14 | #include <linux/ceph/libceph.h> |
11 | #include "osd_client.h" | 15 | #include <linux/ceph/osd_client.h> |
12 | #include "messenger.h" | 16 | #include <linux/ceph/messenger.h> |
13 | #include "decode.h" | 17 | #include <linux/ceph/decode.h> |
14 | #include "auth.h" | 18 | #include <linux/ceph/auth.h> |
19 | #include <linux/ceph/pagelist.h> | ||
15 | 20 | ||
16 | #define OSD_OP_FRONT_LEN 4096 | 21 | #define OSD_OP_FRONT_LEN 4096 |
17 | #define OSD_OPREPLY_FRONT_LEN 512 | 22 | #define OSD_OPREPLY_FRONT_LEN 512 |
@@ -22,6 +27,59 @@ static int __kick_requests(struct ceph_osd_client *osdc, | |||
22 | 27 | ||
23 | static void kick_requests(struct ceph_osd_client *osdc, struct ceph_osd *osd); | 28 | static void kick_requests(struct ceph_osd_client *osdc, struct ceph_osd *osd); |
24 | 29 | ||
30 | static int op_needs_trail(int op) | ||
31 | { | ||
32 | switch (op) { | ||
33 | case CEPH_OSD_OP_GETXATTR: | ||
34 | case CEPH_OSD_OP_SETXATTR: | ||
35 | case CEPH_OSD_OP_CMPXATTR: | ||
36 | case CEPH_OSD_OP_CALL: | ||
37 | return 1; | ||
38 | default: | ||
39 | return 0; | ||
40 | } | ||
41 | } | ||
42 | |||
43 | static int op_has_extent(int op) | ||
44 | { | ||
45 | return (op == CEPH_OSD_OP_READ || | ||
46 | op == CEPH_OSD_OP_WRITE); | ||
47 | } | ||
48 | |||
49 | void ceph_calc_raw_layout(struct ceph_osd_client *osdc, | ||
50 | struct ceph_file_layout *layout, | ||
51 | u64 snapid, | ||
52 | u64 off, u64 *plen, u64 *bno, | ||
53 | struct ceph_osd_request *req, | ||
54 | struct ceph_osd_req_op *op) | ||
55 | { | ||
56 | struct ceph_osd_request_head *reqhead = req->r_request->front.iov_base; | ||
57 | u64 orig_len = *plen; | ||
58 | u64 objoff, objlen; /* extent in object */ | ||
59 | |||
60 | reqhead->snapid = cpu_to_le64(snapid); | ||
61 | |||
62 | /* object extent? */ | ||
63 | ceph_calc_file_object_mapping(layout, off, plen, bno, | ||
64 | &objoff, &objlen); | ||
65 | if (*plen < orig_len) | ||
66 | dout(" skipping last %llu, final file extent %llu~%llu\n", | ||
67 | orig_len - *plen, off, *plen); | ||
68 | |||
69 | if (op_has_extent(op->op)) { | ||
70 | op->extent.offset = objoff; | ||
71 | op->extent.length = objlen; | ||
72 | } | ||
73 | req->r_num_pages = calc_pages_for(off, *plen); | ||
74 | if (op->op == CEPH_OSD_OP_WRITE) | ||
75 | op->payload_len = *plen; | ||
76 | |||
77 | dout("calc_layout bno=%llx %llu~%llu (%d pages)\n", | ||
78 | *bno, objoff, objlen, req->r_num_pages); | ||
79 | |||
80 | } | ||
81 | EXPORT_SYMBOL(ceph_calc_raw_layout); | ||
82 | |||
25 | /* | 83 | /* |
26 | * Implement client access to distributed object storage cluster. | 84 | * Implement client access to distributed object storage cluster. |
27 | * | 85 | * |
@@ -48,34 +106,19 @@ static void kick_requests(struct ceph_osd_client *osdc, struct ceph_osd *osd); | |||
48 | * fill osd op in request message. | 106 | * fill osd op in request message. |
49 | */ | 107 | */ |
50 | static void calc_layout(struct ceph_osd_client *osdc, | 108 | static void calc_layout(struct ceph_osd_client *osdc, |
51 | struct ceph_vino vino, struct ceph_file_layout *layout, | 109 | struct ceph_vino vino, |
110 | struct ceph_file_layout *layout, | ||
52 | u64 off, u64 *plen, | 111 | u64 off, u64 *plen, |
53 | struct ceph_osd_request *req) | 112 | struct ceph_osd_request *req, |
113 | struct ceph_osd_req_op *op) | ||
54 | { | 114 | { |
55 | struct ceph_osd_request_head *reqhead = req->r_request->front.iov_base; | ||
56 | struct ceph_osd_op *op = (void *)(reqhead + 1); | ||
57 | u64 orig_len = *plen; | ||
58 | u64 objoff, objlen; /* extent in object */ | ||
59 | u64 bno; | 115 | u64 bno; |
60 | 116 | ||
61 | reqhead->snapid = cpu_to_le64(vino.snap); | 117 | ceph_calc_raw_layout(osdc, layout, vino.snap, off, |
62 | 118 | plen, &bno, req, op); | |
63 | /* object extent? */ | ||
64 | ceph_calc_file_object_mapping(layout, off, plen, &bno, | ||
65 | &objoff, &objlen); | ||
66 | if (*plen < orig_len) | ||
67 | dout(" skipping last %llu, final file extent %llu~%llu\n", | ||
68 | orig_len - *plen, off, *plen); | ||
69 | 119 | ||
70 | sprintf(req->r_oid, "%llx.%08llx", vino.ino, bno); | 120 | sprintf(req->r_oid, "%llx.%08llx", vino.ino, bno); |
71 | req->r_oid_len = strlen(req->r_oid); | 121 | req->r_oid_len = strlen(req->r_oid); |
72 | |||
73 | op->extent.offset = cpu_to_le64(objoff); | ||
74 | op->extent.length = cpu_to_le64(objlen); | ||
75 | req->r_num_pages = calc_pages_for(off, *plen); | ||
76 | |||
77 | dout("calc_layout %s (%d) %llu~%llu (%d pages)\n", | ||
78 | req->r_oid, req->r_oid_len, objoff, objlen, req->r_num_pages); | ||
79 | } | 122 | } |
80 | 123 | ||
81 | /* | 124 | /* |
@@ -101,56 +144,66 @@ void ceph_osdc_release_request(struct kref *kref) | |||
101 | if (req->r_own_pages) | 144 | if (req->r_own_pages) |
102 | ceph_release_page_vector(req->r_pages, | 145 | ceph_release_page_vector(req->r_pages, |
103 | req->r_num_pages); | 146 | req->r_num_pages); |
147 | #ifdef CONFIG_BLOCK | ||
148 | if (req->r_bio) | ||
149 | bio_put(req->r_bio); | ||
150 | #endif | ||
104 | ceph_put_snap_context(req->r_snapc); | 151 | ceph_put_snap_context(req->r_snapc); |
152 | if (req->r_trail) { | ||
153 | ceph_pagelist_release(req->r_trail); | ||
154 | kfree(req->r_trail); | ||
155 | } | ||
105 | if (req->r_mempool) | 156 | if (req->r_mempool) |
106 | mempool_free(req, req->r_osdc->req_mempool); | 157 | mempool_free(req, req->r_osdc->req_mempool); |
107 | else | 158 | else |
108 | kfree(req); | 159 | kfree(req); |
109 | } | 160 | } |
161 | EXPORT_SYMBOL(ceph_osdc_release_request); | ||
110 | 162 | ||
111 | /* | 163 | static int get_num_ops(struct ceph_osd_req_op *ops, int *needs_trail) |
112 | * build new request AND message, calculate layout, and adjust file | 164 | { |
113 | * extent as needed. | 165 | int i = 0; |
114 | * | 166 | |
115 | * if the file was recently truncated, we include information about its | 167 | if (needs_trail) |
116 | * old and new size so that the object can be updated appropriately. (we | 168 | *needs_trail = 0; |
117 | * avoid synchronously deleting truncated objects because it's slow.) | 169 | while (ops[i].op) { |
118 | * | 170 | if (needs_trail && op_needs_trail(ops[i].op)) |
119 | * if @do_sync, include a 'startsync' command so that the osd will flush | 171 | *needs_trail = 1; |
120 | * data quickly. | 172 | i++; |
121 | */ | 173 | } |
122 | struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc, | 174 | |
123 | struct ceph_file_layout *layout, | 175 | return i; |
124 | struct ceph_vino vino, | 176 | } |
125 | u64 off, u64 *plen, | 177 | |
126 | int opcode, int flags, | 178 | struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc, |
179 | int flags, | ||
127 | struct ceph_snap_context *snapc, | 180 | struct ceph_snap_context *snapc, |
128 | int do_sync, | 181 | struct ceph_osd_req_op *ops, |
129 | u32 truncate_seq, | 182 | bool use_mempool, |
130 | u64 truncate_size, | 183 | gfp_t gfp_flags, |
131 | struct timespec *mtime, | 184 | struct page **pages, |
132 | bool use_mempool, int num_reply) | 185 | struct bio *bio) |
133 | { | 186 | { |
134 | struct ceph_osd_request *req; | 187 | struct ceph_osd_request *req; |
135 | struct ceph_msg *msg; | 188 | struct ceph_msg *msg; |
136 | struct ceph_osd_request_head *head; | 189 | int needs_trail; |
137 | struct ceph_osd_op *op; | 190 | int num_op = get_num_ops(ops, &needs_trail); |
138 | void *p; | 191 | size_t msg_size = sizeof(struct ceph_osd_request_head); |
139 | int num_op = 1 + do_sync; | 192 | |
140 | size_t msg_size = sizeof(*head) + num_op*sizeof(*op); | 193 | msg_size += num_op*sizeof(struct ceph_osd_op); |
141 | int i; | ||
142 | 194 | ||
143 | if (use_mempool) { | 195 | if (use_mempool) { |
144 | req = mempool_alloc(osdc->req_mempool, GFP_NOFS); | 196 | req = mempool_alloc(osdc->req_mempool, gfp_flags); |
145 | memset(req, 0, sizeof(*req)); | 197 | memset(req, 0, sizeof(*req)); |
146 | } else { | 198 | } else { |
147 | req = kzalloc(sizeof(*req), GFP_NOFS); | 199 | req = kzalloc(sizeof(*req), gfp_flags); |
148 | } | 200 | } |
149 | if (req == NULL) | 201 | if (req == NULL) |
150 | return NULL; | 202 | return NULL; |
151 | 203 | ||
152 | req->r_osdc = osdc; | 204 | req->r_osdc = osdc; |
153 | req->r_mempool = use_mempool; | 205 | req->r_mempool = use_mempool; |
206 | |||
154 | kref_init(&req->r_kref); | 207 | kref_init(&req->r_kref); |
155 | init_completion(&req->r_completion); | 208 | init_completion(&req->r_completion); |
156 | init_completion(&req->r_safe_completion); | 209 | init_completion(&req->r_safe_completion); |
@@ -164,13 +217,22 @@ struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc, | |||
164 | msg = ceph_msgpool_get(&osdc->msgpool_op_reply, 0); | 217 | msg = ceph_msgpool_get(&osdc->msgpool_op_reply, 0); |
165 | else | 218 | else |
166 | msg = ceph_msg_new(CEPH_MSG_OSD_OPREPLY, | 219 | msg = ceph_msg_new(CEPH_MSG_OSD_OPREPLY, |
167 | OSD_OPREPLY_FRONT_LEN, GFP_NOFS); | 220 | OSD_OPREPLY_FRONT_LEN, gfp_flags); |
168 | if (!msg) { | 221 | if (!msg) { |
169 | ceph_osdc_put_request(req); | 222 | ceph_osdc_put_request(req); |
170 | return NULL; | 223 | return NULL; |
171 | } | 224 | } |
172 | req->r_reply = msg; | 225 | req->r_reply = msg; |
173 | 226 | ||
227 | /* allocate space for the trailing data */ | ||
228 | if (needs_trail) { | ||
229 | req->r_trail = kmalloc(sizeof(struct ceph_pagelist), gfp_flags); | ||
230 | if (!req->r_trail) { | ||
231 | ceph_osdc_put_request(req); | ||
232 | return NULL; | ||
233 | } | ||
234 | ceph_pagelist_init(req->r_trail); | ||
235 | } | ||
174 | /* create request message; allow space for oid */ | 236 | /* create request message; allow space for oid */ |
175 | msg_size += 40; | 237 | msg_size += 40; |
176 | if (snapc) | 238 | if (snapc) |
@@ -178,18 +240,115 @@ struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc, | |||
178 | if (use_mempool) | 240 | if (use_mempool) |
179 | msg = ceph_msgpool_get(&osdc->msgpool_op, 0); | 241 | msg = ceph_msgpool_get(&osdc->msgpool_op, 0); |
180 | else | 242 | else |
181 | msg = ceph_msg_new(CEPH_MSG_OSD_OP, msg_size, GFP_NOFS); | 243 | msg = ceph_msg_new(CEPH_MSG_OSD_OP, msg_size, gfp_flags); |
182 | if (!msg) { | 244 | if (!msg) { |
183 | ceph_osdc_put_request(req); | 245 | ceph_osdc_put_request(req); |
184 | return NULL; | 246 | return NULL; |
185 | } | 247 | } |
248 | |||
186 | msg->hdr.type = cpu_to_le16(CEPH_MSG_OSD_OP); | 249 | msg->hdr.type = cpu_to_le16(CEPH_MSG_OSD_OP); |
187 | memset(msg->front.iov_base, 0, msg->front.iov_len); | 250 | memset(msg->front.iov_base, 0, msg->front.iov_len); |
251 | |||
252 | req->r_request = msg; | ||
253 | req->r_pages = pages; | ||
254 | #ifdef CONFIG_BLOCK | ||
255 | if (bio) { | ||
256 | req->r_bio = bio; | ||
257 | bio_get(req->r_bio); | ||
258 | } | ||
259 | #endif | ||
260 | |||
261 | return req; | ||
262 | } | ||
263 | EXPORT_SYMBOL(ceph_osdc_alloc_request); | ||
264 | |||
265 | static void osd_req_encode_op(struct ceph_osd_request *req, | ||
266 | struct ceph_osd_op *dst, | ||
267 | struct ceph_osd_req_op *src) | ||
268 | { | ||
269 | dst->op = cpu_to_le16(src->op); | ||
270 | |||
271 | switch (dst->op) { | ||
272 | case CEPH_OSD_OP_READ: | ||
273 | case CEPH_OSD_OP_WRITE: | ||
274 | dst->extent.offset = | ||
275 | cpu_to_le64(src->extent.offset); | ||
276 | dst->extent.length = | ||
277 | cpu_to_le64(src->extent.length); | ||
278 | dst->extent.truncate_size = | ||
279 | cpu_to_le64(src->extent.truncate_size); | ||
280 | dst->extent.truncate_seq = | ||
281 | cpu_to_le32(src->extent.truncate_seq); | ||
282 | break; | ||
283 | |||
284 | case CEPH_OSD_OP_GETXATTR: | ||
285 | case CEPH_OSD_OP_SETXATTR: | ||
286 | case CEPH_OSD_OP_CMPXATTR: | ||
287 | BUG_ON(!req->r_trail); | ||
288 | |||
289 | dst->xattr.name_len = cpu_to_le32(src->xattr.name_len); | ||
290 | dst->xattr.value_len = cpu_to_le32(src->xattr.value_len); | ||
291 | dst->xattr.cmp_op = src->xattr.cmp_op; | ||
292 | dst->xattr.cmp_mode = src->xattr.cmp_mode; | ||
293 | ceph_pagelist_append(req->r_trail, src->xattr.name, | ||
294 | src->xattr.name_len); | ||
295 | ceph_pagelist_append(req->r_trail, src->xattr.val, | ||
296 | src->xattr.value_len); | ||
297 | break; | ||
298 | case CEPH_OSD_OP_CALL: | ||
299 | BUG_ON(!req->r_trail); | ||
300 | |||
301 | dst->cls.class_len = src->cls.class_len; | ||
302 | dst->cls.method_len = src->cls.method_len; | ||
303 | dst->cls.indata_len = cpu_to_le32(src->cls.indata_len); | ||
304 | |||
305 | ceph_pagelist_append(req->r_trail, src->cls.class_name, | ||
306 | src->cls.class_len); | ||
307 | ceph_pagelist_append(req->r_trail, src->cls.method_name, | ||
308 | src->cls.method_len); | ||
309 | ceph_pagelist_append(req->r_trail, src->cls.indata, | ||
310 | src->cls.indata_len); | ||
311 | break; | ||
312 | case CEPH_OSD_OP_ROLLBACK: | ||
313 | dst->snap.snapid = cpu_to_le64(src->snap.snapid); | ||
314 | break; | ||
315 | case CEPH_OSD_OP_STARTSYNC: | ||
316 | break; | ||
317 | default: | ||
318 | pr_err("unrecognized osd opcode %d\n", dst->op); | ||
319 | WARN_ON(1); | ||
320 | break; | ||
321 | } | ||
322 | dst->payload_len = cpu_to_le32(src->payload_len); | ||
323 | } | ||
324 | |||
325 | /* | ||
326 | * build new request AND message | ||
327 | * | ||
328 | */ | ||
329 | void ceph_osdc_build_request(struct ceph_osd_request *req, | ||
330 | u64 off, u64 *plen, | ||
331 | struct ceph_osd_req_op *src_ops, | ||
332 | struct ceph_snap_context *snapc, | ||
333 | struct timespec *mtime, | ||
334 | const char *oid, | ||
335 | int oid_len) | ||
336 | { | ||
337 | struct ceph_msg *msg = req->r_request; | ||
338 | struct ceph_osd_request_head *head; | ||
339 | struct ceph_osd_req_op *src_op; | ||
340 | struct ceph_osd_op *op; | ||
341 | void *p; | ||
342 | int num_op = get_num_ops(src_ops, NULL); | ||
343 | size_t msg_size = sizeof(*head) + num_op*sizeof(*op); | ||
344 | int flags = req->r_flags; | ||
345 | u64 data_len = 0; | ||
346 | int i; | ||
347 | |||
188 | head = msg->front.iov_base; | 348 | head = msg->front.iov_base; |
189 | op = (void *)(head + 1); | 349 | op = (void *)(head + 1); |
190 | p = (void *)(op + num_op); | 350 | p = (void *)(op + num_op); |
191 | 351 | ||
192 | req->r_request = msg; | ||
193 | req->r_snapc = ceph_get_snap_context(snapc); | 352 | req->r_snapc = ceph_get_snap_context(snapc); |
194 | 353 | ||
195 | head->client_inc = cpu_to_le32(1); /* always, for now. */ | 354 | head->client_inc = cpu_to_le32(1); /* always, for now. */ |
@@ -197,29 +356,23 @@ struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc, | |||
197 | if (flags & CEPH_OSD_FLAG_WRITE) | 356 | if (flags & CEPH_OSD_FLAG_WRITE) |
198 | ceph_encode_timespec(&head->mtime, mtime); | 357 | ceph_encode_timespec(&head->mtime, mtime); |
199 | head->num_ops = cpu_to_le16(num_op); | 358 | head->num_ops = cpu_to_le16(num_op); |
200 | op->op = cpu_to_le16(opcode); | ||
201 | 359 | ||
202 | /* calculate max write size */ | ||
203 | calc_layout(osdc, vino, layout, off, plen, req); | ||
204 | req->r_file_layout = *layout; /* keep a copy */ | ||
205 | |||
206 | if (flags & CEPH_OSD_FLAG_WRITE) { | ||
207 | req->r_request->hdr.data_off = cpu_to_le16(off); | ||
208 | req->r_request->hdr.data_len = cpu_to_le32(*plen); | ||
209 | op->payload_len = cpu_to_le32(*plen); | ||
210 | } | ||
211 | op->extent.truncate_size = cpu_to_le64(truncate_size); | ||
212 | op->extent.truncate_seq = cpu_to_le32(truncate_seq); | ||
213 | 360 | ||
214 | /* fill in oid */ | 361 | /* fill in oid */ |
215 | head->object_len = cpu_to_le32(req->r_oid_len); | 362 | head->object_len = cpu_to_le32(oid_len); |
216 | memcpy(p, req->r_oid, req->r_oid_len); | 363 | memcpy(p, oid, oid_len); |
217 | p += req->r_oid_len; | 364 | p += oid_len; |
218 | 365 | ||
219 | if (do_sync) { | 366 | src_op = src_ops; |
367 | while (src_op->op) { | ||
368 | osd_req_encode_op(req, op, src_op); | ||
369 | src_op++; | ||
220 | op++; | 370 | op++; |
221 | op->op = cpu_to_le16(CEPH_OSD_OP_STARTSYNC); | ||
222 | } | 371 | } |
372 | |||
373 | if (req->r_trail) | ||
374 | data_len += req->r_trail->length; | ||
375 | |||
223 | if (snapc) { | 376 | if (snapc) { |
224 | head->snap_seq = cpu_to_le64(snapc->seq); | 377 | head->snap_seq = cpu_to_le64(snapc->seq); |
225 | head->num_snaps = cpu_to_le32(snapc->num_snaps); | 378 | head->num_snaps = cpu_to_le32(snapc->num_snaps); |
@@ -229,12 +382,79 @@ struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc, | |||
229 | } | 382 | } |
230 | } | 383 | } |
231 | 384 | ||
385 | if (flags & CEPH_OSD_FLAG_WRITE) { | ||
386 | req->r_request->hdr.data_off = cpu_to_le16(off); | ||
387 | req->r_request->hdr.data_len = cpu_to_le32(*plen + data_len); | ||
388 | } else if (data_len) { | ||
389 | req->r_request->hdr.data_off = 0; | ||
390 | req->r_request->hdr.data_len = cpu_to_le32(data_len); | ||
391 | } | ||
392 | |||
232 | BUG_ON(p > msg->front.iov_base + msg->front.iov_len); | 393 | BUG_ON(p > msg->front.iov_base + msg->front.iov_len); |
233 | msg_size = p - msg->front.iov_base; | 394 | msg_size = p - msg->front.iov_base; |
234 | msg->front.iov_len = msg_size; | 395 | msg->front.iov_len = msg_size; |
235 | msg->hdr.front_len = cpu_to_le32(msg_size); | 396 | msg->hdr.front_len = cpu_to_le32(msg_size); |
397 | return; | ||
398 | } | ||
399 | EXPORT_SYMBOL(ceph_osdc_build_request); | ||
400 | |||
401 | /* | ||
402 | * build new request AND message, calculate layout, and adjust file | ||
403 | * extent as needed. | ||
404 | * | ||
405 | * if the file was recently truncated, we include information about its | ||
406 | * old and new size so that the object can be updated appropriately. (we | ||
407 | * avoid synchronously deleting truncated objects because it's slow.) | ||
408 | * | ||
409 | * if @do_sync, include a 'startsync' command so that the osd will flush | ||
410 | * data quickly. | ||
411 | */ | ||
412 | struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc, | ||
413 | struct ceph_file_layout *layout, | ||
414 | struct ceph_vino vino, | ||
415 | u64 off, u64 *plen, | ||
416 | int opcode, int flags, | ||
417 | struct ceph_snap_context *snapc, | ||
418 | int do_sync, | ||
419 | u32 truncate_seq, | ||
420 | u64 truncate_size, | ||
421 | struct timespec *mtime, | ||
422 | bool use_mempool, int num_reply) | ||
423 | { | ||
424 | struct ceph_osd_req_op ops[3]; | ||
425 | struct ceph_osd_request *req; | ||
426 | |||
427 | ops[0].op = opcode; | ||
428 | ops[0].extent.truncate_seq = truncate_seq; | ||
429 | ops[0].extent.truncate_size = truncate_size; | ||
430 | ops[0].payload_len = 0; | ||
431 | |||
432 | if (do_sync) { | ||
433 | ops[1].op = CEPH_OSD_OP_STARTSYNC; | ||
434 | ops[1].payload_len = 0; | ||
435 | ops[2].op = 0; | ||
436 | } else | ||
437 | ops[1].op = 0; | ||
438 | |||
439 | req = ceph_osdc_alloc_request(osdc, flags, | ||
440 | snapc, ops, | ||
441 | use_mempool, | ||
442 | GFP_NOFS, NULL, NULL); | ||
443 | if (IS_ERR(req)) | ||
444 | return req; | ||
445 | |||
446 | /* calculate max write size */ | ||
447 | calc_layout(osdc, vino, layout, off, plen, req, ops); | ||
448 | req->r_file_layout = *layout; /* keep a copy */ | ||
449 | |||
450 | ceph_osdc_build_request(req, off, plen, ops, | ||
451 | snapc, | ||
452 | mtime, | ||
453 | req->r_oid, req->r_oid_len); | ||
454 | |||
236 | return req; | 455 | return req; |
237 | } | 456 | } |
457 | EXPORT_SYMBOL(ceph_osdc_new_request); | ||
238 | 458 | ||
239 | /* | 459 | /* |
240 | * We keep osd requests in an rbtree, sorted by ->r_tid. | 460 | * We keep osd requests in an rbtree, sorted by ->r_tid. |
@@ -389,7 +609,7 @@ static void __move_osd_to_lru(struct ceph_osd_client *osdc, | |||
389 | dout("__move_osd_to_lru %p\n", osd); | 609 | dout("__move_osd_to_lru %p\n", osd); |
390 | BUG_ON(!list_empty(&osd->o_osd_lru)); | 610 | BUG_ON(!list_empty(&osd->o_osd_lru)); |
391 | list_add_tail(&osd->o_osd_lru, &osdc->osd_lru); | 611 | list_add_tail(&osd->o_osd_lru, &osdc->osd_lru); |
392 | osd->lru_ttl = jiffies + osdc->client->mount_args->osd_idle_ttl * HZ; | 612 | osd->lru_ttl = jiffies + osdc->client->options->osd_idle_ttl * HZ; |
393 | } | 613 | } |
394 | 614 | ||
395 | static void __remove_osd_from_lru(struct ceph_osd *osd) | 615 | static void __remove_osd_from_lru(struct ceph_osd *osd) |
@@ -483,7 +703,7 @@ static struct ceph_osd *__lookup_osd(struct ceph_osd_client *osdc, int o) | |||
483 | static void __schedule_osd_timeout(struct ceph_osd_client *osdc) | 703 | static void __schedule_osd_timeout(struct ceph_osd_client *osdc) |
484 | { | 704 | { |
485 | schedule_delayed_work(&osdc->timeout_work, | 705 | schedule_delayed_work(&osdc->timeout_work, |
486 | osdc->client->mount_args->osd_keepalive_timeout * HZ); | 706 | osdc->client->options->osd_keepalive_timeout * HZ); |
487 | } | 707 | } |
488 | 708 | ||
489 | static void __cancel_osd_timeout(struct ceph_osd_client *osdc) | 709 | static void __cancel_osd_timeout(struct ceph_osd_client *osdc) |
@@ -684,9 +904,9 @@ static void handle_timeout(struct work_struct *work) | |||
684 | container_of(work, struct ceph_osd_client, timeout_work.work); | 904 | container_of(work, struct ceph_osd_client, timeout_work.work); |
685 | struct ceph_osd_request *req, *last_req = NULL; | 905 | struct ceph_osd_request *req, *last_req = NULL; |
686 | struct ceph_osd *osd; | 906 | struct ceph_osd *osd; |
687 | unsigned long timeout = osdc->client->mount_args->osd_timeout * HZ; | 907 | unsigned long timeout = osdc->client->options->osd_timeout * HZ; |
688 | unsigned long keepalive = | 908 | unsigned long keepalive = |
689 | osdc->client->mount_args->osd_keepalive_timeout * HZ; | 909 | osdc->client->options->osd_keepalive_timeout * HZ; |
690 | unsigned long last_stamp = 0; | 910 | unsigned long last_stamp = 0; |
691 | struct rb_node *p; | 911 | struct rb_node *p; |
692 | struct list_head slow_osds; | 912 | struct list_head slow_osds; |
@@ -773,7 +993,7 @@ static void handle_osds_timeout(struct work_struct *work) | |||
773 | container_of(work, struct ceph_osd_client, | 993 | container_of(work, struct ceph_osd_client, |
774 | osds_timeout_work.work); | 994 | osds_timeout_work.work); |
775 | unsigned long delay = | 995 | unsigned long delay = |
776 | osdc->client->mount_args->osd_idle_ttl * HZ >> 2; | 996 | osdc->client->options->osd_idle_ttl * HZ >> 2; |
777 | 997 | ||
778 | dout("osds timeout\n"); | 998 | dout("osds timeout\n"); |
779 | down_read(&osdc->map_sem); | 999 | down_read(&osdc->map_sem); |
@@ -1104,6 +1324,10 @@ int ceph_osdc_start_request(struct ceph_osd_client *osdc, | |||
1104 | 1324 | ||
1105 | req->r_request->pages = req->r_pages; | 1325 | req->r_request->pages = req->r_pages; |
1106 | req->r_request->nr_pages = req->r_num_pages; | 1326 | req->r_request->nr_pages = req->r_num_pages; |
1327 | #ifdef CONFIG_BLOCK | ||
1328 | req->r_request->bio = req->r_bio; | ||
1329 | #endif | ||
1330 | req->r_request->trail = req->r_trail; | ||
1107 | 1331 | ||
1108 | register_request(osdc, req); | 1332 | register_request(osdc, req); |
1109 | 1333 | ||
@@ -1131,6 +1355,7 @@ int ceph_osdc_start_request(struct ceph_osd_client *osdc, | |||
1131 | up_read(&osdc->map_sem); | 1355 | up_read(&osdc->map_sem); |
1132 | return rc; | 1356 | return rc; |
1133 | } | 1357 | } |
1358 | EXPORT_SYMBOL(ceph_osdc_start_request); | ||
1134 | 1359 | ||
1135 | /* | 1360 | /* |
1136 | * wait for a request to complete | 1361 | * wait for a request to complete |
@@ -1153,6 +1378,7 @@ int ceph_osdc_wait_request(struct ceph_osd_client *osdc, | |||
1153 | dout("wait_request tid %llu result %d\n", req->r_tid, req->r_result); | 1378 | dout("wait_request tid %llu result %d\n", req->r_tid, req->r_result); |
1154 | return req->r_result; | 1379 | return req->r_result; |
1155 | } | 1380 | } |
1381 | EXPORT_SYMBOL(ceph_osdc_wait_request); | ||
1156 | 1382 | ||
1157 | /* | 1383 | /* |
1158 | * sync - wait for all in-flight requests to flush. avoid starvation. | 1384 | * sync - wait for all in-flight requests to flush. avoid starvation. |
@@ -1186,6 +1412,7 @@ void ceph_osdc_sync(struct ceph_osd_client *osdc) | |||
1186 | mutex_unlock(&osdc->request_mutex); | 1412 | mutex_unlock(&osdc->request_mutex); |
1187 | dout("sync done (thru tid %llu)\n", last_tid); | 1413 | dout("sync done (thru tid %llu)\n", last_tid); |
1188 | } | 1414 | } |
1415 | EXPORT_SYMBOL(ceph_osdc_sync); | ||
1189 | 1416 | ||
1190 | /* | 1417 | /* |
1191 | * init, shutdown | 1418 | * init, shutdown |
@@ -1211,7 +1438,7 @@ int ceph_osdc_init(struct ceph_osd_client *osdc, struct ceph_client *client) | |||
1211 | INIT_DELAYED_WORK(&osdc->osds_timeout_work, handle_osds_timeout); | 1438 | INIT_DELAYED_WORK(&osdc->osds_timeout_work, handle_osds_timeout); |
1212 | 1439 | ||
1213 | schedule_delayed_work(&osdc->osds_timeout_work, | 1440 | schedule_delayed_work(&osdc->osds_timeout_work, |
1214 | round_jiffies_relative(osdc->client->mount_args->osd_idle_ttl * HZ)); | 1441 | round_jiffies_relative(osdc->client->options->osd_idle_ttl * HZ)); |
1215 | 1442 | ||
1216 | err = -ENOMEM; | 1443 | err = -ENOMEM; |
1217 | osdc->req_mempool = mempool_create_kmalloc_pool(10, | 1444 | osdc->req_mempool = mempool_create_kmalloc_pool(10, |
@@ -1237,6 +1464,7 @@ out_mempool: | |||
1237 | out: | 1464 | out: |
1238 | return err; | 1465 | return err; |
1239 | } | 1466 | } |
1467 | EXPORT_SYMBOL(ceph_osdc_init); | ||
1240 | 1468 | ||
1241 | void ceph_osdc_stop(struct ceph_osd_client *osdc) | 1469 | void ceph_osdc_stop(struct ceph_osd_client *osdc) |
1242 | { | 1470 | { |
@@ -1251,6 +1479,7 @@ void ceph_osdc_stop(struct ceph_osd_client *osdc) | |||
1251 | ceph_msgpool_destroy(&osdc->msgpool_op); | 1479 | ceph_msgpool_destroy(&osdc->msgpool_op); |
1252 | ceph_msgpool_destroy(&osdc->msgpool_op_reply); | 1480 | ceph_msgpool_destroy(&osdc->msgpool_op_reply); |
1253 | } | 1481 | } |
1482 | EXPORT_SYMBOL(ceph_osdc_stop); | ||
1254 | 1483 | ||
1255 | /* | 1484 | /* |
1256 | * Read some contiguous pages. If we cross a stripe boundary, shorten | 1485 | * Read some contiguous pages. If we cross a stripe boundary, shorten |
@@ -1288,6 +1517,7 @@ int ceph_osdc_readpages(struct ceph_osd_client *osdc, | |||
1288 | dout("readpages result %d\n", rc); | 1517 | dout("readpages result %d\n", rc); |
1289 | return rc; | 1518 | return rc; |
1290 | } | 1519 | } |
1520 | EXPORT_SYMBOL(ceph_osdc_readpages); | ||
1291 | 1521 | ||
1292 | /* | 1522 | /* |
1293 | * do a synchronous write on N pages | 1523 | * do a synchronous write on N pages |
@@ -1330,6 +1560,7 @@ int ceph_osdc_writepages(struct ceph_osd_client *osdc, struct ceph_vino vino, | |||
1330 | dout("writepages result %d\n", rc); | 1560 | dout("writepages result %d\n", rc); |
1331 | return rc; | 1561 | return rc; |
1332 | } | 1562 | } |
1563 | EXPORT_SYMBOL(ceph_osdc_writepages); | ||
1333 | 1564 | ||
1334 | /* | 1565 | /* |
1335 | * handle incoming message | 1566 | * handle incoming message |
@@ -1420,6 +1651,9 @@ static struct ceph_msg *get_reply(struct ceph_connection *con, | |||
1420 | } | 1651 | } |
1421 | m->pages = req->r_pages; | 1652 | m->pages = req->r_pages; |
1422 | m->nr_pages = req->r_num_pages; | 1653 | m->nr_pages = req->r_num_pages; |
1654 | #ifdef CONFIG_BLOCK | ||
1655 | m->bio = req->r_bio; | ||
1656 | #endif | ||
1423 | } | 1657 | } |
1424 | *skip = 0; | 1658 | *skip = 0; |
1425 | req->r_con_filling_msg = ceph_con_get(con); | 1659 | req->r_con_filling_msg = ceph_con_get(con); |
diff --git a/fs/ceph/osdmap.c b/net/ceph/osdmap.c index e31f118f1392..d73f3f6efa36 100644 --- a/fs/ceph/osdmap.c +++ b/net/ceph/osdmap.c | |||
@@ -1,14 +1,15 @@ | |||
1 | 1 | ||
2 | #include "ceph_debug.h" | 2 | #include <linux/ceph/ceph_debug.h> |
3 | 3 | ||
4 | #include <linux/module.h> | ||
4 | #include <linux/slab.h> | 5 | #include <linux/slab.h> |
5 | #include <asm/div64.h> | 6 | #include <asm/div64.h> |
6 | 7 | ||
7 | #include "super.h" | 8 | #include <linux/ceph/libceph.h> |
8 | #include "osdmap.h" | 9 | #include <linux/ceph/osdmap.h> |
9 | #include "crush/hash.h" | 10 | #include <linux/ceph/decode.h> |
10 | #include "crush/mapper.h" | 11 | #include <linux/crush/hash.h> |
11 | #include "decode.h" | 12 | #include <linux/crush/mapper.h> |
12 | 13 | ||
13 | char *ceph_osdmap_state_str(char *str, int len, int state) | 14 | char *ceph_osdmap_state_str(char *str, int len, int state) |
14 | { | 15 | { |
@@ -417,6 +418,20 @@ static struct ceph_pg_pool_info *__lookup_pg_pool(struct rb_root *root, int id) | |||
417 | return NULL; | 418 | return NULL; |
418 | } | 419 | } |
419 | 420 | ||
421 | int ceph_pg_poolid_by_name(struct ceph_osdmap *map, const char *name) | ||
422 | { | ||
423 | struct rb_node *rbp; | ||
424 | |||
425 | for (rbp = rb_first(&map->pg_pools); rbp; rbp = rb_next(rbp)) { | ||
426 | struct ceph_pg_pool_info *pi = | ||
427 | rb_entry(rbp, struct ceph_pg_pool_info, node); | ||
428 | if (pi->name && strcmp(pi->name, name) == 0) | ||
429 | return pi->id; | ||
430 | } | ||
431 | return -ENOENT; | ||
432 | } | ||
433 | EXPORT_SYMBOL(ceph_pg_poolid_by_name); | ||
434 | |||
420 | static void __remove_pg_pool(struct rb_root *root, struct ceph_pg_pool_info *pi) | 435 | static void __remove_pg_pool(struct rb_root *root, struct ceph_pg_pool_info *pi) |
421 | { | 436 | { |
422 | rb_erase(&pi->node, root); | 437 | rb_erase(&pi->node, root); |
@@ -966,6 +981,7 @@ void ceph_calc_file_object_mapping(struct ceph_file_layout *layout, | |||
966 | 981 | ||
967 | dout(" obj extent %llu~%llu\n", *oxoff, *oxlen); | 982 | dout(" obj extent %llu~%llu\n", *oxoff, *oxlen); |
968 | } | 983 | } |
984 | EXPORT_SYMBOL(ceph_calc_file_object_mapping); | ||
969 | 985 | ||
970 | /* | 986 | /* |
971 | * calculate an object layout (i.e. pgid) from an oid, | 987 | * calculate an object layout (i.e. pgid) from an oid, |
@@ -1011,6 +1027,7 @@ int ceph_calc_object_layout(struct ceph_object_layout *ol, | |||
1011 | ol->ol_stripe_unit = fl->fl_object_stripe_unit; | 1027 | ol->ol_stripe_unit = fl->fl_object_stripe_unit; |
1012 | return 0; | 1028 | return 0; |
1013 | } | 1029 | } |
1030 | EXPORT_SYMBOL(ceph_calc_object_layout); | ||
1014 | 1031 | ||
1015 | /* | 1032 | /* |
1016 | * Calculate raw osd vector for the given pgid. Return pointer to osd | 1033 | * Calculate raw osd vector for the given pgid. Return pointer to osd |
@@ -1108,3 +1125,4 @@ int ceph_calc_pg_primary(struct ceph_osdmap *osdmap, struct ceph_pg pgid) | |||
1108 | return osds[i]; | 1125 | return osds[i]; |
1109 | return -1; | 1126 | return -1; |
1110 | } | 1127 | } |
1128 | EXPORT_SYMBOL(ceph_calc_pg_primary); | ||
diff --git a/net/ceph/pagelist.c b/net/ceph/pagelist.c new file mode 100644 index 000000000000..13cb409a7bba --- /dev/null +++ b/net/ceph/pagelist.c | |||
@@ -0,0 +1,154 @@ | |||
1 | |||
2 | #include <linux/module.h> | ||
3 | #include <linux/gfp.h> | ||
4 | #include <linux/pagemap.h> | ||
5 | #include <linux/highmem.h> | ||
6 | #include <linux/ceph/pagelist.h> | ||
7 | |||
8 | static void ceph_pagelist_unmap_tail(struct ceph_pagelist *pl) | ||
9 | { | ||
10 | if (pl->mapped_tail) { | ||
11 | struct page *page = list_entry(pl->head.prev, struct page, lru); | ||
12 | kunmap(page); | ||
13 | pl->mapped_tail = NULL; | ||
14 | } | ||
15 | } | ||
16 | |||
17 | int ceph_pagelist_release(struct ceph_pagelist *pl) | ||
18 | { | ||
19 | ceph_pagelist_unmap_tail(pl); | ||
20 | while (!list_empty(&pl->head)) { | ||
21 | struct page *page = list_first_entry(&pl->head, struct page, | ||
22 | lru); | ||
23 | list_del(&page->lru); | ||
24 | __free_page(page); | ||
25 | } | ||
26 | ceph_pagelist_free_reserve(pl); | ||
27 | return 0; | ||
28 | } | ||
29 | EXPORT_SYMBOL(ceph_pagelist_release); | ||
30 | |||
31 | static int ceph_pagelist_addpage(struct ceph_pagelist *pl) | ||
32 | { | ||
33 | struct page *page; | ||
34 | |||
35 | if (!pl->num_pages_free) { | ||
36 | page = __page_cache_alloc(GFP_NOFS); | ||
37 | } else { | ||
38 | page = list_first_entry(&pl->free_list, struct page, lru); | ||
39 | list_del(&page->lru); | ||
40 | --pl->num_pages_free; | ||
41 | } | ||
42 | if (!page) | ||
43 | return -ENOMEM; | ||
44 | pl->room += PAGE_SIZE; | ||
45 | ceph_pagelist_unmap_tail(pl); | ||
46 | list_add_tail(&page->lru, &pl->head); | ||
47 | pl->mapped_tail = kmap(page); | ||
48 | return 0; | ||
49 | } | ||
50 | |||
51 | int ceph_pagelist_append(struct ceph_pagelist *pl, const void *buf, size_t len) | ||
52 | { | ||
53 | while (pl->room < len) { | ||
54 | size_t bit = pl->room; | ||
55 | int ret; | ||
56 | |||
57 | memcpy(pl->mapped_tail + (pl->length & ~PAGE_CACHE_MASK), | ||
58 | buf, bit); | ||
59 | pl->length += bit; | ||
60 | pl->room -= bit; | ||
61 | buf += bit; | ||
62 | len -= bit; | ||
63 | ret = ceph_pagelist_addpage(pl); | ||
64 | if (ret) | ||
65 | return ret; | ||
66 | } | ||
67 | |||
68 | memcpy(pl->mapped_tail + (pl->length & ~PAGE_CACHE_MASK), buf, len); | ||
69 | pl->length += len; | ||
70 | pl->room -= len; | ||
71 | return 0; | ||
72 | } | ||
73 | EXPORT_SYMBOL(ceph_pagelist_append); | ||
74 | |||
75 | /** | ||
76 | * Allocate enough pages for a pagelist to append the given amount | ||
77 | * of data without allocating. | ||
78 | * Returns: 0 on success, -ENOMEM on error. | ||
79 | */ | ||
80 | int ceph_pagelist_reserve(struct ceph_pagelist *pl, size_t space) | ||
81 | { | ||
82 | if (space <= pl->room) | ||
83 | return 0; | ||
84 | space -= pl->room; | ||
85 | space = (space + PAGE_SIZE - 1) >> PAGE_SHIFT; /* conv to num pages */ | ||
86 | |||
87 | while (space > pl->num_pages_free) { | ||
88 | struct page *page = __page_cache_alloc(GFP_NOFS); | ||
89 | if (!page) | ||
90 | return -ENOMEM; | ||
91 | list_add_tail(&page->lru, &pl->free_list); | ||
92 | ++pl->num_pages_free; | ||
93 | } | ||
94 | return 0; | ||
95 | } | ||
96 | EXPORT_SYMBOL(ceph_pagelist_reserve); | ||
97 | |||
98 | /** | ||
99 | * Free any pages that have been preallocated. | ||
100 | */ | ||
101 | int ceph_pagelist_free_reserve(struct ceph_pagelist *pl) | ||
102 | { | ||
103 | while (!list_empty(&pl->free_list)) { | ||
104 | struct page *page = list_first_entry(&pl->free_list, | ||
105 | struct page, lru); | ||
106 | list_del(&page->lru); | ||
107 | __free_page(page); | ||
108 | --pl->num_pages_free; | ||
109 | } | ||
110 | BUG_ON(pl->num_pages_free); | ||
111 | return 0; | ||
112 | } | ||
113 | EXPORT_SYMBOL(ceph_pagelist_free_reserve); | ||
114 | |||
115 | /** | ||
116 | * Create a truncation point. | ||
117 | */ | ||
118 | void ceph_pagelist_set_cursor(struct ceph_pagelist *pl, | ||
119 | struct ceph_pagelist_cursor *c) | ||
120 | { | ||
121 | c->pl = pl; | ||
122 | c->page_lru = pl->head.prev; | ||
123 | c->room = pl->room; | ||
124 | } | ||
125 | EXPORT_SYMBOL(ceph_pagelist_set_cursor); | ||
126 | |||
127 | /** | ||
128 | * Truncate a pagelist to the given point. Move extra pages to reserve. | ||
129 | * This won't sleep. | ||
130 | * Returns: 0 on success, | ||
131 | * -EINVAL if the pagelist doesn't match the trunc point pagelist | ||
132 | */ | ||
133 | int ceph_pagelist_truncate(struct ceph_pagelist *pl, | ||
134 | struct ceph_pagelist_cursor *c) | ||
135 | { | ||
136 | struct page *page; | ||
137 | |||
138 | if (pl != c->pl) | ||
139 | return -EINVAL; | ||
140 | ceph_pagelist_unmap_tail(pl); | ||
141 | while (pl->head.prev != c->page_lru) { | ||
142 | page = list_entry(pl->head.prev, struct page, lru); | ||
143 | list_del(&page->lru); /* remove from pagelist */ | ||
144 | list_add_tail(&page->lru, &pl->free_list); /* add to reserve */ | ||
145 | ++pl->num_pages_free; | ||
146 | } | ||
147 | pl->room = c->room; | ||
148 | if (!list_empty(&pl->head)) { | ||
149 | page = list_entry(pl->head.prev, struct page, lru); | ||
150 | pl->mapped_tail = kmap(page); | ||
151 | } | ||
152 | return 0; | ||
153 | } | ||
154 | EXPORT_SYMBOL(ceph_pagelist_truncate); | ||
diff --git a/net/ceph/pagevec.c b/net/ceph/pagevec.c new file mode 100644 index 000000000000..54caf0687155 --- /dev/null +++ b/net/ceph/pagevec.c | |||
@@ -0,0 +1,223 @@ | |||
1 | #include <linux/ceph/ceph_debug.h> | ||
2 | |||
3 | #include <linux/module.h> | ||
4 | #include <linux/sched.h> | ||
5 | #include <linux/slab.h> | ||
6 | #include <linux/file.h> | ||
7 | #include <linux/namei.h> | ||
8 | #include <linux/writeback.h> | ||
9 | |||
10 | #include <linux/ceph/libceph.h> | ||
11 | |||
12 | /* | ||
13 | * build a vector of user pages | ||
14 | */ | ||
15 | struct page **ceph_get_direct_page_vector(const char __user *data, | ||
16 | int num_pages, | ||
17 | loff_t off, size_t len) | ||
18 | { | ||
19 | struct page **pages; | ||
20 | int rc; | ||
21 | |||
22 | pages = kmalloc(sizeof(*pages) * num_pages, GFP_NOFS); | ||
23 | if (!pages) | ||
24 | return ERR_PTR(-ENOMEM); | ||
25 | |||
26 | down_read(¤t->mm->mmap_sem); | ||
27 | rc = get_user_pages(current, current->mm, (unsigned long)data, | ||
28 | num_pages, 0, 0, pages, NULL); | ||
29 | up_read(¤t->mm->mmap_sem); | ||
30 | if (rc < 0) | ||
31 | goto fail; | ||
32 | return pages; | ||
33 | |||
34 | fail: | ||
35 | kfree(pages); | ||
36 | return ERR_PTR(rc); | ||
37 | } | ||
38 | EXPORT_SYMBOL(ceph_get_direct_page_vector); | ||
39 | |||
40 | void ceph_put_page_vector(struct page **pages, int num_pages) | ||
41 | { | ||
42 | int i; | ||
43 | |||
44 | for (i = 0; i < num_pages; i++) | ||
45 | put_page(pages[i]); | ||
46 | kfree(pages); | ||
47 | } | ||
48 | EXPORT_SYMBOL(ceph_put_page_vector); | ||
49 | |||
50 | void ceph_release_page_vector(struct page **pages, int num_pages) | ||
51 | { | ||
52 | int i; | ||
53 | |||
54 | for (i = 0; i < num_pages; i++) | ||
55 | __free_pages(pages[i], 0); | ||
56 | kfree(pages); | ||
57 | } | ||
58 | EXPORT_SYMBOL(ceph_release_page_vector); | ||
59 | |||
60 | /* | ||
61 | * allocate a vector of new pages | ||
62 | */ | ||
63 | struct page **ceph_alloc_page_vector(int num_pages, gfp_t flags) | ||
64 | { | ||
65 | struct page **pages; | ||
66 | int i; | ||
67 | |||
68 | pages = kmalloc(sizeof(*pages) * num_pages, flags); | ||
69 | if (!pages) | ||
70 | return ERR_PTR(-ENOMEM); | ||
71 | for (i = 0; i < num_pages; i++) { | ||
72 | pages[i] = __page_cache_alloc(flags); | ||
73 | if (pages[i] == NULL) { | ||
74 | ceph_release_page_vector(pages, i); | ||
75 | return ERR_PTR(-ENOMEM); | ||
76 | } | ||
77 | } | ||
78 | return pages; | ||
79 | } | ||
80 | EXPORT_SYMBOL(ceph_alloc_page_vector); | ||
81 | |||
82 | /* | ||
83 | * copy user data into a page vector | ||
84 | */ | ||
85 | int ceph_copy_user_to_page_vector(struct page **pages, | ||
86 | const char __user *data, | ||
87 | loff_t off, size_t len) | ||
88 | { | ||
89 | int i = 0; | ||
90 | int po = off & ~PAGE_CACHE_MASK; | ||
91 | int left = len; | ||
92 | int l, bad; | ||
93 | |||
94 | while (left > 0) { | ||
95 | l = min_t(int, PAGE_CACHE_SIZE-po, left); | ||
96 | bad = copy_from_user(page_address(pages[i]) + po, data, l); | ||
97 | if (bad == l) | ||
98 | return -EFAULT; | ||
99 | data += l - bad; | ||
100 | left -= l - bad; | ||
101 | po += l - bad; | ||
102 | if (po == PAGE_CACHE_SIZE) { | ||
103 | po = 0; | ||
104 | i++; | ||
105 | } | ||
106 | } | ||
107 | return len; | ||
108 | } | ||
109 | EXPORT_SYMBOL(ceph_copy_user_to_page_vector); | ||
110 | |||
111 | int ceph_copy_to_page_vector(struct page **pages, | ||
112 | const char *data, | ||
113 | loff_t off, size_t len) | ||
114 | { | ||
115 | int i = 0; | ||
116 | size_t po = off & ~PAGE_CACHE_MASK; | ||
117 | size_t left = len; | ||
118 | size_t l; | ||
119 | |||
120 | while (left > 0) { | ||
121 | l = min_t(size_t, PAGE_CACHE_SIZE-po, left); | ||
122 | memcpy(page_address(pages[i]) + po, data, l); | ||
123 | data += l; | ||
124 | left -= l; | ||
125 | po += l; | ||
126 | if (po == PAGE_CACHE_SIZE) { | ||
127 | po = 0; | ||
128 | i++; | ||
129 | } | ||
130 | } | ||
131 | return len; | ||
132 | } | ||
133 | EXPORT_SYMBOL(ceph_copy_to_page_vector); | ||
134 | |||
135 | int ceph_copy_from_page_vector(struct page **pages, | ||
136 | char *data, | ||
137 | loff_t off, size_t len) | ||
138 | { | ||
139 | int i = 0; | ||
140 | size_t po = off & ~PAGE_CACHE_MASK; | ||
141 | size_t left = len; | ||
142 | size_t l; | ||
143 | |||
144 | while (left > 0) { | ||
145 | l = min_t(size_t, PAGE_CACHE_SIZE-po, left); | ||
146 | memcpy(data, page_address(pages[i]) + po, l); | ||
147 | data += l; | ||
148 | left -= l; | ||
149 | po += l; | ||
150 | if (po == PAGE_CACHE_SIZE) { | ||
151 | po = 0; | ||
152 | i++; | ||
153 | } | ||
154 | } | ||
155 | return len; | ||
156 | } | ||
157 | EXPORT_SYMBOL(ceph_copy_from_page_vector); | ||
158 | |||
159 | /* | ||
160 | * copy user data from a page vector into a user pointer | ||
161 | */ | ||
162 | int ceph_copy_page_vector_to_user(struct page **pages, | ||
163 | char __user *data, | ||
164 | loff_t off, size_t len) | ||
165 | { | ||
166 | int i = 0; | ||
167 | int po = off & ~PAGE_CACHE_MASK; | ||
168 | int left = len; | ||
169 | int l, bad; | ||
170 | |||
171 | while (left > 0) { | ||
172 | l = min_t(int, left, PAGE_CACHE_SIZE-po); | ||
173 | bad = copy_to_user(data, page_address(pages[i]) + po, l); | ||
174 | if (bad == l) | ||
175 | return -EFAULT; | ||
176 | data += l - bad; | ||
177 | left -= l - bad; | ||
178 | if (po) { | ||
179 | po += l - bad; | ||
180 | if (po == PAGE_CACHE_SIZE) | ||
181 | po = 0; | ||
182 | } | ||
183 | i++; | ||
184 | } | ||
185 | return len; | ||
186 | } | ||
187 | EXPORT_SYMBOL(ceph_copy_page_vector_to_user); | ||
188 | |||
189 | /* | ||
190 | * Zero an extent within a page vector. Offset is relative to the | ||
191 | * start of the first page. | ||
192 | */ | ||
193 | void ceph_zero_page_vector_range(int off, int len, struct page **pages) | ||
194 | { | ||
195 | int i = off >> PAGE_CACHE_SHIFT; | ||
196 | |||
197 | off &= ~PAGE_CACHE_MASK; | ||
198 | |||
199 | dout("zero_page_vector_page %u~%u\n", off, len); | ||
200 | |||
201 | /* leading partial page? */ | ||
202 | if (off) { | ||
203 | int end = min((int)PAGE_CACHE_SIZE, off + len); | ||
204 | dout("zeroing %d %p head from %d\n", i, pages[i], | ||
205 | (int)off); | ||
206 | zero_user_segment(pages[i], off, end); | ||
207 | len -= (end - off); | ||
208 | i++; | ||
209 | } | ||
210 | while (len >= PAGE_CACHE_SIZE) { | ||
211 | dout("zeroing %d %p len=%d\n", i, pages[i], len); | ||
212 | zero_user_segment(pages[i], 0, PAGE_CACHE_SIZE); | ||
213 | len -= PAGE_CACHE_SIZE; | ||
214 | i++; | ||
215 | } | ||
216 | /* trailing partial page? */ | ||
217 | if (len) { | ||
218 | dout("zeroing %d %p tail to %d\n", i, pages[i], (int)len); | ||
219 | zero_user_segment(pages[i], 0, len); | ||
220 | } | ||
221 | } | ||
222 | EXPORT_SYMBOL(ceph_zero_page_vector_range); | ||
223 | |||
diff --git a/net/core/datagram.c b/net/core/datagram.c index 251997a95483..282806ba7a57 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c | |||
@@ -243,6 +243,7 @@ void skb_free_datagram_locked(struct sock *sk, struct sk_buff *skb) | |||
243 | unlock_sock_fast(sk, slow); | 243 | unlock_sock_fast(sk, slow); |
244 | 244 | ||
245 | /* skb is now orphaned, can be freed outside of locked section */ | 245 | /* skb is now orphaned, can be freed outside of locked section */ |
246 | trace_kfree_skb(skb, skb_free_datagram_locked); | ||
246 | __kfree_skb(skb); | 247 | __kfree_skb(skb); |
247 | } | 248 | } |
248 | EXPORT_SYMBOL(skb_free_datagram_locked); | 249 | EXPORT_SYMBOL(skb_free_datagram_locked); |
diff --git a/net/core/dev.c b/net/core/dev.c index 660dd41aaaa6..7ec85e27beed 100644 --- a/net/core/dev.c +++ b/net/core/dev.c | |||
@@ -128,6 +128,8 @@ | |||
128 | #include <linux/jhash.h> | 128 | #include <linux/jhash.h> |
129 | #include <linux/random.h> | 129 | #include <linux/random.h> |
130 | #include <trace/events/napi.h> | 130 | #include <trace/events/napi.h> |
131 | #include <trace/events/net.h> | ||
132 | #include <trace/events/skb.h> | ||
131 | #include <linux/pci.h> | 133 | #include <linux/pci.h> |
132 | 134 | ||
133 | #include "net-sysfs.h" | 135 | #include "net-sysfs.h" |
@@ -1978,6 +1980,7 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, | |||
1978 | } | 1980 | } |
1979 | 1981 | ||
1980 | rc = ops->ndo_start_xmit(skb, dev); | 1982 | rc = ops->ndo_start_xmit(skb, dev); |
1983 | trace_net_dev_xmit(skb, rc); | ||
1981 | if (rc == NETDEV_TX_OK) | 1984 | if (rc == NETDEV_TX_OK) |
1982 | txq_trans_update(txq); | 1985 | txq_trans_update(txq); |
1983 | return rc; | 1986 | return rc; |
@@ -1998,6 +2001,7 @@ gso: | |||
1998 | skb_dst_drop(nskb); | 2001 | skb_dst_drop(nskb); |
1999 | 2002 | ||
2000 | rc = ops->ndo_start_xmit(nskb, dev); | 2003 | rc = ops->ndo_start_xmit(nskb, dev); |
2004 | trace_net_dev_xmit(nskb, rc); | ||
2001 | if (unlikely(rc != NETDEV_TX_OK)) { | 2005 | if (unlikely(rc != NETDEV_TX_OK)) { |
2002 | if (rc & ~NETDEV_TX_MASK) | 2006 | if (rc & ~NETDEV_TX_MASK) |
2003 | goto out_kfree_gso_skb; | 2007 | goto out_kfree_gso_skb; |
@@ -2186,6 +2190,7 @@ int dev_queue_xmit(struct sk_buff *skb) | |||
2186 | #ifdef CONFIG_NET_CLS_ACT | 2190 | #ifdef CONFIG_NET_CLS_ACT |
2187 | skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_EGRESS); | 2191 | skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_EGRESS); |
2188 | #endif | 2192 | #endif |
2193 | trace_net_dev_queue(skb); | ||
2189 | if (q->enqueue) { | 2194 | if (q->enqueue) { |
2190 | rc = __dev_xmit_skb(skb, q, dev, txq); | 2195 | rc = __dev_xmit_skb(skb, q, dev, txq); |
2191 | goto out; | 2196 | goto out; |
@@ -2512,6 +2517,7 @@ int netif_rx(struct sk_buff *skb) | |||
2512 | if (netdev_tstamp_prequeue) | 2517 | if (netdev_tstamp_prequeue) |
2513 | net_timestamp_check(skb); | 2518 | net_timestamp_check(skb); |
2514 | 2519 | ||
2520 | trace_netif_rx(skb); | ||
2515 | #ifdef CONFIG_RPS | 2521 | #ifdef CONFIG_RPS |
2516 | { | 2522 | { |
2517 | struct rps_dev_flow voidflow, *rflow = &voidflow; | 2523 | struct rps_dev_flow voidflow, *rflow = &voidflow; |
@@ -2571,6 +2577,7 @@ static void net_tx_action(struct softirq_action *h) | |||
2571 | clist = clist->next; | 2577 | clist = clist->next; |
2572 | 2578 | ||
2573 | WARN_ON(atomic_read(&skb->users)); | 2579 | WARN_ON(atomic_read(&skb->users)); |
2580 | trace_kfree_skb(skb, net_tx_action); | ||
2574 | __kfree_skb(skb); | 2581 | __kfree_skb(skb); |
2575 | } | 2582 | } |
2576 | } | 2583 | } |
@@ -2828,6 +2835,7 @@ static int __netif_receive_skb(struct sk_buff *skb) | |||
2828 | if (!netdev_tstamp_prequeue) | 2835 | if (!netdev_tstamp_prequeue) |
2829 | net_timestamp_check(skb); | 2836 | net_timestamp_check(skb); |
2830 | 2837 | ||
2838 | trace_netif_receive_skb(skb); | ||
2831 | if (vlan_tx_tag_present(skb) && vlan_hwaccel_do_receive(skb)) | 2839 | if (vlan_tx_tag_present(skb) && vlan_hwaccel_do_receive(skb)) |
2832 | return NET_RX_SUCCESS; | 2840 | return NET_RX_SUCCESS; |
2833 | 2841 | ||
diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 4016ac6bdd5e..8451ab481095 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c | |||
@@ -397,7 +397,7 @@ static noinline_for_stack int ethtool_get_rxfh_indir(struct net_device *dev, | |||
397 | (KMALLOC_MAX_SIZE - sizeof(*indir)) / sizeof(*indir->ring_index)) | 397 | (KMALLOC_MAX_SIZE - sizeof(*indir)) / sizeof(*indir->ring_index)) |
398 | return -ENOMEM; | 398 | return -ENOMEM; |
399 | full_size = sizeof(*indir) + sizeof(*indir->ring_index) * table_size; | 399 | full_size = sizeof(*indir) + sizeof(*indir->ring_index) * table_size; |
400 | indir = kmalloc(full_size, GFP_USER); | 400 | indir = kzalloc(full_size, GFP_USER); |
401 | if (!indir) | 401 | if (!indir) |
402 | return -ENOMEM; | 402 | return -ENOMEM; |
403 | 403 | ||
@@ -538,7 +538,7 @@ static int ethtool_get_rx_ntuple(struct net_device *dev, void __user *useraddr) | |||
538 | 538 | ||
539 | gstrings.len = ret; | 539 | gstrings.len = ret; |
540 | 540 | ||
541 | data = kmalloc(gstrings.len * ETH_GSTRING_LEN, GFP_USER); | 541 | data = kzalloc(gstrings.len * ETH_GSTRING_LEN, GFP_USER); |
542 | if (!data) | 542 | if (!data) |
543 | return -ENOMEM; | 543 | return -ENOMEM; |
544 | 544 | ||
@@ -775,7 +775,7 @@ static int ethtool_get_regs(struct net_device *dev, char __user *useraddr) | |||
775 | if (regs.len > reglen) | 775 | if (regs.len > reglen) |
776 | regs.len = reglen; | 776 | regs.len = reglen; |
777 | 777 | ||
778 | regbuf = kmalloc(reglen, GFP_USER); | 778 | regbuf = kzalloc(reglen, GFP_USER); |
779 | if (!regbuf) | 779 | if (!regbuf) |
780 | return -ENOMEM; | 780 | return -ENOMEM; |
781 | 781 | ||
diff --git a/net/core/net-traces.c b/net/core/net-traces.c index afa6380ed88a..7f1bb2aba03b 100644 --- a/net/core/net-traces.c +++ b/net/core/net-traces.c | |||
@@ -26,6 +26,7 @@ | |||
26 | 26 | ||
27 | #define CREATE_TRACE_POINTS | 27 | #define CREATE_TRACE_POINTS |
28 | #include <trace/events/skb.h> | 28 | #include <trace/events/skb.h> |
29 | #include <trace/events/net.h> | ||
29 | #include <trace/events/napi.h> | 30 | #include <trace/events/napi.h> |
30 | 31 | ||
31 | EXPORT_TRACEPOINT_SYMBOL_GPL(kfree_skb); | 32 | EXPORT_TRACEPOINT_SYMBOL_GPL(kfree_skb); |
diff --git a/net/core/skbuff.c b/net/core/skbuff.c index c83b421341c0..56ba3c4e4761 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c | |||
@@ -466,6 +466,7 @@ void consume_skb(struct sk_buff *skb) | |||
466 | smp_rmb(); | 466 | smp_rmb(); |
467 | else if (likely(!atomic_dec_and_test(&skb->users))) | 467 | else if (likely(!atomic_dec_and_test(&skb->users))) |
468 | return; | 468 | return; |
469 | trace_consume_skb(skb); | ||
469 | __kfree_skb(skb); | 470 | __kfree_skb(skb); |
470 | } | 471 | } |
471 | EXPORT_SYMBOL(consume_skb); | 472 | EXPORT_SYMBOL(consume_skb); |
diff --git a/net/core/sock.c b/net/core/sock.c index ef30e9d286e7..7d99e13148e6 100644 --- a/net/core/sock.c +++ b/net/core/sock.c | |||
@@ -1078,8 +1078,11 @@ static void sk_prot_free(struct proto *prot, struct sock *sk) | |||
1078 | #ifdef CONFIG_CGROUPS | 1078 | #ifdef CONFIG_CGROUPS |
1079 | void sock_update_classid(struct sock *sk) | 1079 | void sock_update_classid(struct sock *sk) |
1080 | { | 1080 | { |
1081 | u32 classid = task_cls_classid(current); | 1081 | u32 classid; |
1082 | 1082 | ||
1083 | rcu_read_lock(); /* doing current task, which cannot vanish. */ | ||
1084 | classid = task_cls_classid(current); | ||
1085 | rcu_read_unlock(); | ||
1083 | if (classid && classid != sk->sk_classid) | 1086 | if (classid && classid != sk->sk_classid) |
1084 | sk->sk_classid = classid; | 1087 | sk->sk_classid = classid; |
1085 | } | 1088 | } |
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c index 244f7cb08d68..37f8adb68c79 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c | |||
@@ -11,6 +11,7 @@ | |||
11 | #include <linux/proc_fs.h> | 11 | #include <linux/proc_fs.h> |
12 | #include <linux/seq_file.h> | 12 | #include <linux/seq_file.h> |
13 | #include <linux/percpu.h> | 13 | #include <linux/percpu.h> |
14 | #include <linux/security.h> | ||
14 | #include <net/net_namespace.h> | 15 | #include <net/net_namespace.h> |
15 | 16 | ||
16 | #include <linux/netfilter.h> | 17 | #include <linux/netfilter.h> |
@@ -87,6 +88,29 @@ static void ct_seq_stop(struct seq_file *s, void *v) | |||
87 | rcu_read_unlock(); | 88 | rcu_read_unlock(); |
88 | } | 89 | } |
89 | 90 | ||
91 | #ifdef CONFIG_NF_CONNTRACK_SECMARK | ||
92 | static int ct_show_secctx(struct seq_file *s, const struct nf_conn *ct) | ||
93 | { | ||
94 | int ret; | ||
95 | u32 len; | ||
96 | char *secctx; | ||
97 | |||
98 | ret = security_secid_to_secctx(ct->secmark, &secctx, &len); | ||
99 | if (ret) | ||
100 | return ret; | ||
101 | |||
102 | ret = seq_printf(s, "secctx=%s ", secctx); | ||
103 | |||
104 | security_release_secctx(secctx, len); | ||
105 | return ret; | ||
106 | } | ||
107 | #else | ||
108 | static inline int ct_show_secctx(struct seq_file *s, const struct nf_conn *ct) | ||
109 | { | ||
110 | return 0; | ||
111 | } | ||
112 | #endif | ||
113 | |||
90 | static int ct_seq_show(struct seq_file *s, void *v) | 114 | static int ct_seq_show(struct seq_file *s, void *v) |
91 | { | 115 | { |
92 | struct nf_conntrack_tuple_hash *hash = v; | 116 | struct nf_conntrack_tuple_hash *hash = v; |
@@ -148,10 +172,8 @@ static int ct_seq_show(struct seq_file *s, void *v) | |||
148 | goto release; | 172 | goto release; |
149 | #endif | 173 | #endif |
150 | 174 | ||
151 | #ifdef CONFIG_NF_CONNTRACK_SECMARK | 175 | if (ct_show_secctx(s, ct)) |
152 | if (seq_printf(s, "secmark=%u ", ct->secmark)) | ||
153 | goto release; | 176 | goto release; |
154 | #endif | ||
155 | 177 | ||
156 | if (seq_printf(s, "use=%u\n", atomic_read(&ct->ct_general.use))) | 178 | if (seq_printf(s, "use=%u\n", atomic_read(&ct->ct_general.use))) |
157 | goto release; | 179 | goto release; |
diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c index 8c8632d9b93c..957c9241fb0c 100644 --- a/net/ipv4/netfilter/nf_nat_core.c +++ b/net/ipv4/netfilter/nf_nat_core.c | |||
@@ -38,7 +38,7 @@ static DEFINE_SPINLOCK(nf_nat_lock); | |||
38 | static struct nf_conntrack_l3proto *l3proto __read_mostly; | 38 | static struct nf_conntrack_l3proto *l3proto __read_mostly; |
39 | 39 | ||
40 | #define MAX_IP_NAT_PROTO 256 | 40 | #define MAX_IP_NAT_PROTO 256 |
41 | static const struct nf_nat_protocol *nf_nat_protos[MAX_IP_NAT_PROTO] | 41 | static const struct nf_nat_protocol __rcu *nf_nat_protos[MAX_IP_NAT_PROTO] |
42 | __read_mostly; | 42 | __read_mostly; |
43 | 43 | ||
44 | static inline const struct nf_nat_protocol * | 44 | static inline const struct nf_nat_protocol * |
diff --git a/net/netfilter/core.c b/net/netfilter/core.c index 78b505d33bfb..fdaec7daff1d 100644 --- a/net/netfilter/core.c +++ b/net/netfilter/core.c | |||
@@ -27,7 +27,7 @@ | |||
27 | 27 | ||
28 | static DEFINE_MUTEX(afinfo_mutex); | 28 | static DEFINE_MUTEX(afinfo_mutex); |
29 | 29 | ||
30 | const struct nf_afinfo *nf_afinfo[NFPROTO_NUMPROTO] __read_mostly; | 30 | const struct nf_afinfo __rcu *nf_afinfo[NFPROTO_NUMPROTO] __read_mostly; |
31 | EXPORT_SYMBOL(nf_afinfo); | 31 | EXPORT_SYMBOL(nf_afinfo); |
32 | 32 | ||
33 | int nf_register_afinfo(const struct nf_afinfo *afinfo) | 33 | int nf_register_afinfo(const struct nf_afinfo *afinfo) |
diff --git a/net/netfilter/nf_conntrack_ecache.c b/net/netfilter/nf_conntrack_ecache.c index cdcc7649476b..5702de35e2bb 100644 --- a/net/netfilter/nf_conntrack_ecache.c +++ b/net/netfilter/nf_conntrack_ecache.c | |||
@@ -26,10 +26,10 @@ | |||
26 | 26 | ||
27 | static DEFINE_MUTEX(nf_ct_ecache_mutex); | 27 | static DEFINE_MUTEX(nf_ct_ecache_mutex); |
28 | 28 | ||
29 | struct nf_ct_event_notifier *nf_conntrack_event_cb __read_mostly; | 29 | struct nf_ct_event_notifier __rcu *nf_conntrack_event_cb __read_mostly; |
30 | EXPORT_SYMBOL_GPL(nf_conntrack_event_cb); | 30 | EXPORT_SYMBOL_GPL(nf_conntrack_event_cb); |
31 | 31 | ||
32 | struct nf_exp_event_notifier *nf_expect_event_cb __read_mostly; | 32 | struct nf_exp_event_notifier __rcu *nf_expect_event_cb __read_mostly; |
33 | EXPORT_SYMBOL_GPL(nf_expect_event_cb); | 33 | EXPORT_SYMBOL_GPL(nf_expect_event_cb); |
34 | 34 | ||
35 | /* deliver cached events and clear cache entry - must be called with locally | 35 | /* deliver cached events and clear cache entry - must be called with locally |
diff --git a/net/netfilter/nf_conntrack_extend.c b/net/netfilter/nf_conntrack_extend.c index 8d9e4c949b96..bd82450c193f 100644 --- a/net/netfilter/nf_conntrack_extend.c +++ b/net/netfilter/nf_conntrack_extend.c | |||
@@ -16,7 +16,7 @@ | |||
16 | #include <linux/skbuff.h> | 16 | #include <linux/skbuff.h> |
17 | #include <net/netfilter/nf_conntrack_extend.h> | 17 | #include <net/netfilter/nf_conntrack_extend.h> |
18 | 18 | ||
19 | static struct nf_ct_ext_type *nf_ct_ext_types[NF_CT_EXT_NUM]; | 19 | static struct nf_ct_ext_type __rcu *nf_ct_ext_types[NF_CT_EXT_NUM]; |
20 | static DEFINE_MUTEX(nf_ct_ext_type_mutex); | 20 | static DEFINE_MUTEX(nf_ct_ext_type_mutex); |
21 | 21 | ||
22 | void __nf_ct_ext_destroy(struct nf_conn *ct) | 22 | void __nf_ct_ext_destroy(struct nf_conn *ct) |
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 5bae1cd15eea..146476c6441a 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c | |||
@@ -22,6 +22,7 @@ | |||
22 | #include <linux/rculist_nulls.h> | 22 | #include <linux/rculist_nulls.h> |
23 | #include <linux/types.h> | 23 | #include <linux/types.h> |
24 | #include <linux/timer.h> | 24 | #include <linux/timer.h> |
25 | #include <linux/security.h> | ||
25 | #include <linux/skbuff.h> | 26 | #include <linux/skbuff.h> |
26 | #include <linux/errno.h> | 27 | #include <linux/errno.h> |
27 | #include <linux/netlink.h> | 28 | #include <linux/netlink.h> |
@@ -245,16 +246,31 @@ nla_put_failure: | |||
245 | 246 | ||
246 | #ifdef CONFIG_NF_CONNTRACK_SECMARK | 247 | #ifdef CONFIG_NF_CONNTRACK_SECMARK |
247 | static inline int | 248 | static inline int |
248 | ctnetlink_dump_secmark(struct sk_buff *skb, const struct nf_conn *ct) | 249 | ctnetlink_dump_secctx(struct sk_buff *skb, const struct nf_conn *ct) |
249 | { | 250 | { |
250 | NLA_PUT_BE32(skb, CTA_SECMARK, htonl(ct->secmark)); | 251 | struct nlattr *nest_secctx; |
251 | return 0; | 252 | int len, ret; |
253 | char *secctx; | ||
254 | |||
255 | ret = security_secid_to_secctx(ct->secmark, &secctx, &len); | ||
256 | if (ret) | ||
257 | return ret; | ||
258 | |||
259 | ret = -1; | ||
260 | nest_secctx = nla_nest_start(skb, CTA_SECCTX | NLA_F_NESTED); | ||
261 | if (!nest_secctx) | ||
262 | goto nla_put_failure; | ||
263 | |||
264 | NLA_PUT_STRING(skb, CTA_SECCTX_NAME, secctx); | ||
265 | nla_nest_end(skb, nest_secctx); | ||
252 | 266 | ||
267 | ret = 0; | ||
253 | nla_put_failure: | 268 | nla_put_failure: |
254 | return -1; | 269 | security_release_secctx(secctx, len); |
270 | return ret; | ||
255 | } | 271 | } |
256 | #else | 272 | #else |
257 | #define ctnetlink_dump_secmark(a, b) (0) | 273 | #define ctnetlink_dump_secctx(a, b) (0) |
258 | #endif | 274 | #endif |
259 | 275 | ||
260 | #define master_tuple(ct) &(ct->master->tuplehash[IP_CT_DIR_ORIGINAL].tuple) | 276 | #define master_tuple(ct) &(ct->master->tuplehash[IP_CT_DIR_ORIGINAL].tuple) |
@@ -391,7 +407,7 @@ ctnetlink_fill_info(struct sk_buff *skb, u32 pid, u32 seq, | |||
391 | ctnetlink_dump_protoinfo(skb, ct) < 0 || | 407 | ctnetlink_dump_protoinfo(skb, ct) < 0 || |
392 | ctnetlink_dump_helpinfo(skb, ct) < 0 || | 408 | ctnetlink_dump_helpinfo(skb, ct) < 0 || |
393 | ctnetlink_dump_mark(skb, ct) < 0 || | 409 | ctnetlink_dump_mark(skb, ct) < 0 || |
394 | ctnetlink_dump_secmark(skb, ct) < 0 || | 410 | ctnetlink_dump_secctx(skb, ct) < 0 || |
395 | ctnetlink_dump_id(skb, ct) < 0 || | 411 | ctnetlink_dump_id(skb, ct) < 0 || |
396 | ctnetlink_dump_use(skb, ct) < 0 || | 412 | ctnetlink_dump_use(skb, ct) < 0 || |
397 | ctnetlink_dump_master(skb, ct) < 0 || | 413 | ctnetlink_dump_master(skb, ct) < 0 || |
@@ -437,6 +453,17 @@ ctnetlink_counters_size(const struct nf_conn *ct) | |||
437 | ; | 453 | ; |
438 | } | 454 | } |
439 | 455 | ||
456 | #ifdef CONFIG_NF_CONNTRACK_SECMARK | ||
457 | static int ctnetlink_nlmsg_secctx_size(const struct nf_conn *ct) | ||
458 | { | ||
459 | int len; | ||
460 | |||
461 | security_secid_to_secctx(ct->secmark, NULL, &len); | ||
462 | |||
463 | return sizeof(char) * len; | ||
464 | } | ||
465 | #endif | ||
466 | |||
440 | static inline size_t | 467 | static inline size_t |
441 | ctnetlink_nlmsg_size(const struct nf_conn *ct) | 468 | ctnetlink_nlmsg_size(const struct nf_conn *ct) |
442 | { | 469 | { |
@@ -453,7 +480,8 @@ ctnetlink_nlmsg_size(const struct nf_conn *ct) | |||
453 | + nla_total_size(0) /* CTA_HELP */ | 480 | + nla_total_size(0) /* CTA_HELP */ |
454 | + nla_total_size(NF_CT_HELPER_NAME_LEN) /* CTA_HELP_NAME */ | 481 | + nla_total_size(NF_CT_HELPER_NAME_LEN) /* CTA_HELP_NAME */ |
455 | #ifdef CONFIG_NF_CONNTRACK_SECMARK | 482 | #ifdef CONFIG_NF_CONNTRACK_SECMARK |
456 | + nla_total_size(sizeof(u_int32_t)) /* CTA_SECMARK */ | 483 | + nla_total_size(0) /* CTA_SECCTX */ |
484 | + nla_total_size(ctnetlink_nlmsg_secctx_size(ct)) /* CTA_SECCTX_NAME */ | ||
457 | #endif | 485 | #endif |
458 | #ifdef CONFIG_NF_NAT_NEEDED | 486 | #ifdef CONFIG_NF_NAT_NEEDED |
459 | + 2 * nla_total_size(0) /* CTA_NAT_SEQ_ADJ_ORIG|REPL */ | 487 | + 2 * nla_total_size(0) /* CTA_NAT_SEQ_ADJ_ORIG|REPL */ |
@@ -556,7 +584,7 @@ ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item) | |||
556 | 584 | ||
557 | #ifdef CONFIG_NF_CONNTRACK_SECMARK | 585 | #ifdef CONFIG_NF_CONNTRACK_SECMARK |
558 | if ((events & (1 << IPCT_SECMARK) || ct->secmark) | 586 | if ((events & (1 << IPCT_SECMARK) || ct->secmark) |
559 | && ctnetlink_dump_secmark(skb, ct) < 0) | 587 | && ctnetlink_dump_secctx(skb, ct) < 0) |
560 | goto nla_put_failure; | 588 | goto nla_put_failure; |
561 | #endif | 589 | #endif |
562 | 590 | ||
diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c index 5886ba1d52a0..ed6d92958023 100644 --- a/net/netfilter/nf_conntrack_proto.c +++ b/net/netfilter/nf_conntrack_proto.c | |||
@@ -28,8 +28,8 @@ | |||
28 | #include <net/netfilter/nf_conntrack_l4proto.h> | 28 | #include <net/netfilter/nf_conntrack_l4proto.h> |
29 | #include <net/netfilter/nf_conntrack_core.h> | 29 | #include <net/netfilter/nf_conntrack_core.h> |
30 | 30 | ||
31 | static struct nf_conntrack_l4proto **nf_ct_protos[PF_MAX] __read_mostly; | 31 | static struct nf_conntrack_l4proto __rcu **nf_ct_protos[PF_MAX] __read_mostly; |
32 | struct nf_conntrack_l3proto *nf_ct_l3protos[AF_MAX] __read_mostly; | 32 | struct nf_conntrack_l3proto __rcu *nf_ct_l3protos[AF_MAX] __read_mostly; |
33 | EXPORT_SYMBOL_GPL(nf_ct_l3protos); | 33 | EXPORT_SYMBOL_GPL(nf_ct_l3protos); |
34 | 34 | ||
35 | static DEFINE_MUTEX(nf_ct_proto_mutex); | 35 | static DEFINE_MUTEX(nf_ct_proto_mutex); |
diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index eb973fcd67ab..0fb65705b44b 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c | |||
@@ -15,6 +15,7 @@ | |||
15 | #include <linux/seq_file.h> | 15 | #include <linux/seq_file.h> |
16 | #include <linux/percpu.h> | 16 | #include <linux/percpu.h> |
17 | #include <linux/netdevice.h> | 17 | #include <linux/netdevice.h> |
18 | #include <linux/security.h> | ||
18 | #include <net/net_namespace.h> | 19 | #include <net/net_namespace.h> |
19 | #ifdef CONFIG_SYSCTL | 20 | #ifdef CONFIG_SYSCTL |
20 | #include <linux/sysctl.h> | 21 | #include <linux/sysctl.h> |
@@ -108,6 +109,29 @@ static void ct_seq_stop(struct seq_file *s, void *v) | |||
108 | rcu_read_unlock(); | 109 | rcu_read_unlock(); |
109 | } | 110 | } |
110 | 111 | ||
112 | #ifdef CONFIG_NF_CONNTRACK_SECMARK | ||
113 | static int ct_show_secctx(struct seq_file *s, const struct nf_conn *ct) | ||
114 | { | ||
115 | int ret; | ||
116 | u32 len; | ||
117 | char *secctx; | ||
118 | |||
119 | ret = security_secid_to_secctx(ct->secmark, &secctx, &len); | ||
120 | if (ret) | ||
121 | return ret; | ||
122 | |||
123 | ret = seq_printf(s, "secctx=%s ", secctx); | ||
124 | |||
125 | security_release_secctx(secctx, len); | ||
126 | return ret; | ||
127 | } | ||
128 | #else | ||
129 | static inline int ct_show_secctx(struct seq_file *s, const struct nf_conn *ct) | ||
130 | { | ||
131 | return 0; | ||
132 | } | ||
133 | #endif | ||
134 | |||
111 | /* return 0 on success, 1 in case of error */ | 135 | /* return 0 on success, 1 in case of error */ |
112 | static int ct_seq_show(struct seq_file *s, void *v) | 136 | static int ct_seq_show(struct seq_file *s, void *v) |
113 | { | 137 | { |
@@ -168,10 +192,8 @@ static int ct_seq_show(struct seq_file *s, void *v) | |||
168 | goto release; | 192 | goto release; |
169 | #endif | 193 | #endif |
170 | 194 | ||
171 | #ifdef CONFIG_NF_CONNTRACK_SECMARK | 195 | if (ct_show_secctx(s, ct)) |
172 | if (seq_printf(s, "secmark=%u ", ct->secmark)) | ||
173 | goto release; | 196 | goto release; |
174 | #endif | ||
175 | 197 | ||
176 | #ifdef CONFIG_NF_CONNTRACK_ZONES | 198 | #ifdef CONFIG_NF_CONNTRACK_ZONES |
177 | if (seq_printf(s, "zone=%u ", nf_ct_zone(ct))) | 199 | if (seq_printf(s, "zone=%u ", nf_ct_zone(ct))) |
diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c index 7df37fd786bc..b07393eab88e 100644 --- a/net/netfilter/nf_log.c +++ b/net/netfilter/nf_log.c | |||
@@ -16,7 +16,7 @@ | |||
16 | #define NF_LOG_PREFIXLEN 128 | 16 | #define NF_LOG_PREFIXLEN 128 |
17 | #define NFLOGGER_NAME_LEN 64 | 17 | #define NFLOGGER_NAME_LEN 64 |
18 | 18 | ||
19 | static const struct nf_logger *nf_loggers[NFPROTO_NUMPROTO] __read_mostly; | 19 | static const struct nf_logger __rcu *nf_loggers[NFPROTO_NUMPROTO] __read_mostly; |
20 | static struct list_head nf_loggers_l[NFPROTO_NUMPROTO] __read_mostly; | 20 | static struct list_head nf_loggers_l[NFPROTO_NUMPROTO] __read_mostly; |
21 | static DEFINE_MUTEX(nf_log_mutex); | 21 | static DEFINE_MUTEX(nf_log_mutex); |
22 | 22 | ||
diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c index 78b3cf9c519c..74aebed5bd28 100644 --- a/net/netfilter/nf_queue.c +++ b/net/netfilter/nf_queue.c | |||
@@ -18,7 +18,7 @@ | |||
18 | * long term mutex. The handler must provide an outfn() to accept packets | 18 | * long term mutex. The handler must provide an outfn() to accept packets |
19 | * for queueing and must reinject all packets it receives, no matter what. | 19 | * for queueing and must reinject all packets it receives, no matter what. |
20 | */ | 20 | */ |
21 | static const struct nf_queue_handler *queue_handler[NFPROTO_NUMPROTO] __read_mostly; | 21 | static const struct nf_queue_handler __rcu *queue_handler[NFPROTO_NUMPROTO] __read_mostly; |
22 | 22 | ||
23 | static DEFINE_MUTEX(queue_handler_mutex); | 23 | static DEFINE_MUTEX(queue_handler_mutex); |
24 | 24 | ||
diff --git a/net/netfilter/xt_CT.c b/net/netfilter/xt_CT.c index 0cb6053f02fd..782e51986a6f 100644 --- a/net/netfilter/xt_CT.c +++ b/net/netfilter/xt_CT.c | |||
@@ -9,7 +9,6 @@ | |||
9 | #include <linux/module.h> | 9 | #include <linux/module.h> |
10 | #include <linux/gfp.h> | 10 | #include <linux/gfp.h> |
11 | #include <linux/skbuff.h> | 11 | #include <linux/skbuff.h> |
12 | #include <linux/selinux.h> | ||
13 | #include <linux/netfilter_ipv4/ip_tables.h> | 12 | #include <linux/netfilter_ipv4/ip_tables.h> |
14 | #include <linux/netfilter_ipv6/ip6_tables.h> | 13 | #include <linux/netfilter_ipv6/ip6_tables.h> |
15 | #include <linux/netfilter/x_tables.h> | 14 | #include <linux/netfilter/x_tables.h> |
diff --git a/net/netfilter/xt_SECMARK.c b/net/netfilter/xt_SECMARK.c index 23b2d6c486b5..9faf5e050b79 100644 --- a/net/netfilter/xt_SECMARK.c +++ b/net/netfilter/xt_SECMARK.c | |||
@@ -14,8 +14,8 @@ | |||
14 | */ | 14 | */ |
15 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt | 15 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt |
16 | #include <linux/module.h> | 16 | #include <linux/module.h> |
17 | #include <linux/security.h> | ||
17 | #include <linux/skbuff.h> | 18 | #include <linux/skbuff.h> |
18 | #include <linux/selinux.h> | ||
19 | #include <linux/netfilter/x_tables.h> | 19 | #include <linux/netfilter/x_tables.h> |
20 | #include <linux/netfilter/xt_SECMARK.h> | 20 | #include <linux/netfilter/xt_SECMARK.h> |
21 | 21 | ||
@@ -39,9 +39,8 @@ secmark_tg(struct sk_buff *skb, const struct xt_action_param *par) | |||
39 | 39 | ||
40 | switch (mode) { | 40 | switch (mode) { |
41 | case SECMARK_MODE_SEL: | 41 | case SECMARK_MODE_SEL: |
42 | secmark = info->u.sel.selsid; | 42 | secmark = info->secid; |
43 | break; | 43 | break; |
44 | |||
45 | default: | 44 | default: |
46 | BUG(); | 45 | BUG(); |
47 | } | 46 | } |
@@ -50,33 +49,33 @@ secmark_tg(struct sk_buff *skb, const struct xt_action_param *par) | |||
50 | return XT_CONTINUE; | 49 | return XT_CONTINUE; |
51 | } | 50 | } |
52 | 51 | ||
53 | static int checkentry_selinux(struct xt_secmark_target_info *info) | 52 | static int checkentry_lsm(struct xt_secmark_target_info *info) |
54 | { | 53 | { |
55 | int err; | 54 | int err; |
56 | struct xt_secmark_target_selinux_info *sel = &info->u.sel; | ||
57 | 55 | ||
58 | sel->selctx[SECMARK_SELCTX_MAX - 1] = '\0'; | 56 | info->secctx[SECMARK_SECCTX_MAX - 1] = '\0'; |
57 | info->secid = 0; | ||
59 | 58 | ||
60 | err = selinux_string_to_sid(sel->selctx, &sel->selsid); | 59 | err = security_secctx_to_secid(info->secctx, strlen(info->secctx), |
60 | &info->secid); | ||
61 | if (err) { | 61 | if (err) { |
62 | if (err == -EINVAL) | 62 | if (err == -EINVAL) |
63 | pr_info("invalid SELinux context \'%s\'\n", | 63 | pr_info("invalid security context \'%s\'\n", info->secctx); |
64 | sel->selctx); | ||
65 | return err; | 64 | return err; |
66 | } | 65 | } |
67 | 66 | ||
68 | if (!sel->selsid) { | 67 | if (!info->secid) { |
69 | pr_info("unable to map SELinux context \'%s\'\n", sel->selctx); | 68 | pr_info("unable to map security context \'%s\'\n", info->secctx); |
70 | return -ENOENT; | 69 | return -ENOENT; |
71 | } | 70 | } |
72 | 71 | ||
73 | err = selinux_secmark_relabel_packet_permission(sel->selsid); | 72 | err = security_secmark_relabel_packet(info->secid); |
74 | if (err) { | 73 | if (err) { |
75 | pr_info("unable to obtain relabeling permission\n"); | 74 | pr_info("unable to obtain relabeling permission\n"); |
76 | return err; | 75 | return err; |
77 | } | 76 | } |
78 | 77 | ||
79 | selinux_secmark_refcount_inc(); | 78 | security_secmark_refcount_inc(); |
80 | return 0; | 79 | return 0; |
81 | } | 80 | } |
82 | 81 | ||
@@ -100,16 +99,16 @@ static int secmark_tg_check(const struct xt_tgchk_param *par) | |||
100 | 99 | ||
101 | switch (info->mode) { | 100 | switch (info->mode) { |
102 | case SECMARK_MODE_SEL: | 101 | case SECMARK_MODE_SEL: |
103 | err = checkentry_selinux(info); | ||
104 | if (err <= 0) | ||
105 | return err; | ||
106 | break; | 102 | break; |
107 | |||
108 | default: | 103 | default: |
109 | pr_info("invalid mode: %hu\n", info->mode); | 104 | pr_info("invalid mode: %hu\n", info->mode); |
110 | return -EINVAL; | 105 | return -EINVAL; |
111 | } | 106 | } |
112 | 107 | ||
108 | err = checkentry_lsm(info); | ||
109 | if (err) | ||
110 | return err; | ||
111 | |||
113 | if (!mode) | 112 | if (!mode) |
114 | mode = info->mode; | 113 | mode = info->mode; |
115 | return 0; | 114 | return 0; |
@@ -119,7 +118,7 @@ static void secmark_tg_destroy(const struct xt_tgdtor_param *par) | |||
119 | { | 118 | { |
120 | switch (mode) { | 119 | switch (mode) { |
121 | case SECMARK_MODE_SEL: | 120 | case SECMARK_MODE_SEL: |
122 | selinux_secmark_refcount_dec(); | 121 | security_secmark_refcount_dec(); |
123 | } | 122 | } |
124 | } | 123 | } |
125 | 124 | ||
diff --git a/net/rds/page.c b/net/rds/page.c index 595a952d4b17..1dfbfea12e9b 100644 --- a/net/rds/page.c +++ b/net/rds/page.c | |||
@@ -57,30 +57,17 @@ int rds_page_copy_user(struct page *page, unsigned long offset, | |||
57 | unsigned long ret; | 57 | unsigned long ret; |
58 | void *addr; | 58 | void *addr; |
59 | 59 | ||
60 | if (to_user) | 60 | addr = kmap(page); |
61 | if (to_user) { | ||
61 | rds_stats_add(s_copy_to_user, bytes); | 62 | rds_stats_add(s_copy_to_user, bytes); |
62 | else | 63 | ret = copy_to_user(ptr, addr + offset, bytes); |
64 | } else { | ||
63 | rds_stats_add(s_copy_from_user, bytes); | 65 | rds_stats_add(s_copy_from_user, bytes); |
64 | 66 | ret = copy_from_user(addr + offset, ptr, bytes); | |
65 | addr = kmap_atomic(page, KM_USER0); | ||
66 | if (to_user) | ||
67 | ret = __copy_to_user_inatomic(ptr, addr + offset, bytes); | ||
68 | else | ||
69 | ret = __copy_from_user_inatomic(addr + offset, ptr, bytes); | ||
70 | kunmap_atomic(addr, KM_USER0); | ||
71 | |||
72 | if (ret) { | ||
73 | addr = kmap(page); | ||
74 | if (to_user) | ||
75 | ret = copy_to_user(ptr, addr + offset, bytes); | ||
76 | else | ||
77 | ret = copy_from_user(addr + offset, ptr, bytes); | ||
78 | kunmap(page); | ||
79 | if (ret) | ||
80 | return -EFAULT; | ||
81 | } | 67 | } |
68 | kunmap(page); | ||
82 | 69 | ||
83 | return 0; | 70 | return ret ? -EFAULT : 0; |
84 | } | 71 | } |
85 | EXPORT_SYMBOL_GPL(rds_page_copy_user); | 72 | EXPORT_SYMBOL_GPL(rds_page_copy_user); |
86 | 73 | ||
diff --git a/scripts/Makefile b/scripts/Makefile index 842dbc2d5aed..2e088109fbd5 100644 --- a/scripts/Makefile +++ b/scripts/Makefile | |||
@@ -11,6 +11,7 @@ hostprogs-$(CONFIG_KALLSYMS) += kallsyms | |||
11 | hostprogs-$(CONFIG_LOGO) += pnmtologo | 11 | hostprogs-$(CONFIG_LOGO) += pnmtologo |
12 | hostprogs-$(CONFIG_VT) += conmakehash | 12 | hostprogs-$(CONFIG_VT) += conmakehash |
13 | hostprogs-$(CONFIG_IKCONFIG) += bin2c | 13 | hostprogs-$(CONFIG_IKCONFIG) += bin2c |
14 | hostprogs-$(BUILD_C_RECORDMCOUNT) += recordmcount | ||
14 | 15 | ||
15 | always := $(hostprogs-y) $(hostprogs-m) | 16 | always := $(hostprogs-y) $(hostprogs-m) |
16 | 17 | ||
diff --git a/scripts/Makefile.build b/scripts/Makefile.build index a1a5cf95a68d..843bd4f4ffc9 100644 --- a/scripts/Makefile.build +++ b/scripts/Makefile.build | |||
@@ -209,12 +209,22 @@ cmd_modversions = \ | |||
209 | endif | 209 | endif |
210 | 210 | ||
211 | ifdef CONFIG_FTRACE_MCOUNT_RECORD | 211 | ifdef CONFIG_FTRACE_MCOUNT_RECORD |
212 | ifdef BUILD_C_RECORDMCOUNT | ||
213 | # Due to recursion, we must skip empty.o. | ||
214 | # The empty.o file is created in the make process in order to determine | ||
215 | # the target endianness and word size. It is made before all other C | ||
216 | # files, including recordmcount. | ||
217 | cmd_record_mcount = if [ $(@) != "scripts/mod/empty.o" ]; then \ | ||
218 | $(objtree)/scripts/recordmcount "$(@)"; \ | ||
219 | fi; | ||
220 | else | ||
212 | cmd_record_mcount = set -e ; perl $(srctree)/scripts/recordmcount.pl "$(ARCH)" \ | 221 | cmd_record_mcount = set -e ; perl $(srctree)/scripts/recordmcount.pl "$(ARCH)" \ |
213 | "$(if $(CONFIG_CPU_BIG_ENDIAN),big,little)" \ | 222 | "$(if $(CONFIG_CPU_BIG_ENDIAN),big,little)" \ |
214 | "$(if $(CONFIG_64BIT),64,32)" \ | 223 | "$(if $(CONFIG_64BIT),64,32)" \ |
215 | "$(OBJDUMP)" "$(OBJCOPY)" "$(CC)" "$(LD)" "$(NM)" "$(RM)" "$(MV)" \ | 224 | "$(OBJDUMP)" "$(OBJCOPY)" "$(CC)" "$(LD)" "$(NM)" "$(RM)" "$(MV)" \ |
216 | "$(if $(part-of-module),1,0)" "$(@)"; | 225 | "$(if $(part-of-module),1,0)" "$(@)"; |
217 | endif | 226 | endif |
227 | endif | ||
218 | 228 | ||
219 | define rule_cc_o_c | 229 | define rule_cc_o_c |
220 | $(call echo-cmd,checksrc) $(cmd_checksrc) \ | 230 | $(call echo-cmd,checksrc) $(cmd_checksrc) \ |
diff --git a/scripts/Makefile.lib b/scripts/Makefile.lib index 54fd1b700131..7bfcf1a09ac5 100644 --- a/scripts/Makefile.lib +++ b/scripts/Makefile.lib | |||
@@ -101,14 +101,6 @@ basename_flags = -D"KBUILD_BASENAME=KBUILD_STR($(call name-fix,$(basetarget)))" | |||
101 | modname_flags = $(if $(filter 1,$(words $(modname))),\ | 101 | modname_flags = $(if $(filter 1,$(words $(modname))),\ |
102 | -D"KBUILD_MODNAME=KBUILD_STR($(call name-fix,$(modname)))") | 102 | -D"KBUILD_MODNAME=KBUILD_STR($(call name-fix,$(modname)))") |
103 | 103 | ||
104 | #hash values | ||
105 | ifdef CONFIG_DYNAMIC_DEBUG | ||
106 | debug_flags = -D"DEBUG_HASH=$(shell ./scripts/basic/hash djb2 $(@D)$(modname))"\ | ||
107 | -D"DEBUG_HASH2=$(shell ./scripts/basic/hash r5 $(@D)$(modname))" | ||
108 | else | ||
109 | debug_flags = | ||
110 | endif | ||
111 | |||
112 | orig_c_flags = $(KBUILD_CPPFLAGS) $(KBUILD_CFLAGS) $(KBUILD_SUBDIR_CCFLAGS) \ | 104 | orig_c_flags = $(KBUILD_CPPFLAGS) $(KBUILD_CFLAGS) $(KBUILD_SUBDIR_CCFLAGS) \ |
113 | $(ccflags-y) $(CFLAGS_$(basetarget).o) | 105 | $(ccflags-y) $(CFLAGS_$(basetarget).o) |
114 | _c_flags = $(filter-out $(CFLAGS_REMOVE_$(basetarget).o), $(orig_c_flags)) | 106 | _c_flags = $(filter-out $(CFLAGS_REMOVE_$(basetarget).o), $(orig_c_flags)) |
@@ -152,8 +144,7 @@ endif | |||
152 | 144 | ||
153 | c_flags = -Wp,-MD,$(depfile) $(NOSTDINC_FLAGS) $(LINUXINCLUDE) \ | 145 | c_flags = -Wp,-MD,$(depfile) $(NOSTDINC_FLAGS) $(LINUXINCLUDE) \ |
154 | $(__c_flags) $(modkern_cflags) \ | 146 | $(__c_flags) $(modkern_cflags) \ |
155 | -D"KBUILD_STR(s)=\#s" $(basename_flags) $(modname_flags) \ | 147 | -D"KBUILD_STR(s)=\#s" $(basename_flags) $(modname_flags) |
156 | $(debug_flags) | ||
157 | 148 | ||
158 | a_flags = -Wp,-MD,$(depfile) $(NOSTDINC_FLAGS) $(LINUXINCLUDE) \ | 149 | a_flags = -Wp,-MD,$(depfile) $(NOSTDINC_FLAGS) $(LINUXINCLUDE) \ |
159 | $(__a_flags) $(modkern_aflags) | 150 | $(__a_flags) $(modkern_aflags) |
diff --git a/scripts/basic/Makefile b/scripts/basic/Makefile index 09559951df12..4c324a1f1e0e 100644 --- a/scripts/basic/Makefile +++ b/scripts/basic/Makefile | |||
@@ -9,7 +9,7 @@ | |||
9 | # fixdep: Used to generate dependency information during build process | 9 | # fixdep: Used to generate dependency information during build process |
10 | # docproc: Used in Documentation/DocBook | 10 | # docproc: Used in Documentation/DocBook |
11 | 11 | ||
12 | hostprogs-y := fixdep docproc hash | 12 | hostprogs-y := fixdep docproc |
13 | always := $(hostprogs-y) | 13 | always := $(hostprogs-y) |
14 | 14 | ||
15 | # fixdep is needed to compile other host programs | 15 | # fixdep is needed to compile other host programs |
diff --git a/scripts/basic/hash.c b/scripts/basic/hash.c deleted file mode 100644 index 2ef5d3f666b8..000000000000 --- a/scripts/basic/hash.c +++ /dev/null | |||
@@ -1,64 +0,0 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2008 Red Hat, Inc., Jason Baron <jbaron@redhat.com> | ||
3 | * | ||
4 | */ | ||
5 | |||
6 | #include <stdio.h> | ||
7 | #include <stdlib.h> | ||
8 | #include <string.h> | ||
9 | |||
10 | #define DYNAMIC_DEBUG_HASH_BITS 6 | ||
11 | |||
12 | static const char *program; | ||
13 | |||
14 | static void usage(void) | ||
15 | { | ||
16 | printf("Usage: %s <djb2|r5> <modname>\n", program); | ||
17 | exit(1); | ||
18 | } | ||
19 | |||
20 | /* djb2 hashing algorithm by Dan Bernstein. From: | ||
21 | * http://www.cse.yorku.ca/~oz/hash.html | ||
22 | */ | ||
23 | |||
24 | static unsigned int djb2_hash(char *str) | ||
25 | { | ||
26 | unsigned long hash = 5381; | ||
27 | int c; | ||
28 | |||
29 | c = *str; | ||
30 | while (c) { | ||
31 | hash = ((hash << 5) + hash) + c; | ||
32 | c = *++str; | ||
33 | } | ||
34 | return (unsigned int)(hash & ((1 << DYNAMIC_DEBUG_HASH_BITS) - 1)); | ||
35 | } | ||
36 | |||
37 | static unsigned int r5_hash(char *str) | ||
38 | { | ||
39 | unsigned long hash = 0; | ||
40 | int c; | ||
41 | |||
42 | c = *str; | ||
43 | while (c) { | ||
44 | hash = (hash + (c << 4) + (c >> 4)) * 11; | ||
45 | c = *++str; | ||
46 | } | ||
47 | return (unsigned int)(hash & ((1 << DYNAMIC_DEBUG_HASH_BITS) - 1)); | ||
48 | } | ||
49 | |||
50 | int main(int argc, char *argv[]) | ||
51 | { | ||
52 | program = argv[0]; | ||
53 | |||
54 | if (argc != 3) | ||
55 | usage(); | ||
56 | if (!strcmp(argv[1], "djb2")) | ||
57 | printf("%d\n", djb2_hash(argv[2])); | ||
58 | else if (!strcmp(argv[1], "r5")) | ||
59 | printf("%d\n", r5_hash(argv[2])); | ||
60 | else | ||
61 | usage(); | ||
62 | exit(0); | ||
63 | } | ||
64 | |||
diff --git a/scripts/gcc-goto.sh b/scripts/gcc-goto.sh new file mode 100644 index 000000000000..520d16b1ffaf --- /dev/null +++ b/scripts/gcc-goto.sh | |||
@@ -0,0 +1,5 @@ | |||
1 | #!/bin/sh | ||
2 | # Test for gcc 'asm goto' support | ||
3 | # Copyright (C) 2010, Jason Baron <jbaron@redhat.com> | ||
4 | |||
5 | echo "int main(void) { entry: asm goto (\"\"::::entry); return 0; }" | $@ -x c - -c -o /dev/null >/dev/null 2>&1 && echo "y" | ||
diff --git a/scripts/recordmcount.c b/scripts/recordmcount.c new file mode 100644 index 000000000000..26e1271259ba --- /dev/null +++ b/scripts/recordmcount.c | |||
@@ -0,0 +1,363 @@ | |||
1 | /* | ||
2 | * recordmcount.c: construct a table of the locations of calls to 'mcount' | ||
3 | * so that ftrace can find them quickly. | ||
4 | * Copyright 2009 John F. Reiser <jreiser@BitWagon.com>. All rights reserved. | ||
5 | * Licensed under the GNU General Public License, version 2 (GPLv2). | ||
6 | * | ||
7 | * Restructured to fit Linux format, as well as other updates: | ||
8 | * Copyright 2010 Steven Rostedt <srostedt@redhat.com>, Red Hat Inc. | ||
9 | */ | ||
10 | |||
11 | /* | ||
12 | * Strategy: alter the .o file in-place. | ||
13 | * | ||
14 | * Append a new STRTAB that has the new section names, followed by a new array | ||
15 | * ElfXX_Shdr[] that has the new section headers, followed by the section | ||
16 | * contents for __mcount_loc and its relocations. The old shstrtab strings, | ||
17 | * and the old ElfXX_Shdr[] array, remain as "garbage" (commonly, a couple | ||
18 | * kilobytes.) Subsequent processing by /bin/ld (or the kernel module loader) | ||
19 | * will ignore the garbage regions, because they are not designated by the | ||
20 | * new .e_shoff nor the new ElfXX_Shdr[]. [To remove the garbage, | ||
21 | * use "ld -r" to create a new file that omits it.] | ||
22 | */ | ||
23 | |||
24 | #include <sys/types.h> | ||
25 | #include <sys/mman.h> | ||
26 | #include <sys/stat.h> | ||
27 | #include <elf.h> | ||
28 | #include <fcntl.h> | ||
29 | #include <setjmp.h> | ||
30 | #include <stdio.h> | ||
31 | #include <stdlib.h> | ||
32 | #include <string.h> | ||
33 | #include <unistd.h> | ||
34 | |||
35 | static int fd_map; /* File descriptor for file being modified. */ | ||
36 | static int mmap_failed; /* Boolean flag. */ | ||
37 | static void *ehdr_curr; /* current ElfXX_Ehdr * for resource cleanup */ | ||
38 | static char gpfx; /* prefix for global symbol name (sometimes '_') */ | ||
39 | static struct stat sb; /* Remember .st_size, etc. */ | ||
40 | static jmp_buf jmpenv; /* setjmp/longjmp per-file error escape */ | ||
41 | |||
42 | /* setjmp() return values */ | ||
43 | enum { | ||
44 | SJ_SETJMP = 0, /* hardwired first return */ | ||
45 | SJ_FAIL, | ||
46 | SJ_SUCCEED | ||
47 | }; | ||
48 | |||
49 | /* Per-file resource cleanup when multiple files. */ | ||
50 | static void | ||
51 | cleanup(void) | ||
52 | { | ||
53 | if (!mmap_failed) | ||
54 | munmap(ehdr_curr, sb.st_size); | ||
55 | else | ||
56 | free(ehdr_curr); | ||
57 | close(fd_map); | ||
58 | } | ||
59 | |||
60 | static void __attribute__((noreturn)) | ||
61 | fail_file(void) | ||
62 | { | ||
63 | cleanup(); | ||
64 | longjmp(jmpenv, SJ_FAIL); | ||
65 | } | ||
66 | |||
67 | static void __attribute__((noreturn)) | ||
68 | succeed_file(void) | ||
69 | { | ||
70 | cleanup(); | ||
71 | longjmp(jmpenv, SJ_SUCCEED); | ||
72 | } | ||
73 | |||
74 | /* ulseek, uread, ...: Check return value for errors. */ | ||
75 | |||
76 | static off_t | ||
77 | ulseek(int const fd, off_t const offset, int const whence) | ||
78 | { | ||
79 | off_t const w = lseek(fd, offset, whence); | ||
80 | if ((off_t)-1 == w) { | ||
81 | perror("lseek"); | ||
82 | fail_file(); | ||
83 | } | ||
84 | return w; | ||
85 | } | ||
86 | |||
87 | static size_t | ||
88 | uread(int const fd, void *const buf, size_t const count) | ||
89 | { | ||
90 | size_t const n = read(fd, buf, count); | ||
91 | if (n != count) { | ||
92 | perror("read"); | ||
93 | fail_file(); | ||
94 | } | ||
95 | return n; | ||
96 | } | ||
97 | |||
98 | static size_t | ||
99 | uwrite(int const fd, void const *const buf, size_t const count) | ||
100 | { | ||
101 | size_t const n = write(fd, buf, count); | ||
102 | if (n != count) { | ||
103 | perror("write"); | ||
104 | fail_file(); | ||
105 | } | ||
106 | return n; | ||
107 | } | ||
108 | |||
109 | static void * | ||
110 | umalloc(size_t size) | ||
111 | { | ||
112 | void *const addr = malloc(size); | ||
113 | if (0 == addr) { | ||
114 | fprintf(stderr, "malloc failed: %zu bytes\n", size); | ||
115 | fail_file(); | ||
116 | } | ||
117 | return addr; | ||
118 | } | ||
119 | |||
120 | /* | ||
121 | * Get the whole file as a programming convenience in order to avoid | ||
122 | * malloc+lseek+read+free of many pieces. If successful, then mmap | ||
123 | * avoids copying unused pieces; else just read the whole file. | ||
124 | * Open for both read and write; new info will be appended to the file. | ||
125 | * Use MAP_PRIVATE so that a few changes to the in-memory ElfXX_Ehdr | ||
126 | * do not propagate to the file until an explicit overwrite at the end. | ||
127 | * This preserves most aspects of consistency (all except .st_size) | ||
128 | * for simultaneous readers of the file while we are appending to it. | ||
129 | * However, multiple writers still are bad. We choose not to use | ||
130 | * locking because it is expensive and the use case of kernel build | ||
131 | * makes multiple writers unlikely. | ||
132 | */ | ||
133 | static void *mmap_file(char const *fname) | ||
134 | { | ||
135 | void *addr; | ||
136 | |||
137 | fd_map = open(fname, O_RDWR); | ||
138 | if (0 > fd_map || 0 > fstat(fd_map, &sb)) { | ||
139 | perror(fname); | ||
140 | fail_file(); | ||
141 | } | ||
142 | if (!S_ISREG(sb.st_mode)) { | ||
143 | fprintf(stderr, "not a regular file: %s\n", fname); | ||
144 | fail_file(); | ||
145 | } | ||
146 | addr = mmap(0, sb.st_size, PROT_READ|PROT_WRITE, MAP_PRIVATE, | ||
147 | fd_map, 0); | ||
148 | mmap_failed = 0; | ||
149 | if (MAP_FAILED == addr) { | ||
150 | mmap_failed = 1; | ||
151 | addr = umalloc(sb.st_size); | ||
152 | uread(fd_map, addr, sb.st_size); | ||
153 | } | ||
154 | return addr; | ||
155 | } | ||
156 | |||
157 | /* w8rev, w8nat, ...: Handle endianness. */ | ||
158 | |||
159 | static uint64_t w8rev(uint64_t const x) | ||
160 | { | ||
161 | return ((0xff & (x >> (0 * 8))) << (7 * 8)) | ||
162 | | ((0xff & (x >> (1 * 8))) << (6 * 8)) | ||
163 | | ((0xff & (x >> (2 * 8))) << (5 * 8)) | ||
164 | | ((0xff & (x >> (3 * 8))) << (4 * 8)) | ||
165 | | ((0xff & (x >> (4 * 8))) << (3 * 8)) | ||
166 | | ((0xff & (x >> (5 * 8))) << (2 * 8)) | ||
167 | | ((0xff & (x >> (6 * 8))) << (1 * 8)) | ||
168 | | ((0xff & (x >> (7 * 8))) << (0 * 8)); | ||
169 | } | ||
170 | |||
171 | static uint32_t w4rev(uint32_t const x) | ||
172 | { | ||
173 | return ((0xff & (x >> (0 * 8))) << (3 * 8)) | ||
174 | | ((0xff & (x >> (1 * 8))) << (2 * 8)) | ||
175 | | ((0xff & (x >> (2 * 8))) << (1 * 8)) | ||
176 | | ((0xff & (x >> (3 * 8))) << (0 * 8)); | ||
177 | } | ||
178 | |||
179 | static uint32_t w2rev(uint16_t const x) | ||
180 | { | ||
181 | return ((0xff & (x >> (0 * 8))) << (1 * 8)) | ||
182 | | ((0xff & (x >> (1 * 8))) << (0 * 8)); | ||
183 | } | ||
184 | |||
185 | static uint64_t w8nat(uint64_t const x) | ||
186 | { | ||
187 | return x; | ||
188 | } | ||
189 | |||
190 | static uint32_t w4nat(uint32_t const x) | ||
191 | { | ||
192 | return x; | ||
193 | } | ||
194 | |||
195 | static uint32_t w2nat(uint16_t const x) | ||
196 | { | ||
197 | return x; | ||
198 | } | ||
199 | |||
200 | static uint64_t (*w8)(uint64_t); | ||
201 | static uint32_t (*w)(uint32_t); | ||
202 | static uint32_t (*w2)(uint16_t); | ||
203 | |||
204 | /* Names of the sections that could contain calls to mcount. */ | ||
205 | static int | ||
206 | is_mcounted_section_name(char const *const txtname) | ||
207 | { | ||
208 | return 0 == strcmp(".text", txtname) || | ||
209 | 0 == strcmp(".sched.text", txtname) || | ||
210 | 0 == strcmp(".spinlock.text", txtname) || | ||
211 | 0 == strcmp(".irqentry.text", txtname) || | ||
212 | 0 == strcmp(".text.unlikely", txtname); | ||
213 | } | ||
214 | |||
215 | /* 32 bit and 64 bit are very similar */ | ||
216 | #include "recordmcount.h" | ||
217 | #define RECORD_MCOUNT_64 | ||
218 | #include "recordmcount.h" | ||
219 | |||
220 | static void | ||
221 | do_file(char const *const fname) | ||
222 | { | ||
223 | Elf32_Ehdr *const ehdr = mmap_file(fname); | ||
224 | unsigned int reltype = 0; | ||
225 | |||
226 | ehdr_curr = ehdr; | ||
227 | w = w4nat; | ||
228 | w2 = w2nat; | ||
229 | w8 = w8nat; | ||
230 | switch (ehdr->e_ident[EI_DATA]) { | ||
231 | static unsigned int const endian = 1; | ||
232 | default: { | ||
233 | fprintf(stderr, "unrecognized ELF data encoding %d: %s\n", | ||
234 | ehdr->e_ident[EI_DATA], fname); | ||
235 | fail_file(); | ||
236 | } break; | ||
237 | case ELFDATA2LSB: { | ||
238 | if (1 != *(unsigned char const *)&endian) { | ||
239 | /* main() is big endian, file.o is little endian. */ | ||
240 | w = w4rev; | ||
241 | w2 = w2rev; | ||
242 | w8 = w8rev; | ||
243 | } | ||
244 | } break; | ||
245 | case ELFDATA2MSB: { | ||
246 | if (0 != *(unsigned char const *)&endian) { | ||
247 | /* main() is little endian, file.o is big endian. */ | ||
248 | w = w4rev; | ||
249 | w2 = w2rev; | ||
250 | w8 = w8rev; | ||
251 | } | ||
252 | } break; | ||
253 | } /* end switch */ | ||
254 | if (0 != memcmp(ELFMAG, ehdr->e_ident, SELFMAG) | ||
255 | || ET_REL != w2(ehdr->e_type) | ||
256 | || EV_CURRENT != ehdr->e_ident[EI_VERSION]) { | ||
257 | fprintf(stderr, "unrecognized ET_REL file %s\n", fname); | ||
258 | fail_file(); | ||
259 | } | ||
260 | |||
261 | gpfx = 0; | ||
262 | switch (w2(ehdr->e_machine)) { | ||
263 | default: { | ||
264 | fprintf(stderr, "unrecognized e_machine %d %s\n", | ||
265 | w2(ehdr->e_machine), fname); | ||
266 | fail_file(); | ||
267 | } break; | ||
268 | case EM_386: reltype = R_386_32; break; | ||
269 | case EM_ARM: reltype = R_ARM_ABS32; break; | ||
270 | case EM_IA_64: reltype = R_IA64_IMM64; gpfx = '_'; break; | ||
271 | case EM_PPC: reltype = R_PPC_ADDR32; gpfx = '_'; break; | ||
272 | case EM_PPC64: reltype = R_PPC64_ADDR64; gpfx = '_'; break; | ||
273 | case EM_S390: /* reltype: e_class */ gpfx = '_'; break; | ||
274 | case EM_SH: reltype = R_SH_DIR32; break; | ||
275 | case EM_SPARCV9: reltype = R_SPARC_64; gpfx = '_'; break; | ||
276 | case EM_X86_64: reltype = R_X86_64_64; break; | ||
277 | } /* end switch */ | ||
278 | |||
279 | switch (ehdr->e_ident[EI_CLASS]) { | ||
280 | default: { | ||
281 | fprintf(stderr, "unrecognized ELF class %d %s\n", | ||
282 | ehdr->e_ident[EI_CLASS], fname); | ||
283 | fail_file(); | ||
284 | } break; | ||
285 | case ELFCLASS32: { | ||
286 | if (sizeof(Elf32_Ehdr) != w2(ehdr->e_ehsize) | ||
287 | || sizeof(Elf32_Shdr) != w2(ehdr->e_shentsize)) { | ||
288 | fprintf(stderr, | ||
289 | "unrecognized ET_REL file: %s\n", fname); | ||
290 | fail_file(); | ||
291 | } | ||
292 | if (EM_S390 == w2(ehdr->e_machine)) | ||
293 | reltype = R_390_32; | ||
294 | do32(ehdr, fname, reltype); | ||
295 | } break; | ||
296 | case ELFCLASS64: { | ||
297 | Elf64_Ehdr *const ghdr = (Elf64_Ehdr *)ehdr; | ||
298 | if (sizeof(Elf64_Ehdr) != w2(ghdr->e_ehsize) | ||
299 | || sizeof(Elf64_Shdr) != w2(ghdr->e_shentsize)) { | ||
300 | fprintf(stderr, | ||
301 | "unrecognized ET_REL file: %s\n", fname); | ||
302 | fail_file(); | ||
303 | } | ||
304 | if (EM_S390 == w2(ghdr->e_machine)) | ||
305 | reltype = R_390_64; | ||
306 | do64(ghdr, fname, reltype); | ||
307 | } break; | ||
308 | } /* end switch */ | ||
309 | |||
310 | cleanup(); | ||
311 | } | ||
312 | |||
313 | int | ||
314 | main(int argc, char const *argv[]) | ||
315 | { | ||
316 | const char ftrace[] = "kernel/trace/ftrace.o"; | ||
317 | int ftrace_size = sizeof(ftrace) - 1; | ||
318 | int n_error = 0; /* gcc-4.3.0 false positive complaint */ | ||
319 | |||
320 | if (argc <= 1) { | ||
321 | fprintf(stderr, "usage: recordmcount file.o...\n"); | ||
322 | return 0; | ||
323 | } | ||
324 | |||
325 | /* Process each file in turn, allowing deep failure. */ | ||
326 | for (--argc, ++argv; 0 < argc; --argc, ++argv) { | ||
327 | int const sjval = setjmp(jmpenv); | ||
328 | int len; | ||
329 | |||
330 | /* | ||
331 | * The file kernel/trace/ftrace.o references the mcount | ||
332 | * function but does not call it. Since ftrace.o should | ||
333 | * not be traced anyway, we just skip it. | ||
334 | */ | ||
335 | len = strlen(argv[0]); | ||
336 | if (len >= ftrace_size && | ||
337 | strcmp(argv[0] + (len - ftrace_size), ftrace) == 0) | ||
338 | continue; | ||
339 | |||
340 | switch (sjval) { | ||
341 | default: { | ||
342 | fprintf(stderr, "internal error: %s\n", argv[0]); | ||
343 | exit(1); | ||
344 | } break; | ||
345 | case SJ_SETJMP: { /* normal sequence */ | ||
346 | /* Avoid problems if early cleanup() */ | ||
347 | fd_map = -1; | ||
348 | ehdr_curr = NULL; | ||
349 | mmap_failed = 1; | ||
350 | do_file(argv[0]); | ||
351 | } break; | ||
352 | case SJ_FAIL: { /* error in do_file or below */ | ||
353 | ++n_error; | ||
354 | } break; | ||
355 | case SJ_SUCCEED: { /* premature success */ | ||
356 | /* do nothing */ | ||
357 | } break; | ||
358 | } /* end switch */ | ||
359 | } | ||
360 | return !!n_error; | ||
361 | } | ||
362 | |||
363 | |||
diff --git a/scripts/recordmcount.h b/scripts/recordmcount.h new file mode 100644 index 000000000000..7f39d0943d2d --- /dev/null +++ b/scripts/recordmcount.h | |||
@@ -0,0 +1,366 @@ | |||
1 | /* | ||
2 | * recordmcount.h | ||
3 | * | ||
4 | * This code was taken out of recordmcount.c written by | ||
5 | * Copyright 2009 John F. Reiser <jreiser@BitWagon.com>. All rights reserved. | ||
6 | * | ||
7 | * The original code had the same algorithms for both 32bit | ||
8 | * and 64bit ELF files, but the code was duplicated to support | ||
9 | * the difference in structures that were used. This | ||
10 | * file defines a macro for everything that differs between | ||
11 | * the 64 and 32 bit code, so that both sets of functions | ||
12 | * are created by including this header twice: | ||
13 | * header once with RECORD_MCOUNT_64 undefined, and again with | ||
14 | * it defined. | ||
15 | * | ||
16 | * This conversion to macros was done by: | ||
17 | * Copyright 2010 Steven Rostedt <srostedt@redhat.com>, Red Hat Inc. | ||
18 | * | ||
19 | * Licensed under the GNU General Public License, version 2 (GPLv2). | ||
20 | */ | ||
21 | #undef append_func | ||
22 | #undef sift_rel_mcount | ||
23 | #undef find_secsym_ndx | ||
24 | #undef __has_rel_mcount | ||
25 | #undef has_rel_mcount | ||
26 | #undef tot_relsize | ||
27 | #undef do_func | ||
28 | #undef Elf_Ehdr | ||
29 | #undef Elf_Shdr | ||
30 | #undef Elf_Rel | ||
31 | #undef Elf_Rela | ||
32 | #undef Elf_Sym | ||
33 | #undef ELF_R_SYM | ||
34 | #undef ELF_R_INFO | ||
35 | #undef ELF_ST_BIND | ||
36 | #undef uint_t | ||
37 | #undef _w | ||
38 | #undef _align | ||
39 | #undef _size | ||
40 | |||
41 | #ifdef RECORD_MCOUNT_64 | ||
42 | # define append_func append64 | ||
43 | # define sift_rel_mcount sift64_rel_mcount | ||
44 | # define find_secsym_ndx find64_secsym_ndx | ||
45 | # define __has_rel_mcount __has64_rel_mcount | ||
46 | # define has_rel_mcount has64_rel_mcount | ||
47 | # define tot_relsize tot64_relsize | ||
48 | # define do_func do64 | ||
49 | # define Elf_Ehdr Elf64_Ehdr | ||
50 | # define Elf_Shdr Elf64_Shdr | ||
51 | # define Elf_Rel Elf64_Rel | ||
52 | # define Elf_Rela Elf64_Rela | ||
53 | # define Elf_Sym Elf64_Sym | ||
54 | # define ELF_R_SYM ELF64_R_SYM | ||
55 | # define ELF_R_INFO ELF64_R_INFO | ||
56 | # define ELF_ST_BIND ELF64_ST_BIND | ||
57 | # define uint_t uint64_t | ||
58 | # define _w w8 | ||
59 | # define _align 7u | ||
60 | # define _size 8 | ||
61 | #else | ||
62 | # define append_func append32 | ||
63 | # define sift_rel_mcount sift32_rel_mcount | ||
64 | # define find_secsym_ndx find32_secsym_ndx | ||
65 | # define __has_rel_mcount __has32_rel_mcount | ||
66 | # define has_rel_mcount has32_rel_mcount | ||
67 | # define tot_relsize tot32_relsize | ||
68 | # define do_func do32 | ||
69 | # define Elf_Ehdr Elf32_Ehdr | ||
70 | # define Elf_Shdr Elf32_Shdr | ||
71 | # define Elf_Rel Elf32_Rel | ||
72 | # define Elf_Rela Elf32_Rela | ||
73 | # define Elf_Sym Elf32_Sym | ||
74 | # define ELF_R_SYM ELF32_R_SYM | ||
75 | # define ELF_R_INFO ELF32_R_INFO | ||
76 | # define ELF_ST_BIND ELF32_ST_BIND | ||
77 | # define uint_t uint32_t | ||
78 | # define _w w | ||
79 | # define _align 3u | ||
80 | # define _size 4 | ||
81 | #endif | ||
82 | |||
83 | /* Append the new shstrtab, Elf_Shdr[], __mcount_loc and its relocations. */ | ||
84 | static void append_func(Elf_Ehdr *const ehdr, | ||
85 | Elf_Shdr *const shstr, | ||
86 | uint_t const *const mloc0, | ||
87 | uint_t const *const mlocp, | ||
88 | Elf_Rel const *const mrel0, | ||
89 | Elf_Rel const *const mrelp, | ||
90 | unsigned int const rel_entsize, | ||
91 | unsigned int const symsec_sh_link) | ||
92 | { | ||
93 | /* Begin constructing output file */ | ||
94 | Elf_Shdr mcsec; | ||
95 | char const *mc_name = (sizeof(Elf_Rela) == rel_entsize) | ||
96 | ? ".rela__mcount_loc" | ||
97 | : ".rel__mcount_loc"; | ||
98 | unsigned const old_shnum = w2(ehdr->e_shnum); | ||
99 | uint_t const old_shoff = _w(ehdr->e_shoff); | ||
100 | uint_t const old_shstr_sh_size = _w(shstr->sh_size); | ||
101 | uint_t const old_shstr_sh_offset = _w(shstr->sh_offset); | ||
102 | uint_t t = 1 + strlen(mc_name) + _w(shstr->sh_size); | ||
103 | uint_t new_e_shoff; | ||
104 | |||
105 | shstr->sh_size = _w(t); | ||
106 | shstr->sh_offset = _w(sb.st_size); | ||
107 | t += sb.st_size; | ||
108 | t += (_align & -t); /* word-byte align */ | ||
109 | new_e_shoff = t; | ||
110 | |||
111 | /* body for new shstrtab */ | ||
112 | ulseek(fd_map, sb.st_size, SEEK_SET); | ||
113 | uwrite(fd_map, old_shstr_sh_offset + (void *)ehdr, old_shstr_sh_size); | ||
114 | uwrite(fd_map, mc_name, 1 + strlen(mc_name)); | ||
115 | |||
116 | /* old(modified) Elf_Shdr table, word-byte aligned */ | ||
117 | ulseek(fd_map, t, SEEK_SET); | ||
118 | t += sizeof(Elf_Shdr) * old_shnum; | ||
119 | uwrite(fd_map, old_shoff + (void *)ehdr, | ||
120 | sizeof(Elf_Shdr) * old_shnum); | ||
121 | |||
122 | /* new sections __mcount_loc and .rel__mcount_loc */ | ||
123 | t += 2*sizeof(mcsec); | ||
124 | mcsec.sh_name = w((sizeof(Elf_Rela) == rel_entsize) + strlen(".rel") | ||
125 | + old_shstr_sh_size); | ||
126 | mcsec.sh_type = w(SHT_PROGBITS); | ||
127 | mcsec.sh_flags = _w(SHF_ALLOC); | ||
128 | mcsec.sh_addr = 0; | ||
129 | mcsec.sh_offset = _w(t); | ||
130 | mcsec.sh_size = _w((void *)mlocp - (void *)mloc0); | ||
131 | mcsec.sh_link = 0; | ||
132 | mcsec.sh_info = 0; | ||
133 | mcsec.sh_addralign = _w(_size); | ||
134 | mcsec.sh_entsize = _w(_size); | ||
135 | uwrite(fd_map, &mcsec, sizeof(mcsec)); | ||
136 | |||
137 | mcsec.sh_name = w(old_shstr_sh_size); | ||
138 | mcsec.sh_type = (sizeof(Elf_Rela) == rel_entsize) | ||
139 | ? w(SHT_RELA) | ||
140 | : w(SHT_REL); | ||
141 | mcsec.sh_flags = 0; | ||
142 | mcsec.sh_addr = 0; | ||
143 | mcsec.sh_offset = _w((void *)mlocp - (void *)mloc0 + t); | ||
144 | mcsec.sh_size = _w((void *)mrelp - (void *)mrel0); | ||
145 | mcsec.sh_link = w(symsec_sh_link); | ||
146 | mcsec.sh_info = w(old_shnum); | ||
147 | mcsec.sh_addralign = _w(_size); | ||
148 | mcsec.sh_entsize = _w(rel_entsize); | ||
149 | uwrite(fd_map, &mcsec, sizeof(mcsec)); | ||
150 | |||
151 | uwrite(fd_map, mloc0, (void *)mlocp - (void *)mloc0); | ||
152 | uwrite(fd_map, mrel0, (void *)mrelp - (void *)mrel0); | ||
153 | |||
154 | ehdr->e_shoff = _w(new_e_shoff); | ||
155 | ehdr->e_shnum = w2(2 + w2(ehdr->e_shnum)); /* {.rel,}__mcount_loc */ | ||
156 | ulseek(fd_map, 0, SEEK_SET); | ||
157 | uwrite(fd_map, ehdr, sizeof(*ehdr)); | ||
158 | } | ||
159 | |||
160 | |||
161 | /* | ||
162 | * Look at the relocations in order to find the calls to mcount. | ||
163 | * Accumulate the section offsets that are found, and their relocation info, | ||
164 | * onto the end of the existing arrays. | ||
165 | */ | ||
166 | static uint_t *sift_rel_mcount(uint_t *mlocp, | ||
167 | unsigned const offbase, | ||
168 | Elf_Rel **const mrelpp, | ||
169 | Elf_Shdr const *const relhdr, | ||
170 | Elf_Ehdr const *const ehdr, | ||
171 | unsigned const recsym, | ||
172 | uint_t const recval, | ||
173 | unsigned const reltype) | ||
174 | { | ||
175 | uint_t *const mloc0 = mlocp; | ||
176 | Elf_Rel *mrelp = *mrelpp; | ||
177 | Elf_Shdr *const shdr0 = (Elf_Shdr *)(_w(ehdr->e_shoff) | ||
178 | + (void *)ehdr); | ||
179 | unsigned const symsec_sh_link = w(relhdr->sh_link); | ||
180 | Elf_Shdr const *const symsec = &shdr0[symsec_sh_link]; | ||
181 | Elf_Sym const *const sym0 = (Elf_Sym const *)(_w(symsec->sh_offset) | ||
182 | + (void *)ehdr); | ||
183 | |||
184 | Elf_Shdr const *const strsec = &shdr0[w(symsec->sh_link)]; | ||
185 | char const *const str0 = (char const *)(_w(strsec->sh_offset) | ||
186 | + (void *)ehdr); | ||
187 | |||
188 | Elf_Rel const *const rel0 = (Elf_Rel const *)(_w(relhdr->sh_offset) | ||
189 | + (void *)ehdr); | ||
190 | unsigned rel_entsize = _w(relhdr->sh_entsize); | ||
191 | unsigned const nrel = _w(relhdr->sh_size) / rel_entsize; | ||
192 | Elf_Rel const *relp = rel0; | ||
193 | |||
194 | unsigned mcountsym = 0; | ||
195 | unsigned t; | ||
196 | |||
197 | for (t = nrel; t; --t) { | ||
198 | if (!mcountsym) { | ||
199 | Elf_Sym const *const symp = | ||
200 | &sym0[ELF_R_SYM(_w(relp->r_info))]; | ||
201 | char const *symname = &str0[w(symp->st_name)]; | ||
202 | |||
203 | if ('.' == symname[0]) | ||
204 | ++symname; /* ppc64 hack */ | ||
205 | if (0 == strcmp((('_' == gpfx) ? "_mcount" : "mcount"), | ||
206 | symname)) | ||
207 | mcountsym = ELF_R_SYM(_w(relp->r_info)); | ||
208 | } | ||
209 | |||
210 | if (mcountsym == ELF_R_SYM(_w(relp->r_info))) { | ||
211 | uint_t const addend = _w(_w(relp->r_offset) - recval); | ||
212 | |||
213 | mrelp->r_offset = _w(offbase | ||
214 | + ((void *)mlocp - (void *)mloc0)); | ||
215 | mrelp->r_info = _w(ELF_R_INFO(recsym, reltype)); | ||
216 | if (sizeof(Elf_Rela) == rel_entsize) { | ||
217 | ((Elf_Rela *)mrelp)->r_addend = addend; | ||
218 | *mlocp++ = 0; | ||
219 | } else | ||
220 | *mlocp++ = addend; | ||
221 | |||
222 | mrelp = (Elf_Rel *)(rel_entsize + (void *)mrelp); | ||
223 | } | ||
224 | relp = (Elf_Rel const *)(rel_entsize + (void *)relp); | ||
225 | } | ||
226 | *mrelpp = mrelp; | ||
227 | return mlocp; | ||
228 | } | ||
229 | |||
230 | |||
231 | /* | ||
232 | * Find a symbol in the given section, to be used as the base for relocating | ||
233 | * the table of offsets of calls to mcount. A local or global symbol suffices, | ||
234 | * but avoid a Weak symbol because it may be overridden; the change in value | ||
235 | * would invalidate the relocations of the offsets of the calls to mcount. | ||
236 | * Often the found symbol will be the unnamed local symbol generated by | ||
237 | * GNU 'as' for the start of each section. For example: | ||
238 | * Num: Value Size Type Bind Vis Ndx Name | ||
239 | * 2: 00000000 0 SECTION LOCAL DEFAULT 1 | ||
240 | */ | ||
241 | static unsigned find_secsym_ndx(unsigned const txtndx, | ||
242 | char const *const txtname, | ||
243 | uint_t *const recvalp, | ||
244 | Elf_Shdr const *const symhdr, | ||
245 | Elf_Ehdr const *const ehdr) | ||
246 | { | ||
247 | Elf_Sym const *const sym0 = (Elf_Sym const *)(_w(symhdr->sh_offset) | ||
248 | + (void *)ehdr); | ||
249 | unsigned const nsym = _w(symhdr->sh_size) / _w(symhdr->sh_entsize); | ||
250 | Elf_Sym const *symp; | ||
251 | unsigned t; | ||
252 | |||
253 | for (symp = sym0, t = nsym; t; --t, ++symp) { | ||
254 | unsigned int const st_bind = ELF_ST_BIND(symp->st_info); | ||
255 | |||
256 | if (txtndx == w2(symp->st_shndx) | ||
257 | /* avoid STB_WEAK */ | ||
258 | && (STB_LOCAL == st_bind || STB_GLOBAL == st_bind)) { | ||
259 | *recvalp = _w(symp->st_value); | ||
260 | return symp - sym0; | ||
261 | } | ||
262 | } | ||
263 | fprintf(stderr, "Cannot find symbol for section %d: %s.\n", | ||
264 | txtndx, txtname); | ||
265 | fail_file(); | ||
266 | } | ||
267 | |||
268 | |||
269 | /* Evade ISO C restriction: no declaration after statement in has_rel_mcount. */ | ||
270 | static char const * | ||
271 | __has_rel_mcount(Elf_Shdr const *const relhdr, /* is SHT_REL or SHT_RELA */ | ||
272 | Elf_Shdr const *const shdr0, | ||
273 | char const *const shstrtab, | ||
274 | char const *const fname) | ||
275 | { | ||
276 | /* .sh_info depends on .sh_type == SHT_REL[,A] */ | ||
277 | Elf_Shdr const *const txthdr = &shdr0[w(relhdr->sh_info)]; | ||
278 | char const *const txtname = &shstrtab[w(txthdr->sh_name)]; | ||
279 | |||
280 | if (0 == strcmp("__mcount_loc", txtname)) { | ||
281 | fprintf(stderr, "warning: __mcount_loc already exists: %s\n", | ||
282 | fname); | ||
283 | succeed_file(); | ||
284 | } | ||
285 | if (SHT_PROGBITS != w(txthdr->sh_type) || | ||
286 | !is_mcounted_section_name(txtname)) | ||
287 | return NULL; | ||
288 | return txtname; | ||
289 | } | ||
290 | |||
291 | static char const *has_rel_mcount(Elf_Shdr const *const relhdr, | ||
292 | Elf_Shdr const *const shdr0, | ||
293 | char const *const shstrtab, | ||
294 | char const *const fname) | ||
295 | { | ||
296 | if (SHT_REL != w(relhdr->sh_type) && SHT_RELA != w(relhdr->sh_type)) | ||
297 | return NULL; | ||
298 | return __has_rel_mcount(relhdr, shdr0, shstrtab, fname); | ||
299 | } | ||
300 | |||
301 | |||
302 | static unsigned tot_relsize(Elf_Shdr const *const shdr0, | ||
303 | unsigned nhdr, | ||
304 | const char *const shstrtab, | ||
305 | const char *const fname) | ||
306 | { | ||
307 | unsigned totrelsz = 0; | ||
308 | Elf_Shdr const *shdrp = shdr0; | ||
309 | |||
310 | for (; nhdr; --nhdr, ++shdrp) { | ||
311 | if (has_rel_mcount(shdrp, shdr0, shstrtab, fname)) | ||
312 | totrelsz += _w(shdrp->sh_size); | ||
313 | } | ||
314 | return totrelsz; | ||
315 | } | ||
316 | |||
317 | |||
318 | /* Overall supervision for an Elf32 or Elf64 ET_REL file (do32/do64). */ | ||
319 | static void | ||
320 | do_func(Elf_Ehdr *const ehdr, char const *const fname, unsigned const reltype) | ||
321 | { | ||
322 | Elf_Shdr *const shdr0 = (Elf_Shdr *)(_w(ehdr->e_shoff) | ||
323 | + (void *)ehdr); | ||
324 | unsigned const nhdr = w2(ehdr->e_shnum); | ||
325 | Elf_Shdr *const shstr = &shdr0[w2(ehdr->e_shstrndx)]; | ||
326 | char const *const shstrtab = (char const *)(_w(shstr->sh_offset) | ||
327 | + (void *)ehdr); | ||
328 | |||
329 | Elf_Shdr const *relhdr; | ||
330 | unsigned k; | ||
331 | |||
332 | /* Upper bound on space: assume all relevant relocs are for mcount. */ | ||
333 | unsigned const totrelsz = tot_relsize(shdr0, nhdr, shstrtab, fname); | ||
334 | Elf_Rel *const mrel0 = umalloc(totrelsz); | ||
335 | Elf_Rel * mrelp = mrel0; | ||
336 | |||
337 | /* 2*sizeof(address) <= sizeof(Elf_Rel) */ | ||
338 | uint_t *const mloc0 = umalloc(totrelsz>>1); | ||
339 | uint_t * mlocp = mloc0; | ||
340 | |||
341 | unsigned rel_entsize = 0; | ||
342 | unsigned symsec_sh_link = 0; | ||
343 | |||
344 | for (relhdr = shdr0, k = nhdr; k; --k, ++relhdr) { | ||
345 | char const *const txtname = has_rel_mcount(relhdr, shdr0, | ||
346 | shstrtab, fname); | ||
347 | if (txtname) { | ||
348 | uint_t recval = 0; | ||
349 | unsigned const recsym = find_secsym_ndx( | ||
350 | w(relhdr->sh_info), txtname, &recval, | ||
351 | &shdr0[symsec_sh_link = w(relhdr->sh_link)], | ||
352 | ehdr); | ||
353 | |||
354 | rel_entsize = _w(relhdr->sh_entsize); | ||
355 | mlocp = sift_rel_mcount(mlocp, | ||
356 | (void *)mlocp - (void *)mloc0, &mrelp, | ||
357 | relhdr, ehdr, recsym, recval, reltype); | ||
358 | } | ||
359 | } | ||
360 | if (mloc0 != mlocp) { | ||
361 | append_func(ehdr, shstr, mloc0, mlocp, mrel0, mrelp, | ||
362 | rel_entsize, symsec_sh_link); | ||
363 | } | ||
364 | free(mrel0); | ||
365 | free(mloc0); | ||
366 | } | ||
diff --git a/security/apparmor/.gitignore b/security/apparmor/.gitignore index 0a0a99f3b083..4d995aeaebc0 100644 --- a/security/apparmor/.gitignore +++ b/security/apparmor/.gitignore | |||
@@ -3,3 +3,4 @@ | |||
3 | # | 3 | # |
4 | af_names.h | 4 | af_names.h |
5 | capability_names.h | 5 | capability_names.h |
6 | rlim_names.h | ||
diff --git a/security/apparmor/apparmorfs.c b/security/apparmor/apparmorfs.c index 7320331b44ab..544ff5837cb6 100644 --- a/security/apparmor/apparmorfs.c +++ b/security/apparmor/apparmorfs.c | |||
@@ -29,7 +29,7 @@ | |||
29 | * aa_simple_write_to_buffer - common routine for getting policy from user | 29 | * aa_simple_write_to_buffer - common routine for getting policy from user |
30 | * @op: operation doing the user buffer copy | 30 | * @op: operation doing the user buffer copy |
31 | * @userbuf: user buffer to copy data from (NOT NULL) | 31 | * @userbuf: user buffer to copy data from (NOT NULL) |
32 | * @alloc_size: size of user buffer | 32 | * @alloc_size: size of user buffer (REQUIRES: @alloc_size >= @copy_size) |
33 | * @copy_size: size of data to copy from user buffer | 33 | * @copy_size: size of data to copy from user buffer |
34 | * @pos: position write is at in the file (NOT NULL) | 34 | * @pos: position write is at in the file (NOT NULL) |
35 | * | 35 | * |
@@ -42,6 +42,8 @@ static char *aa_simple_write_to_buffer(int op, const char __user *userbuf, | |||
42 | { | 42 | { |
43 | char *data; | 43 | char *data; |
44 | 44 | ||
45 | BUG_ON(copy_size > alloc_size); | ||
46 | |||
45 | if (*pos != 0) | 47 | if (*pos != 0) |
46 | /* only writes from pos 0, that is complete writes */ | 48 | /* only writes from pos 0, that is complete writes */ |
47 | return ERR_PTR(-ESPIPE); | 49 | return ERR_PTR(-ESPIPE); |
diff --git a/security/capability.c b/security/capability.c index 95a6599a37bb..30ae00fbecd5 100644 --- a/security/capability.c +++ b/security/capability.c | |||
@@ -677,7 +677,18 @@ static void cap_inet_conn_established(struct sock *sk, struct sk_buff *skb) | |||
677 | { | 677 | { |
678 | } | 678 | } |
679 | 679 | ||
680 | static int cap_secmark_relabel_packet(u32 secid) | ||
681 | { | ||
682 | return 0; | ||
683 | } | ||
680 | 684 | ||
685 | static void cap_secmark_refcount_inc(void) | ||
686 | { | ||
687 | } | ||
688 | |||
689 | static void cap_secmark_refcount_dec(void) | ||
690 | { | ||
691 | } | ||
681 | 692 | ||
682 | static void cap_req_classify_flow(const struct request_sock *req, | 693 | static void cap_req_classify_flow(const struct request_sock *req, |
683 | struct flowi *fl) | 694 | struct flowi *fl) |
@@ -777,7 +788,8 @@ static int cap_secid_to_secctx(u32 secid, char **secdata, u32 *seclen) | |||
777 | 788 | ||
778 | static int cap_secctx_to_secid(const char *secdata, u32 seclen, u32 *secid) | 789 | static int cap_secctx_to_secid(const char *secdata, u32 seclen, u32 *secid) |
779 | { | 790 | { |
780 | return -EOPNOTSUPP; | 791 | *secid = 0; |
792 | return 0; | ||
781 | } | 793 | } |
782 | 794 | ||
783 | static void cap_release_secctx(char *secdata, u32 seclen) | 795 | static void cap_release_secctx(char *secdata, u32 seclen) |
@@ -1018,6 +1030,9 @@ void __init security_fixup_ops(struct security_operations *ops) | |||
1018 | set_to_cap_if_null(ops, inet_conn_request); | 1030 | set_to_cap_if_null(ops, inet_conn_request); |
1019 | set_to_cap_if_null(ops, inet_csk_clone); | 1031 | set_to_cap_if_null(ops, inet_csk_clone); |
1020 | set_to_cap_if_null(ops, inet_conn_established); | 1032 | set_to_cap_if_null(ops, inet_conn_established); |
1033 | set_to_cap_if_null(ops, secmark_relabel_packet); | ||
1034 | set_to_cap_if_null(ops, secmark_refcount_inc); | ||
1035 | set_to_cap_if_null(ops, secmark_refcount_dec); | ||
1021 | set_to_cap_if_null(ops, req_classify_flow); | 1036 | set_to_cap_if_null(ops, req_classify_flow); |
1022 | set_to_cap_if_null(ops, tun_dev_create); | 1037 | set_to_cap_if_null(ops, tun_dev_create); |
1023 | set_to_cap_if_null(ops, tun_dev_post_create); | 1038 | set_to_cap_if_null(ops, tun_dev_post_create); |
diff --git a/security/commoncap.c b/security/commoncap.c index 9d172e6e330c..5e632b4857e4 100644 --- a/security/commoncap.c +++ b/security/commoncap.c | |||
@@ -719,14 +719,11 @@ static int cap_safe_nice(struct task_struct *p) | |||
719 | /** | 719 | /** |
720 | * cap_task_setscheduler - Determine if scheduler policy change is permitted | 720 | * cap_task_setscheduler - Determine if scheduler policy change is permitted |
721 | * @p: The task to affect | 721 | * @p: The task to affect |
722 | * @policy: The policy to effect | ||
723 | * @lp: The parameters to the scheduling policy | ||
724 | * | 722 | * |
725 | * Determine if the requested scheduler policy change is permitted for the | 723 | * Determine if the requested scheduler policy change is permitted for the |
726 | * specified task, returning 0 if permission is granted, -ve if denied. | 724 | * specified task, returning 0 if permission is granted, -ve if denied. |
727 | */ | 725 | */ |
728 | int cap_task_setscheduler(struct task_struct *p, int policy, | 726 | int cap_task_setscheduler(struct task_struct *p) |
729 | struct sched_param *lp) | ||
730 | { | 727 | { |
731 | return cap_safe_nice(p); | 728 | return cap_safe_nice(p); |
732 | } | 729 | } |
diff --git a/security/security.c b/security/security.c index c53949f17d9e..b50f472061a4 100644 --- a/security/security.c +++ b/security/security.c | |||
@@ -89,20 +89,12 @@ __setup("security=", choose_lsm); | |||
89 | * Return true if: | 89 | * Return true if: |
90 | * -The passed LSM is the one chosen by user at boot time, | 90 | * -The passed LSM is the one chosen by user at boot time, |
91 | * -or the passed LSM is configured as the default and the user did not | 91 | * -or the passed LSM is configured as the default and the user did not |
92 | * choose an alternate LSM at boot time, | 92 | * choose an alternate LSM at boot time. |
93 | * -or there is no default LSM set and the user didn't specify a | ||
94 | * specific LSM and we're the first to ask for registration permission, | ||
95 | * -or the passed LSM is currently loaded. | ||
96 | * Otherwise, return false. | 93 | * Otherwise, return false. |
97 | */ | 94 | */ |
98 | int __init security_module_enable(struct security_operations *ops) | 95 | int __init security_module_enable(struct security_operations *ops) |
99 | { | 96 | { |
100 | if (!*chosen_lsm) | 97 | return !strcmp(ops->name, chosen_lsm); |
101 | strncpy(chosen_lsm, ops->name, SECURITY_NAME_MAX); | ||
102 | else if (strncmp(ops->name, chosen_lsm, SECURITY_NAME_MAX)) | ||
103 | return 0; | ||
104 | |||
105 | return 1; | ||
106 | } | 98 | } |
107 | 99 | ||
108 | /** | 100 | /** |
@@ -786,10 +778,9 @@ int security_task_setrlimit(struct task_struct *p, unsigned int resource, | |||
786 | return security_ops->task_setrlimit(p, resource, new_rlim); | 778 | return security_ops->task_setrlimit(p, resource, new_rlim); |
787 | } | 779 | } |
788 | 780 | ||
789 | int security_task_setscheduler(struct task_struct *p, | 781 | int security_task_setscheduler(struct task_struct *p) |
790 | int policy, struct sched_param *lp) | ||
791 | { | 782 | { |
792 | return security_ops->task_setscheduler(p, policy, lp); | 783 | return security_ops->task_setscheduler(p); |
793 | } | 784 | } |
794 | 785 | ||
795 | int security_task_getscheduler(struct task_struct *p) | 786 | int security_task_getscheduler(struct task_struct *p) |
@@ -1145,6 +1136,24 @@ void security_inet_conn_established(struct sock *sk, | |||
1145 | security_ops->inet_conn_established(sk, skb); | 1136 | security_ops->inet_conn_established(sk, skb); |
1146 | } | 1137 | } |
1147 | 1138 | ||
1139 | int security_secmark_relabel_packet(u32 secid) | ||
1140 | { | ||
1141 | return security_ops->secmark_relabel_packet(secid); | ||
1142 | } | ||
1143 | EXPORT_SYMBOL(security_secmark_relabel_packet); | ||
1144 | |||
1145 | void security_secmark_refcount_inc(void) | ||
1146 | { | ||
1147 | security_ops->secmark_refcount_inc(); | ||
1148 | } | ||
1149 | EXPORT_SYMBOL(security_secmark_refcount_inc); | ||
1150 | |||
1151 | void security_secmark_refcount_dec(void) | ||
1152 | { | ||
1153 | security_ops->secmark_refcount_dec(); | ||
1154 | } | ||
1155 | EXPORT_SYMBOL(security_secmark_refcount_dec); | ||
1156 | |||
1148 | int security_tun_dev_create(void) | 1157 | int security_tun_dev_create(void) |
1149 | { | 1158 | { |
1150 | return security_ops->tun_dev_create(); | 1159 | return security_ops->tun_dev_create(); |
diff --git a/security/selinux/Makefile b/security/selinux/Makefile index 58d80f3bd6f6..ad5cd76ec231 100644 --- a/security/selinux/Makefile +++ b/security/selinux/Makefile | |||
@@ -2,25 +2,20 @@ | |||
2 | # Makefile for building the SELinux module as part of the kernel tree. | 2 | # Makefile for building the SELinux module as part of the kernel tree. |
3 | # | 3 | # |
4 | 4 | ||
5 | obj-$(CONFIG_SECURITY_SELINUX) := selinux.o ss/ | 5 | obj-$(CONFIG_SECURITY_SELINUX) := selinux.o |
6 | 6 | ||
7 | selinux-y := avc.o \ | 7 | selinux-y := avc.o hooks.o selinuxfs.o netlink.o nlmsgtab.o netif.o \ |
8 | hooks.o \ | 8 | netnode.o netport.o exports.o \ |
9 | selinuxfs.o \ | 9 | ss/ebitmap.o ss/hashtab.o ss/symtab.o ss/sidtab.o ss/avtab.o \ |
10 | netlink.o \ | 10 | ss/policydb.o ss/services.o ss/conditional.o ss/mls.o ss/status.o |
11 | nlmsgtab.o \ | ||
12 | netif.o \ | ||
13 | netnode.o \ | ||
14 | netport.o \ | ||
15 | exports.o | ||
16 | 11 | ||
17 | selinux-$(CONFIG_SECURITY_NETWORK_XFRM) += xfrm.o | 12 | selinux-$(CONFIG_SECURITY_NETWORK_XFRM) += xfrm.o |
18 | 13 | ||
19 | selinux-$(CONFIG_NETLABEL) += netlabel.o | 14 | selinux-$(CONFIG_NETLABEL) += netlabel.o |
20 | 15 | ||
21 | EXTRA_CFLAGS += -Isecurity/selinux -Isecurity/selinux/include | 16 | ccflags-y := -Isecurity/selinux -Isecurity/selinux/include |
22 | 17 | ||
23 | $(obj)/avc.o: $(obj)/flask.h | 18 | $(addprefix $(obj)/,$(selinux-y)): $(obj)/flask.h |
24 | 19 | ||
25 | quiet_cmd_flask = GEN $(obj)/flask.h $(obj)/av_permissions.h | 20 | quiet_cmd_flask = GEN $(obj)/flask.h $(obj)/av_permissions.h |
26 | cmd_flask = scripts/selinux/genheaders/genheaders $(obj)/flask.h $(obj)/av_permissions.h | 21 | cmd_flask = scripts/selinux/genheaders/genheaders $(obj)/flask.h $(obj)/av_permissions.h |
diff --git a/security/selinux/exports.c b/security/selinux/exports.c index c0a454aee1e0..90664385dead 100644 --- a/security/selinux/exports.c +++ b/security/selinux/exports.c | |||
@@ -11,58 +11,9 @@ | |||
11 | * it under the terms of the GNU General Public License version 2, | 11 | * it under the terms of the GNU General Public License version 2, |
12 | * as published by the Free Software Foundation. | 12 | * as published by the Free Software Foundation. |
13 | */ | 13 | */ |
14 | #include <linux/types.h> | ||
15 | #include <linux/kernel.h> | ||
16 | #include <linux/module.h> | 14 | #include <linux/module.h> |
17 | #include <linux/selinux.h> | ||
18 | #include <linux/fs.h> | ||
19 | #include <linux/ipc.h> | ||
20 | #include <asm/atomic.h> | ||
21 | 15 | ||
22 | #include "security.h" | 16 | #include "security.h" |
23 | #include "objsec.h" | ||
24 | |||
25 | /* SECMARK reference count */ | ||
26 | extern atomic_t selinux_secmark_refcount; | ||
27 | |||
28 | int selinux_string_to_sid(char *str, u32 *sid) | ||
29 | { | ||
30 | if (selinux_enabled) | ||
31 | return security_context_to_sid(str, strlen(str), sid); | ||
32 | else { | ||
33 | *sid = 0; | ||
34 | return 0; | ||
35 | } | ||
36 | } | ||
37 | EXPORT_SYMBOL_GPL(selinux_string_to_sid); | ||
38 | |||
39 | int selinux_secmark_relabel_packet_permission(u32 sid) | ||
40 | { | ||
41 | if (selinux_enabled) { | ||
42 | const struct task_security_struct *__tsec; | ||
43 | u32 tsid; | ||
44 | |||
45 | __tsec = current_security(); | ||
46 | tsid = __tsec->sid; | ||
47 | |||
48 | return avc_has_perm(tsid, sid, SECCLASS_PACKET, | ||
49 | PACKET__RELABELTO, NULL); | ||
50 | } | ||
51 | return 0; | ||
52 | } | ||
53 | EXPORT_SYMBOL_GPL(selinux_secmark_relabel_packet_permission); | ||
54 | |||
55 | void selinux_secmark_refcount_inc(void) | ||
56 | { | ||
57 | atomic_inc(&selinux_secmark_refcount); | ||
58 | } | ||
59 | EXPORT_SYMBOL_GPL(selinux_secmark_refcount_inc); | ||
60 | |||
61 | void selinux_secmark_refcount_dec(void) | ||
62 | { | ||
63 | atomic_dec(&selinux_secmark_refcount); | ||
64 | } | ||
65 | EXPORT_SYMBOL_GPL(selinux_secmark_refcount_dec); | ||
66 | 17 | ||
67 | bool selinux_is_enabled(void) | 18 | bool selinux_is_enabled(void) |
68 | { | 19 | { |
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 4796ddd4e721..d9154cf90ae1 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c | |||
@@ -3354,11 +3354,11 @@ static int selinux_task_setrlimit(struct task_struct *p, unsigned int resource, | |||
3354 | return 0; | 3354 | return 0; |
3355 | } | 3355 | } |
3356 | 3356 | ||
3357 | static int selinux_task_setscheduler(struct task_struct *p, int policy, struct sched_param *lp) | 3357 | static int selinux_task_setscheduler(struct task_struct *p) |
3358 | { | 3358 | { |
3359 | int rc; | 3359 | int rc; |
3360 | 3360 | ||
3361 | rc = cap_task_setscheduler(p, policy, lp); | 3361 | rc = cap_task_setscheduler(p); |
3362 | if (rc) | 3362 | if (rc) |
3363 | return rc; | 3363 | return rc; |
3364 | 3364 | ||
@@ -4279,6 +4279,27 @@ static void selinux_inet_conn_established(struct sock *sk, struct sk_buff *skb) | |||
4279 | selinux_skb_peerlbl_sid(skb, family, &sksec->peer_sid); | 4279 | selinux_skb_peerlbl_sid(skb, family, &sksec->peer_sid); |
4280 | } | 4280 | } |
4281 | 4281 | ||
4282 | static int selinux_secmark_relabel_packet(u32 sid) | ||
4283 | { | ||
4284 | const struct task_security_struct *__tsec; | ||
4285 | u32 tsid; | ||
4286 | |||
4287 | __tsec = current_security(); | ||
4288 | tsid = __tsec->sid; | ||
4289 | |||
4290 | return avc_has_perm(tsid, sid, SECCLASS_PACKET, PACKET__RELABELTO, NULL); | ||
4291 | } | ||
4292 | |||
4293 | static void selinux_secmark_refcount_inc(void) | ||
4294 | { | ||
4295 | atomic_inc(&selinux_secmark_refcount); | ||
4296 | } | ||
4297 | |||
4298 | static void selinux_secmark_refcount_dec(void) | ||
4299 | { | ||
4300 | atomic_dec(&selinux_secmark_refcount); | ||
4301 | } | ||
4302 | |||
4282 | static void selinux_req_classify_flow(const struct request_sock *req, | 4303 | static void selinux_req_classify_flow(const struct request_sock *req, |
4283 | struct flowi *fl) | 4304 | struct flowi *fl) |
4284 | { | 4305 | { |
@@ -5533,6 +5554,9 @@ static struct security_operations selinux_ops = { | |||
5533 | .inet_conn_request = selinux_inet_conn_request, | 5554 | .inet_conn_request = selinux_inet_conn_request, |
5534 | .inet_csk_clone = selinux_inet_csk_clone, | 5555 | .inet_csk_clone = selinux_inet_csk_clone, |
5535 | .inet_conn_established = selinux_inet_conn_established, | 5556 | .inet_conn_established = selinux_inet_conn_established, |
5557 | .secmark_relabel_packet = selinux_secmark_relabel_packet, | ||
5558 | .secmark_refcount_inc = selinux_secmark_refcount_inc, | ||
5559 | .secmark_refcount_dec = selinux_secmark_refcount_dec, | ||
5536 | .req_classify_flow = selinux_req_classify_flow, | 5560 | .req_classify_flow = selinux_req_classify_flow, |
5537 | .tun_dev_create = selinux_tun_dev_create, | 5561 | .tun_dev_create = selinux_tun_dev_create, |
5538 | .tun_dev_post_create = selinux_tun_dev_post_create, | 5562 | .tun_dev_post_create = selinux_tun_dev_post_create, |
diff --git a/security/selinux/include/classmap.h b/security/selinux/include/classmap.h index b4c9eb4bd6f9..8858d2b2d4b6 100644 --- a/security/selinux/include/classmap.h +++ b/security/selinux/include/classmap.h | |||
@@ -17,7 +17,7 @@ struct security_class_mapping secclass_map[] = { | |||
17 | { "compute_av", "compute_create", "compute_member", | 17 | { "compute_av", "compute_create", "compute_member", |
18 | "check_context", "load_policy", "compute_relabel", | 18 | "check_context", "load_policy", "compute_relabel", |
19 | "compute_user", "setenforce", "setbool", "setsecparam", | 19 | "compute_user", "setenforce", "setbool", "setsecparam", |
20 | "setcheckreqprot", NULL } }, | 20 | "setcheckreqprot", "read_policy", NULL } }, |
21 | { "process", | 21 | { "process", |
22 | { "fork", "transition", "sigchld", "sigkill", | 22 | { "fork", "transition", "sigchld", "sigkill", |
23 | "sigstop", "signull", "signal", "ptrace", "getsched", "setsched", | 23 | "sigstop", "signull", "signal", "ptrace", "getsched", "setsched", |
diff --git a/security/selinux/include/security.h b/security/selinux/include/security.h index 1f7c2491d3dc..671273eb1115 100644 --- a/security/selinux/include/security.h +++ b/security/selinux/include/security.h | |||
@@ -9,6 +9,7 @@ | |||
9 | #define _SELINUX_SECURITY_H_ | 9 | #define _SELINUX_SECURITY_H_ |
10 | 10 | ||
11 | #include <linux/magic.h> | 11 | #include <linux/magic.h> |
12 | #include <linux/types.h> | ||
12 | #include "flask.h" | 13 | #include "flask.h" |
13 | 14 | ||
14 | #define SECSID_NULL 0x00000000 /* unspecified SID */ | 15 | #define SECSID_NULL 0x00000000 /* unspecified SID */ |
@@ -82,6 +83,8 @@ extern int selinux_policycap_openperm; | |||
82 | int security_mls_enabled(void); | 83 | int security_mls_enabled(void); |
83 | 84 | ||
84 | int security_load_policy(void *data, size_t len); | 85 | int security_load_policy(void *data, size_t len); |
86 | int security_read_policy(void **data, ssize_t *len); | ||
87 | size_t security_policydb_len(void); | ||
85 | 88 | ||
86 | int security_policycap_supported(unsigned int req_cap); | 89 | int security_policycap_supported(unsigned int req_cap); |
87 | 90 | ||
@@ -191,5 +194,25 @@ static inline int security_netlbl_sid_to_secattr(u32 sid, | |||
191 | 194 | ||
192 | const char *security_get_initial_sid_context(u32 sid); | 195 | const char *security_get_initial_sid_context(u32 sid); |
193 | 196 | ||
197 | /* | ||
198 | * status notifier using mmap interface | ||
199 | */ | ||
200 | extern struct page *selinux_kernel_status_page(void); | ||
201 | |||
202 | #define SELINUX_KERNEL_STATUS_VERSION 1 | ||
203 | struct selinux_kernel_status { | ||
204 | u32 version; /* version number of this structure */ | ||
205 | u32 sequence; /* sequence number of seqlock logic */ | ||
206 | u32 enforcing; /* current setting of enforcing mode */ | ||
207 | u32 policyload; /* number of times the policy has been reloaded */ | ||
208 | u32 deny_unknown; /* current setting of deny_unknown */ | ||
209 | /* | ||
210 | * The version > 0 supports above members. | ||
211 | */ | ||
212 | } __attribute__((packed)); | ||
213 | |||
214 | extern void selinux_status_update_setenforce(int enforcing); | ||
215 | extern void selinux_status_update_policyload(int seqno); | ||
216 | |||
194 | #endif /* _SELINUX_SECURITY_H_ */ | 217 | #endif /* _SELINUX_SECURITY_H_ */ |
195 | 218 | ||
diff --git a/security/selinux/selinuxfs.c b/security/selinux/selinuxfs.c index 79a1bb635662..87e0556bae70 100644 --- a/security/selinux/selinuxfs.c +++ b/security/selinux/selinuxfs.c | |||
@@ -68,6 +68,8 @@ static int *bool_pending_values; | |||
68 | static struct dentry *class_dir; | 68 | static struct dentry *class_dir; |
69 | static unsigned long last_class_ino; | 69 | static unsigned long last_class_ino; |
70 | 70 | ||
71 | static char policy_opened; | ||
72 | |||
71 | /* global data for policy capabilities */ | 73 | /* global data for policy capabilities */ |
72 | static struct dentry *policycap_dir; | 74 | static struct dentry *policycap_dir; |
73 | 75 | ||
@@ -110,6 +112,8 @@ enum sel_inos { | |||
110 | SEL_COMPAT_NET, /* whether to use old compat network packet controls */ | 112 | SEL_COMPAT_NET, /* whether to use old compat network packet controls */ |
111 | SEL_REJECT_UNKNOWN, /* export unknown reject handling to userspace */ | 113 | SEL_REJECT_UNKNOWN, /* export unknown reject handling to userspace */ |
112 | SEL_DENY_UNKNOWN, /* export unknown deny handling to userspace */ | 114 | SEL_DENY_UNKNOWN, /* export unknown deny handling to userspace */ |
115 | SEL_STATUS, /* export current status using mmap() */ | ||
116 | SEL_POLICY, /* allow userspace to read the in kernel policy */ | ||
113 | SEL_INO_NEXT, /* The next inode number to use */ | 117 | SEL_INO_NEXT, /* The next inode number to use */ |
114 | }; | 118 | }; |
115 | 119 | ||
@@ -171,6 +175,7 @@ static ssize_t sel_write_enforce(struct file *file, const char __user *buf, | |||
171 | if (selinux_enforcing) | 175 | if (selinux_enforcing) |
172 | avc_ss_reset(0); | 176 | avc_ss_reset(0); |
173 | selnl_notify_setenforce(selinux_enforcing); | 177 | selnl_notify_setenforce(selinux_enforcing); |
178 | selinux_status_update_setenforce(selinux_enforcing); | ||
174 | } | 179 | } |
175 | length = count; | 180 | length = count; |
176 | out: | 181 | out: |
@@ -205,6 +210,59 @@ static const struct file_operations sel_handle_unknown_ops = { | |||
205 | .llseek = generic_file_llseek, | 210 | .llseek = generic_file_llseek, |
206 | }; | 211 | }; |
207 | 212 | ||
213 | static int sel_open_handle_status(struct inode *inode, struct file *filp) | ||
214 | { | ||
215 | struct page *status = selinux_kernel_status_page(); | ||
216 | |||
217 | if (!status) | ||
218 | return -ENOMEM; | ||
219 | |||
220 | filp->private_data = status; | ||
221 | |||
222 | return 0; | ||
223 | } | ||
224 | |||
225 | static ssize_t sel_read_handle_status(struct file *filp, char __user *buf, | ||
226 | size_t count, loff_t *ppos) | ||
227 | { | ||
228 | struct page *status = filp->private_data; | ||
229 | |||
230 | BUG_ON(!status); | ||
231 | |||
232 | return simple_read_from_buffer(buf, count, ppos, | ||
233 | page_address(status), | ||
234 | sizeof(struct selinux_kernel_status)); | ||
235 | } | ||
236 | |||
237 | static int sel_mmap_handle_status(struct file *filp, | ||
238 | struct vm_area_struct *vma) | ||
239 | { | ||
240 | struct page *status = filp->private_data; | ||
241 | unsigned long size = vma->vm_end - vma->vm_start; | ||
242 | |||
243 | BUG_ON(!status); | ||
244 | |||
245 | /* only allows one page from the head */ | ||
246 | if (vma->vm_pgoff > 0 || size != PAGE_SIZE) | ||
247 | return -EIO; | ||
248 | /* disallow writable mapping */ | ||
249 | if (vma->vm_flags & VM_WRITE) | ||
250 | return -EPERM; | ||
251 | /* prevent mprotect() from making the mapping writable */ | ||
252 | vma->vm_flags &= ~VM_MAYWRITE; | ||
253 | |||
254 | return remap_pfn_range(vma, vma->vm_start, | ||
255 | page_to_pfn(status), | ||
256 | size, vma->vm_page_prot); | ||
257 | } | ||
258 | |||
259 | static const struct file_operations sel_handle_status_ops = { | ||
260 | .open = sel_open_handle_status, | ||
261 | .read = sel_read_handle_status, | ||
262 | .mmap = sel_mmap_handle_status, | ||
263 | .llseek = generic_file_llseek, | ||
264 | }; | ||
265 | |||
208 | #ifdef CONFIG_SECURITY_SELINUX_DISABLE | 266 | #ifdef CONFIG_SECURITY_SELINUX_DISABLE |
209 | static ssize_t sel_write_disable(struct file *file, const char __user *buf, | 267 | static ssize_t sel_write_disable(struct file *file, const char __user *buf, |
210 | size_t count, loff_t *ppos) | 268 | size_t count, loff_t *ppos) |
@@ -296,6 +354,141 @@ static const struct file_operations sel_mls_ops = { | |||
296 | .llseek = generic_file_llseek, | 354 | .llseek = generic_file_llseek, |
297 | }; | 355 | }; |
298 | 356 | ||
357 | struct policy_load_memory { | ||
358 | size_t len; | ||
359 | void *data; | ||
360 | }; | ||
361 | |||
362 | static int sel_open_policy(struct inode *inode, struct file *filp) | ||
363 | { | ||
364 | struct policy_load_memory *plm = NULL; | ||
365 | int rc; | ||
366 | |||
367 | BUG_ON(filp->private_data); | ||
368 | |||
369 | mutex_lock(&sel_mutex); | ||
370 | |||
371 | rc = task_has_security(current, SECURITY__READ_POLICY); | ||
372 | if (rc) | ||
373 | goto err; | ||
374 | |||
375 | rc = -EBUSY; | ||
376 | if (policy_opened) | ||
377 | goto err; | ||
378 | |||
379 | rc = -ENOMEM; | ||
380 | plm = kzalloc(sizeof(*plm), GFP_KERNEL); | ||
381 | if (!plm) | ||
382 | goto err; | ||
383 | |||
384 | if (i_size_read(inode) != security_policydb_len()) { | ||
385 | mutex_lock(&inode->i_mutex); | ||
386 | i_size_write(inode, security_policydb_len()); | ||
387 | mutex_unlock(&inode->i_mutex); | ||
388 | } | ||
389 | |||
390 | rc = security_read_policy(&plm->data, &plm->len); | ||
391 | if (rc) | ||
392 | goto err; | ||
393 | |||
394 | policy_opened = 1; | ||
395 | |||
396 | filp->private_data = plm; | ||
397 | |||
398 | mutex_unlock(&sel_mutex); | ||
399 | |||
400 | return 0; | ||
401 | err: | ||
402 | mutex_unlock(&sel_mutex); | ||
403 | |||
404 | if (plm) | ||
405 | vfree(plm->data); | ||
406 | kfree(plm); | ||
407 | return rc; | ||
408 | } | ||
409 | |||
410 | static int sel_release_policy(struct inode *inode, struct file *filp) | ||
411 | { | ||
412 | struct policy_load_memory *plm = filp->private_data; | ||
413 | |||
414 | BUG_ON(!plm); | ||
415 | |||
416 | policy_opened = 0; | ||
417 | |||
418 | vfree(plm->data); | ||
419 | kfree(plm); | ||
420 | |||
421 | return 0; | ||
422 | } | ||
423 | |||
424 | static ssize_t sel_read_policy(struct file *filp, char __user *buf, | ||
425 | size_t count, loff_t *ppos) | ||
426 | { | ||
427 | struct policy_load_memory *plm = filp->private_data; | ||
428 | int ret; | ||
429 | |||
430 | mutex_lock(&sel_mutex); | ||
431 | |||
432 | ret = task_has_security(current, SECURITY__READ_POLICY); | ||
433 | if (ret) | ||
434 | goto out; | ||
435 | |||
436 | ret = simple_read_from_buffer(buf, count, ppos, plm->data, plm->len); | ||
437 | out: | ||
438 | mutex_unlock(&sel_mutex); | ||
439 | return ret; | ||
440 | } | ||
441 | |||
442 | static int sel_mmap_policy_fault(struct vm_area_struct *vma, | ||
443 | struct vm_fault *vmf) | ||
444 | { | ||
445 | struct policy_load_memory *plm = vma->vm_file->private_data; | ||
446 | unsigned long offset; | ||
447 | struct page *page; | ||
448 | |||
449 | if (vmf->flags & (FAULT_FLAG_MKWRITE | FAULT_FLAG_WRITE)) | ||
450 | return VM_FAULT_SIGBUS; | ||
451 | |||
452 | offset = vmf->pgoff << PAGE_SHIFT; | ||
453 | if (offset >= roundup(plm->len, PAGE_SIZE)) | ||
454 | return VM_FAULT_SIGBUS; | ||
455 | |||
456 | page = vmalloc_to_page(plm->data + offset); | ||
457 | get_page(page); | ||
458 | |||
459 | vmf->page = page; | ||
460 | |||
461 | return 0; | ||
462 | } | ||
463 | |||
464 | static struct vm_operations_struct sel_mmap_policy_ops = { | ||
465 | .fault = sel_mmap_policy_fault, | ||
466 | .page_mkwrite = sel_mmap_policy_fault, | ||
467 | }; | ||
468 | |||
469 | int sel_mmap_policy(struct file *filp, struct vm_area_struct *vma) | ||
470 | { | ||
471 | if (vma->vm_flags & VM_SHARED) { | ||
472 | /* do not allow mprotect to make mapping writable */ | ||
473 | vma->vm_flags &= ~VM_MAYWRITE; | ||
474 | |||
475 | if (vma->vm_flags & VM_WRITE) | ||
476 | return -EACCES; | ||
477 | } | ||
478 | |||
479 | vma->vm_flags |= VM_RESERVED; | ||
480 | vma->vm_ops = &sel_mmap_policy_ops; | ||
481 | |||
482 | return 0; | ||
483 | } | ||
484 | |||
485 | static const struct file_operations sel_policy_ops = { | ||
486 | .open = sel_open_policy, | ||
487 | .read = sel_read_policy, | ||
488 | .mmap = sel_mmap_policy, | ||
489 | .release = sel_release_policy, | ||
490 | }; | ||
491 | |||
299 | static ssize_t sel_write_load(struct file *file, const char __user *buf, | 492 | static ssize_t sel_write_load(struct file *file, const char __user *buf, |
300 | size_t count, loff_t *ppos) | 493 | size_t count, loff_t *ppos) |
301 | 494 | ||
@@ -1612,6 +1805,8 @@ static int sel_fill_super(struct super_block *sb, void *data, int silent) | |||
1612 | [SEL_CHECKREQPROT] = {"checkreqprot", &sel_checkreqprot_ops, S_IRUGO|S_IWUSR}, | 1805 | [SEL_CHECKREQPROT] = {"checkreqprot", &sel_checkreqprot_ops, S_IRUGO|S_IWUSR}, |
1613 | [SEL_REJECT_UNKNOWN] = {"reject_unknown", &sel_handle_unknown_ops, S_IRUGO}, | 1806 | [SEL_REJECT_UNKNOWN] = {"reject_unknown", &sel_handle_unknown_ops, S_IRUGO}, |
1614 | [SEL_DENY_UNKNOWN] = {"deny_unknown", &sel_handle_unknown_ops, S_IRUGO}, | 1807 | [SEL_DENY_UNKNOWN] = {"deny_unknown", &sel_handle_unknown_ops, S_IRUGO}, |
1808 | [SEL_STATUS] = {"status", &sel_handle_status_ops, S_IRUGO}, | ||
1809 | [SEL_POLICY] = {"policy", &sel_policy_ops, S_IRUSR}, | ||
1615 | /* last one */ {""} | 1810 | /* last one */ {""} |
1616 | }; | 1811 | }; |
1617 | ret = simple_fill_super(sb, SELINUX_MAGIC, selinux_files); | 1812 | ret = simple_fill_super(sb, SELINUX_MAGIC, selinux_files); |
diff --git a/security/selinux/ss/Makefile b/security/selinux/ss/Makefile deleted file mode 100644 index 15d4e62917de..000000000000 --- a/security/selinux/ss/Makefile +++ /dev/null | |||
@@ -1,9 +0,0 @@ | |||
1 | # | ||
2 | # Makefile for building the SELinux security server as part of the kernel tree. | ||
3 | # | ||
4 | |||
5 | EXTRA_CFLAGS += -Isecurity/selinux -Isecurity/selinux/include | ||
6 | obj-y := ss.o | ||
7 | |||
8 | ss-y := ebitmap.o hashtab.o symtab.o sidtab.o avtab.o policydb.o services.o conditional.o mls.o | ||
9 | |||
diff --git a/security/selinux/ss/avtab.c b/security/selinux/ss/avtab.c index 929480c6c430..a3dd9faa19c0 100644 --- a/security/selinux/ss/avtab.c +++ b/security/selinux/ss/avtab.c | |||
@@ -266,8 +266,8 @@ int avtab_alloc(struct avtab *h, u32 nrules) | |||
266 | if (shift > 2) | 266 | if (shift > 2) |
267 | shift = shift - 2; | 267 | shift = shift - 2; |
268 | nslot = 1 << shift; | 268 | nslot = 1 << shift; |
269 | if (nslot > MAX_AVTAB_SIZE) | 269 | if (nslot > MAX_AVTAB_HASH_BUCKETS) |
270 | nslot = MAX_AVTAB_SIZE; | 270 | nslot = MAX_AVTAB_HASH_BUCKETS; |
271 | mask = nslot - 1; | 271 | mask = nslot - 1; |
272 | 272 | ||
273 | h->htable = kcalloc(nslot, sizeof(*(h->htable)), GFP_KERNEL); | 273 | h->htable = kcalloc(nslot, sizeof(*(h->htable)), GFP_KERNEL); |
@@ -501,6 +501,48 @@ bad: | |||
501 | goto out; | 501 | goto out; |
502 | } | 502 | } |
503 | 503 | ||
504 | int avtab_write_item(struct policydb *p, struct avtab_node *cur, void *fp) | ||
505 | { | ||
506 | __le16 buf16[4]; | ||
507 | __le32 buf32[1]; | ||
508 | int rc; | ||
509 | |||
510 | buf16[0] = cpu_to_le16(cur->key.source_type); | ||
511 | buf16[1] = cpu_to_le16(cur->key.target_type); | ||
512 | buf16[2] = cpu_to_le16(cur->key.target_class); | ||
513 | buf16[3] = cpu_to_le16(cur->key.specified); | ||
514 | rc = put_entry(buf16, sizeof(u16), 4, fp); | ||
515 | if (rc) | ||
516 | return rc; | ||
517 | buf32[0] = cpu_to_le32(cur->datum.data); | ||
518 | rc = put_entry(buf32, sizeof(u32), 1, fp); | ||
519 | if (rc) | ||
520 | return rc; | ||
521 | return 0; | ||
522 | } | ||
523 | |||
524 | int avtab_write(struct policydb *p, struct avtab *a, void *fp) | ||
525 | { | ||
526 | unsigned int i; | ||
527 | int rc = 0; | ||
528 | struct avtab_node *cur; | ||
529 | __le32 buf[1]; | ||
530 | |||
531 | buf[0] = cpu_to_le32(a->nel); | ||
532 | rc = put_entry(buf, sizeof(u32), 1, fp); | ||
533 | if (rc) | ||
534 | return rc; | ||
535 | |||
536 | for (i = 0; i < a->nslot; i++) { | ||
537 | for (cur = a->htable[i]; cur; cur = cur->next) { | ||
538 | rc = avtab_write_item(p, cur, fp); | ||
539 | if (rc) | ||
540 | return rc; | ||
541 | } | ||
542 | } | ||
543 | |||
544 | return rc; | ||
545 | } | ||
504 | void avtab_cache_init(void) | 546 | void avtab_cache_init(void) |
505 | { | 547 | { |
506 | avtab_node_cachep = kmem_cache_create("avtab_node", | 548 | avtab_node_cachep = kmem_cache_create("avtab_node", |
diff --git a/security/selinux/ss/avtab.h b/security/selinux/ss/avtab.h index cd4f734e2749..dff0c75345c1 100644 --- a/security/selinux/ss/avtab.h +++ b/security/selinux/ss/avtab.h | |||
@@ -71,6 +71,8 @@ int avtab_read_item(struct avtab *a, void *fp, struct policydb *pol, | |||
71 | void *p); | 71 | void *p); |
72 | 72 | ||
73 | int avtab_read(struct avtab *a, void *fp, struct policydb *pol); | 73 | int avtab_read(struct avtab *a, void *fp, struct policydb *pol); |
74 | int avtab_write_item(struct policydb *p, struct avtab_node *cur, void *fp); | ||
75 | int avtab_write(struct policydb *p, struct avtab *a, void *fp); | ||
74 | 76 | ||
75 | struct avtab_node *avtab_insert_nonunique(struct avtab *h, struct avtab_key *key, | 77 | struct avtab_node *avtab_insert_nonunique(struct avtab *h, struct avtab_key *key, |
76 | struct avtab_datum *datum); | 78 | struct avtab_datum *datum); |
@@ -85,7 +87,6 @@ void avtab_cache_destroy(void); | |||
85 | #define MAX_AVTAB_HASH_BITS 11 | 87 | #define MAX_AVTAB_HASH_BITS 11 |
86 | #define MAX_AVTAB_HASH_BUCKETS (1 << MAX_AVTAB_HASH_BITS) | 88 | #define MAX_AVTAB_HASH_BUCKETS (1 << MAX_AVTAB_HASH_BITS) |
87 | #define MAX_AVTAB_HASH_MASK (MAX_AVTAB_HASH_BUCKETS-1) | 89 | #define MAX_AVTAB_HASH_MASK (MAX_AVTAB_HASH_BUCKETS-1) |
88 | #define MAX_AVTAB_SIZE MAX_AVTAB_HASH_BUCKETS | ||
89 | 90 | ||
90 | #endif /* _SS_AVTAB_H_ */ | 91 | #endif /* _SS_AVTAB_H_ */ |
91 | 92 | ||
diff --git a/security/selinux/ss/conditional.c b/security/selinux/ss/conditional.c index c91e150c3087..655fe1c6cc69 100644 --- a/security/selinux/ss/conditional.c +++ b/security/selinux/ss/conditional.c | |||
@@ -490,6 +490,129 @@ err: | |||
490 | return rc; | 490 | return rc; |
491 | } | 491 | } |
492 | 492 | ||
493 | int cond_write_bool(void *vkey, void *datum, void *ptr) | ||
494 | { | ||
495 | char *key = vkey; | ||
496 | struct cond_bool_datum *booldatum = datum; | ||
497 | struct policy_data *pd = ptr; | ||
498 | void *fp = pd->fp; | ||
499 | __le32 buf[3]; | ||
500 | u32 len; | ||
501 | int rc; | ||
502 | |||
503 | len = strlen(key); | ||
504 | buf[0] = cpu_to_le32(booldatum->value); | ||
505 | buf[1] = cpu_to_le32(booldatum->state); | ||
506 | buf[2] = cpu_to_le32(len); | ||
507 | rc = put_entry(buf, sizeof(u32), 3, fp); | ||
508 | if (rc) | ||
509 | return rc; | ||
510 | rc = put_entry(key, 1, len, fp); | ||
511 | if (rc) | ||
512 | return rc; | ||
513 | return 0; | ||
514 | } | ||
515 | |||
516 | /* | ||
517 | * cond_write_av_list doesn't write out the av_list nodes. | ||
518 | * Instead it writes out the key/value pairs from the avtab. This | ||
519 | * is necessary because there is no way to uniquely identify rules | ||
520 | * in the avtab so it is not possible to associate individual rules | ||
521 | * in the avtab with a conditional without saving them as part of | ||
522 | * the conditional. This means that the avtab with the conditional | ||
523 | * rules will not be saved but will be rebuilt on policy load. | ||
524 | */ | ||
525 | static int cond_write_av_list(struct policydb *p, | ||
526 | struct cond_av_list *list, struct policy_file *fp) | ||
527 | { | ||
528 | __le32 buf[1]; | ||
529 | struct cond_av_list *cur_list; | ||
530 | u32 len; | ||
531 | int rc; | ||
532 | |||
533 | len = 0; | ||
534 | for (cur_list = list; cur_list != NULL; cur_list = cur_list->next) | ||
535 | len++; | ||
536 | |||
537 | buf[0] = cpu_to_le32(len); | ||
538 | rc = put_entry(buf, sizeof(u32), 1, fp); | ||
539 | if (rc) | ||
540 | return rc; | ||
541 | |||
542 | if (len == 0) | ||
543 | return 0; | ||
544 | |||
545 | for (cur_list = list; cur_list != NULL; cur_list = cur_list->next) { | ||
546 | rc = avtab_write_item(p, cur_list->node, fp); | ||
547 | if (rc) | ||
548 | return rc; | ||
549 | } | ||
550 | |||
551 | return 0; | ||
552 | } | ||
553 | |||
554 | int cond_write_node(struct policydb *p, struct cond_node *node, | ||
555 | struct policy_file *fp) | ||
556 | { | ||
557 | struct cond_expr *cur_expr; | ||
558 | __le32 buf[2]; | ||
559 | int rc; | ||
560 | u32 len = 0; | ||
561 | |||
562 | buf[0] = cpu_to_le32(node->cur_state); | ||
563 | rc = put_entry(buf, sizeof(u32), 1, fp); | ||
564 | if (rc) | ||
565 | return rc; | ||
566 | |||
567 | for (cur_expr = node->expr; cur_expr != NULL; cur_expr = cur_expr->next) | ||
568 | len++; | ||
569 | |||
570 | buf[0] = cpu_to_le32(len); | ||
571 | rc = put_entry(buf, sizeof(u32), 1, fp); | ||
572 | if (rc) | ||
573 | return rc; | ||
574 | |||
575 | for (cur_expr = node->expr; cur_expr != NULL; cur_expr = cur_expr->next) { | ||
576 | buf[0] = cpu_to_le32(cur_expr->expr_type); | ||
577 | buf[1] = cpu_to_le32(cur_expr->bool); | ||
578 | rc = put_entry(buf, sizeof(u32), 2, fp); | ||
579 | if (rc) | ||
580 | return rc; | ||
581 | } | ||
582 | |||
583 | rc = cond_write_av_list(p, node->true_list, fp); | ||
584 | if (rc) | ||
585 | return rc; | ||
586 | rc = cond_write_av_list(p, node->false_list, fp); | ||
587 | if (rc) | ||
588 | return rc; | ||
589 | |||
590 | return 0; | ||
591 | } | ||
592 | |||
593 | int cond_write_list(struct policydb *p, struct cond_node *list, void *fp) | ||
594 | { | ||
595 | struct cond_node *cur; | ||
596 | u32 len; | ||
597 | __le32 buf[1]; | ||
598 | int rc; | ||
599 | |||
600 | len = 0; | ||
601 | for (cur = list; cur != NULL; cur = cur->next) | ||
602 | len++; | ||
603 | buf[0] = cpu_to_le32(len); | ||
604 | rc = put_entry(buf, sizeof(u32), 1, fp); | ||
605 | if (rc) | ||
606 | return rc; | ||
607 | |||
608 | for (cur = list; cur != NULL; cur = cur->next) { | ||
609 | rc = cond_write_node(p, cur, fp); | ||
610 | if (rc) | ||
611 | return rc; | ||
612 | } | ||
613 | |||
614 | return 0; | ||
615 | } | ||
493 | /* Determine whether additional permissions are granted by the conditional | 616 | /* Determine whether additional permissions are granted by the conditional |
494 | * av table, and if so, add them to the result | 617 | * av table, and if so, add them to the result |
495 | */ | 618 | */ |
diff --git a/security/selinux/ss/conditional.h b/security/selinux/ss/conditional.h index 53ddb013ae57..3f209c635295 100644 --- a/security/selinux/ss/conditional.h +++ b/security/selinux/ss/conditional.h | |||
@@ -69,6 +69,8 @@ int cond_index_bool(void *key, void *datum, void *datap); | |||
69 | 69 | ||
70 | int cond_read_bool(struct policydb *p, struct hashtab *h, void *fp); | 70 | int cond_read_bool(struct policydb *p, struct hashtab *h, void *fp); |
71 | int cond_read_list(struct policydb *p, void *fp); | 71 | int cond_read_list(struct policydb *p, void *fp); |
72 | int cond_write_bool(void *key, void *datum, void *ptr); | ||
73 | int cond_write_list(struct policydb *p, struct cond_node *list, void *fp); | ||
72 | 74 | ||
73 | void cond_compute_av(struct avtab *ctab, struct avtab_key *key, struct av_decision *avd); | 75 | void cond_compute_av(struct avtab *ctab, struct avtab_key *key, struct av_decision *avd); |
74 | 76 | ||
diff --git a/security/selinux/ss/ebitmap.c b/security/selinux/ss/ebitmap.c index 04b6145d767f..d42951fcbe87 100644 --- a/security/selinux/ss/ebitmap.c +++ b/security/selinux/ss/ebitmap.c | |||
@@ -22,6 +22,8 @@ | |||
22 | #include "ebitmap.h" | 22 | #include "ebitmap.h" |
23 | #include "policydb.h" | 23 | #include "policydb.h" |
24 | 24 | ||
25 | #define BITS_PER_U64 (sizeof(u64) * 8) | ||
26 | |||
25 | int ebitmap_cmp(struct ebitmap *e1, struct ebitmap *e2) | 27 | int ebitmap_cmp(struct ebitmap *e1, struct ebitmap *e2) |
26 | { | 28 | { |
27 | struct ebitmap_node *n1, *n2; | 29 | struct ebitmap_node *n1, *n2; |
@@ -363,10 +365,10 @@ int ebitmap_read(struct ebitmap *e, void *fp) | |||
363 | e->highbit = le32_to_cpu(buf[1]); | 365 | e->highbit = le32_to_cpu(buf[1]); |
364 | count = le32_to_cpu(buf[2]); | 366 | count = le32_to_cpu(buf[2]); |
365 | 367 | ||
366 | if (mapunit != sizeof(u64) * 8) { | 368 | if (mapunit != BITS_PER_U64) { |
367 | printk(KERN_ERR "SELinux: ebitmap: map size %u does not " | 369 | printk(KERN_ERR "SELinux: ebitmap: map size %u does not " |
368 | "match my size %Zd (high bit was %d)\n", | 370 | "match my size %Zd (high bit was %d)\n", |
369 | mapunit, sizeof(u64) * 8, e->highbit); | 371 | mapunit, BITS_PER_U64, e->highbit); |
370 | goto bad; | 372 | goto bad; |
371 | } | 373 | } |
372 | 374 | ||
@@ -446,3 +448,78 @@ bad: | |||
446 | ebitmap_destroy(e); | 448 | ebitmap_destroy(e); |
447 | goto out; | 449 | goto out; |
448 | } | 450 | } |
451 | |||
452 | int ebitmap_write(struct ebitmap *e, void *fp) | ||
453 | { | ||
454 | struct ebitmap_node *n; | ||
455 | u32 count; | ||
456 | __le32 buf[3]; | ||
457 | u64 map; | ||
458 | int bit, last_bit, last_startbit, rc; | ||
459 | |||
460 | buf[0] = cpu_to_le32(BITS_PER_U64); | ||
461 | |||
462 | count = 0; | ||
463 | last_bit = 0; | ||
464 | last_startbit = -1; | ||
465 | ebitmap_for_each_positive_bit(e, n, bit) { | ||
466 | if (rounddown(bit, (int)BITS_PER_U64) > last_startbit) { | ||
467 | count++; | ||
468 | last_startbit = rounddown(bit, BITS_PER_U64); | ||
469 | } | ||
470 | last_bit = roundup(bit + 1, BITS_PER_U64); | ||
471 | } | ||
472 | buf[1] = cpu_to_le32(last_bit); | ||
473 | buf[2] = cpu_to_le32(count); | ||
474 | |||
475 | rc = put_entry(buf, sizeof(u32), 3, fp); | ||
476 | if (rc) | ||
477 | return rc; | ||
478 | |||
479 | map = 0; | ||
480 | last_startbit = INT_MIN; | ||
481 | ebitmap_for_each_positive_bit(e, n, bit) { | ||
482 | if (rounddown(bit, (int)BITS_PER_U64) > last_startbit) { | ||
483 | __le64 buf64[1]; | ||
484 | |||
485 | /* this is the very first bit */ | ||
486 | if (!map) { | ||
487 | last_startbit = rounddown(bit, BITS_PER_U64); | ||
488 | map = (u64)1 << (bit - last_startbit); | ||
489 | continue; | ||
490 | } | ||
491 | |||
492 | /* write the last node */ | ||
493 | buf[0] = cpu_to_le32(last_startbit); | ||
494 | rc = put_entry(buf, sizeof(u32), 1, fp); | ||
495 | if (rc) | ||
496 | return rc; | ||
497 | |||
498 | buf64[0] = cpu_to_le64(map); | ||
499 | rc = put_entry(buf64, sizeof(u64), 1, fp); | ||
500 | if (rc) | ||
501 | return rc; | ||
502 | |||
503 | /* set up for the next node */ | ||
504 | map = 0; | ||
505 | last_startbit = rounddown(bit, BITS_PER_U64); | ||
506 | } | ||
507 | map |= (u64)1 << (bit - last_startbit); | ||
508 | } | ||
509 | /* write the last node */ | ||
510 | if (map) { | ||
511 | __le64 buf64[1]; | ||
512 | |||
513 | /* write the last node */ | ||
514 | buf[0] = cpu_to_le32(last_startbit); | ||
515 | rc = put_entry(buf, sizeof(u32), 1, fp); | ||
516 | if (rc) | ||
517 | return rc; | ||
518 | |||
519 | buf64[0] = cpu_to_le64(map); | ||
520 | rc = put_entry(buf64, sizeof(u64), 1, fp); | ||
521 | if (rc) | ||
522 | return rc; | ||
523 | } | ||
524 | return 0; | ||
525 | } | ||
diff --git a/security/selinux/ss/ebitmap.h b/security/selinux/ss/ebitmap.h index f283b4367f54..1f4e93c2ae86 100644 --- a/security/selinux/ss/ebitmap.h +++ b/security/selinux/ss/ebitmap.h | |||
@@ -123,6 +123,7 @@ int ebitmap_get_bit(struct ebitmap *e, unsigned long bit); | |||
123 | int ebitmap_set_bit(struct ebitmap *e, unsigned long bit, int value); | 123 | int ebitmap_set_bit(struct ebitmap *e, unsigned long bit, int value); |
124 | void ebitmap_destroy(struct ebitmap *e); | 124 | void ebitmap_destroy(struct ebitmap *e); |
125 | int ebitmap_read(struct ebitmap *e, void *fp); | 125 | int ebitmap_read(struct ebitmap *e, void *fp); |
126 | int ebitmap_write(struct ebitmap *e, void *fp); | ||
126 | 127 | ||
127 | #ifdef CONFIG_NETLABEL | 128 | #ifdef CONFIG_NETLABEL |
128 | int ebitmap_netlbl_export(struct ebitmap *ebmap, | 129 | int ebitmap_netlbl_export(struct ebitmap *ebmap, |
diff --git a/security/selinux/ss/policydb.c b/security/selinux/ss/policydb.c index 3a29704be8ce..94f630d93a5c 100644 --- a/security/selinux/ss/policydb.c +++ b/security/selinux/ss/policydb.c | |||
@@ -37,6 +37,7 @@ | |||
37 | #include "policydb.h" | 37 | #include "policydb.h" |
38 | #include "conditional.h" | 38 | #include "conditional.h" |
39 | #include "mls.h" | 39 | #include "mls.h" |
40 | #include "services.h" | ||
40 | 41 | ||
41 | #define _DEBUG_HASHES | 42 | #define _DEBUG_HASHES |
42 | 43 | ||
@@ -185,9 +186,19 @@ static u32 rangetr_hash(struct hashtab *h, const void *k) | |||
185 | static int rangetr_cmp(struct hashtab *h, const void *k1, const void *k2) | 186 | static int rangetr_cmp(struct hashtab *h, const void *k1, const void *k2) |
186 | { | 187 | { |
187 | const struct range_trans *key1 = k1, *key2 = k2; | 188 | const struct range_trans *key1 = k1, *key2 = k2; |
188 | return (key1->source_type != key2->source_type || | 189 | int v; |
189 | key1->target_type != key2->target_type || | 190 | |
190 | key1->target_class != key2->target_class); | 191 | v = key1->source_type - key2->source_type; |
192 | if (v) | ||
193 | return v; | ||
194 | |||
195 | v = key1->target_type - key2->target_type; | ||
196 | if (v) | ||
197 | return v; | ||
198 | |||
199 | v = key1->target_class - key2->target_class; | ||
200 | |||
201 | return v; | ||
191 | } | 202 | } |
192 | 203 | ||
193 | /* | 204 | /* |
@@ -1624,11 +1635,11 @@ static int role_bounds_sanity_check(void *key, void *datum, void *datap) | |||
1624 | 1635 | ||
1625 | static int type_bounds_sanity_check(void *key, void *datum, void *datap) | 1636 | static int type_bounds_sanity_check(void *key, void *datum, void *datap) |
1626 | { | 1637 | { |
1627 | struct type_datum *upper, *type; | 1638 | struct type_datum *upper; |
1628 | struct policydb *p = datap; | 1639 | struct policydb *p = datap; |
1629 | int depth = 0; | 1640 | int depth = 0; |
1630 | 1641 | ||
1631 | upper = type = datum; | 1642 | upper = datum; |
1632 | while (upper->bounds) { | 1643 | while (upper->bounds) { |
1633 | if (++depth == POLICYDB_BOUNDS_MAXDEPTH) { | 1644 | if (++depth == POLICYDB_BOUNDS_MAXDEPTH) { |
1634 | printk(KERN_ERR "SELinux: type %s: " | 1645 | printk(KERN_ERR "SELinux: type %s: " |
@@ -2306,3 +2317,843 @@ bad: | |||
2306 | policydb_destroy(p); | 2317 | policydb_destroy(p); |
2307 | goto out; | 2318 | goto out; |
2308 | } | 2319 | } |
2320 | |||
2321 | /* | ||
2322 | * Write a MLS level structure to a policydb binary | ||
2323 | * representation file. | ||
2324 | */ | ||
2325 | static int mls_write_level(struct mls_level *l, void *fp) | ||
2326 | { | ||
2327 | __le32 buf[1]; | ||
2328 | int rc; | ||
2329 | |||
2330 | buf[0] = cpu_to_le32(l->sens); | ||
2331 | rc = put_entry(buf, sizeof(u32), 1, fp); | ||
2332 | if (rc) | ||
2333 | return rc; | ||
2334 | |||
2335 | rc = ebitmap_write(&l->cat, fp); | ||
2336 | if (rc) | ||
2337 | return rc; | ||
2338 | |||
2339 | return 0; | ||
2340 | } | ||
2341 | |||
2342 | /* | ||
2343 | * Write a MLS range structure to a policydb binary | ||
2344 | * representation file. | ||
2345 | */ | ||
2346 | static int mls_write_range_helper(struct mls_range *r, void *fp) | ||
2347 | { | ||
2348 | __le32 buf[3]; | ||
2349 | size_t items; | ||
2350 | int rc, eq; | ||
2351 | |||
2352 | eq = mls_level_eq(&r->level[1], &r->level[0]); | ||
2353 | |||
2354 | if (eq) | ||
2355 | items = 2; | ||
2356 | else | ||
2357 | items = 3; | ||
2358 | buf[0] = cpu_to_le32(items-1); | ||
2359 | buf[1] = cpu_to_le32(r->level[0].sens); | ||
2360 | if (!eq) | ||
2361 | buf[2] = cpu_to_le32(r->level[1].sens); | ||
2362 | |||
2363 | BUG_ON(items > (sizeof(buf)/sizeof(buf[0]))); | ||
2364 | |||
2365 | rc = put_entry(buf, sizeof(u32), items, fp); | ||
2366 | if (rc) | ||
2367 | return rc; | ||
2368 | |||
2369 | rc = ebitmap_write(&r->level[0].cat, fp); | ||
2370 | if (rc) | ||
2371 | return rc; | ||
2372 | if (!eq) { | ||
2373 | rc = ebitmap_write(&r->level[1].cat, fp); | ||
2374 | if (rc) | ||
2375 | return rc; | ||
2376 | } | ||
2377 | |||
2378 | return 0; | ||
2379 | } | ||
2380 | |||
2381 | static int sens_write(void *vkey, void *datum, void *ptr) | ||
2382 | { | ||
2383 | char *key = vkey; | ||
2384 | struct level_datum *levdatum = datum; | ||
2385 | struct policy_data *pd = ptr; | ||
2386 | void *fp = pd->fp; | ||
2387 | __le32 buf[2]; | ||
2388 | size_t len; | ||
2389 | int rc; | ||
2390 | |||
2391 | len = strlen(key); | ||
2392 | buf[0] = cpu_to_le32(len); | ||
2393 | buf[1] = cpu_to_le32(levdatum->isalias); | ||
2394 | rc = put_entry(buf, sizeof(u32), 2, fp); | ||
2395 | if (rc) | ||
2396 | return rc; | ||
2397 | |||
2398 | rc = put_entry(key, 1, len, fp); | ||
2399 | if (rc) | ||
2400 | return rc; | ||
2401 | |||
2402 | rc = mls_write_level(levdatum->level, fp); | ||
2403 | if (rc) | ||
2404 | return rc; | ||
2405 | |||
2406 | return 0; | ||
2407 | } | ||
2408 | |||
2409 | static int cat_write(void *vkey, void *datum, void *ptr) | ||
2410 | { | ||
2411 | char *key = vkey; | ||
2412 | struct cat_datum *catdatum = datum; | ||
2413 | struct policy_data *pd = ptr; | ||
2414 | void *fp = pd->fp; | ||
2415 | __le32 buf[3]; | ||
2416 | size_t len; | ||
2417 | int rc; | ||
2418 | |||
2419 | len = strlen(key); | ||
2420 | buf[0] = cpu_to_le32(len); | ||
2421 | buf[1] = cpu_to_le32(catdatum->value); | ||
2422 | buf[2] = cpu_to_le32(catdatum->isalias); | ||
2423 | rc = put_entry(buf, sizeof(u32), 3, fp); | ||
2424 | if (rc) | ||
2425 | return rc; | ||
2426 | |||
2427 | rc = put_entry(key, 1, len, fp); | ||
2428 | if (rc) | ||
2429 | return rc; | ||
2430 | |||
2431 | return 0; | ||
2432 | } | ||
2433 | |||
2434 | static int role_trans_write(struct role_trans *r, void *fp) | ||
2435 | { | ||
2436 | struct role_trans *tr; | ||
2437 | u32 buf[3]; | ||
2438 | size_t nel; | ||
2439 | int rc; | ||
2440 | |||
2441 | nel = 0; | ||
2442 | for (tr = r; tr; tr = tr->next) | ||
2443 | nel++; | ||
2444 | buf[0] = cpu_to_le32(nel); | ||
2445 | rc = put_entry(buf, sizeof(u32), 1, fp); | ||
2446 | if (rc) | ||
2447 | return rc; | ||
2448 | for (tr = r; tr; tr = tr->next) { | ||
2449 | buf[0] = cpu_to_le32(tr->role); | ||
2450 | buf[1] = cpu_to_le32(tr->type); | ||
2451 | buf[2] = cpu_to_le32(tr->new_role); | ||
2452 | rc = put_entry(buf, sizeof(u32), 3, fp); | ||
2453 | if (rc) | ||
2454 | return rc; | ||
2455 | } | ||
2456 | |||
2457 | return 0; | ||
2458 | } | ||
2459 | |||
2460 | static int role_allow_write(struct role_allow *r, void *fp) | ||
2461 | { | ||
2462 | struct role_allow *ra; | ||
2463 | u32 buf[2]; | ||
2464 | size_t nel; | ||
2465 | int rc; | ||
2466 | |||
2467 | nel = 0; | ||
2468 | for (ra = r; ra; ra = ra->next) | ||
2469 | nel++; | ||
2470 | buf[0] = cpu_to_le32(nel); | ||
2471 | rc = put_entry(buf, sizeof(u32), 1, fp); | ||
2472 | if (rc) | ||
2473 | return rc; | ||
2474 | for (ra = r; ra; ra = ra->next) { | ||
2475 | buf[0] = cpu_to_le32(ra->role); | ||
2476 | buf[1] = cpu_to_le32(ra->new_role); | ||
2477 | rc = put_entry(buf, sizeof(u32), 2, fp); | ||
2478 | if (rc) | ||
2479 | return rc; | ||
2480 | } | ||
2481 | return 0; | ||
2482 | } | ||
2483 | |||
2484 | /* | ||
2485 | * Write a security context structure | ||
2486 | * to a policydb binary representation file. | ||
2487 | */ | ||
2488 | static int context_write(struct policydb *p, struct context *c, | ||
2489 | void *fp) | ||
2490 | { | ||
2491 | int rc; | ||
2492 | __le32 buf[3]; | ||
2493 | |||
2494 | buf[0] = cpu_to_le32(c->user); | ||
2495 | buf[1] = cpu_to_le32(c->role); | ||
2496 | buf[2] = cpu_to_le32(c->type); | ||
2497 | |||
2498 | rc = put_entry(buf, sizeof(u32), 3, fp); | ||
2499 | if (rc) | ||
2500 | return rc; | ||
2501 | |||
2502 | rc = mls_write_range_helper(&c->range, fp); | ||
2503 | if (rc) | ||
2504 | return rc; | ||
2505 | |||
2506 | return 0; | ||
2507 | } | ||
2508 | |||
2509 | /* | ||
2510 | * The following *_write functions are used to | ||
2511 | * write the symbol data to a policy database | ||
2512 | * binary representation file. | ||
2513 | */ | ||
2514 | |||
2515 | static int perm_write(void *vkey, void *datum, void *fp) | ||
2516 | { | ||
2517 | char *key = vkey; | ||
2518 | struct perm_datum *perdatum = datum; | ||
2519 | __le32 buf[2]; | ||
2520 | size_t len; | ||
2521 | int rc; | ||
2522 | |||
2523 | len = strlen(key); | ||
2524 | buf[0] = cpu_to_le32(len); | ||
2525 | buf[1] = cpu_to_le32(perdatum->value); | ||
2526 | rc = put_entry(buf, sizeof(u32), 2, fp); | ||
2527 | if (rc) | ||
2528 | return rc; | ||
2529 | |||
2530 | rc = put_entry(key, 1, len, fp); | ||
2531 | if (rc) | ||
2532 | return rc; | ||
2533 | |||
2534 | return 0; | ||
2535 | } | ||
2536 | |||
2537 | static int common_write(void *vkey, void *datum, void *ptr) | ||
2538 | { | ||
2539 | char *key = vkey; | ||
2540 | struct common_datum *comdatum = datum; | ||
2541 | struct policy_data *pd = ptr; | ||
2542 | void *fp = pd->fp; | ||
2543 | __le32 buf[4]; | ||
2544 | size_t len; | ||
2545 | int rc; | ||
2546 | |||
2547 | len = strlen(key); | ||
2548 | buf[0] = cpu_to_le32(len); | ||
2549 | buf[1] = cpu_to_le32(comdatum->value); | ||
2550 | buf[2] = cpu_to_le32(comdatum->permissions.nprim); | ||
2551 | buf[3] = cpu_to_le32(comdatum->permissions.table->nel); | ||
2552 | rc = put_entry(buf, sizeof(u32), 4, fp); | ||
2553 | if (rc) | ||
2554 | return rc; | ||
2555 | |||
2556 | rc = put_entry(key, 1, len, fp); | ||
2557 | if (rc) | ||
2558 | return rc; | ||
2559 | |||
2560 | rc = hashtab_map(comdatum->permissions.table, perm_write, fp); | ||
2561 | if (rc) | ||
2562 | return rc; | ||
2563 | |||
2564 | return 0; | ||
2565 | } | ||
2566 | |||
2567 | static int write_cons_helper(struct policydb *p, struct constraint_node *node, | ||
2568 | void *fp) | ||
2569 | { | ||
2570 | struct constraint_node *c; | ||
2571 | struct constraint_expr *e; | ||
2572 | __le32 buf[3]; | ||
2573 | u32 nel; | ||
2574 | int rc; | ||
2575 | |||
2576 | for (c = node; c; c = c->next) { | ||
2577 | nel = 0; | ||
2578 | for (e = c->expr; e; e = e->next) | ||
2579 | nel++; | ||
2580 | buf[0] = cpu_to_le32(c->permissions); | ||
2581 | buf[1] = cpu_to_le32(nel); | ||
2582 | rc = put_entry(buf, sizeof(u32), 2, fp); | ||
2583 | if (rc) | ||
2584 | return rc; | ||
2585 | for (e = c->expr; e; e = e->next) { | ||
2586 | buf[0] = cpu_to_le32(e->expr_type); | ||
2587 | buf[1] = cpu_to_le32(e->attr); | ||
2588 | buf[2] = cpu_to_le32(e->op); | ||
2589 | rc = put_entry(buf, sizeof(u32), 3, fp); | ||
2590 | if (rc) | ||
2591 | return rc; | ||
2592 | |||
2593 | switch (e->expr_type) { | ||
2594 | case CEXPR_NAMES: | ||
2595 | rc = ebitmap_write(&e->names, fp); | ||
2596 | if (rc) | ||
2597 | return rc; | ||
2598 | break; | ||
2599 | default: | ||
2600 | break; | ||
2601 | } | ||
2602 | } | ||
2603 | } | ||
2604 | |||
2605 | return 0; | ||
2606 | } | ||
2607 | |||
2608 | static int class_write(void *vkey, void *datum, void *ptr) | ||
2609 | { | ||
2610 | char *key = vkey; | ||
2611 | struct class_datum *cladatum = datum; | ||
2612 | struct policy_data *pd = ptr; | ||
2613 | void *fp = pd->fp; | ||
2614 | struct policydb *p = pd->p; | ||
2615 | struct constraint_node *c; | ||
2616 | __le32 buf[6]; | ||
2617 | u32 ncons; | ||
2618 | size_t len, len2; | ||
2619 | int rc; | ||
2620 | |||
2621 | len = strlen(key); | ||
2622 | if (cladatum->comkey) | ||
2623 | len2 = strlen(cladatum->comkey); | ||
2624 | else | ||
2625 | len2 = 0; | ||
2626 | |||
2627 | ncons = 0; | ||
2628 | for (c = cladatum->constraints; c; c = c->next) | ||
2629 | ncons++; | ||
2630 | |||
2631 | buf[0] = cpu_to_le32(len); | ||
2632 | buf[1] = cpu_to_le32(len2); | ||
2633 | buf[2] = cpu_to_le32(cladatum->value); | ||
2634 | buf[3] = cpu_to_le32(cladatum->permissions.nprim); | ||
2635 | if (cladatum->permissions.table) | ||
2636 | buf[4] = cpu_to_le32(cladatum->permissions.table->nel); | ||
2637 | else | ||
2638 | buf[4] = 0; | ||
2639 | buf[5] = cpu_to_le32(ncons); | ||
2640 | rc = put_entry(buf, sizeof(u32), 6, fp); | ||
2641 | if (rc) | ||
2642 | return rc; | ||
2643 | |||
2644 | rc = put_entry(key, 1, len, fp); | ||
2645 | if (rc) | ||
2646 | return rc; | ||
2647 | |||
2648 | if (cladatum->comkey) { | ||
2649 | rc = put_entry(cladatum->comkey, 1, len2, fp); | ||
2650 | if (rc) | ||
2651 | return rc; | ||
2652 | } | ||
2653 | |||
2654 | rc = hashtab_map(cladatum->permissions.table, perm_write, fp); | ||
2655 | if (rc) | ||
2656 | return rc; | ||
2657 | |||
2658 | rc = write_cons_helper(p, cladatum->constraints, fp); | ||
2659 | if (rc) | ||
2660 | return rc; | ||
2661 | |||
2662 | /* write out the validatetrans rule */ | ||
2663 | ncons = 0; | ||
2664 | for (c = cladatum->validatetrans; c; c = c->next) | ||
2665 | ncons++; | ||
2666 | |||
2667 | buf[0] = cpu_to_le32(ncons); | ||
2668 | rc = put_entry(buf, sizeof(u32), 1, fp); | ||
2669 | if (rc) | ||
2670 | return rc; | ||
2671 | |||
2672 | rc = write_cons_helper(p, cladatum->validatetrans, fp); | ||
2673 | if (rc) | ||
2674 | return rc; | ||
2675 | |||
2676 | return 0; | ||
2677 | } | ||
2678 | |||
2679 | static int role_write(void *vkey, void *datum, void *ptr) | ||
2680 | { | ||
2681 | char *key = vkey; | ||
2682 | struct role_datum *role = datum; | ||
2683 | struct policy_data *pd = ptr; | ||
2684 | void *fp = pd->fp; | ||
2685 | struct policydb *p = pd->p; | ||
2686 | __le32 buf[3]; | ||
2687 | size_t items, len; | ||
2688 | int rc; | ||
2689 | |||
2690 | len = strlen(key); | ||
2691 | items = 0; | ||
2692 | buf[items++] = cpu_to_le32(len); | ||
2693 | buf[items++] = cpu_to_le32(role->value); | ||
2694 | if (p->policyvers >= POLICYDB_VERSION_BOUNDARY) | ||
2695 | buf[items++] = cpu_to_le32(role->bounds); | ||
2696 | |||
2697 | BUG_ON(items > (sizeof(buf)/sizeof(buf[0]))); | ||
2698 | |||
2699 | rc = put_entry(buf, sizeof(u32), items, fp); | ||
2700 | if (rc) | ||
2701 | return rc; | ||
2702 | |||
2703 | rc = put_entry(key, 1, len, fp); | ||
2704 | if (rc) | ||
2705 | return rc; | ||
2706 | |||
2707 | rc = ebitmap_write(&role->dominates, fp); | ||
2708 | if (rc) | ||
2709 | return rc; | ||
2710 | |||
2711 | rc = ebitmap_write(&role->types, fp); | ||
2712 | if (rc) | ||
2713 | return rc; | ||
2714 | |||
2715 | return 0; | ||
2716 | } | ||
2717 | |||
2718 | static int type_write(void *vkey, void *datum, void *ptr) | ||
2719 | { | ||
2720 | char *key = vkey; | ||
2721 | struct type_datum *typdatum = datum; | ||
2722 | struct policy_data *pd = ptr; | ||
2723 | struct policydb *p = pd->p; | ||
2724 | void *fp = pd->fp; | ||
2725 | __le32 buf[4]; | ||
2726 | int rc; | ||
2727 | size_t items, len; | ||
2728 | |||
2729 | len = strlen(key); | ||
2730 | items = 0; | ||
2731 | buf[items++] = cpu_to_le32(len); | ||
2732 | buf[items++] = cpu_to_le32(typdatum->value); | ||
2733 | if (p->policyvers >= POLICYDB_VERSION_BOUNDARY) { | ||
2734 | u32 properties = 0; | ||
2735 | |||
2736 | if (typdatum->primary) | ||
2737 | properties |= TYPEDATUM_PROPERTY_PRIMARY; | ||
2738 | |||
2739 | if (typdatum->attribute) | ||
2740 | properties |= TYPEDATUM_PROPERTY_ATTRIBUTE; | ||
2741 | |||
2742 | buf[items++] = cpu_to_le32(properties); | ||
2743 | buf[items++] = cpu_to_le32(typdatum->bounds); | ||
2744 | } else { | ||
2745 | buf[items++] = cpu_to_le32(typdatum->primary); | ||
2746 | } | ||
2747 | BUG_ON(items > (sizeof(buf) / sizeof(buf[0]))); | ||
2748 | rc = put_entry(buf, sizeof(u32), items, fp); | ||
2749 | if (rc) | ||
2750 | return rc; | ||
2751 | |||
2752 | rc = put_entry(key, 1, len, fp); | ||
2753 | if (rc) | ||
2754 | return rc; | ||
2755 | |||
2756 | return 0; | ||
2757 | } | ||
2758 | |||
2759 | static int user_write(void *vkey, void *datum, void *ptr) | ||
2760 | { | ||
2761 | char *key = vkey; | ||
2762 | struct user_datum *usrdatum = datum; | ||
2763 | struct policy_data *pd = ptr; | ||
2764 | struct policydb *p = pd->p; | ||
2765 | void *fp = pd->fp; | ||
2766 | __le32 buf[3]; | ||
2767 | size_t items, len; | ||
2768 | int rc; | ||
2769 | |||
2770 | len = strlen(key); | ||
2771 | items = 0; | ||
2772 | buf[items++] = cpu_to_le32(len); | ||
2773 | buf[items++] = cpu_to_le32(usrdatum->value); | ||
2774 | if (p->policyvers >= POLICYDB_VERSION_BOUNDARY) | ||
2775 | buf[items++] = cpu_to_le32(usrdatum->bounds); | ||
2776 | BUG_ON(items > (sizeof(buf) / sizeof(buf[0]))); | ||
2777 | rc = put_entry(buf, sizeof(u32), items, fp); | ||
2778 | if (rc) | ||
2779 | return rc; | ||
2780 | |||
2781 | rc = put_entry(key, 1, len, fp); | ||
2782 | if (rc) | ||
2783 | return rc; | ||
2784 | |||
2785 | rc = ebitmap_write(&usrdatum->roles, fp); | ||
2786 | if (rc) | ||
2787 | return rc; | ||
2788 | |||
2789 | rc = mls_write_range_helper(&usrdatum->range, fp); | ||
2790 | if (rc) | ||
2791 | return rc; | ||
2792 | |||
2793 | rc = mls_write_level(&usrdatum->dfltlevel, fp); | ||
2794 | if (rc) | ||
2795 | return rc; | ||
2796 | |||
2797 | return 0; | ||
2798 | } | ||
2799 | |||
2800 | static int (*write_f[SYM_NUM]) (void *key, void *datum, | ||
2801 | void *datap) = | ||
2802 | { /* per-symbol-table hashtab_map() callbacks; policydb_write() applies write_f[i] to p->symtab[i].table, so the order here must match the symtab order */ | ||
2803 | common_write, | ||
2804 | class_write, | ||
2805 | role_write, | ||
2806 | type_write, | ||
2807 | user_write, | ||
2808 | cond_write_bool, | ||
2809 | sens_write, | ||
2810 | cat_write, | ||
2811 | }; | ||
2812 | |||
2813 | static int ocontext_write(struct policydb *p, struct policydb_compat_info *info, | ||
2814 | void *fp) | ||
2815 | { | ||
2816 | unsigned int i, j, rc; | ||
2817 | size_t nel, len; | ||
2818 | __le32 buf[3]; | ||
2819 | u32 nodebuf[8]; | ||
2820 | struct ocontext *c; | ||
2821 | for (i = 0; i < info->ocon_num; i++) { | ||
2822 | nel = 0; | ||
2823 | for (c = p->ocontexts[i]; c; c = c->next) | ||
2824 | nel++; | ||
2825 | buf[0] = cpu_to_le32(nel); | ||
2826 | rc = put_entry(buf, sizeof(u32), 1, fp); | ||
2827 | if (rc) | ||
2828 | return rc; | ||
2829 | for (c = p->ocontexts[i]; c; c = c->next) { | ||
2830 | switch (i) { | ||
2831 | case OCON_ISID: | ||
2832 | buf[0] = cpu_to_le32(c->sid[0]); | ||
2833 | rc = put_entry(buf, sizeof(u32), 1, fp); | ||
2834 | if (rc) | ||
2835 | return rc; | ||
2836 | rc = context_write(p, &c->context[0], fp); | ||
2837 | if (rc) | ||
2838 | return rc; | ||
2839 | break; | ||
2840 | case OCON_FS: | ||
2841 | case OCON_NETIF: | ||
2842 | len = strlen(c->u.name); | ||
2843 | buf[0] = cpu_to_le32(len); | ||
2844 | rc = put_entry(buf, sizeof(u32), 1, fp); | ||
2845 | if (rc) | ||
2846 | return rc; | ||
2847 | rc = put_entry(c->u.name, 1, len, fp); | ||
2848 | if (rc) | ||
2849 | return rc; | ||
2850 | rc = context_write(p, &c->context[0], fp); | ||
2851 | if (rc) | ||
2852 | return rc; | ||
2853 | rc = context_write(p, &c->context[1], fp); | ||
2854 | if (rc) | ||
2855 | return rc; | ||
2856 | break; | ||
2857 | case OCON_PORT: | ||
2858 | buf[0] = cpu_to_le32(c->u.port.protocol); | ||
2859 | buf[1] = cpu_to_le32(c->u.port.low_port); | ||
2860 | buf[2] = cpu_to_le32(c->u.port.high_port); | ||
2861 | rc = put_entry(buf, sizeof(u32), 3, fp); | ||
2862 | if (rc) | ||
2863 | return rc; | ||
2864 | rc = context_write(p, &c->context[0], fp); | ||
2865 | if (rc) | ||
2866 | return rc; | ||
2867 | break; | ||
2868 | case OCON_NODE: | ||
2869 | nodebuf[0] = c->u.node.addr; /* network order */ | ||
2870 | nodebuf[1] = c->u.node.mask; /* network order */ | ||
2871 | rc = put_entry(nodebuf, sizeof(u32), 2, fp); | ||
2872 | if (rc) | ||
2873 | return rc; | ||
2874 | rc = context_write(p, &c->context[0], fp); | ||
2875 | if (rc) | ||
2876 | return rc; | ||
2877 | break; | ||
2878 | case OCON_FSUSE: | ||
2879 | buf[0] = cpu_to_le32(c->v.behavior); | ||
2880 | len = strlen(c->u.name); | ||
2881 | buf[1] = cpu_to_le32(len); | ||
2882 | rc = put_entry(buf, sizeof(u32), 2, fp); | ||
2883 | if (rc) | ||
2884 | return rc; | ||
2885 | rc = put_entry(c->u.name, 1, len, fp); | ||
2886 | if (rc) | ||
2887 | return rc; | ||
2888 | rc = context_write(p, &c->context[0], fp); | ||
2889 | if (rc) | ||
2890 | return rc; | ||
2891 | break; | ||
2892 | case OCON_NODE6: | ||
2893 | for (j = 0; j < 4; j++) | ||
2894 | nodebuf[j] = c->u.node6.addr[j]; /* network order */ | ||
2895 | for (j = 0; j < 4; j++) | ||
2896 | nodebuf[j + 4] = c->u.node6.mask[j]; /* network order */ | ||
2897 | rc = put_entry(nodebuf, sizeof(u32), 8, fp); | ||
2898 | if (rc) | ||
2899 | return rc; | ||
2900 | rc = context_write(p, &c->context[0], fp); | ||
2901 | if (rc) | ||
2902 | return rc; | ||
2903 | break; | ||
2904 | } | ||
2905 | } | ||
2906 | } | ||
2907 | return 0; | ||
2908 | } | ||
2909 | |||
2910 | static int genfs_write(struct policydb *p, void *fp) | ||
2911 | { | ||
2912 | struct genfs *genfs; | ||
2913 | struct ocontext *c; | ||
2914 | size_t len; | ||
2915 | __le32 buf[1]; | ||
2916 | int rc; | ||
2917 | |||
2918 | len = 0; | ||
2919 | for (genfs = p->genfs; genfs; genfs = genfs->next) | ||
2920 | len++; | ||
2921 | buf[0] = cpu_to_le32(len); | ||
2922 | rc = put_entry(buf, sizeof(u32), 1, fp); | ||
2923 | if (rc) | ||
2924 | return rc; | ||
2925 | for (genfs = p->genfs; genfs; genfs = genfs->next) { | ||
2926 | len = strlen(genfs->fstype); | ||
2927 | buf[0] = cpu_to_le32(len); | ||
2928 | rc = put_entry(buf, sizeof(u32), 1, fp); | ||
2929 | if (rc) | ||
2930 | return rc; | ||
2931 | rc = put_entry(genfs->fstype, 1, len, fp); | ||
2932 | if (rc) | ||
2933 | return rc; | ||
2934 | len = 0; | ||
2935 | for (c = genfs->head; c; c = c->next) | ||
2936 | len++; | ||
2937 | buf[0] = cpu_to_le32(len); | ||
2938 | rc = put_entry(buf, sizeof(u32), 1, fp); | ||
2939 | if (rc) | ||
2940 | return rc; | ||
2941 | for (c = genfs->head; c; c = c->next) { | ||
2942 | len = strlen(c->u.name); | ||
2943 | buf[0] = cpu_to_le32(len); | ||
2944 | rc = put_entry(buf, sizeof(u32), 1, fp); | ||
2945 | if (rc) | ||
2946 | return rc; | ||
2947 | rc = put_entry(c->u.name, 1, len, fp); | ||
2948 | if (rc) | ||
2949 | return rc; | ||
2950 | buf[0] = cpu_to_le32(c->v.sclass); | ||
2951 | rc = put_entry(buf, sizeof(u32), 1, fp); | ||
2952 | if (rc) | ||
2953 | return rc; | ||
2954 | rc = context_write(p, &c->context[0], fp); | ||
2955 | if (rc) | ||
2956 | return rc; | ||
2957 | } | ||
2958 | } | ||
2959 | return 0; | ||
2960 | } | ||
2961 | |||
/*
 * hashtab_map() callback that counts entries: increments the int that
 * @ptr points to once per call and always returns 0 so the walk
 * continues.  @key and @data are ignored.
 */
static int range_count(void *key, void *data, void *ptr)
{
	int *counter = ptr;

	(*counter)++;

	return 0;
}
2969 | |||
2970 | static int range_write_helper(void *key, void *data, void *ptr) | ||
2971 | { | ||
2972 | __le32 buf[2]; | ||
2973 | struct range_trans *rt = key; | ||
2974 | struct mls_range *r = data; | ||
2975 | struct policy_data *pd = ptr; | ||
2976 | void *fp = pd->fp; | ||
2977 | struct policydb *p = pd->p; | ||
2978 | int rc; | ||
2979 | |||
2980 | buf[0] = cpu_to_le32(rt->source_type); | ||
2981 | buf[1] = cpu_to_le32(rt->target_type); | ||
2982 | rc = put_entry(buf, sizeof(u32), 2, fp); | ||
2983 | if (rc) | ||
2984 | return rc; | ||
2985 | if (p->policyvers >= POLICYDB_VERSION_RANGETRANS) { | ||
2986 | buf[0] = cpu_to_le32(rt->target_class); | ||
2987 | rc = put_entry(buf, sizeof(u32), 1, fp); | ||
2988 | if (rc) | ||
2989 | return rc; | ||
2990 | } | ||
2991 | rc = mls_write_range_helper(r, fp); | ||
2992 | if (rc) | ||
2993 | return rc; | ||
2994 | |||
2995 | return 0; | ||
2996 | } | ||
2997 | |||
2998 | static int range_write(struct policydb *p, void *fp) | ||
2999 | { | ||
3000 | size_t nel; | ||
3001 | __le32 buf[1]; | ||
3002 | int rc; | ||
3003 | struct policy_data pd; | ||
3004 | |||
3005 | pd.p = p; | ||
3006 | pd.fp = fp; | ||
3007 | |||
3008 | /* count the number of entries in the hashtab */ | ||
3009 | nel = 0; | ||
3010 | rc = hashtab_map(p->range_tr, range_count, &nel); | ||
3011 | if (rc) | ||
3012 | return rc; | ||
3013 | |||
3014 | buf[0] = cpu_to_le32(nel); | ||
3015 | rc = put_entry(buf, sizeof(u32), 1, fp); | ||
3016 | if (rc) | ||
3017 | return rc; | ||
3018 | |||
3019 | /* actually write all of the entries */ | ||
3020 | rc = hashtab_map(p->range_tr, range_write_helper, &pd); | ||
3021 | if (rc) | ||
3022 | return rc; | ||
3023 | |||
3024 | return 0; | ||
3025 | } | ||
3026 | |||
3027 | /* | ||
3028 | * Write the configuration data in a policy database | ||
3029 | * structure to a policy database binary representation | ||
3030 | * file. | ||
3031 | */ | ||
3032 | int policydb_write(struct policydb *p, void *fp) | ||
3033 | { | ||
3034 | unsigned int i, num_syms; | ||
3035 | int rc; | ||
3036 | __le32 buf[4]; | ||
3037 | u32 config; | ||
3038 | size_t len; | ||
3039 | struct policydb_compat_info *info; | ||
3040 | |||
3041 | /* | ||
3042 | * refuse to write policy older than compressed avtab | ||
3043 | * to simplify the writer. There are other tests dropped | ||
3044 | * since we assume this throughout the writer code. Be | ||
3045 | * careful if you ever try to remove this restriction | ||
3046 | */ | ||
3047 | if (p->policyvers < POLICYDB_VERSION_AVTAB) { | ||
3048 | printk(KERN_ERR "SELinux: refusing to write policy version %d." | ||
3049 | " Because it is less than version %d\n", p->policyvers, | ||
3050 | POLICYDB_VERSION_AVTAB); | ||
3051 | return -EINVAL; | ||
3052 | } | ||
3053 | |||
3054 | config = 0; | ||
3055 | if (p->mls_enabled) | ||
3056 | config |= POLICYDB_CONFIG_MLS; | ||
3057 | |||
3058 | if (p->reject_unknown) | ||
3059 | config |= REJECT_UNKNOWN; | ||
3060 | if (p->allow_unknown) | ||
3061 | config |= ALLOW_UNKNOWN; | ||
3062 | |||
3063 | /* Write the magic number and string identifiers. */ | ||
3064 | buf[0] = cpu_to_le32(POLICYDB_MAGIC); | ||
3065 | len = strlen(POLICYDB_STRING); | ||
3066 | buf[1] = cpu_to_le32(len); | ||
3067 | rc = put_entry(buf, sizeof(u32), 2, fp); | ||
3068 | if (rc) | ||
3069 | return rc; | ||
3070 | rc = put_entry(POLICYDB_STRING, 1, len, fp); | ||
3071 | if (rc) | ||
3072 | return rc; | ||
3073 | |||
3074 | /* Write the version, config, and table sizes. */ | ||
3075 | info = policydb_lookup_compat(p->policyvers); | ||
3076 | if (!info) { | ||
3077 | printk(KERN_ERR "SELinux: compatibility lookup failed for policy " | ||
3078 | "version %d", p->policyvers); | ||
3079 | return rc; | ||
3080 | } | ||
3081 | |||
3082 | buf[0] = cpu_to_le32(p->policyvers); | ||
3083 | buf[1] = cpu_to_le32(config); | ||
3084 | buf[2] = cpu_to_le32(info->sym_num); | ||
3085 | buf[3] = cpu_to_le32(info->ocon_num); | ||
3086 | |||
3087 | rc = put_entry(buf, sizeof(u32), 4, fp); | ||
3088 | if (rc) | ||
3089 | return rc; | ||
3090 | |||
3091 | if (p->policyvers >= POLICYDB_VERSION_POLCAP) { | ||
3092 | rc = ebitmap_write(&p->policycaps, fp); | ||
3093 | if (rc) | ||
3094 | return rc; | ||
3095 | } | ||
3096 | |||
3097 | if (p->policyvers >= POLICYDB_VERSION_PERMISSIVE) { | ||
3098 | rc = ebitmap_write(&p->permissive_map, fp); | ||
3099 | if (rc) | ||
3100 | return rc; | ||
3101 | } | ||
3102 | |||
3103 | num_syms = info->sym_num; | ||
3104 | for (i = 0; i < num_syms; i++) { | ||
3105 | struct policy_data pd; | ||
3106 | |||
3107 | pd.fp = fp; | ||
3108 | pd.p = p; | ||
3109 | |||
3110 | buf[0] = cpu_to_le32(p->symtab[i].nprim); | ||
3111 | buf[1] = cpu_to_le32(p->symtab[i].table->nel); | ||
3112 | |||
3113 | rc = put_entry(buf, sizeof(u32), 2, fp); | ||
3114 | if (rc) | ||
3115 | return rc; | ||
3116 | rc = hashtab_map(p->symtab[i].table, write_f[i], &pd); | ||
3117 | if (rc) | ||
3118 | return rc; | ||
3119 | } | ||
3120 | |||
3121 | rc = avtab_write(p, &p->te_avtab, fp); | ||
3122 | if (rc) | ||
3123 | return rc; | ||
3124 | |||
3125 | rc = cond_write_list(p, p->cond_list, fp); | ||
3126 | if (rc) | ||
3127 | return rc; | ||
3128 | |||
3129 | rc = role_trans_write(p->role_tr, fp); | ||
3130 | if (rc) | ||
3131 | return rc; | ||
3132 | |||
3133 | rc = role_allow_write(p->role_allow, fp); | ||
3134 | if (rc) | ||
3135 | return rc; | ||
3136 | |||
3137 | rc = ocontext_write(p, info, fp); | ||
3138 | if (rc) | ||
3139 | return rc; | ||
3140 | |||
3141 | rc = genfs_write(p, fp); | ||
3142 | if (rc) | ||
3143 | return rc; | ||
3144 | |||
3145 | rc = range_write(p, fp); | ||
3146 | if (rc) | ||
3147 | return rc; | ||
3148 | |||
3149 | for (i = 0; i < p->p_types.nprim; i++) { | ||
3150 | struct ebitmap *e = flex_array_get(p->type_attr_map_array, i); | ||
3151 | |||
3152 | BUG_ON(!e); | ||
3153 | rc = ebitmap_write(e, fp); | ||
3154 | if (rc) | ||
3155 | return rc; | ||
3156 | } | ||
3157 | |||
3158 | return 0; | ||
3159 | } | ||
diff --git a/security/selinux/ss/policydb.h b/security/selinux/ss/policydb.h index 310e94442cb8..95d3d7de361e 100644 --- a/security/selinux/ss/policydb.h +++ b/security/selinux/ss/policydb.h | |||
@@ -254,6 +254,9 @@ struct policydb { | |||
254 | 254 | ||
255 | struct ebitmap permissive_map; | 255 | struct ebitmap permissive_map; |
256 | 256 | ||
257 | /* length of this policy when it was loaded */ | ||
258 | size_t len; | ||
259 | |||
257 | unsigned int policyvers; | 260 | unsigned int policyvers; |
258 | 261 | ||
259 | unsigned int reject_unknown : 1; | 262 | unsigned int reject_unknown : 1; |
@@ -270,6 +273,7 @@ extern int policydb_class_isvalid(struct policydb *p, unsigned int class); | |||
270 | extern int policydb_type_isvalid(struct policydb *p, unsigned int type); | 273 | extern int policydb_type_isvalid(struct policydb *p, unsigned int type); |
271 | extern int policydb_role_isvalid(struct policydb *p, unsigned int role); | 274 | extern int policydb_role_isvalid(struct policydb *p, unsigned int role); |
272 | extern int policydb_read(struct policydb *p, void *fp); | 275 | extern int policydb_read(struct policydb *p, void *fp); |
276 | extern int policydb_write(struct policydb *p, void *fp); | ||
273 | 277 | ||
274 | #define PERM_SYMTAB_SIZE 32 | 278 | #define PERM_SYMTAB_SIZE 32 |
275 | 279 | ||
@@ -290,6 +294,11 @@ struct policy_file { | |||
290 | size_t len; | 294 | size_t len; |
291 | }; | 295 | }; |
292 | 296 | ||
297 | struct policy_data { | ||
298 | struct policydb *p; | ||
299 | void *fp; | ||
300 | }; | ||
301 | |||
293 | static inline int next_entry(void *buf, struct policy_file *fp, size_t bytes) | 302 | static inline int next_entry(void *buf, struct policy_file *fp, size_t bytes) |
294 | { | 303 | { |
295 | if (bytes > fp->len) | 304 | if (bytes > fp->len) |
@@ -301,6 +310,17 @@ static inline int next_entry(void *buf, struct policy_file *fp, size_t bytes) | |||
301 | return 0; | 310 | return 0; |
302 | } | 311 | } |
303 | 312 | ||
313 | static inline int put_entry(void *buf, size_t bytes, int num, struct policy_file *fp) | ||
314 | { | ||
315 | size_t len = bytes * num; | ||
316 | |||
317 | memcpy(fp->data, buf, len); | ||
318 | fp->data += len; | ||
319 | fp->len -= len; | ||
320 | |||
321 | return 0; | ||
322 | } | ||
323 | |||
304 | extern u16 string_to_security_class(struct policydb *p, const char *name); | 324 | extern u16 string_to_security_class(struct policydb *p, const char *name); |
305 | extern u32 string_to_av_perm(struct policydb *p, u16 tclass, const char *name); | 325 | extern u32 string_to_av_perm(struct policydb *p, u16 tclass, const char *name); |
306 | 326 | ||
diff --git a/security/selinux/ss/services.c b/security/selinux/ss/services.c index 9ea2feca3cd4..223c1ff6ef23 100644 --- a/security/selinux/ss/services.c +++ b/security/selinux/ss/services.c | |||
@@ -51,6 +51,7 @@ | |||
51 | #include <linux/mutex.h> | 51 | #include <linux/mutex.h> |
52 | #include <linux/selinux.h> | 52 | #include <linux/selinux.h> |
53 | #include <linux/flex_array.h> | 53 | #include <linux/flex_array.h> |
54 | #include <linux/vmalloc.h> | ||
54 | #include <net/netlabel.h> | 55 | #include <net/netlabel.h> |
55 | 56 | ||
56 | #include "flask.h" | 57 | #include "flask.h" |
@@ -991,7 +992,8 @@ static int context_struct_to_string(struct context *context, char **scontext, u3 | |||
991 | { | 992 | { |
992 | char *scontextp; | 993 | char *scontextp; |
993 | 994 | ||
994 | *scontext = NULL; | 995 | if (scontext) |
996 | *scontext = NULL; | ||
995 | *scontext_len = 0; | 997 | *scontext_len = 0; |
996 | 998 | ||
997 | if (context->len) { | 999 | if (context->len) { |
@@ -1008,6 +1010,9 @@ static int context_struct_to_string(struct context *context, char **scontext, u3 | |||
1008 | *scontext_len += strlen(policydb.p_type_val_to_name[context->type - 1]) + 1; | 1010 | *scontext_len += strlen(policydb.p_type_val_to_name[context->type - 1]) + 1; |
1009 | *scontext_len += mls_compute_context_len(context); | 1011 | *scontext_len += mls_compute_context_len(context); |
1010 | 1012 | ||
1013 | if (!scontext) | ||
1014 | return 0; | ||
1015 | |||
1011 | /* Allocate space for the context; caller must free this space. */ | 1016 | /* Allocate space for the context; caller must free this space. */ |
1012 | scontextp = kmalloc(*scontext_len, GFP_ATOMIC); | 1017 | scontextp = kmalloc(*scontext_len, GFP_ATOMIC); |
1013 | if (!scontextp) | 1018 | if (!scontextp) |
@@ -1047,7 +1052,8 @@ static int security_sid_to_context_core(u32 sid, char **scontext, | |||
1047 | struct context *context; | 1052 | struct context *context; |
1048 | int rc = 0; | 1053 | int rc = 0; |
1049 | 1054 | ||
1050 | *scontext = NULL; | 1055 | if (scontext) |
1056 | *scontext = NULL; | ||
1051 | *scontext_len = 0; | 1057 | *scontext_len = 0; |
1052 | 1058 | ||
1053 | if (!ss_initialized) { | 1059 | if (!ss_initialized) { |
@@ -1055,6 +1061,8 @@ static int security_sid_to_context_core(u32 sid, char **scontext, | |||
1055 | char *scontextp; | 1061 | char *scontextp; |
1056 | 1062 | ||
1057 | *scontext_len = strlen(initial_sid_to_string[sid]) + 1; | 1063 | *scontext_len = strlen(initial_sid_to_string[sid]) + 1; |
1064 | if (!scontext) | ||
1065 | goto out; | ||
1058 | scontextp = kmalloc(*scontext_len, GFP_ATOMIC); | 1066 | scontextp = kmalloc(*scontext_len, GFP_ATOMIC); |
1059 | if (!scontextp) { | 1067 | if (!scontextp) { |
1060 | rc = -ENOMEM; | 1068 | rc = -ENOMEM; |
@@ -1769,6 +1777,7 @@ int security_load_policy(void *data, size_t len) | |||
1769 | return rc; | 1777 | return rc; |
1770 | } | 1778 | } |
1771 | 1779 | ||
1780 | policydb.len = len; | ||
1772 | rc = selinux_set_mapping(&policydb, secclass_map, | 1781 | rc = selinux_set_mapping(&policydb, secclass_map, |
1773 | ¤t_mapping, | 1782 | ¤t_mapping, |
1774 | ¤t_mapping_size); | 1783 | ¤t_mapping_size); |
@@ -1791,6 +1800,7 @@ int security_load_policy(void *data, size_t len) | |||
1791 | selinux_complete_init(); | 1800 | selinux_complete_init(); |
1792 | avc_ss_reset(seqno); | 1801 | avc_ss_reset(seqno); |
1793 | selnl_notify_policyload(seqno); | 1802 | selnl_notify_policyload(seqno); |
1803 | selinux_status_update_policyload(seqno); | ||
1794 | selinux_netlbl_cache_invalidate(); | 1804 | selinux_netlbl_cache_invalidate(); |
1795 | selinux_xfrm_notify_policyload(); | 1805 | selinux_xfrm_notify_policyload(); |
1796 | return 0; | 1806 | return 0; |
@@ -1804,6 +1814,7 @@ int security_load_policy(void *data, size_t len) | |||
1804 | if (rc) | 1814 | if (rc) |
1805 | return rc; | 1815 | return rc; |
1806 | 1816 | ||
1817 | newpolicydb.len = len; | ||
1807 | /* If switching between different policy types, log MLS status */ | 1818 | /* If switching between different policy types, log MLS status */ |
1808 | if (policydb.mls_enabled && !newpolicydb.mls_enabled) | 1819 | if (policydb.mls_enabled && !newpolicydb.mls_enabled) |
1809 | printk(KERN_INFO "SELinux: Disabling MLS support...\n"); | 1820 | printk(KERN_INFO "SELinux: Disabling MLS support...\n"); |
@@ -1870,6 +1881,7 @@ int security_load_policy(void *data, size_t len) | |||
1870 | 1881 | ||
1871 | avc_ss_reset(seqno); | 1882 | avc_ss_reset(seqno); |
1872 | selnl_notify_policyload(seqno); | 1883 | selnl_notify_policyload(seqno); |
1884 | selinux_status_update_policyload(seqno); | ||
1873 | selinux_netlbl_cache_invalidate(); | 1885 | selinux_netlbl_cache_invalidate(); |
1874 | selinux_xfrm_notify_policyload(); | 1886 | selinux_xfrm_notify_policyload(); |
1875 | 1887 | ||
@@ -1883,6 +1895,17 @@ err: | |||
1883 | 1895 | ||
1884 | } | 1896 | } |
1885 | 1897 | ||
1898 | size_t security_policydb_len(void) | ||
1899 | { | ||
1900 | size_t len; | ||
1901 | |||
1902 | read_lock(&policy_rwlock); | ||
1903 | len = policydb.len; | ||
1904 | read_unlock(&policy_rwlock); | ||
1905 | |||
1906 | return len; | ||
1907 | } | ||
1908 | |||
1886 | /** | 1909 | /** |
1887 | * security_port_sid - Obtain the SID for a port. | 1910 | * security_port_sid - Obtain the SID for a port. |
1888 | * @protocol: protocol number | 1911 | * @protocol: protocol number |
@@ -2374,6 +2397,7 @@ out: | |||
2374 | if (!rc) { | 2397 | if (!rc) { |
2375 | avc_ss_reset(seqno); | 2398 | avc_ss_reset(seqno); |
2376 | selnl_notify_policyload(seqno); | 2399 | selnl_notify_policyload(seqno); |
2400 | selinux_status_update_policyload(seqno); | ||
2377 | selinux_xfrm_notify_policyload(); | 2401 | selinux_xfrm_notify_policyload(); |
2378 | } | 2402 | } |
2379 | return rc; | 2403 | return rc; |
@@ -3129,3 +3153,38 @@ netlbl_sid_to_secattr_failure: | |||
3129 | return rc; | 3153 | return rc; |
3130 | } | 3154 | } |
3131 | #endif /* CONFIG_NETLABEL */ | 3155 | #endif /* CONFIG_NETLABEL */ |
3156 | |||
3157 | /** | ||
3158 | * security_read_policy - read the policy. | ||
3159 | * @data: set to a vmalloc_user() buffer holding the binary policy data | ||
3160 | * @len: set to the number of bytes written into that buffer | ||
3161 | * | ||
3162 | */ | ||
3163 | int security_read_policy(void **data, ssize_t *len) | ||
3164 | { | ||
3165 | int rc; | ||
3166 | struct policy_file fp; | ||
3167 | |||
3168 | if (!ss_initialized) | ||
3169 | return -EINVAL; | ||
3170 | |||
3171 | *len = security_policydb_len(); | ||
3172 | |||
3173 | *data = vmalloc_user(*len); | ||
3174 | if (!*data) | ||
3175 | return -ENOMEM; | ||
3176 | |||
3177 | fp.data = *data; | ||
3178 | fp.len = *len; | ||
3179 | |||
3180 | read_lock(&policy_rwlock); | ||
3181 | rc = policydb_write(&policydb, &fp); | ||
3182 | read_unlock(&policy_rwlock); | ||
3183 | |||
3184 | if (rc) | ||
3185 | return rc; | ||
3186 | |||
3187 | *len = (unsigned long)fp.data - (unsigned long)*data; | ||
3188 | return 0; | ||
3189 | |||
3190 | } | ||
diff --git a/security/selinux/ss/status.c b/security/selinux/ss/status.c new file mode 100644 index 000000000000..d982365f9d1a --- /dev/null +++ b/security/selinux/ss/status.c | |||
@@ -0,0 +1,126 @@ | |||
1 | /* | ||
2 | * mmap based event notifications for SELinux | ||
3 | * | ||
4 | * Author: KaiGai Kohei <kaigai@ak.jp.nec.com> | ||
5 | * | ||
6 | * Copyright (C) 2010 NEC corporation | ||
7 | * | ||
8 | * This program is free software; you can redistribute it and/or modify | ||
9 | * it under the terms of the GNU General Public License version 2, | ||
10 | * as published by the Free Software Foundation. | ||
11 | */ | ||
12 | #include <linux/kernel.h> | ||
13 | #include <linux/gfp.h> | ||
14 | #include <linux/mm.h> | ||
15 | #include <linux/mutex.h> | ||
16 | #include "avc.h" | ||
17 | #include "services.h" | ||
18 | |||
19 | /* | ||
20 | * The selinux_status_page shall be exposed to userspace applications | ||
21 | * using the mmap interface on /selinux/status. | ||
22 | * It allows applications to be notified, without context switching, of | ||
23 | * the few events that cause a reset of the userspace access vector cache. | ||
24 | * | ||
25 | * The selinux_kernel_status structure on the head of status page is | ||
26 | * protected from concurrent accesses using seqlock logic, so userspace | ||
27 | * application should reference the status page according to the seqlock | ||
28 | * logic. | ||
29 | * | ||
30 | * Typically, an application checks status->sequence at the start of its | ||
31 | * access control routine. If it is an odd number, the kernel is updating | ||
32 | * the status, so the application should wait a moment and retry. If it has | ||
33 | * changed from the last sequence number seen, something has happened, so | ||
34 | * the application will reset its userspace avc, if needed. | ||
35 | * In most cases, the application can confirm that the kernel status is | ||
36 | * unchanged without invoking any system call. | ||
37 | */ | ||
38 | static struct page *selinux_status_page; | ||
39 | static DEFINE_MUTEX(selinux_status_lock); | ||
40 | |||
41 | /* | ||
42 | * selinux_kernel_status_page | ||
43 | * | ||
44 | * It returns a reference to selinux_status_page. If the status page has | ||
45 | * not been allocated yet, it tries to allocate it on this first call. | ||
46 | */ | ||
47 | struct page *selinux_kernel_status_page(void) | ||
48 | { | ||
49 | struct selinux_kernel_status *status; | ||
50 | struct page *result = NULL; | ||
51 | |||
52 | mutex_lock(&selinux_status_lock); | ||
53 | if (!selinux_status_page) { | ||
54 | selinux_status_page = alloc_page(GFP_KERNEL|__GFP_ZERO); | ||
55 | |||
56 | if (selinux_status_page) { | ||
57 | status = page_address(selinux_status_page); | ||
58 | |||
59 | status->version = SELINUX_KERNEL_STATUS_VERSION; | ||
60 | status->sequence = 0; | ||
61 | status->enforcing = selinux_enforcing; | ||
62 | /* | ||
63 | * NOTE: the next policyload event shall set | ||
64 | * a positive value on the status->policyload, | ||
65 | * although it may not be 1, but never zero. | ||
66 | * So, application can know it was updated. | ||
67 | */ | ||
68 | status->policyload = 0; | ||
69 | status->deny_unknown = !security_get_allow_unknown(); | ||
70 | } | ||
71 | } | ||
72 | result = selinux_status_page; | ||
73 | mutex_unlock(&selinux_status_lock); | ||
74 | |||
75 | return result; | ||
76 | } | ||
77 | |||
78 | /* | ||
79 | * selinux_status_update_setenforce | ||
80 | * | ||
81 | * It updates status of the current enforcing/permissive mode. | ||
82 | */ | ||
83 | void selinux_status_update_setenforce(int enforcing) | ||
84 | { | ||
85 | struct selinux_kernel_status *status; | ||
86 | |||
87 | mutex_lock(&selinux_status_lock); | ||
88 | if (selinux_status_page) { | ||
89 | status = page_address(selinux_status_page); | ||
90 | |||
91 | status->sequence++; | ||
92 | smp_wmb(); | ||
93 | |||
94 | status->enforcing = enforcing; | ||
95 | |||
96 | smp_wmb(); | ||
97 | status->sequence++; | ||
98 | } | ||
99 | mutex_unlock(&selinux_status_lock); | ||
100 | } | ||
101 | |||
102 | /* | ||
103 | * selinux_status_update_policyload | ||
104 | * | ||
105 | * It updates status of the times of policy reloaded, and current | ||
106 | * setting of deny_unknown. | ||
107 | */ | ||
108 | void selinux_status_update_policyload(int seqno) | ||
109 | { | ||
110 | struct selinux_kernel_status *status; | ||
111 | |||
112 | mutex_lock(&selinux_status_lock); | ||
113 | if (selinux_status_page) { | ||
114 | status = page_address(selinux_status_page); | ||
115 | |||
116 | status->sequence++; | ||
117 | smp_wmb(); | ||
118 | |||
119 | status->policyload = seqno; | ||
120 | status->deny_unknown = !security_get_allow_unknown(); | ||
121 | |||
122 | smp_wmb(); | ||
123 | status->sequence++; | ||
124 | } | ||
125 | mutex_unlock(&selinux_status_lock); | ||
126 | } | ||
diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c index c448d57ae2b7..bc39f4067af6 100644 --- a/security/smack/smack_lsm.c +++ b/security/smack/smack_lsm.c | |||
@@ -1281,12 +1281,11 @@ static int smack_task_getioprio(struct task_struct *p) | |||
1281 | * | 1281 | * |
1282 | * Return 0 if read access is permitted | 1282 | * Return 0 if read access is permitted |
1283 | */ | 1283 | */ |
1284 | static int smack_task_setscheduler(struct task_struct *p, int policy, | 1284 | static int smack_task_setscheduler(struct task_struct *p) |
1285 | struct sched_param *lp) | ||
1286 | { | 1285 | { |
1287 | int rc; | 1286 | int rc; |
1288 | 1287 | ||
1289 | rc = cap_task_setscheduler(p, policy, lp); | 1288 | rc = cap_task_setscheduler(p); |
1290 | if (rc == 0) | 1289 | if (rc == 0) |
1291 | rc = smk_curacc_on_task(p, MAY_WRITE); | 1290 | rc = smk_curacc_on_task(p, MAY_WRITE); |
1292 | return rc; | 1291 | return rc; |
@@ -3005,7 +3004,8 @@ static int smack_secid_to_secctx(u32 secid, char **secdata, u32 *seclen) | |||
3005 | { | 3004 | { |
3006 | char *sp = smack_from_secid(secid); | 3005 | char *sp = smack_from_secid(secid); |
3007 | 3006 | ||
3008 | *secdata = sp; | 3007 | if (secdata) |
3008 | *secdata = sp; | ||
3009 | *seclen = strlen(sp); | 3009 | *seclen = strlen(sp); |
3010 | return 0; | 3010 | return 0; |
3011 | } | 3011 | } |
diff --git a/security/tomoyo/common.c b/security/tomoyo/common.c index c668b447c725..7556315c1978 100644 --- a/security/tomoyo/common.c +++ b/security/tomoyo/common.c | |||
@@ -768,8 +768,10 @@ static bool tomoyo_select_one(struct tomoyo_io_buffer *head, const char *data) | |||
768 | return true; /* Do nothing if open(O_WRONLY). */ | 768 | return true; /* Do nothing if open(O_WRONLY). */ |
769 | memset(&head->r, 0, sizeof(head->r)); | 769 | memset(&head->r, 0, sizeof(head->r)); |
770 | head->r.print_this_domain_only = true; | 770 | head->r.print_this_domain_only = true; |
771 | head->r.eof = !domain; | 771 | if (domain) |
772 | head->r.domain = &domain->list; | 772 | head->r.domain = &domain->list; |
773 | else | ||
774 | head->r.eof = 1; | ||
773 | tomoyo_io_printf(head, "# select %s\n", data); | 775 | tomoyo_io_printf(head, "# select %s\n", data); |
774 | if (domain && domain->is_deleted) | 776 | if (domain && domain->is_deleted) |
775 | tomoyo_io_printf(head, "# This is a deleted domain.\n"); | 777 | tomoyo_io_printf(head, "# This is a deleted domain.\n"); |
@@ -2051,13 +2053,22 @@ void tomoyo_check_profile(void) | |||
2051 | const u8 profile = domain->profile; | 2053 | const u8 profile = domain->profile; |
2052 | if (tomoyo_profile_ptr[profile]) | 2054 | if (tomoyo_profile_ptr[profile]) |
2053 | continue; | 2055 | continue; |
2056 | printk(KERN_ERR "You need to define profile %u before using it.\n", | ||
2057 | profile); | ||
2058 | printk(KERN_ERR "Please see http://tomoyo.sourceforge.jp/2.3/ " | ||
2059 | "for more information.\n"); | ||
2054 | panic("Profile %u (used by '%s') not defined.\n", | 2060 | panic("Profile %u (used by '%s') not defined.\n", |
2055 | profile, domain->domainname->name); | 2061 | profile, domain->domainname->name); |
2056 | } | 2062 | } |
2057 | tomoyo_read_unlock(idx); | 2063 | tomoyo_read_unlock(idx); |
2058 | if (tomoyo_profile_version != 20090903) | 2064 | if (tomoyo_profile_version != 20090903) { |
2065 | printk(KERN_ERR "You need to install userland programs for " | ||
2066 | "TOMOYO 2.3 and initialize policy configuration.\n"); | ||
2067 | printk(KERN_ERR "Please see http://tomoyo.sourceforge.jp/2.3/ " | ||
2068 | "for more information.\n"); | ||
2059 | panic("Profile version %u is not supported.\n", | 2069 | panic("Profile version %u is not supported.\n", |
2060 | tomoyo_profile_version); | 2070 | tomoyo_profile_version); |
2071 | } | ||
2061 | printk(KERN_INFO "TOMOYO: 2.3.0\n"); | 2072 | printk(KERN_INFO "TOMOYO: 2.3.0\n"); |
2062 | printk(KERN_INFO "Mandatory Access Control activated.\n"); | 2073 | printk(KERN_INFO "Mandatory Access Control activated.\n"); |
2063 | } | 2074 | } |
diff --git a/sound/core/rawmidi.c b/sound/core/rawmidi.c index a7868ad4d530..cbbed0db9e56 100644 --- a/sound/core/rawmidi.c +++ b/sound/core/rawmidi.c | |||
@@ -535,13 +535,15 @@ static int snd_rawmidi_release(struct inode *inode, struct file *file) | |||
535 | { | 535 | { |
536 | struct snd_rawmidi_file *rfile; | 536 | struct snd_rawmidi_file *rfile; |
537 | struct snd_rawmidi *rmidi; | 537 | struct snd_rawmidi *rmidi; |
538 | struct module *module; | ||
538 | 539 | ||
539 | rfile = file->private_data; | 540 | rfile = file->private_data; |
540 | rmidi = rfile->rmidi; | 541 | rmidi = rfile->rmidi; |
541 | rawmidi_release_priv(rfile); | 542 | rawmidi_release_priv(rfile); |
542 | kfree(rfile); | 543 | kfree(rfile); |
544 | module = rmidi->card->module; | ||
543 | snd_card_file_remove(rmidi->card, file); | 545 | snd_card_file_remove(rmidi->card, file); |
544 | module_put(rmidi->card->module); | 546 | module_put(module); |
545 | return 0; | 547 | return 0; |
546 | } | 548 | } |
547 | 549 | ||
diff --git a/tools/perf/Documentation/perf-annotate.txt b/tools/perf/Documentation/perf-annotate.txt index 5164a655c39f..b2c63309a651 100644 --- a/tools/perf/Documentation/perf-annotate.txt +++ b/tools/perf/Documentation/perf-annotate.txt | |||
@@ -8,7 +8,7 @@ perf-annotate - Read perf.data (created by perf record) and display annotated co | |||
8 | SYNOPSIS | 8 | SYNOPSIS |
9 | -------- | 9 | -------- |
10 | [verse] | 10 | [verse] |
11 | 'perf annotate' [-i <file> | --input=file] symbol_name | 11 | 'perf annotate' [-i <file> | --input=file] [symbol_name] |
12 | 12 | ||
13 | DESCRIPTION | 13 | DESCRIPTION |
14 | ----------- | 14 | ----------- |
@@ -24,6 +24,13 @@ OPTIONS | |||
24 | --input=:: | 24 | --input=:: |
25 | Input file name. (default: perf.data) | 25 | Input file name. (default: perf.data) |
26 | 26 | ||
27 | --stdio:: Use the stdio interface. | ||
28 | |||
29 | --tui:: Use the TUI interface. Use of --tui requires a tty; if one is not | ||
30 | present, as when piping to other commands, the stdio interface is | ||
31 | used. This interface starts by centering on the line with the most | ||
32 | samples; TAB/UNTAB cycles through the lines with more samples. | ||
33 | |||
27 | SEE ALSO | 34 | SEE ALSO |
28 | -------- | 35 | -------- |
29 | linkperf:perf-record[1] | 36 | linkperf:perf-record[1], linkperf:perf-report[1] |
diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt index abfabe9147a4..12052c9ed0ba 100644 --- a/tools/perf/Documentation/perf-report.txt +++ b/tools/perf/Documentation/perf-report.txt | |||
@@ -65,6 +65,13 @@ OPTIONS | |||
65 | the tree is considered as a new profiled object. + | 65 | the tree is considered as a new profiled object. + |
66 | Default: fractal,0.5. | 66 | Default: fractal,0.5. |
67 | 67 | ||
68 | --stdio:: Use the stdio interface. | ||
69 | |||
70 | --tui:: Use the TUI interface, which is integrated with annotate and allows | ||
71 | zooming into DSOs or threads, among other features. Use of --tui | ||
72 | requires a tty; if one is not present, as when piping to other | ||
73 | commands, the stdio interface is used. | ||
74 | |||
68 | SEE ALSO | 75 | SEE ALSO |
69 | -------- | 76 | -------- |
70 | linkperf:perf-stat[1] | 77 | linkperf:perf-stat[1] |
diff --git a/tools/perf/Makefile b/tools/perf/Makefile index 1950e19af1cf..d1db0f676a4b 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile | |||
@@ -313,6 +313,9 @@ TEST_PROGRAMS = | |||
313 | 313 | ||
314 | SCRIPT_SH += perf-archive.sh | 314 | SCRIPT_SH += perf-archive.sh |
315 | 315 | ||
316 | grep-libs = $(filter -l%,$(1)) | ||
317 | strip-libs = $(filter-out -l%,$(1)) | ||
318 | |||
316 | # | 319 | # |
317 | # No Perl scripts right now: | 320 | # No Perl scripts right now: |
318 | # | 321 | # |
@@ -588,14 +591,17 @@ endif | |||
588 | ifdef NO_LIBPERL | 591 | ifdef NO_LIBPERL |
589 | BASIC_CFLAGS += -DNO_LIBPERL | 592 | BASIC_CFLAGS += -DNO_LIBPERL |
590 | else | 593 | else |
591 | PERL_EMBED_LDOPTS = `perl -MExtUtils::Embed -e ldopts 2>/dev/null` | 594 | PERL_EMBED_LDOPTS = $(shell perl -MExtUtils::Embed -e ldopts 2>/dev/null) |
595 | PERL_EMBED_LDFLAGS = $(call strip-libs,$(PERL_EMBED_LDOPTS)) | ||
596 | PERL_EMBED_LIBADD = $(call grep-libs,$(PERL_EMBED_LDOPTS)) | ||
592 | PERL_EMBED_CCOPTS = `perl -MExtUtils::Embed -e ccopts 2>/dev/null` | 597 | PERL_EMBED_CCOPTS = `perl -MExtUtils::Embed -e ccopts 2>/dev/null` |
593 | FLAGS_PERL_EMBED=$(PERL_EMBED_CCOPTS) $(PERL_EMBED_LDOPTS) | 598 | FLAGS_PERL_EMBED=$(PERL_EMBED_CCOPTS) $(PERL_EMBED_LDOPTS) |
594 | 599 | ||
595 | ifneq ($(call try-cc,$(SOURCE_PERL_EMBED),$(FLAGS_PERL_EMBED)),y) | 600 | ifneq ($(call try-cc,$(SOURCE_PERL_EMBED),$(FLAGS_PERL_EMBED)),y) |
596 | BASIC_CFLAGS += -DNO_LIBPERL | 601 | BASIC_CFLAGS += -DNO_LIBPERL |
597 | else | 602 | else |
598 | ALL_LDFLAGS += $(PERL_EMBED_LDOPTS) | 603 | ALL_LDFLAGS += $(PERL_EMBED_LDFLAGS) |
604 | EXTLIBS += $(PERL_EMBED_LIBADD) | ||
599 | LIB_OBJS += $(OUTPUT)util/scripting-engines/trace-event-perl.o | 605 | LIB_OBJS += $(OUTPUT)util/scripting-engines/trace-event-perl.o |
600 | LIB_OBJS += $(OUTPUT)scripts/perl/Perf-Trace-Util/Context.o | 606 | LIB_OBJS += $(OUTPUT)scripts/perl/Perf-Trace-Util/Context.o |
601 | endif | 607 | endif |
@@ -604,13 +610,16 @@ endif | |||
604 | ifdef NO_LIBPYTHON | 610 | ifdef NO_LIBPYTHON |
605 | BASIC_CFLAGS += -DNO_LIBPYTHON | 611 | BASIC_CFLAGS += -DNO_LIBPYTHON |
606 | else | 612 | else |
607 | PYTHON_EMBED_LDOPTS = `python-config --ldflags 2>/dev/null` | 613 | PYTHON_EMBED_LDOPTS = $(shell python-config --ldflags 2>/dev/null) |
614 | PYTHON_EMBED_LDFLAGS = $(call strip-libs,$(PYTHON_EMBED_LDOPTS)) | ||
615 | PYTHON_EMBED_LIBADD = $(call grep-libs,$(PYTHON_EMBED_LDOPTS)) | ||
608 | PYTHON_EMBED_CCOPTS = `python-config --cflags 2>/dev/null` | 616 | PYTHON_EMBED_CCOPTS = `python-config --cflags 2>/dev/null` |
609 | FLAGS_PYTHON_EMBED=$(PYTHON_EMBED_CCOPTS) $(PYTHON_EMBED_LDOPTS) | 617 | FLAGS_PYTHON_EMBED=$(PYTHON_EMBED_CCOPTS) $(PYTHON_EMBED_LDOPTS) |
610 | ifneq ($(call try-cc,$(SOURCE_PYTHON_EMBED),$(FLAGS_PYTHON_EMBED)),y) | 618 | ifneq ($(call try-cc,$(SOURCE_PYTHON_EMBED),$(FLAGS_PYTHON_EMBED)),y) |
611 | BASIC_CFLAGS += -DNO_LIBPYTHON | 619 | BASIC_CFLAGS += -DNO_LIBPYTHON |
612 | else | 620 | else |
613 | ALL_LDFLAGS += $(PYTHON_EMBED_LDOPTS) | 621 | ALL_LDFLAGS += $(PYTHON_EMBED_LDFLAGS) |
622 | EXTLIBS += $(PYTHON_EMBED_LIBADD) | ||
614 | LIB_OBJS += $(OUTPUT)util/scripting-engines/trace-event-python.o | 623 | LIB_OBJS += $(OUTPUT)util/scripting-engines/trace-event-python.o |
615 | LIB_OBJS += $(OUTPUT)scripts/python/Perf-Trace-Util/Context.o | 624 | LIB_OBJS += $(OUTPUT)scripts/python/Perf-Trace-Util/Context.o |
616 | endif | 625 | endif |
@@ -653,6 +662,15 @@ else | |||
653 | endif | 662 | endif |
654 | endif | 663 | endif |
655 | 664 | ||
665 | |||
666 | ifdef NO_STRLCPY | ||
667 | BASIC_CFLAGS += -DNO_STRLCPY | ||
668 | else | ||
669 | ifneq ($(call try-cc,$(SOURCE_STRLCPY),),y) | ||
670 | BASIC_CFLAGS += -DNO_STRLCPY | ||
671 | endif | ||
672 | endif | ||
673 | |||
656 | ifndef CC_LD_DYNPATH | 674 | ifndef CC_LD_DYNPATH |
657 | ifdef NO_R_TO_GCC_LINKER | 675 | ifdef NO_R_TO_GCC_LINKER |
658 | # Some gcc does not accept and pass -R to the linker to specify | 676 | # Some gcc does not accept and pass -R to the linker to specify |
@@ -910,8 +928,8 @@ $(OUTPUT)perf.o: perf.c $(OUTPUT)common-cmds.h $(OUTPUT)PERF-CFLAGS | |||
910 | $(ALL_CFLAGS) -c $(filter %.c,$^) -o $@ | 928 | $(ALL_CFLAGS) -c $(filter %.c,$^) -o $@ |
911 | 929 | ||
912 | $(OUTPUT)perf$X: $(OUTPUT)perf.o $(BUILTIN_OBJS) $(PERFLIBS) | 930 | $(OUTPUT)perf$X: $(OUTPUT)perf.o $(BUILTIN_OBJS) $(PERFLIBS) |
913 | $(QUIET_LINK)$(CC) $(ALL_CFLAGS) -o $@ $(OUTPUT)perf.o \ | 931 | $(QUIET_LINK)$(CC) $(ALL_CFLAGS) $(ALL_LDFLAGS) $(OUTPUT)perf.o \ |
914 | $(BUILTIN_OBJS) $(ALL_LDFLAGS) $(LIBS) | 932 | $(BUILTIN_OBJS) $(LIBS) -o $@ |
915 | 933 | ||
916 | $(OUTPUT)builtin-help.o: builtin-help.c $(OUTPUT)common-cmds.h $(OUTPUT)PERF-CFLAGS | 934 | $(OUTPUT)builtin-help.o: builtin-help.c $(OUTPUT)common-cmds.h $(OUTPUT)PERF-CFLAGS |
917 | $(QUIET_CC)$(CC) -o $@ -c $(ALL_CFLAGS) \ | 935 | $(QUIET_CC)$(CC) -o $@ -c $(ALL_CFLAGS) \ |
diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index 1478dc64bf15..6d5604d8df95 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c | |||
@@ -28,7 +28,7 @@ | |||
28 | 28 | ||
29 | static char const *input_name = "perf.data"; | 29 | static char const *input_name = "perf.data"; |
30 | 30 | ||
31 | static bool force; | 31 | static bool force, use_tui, use_stdio; |
32 | 32 | ||
33 | static bool full_paths; | 33 | static bool full_paths; |
34 | 34 | ||
@@ -321,7 +321,7 @@ static int hist_entry__tty_annotate(struct hist_entry *he) | |||
321 | 321 | ||
322 | static void hists__find_annotations(struct hists *self) | 322 | static void hists__find_annotations(struct hists *self) |
323 | { | 323 | { |
324 | struct rb_node *first = rb_first(&self->entries), *nd = first; | 324 | struct rb_node *nd = rb_first(&self->entries), *next; |
325 | int key = KEY_RIGHT; | 325 | int key = KEY_RIGHT; |
326 | 326 | ||
327 | while (nd) { | 327 | while (nd) { |
@@ -343,20 +343,19 @@ find_next: | |||
343 | 343 | ||
344 | if (use_browser > 0) { | 344 | if (use_browser > 0) { |
345 | key = hist_entry__tui_annotate(he); | 345 | key = hist_entry__tui_annotate(he); |
346 | if (is_exit_key(key)) | ||
347 | break; | ||
348 | switch (key) { | 346 | switch (key) { |
349 | case KEY_RIGHT: | 347 | case KEY_RIGHT: |
350 | case '\t': | 348 | next = rb_next(nd); |
351 | nd = rb_next(nd); | ||
352 | break; | 349 | break; |
353 | case KEY_LEFT: | 350 | case KEY_LEFT: |
354 | if (nd == first) | 351 | next = rb_prev(nd); |
355 | continue; | ||
356 | nd = rb_prev(nd); | ||
357 | default: | ||
358 | break; | 352 | break; |
353 | default: | ||
354 | return; | ||
359 | } | 355 | } |
356 | |||
357 | if (next != NULL) | ||
358 | nd = next; | ||
360 | } else { | 359 | } else { |
361 | hist_entry__tty_annotate(he); | 360 | hist_entry__tty_annotate(he); |
362 | nd = rb_next(nd); | 361 | nd = rb_next(nd); |
@@ -428,6 +427,8 @@ static const struct option options[] = { | |||
428 | "be more verbose (show symbol address, etc)"), | 427 | "be more verbose (show symbol address, etc)"), |
429 | OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace, | 428 | OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace, |
430 | "dump raw trace in ASCII"), | 429 | "dump raw trace in ASCII"), |
430 | OPT_BOOLEAN(0, "tui", &use_tui, "Use the TUI interface"), | ||
431 | OPT_BOOLEAN(0, "stdio", &use_stdio, "Use the stdio interface"), | ||
431 | OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name, | 432 | OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name, |
432 | "file", "vmlinux pathname"), | 433 | "file", "vmlinux pathname"), |
433 | OPT_BOOLEAN('m', "modules", &symbol_conf.use_modules, | 434 | OPT_BOOLEAN('m', "modules", &symbol_conf.use_modules, |
@@ -443,6 +444,11 @@ int cmd_annotate(int argc, const char **argv, const char *prefix __used) | |||
443 | { | 444 | { |
444 | argc = parse_options(argc, argv, options, annotate_usage, 0); | 445 | argc = parse_options(argc, argv, options, annotate_usage, 0); |
445 | 446 | ||
447 | if (use_stdio) | ||
448 | use_browser = 0; | ||
449 | else if (use_tui) | ||
450 | use_browser = 1; | ||
451 | |||
446 | setup_browser(); | 452 | setup_browser(); |
447 | 453 | ||
448 | symbol_conf.priv_size = sizeof(struct sym_priv); | 454 | symbol_conf.priv_size = sizeof(struct sym_priv); |
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 55fc1f46892a..5de405d45230 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c | |||
@@ -32,7 +32,7 @@ | |||
32 | 32 | ||
33 | static char const *input_name = "perf.data"; | 33 | static char const *input_name = "perf.data"; |
34 | 34 | ||
35 | static bool force; | 35 | static bool force, use_tui, use_stdio; |
36 | static bool hide_unresolved; | 36 | static bool hide_unresolved; |
37 | static bool dont_use_callchains; | 37 | static bool dont_use_callchains; |
38 | 38 | ||
@@ -107,7 +107,8 @@ static int perf_session__add_hist_entry(struct perf_session *self, | |||
107 | goto out_free_syms; | 107 | goto out_free_syms; |
108 | err = 0; | 108 | err = 0; |
109 | if (symbol_conf.use_callchain) { | 109 | if (symbol_conf.use_callchain) { |
110 | err = append_chain(he->callchain, data->callchain, syms, data->period); | 110 | err = callchain_append(he->callchain, data->callchain, syms, |
111 | data->period); | ||
111 | if (err) | 112 | if (err) |
112 | goto out_free_syms; | 113 | goto out_free_syms; |
113 | } | 114 | } |
@@ -450,6 +451,8 @@ static const struct option options[] = { | |||
450 | "Show per-thread event counters"), | 451 | "Show per-thread event counters"), |
451 | OPT_STRING(0, "pretty", &pretty_printing_style, "key", | 452 | OPT_STRING(0, "pretty", &pretty_printing_style, "key", |
452 | "pretty printing style key: normal raw"), | 453 | "pretty printing style key: normal raw"), |
454 | OPT_BOOLEAN(0, "tui", &use_tui, "Use the TUI interface"), | ||
455 | OPT_BOOLEAN(0, "stdio", &use_stdio, "Use the stdio interface"), | ||
453 | OPT_STRING('s', "sort", &sort_order, "key[,key2...]", | 456 | OPT_STRING('s', "sort", &sort_order, "key[,key2...]", |
454 | "sort by key(s): pid, comm, dso, symbol, parent"), | 457 | "sort by key(s): pid, comm, dso, symbol, parent"), |
455 | OPT_BOOLEAN(0, "showcpuutilization", &symbol_conf.show_cpu_utilization, | 458 | OPT_BOOLEAN(0, "showcpuutilization", &symbol_conf.show_cpu_utilization, |
@@ -482,8 +485,15 @@ int cmd_report(int argc, const char **argv, const char *prefix __used) | |||
482 | { | 485 | { |
483 | argc = parse_options(argc, argv, options, report_usage, 0); | 486 | argc = parse_options(argc, argv, options, report_usage, 0); |
484 | 487 | ||
488 | if (use_stdio) | ||
489 | use_browser = 0; | ||
490 | else if (use_tui) | ||
491 | use_browser = 1; | ||
492 | |||
485 | if (strcmp(input_name, "-") != 0) | 493 | if (strcmp(input_name, "-") != 0) |
486 | setup_browser(); | 494 | setup_browser(); |
495 | else | ||
496 | use_browser = 0; | ||
487 | /* | 497 | /* |
488 | * Only in the newt browser we are doing integrated annotation, | 498 | * Only in the newt browser we are doing integrated annotation, |
489 | * so don't allocate extra space that won't be used in the stdio | 499 | * so don't allocate extra space that won't be used in the stdio |
diff --git a/tools/perf/feature-tests.mak b/tools/perf/feature-tests.mak index 7a7b60859053..b253db634f04 100644 --- a/tools/perf/feature-tests.mak +++ b/tools/perf/feature-tests.mak | |||
@@ -110,6 +110,17 @@ int main(void) | |||
110 | } | 110 | } |
111 | endef | 111 | endef |
112 | 112 | ||
113 | define SOURCE_STRLCPY | ||
114 | #include <stdlib.h> | ||
115 | extern size_t strlcpy(char *dest, const char *src, size_t size); | ||
116 | |||
117 | int main(void) | ||
118 | { | ||
119 | strlcpy(NULL, NULL, 0); | ||
120 | return 0; | ||
121 | } | ||
122 | endef | ||
123 | |||
113 | # try-cc | 124 | # try-cc |
114 | # Usage: option = $(call try-cc, source-to-build, cc-options) | 125 | # Usage: option = $(call try-cc, source-to-build, cc-options) |
115 | try-cc = $(shell sh -c \ | 126 | try-cc = $(shell sh -c \ |
diff --git a/tools/perf/scripts/python/bin/netdev-times-record b/tools/perf/scripts/python/bin/netdev-times-record new file mode 100644 index 000000000000..d931a828126b --- /dev/null +++ b/tools/perf/scripts/python/bin/netdev-times-record | |||
@@ -0,0 +1,8 @@ | |||
1 | #!/bin/bash | ||
2 | perf record -a -e net:net_dev_xmit -e net:net_dev_queue \ | ||
3 | -e net:netif_receive_skb -e net:netif_rx \ | ||
4 | -e skb:consume_skb -e skb:kfree_skb \ | ||
5 | -e skb:skb_copy_datagram_iovec -e napi:napi_poll \ | ||
6 | -e irq:irq_handler_entry -e irq:irq_handler_exit \ | ||
7 | -e irq:softirq_entry -e irq:softirq_exit \ | ||
8 | -e irq:softirq_raise $@ | ||
diff --git a/tools/perf/scripts/python/bin/netdev-times-report b/tools/perf/scripts/python/bin/netdev-times-report new file mode 100644 index 000000000000..c3d0a638123d --- /dev/null +++ b/tools/perf/scripts/python/bin/netdev-times-report | |||
@@ -0,0 +1,5 @@ | |||
1 | #!/bin/bash | ||
2 | # description: display a process of packet and processing time | ||
3 | # args: [tx] [rx] [dev=] [debug] | ||
4 | |||
5 | perf trace -s ~/libexec/perf-core/scripts/python/netdev-times.py $@ | ||
diff --git a/tools/perf/scripts/python/netdev-times.py b/tools/perf/scripts/python/netdev-times.py new file mode 100644 index 000000000000..9aa0a32972e8 --- /dev/null +++ b/tools/perf/scripts/python/netdev-times.py | |||
@@ -0,0 +1,464 @@ | |||
1 | # Display a process of packets and processed time. | ||
2 | # It helps us to investigate networking or network device. | ||
3 | # | ||
4 | # options | ||
5 | # tx: show only tx chart | ||
6 | # rx: show only rx chart | ||
7 | # dev=: show only thing related to specified device | ||
8 | # debug: work with debug mode. It shows buffer status. | ||
9 | |||
10 | import os | ||
11 | import sys | ||
12 | |||
13 | sys.path.append(os.environ['PERF_EXEC_PATH'] + \ | ||
14 | '/scripts/python/Perf-Trace-Util/lib/Perf/Trace') | ||
15 | |||
16 | from perf_trace_context import * | ||
17 | from Core import * | ||
18 | from Util import * | ||
19 | |||
20 | all_event_list = []; # insert all tracepoint event related with this script | ||
21 | irq_dic = {}; # key is cpu and value is a list which stacks irqs | ||
22 | # which raise NET_RX softirq | ||
23 | net_rx_dic = {}; # key is cpu and value include time of NET_RX softirq-entry | ||
24 | # and a list which stacks receive | ||
25 | receive_hunk_list = []; # a list which include a sequence of receive events | ||
26 | rx_skb_list = []; # received packet list for matching | ||
27 | # skb_copy_datagram_iovec | ||
28 | |||
29 | buffer_budget = 65536; # the budget of rx_skb_list, tx_queue_list and | ||
30 | # tx_xmit_list | ||
31 | of_count_rx_skb_list = 0; # overflow count | ||
32 | |||
33 | tx_queue_list = []; # list of packets which pass through dev_queue_xmit | ||
34 | of_count_tx_queue_list = 0; # overflow count | ||
35 | |||
36 | tx_xmit_list = []; # list of packets which pass through dev_hard_start_xmit | ||
37 | of_count_tx_xmit_list = 0; # overflow count | ||
38 | |||
39 | tx_free_list = []; # list of packets which is freed | ||
40 | |||
41 | # options | ||
42 | show_tx = 0; | ||
43 | show_rx = 0; | ||
44 | dev = 0; # store a name of device specified by option "dev=" | ||
45 | debug = 0; | ||
46 | |||
47 | # indices of event_info tuple | ||
48 | EINFO_IDX_NAME= 0 | ||
49 | EINFO_IDX_CONTEXT=1 | ||
50 | EINFO_IDX_CPU= 2 | ||
51 | EINFO_IDX_TIME= 3 | ||
52 | EINFO_IDX_PID= 4 | ||
53 | EINFO_IDX_COMM= 5 | ||
54 | |||
55 | # Calculate a time interval(msec) from src(nsec) to dst(nsec) | ||
56 | def diff_msec(src, dst): | ||
57 | return (dst - src) / 1000000.0 | ||
58 | |||
59 | # Display a process of transmitting a packet | ||
60 | def print_transmit(hunk): | ||
61 | if dev != 0 and hunk['dev'].find(dev) < 0: | ||
62 | return | ||
63 | print "%7s %5d %6d.%06dsec %12.3fmsec %12.3fmsec" % \ | ||
64 | (hunk['dev'], hunk['len'], | ||
65 | nsecs_secs(hunk['queue_t']), | ||
66 | nsecs_nsecs(hunk['queue_t'])/1000, | ||
67 | diff_msec(hunk['queue_t'], hunk['xmit_t']), | ||
68 | diff_msec(hunk['xmit_t'], hunk['free_t'])) | ||
69 | |||
70 | # Format for displaying rx packet processing | ||
71 | PF_IRQ_ENTRY= " irq_entry(+%.3fmsec irq=%d:%s)" | ||
72 | PF_SOFT_ENTRY=" softirq_entry(+%.3fmsec)" | ||
73 | PF_NAPI_POLL= " napi_poll_exit(+%.3fmsec %s)" | ||
74 | PF_JOINT= " |" | ||
75 | PF_WJOINT= " | |" | ||
76 | PF_NET_RECV= " |---netif_receive_skb(+%.3fmsec skb=%x len=%d)" | ||
77 | PF_NET_RX= " |---netif_rx(+%.3fmsec skb=%x)" | ||
78 | PF_CPY_DGRAM= " | skb_copy_datagram_iovec(+%.3fmsec %d:%s)" | ||
79 | PF_KFREE_SKB= " | kfree_skb(+%.3fmsec location=%x)" | ||
80 | PF_CONS_SKB= " | consume_skb(+%.3fmsec)" | ||
81 | |||
82 | # Display a process of received packets and interrupts associated with | |
83 | # a NET_RX softirq | ||
84 | def print_receive(hunk): | ||
85 | show_hunk = 0 | ||
86 | irq_list = hunk['irq_list'] | ||
87 | cpu = irq_list[0]['cpu'] | ||
88 | base_t = irq_list[0]['irq_ent_t'] | ||
89 | # check if this hunk should be shown | |
90 | if dev != 0: | ||
91 | for i in range(len(irq_list)): | ||
92 | if irq_list[i]['name'].find(dev) >= 0: | ||
93 | show_hunk = 1 | ||
94 | break | ||
95 | else: | ||
96 | show_hunk = 1 | ||
97 | if show_hunk == 0: | ||
98 | return | ||
99 | |||
100 | print "%d.%06dsec cpu=%d" % \ | ||
101 | (nsecs_secs(base_t), nsecs_nsecs(base_t)/1000, cpu) | ||
102 | for i in range(len(irq_list)): | ||
103 | print PF_IRQ_ENTRY % \ | ||
104 | (diff_msec(base_t, irq_list[i]['irq_ent_t']), | ||
105 | irq_list[i]['irq'], irq_list[i]['name']) | ||
106 | print PF_JOINT | ||
107 | irq_event_list = irq_list[i]['event_list'] | ||
108 | for j in range(len(irq_event_list)): | ||
109 | irq_event = irq_event_list[j] | ||
110 | if irq_event['event'] == 'netif_rx': | ||
111 | print PF_NET_RX % \ | ||
112 | (diff_msec(base_t, irq_event['time']), | ||
113 | irq_event['skbaddr']) | ||
114 | print PF_JOINT | ||
115 | print PF_SOFT_ENTRY % \ | ||
116 | diff_msec(base_t, hunk['sirq_ent_t']) | ||
117 | print PF_JOINT | ||
118 | event_list = hunk['event_list'] | ||
119 | for i in range(len(event_list)): | ||
120 | event = event_list[i] | ||
121 | if event['event_name'] == 'napi_poll': | ||
122 | print PF_NAPI_POLL % \ | ||
123 | (diff_msec(base_t, event['event_t']), event['dev']) | ||
124 | if i == len(event_list) - 1: | ||
125 | print "" | ||
126 | else: | ||
127 | print PF_JOINT | ||
128 | else: | ||
129 | print PF_NET_RECV % \ | ||
130 | (diff_msec(base_t, event['event_t']), event['skbaddr'], | ||
131 | event['len']) | ||
132 | if 'comm' in event.keys(): | ||
133 | print PF_WJOINT | ||
134 | print PF_CPY_DGRAM % \ | ||
135 | (diff_msec(base_t, event['comm_t']), | ||
136 | event['pid'], event['comm']) | ||
137 | elif 'handle' in event.keys(): | ||
138 | print PF_WJOINT | ||
139 | if event['handle'] == "kfree_skb": | ||
140 | print PF_KFREE_SKB % \ | ||
141 | (diff_msec(base_t, | ||
142 | event['comm_t']), | ||
143 | event['location']) | ||
144 | elif event['handle'] == "consume_skb": | ||
145 | print PF_CONS_SKB % \ | ||
146 | diff_msec(base_t, | ||
147 | event['comm_t']) | ||
148 | print PF_JOINT | ||
149 | |||
150 | def trace_begin(): | ||
151 | global show_tx | ||
152 | global show_rx | ||
153 | global dev | ||
154 | global debug | ||
155 | |||
156 | for i in range(len(sys.argv)): | ||
157 | if i == 0: | ||
158 | continue | ||
159 | arg = sys.argv[i] | ||
160 | if arg == 'tx': | ||
161 | show_tx = 1 | ||
162 | elif arg =='rx': | ||
163 | show_rx = 1 | ||
164 | elif arg.find('dev=',0, 4) >= 0: | ||
165 | dev = arg[4:] | ||
166 | elif arg == 'debug': | ||
167 | debug = 1 | ||
168 | if show_tx == 0 and show_rx == 0: | ||
169 | show_tx = 1 | ||
170 | show_rx = 1 | ||
171 | |||
172 | def trace_end(): | ||
173 | # order all events in time | ||
174 | all_event_list.sort(lambda a,b :cmp(a[EINFO_IDX_TIME], | ||
175 | b[EINFO_IDX_TIME])) | ||
176 | # process all events | ||
177 | for i in range(len(all_event_list)): | ||
178 | event_info = all_event_list[i] | ||
179 | name = event_info[EINFO_IDX_NAME] | ||
180 | if name == 'irq__softirq_exit': | ||
181 | handle_irq_softirq_exit(event_info) | ||
182 | elif name == 'irq__softirq_entry': | ||
183 | handle_irq_softirq_entry(event_info) | ||
184 | elif name == 'irq__softirq_raise': | ||
185 | handle_irq_softirq_raise(event_info) | ||
186 | elif name == 'irq__irq_handler_entry': | ||
187 | handle_irq_handler_entry(event_info) | ||
188 | elif name == 'irq__irq_handler_exit': | ||
189 | handle_irq_handler_exit(event_info) | ||
190 | elif name == 'napi__napi_poll': | ||
191 | handle_napi_poll(event_info) | ||
192 | elif name == 'net__netif_receive_skb': | ||
193 | handle_netif_receive_skb(event_info) | ||
194 | elif name == 'net__netif_rx': | ||
195 | handle_netif_rx(event_info) | ||
196 | elif name == 'skb__skb_copy_datagram_iovec': | ||
197 | handle_skb_copy_datagram_iovec(event_info) | ||
198 | elif name == 'net__net_dev_queue': | ||
199 | handle_net_dev_queue(event_info) | ||
200 | elif name == 'net__net_dev_xmit': | ||
201 | handle_net_dev_xmit(event_info) | ||
202 | elif name == 'skb__kfree_skb': | ||
203 | handle_kfree_skb(event_info) | ||
204 | elif name == 'skb__consume_skb': | ||
205 | handle_consume_skb(event_info) | ||
206 | # display receive hunks | ||
207 | if show_rx: | ||
208 | for i in range(len(receive_hunk_list)): | ||
209 | print_receive(receive_hunk_list[i]) | ||
210 | # display transmit hunks | ||
211 | if show_tx: | ||
212 | print " dev len Qdisc " \ | ||
213 | " netdevice free" | ||
214 | for i in range(len(tx_free_list)): | ||
215 | print_transmit(tx_free_list[i]) | ||
216 | if debug: | ||
217 | print "debug buffer status" | ||
218 | print "----------------------------" | ||
219 | print "xmit Qdisc:remain:%d overflow:%d" % \ | ||
220 | (len(tx_queue_list), of_count_tx_queue_list) | ||
221 | print "xmit netdevice:remain:%d overflow:%d" % \ | ||
222 | (len(tx_xmit_list), of_count_tx_xmit_list) | ||
223 | print "receive:remain:%d overflow:%d" % \ | ||
224 | (len(rx_skb_list), of_count_rx_skb_list) | ||
225 | |||
226 | # called from perf, when it finds a corresponding event | |
227 | def irq__softirq_entry(name, context, cpu, sec, nsec, pid, comm, vec): | ||
228 | if symbol_str("irq__softirq_entry", "vec", vec) != "NET_RX": | ||
229 | return | ||
230 | event_info = (name, context, cpu, nsecs(sec, nsec), pid, comm, vec) | ||
231 | all_event_list.append(event_info) | ||
232 | |||
233 | def irq__softirq_exit(name, context, cpu, sec, nsec, pid, comm, vec): | ||
234 | if symbol_str("irq__softirq_entry", "vec", vec) != "NET_RX": | ||
235 | return | ||
236 | event_info = (name, context, cpu, nsecs(sec, nsec), pid, comm, vec) | ||
237 | all_event_list.append(event_info) | ||
238 | |||
239 | def irq__softirq_raise(name, context, cpu, sec, nsec, pid, comm, vec): | ||
240 | if symbol_str("irq__softirq_entry", "vec", vec) != "NET_RX": | ||
241 | return | ||
242 | event_info = (name, context, cpu, nsecs(sec, nsec), pid, comm, vec) | ||
243 | all_event_list.append(event_info) | ||
244 | |||
245 | def irq__irq_handler_entry(name, context, cpu, sec, nsec, pid, comm, | ||
246 | irq, irq_name): | ||
247 | event_info = (name, context, cpu, nsecs(sec, nsec), pid, comm, | ||
248 | irq, irq_name) | ||
249 | all_event_list.append(event_info) | ||
250 | |||
251 | def irq__irq_handler_exit(name, context, cpu, sec, nsec, pid, comm, irq, ret): | ||
252 | event_info = (name, context, cpu, nsecs(sec, nsec), pid, comm, irq, ret) | ||
253 | all_event_list.append(event_info) | ||
254 | |||
255 | def napi__napi_poll(name, context, cpu, sec, nsec, pid, comm, napi, dev_name): | ||
256 | event_info = (name, context, cpu, nsecs(sec, nsec), pid, comm, | ||
257 | napi, dev_name) | ||
258 | all_event_list.append(event_info) | ||
259 | |||
260 | def net__netif_receive_skb(name, context, cpu, sec, nsec, pid, comm, skbaddr, | ||
261 | skblen, dev_name): | ||
262 | event_info = (name, context, cpu, nsecs(sec, nsec), pid, comm, | ||
263 | skbaddr, skblen, dev_name) | ||
264 | all_event_list.append(event_info) | ||
265 | |||
266 | def net__netif_rx(name, context, cpu, sec, nsec, pid, comm, skbaddr, | ||
267 | skblen, dev_name): | ||
268 | event_info = (name, context, cpu, nsecs(sec, nsec), pid, comm, | ||
269 | skbaddr, skblen, dev_name) | ||
270 | all_event_list.append(event_info) | ||
271 | |||
272 | def net__net_dev_queue(name, context, cpu, sec, nsec, pid, comm, | ||
273 | skbaddr, skblen, dev_name): | ||
274 | event_info = (name, context, cpu, nsecs(sec, nsec), pid, comm, | ||
275 | skbaddr, skblen, dev_name) | ||
276 | all_event_list.append(event_info) | ||
277 | |||
278 | def net__net_dev_xmit(name, context, cpu, sec, nsec, pid, comm, | ||
279 | skbaddr, skblen, rc, dev_name): | ||
280 | event_info = (name, context, cpu, nsecs(sec, nsec), pid, comm, | ||
281 | skbaddr, skblen, rc ,dev_name) | ||
282 | all_event_list.append(event_info) | ||
283 | |||
284 | def skb__kfree_skb(name, context, cpu, sec, nsec, pid, comm, | ||
285 | skbaddr, protocol, location): | ||
286 | event_info = (name, context, cpu, nsecs(sec, nsec), pid, comm, | ||
287 | skbaddr, protocol, location) | ||
288 | all_event_list.append(event_info) | ||
289 | |||
290 | def skb__consume_skb(name, context, cpu, sec, nsec, pid, comm, skbaddr): | ||
291 | event_info = (name, context, cpu, nsecs(sec, nsec), pid, comm, | ||
292 | skbaddr) | ||
293 | all_event_list.append(event_info) | ||
294 | |||
295 | def skb__skb_copy_datagram_iovec(name, context, cpu, sec, nsec, pid, comm, | ||
296 | skbaddr, skblen): | ||
297 | event_info = (name, context, cpu, nsecs(sec, nsec), pid, comm, | ||
298 | skbaddr, skblen) | ||
299 | all_event_list.append(event_info) | ||
300 | |||
301 | def handle_irq_handler_entry(event_info): | ||
302 | (name, context, cpu, time, pid, comm, irq, irq_name) = event_info | ||
303 | if cpu not in irq_dic.keys(): | ||
304 | irq_dic[cpu] = [] | ||
305 | irq_record = {'irq':irq, 'name':irq_name, 'cpu':cpu, 'irq_ent_t':time} | ||
306 | irq_dic[cpu].append(irq_record) | ||
307 | |||
308 | def handle_irq_handler_exit(event_info): | ||
309 | (name, context, cpu, time, pid, comm, irq, ret) = event_info | ||
310 | if cpu not in irq_dic.keys(): | ||
311 | return | ||
312 | irq_record = irq_dic[cpu].pop() | ||
313 | if irq != irq_record['irq']: | ||
314 | return | ||
315 | irq_record.update({'irq_ext_t':time}) | ||
316 | # if an irq doesn't include NET_RX softirq, drop. | ||
317 | if 'event_list' in irq_record.keys(): | ||
318 | irq_dic[cpu].append(irq_record) | ||
319 | |||
320 | def handle_irq_softirq_raise(event_info): | ||
321 | (name, context, cpu, time, pid, comm, vec) = event_info | ||
322 | if cpu not in irq_dic.keys() \ | ||
323 | or len(irq_dic[cpu]) == 0: | ||
324 | return | ||
325 | irq_record = irq_dic[cpu].pop() | ||
326 | if 'event_list' in irq_record.keys(): | ||
327 | irq_event_list = irq_record['event_list'] | ||
328 | else: | ||
329 | irq_event_list = [] | ||
330 | irq_event_list.append({'time':time, 'event':'sirq_raise'}) | ||
331 | irq_record.update({'event_list':irq_event_list}) | ||
332 | irq_dic[cpu].append(irq_record) | ||
333 | |||
334 | def handle_irq_softirq_entry(event_info): | ||
335 | (name, context, cpu, time, pid, comm, vec) = event_info | ||
336 | net_rx_dic[cpu] = {'sirq_ent_t':time, 'event_list':[]} | ||
337 | |||
338 | def handle_irq_softirq_exit(event_info): | ||
339 | (name, context, cpu, time, pid, comm, vec) = event_info | ||
340 | irq_list = [] | ||
341 | event_list = 0 | ||
342 | if cpu in irq_dic.keys(): | ||
343 | irq_list = irq_dic[cpu] | ||
344 | del irq_dic[cpu] | ||
345 | if cpu in net_rx_dic.keys(): | ||
346 | sirq_ent_t = net_rx_dic[cpu]['sirq_ent_t'] | ||
347 | event_list = net_rx_dic[cpu]['event_list'] | ||
348 | del net_rx_dic[cpu] | ||
349 | if irq_list == [] or event_list == 0: | ||
350 | return | ||
351 | rec_data = {'sirq_ent_t':sirq_ent_t, 'sirq_ext_t':time, | ||
352 | 'irq_list':irq_list, 'event_list':event_list} | ||
353 | # merge information related to a NET_RX softirq | |
354 | receive_hunk_list.append(rec_data) | ||
355 | |||
356 | def handle_napi_poll(event_info): | ||
357 | (name, context, cpu, time, pid, comm, napi, dev_name) = event_info | ||
358 | if cpu in net_rx_dic.keys(): | ||
359 | event_list = net_rx_dic[cpu]['event_list'] | ||
360 | rec_data = {'event_name':'napi_poll', | ||
361 | 'dev':dev_name, 'event_t':time} | ||
362 | event_list.append(rec_data) | ||
363 | |||
364 | def handle_netif_rx(event_info): | ||
365 | (name, context, cpu, time, pid, comm, | ||
366 | skbaddr, skblen, dev_name) = event_info | ||
367 | if cpu not in irq_dic.keys() \ | ||
368 | or len(irq_dic[cpu]) == 0: | ||
369 | return | ||
370 | irq_record = irq_dic[cpu].pop() | ||
371 | if 'event_list' in irq_record.keys(): | ||
372 | irq_event_list = irq_record['event_list'] | ||
373 | else: | ||
374 | irq_event_list = [] | ||
375 | irq_event_list.append({'time':time, 'event':'netif_rx', | ||
376 | 'skbaddr':skbaddr, 'skblen':skblen, 'dev_name':dev_name}) | ||
377 | irq_record.update({'event_list':irq_event_list}) | ||
378 | irq_dic[cpu].append(irq_record) | ||
379 | |||
380 | def handle_netif_receive_skb(event_info): | ||
381 | global of_count_rx_skb_list | ||
382 | |||
383 | (name, context, cpu, time, pid, comm, | ||
384 | skbaddr, skblen, dev_name) = event_info | ||
385 | if cpu in net_rx_dic.keys(): | ||
386 | rec_data = {'event_name':'netif_receive_skb', | ||
387 | 'event_t':time, 'skbaddr':skbaddr, 'len':skblen} | ||
388 | event_list = net_rx_dic[cpu]['event_list'] | ||
389 | event_list.append(rec_data) | ||
390 | rx_skb_list.insert(0, rec_data) | ||
391 | if len(rx_skb_list) > buffer_budget: | ||
392 | rx_skb_list.pop() | ||
393 | of_count_rx_skb_list += 1 | ||
394 | |||
395 | def handle_net_dev_queue(event_info): | ||
396 | global of_count_tx_queue_list | ||
397 | |||
398 | (name, context, cpu, time, pid, comm, | ||
399 | skbaddr, skblen, dev_name) = event_info | ||
400 | skb = {'dev':dev_name, 'skbaddr':skbaddr, 'len':skblen, 'queue_t':time} | ||
401 | tx_queue_list.insert(0, skb) | ||
402 | if len(tx_queue_list) > buffer_budget: | ||
403 | tx_queue_list.pop() | ||
404 | of_count_tx_queue_list += 1 | ||
405 | |||
406 | def handle_net_dev_xmit(event_info): | ||
407 | global of_count_tx_xmit_list | ||
408 | |||
409 | (name, context, cpu, time, pid, comm, | ||
410 | skbaddr, skblen, rc, dev_name) = event_info | ||
411 | if rc == 0: # NETDEV_TX_OK | ||
412 | for i in range(len(tx_queue_list)): | ||
413 | skb = tx_queue_list[i] | ||
414 | if skb['skbaddr'] == skbaddr: | ||
415 | skb['xmit_t'] = time | ||
416 | tx_xmit_list.insert(0, skb) | ||
417 | del tx_queue_list[i] | ||
418 | if len(tx_xmit_list) > buffer_budget: | ||
419 | tx_xmit_list.pop() | ||
420 | of_count_tx_xmit_list += 1 | ||
421 | return | ||
422 | |||
423 | def handle_kfree_skb(event_info): | ||
424 | (name, context, cpu, time, pid, comm, | ||
425 | skbaddr, protocol, location) = event_info | ||
426 | for i in range(len(tx_queue_list)): | ||
427 | skb = tx_queue_list[i] | ||
428 | if skb['skbaddr'] == skbaddr: | ||
429 | del tx_queue_list[i] | ||
430 | return | ||
431 | for i in range(len(tx_xmit_list)): | ||
432 | skb = tx_xmit_list[i] | ||
433 | if skb['skbaddr'] == skbaddr: | ||
434 | skb['free_t'] = time | ||
435 | tx_free_list.append(skb) | ||
436 | del tx_xmit_list[i] | ||
437 | return | ||
438 | for i in range(len(rx_skb_list)): | ||
439 | rec_data = rx_skb_list[i] | ||
440 | if rec_data['skbaddr'] == skbaddr: | ||
441 | rec_data.update({'handle':"kfree_skb", | ||
442 | 'comm':comm, 'pid':pid, 'comm_t':time}) | ||
443 | del rx_skb_list[i] | ||
444 | return | ||
445 | |||
446 | def handle_consume_skb(event_info): | ||
447 | (name, context, cpu, time, pid, comm, skbaddr) = event_info | ||
448 | for i in range(len(tx_xmit_list)): | ||
449 | skb = tx_xmit_list[i] | ||
450 | if skb['skbaddr'] == skbaddr: | ||
451 | skb['free_t'] = time | ||
452 | tx_free_list.append(skb) | ||
453 | del tx_xmit_list[i] | ||
454 | return | ||
455 | |||
456 | def handle_skb_copy_datagram_iovec(event_info): | ||
457 | (name, context, cpu, time, pid, comm, skbaddr, skblen) = event_info | ||
458 | for i in range(len(rx_skb_list)): | ||
459 | rec_data = rx_skb_list[i] | ||
460 | if skbaddr == rec_data['skbaddr']: | ||
461 | rec_data.update({'handle':"skb_copy_datagram_iovec", | ||
462 | 'comm':comm, 'pid':pid, 'comm_t':time}) | ||
463 | del rx_skb_list[i] | ||
464 | return | ||
diff --git a/tools/perf/util/cache.h b/tools/perf/util/cache.h index 27e9ebe4076e..a7729797fd96 100644 --- a/tools/perf/util/cache.h +++ b/tools/perf/util/cache.h | |||
@@ -82,6 +82,8 @@ extern char *perf_path(const char *fmt, ...) __attribute__((format (printf, 1, 2 | |||
82 | extern char *perf_pathdup(const char *fmt, ...) | 82 | extern char *perf_pathdup(const char *fmt, ...) |
83 | __attribute__((format (printf, 1, 2))); | 83 | __attribute__((format (printf, 1, 2))); |
84 | 84 | ||
85 | #ifdef NO_STRLCPY | ||
85 | extern size_t strlcpy(char *dest, const char *src, size_t size); | 86 | extern size_t strlcpy(char *dest, const char *src, size_t size); |
87 | #endif | ||
86 | 88 | ||
87 | #endif /* __PERF_CACHE_H */ | 89 | #endif /* __PERF_CACHE_H */ |
diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c index f231f43424d2..e12d539417b2 100644 --- a/tools/perf/util/callchain.c +++ b/tools/perf/util/callchain.c | |||
@@ -28,6 +28,9 @@ bool ip_callchain__valid(struct ip_callchain *chain, const event_t *event) | |||
28 | #define chain_for_each_child(child, parent) \ | 28 | #define chain_for_each_child(child, parent) \ |
29 | list_for_each_entry(child, &parent->children, brothers) | 29 | list_for_each_entry(child, &parent->children, brothers) |
30 | 30 | ||
31 | #define chain_for_each_child_safe(child, next, parent) \ | ||
32 | list_for_each_entry_safe(child, next, &parent->children, brothers) | ||
33 | |||
31 | static void | 34 | static void |
32 | rb_insert_callchain(struct rb_root *root, struct callchain_node *chain, | 35 | rb_insert_callchain(struct rb_root *root, struct callchain_node *chain, |
33 | enum chain_mode mode) | 36 | enum chain_mode mode) |
@@ -86,10 +89,10 @@ __sort_chain_flat(struct rb_root *rb_root, struct callchain_node *node, | |||
86 | * sort them by hit | 89 | * sort them by hit |
87 | */ | 90 | */ |
88 | static void | 91 | static void |
89 | sort_chain_flat(struct rb_root *rb_root, struct callchain_node *node, | 92 | sort_chain_flat(struct rb_root *rb_root, struct callchain_root *root, |
90 | u64 min_hit, struct callchain_param *param __used) | 93 | u64 min_hit, struct callchain_param *param __used) |
91 | { | 94 | { |
92 | __sort_chain_flat(rb_root, node, min_hit); | 95 | __sort_chain_flat(rb_root, &root->node, min_hit); |
93 | } | 96 | } |
94 | 97 | ||
95 | static void __sort_chain_graph_abs(struct callchain_node *node, | 98 | static void __sort_chain_graph_abs(struct callchain_node *node, |
@@ -108,11 +111,11 @@ static void __sort_chain_graph_abs(struct callchain_node *node, | |||
108 | } | 111 | } |
109 | 112 | ||
110 | static void | 113 | static void |
111 | sort_chain_graph_abs(struct rb_root *rb_root, struct callchain_node *chain_root, | 114 | sort_chain_graph_abs(struct rb_root *rb_root, struct callchain_root *chain_root, |
112 | u64 min_hit, struct callchain_param *param __used) | 115 | u64 min_hit, struct callchain_param *param __used) |
113 | { | 116 | { |
114 | __sort_chain_graph_abs(chain_root, min_hit); | 117 | __sort_chain_graph_abs(&chain_root->node, min_hit); |
115 | rb_root->rb_node = chain_root->rb_root.rb_node; | 118 | rb_root->rb_node = chain_root->node.rb_root.rb_node; |
116 | } | 119 | } |
117 | 120 | ||
118 | static void __sort_chain_graph_rel(struct callchain_node *node, | 121 | static void __sort_chain_graph_rel(struct callchain_node *node, |
@@ -133,11 +136,11 @@ static void __sort_chain_graph_rel(struct callchain_node *node, | |||
133 | } | 136 | } |
134 | 137 | ||
135 | static void | 138 | static void |
136 | sort_chain_graph_rel(struct rb_root *rb_root, struct callchain_node *chain_root, | 139 | sort_chain_graph_rel(struct rb_root *rb_root, struct callchain_root *chain_root, |
137 | u64 min_hit __used, struct callchain_param *param) | 140 | u64 min_hit __used, struct callchain_param *param) |
138 | { | 141 | { |
139 | __sort_chain_graph_rel(chain_root, param->min_percent / 100.0); | 142 | __sort_chain_graph_rel(&chain_root->node, param->min_percent / 100.0); |
140 | rb_root->rb_node = chain_root->rb_root.rb_node; | 143 | rb_root->rb_node = chain_root->node.rb_root.rb_node; |
141 | } | 144 | } |
142 | 145 | ||
143 | int register_callchain_param(struct callchain_param *param) | 146 | int register_callchain_param(struct callchain_param *param) |
@@ -284,19 +287,18 @@ split_add_child(struct callchain_node *parent, struct resolved_chain *chain, | |||
284 | } | 287 | } |
285 | 288 | ||
286 | static int | 289 | static int |
287 | __append_chain(struct callchain_node *root, struct resolved_chain *chain, | 290 | append_chain(struct callchain_node *root, struct resolved_chain *chain, |
288 | unsigned int start, u64 period); | 291 | unsigned int start, u64 period); |
289 | 292 | ||
290 | static void | 293 | static void |
291 | __append_chain_children(struct callchain_node *root, | 294 | append_chain_children(struct callchain_node *root, struct resolved_chain *chain, |
292 | struct resolved_chain *chain, | 295 | unsigned int start, u64 period) |
293 | unsigned int start, u64 period) | ||
294 | { | 296 | { |
295 | struct callchain_node *rnode; | 297 | struct callchain_node *rnode; |
296 | 298 | ||
297 | /* lookup in children */ | 299 | /* lookup in children */ |
298 | chain_for_each_child(rnode, root) { | 300 | chain_for_each_child(rnode, root) { |
299 | unsigned int ret = __append_chain(rnode, chain, start, period); | 301 | unsigned int ret = append_chain(rnode, chain, start, period); |
300 | 302 | ||
301 | if (!ret) | 303 | if (!ret) |
302 | goto inc_children_hit; | 304 | goto inc_children_hit; |
@@ -309,8 +311,8 @@ inc_children_hit: | |||
309 | } | 311 | } |
310 | 312 | ||
311 | static int | 313 | static int |
312 | __append_chain(struct callchain_node *root, struct resolved_chain *chain, | 314 | append_chain(struct callchain_node *root, struct resolved_chain *chain, |
313 | unsigned int start, u64 period) | 315 | unsigned int start, u64 period) |
314 | { | 316 | { |
315 | struct callchain_list *cnode; | 317 | struct callchain_list *cnode; |
316 | unsigned int i = start; | 318 | unsigned int i = start; |
@@ -357,7 +359,7 @@ __append_chain(struct callchain_node *root, struct resolved_chain *chain, | |||
357 | } | 359 | } |
358 | 360 | ||
359 | /* We match the node and still have a part remaining */ | 361 | /* We match the node and still have a part remaining */ |
360 | __append_chain_children(root, chain, i, period); | 362 | append_chain_children(root, chain, i, period); |
361 | 363 | ||
362 | return 0; | 364 | return 0; |
363 | } | 365 | } |
@@ -380,8 +382,8 @@ static void filter_context(struct ip_callchain *old, struct resolved_chain *new, | |||
380 | } | 382 | } |
381 | 383 | ||
382 | 384 | ||
383 | int append_chain(struct callchain_node *root, struct ip_callchain *chain, | 385 | int callchain_append(struct callchain_root *root, struct ip_callchain *chain, |
384 | struct map_symbol *syms, u64 period) | 386 | struct map_symbol *syms, u64 period) |
385 | { | 387 | { |
386 | struct resolved_chain *filtered; | 388 | struct resolved_chain *filtered; |
387 | 389 | ||
@@ -398,9 +400,65 @@ int append_chain(struct callchain_node *root, struct ip_callchain *chain, | |||
398 | if (!filtered->nr) | 400 | if (!filtered->nr) |
399 | goto end; | 401 | goto end; |
400 | 402 | ||
401 | __append_chain_children(root, filtered, 0, period); | 403 | append_chain_children(&root->node, filtered, 0, period); |
404 | |||
405 | if (filtered->nr > root->max_depth) | ||
406 | root->max_depth = filtered->nr; | ||
402 | end: | 407 | end: |
403 | free(filtered); | 408 | free(filtered); |
404 | 409 | ||
405 | return 0; | 410 | return 0; |
406 | } | 411 | } |
412 | |||
413 | static int | ||
414 | merge_chain_branch(struct callchain_node *dst, struct callchain_node *src, | ||
415 | struct resolved_chain *chain) | ||
416 | { | ||
417 | struct callchain_node *child, *next_child; | ||
418 | struct callchain_list *list, *next_list; | ||
419 | int old_pos = chain->nr; | ||
420 | int err = 0; | ||
421 | |||
422 | list_for_each_entry_safe(list, next_list, &src->val, list) { | ||
423 | chain->ips[chain->nr].ip = list->ip; | ||
424 | chain->ips[chain->nr].ms = list->ms; | ||
425 | chain->nr++; | ||
426 | list_del(&list->list); | ||
427 | free(list); | ||
428 | } | ||
429 | |||
430 | if (src->hit) | ||
431 | append_chain_children(dst, chain, 0, src->hit); | ||
432 | |||
433 | chain_for_each_child_safe(child, next_child, src) { | ||
434 | err = merge_chain_branch(dst, child, chain); | ||
435 | if (err) | ||
436 | break; | ||
437 | |||
438 | list_del(&child->brothers); | ||
439 | free(child); | ||
440 | } | ||
441 | |||
442 | chain->nr = old_pos; | ||
443 | |||
444 | return err; | ||
445 | } | ||
446 | |||
447 | int callchain_merge(struct callchain_root *dst, struct callchain_root *src) | ||
448 | { | ||
449 | struct resolved_chain *chain; | ||
450 | int err; | ||
451 | |||
452 | chain = malloc(sizeof(*chain) + | ||
453 | src->max_depth * sizeof(struct resolved_ip)); | ||
454 | if (!chain) | ||
455 | return -ENOMEM; | ||
456 | |||
457 | chain->nr = 0; | ||
458 | |||
459 | err = merge_chain_branch(&dst->node, &src->node, chain); | ||
460 | |||
461 | free(chain); | ||
462 | |||
463 | return err; | ||
464 | } | ||
diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h index 6de4313924fb..c15fb8c24ad2 100644 --- a/tools/perf/util/callchain.h +++ b/tools/perf/util/callchain.h | |||
@@ -26,9 +26,14 @@ struct callchain_node { | |||
26 | u64 children_hit; | 26 | u64 children_hit; |
27 | }; | 27 | }; |
28 | 28 | ||
29 | struct callchain_root { | ||
30 | u64 max_depth; | ||
31 | struct callchain_node node; | ||
32 | }; | ||
33 | |||
29 | struct callchain_param; | 34 | struct callchain_param; |
30 | 35 | ||
31 | typedef void (*sort_chain_func_t)(struct rb_root *, struct callchain_node *, | 36 | typedef void (*sort_chain_func_t)(struct rb_root *, struct callchain_root *, |
32 | u64, struct callchain_param *); | 37 | u64, struct callchain_param *); |
33 | 38 | ||
34 | struct callchain_param { | 39 | struct callchain_param { |
@@ -44,15 +49,16 @@ struct callchain_list { | |||
44 | struct list_head list; | 49 | struct list_head list; |
45 | }; | 50 | }; |
46 | 51 | ||
47 | static inline void callchain_init(struct callchain_node *node) | 52 | static inline void callchain_init(struct callchain_root *root) |
48 | { | 53 | { |
49 | INIT_LIST_HEAD(&node->brothers); | 54 | INIT_LIST_HEAD(&root->node.brothers); |
50 | INIT_LIST_HEAD(&node->children); | 55 | INIT_LIST_HEAD(&root->node.children); |
51 | INIT_LIST_HEAD(&node->val); | 56 | INIT_LIST_HEAD(&root->node.val); |
52 | 57 | ||
53 | node->children_hit = 0; | 58 | root->node.parent = NULL; |
54 | node->parent = NULL; | 59 | root->node.hit = 0; |
55 | node->hit = 0; | 60 | root->node.children_hit = 0; |
61 | root->max_depth = 0; | ||
56 | } | 62 | } |
57 | 63 | ||
58 | static inline u64 cumul_hits(struct callchain_node *node) | 64 | static inline u64 cumul_hits(struct callchain_node *node) |
@@ -61,8 +67,9 @@ static inline u64 cumul_hits(struct callchain_node *node) | |||
61 | } | 67 | } |
62 | 68 | ||
63 | int register_callchain_param(struct callchain_param *param); | 69 | int register_callchain_param(struct callchain_param *param); |
64 | int append_chain(struct callchain_node *root, struct ip_callchain *chain, | 70 | int callchain_append(struct callchain_root *root, struct ip_callchain *chain, |
65 | struct map_symbol *syms, u64 period); | 71 | struct map_symbol *syms, u64 period); |
72 | int callchain_merge(struct callchain_root *dst, struct callchain_root *src); | ||
66 | 73 | ||
67 | bool ip_callchain__valid(struct ip_callchain *chain, const event_t *event); | 74 | bool ip_callchain__valid(struct ip_callchain *chain, const event_t *event); |
68 | #endif /* __PERF_CALLCHAIN_H */ | 75 | #endif /* __PERF_CALLCHAIN_H */ |
diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index be22ae6ef055..2022e8740994 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c | |||
@@ -87,7 +87,7 @@ static void hist_entry__add_cpumode_period(struct hist_entry *self, | |||
87 | 87 | ||
88 | static struct hist_entry *hist_entry__new(struct hist_entry *template) | 88 | static struct hist_entry *hist_entry__new(struct hist_entry *template) |
89 | { | 89 | { |
90 | size_t callchain_size = symbol_conf.use_callchain ? sizeof(struct callchain_node) : 0; | 90 | size_t callchain_size = symbol_conf.use_callchain ? sizeof(struct callchain_root) : 0; |
91 | struct hist_entry *self = malloc(sizeof(*self) + callchain_size); | 91 | struct hist_entry *self = malloc(sizeof(*self) + callchain_size); |
92 | 92 | ||
93 | if (self != NULL) { | 93 | if (self != NULL) { |
@@ -226,6 +226,8 @@ static bool collapse__insert_entry(struct rb_root *root, struct hist_entry *he) | |||
226 | 226 | ||
227 | if (!cmp) { | 227 | if (!cmp) { |
228 | iter->period += he->period; | 228 | iter->period += he->period; |
229 | if (symbol_conf.use_callchain) | ||
230 | callchain_merge(iter->callchain, he->callchain); | ||
229 | hist_entry__free(he); | 231 | hist_entry__free(he); |
230 | return false; | 232 | return false; |
231 | } | 233 | } |
diff --git a/tools/perf/util/path.c b/tools/perf/util/path.c index 58a470d036dd..bd7497711424 100644 --- a/tools/perf/util/path.c +++ b/tools/perf/util/path.c | |||
@@ -22,6 +22,7 @@ static const char *get_perf_dir(void) | |||
22 | return "."; | 22 | return "."; |
23 | } | 23 | } |
24 | 24 | ||
25 | #ifdef NO_STRLCPY | ||
25 | size_t strlcpy(char *dest, const char *src, size_t size) | 26 | size_t strlcpy(char *dest, const char *src, size_t size) |
26 | { | 27 | { |
27 | size_t ret = strlen(src); | 28 | size_t ret = strlen(src); |
@@ -33,7 +34,7 @@ size_t strlcpy(char *dest, const char *src, size_t size) | |||
33 | } | 34 | } |
34 | return ret; | 35 | return ret; |
35 | } | 36 | } |
36 | 37 | #endif | |
37 | 38 | ||
38 | static char *get_pathname(void) | 39 | static char *get_pathname(void) |
39 | { | 40 | { |
diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h index 46e531d09e8b..0b91053a7d11 100644 --- a/tools/perf/util/sort.h +++ b/tools/perf/util/sort.h | |||
@@ -70,7 +70,7 @@ struct hist_entry { | |||
70 | struct hist_entry *pair; | 70 | struct hist_entry *pair; |
71 | struct rb_root sorted_chain; | 71 | struct rb_root sorted_chain; |
72 | }; | 72 | }; |
73 | struct callchain_node callchain[0]; | 73 | struct callchain_root callchain[0]; |
74 | }; | 74 | }; |
75 | 75 | ||
76 | enum sort_type { | 76 | enum sort_type { |
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index b2f5ae97f33d..b39f499e575a 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c | |||
@@ -388,6 +388,20 @@ size_t dso__fprintf_buildid(struct dso *self, FILE *fp) | |||
388 | return fprintf(fp, "%s", sbuild_id); | 388 | return fprintf(fp, "%s", sbuild_id); |
389 | } | 389 | } |
390 | 390 | ||
391 | size_t dso__fprintf_symbols_by_name(struct dso *self, enum map_type type, FILE *fp) | ||
392 | { | ||
393 | size_t ret = 0; | ||
394 | struct rb_node *nd; | ||
395 | struct symbol_name_rb_node *pos; | ||
396 | |||
397 | for (nd = rb_first(&self->symbol_names[type]); nd; nd = rb_next(nd)) { | ||
398 | pos = rb_entry(nd, struct symbol_name_rb_node, rb_node); | ||
399 | fprintf(fp, "%s\n", pos->sym.name); | ||
400 | } | ||
401 | |||
402 | return ret; | ||
403 | } | ||
404 | |||
391 | size_t dso__fprintf(struct dso *self, enum map_type type, FILE *fp) | 405 | size_t dso__fprintf(struct dso *self, enum map_type type, FILE *fp) |
392 | { | 406 | { |
393 | struct rb_node *nd; | 407 | struct rb_node *nd; |
diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index ea95c2756f05..038f2201ee09 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h | |||
@@ -182,6 +182,7 @@ size_t machines__fprintf_dsos(struct rb_root *self, FILE *fp); | |||
182 | size_t machines__fprintf_dsos_buildid(struct rb_root *self, FILE *fp, bool with_hits); | 182 | size_t machines__fprintf_dsos_buildid(struct rb_root *self, FILE *fp, bool with_hits); |
183 | 183 | ||
184 | size_t dso__fprintf_buildid(struct dso *self, FILE *fp); | 184 | size_t dso__fprintf_buildid(struct dso *self, FILE *fp); |
185 | size_t dso__fprintf_symbols_by_name(struct dso *self, enum map_type type, FILE *fp); | ||
185 | size_t dso__fprintf(struct dso *self, enum map_type type, FILE *fp); | 186 | size_t dso__fprintf(struct dso *self, enum map_type type, FILE *fp); |
186 | 187 | ||
187 | enum dso_origin { | 188 | enum dso_origin { |
diff --git a/tools/perf/util/ui/browser.c b/tools/perf/util/ui/browser.c index 66f2d583d8c4..6d0df809a2ed 100644 --- a/tools/perf/util/ui/browser.c +++ b/tools/perf/util/ui/browser.c | |||
@@ -1,16 +1,6 @@ | |||
1 | #define _GNU_SOURCE | ||
2 | #include <stdio.h> | ||
3 | #undef _GNU_SOURCE | ||
4 | /* | ||
5 | * slang versions <= 2.0.6 have a "#if HAVE_LONG_LONG" that breaks | ||
6 | * the build if it isn't defined. Use the equivalent one that glibc | ||
7 | * has on features.h. | ||
8 | */ | ||
9 | #include <features.h> | ||
10 | #ifndef HAVE_LONG_LONG | ||
11 | #define HAVE_LONG_LONG __GLIBC_HAVE_LONG_LONG | ||
12 | #endif | ||
13 | #include <slang.h> | 1 | #include <slang.h> |
2 | #include "libslang.h" | ||
3 | #include <linux/compiler.h> | ||
14 | #include <linux/list.h> | 4 | #include <linux/list.h> |
15 | #include <linux/rbtree.h> | 5 | #include <linux/rbtree.h> |
16 | #include <stdlib.h> | 6 | #include <stdlib.h> |
@@ -19,17 +9,9 @@ | |||
19 | #include "helpline.h" | 9 | #include "helpline.h" |
20 | #include "../color.h" | 10 | #include "../color.h" |
21 | #include "../util.h" | 11 | #include "../util.h" |
12 | #include <stdio.h> | ||
22 | 13 | ||
23 | #if SLANG_VERSION < 20104 | 14 | static int ui_browser__percent_color(double percent, bool current) |
24 | #define sltt_set_color(obj, name, fg, bg) \ | ||
25 | SLtt_set_color(obj,(char *)name, (char *)fg, (char *)bg) | ||
26 | #else | ||
27 | #define sltt_set_color SLtt_set_color | ||
28 | #endif | ||
29 | |||
30 | newtComponent newt_form__new(void); | ||
31 | |||
32 | int ui_browser__percent_color(double percent, bool current) | ||
33 | { | 15 | { |
34 | if (current) | 16 | if (current) |
35 | return HE_COLORSET_SELECTED; | 17 | return HE_COLORSET_SELECTED; |
@@ -40,6 +22,23 @@ int ui_browser__percent_color(double percent, bool current) | |||
40 | return HE_COLORSET_NORMAL; | 22 | return HE_COLORSET_NORMAL; |
41 | } | 23 | } |
42 | 24 | ||
25 | void ui_browser__set_color(struct ui_browser *self __used, int color) | ||
26 | { | ||
27 | SLsmg_set_color(color); | ||
28 | } | ||
29 | |||
30 | void ui_browser__set_percent_color(struct ui_browser *self, | ||
31 | double percent, bool current) | ||
32 | { | ||
33 | int color = ui_browser__percent_color(percent, current); | ||
34 | ui_browser__set_color(self, color); | ||
35 | } | ||
36 | |||
37 | void ui_browser__gotorc(struct ui_browser *self, int y, int x) | ||
38 | { | ||
39 | SLsmg_gotorc(self->y + y, self->x + x); | ||
40 | } | ||
41 | |||
43 | void ui_browser__list_head_seek(struct ui_browser *self, off_t offset, int whence) | 42 | void ui_browser__list_head_seek(struct ui_browser *self, off_t offset, int whence) |
44 | { | 43 | { |
45 | struct list_head *head = self->entries; | 44 | struct list_head *head = self->entries; |
@@ -111,7 +110,7 @@ unsigned int ui_browser__rb_tree_refresh(struct ui_browser *self) | |||
111 | nd = self->top; | 110 | nd = self->top; |
112 | 111 | ||
113 | while (nd != NULL) { | 112 | while (nd != NULL) { |
114 | SLsmg_gotorc(self->y + row, self->x); | 113 | ui_browser__gotorc(self, row, 0); |
115 | self->write(self, nd, row); | 114 | self->write(self, nd, row); |
116 | if (++row == self->height) | 115 | if (++row == self->height) |
117 | break; | 116 | break; |
@@ -131,13 +130,10 @@ void ui_browser__refresh_dimensions(struct ui_browser *self) | |||
131 | int cols, rows; | 130 | int cols, rows; |
132 | newtGetScreenSize(&cols, &rows); | 131 | newtGetScreenSize(&cols, &rows); |
133 | 132 | ||
134 | if (self->width > cols - 4) | 133 | self->width = cols - 1; |
135 | self->width = cols - 4; | 134 | self->height = rows - 2; |
136 | self->height = rows - 5; | 135 | self->y = 1; |
137 | if (self->height > self->nr_entries) | 136 | self->x = 0; |
138 | self->height = self->nr_entries; | ||
139 | self->y = (rows - self->height) / 2; | ||
140 | self->x = (cols - self->width) / 2; | ||
141 | } | 137 | } |
142 | 138 | ||
143 | void ui_browser__reset_index(struct ui_browser *self) | 139 | void ui_browser__reset_index(struct ui_browser *self) |
@@ -146,34 +142,48 @@ void ui_browser__reset_index(struct ui_browser *self) | |||
146 | self->seek(self, 0, SEEK_SET); | 142 | self->seek(self, 0, SEEK_SET); |
147 | } | 143 | } |
148 | 144 | ||
145 | void ui_browser__add_exit_key(struct ui_browser *self, int key) | ||
146 | { | ||
147 | newtFormAddHotKey(self->form, key); | ||
148 | } | ||
149 | |||
150 | void ui_browser__add_exit_keys(struct ui_browser *self, int keys[]) | ||
151 | { | ||
152 | int i = 0; | ||
153 | |||
154 | while (keys[i] && i < 64) { | ||
155 | ui_browser__add_exit_key(self, keys[i]); | ||
156 | ++i; | ||
157 | } | ||
158 | } | ||
159 | |||
149 | int ui_browser__show(struct ui_browser *self, const char *title, | 160 | int ui_browser__show(struct ui_browser *self, const char *title, |
150 | const char *helpline, ...) | 161 | const char *helpline, ...) |
151 | { | 162 | { |
152 | va_list ap; | 163 | va_list ap; |
164 | int keys[] = { NEWT_KEY_UP, NEWT_KEY_DOWN, NEWT_KEY_PGUP, | ||
165 | NEWT_KEY_PGDN, NEWT_KEY_HOME, NEWT_KEY_END, ' ', | ||
166 | NEWT_KEY_LEFT, NEWT_KEY_ESCAPE, 'q', CTRL('c'), 0 }; | ||
153 | 167 | ||
154 | if (self->form != NULL) { | 168 | if (self->form != NULL) |
155 | newtFormDestroy(self->form); | 169 | newtFormDestroy(self->form); |
156 | newtPopWindow(); | 170 | |
157 | } | ||
158 | ui_browser__refresh_dimensions(self); | 171 | ui_browser__refresh_dimensions(self); |
159 | newtCenteredWindow(self->width, self->height, title); | 172 | self->form = newtForm(NULL, NULL, 0); |
160 | self->form = newt_form__new(); | ||
161 | if (self->form == NULL) | 173 | if (self->form == NULL) |
162 | return -1; | 174 | return -1; |
163 | 175 | ||
164 | self->sb = newtVerticalScrollbar(self->width, 0, self->height, | 176 | self->sb = newtVerticalScrollbar(self->width, 1, self->height, |
165 | HE_COLORSET_NORMAL, | 177 | HE_COLORSET_NORMAL, |
166 | HE_COLORSET_SELECTED); | 178 | HE_COLORSET_SELECTED); |
167 | if (self->sb == NULL) | 179 | if (self->sb == NULL) |
168 | return -1; | 180 | return -1; |
169 | 181 | ||
170 | newtFormAddHotKey(self->form, NEWT_KEY_UP); | 182 | SLsmg_gotorc(0, 0); |
171 | newtFormAddHotKey(self->form, NEWT_KEY_DOWN); | 183 | ui_browser__set_color(self, NEWT_COLORSET_ROOT); |
172 | newtFormAddHotKey(self->form, NEWT_KEY_PGUP); | 184 | slsmg_write_nstring(title, self->width); |
173 | newtFormAddHotKey(self->form, NEWT_KEY_PGDN); | 185 | |
174 | newtFormAddHotKey(self->form, NEWT_KEY_HOME); | 186 | ui_browser__add_exit_keys(self, keys); |
175 | newtFormAddHotKey(self->form, NEWT_KEY_END); | ||
176 | newtFormAddHotKey(self->form, ' '); | ||
177 | newtFormAddComponent(self->form, self->sb); | 187 | newtFormAddComponent(self->form, self->sb); |
178 | 188 | ||
179 | va_start(ap, helpline); | 189 | va_start(ap, helpline); |
@@ -185,7 +195,6 @@ int ui_browser__show(struct ui_browser *self, const char *title, | |||
185 | void ui_browser__hide(struct ui_browser *self) | 195 | void ui_browser__hide(struct ui_browser *self) |
186 | { | 196 | { |
187 | newtFormDestroy(self->form); | 197 | newtFormDestroy(self->form); |
188 | newtPopWindow(); | ||
189 | self->form = NULL; | 198 | self->form = NULL; |
190 | ui_helpline__pop(); | 199 | ui_helpline__pop(); |
191 | } | 200 | } |
@@ -196,28 +205,28 @@ int ui_browser__refresh(struct ui_browser *self) | |||
196 | 205 | ||
197 | newtScrollbarSet(self->sb, self->index, self->nr_entries - 1); | 206 | newtScrollbarSet(self->sb, self->index, self->nr_entries - 1); |
198 | row = self->refresh(self); | 207 | row = self->refresh(self); |
199 | SLsmg_set_color(HE_COLORSET_NORMAL); | 208 | ui_browser__set_color(self, HE_COLORSET_NORMAL); |
200 | SLsmg_fill_region(self->y + row, self->x, | 209 | SLsmg_fill_region(self->y + row, self->x, |
201 | self->height - row, self->width, ' '); | 210 | self->height - row, self->width, ' '); |
202 | 211 | ||
203 | return 0; | 212 | return 0; |
204 | } | 213 | } |
205 | 214 | ||
206 | int ui_browser__run(struct ui_browser *self, struct newtExitStruct *es) | 215 | int ui_browser__run(struct ui_browser *self) |
207 | { | 216 | { |
217 | struct newtExitStruct es; | ||
218 | |||
208 | if (ui_browser__refresh(self) < 0) | 219 | if (ui_browser__refresh(self) < 0) |
209 | return -1; | 220 | return -1; |
210 | 221 | ||
211 | while (1) { | 222 | while (1) { |
212 | off_t offset; | 223 | off_t offset; |
213 | 224 | ||
214 | newtFormRun(self->form, es); | 225 | newtFormRun(self->form, &es); |
215 | 226 | ||
216 | if (es->reason != NEWT_EXIT_HOTKEY) | 227 | if (es.reason != NEWT_EXIT_HOTKEY) |
217 | break; | 228 | break; |
218 | if (is_exit_key(es->u.key)) | 229 | switch (es.u.key) { |
219 | return es->u.key; | ||
220 | switch (es->u.key) { | ||
221 | case NEWT_KEY_DOWN: | 230 | case NEWT_KEY_DOWN: |
222 | if (self->index == self->nr_entries - 1) | 231 | if (self->index == self->nr_entries - 1) |
223 | break; | 232 | break; |
@@ -274,12 +283,12 @@ int ui_browser__run(struct ui_browser *self, struct newtExitStruct *es) | |||
274 | self->seek(self, -offset, SEEK_END); | 283 | self->seek(self, -offset, SEEK_END); |
275 | break; | 284 | break; |
276 | default: | 285 | default: |
277 | return es->u.key; | 286 | return es.u.key; |
278 | } | 287 | } |
279 | if (ui_browser__refresh(self) < 0) | 288 | if (ui_browser__refresh(self) < 0) |
280 | return -1; | 289 | return -1; |
281 | } | 290 | } |
282 | return 0; | 291 | return -1; |
283 | } | 292 | } |
284 | 293 | ||
285 | unsigned int ui_browser__list_head_refresh(struct ui_browser *self) | 294 | unsigned int ui_browser__list_head_refresh(struct ui_browser *self) |
@@ -294,7 +303,7 @@ unsigned int ui_browser__list_head_refresh(struct ui_browser *self) | |||
294 | pos = self->top; | 303 | pos = self->top; |
295 | 304 | ||
296 | list_for_each_from(pos, head) { | 305 | list_for_each_from(pos, head) { |
297 | SLsmg_gotorc(self->y + row, self->x); | 306 | ui_browser__gotorc(self, row, 0); |
298 | self->write(self, pos, row); | 307 | self->write(self, pos, row); |
299 | if (++row == self->height) | 308 | if (++row == self->height) |
300 | break; | 309 | break; |
diff --git a/tools/perf/util/ui/browser.h b/tools/perf/util/ui/browser.h index 0b9f829214f7..0dc7e4da36f5 100644 --- a/tools/perf/util/ui/browser.h +++ b/tools/perf/util/ui/browser.h | |||
@@ -25,16 +25,21 @@ struct ui_browser { | |||
25 | }; | 25 | }; |
26 | 26 | ||
27 | 27 | ||
28 | int ui_browser__percent_color(double percent, bool current); | 28 | void ui_browser__set_color(struct ui_browser *self, int color); |
29 | void ui_browser__set_percent_color(struct ui_browser *self, | ||
30 | double percent, bool current); | ||
29 | bool ui_browser__is_current_entry(struct ui_browser *self, unsigned row); | 31 | bool ui_browser__is_current_entry(struct ui_browser *self, unsigned row); |
30 | void ui_browser__refresh_dimensions(struct ui_browser *self); | 32 | void ui_browser__refresh_dimensions(struct ui_browser *self); |
31 | void ui_browser__reset_index(struct ui_browser *self); | 33 | void ui_browser__reset_index(struct ui_browser *self); |
32 | 34 | ||
35 | void ui_browser__gotorc(struct ui_browser *self, int y, int x); | ||
36 | void ui_browser__add_exit_key(struct ui_browser *self, int key); | ||
37 | void ui_browser__add_exit_keys(struct ui_browser *self, int keys[]); | ||
33 | int ui_browser__show(struct ui_browser *self, const char *title, | 38 | int ui_browser__show(struct ui_browser *self, const char *title, |
34 | const char *helpline, ...); | 39 | const char *helpline, ...); |
35 | void ui_browser__hide(struct ui_browser *self); | 40 | void ui_browser__hide(struct ui_browser *self); |
36 | int ui_browser__refresh(struct ui_browser *self); | 41 | int ui_browser__refresh(struct ui_browser *self); |
37 | int ui_browser__run(struct ui_browser *self, struct newtExitStruct *es); | 42 | int ui_browser__run(struct ui_browser *self); |
38 | 43 | ||
39 | void ui_browser__rb_tree_seek(struct ui_browser *self, off_t offset, int whence); | 44 | void ui_browser__rb_tree_seek(struct ui_browser *self, off_t offset, int whence); |
40 | unsigned int ui_browser__rb_tree_refresh(struct ui_browser *self); | 45 | unsigned int ui_browser__rb_tree_refresh(struct ui_browser *self); |
diff --git a/tools/perf/util/ui/browsers/annotate.c b/tools/perf/util/ui/browsers/annotate.c index a90273e63f4f..82b78f99251b 100644 --- a/tools/perf/util/ui/browsers/annotate.c +++ b/tools/perf/util/ui/browsers/annotate.c | |||
@@ -40,14 +40,12 @@ static void annotate_browser__write(struct ui_browser *self, void *entry, int ro | |||
40 | 40 | ||
41 | if (ol->offset != -1) { | 41 | if (ol->offset != -1) { |
42 | struct objdump_line_rb_node *olrb = objdump_line__rb(ol); | 42 | struct objdump_line_rb_node *olrb = objdump_line__rb(ol); |
43 | int color = ui_browser__percent_color(olrb->percent, current_entry); | 43 | ui_browser__set_percent_color(self, olrb->percent, current_entry); |
44 | SLsmg_set_color(color); | ||
45 | slsmg_printf(" %7.2f ", olrb->percent); | 44 | slsmg_printf(" %7.2f ", olrb->percent); |
46 | if (!current_entry) | 45 | if (!current_entry) |
47 | SLsmg_set_color(HE_COLORSET_CODE); | 46 | ui_browser__set_color(self, HE_COLORSET_CODE); |
48 | } else { | 47 | } else { |
49 | int color = ui_browser__percent_color(0, current_entry); | 48 | ui_browser__set_percent_color(self, 0, current_entry); |
50 | SLsmg_set_color(color); | ||
51 | slsmg_write_nstring(" ", 9); | 49 | slsmg_write_nstring(" ", 9); |
52 | } | 50 | } |
53 | 51 | ||
@@ -135,32 +133,31 @@ static void annotate_browser__set_top(struct annotate_browser *self, | |||
135 | self->curr_hot = nd; | 133 | self->curr_hot = nd; |
136 | } | 134 | } |
137 | 135 | ||
138 | static int annotate_browser__run(struct annotate_browser *self, | 136 | static int annotate_browser__run(struct annotate_browser *self) |
139 | struct newtExitStruct *es) | ||
140 | { | 137 | { |
141 | struct rb_node *nd; | 138 | struct rb_node *nd; |
142 | struct hist_entry *he = self->b.priv; | 139 | struct hist_entry *he = self->b.priv; |
140 | int key; | ||
143 | 141 | ||
144 | if (ui_browser__show(&self->b, he->ms.sym->name, | 142 | if (ui_browser__show(&self->b, he->ms.sym->name, |
145 | "<- or ESC: exit, TAB/shift+TAB: cycle thru samples") < 0) | 143 | "<-, -> or ESC: exit, TAB/shift+TAB: cycle thru samples") < 0) |
146 | return -1; | 144 | return -1; |
147 | 145 | /* | |
148 | newtFormAddHotKey(self->b.form, NEWT_KEY_LEFT); | 146 | * To allow builtin-annotate to cycle thru multiple symbols by |
149 | newtFormAddHotKey(self->b.form, NEWT_KEY_RIGHT); | 147 | * examining the exit key for this function. |
148 | */ | ||
149 | ui_browser__add_exit_key(&self->b, NEWT_KEY_RIGHT); | ||
150 | 150 | ||
151 | nd = self->curr_hot; | 151 | nd = self->curr_hot; |
152 | if (nd) { | 152 | if (nd) { |
153 | newtFormAddHotKey(self->b.form, NEWT_KEY_TAB); | 153 | int tabs[] = { NEWT_KEY_TAB, NEWT_KEY_UNTAB, 0 }; |
154 | newtFormAddHotKey(self->b.form, NEWT_KEY_UNTAB); | 154 | ui_browser__add_exit_keys(&self->b, tabs); |
155 | } | 155 | } |
156 | 156 | ||
157 | while (1) { | 157 | while (1) { |
158 | ui_browser__run(&self->b, es); | 158 | key = ui_browser__run(&self->b); |
159 | |||
160 | if (es->reason != NEWT_EXIT_HOTKEY) | ||
161 | break; | ||
162 | 159 | ||
163 | switch (es->u.key) { | 160 | switch (key) { |
164 | case NEWT_KEY_TAB: | 161 | case NEWT_KEY_TAB: |
165 | nd = rb_prev(nd); | 162 | nd = rb_prev(nd); |
166 | if (nd == NULL) | 163 | if (nd == NULL) |
@@ -179,12 +176,11 @@ static int annotate_browser__run(struct annotate_browser *self, | |||
179 | } | 176 | } |
180 | out: | 177 | out: |
181 | ui_browser__hide(&self->b); | 178 | ui_browser__hide(&self->b); |
182 | return es->u.key; | 179 | return key; |
183 | } | 180 | } |
184 | 181 | ||
185 | int hist_entry__tui_annotate(struct hist_entry *self) | 182 | int hist_entry__tui_annotate(struct hist_entry *self) |
186 | { | 183 | { |
187 | struct newtExitStruct es; | ||
188 | struct objdump_line *pos, *n; | 184 | struct objdump_line *pos, *n; |
189 | struct objdump_line_rb_node *rbpos; | 185 | struct objdump_line_rb_node *rbpos; |
190 | LIST_HEAD(head); | 186 | LIST_HEAD(head); |
@@ -232,7 +228,7 @@ int hist_entry__tui_annotate(struct hist_entry *self) | |||
232 | annotate_browser__set_top(&browser, browser.curr_hot); | 228 | annotate_browser__set_top(&browser, browser.curr_hot); |
233 | 229 | ||
234 | browser.b.width += 18; /* Percentage */ | 230 | browser.b.width += 18; /* Percentage */ |
235 | ret = annotate_browser__run(&browser, &es); | 231 | ret = annotate_browser__run(&browser); |
236 | list_for_each_entry_safe(pos, n, &head, node) { | 232 | list_for_each_entry_safe(pos, n, &head, node) { |
237 | list_del(&pos->node); | 233 | list_del(&pos->node); |
238 | objdump_line__free(pos); | 234 | objdump_line__free(pos); |
diff --git a/tools/perf/util/ui/browsers/hists.c b/tools/perf/util/ui/browsers/hists.c index 6866aa4c41e0..ebda8c3fde9e 100644 --- a/tools/perf/util/ui/browsers/hists.c +++ b/tools/perf/util/ui/browsers/hists.c | |||
@@ -58,6 +58,11 @@ static char callchain_list__folded(const struct callchain_list *self) | |||
58 | return map_symbol__folded(&self->ms); | 58 | return map_symbol__folded(&self->ms); |
59 | } | 59 | } |
60 | 60 | ||
61 | static void map_symbol__set_folding(struct map_symbol *self, bool unfold) | ||
62 | { | ||
63 | self->unfolded = unfold ? self->has_children : false; | ||
64 | } | ||
65 | |||
61 | static int callchain_node__count_rows_rb_tree(struct callchain_node *self) | 66 | static int callchain_node__count_rows_rb_tree(struct callchain_node *self) |
62 | { | 67 | { |
63 | int n = 0; | 68 | int n = 0; |
@@ -129,16 +134,16 @@ static void callchain_node__init_have_children_rb_tree(struct callchain_node *se | |||
129 | for (nd = rb_first(&self->rb_root); nd; nd = rb_next(nd)) { | 134 | for (nd = rb_first(&self->rb_root); nd; nd = rb_next(nd)) { |
130 | struct callchain_node *child = rb_entry(nd, struct callchain_node, rb_node); | 135 | struct callchain_node *child = rb_entry(nd, struct callchain_node, rb_node); |
131 | struct callchain_list *chain; | 136 | struct callchain_list *chain; |
132 | int first = true; | 137 | bool first = true; |
133 | 138 | ||
134 | list_for_each_entry(chain, &child->val, list) { | 139 | list_for_each_entry(chain, &child->val, list) { |
135 | if (first) { | 140 | if (first) { |
136 | first = false; | 141 | first = false; |
137 | chain->ms.has_children = chain->list.next != &child->val || | 142 | chain->ms.has_children = chain->list.next != &child->val || |
138 | rb_first(&child->rb_root) != NULL; | 143 | !RB_EMPTY_ROOT(&child->rb_root); |
139 | } else | 144 | } else |
140 | chain->ms.has_children = chain->list.next == &child->val && | 145 | chain->ms.has_children = chain->list.next == &child->val && |
141 | rb_first(&child->rb_root) != NULL; | 146 | !RB_EMPTY_ROOT(&child->rb_root); |
142 | } | 147 | } |
143 | 148 | ||
144 | callchain_node__init_have_children_rb_tree(child); | 149 | callchain_node__init_have_children_rb_tree(child); |
@@ -150,7 +155,7 @@ static void callchain_node__init_have_children(struct callchain_node *self) | |||
150 | struct callchain_list *chain; | 155 | struct callchain_list *chain; |
151 | 156 | ||
152 | list_for_each_entry(chain, &self->val, list) | 157 | list_for_each_entry(chain, &self->val, list) |
153 | chain->ms.has_children = rb_first(&self->rb_root) != NULL; | 158 | chain->ms.has_children = !RB_EMPTY_ROOT(&self->rb_root); |
154 | 159 | ||
155 | callchain_node__init_have_children_rb_tree(self); | 160 | callchain_node__init_have_children_rb_tree(self); |
156 | } | 161 | } |
@@ -168,6 +173,7 @@ static void callchain__init_have_children(struct rb_root *self) | |||
168 | static void hist_entry__init_have_children(struct hist_entry *self) | 173 | static void hist_entry__init_have_children(struct hist_entry *self) |
169 | { | 174 | { |
170 | if (!self->init_have_children) { | 175 | if (!self->init_have_children) { |
176 | self->ms.has_children = !RB_EMPTY_ROOT(&self->sorted_chain); | ||
171 | callchain__init_have_children(&self->sorted_chain); | 177 | callchain__init_have_children(&self->sorted_chain); |
172 | self->init_have_children = true; | 178 | self->init_have_children = true; |
173 | } | 179 | } |
@@ -195,43 +201,114 @@ static bool hist_browser__toggle_fold(struct hist_browser *self) | |||
195 | return false; | 201 | return false; |
196 | } | 202 | } |
197 | 203 | ||
198 | static int hist_browser__run(struct hist_browser *self, const char *title, | 204 | static int callchain_node__set_folding_rb_tree(struct callchain_node *self, bool unfold) |
199 | struct newtExitStruct *es) | 205 | { |
206 | int n = 0; | ||
207 | struct rb_node *nd; | ||
208 | |||
209 | for (nd = rb_first(&self->rb_root); nd; nd = rb_next(nd)) { | ||
210 | struct callchain_node *child = rb_entry(nd, struct callchain_node, rb_node); | ||
211 | struct callchain_list *chain; | ||
212 | bool has_children = false; | ||
213 | |||
214 | list_for_each_entry(chain, &child->val, list) { | ||
215 | ++n; | ||
216 | map_symbol__set_folding(&chain->ms, unfold); | ||
217 | has_children = chain->ms.has_children; | ||
218 | } | ||
219 | |||
220 | if (has_children) | ||
221 | n += callchain_node__set_folding_rb_tree(child, unfold); | ||
222 | } | ||
223 | |||
224 | return n; | ||
225 | } | ||
226 | |||
227 | static int callchain_node__set_folding(struct callchain_node *node, bool unfold) | ||
228 | { | ||
229 | struct callchain_list *chain; | ||
230 | bool has_children = false; | ||
231 | int n = 0; | ||
232 | |||
233 | list_for_each_entry(chain, &node->val, list) { | ||
234 | ++n; | ||
235 | map_symbol__set_folding(&chain->ms, unfold); | ||
236 | has_children = chain->ms.has_children; | ||
237 | } | ||
238 | |||
239 | if (has_children) | ||
240 | n += callchain_node__set_folding_rb_tree(node, unfold); | ||
241 | |||
242 | return n; | ||
243 | } | ||
244 | |||
245 | static int callchain__set_folding(struct rb_root *chain, bool unfold) | ||
246 | { | ||
247 | struct rb_node *nd; | ||
248 | int n = 0; | ||
249 | |||
250 | for (nd = rb_first(chain); nd; nd = rb_next(nd)) { | ||
251 | struct callchain_node *node = rb_entry(nd, struct callchain_node, rb_node); | ||
252 | n += callchain_node__set_folding(node, unfold); | ||
253 | } | ||
254 | |||
255 | return n; | ||
256 | } | ||
257 | |||
258 | static void hist_entry__set_folding(struct hist_entry *self, bool unfold) | ||
259 | { | ||
260 | hist_entry__init_have_children(self); | ||
261 | map_symbol__set_folding(&self->ms, unfold); | ||
262 | |||
263 | if (self->ms.has_children) { | ||
264 | int n = callchain__set_folding(&self->sorted_chain, unfold); | ||
265 | self->nr_rows = unfold ? n : 0; | ||
266 | } else | ||
267 | self->nr_rows = 0; | ||
268 | } | ||
269 | |||
270 | static void hists__set_folding(struct hists *self, bool unfold) | ||
271 | { | ||
272 | struct rb_node *nd; | ||
273 | |||
274 | self->nr_entries = 0; | ||
275 | |||
276 | for (nd = rb_first(&self->entries); nd; nd = rb_next(nd)) { | ||
277 | struct hist_entry *he = rb_entry(nd, struct hist_entry, rb_node); | ||
278 | hist_entry__set_folding(he, unfold); | ||
279 | self->nr_entries += 1 + he->nr_rows; | ||
280 | } | ||
281 | } | ||
282 | |||
283 | static void hist_browser__set_folding(struct hist_browser *self, bool unfold) | ||
284 | { | ||
285 | hists__set_folding(self->hists, unfold); | ||
286 | self->b.nr_entries = self->hists->nr_entries; | ||
287 | /* Go to the start, we may be way after valid entries after a collapse */ | ||
288 | ui_browser__reset_index(&self->b); | ||
289 | } | ||
290 | |||
291 | static int hist_browser__run(struct hist_browser *self, const char *title) | ||
200 | { | 292 | { |
201 | char str[256], unit; | 293 | int key; |
202 | unsigned long nr_events = self->hists->stats.nr_events[PERF_RECORD_SAMPLE]; | 294 | int exit_keys[] = { 'a', '?', 'h', 'C', 'd', 'D', 'E', 't', |
295 | NEWT_KEY_ENTER, NEWT_KEY_RIGHT, NEWT_KEY_LEFT, 0, }; | ||
203 | 296 | ||
204 | self->b.entries = &self->hists->entries; | 297 | self->b.entries = &self->hists->entries; |
205 | self->b.nr_entries = self->hists->nr_entries; | 298 | self->b.nr_entries = self->hists->nr_entries; |
206 | 299 | ||
207 | hist_browser__refresh_dimensions(self); | 300 | hist_browser__refresh_dimensions(self); |
208 | 301 | ||
209 | nr_events = convert_unit(nr_events, &unit); | ||
210 | snprintf(str, sizeof(str), "Events: %lu%c ", | ||
211 | nr_events, unit); | ||
212 | newtDrawRootText(0, 0, str); | ||
213 | |||
214 | if (ui_browser__show(&self->b, title, | 302 | if (ui_browser__show(&self->b, title, |
215 | "Press '?' for help on key bindings") < 0) | 303 | "Press '?' for help on key bindings") < 0) |
216 | return -1; | 304 | return -1; |
217 | 305 | ||
218 | newtFormAddHotKey(self->b.form, 'a'); | 306 | ui_browser__add_exit_keys(&self->b, exit_keys); |
219 | newtFormAddHotKey(self->b.form, '?'); | ||
220 | newtFormAddHotKey(self->b.form, 'h'); | ||
221 | newtFormAddHotKey(self->b.form, 'd'); | ||
222 | newtFormAddHotKey(self->b.form, 'D'); | ||
223 | newtFormAddHotKey(self->b.form, 't'); | ||
224 | |||
225 | newtFormAddHotKey(self->b.form, NEWT_KEY_LEFT); | ||
226 | newtFormAddHotKey(self->b.form, NEWT_KEY_RIGHT); | ||
227 | newtFormAddHotKey(self->b.form, NEWT_KEY_ENTER); | ||
228 | 307 | ||
229 | while (1) { | 308 | while (1) { |
230 | ui_browser__run(&self->b, es); | 309 | key = ui_browser__run(&self->b); |
231 | 310 | ||
232 | if (es->reason != NEWT_EXIT_HOTKEY) | 311 | switch (key) { |
233 | break; | ||
234 | switch (es->u.key) { | ||
235 | case 'D': { /* Debug */ | 312 | case 'D': { /* Debug */ |
236 | static int seq; | 313 | static int seq; |
237 | struct hist_entry *h = rb_entry(self->b.top, | 314 | struct hist_entry *h = rb_entry(self->b.top, |
@@ -245,18 +322,26 @@ static int hist_browser__run(struct hist_browser *self, const char *title, | |||
245 | self->b.top_idx, | 322 | self->b.top_idx, |
246 | h->row_offset, h->nr_rows); | 323 | h->row_offset, h->nr_rows); |
247 | } | 324 | } |
248 | continue; | 325 | break; |
326 | case 'C': | ||
327 | /* Collapse the whole world. */ | ||
328 | hist_browser__set_folding(self, false); | ||
329 | break; | ||
330 | case 'E': | ||
331 | /* Expand the whole world. */ | ||
332 | hist_browser__set_folding(self, true); | ||
333 | break; | ||
249 | case NEWT_KEY_ENTER: | 334 | case NEWT_KEY_ENTER: |
250 | if (hist_browser__toggle_fold(self)) | 335 | if (hist_browser__toggle_fold(self)) |
251 | break; | 336 | break; |
252 | /* fall thru */ | 337 | /* fall thru */ |
253 | default: | 338 | default: |
254 | return 0; | 339 | goto out; |
255 | } | 340 | } |
256 | } | 341 | } |
257 | 342 | out: | |
258 | ui_browser__hide(&self->b); | 343 | ui_browser__hide(&self->b); |
259 | return 0; | 344 | return key; |
260 | } | 345 | } |
261 | 346 | ||
262 | static char *callchain_list__sym_name(struct callchain_list *self, | 347 | static char *callchain_list__sym_name(struct callchain_list *self, |
@@ -306,15 +391,10 @@ static int hist_browser__show_callchain_node_rb_tree(struct hist_browser *self, | |||
306 | int color; | 391 | int color; |
307 | bool was_first = first; | 392 | bool was_first = first; |
308 | 393 | ||
309 | if (first) { | 394 | if (first) |
310 | first = false; | 395 | first = false; |
311 | chain->ms.has_children = chain->list.next != &child->val || | 396 | else |
312 | rb_first(&child->rb_root) != NULL; | ||
313 | } else { | ||
314 | extra_offset = LEVEL_OFFSET_STEP; | 397 | extra_offset = LEVEL_OFFSET_STEP; |
315 | chain->ms.has_children = chain->list.next == &child->val && | ||
316 | rb_first(&child->rb_root) != NULL; | ||
317 | } | ||
318 | 398 | ||
319 | folded_sign = callchain_list__folded(chain); | 399 | folded_sign = callchain_list__folded(chain); |
320 | if (*row_offset != 0) { | 400 | if (*row_offset != 0) { |
@@ -341,8 +421,8 @@ static int hist_browser__show_callchain_node_rb_tree(struct hist_browser *self, | |||
341 | *is_current_entry = true; | 421 | *is_current_entry = true; |
342 | } | 422 | } |
343 | 423 | ||
344 | SLsmg_set_color(color); | 424 | ui_browser__set_color(&self->b, color); |
345 | SLsmg_gotorc(self->b.y + row, self->b.x); | 425 | ui_browser__gotorc(&self->b, row, 0); |
346 | slsmg_write_nstring(" ", offset + extra_offset); | 426 | slsmg_write_nstring(" ", offset + extra_offset); |
347 | slsmg_printf("%c ", folded_sign); | 427 | slsmg_printf("%c ", folded_sign); |
348 | slsmg_write_nstring(str, width); | 428 | slsmg_write_nstring(str, width); |
@@ -384,12 +464,7 @@ static int hist_browser__show_callchain_node(struct hist_browser *self, | |||
384 | list_for_each_entry(chain, &node->val, list) { | 464 | list_for_each_entry(chain, &node->val, list) { |
385 | char ipstr[BITS_PER_LONG / 4 + 1], *s; | 465 | char ipstr[BITS_PER_LONG / 4 + 1], *s; |
386 | int color; | 466 | int color; |
387 | /* | 467 | |
388 | * FIXME: This should be moved to somewhere else, | ||
389 | * probably when the callchain is created, so as not to | ||
390 | * traverse it all over again | ||
391 | */ | ||
392 | chain->ms.has_children = rb_first(&node->rb_root) != NULL; | ||
393 | folded_sign = callchain_list__folded(chain); | 468 | folded_sign = callchain_list__folded(chain); |
394 | 469 | ||
395 | if (*row_offset != 0) { | 470 | if (*row_offset != 0) { |
@@ -405,8 +480,8 @@ static int hist_browser__show_callchain_node(struct hist_browser *self, | |||
405 | } | 480 | } |
406 | 481 | ||
407 | s = callchain_list__sym_name(chain, ipstr, sizeof(ipstr)); | 482 | s = callchain_list__sym_name(chain, ipstr, sizeof(ipstr)); |
408 | SLsmg_gotorc(self->b.y + row, self->b.x); | 483 | ui_browser__gotorc(&self->b, row, 0); |
409 | SLsmg_set_color(color); | 484 | ui_browser__set_color(&self->b, color); |
410 | slsmg_write_nstring(" ", offset); | 485 | slsmg_write_nstring(" ", offset); |
411 | slsmg_printf("%c ", folded_sign); | 486 | slsmg_printf("%c ", folded_sign); |
412 | slsmg_write_nstring(s, width - 2); | 487 | slsmg_write_nstring(s, width - 2); |
@@ -465,7 +540,7 @@ static int hist_browser__show_entry(struct hist_browser *self, | |||
465 | } | 540 | } |
466 | 541 | ||
467 | if (symbol_conf.use_callchain) { | 542 | if (symbol_conf.use_callchain) { |
468 | entry->ms.has_children = !RB_EMPTY_ROOT(&entry->sorted_chain); | 543 | hist_entry__init_have_children(entry); |
469 | folded_sign = hist_entry__folded(entry); | 544 | folded_sign = hist_entry__folded(entry); |
470 | } | 545 | } |
471 | 546 | ||
@@ -484,8 +559,8 @@ static int hist_browser__show_entry(struct hist_browser *self, | |||
484 | color = HE_COLORSET_NORMAL; | 559 | color = HE_COLORSET_NORMAL; |
485 | } | 560 | } |
486 | 561 | ||
487 | SLsmg_set_color(color); | 562 | ui_browser__set_color(&self->b, color); |
488 | SLsmg_gotorc(self->b.y + row, self->b.x); | 563 | ui_browser__gotorc(&self->b, row, 0); |
489 | if (symbol_conf.use_callchain) { | 564 | if (symbol_conf.use_callchain) { |
490 | slsmg_printf("%c ", folded_sign); | 565 | slsmg_printf("%c ", folded_sign); |
491 | width -= 2; | 566 | width -= 2; |
@@ -687,8 +762,6 @@ static struct hist_browser *hist_browser__new(struct hists *hists) | |||
687 | 762 | ||
688 | static void hist_browser__delete(struct hist_browser *self) | 763 | static void hist_browser__delete(struct hist_browser *self) |
689 | { | 764 | { |
690 | newtFormDestroy(self->b.form); | ||
691 | newtPopWindow(); | ||
692 | free(self); | 765 | free(self); |
693 | } | 766 | } |
694 | 767 | ||
@@ -702,21 +775,26 @@ static struct thread *hist_browser__selected_thread(struct hist_browser *self) | |||
702 | return self->he_selection->thread; | 775 | return self->he_selection->thread; |
703 | } | 776 | } |
704 | 777 | ||
705 | static int hist_browser__title(char *bf, size_t size, const char *ev_name, | 778 | static int hists__browser_title(struct hists *self, char *bf, size_t size, |
706 | const struct dso *dso, const struct thread *thread) | 779 | const char *ev_name, const struct dso *dso, |
780 | const struct thread *thread) | ||
707 | { | 781 | { |
708 | int printed = 0; | 782 | char unit; |
783 | int printed; | ||
784 | unsigned long nr_events = self->stats.nr_events[PERF_RECORD_SAMPLE]; | ||
785 | |||
786 | nr_events = convert_unit(nr_events, &unit); | ||
787 | printed = snprintf(bf, size, "Events: %lu%c %s", nr_events, unit, ev_name); | ||
709 | 788 | ||
710 | if (thread) | 789 | if (thread) |
711 | printed += snprintf(bf + printed, size - printed, | 790 | printed += snprintf(bf + printed, size - printed, |
712 | "Thread: %s(%d)", | 791 | ", Thread: %s(%d)", |
713 | (thread->comm_set ? thread->comm : ""), | 792 | (thread->comm_set ? thread->comm : ""), |
714 | thread->pid); | 793 | thread->pid); |
715 | if (dso) | 794 | if (dso) |
716 | printed += snprintf(bf + printed, size - printed, | 795 | printed += snprintf(bf + printed, size - printed, |
717 | "%sDSO: %s", thread ? " " : "", | 796 | ", DSO: %s", dso->short_name); |
718 | dso->short_name); | 797 | return printed; |
719 | return printed ?: snprintf(bf, size, "Event: %s", ev_name); | ||
720 | } | 798 | } |
721 | 799 | ||
722 | int hists__browse(struct hists *self, const char *helpline, const char *ev_name) | 800 | int hists__browse(struct hists *self, const char *helpline, const char *ev_name) |
@@ -725,7 +803,6 @@ int hists__browse(struct hists *self, const char *helpline, const char *ev_name) | |||
725 | struct pstack *fstack; | 803 | struct pstack *fstack; |
726 | const struct thread *thread_filter = NULL; | 804 | const struct thread *thread_filter = NULL; |
727 | const struct dso *dso_filter = NULL; | 805 | const struct dso *dso_filter = NULL; |
728 | struct newtExitStruct es; | ||
729 | char msg[160]; | 806 | char msg[160]; |
730 | int key = -1; | 807 | int key = -1; |
731 | 808 | ||
@@ -738,9 +815,8 @@ int hists__browse(struct hists *self, const char *helpline, const char *ev_name) | |||
738 | 815 | ||
739 | ui_helpline__push(helpline); | 816 | ui_helpline__push(helpline); |
740 | 817 | ||
741 | hist_browser__title(msg, sizeof(msg), ev_name, | 818 | hists__browser_title(self, msg, sizeof(msg), ev_name, |
742 | dso_filter, thread_filter); | 819 | dso_filter, thread_filter); |
743 | |||
744 | while (1) { | 820 | while (1) { |
745 | const struct thread *thread; | 821 | const struct thread *thread; |
746 | const struct dso *dso; | 822 | const struct dso *dso; |
@@ -749,70 +825,63 @@ int hists__browse(struct hists *self, const char *helpline, const char *ev_name) | |||
749 | annotate = -2, zoom_dso = -2, zoom_thread = -2, | 825 | annotate = -2, zoom_dso = -2, zoom_thread = -2, |
750 | browse_map = -2; | 826 | browse_map = -2; |
751 | 827 | ||
752 | if (hist_browser__run(browser, msg, &es)) | 828 | key = hist_browser__run(browser, msg); |
753 | break; | ||
754 | 829 | ||
755 | thread = hist_browser__selected_thread(browser); | 830 | thread = hist_browser__selected_thread(browser); |
756 | dso = browser->selection->map ? browser->selection->map->dso : NULL; | 831 | dso = browser->selection->map ? browser->selection->map->dso : NULL; |
757 | 832 | ||
758 | if (es.reason == NEWT_EXIT_HOTKEY) { | 833 | switch (key) { |
759 | key = es.u.key; | 834 | case NEWT_KEY_TAB: |
760 | 835 | case NEWT_KEY_UNTAB: | |
761 | switch (key) { | 836 | /* |
762 | case NEWT_KEY_F1: | 837 | * Exit the browser, let hists__browser_tree |
763 | goto do_help; | 838 | * go to the next or previous |
764 | case NEWT_KEY_TAB: | 839 | */ |
765 | case NEWT_KEY_UNTAB: | 840 | goto out_free_stack; |
766 | /* | 841 | case 'a': |
767 | * Exit the browser, let hists__browser_tree | 842 | if (browser->selection->map == NULL && |
768 | * go to the next or previous | 843 | browser->selection->map->dso->annotate_warned) |
769 | */ | ||
770 | goto out_free_stack; | ||
771 | default:; | ||
772 | } | ||
773 | |||
774 | switch (key) { | ||
775 | case 'a': | ||
776 | if (browser->selection->map == NULL || | ||
777 | browser->selection->map->dso->annotate_warned) | ||
778 | continue; | ||
779 | goto do_annotate; | ||
780 | case 'd': | ||
781 | goto zoom_dso; | ||
782 | case 't': | ||
783 | goto zoom_thread; | ||
784 | case 'h': | ||
785 | case '?': | ||
786 | do_help: | ||
787 | ui__help_window("-> Zoom into DSO/Threads & Annotate current symbol\n" | ||
788 | "<- Zoom out\n" | ||
789 | "a Annotate current symbol\n" | ||
790 | "h/?/F1 Show this window\n" | ||
791 | "d Zoom into current DSO\n" | ||
792 | "t Zoom into current Thread\n" | ||
793 | "q/CTRL+C Exit browser"); | ||
794 | continue; | 844 | continue; |
795 | default:; | 845 | goto do_annotate; |
796 | } | 846 | case 'd': |
797 | if (is_exit_key(key)) { | 847 | goto zoom_dso; |
798 | if (key == NEWT_KEY_ESCAPE && | 848 | case 't': |
799 | !ui__dialog_yesno("Do you really want to exit?")) | 849 | goto zoom_thread; |
800 | continue; | 850 | case NEWT_KEY_F1: |
801 | break; | 851 | case 'h': |
802 | } | 852 | case '?': |
803 | 853 | ui__help_window("-> Zoom into DSO/Threads & Annotate current symbol\n" | |
804 | if (es.u.key == NEWT_KEY_LEFT) { | 854 | "<- Zoom out\n" |
805 | const void *top; | 855 | "a Annotate current symbol\n" |
856 | "h/?/F1 Show this window\n" | ||
857 | "C Collapse all callchains\n" | ||
858 | "E Expand all callchains\n" | ||
859 | "d Zoom into current DSO\n" | ||
860 | "t Zoom into current Thread\n" | ||
861 | "q/CTRL+C Exit browser"); | ||
862 | continue; | ||
863 | case NEWT_KEY_ENTER: | ||
864 | case NEWT_KEY_RIGHT: | ||
865 | /* menu */ | ||
866 | break; | ||
867 | case NEWT_KEY_LEFT: { | ||
868 | const void *top; | ||
806 | 869 | ||
807 | if (pstack__empty(fstack)) | 870 | if (pstack__empty(fstack)) |
808 | continue; | ||
809 | top = pstack__pop(fstack); | ||
810 | if (top == &dso_filter) | ||
811 | goto zoom_out_dso; | ||
812 | if (top == &thread_filter) | ||
813 | goto zoom_out_thread; | ||
814 | continue; | 871 | continue; |
815 | } | 872 | top = pstack__pop(fstack); |
873 | if (top == &dso_filter) | ||
874 | goto zoom_out_dso; | ||
875 | if (top == &thread_filter) | ||
876 | goto zoom_out_thread; | ||
877 | continue; | ||
878 | } | ||
879 | case NEWT_KEY_ESCAPE: | ||
880 | if (!ui__dialog_yesno("Do you really want to exit?")) | ||
881 | continue; | ||
882 | /* Fall thru */ | ||
883 | default: | ||
884 | goto out_free_stack; | ||
816 | } | 885 | } |
817 | 886 | ||
818 | if (browser->selection->sym != NULL && | 887 | if (browser->selection->sym != NULL && |
@@ -885,8 +954,8 @@ zoom_out_dso: | |||
885 | pstack__push(fstack, &dso_filter); | 954 | pstack__push(fstack, &dso_filter); |
886 | } | 955 | } |
887 | hists__filter_by_dso(self, dso_filter); | 956 | hists__filter_by_dso(self, dso_filter); |
888 | hist_browser__title(msg, sizeof(msg), ev_name, | 957 | hists__browser_title(self, msg, sizeof(msg), ev_name, |
889 | dso_filter, thread_filter); | 958 | dso_filter, thread_filter); |
890 | hist_browser__reset(browser); | 959 | hist_browser__reset(browser); |
891 | } else if (choice == zoom_thread) { | 960 | } else if (choice == zoom_thread) { |
892 | zoom_thread: | 961 | zoom_thread: |
@@ -903,8 +972,8 @@ zoom_out_thread: | |||
903 | pstack__push(fstack, &thread_filter); | 972 | pstack__push(fstack, &thread_filter); |
904 | } | 973 | } |
905 | hists__filter_by_thread(self, thread_filter); | 974 | hists__filter_by_thread(self, thread_filter); |
906 | hist_browser__title(msg, sizeof(msg), ev_name, | 975 | hists__browser_title(self, msg, sizeof(msg), ev_name, |
907 | dso_filter, thread_filter); | 976 | dso_filter, thread_filter); |
908 | hist_browser__reset(browser); | 977 | hist_browser__reset(browser); |
909 | } | 978 | } |
910 | } | 979 | } |
@@ -925,10 +994,6 @@ int hists__tui_browse_tree(struct rb_root *self, const char *help) | |||
925 | const char *ev_name = __event_name(hists->type, hists->config); | 994 | const char *ev_name = __event_name(hists->type, hists->config); |
926 | 995 | ||
927 | key = hists__browse(hists, help, ev_name); | 996 | key = hists__browse(hists, help, ev_name); |
928 | |||
929 | if (is_exit_key(key)) | ||
930 | break; | ||
931 | |||
932 | switch (key) { | 997 | switch (key) { |
933 | case NEWT_KEY_TAB: | 998 | case NEWT_KEY_TAB: |
934 | next = rb_next(nd); | 999 | next = rb_next(nd); |
@@ -940,7 +1005,7 @@ int hists__tui_browse_tree(struct rb_root *self, const char *help) | |||
940 | continue; | 1005 | continue; |
941 | nd = rb_prev(nd); | 1006 | nd = rb_prev(nd); |
942 | default: | 1007 | default: |
943 | break; | 1008 | return key; |
944 | } | 1009 | } |
945 | } | 1010 | } |
946 | 1011 | ||
diff --git a/tools/perf/util/ui/browsers/map.c b/tools/perf/util/ui/browsers/map.c index 142b825b42bf..e35437dfa5b4 100644 --- a/tools/perf/util/ui/browsers/map.c +++ b/tools/perf/util/ui/browsers/map.c | |||
@@ -1,6 +1,5 @@ | |||
1 | #include "../libslang.h" | 1 | #include "../libslang.h" |
2 | #include <elf.h> | 2 | #include <elf.h> |
3 | #include <newt.h> | ||
4 | #include <sys/ttydefaults.h> | 3 | #include <sys/ttydefaults.h> |
5 | #include <ctype.h> | 4 | #include <ctype.h> |
6 | #include <string.h> | 5 | #include <string.h> |
@@ -47,7 +46,6 @@ out_free_form: | |||
47 | struct map_browser { | 46 | struct map_browser { |
48 | struct ui_browser b; | 47 | struct ui_browser b; |
49 | struct map *map; | 48 | struct map *map; |
50 | u16 namelen; | ||
51 | u8 addrlen; | 49 | u8 addrlen; |
52 | }; | 50 | }; |
53 | 51 | ||
@@ -56,14 +54,16 @@ static void map_browser__write(struct ui_browser *self, void *nd, int row) | |||
56 | struct symbol *sym = rb_entry(nd, struct symbol, rb_node); | 54 | struct symbol *sym = rb_entry(nd, struct symbol, rb_node); |
57 | struct map_browser *mb = container_of(self, struct map_browser, b); | 55 | struct map_browser *mb = container_of(self, struct map_browser, b); |
58 | bool current_entry = ui_browser__is_current_entry(self, row); | 56 | bool current_entry = ui_browser__is_current_entry(self, row); |
59 | int color = ui_browser__percent_color(0, current_entry); | 57 | int width; |
60 | 58 | ||
61 | SLsmg_set_color(color); | 59 | ui_browser__set_percent_color(self, 0, current_entry); |
62 | slsmg_printf("%*llx %*llx %c ", | 60 | slsmg_printf("%*llx %*llx %c ", |
63 | mb->addrlen, sym->start, mb->addrlen, sym->end, | 61 | mb->addrlen, sym->start, mb->addrlen, sym->end, |
64 | sym->binding == STB_GLOBAL ? 'g' : | 62 | sym->binding == STB_GLOBAL ? 'g' : |
65 | sym->binding == STB_LOCAL ? 'l' : 'w'); | 63 | sym->binding == STB_LOCAL ? 'l' : 'w'); |
66 | slsmg_write_nstring(sym->name, mb->namelen); | 64 | width = self->width - ((mb->addrlen * 2) + 4); |
65 | if (width > 0) | ||
66 | slsmg_write_nstring(sym->name, width); | ||
67 | } | 67 | } |
68 | 68 | ||
69 | /* FIXME uber-kludgy, see comment on cmd_report... */ | 69 | /* FIXME uber-kludgy, see comment on cmd_report... */ |
@@ -98,31 +98,29 @@ static int map_browser__search(struct map_browser *self) | |||
98 | return 0; | 98 | return 0; |
99 | } | 99 | } |
100 | 100 | ||
101 | static int map_browser__run(struct map_browser *self, struct newtExitStruct *es) | 101 | static int map_browser__run(struct map_browser *self) |
102 | { | 102 | { |
103 | int key; | ||
104 | |||
103 | if (ui_browser__show(&self->b, self->map->dso->long_name, | 105 | if (ui_browser__show(&self->b, self->map->dso->long_name, |
104 | "Press <- or ESC to exit, %s / to search", | 106 | "Press <- or ESC to exit, %s / to search", |
105 | verbose ? "" : "restart with -v to use") < 0) | 107 | verbose ? "" : "restart with -v to use") < 0) |
106 | return -1; | 108 | return -1; |
107 | 109 | ||
108 | newtFormAddHotKey(self->b.form, NEWT_KEY_LEFT); | ||
109 | newtFormAddHotKey(self->b.form, NEWT_KEY_ENTER); | ||
110 | if (verbose) | 110 | if (verbose) |
111 | newtFormAddHotKey(self->b.form, '/'); | 111 | ui_browser__add_exit_key(&self->b, '/'); |
112 | 112 | ||
113 | while (1) { | 113 | while (1) { |
114 | ui_browser__run(&self->b, es); | 114 | key = ui_browser__run(&self->b); |
115 | 115 | ||
116 | if (es->reason != NEWT_EXIT_HOTKEY) | 116 | if (verbose && key == '/') |
117 | break; | ||
118 | if (verbose && es->u.key == '/') | ||
119 | map_browser__search(self); | 117 | map_browser__search(self); |
120 | else | 118 | else |
121 | break; | 119 | break; |
122 | } | 120 | } |
123 | 121 | ||
124 | ui_browser__hide(&self->b); | 122 | ui_browser__hide(&self->b); |
125 | return 0; | 123 | return key; |
126 | } | 124 | } |
127 | 125 | ||
128 | int map__browse(struct map *self) | 126 | int map__browse(struct map *self) |
@@ -136,7 +134,6 @@ int map__browse(struct map *self) | |||
136 | }, | 134 | }, |
137 | .map = self, | 135 | .map = self, |
138 | }; | 136 | }; |
139 | struct newtExitStruct es; | ||
140 | struct rb_node *nd; | 137 | struct rb_node *nd; |
141 | char tmp[BITS_PER_LONG / 4]; | 138 | char tmp[BITS_PER_LONG / 4]; |
142 | u64 maxaddr = 0; | 139 | u64 maxaddr = 0; |
@@ -144,8 +141,6 @@ int map__browse(struct map *self) | |||
144 | for (nd = rb_first(mb.b.entries); nd; nd = rb_next(nd)) { | 141 | for (nd = rb_first(mb.b.entries); nd; nd = rb_next(nd)) { |
145 | struct symbol *pos = rb_entry(nd, struct symbol, rb_node); | 142 | struct symbol *pos = rb_entry(nd, struct symbol, rb_node); |
146 | 143 | ||
147 | if (mb.namelen < pos->namelen) | ||
148 | mb.namelen = pos->namelen; | ||
149 | if (maxaddr < pos->end) | 144 | if (maxaddr < pos->end) |
150 | maxaddr = pos->end; | 145 | maxaddr = pos->end; |
151 | if (verbose) { | 146 | if (verbose) { |
@@ -156,6 +151,5 @@ int map__browse(struct map *self) | |||
156 | } | 151 | } |
157 | 152 | ||
158 | mb.addrlen = snprintf(tmp, sizeof(tmp), "%llx", maxaddr); | 153 | mb.addrlen = snprintf(tmp, sizeof(tmp), "%llx", maxaddr); |
159 | mb.b.width += mb.addrlen * 2 + 4 + mb.namelen; | 154 | return map_browser__run(&mb); |
160 | return map_browser__run(&mb, &es); | ||
161 | } | 155 | } |
diff --git a/tools/perf/util/ui/util.c b/tools/perf/util/ui/util.c index 04600e26ceea..9706d9d40279 100644 --- a/tools/perf/util/ui/util.c +++ b/tools/perf/util/ui/util.c | |||
@@ -11,8 +11,6 @@ | |||
11 | #include "helpline.h" | 11 | #include "helpline.h" |
12 | #include "util.h" | 12 | #include "util.h" |
13 | 13 | ||
14 | newtComponent newt_form__new(void); | ||
15 | |||
16 | static void newt_form__set_exit_keys(newtComponent self) | 14 | static void newt_form__set_exit_keys(newtComponent self) |
17 | { | 15 | { |
18 | newtFormAddHotKey(self, NEWT_KEY_LEFT); | 16 | newtFormAddHotKey(self, NEWT_KEY_LEFT); |
@@ -22,7 +20,7 @@ static void newt_form__set_exit_keys(newtComponent self) | |||
22 | newtFormAddHotKey(self, CTRL('c')); | 20 | newtFormAddHotKey(self, CTRL('c')); |
23 | } | 21 | } |
24 | 22 | ||
25 | newtComponent newt_form__new(void) | 23 | static newtComponent newt_form__new(void) |
26 | { | 24 | { |
27 | newtComponent self = newtForm(NULL, NULL, 0); | 25 | newtComponent self = newtForm(NULL, NULL, 0); |
28 | if (self) | 26 | if (self) |
diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h index f380fed74359..7562707ddd1c 100644 --- a/tools/perf/util/util.h +++ b/tools/perf/util/util.h | |||
@@ -266,19 +266,6 @@ bool strglobmatch(const char *str, const char *pat); | |||
266 | bool strlazymatch(const char *str, const char *pat); | 266 | bool strlazymatch(const char *str, const char *pat); |
267 | unsigned long convert_unit(unsigned long value, char *unit); | 267 | unsigned long convert_unit(unsigned long value, char *unit); |
268 | 268 | ||
269 | #ifndef ESC | ||
270 | #define ESC 27 | ||
271 | #endif | ||
272 | |||
273 | static inline bool is_exit_key(int key) | ||
274 | { | ||
275 | char up; | ||
276 | if (key == CTRL('c') || key == ESC) | ||
277 | return true; | ||
278 | up = toupper(key); | ||
279 | return up == 'Q'; | ||
280 | } | ||
281 | |||
282 | #define _STR(x) #x | 269 | #define _STR(x) #x |
283 | #define STR(x) _STR(x) | 270 | #define STR(x) _STR(x) |
284 | 271 | ||