aboutsummaryrefslogtreecommitdiffstats
path: root/include
diff options
context:
space:
mode:
Diffstat (limited to 'include')
-rw-r--r--include/acpi/acpixf.h2
-rw-r--r--include/asm-generic/atomic.h5
-rw-r--r--include/asm-generic/cmpxchg-local.h1
-rw-r--r--include/asm-generic/hardirq.h2
-rw-r--r--include/asm-generic/irqflags.h52
-rw-r--r--include/asm-generic/pgtable.h4
-rw-r--r--include/asm-generic/vmlinux.lds.h14
-rw-r--r--include/drm/drmP.h29
-rw-r--r--include/drm/drm_pciids.h2
-rw-r--r--include/drm/ttm/ttm_bo_api.h4
-rw-r--r--include/linux/Kbuild1
-rw-r--r--include/linux/acpi_pmtmr.h2
-rw-r--r--include/linux/amba/bus.h15
-rw-r--r--include/linux/amba/mmci.h2
-rw-r--r--include/linux/amba/pl022.h13
-rw-r--r--include/linux/amba/serial.h11
-rw-r--r--include/linux/ata.h46
-rw-r--r--include/linux/ceph/auth.h92
-rw-r--r--include/linux/ceph/buffer.h39
-rw-r--r--include/linux/ceph/ceph_debug.h38
-rw-r--r--include/linux/ceph/ceph_frag.h109
-rw-r--r--include/linux/ceph/ceph_fs.h729
-rw-r--r--include/linux/ceph/ceph_hash.h13
-rw-r--r--include/linux/ceph/debugfs.h33
-rw-r--r--include/linux/ceph/decode.h201
-rw-r--r--include/linux/ceph/libceph.h249
-rw-r--r--include/linux/ceph/mdsmap.h62
-rw-r--r--include/linux/ceph/messenger.h261
-rw-r--r--include/linux/ceph/mon_client.h122
-rw-r--r--include/linux/ceph/msgpool.h25
-rw-r--r--include/linux/ceph/msgr.h175
-rw-r--r--include/linux/ceph/osd_client.h234
-rw-r--r--include/linux/ceph/osdmap.h130
-rw-r--r--include/linux/ceph/pagelist.h75
-rw-r--r--include/linux/ceph/rados.h405
-rw-r--r--include/linux/ceph/types.h29
-rw-r--r--include/linux/cgroup.h4
-rw-r--r--include/linux/compiler.h4
-rw-r--r--include/linux/coredump.h34
-rw-r--r--include/linux/cpuidle.h1
-rw-r--r--include/linux/cred.h2
-rw-r--r--include/linux/crush/crush.h180
-rw-r--r--include/linux/crush/hash.h17
-rw-r--r--include/linux/crush/mapper.h20
-rw-r--r--include/linux/debug_locks.h5
-rw-r--r--include/linux/dmaengine.h2
-rw-r--r--include/linux/dmar.h16
-rw-r--r--include/linux/dynamic_debug.h39
-rw-r--r--include/linux/early_res.h23
-rw-r--r--include/linux/edac.h4
-rw-r--r--include/linux/elevator.h1
-rw-r--r--include/linux/fdtable.h6
-rw-r--r--include/linux/fs.h4
-rw-r--r--include/linux/ftrace_event.h8
-rw-r--r--include/linux/genhd.h6
-rw-r--r--include/linux/hardirq.h12
-rw-r--r--include/linux/htirq.h5
-rw-r--r--include/linux/idr.h4
-rw-r--r--include/linux/init_task.h14
-rw-r--r--include/linux/input.h2
-rw-r--r--include/linux/interrupt.h11
-rw-r--r--include/linux/iocontext.h2
-rw-r--r--include/linux/irq.h447
-rw-r--r--include/linux/irq_work.h20
-rw-r--r--include/linux/irqdesc.h159
-rw-r--r--include/linux/irqflags.h107
-rw-r--r--include/linux/irqnr.h5
-rw-r--r--include/linux/jump_label.h74
-rw-r--r--include/linux/jump_label_ref.h44
-rw-r--r--include/linux/kernel.h13
-rw-r--r--include/linux/key.h3
-rw-r--r--include/linux/kvm_host.h2
-rw-r--r--include/linux/libata.h65
-rw-r--r--include/linux/list.h1
-rw-r--r--include/linux/lockdep.h21
-rw-r--r--include/linux/memblock.h168
-rw-r--r--include/linux/mfd/tc35892.h4
-rw-r--r--include/linux/mm.h2
-rw-r--r--include/linux/mm_types.h2
-rw-r--r--include/linux/module.h10
-rw-r--r--include/linux/msi.h13
-rw-r--r--include/linux/netfilter/nfnetlink_conntrack.h10
-rw-r--r--include/linux/netfilter/xt_SECMARK.h12
-rw-r--r--include/linux/nfs_fs.h2
-rw-r--r--include/linux/notifier.h10
-rw-r--r--include/linux/opp.h105
-rw-r--r--include/linux/oprofile.h7
-rw-r--r--include/linux/pci_ids.h9
-rw-r--r--include/linux/percpu-defs.h21
-rw-r--r--include/linux/percpu.h9
-rw-r--r--include/linux/perf_event.h212
-rw-r--r--include/linux/pm.h38
-rw-r--r--include/linux/pm_runtime.h121
-rw-r--r--include/linux/pm_wakeup.h127
-rw-r--r--include/linux/radix-tree.h4
-rw-r--r--include/linux/rculist.h62
-rw-r--r--include/linux/rculist_nulls.h16
-rw-r--r--include/linux/rcupdate.h490
-rw-r--r--include/linux/rcutiny.h104
-rw-r--r--include/linux/rcutree.h57
-rw-r--r--include/linux/resume-trace.h2
-rw-r--r--include/linux/sched.h48
-rw-r--r--include/linux/security.h45
-rw-r--r--include/linux/selinux.h63
-rw-r--r--include/linux/smp_lock.h7
-rw-r--r--include/linux/spinlock.h1
-rw-r--r--include/linux/srcu.h34
-rw-r--r--include/linux/stop_machine.h10
-rw-r--r--include/linux/sunrpc/auth_gss.h4
-rw-r--r--include/linux/suspend.h6
-rw-r--r--include/linux/sysfs.h15
-rw-r--r--include/linux/thread_info.h4
-rw-r--r--include/linux/topology.h6
-rw-r--r--include/linux/tracepoint.h5
-rw-r--r--include/linux/types.h15
-rw-r--r--include/linux/wait.h1
-rw-r--r--include/media/videobuf-dma-sg.h1
-rw-r--r--include/net/bluetooth/bluetooth.h18
-rw-r--r--include/net/cls_cgroup.h3
-rw-r--r--include/net/netfilter/nf_conntrack.h2
-rw-r--r--include/pcmcia/cs.h95
-rw-r--r--include/pcmcia/ds.h85
-rw-r--r--include/pcmcia/ss.h1
-rw-r--r--include/trace/events/irq.h26
-rw-r--r--include/trace/events/napi.h25
-rw-r--r--include/trace/events/net.h82
-rw-r--r--include/trace/events/power.h90
-rw-r--r--include/trace/events/sched.h29
-rw-r--r--include/trace/events/skb.h17
129 files changed, 5706 insertions, 1262 deletions
diff --git a/include/acpi/acpixf.h b/include/acpi/acpixf.h
index c0786d446a00..984cdc62e30b 100644
--- a/include/acpi/acpixf.h
+++ b/include/acpi/acpixf.h
@@ -55,7 +55,7 @@
55extern u8 acpi_gbl_permanent_mmap; 55extern u8 acpi_gbl_permanent_mmap;
56 56
57/* 57/*
58 * Globals that are publically available, allowing for 58 * Globals that are publicly available, allowing for
59 * run time configuration 59 * run time configuration
60 */ 60 */
61extern u32 acpi_dbg_level; 61extern u32 acpi_dbg_level;
diff --git a/include/asm-generic/atomic.h b/include/asm-generic/atomic.h
index e53347fbf1da..fd57b8477fab 100644
--- a/include/asm-generic/atomic.h
+++ b/include/asm-generic/atomic.h
@@ -43,6 +43,7 @@
43 */ 43 */
44#define atomic_set(v, i) (((v)->counter) = (i)) 44#define atomic_set(v, i) (((v)->counter) = (i))
45 45
46#include <linux/irqflags.h>
46#include <asm/system.h> 47#include <asm/system.h>
47 48
48/** 49/**
@@ -57,7 +58,7 @@ static inline int atomic_add_return(int i, atomic_t *v)
57 unsigned long flags; 58 unsigned long flags;
58 int temp; 59 int temp;
59 60
60 raw_local_irq_save(flags); /* Don't trace it in a irqsoff handler */ 61 raw_local_irq_save(flags); /* Don't trace it in an irqsoff handler */
61 temp = v->counter; 62 temp = v->counter;
62 temp += i; 63 temp += i;
63 v->counter = temp; 64 v->counter = temp;
@@ -78,7 +79,7 @@ static inline int atomic_sub_return(int i, atomic_t *v)
78 unsigned long flags; 79 unsigned long flags;
79 int temp; 80 int temp;
80 81
81 raw_local_irq_save(flags); /* Don't trace it in a irqsoff handler */ 82 raw_local_irq_save(flags); /* Don't trace it in an irqsoff handler */
82 temp = v->counter; 83 temp = v->counter;
83 temp -= i; 84 temp -= i;
84 v->counter = temp; 85 v->counter = temp;
diff --git a/include/asm-generic/cmpxchg-local.h b/include/asm-generic/cmpxchg-local.h
index b2ba2fc8829a..2533fddd34a6 100644
--- a/include/asm-generic/cmpxchg-local.h
+++ b/include/asm-generic/cmpxchg-local.h
@@ -2,6 +2,7 @@
2#define __ASM_GENERIC_CMPXCHG_LOCAL_H 2#define __ASM_GENERIC_CMPXCHG_LOCAL_H
3 3
4#include <linux/types.h> 4#include <linux/types.h>
5#include <linux/irqflags.h>
5 6
6extern unsigned long wrong_size_cmpxchg(volatile void *ptr); 7extern unsigned long wrong_size_cmpxchg(volatile void *ptr);
7 8
diff --git a/include/asm-generic/hardirq.h b/include/asm-generic/hardirq.h
index 62f59080e5cc..04d0a977cd43 100644
--- a/include/asm-generic/hardirq.h
+++ b/include/asm-generic/hardirq.h
@@ -3,13 +3,13 @@
3 3
4#include <linux/cache.h> 4#include <linux/cache.h>
5#include <linux/threads.h> 5#include <linux/threads.h>
6#include <linux/irq.h>
7 6
8typedef struct { 7typedef struct {
9 unsigned int __softirq_pending; 8 unsigned int __softirq_pending;
10} ____cacheline_aligned irq_cpustat_t; 9} ____cacheline_aligned irq_cpustat_t;
11 10
12#include <linux/irq_cpustat.h> /* Standard mappings for irq_cpustat_t above */ 11#include <linux/irq_cpustat.h> /* Standard mappings for irq_cpustat_t above */
12#include <linux/irq.h>
13 13
14#ifndef ack_bad_irq 14#ifndef ack_bad_irq
15static inline void ack_bad_irq(unsigned int irq) 15static inline void ack_bad_irq(unsigned int irq)
diff --git a/include/asm-generic/irqflags.h b/include/asm-generic/irqflags.h
index 9aebf618275a..1f40d0024cf3 100644
--- a/include/asm-generic/irqflags.h
+++ b/include/asm-generic/irqflags.h
@@ -5,68 +5,62 @@
5 * All architectures should implement at least the first two functions, 5 * All architectures should implement at least the first two functions,
6 * usually inline assembly will be the best way. 6 * usually inline assembly will be the best way.
7 */ 7 */
8#ifndef RAW_IRQ_DISABLED 8#ifndef ARCH_IRQ_DISABLED
9#define RAW_IRQ_DISABLED 0 9#define ARCH_IRQ_DISABLED 0
10#define RAW_IRQ_ENABLED 1 10#define ARCH_IRQ_ENABLED 1
11#endif 11#endif
12 12
13/* read interrupt enabled status */ 13/* read interrupt enabled status */
14#ifndef __raw_local_save_flags 14#ifndef arch_local_save_flags
15unsigned long __raw_local_save_flags(void); 15unsigned long arch_local_save_flags(void);
16#endif 16#endif
17 17
18/* set interrupt enabled status */ 18/* set interrupt enabled status */
19#ifndef raw_local_irq_restore 19#ifndef arch_local_irq_restore
20void raw_local_irq_restore(unsigned long flags); 20void arch_local_irq_restore(unsigned long flags);
21#endif 21#endif
22 22
23/* get status and disable interrupts */ 23/* get status and disable interrupts */
24#ifndef __raw_local_irq_save 24#ifndef arch_local_irq_save
25static inline unsigned long __raw_local_irq_save(void) 25static inline unsigned long arch_local_irq_save(void)
26{ 26{
27 unsigned long flags; 27 unsigned long flags;
28 flags = __raw_local_save_flags(); 28 flags = arch_local_save_flags();
29 raw_local_irq_restore(RAW_IRQ_DISABLED); 29 arch_local_irq_restore(ARCH_IRQ_DISABLED);
30 return flags; 30 return flags;
31} 31}
32#endif 32#endif
33 33
34/* test flags */ 34/* test flags */
35#ifndef raw_irqs_disabled_flags 35#ifndef arch_irqs_disabled_flags
36static inline int raw_irqs_disabled_flags(unsigned long flags) 36static inline int arch_irqs_disabled_flags(unsigned long flags)
37{ 37{
38 return flags == RAW_IRQ_DISABLED; 38 return flags == ARCH_IRQ_DISABLED;
39} 39}
40#endif 40#endif
41 41
42/* unconditionally enable interrupts */ 42/* unconditionally enable interrupts */
43#ifndef raw_local_irq_enable 43#ifndef arch_local_irq_enable
44static inline void raw_local_irq_enable(void) 44static inline void arch_local_irq_enable(void)
45{ 45{
46 raw_local_irq_restore(RAW_IRQ_ENABLED); 46 arch_local_irq_restore(ARCH_IRQ_ENABLED);
47} 47}
48#endif 48#endif
49 49
50/* unconditionally disable interrupts */ 50/* unconditionally disable interrupts */
51#ifndef raw_local_irq_disable 51#ifndef arch_local_irq_disable
52static inline void raw_local_irq_disable(void) 52static inline void arch_local_irq_disable(void)
53{ 53{
54 raw_local_irq_restore(RAW_IRQ_DISABLED); 54 arch_local_irq_restore(ARCH_IRQ_DISABLED);
55} 55}
56#endif 56#endif
57 57
58/* test hardware interrupt enable bit */ 58/* test hardware interrupt enable bit */
59#ifndef raw_irqs_disabled 59#ifndef arch_irqs_disabled
60static inline int raw_irqs_disabled(void) 60static inline int arch_irqs_disabled(void)
61{ 61{
62 return raw_irqs_disabled_flags(__raw_local_save_flags()); 62 return arch_irqs_disabled_flags(arch_local_save_flags());
63} 63}
64#endif 64#endif
65 65
66#define raw_local_save_flags(flags) \
67 do { (flags) = __raw_local_save_flags(); } while (0)
68
69#define raw_local_irq_save(flags) \
70 do { (flags) = __raw_local_irq_save(); } while (0)
71
72#endif /* __ASM_GENERIC_IRQFLAGS_H */ 66#endif /* __ASM_GENERIC_IRQFLAGS_H */
diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h
index e2bd73e8f9c0..f4d4120e5128 100644
--- a/include/asm-generic/pgtable.h
+++ b/include/asm-generic/pgtable.h
@@ -129,6 +129,10 @@ static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addres
129#define move_pte(pte, prot, old_addr, new_addr) (pte) 129#define move_pte(pte, prot, old_addr, new_addr) (pte)
130#endif 130#endif
131 131
132#ifndef flush_tlb_fix_spurious_fault
133#define flush_tlb_fix_spurious_fault(vma, address) flush_tlb_page(vma, address)
134#endif
135
132#ifndef pgprot_noncached 136#ifndef pgprot_noncached
133#define pgprot_noncached(prot) (prot) 137#define pgprot_noncached(prot) (prot)
134#endif 138#endif
diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index 8a92a170fb7d..f4229fb315e1 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -220,6 +220,8 @@
220 \ 220 \
221 BUG_TABLE \ 221 BUG_TABLE \
222 \ 222 \
223 JUMP_TABLE \
224 \
223 /* PCI quirks */ \ 225 /* PCI quirks */ \
224 .pci_fixup : AT(ADDR(.pci_fixup) - LOAD_OFFSET) { \ 226 .pci_fixup : AT(ADDR(.pci_fixup) - LOAD_OFFSET) { \
225 VMLINUX_SYMBOL(__start_pci_fixups_early) = .; \ 227 VMLINUX_SYMBOL(__start_pci_fixups_early) = .; \
@@ -563,6 +565,14 @@
563#define BUG_TABLE 565#define BUG_TABLE
564#endif 566#endif
565 567
568#define JUMP_TABLE \
569 . = ALIGN(8); \
570 __jump_table : AT(ADDR(__jump_table) - LOAD_OFFSET) { \
571 VMLINUX_SYMBOL(__start___jump_table) = .; \
572 *(__jump_table) \
573 VMLINUX_SYMBOL(__stop___jump_table) = .; \
574 }
575
566#ifdef CONFIG_PM_TRACE 576#ifdef CONFIG_PM_TRACE
567#define TRACEDATA \ 577#define TRACEDATA \
568 . = ALIGN(4); \ 578 . = ALIGN(4); \
@@ -677,7 +687,9 @@
677 - LOAD_OFFSET) { \ 687 - LOAD_OFFSET) { \
678 VMLINUX_SYMBOL(__per_cpu_start) = .; \ 688 VMLINUX_SYMBOL(__per_cpu_start) = .; \
679 *(.data..percpu..first) \ 689 *(.data..percpu..first) \
690 . = ALIGN(PAGE_SIZE); \
680 *(.data..percpu..page_aligned) \ 691 *(.data..percpu..page_aligned) \
692 *(.data..percpu..readmostly) \
681 *(.data..percpu) \ 693 *(.data..percpu) \
682 *(.data..percpu..shared_aligned) \ 694 *(.data..percpu..shared_aligned) \
683 VMLINUX_SYMBOL(__per_cpu_end) = .; \ 695 VMLINUX_SYMBOL(__per_cpu_end) = .; \
@@ -703,7 +715,9 @@
703 VMLINUX_SYMBOL(__per_cpu_load) = .; \ 715 VMLINUX_SYMBOL(__per_cpu_load) = .; \
704 VMLINUX_SYMBOL(__per_cpu_start) = .; \ 716 VMLINUX_SYMBOL(__per_cpu_start) = .; \
705 *(.data..percpu..first) \ 717 *(.data..percpu..first) \
718 . = ALIGN(PAGE_SIZE); \
706 *(.data..percpu..page_aligned) \ 719 *(.data..percpu..page_aligned) \
720 *(.data..percpu..readmostly) \
707 *(.data..percpu) \ 721 *(.data..percpu) \
708 *(.data..percpu..shared_aligned) \ 722 *(.data..percpu..shared_aligned) \
709 VMLINUX_SYMBOL(__per_cpu_end) = .; \ 723 VMLINUX_SYMBOL(__per_cpu_end) = .; \
diff --git a/include/drm/drmP.h b/include/drm/drmP.h
index 7809d230adee..4c9461a4f9e6 100644
--- a/include/drm/drmP.h
+++ b/include/drm/drmP.h
@@ -612,7 +612,7 @@ struct drm_gem_object {
612 struct kref refcount; 612 struct kref refcount;
613 613
614 /** Handle count of this object. Each handle also holds a reference */ 614 /** Handle count of this object. Each handle also holds a reference */
615 struct kref handlecount; 615 atomic_t handle_count; /* number of handles on this object */
616 616
617 /** Related drm device */ 617 /** Related drm device */
618 struct drm_device *dev; 618 struct drm_device *dev;
@@ -808,7 +808,6 @@ struct drm_driver {
808 */ 808 */
809 int (*gem_init_object) (struct drm_gem_object *obj); 809 int (*gem_init_object) (struct drm_gem_object *obj);
810 void (*gem_free_object) (struct drm_gem_object *obj); 810 void (*gem_free_object) (struct drm_gem_object *obj);
811 void (*gem_free_object_unlocked) (struct drm_gem_object *obj);
812 811
813 /* vga arb irq handler */ 812 /* vga arb irq handler */
814 void (*vgaarb_irq)(struct drm_device *dev, bool state); 813 void (*vgaarb_irq)(struct drm_device *dev, bool state);
@@ -1175,6 +1174,7 @@ extern int drm_release(struct inode *inode, struct file *filp);
1175extern int drm_mmap(struct file *filp, struct vm_area_struct *vma); 1174extern int drm_mmap(struct file *filp, struct vm_area_struct *vma);
1176extern int drm_mmap_locked(struct file *filp, struct vm_area_struct *vma); 1175extern int drm_mmap_locked(struct file *filp, struct vm_area_struct *vma);
1177extern void drm_vm_open_locked(struct vm_area_struct *vma); 1176extern void drm_vm_open_locked(struct vm_area_struct *vma);
1177extern void drm_vm_close_locked(struct vm_area_struct *vma);
1178extern resource_size_t drm_core_get_map_ofs(struct drm_local_map * map); 1178extern resource_size_t drm_core_get_map_ofs(struct drm_local_map * map);
1179extern resource_size_t drm_core_get_reg_ofs(struct drm_device *dev); 1179extern resource_size_t drm_core_get_reg_ofs(struct drm_device *dev);
1180extern unsigned int drm_poll(struct file *filp, struct poll_table_struct *wait); 1180extern unsigned int drm_poll(struct file *filp, struct poll_table_struct *wait);
@@ -1455,12 +1455,11 @@ int drm_gem_init(struct drm_device *dev);
1455void drm_gem_destroy(struct drm_device *dev); 1455void drm_gem_destroy(struct drm_device *dev);
1456void drm_gem_object_release(struct drm_gem_object *obj); 1456void drm_gem_object_release(struct drm_gem_object *obj);
1457void drm_gem_object_free(struct kref *kref); 1457void drm_gem_object_free(struct kref *kref);
1458void drm_gem_object_free_unlocked(struct kref *kref);
1459struct drm_gem_object *drm_gem_object_alloc(struct drm_device *dev, 1458struct drm_gem_object *drm_gem_object_alloc(struct drm_device *dev,
1460 size_t size); 1459 size_t size);
1461int drm_gem_object_init(struct drm_device *dev, 1460int drm_gem_object_init(struct drm_device *dev,
1462 struct drm_gem_object *obj, size_t size); 1461 struct drm_gem_object *obj, size_t size);
1463void drm_gem_object_handle_free(struct kref *kref); 1462void drm_gem_object_handle_free(struct drm_gem_object *obj);
1464void drm_gem_vm_open(struct vm_area_struct *vma); 1463void drm_gem_vm_open(struct vm_area_struct *vma);
1465void drm_gem_vm_close(struct vm_area_struct *vma); 1464void drm_gem_vm_close(struct vm_area_struct *vma);
1466int drm_gem_mmap(struct file *filp, struct vm_area_struct *vma); 1465int drm_gem_mmap(struct file *filp, struct vm_area_struct *vma);
@@ -1483,8 +1482,12 @@ drm_gem_object_unreference(struct drm_gem_object *obj)
1483static inline void 1482static inline void
1484drm_gem_object_unreference_unlocked(struct drm_gem_object *obj) 1483drm_gem_object_unreference_unlocked(struct drm_gem_object *obj)
1485{ 1484{
1486 if (obj != NULL) 1485 if (obj != NULL) {
1487 kref_put(&obj->refcount, drm_gem_object_free_unlocked); 1486 struct drm_device *dev = obj->dev;
1487 mutex_lock(&dev->struct_mutex);
1488 kref_put(&obj->refcount, drm_gem_object_free);
1489 mutex_unlock(&dev->struct_mutex);
1490 }
1488} 1491}
1489 1492
1490int drm_gem_handle_create(struct drm_file *file_priv, 1493int drm_gem_handle_create(struct drm_file *file_priv,
@@ -1495,7 +1498,7 @@ static inline void
1495drm_gem_object_handle_reference(struct drm_gem_object *obj) 1498drm_gem_object_handle_reference(struct drm_gem_object *obj)
1496{ 1499{
1497 drm_gem_object_reference(obj); 1500 drm_gem_object_reference(obj);
1498 kref_get(&obj->handlecount); 1501 atomic_inc(&obj->handle_count);
1499} 1502}
1500 1503
1501static inline void 1504static inline void
@@ -1504,12 +1507,15 @@ drm_gem_object_handle_unreference(struct drm_gem_object *obj)
1504 if (obj == NULL) 1507 if (obj == NULL)
1505 return; 1508 return;
1506 1509
1510 if (atomic_read(&obj->handle_count) == 0)
1511 return;
1507 /* 1512 /*
1508 * Must bump handle count first as this may be the last 1513 * Must bump handle count first as this may be the last
1509 * ref, in which case the object would disappear before we 1514 * ref, in which case the object would disappear before we
1510 * checked for a name 1515 * checked for a name
1511 */ 1516 */
1512 kref_put(&obj->handlecount, drm_gem_object_handle_free); 1517 if (atomic_dec_and_test(&obj->handle_count))
1518 drm_gem_object_handle_free(obj);
1513 drm_gem_object_unreference(obj); 1519 drm_gem_object_unreference(obj);
1514} 1520}
1515 1521
@@ -1519,12 +1525,17 @@ drm_gem_object_handle_unreference_unlocked(struct drm_gem_object *obj)
1519 if (obj == NULL) 1525 if (obj == NULL)
1520 return; 1526 return;
1521 1527
1528 if (atomic_read(&obj->handle_count) == 0)
1529 return;
1530
1522 /* 1531 /*
1523 * Must bump handle count first as this may be the last 1532 * Must bump handle count first as this may be the last
1524 * ref, in which case the object would disappear before we 1533 * ref, in which case the object would disappear before we
1525 * checked for a name 1534 * checked for a name
1526 */ 1535 */
1527 kref_put(&obj->handlecount, drm_gem_object_handle_free); 1536
1537 if (atomic_dec_and_test(&obj->handle_count))
1538 drm_gem_object_handle_free(obj);
1528 drm_gem_object_unreference_unlocked(obj); 1539 drm_gem_object_unreference_unlocked(obj);
1529} 1540}
1530 1541
diff --git a/include/drm/drm_pciids.h b/include/drm/drm_pciids.h
index 3a9940ef728b..883c1d439899 100644
--- a/include/drm/drm_pciids.h
+++ b/include/drm/drm_pciids.h
@@ -85,7 +85,6 @@
85 {0x1002, 0x5460, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV380|RADEON_IS_MOBILITY}, \ 85 {0x1002, 0x5460, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV380|RADEON_IS_MOBILITY}, \
86 {0x1002, 0x5462, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV380|RADEON_IS_MOBILITY}, \ 86 {0x1002, 0x5462, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV380|RADEON_IS_MOBILITY}, \
87 {0x1002, 0x5464, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV380|RADEON_IS_MOBILITY}, \ 87 {0x1002, 0x5464, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV380|RADEON_IS_MOBILITY}, \
88 {0x1002, 0x5657, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV380|RADEON_NEW_MEMMAP}, \
89 {0x1002, 0x5548, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_R423|RADEON_NEW_MEMMAP}, \ 88 {0x1002, 0x5548, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_R423|RADEON_NEW_MEMMAP}, \
90 {0x1002, 0x5549, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_R423|RADEON_NEW_MEMMAP}, \ 89 {0x1002, 0x5549, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_R423|RADEON_NEW_MEMMAP}, \
91 {0x1002, 0x554A, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_R423|RADEON_NEW_MEMMAP}, \ 90 {0x1002, 0x554A, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_R423|RADEON_NEW_MEMMAP}, \
@@ -103,6 +102,7 @@
103 {0x1002, 0x564F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV410|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \ 102 {0x1002, 0x564F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV410|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \
104 {0x1002, 0x5652, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV410|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \ 103 {0x1002, 0x5652, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV410|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \
105 {0x1002, 0x5653, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV410|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \ 104 {0x1002, 0x5653, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV410|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \
105 {0x1002, 0x5657, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV410|RADEON_NEW_MEMMAP}, \
106 {0x1002, 0x5834, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RS300|RADEON_IS_IGP}, \ 106 {0x1002, 0x5834, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RS300|RADEON_IS_IGP}, \
107 {0x1002, 0x5835, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RS300|RADEON_IS_IGP|RADEON_IS_MOBILITY}, \ 107 {0x1002, 0x5835, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RS300|RADEON_IS_IGP|RADEON_IS_MOBILITY}, \
108 {0x1002, 0x5954, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RS480|RADEON_IS_IGP|RADEON_IS_MOBILITY|RADEON_IS_IGPGART}, \ 108 {0x1002, 0x5954, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RS480|RADEON_IS_IGP|RADEON_IS_MOBILITY|RADEON_IS_IGPGART}, \
diff --git a/include/drm/ttm/ttm_bo_api.h b/include/drm/ttm/ttm_bo_api.h
index 267a86c74e2e..2040e6c4f172 100644
--- a/include/drm/ttm/ttm_bo_api.h
+++ b/include/drm/ttm/ttm_bo_api.h
@@ -246,9 +246,11 @@ struct ttm_buffer_object {
246 246
247 atomic_t reserved; 247 atomic_t reserved;
248 248
249
250 /** 249 /**
251 * Members protected by the bo::lock 250 * Members protected by the bo::lock
251 * In addition, setting sync_obj to anything else
252 * than NULL requires bo::reserved to be held. This allows for
253 * checking NULL while reserved but not holding bo::lock.
252 */ 254 */
253 255
254 void *sync_obj_arg; 256 void *sync_obj_arg;
diff --git a/include/linux/Kbuild b/include/linux/Kbuild
index 626b629429ff..4e8ea8c8ec1e 100644
--- a/include/linux/Kbuild
+++ b/include/linux/Kbuild
@@ -118,7 +118,6 @@ header-y += eventpoll.h
118header-y += ext2_fs.h 118header-y += ext2_fs.h
119header-y += fadvise.h 119header-y += fadvise.h
120header-y += falloc.h 120header-y += falloc.h
121header-y += fanotify.h
122header-y += fb.h 121header-y += fb.h
123header-y += fcntl.h 122header-y += fcntl.h
124header-y += fd.h 123header-y += fd.h
diff --git a/include/linux/acpi_pmtmr.h b/include/linux/acpi_pmtmr.h
index 7e3d2859be50..1d0ef1ae8036 100644
--- a/include/linux/acpi_pmtmr.h
+++ b/include/linux/acpi_pmtmr.h
@@ -25,8 +25,6 @@ static inline u32 acpi_pm_read_early(void)
25 return acpi_pm_read_verified() & ACPI_PM_MASK; 25 return acpi_pm_read_verified() & ACPI_PM_MASK;
26} 26}
27 27
28extern void pmtimer_wait(unsigned);
29
30#else 28#else
31 29
32static inline u32 acpi_pm_read_early(void) 30static inline u32 acpi_pm_read_early(void)
diff --git a/include/linux/amba/bus.h b/include/linux/amba/bus.h
index b0c174012436..c6454cca0447 100644
--- a/include/linux/amba/bus.h
+++ b/include/linux/amba/bus.h
@@ -20,6 +20,7 @@
20#include <linux/resource.h> 20#include <linux/resource.h>
21 21
22#define AMBA_NR_IRQS 2 22#define AMBA_NR_IRQS 2
23#define AMBA_CID 0xb105f00d
23 24
24struct clk; 25struct clk;
25 26
@@ -70,9 +71,15 @@ void amba_release_regions(struct amba_device *);
70#define amba_pclk_disable(d) \ 71#define amba_pclk_disable(d) \
71 do { if (!IS_ERR((d)->pclk)) clk_disable((d)->pclk); } while (0) 72 do { if (!IS_ERR((d)->pclk)) clk_disable((d)->pclk); } while (0)
72 73
73#define amba_config(d) (((d)->periphid >> 24) & 0xff) 74/* Some drivers don't use the struct amba_device */
74#define amba_rev(d) (((d)->periphid >> 20) & 0x0f) 75#define AMBA_CONFIG_BITS(a) (((a) >> 24) & 0xff)
75#define amba_manf(d) (((d)->periphid >> 12) & 0xff) 76#define AMBA_REV_BITS(a) (((a) >> 20) & 0x0f)
76#define amba_part(d) ((d)->periphid & 0xfff) 77#define AMBA_MANF_BITS(a) (((a) >> 12) & 0xff)
78#define AMBA_PART_BITS(a) ((a) & 0xfff)
79
80#define amba_config(d) AMBA_CONFIG_BITS((d)->periphid)
81#define amba_rev(d) AMBA_REV_BITS((d)->periphid)
82#define amba_manf(d) AMBA_MANF_BITS((d)->periphid)
83#define amba_part(d) AMBA_PART_BITS((d)->periphid)
77 84
78#endif 85#endif
diff --git a/include/linux/amba/mmci.h b/include/linux/amba/mmci.h
index ca84ce70d5d5..f4ee9acc9721 100644
--- a/include/linux/amba/mmci.h
+++ b/include/linux/amba/mmci.h
@@ -24,6 +24,7 @@
24 * whether a card is present in the MMC slot or not 24 * whether a card is present in the MMC slot or not
25 * @gpio_wp: read this GPIO pin to see if the card is write protected 25 * @gpio_wp: read this GPIO pin to see if the card is write protected
26 * @gpio_cd: read this GPIO pin to detect card insertion 26 * @gpio_cd: read this GPIO pin to detect card insertion
27 * @cd_invert: true if the gpio_cd pin value is active low
27 * @capabilities: the capabilities of the block as implemented in 28 * @capabilities: the capabilities of the block as implemented in
28 * this platform, signify anything MMC_CAP_* from mmc/host.h 29 * this platform, signify anything MMC_CAP_* from mmc/host.h
29 */ 30 */
@@ -35,6 +36,7 @@ struct mmci_platform_data {
35 unsigned int (*status)(struct device *); 36 unsigned int (*status)(struct device *);
36 int gpio_wp; 37 int gpio_wp;
37 int gpio_cd; 38 int gpio_cd;
39 bool cd_invert;
38 unsigned long capabilities; 40 unsigned long capabilities;
39}; 41};
40 42
diff --git a/include/linux/amba/pl022.h b/include/linux/amba/pl022.h
index abf26cc47a2b..4ce98f54186b 100644
--- a/include/linux/amba/pl022.h
+++ b/include/linux/amba/pl022.h
@@ -228,6 +228,7 @@ enum ssp_chip_select {
228}; 228};
229 229
230 230
231struct dma_chan;
231/** 232/**
232 * struct pl022_ssp_master - device.platform_data for SPI controller devices. 233 * struct pl022_ssp_master - device.platform_data for SPI controller devices.
233 * @num_chipselect: chipselects are used to distinguish individual 234 * @num_chipselect: chipselects are used to distinguish individual
@@ -235,11 +236,16 @@ enum ssp_chip_select {
235 * each slave has a chipselect signal, but it's common that not 236 * each slave has a chipselect signal, but it's common that not
236 * every chipselect is connected to a slave. 237 * every chipselect is connected to a slave.
237 * @enable_dma: if true enables DMA driven transfers. 238 * @enable_dma: if true enables DMA driven transfers.
239 * @dma_rx_param: parameter to locate an RX DMA channel.
240 * @dma_tx_param: parameter to locate a TX DMA channel.
238 */ 241 */
239struct pl022_ssp_controller { 242struct pl022_ssp_controller {
240 u16 bus_id; 243 u16 bus_id;
241 u8 num_chipselect; 244 u8 num_chipselect;
242 u8 enable_dma:1; 245 u8 enable_dma:1;
246 bool (*dma_filter)(struct dma_chan *chan, void *filter_param);
247 void *dma_rx_param;
248 void *dma_tx_param;
243}; 249};
244 250
245/** 251/**
@@ -270,20 +276,13 @@ struct pl022_ssp_controller {
270 * @dma_config: DMA configuration for SSP controller and peripheral 276 * @dma_config: DMA configuration for SSP controller and peripheral
271 */ 277 */
272struct pl022_config_chip { 278struct pl022_config_chip {
273 struct device *dev;
274 enum ssp_loopback lbm;
275 enum ssp_interface iface; 279 enum ssp_interface iface;
276 enum ssp_hierarchy hierarchy; 280 enum ssp_hierarchy hierarchy;
277 bool slave_tx_disable; 281 bool slave_tx_disable;
278 struct ssp_clock_params clk_freq; 282 struct ssp_clock_params clk_freq;
279 enum ssp_rx_endian endian_rx;
280 enum ssp_tx_endian endian_tx;
281 enum ssp_data_size data_size;
282 enum ssp_mode com_mode; 283 enum ssp_mode com_mode;
283 enum ssp_rx_level_trig rx_lev_trig; 284 enum ssp_rx_level_trig rx_lev_trig;
284 enum ssp_tx_level_trig tx_lev_trig; 285 enum ssp_tx_level_trig tx_lev_trig;
285 enum ssp_spi_clk_phase clk_phase;
286 enum ssp_spi_clk_pol clk_pol;
287 enum ssp_microwire_ctrl_len ctrl_len; 286 enum ssp_microwire_ctrl_len ctrl_len;
288 enum ssp_microwire_wait_state wait_state; 287 enum ssp_microwire_wait_state wait_state;
289 enum ssp_duplex duplex; 288 enum ssp_duplex duplex;
diff --git a/include/linux/amba/serial.h b/include/linux/amba/serial.h
index e1b634b635f2..6021588ba0a8 100644
--- a/include/linux/amba/serial.h
+++ b/include/linux/amba/serial.h
@@ -32,7 +32,9 @@
32#define UART01x_RSR 0x04 /* Receive status register (Read). */ 32#define UART01x_RSR 0x04 /* Receive status register (Read). */
33#define UART01x_ECR 0x04 /* Error clear register (Write). */ 33#define UART01x_ECR 0x04 /* Error clear register (Write). */
34#define UART010_LCRH 0x08 /* Line control register, high byte. */ 34#define UART010_LCRH 0x08 /* Line control register, high byte. */
35#define ST_UART011_DMAWM 0x08 /* DMA watermark configure register. */
35#define UART010_LCRM 0x0C /* Line control register, middle byte. */ 36#define UART010_LCRM 0x0C /* Line control register, middle byte. */
37#define ST_UART011_TIMEOUT 0x0C /* Timeout period register. */
36#define UART010_LCRL 0x10 /* Line control register, low byte. */ 38#define UART010_LCRL 0x10 /* Line control register, low byte. */
37#define UART010_CR 0x14 /* Control register. */ 39#define UART010_CR 0x14 /* Control register. */
38#define UART01x_FR 0x18 /* Flag register (Read only). */ 40#define UART01x_FR 0x18 /* Flag register (Read only). */
@@ -51,6 +53,15 @@
51#define UART011_MIS 0x40 /* Masked interrupt status. */ 53#define UART011_MIS 0x40 /* Masked interrupt status. */
52#define UART011_ICR 0x44 /* Interrupt clear register. */ 54#define UART011_ICR 0x44 /* Interrupt clear register. */
53#define UART011_DMACR 0x48 /* DMA control register. */ 55#define UART011_DMACR 0x48 /* DMA control register. */
56#define ST_UART011_XFCR 0x50 /* XON/XOFF control register. */
57#define ST_UART011_XON1 0x54 /* XON1 register. */
58#define ST_UART011_XON2 0x58 /* XON2 register. */
59#define ST_UART011_XOFF1 0x5C /* XOFF1 register. */
60#define ST_UART011_XOFF2 0x60 /* XOFF2 register. */
61#define ST_UART011_ITCR 0x80 /* Integration test control register. */
62#define ST_UART011_ITIP 0x84 /* Integration test input register. */
63#define ST_UART011_ABCR 0x100 /* Autobaud control register. */
64#define ST_UART011_ABIMSC 0x15C /* Autobaud interrupt mask/clear register. */
54 65
55#define UART011_DR_OE (1 << 11) 66#define UART011_DR_OE (1 << 11)
56#define UART011_DR_BE (1 << 10) 67#define UART011_DR_BE (1 << 10)
diff --git a/include/linux/ata.h b/include/linux/ata.h
index fe6e681a9d74..0c4929fa34d3 100644
--- a/include/linux/ata.h
+++ b/include/linux/ata.h
@@ -89,6 +89,7 @@ enum {
89 ATA_ID_SPG = 98, 89 ATA_ID_SPG = 98,
90 ATA_ID_LBA_CAPACITY_2 = 100, 90 ATA_ID_LBA_CAPACITY_2 = 100,
91 ATA_ID_SECTOR_SIZE = 106, 91 ATA_ID_SECTOR_SIZE = 106,
92 ATA_ID_LOGICAL_SECTOR_SIZE = 117, /* and 118 */
92 ATA_ID_LAST_LUN = 126, 93 ATA_ID_LAST_LUN = 126,
93 ATA_ID_DLF = 128, 94 ATA_ID_DLF = 128,
94 ATA_ID_CSFO = 129, 95 ATA_ID_CSFO = 129,
@@ -640,16 +641,49 @@ static inline int ata_id_flush_ext_enabled(const u16 *id)
640 return (id[ATA_ID_CFS_ENABLE_2] & 0x2400) == 0x2400; 641 return (id[ATA_ID_CFS_ENABLE_2] & 0x2400) == 0x2400;
641} 642}
642 643
643static inline int ata_id_has_large_logical_sectors(const u16 *id) 644static inline u32 ata_id_logical_sector_size(const u16 *id)
644{ 645{
645 if ((id[ATA_ID_SECTOR_SIZE] & 0xc000) != 0x4000) 646 /* T13/1699-D Revision 6a, Sep 6, 2008. Page 128.
646 return 0; 647 * IDENTIFY DEVICE data, word 117-118.
647 return id[ATA_ID_SECTOR_SIZE] & (1 << 13); 648 * 0xd000 ignores bit 13 (logical:physical > 1)
649 */
650 if ((id[ATA_ID_SECTOR_SIZE] & 0xd000) == 0x5000)
651 return (((id[ATA_ID_LOGICAL_SECTOR_SIZE+1] << 16)
652 + id[ATA_ID_LOGICAL_SECTOR_SIZE]) * sizeof(u16)) ;
653 return ATA_SECT_SIZE;
654}
655
656static inline u8 ata_id_log2_per_physical_sector(const u16 *id)
657{
658 /* T13/1699-D Revision 6a, Sep 6, 2008. Page 128.
659 * IDENTIFY DEVICE data, word 106.
660 * 0xe000 ignores bit 12 (logical sector > 512 bytes)
661 */
662 if ((id[ATA_ID_SECTOR_SIZE] & 0xe000) == 0x6000)
663 return (id[ATA_ID_SECTOR_SIZE] & 0xf);
664 return 0;
648} 665}
649 666
650static inline u16 ata_id_logical_per_physical_sectors(const u16 *id) 667/* Offset of logical sectors relative to physical sectors.
668 *
669 * If device has more than one logical sector per physical sector
670 * (aka 512 byte emulation), vendors might offset the "sector 0" address
671 * so sector 63 is "naturally aligned" - e.g. FAT partition table.
672 * This avoids Read/Mod/Write penalties when using FAT partition table
673 * and updating "well aligned" (FS perspective) physical sectors on every
674 * transaction.
675 */
676static inline u16 ata_id_logical_sector_offset(const u16 *id,
677 u8 log2_per_phys)
651{ 678{
652 return 1 << (id[ATA_ID_SECTOR_SIZE] & 0xf); 679 u16 word_209 = id[209];
680
681 if ((log2_per_phys > 1) && (word_209 & 0xc000) == 0x4000) {
682 u16 first = word_209 & 0x3fff;
683 if (first > 0)
684 return (1 << log2_per_phys) - first;
685 }
686 return 0;
653} 687}
654 688
655static inline int ata_id_has_lba48(const u16 *id) 689static inline int ata_id_has_lba48(const u16 *id)
diff --git a/include/linux/ceph/auth.h b/include/linux/ceph/auth.h
new file mode 100644
index 000000000000..7fff521d7eb5
--- /dev/null
+++ b/include/linux/ceph/auth.h
@@ -0,0 +1,92 @@
1#ifndef _FS_CEPH_AUTH_H
2#define _FS_CEPH_AUTH_H
3
4#include <linux/ceph/types.h>
5#include <linux/ceph/buffer.h>
6
7/*
8 * Abstract interface for communicating with the authentication module.
9 * There is some handshake that takes place between us and the monitor
10 * to acquire the necessary keys. These are used to generate an
11 * 'authorizer' that we use when connecting to a service (mds, osd).
12 */
13
14struct ceph_auth_client;
15struct ceph_authorizer;
16
17struct ceph_auth_client_ops {
18 const char *name;
19
20 /*
21 * true if we are authenticated and can connect to
22 * services.
23 */
24 int (*is_authenticated)(struct ceph_auth_client *ac);
25
26 /*
27 * true if we should (re)authenticate, e.g., when our tickets
28 * are getting old and crusty.
29 */
30 int (*should_authenticate)(struct ceph_auth_client *ac);
31
32 /*
33 * build requests and process replies during monitor
34 * handshake. if handle_reply returns -EAGAIN, we build
35 * another request.
36 */
37 int (*build_request)(struct ceph_auth_client *ac, void *buf, void *end);
38 int (*handle_reply)(struct ceph_auth_client *ac, int result,
39 void *buf, void *end);
40
41 /*
42 * Create authorizer for connecting to a service, and verify
43 * the response to authenticate the service.
44 */
45 int (*create_authorizer)(struct ceph_auth_client *ac, int peer_type,
46 struct ceph_authorizer **a,
47 void **buf, size_t *len,
48 void **reply_buf, size_t *reply_len);
49 int (*verify_authorizer_reply)(struct ceph_auth_client *ac,
50 struct ceph_authorizer *a, size_t len);
51 void (*destroy_authorizer)(struct ceph_auth_client *ac,
52 struct ceph_authorizer *a);
53 void (*invalidate_authorizer)(struct ceph_auth_client *ac,
54 int peer_type);
55
56 /* reset when we (re)connect to a monitor */
57 void (*reset)(struct ceph_auth_client *ac);
58
59 void (*destroy)(struct ceph_auth_client *ac);
60};
61
62struct ceph_auth_client {
63 u32 protocol; /* CEPH_AUTH_* */
64 void *private; /* for use by protocol implementation */
65 const struct ceph_auth_client_ops *ops; /* null iff protocol==0 */
66
67 bool negotiating; /* true if negotiating protocol */
68 const char *name; /* entity name */
69 u64 global_id; /* our unique id in system */
70 const char *secret; /* our secret key */
71 unsigned want_keys; /* which services we want */
72};
73
74extern struct ceph_auth_client *ceph_auth_init(const char *name,
75 const char *secret);
76extern void ceph_auth_destroy(struct ceph_auth_client *ac);
77
78extern void ceph_auth_reset(struct ceph_auth_client *ac);
79
80extern int ceph_auth_build_hello(struct ceph_auth_client *ac,
81 void *buf, size_t len);
82extern int ceph_handle_auth_reply(struct ceph_auth_client *ac,
83 void *buf, size_t len,
84 void *reply_buf, size_t reply_len);
85extern int ceph_entity_name_encode(const char *name, void **p, void *end);
86
87extern int ceph_build_auth(struct ceph_auth_client *ac,
88 void *msg_buf, size_t msg_len);
89
90extern int ceph_auth_is_authenticated(struct ceph_auth_client *ac);
91
92#endif
diff --git a/include/linux/ceph/buffer.h b/include/linux/ceph/buffer.h
new file mode 100644
index 000000000000..58d19014068f
--- /dev/null
+++ b/include/linux/ceph/buffer.h
@@ -0,0 +1,39 @@
1#ifndef __FS_CEPH_BUFFER_H
2#define __FS_CEPH_BUFFER_H
3
4#include <linux/kref.h>
5#include <linux/mm.h>
6#include <linux/vmalloc.h>
7#include <linux/types.h>
8#include <linux/uio.h>
9
10/*
11 * a simple reference counted buffer.
12 *
13 * use kmalloc for small sizes (<= one page), vmalloc for larger
14 * sizes.
15 */
16struct ceph_buffer {
17 struct kref kref;
18 struct kvec vec;
19 size_t alloc_len;
20 bool is_vmalloc;
21};
22
23extern struct ceph_buffer *ceph_buffer_new(size_t len, gfp_t gfp);
24extern void ceph_buffer_release(struct kref *kref);
25
26static inline struct ceph_buffer *ceph_buffer_get(struct ceph_buffer *b)
27{
28 kref_get(&b->kref);
29 return b;
30}
31
32static inline void ceph_buffer_put(struct ceph_buffer *b)
33{
34 kref_put(&b->kref, ceph_buffer_release);
35}
36
37extern int ceph_decode_buffer(struct ceph_buffer **b, void **p, void *end);
38
39#endif
diff --git a/include/linux/ceph/ceph_debug.h b/include/linux/ceph/ceph_debug.h
new file mode 100644
index 000000000000..aa2e19182d99
--- /dev/null
+++ b/include/linux/ceph/ceph_debug.h
@@ -0,0 +1,38 @@
1#ifndef _FS_CEPH_DEBUG_H
2#define _FS_CEPH_DEBUG_H
3
4#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
5
6#ifdef CONFIG_CEPH_LIB_PRETTYDEBUG
7
8/*
9 * wrap pr_debug to include a filename:lineno prefix on each line.
10 * this incurs some overhead (kernel size and execution time) due to
11 * the extra function call at each call site.
12 */
13
14# if defined(DEBUG) || defined(CONFIG_DYNAMIC_DEBUG)
15extern const char *ceph_file_part(const char *s, int len);
16# define dout(fmt, ...) \
17 pr_debug("%.*s %12.12s:%-4d : " fmt, \
18 8 - (int)sizeof(KBUILD_MODNAME), " ", \
19 ceph_file_part(__FILE__, sizeof(__FILE__)), \
20 __LINE__, ##__VA_ARGS__)
21# else
22/* faux printk call just to see any compiler warnings. */
23# define dout(fmt, ...) do { \
24 if (0) \
25 printk(KERN_DEBUG fmt, ##__VA_ARGS__); \
26 } while (0)
27# endif
28
29#else
30
31/*
32 * or, just wrap pr_debug
33 */
34# define dout(fmt, ...) pr_debug(" " fmt, ##__VA_ARGS__)
35
36#endif
37
38#endif
diff --git a/include/linux/ceph/ceph_frag.h b/include/linux/ceph/ceph_frag.h
new file mode 100644
index 000000000000..5babb8e95352
--- /dev/null
+++ b/include/linux/ceph/ceph_frag.h
@@ -0,0 +1,109 @@
1#ifndef FS_CEPH_FRAG_H
2#define FS_CEPH_FRAG_H
3
4/*
5 * "Frags" are a way to describe a subset of a 32-bit number space,
6 * using a mask and a value to match against that mask. Any given frag
7 * (subset of the number space) can be partitioned into 2^n sub-frags.
8 *
9 * Frags are encoded into a 32-bit word:
10 * 8 upper bits = "bits"
11 * 24 lower bits = "value"
12 * (We could go to 5+27 bits, but who cares.)
13 *
14 * We use the _most_ significant bits of the 24 bit value. This makes
15 * values logically sort.
16 *
17 * Unfortunately, because the "bits" field is still in the high bits, we
18 * can't sort encoded frags numerically. However, it does allow you
19 * to feed encoded frags as values into frag_contains_value.
20 */
21static inline __u32 ceph_frag_make(__u32 b, __u32 v)
22{
23 return (b << 24) |
24 (v & (0xffffffu << (24-b)) & 0xffffffu);
25}
26static inline __u32 ceph_frag_bits(__u32 f)
27{
28 return f >> 24;
29}
30static inline __u32 ceph_frag_value(__u32 f)
31{
32 return f & 0xffffffu;
33}
34static inline __u32 ceph_frag_mask(__u32 f)
35{
36 return (0xffffffu << (24-ceph_frag_bits(f))) & 0xffffffu;
37}
38static inline __u32 ceph_frag_mask_shift(__u32 f)
39{
40 return 24 - ceph_frag_bits(f);
41}
42
43static inline int ceph_frag_contains_value(__u32 f, __u32 v)
44{
45 return (v & ceph_frag_mask(f)) == ceph_frag_value(f);
46}
47static inline int ceph_frag_contains_frag(__u32 f, __u32 sub)
48{
49 /* is sub as specific as us, and contained by us? */
50 return ceph_frag_bits(sub) >= ceph_frag_bits(f) &&
51 (ceph_frag_value(sub) & ceph_frag_mask(f)) == ceph_frag_value(f);
52}
53
54static inline __u32 ceph_frag_parent(__u32 f)
55{
56 return ceph_frag_make(ceph_frag_bits(f) - 1,
57 ceph_frag_value(f) & (ceph_frag_mask(f) << 1));
58}
59static inline int ceph_frag_is_left_child(__u32 f)
60{
61 return ceph_frag_bits(f) > 0 &&
62 (ceph_frag_value(f) & (0x1000000 >> ceph_frag_bits(f))) == 0;
63}
64static inline int ceph_frag_is_right_child(__u32 f)
65{
66 return ceph_frag_bits(f) > 0 && /* needs at least one split bit */
67 (ceph_frag_value(f) & (0x1000000 >> ceph_frag_bits(f))) != 0; /* bit set => right child; masked result is the bit itself, not 1 */
68}
69static inline __u32 ceph_frag_sibling(__u32 f)
70{
71 return ceph_frag_make(ceph_frag_bits(f),
72 ceph_frag_value(f) ^ (0x1000000 >> ceph_frag_bits(f)));
73}
74static inline __u32 ceph_frag_left_child(__u32 f)
75{
76 return ceph_frag_make(ceph_frag_bits(f)+1, ceph_frag_value(f));
77}
78static inline __u32 ceph_frag_right_child(__u32 f)
79{
80 return ceph_frag_make(ceph_frag_bits(f)+1,
81 ceph_frag_value(f) | (0x1000000 >> (1+ceph_frag_bits(f))));
82}
83static inline __u32 ceph_frag_make_child(__u32 f, int by, int i)
84{
85 int newbits = ceph_frag_bits(f) + by;
86 return ceph_frag_make(newbits,
87 ceph_frag_value(f) | (i << (24 - newbits)));
88}
89static inline int ceph_frag_is_leftmost(__u32 f)
90{
91 return ceph_frag_value(f) == 0;
92}
93static inline int ceph_frag_is_rightmost(__u32 f)
94{
95 return ceph_frag_value(f) == ceph_frag_mask(f);
96}
97static inline __u32 ceph_frag_next(__u32 f)
98{
99 return ceph_frag_make(ceph_frag_bits(f),
100 ceph_frag_value(f) + (0x1000000 >> ceph_frag_bits(f)));
101}
102
103/*
104 * comparator to sort frags logically, as when traversing the
105 * number space in ascending order...
106 */
107int ceph_frag_compare(__u32 a, __u32 b);
108
109#endif
diff --git a/include/linux/ceph/ceph_fs.h b/include/linux/ceph/ceph_fs.h
new file mode 100644
index 000000000000..c3c74aef289d
--- /dev/null
+++ b/include/linux/ceph/ceph_fs.h
@@ -0,0 +1,729 @@
1/*
2 * ceph_fs.h - Ceph constants and data types to share between kernel and
3 * user space.
4 *
5 * Most types in this file are defined as little-endian, and are
6 * primarily intended to describe data structures that pass over the
7 * wire or that are stored on disk.
8 *
9 * LGPL2
10 */
11
12#ifndef CEPH_FS_H
13#define CEPH_FS_H
14
15#include "msgr.h"
16#include "rados.h"
17
18/*
19 * subprotocol versions. when specific message types or high-level
20 * protocols change, bump the affected components. we rev
21 * internal cluster protocols separately from the public,
22 * client-facing protocol.
23 */
24#define CEPH_OSD_PROTOCOL 8 /* cluster internal */
25#define CEPH_MDS_PROTOCOL 12 /* cluster internal */
26#define CEPH_MON_PROTOCOL 5 /* cluster internal */
27#define CEPH_OSDC_PROTOCOL 24 /* server/client */
28#define CEPH_MDSC_PROTOCOL 32 /* server/client */
29#define CEPH_MONC_PROTOCOL 15 /* server/client */
30
31
32#define CEPH_INO_ROOT 1
33#define CEPH_INO_CEPH 2 /* hidden .ceph dir */
34
35/* arbitrary limit on max # of monitors (cluster of 3 is typical) */
36#define CEPH_MAX_MON 31
37
38
39/*
40 * feature bits
41 */
42#define CEPH_FEATURE_UID (1<<0)
43#define CEPH_FEATURE_NOSRCADDR (1<<1)
44#define CEPH_FEATURE_MONCLOCKCHECK (1<<2)
45#define CEPH_FEATURE_FLOCK (1<<3)
46
47
48/*
49 * ceph_file_layout - describe data layout for a file/inode
50 */
51struct ceph_file_layout {
52 /* file -> object mapping */
53 __le32 fl_stripe_unit; /* stripe unit, in bytes. must be multiple
54 of page size. */
55 __le32 fl_stripe_count; /* over this many objects */
56 __le32 fl_object_size; /* until objects are this big, then move to
57 new objects */
58 __le32 fl_cas_hash; /* 0 = none; 1 = sha256 */
59
60 /* pg -> disk layout */
61 __le32 fl_object_stripe_unit; /* for per-object parity, if any */
62
63 /* object -> pg layout */
64 __le32 fl_pg_preferred; /* preferred primary for pg (-1 for none) */
65 __le32 fl_pg_pool; /* namespace, crush ruleset, rep level */
66} __attribute__ ((packed));
67
68#define CEPH_MIN_STRIPE_UNIT 65536
69
70int ceph_file_layout_is_valid(const struct ceph_file_layout *layout);
71
72
73/* crypto algorithms */
74#define CEPH_CRYPTO_NONE 0x0
75#define CEPH_CRYPTO_AES 0x1
76
77#define CEPH_AES_IV "cephsageyudagreg"
78
79/* security/authentication protocols */
80#define CEPH_AUTH_UNKNOWN 0x0
81#define CEPH_AUTH_NONE 0x1
82#define CEPH_AUTH_CEPHX 0x2
83
84#define CEPH_AUTH_UID_DEFAULT ((__u64) -1)
85
86
87/*********************************************
88 * message layer
89 */
90
91/*
92 * message types
93 */
94
95/* misc */
96#define CEPH_MSG_SHUTDOWN 1
97#define CEPH_MSG_PING 2
98
99/* client <-> monitor */
100#define CEPH_MSG_MON_MAP 4
101#define CEPH_MSG_MON_GET_MAP 5
102#define CEPH_MSG_STATFS 13
103#define CEPH_MSG_STATFS_REPLY 14
104#define CEPH_MSG_MON_SUBSCRIBE 15
105#define CEPH_MSG_MON_SUBSCRIBE_ACK 16
106#define CEPH_MSG_AUTH 17
107#define CEPH_MSG_AUTH_REPLY 18
108
109/* client <-> mds */
110#define CEPH_MSG_MDS_MAP 21
111
112#define CEPH_MSG_CLIENT_SESSION 22
113#define CEPH_MSG_CLIENT_RECONNECT 23
114
115#define CEPH_MSG_CLIENT_REQUEST 24
116#define CEPH_MSG_CLIENT_REQUEST_FORWARD 25
117#define CEPH_MSG_CLIENT_REPLY 26
118#define CEPH_MSG_CLIENT_CAPS 0x310
119#define CEPH_MSG_CLIENT_LEASE 0x311
120#define CEPH_MSG_CLIENT_SNAP 0x312
121#define CEPH_MSG_CLIENT_CAPRELEASE 0x313
122
123/* pool ops */
124#define CEPH_MSG_POOLOP_REPLY 48
125#define CEPH_MSG_POOLOP 49
126
127
128/* osd */
129#define CEPH_MSG_OSD_MAP 41
130#define CEPH_MSG_OSD_OP 42
131#define CEPH_MSG_OSD_OPREPLY 43
132
133/* pool operations */
134enum {
135 POOL_OP_CREATE = 0x01,
136 POOL_OP_DELETE = 0x02,
137 POOL_OP_AUID_CHANGE = 0x03,
138 POOL_OP_CREATE_SNAP = 0x11,
139 POOL_OP_DELETE_SNAP = 0x12,
140 POOL_OP_CREATE_UNMANAGED_SNAP = 0x21,
141 POOL_OP_DELETE_UNMANAGED_SNAP = 0x22,
142};
143
144struct ceph_mon_request_header {
145 __le64 have_version;
146 __le16 session_mon;
147 __le64 session_mon_tid;
148} __attribute__ ((packed));
149
150struct ceph_mon_statfs {
151 struct ceph_mon_request_header monhdr;
152 struct ceph_fsid fsid;
153} __attribute__ ((packed));
154
155struct ceph_statfs {
156 __le64 kb, kb_used, kb_avail;
157 __le64 num_objects;
158} __attribute__ ((packed));
159
160struct ceph_mon_statfs_reply {
161 struct ceph_fsid fsid;
162 __le64 version;
163 struct ceph_statfs st;
164} __attribute__ ((packed));
165
166const char *ceph_pool_op_name(int op);
167
168struct ceph_mon_poolop {
169 struct ceph_mon_request_header monhdr;
170 struct ceph_fsid fsid;
171 __le32 pool;
172 __le32 op;
173 __le64 auid;
174 __le64 snapid;
175 __le32 name_len;
176} __attribute__ ((packed));
177
178struct ceph_mon_poolop_reply {
179 struct ceph_mon_request_header monhdr;
180 struct ceph_fsid fsid;
181 __le32 reply_code;
182 __le32 epoch;
183 char has_data;
184 char data[0];
185} __attribute__ ((packed));
186
187struct ceph_mon_unmanaged_snap {
188 __le64 snapid;
189} __attribute__ ((packed));
190
191struct ceph_osd_getmap {
192 struct ceph_mon_request_header monhdr;
193 struct ceph_fsid fsid;
194 __le32 start;
195} __attribute__ ((packed));
196
197struct ceph_mds_getmap {
198 struct ceph_mon_request_header monhdr;
199 struct ceph_fsid fsid;
200} __attribute__ ((packed));
201
202struct ceph_client_mount {
203 struct ceph_mon_request_header monhdr;
204} __attribute__ ((packed));
205
206struct ceph_mon_subscribe_item {
207 __le64 have_version; __le64 have;
208 __u8 onetime;
209} __attribute__ ((packed));
210
211struct ceph_mon_subscribe_ack {
212 __le32 duration; /* seconds */
213 struct ceph_fsid fsid;
214} __attribute__ ((packed));
215
216/*
217 * mds states
218 * > 0 -> in
219 * <= 0 -> out
220 */
221#define CEPH_MDS_STATE_DNE 0 /* down, does not exist. */
222#define CEPH_MDS_STATE_STOPPED -1 /* down, once existed, but no subtrees.
223 empty log. */
224#define CEPH_MDS_STATE_BOOT -4 /* up, boot announcement. */
225#define CEPH_MDS_STATE_STANDBY -5 /* up, idle. waiting for assignment. */
226#define CEPH_MDS_STATE_CREATING -6 /* up, creating MDS instance. */
227#define CEPH_MDS_STATE_STARTING -7 /* up, starting previously stopped mds */
228#define CEPH_MDS_STATE_STANDBY_REPLAY -8 /* up, tailing active node's journal */
229
230#define CEPH_MDS_STATE_REPLAY 8 /* up, replaying journal. */
231#define CEPH_MDS_STATE_RESOLVE 9 /* up, disambiguating distributed
232 operations (import, rename, etc.) */
233#define CEPH_MDS_STATE_RECONNECT 10 /* up, reconnect to clients */
234#define CEPH_MDS_STATE_REJOIN 11 /* up, rejoining distributed cache */
235#define CEPH_MDS_STATE_CLIENTREPLAY 12 /* up, replaying client operations */
236#define CEPH_MDS_STATE_ACTIVE 13 /* up, active */
237#define CEPH_MDS_STATE_STOPPING 14 /* up, but exporting metadata */
238
239extern const char *ceph_mds_state_name(int s);
240
241
242/*
243 * metadata lock types.
244 * - these are bitmasks.. we can compose them
245 * - they also define the lock ordering by the MDS
246 * - a few of these are internal to the mds
247 */
248#define CEPH_LOCK_DVERSION 1
249#define CEPH_LOCK_DN 2
250#define CEPH_LOCK_ISNAP 16
251#define CEPH_LOCK_IVERSION 32 /* mds internal */
252#define CEPH_LOCK_IFILE 64
253#define CEPH_LOCK_IAUTH 128
254#define CEPH_LOCK_ILINK 256
255#define CEPH_LOCK_IDFT 512 /* dir frag tree */
256#define CEPH_LOCK_INEST 1024 /* mds internal */
257#define CEPH_LOCK_IXATTR 2048
258#define CEPH_LOCK_IFLOCK 4096 /* advisory file locks */
259#define CEPH_LOCK_INO 8192 /* immutable inode bits; not a lock */
260
261/* client_session ops */
262enum {
263 CEPH_SESSION_REQUEST_OPEN,
264 CEPH_SESSION_OPEN,
265 CEPH_SESSION_REQUEST_CLOSE,
266 CEPH_SESSION_CLOSE,
267 CEPH_SESSION_REQUEST_RENEWCAPS,
268 CEPH_SESSION_RENEWCAPS,
269 CEPH_SESSION_STALE,
270 CEPH_SESSION_RECALL_STATE,
271};
272
273extern const char *ceph_session_op_name(int op);
274
275struct ceph_mds_session_head {
276 __le32 op;
277 __le64 seq;
278 struct ceph_timespec stamp;
279 __le32 max_caps, max_leases;
280} __attribute__ ((packed));
281
282/* client_request */
283/*
284 * metadata ops.
285 * & 0x001000 -> write op
286 * & 0x010000 -> follow symlink (e.g. stat(), not lstat()).
287 * & 0x100000 -> use weird ino/path trace
288 */
289#define CEPH_MDS_OP_WRITE 0x001000
290enum {
291 CEPH_MDS_OP_LOOKUP = 0x00100,
292 CEPH_MDS_OP_GETATTR = 0x00101,
293 CEPH_MDS_OP_LOOKUPHASH = 0x00102,
294 CEPH_MDS_OP_LOOKUPPARENT = 0x00103,
295
296 CEPH_MDS_OP_SETXATTR = 0x01105,
297 CEPH_MDS_OP_RMXATTR = 0x01106,
298 CEPH_MDS_OP_SETLAYOUT = 0x01107,
299 CEPH_MDS_OP_SETATTR = 0x01108,
300 CEPH_MDS_OP_SETFILELOCK= 0x01109,
301 CEPH_MDS_OP_GETFILELOCK= 0x00110,
302 CEPH_MDS_OP_SETDIRLAYOUT=0x0110a,
303
304 CEPH_MDS_OP_MKNOD = 0x01201,
305 CEPH_MDS_OP_LINK = 0x01202,
306 CEPH_MDS_OP_UNLINK = 0x01203,
307 CEPH_MDS_OP_RENAME = 0x01204,
308 CEPH_MDS_OP_MKDIR = 0x01220,
309 CEPH_MDS_OP_RMDIR = 0x01221,
310 CEPH_MDS_OP_SYMLINK = 0x01222,
311
312 CEPH_MDS_OP_CREATE = 0x01301,
313 CEPH_MDS_OP_OPEN = 0x00302,
314 CEPH_MDS_OP_READDIR = 0x00305,
315
316 CEPH_MDS_OP_LOOKUPSNAP = 0x00400,
317 CEPH_MDS_OP_MKSNAP = 0x01400,
318 CEPH_MDS_OP_RMSNAP = 0x01401,
319 CEPH_MDS_OP_LSSNAP = 0x00402,
320};
321
322extern const char *ceph_mds_op_name(int op);
323
324
325#define CEPH_SETATTR_MODE 1
326#define CEPH_SETATTR_UID 2
327#define CEPH_SETATTR_GID 4
328#define CEPH_SETATTR_MTIME 8
329#define CEPH_SETATTR_ATIME 16
330#define CEPH_SETATTR_SIZE 32
331#define CEPH_SETATTR_CTIME 64
332
333union ceph_mds_request_args {
334 struct {
335 __le32 mask; /* CEPH_CAP_* */
336 } __attribute__ ((packed)) getattr;
337 struct {
338 __le32 mode;
339 __le32 uid;
340 __le32 gid;
341 struct ceph_timespec mtime;
342 struct ceph_timespec atime;
343 __le64 size, old_size; /* old_size needed by truncate */
344 __le32 mask; /* CEPH_SETATTR_* */
345 } __attribute__ ((packed)) setattr;
346 struct {
347 __le32 frag; /* which dir fragment */
348 __le32 max_entries; /* how many dentries to grab */
349 __le32 max_bytes;
350 } __attribute__ ((packed)) readdir;
351 struct {
352 __le32 mode;
353 __le32 rdev;
354 } __attribute__ ((packed)) mknod;
355 struct {
356 __le32 mode;
357 } __attribute__ ((packed)) mkdir;
358 struct {
359 __le32 flags;
360 __le32 mode;
361 __le32 stripe_unit; /* layout for newly created file */
362 __le32 stripe_count; /* ... */
363 __le32 object_size;
364 __le32 file_replication;
365 __le32 preferred;
366 } __attribute__ ((packed)) open;
367 struct {
368 __le32 flags;
369 } __attribute__ ((packed)) setxattr;
370 struct {
371 struct ceph_file_layout layout;
372 } __attribute__ ((packed)) setlayout;
373 struct {
374 __u8 rule; /* currently fcntl or flock */
375 __u8 type; /* shared, exclusive, remove*/
376 __le64 pid; /* process id requesting the lock */
377 __le64 pid_namespace;
378 __le64 start; /* initial location to lock */
379 __le64 length; /* num bytes to lock from start */
380 __u8 wait; /* will caller wait for lock to become available? */
381 } __attribute__ ((packed)) filelock_change;
382} __attribute__ ((packed));
383
384#define CEPH_MDS_FLAG_REPLAY 1 /* this is a replayed op */
385#define CEPH_MDS_FLAG_WANT_DENTRY 2 /* want dentry in reply */
386
387struct ceph_mds_request_head {
388 __le64 oldest_client_tid;
389 __le32 mdsmap_epoch; /* on client */
390 __le32 flags; /* CEPH_MDS_FLAG_* */
391 __u8 num_retry, num_fwd; /* count retry, fwd attempts */
392 __le16 num_releases; /* # include cap/lease release records */
393 __le32 op; /* mds op code */
394 __le32 caller_uid, caller_gid;
395 __le64 ino; /* use this ino for openc, mkdir, mknod,
396 etc. (if replaying) */
397 union ceph_mds_request_args args;
398} __attribute__ ((packed));
399
400/* cap/lease release record */
401struct ceph_mds_request_release {
402 __le64 ino, cap_id; /* ino and unique cap id */
403 __le32 caps, wanted; /* new issued, wanted */
404 __le32 seq, issue_seq, mseq;
405 __le32 dname_seq; /* if releasing a dentry lease, a */
406 __le32 dname_len; /* string follows. */
407} __attribute__ ((packed));
408
409/* client reply */
410struct ceph_mds_reply_head {
411 __le32 op;
412 __le32 result;
413 __le32 mdsmap_epoch;
414 __u8 safe; /* true if committed to disk */
415 __u8 is_dentry, is_target; /* true if dentry, target inode records
416 are included with reply */
417} __attribute__ ((packed));
418
419/* one for each node split */
420struct ceph_frag_tree_split {
421 __le32 frag; /* this frag splits... */
422 __le32 by; /* ...by this many bits */
423} __attribute__ ((packed));
424
425struct ceph_frag_tree_head {
426 __le32 nsplits; /* num ceph_frag_tree_split records */
427 struct ceph_frag_tree_split splits[];
428} __attribute__ ((packed));
429
430/* capability issue, for bundling with mds reply */
431struct ceph_mds_reply_cap {
432 __le32 caps, wanted; /* caps issued, wanted */
433 __le64 cap_id;
434 __le32 seq, mseq;
435 __le64 realm; /* snap realm */
436 __u8 flags; /* CEPH_CAP_FLAG_* */
437} __attribute__ ((packed));
438
439#define CEPH_CAP_FLAG_AUTH 1 /* cap is issued by auth mds */
440
441/* inode record, for bundling with mds reply */
442struct ceph_mds_reply_inode {
443 __le64 ino;
444 __le64 snapid;
445 __le32 rdev;
446 __le64 version; /* inode version */
447 __le64 xattr_version; /* version for xattr blob */
448 struct ceph_mds_reply_cap cap; /* caps issued for this inode */
449 struct ceph_file_layout layout;
450 struct ceph_timespec ctime, mtime, atime;
451 __le32 time_warp_seq;
452 __le64 size, max_size, truncate_size;
453 __le32 truncate_seq;
454 __le32 mode, uid, gid;
455 __le32 nlink;
456 __le64 files, subdirs, rbytes, rfiles, rsubdirs; /* dir stats */
457 struct ceph_timespec rctime;
458 struct ceph_frag_tree_head fragtree; /* (must be at end of struct) */
459} __attribute__ ((packed));
460/* followed by frag array, then symlink string, then xattr blob */
461
462/* reply_lease follows dname, and reply_inode */
463struct ceph_mds_reply_lease {
464 __le16 mask; /* lease type(s) */
465 __le32 duration_ms; /* lease duration */
466 __le32 seq;
467} __attribute__ ((packed));
468
469struct ceph_mds_reply_dirfrag {
470 __le32 frag; /* fragment */
471 __le32 auth; /* auth mds, if this is a delegation point */
472 __le32 ndist; /* number of mds' this is replicated on */
473 __le32 dist[];
474} __attribute__ ((packed));
475
476#define CEPH_LOCK_FCNTL 1
477#define CEPH_LOCK_FLOCK 2
478
479#define CEPH_LOCK_SHARED 1
480#define CEPH_LOCK_EXCL 2
481#define CEPH_LOCK_UNLOCK 4
482
483struct ceph_filelock {
484 __le64 start;/* file offset to start lock at */
485 __le64 length; /* num bytes to lock; 0 for all following start */
486 __le64 client; /* which client holds the lock */
487 __le64 pid; /* process id holding the lock on the client */
488 __le64 pid_namespace;
489 __u8 type; /* shared lock, exclusive lock, or unlock */
490} __attribute__ ((packed));
491
492
493/* file access modes */
494#define CEPH_FILE_MODE_PIN 0
495#define CEPH_FILE_MODE_RD 1
496#define CEPH_FILE_MODE_WR 2
497#define CEPH_FILE_MODE_RDWR 3 /* RD | WR */
498#define CEPH_FILE_MODE_LAZY 4 /* lazy io */
499#define CEPH_FILE_MODE_NUM 8 /* bc these are bit fields.. mostly */
500
501int ceph_flags_to_mode(int flags);
502
503
504/* capability bits */
505#define CEPH_CAP_PIN 1 /* no specific capabilities beyond the pin */
506
507/* generic cap bits */
508#define CEPH_CAP_GSHARED 1 /* client can read */
509#define CEPH_CAP_GEXCL 2 /* client can read and update */
510#define CEPH_CAP_GCACHE 4 /* (file) client can cache reads */
511#define CEPH_CAP_GRD 8 /* (file) client can read */
512#define CEPH_CAP_GWR 16 /* (file) client can write */
513#define CEPH_CAP_GBUFFER 32 /* (file) client can buffer writes */
514#define CEPH_CAP_GWREXTEND 64 /* (file) client can extend EOF */
515#define CEPH_CAP_GLAZYIO 128 /* (file) client can perform lazy io */
516
517/* per-lock shift */
518#define CEPH_CAP_SAUTH 2
519#define CEPH_CAP_SLINK 4
520#define CEPH_CAP_SXATTR 6
521#define CEPH_CAP_SFILE 8
522#define CEPH_CAP_SFLOCK 20
523
524#define CEPH_CAP_BITS 22
525
526/* composed values */
527#define CEPH_CAP_AUTH_SHARED (CEPH_CAP_GSHARED << CEPH_CAP_SAUTH)
528#define CEPH_CAP_AUTH_EXCL (CEPH_CAP_GEXCL << CEPH_CAP_SAUTH)
529#define CEPH_CAP_LINK_SHARED (CEPH_CAP_GSHARED << CEPH_CAP_SLINK)
530#define CEPH_CAP_LINK_EXCL (CEPH_CAP_GEXCL << CEPH_CAP_SLINK)
531#define CEPH_CAP_XATTR_SHARED (CEPH_CAP_GSHARED << CEPH_CAP_SXATTR)
532#define CEPH_CAP_XATTR_EXCL (CEPH_CAP_GEXCL << CEPH_CAP_SXATTR)
533#define CEPH_CAP_FILE(x) ((x) << CEPH_CAP_SFILE) /* x parenthesized so a compound mask (a | b) is shifted as a unit */
534#define CEPH_CAP_FILE_SHARED (CEPH_CAP_GSHARED << CEPH_CAP_SFILE)
535#define CEPH_CAP_FILE_EXCL (CEPH_CAP_GEXCL << CEPH_CAP_SFILE)
536#define CEPH_CAP_FILE_CACHE (CEPH_CAP_GCACHE << CEPH_CAP_SFILE)
537#define CEPH_CAP_FILE_RD (CEPH_CAP_GRD << CEPH_CAP_SFILE)
538#define CEPH_CAP_FILE_WR (CEPH_CAP_GWR << CEPH_CAP_SFILE)
539#define CEPH_CAP_FILE_BUFFER (CEPH_CAP_GBUFFER << CEPH_CAP_SFILE)
540#define CEPH_CAP_FILE_WREXTEND (CEPH_CAP_GWREXTEND << CEPH_CAP_SFILE)
541#define CEPH_CAP_FILE_LAZYIO (CEPH_CAP_GLAZYIO << CEPH_CAP_SFILE)
542#define CEPH_CAP_FLOCK_SHARED (CEPH_CAP_GSHARED << CEPH_CAP_SFLOCK)
543#define CEPH_CAP_FLOCK_EXCL (CEPH_CAP_GEXCL << CEPH_CAP_SFLOCK)
544
545
546/* cap masks (for getattr) */
547#define CEPH_STAT_CAP_INODE CEPH_CAP_PIN
548#define CEPH_STAT_CAP_TYPE CEPH_CAP_PIN /* mode >> 12 */
549#define CEPH_STAT_CAP_SYMLINK CEPH_CAP_PIN
550#define CEPH_STAT_CAP_UID CEPH_CAP_AUTH_SHARED
551#define CEPH_STAT_CAP_GID CEPH_CAP_AUTH_SHARED
552#define CEPH_STAT_CAP_MODE CEPH_CAP_AUTH_SHARED
553#define CEPH_STAT_CAP_NLINK CEPH_CAP_LINK_SHARED
554#define CEPH_STAT_CAP_LAYOUT CEPH_CAP_FILE_SHARED
555#define CEPH_STAT_CAP_MTIME CEPH_CAP_FILE_SHARED
556#define CEPH_STAT_CAP_SIZE CEPH_CAP_FILE_SHARED
557#define CEPH_STAT_CAP_ATIME CEPH_CAP_FILE_SHARED /* fixme */
558#define CEPH_STAT_CAP_XATTR CEPH_CAP_XATTR_SHARED
559#define CEPH_STAT_CAP_INODE_ALL (CEPH_CAP_PIN | \
560 CEPH_CAP_AUTH_SHARED | \
561 CEPH_CAP_LINK_SHARED | \
562 CEPH_CAP_FILE_SHARED | \
563 CEPH_CAP_XATTR_SHARED)
564
565#define CEPH_CAP_ANY_SHARED (CEPH_CAP_AUTH_SHARED | \
566 CEPH_CAP_LINK_SHARED | \
567 CEPH_CAP_XATTR_SHARED | \
568 CEPH_CAP_FILE_SHARED)
569#define CEPH_CAP_ANY_RD (CEPH_CAP_ANY_SHARED | CEPH_CAP_FILE_RD | \
570 CEPH_CAP_FILE_CACHE)
571
572#define CEPH_CAP_ANY_EXCL (CEPH_CAP_AUTH_EXCL | \
573 CEPH_CAP_LINK_EXCL | \
574 CEPH_CAP_XATTR_EXCL | \
575 CEPH_CAP_FILE_EXCL)
576#define CEPH_CAP_ANY_FILE_WR (CEPH_CAP_FILE_WR | CEPH_CAP_FILE_BUFFER | \
577 CEPH_CAP_FILE_EXCL)
578#define CEPH_CAP_ANY_WR (CEPH_CAP_ANY_EXCL | CEPH_CAP_ANY_FILE_WR)
579#define CEPH_CAP_ANY (CEPH_CAP_ANY_RD | CEPH_CAP_ANY_EXCL | \
580 CEPH_CAP_ANY_FILE_WR | CEPH_CAP_FILE_LAZYIO | \
581 CEPH_CAP_PIN)
582
583#define CEPH_CAP_LOCKS (CEPH_LOCK_IFILE | CEPH_LOCK_IAUTH | CEPH_LOCK_ILINK | \
584 CEPH_LOCK_IXATTR)
585
586int ceph_caps_for_mode(int mode);
587
588enum {
589 CEPH_CAP_OP_GRANT, /* mds->client grant */
590 CEPH_CAP_OP_REVOKE, /* mds->client revoke */
591 CEPH_CAP_OP_TRUNC, /* mds->client trunc notify */
592 CEPH_CAP_OP_EXPORT, /* mds has exported the cap */
593 CEPH_CAP_OP_IMPORT, /* mds has imported the cap */
594 CEPH_CAP_OP_UPDATE, /* client->mds update */
595 CEPH_CAP_OP_DROP, /* client->mds drop cap bits */
596 CEPH_CAP_OP_FLUSH, /* client->mds cap writeback */
597 CEPH_CAP_OP_FLUSH_ACK, /* mds->client flushed */
598 CEPH_CAP_OP_FLUSHSNAP, /* client->mds flush snapped metadata */
599 CEPH_CAP_OP_FLUSHSNAP_ACK, /* mds->client flushed snapped metadata */
600 CEPH_CAP_OP_RELEASE, /* client->mds release (clean) cap */
601 CEPH_CAP_OP_RENEW, /* client->mds renewal request */
602};
603
604extern const char *ceph_cap_op_name(int op);
605
606/*
607 * caps message, used for capability callbacks, acks, requests, etc.
608 */
609struct ceph_mds_caps {
610 __le32 op; /* CEPH_CAP_OP_* */
611 __le64 ino, realm;
612 __le64 cap_id;
613 __le32 seq, issue_seq;
614 __le32 caps, wanted, dirty; /* latest issued/wanted/dirty */
615 __le32 migrate_seq;
616 __le64 snap_follows;
617 __le32 snap_trace_len;
618
619 /* authlock */
620 __le32 uid, gid, mode;
621
622 /* linklock */
623 __le32 nlink;
624
625 /* xattrlock */
626 __le32 xattr_len;
627 __le64 xattr_version;
628
629 /* filelock */
630 __le64 size, max_size, truncate_size;
631 __le32 truncate_seq;
632 struct ceph_timespec mtime, atime, ctime;
633 struct ceph_file_layout layout;
634 __le32 time_warp_seq;
635} __attribute__ ((packed));
636
637/* cap release msg head */
638struct ceph_mds_cap_release {
639 __le32 num; /* number of cap_items that follow */
640} __attribute__ ((packed));
641
642struct ceph_mds_cap_item {
643 __le64 ino;
644 __le64 cap_id;
645 __le32 migrate_seq, seq;
646} __attribute__ ((packed));
647
648#define CEPH_MDS_LEASE_REVOKE 1 /* mds -> client */
649#define CEPH_MDS_LEASE_RELEASE 2 /* client -> mds */
650#define CEPH_MDS_LEASE_RENEW 3 /* client <-> mds */
651#define CEPH_MDS_LEASE_REVOKE_ACK 4 /* client -> mds */
652
653extern const char *ceph_lease_op_name(int o);
654
655/* lease msg header */
656struct ceph_mds_lease {
657 __u8 action; /* CEPH_MDS_LEASE_* */
658 __le16 mask; /* which lease */
659 __le64 ino;
660 __le64 first, last; /* snap range */
661 __le32 seq;
662 __le32 duration_ms; /* duration of renewal */
663} __attribute__ ((packed));
664/* followed by a __le32+string for dname */
665
666/* client reconnect */
667struct ceph_mds_cap_reconnect {
668 __le64 cap_id;
669 __le32 wanted;
670 __le32 issued;
671 __le64 snaprealm;
672 __le64 pathbase; /* base ino for our path to this ino */
673 __le32 flock_len; /* size of flock state blob, if any */
674} __attribute__ ((packed));
675/* followed by flock blob */
676
677struct ceph_mds_cap_reconnect_v1 {
678 __le64 cap_id;
679 __le32 wanted;
680 __le32 issued;
681 __le64 size;
682 struct ceph_timespec mtime, atime;
683 __le64 snaprealm;
684 __le64 pathbase; /* base ino for our path to this ino */
685} __attribute__ ((packed));
686
687struct ceph_mds_snaprealm_reconnect {
688 __le64 ino; /* snap realm base */
689 __le64 seq; /* snap seq for this snap realm */
690 __le64 parent; /* parent realm */
691} __attribute__ ((packed));
692
693/*
694 * snaps
695 */
696enum {
697 CEPH_SNAP_OP_UPDATE, /* CREATE or DESTROY */
698 CEPH_SNAP_OP_CREATE,
699 CEPH_SNAP_OP_DESTROY,
700 CEPH_SNAP_OP_SPLIT,
701};
702
703extern const char *ceph_snap_op_name(int o);
704
705/* snap msg header */
706struct ceph_mds_snap_head {
707 __le32 op; /* CEPH_SNAP_OP_* */
708 __le64 split; /* ino to split off, if any */
709 __le32 num_split_inos; /* # inos belonging to new child realm */
710 __le32 num_split_realms; /* # child realms under new child realm */
711 __le32 trace_len; /* size of snap trace blob */
712} __attribute__ ((packed));
713/* followed by split ino list, then split realms, then the trace blob */
714
715/*
716 * encode info about a snaprealm, as viewed by a client
717 */
718struct ceph_mds_snap_realm {
719 __le64 ino; /* ino */
720 __le64 created; /* snap: when created */
721 __le64 parent; /* ino: parent realm */
722 __le64 parent_since; /* snap: same parent since */
723 __le64 seq; /* snap: version */
724 __le32 num_snaps;
725 __le32 num_prior_parent_snaps;
726} __attribute__ ((packed));
727/* followed by my snap list, then prior parent snap list */
728
729#endif
diff --git a/include/linux/ceph/ceph_hash.h b/include/linux/ceph/ceph_hash.h
new file mode 100644
index 000000000000..d099c3f90236
--- /dev/null
+++ b/include/linux/ceph/ceph_hash.h
@@ -0,0 +1,13 @@
1#ifndef FS_CEPH_HASH_H
2#define FS_CEPH_HASH_H
3
4#define CEPH_STR_HASH_LINUX 0x1 /* linux dcache hash */
5#define CEPH_STR_HASH_RJENKINS 0x2 /* robert jenkins' */
6
7extern unsigned ceph_str_hash_linux(const char *s, unsigned len);
8extern unsigned ceph_str_hash_rjenkins(const char *s, unsigned len);
9
10extern unsigned ceph_str_hash(int type, const char *s, unsigned len);
11extern const char *ceph_str_hash_name(int type);
12
13#endif
diff --git a/include/linux/ceph/debugfs.h b/include/linux/ceph/debugfs.h
new file mode 100644
index 000000000000..2a79702e092b
--- /dev/null
+++ b/include/linux/ceph/debugfs.h
@@ -0,0 +1,33 @@
1#ifndef _FS_CEPH_DEBUGFS_H
2#define _FS_CEPH_DEBUGFS_H
3
4#include "ceph_debug.h"
5#include "types.h"
6
/*
 * CEPH_DEFINE_SHOW_FUNC(name) - generate debugfs boilerplate for a
 * seq_file show routine.
 *
 * Given a show function @name, this emits name##_open(), which opens the
 * file with single_open() and makes inode->i_private available to the
 * show routine as the seq_file's ->private pointer, plus a matching
 * name##_fops table.
 *
 * inode->i_private is handed to single_open() as its data argument
 * instead of being patched into file->private_data afterwards: when
 * single_open() fails there is no seq_file to patch, and the previous
 * sequence dereferenced file->private_data before looking at the
 * return value.
 */
#define CEPH_DEFINE_SHOW_FUNC(name)					\
static int name##_open(struct inode *inode, struct file *file)		\
{									\
	return single_open(file, name, inode->i_private);		\
}									\
									\
static const struct file_operations name##_fops = {			\
	.open		= name##_open,					\
	.read		= seq_read,					\
	.llseek		= seq_lseek,					\
	.release	= single_release,				\
};
25
26/* debugfs.c */
27extern int ceph_debugfs_init(void);
28extern void ceph_debugfs_cleanup(void);
29extern int ceph_debugfs_client_init(struct ceph_client *client);
30extern void ceph_debugfs_client_cleanup(struct ceph_client *client);
31
32#endif
33
diff --git a/include/linux/ceph/decode.h b/include/linux/ceph/decode.h
new file mode 100644
index 000000000000..c5b6939fb32a
--- /dev/null
+++ b/include/linux/ceph/decode.h
@@ -0,0 +1,201 @@
1#ifndef __CEPH_DECODE_H
2#define __CEPH_DECODE_H
3
4#include <asm/unaligned.h>
5#include <linux/time.h>
6
7#include "types.h"
8
9/*
10 * in all cases,
11 * void **p pointer to position pointer
12 * void *end pointer to end of buffer (last byte + 1)
13 */
14
15static inline u64 ceph_decode_64(void **p)
16{
17 u64 v = get_unaligned_le64(*p);
18 *p += sizeof(u64);
19 return v;
20}
21static inline u32 ceph_decode_32(void **p)
22{
23 u32 v = get_unaligned_le32(*p);
24 *p += sizeof(u32);
25 return v;
26}
27static inline u16 ceph_decode_16(void **p)
28{
29 u16 v = get_unaligned_le16(*p);
30 *p += sizeof(u16);
31 return v;
32}
33static inline u8 ceph_decode_8(void **p)
34{
35 u8 v = *(u8 *)*p;
36 (*p)++;
37 return v;
38}
/*
 * Copy n raw bytes from *p into pv and step *p past them.  No bounds
 * check is done here.
 */
static inline void ceph_decode_copy(void **p, void *pv, size_t n)
{
	char *src = *p;

	memcpy(pv, src, n);
	*p = src + n;
}
44
45/*
46 * bounds check input.
47 */
/*
 * Jump to @bad unless at least @n bytes remain between *p and @end.
 *
 * The remaining room (end - *p) is compared against n rather than
 * forming *p + n: with a large n (e.g. a length field taken from the
 * wire) that sum can wrap around and wrongly pass the check.  A position
 * already beyond @end, or a negative n, is rejected as well.
 * Each macro argument is evaluated exactly once.
 */
#define ceph_decode_need(p, end, n, bad)				\
	do {								\
		const char *__dn_pos = (const char *)*(p);		\
		const char *__dn_end = (const char *)(end);		\
									\
		if (unlikely(__dn_pos > __dn_end ||			\
			     (unsigned long long)(n) >			\
			     (unsigned long long)(__dn_end - __dn_pos))) \
			goto bad;					\
	} while (0)
53
54#define ceph_decode_64_safe(p, end, v, bad) \
55 do { \
56 ceph_decode_need(p, end, sizeof(u64), bad); \
57 v = ceph_decode_64(p); \
58 } while (0)
59#define ceph_decode_32_safe(p, end, v, bad) \
60 do { \
61 ceph_decode_need(p, end, sizeof(u32), bad); \
62 v = ceph_decode_32(p); \
63 } while (0)
64#define ceph_decode_16_safe(p, end, v, bad) \
65 do { \
66 ceph_decode_need(p, end, sizeof(u16), bad); \
67 v = ceph_decode_16(p); \
68 } while (0)
69#define ceph_decode_8_safe(p, end, v, bad) \
70 do { \
71 ceph_decode_need(p, end, sizeof(u8), bad); \
72 v = ceph_decode_8(p); \
73 } while (0)
74
75#define ceph_decode_copy_safe(p, end, pv, n, bad) \
76 do { \
77 ceph_decode_need(p, end, n, bad); \
78 ceph_decode_copy(p, pv, n); \
79 } while (0)
80
81/*
82 * struct ceph_timespec <-> struct timespec
83 */
84static inline void ceph_decode_timespec(struct timespec *ts,
85 const struct ceph_timespec *tv)
86{
87 ts->tv_sec = le32_to_cpu(tv->tv_sec);
88 ts->tv_nsec = le32_to_cpu(tv->tv_nsec);
89}
90static inline void ceph_encode_timespec(struct ceph_timespec *tv,
91 const struct timespec *ts)
92{
93 tv->tv_sec = cpu_to_le32(ts->tv_sec);
94 tv->tv_nsec = cpu_to_le32(ts->tv_nsec);
95}
96
97/*
98 * sockaddr_storage <-> ceph_sockaddr
99 */
/*
 * Rewrite a->in_addr.ss_family in place: the host-order value is run
 * through htons() and the resulting 16 bits are stored back into the
 * same field.
 */
100static inline void ceph_encode_addr(struct ceph_entity_addr *a)
101{
102 __be16 ss_family = htons(a->in_addr.ss_family);
103 a->in_addr.ss_family = *(__u16 *)&ss_family;
104}
/*
 * Inverse of ceph_encode_addr(): read the 16 bits stored in
 * a->in_addr.ss_family as a big-endian value and store the ntohs()
 * result back into the same field.
 */
105static inline void ceph_decode_addr(struct ceph_entity_addr *a)
106{
107 __be16 ss_family = *(__be16 *)&a->in_addr.ss_family;
108 a->in_addr.ss_family = ntohs(ss_family);
 /* 512 == 0x0200, i.e. the value 2 with its two bytes swapped;
  * NOTE(review): presumably this flags a wrongly-ordered AF_INET - confirm */
109 WARN_ON(a->in_addr.ss_family == 512);
110}
111
112/*
113 * encoders
114 */
115static inline void ceph_encode_64(void **p, u64 v)
116{
117 put_unaligned_le64(v, (__le64 *)*p);
118 *p += sizeof(u64);
119}
120static inline void ceph_encode_32(void **p, u32 v)
121{
122 put_unaligned_le32(v, (__le32 *)*p);
123 *p += sizeof(u32);
124}
125static inline void ceph_encode_16(void **p, u16 v)
126{
127 put_unaligned_le16(v, (__le16 *)*p);
128 *p += sizeof(u16);
129}
130static inline void ceph_encode_8(void **p, u8 v)
131{
132 *(u8 *)*p = v;
133 (*p)++;
134}
/*
 * Copy len raw bytes from s to *p and step *p past them.  No bounds
 * check is done here.
 */
static inline void ceph_encode_copy(void **p, const void *s, int len)
{
	char *dst = *p;

	memcpy(dst, s, len);
	*p = dst + len;
}
140
141/*
142 * filepath, string encoders
143 */
144static inline void ceph_encode_filepath(void **p, void *end,
145 u64 ino, const char *path)
146{
147 u32 len = path ? strlen(path) : 0;
148 BUG_ON(*p + sizeof(ino) + sizeof(len) + len > end);
149 ceph_encode_8(p, 1);
150 ceph_encode_64(p, ino);
151 ceph_encode_32(p, len);
152 if (len)
153 memcpy(*p, path, len);
154 *p += len;
155}
156
157static inline void ceph_encode_string(void **p, void *end,
158 const char *s, u32 len)
159{
160 BUG_ON(*p + sizeof(len) + len > end);
161 ceph_encode_32(p, len);
162 if (len)
163 memcpy(*p, s, len);
164 *p += len;
165}
166
/*
 * Jump to @bad unless at least @n bytes of space remain between *p and
 * @end.
 *
 * The remaining room (end - *p) is compared against n rather than
 * forming *p + n, which can wrap around for a large n and wrongly pass
 * the check.  A position already beyond @end, or a negative n, is
 * rejected as well.  Each macro argument is evaluated exactly once.
 */
#define ceph_encode_need(p, end, n, bad)				\
	do {								\
		const char *__en_pos = (const char *)*(p);		\
		const char *__en_end = (const char *)(end);		\
									\
		if (unlikely(__en_pos > __en_end ||			\
			     (unsigned long long)(n) >			\
			     (unsigned long long)(__en_end - __en_pos))) \
			goto bad;					\
	} while (0)
172
173#define ceph_encode_64_safe(p, end, v, bad) \
174 do { \
175 ceph_encode_need(p, end, sizeof(u64), bad); \
176 ceph_encode_64(p, v); \
177 } while (0)
178#define ceph_encode_32_safe(p, end, v, bad) \
179 do { \
180 ceph_encode_need(p, end, sizeof(u32), bad); \
181 ceph_encode_32(p, v); \
182 } while (0)
183#define ceph_encode_16_safe(p, end, v, bad) \
184 do { \
185 ceph_encode_need(p, end, sizeof(u16), bad); \
186 ceph_encode_16(p, v); \
187 } while (0)
188
189#define ceph_encode_copy_safe(p, end, pv, n, bad) \
190 do { \
191 ceph_encode_need(p, end, n, bad); \
192 ceph_encode_copy(p, pv, n); \
193 } while (0)
/*
 * Bounds-checked ceph_encode_string(): jump to @bad if the string does
 * not fit before @end.
 *
 * ceph_encode_string() writes a 32-bit length followed by n bytes, so
 * the room required is sizeof(u32) + n, not just n; checking only n let
 * a too-small buffer through to the BUG_ON() inside ceph_encode_string().
 */
#define ceph_encode_string_safe(p, end, s, n, bad)			\
	do {								\
		ceph_encode_need(p, end, sizeof(u32) + (size_t)(n), bad); \
		ceph_encode_string(p, end, s, n);			\
	} while (0)
199
200
201#endif
diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h
new file mode 100644
index 000000000000..f22b2e941686
--- /dev/null
+++ b/include/linux/ceph/libceph.h
@@ -0,0 +1,249 @@
1#ifndef _FS_CEPH_LIBCEPH_H
2#define _FS_CEPH_LIBCEPH_H
3
4#include "ceph_debug.h"
5
6#include <asm/unaligned.h>
7#include <linux/backing-dev.h>
8#include <linux/completion.h>
9#include <linux/exportfs.h>
10#include <linux/fs.h>
11#include <linux/mempool.h>
12#include <linux/pagemap.h>
13#include <linux/wait.h>
14#include <linux/writeback.h>
15#include <linux/slab.h>
16
17#include "types.h"
18#include "messenger.h"
19#include "msgpool.h"
20#include "mon_client.h"
21#include "osd_client.h"
22#include "ceph_fs.h"
23
24/*
25 * Supported features
26 */
27#define CEPH_FEATURE_SUPPORTED_DEFAULT CEPH_FEATURE_NOSRCADDR
28#define CEPH_FEATURE_REQUIRED_DEFAULT CEPH_FEATURE_NOSRCADDR
29
30/*
31 * mount options
32 */
33#define CEPH_OPT_FSID (1<<0)
34#define CEPH_OPT_NOSHARE (1<<1) /* don't share client with other sbs */
35#define CEPH_OPT_MYIP (1<<2) /* specified my ip */
36#define CEPH_OPT_NOCRC (1<<3) /* no data crc on writes */
37
/* default option flags: no CEPH_OPT_* bit set (no trailing ';' so the
 * macro can be used inside expressions) */
#define CEPH_OPT_DEFAULT   (0)
39
/*
 * Set option flag CEPH_OPT_<opt> in client->options->flags.
 *
 * Written as a single parenthesized expression without a trailing ';'
 * so that "if (x) ceph_set_opt(c, FOO); else ..." parses as intended.
 */
#define ceph_set_opt(client, opt) \
	((client)->options->flags |= CEPH_OPT_##opt)
42#define ceph_test_opt(client, opt) \
43 (!!((client)->options->flags & CEPH_OPT_##opt))
44
45struct ceph_options {
46 int flags;
47 struct ceph_fsid fsid;
48 struct ceph_entity_addr my_addr;
49 int mount_timeout;
50 int osd_idle_ttl;
51 int osd_timeout;
52 int osd_keepalive_timeout;
53
54 /*
55 * any type that can't be simply compared or doesn't need
56 * to be compared should go beyond this point,
57 * ceph_compare_options() should be updated accordingly
58 */
59
60 struct ceph_entity_addr *mon_addr; /* should be the first
61 pointer type of args */
62 int num_mon;
63 char *name;
64 char *secret;
65};
66
67/*
68 * defaults
69 */
70#define CEPH_MOUNT_TIMEOUT_DEFAULT 60
71#define CEPH_OSD_TIMEOUT_DEFAULT 60 /* seconds */
72#define CEPH_OSD_KEEPALIVE_DEFAULT 5
73#define CEPH_OSD_IDLE_TTL_DEFAULT 60
74#define CEPH_MOUNT_RSIZE_DEFAULT (512*1024) /* readahead */
75
76#define CEPH_MSG_MAX_FRONT_LEN (16*1024*1024)
77#define CEPH_MSG_MAX_DATA_LEN (16*1024*1024)
78
79#define CEPH_AUTH_NAME_DEFAULT "guest"
80
81/*
82 * Delay telling the MDS we no longer want caps, in case we reopen
83 * the file. Delay a minimum amount of time, even if we send a cap
84 * message for some other reason. Otherwise, take the opportunity to
85 * update the mds to avoid sending another message later.
86 */
87#define CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT 5 /* cap release delay */
88#define CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT 60 /* cap release delay */
89
90#define CEPH_CAP_RELEASE_SAFETY_DEFAULT (CEPH_CAPS_PER_RELEASE * 4)
91
92/* mount state */
93enum {
94 CEPH_MOUNT_MOUNTING,
95 CEPH_MOUNT_MOUNTED,
96 CEPH_MOUNT_UNMOUNTING,
97 CEPH_MOUNT_UNMOUNTED,
98 CEPH_MOUNT_SHUTDOWN,
99};
100
101/*
102 * subtract jiffies
103 */
/*
 * subtract jiffies: return a - b, where a must not be before b
 * (BUG()s if time_after(b, a)).
 *
 * The subtraction is done on the unsigned values, which wraps modulo
 * the word size and is always well defined; casting both operands to
 * long first could overflow a signed subtraction.
 */
static inline unsigned long time_sub(unsigned long a, unsigned long b)
{
	BUG_ON(time_after(b, a));
	return a - b;
}
109
110struct ceph_mds_client;
111
112/*
113 * per client state
114 *
115 * possibly shared by multiple mount points, if they are
116 * mounting the same ceph filesystem/cluster.
117 */
118struct ceph_client {
119 struct ceph_fsid fsid;
120 bool have_fsid;
121
122 void *private;
123
124 struct ceph_options *options;
125
126 struct mutex mount_mutex; /* serialize mount attempts */
127 wait_queue_head_t auth_wq;
128 int auth_err;
129
130 int (*extra_mon_dispatch)(struct ceph_client *, struct ceph_msg *);
131
132 u32 supported_features;
133 u32 required_features;
134
135 struct ceph_messenger *msgr; /* messenger instance */
136 struct ceph_mon_client monc;
137 struct ceph_osd_client osdc;
138
139#ifdef CONFIG_DEBUG_FS
140 struct dentry *debugfs_dir;
141 struct dentry *debugfs_monmap;
142 struct dentry *debugfs_osdmap;
143#endif
144};
145
146
147
148/*
149 * snapshots
150 */
151
152/*
153 * A "snap context" is the set of existing snapshots when we
154 * write data. It is used by the OSD to guide its COW behavior.
155 *
156 * The ceph_snap_context is refcounted, and attached to each dirty
157 * page, indicating which context the dirty data belonged when it was
158 * dirtied.
159 */
160struct ceph_snap_context {
161 atomic_t nref;
162 u64 seq;
163 int num_snaps;
164 u64 snaps[];
165};
166
167static inline struct ceph_snap_context *
168ceph_get_snap_context(struct ceph_snap_context *sc)
169{
170 /*
171 printk("get_snap_context %p %d -> %d\n", sc, atomic_read(&sc->nref),
172 atomic_read(&sc->nref)+1);
173 */
174 if (sc)
175 atomic_inc(&sc->nref);
176 return sc;
177}
178
179static inline void ceph_put_snap_context(struct ceph_snap_context *sc)
180{
181 if (!sc)
182 return;
183 /*
184 printk("put_snap_context %p %d -> %d\n", sc, atomic_read(&sc->nref),
185 atomic_read(&sc->nref)-1);
186 */
187 if (atomic_dec_and_test(&sc->nref)) {
188 /*printk(" deleting snap_context %p\n", sc);*/
189 kfree(sc);
190 }
191}
192
193/*
194 * calculate the number of pages a given length and offset map onto,
195 * if we align the data.
196 */
197static inline int calc_pages_for(u64 off, u64 len)
198{
199 return ((off+len+PAGE_CACHE_SIZE-1) >> PAGE_CACHE_SHIFT) -
200 (off >> PAGE_CACHE_SHIFT);
201}
202
203/* ceph_common.c */
204extern const char *ceph_msg_type_name(int type);
205extern int ceph_check_fsid(struct ceph_client *client, struct ceph_fsid *fsid);
206extern struct kmem_cache *ceph_inode_cachep;
207extern struct kmem_cache *ceph_cap_cachep;
208extern struct kmem_cache *ceph_dentry_cachep;
209extern struct kmem_cache *ceph_file_cachep;
210
211extern int ceph_parse_options(struct ceph_options **popt, char *options,
212 const char *dev_name, const char *dev_name_end,
213 int (*parse_extra_token)(char *c, void *private),
214 void *private);
215extern void ceph_destroy_options(struct ceph_options *opt);
216extern int ceph_compare_options(struct ceph_options *new_opt,
217 struct ceph_client *client);
218extern struct ceph_client *ceph_create_client(struct ceph_options *opt,
219 void *private);
220extern u64 ceph_client_id(struct ceph_client *client);
221extern void ceph_destroy_client(struct ceph_client *client);
222extern int __ceph_open_session(struct ceph_client *client,
223 unsigned long started);
224extern int ceph_open_session(struct ceph_client *client);
225
226/* pagevec.c */
227extern void ceph_release_page_vector(struct page **pages, int num_pages);
228
229extern struct page **ceph_get_direct_page_vector(const char __user *data,
230 int num_pages,
231 loff_t off, size_t len);
232extern void ceph_put_page_vector(struct page **pages, int num_pages);
233extern void ceph_release_page_vector(struct page **pages, int num_pages);
234extern struct page **ceph_alloc_page_vector(int num_pages, gfp_t flags);
235extern int ceph_copy_user_to_page_vector(struct page **pages,
236 const char __user *data,
237 loff_t off, size_t len);
238extern int ceph_copy_to_page_vector(struct page **pages,
239 const char *data,
240 loff_t off, size_t len);
241extern int ceph_copy_from_page_vector(struct page **pages,
242 char *data,
243 loff_t off, size_t len);
244extern int ceph_copy_page_vector_to_user(struct page **pages, char __user *data,
245 loff_t off, size_t len);
246extern void ceph_zero_page_vector_range(int off, int len, struct page **pages);
247
248
249#endif /* _FS_CEPH_LIBCEPH_H */
diff --git a/include/linux/ceph/mdsmap.h b/include/linux/ceph/mdsmap.h
new file mode 100644
index 000000000000..4c5cb0880bba
--- /dev/null
+++ b/include/linux/ceph/mdsmap.h
@@ -0,0 +1,62 @@
1#ifndef _FS_CEPH_MDSMAP_H
2#define _FS_CEPH_MDSMAP_H
3
4#include "types.h"
5
6/*
7 * mds map - describe servers in the mds cluster.
8 *
9 * we limit fields to those the client actually cares about
10 */
11struct ceph_mds_info {
12 u64 global_id;
13 struct ceph_entity_addr addr;
14 s32 state;
15 int num_export_targets;
16 bool laggy;
17 u32 *export_targets;
18};
19
20struct ceph_mdsmap {
21 u32 m_epoch, m_client_epoch, m_last_failure;
22 u32 m_root;
23 u32 m_session_timeout; /* seconds */
24 u32 m_session_autoclose; /* seconds */
25 u64 m_max_file_size;
26 u32 m_max_mds; /* size of m_addr, m_state arrays */
27 struct ceph_mds_info *m_info;
28
29 /* which object pools file data can be stored in */
30 int m_num_data_pg_pools;
31 u32 *m_data_pg_pools;
32 u32 m_cas_pg_pool;
33};
34
35static inline struct ceph_entity_addr *
36ceph_mdsmap_get_addr(struct ceph_mdsmap *m, int w)
37{
38 if (w >= m->m_max_mds)
39 return NULL;
40 return &m->m_info[w].addr;
41}
42
43static inline int ceph_mdsmap_get_state(struct ceph_mdsmap *m, int w)
44{
45 BUG_ON(w < 0);
46 if (w >= m->m_max_mds)
47 return CEPH_MDS_STATE_DNE;
48 return m->m_info[w].state;
49}
50
51static inline bool ceph_mdsmap_is_laggy(struct ceph_mdsmap *m, int w)
52{
53 if (w >= 0 && w < m->m_max_mds)
54 return m->m_info[w].laggy;
55 return false;
56}
57
58extern int ceph_mdsmap_get_random_mds(struct ceph_mdsmap *m);
59extern struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end);
60extern void ceph_mdsmap_destroy(struct ceph_mdsmap *m);
61
62#endif
diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h
new file mode 100644
index 000000000000..5956d62c3057
--- /dev/null
+++ b/include/linux/ceph/messenger.h
@@ -0,0 +1,261 @@
1#ifndef __FS_CEPH_MESSENGER_H
2#define __FS_CEPH_MESSENGER_H
3
4#include <linux/kref.h>
5#include <linux/mutex.h>
6#include <linux/net.h>
7#include <linux/radix-tree.h>
8#include <linux/uio.h>
9#include <linux/version.h>
10#include <linux/workqueue.h>
11
12#include "types.h"
13#include "buffer.h"
14
15struct ceph_msg;
16struct ceph_connection;
17
18extern struct workqueue_struct *ceph_msgr_wq; /* receive work queue */
19
20/*
21 * Ceph defines these callbacks for handling connection events.
22 */
23struct ceph_connection_operations {
24 struct ceph_connection *(*get)(struct ceph_connection *);
25 void (*put)(struct ceph_connection *);
26
27 /* handle an incoming message. */
28 void (*dispatch) (struct ceph_connection *con, struct ceph_msg *m);
29
30 /* authorize an outgoing connection */
31 int (*get_authorizer) (struct ceph_connection *con,
32 void **buf, int *len, int *proto,
33 void **reply_buf, int *reply_len, int force_new);
34 int (*verify_authorizer_reply) (struct ceph_connection *con, int len);
35 int (*invalidate_authorizer)(struct ceph_connection *con);
36
37 /* protocol version mismatch */
38 void (*bad_proto) (struct ceph_connection *con);
39
40 /* there was some error on the socket (disconnect, whatever) */
41 void (*fault) (struct ceph_connection *con);
42
43 /* a remote host has terminated a message exchange session, and messages
44 * we sent (or they tried to send us) may be lost. */
45 void (*peer_reset) (struct ceph_connection *con);
46
47 struct ceph_msg * (*alloc_msg) (struct ceph_connection *con,
48 struct ceph_msg_header *hdr,
49 int *skip);
50};
51
52/* use format string %s%d */
53#define ENTITY_NAME(n) ceph_entity_type_name((n).type), le64_to_cpu((n).num)
54
55struct ceph_messenger {
56 struct ceph_entity_inst inst; /* my name+address */
57 struct ceph_entity_addr my_enc_addr;
58 struct page *zero_page; /* used in certain error cases */
59
60 bool nocrc;
61
62 /*
63 * the global_seq counts connections i (attempt to) initiate
64 * in order to disambiguate certain connect race conditions.
65 */
66 u32 global_seq;
67 spinlock_t global_seq_lock;
68
69 u32 supported_features;
70 u32 required_features;
71};
72
73/*
74 * a single message. it contains a header (src, dest, message type, etc.),
75 * footer (crc values, mainly), a "front" message body, and possibly a
76 * data payload (stored in some number of pages).
77 */
78struct ceph_msg {
79 struct ceph_msg_header hdr; /* header */
80 struct ceph_msg_footer footer; /* footer */
81 struct kvec front; /* unaligned blobs of message */
82 struct ceph_buffer *middle;
83 struct page **pages; /* data payload. NOT OWNER. */
84 unsigned nr_pages; /* size of page array */
85 struct ceph_pagelist *pagelist; /* instead of pages */
86 struct list_head list_head;
87 struct kref kref;
88 struct bio *bio; /* instead of pages/pagelist */
89 struct bio *bio_iter; /* bio iterator */
90 int bio_seg; /* current bio segment */
91 struct ceph_pagelist *trail; /* the trailing part of the data */
92 bool front_is_vmalloc;
93 bool more_to_follow;
94 bool needs_out_seq;
95 int front_max;
96
97 struct ceph_msgpool *pool;
98};
99
100struct ceph_msg_pos {
101 int page, page_pos; /* which page; offset in page */
102 int data_pos; /* offset in data payload */
103 int did_page_crc; /* true if we've calculated crc for current page */
104};
105
106/* ceph connection fault delay defaults, for exponential backoff */
107#define BASE_DELAY_INTERVAL (HZ/2)
108#define MAX_DELAY_INTERVAL (5 * 60 * HZ)
109
110/*
111 * ceph_connection state bit flags
112 *
113 * QUEUED and BUSY are used together to ensure that only a single
114 * thread is currently opening, reading or writing data to the socket.
115 */
116#define LOSSYTX 0 /* we can close channel or drop messages on errors */
117#define CONNECTING 1
118#define NEGOTIATING 2
119#define KEEPALIVE_PENDING 3
120#define WRITE_PENDING 4 /* we have data ready to send */
121#define QUEUED 5 /* there is work queued on this connection */
122#define BUSY 6 /* work is being done */
123#define STANDBY 8 /* no outgoing messages, socket closed. we keep
124 * the ceph_connection around to maintain shared
125 * state with the peer. */
126#define CLOSED 10 /* we've closed the connection */
127#define SOCK_CLOSED 11 /* socket state changed to closed */
128#define OPENING 13 /* open connection w/ (possibly new) peer */
129#define DEAD 14 /* dead, about to kfree */
130
131/*
132 * A single connection with another host.
133 *
134 * We maintain a queue of outgoing messages, and some session state to
135 * ensure that we can preserve the lossless, ordered delivery of
136 * messages in the case of a TCP disconnect.
137 */
138struct ceph_connection {
139 void *private;
140 atomic_t nref;
141
142 const struct ceph_connection_operations *ops;
143
144 struct ceph_messenger *msgr;
145 struct socket *sock;
146 unsigned long state; /* connection state (see flags above) */
147 const char *error_msg; /* error message, if any */
148
149 struct ceph_entity_addr peer_addr; /* peer address */
150 struct ceph_entity_name peer_name; /* peer name */
151 struct ceph_entity_addr peer_addr_for_me;
152 unsigned peer_features;
153 u32 connect_seq; /* identify the most recent connection
154 attempt for this connection, client */
155 u32 peer_global_seq; /* peer's global seq for this connection */
156
157 int auth_retry; /* true if we need a newer authorizer */
158 void *auth_reply_buf; /* where to put the authorizer reply */
159 int auth_reply_buf_len;
160
161 struct mutex mutex;
162
163 /* out queue */
164 struct list_head out_queue;
165 struct list_head out_sent; /* sending or sent but unacked */
166 u64 out_seq; /* last message queued for send */
167 bool out_keepalive_pending;
168
169 u64 in_seq, in_seq_acked; /* last message received, acked */
170
171 /* connection negotiation temps */
172 char in_banner[CEPH_BANNER_MAX_LEN];
173 union {
174 struct { /* outgoing connection */
175 struct ceph_msg_connect out_connect;
176 struct ceph_msg_connect_reply in_reply;
177 };
178 struct { /* incoming */
179 struct ceph_msg_connect in_connect;
180 struct ceph_msg_connect_reply out_reply;
181 };
182 };
183 struct ceph_entity_addr actual_peer_addr;
184
185 /* message out temps */
186 struct ceph_msg *out_msg; /* sending message (== tail of
187 out_sent) */
188 bool out_msg_done;
189 struct ceph_msg_pos out_msg_pos;
190
191 struct kvec out_kvec[8], /* sending header/footer data */
192 *out_kvec_cur;
193 int out_kvec_left; /* kvec's left in out_kvec */
194 int out_skip; /* skip this many bytes */
195 int out_kvec_bytes; /* total bytes left */
196 bool out_kvec_is_msg; /* kvec refers to out_msg */
197 int out_more; /* there is more data after the kvecs */
198 __le64 out_temp_ack; /* for writing an ack */
199
200 /* message in temps */
201 struct ceph_msg_header in_hdr;
202 struct ceph_msg *in_msg;
203 struct ceph_msg_pos in_msg_pos;
204 u32 in_front_crc, in_middle_crc, in_data_crc; /* calculated crc */
205
206 char in_tag; /* protocol control byte */
207 int in_base_pos; /* bytes read */
208 __le64 in_temp_ack; /* for reading an ack */
209
210 struct delayed_work work; /* send|recv work */
211 unsigned long delay; /* current delay interval */
212};
213
214
215extern const char *ceph_pr_addr(const struct sockaddr_storage *ss);
216extern int ceph_parse_ips(const char *c, const char *end,
217 struct ceph_entity_addr *addr,
218 int max_count, int *count);
219
220
221extern int ceph_msgr_init(void);
222extern void ceph_msgr_exit(void);
223extern void ceph_msgr_flush(void);
224
225extern struct ceph_messenger *ceph_messenger_create(
226 struct ceph_entity_addr *myaddr,
227 u32 features, u32 required);
228extern void ceph_messenger_destroy(struct ceph_messenger *);
229
230extern void ceph_con_init(struct ceph_messenger *msgr,
231 struct ceph_connection *con);
232extern void ceph_con_open(struct ceph_connection *con,
233 struct ceph_entity_addr *addr);
234extern bool ceph_con_opened(struct ceph_connection *con);
235extern void ceph_con_close(struct ceph_connection *con);
236extern void ceph_con_send(struct ceph_connection *con, struct ceph_msg *msg);
237extern void ceph_con_revoke(struct ceph_connection *con, struct ceph_msg *msg);
238extern void ceph_con_revoke_message(struct ceph_connection *con,
239 struct ceph_msg *msg);
240extern void ceph_con_keepalive(struct ceph_connection *con);
241extern struct ceph_connection *ceph_con_get(struct ceph_connection *con);
242extern void ceph_con_put(struct ceph_connection *con);
243
244extern struct ceph_msg *ceph_msg_new(int type, int front_len, gfp_t flags);
245extern void ceph_msg_kfree(struct ceph_msg *m);
246
247
248static inline struct ceph_msg *ceph_msg_get(struct ceph_msg *msg)
249{
250 kref_get(&msg->kref);
251 return msg;
252}
253extern void ceph_msg_last_put(struct kref *kref);
254static inline void ceph_msg_put(struct ceph_msg *msg)
255{
256 kref_put(&msg->kref, ceph_msg_last_put);
257}
258
259extern void ceph_msg_dump(struct ceph_msg *msg);
260
261#endif
diff --git a/include/linux/ceph/mon_client.h b/include/linux/ceph/mon_client.h
new file mode 100644
index 000000000000..545f85917780
--- /dev/null
+++ b/include/linux/ceph/mon_client.h
@@ -0,0 +1,122 @@
1#ifndef _FS_CEPH_MON_CLIENT_H
2#define _FS_CEPH_MON_CLIENT_H
3
4#include <linux/completion.h>
5#include <linux/kref.h>
6#include <linux/rbtree.h>
7
8#include "messenger.h"
9
10struct ceph_client;
11struct ceph_mount_args;
12struct ceph_auth_client;
13
14/*
15 * The monitor map enumerates the set of all monitors.
16 */
17struct ceph_monmap {
18 struct ceph_fsid fsid;
19 u32 epoch;
20 u32 num_mon;
21 struct ceph_entity_inst mon_inst[0];
22};
23
24struct ceph_mon_client;
25struct ceph_mon_generic_request;
26
27
28/*
29 * Generic mechanism for resending monitor requests.
30 */
31typedef void (*ceph_monc_request_func_t)(struct ceph_mon_client *monc,
32 int newmon);
33
34/* a pending monitor request */
35struct ceph_mon_request {
36 struct ceph_mon_client *monc;
37 struct delayed_work delayed_work;
38 unsigned long delay;
39 ceph_monc_request_func_t do_request;
40};
41
42/*
43 * ceph_mon_generic_request is being used for the statfs and poolop requests
44 * which are being done a bit differently because we need to get data back
45 * to the caller
46 */
47struct ceph_mon_generic_request {
48 struct kref kref;
49 u64 tid;
50 struct rb_node node;
51 int result;
52 void *buf;
53 int buf_len;
54 struct completion completion;
55 struct ceph_msg *request; /* original request */
56 struct ceph_msg *reply; /* and reply */
57};
58
59struct ceph_mon_client {
60 struct ceph_client *client;
61 struct ceph_monmap *monmap;
62
63 struct mutex mutex;
64 struct delayed_work delayed_work;
65
66 struct ceph_auth_client *auth;
67 struct ceph_msg *m_auth, *m_auth_reply, *m_subscribe, *m_subscribe_ack;
68 int pending_auth;
69
70 bool hunting;
71 int cur_mon; /* last monitor i contacted */
72 unsigned long sub_sent, sub_renew_after;
73 struct ceph_connection *con;
74 bool have_fsid;
75
76 /* pending generic requests */
77 struct rb_root generic_request_tree;
78 int num_generic_requests;
79 u64 last_tid;
80
81 /* mds/osd map */
82 int want_mdsmap;
83 int want_next_osdmap; /* 1 = want, 2 = want+asked */
84 u32 have_osdmap, have_mdsmap;
85
86#ifdef CONFIG_DEBUG_FS
87 struct dentry *debugfs_file;
88#endif
89};
90
91extern struct ceph_monmap *ceph_monmap_decode(void *p, void *end);
92extern int ceph_monmap_contains(struct ceph_monmap *m,
93 struct ceph_entity_addr *addr);
94
95extern int ceph_monc_init(struct ceph_mon_client *monc, struct ceph_client *cl);
96extern void ceph_monc_stop(struct ceph_mon_client *monc);
97
98/*
99 * The model here is to indicate that we need a new map of at least
100 * epoch @want, and also call in when we receive a map. We will
101 * periodically rerequest the map from the monitor cluster until we
102 * get what we want.
103 */
104extern int ceph_monc_got_mdsmap(struct ceph_mon_client *monc, u32 have);
105extern int ceph_monc_got_osdmap(struct ceph_mon_client *monc, u32 have);
106
107extern void ceph_monc_request_next_osdmap(struct ceph_mon_client *monc);
108
109extern int ceph_monc_do_statfs(struct ceph_mon_client *monc,
110 struct ceph_statfs *buf);
111
112extern int ceph_monc_open_session(struct ceph_mon_client *monc);
113
114extern int ceph_monc_validate_auth(struct ceph_mon_client *monc);
115
116extern int ceph_monc_create_snapid(struct ceph_mon_client *monc,
117 u32 pool, u64 *snapid);
118
119extern int ceph_monc_delete_snapid(struct ceph_mon_client *monc,
120 u32 pool, u64 snapid);
121
122#endif
diff --git a/include/linux/ceph/msgpool.h b/include/linux/ceph/msgpool.h
new file mode 100644
index 000000000000..a362605f9368
--- /dev/null
+++ b/include/linux/ceph/msgpool.h
@@ -0,0 +1,25 @@
1#ifndef _FS_CEPH_MSGPOOL
2#define _FS_CEPH_MSGPOOL
3
4#include <linux/mempool.h>
5#include "messenger.h"
6
7/*
8 * we use memory pools for preallocating messages we may receive, to
9 * avoid unexpected OOM conditions.
10 */
11struct ceph_msgpool {
12 const char *name;
13 mempool_t *pool;
14 int front_len; /* preallocated payload size */
15};
16
17extern int ceph_msgpool_init(struct ceph_msgpool *pool,
18 int front_len, int size, bool blocking,
19 const char *name);
20extern void ceph_msgpool_destroy(struct ceph_msgpool *pool);
21extern struct ceph_msg *ceph_msgpool_get(struct ceph_msgpool *,
22 int front_len);
23extern void ceph_msgpool_put(struct ceph_msgpool *, struct ceph_msg *);
24
25#endif
diff --git a/include/linux/ceph/msgr.h b/include/linux/ceph/msgr.h
new file mode 100644
index 000000000000..680d3d648cac
--- /dev/null
+++ b/include/linux/ceph/msgr.h
@@ -0,0 +1,175 @@
1#ifndef CEPH_MSGR_H
2#define CEPH_MSGR_H
3
4/*
5 * Data types for message passing layer used by Ceph.
6 */
7
8#define CEPH_MON_PORT 6789 /* default monitor port */
9
10/*
11 * client-side processes will try to bind to ports in this
12 * range, simply for the benefit of tools like nmap or wireshark
13 * that would like to identify the protocol.
14 */
15#define CEPH_PORT_FIRST 6789
16#define CEPH_PORT_START 6800 /* non-monitors start here */
17#define CEPH_PORT_LAST 6900
18
19/*
20 * tcp connection banner. include a protocol version. and adjust
21 * whenever the wire protocol changes. try to keep this string length
22 * constant.
23 */
24#define CEPH_BANNER "ceph v027"
25#define CEPH_BANNER_MAX_LEN 30
26
27
/*
 * Rollover-safe type and comparator for 32-bit sequence numbers.
 *
 * ceph_seq_cmp() returns a negative value if @a is before @b, zero if
 * they are equal, and a positive value if @a is after @b.  Note that the
 * result is the (wrapped) distance between the two values, not just
 * -1, 0, or 1, so callers must only test its sign.
 */
typedef __u32 ceph_seq_t;

static inline __s32 ceph_seq_cmp(__u32 a, __u32 b)
{
	/*
	 * Subtract in unsigned (modulo 2^32) arithmetic and only then
	 * reinterpret the result as signed.  Subtracting two values that
	 * were already cast to __s32 would overflow a signed int for
	 * distant sequence numbers, which is undefined behavior in C.
	 */
	return (__s32)(a - b);
}
39
40/*
41 * entity_name -- logical name for a process participating in the
42 * network, e.g. 'mds0' or 'osd3'.
43 */
44struct ceph_entity_name {
45 __u8 type; /* CEPH_ENTITY_TYPE_* */
46 __le64 num;
47} __attribute__ ((packed));
48
49#define CEPH_ENTITY_TYPE_MON 0x01
50#define CEPH_ENTITY_TYPE_MDS 0x02
51#define CEPH_ENTITY_TYPE_OSD 0x04
52#define CEPH_ENTITY_TYPE_CLIENT 0x08
53#define CEPH_ENTITY_TYPE_AUTH 0x20
54
55#define CEPH_ENTITY_TYPE_ANY 0xFF
56
57extern const char *ceph_entity_type_name(int type);
58
59/*
60 * entity_addr -- network address
61 */
62struct ceph_entity_addr {
63 __le32 type;
64 __le32 nonce; /* unique id for process (e.g. pid) */
65 struct sockaddr_storage in_addr;
66} __attribute__ ((packed));
67
68struct ceph_entity_inst {
69 struct ceph_entity_name name;
70 struct ceph_entity_addr addr;
71} __attribute__ ((packed));
72
73
74/* used by message exchange protocol */
75#define CEPH_MSGR_TAG_READY 1 /* server->client: ready for messages */
76#define CEPH_MSGR_TAG_RESETSESSION 2 /* server->client: reset, try again */
77#define CEPH_MSGR_TAG_WAIT 3 /* server->client: wait for racing
78 incoming connection */
79#define CEPH_MSGR_TAG_RETRY_SESSION 4 /* server->client + cseq: try again
80 with higher cseq */
81#define CEPH_MSGR_TAG_RETRY_GLOBAL 5 /* server->client + gseq: try again
82 with higher gseq */
83#define CEPH_MSGR_TAG_CLOSE 6 /* closing pipe */
84#define CEPH_MSGR_TAG_MSG 7 /* message */
85#define CEPH_MSGR_TAG_ACK 8 /* message ack */
86#define CEPH_MSGR_TAG_KEEPALIVE 9 /* just a keepalive byte! */
87#define CEPH_MSGR_TAG_BADPROTOVER 10 /* bad protocol version */
88#define CEPH_MSGR_TAG_BADAUTHORIZER 11 /* bad authorizer */
89#define CEPH_MSGR_TAG_FEATURES 12 /* insufficient features */
90
91
92/*
93 * connection negotiation
94 */
95struct ceph_msg_connect {
96 __le64 features; /* supported feature bits */
97 __le32 host_type; /* CEPH_ENTITY_TYPE_* */
98 __le32 global_seq; /* count connections initiated by this host */
99 __le32 connect_seq; /* count connections initiated in this session */
100 __le32 protocol_version;
101 __le32 authorizer_protocol;
102 __le32 authorizer_len;
103 __u8 flags; /* CEPH_MSG_CONNECT_* */
104} __attribute__ ((packed));
105
106struct ceph_msg_connect_reply {
107 __u8 tag;
108 __le64 features; /* feature bits for this session */
109 __le32 global_seq;
110 __le32 connect_seq;
111 __le32 protocol_version;
112 __le32 authorizer_len;
113 __u8 flags;
114} __attribute__ ((packed));
115
116#define CEPH_MSG_CONNECT_LOSSY 1 /* messages i send may be safely dropped */
117
118
119/*
120 * message header
121 */
122struct ceph_msg_header_old {
123 __le64 seq; /* message seq# for this session */
124 __le64 tid; /* transaction id */
125 __le16 type; /* message type */
126 __le16 priority; /* priority. higher value == higher priority */
127 __le16 version; /* version of message encoding */
128
129 __le32 front_len; /* bytes in main payload */
130 __le32 middle_len;/* bytes in middle payload */
131 __le32 data_len; /* bytes of data payload */
132 __le16 data_off; /* sender: include full offset;
133 receiver: mask against ~PAGE_MASK */
134
135 struct ceph_entity_inst src, orig_src;
136 __le32 reserved;
137 __le32 crc; /* header crc32c */
138} __attribute__ ((packed));
139
140struct ceph_msg_header {
141 __le64 seq; /* message seq# for this session */
142 __le64 tid; /* transaction id */
143 __le16 type; /* message type */
144 __le16 priority; /* priority. higher value == higher priority */
145 __le16 version; /* version of message encoding */
146
147 __le32 front_len; /* bytes in main payload */
148 __le32 middle_len;/* bytes in middle payload */
149 __le32 data_len; /* bytes of data payload */
150 __le16 data_off; /* sender: include full offset;
151 receiver: mask against ~PAGE_MASK */
152
153 struct ceph_entity_name src;
154 __le32 reserved;
155 __le32 crc; /* header crc32c */
156} __attribute__ ((packed));
157
158#define CEPH_MSG_PRIO_LOW 64
159#define CEPH_MSG_PRIO_DEFAULT 127
160#define CEPH_MSG_PRIO_HIGH 196
161#define CEPH_MSG_PRIO_HIGHEST 255
162
163/*
164 * follows data payload
165 */
166struct ceph_msg_footer {
167 __le32 front_crc, middle_crc, data_crc;
168 __u8 flags;
169} __attribute__ ((packed));
170
171#define CEPH_MSG_FOOTER_COMPLETE (1<<0) /* msg wasn't aborted */
172#define CEPH_MSG_FOOTER_NOCRC (1<<1) /* no data crc */
173
174
175#endif
diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h
new file mode 100644
index 000000000000..6c91fb032c39
--- /dev/null
+++ b/include/linux/ceph/osd_client.h
@@ -0,0 +1,234 @@
1#ifndef _FS_CEPH_OSD_CLIENT_H
2#define _FS_CEPH_OSD_CLIENT_H
3
4#include <linux/completion.h>
5#include <linux/kref.h>
6#include <linux/mempool.h>
7#include <linux/rbtree.h>
8
9#include "types.h"
10#include "osdmap.h"
11#include "messenger.h"
12
13struct ceph_msg;
14struct ceph_snap_context;
15struct ceph_osd_request;
16struct ceph_osd_client;
17struct ceph_authorizer;
18struct ceph_pagelist;
19
20/*
21 * completion callback for async writepages
22 */
23typedef void (*ceph_osdc_callback_t)(struct ceph_osd_request *,
24 struct ceph_msg *);
25
26/* a given osd we're communicating with */
27struct ceph_osd {
28 atomic_t o_ref;
29 struct ceph_osd_client *o_osdc;
30 int o_osd;
31 int o_incarnation;
32 struct rb_node o_node;
33 struct ceph_connection o_con;
34 struct list_head o_requests;
35 struct list_head o_osd_lru;
36 struct ceph_authorizer *o_authorizer;
37 void *o_authorizer_buf, *o_authorizer_reply_buf;
38 size_t o_authorizer_buf_len, o_authorizer_reply_buf_len;
39 unsigned long lru_ttl;
40 int o_marked_for_keepalive;
41 struct list_head o_keepalive_item;
42};
43
44/* an in-flight request */
45struct ceph_osd_request {
46 u64 r_tid; /* unique for this client */
47 struct rb_node r_node;
48 struct list_head r_req_lru_item;
49 struct list_head r_osd_item;
50 struct ceph_osd *r_osd;
51 struct ceph_pg r_pgid;
52 int r_pg_osds[CEPH_PG_MAX_SIZE];
53 int r_num_pg_osds;
54
55 struct ceph_connection *r_con_filling_msg;
56
57 struct ceph_msg *r_request, *r_reply;
58 int r_result;
59 int r_flags; /* any additional flags for the osd */
60 u32 r_sent; /* >0 if r_request is sending/sent */
61 int r_got_reply;
62
63 struct ceph_osd_client *r_osdc;
64 struct kref r_kref;
65 bool r_mempool;
66 struct completion r_completion, r_safe_completion;
67 ceph_osdc_callback_t r_callback, r_safe_callback;
68 struct ceph_eversion r_reassert_version;
69 struct list_head r_unsafe_item;
70
71 struct inode *r_inode; /* for use by callbacks */
72 void *r_priv; /* ditto */
73
74 char r_oid[40]; /* object name */
75 int r_oid_len;
76 unsigned long r_stamp; /* send OR check time */
77 bool r_resend; /* msg send failed, needs retry */
78
79 struct ceph_file_layout r_file_layout;
80 struct ceph_snap_context *r_snapc; /* snap context for writes */
81 unsigned r_num_pages; /* size of page array (follows) */
82 struct page **r_pages; /* pages for data payload */
83 int r_pages_from_pool;
84 int r_own_pages; /* if true, i own page list */
85#ifdef CONFIG_BLOCK
86 struct bio *r_bio; /* instead of pages */
87#endif
88
89 struct ceph_pagelist *r_trail; /* trailing part of the data */
90};
91
92struct ceph_osd_client {
93 struct ceph_client *client;
94
95 struct ceph_osdmap *osdmap; /* current map */
96 struct rw_semaphore map_sem;
97 struct completion map_waiters;
98 u64 last_requested_map;
99
100 struct mutex request_mutex;
101 struct rb_root osds; /* osds */
102 struct list_head osd_lru; /* idle osds */
103 u64 timeout_tid; /* tid of timeout triggering rq */
104 u64 last_tid; /* tid of last request */
105 struct rb_root requests; /* pending requests */
106 struct list_head req_lru; /* pending requests lru */
107 int num_requests;
108 struct delayed_work timeout_work;
109 struct delayed_work osds_timeout_work;
110#ifdef CONFIG_DEBUG_FS
111 struct dentry *debugfs_file;
112#endif
113
114 mempool_t *req_mempool;
115
116 struct ceph_msgpool msgpool_op;
117 struct ceph_msgpool msgpool_op_reply;
118};
119
120struct ceph_osd_req_op {
121 u16 op; /* CEPH_OSD_OP_* */
122 u32 flags; /* CEPH_OSD_FLAG_* */
123 union {
124 struct {
125 u64 offset, length;
126 u64 truncate_size;
127 u32 truncate_seq;
128 } extent;
129 struct {
130 const char *name;
131 u32 name_len;
132 const char *val;
133 u32 value_len;
134 __u8 cmp_op; /* CEPH_OSD_CMPXATTR_OP_* */
135 __u8 cmp_mode; /* CEPH_OSD_CMPXATTR_MODE_* */
136 } xattr;
137 struct {
138 const char *class_name;
139 __u8 class_len;
140 const char *method_name;
141 __u8 method_len;
142 __u8 argc;
143 const char *indata;
144 u32 indata_len;
145 } cls;
146 struct {
147 u64 cookie, count;
148 } pgls;
149 struct {
150 u64 snapid;
151 } snap;
152 };
153 u32 payload_len;
154};
155
156extern int ceph_osdc_init(struct ceph_osd_client *osdc,
157 struct ceph_client *client);
158extern void ceph_osdc_stop(struct ceph_osd_client *osdc);
159
160extern void ceph_osdc_handle_reply(struct ceph_osd_client *osdc,
161 struct ceph_msg *msg);
162extern void ceph_osdc_handle_map(struct ceph_osd_client *osdc,
163 struct ceph_msg *msg);
164
165extern void ceph_calc_raw_layout(struct ceph_osd_client *osdc,
166 struct ceph_file_layout *layout,
167 u64 snapid,
168 u64 off, u64 *plen, u64 *bno,
169 struct ceph_osd_request *req,
170 struct ceph_osd_req_op *op);
171
172extern struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc,
173 int flags,
174 struct ceph_snap_context *snapc,
175 struct ceph_osd_req_op *ops,
176 bool use_mempool,
177 gfp_t gfp_flags,
178 struct page **pages,
179 struct bio *bio);
180
181extern void ceph_osdc_build_request(struct ceph_osd_request *req,
182 u64 off, u64 *plen,
183 struct ceph_osd_req_op *src_ops,
184 struct ceph_snap_context *snapc,
185 struct timespec *mtime,
186 const char *oid,
187 int oid_len);
188
189extern struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *,
190 struct ceph_file_layout *layout,
191 struct ceph_vino vino,
192 u64 offset, u64 *len, int op, int flags,
193 struct ceph_snap_context *snapc,
194 int do_sync, u32 truncate_seq,
195 u64 truncate_size,
196 struct timespec *mtime,
197 bool use_mempool, int num_reply);
198
199static inline void ceph_osdc_get_request(struct ceph_osd_request *req)
200{
201 kref_get(&req->r_kref);
202}
203extern void ceph_osdc_release_request(struct kref *kref);
204static inline void ceph_osdc_put_request(struct ceph_osd_request *req)
205{
206 kref_put(&req->r_kref, ceph_osdc_release_request);
207}
208
209extern int ceph_osdc_start_request(struct ceph_osd_client *osdc,
210 struct ceph_osd_request *req,
211 bool nofail);
212extern int ceph_osdc_wait_request(struct ceph_osd_client *osdc,
213 struct ceph_osd_request *req);
214extern void ceph_osdc_sync(struct ceph_osd_client *osdc);
215
216extern int ceph_osdc_readpages(struct ceph_osd_client *osdc,
217 struct ceph_vino vino,
218 struct ceph_file_layout *layout,
219 u64 off, u64 *plen,
220 u32 truncate_seq, u64 truncate_size,
221 struct page **pages, int nr_pages);
222
223extern int ceph_osdc_writepages(struct ceph_osd_client *osdc,
224 struct ceph_vino vino,
225 struct ceph_file_layout *layout,
226 struct ceph_snap_context *sc,
227 u64 off, u64 len,
228 u32 truncate_seq, u64 truncate_size,
229 struct timespec *mtime,
230 struct page **pages, int nr_pages,
231 int flags, int do_sync, bool nofail);
232
233#endif
234
diff --git a/include/linux/ceph/osdmap.h b/include/linux/ceph/osdmap.h
new file mode 100644
index 000000000000..ba4c205cbb01
--- /dev/null
+++ b/include/linux/ceph/osdmap.h
@@ -0,0 +1,130 @@
1#ifndef _FS_CEPH_OSDMAP_H
2#define _FS_CEPH_OSDMAP_H
3
4#include <linux/rbtree.h>
5#include "types.h"
6#include "ceph_fs.h"
7#include <linux/crush/crush.h>
8
9/*
10 * The osd map describes the current membership of the osd cluster and
11 * specifies the mapping of objects to placement groups and placement
12 * groups to (sets of) osds. That is, it completely specifies the
13 * (desired) distribution of all data objects in the system at some
14 * point in time.
15 *
16 * Each map version is identified by an epoch, which increases monotonically.
17 *
18 * The map can be updated either via an incremental map (diff) describing
19 * the change between two successive epochs, or as a fully encoded map.
20 */
21struct ceph_pg_pool_info {
22 struct rb_node node;
23 int id;
24 struct ceph_pg_pool v;
25 int pg_num_mask, pgp_num_mask, lpg_num_mask, lpgp_num_mask;
26 char *name;
27};
28
29struct ceph_pg_mapping {
30 struct rb_node node;
31 struct ceph_pg pgid;
32 int len;
33 int osds[];
34};
35
36struct ceph_osdmap {
37 struct ceph_fsid fsid;
38 u32 epoch;
39 u32 mkfs_epoch;
40 struct ceph_timespec created, modified;
41
42 u32 flags; /* CEPH_OSDMAP_* */
43
44 u32 max_osd; /* size of osd_state, osd_weight, osd_addr arrays */
45 u8 *osd_state; /* CEPH_OSD_* */
46 u32 *osd_weight; /* 0 = failed, 0x10000 = 100% normal */
47 struct ceph_entity_addr *osd_addr;
48
49 struct rb_root pg_temp;
50 struct rb_root pg_pools;
51 u32 pool_max;
52
53 /* the CRUSH map specifies the mapping of placement groups to
54 * the list of osds that store+replicate them. */
55 struct crush_map *crush;
56};
57
58/*
59 * file layout helpers
60 */
61#define ceph_file_layout_su(l) ((__s32)le32_to_cpu((l).fl_stripe_unit))
62#define ceph_file_layout_stripe_count(l) \
63 ((__s32)le32_to_cpu((l).fl_stripe_count))
64#define ceph_file_layout_object_size(l) ((__s32)le32_to_cpu((l).fl_object_size))
65#define ceph_file_layout_cas_hash(l) ((__s32)le32_to_cpu((l).fl_cas_hash))
66#define ceph_file_layout_object_su(l) \
67 ((__s32)le32_to_cpu((l).fl_object_stripe_unit))
68#define ceph_file_layout_pg_preferred(l) \
69 ((__s32)le32_to_cpu((l).fl_pg_preferred))
70#define ceph_file_layout_pg_pool(l) \
71 ((__s32)le32_to_cpu((l).fl_pg_pool))
72
73static inline unsigned ceph_file_layout_stripe_width(struct ceph_file_layout *l)
74{
75 return le32_to_cpu(l->fl_stripe_unit) *
76 le32_to_cpu(l->fl_stripe_count);
77}
78
79/* "period" == bytes before i start on a new set of objects */
80static inline unsigned ceph_file_layout_period(struct ceph_file_layout *l)
81{
82 return le32_to_cpu(l->fl_object_size) *
83 le32_to_cpu(l->fl_stripe_count);
84}
85
86
87static inline int ceph_osd_is_up(struct ceph_osdmap *map, int osd)
88{
89 return (osd < map->max_osd) && (map->osd_state[osd] & CEPH_OSD_UP);
90}
91
92static inline bool ceph_osdmap_flag(struct ceph_osdmap *map, int flag)
93{
94 return map && (map->flags & flag);
95}
96
97extern char *ceph_osdmap_state_str(char *str, int len, int state);
98
99static inline struct ceph_entity_addr *ceph_osd_addr(struct ceph_osdmap *map,
100 int osd)
101{
102 if (osd >= map->max_osd)
103 return NULL;
104 return &map->osd_addr[osd];
105}
106
107extern struct ceph_osdmap *osdmap_decode(void **p, void *end);
108extern struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end,
109 struct ceph_osdmap *map,
110 struct ceph_messenger *msgr);
111extern void ceph_osdmap_destroy(struct ceph_osdmap *map);
112
113/* calculate mapping of a file extent to an object */
114extern void ceph_calc_file_object_mapping(struct ceph_file_layout *layout,
115 u64 off, u64 *plen,
116 u64 *bno, u64 *oxoff, u64 *oxlen);
117
118/* calculate mapping of object to a placement group */
119extern int ceph_calc_object_layout(struct ceph_object_layout *ol,
120 const char *oid,
121 struct ceph_file_layout *fl,
122 struct ceph_osdmap *osdmap);
123extern int ceph_calc_pg_acting(struct ceph_osdmap *osdmap, struct ceph_pg pgid,
124 int *acting);
125extern int ceph_calc_pg_primary(struct ceph_osdmap *osdmap,
126 struct ceph_pg pgid);
127
128extern int ceph_pg_poolid_by_name(struct ceph_osdmap *map, const char *name);
129
130#endif
diff --git a/include/linux/ceph/pagelist.h b/include/linux/ceph/pagelist.h
new file mode 100644
index 000000000000..9660d6b0a35d
--- /dev/null
+++ b/include/linux/ceph/pagelist.h
@@ -0,0 +1,75 @@
1#ifndef __FS_CEPH_PAGELIST_H
2#define __FS_CEPH_PAGELIST_H
3
4#include <linux/list.h>
5
6struct ceph_pagelist {
7 struct list_head head;
8 void *mapped_tail;
9 size_t length;
10 size_t room;
11 struct list_head free_list;
12 size_t num_pages_free;
13};
14
15struct ceph_pagelist_cursor {
16 struct ceph_pagelist *pl; /* pagelist, for error checking */
17 struct list_head *page_lru; /* page in list */
18 size_t room; /* room remaining to reset to */
19};
20
21static inline void ceph_pagelist_init(struct ceph_pagelist *pl)
22{
23 INIT_LIST_HEAD(&pl->head);
24 pl->mapped_tail = NULL;
25 pl->length = 0;
26 pl->room = 0;
27 INIT_LIST_HEAD(&pl->free_list);
28 pl->num_pages_free = 0;
29}
30
31extern int ceph_pagelist_release(struct ceph_pagelist *pl);
32
33extern int ceph_pagelist_append(struct ceph_pagelist *pl, const void *d, size_t l);
34
35extern int ceph_pagelist_reserve(struct ceph_pagelist *pl, size_t space);
36
37extern int ceph_pagelist_free_reserve(struct ceph_pagelist *pl);
38
39extern void ceph_pagelist_set_cursor(struct ceph_pagelist *pl,
40 struct ceph_pagelist_cursor *c);
41
42extern int ceph_pagelist_truncate(struct ceph_pagelist *pl,
43 struct ceph_pagelist_cursor *c);
44
45static inline int ceph_pagelist_encode_64(struct ceph_pagelist *pl, u64 v)
46{
47 __le64 ev = cpu_to_le64(v);
48 return ceph_pagelist_append(pl, &ev, sizeof(ev));
49}
50static inline int ceph_pagelist_encode_32(struct ceph_pagelist *pl, u32 v)
51{
52 __le32 ev = cpu_to_le32(v);
53 return ceph_pagelist_append(pl, &ev, sizeof(ev));
54}
55static inline int ceph_pagelist_encode_16(struct ceph_pagelist *pl, u16 v)
56{
57 __le16 ev = cpu_to_le16(v);
58 return ceph_pagelist_append(pl, &ev, sizeof(ev));
59}
60static inline int ceph_pagelist_encode_8(struct ceph_pagelist *pl, u8 v)
61{
62 return ceph_pagelist_append(pl, &v, 1);
63}
/*
 * Append a length-prefixed string to the pagelist: first @len encoded as
 * a little-endian 32-bit value, then @len bytes copied from @s.  No
 * terminating NUL is written.  When @len is zero only the length prefix
 * is appended and @s is not touched.
 *
 * @s is only read, so it is const-qualified; callers holding either a
 * "char *" or a "const char *" can pass it without a cast.
 *
 * Note that @len is narrowed to u32 for the prefix while the full size_t
 * is used for the payload, so callers must keep @len within 32 bits.
 *
 * Returns 0 on success, or the first non-zero value returned by the
 * underlying append.
 */
static inline int ceph_pagelist_encode_string(struct ceph_pagelist *pl,
					      const char *s, size_t len)
{
	int ret = ceph_pagelist_encode_32(pl, len);

	if (ret)
		return ret;
	if (len)
		return ceph_pagelist_append(pl, s, len);
	return 0;
}
74
75#endif
diff --git a/include/linux/ceph/rados.h b/include/linux/ceph/rados.h
new file mode 100644
index 000000000000..6d5247f2e81b
--- /dev/null
+++ b/include/linux/ceph/rados.h
@@ -0,0 +1,405 @@
1#ifndef CEPH_RADOS_H
2#define CEPH_RADOS_H
3
4/*
5 * Data types for the Ceph distributed object storage layer RADOS
6 * (Reliable Autonomic Distributed Object Store).
7 */
8
9#include "msgr.h"
10
11/*
12 * osdmap encoding versions
13 */
14#define CEPH_OSDMAP_INC_VERSION 5
15#define CEPH_OSDMAP_INC_VERSION_EXT 5
16#define CEPH_OSDMAP_VERSION 5
17#define CEPH_OSDMAP_VERSION_EXT 5
18
19/*
20 * fs id
21 */
22struct ceph_fsid {
23 unsigned char fsid[16];
24};
25
26static inline int ceph_fsid_compare(const struct ceph_fsid *a,
27 const struct ceph_fsid *b)
28{
29 return memcmp(a, b, sizeof(*a));
30}
31
32/*
33 * ino, object, etc.
34 */
35typedef __le64 ceph_snapid_t;
36#define CEPH_SNAPDIR ((__u64)(-1)) /* reserved for hidden .snap dir */
37#define CEPH_NOSNAP ((__u64)(-2)) /* "head", "live" revision */
38#define CEPH_MAXSNAP ((__u64)(-3)) /* largest valid snapid */
39
40struct ceph_timespec {
41 __le32 tv_sec;
42 __le32 tv_nsec;
43} __attribute__ ((packed));
44
45
46/*
47 * object layout - how objects are mapped into PGs
48 */
49#define CEPH_OBJECT_LAYOUT_HASH 1
50#define CEPH_OBJECT_LAYOUT_LINEAR 2
51#define CEPH_OBJECT_LAYOUT_HASHINO 3
52
53/*
54 * pg layout -- how PGs are mapped onto (sets of) OSDs
55 */
56#define CEPH_PG_LAYOUT_CRUSH 0
57#define CEPH_PG_LAYOUT_HASH 1
58#define CEPH_PG_LAYOUT_LINEAR 2
59#define CEPH_PG_LAYOUT_HYBRID 3
60
61#define CEPH_PG_MAX_SIZE 16 /* max # osds in a single pg */
62
63/*
64 * placement group.
65 * we encode this into one __le64.
66 */
67struct ceph_pg {
68 __le16 preferred; /* preferred primary osd */
69 __le16 ps; /* placement seed */
70 __le32 pool; /* object pool */
71} __attribute__ ((packed));
72
73/*
74 * pg_pool is a set of pgs storing a pool of objects
75 *
76 * pg_num -- base number of pseudorandomly placed pgs
77 *
78 * pgp_num -- effective number when calculating pg placement. this
79 * is used for pg_num increases. new pgs result in data being "split"
80 * into new pgs. for this to proceed smoothly, new pgs are initially
81 * colocated with their parents; that is, pgp_num doesn't increase
82 * until the new pgs have successfully split. only _then_ are the new
83 * pgs placed independently.
84 *
85 * lpg_num -- localized pg count (per device). replicas are randomly
86 * selected.
87 *
88 * lpgp_num -- as above.
89 */
90#define CEPH_PG_TYPE_REP 1
91#define CEPH_PG_TYPE_RAID4 2
92#define CEPH_PG_POOL_VERSION 2
93struct ceph_pg_pool {
94 __u8 type; /* CEPH_PG_TYPE_* */
95 __u8 size; /* number of osds in each pg */
96 __u8 crush_ruleset; /* crush placement rule */
97 __u8 object_hash; /* hash mapping object name to ps */
98 __le32 pg_num, pgp_num; /* number of pg's */
99 __le32 lpg_num, lpgp_num; /* number of localized pg's */
100 __le32 last_change; /* most recent epoch changed */
101 __le64 snap_seq; /* seq for per-pool snapshot */
102 __le32 snap_epoch; /* epoch of last snap */
103 __le32 num_snaps;
104 __le32 num_removed_snap_intervals; /* if non-empty, NO per-pool snaps */
105 __le64 auid; /* who owns the pg */
106} __attribute__ ((packed));
107
108/*
109 * stable_mod func is used to control number of placement groups.
110 * similar to straight-up modulo, but produces a stable mapping as b
111 * increases over time. b is the number of bins, and bmask is the
112 * containing power of 2 minus 1.
113 *
114 * b <= bmask and bmask=(2**n)-1
115 * e.g., b=12 -> bmask=15, b=123 -> bmask=127
116 */
117static inline int ceph_stable_mod(int x, int b, int bmask)
118{
119 if ((x & bmask) < b)
120 return x & bmask;
121 else
122 return x & (bmask >> 1);
123}
124
125/*
126 * object layout - how a given object should be stored.
127 */
128struct ceph_object_layout {
129 struct ceph_pg ol_pgid; /* raw pg, with _full_ ps precision. */
130 __le32 ol_stripe_unit; /* for per-object parity, if any */
131} __attribute__ ((packed));
132
133/*
134 * compound epoch+version, used by storage layer to serialize mutations
135 */
136struct ceph_eversion {
137 __le32 epoch;
138 __le64 version;
139} __attribute__ ((packed));
140
141/*
142 * osd map bits
143 */
144
145/* status bits */
146#define CEPH_OSD_EXISTS 1
147#define CEPH_OSD_UP 2
148
149/* osd weights. fixed point value: 0x10000 == 1.0 ("in"), 0 == "out" */
150#define CEPH_OSD_IN 0x10000
151#define CEPH_OSD_OUT 0
152
153
154/*
155 * osd map flag bits
156 */
157#define CEPH_OSDMAP_NEARFULL (1<<0) /* sync writes (near ENOSPC) */
158#define CEPH_OSDMAP_FULL (1<<1) /* no data writes (ENOSPC) */
159#define CEPH_OSDMAP_PAUSERD (1<<2) /* pause all reads */
160#define CEPH_OSDMAP_PAUSEWR (1<<3) /* pause all writes */
161#define CEPH_OSDMAP_PAUSEREC (1<<4) /* pause recovery */
162
163/*
164 * osd ops
165 */
166#define CEPH_OSD_OP_MODE 0xf000
167#define CEPH_OSD_OP_MODE_RD 0x1000
168#define CEPH_OSD_OP_MODE_WR 0x2000
169#define CEPH_OSD_OP_MODE_RMW 0x3000
170#define CEPH_OSD_OP_MODE_SUB 0x4000
171
172#define CEPH_OSD_OP_TYPE 0x0f00
173#define CEPH_OSD_OP_TYPE_LOCK 0x0100
174#define CEPH_OSD_OP_TYPE_DATA 0x0200
175#define CEPH_OSD_OP_TYPE_ATTR 0x0300
176#define CEPH_OSD_OP_TYPE_EXEC 0x0400
177#define CEPH_OSD_OP_TYPE_PG 0x0500
178
179enum {
180 /** data **/
181 /* read */
182 CEPH_OSD_OP_READ = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_DATA | 1,
183 CEPH_OSD_OP_STAT = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_DATA | 2,
184
185 /* fancy read */
186 CEPH_OSD_OP_MASKTRUNC = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_DATA | 4,
187
188 /* write */
189 CEPH_OSD_OP_WRITE = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 1,
190 CEPH_OSD_OP_WRITEFULL = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 2,
191 CEPH_OSD_OP_TRUNCATE = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 3,
192 CEPH_OSD_OP_ZERO = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 4,
193 CEPH_OSD_OP_DELETE = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 5,
194
195 /* fancy write */
196 CEPH_OSD_OP_APPEND = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 6,
197 CEPH_OSD_OP_STARTSYNC = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 7,
198 CEPH_OSD_OP_SETTRUNC = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 8,
199 CEPH_OSD_OP_TRIMTRUNC = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 9,
200
201 CEPH_OSD_OP_TMAPUP = CEPH_OSD_OP_MODE_RMW | CEPH_OSD_OP_TYPE_DATA | 10,
202 CEPH_OSD_OP_TMAPPUT = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 11,
203 CEPH_OSD_OP_TMAPGET = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_DATA | 12,
204
205 CEPH_OSD_OP_CREATE = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 13,
206 CEPH_OSD_OP_ROLLBACK= CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 14,
207
208 /** attrs **/
209 /* read */
210 CEPH_OSD_OP_GETXATTR = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_ATTR | 1,
211 CEPH_OSD_OP_GETXATTRS = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_ATTR | 2,
212 CEPH_OSD_OP_CMPXATTR = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_ATTR | 3,
213
214 /* write */
215 CEPH_OSD_OP_SETXATTR = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_ATTR | 1,
216 CEPH_OSD_OP_SETXATTRS = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_ATTR | 2,
217 CEPH_OSD_OP_RESETXATTRS = CEPH_OSD_OP_MODE_WR|CEPH_OSD_OP_TYPE_ATTR | 3,
218 CEPH_OSD_OP_RMXATTR = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_ATTR | 4,
219
220 /** subop **/
221 CEPH_OSD_OP_PULL = CEPH_OSD_OP_MODE_SUB | 1,
222 CEPH_OSD_OP_PUSH = CEPH_OSD_OP_MODE_SUB | 2,
223 CEPH_OSD_OP_BALANCEREADS = CEPH_OSD_OP_MODE_SUB | 3,
224 CEPH_OSD_OP_UNBALANCEREADS = CEPH_OSD_OP_MODE_SUB | 4,
225 CEPH_OSD_OP_SCRUB = CEPH_OSD_OP_MODE_SUB | 5,
226
227 /** lock **/
228 CEPH_OSD_OP_WRLOCK = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_LOCK | 1,
229 CEPH_OSD_OP_WRUNLOCK = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_LOCK | 2,
230 CEPH_OSD_OP_RDLOCK = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_LOCK | 3,
231 CEPH_OSD_OP_RDUNLOCK = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_LOCK | 4,
232 CEPH_OSD_OP_UPLOCK = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_LOCK | 5,
233 CEPH_OSD_OP_DNLOCK = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_LOCK | 6,
234
235 /** exec **/
236 CEPH_OSD_OP_CALL = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_EXEC | 1,
237
238 /** pg **/
239 CEPH_OSD_OP_PGLS = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_PG | 1,
240};
241
242static inline int ceph_osd_op_type_lock(int op)
243{
244 return (op & CEPH_OSD_OP_TYPE) == CEPH_OSD_OP_TYPE_LOCK;
245}
246static inline int ceph_osd_op_type_data(int op)
247{
248 return (op & CEPH_OSD_OP_TYPE) == CEPH_OSD_OP_TYPE_DATA;
249}
250static inline int ceph_osd_op_type_attr(int op)
251{
252 return (op & CEPH_OSD_OP_TYPE) == CEPH_OSD_OP_TYPE_ATTR;
253}
254static inline int ceph_osd_op_type_exec(int op)
255{
256 return (op & CEPH_OSD_OP_TYPE) == CEPH_OSD_OP_TYPE_EXEC;
257}
258static inline int ceph_osd_op_type_pg(int op)
259{
260 return (op & CEPH_OSD_OP_TYPE) == CEPH_OSD_OP_TYPE_PG;
261}
262
263static inline int ceph_osd_op_mode_subop(int op)
264{
265 return (op & CEPH_OSD_OP_MODE) == CEPH_OSD_OP_MODE_SUB;
266}
267static inline int ceph_osd_op_mode_read(int op)
268{
269 return (op & CEPH_OSD_OP_MODE) == CEPH_OSD_OP_MODE_RD;
270}
271static inline int ceph_osd_op_mode_modify(int op)
272{
273 return (op & CEPH_OSD_OP_MODE) == CEPH_OSD_OP_MODE_WR;
274}
275
276/*
277 * note that the following tmap stuff is also defined in the ceph librados.h
278 * any modification here needs to be updated there
279 */
280#define CEPH_OSD_TMAP_HDR 'h'
281#define CEPH_OSD_TMAP_SET 's'
282#define CEPH_OSD_TMAP_RM 'r'
283
284extern const char *ceph_osd_op_name(int op);
285
286
287/*
288 * osd op flags
289 *
290 * An op may be READ, WRITE, or READ|WRITE.
291 */
292enum {
293 CEPH_OSD_FLAG_ACK = 1, /* want (or is) "ack" ack */
294 CEPH_OSD_FLAG_ONNVRAM = 2, /* want (or is) "onnvram" ack */
295 CEPH_OSD_FLAG_ONDISK = 4, /* want (or is) "ondisk" ack */
296 CEPH_OSD_FLAG_RETRY = 8, /* resend attempt */
297 CEPH_OSD_FLAG_READ = 16, /* op may read */
298 CEPH_OSD_FLAG_WRITE = 32, /* op may write */
299 CEPH_OSD_FLAG_ORDERSNAP = 64, /* EOLDSNAP if snapc is out of order */
300 CEPH_OSD_FLAG_PEERSTAT = 128, /* msg includes osd_peer_stat */
301 CEPH_OSD_FLAG_BALANCE_READS = 256,
302 CEPH_OSD_FLAG_PARALLELEXEC = 512, /* execute op in parallel */
303 CEPH_OSD_FLAG_PGOP = 1024, /* pg op, no object */
304 CEPH_OSD_FLAG_EXEC = 2048, /* op may exec */
305 CEPH_OSD_FLAG_EXEC_PUBLIC = 4096, /* op may exec (public) */
306};
307
308enum {
309 CEPH_OSD_OP_FLAG_EXCL = 1, /* EXCL object create */
310};
311
312#define EOLDSNAPC ERESTART /* ORDERSNAP flag set; writer has old snapc*/
313#define EBLACKLISTED ESHUTDOWN /* blacklisted */
314
315/* xattr comparison */
316enum {
317 CEPH_OSD_CMPXATTR_OP_NOP = 0,
318 CEPH_OSD_CMPXATTR_OP_EQ = 1,
319 CEPH_OSD_CMPXATTR_OP_NE = 2,
320 CEPH_OSD_CMPXATTR_OP_GT = 3,
321 CEPH_OSD_CMPXATTR_OP_GTE = 4,
322 CEPH_OSD_CMPXATTR_OP_LT = 5,
323 CEPH_OSD_CMPXATTR_OP_LTE = 6
324};
325
326enum {
327 CEPH_OSD_CMPXATTR_MODE_STRING = 1,
328 CEPH_OSD_CMPXATTR_MODE_U64 = 2
329};
330
331/*
332 * an individual object operation. each may be accompanied by some data
333 * payload
334 */
335struct ceph_osd_op {
336 __le16 op; /* CEPH_OSD_OP_* */
337 __le32 flags; /* CEPH_OSD_FLAG_* */
338 union {
339 struct {
340 __le64 offset, length;
341 __le64 truncate_size;
342 __le32 truncate_seq;
343 } __attribute__ ((packed)) extent;
344 struct {
345 __le32 name_len;
346 __le32 value_len;
347 __u8 cmp_op; /* CEPH_OSD_CMPXATTR_OP_* */
348 __u8 cmp_mode; /* CEPH_OSD_CMPXATTR_MODE_* */
349 } __attribute__ ((packed)) xattr;
350 struct {
351 __u8 class_len;
352 __u8 method_len;
353 __u8 argc;
354 __le32 indata_len;
355 } __attribute__ ((packed)) cls;
356 struct {
357 __le64 cookie, count;
358 } __attribute__ ((packed)) pgls;
359 struct {
360 __le64 snapid;
361 } __attribute__ ((packed)) snap;
362 };
363 __le32 payload_len;
364} __attribute__ ((packed));
365
366/*
367 * osd request message header. each request may include multiple
368 * ceph_osd_op object operations.
369 */
370struct ceph_osd_request_head {
371 __le32 client_inc; /* client incarnation */
372 struct ceph_object_layout layout; /* pgid */
373 __le32 osdmap_epoch; /* client's osdmap epoch */
374
375 __le32 flags;
376
377 struct ceph_timespec mtime; /* for mutations only */
378 struct ceph_eversion reassert_version; /* if we are replaying op */
379
380 __le32 object_len; /* length of object name */
381
382 __le64 snapid; /* snapid to read */
383 __le64 snap_seq; /* writer's snap context */
384 __le32 num_snaps;
385
386 __le16 num_ops;
387 struct ceph_osd_op ops[]; /* followed by ops[], obj, ticket, snaps */
388} __attribute__ ((packed));
389
390struct ceph_osd_reply_head {
391 __le32 client_inc; /* client incarnation */
392 __le32 flags;
393 struct ceph_object_layout layout;
394 __le32 osdmap_epoch;
395 struct ceph_eversion reassert_version; /* for replaying uncommitted */
396
397 __le32 result; /* result code */
398
399 __le32 object_len; /* length of object name */
400 __le32 num_ops;
401 struct ceph_osd_op ops[0]; /* ops[], object */
402} __attribute__ ((packed));
403
404
405#endif
diff --git a/include/linux/ceph/types.h b/include/linux/ceph/types.h
new file mode 100644
index 000000000000..28b35a005ec2
--- /dev/null
+++ b/include/linux/ceph/types.h
@@ -0,0 +1,29 @@
1#ifndef _FS_CEPH_TYPES_H
2#define _FS_CEPH_TYPES_H
3
4/* needed before including ceph_fs.h */
5#include <linux/in.h>
6#include <linux/types.h>
7#include <linux/fcntl.h>
8#include <linux/string.h>
9
10#include "ceph_fs.h"
11#include "ceph_frag.h"
12#include "ceph_hash.h"
13
14/*
15 * Identify inodes by both their ino AND snapshot id (a u64).
16 */
17struct ceph_vino {
18 u64 ino;
19 u64 snap;
20};
21
22
23/* context for the caps reservation mechanism */
24struct ceph_cap_reservation {
25 int count;
26};
27
28
29#endif
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 0c991023ee47..709dfb901d11 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -75,7 +75,7 @@ struct cgroup_subsys_state {
75 75
76 unsigned long flags; 76 unsigned long flags;
77 /* ID for this css, if possible */ 77 /* ID for this css, if possible */
78 struct css_id *id; 78 struct css_id __rcu *id;
79}; 79};
80 80
81/* bits in struct cgroup_subsys_state flags field */ 81/* bits in struct cgroup_subsys_state flags field */
@@ -205,7 +205,7 @@ struct cgroup {
205 struct list_head children; /* my children */ 205 struct list_head children; /* my children */
206 206
207 struct cgroup *parent; /* my parent */ 207 struct cgroup *parent; /* my parent */
208 struct dentry *dentry; /* cgroup fs entry, RCU protected */ 208 struct dentry __rcu *dentry; /* cgroup fs entry, RCU protected */
209 209
210 /* Private pointers for each registered subsystem */ 210 /* Private pointers for each registered subsystem */
211 struct cgroup_subsys_state *subsys[CGROUP_SUBSYS_COUNT]; 211 struct cgroup_subsys_state *subsys[CGROUP_SUBSYS_COUNT];
diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index c1a62c56a660..320d6c94ff84 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -16,7 +16,11 @@
16# define __release(x) __context__(x,-1) 16# define __release(x) __context__(x,-1)
17# define __cond_lock(x,c) ((c) ? ({ __acquire(x); 1; }) : 0) 17# define __cond_lock(x,c) ((c) ? ({ __acquire(x); 1; }) : 0)
18# define __percpu __attribute__((noderef, address_space(3))) 18# define __percpu __attribute__((noderef, address_space(3)))
19#ifdef CONFIG_SPARSE_RCU_POINTER
20# define __rcu __attribute__((noderef, address_space(4)))
21#else
19# define __rcu 22# define __rcu
23#endif
20extern void __chk_user_ptr(const volatile void __user *); 24extern void __chk_user_ptr(const volatile void __user *);
21extern void __chk_io_ptr(const volatile void __iomem *); 25extern void __chk_io_ptr(const volatile void __iomem *);
22#else 26#else
diff --git a/include/linux/coredump.h b/include/linux/coredump.h
index 8ba66a9d9022..ba4b85a6d9b8 100644
--- a/include/linux/coredump.h
+++ b/include/linux/coredump.h
@@ -9,37 +9,7 @@
9 * These are the only things you should do on a core-file: use only these 9 * These are the only things you should do on a core-file: use only these
10 * functions to write out all the necessary info. 10 * functions to write out all the necessary info.
11 */ 11 */
12static inline int dump_write(struct file *file, const void *addr, int nr) 12extern int dump_write(struct file *file, const void *addr, int nr);
13{ 13extern int dump_seek(struct file *file, loff_t off);
14 return file->f_op->write(file, addr, nr, &file->f_pos) == nr;
15}
16
17static inline int dump_seek(struct file *file, loff_t off)
18{
19 int ret = 1;
20
21 if (file->f_op->llseek && file->f_op->llseek != no_llseek) {
22 if (file->f_op->llseek(file, off, SEEK_CUR) < 0)
23 return 0;
24 } else {
25 char *buf = (char *)get_zeroed_page(GFP_KERNEL);
26
27 if (!buf)
28 return 0;
29 while (off > 0) {
30 unsigned long n = off;
31
32 if (n > PAGE_SIZE)
33 n = PAGE_SIZE;
34 if (!dump_write(file, buf, n)) {
35 ret = 0;
36 break;
37 }
38 off -= n;
39 }
40 free_page((unsigned long)buf);
41 }
42 return ret;
43}
44 14
45#endif /* _LINUX_COREDUMP_H */ 15#endif /* _LINUX_COREDUMP_H */
diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h
index 36ca9721a0c2..1be416bbbb82 100644
--- a/include/linux/cpuidle.h
+++ b/include/linux/cpuidle.h
@@ -53,6 +53,7 @@ struct cpuidle_state {
53#define CPUIDLE_FLAG_BALANCED (0x40) /* medium latency, moderate savings */ 53#define CPUIDLE_FLAG_BALANCED (0x40) /* medium latency, moderate savings */
54#define CPUIDLE_FLAG_DEEP (0x80) /* high latency, large savings */ 54#define CPUIDLE_FLAG_DEEP (0x80) /* high latency, large savings */
55#define CPUIDLE_FLAG_IGNORE (0x100) /* ignore during this idle period */ 55#define CPUIDLE_FLAG_IGNORE (0x100) /* ignore during this idle period */
56#define CPUIDLE_FLAG_TLB_FLUSHED (0x200) /* tlb will be flushed */
56 57
57#define CPUIDLE_DRIVER_FLAGS_MASK (0xFFFF0000) 58#define CPUIDLE_DRIVER_FLAGS_MASK (0xFFFF0000)
58 59
diff --git a/include/linux/cred.h b/include/linux/cred.h
index 4d2c39573f36..4aaeab376446 100644
--- a/include/linux/cred.h
+++ b/include/linux/cred.h
@@ -84,7 +84,7 @@ struct thread_group_cred {
84 atomic_t usage; 84 atomic_t usage;
85 pid_t tgid; /* thread group process ID */ 85 pid_t tgid; /* thread group process ID */
86 spinlock_t lock; 86 spinlock_t lock;
87 struct key *session_keyring; /* keyring inherited over fork */ 87 struct key __rcu *session_keyring; /* keyring inherited over fork */
88 struct key *process_keyring; /* keyring private to this process */ 88 struct key *process_keyring; /* keyring private to this process */
89 struct rcu_head rcu; /* RCU deletion hook */ 89 struct rcu_head rcu; /* RCU deletion hook */
90}; 90};
diff --git a/include/linux/crush/crush.h b/include/linux/crush/crush.h
new file mode 100644
index 000000000000..97e435b191f4
--- /dev/null
+++ b/include/linux/crush/crush.h
@@ -0,0 +1,180 @@
1#ifndef CEPH_CRUSH_CRUSH_H
2#define CEPH_CRUSH_CRUSH_H
3
4#include <linux/types.h>
5
6/*
7 * CRUSH is a pseudo-random data distribution algorithm that
8 * efficiently distributes input values (typically, data objects)
9 * across a heterogeneous, structured storage cluster.
10 *
11 * The algorithm was originally described in detail in this paper
12 * (although the algorithm has evolved somewhat since then):
13 *
14 * http://www.ssrc.ucsc.edu/Papers/weil-sc06.pdf
15 *
16 * LGPL2
17 */
18
19
20#define CRUSH_MAGIC 0x00010000ul /* for detecting algorithm revisions */
21
22
23#define CRUSH_MAX_DEPTH 10 /* max crush hierarchy depth */
24#define CRUSH_MAX_SET 10 /* max size of a mapping result */
25
26
27/*
28 * CRUSH uses user-defined "rules" to describe how inputs should be
29 * mapped to devices. A rule consists of a sequence of steps to perform
30 * to generate the set of output devices.
31 */
32struct crush_rule_step {
33 __u32 op;
34 __s32 arg1;
35 __s32 arg2;
36};
37
38/* step op codes */
39enum {
40 CRUSH_RULE_NOOP = 0,
41 CRUSH_RULE_TAKE = 1, /* arg1 = value to start with */
42 CRUSH_RULE_CHOOSE_FIRSTN = 2, /* arg1 = num items to pick */
43 /* arg2 = type */
44 CRUSH_RULE_CHOOSE_INDEP = 3, /* same */
45 CRUSH_RULE_EMIT = 4, /* no args */
46 CRUSH_RULE_CHOOSE_LEAF_FIRSTN = 6,
47 CRUSH_RULE_CHOOSE_LEAF_INDEP = 7,
48};
49
50/*
51 * for specifying choose num (arg1) relative to the max parameter
52 * passed to do_rule
53 */
54#define CRUSH_CHOOSE_N 0
55#define CRUSH_CHOOSE_N_MINUS(x) (-(x))
56
57/*
58 * The rule mask is used to describe what the rule is intended for.
59 * Given a ruleset and size of output set, we search through the
60 * rule list for a matching rule_mask.
61 */
62struct crush_rule_mask {
63 __u8 ruleset;
64 __u8 type;
65 __u8 min_size;
66 __u8 max_size;
67};
68
69struct crush_rule {
70 __u32 len;
71 struct crush_rule_mask mask;
72 struct crush_rule_step steps[0];
73};
74
75#define crush_rule_size(len) (sizeof(struct crush_rule) + \
76 (len)*sizeof(struct crush_rule_step))
77
78
79
80/*
81 * A bucket is a named container of other items (either devices or
82 * other buckets). Items within a bucket are chosen using one of a
83 * few different algorithms. The table summarizes how the speed of
84 * each option measures up against mapping stability when items are
85 * added or removed.
86 *
87 * Bucket Alg Speed Additions Removals
88 * ------------------------------------------------
89 * uniform O(1) poor poor
90 * list O(n) optimal poor
91 * tree O(log n) good good
92 * straw O(n) optimal optimal
93 */
94enum {
95 CRUSH_BUCKET_UNIFORM = 1,
96 CRUSH_BUCKET_LIST = 2,
97 CRUSH_BUCKET_TREE = 3,
98 CRUSH_BUCKET_STRAW = 4
99};
100extern const char *crush_bucket_alg_name(int alg);
101
102struct crush_bucket {
103 __s32 id; /* this'll be negative */
104 __u16 type; /* non-zero; type=0 is reserved for devices */
105 __u8 alg; /* one of CRUSH_BUCKET_* */
106 __u8 hash; /* which hash function to use, CRUSH_HASH_* */
107 __u32 weight; /* 16-bit fixed point */
108 __u32 size; /* num items */
109 __s32 *items;
110
111 /*
112 * cached random permutation: used for uniform bucket and for
113 * the linear search fallback for the other bucket types.
114 */
115 __u32 perm_x; /* @x for which *perm is defined */
116 __u32 perm_n; /* num elements of *perm that are permuted/defined */
117 __u32 *perm;
118};
119
120struct crush_bucket_uniform {
121 struct crush_bucket h;
122 __u32 item_weight; /* 16-bit fixed point; all items equally weighted */
123};
124
125struct crush_bucket_list {
126 struct crush_bucket h;
127 __u32 *item_weights; /* 16-bit fixed point */
128 __u32 *sum_weights; /* 16-bit fixed point. element i is sum
129 of weights 0..i, inclusive */
130};
131
132struct crush_bucket_tree {
133 struct crush_bucket h; /* note: h.size is _tree_ size, not number of
134 actual items */
135 __u8 num_nodes;
136 __u32 *node_weights;
137};
138
139struct crush_bucket_straw {
140 struct crush_bucket h;
141 __u32 *item_weights; /* 16-bit fixed point */
142 __u32 *straws; /* 16-bit fixed point */
143};
144
145
146
147/*
148 * CRUSH map includes all buckets, rules, etc.
149 */
150struct crush_map {
151 struct crush_bucket **buckets;
152 struct crush_rule **rules;
153
154 /*
155 * Parent pointers to identify the parent bucket of a device or
156 * bucket in the hierarchy. If an item appears more than
157 * once, this is the _last_ time it appeared (where buckets
158 * are processed in bucket id order, from -1 on down to
159 * -max_buckets).
160 */
161 __u32 *bucket_parents;
162 __u32 *device_parents;
163
164 __s32 max_buckets;
165 __u32 max_rules;
166 __s32 max_devices;
167};
168
169
170/* crush.c */
171extern int crush_get_bucket_item_weight(struct crush_bucket *b, int pos);
172extern void crush_calc_parents(struct crush_map *map);
173extern void crush_destroy_bucket_uniform(struct crush_bucket_uniform *b);
174extern void crush_destroy_bucket_list(struct crush_bucket_list *b);
175extern void crush_destroy_bucket_tree(struct crush_bucket_tree *b);
176extern void crush_destroy_bucket_straw(struct crush_bucket_straw *b);
177extern void crush_destroy_bucket(struct crush_bucket *b);
178extern void crush_destroy(struct crush_map *map);
179
180#endif
diff --git a/include/linux/crush/hash.h b/include/linux/crush/hash.h
new file mode 100644
index 000000000000..91e884230d5d
--- /dev/null
+++ b/include/linux/crush/hash.h
@@ -0,0 +1,17 @@
1#ifndef CEPH_CRUSH_HASH_H
2#define CEPH_CRUSH_HASH_H
3
4#define CRUSH_HASH_RJENKINS1 0
5
6#define CRUSH_HASH_DEFAULT CRUSH_HASH_RJENKINS1
7
8extern const char *crush_hash_name(int type);
9
10extern __u32 crush_hash32(int type, __u32 a);
11extern __u32 crush_hash32_2(int type, __u32 a, __u32 b);
12extern __u32 crush_hash32_3(int type, __u32 a, __u32 b, __u32 c);
13extern __u32 crush_hash32_4(int type, __u32 a, __u32 b, __u32 c, __u32 d);
14extern __u32 crush_hash32_5(int type, __u32 a, __u32 b, __u32 c, __u32 d,
15 __u32 e);
16
17#endif
diff --git a/include/linux/crush/mapper.h b/include/linux/crush/mapper.h
new file mode 100644
index 000000000000..c46b99c18bb0
--- /dev/null
+++ b/include/linux/crush/mapper.h
@@ -0,0 +1,20 @@
1#ifndef CEPH_CRUSH_MAPPER_H
2#define CEPH_CRUSH_MAPPER_H
3
4/*
5 * CRUSH functions for finding rules and then mapping an input to an
6 * output set.
7 *
8 * LGPL2
9 */
10
11#include "crush.h"
12
13extern int crush_find_rule(struct crush_map *map, int pool, int type, int size);
14extern int crush_do_rule(struct crush_map *map,
15 int ruleno,
16 int x, int *result, int result_max,
17 int forcefeed, /* -1 for none */
18 __u32 *weights);
19
20#endif
diff --git a/include/linux/debug_locks.h b/include/linux/debug_locks.h
index 29b3ce3f2a1d..2833452ea01c 100644
--- a/include/linux/debug_locks.h
+++ b/include/linux/debug_locks.h
@@ -49,7 +49,6 @@ struct task_struct;
49 49
50#ifdef CONFIG_LOCKDEP 50#ifdef CONFIG_LOCKDEP
51extern void debug_show_all_locks(void); 51extern void debug_show_all_locks(void);
52extern void __debug_show_held_locks(struct task_struct *task);
53extern void debug_show_held_locks(struct task_struct *task); 52extern void debug_show_held_locks(struct task_struct *task);
54extern void debug_check_no_locks_freed(const void *from, unsigned long len); 53extern void debug_check_no_locks_freed(const void *from, unsigned long len);
55extern void debug_check_no_locks_held(struct task_struct *task); 54extern void debug_check_no_locks_held(struct task_struct *task);
@@ -58,10 +57,6 @@ static inline void debug_show_all_locks(void)
58{ 57{
59} 58}
60 59
61static inline void __debug_show_held_locks(struct task_struct *task)
62{
63}
64
65static inline void debug_show_held_locks(struct task_struct *task) 60static inline void debug_show_held_locks(struct task_struct *task)
66{ 61{
67} 62}
diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h
index c61d4ca27bcc..e2106495cc11 100644
--- a/include/linux/dmaengine.h
+++ b/include/linux/dmaengine.h
@@ -548,7 +548,7 @@ static inline bool dma_dev_has_pq_continue(struct dma_device *dma)
548 return (dma->max_pq & DMA_HAS_PQ_CONTINUE) == DMA_HAS_PQ_CONTINUE; 548 return (dma->max_pq & DMA_HAS_PQ_CONTINUE) == DMA_HAS_PQ_CONTINUE;
549} 549}
550 550
551static unsigned short dma_dev_to_maxpq(struct dma_device *dma) 551static inline unsigned short dma_dev_to_maxpq(struct dma_device *dma)
552{ 552{
553 return dma->max_pq & ~DMA_HAS_PQ_CONTINUE; 553 return dma->max_pq & ~DMA_HAS_PQ_CONTINUE;
554} 554}
diff --git a/include/linux/dmar.h b/include/linux/dmar.h
index d7cecc90ed34..a7d9dc21391d 100644
--- a/include/linux/dmar.h
+++ b/include/linux/dmar.h
@@ -57,15 +57,15 @@ extern int dmar_table_init(void);
57extern int dmar_dev_scope_init(void); 57extern int dmar_dev_scope_init(void);
58 58
59/* Intel IOMMU detection */ 59/* Intel IOMMU detection */
60extern void detect_intel_iommu(void); 60extern int detect_intel_iommu(void);
61extern int enable_drhd_fault_handling(void); 61extern int enable_drhd_fault_handling(void);
62 62
63extern int parse_ioapics_under_ir(void); 63extern int parse_ioapics_under_ir(void);
64extern int alloc_iommu(struct dmar_drhd_unit *); 64extern int alloc_iommu(struct dmar_drhd_unit *);
65#else 65#else
66static inline void detect_intel_iommu(void) 66static inline int detect_intel_iommu(void)
67{ 67{
68 return; 68 return -ENODEV;
69} 69}
70 70
71static inline int dmar_table_init(void) 71static inline int dmar_table_init(void)
@@ -106,6 +106,7 @@ struct irte {
106 __u64 high; 106 __u64 high;
107 }; 107 };
108}; 108};
109
109#ifdef CONFIG_INTR_REMAP 110#ifdef CONFIG_INTR_REMAP
110extern int intr_remapping_enabled; 111extern int intr_remapping_enabled;
111extern int intr_remapping_supported(void); 112extern int intr_remapping_supported(void);
@@ -119,11 +120,8 @@ extern int alloc_irte(struct intel_iommu *iommu, int irq, u16 count);
119extern int set_irte_irq(int irq, struct intel_iommu *iommu, u16 index, 120extern int set_irte_irq(int irq, struct intel_iommu *iommu, u16 index,
120 u16 sub_handle); 121 u16 sub_handle);
121extern int map_irq_to_irte_handle(int irq, u16 *sub_handle); 122extern int map_irq_to_irte_handle(int irq, u16 *sub_handle);
122extern int clear_irte_irq(int irq, struct intel_iommu *iommu, u16 index);
123extern int flush_irte(int irq);
124extern int free_irte(int irq); 123extern int free_irte(int irq);
125 124
126extern int irq_remapped(int irq);
127extern struct intel_iommu *map_dev_to_ir(struct pci_dev *dev); 125extern struct intel_iommu *map_dev_to_ir(struct pci_dev *dev);
128extern struct intel_iommu *map_ioapic_to_ir(int apic); 126extern struct intel_iommu *map_ioapic_to_ir(int apic);
129extern struct intel_iommu *map_hpet_to_ir(u8 id); 127extern struct intel_iommu *map_hpet_to_ir(u8 id);
@@ -177,7 +175,6 @@ static inline int set_msi_sid(struct irte *irte, struct pci_dev *dev)
177 return 0; 175 return 0;
178} 176}
179 177
180#define irq_remapped(irq) (0)
181#define enable_intr_remapping(mode) (-1) 178#define enable_intr_remapping(mode) (-1)
182#define disable_intr_remapping() (0) 179#define disable_intr_remapping() (0)
183#define reenable_intr_remapping(mode) (0) 180#define reenable_intr_remapping(mode) (0)
@@ -187,8 +184,9 @@ static inline int set_msi_sid(struct irte *irte, struct pci_dev *dev)
187/* Can't use the common MSI interrupt functions 184/* Can't use the common MSI interrupt functions
188 * since DMAR is not a pci device 185 * since DMAR is not a pci device
189 */ 186 */
190extern void dmar_msi_unmask(unsigned int irq); 187struct irq_data;
191extern void dmar_msi_mask(unsigned int irq); 188extern void dmar_msi_unmask(struct irq_data *data);
189extern void dmar_msi_mask(struct irq_data *data);
192extern void dmar_msi_read(int irq, struct msi_msg *msg); 190extern void dmar_msi_read(int irq, struct msi_msg *msg);
193extern void dmar_msi_write(int irq, struct msi_msg *msg); 191extern void dmar_msi_write(int irq, struct msi_msg *msg);
194extern int dmar_set_interrupt(struct intel_iommu *iommu); 192extern int dmar_set_interrupt(struct intel_iommu *iommu);
diff --git a/include/linux/dynamic_debug.h b/include/linux/dynamic_debug.h
index 52c0da4bdd18..bef3cda44c4c 100644
--- a/include/linux/dynamic_debug.h
+++ b/include/linux/dynamic_debug.h
@@ -1,6 +1,8 @@
1#ifndef _DYNAMIC_DEBUG_H 1#ifndef _DYNAMIC_DEBUG_H
2#define _DYNAMIC_DEBUG_H 2#define _DYNAMIC_DEBUG_H
3 3
4#include <linux/jump_label.h>
5
4/* dynamic_printk_enabled, and dynamic_printk_enabled2 are bitmasks in which 6/* dynamic_printk_enabled, and dynamic_printk_enabled2 are bitmasks in which
5 * bit n is set to 1 if any modname hashes into the bucket n, 0 otherwise. They 7 * bit n is set to 1 if any modname hashes into the bucket n, 0 otherwise. They
6 * use independent hash functions, to reduce the chance of false positives. 8 * use independent hash functions, to reduce the chance of false positives.
@@ -22,8 +24,6 @@ struct _ddebug {
22 const char *function; 24 const char *function;
23 const char *filename; 25 const char *filename;
24 const char *format; 26 const char *format;
25 char primary_hash;
26 char secondary_hash;
27 unsigned int lineno:24; 27 unsigned int lineno:24;
28 /* 28 /*
29 * The flags field controls the behaviour at the callsite. 29 * The flags field controls the behaviour at the callsite.
@@ -33,6 +33,7 @@ struct _ddebug {
33#define _DPRINTK_FLAGS_PRINT (1<<0) /* printk() a message using the format */ 33#define _DPRINTK_FLAGS_PRINT (1<<0) /* printk() a message using the format */
34#define _DPRINTK_FLAGS_DEFAULT 0 34#define _DPRINTK_FLAGS_DEFAULT 0
35 unsigned int flags:8; 35 unsigned int flags:8;
36 char enabled;
36} __attribute__((aligned(8))); 37} __attribute__((aligned(8)));
37 38
38 39
@@ -42,33 +43,35 @@ int ddebug_add_module(struct _ddebug *tab, unsigned int n,
42#if defined(CONFIG_DYNAMIC_DEBUG) 43#if defined(CONFIG_DYNAMIC_DEBUG)
43extern int ddebug_remove_module(const char *mod_name); 44extern int ddebug_remove_module(const char *mod_name);
44 45
45#define __dynamic_dbg_enabled(dd) ({ \
46 int __ret = 0; \
47 if (unlikely((dynamic_debug_enabled & (1LL << DEBUG_HASH)) && \
48 (dynamic_debug_enabled2 & (1LL << DEBUG_HASH2)))) \
49 if (unlikely(dd.flags)) \
50 __ret = 1; \
51 __ret; })
52
53#define dynamic_pr_debug(fmt, ...) do { \ 46#define dynamic_pr_debug(fmt, ...) do { \
47 __label__ do_printk; \
48 __label__ out; \
54 static struct _ddebug descriptor \ 49 static struct _ddebug descriptor \
55 __used \ 50 __used \
56 __attribute__((section("__verbose"), aligned(8))) = \ 51 __attribute__((section("__verbose"), aligned(8))) = \
57 { KBUILD_MODNAME, __func__, __FILE__, fmt, DEBUG_HASH, \ 52 { KBUILD_MODNAME, __func__, __FILE__, fmt, __LINE__, \
58 DEBUG_HASH2, __LINE__, _DPRINTK_FLAGS_DEFAULT }; \ 53 _DPRINTK_FLAGS_DEFAULT }; \
59 if (__dynamic_dbg_enabled(descriptor)) \ 54 JUMP_LABEL(&descriptor.enabled, do_printk); \
60 printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__); \ 55 goto out; \
56do_printk: \
57 printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__); \
58out: ; \
61 } while (0) 59 } while (0)
62 60
63 61
64#define dynamic_dev_dbg(dev, fmt, ...) do { \ 62#define dynamic_dev_dbg(dev, fmt, ...) do { \
63 __label__ do_printk; \
64 __label__ out; \
65 static struct _ddebug descriptor \ 65 static struct _ddebug descriptor \
66 __used \ 66 __used \
67 __attribute__((section("__verbose"), aligned(8))) = \ 67 __attribute__((section("__verbose"), aligned(8))) = \
68 { KBUILD_MODNAME, __func__, __FILE__, fmt, DEBUG_HASH, \ 68 { KBUILD_MODNAME, __func__, __FILE__, fmt, __LINE__, \
69 DEBUG_HASH2, __LINE__, _DPRINTK_FLAGS_DEFAULT }; \ 69 _DPRINTK_FLAGS_DEFAULT }; \
70 if (__dynamic_dbg_enabled(descriptor)) \ 70 JUMP_LABEL(&descriptor.enabled, do_printk); \
71 dev_printk(KERN_DEBUG, dev, fmt, ##__VA_ARGS__); \ 71 goto out; \
72do_printk: \
73 dev_printk(KERN_DEBUG, dev, fmt, ##__VA_ARGS__); \
74out: ; \
72 } while (0) 75 } while (0)
73 76
74#else 77#else
diff --git a/include/linux/early_res.h b/include/linux/early_res.h
deleted file mode 100644
index 29c09f57a13c..000000000000
--- a/include/linux/early_res.h
+++ /dev/null
@@ -1,23 +0,0 @@
1#ifndef _LINUX_EARLY_RES_H
2#define _LINUX_EARLY_RES_H
3#ifdef __KERNEL__
4
5extern void reserve_early(u64 start, u64 end, char *name);
6extern void reserve_early_overlap_ok(u64 start, u64 end, char *name);
7extern void free_early(u64 start, u64 end);
8void free_early_partial(u64 start, u64 end);
9extern void early_res_to_bootmem(u64 start, u64 end);
10
11void reserve_early_without_check(u64 start, u64 end, char *name);
12u64 find_early_area(u64 ei_start, u64 ei_last, u64 start, u64 end,
13 u64 size, u64 align);
14u64 find_early_area_size(u64 ei_start, u64 ei_last, u64 start,
15 u64 *sizep, u64 align);
16u64 find_fw_memmap_area(u64 start, u64 end, u64 size, u64 align);
17u64 get_max_mapped(void);
18#include <linux/range.h>
19int get_free_all_memory_range(struct range **rangep, int nodeid);
20
21#endif /* __KERNEL__ */
22
23#endif /* _LINUX_EARLY_RES_H */
diff --git a/include/linux/edac.h b/include/linux/edac.h
index 7cf92e8a4196..36c66443bdfd 100644
--- a/include/linux/edac.h
+++ b/include/linux/edac.h
@@ -13,6 +13,7 @@
13#define _LINUX_EDAC_H_ 13#define _LINUX_EDAC_H_
14 14
15#include <asm/atomic.h> 15#include <asm/atomic.h>
16#include <linux/sysdev.h>
16 17
17#define EDAC_OPSTATE_INVAL -1 18#define EDAC_OPSTATE_INVAL -1
18#define EDAC_OPSTATE_POLL 0 19#define EDAC_OPSTATE_POLL 0
@@ -22,9 +23,12 @@
22extern int edac_op_state; 23extern int edac_op_state;
23extern int edac_err_assert; 24extern int edac_err_assert;
24extern atomic_t edac_handlers; 25extern atomic_t edac_handlers;
26extern struct sysdev_class edac_class;
25 27
26extern int edac_handler_set(void); 28extern int edac_handler_set(void);
27extern void edac_atomic_assert_error(void); 29extern void edac_atomic_assert_error(void);
30extern struct sysdev_class *edac_get_sysfs_class(void);
31extern void edac_put_sysfs_class(void);
28 32
29static inline void opstate_init(void) 33static inline void opstate_init(void)
30{ 34{
diff --git a/include/linux/elevator.h b/include/linux/elevator.h
index 926b50322a46..4fd978e7eb83 100644
--- a/include/linux/elevator.h
+++ b/include/linux/elevator.h
@@ -93,6 +93,7 @@ struct elevator_queue
93 struct elevator_type *elevator_type; 93 struct elevator_type *elevator_type;
94 struct mutex sysfs_lock; 94 struct mutex sysfs_lock;
95 struct hlist_head *hash; 95 struct hlist_head *hash;
96 unsigned int registered:1;
96}; 97};
97 98
98/* 99/*
diff --git a/include/linux/fdtable.h b/include/linux/fdtable.h
index f59ed297b661..133c0ba25e30 100644
--- a/include/linux/fdtable.h
+++ b/include/linux/fdtable.h
@@ -31,7 +31,7 @@ struct embedded_fd_set {
31 31
32struct fdtable { 32struct fdtable {
33 unsigned int max_fds; 33 unsigned int max_fds;
34 struct file ** fd; /* current fd array */ 34 struct file __rcu **fd; /* current fd array */
35 fd_set *close_on_exec; 35 fd_set *close_on_exec;
36 fd_set *open_fds; 36 fd_set *open_fds;
37 struct rcu_head rcu; 37 struct rcu_head rcu;
@@ -46,7 +46,7 @@ struct files_struct {
46 * read mostly part 46 * read mostly part
47 */ 47 */
48 atomic_t count; 48 atomic_t count;
49 struct fdtable *fdt; 49 struct fdtable __rcu *fdt;
50 struct fdtable fdtab; 50 struct fdtable fdtab;
51 /* 51 /*
52 * written part on a separate cache line in SMP 52 * written part on a separate cache line in SMP
@@ -55,7 +55,7 @@ struct files_struct {
55 int next_fd; 55 int next_fd;
56 struct embedded_fd_set close_on_exec_init; 56 struct embedded_fd_set close_on_exec_init;
57 struct embedded_fd_set open_fds_init; 57 struct embedded_fd_set open_fds_init;
58 struct file * fd_array[NR_OPEN_DEFAULT]; 58 struct file __rcu * fd_array[NR_OPEN_DEFAULT];
59}; 59};
60 60
61#define rcu_dereference_check_fdtable(files, fdtfd) \ 61#define rcu_dereference_check_fdtable(files, fdtfd) \
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 180325268237..75e27a25c6e3 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1390,7 +1390,7 @@ struct super_block {
1390 * Saved mount options for lazy filesystems using 1390 * Saved mount options for lazy filesystems using
1391 * generic_show_options() 1391 * generic_show_options()
1392 */ 1392 */
1393 char *s_options; 1393 char __rcu *s_options;
1394}; 1394};
1395 1395
1396extern struct timespec current_fs_time(struct super_block *sb); 1396extern struct timespec current_fs_time(struct super_block *sb);
@@ -2384,6 +2384,8 @@ extern ssize_t simple_write_to_buffer(void *to, size_t available, loff_t *ppos,
2384 2384
2385extern int generic_file_fsync(struct file *, int); 2385extern int generic_file_fsync(struct file *, int);
2386 2386
2387extern int generic_check_addressable(unsigned, u64);
2388
2387#ifdef CONFIG_MIGRATION 2389#ifdef CONFIG_MIGRATION
2388extern int buffer_migrate_page(struct address_space *, 2390extern int buffer_migrate_page(struct address_space *,
2389 struct page *, struct page *); 2391 struct page *, struct page *);
diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h
index 02b8b24f8f51..8beabb958f61 100644
--- a/include/linux/ftrace_event.h
+++ b/include/linux/ftrace_event.h
@@ -191,8 +191,8 @@ struct ftrace_event_call {
191 unsigned int flags; 191 unsigned int flags;
192 192
193#ifdef CONFIG_PERF_EVENTS 193#ifdef CONFIG_PERF_EVENTS
194 int perf_refcount; 194 int perf_refcount;
195 struct hlist_head *perf_events; 195 struct hlist_head __percpu *perf_events;
196#endif 196#endif
197}; 197};
198 198
@@ -252,8 +252,8 @@ DECLARE_PER_CPU(struct pt_regs, perf_trace_regs);
252 252
253extern int perf_trace_init(struct perf_event *event); 253extern int perf_trace_init(struct perf_event *event);
254extern void perf_trace_destroy(struct perf_event *event); 254extern void perf_trace_destroy(struct perf_event *event);
255extern int perf_trace_enable(struct perf_event *event); 255extern int perf_trace_add(struct perf_event *event, int flags);
256extern void perf_trace_disable(struct perf_event *event); 256extern void perf_trace_del(struct perf_event *event, int flags);
257extern int ftrace_profile_set_filter(struct perf_event *event, int event_id, 257extern int ftrace_profile_set_filter(struct perf_event *event, int event_id,
258 char *filter_str); 258 char *filter_str);
259extern void ftrace_profile_free_filter(struct perf_event *event); 259extern void ftrace_profile_free_filter(struct perf_event *event);
diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index 5f2f4c4d8fb0..af3f06b41dc1 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -129,8 +129,8 @@ struct blk_scsi_cmd_filter {
129struct disk_part_tbl { 129struct disk_part_tbl {
130 struct rcu_head rcu_head; 130 struct rcu_head rcu_head;
131 int len; 131 int len;
132 struct hd_struct *last_lookup; 132 struct hd_struct __rcu *last_lookup;
133 struct hd_struct *part[]; 133 struct hd_struct __rcu *part[];
134}; 134};
135 135
136struct gendisk { 136struct gendisk {
@@ -149,7 +149,7 @@ struct gendisk {
149 * non-critical accesses use RCU. Always access through 149 * non-critical accesses use RCU. Always access through
150 * helpers. 150 * helpers.
151 */ 151 */
152 struct disk_part_tbl *part_tbl; 152 struct disk_part_tbl __rcu *part_tbl;
153 struct hd_struct part0; 153 struct hd_struct part0;
154 154
155 const struct block_device_operations *fops; 155 const struct block_device_operations *fops;
diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h
index d5b387669dab..8a389b608ce3 100644
--- a/include/linux/hardirq.h
+++ b/include/linux/hardirq.h
@@ -8,7 +8,6 @@
8#include <linux/lockdep.h> 8#include <linux/lockdep.h>
9#include <linux/ftrace_irq.h> 9#include <linux/ftrace_irq.h>
10#include <asm/hardirq.h> 10#include <asm/hardirq.h>
11#include <asm/system.h>
12 11
13/* 12/*
14 * We put the hardirq and softirq counter into the preemption 13 * We put the hardirq and softirq counter into the preemption
@@ -64,6 +63,8 @@
64#define HARDIRQ_OFFSET (1UL << HARDIRQ_SHIFT) 63#define HARDIRQ_OFFSET (1UL << HARDIRQ_SHIFT)
65#define NMI_OFFSET (1UL << NMI_SHIFT) 64#define NMI_OFFSET (1UL << NMI_SHIFT)
66 65
66#define SOFTIRQ_DISABLE_OFFSET (2 * SOFTIRQ_OFFSET)
67
67#ifndef PREEMPT_ACTIVE 68#ifndef PREEMPT_ACTIVE
68#define PREEMPT_ACTIVE_BITS 1 69#define PREEMPT_ACTIVE_BITS 1
69#define PREEMPT_ACTIVE_SHIFT (NMI_SHIFT + NMI_BITS) 70#define PREEMPT_ACTIVE_SHIFT (NMI_SHIFT + NMI_BITS)
@@ -82,10 +83,13 @@
82/* 83/*
83 * Are we doing bottom half or hardware interrupt processing? 84 * Are we doing bottom half or hardware interrupt processing?
84 * Are we in a softirq context? Interrupt context? 85 * Are we in a softirq context? Interrupt context?
86 * in_softirq - Are we currently processing softirq or have bh disabled?
87 * in_serving_softirq - Are we currently processing softirq?
85 */ 88 */
86#define in_irq() (hardirq_count()) 89#define in_irq() (hardirq_count())
87#define in_softirq() (softirq_count()) 90#define in_softirq() (softirq_count())
88#define in_interrupt() (irq_count()) 91#define in_interrupt() (irq_count())
92#define in_serving_softirq() (softirq_count() & SOFTIRQ_OFFSET)
89 93
90/* 94/*
91 * Are we in NMI context? 95 * Are we in NMI context?
@@ -132,14 +136,16 @@ extern void synchronize_irq(unsigned int irq);
132 136
133struct task_struct; 137struct task_struct;
134 138
135#ifndef CONFIG_VIRT_CPU_ACCOUNTING 139#if !defined(CONFIG_VIRT_CPU_ACCOUNTING) && !defined(CONFIG_IRQ_TIME_ACCOUNTING)
136static inline void account_system_vtime(struct task_struct *tsk) 140static inline void account_system_vtime(struct task_struct *tsk)
137{ 141{
138} 142}
143#else
144extern void account_system_vtime(struct task_struct *tsk);
139#endif 145#endif
140 146
141#if defined(CONFIG_NO_HZ) 147#if defined(CONFIG_NO_HZ)
142#if defined(CONFIG_TINY_RCU) 148#if defined(CONFIG_TINY_RCU) || defined(CONFIG_TINY_PREEMPT_RCU)
143extern void rcu_enter_nohz(void); 149extern void rcu_enter_nohz(void);
144extern void rcu_exit_nohz(void); 150extern void rcu_exit_nohz(void);
145 151
diff --git a/include/linux/htirq.h b/include/linux/htirq.h
index c96ea46737d0..70a1dbbf2093 100644
--- a/include/linux/htirq.h
+++ b/include/linux/htirq.h
@@ -9,8 +9,9 @@ struct ht_irq_msg {
9/* Helper functions.. */ 9/* Helper functions.. */
10void fetch_ht_irq_msg(unsigned int irq, struct ht_irq_msg *msg); 10void fetch_ht_irq_msg(unsigned int irq, struct ht_irq_msg *msg);
11void write_ht_irq_msg(unsigned int irq, struct ht_irq_msg *msg); 11void write_ht_irq_msg(unsigned int irq, struct ht_irq_msg *msg);
12void mask_ht_irq(unsigned int irq); 12struct irq_data;
13void unmask_ht_irq(unsigned int irq); 13void mask_ht_irq(struct irq_data *data);
14void unmask_ht_irq(struct irq_data *data);
14 15
15/* The arch hook for getting things started */ 16/* The arch hook for getting things started */
16int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev); 17int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev);
diff --git a/include/linux/idr.h b/include/linux/idr.h
index e968db71e33a..cdb715e58e3e 100644
--- a/include/linux/idr.h
+++ b/include/linux/idr.h
@@ -50,14 +50,14 @@
50 50
51struct idr_layer { 51struct idr_layer {
52 unsigned long bitmap; /* A zero bit means "space here" */ 52 unsigned long bitmap; /* A zero bit means "space here" */
53 struct idr_layer *ary[1<<IDR_BITS]; 53 struct idr_layer __rcu *ary[1<<IDR_BITS];
54 int count; /* When zero, we can release it */ 54 int count; /* When zero, we can release it */
55 int layer; /* distance from leaf */ 55 int layer; /* distance from leaf */
56 struct rcu_head rcu_head; 56 struct rcu_head rcu_head;
57}; 57};
58 58
59struct idr { 59struct idr {
60 struct idr_layer *top; 60 struct idr_layer __rcu *top;
61 struct idr_layer *id_free; 61 struct idr_layer *id_free;
62 int layers; /* only valid without concurrent changes */ 62 int layers; /* only valid without concurrent changes */
63 int id_free_cnt; 63 int id_free_cnt;
diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index 1f43fa56f600..2fea6c8ef6ba 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -82,11 +82,17 @@ extern struct group_info init_groups;
82# define CAP_INIT_BSET CAP_FULL_SET 82# define CAP_INIT_BSET CAP_FULL_SET
83 83
84#ifdef CONFIG_TREE_PREEMPT_RCU 84#ifdef CONFIG_TREE_PREEMPT_RCU
85#define INIT_TASK_RCU_TREE_PREEMPT() \
86 .rcu_blocked_node = NULL,
87#else
88#define INIT_TASK_RCU_TREE_PREEMPT(tsk)
89#endif
90#ifdef CONFIG_PREEMPT_RCU
85#define INIT_TASK_RCU_PREEMPT(tsk) \ 91#define INIT_TASK_RCU_PREEMPT(tsk) \
86 .rcu_read_lock_nesting = 0, \ 92 .rcu_read_lock_nesting = 0, \
87 .rcu_read_unlock_special = 0, \ 93 .rcu_read_unlock_special = 0, \
88 .rcu_blocked_node = NULL, \ 94 .rcu_node_entry = LIST_HEAD_INIT(tsk.rcu_node_entry), \
89 .rcu_node_entry = LIST_HEAD_INIT(tsk.rcu_node_entry), 95 INIT_TASK_RCU_TREE_PREEMPT()
90#else 96#else
91#define INIT_TASK_RCU_PREEMPT(tsk) 97#define INIT_TASK_RCU_PREEMPT(tsk)
92#endif 98#endif
@@ -137,8 +143,8 @@ extern struct cred init_cred;
137 .children = LIST_HEAD_INIT(tsk.children), \ 143 .children = LIST_HEAD_INIT(tsk.children), \
138 .sibling = LIST_HEAD_INIT(tsk.sibling), \ 144 .sibling = LIST_HEAD_INIT(tsk.sibling), \
139 .group_leader = &tsk, \ 145 .group_leader = &tsk, \
140 .real_cred = &init_cred, \ 146 RCU_INIT_POINTER(.real_cred, &init_cred), \
141 .cred = &init_cred, \ 147 RCU_INIT_POINTER(.cred, &init_cred), \
142 .cred_guard_mutex = \ 148 .cred_guard_mutex = \
143 __MUTEX_INITIALIZER(tsk.cred_guard_mutex), \ 149 __MUTEX_INITIALIZER(tsk.cred_guard_mutex), \
144 .comm = "swapper", \ 150 .comm = "swapper", \
diff --git a/include/linux/input.h b/include/linux/input.h
index 896a92227bc4..d6ae1761be97 100644
--- a/include/linux/input.h
+++ b/include/linux/input.h
@@ -1196,7 +1196,7 @@ struct input_dev {
1196 int (*flush)(struct input_dev *dev, struct file *file); 1196 int (*flush)(struct input_dev *dev, struct file *file);
1197 int (*event)(struct input_dev *dev, unsigned int type, unsigned int code, int value); 1197 int (*event)(struct input_dev *dev, unsigned int type, unsigned int code, int value);
1198 1198
1199 struct input_handle *grab; 1199 struct input_handle __rcu *grab;
1200 1200
1201 spinlock_t event_lock; 1201 spinlock_t event_lock;
1202 struct mutex mutex; 1202 struct mutex mutex;
diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index a0384a4d1e6f..414328577ced 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -18,6 +18,7 @@
18#include <asm/atomic.h> 18#include <asm/atomic.h>
19#include <asm/ptrace.h> 19#include <asm/ptrace.h>
20#include <asm/system.h> 20#include <asm/system.h>
21#include <trace/events/irq.h>
21 22
22/* 23/*
23 * These correspond to the IORESOURCE_IRQ_* defines in 24 * These correspond to the IORESOURCE_IRQ_* defines in
@@ -407,7 +408,12 @@ asmlinkage void do_softirq(void);
407asmlinkage void __do_softirq(void); 408asmlinkage void __do_softirq(void);
408extern void open_softirq(int nr, void (*action)(struct softirq_action *)); 409extern void open_softirq(int nr, void (*action)(struct softirq_action *));
409extern void softirq_init(void); 410extern void softirq_init(void);
410#define __raise_softirq_irqoff(nr) do { or_softirq_pending(1UL << (nr)); } while (0) 411static inline void __raise_softirq_irqoff(unsigned int nr)
412{
413 trace_softirq_raise((struct softirq_action *)(unsigned long)nr, NULL);
414 or_softirq_pending(1UL << nr);
415}
416
411extern void raise_softirq_irqoff(unsigned int nr); 417extern void raise_softirq_irqoff(unsigned int nr);
412extern void raise_softirq(unsigned int nr); 418extern void raise_softirq(unsigned int nr);
413extern void wakeup_softirqd(void); 419extern void wakeup_softirqd(void);
@@ -641,11 +647,8 @@ static inline void init_irq_proc(void)
641struct seq_file; 647struct seq_file;
642int show_interrupts(struct seq_file *p, void *v); 648int show_interrupts(struct seq_file *p, void *v);
643 649
644struct irq_desc;
645
646extern int early_irq_init(void); 650extern int early_irq_init(void);
647extern int arch_probe_nr_irqs(void); 651extern int arch_probe_nr_irqs(void);
648extern int arch_early_irq_init(void); 652extern int arch_early_irq_init(void);
649extern int arch_init_chip_data(struct irq_desc *desc, int node);
650 653
651#endif 654#endif
diff --git a/include/linux/iocontext.h b/include/linux/iocontext.h
index 64d529133031..3e70b21884a9 100644
--- a/include/linux/iocontext.h
+++ b/include/linux/iocontext.h
@@ -53,7 +53,7 @@ struct io_context {
53 53
54 struct radix_tree_root radix_root; 54 struct radix_tree_root radix_root;
55 struct hlist_head cic_list; 55 struct hlist_head cic_list;
56 void *ioc_data; 56 void __rcu *ioc_data;
57}; 57};
58 58
59static inline struct io_context *ioc_task_link(struct io_context *ioc) 59static inline struct io_context *ioc_task_link(struct io_context *ioc)
diff --git a/include/linux/irq.h b/include/linux/irq.h
index c03243ad84b4..e9639115dff1 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -72,6 +72,10 @@ typedef void (*irq_flow_handler_t)(unsigned int irq,
72#define IRQ_ONESHOT 0x08000000 /* IRQ is not unmasked after hardirq */ 72#define IRQ_ONESHOT 0x08000000 /* IRQ is not unmasked after hardirq */
73#define IRQ_NESTED_THREAD 0x10000000 /* IRQ is nested into another, no own handler thread */ 73#define IRQ_NESTED_THREAD 0x10000000 /* IRQ is nested into another, no own handler thread */
74 74
75#define IRQF_MODIFY_MASK \
76 (IRQ_TYPE_SENSE_MASK | IRQ_NOPROBE | IRQ_NOREQUEST | \
77 IRQ_NOAUTOEN | IRQ_MOVE_PCNTXT | IRQ_LEVEL)
78
75#ifdef CONFIG_IRQ_PER_CPU 79#ifdef CONFIG_IRQ_PER_CPU
76# define CHECK_IRQ_PER_CPU(var) ((var) & IRQ_PER_CPU) 80# define CHECK_IRQ_PER_CPU(var) ((var) & IRQ_PER_CPU)
77# define IRQ_NO_BALANCING_MASK (IRQ_PER_CPU | IRQ_NO_BALANCING) 81# define IRQ_NO_BALANCING_MASK (IRQ_PER_CPU | IRQ_NO_BALANCING)
@@ -80,36 +84,77 @@ typedef void (*irq_flow_handler_t)(unsigned int irq,
80# define IRQ_NO_BALANCING_MASK IRQ_NO_BALANCING 84# define IRQ_NO_BALANCING_MASK IRQ_NO_BALANCING
81#endif 85#endif
82 86
83struct proc_dir_entry;
84struct msi_desc; 87struct msi_desc;
85 88
86/** 89/**
90 * struct irq_data - per irq and irq chip data passed down to chip functions
91 * @irq: interrupt number
92 * @node: node index useful for balancing
93 * @chip: low level interrupt hardware access
94 * @handler_data: per-IRQ data for the irq_chip methods
95 * @chip_data: platform-specific per-chip private data for the chip
96 * methods, to allow shared chip implementations
97 * @msi_desc: MSI descriptor
98 * @affinity: IRQ affinity on SMP
99 *
100 * The fields here need to overlay the ones in irq_desc until we have
101 * cleaned up the direct references and switched everything over to
102 * irq_data.
103 */
104struct irq_data {
105 unsigned int irq;
106 unsigned int node;
107 struct irq_chip *chip;
108 void *handler_data;
109 void *chip_data;
110 struct msi_desc *msi_desc;
111#ifdef CONFIG_SMP
112 cpumask_var_t affinity;
113#endif
114};
115
116/**
87 * struct irq_chip - hardware interrupt chip descriptor 117 * struct irq_chip - hardware interrupt chip descriptor
88 * 118 *
89 * @name: name for /proc/interrupts 119 * @name: name for /proc/interrupts
90 * @startup: start up the interrupt (defaults to ->enable if NULL) 120 * @startup: deprecated, replaced by irq_startup
91 * @shutdown: shut down the interrupt (defaults to ->disable if NULL) 121 * @shutdown: deprecated, replaced by irq_shutdown
92 * @enable: enable the interrupt (defaults to chip->unmask if NULL) 122 * @enable: deprecated, replaced by irq_enable
93 * @disable: disable the interrupt 123 * @disable: deprecated, replaced by irq_disable
94 * @ack: start of a new interrupt 124 * @ack: deprecated, replaced by irq_ack
95 * @mask: mask an interrupt source 125 * @mask: deprecated, replaced by irq_mask
96 * @mask_ack: ack and mask an interrupt source 126 * @mask_ack: deprecated, replaced by irq_mask_ack
97 * @unmask: unmask an interrupt source 127 * @unmask: deprecated, replaced by irq_unmask
98 * @eoi: end of interrupt - chip level 128 * @eoi: deprecated, replaced by irq_eoi
99 * @end: end of interrupt - flow level 129 * @end: deprecated, will go away with __do_IRQ()
100 * @set_affinity: set the CPU affinity on SMP machines 130 * @set_affinity: deprecated, replaced by irq_set_affinity
101 * @retrigger: resend an IRQ to the CPU 131 * @retrigger: deprecated, replaced by irq_retrigger
102 * @set_type: set the flow type (IRQ_TYPE_LEVEL/etc.) of an IRQ 132 * @set_type: deprecated, replaced by irq_set_type
103 * @set_wake: enable/disable power-management wake-on of an IRQ 133 * @set_wake: deprecated, replaced by irq_set_wake
134 * @bus_lock: deprecated, replaced by irq_bus_lock
135 * @bus_sync_unlock: deprecated, replaced by irq_bus_sync_unlock
104 * 136 *
105 * @bus_lock: function to lock access to slow bus (i2c) chips 137 * @irq_startup: start up the interrupt (defaults to ->enable if NULL)
106 * @bus_sync_unlock: function to sync and unlock slow bus (i2c) chips 138 * @irq_shutdown: shut down the interrupt (defaults to ->disable if NULL)
139 * @irq_enable: enable the interrupt (defaults to chip->unmask if NULL)
140 * @irq_disable: disable the interrupt
141 * @irq_ack: start of a new interrupt
142 * @irq_mask: mask an interrupt source
143 * @irq_mask_ack: ack and mask an interrupt source
144 * @irq_unmask: unmask an interrupt source
145 * @irq_eoi: end of interrupt
146 * @irq_set_affinity: set the CPU affinity on SMP machines
147 * @irq_retrigger: resend an IRQ to the CPU
148 * @irq_set_type: set the flow type (IRQ_TYPE_LEVEL/etc.) of an IRQ
149 * @irq_set_wake: enable/disable power-management wake-on of an IRQ
150 * @irq_bus_lock: function to lock access to slow bus (i2c) chips
151 * @irq_bus_sync_unlock:function to sync and unlock slow bus (i2c) chips
107 * 152 *
108 * @release: release function solely used by UML 153 * @release: release function solely used by UML
109 * @typename: obsoleted by name, kept as migration helper
110 */ 154 */
111struct irq_chip { 155struct irq_chip {
112 const char *name; 156 const char *name;
157#ifndef CONFIG_GENERIC_HARDIRQS_NO_DEPRECATED
113 unsigned int (*startup)(unsigned int irq); 158 unsigned int (*startup)(unsigned int irq);
114 void (*shutdown)(unsigned int irq); 159 void (*shutdown)(unsigned int irq);
115 void (*enable)(unsigned int irq); 160 void (*enable)(unsigned int irq);
@@ -130,154 +175,66 @@ struct irq_chip {
130 175
131 void (*bus_lock)(unsigned int irq); 176 void (*bus_lock)(unsigned int irq);
132 void (*bus_sync_unlock)(unsigned int irq); 177 void (*bus_sync_unlock)(unsigned int irq);
178#endif
179 unsigned int (*irq_startup)(struct irq_data *data);
180 void (*irq_shutdown)(struct irq_data *data);
181 void (*irq_enable)(struct irq_data *data);
182 void (*irq_disable)(struct irq_data *data);
183
184 void (*irq_ack)(struct irq_data *data);
185 void (*irq_mask)(struct irq_data *data);
186 void (*irq_mask_ack)(struct irq_data *data);
187 void (*irq_unmask)(struct irq_data *data);
188 void (*irq_eoi)(struct irq_data *data);
189
190 int (*irq_set_affinity)(struct irq_data *data, const struct cpumask *dest, bool force);
191 int (*irq_retrigger)(struct irq_data *data);
192 int (*irq_set_type)(struct irq_data *data, unsigned int flow_type);
193 int (*irq_set_wake)(struct irq_data *data, unsigned int on);
194
195 void (*irq_bus_lock)(struct irq_data *data);
196 void (*irq_bus_sync_unlock)(struct irq_data *data);
133 197
134 /* Currently used only by UML, might disappear one day.*/ 198 /* Currently used only by UML, might disappear one day.*/
135#ifdef CONFIG_IRQ_RELEASE_METHOD 199#ifdef CONFIG_IRQ_RELEASE_METHOD
136 void (*release)(unsigned int irq, void *dev_id); 200 void (*release)(unsigned int irq, void *dev_id);
137#endif 201#endif
138 /*
139 * For compatibility, ->typename is copied into ->name.
140 * Will disappear.
141 */
142 const char *typename;
143}; 202};
144 203
145struct timer_rand_state; 204/* This include will go away once we isolated irq_desc usage to core code */
146struct irq_2_iommu; 205#include <linux/irqdesc.h>
147/**
148 * struct irq_desc - interrupt descriptor
149 * @irq: interrupt number for this descriptor
150 * @timer_rand_state: pointer to timer rand state struct
151 * @kstat_irqs: irq stats per cpu
152 * @irq_2_iommu: iommu with this irq
153 * @handle_irq: highlevel irq-events handler [if NULL, __do_IRQ()]
154 * @chip: low level interrupt hardware access
155 * @msi_desc: MSI descriptor
156 * @handler_data: per-IRQ data for the irq_chip methods
157 * @chip_data: platform-specific per-chip private data for the chip
158 * methods, to allow shared chip implementations
159 * @action: the irq action chain
160 * @status: status information
161 * @depth: disable-depth, for nested irq_disable() calls
162 * @wake_depth: enable depth, for multiple set_irq_wake() callers
163 * @irq_count: stats field to detect stalled irqs
164 * @last_unhandled: aging timer for unhandled count
165 * @irqs_unhandled: stats field for spurious unhandled interrupts
166 * @lock: locking for SMP
167 * @affinity: IRQ affinity on SMP
168 * @node: node index useful for balancing
169 * @pending_mask: pending rebalanced interrupts
170 * @threads_active: number of irqaction threads currently running
171 * @wait_for_threads: wait queue for sync_irq to wait for threaded handlers
172 * @dir: /proc/irq/ procfs entry
173 * @name: flow handler name for /proc/interrupts output
174 */
175struct irq_desc {
176 unsigned int irq;
177 struct timer_rand_state *timer_rand_state;
178 unsigned int *kstat_irqs;
179#ifdef CONFIG_INTR_REMAP
180 struct irq_2_iommu *irq_2_iommu;
181#endif
182 irq_flow_handler_t handle_irq;
183 struct irq_chip *chip;
184 struct msi_desc *msi_desc;
185 void *handler_data;
186 void *chip_data;
187 struct irqaction *action; /* IRQ action list */
188 unsigned int status; /* IRQ status */
189
190 unsigned int depth; /* nested irq disables */
191 unsigned int wake_depth; /* nested wake enables */
192 unsigned int irq_count; /* For detecting broken IRQs */
193 unsigned long last_unhandled; /* Aging timer for unhandled count */
194 unsigned int irqs_unhandled;
195 raw_spinlock_t lock;
196#ifdef CONFIG_SMP
197 cpumask_var_t affinity;
198 const struct cpumask *affinity_hint;
199 unsigned int node;
200#ifdef CONFIG_GENERIC_PENDING_IRQ
201 cpumask_var_t pending_mask;
202#endif
203#endif
204 atomic_t threads_active;
205 wait_queue_head_t wait_for_threads;
206#ifdef CONFIG_PROC_FS
207 struct proc_dir_entry *dir;
208#endif
209 const char *name;
210} ____cacheline_internodealigned_in_smp;
211 206
212extern void arch_init_copy_chip_data(struct irq_desc *old_desc, 207/*
213 struct irq_desc *desc, int node); 208 * Pick up the arch-dependent methods:
214extern void arch_free_chip_data(struct irq_desc *old_desc, struct irq_desc *desc); 209 */
210#include <asm/hw_irq.h>
215 211
216#ifndef CONFIG_SPARSE_IRQ 212#ifndef NR_IRQS_LEGACY
217extern struct irq_desc irq_desc[NR_IRQS]; 213# define NR_IRQS_LEGACY 0
218#endif 214#endif
219 215
220#ifdef CONFIG_NUMA_IRQ_DESC 216#ifndef ARCH_IRQ_INIT_FLAGS
221extern struct irq_desc *move_irq_desc(struct irq_desc *old_desc, int node); 217# define ARCH_IRQ_INIT_FLAGS 0
222#else
223static inline struct irq_desc *move_irq_desc(struct irq_desc *desc, int node)
224{
225 return desc;
226}
227#endif 218#endif
228 219
229extern struct irq_desc *irq_to_desc_alloc_node(unsigned int irq, int node); 220#define IRQ_DEFAULT_INIT_FLAGS (IRQ_DISABLED | ARCH_IRQ_INIT_FLAGS)
230
231/*
232 * Pick up the arch-dependent methods:
233 */
234#include <asm/hw_irq.h>
235 221
222struct irqaction;
236extern int setup_irq(unsigned int irq, struct irqaction *new); 223extern int setup_irq(unsigned int irq, struct irqaction *new);
237extern void remove_irq(unsigned int irq, struct irqaction *act); 224extern void remove_irq(unsigned int irq, struct irqaction *act);
238 225
239#ifdef CONFIG_GENERIC_HARDIRQS 226#ifdef CONFIG_GENERIC_HARDIRQS
240 227
241#ifdef CONFIG_SMP 228#if defined(CONFIG_SMP) && defined(CONFIG_GENERIC_PENDING_IRQ)
242
243#ifdef CONFIG_GENERIC_PENDING_IRQ
244
245void move_native_irq(int irq); 229void move_native_irq(int irq);
246void move_masked_irq(int irq); 230void move_masked_irq(int irq);
247 231#else
248#else /* CONFIG_GENERIC_PENDING_IRQ */ 232static inline void move_native_irq(int irq) { }
249 233static inline void move_masked_irq(int irq) { }
250static inline void move_irq(int irq) 234#endif
251{
252}
253
254static inline void move_native_irq(int irq)
255{
256}
257
258static inline void move_masked_irq(int irq)
259{
260}
261
262#endif /* CONFIG_GENERIC_PENDING_IRQ */
263
264#else /* CONFIG_SMP */
265
266#define move_native_irq(x)
267#define move_masked_irq(x)
268
269#endif /* CONFIG_SMP */
270 235
271extern int no_irq_affinity; 236extern int no_irq_affinity;
272 237
273static inline int irq_balancing_disabled(unsigned int irq)
274{
275 struct irq_desc *desc;
276
277 desc = irq_to_desc(irq);
278 return desc->status & IRQ_NO_BALANCING_MASK;
279}
280
281/* Handle irq action chains: */ 238/* Handle irq action chains: */
282extern irqreturn_t handle_IRQ_event(unsigned int irq, struct irqaction *action); 239extern irqreturn_t handle_IRQ_event(unsigned int irq, struct irqaction *action);
283 240
@@ -293,42 +250,10 @@ extern void handle_percpu_irq(unsigned int irq, struct irq_desc *desc);
293extern void handle_bad_irq(unsigned int irq, struct irq_desc *desc); 250extern void handle_bad_irq(unsigned int irq, struct irq_desc *desc);
294extern void handle_nested_irq(unsigned int irq); 251extern void handle_nested_irq(unsigned int irq);
295 252
296/*
297 * Monolithic do_IRQ implementation.
298 */
299#ifndef CONFIG_GENERIC_HARDIRQS_NO__DO_IRQ
300extern unsigned int __do_IRQ(unsigned int irq);
301#endif
302
303/*
304 * Architectures call this to let the generic IRQ layer
305 * handle an interrupt. If the descriptor is attached to an
306 * irqchip-style controller then we call the ->handle_irq() handler,
307 * and it calls __do_IRQ() if it's attached to an irqtype-style controller.
308 */
309static inline void generic_handle_irq_desc(unsigned int irq, struct irq_desc *desc)
310{
311#ifdef CONFIG_GENERIC_HARDIRQS_NO__DO_IRQ
312 desc->handle_irq(irq, desc);
313#else
314 if (likely(desc->handle_irq))
315 desc->handle_irq(irq, desc);
316 else
317 __do_IRQ(irq);
318#endif
319}
320
321static inline void generic_handle_irq(unsigned int irq)
322{
323 generic_handle_irq_desc(irq, irq_to_desc(irq));
324}
325
326/* Handling of unhandled and spurious interrupts: */ 253/* Handling of unhandled and spurious interrupts: */
327extern void note_interrupt(unsigned int irq, struct irq_desc *desc, 254extern void note_interrupt(unsigned int irq, struct irq_desc *desc,
328 irqreturn_t action_ret); 255 irqreturn_t action_ret);
329 256
330/* Resending of interrupts :*/
331void check_irq_resend(struct irq_desc *desc, unsigned int irq);
332 257
333/* Enable/disable irq debugging output: */ 258/* Enable/disable irq debugging output: */
334extern int noirqdebug_setup(char *str); 259extern int noirqdebug_setup(char *str);
@@ -351,16 +276,6 @@ extern void
351__set_irq_handler(unsigned int irq, irq_flow_handler_t handle, int is_chained, 276__set_irq_handler(unsigned int irq, irq_flow_handler_t handle, int is_chained,
352 const char *name); 277 const char *name);
353 278
354/* caller has locked the irq_desc and both params are valid */
355static inline void __set_irq_handler_unlocked(int irq,
356 irq_flow_handler_t handler)
357{
358 struct irq_desc *desc;
359
360 desc = irq_to_desc(irq);
361 desc->handle_irq = handler;
362}
363
364/* 279/*
365 * Set a highlevel flow handler for a given IRQ: 280 * Set a highlevel flow handler for a given IRQ:
366 */ 281 */
@@ -384,141 +299,121 @@ set_irq_chained_handler(unsigned int irq,
384 299
385extern void set_irq_nested_thread(unsigned int irq, int nest); 300extern void set_irq_nested_thread(unsigned int irq, int nest);
386 301
387extern void set_irq_noprobe(unsigned int irq); 302void irq_modify_status(unsigned int irq, unsigned long clr, unsigned long set);
388extern void set_irq_probe(unsigned int irq); 303
304static inline void irq_set_status_flags(unsigned int irq, unsigned long set)
305{
306 irq_modify_status(irq, 0, set);
307}
308
309static inline void irq_clear_status_flags(unsigned int irq, unsigned long clr)
310{
311 irq_modify_status(irq, clr, 0);
312}
313
314static inline void set_irq_noprobe(unsigned int irq)
315{
316 irq_modify_status(irq, 0, IRQ_NOPROBE);
317}
318
319static inline void set_irq_probe(unsigned int irq)
320{
321 irq_modify_status(irq, IRQ_NOPROBE, 0);
322}
389 323
390/* Handle dynamic irq creation and destruction */ 324/* Handle dynamic irq creation and destruction */
391extern unsigned int create_irq_nr(unsigned int irq_want, int node); 325extern unsigned int create_irq_nr(unsigned int irq_want, int node);
392extern int create_irq(void); 326extern int create_irq(void);
393extern void destroy_irq(unsigned int irq); 327extern void destroy_irq(unsigned int irq);
394 328
395/* Test to see if a driver has successfully requested an irq */ 329/*
396static inline int irq_has_action(unsigned int irq) 330 * Dynamic irq helper functions. Obsolete. Use irq_alloc_desc* and
331 * irq_free_desc instead.
332 */
333extern void dynamic_irq_cleanup(unsigned int irq);
334static inline void dynamic_irq_init(unsigned int irq)
397{ 335{
398 struct irq_desc *desc = irq_to_desc(irq); 336 dynamic_irq_cleanup(irq);
399 return desc->action != NULL;
400} 337}
401 338
402/* Dynamic irq helper functions */
403extern void dynamic_irq_init(unsigned int irq);
404void dynamic_irq_init_keep_chip_data(unsigned int irq);
405extern void dynamic_irq_cleanup(unsigned int irq);
406void dynamic_irq_cleanup_keep_chip_data(unsigned int irq);
407
408/* Set/get chip/data for an IRQ: */ 339/* Set/get chip/data for an IRQ: */
409extern int set_irq_chip(unsigned int irq, struct irq_chip *chip); 340extern int set_irq_chip(unsigned int irq, struct irq_chip *chip);
410extern int set_irq_data(unsigned int irq, void *data); 341extern int set_irq_data(unsigned int irq, void *data);
411extern int set_irq_chip_data(unsigned int irq, void *data); 342extern int set_irq_chip_data(unsigned int irq, void *data);
412extern int set_irq_type(unsigned int irq, unsigned int type); 343extern int set_irq_type(unsigned int irq, unsigned int type);
413extern int set_irq_msi(unsigned int irq, struct msi_desc *entry); 344extern int set_irq_msi(unsigned int irq, struct msi_desc *entry);
345extern struct irq_data *irq_get_irq_data(unsigned int irq);
414 346
415#define get_irq_chip(irq) (irq_to_desc(irq)->chip) 347static inline struct irq_chip *get_irq_chip(unsigned int irq)
416#define get_irq_chip_data(irq) (irq_to_desc(irq)->chip_data)
417#define get_irq_data(irq) (irq_to_desc(irq)->handler_data)
418#define get_irq_msi(irq) (irq_to_desc(irq)->msi_desc)
419
420#define get_irq_desc_chip(desc) ((desc)->chip)
421#define get_irq_desc_chip_data(desc) ((desc)->chip_data)
422#define get_irq_desc_data(desc) ((desc)->handler_data)
423#define get_irq_desc_msi(desc) ((desc)->msi_desc)
424
425#endif /* CONFIG_GENERIC_HARDIRQS */
426
427#endif /* !CONFIG_S390 */
428
429#ifdef CONFIG_SMP
430/**
431 * alloc_desc_masks - allocate cpumasks for irq_desc
432 * @desc: pointer to irq_desc struct
433 * @node: node which will be handling the cpumasks
434 * @boot: true if need bootmem
435 *
436 * Allocates affinity and pending_mask cpumask if required.
437 * Returns true if successful (or not required).
438 */
439static inline bool alloc_desc_masks(struct irq_desc *desc, int node,
440 bool boot)
441{ 348{
442 gfp_t gfp = GFP_ATOMIC; 349 struct irq_data *d = irq_get_irq_data(irq);
443 350 return d ? d->chip : NULL;
444 if (boot) 351}
445 gfp = GFP_NOWAIT;
446
447#ifdef CONFIG_CPUMASK_OFFSTACK
448 if (!alloc_cpumask_var_node(&desc->affinity, gfp, node))
449 return false;
450 352
451#ifdef CONFIG_GENERIC_PENDING_IRQ 353static inline struct irq_chip *irq_data_get_irq_chip(struct irq_data *d)
452 if (!alloc_cpumask_var_node(&desc->pending_mask, gfp, node)) { 354{
453 free_cpumask_var(desc->affinity); 355 return d->chip;
454 return false;
455 }
456#endif
457#endif
458 return true;
459} 356}
460 357
461static inline void init_desc_masks(struct irq_desc *desc) 358static inline void *get_irq_chip_data(unsigned int irq)
462{ 359{
463 cpumask_setall(desc->affinity); 360 struct irq_data *d = irq_get_irq_data(irq);
464#ifdef CONFIG_GENERIC_PENDING_IRQ 361 return d ? d->chip_data : NULL;
465 cpumask_clear(desc->pending_mask);
466#endif
467} 362}
468 363
469/** 364static inline void *irq_data_get_irq_chip_data(struct irq_data *d)
470 * init_copy_desc_masks - copy cpumasks for irq_desc 365{
471 * @old_desc: pointer to old irq_desc struct 366 return d->chip_data;
472 * @new_desc: pointer to new irq_desc struct 367}
473 *
474 * Insures affinity and pending_masks are copied to new irq_desc.
475 * If !CONFIG_CPUMASKS_OFFSTACK the cpumasks are embedded in the
476 * irq_desc struct so the copy is redundant.
477 */
478 368
479static inline void init_copy_desc_masks(struct irq_desc *old_desc, 369static inline void *get_irq_data(unsigned int irq)
480 struct irq_desc *new_desc)
481{ 370{
482#ifdef CONFIG_CPUMASK_OFFSTACK 371 struct irq_data *d = irq_get_irq_data(irq);
483 cpumask_copy(new_desc->affinity, old_desc->affinity); 372 return d ? d->handler_data : NULL;
373}
484 374
485#ifdef CONFIG_GENERIC_PENDING_IRQ 375static inline void *irq_data_get_irq_data(struct irq_data *d)
486 cpumask_copy(new_desc->pending_mask, old_desc->pending_mask); 376{
487#endif 377 return d->handler_data;
488#endif
489} 378}
490 379
491static inline void free_desc_masks(struct irq_desc *old_desc, 380static inline struct msi_desc *get_irq_msi(unsigned int irq)
492 struct irq_desc *new_desc)
493{ 381{
494 free_cpumask_var(old_desc->affinity); 382 struct irq_data *d = irq_get_irq_data(irq);
383 return d ? d->msi_desc : NULL;
384}
495 385
496#ifdef CONFIG_GENERIC_PENDING_IRQ 386static inline struct msi_desc *irq_data_get_msi(struct irq_data *d)
497 free_cpumask_var(old_desc->pending_mask); 387{
498#endif 388 return d->msi_desc;
499} 389}
500 390
501#else /* !CONFIG_SMP */ 391int irq_alloc_descs(int irq, unsigned int from, unsigned int cnt, int node);
392void irq_free_descs(unsigned int irq, unsigned int cnt);
393int irq_reserve_irqs(unsigned int from, unsigned int cnt);
502 394
503static inline bool alloc_desc_masks(struct irq_desc *desc, int node, 395static inline int irq_alloc_desc(int node)
504 bool boot)
505{ 396{
506 return true; 397 return irq_alloc_descs(-1, 0, 1, node);
507} 398}
508 399
509static inline void init_desc_masks(struct irq_desc *desc) 400static inline int irq_alloc_desc_at(unsigned int at, int node)
510{ 401{
402 return irq_alloc_descs(at, at, 1, node);
511} 403}
512 404
513static inline void init_copy_desc_masks(struct irq_desc *old_desc, 405static inline int irq_alloc_desc_from(unsigned int from, int node)
514 struct irq_desc *new_desc)
515{ 406{
407 return irq_alloc_descs(-1, from, 1, node);
516} 408}
517 409
518static inline void free_desc_masks(struct irq_desc *old_desc, 410static inline void irq_free_desc(unsigned int irq)
519 struct irq_desc *new_desc)
520{ 411{
412 irq_free_descs(irq, 1);
521} 413}
522#endif /* CONFIG_SMP */ 414
415#endif /* CONFIG_GENERIC_HARDIRQS */
416
417#endif /* !CONFIG_S390 */
523 418
524#endif /* _LINUX_IRQ_H */ 419#endif /* _LINUX_IRQ_H */
diff --git a/include/linux/irq_work.h b/include/linux/irq_work.h
new file mode 100644
index 000000000000..4fa09d4d0b71
--- /dev/null
+++ b/include/linux/irq_work.h
@@ -0,0 +1,20 @@
1#ifndef _LINUX_IRQ_WORK_H
2#define _LINUX_IRQ_WORK_H
3
4struct irq_work {
5 struct irq_work *next;
6 void (*func)(struct irq_work *);
7};
8
9static inline
10void init_irq_work(struct irq_work *entry, void (*func)(struct irq_work *))
11{
12 entry->next = NULL;
13 entry->func = func;
14}
15
16bool irq_work_queue(struct irq_work *entry);
17void irq_work_run(void);
18void irq_work_sync(struct irq_work *entry);
19
20#endif /* _LINUX_IRQ_WORK_H */
diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h
new file mode 100644
index 000000000000..979c68cc7458
--- /dev/null
+++ b/include/linux/irqdesc.h
@@ -0,0 +1,159 @@
1#ifndef _LINUX_IRQDESC_H
2#define _LINUX_IRQDESC_H
3
4/*
5 * Core internal functions to deal with irq descriptors
6 *
7 * This include will move to kernel/irq once we have cleaned up the tree.
8 * For now it's included from <linux/irq.h>
9 */
10
11struct proc_dir_entry;
12struct timer_rand_state;
13/**
14 * struct irq_desc - interrupt descriptor
15 * @irq_data: per irq and chip data passed down to chip functions
16 * @timer_rand_state: pointer to timer rand state struct
17 * @kstat_irqs: irq stats per cpu
18 * @handle_irq: highlevel irq-events handler [if NULL, __do_IRQ()]
19 * @action: the irq action chain
20 * @status: status information
21 * @depth: disable-depth, for nested irq_disable() calls
22 * @wake_depth: enable depth, for multiple set_irq_wake() callers
23 * @irq_count: stats field to detect stalled irqs
24 * @last_unhandled: aging timer for unhandled count
25 * @irqs_unhandled: stats field for spurious unhandled interrupts
26 * @lock: locking for SMP
27 * @pending_mask: pending rebalanced interrupts
28 * @threads_active: number of irqaction threads currently running
29 * @wait_for_threads: wait queue for sync_irq to wait for threaded handlers
30 * @dir: /proc/irq/ procfs entry
31 * @name: flow handler name for /proc/interrupts output
32 */
33struct irq_desc {
34
35#ifdef CONFIG_GENERIC_HARDIRQS_NO_DEPRECATED
36 struct irq_data irq_data;
37#else
38 /*
39 * This union will go away once we have fixed the direct access to
40 * irq_desc all over the place. The direct fields are a 1:1
41 * overlay of irq_data.
42 */
43 union {
44 struct irq_data irq_data;
45 struct {
46 unsigned int irq;
47 unsigned int node;
48 struct irq_chip *chip;
49 void *handler_data;
50 void *chip_data;
51 struct msi_desc *msi_desc;
52#ifdef CONFIG_SMP
53 cpumask_var_t affinity;
54#endif
55 };
56 };
57#endif
58
59 struct timer_rand_state *timer_rand_state;
60 unsigned int *kstat_irqs;
61 irq_flow_handler_t handle_irq;
62 struct irqaction *action; /* IRQ action list */
63 unsigned int status; /* IRQ status */
64
65 unsigned int depth; /* nested irq disables */
66 unsigned int wake_depth; /* nested wake enables */
67 unsigned int irq_count; /* For detecting broken IRQs */
68 unsigned long last_unhandled; /* Aging timer for unhandled count */
69 unsigned int irqs_unhandled;
70 raw_spinlock_t lock;
71#ifdef CONFIG_SMP
72 const struct cpumask *affinity_hint;
73#ifdef CONFIG_GENERIC_PENDING_IRQ
74 cpumask_var_t pending_mask;
75#endif
76#endif
77 atomic_t threads_active;
78 wait_queue_head_t wait_for_threads;
79#ifdef CONFIG_PROC_FS
80 struct proc_dir_entry *dir;
81#endif
82 const char *name;
83} ____cacheline_internodealigned_in_smp;
84
85#ifndef CONFIG_SPARSE_IRQ
86extern struct irq_desc irq_desc[NR_IRQS];
87#endif
88
89/* Will be removed once the last users in power and sh are gone */
90extern struct irq_desc *irq_to_desc_alloc_node(unsigned int irq, int node);
91static inline struct irq_desc *move_irq_desc(struct irq_desc *desc, int node)
92{
93 return desc;
94}
95
96#ifdef CONFIG_GENERIC_HARDIRQS
97
98#define get_irq_desc_chip(desc) ((desc)->irq_data.chip)
99#define get_irq_desc_chip_data(desc) ((desc)->irq_data.chip_data)
100#define get_irq_desc_data(desc) ((desc)->irq_data.handler_data)
101#define get_irq_desc_msi(desc) ((desc)->irq_data.msi_desc)
102
103/*
104 * Monolithic do_IRQ implementation.
105 */
106#ifndef CONFIG_GENERIC_HARDIRQS_NO__DO_IRQ
107extern unsigned int __do_IRQ(unsigned int irq);
108#endif
109
110/*
111 * Architectures call this to let the generic IRQ layer
112 * handle an interrupt. If the descriptor is attached to an
113 * irqchip-style controller then we call the ->handle_irq() handler,
114 * and it calls __do_IRQ() if it's attached to an irqtype-style controller.
115 */
116static inline void generic_handle_irq_desc(unsigned int irq, struct irq_desc *desc)
117{
118#ifdef CONFIG_GENERIC_HARDIRQS_NO__DO_IRQ
119 desc->handle_irq(irq, desc);
120#else
121 if (likely(desc->handle_irq))
122 desc->handle_irq(irq, desc);
123 else
124 __do_IRQ(irq);
125#endif
126}
127
128static inline void generic_handle_irq(unsigned int irq)
129{
130 generic_handle_irq_desc(irq, irq_to_desc(irq));
131}
132
133/* Test to see if a driver has successfully requested an irq */
134static inline int irq_has_action(unsigned int irq)
135{
136 struct irq_desc *desc = irq_to_desc(irq);
137 return desc->action != NULL;
138}
139
140static inline int irq_balancing_disabled(unsigned int irq)
141{
142 struct irq_desc *desc;
143
144 desc = irq_to_desc(irq);
145 return desc->status & IRQ_NO_BALANCING_MASK;
146}
147
148/* caller has locked the irq_desc and both params are valid */
149static inline void __set_irq_handler_unlocked(int irq,
150 irq_flow_handler_t handler)
151{
152 struct irq_desc *desc;
153
154 desc = irq_to_desc(irq);
155 desc->handle_irq = handler;
156}
157#endif
158
159#endif
diff --git a/include/linux/irqflags.h b/include/linux/irqflags.h
index 006bf45eae30..d176d658fe25 100644
--- a/include/linux/irqflags.h
+++ b/include/linux/irqflags.h
@@ -12,6 +12,7 @@
12#define _LINUX_TRACE_IRQFLAGS_H 12#define _LINUX_TRACE_IRQFLAGS_H
13 13
14#include <linux/typecheck.h> 14#include <linux/typecheck.h>
15#include <asm/irqflags.h>
15 16
16#ifdef CONFIG_TRACE_IRQFLAGS 17#ifdef CONFIG_TRACE_IRQFLAGS
17 extern void trace_softirqs_on(unsigned long ip); 18 extern void trace_softirqs_on(unsigned long ip);
@@ -52,17 +53,45 @@
52# define start_critical_timings() do { } while (0) 53# define start_critical_timings() do { } while (0)
53#endif 54#endif
54 55
55#ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT 56/*
56 57 * Wrap the arch provided IRQ routines to provide appropriate checks.
57#include <asm/irqflags.h> 58 */
59#define raw_local_irq_disable() arch_local_irq_disable()
60#define raw_local_irq_enable() arch_local_irq_enable()
61#define raw_local_irq_save(flags) \
62 do { \
63 typecheck(unsigned long, flags); \
64 flags = arch_local_irq_save(); \
65 } while (0)
66#define raw_local_irq_restore(flags) \
67 do { \
68 typecheck(unsigned long, flags); \
69 arch_local_irq_restore(flags); \
70 } while (0)
71#define raw_local_save_flags(flags) \
72 do { \
73 typecheck(unsigned long, flags); \
74 flags = arch_local_save_flags(); \
75 } while (0)
76#define raw_irqs_disabled_flags(flags) \
77 ({ \
78 typecheck(unsigned long, flags); \
79 arch_irqs_disabled_flags(flags); \
80 })
81#define raw_irqs_disabled() (arch_irqs_disabled())
82#define raw_safe_halt() arch_safe_halt()
58 83
84/*
85 * The local_irq_*() APIs are equal to the raw_local_irq*()
86 * if !TRACE_IRQFLAGS.
87 */
88#ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
59#define local_irq_enable() \ 89#define local_irq_enable() \
60 do { trace_hardirqs_on(); raw_local_irq_enable(); } while (0) 90 do { trace_hardirqs_on(); raw_local_irq_enable(); } while (0)
61#define local_irq_disable() \ 91#define local_irq_disable() \
62 do { raw_local_irq_disable(); trace_hardirqs_off(); } while (0) 92 do { raw_local_irq_disable(); trace_hardirqs_off(); } while (0)
63#define local_irq_save(flags) \ 93#define local_irq_save(flags) \
64 do { \ 94 do { \
65 typecheck(unsigned long, flags); \
66 raw_local_irq_save(flags); \ 95 raw_local_irq_save(flags); \
67 trace_hardirqs_off(); \ 96 trace_hardirqs_off(); \
68 } while (0) 97 } while (0)
@@ -70,7 +99,6 @@
70 99
71#define local_irq_restore(flags) \ 100#define local_irq_restore(flags) \
72 do { \ 101 do { \
73 typecheck(unsigned long, flags); \
74 if (raw_irqs_disabled_flags(flags)) { \ 102 if (raw_irqs_disabled_flags(flags)) { \
75 raw_local_irq_restore(flags); \ 103 raw_local_irq_restore(flags); \
76 trace_hardirqs_off(); \ 104 trace_hardirqs_off(); \
@@ -79,51 +107,44 @@
79 raw_local_irq_restore(flags); \ 107 raw_local_irq_restore(flags); \
80 } \ 108 } \
81 } while (0) 109 } while (0)
82#else /* !CONFIG_TRACE_IRQFLAGS_SUPPORT */ 110#define local_save_flags(flags) \
83/*
84 * The local_irq_*() APIs are equal to the raw_local_irq*()
85 * if !TRACE_IRQFLAGS.
86 */
87# define raw_local_irq_disable() local_irq_disable()
88# define raw_local_irq_enable() local_irq_enable()
89# define raw_local_irq_save(flags) \
90 do { \
91 typecheck(unsigned long, flags); \
92 local_irq_save(flags); \
93 } while (0)
94# define raw_local_irq_restore(flags) \
95 do { \ 111 do { \
96 typecheck(unsigned long, flags); \ 112 raw_local_save_flags(flags); \
97 local_irq_restore(flags); \
98 } while (0) 113 } while (0)
99#endif /* CONFIG_TRACE_IRQFLAGS_SUPPORT */
100 114
101#ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT 115#define irqs_disabled_flags(flags) \
102#define safe_halt() \ 116 ({ \
103 do { \ 117 raw_irqs_disabled_flags(flags); \
104 trace_hardirqs_on(); \ 118 })
105 raw_safe_halt(); \
106 } while (0)
107 119
108#define local_save_flags(flags) \ 120#define irqs_disabled() \
109 do { \ 121 ({ \
110 typecheck(unsigned long, flags); \ 122 unsigned long _flags; \
111 raw_local_save_flags(flags); \ 123 raw_local_save_flags(_flags); \
124 raw_irqs_disabled_flags(_flags); \
125 })
126
127#define safe_halt() \
128 do { \
129 trace_hardirqs_on(); \
130 raw_safe_halt(); \
112 } while (0) 131 } while (0)
113 132
114#define irqs_disabled() \
115({ \
116 unsigned long _flags; \
117 \
118 raw_local_save_flags(_flags); \
119 raw_irqs_disabled_flags(_flags); \
120})
121 133
122#define irqs_disabled_flags(flags) \ 134#else /* !CONFIG_TRACE_IRQFLAGS_SUPPORT */
123({ \ 135
124 typecheck(unsigned long, flags); \ 136#define local_irq_enable() do { raw_local_irq_enable(); } while (0)
125 raw_irqs_disabled_flags(flags); \ 137#define local_irq_disable() do { raw_local_irq_disable(); } while (0)
126}) 138#define local_irq_save(flags) \
139 do { \
140 raw_local_irq_save(flags); \
141 } while (0)
142#define local_irq_restore(flags) do { raw_local_irq_restore(flags); } while (0)
143#define local_save_flags(flags) do { raw_local_save_flags(flags); } while (0)
144#define irqs_disabled() (raw_irqs_disabled())
145#define irqs_disabled_flags(flags) (raw_irqs_disabled_flags(flags))
146#define safe_halt() do { raw_safe_halt(); } while (0)
147
127#endif /* CONFIG_TRACE_IRQFLAGS_SUPPORT */ 148#endif /* CONFIG_TRACE_IRQFLAGS_SUPPORT */
128 149
129#endif 150#endif
diff --git a/include/linux/irqnr.h b/include/linux/irqnr.h
index 7bf89bc8cbca..05aa8c23483f 100644
--- a/include/linux/irqnr.h
+++ b/include/linux/irqnr.h
@@ -25,6 +25,7 @@
25 25
26extern int nr_irqs; 26extern int nr_irqs;
27extern struct irq_desc *irq_to_desc(unsigned int irq); 27extern struct irq_desc *irq_to_desc(unsigned int irq);
28unsigned int irq_get_next_irq(unsigned int offset);
28 29
29# define for_each_irq_desc(irq, desc) \ 30# define for_each_irq_desc(irq, desc) \
30 for (irq = 0, desc = irq_to_desc(irq); irq < nr_irqs; \ 31 for (irq = 0, desc = irq_to_desc(irq); irq < nr_irqs; \
@@ -47,6 +48,10 @@ extern struct irq_desc *irq_to_desc(unsigned int irq);
47#define irq_node(irq) 0 48#define irq_node(irq) 0
48#endif 49#endif
49 50
51# define for_each_active_irq(irq) \
52 for (irq = irq_get_next_irq(0); irq < nr_irqs; \
53 irq = irq_get_next_irq(irq + 1))
54
50#endif /* CONFIG_GENERIC_HARDIRQS */ 55#endif /* CONFIG_GENERIC_HARDIRQS */
51 56
52#define for_each_irq_nr(irq) \ 57#define for_each_irq_nr(irq) \
diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h
new file mode 100644
index 000000000000..b67cb180e6e9
--- /dev/null
+++ b/include/linux/jump_label.h
@@ -0,0 +1,74 @@
1#ifndef _LINUX_JUMP_LABEL_H
2#define _LINUX_JUMP_LABEL_H
3
4#if defined(CC_HAVE_ASM_GOTO) && defined(CONFIG_HAVE_ARCH_JUMP_LABEL)
5# include <asm/jump_label.h>
6# define HAVE_JUMP_LABEL
7#endif
8
9enum jump_label_type {
10 JUMP_LABEL_ENABLE,
11 JUMP_LABEL_DISABLE
12};
13
14struct module;
15
16#ifdef HAVE_JUMP_LABEL
17
18extern struct jump_entry __start___jump_table[];
19extern struct jump_entry __stop___jump_table[];
20
21extern void arch_jump_label_transform(struct jump_entry *entry,
22 enum jump_label_type type);
23extern void arch_jump_label_text_poke_early(jump_label_t addr);
24extern void jump_label_update(unsigned long key, enum jump_label_type type);
25extern void jump_label_apply_nops(struct module *mod);
26extern int jump_label_text_reserved(void *start, void *end);
27
28#define jump_label_enable(key) \
29 jump_label_update((unsigned long)key, JUMP_LABEL_ENABLE);
30
31#define jump_label_disable(key) \
32 jump_label_update((unsigned long)key, JUMP_LABEL_DISABLE);
33
34#else
35
36#define JUMP_LABEL(key, label) \
37do { \
38 if (unlikely(*key)) \
39 goto label; \
40} while (0)
41
42#define jump_label_enable(cond_var) \
43do { \
44 *(cond_var) = 1; \
45} while (0)
46
47#define jump_label_disable(cond_var) \
48do { \
49 *(cond_var) = 0; \
50} while (0)
51
52static inline int jump_label_apply_nops(struct module *mod)
53{
54 return 0;
55}
56
57static inline int jump_label_text_reserved(void *start, void *end)
58{
59 return 0;
60}
61
62#endif
63
64#define COND_STMT(key, stmt) \
65do { \
66 __label__ jl_enabled; \
67 JUMP_LABEL(key, jl_enabled); \
68 if (0) { \
69jl_enabled: \
70 stmt; \
71 } \
72} while (0)
73
74#endif
diff --git a/include/linux/jump_label_ref.h b/include/linux/jump_label_ref.h
new file mode 100644
index 000000000000..e5d012ad92c6
--- /dev/null
+++ b/include/linux/jump_label_ref.h
@@ -0,0 +1,44 @@
1#ifndef _LINUX_JUMP_LABEL_REF_H
2#define _LINUX_JUMP_LABEL_REF_H
3
4#include <linux/jump_label.h>
5#include <asm/atomic.h>
6
7#ifdef HAVE_JUMP_LABEL
8
9static inline void jump_label_inc(atomic_t *key)
10{
11 if (atomic_add_return(1, key) == 1)
12 jump_label_enable(key);
13}
14
15static inline void jump_label_dec(atomic_t *key)
16{
17 if (atomic_dec_and_test(key))
18 jump_label_disable(key);
19}
20
21#else /* !HAVE_JUMP_LABEL */
22
23static inline void jump_label_inc(atomic_t *key)
24{
25 atomic_inc(key);
26}
27
28static inline void jump_label_dec(atomic_t *key)
29{
30 atomic_dec(key);
31}
32
33#undef JUMP_LABEL
34#define JUMP_LABEL(key, label) \
35do { \
36 if (unlikely(__builtin_choose_expr( \
37 __builtin_types_compatible_p(typeof(key), atomic_t *), \
38 atomic_read((atomic_t *)(key)), *(key)))) \
39 goto label; \
40} while (0)
41
42#endif /* HAVE_JUMP_LABEL */
43
44#endif /* _LINUX_JUMP_LABEL_REF_H */
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 2b0a35e6bc69..1759ba5adce8 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -58,7 +58,18 @@ extern const char linux_proc_banner[];
58 58
59#define FIELD_SIZEOF(t, f) (sizeof(((t*)0)->f)) 59#define FIELD_SIZEOF(t, f) (sizeof(((t*)0)->f))
60#define DIV_ROUND_UP(n,d) (((n) + (d) - 1) / (d)) 60#define DIV_ROUND_UP(n,d) (((n) + (d) - 1) / (d))
61#define roundup(x, y) ((((x) + ((y) - 1)) / (y)) * (y)) 61#define roundup(x, y) ( \
62{ \
63 typeof(y) __y = y; \
64 (((x) + (__y - 1)) / __y) * __y; \
65} \
66)
67#define rounddown(x, y) ( \
68{ \
69 typeof(x) __x = (x); \
70 __x - (__x % (y)); \
71} \
72)
62#define DIV_ROUND_CLOSEST(x, divisor)( \ 73#define DIV_ROUND_CLOSEST(x, divisor)( \
63{ \ 74{ \
64 typeof(divisor) __divisor = divisor; \ 75 typeof(divisor) __divisor = divisor; \
diff --git a/include/linux/key.h b/include/linux/key.h
index cd50dfa1d4c2..3db0adce1fda 100644
--- a/include/linux/key.h
+++ b/include/linux/key.h
@@ -178,8 +178,9 @@ struct key {
178 */ 178 */
179 union { 179 union {
180 unsigned long value; 180 unsigned long value;
181 void __rcu *rcudata;
181 void *data; 182 void *data;
182 struct keyring_list *subscriptions; 183 struct keyring_list __rcu *subscriptions;
183 } payload; 184 } payload;
184}; 185};
185 186
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index c13cc48697aa..ac740b26eb10 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -205,7 +205,7 @@ struct kvm {
205 205
206 struct mutex irq_lock; 206 struct mutex irq_lock;
207#ifdef CONFIG_HAVE_KVM_IRQCHIP 207#ifdef CONFIG_HAVE_KVM_IRQCHIP
208 struct kvm_irq_routing_table *irq_routing; 208 struct kvm_irq_routing_table __rcu *irq_routing;
209 struct hlist_head mask_notifier_list; 209 struct hlist_head mask_notifier_list;
210 struct hlist_head irq_ack_notifier_list; 210 struct hlist_head irq_ack_notifier_list;
211#endif 211#endif
diff --git a/include/linux/libata.h b/include/linux/libata.h
index 45fb2967b66d..15b77b8dc7e1 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -37,6 +37,7 @@
37#include <scsi/scsi_host.h> 37#include <scsi/scsi_host.h>
38#include <linux/acpi.h> 38#include <linux/acpi.h>
39#include <linux/cdrom.h> 39#include <linux/cdrom.h>
40#include <linux/sched.h>
40 41
41/* 42/*
42 * Define if arch has non-standard setup. This is a _PCI_ standard 43 * Define if arch has non-standard setup. This is a _PCI_ standard
@@ -172,6 +173,7 @@ enum {
172 ATA_LFLAG_NO_RETRY = (1 << 5), /* don't retry this link */ 173 ATA_LFLAG_NO_RETRY = (1 << 5), /* don't retry this link */
173 ATA_LFLAG_DISABLED = (1 << 6), /* link is disabled */ 174 ATA_LFLAG_DISABLED = (1 << 6), /* link is disabled */
174 ATA_LFLAG_SW_ACTIVITY = (1 << 7), /* keep activity stats */ 175 ATA_LFLAG_SW_ACTIVITY = (1 << 7), /* keep activity stats */
176 ATA_LFLAG_NO_LPM = (1 << 8), /* disable LPM on this link */
175 177
176 /* struct ata_port flags */ 178 /* struct ata_port flags */
177 ATA_FLAG_SLAVE_POSS = (1 << 0), /* host supports slave dev */ 179 ATA_FLAG_SLAVE_POSS = (1 << 0), /* host supports slave dev */
@@ -196,7 +198,7 @@ enum {
196 ATA_FLAG_ACPI_SATA = (1 << 17), /* need native SATA ACPI layout */ 198 ATA_FLAG_ACPI_SATA = (1 << 17), /* need native SATA ACPI layout */
197 ATA_FLAG_AN = (1 << 18), /* controller supports AN */ 199 ATA_FLAG_AN = (1 << 18), /* controller supports AN */
198 ATA_FLAG_PMP = (1 << 19), /* controller supports PMP */ 200 ATA_FLAG_PMP = (1 << 19), /* controller supports PMP */
199 ATA_FLAG_IPM = (1 << 20), /* driver can handle IPM */ 201 ATA_FLAG_LPM = (1 << 20), /* driver can handle LPM */
200 ATA_FLAG_EM = (1 << 21), /* driver supports enclosure 202 ATA_FLAG_EM = (1 << 21), /* driver supports enclosure
201 * management */ 203 * management */
202 ATA_FLAG_SW_ACTIVITY = (1 << 22), /* driver supports sw activity 204 ATA_FLAG_SW_ACTIVITY = (1 << 22), /* driver supports sw activity
@@ -324,12 +326,11 @@ enum {
324 ATA_EH_HARDRESET = (1 << 2), /* meaningful only in ->prereset */ 326 ATA_EH_HARDRESET = (1 << 2), /* meaningful only in ->prereset */
325 ATA_EH_RESET = ATA_EH_SOFTRESET | ATA_EH_HARDRESET, 327 ATA_EH_RESET = ATA_EH_SOFTRESET | ATA_EH_HARDRESET,
326 ATA_EH_ENABLE_LINK = (1 << 3), 328 ATA_EH_ENABLE_LINK = (1 << 3),
327 ATA_EH_LPM = (1 << 4), /* link power management action */
328 ATA_EH_PARK = (1 << 5), /* unload heads and stop I/O */ 329 ATA_EH_PARK = (1 << 5), /* unload heads and stop I/O */
329 330
330 ATA_EH_PERDEV_MASK = ATA_EH_REVALIDATE | ATA_EH_PARK, 331 ATA_EH_PERDEV_MASK = ATA_EH_REVALIDATE | ATA_EH_PARK,
331 ATA_EH_ALL_ACTIONS = ATA_EH_REVALIDATE | ATA_EH_RESET | 332 ATA_EH_ALL_ACTIONS = ATA_EH_REVALIDATE | ATA_EH_RESET |
332 ATA_EH_ENABLE_LINK | ATA_EH_LPM, 333 ATA_EH_ENABLE_LINK,
333 334
334 /* ata_eh_info->flags */ 335 /* ata_eh_info->flags */
335 ATA_EHI_HOTPLUGGED = (1 << 0), /* could have been hotplugged */ 336 ATA_EHI_HOTPLUGGED = (1 << 0), /* could have been hotplugged */
@@ -341,7 +342,7 @@ enum {
341 ATA_EHI_DID_HARDRESET = (1 << 17), /* already soft-reset this port */ 342 ATA_EHI_DID_HARDRESET = (1 << 17), /* already soft-reset this port */
342 ATA_EHI_PRINTINFO = (1 << 18), /* print configuration info */ 343 ATA_EHI_PRINTINFO = (1 << 18), /* print configuration info */
343 ATA_EHI_SETMODE = (1 << 19), /* configure transfer mode */ 344 ATA_EHI_SETMODE = (1 << 19), /* configure transfer mode */
344 ATA_EHI_POST_SETMODE = (1 << 20), /* revaildating after setmode */ 345 ATA_EHI_POST_SETMODE = (1 << 20), /* revalidating after setmode */
345 346
346 ATA_EHI_DID_RESET = ATA_EHI_DID_SOFTRESET | ATA_EHI_DID_HARDRESET, 347 ATA_EHI_DID_RESET = ATA_EHI_DID_SOFTRESET | ATA_EHI_DID_HARDRESET,
347 348
@@ -377,7 +378,6 @@ enum {
377 ATA_HORKAGE_BROKEN_HPA = (1 << 4), /* Broken HPA */ 378 ATA_HORKAGE_BROKEN_HPA = (1 << 4), /* Broken HPA */
378 ATA_HORKAGE_DISABLE = (1 << 5), /* Disable it */ 379 ATA_HORKAGE_DISABLE = (1 << 5), /* Disable it */
379 ATA_HORKAGE_HPA_SIZE = (1 << 6), /* native size off by one */ 380 ATA_HORKAGE_HPA_SIZE = (1 << 6), /* native size off by one */
380 ATA_HORKAGE_IPM = (1 << 7), /* Link PM problems */
381 ATA_HORKAGE_IVB = (1 << 8), /* cbl det validity bit bugs */ 381 ATA_HORKAGE_IVB = (1 << 8), /* cbl det validity bit bugs */
382 ATA_HORKAGE_STUCK_ERR = (1 << 9), /* stuck ERR on next PACKET */ 382 ATA_HORKAGE_STUCK_ERR = (1 << 9), /* stuck ERR on next PACKET */
383 ATA_HORKAGE_BRIDGE_OK = (1 << 10), /* no bridge limits */ 383 ATA_HORKAGE_BRIDGE_OK = (1 << 10), /* no bridge limits */
@@ -464,6 +464,22 @@ enum ata_completion_errors {
464 AC_ERR_NCQ = (1 << 10), /* marker for offending NCQ qc */ 464 AC_ERR_NCQ = (1 << 10), /* marker for offending NCQ qc */
465}; 465};
466 466
467/*
468 * Link power management policy: If you alter this, you also need to
469 * alter libata-scsi.c (for the ascii descriptions)
470 */
471enum ata_lpm_policy {
472 ATA_LPM_UNKNOWN,
473 ATA_LPM_MAX_POWER,
474 ATA_LPM_MED_POWER,
475 ATA_LPM_MIN_POWER,
476};
477
478enum ata_lpm_hints {
479 ATA_LPM_EMPTY = (1 << 0), /* port empty/probing */
480 ATA_LPM_HIPM = (1 << 1), /* may use HIPM */
481};
482
467/* forward declarations */ 483/* forward declarations */
468struct scsi_device; 484struct scsi_device;
469struct ata_port_operations; 485struct ata_port_operations;
@@ -478,16 +494,6 @@ typedef int (*ata_reset_fn_t)(struct ata_link *link, unsigned int *classes,
478 unsigned long deadline); 494 unsigned long deadline);
479typedef void (*ata_postreset_fn_t)(struct ata_link *link, unsigned int *classes); 495typedef void (*ata_postreset_fn_t)(struct ata_link *link, unsigned int *classes);
480 496
481/*
482 * host pm policy: If you alter this, you also need to alter libata-scsi.c
483 * (for the ascii descriptions)
484 */
485enum link_pm {
486 NOT_AVAILABLE,
487 MIN_POWER,
488 MAX_PERFORMANCE,
489 MEDIUM_POWER,
490};
491extern struct device_attribute dev_attr_link_power_management_policy; 497extern struct device_attribute dev_attr_link_power_management_policy;
492extern struct device_attribute dev_attr_unload_heads; 498extern struct device_attribute dev_attr_unload_heads;
493extern struct device_attribute dev_attr_em_message_type; 499extern struct device_attribute dev_attr_em_message_type;
@@ -530,6 +536,10 @@ struct ata_host {
530 void *private_data; 536 void *private_data;
531 struct ata_port_operations *ops; 537 struct ata_port_operations *ops;
532 unsigned long flags; 538 unsigned long flags;
539
540 struct mutex eh_mutex;
541 struct task_struct *eh_owner;
542
533#ifdef CONFIG_ATA_ACPI 543#ifdef CONFIG_ATA_ACPI
534 acpi_handle acpi_handle; 544 acpi_handle acpi_handle;
535#endif 545#endif
@@ -560,13 +570,13 @@ struct ata_queued_cmd {
560 unsigned int extrabytes; 570 unsigned int extrabytes;
561 unsigned int curbytes; 571 unsigned int curbytes;
562 572
563 struct scatterlist *cursg;
564 unsigned int cursg_ofs;
565
566 struct scatterlist sgent; 573 struct scatterlist sgent;
567 574
568 struct scatterlist *sg; 575 struct scatterlist *sg;
569 576
577 struct scatterlist *cursg;
578 unsigned int cursg_ofs;
579
570 unsigned int err_mask; 580 unsigned int err_mask;
571 struct ata_taskfile result_tf; 581 struct ata_taskfile result_tf;
572 ata_qc_cb_t complete_fn; 582 ata_qc_cb_t complete_fn;
@@ -604,6 +614,7 @@ struct ata_device {
604 union acpi_object *gtf_cache; 614 union acpi_object *gtf_cache;
605 unsigned int gtf_filter; 615 unsigned int gtf_filter;
606#endif 616#endif
617 struct device tdev;
607 /* n_sector is CLEAR_BEGIN, read comment above CLEAR_BEGIN */ 618 /* n_sector is CLEAR_BEGIN, read comment above CLEAR_BEGIN */
608 u64 n_sectors; /* size of device, if ATA */ 619 u64 n_sectors; /* size of device, if ATA */
609 u64 n_native_sectors; /* native size, if ATA */ 620 u64 n_native_sectors; /* native size, if ATA */
@@ -690,6 +701,7 @@ struct ata_link {
690 struct ata_port *ap; 701 struct ata_port *ap;
691 int pmp; /* port multiplier port # */ 702 int pmp; /* port multiplier port # */
692 703
704 struct device tdev;
693 unsigned int active_tag; /* active tag on this link */ 705 unsigned int active_tag; /* active tag on this link */
694 u32 sactive; /* active NCQ commands */ 706 u32 sactive; /* active NCQ commands */
695 707
@@ -699,6 +711,7 @@ struct ata_link {
699 unsigned int hw_sata_spd_limit; 711 unsigned int hw_sata_spd_limit;
700 unsigned int sata_spd_limit; 712 unsigned int sata_spd_limit;
701 unsigned int sata_spd; /* current SATA PHY speed */ 713 unsigned int sata_spd; /* current SATA PHY speed */
714 enum ata_lpm_policy lpm_policy;
702 715
703 /* record runtime error info, protected by host_set lock */ 716 /* record runtime error info, protected by host_set lock */
704 struct ata_eh_info eh_info; 717 struct ata_eh_info eh_info;
@@ -707,6 +720,8 @@ struct ata_link {
707 720
708 struct ata_device device[ATA_MAX_DEVICES]; 721 struct ata_device device[ATA_MAX_DEVICES];
709}; 722};
723#define ATA_LINK_CLEAR_BEGIN offsetof(struct ata_link, active_tag)
724#define ATA_LINK_CLEAR_END offsetof(struct ata_link, device[0])
710 725
711struct ata_port { 726struct ata_port {
712 struct Scsi_Host *scsi_host; /* our co-allocated scsi host */ 727 struct Scsi_Host *scsi_host; /* our co-allocated scsi host */
@@ -752,6 +767,7 @@ struct ata_port {
752 struct ata_port_stats stats; 767 struct ata_port_stats stats;
753 struct ata_host *host; 768 struct ata_host *host;
754 struct device *dev; 769 struct device *dev;
770 struct device tdev;
755 771
756 struct mutex scsi_scan_mutex; 772 struct mutex scsi_scan_mutex;
757 struct delayed_work hotplug_task; 773 struct delayed_work hotplug_task;
@@ -767,7 +783,7 @@ struct ata_port {
767 783
768 pm_message_t pm_mesg; 784 pm_message_t pm_mesg;
769 int *pm_result; 785 int *pm_result;
770 enum link_pm pm_policy; 786 enum ata_lpm_policy target_lpm_policy;
771 787
772 struct timer_list fastdrain_timer; 788 struct timer_list fastdrain_timer;
773 unsigned long fastdrain_cnt; 789 unsigned long fastdrain_cnt;
@@ -833,8 +849,8 @@ struct ata_port_operations {
833 int (*scr_write)(struct ata_link *link, unsigned int sc_reg, u32 val); 849 int (*scr_write)(struct ata_link *link, unsigned int sc_reg, u32 val);
834 void (*pmp_attach)(struct ata_port *ap); 850 void (*pmp_attach)(struct ata_port *ap);
835 void (*pmp_detach)(struct ata_port *ap); 851 void (*pmp_detach)(struct ata_port *ap);
836 int (*enable_pm)(struct ata_port *ap, enum link_pm policy); 852 int (*set_lpm)(struct ata_link *link, enum ata_lpm_policy policy,
837 void (*disable_pm)(struct ata_port *ap); 853 unsigned hints);
838 854
839 /* 855 /*
840 * Start, stop, suspend and resume 856 * Start, stop, suspend and resume
@@ -946,6 +962,8 @@ extern int sata_link_debounce(struct ata_link *link,
946 const unsigned long *params, unsigned long deadline); 962 const unsigned long *params, unsigned long deadline);
947extern int sata_link_resume(struct ata_link *link, const unsigned long *params, 963extern int sata_link_resume(struct ata_link *link, const unsigned long *params,
948 unsigned long deadline); 964 unsigned long deadline);
965extern int sata_link_scr_lpm(struct ata_link *link, enum ata_lpm_policy policy,
966 bool spm_wakeup);
949extern int sata_link_hardreset(struct ata_link *link, 967extern int sata_link_hardreset(struct ata_link *link,
950 const unsigned long *timing, unsigned long deadline, 968 const unsigned long *timing, unsigned long deadline,
951 bool *online, int (*check_ready)(struct ata_link *)); 969 bool *online, int (*check_ready)(struct ata_link *));
@@ -991,8 +1009,9 @@ extern int ata_host_suspend(struct ata_host *host, pm_message_t mesg);
991extern void ata_host_resume(struct ata_host *host); 1009extern void ata_host_resume(struct ata_host *host);
992#endif 1010#endif
993extern int ata_ratelimit(void); 1011extern int ata_ratelimit(void);
994extern u32 ata_wait_register(void __iomem *reg, u32 mask, u32 val, 1012extern void ata_msleep(struct ata_port *ap, unsigned int msecs);
995 unsigned long interval, unsigned long timeout); 1013extern u32 ata_wait_register(struct ata_port *ap, void __iomem *reg, u32 mask,
1014 u32 val, unsigned long interval, unsigned long timeout);
996extern int atapi_cmd_type(u8 opcode); 1015extern int atapi_cmd_type(u8 opcode);
997extern void ata_tf_to_fis(const struct ata_taskfile *tf, 1016extern void ata_tf_to_fis(const struct ata_taskfile *tf,
998 u8 pmp, int is_cmd, u8 *fis); 1017 u8 pmp, int is_cmd, u8 *fis);
diff --git a/include/linux/list.h b/include/linux/list.h
index d167b5d7c0ac..88a000617d77 100644
--- a/include/linux/list.h
+++ b/include/linux/list.h
@@ -5,7 +5,6 @@
5#include <linux/stddef.h> 5#include <linux/stddef.h>
6#include <linux/poison.h> 6#include <linux/poison.h>
7#include <linux/prefetch.h> 7#include <linux/prefetch.h>
8#include <asm/system.h>
9 8
10/* 9/*
11 * Simple doubly linked list implementation. 10 * Simple doubly linked list implementation.
diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h
index 06aed8305bf3..71c09b26c759 100644
--- a/include/linux/lockdep.h
+++ b/include/linux/lockdep.h
@@ -32,6 +32,17 @@ extern int lock_stat;
32#define MAX_LOCKDEP_SUBCLASSES 8UL 32#define MAX_LOCKDEP_SUBCLASSES 8UL
33 33
34/* 34/*
35 * NR_LOCKDEP_CACHING_CLASSES ... Number of classes
36 * cached in the instance of lockdep_map
37 *
38 * Currently the main class (subclass == 0) and the single-depth subclass
39 * are cached in lockdep_map. This optimization mainly targets
40 * rq->lock: double_rq_lock() acquires this highly contended lock
41 * with single depth.
42 */
43#define NR_LOCKDEP_CACHING_CLASSES 2
44
45/*
35 * Lock-classes are keyed via unique addresses, by embedding the 46 * Lock-classes are keyed via unique addresses, by embedding the
36 * lockclass-key into the kernel (or module) .data section. (For 47 * lockclass-key into the kernel (or module) .data section. (For
37 * static locks we use the lock address itself as the key.) 48 * static locks we use the lock address itself as the key.)
@@ -138,7 +149,7 @@ void clear_lock_stats(struct lock_class *class);
138 */ 149 */
139struct lockdep_map { 150struct lockdep_map {
140 struct lock_class_key *key; 151 struct lock_class_key *key;
141 struct lock_class *class_cache; 152 struct lock_class *class_cache[NR_LOCKDEP_CACHING_CLASSES];
142 const char *name; 153 const char *name;
143#ifdef CONFIG_LOCK_STAT 154#ifdef CONFIG_LOCK_STAT
144 int cpu; 155 int cpu;
@@ -424,14 +435,6 @@ do { \
424 435
425#endif /* CONFIG_LOCKDEP */ 436#endif /* CONFIG_LOCKDEP */
426 437
427#ifdef CONFIG_GENERIC_HARDIRQS
428extern void early_init_irq_lock_class(void);
429#else
430static inline void early_init_irq_lock_class(void)
431{
432}
433#endif
434
435#ifdef CONFIG_TRACE_IRQFLAGS 438#ifdef CONFIG_TRACE_IRQFLAGS
436extern void early_boot_irqs_off(void); 439extern void early_boot_irqs_off(void);
437extern void early_boot_irqs_on(void); 440extern void early_boot_irqs_on(void);
diff --git a/include/linux/memblock.h b/include/linux/memblock.h
index a59faf2b5edd..62a10c2a11f2 100644
--- a/include/linux/memblock.h
+++ b/include/linux/memblock.h
@@ -2,6 +2,7 @@
2#define _LINUX_MEMBLOCK_H 2#define _LINUX_MEMBLOCK_H
3#ifdef __KERNEL__ 3#ifdef __KERNEL__
4 4
5#ifdef CONFIG_HAVE_MEMBLOCK
5/* 6/*
6 * Logical memory blocks. 7 * Logical memory blocks.
7 * 8 *
@@ -16,73 +17,150 @@
16#include <linux/init.h> 17#include <linux/init.h>
17#include <linux/mm.h> 18#include <linux/mm.h>
18 19
19#define MAX_MEMBLOCK_REGIONS 128 20#include <asm/memblock.h>
20 21
21struct memblock_property { 22#define INIT_MEMBLOCK_REGIONS 128
22 u64 base; 23#define MEMBLOCK_ERROR 0
23 u64 size;
24};
25 24
26struct memblock_region { 25struct memblock_region {
27 unsigned long cnt; 26 phys_addr_t base;
28 u64 size; 27 phys_addr_t size;
29 struct memblock_property region[MAX_MEMBLOCK_REGIONS+1]; 28};
29
30struct memblock_type {
31 unsigned long cnt; /* number of regions */
32 unsigned long max; /* size of the allocated array */
33 struct memblock_region *regions;
30}; 34};
31 35
32struct memblock { 36struct memblock {
33 unsigned long debug; 37 phys_addr_t current_limit;
34 u64 rmo_size; 38 phys_addr_t memory_size; /* Updated by memblock_analyze() */
35 struct memblock_region memory; 39 struct memblock_type memory;
36 struct memblock_region reserved; 40 struct memblock_type reserved;
37}; 41};
38 42
39extern struct memblock memblock; 43extern struct memblock memblock;
44extern int memblock_debug;
45extern int memblock_can_resize;
40 46
41extern void __init memblock_init(void); 47#define memblock_dbg(fmt, ...) \
42extern void __init memblock_analyze(void); 48 if (memblock_debug) printk(KERN_INFO pr_fmt(fmt), ##__VA_ARGS__)
43extern long memblock_add(u64 base, u64 size); 49
44extern long memblock_remove(u64 base, u64 size); 50u64 memblock_find_in_range(u64 start, u64 end, u64 size, u64 align);
45extern long __init memblock_free(u64 base, u64 size); 51int memblock_free_reserved_regions(void);
46extern long __init memblock_reserve(u64 base, u64 size); 52int memblock_reserve_reserved_regions(void);
47extern u64 __init memblock_alloc_nid(u64 size, u64 align, int nid, 53
48 u64 (*nid_range)(u64, u64, int *)); 54extern void memblock_init(void);
49extern u64 __init memblock_alloc(u64 size, u64 align); 55extern void memblock_analyze(void);
50extern u64 __init memblock_alloc_base(u64 size, 56extern long memblock_add(phys_addr_t base, phys_addr_t size);
51 u64, u64 max_addr); 57extern long memblock_remove(phys_addr_t base, phys_addr_t size);
52extern u64 __init __memblock_alloc_base(u64 size, 58extern long memblock_free(phys_addr_t base, phys_addr_t size);
53 u64 align, u64 max_addr); 59extern long memblock_reserve(phys_addr_t base, phys_addr_t size);
54extern u64 __init memblock_phys_mem_size(void); 60
55extern u64 memblock_end_of_DRAM(void); 61/* The numa aware allocator is only available if
56extern void __init memblock_enforce_memory_limit(u64 memory_limit); 62 * CONFIG_ARCH_POPULATES_NODE_MAP is set
57extern int __init memblock_is_reserved(u64 addr); 63 */
58extern int memblock_is_region_reserved(u64 base, u64 size); 64extern phys_addr_t memblock_alloc_nid(phys_addr_t size, phys_addr_t align,
59extern int memblock_find(struct memblock_property *res); 65 int nid);
66extern phys_addr_t memblock_alloc_try_nid(phys_addr_t size, phys_addr_t align,
67 int nid);
68
69extern phys_addr_t memblock_alloc(phys_addr_t size, phys_addr_t align);
70
71/* Flags for memblock_alloc_base() and __memblock_alloc_base() */
72#define MEMBLOCK_ALLOC_ANYWHERE (~(phys_addr_t)0)
73#define MEMBLOCK_ALLOC_ACCESSIBLE 0
74
75extern phys_addr_t memblock_alloc_base(phys_addr_t size,
76 phys_addr_t align,
77 phys_addr_t max_addr);
78extern phys_addr_t __memblock_alloc_base(phys_addr_t size,
79 phys_addr_t align,
80 phys_addr_t max_addr);
81extern phys_addr_t memblock_phys_mem_size(void);
82extern phys_addr_t memblock_end_of_DRAM(void);
83extern void memblock_enforce_memory_limit(phys_addr_t memory_limit);
84extern int memblock_is_memory(phys_addr_t addr);
85extern int memblock_is_region_memory(phys_addr_t base, phys_addr_t size);
86extern int memblock_is_reserved(phys_addr_t addr);
87extern int memblock_is_region_reserved(phys_addr_t base, phys_addr_t size);
60 88
61extern void memblock_dump_all(void); 89extern void memblock_dump_all(void);
62 90
63static inline u64 91/* Provided by the architecture */
64memblock_size_bytes(struct memblock_region *type, unsigned long region_nr) 92extern phys_addr_t memblock_nid_range(phys_addr_t start, phys_addr_t end, int *nid);
93extern int memblock_memory_can_coalesce(phys_addr_t addr1, phys_addr_t size1,
94 phys_addr_t addr2, phys_addr_t size2);
95
96/**
97 * memblock_set_current_limit - Set the current allocation limit to allow
98 * limiting allocations to what is currently
99 * accessible during boot
100 * @limit: New limit value (physical address)
101 */
102extern void memblock_set_current_limit(phys_addr_t limit);
103
104
105/*
106 * pfn conversion functions
107 *
108 * While the memory MEMBLOCKs should always be page aligned, the reserved
109 * MEMBLOCKs may not be. These accessors attempt to provide a very clear
110 * idea of what they return for such non aligned MEMBLOCKs.
111 */
112
113/**
114 * memblock_region_memory_base_pfn - Return the lowest pfn intersecting with the memory region
115 * @reg: memblock_region structure
116 */
117static inline unsigned long memblock_region_memory_base_pfn(const struct memblock_region *reg)
65{ 118{
66 return type->region[region_nr].size; 119 return PFN_UP(reg->base);
67} 120}
68static inline u64 121
69memblock_size_pages(struct memblock_region *type, unsigned long region_nr) 122/**
123 * memblock_region_memory_end_pfn - Return the end_pfn of this region
124 * @reg: memblock_region structure
125 */
126static inline unsigned long memblock_region_memory_end_pfn(const struct memblock_region *reg)
70{ 127{
71 return memblock_size_bytes(type, region_nr) >> PAGE_SHIFT; 128 return PFN_DOWN(reg->base + reg->size);
72} 129}
73static inline u64 130
74memblock_start_pfn(struct memblock_region *type, unsigned long region_nr) 131/**
132 * memblock_region_reserved_base_pfn - Return the lowest pfn intersecting with the reserved region
133 * @reg: memblock_region structure
134 */
135static inline unsigned long memblock_region_reserved_base_pfn(const struct memblock_region *reg)
75{ 136{
76 return type->region[region_nr].base >> PAGE_SHIFT; 137 return PFN_DOWN(reg->base);
77} 138}
78static inline u64 139
79memblock_end_pfn(struct memblock_region *type, unsigned long region_nr) 140/**
141 * memblock_region_reserved_end_pfn - Return the end_pfn of this region
142 * @reg: memblock_region structure
143 */
144static inline unsigned long memblock_region_reserved_end_pfn(const struct memblock_region *reg)
80{ 145{
81 return memblock_start_pfn(type, region_nr) + 146 return PFN_UP(reg->base + reg->size);
82 memblock_size_pages(type, region_nr);
83} 147}
84 148
85#include <asm/memblock.h> 149#define for_each_memblock(memblock_type, region) \
150 for (region = memblock.memblock_type.regions; \
151 region < (memblock.memblock_type.regions + memblock.memblock_type.cnt); \
152 region++)
153
154
155#ifdef ARCH_DISCARD_MEMBLOCK
156#define __init_memblock __init
157#define __initdata_memblock __initdata
158#else
159#define __init_memblock
160#define __initdata_memblock
161#endif
162
163#endif /* CONFIG_HAVE_MEMBLOCK */
86 164
87#endif /* __KERNEL__ */ 165#endif /* __KERNEL__ */
88 166
diff --git a/include/linux/mfd/tc35892.h b/include/linux/mfd/tc35892.h
index e47f770d3068..eff3094ca84e 100644
--- a/include/linux/mfd/tc35892.h
+++ b/include/linux/mfd/tc35892.h
@@ -111,9 +111,13 @@ extern int tc35892_set_bits(struct tc35892 *tc35892, u8 reg, u8 mask, u8 val);
111 * struct tc35892_gpio_platform_data - TC35892 GPIO platform data 111 * struct tc35892_gpio_platform_data - TC35892 GPIO platform data
112 * @gpio_base: first gpio number assigned to TC35892. A maximum of 112 * @gpio_base: first gpio number assigned to TC35892. A maximum of
113 * %TC35892_NR_GPIOS GPIOs will be allocated. 113 * %TC35892_NR_GPIOS GPIOs will be allocated.
114 * @setup: callback for board-specific initialization
115 * @remove: callback for board-specific teardown
114 */ 116 */
115struct tc35892_gpio_platform_data { 117struct tc35892_gpio_platform_data {
116 int gpio_base; 118 int gpio_base;
119 void (*setup)(struct tc35892 *tc35892, unsigned gpio_base);
120 void (*remove)(struct tc35892 *tc35892, unsigned gpio_base);
117}; 121};
118 122
119/** 123/**
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 74949fbef8c6..7687228dd3b7 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1175,6 +1175,8 @@ extern void free_bootmem_with_active_regions(int nid,
1175 unsigned long max_low_pfn); 1175 unsigned long max_low_pfn);
1176int add_from_early_node_map(struct range *range, int az, 1176int add_from_early_node_map(struct range *range, int az,
1177 int nr_range, int nid); 1177 int nr_range, int nid);
1178u64 __init find_memory_core_early(int nid, u64 size, u64 align,
1179 u64 goal, u64 limit);
1178void *__alloc_memory_core_early(int nodeid, u64 size, u64 align, 1180void *__alloc_memory_core_early(int nodeid, u64 size, u64 align,
1179 u64 goal, u64 limit); 1181 u64 goal, u64 limit);
1180typedef int (*work_fn_t)(unsigned long, unsigned long, void *); 1182typedef int (*work_fn_t)(unsigned long, unsigned long, void *);
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index ee7e258627f9..cb57d657ce4d 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -299,7 +299,7 @@ struct mm_struct {
299 * new_owner->mm == mm 299 * new_owner->mm == mm
300 * new_owner->alloc_lock is held 300 * new_owner->alloc_lock is held
301 */ 301 */
302 struct task_struct *owner; 302 struct task_struct __rcu *owner;
303#endif 303#endif
304 304
305#ifdef CONFIG_PROC_FS 305#ifdef CONFIG_PROC_FS
diff --git a/include/linux/module.h b/include/linux/module.h
index 8a6b9fdc7ffa..b29e7458b966 100644
--- a/include/linux/module.h
+++ b/include/linux/module.h
@@ -350,7 +350,10 @@ struct module
350 struct tracepoint *tracepoints; 350 struct tracepoint *tracepoints;
351 unsigned int num_tracepoints; 351 unsigned int num_tracepoints;
352#endif 352#endif
353 353#ifdef HAVE_JUMP_LABEL
354 struct jump_entry *jump_entries;
355 unsigned int num_jump_entries;
356#endif
354#ifdef CONFIG_TRACING 357#ifdef CONFIG_TRACING
355 const char **trace_bprintk_fmt_start; 358 const char **trace_bprintk_fmt_start;
356 unsigned int num_trace_bprintk_fmt; 359 unsigned int num_trace_bprintk_fmt;
@@ -686,17 +689,16 @@ extern int module_sysfs_initialized;
686 689
687 690
688#ifdef CONFIG_GENERIC_BUG 691#ifdef CONFIG_GENERIC_BUG
689int module_bug_finalize(const Elf_Ehdr *, const Elf_Shdr *, 692void module_bug_finalize(const Elf_Ehdr *, const Elf_Shdr *,
690 struct module *); 693 struct module *);
691void module_bug_cleanup(struct module *); 694void module_bug_cleanup(struct module *);
692 695
693#else /* !CONFIG_GENERIC_BUG */ 696#else /* !CONFIG_GENERIC_BUG */
694 697
695static inline int module_bug_finalize(const Elf_Ehdr *hdr, 698static inline void module_bug_finalize(const Elf_Ehdr *hdr,
696 const Elf_Shdr *sechdrs, 699 const Elf_Shdr *sechdrs,
697 struct module *mod) 700 struct module *mod)
698{ 701{
699 return 0;
700} 702}
701static inline void module_bug_cleanup(struct module *mod) {} 703static inline void module_bug_cleanup(struct module *mod) {}
702#endif /* CONFIG_GENERIC_BUG */ 704#endif /* CONFIG_GENERIC_BUG */
diff --git a/include/linux/msi.h b/include/linux/msi.h
index 91b05c171854..05acced439a3 100644
--- a/include/linux/msi.h
+++ b/include/linux/msi.h
@@ -10,12 +10,13 @@ struct msi_msg {
10}; 10};
11 11
12/* Helper functions */ 12/* Helper functions */
13struct irq_desc; 13struct irq_data;
14extern void mask_msi_irq(unsigned int irq); 14struct msi_desc;
15extern void unmask_msi_irq(unsigned int irq); 15extern void mask_msi_irq(struct irq_data *data);
16extern void read_msi_msg_desc(struct irq_desc *desc, struct msi_msg *msg); 16extern void unmask_msi_irq(struct irq_data *data);
17extern void get_cached_msi_msg_desc(struct irq_desc *desc, struct msi_msg *msg); 17extern void __read_msi_msg(struct msi_desc *entry, struct msi_msg *msg);
18extern void write_msi_msg_desc(struct irq_desc *desc, struct msi_msg *msg); 18extern void __get_cached_msi_msg(struct msi_desc *entry, struct msi_msg *msg);
19extern void __write_msi_msg(struct msi_desc *entry, struct msi_msg *msg);
19extern void read_msi_msg(unsigned int irq, struct msi_msg *msg); 20extern void read_msi_msg(unsigned int irq, struct msi_msg *msg);
20extern void get_cached_msi_msg(unsigned int irq, struct msi_msg *msg); 21extern void get_cached_msi_msg(unsigned int irq, struct msi_msg *msg);
21extern void write_msi_msg(unsigned int irq, struct msi_msg *msg); 22extern void write_msi_msg(unsigned int irq, struct msi_msg *msg);
diff --git a/include/linux/netfilter/nfnetlink_conntrack.h b/include/linux/netfilter/nfnetlink_conntrack.h
index 9ed534c991b9..70cd0603911c 100644
--- a/include/linux/netfilter/nfnetlink_conntrack.h
+++ b/include/linux/netfilter/nfnetlink_conntrack.h
@@ -39,8 +39,9 @@ enum ctattr_type {
39 CTA_TUPLE_MASTER, 39 CTA_TUPLE_MASTER,
40 CTA_NAT_SEQ_ADJ_ORIG, 40 CTA_NAT_SEQ_ADJ_ORIG,
41 CTA_NAT_SEQ_ADJ_REPLY, 41 CTA_NAT_SEQ_ADJ_REPLY,
42 CTA_SECMARK, 42 CTA_SECMARK, /* obsolete */
43 CTA_ZONE, 43 CTA_ZONE,
44 CTA_SECCTX,
44 __CTA_MAX 45 __CTA_MAX
45}; 46};
46#define CTA_MAX (__CTA_MAX - 1) 47#define CTA_MAX (__CTA_MAX - 1)
@@ -172,4 +173,11 @@ enum ctattr_help {
172}; 173};
173#define CTA_HELP_MAX (__CTA_HELP_MAX - 1) 174#define CTA_HELP_MAX (__CTA_HELP_MAX - 1)
174 175
176enum ctattr_secctx {
177 CTA_SECCTX_UNSPEC,
178 CTA_SECCTX_NAME,
179 __CTA_SECCTX_MAX
180};
181#define CTA_SECCTX_MAX (__CTA_SECCTX_MAX - 1)
182
175#endif /* _IPCONNTRACK_NETLINK_H */ 183#endif /* _IPCONNTRACK_NETLINK_H */
diff --git a/include/linux/netfilter/xt_SECMARK.h b/include/linux/netfilter/xt_SECMARK.h
index 6fcd3448b186..989092bd6274 100644
--- a/include/linux/netfilter/xt_SECMARK.h
+++ b/include/linux/netfilter/xt_SECMARK.h
@@ -11,18 +11,12 @@
11 * packets are being marked for. 11 * packets are being marked for.
12 */ 12 */
13#define SECMARK_MODE_SEL 0x01 /* SELinux */ 13#define SECMARK_MODE_SEL 0x01 /* SELinux */
14#define SECMARK_SELCTX_MAX 256 14#define SECMARK_SECCTX_MAX 256
15
16struct xt_secmark_target_selinux_info {
17 __u32 selsid;
18 char selctx[SECMARK_SELCTX_MAX];
19};
20 15
21struct xt_secmark_target_info { 16struct xt_secmark_target_info {
22 __u8 mode; 17 __u8 mode;
23 union { 18 __u32 secid;
24 struct xt_secmark_target_selinux_info sel; 19 char secctx[SECMARK_SECCTX_MAX];
25 } u;
26}; 20};
27 21
28#endif /*_XT_SECMARK_H_target */ 22#endif /*_XT_SECMARK_H_target */
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index 508f8cf6da37..d0edf7d823ae 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -185,7 +185,7 @@ struct nfs_inode {
185 struct nfs4_cached_acl *nfs4_acl; 185 struct nfs4_cached_acl *nfs4_acl;
186 /* NFSv4 state */ 186 /* NFSv4 state */
187 struct list_head open_states; 187 struct list_head open_states;
188 struct nfs_delegation *delegation; 188 struct nfs_delegation __rcu *delegation;
189 fmode_t delegation_state; 189 fmode_t delegation_state;
190 struct rw_semaphore rwsem; 190 struct rw_semaphore rwsem;
191#endif /* CONFIG_NFS_V4*/ 191#endif /* CONFIG_NFS_V4*/
diff --git a/include/linux/notifier.h b/include/linux/notifier.h
index b2f1a4d83550..2026f9e1ceb8 100644
--- a/include/linux/notifier.h
+++ b/include/linux/notifier.h
@@ -49,28 +49,28 @@
49 49
50struct notifier_block { 50struct notifier_block {
51 int (*notifier_call)(struct notifier_block *, unsigned long, void *); 51 int (*notifier_call)(struct notifier_block *, unsigned long, void *);
52 struct notifier_block *next; 52 struct notifier_block __rcu *next;
53 int priority; 53 int priority;
54}; 54};
55 55
56struct atomic_notifier_head { 56struct atomic_notifier_head {
57 spinlock_t lock; 57 spinlock_t lock;
58 struct notifier_block *head; 58 struct notifier_block __rcu *head;
59}; 59};
60 60
61struct blocking_notifier_head { 61struct blocking_notifier_head {
62 struct rw_semaphore rwsem; 62 struct rw_semaphore rwsem;
63 struct notifier_block *head; 63 struct notifier_block __rcu *head;
64}; 64};
65 65
66struct raw_notifier_head { 66struct raw_notifier_head {
67 struct notifier_block *head; 67 struct notifier_block __rcu *head;
68}; 68};
69 69
70struct srcu_notifier_head { 70struct srcu_notifier_head {
71 struct mutex mutex; 71 struct mutex mutex;
72 struct srcu_struct srcu; 72 struct srcu_struct srcu;
73 struct notifier_block *head; 73 struct notifier_block __rcu *head;
74}; 74};
75 75
76#define ATOMIC_INIT_NOTIFIER_HEAD(name) do { \ 76#define ATOMIC_INIT_NOTIFIER_HEAD(name) do { \
diff --git a/include/linux/opp.h b/include/linux/opp.h
new file mode 100644
index 000000000000..5449945d589f
--- /dev/null
+++ b/include/linux/opp.h
@@ -0,0 +1,105 @@
1/*
2 * Generic OPP Interface
3 *
4 * Copyright (C) 2009-2010 Texas Instruments Incorporated.
5 * Nishanth Menon
6 * Romit Dasgupta
7 * Kevin Hilman
8 *
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License version 2 as
11 * published by the Free Software Foundation.
12 */
13
14#ifndef __LINUX_OPP_H__
15#define __LINUX_OPP_H__
16
17#include <linux/err.h>
18#include <linux/cpufreq.h>
19
20struct opp;
21
22#if defined(CONFIG_PM_OPP)
23
24unsigned long opp_get_voltage(struct opp *opp);
25
26unsigned long opp_get_freq(struct opp *opp);
27
28int opp_get_opp_count(struct device *dev);
29
30struct opp *opp_find_freq_exact(struct device *dev, unsigned long freq,
31 bool available);
32
33struct opp *opp_find_freq_floor(struct device *dev, unsigned long *freq);
34
35struct opp *opp_find_freq_ceil(struct device *dev, unsigned long *freq);
36
37int opp_add(struct device *dev, unsigned long freq, unsigned long u_volt);
38
39int opp_enable(struct device *dev, unsigned long freq);
40
41int opp_disable(struct device *dev, unsigned long freq);
42
43#else
44static inline unsigned long opp_get_voltage(struct opp *opp)
45{
46 return 0;
47}
48
49static inline unsigned long opp_get_freq(struct opp *opp)
50{
51 return 0;
52}
53
54static inline int opp_get_opp_count(struct device *dev)
55{
56 return 0;
57}
58
59static inline struct opp *opp_find_freq_exact(struct device *dev,
60 unsigned long freq, bool available)
61{
62 return ERR_PTR(-EINVAL);
63}
64
65static inline struct opp *opp_find_freq_floor(struct device *dev,
66 unsigned long *freq)
67{
68 return ERR_PTR(-EINVAL);
69}
70
71static inline struct opp *opp_find_freq_ceil(struct device *dev,
72 unsigned long *freq)
73{
74 return ERR_PTR(-EINVAL);
75}
76
77static inline int opp_add(struct device *dev, unsigned long freq,
78 unsigned long u_volt)
79{
80 return -EINVAL;
81}
82
83static inline int opp_enable(struct device *dev, unsigned long freq)
84{
85 return 0;
86}
87
88static inline int opp_disable(struct device *dev, unsigned long freq)
89{
90 return 0;
91}
92#endif /* CONFIG_PM_OPP */
93
94#if defined(CONFIG_CPU_FREQ) && defined(CONFIG_PM_OPP)
95int opp_init_cpufreq_table(struct device *dev,
96 struct cpufreq_frequency_table **table);
97#else
98static inline int opp_init_cpufreq_table(struct device *dev,
99 struct cpufreq_frequency_table **table)
100{
101 return -EINVAL;
102}
103#endif /* CONFIG_CPU_FREQ && CONFIG_PM_OPP */
104
105#endif /* __LINUX_OPP_H__ */
diff --git a/include/linux/oprofile.h b/include/linux/oprofile.h
index 5171639ecf0f..32fb81212fd1 100644
--- a/include/linux/oprofile.h
+++ b/include/linux/oprofile.h
@@ -15,6 +15,7 @@
15 15
16#include <linux/types.h> 16#include <linux/types.h>
17#include <linux/spinlock.h> 17#include <linux/spinlock.h>
18#include <linux/init.h>
18#include <asm/atomic.h> 19#include <asm/atomic.h>
19 20
20/* Each escaped entry is prefixed by ESCAPE_CODE 21/* Each escaped entry is prefixed by ESCAPE_CODE
@@ -185,4 +186,10 @@ int oprofile_add_data(struct op_entry *entry, unsigned long val);
185int oprofile_add_data64(struct op_entry *entry, u64 val); 186int oprofile_add_data64(struct op_entry *entry, u64 val);
186int oprofile_write_commit(struct op_entry *entry); 187int oprofile_write_commit(struct op_entry *entry);
187 188
189#ifdef CONFIG_PERF_EVENTS
190int __init oprofile_perf_init(struct oprofile_operations *ops);
191void oprofile_perf_exit(void);
192char *op_name_from_perf_id(void);
193#endif /* CONFIG_PERF_EVENTS */
194
188#endif /* OPROFILE_H */ 195#endif /* OPROFILE_H */
diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index 570fddeb0388..dad30734432a 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -517,6 +517,7 @@
517#define PCI_DEVICE_ID_AMD_11H_NB_DRAM 0x1302 517#define PCI_DEVICE_ID_AMD_11H_NB_DRAM 0x1302
518#define PCI_DEVICE_ID_AMD_11H_NB_MISC 0x1303 518#define PCI_DEVICE_ID_AMD_11H_NB_MISC 0x1303
519#define PCI_DEVICE_ID_AMD_11H_NB_LINK 0x1304 519#define PCI_DEVICE_ID_AMD_11H_NB_LINK 0x1304
520#define PCI_DEVICE_ID_AMD_15H_NB_MISC 0x1603
520#define PCI_DEVICE_ID_AMD_LANCE 0x2000 521#define PCI_DEVICE_ID_AMD_LANCE 0x2000
521#define PCI_DEVICE_ID_AMD_LANCE_HOME 0x2001 522#define PCI_DEVICE_ID_AMD_LANCE_HOME 0x2001
522#define PCI_DEVICE_ID_AMD_SCSI 0x2020 523#define PCI_DEVICE_ID_AMD_SCSI 0x2020
@@ -2315,6 +2316,14 @@
2315#define PCI_DEVICE_ID_P4080 0x0401 2316#define PCI_DEVICE_ID_P4080 0x0401
2316#define PCI_DEVICE_ID_P4040E 0x0408 2317#define PCI_DEVICE_ID_P4040E 0x0408
2317#define PCI_DEVICE_ID_P4040 0x0409 2318#define PCI_DEVICE_ID_P4040 0x0409
2319#define PCI_DEVICE_ID_P2040E 0x0410
2320#define PCI_DEVICE_ID_P2040 0x0411
2321#define PCI_DEVICE_ID_P3041E 0x041E
2322#define PCI_DEVICE_ID_P3041 0x041F
2323#define PCI_DEVICE_ID_P5020E 0x0420
2324#define PCI_DEVICE_ID_P5020 0x0421
2325#define PCI_DEVICE_ID_P5010E 0x0428
2326#define PCI_DEVICE_ID_P5010 0x0429
2318#define PCI_DEVICE_ID_MPC8641 0x7010 2327#define PCI_DEVICE_ID_MPC8641 0x7010
2319#define PCI_DEVICE_ID_MPC8641D 0x7011 2328#define PCI_DEVICE_ID_MPC8641D 0x7011
2320#define PCI_DEVICE_ID_MPC8610 0x7018 2329#define PCI_DEVICE_ID_MPC8610 0x7018
diff --git a/include/linux/percpu-defs.h b/include/linux/percpu-defs.h
index ce2dc655cd1d..018db9a62ffe 100644
--- a/include/linux/percpu-defs.h
+++ b/include/linux/percpu-defs.h
@@ -139,6 +139,27 @@
139 __aligned(PAGE_SIZE) 139 __aligned(PAGE_SIZE)
140 140
141/* 141/*
142 * Declaration/definition used for per-CPU variables that must be read mostly.
143 */
144#define DECLARE_PER_CPU_READ_MOSTLY(type, name) \
145 DECLARE_PER_CPU_SECTION(type, name, "..readmostly")
146
147#define DEFINE_PER_CPU_READ_MOSTLY(type, name) \
148 DEFINE_PER_CPU_SECTION(type, name, "..readmostly")
149
150/*
151 * Declaration/definition used for large per-CPU variables that must be
152 * aligned to something larger than the pagesize.
153 */
154#define DECLARE_PER_CPU_MULTIPAGE_ALIGNED(type, name, size) \
155 DECLARE_PER_CPU_SECTION(type, name, "..page_aligned") \
156 __aligned(size)
157
158#define DEFINE_PER_CPU_MULTIPAGE_ALIGNED(type, name, size) \
159 DEFINE_PER_CPU_SECTION(type, name, "..page_aligned") \
160 __aligned(size)
161
162/*
142 * Intermodule exports for per-CPU variables. sparse forgets about 163 * Intermodule exports for per-CPU variables. sparse forgets about
143 * address space across EXPORT_SYMBOL(), change EXPORT_SYMBOL() to 164 * address space across EXPORT_SYMBOL(), change EXPORT_SYMBOL() to
144 * noop if __CHECKER__. 165 * noop if __CHECKER__.
diff --git a/include/linux/percpu.h b/include/linux/percpu.h
index 49466b13c5c6..0eb50832aa00 100644
--- a/include/linux/percpu.h
+++ b/include/linux/percpu.h
@@ -39,6 +39,15 @@
39 preempt_enable(); \ 39 preempt_enable(); \
40} while (0) 40} while (0)
41 41
42#define get_cpu_ptr(var) ({ \
43 preempt_disable(); \
44 this_cpu_ptr(var); })
45
46#define put_cpu_ptr(var) do { \
47 (void)(var); \
48 preempt_enable(); \
49} while (0)
50
42#ifdef CONFIG_SMP 51#ifdef CONFIG_SMP
43 52
44/* minimum unit size, also is the maximum supported allocation size */ 53/* minimum unit size, also is the maximum supported allocation size */
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 716f99b682c1..057bf22a8323 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -486,6 +486,8 @@ struct perf_guest_info_callbacks {
486#include <linux/workqueue.h> 486#include <linux/workqueue.h>
487#include <linux/ftrace.h> 487#include <linux/ftrace.h>
488#include <linux/cpu.h> 488#include <linux/cpu.h>
489#include <linux/irq_work.h>
490#include <linux/jump_label_ref.h>
489#include <asm/atomic.h> 491#include <asm/atomic.h>
490#include <asm/local.h> 492#include <asm/local.h>
491 493
@@ -529,16 +531,22 @@ struct hw_perf_event {
529 int last_cpu; 531 int last_cpu;
530 }; 532 };
531 struct { /* software */ 533 struct { /* software */
532 s64 remaining;
533 struct hrtimer hrtimer; 534 struct hrtimer hrtimer;
534 }; 535 };
535#ifdef CONFIG_HAVE_HW_BREAKPOINT 536#ifdef CONFIG_HAVE_HW_BREAKPOINT
536 struct { /* breakpoint */ 537 struct { /* breakpoint */
537 struct arch_hw_breakpoint info; 538 struct arch_hw_breakpoint info;
538 struct list_head bp_list; 539 struct list_head bp_list;
540 /*
541 * Crufty hack to avoid the chicken and egg
542 * problem hw_breakpoint has with context
543 * creation and event initialization.
544 */
545 struct task_struct *bp_target;
539 }; 546 };
540#endif 547#endif
541 }; 548 };
549 int state;
542 local64_t prev_count; 550 local64_t prev_count;
543 u64 sample_period; 551 u64 sample_period;
544 u64 last_period; 552 u64 last_period;
@@ -550,6 +558,13 @@ struct hw_perf_event {
550#endif 558#endif
551}; 559};
552 560
561/*
562 * hw_perf_event::state flags
563 */
564#define PERF_HES_STOPPED 0x01 /* the counter is stopped */
565#define PERF_HES_UPTODATE 0x02 /* event->count up-to-date */
566#define PERF_HES_ARCH 0x04
567
553struct perf_event; 568struct perf_event;
554 569
555/* 570/*
@@ -561,36 +576,70 @@ struct perf_event;
561 * struct pmu - generic performance monitoring unit 576 * struct pmu - generic performance monitoring unit
562 */ 577 */
563struct pmu { 578struct pmu {
564 int (*enable) (struct perf_event *event); 579 struct list_head entry;
565 void (*disable) (struct perf_event *event); 580
566 int (*start) (struct perf_event *event); 581 int * __percpu pmu_disable_count;
567 void (*stop) (struct perf_event *event); 582 struct perf_cpu_context * __percpu pmu_cpu_context;
568 void (*read) (struct perf_event *event); 583 int task_ctx_nr;
569 void (*unthrottle) (struct perf_event *event); 584
585 /*
586 * Fully disable/enable this PMU, can be used to protect from the PMI
587 * as well as for lazy/batch writing of the MSRs.
588 */
589 void (*pmu_enable) (struct pmu *pmu); /* optional */
590 void (*pmu_disable) (struct pmu *pmu); /* optional */
570 591
571 /* 592 /*
572 * Group events scheduling is treated as a transaction, add group 593 * Try and initialize the event for this PMU.
573 * events as a whole and perform one schedulability test. If the test 594 * Should return -ENOENT when the @event doesn't match this PMU.
574 * fails, roll back the whole group
575 */ 595 */
596 int (*event_init) (struct perf_event *event);
597
598#define PERF_EF_START 0x01 /* start the counter when adding */
599#define PERF_EF_RELOAD 0x02 /* reload the counter when starting */
600#define PERF_EF_UPDATE 0x04 /* update the counter when stopping */
576 601
577 /* 602 /*
578 * Start the transaction, after this ->enable() doesn't need 603 * Adds/Removes a counter to/from the PMU, can be done inside
579 * to do schedulability tests. 604 * a transaction, see the ->*_txn() methods.
580 */ 605 */
581 void (*start_txn) (const struct pmu *pmu); 606 int (*add) (struct perf_event *event, int flags);
607 void (*del) (struct perf_event *event, int flags);
608
582 /* 609 /*
583 * If ->start_txn() disabled the ->enable() schedulability test 610 * Starts/Stops a counter present on the PMU. The PMI handler
611 * should stop the counter when perf_event_overflow() returns
612 * !0. ->start() will be used to continue.
613 */
614 void (*start) (struct perf_event *event, int flags);
615 void (*stop) (struct perf_event *event, int flags);
616
617 /*
618 * Updates the counter value of the event.
619 */
620 void (*read) (struct perf_event *event);
621
622 /*
623 * Group events scheduling is treated as a transaction, add
624 * group events as a whole and perform one schedulability test.
625 * If the test fails, roll back the whole group
626 *
627 * Start the transaction, after this ->add() doesn't need to
628 * do schedulability tests.
629 */
630 void (*start_txn) (struct pmu *pmu); /* optional */
631 /*
632 * If ->start_txn() disabled the ->add() schedulability test
584 * then ->commit_txn() is required to perform one. On success 633 * then ->commit_txn() is required to perform one. On success
585 * the transaction is closed. On error the transaction is kept 634 * the transaction is closed. On error the transaction is kept
586 * open until ->cancel_txn() is called. 635 * open until ->cancel_txn() is called.
587 */ 636 */
588 int (*commit_txn) (const struct pmu *pmu); 637 int (*commit_txn) (struct pmu *pmu); /* optional */
589 /* 638 /*
590 * Will cancel the transaction, assumes ->disable() is called for 639 * Will cancel the transaction, assumes ->del() is called
591 * each successful ->enable() during the transaction. 640 * for each successful ->add() during the transaction.
592 */ 641 */
593 void (*cancel_txn) (const struct pmu *pmu); 642 void (*cancel_txn) (struct pmu *pmu); /* optional */
594}; 643};
595 644
596/** 645/**
@@ -631,11 +680,6 @@ struct perf_buffer {
631 void *data_pages[0]; 680 void *data_pages[0];
632}; 681};
633 682
634struct perf_pending_entry {
635 struct perf_pending_entry *next;
636 void (*func)(struct perf_pending_entry *);
637};
638
639struct perf_sample_data; 683struct perf_sample_data;
640 684
641typedef void (*perf_overflow_handler_t)(struct perf_event *, int, 685typedef void (*perf_overflow_handler_t)(struct perf_event *, int,
@@ -656,6 +700,7 @@ struct swevent_hlist {
656 700
657#define PERF_ATTACH_CONTEXT 0x01 701#define PERF_ATTACH_CONTEXT 0x01
658#define PERF_ATTACH_GROUP 0x02 702#define PERF_ATTACH_GROUP 0x02
703#define PERF_ATTACH_TASK 0x04
659 704
660/** 705/**
661 * struct perf_event - performance event kernel representation: 706 * struct perf_event - performance event kernel representation:
@@ -669,7 +714,7 @@ struct perf_event {
669 int nr_siblings; 714 int nr_siblings;
670 int group_flags; 715 int group_flags;
671 struct perf_event *group_leader; 716 struct perf_event *group_leader;
672 const struct pmu *pmu; 717 struct pmu *pmu;
673 718
674 enum perf_event_active_state state; 719 enum perf_event_active_state state;
675 unsigned int attach_state; 720 unsigned int attach_state;
@@ -743,7 +788,7 @@ struct perf_event {
743 int pending_wakeup; 788 int pending_wakeup;
744 int pending_kill; 789 int pending_kill;
745 int pending_disable; 790 int pending_disable;
746 struct perf_pending_entry pending; 791 struct irq_work pending;
747 792
748 atomic_t event_limit; 793 atomic_t event_limit;
749 794
@@ -763,12 +808,19 @@ struct perf_event {
763#endif /* CONFIG_PERF_EVENTS */ 808#endif /* CONFIG_PERF_EVENTS */
764}; 809};
765 810
811enum perf_event_context_type {
812 task_context,
813 cpu_context,
814};
815
766/** 816/**
767 * struct perf_event_context - event context structure 817 * struct perf_event_context - event context structure
768 * 818 *
769 * Used as a container for task events and CPU events as well: 819 * Used as a container for task events and CPU events as well:
770 */ 820 */
771struct perf_event_context { 821struct perf_event_context {
822 enum perf_event_context_type type;
823 struct pmu *pmu;
772 /* 824 /*
773 * Protect the states of the events in the list, 825 * Protect the states of the events in the list,
774 * nr_active, and the list: 826 * nr_active, and the list:
@@ -808,6 +860,12 @@ struct perf_event_context {
808 struct rcu_head rcu_head; 860 struct rcu_head rcu_head;
809}; 861};
810 862
863/*
864 * Number of contexts where an event can trigger:
865 * task, softirq, hardirq, nmi.
866 */
867#define PERF_NR_CONTEXTS 4
868
811/** 869/**
812 * struct perf_cpu_context - per cpu event context structure 870 * struct perf_cpu_context - per cpu event context structure
813 */ 871 */
@@ -815,18 +873,9 @@ struct perf_cpu_context {
815 struct perf_event_context ctx; 873 struct perf_event_context ctx;
816 struct perf_event_context *task_ctx; 874 struct perf_event_context *task_ctx;
817 int active_oncpu; 875 int active_oncpu;
818 int max_pertask;
819 int exclusive; 876 int exclusive;
820 struct swevent_hlist *swevent_hlist; 877 struct list_head rotation_list;
821 struct mutex hlist_mutex; 878 int jiffies_interval;
822 int hlist_refcount;
823
824 /*
825 * Recursion avoidance:
826 *
827 * task, softirq, irq, nmi context
828 */
829 int recursion[4];
830}; 879};
831 880
832struct perf_output_handle { 881struct perf_output_handle {
@@ -842,26 +891,34 @@ struct perf_output_handle {
842 891
843#ifdef CONFIG_PERF_EVENTS 892#ifdef CONFIG_PERF_EVENTS
844 893
845/* 894extern int perf_pmu_register(struct pmu *pmu);
846 * Set by architecture code: 895extern void perf_pmu_unregister(struct pmu *pmu);
847 */ 896
848extern int perf_max_events; 897extern int perf_num_counters(void);
898extern const char *perf_pmu_name(void);
899extern void __perf_event_task_sched_in(struct task_struct *task);
900extern void __perf_event_task_sched_out(struct task_struct *task, struct task_struct *next);
849 901
850extern const struct pmu *hw_perf_event_init(struct perf_event *event); 902extern atomic_t perf_task_events;
903
904static inline void perf_event_task_sched_in(struct task_struct *task)
905{
906 COND_STMT(&perf_task_events, __perf_event_task_sched_in(task));
907}
908
909static inline
910void perf_event_task_sched_out(struct task_struct *task, struct task_struct *next)
911{
912 COND_STMT(&perf_task_events, __perf_event_task_sched_out(task, next));
913}
851 914
852extern void perf_event_task_sched_in(struct task_struct *task);
853extern void perf_event_task_sched_out(struct task_struct *task, struct task_struct *next);
854extern void perf_event_task_tick(struct task_struct *task);
855extern int perf_event_init_task(struct task_struct *child); 915extern int perf_event_init_task(struct task_struct *child);
856extern void perf_event_exit_task(struct task_struct *child); 916extern void perf_event_exit_task(struct task_struct *child);
857extern void perf_event_free_task(struct task_struct *task); 917extern void perf_event_free_task(struct task_struct *task);
858extern void set_perf_event_pending(void); 918extern void perf_event_delayed_put(struct task_struct *task);
859extern void perf_event_do_pending(void);
860extern void perf_event_print_debug(void); 919extern void perf_event_print_debug(void);
861extern void __perf_disable(void); 920extern void perf_pmu_disable(struct pmu *pmu);
862extern bool __perf_enable(void); 921extern void perf_pmu_enable(struct pmu *pmu);
863extern void perf_disable(void);
864extern void perf_enable(void);
865extern int perf_event_task_disable(void); 922extern int perf_event_task_disable(void);
866extern int perf_event_task_enable(void); 923extern int perf_event_task_enable(void);
867extern void perf_event_update_userpage(struct perf_event *event); 924extern void perf_event_update_userpage(struct perf_event *event);
@@ -869,7 +926,7 @@ extern int perf_event_release_kernel(struct perf_event *event);
869extern struct perf_event * 926extern struct perf_event *
870perf_event_create_kernel_counter(struct perf_event_attr *attr, 927perf_event_create_kernel_counter(struct perf_event_attr *attr,
871 int cpu, 928 int cpu,
872 pid_t pid, 929 struct task_struct *task,
873 perf_overflow_handler_t callback); 930 perf_overflow_handler_t callback);
874extern u64 perf_event_read_value(struct perf_event *event, 931extern u64 perf_event_read_value(struct perf_event *event,
875 u64 *enabled, u64 *running); 932 u64 *enabled, u64 *running);
@@ -920,14 +977,7 @@ extern int perf_event_overflow(struct perf_event *event, int nmi,
920 */ 977 */
921static inline int is_software_event(struct perf_event *event) 978static inline int is_software_event(struct perf_event *event)
922{ 979{
923 switch (event->attr.type) { 980 return event->pmu->task_ctx_nr == perf_sw_context;
924 case PERF_TYPE_SOFTWARE:
925 case PERF_TYPE_TRACEPOINT:
926 /* for now the breakpoint stuff also works as software event */
927 case PERF_TYPE_BREAKPOINT:
928 return 1;
929 }
930 return 0;
931} 981}
932 982
933extern atomic_t perf_swevent_enabled[PERF_COUNT_SW_MAX]; 983extern atomic_t perf_swevent_enabled[PERF_COUNT_SW_MAX];
@@ -954,18 +1004,20 @@ static inline void perf_fetch_caller_regs(struct pt_regs *regs)
954 perf_arch_fetch_caller_regs(regs, CALLER_ADDR0); 1004 perf_arch_fetch_caller_regs(regs, CALLER_ADDR0);
955} 1005}
956 1006
957static inline void 1007static __always_inline void
958perf_sw_event(u32 event_id, u64 nr, int nmi, struct pt_regs *regs, u64 addr) 1008perf_sw_event(u32 event_id, u64 nr, int nmi, struct pt_regs *regs, u64 addr)
959{ 1009{
960 if (atomic_read(&perf_swevent_enabled[event_id])) { 1010 struct pt_regs hot_regs;
961 struct pt_regs hot_regs; 1011
962 1012 JUMP_LABEL(&perf_swevent_enabled[event_id], have_event);
963 if (!regs) { 1013 return;
964 perf_fetch_caller_regs(&hot_regs); 1014
965 regs = &hot_regs; 1015have_event:
966 } 1016 if (!regs) {
967 __perf_sw_event(event_id, nr, nmi, regs, addr); 1017 perf_fetch_caller_regs(&hot_regs);
1018 regs = &hot_regs;
968 } 1019 }
1020 __perf_sw_event(event_id, nr, nmi, regs, addr);
969} 1021}
970 1022
971extern void perf_event_mmap(struct vm_area_struct *vma); 1023extern void perf_event_mmap(struct vm_area_struct *vma);
@@ -976,7 +1028,21 @@ extern int perf_unregister_guest_info_callbacks(struct perf_guest_info_callbacks
976extern void perf_event_comm(struct task_struct *tsk); 1028extern void perf_event_comm(struct task_struct *tsk);
977extern void perf_event_fork(struct task_struct *tsk); 1029extern void perf_event_fork(struct task_struct *tsk);
978 1030
979extern struct perf_callchain_entry *perf_callchain(struct pt_regs *regs); 1031/* Callchains */
1032DECLARE_PER_CPU(struct perf_callchain_entry, perf_callchain_entry);
1033
1034extern void perf_callchain_user(struct perf_callchain_entry *entry,
1035 struct pt_regs *regs);
1036extern void perf_callchain_kernel(struct perf_callchain_entry *entry,
1037 struct pt_regs *regs);
1038
1039
1040static inline void
1041perf_callchain_store(struct perf_callchain_entry *entry, u64 ip)
1042{
1043 if (entry->nr < PERF_MAX_STACK_DEPTH)
1044 entry->ip[entry->nr++] = ip;
1045}
980 1046
981extern int sysctl_perf_event_paranoid; 1047extern int sysctl_perf_event_paranoid;
982extern int sysctl_perf_event_mlock; 1048extern int sysctl_perf_event_mlock;
@@ -1019,21 +1085,18 @@ extern int perf_swevent_get_recursion_context(void);
1019extern void perf_swevent_put_recursion_context(int rctx); 1085extern void perf_swevent_put_recursion_context(int rctx);
1020extern void perf_event_enable(struct perf_event *event); 1086extern void perf_event_enable(struct perf_event *event);
1021extern void perf_event_disable(struct perf_event *event); 1087extern void perf_event_disable(struct perf_event *event);
1088extern void perf_event_task_tick(void);
1022#else 1089#else
1023static inline void 1090static inline void
1024perf_event_task_sched_in(struct task_struct *task) { } 1091perf_event_task_sched_in(struct task_struct *task) { }
1025static inline void 1092static inline void
1026perf_event_task_sched_out(struct task_struct *task, 1093perf_event_task_sched_out(struct task_struct *task,
1027 struct task_struct *next) { } 1094 struct task_struct *next) { }
1028static inline void
1029perf_event_task_tick(struct task_struct *task) { }
1030static inline int perf_event_init_task(struct task_struct *child) { return 0; } 1095static inline int perf_event_init_task(struct task_struct *child) { return 0; }
1031static inline void perf_event_exit_task(struct task_struct *child) { } 1096static inline void perf_event_exit_task(struct task_struct *child) { }
1032static inline void perf_event_free_task(struct task_struct *task) { } 1097static inline void perf_event_free_task(struct task_struct *task) { }
1033static inline void perf_event_do_pending(void) { } 1098static inline void perf_event_delayed_put(struct task_struct *task) { }
1034static inline void perf_event_print_debug(void) { } 1099static inline void perf_event_print_debug(void) { }
1035static inline void perf_disable(void) { }
1036static inline void perf_enable(void) { }
1037static inline int perf_event_task_disable(void) { return -EINVAL; } 1100static inline int perf_event_task_disable(void) { return -EINVAL; }
1038static inline int perf_event_task_enable(void) { return -EINVAL; } 1101static inline int perf_event_task_enable(void) { return -EINVAL; }
1039 1102
@@ -1056,6 +1119,7 @@ static inline int perf_swevent_get_recursion_context(void) { return -1; }
1056static inline void perf_swevent_put_recursion_context(int rctx) { } 1119static inline void perf_swevent_put_recursion_context(int rctx) { }
1057static inline void perf_event_enable(struct perf_event *event) { } 1120static inline void perf_event_enable(struct perf_event *event) { }
1058static inline void perf_event_disable(struct perf_event *event) { } 1121static inline void perf_event_disable(struct perf_event *event) { }
1122static inline void perf_event_task_tick(void) { }
1059#endif 1123#endif
1060 1124
1061#define perf_output_put(handle, x) \ 1125#define perf_output_put(handle, x) \
diff --git a/include/linux/pm.h b/include/linux/pm.h
index 52e8c55ff314..40f3f45702ba 100644
--- a/include/linux/pm.h
+++ b/include/linux/pm.h
@@ -41,6 +41,12 @@ extern void (*pm_power_off_prepare)(void);
41 41
42struct device; 42struct device;
43 43
44#ifdef CONFIG_PM
45extern const char power_group_name[]; /* = "power" */
46#else
47#define power_group_name NULL
48#endif
49
44typedef struct pm_message { 50typedef struct pm_message {
45 int event; 51 int event;
46} pm_message_t; 52} pm_message_t;
@@ -438,6 +444,9 @@ enum rpm_status {
438 * 444 *
439 * RPM_REQ_SUSPEND Run the device bus type's ->runtime_suspend() callback 445 * RPM_REQ_SUSPEND Run the device bus type's ->runtime_suspend() callback
440 * 446 *
447 * RPM_REQ_AUTOSUSPEND Same as RPM_REQ_SUSPEND, but not until the device has
448 * been inactive for as long as power.autosuspend_delay
449 *
441 * RPM_REQ_RESUME Run the device bus type's ->runtime_resume() callback 450 * RPM_REQ_RESUME Run the device bus type's ->runtime_resume() callback
442 */ 451 */
443 452
@@ -445,26 +454,28 @@ enum rpm_request {
445 RPM_REQ_NONE = 0, 454 RPM_REQ_NONE = 0,
446 RPM_REQ_IDLE, 455 RPM_REQ_IDLE,
447 RPM_REQ_SUSPEND, 456 RPM_REQ_SUSPEND,
457 RPM_REQ_AUTOSUSPEND,
448 RPM_REQ_RESUME, 458 RPM_REQ_RESUME,
449}; 459};
450 460
461struct wakeup_source;
462
451struct dev_pm_info { 463struct dev_pm_info {
452 pm_message_t power_state; 464 pm_message_t power_state;
453 unsigned int can_wakeup:1; 465 unsigned int can_wakeup:1;
454 unsigned int should_wakeup:1;
455 unsigned async_suspend:1; 466 unsigned async_suspend:1;
456 enum dpm_state status; /* Owned by the PM core */ 467 enum dpm_state status; /* Owned by the PM core */
468 spinlock_t lock;
457#ifdef CONFIG_PM_SLEEP 469#ifdef CONFIG_PM_SLEEP
458 struct list_head entry; 470 struct list_head entry;
459 struct completion completion; 471 struct completion completion;
460 unsigned long wakeup_count; 472 struct wakeup_source *wakeup;
461#endif 473#endif
462#ifdef CONFIG_PM_RUNTIME 474#ifdef CONFIG_PM_RUNTIME
463 struct timer_list suspend_timer; 475 struct timer_list suspend_timer;
464 unsigned long timer_expires; 476 unsigned long timer_expires;
465 struct work_struct work; 477 struct work_struct work;
466 wait_queue_head_t wait_queue; 478 wait_queue_head_t wait_queue;
467 spinlock_t lock;
468 atomic_t usage_count; 479 atomic_t usage_count;
469 atomic_t child_count; 480 atomic_t child_count;
470 unsigned int disable_depth:3; 481 unsigned int disable_depth:3;
@@ -474,9 +485,14 @@ struct dev_pm_info {
474 unsigned int deferred_resume:1; 485 unsigned int deferred_resume:1;
475 unsigned int run_wake:1; 486 unsigned int run_wake:1;
476 unsigned int runtime_auto:1; 487 unsigned int runtime_auto:1;
488 unsigned int no_callbacks:1;
489 unsigned int use_autosuspend:1;
490 unsigned int timer_autosuspends:1;
477 enum rpm_request request; 491 enum rpm_request request;
478 enum rpm_status runtime_status; 492 enum rpm_status runtime_status;
479 int runtime_error; 493 int runtime_error;
494 int autosuspend_delay;
495 unsigned long last_busy;
480 unsigned long active_jiffies; 496 unsigned long active_jiffies;
481 unsigned long suspended_jiffies; 497 unsigned long suspended_jiffies;
482 unsigned long accounting_timestamp; 498 unsigned long accounting_timestamp;
@@ -558,12 +574,7 @@ extern void __suspend_report_result(const char *function, void *fn, int ret);
558 __suspend_report_result(__func__, fn, ret); \ 574 __suspend_report_result(__func__, fn, ret); \
559 } while (0) 575 } while (0)
560 576
561extern void device_pm_wait_for_dev(struct device *sub, struct device *dev); 577extern int device_pm_wait_for_dev(struct device *sub, struct device *dev);
562
563/* drivers/base/power/wakeup.c */
564extern void pm_wakeup_event(struct device *dev, unsigned int msec);
565extern void pm_stay_awake(struct device *dev);
566extern void pm_relax(void);
567#else /* !CONFIG_PM_SLEEP */ 578#else /* !CONFIG_PM_SLEEP */
568 579
569#define device_pm_lock() do {} while (0) 580#define device_pm_lock() do {} while (0)
@@ -576,11 +587,10 @@ static inline int dpm_suspend_start(pm_message_t state)
576 587
577#define suspend_report_result(fn, ret) do {} while (0) 588#define suspend_report_result(fn, ret) do {} while (0)
578 589
579static inline void device_pm_wait_for_dev(struct device *a, struct device *b) {} 590static inline int device_pm_wait_for_dev(struct device *a, struct device *b)
580 591{
581static inline void pm_wakeup_event(struct device *dev, unsigned int msec) {} 592 return 0;
582static inline void pm_stay_awake(struct device *dev) {} 593}
583static inline void pm_relax(void) {}
584#endif /* !CONFIG_PM_SLEEP */ 594#endif /* !CONFIG_PM_SLEEP */
585 595
586/* How to reorder dpm_list after device_move() */ 596/* How to reorder dpm_list after device_move() */
diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h
index 6e81888c6222..3ec2358f8692 100644
--- a/include/linux/pm_runtime.h
+++ b/include/linux/pm_runtime.h
@@ -12,18 +12,24 @@
12#include <linux/device.h> 12#include <linux/device.h>
13#include <linux/pm.h> 13#include <linux/pm.h>
14 14
15#include <linux/jiffies.h>
16
17/* Runtime PM flag argument bits */
18#define RPM_ASYNC 0x01 /* Request is asynchronous */
19#define RPM_NOWAIT 0x02 /* Don't wait for concurrent
20 state change */
21#define RPM_GET_PUT 0x04 /* Increment/decrement the
22 usage_count */
23#define RPM_AUTO 0x08 /* Use autosuspend_delay */
24
15#ifdef CONFIG_PM_RUNTIME 25#ifdef CONFIG_PM_RUNTIME
16 26
17extern struct workqueue_struct *pm_wq; 27extern struct workqueue_struct *pm_wq;
18 28
19extern int pm_runtime_idle(struct device *dev); 29extern int __pm_runtime_idle(struct device *dev, int rpmflags);
20extern int pm_runtime_suspend(struct device *dev); 30extern int __pm_runtime_suspend(struct device *dev, int rpmflags);
21extern int pm_runtime_resume(struct device *dev); 31extern int __pm_runtime_resume(struct device *dev, int rpmflags);
22extern int pm_request_idle(struct device *dev);
23extern int pm_schedule_suspend(struct device *dev, unsigned int delay); 32extern int pm_schedule_suspend(struct device *dev, unsigned int delay);
24extern int pm_request_resume(struct device *dev);
25extern int __pm_runtime_get(struct device *dev, bool sync);
26extern int __pm_runtime_put(struct device *dev, bool sync);
27extern int __pm_runtime_set_status(struct device *dev, unsigned int status); 33extern int __pm_runtime_set_status(struct device *dev, unsigned int status);
28extern int pm_runtime_barrier(struct device *dev); 34extern int pm_runtime_barrier(struct device *dev);
29extern void pm_runtime_enable(struct device *dev); 35extern void pm_runtime_enable(struct device *dev);
@@ -33,6 +39,10 @@ extern void pm_runtime_forbid(struct device *dev);
33extern int pm_generic_runtime_idle(struct device *dev); 39extern int pm_generic_runtime_idle(struct device *dev);
34extern int pm_generic_runtime_suspend(struct device *dev); 40extern int pm_generic_runtime_suspend(struct device *dev);
35extern int pm_generic_runtime_resume(struct device *dev); 41extern int pm_generic_runtime_resume(struct device *dev);
42extern void pm_runtime_no_callbacks(struct device *dev);
43extern void __pm_runtime_use_autosuspend(struct device *dev, bool use);
44extern void pm_runtime_set_autosuspend_delay(struct device *dev, int delay);
45extern unsigned long pm_runtime_autosuspend_expiration(struct device *dev);
36 46
37static inline bool pm_children_suspended(struct device *dev) 47static inline bool pm_children_suspended(struct device *dev)
38{ 48{
@@ -70,19 +80,29 @@ static inline bool pm_runtime_suspended(struct device *dev)
70 return dev->power.runtime_status == RPM_SUSPENDED; 80 return dev->power.runtime_status == RPM_SUSPENDED;
71} 81}
72 82
83static inline void pm_runtime_mark_last_busy(struct device *dev)
84{
85 ACCESS_ONCE(dev->power.last_busy) = jiffies;
86}
87
73#else /* !CONFIG_PM_RUNTIME */ 88#else /* !CONFIG_PM_RUNTIME */
74 89
75static inline int pm_runtime_idle(struct device *dev) { return -ENOSYS; } 90static inline int __pm_runtime_idle(struct device *dev, int rpmflags)
76static inline int pm_runtime_suspend(struct device *dev) { return -ENOSYS; } 91{
77static inline int pm_runtime_resume(struct device *dev) { return 0; } 92 return -ENOSYS;
78static inline int pm_request_idle(struct device *dev) { return -ENOSYS; } 93}
94static inline int __pm_runtime_suspend(struct device *dev, int rpmflags)
95{
96 return -ENOSYS;
97}
98static inline int __pm_runtime_resume(struct device *dev, int rpmflags)
99{
100 return 1;
101}
79static inline int pm_schedule_suspend(struct device *dev, unsigned int delay) 102static inline int pm_schedule_suspend(struct device *dev, unsigned int delay)
80{ 103{
81 return -ENOSYS; 104 return -ENOSYS;
82} 105}
83static inline int pm_request_resume(struct device *dev) { return 0; }
84static inline int __pm_runtime_get(struct device *dev, bool sync) { return 1; }
85static inline int __pm_runtime_put(struct device *dev, bool sync) { return 0; }
86static inline int __pm_runtime_set_status(struct device *dev, 106static inline int __pm_runtime_set_status(struct device *dev,
87 unsigned int status) { return 0; } 107 unsigned int status) { return 0; }
88static inline int pm_runtime_barrier(struct device *dev) { return 0; } 108static inline int pm_runtime_barrier(struct device *dev) { return 0; }
@@ -102,27 +122,82 @@ static inline bool pm_runtime_suspended(struct device *dev) { return false; }
102static inline int pm_generic_runtime_idle(struct device *dev) { return 0; } 122static inline int pm_generic_runtime_idle(struct device *dev) { return 0; }
103static inline int pm_generic_runtime_suspend(struct device *dev) { return 0; } 123static inline int pm_generic_runtime_suspend(struct device *dev) { return 0; }
104static inline int pm_generic_runtime_resume(struct device *dev) { return 0; } 124static inline int pm_generic_runtime_resume(struct device *dev) { return 0; }
125static inline void pm_runtime_no_callbacks(struct device *dev) {}
126
127static inline void pm_runtime_mark_last_busy(struct device *dev) {}
128static inline void __pm_runtime_use_autosuspend(struct device *dev,
129 bool use) {}
130static inline void pm_runtime_set_autosuspend_delay(struct device *dev,
131 int delay) {}
132static inline unsigned long pm_runtime_autosuspend_expiration(
133 struct device *dev) { return 0; }
105 134
106#endif /* !CONFIG_PM_RUNTIME */ 135#endif /* !CONFIG_PM_RUNTIME */
107 136
137static inline int pm_runtime_idle(struct device *dev)
138{
139 return __pm_runtime_idle(dev, 0);
140}
141
142static inline int pm_runtime_suspend(struct device *dev)
143{
144 return __pm_runtime_suspend(dev, 0);
145}
146
147static inline int pm_runtime_autosuspend(struct device *dev)
148{
149 return __pm_runtime_suspend(dev, RPM_AUTO);
150}
151
152static inline int pm_runtime_resume(struct device *dev)
153{
154 return __pm_runtime_resume(dev, 0);
155}
156
157static inline int pm_request_idle(struct device *dev)
158{
159 return __pm_runtime_idle(dev, RPM_ASYNC);
160}
161
162static inline int pm_request_resume(struct device *dev)
163{
164 return __pm_runtime_resume(dev, RPM_ASYNC);
165}
166
167static inline int pm_request_autosuspend(struct device *dev)
168{
169 return __pm_runtime_suspend(dev, RPM_ASYNC | RPM_AUTO);
170}
171
108static inline int pm_runtime_get(struct device *dev) 172static inline int pm_runtime_get(struct device *dev)
109{ 173{
110 return __pm_runtime_get(dev, false); 174 return __pm_runtime_resume(dev, RPM_GET_PUT | RPM_ASYNC);
111} 175}
112 176
113static inline int pm_runtime_get_sync(struct device *dev) 177static inline int pm_runtime_get_sync(struct device *dev)
114{ 178{
115 return __pm_runtime_get(dev, true); 179 return __pm_runtime_resume(dev, RPM_GET_PUT);
116} 180}
117 181
118static inline int pm_runtime_put(struct device *dev) 182static inline int pm_runtime_put(struct device *dev)
119{ 183{
120 return __pm_runtime_put(dev, false); 184 return __pm_runtime_idle(dev, RPM_GET_PUT | RPM_ASYNC);
185}
186
187static inline int pm_runtime_put_autosuspend(struct device *dev)
188{
189 return __pm_runtime_suspend(dev,
190 RPM_GET_PUT | RPM_ASYNC | RPM_AUTO);
121} 191}
122 192
123static inline int pm_runtime_put_sync(struct device *dev) 193static inline int pm_runtime_put_sync(struct device *dev)
124{ 194{
125 return __pm_runtime_put(dev, true); 195 return __pm_runtime_idle(dev, RPM_GET_PUT);
196}
197
198static inline int pm_runtime_put_sync_autosuspend(struct device *dev)
199{
200 return __pm_runtime_suspend(dev, RPM_GET_PUT | RPM_AUTO);
126} 201}
127 202
128static inline int pm_runtime_set_active(struct device *dev) 203static inline int pm_runtime_set_active(struct device *dev)
@@ -140,4 +215,14 @@ static inline void pm_runtime_disable(struct device *dev)
140 __pm_runtime_disable(dev, true); 215 __pm_runtime_disable(dev, true);
141} 216}
142 217
218static inline void pm_runtime_use_autosuspend(struct device *dev)
219{
220 __pm_runtime_use_autosuspend(dev, true);
221}
222
223static inline void pm_runtime_dont_use_autosuspend(struct device *dev)
224{
225 __pm_runtime_use_autosuspend(dev, false);
226}
227
143#endif 228#endif
diff --git a/include/linux/pm_wakeup.h b/include/linux/pm_wakeup.h
index 76aca48722ae..9cff00dd6b63 100644
--- a/include/linux/pm_wakeup.h
+++ b/include/linux/pm_wakeup.h
@@ -2,6 +2,7 @@
2 * pm_wakeup.h - Power management wakeup interface 2 * pm_wakeup.h - Power management wakeup interface
3 * 3 *
4 * Copyright (C) 2008 Alan Stern 4 * Copyright (C) 2008 Alan Stern
5 * Copyright (C) 2010 Rafael J. Wysocki, Novell Inc.
5 * 6 *
6 * This program is free software; you can redistribute it and/or modify 7 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by 8 * it under the terms of the GNU General Public License as published by
@@ -27,19 +28,77 @@
27 28
28#include <linux/types.h> 29#include <linux/types.h>
29 30
30#ifdef CONFIG_PM 31/**
31 32 * struct wakeup_source - Representation of wakeup sources
32/* Changes to device_may_wakeup take effect on the next pm state change.
33 * 33 *
34 * By default, most devices should leave wakeup disabled. The exceptions 34 * @total_time: Total time this wakeup source has been active.
35 * are devices that everyone expects to be wakeup sources: keyboards, 35 * @max_time: Maximum time this wakeup source has been continuously active.
36 * power buttons, possibly network interfaces, etc. 36 * @last_time: Monotonic clock when the wakeup source was activated last time.
37 * @event_count: Number of signaled wakeup events.
38 * @active_count: Number of times the wakeup source was activated.
39 * @relax_count: Number of times the wakeup source was deactivated.
40 * @hit_count: Number of times the wakeup source might abort system suspend.
41 * @active: Status of the wakeup source.
37 */ 42 */
38static inline void device_init_wakeup(struct device *dev, bool val) 43struct wakeup_source {
44 char *name;
45 struct list_head entry;
46 spinlock_t lock;
47 struct timer_list timer;
48 unsigned long timer_expires;
49 ktime_t total_time;
50 ktime_t max_time;
51 ktime_t last_time;
52 unsigned long event_count;
53 unsigned long active_count;
54 unsigned long relax_count;
55 unsigned long hit_count;
56 unsigned int active:1;
57};
58
59#ifdef CONFIG_PM_SLEEP
60
61/*
62 * Changes to device_may_wakeup take effect on the next pm state change.
63 */
64
65static inline void device_set_wakeup_capable(struct device *dev, bool capable)
66{
67 dev->power.can_wakeup = capable;
68}
69
70static inline bool device_can_wakeup(struct device *dev)
71{
72 return dev->power.can_wakeup;
73}
74
75
76
77static inline bool device_may_wakeup(struct device *dev)
39{ 78{
40 dev->power.can_wakeup = dev->power.should_wakeup = val; 79 return dev->power.can_wakeup && !!dev->power.wakeup;
41} 80}
42 81
82/* drivers/base/power/wakeup.c */
83extern struct wakeup_source *wakeup_source_create(const char *name);
84extern void wakeup_source_destroy(struct wakeup_source *ws);
85extern void wakeup_source_add(struct wakeup_source *ws);
86extern void wakeup_source_remove(struct wakeup_source *ws);
87extern struct wakeup_source *wakeup_source_register(const char *name);
88extern void wakeup_source_unregister(struct wakeup_source *ws);
89extern int device_wakeup_enable(struct device *dev);
90extern int device_wakeup_disable(struct device *dev);
91extern int device_init_wakeup(struct device *dev, bool val);
92extern int device_set_wakeup_enable(struct device *dev, bool enable);
93extern void __pm_stay_awake(struct wakeup_source *ws);
94extern void pm_stay_awake(struct device *dev);
95extern void __pm_relax(struct wakeup_source *ws);
96extern void pm_relax(struct device *dev);
97extern void __pm_wakeup_event(struct wakeup_source *ws, unsigned int msec);
98extern void pm_wakeup_event(struct device *dev, unsigned int msec);
99
100#else /* !CONFIG_PM_SLEEP */
101
43static inline void device_set_wakeup_capable(struct device *dev, bool capable) 102static inline void device_set_wakeup_capable(struct device *dev, bool capable)
44{ 103{
45 dev->power.can_wakeup = capable; 104 dev->power.can_wakeup = capable;
@@ -50,43 +109,63 @@ static inline bool device_can_wakeup(struct device *dev)
50 return dev->power.can_wakeup; 109 return dev->power.can_wakeup;
51} 110}
52 111
53static inline void device_set_wakeup_enable(struct device *dev, bool enable) 112static inline bool device_may_wakeup(struct device *dev)
54{ 113{
55 dev->power.should_wakeup = enable; 114 return false;
56} 115}
57 116
58static inline bool device_may_wakeup(struct device *dev) 117static inline struct wakeup_source *wakeup_source_create(const char *name)
59{ 118{
60 return dev->power.can_wakeup && dev->power.should_wakeup; 119 return NULL;
61} 120}
62 121
63#else /* !CONFIG_PM */ 122static inline void wakeup_source_destroy(struct wakeup_source *ws) {}
123
124static inline void wakeup_source_add(struct wakeup_source *ws) {}
64 125
65/* For some reason the following routines work even without CONFIG_PM */ 126static inline void wakeup_source_remove(struct wakeup_source *ws) {}
66static inline void device_init_wakeup(struct device *dev, bool val) 127
128static inline struct wakeup_source *wakeup_source_register(const char *name)
67{ 129{
68 dev->power.can_wakeup = val; 130 return NULL;
69} 131}
70 132
71static inline void device_set_wakeup_capable(struct device *dev, bool capable) 133static inline void wakeup_source_unregister(struct wakeup_source *ws) {}
134
135static inline int device_wakeup_enable(struct device *dev)
72{ 136{
73 dev->power.can_wakeup = capable; 137 return -EINVAL;
74} 138}
75 139
76static inline bool device_can_wakeup(struct device *dev) 140static inline int device_wakeup_disable(struct device *dev)
77{ 141{
78 return dev->power.can_wakeup; 142 return 0;
79} 143}
80 144
81static inline void device_set_wakeup_enable(struct device *dev, bool enable) 145static inline int device_init_wakeup(struct device *dev, bool val)
82{ 146{
147 dev->power.can_wakeup = val;
148 return val ? -EINVAL : 0;
83} 149}
84 150
85static inline bool device_may_wakeup(struct device *dev) 151
152static inline int device_set_wakeup_enable(struct device *dev, bool enable)
86{ 153{
87 return false; 154 return -EINVAL;
88} 155}
89 156
90#endif /* !CONFIG_PM */ 157static inline void __pm_stay_awake(struct wakeup_source *ws) {}
158
159static inline void pm_stay_awake(struct device *dev) {}
160
161static inline void __pm_relax(struct wakeup_source *ws) {}
162
163static inline void pm_relax(struct device *dev) {}
164
165static inline void __pm_wakeup_event(struct wakeup_source *ws, unsigned int msec) {}
166
167static inline void pm_wakeup_event(struct device *dev, unsigned int msec) {}
168
169#endif /* !CONFIG_PM_SLEEP */
91 170
92#endif /* _LINUX_PM_WAKEUP_H */ 171#endif /* _LINUX_PM_WAKEUP_H */
diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h
index 634b8e674ac5..a39cbed9ee17 100644
--- a/include/linux/radix-tree.h
+++ b/include/linux/radix-tree.h
@@ -47,6 +47,8 @@ static inline void *radix_tree_indirect_to_ptr(void *ptr)
47{ 47{
48 return (void *)((unsigned long)ptr & ~RADIX_TREE_INDIRECT_PTR); 48 return (void *)((unsigned long)ptr & ~RADIX_TREE_INDIRECT_PTR);
49} 49}
50#define radix_tree_indirect_to_ptr(ptr) \
51 radix_tree_indirect_to_ptr((void __force *)(ptr))
50 52
51static inline int radix_tree_is_indirect_ptr(void *ptr) 53static inline int radix_tree_is_indirect_ptr(void *ptr)
52{ 54{
@@ -61,7 +63,7 @@ static inline int radix_tree_is_indirect_ptr(void *ptr)
61struct radix_tree_root { 63struct radix_tree_root {
62 unsigned int height; 64 unsigned int height;
63 gfp_t gfp_mask; 65 gfp_t gfp_mask;
64 struct radix_tree_node *rnode; 66 struct radix_tree_node __rcu *rnode;
65}; 67};
66 68
67#define RADIX_TREE_INIT(mask) { \ 69#define RADIX_TREE_INIT(mask) { \
diff --git a/include/linux/rculist.h b/include/linux/rculist.h
index 4ec3b38ce9c5..f31ef61f1c65 100644
--- a/include/linux/rculist.h
+++ b/include/linux/rculist.h
@@ -10,6 +10,21 @@
10#include <linux/rcupdate.h> 10#include <linux/rcupdate.h>
11 11
12/* 12/*
13 * Why is there no list_empty_rcu()? Because list_empty() serves this
14 * purpose. The list_empty() function fetches the RCU-protected pointer
15 * and compares it to the address of the list head, but neither dereferences
16 * this pointer itself nor provides this pointer to the caller. Therefore,
17 * it is not necessary to use rcu_dereference(), so that list_empty() can
18 * be used anywhere you would want to use a list_empty_rcu().
19 */
20
21/*
22 * return the ->next pointer of a list_head in an rcu safe
23 * way, we must not access it directly
24 */
25#define list_next_rcu(list) (*((struct list_head __rcu **)(&(list)->next)))
26
27/*
13 * Insert a new entry between two known consecutive entries. 28 * Insert a new entry between two known consecutive entries.
14 * 29 *
15 * This is only for internal list manipulation where we know 30 * This is only for internal list manipulation where we know
@@ -20,7 +35,7 @@ static inline void __list_add_rcu(struct list_head *new,
20{ 35{
21 new->next = next; 36 new->next = next;
22 new->prev = prev; 37 new->prev = prev;
23 rcu_assign_pointer(prev->next, new); 38 rcu_assign_pointer(list_next_rcu(prev), new);
24 next->prev = new; 39 next->prev = new;
25} 40}
26 41
@@ -138,7 +153,7 @@ static inline void list_replace_rcu(struct list_head *old,
138{ 153{
139 new->next = old->next; 154 new->next = old->next;
140 new->prev = old->prev; 155 new->prev = old->prev;
141 rcu_assign_pointer(new->prev->next, new); 156 rcu_assign_pointer(list_next_rcu(new->prev), new);
142 new->next->prev = new; 157 new->next->prev = new;
143 old->prev = LIST_POISON2; 158 old->prev = LIST_POISON2;
144} 159}
@@ -193,7 +208,7 @@ static inline void list_splice_init_rcu(struct list_head *list,
193 */ 208 */
194 209
195 last->next = at; 210 last->next = at;
196 rcu_assign_pointer(head->next, first); 211 rcu_assign_pointer(list_next_rcu(head), first);
197 first->prev = head; 212 first->prev = head;
198 at->prev = last; 213 at->prev = last;
199} 214}
@@ -208,7 +223,9 @@ static inline void list_splice_init_rcu(struct list_head *list,
208 * primitives such as list_add_rcu() as long as it's guarded by rcu_read_lock(). 223 * primitives such as list_add_rcu() as long as it's guarded by rcu_read_lock().
209 */ 224 */
210#define list_entry_rcu(ptr, type, member) \ 225#define list_entry_rcu(ptr, type, member) \
211 container_of(rcu_dereference_raw(ptr), type, member) 226 ({typeof (*ptr) __rcu *__ptr = (typeof (*ptr) __rcu __force *)ptr; \
227 container_of((typeof(ptr))rcu_dereference_raw(__ptr), type, member); \
228 })
212 229
213/** 230/**
214 * list_first_entry_rcu - get the first element from a list 231 * list_first_entry_rcu - get the first element from a list
@@ -225,9 +242,9 @@ static inline void list_splice_init_rcu(struct list_head *list,
225 list_entry_rcu((ptr)->next, type, member) 242 list_entry_rcu((ptr)->next, type, member)
226 243
227#define __list_for_each_rcu(pos, head) \ 244#define __list_for_each_rcu(pos, head) \
228 for (pos = rcu_dereference_raw((head)->next); \ 245 for (pos = rcu_dereference_raw(list_next_rcu(head)); \
229 pos != (head); \ 246 pos != (head); \
230 pos = rcu_dereference_raw(pos->next)) 247 pos = rcu_dereference_raw(list_next_rcu((pos)))
231 248
232/** 249/**
233 * list_for_each_entry_rcu - iterate over rcu list of given type 250 * list_for_each_entry_rcu - iterate over rcu list of given type
@@ -257,9 +274,9 @@ static inline void list_splice_init_rcu(struct list_head *list,
257 * as long as the traversal is guarded by rcu_read_lock(). 274 * as long as the traversal is guarded by rcu_read_lock().
258 */ 275 */
259#define list_for_each_continue_rcu(pos, head) \ 276#define list_for_each_continue_rcu(pos, head) \
260 for ((pos) = rcu_dereference_raw((pos)->next); \ 277 for ((pos) = rcu_dereference_raw(list_next_rcu(pos)); \
261 prefetch((pos)->next), (pos) != (head); \ 278 prefetch((pos)->next), (pos) != (head); \
262 (pos) = rcu_dereference_raw((pos)->next)) 279 (pos) = rcu_dereference_raw(list_next_rcu(pos)))
263 280
264/** 281/**
265 * list_for_each_entry_continue_rcu - continue iteration over list of given type 282 * list_for_each_entry_continue_rcu - continue iteration over list of given type
@@ -314,12 +331,19 @@ static inline void hlist_replace_rcu(struct hlist_node *old,
314 331
315 new->next = next; 332 new->next = next;
316 new->pprev = old->pprev; 333 new->pprev = old->pprev;
317 rcu_assign_pointer(*new->pprev, new); 334 rcu_assign_pointer(*(struct hlist_node __rcu **)new->pprev, new);
318 if (next) 335 if (next)
319 new->next->pprev = &new->next; 336 new->next->pprev = &new->next;
320 old->pprev = LIST_POISON2; 337 old->pprev = LIST_POISON2;
321} 338}
322 339
340/*
341 * return the first or the next element in an RCU protected hlist
342 */
343#define hlist_first_rcu(head) (*((struct hlist_node __rcu **)(&(head)->first)))
344#define hlist_next_rcu(node) (*((struct hlist_node __rcu **)(&(node)->next)))
345#define hlist_pprev_rcu(node) (*((struct hlist_node __rcu **)((node)->pprev)))
346
323/** 347/**
324 * hlist_add_head_rcu 348 * hlist_add_head_rcu
325 * @n: the element to add to the hash list. 349 * @n: the element to add to the hash list.
@@ -346,7 +370,7 @@ static inline void hlist_add_head_rcu(struct hlist_node *n,
346 370
347 n->next = first; 371 n->next = first;
348 n->pprev = &h->first; 372 n->pprev = &h->first;
349 rcu_assign_pointer(h->first, n); 373 rcu_assign_pointer(hlist_first_rcu(h), n);
350 if (first) 374 if (first)
351 first->pprev = &n->next; 375 first->pprev = &n->next;
352} 376}
@@ -374,7 +398,7 @@ static inline void hlist_add_before_rcu(struct hlist_node *n,
374{ 398{
375 n->pprev = next->pprev; 399 n->pprev = next->pprev;
376 n->next = next; 400 n->next = next;
377 rcu_assign_pointer(*(n->pprev), n); 401 rcu_assign_pointer(hlist_pprev_rcu(n), n);
378 next->pprev = &n->next; 402 next->pprev = &n->next;
379} 403}
380 404
@@ -401,15 +425,15 @@ static inline void hlist_add_after_rcu(struct hlist_node *prev,
401{ 425{
402 n->next = prev->next; 426 n->next = prev->next;
403 n->pprev = &prev->next; 427 n->pprev = &prev->next;
404 rcu_assign_pointer(prev->next, n); 428 rcu_assign_pointer(hlist_next_rcu(prev), n);
405 if (n->next) 429 if (n->next)
406 n->next->pprev = &n->next; 430 n->next->pprev = &n->next;
407} 431}
408 432
409#define __hlist_for_each_rcu(pos, head) \ 433#define __hlist_for_each_rcu(pos, head) \
410 for (pos = rcu_dereference((head)->first); \ 434 for (pos = rcu_dereference(hlist_first_rcu(head)); \
411 pos && ({ prefetch(pos->next); 1; }); \ 435 pos && ({ prefetch(pos->next); 1; }); \
412 pos = rcu_dereference(pos->next)) 436 pos = rcu_dereference(hlist_next_rcu(pos)))
413 437
414/** 438/**
415 * hlist_for_each_entry_rcu - iterate over rcu list of given type 439 * hlist_for_each_entry_rcu - iterate over rcu list of given type
@@ -422,11 +446,11 @@ static inline void hlist_add_after_rcu(struct hlist_node *prev,
422 * the _rcu list-mutation primitives such as hlist_add_head_rcu() 446 * the _rcu list-mutation primitives such as hlist_add_head_rcu()
423 * as long as the traversal is guarded by rcu_read_lock(). 447 * as long as the traversal is guarded by rcu_read_lock().
424 */ 448 */
425#define hlist_for_each_entry_rcu(tpos, pos, head, member) \ 449#define hlist_for_each_entry_rcu(tpos, pos, head, member) \
426 for (pos = rcu_dereference_raw((head)->first); \ 450 for (pos = rcu_dereference_raw(hlist_first_rcu(head)); \
427 pos && ({ prefetch(pos->next); 1; }) && \ 451 pos && ({ prefetch(pos->next); 1; }) && \
428 ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1; }); \ 452 ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1; }); \
429 pos = rcu_dereference_raw(pos->next)) 453 pos = rcu_dereference_raw(hlist_next_rcu(pos)))
430 454
431/** 455/**
432 * hlist_for_each_entry_rcu_bh - iterate over rcu list of given type 456 * hlist_for_each_entry_rcu_bh - iterate over rcu list of given type
diff --git a/include/linux/rculist_nulls.h b/include/linux/rculist_nulls.h
index b70ffe53cb9f..2ae13714828b 100644
--- a/include/linux/rculist_nulls.h
+++ b/include/linux/rculist_nulls.h
@@ -37,6 +37,12 @@ static inline void hlist_nulls_del_init_rcu(struct hlist_nulls_node *n)
37 } 37 }
38} 38}
39 39
40#define hlist_nulls_first_rcu(head) \
41 (*((struct hlist_nulls_node __rcu __force **)&(head)->first))
42
43#define hlist_nulls_next_rcu(node) \
44 (*((struct hlist_nulls_node __rcu __force **)&(node)->next))
45
40/** 46/**
41 * hlist_nulls_del_rcu - deletes entry from hash list without re-initialization 47 * hlist_nulls_del_rcu - deletes entry from hash list without re-initialization
42 * @n: the element to delete from the hash list. 48 * @n: the element to delete from the hash list.
@@ -88,7 +94,7 @@ static inline void hlist_nulls_add_head_rcu(struct hlist_nulls_node *n,
88 94
89 n->next = first; 95 n->next = first;
90 n->pprev = &h->first; 96 n->pprev = &h->first;
91 rcu_assign_pointer(h->first, n); 97 rcu_assign_pointer(hlist_nulls_first_rcu(h), n);
92 if (!is_a_nulls(first)) 98 if (!is_a_nulls(first))
93 first->pprev = &n->next; 99 first->pprev = &n->next;
94} 100}
@@ -100,11 +106,11 @@ static inline void hlist_nulls_add_head_rcu(struct hlist_nulls_node *n,
100 * @member: the name of the hlist_nulls_node within the struct. 106 * @member: the name of the hlist_nulls_node within the struct.
101 * 107 *
102 */ 108 */
103#define hlist_nulls_for_each_entry_rcu(tpos, pos, head, member) \ 109#define hlist_nulls_for_each_entry_rcu(tpos, pos, head, member) \
104 for (pos = rcu_dereference_raw((head)->first); \ 110 for (pos = rcu_dereference_raw(hlist_nulls_first_rcu(head)); \
105 (!is_a_nulls(pos)) && \ 111 (!is_a_nulls(pos)) && \
106 ({ tpos = hlist_nulls_entry(pos, typeof(*tpos), member); 1; }); \ 112 ({ tpos = hlist_nulls_entry(pos, typeof(*tpos), member); 1; }); \
107 pos = rcu_dereference_raw(pos->next)) 113 pos = rcu_dereference_raw(hlist_nulls_next_rcu(pos)))
108 114
109#endif 115#endif
110#endif 116#endif
diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 9fbc54a2585d..03cda7bed985 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -41,11 +41,15 @@
41#include <linux/lockdep.h> 41#include <linux/lockdep.h>
42#include <linux/completion.h> 42#include <linux/completion.h>
43#include <linux/debugobjects.h> 43#include <linux/debugobjects.h>
44#include <linux/compiler.h>
44 45
45#ifdef CONFIG_RCU_TORTURE_TEST 46#ifdef CONFIG_RCU_TORTURE_TEST
46extern int rcutorture_runnable; /* for sysctl */ 47extern int rcutorture_runnable; /* for sysctl */
47#endif /* #ifdef CONFIG_RCU_TORTURE_TEST */ 48#endif /* #ifdef CONFIG_RCU_TORTURE_TEST */
48 49
50#define ULONG_CMP_GE(a, b) (ULONG_MAX / 2 >= (a) - (b))
51#define ULONG_CMP_LT(a, b) (ULONG_MAX / 2 < (a) - (b))
52
49/** 53/**
50 * struct rcu_head - callback structure for use with RCU 54 * struct rcu_head - callback structure for use with RCU
51 * @next: next update requests in a list 55 * @next: next update requests in a list
@@ -57,29 +61,94 @@ struct rcu_head {
57}; 61};
58 62
59/* Exported common interfaces */ 63/* Exported common interfaces */
60extern void rcu_barrier(void); 64extern void call_rcu_sched(struct rcu_head *head,
65 void (*func)(struct rcu_head *rcu));
66extern void synchronize_sched(void);
61extern void rcu_barrier_bh(void); 67extern void rcu_barrier_bh(void);
62extern void rcu_barrier_sched(void); 68extern void rcu_barrier_sched(void);
63extern void synchronize_sched_expedited(void); 69extern void synchronize_sched_expedited(void);
64extern int sched_expedited_torture_stats(char *page); 70extern int sched_expedited_torture_stats(char *page);
65 71
72static inline void __rcu_read_lock_bh(void)
73{
74 local_bh_disable();
75}
76
77static inline void __rcu_read_unlock_bh(void)
78{
79 local_bh_enable();
80}
81
82#ifdef CONFIG_PREEMPT_RCU
83
84extern void __rcu_read_lock(void);
85extern void __rcu_read_unlock(void);
86void synchronize_rcu(void);
87
88/*
89 * Defined as a macro as it is a very low level header included from
90 * areas that don't even know about current. This gives the rcu_read_lock()
91 * nesting depth, but makes sense only if CONFIG_PREEMPT_RCU -- in other
92 * types of kernel builds, the rcu_read_lock() nesting depth is unknowable.
93 */
94#define rcu_preempt_depth() (current->rcu_read_lock_nesting)
95
96#else /* #ifdef CONFIG_PREEMPT_RCU */
97
98static inline void __rcu_read_lock(void)
99{
100 preempt_disable();
101}
102
103static inline void __rcu_read_unlock(void)
104{
105 preempt_enable();
106}
107
108static inline void synchronize_rcu(void)
109{
110 synchronize_sched();
111}
112
113static inline int rcu_preempt_depth(void)
114{
115 return 0;
116}
117
118#endif /* #else #ifdef CONFIG_PREEMPT_RCU */
119
66/* Internal to kernel */ 120/* Internal to kernel */
67extern void rcu_init(void); 121extern void rcu_init(void);
122extern void rcu_sched_qs(int cpu);
123extern void rcu_bh_qs(int cpu);
124extern void rcu_check_callbacks(int cpu, int user);
125struct notifier_block;
126
127#ifdef CONFIG_NO_HZ
128
129extern void rcu_enter_nohz(void);
130extern void rcu_exit_nohz(void);
131
132#else /* #ifdef CONFIG_NO_HZ */
133
134static inline void rcu_enter_nohz(void)
135{
136}
137
138static inline void rcu_exit_nohz(void)
139{
140}
141
142#endif /* #else #ifdef CONFIG_NO_HZ */
68 143
69#if defined(CONFIG_TREE_RCU) || defined(CONFIG_TREE_PREEMPT_RCU) 144#if defined(CONFIG_TREE_RCU) || defined(CONFIG_TREE_PREEMPT_RCU)
70#include <linux/rcutree.h> 145#include <linux/rcutree.h>
71#elif defined(CONFIG_TINY_RCU) 146#elif defined(CONFIG_TINY_RCU) || defined(CONFIG_TINY_PREEMPT_RCU)
72#include <linux/rcutiny.h> 147#include <linux/rcutiny.h>
73#else 148#else
74#error "Unknown RCU implementation specified to kernel configuration" 149#error "Unknown RCU implementation specified to kernel configuration"
75#endif 150#endif
76 151
77#define RCU_HEAD_INIT { .next = NULL, .func = NULL }
78#define RCU_HEAD(head) struct rcu_head head = RCU_HEAD_INIT
79#define INIT_RCU_HEAD(ptr) do { \
80 (ptr)->next = NULL; (ptr)->func = NULL; \
81} while (0)
82
83/* 152/*
84 * init_rcu_head_on_stack()/destroy_rcu_head_on_stack() are needed for dynamic 153 * init_rcu_head_on_stack()/destroy_rcu_head_on_stack() are needed for dynamic
85 * initialization and destruction of rcu_head on the stack. rcu_head structures 154 * initialization and destruction of rcu_head on the stack. rcu_head structures
@@ -120,14 +189,15 @@ extern struct lockdep_map rcu_sched_lock_map;
120extern int debug_lockdep_rcu_enabled(void); 189extern int debug_lockdep_rcu_enabled(void);
121 190
122/** 191/**
123 * rcu_read_lock_held - might we be in RCU read-side critical section? 192 * rcu_read_lock_held() - might we be in RCU read-side critical section?
124 * 193 *
125 * If CONFIG_DEBUG_LOCK_ALLOC is selected, returns nonzero iff in an RCU 194 * If CONFIG_DEBUG_LOCK_ALLOC is selected, returns nonzero iff in an RCU
126 * read-side critical section. In absence of CONFIG_DEBUG_LOCK_ALLOC, 195 * read-side critical section. In absence of CONFIG_DEBUG_LOCK_ALLOC,
127 * this assumes we are in an RCU read-side critical section unless it can 196 * this assumes we are in an RCU read-side critical section unless it can
128 * prove otherwise. 197 * prove otherwise. This is useful for debug checks in functions that
198 * require that they be called within an RCU read-side critical section.
129 * 199 *
130 * Check debug_lockdep_rcu_enabled() to prevent false positives during boot 200 * Checks debug_lockdep_rcu_enabled() to prevent false positives during boot
131 * and while lockdep is disabled. 201 * and while lockdep is disabled.
132 */ 202 */
133static inline int rcu_read_lock_held(void) 203static inline int rcu_read_lock_held(void)
@@ -144,14 +214,16 @@ static inline int rcu_read_lock_held(void)
144extern int rcu_read_lock_bh_held(void); 214extern int rcu_read_lock_bh_held(void);
145 215
146/** 216/**
147 * rcu_read_lock_sched_held - might we be in RCU-sched read-side critical section? 217 * rcu_read_lock_sched_held() - might we be in RCU-sched read-side critical section?
148 * 218 *
149 * If CONFIG_DEBUG_LOCK_ALLOC is selected, returns nonzero iff in an 219 * If CONFIG_DEBUG_LOCK_ALLOC is selected, returns nonzero iff in an
150 * RCU-sched read-side critical section. In absence of 220 * RCU-sched read-side critical section. In absence of
151 * CONFIG_DEBUG_LOCK_ALLOC, this assumes we are in an RCU-sched read-side 221 * CONFIG_DEBUG_LOCK_ALLOC, this assumes we are in an RCU-sched read-side
152 * critical section unless it can prove otherwise. Note that disabling 222 * critical section unless it can prove otherwise. Note that disabling
153 * of preemption (including disabling irqs) counts as an RCU-sched 223 * of preemption (including disabling irqs) counts as an RCU-sched
154 * read-side critical section. 224 * read-side critical section. This is useful for debug checks in functions
225 * that require that they be called within an RCU-sched read-side
226 * critical section.
155 * 227 *
156 * Check debug_lockdep_rcu_enabled() to prevent false positives during boot 228 * Check debug_lockdep_rcu_enabled() to prevent false positives during boot
157 * and while lockdep is disabled. 229 * and while lockdep is disabled.
@@ -211,7 +283,11 @@ static inline int rcu_read_lock_sched_held(void)
211 283
212extern int rcu_my_thread_group_empty(void); 284extern int rcu_my_thread_group_empty(void);
213 285
214#define __do_rcu_dereference_check(c) \ 286/**
287 * rcu_lockdep_assert - emit lockdep splat if specified condition not met
288 * @c: condition to check
289 */
290#define rcu_lockdep_assert(c) \
215 do { \ 291 do { \
216 static bool __warned; \ 292 static bool __warned; \
217 if (debug_lockdep_rcu_enabled() && !__warned && !(c)) { \ 293 if (debug_lockdep_rcu_enabled() && !__warned && !(c)) { \
@@ -220,41 +296,163 @@ extern int rcu_my_thread_group_empty(void);
220 } \ 296 } \
221 } while (0) 297 } while (0)
222 298
299#else /* #ifdef CONFIG_PROVE_RCU */
300
301#define rcu_lockdep_assert(c) do { } while (0)
302
303#endif /* #else #ifdef CONFIG_PROVE_RCU */
304
305/*
306 * Helper functions for rcu_dereference_check(), rcu_dereference_protected()
307 * and rcu_assign_pointer(). Some of these could be folded into their
308 * callers, but they are left separate in order to ease introduction of
309 * multiple flavors of pointers to match the multiple flavors of RCU
310 * (e.g., __rcu_bh, __rcu_sched, and __srcu), should this make sense in
311 * the future.
312 */
313
314#ifdef __CHECKER__
315#define rcu_dereference_sparse(p, space) \
316 ((void)(((typeof(*p) space *)p) == p))
317#else /* #ifdef __CHECKER__ */
318#define rcu_dereference_sparse(p, space)
319#endif /* #else #ifdef __CHECKER__ */
320
321#define __rcu_access_pointer(p, space) \
322 ({ \
323 typeof(*p) *_________p1 = (typeof(*p)*__force )ACCESS_ONCE(p); \
324 rcu_dereference_sparse(p, space); \
325 ((typeof(*p) __force __kernel *)(_________p1)); \
326 })
327#define __rcu_dereference_check(p, c, space) \
328 ({ \
329 typeof(*p) *_________p1 = (typeof(*p)*__force )ACCESS_ONCE(p); \
330 rcu_lockdep_assert(c); \
331 rcu_dereference_sparse(p, space); \
332 smp_read_barrier_depends(); \
333 ((typeof(*p) __force __kernel *)(_________p1)); \
334 })
335#define __rcu_dereference_protected(p, c, space) \
336 ({ \
337 rcu_lockdep_assert(c); \
338 rcu_dereference_sparse(p, space); \
339 ((typeof(*p) __force __kernel *)(p)); \
340 })
341
342#define __rcu_dereference_index_check(p, c) \
343 ({ \
344 typeof(p) _________p1 = ACCESS_ONCE(p); \
345 rcu_lockdep_assert(c); \
346 smp_read_barrier_depends(); \
347 (_________p1); \
348 })
349#define __rcu_assign_pointer(p, v, space) \
350 ({ \
351 if (!__builtin_constant_p(v) || \
352 ((v) != NULL)) \
353 smp_wmb(); \
354 (p) = (typeof(*v) __force space *)(v); \
355 })
356
357
358/**
359 * rcu_access_pointer() - fetch RCU pointer with no dereferencing
360 * @p: The pointer to read
361 *
362 * Return the value of the specified RCU-protected pointer, but omit the
363 * smp_read_barrier_depends() and keep the ACCESS_ONCE(). This is useful
364 * when the value of this pointer is accessed, but the pointer is not
365 * dereferenced, for example, when testing an RCU-protected pointer against
366 * NULL. Although rcu_access_pointer() may also be used in cases where
367 * update-side locks prevent the value of the pointer from changing, you
368 * should instead use rcu_dereference_protected() for this use case.
369 */
370#define rcu_access_pointer(p) __rcu_access_pointer((p), __rcu)
371
223/** 372/**
224 * rcu_dereference_check - rcu_dereference with debug checking 373 * rcu_dereference_check() - rcu_dereference with debug checking
225 * @p: The pointer to read, prior to dereferencing 374 * @p: The pointer to read, prior to dereferencing
226 * @c: The conditions under which the dereference will take place 375 * @c: The conditions under which the dereference will take place
227 * 376 *
228 * Do an rcu_dereference(), but check that the conditions under which the 377 * Do an rcu_dereference(), but check that the conditions under which the
229 * dereference will take place are correct. Typically the conditions indicate 378 * dereference will take place are correct. Typically the conditions
230 * the various locking conditions that should be held at that point. The check 379 * indicate the various locking conditions that should be held at that
231 * should return true if the conditions are satisfied. 380 * point. The check should return true if the conditions are satisfied.
381 * An implicit check for being in an RCU read-side critical section
382 * (rcu_read_lock()) is included.
232 * 383 *
233 * For example: 384 * For example:
234 * 385 *
235 * bar = rcu_dereference_check(foo->bar, rcu_read_lock_held() || 386 * bar = rcu_dereference_check(foo->bar, lockdep_is_held(&foo->lock));
236 * lockdep_is_held(&foo->lock));
237 * 387 *
238 * could be used to indicate to lockdep that foo->bar may only be dereferenced 388 * could be used to indicate to lockdep that foo->bar may only be dereferenced
239 * if either the RCU read lock is held, or that the lock required to replace 389 * if either rcu_read_lock() is held, or that the lock required to replace
240 * the bar struct at foo->bar is held. 390 * the bar struct at foo->bar is held.
241 * 391 *
242 * Note that the list of conditions may also include indications of when a lock 392 * Note that the list of conditions may also include indications of when a lock
243 * need not be held, for example during initialisation or destruction of the 393 * need not be held, for example during initialisation or destruction of the
244 * target struct: 394 * target struct:
245 * 395 *
246 * bar = rcu_dereference_check(foo->bar, rcu_read_lock_held() || 396 * bar = rcu_dereference_check(foo->bar, lockdep_is_held(&foo->lock) ||
247 * lockdep_is_held(&foo->lock) ||
248 * atomic_read(&foo->usage) == 0); 397 * atomic_read(&foo->usage) == 0);
398 *
399 * Inserts memory barriers on architectures that require them
400 * (currently only the Alpha), prevents the compiler from refetching
401 * (and from merging fetches), and, more importantly, documents exactly
402 * which pointers are protected by RCU and checks that the pointer is
403 * annotated as __rcu.
249 */ 404 */
250#define rcu_dereference_check(p, c) \ 405#define rcu_dereference_check(p, c) \
251 ({ \ 406 __rcu_dereference_check((p), rcu_read_lock_held() || (c), __rcu)
252 __do_rcu_dereference_check(c); \ 407
253 rcu_dereference_raw(p); \ 408/**
254 }) 409 * rcu_dereference_bh_check() - rcu_dereference_bh with debug checking
410 * @p: The pointer to read, prior to dereferencing
411 * @c: The conditions under which the dereference will take place
412 *
413 * This is the RCU-bh counterpart to rcu_dereference_check().
414 */
415#define rcu_dereference_bh_check(p, c) \
416 __rcu_dereference_check((p), rcu_read_lock_bh_held() || (c), __rcu)
255 417
256/** 418/**
257 * rcu_dereference_protected - fetch RCU pointer when updates prevented 419 * rcu_dereference_sched_check() - rcu_dereference_sched with debug checking
420 * @p: The pointer to read, prior to dereferencing
421 * @c: The conditions under which the dereference will take place
422 *
423 * This is the RCU-sched counterpart to rcu_dereference_check().
424 */
425#define rcu_dereference_sched_check(p, c) \
426 __rcu_dereference_check((p), rcu_read_lock_sched_held() || (c), \
427 __rcu)
428
429#define rcu_dereference_raw(p) rcu_dereference_check(p, 1) /*@@@ needed? @@@*/
430
431/**
432 * rcu_dereference_index_check() - rcu_dereference for indices with debug checking
433 * @p: The pointer to read, prior to dereferencing
434 * @c: The conditions under which the dereference will take place
435 *
436 * Similar to rcu_dereference_check(), but omits the sparse checking.
437 * This allows rcu_dereference_index_check() to be used on integers,
438 * which can then be used as array indices. Attempting to use
439 * rcu_dereference_check() on an integer will give compiler warnings
440 * because the sparse address-space mechanism relies on dereferencing
441 * the RCU-protected pointer. Dereferencing integers is not something
442 * that even gcc will put up with.
443 *
444 * Note that this function does not implicitly check for RCU read-side
445 * critical sections. If this function gains lots of uses, it might
446 * make sense to provide versions for each flavor of RCU, but it does
447 * not make sense as of early 2010.
448 */
449#define rcu_dereference_index_check(p, c) \
450 __rcu_dereference_index_check((p), (c))
451
452/**
453 * rcu_dereference_protected() - fetch RCU pointer when updates prevented
454 * @p: The pointer to read, prior to dereferencing
455 * @c: The conditions under which the dereference will take place
258 * 456 *
259 * Return the value of the specified RCU-protected pointer, but omit 457 * Return the value of the specified RCU-protected pointer, but omit
260 * both the smp_read_barrier_depends() and the ACCESS_ONCE(). This 458 * both the smp_read_barrier_depends() and the ACCESS_ONCE(). This
@@ -263,35 +461,61 @@ extern int rcu_my_thread_group_empty(void);
263 * prevent the compiler from repeating this reference or combining it 461 * prevent the compiler from repeating this reference or combining it
264 * with other references, so it should not be used without protection 462 * with other references, so it should not be used without protection
265 * of appropriate locks. 463 * of appropriate locks.
464 *
465 * This function is only for update-side use. Using this function
466 * when protected only by rcu_read_lock() will result in infrequent
467 * but very ugly failures.
266 */ 468 */
267#define rcu_dereference_protected(p, c) \ 469#define rcu_dereference_protected(p, c) \
268 ({ \ 470 __rcu_dereference_protected((p), (c), __rcu)
269 __do_rcu_dereference_check(c); \
270 (p); \
271 })
272 471
273#else /* #ifdef CONFIG_PROVE_RCU */ 472/**
473 * rcu_dereference_bh_protected() - fetch RCU-bh pointer when updates prevented
474 * @p: The pointer to read, prior to dereferencing
475 * @c: The conditions under which the dereference will take place
476 *
477 * This is the RCU-bh counterpart to rcu_dereference_protected().
478 */
479#define rcu_dereference_bh_protected(p, c) \
480 __rcu_dereference_protected((p), (c), __rcu)
274 481
275#define rcu_dereference_check(p, c) rcu_dereference_raw(p) 482/**
276#define rcu_dereference_protected(p, c) (p) 483 * rcu_dereference_sched_protected() - fetch RCU-sched pointer when updates prevented
484 * @p: The pointer to read, prior to dereferencing
485 * @c: The conditions under which the dereference will take place
486 *
487 * This is the RCU-sched counterpart to rcu_dereference_protected().
488 */
489#define rcu_dereference_sched_protected(p, c) \
490 __rcu_dereference_protected((p), (c), __rcu)
277 491
278#endif /* #else #ifdef CONFIG_PROVE_RCU */
279 492
280/** 493/**
281 * rcu_access_pointer - fetch RCU pointer with no dereferencing 494 * rcu_dereference() - fetch RCU-protected pointer for dereferencing
495 * @p: The pointer to read, prior to dereferencing
282 * 496 *
283 * Return the value of the specified RCU-protected pointer, but omit the 497 * This is a simple wrapper around rcu_dereference_check().
284 * smp_read_barrier_depends() and keep the ACCESS_ONCE(). This is useful 498 */
285 * when the value of this pointer is accessed, but the pointer is not 499#define rcu_dereference(p) rcu_dereference_check(p, 0)
286 * dereferenced, for example, when testing an RCU-protected pointer against 500
287 * NULL. This may also be used in cases where update-side locks prevent 501/**
288 * the value of the pointer from changing, but rcu_dereference_protected() 502 * rcu_dereference_bh() - fetch an RCU-bh-protected pointer for dereferencing
289 * is a lighter-weight primitive for this use case. 503 * @p: The pointer to read, prior to dereferencing
504 *
505 * Makes rcu_dereference_check() do the dirty work.
506 */
507#define rcu_dereference_bh(p) rcu_dereference_bh_check(p, 0)
508
509/**
510 * rcu_dereference_sched() - fetch RCU-sched-protected pointer for dereferencing
511 * @p: The pointer to read, prior to dereferencing
512 *
513 * Makes rcu_dereference_check() do the dirty work.
290 */ 514 */
291#define rcu_access_pointer(p) ACCESS_ONCE(p) 515#define rcu_dereference_sched(p) rcu_dereference_sched_check(p, 0)
292 516
293/** 517/**
294 * rcu_read_lock - mark the beginning of an RCU read-side critical section. 518 * rcu_read_lock() - mark the beginning of an RCU read-side critical section
295 * 519 *
296 * When synchronize_rcu() is invoked on one CPU while other CPUs 520 * When synchronize_rcu() is invoked on one CPU while other CPUs
297 * are within RCU read-side critical sections, then the 521 * are within RCU read-side critical sections, then the
@@ -302,7 +526,7 @@ extern int rcu_my_thread_group_empty(void);
302 * until after all the other CPUs exit their critical sections. 526 * until after all the other CPUs exit their critical sections.
303 * 527 *
304 * Note, however, that RCU callbacks are permitted to run concurrently 528 * Note, however, that RCU callbacks are permitted to run concurrently
305 * with RCU read-side critical sections. One way that this can happen 529 * with new RCU read-side critical sections. One way that this can happen
306 * is via the following sequence of events: (1) CPU 0 enters an RCU 530 * is via the following sequence of events: (1) CPU 0 enters an RCU
307 * read-side critical section, (2) CPU 1 invokes call_rcu() to register 531 * read-side critical section, (2) CPU 1 invokes call_rcu() to register
308 * an RCU callback, (3) CPU 0 exits the RCU read-side critical section, 532 * an RCU callback, (3) CPU 0 exits the RCU read-side critical section,
@@ -317,7 +541,20 @@ extern int rcu_my_thread_group_empty(void);
317 * will be deferred until the outermost RCU read-side critical section 541 * will be deferred until the outermost RCU read-side critical section
318 * completes. 542 * completes.
319 * 543 *
320 * It is illegal to block while in an RCU read-side critical section. 544 * You can avoid reading and understanding the next paragraph by
545 * following this rule: don't put anything in an rcu_read_lock() RCU
546 * read-side critical section that would block in a !PREEMPT kernel.
547 * But if you want the full story, read on!
548 *
549 * In non-preemptible RCU implementations (TREE_RCU and TINY_RCU), it
550 * is illegal to block while in an RCU read-side critical section. In
551 * preemptible RCU implementations (TREE_PREEMPT_RCU and TINY_PREEMPT_RCU)
552 * in CONFIG_PREEMPT kernel builds, RCU read-side critical sections may
553 * be preempted, but explicit blocking is illegal. Finally, in preemptible
554 * RCU implementations in real-time (CONFIG_PREEMPT_RT) kernel builds,
555 * RCU read-side critical sections may be preempted and they may also
556 * block, but only when acquiring spinlocks that are subject to priority
557 * inheritance.
321 */ 558 */
322static inline void rcu_read_lock(void) 559static inline void rcu_read_lock(void)
323{ 560{
@@ -337,7 +574,7 @@ static inline void rcu_read_lock(void)
337 */ 574 */
338 575
339/** 576/**
340 * rcu_read_unlock - marks the end of an RCU read-side critical section. 577 * rcu_read_unlock() - marks the end of an RCU read-side critical section.
341 * 578 *
342 * See rcu_read_lock() for more information. 579 * See rcu_read_lock() for more information.
343 */ 580 */
@@ -349,15 +586,16 @@ static inline void rcu_read_unlock(void)
349} 586}
350 587
351/** 588/**
352 * rcu_read_lock_bh - mark the beginning of a softirq-only RCU critical section 589 * rcu_read_lock_bh() - mark the beginning of an RCU-bh critical section
353 * 590 *
354 * This is equivalent of rcu_read_lock(), but to be used when updates 591 * This is equivalent of rcu_read_lock(), but to be used when updates
355 * are being done using call_rcu_bh(). Since call_rcu_bh() callbacks 592 * are being done using call_rcu_bh() or synchronize_rcu_bh(). Since
356 * consider completion of a softirq handler to be a quiescent state, 593 * both call_rcu_bh() and synchronize_rcu_bh() consider completion of a
357 * a process in RCU read-side critical section must be protected by 594 * softirq handler to be a quiescent state, a process in RCU read-side
358 * disabling softirqs. Read-side critical sections in interrupt context 595 * critical section must be protected by disabling softirqs. Read-side
359 * can use just rcu_read_lock(). 596 * critical sections in interrupt context can use just rcu_read_lock(),
360 * 597 * though this should at least be commented to avoid confusing people
598 * reading the code.
361 */ 599 */
362static inline void rcu_read_lock_bh(void) 600static inline void rcu_read_lock_bh(void)
363{ 601{
@@ -379,13 +617,12 @@ static inline void rcu_read_unlock_bh(void)
379} 617}
380 618
381/** 619/**
382 * rcu_read_lock_sched - mark the beginning of a RCU-classic critical section 620 * rcu_read_lock_sched() - mark the beginning of a RCU-sched critical section
383 * 621 *
384 * Should be used with either 622 * This is equivalent of rcu_read_lock(), but to be used when updates
385 * - synchronize_sched() 623 * are being done using call_rcu_sched() or synchronize_rcu_sched().
386 * or 624 * Read-side critical sections can also be introduced by anything that
387 * - call_rcu_sched() and rcu_barrier_sched() 625 * disables preemption, including local_irq_disable() and friends.
388 * on the write-side to insure proper synchronization.
389 */ 626 */
390static inline void rcu_read_lock_sched(void) 627static inline void rcu_read_lock_sched(void)
391{ 628{
@@ -420,54 +657,14 @@ static inline notrace void rcu_read_unlock_sched_notrace(void)
420 preempt_enable_notrace(); 657 preempt_enable_notrace();
421} 658}
422 659
423
424/** 660/**
425 * rcu_dereference_raw - fetch an RCU-protected pointer 661 * rcu_assign_pointer() - assign to RCU-protected pointer
662 * @p: pointer to assign to
663 * @v: value to assign (publish)
426 * 664 *
427 * The caller must be within some flavor of RCU read-side critical 665 * Assigns the specified value to the specified RCU-protected
428 * section, or must be otherwise preventing the pointer from changing, 666 * pointer, ensuring that any concurrent RCU readers will see
429 * for example, by holding an appropriate lock. This pointer may later 667 * any prior initialization. Returns the value assigned.
430 * be safely dereferenced. It is the caller's responsibility to have
431 * done the right thing, as this primitive does no checking of any kind.
432 *
433 * Inserts memory barriers on architectures that require them
434 * (currently only the Alpha), and, more importantly, documents
435 * exactly which pointers are protected by RCU.
436 */
437#define rcu_dereference_raw(p) ({ \
438 typeof(p) _________p1 = ACCESS_ONCE(p); \
439 smp_read_barrier_depends(); \
440 (_________p1); \
441 })
442
443/**
444 * rcu_dereference - fetch an RCU-protected pointer, checking for RCU
445 *
446 * Makes rcu_dereference_check() do the dirty work.
447 */
448#define rcu_dereference(p) \
449 rcu_dereference_check(p, rcu_read_lock_held())
450
451/**
452 * rcu_dereference_bh - fetch an RCU-protected pointer, checking for RCU-bh
453 *
454 * Makes rcu_dereference_check() do the dirty work.
455 */
456#define rcu_dereference_bh(p) \
457 rcu_dereference_check(p, rcu_read_lock_bh_held())
458
459/**
460 * rcu_dereference_sched - fetch RCU-protected pointer, checking for RCU-sched
461 *
462 * Makes rcu_dereference_check() do the dirty work.
463 */
464#define rcu_dereference_sched(p) \
465 rcu_dereference_check(p, rcu_read_lock_sched_held())
466
467/**
468 * rcu_assign_pointer - assign (publicize) a pointer to a newly
469 * initialized structure that will be dereferenced by RCU read-side
470 * critical sections. Returns the value assigned.
471 * 668 *
472 * Inserts memory barriers on architectures that require them 669 * Inserts memory barriers on architectures that require them
473 * (pretty much all of them other than x86), and also prevents 670 * (pretty much all of them other than x86), and also prevents
@@ -476,14 +673,17 @@ static inline notrace void rcu_read_unlock_sched_notrace(void)
476 * call documents which pointers will be dereferenced by RCU read-side 673 * call documents which pointers will be dereferenced by RCU read-side
477 * code. 674 * code.
478 */ 675 */
479
480#define rcu_assign_pointer(p, v) \ 676#define rcu_assign_pointer(p, v) \
481 ({ \ 677 __rcu_assign_pointer((p), (v), __rcu)
482 if (!__builtin_constant_p(v) || \ 678
483 ((v) != NULL)) \ 679/**
484 smp_wmb(); \ 680 * RCU_INIT_POINTER() - initialize an RCU protected pointer
485 (p) = (v); \ 681 *
486 }) 682 * Initialize an RCU-protected pointer in such a way to avoid RCU-lockdep
683 * splats.
684 */
685#define RCU_INIT_POINTER(p, v) \
686 p = (typeof(*v) __force __rcu *)(v)
487 687
488/* Infrastructure to implement the synchronize_() primitives. */ 688/* Infrastructure to implement the synchronize_() primitives. */
489 689
@@ -494,26 +694,37 @@ struct rcu_synchronize {
494 694
495extern void wakeme_after_rcu(struct rcu_head *head); 695extern void wakeme_after_rcu(struct rcu_head *head);
496 696
697#ifdef CONFIG_PREEMPT_RCU
698
497/** 699/**
498 * call_rcu - Queue an RCU callback for invocation after a grace period. 700 * call_rcu() - Queue an RCU callback for invocation after a grace period.
499 * @head: structure to be used for queueing the RCU updates. 701 * @head: structure to be used for queueing the RCU updates.
500 * @func: actual update function to be invoked after the grace period 702 * @func: actual callback function to be invoked after the grace period
501 * 703 *
502 * The update function will be invoked some time after a full grace 704 * The callback function will be invoked some time after a full grace
503 * period elapses, in other words after all currently executing RCU 705 * period elapses, in other words after all pre-existing RCU read-side
504 * read-side critical sections have completed. RCU read-side critical 706 * critical sections have completed. However, the callback function
707 * might well execute concurrently with RCU read-side critical sections
708 * that started after call_rcu() was invoked. RCU read-side critical
505 * sections are delimited by rcu_read_lock() and rcu_read_unlock(), 709 * sections are delimited by rcu_read_lock() and rcu_read_unlock(),
506 * and may be nested. 710 * and may be nested.
507 */ 711 */
508extern void call_rcu(struct rcu_head *head, 712extern void call_rcu(struct rcu_head *head,
509 void (*func)(struct rcu_head *head)); 713 void (*func)(struct rcu_head *head));
510 714
715#else /* #ifdef CONFIG_PREEMPT_RCU */
716
717/* In classic RCU, call_rcu() is just call_rcu_sched(). */
718#define call_rcu call_rcu_sched
719
720#endif /* #else #ifdef CONFIG_PREEMPT_RCU */
721
511/** 722/**
512 * call_rcu_bh - Queue an RCU for invocation after a quicker grace period. 723 * call_rcu_bh() - Queue an RCU for invocation after a quicker grace period.
513 * @head: structure to be used for queueing the RCU updates. 724 * @head: structure to be used for queueing the RCU updates.
514 * @func: actual update function to be invoked after the grace period 725 * @func: actual callback function to be invoked after the grace period
515 * 726 *
516 * The update function will be invoked some time after a full grace 727 * The callback function will be invoked some time after a full grace
517 * period elapses, in other words after all currently executing RCU 728 * period elapses, in other words after all currently executing RCU
518 * read-side critical sections have completed. call_rcu_bh() assumes 729 * read-side critical sections have completed. call_rcu_bh() assumes
519 * that the read-side critical sections end on completion of a softirq 730 * that the read-side critical sections end on completion of a softirq
@@ -566,37 +777,4 @@ static inline void debug_rcu_head_unqueue(struct rcu_head *head)
566} 777}
567#endif /* #else !CONFIG_DEBUG_OBJECTS_RCU_HEAD */ 778#endif /* #else !CONFIG_DEBUG_OBJECTS_RCU_HEAD */
568 779
569#ifndef CONFIG_PROVE_RCU
570#define __do_rcu_dereference_check(c) do { } while (0)
571#endif /* #ifdef CONFIG_PROVE_RCU */
572
573#define __rcu_dereference_index_check(p, c) \
574 ({ \
575 typeof(p) _________p1 = ACCESS_ONCE(p); \
576 __do_rcu_dereference_check(c); \
577 smp_read_barrier_depends(); \
578 (_________p1); \
579 })
580
581/**
582 * rcu_dereference_index_check() - rcu_dereference for indices with debug checking
583 * @p: The pointer to read, prior to dereferencing
584 * @c: The conditions under which the dereference will take place
585 *
586 * Similar to rcu_dereference_check(), but omits the sparse checking.
587 * This allows rcu_dereference_index_check() to be used on integers,
588 * which can then be used as array indices. Attempting to use
589 * rcu_dereference_check() on an integer will give compiler warnings
590 * because the sparse address-space mechanism relies on dereferencing
591 * the RCU-protected pointer. Dereferencing integers is not something
592 * that even gcc will put up with.
593 *
594 * Note that this function does not implicitly check for RCU read-side
595 * critical sections. If this function gains lots of uses, it might
596 * make sense to provide versions for each flavor of RCU, but it does
597 * not make sense as of early 2010.
598 */
599#define rcu_dereference_index_check(p, c) \
600 __rcu_dereference_index_check((p), (c))
601
602#endif /* __LINUX_RCUPDATE_H */ 780#endif /* __LINUX_RCUPDATE_H */
diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h
index e2e893144a84..13877cb93a60 100644
--- a/include/linux/rcutiny.h
+++ b/include/linux/rcutiny.h
@@ -27,103 +27,101 @@
27 27
28#include <linux/cache.h> 28#include <linux/cache.h>
29 29
30void rcu_sched_qs(int cpu); 30#define rcu_init_sched() do { } while (0)
31void rcu_bh_qs(int cpu);
32static inline void rcu_note_context_switch(int cpu)
33{
34 rcu_sched_qs(cpu);
35}
36 31
37#define __rcu_read_lock() preempt_disable() 32#ifdef CONFIG_TINY_RCU
38#define __rcu_read_unlock() preempt_enable()
39#define __rcu_read_lock_bh() local_bh_disable()
40#define __rcu_read_unlock_bh() local_bh_enable()
41#define call_rcu_sched call_rcu
42 33
43#define rcu_init_sched() do { } while (0) 34static inline void synchronize_rcu_expedited(void)
44extern void rcu_check_callbacks(int cpu, int user); 35{
36 synchronize_sched(); /* Only one CPU, so pretty fast anyway!!! */
37}
45 38
46static inline int rcu_needs_cpu(int cpu) 39static inline void rcu_barrier(void)
47{ 40{
48 return 0; 41 rcu_barrier_sched(); /* Only one CPU, so only one list of callbacks! */
49} 42}
50 43
51/* 44#else /* #ifdef CONFIG_TINY_RCU */
52 * Return the number of grace periods. 45
53 */ 46void rcu_barrier(void);
54static inline long rcu_batches_completed(void) 47void synchronize_rcu_expedited(void);
48
49#endif /* #else #ifdef CONFIG_TINY_RCU */
50
51static inline void synchronize_rcu_bh(void)
55{ 52{
56 return 0; 53 synchronize_sched();
57} 54}
58 55
59/* 56static inline void synchronize_rcu_bh_expedited(void)
60 * Return the number of bottom-half grace periods.
61 */
62static inline long rcu_batches_completed_bh(void)
63{ 57{
64 return 0; 58 synchronize_sched();
65} 59}
66 60
67static inline void rcu_force_quiescent_state(void) 61#ifdef CONFIG_TINY_RCU
62
63static inline void rcu_preempt_note_context_switch(void)
68{ 64{
69} 65}
70 66
71static inline void rcu_bh_force_quiescent_state(void) 67static inline void exit_rcu(void)
72{ 68{
73} 69}
74 70
75static inline void rcu_sched_force_quiescent_state(void) 71static inline int rcu_needs_cpu(int cpu)
76{ 72{
73 return 0;
77} 74}
78 75
79extern void synchronize_sched(void); 76#else /* #ifdef CONFIG_TINY_RCU */
77
78void rcu_preempt_note_context_switch(void);
79extern void exit_rcu(void);
80int rcu_preempt_needs_cpu(void);
80 81
81static inline void synchronize_rcu(void) 82static inline int rcu_needs_cpu(int cpu)
82{ 83{
83 synchronize_sched(); 84 return rcu_preempt_needs_cpu();
84} 85}
85 86
86static inline void synchronize_rcu_bh(void) 87#endif /* #else #ifdef CONFIG_TINY_RCU */
88
89static inline void rcu_note_context_switch(int cpu)
87{ 90{
88 synchronize_sched(); 91 rcu_sched_qs(cpu);
92 rcu_preempt_note_context_switch();
89} 93}
90 94
91static inline void synchronize_rcu_expedited(void) 95/*
96 * Return the number of grace periods.
97 */
98static inline long rcu_batches_completed(void)
92{ 99{
93 synchronize_sched(); 100 return 0;
94} 101}
95 102
96static inline void synchronize_rcu_bh_expedited(void) 103/*
104 * Return the number of bottom-half grace periods.
105 */
106static inline long rcu_batches_completed_bh(void)
97{ 107{
98 synchronize_sched(); 108 return 0;
99} 109}
100 110
101struct notifier_block; 111static inline void rcu_force_quiescent_state(void)
102
103#ifdef CONFIG_NO_HZ
104
105extern void rcu_enter_nohz(void);
106extern void rcu_exit_nohz(void);
107
108#else /* #ifdef CONFIG_NO_HZ */
109
110static inline void rcu_enter_nohz(void)
111{ 112{
112} 113}
113 114
114static inline void rcu_exit_nohz(void) 115static inline void rcu_bh_force_quiescent_state(void)
115{ 116{
116} 117}
117 118
118#endif /* #else #ifdef CONFIG_NO_HZ */ 119static inline void rcu_sched_force_quiescent_state(void)
119
120static inline void exit_rcu(void)
121{ 120{
122} 121}
123 122
124static inline int rcu_preempt_depth(void) 123static inline void rcu_cpu_stall_reset(void)
125{ 124{
126 return 0;
127} 125}
128 126
129#ifdef CONFIG_DEBUG_LOCK_ALLOC 127#ifdef CONFIG_DEBUG_LOCK_ALLOC
diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h
index c0ed1c056f29..95518e628794 100644
--- a/include/linux/rcutree.h
+++ b/include/linux/rcutree.h
@@ -30,64 +30,23 @@
30#ifndef __LINUX_RCUTREE_H 30#ifndef __LINUX_RCUTREE_H
31#define __LINUX_RCUTREE_H 31#define __LINUX_RCUTREE_H
32 32
33struct notifier_block;
34
35extern void rcu_sched_qs(int cpu);
36extern void rcu_bh_qs(int cpu);
37extern void rcu_note_context_switch(int cpu); 33extern void rcu_note_context_switch(int cpu);
38extern int rcu_needs_cpu(int cpu); 34extern int rcu_needs_cpu(int cpu);
35extern void rcu_cpu_stall_reset(void);
39 36
40#ifdef CONFIG_TREE_PREEMPT_RCU 37#ifdef CONFIG_TREE_PREEMPT_RCU
41 38
42extern void __rcu_read_lock(void);
43extern void __rcu_read_unlock(void);
44extern void synchronize_rcu(void);
45extern void exit_rcu(void); 39extern void exit_rcu(void);
46 40
47/*
48 * Defined as macro as it is a very low level header
49 * included from areas that don't even know about current
50 */
51#define rcu_preempt_depth() (current->rcu_read_lock_nesting)
52
53#else /* #ifdef CONFIG_TREE_PREEMPT_RCU */ 41#else /* #ifdef CONFIG_TREE_PREEMPT_RCU */
54 42
55static inline void __rcu_read_lock(void)
56{
57 preempt_disable();
58}
59
60static inline void __rcu_read_unlock(void)
61{
62 preempt_enable();
63}
64
65#define synchronize_rcu synchronize_sched
66
67static inline void exit_rcu(void) 43static inline void exit_rcu(void)
68{ 44{
69} 45}
70 46
71static inline int rcu_preempt_depth(void)
72{
73 return 0;
74}
75
76#endif /* #else #ifdef CONFIG_TREE_PREEMPT_RCU */ 47#endif /* #else #ifdef CONFIG_TREE_PREEMPT_RCU */
77 48
78static inline void __rcu_read_lock_bh(void)
79{
80 local_bh_disable();
81}
82static inline void __rcu_read_unlock_bh(void)
83{
84 local_bh_enable();
85}
86
87extern void call_rcu_sched(struct rcu_head *head,
88 void (*func)(struct rcu_head *rcu));
89extern void synchronize_rcu_bh(void); 49extern void synchronize_rcu_bh(void);
90extern void synchronize_sched(void);
91extern void synchronize_rcu_expedited(void); 50extern void synchronize_rcu_expedited(void);
92 51
93static inline void synchronize_rcu_bh_expedited(void) 52static inline void synchronize_rcu_bh_expedited(void)
@@ -95,7 +54,7 @@ static inline void synchronize_rcu_bh_expedited(void)
95 synchronize_sched_expedited(); 54 synchronize_sched_expedited();
96} 55}
97 56
98extern void rcu_check_callbacks(int cpu, int user); 57extern void rcu_barrier(void);
99 58
100extern long rcu_batches_completed(void); 59extern long rcu_batches_completed(void);
101extern long rcu_batches_completed_bh(void); 60extern long rcu_batches_completed_bh(void);
@@ -104,18 +63,6 @@ extern void rcu_force_quiescent_state(void);
104extern void rcu_bh_force_quiescent_state(void); 63extern void rcu_bh_force_quiescent_state(void);
105extern void rcu_sched_force_quiescent_state(void); 64extern void rcu_sched_force_quiescent_state(void);
106 65
107#ifdef CONFIG_NO_HZ
108void rcu_enter_nohz(void);
109void rcu_exit_nohz(void);
110#else /* CONFIG_NO_HZ */
111static inline void rcu_enter_nohz(void)
112{
113}
114static inline void rcu_exit_nohz(void)
115{
116}
117#endif /* CONFIG_NO_HZ */
118
119/* A context switch is a grace period for RCU-sched and RCU-bh. */ 66/* A context switch is a grace period for RCU-sched and RCU-bh. */
120static inline int rcu_blocking_is_gp(void) 67static inline int rcu_blocking_is_gp(void)
121{ 68{
diff --git a/include/linux/resume-trace.h b/include/linux/resume-trace.h
index bc8c3881c729..f31db2368782 100644
--- a/include/linux/resume-trace.h
+++ b/include/linux/resume-trace.h
@@ -3,6 +3,7 @@
3 3
4#ifdef CONFIG_PM_TRACE 4#ifdef CONFIG_PM_TRACE
5#include <asm/resume-trace.h> 5#include <asm/resume-trace.h>
6#include <linux/types.h>
6 7
7extern int pm_trace_enabled; 8extern int pm_trace_enabled;
8 9
@@ -14,6 +15,7 @@ static inline int pm_trace_is_enabled(void)
14struct device; 15struct device;
15extern void set_trace_device(struct device *); 16extern void set_trace_device(struct device *);
16extern void generate_resume_trace(const void *tracedata, unsigned int user); 17extern void generate_resume_trace(const void *tracedata, unsigned int user);
18extern int show_trace_dev_match(char *buf, size_t size);
17 19
18#define TRACE_DEVICE(dev) do { \ 20#define TRACE_DEVICE(dev) do { \
19 if (pm_trace_enabled) \ 21 if (pm_trace_enabled) \
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 1e2a6db2d7dd..0383601a927c 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -875,6 +875,7 @@ enum sched_domain_level {
875 SD_LV_NONE = 0, 875 SD_LV_NONE = 0,
876 SD_LV_SIBLING, 876 SD_LV_SIBLING,
877 SD_LV_MC, 877 SD_LV_MC,
878 SD_LV_BOOK,
878 SD_LV_CPU, 879 SD_LV_CPU,
879 SD_LV_NODE, 880 SD_LV_NODE,
880 SD_LV_ALLNODES, 881 SD_LV_ALLNODES,
@@ -1160,6 +1161,13 @@ struct sched_rt_entity {
1160 1161
1161struct rcu_node; 1162struct rcu_node;
1162 1163
1164enum perf_event_task_context {
1165 perf_invalid_context = -1,
1166 perf_hw_context = 0,
1167 perf_sw_context,
1168 perf_nr_task_contexts,
1169};
1170
1163struct task_struct { 1171struct task_struct {
1164 volatile long state; /* -1 unrunnable, 0 runnable, >0 stopped */ 1172 volatile long state; /* -1 unrunnable, 0 runnable, >0 stopped */
1165 void *stack; 1173 void *stack;
@@ -1202,11 +1210,13 @@ struct task_struct {
1202 unsigned int policy; 1210 unsigned int policy;
1203 cpumask_t cpus_allowed; 1211 cpumask_t cpus_allowed;
1204 1212
1205#ifdef CONFIG_TREE_PREEMPT_RCU 1213#ifdef CONFIG_PREEMPT_RCU
1206 int rcu_read_lock_nesting; 1214 int rcu_read_lock_nesting;
1207 char rcu_read_unlock_special; 1215 char rcu_read_unlock_special;
1208 struct rcu_node *rcu_blocked_node;
1209 struct list_head rcu_node_entry; 1216 struct list_head rcu_node_entry;
1217#endif /* #ifdef CONFIG_PREEMPT_RCU */
1218#ifdef CONFIG_TREE_PREEMPT_RCU
1219 struct rcu_node *rcu_blocked_node;
1210#endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */ 1220#endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */
1211 1221
1212#if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT) 1222#if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT)
@@ -1288,9 +1298,9 @@ struct task_struct {
1288 struct list_head cpu_timers[3]; 1298 struct list_head cpu_timers[3];
1289 1299
1290/* process credentials */ 1300/* process credentials */
1291 const struct cred *real_cred; /* objective and real subjective task 1301 const struct cred __rcu *real_cred; /* objective and real subjective task
1292 * credentials (COW) */ 1302 * credentials (COW) */
1293 const struct cred *cred; /* effective (overridable) subjective task 1303 const struct cred __rcu *cred; /* effective (overridable) subjective task
1294 * credentials (COW) */ 1304 * credentials (COW) */
1295 struct mutex cred_guard_mutex; /* guard against foreign influences on 1305 struct mutex cred_guard_mutex; /* guard against foreign influences on
1296 * credential calculations 1306 * credential calculations
@@ -1418,7 +1428,7 @@ struct task_struct {
1418#endif 1428#endif
1419#ifdef CONFIG_CGROUPS 1429#ifdef CONFIG_CGROUPS
1420 /* Control Group info protected by css_set_lock */ 1430 /* Control Group info protected by css_set_lock */
1421 struct css_set *cgroups; 1431 struct css_set __rcu *cgroups;
1422 /* cg_list protected by css_set_lock and tsk->alloc_lock */ 1432 /* cg_list protected by css_set_lock and tsk->alloc_lock */
1423 struct list_head cg_list; 1433 struct list_head cg_list;
1424#endif 1434#endif
@@ -1431,7 +1441,7 @@ struct task_struct {
1431 struct futex_pi_state *pi_state_cache; 1441 struct futex_pi_state *pi_state_cache;
1432#endif 1442#endif
1433#ifdef CONFIG_PERF_EVENTS 1443#ifdef CONFIG_PERF_EVENTS
1434 struct perf_event_context *perf_event_ctxp; 1444 struct perf_event_context *perf_event_ctxp[perf_nr_task_contexts];
1435 struct mutex perf_event_mutex; 1445 struct mutex perf_event_mutex;
1436 struct list_head perf_event_list; 1446 struct list_head perf_event_list;
1437#endif 1447#endif
@@ -1681,8 +1691,7 @@ extern void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t *
1681/* 1691/*
1682 * Per process flags 1692 * Per process flags
1683 */ 1693 */
1684#define PF_ALIGNWARN 0x00000001 /* Print alignment warning msgs */ 1694#define PF_KSOFTIRQD 0x00000001 /* I am ksoftirqd */
1685 /* Not implemented yet, only for 486*/
1686#define PF_STARTING 0x00000002 /* being created */ 1695#define PF_STARTING 0x00000002 /* being created */
1687#define PF_EXITING 0x00000004 /* getting shut down */ 1696#define PF_EXITING 0x00000004 /* getting shut down */
1688#define PF_EXITPIDONE 0x00000008 /* pi exit done on shut down */ 1697#define PF_EXITPIDONE 0x00000008 /* pi exit done on shut down */
@@ -1740,7 +1749,7 @@ extern void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t *
1740#define tsk_used_math(p) ((p)->flags & PF_USED_MATH) 1749#define tsk_used_math(p) ((p)->flags & PF_USED_MATH)
1741#define used_math() tsk_used_math(current) 1750#define used_math() tsk_used_math(current)
1742 1751
1743#ifdef CONFIG_TREE_PREEMPT_RCU 1752#ifdef CONFIG_PREEMPT_RCU
1744 1753
1745#define RCU_READ_UNLOCK_BLOCKED (1 << 0) /* blocked while in RCU read-side. */ 1754#define RCU_READ_UNLOCK_BLOCKED (1 << 0) /* blocked while in RCU read-side. */
1746#define RCU_READ_UNLOCK_NEED_QS (1 << 1) /* RCU core needs CPU response. */ 1755#define RCU_READ_UNLOCK_NEED_QS (1 << 1) /* RCU core needs CPU response. */
@@ -1749,7 +1758,9 @@ static inline void rcu_copy_process(struct task_struct *p)
1749{ 1758{
1750 p->rcu_read_lock_nesting = 0; 1759 p->rcu_read_lock_nesting = 0;
1751 p->rcu_read_unlock_special = 0; 1760 p->rcu_read_unlock_special = 0;
1761#ifdef CONFIG_TREE_PREEMPT_RCU
1752 p->rcu_blocked_node = NULL; 1762 p->rcu_blocked_node = NULL;
1763#endif
1753 INIT_LIST_HEAD(&p->rcu_node_entry); 1764 INIT_LIST_HEAD(&p->rcu_node_entry);
1754} 1765}
1755 1766
@@ -1826,6 +1837,19 @@ extern void sched_clock_idle_sleep_event(void);
1826extern void sched_clock_idle_wakeup_event(u64 delta_ns); 1837extern void sched_clock_idle_wakeup_event(u64 delta_ns);
1827#endif 1838#endif
1828 1839
1840#ifdef CONFIG_IRQ_TIME_ACCOUNTING
1841/*
1842 * An i/f to runtime opt-in for irq time accounting based off of sched_clock.
1843 * The reason for this explicit opt-in is not to have perf penalty with
1844 * slow sched_clocks.
1845 */
1846extern void enable_sched_clock_irqtime(void);
1847extern void disable_sched_clock_irqtime(void);
1848#else
1849static inline void enable_sched_clock_irqtime(void) {}
1850static inline void disable_sched_clock_irqtime(void) {}
1851#endif
1852
1829extern unsigned long long 1853extern unsigned long long
1830task_sched_runtime(struct task_struct *task); 1854task_sched_runtime(struct task_struct *task);
1831extern unsigned long long thread_group_sched_runtime(struct task_struct *task); 1855extern unsigned long long thread_group_sched_runtime(struct task_struct *task);
@@ -2367,9 +2391,9 @@ extern int __cond_resched_lock(spinlock_t *lock);
2367 2391
2368extern int __cond_resched_softirq(void); 2392extern int __cond_resched_softirq(void);
2369 2393
2370#define cond_resched_softirq() ({ \ 2394#define cond_resched_softirq() ({ \
2371 __might_sleep(__FILE__, __LINE__, SOFTIRQ_OFFSET); \ 2395 __might_sleep(__FILE__, __LINE__, SOFTIRQ_DISABLE_OFFSET); \
2372 __cond_resched_softirq(); \ 2396 __cond_resched_softirq(); \
2373}) 2397})
2374 2398
2375/* 2399/*
diff --git a/include/linux/security.h b/include/linux/security.h
index a22219afff09..b8246a8df7d2 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -74,7 +74,7 @@ extern int cap_file_mmap(struct file *file, unsigned long reqprot,
74extern int cap_task_fix_setuid(struct cred *new, const struct cred *old, int flags); 74extern int cap_task_fix_setuid(struct cred *new, const struct cred *old, int flags);
75extern int cap_task_prctl(int option, unsigned long arg2, unsigned long arg3, 75extern int cap_task_prctl(int option, unsigned long arg2, unsigned long arg3,
76 unsigned long arg4, unsigned long arg5); 76 unsigned long arg4, unsigned long arg5);
77extern int cap_task_setscheduler(struct task_struct *p, int policy, struct sched_param *lp); 77extern int cap_task_setscheduler(struct task_struct *p);
78extern int cap_task_setioprio(struct task_struct *p, int ioprio); 78extern int cap_task_setioprio(struct task_struct *p, int ioprio);
79extern int cap_task_setnice(struct task_struct *p, int nice); 79extern int cap_task_setnice(struct task_struct *p, int nice);
80extern int cap_syslog(int type, bool from_file); 80extern int cap_syslog(int type, bool from_file);
@@ -959,6 +959,12 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts)
959 * Sets the new child socket's sid to the openreq sid. 959 * Sets the new child socket's sid to the openreq sid.
960 * @inet_conn_established: 960 * @inet_conn_established:
961 * Sets the connection's peersid to the secmark on skb. 961 * Sets the connection's peersid to the secmark on skb.
962 * @secmark_relabel_packet:
963 * check if the process should be allowed to relabel packets to the given secid
964 * @security_secmark_refcount_inc
965 * tells the LSM to increment the number of secmark labeling rules loaded
966 * @security_secmark_refcount_dec
967 * tells the LSM to decrement the number of secmark labeling rules loaded
962 * @req_classify_flow: 968 * @req_classify_flow:
963 * Sets the flow's sid to the openreq sid. 969 * Sets the flow's sid to the openreq sid.
964 * @tun_dev_create: 970 * @tun_dev_create:
@@ -1279,9 +1285,13 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts)
1279 * Return 0 if permission is granted. 1285 * Return 0 if permission is granted.
1280 * 1286 *
1281 * @secid_to_secctx: 1287 * @secid_to_secctx:
1282 * Convert secid to security context. 1288 * Convert secid to security context. If secdata is NULL the length of
1289 * the result will be returned in seclen, but no secdata will be returned.
1290 * This does mean that the length could change between calls to check the
1291 * length and the next call which actually allocates and returns the secdata.
1283 * @secid contains the security ID. 1292 * @secid contains the security ID.
1284 * @secdata contains the pointer that stores the converted security context. 1293 * @secdata contains the pointer that stores the converted security context.
1294 * @seclen pointer which contains the length of the data
1285 * @secctx_to_secid: 1295 * @secctx_to_secid:
1286 * Convert security context to secid. 1296 * Convert security context to secid.
1287 * @secid contains the pointer to the generated security ID. 1297 * @secid contains the pointer to the generated security ID.
@@ -1501,8 +1511,7 @@ struct security_operations {
1501 int (*task_getioprio) (struct task_struct *p); 1511 int (*task_getioprio) (struct task_struct *p);
1502 int (*task_setrlimit) (struct task_struct *p, unsigned int resource, 1512 int (*task_setrlimit) (struct task_struct *p, unsigned int resource,
1503 struct rlimit *new_rlim); 1513 struct rlimit *new_rlim);
1504 int (*task_setscheduler) (struct task_struct *p, int policy, 1514 int (*task_setscheduler) (struct task_struct *p);
1505 struct sched_param *lp);
1506 int (*task_getscheduler) (struct task_struct *p); 1515 int (*task_getscheduler) (struct task_struct *p);
1507 int (*task_movememory) (struct task_struct *p); 1516 int (*task_movememory) (struct task_struct *p);
1508 int (*task_kill) (struct task_struct *p, 1517 int (*task_kill) (struct task_struct *p,
@@ -1594,6 +1603,9 @@ struct security_operations {
1594 struct request_sock *req); 1603 struct request_sock *req);
1595 void (*inet_csk_clone) (struct sock *newsk, const struct request_sock *req); 1604 void (*inet_csk_clone) (struct sock *newsk, const struct request_sock *req);
1596 void (*inet_conn_established) (struct sock *sk, struct sk_buff *skb); 1605 void (*inet_conn_established) (struct sock *sk, struct sk_buff *skb);
1606 int (*secmark_relabel_packet) (u32 secid);
1607 void (*secmark_refcount_inc) (void);
1608 void (*secmark_refcount_dec) (void);
1597 void (*req_classify_flow) (const struct request_sock *req, struct flowi *fl); 1609 void (*req_classify_flow) (const struct request_sock *req, struct flowi *fl);
1598 int (*tun_dev_create)(void); 1610 int (*tun_dev_create)(void);
1599 void (*tun_dev_post_create)(struct sock *sk); 1611 void (*tun_dev_post_create)(struct sock *sk);
@@ -1752,8 +1764,7 @@ int security_task_setioprio(struct task_struct *p, int ioprio);
1752int security_task_getioprio(struct task_struct *p); 1764int security_task_getioprio(struct task_struct *p);
1753int security_task_setrlimit(struct task_struct *p, unsigned int resource, 1765int security_task_setrlimit(struct task_struct *p, unsigned int resource,
1754 struct rlimit *new_rlim); 1766 struct rlimit *new_rlim);
1755int security_task_setscheduler(struct task_struct *p, 1767int security_task_setscheduler(struct task_struct *p);
1756 int policy, struct sched_param *lp);
1757int security_task_getscheduler(struct task_struct *p); 1768int security_task_getscheduler(struct task_struct *p);
1758int security_task_movememory(struct task_struct *p); 1769int security_task_movememory(struct task_struct *p);
1759int security_task_kill(struct task_struct *p, struct siginfo *info, 1770int security_task_kill(struct task_struct *p, struct siginfo *info,
@@ -2320,11 +2331,9 @@ static inline int security_task_setrlimit(struct task_struct *p,
2320 return 0; 2331 return 0;
2321} 2332}
2322 2333
2323static inline int security_task_setscheduler(struct task_struct *p, 2334static inline int security_task_setscheduler(struct task_struct *p)
2324 int policy,
2325 struct sched_param *lp)
2326{ 2335{
2327 return cap_task_setscheduler(p, policy, lp); 2336 return cap_task_setscheduler(p);
2328} 2337}
2329 2338
2330static inline int security_task_getscheduler(struct task_struct *p) 2339static inline int security_task_getscheduler(struct task_struct *p)
@@ -2551,6 +2560,9 @@ void security_inet_csk_clone(struct sock *newsk,
2551 const struct request_sock *req); 2560 const struct request_sock *req);
2552void security_inet_conn_established(struct sock *sk, 2561void security_inet_conn_established(struct sock *sk,
2553 struct sk_buff *skb); 2562 struct sk_buff *skb);
2563int security_secmark_relabel_packet(u32 secid);
2564void security_secmark_refcount_inc(void);
2565void security_secmark_refcount_dec(void);
2554int security_tun_dev_create(void); 2566int security_tun_dev_create(void);
2555void security_tun_dev_post_create(struct sock *sk); 2567void security_tun_dev_post_create(struct sock *sk);
2556int security_tun_dev_attach(struct sock *sk); 2568int security_tun_dev_attach(struct sock *sk);
@@ -2705,6 +2717,19 @@ static inline void security_inet_conn_established(struct sock *sk,
2705{ 2717{
2706} 2718}
2707 2719
2720static inline int security_secmark_relabel_packet(u32 secid)
2721{
2722 return 0;
2723}
2724
2725static inline void security_secmark_refcount_inc(void)
2726{
2727}
2728
2729static inline void security_secmark_refcount_dec(void)
2730{
2731}
2732
2708static inline int security_tun_dev_create(void) 2733static inline int security_tun_dev_create(void)
2709{ 2734{
2710 return 0; 2735 return 0;
diff --git a/include/linux/selinux.h b/include/linux/selinux.h
index 82e0f26a1299..44f459612690 100644
--- a/include/linux/selinux.h
+++ b/include/linux/selinux.h
@@ -21,74 +21,11 @@ struct kern_ipc_perm;
21#ifdef CONFIG_SECURITY_SELINUX 21#ifdef CONFIG_SECURITY_SELINUX
22 22
23/** 23/**
24 * selinux_string_to_sid - map a security context string to a security ID
25 * @str: the security context string to be mapped
26 * @sid: ID value returned via this.
27 *
28 * Returns 0 if successful, with the SID stored in sid. A value
29 * of zero for sid indicates no SID could be determined (but no error
30 * occurred).
31 */
32int selinux_string_to_sid(char *str, u32 *sid);
33
34/**
35 * selinux_secmark_relabel_packet_permission - secmark permission check
36 * @sid: SECMARK ID value to be applied to network packet
37 *
38 * Returns 0 if the current task is allowed to set the SECMARK label of
39 * packets with the supplied security ID. Note that it is implicit that
40 * the packet is always being relabeled from the default unlabeled value,
41 * and that the access control decision is made in the AVC.
42 */
43int selinux_secmark_relabel_packet_permission(u32 sid);
44
45/**
46 * selinux_secmark_refcount_inc - increments the secmark use counter
47 *
48 * SELinux keeps track of the current SECMARK targets in use so it knows
49 * when to apply SECMARK label access checks to network packets. This
50 * function incements this reference count to indicate that a new SECMARK
51 * target has been configured.
52 */
53void selinux_secmark_refcount_inc(void);
54
55/**
56 * selinux_secmark_refcount_dec - decrements the secmark use counter
57 *
58 * SELinux keeps track of the current SECMARK targets in use so it knows
59 * when to apply SECMARK label access checks to network packets. This
60 * function decements this reference count to indicate that one of the
61 * existing SECMARK targets has been removed/flushed.
62 */
63void selinux_secmark_refcount_dec(void);
64
65/**
66 * selinux_is_enabled - is SELinux enabled? 24 * selinux_is_enabled - is SELinux enabled?
67 */ 25 */
68bool selinux_is_enabled(void); 26bool selinux_is_enabled(void);
69#else 27#else
70 28
71static inline int selinux_string_to_sid(const char *str, u32 *sid)
72{
73 *sid = 0;
74 return 0;
75}
76
77static inline int selinux_secmark_relabel_packet_permission(u32 sid)
78{
79 return 0;
80}
81
82static inline void selinux_secmark_refcount_inc(void)
83{
84 return;
85}
86
87static inline void selinux_secmark_refcount_dec(void)
88{
89 return;
90}
91
92static inline bool selinux_is_enabled(void) 29static inline bool selinux_is_enabled(void)
93{ 30{
94 return false; 31 return false;
diff --git a/include/linux/smp_lock.h b/include/linux/smp_lock.h
index 2ea1dd1ba21c..291f721144c2 100644
--- a/include/linux/smp_lock.h
+++ b/include/linux/smp_lock.h
@@ -54,12 +54,15 @@ static inline void cycle_kernel_lock(void)
54 54
55#else 55#else
56 56
57#ifdef CONFIG_BKL /* provoke build bug if not set */
57#define lock_kernel() 58#define lock_kernel()
58#define unlock_kernel() 59#define unlock_kernel()
59#define release_kernel_lock(task) do { } while(0)
60#define cycle_kernel_lock() do { } while(0) 60#define cycle_kernel_lock() do { } while(0)
61#define reacquire_kernel_lock(task) 0
62#define kernel_locked() 1 61#define kernel_locked() 1
62#endif /* CONFIG_BKL */
63
64#define release_kernel_lock(task) do { } while(0)
65#define reacquire_kernel_lock(task) 0
63 66
64#endif /* CONFIG_LOCK_KERNEL */ 67#endif /* CONFIG_LOCK_KERNEL */
65#endif /* __LINUX_SMPLOCK_H */ 68#endif /* __LINUX_SMPLOCK_H */
diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h
index f8854655860e..80e535897de6 100644
--- a/include/linux/spinlock.h
+++ b/include/linux/spinlock.h
@@ -50,6 +50,7 @@
50#include <linux/preempt.h> 50#include <linux/preempt.h>
51#include <linux/linkage.h> 51#include <linux/linkage.h>
52#include <linux/compiler.h> 52#include <linux/compiler.h>
53#include <linux/irqflags.h>
53#include <linux/thread_info.h> 54#include <linux/thread_info.h>
54#include <linux/kernel.h> 55#include <linux/kernel.h>
55#include <linux/stringify.h> 56#include <linux/stringify.h>
diff --git a/include/linux/srcu.h b/include/linux/srcu.h
index 4d5d2f546dbf..58971e891f48 100644
--- a/include/linux/srcu.h
+++ b/include/linux/srcu.h
@@ -108,19 +108,43 @@ static inline int srcu_read_lock_held(struct srcu_struct *sp)
108#endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */ 108#endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */
109 109
110/** 110/**
111 * srcu_dereference - fetch SRCU-protected pointer with checking 111 * srcu_dereference_check - fetch SRCU-protected pointer for later dereferencing
112 * @p: the pointer to fetch and protect for later dereferencing
113 * @sp: pointer to the srcu_struct, which is used to check that we
114 * really are in an SRCU read-side critical section.
115 * @c: condition to check for update-side use
112 * 116 *
113 * Makes rcu_dereference_check() do the dirty work. 117 * If PROVE_RCU is enabled, invoking this outside of an RCU read-side
118 * critical section will result in an RCU-lockdep splat, unless @c evaluates
119 * to 1. The @c argument will normally be a logical expression containing
120 * lockdep_is_held() calls.
114 */ 121 */
115#define srcu_dereference(p, sp) \ 122#define srcu_dereference_check(p, sp, c) \
116 rcu_dereference_check(p, srcu_read_lock_held(sp)) 123 __rcu_dereference_check((p), srcu_read_lock_held(sp) || (c), __rcu)
124
125/**
126 * srcu_dereference - fetch SRCU-protected pointer for later dereferencing
127 * @p: the pointer to fetch and protect for later dereferencing
128 * @sp: pointer to the srcu_struct, which is used to check that we
129 * really are in an SRCU read-side critical section.
130 *
131 * Makes rcu_dereference_check() do the dirty work. If PROVE_RCU
132 * is enabled, invoking this outside of an RCU read-side critical
133 * section will result in an RCU-lockdep splat.
134 */
135#define srcu_dereference(p, sp) srcu_dereference_check((p), (sp), 0)
117 136
118/** 137/**
119 * srcu_read_lock - register a new reader for an SRCU-protected structure. 138 * srcu_read_lock - register a new reader for an SRCU-protected structure.
120 * @sp: srcu_struct in which to register the new reader. 139 * @sp: srcu_struct in which to register the new reader.
121 * 140 *
122 * Enter an SRCU read-side critical section. Note that SRCU read-side 141 * Enter an SRCU read-side critical section. Note that SRCU read-side
123 * critical sections may be nested. 142 * critical sections may be nested. However, it is illegal to
143 * call anything that waits on an SRCU grace period for the same
144 * srcu_struct, whether directly or indirectly. Please note that
145 * one way to indirectly wait on an SRCU grace period is to acquire
146 * a mutex that is held elsewhere while calling synchronize_srcu() or
147 * synchronize_srcu_expedited().
124 */ 148 */
125static inline int srcu_read_lock(struct srcu_struct *sp) __acquires(sp) 149static inline int srcu_read_lock(struct srcu_struct *sp) __acquires(sp)
126{ 150{
diff --git a/include/linux/stop_machine.h b/include/linux/stop_machine.h
index 6b524a0d02e4..1808960c5059 100644
--- a/include/linux/stop_machine.h
+++ b/include/linux/stop_machine.h
@@ -126,8 +126,8 @@ int __stop_machine(int (*fn)(void *), void *data, const struct cpumask *cpus);
126 126
127#else /* CONFIG_STOP_MACHINE && CONFIG_SMP */ 127#else /* CONFIG_STOP_MACHINE && CONFIG_SMP */
128 128
129static inline int stop_machine(int (*fn)(void *), void *data, 129static inline int __stop_machine(int (*fn)(void *), void *data,
130 const struct cpumask *cpus) 130 const struct cpumask *cpus)
131{ 131{
132 int ret; 132 int ret;
133 local_irq_disable(); 133 local_irq_disable();
@@ -136,5 +136,11 @@ static inline int stop_machine(int (*fn)(void *), void *data,
136 return ret; 136 return ret;
137} 137}
138 138
139static inline int stop_machine(int (*fn)(void *), void *data,
140 const struct cpumask *cpus)
141{
142 return __stop_machine(fn, data, cpus);
143}
144
139#endif /* CONFIG_STOP_MACHINE && CONFIG_SMP */ 145#endif /* CONFIG_STOP_MACHINE && CONFIG_SMP */
140#endif /* _LINUX_STOP_MACHINE */ 146#endif /* _LINUX_STOP_MACHINE */
diff --git a/include/linux/sunrpc/auth_gss.h b/include/linux/sunrpc/auth_gss.h
index 671538d25bc1..8eee9dbbfe7a 100644
--- a/include/linux/sunrpc/auth_gss.h
+++ b/include/linux/sunrpc/auth_gss.h
@@ -69,7 +69,7 @@ struct gss_cl_ctx {
69 enum rpc_gss_proc gc_proc; 69 enum rpc_gss_proc gc_proc;
70 u32 gc_seq; 70 u32 gc_seq;
71 spinlock_t gc_seq_lock; 71 spinlock_t gc_seq_lock;
72 struct gss_ctx *gc_gss_ctx; 72 struct gss_ctx __rcu *gc_gss_ctx;
73 struct xdr_netobj gc_wire_ctx; 73 struct xdr_netobj gc_wire_ctx;
74 u32 gc_win; 74 u32 gc_win;
75 unsigned long gc_expiry; 75 unsigned long gc_expiry;
@@ -80,7 +80,7 @@ struct gss_upcall_msg;
80struct gss_cred { 80struct gss_cred {
81 struct rpc_cred gc_base; 81 struct rpc_cred gc_base;
82 enum rpc_gss_svc gc_service; 82 enum rpc_gss_svc gc_service;
83 struct gss_cl_ctx *gc_ctx; 83 struct gss_cl_ctx __rcu *gc_ctx;
84 struct gss_upcall_msg *gc_upcall; 84 struct gss_upcall_msg *gc_upcall;
85 unsigned long gc_upcall_timestamp; 85 unsigned long gc_upcall_timestamp;
86 unsigned char gc_machine_cred : 1; 86 unsigned char gc_machine_cred : 1;
diff --git a/include/linux/suspend.h b/include/linux/suspend.h
index 4af270ec2204..26697514c5ec 100644
--- a/include/linux/suspend.h
+++ b/include/linux/suspend.h
@@ -293,8 +293,8 @@ extern int unregister_pm_notifier(struct notifier_block *nb);
293extern bool events_check_enabled; 293extern bool events_check_enabled;
294 294
295extern bool pm_check_wakeup_events(void); 295extern bool pm_check_wakeup_events(void);
296extern bool pm_get_wakeup_count(unsigned long *count); 296extern bool pm_get_wakeup_count(unsigned int *count);
297extern bool pm_save_wakeup_count(unsigned long count); 297extern bool pm_save_wakeup_count(unsigned int count);
298#else /* !CONFIG_PM_SLEEP */ 298#else /* !CONFIG_PM_SLEEP */
299 299
300static inline int register_pm_notifier(struct notifier_block *nb) 300static inline int register_pm_notifier(struct notifier_block *nb)
@@ -308,6 +308,8 @@ static inline int unregister_pm_notifier(struct notifier_block *nb)
308} 308}
309 309
310#define pm_notifier(fn, pri) do { (void)(fn); } while (0) 310#define pm_notifier(fn, pri) do { (void)(fn); } while (0)
311
312static inline bool pm_check_wakeup_events(void) { return true; }
311#endif /* !CONFIG_PM_SLEEP */ 313#endif /* !CONFIG_PM_SLEEP */
312 314
313extern struct mutex pm_mutex; 315extern struct mutex pm_mutex;
diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h
index 96eb576d82fd..30b881555fa5 100644
--- a/include/linux/sysfs.h
+++ b/include/linux/sysfs.h
@@ -164,6 +164,10 @@ int sysfs_add_file_to_group(struct kobject *kobj,
164 const struct attribute *attr, const char *group); 164 const struct attribute *attr, const char *group);
165void sysfs_remove_file_from_group(struct kobject *kobj, 165void sysfs_remove_file_from_group(struct kobject *kobj,
166 const struct attribute *attr, const char *group); 166 const struct attribute *attr, const char *group);
167int sysfs_merge_group(struct kobject *kobj,
168 const struct attribute_group *grp);
169void sysfs_unmerge_group(struct kobject *kobj,
170 const struct attribute_group *grp);
167 171
168void sysfs_notify(struct kobject *kobj, const char *dir, const char *attr); 172void sysfs_notify(struct kobject *kobj, const char *dir, const char *attr);
169void sysfs_notify_dirent(struct sysfs_dirent *sd); 173void sysfs_notify_dirent(struct sysfs_dirent *sd);
@@ -302,6 +306,17 @@ static inline void sysfs_remove_file_from_group(struct kobject *kobj,
302{ 306{
303} 307}
304 308
309static inline int sysfs_merge_group(struct kobject *kobj,
310 const struct attribute_group *grp)
311{
312 return 0;
313}
314
315static inline void sysfs_unmerge_group(struct kobject *kobj,
316 const struct attribute_group *grp)
317{
318}
319
305static inline void sysfs_notify(struct kobject *kobj, const char *dir, 320static inline void sysfs_notify(struct kobject *kobj, const char *dir,
306 const char *attr) 321 const char *attr)
307{ 322{
diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h
index a8cc4e13434c..c90696544176 100644
--- a/include/linux/thread_info.h
+++ b/include/linux/thread_info.h
@@ -23,12 +23,12 @@ struct restart_block {
23 }; 23 };
24 /* For futex_wait and futex_wait_requeue_pi */ 24 /* For futex_wait and futex_wait_requeue_pi */
25 struct { 25 struct {
26 u32 *uaddr; 26 u32 __user *uaddr;
27 u32 val; 27 u32 val;
28 u32 flags; 28 u32 flags;
29 u32 bitset; 29 u32 bitset;
30 u64 time; 30 u64 time;
31 u32 *uaddr2; 31 u32 __user *uaddr2;
32 } futex; 32 } futex;
33 /* For nanosleep */ 33 /* For nanosleep */
34 struct { 34 struct {
diff --git a/include/linux/topology.h b/include/linux/topology.h
index 64e084ff5e5c..b91a40e847d2 100644
--- a/include/linux/topology.h
+++ b/include/linux/topology.h
@@ -201,6 +201,12 @@ int arch_update_cpu_topology(void);
201 .balance_interval = 64, \ 201 .balance_interval = 64, \
202} 202}
203 203
204#ifdef CONFIG_SCHED_BOOK
205#ifndef SD_BOOK_INIT
206#error Please define an appropriate SD_BOOK_INIT in include/asm/topology.h!!!
207#endif
208#endif /* CONFIG_SCHED_BOOK */
209
204#ifdef CONFIG_NUMA 210#ifdef CONFIG_NUMA
205#ifndef SD_NODE_INIT 211#ifndef SD_NODE_INIT
206#error Please define an appropriate SD_NODE_INIT in include/asm/topology.h!!! 212#error Please define an appropriate SD_NODE_INIT in include/asm/topology.h!!!
diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h
index 103d1b61aacb..a4a90b6726ce 100644
--- a/include/linux/tracepoint.h
+++ b/include/linux/tracepoint.h
@@ -17,6 +17,7 @@
17#include <linux/errno.h> 17#include <linux/errno.h>
18#include <linux/types.h> 18#include <linux/types.h>
19#include <linux/rcupdate.h> 19#include <linux/rcupdate.h>
20#include <linux/jump_label.h>
20 21
21struct module; 22struct module;
22struct tracepoint; 23struct tracepoint;
@@ -145,7 +146,9 @@ static inline void tracepoint_update_probe_range(struct tracepoint *begin,
145 extern struct tracepoint __tracepoint_##name; \ 146 extern struct tracepoint __tracepoint_##name; \
146 static inline void trace_##name(proto) \ 147 static inline void trace_##name(proto) \
147 { \ 148 { \
148 if (unlikely(__tracepoint_##name.state)) \ 149 JUMP_LABEL(&__tracepoint_##name.state, do_trace); \
150 return; \
151do_trace: \
149 __DO_TRACE(&__tracepoint_##name, \ 152 __DO_TRACE(&__tracepoint_##name, \
150 TP_PROTO(data_proto), \ 153 TP_PROTO(data_proto), \
151 TP_ARGS(data_args)); \ 154 TP_ARGS(data_args)); \
diff --git a/include/linux/types.h b/include/linux/types.h
index 01a082f56ef4..357dbc19606f 100644
--- a/include/linux/types.h
+++ b/include/linux/types.h
@@ -121,7 +121,15 @@ typedef __u64 u_int64_t;
121typedef __s64 int64_t; 121typedef __s64 int64_t;
122#endif 122#endif
123 123
124/* this is a special 64bit data type that is 8-byte aligned */ 124/*
125 * aligned_u64 should be used in defining kernel<->userspace ABIs to avoid
126 * common 32/64-bit compat problems.
127 * 64-bit values align to 4-byte boundaries on x86_32 (and possibly other
128 * architectures) and to 8-byte boundaries on 64-bit architectures. The new
129 * aligned_u64 type enforces 8-byte alignment so that structs containing
130 * aligned_u64 values have the same alignment on 32-bit and 64-bit architectures.
131 * No conversions are necessary between 32-bit user-space and a 64-bit kernel.
132 */
125#define aligned_u64 __u64 __attribute__((aligned(8))) 133#define aligned_u64 __u64 __attribute__((aligned(8)))
126#define aligned_be64 __be64 __attribute__((aligned(8))) 134#define aligned_be64 __be64 __attribute__((aligned(8)))
127#define aligned_le64 __le64 __attribute__((aligned(8))) 135#define aligned_le64 __le64 __attribute__((aligned(8)))
@@ -178,6 +186,11 @@ typedef __u64 __bitwise __be64;
178typedef __u16 __bitwise __sum16; 186typedef __u16 __bitwise __sum16;
179typedef __u32 __bitwise __wsum; 187typedef __u32 __bitwise __wsum;
180 188
189/* this is a special 64bit data type that is 8-byte aligned */
190#define __aligned_u64 __u64 __attribute__((aligned(8)))
191#define __aligned_be64 __be64 __attribute__((aligned(8)))
192#define __aligned_le64 __le64 __attribute__((aligned(8)))
193
181#ifdef __KERNEL__ 194#ifdef __KERNEL__
182typedef unsigned __bitwise__ gfp_t; 195typedef unsigned __bitwise__ gfp_t;
183typedef unsigned __bitwise__ fmode_t; 196typedef unsigned __bitwise__ fmode_t;
diff --git a/include/linux/wait.h b/include/linux/wait.h
index 0836ccc57121..3efc9f3f43a0 100644
--- a/include/linux/wait.h
+++ b/include/linux/wait.h
@@ -614,6 +614,7 @@ int wake_bit_function(wait_queue_t *wait, unsigned mode, int sync, void *key);
614 (wait)->private = current; \ 614 (wait)->private = current; \
615 (wait)->func = autoremove_wake_function; \ 615 (wait)->func = autoremove_wake_function; \
616 INIT_LIST_HEAD(&(wait)->task_list); \ 616 INIT_LIST_HEAD(&(wait)->task_list); \
617 (wait)->flags = 0; \
617 } while (0) 618 } while (0)
618 619
619/** 620/**
diff --git a/include/media/videobuf-dma-sg.h b/include/media/videobuf-dma-sg.h
index 97e07f46a0fa..aa4ebb42a565 100644
--- a/include/media/videobuf-dma-sg.h
+++ b/include/media/videobuf-dma-sg.h
@@ -48,6 +48,7 @@ struct videobuf_dmabuf {
48 48
49 /* for userland buffer */ 49 /* for userland buffer */
50 int offset; 50 int offset;
51 size_t size;
51 struct page **pages; 52 struct page **pages;
52 53
53 /* for kernel buffers */ 54 /* for kernel buffers */
diff --git a/include/net/bluetooth/bluetooth.h b/include/net/bluetooth/bluetooth.h
index 27a902d9b3a9..30fce0128dd7 100644
--- a/include/net/bluetooth/bluetooth.h
+++ b/include/net/bluetooth/bluetooth.h
@@ -161,12 +161,30 @@ static inline struct sk_buff *bt_skb_send_alloc(struct sock *sk, unsigned long l
161{ 161{
162 struct sk_buff *skb; 162 struct sk_buff *skb;
163 163
164 release_sock(sk);
164 if ((skb = sock_alloc_send_skb(sk, len + BT_SKB_RESERVE, nb, err))) { 165 if ((skb = sock_alloc_send_skb(sk, len + BT_SKB_RESERVE, nb, err))) {
165 skb_reserve(skb, BT_SKB_RESERVE); 166 skb_reserve(skb, BT_SKB_RESERVE);
166 bt_cb(skb)->incoming = 0; 167 bt_cb(skb)->incoming = 0;
167 } 168 }
169 lock_sock(sk);
170
171 if (!skb && *err)
172 return NULL;
173
174 *err = sock_error(sk);
175 if (*err)
176 goto out;
177
178 if (sk->sk_shutdown) {
179 *err = -ECONNRESET;
180 goto out;
181 }
168 182
169 return skb; 183 return skb;
184
185out:
186 kfree_skb(skb);
187 return NULL;
170} 188}
171 189
172int bt_err(__u16 code); 190int bt_err(__u16 code);
diff --git a/include/net/cls_cgroup.h b/include/net/cls_cgroup.h
index ef6c24a529e1..a4dc5b027bd9 100644
--- a/include/net/cls_cgroup.h
+++ b/include/net/cls_cgroup.h
@@ -51,7 +51,8 @@ static inline u32 task_cls_classid(struct task_struct *p)
51 return 0; 51 return 0;
52 52
53 rcu_read_lock(); 53 rcu_read_lock();
54 id = rcu_dereference(net_cls_subsys_id); 54 id = rcu_dereference_index_check(net_cls_subsys_id,
55 rcu_read_lock_held());
55 if (id >= 0) 56 if (id >= 0)
56 classid = container_of(task_subsys_state(p, id), 57 classid = container_of(task_subsys_state(p, id),
57 struct cgroup_cls_state, css)->classid; 58 struct cgroup_cls_state, css)->classid;
diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h
index e624dae54fa4..caf17db87dbc 100644
--- a/include/net/netfilter/nf_conntrack.h
+++ b/include/net/netfilter/nf_conntrack.h
@@ -75,7 +75,7 @@ struct nf_conntrack_helper;
75/* nf_conn feature for connections that have a helper */ 75/* nf_conn feature for connections that have a helper */
76struct nf_conn_help { 76struct nf_conn_help {
77 /* Helper. if any */ 77 /* Helper. if any */
78 struct nf_conntrack_helper *helper; 78 struct nf_conntrack_helper __rcu *helper;
79 79
80 union nf_conntrack_help help; 80 union nf_conntrack_help help;
81 81
diff --git a/include/pcmcia/cs.h b/include/pcmcia/cs.h
deleted file mode 100644
index 68d8bde7e8d6..000000000000
--- a/include/pcmcia/cs.h
+++ /dev/null
@@ -1,95 +0,0 @@
1/*
2 * cs.h
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 *
8 * The initial developer of the original code is David A. Hinds
9 * <dahinds@users.sourceforge.net>. Portions created by David A. Hinds
10 * are Copyright (C) 1999 David A. Hinds. All Rights Reserved.
11 *
12 * (C) 1999 David A. Hinds
13 */
14
15#ifndef _LINUX_CS_H
16#define _LINUX_CS_H
17
18#ifdef __KERNEL__
19#include <linux/interrupt.h>
20#endif
21
22/* ModifyConfiguration */
23typedef struct modconf_t {
24 u_int Attributes;
25 u_int Vcc, Vpp1, Vpp2;
26} modconf_t;
27
28/* Attributes for ModifyConfiguration */
29#define CONF_IRQ_CHANGE_VALID 0x0100
30#define CONF_VCC_CHANGE_VALID 0x0200
31#define CONF_VPP1_CHANGE_VALID 0x0400
32#define CONF_VPP2_CHANGE_VALID 0x0800
33#define CONF_IO_CHANGE_WIDTH 0x1000
34
35/* For RequestConfiguration */
36typedef struct config_req_t {
37 u_int Attributes;
38 u_int Vpp; /* both Vpp1 and Vpp2 */
39 u_int IntType;
40 u_int ConfigBase;
41 u_char Status, Pin, Copy, ExtStatus;
42 u_char ConfigIndex;
43 u_int Present;
44} config_req_t;
45
46/* Attributes for RequestConfiguration */
47#define CONF_ENABLE_IRQ 0x01
48#define CONF_ENABLE_DMA 0x02
49#define CONF_ENABLE_SPKR 0x04
50#define CONF_ENABLE_PULSE_IRQ 0x08
51#define CONF_VALID_CLIENT 0x100
52
53/* IntType field */
54#define INT_MEMORY 0x01
55#define INT_MEMORY_AND_IO 0x02
56#define INT_CARDBUS 0x04
57#define INT_ZOOMED_VIDEO 0x08
58
59/* Configuration registers present */
60#define PRESENT_OPTION 0x001
61#define PRESENT_STATUS 0x002
62#define PRESENT_PIN_REPLACE 0x004
63#define PRESENT_COPY 0x008
64#define PRESENT_EXT_STATUS 0x010
65#define PRESENT_IOBASE_0 0x020
66#define PRESENT_IOBASE_1 0x040
67#define PRESENT_IOBASE_2 0x080
68#define PRESENT_IOBASE_3 0x100
69#define PRESENT_IOSIZE 0x200
70
71/* For RequestWindow */
72typedef struct win_req_t {
73 u_int Attributes;
74 u_long Base;
75 u_int Size;
76 u_int AccessSpeed;
77} win_req_t;
78
79/* Attributes for RequestWindow */
80#define WIN_MEMORY_TYPE_CM 0x00 /* default */
81#define WIN_MEMORY_TYPE_AM 0x20 /* MAP_ATTRIB */
82#define WIN_DATA_WIDTH_8 0x00 /* default */
83#define WIN_DATA_WIDTH_16 0x02 /* MAP_16BIT */
84#define WIN_ENABLE 0x01 /* MAP_ACTIVE */
85#define WIN_USE_WAIT 0x40 /* MAP_USE_WAIT */
86
87#define WIN_FLAGS_MAP 0x63 /* MAP_ATTRIB | MAP_16BIT | MAP_ACTIVE |
88 MAP_USE_WAIT */
89#define WIN_FLAGS_REQ 0x1c /* mapping to socket->win[i]:
90 0x04 -> 0
91 0x08 -> 1
92 0x0c -> 2
93 0x10 -> 3 */
94
95#endif /* _LINUX_CS_H */
diff --git a/include/pcmcia/ds.h b/include/pcmcia/ds.h
index 70c58ed2278c..d830c87ff0a7 100644
--- a/include/pcmcia/ds.h
+++ b/include/pcmcia/ds.h
@@ -24,9 +24,11 @@
24 24
25#ifdef __KERNEL__ 25#ifdef __KERNEL__
26#include <linux/device.h> 26#include <linux/device.h>
27#include <linux/interrupt.h>
27#include <pcmcia/ss.h> 28#include <pcmcia/ss.h>
28#include <asm/atomic.h> 29#include <asm/atomic.h>
29 30
31
30/* 32/*
31 * PCMCIA device drivers (16-bit cards only; 32-bit cards require CardBus 33 * PCMCIA device drivers (16-bit cards only; 32-bit cards require CardBus
32 * a.k.a. PCI drivers 34 * a.k.a. PCI drivers
@@ -36,8 +38,6 @@ struct pcmcia_device;
36struct config_t; 38struct config_t;
37struct net_device; 39struct net_device;
38 40
39typedef struct resource *window_handle_t;
40
41/* dynamic device IDs for PCMCIA device drivers. See 41/* dynamic device IDs for PCMCIA device drivers. See
42 * Documentation/pcmcia/driver.txt for details. 42 * Documentation/pcmcia/driver.txt for details.
43*/ 43*/
@@ -47,6 +47,8 @@ struct pcmcia_dynids {
47}; 47};
48 48
49struct pcmcia_driver { 49struct pcmcia_driver {
50 const char *name;
51
50 int (*probe) (struct pcmcia_device *dev); 52 int (*probe) (struct pcmcia_device *dev);
51 void (*remove) (struct pcmcia_device *dev); 53 void (*remove) (struct pcmcia_device *dev);
52 54
@@ -90,15 +92,17 @@ struct pcmcia_device {
90 92
91 struct list_head socket_device_list; 93 struct list_head socket_device_list;
92 94
93 /* deprecated, will be cleaned up soon */
94 config_req_t conf;
95 window_handle_t win;
96
97 /* device setup */ 95 /* device setup */
98 unsigned int irq; 96 unsigned int irq;
99 struct resource *resource[PCMCIA_NUM_RESOURCES]; 97 struct resource *resource[PCMCIA_NUM_RESOURCES];
98 resource_size_t card_addr; /* for the 1st IOMEM resource */
99 unsigned int vpp;
100 100
101 unsigned int io_lines; /* number of I/O lines */ 101 unsigned int config_flags; /* CONF_ENABLE_ flags below */
102 unsigned int config_base;
103 unsigned int config_index;
104 unsigned int config_regs; /* PRESENT_ flags below */
105 unsigned int io_lines; /* number of I/O lines */
102 106
103 /* Is the device suspended? */ 107 /* Is the device suspended? */
104 u16 suspended:1; 108 u16 suspended:1;
@@ -174,9 +178,6 @@ int pcmcia_parse_tuple(tuple_t *tuple, cisparse_t *parse);
174/* loop CIS entries for valid configuration */ 178/* loop CIS entries for valid configuration */
175int pcmcia_loop_config(struct pcmcia_device *p_dev, 179int pcmcia_loop_config(struct pcmcia_device *p_dev,
176 int (*conf_check) (struct pcmcia_device *p_dev, 180 int (*conf_check) (struct pcmcia_device *p_dev,
177 cistpl_cftable_entry_t *cf,
178 cistpl_cftable_entry_t *dflt,
179 unsigned int vcc,
180 void *priv_data), 181 void *priv_data),
181 void *priv_data); 182 void *priv_data);
182 183
@@ -206,16 +207,17 @@ pcmcia_request_exclusive_irq(struct pcmcia_device *p_dev,
206int __must_check pcmcia_request_irq(struct pcmcia_device *p_dev, 207int __must_check pcmcia_request_irq(struct pcmcia_device *p_dev,
207 irq_handler_t handler); 208 irq_handler_t handler);
208 209
209int pcmcia_request_configuration(struct pcmcia_device *p_dev, 210int pcmcia_enable_device(struct pcmcia_device *p_dev);
210 config_req_t *req);
211 211
212int pcmcia_request_window(struct pcmcia_device *p_dev, win_req_t *req, 212int pcmcia_request_window(struct pcmcia_device *p_dev, struct resource *res,
213 window_handle_t *wh); 213 unsigned int speed);
214int pcmcia_release_window(struct pcmcia_device *p_dev, window_handle_t win); 214int pcmcia_release_window(struct pcmcia_device *p_dev, struct resource *res);
215int pcmcia_map_mem_page(struct pcmcia_device *p_dev, window_handle_t win, 215int pcmcia_map_mem_page(struct pcmcia_device *p_dev, struct resource *res,
216 unsigned int offset); 216 unsigned int offset);
217 217
218int pcmcia_modify_configuration(struct pcmcia_device *p_dev, modconf_t *mod); 218int pcmcia_fixup_vpp(struct pcmcia_device *p_dev, unsigned char new_vpp);
219int pcmcia_fixup_iowidth(struct pcmcia_device *p_dev);
220
219void pcmcia_disable_device(struct pcmcia_device *p_dev); 221void pcmcia_disable_device(struct pcmcia_device *p_dev);
220 222
221/* IO ports */ 223/* IO ports */
@@ -224,15 +226,46 @@ void pcmcia_disable_device(struct pcmcia_device *p_dev);
224#define IO_DATA_PATH_WIDTH_16 0x08 226#define IO_DATA_PATH_WIDTH_16 0x08
225#define IO_DATA_PATH_WIDTH_AUTO 0x10 227#define IO_DATA_PATH_WIDTH_AUTO 0x10
226 228
227/* convert flag found in cfgtable to data path width parameter */ 229/* IO memory */
228static inline int pcmcia_io_cfg_data_width(unsigned int flags) 230#define WIN_MEMORY_TYPE_CM 0x00 /* default */
229{ 231#define WIN_MEMORY_TYPE_AM 0x20 /* MAP_ATTRIB */
230 if (!(flags & CISTPL_IO_8BIT)) 232#define WIN_DATA_WIDTH_8 0x00 /* default */
231 return IO_DATA_PATH_WIDTH_16; 233#define WIN_DATA_WIDTH_16 0x02 /* MAP_16BIT */
232 if (!(flags & CISTPL_IO_16BIT)) 234#define WIN_ENABLE 0x01 /* MAP_ACTIVE */
233 return IO_DATA_PATH_WIDTH_8; 235#define WIN_USE_WAIT 0x40 /* MAP_USE_WAIT */
234 return IO_DATA_PATH_WIDTH_AUTO; 236
235} 237#define WIN_FLAGS_MAP 0x63 /* MAP_ATTRIB | MAP_16BIT | MAP_ACTIVE |
238 MAP_USE_WAIT */
239#define WIN_FLAGS_REQ 0x1c /* mapping to socket->win[i]:
240 0x04 -> 0
241 0x08 -> 1
242 0x0c -> 2
243 0x10 -> 3 */
244
245/* config_reg{ister}s present for this PCMCIA device */
246#define PRESENT_OPTION 0x001
247#define PRESENT_STATUS 0x002
248#define PRESENT_PIN_REPLACE 0x004
249#define PRESENT_COPY 0x008
250#define PRESENT_EXT_STATUS 0x010
251#define PRESENT_IOBASE_0 0x020
252#define PRESENT_IOBASE_1 0x040
253#define PRESENT_IOBASE_2 0x080
254#define PRESENT_IOBASE_3 0x100
255#define PRESENT_IOSIZE 0x200
256
257/* flags to be passed to pcmcia_enable_device() */
258#define CONF_ENABLE_IRQ 0x0001
259#define CONF_ENABLE_SPKR 0x0002
260#define CONF_ENABLE_PULSE_IRQ 0x0004
261#define CONF_ENABLE_ESR 0x0008
262
263/* flags used by pcmcia_loop_config() autoconfiguration */
264#define CONF_AUTO_CHECK_VCC 0x0100 /* check for matching Vcc? */
265#define CONF_AUTO_SET_VPP 0x0200 /* set Vpp? */
266#define CONF_AUTO_AUDIO 0x0400 /* enable audio line? */
267#define CONF_AUTO_SET_IO 0x0800 /* set ->resource[0,1] */
268#define CONF_AUTO_SET_IOMEM 0x1000 /* set ->resource[2] */
236 269
237#endif /* __KERNEL__ */ 270#endif /* __KERNEL__ */
238 271
diff --git a/include/pcmcia/ss.h b/include/pcmcia/ss.h
index 626b63c33d9e..731cde010f42 100644
--- a/include/pcmcia/ss.h
+++ b/include/pcmcia/ss.h
@@ -19,7 +19,6 @@
19#include <linux/sched.h> /* task_struct, completion */ 19#include <linux/sched.h> /* task_struct, completion */
20#include <linux/mutex.h> 20#include <linux/mutex.h>
21 21
22#include <pcmcia/cs.h>
23#ifdef CONFIG_CARDBUS 22#ifdef CONFIG_CARDBUS
24#include <linux/pci.h> 23#include <linux/pci.h>
25#endif 24#endif
diff --git a/include/trace/events/irq.h b/include/trace/events/irq.h
index 0e4cfb694fe7..6fa7cbab7d93 100644
--- a/include/trace/events/irq.h
+++ b/include/trace/events/irq.h
@@ -5,7 +5,9 @@
5#define _TRACE_IRQ_H 5#define _TRACE_IRQ_H
6 6
7#include <linux/tracepoint.h> 7#include <linux/tracepoint.h>
8#include <linux/interrupt.h> 8
9struct irqaction;
10struct softirq_action;
9 11
10#define softirq_name(sirq) { sirq##_SOFTIRQ, #sirq } 12#define softirq_name(sirq) { sirq##_SOFTIRQ, #sirq }
11#define show_softirq_name(val) \ 13#define show_softirq_name(val) \
@@ -93,7 +95,10 @@ DECLARE_EVENT_CLASS(softirq,
93 ), 95 ),
94 96
95 TP_fast_assign( 97 TP_fast_assign(
96 __entry->vec = (int)(h - vec); 98 if (vec)
99 __entry->vec = (int)(h - vec);
100 else
101 __entry->vec = (int)(long)h;
97 ), 102 ),
98 103
99 TP_printk("vec=%d [action=%s]", __entry->vec, 104 TP_printk("vec=%d [action=%s]", __entry->vec,
@@ -136,6 +141,23 @@ DEFINE_EVENT(softirq, softirq_exit,
136 TP_ARGS(h, vec) 141 TP_ARGS(h, vec)
137); 142);
138 143
144/**
145 * softirq_raise - called immediately when a softirq is raised
146 * @h: pointer to struct softirq_action
147 * @vec: pointer to first struct softirq_action in softirq_vec array
148 *
149 * For this event @vec is NULL, which means @h does not point to a
150 * softirq_action but instead carries the raised softirq vector number
151 * itself. When used in combination with the softirq_entry tracepoint
152 * we can determine the softirq raise latency.
153 */
154DEFINE_EVENT(softirq, softirq_raise,
155
156 TP_PROTO(struct softirq_action *h, struct softirq_action *vec),
157
158 TP_ARGS(h, vec)
159);
160
139#endif /* _TRACE_IRQ_H */ 161#endif /* _TRACE_IRQ_H */
140 162
141/* This part must be outside protection */ 163/* This part must be outside protection */
diff --git a/include/trace/events/napi.h b/include/trace/events/napi.h
index 188deca2f3c7..8fe1e93f531d 100644
--- a/include/trace/events/napi.h
+++ b/include/trace/events/napi.h
@@ -6,10 +6,31 @@
6 6
7#include <linux/netdevice.h> 7#include <linux/netdevice.h>
8#include <linux/tracepoint.h> 8#include <linux/tracepoint.h>
9#include <linux/ftrace.h>
10
11#define NO_DEV "(no_device)"
12
13TRACE_EVENT(napi_poll,
9 14
10DECLARE_TRACE(napi_poll,
11 TP_PROTO(struct napi_struct *napi), 15 TP_PROTO(struct napi_struct *napi),
12 TP_ARGS(napi)); 16
17 TP_ARGS(napi),
18
19 TP_STRUCT__entry(
20 __field( struct napi_struct *, napi)
21 __string( dev_name, napi->dev ? napi->dev->name : NO_DEV)
22 ),
23
24 TP_fast_assign(
25 __entry->napi = napi;
26 __assign_str(dev_name, napi->dev ? napi->dev->name : NO_DEV);
27 ),
28
29 TP_printk("napi poll on napi struct %p for device %s",
30 __entry->napi, __get_str(dev_name))
31);
32
33#undef NO_DEV
13 34
14#endif /* _TRACE_NAPI_H_ */ 35#endif /* _TRACE_NAPI_H_ */
15 36
diff --git a/include/trace/events/net.h b/include/trace/events/net.h
new file mode 100644
index 000000000000..5f247f5ffc56
--- /dev/null
+++ b/include/trace/events/net.h
@@ -0,0 +1,82 @@
1#undef TRACE_SYSTEM
2#define TRACE_SYSTEM net
3
4#if !defined(_TRACE_NET_H) || defined(TRACE_HEADER_MULTI_READ)
5#define _TRACE_NET_H
6
7#include <linux/skbuff.h>
8#include <linux/netdevice.h>
9#include <linux/ip.h>
10#include <linux/tracepoint.h>
11
/*
 * net_dev_xmit - records the skb address, skb->len, the caller-supplied rc
 * and the name of skb->dev.
 *
 * skb->dev is dereferenced unconditionally, so the skb must have a device
 * attached when the event fires.
 * NOTE(review): rc is presumably the transmit return code - confirm against
 * the caller. skb->len and skb->dev are read at trace time; confirm the skb
 * is still valid at the call site.
 */
12TRACE_EVENT(net_dev_xmit,
13
14 TP_PROTO(struct sk_buff *skb,
15 int rc),
16
17 TP_ARGS(skb, rc),
18
19 TP_STRUCT__entry(
20 __field( void *, skbaddr )
21 __field( unsigned int, len )
22 __field( int, rc )
23 __string( name, skb->dev->name )
24 ),
25
26 TP_fast_assign(
27 __entry->skbaddr = skb;
28 __entry->len = skb->len;
29 __entry->rc = rc;
30 __assign_str(name, skb->dev->name);
31 ),
32
33 TP_printk("dev=%s skbaddr=%p len=%u rc=%d",
34 __get_str(name), __entry->skbaddr, __entry->len, __entry->rc)
35);
36
/*
 * net_dev_template - event class shared by the skb-only network events.
 * Records the skb address, skb->len and the name of skb->dev.
 *
 * skb->dev is dereferenced unconditionally, so every event built on this
 * class requires an skb with a device attached.
 * NOTE(review): this declaration is closed with ")" and no trailing
 * semicolon, unlike the clock and power_domain event classes - confirm
 * whether that is intentional.
 */
37DECLARE_EVENT_CLASS(net_dev_template,
38
39 TP_PROTO(struct sk_buff *skb),
40
41 TP_ARGS(skb),
42
43 TP_STRUCT__entry(
44 __field( void *, skbaddr )
45 __field( unsigned int, len )
46 __string( name, skb->dev->name )
47 ),
48
49 TP_fast_assign(
50 __entry->skbaddr = skb;
51 __entry->len = skb->len;
52 __assign_str(name, skb->dev->name);
53 ),
54
55 TP_printk("dev=%s skbaddr=%p len=%u",
56 __get_str(name), __entry->skbaddr, __entry->len)
57)
58
/* net_dev_queue - instance of the net_dev_template class; takes the skb only. */
59DEFINE_EVENT(net_dev_template, net_dev_queue,
60
61 TP_PROTO(struct sk_buff *skb),
62
63 TP_ARGS(skb)
64);
65
/* netif_receive_skb - instance of the net_dev_template class; takes the skb only. */
66DEFINE_EVENT(net_dev_template, netif_receive_skb,
67
68 TP_PROTO(struct sk_buff *skb),
69
70 TP_ARGS(skb)
71);
72
/* netif_rx - instance of the net_dev_template class; takes the skb only. */
73DEFINE_EVENT(net_dev_template, netif_rx,
74
75 TP_PROTO(struct sk_buff *skb),
76
77 TP_ARGS(skb)
78);
79#endif /* _TRACE_NET_H */
80
81/* This part must be outside protection */
82#include <trace/define_trace.h>
diff --git a/include/trace/events/power.h b/include/trace/events/power.h
index 35a2a6e7bf1e..286784d69b8f 100644
--- a/include/trace/events/power.h
+++ b/include/trace/events/power.h
@@ -10,12 +10,17 @@
10#ifndef _TRACE_POWER_ENUM_ 10#ifndef _TRACE_POWER_ENUM_
11#define _TRACE_POWER_ENUM_ 11#define _TRACE_POWER_ENUM_
12enum { 12enum {
13 POWER_NONE = 0, 13 POWER_NONE = 0,
14 POWER_CSTATE = 1, 14 POWER_CSTATE = 1, /* C-State */
15 POWER_PSTATE = 2, 15 POWER_PSTATE = 2, /* Frequency change or DVFS */
16 POWER_SSTATE = 3, /* Suspend */
16}; 17};
17#endif 18#endif
18 19
20/*
21 * The power events are used for cpuidle & suspend (power_start, power_end)
22 * and for cpufreq (power_frequency)
23 */
19DECLARE_EVENT_CLASS(power, 24DECLARE_EVENT_CLASS(power,
20 25
21 TP_PROTO(unsigned int type, unsigned int state, unsigned int cpu_id), 26 TP_PROTO(unsigned int type, unsigned int state, unsigned int cpu_id),
@@ -70,6 +75,85 @@ TRACE_EVENT(power_end,
70 75
71); 76);
72 77
78/*
79 * The clock events are used for clock enable/disable and for
80 * clock rate change
81 */
/*
 * Event class "clock": copies the @name string into the record and stores
 * @state and @cpu_id. Both arrive as unsigned int, are kept in u64 fields,
 * and are printed after a cast to unsigned long.
 * NOTE(review): the meaning of @state for each event (on/off flag, rate, ...)
 * is not defined here - confirm against the callers.
 */
82DECLARE_EVENT_CLASS(clock,
83
84 TP_PROTO(const char *name, unsigned int state, unsigned int cpu_id),
85
86 TP_ARGS(name, state, cpu_id),
87
88 TP_STRUCT__entry(
89 __string( name, name )
90 __field( u64, state )
91 __field( u64, cpu_id )
92 ),
93
94 TP_fast_assign(
95 __assign_str(name, name);
96 __entry->state = state;
97 __entry->cpu_id = cpu_id;
98 ),
99
100 TP_printk("%s state=%lu cpu_id=%lu", __get_str(name),
101 (unsigned long)__entry->state, (unsigned long)__entry->cpu_id)
102);
103
/* clock_enable - instance of the clock event class (name, state, cpu_id). */
104DEFINE_EVENT(clock, clock_enable,
105
106 TP_PROTO(const char *name, unsigned int state, unsigned int cpu_id),
107
108 TP_ARGS(name, state, cpu_id)
109);
110
/* clock_disable - instance of the clock event class (name, state, cpu_id). */
111DEFINE_EVENT(clock, clock_disable,
112
113 TP_PROTO(const char *name, unsigned int state, unsigned int cpu_id),
114
115 TP_ARGS(name, state, cpu_id)
116);
117
/*
 * clock_set_rate - instance of the clock event class (name, state, cpu_id).
 * NOTE(review): presumably @state carries the new rate here - confirm
 * against the callers.
 */
118DEFINE_EVENT(clock, clock_set_rate,
119
120 TP_PROTO(const char *name, unsigned int state, unsigned int cpu_id),
121
122 TP_ARGS(name, state, cpu_id)
123);
124
125/*
126 * The power domain events are used for power domains transitions
127 */
/*
 * Event class "power_domain": copies the @name string into the record and
 * stores @state and @cpu_id. Both arrive as unsigned int, are kept in u64
 * fields, and are printed after a cast to unsigned long. The record layout
 * and output format match the clock event class.
 */
128DECLARE_EVENT_CLASS(power_domain,
129
130 TP_PROTO(const char *name, unsigned int state, unsigned int cpu_id),
131
132 TP_ARGS(name, state, cpu_id),
133
134 TP_STRUCT__entry(
135 __string( name, name )
136 __field( u64, state )
137 __field( u64, cpu_id )
138 ),
139
140 TP_fast_assign(
141 __assign_str(name, name);
142 __entry->state = state;
143 __entry->cpu_id = cpu_id;
144),
145
146 TP_printk("%s state=%lu cpu_id=%lu", __get_str(name),
147 (unsigned long)__entry->state, (unsigned long)__entry->cpu_id)
148);
149
/* power_domain_target - instance of the power_domain event class (name, state, cpu_id). */
150DEFINE_EVENT(power_domain, power_domain_target,
151
152 TP_PROTO(const char *name, unsigned int state, unsigned int cpu_id),
153
154 TP_ARGS(name, state, cpu_id)
155);
156
73#endif /* _TRACE_POWER_H */ 157#endif /* _TRACE_POWER_H */
74 158
75/* This part must be outside protection */ 159/* This part must be outside protection */
diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h
index 9208c92aeab5..f6334782a593 100644
--- a/include/trace/events/sched.h
+++ b/include/trace/events/sched.h
@@ -362,6 +362,35 @@ TRACE_EVENT(sched_stat_runtime,
362 (unsigned long long)__entry->vruntime) 362 (unsigned long long)__entry->vruntime)
363); 363);
364 364
365/*
366 * Tracepoint for showing priority inheritance modifying a task's
367 * priority.
368 */
/*
 * Records the task's comm and pid together with two priorities: oldprio is
 * read from tsk->prio when the event fires, newprio is the value passed in
 * by the caller.
 */
369TRACE_EVENT(sched_pi_setprio,
370
371 TP_PROTO(struct task_struct *tsk, int newprio),
372
373 TP_ARGS(tsk, newprio),
374
375 TP_STRUCT__entry(
376 __array( char, comm, TASK_COMM_LEN )
377 __field( pid_t, pid )
378 __field( int, oldprio )
379 __field( int, newprio )
380 ),
381
382 TP_fast_assign(
383 memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN);
384 __entry->pid = tsk->pid;
385 __entry->oldprio = tsk->prio;
386 __entry->newprio = newprio;
387 ),
388
389 TP_printk("comm=%s pid=%d oldprio=%d newprio=%d",
390 __entry->comm, __entry->pid,
391 __entry->oldprio, __entry->newprio)
392);
393
365#endif /* _TRACE_SCHED_H */ 394#endif /* _TRACE_SCHED_H */
366 395
367/* This part must be outside protection */ 396/* This part must be outside protection */
diff --git a/include/trace/events/skb.h b/include/trace/events/skb.h
index 4b2be6dc76f0..75ce9d500d8e 100644
--- a/include/trace/events/skb.h
+++ b/include/trace/events/skb.h
@@ -35,6 +35,23 @@ TRACE_EVENT(kfree_skb,
35 __entry->skbaddr, __entry->protocol, __entry->location) 35 __entry->skbaddr, __entry->protocol, __entry->location)
36); 36);
37 37
/*
 * consume_skb - records only the address of the skb; the skb itself is
 * never dereferenced by this event.
 */
38TRACE_EVENT(consume_skb,
39
40 TP_PROTO(struct sk_buff *skb),
41
42 TP_ARGS(skb),
43
44 TP_STRUCT__entry(
45 __field( void *, skbaddr )
46 ),
47
48 TP_fast_assign(
49 __entry->skbaddr = skb;
50 ),
51
52 TP_printk("skbaddr=%p", __entry->skbaddr)
53);
54
38TRACE_EVENT(skb_copy_datagram_iovec, 55TRACE_EVENT(skb_copy_datagram_iovec,
39 56
40 TP_PROTO(const struct sk_buff *skb, int len), 57 TP_PROTO(const struct sk_buff *skb, int len),