From 63cc8c75156462d4b42cbdd76c293b7eee7ddbfe Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 12 May 2008 15:44:40 +0200 Subject: percpu: introduce DEFINE_PER_CPU_PAGE_ALIGNED() macro While examining holes in percpu section I found this : c05f5000 D per_cpu__current_task c05f5000 D __per_cpu_start c05f5004 D per_cpu__cpu_number c05f5008 D per_cpu__irq_regs c05f500c d per_cpu__cpu_devices c05f5040 D per_cpu__cyc2ns c05f6000 d per_cpu__cpuid4_info c05f6004 d per_cpu__cache_kobject c05f6008 d per_cpu__index_kobject c05f7000 D per_cpu__gdt_page This is because gdt_page is a percpu variable, defined with a page alignement, and linker is doing its job, two times because of .o nesting in the build process. I introduced a new macro DEFINE_PER_CPU_PAGE_ALIGNED() to avoid wasting this space. All page aligned variables (only one at this time) are put in a separate subsection .data.percpu.page_aligned, at the very begining of percpu zone. Before patch , on a x86_32 machine : .data.percpu 30232 3227471872 .data.percpu 22168 3227471872 Thats 8064 bytes saved for each CPU. Signed-off-by: Eric Dumazet Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/percpu.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/percpu.h b/include/linux/percpu.h index 4cdd393e71e1..2edacc8e6b8b 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -23,12 +23,19 @@ __attribute__((__section__(SHARED_ALIGNED_SECTION))) \ PER_CPU_ATTRIBUTES __typeof__(type) per_cpu__##name \ ____cacheline_aligned_in_smp + +#define DEFINE_PER_CPU_PAGE_ALIGNED(type, name) \ + __attribute__((__section__(".data.percpu.page_aligned"))) \ + PER_CPU_ATTRIBUTES __typeof__(type) per_cpu__##name #else #define DEFINE_PER_CPU(type, name) \ PER_CPU_ATTRIBUTES __typeof__(type) per_cpu__##name #define DEFINE_PER_CPU_SHARED_ALIGNED(type, name) \ DEFINE_PER_CPU(type, name) + +#define DEFINE_PER_CPU_PAGE_ALIGNED(type, name) \ + DEFINE_PER_CPU(type, name) #endif #define EXPORT_PER_CPU_SYMBOL(var) EXPORT_SYMBOL(per_cpu__##var) -- cgit v1.2.2 From 1886e8a90a580f3ad343f2065c84c1b9e1dac9ef Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Thu, 10 Jul 2008 11:16:37 -0700 Subject: x64, x2apic/intr-remap: code re-structuring, to be used by both DMA and Interrupt remapping Allocate the iommu during the parse of DMA remapping hardware definition structures. And also, introduce routines for device scope initialization which will be explicitly called during dma-remapping initialization. These will be used for enabling interrupt remapping separately from the existing DMA-remapping enabling sequence. Signed-off-by: Suresh Siddha Cc: akpm@linux-foundation.org Cc: arjan@linux.intel.com Cc: andi@firstfloor.org Cc: ebiederm@xmission.com Cc: jbarnes@virtuousgeek.org Cc: steiner@sgi.com Signed-off-by: Ingo Molnar --- include/linux/dmar.h | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/dmar.h b/include/linux/dmar.h index 56c73b847551..3ab07e425583 100644 --- a/include/linux/dmar.h +++ b/include/linux/dmar.h @@ -46,12 +46,14 @@ extern int intel_iommu_init(void); extern int dmar_table_init(void); extern int early_dmar_detect(void); +extern int dmar_dev_scope_init(void); extern struct list_head dmar_drhd_units; extern struct list_head dmar_rmrr_units; struct dmar_drhd_unit { struct list_head list; /* list of drhd units */ + struct acpi_dmar_header *hdr; /* ACPI header */ u64 reg_base_addr; /* register base address*/ struct pci_dev **devices; /* target device array */ int devices_cnt; /* target device count */ @@ -62,6 +64,7 @@ struct dmar_drhd_unit { struct dmar_rmrr_unit { struct list_head list; /* list of rmrr units */ + struct acpi_dmar_header *hdr; /* ACPI header */ u64 base_address; /* reserved base address*/ u64 end_address; /* reserved end address */ struct pci_dev **devices; /* target devices */ @@ -72,6 +75,8 @@ struct dmar_rmrr_unit { list_for_each_entry(drhd, &dmar_drhd_units, list) #define for_each_rmrr_units(rmrr) \ list_for_each_entry(rmrr, &dmar_rmrr_units, list) + +extern int alloc_iommu(struct dmar_drhd_unit *); #else static inline void detect_intel_iommu(void) { @@ -81,6 +86,9 @@ static inline int intel_iommu_init(void) { return -ENODEV; } - +static inline int dmar_table_init(void) +{ + return -ENODEV; +} #endif /* !CONFIG_DMAR */ #endif /* __DMAR_H__ */ -- cgit v1.2.2 From ad3ad3f6a2caebf56869b83b69e23eb9fa5e0ab6 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Thu, 10 Jul 2008 11:16:40 -0700 Subject: x64, x2apic/intr-remap: parse ioapic scope under vt-d structures Parse the vt-d device scope structures to find the mapping between IO-APICs and the interrupt remapping hardware units. This will be used later for enabling Interrupt-remapping for IOAPIC devices. Signed-off-by: Suresh Siddha Cc: akpm@linux-foundation.org Cc: arjan@linux.intel.com Cc: andi@firstfloor.org Cc: ebiederm@xmission.com Cc: jbarnes@virtuousgeek.org Cc: steiner@sgi.com Signed-off-by: Ingo Molnar --- include/linux/dmar.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/dmar.h b/include/linux/dmar.h index 3ab07e425583..c4e96eb29617 100644 --- a/include/linux/dmar.h +++ b/include/linux/dmar.h @@ -47,6 +47,7 @@ extern int intel_iommu_init(void); extern int dmar_table_init(void); extern int early_dmar_detect(void); extern int dmar_dev_scope_init(void); +extern int parse_ioapics_under_ir(void); extern struct list_head dmar_drhd_units; extern struct list_head dmar_rmrr_units; -- cgit v1.2.2 From 2ae21010694e56461a63bfc80e960090ce0a5ed9 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Thu, 10 Jul 2008 11:16:43 -0700 Subject: x64, x2apic/intr-remap: Interrupt remapping infrastructure Interrupt remapping (part of Intel Virtualization Tech for directed I/O) infrastructure. Signed-off-by: Suresh Siddha Cc: akpm@linux-foundation.org Cc: arjan@linux.intel.com Cc: andi@firstfloor.org Cc: ebiederm@xmission.com Cc: jbarnes@virtuousgeek.org Cc: steiner@sgi.com Signed-off-by: Ingo Molnar --- include/linux/dmar.h | 120 ++++++++++++++++++++++++++++++++++++--------------- 1 file changed, 85 insertions(+), 35 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dmar.h b/include/linux/dmar.h index c4e96eb29617..8a0238dd2c11 100644 --- a/include/linux/dmar.h +++ b/include/linux/dmar.h @@ -25,9 +25,85 @@ #include #include -#ifdef CONFIG_DMAR +#if defined(CONFIG_DMAR) || defined(CONFIG_INTR_REMAP) struct intel_iommu; +struct dmar_drhd_unit { + struct list_head list; /* list of drhd units */ + struct acpi_dmar_header *hdr; /* ACPI header */ + u64 reg_base_addr; /* register base address*/ + struct pci_dev **devices; /* target device array */ + int devices_cnt; /* target device count */ + u8 ignored:1; /* ignore drhd */ + u8 include_all:1; + struct intel_iommu *iommu; +}; + +extern struct list_head dmar_drhd_units; + +#define for_each_drhd_unit(drhd) \ + list_for_each_entry(drhd, &dmar_drhd_units, list) + +extern int dmar_table_init(void); +extern int early_dmar_detect(void); +extern int dmar_dev_scope_init(void); + +/* Intel IOMMU detection */ +extern void detect_intel_iommu(void); + + +extern int parse_ioapics_under_ir(void); +extern int alloc_iommu(struct dmar_drhd_unit *); +#else +static inline void detect_intel_iommu(void) +{ + return; +} + +static inline int dmar_table_init(void) +{ + return -ENODEV; +} +#endif /* !CONFIG_DMAR && !CONFIG_INTR_REMAP */ + +#ifdef CONFIG_INTR_REMAP +extern int intr_remapping_enabled; +extern int enable_intr_remapping(int); + +struct irte { + union { + struct { + __u64 present : 1, + fpd : 1, + dst_mode : 1, + redir_hint : 1, + trigger_mode : 1, + dlvry_mode : 3, + avail : 4, + __reserved_1 : 4, + vector : 8, + __reserved_2 : 8, + dest_id : 32; + }; + __u64 low; + }; + + union { + struct { + __u64 sid : 16, + sq : 2, + svt : 2, + __reserved_3 : 44; + }; + __u64 high; + }; +}; +#else +#define enable_intr_remapping(mode) (-1) +#define intr_remapping_enabled (0) +#endif + +#ifdef CONFIG_DMAR extern const char *dmar_get_fault_reason(u8 fault_reason); /* Can't use the common MSI interrupt functions @@ -40,29 +116,8 @@ extern void dmar_msi_write(int irq, struct msi_msg *msg); extern int dmar_set_interrupt(struct intel_iommu *iommu); extern int arch_setup_dmar_msi(unsigned int irq); -/* Intel IOMMU detection and initialization functions */ -extern void detect_intel_iommu(void); -extern int intel_iommu_init(void); - -extern int dmar_table_init(void); -extern int early_dmar_detect(void); -extern int dmar_dev_scope_init(void); -extern int parse_ioapics_under_ir(void); - -extern struct list_head dmar_drhd_units; +extern int iommu_detected, no_iommu; extern struct list_head dmar_rmrr_units; - -struct dmar_drhd_unit { - struct list_head list; /* list of drhd units */ - struct acpi_dmar_header *hdr; /* ACPI header */ - u64 reg_base_addr; /* register base address*/ - struct pci_dev **devices; /* target device array */ - int devices_cnt; /* target device count */ - u8 ignored:1; /* ignore drhd */ - u8 include_all:1; - struct intel_iommu *iommu; -}; - struct dmar_rmrr_unit { struct list_head list; /* list of rmrr units */ struct acpi_dmar_header *hdr; /* ACPI header */ @@ -72,24 +127,19 @@ struct dmar_rmrr_unit { int devices_cnt; /* target device count */ }; -#define for_each_drhd_unit(drhd) \ - list_for_each_entry(drhd, &dmar_drhd_units, list) #define for_each_rmrr_units(rmrr) \ list_for_each_entry(rmrr, &dmar_rmrr_units, list) - -extern int alloc_iommu(struct dmar_drhd_unit *); +/* Intel DMAR initialization functions */ +extern int intel_iommu_init(void); +extern int dmar_disabled; #else -static inline void detect_intel_iommu(void) -{ - return; -} static inline int intel_iommu_init(void) { +#ifdef CONFIG_INTR_REMAP + return dmar_dev_scope_init(); +#else return -ENODEV; -} -static inline int dmar_table_init(void) -{ - return -ENODEV; +#endif } #endif /* !CONFIG_DMAR */ #endif /* __DMAR_H__ */ -- cgit v1.2.2 From b6fcb33ad6c05f152a672f7c96c1fab006527b80 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Thu, 10 Jul 2008 11:16:44 -0700 Subject: x64, x2apic/intr-remap: routines managing Interrupt remapping table entries. Routines handling the management of interrupt remapping table entries. Signed-off-by: Suresh Siddha Cc: akpm@linux-foundation.org Cc: arjan@linux.intel.com Cc: andi@firstfloor.org Cc: ebiederm@xmission.com Cc: jbarnes@virtuousgeek.org Cc: steiner@sgi.com Signed-off-by: Ingo Molnar --- include/linux/dmar.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/linux') diff --git a/include/linux/dmar.h b/include/linux/dmar.h index 8a0238dd2c11..324bbca85a26 100644 --- a/include/linux/dmar.h +++ b/include/linux/dmar.h @@ -98,7 +98,19 @@ struct irte { __u64 high; }; }; +extern int get_irte(int irq, struct irte *entry); +extern int modify_irte(int irq, struct irte *irte_modified); +extern int alloc_irte(struct intel_iommu *iommu, int irq, u16 count); +extern int set_irte_irq(int irq, struct intel_iommu *iommu, u16 index, + u16 sub_handle); +extern int map_irq_to_irte_handle(int irq, u16 *sub_handle); +extern int clear_irte_irq(int irq, struct intel_iommu *iommu, u16 index); +extern int flush_irte(int irq); +extern int free_irte(int irq); + +extern int irq_remapped(int irq); #else +#define irq_remapped(irq) (0) #define enable_intr_remapping(mode) (-1) #define intr_remapping_enabled (0) #endif -- cgit v1.2.2 From 72b1e22dfcad1daca6906148fd956ffe404bb0bc Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Thu, 10 Jul 2008 11:16:45 -0700 Subject: x64, x2apic/intr-remap: generic irq migration support from process context Generic infrastructure for migrating the irq from the process context in the presence of CONFIG_GENERIC_PENDING_IRQ. This will be used later for migrating irq in the presence of interrupt-remapping. Signed-off-by: Suresh Siddha Cc: akpm@linux-foundation.org Cc: arjan@linux.intel.com Cc: andi@firstfloor.org Cc: ebiederm@xmission.com Cc: jbarnes@virtuousgeek.org Cc: steiner@sgi.com Signed-off-by: Ingo Molnar --- include/linux/irq.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/irq.h b/include/linux/irq.h index 552e0ec269c9..c211984b55e5 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -62,6 +62,7 @@ typedef void (*irq_flow_handler_t)(unsigned int irq, #define IRQ_MOVE_PENDING 0x00200000 /* need to re-target IRQ destination */ #define IRQ_NO_BALANCING 0x00400000 /* IRQ is excluded from balancing */ #define IRQ_SPURIOUS_DISABLED 0x00800000 /* IRQ was disabled by the spurious trap */ +#define IRQ_MOVE_PCNTXT 0x01000000 /* IRQ migration from process context */ #ifdef CONFIG_IRQ_PER_CPU # define CHECK_IRQ_PER_CPU(var) ((var) & IRQ_PER_CPU) -- cgit v1.2.2 From 89027d35aa5b8f45ce0f7fa0911db85b46563da0 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Thu, 10 Jul 2008 11:16:56 -0700 Subject: x64, x2apic/intr-remap: IO-APIC support for interrupt-remapping IO-APIC support in the presence of interrupt-remapping infrastructure. IO-APIC RTE will be programmed with interrupt-remapping table entry(IRTE) index and the IRTE will contain information about the vector, cpu destination, trigger mode etc, which traditionally was present in the IO-APIC RTE. Introduce a new irq_chip for cleaner irq migration (in the process context as opposed to the current irq migration in the context of an interrupt. interrupt-remapping infrastructure will help us achieve this cleanly). For edge triggered, irq migration is a simple atomic update(of vector and cpu destination) of IRTE and flush the hardware cache. For level triggered, we need to modify the io-apic RTE aswell with the update vector information, along with modifying IRTE with vector and cpu destination. So irq migration for level triggered is little bit more complex compared to edge triggered migration. But the good news is, we use the same algorithm for level triggered migration as we have today, only difference being, we now initiate the irq migration from process context instead of the interrupt context. In future, when we do a directed EOI (combined with cpu EOI broadcast suppression) to the IO-APIC, level triggered irq migration will also be as simple as edge triggered migration and we can do the irq migration with a simple atomic update to IO-APIC RTE. TBD: some tests/changes needed in the presence of fixup_irqs() for level triggered irq migration. Signed-off-by: Suresh Siddha Cc: akpm@linux-foundation.org Cc: arjan@linux.intel.com Cc: andi@firstfloor.org Cc: ebiederm@xmission.com Cc: jbarnes@virtuousgeek.org Cc: steiner@sgi.com Signed-off-by: Ingo Molnar --- include/linux/dmar.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/dmar.h b/include/linux/dmar.h index 324bbca85a26..bf41ffa74705 100644 --- a/include/linux/dmar.h +++ b/include/linux/dmar.h @@ -109,6 +109,7 @@ extern int flush_irte(int irq); extern int free_irte(int irq); extern int irq_remapped(int irq); +extern struct intel_iommu *map_ioapic_to_ir(int apic); #else #define irq_remapped(irq) (0) #define enable_intr_remapping(mode) (-1) -- cgit v1.2.2 From 75c46fa61bc5b4ccd20a168ff325c58771248fcd Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Thu, 10 Jul 2008 11:16:57 -0700 Subject: x64, x2apic/intr-remap: MSI and MSI-X support for interrupt remapping infrastructure MSI and MSI-X support for interrupt remapping infrastructure. MSI address register will be programmed with interrupt-remapping table entry(IRTE) index and the IRTE will contain information about the vector, cpu destination, etc. For MSI-X, all the IRTE's will be consecutively allocated in the table, and the address registers will contain the starting index to the block and the data register will contain the subindex with in that block. This also introduces a new irq_chip for cleaner irq migration (in the process context as opposed to the current irq migration in the context of an interrupt. interrupt-remapping infrastructure will help us achieve this). As MSI is edge triggered, irq migration is a simple atomic update(of vector and cpu destination) of IRTE and flushing the hardware cache. Signed-off-by: Suresh Siddha Cc: akpm@linux-foundation.org Cc: arjan@linux.intel.com Cc: andi@firstfloor.org Cc: ebiederm@xmission.com Cc: jbarnes@virtuousgeek.org Cc: steiner@sgi.com Signed-off-by: Ingo Molnar --- include/linux/dmar.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/dmar.h b/include/linux/dmar.h index bf41ffa74705..c360c558e59e 100644 --- a/include/linux/dmar.h +++ b/include/linux/dmar.h @@ -109,6 +109,7 @@ extern int flush_irte(int irq); extern int free_irte(int irq); extern int irq_remapped(int irq); +extern struct intel_iommu *map_dev_to_ir(struct pci_dev *dev); extern struct intel_iommu *map_ioapic_to_ir(int apic); #else #define irq_remapped(irq) (0) -- cgit v1.2.2 From 3cac97cbb14aed00d83eb33d4613b0fe3aaea863 Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Sun, 6 Jul 2008 17:23:55 +0800 Subject: rcu classic: simplify the next pending batch use a batch number(rcp->pending) instead of a flag(rcp->next_pending) rcu_start_batch() need to change this flag, so mb()s is needed for memory-access safe. but(after this patch applied) rcu_start_batch() do not change this batch number(rcp->pending), rcp->pending is managed by __rcu_process_callbacks only, and troublesome mb()s are eliminated. And codes look simpler and clearer. Signed-off-by: Lai Jiangshan Cc: "Paul E. McKenney" Cc: Dipankar Sarma Cc: Gautham Shenoy Cc: Dhaval Giani Cc: Peter Zijlstra Signed-off-by: Ingo Molnar --- include/linux/rcuclassic.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/rcuclassic.h b/include/linux/rcuclassic.h index 8c774905dcfe..c847e59c6006 100644 --- a/include/linux/rcuclassic.h +++ b/include/linux/rcuclassic.h @@ -45,7 +45,7 @@ struct rcu_ctrlblk { long cur; /* Current batch number. */ long completed; /* Number of the last completed batch */ - int next_pending; /* Is the next batch already waiting? */ + long pending; /* Number of the last pending batch */ int signaled; -- cgit v1.2.2 From 5127bed588a2f8f3a1f732de2a8a190b7df5dce3 Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Sun, 6 Jul 2008 17:23:59 +0800 Subject: rcu classic: new algorithm for callbacks-processing(v2) This is v2, it's a little deference from v1 that I had send to lkml. use ACCESS_ONCE use rcu_batch_after/rcu_batch_before for batch # comparison. rcutorture test result: (hotplugs: do cpu-online/offline once per second) No CONFIG_NO_HZ: OK, 12hours No CONFIG_NO_HZ, hotplugs: OK, 12hours CONFIG_NO_HZ=y: OK, 24hours CONFIG_NO_HZ=y, hotplugs: Failed. (Failed also without my patch applied, exactly the same bug occurred, http://lkml.org/lkml/2008/7/3/24) v1's email thread: http://lkml.org/lkml/2008/6/2/539 v1's description: The code/algorithm of the implement of current callbacks-processing is very efficient and technical. But when I studied it and I found a disadvantage: In multi-CPU systems, when a new RCU callback is being queued(call_rcu[_bh]), this callback will be invoked after the grace period for the batch with batch number = rcp->cur+2 has completed very very likely in current implement. Actually, this callback can be invoked after the grace period for the batch with batch number = rcp->cur+1 has completed. The delay of invocation means that latency of synchronize_rcu() is extended. But more important thing is that the callbacks usually free memory, and these works are delayed too! it's necessary for reclaimer to free memory as soon as possible when left memory is few. A very simple way can solve this problem: a field(struct rcu_head::batch) is added to record the batch number for the RCU callback. And when a new RCU callback is being queued, we determine the batch number for this callback(head->batch = rcp->cur+1) and we move this callback to rdp->donelist if we find that head->batch <= rcp->completed when we process callbacks. This simple way reduces the wait time for invocation a lot. (about 2.5Grace Period -> 1.5Grace Period in average in multi-CPU systems) This is my algorithm. But I do not add any field for struct rcu_head in my implement. We just need to memorize the last 2 batches and their batch number, because these 2 batches include all entries that for whom the grace period hasn't completed. So we use a special linked-list rather than add a field. Please see the comment of struct rcu_data. Signed-off-by: Lai Jiangshan Cc: "Paul E. McKenney" Cc: Dipankar Sarma Cc: Gautham Shenoy Cc: Dhaval Giani Cc: Peter Zijlstra Signed-off-by: Ingo Molnar --- include/linux/rcuclassic.h | 26 +++++++++++++++++--------- 1 file changed, 17 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rcuclassic.h b/include/linux/rcuclassic.h index c847e59c6006..04c728147be0 100644 --- a/include/linux/rcuclassic.h +++ b/include/linux/rcuclassic.h @@ -66,11 +66,7 @@ static inline int rcu_batch_after(long a, long b) return (a - b) > 0; } -/* - * Per-CPU data for Read-Copy UPdate. - * nxtlist - new callbacks are added here - * curlist - current batch for which quiescent cycle started if any - */ +/* Per-CPU data for Read-Copy UPdate. */ struct rcu_data { /* 1) quiescent state handling : */ long quiescbatch; /* Batch # for grace period */ @@ -78,12 +74,24 @@ struct rcu_data { int qs_pending; /* core waits for quiesc state */ /* 2) batch handling */ - long batch; /* Batch # for current RCU batch */ + /* + * if nxtlist is not NULL, then: + * batch: + * The batch # for the last entry of nxtlist + * [*nxttail[1], NULL = *nxttail[2]): + * Entries that batch # <= batch + * [*nxttail[0], *nxttail[1]): + * Entries that batch # <= batch - 1 + * [nxtlist, *nxttail[0]): + * Entries that batch # <= batch - 2 + * The grace period for these entries has completed, and + * the other grace-period-completed entries may be moved + * here temporarily in rcu_process_callbacks(). + */ + long batch; struct rcu_head *nxtlist; - struct rcu_head **nxttail; + struct rcu_head **nxttail[3]; long qlen; /* # of queued callbacks */ - struct rcu_head *curlist; - struct rcu_head **curtail; struct rcu_head *donelist; struct rcu_head **donetail; long blimit; /* Upper limit on a processed batch */ -- cgit v1.2.2 From 67182ae1c42206e516f7efb292b745e826497b24 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Sun, 10 Aug 2008 18:35:38 -0700 Subject: rcu, debug: detect stalled grace periods this is a diagnostic patch for Classic RCU. The approach is to record a timestamp at the beginning of the grace period (in rcu_start_batch()), then have rcu_check_callbacks() complain if: 1. it is running on a CPU that has holding up grace periods for a long time (say one second). This will identify the culprit assuming that the culprit has not disabled hardware irqs, instruction execution, or some such. 2. it is running on a CPU that is not holding up grace periods, but grace periods have been held up for an even longer time (say two seconds). It is enabled via the default-off CONFIG_DEBUG_RCU_STALL kernel parameter. Rather than exponential backoff, it backs off to once per 30 seconds. My feeling upon thinking on it was that if you have stalled RCU grace periods for that long, a few extra printk() messages are probably the least of your worries... Signed-off-by: Paul E. McKenney Cc: Peter Zijlstra Cc: Yinghai Lu Cc: David Witbrodt Signed-off-by: Ingo Molnar --- include/linux/rcuclassic.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/rcuclassic.h b/include/linux/rcuclassic.h index 04c728147be0..16589958b40e 100644 --- a/include/linux/rcuclassic.h +++ b/include/linux/rcuclassic.h @@ -46,6 +46,9 @@ struct rcu_ctrlblk { long cur; /* Current batch number. */ long completed; /* Number of the last completed batch */ long pending; /* Number of the last pending batch */ +#ifdef CONFIG_DEBUG_RCU_STALL + unsigned long gp_check; /* Time grace period should end, in seconds. */ +#endif /* #ifdef CONFIG_DEBUG_RCU_STALL */ int signaled; -- cgit v1.2.2 From c1bc667e844c2677cdf927102ab384fe7b033762 Mon Sep 17 00:00:00 2001 From: Julius Volz Date: Thu, 7 Aug 2008 16:43:38 +0200 Subject: IPVS: Add genetlink interface definitions to ip_vs.h Add IPVS Generic Netlink interface definitions to include/linux/ip_vs.h. Signed-off-by: Julius Volz Signed-off-by: Simon Horman --- include/linux/ip_vs.h | 160 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 160 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ip_vs.h b/include/linux/ip_vs.h index ec6eb49af2d8..0f434a28fb58 100644 --- a/include/linux/ip_vs.h +++ b/include/linux/ip_vs.h @@ -242,4 +242,164 @@ struct ip_vs_daemon_user { int syncid; }; +/* + * + * IPVS Generic Netlink interface definitions + * + */ + +/* Generic Netlink family info */ + +#define IPVS_GENL_NAME "IPVS" +#define IPVS_GENL_VERSION 0x1 + +struct ip_vs_flags { + __be32 flags; + __be32 mask; +}; + +/* Generic Netlink command attributes */ +enum { + IPVS_CMD_UNSPEC = 0, + + IPVS_CMD_NEW_SERVICE, /* add service */ + IPVS_CMD_SET_SERVICE, /* modify service */ + IPVS_CMD_DEL_SERVICE, /* delete service */ + IPVS_CMD_GET_SERVICE, /* get service info */ + + IPVS_CMD_NEW_DEST, /* add destination */ + IPVS_CMD_SET_DEST, /* modify destination */ + IPVS_CMD_DEL_DEST, /* delete destination */ + IPVS_CMD_GET_DEST, /* get destination info */ + + IPVS_CMD_NEW_DAEMON, /* start sync daemon */ + IPVS_CMD_DEL_DAEMON, /* stop sync daemon */ + IPVS_CMD_GET_DAEMON, /* get sync daemon status */ + + IPVS_CMD_SET_CONFIG, /* set config settings */ + IPVS_CMD_GET_CONFIG, /* get config settings */ + + IPVS_CMD_SET_INFO, /* only used in GET_INFO reply */ + IPVS_CMD_GET_INFO, /* get general IPVS info */ + + IPVS_CMD_ZERO, /* zero all counters and stats */ + IPVS_CMD_FLUSH, /* flush services and dests */ + + __IPVS_CMD_MAX, +}; + +#define IPVS_CMD_MAX (__IPVS_CMD_MAX - 1) + +/* Attributes used in the first level of commands */ +enum { + IPVS_CMD_ATTR_UNSPEC = 0, + IPVS_CMD_ATTR_SERVICE, /* nested service attribute */ + IPVS_CMD_ATTR_DEST, /* nested destination attribute */ + IPVS_CMD_ATTR_DAEMON, /* nested sync daemon attribute */ + IPVS_CMD_ATTR_TIMEOUT_TCP, /* TCP connection timeout */ + IPVS_CMD_ATTR_TIMEOUT_TCP_FIN, /* TCP FIN wait timeout */ + IPVS_CMD_ATTR_TIMEOUT_UDP, /* UDP timeout */ + __IPVS_CMD_ATTR_MAX, +}; + +#define IPVS_CMD_ATTR_MAX (__IPVS_SVC_ATTR_MAX - 1) + +/* + * Attributes used to describe a service + * + * Used inside nested attribute IPVS_CMD_ATTR_SERVICE + */ +enum { + IPVS_SVC_ATTR_UNSPEC = 0, + IPVS_SVC_ATTR_AF, /* address family */ + IPVS_SVC_ATTR_PROTOCOL, /* virtual service protocol */ + IPVS_SVC_ATTR_ADDR, /* virtual service address */ + IPVS_SVC_ATTR_PORT, /* virtual service port */ + IPVS_SVC_ATTR_FWMARK, /* firewall mark of service */ + + IPVS_SVC_ATTR_SCHED_NAME, /* name of scheduler */ + IPVS_SVC_ATTR_FLAGS, /* virtual service flags */ + IPVS_SVC_ATTR_TIMEOUT, /* persistent timeout */ + IPVS_SVC_ATTR_NETMASK, /* persistent netmask */ + + IPVS_SVC_ATTR_STATS, /* nested attribute for service stats */ + __IPVS_SVC_ATTR_MAX, +}; + +#define IPVS_SVC_ATTR_MAX (__IPVS_SVC_ATTR_MAX - 1) + +/* + * Attributes used to describe a destination (real server) + * + * Used inside nested attribute IPVS_CMD_ATTR_DEST + */ +enum { + IPVS_DEST_ATTR_UNSPEC = 0, + IPVS_DEST_ATTR_ADDR, /* real server address */ + IPVS_DEST_ATTR_PORT, /* real server port */ + + IPVS_DEST_ATTR_FWD_METHOD, /* forwarding method */ + IPVS_DEST_ATTR_WEIGHT, /* destination weight */ + + IPVS_DEST_ATTR_U_THRESH, /* upper threshold */ + IPVS_DEST_ATTR_L_THRESH, /* lower threshold */ + + IPVS_DEST_ATTR_ACTIVE_CONNS, /* active connections */ + IPVS_DEST_ATTR_INACT_CONNS, /* inactive connections */ + IPVS_DEST_ATTR_PERSIST_CONNS, /* persistent connections */ + + IPVS_DEST_ATTR_STATS, /* nested attribute for dest stats */ + __IPVS_DEST_ATTR_MAX, +}; + +#define IPVS_DEST_ATTR_MAX (__IPVS_DEST_ATTR_MAX - 1) + +/* + * Attributes describing a sync daemon + * + * Used inside nested attribute IPVS_CMD_ATTR_DAEMON + */ +enum { + IPVS_DAEMON_ATTR_UNSPEC = 0, + IPVS_DAEMON_ATTR_STATE, /* sync daemon state (master/backup) */ + IPVS_DAEMON_ATTR_MCAST_IFN, /* multicast interface name */ + IPVS_DAEMON_ATTR_SYNC_ID, /* SyncID we belong to */ + __IPVS_DAEMON_ATTR_MAX, +}; + +#define IPVS_DAEMON_ATTR_MAX (__IPVS_DAEMON_ATTR_MAX - 1) + +/* + * Attributes used to describe service or destination entry statistics + * + * Used inside nested attributes IPVS_SVC_ATTR_STATS and IPVS_DEST_ATTR_STATS + */ +enum { + IPVS_STATS_ATTR_UNSPEC = 0, + IPVS_STATS_ATTR_CONNS, /* connections scheduled */ + IPVS_STATS_ATTR_INPKTS, /* incoming packets */ + IPVS_STATS_ATTR_OUTPKTS, /* outgoing packets */ + IPVS_STATS_ATTR_INBYTES, /* incoming bytes */ + IPVS_STATS_ATTR_OUTBYTES, /* outgoing bytes */ + + IPVS_STATS_ATTR_CPS, /* current connection rate */ + IPVS_STATS_ATTR_INPPS, /* current in packet rate */ + IPVS_STATS_ATTR_OUTPPS, /* current out packet rate */ + IPVS_STATS_ATTR_INBPS, /* current in byte rate */ + IPVS_STATS_ATTR_OUTBPS, /* current out byte rate */ + __IPVS_STATS_ATTR_MAX, +}; + +#define IPVS_STATS_ATTR_MAX (__IPVS_STATS_ATTR_MAX - 1) + +/* Attributes used in response to IPVS_CMD_GET_INFO command */ +enum { + IPVS_INFO_ATTR_UNSPEC = 0, + IPVS_INFO_ATTR_VERSION, /* IPVS version number */ + IPVS_INFO_ATTR_CONN_TAB_SIZE, /* size of connection hash table */ + __IPVS_INFO_ATTR_MAX, +}; + +#define IPVS_INFO_ATTR_MAX (__IPVS_INFO_ATTR_MAX - 1) + #endif /* _IP_VS_H */ -- cgit v1.2.2 From ff9cf2ce7afe76435d66c898cc9dacaa68e79d41 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 1 Aug 2008 14:10:02 -0700 Subject: rcu: fixes to include/linux/rcupreempt.h Hello! Compared tip/core/rcu to my latest patchset, and found the following issues: o the memory barrier in rcu_exit_nohz() somehow got out of place (it is correct in mainline as of 2.6.26-rc7). o There is a duplicate declaration of rcu_dyntick_sched. The attached patch fixes these. Signed-off-by: Paul E. McKenney Signed-off-by: Ingo Molnar --- include/linux/rcupreempt.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rcupreempt.h b/include/linux/rcupreempt.h index 0967f03b0705..addb5e282f39 100644 --- a/include/linux/rcupreempt.h +++ b/include/linux/rcupreempt.h @@ -111,7 +111,6 @@ extern struct rcupreempt_trace *rcupreempt_trace_cpu(int cpu); struct softirq_action; #ifdef CONFIG_NO_HZ -DECLARE_PER_CPU(struct rcu_dyntick_sched, rcu_dyntick_sched); static inline void rcu_enter_nohz(void) { @@ -126,8 +125,8 @@ static inline void rcu_exit_nohz(void) { static DEFINE_RATELIMIT_STATE(rs, 10 * HZ, 1); - smp_mb(); /* CPUs seeing ++ must see later RCU read-side crit sects */ __get_cpu_var(rcu_dyntick_sched).dynticks++; + smp_mb(); /* CPUs seeing ++ must see later RCU read-side crit sects */ WARN_ON_RATELIMIT(!(__get_cpu_var(rcu_dyntick_sched).dynticks & 0x1), &rs); } -- cgit v1.2.2 From 34d7c2b38d124219b7034356716e3455c439acd3 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 1 Aug 2008 14:11:05 -0700 Subject: rcu: remove list_for_each_rcu() All of the in-tree uses of list_for_each_rcu() have been converted to list_for_each_entry_rcu(), so list_for_each_rcu() can now be removed. Signed-off-by: Paul E. McKenney Signed-off-by: Ingo Molnar --- include/linux/rculist.h | 14 -------------- 1 file changed, 14 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rculist.h b/include/linux/rculist.h index eb4443c7e05b..e649bd3f2c97 100644 --- a/include/linux/rculist.h +++ b/include/linux/rculist.h @@ -198,20 +198,6 @@ static inline void list_splice_init_rcu(struct list_head *list, at->prev = last; } -/** - * list_for_each_rcu - iterate over an rcu-protected list - * @pos: the &struct list_head to use as a loop cursor. - * @head: the head for your list. - * - * This list-traversal primitive may safely run concurrently with - * the _rcu list-mutation primitives such as list_add_rcu() - * as long as the traversal is guarded by rcu_read_lock(). - */ -#define list_for_each_rcu(pos, head) \ - for (pos = rcu_dereference((head)->next); \ - prefetch(pos->next), pos != (head); \ - pos = rcu_dereference(pos->next)) - #define __list_for_each_rcu(pos, head) \ for (pos = rcu_dereference((head)->next); \ pos != (head); \ -- cgit v1.2.2 From 07dd20e0324f4d3e33bde1944d4f7771a09c498c Mon Sep 17 00:00:00 2001 From: Richard Kennedy Date: Fri, 1 Aug 2008 13:18:04 +0100 Subject: sched: reorder signal_struct to remove 8 bytes on 64 bit builds reorder structure to remove 8 bytes of padding on 64 bit builds Signed-off-by: Richard Kennedy Signed-off-by: Ingo Molnar --- include/linux/sched.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index cfb0d87b99fc..5a8058e44f58 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -451,8 +451,8 @@ struct signal_struct { * - everyone except group_exit_task is stopped during signal delivery * of fatal signals, group_exit_task processes the signal. */ - struct task_struct *group_exit_task; int notify_count; + struct task_struct *group_exit_task; /* thread group stop support, overloads group_exit_code too */ int group_stop_count; -- cgit v1.2.2 From bee367ed066e26c14263d808136fba8eec3bd70a Mon Sep 17 00:00:00 2001 From: Richard Kennedy Date: Fri, 1 Aug 2008 13:24:08 +0100 Subject: sched: reorder struct sched_rt_entity to remove padding on 64 bit builds remove 8 bytes of padding on 64 bit builds (also removes 8 bytes from task_struct) Signed-off-by: Richard Kennedy Signed-off-by: Ingo Molnar --- include/linux/sched.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 5a8058e44f58..08a87b5f29e1 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1010,8 +1010,8 @@ struct sched_entity { struct sched_rt_entity { struct list_head run_list; - unsigned int time_slice; unsigned long timeout; + unsigned int time_slice; int nr_cpus_allowed; struct sched_rt_entity *back; -- cgit v1.2.2 From 3fb669dd6ec11e14819c0114a0e68a9ddcec65e1 Mon Sep 17 00:00:00 2001 From: Richard Kennedy Date: Fri, 1 Aug 2008 13:36:28 +0100 Subject: reorder struct prop_local_single to remove padding on 64 bit builds reorder structure to remove 8 bytes of padding on 64 bit builds (also removes 8 bytes from task_struct) Signed-off-by: Richard Kennedy Cc: peterz@infradead.org Signed-off-by: Ingo Molnar --- include/linux/proportions.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/proportions.h b/include/linux/proportions.h index 5afc1b23346d..cf793bbbd05e 100644 --- a/include/linux/proportions.h +++ b/include/linux/proportions.h @@ -104,8 +104,8 @@ struct prop_local_single { * snapshot of the last seen global state * and a lock protecting this state */ - int shift; unsigned long period; + int shift; spinlock_t lock; /* protect the snapshot state */ }; -- cgit v1.2.2 From dd0078f4f04d939950a792c493d7d97d7ce663b8 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 30 Jul 2008 14:20:54 -0400 Subject: rcu: just rename call_rcu_bh instead of making it a macro Seems that I found a box that has a config that passes call_rcu_bh as a function pointer (see net/sctp/sm_make_chunk.c), so declaring the call_rcu_bh has a macro function isn't good enough. This patch makes it just another name of call_rcu for rcupreempt. Signed-off-by: Steven Rostedt Reviewed-by: "Paul E. McKenney" Signed-off-by: Ingo Molnar --- include/linux/rcupreempt.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/rcupreempt.h b/include/linux/rcupreempt.h index addb5e282f39..3e05c09b54a2 100644 --- a/include/linux/rcupreempt.h +++ b/include/linux/rcupreempt.h @@ -57,7 +57,13 @@ static inline void rcu_qsctr_inc(int cpu) rdssp->sched_qs++; } #define rcu_bh_qsctr_inc(cpu) -#define call_rcu_bh(head, rcu) call_rcu(head, rcu) + +/* + * Someone might want to pass call_rcu_bh as a function pointer. + * So this needs to just be a rename and not a macro function. + * (no parentheses) + */ +#define call_rcu_bh call_rcu /** * call_rcu_sched - Queue RCU callback for invocation after sched grace period. -- cgit v1.2.2 From ded00a56e99555c3f4000ef3eebfd5fe0d574565 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Sun, 17 Aug 2008 12:50:36 -0700 Subject: rcu: remove redundant ACCESS_ONCE definition from rcupreempt.c Remove the redundant definition of ACCESS_ONCE() from rcupreempt.c in favor of the one in compiler.h. Also merge the comment header from rcupreempt.c's definition into that in compiler.h. Signed-off-by: Paul E. McKenney Signed-off-by: Ingo Molnar --- include/linux/compiler.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/compiler.h b/include/linux/compiler.h index c8bd2daf95ec..8322141ee480 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -190,7 +190,9 @@ extern void __chk_io_ptr(const volatile void __iomem *); * ACCESS_ONCE() in different C statements. * * This macro does absolutely -nothing- to prevent the CPU from reordering, - * merging, or refetching absolutely anything at any time. + * merging, or refetching absolutely anything at any time. Its main intended + * use is to mediate communication between process-level code and irq/NMI + * handlers, all running on the same CPU. */ #define ACCESS_ONCE(x) (*(volatile typeof(x) *)&(x)) -- cgit v1.2.2 From d9105c2b01eedb620cae96073dde4f760367817f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Va=C5=A1ut?= Date: Sun, 3 Aug 2008 21:34:08 +0100 Subject: [ARM] 5184/1: Split ucb1400_ts into core and touchscreen This patch splits ucb1400_ts into ucb1400_ts and ucb1400_core. Since this chip supports more features than only touchscreen, it was necessary to prepare it for feature addition. The previous functionality is preserved by applying this patch. [Build fixes for non-ARM by Stephen Rothwell and Takashi Iwai] Signed-off-by: Marek Vasut Signed-off-by: Russell King --- include/linux/ucb1400.h | 161 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 161 insertions(+) create mode 100644 include/linux/ucb1400.h (limited to 'include/linux') diff --git a/include/linux/ucb1400.h b/include/linux/ucb1400.h new file mode 100644 index 000000000000..970473bf8d5a --- /dev/null +++ b/include/linux/ucb1400.h @@ -0,0 +1,161 @@ +/* + * Register definitions and functions for: + * Philips UCB1400 driver + * + * Based on ucb1400_ts: + * Author: Nicolas Pitre + * Created: September 25, 2006 + * Copyright: MontaVista Software, Inc. + * + * Spliting done by: Marek Vasut + * If something doesnt work and it worked before spliting, e-mail me, + * dont bother Nicolas please ;-) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This code is heavily based on ucb1x00-*.c copyrighted by Russell King + * covering the UCB1100, UCB1200 and UCB1300.. Support for the UCB1400 has + * been made separate from ucb1x00-core/ucb1x00-ts on Russell's request. + */ + +#ifndef _LINUX__UCB1400_H +#define _LINUX__UCB1400_H + +#include +#include +#include + +/* + * UCB1400 AC-link registers + */ + +#define UCB_IO_DATA 0x5a +#define UCB_IO_DIR 0x5c +#define UCB_IE_RIS 0x5e +#define UCB_IE_FAL 0x60 +#define UCB_IE_STATUS 0x62 +#define UCB_IE_CLEAR 0x62 +#define UCB_IE_ADC (1 << 11) +#define UCB_IE_TSPX (1 << 12) + +#define UCB_TS_CR 0x64 +#define UCB_TS_CR_TSMX_POW (1 << 0) +#define UCB_TS_CR_TSPX_POW (1 << 1) +#define UCB_TS_CR_TSMY_POW (1 << 2) +#define UCB_TS_CR_TSPY_POW (1 << 3) +#define UCB_TS_CR_TSMX_GND (1 << 4) +#define UCB_TS_CR_TSPX_GND (1 << 5) +#define UCB_TS_CR_TSMY_GND (1 << 6) +#define UCB_TS_CR_TSPY_GND (1 << 7) +#define UCB_TS_CR_MODE_INT (0 << 8) +#define UCB_TS_CR_MODE_PRES (1 << 8) +#define UCB_TS_CR_MODE_POS (2 << 8) +#define UCB_TS_CR_BIAS_ENA (1 << 11) +#define UCB_TS_CR_TSPX_LOW (1 << 12) +#define UCB_TS_CR_TSMX_LOW (1 << 13) + +#define UCB_ADC_CR 0x66 +#define UCB_ADC_SYNC_ENA (1 << 0) +#define UCB_ADC_VREFBYP_CON (1 << 1) +#define UCB_ADC_INP_TSPX (0 << 2) +#define UCB_ADC_INP_TSMX (1 << 2) +#define UCB_ADC_INP_TSPY (2 << 2) +#define UCB_ADC_INP_TSMY (3 << 2) +#define UCB_ADC_INP_AD0 (4 << 2) +#define UCB_ADC_INP_AD1 (5 << 2) +#define UCB_ADC_INP_AD2 (6 << 2) +#define UCB_ADC_INP_AD3 (7 << 2) +#define UCB_ADC_EXT_REF (1 << 5) +#define UCB_ADC_START (1 << 7) +#define UCB_ADC_ENA (1 << 15) + +#define UCB_ADC_DATA 0x68 +#define UCB_ADC_DAT_VALID (1 << 15) +#define UCB_ADC_DAT_MASK 0x3ff + +#define UCB_ID 0x7e +#define UCB_ID_1400 0x4304 + +struct ucb1400_ts { + struct input_dev *ts_idev; + struct task_struct *ts_task; + int id; + wait_queue_head_t ts_wait; + unsigned int ts_restart:1; + int irq; + unsigned int irq_pending; /* not bit field shared */ + struct snd_ac97 *ac97; +}; + +struct ucb1400 { + struct platform_device *ucb1400_ts; +}; + +static inline u16 ucb1400_reg_read(struct snd_ac97 *ac97, u16 reg) +{ + return ac97->bus->ops->read(ac97, reg); +} + +static inline void ucb1400_reg_write(struct snd_ac97 *ac97, u16 reg, u16 val) +{ + ac97->bus->ops->write(ac97, reg, val); +} + +static inline u16 ucb1400_gpio_get_value(struct snd_ac97 *ac97, u16 gpio) +{ + return ucb1400_reg_read(ac97, UCB_IO_DATA) & (1 << gpio); +} + +static inline void ucb1400_gpio_set_value(struct snd_ac97 *ac97, u16 gpio, + u16 val) +{ + ucb1400_reg_write(ac97, UCB_IO_DATA, val ? + ucb1400_reg_read(ac97, UCB_IO_DATA) | (1 << gpio) : + ucb1400_reg_read(ac97, UCB_IO_DATA) & ~(1 << gpio)); +} + +static inline u16 ucb1400_gpio_get_direction(struct snd_ac97 *ac97, u16 gpio) +{ + return ucb1400_reg_read(ac97, UCB_IO_DIR) & (1 << gpio); +} + +static inline void ucb1400_gpio_set_direction(struct snd_ac97 *ac97, u16 gpio, + u16 dir) +{ + ucb1400_reg_write(ac97, UCB_IO_DIR, dir ? + ucb1400_reg_read(ac97, UCB_IO_DIR) | (1 << gpio) : + ucb1400_reg_read(ac97, UCB_IO_DIR) & ~(1 << gpio)); +} + +static inline void ucb1400_adc_enable(struct snd_ac97 *ac97) +{ + ucb1400_reg_write(ac97, UCB_ADC_CR, UCB_ADC_ENA); +} + +static unsigned int ucb1400_adc_read(struct snd_ac97 *ac97, u16 adc_channel, + int adcsync) +{ + unsigned int val; + + if (adcsync) + adc_channel |= UCB_ADC_SYNC_ENA; + + ucb1400_reg_write(ac97, UCB_ADC_CR, UCB_ADC_ENA | adc_channel); + ucb1400_reg_write(ac97, UCB_ADC_CR, UCB_ADC_ENA | adc_channel | + UCB_ADC_START); + + while (!((val = ucb1400_reg_read(ac97, UCB_ADC_DATA)) + & UCB_ADC_DAT_VALID)) + schedule_timeout_uninterruptible(1); + + return val & UCB_ADC_DAT_MASK; +} + +static inline void ucb1400_adc_disable(struct snd_ac97 *ac97) +{ + ucb1400_reg_write(ac97, UCB_ADC_CR, 0); +} + +#endif -- cgit v1.2.2 From 9961920199ec88d6b581d3e38502088935925c04 Mon Sep 17 00:00:00 2001 From: Henrique de Moraes Holschuh Date: Sat, 2 Aug 2008 15:10:58 -0300 Subject: rfkill: add default global states (v2) Add a second set of global states, "rfkill_default_states", to track the state that will be used when the first rfkill class of a given type is registered, and also to save "undo" information when rfkill_epo is called. Add a new exported function, rfkill_set_default(), which can be used by platform drivers to restore radio state saved by the platform across reboots or shutdown. Also, fix rfkill_epo to properly update rfkill_states, but still preserve a copy of the state so that we can undo the effect of rfkill_epo later if we want to. Add rfkill_restore_states() to restore rfkill_states from the copy. Signed-off-by: Henrique de Moraes Holschuh Acked-by: Ivo van Doorn Signed-off-by: John W. Linville --- include/linux/rfkill.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/rfkill.h b/include/linux/rfkill.h index 741d1a62cc3f..aa3c7d5852f6 100644 --- a/include/linux/rfkill.h +++ b/include/linux/rfkill.h @@ -116,6 +116,7 @@ int rfkill_register(struct rfkill *rfkill); void rfkill_unregister(struct rfkill *rfkill); int rfkill_force_state(struct rfkill *rfkill, enum rfkill_state state); +int rfkill_set_default(enum rfkill_type type, enum rfkill_state state); /** * rfkill_state_complement - return complementar state -- cgit v1.2.2 From 77fba13ccc3a2a3db100892a4a6cc5e2f8290cc7 Mon Sep 17 00:00:00 2001 From: Henrique de Moraes Holschuh Date: Sat, 2 Aug 2008 15:10:59 -0300 Subject: rfkill: add __must_check annotations rfkill is not a small, mere detail in wireless support. Once it starts supporting rfkill and users start counting on that support, a wireless device is at risk of operating in dangerous conditions should rfkill support fail to properly activate. Therefore, add the required __must_check annotations on some key functions of the rfkill API, for which the wireless drivers absolutely MUST handle the failure mode safely in order to avoid a potentially dangerous situation where the wireless transmitter is left enabled when the user don't want it to. Signed-off-by: Henrique de Moraes Holschuh Acked-by: Ivo van Doorn Cc: Matthew Garrett Signed-off-by: John W. Linville --- include/linux/rfkill.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rfkill.h b/include/linux/rfkill.h index aa3c7d5852f6..e92d8e94bb88 100644 --- a/include/linux/rfkill.h +++ b/include/linux/rfkill.h @@ -110,9 +110,10 @@ struct rfkill { }; #define to_rfkill(d) container_of(d, struct rfkill, dev) -struct rfkill *rfkill_allocate(struct device *parent, enum rfkill_type type); +struct rfkill * __must_check rfkill_allocate(struct device *parent, + enum rfkill_type type); void rfkill_free(struct rfkill *rfkill); -int rfkill_register(struct rfkill *rfkill); +int __must_check rfkill_register(struct rfkill *rfkill); void rfkill_unregister(struct rfkill *rfkill); int rfkill_force_state(struct rfkill *rfkill, enum rfkill_state state); -- cgit v1.2.2 From 96c87607ac8f9b0e641d11ba6e57f8ec0214ea1c Mon Sep 17 00:00:00 2001 From: Henrique de Moraes Holschuh Date: Sat, 2 Aug 2008 15:11:00 -0300 Subject: rfkill: introduce RFKILL_STATE_MAX While it is interesting to not add last-enum-markers because it allows gcc to warn us of switch() statements missing a valid state, we really should be handling memory corruption on a rfkill state with default clauses, anyway. So add RFKILL_STATE_MAX and use it where applicable. It makes for safer code in the long run. Signed-off-by: Henrique de Moraes Holschuh Acked-by: Ivo van Doorn Signed-off-by: John W. Linville --- include/linux/rfkill.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/rfkill.h b/include/linux/rfkill.h index e92d8e94bb88..4cd64b0d9825 100644 --- a/include/linux/rfkill.h +++ b/include/linux/rfkill.h @@ -49,6 +49,7 @@ enum rfkill_state { RFKILL_STATE_SOFT_BLOCKED = 0, /* Radio output blocked */ RFKILL_STATE_UNBLOCKED = 1, /* Radio output allowed */ RFKILL_STATE_HARD_BLOCKED = 2, /* Output blocked, non-overrideable */ + RFKILL_STATE_MAX, /* marker for last valid state */ }; /* -- cgit v1.2.2 From fef1643bf0cdd092a52dc3378479e4811fd65152 Mon Sep 17 00:00:00 2001 From: Jasper Bryant-Greene Date: Sun, 3 Aug 2008 11:30:55 +1200 Subject: move ETH_P_PAE from ieee80211_i.h to if_ether.h ETH_P_PAE belongs in if_ether.h with the other ETH_P_* definitions. This patch moves it there. Signed-off-by: Jasper Bryant-Greene Acked-by: Johannes Berg Signed-off-by: John W. Linville --- include/linux/if_ether.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/if_ether.h b/include/linux/if_ether.h index e157c1399b61..5028e0b6082b 100644 --- a/include/linux/if_ether.h +++ b/include/linux/if_ether.h @@ -74,6 +74,7 @@ #define ETH_P_ATMFATE 0x8884 /* Frame-based ATM Transport * over Ethernet */ +#define ETH_P_PAE 0x888E /* Port Access Entity (IEEE 802.1X) */ #define ETH_P_AOE 0x88A2 /* ATA over Ethernet */ #define ETH_P_TIPC 0x88CA /* TIPC */ -- cgit v1.2.2 From 65eb3dc609dec17deea48dcd4de2e549d29a9824 Mon Sep 17 00:00:00 2001 From: Kevin Diggs Date: Tue, 26 Aug 2008 10:26:54 +0200 Subject: sched: add kernel doc for the completion, fix kernel-doc-nano-HOWTO.txt This patch adds kernel doc for the completion feature. An error in the split-man.pl PERL snippet in kernel-doc-nano-HOWTO.txt is also fixed. Signed-off-by: Kevin Diggs Signed-off-by: Ingo Molnar --- include/linux/completion.h | 41 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) (limited to 'include/linux') diff --git a/include/linux/completion.h b/include/linux/completion.h index 02ef8835999c..4a6b604ef7e4 100644 --- a/include/linux/completion.h +++ b/include/linux/completion.h @@ -10,6 +10,18 @@ #include +/** + * struct completion - structure used to maintain state for a "completion" + * + * This is the opaque structure used to maintain the state for a "completion". + * Completions currently use a FIFO to queue threads that have to wait for + * the "completion" event. + * + * See also: complete(), wait_for_completion() (and friends _timeout, + * _interruptible, _interruptible_timeout, and _killable), init_completion(), + * and macros DECLARE_COMPLETION(), DECLARE_COMPLETION_ONSTACK(), and + * INIT_COMPLETION(). + */ struct completion { unsigned int done; wait_queue_head_t wait; @@ -21,6 +33,14 @@ struct completion { #define COMPLETION_INITIALIZER_ONSTACK(work) \ ({ init_completion(&work); work; }) +/** + * DECLARE_COMPLETION: - declare and initialize a completion structure + * @work: identifier for the completion structure + * + * This macro declares and initializes a completion structure. Generally used + * for static declarations. You should use the _ONSTACK variant for automatic + * variables. + */ #define DECLARE_COMPLETION(work) \ struct completion work = COMPLETION_INITIALIZER(work) @@ -29,6 +49,13 @@ struct completion { * completions - so we use the _ONSTACK() variant for those that * are on the kernel stack: */ +/** + * DECLARE_COMPLETION_ONSTACK: - declare and initialize a completion structure + * @work: identifier for the completion structure + * + * This macro declares and initializes a completion structure on the kernel + * stack. + */ #ifdef CONFIG_LOCKDEP # define DECLARE_COMPLETION_ONSTACK(work) \ struct completion work = COMPLETION_INITIALIZER_ONSTACK(work) @@ -36,6 +63,13 @@ struct completion { # define DECLARE_COMPLETION_ONSTACK(work) DECLARE_COMPLETION(work) #endif +/** + * init_completion: - Initialize a dynamically allocated completion + * @x: completion structure that is to be initialized + * + * This inline function will initialize a dynamically created completion + * structure. + */ static inline void init_completion(struct completion *x) { x->done = 0; @@ -55,6 +89,13 @@ extern bool completion_done(struct completion *x); extern void complete(struct completion *); extern void complete_all(struct completion *); +/** + * INIT_COMPLETION: - reinitialize a completion structure + * @x: completion structure to be reinitialized + * + * This macro should be used to reinitialize a completion structure so it can + * be reused. This is especially important after complete_all() is used. + */ #define INIT_COMPLETION(x) ((x).done = 0) -- cgit v1.2.2 From da31894ed7b654e2e1741e7ac4ef6c15be0dd14b Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Fri, 22 Aug 2008 11:35:57 -0400 Subject: securityfs: do not depend on CONFIG_SECURITY Add a new Kconfig option SECURITYFS which will build securityfs support but does not require CONFIG_SECURITY. The only current user of securityfs does not depend on CONFIG_SECURITY and there is no reason the full LSM needs to be built to build this fs. Signed-off-by: Eric Paris Signed-off-by: James Morris --- include/linux/security.h | 54 +++++++++++++++++++++++++++--------------------- 1 file changed, 30 insertions(+), 24 deletions(-) (limited to 'include/linux') diff --git a/include/linux/security.h b/include/linux/security.h index 80c4d002864c..f5c4a51eb42e 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -1560,11 +1560,6 @@ struct security_operations { extern int security_init(void); extern int security_module_enable(struct security_operations *ops); extern int register_security(struct security_operations *ops); -extern struct dentry *securityfs_create_file(const char *name, mode_t mode, - struct dentry *parent, void *data, - const struct file_operations *fops); -extern struct dentry *securityfs_create_dir(const char *name, struct dentry *parent); -extern void securityfs_remove(struct dentry *dentry); /* Security operations */ int security_ptrace_may_access(struct task_struct *child, unsigned int mode); @@ -2424,25 +2419,6 @@ static inline int security_netlink_recv(struct sk_buff *skb, int cap) return cap_netlink_recv(skb, cap); } -static inline struct dentry *securityfs_create_dir(const char *name, - struct dentry *parent) -{ - return ERR_PTR(-ENODEV); -} - -static inline struct dentry *securityfs_create_file(const char *name, - mode_t mode, - struct dentry *parent, - void *data, - const struct file_operations *fops) -{ - return ERR_PTR(-ENODEV); -} - -static inline void securityfs_remove(struct dentry *dentry) -{ -} - static inline int security_secid_to_secctx(u32 secid, char **secdata, u32 *seclen) { return -EOPNOTSUPP; @@ -2806,5 +2782,35 @@ static inline void security_audit_rule_free(void *lsmrule) #endif /* CONFIG_SECURITY */ #endif /* CONFIG_AUDIT */ +#ifdef CONFIG_SECURITYFS + +extern struct dentry *securityfs_create_file(const char *name, mode_t mode, + struct dentry *parent, void *data, + const struct file_operations *fops); +extern struct dentry *securityfs_create_dir(const char *name, struct dentry *parent); +extern void securityfs_remove(struct dentry *dentry); + +#else /* CONFIG_SECURITYFS */ + +static inline struct dentry *securityfs_create_dir(const char *name, + struct dentry *parent) +{ + return ERR_PTR(-ENODEV); +} + +static inline struct dentry *securityfs_create_file(const char *name, + mode_t mode, + struct dentry *parent, + void *data, + const struct file_operations *fops) +{ + return ERR_PTR(-ENODEV); +} + +static inline void securityfs_remove(struct dentry *dentry) +{} + +#endif + #endif /* ! __LINUX_SECURITY_H */ -- cgit v1.2.2 From 0f8e0d9a317406612700426fad3efab0b7bbc467 Mon Sep 17 00:00:00 2001 From: David Teigland Date: Wed, 6 Aug 2008 13:30:24 -0500 Subject: dlm: allow multiple lockspace creates Add a count for lockspace create and release so that create can be called multiple times to use the lockspace from different places. Also add the new flag DLM_LSFL_NEWEXCL to create a lockspace with the previous behavior of returning -EEXIST if the lockspace already exists. Signed-off-by: David Teigland --- include/linux/dlm.h | 5 ++++- include/linux/dlm_device.h | 2 +- 2 files changed, 5 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dlm.h b/include/linux/dlm.h index 203a025e30e5..b9cd38603fd8 100644 --- a/include/linux/dlm.h +++ b/include/linux/dlm.h @@ -2,7 +2,7 @@ ******************************************************************************* ** ** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. -** Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved. +** Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved. ** ** This copyrighted material is made available to anyone wishing to use, ** modify, copy, or redistribute it subject to the terms and conditions @@ -65,9 +65,12 @@ struct dlm_lksb { char * sb_lvbptr; }; +/* dlm_new_lockspace() flags */ + #define DLM_LSFL_NODIR 0x00000001 #define DLM_LSFL_TIMEWARN 0x00000002 #define DLM_LSFL_FS 0x00000004 +#define DLM_LSFL_NEWEXCL 0x00000008 #ifdef __KERNEL__ diff --git a/include/linux/dlm_device.h b/include/linux/dlm_device.h index c6034508fed9..3060783c4191 100644 --- a/include/linux/dlm_device.h +++ b/include/linux/dlm_device.h @@ -26,7 +26,7 @@ /* Version of the device interface */ #define DLM_DEVICE_VERSION_MAJOR 6 #define DLM_DEVICE_VERSION_MINOR 0 -#define DLM_DEVICE_VERSION_PATCH 0 +#define DLM_DEVICE_VERSION_PATCH 1 /* struct passed to the lock write */ struct dlm_lock_params { -- cgit v1.2.2 From da7f033ddc9fdebb3223b0bf88a2a2ab5b797608 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Thu, 31 Jul 2008 17:08:25 +0800 Subject: crypto: cryptomgr - Add test infrastructure This patch moves the newly created alg_test infrastructure into cryptomgr. This shall allow us to use it for testing at algorithm registrations. Signed-off-by: Herbert Xu --- include/linux/crypto.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/crypto.h b/include/linux/crypto.h index c43dc47fdf75..7ea0a4bc4ced 100644 --- a/include/linux/crypto.h +++ b/include/linux/crypto.h @@ -515,6 +515,8 @@ struct crypto_tfm *crypto_alloc_tfm(const char *alg_name, u32 tfm_flags); struct crypto_tfm *crypto_alloc_base(const char *alg_name, u32 type, u32 mask); void crypto_free_tfm(struct crypto_tfm *tfm); +int alg_test(const char *driver, const char *alg, u32 type, u32 mask); + /* * Transform helpers which query the underlying algorithm. */ -- cgit v1.2.2 From 73d3864a4823abda19ebc4387b6ddcbf416e3a77 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sun, 3 Aug 2008 21:15:23 +0800 Subject: crypto: api - Use test infrastructure This patch makes use of the new testing infrastructure by requiring algorithms to pass a run-time test before they're made available to users. Signed-off-by: Herbert Xu --- include/linux/crypto.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/crypto.h b/include/linux/crypto.h index 7ea0a4bc4ced..81d994a3bdaf 100644 --- a/include/linux/crypto.h +++ b/include/linux/crypto.h @@ -60,6 +60,14 @@ */ #define CRYPTO_ALG_GENIV 0x00000200 +/* + * Set if the algorithm has passed automated run-time testing. Note that + * if there is no run-time testing for a given algorithm it is considered + * to have passed. + */ + +#define CRYPTO_ALG_TESTED 0x00000400 + /* * Transform masks and values (for crt_flags). */ -- cgit v1.2.2 From 17f0f4a47df9aea9ee26c939f8057c35e0be1847 Mon Sep 17 00:00:00 2001 From: Neil Horman Date: Thu, 14 Aug 2008 22:15:52 +1000 Subject: crypto: rng - RNG interface and implementation This patch adds a random number generator interface as well as a cryptographic pseudo-random number generator based on AES. It is meant to be used in cases where a deterministic CPRNG is required. One of the first applications will be as an input in the IPsec IV generation process. Signed-off-by: Neil Horman Signed-off-by: Herbert Xu --- include/linux/crypto.h | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) (limited to 'include/linux') diff --git a/include/linux/crypto.h b/include/linux/crypto.h index 81d994a3bdaf..3d2317e4af2e 100644 --- a/include/linux/crypto.h +++ b/include/linux/crypto.h @@ -38,6 +38,7 @@ #define CRYPTO_ALG_TYPE_DIGEST 0x00000008 #define CRYPTO_ALG_TYPE_HASH 0x00000009 #define CRYPTO_ALG_TYPE_AHASH 0x0000000a +#define CRYPTO_ALG_TYPE_RNG 0x0000000c #define CRYPTO_ALG_TYPE_HASH_MASK 0x0000000e #define CRYPTO_ALG_TYPE_AHASH_MASK 0x0000000c @@ -113,6 +114,7 @@ struct crypto_aead; struct crypto_blkcipher; struct crypto_hash; struct crypto_ahash; +struct crypto_rng; struct crypto_tfm; struct crypto_type; struct aead_givcrypt_request; @@ -298,6 +300,15 @@ struct compress_alg { unsigned int slen, u8 *dst, unsigned int *dlen); }; +struct rng_alg { + int (*rng_make_random)(struct crypto_rng *tfm, u8 *rdata, + unsigned int dlen); + int (*rng_reset)(struct crypto_rng *tfm, u8 *seed, unsigned int slen); + + unsigned int seedsize; +}; + + #define cra_ablkcipher cra_u.ablkcipher #define cra_aead cra_u.aead #define cra_blkcipher cra_u.blkcipher @@ -306,6 +317,7 @@ struct compress_alg { #define cra_hash cra_u.hash #define cra_ahash cra_u.ahash #define cra_compress cra_u.compress +#define cra_rng cra_u.rng struct crypto_alg { struct list_head cra_list; @@ -333,6 +345,7 @@ struct crypto_alg { struct hash_alg hash; struct ahash_alg ahash; struct compress_alg compress; + struct rng_alg rng; } cra_u; int (*cra_init)(struct crypto_tfm *tfm); @@ -438,6 +451,12 @@ struct compress_tfm { u8 *dst, unsigned int *dlen); }; +struct rng_tfm { + int (*rng_gen_random)(struct crypto_rng *tfm, u8 *rdata, + unsigned int dlen); + int (*rng_reset)(struct crypto_rng *tfm, u8 *seed, unsigned int slen); +}; + #define crt_ablkcipher crt_u.ablkcipher #define crt_aead crt_u.aead #define crt_blkcipher crt_u.blkcipher @@ -445,6 +464,7 @@ struct compress_tfm { #define crt_hash crt_u.hash #define crt_ahash crt_u.ahash #define crt_compress crt_u.compress +#define crt_rng crt_u.rng struct crypto_tfm { @@ -458,6 +478,7 @@ struct crypto_tfm { struct hash_tfm hash; struct ahash_tfm ahash; struct compress_tfm compress; + struct rng_tfm rng; } crt_u; struct crypto_alg *__crt_alg; @@ -489,6 +510,10 @@ struct crypto_hash { struct crypto_tfm base; }; +struct crypto_rng { + struct crypto_tfm base; +}; + enum { CRYPTOA_UNSPEC, CRYPTOA_ALG, -- cgit v1.2.2 From 9f1ba9062e032fb7b395cd27fc564754fe4e9867 Mon Sep 17 00:00:00 2001 From: Jouni Malinen Date: Thu, 7 Aug 2008 20:07:01 +0300 Subject: mac80211/cfg80211: Add BSS configuration options for AP mode This change adds a new cfg80211 command, NL80211_CMD_SET_BSS, to allow AP mode BSS parameters to be changed from user space (e.g., hostapd). The drivers using mac80211 are expected to be modified with separate changes to use the new BSS info parameter for short slot time in the bss_info_changed() handler. Signed-off-by: Jouni Malinen Acked-by: Johannes Berg Signed-off-by: John W. Linville --- include/linux/nl80211.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'include/linux') diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h index 2be7c63bc0f2..447c02a5190e 100644 --- a/include/linux/nl80211.h +++ b/include/linux/nl80211.h @@ -89,6 +89,8 @@ * @NL80211_CMD_DEL_PATH: Remove a mesh path identified by %NL80211_ATTR_MAC * or, if no MAC address given, all mesh paths, on the interface identified * by %NL80211_ATTR_IFINDEX. + * @NL80211_CMD_SET_BSS: Set BSS attributes for BSS identified by + * %NL80211_ATTR_IFINDEX. * * @NL80211_CMD_MAX: highest used command number * @__NL80211_CMD_AFTER_LAST: internal use @@ -127,6 +129,8 @@ enum nl80211_commands { NL80211_CMD_NEW_MPATH, NL80211_CMD_DEL_MPATH, + NL80211_CMD_SET_BSS, + /* add commands here */ /* used to define NL80211_CMD_MAX below */ @@ -134,6 +138,11 @@ enum nl80211_commands { NL80211_CMD_MAX = __NL80211_CMD_AFTER_LAST - 1 }; +/* + * Allow user space programs to use #ifdef on new commands by defining them + * here + */ +#define NL80211_CMD_SET_BSS NL80211_CMD_SET_BSS /** * enum nl80211_attrs - nl80211 netlink attributes @@ -192,6 +201,12 @@ enum nl80211_commands { * @NL80211_ATTR_MNTR_FLAGS: flags, nested element with NLA_FLAG attributes of * &enum nl80211_mntr_flags. * + * @NL80211_ATTR_BSS_CTS_PROT: whether CTS protection is enabled (u8, 0 or 1) + * @NL80211_ATTR_BSS_SHORT_PREAMBLE: whether short preamble is enabled + * (u8, 0 or 1) + * @NL80211_ATTR_BSS_SHORT_SLOT_TIME: whether short slot time enabled + * (u8, 0 or 1) + * * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use */ @@ -235,6 +250,10 @@ enum nl80211_attrs { NL80211_ATTR_MPATH_NEXT_HOP, NL80211_ATTR_MPATH_INFO, + NL80211_ATTR_BSS_CTS_PROT, + NL80211_ATTR_BSS_SHORT_PREAMBLE, + NL80211_ATTR_BSS_SHORT_SLOT_TIME, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, -- cgit v1.2.2 From 18d7c65ba6628d2759bd650e7039d6307f2c99e9 Mon Sep 17 00:00:00 2001 From: Sujith Date: Thu, 14 Aug 2008 13:27:41 +0530 Subject: mac80211: Add an 802.11n definition This patch adds a HT Capability (DSSS/CCK Mode in 40MHz BSS) definition to ieee80211.h Signed-off-by: Sujith Manoharan Signed-off-by: John W. Linville --- include/linux/ieee80211.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 7f4df7c7659d..be456450cd2e 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -714,6 +714,7 @@ struct ieee80211_ht_addt_info { #define IEEE80211_HT_CAP_SGI_40 0x0040 #define IEEE80211_HT_CAP_DELAY_BA 0x0400 #define IEEE80211_HT_CAP_MAX_AMSDU 0x0800 +#define IEEE80211_HT_CAP_DSSSCCK40 0x1000 /* 802.11n HT capability AMPDU settings */ #define IEEE80211_HT_CAP_AMPDU_FACTOR 0x03 #define IEEE80211_HT_CAP_AMPDU_DENSITY 0x1C -- cgit v1.2.2 From 095f695cbb07281682462da0618fffabb499d0be Mon Sep 17 00:00:00 2001 From: Larry Finger Date: Tue, 19 Aug 2008 12:50:31 -0500 Subject: ssb: Update for Rev. 5 SPROM Although a revision 5 SPROM has not been seen in the wild, the open-source portion of the MIPS driver 4.150.10.5 describes its layout, which is mostly inherited from revision 4. This patch implements the differences. Signed-off-by: Larry Finger Acked-by: Michael Buesch Signed-off-by: John W. Linville --- include/linux/ssb/ssb_regs.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ssb/ssb_regs.h b/include/linux/ssb/ssb_regs.h index ebad0bac9801..271bb4b6446e 100644 --- a/include/linux/ssb/ssb_regs.h +++ b/include/linux/ssb/ssb_regs.h @@ -316,6 +316,21 @@ #define SSB_SPROM4_PA1B1 0x1090 #define SSB_SPROM4_PA1B2 0x1092 +/* SPROM Revision 5 (inherits most data from rev 4) */ +#define SSB_SPROM5_BFLLO 0x104A /* Boardflags (low 16 bits) */ +#define SSB_SPROM5_BFLHI 0x104C /* Board Flags Hi */ +#define SSB_SPROM5_IL0MAC 0x1052 /* 6 byte MAC address for a/b/g/n */ +#define SSB_SPROM5_CCODE 0x1044 /* Country Code (2 bytes) */ +#define SSB_SPROM5_GPIOA 0x1076 /* Gen. Purpose IO # 0 and 1 */ +#define SSB_SPROM5_GPIOA_P0 0x00FF /* Pin 0 */ +#define SSB_SPROM5_GPIOA_P1 0xFF00 /* Pin 1 */ +#define SSB_SPROM5_GPIOA_P1_SHIFT 8 +#define SSB_SPROM5_GPIOB 0x1078 /* Gen. Purpose IO # 2 and 3 */ +#define SSB_SPROM5_GPIOB_P2 0x00FF /* Pin 2 */ +#define SSB_SPROM5_GPIOB_P3 0xFF00 /* Pin 3 */ +#define SSB_SPROM5_GPIOB_P3_SHIFT 8 + + /* Values for SSB_SPROM1_BINF_CCODE */ enum { SSB_SPROM1CCODE_WORLD = 0, -- cgit v1.2.2 From 31ce12fb3ebf88b054deb99ad729e84888bf6125 Mon Sep 17 00:00:00 2001 From: Larry Finger Date: Wed, 20 Aug 2008 17:45:06 -0500 Subject: ssb: Clean up extraction of MAC addresses from SPROM Only rev 1 and 2 ssb SPROMs have fields named et0mac and et1mac; however, all of the extraction routines extract pseudo data for these fields from regions that are all 1's resulting in a hardware address of FF:FF:FF:FF:FF:FF. This patch forces such a fill at the beginning of the data extraction process, and only does the formal extraction if the SPROM rev is 1 or 2. Signed-off-by: Larry Finger Signed-off-by: John W. Linville --- include/linux/ssb/ssb_regs.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ssb/ssb_regs.h b/include/linux/ssb/ssb_regs.h index 271bb4b6446e..99a0f991e850 100644 --- a/include/linux/ssb/ssb_regs.h +++ b/include/linux/ssb/ssb_regs.h @@ -245,8 +245,6 @@ /* SPROM Revision 3 (inherits most data from rev 2) */ #define SSB_SPROM3_IL0MAC 0x104A /* 6 bytes MAC address for 802.11b/g */ -#define SSB_SPROM3_ET0MAC 0x1050 /* 6 bytes MAC address for Ethernet ?? */ -#define SSB_SPROM3_ET1MAC 0x1050 /* 6 bytes MAC address for 802.11a ?? */ #define SSB_SPROM3_OFDMAPO 0x102C /* A-PHY OFDM Mid Power Offset (4 bytes, BigEndian) */ #define SSB_SPROM3_OFDMALPO 0x1030 /* A-PHY OFDM Low Power Offset (4 bytes, BigEndian) */ #define SSB_SPROM3_OFDMAHPO 0x1034 /* A-PHY OFDM High Power Offset (4 bytes, BigEndian) */ @@ -267,8 +265,6 @@ /* SPROM Revision 4 */ #define SSB_SPROM4_IL0MAC 0x104C /* 6 byte MAC address for a/b/g/n */ -#define SSB_SPROM4_ET0MAC 0x1018 /* 6 bytes MAC address for Ethernet ?? */ -#define SSB_SPROM4_ET1MAC 0x1018 /* 6 bytes MAC address for 802.11a ?? */ #define SSB_SPROM4_ETHPHY 0x105A /* Ethernet PHY settings ?? */ #define SSB_SPROM4_ETHPHY_ET0A 0x001F /* MII Address for enet0 */ #define SSB_SPROM4_ETHPHY_ET1A 0x03E0 /* MII Address for enet1 */ -- cgit v1.2.2 From 36aedc903ea11a4188de0a118d26c9f20afdd272 Mon Sep 17 00:00:00 2001 From: Jouni Malinen Date: Mon, 25 Aug 2008 11:58:58 +0300 Subject: mac80211/cfg80211: HT capabilities for NEW_STA Allow userspace (e.g., hostapd) to set HT capabilities for associated STAs. This is based on a patch from Zhu Yi (only the NL80211_ATTR_HT_CAPABILITY for NEW_STA part is included here). Signed-off-by: Jouni Malinen Signed-off-by: John W. Linville --- include/linux/nl80211.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/linux') diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h index 447c02a5190e..0c1147de3ec7 100644 --- a/include/linux/nl80211.h +++ b/include/linux/nl80211.h @@ -207,6 +207,9 @@ enum nl80211_commands { * @NL80211_ATTR_BSS_SHORT_SLOT_TIME: whether short slot time enabled * (u8, 0 or 1) * + * @NL80211_ATTR_HT_CAPABILITY: HT Capability information element (from + * association request when used with NL80211_CMD_NEW_STATION) + * * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use */ @@ -254,16 +257,25 @@ enum nl80211_attrs { NL80211_ATTR_BSS_SHORT_PREAMBLE, NL80211_ATTR_BSS_SHORT_SLOT_TIME, + NL80211_ATTR_HT_CAPABILITY, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, NL80211_ATTR_MAX = __NL80211_ATTR_AFTER_LAST - 1 }; +/* + * Allow user space programs to use #ifdef on new attributes by defining them + * here + */ +#define NL80211_ATTR_HT_CAPABILITY NL80211_ATTR_HT_CAPABILITY + #define NL80211_MAX_SUPP_RATES 32 #define NL80211_TKIP_DATA_OFFSET_ENCR_KEY 0 #define NL80211_TKIP_DATA_OFFSET_TX_MIC_KEY 16 #define NL80211_TKIP_DATA_OFFSET_RX_MIC_KEY 24 +#define NL80211_HT_CAPABILITY_LEN 26 /** * enum nl80211_iftype - (virtual) interface types -- cgit v1.2.2 From b4eec206370b7154dc354dc30f0a3f02ea8468b2 Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Thu, 4 Sep 2008 07:30:19 +0200 Subject: dccp: Implement lookup table for feature-negotiation information A lookup table for feature-negotiation information, extracted from RFC 4340/42, is provided by this patch. All currently known features can be found in this table, along with their feature location, their default value, and type. Signed-off-by: Gerrit Renker Acked-by: Ian McDonald --- include/linux/dccp.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dccp.h b/include/linux/dccp.h index 6080449fbec9..3978aff197d9 100644 --- a/include/linux/dccp.h +++ b/include/linux/dccp.h @@ -176,19 +176,20 @@ enum { }; /* DCCP features (RFC 4340 section 6.4) */ -enum { +enum dccp_feature_numbers { DCCPF_RESERVED = 0, DCCPF_CCID = 1, - DCCPF_SHORT_SEQNOS = 2, /* XXX: not yet implemented */ + DCCPF_SHORT_SEQNOS = 2, DCCPF_SEQUENCE_WINDOW = 3, - DCCPF_ECN_INCAPABLE = 4, /* XXX: not yet implemented */ + DCCPF_ECN_INCAPABLE = 4, DCCPF_ACK_RATIO = 5, DCCPF_SEND_ACK_VECTOR = 6, DCCPF_SEND_NDP_COUNT = 7, DCCPF_MIN_CSUM_COVER = 8, - DCCPF_DATA_CHECKSUM = 9, /* XXX: not yet implemented */ + DCCPF_DATA_CHECKSUM = 9, /* 10-127 reserved */ DCCPF_MIN_CCID_SPECIFIC = 128, + DCCPF_SEND_LEV_RATE = 192, /* RFC 4342, sec. 8.4 */ DCCPF_MAX_CCID_SPECIFIC = 255, }; -- cgit v1.2.2 From 828755cee087e4a34f45d6c9db661ccd0631cc6d Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Thu, 4 Sep 2008 07:30:19 +0200 Subject: dccp: Per-socket initialisation of feature negotiation This provides feature-negotiation initialisation for both DCCP sockets and DCCP request_sockets, to support feature negotiation during connection setup. It also resolves a FIXME regarding the congestion control initialisation. Thanks to Wei Yongjun for help with the IPv6 side of this patch. Signed-off-by: Gerrit Renker Acked-by: Ian McDonald --- include/linux/dccp.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/dccp.h b/include/linux/dccp.h index 3978aff197d9..484b8a1fb023 100644 --- a/include/linux/dccp.h +++ b/include/linux/dccp.h @@ -412,6 +412,7 @@ extern void dccp_minisock_init(struct dccp_minisock *dmsk); * @dreq_iss: initial sequence number sent on the Response (RFC 4340, 7.1) * @dreq_isr: initial sequence number received on the Request * @dreq_service: service code present on the Request (there is just one) + * @dreq_featneg: feature negotiation options for this connection * The following two fields are analogous to the ones in dccp_sock: * @dreq_timestamp_echo: last received timestamp to echo (13.1) * @dreq_timestamp_echo: the time of receiving the last @dreq_timestamp_echo @@ -421,6 +422,7 @@ struct dccp_request_sock { __u64 dreq_iss; __u64 dreq_isr; __be32 dreq_service; + struct list_head dreq_featneg; __u32 dreq_timestamp_echo; __u32 dreq_timestamp_time; }; @@ -498,6 +500,7 @@ struct dccp_ackvec; * @dccps_mss_cache - current value of MSS (path MTU minus header sizes) * @dccps_rate_last - timestamp for rate-limiting DCCP-Sync (RFC 4340, 7.5.4) * @dccps_minisock - associated minisock (accessed via dccp_msk) + * @dccps_featneg - tracks feature-negotiation state (mostly during handshake) * @dccps_hc_rx_ackvec - rx half connection ack vector * @dccps_hc_rx_ccid - CCID used for the receiver (or receiving half-connection) * @dccps_hc_tx_ccid - CCID used for the sender (or sending half-connection) @@ -535,6 +538,7 @@ struct dccp_sock { __u64 dccps_ndp_count:48; unsigned long dccps_rate_last; struct dccp_minisock dccps_minisock; + struct list_head dccps_featneg; struct dccp_ackvec *dccps_hc_rx_ackvec; struct ccid *dccps_hc_rx_ccid; struct ccid *dccps_hc_tx_ccid; -- cgit v1.2.2 From 71bb49596bbf4e5a3328e1704d18604e822ba181 Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Thu, 4 Sep 2008 07:30:19 +0200 Subject: dccp: Query supported CCIDs This provides a data structure to record which CCIDs are locally supported and three accessor functions: - a test function for internal use which is used to validate CCID requests made by the user; - a copy function so that the list can be used for feature-negotiation; - documented getsockopt() support so that the user can query capabilities. The data structure is a table which is filled in at compile-time with the list of available CCIDs (which in turn depends on the Kconfig choices). Using the copy function for cloning the list of supported CCIDs is useful for feature negotiation, since the negotiation is now with the full list of available CCIDs (e.g. {2, 3}) instead of the default value {2}. This means negotiation will not fail if the peer requests to use CCID3 instead of CCID2. Signed-off-by: Gerrit Renker Acked-by: Ian McDonald --- include/linux/dccp.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/dccp.h b/include/linux/dccp.h index 484b8a1fb023..d3ac1bde60b4 100644 --- a/include/linux/dccp.h +++ b/include/linux/dccp.h @@ -209,6 +209,7 @@ struct dccp_so_feat { #define DCCP_SOCKOPT_SERVER_TIMEWAIT 6 #define DCCP_SOCKOPT_SEND_CSCOV 10 #define DCCP_SOCKOPT_RECV_CSCOV 11 +#define DCCP_SOCKOPT_AVAILABLE_CCIDS 12 #define DCCP_SOCKOPT_CCID_RX_INFO 128 #define DCCP_SOCKOPT_CCID_TX_INFO 192 -- cgit v1.2.2 From 668144f7b41716a9efe1b398e15ead32a26cd101 Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Thu, 4 Sep 2008 07:30:19 +0200 Subject: dccp: Deprecate old setsockopt framework The previous setsockopt interface, which passed socket options via struct dccp_so_feat, is complicated/difficult to use. Continuing to support it leads to ugly code since the old approach did not distinguish between NN and SP values. This patch removes the old setsockopt interface and replaces it with two new functions to register NN/SP values for feature negotiation. These are essentially wrappers around the internal __feat_register functions, with checking added to avoid * wrong usage (type); * changing values while the connection is in progress. Signed-off-by: Gerrit Renker --- include/linux/dccp.h | 7 ------- 1 file changed, 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dccp.h b/include/linux/dccp.h index d3ac1bde60b4..6eaaca9b037a 100644 --- a/include/linux/dccp.h +++ b/include/linux/dccp.h @@ -193,13 +193,6 @@ enum dccp_feature_numbers { DCCPF_MAX_CCID_SPECIFIC = 255, }; -/* this structure is argument to DCCP_SOCKOPT_CHANGE_X */ -struct dccp_so_feat { - __u8 dccpsf_feat; - __u8 __user *dccpsf_val; - __u8 dccpsf_len; -}; - /* DCCP socket options */ #define DCCP_SOCKOPT_PACKET_SIZE 1 /* XXX deprecated, without effect */ #define DCCP_SOCKOPT_SERVICE 2 -- cgit v1.2.2 From 20f41eee82864e308a5499308a1722dc3181cc3a Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Thu, 4 Sep 2008 07:30:19 +0200 Subject: dccp: Feature negotiation for minimum-checksum-coverage This provides feature negotiation for server minimum checksum coverage which so far has been missing. Since sender/receiver coverage values range only from 0...15, their type has also been reduced in size from u16 to u4. Feature-negotiation options are now generated for both sender and receiver coverage, i.e. when the peer has `forgotten' to enable partial coverage then feature negotiation will automatically enable (negotiate) the partial coverage value for this connection. Signed-off-by: Gerrit Renker Acked-by: Ian McDonald --- include/linux/dccp.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dccp.h b/include/linux/dccp.h index 6eaaca9b037a..5a5a89935dbc 100644 --- a/include/linux/dccp.h +++ b/include/linux/dccp.h @@ -527,8 +527,8 @@ struct dccp_sock { __u32 dccps_timestamp_time; __u16 dccps_l_ack_ratio; __u16 dccps_r_ack_ratio; - __u16 dccps_pcslen; - __u16 dccps_pcrlen; + __u8 dccps_pcslen:4; + __u8 dccps_pcrlen:4; __u64 dccps_ndp_count:48; unsigned long dccps_rate_last; struct dccp_minisock dccps_minisock; -- cgit v1.2.2 From 17c30b40ed79e9f3955e884632c8f01e577b204a Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Thu, 4 Sep 2008 07:30:19 +0200 Subject: dccp: Deprecate Ack Ratio sysctl This patch deprecates the Ack Ratio sysctl, since * Ack Ratio is entirely ignored by CCID-3 and CCID-4, * Ack Ratio currently doesn't work in CCID-2 (i.e. is always set to 1); * even if it would work in CCID-2, there is no point for a user to change it: - Ack Ratio is constrained by cwnd (RFC 4341, 6.1.2), - if Ack Ratio > cwnd, the system resorts to spurious RTO timeouts (since waiting for Acks which will never arrive in this window), - cwnd is not a user-configurable value. The only reasonable place for Ack Ratio is to print it for debugging. It is planned to do this later on, as part of e.g. dccp_probe. With this patch Ack Ratio is now under full control of feature negotiation: * Ack Ratio is resolved as a dependency of the selected CCID; * if the chosen CCID supports it (i.e. CCID == CCID-2), Ack Ratio is set to the default of 2, following RFC 4340, 11.3 - "New connections start with Ack Ratio 2 for both endpoints"; * what happens then is part of another patch set, since it concerns the dynamic update of Ack Ratio while the connection is in full flight. Thanks to Tomasz Grobelny for discussion leading up to this patch. Signed-off-by: Gerrit Renker Acked-by: Arnaldo Carvalho de Melo --- include/linux/dccp.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dccp.h b/include/linux/dccp.h index 5a5a89935dbc..eda389ce04f4 100644 --- a/include/linux/dccp.h +++ b/include/linux/dccp.h @@ -368,7 +368,6 @@ static inline unsigned int dccp_hdr_len(const struct sk_buff *skb) * @dccpms_ccid - Congestion Control Id (CCID) (section 10) * @dccpms_send_ack_vector - Send Ack Vector Feature (section 11.5) * @dccpms_send_ndp_count - Send NDP Count Feature (7.7.2) - * @dccpms_ack_ratio - Ack Ratio Feature (section 11.3) * @dccpms_pending - List of features being negotiated * @dccpms_conf - */ @@ -378,7 +377,6 @@ struct dccp_minisock { __u8 dccpms_tx_ccid; __u8 dccpms_send_ack_vector; __u8 dccpms_send_ndp_count; - __u8 dccpms_ack_ratio; struct list_head dccpms_pending; struct list_head dccpms_conf; }; -- cgit v1.2.2 From fade756f18d42694e3acb00e3471ab43002cba16 Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Thu, 4 Sep 2008 07:30:19 +0200 Subject: dccp: Set per-connection CCIDs via socket options With this patch, TX/RX CCIDs can now be changed on a per-connection basis, which overrides the defaults set by the global sysctl variables for TX/RX CCIDs. To make full use of this facility, the remaining patches of this patch set are needed, which track dependencies and activate negotiated feature values. Note on the maximum number of CCIDs that can be registered: ----------------------------------------------------------- The maximum number of CCIDs that can be registered on the socket is constrained by the space in a Confirm/Change feature negotiation option. The space in these in turn depends on the size of header options as defined in RFC 4340, 5.8. Since this is a recurring constant, it has been moved from ackvec.h into linux/dccp.h, clarifying its purpose. Relative to this size, the maximum number of CCID identifiers that can be present in a Confirm option (which always consumes 1 byte more than a Change option, cf. 6.1) is 2 bytes less than the maximum TLV size: one for the CCID-feature-type and one for the selected value. Signed-off-by: Gerrit Renker --- include/linux/dccp.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/dccp.h b/include/linux/dccp.h index eda389ce04f4..6a72ff52a8a4 100644 --- a/include/linux/dccp.h +++ b/include/linux/dccp.h @@ -168,6 +168,8 @@ enum { DCCPO_MIN_CCID_SPECIFIC = 128, DCCPO_MAX_CCID_SPECIFIC = 255, }; +/* maximum size of a single TLV-encoded DCCP option (sans type/len bytes) */ +#define DCCP_SINGLE_OPT_MAXLEN 253 /* DCCP CCIDS */ enum { @@ -203,6 +205,9 @@ enum dccp_feature_numbers { #define DCCP_SOCKOPT_SEND_CSCOV 10 #define DCCP_SOCKOPT_RECV_CSCOV 11 #define DCCP_SOCKOPT_AVAILABLE_CCIDS 12 +#define DCCP_SOCKOPT_CCID 13 +#define DCCP_SOCKOPT_TX_CCID 14 +#define DCCP_SOCKOPT_RX_CCID 15 #define DCCP_SOCKOPT_CCID_RX_INFO 128 #define DCCP_SOCKOPT_CCID_TX_INFO 192 -- cgit v1.2.2 From 78673e24df27c76ec75565f4024d45c2c74ef148 Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Thu, 4 Sep 2008 07:30:19 +0200 Subject: dccp: Remove obsolete parts of the old CCID interface The TX/RX CCIDs of the minisock are now redundant: similar to the Ack Vector case, their value equals initially that of the sysctl, but at the end of feature negotiation may be something different. The old interface removed by this patch thus has been replaced by the newer interface to dynamically query the currently loaded CCIDs earlier in this patch set. Also removed the constructors for the TX CCID and the RX CCID, since the switch rx/non-rx is done by the handler in minisocks.c (and the handler is the only place in the code where CCIDs are loaded). Signed-off-by: Gerrit Renker Acked-by: Ian McDonald --- include/linux/dccp.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dccp.h b/include/linux/dccp.h index 6a72ff52a8a4..46daea312d92 100644 --- a/include/linux/dccp.h +++ b/include/linux/dccp.h @@ -370,7 +370,6 @@ static inline unsigned int dccp_hdr_len(const struct sk_buff *skb) * Will be used to pass the state from dccp_request_sock to dccp_sock. * * @dccpms_sequence_window - Sequence Window Feature (section 7.5.2) - * @dccpms_ccid - Congestion Control Id (CCID) (section 10) * @dccpms_send_ack_vector - Send Ack Vector Feature (section 11.5) * @dccpms_send_ndp_count - Send NDP Count Feature (7.7.2) * @dccpms_pending - List of features being negotiated @@ -378,8 +377,6 @@ static inline unsigned int dccp_hdr_len(const struct sk_buff *skb) */ struct dccp_minisock { __u64 dccpms_sequence_window; - __u8 dccpms_rx_ccid; - __u8 dccpms_tx_ccid; __u8 dccpms_send_ack_vector; __u8 dccpms_send_ndp_count; struct list_head dccpms_pending; -- cgit v1.2.2 From 68e074bfcef269bc61006c2740d7f89ccbbd93d7 Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Thu, 4 Sep 2008 07:30:19 +0200 Subject: dccp: Remove manual influence on NDP Count feature Updating the NDP count feature is handled automatically now: * for CCID-2 it is disabled, since the code does not use NDP counts; * for CCID-3 it is enabled, as NDP counts are used to determine loss lengths. Allowing the user to change NDP values leads to unpredictable and failing behaviour, since it is then possible to disable NDP counts even when they are needed (e.g. in CCID-3). This means that only those user settings are sensible that agree with the values for Send NDP Count implied by the choice of CCID. But those settings are already activated by the feature negotiation (CCID dependency tracking), hence this form of support is redundant. At startup the initialisation of the NDP count feature is with the default value of 0, which is done implicitly by the zeroing-out of the socket when it is allocated. If the choice of CCID or feature negotiation enables NDP count, this will then be updated via the NDP activation handler. Signed-off-by: Gerrit Renker Acked-by: Ian McDonald --- include/linux/dccp.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dccp.h b/include/linux/dccp.h index 46daea312d92..60e94438eadd 100644 --- a/include/linux/dccp.h +++ b/include/linux/dccp.h @@ -371,14 +371,12 @@ static inline unsigned int dccp_hdr_len(const struct sk_buff *skb) * * @dccpms_sequence_window - Sequence Window Feature (section 7.5.2) * @dccpms_send_ack_vector - Send Ack Vector Feature (section 11.5) - * @dccpms_send_ndp_count - Send NDP Count Feature (7.7.2) * @dccpms_pending - List of features being negotiated * @dccpms_conf - */ struct dccp_minisock { __u64 dccpms_sequence_window; __u8 dccpms_send_ack_vector; - __u8 dccpms_send_ndp_count; struct list_head dccpms_pending; struct list_head dccpms_conf; }; @@ -490,6 +488,7 @@ struct dccp_ackvec; * @dccps_r_ack_ratio - feature-remote Ack Ratio * @dccps_pcslen - sender partial checksum coverage (via sockopt) * @dccps_pcrlen - receiver partial checksum coverage (via sockopt) + * @dccps_send_ndp_count - local Send NDP Count feature (7.7.2) * @dccps_ndp_count - number of Non Data Packets since last data packet * @dccps_mss_cache - current value of MSS (path MTU minus header sizes) * @dccps_rate_last - timestamp for rate-limiting DCCP-Sync (RFC 4340, 7.5.4) @@ -529,6 +528,7 @@ struct dccp_sock { __u16 dccps_r_ack_ratio; __u8 dccps_pcslen:4; __u8 dccps_pcrlen:4; + __u8 dccps_send_ndp_count:1; __u64 dccps_ndp_count:48; unsigned long dccps_rate_last; struct dccp_minisock dccps_minisock; -- cgit v1.2.2 From b235dc4abbc1356284bd0dc730efa711f394e0e2 Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Thu, 4 Sep 2008 07:30:19 +0200 Subject: dccp ccid-2: Phase out the use of boolean Ack Vector sysctl This removes the use of the sysctl and the minisock variable for the Send Ack Vector feature, which is now handled fully dynamically via feature negotiation; i.e. when CCID2 is enabled, Ack Vectors are automatically enabled (as per RFC 4341, 4.). Using a sysctl in parallel to this implementation would open the door to crashes, since much of the code relies on tests of the boolean minisock / sysctl variable. Thus, this patch replaces all tests of type if (dccp_msk(sk)->dccpms_send_ack_vector) /* ... */ with if (dp->dccps_hc_rx_ackvec != NULL) /* ... */ The dccps_hc_rx_ackvec is allocated by the dccp_hdlr_ackvec() when feature negotiation concluded that Ack Vectors are to be used on the half-connection. Otherwise, it is NULL (due to dccp_init_sock/dccp_create_openreq_child), so that the test is a valid one. The activation handler for Ack Vectors is called as soon as the feature negotiation has concluded at the * server when the Ack marking the transition RESPOND => OPEN arrives; * client after it has sent its ACK, marking the transition REQUEST => PARTOPEN. Adding the sequence number of the Response packet to the Ack Vector has been removed, since (a) connection establishment implies that the Response has been received; (b) the CCIDs only look at packets received in the (PART)OPEN state, i.e. this entry will always be ignored; (c) it can not be used for anything useful - to detect loss for instance, only packets received after the loss can serve as pseudo-dupacks. Signed-off-by: Gerrit Renker Acked-by: Ian McDonald --- include/linux/dccp.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dccp.h b/include/linux/dccp.h index 60e94438eadd..61734e27abb7 100644 --- a/include/linux/dccp.h +++ b/include/linux/dccp.h @@ -360,7 +360,6 @@ static inline unsigned int dccp_hdr_len(const struct sk_buff *skb) #define DCCPF_INITIAL_SEQUENCE_WINDOW 100 #define DCCPF_INITIAL_ACK_RATIO 2 #define DCCPF_INITIAL_CCID DCCPC_CCID2 -#define DCCPF_INITIAL_SEND_ACK_VECTOR 1 /* FIXME: for now we're default to 1 but it should really be 0 */ #define DCCPF_INITIAL_SEND_NDP_COUNT 1 @@ -370,13 +369,11 @@ static inline unsigned int dccp_hdr_len(const struct sk_buff *skb) * Will be used to pass the state from dccp_request_sock to dccp_sock. * * @dccpms_sequence_window - Sequence Window Feature (section 7.5.2) - * @dccpms_send_ack_vector - Send Ack Vector Feature (section 11.5) * @dccpms_pending - List of features being negotiated * @dccpms_conf - */ struct dccp_minisock { __u64 dccpms_sequence_window; - __u8 dccpms_send_ack_vector; struct list_head dccpms_pending; struct list_head dccpms_conf; }; -- cgit v1.2.2 From 5d3dac267a7fd0811ec777e76a81f97f5cdcb395 Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Thu, 4 Sep 2008 07:30:19 +0200 Subject: dccp: Initialisation framework for feature negotiation This initialises feature negotiation from two tables, which are initialised from sysctls. As a novel feature, specifics of the implementation (e.g. currently short seqnos and ECN are not supported) are advertised for robustness. Signed-off-by: Gerrit Renker Acked-by: Ian McDonald --- include/linux/dccp.h | 19 ------------------- 1 file changed, 19 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dccp.h b/include/linux/dccp.h index 61734e27abb7..990e97fa1f07 100644 --- a/include/linux/dccp.h +++ b/include/linux/dccp.h @@ -369,28 +369,9 @@ static inline unsigned int dccp_hdr_len(const struct sk_buff *skb) * Will be used to pass the state from dccp_request_sock to dccp_sock. * * @dccpms_sequence_window - Sequence Window Feature (section 7.5.2) - * @dccpms_pending - List of features being negotiated - * @dccpms_conf - */ struct dccp_minisock { __u64 dccpms_sequence_window; - struct list_head dccpms_pending; - struct list_head dccpms_conf; -}; - -struct dccp_opt_conf { - __u8 *dccpoc_val; - __u8 dccpoc_len; -}; - -struct dccp_opt_pend { - struct list_head dccpop_node; - __u8 dccpop_type; - __u8 dccpop_feat; - __u8 *dccpop_val; - __u8 dccpop_len; - int dccpop_conf; - struct dccp_opt_conf *dccpop_sc; }; extern void dccp_minisock_init(struct dccp_minisock *dmsk); -- cgit v1.2.2 From 51c7d4fa2675c106a980ddcdbe308b54b5151945 Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Thu, 4 Sep 2008 07:30:19 +0200 Subject: dccp: Implement both feature-local and feature-remote Sequence Window feature This adds full support for local/remote Sequence Window feature, from which the * sequence-number-validity (W) and * acknowledgment-number-validity (W') windows derive as specified in RFC 4340, 7.5.3. Specifically, the following changes are introduced: * integrated new socket fields into dccp_sk; * updated the update_gsr/gss routines with regard to these fields; * updated handler code: the Sequence Window feature is located at the TX side, so the local feature is meant if the handler-rx flag is false; * the initialisation of `rcv_wnd' in reqsk is removed, since - rcv_wnd is not used by the code anywhere; - sequence number checks are not done in the LISTEN state (cf. 7.5.3); - dccp_check_req checks the Ack number validity more rigorously; * the `struct dccp_minisock' became empty and is now removed. Until the handshake completes with activating negotiated values, the local/remote Sequence-Window values are undefined and thus can not reliably be estimated. This issue is addressed in a separate patch. Signed-off-by: Gerrit Renker Acked-by: Ian McDonald --- include/linux/dccp.h | 24 ++++-------------------- 1 file changed, 4 insertions(+), 20 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dccp.h b/include/linux/dccp.h index 990e97fa1f07..7a0502ab383a 100644 --- a/include/linux/dccp.h +++ b/include/linux/dccp.h @@ -363,19 +363,6 @@ static inline unsigned int dccp_hdr_len(const struct sk_buff *skb) /* FIXME: for now we're default to 1 but it should really be 0 */ #define DCCPF_INITIAL_SEND_NDP_COUNT 1 -/** - * struct dccp_minisock - Minimal DCCP connection representation - * - * Will be used to pass the state from dccp_request_sock to dccp_sock. - * - * @dccpms_sequence_window - Sequence Window Feature (section 7.5.2) - */ -struct dccp_minisock { - __u64 dccpms_sequence_window; -}; - -extern void dccp_minisock_init(struct dccp_minisock *dmsk); - /** * struct dccp_request_sock - represent DCCP-specific connection request * @dreq_inet_rsk: structure inherited from @@ -464,13 +451,14 @@ struct dccp_ackvec; * @dccps_timestamp_time - time of receiving latest @dccps_timestamp_echo * @dccps_l_ack_ratio - feature-local Ack Ratio * @dccps_r_ack_ratio - feature-remote Ack Ratio + * @dccps_l_seq_win - local Sequence Window (influences ack number validity) + * @dccps_r_seq_win - remote Sequence Window (influences seq number validity) * @dccps_pcslen - sender partial checksum coverage (via sockopt) * @dccps_pcrlen - receiver partial checksum coverage (via sockopt) * @dccps_send_ndp_count - local Send NDP Count feature (7.7.2) * @dccps_ndp_count - number of Non Data Packets since last data packet * @dccps_mss_cache - current value of MSS (path MTU minus header sizes) * @dccps_rate_last - timestamp for rate-limiting DCCP-Sync (RFC 4340, 7.5.4) - * @dccps_minisock - associated minisock (accessed via dccp_msk) * @dccps_featneg - tracks feature-negotiation state (mostly during handshake) * @dccps_hc_rx_ackvec - rx half connection ack vector * @dccps_hc_rx_ccid - CCID used for the receiver (or receiving half-connection) @@ -504,12 +492,13 @@ struct dccp_sock { __u32 dccps_timestamp_time; __u16 dccps_l_ack_ratio; __u16 dccps_r_ack_ratio; + __u64 dccps_l_seq_win:48; + __u64 dccps_r_seq_win:48; __u8 dccps_pcslen:4; __u8 dccps_pcrlen:4; __u8 dccps_send_ndp_count:1; __u64 dccps_ndp_count:48; unsigned long dccps_rate_last; - struct dccp_minisock dccps_minisock; struct list_head dccps_featneg; struct dccp_ackvec *dccps_hc_rx_ackvec; struct ccid *dccps_hc_rx_ccid; @@ -527,11 +516,6 @@ static inline struct dccp_sock *dccp_sk(const struct sock *sk) return (struct dccp_sock *)sk; } -static inline struct dccp_minisock *dccp_msk(const struct sock *sk) -{ - return (struct dccp_minisock *)&dccp_sk(sk)->dccps_minisock; -} - static inline const char *dccp_role(const struct sock *sk) { switch (dccp_sk(sk)->dccps_role) { -- cgit v1.2.2 From 0a4822679d94e2b0117aeead06a19fad59533905 Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Thu, 4 Sep 2008 07:30:19 +0200 Subject: dccp: Initialisation and type-checking of feature sysctls This patch takes care of initialising and type-checking sysctls related to feature negotiation. Type checking is important since some of the sysctls now directly act on the feature-negotiation process. The sysctls are initialised with the known default values for each feature. For the type-checking the value constraints from RFC 4340 are used: * Sequence Window uses the specified Wmin=32, the maximum is ulong (4 bytes), tested and confirmed that it works up to 4294967295 - for Gbps speed; * Ack Ratio is between 0 .. 0xffff (2-byte unsigned integer); * CCIDs are between 0 .. 255; * request_retries, retries1, retries2 also between 0..255 for good measure; * tx_qlen is checked to be non-negative; * sync_ratelimit remains as before. Further changes: ---------------- Performed s@sysctl_dccp_feat@sysctl_dccp@g since the sysctls are now in feat.c. Signed-off-by: Gerrit Renker Acked-by: Ian McDonald --- include/linux/dccp.h | 8 -------- 1 file changed, 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dccp.h b/include/linux/dccp.h index 7a0502ab383a..7434a8353e23 100644 --- a/include/linux/dccp.h +++ b/include/linux/dccp.h @@ -355,14 +355,6 @@ static inline unsigned int dccp_hdr_len(const struct sk_buff *skb) return __dccp_hdr_len(dccp_hdr(skb)); } - -/* initial values for each feature */ -#define DCCPF_INITIAL_SEQUENCE_WINDOW 100 -#define DCCPF_INITIAL_ACK_RATIO 2 -#define DCCPF_INITIAL_CCID DCCPC_CCID2 -/* FIXME: for now we're default to 1 but it should really be 0 */ -#define DCCPF_INITIAL_SEND_NDP_COUNT 1 - /** * struct dccp_request_sock - represent DCCP-specific connection request * @dreq_inet_rsk: structure inherited from -- cgit v1.2.2 From f10ecaee6dc2c6d56783462b2a82e98bc81b55f4 Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Thu, 4 Sep 2008 07:30:19 +0200 Subject: dccp: Replace magic CCID-specific numbers by symbolic constants The constants DCCPO_{MIN,MAX}_CCID_SPECIFIC are nowhere used in the code, but instead for the CCID-specific options numbers are used. This patch unifies the use of CCID-specific option numbers, by adding symbolic names reflecting the definitions in RFC 4340, 10.3. Signed-off-by: Gerrit Renker --- include/linux/dccp.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dccp.h b/include/linux/dccp.h index 7434a8353e23..7187bd8a75f6 100644 --- a/include/linux/dccp.h +++ b/include/linux/dccp.h @@ -165,8 +165,10 @@ enum { DCCPO_TIMESTAMP_ECHO = 42, DCCPO_ELAPSED_TIME = 43, DCCPO_MAX = 45, - DCCPO_MIN_CCID_SPECIFIC = 128, - DCCPO_MAX_CCID_SPECIFIC = 255, + DCCPO_MIN_RX_CCID_SPECIFIC = 128, /* from sender to receiver */ + DCCPO_MAX_RX_CCID_SPECIFIC = 191, + DCCPO_MIN_TX_CCID_SPECIFIC = 192, /* from receiver to sender */ + DCCPO_MAX_TX_CCID_SPECIFIC = 255, }; /* maximum size of a single TLV-encoded DCCP option (sans type/len bytes) */ #define DCCP_SINGLE_OPT_MAXLEN 253 -- cgit v1.2.2 From c2f42077bd06f300ae959204f3c007f820f5e769 Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Thu, 4 Sep 2008 07:30:19 +0200 Subject: dccp ccid-2: Schedule Sync as out-of-band mechanism The problem with Ack Vectors is that i) their length is variable and can in principle grow quite large, ii) it is hard to predict exactly how large they will be. Due to the second point it seems not a good idea to reduce the MPS; in particular when on average there is enough room for the Ack Vector and an increase in length is momentarily due to some burst loss, after which the Ack Vector returns to its normal/average length. The solution taken by this patch is to subtract a minimum-expected Ack Vector length from the MPS (previous patch), and to defer any larger Ack Vectors onto a separate Sync - but only if indeed there is no space left on the skb. This patch provides the infrastructure to schedule Sync-packets for transporting (urgent) out-of-band data. Its signalling is quicker than scheduling an Ack, since it does not need to wait for new application data. It can thus serve other parts of the DCCP code as well. Signed-off-by: Gerrit Renker --- include/linux/dccp.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/dccp.h b/include/linux/dccp.h index 7187bd8a75f6..83197b601a4f 100644 --- a/include/linux/dccp.h +++ b/include/linux/dccp.h @@ -462,6 +462,7 @@ struct dccp_ackvec; * @dccps_hc_rx_insert_options - receiver wants to add options when acking * @dccps_hc_tx_insert_options - sender wants to add options when sending * @dccps_server_timewait - server holds timewait state on close (RFC 4340, 8.3) + * @dccps_sync_scheduled - flag which signals "send out-of-band message soon" * @dccps_xmit_timer - timer for when CCID is not ready to send * @dccps_syn_rtt - RTT sample from Request/Response exchange (in usecs) */ @@ -502,6 +503,7 @@ struct dccp_sock { __u8 dccps_hc_rx_insert_options:1; __u8 dccps_hc_tx_insert_options:1; __u8 dccps_server_timewait:1; + __u8 dccps_sync_scheduled:1; struct timer_list dccps_xmit_timer; }; -- cgit v1.2.2 From e7937772d7a2b0127cc4cbc67bc594e139fdaf63 Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Thu, 4 Sep 2008 07:30:19 +0200 Subject: dccp: Extend CCID packet dequeueing interface This extends the packet dequeuing interface of dccp_write_xmit() to allow 1. CCIDs to take care of timing when the next packet may be sent; 2. delayed sending (as before, with an inter-packet gap up to 65.535 seconds). The main purpose is to take CCID2 out of its polling mode (when it is network- limited, it tries every millisecond to send, without interruption). The interface can also be used to support other CCIDs. The mode of operation for (2) is as follows: * new packet is enqueued via dccp_sendmsg() => dccp_write_xmit(), * ccid_hc_tx_send_packet() detects that it may not send (e.g. window full), * it signals this condition via `CCID_PACKET_WILL_DEQUEUE_LATER', * dccp_write_xmit() returns without further action; * after some time the wait-condition for CCID becomes true, * that CCID schedules the tasklet, * tasklet function calls ccid_hc_tx_send_packet() via dccp_write_xmit(), * since the wait-condition is now true, ccid_hc_tx_packet() returns "send now", * packet is sent, and possibly more (since dccp_write_xmit() loops). Code reuse: the taskled function calls dccp_write_xmit(), the timer function reduces to a wrapper around the same code. If the tasklet finds that the socket is locked, it re-schedules the tasklet function (not the tasklet) after one jiffy. Changed DCCP_BUG to dccp_pr_debug when transmit_skb returns an error (e.g. when a local qdisc is used, NET_XMIT_DROP=1 can be returned for many packets). Signed-off-by: Gerrit Renker --- include/linux/dccp.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/dccp.h b/include/linux/dccp.h index 83197b601a4f..eed52bcd35d0 100644 --- a/include/linux/dccp.h +++ b/include/linux/dccp.h @@ -463,7 +463,8 @@ struct dccp_ackvec; * @dccps_hc_tx_insert_options - sender wants to add options when sending * @dccps_server_timewait - server holds timewait state on close (RFC 4340, 8.3) * @dccps_sync_scheduled - flag which signals "send out-of-band message soon" - * @dccps_xmit_timer - timer for when CCID is not ready to send + * @dccps_xmitlet - tasklet scheduled by the TX CCID to dequeue data packets + * @dccps_xmit_timer - used by the TX CCID to delay sending (rate-based pacing) * @dccps_syn_rtt - RTT sample from Request/Response exchange (in usecs) */ struct dccp_sock { @@ -504,6 +505,7 @@ struct dccp_sock { __u8 dccps_hc_tx_insert_options:1; __u8 dccps_server_timewait:1; __u8 dccps_sync_scheduled:1; + struct tasklet_struct dccps_xmitlet; struct timer_list dccps_xmit_timer; }; -- cgit v1.2.2 From d6da3511d6b558d0b017777b61dc08b8fbc06ea4 Mon Sep 17 00:00:00 2001 From: Tomasz Grobelny Date: Thu, 4 Sep 2008 07:30:19 +0200 Subject: dccp: Policy-based packet dequeueing infrastructure This patch adds a generic infrastructure for policy-based dequeueing of TX packets and provides two policies: * a simple FIFO policy (which is the default) and * a priority based policy (set via socket options). Both policies honour the tx_qlen sysctl for the maximum size of the write queue (can be overridden via socket options). The priority policy uses skb->priority internally to assign an u32 priority identifier, using the same ranking as SO_PRIORITY. The skb->priority field is set to 0 when the packet leaves DCCP. The priority is supplied as ancillary data using cmsg(3), the patch also provides the requisite parsing routines. Signed-off-by: Tomasz Grobelny Signed-off-by: Gerrit Renker --- include/linux/dccp.h | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) (limited to 'include/linux') diff --git a/include/linux/dccp.h b/include/linux/dccp.h index eed52bcd35d0..010e2d87ed75 100644 --- a/include/linux/dccp.h +++ b/include/linux/dccp.h @@ -197,6 +197,21 @@ enum dccp_feature_numbers { DCCPF_MAX_CCID_SPECIFIC = 255, }; +/* DCCP socket control message types for cmsg */ +enum dccp_cmsg_type { + DCCP_SCM_PRIORITY = 1, + DCCP_SCM_QPOLICY_MAX = 0xFFFF, + /* ^-- Up to here reserved exclusively for qpolicy parameters */ + DCCP_SCM_MAX +}; + +/* DCCP priorities for outgoing/queued packets */ +enum dccp_packet_dequeueing_policy { + DCCPQ_POLICY_SIMPLE, + DCCPQ_POLICY_PRIO, + DCCPQ_POLICY_MAX +}; + /* DCCP socket options */ #define DCCP_SOCKOPT_PACKET_SIZE 1 /* XXX deprecated, without effect */ #define DCCP_SOCKOPT_SERVICE 2 @@ -210,6 +225,8 @@ enum dccp_feature_numbers { #define DCCP_SOCKOPT_CCID 13 #define DCCP_SOCKOPT_TX_CCID 14 #define DCCP_SOCKOPT_RX_CCID 15 +#define DCCP_SOCKOPT_QPOLICY_ID 16 +#define DCCP_SOCKOPT_QPOLICY_TXQLEN 17 #define DCCP_SOCKOPT_CCID_RX_INFO 128 #define DCCP_SOCKOPT_CCID_TX_INFO 192 @@ -458,6 +475,8 @@ struct dccp_ackvec; * @dccps_hc_rx_ccid - CCID used for the receiver (or receiving half-connection) * @dccps_hc_tx_ccid - CCID used for the sender (or sending half-connection) * @dccps_options_received - parsed set of retrieved options + * @dccps_qpolicy - TX dequeueing policy, one of %dccp_packet_dequeueing_policy + * @dccps_tx_qlen - maximum length of the TX queue * @dccps_role - role of this sock, one of %dccp_role * @dccps_hc_rx_insert_options - receiver wants to add options when acking * @dccps_hc_tx_insert_options - sender wants to add options when sending @@ -500,6 +519,8 @@ struct dccp_sock { struct ccid *dccps_hc_rx_ccid; struct ccid *dccps_hc_tx_ccid; struct dccp_options_received dccps_options_received; + __u8 dccps_qpolicy; + __u32 dccps_tx_qlen; enum dccp_role dccps_role:2; __u8 dccps_hc_rx_insert_options:1; __u8 dccps_hc_tx_insert_options:1; -- cgit v1.2.2 From 268364a0f48aee2f851f9d1ef8a6cda0f3039ef1 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 4 Sep 2008 21:02:44 +0200 Subject: IO resources: add reserve_region_with_split() add reserve_region_with_split() to not lose e820 reserved entries if they overlap with existing IO regions: with test case by extend 0xe0000000 - 0xeffffff to 0xdd800000 - we get: e0000000-efffffff : PCI MMCONFIG 0 e0000000-efffffff : reserved and in /proc/iomem we get: found conflict for reserved [dd800000, efffffff], try to reserve with split __reserve_region_with_split: (PCI Bus #80) [dd000000, ddffffff], res: (reserved) [dd800000, efffffff] __reserve_region_with_split: (PCI Bus #00) [de000000, dfffffff], res: (reserved) [de000000, efffffff] initcall pci_subsys_init+0x0/0x121 returned 0 after 381 msecs in dmesg various fixes and improvements suggested by Linus. Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- include/linux/ioport.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ioport.h b/include/linux/ioport.h index 8d3b7a9afd17..fded376b94e3 100644 --- a/include/linux/ioport.h +++ b/include/linux/ioport.h @@ -108,6 +108,9 @@ extern struct resource iomem_resource; extern int request_resource(struct resource *root, struct resource *new); extern int release_resource(struct resource *new); +extern void reserve_region_with_split(struct resource *root, + resource_size_t start, resource_size_t end, + const char *name); extern int insert_resource(struct resource *parent, struct resource *new); extern void insert_resource_expand_to_fit(struct resource *root, struct resource *new); extern int allocate_resource(struct resource *root, struct resource *new, -- cgit v1.2.2 From f7981c1c67b53abb4a7d8a501e68585b9826179a Mon Sep 17 00:00:00 2001 From: Lennert Buytenhek Date: Tue, 26 Aug 2008 10:23:22 +0200 Subject: mv643xx_eth: require contiguous receive and transmit queue numbering Simplify receive and transmit queue handling by requiring the set of queue numbers to be contiguous starting from zero. Signed-off-by: Lennert Buytenhek --- include/linux/mv643xx_eth.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mv643xx_eth.h b/include/linux/mv643xx_eth.h index 12078577aef6..eb78b00edcde 100644 --- a/include/linux/mv643xx_eth.h +++ b/include/linux/mv643xx_eth.h @@ -49,10 +49,10 @@ struct mv643xx_eth_platform_data { int duplex; /* - * Which RX/TX queues to use. + * How many RX/TX queues to use. */ - int rx_queue_mask; - int tx_queue_mask; + int rx_queue_count; + int tx_queue_count; /* * Override default RX/TX queue sizes if nonzero. -- cgit v1.2.2 From fc0eb9f226d8ecc8e3b563bf808bd6d61a6153a1 Mon Sep 17 00:00:00 2001 From: Lennert Buytenhek Date: Tue, 26 Aug 2008 12:56:56 +0200 Subject: mv643xx_eth: smi sharing is a per-unit property, not a per-port one Which top-level unit's SMI interface to use should be a property of the top-level unit, not of the individual ports. This patch moves the ->shared_smi pointer from the per-port platform data to the global platform data. Signed-off-by: Lennert Buytenhek --- include/linux/mv643xx_eth.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mv643xx_eth.h b/include/linux/mv643xx_eth.h index eb78b00edcde..12339eb65704 100644 --- a/include/linux/mv643xx_eth.h +++ b/include/linux/mv643xx_eth.h @@ -17,6 +17,7 @@ struct mv643xx_eth_shared_platform_data { struct mbus_dram_target_info *dram; + struct platform_device *shared_smi; unsigned int t_clk; }; @@ -30,7 +31,6 @@ struct mv643xx_eth_platform_data { /* * Whether a PHY is present, and if yes, at which address. */ - struct platform_device *shared_smi; int force_phy_addr; int phy_addr; -- cgit v1.2.2 From ac840605f3b1d9b99e1e6629a54994f8e003ff91 Mon Sep 17 00:00:00 2001 From: Lennert Buytenhek Date: Tue, 26 Aug 2008 14:06:47 +0200 Subject: mv643xx_eth: remove force_phy_addr field Currently, there are two different fields in the mv643xx_eth_platform_data struct that together describe the PHY address -- one field (phy_addr) has the address of the PHY, but if that address is zero, a second field (force_phy_addr) needs to be set to distinguish the actual address zero from a zero due to not having filled in the PHY address explicitly (which should mean 'use the default PHY address'). If we are a bit smarter about the encoding of the phy_addr field, we can avoid the need for a second field -- this patch does that. Signed-off-by: Lennert Buytenhek --- include/linux/mv643xx_eth.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mv643xx_eth.h b/include/linux/mv643xx_eth.h index 12339eb65704..cbbbe9bfecad 100644 --- a/include/linux/mv643xx_eth.h +++ b/include/linux/mv643xx_eth.h @@ -21,6 +21,10 @@ struct mv643xx_eth_shared_platform_data { unsigned int t_clk; }; +#define MV643XX_ETH_PHY_ADDR_DEFAULT 0 +#define MV643XX_ETH_PHY_ADDR(x) (0x80 | (x)) +#define MV643XX_ETH_PHY_NONE 0xff + struct mv643xx_eth_platform_data { /* * Pointer back to our parent instance, and our port number. @@ -31,7 +35,6 @@ struct mv643xx_eth_platform_data { /* * Whether a PHY is present, and if yes, at which address. */ - int force_phy_addr; int phy_addr; /* -- cgit v1.2.2 From ca1af29a733629b9158a4a32a927d16ff9009a95 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Tue, 2 Sep 2008 13:13:39 +0200 Subject: x86, pci: add northbridge pci ids for fam 0x11 processors The PCI device ids for AMD family 0x11 processors are missing in pci_ids.h. This patch adds them. Signed-off-by: Joerg Roedel Signed-off-by: Ingo Molnar --- include/linux/pci_ids.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 9ec2bcce8e83..0d0b314476c9 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -497,6 +497,11 @@ #define PCI_DEVICE_ID_AMD_K8_NB_ADDRMAP 0x1101 #define PCI_DEVICE_ID_AMD_K8_NB_MEMCTL 0x1102 #define PCI_DEVICE_ID_AMD_K8_NB_MISC 0x1103 +#define PCI_DEVICE_ID_AMD_11H_NB_HT 0x1300 +#define PCI_DEVICE_ID_AMD_11H_NB_MAP 0x1301 +#define PCI_DEVICE_ID_AMD_11H_NB_DRAM 0x1302 +#define PCI_DEVICE_ID_AMD_11H_NB_MISC 0x1303 +#define PCI_DEVICE_ID_AMD_11H_NB_LINK 0x1304 #define PCI_DEVICE_ID_AMD_LANCE 0x2000 #define PCI_DEVICE_ID_AMD_LANCE_HOME 0x2001 #define PCI_DEVICE_ID_AMD_SCSI 0x2020 -- cgit v1.2.2 From f59ac0481660e66cec67f1d6b024e78b9dc715fe Mon Sep 17 00:00:00 2001 From: "Luis R. Rodriguez" Date: Fri, 29 Aug 2008 16:26:43 -0700 Subject: cfg80211: keep track of supported interface modes It is obviously good for userspace to know up front which interface modes a given piece of hardware might support (even if adding such an interface might fail later because of concurrency issues), so let's make cfg80211 aware of that. For good measure, disallow adding interfaces in all other modes so drivers don't forget to announce support for one mode when they add it. Signed-off-by: Johannes Berg Signed-off-by: Stephen Blackheath Signed-off-by: Ivo van Doorn Signed-off-by: Luis R. Rodriguez Signed-off-by: John W. Linville --- include/linux/nl80211.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h index 0c1147de3ec7..5e51f4e7600b 100644 --- a/include/linux/nl80211.h +++ b/include/linux/nl80211.h @@ -210,6 +210,10 @@ enum nl80211_commands { * @NL80211_ATTR_HT_CAPABILITY: HT Capability information element (from * association request when used with NL80211_CMD_NEW_STATION) * + * @NL80211_ATTR_SUPPORTED_IFTYPES: nested attribute containing all + * supported interface types, each a flag attribute with the number + * of the interface mode. + * * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use */ @@ -259,6 +263,8 @@ enum nl80211_attrs { NL80211_ATTR_HT_CAPABILITY, + NL80211_ATTR_SUPPORTED_IFTYPES, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, -- cgit v1.2.2 From b0dbcf511c4bd10350902e79a1bdd4f5dcca66b6 Mon Sep 17 00:00:00 2001 From: Russell King Date: Thu, 4 Sep 2008 21:13:37 +0100 Subject: [NET] smc91x: provide configurable leds This patch provides a mechanism for platforms to be able to supply the LED configuration via platform data, rather than having to hard code it in smc91x.h. Acked-by: Eric Miao Acked-by: Nicolas Pitre Acked-by: Jeff Garzik Signed-off-by: Russell King --- include/linux/smc91x.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/smc91x.h b/include/linux/smc91x.h index 3827b922ba1f..ed25483d25d9 100644 --- a/include/linux/smc91x.h +++ b/include/linux/smc91x.h @@ -18,6 +18,8 @@ struct smc91x_platdata { unsigned long flags; + unsigned char leda; + unsigned char ledb; }; #endif /* __SMC91X_H__ */ -- cgit v1.2.2 From e545a6140b698b2494daf0b32107bdcc5e901390 Mon Sep 17 00:00:00 2001 From: Manfred Spraul Date: Sun, 7 Sep 2008 16:57:22 +0200 Subject: kernel/cpu.c: create a CPU_STARTING cpu_chain notifier Right now, there is no notifier that is called on a new cpu, before the new cpu begins processing interrupts/softirqs. Various kernel function would need that notification, e.g. kvm works around by calling smp_call_function_single(), rcu polls cpu_online_map. The patch adds a CPU_STARTING notification. It also adds a helper function that sends the message to all cpu_chain handlers. Tested on x86-64. All other archs are untested. Especially on sparc, I'm not sure if I got it right. Signed-off-by: Manfred Spraul Signed-off-by: Ingo Molnar --- include/linux/cpu.h | 1 + include/linux/notifier.h | 10 +++++++++- 2 files changed, 10 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/cpu.h b/include/linux/cpu.h index d7faf8808497..c2747ac2ae43 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -69,6 +69,7 @@ static inline void unregister_cpu_notifier(struct notifier_block *nb) #endif int cpu_up(unsigned int cpu); +void notify_cpu_starting(unsigned int cpu); extern void cpu_hotplug_init(void); extern void cpu_maps_update_begin(void); extern void cpu_maps_update_done(void); diff --git a/include/linux/notifier.h b/include/linux/notifier.h index da2698b0fdd1..b86fa2ffca0c 100644 --- a/include/linux/notifier.h +++ b/include/linux/notifier.h @@ -213,9 +213,16 @@ static inline int notifier_to_errno(int ret) #define CPU_DOWN_FAILED 0x0006 /* CPU (unsigned)v NOT going down */ #define CPU_DEAD 0x0007 /* CPU (unsigned)v dead */ #define CPU_DYING 0x0008 /* CPU (unsigned)v not running any task, - * not handling interrupts, soon dead */ + * not handling interrupts, soon dead. + * Called on the dying cpu, interrupts + * are already disabled. Must not + * sleep, must not fail */ #define CPU_POST_DEAD 0x0009 /* CPU (unsigned)v dead, cpu_hotplug * lock is dropped */ +#define CPU_STARTING 0x000A /* CPU (unsigned)v soon running. + * Called on the new cpu, just before + * enabling interrupts. Must not sleep, + * must not fail */ /* Used for CPU hotplug events occuring while tasks are frozen due to a suspend * operation in progress @@ -229,6 +236,7 @@ static inline int notifier_to_errno(int ret) #define CPU_DOWN_FAILED_FROZEN (CPU_DOWN_FAILED | CPU_TASKS_FROZEN) #define CPU_DEAD_FROZEN (CPU_DEAD | CPU_TASKS_FROZEN) #define CPU_DYING_FROZEN (CPU_DYING | CPU_TASKS_FROZEN) +#define CPU_STARTING_FROZEN (CPU_STARTING | CPU_TASKS_FROZEN) /* Hibernation and suspend events */ #define PM_HIBERNATION_PREPARE 0x0001 /* Going to hibernate */ -- cgit v1.2.2 From 410e27a49bb98bc7fa3ff5fc05cc313817b9f253 Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Tue, 9 Sep 2008 13:27:22 +0200 Subject: This reverts "Merge branch 'dccp' of git://eden-feed.erg.abdn.ac.uk/dccp_exp" as it accentally contained the wrong set of patches. These will be submitted separately. Signed-off-by: Gerrit Renker --- include/linux/dccp.h | 122 ++++++++++++++++++++++++++++++--------------------- 1 file changed, 71 insertions(+), 51 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dccp.h b/include/linux/dccp.h index 010e2d87ed75..6080449fbec9 100644 --- a/include/linux/dccp.h +++ b/include/linux/dccp.h @@ -165,13 +165,9 @@ enum { DCCPO_TIMESTAMP_ECHO = 42, DCCPO_ELAPSED_TIME = 43, DCCPO_MAX = 45, - DCCPO_MIN_RX_CCID_SPECIFIC = 128, /* from sender to receiver */ - DCCPO_MAX_RX_CCID_SPECIFIC = 191, - DCCPO_MIN_TX_CCID_SPECIFIC = 192, /* from receiver to sender */ - DCCPO_MAX_TX_CCID_SPECIFIC = 255, + DCCPO_MIN_CCID_SPECIFIC = 128, + DCCPO_MAX_CCID_SPECIFIC = 255, }; -/* maximum size of a single TLV-encoded DCCP option (sans type/len bytes) */ -#define DCCP_SINGLE_OPT_MAXLEN 253 /* DCCP CCIDS */ enum { @@ -180,36 +176,27 @@ enum { }; /* DCCP features (RFC 4340 section 6.4) */ -enum dccp_feature_numbers { +enum { DCCPF_RESERVED = 0, DCCPF_CCID = 1, - DCCPF_SHORT_SEQNOS = 2, + DCCPF_SHORT_SEQNOS = 2, /* XXX: not yet implemented */ DCCPF_SEQUENCE_WINDOW = 3, - DCCPF_ECN_INCAPABLE = 4, + DCCPF_ECN_INCAPABLE = 4, /* XXX: not yet implemented */ DCCPF_ACK_RATIO = 5, DCCPF_SEND_ACK_VECTOR = 6, DCCPF_SEND_NDP_COUNT = 7, DCCPF_MIN_CSUM_COVER = 8, - DCCPF_DATA_CHECKSUM = 9, + DCCPF_DATA_CHECKSUM = 9, /* XXX: not yet implemented */ /* 10-127 reserved */ DCCPF_MIN_CCID_SPECIFIC = 128, - DCCPF_SEND_LEV_RATE = 192, /* RFC 4342, sec. 8.4 */ DCCPF_MAX_CCID_SPECIFIC = 255, }; -/* DCCP socket control message types for cmsg */ -enum dccp_cmsg_type { - DCCP_SCM_PRIORITY = 1, - DCCP_SCM_QPOLICY_MAX = 0xFFFF, - /* ^-- Up to here reserved exclusively for qpolicy parameters */ - DCCP_SCM_MAX -}; - -/* DCCP priorities for outgoing/queued packets */ -enum dccp_packet_dequeueing_policy { - DCCPQ_POLICY_SIMPLE, - DCCPQ_POLICY_PRIO, - DCCPQ_POLICY_MAX +/* this structure is argument to DCCP_SOCKOPT_CHANGE_X */ +struct dccp_so_feat { + __u8 dccpsf_feat; + __u8 __user *dccpsf_val; + __u8 dccpsf_len; }; /* DCCP socket options */ @@ -221,12 +208,6 @@ enum dccp_packet_dequeueing_policy { #define DCCP_SOCKOPT_SERVER_TIMEWAIT 6 #define DCCP_SOCKOPT_SEND_CSCOV 10 #define DCCP_SOCKOPT_RECV_CSCOV 11 -#define DCCP_SOCKOPT_AVAILABLE_CCIDS 12 -#define DCCP_SOCKOPT_CCID 13 -#define DCCP_SOCKOPT_TX_CCID 14 -#define DCCP_SOCKOPT_RX_CCID 15 -#define DCCP_SOCKOPT_QPOLICY_ID 16 -#define DCCP_SOCKOPT_QPOLICY_TXQLEN 17 #define DCCP_SOCKOPT_CCID_RX_INFO 128 #define DCCP_SOCKOPT_CCID_TX_INFO 192 @@ -374,13 +355,62 @@ static inline unsigned int dccp_hdr_len(const struct sk_buff *skb) return __dccp_hdr_len(dccp_hdr(skb)); } + +/* initial values for each feature */ +#define DCCPF_INITIAL_SEQUENCE_WINDOW 100 +#define DCCPF_INITIAL_ACK_RATIO 2 +#define DCCPF_INITIAL_CCID DCCPC_CCID2 +#define DCCPF_INITIAL_SEND_ACK_VECTOR 1 +/* FIXME: for now we're default to 1 but it should really be 0 */ +#define DCCPF_INITIAL_SEND_NDP_COUNT 1 + +/** + * struct dccp_minisock - Minimal DCCP connection representation + * + * Will be used to pass the state from dccp_request_sock to dccp_sock. + * + * @dccpms_sequence_window - Sequence Window Feature (section 7.5.2) + * @dccpms_ccid - Congestion Control Id (CCID) (section 10) + * @dccpms_send_ack_vector - Send Ack Vector Feature (section 11.5) + * @dccpms_send_ndp_count - Send NDP Count Feature (7.7.2) + * @dccpms_ack_ratio - Ack Ratio Feature (section 11.3) + * @dccpms_pending - List of features being negotiated + * @dccpms_conf - + */ +struct dccp_minisock { + __u64 dccpms_sequence_window; + __u8 dccpms_rx_ccid; + __u8 dccpms_tx_ccid; + __u8 dccpms_send_ack_vector; + __u8 dccpms_send_ndp_count; + __u8 dccpms_ack_ratio; + struct list_head dccpms_pending; + struct list_head dccpms_conf; +}; + +struct dccp_opt_conf { + __u8 *dccpoc_val; + __u8 dccpoc_len; +}; + +struct dccp_opt_pend { + struct list_head dccpop_node; + __u8 dccpop_type; + __u8 dccpop_feat; + __u8 *dccpop_val; + __u8 dccpop_len; + int dccpop_conf; + struct dccp_opt_conf *dccpop_sc; +}; + +extern void dccp_minisock_init(struct dccp_minisock *dmsk); + /** * struct dccp_request_sock - represent DCCP-specific connection request * @dreq_inet_rsk: structure inherited from * @dreq_iss: initial sequence number sent on the Response (RFC 4340, 7.1) * @dreq_isr: initial sequence number received on the Request * @dreq_service: service code present on the Request (there is just one) - * @dreq_featneg: feature negotiation options for this connection * The following two fields are analogous to the ones in dccp_sock: * @dreq_timestamp_echo: last received timestamp to echo (13.1) * @dreq_timestamp_echo: the time of receiving the last @dreq_timestamp_echo @@ -390,7 +420,6 @@ struct dccp_request_sock { __u64 dreq_iss; __u64 dreq_isr; __be32 dreq_service; - struct list_head dreq_featneg; __u32 dreq_timestamp_echo; __u32 dreq_timestamp_time; }; @@ -462,28 +491,21 @@ struct dccp_ackvec; * @dccps_timestamp_time - time of receiving latest @dccps_timestamp_echo * @dccps_l_ack_ratio - feature-local Ack Ratio * @dccps_r_ack_ratio - feature-remote Ack Ratio - * @dccps_l_seq_win - local Sequence Window (influences ack number validity) - * @dccps_r_seq_win - remote Sequence Window (influences seq number validity) * @dccps_pcslen - sender partial checksum coverage (via sockopt) * @dccps_pcrlen - receiver partial checksum coverage (via sockopt) - * @dccps_send_ndp_count - local Send NDP Count feature (7.7.2) * @dccps_ndp_count - number of Non Data Packets since last data packet * @dccps_mss_cache - current value of MSS (path MTU minus header sizes) * @dccps_rate_last - timestamp for rate-limiting DCCP-Sync (RFC 4340, 7.5.4) - * @dccps_featneg - tracks feature-negotiation state (mostly during handshake) + * @dccps_minisock - associated minisock (accessed via dccp_msk) * @dccps_hc_rx_ackvec - rx half connection ack vector * @dccps_hc_rx_ccid - CCID used for the receiver (or receiving half-connection) * @dccps_hc_tx_ccid - CCID used for the sender (or sending half-connection) * @dccps_options_received - parsed set of retrieved options - * @dccps_qpolicy - TX dequeueing policy, one of %dccp_packet_dequeueing_policy - * @dccps_tx_qlen - maximum length of the TX queue * @dccps_role - role of this sock, one of %dccp_role * @dccps_hc_rx_insert_options - receiver wants to add options when acking * @dccps_hc_tx_insert_options - sender wants to add options when sending * @dccps_server_timewait - server holds timewait state on close (RFC 4340, 8.3) - * @dccps_sync_scheduled - flag which signals "send out-of-band message soon" - * @dccps_xmitlet - tasklet scheduled by the TX CCID to dequeue data packets - * @dccps_xmit_timer - used by the TX CCID to delay sending (rate-based pacing) + * @dccps_xmit_timer - timer for when CCID is not ready to send * @dccps_syn_rtt - RTT sample from Request/Response exchange (in usecs) */ struct dccp_sock { @@ -507,26 +529,19 @@ struct dccp_sock { __u32 dccps_timestamp_time; __u16 dccps_l_ack_ratio; __u16 dccps_r_ack_ratio; - __u64 dccps_l_seq_win:48; - __u64 dccps_r_seq_win:48; - __u8 dccps_pcslen:4; - __u8 dccps_pcrlen:4; - __u8 dccps_send_ndp_count:1; + __u16 dccps_pcslen; + __u16 dccps_pcrlen; __u64 dccps_ndp_count:48; unsigned long dccps_rate_last; - struct list_head dccps_featneg; + struct dccp_minisock dccps_minisock; struct dccp_ackvec *dccps_hc_rx_ackvec; struct ccid *dccps_hc_rx_ccid; struct ccid *dccps_hc_tx_ccid; struct dccp_options_received dccps_options_received; - __u8 dccps_qpolicy; - __u32 dccps_tx_qlen; enum dccp_role dccps_role:2; __u8 dccps_hc_rx_insert_options:1; __u8 dccps_hc_tx_insert_options:1; __u8 dccps_server_timewait:1; - __u8 dccps_sync_scheduled:1; - struct tasklet_struct dccps_xmitlet; struct timer_list dccps_xmit_timer; }; @@ -535,6 +550,11 @@ static inline struct dccp_sock *dccp_sk(const struct sock *sk) return (struct dccp_sock *)sk; } +static inline struct dccp_minisock *dccp_msk(const struct sock *sk) +{ + return (struct dccp_minisock *)&dccp_sk(sk)->dccps_minisock; +} + static inline const char *dccp_role(const struct sock *sk) { switch (dccp_sk(sk)->dccps_role) { -- cgit v1.2.2 From fb683f1627745e937ef199edd3428ac4b2ef1e08 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 22 Aug 2008 16:36:28 +0200 Subject: Export smc91x led definitions Now that we can configure smc91x leds from its platform data, it seems rather useful to move the led definitions to the externally visible header file. Signed-off-by: Marc Zyngier --- include/linux/smc91x.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/smc91x.h b/include/linux/smc91x.h index ed25483d25d9..bc21db598c06 100644 --- a/include/linux/smc91x.h +++ b/include/linux/smc91x.h @@ -16,6 +16,15 @@ #define SMC91X_USE_DMA (1 << 6) +#define RPC_LED_100_10 (0x00) /* LED = 100Mbps OR's with 10Mbps link detect */ +#define RPC_LED_RES (0x01) /* LED = Reserved */ +#define RPC_LED_10 (0x02) /* LED = 10Mbps link detect */ +#define RPC_LED_FD (0x03) /* LED = Full Duplex Mode */ +#define RPC_LED_TX_RX (0x04) /* LED = TX or RX packet occurred */ +#define RPC_LED_100 (0x05) /* LED = 100Mbps link dectect */ +#define RPC_LED_TX (0x06) /* LED = TX packet occurred */ +#define RPC_LED_RX (0x07) /* LED = RX packet occurred */ + struct smc91x_platdata { unsigned long flags; unsigned char leda; -- cgit v1.2.2 From 636dc67cbf8c481a996faf6c23f0532d0f02ebad Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Wed, 10 Sep 2008 01:06:46 +0900 Subject: add is_buffer_dma_capable helper function is_buffer_dma_capable helper function is to see if a memory region is DMA-capable or not. The arugments are the dma_mask (or coherent_dma_mask) of a device and the address and size of a memory region. Signed-off-by: FUJITA Tomonori Acked-by: Joerg Roedel Signed-off-by: Ingo Molnar --- include/linux/dma-mapping.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index 952e0f857ac9..6ed50c1642f1 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -48,6 +48,11 @@ static inline int is_device_dma_capable(struct device *dev) return dev->dma_mask != NULL && *dev->dma_mask != DMA_MASK_NONE; } +static inline int is_buffer_dma_capable(u64 mask, dma_addr_t addr, size_t size) +{ + return addr + size <= mask; +} + #ifdef CONFIG_HAS_DMA #include #else -- cgit v1.2.2 From 271bff7afbb2cbaa81e744006ad2fff1f3e10b1b Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 11 Sep 2008 04:48:58 -0700 Subject: net: Add DMA mapping tokens to skb_shared_info. Signed-off-by: David S. Miller --- include/linux/skbuff.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 909923717830..4b2be23903c4 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -146,8 +146,14 @@ struct skb_shared_info { unsigned short gso_segs; unsigned short gso_type; __be32 ip6_frag_id; +#ifdef CONFIG_HAS_DMA + unsigned int num_dma_maps; +#endif struct sk_buff *frag_list; skb_frag_t frags[MAX_SKB_FRAGS]; +#ifdef CONFIG_HAS_DMA + dma_addr_t dma_maps[MAX_SKB_FRAGS + 1]; +#endif }; /* We divide dataref into two halves. The higher 16 bits hold references -- cgit v1.2.2 From a40c24a13366e324bc0ff8c3bb107db89312c984 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 11 Sep 2008 04:51:14 -0700 Subject: net: Add SKB DMA mapping helper functions. Signed-off-by: David S. Miller --- include/linux/skbuff.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 4b2be23903c4..aa80ad9cbc88 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -359,6 +359,14 @@ struct sk_buff { #include +#ifdef CONFIG_HAS_DMA +#include +extern int skb_dma_map(struct device *dev, struct sk_buff *skb, + enum dma_data_direction dir); +extern void skb_dma_unmap(struct device *dev, struct sk_buff *skb, + enum dma_data_direction dir); +#endif + extern void kfree_skb(struct sk_buff *skb); extern void __kfree_skb(struct sk_buff *skb); extern struct sk_buff *__alloc_skb(unsigned int size, -- cgit v1.2.2 From 00c5ae2fa0f8191a1b204e71f0ee11359e3b2c06 Mon Sep 17 00:00:00 2001 From: Tomas Winkler Date: Wed, 3 Sep 2008 11:26:42 +0800 Subject: mac80211: change MIMO_PS to SM_PS This patch follows 11n spec naming more rigorously replacing MIMO_PS with SM_PS (Spatial Multiplexing Power Save). (Originally submitted as 4 patches, "mac80211: change MIMO_PS to SM_PS", "iwlwifi: change MIMO_PS to SM_PS", "ath9k: change MIMO_PS to SM_PS", and "iwlwifi: remove double definition of SM PS". -- JWL) Signed-off-by: Ron Rindjunsky Signed-off-by: Tomas Winkler Signed-off-by: Zhu Yi Signed-off-by: John W. Linville --- include/linux/ieee80211.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index be456450cd2e..333d3ae76883 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -708,7 +708,7 @@ struct ieee80211_ht_addt_info { /* 802.11n HT capabilities masks */ #define IEEE80211_HT_CAP_SUP_WIDTH 0x0002 -#define IEEE80211_HT_CAP_MIMO_PS 0x000C +#define IEEE80211_HT_CAP_SM_PS 0x000C #define IEEE80211_HT_CAP_GRN_FLD 0x0010 #define IEEE80211_HT_CAP_SGI_20 0x0020 #define IEEE80211_HT_CAP_SGI_40 0x0040 @@ -737,11 +737,11 @@ struct ieee80211_ht_addt_info { #define IEEE80211_HT_IE_NON_GF_STA_PRSNT 0x0004 #define IEEE80211_HT_IE_NON_HT_STA_PRSNT 0x0010 -/* MIMO Power Save Modes */ -#define WLAN_HT_CAP_MIMO_PS_STATIC 0 -#define WLAN_HT_CAP_MIMO_PS_DYNAMIC 1 -#define WLAN_HT_CAP_MIMO_PS_INVALID 2 -#define WLAN_HT_CAP_MIMO_PS_DISABLED 3 +/* Spatial Multiplexing Power Save Modes */ +#define WLAN_HT_CAP_SM_PS_STATIC 0 +#define WLAN_HT_CAP_SM_PS_DYNAMIC 1 +#define WLAN_HT_CAP_SM_PS_INVALID 2 +#define WLAN_HT_CAP_SM_PS_DISABLED 3 /* Authentication algorithms */ #define WLAN_AUTH_OPEN 0 -- cgit v1.2.2 From 44d414dbff9d5bf46fc09f2e68567b5848cbbfd3 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 8 Sep 2008 17:44:28 +0200 Subject: mac80211: move some HT code out of mlme.c Some of the HT code in mlme.c is misplaced: * constants/definitions belong to the ieee80211.h header * code being used in other modes as well shouldn't be there Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/linux/ieee80211.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 333d3ae76883..abc1abc63bf0 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -643,6 +643,9 @@ struct ieee80211_mgmt { } u; } __attribute__ ((packed)); +/* mgmt header + 1 byte category code */ +#define IEEE80211_MIN_ACTION_SIZE offsetof(struct ieee80211_mgmt, u.action.u) + /* Control frames */ struct ieee80211_rts { @@ -737,6 +740,21 @@ struct ieee80211_ht_addt_info { #define IEEE80211_HT_IE_NON_GF_STA_PRSNT 0x0004 #define IEEE80211_HT_IE_NON_HT_STA_PRSNT 0x0010 +/* block-ack parameters */ +#define IEEE80211_ADDBA_PARAM_POLICY_MASK 0x0002 +#define IEEE80211_ADDBA_PARAM_TID_MASK 0x003C +#define IEEE80211_ADDBA_PARAM_BUF_SIZE_MASK 0xFFA0 +#define IEEE80211_DELBA_PARAM_TID_MASK 0xF000 +#define IEEE80211_DELBA_PARAM_INITIATOR_MASK 0x0800 + +/* + * A-PMDU buffer sizes + * According to IEEE802.11n spec size varies from 8K to 64K (in powers of 2) + */ +#define IEEE80211_MIN_AMPDU_BUF 0x8 +#define IEEE80211_MAX_AMPDU_BUF 0x40 + + /* Spatial Multiplexing Power Save Modes */ #define WLAN_HT_CAP_SM_PS_STATIC 0 #define WLAN_HT_CAP_SM_PS_DYNAMIC 1 -- cgit v1.2.2 From 92651940ab00dbe64722e908f70d816713d677b7 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Fri, 12 Sep 2008 16:29:34 -0700 Subject: pkt_sched: Add multiqueue scheduler support This patch is intended to add a qdisc to support the new tx multiqueue architecture by providing a band for each hardware queue. By doing this it is possible to support a different qdisc per physical hardware queue. This qdisc uses the skb->queue_mapping to select which band to place the traffic onto. It then uses a round robin w/ a check to see if the subqueue is stopped to determine which band to dequeue the packet from. Signed-off-by: Alexander Duyck Signed-off-by: Jeff Kirsher Signed-off-by: David S. Miller --- include/linux/pkt_sched.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pkt_sched.h b/include/linux/pkt_sched.h index e5de421ac7b4..5d921fa91a5b 100644 --- a/include/linux/pkt_sched.h +++ b/include/linux/pkt_sched.h @@ -123,6 +123,13 @@ struct tc_prio_qopt __u8 priomap[TC_PRIO_MAX+1]; /* Map: logical priority -> PRIO band */ }; +/* MULTIQ section */ + +struct tc_multiq_qopt { + __u16 bands; /* Number of bands */ + __u16 max_bands; /* Maximum number of queues */ +}; + /* TBF section */ struct tc_tbf_qopt -- cgit v1.2.2 From ca9b0e27e072be4cef2f5f0cbc0b0fd94eae3520 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Fri, 12 Sep 2008 16:30:20 -0700 Subject: pkt_action: add new action skbedit This new action will have the ability to change the priority and/or queue_mapping fields on an sk_buff. Signed-off-by: Alexander Duyck Signed-off-by: Jeff Kirsher Signed-off-by: David S. Miller --- include/linux/tc_act/Kbuild | 1 + include/linux/tc_act/tc_skbedit.h | 44 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 45 insertions(+) create mode 100644 include/linux/tc_act/tc_skbedit.h (limited to 'include/linux') diff --git a/include/linux/tc_act/Kbuild b/include/linux/tc_act/Kbuild index 6dac0d7365cc..76990937f4c9 100644 --- a/include/linux/tc_act/Kbuild +++ b/include/linux/tc_act/Kbuild @@ -3,3 +3,4 @@ header-y += tc_ipt.h header-y += tc_mirred.h header-y += tc_pedit.h header-y += tc_nat.h +header-y += tc_skbedit.h diff --git a/include/linux/tc_act/tc_skbedit.h b/include/linux/tc_act/tc_skbedit.h new file mode 100644 index 000000000000..a14e461a7af7 --- /dev/null +++ b/include/linux/tc_act/tc_skbedit.h @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2008, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA. + * + * Author: Alexander Duyck + */ + +#ifndef __LINUX_TC_SKBEDIT_H +#define __LINUX_TC_SKBEDIT_H + +#include + +#define TCA_ACT_SKBEDIT 11 + +#define SKBEDIT_F_PRIORITY 0x1 +#define SKBEDIT_F_QUEUE_MAPPING 0x2 + +struct tc_skbedit { + tc_gen; +}; + +enum { + TCA_SKBEDIT_UNSPEC, + TCA_SKBEDIT_TM, + TCA_SKBEDIT_PARMS, + TCA_SKBEDIT_PRIORITY, + TCA_SKBEDIT_QUEUE_MAPPING, + __TCA_SKBEDIT_MAX +}; +#define TCA_SKBEDIT_MAX (__TCA_SKBEDIT_MAX - 1) + +#endif -- cgit v1.2.2 From eecfffc154ffbfe70686a9905c090b488778c28e Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Fri, 12 Sep 2008 19:42:33 +0900 Subject: iommu: add iommu_device_max_index IOMMU helper function This function helps IOMMUs to know the highest address that a device can access to. Signed-off-by: FUJITA Tomonori Signed-off-by: Ingo Molnar --- include/linux/iommu-helper.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/iommu-helper.h b/include/linux/iommu-helper.h index c975caf75385..58f41107e4ae 100644 --- a/include/linux/iommu-helper.h +++ b/include/linux/iommu-helper.h @@ -1,3 +1,13 @@ +static inline unsigned long iommu_device_max_index(unsigned long size, + unsigned long offset, + u64 dma_mask) +{ + if (size + offset > dma_mask) + return dma_mask - offset + 1; + else + return size; +} + extern int iommu_is_span_boundary(unsigned int index, unsigned int nr, unsigned long shift, unsigned long boundary_size); -- cgit v1.2.2 From 589fc9a6e2102b498978f6350581ec7fa5aeb032 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Fri, 12 Sep 2008 19:42:34 +0900 Subject: iommu: add dma_get_mask helper function Several IOMMUs do the same thing to get the dma_mask of a device. This adds a helper function to do the same thing to sweep them. Signed-off-by: FUJITA Tomonori Signed-off-by: Ingo Molnar --- include/linux/dma-mapping.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index 6ed50c1642f1..0dba7433af18 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -63,6 +63,13 @@ static inline int is_buffer_dma_capable(u64 mask, dma_addr_t addr, size_t size) #define dma_sync_single dma_sync_single_for_cpu #define dma_sync_sg dma_sync_sg_for_cpu +static inline u64 dma_get_mask(struct device *dev) +{ + if (dev->dma_mask && *dev->dma_mask) + return *dev->dma_mask; + return DMA_32BIT_MASK; +} + extern u64 dma_get_required_mask(struct device *dev); static inline unsigned int dma_get_max_seg_size(struct device *dev) -- cgit v1.2.2 From b2e1b30290539b344cbaff0d9da38012e03aa347 Mon Sep 17 00:00:00 2001 From: "Luis R. Rodriguez" Date: Tue, 9 Sep 2008 23:19:48 -0700 Subject: cfg80211: Add new wireless regulatory infrastructure This adds the new wireless regulatory infrastructure. The main motiviation behind this was to centralize regulatory code as each driver was implementing their own regulatory solution, and to replace the initial centralized code we have where: * only 3 regulatory domains are supported: US, JP and EU * regulatory domains can only be changed through module parameter * all rules were built statically in the kernel We now have support for regulatory domains for many countries and regulatory domains are now queried through a userspace agent through udev allowing distributions to update regulatory rules without updating the kernel. Each driver can regulatory_hint() a regulatory domain based on either their EEPROM mapped regulatory domain value to a respective ISO/IEC 3166-1 country code or pass an internally built regulatory domain. We also add support to let the user set the regulatory domain through userspace in case of faulty EEPROMs to further help compliance. Support for world roaming will be added soon for cards capable of this. For more information see: http://wireless.kernel.org/en/developers/Regulatory/CRDA For now we leave an option to enable the old module parameter, ieee80211_regdom, and to build the 3 old regdomains statically (US, JP and EU). This option is CONFIG_WIRELESS_OLD_REGULATORY. These old static definitions and the module parameter is being scheduled for removal for 2.6.29. Note that if you use this you won't make use of a world regulatory domain as its pointless. If you leave this option enabled and if CRDA is present and you use US or JP we will try to ask CRDA to update us a regulatory domain for us. Signed-off-by: Luis R. Rodriguez Signed-off-by: John W. Linville --- include/linux/nl80211.h | 96 +++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 94 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h index 5e51f4e7600b..9bad65400fba 100644 --- a/include/linux/nl80211.h +++ b/include/linux/nl80211.h @@ -92,6 +92,20 @@ * @NL80211_CMD_SET_BSS: Set BSS attributes for BSS identified by * %NL80211_ATTR_IFINDEX. * + * @NL80211_CMD_SET_REG: Set current regulatory domain. CRDA sends this command + * after being queried by the kernel. CRDA replies by sending a regulatory + * domain structure which consists of %NL80211_ATTR_REG_ALPHA set to our + * current alpha2 if it found a match. It also provides + * NL80211_ATTR_REG_RULE_FLAGS, and a set of regulatory rules. Each + * regulatory rule is a nested set of attributes given by + * %NL80211_ATTR_REG_RULE_FREQ_[START|END] and + * %NL80211_ATTR_FREQ_RANGE_MAX_BW with an attached power rule given by + * %NL80211_ATTR_REG_RULE_POWER_MAX_ANT_GAIN and + * %NL80211_ATTR_REG_RULE_POWER_MAX_EIRP. + * @NL80211_CMD_REQ_SET_REG: ask the wireless core to set the regulatory domain + * to the the specified ISO/IEC 3166-1 alpha2 country code. The core will + * store this as a valid request and then query userspace for it. + * * @NL80211_CMD_MAX: highest used command number * @__NL80211_CMD_AFTER_LAST: internal use */ @@ -131,7 +145,10 @@ enum nl80211_commands { NL80211_CMD_SET_BSS, - /* add commands here */ + NL80211_CMD_SET_REG, + NL80211_CMD_REQ_SET_REG, + + /* add new commands above here */ /* used to define NL80211_CMD_MAX below */ __NL80211_CMD_AFTER_LAST, @@ -197,10 +214,21 @@ enum nl80211_commands { * info given for %NL80211_CMD_GET_MPATH, nested attribute described at * &enum nl80211_mpath_info. * - * * @NL80211_ATTR_MNTR_FLAGS: flags, nested element with NLA_FLAG attributes of * &enum nl80211_mntr_flags. * + * @NL80211_ATTR_REG_ALPHA2: an ISO-3166-alpha2 country code for which the + * current regulatory domain should be set to or is already set to. + * For example, 'CR', for Costa Rica. This attribute is used by the kernel + * to query the CRDA to retrieve one regulatory domain. This attribute can + * also be used by userspace to query the kernel for the currently set + * regulatory domain. We chose an alpha2 as that is also used by the + * IEEE-802.11d country information element to identify a country. + * Users can also simply ask the wireless core to set regulatory domain + * to a specific alpha2. + * @NL80211_ATTR_REG_RULES: a nested array of regulatory domain regulatory + * rules. + * * @NL80211_ATTR_BSS_CTS_PROT: whether CTS protection is enabled (u8, 0 or 1) * @NL80211_ATTR_BSS_SHORT_PREAMBLE: whether short preamble is enabled * (u8, 0 or 1) @@ -265,6 +293,9 @@ enum nl80211_attrs { NL80211_ATTR_SUPPORTED_IFTYPES, + NL80211_ATTR_REG_ALPHA2, + NL80211_ATTR_REG_RULES, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, @@ -278,6 +309,7 @@ enum nl80211_attrs { #define NL80211_ATTR_HT_CAPABILITY NL80211_ATTR_HT_CAPABILITY #define NL80211_MAX_SUPP_RATES 32 +#define NL80211_MAX_SUPP_REG_RULES 32 #define NL80211_TKIP_DATA_OFFSET_ENCR_KEY 0 #define NL80211_TKIP_DATA_OFFSET_TX_MIC_KEY 16 #define NL80211_TKIP_DATA_OFFSET_RX_MIC_KEY 24 @@ -472,6 +504,66 @@ enum nl80211_bitrate_attr { NL80211_BITRATE_ATTR_MAX = __NL80211_BITRATE_ATTR_AFTER_LAST - 1 }; +/** + * enum nl80211_reg_rule_attr - regulatory rule attributes + * @NL80211_ATTR_REG_RULE_FLAGS: a set of flags which specify additional + * considerations for a given frequency range. These are the + * &enum nl80211_reg_rule_flags. + * @NL80211_ATTR_FREQ_RANGE_START: starting frequencry for the regulatory + * rule in KHz. This is not a center of frequency but an actual regulatory + * band edge. + * @NL80211_ATTR_FREQ_RANGE_END: ending frequency for the regulatory rule + * in KHz. This is not a center a frequency but an actual regulatory + * band edge. + * @NL80211_ATTR_FREQ_RANGE_MAX_BW: maximum allowed bandwidth for this + * frequency range, in KHz. + * @NL80211_ATTR_POWER_RULE_MAX_ANT_GAIN: the maximum allowed antenna gain + * for a given frequency range. The value is in mBi (100 * dBi). + * If you don't have one then don't send this. + * @NL80211_ATTR_POWER_RULE_MAX_EIRP: the maximum allowed EIRP for + * a given frequency range. The value is in mBm (100 * dBm). + */ +enum nl80211_reg_rule_attr { + __NL80211_REG_RULE_ATTR_INVALID, + NL80211_ATTR_REG_RULE_FLAGS, + + NL80211_ATTR_FREQ_RANGE_START, + NL80211_ATTR_FREQ_RANGE_END, + NL80211_ATTR_FREQ_RANGE_MAX_BW, + + NL80211_ATTR_POWER_RULE_MAX_ANT_GAIN, + NL80211_ATTR_POWER_RULE_MAX_EIRP, + + /* keep last */ + __NL80211_REG_RULE_ATTR_AFTER_LAST, + NL80211_REG_RULE_ATTR_MAX = __NL80211_REG_RULE_ATTR_AFTER_LAST - 1 +}; + +/** + * enum nl80211_reg_rule_flags - regulatory rule flags + * + * @NL80211_RRF_NO_OFDM: OFDM modulation not allowed + * @NL80211_RRF_NO_CCK: CCK modulation not allowed + * @NL80211_RRF_NO_INDOOR: indoor operation not allowed + * @NL80211_RRF_NO_OUTDOOR: outdoor operation not allowed + * @NL80211_RRF_DFS: DFS support is required to be used + * @NL80211_RRF_PTP_ONLY: this is only for Point To Point links + * @NL80211_RRF_PTMP_ONLY: this is only for Point To Multi Point links + * @NL80211_RRF_PASSIVE_SCAN: passive scan is required + * @NL80211_RRF_NO_IBSS: no IBSS is allowed + */ +enum nl80211_reg_rule_flags { + NL80211_RRF_NO_OFDM = 1<<0, + NL80211_RRF_NO_CCK = 1<<1, + NL80211_RRF_NO_INDOOR = 1<<2, + NL80211_RRF_NO_OUTDOOR = 1<<3, + NL80211_RRF_DFS = 1<<4, + NL80211_RRF_PTP_ONLY = 1<<5, + NL80211_RRF_PTMP_ONLY = 1<<6, + NL80211_RRF_PASSIVE_SCAN = 1<<7, + NL80211_RRF_NO_IBSS = 1<<8, +}; + /** * enum nl80211_mntr_flags - monitor configuration flags * -- cgit v1.2.2 From 29bdc88384c2b24e37e5760df0dc898546083d6b Mon Sep 17 00:00:00 2001 From: Vladimir Sokolovsky Date: Mon, 15 Sep 2008 14:25:23 -0700 Subject: IB/mlx4: Fix up fast register page list format Byte swap the addresses in the page list for fast register work requests to big endian to match what the HCA expectx. Also, the addresses must have the "present" bit set so that the HCA knows it can access them. Otherwise the HCA will fault the first time it accesses the memory region. Signed-off-by: Vladimir Sokolovsky Signed-off-by: Roland Dreier --- include/linux/mlx4/device.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 655ea0d1ee14..b2f944468313 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -141,6 +141,10 @@ enum { MLX4_STAT_RATE_OFFSET = 5 }; +enum { + MLX4_MTT_FLAG_PRESENT = 1 +}; + static inline u64 mlx4_fw_ver(u64 major, u64 minor, u64 subminor) { return (major << 32) | (minor << 16) | subminor; -- cgit v1.2.2 From b08508c40adf3fd1330aabc4f37d3254179776c4 Mon Sep 17 00:00:00 2001 From: Greg KH Date: Tue, 26 Aug 2008 08:20:34 -0700 Subject: PCI: fix compiler warnings in pci_get_subsys() pci_get_subsys() changed in 2.6.26 so that the from pointer is modified when the call is being invoked, so fix up the 'const' marking of it that the compiler is complaining about. Reported-by: Rufus & Azrael Signed-off-by: Greg Kroah-Hartman Signed-off-by: Jesse Barnes --- include/linux/pci.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index c0e14008a3c2..98dc6243a706 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -534,7 +534,7 @@ extern void pci_sort_breadthfirst(void); #ifdef CONFIG_PCI_LEGACY struct pci_dev __deprecated *pci_find_device(unsigned int vendor, unsigned int device, - const struct pci_dev *from); + struct pci_dev *from); struct pci_dev __deprecated *pci_find_slot(unsigned int bus, unsigned int devfn); #endif /* CONFIG_PCI_LEGACY */ @@ -550,7 +550,7 @@ struct pci_dev *pci_get_device(unsigned int vendor, unsigned int device, struct pci_dev *from); struct pci_dev *pci_get_subsys(unsigned int vendor, unsigned int device, unsigned int ss_vendor, unsigned int ss_device, - const struct pci_dev *from); + struct pci_dev *from); struct pci_dev *pci_get_slot(struct pci_bus *bus, unsigned int devfn); struct pci_dev *pci_get_bus_and_slot(unsigned int bus, unsigned int devfn); struct pci_dev *pci_get_class(unsigned int class, struct pci_dev *from); @@ -816,7 +816,7 @@ _PCI_NOP_ALL(write,) static inline struct pci_dev *pci_find_device(unsigned int vendor, unsigned int device, - const struct pci_dev *from) + struct pci_dev *from) { return NULL; } @@ -838,7 +838,7 @@ static inline struct pci_dev *pci_get_subsys(unsigned int vendor, unsigned int device, unsigned int ss_vendor, unsigned int ss_device, - const struct pci_dev *from) + struct pci_dev *from) { return NULL; } -- cgit v1.2.2 From ef3d7714f6b75b51825ad0384b5ce48358427e50 Mon Sep 17 00:00:00 2001 From: David Miller Date: Tue, 16 Sep 2008 15:00:11 -0700 Subject: Fix PNP build failure, bugzilla #11276 This fill fix the following regression list entry: Bug-Entry : http://bugzilla.kernel.org/show_bug.cgi?id=11276 Subject : build error: CONFIG_OPTIMIZE_INLINING=y causes gcc 4.2 to do stupid things Submitter : Randy Dunlap Date : 2008-08-06 17:18 (38 days old) References : http://marc.info/?l=linux-kernel&m=121804329014332&w=4 http://lkml.org/lkml/2008/7/22/353 Handled-By : Bjorn Helgaas Patch : http://lkml.org/lkml/2008/7/22/364 with what I believe is a better fix than the one referenced in the regression entry above. These PNP header interfaces try to work in such a way that you can reference some of them even if PNP is not enabled, and the compiler was expected to optimize everything away. Which is mostly fine, except that there was one interface for which there was not provided an inline "NOP" implementation. Once we add that, all of these compile failures cannot handle any more. pnp: Provide NOP inline implementation of pnp_get_resource() when !PNP Fixes kernel bugzilla #11276. Signed-off-by: David S. Miller Signed-off-by: Linus Torvalds --- include/linux/pnp.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pnp.h b/include/linux/pnp.h index 1ce54b63085d..be764e514e35 100644 --- a/include/linux/pnp.h +++ b/include/linux/pnp.h @@ -21,7 +21,14 @@ struct pnp_dev; /* * Resource Management */ +#ifdef CONFIG_PNP struct resource *pnp_get_resource(struct pnp_dev *, unsigned int, unsigned int); +#else +static inline struct resource *pnp_get_resource(struct pnp_dev *dev, unsigned int type, unsigned int num) +{ + return NULL; +} +#endif static inline int pnp_resource_valid(struct resource *res) { -- cgit v1.2.2 From 6ae19b04ab41a4db0f0c48ec0b78950f6b028823 Mon Sep 17 00:00:00 2001 From: Eric Miao Date: Wed, 10 Sep 2008 12:06:15 -0400 Subject: Input: ads7846 - introduce .gpio_pendown to get pendown state The GPIO connected to ADS7846 nPENIRQ signal is usually used to get the pendown state as well. Introduce a .gpio_pendown, and use this to decide the pendown state if .get_pendown_state is NULL. Signed-off-by: Eric Miao Signed-off-by: Dmitry Torokhov --- include/linux/spi/ads7846.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/spi/ads7846.h b/include/linux/spi/ads7846.h index daf744017a31..05eab2f11e63 100644 --- a/include/linux/spi/ads7846.h +++ b/include/linux/spi/ads7846.h @@ -43,6 +43,9 @@ struct ads7846_platform_data { u16 debounce_tol; /* tolerance used for filtering */ u16 debounce_rep; /* additional consecutive good readings * required after the first two */ + int gpio_pendown; /* the GPIO used to decide the pendown + * state if get_pendown_state == NULL + */ int (*get_pendown_state)(void); int (*filter_init) (struct ads7846_platform_data *pdata, void **filter_data); -- cgit v1.2.2 From 452c1ce218a68b5dbd626397ecfc65ca89dd3cbb Mon Sep 17 00:00:00 2001 From: Chris Snook Date: Sun, 14 Sep 2008 19:56:10 -0500 Subject: atl2: add atl2 driver Driver for Atheros L2 10/100 network device. Includes necessary changes for Kconfig, Makefile, and pci_ids.h. Signed-off-by: Chris Snook Signed-off-by: Jay Cliburn Signed-off-by: Jeff Garzik --- include/linux/pci_ids.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index f1624b396754..90a132ab84a6 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -2213,6 +2213,7 @@ #define PCI_VENDOR_ID_ATTANSIC 0x1969 #define PCI_DEVICE_ID_ATTANSIC_L1 0x1048 +#define PCI_DEVICE_ID_ATTANSIC_L2 0x2048 #define PCI_VENDOR_ID_JMICRON 0x197B #define PCI_DEVICE_ID_JMICRON_JMB360 0x2360 -- cgit v1.2.2 From 01f2e4ead2c51226ed1283ef6a8388ca6f4cff8f Mon Sep 17 00:00:00 2001 From: Scott Feldman Date: Mon, 15 Sep 2008 09:17:11 -0700 Subject: enic: add Cisco 10G Ethernet NIC driver Signed-off-by: Scott Feldman Signed-off-by: Jeff Garzik --- include/linux/pci_ids.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 90a132ab84a6..6f4276d461c0 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -1411,6 +1411,8 @@ #define PCI_DEVICE_ID_EICON_MAESTRAQ_U 0xe013 #define PCI_DEVICE_ID_EICON_MAESTRAP 0xe014 +#define PCI_VENDOR_ID_CISCO 0x1137 + #define PCI_VENDOR_ID_ZIATECH 0x1138 #define PCI_DEVICE_ID_ZIATECH_5550_HC 0x5550 -- cgit v1.2.2 From 4fd5f812c23c7deee6425f4a318e85c317cd1d6c Mon Sep 17 00:00:00 2001 From: Lennert Buytenhek Date: Tue, 26 Aug 2008 13:08:46 +0200 Subject: phylib: allow incremental scanning of an mii bus This patch splits the bus scanning code in mdiobus_register() off into a separate function, and makes this function available for calling from external code. This allows incrementally scanning an mii bus, e.g. as information about which addresses are 'safe' to scan becomes available. Signed-off-by: Lennert Buytenhek Acked-by: Andy Fleming --- include/linux/phy.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/phy.h b/include/linux/phy.h index 7224c4099a28..5f170f5b1a30 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -410,6 +410,8 @@ int phy_start_aneg(struct phy_device *phydev); int mdiobus_register(struct mii_bus *bus); void mdiobus_unregister(struct mii_bus *bus); +struct phy_device *mdiobus_scan(struct mii_bus *bus, int addr); + void phy_sanitize_settings(struct phy_device *phydev); int phy_stop_interrupts(struct phy_device *phydev); int phy_enable_interrupts(struct phy_device *phydev); -- cgit v1.2.2 From 07a2c01a0c2a0cb4581a67d50d4f17cb4d2457c4 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Fri, 19 Sep 2008 02:02:05 +0900 Subject: convert swiotlb to use dma_get_mask swiotlb can use dma_get_mask() instead of the homegrown function. Signed-off-by: FUJITA Tomonori Cc: tony.luck@intel.com Signed-off-by: Ingo Molnar --- include/linux/dma-mapping.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index 0dba7433af18..ba9114ec5d3a 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -65,7 +65,7 @@ static inline int is_buffer_dma_capable(u64 mask, dma_addr_t addr, size_t size) static inline u64 dma_get_mask(struct device *dev) { - if (dev->dma_mask && *dev->dma_mask) + if (dev && dev->dma_mask && *dev->dma_mask) return *dev->dma_mask; return DMA_32BIT_MASK; } -- cgit v1.2.2 From 006f582c73f4eda35e06fd323193c3df43fb3459 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Sat, 20 Sep 2008 21:20:20 -0700 Subject: tcp: convert retransmit_cnt_hint to seqno MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Main benefit in this is that we can then freely point the retransmit_skb_hint to anywhere we want to because there's no longer need to know what would be the count changes involve, and since this is really used only as a terminator, unnecessary work is one time walk at most, and if some retransmissions are necessary after that point later on, the walk is not full waste of time anyway. Since retransmit_high must be kept valid, all lost markers must ensure that. Now I also have learned how those "holes" in the rexmittable skbs can appear, mtu probe does them. So I removed the misleading comment as well. Signed-off-by: Ilpo Järvinen Signed-off-by: David S. Miller --- include/linux/tcp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 2e2557388e36..d7637c4b2840 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -358,7 +358,7 @@ struct tcp_sock { */ int lost_cnt_hint; - int retransmit_cnt_hint; + u32 retransmit_high; /* L-bits may be on up to this seqno */ u32 lost_retrans_low; /* Sent seq after any rxmit (lowest) */ -- cgit v1.2.2 From 0e1c54c2a405494281e0639aacc90db03b50ae77 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Sat, 20 Sep 2008 21:24:21 -0700 Subject: tcp: reorganize retransmit code loops MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Both loops are quite similar, so they can be combined with little effort. As a result, forward_skb_hint becomes obsolete as well. Signed-off-by: Ilpo Järvinen Signed-off-by: David S. Miller --- include/linux/tcp.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index d7637c4b2840..767290628292 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -342,7 +342,6 @@ struct tcp_sock { struct sk_buff* lost_skb_hint; struct sk_buff *scoreboard_skb_hint; struct sk_buff *retransmit_skb_hint; - struct sk_buff *forward_skb_hint; struct sk_buff_head out_of_order_queue; /* Out of order segments go here */ -- cgit v1.2.2 From 67fed45930fa31e92c11beb3a3dbf83a1a92a58d Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sun, 21 Sep 2008 22:36:24 -0700 Subject: net: Add new interfaces for SKB list light-weight init and splicing. This will be used by subsequent changesets. Signed-off-by: David S. Miller --- include/linux/skbuff.h | 96 ++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 94 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index aa80ad9cbc88..027b06170b40 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -660,6 +660,22 @@ static inline __u32 skb_queue_len(const struct sk_buff_head *list_) return list_->qlen; } +/** + * __skb_queue_head_init - initialize non-spinlock portions of sk_buff_head + * @list: queue to initialize + * + * This initializes only the list and queue length aspects of + * an sk_buff_head object. This allows to initialize the list + * aspects of an sk_buff_head without reinitializing things like + * the spinlock. It can also be used for on-stack sk_buff_head + * objects where the spinlock is known to not be used. + */ +static inline void __skb_queue_head_init(struct sk_buff_head *list) +{ + list->prev = list->next = (struct sk_buff *)list; + list->qlen = 0; +} + /* * This function creates a split out lock class for each invocation; * this is needed for now since a whole lot of users of the skb-queue @@ -671,8 +687,7 @@ static inline __u32 skb_queue_len(const struct sk_buff_head *list_) static inline void skb_queue_head_init(struct sk_buff_head *list) { spin_lock_init(&list->lock); - list->prev = list->next = (struct sk_buff *)list; - list->qlen = 0; + __skb_queue_head_init(list); } static inline void skb_queue_head_init_class(struct sk_buff_head *list, @@ -699,6 +714,83 @@ static inline void __skb_insert(struct sk_buff *newsk, list->qlen++; } +static inline void __skb_queue_splice(const struct sk_buff_head *list, + struct sk_buff *prev, + struct sk_buff *next) +{ + struct sk_buff *first = list->next; + struct sk_buff *last = list->prev; + + first->prev = prev; + prev->next = first; + + last->next = next; + next->prev = last; +} + +/** + * skb_queue_splice - join two skb lists, this is designed for stacks + * @list: the new list to add + * @head: the place to add it in the first list + */ +static inline void skb_queue_splice(const struct sk_buff_head *list, + struct sk_buff_head *head) +{ + if (!skb_queue_empty(list)) { + __skb_queue_splice(list, (struct sk_buff *) head, head->next); + head->qlen = list->qlen; + } +} + +/** + * skb_queue_splice - join two skb lists and reinitialise the emptied list + * @list: the new list to add + * @head: the place to add it in the first list + * + * The list at @list is reinitialised + */ +static inline void skb_queue_splice_init(struct sk_buff_head *list, + struct sk_buff_head *head) +{ + if (!skb_queue_empty(list)) { + __skb_queue_splice(list, (struct sk_buff *) head, head->next); + head->qlen = list->qlen; + __skb_queue_head_init(list); + } +} + +/** + * skb_queue_splice_tail - join two skb lists, each list being a queue + * @list: the new list to add + * @head: the place to add it in the first list + */ +static inline void skb_queue_splice_tail(const struct sk_buff_head *list, + struct sk_buff_head *head) +{ + if (!skb_queue_empty(list)) { + __skb_queue_splice(list, head->prev, (struct sk_buff *) head); + head->qlen = list->qlen; + } +} + +/** + * skb_queue_splice_tail - join two skb lists and reinitialise the emptied list + * @list: the new list to add + * @head: the place to add it in the first list + * + * Each of the lists is a queue. + * The list at @list is reinitialised + */ +static inline void skb_queue_splice_tail_init(struct sk_buff_head *list, + struct sk_buff_head *head) +{ + if (!skb_queue_empty(list)) { + __skb_queue_splice(list, head->prev, (struct sk_buff *) head); + head->qlen = list->qlen; + __skb_queue_head_init(list); + } +} + /** * __skb_queue_after - queue a buffer at the list head * @list: list to use -- cgit v1.2.2 From 6d80c39f9155e289fe8037a8b6352931ff916ceb Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Mon, 22 Sep 2008 07:29:31 +0100 Subject: GFS2: Add UUID to GFS2 sb This patch adds a UUID to the GFS2 sb structure. This field is not actually referenced from kernel space at all, but is added for completeness and due to the userland tools which get their on-disk structure information from the gfs2_ondisk.h header file. Since we have to be backwards compatible, we will assume that any GFS2 sb for which the UUID is all 0 does not have a UUID as such. We should then be (after some userland changes) able to support the -U mount option. This addresses Fedora bugzilla #242689 Signed-off-by: Steven Whitehouse --- include/linux/gfs2_ondisk.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/gfs2_ondisk.h b/include/linux/gfs2_ondisk.h index c3c19f926e6f..14d0df0b5749 100644 --- a/include/linux/gfs2_ondisk.h +++ b/include/linux/gfs2_ondisk.h @@ -118,7 +118,11 @@ struct gfs2_sb { char sb_lockproto[GFS2_LOCKNAME_LEN]; char sb_locktable[GFS2_LOCKNAME_LEN]; - /* In gfs1, quota and license dinodes followed */ + + struct gfs2_inum __pad3; /* Was quota inode in gfs1 */ + struct gfs2_inum __pad4; /* Was licence inode in gfs1 */ +#define GFS2_HAS_UUID 1 + __u8 sb_uuid[16]; /* The UUID, maybe 0 for backwards compat */ }; /* -- cgit v1.2.2 From 38783e671399b5405f1fd177d602c400a9577ae6 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 22 Sep 2008 01:15:02 -0700 Subject: isdn: isdn_ppp: Use SKB list facilities instead of home-grown implementation. Signed-off-by: David S. Miller --- include/linux/isdn_ppp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/isdn_ppp.h b/include/linux/isdn_ppp.h index 8687a7dc0632..4c218ee7587a 100644 --- a/include/linux/isdn_ppp.h +++ b/include/linux/isdn_ppp.h @@ -157,7 +157,7 @@ typedef struct { typedef struct { int mp_mrru; /* unused */ - struct sk_buff * frags; /* fragments sl list -- use skb->next */ + struct sk_buff_head frags; /* fragments sl list */ long frames; /* number of frames in the frame list */ unsigned int seq; /* last processed packet seq #: any packets * with smaller seq # will be dropped -- cgit v1.2.2 From 15afe09bf496ae10c989e1a375a6b5da7bd3e16e Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Sat, 20 Sep 2008 23:38:02 +0200 Subject: sched: wakeup preempt when small overlap Lin Ming reported a 10% OLTP regression against 2.6.27-rc4. The difference seems to come from different preemption agressiveness, which affects the cache footprint of the workload and its effective cache trashing. Aggresively preempt a task if its avg overlap is very small, this should avoid the task going to sleep and find it still running when we schedule back to it - saving a wakeup. Reported-by: Lin Ming Signed-off-by: Peter Zijlstra Signed-off-by: Ingo Molnar --- include/linux/sched.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index b3b7a8f32477..d8e699b55858 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -897,7 +897,7 @@ struct sched_class { void (*yield_task) (struct rq *rq); int (*select_task_rq)(struct task_struct *p, int sync); - void (*check_preempt_curr) (struct rq *rq, struct task_struct *p); + void (*check_preempt_curr) (struct rq *rq, struct task_struct *p, int sync); struct task_struct * (*pick_next_task) (struct rq *rq); void (*put_prev_task) (struct rq *rq, struct task_struct *p); -- cgit v1.2.2 From d26dbc5cf94b0a28acc947285c3b54814a73cb2e Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Mon, 22 Sep 2008 22:35:07 +0900 Subject: iommu: export iommu_area_reserve helper function x86 has set_bit_string() that does the exact same thing that set_bit_area() in lib/iommu-helper.c does. This patch exports set_bit_area() in lib/iommu-helper.c as iommu_area_reserve(), converts GART, Calgary, and AMD IOMMU to use it. Signed-off-by: FUJITA Tomonori Acked-by: Joerg Roedel Signed-off-by: Ingo Molnar --- include/linux/iommu-helper.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/iommu-helper.h b/include/linux/iommu-helper.h index 58f41107e4ae..786539e432d7 100644 --- a/include/linux/iommu-helper.h +++ b/include/linux/iommu-helper.h @@ -11,6 +11,7 @@ static inline unsigned long iommu_device_max_index(unsigned long size, extern int iommu_is_span_boundary(unsigned int index, unsigned int nr, unsigned long shift, unsigned long boundary_size); +extern void iommu_area_reserve(unsigned long *map, unsigned long i, int len); extern unsigned long iommu_area_alloc(unsigned long *map, unsigned long size, unsigned long start, unsigned int nr, unsigned long shift, -- cgit v1.2.2 From 5c1824587f0797373c95719a196f6098f7c6d20c Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 22 Sep 2008 19:48:19 -0700 Subject: ipsec: Fix xfrm_state_walk race MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As discovered by Timo Teräs, the currently xfrm_state_walk scheme is racy because if a second dump finishes before the first, we may free xfrm states that the first dump would walk over later. This patch fixes this by storing the dumps in a list in order to calculate the correct completion counter which cures this problem. I've expanded netlink_cb in order to accomodate the extra state related to this. It shouldn't be a big deal since netlink_cb is kmalloced for each dump and we're just increasing it by 4 or 8 bytes. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- include/linux/netlink.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netlink.h b/include/linux/netlink.h index 9ff1b54908f3..cbba7760545b 100644 --- a/include/linux/netlink.h +++ b/include/linux/netlink.h @@ -220,7 +220,7 @@ struct netlink_callback int (*dump)(struct sk_buff * skb, struct netlink_callback *cb); int (*done)(struct netlink_callback *cb); int family; - long args[6]; + long args[7]; }; struct netlink_notify -- cgit v1.2.2 From bce7b15426cac3000bf6a9cf59d9356ef0be2dec Mon Sep 17 00:00:00 2001 From: Remi Denis-Courmont Date: Mon, 22 Sep 2008 19:51:15 -0700 Subject: Phonet: global definitions Signed-off-by: Remi Denis-Courmont Signed-off-by: David S. Miller --- include/linux/if_ether.h | 1 + include/linux/if_phonet.h | 14 ++++++ include/linux/phonet.h | 125 ++++++++++++++++++++++++++++++++++++++++++++++ include/linux/rtnetlink.h | 4 ++ include/linux/socket.h | 4 +- 5 files changed, 147 insertions(+), 1 deletion(-) create mode 100644 include/linux/if_phonet.h create mode 100644 include/linux/phonet.h (limited to 'include/linux') diff --git a/include/linux/if_ether.h b/include/linux/if_ether.h index 5028e0b6082b..723a1c5fbc6c 100644 --- a/include/linux/if_ether.h +++ b/include/linux/if_ether.h @@ -100,6 +100,7 @@ #define ETH_P_ECONET 0x0018 /* Acorn Econet */ #define ETH_P_HDLC 0x0019 /* HDLC frames */ #define ETH_P_ARCNET 0x001A /* 1A for ArcNet :-) */ +#define ETH_P_PHONET 0x00F5 /* Nokia Phonet frames */ /* * This is an Ethernet frame header. diff --git a/include/linux/if_phonet.h b/include/linux/if_phonet.h new file mode 100644 index 000000000000..22df25fbc4e2 --- /dev/null +++ b/include/linux/if_phonet.h @@ -0,0 +1,14 @@ +/* + * File: if_phonet.h + * + * Phonet interface kernel definitions + * + * Copyright (C) 2008 Nokia Corporation. All rights reserved. + */ + +#define PHONET_HEADER_LEN 8 /* Phonet header length */ + +#define PHONET_MIN_MTU 6 +/* 6 bytes header + 65535 bytes payload */ +#define PHONET_MAX_MTU 65541 +#define PHONET_DEV_MTU PHONET_MAX_MTU diff --git a/include/linux/phonet.h b/include/linux/phonet.h new file mode 100644 index 000000000000..6a764f8584a4 --- /dev/null +++ b/include/linux/phonet.h @@ -0,0 +1,125 @@ +/** + * file phonet.h + * + * Phonet sockets kernel interface + * + * Copyright (C) 2008 Nokia Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA + * 02110-1301 USA + */ + +#ifndef LINUX_PHONET_H +#define LINUX_PHONET_H + +/* Automatic protocol selection */ +#define PN_PROTO_TRANSPORT 0 +/* Phonet datagram socket */ +#define PN_PROTO_PHONET 1 +#define PHONET_NPROTO 2 + +#define PNADDR_ANY 0 +#define PNPORT_RESOURCE_ROUTING 0 + +/* Phonet protocol header */ +struct phonethdr { + __u8 pn_rdev; + __u8 pn_sdev; + __u8 pn_res; + __be16 pn_length; + __u8 pn_robj; + __u8 pn_sobj; +} __attribute__((packed)); + +/* Phonet socket address structure */ +struct sockaddr_pn { + sa_family_t spn_family; + __u8 spn_obj; + __u8 spn_dev; + __u8 spn_resource; + __u8 spn_zero[sizeof(struct sockaddr) - sizeof(sa_family_t) - 3]; +} __attribute__ ((packed)); + +static inline __u16 pn_object(__u8 addr, __u16 port) +{ + return (addr << 8) | (port & 0x3ff); +} + +static inline __u8 pn_obj(__u16 handle) +{ + return handle & 0xff; +} + +static inline __u8 pn_dev(__u16 handle) +{ + return handle >> 8; +} + +static inline __u16 pn_port(__u16 handle) +{ + return handle & 0x3ff; +} + +static inline __u8 pn_addr(__u16 handle) +{ + return (handle >> 8) & 0xfc; +} + +static inline void pn_sockaddr_set_addr(struct sockaddr_pn *spn, __u8 addr) +{ + spn->spn_dev &= 0x03; + spn->spn_dev |= addr & 0xfc; +} + +static inline void pn_sockaddr_set_port(struct sockaddr_pn *spn, __u16 port) +{ + spn->spn_dev &= 0xfc; + spn->spn_dev |= (port >> 8) & 0x03; + spn->spn_obj = port & 0xff; +} + +static inline void pn_sockaddr_set_object(struct sockaddr_pn *spn, + __u16 handle) +{ + spn->spn_dev = pn_dev(handle); + spn->spn_obj = pn_obj(handle); +} + +static inline void pn_sockaddr_set_resource(struct sockaddr_pn *spn, + __u8 resource) +{ + spn->spn_resource = resource; +} + +static inline __u8 pn_sockaddr_get_addr(const struct sockaddr_pn *spn) +{ + return spn->spn_dev & 0xfc; +} + +static inline __u16 pn_sockaddr_get_port(const struct sockaddr_pn *spn) +{ + return ((spn->spn_dev & 0x03) << 8) | spn->spn_obj; +} + +static inline __u16 pn_sockaddr_get_object(const struct sockaddr_pn *spn) +{ + return pn_object(spn->spn_dev, spn->spn_obj); +} + +static inline __u8 pn_sockaddr_get_resource(const struct sockaddr_pn *spn) +{ + return spn->spn_resource; +} + +#endif diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index ca643b13b026..2b3d51c6ec9c 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -582,6 +582,10 @@ enum rtnetlink_groups { #define RTNLGRP_IPV6_RULE RTNLGRP_IPV6_RULE RTNLGRP_ND_USEROPT, #define RTNLGRP_ND_USEROPT RTNLGRP_ND_USEROPT + RTNLGRP_PHONET_IFADDR, +#define RTNLGRP_PHONET_IFADDR RTNLGRP_PHONET_IFADDR + RTNLGRP_PHONET_ROUTE, +#define RTNLGRP_PHONET_ROUTE RTNLGRP_PHONET_ROUTE __RTNLGRP_MAX }; #define RTNLGRP_MAX (__RTNLGRP_MAX - 1) diff --git a/include/linux/socket.h b/include/linux/socket.h index dc5086fe7736..818ca33bf79f 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -190,7 +190,8 @@ struct ucred { #define AF_IUCV 32 /* IUCV sockets */ #define AF_RXRPC 33 /* RxRPC sockets */ #define AF_ISDN 34 /* mISDN sockets */ -#define AF_MAX 35 /* For now.. */ +#define AF_PHONET 35 /* Phonet sockets */ +#define AF_MAX 36 /* For now.. */ /* Protocol families, same as address families. */ #define PF_UNSPEC AF_UNSPEC @@ -227,6 +228,7 @@ struct ucred { #define PF_IUCV AF_IUCV #define PF_RXRPC AF_RXRPC #define PF_ISDN AF_ISDN +#define PF_PHONET AF_PHONET #define PF_MAX AF_MAX /* Maximum queue length specifiable by listen. */ -- cgit v1.2.2 From ba113a94b7503ee23ffe819e7045134b0c1d31de Mon Sep 17 00:00:00 2001 From: Remi Denis-Courmont Date: Mon, 22 Sep 2008 20:05:19 -0700 Subject: Phonet: common socket glue This provides the socket API for the Phonet protocols family. Signed-off-by: Remi Denis-Courmont Signed-off-by: David S. Miller --- include/linux/phonet.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/phonet.h b/include/linux/phonet.h index 6a764f8584a4..001c0e679099 100644 --- a/include/linux/phonet.h +++ b/include/linux/phonet.h @@ -32,6 +32,9 @@ #define PNADDR_ANY 0 #define PNPORT_RESOURCE_ROUTING 0 +/* ioctls */ +#define SIOCPNGETOBJECT (SIOCPROTOPRIVATE + 0) + /* Phonet protocol header */ struct phonethdr { __u8 pn_rdev; -- cgit v1.2.2 From 5f77076d75d35c9f5619e1f9d7e7428a627f65e6 Mon Sep 17 00:00:00 2001 From: Remi Denis-Courmont Date: Mon, 22 Sep 2008 20:08:04 -0700 Subject: Phonet: provide MAC header operations MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Rémi Denis-Courmont Signed-off-by: David S. Miller --- include/linux/if_phonet.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/if_phonet.h b/include/linux/if_phonet.h index 22df25fbc4e2..7e989216ec17 100644 --- a/include/linux/if_phonet.h +++ b/include/linux/if_phonet.h @@ -12,3 +12,7 @@ /* 6 bytes header + 65535 bytes payload */ #define PHONET_MAX_MTU 65541 #define PHONET_DEV_MTU PHONET_MAX_MTU + +#ifdef __KERNEL__ +extern struct header_ops phonet_header_ops; +#endif -- cgit v1.2.2 From be0c52bfed7f7828494fa00060efd5d758e92580 Mon Sep 17 00:00:00 2001 From: Remi Denis-Courmont Date: Mon, 22 Sep 2008 20:09:13 -0700 Subject: Phonet: emit errors when a packet cannot be delivered locally When there is no listener socket for a received packet, send an error back to the sender. Signed-off-by: Remi Denis-Courmont Signed-off-by: David S. Miller --- include/linux/phonet.h | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) (limited to 'include/linux') diff --git a/include/linux/phonet.h b/include/linux/phonet.h index 001c0e679099..3a027f588a4a 100644 --- a/include/linux/phonet.h +++ b/include/linux/phonet.h @@ -45,6 +45,38 @@ struct phonethdr { __u8 pn_sobj; } __attribute__((packed)); +/* Common Phonet payload header */ +struct phonetmsg { + __u8 pn_trans_id; /* transaction ID */ + __u8 pn_msg_id; /* message type */ + union { + struct { + __u8 pn_submsg_id; /* message subtype */ + __u8 pn_data[5]; + } base; + struct { + __u16 pn_e_res_id; /* extended resource ID */ + __u8 pn_e_submsg_id; /* message subtype */ + __u8 pn_e_data[3]; + } ext; + } pn_msg_u; +}; +#define PN_COMMON_MESSAGE 0xF0 +#define PN_PREFIX 0xE0 /* resource for extended messages */ +#define pn_submsg_id pn_msg_u.base.pn_submsg_id +#define pn_e_submsg_id pn_msg_u.ext.pn_e_submsg_id +#define pn_e_res_id pn_msg_u.ext.pn_e_res_id +#define pn_data pn_msg_u.base.pn_data +#define pn_e_data pn_msg_u.ext.pn_e_data + +/* data for unreachable errors */ +#define PN_COMM_SERVICE_NOT_IDENTIFIED_RESP 0x01 +#define PN_COMM_ISA_ENTITY_NOT_REACHABLE_RESP 0x14 +#define pn_orig_msg_id pn_data[0] +#define pn_status pn_data[1] +#define pn_e_orig_msg_id pn_e_data[0] +#define pn_e_status pn_e_data[1] + /* Phonet socket address structure */ struct sockaddr_pn { sa_family_t spn_family; -- cgit v1.2.2 From 0b815a1a6d43ab498674b8430c8c35ab08487a16 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Mon, 22 Sep 2008 21:28:11 -0700 Subject: net: network device name ifalias support This patch add support for keeping an additional character alias associated with an network interface. This is useful for maintaining the SNMP ifAlias value which is a user defined value. Routers use this to hold information like which circuit or line it is connected to. It is just an arbitrary text label on the network device. There are two exposed interfaces with this patch, the value can be read/written either via netlink or sysfs. This could be maintained just by the snmp daemon, but it is more generally useful for other management tools, and the kernel is good place to act as an agreed upon interface to store it. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- include/linux/if.h | 1 + include/linux/if_link.h | 1 + include/linux/netdevice.h | 3 +++ 3 files changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/if.h b/include/linux/if.h index 5c9d1fa93fef..65246846c844 100644 --- a/include/linux/if.h +++ b/include/linux/if.h @@ -24,6 +24,7 @@ #include /* for "__user" et al */ #define IFNAMSIZ 16 +#define IFALIASZ 256 #include /* Standard interface flags (netdevice->flags). */ diff --git a/include/linux/if_link.h b/include/linux/if_link.h index 84c3492ae5cb..f9032c88716a 100644 --- a/include/linux/if_link.h +++ b/include/linux/if_link.h @@ -79,6 +79,7 @@ enum IFLA_LINKINFO, #define IFLA_LINKINFO IFLA_LINKINFO IFLA_NET_NS_PID, + IFLA_IFALIAS, __IFLA_MAX }; diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 488c56e649b5..d675df08b946 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -471,6 +471,8 @@ struct net_device char name[IFNAMSIZ]; /* device name hash chain */ struct hlist_node name_hlist; + /* snmp alias */ + char *ifalias; /* * I/O specific fields @@ -1224,6 +1226,7 @@ extern int dev_ethtool(struct net *net, struct ifreq *); extern unsigned dev_get_flags(const struct net_device *); extern int dev_change_flags(struct net_device *, unsigned); extern int dev_change_name(struct net_device *, char *); +extern int dev_set_alias(struct net_device *, const char *, size_t); extern int dev_change_net_namespace(struct net_device *, struct net *, const char *); extern int dev_set_mtu(struct net_device *, int); -- cgit v1.2.2 From 1d4a31dde95af56edac4dee268249a29a21fa7c0 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 22 Sep 2008 21:57:21 -0700 Subject: net: Fix bus in SKB queue splicing interfaces. Handle the case of head being non-empty, by adding list->qlen to head->qlen instead of using direct assignment. Signed-off-by: David S. Miller --- include/linux/skbuff.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 027b06170b40..4a144e8d0538 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -738,7 +738,7 @@ static inline void skb_queue_splice(const struct sk_buff_head *list, { if (!skb_queue_empty(list)) { __skb_queue_splice(list, (struct sk_buff *) head, head->next); - head->qlen = list->qlen; + head->qlen += list->qlen; } } @@ -754,7 +754,7 @@ static inline void skb_queue_splice_init(struct sk_buff_head *list, { if (!skb_queue_empty(list)) { __skb_queue_splice(list, (struct sk_buff *) head, head->next); - head->qlen = list->qlen; + head->qlen += list->qlen; __skb_queue_head_init(list); } } @@ -769,7 +769,7 @@ static inline void skb_queue_splice_tail(const struct sk_buff_head *list, { if (!skb_queue_empty(list)) { __skb_queue_splice(list, head->prev, (struct sk_buff *) head); - head->qlen = list->qlen; + head->qlen += list->qlen; } } @@ -786,7 +786,7 @@ static inline void skb_queue_splice_tail_init(struct sk_buff_head *list, { if (!skb_queue_empty(list)) { __skb_queue_splice(list, head->prev, (struct sk_buff *) head); - head->qlen = list->qlen; + head->qlen += list->qlen; __skb_queue_head_init(list); } } -- cgit v1.2.2 From fc7ebb212d3e51d1188948d975aa93dbb0f58b25 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 23 Sep 2008 00:34:07 -0700 Subject: net: Add skb_queue_is_last(). Several bits of code want to know "is this the last SKB in a queue", and all of them implement this by hand. Provide an common interface to make this check. Signed-off-by: David S. Miller --- include/linux/skbuff.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 4a144e8d0538..3a5838da160e 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -472,6 +472,19 @@ static inline int skb_queue_empty(const struct sk_buff_head *list) return list->next == (struct sk_buff *)list; } +/** + * skb_queue_is_last - check if skb is the last entry in the queue + * @list: queue head + * @skb: buffer + * + * Returns true if @skb is the last buffer on the list. + */ +static inline bool skb_queue_is_last(const struct sk_buff_head *list, + const struct sk_buff *skb) +{ + return (skb->next == (struct sk_buff *) list); +} + /** * skb_get - reference buffer * @skb: buffer to reference -- cgit v1.2.2 From 249c8b42c7e5e6f33d0ff983041f08278b137e53 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 23 Sep 2008 00:44:42 -0700 Subject: net: Add skb_queue_next(). A lot of code wants to iterate over an SKB queue at the top level using it's own control structure and iterator scheme. Provide skb_queue_next(), which is only valid to invoke if skb_queue_is_last() returns false. Signed-off-by: David S. Miller --- include/linux/skbuff.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 3a5838da160e..d2f1778877d7 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -485,6 +485,24 @@ static inline bool skb_queue_is_last(const struct sk_buff_head *list, return (skb->next == (struct sk_buff *) list); } +/** + * skb_queue_next - return the next packet in the queue + * @list: queue head + * @skb: current buffer + * + * Return the next packet in @list after @skb. It is only valid to + * call this if skb_queue_is_last() evaluates to false. + */ +static inline struct sk_buff *skb_queue_next(const struct sk_buff_head *list, + const struct sk_buff *skb) +{ + /* This BUG_ON may seem severe, but if we just return then we + * are going to dereference garbage. + */ + BUG_ON(skb_queue_is_last(list, skb)); + return skb->next; +} + /** * skb_get - reference buffer * @skb: buffer to reference -- cgit v1.2.2 From 1164f52a244204830c7625b3c22812781996d7b4 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 23 Sep 2008 00:49:44 -0700 Subject: net: Add skb_queue_walk_from() and skb_queue_walk_from_safe(). These will be used by TCP write queue handling and elsewhere. Signed-off-by: David S. Miller --- include/linux/skbuff.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index d2f1778877d7..a19ea43fea02 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1571,6 +1571,15 @@ static inline int pskb_trim_rcsum(struct sk_buff *skb, unsigned int len) skb != (struct sk_buff *)(queue); \ skb = tmp, tmp = skb->next) +#define skb_queue_walk_from(queue, skb) \ + for (; prefetch(skb->next), (skb != (struct sk_buff *)(queue)); \ + skb = skb->next) + +#define skb_queue_walk_from_safe(queue, skb, tmp) \ + for (tmp = skb->next; \ + skb != (struct sk_buff *)(queue); \ + skb = tmp, tmp = skb->next) + #define skb_queue_reverse_walk(queue, skb) \ for (skb = (queue)->prev; \ prefetch(skb->prev), (skb != (struct sk_buff *)(queue)); \ -- cgit v1.2.2 From c32a162fd420fe8dfb049db941b2438061047fcc Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Mon, 22 Sep 2008 13:57:43 -0700 Subject: smb.h: do not include linux/time.h in userspace linux/time.h conflicts with time.h from glibc It breaks building smbmount from samba. It's regression introduced by commit 76308da (" smb.h: uses struct timespec but didn't include linux/time.h"). Signed-off-by: Kirill A. Shutemov Cc: [2.6.26.x] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/smb.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/smb.h b/include/linux/smb.h index caa43b2370cb..82fefddc5987 100644 --- a/include/linux/smb.h +++ b/include/linux/smb.h @@ -11,7 +11,9 @@ #include #include +#ifdef __KERNEL__ #include +#endif enum smb_protocol { SMB_PROTOCOL_NONE, -- cgit v1.2.2 From faa312da9cd0b044bdc84483162c6ee10b9c83c0 Mon Sep 17 00:00:00 2001 From: Eric Miao Date: Fri, 29 Aug 2008 04:18:43 +0800 Subject: lcd: allow lcd device to handle mode change events Some LCD panels are capable of different resolutions, and is allowed to change at run-time, so to make "struct lcd_device" to be able to handle mode change events here. Signed-off-by: Eric Miao Acked-by: Krzysztof Helt Signed-off-by: Russell King --- include/linux/lcd.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/lcd.h b/include/linux/lcd.h index 173febac6656..c67fecafff90 100644 --- a/include/linux/lcd.h +++ b/include/linux/lcd.h @@ -11,6 +11,7 @@ #include #include #include +#include /* Notes on locking: * @@ -45,6 +46,8 @@ struct lcd_ops { int (*get_contrast)(struct lcd_device *); /* Set LCD panel contrast */ int (*set_contrast)(struct lcd_device *, int contrast); + /* Set LCD panel mode (resolutions ...) */ + int (*set_mode)(struct lcd_device *, struct fb_videomode *); /* Check if given framebuffer device is the one LCD is bound to; return 0 if not, !=0 if it is. If NULL, lcd always matches the fb. */ int (*check_fb)(struct lcd_device *, struct fb_info *); -- cgit v1.2.2 From b18250a8f66050bd2a52287cd543fb93100e8ee0 Mon Sep 17 00:00:00 2001 From: Eric Miao Date: Fri, 29 Aug 2008 04:21:44 +0800 Subject: lcd: add SPI-based LCD and backlight driver for SHARP corgi/spitz The driver is based on different source files including corgi_ssp.c, corgi_lcd.c and corgi_bl.c, previously authored by Richard Purdie and many others. The LCD and Backlight device actually share the same SPI device, so they are made into this single driver. Signed-off-by: Eric Miao Cc: Richard Purdie Signed-off-by: Russell King --- include/linux/spi/corgi_lcd.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) create mode 100644 include/linux/spi/corgi_lcd.h (limited to 'include/linux') diff --git a/include/linux/spi/corgi_lcd.h b/include/linux/spi/corgi_lcd.h new file mode 100644 index 000000000000..3c53ac26c8d1 --- /dev/null +++ b/include/linux/spi/corgi_lcd.h @@ -0,0 +1,16 @@ +#ifndef __LINUX_SPI_CORGI_LCD_H +#define __LINUX_SPI_CORGI_LCD_H + +#define CORGI_LCD_MODE_QVGA 1 +#define CORGI_LCD_MODE_VGA 2 + +struct corgi_lcd_platform_data { + int init_mode; + int max_intensity; + int default_intensity; + + void (*notify)(int intensity); + void (*kick_battery)(void); +}; + +#endif /* __LINUX_SPI_CORGI_LCD_H */ -- cgit v1.2.2 From bfdcaa3b6899bbfc6ba633aff3f5f2422486c8c1 Mon Sep 17 00:00:00 2001 From: Eric Miao Date: Fri, 29 Aug 2008 05:57:20 +0800 Subject: lcd: add corgibl_limit_intensity() to corgi_lcd This is not generic enough, added here for backward compatibility. And make this an individual commit so future revert will be a bit easier. Signed-off-by: Eric Miao Cc: Richard Purdie Signed-off-by: Russell King --- include/linux/spi/corgi_lcd.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/spi/corgi_lcd.h b/include/linux/spi/corgi_lcd.h index 3c53ac26c8d1..b6161aae2752 100644 --- a/include/linux/spi/corgi_lcd.h +++ b/include/linux/spi/corgi_lcd.h @@ -8,6 +8,7 @@ struct corgi_lcd_platform_data { int init_mode; int max_intensity; int default_intensity; + int limit_mask; void (*notify)(int intensity); void (*kick_battery)(void); -- cgit v1.2.2 From 79617deeebb9cf089e2bc2aad19743b1209043f6 Mon Sep 17 00:00:00 2001 From: YanBo Date: Mon, 22 Sep 2008 13:30:32 +0800 Subject: mac80211: mesh portal functionality support Currently the mesh code doesn't support bridging mesh point interfaces with wired ethernet or AP to construct an MPP or MAP. This patch adds code to support the "6 address frame format packet" functionality to mesh point interfaces. Now the mesh network can be used as backhaul for end to end communication. Signed-off-by: Li YanBo Signed-off-by: John W. Linville --- include/linux/ieee80211.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index abc1abc63bf0..14126bc36641 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -471,6 +471,11 @@ struct ieee80211s_hdr { u8 eaddr3[6]; } __attribute__ ((packed)); +/* Mesh flags */ +#define MESH_FLAGS_AE_A4 0x1 +#define MESH_FLAGS_AE_A5_A6 0x2 +#define MESH_FLAGS_PS_DEEP 0x4 + /** * struct ieee80211_quiet_ie * -- cgit v1.2.2 From 040dec3b37e4b9ec15b359bf5744f1ceba39fe3e Mon Sep 17 00:00:00 2001 From: Dhananjay Phadke Date: Fri, 12 Sep 2008 06:55:14 -0700 Subject: netxen: add pci ids Define old and new pci vendor and device ids. Signed-off-by: Dhananjay Phadke Signed-off-by: Jeff Garzik --- include/linux/pci_ids.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 6f4276d461c0..a65b082a888a 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -2247,6 +2247,16 @@ #define PCI_DEVICE_ID_3DLABS_PERMEDIA2 0x0007 #define PCI_DEVICE_ID_3DLABS_PERMEDIA2V 0x0009 +#define PCI_VENDOR_ID_NETXEN 0x4040 +#define PCI_DEVICE_ID_NX2031_10GXSR 0x0001 +#define PCI_DEVICE_ID_NX2031_10GCX4 0x0002 +#define PCI_DEVICE_ID_NX2031_4GCU 0x0003 +#define PCI_DEVICE_ID_NX2031_IMEZ 0x0004 +#define PCI_DEVICE_ID_NX2031_HMEZ 0x0005 +#define PCI_DEVICE_ID_NX2031_XG_MGMT 0x0024 +#define PCI_DEVICE_ID_NX2031_XG_MGMT2 0x0025 +#define PCI_DEVICE_ID_NX3031 0x0100 + #define PCI_VENDOR_ID_AKS 0x416c #define PCI_DEVICE_ID_AKS_ALADDINCARD 0x0100 -- cgit v1.2.2 From b4f151ff899362fec952c45d166252c9912c041f Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 24 Sep 2008 17:48:26 +0100 Subject: MN10300: Move asm-arm/cnt32_to_63.h to include/linux/ Move asm-arm/cnt32_to_63.h to include/linux/ so that MN10300 can make use of it too. Signed-off-by: David Howells Signed-off-by: Linus Torvalds --- include/linux/cnt32_to_63.h | 80 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 80 insertions(+) create mode 100644 include/linux/cnt32_to_63.h (limited to 'include/linux') diff --git a/include/linux/cnt32_to_63.h b/include/linux/cnt32_to_63.h new file mode 100644 index 000000000000..8c0f9505b48c --- /dev/null +++ b/include/linux/cnt32_to_63.h @@ -0,0 +1,80 @@ +/* + * Extend a 32-bit counter to 63 bits + * + * Author: Nicolas Pitre + * Created: December 3, 2006 + * Copyright: MontaVista Software, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation. + */ + +#ifndef __LINUX_CNT32_TO_63_H__ +#define __LINUX_CNT32_TO_63_H__ + +#include +#include +#include + +/* this is used only to give gcc a clue about good code generation */ +union cnt32_to_63 { + struct { +#if defined(__LITTLE_ENDIAN) + u32 lo, hi; +#elif defined(__BIG_ENDIAN) + u32 hi, lo; +#endif + }; + u64 val; +}; + + +/** + * cnt32_to_63 - Expand a 32-bit counter to a 63-bit counter + * @cnt_lo: The low part of the counter + * + * Many hardware clock counters are only 32 bits wide and therefore have + * a relatively short period making wrap-arounds rather frequent. This + * is a problem when implementing sched_clock() for example, where a 64-bit + * non-wrapping monotonic value is expected to be returned. + * + * To overcome that limitation, let's extend a 32-bit counter to 63 bits + * in a completely lock free fashion. Bits 0 to 31 of the clock are provided + * by the hardware while bits 32 to 62 are stored in memory. The top bit in + * memory is used to synchronize with the hardware clock half-period. When + * the top bit of both counters (hardware and in memory) differ then the + * memory is updated with a new value, incrementing it when the hardware + * counter wraps around. + * + * Because a word store in memory is atomic then the incremented value will + * always be in synch with the top bit indicating to any potential concurrent + * reader if the value in memory is up to date or not with regards to the + * needed increment. And any race in updating the value in memory is harmless + * as the same value would simply be stored more than once. + * + * The only restriction for the algorithm to work properly is that this + * code must be executed at least once per each half period of the 32-bit + * counter to properly update the state bit in memory. This is usually not a + * problem in practice, but if it is then a kernel timer could be scheduled + * to manage for this code to be executed often enough. + * + * Note that the top bit (bit 63) in the returned value should be considered + * as garbage. It is not cleared here because callers are likely to use a + * multiplier on the returned value which can get rid of the top bit + * implicitly by making the multiplier even, therefore saving on a runtime + * clear-bit instruction. Otherwise caller must remember to clear the top + * bit explicitly. + */ +#define cnt32_to_63(cnt_lo) \ +({ \ + static volatile u32 __m_cnt_hi; \ + union cnt32_to_63 __x; \ + __x.hi = __m_cnt_hi; \ + __x.lo = (cnt_lo); \ + if (unlikely((s32)(__x.hi ^ __x.lo) < 0)) \ + __m_cnt_hi = __x.hi = (__x.hi ^ 0x80000000) + (__x.hi >> 31); \ + __x.val; \ +}) + +#endif -- cgit v1.2.2 From ff7a4c7130c0ad97d55f7ab3f0a35fbc1f41b376 Mon Sep 17 00:00:00 2001 From: Eric Miao Date: Sun, 7 Sep 2008 11:30:06 +0800 Subject: [ARM] corgi_lcd: use GPIO API for BACKLIGHT_ON and BACKLIGHT_CONT Signed-off-by: Eric Miao Signed-off-by: Russell King --- include/linux/spi/corgi_lcd.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/spi/corgi_lcd.h b/include/linux/spi/corgi_lcd.h index b6161aae2752..6692b3418ccf 100644 --- a/include/linux/spi/corgi_lcd.h +++ b/include/linux/spi/corgi_lcd.h @@ -10,6 +10,9 @@ struct corgi_lcd_platform_data { int default_intensity; int limit_mask; + int gpio_backlight_on; /* -1 if n/a */ + int gpio_backlight_cont; /* -1 if n/a */ + void (*notify)(int intensity); void (*kick_battery)(void); }; -- cgit v1.2.2 From 82ef04fb4c82542b3eda81cca461f0594ce9cd0b Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 31 Jul 2008 17:02:40 +0900 Subject: libata: make SCR access ops per-link Logically, SCR access ops should take @link; however, there was no compelling reason to convert all SCR access ops when adding @link abstraction as there's one-to-one mapping between a port and a non-PMP link. However, that assumption won't hold anymore with the scheduled addition of slave link. Make SCR access ops per-link. Signed-off-by: Tejun Heo Signed-off-by: Jeff Garzik --- include/linux/libata.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/libata.h b/include/linux/libata.h index 225bfc5bd9ec..ffd622fa319c 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -772,8 +772,8 @@ struct ata_port_operations { /* * Optional features */ - int (*scr_read)(struct ata_port *ap, unsigned int sc_reg, u32 *val); - int (*scr_write)(struct ata_port *ap, unsigned int sc_reg, u32 val); + int (*scr_read)(struct ata_link *link, unsigned int sc_reg, u32 *val); + int (*scr_write)(struct ata_link *link, unsigned int sc_reg, u32 val); void (*pmp_attach)(struct ata_port *ap); void (*pmp_detach)(struct ata_port *ap); int (*enable_pm)(struct ata_port *ap, enum link_pm policy); -- cgit v1.2.2 From aadffb682cc5572f48cc24883681db65530bd284 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 31 Jul 2008 17:02:41 +0900 Subject: libata: reimplement link iterator Implement __ata_port_next_link() and reimplement __ata_port_for_each_link() and ata_port_for_each_link() using it. This removes relatively large inlined code and makes iteration easier to extend. Signed-off-by: Tejun Heo Signed-off-by: Jeff Garzik --- include/linux/libata.h | 33 ++++++++------------------------- 1 file changed, 8 insertions(+), 25 deletions(-) (limited to 'include/linux') diff --git a/include/linux/libata.h b/include/linux/libata.h index ffd622fa319c..3eaca347ce29 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -1265,34 +1265,17 @@ static inline int ata_link_active(struct ata_link *link) return ata_tag_valid(link->active_tag) || link->sactive; } -static inline struct ata_link *ata_port_first_link(struct ata_port *ap) -{ - if (sata_pmp_attached(ap)) - return ap->pmp_link; - return &ap->link; -} - -static inline struct ata_link *ata_port_next_link(struct ata_link *link) -{ - struct ata_port *ap = link->ap; - - if (ata_is_host_link(link)) { - if (!sata_pmp_attached(ap)) - return NULL; - return ap->pmp_link; - } - - if (++link < ap->nr_pmp_links + ap->pmp_link) - return link; - return NULL; -} +extern struct ata_link *__ata_port_next_link(struct ata_port *ap, + struct ata_link *link, + bool dev_only); -#define __ata_port_for_each_link(lk, ap) \ - for ((lk) = &(ap)->link; (lk); (lk) = ata_port_next_link(lk)) +#define __ata_port_for_each_link(link, ap) \ + for ((link) = __ata_port_next_link((ap), NULL, false); (link); \ + (link) = __ata_port_next_link((ap), (link), false)) #define ata_port_for_each_link(link, ap) \ - for ((link) = ata_port_first_link(ap); (link); \ - (link) = ata_port_next_link(link)) + for ((link) = __ata_port_next_link((ap), NULL, true); (link); \ + (link) = __ata_port_next_link((ap), (link), true)) #define ata_link_for_each_dev(dev, link) \ for ((dev) = (link)->device; \ -- cgit v1.2.2 From b5b3fa386b8f96c7fa92e507e5deddc2637924b4 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 31 Jul 2008 17:02:42 +0900 Subject: libata: misc updates to prepare for slave link * Add ATA_EH_ALL_ACTIONS. * Make sata_link_{on|off}_line() return bool instead of int. Signed-off-by: Tejun Heo Signed-off-by: Jeff Garzik --- include/linux/libata.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/libata.h b/include/linux/libata.h index 3eaca347ce29..0c7e6f3c28eb 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -321,6 +321,8 @@ enum { ATA_EH_LPM = (1 << 4), /* link power management action */ ATA_EH_PERDEV_MASK = ATA_EH_REVALIDATE, + ATA_EH_ALL_ACTIONS = ATA_EH_REVALIDATE | ATA_EH_RESET | + ATA_EH_ENABLE_LINK | ATA_EH_LPM, /* ata_eh_info->flags */ ATA_EHI_HOTPLUGGED = (1 << 0), /* could have been hotplugged */ @@ -920,8 +922,8 @@ extern int sata_scr_valid(struct ata_link *link); extern int sata_scr_read(struct ata_link *link, int reg, u32 *val); extern int sata_scr_write(struct ata_link *link, int reg, u32 val); extern int sata_scr_write_flush(struct ata_link *link, int reg, u32 val); -extern int ata_link_online(struct ata_link *link); -extern int ata_link_offline(struct ata_link *link); +extern bool ata_link_online(struct ata_link *link); +extern bool ata_link_offline(struct ata_link *link); #ifdef CONFIG_PM extern int ata_host_suspend(struct ata_host *host, pm_message_t mesg); extern void ata_host_resume(struct ata_host *host); -- cgit v1.2.2 From b1c72916abbdd0a55015c87358536ca0ebaf6735 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 31 Jul 2008 17:02:43 +0900 Subject: libata: implement slave_link Explanation taken from the comment of ata_slave_link_init(). In libata, a port contains links and a link contains devices. There is single host link but if a PMP is attached to it, there can be multiple fan-out links. On SATA, there's usually a single device connected to a link but PATA and SATA controllers emulating TF based interface can have two - master and slave. However, there are a few controllers which don't fit into this abstraction too well - SATA controllers which emulate TF interface with both master and slave devices but also have separate SCR register sets for each device. These controllers need separate links for physical link handling (e.g. onlineness, link speed) but should be treated like a traditional M/S controller for everything else (e.g. command issue, softreset). slave_link is libata's way of handling this class of controllers without impacting core layer too much. For anything other than physical link handling, the default host link is used for both master and slave. For physical link handling, separate @ap->slave_link is used. All dirty details are implemented inside libata core layer. From LLD's POV, the only difference is that prereset, hardreset and postreset are called once more for the slave link, so the reset sequence looks like the following. prereset(M) -> prereset(S) -> hardreset(M) -> hardreset(S) -> softreset(M) -> postreset(M) -> postreset(S) Note that softreset is called only for the master. Softreset resets both M/S by definition, so SRST on master should handle both (the standard method will work just fine). As slave_link excludes PMP support and only code paths which deal with the attributes of physical link are affected, all the changes are localized to libata.h, libata-core.c and libata-eh.c. * ata_is_host_link() updated so that slave_link is considered as host link too. * iterator extended to iterate over the slave_link when using the underbarred version. * force param handling updated such that devno 16 is mapped to the slave link/device. * ata_link_on/offline() updated to return the combined result from master and slave link. ata_phys_link_on/offline() are the direct versions. * EH autopsy and report are performed separately for master slave links. Reset is udpated to implement the above described reset sequence. Except for reset update, most changes are minor, many of them just modifying dev->link to ata_dev_phys_link(dev) or using phys online test instead. After this update, LLDs can take full advantage of per-dev SCR registers by simply turning on slave link. Signed-off-by: Tejun Heo Signed-off-by: Jeff Garzik --- include/linux/libata.h | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/libata.h b/include/linux/libata.h index 0c7e6f3c28eb..244ff601559a 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -690,7 +690,8 @@ struct ata_port { unsigned int qc_active; int nr_active_links; /* #links with active qcs */ - struct ata_link link; /* host default link */ + struct ata_link link; /* host default link */ + struct ata_link *slave_link; /* see ata_slave_link_init() */ int nr_pmp_links; /* nr of available PMP links */ struct ata_link *pmp_link; /* array of PMP links */ @@ -897,6 +898,7 @@ extern void ata_port_disable(struct ata_port *); extern struct ata_host *ata_host_alloc(struct device *dev, int max_ports); extern struct ata_host *ata_host_alloc_pinfo(struct device *dev, const struct ata_port_info * const * ppi, int n_ports); +extern int ata_slave_link_init(struct ata_port *ap); extern int ata_host_start(struct ata_host *host); extern int ata_host_register(struct ata_host *host, struct scsi_host_template *sht); @@ -1136,7 +1138,7 @@ static inline bool sata_pmp_attached(struct ata_port *ap) static inline int ata_is_host_link(const struct ata_link *link) { - return link == &link->ap->link; + return link == &link->ap->link || link == link->ap->slave_link; } #else /* CONFIG_SATA_PMP */ static inline bool sata_pmp_supported(struct ata_port *ap) @@ -1169,7 +1171,7 @@ static inline int sata_srst_pmp(struct ata_link *link) printk("%sata%u: "fmt, lv, (ap)->print_id , ##args) #define ata_link_printk(link, lv, fmt, args...) do { \ - if (sata_pmp_attached((link)->ap)) \ + if (sata_pmp_attached((link)->ap) || (link)->ap->slave_link) \ printk("%sata%u.%02u: "fmt, lv, (link)->ap->print_id, \ (link)->pmp , ##args); \ else \ -- cgit v1.2.2 From ea6ce53cd5d005455ec0a3cc1d45d3af0cb90919 Mon Sep 17 00:00:00 2001 From: Elias Oltmanns Date: Fri, 19 Sep 2008 23:46:01 +0200 Subject: [libata] Introduce ata_id_has_unload() Add a function to check an ATA device's id for head unload support as specified in ATA-7. Signed-off-by: Elias Oltmanns Signed-off-by: Jeff Garzik --- include/linux/ata.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ata.h b/include/linux/ata.h index 8a12d718c169..a26ebd25bac1 100644 --- a/include/linux/ata.h +++ b/include/linux/ata.h @@ -667,6 +667,15 @@ static inline int ata_id_has_dword_io(const u16 *id) return 0; } +static inline int ata_id_has_unload(const u16 *id) +{ + if (ata_id_major_version(id) >= 7 && + (id[ATA_ID_CFSSE] & 0xC000) == 0x4000 && + id[ATA_ID_CFSSE] & (1 << 13)) + return 1; + return 0; +} + static inline int ata_id_current_chs_valid(const u16 *id) { /* For ATA-1 devices, if the INITIALIZE DEVICE PARAMETERS command -- cgit v1.2.2 From 45fabbb77bd95adff7a80bde1c7a0ace1075fde6 Mon Sep 17 00:00:00 2001 From: Elias Oltmanns Date: Sun, 21 Sep 2008 11:54:08 +0200 Subject: libata: Implement disk shock protection support On user request (through sysfs), the IDLE IMMEDIATE command with UNLOAD FEATURE as specified in ATA-7 is issued to the device and processing of the request queue is stopped thereafter until the specified timeout expires or user space asks to resume normal operation. This is supposed to prevent the heads of a hard drive from accidentally crashing onto the platter when a heavy shock is anticipated (like a falling laptop expected to hit the floor). In fact, the whole port stops processing commands until the timeout has expired in order to avoid any resets due to failed commands on another device. Signed-off-by: Elias Oltmanns Signed-off-by: Jeff Garzik --- include/linux/libata.h | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/libata.h b/include/linux/libata.h index 244ff601559a..1f44cfb847e1 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -146,6 +146,7 @@ enum { ATA_DFLAG_SPUNDOWN = (1 << 14), /* XXX: for spindown_compat */ ATA_DFLAG_SLEEPING = (1 << 15), /* device is sleeping */ ATA_DFLAG_DUBIOUS_XFER = (1 << 16), /* data transfer not verified */ + ATA_DFLAG_NO_UNLOAD = (1 << 17), /* device doesn't support unload */ ATA_DFLAG_INIT_MASK = (1 << 24) - 1, ATA_DFLAG_DETACH = (1 << 24), @@ -244,6 +245,7 @@ enum { ATA_TMOUT_BOOT = 30000, /* heuristic */ ATA_TMOUT_BOOT_QUICK = 7000, /* heuristic */ ATA_TMOUT_INTERNAL_QUICK = 5000, + ATA_TMOUT_MAX_PARK = 30000, /* FIXME: GoVault needs 2s but we can't afford that without * parallel probing. 800ms is enough for iVDR disk @@ -319,8 +321,9 @@ enum { ATA_EH_RESET = ATA_EH_SOFTRESET | ATA_EH_HARDRESET, ATA_EH_ENABLE_LINK = (1 << 3), ATA_EH_LPM = (1 << 4), /* link power management action */ + ATA_EH_PARK = (1 << 5), /* unload heads and stop I/O */ - ATA_EH_PERDEV_MASK = ATA_EH_REVALIDATE, + ATA_EH_PERDEV_MASK = ATA_EH_REVALIDATE | ATA_EH_PARK, ATA_EH_ALL_ACTIONS = ATA_EH_REVALIDATE | ATA_EH_RESET | ATA_EH_ENABLE_LINK | ATA_EH_LPM, @@ -454,6 +457,7 @@ enum link_pm { MEDIUM_POWER, }; extern struct device_attribute dev_attr_link_power_management_policy; +extern struct device_attribute dev_attr_unload_heads; extern struct device_attribute dev_attr_em_message_type; extern struct device_attribute dev_attr_em_message; extern struct device_attribute dev_attr_sw_activity; @@ -566,6 +570,7 @@ struct ata_device { /* n_sector is used as CLEAR_OFFSET, read comment above CLEAR_OFFSET */ u64 n_sectors; /* size of device, if ATA */ unsigned int class; /* ATA_DEV_xxx */ + unsigned long unpark_deadline; u8 pio_mode; u8 dma_mode; @@ -623,6 +628,7 @@ struct ata_eh_context { [ATA_EH_CMD_TIMEOUT_TABLE_SIZE]; unsigned int classes[ATA_MAX_DEVICES]; unsigned int did_probe_mask; + unsigned int unloaded_mask; unsigned int saved_ncq_enabled; u8 saved_xfer_mode[ATA_MAX_DEVICES]; /* timestamp for the last reset attempt or success */ @@ -712,6 +718,7 @@ struct ata_port { struct list_head eh_done_q; wait_queue_head_t eh_wait_q; int eh_tries; + struct completion park_req_pending; pm_message_t pm_mesg; int *pm_result; @@ -1102,6 +1109,7 @@ extern void ata_std_error_handler(struct ata_port *ap); */ extern const struct ata_port_operations ata_base_port_ops; extern const struct ata_port_operations sata_port_ops; +extern struct device_attribute *ata_common_sdev_attrs[]; #define ATA_BASE_SHT(drv_name) \ .module = THIS_MODULE, \ @@ -1116,7 +1124,8 @@ extern const struct ata_port_operations sata_port_ops; .proc_name = drv_name, \ .slave_configure = ata_scsi_slave_config, \ .slave_destroy = ata_scsi_slave_destroy, \ - .bios_param = ata_std_bios_param + .bios_param = ata_std_bios_param, \ + .sdev_attrs = ata_common_sdev_attrs #define ATA_NCQ_SHT(drv_name) \ ATA_BASE_SHT(drv_name), \ -- cgit v1.2.2 From 6866e7bc83f13a1bc6de59099930e9db1ab0042f Mon Sep 17 00:00:00 2001 From: Richard Kennedy Date: Mon, 22 Sep 2008 14:47:13 -0700 Subject: libata: reorder ata_device to remove 8 bytes of padding on 64 bits reduce size by 8 bytes from 1160 to 1152 allowing it to fit in 1 fewer cachelines. Signed-off-by: Richard Kennedy Signed-off-by: Andrew Morton Signed-off-by: Jeff Garzik --- include/linux/libata.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/libata.h b/include/linux/libata.h index 1f44cfb847e1..947cf84e555d 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -560,8 +560,8 @@ struct ata_ering { struct ata_device { struct ata_link *link; unsigned int devno; /* 0 or 1 */ - unsigned long flags; /* ATA_DFLAG_xxx */ unsigned int horkage; /* List of broken features */ + unsigned long flags; /* ATA_DFLAG_xxx */ struct scsi_device *sdev; /* attached SCSI device */ #ifdef CONFIG_ATA_ACPI acpi_handle acpi_handle; -- cgit v1.2.2 From b00c1a99e7758f794923c61e5cd55268d61c9469 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 29 Sep 2008 15:44:46 +0200 Subject: hrtimer: mark migration state Impact: during migration active hrtimers can be seen as inactive The migration code removes the hrtimers from the queues of the dead CPU and sets the state temporary to INACTIVE. The enqueue code sets it to ACTIVE/PENDING again. Prevent that the wrong state can be seen by using a separate migration state bit. Signed-off-by: Thomas Gleixner --- include/linux/hrtimer.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 6d93dce61cbb..bdd88df1b4e5 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -67,9 +67,10 @@ enum hrtimer_cb_mode { * 0x02 callback function running * 0x04 callback pending (high resolution mode) * - * Special case: + * Special cases: * 0x03 callback function running and enqueued * (was requeued on another CPU) + * 0x09 timer was migrated on CPU hotunplug * The "callback function running and enqueued" status is only possible on * SMP. It happens for example when a posix timer expired and the callback * queued a signal. Between dropping the lock which protects the posix timer @@ -87,6 +88,7 @@ enum hrtimer_cb_mode { #define HRTIMER_STATE_ENQUEUED 0x01 #define HRTIMER_STATE_CALLBACK 0x02 #define HRTIMER_STATE_PENDING 0x04 +#define HRTIMER_STATE_MIGRATE 0x08 /** * struct hrtimer - the basic hrtimer structure -- cgit v1.2.2 From ccc7dadf736639da86f3e0c86832c11a66fc8221 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 29 Sep 2008 15:47:42 +0200 Subject: hrtimer: prevent migration of per CPU hrtimers Impact: per CPU hrtimers can be migrated from a dead CPU The hrtimer code has no knowledge about per CPU timers, but we need to prevent the migration of such timers and warn when such a timer is active at migration time. Explicitely mark the timers as per CPU and use a more understandable mode descriptor for the interrupts safe unlocked callback mode, which is used by hrtimer_sleeper and the scheduler code. Signed-off-by: Thomas Gleixner --- include/linux/hrtimer.h | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index bdd88df1b4e5..2f245fe63bda 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -47,14 +47,22 @@ enum hrtimer_restart { * HRTIMER_CB_IRQSAFE: Callback may run in hardirq context * HRTIMER_CB_IRQSAFE_NO_RESTART: Callback may run in hardirq context and * does not restart the timer - * HRTIMER_CB_IRQSAFE_NO_SOFTIRQ: Callback must run in hardirq context - * Special mode for tick emultation + * HRTIMER_CB_IRQSAFE_PERCPU: Callback must run in hardirq context + * Special mode for tick emulation and + * scheduler timer. Such timers are per + * cpu and not allowed to be migrated on + * cpu unplug. + * HRTIMER_CB_IRQSAFE_UNLOCKED: Callback should run in hardirq context + * with timer->base lock unlocked + * used for timers which call wakeup to + * avoid lock order problems with rq->lock */ enum hrtimer_cb_mode { HRTIMER_CB_SOFTIRQ, HRTIMER_CB_IRQSAFE, HRTIMER_CB_IRQSAFE_NO_RESTART, - HRTIMER_CB_IRQSAFE_NO_SOFTIRQ, + HRTIMER_CB_IRQSAFE_PERCPU, + HRTIMER_CB_IRQSAFE_UNLOCKED, }; /* -- cgit v1.2.2 From cf04a4c764cd3e651a64b3e667bb6a673ead99e1 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Tue, 30 Sep 2008 02:22:14 -0700 Subject: netdev: use const for some name functions dev_change_name and netdev_drivername should use const char on parameters that are read-only input values. The strcpy to newname is not needed since newname is not used later in function. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- include/linux/netdevice.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index d675df08b946..9cfd20be8b7f 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1225,7 +1225,7 @@ extern int dev_ioctl(struct net *net, unsigned int cmd, void __user *); extern int dev_ethtool(struct net *net, struct ifreq *); extern unsigned dev_get_flags(const struct net_device *); extern int dev_change_flags(struct net_device *, unsigned); -extern int dev_change_name(struct net_device *, char *); +extern int dev_change_name(struct net_device *, const char *); extern int dev_set_alias(struct net_device *, const char *, size_t); extern int dev_change_net_namespace(struct net_device *, struct net *, const char *); @@ -1670,7 +1670,7 @@ extern void dev_seq_stop(struct seq_file *seq, void *v); extern int netdev_class_create_file(struct class_attribute *class_attr); extern void netdev_class_remove_file(struct class_attribute *class_attr); -extern char *netdev_drivername(struct net_device *dev, char *buffer, int len); +extern char *netdev_drivername(const struct net_device *dev, char *buffer, int len); extern void linkwatch_run_queue(void); -- cgit v1.2.2 From a57334e95e4fb132acca05bdc0efa2f9dda194af Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Denis-Courmont?= Date: Tue, 30 Sep 2008 02:53:18 -0700 Subject: Phonet: declare headers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Rémi Denis-Courmont Signed-off-by: David S. Miller --- include/linux/Kbuild | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/Kbuild b/include/linux/Kbuild index b68ec09399be..f431e40725d6 100644 --- a/include/linux/Kbuild +++ b/include/linux/Kbuild @@ -126,6 +126,7 @@ header-y += pci_regs.h header-y += pfkeyv2.h header-y += pg.h header-y += phantom.h +header-y += phonet.h header-y += pkt_cls.h header-y += pkt_sched.h header-y += posix_types.h @@ -232,6 +233,7 @@ unifdef-y += if_fddi.h unifdef-y += if_frad.h unifdef-y += if_ltalk.h unifdef-y += if_link.h +unifdef-y += if_phonet.h unifdef-y += if_pppol2tp.h unifdef-y += if_pppox.h unifdef-y += if_tr.h -- cgit v1.2.2 From 1c50b728c3e734150b8a4a8310ce3e01bc5c70be Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Mon, 29 Sep 2008 11:06:46 -0400 Subject: rcu: add rcu_read_lock_sched() / rcu_read_unlock_sched() Add rcu_read_lock_sched() and rcu_read_unlock_sched() to rcupdate.h to match the recently added write-side call_rcu_sched() and rcu_barrier_sched(). They also match the no-so-recently-added synchronize_sched(). It will help following matching use of the update/read lock primitives. Those new read lock will replace preempt_disable()/enable() used in pair with RCU-classic synchronization. Signed-off-by: Mathieu Desnoyers Acked-by: Peter Zijlstra Signed-off-by: Ingo Molnar --- include/linux/rcupdate.h | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'include/linux') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index e8b4039cfb2f..86f1f5e43e33 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -132,6 +132,26 @@ struct rcu_head { */ #define rcu_read_unlock_bh() __rcu_read_unlock_bh() +/** + * rcu_read_lock_sched - mark the beginning of a RCU-classic critical section + * + * Should be used with either + * - synchronize_sched() + * or + * - call_rcu_sched() and rcu_barrier_sched() + * on the write-side to insure proper synchronization. + */ +#define rcu_read_lock_sched() preempt_disable() + +/* + * rcu_read_unlock_sched - marks the end of a RCU-classic critical section + * + * See rcu_read_lock_sched for more information. + */ +#define rcu_read_unlock_sched() preempt_enable() + + + /** * rcu_dereference - fetch an RCU-protected pointer in an * RCU read-side critical section. This pointer may later -- cgit v1.2.2 From 24268245d8ba9270152b2281666099ddc8ca389d Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 1 Oct 2008 08:59:40 +0200 Subject: x86: add PCI IDs for AMD Barcelona PCI devices Signed-off-by: Robert Richter Cc: oprofile-list Cc: Barry Kasindorf Signed-off-by: Ingo Molnar --- include/linux/pci_ids.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 2886b0eb53ec..c114103af987 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -497,6 +497,11 @@ #define PCI_DEVICE_ID_AMD_K8_NB_ADDRMAP 0x1101 #define PCI_DEVICE_ID_AMD_K8_NB_MEMCTL 0x1102 #define PCI_DEVICE_ID_AMD_K8_NB_MISC 0x1103 +#define PCI_DEVICE_ID_AMD_10H_NB_HT 0x1200 +#define PCI_DEVICE_ID_AMD_10H_NB_MAP 0x1201 +#define PCI_DEVICE_ID_AMD_10H_NB_DRAM 0x1202 +#define PCI_DEVICE_ID_AMD_10H_NB_MISC 0x1203 +#define PCI_DEVICE_ID_AMD_10H_NB_LINK 0x1204 #define PCI_DEVICE_ID_AMD_11H_NB_HT 0x1300 #define PCI_DEVICE_ID_AMD_11H_NB_MAP 0x1301 #define PCI_DEVICE_ID_AMD_11H_NB_DRAM 0x1302 -- cgit v1.2.2 From 6e50e8a2136f1a90de251c653226ded447c5c915 Mon Sep 17 00:00:00 2001 From: Remi Denis-Courmont Date: Wed, 1 Oct 2008 01:30:19 -0700 Subject: phonet: Protect if_phonet.h against multiple inclusions. From: Remi Denis-Courmont Signed-off-by: David S. Miller --- include/linux/if_phonet.h | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/if_phonet.h b/include/linux/if_phonet.h index 7e989216ec17..d70034bcec05 100644 --- a/include/linux/if_phonet.h +++ b/include/linux/if_phonet.h @@ -5,14 +5,15 @@ * * Copyright (C) 2008 Nokia Corporation. All rights reserved. */ +#ifndef LINUX_IF_PHONET_H +#define LINUX_IF_PHONET_H -#define PHONET_HEADER_LEN 8 /* Phonet header length */ - -#define PHONET_MIN_MTU 6 -/* 6 bytes header + 65535 bytes payload */ -#define PHONET_MAX_MTU 65541 +#define PHONET_MIN_MTU 6 /* pn_length = 0 */ +#define PHONET_MAX_MTU 65541 /* pn_length = 0xffff */ #define PHONET_DEV_MTU PHONET_MAX_MTU #ifdef __KERNEL__ extern struct header_ops phonet_header_ops; #endif + +#endif -- cgit v1.2.2 From 04a4bb55bcf35b63d40fd2725e58599ff8310dd7 Mon Sep 17 00:00:00 2001 From: Lennert Buytenhek Date: Wed, 1 Oct 2008 02:33:12 -0700 Subject: net: add skb_recycle_check() to enable netdriver skb recycling This patch adds skb_recycle_check(), which can be used by a network driver after transmitting an skb to check whether this skb can be recycled as a receive buffer. skb_recycle_check() checks that the skb is not shared or cloned, and that it is linear and its head portion large enough (as determined by the driver) to be recycled as a receive buffer. If these conditions are met, it does any necessary reference count dropping and cleans up the skbuff as if it just came from __alloc_skb(). Signed-off-by: Lennert Buytenhek Signed-off-by: David S. Miller --- include/linux/skbuff.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index a19ea43fea02..720b688c22b6 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -383,6 +383,8 @@ static inline struct sk_buff *alloc_skb_fclone(unsigned int size, return __alloc_skb(size, priority, 1, -1); } +extern int skb_recycle_check(struct sk_buff *skb, int skb_size); + extern struct sk_buff *skb_morph(struct sk_buff *dst, struct sk_buff *src); extern struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t priority); -- cgit v1.2.2 From 12a169e7d8f4b1c95252d8b04ed0f1033ed7cfe2 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 1 Oct 2008 07:03:24 -0700 Subject: ipsec: Put dumpers on the dump list MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Herbert Xu came up with the idea and the original patch to make xfrm_state dump list contain also dumpers: As it is we go to extraordinary lengths to ensure that states don't go away while dumpers go to sleep. It's much easier if we just put the dumpers themselves on the list since they can't go away while they're going. I've also changed the order of addition on new states to prevent a never-ending dump. Timo Teräs improved the patch to apply cleanly to latest tree, modified iteration code to be more readable by using a common struct for entries in the list, implemented the same idea for xfrm_policy dumping and moved the af_key specific "last" entry caching to af_key. Signed-off-by: Herbert Xu Signed-off-by: Timo Teras Signed-off-by: David S. Miller --- include/linux/netlink.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netlink.h b/include/linux/netlink.h index cbba7760545b..9ff1b54908f3 100644 --- a/include/linux/netlink.h +++ b/include/linux/netlink.h @@ -220,7 +220,7 @@ struct netlink_callback int (*dump)(struct sk_buff * skb, struct netlink_callback *cb); int (*done)(struct netlink_callback *cb); int family; - long args[7]; + long args[6]; }; struct netlink_notify -- cgit v1.2.2 From f5715aea4564f233767ea1d944b2637a5fd7cd2e Mon Sep 17 00:00:00 2001 From: KOVACS Krisztian Date: Wed, 1 Oct 2008 07:30:02 -0700 Subject: ipv4: Implement IP_TRANSPARENT socket option This patch introduces the IP_TRANSPARENT socket option: enabling that will make the IPv4 routing omit the non-local source address check on output. Setting IP_TRANSPARENT requires NET_ADMIN capability. Signed-off-by: KOVACS Krisztian Signed-off-by: David S. Miller --- include/linux/in.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/in.h b/include/linux/in.h index 4065313cd7ee..db458beef19d 100644 --- a/include/linux/in.h +++ b/include/linux/in.h @@ -75,6 +75,7 @@ struct in_addr { #define IP_IPSEC_POLICY 16 #define IP_XFRM_POLICY 17 #define IP_PASSSEC 18 +#define IP_TRANSPARENT 19 /* BSD compatibility */ #define IP_RECVRETOPTS IP_RETOPTS -- cgit v1.2.2 From 4e9687d9c843dc34d368358a36f5f1610e4fbab3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Va=C5=A1ut?= Date: Thu, 11 Sep 2008 19:37:32 +0100 Subject: [ARM] 5248/1: wm97xx generic battery driver This patch adds generic battery driver for wm97xx chips. Signed-off-by: Marek Vasut Acked-by: Anton Vorontsov Acked-by: Mark Brown Signed-off-by: Russell King --- include/linux/wm97xx_batt.h | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) create mode 100644 include/linux/wm97xx_batt.h (limited to 'include/linux') diff --git a/include/linux/wm97xx_batt.h b/include/linux/wm97xx_batt.h new file mode 100644 index 000000000000..9681d1ab0e4f --- /dev/null +++ b/include/linux/wm97xx_batt.h @@ -0,0 +1,26 @@ +#ifndef _LINUX_WM97XX_BAT_H +#define _LINUX_WM97XX_BAT_H + +#include + +struct wm97xx_batt_info { + int batt_aux; + int temp_aux; + int charge_gpio; + int min_voltage; + int max_voltage; + int batt_div; + int batt_mult; + int temp_div; + int temp_mult; + int batt_tech; + char *batt_name; +}; + +#ifdef CONFIG_BATTERY_WM97XX +void __init wm97xx_bat_set_pdata(struct wm97xx_batt_info *data); +#else +static inline void wm97xx_bat_set_pdata(struct wm97xx_batt_info *data) {} +#endif + +#endif -- cgit v1.2.2 From 4b19de6d1cb07c8bcb6778e771f9cfd5bcfdfd3e Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Thu, 2 Oct 2008 14:50:16 -0700 Subject: mm: tiny-shmem nommu fix The previous patch db203d53d474aa068984e409d807628f5841da1b ("mm: tiny-shmem fix lock ordering: mmap_sem vs i_mutex") to fix the lock ordering in tiny-shmem breaks shared anonymous and IPC memory on NOMMU architectures because it was using the expanding truncate to signal ramfs to allocate a physically contiguous RAM backing the inode (otherwise it is unusable for "memory mapping" it to userspace). However do_truncate is what caused the lock ordering error, due to it taking i_mutex. In this case, we can actually just call ramfs directly to allocate memory for the mapping, rather than go via truncate. Acked-by: David Howells Acked-by: Hugh Dickins Signed-off-by: Nick Piggin Cc: Matt Mackall Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/ramfs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/ramfs.h b/include/linux/ramfs.h index b160fb18e8d6..37aaf2b39863 100644 --- a/include/linux/ramfs.h +++ b/include/linux/ramfs.h @@ -6,6 +6,7 @@ extern int ramfs_get_sb(struct file_system_type *fs_type, int flags, const char *dev_name, void *data, struct vfsmount *mnt); #ifndef CONFIG_MMU +extern int ramfs_nommu_expand_for_mapping(struct inode *inode, size_t newsize); extern unsigned long ramfs_nommu_get_unmapped_area(struct file *file, unsigned long addr, unsigned long len, -- cgit v1.2.2 From 2133b5d7ff531bc15a923db4a6a50bf96c561be9 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 2 Oct 2008 16:06:39 -0700 Subject: rcu: RCU-based detection of stalled CPUs for Classic RCU This patch adds stalled-CPU detection to Classic RCU. This capability is enabled by a new config variable CONFIG_RCU_CPU_STALL_DETECTOR, which defaults disabled. This is a debugging feature to detect infinite loops in kernel code, not something that non-kernel-hackers would be expected to care about. This feature can detect looping CPUs in !PREEMPT builds and looping CPUs with preemption disabled in PREEMPT builds. This is essentially a port of this functionality from the treercu patch, replacing the stall debug patch that is already in tip/core/rcu (commit 67182ae1c4). The changes from the patch in tip/core/rcu include making the config variable name match that in treercu, changing from seconds to jiffies to avoid spurious warnings, and printing a boot message when this feature is enabled. Signed-off-by: Paul E. McKenney Signed-off-by: Ingo Molnar --- include/linux/rcuclassic.h | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rcuclassic.h b/include/linux/rcuclassic.h index 29bf528c7dcc..5f89b62e6983 100644 --- a/include/linux/rcuclassic.h +++ b/include/linux/rcuclassic.h @@ -40,15 +40,21 @@ #include #include +#ifdef CONFIG_RCU_CPU_STALL_DETECTOR +#define RCU_SECONDS_TILL_STALL_CHECK ( 3 * HZ) /* for rcp->jiffies_stall */ +#define RCU_SECONDS_TILL_STALL_RECHECK (30 * HZ) /* for rcp->jiffies_stall */ +#endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */ /* Global control variables for rcupdate callback mechanism. */ struct rcu_ctrlblk { long cur; /* Current batch number. */ long completed; /* Number of the last completed batch */ long pending; /* Number of the last pending batch */ -#ifdef CONFIG_DEBUG_RCU_STALL - unsigned long gp_check; /* Time grace period should end, in seconds. */ -#endif /* #ifdef CONFIG_DEBUG_RCU_STALL */ +#ifdef CONFIG_RCU_CPU_STALL_DETECTOR + unsigned long gp_start; /* Time at which GP started in jiffies. */ + unsigned long jiffies_stall; + /* Time at which to check for CPU stalls. */ +#endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */ int signaled; -- cgit v1.2.2 From 3c9f3681d0b4af09c1cbf04f92fdfb72bd81ad7b Mon Sep 17 00:00:00 2001 From: James Bottomley Date: Sun, 31 Aug 2008 10:13:54 -0500 Subject: [SCSI] lib: add generic helper to print sizes rounded to the correct SI range This patch adds the ability to print sizes in either units of 10^3 (SI) or 2^10 (Binary) units. It rounds up to three significant figures and can be used for either memory or storage capacities. Oh, and I'm fully aware that 64 bits is only 16EiB ... the Zetta and Yotta units are added for future proofing against the day we have 128 bit computers ... [fujita.tomonori@lab.ntt.co.jp: fix missed unsigned long long cast] Signed-off-by: James Bottomley --- include/linux/string_helpers.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) create mode 100644 include/linux/string_helpers.h (limited to 'include/linux') diff --git a/include/linux/string_helpers.h b/include/linux/string_helpers.h new file mode 100644 index 000000000000..a3eb2f65b656 --- /dev/null +++ b/include/linux/string_helpers.h @@ -0,0 +1,16 @@ +#ifndef _LINUX_STRING_HELPERS_H_ +#define _LINUX_STRING_HELPERS_H_ + +#include + +/* Descriptions of the types of units to + * print in */ +enum string_size_units { + STRING_UNITS_10, /* use powers of 10^3 (standard SI) */ + STRING_UNITS_2, /* use binary powers of 2^10 */ +}; + +int string_get_size(u64 size, enum string_size_units units, + char *buf, int len); + +#endif -- cgit v1.2.2 From 897312bd240357c88ce906633703c324c6f0a5cd Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Fri, 3 Oct 2008 15:23:41 -0700 Subject: include/linux/stacktrace.h: declare struct task_struct include/linux/stacktrace.h:13: warning: 'struct task_struct' declared inside parameter list (This might be a hard error on sparc64, which uses this header and has -Werror) Reported-by: "Randy.Dunlap" Acked-by: Ingo Molnar Cc: Peter Zijlstra Cc: Arjan van de Ven Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/stacktrace.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/stacktrace.h b/include/linux/stacktrace.h index 5da9794b2d78..b106fd8e0d5c 100644 --- a/include/linux/stacktrace.h +++ b/include/linux/stacktrace.h @@ -1,6 +1,8 @@ #ifndef __LINUX_STACKTRACE_H #define __LINUX_STACKTRACE_H +struct task_struct; + #ifdef CONFIG_STACKTRACE struct stack_trace { unsigned int nr_entries, max_entries; -- cgit v1.2.2 From f20f258603ebc5da91e76884cf0c0d7ac9804b1c Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Sun, 5 Oct 2008 18:23:27 +0200 Subject: ide-cd: temporary tray close fix This one fixes http://bugzilla.kernel.org/show_bug.cgi?id=11602. A more generic fix for drives which cannot autoclose tray will follow. Signed-off-by: Borislav Petkov Cc: Jens Axboe [bart: add an extra parentheses for consistency with the rest of kernel code] Signed-off-by: Bartlomiej Zolnierkiewicz --- include/linux/ide.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ide.h b/include/linux/ide.h index 1524829f73f2..6514db8fd2e4 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -366,7 +366,9 @@ enum { /* Currently on a filemark */ IDE_AFLAG_FILEMARK = (1 << 25), /* 0 = no tape is loaded, so we don't rewind after ejecting */ - IDE_AFLAG_MEDIUM_PRESENT = (1 << 26) + IDE_AFLAG_MEDIUM_PRESENT = (1 << 26), + + IDE_AFLAG_NO_AUTOCLOSE = (1 << 27), }; struct ide_drive_s { -- cgit v1.2.2 From 9641458d3ec42def729fde64669abf07f3220cd5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Denis-Courmont?= Date: Sun, 5 Oct 2008 11:15:13 -0700 Subject: Phonet: Pipe End Point for Phonet Pipes protocol MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This protocol provides some connection handling and negotiated congestion control. Nokia cellular modems use it for bulk transfers. It provides packet boundaries (hence SOCK_SEQPACKET). Congestion control is per packet rather per byte, so we do not re-use the generic socket memory accounting. Signed-off-by: Rémi Denis-Courmont Signed-off-by: David S. Miller --- include/linux/phonet.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/phonet.h b/include/linux/phonet.h index 3a027f588a4a..f92185242078 100644 --- a/include/linux/phonet.h +++ b/include/linux/phonet.h @@ -27,7 +27,9 @@ #define PN_PROTO_TRANSPORT 0 /* Phonet datagram socket */ #define PN_PROTO_PHONET 1 -#define PHONET_NPROTO 2 +/* Phonet pipe */ +#define PN_PROTO_PIPE 2 +#define PHONET_NPROTO 3 #define PNADDR_ANY 0 #define PNPORT_RESOURCE_ROUTING 0 -- cgit v1.2.2 From 02a47617cdce440f60c71a51f3a93f9f5fcc5a7a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Denis-Courmont?= Date: Sun, 5 Oct 2008 11:16:16 -0700 Subject: Phonet: implement GPRS virtual interface over PEP socket MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Rémi Denis-Courmont Signed-off-by: David S. Miller --- include/linux/phonet.h | 8 ++++++++ include/linux/socket.h | 1 + 2 files changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/phonet.h b/include/linux/phonet.h index f92185242078..c9609f9aedac 100644 --- a/include/linux/phonet.h +++ b/include/linux/phonet.h @@ -31,9 +31,17 @@ #define PN_PROTO_PIPE 2 #define PHONET_NPROTO 3 +/* Socket options for SOL_PNPIPE level */ +#define PNPIPE_ENCAP 1 +#define PNPIPE_IFINDEX 2 + #define PNADDR_ANY 0 #define PNPORT_RESOURCE_ROUTING 0 +/* Values for PNPIPE_ENCAP option */ +#define PNPIPE_ENCAP_NONE 0 +#define PNPIPE_ENCAP_IP 1 + /* ioctls */ #define SIOCPNGETOBJECT (SIOCPROTOPRIVATE + 0) diff --git a/include/linux/socket.h b/include/linux/socket.h index 818ca33bf79f..20fc4bbfca42 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -297,6 +297,7 @@ struct ucred { #define SOL_RXRPC 272 #define SOL_PPPOL2TP 273 #define SOL_BLUETOOTH 274 +#define SOL_PNPIPE 275 /* IPX options */ #define IPX_TYPE 1 -- cgit v1.2.2 From 13c1d18931ebb5cf407cb348ef2cd6284d68902d Mon Sep 17 00:00:00 2001 From: Arnaud Ebalard Date: Sun, 5 Oct 2008 13:33:42 -0700 Subject: xfrm: MIGRATE enhancements (draft-ebalard-mext-pfkey-enhanced-migrate) Provides implementation of the enhancements of XFRM/PF_KEY MIGRATE mechanism specified in draft-ebalard-mext-pfkey-enhanced-migrate-00. Defines associated PF_KEY SADB_X_EXT_KMADDRESS extension and XFRM/netlink XFRMA_KMADDRESS attribute. Signed-off-by: Arnaud Ebalard Signed-off-by: David S. Miller --- include/linux/pfkeyv2.h | 13 ++++++++++++- include/linux/xfrm.h | 10 ++++++++++ 2 files changed, 22 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/pfkeyv2.h b/include/linux/pfkeyv2.h index 700725ddcaae..01b262959f2e 100644 --- a/include/linux/pfkeyv2.h +++ b/include/linux/pfkeyv2.h @@ -226,6 +226,15 @@ struct sadb_x_sec_ctx { } __attribute__((packed)); /* sizeof(struct sadb_sec_ctx) = 8 */ +/* Used by MIGRATE to pass addresses IKE will use to perform + * negotiation with the peer */ +struct sadb_x_kmaddress { + uint16_t sadb_x_kmaddress_len; + uint16_t sadb_x_kmaddress_exttype; + uint32_t sadb_x_kmaddress_reserved; +} __attribute__((packed)); +/* sizeof(struct sadb_x_kmaddress) == 8 */ + /* Message types */ #define SADB_RESERVED 0 #define SADB_GETSPI 1 @@ -346,7 +355,9 @@ struct sadb_x_sec_ctx { #define SADB_X_EXT_NAT_T_DPORT 22 #define SADB_X_EXT_NAT_T_OA 23 #define SADB_X_EXT_SEC_CTX 24 -#define SADB_EXT_MAX 24 +/* Used with MIGRATE to pass @ to IKE for negotiation */ +#define SADB_X_EXT_KMADDRESS 25 +#define SADB_EXT_MAX 25 /* Identity Extension values */ #define SADB_IDENTTYPE_RESERVED 0 diff --git a/include/linux/xfrm.h b/include/linux/xfrm.h index fb0c215a3051..4bc1e6b86cb2 100644 --- a/include/linux/xfrm.h +++ b/include/linux/xfrm.h @@ -279,6 +279,7 @@ enum xfrm_attr_type_t { XFRMA_POLICY_TYPE, /* struct xfrm_userpolicy_type */ XFRMA_MIGRATE, XFRMA_ALG_AEAD, /* struct xfrm_algo_aead */ + XFRMA_KMADDRESS, /* struct xfrm_user_kmaddress */ __XFRMA_MAX #define XFRMA_MAX (__XFRMA_MAX - 1) @@ -415,6 +416,15 @@ struct xfrm_user_report { struct xfrm_selector sel; }; +/* Used by MIGRATE to pass addresses IKE should use to perform + * SA negotiation with the peer */ +struct xfrm_user_kmaddress { + xfrm_address_t local; + xfrm_address_t remote; + __u32 reserved; + __u16 family; +}; + struct xfrm_user_migrate { xfrm_address_t old_daddr; xfrm_address_t old_saddr; -- cgit v1.2.2 From 1e19b16a30c34c042f1eaa23db4c99bfad1dac0e Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 6 Oct 2008 14:15:24 +0200 Subject: AMD IOMMU: use iommu_device_max_index, fix include/linux/iommu-helper.h has no header guards, which breaks sparc64 build. Add them. Signed-off-by: Thomas Gleixner Cc: Joerg Roedel Cc: FUJITA Tomonori Signed-off-by: Ingo Molnar --- include/linux/iommu-helper.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/iommu-helper.h b/include/linux/iommu-helper.h index 786539e432d7..a6d0586e2bf7 100644 --- a/include/linux/iommu-helper.h +++ b/include/linux/iommu-helper.h @@ -1,3 +1,6 @@ +#ifndef _LINUX_IOMMU_HELPER_H +#define _LINUX_IOMMU_HELPER_H + static inline unsigned long iommu_device_max_index(unsigned long size, unsigned long offset, u64 dma_mask) @@ -19,3 +22,5 @@ extern unsigned long iommu_area_alloc(unsigned long *map, unsigned long size, unsigned long align_mask); extern void iommu_area_free(unsigned long *map, unsigned long start, unsigned int nr); + +#endif -- cgit v1.2.2 From 654bed16cf86a9ef94495d9e6131b7ff7840a3dd Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 7 Oct 2008 14:22:33 -0700 Subject: net: packet split receive api Add some packet-split receive hooks. For one this allows to do NUMA node affine page allocs. Later on these hooks will be extended to do emergency reserve allocations for fragments. Signed-off-by: Peter Zijlstra Signed-off-by: David S. Miller --- include/linux/skbuff.h | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 720b688c22b6..2725f4e5a9bf 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -968,6 +968,9 @@ static inline void skb_fill_page_desc(struct sk_buff *skb, int i, skb_shinfo(skb)->nr_frags = i + 1; } +extern void skb_add_rx_frag(struct sk_buff *skb, int i, struct page *page, + int off, int size); + #define SKB_PAGE_ASSERT(skb) BUG_ON(skb_shinfo(skb)->nr_frags) #define SKB_FRAG_ASSERT(skb) BUG_ON(skb_shinfo(skb)->frag_list) #define SKB_LINEAR_ASSERT(skb) BUG_ON(skb_is_nonlinear(skb)) @@ -1382,6 +1385,26 @@ static inline struct sk_buff *netdev_alloc_skb(struct net_device *dev, return __netdev_alloc_skb(dev, length, GFP_ATOMIC); } +extern struct page *__netdev_alloc_page(struct net_device *dev, gfp_t gfp_mask); + +/** + * netdev_alloc_page - allocate a page for ps-rx on a specific device + * @dev: network device to receive on + * + * Allocate a new page node local to the specified device. + * + * %NULL is returned if there is no free memory. + */ +static inline struct page *netdev_alloc_page(struct net_device *dev) +{ + return __netdev_alloc_page(dev, GFP_ATOMIC); +} + +static inline void netdev_free_page(struct net_device *dev, struct page *page) +{ + __free_page(page); +} + /** * skb_clone_writable - is the header of a clone writable * @skb: buffer to check -- cgit v1.2.2 From 33f5f57eeb0c6386fdd85f9c690dc8d700ba7928 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Tue, 7 Oct 2008 14:43:06 -0700 Subject: tcp: kill pointless urg_mode MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It all started from me noticing that this urgent check in tcp_clean_rtx_queue is unnecessarily inside the loop. Then I took a longer look to it and found out that the users of urg_mode can trivially do without, well almost, there was one gotcha. Bonus: those funny people who use urg with >= 2^31 write_seq - snd_una could now rejoice too (that's the only purpose for the between being there, otherwise a simple compare would have done the thing). Not that I assume that the rest of the tcp code happily lives with such mind-boggling numbers :-). Alas, it turned out to be impossible to set wmem to such numbers anyway, yes I really tried a big sendfile after setting some wmem but nothing happened :-). ...Tcp_wmem is int and so is sk_sndbuf... So I hacked a bit variable to long and found out that it seems to work... :-) Signed-off-by: Ilpo Järvinen Signed-off-by: David S. Miller --- include/linux/tcp.h | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 767290628292..fe77e1499ab7 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -312,8 +312,11 @@ struct tcp_sock { u32 retrans_out; /* Retransmitted packets out */ u16 urg_data; /* Saved octet of OOB data and control flags */ - u8 urg_mode; /* In urgent mode */ u8 ecn_flags; /* ECN status bits. */ + u8 reordering; /* Packet reordering metric. */ + u32 snd_up; /* Urgent pointer */ + + u8 keepalive_probes; /* num of allowed keep alive probes */ /* * Options received (usually on last packet, some only on SYN packets). */ @@ -361,8 +364,6 @@ struct tcp_sock { u32 lost_retrans_low; /* Sent seq after any rxmit (lowest) */ - u8 reordering; /* Packet reordering metric. */ - u8 keepalive_probes; /* num of allowed keep alive probes */ u32 prior_ssthresh; /* ssthresh saved at recovery start */ u32 high_seq; /* snd_nxt at onset of congestion */ @@ -374,8 +375,6 @@ struct tcp_sock { u32 total_retrans; /* Total retransmits for entire connection */ u32 urg_seq; /* Seq of received urgent pointer */ - u32 snd_up; /* Urgent pointer */ - unsigned int keepalive_time; /* time before keep alive takes place */ unsigned int keepalive_intvl; /* time interval between keep alive probes */ -- cgit v1.2.2 From b8bae41ed6a53cce56c50811a91cd963e3187d1c Mon Sep 17 00:00:00 2001 From: Rami Rosen Date: Tue, 7 Oct 2008 15:34:37 -0700 Subject: ipv4: add mc_count to in_device. This patch add mc_count to struct in_device and updates increment/decrement/initilaize of this field in IPv4 and in IPv6. - Also printing the vfs /proc entry (/proc/net/igmp) is adjusted to use the new mc_count. Signed-off-by: Rami Rosen Signed-off-by: David S. Miller --- include/linux/inetdevice.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/inetdevice.h b/include/linux/inetdevice.h index c6f51ad52d5b..06fcdb45106b 100644 --- a/include/linux/inetdevice.h +++ b/include/linux/inetdevice.h @@ -25,6 +25,7 @@ struct in_device struct in_ifaddr *ifa_list; /* IP ifaddr chain */ rwlock_t mc_list_lock; struct ip_mc_list *mc_list; /* IP multicast filter chain */ + int mc_count; /* Number of installed mcasts */ spinlock_t mc_tomb_lock; struct ip_mc_list *mc_tomb; unsigned long mr_v1_seen; -- cgit v1.2.2 From 76108cea065cda58366d16a7eb6ca90d717a1396 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Wed, 8 Oct 2008 11:35:00 +0200 Subject: netfilter: Use unsigned types for hooknum and pf vars and (try to) consistently use u_int8_t for the L3 family. Signed-off-by: Jan Engelhardt Signed-off-by: Patrick McHardy --- include/linux/netfilter.h | 30 ++++++++++++++++-------------- include/linux/netfilter/x_tables.h | 28 ++++++++++++++-------------- 2 files changed, 30 insertions(+), 28 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index 0c5eb7ed8b3f..8c83d2e23bde 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -92,8 +92,8 @@ struct nf_hook_ops /* User fills in from here down. */ nf_hookfn *hook; struct module *owner; - int pf; - int hooknum; + u_int8_t pf; + unsigned int hooknum; /* Hooks are ordered in ascending priority. */ int priority; }; @@ -102,7 +102,7 @@ struct nf_sockopt_ops { struct list_head list; - int pf; + u_int8_t pf; /* Non-inclusive ranges: use 0/0/NULL to never get called. */ int set_optmin; @@ -140,7 +140,7 @@ extern struct ctl_path nf_net_ipv4_netfilter_sysctl_path[]; extern struct list_head nf_hooks[NPROTO][NF_MAX_HOOKS]; -int nf_hook_slow(int pf, unsigned int hook, struct sk_buff *skb, +int nf_hook_slow(u_int8_t pf, unsigned int hook, struct sk_buff *skb, struct net_device *indev, struct net_device *outdev, int (*okfn)(struct sk_buff *), int thresh); @@ -151,7 +151,7 @@ int nf_hook_slow(int pf, unsigned int hook, struct sk_buff *skb, * okfn must be invoked by the caller in this case. Any other return * value indicates the packet has been consumed by the hook. */ -static inline int nf_hook_thresh(int pf, unsigned int hook, +static inline int nf_hook_thresh(u_int8_t pf, unsigned int hook, struct sk_buff *skb, struct net_device *indev, struct net_device *outdev, @@ -167,7 +167,7 @@ static inline int nf_hook_thresh(int pf, unsigned int hook, return nf_hook_slow(pf, hook, skb, indev, outdev, okfn, thresh); } -static inline int nf_hook(int pf, unsigned int hook, struct sk_buff *skb, +static inline int nf_hook(u_int8_t pf, unsigned int hook, struct sk_buff *skb, struct net_device *indev, struct net_device *outdev, int (*okfn)(struct sk_buff *)) { @@ -212,14 +212,14 @@ __ret;}) NF_HOOK_THRESH(pf, hook, skb, indev, outdev, okfn, INT_MIN) /* Call setsockopt() */ -int nf_setsockopt(struct sock *sk, int pf, int optval, char __user *opt, +int nf_setsockopt(struct sock *sk, u_int8_t pf, int optval, char __user *opt, int len); -int nf_getsockopt(struct sock *sk, int pf, int optval, char __user *opt, +int nf_getsockopt(struct sock *sk, u_int8_t pf, int optval, char __user *opt, int *len); -int compat_nf_setsockopt(struct sock *sk, int pf, int optval, +int compat_nf_setsockopt(struct sock *sk, u_int8_t pf, int optval, char __user *opt, int len); -int compat_nf_getsockopt(struct sock *sk, int pf, int optval, +int compat_nf_getsockopt(struct sock *sk, u_int8_t pf, int optval, char __user *opt, int *len); /* Call this before modifying an existing packet: ensures it is @@ -292,7 +292,7 @@ extern void nf_unregister_afinfo(const struct nf_afinfo *afinfo); extern void (*ip_nat_decode_session)(struct sk_buff *, struct flowi *); static inline void -nf_nat_decode_session(struct sk_buff *skb, struct flowi *fl, int family) +nf_nat_decode_session(struct sk_buff *skb, struct flowi *fl, u_int8_t family) { #ifdef CONFIG_NF_NAT_NEEDED void (*decodefn)(struct sk_buff *, struct flowi *); @@ -315,7 +315,7 @@ extern struct proc_dir_entry *proc_net_netfilter; #else /* !CONFIG_NETFILTER */ #define NF_HOOK(pf, hook, skb, indev, outdev, okfn) (okfn)(skb) #define NF_HOOK_COND(pf, hook, skb, indev, outdev, okfn, cond) (okfn)(skb) -static inline int nf_hook_thresh(int pf, unsigned int hook, +static inline int nf_hook_thresh(u_int8_t pf, unsigned int hook, struct sk_buff *skb, struct net_device *indev, struct net_device *outdev, @@ -324,7 +324,7 @@ static inline int nf_hook_thresh(int pf, unsigned int hook, { return okfn(skb); } -static inline int nf_hook(int pf, unsigned int hook, struct sk_buff *skb, +static inline int nf_hook(u_int8_t pf, unsigned int hook, struct sk_buff *skb, struct net_device *indev, struct net_device *outdev, int (*okfn)(struct sk_buff *)) { @@ -332,7 +332,9 @@ static inline int nf_hook(int pf, unsigned int hook, struct sk_buff *skb, } struct flowi; static inline void -nf_nat_decode_session(struct sk_buff *skb, struct flowi *fl, int family) {} +nf_nat_decode_session(struct sk_buff *skb, struct flowi *fl, u_int8_t family) +{ +} #endif /*CONFIG_NETFILTER*/ #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index 2326296b6f25..6989b22716e6 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -292,7 +292,7 @@ struct xt_table /* Set this to THIS_MODULE if you are a module, otherwise NULL */ struct module *me; - int af; /* address/protocol family */ + u_int8_t af; /* address/protocol family */ }; #include @@ -346,19 +346,19 @@ extern struct xt_table_info *xt_replace_table(struct xt_table *table, struct xt_table_info *newinfo, int *error); -extern struct xt_match *xt_find_match(int af, const char *name, u8 revision); -extern struct xt_target *xt_find_target(int af, const char *name, u8 revision); -extern struct xt_target *xt_request_find_target(int af, const char *name, +extern struct xt_match *xt_find_match(u8 af, const char *name, u8 revision); +extern struct xt_target *xt_find_target(u8 af, const char *name, u8 revision); +extern struct xt_target *xt_request_find_target(u8 af, const char *name, u8 revision); -extern int xt_find_revision(int af, const char *name, u8 revision, int target, - int *err); +extern int xt_find_revision(u8 af, const char *name, u8 revision, + int target, int *err); -extern struct xt_table *xt_find_table_lock(struct net *net, int af, +extern struct xt_table *xt_find_table_lock(struct net *net, u_int8_t af, const char *name); extern void xt_table_unlock(struct xt_table *t); -extern int xt_proto_init(struct net *net, int af); -extern void xt_proto_fini(struct net *net, int af); +extern int xt_proto_init(struct net *net, u_int8_t af); +extern void xt_proto_fini(struct net *net, u_int8_t af); extern struct xt_table_info *xt_alloc_table_info(unsigned int size); extern void xt_free_table_info(struct xt_table_info *info); @@ -423,12 +423,12 @@ struct compat_xt_counters_info #define COMPAT_XT_ALIGN(s) (((s) + (__alignof__(struct compat_xt_counters)-1)) \ & ~(__alignof__(struct compat_xt_counters)-1)) -extern void xt_compat_lock(int af); -extern void xt_compat_unlock(int af); +extern void xt_compat_lock(u_int8_t af); +extern void xt_compat_unlock(u_int8_t af); -extern int xt_compat_add_offset(int af, unsigned int offset, short delta); -extern void xt_compat_flush_offsets(int af); -extern short xt_compat_calc_jump(int af, unsigned int offset); +extern int xt_compat_add_offset(u_int8_t af, unsigned int offset, short delta); +extern void xt_compat_flush_offsets(u_int8_t af); +extern short xt_compat_calc_jump(u_int8_t af, unsigned int offset); extern int xt_compat_match_offset(const struct xt_match *match); extern int xt_compat_match_from_user(struct xt_entry_match *m, -- cgit v1.2.2 From e948b20a71a06a740c925d6ea22b59b4e17cfa0c Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Wed, 8 Oct 2008 11:35:00 +0200 Subject: netfilter: rename ipt_recent to xt_recent Like with other modules (such as ipt_state), ipt_recent.h is changed to forward definitions to (IOW include) xt_recent.h, and xt_recent.c is changed to use the new constant names. Signed-off-by: Jan Engelhardt Signed-off-by: Patrick McHardy --- include/linux/netfilter/Kbuild | 1 + include/linux/netfilter/xt_recent.h | 26 ++++++++++++++++++++++++++ include/linux/netfilter_ipv4/ipt_recent.h | 28 +++++++++++----------------- 3 files changed, 38 insertions(+), 17 deletions(-) create mode 100644 include/linux/netfilter/xt_recent.h (limited to 'include/linux') diff --git a/include/linux/netfilter/Kbuild b/include/linux/netfilter/Kbuild index 3aff513d12c8..5a8af875bce2 100644 --- a/include/linux/netfilter/Kbuild +++ b/include/linux/netfilter/Kbuild @@ -32,6 +32,7 @@ header-y += xt_owner.h header-y += xt_pkttype.h header-y += xt_rateest.h header-y += xt_realm.h +header-y += xt_recent.h header-y += xt_sctp.h header-y += xt_state.h header-y += xt_statistic.h diff --git a/include/linux/netfilter/xt_recent.h b/include/linux/netfilter/xt_recent.h new file mode 100644 index 000000000000..5cfeb81c6794 --- /dev/null +++ b/include/linux/netfilter/xt_recent.h @@ -0,0 +1,26 @@ +#ifndef _LINUX_NETFILTER_XT_RECENT_H +#define _LINUX_NETFILTER_XT_RECENT_H 1 + +enum { + XT_RECENT_CHECK = 1 << 0, + XT_RECENT_SET = 1 << 1, + XT_RECENT_UPDATE = 1 << 2, + XT_RECENT_REMOVE = 1 << 3, + XT_RECENT_TTL = 1 << 4, + + XT_RECENT_SOURCE = 0, + XT_RECENT_DEST = 1, + + XT_RECENT_NAME_LEN = 200, +}; + +struct xt_recent_mtinfo { + u_int32_t seconds; + u_int32_t hit_count; + u_int8_t check_set; + u_int8_t invert; + char name[XT_RECENT_NAME_LEN]; + u_int8_t side; +}; + +#endif /* _LINUX_NETFILTER_XT_RECENT_H */ diff --git a/include/linux/netfilter_ipv4/ipt_recent.h b/include/linux/netfilter_ipv4/ipt_recent.h index 6508a4592651..d636cca133c2 100644 --- a/include/linux/netfilter_ipv4/ipt_recent.h +++ b/include/linux/netfilter_ipv4/ipt_recent.h @@ -1,27 +1,21 @@ #ifndef _IPT_RECENT_H #define _IPT_RECENT_H -#define RECENT_NAME "ipt_recent" -#define RECENT_VER "v0.3.1" +#include -#define IPT_RECENT_CHECK 1 -#define IPT_RECENT_SET 2 -#define IPT_RECENT_UPDATE 4 -#define IPT_RECENT_REMOVE 8 -#define IPT_RECENT_TTL 16 +#define ipt_recent_info xt_recent_mtinfo -#define IPT_RECENT_SOURCE 0 -#define IPT_RECENT_DEST 1 +enum { + IPT_RECENT_CHECK = XT_RECENT_CHECK, + IPT_RECENT_SET = XT_RECENT_SET, + IPT_RECENT_UPDATE = XT_RECENT_UPDATE, + IPT_RECENT_REMOVE = XT_RECENT_REMOVE, + IPT_RECENT_TTL = XT_RECENT_TTL, -#define IPT_RECENT_NAME_LEN 200 + IPT_RECENT_SOURCE = XT_RECENT_SOURCE, + IPT_RECENT_DEST = XT_RECENT_DEST, -struct ipt_recent_info { - u_int32_t seconds; - u_int32_t hit_count; - u_int8_t check_set; - u_int8_t invert; - char name[IPT_RECENT_NAME_LEN]; - u_int8_t side; + IPT_RECENT_NAME_LEN = XT_RECENT_NAME_LEN, }; #endif /*_IPT_RECENT_H*/ -- cgit v1.2.2 From 7e9c6eeb136a46dfd941852803b3a9dd78939b69 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Wed, 8 Oct 2008 11:35:00 +0200 Subject: netfilter: Introduce NFPROTO_* constants The netfilter subsystem only supports a handful of protocols (much less than PF_*) and even non-PF protocols like ARP and pseudo-protocols like PF_BRIDGE. By creating NFPROTO_*, we can earn a few memory savings on arrays that previously were always PF_MAX-sized and keep the pseudo-protocols to ourselves. Signed-off-by: Jan Engelhardt Signed-off-by: Patrick McHardy --- include/linux/netfilter.h | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index 8c83d2e23bde..bf3afb0844f7 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -52,6 +52,16 @@ enum nf_inet_hooks { NF_INET_NUMHOOKS }; +enum { + NFPROTO_UNSPEC = 0, + NFPROTO_IPV4 = 2, + NFPROTO_ARP = 3, + NFPROTO_BRIDGE = 7, + NFPROTO_IPV6 = 10, + NFPROTO_DECNET = 12, + NFPROTO_NUMPROTO, +}; + union nf_inet_addr { __u32 all[4]; __be32 ip; @@ -138,7 +148,7 @@ extern struct ctl_path nf_net_netfilter_sysctl_path[]; extern struct ctl_path nf_net_ipv4_netfilter_sysctl_path[]; #endif /* CONFIG_SYSCTL */ -extern struct list_head nf_hooks[NPROTO][NF_MAX_HOOKS]; +extern struct list_head nf_hooks[NFPROTO_NUMPROTO][NF_MAX_HOOKS]; int nf_hook_slow(u_int8_t pf, unsigned int hook, struct sk_buff *skb, struct net_device *indev, struct net_device *outdev, @@ -247,7 +257,7 @@ struct nf_afinfo { int route_key_size; }; -extern const struct nf_afinfo *nf_afinfo[NPROTO]; +extern const struct nf_afinfo *nf_afinfo[NFPROTO_NUMPROTO]; static inline const struct nf_afinfo *nf_get_afinfo(unsigned short family) { return rcu_dereference(nf_afinfo[family]); -- cgit v1.2.2 From 48dc7865aa3db9404aedc8677d9daf8f8f469ab0 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Wed, 8 Oct 2008 11:35:01 +0200 Subject: netfilter: netns: remove nf_*_net() wrappers Now that dev_net() exists, the usefullness of them is even less. Also they're a big problem in resolving circular header dependencies necessary for NOTRACK-in-netns patch. See below. Signed-off-by: Alexey Dobriyan Signed-off-by: Patrick McHardy --- include/linux/netfilter.h | 53 ----------------------------------------------- 1 file changed, 53 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index bf3afb0844f7..48cfe51bfddc 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -5,13 +5,11 @@ #include #include #include -#include #include #include #include #include #include -#include #endif #include #include @@ -355,56 +353,5 @@ extern void (*nf_ct_destroy)(struct nf_conntrack *); static inline void nf_ct_attach(struct sk_buff *new, struct sk_buff *skb) {} #endif -static inline struct net *nf_pre_routing_net(const struct net_device *in, - const struct net_device *out) -{ -#ifdef CONFIG_NET_NS - return in->nd_net; -#else - return &init_net; -#endif -} - -static inline struct net *nf_local_in_net(const struct net_device *in, - const struct net_device *out) -{ -#ifdef CONFIG_NET_NS - return in->nd_net; -#else - return &init_net; -#endif -} - -static inline struct net *nf_forward_net(const struct net_device *in, - const struct net_device *out) -{ -#ifdef CONFIG_NET_NS - BUG_ON(in->nd_net != out->nd_net); - return in->nd_net; -#else - return &init_net; -#endif -} - -static inline struct net *nf_local_out_net(const struct net_device *in, - const struct net_device *out) -{ -#ifdef CONFIG_NET_NS - return out->nd_net; -#else - return &init_net; -#endif -} - -static inline struct net *nf_post_routing_net(const struct net_device *in, - const struct net_device *out) -{ -#ifdef CONFIG_NET_NS - return out->nd_net; -#else - return &init_net; -#endif -} - #endif /*__KERNEL__*/ #endif /*__LINUX_NETFILTER_H*/ -- cgit v1.2.2 From 3bb0d1c00f86b13bb184193a8f0189ddd6f0459f Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Wed, 8 Oct 2008 11:35:10 +0200 Subject: netfilter: netns nf_conntrack: GRE conntracking in netns * make keymap list per-netns * per-netns keymal lock (not strictly necessary) * flush keymap at netns stop and module unload. Signed-off-by: Alexey Dobriyan Signed-off-by: Patrick McHardy --- include/linux/netfilter/nf_conntrack_proto_gre.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/nf_conntrack_proto_gre.h b/include/linux/netfilter/nf_conntrack_proto_gre.h index 535e4219d2bb..2a10efda17fb 100644 --- a/include/linux/netfilter/nf_conntrack_proto_gre.h +++ b/include/linux/netfilter/nf_conntrack_proto_gre.h @@ -87,7 +87,7 @@ int nf_ct_gre_keymap_add(struct nf_conn *ct, enum ip_conntrack_dir dir, /* delete keymap entries */ void nf_ct_gre_keymap_destroy(struct nf_conn *ct); -extern void nf_ct_gre_keymap_flush(void); +extern void nf_ct_gre_keymap_flush(struct net *net); extern void nf_nat_need_gre(void); #endif /* __KERNEL__ */ -- cgit v1.2.2 From e84392707e10301b93121e1b74e2823db50cdf9e Mon Sep 17 00:00:00 2001 From: KOVACS Krisztian Date: Wed, 8 Oct 2008 11:35:12 +0200 Subject: netfilter: iptables TPROXY target The TPROXY target implements redirection of non-local TCP/UDP traffic to local sockets. Additionally, it's possible to manipulate the packet mark if and only if a socket has been found. (We need this because we cannot use multiple targets in the same iptables rule.) Signed-off-by: KOVACS Krisztian Signed-off-by: Patrick McHardy --- include/linux/netfilter/xt_TPROXY.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) create mode 100644 include/linux/netfilter/xt_TPROXY.h (limited to 'include/linux') diff --git a/include/linux/netfilter/xt_TPROXY.h b/include/linux/netfilter/xt_TPROXY.h new file mode 100644 index 000000000000..152e8f97132b --- /dev/null +++ b/include/linux/netfilter/xt_TPROXY.h @@ -0,0 +1,14 @@ +#ifndef _XT_TPROXY_H_target +#define _XT_TPROXY_H_target + +/* TPROXY target is capable of marking the packet to perform + * redirection. We can get rid of that whenever we get support for + * mutliple targets in the same rule. */ +struct xt_tproxy_target_info { + u_int32_t mark_mask; + u_int32_t mark_value; + __be32 laddr; + __be16 lport; +}; + +#endif /* _XT_TPROXY_H_target */ -- cgit v1.2.2 From 18219d3f7d6a5bc43825a41e0763158efbdb80d3 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Wed, 8 Oct 2008 11:35:13 +0200 Subject: netfilter: ebtables: do centralized size checking Signed-off-by: Jan Engelhardt Signed-off-by: Patrick McHardy --- include/linux/netfilter_bridge/ebtables.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netfilter_bridge/ebtables.h b/include/linux/netfilter_bridge/ebtables.h index 892f5b7771c7..fd085af8962d 100644 --- a/include/linux/netfilter_bridge/ebtables.h +++ b/include/linux/netfilter_bridge/ebtables.h @@ -215,6 +215,7 @@ struct ebt_match int (*check)(const char *tablename, unsigned int hookmask, const struct ebt_entry *e, void *matchdata, unsigned int datalen); void (*destroy)(void *matchdata, unsigned int datalen); + unsigned int matchsize; struct module *me; }; @@ -229,6 +230,7 @@ struct ebt_watcher int (*check)(const char *tablename, unsigned int hookmask, const struct ebt_entry *e, void *watcherdata, unsigned int datalen); void (*destroy)(void *watcherdata, unsigned int datalen); + unsigned int targetsize; struct module *me; }; @@ -244,6 +246,7 @@ struct ebt_target int (*check)(const char *tablename, unsigned int hookmask, const struct ebt_entry *e, void *targetdata, unsigned int datalen); void (*destroy)(void *targetdata, unsigned int datalen); + unsigned int targetsize; struct module *me; }; -- cgit v1.2.2 From 19eda879a136889110c692dec4c2ab59e0e43cef Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Wed, 8 Oct 2008 11:35:13 +0200 Subject: netfilter: change return types of check functions for Ebtables extensions Signed-off-by: Jan Engelhardt Signed-off-by: Patrick McHardy --- include/linux/netfilter_bridge/ebtables.h | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter_bridge/ebtables.h b/include/linux/netfilter_bridge/ebtables.h index fd085af8962d..5f71719b7a27 100644 --- a/include/linux/netfilter_bridge/ebtables.h +++ b/include/linux/netfilter_bridge/ebtables.h @@ -211,8 +211,7 @@ struct ebt_match int (*match)(const struct sk_buff *skb, const struct net_device *in, const struct net_device *out, const void *matchdata, unsigned int datalen); - /* 0 == let it in */ - int (*check)(const char *tablename, unsigned int hookmask, + bool (*check)(const char *tablename, unsigned int hookmask, const struct ebt_entry *e, void *matchdata, unsigned int datalen); void (*destroy)(void *matchdata, unsigned int datalen); unsigned int matchsize; @@ -226,8 +225,7 @@ struct ebt_watcher void (*watcher)(const struct sk_buff *skb, unsigned int hooknr, const struct net_device *in, const struct net_device *out, const void *watcherdata, unsigned int datalen); - /* 0 == let it in */ - int (*check)(const char *tablename, unsigned int hookmask, + bool (*check)(const char *tablename, unsigned int hookmask, const struct ebt_entry *e, void *watcherdata, unsigned int datalen); void (*destroy)(void *watcherdata, unsigned int datalen); unsigned int targetsize; @@ -242,8 +240,7 @@ struct ebt_target int (*target)(struct sk_buff *skb, unsigned int hooknr, const struct net_device *in, const struct net_device *out, const void *targetdata, unsigned int datalen); - /* 0 == let it in */ - int (*check)(const char *tablename, unsigned int hookmask, + bool (*check)(const char *tablename, unsigned int hookmask, const struct ebt_entry *e, void *targetdata, unsigned int datalen); void (*destroy)(void *targetdata, unsigned int datalen); unsigned int targetsize; -- cgit v1.2.2 From 8cc784eec6676b58e7f60419c88179aaa97bf71c Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Wed, 8 Oct 2008 11:35:13 +0200 Subject: netfilter: change return types of match functions for ebtables extensions Signed-off-by: Jan Engelhardt Signed-off-by: Patrick McHardy --- include/linux/netfilter_bridge/ebtables.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter_bridge/ebtables.h b/include/linux/netfilter_bridge/ebtables.h index 5f71719b7a27..f9fda2c442a0 100644 --- a/include/linux/netfilter_bridge/ebtables.h +++ b/include/linux/netfilter_bridge/ebtables.h @@ -207,8 +207,7 @@ struct ebt_match { struct list_head list; const char name[EBT_FUNCTION_MAXNAMELEN]; - /* 0 == it matches */ - int (*match)(const struct sk_buff *skb, const struct net_device *in, + bool (*match)(const struct sk_buff *skb, const struct net_device *in, const struct net_device *out, const void *matchdata, unsigned int datalen); bool (*check)(const char *tablename, unsigned int hookmask, -- cgit v1.2.2 From 0ac6ab1f7915fc820ca0cf8f597290dbb249edcc Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Wed, 8 Oct 2008 11:35:13 +0200 Subject: netfilter: Change return types of targets/watchers for Ebtables extensions Signed-off-by: Jan Engelhardt Signed-off-by: Patrick McHardy --- include/linux/netfilter_bridge/ebtables.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter_bridge/ebtables.h b/include/linux/netfilter_bridge/ebtables.h index f9fda2c442a0..097432b94c55 100644 --- a/include/linux/netfilter_bridge/ebtables.h +++ b/include/linux/netfilter_bridge/ebtables.h @@ -221,7 +221,7 @@ struct ebt_watcher { struct list_head list; const char name[EBT_FUNCTION_MAXNAMELEN]; - void (*watcher)(const struct sk_buff *skb, unsigned int hooknr, + unsigned int (*watcher)(const struct sk_buff *skb, unsigned int hooknr, const struct net_device *in, const struct net_device *out, const void *watcherdata, unsigned int datalen); bool (*check)(const char *tablename, unsigned int hookmask, @@ -235,8 +235,8 @@ struct ebt_target { struct list_head list; const char name[EBT_FUNCTION_MAXNAMELEN]; - /* returns one of the standard verdicts */ - int (*target)(struct sk_buff *skb, unsigned int hooknr, + /* returns one of the standard EBT_* verdicts */ + unsigned int (*target)(struct sk_buff *skb, unsigned int hooknr, const struct net_device *in, const struct net_device *out, const void *targetdata, unsigned int datalen); bool (*check)(const char *tablename, unsigned int hookmask, -- cgit v1.2.2 From 001a18d369f4813ed792629ff4a9a6ade2a4a031 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Wed, 8 Oct 2008 11:35:14 +0200 Subject: netfilter: add dummy members to Ebtables code to ease transition to Xtables Signed-off-by: Jan Engelhardt Signed-off-by: Patrick McHardy --- include/linux/netfilter_bridge/ebtables.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netfilter_bridge/ebtables.h b/include/linux/netfilter_bridge/ebtables.h index 097432b94c55..82f854bf37e7 100644 --- a/include/linux/netfilter_bridge/ebtables.h +++ b/include/linux/netfilter_bridge/ebtables.h @@ -214,6 +214,8 @@ struct ebt_match const struct ebt_entry *e, void *matchdata, unsigned int datalen); void (*destroy)(void *matchdata, unsigned int datalen); unsigned int matchsize; + u_int8_t revision; + u_int8_t family; struct module *me; }; @@ -228,6 +230,8 @@ struct ebt_watcher const struct ebt_entry *e, void *watcherdata, unsigned int datalen); void (*destroy)(void *watcherdata, unsigned int datalen); unsigned int targetsize; + u_int8_t revision; + u_int8_t family; struct module *me; }; @@ -243,6 +247,8 @@ struct ebt_target const struct ebt_entry *e, void *targetdata, unsigned int datalen); void (*destroy)(void *targetdata, unsigned int datalen); unsigned int targetsize; + u_int8_t revision; + u_int8_t family; struct module *me; }; -- cgit v1.2.2 From 2d06d4a5cc107046508d860a0b47dbc43b829b79 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Wed, 8 Oct 2008 11:35:15 +0200 Subject: netfilter: change Ebtables function signatures to match Xtables's Signed-off-by: Jan Engelhardt Signed-off-by: Patrick McHardy --- include/linux/netfilter_bridge/ebtables.h | 43 +++++++++++++++++++------------ 1 file changed, 26 insertions(+), 17 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter_bridge/ebtables.h b/include/linux/netfilter_bridge/ebtables.h index 82f854bf37e7..f20a57da7a25 100644 --- a/include/linux/netfilter_bridge/ebtables.h +++ b/include/linux/netfilter_bridge/ebtables.h @@ -31,6 +31,9 @@ * The 4 lsb are more than enough to store the verdict. */ #define EBT_VERDICT_BITS 0x0000000F +struct xt_match; +struct xt_target; + struct ebt_counter { uint64_t pcnt; @@ -208,11 +211,13 @@ struct ebt_match struct list_head list; const char name[EBT_FUNCTION_MAXNAMELEN]; bool (*match)(const struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, const void *matchdata, - unsigned int datalen); - bool (*check)(const char *tablename, unsigned int hookmask, - const struct ebt_entry *e, void *matchdata, unsigned int datalen); - void (*destroy)(void *matchdata, unsigned int datalen); + const struct net_device *out, const struct xt_match *match, + const void *matchinfo, int offset, unsigned int protoff, + bool *hotdrop); + bool (*checkentry)(const char *table, const void *entry, + const struct xt_match *match, void *matchinfo, + unsigned int hook_mask); + void (*destroy)(const struct xt_match *match, void *matchinfo); unsigned int matchsize; u_int8_t revision; u_int8_t family; @@ -223,12 +228,14 @@ struct ebt_watcher { struct list_head list; const char name[EBT_FUNCTION_MAXNAMELEN]; - unsigned int (*watcher)(const struct sk_buff *skb, unsigned int hooknr, - const struct net_device *in, const struct net_device *out, - const void *watcherdata, unsigned int datalen); - bool (*check)(const char *tablename, unsigned int hookmask, - const struct ebt_entry *e, void *watcherdata, unsigned int datalen); - void (*destroy)(void *watcherdata, unsigned int datalen); + unsigned int (*target)(struct sk_buff *skb, + const struct net_device *in, const struct net_device *out, + unsigned int hook_num, const struct xt_target *target, + const void *targinfo); + bool (*checkentry)(const char *table, const void *entry, + const struct xt_target *target, void *targinfo, + unsigned int hook_mask); + void (*destroy)(const struct xt_target *target, void *targinfo); unsigned int targetsize; u_int8_t revision; u_int8_t family; @@ -240,12 +247,14 @@ struct ebt_target struct list_head list; const char name[EBT_FUNCTION_MAXNAMELEN]; /* returns one of the standard EBT_* verdicts */ - unsigned int (*target)(struct sk_buff *skb, unsigned int hooknr, - const struct net_device *in, const struct net_device *out, - const void *targetdata, unsigned int datalen); - bool (*check)(const char *tablename, unsigned int hookmask, - const struct ebt_entry *e, void *targetdata, unsigned int datalen); - void (*destroy)(void *targetdata, unsigned int datalen); + unsigned int (*target)(struct sk_buff *skb, + const struct net_device *in, const struct net_device *out, + unsigned int hook_num, const struct xt_target *target, + const void *targinfo); + bool (*checkentry)(const char *table, const void *entry, + const struct xt_target *target, void *targinfo, + unsigned int hook_mask); + void (*destroy)(const struct xt_target *target, void *targinfo); unsigned int targetsize; u_int8_t revision; u_int8_t family; -- cgit v1.2.2 From 043ef46c7690bfdbd5b012e15812a14a19ca5604 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Wed, 8 Oct 2008 11:35:15 +0200 Subject: netfilter: move Ebtables to use Xtables Signed-off-by: Jan Engelhardt Signed-off-by: Patrick McHardy --- include/linux/netfilter_bridge/ebtables.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter_bridge/ebtables.h b/include/linux/netfilter_bridge/ebtables.h index f20a57da7a25..d3f9243b9d9b 100644 --- a/include/linux/netfilter_bridge/ebtables.h +++ b/include/linux/netfilter_bridge/ebtables.h @@ -124,7 +124,7 @@ struct ebt_entry_match { union { char name[EBT_FUNCTION_MAXNAMELEN]; - struct ebt_match *match; + struct xt_match *match; } u; /* size of data */ unsigned int match_size; @@ -135,7 +135,7 @@ struct ebt_entry_watcher { union { char name[EBT_FUNCTION_MAXNAMELEN]; - struct ebt_watcher *watcher; + struct xt_target *watcher; } u; /* size of data */ unsigned int watcher_size; @@ -146,7 +146,7 @@ struct ebt_entry_target { union { char name[EBT_FUNCTION_MAXNAMELEN]; - struct ebt_target *target; + struct xt_target *target; } u; /* size of data */ unsigned int target_size; -- cgit v1.2.2 From 66bff35b722956cc2423f55fcf1b69cefa24ef8b Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Wed, 8 Oct 2008 11:35:16 +0200 Subject: netfilter: remove unused Ebtables functions Signed-off-by: Jan Engelhardt Signed-off-by: Patrick McHardy --- include/linux/netfilter_bridge/ebtables.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter_bridge/ebtables.h b/include/linux/netfilter_bridge/ebtables.h index d3f9243b9d9b..568a690f6a62 100644 --- a/include/linux/netfilter_bridge/ebtables.h +++ b/include/linux/netfilter_bridge/ebtables.h @@ -302,12 +302,6 @@ struct ebt_table ~(__alignof__(struct ebt_replace)-1)) extern int ebt_register_table(struct ebt_table *table); extern void ebt_unregister_table(struct ebt_table *table); -extern int ebt_register_match(struct ebt_match *match); -extern void ebt_unregister_match(struct ebt_match *match); -extern int ebt_register_watcher(struct ebt_watcher *watcher); -extern void ebt_unregister_watcher(struct ebt_watcher *watcher); -extern int ebt_register_target(struct ebt_target *target); -extern void ebt_unregister_target(struct ebt_target *target); extern unsigned int ebt_do_table(unsigned int hook, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, struct ebt_table *table); -- cgit v1.2.2 From 367c679007fa4f990eb7ee381326ec59d8148b0e Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Wed, 8 Oct 2008 11:35:17 +0200 Subject: netfilter: xtables: do centralized checkentry call (1/2) It used to be that {ip,ip6,etc}_tables called extension->checkentry themselves, but this can be moved into the xtables core. Signed-off-by: Jan Engelhardt Signed-off-by: Patrick McHardy --- include/linux/netfilter/x_tables.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index 6989b22716e6..85aa42785a5e 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -330,10 +330,12 @@ extern void xt_unregister_matches(struct xt_match *match, unsigned int n); extern int xt_check_match(const struct xt_match *match, unsigned short family, unsigned int size, const char *table, unsigned int hook, - unsigned short proto, int inv_proto); + unsigned short proto, int inv_proto, + const void *entry, void *matchinfo); extern int xt_check_target(const struct xt_target *target, unsigned short family, unsigned int size, const char *table, unsigned int hook, - unsigned short proto, int inv_proto); + unsigned short proto, int inv_proto, + const void *entry, void *targinfo); extern struct xt_table *xt_register_table(struct net *net, struct xt_table *table, -- cgit v1.2.2 From f7108a20dee44e5bb037f9e48f6a207b42e6ae1c Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Wed, 8 Oct 2008 11:35:18 +0200 Subject: netfilter: xtables: move extension arguments into compound structure (1/6) The function signatures for Xtables extensions have grown over time. It involves a lot of typing/replication, and also a bit of stack space even if they are not used. Realize an NFWS2008 idea and pack them into structs. The skb remains outside of the struct so gcc can continue to apply its optimizations. This patch does this for match extensions' match functions. A few ambiguities have also been addressed. The "offset" parameter for example has been renamed to "fragoff" (there are so many different offsets already) and "protoff" to "thoff" (there is more than just one protocol here, so clarify). Signed-off-by: Jan Engelhardt Signed-off-by: Patrick McHardy --- include/linux/netfilter/x_tables.h | 28 +++++++++++++++++++++------- 1 file changed, 21 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index 85aa42785a5e..bcd40ec83257 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -173,6 +173,26 @@ struct xt_counters_info #include +/** + * struct xt_match_param - parameters for match extensions' match functions + * + * @in: input netdevice + * @out: output netdevice + * @match: struct xt_match through which this function was invoked + * @matchinfo: per-match data + * @fragoff: packet is a fragment, this is the data offset + * @thoff: position of transport header relative to skb->data + * @hotdrop: drop packet if we had inspection problems + */ +struct xt_match_param { + const struct net_device *in, *out; + const struct xt_match *match; + const void *matchinfo; + int fragoff; + unsigned int thoff; + bool *hotdrop; +}; + struct xt_match { struct list_head list; @@ -185,13 +205,7 @@ struct xt_match non-linear skb, using skb_header_pointer and skb_ip_make_writable. */ bool (*match)(const struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - const struct xt_match *match, - const void *matchinfo, - int offset, - unsigned int protoff, - bool *hotdrop); + const struct xt_match_param *); /* Called when user tries to insert an entry of this type. */ /* Should return true or false. */ -- cgit v1.2.2 From 9b4fce7a3508a9776534188b6065b206a9608ccf Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Wed, 8 Oct 2008 11:35:18 +0200 Subject: netfilter: xtables: move extension arguments into compound structure (2/6) This patch does this for match extensions' checkentry functions. Signed-off-by: Jan Engelhardt Signed-off-by: Patrick McHardy --- include/linux/netfilter/x_tables.h | 32 ++++++++++++++++++++++---------- 1 file changed, 22 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index bcd40ec83257..763a704ce83f 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -193,6 +193,25 @@ struct xt_match_param { bool *hotdrop; }; +/** + * struct xt_mtchk_param - parameters for match extensions' + * checkentry functions + * + * @table: table the rule is tried to be inserted into + * @entryinfo: the family-specific rule data + * (struct ipt_ip, ip6t_ip, ebt_entry) + * @match: struct xt_match through which this function was invoked + * @matchinfo: per-match data + * @hook_mask: via which hooks the new rule is reachable + */ +struct xt_mtchk_param { + const char *table; + const void *entryinfo; + const struct xt_match *match; + void *matchinfo; + unsigned int hook_mask; +}; + struct xt_match { struct list_head list; @@ -208,12 +227,7 @@ struct xt_match const struct xt_match_param *); /* Called when user tries to insert an entry of this type. */ - /* Should return true or false. */ - bool (*checkentry)(const char *tablename, - const void *ip, - const struct xt_match *match, - void *matchinfo, - unsigned int hook_mask); + bool (*checkentry)(const struct xt_mtchk_param *); /* Called when entry of this type deleted. */ void (*destroy)(const struct xt_match *match, void *matchinfo); @@ -342,10 +356,8 @@ extern void xt_unregister_match(struct xt_match *target); extern int xt_register_matches(struct xt_match *match, unsigned int n); extern void xt_unregister_matches(struct xt_match *match, unsigned int n); -extern int xt_check_match(const struct xt_match *match, unsigned short family, - unsigned int size, const char *table, unsigned int hook, - unsigned short proto, int inv_proto, - const void *entry, void *matchinfo); +extern int xt_check_match(struct xt_mtchk_param *, u_int8_t family, + unsigned int size, u_int8_t proto, bool inv_proto); extern int xt_check_target(const struct xt_target *target, unsigned short family, unsigned int size, const char *table, unsigned int hook, unsigned short proto, int inv_proto, -- cgit v1.2.2 From 6be3d8598e883fb632edf059ba2f8d1b9f4da138 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Wed, 8 Oct 2008 11:35:19 +0200 Subject: netfilter: xtables: move extension arguments into compound structure (3/6) This patch does this for match extensions' destroy functions. Signed-off-by: Jan Engelhardt Signed-off-by: Patrick McHardy --- include/linux/netfilter/x_tables.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index 763a704ce83f..c79c88380149 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -212,6 +212,12 @@ struct xt_mtchk_param { unsigned int hook_mask; }; +/* Match destructor parameters */ +struct xt_mtdtor_param { + const struct xt_match *match; + void *matchinfo; +}; + struct xt_match { struct list_head list; @@ -230,7 +236,7 @@ struct xt_match bool (*checkentry)(const struct xt_mtchk_param *); /* Called when entry of this type deleted. */ - void (*destroy)(const struct xt_match *match, void *matchinfo); + void (*destroy)(const struct xt_mtdtor_param *); /* Called when userspace align differs from kernel space one */ void (*compat_from_user)(void *dst, void *src); -- cgit v1.2.2 From 7eb3558655aaa87a3e71a0c065dfaddda521fa6d Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Wed, 8 Oct 2008 11:35:19 +0200 Subject: netfilter: xtables: move extension arguments into compound structure (4/6) This patch does this for target extensions' target functions. Signed-off-by: Jan Engelhardt Signed-off-by: Patrick McHardy --- include/linux/netfilter/x_tables.h | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index c79c88380149..46d0cb1ad340 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -218,6 +218,22 @@ struct xt_mtdtor_param { void *matchinfo; }; +/** + * struct xt_target_param - parameters for target extensions' target functions + * + * @hooknum: hook through which this target was invoked + * @target: struct xt_target through which this function was invoked + * @targinfo: per-target data + * + * Other fields see above. + */ +struct xt_target_param { + const struct net_device *in, *out; + unsigned int hooknum; + const struct xt_target *target; + const void *targinfo; +}; + struct xt_match { struct list_head list; @@ -269,11 +285,7 @@ struct xt_target must now handle non-linear skbs, using skb_copy_bits and skb_ip_make_writable. */ unsigned int (*target)(struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - unsigned int hooknum, - const struct xt_target *target, - const void *targinfo); + const struct xt_target_param *); /* Called when user tries to insert an entry of this type: hook_mask is a bitmask of hooks from which it can be -- cgit v1.2.2 From af5d6dc200eb0fcc6fbd3df1ab4d8969004cb37f Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Wed, 8 Oct 2008 11:35:19 +0200 Subject: netfilter: xtables: move extension arguments into compound structure (5/6) This patch does this for target extensions' checkentry functions. Signed-off-by: Jan Engelhardt Signed-off-by: Patrick McHardy --- include/linux/netfilter/x_tables.h | 29 ++++++++++++++++++++--------- include/linux/netfilter_bridge/ebtables.h | 4 ++-- 2 files changed, 22 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index 46d0cb1ad340..8daeb496ba7a 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -234,6 +234,23 @@ struct xt_target_param { const void *targinfo; }; +/** + * struct xt_tgchk_param - parameters for target extensions' + * checkentry functions + * + * @entryinfo: the family-specific rule data + * (struct ipt_entry, ip6t_entry, arpt_entry, ebt_entry) + * + * Other fields see above. + */ +struct xt_tgchk_param { + const char *table; + void *entryinfo; + const struct xt_target *target; + void *targinfo; + unsigned int hook_mask; +}; + struct xt_match { struct list_head list; @@ -291,11 +308,7 @@ struct xt_target hook_mask is a bitmask of hooks from which it can be called. */ /* Should return true or false. */ - bool (*checkentry)(const char *tablename, - const void *entry, - const struct xt_target *target, - void *targinfo, - unsigned int hook_mask); + bool (*checkentry)(const struct xt_tgchk_param *); /* Called when entry of this type deleted. */ void (*destroy)(const struct xt_target *target, void *targinfo); @@ -376,10 +389,8 @@ extern void xt_unregister_matches(struct xt_match *match, unsigned int n); extern int xt_check_match(struct xt_mtchk_param *, u_int8_t family, unsigned int size, u_int8_t proto, bool inv_proto); -extern int xt_check_target(const struct xt_target *target, unsigned short family, - unsigned int size, const char *table, unsigned int hook, - unsigned short proto, int inv_proto, - const void *entry, void *targinfo); +extern int xt_check_target(struct xt_tgchk_param *, u_int8_t family, + unsigned int size, u_int8_t proto, bool inv_proto); extern struct xt_table *xt_register_table(struct net *net, struct xt_table *table, diff --git a/include/linux/netfilter_bridge/ebtables.h b/include/linux/netfilter_bridge/ebtables.h index 568a690f6a62..d45e29cd1cfb 100644 --- a/include/linux/netfilter_bridge/ebtables.h +++ b/include/linux/netfilter_bridge/ebtables.h @@ -310,9 +310,9 @@ extern unsigned int ebt_do_table(unsigned int hook, struct sk_buff *skb, #define FWINV(bool,invflg) ((bool) ^ !!(info->invflags & invflg)) /* True if the hook mask denotes that the rule is in a base chain, * used in the check() functions */ -#define BASE_CHAIN (hookmask & (1 << NF_BR_NUMHOOKS)) +#define BASE_CHAIN (par->hook_mask & (1 << NF_BR_NUMHOOKS)) /* Clear the bit in the hook mask that tells if the rule is on a base chain */ -#define CLEAR_BASE_CHAIN_BIT (hookmask &= ~(1 << NF_BR_NUMHOOKS)) +#define CLEAR_BASE_CHAIN_BIT (par->hook_mask &= ~(1 << NF_BR_NUMHOOKS)) /* True if the target is not a standard target */ #define INVALID_TARGET (info->target < -NUM_STANDARD_TARGETS || info->target >= 0) -- cgit v1.2.2 From a2df1648ba615dd5908e9a1fa7b2f133fa302487 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Wed, 8 Oct 2008 11:35:19 +0200 Subject: netfilter: xtables: move extension arguments into compound structure (6/6) This patch does this for target extensions' destroy functions. Signed-off-by: Jan Engelhardt Signed-off-by: Patrick McHardy --- include/linux/netfilter/x_tables.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index 8daeb496ba7a..e3b3b669a143 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -251,6 +251,12 @@ struct xt_tgchk_param { unsigned int hook_mask; }; +/* Target destructor parameters */ +struct xt_tgdtor_param { + const struct xt_target *target; + void *targinfo; +}; + struct xt_match { struct list_head list; @@ -311,7 +317,7 @@ struct xt_target bool (*checkentry)(const struct xt_tgchk_param *); /* Called when entry of this type deleted. */ - void (*destroy)(const struct xt_target *target, void *targinfo); + void (*destroy)(const struct xt_tgdtor_param *); /* Called when userspace align differs from kernel space one */ void (*compat_from_user)(void *dst, void *src); -- cgit v1.2.2 From 916a917dfec18535ff9e2afdafba82e6279eb4f4 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Wed, 8 Oct 2008 11:35:20 +0200 Subject: netfilter: xtables: provide invoked family value to extensions By passing in the family through which extensions were invoked, a bit of data space can be reclaimed. The "family" member will be added to the parameter structures and the check functions be adjusted. Signed-off-by: Jan Engelhardt Signed-off-by: Patrick McHardy --- include/linux/netfilter/x_tables.h | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index e3b3b669a143..be41b609c88f 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -183,6 +183,8 @@ struct xt_counters_info * @fragoff: packet is a fragment, this is the data offset * @thoff: position of transport header relative to skb->data * @hotdrop: drop packet if we had inspection problems + * @family: Actual NFPROTO_* through which the function is invoked + * (helpful when match->family == NFPROTO_UNSPEC) */ struct xt_match_param { const struct net_device *in, *out; @@ -191,6 +193,7 @@ struct xt_match_param { int fragoff; unsigned int thoff; bool *hotdrop; + u_int8_t family; }; /** @@ -210,12 +213,14 @@ struct xt_mtchk_param { const struct xt_match *match; void *matchinfo; unsigned int hook_mask; + u_int8_t family; }; /* Match destructor parameters */ struct xt_mtdtor_param { const struct xt_match *match; void *matchinfo; + u_int8_t family; }; /** @@ -232,6 +237,7 @@ struct xt_target_param { unsigned int hooknum; const struct xt_target *target; const void *targinfo; + u_int8_t family; }; /** @@ -249,12 +255,14 @@ struct xt_tgchk_param { const struct xt_target *target; void *targinfo; unsigned int hook_mask; + u_int8_t family; }; /* Target destructor parameters */ struct xt_tgdtor_param { const struct xt_target *target; void *targinfo; + u_int8_t family; }; struct xt_match @@ -393,9 +401,9 @@ extern void xt_unregister_match(struct xt_match *target); extern int xt_register_matches(struct xt_match *match, unsigned int n); extern void xt_unregister_matches(struct xt_match *match, unsigned int n); -extern int xt_check_match(struct xt_mtchk_param *, u_int8_t family, +extern int xt_check_match(struct xt_mtchk_param *, unsigned int size, u_int8_t proto, bool inv_proto); -extern int xt_check_target(struct xt_tgchk_param *, u_int8_t family, +extern int xt_check_target(struct xt_tgchk_param *, unsigned int size, u_int8_t proto, bool inv_proto); extern struct xt_table *xt_register_table(struct net *net, -- cgit v1.2.2 From 18ee49ddb0d242ed1d0e273038d5e4f6de7379d3 Mon Sep 17 00:00:00 2001 From: Lennert Buytenhek Date: Wed, 1 Oct 2008 15:41:33 +0000 Subject: phylib: rename mii_bus::dev to mii_bus::parent In preparation of giving mii_bus objects a device tree presence of their own, rename struct mii_bus's ->dev argument to ->parent, since having a 'struct device *dev' that points to our parent device conflicts with introducing a 'struct device dev' representing our own device. Signed-off-by: Lennert Buytenhek Signed-off-by: David S. Miller Acked-by: Andy Fleming --- include/linux/phy.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/phy.h b/include/linux/phy.h index 5f170f5b1a30..87499bdedc6f 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -99,7 +99,7 @@ struct mii_bus { */ struct mutex mdio_lock; - struct device *dev; + struct device *parent; /* list of all PHYs on bus */ struct phy_device *phy_map[PHY_MAX_ADDR]; -- cgit v1.2.2 From 298cf9beb9679522de995e249eccbd82f7c51999 Mon Sep 17 00:00:00 2001 From: Lennert Buytenhek Date: Wed, 8 Oct 2008 16:29:57 -0700 Subject: phylib: move to dynamic allocation of struct mii_bus This patch introduces mdiobus_alloc() and mdiobus_free(), and makes all mdio bus drivers use these functions to allocate their struct mii_bus'es dynamically. Signed-off-by: Lennert Buytenhek Signed-off-by: David S. Miller Acked-by: Andy Fleming --- include/linux/phy.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/phy.h b/include/linux/phy.h index 87499bdedc6f..ca4a83c4c537 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -408,8 +408,10 @@ void phy_start(struct phy_device *phydev); void phy_stop(struct phy_device *phydev); int phy_start_aneg(struct phy_device *phydev); +struct mii_bus *mdiobus_alloc(void); int mdiobus_register(struct mii_bus *bus); void mdiobus_unregister(struct mii_bus *bus); +void mdiobus_free(struct mii_bus *bus); struct phy_device *mdiobus_scan(struct mii_bus *bus, int addr); void phy_sanitize_settings(struct phy_device *phydev); -- cgit v1.2.2 From 46abc02175b3c246dd5141d878f565a8725060c9 Mon Sep 17 00:00:00 2001 From: Lennert Buytenhek Date: Wed, 8 Oct 2008 16:33:40 -0700 Subject: phylib: give mdio buses a device tree presence Introduce the mdio_bus class, and give each 'struct mii_bus' its own 'struct device', so that mii_bus objects are represented in the device tree and can be found by querying the device tree. Signed-off-by: Lennert Buytenhek Acked-by: Andy Fleming Signed-off-by: David S. Miller --- include/linux/phy.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/phy.h b/include/linux/phy.h index ca4a83c4c537..891f27f9137e 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -100,6 +100,13 @@ struct mii_bus { struct mutex mdio_lock; struct device *parent; + enum { + MDIOBUS_ALLOCATED = 1, + MDIOBUS_REGISTERED, + MDIOBUS_UNREGISTERED, + MDIOBUS_RELEASED, + } state; + struct device dev; /* list of all PHYs on bus */ struct phy_device *phy_map[PHY_MAX_ADDR]; @@ -113,6 +120,7 @@ struct mii_bus { */ int *irq; }; +#define to_mii_bus(d) container_of(d, struct mii_bus, dev) #define PHY_INTERRUPT_DISABLED 0x0 #define PHY_INTERRUPT_ENABLED 0x80000000 -- cgit v1.2.2 From 2e888103295f47b8fcbf7e9bb8c5da97dd2ecd76 Mon Sep 17 00:00:00 2001 From: Lennert Buytenhek Date: Mon, 29 Sep 2008 17:12:35 +0000 Subject: phylib: add mdiobus_{read,write} Add mdiobus_{read,write} routines to allow direct reading/writing of registers on an mii bus without having to go through the PHY abstraction, and make phy_{read,write} use these primitives. Signed-off-by: Lennert Buytenhek Signed-off-by: David S. Miller --- include/linux/phy.h | 46 ++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 38 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/phy.h b/include/linux/phy.h index 891f27f9137e..77c4ed60b982 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -122,6 +122,15 @@ struct mii_bus { }; #define to_mii_bus(d) container_of(d, struct mii_bus, dev) +struct mii_bus *mdiobus_alloc(void); +int mdiobus_register(struct mii_bus *bus); +void mdiobus_unregister(struct mii_bus *bus); +void mdiobus_free(struct mii_bus *bus); +struct phy_device *mdiobus_scan(struct mii_bus *bus, int addr); +int mdiobus_read(struct mii_bus *bus, int addr, u16 regnum); +int mdiobus_write(struct mii_bus *bus, int addr, u16 regnum, u16 val); + + #define PHY_INTERRUPT_DISABLED 0x0 #define PHY_INTERRUPT_ENABLED 0x80000000 @@ -399,8 +408,35 @@ struct phy_fixup { int (*run)(struct phy_device *phydev); }; -int phy_read(struct phy_device *phydev, u16 regnum); -int phy_write(struct phy_device *phydev, u16 regnum, u16 val); +/** + * phy_read - Convenience function for reading a given PHY register + * @phydev: the phy_device struct + * @regnum: register number to read + * + * NOTE: MUST NOT be called from interrupt context, + * because the bus read/write functions may wait for an interrupt + * to conclude the operation. + */ +static inline int phy_read(struct phy_device *phydev, u16 regnum) +{ + return mdiobus_read(phydev->bus, phydev->addr, regnum); +} + +/** + * phy_write - Convenience function for writing a given PHY register + * @phydev: the phy_device struct + * @regnum: register number to write + * @val: value to write to @regnum + * + * NOTE: MUST NOT be called from interrupt context, + * because the bus read/write functions may wait for an interrupt + * to conclude the operation. + */ +static inline int phy_write(struct phy_device *phydev, u16 regnum, u16 val) +{ + return mdiobus_write(phydev->bus, phydev->addr, regnum, val); +} + int get_phy_id(struct mii_bus *bus, int addr, u32 *phy_id); struct phy_device* get_phy_device(struct mii_bus *bus, int addr); int phy_clear_interrupt(struct phy_device *phydev); @@ -416,12 +452,6 @@ void phy_start(struct phy_device *phydev); void phy_stop(struct phy_device *phydev); int phy_start_aneg(struct phy_device *phydev); -struct mii_bus *mdiobus_alloc(void); -int mdiobus_register(struct mii_bus *bus); -void mdiobus_unregister(struct mii_bus *bus); -void mdiobus_free(struct mii_bus *bus); -struct phy_device *mdiobus_scan(struct mii_bus *bus, int addr); - void phy_sanitize_settings(struct phy_device *phydev); int phy_stop_interrupts(struct phy_device *phydev); int phy_enable_interrupts(struct phy_device *phydev); -- cgit v1.2.2 From 91da11f870f00a3322b81c73042291d7f0be5a17 Mon Sep 17 00:00:00 2001 From: Lennert Buytenhek Date: Tue, 7 Oct 2008 13:44:02 +0000 Subject: net: Distributed Switch Architecture protocol support Distributed Switch Architecture is a protocol for managing hardware switch chips. It consists of a set of MII management registers and commands to configure the switch, and an ethernet header format to signal which of the ports of the switch a packet was received from or is intended to be sent to. The switches that this driver supports are typically embedded in access points and routers, and a typical setup with a DSA switch looks something like this: +-----------+ +-----------+ | | RGMII | | | +-------+ +------ 1000baseT MDI ("WAN") | | | 6-port +------ 1000baseT MDI ("LAN1") | CPU | | ethernet +------ 1000baseT MDI ("LAN2") | |MIImgmt| switch +------ 1000baseT MDI ("LAN3") | +-------+ w/5 PHYs +------ 1000baseT MDI ("LAN4") | | | | +-----------+ +-----------+ The switch driver presents each port on the switch as a separate network interface to Linux, polls the switch to maintain software link state of those ports, forwards MII management interface accesses to those network interfaces (e.g. as done by ethtool) to the switch, and exposes the switch's hardware statistics counters via the appropriate Linux kernel interfaces. This initial patch supports the MII management interface register layout of the Marvell 88E6123, 88E6161 and 88E6165 switch chips, and supports the "Ethertype DSA" packet tagging format. (There is no officially registered ethertype for the Ethertype DSA packet format, so we just grab a random one. The ethertype to use is programmed into the switch, and the switch driver uses the value of ETH_P_EDSA for this, so this define can be changed at any time in the future if the one we chose is allocated to another protocol or if Ethertype DSA gets its own officially registered ethertype, and everything will continue to work.) Signed-off-by: Lennert Buytenhek Tested-by: Nicolas Pitre Tested-by: Byron Bradley Tested-by: Tim Ellis Tested-by: Peter van Valderen Tested-by: Dirk Teurlings Signed-off-by: David S. Miller --- include/linux/if_ether.h | 1 + include/linux/netdevice.h | 3 +++ 2 files changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/if_ether.h b/include/linux/if_ether.h index 723a1c5fbc6c..2140aacb6338 100644 --- a/include/linux/if_ether.h +++ b/include/linux/if_ether.h @@ -77,6 +77,7 @@ #define ETH_P_PAE 0x888E /* Port Access Entity (IEEE 802.1X) */ #define ETH_P_AOE 0x88A2 /* ATA over Ethernet */ #define ETH_P_TIPC 0x88CA /* TIPC */ +#define ETH_P_EDSA 0xDADA /* Ethertype DSA [ NOT AN OFFICIALLY REGISTERED ID ] */ /* * Non DIX types. Won't clash for 1500 types. diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 9cfd20be8b7f..794eeb4b3462 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -607,6 +607,9 @@ struct net_device /* Protocol specific pointers */ +#ifdef CONFIG_NET_DSA + void *dsa_ptr; /* dsa specific data */ +#endif void *atalk_ptr; /* AppleTalk link */ void *ip_ptr; /* IPv4 specific data */ void *dn_ptr; /* DECnet specific data */ -- cgit v1.2.2 From cf85d08fdf4548ee46657ccfb7f9949a85145db5 Mon Sep 17 00:00:00 2001 From: Lennert Buytenhek Date: Tue, 7 Oct 2008 13:45:02 +0000 Subject: dsa: add support for original DSA tagging format Most of the DSA switches currently in the field do not support the Ethertype DSA tagging format that one of the previous patches added support for, but only the original DSA tagging format. The original DSA tagging format carries the same information as the Ethertype DSA tagging format, but with the difference that it does not have an ethertype field. In other words, when receiving a packet that is tagged with an original DSA tag, there is no way of telling in eth_type_trans() that this packet is in fact a DSA-tagged packet. This patch adds a hook into eth_type_trans() which is only compiled in if support for a switch chip that doesn't support Ethertype DSA is selected, and which checks whether there is a DSA switch driver instance attached to this network device which uses the old tag format. If so, it sets the protocol field to ETH_P_DSA without looking at the packet, so that the packet ends up in the right place. Signed-off-by: Lennert Buytenhek Tested-by: Nicolas Pitre Tested-by: Peter van Valderen Tested-by: Dirk Teurlings Signed-off-by: David S. Miller --- include/linux/if_ether.h | 1 + include/linux/netdevice.h | 11 +++++++++++ 2 files changed, 12 insertions(+) (limited to 'include/linux') diff --git a/include/linux/if_ether.h b/include/linux/if_ether.h index 2140aacb6338..32b9dcda68c7 100644 --- a/include/linux/if_ether.h +++ b/include/linux/if_ether.h @@ -101,6 +101,7 @@ #define ETH_P_ECONET 0x0018 /* Acorn Econet */ #define ETH_P_HDLC 0x0019 /* HDLC frames */ #define ETH_P_ARCNET 0x001A /* 1A for ArcNet :-) */ +#define ETH_P_DSA 0x001B /* Distributed Switch Arch. */ #define ETH_P_PHONET 0x00F5 /* Nokia Phonet frames */ /* diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 794eeb4b3462..97f0c64c152a 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -42,6 +42,7 @@ #include #include +#include struct vlan_group; struct ethtool_ops; @@ -801,6 +802,16 @@ void dev_net_set(struct net_device *dev, struct net *net) #endif } +static inline bool netdev_uses_dsa_tags(struct net_device *dev) +{ +#ifdef CONFIG_NET_DSA_TAG_DSA + if (dev->dsa_ptr != NULL) + return dsa_uses_dsa_tags(dev->dsa_ptr); +#endif + + return 0; +} + /** * netdev_priv - access network device private data * @dev: network device -- cgit v1.2.2 From 396138f03f4521c55ecc3a5dd75d4c56e6323244 Mon Sep 17 00:00:00 2001 From: Lennert Buytenhek Date: Tue, 7 Oct 2008 13:46:07 +0000 Subject: dsa: add support for Trailer tagging format This adds support for the Trailer switch tagging format. This is another tagging that doesn't explicitly mark tagged packets with a distinct ethertype, so that we need to add a similar hack in the receive path as for the Original DSA tagging format. Signed-off-by: Lennert Buytenhek Tested-by: Byron Bradley Tested-by: Tim Ellis Signed-off-by: David S. Miller --- include/linux/if_ether.h | 1 + include/linux/netdevice.h | 10 ++++++++++ 2 files changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/if_ether.h b/include/linux/if_ether.h index 32b9dcda68c7..a0099e98b5c4 100644 --- a/include/linux/if_ether.h +++ b/include/linux/if_ether.h @@ -102,6 +102,7 @@ #define ETH_P_HDLC 0x0019 /* HDLC frames */ #define ETH_P_ARCNET 0x001A /* 1A for ArcNet :-) */ #define ETH_P_DSA 0x001B /* Distributed Switch Arch. */ +#define ETH_P_TRAILER 0x001C /* Trailer switch tagging */ #define ETH_P_PHONET 0x00F5 /* Nokia Phonet frames */ /* diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 97f0c64c152a..d3ea3de70a8a 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -812,6 +812,16 @@ static inline bool netdev_uses_dsa_tags(struct net_device *dev) return 0; } +static inline bool netdev_uses_trailer_tags(struct net_device *dev) +{ +#ifdef CONFIG_NET_DSA_TAG_TRAILER + if (dev->dsa_ptr != NULL) + return dsa_uses_trailer_tags(dev->dsa_ptr); +#endif + + return 0; +} + /** * netdev_priv - access network device private data * @dev: network device -- cgit v1.2.2 From 7a67f63b3233ff28e753854fe27891c44f8588ae Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 8 Aug 2008 11:17:12 +0200 Subject: block: add bio_has_data() to detect whether a bio carries data or not Signed-off-by: Jens Axboe --- include/linux/bio.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bio.h b/include/linux/bio.h index 0933a14e6414..9e93c9299479 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -445,6 +445,14 @@ static inline char *__bio_kmap_irq(struct bio *bio, unsigned short idx, __bio_kmap_irq((bio), (bio)->bi_idx, (flags)) #define bio_kunmap_irq(buf,flags) __bio_kunmap_irq(buf, flags) +/* + * Check whether this bio carries any data or not. A NULL bio is allowed. + */ +static inline int bio_has_data(struct bio *bio) +{ + return bio && bio->bi_io_vec != NULL; +} + #if defined(CONFIG_BLK_DEV_INTEGRITY) #define bip_vec_idx(bip, idx) (&(bip->bip_vec[(idx)])) -- cgit v1.2.2 From a9c701e594669dd49fed448c27c64f20cfacc8a7 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 8 Aug 2008 11:04:44 +0200 Subject: block: use bio_has_data() to check for data carrying bio Signed-off-by: Jens Axboe --- include/linux/bio.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/bio.h b/include/linux/bio.h index 9e93c9299479..dbeb66f813ab 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -185,7 +185,7 @@ struct bio { #define bio_failfast(bio) ((bio)->bi_rw & (1 << BIO_RW_FAILFAST)) #define bio_rw_ahead(bio) ((bio)->bi_rw & (1 << BIO_RW_AHEAD)) #define bio_rw_meta(bio) ((bio)->bi_rw & (1 << BIO_RW_META)) -#define bio_empty_barrier(bio) (bio_barrier(bio) && !(bio)->bi_size) +#define bio_empty_barrier(bio) (bio_barrier(bio) && !bio_has_data(bio)) static inline unsigned int bio_cur_sectors(struct bio *bio) { -- cgit v1.2.2 From d628eaef310533767ce68664873869c2d7f78f09 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Sat, 9 Aug 2008 16:22:17 +0100 Subject: Fix up comments about matching flags between bio and rq Signed-off-by: David Woodhouse Signed-off-by: Jens Axboe --- include/linux/bio.h | 4 ++-- include/linux/blkdev.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bio.h b/include/linux/bio.h index dbeb66f813ab..17f1fbdb31bf 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -150,8 +150,8 @@ struct bio { * bit 3 -- fail fast, don't want low level driver retries * bit 4 -- synchronous I/O hint: the block layer will unplug immediately */ -#define BIO_RW 0 -#define BIO_RW_AHEAD 1 +#define BIO_RW 0 /* Must match RW in req flags (blkdev.h) */ +#define BIO_RW_AHEAD 1 /* Must match FAILFAST in req flags */ #define BIO_RW_BARRIER 2 #define BIO_RW_FAILFAST 3 #define BIO_RW_SYNC 4 diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 53ea933cf60b..e0ba018f5e88 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -84,7 +84,7 @@ enum { }; /* - * request type modified bits. first three bits match BIO_RW* bits, important + * request type modified bits. first two bits match BIO_RW* bits, important */ enum rq_flag_bits { __REQ_RW, /* not set, read. set, write */ -- cgit v1.2.2 From fb2dce862d9f9a68e6b9374579056ec9eca02a63 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Tue, 5 Aug 2008 18:01:53 +0100 Subject: Add 'discard' request handling Some block devices benefit from a hint that they can forget the contents of certain sectors. Add basic support for this to the block core, along with a 'blkdev_issue_discard()' helper function which issues such requests. The caller doesn't get to provide an end_io functio, since blkdev_issue_discard() will automatically split the request up into multiple bios if appropriate. Neither does the function wait for completion -- it's expected that callers won't care about when, or even _if_, the request completes. It's only a hint to the device anyway. By definition, the file system doesn't _care_ about these sectors any more. [With feedback from OGAWA Hirofumi and Jens Axboe Signed-off-by: Jens Axboe --- include/linux/bio.h | 8 ++++++-- include/linux/blkdev.h | 16 ++++++++++++++++ include/linux/fs.h | 3 ++- 3 files changed, 24 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bio.h b/include/linux/bio.h index 17f1fbdb31bf..1fdfc5621c83 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -149,6 +149,8 @@ struct bio { * bit 2 -- barrier * bit 3 -- fail fast, don't want low level driver retries * bit 4 -- synchronous I/O hint: the block layer will unplug immediately + * bit 5 -- metadata request + * bit 6 -- discard sectors */ #define BIO_RW 0 /* Must match RW in req flags (blkdev.h) */ #define BIO_RW_AHEAD 1 /* Must match FAILFAST in req flags */ @@ -156,6 +158,7 @@ struct bio { #define BIO_RW_FAILFAST 3 #define BIO_RW_SYNC 4 #define BIO_RW_META 5 +#define BIO_RW_DISCARD 6 /* * upper 16 bits of bi_rw define the io priority of this bio @@ -186,13 +189,14 @@ struct bio { #define bio_rw_ahead(bio) ((bio)->bi_rw & (1 << BIO_RW_AHEAD)) #define bio_rw_meta(bio) ((bio)->bi_rw & (1 << BIO_RW_META)) #define bio_empty_barrier(bio) (bio_barrier(bio) && !bio_has_data(bio)) +#define bio_discard(bio) ((bio)->bi_rw & (1 << BIO_RW_DISCARD)) static inline unsigned int bio_cur_sectors(struct bio *bio) { if (bio->bi_vcnt) return bio_iovec(bio)->bv_len >> 9; - - return 0; + else /* dataless requests such as discard */ + return bio->bi_size >> 9; } static inline void *bio_data(struct bio *bio) diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index e0ba018f5e88..26ececbbebe2 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -89,6 +89,7 @@ enum { enum rq_flag_bits { __REQ_RW, /* not set, read. set, write */ __REQ_FAILFAST, /* no low level driver retries */ + __REQ_DISCARD, /* request to discard sectors */ __REQ_SORTED, /* elevator knows about this request */ __REQ_SOFTBARRIER, /* may not be passed by ioscheduler */ __REQ_HARDBARRIER, /* may not be passed by drive either */ @@ -111,6 +112,7 @@ enum rq_flag_bits { }; #define REQ_RW (1 << __REQ_RW) +#define REQ_DISCARD (1 << __REQ_DISCARD) #define REQ_FAILFAST (1 << __REQ_FAILFAST) #define REQ_SORTED (1 << __REQ_SORTED) #define REQ_SOFTBARRIER (1 << __REQ_SOFTBARRIER) @@ -252,6 +254,7 @@ typedef void (request_fn_proc) (struct request_queue *q); typedef int (make_request_fn) (struct request_queue *q, struct bio *bio); typedef int (prep_rq_fn) (struct request_queue *, struct request *); typedef void (unplug_fn) (struct request_queue *); +typedef int (prepare_discard_fn) (struct request_queue *, struct request *); struct bio_vec; struct bvec_merge_data { @@ -307,6 +310,7 @@ struct request_queue make_request_fn *make_request_fn; prep_rq_fn *prep_rq_fn; unplug_fn *unplug_fn; + prepare_discard_fn *prepare_discard_fn; merge_bvec_fn *merge_bvec_fn; prepare_flush_fn *prepare_flush_fn; softirq_done_fn *softirq_done_fn; @@ -546,6 +550,7 @@ enum { #define blk_sorted_rq(rq) ((rq)->cmd_flags & REQ_SORTED) #define blk_barrier_rq(rq) ((rq)->cmd_flags & REQ_HARDBARRIER) #define blk_fua_rq(rq) ((rq)->cmd_flags & REQ_FUA) +#define blk_discard_rq(rq) ((rq)->cmd_flags & REQ_DISCARD) #define blk_bidi_rq(rq) ((rq)->next_rq != NULL) #define blk_empty_barrier(rq) (blk_barrier_rq(rq) && blk_fs_request(rq) && !(rq)->hard_nr_sectors) /* rq->queuelist of dequeued request must be list_empty() */ @@ -796,6 +801,7 @@ extern void blk_queue_merge_bvec(struct request_queue *, merge_bvec_fn *); extern void blk_queue_dma_alignment(struct request_queue *, int); extern void blk_queue_update_dma_alignment(struct request_queue *, int); extern void blk_queue_softirq_done(struct request_queue *, softirq_done_fn *); +extern void blk_queue_set_discard(struct request_queue *, prepare_discard_fn *); extern struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev); extern int blk_queue_ordered(struct request_queue *, unsigned, prepare_flush_fn *); extern int blk_do_ordered(struct request_queue *, struct request **); @@ -837,6 +843,16 @@ static inline struct request *blk_map_queue_find_tag(struct blk_queue_tag *bqt, } extern int blkdev_issue_flush(struct block_device *, sector_t *); +extern int blkdev_issue_discard(struct block_device *, sector_t sector, + unsigned nr_sects); + +static inline int sb_issue_discard(struct super_block *sb, + sector_t block, unsigned nr_blocks) +{ + block <<= (sb->s_blocksize_bits - 9); + nr_blocks <<= (sb->s_blocksize_bits - 9); + return blkdev_issue_discard(sb->s_bdev, block, nr_blocks); +} /* * command filter functions diff --git a/include/linux/fs.h b/include/linux/fs.h index 580b513668fe..eb0131319134 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -86,7 +86,8 @@ extern int dir_notify_enable; #define READ_META (READ | (1 << BIO_RW_META)) #define WRITE_SYNC (WRITE | (1 << BIO_RW_SYNC)) #define SWRITE_SYNC (SWRITE | (1 << BIO_RW_SYNC)) -#define WRITE_BARRIER ((1 << BIO_RW) | (1 << BIO_RW_BARRIER)) +#define WRITE_BARRIER (WRITE | (1 << BIO_RW_BARRIER)) +#define WRITE_DISCARD (WRITE | (1 << BIO_RW_DISCARD)) #define SEL_IN 1 #define SEL_OUT 2 -- cgit v1.2.2 From eae9acd13a8d14b50c00a961fa959606f34bbd92 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Tue, 5 Aug 2008 18:08:25 +0100 Subject: Support 'discard sectors' operation in translation layer support core Signed-off-by: David Woodhouse Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 1 + include/linux/mtd/blktrans.h | 2 ++ 2 files changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 26ececbbebe2..727886d25c4e 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -81,6 +81,7 @@ enum { */ REQ_LB_OP_EJECT = 0x40, /* eject request */ REQ_LB_OP_FLUSH = 0x41, /* flush device */ + REQ_LB_OP_DISCARD = 0x42, /* discard sectors */ }; /* diff --git a/include/linux/mtd/blktrans.h b/include/linux/mtd/blktrans.h index 310e61606415..8b4aa0523db7 100644 --- a/include/linux/mtd/blktrans.h +++ b/include/linux/mtd/blktrans.h @@ -41,6 +41,8 @@ struct mtd_blktrans_ops { unsigned long block, char *buffer); int (*writesect)(struct mtd_blktrans_dev *dev, unsigned long block, char *buffer); + int (*discard)(struct mtd_blktrans_dev *dev, + unsigned long block, unsigned nr_blocks); /* Block layer ioctls */ int (*getgeo)(struct mtd_blktrans_dev *dev, struct hd_geometry *geo); -- cgit v1.2.2 From 27b29e86bf3d4b3cf6641a0efd78ed11a9b633b2 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Sun, 10 Aug 2008 11:21:57 +0100 Subject: blktrace: support discard requests Signed-off-by: David Woodhouse Signed-off-by: Jens Axboe --- include/linux/blktrace_api.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/blktrace_api.h b/include/linux/blktrace_api.h index d084b8d227a5..27da2cc682ee 100644 --- a/include/linux/blktrace_api.h +++ b/include/linux/blktrace_api.h @@ -21,6 +21,7 @@ enum blktrace_cat { BLK_TC_NOTIFY = 1 << 10, /* special message */ BLK_TC_AHEAD = 1 << 11, /* readahead */ BLK_TC_META = 1 << 12, /* metadata */ + BLK_TC_DISCARD = 1 << 13, /* discard requests */ BLK_TC_END = 1 << 15, /* only 16-bits, reminder */ }; @@ -195,6 +196,9 @@ static inline void blk_add_trace_rq(struct request_queue *q, struct request *rq, if (likely(!bt)) return; + if (blk_discard_rq(rq)) + rw |= (1 << BIO_RW_DISCARD); + if (blk_pc_request(rq)) { what |= BLK_TC_ACT(BLK_TC_PC); __blk_add_trace(bt, 0, rq->data_len, rw, what, rq->errors, sizeof(rq->cmd), rq->cmd); -- cgit v1.2.2 From d30a2605be9d5132d95944916e8f578fcfe4f976 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Mon, 11 Aug 2008 15:58:42 +0100 Subject: Add BLKDISCARD ioctl to allow userspace to discard sectors We may well want mkfs tools to use this to mark the whole device as unwanted before they format it, for example. The ioctl takes a pair of uint64_ts, which are start offset and length in _bytes_. Although at the moment it might make sense for them both to be in 512-byte sectors, I don't want to limit the ABI to that. Signed-off-by: David Woodhouse Signed-off-by: Jens Axboe --- include/linux/fs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index eb0131319134..88358ca6af25 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -223,6 +223,7 @@ extern int dir_notify_enable; #define BLKTRACESTART _IO(0x12,116) #define BLKTRACESTOP _IO(0x12,117) #define BLKTRACETEARDOWN _IO(0x12,118) +#define BLKDISCARD _IO(0x12,119) #define BMAP_IOCTL 1 /* obsolete - kept for compatibility */ #define FIBMAP _IO(0x00,1) /* bmap access */ -- cgit v1.2.2 From e17fc0a1ccf88f6d4dcb363729f3141b0958c325 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Sat, 9 Aug 2008 16:42:20 +0100 Subject: Allow elevators to sort/merge discard requests MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit But blkdev_issue_discard() still emits requests which are interpreted as soft barriers, because naïve callers might otherwise issue subsequent writes to those same sectors, which might cross on the queue (if they're reallocated quickly enough). Callers still _can_ issue non-barrier discard requests, but they have to take care of queue ordering for themselves. Signed-off-by: David Woodhouse Signed-off-by: Jens Axboe --- include/linux/bio.h | 2 +- include/linux/blkdev.h | 5 +++-- include/linux/fs.h | 3 ++- 3 files changed, 6 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bio.h b/include/linux/bio.h index 1fdfc5621c83..33c3947d61e9 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -188,8 +188,8 @@ struct bio { #define bio_failfast(bio) ((bio)->bi_rw & (1 << BIO_RW_FAILFAST)) #define bio_rw_ahead(bio) ((bio)->bi_rw & (1 << BIO_RW_AHEAD)) #define bio_rw_meta(bio) ((bio)->bi_rw & (1 << BIO_RW_META)) -#define bio_empty_barrier(bio) (bio_barrier(bio) && !bio_has_data(bio)) #define bio_discard(bio) ((bio)->bi_rw & (1 << BIO_RW_DISCARD)) +#define bio_empty_barrier(bio) (bio_barrier(bio) && !bio_has_data(bio) && !bio_discard(bio)) static inline unsigned int bio_cur_sectors(struct bio *bio) { diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 727886d25c4e..e9eb35c9bf26 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -541,7 +541,7 @@ enum { #define blk_noretry_request(rq) ((rq)->cmd_flags & REQ_FAILFAST) #define blk_rq_started(rq) ((rq)->cmd_flags & REQ_STARTED) -#define blk_account_rq(rq) (blk_rq_started(rq) && blk_fs_request(rq)) +#define blk_account_rq(rq) (blk_rq_started(rq) && (blk_fs_request(rq) || blk_discard_rq(rq))) #define blk_pm_suspend_request(rq) ((rq)->cmd_type == REQ_TYPE_PM_SUSPEND) #define blk_pm_resume_request(rq) ((rq)->cmd_type == REQ_TYPE_PM_RESUME) @@ -598,7 +598,8 @@ static inline void blk_clear_queue_full(struct request_queue *q, int rw) #define RQ_NOMERGE_FLAGS \ (REQ_NOMERGE | REQ_STARTED | REQ_HARDBARRIER | REQ_SOFTBARRIER) #define rq_mergeable(rq) \ - (!((rq)->cmd_flags & RQ_NOMERGE_FLAGS) && blk_fs_request((rq))) + (!((rq)->cmd_flags & RQ_NOMERGE_FLAGS) && \ + (blk_discard_rq(rq) || blk_fs_request((rq)))) /* * q->prep_rq_fn return values diff --git a/include/linux/fs.h b/include/linux/fs.h index 88358ca6af25..860689f541b1 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -87,7 +87,8 @@ extern int dir_notify_enable; #define WRITE_SYNC (WRITE | (1 << BIO_RW_SYNC)) #define SWRITE_SYNC (SWRITE | (1 << BIO_RW_SYNC)) #define WRITE_BARRIER (WRITE | (1 << BIO_RW_BARRIER)) -#define WRITE_DISCARD (WRITE | (1 << BIO_RW_DISCARD)) +#define DISCARD_NOBARRIER (1 << BIO_RW_DISCARD) +#define DISCARD_BARRIER ((1 << BIO_RW_DISCARD) | (1 << BIO_RW_BARRIER)) #define SEL_IN 1 #define SEL_OUT 2 -- cgit v1.2.2 From 1a8e2bddd5c29008f311613e75925fecbf522c5b Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Wed, 13 Aug 2008 12:35:09 +0100 Subject: Kill REQ_TYPE_FLUSH It was only used by ps3disk, and it should probably have been REQ_TYPE_LINUX_BLOCK + REQ_LB_OP_FLUSH. Signed-off-by: David Woodhouse Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index e9eb35c9bf26..f131776f029e 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -54,7 +54,6 @@ enum rq_cmd_type_bits { REQ_TYPE_PM_SUSPEND, /* suspend request */ REQ_TYPE_PM_RESUME, /* resume request */ REQ_TYPE_PM_SHUTDOWN, /* shutdown request */ - REQ_TYPE_FLUSH, /* flush request */ REQ_TYPE_SPECIAL, /* driver defined type */ REQ_TYPE_LINUX_BLOCK, /* generic block layer message */ /* @@ -76,11 +75,8 @@ enum rq_cmd_type_bits { * */ enum { - /* - * just examples for now - */ REQ_LB_OP_EJECT = 0x40, /* eject request */ - REQ_LB_OP_FLUSH = 0x41, /* flush device */ + REQ_LB_OP_FLUSH = 0x41, /* flush request */ REQ_LB_OP_DISCARD = 0x42, /* discard sectors */ }; -- cgit v1.2.2 From 766ca4428d1239a970926856c447310c9c191af2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fernando=20Luis=20V=C3=A1zquez=20Cao?= Date: Thu, 14 Aug 2008 09:59:13 +0200 Subject: virtio_blk: use a wrapper function to access io context information of IO requests struct request has an ioprio member but it is never updated because currently bios do not hold io context information. The implication of this is that virtio_blk ends up passing useless information to the backend driver. That said, some IO schedulers such as CFQ do store io context information in struct request, but use private members for that, which means that that information cannot be directly accessed in a IO scheduler-independent way. This patch adds a function to obtain the ioprio of a request. We should avoid accessing ioprio directly and use this function instead, so that its users do not have to care about future changes in block layer structures or what the currently active IO controller is. This patch does not introduce any functional changes but paves the way for future clean-ups and enhancements. Signed-off-by: Fernando Luis Vazquez Cao Acked-by: Rusty Russell Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index f131776f029e..490ce458b031 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -232,6 +232,11 @@ struct request { struct request *next_rq; }; +static inline unsigned short req_get_ioprio(struct request *req) +{ + return req->ioprio; +} + /* * State information carried for REQ_TYPE_PM_SUSPEND and REQ_TYPE_PM_RESUME * requests. Some step values could eventually be made generic. -- cgit v1.2.2 From b8b3e16cfe6435d961f6aaebcfd52a1ff2a988c5 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Fri, 15 Aug 2008 10:15:19 +0200 Subject: block: drop virtual merging accounting Remove virtual merge accounting. Signed-off-by: Mikulas Patocka Signed-off-by: Jens Axboe --- include/linux/bio.h | 15 --------------- 1 file changed, 15 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bio.h b/include/linux/bio.h index 33c3947d61e9..894d16ce0020 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -26,21 +26,8 @@ #ifdef CONFIG_BLOCK -/* Platforms may set this to teach the BIO layer about IOMMU hardware. */ #include -#if defined(BIO_VMERGE_MAX_SIZE) && defined(BIO_VMERGE_BOUNDARY) -#define BIOVEC_VIRT_START_SIZE(x) (bvec_to_phys(x) & (BIO_VMERGE_BOUNDARY - 1)) -#define BIOVEC_VIRT_OVERSIZE(x) ((x) > BIO_VMERGE_MAX_SIZE) -#else -#define BIOVEC_VIRT_START_SIZE(x) 0 -#define BIOVEC_VIRT_OVERSIZE(x) 0 -#endif - -#ifndef BIO_VMERGE_BOUNDARY -#define BIO_VMERGE_BOUNDARY 0 -#endif - #define BIO_DEBUG #ifdef BIO_DEBUG @@ -240,8 +227,6 @@ static inline void *bio_data(struct bio *bio) ((bvec_to_phys((vec1)) + (vec1)->bv_len) == bvec_to_phys((vec2))) #endif -#define BIOVEC_VIRT_MERGEABLE(vec1, vec2) \ - ((((bvec_to_phys((vec1)) + (vec1)->bv_len) | bvec_to_phys((vec2))) & (BIO_VMERGE_BOUNDARY - 1)) == 0) #define __BIO_SEG_BOUNDARY(addr1, addr2, mask) \ (((addr1) | (mask)) == (((addr2) - 1) | (mask))) #define BIOVEC_SEG_BOUNDARY(q, b1, b2) \ -- cgit v1.2.2 From 5df97b91b5d7ed426034fcc84cb6e7cf682b8838 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Fri, 15 Aug 2008 10:20:02 +0200 Subject: drop vmerge accounting Remove hw_segments field from struct bio and struct request. Without virtual merge accounting they have no purpose. Signed-off-by: Mikulas Patocka Signed-off-by: Jens Axboe --- include/linux/bio.h | 16 +--------------- include/linux/blkdev.h | 7 ------- 2 files changed, 1 insertion(+), 22 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bio.h b/include/linux/bio.h index 894d16ce0020..dfc3556d311c 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -77,21 +77,8 @@ struct bio { */ unsigned short bi_phys_segments; - /* Number of segments after physical and DMA remapping - * hardware coalescing is performed. - */ - unsigned short bi_hw_segments; - unsigned int bi_size; /* residual I/O count */ - /* - * To keep track of the max hw size, we account for the - * sizes of the first and last virtually mergeable segments - * in this bio - */ - unsigned int bi_hw_front_size; - unsigned int bi_hw_back_size; - unsigned int bi_max_vecs; /* max bvl_vecs we can hold */ struct bio_vec *bi_io_vec; /* the actual vec list */ @@ -113,7 +100,7 @@ struct bio { #define BIO_UPTODATE 0 /* ok after I/O completion */ #define BIO_RW_BLOCK 1 /* RW_AHEAD set, and read/write would block */ #define BIO_EOF 2 /* out-out-bounds error */ -#define BIO_SEG_VALID 3 /* nr_hw_seg valid */ +#define BIO_SEG_VALID 3 /* bi_phys_segments valid */ #define BIO_CLONED 4 /* doesn't own data */ #define BIO_BOUNCED 5 /* bio is a bounce bio */ #define BIO_USER_MAPPED 6 /* contains user pages */ @@ -324,7 +311,6 @@ extern void bio_free(struct bio *, struct bio_set *); extern void bio_endio(struct bio *, int); struct request_queue; extern int bio_phys_segments(struct request_queue *, struct bio *); -extern int bio_hw_segments(struct request_queue *, struct bio *); extern void __bio_clone(struct bio *, struct bio *); extern struct bio *bio_clone(struct bio *, gfp_t); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 490ce458b031..1adb03827bd3 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -189,13 +189,6 @@ struct request { */ unsigned short nr_phys_segments; - /* Number of scatter-gather addr+len pairs after - * physical and DMA remapping hardware coalescing is performed. - * This is the number of scatter-gather entries the driver - * will actually have to deal with after DMA mapping is done. - */ - unsigned short nr_hw_segments; - unsigned short ioprio; void *special; -- cgit v1.2.2 From 5b99c2ffa980528a197f26c7d876cceeccce8dd5 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 15 Aug 2008 10:56:11 +0200 Subject: block: make bi_phys_segments an unsigned int instead of short raid5 can overflow with more than 255 stripes, and we can increase it to an int for free on both 32 and 64-bit archs due to the padding. Signed-off-by: Jens Axboe --- include/linux/bio.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/bio.h b/include/linux/bio.h index dfc3556d311c..2c0c09034fd2 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -75,7 +75,7 @@ struct bio { /* Number of segments in this BIO after * physical address coalescing is performed. */ - unsigned short bi_phys_segments; + unsigned int bi_phys_segments; unsigned int bi_size; /* residual I/O count */ -- cgit v1.2.2 From a1ed5b0cffe4b16a93a6a3390e8cee0fbef94f86 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 25 Aug 2008 19:50:16 +0200 Subject: klist: don't iterate over deleted entries A klist entry is kept on the list till all its current iterations are finished; however, a new iteration after deletion also iterates over deleted entries as long as their reference count stays above zero. This causes problems for cases where there are users which iterate over the list while synchronized against list manipulations and natuarally expect already deleted entries to not show up during iteration. This patch implements dead flag which gets set on deletion so that iteration can skip already deleted entries. The dead flag piggy backs on the lowest bit of knode->n_klist and only visible to klist implementation proper. While at it, drop klist_iter->i_head as it's redundant and doesn't offer anything in semantics or performance wise as klist_iter->i_klist is dereferenced on every iteration anyway. Signed-off-by: Tejun Heo Cc: Greg Kroah-Hartman Cc: Alan Stern Cc: Jens Axboe Signed-off-by: Jens Axboe --- include/linux/klist.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/klist.h b/include/linux/klist.h index 06c338ef7f1b..8ea98db223e5 100644 --- a/include/linux/klist.h +++ b/include/linux/klist.h @@ -38,7 +38,7 @@ extern void klist_init(struct klist *k, void (*get)(struct klist_node *), void (*put)(struct klist_node *)); struct klist_node { - struct klist *n_klist; + void *n_klist; /* never access directly */ struct list_head n_node; struct kref n_ref; struct completion n_removed; @@ -57,7 +57,6 @@ extern int klist_node_attached(struct klist_node *n); struct klist_iter { struct klist *i_klist; - struct list_head *i_head; struct klist_node *i_cur; }; -- cgit v1.2.2 From 5a3ceb861663040f9ef0176df4aaa494bba5e352 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 25 Aug 2008 19:50:19 +0200 Subject: driver-core: use klist for class device list and implement iterator Iterating over entries using callback usually isn't too fun especially when the entry being iterated over can't be manipulated freely. This patch converts class->p->class_devices to klist and implements class device iterator so that the users can freely build their own control structure. The users are also free to call back into class code without worrying about locking. class_for_each_device() and class_find_device() are converted to use the new iterators, so their users don't have to worry about locking anymore either. Note: This depends on klist-dont-iterate-over-deleted-entries patch because class_intf->add/remove_dev() depends on proper synchronization with device removal. Signed-off-by: Tejun Heo Cc: Greg Kroah-Hartman Cc: Jens Axboe Signed-off-by: Jens Axboe --- include/linux/device.h | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/device.h b/include/linux/device.h index 4d8372d135df..246937c9cbc7 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -199,6 +199,11 @@ struct class { struct class_private *p; }; +struct class_dev_iter { + struct klist_iter ki; + const struct device_type *type; +}; + extern struct kobject *sysfs_dev_block_kobj; extern struct kobject *sysfs_dev_char_kobj; extern int __must_check __class_register(struct class *class, @@ -213,6 +218,13 @@ extern void class_unregister(struct class *class); __class_register(class, &__key); \ }) +extern void class_dev_iter_init(struct class_dev_iter *iter, + struct class *class, + struct device *start, + const struct device_type *type); +extern struct device *class_dev_iter_next(struct class_dev_iter *iter); +extern void class_dev_iter_exit(struct class_dev_iter *iter); + extern int class_for_each_device(struct class *class, struct device *start, void *data, int (*fn)(struct device *dev, void *data)); @@ -396,7 +408,7 @@ struct device { spinlock_t devres_lock; struct list_head devres_head; - struct list_head node; + struct klist_node knode_class; struct class *class; dev_t devt; /* dev_t, creates the sysfs "dev" */ struct attribute_group **groups; /* optional groups */ -- cgit v1.2.2 From 310a2c1012934f590192377f65940cad4aa72b15 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 25 Aug 2008 19:47:17 +0900 Subject: block: misc updates This patch makes the following misc updates in preparation for disk->part dereference fix and extended block devt support. * implment part_to_disk() * fix comment about gendisk->part indexing * rename get_part() to disk_map_sector() * don't use n which is always zero while printing disk information in diskstats_show() Signed-off-by: Tejun Heo Signed-off-by: Jens Axboe --- include/linux/genhd.h | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/genhd.h b/include/linux/genhd.h index be4f5e5bfe06..c64e659c9843 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -116,7 +116,7 @@ struct gendisk { int minors; /* maximum number of minors, =1 for * disks that can't be partitioned. */ char disk_name[32]; /* name of major driver */ - struct hd_struct **part; /* [indexed by minor] */ + struct hd_struct **part; /* [indexed by minor - 1] */ struct block_device_operations *fops; struct request_queue *queue; void *private_data; @@ -145,14 +145,21 @@ struct gendisk { #endif }; +static inline struct gendisk *part_to_disk(struct hd_struct *part) +{ + if (likely(part)) + return dev_to_disk((part)->dev.parent); + return NULL; +} + /* * Macros to operate on percpu disk statistics: * * The __ variants should only be called in critical sections. The full * variants disable/enable preemption. */ -static inline struct hd_struct *get_part(struct gendisk *gendiskp, - sector_t sector) +static inline struct hd_struct *disk_map_sector(struct gendisk *gendiskp, + sector_t sector) { struct hd_struct *part; int i; -- cgit v1.2.2 From cf771cb5a7b716f3f9e532fd42a1e3a0a75adec5 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 3 Sep 2008 09:01:09 +0200 Subject: block: make variable and argument names more consistent In hd_struct, @partno is used to denote partition number and a number of other places use @part to denote hd_struct. Functions use @part and @index instead. This causes confusion and makes it difficult to use consistent variable names for hd_struct. Always use @partno if a variable represents partition number. Also, print out functions use @f or @part for seq_file argument. Use @seqf uniformly instead. Signed-off-by: Tejun Heo Signed-off-by: Jens Axboe --- include/linux/genhd.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/genhd.h b/include/linux/genhd.h index c64e659c9843..d1723c0a8600 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -365,7 +365,7 @@ extern int get_blkdev_list(char *, int); extern void add_disk(struct gendisk *disk); extern void del_gendisk(struct gendisk *gp); extern void unlink_gendisk(struct gendisk *gp); -extern struct gendisk *get_gendisk(dev_t dev, int *part); +extern struct gendisk *get_gendisk(dev_t dev, int *partno); extern void set_device_ro(struct block_device *bdev, int flag); extern void set_disk_ro(struct gendisk *disk, int flag); @@ -534,8 +534,8 @@ struct unixware_disklabel { #define ADDPART_FLAG_RAID 1 #define ADDPART_FLAG_WHOLEDISK 2 -extern dev_t blk_lookup_devt(const char *name, int part); -extern char *disk_name (struct gendisk *hd, int part, char *buf); +extern dev_t blk_lookup_devt(const char *name, int partno); +extern char *disk_name (struct gendisk *hd, int partno, char *buf); extern int rescan_partitions(struct gendisk *disk, struct block_device *bdev); extern int __must_check add_partition(struct gendisk *, int, sector_t, sector_t, int); @@ -553,16 +553,16 @@ extern void blk_register_region(dev_t devt, unsigned long range, void *data); extern void blk_unregister_region(dev_t devt, unsigned long range); -static inline struct block_device *bdget_disk(struct gendisk *disk, int index) +static inline struct block_device *bdget_disk(struct gendisk *disk, int partno) { - return bdget(MKDEV(disk->major, disk->first_minor) + index); + return bdget(MKDEV(disk->major, disk->first_minor) + partno); } #else /* CONFIG_BLOCK */ static inline void printk_all_partitions(void) { } -static inline dev_t blk_lookup_devt(const char *name, int part) +static inline dev_t blk_lookup_devt(const char *name, int partno) { dev_t devt = MKDEV(0, 0); return devt; -- cgit v1.2.2 From f331c0296f2a9fee0d396a70598b954062603015 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 3 Sep 2008 09:01:48 +0200 Subject: block: don't depend on consecutive minor space * Implement disk_devt() and part_devt() and use them to directly access devt instead of computing it from ->major and ->first_minor. Note that all references to ->major and ->first_minor outside of block layer is used to determine devt of the disk (the part0) and as ->major and ->first_minor will continue to represent devt for the disk, converting these users aren't strictly necessary. However, convert them for consistency. * Implement disk_max_parts() to avoid directly deferencing genhd->minors. * Update bdget_disk() such that it doesn't assume consecutive minor space. * Move devt computation from register_disk() to add_disk() and make it the only one (all other usages use the initially determined value). These changes clean up the code and will help disk->part dereference fix and extended block device numbers. Signed-off-by: Tejun Heo Signed-off-by: Jens Axboe --- include/linux/genhd.h | 27 +++++++++++++++++++++------ 1 file changed, 21 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/genhd.h b/include/linux/genhd.h index d1723c0a8600..0ff75329199c 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -111,10 +111,14 @@ struct hd_struct { #define GENHD_FL_FAIL 64 struct gendisk { + /* major, first_minor and minors are input parameters only, + * don't use directly. Use disk_devt() and disk_max_parts(). + */ int major; /* major number of driver */ int first_minor; int minors; /* maximum number of minors, =1 for * disks that can't be partitioned. */ + char disk_name[32]; /* name of major driver */ struct hd_struct **part; /* [indexed by minor - 1] */ struct block_device_operations *fops; @@ -152,6 +156,21 @@ static inline struct gendisk *part_to_disk(struct hd_struct *part) return NULL; } +static inline int disk_max_parts(struct gendisk *disk) +{ + return disk->minors - 1; +} + +static inline dev_t disk_devt(struct gendisk *disk) +{ + return disk->dev.devt; +} + +static inline dev_t part_devt(struct hd_struct *part) +{ + return part->dev.devt; +} + /* * Macros to operate on percpu disk statistics: * @@ -163,7 +182,7 @@ static inline struct hd_struct *disk_map_sector(struct gendisk *gendiskp, { struct hd_struct *part; int i; - for (i = 0; i < gendiskp->minors - 1; i++) { + for (i = 0; i < disk_max_parts(gendiskp); i++) { part = gendiskp->part[i]; if (part && part->start_sect <= sector && sector < part->start_sect + part->nr_sects) @@ -366,6 +385,7 @@ extern void add_disk(struct gendisk *disk); extern void del_gendisk(struct gendisk *gp); extern void unlink_gendisk(struct gendisk *gp); extern struct gendisk *get_gendisk(dev_t dev, int *partno); +extern struct block_device *bdget_disk(struct gendisk *disk, int partno); extern void set_device_ro(struct block_device *bdev, int flag); extern void set_disk_ro(struct gendisk *disk, int flag); @@ -553,11 +573,6 @@ extern void blk_register_region(dev_t devt, unsigned long range, void *data); extern void blk_unregister_region(dev_t devt, unsigned long range); -static inline struct block_device *bdget_disk(struct gendisk *disk, int partno) -{ - return bdget(MKDEV(disk->major, disk->first_minor) + partno); -} - #else /* CONFIG_BLOCK */ static inline void printk_all_partitions(void) { } -- cgit v1.2.2 From e71bf0d0ee89e51b92776391c5634938236977d5 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 3 Sep 2008 09:03:02 +0200 Subject: block: fix disk->part[] dereferencing race disk->part[] is protected by its matching bdev's lock. However, non-critical accesses like collecting stats and printing out sysfs and proc information used to be performed without any locking. As partitions can come and go dynamically, partitions can go away underneath those non-critical accesses. As some of those accesses are writes, this theoretically can lead to silent corruption. This patch fixes the race by using RCU for the partition array and dev reference counter to hold partitions. * Rename disk->part[] to disk->__part[] to make sure no one outside genhd layer proper accesses it directly. * Use RCU for disk->__part[] dereferencing. * Implement disk_{get|put}_part() which can be used to get and put partitions from gendisk respectively. * Iterators are implemented to help iterate through all partitions safely. * Functions which require RCU readlock are marked with _rcu suffix. * Use disk_put_part() in __blkdev_put() instead of directly putting the contained kobject. Signed-off-by: Tejun Heo Signed-off-by: Jens Axboe --- include/linux/genhd.h | 53 +++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 39 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 0ff75329199c..7fbba19e076b 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -11,6 +11,7 @@ #include #include +#include #ifdef CONFIG_BLOCK @@ -100,6 +101,7 @@ struct hd_struct { #else struct disk_stats dkstats; #endif + struct rcu_head rcu_head; }; #define GENHD_FL_REMOVABLE 1 @@ -120,7 +122,14 @@ struct gendisk { * disks that can't be partitioned. */ char disk_name[32]; /* name of major driver */ - struct hd_struct **part; /* [indexed by minor - 1] */ + + /* Array of pointers to partitions indexed by partno - 1. + * Protected with matching bdev lock but stat and other + * non-critical accesses use RCU. Always access through + * helpers. + */ + struct hd_struct **__part; + struct block_device_operations *fops; struct request_queue *queue; void *private_data; @@ -171,25 +180,41 @@ static inline dev_t part_devt(struct hd_struct *part) return part->dev.devt; } +extern struct hd_struct *disk_get_part(struct gendisk *disk, int partno); + +static inline void disk_put_part(struct hd_struct *part) +{ + if (likely(part)) + put_device(&part->dev); +} + +/* + * Smarter partition iterator without context limits. + */ +#define DISK_PITER_REVERSE (1 << 0) /* iterate in the reverse direction */ +#define DISK_PITER_INCL_EMPTY (1 << 1) /* include 0-sized parts */ + +struct disk_part_iter { + struct gendisk *disk; + struct hd_struct *part; + int idx; + unsigned int flags; +}; + +extern void disk_part_iter_init(struct disk_part_iter *piter, + struct gendisk *disk, unsigned int flags); +extern struct hd_struct *disk_part_iter_next(struct disk_part_iter *piter); +extern void disk_part_iter_exit(struct disk_part_iter *piter); + +extern struct hd_struct *disk_map_sector_rcu(struct gendisk *disk, + sector_t sector); + /* * Macros to operate on percpu disk statistics: * * The __ variants should only be called in critical sections. The full * variants disable/enable preemption. */ -static inline struct hd_struct *disk_map_sector(struct gendisk *gendiskp, - sector_t sector) -{ - struct hd_struct *part; - int i; - for (i = 0; i < disk_max_parts(gendiskp); i++) { - part = gendiskp->part[i]; - if (part && part->start_sect <= sector - && sector < part->start_sect + part->nr_sects) - return part; - } - return NULL; -} #ifdef CONFIG_SMP #define __disk_stat_add(gendiskp, field, addnd) \ -- cgit v1.2.2 From c9959059161ddd7bf4670cf47367033d6b2f79c4 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 25 Aug 2008 19:47:21 +0900 Subject: block: fix diskstats access There are two variants of stat functions - ones prefixed with double underbars which don't care about preemption and ones without which disable preemption before manipulating per-cpu counters. It's unclear whether the underbarred ones assume that preemtion is disabled on entry as some callers don't do that. This patch unifies diskstats access by implementing disk_stat_lock() and disk_stat_unlock() which take care of both RCU (for partition access) and preemption (for per-cpu counter access). diskstats access should always be enclosed between the two functions. As such, there's no need for the versions which disables preemption. They're removed and double underbars ones are renamed to drop the underbars. As an extra argument is added, there's no danger of using the old version unconverted. disk_stat_lock() uses get_cpu() and returns the cpu index and all diskstat functions which access per-cpu counters now has @cpu argument to help RT. This change adds RCU or preemption operations at some places but also collapses several preemption ops into one at others. Overall, the performance difference should be negligible as all involved ops are very lightweight per-cpu ones. Signed-off-by: Tejun Heo Cc: Peter Zijlstra Signed-off-by: Jens Axboe --- include/linux/genhd.h | 139 +++++++++++++++++++++----------------------------- 1 file changed, 57 insertions(+), 82 deletions(-) (limited to 'include/linux') diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 7fbba19e076b..ac8a901f2002 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -209,16 +209,24 @@ extern void disk_part_iter_exit(struct disk_part_iter *piter); extern struct hd_struct *disk_map_sector_rcu(struct gendisk *disk, sector_t sector); -/* +/* * Macros to operate on percpu disk statistics: * - * The __ variants should only be called in critical sections. The full - * variants disable/enable preemption. + * {disk|part|all}_stat_{add|sub|inc|dec}() modify the stat counters + * and should be called between disk_stat_lock() and + * disk_stat_unlock(). + * + * part_stat_read() can be called at any time. + * + * part_stat_{add|set_all}() and {init|free}_part_stats are for + * internal use only. */ - #ifdef CONFIG_SMP -#define __disk_stat_add(gendiskp, field, addnd) \ - (per_cpu_ptr(gendiskp->dkstats, smp_processor_id())->field += addnd) +#define disk_stat_lock() ({ rcu_read_lock(); get_cpu(); }) +#define disk_stat_unlock() do { put_cpu(); rcu_read_unlock(); } while (0) + +#define disk_stat_add(cpu, gendiskp, field, addnd) \ + (per_cpu_ptr(gendiskp->dkstats, cpu)->field += addnd) #define disk_stat_read(gendiskp, field) \ ({ \ @@ -229,7 +237,8 @@ extern struct hd_struct *disk_map_sector_rcu(struct gendisk *disk, res; \ }) -static inline void disk_stat_set_all(struct gendisk *gendiskp, int value) { +static inline void disk_stat_set_all(struct gendisk *gendiskp, int value) +{ int i; for_each_possible_cpu(i) @@ -237,14 +246,14 @@ static inline void disk_stat_set_all(struct gendisk *gendiskp, int value) { sizeof(struct disk_stats)); } -#define __part_stat_add(part, field, addnd) \ - (per_cpu_ptr(part->dkstats, smp_processor_id())->field += addnd) +#define part_stat_add(cpu, part, field, addnd) \ + (per_cpu_ptr(part->dkstats, cpu)->field += addnd) -#define __all_stat_add(gendiskp, part, field, addnd, sector) \ -({ \ - if (part) \ - __part_stat_add(part, field, addnd); \ - __disk_stat_add(gendiskp, field, addnd); \ +#define all_stat_add(cpu, gendiskp, part, field, addnd, sector) \ +({ \ + if (part) \ + part_stat_add(cpu, part, field, addnd); \ + disk_stat_add(cpu, gendiskp, field, addnd); \ }) #define part_stat_read(part, field) \ @@ -264,10 +273,13 @@ static inline void part_stat_set_all(struct hd_struct *part, int value) memset(per_cpu_ptr(part->dkstats, i), value, sizeof(struct disk_stats)); } - + #else /* !CONFIG_SMP */ -#define __disk_stat_add(gendiskp, field, addnd) \ - (gendiskp->dkstats.field += addnd) +#define disk_stat_lock() ({ rcu_read_lock(); 0; }) +#define disk_stat_unlock() rcu_read_unlock() + +#define disk_stat_add(cpu, gendiskp, field, addnd) \ + (gendiskp->dkstats.field += addnd) #define disk_stat_read(gendiskp, field) (gendiskp->dkstats.field) static inline void disk_stat_set_all(struct gendisk *gendiskp, int value) @@ -275,14 +287,14 @@ static inline void disk_stat_set_all(struct gendisk *gendiskp, int value) memset(&gendiskp->dkstats, value, sizeof (struct disk_stats)); } -#define __part_stat_add(part, field, addnd) \ +#define part_stat_add(cpu, part, field, addnd) \ (part->dkstats.field += addnd) -#define __all_stat_add(gendiskp, part, field, addnd, sector) \ -({ \ - if (part) \ - part->dkstats.field += addnd; \ - __disk_stat_add(gendiskp, field, addnd); \ +#define all_stat_add(cpu, gendiskp, part, field, addnd, sector) \ +({ \ + if (part) \ + part_stat_add(cpu, part, field, addnd); \ + disk_stat_add(cpu, gendiskp, field, addnd); \ }) #define part_stat_read(part, field) (part->dkstats.field) @@ -294,63 +306,26 @@ static inline void part_stat_set_all(struct hd_struct *part, int value) #endif /* CONFIG_SMP */ -#define disk_stat_add(gendiskp, field, addnd) \ - do { \ - preempt_disable(); \ - __disk_stat_add(gendiskp, field, addnd); \ - preempt_enable(); \ - } while (0) - -#define __disk_stat_dec(gendiskp, field) __disk_stat_add(gendiskp, field, -1) -#define disk_stat_dec(gendiskp, field) disk_stat_add(gendiskp, field, -1) - -#define __disk_stat_inc(gendiskp, field) __disk_stat_add(gendiskp, field, 1) -#define disk_stat_inc(gendiskp, field) disk_stat_add(gendiskp, field, 1) - -#define __disk_stat_sub(gendiskp, field, subnd) \ - __disk_stat_add(gendiskp, field, -subnd) -#define disk_stat_sub(gendiskp, field, subnd) \ - disk_stat_add(gendiskp, field, -subnd) - -#define part_stat_add(gendiskp, field, addnd) \ - do { \ - preempt_disable(); \ - __part_stat_add(gendiskp, field, addnd);\ - preempt_enable(); \ - } while (0) - -#define __part_stat_dec(gendiskp, field) __part_stat_add(gendiskp, field, -1) -#define part_stat_dec(gendiskp, field) part_stat_add(gendiskp, field, -1) - -#define __part_stat_inc(gendiskp, field) __part_stat_add(gendiskp, field, 1) -#define part_stat_inc(gendiskp, field) part_stat_add(gendiskp, field, 1) - -#define __part_stat_sub(gendiskp, field, subnd) \ - __part_stat_add(gendiskp, field, -subnd) -#define part_stat_sub(gendiskp, field, subnd) \ - part_stat_add(gendiskp, field, -subnd) - -#define all_stat_add(gendiskp, part, field, addnd, sector) \ - do { \ - preempt_disable(); \ - __all_stat_add(gendiskp, part, field, addnd, sector); \ - preempt_enable(); \ - } while (0) - -#define __all_stat_dec(gendiskp, field, sector) \ - __all_stat_add(gendiskp, field, -1, sector) -#define all_stat_dec(gendiskp, field, sector) \ - all_stat_add(gendiskp, field, -1, sector) - -#define __all_stat_inc(gendiskp, part, field, sector) \ - __all_stat_add(gendiskp, part, field, 1, sector) -#define all_stat_inc(gendiskp, part, field, sector) \ - all_stat_add(gendiskp, part, field, 1, sector) - -#define __all_stat_sub(gendiskp, part, field, subnd, sector) \ - __all_stat_add(gendiskp, part, field, -subnd, sector) -#define all_stat_sub(gendiskp, part, field, subnd, sector) \ - all_stat_add(gendiskp, part, field, -subnd, sector) +#define disk_stat_dec(cpu, gendiskp, field) \ + disk_stat_add(cpu, gendiskp, field, -1) +#define disk_stat_inc(cpu, gendiskp, field) \ + disk_stat_add(cpu, gendiskp, field, 1) +#define disk_stat_sub(cpu, gendiskp, field, subnd) \ + disk_stat_add(cpu, gendiskp, field, -subnd) + +#define part_stat_dec(cpu, gendiskp, field) \ + part_stat_add(cpu, gendiskp, field, -1) +#define part_stat_inc(cpu, gendiskp, field) \ + part_stat_add(cpu, gendiskp, field, 1) +#define part_stat_sub(cpu, gendiskp, field, subnd) \ + part_stat_add(cpu, gendiskp, field, -subnd) + +#define all_stat_dec(cpu, gendiskp, field, sector) \ + all_stat_add(cpu, gendiskp, field, -1, sector) +#define all_stat_inc(cpu, gendiskp, part, field, sector) \ + all_stat_add(cpu, gendiskp, part, field, 1, sector) +#define all_stat_sub(cpu, gendiskp, part, field, subnd, sector) \ + all_stat_add(cpu, gendiskp, part, field, -subnd, sector) /* Inlines to alloc and free disk stats in struct gendisk */ #ifdef CONFIG_SMP @@ -401,8 +376,8 @@ static inline void free_part_stats(struct hd_struct *part) #endif /* CONFIG_SMP */ /* drivers/block/ll_rw_blk.c */ -extern void disk_round_stats(struct gendisk *disk); -extern void part_round_stats(struct hd_struct *part); +extern void disk_round_stats(int cpu, struct gendisk *disk); +extern void part_round_stats(int cpu, struct hd_struct *part); /* drivers/block/genhd.c */ extern int get_blkdev_list(char *, int); -- cgit v1.2.2 From bcce3de1be61e424deef35d1e86e86a35c4b6e65 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 25 Aug 2008 19:47:22 +0900 Subject: block: implement extended dev numbers Implement extended device numbers. A block driver can tell block layer that it wants to use extended device numbers. After the usual minor space is used up, block layer automatically allocates devt's from EXT_BLOCK_MAJOR. Currently only one major number is allocated for this but as the allocation is strictly on-demand, ~1mil minor space under it should suffice unless the system actually has more than ~1mil partitions and if that ever happens adding more majors to the extended devt area is easy. Due to internal implementation issues, the first partition can't be allocated on the extended area. In other words, genhd->minors should at least be 1. This limitation will be lifted by later changes. Signed-off-by: Tejun Heo Signed-off-by: Jens Axboe --- include/linux/genhd.h | 13 ++++++++++--- include/linux/major.h | 2 ++ 2 files changed, 12 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/genhd.h b/include/linux/genhd.h index ac8a901f2002..6fc532424062 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -113,13 +113,15 @@ struct hd_struct { #define GENHD_FL_FAIL 64 struct gendisk { - /* major, first_minor and minors are input parameters only, - * don't use directly. Use disk_devt() and disk_max_parts(). + /* major, first_minor, minors and ext_minors are input + * parameters only, don't use directly. Use disk_devt() and + * disk_max_parts(). */ int major; /* major number of driver */ int first_minor; int minors; /* maximum number of minors, =1 for * disks that can't be partitioned. */ + int ext_minors; /* number of extended dynamic minors */ char disk_name[32]; /* name of major driver */ @@ -167,7 +169,7 @@ static inline struct gendisk *part_to_disk(struct hd_struct *part) static inline int disk_max_parts(struct gendisk *disk) { - return disk->minors - 1; + return disk->minors + disk->ext_minors - 1; } static inline dev_t disk_devt(struct gendisk *disk) @@ -554,6 +556,8 @@ struct unixware_disklabel { #define ADDPART_FLAG_RAID 1 #define ADDPART_FLAG_WHOLEDISK 2 +extern int blk_alloc_devt(struct hd_struct *part, dev_t *devt); +extern void blk_free_devt(dev_t devt); extern dev_t blk_lookup_devt(const char *name, int partno); extern char *disk_name (struct gendisk *hd, int partno, char *buf); @@ -564,6 +568,9 @@ extern void printk_all_partitions(void); extern struct gendisk *alloc_disk_node(int minors, int node_id); extern struct gendisk *alloc_disk(int minors); +extern struct gendisk *alloc_disk_ext_node(int minors, int ext_minrs, + int node_id); +extern struct gendisk *alloc_disk_ext(int minors, int ext_minors); extern struct kobject *get_disk(struct gendisk *disk); extern void put_disk(struct gendisk *disk); extern void blk_register_region(dev_t devt, unsigned long range, diff --git a/include/linux/major.h b/include/linux/major.h index 53d5fafd85c3..88249452b935 100644 --- a/include/linux/major.h +++ b/include/linux/major.h @@ -170,4 +170,6 @@ #define VIOTAPE_MAJOR 230 +#define BLOCK_EXT_MAJOR 259 + #endif -- cgit v1.2.2 From 1f0142905d4812966831613847db38a66da29eb8 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 25 Aug 2008 19:47:23 +0900 Subject: block: adjust formatting for large minors and add ext_range sysfs attr With extended minors and the soon-to-follow debug feature, large minor numbers for block devices will be common. This patch does the followings to make printouts pretty. * Adapt print formats such that large minors don't break the formatting. * For extended MAJ:MIN, %02x%02x for MAJ:MIN used in printk_all_partitions() doesn't cut it anymore. Update it such that %03x:%05x is used if either MAJ or MIN doesn't fit in %02x. * Implement ext_range sysfs attribute which shows total minors the device can use including both conventional minor space and the extended one. Signed-off-by: Tejun Heo Signed-off-by: Jens Axboe --- include/linux/fs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 860689f541b1..02a9fb5a830c 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1685,6 +1685,7 @@ extern void chrdev_show(struct seq_file *,off_t); /* fs/block_dev.c */ #define BDEVNAME_SIZE 32 /* Largest string for a blockdev identifier */ +#define BDEVT_SIZE 10 /* Largest string for MAJ:MIN for blkdev */ #ifdef CONFIG_BLOCK #define BLKDEV_MAJOR_HASH_SIZE 255 -- cgit v1.2.2 From ed9e1982347b36573cd622ee5f4e2a7ccd79b3fd Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 25 Aug 2008 19:56:05 +0900 Subject: block: implement and use {disk|part}_to_dev() Implement {disk|part}_to_dev() and use them to access generic device instead of directly dereferencing {disk|part}->dev. To make sure no user is left behind, rename generic devices fields to __dev. This is in preparation of unifying partition 0 handling with other partitions. Signed-off-by: Tejun Heo Signed-off-by: Jens Axboe --- include/linux/genhd.h | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 6fc532424062..e4e18c509ac5 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -15,9 +15,11 @@ #ifdef CONFIG_BLOCK -#define kobj_to_dev(k) container_of(k, struct device, kobj) -#define dev_to_disk(device) container_of(device, struct gendisk, dev) -#define dev_to_part(device) container_of(device, struct hd_struct, dev) +#define kobj_to_dev(k) container_of((k), struct device, kobj) +#define dev_to_disk(device) container_of((device), struct gendisk, __dev) +#define dev_to_part(device) container_of((device), struct hd_struct, __dev) +#define disk_to_dev(disk) (&((disk)->__dev)) +#define part_to_dev(part) (&((part)->__dev)) extern struct device_type part_type; extern struct kobject *block_depr; @@ -88,7 +90,7 @@ struct disk_stats { struct hd_struct { sector_t start_sect; sector_t nr_sects; - struct device dev; + struct device __dev; struct kobject *holder_dir; int policy, partno; #ifdef CONFIG_FAIL_MAKE_REQUEST @@ -139,7 +141,7 @@ struct gendisk { int flags; struct device *driverfs_dev; // FIXME: remove - struct device dev; + struct device __dev; struct kobject *holder_dir; struct kobject *slave_dir; @@ -163,7 +165,7 @@ struct gendisk { static inline struct gendisk *part_to_disk(struct hd_struct *part) { if (likely(part)) - return dev_to_disk((part)->dev.parent); + return dev_to_disk(part_to_dev(part)->parent); return NULL; } @@ -174,12 +176,12 @@ static inline int disk_max_parts(struct gendisk *disk) static inline dev_t disk_devt(struct gendisk *disk) { - return disk->dev.devt; + return disk_to_dev(disk)->devt; } static inline dev_t part_devt(struct hd_struct *part) { - return part->dev.devt; + return part_to_dev(part)->devt; } extern struct hd_struct *disk_get_part(struct gendisk *disk, int partno); @@ -187,7 +189,7 @@ extern struct hd_struct *disk_get_part(struct gendisk *disk, int partno); static inline void disk_put_part(struct hd_struct *part) { if (likely(part)) - put_device(&part->dev); + put_device(part_to_dev(part)); } /* -- cgit v1.2.2 From b5d0b9df0ba5d9a044f3a21e7544f53d90bd1465 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 3 Sep 2008 09:06:42 +0200 Subject: block: introduce partition 0 genhd and partition code handled disk and partitions separately. All information about the whole disk was in struct genhd and partitions in struct hd_struct. However, the whole disk (part0) and other partitions have a lot in common and the data structures end up having good number of common fields and thus separate code paths doing the same thing. Also, the partition array was indexed by partno - 1 which gets pretty confusing at times. This patch introduces partition 0 and makes the partition array indexed by partno. Following patches will unify the handling of disk and parts piece-by-piece. This patch also implements disk_partitionable() which tests whether a disk is partitionable. With coming dynamic partition array change, the most common usage of disk_max_parts() will be testing whether a disk is partitionable and the number of max partitions will become much less important. Signed-off-by: Tejun Heo Signed-off-by: Jens Axboe --- include/linux/genhd.h | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/genhd.h b/include/linux/genhd.h index e4e18c509ac5..9e866a2aee50 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -127,12 +127,13 @@ struct gendisk { char disk_name[32]; /* name of major driver */ - /* Array of pointers to partitions indexed by partno - 1. + /* Array of pointers to partitions indexed by partno. * Protected with matching bdev lock but stat and other * non-critical accesses use RCU. Always access through * helpers. */ struct hd_struct **__part; + struct hd_struct part0; struct block_device_operations *fops; struct request_queue *queue; @@ -171,7 +172,12 @@ static inline struct gendisk *part_to_disk(struct hd_struct *part) static inline int disk_max_parts(struct gendisk *disk) { - return disk->minors + disk->ext_minors - 1; + return disk->minors + disk->ext_minors; +} + +static inline bool disk_partitionable(struct gendisk *disk) +{ + return disk_max_parts(disk) > 1; } static inline dev_t disk_devt(struct gendisk *disk) @@ -197,6 +203,7 @@ static inline void disk_put_part(struct hd_struct *part) */ #define DISK_PITER_REVERSE (1 << 0) /* iterate in the reverse direction */ #define DISK_PITER_INCL_EMPTY (1 << 1) /* include 0-sized parts */ +#define DISK_PITER_INCL_PART0 (1 << 2) /* include partition 0 */ struct disk_part_iter { struct gendisk *disk; -- cgit v1.2.2 From 80795aefb76d10c5d698e60c7e7750b5330787da Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 25 Aug 2008 19:56:07 +0900 Subject: block: move capacity from disk to part0 Move disk->capacity to part0->nr_sects and convert all users who directly accessed the field to use {get|set}_capacity(). This is done early to allow the __dev field to be moved. Signed-off-by: Tejun Heo Signed-off-by: Jens Axboe --- include/linux/genhd.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 9e866a2aee50..1cf828148ec6 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -138,7 +138,6 @@ struct gendisk { struct block_device_operations *fops; struct request_queue *queue; void *private_data; - sector_t capacity; int flags; struct device *driverfs_dev; // FIXME: remove @@ -411,11 +410,11 @@ static inline sector_t get_start_sect(struct block_device *bdev) } static inline sector_t get_capacity(struct gendisk *disk) { - return disk->capacity; + return disk->part0.nr_sects; } static inline void set_capacity(struct gendisk *disk, sector_t size) { - disk->capacity = size; + disk->part0.nr_sects = size; } #ifdef CONFIG_SOLARIS_X86_PARTITION -- cgit v1.2.2 From 548b10eb2959c96cef6fc29fc96e0931eeb53bc5 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 29 Aug 2008 09:01:47 +0200 Subject: block: move __dev from disk to part0 Move disk->__dev to part0->__dev. This simplifies bdget_disk() and lookup_devt() and allows common sysfs attributes to be unified. part_to_disk() is updated to handle part0 -> disk. Updated to include a fix from Bartlomiej Zolnierkiewicz , he writes: "part0 is a "special" partition and doesn't need to have capacity set - this fixes regression caused by "block: move __dev from disk to part0" commit." Signed-off-by: Tejun Heo Signed-off-by: Jens Axboe --- include/linux/genhd.h | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 1cf828148ec6..ff293ec8b3f7 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -16,9 +16,9 @@ #ifdef CONFIG_BLOCK #define kobj_to_dev(k) container_of((k), struct device, kobj) -#define dev_to_disk(device) container_of((device), struct gendisk, __dev) +#define dev_to_disk(device) container_of((device), struct gendisk, part0.__dev) #define dev_to_part(device) container_of((device), struct hd_struct, __dev) -#define disk_to_dev(disk) (&((disk)->__dev)) +#define disk_to_dev(disk) (&(disk)->part0.__dev) #define part_to_dev(part) (&((part)->__dev)) extern struct device_type part_type; @@ -141,7 +141,6 @@ struct gendisk { int flags; struct device *driverfs_dev; // FIXME: remove - struct device __dev; struct kobject *holder_dir; struct kobject *slave_dir; @@ -164,8 +163,12 @@ struct gendisk { static inline struct gendisk *part_to_disk(struct hd_struct *part) { - if (likely(part)) - return dev_to_disk(part_to_dev(part)->parent); + if (likely(part)) { + if (part->partno) + return dev_to_disk(part_to_dev(part)->parent); + else + return dev_to_disk(part_to_dev(part)); + } return NULL; } -- cgit v1.2.2 From e56105214943ce5f0901d20e972a7cfd0d1d0656 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 25 Aug 2008 19:56:09 +0900 Subject: block: unify sysfs size node handling Now that capacity and __dev are moved to part0, part0 and others can share the same method. Signed-off-by: Tejun Heo Signed-off-by: Jens Axboe --- include/linux/genhd.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/genhd.h b/include/linux/genhd.h index ff293ec8b3f7..9cb8380cf0eb 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -591,6 +591,9 @@ extern void blk_register_region(dev_t devt, unsigned long range, void *data); extern void blk_unregister_region(dev_t devt, unsigned long range); +extern ssize_t part_size_show(struct device *dev, + struct device_attribute *attr, char *buf); + #else /* CONFIG_BLOCK */ static inline void printk_all_partitions(void) { } -- cgit v1.2.2 From b7db9956e57c8151b930d5e5fe5c766e6aad3ff7 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 25 Aug 2008 19:56:10 +0900 Subject: block: move policy from disk to part0 Move disk->policy to part0->policy. Implement and use get_disk_ro(). Signed-off-by: Tejun Heo Signed-off-by: Jens Axboe --- include/linux/genhd.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 9cb8380cf0eb..4411bdd671dd 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -145,7 +145,6 @@ struct gendisk { struct kobject *slave_dir; struct timer_rand_state *random; - int policy; atomic_t sync_io; /* RAID */ unsigned long stamp; @@ -403,6 +402,11 @@ extern struct block_device *bdget_disk(struct gendisk *disk, int partno); extern void set_device_ro(struct block_device *bdev, int flag); extern void set_disk_ro(struct gendisk *disk, int flag); +static inline int get_disk_ro(struct gendisk *disk) +{ + return disk->part0.policy; +} + /* drivers/char/random.c */ extern void add_disk_randomness(struct gendisk *disk); extern void rand_initialize_disk(struct gendisk *disk); -- cgit v1.2.2 From 4c46501d1659475dc6c89554af6ce7fe6ecf615c Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 25 Aug 2008 19:56:11 +0900 Subject: block: move holder_dir from disk to part0 Move disk->holder_dir to part0->holder_dir. Kill now mostly superflous bdev_get_holder(). While at it, kill superflous kobject_get/put() around holder_dir, slave_dir and cmd_filter creation and collapse disk_sysfs_add_subdirs() into register_disk(). These serve no purpose but obfuscating the code. Signed-off-by: Tejun Heo Signed-off-by: Jens Axboe --- include/linux/genhd.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 4411bdd671dd..2c0e1b597ab4 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -141,7 +141,6 @@ struct gendisk { int flags; struct device *driverfs_dev; // FIXME: remove - struct kobject *holder_dir; struct kobject *slave_dir; struct timer_rand_state *random; -- cgit v1.2.2 From 0762b8bde9729f10f8e6249809660ff2ec3ad735 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 25 Aug 2008 19:56:12 +0900 Subject: block: always set bdev->bd_part Till now, bdev->bd_part is set only if the bdev was for parts other than part0. This patch makes bdev->bd_part always set so that code paths don't have to differenciate common handling. Signed-off-by: Tejun Heo Signed-off-by: Jens Axboe --- include/linux/genhd.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 2c0e1b597ab4..45a3682b5d87 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -412,7 +412,7 @@ extern void rand_initialize_disk(struct gendisk *disk); static inline sector_t get_start_sect(struct block_device *bdev) { - return bdev->bd_contains == bdev ? 0 : bdev->bd_part->start_sect; + return bdev->bd_part->start_sect; } static inline sector_t get_capacity(struct gendisk *disk) { -- cgit v1.2.2 From eddb2e26b5ee3c5da68ba4bf1921ba20e2097bff Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 25 Aug 2008 19:56:13 +0900 Subject: block: kill GENHD_FL_FAIL and use part0->make_it_fail GENHD_FL_FAIL for disk is what make_it_fail is for parts. Kill it and use part0->make_it_fail. Sysfs node handling is unified too. Signed-off-by: Tejun Heo Signed-off-by: Jens Axboe --- include/linux/genhd.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 45a3682b5d87..3d15b42dc352 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -112,7 +112,6 @@ struct hd_struct { #define GENHD_FL_CD 8 #define GENHD_FL_UP 16 #define GENHD_FL_SUPPRESS_PARTITION_INFO 32 -#define GENHD_FL_FAIL 64 struct gendisk { /* major, first_minor, minors and ext_minors are input @@ -596,6 +595,13 @@ extern void blk_unregister_region(dev_t devt, unsigned long range); extern ssize_t part_size_show(struct device *dev, struct device_attribute *attr, char *buf); +#ifdef CONFIG_FAIL_MAKE_REQUEST +extern ssize_t part_fail_show(struct device *dev, + struct device_attribute *attr, char *buf); +extern ssize_t part_fail_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count); +#endif /* CONFIG_FAIL_MAKE_REQUEST */ #else /* CONFIG_BLOCK */ -- cgit v1.2.2 From 074a7aca7afa6f230104e8e65eba3420263714a5 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 25 Aug 2008 19:56:14 +0900 Subject: block: move stats from disk to part0 Move stats related fields - stamp, in_flight, dkstats - from disk to part0 and unify stat handling such that... * part_stat_*() now updates part0 together if the specified partition is not part0. ie. part_stat_*() are now essentially all_stat_*(). * {disk|all}_stat_*() are gone. * part_round_stats() is updated similary. It handles part0 stats automatically and disk_round_stats() is killed. * part_{inc|dec}_in_fligh() is implemented which automatically updates part0 stats for parts other than part0. * disk_map_sector_rcu() is updated to return part0 if no part matches. Combined with the above changes, this makes NULL special case handling in callers unnecessary. * Separate stats show code paths for disk are collapsed into part stats show code paths. * Rename disk_stat_lock/unlock() to part_stat_lock/unlock() While at it, reposition stat handling macros a bit and add missing parentheses around macro parameters. Signed-off-by: Tejun Heo Signed-off-by: Jens Axboe --- include/linux/genhd.h | 159 ++++++++++++++------------------------------------ 1 file changed, 45 insertions(+), 114 deletions(-) (limited to 'include/linux') diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 3d15b42dc352..c90e1b4fbe5a 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -145,13 +145,6 @@ struct gendisk { struct timer_rand_state *random; atomic_t sync_io; /* RAID */ - unsigned long stamp; - int in_flight; -#ifdef CONFIG_SMP - struct disk_stats *dkstats; -#else - struct disk_stats dkstats; -#endif struct work_struct async_notify; #ifdef CONFIG_BLK_DEV_INTEGRITY struct blk_integrity *integrity; @@ -232,46 +225,18 @@ extern struct hd_struct *disk_map_sector_rcu(struct gendisk *disk, * internal use only. */ #ifdef CONFIG_SMP -#define disk_stat_lock() ({ rcu_read_lock(); get_cpu(); }) -#define disk_stat_unlock() do { put_cpu(); rcu_read_unlock(); } while (0) - -#define disk_stat_add(cpu, gendiskp, field, addnd) \ - (per_cpu_ptr(gendiskp->dkstats, cpu)->field += addnd) - -#define disk_stat_read(gendiskp, field) \ -({ \ - typeof(gendiskp->dkstats->field) res = 0; \ - int i; \ - for_each_possible_cpu(i) \ - res += per_cpu_ptr(gendiskp->dkstats, i)->field; \ - res; \ -}) - -static inline void disk_stat_set_all(struct gendisk *gendiskp, int value) -{ - int i; - - for_each_possible_cpu(i) - memset(per_cpu_ptr(gendiskp->dkstats, i), value, - sizeof(struct disk_stats)); -} +#define part_stat_lock() ({ rcu_read_lock(); get_cpu(); }) +#define part_stat_unlock() do { put_cpu(); rcu_read_unlock(); } while (0) -#define part_stat_add(cpu, part, field, addnd) \ - (per_cpu_ptr(part->dkstats, cpu)->field += addnd) - -#define all_stat_add(cpu, gendiskp, part, field, addnd, sector) \ -({ \ - if (part) \ - part_stat_add(cpu, part, field, addnd); \ - disk_stat_add(cpu, gendiskp, field, addnd); \ -}) +#define __part_stat_add(cpu, part, field, addnd) \ + (per_cpu_ptr((part)->dkstats, (cpu))->field += (addnd)) #define part_stat_read(part, field) \ ({ \ - typeof(part->dkstats->field) res = 0; \ + typeof((part)->dkstats->field) res = 0; \ int i; \ for_each_possible_cpu(i) \ - res += per_cpu_ptr(part->dkstats, i)->field; \ + res += per_cpu_ptr((part)->dkstats, i)->field; \ res; \ }) @@ -284,109 +249,73 @@ static inline void part_stat_set_all(struct hd_struct *part, int value) sizeof(struct disk_stats)); } -#else /* !CONFIG_SMP */ -#define disk_stat_lock() ({ rcu_read_lock(); 0; }) -#define disk_stat_unlock() rcu_read_unlock() - -#define disk_stat_add(cpu, gendiskp, field, addnd) \ - (gendiskp->dkstats.field += addnd) -#define disk_stat_read(gendiskp, field) (gendiskp->dkstats.field) - -static inline void disk_stat_set_all(struct gendisk *gendiskp, int value) +static inline int init_part_stats(struct hd_struct *part) { - memset(&gendiskp->dkstats, value, sizeof (struct disk_stats)); + part->dkstats = alloc_percpu(struct disk_stats); + if (!part->dkstats) + return 0; + return 1; } -#define part_stat_add(cpu, part, field, addnd) \ - (part->dkstats.field += addnd) - -#define all_stat_add(cpu, gendiskp, part, field, addnd, sector) \ -({ \ - if (part) \ - part_stat_add(cpu, part, field, addnd); \ - disk_stat_add(cpu, gendiskp, field, addnd); \ -}) - -#define part_stat_read(part, field) (part->dkstats.field) - -static inline void part_stat_set_all(struct hd_struct *part, int value) +static inline void free_part_stats(struct hd_struct *part) { - memset(&part->dkstats, value, sizeof(struct disk_stats)); + free_percpu(part->dkstats); } -#endif /* CONFIG_SMP */ - -#define disk_stat_dec(cpu, gendiskp, field) \ - disk_stat_add(cpu, gendiskp, field, -1) -#define disk_stat_inc(cpu, gendiskp, field) \ - disk_stat_add(cpu, gendiskp, field, 1) -#define disk_stat_sub(cpu, gendiskp, field, subnd) \ - disk_stat_add(cpu, gendiskp, field, -subnd) - -#define part_stat_dec(cpu, gendiskp, field) \ - part_stat_add(cpu, gendiskp, field, -1) -#define part_stat_inc(cpu, gendiskp, field) \ - part_stat_add(cpu, gendiskp, field, 1) -#define part_stat_sub(cpu, gendiskp, field, subnd) \ - part_stat_add(cpu, gendiskp, field, -subnd) +#else /* !CONFIG_SMP */ +#define part_stat_lock() ({ rcu_read_lock(); 0; }) +#define part_stat_unlock() rcu_read_unlock() -#define all_stat_dec(cpu, gendiskp, field, sector) \ - all_stat_add(cpu, gendiskp, field, -1, sector) -#define all_stat_inc(cpu, gendiskp, part, field, sector) \ - all_stat_add(cpu, gendiskp, part, field, 1, sector) -#define all_stat_sub(cpu, gendiskp, part, field, subnd, sector) \ - all_stat_add(cpu, gendiskp, part, field, -subnd, sector) +#define __part_stat_add(cpu, part, field, addnd) \ + ((part)->dkstats.field += addnd) -/* Inlines to alloc and free disk stats in struct gendisk */ -#ifdef CONFIG_SMP -static inline int init_disk_stats(struct gendisk *disk) -{ - disk->dkstats = alloc_percpu(struct disk_stats); - if (!disk->dkstats) - return 0; - return 1; -} +#define part_stat_read(part, field) ((part)->dkstats.field) -static inline void free_disk_stats(struct gendisk *disk) +static inline void part_stat_set_all(struct hd_struct *part, int value) { - free_percpu(disk->dkstats); + memset(&part->dkstats, value, sizeof(struct disk_stats)); } static inline int init_part_stats(struct hd_struct *part) { - part->dkstats = alloc_percpu(struct disk_stats); - if (!part->dkstats) - return 0; return 1; } static inline void free_part_stats(struct hd_struct *part) { - free_percpu(part->dkstats); } -#else /* CONFIG_SMP */ -static inline int init_disk_stats(struct gendisk *disk) -{ - return 1; -} +#endif /* CONFIG_SMP */ -static inline void free_disk_stats(struct gendisk *disk) -{ -} +#define part_stat_add(cpu, part, field, addnd) do { \ + __part_stat_add((cpu), (part), field, addnd); \ + if ((part)->partno) \ + __part_stat_add((cpu), &part_to_disk((part))->part0, \ + field, addnd); \ +} while (0) -static inline int init_part_stats(struct hd_struct *part) +#define part_stat_dec(cpu, gendiskp, field) \ + part_stat_add(cpu, gendiskp, field, -1) +#define part_stat_inc(cpu, gendiskp, field) \ + part_stat_add(cpu, gendiskp, field, 1) +#define part_stat_sub(cpu, gendiskp, field, subnd) \ + part_stat_add(cpu, gendiskp, field, -subnd) + +static inline void part_inc_in_flight(struct hd_struct *part) { - return 1; + part->in_flight++; + if (part->partno) + part_to_disk(part)->part0.in_flight++; } -static inline void free_part_stats(struct hd_struct *part) +static inline void part_dec_in_flight(struct hd_struct *part) { + part->in_flight--; + if (part->partno) + part_to_disk(part)->part0.in_flight--; } -#endif /* CONFIG_SMP */ /* drivers/block/ll_rw_blk.c */ -extern void disk_round_stats(int cpu, struct gendisk *disk); extern void part_round_stats(int cpu, struct hd_struct *part); /* drivers/block/genhd.c */ @@ -595,6 +524,8 @@ extern void blk_unregister_region(dev_t devt, unsigned long range); extern ssize_t part_size_show(struct device *dev, struct device_attribute *attr, char *buf); +extern ssize_t part_stat_show(struct device *dev, + struct device_attribute *attr, char *buf); #ifdef CONFIG_FAIL_MAKE_REQUEST extern ssize_t part_fail_show(struct device *dev, struct device_attribute *attr, char *buf); -- cgit v1.2.2 From 540eed5637b766bb1e881ef744c42617760b4815 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 25 Aug 2008 19:56:15 +0900 Subject: block: make partition array dynamic disk->__part used to be statically allocated to the maximum possible number of partitions. This patch makes partition array allocation dynamic. The added overhead is minimal as only real change is one memory dereference changed to RCU one. This saves both a bit of memory and cpu cycles iterating through unoccupied slots and makes increasing partition limit easier. Signed-off-by: Tejun Heo Signed-off-by: Jens Axboe --- include/linux/genhd.h | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/genhd.h b/include/linux/genhd.h index c90e1b4fbe5a..ecf649c3deed 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -113,6 +113,21 @@ struct hd_struct { #define GENHD_FL_UP 16 #define GENHD_FL_SUPPRESS_PARTITION_INFO 32 +#define BLK_SCSI_MAX_CMDS (256) +#define BLK_SCSI_CMD_PER_LONG (BLK_SCSI_MAX_CMDS / (sizeof(long) * 8)) + +struct blk_scsi_cmd_filter { + unsigned long read_ok[BLK_SCSI_CMD_PER_LONG]; + unsigned long write_ok[BLK_SCSI_CMD_PER_LONG]; + struct kobject kobj; +}; + +struct disk_part_tbl { + struct rcu_head rcu_head; + int len; + struct hd_struct *part[]; +}; + struct gendisk { /* major, first_minor, minors and ext_minors are input * parameters only, don't use directly. Use disk_devt() and @@ -131,7 +146,7 @@ struct gendisk { * non-critical accesses use RCU. Always access through * helpers. */ - struct hd_struct **__part; + struct disk_part_tbl *part_tbl; struct hd_struct part0; struct block_device_operations *fops; @@ -149,6 +164,7 @@ struct gendisk { #ifdef CONFIG_BLK_DEV_INTEGRITY struct blk_integrity *integrity; #endif + int node_id; }; static inline struct gendisk *part_to_disk(struct hd_struct *part) @@ -503,6 +519,7 @@ extern void blk_free_devt(dev_t devt); extern dev_t blk_lookup_devt(const char *name, int partno); extern char *disk_name (struct gendisk *hd, int partno, char *buf); +extern int disk_expand_part_tbl(struct gendisk *disk, int target); extern int rescan_partitions(struct gendisk *disk, struct block_device *bdev); extern int __must_check add_partition(struct gendisk *, int, sector_t, sector_t, int); extern void delete_partition(struct gendisk *, int); -- cgit v1.2.2 From 689d6fac40b41c7bf154f362deaf442548e4dc81 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 25 Aug 2008 19:56:16 +0900 Subject: block: replace @ext_minors with GENHD_FL_EXT_DEVT With previous changes, it's meaningless to limit the number of partitions. Replace @ext_minors with GENHD_FL_EXT_DEVT such that setting the flag allows the disk to have maximum number of allowed partitions (only limited by the number of entries in parsed_partitions as determined by MAX_PART constant). This kills not-too-pretty alloc_disk_ext[_node]() functions and makes @minors parameter to alloc_disk[_node]() unnecessary. The parameter is left alone to avoid disturbing the users. Signed-off-by: Tejun Heo Signed-off-by: Jens Axboe --- include/linux/genhd.h | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/genhd.h b/include/linux/genhd.h index ecf649c3deed..04524c213de1 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -58,6 +58,8 @@ enum { UNIXWARE_PARTITION = 0x63, /* Same as GNU_HURD and SCO Unix */ }; +#define DISK_MAX_PARTS 256 + #include #include #include @@ -112,6 +114,7 @@ struct hd_struct { #define GENHD_FL_CD 8 #define GENHD_FL_UP 16 #define GENHD_FL_SUPPRESS_PARTITION_INFO 32 +#define GENHD_FL_EXT_DEVT 64 /* allow extended devt */ #define BLK_SCSI_MAX_CMDS (256) #define BLK_SCSI_CMD_PER_LONG (BLK_SCSI_MAX_CMDS / (sizeof(long) * 8)) @@ -129,15 +132,13 @@ struct disk_part_tbl { }; struct gendisk { - /* major, first_minor, minors and ext_minors are input - * parameters only, don't use directly. Use disk_devt() and - * disk_max_parts(). + /* major, first_minor and minors are input parameters only, + * don't use directly. Use disk_devt() and disk_max_parts(). */ int major; /* major number of driver */ int first_minor; int minors; /* maximum number of minors, =1 for * disks that can't be partitioned. */ - int ext_minors; /* number of extended dynamic minors */ char disk_name[32]; /* name of major driver */ @@ -180,7 +181,9 @@ static inline struct gendisk *part_to_disk(struct hd_struct *part) static inline int disk_max_parts(struct gendisk *disk) { - return disk->minors + disk->ext_minors; + if (disk->flags & GENHD_FL_EXT_DEVT) + return DISK_MAX_PARTS; + return disk->minors; } static inline bool disk_partitionable(struct gendisk *disk) @@ -527,9 +530,6 @@ extern void printk_all_partitions(void); extern struct gendisk *alloc_disk_node(int minors, int node_id); extern struct gendisk *alloc_disk(int minors); -extern struct gendisk *alloc_disk_ext_node(int minors, int ext_minrs, - int node_id); -extern struct gendisk *alloc_disk_ext(int minors, int ext_minors); extern struct kobject *get_disk(struct gendisk *disk); extern void put_disk(struct gendisk *disk); extern void blk_register_region(dev_t devt, unsigned long range, -- cgit v1.2.2 From 3e1a7ff8a0a7b948f2684930166954f9e8e776fe Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 25 Aug 2008 19:56:17 +0900 Subject: block: allow disk to have extended device number Now that disk and partition handlings are mostly unified, it's easy to allow disk to have extended device number. This patch makes add_disk() use extended device number if disk->minors is zero. Both sd and ide-disk are updated to use this. * sd_format_disk_name() is implemented which can generically determine the drive name. This removes disk number restriction stemming from limited device names. * If sd index goes over SD_MAX_DISKS (which can be increased now BTW), sd simply doesn't initialize minors letting block layer choose extended device number. * If CONFIG_DEBUG_EXT_DEVT is set, both sd and ide-disk always set minors to 0 and use extended device numbers. Signed-off-by: Tejun Heo Signed-off-by: Jens Axboe --- include/linux/genhd.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 04524c213de1..206cdf96c3a7 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -59,6 +59,7 @@ enum { }; #define DISK_MAX_PARTS 256 +#define DISK_NAME_LEN 32 #include #include @@ -140,7 +141,7 @@ struct gendisk { int minors; /* maximum number of minors, =1 for * disks that can't be partitioned. */ - char disk_name[32]; /* name of major driver */ + char disk_name[DISK_NAME_LEN]; /* name of major driver */ /* Array of pointers to partitions indexed by partno. * Protected with matching bdev lock but stat and other -- cgit v1.2.2 From 18887ad910e56066233a07fd3cfb2fa11338b782 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 28 Jul 2008 13:08:45 +0200 Subject: block: make kblockd_schedule_work() take the queue as parameter Preparatory patch for checking queuing affinity. Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 1adb03827bd3..10aa46c8f170 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -912,7 +912,7 @@ static inline void put_dev_sector(Sector p) } struct work_struct; -int kblockd_schedule_work(struct work_struct *work); +int kblockd_schedule_work(struct request_queue *q, struct work_struct *work); void kblockd_flush_work(struct work_struct *work); #define MODULE_ALIAS_BLOCKDEV(major,minor) \ -- cgit v1.2.2 From c7c22e4d5c1fdebfac4dba76de7d0338c2b0d832 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Sat, 13 Sep 2008 20:26:01 +0200 Subject: block: add support for IO CPU affinity This patch adds support for controlling the IO completion CPU of either all requests on a queue, or on a per-request basis. We export a sysfs variable (rq_affinity) which, if set, migrates completions of requests to the CPU that originally submitted it. A bio helper (bio_set_completion_cpu()) is also added, so that queuers can ask for completion on that specific CPU. In testing, this has been show to cut the system time by as much as 20-40% on synthetic workloads where CPU affinity is desired. This requires a little help from the architecture, so it'll only work as designed for archs that are using the new generic smp helper infrastructure. Signed-off-by: Jens Axboe --- include/linux/bio.h | 11 +++++++++++ include/linux/blkdev.h | 5 ++++- include/linux/elevator.h | 8 ++++---- 3 files changed, 19 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bio.h b/include/linux/bio.h index 2c0c09034fd2..13aba20edb2d 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -81,6 +81,8 @@ struct bio { unsigned int bi_max_vecs; /* max bvl_vecs we can hold */ + unsigned int bi_comp_cpu; /* completion CPU */ + struct bio_vec *bi_io_vec; /* the actual vec list */ bio_end_io_t *bi_end_io; @@ -105,6 +107,7 @@ struct bio { #define BIO_BOUNCED 5 /* bio is a bounce bio */ #define BIO_USER_MAPPED 6 /* contains user pages */ #define BIO_EOPNOTSUPP 7 /* not supported */ +#define BIO_CPU_AFFINE 8 /* complete bio on same CPU as submitted */ #define bio_flagged(bio, flag) ((bio)->bi_flags & (1 << (flag))) /* @@ -342,6 +345,14 @@ void zero_fill_bio(struct bio *bio); extern struct bio_vec *bvec_alloc_bs(gfp_t, int, unsigned long *, struct bio_set *); extern unsigned int bvec_nr_vecs(unsigned short idx); +/* + * Allow queuer to specify a completion CPU for this bio + */ +static inline void bio_set_completion_cpu(struct bio *bio, unsigned int cpu) +{ + bio->bi_comp_cpu = cpu; +} + /* * bio_set is used to allow other portions of the IO system to * allocate their own private memory pools for bio and iovec structures. diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 10aa46c8f170..93204bf7b297 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -17,6 +17,7 @@ #include #include #include +#include #include @@ -139,7 +140,8 @@ enum rq_flag_bits { */ struct request { struct list_head queuelist; - struct list_head donelist; + struct call_single_data csd; + int cpu; struct request_queue *q; @@ -420,6 +422,7 @@ struct request_queue #define QUEUE_FLAG_ELVSWITCH 8 /* don't use elevator, just do FIFO */ #define QUEUE_FLAG_BIDI 9 /* queue supports bidi requests */ #define QUEUE_FLAG_NOMERGES 10 /* disable merge attempts */ +#define QUEUE_FLAG_SAME_COMP 11 /* force complete on same CPU */ static inline int queue_is_locked(struct request_queue *q) { diff --git a/include/linux/elevator.h b/include/linux/elevator.h index 639624b55fbe..bb791c311a56 100644 --- a/include/linux/elevator.h +++ b/include/linux/elevator.h @@ -173,15 +173,15 @@ enum { #define rb_entry_rq(node) rb_entry((node), struct request, rb_node) /* - * Hack to reuse the donelist list_head as the fifo time holder while + * Hack to reuse the csd.list list_head as the fifo time holder while * the request is in the io scheduler. Saves an unsigned long in rq. */ -#define rq_fifo_time(rq) ((unsigned long) (rq)->donelist.next) -#define rq_set_fifo_time(rq,exp) ((rq)->donelist.next = (void *) (exp)) +#define rq_fifo_time(rq) ((unsigned long) (rq)->csd.list.next) +#define rq_set_fifo_time(rq,exp) ((rq)->csd.list.next = (void *) (exp)) #define rq_entry_fifo(ptr) list_entry((ptr), struct request, queuelist) #define rq_fifo_clear(rq) do { \ list_del_init(&(rq)->queuelist); \ - INIT_LIST_HEAD(&(rq)->donelist); \ + INIT_LIST_HEAD(&(rq)->csd.list); \ } while (0) /* -- cgit v1.2.2 From ab780f1ece0dc8d5e8e8e85435acc5e4747ccda3 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 26 Aug 2008 10:25:02 +0200 Subject: block: inherit CPU completion on bio->rq and rq->rq merges Somewhat incomplete, as we do allow merges of requests and bios that have different completion CPUs given. This is done on the assumption that a larger IO is still more beneficial than CPU locality. Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 93204bf7b297..12df8efeef19 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -545,6 +545,7 @@ enum { #define blk_pm_request(rq) \ (blk_pm_suspend_request(rq) || blk_pm_resume_request(rq)) +#define blk_rq_cpu_valid(rq) ((rq)->cpu != -1) #define blk_sorted_rq(rq) ((rq)->cmd_flags & REQ_SORTED) #define blk_barrier_rq(rq) ((rq)->cmd_flags & REQ_HARDBARRIER) #define blk_fua_rq(rq) ((rq)->cmd_flags & REQ_FUA) -- cgit v1.2.2 From a3bce90edd8f6cafe3f63b1a943800792e830178 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Thu, 28 Aug 2008 16:17:05 +0900 Subject: block: add gfp_mask argument to blk_rq_map_user and blk_rq_map_user_iov Currently, blk_rq_map_user and blk_rq_map_user_iov always do GFP_KERNEL allocation. This adds gfp_mask argument to blk_rq_map_user and blk_rq_map_user_iov so sg can use it (sg always does GFP_ATOMIC allocation). Signed-off-by: FUJITA Tomonori Signed-off-by: Douglas Gilbert Cc: Mike Christie Cc: James Bottomley Signed-off-by: Jens Axboe --- include/linux/bio.h | 9 +++++---- include/linux/blkdev.h | 5 +++-- 2 files changed, 8 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bio.h b/include/linux/bio.h index 13aba20edb2d..200b185c3e83 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -325,11 +325,11 @@ extern int bio_add_pc_page(struct request_queue *, struct bio *, struct page *, unsigned int, unsigned int); extern int bio_get_nr_vecs(struct block_device *); extern struct bio *bio_map_user(struct request_queue *, struct block_device *, - unsigned long, unsigned int, int); + unsigned long, unsigned int, int, gfp_t); struct sg_iovec; extern struct bio *bio_map_user_iov(struct request_queue *, struct block_device *, - struct sg_iovec *, int, int); + struct sg_iovec *, int, int, gfp_t); extern void bio_unmap_user(struct bio *); extern struct bio *bio_map_kern(struct request_queue *, void *, unsigned int, gfp_t); @@ -337,9 +337,10 @@ extern struct bio *bio_copy_kern(struct request_queue *, void *, unsigned int, gfp_t, int); extern void bio_set_pages_dirty(struct bio *bio); extern void bio_check_pages_dirty(struct bio *bio); -extern struct bio *bio_copy_user(struct request_queue *, unsigned long, unsigned int, int); +extern struct bio *bio_copy_user(struct request_queue *, unsigned long, + unsigned int, int, gfp_t); extern struct bio *bio_copy_user_iov(struct request_queue *, struct sg_iovec *, - int, int); + int, int, gfp_t); extern int bio_uncopy_user(struct bio *); void zero_fill_bio(struct bio *bio); extern struct bio_vec *bvec_alloc_bs(gfp_t, int, unsigned long *, struct bio_set *); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 12df8efeef19..00e388d0e221 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -710,11 +710,12 @@ extern void __blk_stop_queue(struct request_queue *q); extern void __blk_run_queue(struct request_queue *); extern void blk_run_queue(struct request_queue *); extern void blk_start_queueing(struct request_queue *); -extern int blk_rq_map_user(struct request_queue *, struct request *, void __user *, unsigned long); +extern int blk_rq_map_user(struct request_queue *, struct request *, + void __user *, unsigned long, gfp_t); extern int blk_rq_unmap_user(struct bio *); extern int blk_rq_map_kern(struct request_queue *, struct request *, void *, unsigned int, gfp_t); extern int blk_rq_map_user_iov(struct request_queue *, struct request *, - struct sg_iovec *, int, unsigned int); + struct sg_iovec *, int, unsigned int, gfp_t); extern int blk_execute_rq(struct request_queue *, struct gendisk *, struct request *, int); extern void blk_execute_rq_nowait(struct request_queue *, struct gendisk *, -- cgit v1.2.2 From 152e283fdfea0cd11e297d982378b55937842dde Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Thu, 28 Aug 2008 16:17:06 +0900 Subject: block: introduce struct rq_map_data to use reserved pages This patch introduces struct rq_map_data to enable bio_copy_use_iov() use reserved pages. Currently, bio_copy_user_iov allocates bounce pages but drivers/scsi/sg.c wants to allocate pages by itself and use them. struct rq_map_data can be used to pass allocated pages to bio_copy_user_iov. The current users of bio_copy_user_iov simply passes NULL (they don't want to use pre-allocated pages). Signed-off-by: FUJITA Tomonori Cc: Jens Axboe Cc: Douglas Gilbert Cc: Mike Christie Cc: James Bottomley Signed-off-by: Jens Axboe --- include/linux/bio.h | 8 +++++--- include/linux/blkdev.h | 12 ++++++++++-- 2 files changed, 15 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bio.h b/include/linux/bio.h index 200b185c3e83..bc386cd5e996 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -327,6 +327,7 @@ extern int bio_get_nr_vecs(struct block_device *); extern struct bio *bio_map_user(struct request_queue *, struct block_device *, unsigned long, unsigned int, int, gfp_t); struct sg_iovec; +struct rq_map_data; extern struct bio *bio_map_user_iov(struct request_queue *, struct block_device *, struct sg_iovec *, int, int, gfp_t); @@ -337,9 +338,10 @@ extern struct bio *bio_copy_kern(struct request_queue *, void *, unsigned int, gfp_t, int); extern void bio_set_pages_dirty(struct bio *bio); extern void bio_check_pages_dirty(struct bio *bio); -extern struct bio *bio_copy_user(struct request_queue *, unsigned long, - unsigned int, int, gfp_t); -extern struct bio *bio_copy_user_iov(struct request_queue *, struct sg_iovec *, +extern struct bio *bio_copy_user(struct request_queue *, struct rq_map_data *, + unsigned long, unsigned int, int, gfp_t); +extern struct bio *bio_copy_user_iov(struct request_queue *, + struct rq_map_data *, struct sg_iovec *, int, int, gfp_t); extern int bio_uncopy_user(struct bio *); void zero_fill_bio(struct bio *bio); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 00e388d0e221..358ac423ed2f 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -642,6 +642,12 @@ static inline void blk_queue_bounce(struct request_queue *q, struct bio **bio) } #endif /* CONFIG_MMU */ +struct rq_map_data { + struct page **pages; + int page_order; + int nr_entries; +}; + struct req_iterator { int i; struct bio *bio; @@ -711,11 +717,13 @@ extern void __blk_run_queue(struct request_queue *); extern void blk_run_queue(struct request_queue *); extern void blk_start_queueing(struct request_queue *); extern int blk_rq_map_user(struct request_queue *, struct request *, - void __user *, unsigned long, gfp_t); + struct rq_map_data *, void __user *, unsigned long, + gfp_t); extern int blk_rq_unmap_user(struct bio *); extern int blk_rq_map_kern(struct request_queue *, struct request *, void *, unsigned int, gfp_t); extern int blk_rq_map_user_iov(struct request_queue *, struct request *, - struct sg_iovec *, int, unsigned int, gfp_t); + struct rq_map_data *, struct sg_iovec *, int, + unsigned int, gfp_t); extern int blk_execute_rq(struct request_queue *, struct gendisk *, struct request *, int); extern void blk_execute_rq_nowait(struct request_queue *, struct gendisk *, -- cgit v1.2.2 From 879040742cf09f2360a9ac41846288707e4e567c Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Thu, 28 Aug 2008 15:05:58 +0900 Subject: block: add blk_rq_aligned helper function This adds blk_rq_aligned helper function to see if alignment and padding requirement is satisfied for DMA transfer. This also converts blk_rq_map_kern and __blk_rq_map_user to use the helper function. Signed-off-by: FUJITA Tomonori Cc: Jens Axboe Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 358ac423ed2f..9c2549260427 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -899,6 +899,13 @@ static inline int queue_dma_alignment(struct request_queue *q) return q ? q->dma_alignment : 511; } +static inline int blk_rq_aligned(struct request_queue *q, void *addr, + unsigned int len) +{ + unsigned int alignment = queue_dma_alignment(q) | q->dma_pad_mask; + return !((unsigned long)addr & alignment) && !(len & alignment); +} + /* assumes size > 256 */ static inline unsigned int blksize_bits(unsigned int size) { -- cgit v1.2.2 From 818827669d85b84241696ffef2de485db46b0b5e Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Tue, 2 Sep 2008 16:20:19 +0900 Subject: block: make blk_rq_map_user take a NULL user-space buffer This patch changes blk_rq_map_user to accept a NULL user-space buffer with a READ command if rq_map_data is not NULL. Thus a caller can pass page frames to lk_rq_map_user to just set up a request and bios with page frames propely. bio_uncopy_user (called via blk_rq_unmap_user) doesn't copy data to user space with such request. Signed-off-by: FUJITA Tomonori Signed-off-by: Jens Axboe --- include/linux/bio.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/bio.h b/include/linux/bio.h index bc386cd5e996..7af373f253dc 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -108,6 +108,7 @@ struct bio { #define BIO_USER_MAPPED 6 /* contains user pages */ #define BIO_EOPNOTSUPP 7 /* not supported */ #define BIO_CPU_AFFINE 8 /* complete bio on same CPU as submitted */ +#define BIO_NULL_MAPPED 9 /* contains invalid user pages */ #define bio_flagged(bio, flag) ((bio)->bi_flags & (1 << (flag))) /* -- cgit v1.2.2 From 0c002c2f74e10baa9021d3ecc50585c6eafea568 Mon Sep 17 00:00:00 2001 From: Andrew Patterson Date: Thu, 4 Sep 2008 14:27:20 -0600 Subject: Wrapper for lower-level revalidate_disk routines. This is a wrapper for the lower-level revalidate_disk call-backs such as sd_revalidate_disk(). It allows us to perform pre and post operations when calling them. We will use this wrapper in a later patch to adjust block device sizes after an online resize (a _post_ operation). Signed-off-by: Andrew Patterson Signed-off-by: Jens Axboe --- include/linux/fs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 02a9fb5a830c..d63461f97983 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1722,6 +1722,7 @@ extern int fs_may_remount_ro(struct super_block *); */ #define bio_data_dir(bio) ((bio)->bi_rw & 1) +extern int revalidate_disk(struct gendisk *); extern int check_disk_change(struct block_device *); extern int __invalidate_device(struct block_device *); extern int invalidate_partition(struct gendisk *, int); -- cgit v1.2.2 From c3279d1454cdfed02a557d789d8a6d08ab4cbe70 Mon Sep 17 00:00:00 2001 From: Andrew Patterson Date: Thu, 4 Sep 2008 14:27:25 -0600 Subject: Adjust block device size after an online resize of a disk. The revalidate_disk routine now checks if a disk has been resized by comparing the gendisk capacity to the bdev inode size. If they are different (usually because the disk has been resized underneath the kernel) the bdev inode size is adjusted to match the capacity. Signed-off-by: Andrew Patterson Signed-off-by: Jens Axboe --- include/linux/fs.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index d63461f97983..32477e8872d5 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1722,6 +1722,8 @@ extern int fs_may_remount_ro(struct super_block *); */ #define bio_data_dir(bio) ((bio)->bi_rw & 1) +extern void check_disk_size_change(struct gendisk *disk, + struct block_device *bdev); extern int revalidate_disk(struct gendisk *); extern int check_disk_change(struct block_device *); extern int __invalidate_device(struct block_device *); -- cgit v1.2.2 From 242f9dcb8ba6f68fcd217a119a7648a4f69290e9 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Sun, 14 Sep 2008 05:55:09 -0700 Subject: block: unify request timeout handling Right now SCSI and others do their own command timeout handling. Move those bits to the block layer. Instead of having a timer per command, we try to be a bit more clever and simply have one per-queue. This avoids the overhead of having to tear down and setup a timer for each command, so it will result in a lot less timer fiddling. Signed-off-by: Mike Anderson Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 9c2549260427..067f28b80072 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -147,6 +147,7 @@ struct request { unsigned int cmd_flags; enum rq_cmd_type_bits cmd_type; + unsigned long atomic_flags; /* Maintain bio traversal state for part by part I/O submission. * hard_* are block layer internals, no driver should touch them! @@ -214,6 +215,8 @@ struct request { void *data; void *sense; + unsigned long deadline; + struct list_head timeout_list; unsigned int timeout; int retries; @@ -266,6 +269,14 @@ typedef void (prepare_flush_fn) (struct request_queue *, struct request *); typedef void (softirq_done_fn)(struct request *); typedef int (dma_drain_needed_fn)(struct request *); +enum blk_eh_timer_return { + BLK_EH_NOT_HANDLED, + BLK_EH_HANDLED, + BLK_EH_RESET_TIMER, +}; + +typedef enum blk_eh_timer_return (rq_timed_out_fn)(struct request *); + enum blk_queue_state { Queue_down, Queue_up, @@ -311,6 +322,7 @@ struct request_queue merge_bvec_fn *merge_bvec_fn; prepare_flush_fn *prepare_flush_fn; softirq_done_fn *softirq_done_fn; + rq_timed_out_fn *rq_timed_out_fn; dma_drain_needed_fn *dma_drain_needed; /* @@ -386,6 +398,10 @@ struct request_queue unsigned int nr_sorted; unsigned int in_flight; + unsigned int rq_timeout; + struct timer_list timeout; + struct list_head timeout_list; + /* * sg stuff */ @@ -770,6 +786,8 @@ extern int blk_end_request_callback(struct request *rq, int error, unsigned int nr_bytes, int (drv_callback)(struct request *)); extern void blk_complete_request(struct request *); +extern void __blk_complete_request(struct request *); +extern void blk_abort_request(struct request *); /* * blk_end_request() takes bytes instead of sectors as a complete size. @@ -811,6 +829,8 @@ extern void blk_queue_dma_alignment(struct request_queue *, int); extern void blk_queue_update_dma_alignment(struct request_queue *, int); extern void blk_queue_softirq_done(struct request_queue *, softirq_done_fn *); extern void blk_queue_set_discard(struct request_queue *, prepare_discard_fn *); +extern void blk_queue_rq_timed_out(struct request_queue *, rq_timed_out_fn *); +extern void blk_queue_rq_timeout(struct request_queue *, unsigned int); extern struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev); extern int blk_queue_ordered(struct request_queue *, unsigned, prepare_flush_fn *); extern int blk_do_ordered(struct request_queue *, struct request **); -- cgit v1.2.2 From 11914a53d2ec2974a565311af327b8983d8c820d Mon Sep 17 00:00:00 2001 From: Mike Anderson Date: Sat, 13 Sep 2008 20:31:27 +0200 Subject: block: Add interface to abort queued requests Signed-off-by: Mike Anderson Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 1 + include/linux/blktrace_api.h | 2 ++ include/linux/elevator.h | 1 + 3 files changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 067f28b80072..37781d6fe045 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -788,6 +788,7 @@ extern int blk_end_request_callback(struct request *rq, int error, extern void blk_complete_request(struct request *); extern void __blk_complete_request(struct request *); extern void blk_abort_request(struct request *); +extern void blk_abort_queue(struct request_queue *); /* * blk_end_request() takes bytes instead of sectors as a complete size. diff --git a/include/linux/blktrace_api.h b/include/linux/blktrace_api.h index 27da2cc682ee..dcaf2452ed1f 100644 --- a/include/linux/blktrace_api.h +++ b/include/linux/blktrace_api.h @@ -48,6 +48,7 @@ enum blktrace_act { __BLK_TA_SPLIT, /* bio was split */ __BLK_TA_BOUNCE, /* bio was bounced */ __BLK_TA_REMAP, /* bio was remapped */ + __BLK_TA_ABORT, /* request aborted */ }; /* @@ -78,6 +79,7 @@ enum blktrace_notify { #define BLK_TA_SPLIT (__BLK_TA_SPLIT) #define BLK_TA_BOUNCE (__BLK_TA_BOUNCE) #define BLK_TA_REMAP (__BLK_TA_REMAP | BLK_TC_ACT(BLK_TC_QUEUE)) +#define BLK_TA_ABORT (__BLK_TA_ABORT | BLK_TC_ACT(BLK_TC_QUEUE)) #define BLK_TN_PROCESS (__BLK_TN_PROCESS | BLK_TC_ACT(BLK_TC_NOTIFY)) #define BLK_TN_TIMESTAMP (__BLK_TN_TIMESTAMP | BLK_TC_ACT(BLK_TC_NOTIFY)) diff --git a/include/linux/elevator.h b/include/linux/elevator.h index bb791c311a56..92f6f634e3e6 100644 --- a/include/linux/elevator.h +++ b/include/linux/elevator.h @@ -112,6 +112,7 @@ extern struct request *elv_latter_request(struct request_queue *, struct request extern int elv_register_queue(struct request_queue *q); extern void elv_unregister_queue(struct request_queue *q); extern int elv_may_queue(struct request_queue *, int); +extern void elv_abort_queue(struct request_queue *); extern void elv_completed_request(struct request_queue *, struct request *); extern int elv_set_request(struct request_queue *, struct request *, gfp_t); extern void elv_put_request(struct request_queue *, struct request *); -- cgit v1.2.2 From 3e6053d76dcbd92b2f9f4ad5ece9bce83149523e Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Thu, 11 Sep 2008 10:57:55 +0200 Subject: block: adjust blkdev_issue_discard for swap Two mods to blkdev_issue_discard(), thinking ahead to its use on swap: 1. Add gfp_mask argument, so swap allocation can use it where GFP_KERNEL might deadlock but GFP_NOIO is safe. 2. Enlarge nr_sects argument from unsigned to sector_t: unsigned long is enough to cover a whole swap area, but sector_t suits any partition. Change sb_issue_discard()'s nr_blocks to sector_t too; but no need seen for a gfp_mask there, just pass GFP_KERNEL down to blkdev_issue_discard(). Signed-off-by: Hugh Dickins Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 37781d6fe045..b47767c72ce3 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -16,6 +16,7 @@ #include #include #include +#include #include #include @@ -873,15 +874,15 @@ static inline struct request *blk_map_queue_find_tag(struct blk_queue_tag *bqt, } extern int blkdev_issue_flush(struct block_device *, sector_t *); -extern int blkdev_issue_discard(struct block_device *, sector_t sector, - unsigned nr_sects); +extern int blkdev_issue_discard(struct block_device *, + sector_t sector, sector_t nr_sects, gfp_t); static inline int sb_issue_discard(struct super_block *sb, - sector_t block, unsigned nr_blocks) + sector_t block, sector_t nr_blocks) { block <<= (sb->s_blocksize_bits - 9); nr_blocks <<= (sb->s_blocksize_bits - 9); - return blkdev_issue_discard(sb->s_bdev, block, nr_blocks); + return blkdev_issue_discard(sb->s_bdev, block, nr_blocks, GFP_KERNEL); } /* -- cgit v1.2.2 From 0a0d96b03a1f3bfd6bc3ea08008699e8e59fccd9 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 11 Sep 2008 13:17:37 +0200 Subject: block: add bio_kmalloc() Not all callers need (or want!) the mempool backing guarentee, it essentially means that you can only use bio_alloc() for short allocations and not for preallocating some bio's at setup or init time. So add bio_kmalloc() which does the same thing as bio_alloc(), except it just uses kmalloc() as the backing instead of the bio mempools. Signed-off-by: Jens Axboe --- include/linux/bio.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/bio.h b/include/linux/bio.h index 7af373f253dc..6520ee1a3f6d 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -308,6 +308,7 @@ extern struct bio_set *bioset_create(int, int); extern void bioset_free(struct bio_set *); extern struct bio *bio_alloc(gfp_t, int); +extern struct bio *bio_kmalloc(gfp_t, int); extern struct bio *bio_alloc_bioset(gfp_t, int, struct bio_set *); extern void bio_put(struct bio *); extern void bio_free(struct bio *, struct bio_set *); -- cgit v1.2.2 From 581d4e28d9195aa8b2231383dbabc288988d615e Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Sun, 14 Sep 2008 05:56:33 -0700 Subject: block: add fault injection mechanism for faking request timeouts Only works for the generic request timer handling. Allows one to sporadically ignore request completions, thus exercising the timeout handling. Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index b47767c72ce3..e34999d14c16 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -440,6 +440,7 @@ struct request_queue #define QUEUE_FLAG_BIDI 9 /* queue supports bidi requests */ #define QUEUE_FLAG_NOMERGES 10 /* disable merge attempts */ #define QUEUE_FLAG_SAME_COMP 11 /* force complete on same CPU */ +#define QUEUE_FLAG_FAIL_IO 12 /* fake timeout */ static inline int queue_is_locked(struct request_queue *q) { -- cgit v1.2.2 From 9c02f2b02e29a2244e36c6e1f246080d8afc6cff Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 18 Sep 2008 09:31:53 -0700 Subject: block: cleanup some of the integrity stuff in blkdev.h Don't put functions that are only used in fs/bio-integrity.c in blkdev.h, it's much cleaner to just keep it in there. Also kill completely unused bdev_get_tag_size() Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 43 ------------------------------------------- 1 file changed, 43 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index e34999d14c16..e23b838825bd 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1004,47 +1004,6 @@ extern int blk_integrity_compare(struct block_device *, struct block_device *); extern int blk_rq_map_integrity_sg(struct request *, struct scatterlist *); extern int blk_rq_count_integrity_sg(struct request *); -static inline unsigned short blk_integrity_tuple_size(struct blk_integrity *bi) -{ - if (bi) - return bi->tuple_size; - - return 0; -} - -static inline struct blk_integrity *bdev_get_integrity(struct block_device *bdev) -{ - return bdev->bd_disk->integrity; -} - -static inline unsigned int bdev_get_tag_size(struct block_device *bdev) -{ - struct blk_integrity *bi = bdev_get_integrity(bdev); - - if (bi) - return bi->tag_size; - - return 0; -} - -static inline int bdev_integrity_enabled(struct block_device *bdev, int rw) -{ - struct blk_integrity *bi = bdev_get_integrity(bdev); - - if (bi == NULL) - return 0; - - if (rw == READ && bi->verify_fn != NULL && - (bi->flags & INTEGRITY_FLAG_READ)) - return 1; - - if (rw == WRITE && bi->generate_fn != NULL && - (bi->flags & INTEGRITY_FLAG_WRITE)) - return 1; - - return 0; -} - static inline int blk_integrity_rq(struct request *rq) { if (rq->bio == NULL) @@ -1058,8 +1017,6 @@ static inline int blk_integrity_rq(struct request *rq) #define blk_integrity_rq(rq) (0) #define blk_rq_count_integrity_sg(a) (0) #define blk_rq_map_integrity_sg(a, b) (0) -#define bdev_get_integrity(a) (0) -#define bdev_get_tag_size(a) (0) #define blk_integrity_compare(a, b) (0) #define blk_integrity_register(a, b) (0) #define blk_integrity_unregister(a) do { } while (0); -- cgit v1.2.2 From 32fab448e5e86694beade415e750363538ea5f49 Mon Sep 17 00:00:00 2001 From: Kiyoshi Ueda Date: Thu, 18 Sep 2008 10:45:09 -0400 Subject: block: add request update interface This patch adds blk_update_request(), which updates struct request with completing its data part, but doesn't complete the struct request itself. Though it looks like end_that_request_first() of older kernels, blk_update_request() should be used only by request stacking drivers. Request-based dm will use it in bio->bi_end_io callback to update the original request when a data part of a cloned request completes. Followings are additional background information of why request-based dm needs this interface. - Request stacking drivers can't use blk_end_request() directly from the lower driver's completion context (bio->bi_end_io or rq->end_io), because some device drivers (e.g. ide) may try to complete their request with queue lock held, and it may cause deadlock. See below for detailed description of possible deadlock: - To solve that, request-based dm offloads the completion of cloned struct request to softirq context (i.e. using blk_complete_request() from rq->end_io). - Though it is possible to use the same solution from bio->bi_end_io, it will delay the notification of bio completion to the original submitter. Also, it will cause inefficient partial completion, because the lower driver can't perform the cloned request anymore and request-based dm needs to requeue and redispatch it to the lower driver again later. That's not good. - So request-based dm needs blk_update_request() to perform the bio completion in the lower driver's completion context, which is more efficient. Signed-off-by: Kiyoshi Ueda Signed-off-by: Jun'ichi Nomura Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index e23b838825bd..e82a84c9f37a 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -791,6 +791,8 @@ extern void blk_complete_request(struct request *); extern void __blk_complete_request(struct request *); extern void blk_abort_request(struct request *); extern void blk_abort_queue(struct request_queue *); +extern void blk_update_request(struct request *rq, int error, + unsigned int nr_bytes); /* * blk_end_request() takes bytes instead of sectors as a complete size. -- cgit v1.2.2 From 82124d60354846623a4b94af335717a5e142a074 Mon Sep 17 00:00:00 2001 From: Kiyoshi Ueda Date: Thu, 18 Sep 2008 10:45:38 -0400 Subject: block: add request submission interface This patch adds blk_insert_cloned_request(), a generic request submission interface for request stacking drivers. Request-based dm will use it to submit their clones to underlying devices. blk_rq_check_limits() is also added because it is possible that the lower queue has stronger limitations than the upper queue if multiple drivers are stacking at request-level. Not only for blk_insert_cloned_request()'s internal use, the function will be used by request-based dm when the queue limitation is modified (e.g. by replacing dm's table). Signed-off-by: Kiyoshi Ueda Signed-off-by: Jun'ichi Nomura Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index e82a84c9f37a..964c246bc271 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -693,6 +693,9 @@ extern void __blk_put_request(struct request_queue *, struct request *); extern struct request *blk_get_request(struct request_queue *, int, gfp_t); extern void blk_insert_request(struct request_queue *, struct request *, int, void *); extern void blk_requeue_request(struct request_queue *, struct request *); +extern int blk_rq_check_limits(struct request_queue *q, struct request *rq); +extern int blk_insert_cloned_request(struct request_queue *q, + struct request *rq); extern void blk_plug_device(struct request_queue *); extern void blk_plug_device_unlocked(struct request_queue *); extern int blk_remove_plug(struct request_queue *); -- cgit v1.2.2 From 4ee5eaf4516a60f8ef64d3c246c64c6be0cf8c3a Mon Sep 17 00:00:00 2001 From: Kiyoshi Ueda Date: Thu, 18 Sep 2008 10:46:13 -0400 Subject: block: add a queue flag for request stacking support This patch adds a queue flag to indicate the block device can be used for request stacking. Request stacking drivers need to stack their devices on top of only devices of which q->request_fn is functional. Since bio stacking drivers (e.g. md, loop) basically initialize their queue using blk_alloc_queue() and don't set q->request_fn, the check of (q->request_fn == NULL) looks enough for that purpose. However, dm will become both types of stacking driver (bio-based and request-based). And dm will always set q->request_fn even if the dm device is bio-based of which q->request_fn is not functional actually. So we need something else to distinguish the type of the device. Adding a queue flag is a solution for that. The reason why dm always sets q->request_fn is to keep the compatibility of dm user-space tools. Currently, all dm user-space tools are using bio-based dm without specifying the type of the dm device they use. To use request-based dm without changing such tools, the kernel must decide the type of the dm device automatically. The automatic type decision can't be done at the device creation time and needs to be deferred until such tools load a mapping table, since the actual type is decided by dm target type included in the mapping table. So a dm device has to be initialized using blk_init_queue() so that we can load either type of table. Then, all queue stuffs are set (e.g. q->request_fn) and we have no element to distinguish that it is bio-based or request-based, even after a table is loaded and the type of the device is decided. By the way, some stuffs of the queue (e.g. request_list, elevator) are needless when the dm device is used as bio-based. But the memory size is not so large (about 20[KB] per queue on ia64), so I hope the memory loss can be acceptable for bio-based dm users. Signed-off-by: Kiyoshi Ueda Signed-off-by: Jun'ichi Nomura Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 964c246bc271..86f77ef127f4 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -441,6 +441,7 @@ struct request_queue #define QUEUE_FLAG_NOMERGES 10 /* disable merge attempts */ #define QUEUE_FLAG_SAME_COMP 11 /* force complete on same CPU */ #define QUEUE_FLAG_FAIL_IO 12 /* fake timeout */ +#define QUEUE_FLAG_STACKABLE 13 /* supports request stacking */ static inline int queue_is_locked(struct request_queue *q) { @@ -547,6 +548,8 @@ enum { #define blk_queue_stopped(q) test_bit(QUEUE_FLAG_STOPPED, &(q)->queue_flags) #define blk_queue_nomerges(q) test_bit(QUEUE_FLAG_NOMERGES, &(q)->queue_flags) #define blk_queue_flushing(q) ((q)->ordseq) +#define blk_queue_stackable(q) \ + test_bit(QUEUE_FLAG_STACKABLE, &(q)->queue_flags) #define blk_fs_request(rq) ((rq)->cmd_type == REQ_TYPE_FS) #define blk_pc_request(rq) ((rq)->cmd_type == REQ_TYPE_BLOCK_PC) -- cgit v1.2.2 From 9e49184c82e9ec3ab4d45f9ea5a17ccaf43869f0 Mon Sep 17 00:00:00 2001 From: Keith Wansbrough Date: Mon, 22 Sep 2008 14:57:17 -0700 Subject: floppy: support arbitrary first-sector numbers The current floppy_struct allows floppies to number sectors starting from 0 or 1. This patch allows arbitrary first-sector numbers - for example, 0xC1 for Amstrad CPC disks. This extends the existing 1-bit field (FD_ZEROBASED, bit 2 of stretch) to 8 bits (FD_SECTMASK, bits 2 to 9). Currently 0x00 denotes a first sector number of 1, and 0x01 denotes a first sector number of 0. We extend this by interpreting FD_SECTMASK as the first sector number with the LSB flipped. Signed-off-by: Keith Wansbrough Cc: Alain Knaff Cc: Michael Kerrisk Cc: Karel Zak Signed-off-by: Andrew Morton Signed-off-by: Jens Axboe --- include/linux/fd.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fd.h b/include/linux/fd.h index b6bd41d2b460..f5d194af07a8 100644 --- a/include/linux/fd.h +++ b/include/linux/fd.h @@ -15,10 +15,16 @@ struct floppy_struct { sect, /* sectors per track */ head, /* nr of heads */ track, /* nr of tracks */ - stretch; /* !=0 means double track steps */ + stretch; /* bit 0 !=0 means double track steps */ + /* bit 1 != 0 means swap sides */ + /* bits 2..9 give the first sector */ + /* number (the LSB is flipped) */ #define FD_STRETCH 1 #define FD_SWAPSIDES 2 #define FD_ZEROBASED 4 +#define FD_SECTBASEMASK 0x3FC +#define FD_MKSECTBASE(s) (((s) ^ 1) << 2) +#define FD_SECTBASE(floppy) ((((floppy)->stretch & FD_SECTBASEMASK) >> 2) ^ 1) unsigned char gap, /* gap1 size */ -- cgit v1.2.2 From a68bbddba486020c9c74825ce90c4c1ec463e0e8 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 24 Sep 2008 13:03:33 +0200 Subject: block: add queue flag for SSD/non-rotational devices We don't want to idle in AS/CFQ if the device doesn't have a seek penalty. So add a QUEUE_FLAG_NONROT to indicate a non-rotational device, low level drivers should set this flag upon discovery of an SSD or similar device type. Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 86f77ef127f4..0cf3e619fb21 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -442,6 +442,7 @@ struct request_queue #define QUEUE_FLAG_SAME_COMP 11 /* force complete on same CPU */ #define QUEUE_FLAG_FAIL_IO 12 /* fake timeout */ #define QUEUE_FLAG_STACKABLE 13 /* supports request stacking */ +#define QUEUE_FLAG_NONROT 14 /* non-rotational device (SSD) */ static inline int queue_is_locked(struct request_queue *q) { @@ -547,6 +548,7 @@ enum { #define blk_queue_tagged(q) test_bit(QUEUE_FLAG_QUEUED, &(q)->queue_flags) #define blk_queue_stopped(q) test_bit(QUEUE_FLAG_STOPPED, &(q)->queue_flags) #define blk_queue_nomerges(q) test_bit(QUEUE_FLAG_NOMERGES, &(q)->queue_flags) +#define blk_queue_nonrot(q) test_bit(QUEUE_FLAG_NONROT, &(q)->queue_flags) #define blk_queue_flushing(q) ((q)->ordseq) #define blk_queue_stackable(q) \ test_bit(QUEUE_FLAG_STACKABLE, &(q)->queue_flags) -- cgit v1.2.2 From 8bff7c6b0f63c7ee9c5e3a076338d74125b8debb Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 24 Sep 2008 13:05:10 +0200 Subject: libata: set queue SSD flag for SSD devices SSD devices should give an RPM setting of 1 in word 217 of the ID page. If we see such a device, tell the block layer about it. Signed-off-by: Jens Axboe --- include/linux/ata.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ata.h b/include/linux/ata.h index 8a12d718c169..c1c8b4a4ba26 100644 --- a/include/linux/ata.h +++ b/include/linux/ata.h @@ -88,6 +88,7 @@ enum { ATA_ID_DLF = 128, ATA_ID_CSFO = 129, ATA_ID_CFA_POWER = 160, + ATA_ID_ROT_SPEED = 217, ATA_ID_PIO4 = (1 << 1), ATA_ID_SERNO_LEN = 20, @@ -691,6 +692,11 @@ static inline int ata_id_is_cfa(const u16 *id) return 0; } +static inline int ata_id_is_ssd(const u16 *id) +{ + return id[ATA_ID_ROT_SPEED] == 0x01; +} + static inline int ata_drive_40wire(const u16 *dev_id) { if (ata_id_is_sata(dev_id)) -- cgit v1.2.2 From c0ddffa84a7d12da9943a94d04dadbfb1883b904 Mon Sep 17 00:00:00 2001 From: Sven Schuetz Date: Fri, 26 Sep 2008 10:58:02 +0200 Subject: include blktrace_api.h in headers_install This header file is of interest for user space programming, i.e. for tools that process blktrace data. We would like to use it for a tool on-top of blktrace which processes data provided by blktrace. For this purpose, it would be helpful if the blktrace API would make it to /usr/include/linux. The git tree for the blktrace tools comes with its own copy of this header file. I didn't manage to replace that copy with the file generated by the patch below yet. A few more cleanups would be needed. For example, the blktrace ioctl numbers, which are currently defined in usr/include/fs.h, might need to be moved. Should be feasible, though. Signed-off-by: Sven Schuetz Signed-off-by: Martin Peschke Signed-off-by: Jens Axboe --- include/linux/Kbuild | 1 + include/linux/blktrace_api.h | 58 ++++++++++++++++++++++++-------------------- 2 files changed, 33 insertions(+), 26 deletions(-) (limited to 'include/linux') diff --git a/include/linux/Kbuild b/include/linux/Kbuild index b68ec09399be..31474e89c59a 100644 --- a/include/linux/Kbuild +++ b/include/linux/Kbuild @@ -180,6 +180,7 @@ unifdef-y += audit.h unifdef-y += auto_fs.h unifdef-y += auxvec.h unifdef-y += binfmts.h +unifdef-y += blktrace_api.h unifdef-y += capability.h unifdef-y += capi.h unifdef-y += cciss_ioctl.h diff --git a/include/linux/blktrace_api.h b/include/linux/blktrace_api.h index dcaf2452ed1f..a2a7d0ca2758 100644 --- a/include/linux/blktrace_api.h +++ b/include/linux/blktrace_api.h @@ -1,8 +1,10 @@ #ifndef BLKTRACE_H #define BLKTRACE_H +#ifdef __KERNEL__ #include #include +#endif /* * Trace categories @@ -92,17 +94,17 @@ enum blktrace_notify { * The trace itself */ struct blk_io_trace { - u32 magic; /* MAGIC << 8 | version */ - u32 sequence; /* event number */ - u64 time; /* in microseconds */ - u64 sector; /* disk offset */ - u32 bytes; /* transfer length */ - u32 action; /* what happened */ - u32 pid; /* who did it */ - u32 device; /* device number */ - u32 cpu; /* on what cpu did it happen */ - u16 error; /* completion error */ - u16 pdu_len; /* length of data after this trace */ + __u32 magic; /* MAGIC << 8 | version */ + __u32 sequence; /* event number */ + __u64 time; /* in microseconds */ + __u64 sector; /* disk offset */ + __u32 bytes; /* transfer length */ + __u32 action; /* what happened */ + __u32 pid; /* who did it */ + __u32 device; /* device number */ + __u32 cpu; /* on what cpu did it happen */ + __u16 error; /* completion error */ + __u16 pdu_len; /* length of data after this trace */ }; /* @@ -120,6 +122,25 @@ enum { Blktrace_stopped, }; +/* + * User setup structure passed with BLKTRACESTART + */ +struct blk_user_trace_setup { +#ifdef __KERNEL__ + char name[BDEVNAME_SIZE]; /* output */ +#else + char name[32]; /* output */ +#endif + __u16 act_mask; /* input */ + __u32 buf_size; /* input */ + __u32 buf_nr; /* input */ + __u64 start_lba; + __u64 end_lba; + __u32 pid; +}; + +#ifdef __KERNEL__ +#if defined(CONFIG_BLK_DEV_IO_TRACE) struct blk_trace { int trace_state; struct rchan *rchan; @@ -136,21 +157,6 @@ struct blk_trace { atomic_t dropped; }; -/* - * User setup structure passed with BLKTRACESTART - */ -struct blk_user_trace_setup { - char name[BDEVNAME_SIZE]; /* output */ - u16 act_mask; /* input */ - u32 buf_size; /* input */ - u32 buf_nr; /* input */ - u64 start_lba; - u64 end_lba; - u32 pid; -}; - -#ifdef __KERNEL__ -#if defined(CONFIG_BLK_DEV_IO_TRACE) extern int blk_trace_ioctl(struct block_device *, unsigned, char __user *); extern void blk_trace_shutdown(struct request_queue *); extern void __blk_add_trace(struct blk_trace *, sector_t, int, int, u32, int, int, void *); -- cgit v1.2.2 From ef9e3facdf1fe1228721a7c295a76d1b7a0e57ec Mon Sep 17 00:00:00 2001 From: Kiyoshi Ueda Date: Wed, 1 Oct 2008 16:12:15 +0200 Subject: block: add lld busy state exporting interface This patch adds an new interface, blk_lld_busy(), to check lld's busy state from the block layer. blk_lld_busy() calls down into low-level drivers for the checking if the drivers set q->lld_busy_fn() using blk_queue_lld_busy(). This resolves a performance problem on request stacking devices below. Some drivers like scsi mid layer stop dispatching request when they detect busy state on its low-level device like host/target/device. It allows other requests to stay in the I/O scheduler's queue for a chance of merging. Request stacking drivers like request-based dm should follow the same logic. However, there is no generic interface for the stacked device to check if the underlying device(s) are busy. If the request stacking driver dispatches and submits requests to the busy underlying device, the requests will stay in the underlying device's queue without a chance of merging. This causes performance problem on burst I/O load. With this patch, busy state of the underlying device is exported via q->lld_busy_fn(). So the request stacking driver can check it and stop dispatching requests if busy. The underlying device driver must return the busy state appropriately: 1: when the device driver can't process requests immediately. 0: when the device driver can process requests immediately, including abnormal situations where the device driver needs to kill all requests. Signed-off-by: Kiyoshi Ueda Signed-off-by: Jun'ichi Nomura Cc: Andrew Morton Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 0cf3e619fb21..9e0ee1a8254e 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -269,6 +269,7 @@ typedef int (merge_bvec_fn) (struct request_queue *, struct bvec_merge_data *, typedef void (prepare_flush_fn) (struct request_queue *, struct request *); typedef void (softirq_done_fn)(struct request *); typedef int (dma_drain_needed_fn)(struct request *); +typedef int (lld_busy_fn) (struct request_queue *q); enum blk_eh_timer_return { BLK_EH_NOT_HANDLED, @@ -325,6 +326,7 @@ struct request_queue softirq_done_fn *softirq_done_fn; rq_timed_out_fn *rq_timed_out_fn; dma_drain_needed_fn *dma_drain_needed; + lld_busy_fn *lld_busy_fn; /* * Dispatch queue sorting @@ -699,6 +701,7 @@ extern struct request *blk_get_request(struct request_queue *, int, gfp_t); extern void blk_insert_request(struct request_queue *, struct request *, int, void *); extern void blk_requeue_request(struct request_queue *, struct request *); extern int blk_rq_check_limits(struct request_queue *q, struct request *rq); +extern int blk_lld_busy(struct request_queue *q); extern int blk_insert_cloned_request(struct request_queue *q, struct request *rq); extern void blk_plug_device(struct request_queue *); @@ -835,6 +838,7 @@ extern void blk_queue_update_dma_pad(struct request_queue *, unsigned int); extern int blk_queue_dma_drain(struct request_queue *q, dma_drain_needed_fn *dma_drain_needed, void *buf, unsigned int size); +extern void blk_queue_lld_busy(struct request_queue *q, lld_busy_fn *fn); extern void blk_queue_segment_boundary(struct request_queue *, unsigned long); extern void blk_queue_prep_rq(struct request_queue *, prep_rq_fn *pfn); extern void blk_queue_merge_bvec(struct request_queue *, merge_bvec_fn *); -- cgit v1.2.2 From 0497b345e7d067109e0dd9bf9f4978a6847ee13b Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 1 Oct 2008 16:16:25 +0200 Subject: blktrace: use BLKTRACE_BDEV_SIZE as the name size for setup structure Define as 32, which is is what BDEVNAME_SIZE is/was as well. This keeps the user interface the same and gets rid of the difference between kernel and user api here. Signed-off-by: Jens Axboe --- include/linux/blktrace_api.h | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blktrace_api.h b/include/linux/blktrace_api.h index a2a7d0ca2758..3a31eb506164 100644 --- a/include/linux/blktrace_api.h +++ b/include/linux/blktrace_api.h @@ -122,15 +122,13 @@ enum { Blktrace_stopped, }; +#define BLKTRACE_BDEV_SIZE 32 + /* * User setup structure passed with BLKTRACESTART */ struct blk_user_trace_setup { -#ifdef __KERNEL__ - char name[BDEVNAME_SIZE]; /* output */ -#else - char name[32]; /* output */ -#endif + char name[BLKTRACE_BDEV_SIZE]; /* output */ __u16 act_mask; /* input */ __u32 buf_size; /* input */ __u32 buf_nr; /* input */ -- cgit v1.2.2 From d00e29fd99dd63d1c51917604e35dee824ed567f Mon Sep 17 00:00:00 2001 From: Kiyoshi Ueda Date: Wed, 1 Oct 2008 10:14:46 -0400 Subject: block: remove end_{queued|dequeued}_request() This patch removes end_queued_request() and end_dequeued_request(), which are no longer used. As a results, users of __end_request() became only end_request(). So the actual code in __end_request() is moved to end_request() and __end_request() is removed. Signed-off-by: Kiyoshi Ueda Signed-off-by: Jun'ichi Nomura Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 9e0ee1a8254e..bfc18e497c7f 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -793,8 +793,6 @@ extern int __blk_end_request(struct request *rq, int error, extern int blk_end_bidi_request(struct request *rq, int error, unsigned int nr_bytes, unsigned int bidi_bytes); extern void end_request(struct request *, int); -extern void end_queued_request(struct request *, int); -extern void end_dequeued_request(struct request *, int); extern int blk_end_request_callback(struct request *rq, int error, unsigned int nr_bytes, int (drv_callback)(struct request *)); -- cgit v1.2.2 From 8deaf7210728c453295dc1cb2a5b66c68183ac85 Mon Sep 17 00:00:00 2001 From: Alberto Bertogli Date: Thu, 2 Oct 2008 12:46:53 +0200 Subject: bio.h: Remove unused conditional code The whole bio_integrity() definition is inside an #ifdef CONFIG_BLK_DEV_INTEGRITY, there's no need for the conditional code. Signed-off-by: Alberto Bertogli Signed-off-by: Jens Axboe --- include/linux/bio.h | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bio.h b/include/linux/bio.h index 6520ee1a3f6d..98c2d0570657 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -457,14 +457,7 @@ static inline int bio_has_data(struct bio *bio) #define bip_for_each_vec(bvl, bip, i) \ __bip_for_each_vec(bvl, bip, i, (bip)->bip_idx) -static inline int bio_integrity(struct bio *bio) -{ -#if defined(CONFIG_BLK_DEV_INTEGRITY) - return bio->bi_integrity != NULL; -#else - return 0; -#endif -} +#define bio_integrity(bio) (bio->bi_integrity != NULL) extern struct bio_integrity_payload *bio_integrity_alloc_bioset(struct bio *, gfp_t, unsigned int, struct bio_set *); extern struct bio_integrity_payload *bio_integrity_alloc(struct bio *, gfp_t, unsigned int); -- cgit v1.2.2 From b04accc425d52ca59699290661e0dfd09b0feeeb Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 2 Oct 2008 12:53:22 +0200 Subject: block: revert part of d7533ad0e132f92e75c1b2eb7c26387b25a583c1 We need bdev_get_integrity() to support the pending md/dm patches. Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index bfc18e497c7f..bc693f5c3886 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1016,6 +1016,12 @@ extern int blk_integrity_compare(struct block_device *, struct block_device *); extern int blk_rq_map_integrity_sg(struct request *, struct scatterlist *); extern int blk_rq_count_integrity_sg(struct request *); +static inline +struct blk_integrity *bdev_get_integrity(struct block_device *bdev) +{ + return bdev->bd_disk->integrity; +} + static inline int blk_integrity_rq(struct request *rq) { if (rq->bio == NULL) @@ -1029,6 +1035,7 @@ static inline int blk_integrity_rq(struct request *rq) #define blk_integrity_rq(rq) (0) #define blk_rq_count_integrity_sg(a) (0) #define blk_rq_map_integrity_sg(a, b) (0) +#define bdev_get_integrity(a) (0) #define blk_integrity_compare(a, b) (0) #define blk_integrity_register(a, b) (0) #define blk_integrity_unregister(a) do { } while (0); -- cgit v1.2.2 From 74aa8c2cc010035a7eef2b4ca4d6430e0dae206a Mon Sep 17 00:00:00 2001 From: "Martin K. Petersen" Date: Wed, 1 Oct 2008 03:38:37 -0400 Subject: block: Introduce integrity data ownership flag A filesystem might supply its own integrity metadata. Introduce a flag that indicates whether the filesystem or the block layer owns the integrity buffer. Signed-off-by: Martin K. Petersen Signed-off-by: Jens Axboe --- include/linux/bio.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/bio.h b/include/linux/bio.h index 98c2d0570657..d86d39d490e6 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -109,6 +109,7 @@ struct bio { #define BIO_EOPNOTSUPP 7 /* not supported */ #define BIO_CPU_AFFINE 8 /* complete bio on same CPU as submitted */ #define BIO_NULL_MAPPED 9 /* contains invalid user pages */ +#define BIO_FS_INTEGRITY 10 /* fs owns integrity data, not block layer */ #define bio_flagged(bio, flag) ((bio)->bi_flags & (1 << (flag))) /* -- cgit v1.2.2 From ad7fce93147d32ae53d25d9ea1a8ba31a239deee Mon Sep 17 00:00:00 2001 From: "Martin K. Petersen" Date: Wed, 1 Oct 2008 03:38:39 -0400 Subject: block: Switch blk_integrity_compare from bdev to gendisk The DM and MD integrity support now depends on being able to use gendisks instead of block_devices when comparing integrity profiles. Change function parameters accordingly. Also update comparison logic so that two NULL profiles are a valid configuration. Signed-off-by: Martin K. Petersen Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index bc693f5c3886..00d340b0f758 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1012,7 +1012,7 @@ struct blk_integrity { extern int blk_integrity_register(struct gendisk *, struct blk_integrity *); extern void blk_integrity_unregister(struct gendisk *); -extern int blk_integrity_compare(struct block_device *, struct block_device *); +extern int blk_integrity_compare(struct gendisk *, struct gendisk *); extern int blk_rq_map_integrity_sg(struct request *, struct scatterlist *); extern int blk_rq_count_integrity_sg(struct request *); -- cgit v1.2.2 From b02739b01c5309d74a59859f2ce92c931d1f1955 Mon Sep 17 00:00:00 2001 From: "Martin K. Petersen" Date: Thu, 2 Oct 2008 18:47:49 +0200 Subject: block: gendisk integrity wrapper This is a wrapper for accessing a gendisk's integrity bits. It allows the integrity support in MD to be compiled with BLK_DEV_INTEGRITY off. Signed-off-by: Martin K. Petersen Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 00d340b0f758..a92d9e4ea96e 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1022,6 +1022,11 @@ struct blk_integrity *bdev_get_integrity(struct block_device *bdev) return bdev->bd_disk->integrity; } +static inline struct blk_integrity *blk_get_integrity(struct gendisk *disk) +{ + return disk->integrity; +} + static inline int blk_integrity_rq(struct request *rq) { if (rq->bio == NULL) @@ -1036,6 +1041,7 @@ static inline int blk_integrity_rq(struct request *rq) #define blk_rq_count_integrity_sg(a) (0) #define blk_rq_map_integrity_sg(a, b) (0) #define bdev_get_integrity(a) (0) +#define blk_get_integrity(a) (0) #define blk_integrity_compare(a, b) (0) #define blk_integrity_register(a, b) (0) #define blk_integrity_unregister(a) do { } while (0); -- cgit v1.2.2 From ad3316bf4eeb53c89164f759767f911072b56203 Mon Sep 17 00:00:00 2001 From: "Martin K. Petersen" Date: Wed, 1 Oct 2008 22:42:53 -0400 Subject: block: Find bio sector offset given idx and offset Helper function to find the sector offset in a bio given bvec index and page offset. Signed-off-by: Martin K. Petersen Signed-off-by: Jens Axboe --- include/linux/bio.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/bio.h b/include/linux/bio.h index d86d39d490e6..fe12d0f9ebaa 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -327,6 +327,7 @@ extern int bio_add_page(struct bio *, struct page *, unsigned int,unsigned int); extern int bio_add_pc_page(struct request_queue *, struct bio *, struct page *, unsigned int, unsigned int); extern int bio_get_nr_vecs(struct block_device *); +extern sector_t bio_sector_offset(struct bio *, unsigned short, unsigned int); extern struct bio *bio_map_user(struct request_queue *, struct block_device *, unsigned long, unsigned int, int, gfp_t); struct sg_iovec; -- cgit v1.2.2 From 6feef531f55cf4a20fd9eb39f5352e5745203603 Mon Sep 17 00:00:00 2001 From: Denis ChengRq Date: Thu, 9 Oct 2008 08:57:05 +0200 Subject: block: mark bio_split_pool static Since all bio_split calls refer the same single bio_split_pool, the bio_split function can use bio_split_pool directly instead of the mempool_t parameter; then the mempool_t parameter can be removed from bio_split param list, and bio_split_pool is only referred in fs/bio.c file, can be marked static. Signed-off-by: Denis ChengRq Signed-off-by: Jens Axboe --- include/linux/bio.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bio.h b/include/linux/bio.h index fe12d0f9ebaa..fb97221d7c30 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -300,9 +300,7 @@ struct bio_pair { atomic_t cnt; int error; }; -extern struct bio_pair *bio_split(struct bio *bi, mempool_t *pool, - int first_sectors); -extern mempool_t *bio_split_pool; +extern struct bio_pair *bio_split(struct bio *bi, int first_sectors); extern void bio_pair_release(struct bio_pair *dbio); extern struct bio_set *bioset_create(int, int); -- cgit v1.2.2 From af5639424008ffe96f89b059bea1aec15e0115a9 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 9 Oct 2008 09:01:10 +0200 Subject: block: add some comments around the bio read-write flags Signed-off-by: Jens Axboe --- include/linux/bio.h | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/bio.h b/include/linux/bio.h index fb97221d7c30..ff5b4cf9e2da 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -123,13 +123,23 @@ struct bio { /* * bio bi_rw flags * - * bit 0 -- read (not set) or write (set) + * bit 0 -- data direction + * If not set, bio is a read from device. If set, it's a write to device. * bit 1 -- rw-ahead when set * bit 2 -- barrier + * Insert a serialization point in the IO queue, forcing previously + * submitted IO to be completed before this oen is issued. * bit 3 -- fail fast, don't want low level driver retries * bit 4 -- synchronous I/O hint: the block layer will unplug immediately + * Note that this does NOT indicate that the IO itself is sync, just + * that the block layer will not postpone issue of this IO by plugging. * bit 5 -- metadata request + * Used for tracing to differentiate metadata and data IO. May also + * get some preferential treatment in the IO scheduler * bit 6 -- discard sectors + * Informs the lower level device that this range of sectors is no longer + * used by the file system and may thus be freed by the device. Used + * for flash based storage. */ #define BIO_RW 0 /* Must match RW in req flags (blkdev.h) */ #define BIO_RW_AHEAD 1 /* Must match FAILFAST in req flags */ -- cgit v1.2.2 From a5d8c3483a6e19aca95ef6a2c5890e33bfa5b293 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 9 Oct 2008 11:35:51 +0200 Subject: sched debug: add name to sched_domain sysctl entries add /proc/sys/kernel/sched_domain/cpu0/domain0/name, to make it easier to see which specific scheduler domain remained at that entry. Since we process the scheduler domain tree and simplify it, it's not always immediately clear during debugging which domain came from where. depends on CONFIG_SCHED_DEBUG=y. Signed-off-by: Ingo Molnar --- include/linux/sched.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index d8e699b55858..5d0819ee442a 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -824,6 +824,9 @@ struct sched_domain { unsigned int ttwu_move_affine; unsigned int ttwu_move_balance; #endif +#ifdef CONFIG_SCHED_DEBUG + char *name; +#endif }; extern void partition_sched_domains(int ndoms_new, cpumask_t *doms_new, -- cgit v1.2.2 From bf0b90e357c883e8efd72954432efe652de74c76 Mon Sep 17 00:00:00 2001 From: "venkatesh.pallipadi@intel.com" Date: Mon, 4 Aug 2008 11:59:07 -0700 Subject: [CPUFREQ][1/6] cpufreq: Add cpu number parameter to __cpufreq_driver_getavg() Add a cpu parameter to __cpufreq_driver_getavg(). This is needed for software cpufreq coordination where policy->cpu may not be same as the CPU on which we want to getavg frequency. A follow-on patch will use this parameter to getavg freq from all cpus in policy->cpus. Change since last patch. Fix the offline/online and suspend/resume oops reported by Youquan Song Signed-off-by: Venkatesh Pallipadi Signed-off-by: Dave Jones --- include/linux/cpufreq.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 6fd5668aa572..1ee608fd7b77 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -187,7 +187,8 @@ extern int __cpufreq_driver_target(struct cpufreq_policy *policy, unsigned int relation); -extern int __cpufreq_driver_getavg(struct cpufreq_policy *policy); +extern int __cpufreq_driver_getavg(struct cpufreq_policy *policy, + unsigned int cpu); int cpufreq_register_governor(struct cpufreq_governor *governor); void cpufreq_unregister_governor(struct cpufreq_governor *governor); @@ -226,7 +227,9 @@ struct cpufreq_driver { unsigned int (*get) (unsigned int cpu); /* optional */ - unsigned int (*getavg) (unsigned int cpu); + unsigned int (*getavg) (struct cpufreq_policy *policy, + unsigned int cpu); + int (*exit) (struct cpufreq_policy *policy); int (*suspend) (struct cpufreq_policy *policy, pm_message_t pmsg); int (*resume) (struct cpufreq_policy *policy); -- cgit v1.2.2 From 8083e4ad970e4eb567e31037060cdd4ba346f0c0 Mon Sep 17 00:00:00 2001 From: "venkatesh.pallipadi@intel.com" Date: Mon, 4 Aug 2008 11:59:11 -0700 Subject: [CPUFREQ][5/6] cpufreq: Changes to get_cpu_idle_time_us(), used by ondemand governor export get_cpu_idle_time_us() for it to be used in ondemand governor. Last update time can be current time when the CPU is currently non-idle, accounting for the busy time since last idle. Signed-off-by: Venkatesh Pallipadi Signed-off-by: Dave Jones --- include/linux/tick.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/tick.h b/include/linux/tick.h index 8cf8cfe2cc97..98921a3e1aa8 100644 --- a/include/linux/tick.h +++ b/include/linux/tick.h @@ -126,7 +126,7 @@ static inline ktime_t tick_nohz_get_sleep_length(void) return len; } static inline void tick_nohz_stop_idle(int cpu) { } -static inline u64 get_cpu_idle_time_us(int cpu, u64 *unused) { return 0; } +static inline u64 get_cpu_idle_time_us(int cpu, u64 *unused) { return -1; } # endif /* !NO_HZ */ #endif -- cgit v1.2.2 From c19e654ddbe3831252f61e76a74d661e1a755530 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Thu, 9 Oct 2008 11:59:55 -0700 Subject: gre: Add netlink interface This patch adds a netlink interface that will eventually displace the existing ioctl interface. It utilises the elegant rtnl_link_ops mechanism. This also means that user-space no longer needs to rely on the tunnel interface being of type GRE to identify GRE tunnels. The identification can now occur using rtnl_link_ops. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- include/linux/if_tunnel.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'include/linux') diff --git a/include/linux/if_tunnel.h b/include/linux/if_tunnel.h index d4efe4014705..aeab2cb32a9c 100644 --- a/include/linux/if_tunnel.h +++ b/include/linux/if_tunnel.h @@ -2,6 +2,7 @@ #define _IF_TUNNEL_H_ #include +#include #define SIOCGETTUNNEL (SIOCDEVPRIVATE + 0) #define SIOCADDTUNNEL (SIOCDEVPRIVATE + 1) @@ -47,4 +48,22 @@ struct ip_tunnel_prl { /* PRL flags */ #define PRL_DEFAULT 0x0001 +enum +{ + IFLA_GRE_UNSPEC, + IFLA_GRE_LINK, + IFLA_GRE_IFLAGS, + IFLA_GRE_OFLAGS, + IFLA_GRE_IKEY, + IFLA_GRE_OKEY, + IFLA_GRE_LOCAL, + IFLA_GRE_REMOTE, + IFLA_GRE_TTL, + IFLA_GRE_TOS, + IFLA_GRE_PMTUDISC, + __IFLA_GRE_MAX, +}; + +#define IFLA_GRE_MAX (__IFLA_GRE_MAX - 1) + #endif /* _IF_TUNNEL_H_ */ -- cgit v1.2.2 From e1a8000228e16212c93b23cfbed4d622e2ec7a6b Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Thu, 9 Oct 2008 12:00:17 -0700 Subject: gre: Add Transparent Ethernet Bridging This patch adds support for Ethernet over GRE encapsulation. This is exposed to user-space with a new link type of "gretap" instead of "gre". It will create an ARPHRD_ETHER device in lieu of the usual ARPHRD_IPGRE. Note that to preserver backwards compatibility all Transparent Ethernet Bridging packets are passed to an ARPHRD_IPGRE tunnel if its key matches and there is no ARPHRD_ETHER device whose key matches more closely. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- include/linux/if_ether.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/if_ether.h b/include/linux/if_ether.h index a0099e98b5c4..bf1a53b2682e 100644 --- a/include/linux/if_ether.h +++ b/include/linux/if_ether.h @@ -56,6 +56,7 @@ #define ETH_P_DIAG 0x6005 /* DEC Diagnostics */ #define ETH_P_CUST 0x6006 /* DEC Customer use */ #define ETH_P_SCA 0x6007 /* DEC Systems Comms Arch */ +#define ETH_P_TEB 0x6558 /* Trans Ether Bridging */ #define ETH_P_RARP 0x8035 /* Reverse Addr Res packet */ #define ETH_P_ATALK 0x809B /* Appletalk DDP */ #define ETH_P_AARP 0x80F3 /* Appletalk AARP */ -- cgit v1.2.2 From 82b1519b345d61dcfae526e3fcb08128f39f9bcc Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Fri, 10 Oct 2008 13:37:09 +0100 Subject: dm: export struct dm_dev Split struct dm_dev in two and publish the part that other targets need in include/linux/device-mapper.h. Signed-off-by: Mikulas Patocka Signed-off-by: Alasdair G Kergon --- include/linux/device-mapper.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index a90222e3297d..178390de195e 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -13,7 +13,6 @@ struct dm_target; struct dm_table; -struct dm_dev; struct mapped_device; struct bio_vec; @@ -84,6 +83,12 @@ void dm_error(const char *message); */ void dm_set_device_limits(struct dm_target *ti, struct block_device *bdev); +struct dm_dev { + struct block_device *bdev; + int mode; + char name[16]; +}; + /* * Constructors should call these functions to ensure destination devices * are opened/closed correctly. -- cgit v1.2.2 From 89343da077ad564ed130c46e5ea6a79388410fa5 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Fri, 10 Oct 2008 13:37:10 +0100 Subject: dm: publish dm_get_mapinfo Publish dm_get_mapinfo in include/linux/device-mapper.h because this function is used by targets. Signed-off-by: Mikulas Patocka Signed-off-by: Alasdair G Kergon --- include/linux/device-mapper.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index 178390de195e..6b80961650f0 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -207,6 +207,7 @@ int dm_copy_name_and_uuid(struct mapped_device *md, char *name, char *uuid); struct gendisk *dm_disk(struct mapped_device *md); int dm_suspended(struct mapped_device *md); int dm_noflush_suspending(struct dm_target *ti); +union map_info *dm_get_mapinfo(struct bio *bio); /* * Geometry functions. -- cgit v1.2.2 From ea0ec640940c2ae3a8d71af3249fccf06a9997a3 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Fri, 10 Oct 2008 13:37:11 +0100 Subject: dm: publish dm_table_unplug_all Publish dm_table_unplug_all in include/linux/device-mapper.h because this function is used by targets. Signed-off-by: Mikulas Patocka Signed-off-by: Alasdair G Kergon --- include/linux/device-mapper.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index 6b80961650f0..e462c7f3b391 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -237,6 +237,11 @@ int dm_table_add_target(struct dm_table *t, const char *type, */ int dm_table_complete(struct dm_table *t); +/* + * Unplug all devices in a table. + */ +void dm_table_unplug_all(struct dm_table *t); + /* * Table reference counting. */ -- cgit v1.2.2 From 54160904260fa764ba6e2dc738770be30fdf9553 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Fri, 10 Oct 2008 13:37:12 +0100 Subject: dm: publish dm_vcalloc Publish dm_vcalloc in include/linux/device-mapper.h because this function is used by targets. Signed-off-by: Mikulas Patocka Signed-off-by: Alasdair G Kergon --- include/linux/device-mapper.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index e462c7f3b391..08d783592b73 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -267,6 +267,11 @@ void dm_table_event(struct dm_table *t); */ int dm_swap_table(struct mapped_device *md, struct dm_table *t); +/* + * A wrapper around vmalloc. + */ +void *dm_vcalloc(unsigned long nmemb, unsigned long elem_size); + /*----------------------------------------------------------------- * Macros. *---------------------------------------------------------------*/ -- cgit v1.2.2