From df46b9a44ceb5af2ea2351ce8e28ae7bd840b00f Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Mon, 20 Jun 2005 14:04:44 +0200 Subject: [PATCH] Add blk_rq_map_kern() Add blk_rq_map_kern which takes a kernel buffer and maps it into a request and bio. This can be used by the dm hw_handlers, old sg_scsi_ioctl, and one day scsi special requests so all requests comming into scsi will have bios. All requests having bios should allow scsi to use scatter lists for all IO and allow it to use block layer functions. Signed-off-by: Jens Axboe --- include/linux/bio.h | 2 ++ include/linux/blkdev.h | 2 ++ 2 files changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bio.h b/include/linux/bio.h index 038022763f09..1dd2bc2e84ae 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -282,6 +282,8 @@ extern int bio_get_nr_vecs(struct block_device *); extern struct bio *bio_map_user(struct request_queue *, struct block_device *, unsigned long, unsigned int, int); extern void bio_unmap_user(struct bio *); +extern struct bio *bio_map_kern(struct request_queue *, void *, unsigned int, + unsigned int); extern void bio_set_pages_dirty(struct bio *bio); extern void bio_check_pages_dirty(struct bio *bio); extern struct bio *bio_copy_user(struct request_queue *, unsigned long, unsigned int, int); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 4a99b76c5a33..67339bc5f6bc 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -560,6 +560,8 @@ extern void blk_run_queue(request_queue_t *); extern void blk_queue_activity_fn(request_queue_t *, activity_fn *, void *); extern struct request *blk_rq_map_user(request_queue_t *, int, void __user *, unsigned int); extern int blk_rq_unmap_user(struct request *, struct bio *, unsigned int); +extern struct request *blk_rq_map_kern(request_queue_t *, int, void *, + unsigned int, unsigned int); extern int blk_execute_rq(request_queue_t *, struct gendisk *, struct request *); static inline request_queue_t *bdev_get_queue(struct block_device *bdev) -- cgit v1.2.2 From dd1cab95f356f1395278633565f198463cf6bd24 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 20 Jun 2005 14:06:01 +0200 Subject: [PATCH] Cleanup blk_rq_map_* interfaces Change the blk_rq_map_user() and blk_rq_map_kern() interface to require a previously allocated request to be passed in. This is both more efficient for multiple iterations of mapping data to the same request, and it is also a much nicer API. Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 67339bc5f6bc..fc0dce078616 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -558,10 +558,9 @@ extern void blk_sync_queue(struct request_queue *q); extern void __blk_stop_queue(request_queue_t *q); extern void blk_run_queue(request_queue_t *); extern void blk_queue_activity_fn(request_queue_t *, activity_fn *, void *); -extern struct request *blk_rq_map_user(request_queue_t *, int, void __user *, unsigned int); -extern int blk_rq_unmap_user(struct request *, struct bio *, unsigned int); -extern struct request *blk_rq_map_kern(request_queue_t *, int, void *, - unsigned int, unsigned int); +extern int blk_rq_map_user(request_queue_t *, struct request *, void __user *, unsigned int); +extern int blk_rq_unmap_user(struct bio *, unsigned int); +extern int blk_rq_map_kern(request_queue_t *, struct request *, void *, unsigned int, unsigned int); extern int blk_execute_rq(request_queue_t *, struct gendisk *, struct request *); static inline request_queue_t *bdev_get_queue(struct block_device *bdev) -- cgit v1.2.2 From f1970baf6d74e03bd32072ab453f2fc01bc1b8d3 Mon Sep 17 00:00:00 2001 From: James Bottomley Date: Mon, 20 Jun 2005 14:06:52 +0200 Subject: [PATCH] Add scatter-gather support for the block layer SG_IO Signed-off-by: Jens Axboe --- include/linux/bio.h | 4 ++++ include/linux/blkdev.h | 1 + 2 files changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bio.h b/include/linux/bio.h index 1dd2bc2e84ae..ebcd03ba2e20 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -281,6 +281,10 @@ extern int bio_add_page(struct bio *, struct page *, unsigned int,unsigned int); extern int bio_get_nr_vecs(struct block_device *); extern struct bio *bio_map_user(struct request_queue *, struct block_device *, unsigned long, unsigned int, int); +struct sg_iovec; +extern struct bio *bio_map_user_iov(struct request_queue *, + struct block_device *, + struct sg_iovec *, int, int); extern void bio_unmap_user(struct bio *); extern struct bio *bio_map_kern(struct request_queue *, void *, unsigned int, unsigned int); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index fc0dce078616..0430ea3e5f2e 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -561,6 +561,7 @@ extern void blk_queue_activity_fn(request_queue_t *, activity_fn *, void *); extern int blk_rq_map_user(request_queue_t *, struct request *, void __user *, unsigned int); extern int blk_rq_unmap_user(struct bio *, unsigned int); extern int blk_rq_map_kern(request_queue_t *, struct request *, void *, unsigned int, unsigned int); +extern int blk_rq_map_user_iov(request_queue_t *, struct request *, struct sg_iovec *, int); extern int blk_execute_rq(request_queue_t *, struct gendisk *, struct request *); static inline request_queue_t *bdev_get_queue(struct block_device *bdev) -- cgit v1.2.2 From 994ca9a19616f0d4161a9e825f0835925d522426 Mon Sep 17 00:00:00 2001 From: James Bottomley Date: Mon, 20 Jun 2005 14:11:09 +0200 Subject: [PATCH] update blk_execute_rq to take an at_head parameter Original From: Mike Christie Modified to split out block changes (this patch) and SCSI pieces. Signed-off-by: Jens Axboe Signed-off-by: James Bottomley --- include/linux/blkdev.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 0430ea3e5f2e..a48dc12c6699 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -562,8 +562,8 @@ extern int blk_rq_map_user(request_queue_t *, struct request *, void __user *, u extern int blk_rq_unmap_user(struct bio *, unsigned int); extern int blk_rq_map_kern(request_queue_t *, struct request *, void *, unsigned int, unsigned int); extern int blk_rq_map_user_iov(request_queue_t *, struct request *, struct sg_iovec *, int); -extern int blk_execute_rq(request_queue_t *, struct gendisk *, struct request *); - +extern int blk_execute_rq(request_queue_t *, struct gendisk *, + struct request *, int); static inline request_queue_t *bdev_get_queue(struct block_device *bdev) { return bdev->bd_disk->queue; -- cgit v1.2.2 From 1f3a6a15771ed70d3b2581663dcc6b9bc134baa5 Mon Sep 17 00:00:00 2001 From: Kenji Kaneshige Date: Thu, 28 Jul 2005 14:42:00 -0400 Subject: [ACPI] acpi_register_gsi() can return error Current acpi_register_gsi() function has no way to indicate errors to its callers even though acpi_register_gsi() can fail to register gsi because of some reasons (out of memory, lack of interrupt vectors, incorrect BIOS, and so on). As a result, caller of acpi_register_gsi() cannot handle the case that acpi_register_gsi() fails. I think failure of acpi_register_gsi() should be handled properly. This series of patches changes acpi_register_gsi() to return negative value on error, and also changes callers of acpi_register_gsi() to handle failure of acpi_register_gsi(). This patch changes the type of return value of acpi_register_gsi() from "unsigned int" to "int" to indicate an error. If acpi_register_gsi() fails to register gsi, it returns negative value. Signed-off-by: Kenji Kaneshige Signed-off-by: Andrew Morton Signed-off-by: Len Brown --- include/linux/acpi.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/acpi.h b/include/linux/acpi.h index ca0cd240cee0..9378bcde73a4 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -432,7 +432,7 @@ static inline int acpi_boot_table_init(void) #endif /*!CONFIG_ACPI_BOOT*/ -unsigned int acpi_register_gsi (u32 gsi, int edge_level, int active_high_low); +int acpi_register_gsi (u32 gsi, int edge_level, int active_high_low); int acpi_gsi_to_irq (u32 gsi, unsigned int *irq); /* -- cgit v1.2.2 From 53de49f52e305e96143375d1741f15acff7bf34b Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Sat, 30 Jul 2005 04:18:00 -0400 Subject: [ACPI] CONFIG_ACPI=n build fix Signed-off-by: Andrew Morton Signed-off-by: Len Brown --- include/linux/acpi.h | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 9378bcde73a4..bf96ae9d93a3 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -420,16 +420,6 @@ extern int sbf_port ; #define acpi_mp_config 0 -static inline int acpi_boot_init(void) -{ - return 0; -} - -static inline int acpi_boot_table_init(void) -{ - return 0; -} - #endif /*!CONFIG_ACPI_BOOT*/ int acpi_register_gsi (u32 gsi, int edge_level, int active_high_low); @@ -536,5 +526,17 @@ static inline int acpi_get_pxm(acpi_handle handle) extern int pnpacpi_disabled; +#else /* CONFIG_ACPI */ + +static inline int acpi_boot_init(void) +{ + return 0; +} + +static inline int acpi_boot_table_init(void) +{ + return 0; +} + #endif /* CONFIG_ACPI */ #endif /*_LINUX_ACPI_H*/ -- cgit v1.2.2 From d0a7e574007fd547d72ec693bfa35778623d0738 Mon Sep 17 00:00:00 2001 From: James Bottomley Date: Sun, 14 Aug 2005 17:09:01 -0500 Subject: [SCSI] correct transport class abstraction to work outside SCSI I recently tried to construct a totally generic transport class and found there were certain features missing from the current abstract transport class. Most notable is that you have to hang the data on the class_device but most of the API is framed in terms of the generic device, not the class_device. These changes are two fold - Provide the class_device to all of the setup and configure APIs - Provide and extra API to take the device and the attribute class and return the corresponding class_device Signed-off-by: James Bottomley --- include/linux/attribute_container.h | 9 +++------ include/linux/transport_class.h | 11 ++++++++--- 2 files changed, 11 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/attribute_container.h b/include/linux/attribute_container.h index af1010b6dab7..f54b05b052b3 100644 --- a/include/linux/attribute_container.h +++ b/include/linux/attribute_container.h @@ -11,10 +11,12 @@ #include #include +#include struct attribute_container { struct list_head node; struct list_head containers; + spinlock_t containers_lock; struct class *class; struct class_device_attribute **attrs; int (*match)(struct attribute_container *, struct device *); @@ -62,12 +64,7 @@ int attribute_container_add_class_device_adapter(struct attribute_container *con struct class_device *classdev); void attribute_container_remove_attrs(struct class_device *classdev); void attribute_container_class_device_del(struct class_device *classdev); - - - - - - +struct class_device *attribute_container_find_class_device(struct attribute_container *, struct device *); struct class_device_attribute **attribute_container_classdev_to_attrs(const struct class_device *classdev); #endif diff --git a/include/linux/transport_class.h b/include/linux/transport_class.h index 87d98d1faefb..1d6cc22e5f42 100644 --- a/include/linux/transport_class.h +++ b/include/linux/transport_class.h @@ -12,11 +12,16 @@ #include #include +struct transport_container; + struct transport_class { struct class class; - int (*setup)(struct device *); - int (*configure)(struct device *); - int (*remove)(struct device *); + int (*setup)(struct transport_container *, struct device *, + struct class_device *); + int (*configure)(struct transport_container *, struct device *, + struct class_device *); + int (*remove)(struct transport_container *, struct device *, + struct class_device *); }; #define DECLARE_TRANSPORT_CLASS(cls, nm, su, rm, cfg) \ -- cgit v1.2.2 From 888ba6c62bc61a995d283977eb3a6cbafd6f4ac6 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Wed, 24 Aug 2005 12:07:20 -0400 Subject: [ACPI] delete CONFIG_ACPI_BOOT it has been a synonym for CONFIG_ACPI since 2.6.12 Signed-off-by: Len Brown --- include/linux/acpi.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/acpi.h b/include/linux/acpi.h index fd48db320f5b..fa1ad1a60a09 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -41,7 +41,7 @@ #include -#ifdef CONFIG_ACPI_BOOT +#ifdef CONFIG_ACPI enum acpi_irq_model_id { ACPI_IRQ_MODEL_PIC = 0, @@ -429,11 +429,11 @@ extern int pci_mmcfg_config_num; extern int sbf_port ; -#else /*!CONFIG_ACPI_BOOT*/ +#else /* !CONFIG_ACPI */ #define acpi_mp_config 0 -#endif /*!CONFIG_ACPI_BOOT*/ +#endif /* !CONFIG_ACPI */ int acpi_register_gsi (u32 gsi, int edge_level, int active_high_low); int acpi_gsi_to_irq (u32 gsi, unsigned int *irq); -- cgit v1.2.2 From 8466361ad5233d4356a4601e16b66c25277920d1 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Wed, 24 Aug 2005 12:09:07 -0400 Subject: [ACPI] delete CONFIG_ACPI_INTERPRETER it is a synonym for CONFIG_ACPI Signed-off-by: Len Brown --- include/linux/acpi.h | 11 ----------- 1 file changed, 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/acpi.h b/include/linux/acpi.h index fa1ad1a60a09..6882b32aa40f 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -488,20 +488,9 @@ extern int ec_write(u8 addr, u8 val); #endif /*CONFIG_ACPI_EC*/ -#ifdef CONFIG_ACPI_INTERPRETER - extern int acpi_blacklisted(void); extern void acpi_bios_year(char *s); -#else /*!CONFIG_ACPI_INTERPRETER*/ - -static inline int acpi_blacklisted(void) -{ - return 0; -} - -#endif /*!CONFIG_ACPI_INTERPRETER*/ - #define ACPI_CSTATE_LIMIT_DEFINED /* for driver builds */ #ifdef CONFIG_ACPI -- cgit v1.2.2 From 6153df7b2f4d27c8bde054db1b947369a6f64d83 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Thu, 25 Aug 2005 12:27:09 -0400 Subject: [ACPI] delete CONFIG_ACPI_PCI Delete the ability to build an ACPI kernel that does not include PCI support. When such a machine is created and it requires a tuned kernel, send a patch. http://bugzilla.kernel.org/show_bug.cgi?id=1364 Signed-off-by: Len Brown --- include/linux/acpi.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 6882b32aa40f..026c3c011dc0 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -445,7 +445,7 @@ int acpi_gsi_to_irq (u32 gsi, unsigned int *irq); */ void acpi_unregister_gsi (u32 gsi); -#ifdef CONFIG_ACPI_PCI +#ifdef CONFIG_ACPI struct acpi_prt_entry { struct list_head node; @@ -479,7 +479,7 @@ struct acpi_pci_driver { int acpi_pci_register_driver(struct acpi_pci_driver *driver); void acpi_pci_unregister_driver(struct acpi_pci_driver *driver); -#endif /*CONFIG_ACPI_PCI*/ +#endif /* CONFIG_ACPI */ #ifdef CONFIG_ACPI_EC -- cgit v1.2.2 From caca1779870b1bcc0fb07e48ebd2403901f356b8 Mon Sep 17 00:00:00 2001 From: James Bottomley Date: Tue, 16 Aug 2005 17:26:10 -0500 Subject: [SCSI] add missing attribute container function prototype attribute_container_classdev_to_container is an exported function of the attribute_container.c file. However, there's no prototype for it. Now I actually want to use it, so add one. Signed-off-by: James Bottomley --- include/linux/attribute_container.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/attribute_container.h b/include/linux/attribute_container.h index f54b05b052b3..ee83fe64a102 100644 --- a/include/linux/attribute_container.h +++ b/include/linux/attribute_container.h @@ -64,6 +64,7 @@ int attribute_container_add_class_device_adapter(struct attribute_container *con struct class_device *classdev); void attribute_container_remove_attrs(struct class_device *classdev); void attribute_container_class_device_del(struct class_device *classdev); +struct attribute_container *attribute_container_classdev_to_container(struct class_device *); struct class_device *attribute_container_find_class_device(struct attribute_container *, struct device *); struct class_device_attribute **attribute_container_classdev_to_attrs(const struct class_device *classdev); -- cgit v1.2.2 From 53c165e0a6c8a4ff7df316557528fa7a52d20711 Mon Sep 17 00:00:00 2001 From: James Bottomley Date: Mon, 22 Aug 2005 10:06:19 -0500 Subject: [SCSI] correct attribute_container list usage One of the changes in the attribute_container code in the scsi-misc tree was to add a lock to protect the list of devices per container. This, unfortunately, leads to potential scheduling while atomic problems if there's a sleep in the function called by a trigger. The correct solution is to use the kernel klist infrastructure instead which allows lockless traversal of a list. Signed-off-by: James Bottomley --- include/linux/attribute_container.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/attribute_container.h b/include/linux/attribute_container.h index ee83fe64a102..93bfb0beb62a 100644 --- a/include/linux/attribute_container.h +++ b/include/linux/attribute_container.h @@ -11,12 +11,12 @@ #include #include +#include #include struct attribute_container { struct list_head node; - struct list_head containers; - spinlock_t containers_lock; + struct klist containers; struct class *class; struct class_device_attribute **attrs; int (*match)(struct attribute_container *, struct device *); -- cgit v1.2.2 From 61a7afa2c476a3be261cf88a95b0dea0c3bd29d4 Mon Sep 17 00:00:00 2001 From: James Bottomley Date: Tue, 16 Aug 2005 18:27:34 -0500 Subject: [SCSI] embryonic RAID class The idea behind a RAID class is to provide a uniform interface to all RAID subsystems (both hardware and software) in the kernel. To do that, I've made this class a transport class that's entirely subsystem independent (although the matching routines have to match per subsystem, as you'll see looking at the code). I put it in the scsi subdirectory purely because I needed somewhere to play with it, but it's not a scsi specific module. I used a fusion raid card as the test bed for this; with that kind of card, this is the type of class output you get: jejb@titanic> ls -l /sys/class/raid_devices/20\:0\:0\:0/ total 0 lrwxrwxrwx 1 root root 0 Aug 16 17:21 component-0 -> ../../../devices/pci0000:80/0000:80:04.0/host20/target20:1:0/20:1:0:0/ lrwxrwxrwx 1 root root 0 Aug 16 17:21 component-1 -> ../../../devices/pci0000:80/0000:80:04.0/host20/target20:1:1/20:1:1:0/ lrwxrwxrwx 1 root root 0 Aug 16 17:21 device -> ../../../devices/pci0000:80/0000:80:04.0/host20/target20:0:0/20:0:0:0/ -r--r--r-- 1 root root 16384 Aug 16 17:21 level -r--r--r-- 1 root root 16384 Aug 16 17:21 resync -r--r--r-- 1 root root 16384 Aug 16 17:21 state So it's really simple: for a SCSI device representing a hardware raid, it shows the raid level, the array state, the resync % complete (if the state is resyncing) and the underlying components of the RAID (these are exposed in fusion on the virtual channel 1). As you can see, this type of information can be exported by almost anything, including software raid. The more difficult trick, of course, is going to be getting it to perform configuration type actions with writable attributes. Signed-off-by: James Bottomley --- include/linux/raid_class.h | 59 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 59 insertions(+) create mode 100644 include/linux/raid_class.h (limited to 'include/linux') diff --git a/include/linux/raid_class.h b/include/linux/raid_class.h new file mode 100644 index 000000000000..a71123c28272 --- /dev/null +++ b/include/linux/raid_class.h @@ -0,0 +1,59 @@ +/* + */ +#include + +struct raid_template { + struct transport_container raid_attrs; +}; + +struct raid_function_template { + void *cookie; + int (*is_raid)(struct device *); + void (*get_resync)(struct device *); + void (*get_state)(struct device *); +}; + +enum raid_state { + RAID_ACTIVE = 1, + RAID_DEGRADED, + RAID_RESYNCING, + RAID_OFFLINE, +}; + +struct raid_data { + struct list_head component_list; + int component_count; + int level; + enum raid_state state; + int resync; +}; + +#define DEFINE_RAID_ATTRIBUTE(type, attr) \ +static inline void \ +raid_set_##attr(struct raid_template *r, struct device *dev, type value) { \ + struct class_device *cdev = \ + attribute_container_find_class_device(&r->raid_attrs.ac, dev);\ + struct raid_data *rd; \ + BUG_ON(!cdev); \ + rd = class_get_devdata(cdev); \ + rd->attr = value; \ +} \ +static inline type \ +raid_get_##attr(struct raid_template *r, struct device *dev) { \ + struct class_device *cdev = \ + attribute_container_find_class_device(&r->raid_attrs.ac, dev);\ + struct raid_data *rd; \ + BUG_ON(!cdev); \ + rd = class_get_devdata(cdev); \ + return rd->attr; \ +} + +DEFINE_RAID_ATTRIBUTE(int, level) +DEFINE_RAID_ATTRIBUTE(int, resync) +DEFINE_RAID_ATTRIBUTE(enum raid_state, state) + +struct raid_template *raid_class_attach(struct raid_function_template *); +void raid_class_release(struct raid_template *); + +void raid_component_add(struct raid_template *, struct device *, + struct device *); -- cgit v1.2.2 From 93c37f292110a37dd77e4cc0aaf1c341d79bf6aa Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 6 Sep 2005 13:57:08 -0700 Subject: [SERIAL]: Avoid 'statement with no effect' warnings. When SUPPORT_SYSRQ is false, gcc can emit warnings for the uart_handle_sysrq_char() that results. Using an empty inline returning zero kills the warning. Signed-off-by: David S. Miller --- include/linux/serial_core.h | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h index cf0f64ea2bc0..9b12fe731612 100644 --- a/include/linux/serial_core.h +++ b/include/linux/serial_core.h @@ -385,11 +385,11 @@ int uart_resume_port(struct uart_driver *reg, struct uart_port *port); /* * The following are helper functions for the low level drivers. */ -#ifdef SUPPORT_SYSRQ static inline int uart_handle_sysrq_char(struct uart_port *port, unsigned int ch, struct pt_regs *regs) { +#ifdef SUPPORT_SYSRQ if (port->sysrq) { if (ch && time_before(jiffies, port->sysrq)) { handle_sysrq(ch, regs, NULL); @@ -398,11 +398,9 @@ uart_handle_sysrq_char(struct uart_port *port, unsigned int ch, } port->sysrq = 0; } +#endif return 0; } -#else -#define uart_handle_sysrq_char(port,ch,regs) (0) -#endif /* * We do the SysRQ and SAK checking like this... -- cgit v1.2.2 From 2248bcfcd8fb622ec88b8587d0c1f139635ffd2e Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Tue, 6 Sep 2005 15:06:42 -0700 Subject: [NETFILTER]: Add support for permanent expectations A permanent expectation exists until timeing out and can expect multiple related connections. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/netfilter_ipv4/ip_conntrack.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netfilter_ipv4/ip_conntrack.h b/include/linux/netfilter_ipv4/ip_conntrack.h index 088742befe49..7e033e9271a8 100644 --- a/include/linux/netfilter_ipv4/ip_conntrack.h +++ b/include/linux/netfilter_ipv4/ip_conntrack.h @@ -263,6 +263,9 @@ struct ip_conntrack_expect /* Unique ID */ unsigned int id; + /* Flags */ + unsigned int flags; + #ifdef CONFIG_IP_NF_NAT_NEEDED /* This is the original per-proto part, used to map the * expected connection the way the recipient expects. */ @@ -272,6 +275,8 @@ struct ip_conntrack_expect #endif }; +#define IP_CT_EXPECT_PERMANENT 0x1 + static inline struct ip_conntrack * tuplehash_to_ctrack(const struct ip_conntrack_tuple_hash *hash) { -- cgit v1.2.2 From 03486a4f838c55481317fca5ac2e7d12550a4fb7 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Tue, 6 Sep 2005 15:09:43 -0700 Subject: [NETFILTER]: Handle NAT module load race When the NAT module is loaded when connections are already confirmed it must not change their tuples anymore. This is especially important with CONFIG_NETFILTER_DEBUG, the netfilter listhelp functions will refuse to remove an entry from a list when it can not be found on the list, so when a changed tuple hashes to a new bucket the entry is kept in the list until and after the conntrack is freed. Allocate the exact conntrack tuple for NAT for already confirmed connections or drop them if that fails. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/netfilter_ipv4/ip_nat_rule.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netfilter_ipv4/ip_nat_rule.h b/include/linux/netfilter_ipv4/ip_nat_rule.h index fecd2a06dcd8..73b9552e6a89 100644 --- a/include/linux/netfilter_ipv4/ip_nat_rule.h +++ b/include/linux/netfilter_ipv4/ip_nat_rule.h @@ -19,5 +19,10 @@ extern unsigned int alloc_null_binding(struct ip_conntrack *conntrack, struct ip_nat_info *info, unsigned int hooknum); + +extern unsigned int +alloc_null_binding_confirmed(struct ip_conntrack *conntrack, + struct ip_nat_info *info, + unsigned int hooknum); #endif #endif /* _IP_NAT_RULE_H */ -- cgit v1.2.2 From 49719eb355d32fa07793017b4b46b1c02e88b275 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 6 Sep 2005 15:10:46 -0700 Subject: [NETFILTER]: kill __ip_ct_expect_unlink_destroy The following patch kills __ip_ct_expect_unlink_destroy and export unlink_expect as ip_ct_unlink_expect. As it was discussed [1], the function __ip_ct_expect_unlink_destroy is a bit confusing so better do the following sequence: ip_ct_destroy_expect and ip_conntrack_expect_put. [1] https://lists.netfilter.org/pipermail/netfilter-devel/2005-August/020794.html Signed-off-by: Pablo Neira Ayuso Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/netfilter_ipv4/ip_conntrack_core.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netfilter_ipv4/ip_conntrack_core.h b/include/linux/netfilter_ipv4/ip_conntrack_core.h index dc4d2a0575de..907d4f5ca5dc 100644 --- a/include/linux/netfilter_ipv4/ip_conntrack_core.h +++ b/include/linux/netfilter_ipv4/ip_conntrack_core.h @@ -52,7 +52,7 @@ static inline int ip_conntrack_confirm(struct sk_buff **pskb) return ret; } -extern void __ip_ct_expect_unlink_destroy(struct ip_conntrack_expect *exp); +extern void ip_ct_unlink_expect(struct ip_conntrack_expect *exp); extern struct list_head *ip_conntrack_hash; extern struct list_head ip_conntrack_expect_list; -- cgit v1.2.2 From f2c383988d68c91a7d474b7cf26c0a2df49bbafe Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Tue, 6 Sep 2005 15:48:03 -0700 Subject: [NET]: skb_get/set_timestamp use const The new timestamp get/set routines should have const attribute on parameters (helps to indicate direction). Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- include/linux/skbuff.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 42edce6abe23..da7da9c0ed1b 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1251,7 +1251,7 @@ extern void skb_add_mtu(int mtu); * This function converts the offset back to a struct timeval and stores * it in stamp. */ -static inline void skb_get_timestamp(struct sk_buff *skb, struct timeval *stamp) +static inline void skb_get_timestamp(const struct sk_buff *skb, struct timeval *stamp) { stamp->tv_sec = skb->tstamp.off_sec; stamp->tv_usec = skb->tstamp.off_usec; @@ -1270,7 +1270,7 @@ static inline void skb_get_timestamp(struct sk_buff *skb, struct timeval *stamp) * This function converts a struct timeval to an offset and stores * it in the skb. */ -static inline void skb_set_timestamp(struct sk_buff *skb, struct timeval *stamp) +static inline void skb_set_timestamp(struct sk_buff *skb, const struct timeval *stamp) { skb->tstamp.off_sec = stamp->tv_sec - skb_tv_base.tv_sec; skb->tstamp.off_usec = stamp->tv_usec - skb_tv_base.tv_usec; -- cgit v1.2.2 From aaec0fab5f8809fe1509fdc204e769bb35ebe41a Mon Sep 17 00:00:00 2001 From: Jens Osterkamp Date: Mon, 5 Sep 2005 15:19:29 -0700 Subject: [PATCH] net: add driver for the NIC on Cell Blades This patch adds a driver for a new 1000 Mbit ethernet NIC. It is integrated on the south bridge that is used for our Cell Blades. The code gets the MAC address from the Open Firmware device tree, so it won't compile on platforms other than ppc64. This is the first public release, so I don't expect the first version to get merged, but I'd aim for integration within the 2.6.13 time frame. Cc: Utz Bacher Signed-off-by: Arnd Bergmann Signed-off-by: Andrew Morton Signed-off-by: Jeff Garzik --- include/linux/pci_ids.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 95c941f8c747..ee0ab7a5f91b 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -1612,6 +1612,7 @@ #define PCI_DEVICE_ID_TOSHIBA_TC35815CF 0x0030 #define PCI_DEVICE_ID_TOSHIBA_TX4927 0x0180 #define PCI_DEVICE_ID_TOSHIBA_TC86C001_MISC 0x0108 +#define PCI_DEVICE_ID_TOSHIBA_SPIDER_NET 0x01b3 #define PCI_VENDOR_ID_RICOH 0x1180 #define PCI_DEVICE_ID_RICOH_RL5C465 0x0465 -- cgit v1.2.2 From 6582c164f2b3b6e58d1f13c1c031b19ee691eb14 Mon Sep 17 00:00:00 2001 From: Jean Tourrilhes Date: Fri, 2 Sep 2005 11:32:28 -0700 Subject: [PATCH] WE-19 for kernel 2.6.13 Hi Jeff, This is version 19 of the Wireless Extensions. It was supposed to be the fallback of the WPA API changes, but people seem quite happy about it (especially Jouni), so the patch is rather small. The patch has been fully tested with 2.6.13 and various wireless drivers, and is in its final version. Would you mind pushing that into Linus's kernel so that the driver and the apps can take advantage ot it ? It includes : o iwstat improvement (explicit dBm). This is the result of long discussions with Dan Williams, the authors of NetworkManager. Thanks to him for all the fruitful feedback. o remove pointer from event stream. I was not totally sure if this pointer was 32-64 bits clean, so I'd rather remove it and be at peace with it. o remove linux header from wireless.h. This has long been requested by people writting user space apps, now it's done, and it was not even painful. o final deprecation of spy_offset. You did not like it, it's now gone for good. o Start deprecating dev->get_wireless_stats -> debloat netdev o Add "check" version of event macros for ieee802.11 stack. Jiri Benc doesn't like the current macros, we aim to please ;-) All those changes, except the last one, have been bit-roting on my web pages for a while... Patches for most kernel drivers will follow. Patches for the Orinoco and the HostAP drivers have been sent to their respective maintainers. Have fun... Jean Signed-off-by: Jeff Garzik --- include/linux/wireless.h | 47 +++++++++++++++++++++++++++++++++-------------- 1 file changed, 33 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/include/linux/wireless.h b/include/linux/wireless.h index ae485f9c916e..dab5afdaf71c 100644 --- a/include/linux/wireless.h +++ b/include/linux/wireless.h @@ -1,7 +1,7 @@ /* * This file define a set of standard wireless extensions * - * Version : 18 12.3.05 + * Version : 19 18.3.05 * * Authors : Jean Tourrilhes - HPL - * Copyright (c) 1997-2005 Jean Tourrilhes, All Rights Reserved. @@ -69,11 +69,12 @@ /***************************** INCLUDES *****************************/ -/* To minimise problems in user space, I might remove those headers - * at some point. Jean II */ -#include /* for "caddr_t" et al */ -#include /* for "struct sockaddr" et al */ -#include /* for IFNAMSIZ and co... */ +/* Do not put any header in this file, this creates a mess when + * exported to user space. Most users have included all the + * relevant headers anyway... Jean II */ +/*#include */ /* for "caddr_t" et al */ +/*#include */ /* for "struct sockaddr" et al */ +/*#include */ /* for IFNAMSIZ and co... */ /***************************** VERSION *****************************/ /* @@ -82,7 +83,7 @@ * (there is some stuff that will be added in the future...) * I just plan to increment with each new version. */ -#define WIRELESS_EXT 18 +#define WIRELESS_EXT 19 /* * Changes : @@ -197,6 +198,15 @@ * related parameters (extensible up to 4096 parameter values) * - Add wireless events: IWEVGENIE, IWEVMICHAELMICFAILURE, * IWEVASSOCREQIE, IWEVASSOCRESPIE, IWEVPMKIDCAND + * + * V18 to V19 + * ---------- + * - Remove (struct iw_point *)->pointer from events and streams + * - Remove header includes to help user space + * - Increase IW_ENCODING_TOKEN_MAX from 32 to 64 + * - Add IW_QUAL_ALL_UPDATED and IW_QUAL_ALL_INVALID macros + * - Add explicit flag to tell stats are in dBm : IW_QUAL_DBM + * - Add IW_IOCTL_IDX() and IW_EVENT_IDX() macros */ /**************************** CONSTANTS ****************************/ @@ -322,6 +332,7 @@ /* The first and the last (range) */ #define SIOCIWFIRST 0x8B00 #define SIOCIWLAST SIOCIWLASTPRIV /* 0x8BFF */ +#define IW_IOCTL_IDX(cmd) ((cmd) - SIOCIWFIRST) /* Even : get (world access), odd : set (root access) */ #define IW_IS_SET(cmd) (!((cmd) & 0x1)) @@ -366,6 +377,7 @@ * (struct iw_pmkid_cand) */ #define IWEVFIRST 0x8C00 +#define IW_EVENT_IDX(cmd) ((cmd) - IWEVFIRST) /* ------------------------- PRIVATE INFO ------------------------- */ /* @@ -427,12 +439,15 @@ #define IW_MODE_MONITOR 6 /* Passive monitor (listen only) */ /* Statistics flags (bitmask in updated) */ -#define IW_QUAL_QUAL_UPDATED 0x1 /* Value was updated since last read */ -#define IW_QUAL_LEVEL_UPDATED 0x2 -#define IW_QUAL_NOISE_UPDATED 0x4 +#define IW_QUAL_QUAL_UPDATED 0x01 /* Value was updated since last read */ +#define IW_QUAL_LEVEL_UPDATED 0x02 +#define IW_QUAL_NOISE_UPDATED 0x04 +#define IW_QUAL_ALL_UPDATED 0x07 +#define IW_QUAL_DBM 0x08 /* Level + Noise are dBm */ #define IW_QUAL_QUAL_INVALID 0x10 /* Driver doesn't provide value */ #define IW_QUAL_LEVEL_INVALID 0x20 #define IW_QUAL_NOISE_INVALID 0x40 +#define IW_QUAL_ALL_INVALID 0x70 /* Frequency flags */ #define IW_FREQ_AUTO 0x00 /* Let the driver decides */ @@ -443,7 +458,7 @@ #define IW_MAX_ENCODING_SIZES 8 /* Maximum size of the encoding token in bytes */ -#define IW_ENCODING_TOKEN_MAX 32 /* 256 bits (for now) */ +#define IW_ENCODING_TOKEN_MAX 64 /* 512 bits (for now) */ /* Flags for encoding (along with the token) */ #define IW_ENCODE_INDEX 0x00FF /* Token index (if needed) */ @@ -1039,12 +1054,16 @@ struct iw_event #define IW_EV_CHAR_LEN (IW_EV_LCP_LEN + IFNAMSIZ) #define IW_EV_UINT_LEN (IW_EV_LCP_LEN + sizeof(__u32)) #define IW_EV_FREQ_LEN (IW_EV_LCP_LEN + sizeof(struct iw_freq)) -#define IW_EV_POINT_LEN (IW_EV_LCP_LEN + sizeof(struct iw_point)) #define IW_EV_PARAM_LEN (IW_EV_LCP_LEN + sizeof(struct iw_param)) #define IW_EV_ADDR_LEN (IW_EV_LCP_LEN + sizeof(struct sockaddr)) #define IW_EV_QUAL_LEN (IW_EV_LCP_LEN + sizeof(struct iw_quality)) -/* Note : in the case of iw_point, the extra data will come at the - * end of the event */ +/* iw_point events are special. First, the payload (extra data) come at + * the end of the event, so they are bigger than IW_EV_POINT_LEN. Second, + * we omit the pointer, so start at an offset. */ +#define IW_EV_POINT_OFF (((char *) &(((struct iw_point *) NULL)->length)) - \ + (char *) NULL) +#define IW_EV_POINT_LEN (IW_EV_LCP_LEN + sizeof(struct iw_point) - \ + IW_EV_POINT_OFF) #endif /* _LINUX_WIRELESS_H */ -- cgit v1.2.2 From bbeec90b98a3066f6f2b8d41c80561f5665e4631 Mon Sep 17 00:00:00 2001 From: Jeff Garzik Date: Wed, 7 Sep 2005 00:27:54 -0400 Subject: [wireless] build fixes after merging WE-19 --- include/linux/wireless.h | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/wireless.h b/include/linux/wireless.h index dab5afdaf71c..a555a0f7a7b4 100644 --- a/include/linux/wireless.h +++ b/include/linux/wireless.h @@ -69,12 +69,9 @@ /***************************** INCLUDES *****************************/ -/* Do not put any header in this file, this creates a mess when - * exported to user space. Most users have included all the - * relevant headers anyway... Jean II */ -/*#include */ /* for "caddr_t" et al */ -/*#include */ /* for "struct sockaddr" et al */ -/*#include */ /* for IFNAMSIZ and co... */ +#include /* for "caddr_t" et al */ +#include /* for "struct sockaddr" et al */ +#include /* for IFNAMSIZ and co... */ /***************************** VERSION *****************************/ /* -- cgit v1.2.2 From 54d5d42404e7705cf3804593189e963350d470e5 Mon Sep 17 00:00:00 2001 From: Ashok Raj Date: Tue, 6 Sep 2005 15:16:15 -0700 Subject: [PATCH] x86/x86_64: deferred handling of writes to /proc/irqxx/smp_affinity When handling writes to /proc/irq, current code is re-programming rte entries directly. This is not recommended and could potentially cause chipset's to lockup, or cause missing interrupts. CONFIG_IRQ_BALANCE does this correctly, where it re-programs only when the interrupt is pending. The same needs to be done for /proc/irq handling as well. Otherwise user space irq balancers are really not doing the right thing. - Changed pending_irq_balance_cpumask to pending_irq_migrate_cpumask for lack of a generic name. - added move_irq out of IRQ_BALANCE, and added this same to X86_64 - Added new proc handler for write, so we can do deferred write at irq handling time. - Display of /proc/irq/XX/smp_affinity used to display CPU_MASKALL, instead it now shows only active cpu masks, or exactly what was set. - Provided a common move_irq implementation, instead of duplicating when using generic irq framework. Tested on i386/x86_64 and ia64 with CONFIG_PCI_MSI turned on and off. Tested UP builds as well. MSI testing: tbd: I have cards, need to look for a x-over cable, although I did test an earlier version of this patch. Will test in a couple days. Signed-off-by: Ashok Raj Acked-by: Zwane Mwaikambo Grudgingly-acked-by: Andi Kleen Signed-off-by: Coywolf Qi Hunt Signed-off-by: Ashok Raj Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/irq.h | 123 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 123 insertions(+) (limited to 'include/linux') diff --git a/include/linux/irq.h b/include/linux/irq.h index 069d3b84d311..4a362b9ec966 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -71,16 +71,139 @@ typedef struct irq_desc { unsigned int irq_count; /* For detecting broken interrupts */ unsigned int irqs_unhandled; spinlock_t lock; +#if defined (CONFIG_GENERIC_PENDING_IRQ) || defined (CONFIG_IRQBALANCE) + unsigned int move_irq; /* Flag need to re-target intr dest*/ +#endif } ____cacheline_aligned irq_desc_t; extern irq_desc_t irq_desc [NR_IRQS]; +/* Return a pointer to the irq descriptor for IRQ. */ +static inline irq_desc_t * +irq_descp (int irq) +{ + return irq_desc + irq; +} + #include /* the arch dependent stuff */ extern int setup_irq(unsigned int irq, struct irqaction * new); #ifdef CONFIG_GENERIC_HARDIRQS extern cpumask_t irq_affinity[NR_IRQS]; + +#ifdef CONFIG_SMP +static inline void set_native_irq_info(int irq, cpumask_t mask) +{ + irq_affinity[irq] = mask; +} +#else +static inline void set_native_irq_info(int irq, cpumask_t mask) +{ +} +#endif + +#ifdef CONFIG_SMP + +#if defined (CONFIG_GENERIC_PENDING_IRQ) || defined (CONFIG_IRQBALANCE) +extern cpumask_t pending_irq_cpumask[NR_IRQS]; + +static inline void set_pending_irq(unsigned int irq, cpumask_t mask) +{ + irq_desc_t *desc = irq_desc + irq; + unsigned long flags; + + spin_lock_irqsave(&desc->lock, flags); + desc->move_irq = 1; + pending_irq_cpumask[irq] = mask; + spin_unlock_irqrestore(&desc->lock, flags); +} + +static inline void +move_native_irq(int irq) +{ + cpumask_t tmp; + irq_desc_t *desc = irq_descp(irq); + + if (likely (!desc->move_irq)) + return; + + desc->move_irq = 0; + + if (likely(cpus_empty(pending_irq_cpumask[irq]))) + return; + + if (!desc->handler->set_affinity) + return; + + /* note - we hold the desc->lock */ + cpus_and(tmp, pending_irq_cpumask[irq], cpu_online_map); + + /* + * If there was a valid mask to work with, please + * do the disable, re-program, enable sequence. + * This is *not* particularly important for level triggered + * but in a edge trigger case, we might be setting rte + * when an active trigger is comming in. This could + * cause some ioapics to mal-function. + * Being paranoid i guess! + */ + if (unlikely(!cpus_empty(tmp))) { + desc->handler->disable(irq); + desc->handler->set_affinity(irq,tmp); + desc->handler->enable(irq); + } + cpus_clear(pending_irq_cpumask[irq]); +} + +#ifdef CONFIG_PCI_MSI +/* + * Wonder why these are dummies? + * For e.g the set_ioapic_affinity_vector() calls the set_ioapic_affinity_irq() + * counter part after translating the vector to irq info. We need to perform + * this operation on the real irq, when we dont use vector, i.e when + * pci_use_vector() is false. + */ +static inline void move_irq(int irq) +{ +} + +static inline void set_irq_info(int irq, cpumask_t mask) +{ +} + +#else // CONFIG_PCI_MSI + +static inline void move_irq(int irq) +{ + move_native_irq(irq); +} + +static inline void set_irq_info(int irq, cpumask_t mask) +{ + set_native_irq_info(irq, mask); +} +#endif // CONFIG_PCI_MSI + +#else // CONFIG_GENERIC_PENDING_IRQ || CONFIG_IRQBALANCE + +#define move_irq(x) +#define move_native_irq(x) +#define set_pending_irq(x,y) +static inline void set_irq_info(int irq, cpumask_t mask) +{ + set_native_irq_info(irq, mask); +} + +#endif // CONFIG_GENERIC_PENDING_IRQ + +#else // CONFIG_SMP + +#define move_irq(x) +#define move_native_irq(x) + +#endif // CONFIG_SMP + extern int no_irq_affinity; extern int noirqdebug_setup(char *str); -- cgit v1.2.2 From 4732efbeb997189d9f9b04708dc26bf8613ed721 Mon Sep 17 00:00:00 2001 From: Jakub Jelinek Date: Tue, 6 Sep 2005 15:16:25 -0700 Subject: [PATCH] FUTEX_WAKE_OP: pthread_cond_signal() speedup ATM pthread_cond_signal is unnecessarily slow, because it wakes one waiter (which at least on UP usually means an immediate context switch to one of the waiter threads). This waiter wakes up and after a few instructions it attempts to acquire the cv internal lock, but that lock is still held by the thread calling pthread_cond_signal. So it goes to sleep and eventually the signalling thread is scheduled in, unlocks the internal lock and wakes the waiter again. Now, before 2003-09-21 NPTL was using FUTEX_REQUEUE in pthread_cond_signal to avoid this performance issue, but it was removed when locks were redesigned to the 3 state scheme (unlocked, locked uncontended, locked contended). Following scenario shows why simply using FUTEX_REQUEUE in pthread_cond_signal together with using lll_mutex_unlock_force in place of lll_mutex_unlock is not enough and probably why it has been disabled at that time: The number is value in cv->__data.__lock. thr1 thr2 thr3 0 pthread_cond_wait 1 lll_mutex_lock (cv->__data.__lock) 0 lll_mutex_unlock (cv->__data.__lock) 0 lll_futex_wait (&cv->__data.__futex, futexval) 0 pthread_cond_signal 1 lll_mutex_lock (cv->__data.__lock) 1 pthread_cond_signal 2 lll_mutex_lock (cv->__data.__lock) 2 lll_futex_wait (&cv->__data.__lock, 2) 2 lll_futex_requeue (&cv->__data.__futex, 0, 1, &cv->__data.__lock) # FUTEX_REQUEUE, not FUTEX_CMP_REQUEUE 2 lll_mutex_unlock_force (cv->__data.__lock) 0 cv->__data.__lock = 0 0 lll_futex_wake (&cv->__data.__lock, 1) 1 lll_mutex_lock (cv->__data.__lock) 0 lll_mutex_unlock (cv->__data.__lock) # Here, lll_mutex_unlock doesn't know there are threads waiting # on the internal cv's lock Now, I believe it is possible to use FUTEX_REQUEUE in pthread_cond_signal, but it will cost us not one, but 2 extra syscalls and, what's worse, one of these extra syscalls will be done for every single waiting loop in pthread_cond_*wait. We would need to use lll_mutex_unlock_force in pthread_cond_signal after requeue and lll_mutex_cond_lock in pthread_cond_*wait after lll_futex_wait. Another alternative is to do the unlocking pthread_cond_signal needs to do (the lock can't be unlocked before lll_futex_wake, as that is racy) in the kernel. I have implemented both variants, futex-requeue-glibc.patch is the first one and futex-wake_op{,-glibc}.patch is the unlocking inside of the kernel. The kernel interface allows userland to specify how exactly an unlocking operation should look like (some atomic arithmetic operation with optional constant argument and comparison of the previous futex value with another constant). It has been implemented just for ppc*, x86_64 and i?86, for other architectures I'm including just a stub header which can be used as a starting point by maintainers to write support for their arches and ATM will just return -ENOSYS for FUTEX_WAKE_OP. The requeue patch has been (lightly) tested just on x86_64, the wake_op patch on ppc64 kernel running 32-bit and 64-bit NPTL and x86_64 kernel running 32-bit and 64-bit NPTL. With the following benchmark on UP x86-64 I get: for i in nptl-orig nptl-requeue nptl-wake_op; do echo time elf/ld.so --library-path .:$i /tmp/bench; \ for j in 1 2; do echo ( time elf/ld.so --library-path .:$i /tmp/bench ) 2>&1; done; done time elf/ld.so --library-path .:nptl-orig /tmp/bench real 0m0.655s user 0m0.253s sys 0m0.403s real 0m0.657s user 0m0.269s sys 0m0.388s time elf/ld.so --library-path .:nptl-requeue /tmp/bench real 0m0.496s user 0m0.225s sys 0m0.271s real 0m0.531s user 0m0.242s sys 0m0.288s time elf/ld.so --library-path .:nptl-wake_op /tmp/bench real 0m0.380s user 0m0.176s sys 0m0.204s real 0m0.382s user 0m0.175s sys 0m0.207s The benchmark is at: http://sourceware.org/ml/libc-alpha/2005-03/txt00001.txt Older futex-requeue-glibc.patch version is at: http://sourceware.org/ml/libc-alpha/2005-03/txt00002.txt Older futex-wake_op-glibc.patch version is at: http://sourceware.org/ml/libc-alpha/2005-03/txt00003.txt Will post a new version (just x86-64 fixes so that the patch applies against pthread_cond_signal.S) to libc-hacker ml soon. Attached is the kernel FUTEX_WAKE_OP patch as well as a simple-minded testcase that will not test the atomicity of the operation, but at least check if the threads that should have been woken up are woken up and whether the arithmetic operation in the kernel gave the expected results. Acked-by: Ingo Molnar Cc: Ulrich Drepper Cc: Jamie Lokier Cc: Rusty Russell Signed-off-by: Yoichi Yuasa Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/futex.h | 36 +++++++++++++++++++++++++++++++----- 1 file changed, 31 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/futex.h b/include/linux/futex.h index 65d6cfdb6d39..10f96c31971e 100644 --- a/include/linux/futex.h +++ b/include/linux/futex.h @@ -4,14 +4,40 @@ /* Second argument to futex syscall */ -#define FUTEX_WAIT (0) -#define FUTEX_WAKE (1) -#define FUTEX_FD (2) -#define FUTEX_REQUEUE (3) -#define FUTEX_CMP_REQUEUE (4) +#define FUTEX_WAIT 0 +#define FUTEX_WAKE 1 +#define FUTEX_FD 2 +#define FUTEX_REQUEUE 3 +#define FUTEX_CMP_REQUEUE 4 +#define FUTEX_WAKE_OP 5 long do_futex(unsigned long uaddr, int op, int val, unsigned long timeout, unsigned long uaddr2, int val2, int val3); +#define FUTEX_OP_SET 0 /* *(int *)UADDR2 = OPARG; */ +#define FUTEX_OP_ADD 1 /* *(int *)UADDR2 += OPARG; */ +#define FUTEX_OP_OR 2 /* *(int *)UADDR2 |= OPARG; */ +#define FUTEX_OP_ANDN 3 /* *(int *)UADDR2 &= ~OPARG; */ +#define FUTEX_OP_XOR 4 /* *(int *)UADDR2 ^= OPARG; */ + +#define FUTEX_OP_OPARG_SHIFT 8 /* Use (1 << OPARG) instead of OPARG. */ + +#define FUTEX_OP_CMP_EQ 0 /* if (oldval == CMPARG) wake */ +#define FUTEX_OP_CMP_NE 1 /* if (oldval != CMPARG) wake */ +#define FUTEX_OP_CMP_LT 2 /* if (oldval < CMPARG) wake */ +#define FUTEX_OP_CMP_LE 3 /* if (oldval <= CMPARG) wake */ +#define FUTEX_OP_CMP_GT 4 /* if (oldval > CMPARG) wake */ +#define FUTEX_OP_CMP_GE 5 /* if (oldval >= CMPARG) wake */ + +/* FUTEX_WAKE_OP will perform atomically + int oldval = *(int *)UADDR2; + *(int *)UADDR2 = oldval OP OPARG; + if (oldval CMP CMPARG) + wake UADDR2; */ + +#define FUTEX_OP(op, oparg, cmp, cmparg) \ + (((op & 0xf) << 28) | ((cmp & 0xf) << 24) \ + | ((oparg & 0xfff) << 12) | (cmparg & 0xfff)) + #endif -- cgit v1.2.2 From 8446f1d391f3d27e6bf9c43d4cbcdac0ca720417 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 6 Sep 2005 15:16:27 -0700 Subject: [PATCH] detect soft lockups This patch adds a new kernel debug feature: CONFIG_DETECT_SOFTLOCKUP. When enabled then per-CPU watchdog threads are started, which try to run once per second. If they get delayed for more than 10 seconds then a callback from the timer interrupt detects this condition and prints out a warning message and a stack dump (once per lockup incident). The feature is otherwise non-intrusive, it doesnt try to unlock the box in any way, it only gets the debug info out, automatically, and on all CPUs affected by the lockup. Signed-off-by: Ingo Molnar Signed-off-by: Nishanth Aravamudan Signed-Off-By: Matthias Urlichs Signed-off-by: Richard Purdie Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sched.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index dec5827c7742..5fb31bede103 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -176,6 +176,23 @@ extern void trap_init(void); extern void update_process_times(int user); extern void scheduler_tick(void); +#ifdef CONFIG_DETECT_SOFTLOCKUP +extern void softlockup_tick(struct pt_regs *regs); +extern void spawn_softlockup_task(void); +extern void touch_softlockup_watchdog(void); +#else +static inline void softlockup_tick(struct pt_regs *regs) +{ +} +static inline void spawn_softlockup_task(void) +{ +} +static inline void touch_softlockup_watchdog(void) +{ +} +#endif + + /* Attach to any functions which should be ignored in wchan output. */ #define __sched __attribute__((__section__(".sched.text"))) /* Is this address in the __sched functions? */ -- cgit v1.2.2 From e82894f84dbba130ab46c97748c03647f8204f92 Mon Sep 17 00:00:00 2001 From: Tom Zanussi Date: Tue, 6 Sep 2005 15:16:30 -0700 Subject: [PATCH] relayfs Here's the latest version of relayfs, against linux-2.6.11-mm2. I'm hoping you'll consider putting this version back into your tree - the previous rounds of comment seem to have shaken out all the API issues and the number of comments on the code itself have also steadily dwindled. This patch is essentially the same as the relayfs redux part 5 patch, with some minor changes based on reviewer comments. Thanks again to Pekka Enberg for those. The patch size without documentation is now a little smaller at just over 40k. Here's a detailed list of the changes: - removed the attribute_flags in relay open and changed it to a boolean specifying either overwrite or no-overwrite mode, and removed everything referencing the attribute flags. - added a check for NULL names in relayfs_create_entry() - got rid of the unnecessary multiple labels in relay_create_buf() - some minor simplification of relay_alloc_buf() which got rid of a couple params - updated the Documentation In addition, this version (through code contained in the relay-apps tarball linked to below, not as part of the relayfs patch) tries to make it as easy as possible to create the cooperating kernel/user pieces of a typical and common type of logging application, one where kernel logging is kicked off when a user space data collection app starts and stops when the collection app exits, with the data being automatically logged to disk in between. To create this type of application, you basically just include a header file (relay-app.h, included in the relay-apps tarball) in your kernel module, define a couple of callbacks and call an initialization function, and on the user side call a single function that sets up and continuously monitors the buffers, and writes data to files as it becomes available. Channels are created when the collection app is started and destroyed when it exits, not when the kernel module is inserted, so different channel buffer sizes can be specified for each separate run via command-line options. See the README in the relay-apps tarball for details. Also included in the relay-apps tarball are a couple examples demonstrating how you can use this to create quick and dirty kernel logging/debugging applications. They are: - tprintk, short for 'tee printk', which temporarily puts a kprobe on printk() and writes a duplicate stream of printk output to a relayfs channel. This could be used anywhere there's printk() debugging code in the kernel which you'd like to exercise, but would rather not have your system logs cluttered with debugging junk. You'd probably want to kill klogd while you do this, otherwise there wouldn't be much point (since putting a kprobe on printk() doesn't change the output of printk()). I've used this method to temporarily divert the packet logging output of the iptables LOG target from the system logs to relayfs files instead, for instance. - klog, which just provides a printk-like formatted logging function on top of relayfs. Again, you can use this to keep stuff out of your system logs if used in place of printk. The example applications can be found here: http://prdownloads.sourceforge.net/dprobes/relay-apps.tar.gz?download From: Christoph Hellwig avoid lookup_hash usage in relayfs Signed-off-by: Tom Zanussi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/relayfs_fs.h | 255 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 255 insertions(+) create mode 100644 include/linux/relayfs_fs.h (limited to 'include/linux') diff --git a/include/linux/relayfs_fs.h b/include/linux/relayfs_fs.h new file mode 100644 index 000000000000..cfafc3e76bc2 --- /dev/null +++ b/include/linux/relayfs_fs.h @@ -0,0 +1,255 @@ +/* + * linux/include/linux/relayfs_fs.h + * + * Copyright (C) 2002, 2003 - Tom Zanussi (zanussi@us.ibm.com), IBM Corp + * Copyright (C) 1999, 2000, 2001, 2002 - Karim Yaghmour (karim@opersys.com) + * + * RelayFS definitions and declarations + */ + +#ifndef _LINUX_RELAYFS_FS_H +#define _LINUX_RELAYFS_FS_H + +#include +#include +#include +#include +#include +#include +#include +#include + +/* + * Tracks changes to rchan_buf struct + */ +#define RELAYFS_CHANNEL_VERSION 5 + +/* + * Per-cpu relay channel buffer + */ +struct rchan_buf +{ + void *start; /* start of channel buffer */ + void *data; /* start of current sub-buffer */ + size_t offset; /* current offset into sub-buffer */ + size_t subbufs_produced; /* count of sub-buffers produced */ + size_t subbufs_consumed; /* count of sub-buffers consumed */ + struct rchan *chan; /* associated channel */ + wait_queue_head_t read_wait; /* reader wait queue */ + struct work_struct wake_readers; /* reader wake-up work struct */ + struct dentry *dentry; /* channel file dentry */ + struct kref kref; /* channel buffer refcount */ + struct page **page_array; /* array of current buffer pages */ + unsigned int page_count; /* number of current buffer pages */ + unsigned int finalized; /* buffer has been finalized */ + size_t *padding; /* padding counts per sub-buffer */ + size_t prev_padding; /* temporary variable */ + size_t bytes_consumed; /* bytes consumed in cur read subbuf */ + unsigned int cpu; /* this buf's cpu */ +} ____cacheline_aligned; + +/* + * Relay channel data structure + */ +struct rchan +{ + u32 version; /* the version of this struct */ + size_t subbuf_size; /* sub-buffer size */ + size_t n_subbufs; /* number of sub-buffers per buffer */ + size_t alloc_size; /* total buffer size allocated */ + struct rchan_callbacks *cb; /* client callbacks */ + struct kref kref; /* channel refcount */ + void *private_data; /* for user-defined data */ + struct rchan_buf *buf[NR_CPUS]; /* per-cpu channel buffers */ +}; + +/* + * Relayfs inode + */ +struct relayfs_inode_info +{ + struct inode vfs_inode; + struct rchan_buf *buf; +}; + +static inline struct relayfs_inode_info *RELAYFS_I(struct inode *inode) +{ + return container_of(inode, struct relayfs_inode_info, vfs_inode); +} + +/* + * Relay channel client callbacks + */ +struct rchan_callbacks +{ + /* + * subbuf_start - called on buffer-switch to a new sub-buffer + * @buf: the channel buffer containing the new sub-buffer + * @subbuf: the start of the new sub-buffer + * @prev_subbuf: the start of the previous sub-buffer + * @prev_padding: unused space at the end of previous sub-buffer + * + * The client should return 1 to continue logging, 0 to stop + * logging. + * + * NOTE: subbuf_start will also be invoked when the buffer is + * created, so that the first sub-buffer can be initialized + * if necessary. In this case, prev_subbuf will be NULL. + * + * NOTE: the client can reserve bytes at the beginning of the new + * sub-buffer by calling subbuf_start_reserve() in this callback. + */ + int (*subbuf_start) (struct rchan_buf *buf, + void *subbuf, + void *prev_subbuf, + size_t prev_padding); + + /* + * buf_mapped - relayfs buffer mmap notification + * @buf: the channel buffer + * @filp: relayfs file pointer + * + * Called when a relayfs file is successfully mmapped + */ + void (*buf_mapped)(struct rchan_buf *buf, + struct file *filp); + + /* + * buf_unmapped - relayfs buffer unmap notification + * @buf: the channel buffer + * @filp: relayfs file pointer + * + * Called when a relayfs file is successfully unmapped + */ + void (*buf_unmapped)(struct rchan_buf *buf, + struct file *filp); +}; + +/* + * relayfs kernel API, fs/relayfs/relay.c + */ + +struct rchan *relay_open(const char *base_filename, + struct dentry *parent, + size_t subbuf_size, + size_t n_subbufs, + struct rchan_callbacks *cb); +extern void relay_close(struct rchan *chan); +extern void relay_flush(struct rchan *chan); +extern void relay_subbufs_consumed(struct rchan *chan, + unsigned int cpu, + size_t consumed); +extern void relay_reset(struct rchan *chan); +extern int relay_buf_full(struct rchan_buf *buf); + +extern size_t relay_switch_subbuf(struct rchan_buf *buf, + size_t length); +extern struct dentry *relayfs_create_dir(const char *name, + struct dentry *parent); +extern int relayfs_remove_dir(struct dentry *dentry); + +/** + * relay_write - write data into the channel + * @chan: relay channel + * @data: data to be written + * @length: number of bytes to write + * + * Writes data into the current cpu's channel buffer. + * + * Protects the buffer by disabling interrupts. Use this + * if you might be logging from interrupt context. Try + * __relay_write() if you know you won't be logging from + * interrupt context. + */ +static inline void relay_write(struct rchan *chan, + const void *data, + size_t length) +{ + unsigned long flags; + struct rchan_buf *buf; + + local_irq_save(flags); + buf = chan->buf[smp_processor_id()]; + if (unlikely(buf->offset + length > chan->subbuf_size)) + length = relay_switch_subbuf(buf, length); + memcpy(buf->data + buf->offset, data, length); + buf->offset += length; + local_irq_restore(flags); +} + +/** + * __relay_write - write data into the channel + * @chan: relay channel + * @data: data to be written + * @length: number of bytes to write + * + * Writes data into the current cpu's channel buffer. + * + * Protects the buffer by disabling preemption. Use + * relay_write() if you might be logging from interrupt + * context. + */ +static inline void __relay_write(struct rchan *chan, + const void *data, + size_t length) +{ + struct rchan_buf *buf; + + buf = chan->buf[get_cpu()]; + if (unlikely(buf->offset + length > buf->chan->subbuf_size)) + length = relay_switch_subbuf(buf, length); + memcpy(buf->data + buf->offset, data, length); + buf->offset += length; + put_cpu(); +} + +/** + * relay_reserve - reserve slot in channel buffer + * @chan: relay channel + * @length: number of bytes to reserve + * + * Returns pointer to reserved slot, NULL if full. + * + * Reserves a slot in the current cpu's channel buffer. + * Does not protect the buffer at all - caller must provide + * appropriate synchronization. + */ +static inline void *relay_reserve(struct rchan *chan, size_t length) +{ + void *reserved; + struct rchan_buf *buf = chan->buf[smp_processor_id()]; + + if (unlikely(buf->offset + length > buf->chan->subbuf_size)) { + length = relay_switch_subbuf(buf, length); + if (!length) + return NULL; + } + reserved = buf->data + buf->offset; + buf->offset += length; + + return reserved; +} + +/** + * subbuf_start_reserve - reserve bytes at the start of a sub-buffer + * @buf: relay channel buffer + * @length: number of bytes to reserve + * + * Helper function used to reserve bytes at the beginning of + * a sub-buffer in the subbuf_start() callback. + */ +static inline void subbuf_start_reserve(struct rchan_buf *buf, + size_t length) +{ + BUG_ON(length >= buf->chan->subbuf_size - 1); + buf->offset = length; +} + +/* + * exported relayfs file operations, fs/relayfs/inode.c + */ + +extern struct file_operations relayfs_file_operations; + +#endif /* _LINUX_RELAYFS_FS_H */ + -- cgit v1.2.2 From 202e5979af4d91c7ca05892641131dee22653259 Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Tue, 6 Sep 2005 15:16:40 -0700 Subject: [PATCH] compat: be more consistent about [ug]id_t When I first wrote the compat layer patches, I was somewhat cavalier about the definition of compat_uid_t and compat_gid_t (or maybe I just misunderstood :-)). This patch makes the compat types much more consistent with the types we are being compatible with and hopefully will fix a few bugs along the way. compat type type in compat arch __compat_[ug]id_t __kernel_[ug]id_t __compat_[ug]id32_t __kernel_[ug]id32_t compat_[ug]id_t [ug]id_t The difference is that compat_uid_t is always 32 bits (for the archs we care about) but __compat_uid_t may be 16 bits on some. Signed-off-by: Stephen Rothwell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/compat.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/compat.h b/include/linux/compat.h index b58b7d6f2fdb..f9ca534787e2 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -18,6 +18,9 @@ #define compat_jiffies_to_clock_t(x) \ (((unsigned long)(x) * COMPAT_USER_HZ) / HZ) +typedef __compat_uid32_t compat_uid_t; +typedef __compat_gid32_t compat_gid_t; + struct rusage; struct compat_itimerspec { -- cgit v1.2.2 From 022a4a7bbdefdedc2706a13c81c832d8c3173c6d Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Tue, 6 Sep 2005 15:16:41 -0700 Subject: [PATCH] fs/jbd/: cleanups This patch contains the following cleanups: - make needlessly global functions static - journal.c: remove the unused global function __journal_internal_check and move the check to journal_init - remove the following write-only global variable: - journal.c: current_journal - remove the following unneeded EXPORT_SYMBOL: - journal.c: journal_recover Signed-off-by: Adrian Bunk Acked-by: Andreas Dilger Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/jbd.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/jbd.h b/include/linux/jbd.h index 593407e865b1..84321a4cac93 100644 --- a/include/linux/jbd.h +++ b/include/linux/jbd.h @@ -914,7 +914,6 @@ extern int journal_wipe (journal_t *, int); extern int journal_skip_recovery (journal_t *); extern void journal_update_superblock (journal_t *, int); extern void __journal_abort_hard (journal_t *); -extern void __journal_abort_soft (journal_t *, int); extern void journal_abort (journal_t *, int); extern int journal_errno (journal_t *); extern void journal_ack_err (journal_t *); -- cgit v1.2.2 From 3676347a5e216a7fec7f8eedbbcf8bed6b9c4e40 Mon Sep 17 00:00:00 2001 From: Peter Osterlund Date: Tue, 6 Sep 2005 15:16:42 -0700 Subject: [PATCH] kill bio->bi_set Jens: ->bi_set is totally unnecessary bloat of struct bio. Just define a proper destructor for the bio and it already knows what bio_set it belongs too. Peter: Fixed the bugs. Signed-off-by: Jens Axboe Signed-off-by: Peter Osterlund Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/bio.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/bio.h b/include/linux/bio.h index 36ef29fa0d8b..69e047989f1c 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -111,7 +111,6 @@ struct bio { void *bi_private; bio_destructor_t *bi_destructor; /* destructor */ - struct bio_set *bi_set; /* memory pools set */ }; /* @@ -280,6 +279,7 @@ extern void bioset_free(struct bio_set *); extern struct bio *bio_alloc(unsigned int __nocast, int); extern struct bio *bio_alloc_bioset(unsigned int __nocast, int, struct bio_set *); extern void bio_put(struct bio *); +extern void bio_free(struct bio *, struct bio_set *); extern void bio_endio(struct bio *, unsigned int, int); struct request_queue; -- cgit v1.2.2 From 5dd42c262bd742fa3602180bbe5550b4828de8f3 Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Tue, 6 Sep 2005 15:16:43 -0700 Subject: [PATCH] remove register_ioctl32_conversion and unregister_ioctl32_conversion All users have been converted. Signed-off-by: Adrian Bunk Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/ioctl32.h | 22 ---------------------- 1 file changed, 22 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ioctl32.h b/include/linux/ioctl32.h index e8c4af32b3bb..948809d99917 100644 --- a/include/linux/ioctl32.h +++ b/include/linux/ioctl32.h @@ -14,26 +14,4 @@ struct ioctl_trans { struct ioctl_trans *next; }; -/* - * Register an 32bit ioctl translation handler for ioctl cmd. - * - * handler == NULL: use 64bit ioctl handler. - * arguments to handler: fd: file descriptor - * cmd: ioctl command. - * arg: ioctl argument - * struct file *file: file descriptor pointer. - */ - -#ifdef CONFIG_COMPAT -extern int __deprecated register_ioctl32_conversion(unsigned int cmd, - ioctl_trans_handler_t handler); -extern int __deprecated unregister_ioctl32_conversion(unsigned int cmd); - -#else - -#define register_ioctl32_conversion(cmd, handler) ({ 0; }) -#define unregister_ioctl32_conversion(cmd) ({ 0; }) - -#endif - #endif -- cgit v1.2.2 From 36d57ac4a818cb4aa3edbdf63ad2ebc31106f925 Mon Sep 17 00:00:00 2001 From: "H. J. Lu" Date: Tue, 6 Sep 2005 15:16:49 -0700 Subject: [PATCH] auxiliary vector cleanups The size of auxiliary vector is fixed at 42 in linux/sched.h. But it isn't very obvious when looking at linux/elf.h. This patch adds AT_VECTOR_SIZE so that we can change it if necessary when a new vector is added. Because of include file ordering problems, doing this necessitated the extraction of the AT_* symbols into a standalone header file. Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/auxvec.h | 31 +++++++++++++++++++++++++++++++ include/linux/elf.h | 24 +----------------------- include/linux/sched.h | 4 +++- 3 files changed, 35 insertions(+), 24 deletions(-) create mode 100644 include/linux/auxvec.h (limited to 'include/linux') diff --git a/include/linux/auxvec.h b/include/linux/auxvec.h new file mode 100644 index 000000000000..9a7b374c9fb4 --- /dev/null +++ b/include/linux/auxvec.h @@ -0,0 +1,31 @@ +#ifndef _LINUX_AUXVEC_H +#define _LINUX_AUXVEC_H + +#include + +/* Symbolic values for the entries in the auxiliary table + put on the initial stack */ +#define AT_NULL 0 /* end of vector */ +#define AT_IGNORE 1 /* entry should be ignored */ +#define AT_EXECFD 2 /* file descriptor of program */ +#define AT_PHDR 3 /* program headers for program */ +#define AT_PHENT 4 /* size of program header entry */ +#define AT_PHNUM 5 /* number of program headers */ +#define AT_PAGESZ 6 /* system page size */ +#define AT_BASE 7 /* base address of interpreter */ +#define AT_FLAGS 8 /* flags */ +#define AT_ENTRY 9 /* entry point of program */ +#define AT_NOTELF 10 /* program is not ELF */ +#define AT_UID 11 /* real uid */ +#define AT_EUID 12 /* effective uid */ +#define AT_GID 13 /* real gid */ +#define AT_EGID 14 /* effective gid */ +#define AT_PLATFORM 15 /* string identifying CPU for optimizations */ +#define AT_HWCAP 16 /* arch dependent hints at CPU capabilities */ +#define AT_CLKTCK 17 /* frequency at which times() increments */ + +#define AT_SECURE 23 /* secure mode boolean */ + +#define AT_VECTOR_SIZE 42 /* Size of auxiliary table. */ + +#endif /* _LINUX_AUXVEC_H */ diff --git a/include/linux/elf.h b/include/linux/elf.h index f5b3ba5a317d..ff955dbf510d 100644 --- a/include/linux/elf.h +++ b/include/linux/elf.h @@ -2,6 +2,7 @@ #define _LINUX_ELF_H #include +#include #include #ifndef elf_read_implies_exec @@ -158,29 +159,6 @@ typedef __s64 Elf64_Sxword; #define ELF64_ST_BIND(x) ELF_ST_BIND(x) #define ELF64_ST_TYPE(x) ELF_ST_TYPE(x) -/* Symbolic values for the entries in the auxiliary table - put on the initial stack */ -#define AT_NULL 0 /* end of vector */ -#define AT_IGNORE 1 /* entry should be ignored */ -#define AT_EXECFD 2 /* file descriptor of program */ -#define AT_PHDR 3 /* program headers for program */ -#define AT_PHENT 4 /* size of program header entry */ -#define AT_PHNUM 5 /* number of program headers */ -#define AT_PAGESZ 6 /* system page size */ -#define AT_BASE 7 /* base address of interpreter */ -#define AT_FLAGS 8 /* flags */ -#define AT_ENTRY 9 /* entry point of program */ -#define AT_NOTELF 10 /* program is not ELF */ -#define AT_UID 11 /* real uid */ -#define AT_EUID 12 /* effective uid */ -#define AT_GID 13 /* real gid */ -#define AT_EGID 14 /* effective gid */ -#define AT_PLATFORM 15 /* string identifying CPU for optimizations */ -#define AT_HWCAP 16 /* arch dependent hints at CPU capabilities */ -#define AT_CLKTCK 17 /* frequency at which times() increments */ - -#define AT_SECURE 23 /* secure mode boolean */ - typedef struct dynamic{ Elf32_Sword d_tag; union{ diff --git a/include/linux/sched.h b/include/linux/sched.h index 5fb31bede103..b5a22ea80045 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -35,6 +35,8 @@ #include #include +#include /* For AT_VECTOR_SIZE */ + struct exec_domain; /* @@ -261,7 +263,7 @@ struct mm_struct { mm_counter_t _rss; mm_counter_t _anon_rss; - unsigned long saved_auxv[42]; /* for /proc/PID/auxv */ + unsigned long saved_auxv[AT_VECTOR_SIZE]; /* for /proc/PID/auxv */ unsigned dumpable:2; cpumask_t cpu_vm_mask; -- cgit v1.2.2 From 8fc2751beb0941966d3a97b26544e8585e428c08 Mon Sep 17 00:00:00 2001 From: Mark Bellon Date: Tue, 6 Sep 2005 15:16:54 -0700 Subject: [PATCH] disk quotas fail when /etc/mtab is symlinked to /proc/mounts If /etc/mtab is a regular file all of the mount options (of a file system) are written to /etc/mtab by the mount command. The quota tools look there for the quota strings for their operation. If, however, /etc/mtab is a symlink to /proc/mounts (a "good thing" in some environments) the tools don't write anything - they assume the kernel will take care of things. While the quota options are sent down to the kernel via the mount system call and the file system codes handle them properly unfortunately there is no code to echo the quota strings into /proc/mounts and the quota tools fail in the symlink case. The attached patchs modify the EXT[2|3] and JFS codes to add the necessary hooks. The show_options function of each file system in these patches currently deal with only those things that seemed related to quotas; especially in the EXT3 case more can be done (later?). Jan Kara also noted the difficulty in moving these changes above the FS codes responding similarly to myself to Andrew's comment about possible VFS migration. Issue summary: - FS codes have to process the entire string of options anyway. - Only FS codes that use quotas must have a show_options function (for quotas to work properly) however quotas are only used in a small number of FS. - Since most of the quota using FS support other options these FS codes should have the a show_options function to show those options - and the quota echoing becomes virtually negligible. Based on feedback I have modified my patches from the original: JFS a missing patch has been restored to the posting EXT[2|3] and JFS always use the show_options function - Each FS has at least one FS specific option displayed - QUOTA output is under a CONFIG_QUOTA ifdef - a follow-on patch will add a multitude of options for each FS EXT[2|3] and JFS "quota" is treated as "usrquota" EXT3 journalled data check for journalled quota removed EXT[2|3] mount when quota specified but not compiled in - no changes from my original patch. I tested the patch and the codes warn but - still mount. With all due respection I believe the comments otherwise were a - misread of the patch. Please reread/test and comment. XFS patch removed - the XFS team already made the necessary changes EXT3 mixing old and new quotas are handled differently (not purely exclusive) - if old and new quotas for the same type are used together the old type is silently depricated for compatability (e.g. usrquota and usrjquota) - mixing of old and new quotas is an error (e.g. usrjquota and grpquota) Signed-off-by: Mark Bellon Acked-by: Dave Kleikamp Cc: Jan Kara Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/ext2_fs.h | 3 +++ include/linux/ext3_fs.h | 2 ++ 2 files changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ext2_fs.h b/include/linux/ext2_fs.h index a657130ba03a..f7bd1c7ebefb 100644 --- a/include/linux/ext2_fs.h +++ b/include/linux/ext2_fs.h @@ -313,6 +313,9 @@ struct ext2_inode { #define EXT2_MOUNT_XATTR_USER 0x004000 /* Extended user attributes */ #define EXT2_MOUNT_POSIX_ACL 0x008000 /* POSIX Access Control Lists */ #define EXT2_MOUNT_XIP 0x010000 /* Execute in place */ +#define EXT2_MOUNT_USRQUOTA 0x020000 /* user quota */ +#define EXT2_MOUNT_GRPQUOTA 0x040000 /* group quota */ + #define clear_opt(o, opt) o &= ~EXT2_MOUNT_##opt #define set_opt(o, opt) o |= EXT2_MOUNT_##opt diff --git a/include/linux/ext3_fs.h b/include/linux/ext3_fs.h index c16662836c58..c0272d73ab20 100644 --- a/include/linux/ext3_fs.h +++ b/include/linux/ext3_fs.h @@ -373,6 +373,8 @@ struct ext3_inode { #define EXT3_MOUNT_BARRIER 0x20000 /* Use block barriers */ #define EXT3_MOUNT_NOBH 0x40000 /* No bufferheads */ #define EXT3_MOUNT_QUOTA 0x80000 /* Some quota option set */ +#define EXT3_MOUNT_USRQUOTA 0x100000 /* "old" user quota */ +#define EXT3_MOUNT_GRPQUOTA 0x200000 /* "old" group quota */ /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */ #ifndef _LINUX_EXT2_FS_H -- cgit v1.2.2 From 2865cf001878d22d5fd12e5215621dffbcad76dc Mon Sep 17 00:00:00 2001 From: Zhigang Huo Date: Tue, 6 Sep 2005 15:17:00 -0700 Subject: [PATCH] remove pipe definitions These no longer have any users. Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/pipe_fs_i.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h index 36725e7c02c6..1767073df26f 100644 --- a/include/linux/pipe_fs_i.h +++ b/include/linux/pipe_fs_i.h @@ -39,9 +39,6 @@ struct pipe_inode_info { #define PIPE_SEM(inode) (&(inode).i_sem) #define PIPE_WAIT(inode) (&(inode).i_pipe->wait) -#define PIPE_BASE(inode) ((inode).i_pipe->base) -#define PIPE_START(inode) ((inode).i_pipe->start) -#define PIPE_LEN(inode) ((inode).i_pipe->len) #define PIPE_READERS(inode) ((inode).i_pipe->readers) #define PIPE_WRITERS(inode) ((inode).i_pipe->writers) #define PIPE_WAITING_WRITERS(inode) ((inode).i_pipe->waiting_writers) -- cgit v1.2.2 From d2052c1676a39cae101a81f3da8a4ade8b668c88 Mon Sep 17 00:00:00 2001 From: Erik Waling Date: Tue, 6 Sep 2005 15:17:02 -0700 Subject: [PATCH] sonypi SPIC initialisation fix Newer Sony VAIO models (VGN-S480, VGN-S460, VGN-S3XP etc) use a new method to initialize the SPIC device. The new way to initialize (and disable) the device comes directly from the AML code in the _CRS, _SRS and _DIS methods from the DSDT table. This patch adds support for the new models. Signed-off-by: Erik Waling Signed-off-by: Stelian Pop Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sonypi.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sonypi.h b/include/linux/sonypi.h index 768cbba617d0..f56d24734950 100644 --- a/include/linux/sonypi.h +++ b/include/linux/sonypi.h @@ -99,6 +99,8 @@ #define SONYPI_EVENT_BATTERY_INSERT 57 #define SONYPI_EVENT_BATTERY_REMOVE 58 #define SONYPI_EVENT_FNKEY_RELEASED 59 +#define SONYPI_EVENT_WIRELESS_ON 60 +#define SONYPI_EVENT_WIRELESS_OFF 61 /* get/set brightness */ #define SONYPI_IOCGBRT _IOR('v', 0, __u8) -- cgit v1.2.2 From e139aa595c5d3bd01699530cbe017dec75fdb07f Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Tue, 6 Sep 2005 15:17:05 -0700 Subject: [PATCH] PNP: make pnp_dbg conditional directly on CONFIG_PNP_DEBUG Seems pointless to require .c files to test CONFIG_PNP_DEBUG and conditionally define DEBUG before including . Just test CONFIG_PNP_DEBUG directly in pnp.h. Signed-off-by: Bjorn Helgaas Cc: Adam Belay Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/pnp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/pnp.h b/include/linux/pnp.h index 5ec2bd0c2848..aadbac29103c 100644 --- a/include/linux/pnp.h +++ b/include/linux/pnp.h @@ -443,7 +443,7 @@ static inline void pnp_unregister_driver(struct pnp_driver *drv) { ; } #define pnp_info(format, arg...) printk(KERN_INFO "pnp: " format "\n" , ## arg) #define pnp_warn(format, arg...) printk(KERN_WARNING "pnp: " format "\n" , ## arg) -#ifdef DEBUG +#ifdef CONFIG_PNP_DEBUG #define pnp_dbg(format, arg...) printk(KERN_DEBUG "pnp: " format "\n" , ## arg) #else #define pnp_dbg(format, arg...) do {} while (0) -- cgit v1.2.2 From f35279d3f713e5c97b98cbdbf47d98f79942c11f Mon Sep 17 00:00:00 2001 From: Bruce Allan Date: Tue, 6 Sep 2005 15:17:08 -0700 Subject: [PATCH] sunrpc: cache_register can use wrong module reference When registering an RPC cache, cache_register() always sets the owner as the sunrpc module. However, there are RPC caches owned by other modules. With the incorrect owner setting, the real owning module can be removed potentially with an open reference to the cache from userspace. For example, if one were to stop the nfs server and unmount the nfsd filesystem, the nfsd module could be removed eventhough rpc.idmapd had references to the idtoname and nametoid caches (i.e. /proc/net/rpc/nfs4./channel is still open). This resulted in a system panic on one of our machines when attempting to restart the nfs services after reloading the nfsd module. The following patch adds a 'struct module *owner' field in struct cache_detail. The owner is further assigned to the struct proc_dir_entry in cache_register() so that the module cannot be unloaded while user-space daemons have an open reference on the associated file under /proc. Signed-off-by: Bruce Allan Cc: Trond Myklebust Cc: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sunrpc/cache.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/sunrpc/cache.h b/include/linux/sunrpc/cache.h index 6864063d1b9f..c4e3ea7cf154 100644 --- a/include/linux/sunrpc/cache.h +++ b/include/linux/sunrpc/cache.h @@ -60,6 +60,7 @@ struct cache_head { #define CACHE_NEW_EXPIRY 120 /* keep new things pending confirmation for 120 seconds */ struct cache_detail { + struct module * owner; int hash_size; struct cache_head ** hash_table; rwlock_t hash_lock; -- cgit v1.2.2 From 19b4946ca9d1e35d4c641dcebe27378de34f3ddd Mon Sep 17 00:00:00 2001 From: Mike Waychison Date: Tue, 6 Sep 2005 15:17:10 -0700 Subject: [PATCH] ipc: convert /proc/sysvipc/* to generic seq_file interface Change the /proc/sysvipc/shm|sem|msg files to use the generic seq_file implementation for struct ipc_ids. Signed-off-by: Mike Waychison Cc: Manfred Spraul Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/msg.h | 1 + include/linux/sem.h | 1 + 2 files changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/msg.h b/include/linux/msg.h index 2c4c6aa643ff..903e0ab8101f 100644 --- a/include/linux/msg.h +++ b/include/linux/msg.h @@ -77,6 +77,7 @@ struct msg_msg { /* one msq_queue structure for each present queue on the system */ struct msg_queue { struct kern_ipc_perm q_perm; + int q_id; time_t q_stime; /* last msgsnd time */ time_t q_rtime; /* last msgrcv time */ time_t q_ctime; /* last change time */ diff --git a/include/linux/sem.h b/include/linux/sem.h index 2d8516be9fd7..106f9757339a 100644 --- a/include/linux/sem.h +++ b/include/linux/sem.h @@ -88,6 +88,7 @@ struct sem { /* One sem_array data structure for each set of semaphores in the system. */ struct sem_array { struct kern_ipc_perm sem_perm; /* permissions .. see ipc.h */ + int sem_id; time_t sem_otime; /* last semop time */ time_t sem_ctime; /* last change time */ struct sem *sem_base; /* ptr to first semaphore in array */ -- cgit v1.2.2 From 6e3eaab02028c4087a92711b20abb9e72cc803a7 Mon Sep 17 00:00:00 2001 From: Abhay Salunke Date: Tue, 6 Sep 2005 15:17:13 -0700 Subject: [PATCH] modified firmware_class.c to support no hotplug Upgrade the request_firmware_nowait function to not start the hotplug action on a firmware update. This patch is tested along with dell_rbu driver on i386 and x86-64 systems. Signed-off-by: Abhay Salunke Cc: Greg KH Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/firmware.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/firmware.h b/include/linux/firmware.h index 886255b69bb9..2063c0839d4f 100644 --- a/include/linux/firmware.h +++ b/include/linux/firmware.h @@ -3,6 +3,9 @@ #include #include #define FIRMWARE_NAME_MAX 30 +#define FW_ACTION_NOHOTPLUG 0 +#define FW_ACTION_HOTPLUG 1 + struct firmware { size_t size; u8 *data; @@ -11,7 +14,7 @@ struct device; int request_firmware(const struct firmware **fw, const char *name, struct device *device); int request_firmware_nowait( - struct module *module, + struct module *module, int hotplug, const char *name, struct device *device, void *context, void (*cont)(const struct firmware *fw, void *context)); -- cgit v1.2.2 From f26fdd59929e1144c6caf72adcaf4561d6e682a4 Mon Sep 17 00:00:00 2001 From: Karsten Wiese Date: Tue, 6 Sep 2005 15:17:25 -0700 Subject: [PATCH] CHECK_IRQ_PER_CPU() to avoid dead code in __do_IRQ() IRQ_PER_CPU is not used by all architectures. This patch introduces the macros ARCH_HAS_IRQ_PER_CPU and CHECK_IRQ_PER_CPU() to avoid the generation of dead code in __do_IRQ(). ARCH_HAS_IRQ_PER_CPU is defined by architectures using IRQ_PER_CPU in their include/asm_ARCH/irq.h file. Through grepping the tree I found the following architectures currently use IRQ_PER_CPU: cris, ia64, ppc, ppc64 and parisc. Signed-off-by: Karsten Wiese Acked-by: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/irq.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/irq.h b/include/linux/irq.h index 4a362b9ec966..69681c3b1f05 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -32,7 +32,12 @@ #define IRQ_WAITING 32 /* IRQ not yet seen - for autodetection */ #define IRQ_LEVEL 64 /* IRQ level triggered */ #define IRQ_MASKED 128 /* IRQ masked - shouldn't be seen again */ -#define IRQ_PER_CPU 256 /* IRQ is per CPU */ +#if defined(ARCH_HAS_IRQ_PER_CPU) +# define IRQ_PER_CPU 256 /* IRQ is per CPU */ +# define CHECK_IRQ_PER_CPU(var) ((var) & IRQ_PER_CPU) +#else +# define CHECK_IRQ_PER_CPU(var) 0 +#endif /* * Interrupt controller descriptor. This is all we need -- cgit v1.2.2 From f23ef184b486ac021b6a471b4e94cfa04860d3b0 Mon Sep 17 00:00:00 2001 From: Ralf Baechle Date: Tue, 6 Sep 2005 15:17:29 -0700 Subject: [PATCH] Delete unused do_nanosleep declaration There is no do_nanosleep function so kill it's declaration in . Signed-off-by: Ralf Baechle Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/time.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/time.h b/include/linux/time.h index 5634497ff5df..c10d4c21c183 100644 --- a/include/linux/time.h +++ b/include/linux/time.h @@ -97,7 +97,6 @@ extern int do_settimeofday(struct timespec *tv); extern int do_sys_settimeofday(struct timespec *tv, struct timezone *tz); extern void clock_was_set(void); // call when ever the clock is set extern int do_posix_clock_monotonic_gettime(struct timespec *tp); -extern long do_nanosleep(struct timespec *t); extern long do_utimes(char __user * filename, struct timeval * times); struct itimerval; extern int do_setitimer(int which, struct itimerval *value, struct itimerval *ovalue); -- cgit v1.2.2 From 2832e9366a1fcd6f76957a42157be041240f994e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 6 Sep 2005 15:17:38 -0700 Subject: [PATCH] remove file.f_maxcount struct file cleanup: f_maxcount has an unique value (INT_MAX). Just use the hard-wired value. Signed-off-by: Eric Dumazet Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/fs.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 67e6732d4fdc..2036747c7d1f 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -594,7 +594,6 @@ struct file { unsigned int f_uid, f_gid; struct file_ra_state f_ra; - size_t f_maxcount; unsigned long f_version; void *f_security; -- cgit v1.2.2 From b149ee2233edf08fb59b11e879a2c5941929bcb8 Mon Sep 17 00:00:00 2001 From: john stultz Date: Tue, 6 Sep 2005 15:17:46 -0700 Subject: [PATCH] NTP: ntp-helper functions This patch cleans up a commonly repeated set of changes to the NTP state variables by adding two helper inline functions: ntp_clear(): Clears the ntp state variables ntp_synced(): Returns 1 if the system is synced with a time server. This was compile tested for alpha, arm, i386, x86-64, ppc64, s390, sparc, sparc64. Signed-off-by: John Stultz Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/timex.h | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) (limited to 'include/linux') diff --git a/include/linux/timex.h b/include/linux/timex.h index 74fdd07d3792..7e050a2cc35b 100644 --- a/include/linux/timex.h +++ b/include/linux/timex.h @@ -260,6 +260,29 @@ extern long pps_calcnt; /* calibration intervals */ extern long pps_errcnt; /* calibration errors */ extern long pps_stbcnt; /* stability limit exceeded */ +/** + * ntp_clear - Clears the NTP state variables + * + * Must be called while holding a write on the xtime_lock + */ +static inline void ntp_clear(void) +{ + time_adjust = 0; /* stop active adjtime() */ + time_status |= STA_UNSYNC; + time_maxerror = NTP_PHASE_LIMIT; + time_esterror = NTP_PHASE_LIMIT; +} + +/** + * ntp_synced - Returns 1 if the NTP status is not UNSYNC + * + */ +static inline int ntp_synced(void) +{ + return !(time_status & STA_UNSYNC); +} + + #ifdef CONFIG_TIME_INTERPOLATION #define TIME_SOURCE_CPU 0 -- cgit v1.2.2 From 9c45817f41af987277353e463c78a1c6beb37da2 Mon Sep 17 00:00:00 2001 From: Kumar Gala Date: Tue, 6 Sep 2005 15:17:47 -0700 Subject: [PATCH] Remove non-arch consumers of asm/segment.h asm/segment.h varies greatly on different architectures but is clearly deprecated. Removing all non-architecture consumers will make it easier for us to get ride of asm/segment.h all together. Signed-off-by: Kumar Gala Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/isdn.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/isdn.h b/include/linux/isdn.h index 862083eb58ab..53eaee96065b 100644 --- a/include/linux/isdn.h +++ b/include/linux/isdn.h @@ -150,7 +150,6 @@ typedef struct { #include #include #include -#include #include #include #include -- cgit v1.2.2 From 7ea6040b0eff07d3a9a4e2d248ac137c6ad02d42 Mon Sep 17 00:00:00 2001 From: John McCutchan Date: Tue, 6 Sep 2005 15:18:02 -0700 Subject: [PATCH] inotify: fix event loss on hardlinked files People have run into a problem when they do this: watch (file1, all_events); watch (file2, some_events); if file2 is a hard link to file1, some events will be missed because by default we replace the mask. The patch below adds a flag IN_MASK_ADD which will cause inotify to add to the existing mask if present. Signed-off-by: John McCutchan Signed-off-by: Robert Love Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/inotify.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/inotify.h b/include/linux/inotify.h index 93bb3afe646b..ee5b239092ed 100644 --- a/include/linux/inotify.h +++ b/include/linux/inotify.h @@ -47,6 +47,7 @@ struct inotify_event { #define IN_MOVE (IN_MOVED_FROM | IN_MOVED_TO) /* moves */ /* special flags */ +#define IN_MASK_ADD 0x20000000 /* add to the mask of an already existing watch */ #define IN_ISDIR 0x40000000 /* event occurred against dir */ #define IN_ONESHOT 0x80000000 /* only send event once */ -- cgit v1.2.2 From f90b1d2f1aaaa40c6519a32e69615edc25bb97d5 Mon Sep 17 00:00:00 2001 From: Paul Jackson Date: Tue, 6 Sep 2005 15:18:10 -0700 Subject: [PATCH] cpusets: new __GFP_HARDWALL flag Add another GFP flag: __GFP_HARDWALL. A subsequent "cpuset_zone_allowed" patch will use this flag to mark GFP_USER allocations, and distinguish them from GFP_KERNEL allocations. Allocations (such as GFP_USER) marked GFP_HARDWALL are constrainted to the current tasks cpuset. Other allocations (such as GFP_KERNEL) can steal from the possibly larger nearest mem_exclusive cpuset ancestor, if memory is tight on every node in the current cpuset. This patch collides with Mel Gorman's patch to reduce fragmentation in the standard buddy allocator, which adds two GFP flags. This was discussed on linux-mm in July. Most likely, one of his flags for user reclaimable memory can be the same as my __GFP_HARDWALL flag, under some generic name meaning its user address space memory. Signed-off-by: Paul Jackson Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/gfp.h | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/gfp.h b/include/linux/gfp.h index 7c7400137e97..4dc990f3b5cc 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -40,6 +40,7 @@ struct vm_area_struct; #define __GFP_ZERO 0x8000u /* Return zeroed page on success */ #define __GFP_NOMEMALLOC 0x10000u /* Don't use emergency reserves */ #define __GFP_NORECLAIM 0x20000u /* No realy zone reclaim during allocation */ +#define __GFP_HARDWALL 0x40000u /* Enforce hardwall cpuset memory allocs */ #define __GFP_BITS_SHIFT 20 /* Room for 20 __GFP_FOO bits */ #define __GFP_BITS_MASK ((1 << __GFP_BITS_SHIFT) - 1) @@ -48,14 +49,15 @@ struct vm_area_struct; #define GFP_LEVEL_MASK (__GFP_WAIT|__GFP_HIGH|__GFP_IO|__GFP_FS| \ __GFP_COLD|__GFP_NOWARN|__GFP_REPEAT| \ __GFP_NOFAIL|__GFP_NORETRY|__GFP_NO_GROW|__GFP_COMP| \ - __GFP_NOMEMALLOC|__GFP_NORECLAIM) + __GFP_NOMEMALLOC|__GFP_NORECLAIM|__GFP_HARDWALL) #define GFP_ATOMIC (__GFP_HIGH) #define GFP_NOIO (__GFP_WAIT) #define GFP_NOFS (__GFP_WAIT | __GFP_IO) #define GFP_KERNEL (__GFP_WAIT | __GFP_IO | __GFP_FS) -#define GFP_USER (__GFP_WAIT | __GFP_IO | __GFP_FS) -#define GFP_HIGHUSER (__GFP_WAIT | __GFP_IO | __GFP_FS | __GFP_HIGHMEM) +#define GFP_USER (__GFP_WAIT | __GFP_IO | __GFP_FS | __GFP_HARDWALL) +#define GFP_HIGHUSER (__GFP_WAIT | __GFP_IO | __GFP_FS | __GFP_HARDWALL | \ + __GFP_HIGHMEM) /* Flag - indicates that the buffer will be suitable for DMA. Ignored on some platforms, used as appropriate on others */ -- cgit v1.2.2 From 9bf2229f8817677127a60c177aefce1badd22d7b Mon Sep 17 00:00:00 2001 From: Paul Jackson Date: Tue, 6 Sep 2005 15:18:12 -0700 Subject: [PATCH] cpusets: formalize intermediate GFP_KERNEL containment This patch makes use of the previously underutilized cpuset flag 'mem_exclusive' to provide what amounts to another layer of memory placement resolution. With this patch, there are now the following four layers of memory placement available: 1) The whole system (interrupt and GFP_ATOMIC allocations can use this), 2) The nearest enclosing mem_exclusive cpuset (GFP_KERNEL allocations can use), 3) The current tasks cpuset (GFP_USER allocations constrained to here), and 4) Specific node placement, using mbind and set_mempolicy. These nest - each layer is a subset (same or within) of the previous. Layer (2) above is new, with this patch. The call used to check whether a zone (its node, actually) is in a cpuset (in its mems_allowed, actually) is extended to take a gfp_mask argument, and its logic is extended, in the case that __GFP_HARDWALL is not set in the flag bits, to look up the cpuset hierarchy for the nearest enclosing mem_exclusive cpuset, to determine if placement is allowed. The definition of GFP_USER, which used to be identical to GFP_KERNEL, is changed to also set the __GFP_HARDWALL bit, in the previous cpuset_gfp_hardwall_flag patch. GFP_ATOMIC and GFP_KERNEL allocations will stay within the current tasks cpuset, so long as any node therein is not too tight on memory, but will escape to the larger layer, if need be. The intended use is to allow something like a batch manager to handle several jobs, each job in its own cpuset, but using common kernel memory for caches and such. Swapper and oom_kill activity is also constrained to Layer (2). A task in or below one mem_exclusive cpuset should not cause swapping on nodes in another non-overlapping mem_exclusive cpuset, nor provoke oom_killing of a task in another such cpuset. Heavy use of kernel memory for i/o caching and such by one job should not impact the memory available to jobs in other non-overlapping mem_exclusive cpusets. This patch enables providing hardwall, inescapable cpusets for memory allocations of each job, while sharing kernel memory allocations between several jobs, in an enclosing mem_exclusive cpuset. Like Dinakar's patch earlier to enable administering sched domains using the cpu_exclusive flag, this patch also provides a useful meaning to a cpuset flag that had previously done nothing much useful other than restrict what cpuset configurations were allowed. Signed-off-by: Paul Jackson Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/cpuset.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h index 3438233305a3..1fe1c3ebad30 100644 --- a/include/linux/cpuset.h +++ b/include/linux/cpuset.h @@ -23,7 +23,7 @@ void cpuset_init_current_mems_allowed(void); void cpuset_update_current_mems_allowed(void); void cpuset_restrict_to_mems_allowed(unsigned long *nodes); int cpuset_zonelist_valid_mems_allowed(struct zonelist *zl); -int cpuset_zone_allowed(struct zone *z); +extern int cpuset_zone_allowed(struct zone *z, unsigned int __nocast gfp_mask); extern struct file_operations proc_cpuset_operations; extern char *cpuset_task_status_allowed(struct task_struct *task, char *buffer); @@ -48,7 +48,8 @@ static inline int cpuset_zonelist_valid_mems_allowed(struct zonelist *zl) return 1; } -static inline int cpuset_zone_allowed(struct zone *z) +static inline int cpuset_zone_allowed(struct zone *z, + unsigned int __nocast gfp_mask) { return 1; } -- cgit v1.2.2 From ef08e3b4981aebf2ba9bd7025ef7210e8eec07ce Mon Sep 17 00:00:00 2001 From: Paul Jackson Date: Tue, 6 Sep 2005 15:18:13 -0700 Subject: [PATCH] cpusets: confine oom_killer to mem_exclusive cpuset Now the real motivation for this cpuset mem_exclusive patch series seems trivial. This patch keeps a task in or under one mem_exclusive cpuset from provoking an oom kill of a task under a non-overlapping mem_exclusive cpuset. Since only interrupt and GFP_ATOMIC allocations are allowed to escape mem_exclusive containment, there is little to gain from oom killing a task under a non-overlapping mem_exclusive cpuset, as almost all kernel and user memory allocation must come from disjoint memory nodes. This patch enables configuring a system so that a runaway job under one mem_exclusive cpuset cannot cause the killing of a job in another such cpuset that might be using very high compute and memory resources for a prolonged time. Signed-off-by: Paul Jackson Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/cpuset.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h index 1fe1c3ebad30..24062a1dbf61 100644 --- a/include/linux/cpuset.h +++ b/include/linux/cpuset.h @@ -24,6 +24,7 @@ void cpuset_update_current_mems_allowed(void); void cpuset_restrict_to_mems_allowed(unsigned long *nodes); int cpuset_zonelist_valid_mems_allowed(struct zonelist *zl); extern int cpuset_zone_allowed(struct zone *z, unsigned int __nocast gfp_mask); +extern int cpuset_excl_nodes_overlap(const struct task_struct *p); extern struct file_operations proc_cpuset_operations; extern char *cpuset_task_status_allowed(struct task_struct *task, char *buffer); @@ -54,6 +55,11 @@ static inline int cpuset_zone_allowed(struct zone *z, return 1; } +static inline int cpuset_excl_nodes_overlap(const struct task_struct *p) +{ + return 1; +} + static inline char *cpuset_task_status_allowed(struct task_struct *task, char *buffer) { -- cgit v1.2.2 From 9c1cfda20a508b181bdda8c0045f7c0c333880a5 Mon Sep 17 00:00:00 2001 From: John Hawkes Date: Tue, 6 Sep 2005 15:18:14 -0700 Subject: [PATCH] cpusets: Move the ia64 domain setup code to the generic code Signed-off-by: John Hawkes Signed-off-by: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sched.h | 7 ------- include/linux/topology.h | 23 +++++++++++++++++++++++ 2 files changed, 23 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index b5a22ea80045..ea1b5f32ec5c 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -564,13 +564,6 @@ struct sched_domain { extern void partition_sched_domains(cpumask_t *partition1, cpumask_t *partition2); -#ifdef ARCH_HAS_SCHED_DOMAIN -/* Useful helpers that arch setup code may use. Defined in kernel/sched.c */ -extern cpumask_t cpu_isolated_map; -extern void init_sched_build_groups(struct sched_group groups[], - cpumask_t span, int (*group_fn)(int cpu)); -extern void cpu_attach_domain(struct sched_domain *sd, int cpu); -#endif /* ARCH_HAS_SCHED_DOMAIN */ #endif /* CONFIG_SMP */ diff --git a/include/linux/topology.h b/include/linux/topology.h index 0320225e96da..3df1d474e5c5 100644 --- a/include/linux/topology.h +++ b/include/linux/topology.h @@ -135,6 +135,29 @@ } #endif +/* sched_domains SD_ALLNODES_INIT for NUMA machines */ +#define SD_ALLNODES_INIT (struct sched_domain) { \ + .span = CPU_MASK_NONE, \ + .parent = NULL, \ + .groups = NULL, \ + .min_interval = 64, \ + .max_interval = 64*num_online_cpus(), \ + .busy_factor = 128, \ + .imbalance_pct = 133, \ + .cache_hot_time = (10*1000000), \ + .cache_nice_tries = 1, \ + .busy_idx = 3, \ + .idle_idx = 3, \ + .newidle_idx = 0, /* unused */ \ + .wake_idx = 0, /* unused */ \ + .forkexec_idx = 0, /* unused */ \ + .per_cpu_gain = 100, \ + .flags = SD_LOAD_BALANCE, \ + .last_balance = jiffies, \ + .balance_interval = 64, \ + .nr_balance_failed = 0, \ +} + #ifdef CONFIG_NUMA #ifndef SD_NODE_INIT #error Please define an appropriate SD_NODE_INIT in include/asm/topology.h!!! -- cgit v1.2.2 From 3f4bb1f4199b7dc0c958447b1e4898980013b884 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 6 Sep 2005 15:18:16 -0700 Subject: [PATCH] struct dentry: place d_hash close to d_parent and d_name to speedup lookups dentry cache uses sophisticated RCU technology (and prefetching if available) but touches 2 cache lines per dentry during hlist lookup. This patch moves d_hash in the same cache line than d_parent and d_name fields so that : 1) One cache line is needed instead of two. 2) the hlist_for_each_rcu() prefetching has a chance to bring all the needed data in advance, not only the part that includes d_hash.next. I also changed one old comment that was wrong for 64bits. A further optimisation would be to separate dentry in two parts, one that is mostly read, and one writen (d_count/d_lock) to avoid false sharing on SMP/NUMA but this would need different field placement depending on 32bits or 64bits platform. Signed-off-by: Eric Dumazet Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/dcache.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dcache.h b/include/linux/dcache.h index 50be290d24d2..ab04b4f9b0db 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -88,8 +88,9 @@ struct dentry { * negative */ /* * The next three fields are touched by __d_lookup. Place them here - * so they all fit in a 16-byte range, with 16-byte alignment. + * so they all fit in a cache line. */ + struct hlist_node d_hash; /* lookup hash list */ struct dentry *d_parent; /* parent directory */ struct qstr d_name; @@ -103,7 +104,6 @@ struct dentry { void *d_fsdata; /* fs-specific data */ struct rcu_head d_rcu; struct dcookie_struct *d_cookie; /* cookie, if any */ - struct hlist_node d_hash; /* lookup hash list */ int d_mounted; unsigned char d_iname[DNAME_INLINE_LEN_MIN]; /* small names */ }; -- cgit v1.2.2 From e89bbd3a0b3c054d9a94feb0db7bbae1cdb99e54 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Tue, 6 Sep 2005 15:18:21 -0700 Subject: [PATCH] remove iattr.ia_attr_flags Remove unused ia_attr_flags from struct iattr, and related defines. Signed-off-by: Miklos Szeredi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/fs.h | 10 ---------- 1 file changed, 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 2036747c7d1f..57e294bf83f4 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -281,18 +281,8 @@ struct iattr { struct timespec ia_atime; struct timespec ia_mtime; struct timespec ia_ctime; - unsigned int ia_attr_flags; }; -/* - * This is the inode attributes flag definitions - */ -#define ATTR_FLAG_SYNCRONOUS 1 /* Syncronous write */ -#define ATTR_FLAG_NOATIME 2 /* Don't update atime */ -#define ATTR_FLAG_APPEND 4 /* Append-only file */ -#define ATTR_FLAG_IMMUTABLE 8 /* Immutable file */ -#define ATTR_FLAG_NODIRATIME 16 /* Don't update atime for directory */ - /* * Includes for diskquotas. */ -- cgit v1.2.2 From ab8d11beb46f0bd0617e04205c01f5c1fe845b61 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Tue, 6 Sep 2005 15:18:24 -0700 Subject: [PATCH] remove duplicated code from proc and ptrace Extract common code used by ptrace_attach() and may_ptrace_attach() into a separate function. Signed-off-by: Miklos Szeredi Cc: Cc: Christoph Hellwig Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/ptrace.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h index 2afdafb62123..dc6f3647bfbc 100644 --- a/include/linux/ptrace.h +++ b/include/linux/ptrace.h @@ -90,6 +90,7 @@ extern void __ptrace_link(struct task_struct *child, struct task_struct *new_parent); extern void __ptrace_unlink(struct task_struct *child); extern void ptrace_untrace(struct task_struct *child); +extern int ptrace_may_attach(struct task_struct *task); static inline void ptrace_link(struct task_struct *child, struct task_struct *new_parent) -- cgit v1.2.2 From e922efc342d565a38eed3af377ff403f52148864 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Tue, 6 Sep 2005 15:18:25 -0700 Subject: [PATCH] remove duplicated sys_open32() code from 64bit archs 64 bit architectures all implement their own compatibility sys_open(), when in fact the difference is simply not forcing the O_LARGEFILE flag. So use the a common function instead. Signed-off-by: Miklos Szeredi Cc: Cc: Christoph Hellwig Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/fs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 57e294bf83f4..7e1b589842af 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1280,6 +1280,7 @@ static inline int break_lease(struct inode *inode, unsigned int mode) /* fs/open.c */ extern int do_truncate(struct dentry *, loff_t start); +extern long do_sys_open(const char __user *filename, int flags, int mode); extern struct file *filp_open(const char *, int, int); extern struct file * dentry_open(struct dentry *, struct vfsmount *, int); extern int filp_close(struct file *, fl_owner_t id); -- cgit v1.2.2 From ebad6a4230bdb5927495e28bc7837f515bf667a7 Mon Sep 17 00:00:00 2001 From: Andrey Panin Date: Tue, 6 Sep 2005 15:18:29 -0700 Subject: [PATCH] dmi: add onboard devices discovery This patch adds onboard devices and IPMI BMC discovery into DMI scan code. Drivers can use dmi_find_device() function to search for devices by type and name. Signed-off-by: Andrey Panin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/dmi.h | 36 +++++++++++++++++++++++++++++++++--- 1 file changed, 33 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dmi.h b/include/linux/dmi.h index 5e93e6dce9a4..c30175e8dec6 100644 --- a/include/linux/dmi.h +++ b/include/linux/dmi.h @@ -1,6 +1,8 @@ #ifndef __DMI_H__ #define __DMI_H__ +#include + enum dmi_field { DMI_NONE, DMI_BIOS_VENDOR, @@ -16,6 +18,24 @@ enum dmi_field { DMI_STRING_MAX, }; +enum dmi_device_type { + DMI_DEV_TYPE_ANY = 0, + DMI_DEV_TYPE_OTHER, + DMI_DEV_TYPE_UNKNOWN, + DMI_DEV_TYPE_VIDEO, + DMI_DEV_TYPE_SCSI, + DMI_DEV_TYPE_ETHERNET, + DMI_DEV_TYPE_TOKENRING, + DMI_DEV_TYPE_SOUND, + DMI_DEV_TYPE_IPMI = -1 +}; + +struct dmi_header { + u8 type; + u8 length; + u16 handle; +}; + /* * DMI callbacks for problem boards */ @@ -26,22 +46,32 @@ struct dmi_strmatch { struct dmi_system_id { int (*callback)(struct dmi_system_id *); - char *ident; + const char *ident; struct dmi_strmatch matches[4]; void *driver_data; }; -#define DMI_MATCH(a,b) { a, b } +#define DMI_MATCH(a, b) { a, b } + +struct dmi_device { + struct list_head list; + int type; + const char *name; + void *device_data; /* Type specific data */ +}; #if defined(CONFIG_X86) && !defined(CONFIG_X86_64) extern int dmi_check_system(struct dmi_system_id *list); extern char * dmi_get_system_info(int field); - +extern struct dmi_device * dmi_find_device(int type, const char *name, + struct dmi_device *from); #else static inline int dmi_check_system(struct dmi_system_id *list) { return 0; } static inline char * dmi_get_system_info(int field) { return NULL; } +static struct dmi_device * dmi_find_device(int type, const char *name, + struct dmi_device *from) { return NULL; } #endif -- cgit v1.2.2 From dd3927105b6f65afb7dac17682172cdfb86d3f00 Mon Sep 17 00:00:00 2001 From: Pekka J Enberg Date: Tue, 6 Sep 2005 15:18:31 -0700 Subject: [PATCH] introduce and use kzalloc This patch introduces a kzalloc wrapper and converts kernel/ to use it. It saves a little program text. Signed-off-by: Pekka Enberg Signed-off-by: Adrian Bunk Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/slab.h | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/slab.h b/include/linux/slab.h index 80b2dfde2e80..42a6bea58af3 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -99,7 +99,21 @@ found: return __kmalloc(size, flags); } -extern void *kcalloc(size_t, size_t, unsigned int __nocast); +extern void *kzalloc(size_t, unsigned int __nocast); + +/** + * kcalloc - allocate memory for an array. The memory is set to zero. + * @n: number of elements. + * @size: element size. + * @flags: the type of memory to allocate. + */ +static inline void *kcalloc(size_t n, size_t size, unsigned int __nocast flags) +{ + if (n != 0 && size > INT_MAX / n) + return NULL; + return kzalloc(n * size, flags); +} + extern void kfree(const void *); extern unsigned int ksize(const void *); -- cgit v1.2.2 From c14979b993021377228958498937bcdd9539cbce Mon Sep 17 00:00:00 2001 From: Corey Minyard Date: Tue, 6 Sep 2005 15:18:38 -0700 Subject: [PATCH] ipmi: add per-channel IPMB addresses IPMI allows multiple IPMB channels on a single interface, and each channel might have a different IPMB address. However, the driver has only one IPMB address that it uses for everything. This patch adds new IOCTLS and a new internal interface for setting per-channel IPMB addresses and LUNs. New systems are coming out with support for multiple IPMB channels, and they are broken without this patch. Signed-off-by: Corey Minyard Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/ipmi.h | 30 +++++++++++++++++++++++------- 1 file changed, 23 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ipmi.h b/include/linux/ipmi.h index 596ca6130159..846b69899776 100644 --- a/include/linux/ipmi.h +++ b/include/linux/ipmi.h @@ -298,13 +298,19 @@ void ipmi_get_version(ipmi_user_t user, this user, so it will affect all users of this interface. This is so some initialization code can come in and do the OEM-specific things it takes to determine your address (if not the BMC) and set - it for everyone else. */ -void ipmi_set_my_address(ipmi_user_t user, - unsigned char address); -unsigned char ipmi_get_my_address(ipmi_user_t user); -void ipmi_set_my_LUN(ipmi_user_t user, - unsigned char LUN); -unsigned char ipmi_get_my_LUN(ipmi_user_t user); + it for everyone else. Note that each channel can have its own address. */ +int ipmi_set_my_address(ipmi_user_t user, + unsigned int channel, + unsigned char address); +int ipmi_get_my_address(ipmi_user_t user, + unsigned int channel, + unsigned char *address); +int ipmi_set_my_LUN(ipmi_user_t user, + unsigned int channel, + unsigned char LUN); +int ipmi_get_my_LUN(ipmi_user_t user, + unsigned int channel, + unsigned char *LUN); /* * Like ipmi_request, but lets you specify the number of retries and @@ -585,6 +591,16 @@ struct ipmi_cmdspec * things it takes to determine your address (if not the BMC) and set * it for everyone else. You should probably leave the LUN alone. */ +struct ipmi_channel_lun_address_set +{ + unsigned short channel; + unsigned char value; +}; +#define IPMICTL_SET_MY_CHANNEL_ADDRESS_CMD _IOR(IPMI_IOC_MAGIC, 24, struct ipmi_channel_lun_address_set) +#define IPMICTL_GET_MY_CHANNEL_ADDRESS_CMD _IOR(IPMI_IOC_MAGIC, 25, struct ipmi_channel_lun_address_set) +#define IPMICTL_SET_MY_CHANNEL_LUN_CMD _IOR(IPMI_IOC_MAGIC, 26, struct ipmi_channel_lun_address_set) +#define IPMICTL_GET_MY_CHANNEL_LUN_CMD _IOR(IPMI_IOC_MAGIC, 27, struct ipmi_channel_lun_address_set) +/* Legacy interfaces, these only set IPMB 0. */ #define IPMICTL_SET_MY_ADDRESS_CMD _IOR(IPMI_IOC_MAGIC, 17, unsigned int) #define IPMICTL_GET_MY_ADDRESS_CMD _IOR(IPMI_IOC_MAGIC, 18, unsigned int) #define IPMICTL_SET_MY_LUN_CMD _IOR(IPMI_IOC_MAGIC, 19, unsigned int) -- cgit v1.2.2 From 07766f241b54d67999907d529b99ffaa61d8b7d9 Mon Sep 17 00:00:00 2001 From: Corey Minyard Date: Tue, 6 Sep 2005 15:18:40 -0700 Subject: [PATCH] ipmi: allow userland to include ipmi.h The IPMI driver include file needs to include compiler.h so it has definitions for __user and such. Signed-off-by: Corey Minyard Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/ipmi.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/ipmi.h b/include/linux/ipmi.h index 846b69899776..dd30adedd07d 100644 --- a/include/linux/ipmi.h +++ b/include/linux/ipmi.h @@ -35,6 +35,7 @@ #define __LINUX_IPMI_H #include +#include /* * This file describes an interface to an IPMI driver. You have to -- cgit v1.2.2 From 56a55ec64806fb56e0cd43b0f726020b74c6689b Mon Sep 17 00:00:00 2001 From: Corey Minyard Date: Tue, 6 Sep 2005 15:18:42 -0700 Subject: [PATCH] ipmi: fix panic ipmb response The "null message handler" in the IPMI driver is used in startup and panic situations to handle messages. It was only designed to work with messages from the local management controller, but in some cases it was used to get messages from remote managmenet controllers, and the system would then panic. This patch makes the "null message handler" in the IPMI driver more general so it works with any kind of message. Signed-off-by: Corey Minyard Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/ipmi.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ipmi.h b/include/linux/ipmi.h index dd30adedd07d..938d55b813a5 100644 --- a/include/linux/ipmi.h +++ b/include/linux/ipmi.h @@ -242,7 +242,8 @@ struct ipmi_recv_msg /* The user_msg_data is the data supplied when a message was sent, if this is a response to a sent message. If this is not a response to a sent message, then user_msg_data will - be NULL. */ + be NULL. If the user above is NULL, then this will be the + intf. */ void *user_msg_data; /* Call this when done with the message. It will presumably free -- cgit v1.2.2 From 8c702e16207c70119d03df924de35f8c3629a5c4 Mon Sep 17 00:00:00 2001 From: Corey Minyard Date: Tue, 6 Sep 2005 15:18:46 -0700 Subject: [PATCH] ipmi poweroff: fix chassis control The IPMI power control function proc_write_chassctrl was badly written, it directly used userspace pointers, it assumed that strings were NULL terminated, and it used the evil sscanf function. This converts over to using the sysctl interface for this data and changes the semantics to be a little more logical. Signed-off-by: Corey Minyard Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sysctl.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index e82be96d4906..532a6c5c24e9 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -711,6 +711,7 @@ enum { DEV_RAID=4, DEV_MAC_HID=5, DEV_SCSI=6, + DEV_IPMI=7, }; /* /proc/sys/dev/cdrom */ @@ -776,6 +777,11 @@ enum { DEV_SCSI_LOGGING_LEVEL=1, }; +/* /proc/sys/dev/ipmi */ +enum { + DEV_IPMI_POWEROFF_POWERCYCLE=1, +}; + /* /proc/sys/abi */ enum { -- cgit v1.2.2 From 335eadf2ef6a1122a720aea98e758e5d431da87d Mon Sep 17 00:00:00 2001 From: Pierre Ossman Date: Tue, 6 Sep 2005 15:18:50 -0700 Subject: [PATCH] sd: initialize SD cards Support for the Secure Digital protocol in the MMC layer. A summary of the legal issues surrounding SD cards, as understood by yours truly: Members of the Secure Digital Association, hereafter SDA, are required to sign a NDA[1] before given access to any specifications. It has been speculated that including an SD implementation would forbid these members to redistribute Linux. This is the basic problem with SD support so it is unclear if it even is a problem since it has no effect on those of us that aren't members. The SDA doesn't seem to enforce these rules though since the patches included here are based on documentation made public by some of the members. The most complete specs[2] are actually released by Sandisk, one of the founding companies of the SDA. Because of this the NDA is considered a non-issue by most involved in the discussions concerning these patches. It might be that the SDA is only interested in protecting the so called "secure" bits of SD, which so far hasn't been found in any public spec. (The card is split into two sections, one "normal" and one "secure" which has an access scheme similar to TPM:s). (As a side note, Microsoft is working to make things easier for us since they want to be able to include the source code for a SD driver in one of their development kits. HP is making sure that the new NDA will allow a Linux implementation. So far only the SDIO specs have been opened up[3]. More will hopefully follow.) [1] http://www.sdcard.org/membership/images/ippolicy.pdf [2] http://www.sandisk.com/pdf/oem/ProdManualSDCardv1.9.pdf [3] http://www.sdcard.org/sdio/Simplified%20SDIO%20Card%20Specification.pdf This patch contains the central parts of the SD support. If no MMC cards are found on a bus then the MMC layer proceeds looking for SD cards. Helper functions are extended to handle the special needs of SD cards. Signed-off-by: Pierre Ossman Cc: Russell King Cc: David Brownell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mmc/card.h | 3 +++ include/linux/mmc/host.h | 4 ++++ include/linux/mmc/mmc.h | 2 ++ 3 files changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h index aefedf04b9bb..538e8c86336c 100644 --- a/include/linux/mmc/card.h +++ b/include/linux/mmc/card.h @@ -47,6 +47,7 @@ struct mmc_card { #define MMC_STATE_PRESENT (1<<0) /* present in sysfs */ #define MMC_STATE_DEAD (1<<1) /* device no longer in stack */ #define MMC_STATE_BAD (1<<2) /* unrecognised device */ +#define MMC_STATE_SDCARD (1<<3) /* is an SD card */ u32 raw_cid[4]; /* raw card CID */ u32 raw_csd[4]; /* raw card CSD */ struct mmc_cid cid; /* card identification */ @@ -56,10 +57,12 @@ struct mmc_card { #define mmc_card_present(c) ((c)->state & MMC_STATE_PRESENT) #define mmc_card_dead(c) ((c)->state & MMC_STATE_DEAD) #define mmc_card_bad(c) ((c)->state & MMC_STATE_BAD) +#define mmc_card_sd(c) ((c)->state & MMC_STATE_SDCARD) #define mmc_card_set_present(c) ((c)->state |= MMC_STATE_PRESENT) #define mmc_card_set_dead(c) ((c)->state |= MMC_STATE_DEAD) #define mmc_card_set_bad(c) ((c)->state |= MMC_STATE_BAD) +#define mmc_card_set_sd(c) ((c)->state |= MMC_STATE_SDCARD) #define mmc_card_name(c) ((c)->cid.prod_name) #define mmc_card_id(c) ((c)->dev.bus_id) diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index 30f68c0c8c6e..845020d90c60 100644 --- a/include/linux/mmc/host.h +++ b/include/linux/mmc/host.h @@ -87,6 +87,10 @@ struct mmc_host { struct mmc_ios ios; /* current io bus settings */ u32 ocr; /* the current OCR setting */ + unsigned int mode; /* current card mode of host */ +#define MMC_MODE_MMC 0 +#define MMC_MODE_SD 1 + struct list_head cards; /* devices attached to this host */ wait_queue_head_t wq; diff --git a/include/linux/mmc/mmc.h b/include/linux/mmc/mmc.h index 0d35d4ffb360..1ab78e8d6c53 100644 --- a/include/linux/mmc/mmc.h +++ b/include/linux/mmc/mmc.h @@ -88,6 +88,8 @@ struct mmc_card; extern int mmc_wait_for_req(struct mmc_host *, struct mmc_request *); extern int mmc_wait_for_cmd(struct mmc_host *, struct mmc_command *, int); +extern int mmc_wait_for_app_cmd(struct mmc_host *, unsigned int, + struct mmc_command *, int); extern int __mmc_claim_host(struct mmc_host *host, struct mmc_card *card); -- cgit v1.2.2 From a00fc09029f02ca833cf90e5d5625f08c4ac4f51 Mon Sep 17 00:00:00 2001 From: Pierre Ossman Date: Tue, 6 Sep 2005 15:18:52 -0700 Subject: [PATCH] sd: read-only switch Support for the read-only switch on SD cards which must be enforced by the host. Signed-off-by: Pierre Ossman Cc: Russell King Cc: David Brownell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mmc/card.h | 3 +++ include/linux/mmc/host.h | 1 + 2 files changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h index 538e8c86336c..0e9ec01b9c5b 100644 --- a/include/linux/mmc/card.h +++ b/include/linux/mmc/card.h @@ -48,6 +48,7 @@ struct mmc_card { #define MMC_STATE_DEAD (1<<1) /* device no longer in stack */ #define MMC_STATE_BAD (1<<2) /* unrecognised device */ #define MMC_STATE_SDCARD (1<<3) /* is an SD card */ +#define MMC_STATE_READONLY (1<<4) /* card is read-only */ u32 raw_cid[4]; /* raw card CID */ u32 raw_csd[4]; /* raw card CSD */ struct mmc_cid cid; /* card identification */ @@ -58,11 +59,13 @@ struct mmc_card { #define mmc_card_dead(c) ((c)->state & MMC_STATE_DEAD) #define mmc_card_bad(c) ((c)->state & MMC_STATE_BAD) #define mmc_card_sd(c) ((c)->state & MMC_STATE_SDCARD) +#define mmc_card_readonly(c) ((c)->state & MMC_STATE_READONLY) #define mmc_card_set_present(c) ((c)->state |= MMC_STATE_PRESENT) #define mmc_card_set_dead(c) ((c)->state |= MMC_STATE_DEAD) #define mmc_card_set_bad(c) ((c)->state |= MMC_STATE_BAD) #define mmc_card_set_sd(c) ((c)->state |= MMC_STATE_SDCARD) +#define mmc_card_set_readonly(c) ((c)->state |= MMC_STATE_READONLY) #define mmc_card_name(c) ((c)->cid.prod_name) #define mmc_card_id(c) ((c)->dev.bus_id) diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index 845020d90c60..8c5f71376e41 100644 --- a/include/linux/mmc/host.h +++ b/include/linux/mmc/host.h @@ -62,6 +62,7 @@ struct mmc_ios { struct mmc_host_ops { void (*request)(struct mmc_host *host, struct mmc_request *req); void (*set_ios)(struct mmc_host *host, struct mmc_ios *ios); + int (*get_ro)(struct mmc_host *host); }; struct mmc_card; -- cgit v1.2.2 From b57c43ad81602589afca3948a5a7121e40026e17 Mon Sep 17 00:00:00 2001 From: Pierre Ossman Date: Tue, 6 Sep 2005 15:18:53 -0700 Subject: [PATCH] sd: SCR register Read the SD specific SCR register from the card. Signed-off-by: Pierre Ossman Cc: Russell King Cc: David Brownell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mmc/card.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h index 0e9ec01b9c5b..18fc77f682de 100644 --- a/include/linux/mmc/card.h +++ b/include/linux/mmc/card.h @@ -33,6 +33,13 @@ struct mmc_csd { unsigned int capacity; }; +struct sd_scr { + unsigned char sda_vsn; + unsigned char bus_widths; +#define SD_SCR_BUS_WIDTH_1 (1<<0) +#define SD_SCR_BUS_WIDTH_4 (1<<2) +}; + struct mmc_host; /* @@ -51,8 +58,10 @@ struct mmc_card { #define MMC_STATE_READONLY (1<<4) /* card is read-only */ u32 raw_cid[4]; /* raw card CID */ u32 raw_csd[4]; /* raw card CSD */ + u32 raw_scr[2]; /* raw card SCR */ struct mmc_cid cid; /* card identification */ struct mmc_csd csd; /* card specific */ + struct sd_scr scr; /* extra SD information */ }; #define mmc_card_present(c) ((c)->state & MMC_STATE_PRESENT) -- cgit v1.2.2 From f218278a456b3c272b480443c89004c3d2a49f18 Mon Sep 17 00:00:00 2001 From: Pierre Ossman Date: Tue, 6 Sep 2005 15:18:55 -0700 Subject: [PATCH] sd: SD 4-bit bus Infrastructure for 4-bit bus transfers with SD cards. Signed-off-by: Pierre Ossman Cc: Russell King Cc: David Brownell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mmc/host.h | 9 +++++++++ include/linux/mmc/protocol.h | 7 +++++++ 2 files changed, 16 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index 8c5f71376e41..6014160d9c06 100644 --- a/include/linux/mmc/host.h +++ b/include/linux/mmc/host.h @@ -57,6 +57,11 @@ struct mmc_ios { #define MMC_POWER_OFF 0 #define MMC_POWER_UP 1 #define MMC_POWER_ON 2 + + unsigned char bus_width; /* data bus width */ + +#define MMC_BUS_WIDTH_1 0 +#define MMC_BUS_WIDTH_4 2 }; struct mmc_host_ops { @@ -77,6 +82,10 @@ struct mmc_host { unsigned int f_max; u32 ocr_avail; + unsigned long caps; /* Host capabilities */ + +#define MMC_CAP_4_BIT_DATA (1 << 0) /* Can the host do 4 bit transfers */ + /* host specific block data */ unsigned int max_seg_size; /* see blk_queue_max_segment_size */ unsigned short max_hw_segs; /* see blk_queue_max_hw_segments */ diff --git a/include/linux/mmc/protocol.h b/include/linux/mmc/protocol.h index 896342817b97..f819cae92266 100644 --- a/include/linux/mmc/protocol.h +++ b/include/linux/mmc/protocol.h @@ -236,5 +236,12 @@ struct _mmc_csd { #define CSD_SPEC_VER_2 2 /* Implements system specification 2.0 - 2.2 */ #define CSD_SPEC_VER_3 3 /* Implements system specification 3.1 */ + +/* + * SD bus widths + */ +#define SD_BUS_WIDTH_1 0 +#define SD_BUS_WIDTH_4 2 + #endif /* MMC_MMC_PROTOCOL_H */ -- cgit v1.2.2 From 3158106685acac8f8d4e74a17b974f160fe77c0b Mon Sep 17 00:00:00 2001 From: Richard Purdie Date: Tue, 6 Sep 2005 15:19:06 -0700 Subject: [PATCH] Input: Add a new switch event type The corgi keyboard has need of a switch event type with slightly type to the input system as recommended by the input maintainer. Signed-off-by: Richard Purdie Cc: Vojtech Pavlik Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/input.h | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) (limited to 'include/linux') diff --git a/include/linux/input.h b/include/linux/input.h index bdc53c6cc962..4767e5429534 100644 --- a/include/linux/input.h +++ b/include/linux/input.h @@ -66,6 +66,7 @@ struct input_absinfo { #define EVIOCGKEY(len) _IOC(_IOC_READ, 'E', 0x18, len) /* get global keystate */ #define EVIOCGLED(len) _IOC(_IOC_READ, 'E', 0x19, len) /* get all LEDs */ #define EVIOCGSND(len) _IOC(_IOC_READ, 'E', 0x1a, len) /* get all sounds status */ +#define EVIOCGSW(len) _IOC(_IOC_READ, 'E', 0x1b, len) /* get all switch states */ #define EVIOCGBIT(ev,len) _IOC(_IOC_READ, 'E', 0x20 + ev, len) /* get event bits */ #define EVIOCGABS(abs) _IOR('E', 0x40 + abs, struct input_absinfo) /* get abs value/limits */ @@ -86,6 +87,7 @@ struct input_absinfo { #define EV_REL 0x02 #define EV_ABS 0x03 #define EV_MSC 0x04 +#define EV_SW 0x05 #define EV_LED 0x11 #define EV_SND 0x12 #define EV_REP 0x14 @@ -550,6 +552,20 @@ struct input_absinfo { #define ABS_MISC 0x28 #define ABS_MAX 0x3f +/* + * Switch events + */ + +#define SW_0 0x00 +#define SW_1 0x01 +#define SW_2 0x02 +#define SW_3 0x03 +#define SW_4 0x04 +#define SW_5 0x05 +#define SW_6 0x06 +#define SW_7 0x07 +#define SW_MAX 0x0f + /* * Misc events */ @@ -824,6 +840,7 @@ struct input_dev { unsigned long ledbit[NBITS(LED_MAX)]; unsigned long sndbit[NBITS(SND_MAX)]; unsigned long ffbit[NBITS(FF_MAX)]; + unsigned long swbit[NBITS(SW_MAX)]; int ff_effects_max; unsigned int keycodemax; @@ -844,6 +861,7 @@ struct input_dev { unsigned long key[NBITS(KEY_MAX)]; unsigned long led[NBITS(LED_MAX)]; unsigned long snd[NBITS(SND_MAX)]; + unsigned long sw[NBITS(SW_MAX)]; int absmax[ABS_MAX + 1]; int absmin[ABS_MAX + 1]; @@ -886,6 +904,7 @@ struct input_dev { #define INPUT_DEVICE_ID_MATCH_LEDBIT 0x200 #define INPUT_DEVICE_ID_MATCH_SNDBIT 0x400 #define INPUT_DEVICE_ID_MATCH_FFBIT 0x800 +#define INPUT_DEVICE_ID_MATCH_SWBIT 0x1000 #define INPUT_DEVICE_ID_MATCH_DEVICE\ (INPUT_DEVICE_ID_MATCH_BUS | INPUT_DEVICE_ID_MATCH_VENDOR | INPUT_DEVICE_ID_MATCH_PRODUCT) @@ -906,6 +925,7 @@ struct input_device_id { unsigned long ledbit[NBITS(LED_MAX)]; unsigned long sndbit[NBITS(SND_MAX)]; unsigned long ffbit[NBITS(FF_MAX)]; + unsigned long swbit[NBITS(SW_MAX)]; unsigned long driver_info; }; @@ -998,6 +1018,11 @@ static inline void input_report_ff_status(struct input_dev *dev, unsigned int co input_event(dev, EV_FF_STATUS, code, value); } +static inline void input_report_switch(struct input_dev *dev, unsigned int code, int value) +{ + input_event(dev, EV_SW, code, !!value); +} + static inline void input_regs(struct input_dev *dev, struct pt_regs *regs) { dev->regs = regs; -- cgit v1.2.2 From a7662236253374012d364106b6dc9161bd929e2e Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 6 Sep 2005 15:19:10 -0700 Subject: [PATCH] Make ll_rw_block() wait for buffer lock Introduce new ll_rw_block() operation SWRITE meaning that block layer should wait for the buffer lock and write-out afterwards. Hence data in buffers at the time of call are guaranteed to be submitted to the disk. Signed-off-by: Jan Kara Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/fs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 7e1b589842af..fd93ab7da905 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -69,6 +69,7 @@ extern int dir_notify_enable; #define READ 0 #define WRITE 1 #define READA 2 /* read-ahead - don't block if no resources */ +#define SWRITE 3 /* for ll_rw_block() - wait for buffer lock */ #define SPECIAL 4 /* For non-blockdevice requests in request queue */ #define READ_SYNC (READ | (1 << BIO_RW_SYNC)) #define WRITE_SYNC (WRITE | (1 << BIO_RW_SYNC)) -- cgit v1.2.2 From d0aaff9796c3310326d10da44fc0faed352a1d29 Mon Sep 17 00:00:00 2001 From: Prasanna S Panchamukhi Date: Tue, 6 Sep 2005 15:19:26 -0700 Subject: [PATCH] Kprobes: prevent possible race conditions generic There are possible race conditions if probes are placed on routines within the kprobes files and routines used by the kprobes. For example if you put probe on get_kprobe() routines, the system can hang while inserting probes on any routine such as do_fork(). Because while inserting probes on do_fork(), register_kprobes() routine grabs the kprobes spin lock and executes get_kprobe() routine and to handle probe of get_kprobe(), kprobes_handler() gets executed and tries to grab kprobes spin lock, and spins forever. This patch avoids such possible race conditions by preventing probes on routines within the kprobes file and routines used by kprobes. I have modified the patches as per Andi Kleen's suggestion to move kprobes routines and other routines used by kprobes to a seperate section .kprobes.text. Also moved page fault and exception handlers, general protection fault to .kprobes.text section. These patches have been tested on i386, x86_64 and ppc64 architectures, also compiled on ia64 and sparc64 architectures. Signed-off-by: Prasanna S Panchamukhi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kprobes.h | 3 +++ include/linux/linkage.h | 7 +++++++ 2 files changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h index e050fc2d4c26..e30afdca7917 100644 --- a/include/linux/kprobes.h +++ b/include/linux/kprobes.h @@ -42,6 +42,9 @@ #define KPROBE_REENTER 0x00000004 #define KPROBE_HIT_SSDONE 0x00000008 +/* Attach to insert probes on any functions which should be ignored*/ +#define __kprobes __attribute__((__section__(".kprobes.text"))) + struct kprobe; struct pt_regs; struct kretprobe; diff --git a/include/linux/linkage.h b/include/linux/linkage.h index 338f7795d8a0..147eb01e0d4b 100644 --- a/include/linux/linkage.h +++ b/include/linux/linkage.h @@ -33,6 +33,13 @@ ALIGN; \ name: +#define KPROBE_ENTRY(name) \ + .section .kprobes.text, "ax"; \ + .globl name; \ + ALIGN; \ + name: + + #endif #define NORET_TYPE /**/ -- cgit v1.2.2 From 34bb61f9ddabd7a7f909cbfb05592eb775f6662a Mon Sep 17 00:00:00 2001 From: James Bottomley Date: Tue, 6 Sep 2005 16:56:51 -0700 Subject: [PATCH] fix klist semantics for lists which have elements removed on traversal The problem is that klists claim to provide semantics for safe traversal of lists which are being modified. The failure case is when traversal of a list causes element removal (a fairly common case). The issue is that although the list node is refcounted, if it is embedded in an object (which is universally the case), then the object will be freed regardless of the klist refcount leading to slab corruption because the klist iterator refers to the prior element to get the next. The solution is to make the klist take and release references to the embedding object meaning that the embedding object won't be released until the list relinquishes the reference to it. (akpm: fast-track this because it's needed for the 2.6.13 scsi merge) Signed-off-by: James Bottomley Signed-off-by: Greg Kroah-Hartman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/klist.h | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/klist.h b/include/linux/klist.h index c4d1fae4dd89..74071254c9d3 100644 --- a/include/linux/klist.h +++ b/include/linux/klist.h @@ -17,15 +17,17 @@ #include #include - +struct klist_node; struct klist { spinlock_t k_lock; struct list_head k_list; + void (*get)(struct klist_node *); + void (*put)(struct klist_node *); }; -extern void klist_init(struct klist * k); - +extern void klist_init(struct klist * k, void (*get)(struct klist_node *), + void (*put)(struct klist_node *)); struct klist_node { struct klist * n_klist; -- cgit v1.2.2