From b67b4b117746aef686e527c3205792db0f2c9e16 Mon Sep 17 00:00:00 2001 From: Dominic Curran Date: Mon, 27 Oct 2008 22:30:53 -0400 Subject: Input: gpio-keys - add flag to allow auto repeat This patch adds a flag to gpio-key driver to turn on the input subsystems auto repeat feature if needed. Signed-off-by: Dominic Curran Signed-off-by: Dmitry Torokhov --- include/linux/gpio_keys.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/gpio_keys.h b/include/linux/gpio_keys.h index ec6ecd74781d..1289fa7623ca 100644 --- a/include/linux/gpio_keys.h +++ b/include/linux/gpio_keys.h @@ -15,6 +15,7 @@ struct gpio_keys_button { struct gpio_keys_platform_data { struct gpio_keys_button *buttons; int nbuttons; + unsigned int rep:1; /* enable input subsystem auto repeat */ }; #endif -- cgit v1.2.2 From 5e1f8c9e20a92743eefc9a82c2db835213905e26 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Tue, 28 Oct 2008 13:21:55 -0400 Subject: ext3: Add support for non-native signed/unsigned htree hash algorithms The original ext3 hash algorithms assumed that variables of type char were signed, as God and K&R intended. Unfortunately, this assumption is not true on some architectures. Userspace support for marking filesystems with non-native signed/unsigned chars was added two years ago, but the kernel-side support was never added (until now). Signed-off-by: "Theodore Ts'o" Cc: akpm@linux-foundation.org Cc: linux-kernel@vger.kernel.org --- include/linux/ext3_fs.h | 28 +++++++++++++++++++++++++++- include/linux/ext3_fs_sb.h | 1 + 2 files changed, 28 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ext3_fs.h b/include/linux/ext3_fs.h index d14f02918483..9004794a35fe 100644 --- a/include/linux/ext3_fs.h +++ b/include/linux/ext3_fs.h @@ -353,6 +353,13 @@ struct ext3_inode { #define EXT3_ERROR_FS 0x0002 /* Errors detected */ #define EXT3_ORPHAN_FS 0x0004 /* Orphans being recovered */ +/* + * Misc. filesystem flags + */ +#define EXT2_FLAGS_SIGNED_HASH 0x0001 /* Signed dirhash in use */ +#define EXT2_FLAGS_UNSIGNED_HASH 0x0002 /* Unsigned dirhash in use */ +#define EXT2_FLAGS_TEST_FILESYS 0x0004 /* to test development code */ + /* * Mount flags */ @@ -489,7 +496,23 @@ struct ext3_super_block { __u16 s_reserved_word_pad; __le32 s_default_mount_opts; __le32 s_first_meta_bg; /* First metablock block group */ - __u32 s_reserved[190]; /* Padding to the end of the block */ + __le32 s_mkfs_time; /* When the filesystem was created */ + __le32 s_jnl_blocks[17]; /* Backup of the journal inode */ + /* 64bit support valid if EXT4_FEATURE_COMPAT_64BIT */ +/*150*/ __le32 s_blocks_count_hi; /* Blocks count */ + __le32 s_r_blocks_count_hi; /* Reserved blocks count */ + __le32 s_free_blocks_count_hi; /* Free blocks count */ + __le16 s_min_extra_isize; /* All inodes have at least # bytes */ + __le16 s_want_extra_isize; /* New inodes should reserve # bytes */ + __le32 s_flags; /* Miscellaneous flags */ + __le16 s_raid_stride; /* RAID stride */ + __le16 s_mmp_interval; /* # seconds to wait in MMP checking */ + __le64 s_mmp_block; /* Block for multi-mount protection */ + __le32 s_raid_stripe_width; /* blocks on all data disks (N*stride)*/ + __u8 s_log_groups_per_flex; /* FLEX_BG group size */ + __u8 s_reserved_char_pad2; + __le16 s_reserved_pad; + __u32 s_reserved[162]; /* Padding to the end of the block */ }; #ifdef __KERNEL__ @@ -694,6 +717,9 @@ static inline __le16 ext3_rec_len_to_disk(unsigned len) #define DX_HASH_LEGACY 0 #define DX_HASH_HALF_MD4 1 #define DX_HASH_TEA 2 +#define DX_HASH_LEGACY_UNSIGNED 3 +#define DX_HASH_HALF_MD4_UNSIGNED 4 +#define DX_HASH_TEA_UNSIGNED 5 #ifdef __KERNEL__ diff --git a/include/linux/ext3_fs_sb.h b/include/linux/ext3_fs_sb.h index e024e38248ff..a4e9216b3a6d 100644 --- a/include/linux/ext3_fs_sb.h +++ b/include/linux/ext3_fs_sb.h @@ -57,6 +57,7 @@ struct ext3_sb_info { u32 s_next_generation; u32 s_hash_seed[4]; int s_def_hash_version; + int s_hash_unsigned; /* 3 if hash should be signed, 0 if not */ struct percpu_counter s_freeblocks_counter; struct percpu_counter s_freeinodes_counter; struct percpu_counter s_dirs_counter; -- cgit v1.2.2 From a20c7ab570ffdce1d6f67c7acf8c1c502a3b3839 Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Thu, 16 Oct 2008 18:43:48 +0400 Subject: [MTD] sharpsl-nand: use platform_data for model-specific values Add platform_data which holds all model-specific values, like badblocks pattern, oobinfo, partitions. Signed-off-by: Dmitry Baryshkov --- include/linux/mtd/sharpsl.h | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) create mode 100644 include/linux/mtd/sharpsl.h (limited to 'include/linux') diff --git a/include/linux/mtd/sharpsl.h b/include/linux/mtd/sharpsl.h new file mode 100644 index 000000000000..25f4d2a845c1 --- /dev/null +++ b/include/linux/mtd/sharpsl.h @@ -0,0 +1,20 @@ +/* + * SharpSL NAND support + * + * Copyright (C) 2008 Dmitry Baryshkov + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include + +struct sharpsl_nand_platform_data { + struct nand_bbt_descr *badblock_pattern; + struct nand_ecclayout *ecc_layout; + struct mtd_partition *partitions; + unsigned int nr_partitions; +}; -- cgit v1.2.2 From 171bbfbeab7730031eec8025341401fabe540bd5 Mon Sep 17 00:00:00 2001 From: Mark Fasheh Date: Tue, 25 Nov 2008 17:42:31 -0500 Subject: jbd2: Add BH_JBDPrivateStart Add this so that file systems using JBD2 can safely allocate unused b_state bits. In this case, we add it so that Ocfs2 can define a single bit for tracking the validation state of a buffer. Signed-off-by: Mark Fasheh Signed-off-by: "Theodore Ts'o" --- include/linux/jbd2.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index c7d106ef22e2..f36645745489 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -329,6 +329,7 @@ enum jbd_state_bits { BH_State, /* Pins most journal_head state */ BH_JournalHead, /* Pins bh->b_private and jh->b_bh */ BH_Unshadow, /* Dummy bit, for BJ_Shadow wakeup filtering */ + BH_JBDPrivateStart, /* First bit available for private use by FS */ }; BUFFER_FNS(JBD, jbd) -- cgit v1.2.2 From e07f7183a486cf9783d1f8c9d2997b5b39eeb2d4 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Wed, 26 Nov 2008 01:14:26 -0500 Subject: jbd2: improve jbd2 fsync batching This patch removes the static sleep time in favor of a more self optimizing approach where we measure the average amount of time it takes to commit a transaction to disk and the ammount of time a transaction has been running. If somebody does a sync write or an fsync() traditionally we would sleep for 1 jiffies, which depending on the value of HZ could be a significant amount of time compared to how long it takes to commit a transaction to the underlying storage. With this patch instead of sleeping for a jiffie, we check to see if the amount of time this transaction has been running is less than the average commit time, and if it is we sleep for the delta using schedule_hrtimeout to give us a higher precision sleep time. This greatly benefits high end storage where you could end up sleeping for longer than it takes to commit the transaction and therefore sitting idle instead of allowing the transaction to be committed by keeping the sleep time to a minimum so you are sure to always be doing something. Signed-off-by: Josef Bacik Signed-off-by: "Theodore Ts'o" --- include/linux/jbd2.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include/linux') diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index f36645745489..ab8cef130c28 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -637,6 +637,11 @@ struct transaction_s */ unsigned long t_expires; + /* + * When this transaction started, in nanoseconds [no locking] + */ + ktime_t t_start_time; + /* * How many handles used this transaction? [t_handle_lock] */ @@ -939,8 +944,18 @@ struct journal_s struct buffer_head **j_wbuf; int j_wbufsize; + /* + * this is the pid of hte last person to run a synchronous operation + * through the journal + */ pid_t j_last_sync_writer; + /* + * the average amount of time in nanoseconds it takes to commit a + * transaction to disk. [j_state_lock] + */ + u64 j_average_commit_time; + /* This function is called when a transaction is closed */ void (*j_commit_callback)(journal_t *, transaction_t *); -- cgit v1.2.2 From 30773840c19cea60dcef39545960d541b1ac1cf8 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Sat, 3 Jan 2009 20:27:38 -0500 Subject: ext4: add fsync batch tuning knobs Add new mount options, min_batch_time and max_batch_time, which controls how long the jbd2 layer should wait for additional filesystem operations to get batched with a synchronous write transaction. Signed-off-by: "Theodore Ts'o" --- include/linux/jbd2.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index ab8cef130c28..a3cd647ea1bc 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -956,6 +956,14 @@ struct journal_s */ u64 j_average_commit_time; + /* + * minimum and maximum times that we should wait for + * additional filesystem operations to get batched into a + * synchronous handle in microseconds + */ + u32 j_min_batch_time; + u32 j_max_batch_time; + /* This function is called when a transaction is closed */ void (*j_commit_callback)(journal_t *, transaction_t *); -- cgit v1.2.2 From 1a0d3786dd57dbd74f340322054c3d618b999dcf Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Wed, 5 Nov 2008 00:09:22 -0500 Subject: jbd2: Remove a large array of bh's from the stack of the checkpoint routine jbd2_log_do_checkpoint()n is one of the kernel's largest stack users. Move the array of buffer head's from the stack of jbd2_log_do_checkpoint() to the in-core journal structure. Signed-off-by: "Theodore Ts'o" --- include/linux/jbd2.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index a3cd647ea1bc..004c9a8d63ed 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -687,6 +687,8 @@ jbd2_time_diff(unsigned long start, unsigned long end) return end + (MAX_JIFFY_OFFSET - start); } +#define JBD2_NR_BATCH 64 + /** * struct journal_s - The journal_s type is the concrete type associated with * journal_t. @@ -830,6 +832,14 @@ struct journal_s /* Semaphore for locking against concurrent checkpoints */ struct mutex j_checkpoint_mutex; + /* + * List of buffer heads used by the checkpoint routine. This + * was moved from jbd2_log_do_checkpoint() to reduce stack + * usage. Access to this array is controlled by the + * j_checkpoint_mutex. [j_checkpoint_mutex] + */ + struct buffer_head *j_chkpt_bhs[JBD2_NR_BATCH]; + /* * Journal head: identifies the first unused block in the journal. * [j_state_lock] -- cgit v1.2.2 From fb68407b0d9efba962c03f55009c797e22f024bc Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Thu, 6 Nov 2008 17:50:21 -0500 Subject: jbd2: Call journal commit callback without holding j_list_lock Avoid freeing the transaction in __jbd2_journal_drop_transaction() so the journal commit callback can run without holding j_list_lock, to avoid lock contention on this spinlock. Signed-off-by: Aneesh Kumar K.V Signed-off-by: "Theodore Ts'o" --- include/linux/jbd2.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index 004c9a8d63ed..9d82084a1605 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -1179,8 +1179,8 @@ int jbd2_log_wait_commit(journal_t *journal, tid_t tid); int jbd2_log_do_checkpoint(journal_t *journal); void __jbd2_log_wait_for_space(journal_t *journal); -extern void __jbd2_journal_drop_transaction(journal_t *, transaction_t *); -extern int jbd2_cleanup_journal_tail(journal_t *); +extern void __jbd2_journal_drop_transaction(journal_t *, transaction_t *); +extern int jbd2_cleanup_journal_tail(journal_t *); /* Debugging code only: */ -- cgit v1.2.2 From 3eb1aa43ef5cb871ba3fb2f08633675eca374d2e Mon Sep 17 00:00:00 2001 From: Jaya Kumar Date: Wed, 19 Nov 2008 16:58:50 -0500 Subject: Input: add support for Wacom W8001 penabled serial touchscreen The Wacom W8001 sensor is a sensor device (uses electromagnetic resonance) and it is interfaced via its serial microcontroller to the host. Signed-off-by: Jaya Kumar Signed-off-by: Dmitry Torokhov --- include/linux/serio.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/serio.h b/include/linux/serio.h index 25641d9e0ea8..1bcb357a01a1 100644 --- a/include/linux/serio.h +++ b/include/linux/serio.h @@ -213,5 +213,6 @@ static inline void serio_unpin_driver(struct serio *serio) #define SERIO_ZHENHUA 0x36 #define SERIO_INEXIO 0x37 #define SERIO_TOUCHIT213 0x37 +#define SERIO_W8001 0x39 #endif -- cgit v1.2.2 From a2d781fc8d9b16113dd9440107d73c0f21d7cbef Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Wed, 19 Nov 2008 17:02:24 -0500 Subject: Input: libps2 - handle 0xfc responses from devices Signed-off-by: Dmitry Torokhov --- include/linux/libps2.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/libps2.h b/include/linux/libps2.h index afc413369101..b94534b7e266 100644 --- a/include/linux/libps2.h +++ b/include/linux/libps2.h @@ -18,11 +18,13 @@ #define PS2_RET_ID 0x00 #define PS2_RET_ACK 0xfa #define PS2_RET_NAK 0xfe +#define PS2_RET_ERR 0xfc #define PS2_FLAG_ACK 1 /* Waiting for ACK/NAK */ #define PS2_FLAG_CMD 2 /* Waiting for command to finish */ #define PS2_FLAG_CMD1 4 /* Waiting for the first byte of command response */ #define PS2_FLAG_WAITID 8 /* Command execiting is GET ID */ +#define PS2_FLAG_NAK 16 /* Last transmission was NAKed */ struct ps2dev { struct serio *serio; -- cgit v1.2.2 From 193da6092764ab693da7170c5badbf60d7758c1d Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 26 Nov 2008 12:03:54 +0100 Subject: fuse: move FUSE_MINOR to miscdevice.h Move FUSE_MINOR to miscdevice.h. While at it, de-uglify the file. Signed-off-by: Tejun Heo Signed-off-by: Miklos Szeredi --- include/linux/fuse.h | 3 --- include/linux/miscdevice.h | 42 +++++++++++++++++++++--------------------- 2 files changed, 21 insertions(+), 24 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fuse.h b/include/linux/fuse.h index 350fe9767bbc..7caa473306e4 100644 --- a/include/linux/fuse.h +++ b/include/linux/fuse.h @@ -40,9 +40,6 @@ /** The major number of the fuse character device */ #define FUSE_MAJOR MISC_MAJOR -/** The minor number of the fuse character device */ -#define FUSE_MINOR 229 - /* Make sure all structures are padded to 64bit boundary, so 32bit userspace works under 64bit kernels */ diff --git a/include/linux/miscdevice.h b/include/linux/miscdevice.h index 26433ec520b3..a820f816a49e 100644 --- a/include/linux/miscdevice.h +++ b/include/linux/miscdevice.h @@ -3,33 +3,33 @@ #include #include -#define PSMOUSE_MINOR 1 -#define MS_BUSMOUSE_MINOR 2 -#define ATIXL_BUSMOUSE_MINOR 3 -/*#define AMIGAMOUSE_MINOR 4 FIXME OBSOLETE */ -#define ATARIMOUSE_MINOR 5 -#define SUN_MOUSE_MINOR 6 -#define APOLLO_MOUSE_MINOR 7 -#define PC110PAD_MINOR 9 -/*#define ADB_MOUSE_MINOR 10 FIXME OBSOLETE */ +#define PSMOUSE_MINOR 1 +#define MS_BUSMOUSE_MINOR 2 +#define ATIXL_BUSMOUSE_MINOR 3 +/*#define AMIGAMOUSE_MINOR 4 FIXME OBSOLETE */ +#define ATARIMOUSE_MINOR 5 +#define SUN_MOUSE_MINOR 6 +#define APOLLO_MOUSE_MINOR 7 +#define PC110PAD_MINOR 9 +/*#define ADB_MOUSE_MINOR 10 FIXME OBSOLETE */ #define WATCHDOG_MINOR 130 /* Watchdog timer */ #define TEMP_MINOR 131 /* Temperature Sensor */ -#define RTC_MINOR 135 +#define RTC_MINOR 135 #define EFI_RTC_MINOR 136 /* EFI Time services */ -#define SUN_OPENPROM_MINOR 139 +#define SUN_OPENPROM_MINOR 139 #define DMAPI_MINOR 140 /* DMAPI */ -#define NVRAM_MINOR 144 -#define SGI_MMTIMER 153 +#define NVRAM_MINOR 144 +#define SGI_MMTIMER 153 #define STORE_QUEUE_MINOR 155 -#define I2O_MINOR 166 +#define I2O_MINOR 166 #define MICROCODE_MINOR 184 -#define MWAVE_MINOR 219 /* ACP/Mwave Modem */ -#define MPT_MINOR 220 -#define MISC_DYNAMIC_MINOR 255 - -#define TUN_MINOR 200 -#define HPET_MINOR 228 -#define KVM_MINOR 232 +#define TUN_MINOR 200 +#define MWAVE_MINOR 219 /* ACP/Mwave Modem */ +#define MPT_MINOR 220 +#define HPET_MINOR 228 +#define FUSE_MINOR 229 +#define KVM_MINOR 232 +#define MISC_DYNAMIC_MINOR 255 struct device; -- cgit v1.2.2 From 59efec7b903987dcb60b9ebc85c7acd4443a11a1 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 26 Nov 2008 12:03:55 +0100 Subject: fuse: implement ioctl support Generic ioctl support is tricky to implement because only the ioctl implementation itself knows which memory regions need to be read and/or written. To support this, fuse client can request retry of ioctl specifying memory regions to read and write. Deep copying (nested pointers) can be implemented by retrying multiple times resolving one depth of dereference at a time. For security and cleanliness considerations, ioctl implementation has restricted mode where the kernel determines data transfer directions and sizes using the _IOC_*() macros on the ioctl command. In this mode, retry is not allowed. For all FUSE servers, restricted mode is enforced. Unrestricted ioctl will be used by CUSE. Plese read the comment on top of fs/fuse/file.c::fuse_file_do_ioctl() for more information. Signed-off-by: Tejun Heo Signed-off-by: Miklos Szeredi --- include/linux/fuse.h | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fuse.h b/include/linux/fuse.h index 7caa473306e4..608e300ae883 100644 --- a/include/linux/fuse.h +++ b/include/linux/fuse.h @@ -148,6 +148,21 @@ struct fuse_file_lock { */ #define FUSE_READ_LOCKOWNER (1 << 1) +/** + * Ioctl flags + * + * FUSE_IOCTL_COMPAT: 32bit compat ioctl on 64bit machine + * FUSE_IOCTL_UNRESTRICTED: not restricted to well-formed ioctls, retry allowed + * FUSE_IOCTL_RETRY: retry with new iovecs + * + * FUSE_IOCTL_MAX_IOV: maximum of in_iovecs + out_iovecs + */ +#define FUSE_IOCTL_COMPAT (1 << 0) +#define FUSE_IOCTL_UNRESTRICTED (1 << 1) +#define FUSE_IOCTL_RETRY (1 << 2) + +#define FUSE_IOCTL_MAX_IOV 256 + enum fuse_opcode { FUSE_LOOKUP = 1, FUSE_FORGET = 2, /* no reply */ @@ -185,6 +200,7 @@ enum fuse_opcode { FUSE_INTERRUPT = 36, FUSE_BMAP = 37, FUSE_DESTROY = 38, + FUSE_IOCTL = 39, }; /* The read buffer is required to be at least 8k, but may be much larger */ @@ -385,6 +401,22 @@ struct fuse_bmap_out { __u64 block; }; +struct fuse_ioctl_in { + __u64 fh; + __u32 flags; + __u32 cmd; + __u64 arg; + __u32 in_size; + __u32 out_size; +}; + +struct fuse_ioctl_out { + __s32 result; + __u32 flags; + __u32 in_iovs; + __u32 out_iovs; +}; + struct fuse_in_header { __u32 len; __u32 opcode; -- cgit v1.2.2 From 8599396b5062bf6bd2a0b433503849e2322df1c2 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 26 Nov 2008 12:03:55 +0100 Subject: fuse: implement unsolicited notification Clients always used to write only in response to read requests. To implement poll efficiently, clients should be able to issue unsolicited notifications. This patch implements basic notification support. Zero fuse_out_header.unique is now accepted and considered unsolicited notification and the error field contains notification code. This patch doesn't implement any actual notification. Signed-off-by: Tejun Heo Signed-off-by: Miklos Szeredi --- include/linux/fuse.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fuse.h b/include/linux/fuse.h index 608e300ae883..abde9949e2c0 100644 --- a/include/linux/fuse.h +++ b/include/linux/fuse.h @@ -203,6 +203,10 @@ enum fuse_opcode { FUSE_IOCTL = 39, }; +enum fuse_notify_code { + FUSE_NOTIFY_CODE_MAX, +}; + /* The read buffer is required to be at least 8k, but may be much larger */ #define FUSE_MIN_READ_BUFFER 8192 -- cgit v1.2.2 From 95668a69a4bb862063c4d28a746e55107dee7b98 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 26 Nov 2008 12:03:55 +0100 Subject: fuse: implement poll support Implement poll support. Polled files are indexed using kh in a RB tree rooted at fuse_conn->polled_files. Client should send FUSE_NOTIFY_POLL notification once after processing FUSE_POLL which has FUSE_POLL_SCHEDULE_NOTIFY set. Sending notification unconditionally after the latest poll or everytime file content might have changed is inefficient but won't cause malfunction. fuse_file_poll() can sleep and requires patches from the following thread which allows f_op->poll() to sleep. http://thread.gmane.org/gmane.linux.kernel/726176 Signed-off-by: Tejun Heo Signed-off-by: Miklos Szeredi --- include/linux/fuse.h | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fuse.h b/include/linux/fuse.h index abde9949e2c0..5650cf033e73 100644 --- a/include/linux/fuse.h +++ b/include/linux/fuse.h @@ -163,6 +163,13 @@ struct fuse_file_lock { #define FUSE_IOCTL_MAX_IOV 256 +/** + * Poll flags + * + * FUSE_POLL_SCHEDULE_NOTIFY: request poll notify + */ +#define FUSE_POLL_SCHEDULE_NOTIFY (1 << 0) + enum fuse_opcode { FUSE_LOOKUP = 1, FUSE_FORGET = 2, /* no reply */ @@ -201,9 +208,11 @@ enum fuse_opcode { FUSE_BMAP = 37, FUSE_DESTROY = 38, FUSE_IOCTL = 39, + FUSE_POLL = 40, }; enum fuse_notify_code { + FUSE_NOTIFY_POLL = 1, FUSE_NOTIFY_CODE_MAX, }; @@ -421,6 +430,22 @@ struct fuse_ioctl_out { __u32 out_iovs; }; +struct fuse_poll_in { + __u64 fh; + __u64 kh; + __u32 flags; + __u32 padding; +}; + +struct fuse_poll_out { + __u32 revents; + __u32 padding; +}; + +struct fuse_notify_poll_wakeup_out { + __u64 kh; +}; + struct fuse_in_header { __u32 len; __u32 opcode; -- cgit v1.2.2 From 1f55ed06cf0c361b293b32e5947d35d173eff2aa Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Mon, 1 Dec 2008 19:14:02 +0100 Subject: fuse: update interface version Change interface version to 7.11 after adding the IOCTL and POLL messages. Also clean up the header a bit: - update copyright date to 2008 - fix checkpatch warning: WARNING: Use #include instead of - remove FUSE_MAJOR define, which is not being used any more Signed-off-by: Miklos Szeredi --- include/linux/fuse.h | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fuse.h b/include/linux/fuse.h index 5650cf033e73..162e5defe683 100644 --- a/include/linux/fuse.h +++ b/include/linux/fuse.h @@ -1,6 +1,6 @@ /* FUSE: Filesystem in Userspace - Copyright (C) 2001-2006 Miklos Szeredi + Copyright (C) 2001-2008 Miklos Szeredi This program can be distributed under the terms of the GNU GPL. See the file COPYING. @@ -20,26 +20,27 @@ * * 7.10 * - add nonseekable open flag + * + * 7.11 + * - add IOCTL message + * - add unsolicited notification support + * - add POLL message and NOTIFY_POLL notification */ #ifndef _LINUX_FUSE_H #define _LINUX_FUSE_H -#include -#include +#include /** Version number of this interface */ #define FUSE_KERNEL_VERSION 7 /** Minor version number of this interface */ -#define FUSE_KERNEL_MINOR_VERSION 10 +#define FUSE_KERNEL_MINOR_VERSION 11 /** The node ID of the root inode */ #define FUSE_ROOT_ID 1 -/** The major number of the fuse character device */ -#define FUSE_MAJOR MISC_MAJOR - /* Make sure all structures are padded to 64bit boundary, so 32bit userspace works under 64bit kernels */ -- cgit v1.2.2 From 69423d99fc182a81f3c5db3eb5c140acc6fc64be Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Wed, 10 Dec 2008 13:37:21 +0000 Subject: [MTD] update internal API to support 64-bit device size MTD internal API presently uses 32-bit values to represent device size. This patch updates them to 64-bits but leaves the external API unchanged. Extending the external API is a separate issue for several reasons. First, no one needs it at the moment. Secondly, whether the implementation is done with IOCTLs, sysfs or both is still debated. Thirdly external API changes require the internal API to be accepted first. Note that although the MTD API will be able to support 64-bit device sizes, existing drivers do not and are not required to do so, although NAND base has been updated. In general, changing from 32-bit to 64-bit values cause little or no changes to the majority of the code with the following exceptions: - printk message formats - division and modulus of 64-bit values - NAND base support - 32-bit local variables used by mtdpart and mtdconcat - naughtily assuming one structure maps to another in MEMERASE ioctl Signed-off-by: Adrian Hunter Signed-off-by: Artem Bityutskiy Signed-off-by: David Woodhouse --- include/linux/mtd/mtd.h | 57 ++++++++++++++++++++++++++++++++++++------ include/linux/mtd/nand.h | 2 +- include/linux/mtd/partitions.h | 4 +-- 3 files changed, 52 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h index eae26bb6430a..95e585ecc297 100644 --- a/include/linux/mtd/mtd.h +++ b/include/linux/mtd/mtd.h @@ -15,6 +15,8 @@ #include #include +#include + #define MTD_CHAR_MAJOR 90 #define MTD_BLOCK_MAJOR 31 #define MAX_MTD_DEVICES 32 @@ -25,16 +27,16 @@ #define MTD_ERASE_DONE 0x08 #define MTD_ERASE_FAILED 0x10 -#define MTD_FAIL_ADDR_UNKNOWN 0xffffffff +#define MTD_FAIL_ADDR_UNKNOWN -1LL /* If the erase fails, fail_addr might indicate exactly which block failed. If fail_addr = MTD_FAIL_ADDR_UNKNOWN, the failure was not at the device level or was not specific to any particular block. */ struct erase_info { struct mtd_info *mtd; - u_int32_t addr; - u_int32_t len; - u_int32_t fail_addr; + uint64_t addr; + uint64_t len; + uint64_t fail_addr; u_long time; u_long retries; u_int dev; @@ -46,7 +48,7 @@ struct erase_info { }; struct mtd_erase_region_info { - u_int32_t offset; /* At which this region starts, from the beginning of the MTD */ + uint64_t offset; /* At which this region starts, from the beginning of the MTD */ u_int32_t erasesize; /* For this region */ u_int32_t numblocks; /* Number of blocks of erasesize in this region */ unsigned long *lockmap; /* If keeping bitmap of locks */ @@ -101,7 +103,7 @@ struct mtd_oob_ops { struct mtd_info { u_char type; u_int32_t flags; - u_int32_t size; // Total size of the MTD + uint64_t size; // Total size of the MTD /* "Major" erase size for the device. Naïve users may take this * to be the only erase size available, or may use the more detailed @@ -120,6 +122,16 @@ struct mtd_info { u_int32_t oobsize; // Amount of OOB data per block (e.g. 16) u_int32_t oobavail; // Available OOB bytes per block + /* + * If erasesize is a power of 2 then the shift is stored in + * erasesize_shift otherwise erasesize_shift is zero. Ditto writesize. + */ + unsigned int erasesize_shift; + unsigned int writesize_shift; + /* Masks based on erasesize_shift and writesize_shift */ + unsigned int erasesize_mask; + unsigned int writesize_mask; + // Kernel-only stuff starts here. const char *name; int index; @@ -190,8 +202,8 @@ struct mtd_info { void (*sync) (struct mtd_info *mtd); /* Chip-supported device locking */ - int (*lock) (struct mtd_info *mtd, loff_t ofs, size_t len); - int (*unlock) (struct mtd_info *mtd, loff_t ofs, size_t len); + int (*lock) (struct mtd_info *mtd, loff_t ofs, uint64_t len); + int (*unlock) (struct mtd_info *mtd, loff_t ofs, uint64_t len); /* Power Management functions */ int (*suspend) (struct mtd_info *mtd); @@ -221,6 +233,35 @@ struct mtd_info { void (*put_device) (struct mtd_info *mtd); }; +static inline u_int32_t mtd_div_by_eb(uint64_t sz, struct mtd_info *mtd) +{ + if (mtd->erasesize_shift) + return sz >> mtd->erasesize_shift; + do_div(sz, mtd->erasesize); + return sz; +} + +static inline u_int32_t mtd_mod_by_eb(uint64_t sz, struct mtd_info *mtd) +{ + if (mtd->erasesize_shift) + return sz & mtd->erasesize_mask; + return do_div(sz, mtd->erasesize); +} + +static inline u_int32_t mtd_div_by_ws(uint64_t sz, struct mtd_info *mtd) +{ + if (mtd->writesize_shift) + return sz >> mtd->writesize_shift; + do_div(sz, mtd->writesize); + return sz; +} + +static inline u_int32_t mtd_mod_by_ws(uint64_t sz, struct mtd_info *mtd) +{ + if (mtd->writesize_shift) + return sz & mtd->writesize_mask; + return do_div(sz, mtd->writesize); +} /* Kernel-side ioctl definitions */ diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h index 733d3f3b4eb8..c0677b8082be 100644 --- a/include/linux/mtd/nand.h +++ b/include/linux/mtd/nand.h @@ -399,7 +399,7 @@ struct nand_chip { int bbt_erase_shift; int chip_shift; int numchips; - unsigned long chipsize; + uint64_t chipsize; int pagemask; int pagebuf; int subpagesize; diff --git a/include/linux/mtd/partitions.h b/include/linux/mtd/partitions.h index c92b4d439609..164c7d78687d 100644 --- a/include/linux/mtd/partitions.h +++ b/include/linux/mtd/partitions.h @@ -36,8 +36,8 @@ struct mtd_partition { char *name; /* identifier string */ - u_int32_t size; /* partition size */ - u_int32_t offset; /* offset within the master MTD space */ + uint64_t size; /* partition size */ + uint64_t offset; /* offset within the master MTD space */ u_int32_t mask_flags; /* master MTD flags to mask out for this partition */ struct nand_ecclayout *ecclayout; /* out of band layout for this partition (NAND only)*/ struct mtd_info **mtdp; /* pointer to store the MTD object */ -- cgit v1.2.2 From 3854be7712f7b4bdcaed14664fc7c7124b3fef0d Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Wed, 10 Dec 2008 14:06:42 +0000 Subject: [MTD] Remove strange u_int32_t types from FTL Signed-off-by: David Woodhouse --- include/linux/mtd/ftl.h | 38 +++++++++++++++++++------------------- 1 file changed, 19 insertions(+), 19 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mtd/ftl.h b/include/linux/mtd/ftl.h index 0be442f881dd..0555f7a0b9ed 100644 --- a/include/linux/mtd/ftl.h +++ b/include/linux/mtd/ftl.h @@ -32,25 +32,25 @@ #define _LINUX_FTL_H typedef struct erase_unit_header_t { - u_int8_t LinkTargetTuple[5]; - u_int8_t DataOrgTuple[10]; - u_int8_t NumTransferUnits; - u_int32_t EraseCount; - u_int16_t LogicalEUN; - u_int8_t BlockSize; - u_int8_t EraseUnitSize; - u_int16_t FirstPhysicalEUN; - u_int16_t NumEraseUnits; - u_int32_t FormattedSize; - u_int32_t FirstVMAddress; - u_int16_t NumVMPages; - u_int8_t Flags; - u_int8_t Code; - u_int32_t SerialNumber; - u_int32_t AltEUHOffset; - u_int32_t BAMOffset; - u_int8_t Reserved[12]; - u_int8_t EndTuple[2]; + uint8_t LinkTargetTuple[5]; + uint8_t DataOrgTuple[10]; + uint8_t NumTransferUnits; + uint32_t EraseCount; + uint16_t LogicalEUN; + uint8_t BlockSize; + uint8_t EraseUnitSize; + uint16_t FirstPhysicalEUN; + uint16_t NumEraseUnits; + uint32_t FormattedSize; + uint32_t FirstVMAddress; + uint16_t NumVMPages; + uint8_t Flags; + uint8_t Code; + uint32_t SerialNumber; + uint32_t AltEUHOffset; + uint32_t BAMOffset; + uint8_t Reserved[12]; + uint8_t EndTuple[2]; } erase_unit_header_t; /* Flags in erase_unit_header_t */ -- cgit v1.2.2 From 26cdb67c74aedc22367e6d0271f7f955220cca65 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Wed, 10 Dec 2008 14:08:12 +0000 Subject: [MTD] Remove more strange u_intxx_t types Signed-off-by: David Woodhouse --- include/linux/mtd/mtd.h | 26 +++++++++++++------------- include/linux/mtd/partitions.h | 2 +- 2 files changed, 14 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h index 95e585ecc297..adef674855f3 100644 --- a/include/linux/mtd/mtd.h +++ b/include/linux/mtd/mtd.h @@ -39,8 +39,8 @@ struct erase_info { uint64_t fail_addr; u_long time; u_long retries; - u_int dev; - u_int cell; + unsigned dev; + unsigned cell; void (*callback) (struct erase_info *self); u_long priv; u_char state; @@ -49,8 +49,8 @@ struct erase_info { struct mtd_erase_region_info { uint64_t offset; /* At which this region starts, from the beginning of the MTD */ - u_int32_t erasesize; /* For this region */ - u_int32_t numblocks; /* Number of blocks of erasesize in this region */ + uint32_t erasesize; /* For this region */ + uint32_t numblocks; /* Number of blocks of erasesize in this region */ unsigned long *lockmap; /* If keeping bitmap of locks */ }; @@ -102,14 +102,14 @@ struct mtd_oob_ops { struct mtd_info { u_char type; - u_int32_t flags; + uint32_t flags; uint64_t size; // Total size of the MTD /* "Major" erase size for the device. Naïve users may take this * to be the only erase size available, or may use the more detailed * information below if they desire */ - u_int32_t erasesize; + uint32_t erasesize; /* Minimal writable flash unit size. In case of NOR flash it is 1 (even * though individual bits can be cleared), in case of NAND flash it is * one NAND page (or half, or one-fourths of it), in case of ECC-ed NOR @@ -117,10 +117,10 @@ struct mtd_info { * Any driver registering a struct mtd_info must ensure a writesize of * 1 or larger. */ - u_int32_t writesize; + uint32_t writesize; - u_int32_t oobsize; // Amount of OOB data per block (e.g. 16) - u_int32_t oobavail; // Available OOB bytes per block + uint32_t oobsize; // Amount of OOB data per block (e.g. 16) + uint32_t oobavail; // Available OOB bytes per block /* * If erasesize is a power of 2 then the shift is stored in @@ -233,7 +233,7 @@ struct mtd_info { void (*put_device) (struct mtd_info *mtd); }; -static inline u_int32_t mtd_div_by_eb(uint64_t sz, struct mtd_info *mtd) +static inline uint32_t mtd_div_by_eb(uint64_t sz, struct mtd_info *mtd) { if (mtd->erasesize_shift) return sz >> mtd->erasesize_shift; @@ -241,14 +241,14 @@ static inline u_int32_t mtd_div_by_eb(uint64_t sz, struct mtd_info *mtd) return sz; } -static inline u_int32_t mtd_mod_by_eb(uint64_t sz, struct mtd_info *mtd) +static inline uint32_t mtd_mod_by_eb(uint64_t sz, struct mtd_info *mtd) { if (mtd->erasesize_shift) return sz & mtd->erasesize_mask; return do_div(sz, mtd->erasesize); } -static inline u_int32_t mtd_div_by_ws(uint64_t sz, struct mtd_info *mtd) +static inline uint32_t mtd_div_by_ws(uint64_t sz, struct mtd_info *mtd) { if (mtd->writesize_shift) return sz >> mtd->writesize_shift; @@ -256,7 +256,7 @@ static inline u_int32_t mtd_div_by_ws(uint64_t sz, struct mtd_info *mtd) return sz; } -static inline u_int32_t mtd_mod_by_ws(uint64_t sz, struct mtd_info *mtd) +static inline uint32_t mtd_mod_by_ws(uint64_t sz, struct mtd_info *mtd) { if (mtd->writesize_shift) return sz & mtd->writesize_mask; diff --git a/include/linux/mtd/partitions.h b/include/linux/mtd/partitions.h index 164c7d78687d..a45dd831b3f8 100644 --- a/include/linux/mtd/partitions.h +++ b/include/linux/mtd/partitions.h @@ -38,7 +38,7 @@ struct mtd_partition { char *name; /* identifier string */ uint64_t size; /* partition size */ uint64_t offset; /* offset within the master MTD space */ - u_int32_t mask_flags; /* master MTD flags to mask out for this partition */ + uint32_t mask_flags; /* master MTD flags to mask out for this partition */ struct nand_ecclayout *ecclayout; /* out of band layout for this partition (NAND only)*/ struct mtd_info **mtdp; /* pointer to store the MTD object */ }; -- cgit v1.2.2 From d3af0f048c114dd53713d5920c54f6d5b6b12139 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Mon, 1 Dec 2008 14:23:38 -0800 Subject: [MTD] [NAND] remove excess kernel-doc notation Delete extra kernel-doc notation for struct fields and function parameters that don't exist: Warning(include/linux/mtd/nand.h:428): Excess struct/union/enum/typedef member 'wq' description in 'nand_chip' Warning(include/linux/mtd/nand.h:428): Excess struct/union/enum/typedef member 'datbuf' description in 'nand_chip' Warning(include/linux/mtd/nand.h:428): Excess struct/union/enum/typedef member 'oobbuf' description in 'nand_chip' Warning(include/linux/mtd/nand.h:428): Excess struct/union/enum/typedef member 'oobdirty' description in 'nand_chip' Warning(include/linux/mtd/nand.h:428): Excess struct/union/enum/typedef member 'data_poi' description in 'nand_chip' Warning(drivers/mtd/nand/nand_base.c:2527): Excess function parameter 'maxchips' description in 'nand_scan_tail' Signed-off-by: Randy Dunlap Signed-off-by: Andrew Morton Signed-off-by: David Woodhouse --- include/linux/mtd/nand.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h index c0677b8082be..db5b63da2a7e 100644 --- a/include/linux/mtd/nand.h +++ b/include/linux/mtd/nand.h @@ -335,17 +335,12 @@ struct nand_buffers { * @erase_cmd: [INTERN] erase command write function, selectable due to AND support * @scan_bbt: [REPLACEABLE] function to scan bad block table * @chip_delay: [BOARDSPECIFIC] chip dependent delay for transfering data from array to read regs (tR) - * @wq: [INTERN] wait queue to sleep on if a NAND operation is in progress * @state: [INTERN] the current state of the NAND device * @oob_poi: poison value buffer * @page_shift: [INTERN] number of address bits in a page (column address bits) * @phys_erase_shift: [INTERN] number of address bits in a physical eraseblock * @bbt_erase_shift: [INTERN] number of address bits in a bbt entry * @chip_shift: [INTERN] number of address bits in one chip - * @datbuf: [INTERN] internal buffer for one page + oob - * @oobbuf: [INTERN] oob buffer for one eraseblock - * @oobdirty: [INTERN] indicates that oob_buf must be reinitialized - * @data_poi: [INTERN] pointer to a data buffer * @options: [BOARDSPECIFIC] various chip options. They can partly be set to inform nand_scan about * special functionality. See the defines for further explanation * @badblockpos: [INTERN] position of the bad block marker in the oob area -- cgit v1.2.2 From 3f4b0ef7f2899c91b1d6958779f084b44dd59d32 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sun, 26 Oct 2008 20:52:15 +0100 Subject: ACPI hibernate: Add a mechanism to save/restore ACPI NVS memory According to the ACPI Specification 3.0b, Section 15.3.2, "OSPM will call the _PTS control method some time before entering a sleeping state, to allow the platform's AML code to update this memory image before entering the sleeping state. After the system awakes from an S4 state, OSPM will restore this memory area and call the _WAK control method to enable the BIOS to reclaim its memory image." For this reason, implement a mechanism allowing us to save the NVS memory during hibernation and to restore it during the subsequent resume. Based on a patch by Zhang Rui. Signed-off-by: Rafael J. Wysocki Acked-by: Nigel Cunningham Cc: Zhang Rui Signed-off-by: Len Brown --- include/linux/suspend.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include/linux') diff --git a/include/linux/suspend.h b/include/linux/suspend.h index 2ce8207686e2..2b409c44db83 100644 --- a/include/linux/suspend.h +++ b/include/linux/suspend.h @@ -232,6 +232,11 @@ extern unsigned long get_safe_page(gfp_t gfp_mask); extern void hibernation_set_ops(struct platform_hibernation_ops *ops); extern int hibernate(void); +extern int hibernate_nvs_register(unsigned long start, unsigned long size); +extern int hibernate_nvs_alloc(void); +extern void hibernate_nvs_free(void); +extern void hibernate_nvs_save(void); +extern void hibernate_nvs_restore(void); #else /* CONFIG_HIBERNATION */ static inline int swsusp_page_is_forbidden(struct page *p) { return 0; } static inline void swsusp_set_page_free(struct page *p) {} @@ -239,6 +244,14 @@ static inline void swsusp_unset_page_free(struct page *p) {} static inline void hibernation_set_ops(struct platform_hibernation_ops *ops) {} static inline int hibernate(void) { return -ENOSYS; } +static inline int hibernate_nvs_register(unsigned long a, unsigned long b) +{ + return 0; +} +static inline int hibernate_nvs_alloc(void) { return 0; } +static inline void hibernate_nvs_free(void) {} +static inline void hibernate_nvs_save(void) {} +static inline void hibernate_nvs_restore(void) {} #endif /* CONFIG_HIBERNATION */ #ifdef CONFIG_PM_SLEEP -- cgit v1.2.2 From ba84ed9546e91348fdf3ff2bff859b0ee53b407a Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sun, 26 Oct 2008 20:56:30 +0100 Subject: ACPI hibernate: Introduce new kernel parameter acpi_sleep=s4_nonvs On some machines it may be necessary to disable the saving/restoring of the ACPI NVS memory region during hibernation/resume. For this purpose, introduce new ACPI kernel command line option acpi_sleep=s4_nonvs. Based on a patch by Zhang Rui. Signed-off-by: Rafael J. Wysocki Acked-by: Nigel Cunningham Acked-by: Pavel Machek Signed-off-by: Len Brown --- include/linux/acpi.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/acpi.h b/include/linux/acpi.h index fba8051fb297..dfa0a5356c53 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -270,6 +270,7 @@ int acpi_check_mem_region(resource_size_t start, resource_size_t n, #ifdef CONFIG_PM_SLEEP void __init acpi_no_s4_hw_signature(void); void __init acpi_old_suspend_ordering(void); +void __init acpi_s4_no_nvs(void); #endif /* CONFIG_PM_SLEEP */ #else /* CONFIG_ACPI */ -- cgit v1.2.2 From 50b6f1f4a430608f7345f66ecd68a129bff11649 Mon Sep 17 00:00:00 2001 From: Kwangwoo Lee Date: Sat, 20 Dec 2008 04:26:01 -0500 Subject: Input: add tsc2007 based touchscreen driver This drive has been tested on ARM9 based SoC - MV86XX. Signed-off-by: Kwangwoo Lee Acked-by: Jean Delvare Signed-off-by: Dmitry Torokhov --- include/linux/i2c/tsc2007.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) create mode 100644 include/linux/i2c/tsc2007.h (limited to 'include/linux') diff --git a/include/linux/i2c/tsc2007.h b/include/linux/i2c/tsc2007.h new file mode 100644 index 000000000000..c6361fbb7bf9 --- /dev/null +++ b/include/linux/i2c/tsc2007.h @@ -0,0 +1,17 @@ +#ifndef __LINUX_I2C_TSC2007_H +#define __LINUX_I2C_TSC2007_H + +/* linux/i2c/tsc2007.h */ + +struct tsc2007_platform_data { + u16 model; /* 2007. */ + u16 x_plate_ohms; + + int (*get_pendown_state)(void); + void (*clear_penirq)(void); /* If needed, clear 2nd level + interrupt source */ + int (*init_platform_hw)(void); + void (*exit_platform_hw)(void); +}; + +#endif -- cgit v1.2.2 From 160bbab3000dafccbe43688e48208cecf4deb879 Mon Sep 17 00:00:00 2001 From: Kay Sievers Date: Tue, 23 Dec 2008 10:00:14 +0000 Subject: [MTD] struct device - replace bus_id with dev_name(), dev_set_name() Signed-off-by: Kay Sievers Signed-off-by: Greg Kroah-Hartman Signed-off-by: David Woodhouse --- include/linux/mtd/concat.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mtd/concat.h b/include/linux/mtd/concat.h index c02f3d264ecf..e80c674daeb3 100644 --- a/include/linux/mtd/concat.h +++ b/include/linux/mtd/concat.h @@ -13,7 +13,7 @@ struct mtd_info *mtd_concat_create( struct mtd_info *subdev[], /* subdevices to concatenate */ int num_devs, /* number of subdevices */ - char *name); /* name for the new device */ + const char *name); /* name for the new device */ void mtd_concat_destroy(struct mtd_info *mtd); -- cgit v1.2.2 From 34a4c5eb421dab6fe8381aa12c990f9d6f645b17 Mon Sep 17 00:00:00 2001 From: Mike Frysinger Date: Mon, 29 Dec 2008 04:00:23 -0800 Subject: Input: map_to_7segment.h - convert to __inline__ for userspace Use __inline__ rather than inline for map_to_seg7() since it is exported to userspace. Signed-off-by: Mike Frysinger Signed-off-by: Dmitry Torokhov --- include/linux/map_to_7segment.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/map_to_7segment.h b/include/linux/map_to_7segment.h index 7df8432c4402..12d62a54d470 100644 --- a/include/linux/map_to_7segment.h +++ b/include/linux/map_to_7segment.h @@ -75,7 +75,7 @@ struct seg7_conversion_map { unsigned char table[128]; }; -static inline int map_to_seg7(struct seg7_conversion_map *map, int c) +static __inline__ int map_to_seg7(struct seg7_conversion_map *map, int c) { return c >= 0 && c < sizeof(map->table) ? map->table[c] : -EINVAL; } -- cgit v1.2.2 From 47fea2adfc9e16846bc57c2f64ff233b354fef39 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Mon, 29 Dec 2008 23:39:17 +0530 Subject: sched: sched.c declare variables before they get used Impact: cleanup, avoid sparse warnings In linux/sched.h moved out sysctl_sched_latency, sysctl_sched_min_granularity, sysctl_sched_wakeup_granularity, sysctl_sched_shares_ratelimit and sysctl_sched_shares_thresh from #ifdef CONFIG_SCHED_DEBUG as these variables are common for both. Fixes these sparse warnings: kernel/sched.c:825:14: warning: symbol 'sysctl_sched_shares_ratelimit' was not declared. Should it be static? kernel/sched.c:832:14: warning: symbol 'sysctl_sched_shares_thresh' was not declared. Should it be static? kernel/sched_fair.c:37:14: warning: symbol 'sysctl_sched_latency' was not declared. Should it be static? kernel/sched_fair.c:43:14: warning: symbol 'sysctl_sched_min_granularity' was not declared. Should it be static? kernel/sched_fair.c:72:14: warning: symbol 'sysctl_sched_wakeup_granularity' was not declared. Should it be static? Signed-off-by: Jaswinder Singh Rajput Signed-off-by: Ingo Molnar --- include/linux/sched.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 8395e715809d..01d9fd268eb0 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1651,16 +1651,16 @@ extern void wake_up_idle_cpu(int cpu); static inline void wake_up_idle_cpu(int cpu) { } #endif -#ifdef CONFIG_SCHED_DEBUG extern unsigned int sysctl_sched_latency; extern unsigned int sysctl_sched_min_granularity; extern unsigned int sysctl_sched_wakeup_granularity; +extern unsigned int sysctl_sched_shares_ratelimit; +extern unsigned int sysctl_sched_shares_thresh; +#ifdef CONFIG_SCHED_DEBUG extern unsigned int sysctl_sched_child_runs_first; extern unsigned int sysctl_sched_features; extern unsigned int sysctl_sched_migration_cost; extern unsigned int sysctl_sched_nr_migrate; -extern unsigned int sysctl_sched_shares_ratelimit; -extern unsigned int sysctl_sched_shares_thresh; int sched_nr_latency_handler(struct ctl_table *table, int write, struct file *file, void __user *buffer, size_t *length, -- cgit v1.2.2 From f748bafa3ca1fb056e63afdeecacc1c68d8104df Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Mon, 8 Dec 2008 21:30:31 -0700 Subject: ACPI: PCI: move struct acpi_prt_entry declaration out of public header file The struct acpi_prt_entry is used only in pci_irq.c, so there's no need for the declaration to be public. This patch moves it into pci_irq.c. Signed-off-by: Bjorn Helgaas Signed-off-by: Len Brown --- include/linux/acpi.h | 16 ---------------- 1 file changed, 16 deletions(-) (limited to 'include/linux') diff --git a/include/linux/acpi.h b/include/linux/acpi.h index fba8051fb297..813f937b3ab4 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -131,22 +131,6 @@ extern int acpi_get_override_irq(int bus_irq, int *trigger, int *polarity); */ void acpi_unregister_gsi (u32 gsi); -struct acpi_prt_entry { - struct list_head node; - struct acpi_pci_id id; - u8 pin; - struct { - acpi_handle handle; - u32 index; - } link; - u32 irq; -}; - -struct acpi_prt_list { - int count; - struct list_head entries; -}; - struct pci_dev; int acpi_pci_irq_enable (struct pci_dev *dev); -- cgit v1.2.2 From ea7e96e0f2277107d9ea14c3f16c86ba82b2e560 Mon Sep 17 00:00:00 2001 From: Lin Ming Date: Tue, 16 Dec 2008 16:28:17 +0800 Subject: ACPI: remove private acpica headers from driver files External driver files should not include any private acpica headers. Signed-off-by: Lin Ming Signed-off-by: Len Brown --- include/linux/pci_hotplug.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/pci_hotplug.h b/include/linux/pci_hotplug.h index a00bd1a0f156..c2d1a7d1886a 100644 --- a/include/linux/pci_hotplug.h +++ b/include/linux/pci_hotplug.h @@ -223,7 +223,6 @@ struct hotplug_params { #ifdef CONFIG_ACPI #include #include -#include extern acpi_status acpi_get_hp_params_from_firmware(struct pci_bus *bus, struct hotplug_params *hpp); int acpi_get_hp_hw_control_from_firmware(struct pci_dev *dev, u32 flags); -- cgit v1.2.2 From 1c5745aa380efb6417b5681104b007c8612fb496 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 22 Dec 2008 23:05:28 +0100 Subject: sched_clock: prevent scd->clock from moving backwards, take #2 Redo: 5b7dba4: sched_clock: prevent scd->clock from moving backwards which had to be reverted due to s2ram hangs: ca7e716: Revert "sched_clock: prevent scd->clock from moving backwards" ... this time with resume restoring GTOD later in the sequence taken into account as well. The "timekeeping_suspended" flag is not very nice but we cannot call into GTOD before it has been properly resumed and the scheduler will run very early in the resume sequence. Cc: Signed-off-by: Ingo Molnar --- include/linux/time.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/time.h b/include/linux/time.h index ce321ac5c8f8..fbbd2a1c92ba 100644 --- a/include/linux/time.h +++ b/include/linux/time.h @@ -105,6 +105,7 @@ extern unsigned long read_persistent_clock(void); extern int update_persistent_clock(struct timespec now); extern int no_sync_cmos_clock __read_mostly; void timekeeping_init(void); +extern int timekeeping_suspended; unsigned long get_seconds(void); struct timespec current_kernel_time(void); -- cgit v1.2.2 From 56c451f4b583ccdf80c9e676179c9cb49de86745 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Thu, 18 Dec 2008 14:49:37 +0900 Subject: [SCSI] block: fix the partial mappings with struct rq_map_data This fixes bio_copy_user_iov to properly handle the partial mappings with struct rq_map_data (which only sg uses for now but st and osst will shortly). It adds the offset member to struct rq_map_data and changes blk_rq_map_user to update it so that bio_copy_user_iov can add an appropriate page frame via bio_add_pc_page(). Signed-off-by: FUJITA Tomonori Acked-by: Jens Axboe Signed-off-by: James Bottomley --- include/linux/blkdev.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 7035cec583b6..811e5342c452 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -690,6 +690,7 @@ struct rq_map_data { struct page **pages; int page_order; int nr_entries; + unsigned long offset; }; struct req_iterator { -- cgit v1.2.2 From 97ae77a1cd332c7b011d71315c8faabce6840c72 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Thu, 18 Dec 2008 14:49:38 +0900 Subject: [SCSI] block: make blk_rq_map_user take a NULL user-space buffer for WRITE The commit 818827669d85b84241696ffef2de485db46b0b5e (block: make blk_rq_map_user take a NULL user-space buffer) extended blk_rq_map_user to accept a NULL user-space buffer with a READ command. It was necessary to convert sg to use the block layer mapping API. This patch extends blk_rq_map_user again for a WRITE command. It is necessary to convert st and osst drivers to use the block layer apping API. Signed-off-by: FUJITA Tomonori Acked-by: Jens Axboe Signed-off-by: James Bottomley --- include/linux/blkdev.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 811e5342c452..044467ef7b11 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -691,6 +691,7 @@ struct rq_map_data { int page_order; int nr_entries; unsigned long offset; + int null_mapped; }; struct req_iterator { -- cgit v1.2.2 From 87d8fe1ee6b8d2f95076142d58c440dba4e7bdc2 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Sat, 3 Jan 2009 09:47:09 -0500 Subject: add releasepage hooks to block devices which can be used by file systems Implement blkdev_releasepage() to release the buffer_heads and pages after we release private data belonging to a mounted filesystem. Cc: Toshiyuki Okajima Cc: linux-fsdevel@vger.kernel.org Signed-off-by: "Theodore Ts'o" --- include/linux/fs.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index f2a3010140e3..0f54ae0f0ccd 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -565,6 +565,7 @@ struct address_space { struct block_device { dev_t bd_dev; /* not a kdev_t - it's a search key */ struct inode * bd_inode; /* will die */ + struct super_block * bd_super; int bd_openers; struct mutex bd_mutex; /* open/close mutex */ struct semaphore bd_mount_sem; @@ -1385,6 +1386,7 @@ struct super_operations { ssize_t (*quota_read)(struct super_block *, int, char *, size_t, loff_t); ssize_t (*quota_write)(struct super_block *, int, const char *, size_t, loff_t); #endif + int (*bdev_try_to_free_page)(struct super_block*, struct page*, gfp_t); }; /* -- cgit v1.2.2 From c31910672376dfb8d020e32afa7249763bcd924a Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Tue, 6 Jan 2009 11:14:25 -0500 Subject: ext4: Remove code to create the journal inode This code has been obsolete in quite some time, since the supported method for adding a journal inode is to use tune2fs (or to creating new filesystem with a journal via mke2fs or mkfs.ext4). Signed-off-by: "Theodore Ts'o" --- include/linux/jbd2.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index 9d82084a1605..adef1c9940d3 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -1104,7 +1104,6 @@ extern int jbd2_journal_set_features (journal_t *, unsigned long, unsigned long, unsigned long); extern void jbd2_journal_clear_features (journal_t *, unsigned long, unsigned long, unsigned long); -extern int jbd2_journal_create (journal_t *); extern int jbd2_journal_load (journal_t *journal); extern int jbd2_journal_destroy (journal_t *); extern int jbd2_journal_recover (journal_t *journal); -- cgit v1.2.2 From 14eaddc967b16017d4a1a24d2be6c28ecbe06ed8 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 31 Dec 2008 15:15:42 +0000 Subject: CRED: Fix regression in cap_capable() as shown up by sys_faccessat() [ver #2] Fix a regression in cap_capable() due to: commit 5ff7711e635b32f0a1e558227d030c7e45b4a465 Author: David Howells Date: Wed Dec 31 02:52:28 2008 +0000 CRED: Differentiate objective and effective subjective credentials on a task The problem is that the above patch allows a process to have two sets of credentials, and for the most part uses the subjective credentials when accessing current's creds. There is, however, one exception: cap_capable(), and thus capable(), uses the real/objective credentials of the target task, whether or not it is the current task. Ordinarily this doesn't matter, since usually the two cred pointers in current point to the same set of creds. However, sys_faccessat() makes use of this facility to override the credentials of the calling process to make its test, without affecting the creds as seen from other processes. One of the things sys_faccessat() does is to make an adjustment to the effective capabilities mask, which cap_capable(), as it stands, then ignores. The affected capability check is in generic_permission(): if (!(mask & MAY_EXEC) || execute_ok(inode)) if (capable(CAP_DAC_OVERRIDE)) return 0; This change splits capable() from has_capability() down into the commoncap and SELinux code. The capable() security op now only deals with the current process, and uses the current process's subjective creds. A new security op - task_capable() - is introduced that can check any task's objective creds. strictly the capable() security op is superfluous with the presence of the task_capable() op, however it should be faster to call the capable() op since two fewer arguments need be passed down through the various layers. This can be tested by compiling the following program from the XFS testsuite: /* * t_access_root.c - trivial test program to show permission bug. * * Written by Michael Kerrisk - copyright ownership not pursued. * Sourced from: http://linux.derkeiler.com/Mailing-Lists/Kernel/2003-10/6030.html */ #include #include #include #include #include #include #define UID 500 #define GID 100 #define PERM 0 #define TESTPATH "/tmp/t_access" static void errExit(char *msg) { perror(msg); exit(EXIT_FAILURE); } /* errExit */ static void accessTest(char *file, int mask, char *mstr) { printf("access(%s, %s) returns %d\n", file, mstr, access(file, mask)); } /* accessTest */ int main(int argc, char *argv[]) { int fd, perm, uid, gid; char *testpath; char cmd[PATH_MAX + 20]; testpath = (argc > 1) ? argv[1] : TESTPATH; perm = (argc > 2) ? strtoul(argv[2], NULL, 8) : PERM; uid = (argc > 3) ? atoi(argv[3]) : UID; gid = (argc > 4) ? atoi(argv[4]) : GID; unlink(testpath); fd = open(testpath, O_RDWR | O_CREAT, 0); if (fd == -1) errExit("open"); if (fchown(fd, uid, gid) == -1) errExit("fchown"); if (fchmod(fd, perm) == -1) errExit("fchmod"); close(fd); snprintf(cmd, sizeof(cmd), "ls -l %s", testpath); system(cmd); if (seteuid(uid) == -1) errExit("seteuid"); accessTest(testpath, 0, "0"); accessTest(testpath, R_OK, "R_OK"); accessTest(testpath, W_OK, "W_OK"); accessTest(testpath, X_OK, "X_OK"); accessTest(testpath, R_OK | W_OK, "R_OK | W_OK"); accessTest(testpath, R_OK | X_OK, "R_OK | X_OK"); accessTest(testpath, W_OK | X_OK, "W_OK | X_OK"); accessTest(testpath, R_OK | W_OK | X_OK, "R_OK | W_OK | X_OK"); exit(EXIT_SUCCESS); } /* main */ This can be run against an Ext3 filesystem as well as against an XFS filesystem. If successful, it will show: [root@andromeda src]# ./t_access_root /tmp/xxx 0 4043 4043 ---------- 1 dhowells dhowells 0 2008-12-31 03:00 /tmp/xxx access(/tmp/xxx, 0) returns 0 access(/tmp/xxx, R_OK) returns 0 access(/tmp/xxx, W_OK) returns 0 access(/tmp/xxx, X_OK) returns -1 access(/tmp/xxx, R_OK | W_OK) returns 0 access(/tmp/xxx, R_OK | X_OK) returns -1 access(/tmp/xxx, W_OK | X_OK) returns -1 access(/tmp/xxx, R_OK | W_OK | X_OK) returns -1 If unsuccessful, it will show: [root@andromeda src]# ./t_access_root /tmp/xxx 0 4043 4043 ---------- 1 dhowells dhowells 0 2008-12-31 02:56 /tmp/xxx access(/tmp/xxx, 0) returns 0 access(/tmp/xxx, R_OK) returns -1 access(/tmp/xxx, W_OK) returns -1 access(/tmp/xxx, X_OK) returns -1 access(/tmp/xxx, R_OK | W_OK) returns -1 access(/tmp/xxx, R_OK | X_OK) returns -1 access(/tmp/xxx, W_OK | X_OK) returns -1 access(/tmp/xxx, R_OK | W_OK | X_OK) returns -1 I've also tested the fix with the SELinux and syscalls LTP testsuites. Signed-off-by: David Howells Signed-off-by: James Morris --- include/linux/capability.h | 17 ++++++++++++++-- include/linux/security.h | 49 +++++++++++++++++++++++++++++++++++++--------- 2 files changed, 55 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/capability.h b/include/linux/capability.h index e22f48c2a46f..5b8a13214451 100644 --- a/include/linux/capability.h +++ b/include/linux/capability.h @@ -529,8 +529,21 @@ extern const kernel_cap_t __cap_init_eff_set; * * Note that this does not set PF_SUPERPRIV on the task. */ -#define has_capability(t, cap) (security_capable((t), (cap)) == 0) -#define has_capability_noaudit(t, cap) (security_capable_noaudit((t), (cap)) == 0) +#define has_capability(t, cap) (security_task_capable((t), (cap)) == 0) + +/** + * has_capability_noaudit - Determine if a task has a superior capability available (unaudited) + * @t: The task in question + * @cap: The capability to be tested for + * + * Return true if the specified task has the given superior capability + * currently in effect, false if not, but don't write an audit message for the + * check. + * + * Note that this does not set PF_SUPERPRIV on the task. + */ +#define has_capability_noaudit(t, cap) \ + (security_task_capable_noaudit((t), (cap)) == 0) extern int capable(int cap); diff --git a/include/linux/security.h b/include/linux/security.h index 3416cb85e77b..76989b8bc34f 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -48,7 +48,9 @@ struct audit_krule; * These functions are in security/capability.c and are used * as the default capabilities functions */ -extern int cap_capable(struct task_struct *tsk, int cap, int audit); +extern int cap_capable(int cap, int audit); +extern int cap_task_capable(struct task_struct *tsk, const struct cred *cred, + int cap, int audit); extern int cap_settime(struct timespec *ts, struct timezone *tz); extern int cap_ptrace_may_access(struct task_struct *child, unsigned int mode); extern int cap_ptrace_traceme(struct task_struct *parent); @@ -1195,9 +1197,18 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts) * @permitted contains the permitted capability set. * Return 0 and update @new if permission is granted. * @capable: - * Check whether the @tsk process has the @cap capability. + * Check whether the current process has the @cap capability in its + * subjective/effective credentials. + * @cap contains the capability . + * @audit: Whether to write an audit message or not + * Return 0 if the capability is granted for @tsk. + * @task_capable: + * Check whether the @tsk process has the @cap capability in its + * objective/real credentials. * @tsk contains the task_struct for the process. + * @cred contains the credentials to use. * @cap contains the capability . + * @audit: Whether to write an audit message or not * Return 0 if the capability is granted for @tsk. * @acct: * Check permission before enabling or disabling process accounting. If @@ -1290,7 +1301,9 @@ struct security_operations { const kernel_cap_t *effective, const kernel_cap_t *inheritable, const kernel_cap_t *permitted); - int (*capable) (struct task_struct *tsk, int cap, int audit); + int (*capable) (int cap, int audit); + int (*task_capable) (struct task_struct *tsk, const struct cred *cred, + int cap, int audit); int (*acct) (struct file *file); int (*sysctl) (struct ctl_table *table, int op); int (*quotactl) (int cmds, int type, int id, struct super_block *sb); @@ -1556,8 +1569,9 @@ int security_capset(struct cred *new, const struct cred *old, const kernel_cap_t *effective, const kernel_cap_t *inheritable, const kernel_cap_t *permitted); -int security_capable(struct task_struct *tsk, int cap); -int security_capable_noaudit(struct task_struct *tsk, int cap); +int security_capable(int cap); +int security_task_capable(struct task_struct *tsk, int cap); +int security_task_capable_noaudit(struct task_struct *tsk, int cap); int security_acct(struct file *file); int security_sysctl(struct ctl_table *table, int op); int security_quotactl(int cmds, int type, int id, struct super_block *sb); @@ -1754,14 +1768,31 @@ static inline int security_capset(struct cred *new, return cap_capset(new, old, effective, inheritable, permitted); } -static inline int security_capable(struct task_struct *tsk, int cap) +static inline int security_capable(int cap) { - return cap_capable(tsk, cap, SECURITY_CAP_AUDIT); + return cap_capable(cap, SECURITY_CAP_AUDIT); } -static inline int security_capable_noaudit(struct task_struct *tsk, int cap) +static inline int security_task_capable(struct task_struct *tsk, int cap) { - return cap_capable(tsk, cap, SECURITY_CAP_NOAUDIT); + int ret; + + rcu_read_lock(); + ret = cap_task_capable(tsk, __task_cred(tsk), cap, SECURITY_CAP_AUDIT); + rcu_read_unlock(); + return ret; +} + +static inline +int security_task_capable_noaudit(struct task_struct *tsk, int cap) +{ + int ret; + + rcu_read_lock(); + ret = cap_task_capable(tsk, __task_cred(tsk), cap, + SECURITY_CAP_NOAUDIT); + rcu_read_unlock(); + return ret; } static inline int security_acct(struct file *file) -- cgit v1.2.2 From a6037b61c2f5fc99c57c15b26d7cfa58bbb34008 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 5 Jan 2009 11:28:22 +0100 Subject: hrtimer: fix recursion deadlock by re-introducing the softirq Impact: fix rare runtime deadlock There are a few sites that do: spin_lock_irq(&foo) hrtimer_start(&bar) __run_hrtimer(&bar) func() spin_lock(&foo) which obviously deadlocks. In order to avoid this, never call __run_hrtimer() from hrtimer_start*() context, but instead defer this to softirq context. Signed-off-by: Peter Zijlstra Signed-off-by: Ingo Molnar --- include/linux/interrupt.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index 0702c4d7bdf0..2062833f5f7a 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -253,7 +253,8 @@ enum BLOCK_SOFTIRQ, TASKLET_SOFTIRQ, SCHED_SOFTIRQ, - RCU_SOFTIRQ, /* Preferable RCU should always be the last softirq */ + HRTIMER_SOFTIRQ, + RCU_SOFTIRQ, /* Preferable RCU should always be the last softirq */ NR_SOFTIRQS }; -- cgit v1.2.2 From c70f22d203fc02c805b6ed4a3483b740dc36786b Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Mon, 5 Jan 2009 19:07:50 +0800 Subject: sched: clean up arch_reinit_sched_domains() - Make arch_reinit_sched_domains() static. It was exported to be used in s390, but now rebuild_sched_domains() is used instead. - Make it return void. Signed-off-by: Li Zefan Signed-off-by: Ingo Molnar --- include/linux/sched.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 38a3f4b15394..91207df702e8 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -912,7 +912,6 @@ static inline struct cpumask *sched_domain_span(struct sched_domain *sd) extern void partition_sched_domains(int ndoms_new, struct cpumask *doms_new, struct sched_domain_attr *dattr_new); -extern int arch_reinit_sched_domains(void); /* Test a flag in parent sched domain */ static inline int test_sd_parent(struct sched_domain *sd, int flag) -- cgit v1.2.2 From 922ab535bbe73975ce62f71ab9bf8ec9bce71c29 Mon Sep 17 00:00:00 2001 From: Alexey Korolev Date: Tue, 16 Dec 2008 18:13:58 +0000 Subject: [MTD] LPDDR QINFO records definitions There are declaraton of structures and macros definitions necessary for operations with QINFO in this patch. Signed-off-by: Alexey Korolev Acked-by: Jared Hulbert Signed-off-by: David Woodhouse --- include/linux/mtd/qinfo.h | 91 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 91 insertions(+) create mode 100644 include/linux/mtd/qinfo.h (limited to 'include/linux') diff --git a/include/linux/mtd/qinfo.h b/include/linux/mtd/qinfo.h new file mode 100644 index 000000000000..7b3d487d8b3f --- /dev/null +++ b/include/linux/mtd/qinfo.h @@ -0,0 +1,91 @@ +#ifndef __LINUX_MTD_QINFO_H +#define __LINUX_MTD_QINFO_H + +#include +#include +#include +#include +#include +#include +#include + +/* lpddr_private describes lpddr flash chip in memory map + * @ManufactId - Chip Manufacture ID + * @DevId - Chip Device ID + * @qinfo - pointer to qinfo records describing the chip + * @numchips - number of chips including virual RWW partitions + * @chipshift - Chip/partiton size 2^chipshift + * @chips - per-chip data structure + */ +struct lpddr_private { + uint16_t ManufactId; + uint16_t DevId; + struct qinfo_chip *qinfo; + int numchips; + unsigned long chipshift; + struct flchip chips[0]; +}; + +/* qinfo_query_info structure contains request information for + * each qinfo record + * @major - major number of qinfo record + * @major - minor number of qinfo record + * @id_str - descriptive string to access the record + * @desc - detailed description for the qinfo record + */ +struct qinfo_query_info { + uint8_t major; + uint8_t minor; + char *id_str; + char *desc; +}; + +/* + * qinfo_chip structure contains necessary qinfo records data + * @DevSizeShift - Device size 2^n bytes + * @BufSizeShift - Program buffer size 2^n bytes + * @TotalBlocksNum - Total number of blocks + * @UniformBlockSizeShift - Uniform block size 2^UniformBlockSizeShift bytes + * @HWPartsNum - Number of hardware partitions + * @SuspEraseSupp - Suspend erase supported + * @SingleWordProgTime - Single word program 2^SingleWordProgTime u-sec + * @ProgBufferTime - Program buffer write 2^ProgBufferTime u-sec + * @BlockEraseTime - Block erase 2^BlockEraseTime m-sec + */ +struct qinfo_chip { + /* General device info */ + uint16_t DevSizeShift; + uint16_t BufSizeShift; + /* Erase block information */ + uint16_t TotalBlocksNum; + uint16_t UniformBlockSizeShift; + /* Partition information */ + uint16_t HWPartsNum; + /* Optional features */ + uint16_t SuspEraseSupp; + /* Operation typical time */ + uint16_t SingleWordProgTime; + uint16_t ProgBufferTime; + uint16_t BlockEraseTime; +}; + +/* defines for fixup usage */ +#define LPDDR_MFR_ANY 0xffff +#define LPDDR_ID_ANY 0xffff +#define NUMONYX_MFGR_ID 0x0089 +#define R18_DEVICE_ID_1G 0x893c + +static inline map_word lpddr_build_cmd(u_long cmd, struct map_info *map) +{ + map_word val = { {0} }; + val.x[0] = cmd; + return val; +} + +#define CMD(x) lpddr_build_cmd(x, map) +#define CMDVAL(cmd) cmd.x[0] + +struct mtd_info *lpddr_cmdset(struct map_info *); + +#endif + -- cgit v1.2.2 From eb3db27507f74b99241abfa11824d8b6d92b84ef Mon Sep 17 00:00:00 2001 From: Alexey Korolev Date: Tue, 16 Dec 2008 18:15:33 +0000 Subject: [MTD] LPDDR PFOW definition LPDDR chips use PFOW window for sending commands, reading status and capabilites requesting. This pfow.h - contains definitions for PFOW window fileds, possible commands, error flags and some common macro function to avoid code duplications. Signed-off-by: Alexey Korolev Acked-by: Jared Hulbert Signed-off-by: David Woodhouse --- include/linux/mtd/pfow.h | 159 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 159 insertions(+) create mode 100644 include/linux/mtd/pfow.h (limited to 'include/linux') diff --git a/include/linux/mtd/pfow.h b/include/linux/mtd/pfow.h new file mode 100644 index 000000000000..b730d4f84655 --- /dev/null +++ b/include/linux/mtd/pfow.h @@ -0,0 +1,159 @@ +/* Primary function overlay window definitions + * and service functions used by LPDDR chips + */ +#ifndef __LINUX_MTD_PFOW_H +#define __LINUX_MTD_PFOW_H + +#include + +/* PFOW registers addressing */ +/* Address of symbol "P" */ +#define PFOW_QUERY_STRING_P 0x0000 +/* Address of symbol "F" */ +#define PFOW_QUERY_STRING_F 0x0002 +/* Address of symbol "O" */ +#define PFOW_QUERY_STRING_O 0x0004 +/* Address of symbol "W" */ +#define PFOW_QUERY_STRING_W 0x0006 +/* Identification info for LPDDR chip */ +#define PFOW_MANUFACTURER_ID 0x0020 +#define PFOW_DEVICE_ID 0x0022 +/* Address in PFOW where prog buffer can can be found */ +#define PFOW_PROGRAM_BUFFER_OFFSET 0x0040 +/* Size of program buffer in words */ +#define PFOW_PROGRAM_BUFFER_SIZE 0x0042 +/* Address command code register */ +#define PFOW_COMMAND_CODE 0x0080 +/* command data register */ +#define PFOW_COMMAND_DATA 0x0084 +/* command address register lower address bits */ +#define PFOW_COMMAND_ADDRESS_L 0x0088 +/* command address register upper address bits */ +#define PFOW_COMMAND_ADDRESS_H 0x008a +/* number of bytes to be proggrammed lower address bits */ +#define PFOW_DATA_COUNT_L 0x0090 +/* number of bytes to be proggrammed higher address bits */ +#define PFOW_DATA_COUNT_H 0x0092 +/* command execution register, the only possible value is 0x01 */ +#define PFOW_COMMAND_EXECUTE 0x00c0 +/* 0x01 should be written at this address to clear buffer */ +#define PFOW_CLEAR_PROGRAM_BUFFER 0x00c4 +/* device program/erase suspend register */ +#define PFOW_PROGRAM_ERASE_SUSPEND 0x00c8 +/* device status register */ +#define PFOW_DSR 0x00cc + +/* LPDDR memory device command codes */ +/* They are possible values of PFOW command code register */ +#define LPDDR_WORD_PROGRAM 0x0041 +#define LPDDR_BUFF_PROGRAM 0x00E9 +#define LPDDR_BLOCK_ERASE 0x0020 +#define LPDDR_LOCK_BLOCK 0x0061 +#define LPDDR_UNLOCK_BLOCK 0x0062 +#define LPDDR_READ_BLOCK_LOCK_STATUS 0x0065 +#define LPDDR_INFO_QUERY 0x0098 +#define LPDDR_READ_OTP 0x0097 +#define LPDDR_PROG_OTP 0x00C0 +#define LPDDR_RESUME 0x00D0 + +/* Defines possible value of PFOW command execution register */ +#define LPDDR_START_EXECUTION 0x0001 + +/* Defines possible value of PFOW program/erase suspend register */ +#define LPDDR_SUSPEND 0x0001 + +/* Possible values of PFOW device status register */ +/* access R - read; RC read & clearable */ +#define DSR_DPS (1<<1) /* RC; device protect status + * 0 - not protected 1 - locked */ +#define DSR_PSS (1<<2) /* R; program suspend status; + * 0-prog in progress/completed, + * 1- prog suspended */ +#define DSR_VPPS (1<<3) /* RC; 0-Vpp OK, * 1-Vpp low */ +#define DSR_PROGRAM_STATUS (1<<4) /* RC; 0-successful, 1-error */ +#define DSR_ERASE_STATUS (1<<5) /* RC; erase or blank check status; + * 0-success erase/blank check, + * 1 blank check error */ +#define DSR_ESS (1<<6) /* R; erase suspend status; + * 0-erase in progress/complete, + * 1 erase suspended */ +#define DSR_READY_STATUS (1<<7) /* R; Device status + * 0-busy, + * 1-ready */ +#define DSR_RPS (0x3<<8) /* RC; region program status + * 00 - Success, + * 01-re-program attempt in region with + * object mode data, + * 10-object mode program w attempt in + * region with control mode data + * 11-attempt to program invalid half + * with 0x41 command */ +#define DSR_AOS (1<<12) /* RC; 1- AO related failure */ +#define DSR_AVAILABLE (1<<15) /* R; Device availbility + * 1 - Device available + * 0 - not available */ + +/* The superset of all possible error bits in DSR */ +#define DSR_ERR 0x133A + +static inline void send_pfow_command(struct map_info *map, + unsigned long cmd_code, unsigned long adr, + unsigned long len, map_word *datum) +{ + int bits_per_chip = map_bankwidth(map) * 8; + int chipnum; + struct lpddr_private *lpddr = map->fldrv_priv; + chipnum = adr >> lpddr->chipshift; + + map_write(map, CMD(cmd_code), map->pfow_base + PFOW_COMMAND_CODE); + map_write(map, CMD(adr & ((1<pfow_base + PFOW_COMMAND_ADDRESS_L); + map_write(map, CMD(adr>>bits_per_chip), + map->pfow_base + PFOW_COMMAND_ADDRESS_H); + if (len) { + map_write(map, CMD(len & ((1<pfow_base + PFOW_DATA_COUNT_L); + map_write(map, CMD(len>>bits_per_chip), + map->pfow_base + PFOW_DATA_COUNT_H); + } + if (datum) + map_write(map, *datum, map->pfow_base + PFOW_COMMAND_DATA); + + /* Command execution start */ + map_write(map, CMD(LPDDR_START_EXECUTION), + map->pfow_base + PFOW_COMMAND_EXECUTE); +} + +static inline void print_drs_error(unsigned dsr) +{ + int prog_status = (dsr & DSR_RPS) >> 8; + + if (!(dsr & DSR_AVAILABLE)) + printk(KERN_NOTICE"DSR.15: (0) Device not Available\n"); + if (prog_status & 0x03) + printk(KERN_NOTICE"DSR.9,8: (11) Attempt to program invalid " + "half with 41h command\n"); + else if (prog_status & 0x02) + printk(KERN_NOTICE"DSR.9,8: (10) Object Mode Program attempt " + "in region with Control Mode data\n"); + else if (prog_status & 0x01) + printk(KERN_NOTICE"DSR.9,8: (01) Program attempt in region " + "with Object Mode data\n"); + if (!(dsr & DSR_READY_STATUS)) + printk(KERN_NOTICE"DSR.7: (0) Device is Busy\n"); + if (dsr & DSR_ESS) + printk(KERN_NOTICE"DSR.6: (1) Erase Suspended\n"); + if (dsr & DSR_ERASE_STATUS) + printk(KERN_NOTICE"DSR.5: (1) Erase/Blank check error\n"); + if (dsr & DSR_PROGRAM_STATUS) + printk(KERN_NOTICE"DSR.4: (1) Program Error\n"); + if (dsr & DSR_VPPS) + printk(KERN_NOTICE"DSR.3: (1) Vpp low detect, operation " + "aborted\n"); + if (dsr & DSR_PSS) + printk(KERN_NOTICE"DSR.2: (1) Program suspended\n"); + if (dsr & DSR_DPS) + printk(KERN_NOTICE"DSR.1: (1) Aborted Erase/Program attempt " + "on locked block\n"); +} +#endif /* __LINUX_MTD_PFOW_H */ -- cgit v1.2.2 From d13e51e747fee301b404dffcf4a7e1bdc558969b Mon Sep 17 00:00:00 2001 From: Alexey Korolev Date: Tue, 16 Dec 2008 18:21:10 +0000 Subject: [MTD] LPDDR added new pfow_base parameter We need to supply additional parameter to mapping driver and tell LPDDR drivers where PFOW window is in chip mapping. It leads to necessity of map_info structure extendoing. Signed-off-by: Alexey Korolev Acked-by: Jared Hulbert Signed-off-by: David Woodhouse --- include/linux/mtd/map.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mtd/map.h b/include/linux/mtd/map.h index aa30244492c6..b981b8772217 100644 --- a/include/linux/mtd/map.h +++ b/include/linux/mtd/map.h @@ -223,6 +223,7 @@ struct map_info { must leave it enabled. */ void (*set_vpp)(struct map_info *, int); + unsigned long pfow_base; unsigned long map_priv_1; unsigned long map_priv_2; void *fldrv_priv; -- cgit v1.2.2 From d81408304b06a71c28417445202af9cd6673168d Mon Sep 17 00:00:00 2001 From: Alexey Korolev Date: Tue, 16 Dec 2008 18:22:39 +0000 Subject: [MTD] LPDDR extended physmap driver to support LPDDR flash Physmap is a generic map driver for different platforms and flash types. We added support of LPDDR to physmap. All changes here are related to introduction of new pfow_base parameter. This parameter is valid in case of LPDDR chips only. Signed-off-by: Alexey Korolev Acked-by: Jared Hulbert Signed-off-by: David Woodhouse --- include/linux/mtd/physmap.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mtd/physmap.h b/include/linux/mtd/physmap.h index c8e63a5ee72e..76f7cabf07d3 100644 --- a/include/linux/mtd/physmap.h +++ b/include/linux/mtd/physmap.h @@ -24,6 +24,7 @@ struct physmap_flash_data { unsigned int width; void (*set_vpp)(struct map_info *, int); unsigned int nr_parts; + unsigned int pfow_base; struct mtd_partition *parts; }; -- cgit v1.2.2 From be92d7af38fb8a91f8575ab2272e00f2e51667ff Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 5 Jan 2009 14:34:42 +0100 Subject: genirq: provide irq_to_desc() to non-genirq architectures too Impact: build fix on non-genirq architectures Sam Ravnborg reported this build failure on sparc32 allmodconfig, the GPIO drivers assume the presence of irq_to_desc(): drivers/gpio/gpiolib.c: In function `gpiolib_dbg_show': drivers/gpio/gpiolib.c:1146: error: implicit declaration of function 'irq_to_desc' Add it in the !genirq case too. Reported-by: Sam Ravnborg Signed-off-by: Ingo Molnar Tested-by: Sam Ravnborg --- include/linux/irqnr.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/irqnr.h b/include/linux/irqnr.h index 5504a5c97836..86af92e9e84c 100644 --- a/include/linux/irqnr.h +++ b/include/linux/irqnr.h @@ -8,7 +8,12 @@ #ifndef CONFIG_GENERIC_HARDIRQS #include -# define nr_irqs NR_IRQS + +/* + * Wrappers for non-genirq architectures: + */ +#define nr_irqs NR_IRQS +#define irq_to_desc(irq) (&irq_desc[irq]) # define for_each_irq_desc(irq, desc) \ for (irq = 0; irq < nr_irqs; irq++) -- cgit v1.2.2 From c42aa775cc8a8ca558db0cc75979fb8e16667447 Mon Sep 17 00:00:00 2001 From: Nicolas Ferre Date: Thu, 20 Nov 2008 15:59:12 +0100 Subject: atmel-mci: move atmel-mci.h file to include/linux Needed to use the atmel-mci driver in an architecture independant maner. Signed-off-by: Nicolas Ferre Signed-off-by: Haavard Skinnemoen --- include/linux/atmel-mci.h | 39 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) create mode 100644 include/linux/atmel-mci.h (limited to 'include/linux') diff --git a/include/linux/atmel-mci.h b/include/linux/atmel-mci.h new file mode 100644 index 000000000000..2a2213eefd85 --- /dev/null +++ b/include/linux/atmel-mci.h @@ -0,0 +1,39 @@ +#ifndef __LINUX_ATMEL_MCI_H +#define __LINUX_ATMEL_MCI_H + +#define ATMEL_MCI_MAX_NR_SLOTS 2 + +struct dma_slave; + +/** + * struct mci_slot_pdata - board-specific per-slot configuration + * @bus_width: Number of data lines wired up the slot + * @detect_pin: GPIO pin wired to the card detect switch + * @wp_pin: GPIO pin wired to the write protect sensor + * + * If a given slot is not present on the board, @bus_width should be + * set to 0. The other fields are ignored in this case. + * + * Any pins that aren't available should be set to a negative value. + * + * Note that support for multiple slots is experimental -- some cards + * might get upset if we don't get the clock management exactly right. + * But in most cases, it should work just fine. + */ +struct mci_slot_pdata { + unsigned int bus_width; + int detect_pin; + int wp_pin; +}; + +/** + * struct mci_platform_data - board-specific MMC/SDcard configuration + * @dma_slave: DMA slave interface to use in data transfers, or NULL. + * @slot: Per-slot configuration data. + */ +struct mci_platform_data { + struct dma_slave *dma_slave; + struct mci_slot_pdata slot[ATMEL_MCI_MAX_NR_SLOTS]; +}; + +#endif /* __LINUX_ATMEL_MCI_H */ -- cgit v1.2.2 From 07f2211e4fbce6990722d78c4f04225da9c0e9cf Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 5 Jan 2009 17:14:31 -0700 Subject: dmaengine: remove dependency on async_tx async_tx.ko is a consumer of dma channels. A circular dependency arises if modules in drivers/dma rely on common code in async_tx.ko. It prevents either module from being unloaded. Move dma_wait_for_async_tx and async_tx_run_dependencies to dmaeninge.o where they should have been from the beginning. Reviewed-by: Andrew Morton Signed-off-by: Dan Williams --- include/linux/async_tx.h | 15 --------------- include/linux/dmaengine.h | 9 +++++++++ 2 files changed, 9 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/include/linux/async_tx.h b/include/linux/async_tx.h index 0f50d4cc4360..1c816775f135 100644 --- a/include/linux/async_tx.h +++ b/include/linux/async_tx.h @@ -60,8 +60,6 @@ enum async_tx_flags { #ifdef CONFIG_DMA_ENGINE void async_tx_issue_pending_all(void); -enum dma_status dma_wait_for_async_tx(struct dma_async_tx_descriptor *tx); -void async_tx_run_dependencies(struct dma_async_tx_descriptor *tx); #ifdef CONFIG_ARCH_HAS_ASYNC_TX_FIND_CHANNEL #include #else @@ -77,19 +75,6 @@ static inline void async_tx_issue_pending_all(void) do { } while (0); } -static inline enum dma_status -dma_wait_for_async_tx(struct dma_async_tx_descriptor *tx) -{ - return DMA_SUCCESS; -} - -static inline void -async_tx_run_dependencies(struct dma_async_tx_descriptor *tx, - struct dma_chan *host_chan) -{ - do { } while (0); -} - static inline struct dma_chan * async_tx_find_channel(struct dma_async_tx_descriptor *depend_tx, enum dma_transaction_type tx_type, struct page **dst, int dst_count, diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h index adb0b084eb5a..e4ec7e7b8056 100644 --- a/include/linux/dmaengine.h +++ b/include/linux/dmaengine.h @@ -475,11 +475,20 @@ static inline enum dma_status dma_async_is_complete(dma_cookie_t cookie, } enum dma_status dma_sync_wait(struct dma_chan *chan, dma_cookie_t cookie); +#ifdef CONFIG_DMA_ENGINE +enum dma_status dma_wait_for_async_tx(struct dma_async_tx_descriptor *tx); +#else +static inline enum dma_status dma_wait_for_async_tx(struct dma_async_tx_descriptor *tx) +{ + return DMA_SUCCESS; +} +#endif /* --- DMA device --- */ int dma_async_device_register(struct dma_device *device); void dma_async_device_unregister(struct dma_device *device); +void dma_run_dependencies(struct dma_async_tx_descriptor *tx); /* --- Helper iov-locking functions --- */ -- cgit v1.2.2 From b3881f74b31b7d47d0f1c4d89ac3e7f0b9c05e3e Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Mon, 5 Jan 2009 22:46:26 -0500 Subject: ext4: Add mount option to set kjournald's I/O priority Signed-off-by: "Theodore Ts'o" Cc: Jens Axboe --- include/linux/ioprio.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ioprio.h b/include/linux/ioprio.h index f98a656b17e5..76dad4808847 100644 --- a/include/linux/ioprio.h +++ b/include/linux/ioprio.h @@ -86,4 +86,6 @@ static inline int task_nice_ioclass(struct task_struct *task) */ extern int ioprio_best(unsigned short aprio, unsigned short bprio); +extern int set_task_ioprio(struct task_struct *task, int ioprio); + #endif -- cgit v1.2.2 From 835481d9bcd65720b473db6b38746a74a3964218 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Sun, 4 Jan 2009 05:18:06 -0800 Subject: cpumask: convert struct cpufreq_policy to cpumask_var_t Impact: use new cpumask API to reduce memory usage This is part of an effort to reduce structure sizes for machines configured with large NR_CPUS. cpumask_t gets replaced by cpumask_var_t, which is either struct cpumask[1] (small NR_CPUS) or struct cpumask * (large NR_CPUS). Signed-off-by: Rusty Russell Signed-off-by: Mike Travis Acked-by: Dave Jones Signed-off-by: Ingo Molnar --- include/linux/cpufreq.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 484b3abf61bb..384b38d3e8e2 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -80,8 +80,8 @@ struct cpufreq_real_policy { }; struct cpufreq_policy { - cpumask_t cpus; /* CPUs requiring sw coordination */ - cpumask_t related_cpus; /* CPUs with any coordination */ + cpumask_var_t cpus; /* CPUs requiring sw coordination */ + cpumask_var_t related_cpus; /* CPUs with any coordination */ unsigned int shared_type; /* ANY or ALL affected CPUs should set cpufreq */ unsigned int cpu; /* cpu nr of registered CPU */ -- cgit v1.2.2 From 025dfdafe77f20b3890981a394774baab7b9c827 Mon Sep 17 00:00:00 2001 From: Frederik Schwarzer Date: Thu, 16 Oct 2008 19:02:37 +0200 Subject: trivial: fix then -> than typos in comments and documentation - (better, more, bigger ...) then -> (...) than Signed-off-by: Frederik Schwarzer Signed-off-by: Jiri Kosina --- include/linux/mtd/mtd.h | 2 +- include/linux/spi/spi.h | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h index eae26bb6430a..64433eb411d7 100644 --- a/include/linux/mtd/mtd.h +++ b/include/linux/mtd/mtd.h @@ -83,7 +83,7 @@ typedef enum { * @datbuf: data buffer - if NULL only oob data are read/written * @oobbuf: oob data buffer * - * Note, it is allowed to read more then one OOB area at one go, but not write. + * Note, it is allowed to read more than one OOB area at one go, but not write. * The interface assumes that the OOB write requests program only one page's * OOB area. */ diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h index 82229317753d..68bb1c501d0d 100644 --- a/include/linux/spi/spi.h +++ b/include/linux/spi/spi.h @@ -327,9 +327,9 @@ extern struct spi_master *spi_busnum_to_master(u16 busnum); * @tx_dma: DMA address of tx_buf, if @spi_message.is_dma_mapped * @rx_dma: DMA address of rx_buf, if @spi_message.is_dma_mapped * @len: size of rx and tx buffers (in bytes) - * @speed_hz: Select a speed other then the device default for this + * @speed_hz: Select a speed other than the device default for this * transfer. If 0 the default (from @spi_device) is used. - * @bits_per_word: select a bits_per_word other then the device default + * @bits_per_word: select a bits_per_word other than the device default * for this transfer. If 0 the default (from @spi_device) is used. * @cs_change: affects chipselect after this transfer completes * @delay_usecs: microseconds to delay after this transfer before -- cgit v1.2.2 From 0211a9c8508b2183e0e539509aad60414f1c3813 Mon Sep 17 00:00:00 2001 From: Frederik Schwarzer Date: Mon, 29 Dec 2008 22:14:56 +0100 Subject: trivial: fix an -> a typos in documentation and comments It is always "an" if there is a vowel _spoken_ (not written). So it is: "an hour" (spoken vowel) but "a uniform" (spoken 'j') Signed-off-by: Frederik Schwarzer Signed-off-by: Jiri Kosina --- include/linux/ncp_fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ncp_fs.h b/include/linux/ncp_fs.h index 9f2d76347f19..f69e66d151cc 100644 --- a/include/linux/ncp_fs.h +++ b/include/linux/ncp_fs.h @@ -87,7 +87,7 @@ struct ncp_objectname_ioctl #define NCP_AUTH_NDS 0x32 int auth_type; size_t object_name_len; - void __user * object_name; /* an userspace data, in most cases user name */ + void __user * object_name; /* a userspace data, in most cases user name */ }; struct ncp_privatedata_ioctl -- cgit v1.2.2 From bd53cbcce501b61921a1af2dddfa87c5b9923dfd Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Tue, 6 Jan 2009 17:20:48 +0100 Subject: ide: add ->cur_port to struct ide_host and use it for serialized hosts * Pass 'ide_hwif_t *' instead of 'ide_hwgroup_t *' to unexpected_intr(). * Cache pointer to the port currently being serviced in ->cur_port and use it instead of hwif->hwgroup on serialized hosts. Signed-off-by: Bartlomiej Zolnierkiewicz --- include/linux/ide.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/ide.h b/include/linux/ide.h index db5ef8ae1ab9..3de13df8bcef 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -852,6 +852,7 @@ struct ide_host { unsigned int (*init_chipset)(struct pci_dev *); unsigned long host_flags; void *host_priv; + ide_hwif_t *cur_port; /* for hosts requiring serialization */ }; /* -- cgit v1.2.2 From ae86afaee6a1c77c7a06d81dcc3bf872204d3bec Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Tue, 6 Jan 2009 17:20:48 +0100 Subject: ide: use per-port IRQ handlers Use hwif instead of hwgroup as {request,free}_irq()'s cookie, teach ide_intr() to return early for non-active serialized ports, modify unexpected_intr() accordingly and then use per-port IRQ handlers instead of per-hwgroup ones. Signed-off-by: Bartlomiej Zolnierkiewicz --- include/linux/ide.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ide.h b/include/linux/ide.h index 3de13df8bcef..f5382ad0bd4c 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -1274,14 +1274,14 @@ extern void ide_stall_queue(ide_drive_t *drive, unsigned long timeout); extern void ide_timer_expiry(unsigned long); extern irqreturn_t ide_intr(int irq, void *dev_id); -static inline int ide_lock_hwgroup(ide_hwgroup_t *hwgroup) +static inline int ide_lock_hwgroup(ide_hwgroup_t *hwgroup, ide_hwif_t *hwif) { if (hwgroup->busy) return 1; hwgroup->busy = 1; /* for atari only */ - ide_get_lock(ide_intr, hwgroup); + ide_get_lock(ide_intr, hwif); return 0; } -- cgit v1.2.2 From efe0397eef544ac4bcca23d39aa8d5db154952e0 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Tue, 6 Jan 2009 17:20:49 +0100 Subject: ide: remove hwgroup->hwif and {drive,hwif}->next * Add 'int port_count' field to ide_hwgroup_t to keep the track of the number of ports in the hwgroup. Then update init_irq() and ide_remove_port_from_hwgroup() to use it. * Remove no longer needed hwgroup->hwif, {drive,hwif}->next, ide_add_drive_to_hwgroup() and ide_remove_drive_from_hwgroup() (hwgroup->drive now only denotes the currently active device in the hwgroup). * Update locking documentation in . While at it: * Rename ->drive field in ide_hwgroup_t to ->cur_dev. * Use __func__ in ide_timer_expiry(). Signed-off-by: Bartlomiej Zolnierkiewicz --- include/linux/ide.h | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ide.h b/include/linux/ide.h index f5382ad0bd4c..8b74ccdd221c 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -588,7 +588,6 @@ struct ide_drive_s { struct request_queue *queue; /* request queue */ struct request *rq; /* current request */ - struct ide_drive_s *next; /* circular list of hwgroup drives */ void *driver_data; /* extra driver data */ u16 *id; /* identification info */ #ifdef CONFIG_IDE_PROC_FS @@ -750,7 +749,6 @@ struct ide_dma_ops { struct ide_host; typedef struct hwif_s { - struct hwif_s *next; /* for linked-list in ide_hwgroup_t */ struct hwif_s *mate; /* other hwif from same PCI chip */ struct hwgroup_s *hwgroup; /* actually (ide_hwgroup_t *) */ struct proc_dir_entry *proc; /* /proc/ide/ directory entry */ @@ -874,9 +872,7 @@ typedef struct hwgroup_s { unsigned int polling : 1; /* current drive */ - ide_drive_t *drive; - /* ptr to current hwif in linked-list */ - ide_hwif_t *hwif; + ide_drive_t *cur_dev; /* current request */ struct request *rq; @@ -892,6 +888,8 @@ typedef struct hwgroup_s { int req_gen_timer; spinlock_t lock; + + int port_count; } ide_hwgroup_t; typedef struct ide_driver_s ide_driver_t; @@ -1622,12 +1620,7 @@ extern struct mutex ide_cfg_mtx; /* * Structure locking: * - * ide_cfg_mtx and hwgroup->lock together protect changes to - * ide_hwif_t->next - * ide_drive_t->next - * * ide_hwgroup_t->busy: hwgroup->lock - * ide_hwgroup_t->hwif: hwgroup->lock * ide_hwif_t->{hwgroup,mate}: constant, no locking * ide_drive_t->hwif: constant, no locking */ -- cgit v1.2.2 From 5b31f855f10d0053e738baa6d91fb6a3fad35119 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Tue, 6 Jan 2009 17:20:49 +0100 Subject: ide: use lock bitops for ports serialization (v2) * Add ->host_busy field to struct ide_host and use it's first bit together with lock bitops to provide new ports serialization method. * Convert core IDE code to use new ide_[un]lock_host() helpers. This removes the need for taking hwgroup->lock if host is already busy on serialized hosts and makes it possible to merge ide_hwgroup_t into ide_hwif_t (done in the later patch). * Remove no longer needed ide_hwgroup_t.busy and ide_[un]lock_hwgroup(). * Update do_ide_request() documentation. v2: * ide_release_lock() should be called inside IDE_HFLAG_SERIALIZE check. * Add ide_hwif_t.busy flag and ide_[un]lock_port() for serializing devices on a port. Signed-off-by: Bartlomiej Zolnierkiewicz --- include/linux/ide.h | 35 ++++++----------------------------- 1 file changed, 6 insertions(+), 29 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ide.h b/include/linux/ide.h index 8b74ccdd221c..00df155b5a02 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -828,6 +828,7 @@ typedef struct hwif_s { unsigned present : 1; /* this interface exists */ unsigned sg_mapped : 1; /* sg_table and sg_nents are ready */ + unsigned busy : 1; /* serializes devices on a port */ struct device gendev; struct device *portdev; @@ -851,8 +852,13 @@ struct ide_host { unsigned long host_flags; void *host_priv; ide_hwif_t *cur_port; /* for hosts requiring serialization */ + + /* used for hosts requiring serialization */ + volatile long host_busy; }; +#define IDE_HOST_BUSY 0 + /* * internal ide interrupt handler type */ @@ -866,8 +872,6 @@ typedef struct hwgroup_s { /* irq handler, if active */ ide_startstop_t (*handler)(ide_drive_t *); - /* BOOL: protects all fields below */ - volatile int busy; /* BOOL: polling active & poll_timeout field valid */ unsigned int polling : 1; @@ -1271,26 +1275,6 @@ extern void ide_stall_queue(ide_drive_t *drive, unsigned long timeout); extern void ide_timer_expiry(unsigned long); extern irqreturn_t ide_intr(int irq, void *dev_id); - -static inline int ide_lock_hwgroup(ide_hwgroup_t *hwgroup, ide_hwif_t *hwif) -{ - if (hwgroup->busy) - return 1; - - hwgroup->busy = 1; - /* for atari only */ - ide_get_lock(ide_intr, hwif); - - return 0; -} - -static inline void ide_unlock_hwgroup(ide_hwgroup_t *hwgroup) -{ - /* for atari only */ - ide_release_lock(); - hwgroup->busy = 0; -} - extern void do_ide_request(struct request_queue *); void ide_init_disk(struct gendisk *, ide_drive_t *); @@ -1617,13 +1601,6 @@ static inline void ide_set_max_pio(ide_drive_t *drive) extern spinlock_t ide_lock; extern struct mutex ide_cfg_mtx; -/* - * Structure locking: - * - * ide_hwgroup_t->busy: hwgroup->lock - * ide_hwif_t->{hwgroup,mate}: constant, no locking - * ide_drive_t->hwif: constant, no locking - */ #define local_irq_set(flags) do { local_save_flags((flags)); local_irq_enable_in_hardirq(); } while (0) -- cgit v1.2.2 From b65fac32cfe3b2f98cd472fef400bd1c1340de23 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Tue, 6 Jan 2009 17:20:50 +0100 Subject: ide: merge ide_hwgroup_t with ide_hwif_t (v2) * Merge ide_hwgroup_t with ide_hwif_t. * Cleanup init_irq() accordingly, then remove no longer needed ide_remove_port_from_hwgroup() and ide_ports[]. * Remove now unused HWGROUP() macro. While at it: * ide_dump_ata_error() fixups v2: * Fix ->quirk_list check in do_ide_request() (s/hwif->cur_dev/prev_port->cur_dev). Signed-off-by: Bartlomiej Zolnierkiewicz --- include/linux/ide.h | 55 +++++++++++++++++++++++------------------------------ 1 file changed, 24 insertions(+), 31 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ide.h b/include/linux/ide.h index 00df155b5a02..f27f130ba000 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -42,7 +42,6 @@ typedef unsigned char byte; /* used everywhere */ #define ERROR_RECAL 1 /* Recalibrate every 2nd retry */ #define HWIF(drive) ((ide_hwif_t *)((drive)->hwif)) -#define HWGROUP(drive) ((ide_hwgroup_t *)(HWIF(drive)->hwgroup)) /* * Definitions for accessing IDE controller registers @@ -750,7 +749,6 @@ struct ide_host; typedef struct hwif_s { struct hwif_s *mate; /* other hwif from same PCI chip */ - struct hwgroup_s *hwgroup; /* actually (ide_hwgroup_t *) */ struct proc_dir_entry *proc; /* /proc/ide/ directory entry */ struct ide_host *host; @@ -840,6 +838,30 @@ typedef struct hwif_s { #ifdef CONFIG_BLK_DEV_IDEACPI struct ide_acpi_hwif_link *acpidata; #endif + + /* IRQ handler, if active */ + ide_startstop_t (*handler)(ide_drive_t *); + + /* BOOL: polling active & poll_timeout field valid */ + unsigned int polling : 1; + + /* current drive */ + ide_drive_t *cur_dev; + + /* current request */ + struct request *rq; + + /* failsafe timer */ + struct timer_list timer; + /* timeout value during long polls */ + unsigned long poll_timeout; + /* queried upon timeouts */ + int (*expiry)(ide_drive_t *); + + int req_gen; + int req_gen_timer; + + spinlock_t lock; } ____cacheline_internodealigned_in_smp ide_hwif_t; #define MAX_HOST_PORTS 4 @@ -868,34 +890,6 @@ typedef int (ide_expiry_t)(ide_drive_t *); /* used by ide-cd, ide-floppy, etc. */ typedef void (xfer_func_t)(ide_drive_t *, struct request *rq, void *, unsigned); -typedef struct hwgroup_s { - /* irq handler, if active */ - ide_startstop_t (*handler)(ide_drive_t *); - - /* BOOL: polling active & poll_timeout field valid */ - unsigned int polling : 1; - - /* current drive */ - ide_drive_t *cur_dev; - - /* current request */ - struct request *rq; - - /* failsafe timer */ - struct timer_list timer; - /* timeout value during long polls */ - unsigned long poll_timeout; - /* queried upon timeouts */ - int (*expiry)(ide_drive_t *); - - int req_gen; - int req_gen_timer; - - spinlock_t lock; - - int port_count; -} ide_hwgroup_t; - typedef struct ide_driver_s ide_driver_t; extern struct mutex ide_setting_mtx; @@ -1512,7 +1506,6 @@ static inline void ide_acpi_port_init_devices(ide_hwif_t *hwif) { ; } static inline void ide_acpi_set_state(ide_hwif_t *hwif, int on) {} #endif -void ide_remove_port_from_hwgroup(ide_hwif_t *); void ide_unregister(ide_hwif_t *); void ide_register_region(struct gendisk *); -- cgit v1.2.2 From b40d1b88f1001f0224c63fa2c008914514bcef33 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Tue, 6 Jan 2009 17:20:51 +0100 Subject: ide: move ide_init_port_data() and friends to ide-probe.c * Move IDE_DEFAULT_MAX_FAILURES to . * Move ide_cfg_mtx, ide_hwif_to_major[], ide_port_init_devices_data(), ide_init_port_data(), ide_init_port_hw() and ide_unregister() to ide-probe.c from ide.c. * Make ide_unregister(), ide_init_port_data(), ide_init_port_hw() and ide_cfg_mtx static. While at it: * Remove stale ide_init_port_data() documentation and ide_lock extern. Signed-off-by: Bartlomiej Zolnierkiewicz --- include/linux/ide.h | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ide.h b/include/linux/ide.h index f27f130ba000..ee2f461882ad 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -37,6 +37,7 @@ typedef unsigned char byte; /* used everywhere */ /* * Probably not wise to fiddle with these */ +#define IDE_DEFAULT_MAX_FAILURES 1 #define ERROR_MAX 8 /* Max read/write errors per sector */ #define ERROR_RESET 3 /* Reset controller every 4th retry */ #define ERROR_RECAL 1 /* Recalibrate every 2nd retry */ @@ -184,9 +185,6 @@ typedef struct hw_regs_s { unsigned long config; } hw_regs_t; -void ide_init_port_data(struct hwif_s *, unsigned int); -void ide_init_port_hw(struct hwif_s *, hw_regs_t *); - static inline void ide_std_init_ports(hw_regs_t *hw, unsigned long io_addr, unsigned long ctl_addr) @@ -1506,8 +1504,6 @@ static inline void ide_acpi_port_init_devices(ide_hwif_t *hwif) { ; } static inline void ide_acpi_set_state(ide_hwif_t *hwif, int on) {} #endif -void ide_unregister(ide_hwif_t *); - void ide_register_region(struct gendisk *); void ide_unregister_region(struct gendisk *); @@ -1592,9 +1588,6 @@ static inline void ide_set_max_pio(ide_drive_t *drive) ide_set_pio(drive, 255); } -extern spinlock_t ide_lock; -extern struct mutex ide_cfg_mtx; - #define local_irq_set(flags) do { local_save_flags((flags)); local_irq_enable_in_hardirq(); } while (0) char *ide_media_string(ide_drive_t *); -- cgit v1.2.2 From 898ec223fea2a2df88035e58dbf50f493577e225 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Tue, 6 Jan 2009 17:20:52 +0100 Subject: ide: remove HWIF() macro Signed-off-by: Bartlomiej Zolnierkiewicz --- include/linux/ide.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ide.h b/include/linux/ide.h index ee2f461882ad..58b9c99482cd 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -42,8 +42,6 @@ typedef unsigned char byte; /* used everywhere */ #define ERROR_RESET 3 /* Reset controller every 4th retry */ #define ERROR_RECAL 1 /* Recalibrate every 2nd retry */ -#define HWIF(drive) ((ide_hwif_t *)((drive)->hwif)) - /* * Definitions for accessing IDE controller registers */ -- cgit v1.2.2 From 54cc1428cfa619e16d75baae8cb041a2eff015f0 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Tue, 6 Jan 2009 17:20:52 +0100 Subject: ide: remove local_irq_set() macro Signed-off-by: Bartlomiej Zolnierkiewicz --- include/linux/ide.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ide.h b/include/linux/ide.h index 58b9c99482cd..82d500c5a847 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -1586,8 +1586,6 @@ static inline void ide_set_max_pio(ide_drive_t *drive) ide_set_pio(drive, 255); } -#define local_irq_set(flags) do { local_save_flags((flags)); local_irq_enable_in_hardirq(); } while (0) - char *ide_media_string(ide_drive_t *); extern struct device_attribute ide_dev_attrs[]; -- cgit v1.2.2 From c0ae50234771684ae0cbac5dfb70e1a09c22aa89 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Tue, 6 Jan 2009 17:20:52 +0100 Subject: ide: remove ide_pci_enablebit_t typedef Remove needless parens while at it. Acked-by: Sergei Shtylyov Signed-off-by: Bartlomiej Zolnierkiewicz --- include/linux/ide.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ide.h b/include/linux/ide.h index 82d500c5a847..cca6cfb299bc 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -1300,11 +1300,11 @@ static inline int ide_hwif_setup_dma(ide_hwif_t *hwif, } #endif -typedef struct ide_pci_enablebit_s { +struct ide_pci_enablebit { u8 reg; /* byte pci reg holding the enable-bit */ u8 mask; /* mask to isolate the enable-bit */ u8 val; /* value of masked reg when "enabled" */ -} ide_pci_enablebit_t; +}; enum { /* Uses ISA control ports not PCI ones. */ @@ -1393,7 +1393,8 @@ struct ide_port_info { const struct ide_port_ops *port_ops; const struct ide_dma_ops *dma_ops; - ide_pci_enablebit_t enablebits[2]; + struct ide_pci_enablebit enablebits[2]; + hwif_chipset_t chipset; u16 max_sectors; /* if < than the default one */ -- cgit v1.2.2 From 9892ec5497af1ec38c46974b5a370ba11c636b19 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Tue, 6 Jan 2009 17:20:53 +0100 Subject: ide: remove 'byte' typedef Just use u8 instead, also s/__u8/u8/ in ide-cd.h while at it. Acked-by: Sergei Shtylyov Signed-off-by: Bartlomiej Zolnierkiewicz --- include/linux/ide.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ide.h b/include/linux/ide.h index cca6cfb299bc..545a67f1f6b5 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -32,8 +32,6 @@ # define SUPPORT_VLB_SYNC 1 #endif -typedef unsigned char byte; /* used everywhere */ - /* * Probably not wise to fiddle with these */ @@ -1161,7 +1159,7 @@ void ide_pad_transfer(ide_drive_t *, int, int); ide_startstop_t __ide_error(ide_drive_t *, struct request *, u8, u8); -ide_startstop_t ide_error (ide_drive_t *drive, const char *msg, byte stat); +ide_startstop_t ide_error(ide_drive_t *, const char *, u8); void ide_fix_driveid(u16 *); -- cgit v1.2.2 From 7f3c868ba78e486bd9d7569f884dd46d8f59bb18 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Tue, 6 Jan 2009 17:20:53 +0100 Subject: ide: remove ide_driver_t typedef While at it: - s/struct ide_driver_s/struct ide_driver/ - use to_ide_driver() macro in ide-proc.c Signed-off-by: Bartlomiej Zolnierkiewicz --- include/linux/ide.h | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ide.h b/include/linux/ide.h index 545a67f1f6b5..fcbcfa2cbe75 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -437,7 +437,7 @@ struct ide_atapi_pc { }; struct ide_devset; -struct ide_driver_s; +struct ide_driver; #ifdef CONFIG_BLK_DEV_IDEACPI struct ide_acpi_drive_link; @@ -884,8 +884,6 @@ typedef int (ide_expiry_t)(ide_drive_t *); /* used by ide-cd, ide-floppy, etc. */ typedef void (xfer_func_t)(ide_drive_t *, struct request *rq, void *, unsigned); -typedef struct ide_driver_s ide_driver_t; - extern struct mutex ide_setting_mtx; /* @@ -1011,8 +1009,8 @@ void ide_proc_register_port(ide_hwif_t *); void ide_proc_port_register_devices(ide_hwif_t *); void ide_proc_unregister_device(ide_drive_t *); void ide_proc_unregister_port(ide_hwif_t *); -void ide_proc_register_driver(ide_drive_t *, ide_driver_t *); -void ide_proc_unregister_driver(ide_drive_t *, ide_driver_t *); +void ide_proc_register_driver(ide_drive_t *, struct ide_driver *); +void ide_proc_unregister_driver(ide_drive_t *, struct ide_driver *); read_proc_t proc_ide_read_capacity; read_proc_t proc_ide_read_geometry; @@ -1039,8 +1037,10 @@ static inline void ide_proc_register_port(ide_hwif_t *hwif) { ; } static inline void ide_proc_port_register_devices(ide_hwif_t *hwif) { ; } static inline void ide_proc_unregister_device(ide_drive_t *drive) { ; } static inline void ide_proc_unregister_port(ide_hwif_t *hwif) { ; } -static inline void ide_proc_register_driver(ide_drive_t *drive, ide_driver_t *driver) { ; } -static inline void ide_proc_unregister_driver(ide_drive_t *drive, ide_driver_t *driver) { ; } +static inline void ide_proc_register_driver(ide_drive_t *drive, + struct ide_driver *driver) { ; } +static inline void ide_proc_unregister_driver(ide_drive_t *drive, + struct ide_driver *driver) { ; } #define PROC_IDE_READ_RETURN(page,start,off,count,eof,len) return 0; #endif @@ -1109,7 +1109,7 @@ void ide_check_pm_state(ide_drive_t *, struct request *); * The gendriver.owner field should be set to the module owner of this driver. * The gendriver.name field should be set to the name of this driver */ -struct ide_driver_s { +struct ide_driver { const char *version; ide_startstop_t (*do_request)(ide_drive_t *, struct request *, sector_t); int (*end_request)(ide_drive_t *, int, int); @@ -1125,7 +1125,7 @@ struct ide_driver_s { #endif }; -#define to_ide_driver(drv) container_of(drv, ide_driver_t, gen_driver) +#define to_ide_driver(drv) container_of(drv, struct ide_driver, gen_driver) int ide_device_get(ide_drive_t *); void ide_device_put(ide_drive_t *); -- cgit v1.2.2 From 627e05daa10896a8f012fa78e8434c07e9e55ea7 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Tue, 6 Jan 2009 17:20:54 +0100 Subject: ide: remove ->error method from struct ide_driver * Remove (now superfluous) ->error method from struct ide_driver. * Unexport __ide_error() and make it static. Signed-off-by: Bartlomiej Zolnierkiewicz --- include/linux/ide.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ide.h b/include/linux/ide.h index fcbcfa2cbe75..9f6fe1fe7a6c 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -1113,7 +1113,6 @@ struct ide_driver { const char *version; ide_startstop_t (*do_request)(ide_drive_t *, struct request *, sector_t); int (*end_request)(ide_drive_t *, int, int); - ide_startstop_t (*error)(ide_drive_t *, struct request *rq, u8, u8); struct device_driver gen_driver; int (*probe)(ide_drive_t *); void (*remove)(ide_drive_t *); @@ -1157,8 +1156,6 @@ void ide_execute_pkt_cmd(ide_drive_t *); void ide_pad_transfer(ide_drive_t *, int, int); -ide_startstop_t __ide_error(ide_drive_t *, struct request *, u8, u8); - ide_startstop_t ide_error(ide_drive_t *, const char *, u8); void ide_fix_driveid(u16 *); -- cgit v1.2.2 From 5e7f3a46690f7f6c9f2781c700ab4370874aa0e8 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Tue, 6 Jan 2009 17:20:56 +0100 Subject: ide: dynamic allocation of device structures Allocate device structures dynamically instead of having them embedded in ide_hwif_t: * Remove needless zeroing of port structure from ide_init_port_data(). * Add ide_hwif_t.devices[MAX_DRIVES] (table of pointers to the devices). * Add ide_port_{alloc,free}_devices() helpers and use them respectively in ide_{host,free}_alloc(). * Convert all users of ->drives[] to use ->devices[] instead. While at it: * Use drive->dn for the slave device check in scc_pata.c. As a nice side-effect this patch cuts ~1kB (x86-32) from the resulting code size: text data bss dec hex filename 53963 1244 237 55444 d894 drivers/ide/ide-core.o.before 52981 1244 237 54462 d4be drivers/ide/ide-core.o.after Signed-off-by: Bartlomiej Zolnierkiewicz --- include/linux/ide.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ide.h b/include/linux/ide.h index 9f6fe1fe7a6c..f00086b10be3 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -753,7 +753,7 @@ typedef struct hwif_s { unsigned long sata_scr[SATA_NR_PORTS]; - ide_drive_t drives[MAX_DRIVES]; /* drive info */ + ide_drive_t *devices[MAX_DRIVES]; u8 major; /* our major number */ u8 index; /* 0 for ide0; 1 for ide1; ... */ @@ -1600,7 +1600,7 @@ static inline int hwif_to_node(ide_hwif_t *hwif) static inline ide_drive_t *ide_get_pair_dev(ide_drive_t *drive) { - ide_drive_t *peer = &drive->hwif->drives[(drive->dn ^ 1) & 1]; + ide_drive_t *peer = drive->hwif->devices[(drive->dn ^ 1) & 1]; return (peer->dev_flags & IDE_DFLAG_PRESENT) ? peer : NULL; } -- cgit v1.2.2 From 2bd24a1cfc99d242c2cff9a6b74ca49fcaac3fb6 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Tue, 6 Jan 2009 17:20:56 +0100 Subject: ide: add port and host iterators Add ide_port_for_each_dev() / ide_host_for_each_port() iterators and update IDE code to use them. While at it: - s/unit/i/ variable in ide_port_wait_ready(), ide_probe_port(), ide_port_tune_devices(), ide_port_init_devices_data(), do_reset1(), ide_acpi_set_state() and scc_dma_end() - s/d/i/ variable in ide_proc_port_register_devices() There should be no functional changes caused by this patch. Signed-off-by: Bartlomiej Zolnierkiewicz --- include/linux/ide.h | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ide.h b/include/linux/ide.h index f00086b10be3..4cecd923fc79 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -753,7 +753,7 @@ typedef struct hwif_s { unsigned long sata_scr[SATA_NR_PORTS]; - ide_drive_t *devices[MAX_DRIVES]; + ide_drive_t *devices[MAX_DRIVES + 1]; u8 major; /* our major number */ u8 index; /* 0 for ide0; 1 for ide1; ... */ @@ -861,7 +861,7 @@ typedef struct hwif_s { #define MAX_HOST_PORTS 4 struct ide_host { - ide_hwif_t *ports[MAX_HOST_PORTS]; + ide_hwif_t *ports[MAX_HOST_PORTS + 1]; unsigned int n_ports; struct device *dev[2]; unsigned int (*init_chipset)(struct pci_dev *); @@ -1604,4 +1604,11 @@ static inline ide_drive_t *ide_get_pair_dev(ide_drive_t *drive) return (peer->dev_flags & IDE_DFLAG_PRESENT) ? peer : NULL; } + +#define ide_port_for_each_dev(i, dev, port) \ + for ((i) = 0; ((dev) = (port)->devices[i]) || (i) < MAX_DRIVES; (i)++) + +#define ide_host_for_each_port(i, port, host) \ + for ((i) = 0; ((port) = (host)->ports[i]) || (i) < MAX_HOST_PORTS; (i)++) + #endif /* _IDE_H */ -- cgit v1.2.2 From d6251d4488a361c93da2398818e1ec69cffb6073 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Tue, 6 Jan 2009 17:20:58 +0100 Subject: ide-cd: convert to ide-atapi facilities ... and remove no longer needed cdrom_start_packet_command and cdrom_transfer_packet_command. Tested lightly with ide-cd and ide-floppy. Signed-off-by: Borislav Petkov Signed-off-by: Bartlomiej Zolnierkiewicz --- include/linux/ide.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ide.h b/include/linux/ide.h index 4cecd923fc79..13deba5e0157 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -654,6 +654,8 @@ struct ide_drive_s { int (*pc_io_buffers)(struct ide_drive_s *, struct ide_atapi_pc *, unsigned int, int); + ide_startstop_t (*irq_handler)(struct ide_drive_s *); + unsigned long atapi_flags; struct ide_atapi_pc request_sense_pc; -- cgit v1.2.2 From 906ef986a71d541a726550fa40dcbc5c356f810e Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Tue, 6 Jan 2009 17:20:59 +0100 Subject: ide: struct ide_atapi_pc - remove unused fields and update documentation Signed-off-by: Bartlomiej Zolnierkiewicz --- include/linux/ide.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ide.h b/include/linux/ide.h index 13deba5e0157..a7dbfd857115 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -426,13 +426,9 @@ struct ide_atapi_pc { struct idetape_bh *bh; char *b_data; - /* idescsi only for now */ struct scatterlist *sg; unsigned int sg_cnt; - struct scsi_cmnd *scsi_cmd; - void (*done) (struct scsi_cmnd *); - unsigned long timeout; }; -- cgit v1.2.2 From 94c96445f32c16cfdc398b20b7e78945ab7e35f9 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Tue, 6 Jan 2009 17:20:59 +0100 Subject: ide: remove unused ide_hwif_t.sg_mapped field Signed-off-by: Bartlomiej Zolnierkiewicz --- include/linux/ide.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ide.h b/include/linux/ide.h index a7dbfd857115..ebc22a836520 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -817,7 +817,6 @@ typedef struct hwif_s { unsigned extra_ports; /* number of extra dma ports */ unsigned present : 1; /* this interface exists */ - unsigned sg_mapped : 1; /* sg_table and sg_nents are ready */ unsigned busy : 1; /* serializes devices on a port */ struct device gendev; -- cgit v1.2.2 From 391ad1908a9c13d457ea12ce1508d6b8a7ba72ad Mon Sep 17 00:00:00 2001 From: Shane McDonald Date: Tue, 6 Jan 2009 17:21:01 +0100 Subject: Resurrect IT8172 IDE controller driver Support for the IT8172 IDE controller was removed from the kernel sometime after 2.6.18. Support for the only boards that used the IT8172 was removed from the kernel after 2.6.18, as they had never compiled since 2.6.0. However, there are a couple of platforms that use this chip: the PMC-Sierra Xiao Hu thin-client computer, which is no longer in production, and the Linksys NSS4000 Network Attached Storage box, which is based on the Xiao Hu board. I am attempting to add support for the Xiao Hu to the kernel, and this IT8172 IDE controller is the first bit of code in this effort. This patch resurrects the IT8172 IDE controller code. I began with the 2.6.18 version of the it8172.c file, and have moved it forward so that it works with the latest version of the kernel. I have run this driver on a PMC-Sierra Xiao Hu board with the 2.6.28 kernel, and I have had no problems with it in my configuration. The attached patch applies cleanly against 2.6.28. Signed-off-by: Shane McDonald Acked-by: Sergei Shtylyov Cc: alan@lxorguk.ukuu.org.uk [bart: s/HWIF(drive)/drive->hwif/] Signed-off-by: Bartlomiej Zolnierkiewicz --- include/linux/pci_ids.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 218c73b1e6d4..d543365518ab 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -1658,6 +1658,7 @@ #define PCI_VENDOR_ID_ROCKWELL 0x127A #define PCI_VENDOR_ID_ITE 0x1283 +#define PCI_DEVICE_ID_ITE_8172 0x8172 #define PCI_DEVICE_ID_ITE_8211 0x8211 #define PCI_DEVICE_ID_ITE_8212 0x8212 #define PCI_DEVICE_ID_ITE_8213 0x8213 -- cgit v1.2.2 From 592b5315219881c6c0af4785f96456ad2043193a Mon Sep 17 00:00:00 2001 From: Sergei Shtylyov Date: Tue, 6 Jan 2009 17:21:02 +0100 Subject: ide: move read_sff_dma_status() method to 'struct ide_dma_ops' Move apparently misplaced read_sff_dma_status() method from 'struct ide_tp_ops' to 'struct ide_dma_ops', renaming it to dma_sff_read_status() and making only required for SFF-8038i compatible IDE controller drivers (greatly cutting down the number of initializers) as its only user (outside ide-dma-sff.c and such drivers) appears to be ide_pci_check_simplex() which is only called for such controllers... Signed-off-by: Sergei Shtylyov Signed-off-by: Bartlomiej Zolnierkiewicz --- include/linux/ide.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ide.h b/include/linux/ide.h index ebc22a836520..3644f6323384 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -674,7 +674,6 @@ struct ide_tp_ops { void (*exec_command)(struct hwif_s *, u8); u8 (*read_status)(struct hwif_s *); u8 (*read_altstatus)(struct hwif_s *); - u8 (*read_sff_dma_status)(struct hwif_s *); void (*set_irq)(struct hwif_s *, int); @@ -735,6 +734,11 @@ struct ide_dma_ops { int (*dma_test_irq)(struct ide_drive_s *); void (*dma_lost_irq)(struct ide_drive_s *); void (*dma_timeout)(struct ide_drive_s *); + /* + * The following method is optional and only required to be + * implemented for the SFF-8038i compatible controllers. + */ + u8 (*dma_sff_read_status)(struct hwif_s *); }; struct ide_host; @@ -1177,7 +1181,6 @@ void ide_tf_dump(const char *, struct ide_taskfile *); void ide_exec_command(ide_hwif_t *, u8); u8 ide_read_status(ide_hwif_t *); u8 ide_read_altstatus(ide_hwif_t *); -u8 ide_read_sff_dma_status(ide_hwif_t *); void ide_set_irq(ide_hwif_t *, int); @@ -1458,6 +1461,7 @@ void ide_dma_exec_cmd(ide_drive_t *, u8); extern void ide_dma_start(ide_drive_t *); int ide_dma_end(ide_drive_t *); int ide_dma_test_irq(ide_drive_t *); +u8 ide_dma_sff_read_status(ide_hwif_t *); extern const struct ide_dma_ops sff_dma_ops; #else static inline int config_drive_for_dma(ide_drive_t *drive) { return 0; } -- cgit v1.2.2 From 548eaca46b3cf4419b6c2be839a106d8641ffb70 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Mon, 20 Oct 2008 17:48:43 -0400 Subject: nfsd: document new filehandle fsid types Descriptions taken from mountd code (in nfs-utils/utils/mountd/cache.c). Signed-off-by: J. Bruce Fields --- include/linux/nfsd/nfsfh.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/nfsd/nfsfh.h b/include/linux/nfsd/nfsfh.h index d1941cb965e9..b2e093870bc6 100644 --- a/include/linux/nfsd/nfsfh.h +++ b/include/linux/nfsd/nfsfh.h @@ -68,6 +68,10 @@ struct nfs_fhbase_old { * 1 - 4 byte user specified identifier * 2 - 4 byte major, 4 byte minor, 4 byte inode number - DEPRECATED * 3 - 4 byte device id, encoded for user-space, 4 byte inode number + * 4 - 4 byte inode number and 4 byte uuid + * 5 - 8 byte uuid + * 6 - 16 byte uuid + * 7 - 8 byte inode number and 16 byte uuid * * The fileid_type identified how the file within the filesystem is encoded. * This is (will be) passed to, and set by, the underlying filesystem if it supports -- cgit v1.2.2 From c9233eb7b0b11ef176d4bf68da2ce85464b6ec39 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 20 Oct 2008 11:51:57 -0400 Subject: sunrpc: add sv_maxconn field to svc_serv (try #3) svc_check_conn_limits() attempts to prevent denial of service attacks by having the service close old connections once it reaches a threshold. This threshold is based on the number of threads in the service: (serv->sv_nrthreads + 3) * 20 Once we reach this, we drop the oldest connections and a printk pops to warn the admin that they should increase the number of threads. Increasing the number of threads isn't an option however for services like lockd. We don't want to eliminate this check entirely for such services but we need some way to increase this limit. This patch adds a sv_maxconn field to the svc_serv struct. When it's set to 0, we use the current method to calculate the max number of connections. RPC services can then set this on an as-needed basis. Signed-off-by: Jeff Layton Acked-by: Neil Brown Signed-off-by: J. Bruce Fields --- include/linux/sunrpc/svc.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index 3afe7fb403b2..3435d24bfe55 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -58,10 +58,13 @@ struct svc_serv { struct svc_stat * sv_stats; /* RPC statistics */ spinlock_t sv_lock; unsigned int sv_nrthreads; /* # of server threads */ + unsigned int sv_maxconn; /* max connections allowed or + * '0' causing max to be based + * on number of threads. */ + unsigned int sv_max_payload; /* datagram payload size */ unsigned int sv_max_mesg; /* max_payload + 1 page for overheads */ unsigned int sv_xdrsize; /* XDR buffer size */ - struct list_head sv_permsocks; /* all permanent sockets */ struct list_head sv_tempsocks; /* all temporary sockets */ int sv_tmpcnt; /* count of temporary sockets */ -- cgit v1.2.2 From 7538ce1eb656a1477bedd5b1c202226e7abf5e7b Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 4 Dec 2008 14:19:45 -0500 Subject: NLM: Use modern style for pointer fields in nlm_host Clean up: I'm about to add another "char *" field to the nlm_host structure. The h_name field, for example, uses an older style of declaring a "char *" field. If I match that style for the new field, checkpatch.pl will complain. So, fix pointer fields to use the new style. Signed-off-by: Chuck Lever Signed-off-by: J. Bruce Fields --- include/linux/lockd/lockd.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h index 23da3fa69efa..3dbdd353156c 100644 --- a/include/linux/lockd/lockd.h +++ b/include/linux/lockd/lockd.h @@ -43,8 +43,8 @@ struct nlm_host { struct sockaddr_storage h_addr; /* peer address */ size_t h_addrlen; struct sockaddr_storage h_srcaddr; /* our address (optional) */ - struct rpc_clnt * h_rpcclnt; /* RPC client to talk to peer */ - char * h_name; /* remote hostname */ + struct rpc_clnt *h_rpcclnt; /* RPC client to talk to peer */ + char *h_name; /* remote hostname */ u32 h_version; /* interface version */ unsigned short h_proto; /* transport proto */ unsigned short h_reclaiming : 1, @@ -64,7 +64,7 @@ struct nlm_host { spinlock_t h_lock; struct list_head h_granted; /* Locks in GRANTED state */ struct list_head h_reclaim; /* Locks in RECLAIM state */ - struct nsm_handle * h_nsmhandle; /* NSM status handle */ + struct nsm_handle *h_nsmhandle; /* NSM status handle */ char h_addrbuf[48], /* address eyecatchers */ h_srcaddrbuf[48]; -- cgit v1.2.2 From 1df40b609ad5a622904eb652109c287fe9c93ec5 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 4 Dec 2008 14:19:53 -0500 Subject: NLM: Remove address eye-catcher buffers from nlm_host The h_name field in struct nlm_host is a just copy of h_nsmhandle->sm_name. Likewise, the contents of the h_addrbuf field should be identical to the sm_addrbuf field. The h_srcaddrbuf field is used only in one place for debugging. We can live without this until we get %pI formatting for printk(). Currently these buffers are 48 bytes, but we need to support scope IDs in IPv6 presentation addresses, which means making the buffers even larger. Instead, let's find ways to eliminate them to save space. Signed-off-by: Chuck Lever Signed-off-by: J. Bruce Fields --- include/linux/lockd/lockd.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h index 3dbdd353156c..dae22cb4c38d 100644 --- a/include/linux/lockd/lockd.h +++ b/include/linux/lockd/lockd.h @@ -65,9 +65,7 @@ struct nlm_host { struct list_head h_granted; /* Locks in GRANTED state */ struct list_head h_reclaim; /* Locks in RECLAIM state */ struct nsm_handle *h_nsmhandle; /* NSM status handle */ - - char h_addrbuf[48], /* address eyecatchers */ - h_srcaddrbuf[48]; + char *h_addrbuf; /* address eyecatcher */ }; struct nsm_handle { -- cgit v1.2.2 From bc995801a09d1fead0bec1356bfd836911c8eed7 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 4 Dec 2008 14:20:08 -0500 Subject: NLM: Support IPv6 scope IDs in nlm_display_address() Scope ID support is needed since the kernel's NSM implementation is about to use these displayed addresses as a mon_name in some cases. When nsm_use_hostnames is zero, without scope ID support NSM will fail to handle peers that contact us via a link-local address. Link-local addresses do not work without an interface ID, which is stored in the sockaddr's sin6_scope_id field. Signed-off-by: Chuck Lever Signed-off-by: J. Bruce Fields --- include/linux/lockd/lockd.h | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h index dae22cb4c38d..80a0a2cff2b8 100644 --- a/include/linux/lockd/lockd.h +++ b/include/linux/lockd/lockd.h @@ -68,6 +68,14 @@ struct nlm_host { char *h_addrbuf; /* address eyecatcher */ }; +/* + * The largest string sm_addrbuf should hold is a full-size IPv6 address + * (no "::" anywhere) with a scope ID. The buffer size is computed to + * hold eight groups of colon-separated four-hex-digit numbers, a + * percent sign, a scope id (at most 32 bits, in decimal), and NUL. + */ +#define NSM_ADDRBUF ((8 * 4 + 7) + (1 + 10) + 1) + struct nsm_handle { struct list_head sm_link; atomic_t sm_count; @@ -76,7 +84,7 @@ struct nsm_handle { size_t sm_addrlen; unsigned int sm_monitored : 1, sm_sticky : 1; /* don't unmonitor */ - char sm_addrbuf[48]; /* address eyecatcher */ + char sm_addrbuf[NSM_ADDRBUF]; }; /* -- cgit v1.2.2 From f47534f7f0ac7727e05ec4274b764b181df2cf7f Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 4 Dec 2008 14:20:38 -0500 Subject: NSM: Use modern style for sm_name field in nsm_handle Clean up: I'm about to add another "char *" field to the nsm_handle structure. The sm_name field uses an older style of declaring a "char *" field. If I match that style for the new field, checkpatch.pl will complain. So, fix the sm_name field to use the new style. Signed-off-by: Chuck Lever Signed-off-by: J. Bruce Fields --- include/linux/lockd/lockd.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h index 80a0a2cff2b8..54dbb458e73c 100644 --- a/include/linux/lockd/lockd.h +++ b/include/linux/lockd/lockd.h @@ -79,7 +79,7 @@ struct nlm_host { struct nsm_handle { struct list_head sm_link; atomic_t sm_count; - char * sm_name; + char *sm_name; struct sockaddr_storage sm_addr; size_t sm_addrlen; unsigned int sm_monitored : 1, -- cgit v1.2.2 From 29ed1407ed81086b778ebf12145b048ac3f7e10e Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 4 Dec 2008 14:20:46 -0500 Subject: NSM: Support IPv6 version of mon_name The "mon_name" argument of the NSMPROC_MON and NSMPROC_UNMON upcalls is a string that contains the hostname or IP address of the remote peer to be notified when this host has rebooted. The sm-notify command uses this identifier to contact the peer when we reboot, so it must be either a well-qualified DNS hostname or a presentation format IP address string. When the "nsm_use_hostnames" sysctl is set to zero, the kernel's NSM provides a presentation format IP address in the "mon_name" argument. Otherwise, the "caller_name" argument from NLM requests is used, which is usually just the DNS hostname of the peer. To support IPv6 addresses for the mon_name argument, we use the nsm_handle's address eye-catcher, which already contains an appropriate presentation format address string. Using the eye-catcher string obviates the need to use a large buffer on the stack to form the presentation address string for the upcall. This patch also addresses a subtle bug. An NSMPROC_MON request and the subsequent NSMPROC_UNMON request for the same peer are required to use the same value for the "mon_name" argument. Otherwise, rpc.statd's NSMPROC_UNMON processing cannot locate the database entry for that peer and remove it. If the setting of nsm_use_hostnames is changed between the time the kernel sends an NSMPROC_MON request and the time it sends the NSMPROC_UNMON request for the same peer, the "mon_name" argument for these two requests may not be the same. This is because the value of "mon_name" is currently chosen at the moment the call is made based on the setting of nsm_use_hostnames To ensure both requests pass identical contents in the "mon_name" argument, we now select which string to use for the argument in the nsm_monitor() function. A pointer to this string is saved in the nsm_handle so it can be used for a subsequent NSMPROC_UNMON upcall. NB: There are other potential problems, such as how nlm_host_rebooted() might behave if nsm_use_hostnames were changed while hosts are still being monitored. This patch does not attempt to address those problems. Signed-off-by: Chuck Lever Signed-off-by: J. Bruce Fields --- include/linux/lockd/lockd.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h index 54dbb458e73c..d3c7247d23e8 100644 --- a/include/linux/lockd/lockd.h +++ b/include/linux/lockd/lockd.h @@ -79,6 +79,7 @@ struct nlm_host { struct nsm_handle { struct list_head sm_link; atomic_t sm_count; + char *sm_mon_name; char *sm_name; struct sockaddr_storage sm_addr; size_t sm_addrlen; -- cgit v1.2.2 From 1e49323c4ab044d05bbc68cf13cadcbd4372468c Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 4 Dec 2008 14:21:24 -0500 Subject: NLM: Move the public declaration of nsm_monitor() to lockd.h Clean up. Make the nlm_host argument "const," and move the public declaration to lockd.h with other NSM public function (nsm_release, eg) and global variable declarations. Add a documenting comment. Signed-off-by: Chuck Lever Signed-off-by: J. Bruce Fields --- include/linux/lockd/lockd.h | 4 ++++ include/linux/lockd/sm_inter.h | 1 - 2 files changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h index d3c7247d23e8..f15a4f5ccbfb 100644 --- a/include/linux/lockd/lockd.h +++ b/include/linux/lockd/lockd.h @@ -242,6 +242,10 @@ extern void nlm_host_rebooted(const struct sockaddr_in *, const char *, unsigned int, u32); void nsm_release(struct nsm_handle *); +/* + * Host monitoring + */ +int nsm_monitor(const struct nlm_host *host); /* * This is used in garbage collection and resource reclaim diff --git a/include/linux/lockd/sm_inter.h b/include/linux/lockd/sm_inter.h index 5a5448bdb17d..546b6102b0d7 100644 --- a/include/linux/lockd/sm_inter.h +++ b/include/linux/lockd/sm_inter.h @@ -41,7 +41,6 @@ struct nsm_res { u32 state; }; -int nsm_monitor(struct nlm_host *); int nsm_unmonitor(struct nlm_host *); extern int nsm_local_state; -- cgit v1.2.2 From c8c23c423dec49cb439697d3dc714e1500ff1610 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 4 Dec 2008 14:21:31 -0500 Subject: NSM: Release nsmhandle in nlm_destroy_host The nsm_handle's reference count is bumped in nlm_lookup_host(). It should be decremented in nlm_destroy_host() to make it easier to see the balance of these two operations. Move the nsm_release() call to fs/lockd/host.c. The h_nsmhandle pointer is set in nlm_lookup_host(), and never cleared. The nlm_destroy_host() function is never called for the same nlm_host twice, so h_nsmhandle won't ever be NULL when nsm_unmonitor() is called. All references to the nlm_host are gone before it is freed. We can skip making h_nsmhandle NULL just before the nlm_host is deallocated. It's also likely we can remove the h_nsmhandle NULL check in nlmsvc_is_client() as well, but we can do that later when rearchitect- ing the nlm_host cache. Signed-off-by: Chuck Lever Signed-off-by: J. Bruce Fields --- include/linux/lockd/lockd.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h index f15a4f5ccbfb..30a6a9c1ce42 100644 --- a/include/linux/lockd/lockd.h +++ b/include/linux/lockd/lockd.h @@ -240,7 +240,6 @@ void nlm_release_host(struct nlm_host *); void nlm_shutdown_hosts(void); extern void nlm_host_rebooted(const struct sockaddr_in *, const char *, unsigned int, u32); -void nsm_release(struct nsm_handle *); /* * Host monitoring -- cgit v1.2.2 From 356c3eb466fd1a12afd6448d90fba3922836e5f1 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 4 Dec 2008 14:21:38 -0500 Subject: NLM: Move the public declaration of nsm_unmonitor() to lockd.h Clean up. Make the nlm_host argument "const," and move the public declaration to lockd.h. Add a documenting comment. Bruce observed that nsm_unmonitor()'s only caller doesn't care about its return code, so make nsm_unmonitor() return void. Signed-off-by: Chuck Lever Signed-off-by: J. Bruce Fields --- include/linux/lockd/lockd.h | 1 + include/linux/lockd/sm_inter.h | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h index 30a6a9c1ce42..38344bfb814a 100644 --- a/include/linux/lockd/lockd.h +++ b/include/linux/lockd/lockd.h @@ -245,6 +245,7 @@ extern void nlm_host_rebooted(const struct sockaddr_in *, const char *, * Host monitoring */ int nsm_monitor(const struct nlm_host *host); +void nsm_unmonitor(const struct nlm_host *host); /* * This is used in garbage collection and resource reclaim diff --git a/include/linux/lockd/sm_inter.h b/include/linux/lockd/sm_inter.h index 546b6102b0d7..896a5e303323 100644 --- a/include/linux/lockd/sm_inter.h +++ b/include/linux/lockd/sm_inter.h @@ -41,7 +41,6 @@ struct nsm_res { u32 state; }; -int nsm_unmonitor(struct nlm_host *); extern int nsm_local_state; #endif /* LINUX_LOCKD_SM_INTER_H */ -- cgit v1.2.2 From 9c1bfd037f7ff8badaecb47418f109148d88bf45 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 5 Dec 2008 19:01:59 -0500 Subject: NSM: Move NSM-related XDR data structures to lockd's xdr.h Clean up: NSM's XDR data structures are used only in fs/lockd/mon.c, so move them there. Signed-off-by: Chuck Lever Signed-off-by: J. Bruce Fields --- include/linux/lockd/sm_inter.h | 20 -------------------- 1 file changed, 20 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lockd/sm_inter.h b/include/linux/lockd/sm_inter.h index 896a5e303323..dd9d8a5bb316 100644 --- a/include/linux/lockd/sm_inter.h +++ b/include/linux/lockd/sm_inter.h @@ -21,26 +21,6 @@ #define SM_MAXSTRLEN 1024 #define SM_PRIV_SIZE 16 -/* - * Arguments for all calls to statd - */ -struct nsm_args { - __be32 addr; /* remote address */ - u32 prog; /* RPC callback info */ - u32 vers; - u32 proc; - - char * mon_name; -}; - -/* - * Result returned by statd - */ -struct nsm_res { - u32 status; - u32 state; -}; - extern int nsm_local_state; #endif /* LINUX_LOCKD_SM_INTER_H */ -- cgit v1.2.2 From 36e8e668d3e6a61848a8921ddeb663b417299fa5 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 5 Dec 2008 19:02:07 -0500 Subject: NSM: Move NSM program and procedure numbers to fs/lockd/mon.c Clean up: Move the RPC program and procedure numbers for NSM into the one source file that needs them: fs/lockd/mon.c. And, as with NLM, NFS, and rpcbind calls, use NSMPROC_FOO instead of SM_FOO for NSM procedure numbers. Finally, make a couple of comments more precise: what is referred to here as SM_NOTIFY is really the NLM (lockd) NLMPROC_SM_NOTIFY downcall, not NSMPROC_NOTIFY. Signed-off-by: Chuck Lever Signed-off-by: J. Bruce Fields --- include/linux/lockd/sm_inter.h | 9 --------- 1 file changed, 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lockd/sm_inter.h b/include/linux/lockd/sm_inter.h index dd9d8a5bb316..116bf38535a0 100644 --- a/include/linux/lockd/sm_inter.h +++ b/include/linux/lockd/sm_inter.h @@ -9,15 +9,6 @@ #ifndef LINUX_LOCKD_SM_INTER_H #define LINUX_LOCKD_SM_INTER_H -#define SM_PROGRAM 100024 -#define SM_VERSION 1 -#define SM_STAT 1 -#define SM_MON 2 -#define SM_UNMON 3 -#define SM_UNMON_ALL 4 -#define SM_SIMU_CRASH 5 -#define SM_NOTIFY 6 - #define SM_MAXSTRLEN 1024 #define SM_PRIV_SIZE 16 -- cgit v1.2.2 From 67c6d107a689243979a2b5f15244b5261634a924 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 5 Dec 2008 19:02:45 -0500 Subject: NSM: Move nsm_find() to fs/lockd/mon.c The nsm_find() function sets up fresh nsm_handle entries. This is where we will store the "priv" cookie used to lookup nsm_handles during reboot recovery. The cookie will be constructed when nsm_find() creates a new nsm_handle. As much as possible, I would like to keep everything that handles a "priv" cookie in fs/lockd/mon.c so that all the smarts are in one source file. That organization should make it pretty simple to see how all this works. To me, it makes more sense than the current arrangement to keep nsm_find() with nsm_monitor() and nsm_unmonitor(). So, start reorganizing by moving nsm_find() into fs/lockd/mon.c. The nsm_release() function comes along too, since it shares the nsm_lock global variable. Signed-off-by: Chuck Lever Signed-off-by: J. Bruce Fields --- include/linux/lockd/lockd.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h index 38344bfb814a..8d715363c6ac 100644 --- a/include/linux/lockd/lockd.h +++ b/include/linux/lockd/lockd.h @@ -247,6 +247,12 @@ extern void nlm_host_rebooted(const struct sockaddr_in *, const char *, int nsm_monitor(const struct nlm_host *host); void nsm_unmonitor(const struct nlm_host *host); +struct nsm_handle *nsm_find(const struct sockaddr *sap, const size_t salen, + const char *hostname, + const size_t hostname_len, + const int create); +void nsm_release(struct nsm_handle *nsm); + /* * This is used in garbage collection and resource reclaim * A return value != 0 means destroy the lock/block/share -- cgit v1.2.2 From 7e44d3bea21fbb9494930d1cd35ca92a9a4a3279 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 5 Dec 2008 19:03:16 -0500 Subject: NSM: Generate NSMPROC_MON's "priv" argument when nsm_handle is created Introduce a new data type, used by both the in-kernel NLM and NSM implementations, that is used to manage the opaque "priv" argument for the NSMPROC_MON and NLMPROC_SM_NOTIFY calls. Construct the "priv" cookie when the nsm_handle is created. The nsm_init_private() function may look a little strange, but it is roughly equivalent to how the XDR encoder formed the "priv" argument. It's going to go away soon. Signed-off-by: Chuck Lever Signed-off-by: J. Bruce Fields --- include/linux/lockd/lockd.h | 1 + include/linux/lockd/sm_inter.h | 1 - include/linux/lockd/xdr.h | 6 ++++++ 3 files changed, 7 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h index 8d715363c6ac..194fa8a66398 100644 --- a/include/linux/lockd/lockd.h +++ b/include/linux/lockd/lockd.h @@ -85,6 +85,7 @@ struct nsm_handle { size_t sm_addrlen; unsigned int sm_monitored : 1, sm_sticky : 1; /* don't unmonitor */ + struct nsm_private sm_priv; char sm_addrbuf[NSM_ADDRBUF]; }; diff --git a/include/linux/lockd/sm_inter.h b/include/linux/lockd/sm_inter.h index 116bf38535a0..5cef5a79dd94 100644 --- a/include/linux/lockd/sm_inter.h +++ b/include/linux/lockd/sm_inter.h @@ -10,7 +10,6 @@ #define LINUX_LOCKD_SM_INTER_H #define SM_MAXSTRLEN 1024 -#define SM_PRIV_SIZE 16 extern int nsm_local_state; diff --git a/include/linux/lockd/xdr.h b/include/linux/lockd/xdr.h index d6b3a802c046..6b5199263858 100644 --- a/include/linux/lockd/xdr.h +++ b/include/linux/lockd/xdr.h @@ -13,6 +13,12 @@ #include #include +#define SM_PRIV_SIZE 16 + +struct nsm_private { + unsigned char data[SM_PRIV_SIZE]; +}; + struct svc_rqst; #define NLM_MAXCOOKIELEN 32 -- cgit v1.2.2 From 7fefc9cb9d5f129c238d93166f705c96ca2e7e51 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 5 Dec 2008 19:03:31 -0500 Subject: NLM: Change nlm_host_rebooted() to take a single nlm_reboot argument Pass the nlm_reboot data structure directly from the NLMPROC_SM_NOTIFY XDR decoders to nlm_host_rebooted(). This eliminates some packing and unpacking of the NLMPROC_SM_NOTIFY results, and prepares for passing these results, including the "priv" cookie, directly to a lookup routine in fs/lockd/mon.c. This patch changes code organization but should not cause any behavioral change. Signed-off-by: Chuck Lever Signed-off-by: J. Bruce Fields --- include/linux/lockd/lockd.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h index 194fa8a66398..2a3533ea38dd 100644 --- a/include/linux/lockd/lockd.h +++ b/include/linux/lockd/lockd.h @@ -239,8 +239,7 @@ void nlm_rebind_host(struct nlm_host *); struct nlm_host * nlm_get_host(struct nlm_host *); void nlm_release_host(struct nlm_host *); void nlm_shutdown_hosts(void); -extern void nlm_host_rebooted(const struct sockaddr_in *, const char *, - unsigned int, u32); +void nlm_host_rebooted(const struct nlm_reboot *); /* * Host monitoring -- cgit v1.2.2 From 576df4634e37e46b441fefb91915184edb13bb94 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 5 Dec 2008 19:03:39 -0500 Subject: NLM: Decode "priv" argument of NLMPROC_SM_NOTIFY as an opaque The NLM XDR decoders for the NLMPROC_SM_NOTIFY procedure should treat their "priv" argument truly as an opaque, as defined by the protocol, and let the upper layers figure out what is in it. This will make it easier to modify the contents and interpretation of the "priv" argument, and keep knowledge about what's in "priv" local to fs/lockd/mon.c. For now, the NLM and NSM implementations should behave exactly as they did before. The formation of the address of the rebooted host in nlm_host_rebooted() may look a little strange, but it is the inverse of how nsm_init_private() forms the private cookie. Plus, it's going away soon anyway. Signed-off-by: Chuck Lever Signed-off-by: J. Bruce Fields --- include/linux/lockd/xdr.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lockd/xdr.h b/include/linux/lockd/xdr.h index 6b5199263858..6338866222a8 100644 --- a/include/linux/lockd/xdr.h +++ b/include/linux/lockd/xdr.h @@ -83,10 +83,10 @@ struct nlm_res { * statd callback when client has rebooted */ struct nlm_reboot { - char * mon; - unsigned int len; - u32 state; - __be32 addr; + char *mon; + unsigned int len; + u32 state; + struct nsm_private priv; }; /* -- cgit v1.2.2 From 3420a8c4359a189f7d854ed7075d151257415447 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 5 Dec 2008 19:03:46 -0500 Subject: NSM: Add nsm_lookup() function Introduce a new API to fs/lockd/mon.c that allows nlm_host_rebooted() to lookup up nsm_handles via the contents of an nlm_reboot struct. The new function is equivalent to calling nsm_find() with @create set to zero, but it takes a struct nlm_reboot instead of separate arguments. Signed-off-by: Chuck Lever Signed-off-by: J. Bruce Fields --- include/linux/lockd/lockd.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h index 2a3533ea38dd..5e3ad926de89 100644 --- a/include/linux/lockd/lockd.h +++ b/include/linux/lockd/lockd.h @@ -251,6 +251,7 @@ struct nsm_handle *nsm_find(const struct sockaddr *sap, const size_t salen, const char *hostname, const size_t hostname_len, const int create); +struct nsm_handle *nsm_reboot_lookup(const struct nlm_reboot *info); void nsm_release(struct nsm_handle *nsm); /* -- cgit v1.2.2 From 92fd91b998a5216a6d6606704e71d541a180216c Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 5 Dec 2008 19:04:01 -0500 Subject: NLM: Remove "create" argument from nsm_find() Clean up: nsm_find() now has only one caller, and that caller unconditionally sets the @create argument. Thus the @create argument is no longer needed. Since nsm_find() now has a more specific purpose, pick a more appropriate name for it. Signed-off-by: Chuck Lever Signed-off-by: J. Bruce Fields --- include/linux/lockd/lockd.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h index 5e3ad926de89..1ccd49e97a7f 100644 --- a/include/linux/lockd/lockd.h +++ b/include/linux/lockd/lockd.h @@ -247,10 +247,10 @@ void nlm_host_rebooted(const struct nlm_reboot *); int nsm_monitor(const struct nlm_host *host); void nsm_unmonitor(const struct nlm_host *host); -struct nsm_handle *nsm_find(const struct sockaddr *sap, const size_t salen, +struct nsm_handle *nsm_get_handle(const struct sockaddr *sap, + const size_t salen, const char *hostname, - const size_t hostname_len, - const int create); + const size_t hostname_len); struct nsm_handle *nsm_reboot_lookup(const struct nlm_reboot *info); void nsm_release(struct nsm_handle *nsm); -- cgit v1.2.2 From e6765b83977f07983c7a10e6bbb19d6c7bbfc3a4 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 11 Dec 2008 17:56:14 -0500 Subject: NSM: Remove include/linux/lockd/sm_inter.h Clean up: The include/linux/lockd/sm_inter.h header is nearly empty now. Remove it. Signed-off-by: Chuck Lever Signed-off-by: J. Bruce Fields --- include/linux/lockd/lockd.h | 1 + include/linux/lockd/sm_inter.h | 16 ---------------- include/linux/lockd/xdr.h | 1 + 3 files changed, 2 insertions(+), 16 deletions(-) delete mode 100644 include/linux/lockd/sm_inter.h (limited to 'include/linux') diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h index 1ccd49e97a7f..8b57467375cc 100644 --- a/include/linux/lockd/lockd.h +++ b/include/linux/lockd/lockd.h @@ -205,6 +205,7 @@ extern struct svc_procedure nlmsvc_procedures4[]; extern int nlmsvc_grace_period; extern unsigned long nlmsvc_timeout; extern int nsm_use_hostnames; +extern int nsm_local_state; /* * Lockd client functions diff --git a/include/linux/lockd/sm_inter.h b/include/linux/lockd/sm_inter.h deleted file mode 100644 index 5cef5a79dd94..000000000000 --- a/include/linux/lockd/sm_inter.h +++ /dev/null @@ -1,16 +0,0 @@ -/* - * linux/include/linux/lockd/sm_inter.h - * - * Declarations for the kernel statd client. - * - * Copyright (C) 1996, Olaf Kirch - */ - -#ifndef LINUX_LOCKD_SM_INTER_H -#define LINUX_LOCKD_SM_INTER_H - -#define SM_MAXSTRLEN 1024 - -extern int nsm_local_state; - -#endif /* LINUX_LOCKD_SM_INTER_H */ diff --git a/include/linux/lockd/xdr.h b/include/linux/lockd/xdr.h index 6338866222a8..7dc5b6cb44cd 100644 --- a/include/linux/lockd/xdr.h +++ b/include/linux/lockd/xdr.h @@ -13,6 +13,7 @@ #include #include +#define SM_MAXSTRLEN 1024 #define SM_PRIV_SIZE 16 struct nsm_private { -- cgit v1.2.2 From 8529bc51d30b8f001734b29b21a51b579c260f5b Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 11 Dec 2008 17:56:22 -0500 Subject: NSM: Move nsm_addr() to fs/lockd/mon.c Clean up: nsm_addr_in() is no longer used, and nsm_addr() is used only in fs/lockd/mon.c, so move it there. Signed-off-by: Chuck Lever Signed-off-by: J. Bruce Fields --- include/linux/lockd/lockd.h | 10 ---------- 1 file changed, 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h index 8b57467375cc..6ab0449bc828 100644 --- a/include/linux/lockd/lockd.h +++ b/include/linux/lockd/lockd.h @@ -112,16 +112,6 @@ static inline struct sockaddr *nlm_srcaddr(const struct nlm_host *host) return (struct sockaddr *)&host->h_srcaddr; } -static inline struct sockaddr_in *nsm_addr_in(const struct nsm_handle *handle) -{ - return (struct sockaddr_in *)&handle->sm_addr; -} - -static inline struct sockaddr *nsm_addr(const struct nsm_handle *handle) -{ - return (struct sockaddr *)&handle->sm_addr; -} - /* * Map an fl_owner_t into a unique 32-bit "pid" */ -- cgit v1.2.2 From d1208f70738c91f13b4eadb1b7a694082e439da2 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 11 Dec 2008 17:56:44 -0500 Subject: NLM: nlm_privileged_requester() doesn't recognize mapped loopback address Commit b85e4676 added the nlm_privileged_requester() helper to check whether an RPC request was sent from a local privileged caller. It recognizes IPv4 privileged callers (from "127.0.0.1"), and IPv6 privileged callers (from "::1"). However, IPV6_ADDR_LOOPBACK is not set for the mapped IPv4 loopback address (::ffff:7f00:0001), so the test breaks when the kernel's RPC service is IPv6-enabled but user space is calling via the IPv4 loopback address. This is actually the most common case for IPv6- enabled RPC services on Linux. Rewrite the IPv6 check to handle the mapped IPv4 loopback address as well as a normal IPv6 loopback address. Signed-off-by: Chuck Lever Signed-off-by: J. Bruce Fields --- include/linux/lockd/lockd.h | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h index 6ab0449bc828..80d7e8a8257d 100644 --- a/include/linux/lockd/lockd.h +++ b/include/linux/lockd/lockd.h @@ -299,8 +299,14 @@ static inline int __nlm_privileged_request4(const struct sockaddr *sap) static inline int __nlm_privileged_request6(const struct sockaddr *sap) { const struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)sap; - return (ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LOOPBACK) && - (ntohs(sin6->sin6_port) < 1024); + + if (ntohs(sin6->sin6_port) > 1023) + return 0; + + if (ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_MAPPED) + return ipv4_is_loopback(sin6->sin6_addr.s6_addr32[3]); + + return ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LOOPBACK; } #else /* defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) */ static inline int __nlm_privileged_request6(const struct sockaddr *sap) -- cgit v1.2.2 From 57ef692588bc225853ca3267ca5b7cea2b07e058 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 11 Dec 2008 17:56:52 -0500 Subject: NLM: Rewrite IPv4 privileged requester's check Clean up. For consistency, rewrite the IPv4 check to match the same style as the new IPv6 check. Note that ipv4_is_loopback() is somewhat broader in its interpretation of what is a loopback address than simply "127.0.0.1". Signed-off-by: Chuck Lever Signed-off-by: J. Bruce Fields --- include/linux/lockd/lockd.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h index 80d7e8a8257d..aa6fe7026de7 100644 --- a/include/linux/lockd/lockd.h +++ b/include/linux/lockd/lockd.h @@ -291,8 +291,11 @@ static inline struct inode *nlmsvc_file_inode(struct nlm_file *file) static inline int __nlm_privileged_request4(const struct sockaddr *sap) { const struct sockaddr_in *sin = (struct sockaddr_in *)sap; - return (sin->sin_addr.s_addr == htonl(INADDR_LOOPBACK)) && - (ntohs(sin->sin_port) < 1024); + + if (ntohs(sin->sin_port) > 1023) + return 0; + + return ipv4_is_loopback(sin->sin_addr.s_addr); } #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) -- cgit v1.2.2 From 6f49a57aa5a0c6d4e4e27c85f7af6c83325a12d1 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 6 Jan 2009 11:38:14 -0700 Subject: dmaengine: up-level reference counting to the module level Simply, if a client wants any dmaengine channel then prevent all dmaengine modules from being removed. Once the clients are done re-enable module removal. Why?, beyond reducing complication: 1/ Tracking reference counts per-transaction in an efficient manner, as is currently done, requires a complicated scheme to avoid cache-line bouncing effects. 2/ Per-transaction ref-counting gives the false impression that a dma-driver can be gracefully removed ahead of its user (net, md, or dma-slave) 3/ None of the in-tree dma-drivers talk to hot pluggable hardware, but if such an engine were built one day we still would not need to notify clients of remove events. The driver can simply return NULL to a ->prep() request, something that is much easier for a client to handle. Reviewed-by: Andrew Morton Acked-by: Maciej Sosnowski Signed-off-by: Dan Williams --- include/linux/dmaengine.h | 21 --------------------- 1 file changed, 21 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h index e4ec7e7b8056..d18d37d1015d 100644 --- a/include/linux/dmaengine.h +++ b/include/linux/dmaengine.h @@ -165,7 +165,6 @@ struct dma_slave { */ struct dma_chan_percpu { - local_t refcount; /* stats */ unsigned long memcpy_count; unsigned long bytes_transferred; @@ -205,26 +204,6 @@ struct dma_chan { void dma_chan_cleanup(struct kref *kref); -static inline void dma_chan_get(struct dma_chan *chan) -{ - if (unlikely(chan->slow_ref)) - kref_get(&chan->refcount); - else { - local_inc(&(per_cpu_ptr(chan->local, get_cpu())->refcount)); - put_cpu(); - } -} - -static inline void dma_chan_put(struct dma_chan *chan) -{ - if (unlikely(chan->slow_ref)) - kref_put(&chan->refcount, dma_chan_cleanup); - else { - local_dec(&(per_cpu_ptr(chan->local, get_cpu())->refcount)); - put_cpu(); - } -} - /* * typedef dma_event_callback - function pointer to a DMA event callback * For each channel added to the system this routine is called for each client. -- cgit v1.2.2 From bec085134e446577a983f17f57d642a88d1af53b Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 6 Jan 2009 11:38:14 -0700 Subject: dmaengine: centralize channel allocation, introduce dma_find_channel Allowing multiple clients to each define their own channel allocation scheme quickly leads to a pathological situation. For memory-to-memory offload all clients can share a central allocator. This simply moves the existing async_tx allocator to dmaengine with minimal fixups: * async_tx.c:get_chan_ref_by_cap --> dmaengine.c:nth_chan * async_tx.c:async_tx_rebalance --> dmaengine.c:dma_channel_rebalance * split out common code from async_tx.c:__async_tx_find_channel --> dma_find_channel Reviewed-by: Andrew Morton Signed-off-by: Dan Williams --- include/linux/dmaengine.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h index d18d37d1015d..b466f02e2433 100644 --- a/include/linux/dmaengine.h +++ b/include/linux/dmaengine.h @@ -182,6 +182,7 @@ struct dma_chan_percpu { * @device_node: used to add this to the device chan list * @local: per-cpu pointer to a struct dma_chan_percpu * @client-count: how many clients are using this channel + * @table_count: number of appearances in the mem-to-mem allocation table */ struct dma_chan { struct dma_device *device; @@ -198,6 +199,7 @@ struct dma_chan { struct list_head device_node; struct dma_chan_percpu *local; int client_count; + int table_count; }; #define to_dma_chan(p) container_of(p, struct dma_chan, dev) @@ -468,6 +470,7 @@ static inline enum dma_status dma_wait_for_async_tx(struct dma_async_tx_descript int dma_async_device_register(struct dma_device *device); void dma_async_device_unregister(struct dma_device *device); void dma_run_dependencies(struct dma_async_tx_descriptor *tx); +struct dma_chan *dma_find_channel(enum dma_transaction_type tx_type); /* --- Helper iov-locking functions --- */ -- cgit v1.2.2 From 2ba05622b8b143b0c95968ba59bddfbd6d2f2559 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 6 Jan 2009 11:38:14 -0700 Subject: dmaengine: provide a common 'issue_pending_all' implementation async_tx and net_dma each have open-coded versions of issue_pending_all, so provide a common routine in dmaengine. The implementation needs to walk the global device list, so implement rcu to allow dma_issue_pending_all to run lockless. Clients protect themselves from channel removal events by holding a dmaengine reference. Reviewed-by: Andrew Morton Signed-off-by: Dan Williams --- include/linux/async_tx.h | 2 +- include/linux/dmaengine.h | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/async_tx.h b/include/linux/async_tx.h index 1c816775f135..45f6297821bd 100644 --- a/include/linux/async_tx.h +++ b/include/linux/async_tx.h @@ -59,7 +59,7 @@ enum async_tx_flags { }; #ifdef CONFIG_DMA_ENGINE -void async_tx_issue_pending_all(void); +#define async_tx_issue_pending_all dma_issue_pending_all #ifdef CONFIG_ARCH_HAS_ASYNC_TX_FIND_CHANNEL #include #else diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h index b466f02e2433..57a43adfc39e 100644 --- a/include/linux/dmaengine.h +++ b/include/linux/dmaengine.h @@ -471,6 +471,7 @@ int dma_async_device_register(struct dma_device *device); void dma_async_device_unregister(struct dma_device *device); void dma_run_dependencies(struct dma_async_tx_descriptor *tx); struct dma_chan *dma_find_channel(enum dma_transaction_type tx_type); +void dma_issue_pending_all(void); /* --- Helper iov-locking functions --- */ -- cgit v1.2.2 From f67b45999205164958de4ec0658d51fa4bee066d Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 6 Jan 2009 11:38:15 -0700 Subject: net_dma: convert to dma_find_channel Use the general-purpose channel allocation provided by dmaengine. Reviewed-by: Andrew Morton Signed-off-by: Dan Williams --- include/linux/netdevice.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 41e1224651cf..bac2c458d9b8 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1113,9 +1113,6 @@ struct softnet_data struct sk_buff *completion_queue; struct napi_struct backlog; -#ifdef CONFIG_NET_DMA - struct dma_chan *net_dma; -#endif }; DECLARE_PER_CPU(struct softnet_data,softnet_data); -- cgit v1.2.2 From 59b5ec21446b9239d706ab237fb261d525b75e81 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 6 Jan 2009 11:38:15 -0700 Subject: dmaengine: introduce dma_request_channel and private channels This interface is primarily for device-to-memory clients which need to search for dma channels with platform-specific characteristics. The prototype is: struct dma_chan *dma_request_channel(dma_cap_mask_t mask, dma_filter_fn filter_fn, void *filter_param); When the optional 'filter_fn' parameter is set to NULL dma_request_channel simply returns the first channel that satisfies the capability mask. Otherwise, when the mask parameter is insufficient for specifying the necessary channel, the filter_fn routine can be used to disposition the available channels in the system. The filter_fn routine is called once for each free channel in the system. Upon seeing a suitable channel filter_fn returns DMA_ACK which flags that channel to be the return value from dma_request_channel. A channel allocated via this interface is exclusive to the caller, until dma_release_channel() is called. To ensure that all channels are not consumed by the general-purpose allocator the DMA_PRIVATE capability is provided to exclude a dma_device from general-purpose (memory-to-memory) consideration. Reviewed-by: Andrew Morton Acked-by: Maciej Sosnowski Signed-off-by: Dan Williams --- include/linux/dmaengine.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include/linux') diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h index 57a43adfc39e..fe40bc020af6 100644 --- a/include/linux/dmaengine.h +++ b/include/linux/dmaengine.h @@ -89,6 +89,7 @@ enum dma_transaction_type { DMA_MEMSET, DMA_MEMCPY_CRC32C, DMA_INTERRUPT, + DMA_PRIVATE, DMA_SLAVE, }; @@ -223,6 +224,18 @@ struct dma_client; typedef enum dma_state_client (*dma_event_callback) (struct dma_client *client, struct dma_chan *chan, enum dma_state state); +/** + * typedef dma_filter_fn - callback filter for dma_request_channel + * @chan: channel to be reviewed + * @filter_param: opaque parameter passed through dma_request_channel + * + * When this optional parameter is specified in a call to dma_request_channel a + * suitable channel is passed to this routine for further dispositioning before + * being returned. Where 'suitable' indicates a non-busy channel that + * satisfies the given capability mask. + */ +typedef enum dma_state_client (*dma_filter_fn)(struct dma_chan *chan, void *filter_param); + /** * struct dma_client - info on the entity making use of DMA services * @event_callback: func ptr to call when something happens @@ -472,6 +485,9 @@ void dma_async_device_unregister(struct dma_device *device); void dma_run_dependencies(struct dma_async_tx_descriptor *tx); struct dma_chan *dma_find_channel(enum dma_transaction_type tx_type); void dma_issue_pending_all(void); +#define dma_request_channel(mask, x, y) __dma_request_channel(&(mask), x, y) +struct dma_chan *__dma_request_channel(dma_cap_mask_t *mask, dma_filter_fn fn, void *fn_param); +void dma_release_channel(struct dma_chan *chan); /* --- Helper iov-locking functions --- */ -- cgit v1.2.2 From 33df8ca068123457db56c316946a3c0e4ef787d6 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 6 Jan 2009 11:38:15 -0700 Subject: dmatest: convert to dma_request_channel Replace the client registration infrastructure with a custom loop to poll for channels. Once dma_request_channel returns NULL stop asking for channels. A userspace side effect of this change if that loading the dmatest module before loading a dma driver will result in no channels being found, previously dmatest would get a callback. To facilitate testing in the built-in case dmatest_init is marked as a late_initcall. Another side effect is that channels under test can not be used for any other purpose. Cc: Haavard Skinnemoen Reviewed-by: Andrew Morton Signed-off-by: Dan Williams --- include/linux/dmaengine.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h index fe40bc020af6..6f2d070ac7f3 100644 --- a/include/linux/dmaengine.h +++ b/include/linux/dmaengine.h @@ -400,6 +400,12 @@ __dma_cap_set(enum dma_transaction_type tx_type, dma_cap_mask_t *dstp) set_bit(tx_type, dstp->bits); } +#define dma_cap_zero(mask) __dma_cap_zero(&(mask)) +static inline void __dma_cap_zero(dma_cap_mask_t *dstp) +{ + bitmap_zero(dstp->bits, DMA_TX_TYPE_END); +} + #define dma_has_cap(tx, mask) __dma_has_cap((tx), &(mask)) static inline int __dma_has_cap(enum dma_transaction_type tx_type, dma_cap_mask_t *srcp) -- cgit v1.2.2 From 74465b4ff9ac1da503025c0a0042e023bfa6505c Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 6 Jan 2009 11:38:16 -0700 Subject: atmel-mci: convert to dma_request_channel and down-level dma_slave dma_request_channel provides an exclusive channel, so we no longer need to pass slave data through dmaengine. Cc: Haavard Skinnemoen Reviewed-by: Andrew Morton Signed-off-by: Dan Williams --- include/linux/dmaengine.h | 38 -------------------------------------- include/linux/dw_dmac.h | 31 +++++++++++++++++++++++-------- 2 files changed, 23 insertions(+), 46 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h index 6f2d070ac7f3..d63544cf8a1a 100644 --- a/include/linux/dmaengine.h +++ b/include/linux/dmaengine.h @@ -96,17 +96,6 @@ enum dma_transaction_type { /* last transaction type for creation of the capabilities mask */ #define DMA_TX_TYPE_END (DMA_SLAVE + 1) -/** - * enum dma_slave_width - DMA slave register access width. - * @DMA_SLAVE_WIDTH_8BIT: Do 8-bit slave register accesses - * @DMA_SLAVE_WIDTH_16BIT: Do 16-bit slave register accesses - * @DMA_SLAVE_WIDTH_32BIT: Do 32-bit slave register accesses - */ -enum dma_slave_width { - DMA_SLAVE_WIDTH_8BIT, - DMA_SLAVE_WIDTH_16BIT, - DMA_SLAVE_WIDTH_32BIT, -}; /** * enum dma_ctrl_flags - DMA flags to augment operation preparation, @@ -132,32 +121,6 @@ enum dma_ctrl_flags { */ typedef struct { DECLARE_BITMAP(bits, DMA_TX_TYPE_END); } dma_cap_mask_t; -/** - * struct dma_slave - Information about a DMA slave - * @dev: device acting as DMA slave - * @dma_dev: required DMA master device. If non-NULL, the client can not be - * bound to other masters than this. - * @tx_reg: physical address of data register used for - * memory-to-peripheral transfers - * @rx_reg: physical address of data register used for - * peripheral-to-memory transfers - * @reg_width: peripheral register width - * - * If dma_dev is non-NULL, the client can not be bound to other DMA - * masters than the one corresponding to this device. The DMA master - * driver may use this to determine if there is controller-specific - * data wrapped around this struct. Drivers of platform code that sets - * the dma_dev field must therefore make sure to use an appropriate - * controller-specific dma slave structure wrapping this struct. - */ -struct dma_slave { - struct device *dev; - struct device *dma_dev; - dma_addr_t tx_reg; - dma_addr_t rx_reg; - enum dma_slave_width reg_width; -}; - /** * struct dma_chan_percpu - the per-CPU part of struct dma_chan * @refcount: local_t used for open-coded "bigref" counting @@ -248,7 +211,6 @@ typedef enum dma_state_client (*dma_filter_fn)(struct dma_chan *chan, void *filt struct dma_client { dma_event_callback event_callback; dma_cap_mask_t cap_mask; - struct dma_slave *slave; struct list_head global_node; }; diff --git a/include/linux/dw_dmac.h b/include/linux/dw_dmac.h index 04d217b442bf..d797dde247f7 100644 --- a/include/linux/dw_dmac.h +++ b/include/linux/dw_dmac.h @@ -21,15 +21,35 @@ struct dw_dma_platform_data { unsigned int nr_channels; }; +/** + * enum dw_dma_slave_width - DMA slave register access width. + * @DMA_SLAVE_WIDTH_8BIT: Do 8-bit slave register accesses + * @DMA_SLAVE_WIDTH_16BIT: Do 16-bit slave register accesses + * @DMA_SLAVE_WIDTH_32BIT: Do 32-bit slave register accesses + */ +enum dw_dma_slave_width { + DW_DMA_SLAVE_WIDTH_8BIT, + DW_DMA_SLAVE_WIDTH_16BIT, + DW_DMA_SLAVE_WIDTH_32BIT, +}; + /** * struct dw_dma_slave - Controller-specific information about a slave - * @slave: Generic information about the slave - * @ctl_lo: Platform-specific initializer for the CTL_LO register + * + * @dma_dev: required DMA master device + * @tx_reg: physical address of data register used for + * memory-to-peripheral transfers + * @rx_reg: physical address of data register used for + * peripheral-to-memory transfers + * @reg_width: peripheral register width * @cfg_hi: Platform-specific initializer for the CFG_HI register * @cfg_lo: Platform-specific initializer for the CFG_LO register */ struct dw_dma_slave { - struct dma_slave slave; + struct device *dma_dev; + dma_addr_t tx_reg; + dma_addr_t rx_reg; + enum dw_dma_slave_width reg_width; u32 cfg_hi; u32 cfg_lo; }; @@ -54,9 +74,4 @@ struct dw_dma_slave { #define DWC_CFGL_HS_DST_POL (1 << 18) /* dst handshake active low */ #define DWC_CFGL_HS_SRC_POL (1 << 19) /* src handshake active low */ -static inline struct dw_dma_slave *to_dw_dma_slave(struct dma_slave *slave) -{ - return container_of(slave, struct dw_dma_slave, slave); -} - #endif /* DW_DMAC_H */ -- cgit v1.2.2 From 209b84a88fe81341b4d8d465acc4a67cb7c3feb3 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 6 Jan 2009 11:38:17 -0700 Subject: dmaengine: replace dma_async_client_register with dmaengine_get Now that clients no longer need to be notified of channel arrival dma_async_client_register can simply increment the dmaengine_ref_count. Reviewed-by: Andrew Morton Signed-off-by: Dan Williams --- include/linux/dmaengine.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h index d63544cf8a1a..37d95db156d3 100644 --- a/include/linux/dmaengine.h +++ b/include/linux/dmaengine.h @@ -318,8 +318,8 @@ struct dma_device { /* --- public DMA engine API --- */ -void dma_async_client_register(struct dma_client *client); -void dma_async_client_unregister(struct dma_client *client); +void dmaengine_get(void); +void dmaengine_put(void); void dma_async_client_chan_request(struct dma_client *client); dma_cookie_t dma_async_memcpy_buf_to_buf(struct dma_chan *chan, void *dest, void *src, size_t len); -- cgit v1.2.2 From aa1e6f1a385eb2b04171ec841f3b760091e4a8ee Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 6 Jan 2009 11:38:17 -0700 Subject: dmaengine: kill struct dma_client and supporting infrastructure All users have been converted to either the general-purpose allocator, dma_find_channel, or dma_request_channel. Reviewed-by: Andrew Morton Signed-off-by: Dan Williams --- include/linux/dmaengine.h | 50 +---------------------------------------------- 1 file changed, 1 insertion(+), 49 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h index 37d95db156d3..db050e97d2b4 100644 --- a/include/linux/dmaengine.h +++ b/include/linux/dmaengine.h @@ -28,20 +28,6 @@ #include #include -/** - * enum dma_state - resource PNP/power management state - * @DMA_RESOURCE_SUSPEND: DMA device going into low power state - * @DMA_RESOURCE_RESUME: DMA device returning to full power - * @DMA_RESOURCE_AVAILABLE: DMA device available to the system - * @DMA_RESOURCE_REMOVED: DMA device removed from the system - */ -enum dma_state { - DMA_RESOURCE_SUSPEND, - DMA_RESOURCE_RESUME, - DMA_RESOURCE_AVAILABLE, - DMA_RESOURCE_REMOVED, -}; - /** * enum dma_state_client - state of the channel in the client * @DMA_ACK: client would like to use, or was using this channel @@ -170,23 +156,6 @@ struct dma_chan { void dma_chan_cleanup(struct kref *kref); -/* - * typedef dma_event_callback - function pointer to a DMA event callback - * For each channel added to the system this routine is called for each client. - * If the client would like to use the channel it returns '1' to signal (ack) - * the dmaengine core to take out a reference on the channel and its - * corresponding device. A client must not 'ack' an available channel more - * than once. When a channel is removed all clients are notified. If a client - * is using the channel it must 'ack' the removal. A client must not 'ack' a - * removed channel more than once. - * @client - 'this' pointer for the client context - * @chan - channel to be acted upon - * @state - available or removed - */ -struct dma_client; -typedef enum dma_state_client (*dma_event_callback) (struct dma_client *client, - struct dma_chan *chan, enum dma_state state); - /** * typedef dma_filter_fn - callback filter for dma_request_channel * @chan: channel to be reviewed @@ -199,21 +168,6 @@ typedef enum dma_state_client (*dma_event_callback) (struct dma_client *client, */ typedef enum dma_state_client (*dma_filter_fn)(struct dma_chan *chan, void *filter_param); -/** - * struct dma_client - info on the entity making use of DMA services - * @event_callback: func ptr to call when something happens - * @cap_mask: only return channels that satisfy the requested capabilities - * a value of zero corresponds to any capability - * @slave: data for preparing slave transfer. Must be non-NULL iff the - * DMA_SLAVE capability is requested. - * @global_node: list_head for global dma_client_list - */ -struct dma_client { - dma_event_callback event_callback; - dma_cap_mask_t cap_mask; - struct list_head global_node; -}; - typedef void (*dma_async_tx_callback)(void *dma_async_param); /** * struct dma_async_tx_descriptor - async transaction descriptor @@ -285,8 +239,7 @@ struct dma_device { int dev_id; struct device *dev; - int (*device_alloc_chan_resources)(struct dma_chan *chan, - struct dma_client *client); + int (*device_alloc_chan_resources)(struct dma_chan *chan); void (*device_free_chan_resources)(struct dma_chan *chan); struct dma_async_tx_descriptor *(*device_prep_dma_memcpy)( @@ -320,7 +273,6 @@ struct dma_device { void dmaengine_get(void); void dmaengine_put(void); -void dma_async_client_chan_request(struct dma_client *client); dma_cookie_t dma_async_memcpy_buf_to_buf(struct dma_chan *chan, void *dest, void *src, size_t len); dma_cookie_t dma_async_memcpy_buf_to_pg(struct dma_chan *chan, -- cgit v1.2.2 From f27c580c3628d79b17f38976d842a6d7f3616e2e Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 6 Jan 2009 11:38:18 -0700 Subject: dmaengine: remove 'bigref' infrastructure Reference counting is done at the module level so clients need not worry that a channel will leave while they are actively using dmaengine. Reviewed-by: Andrew Morton Signed-off-by: Dan Williams --- include/linux/dmaengine.h | 7 ------- 1 file changed, 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h index db050e97d2b4..bca2fc758894 100644 --- a/include/linux/dmaengine.h +++ b/include/linux/dmaengine.h @@ -142,10 +142,6 @@ struct dma_chan { int chan_id; struct device dev; - struct kref refcount; - int slow_ref; - struct rcu_head rcu; - struct list_head device_node; struct dma_chan_percpu *local; int client_count; @@ -233,9 +229,6 @@ struct dma_device { dma_cap_mask_t cap_mask; int max_xor; - struct kref refcount; - struct completion done; - int dev_id; struct device *dev; -- cgit v1.2.2 From 7dd602510128d7a64b11ff3b7d4f30ac8e3946ce Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 6 Jan 2009 11:38:19 -0700 Subject: dmaengine: kill enum dma_state_client DMA_NAK is now useless. We can just use a bool instead. Reviewed-by: Andrew Morton Signed-off-by: Dan Williams --- include/linux/dmaengine.h | 17 +++-------------- 1 file changed, 3 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h index bca2fc758894..1419a5094478 100644 --- a/include/linux/dmaengine.h +++ b/include/linux/dmaengine.h @@ -28,18 +28,6 @@ #include #include -/** - * enum dma_state_client - state of the channel in the client - * @DMA_ACK: client would like to use, or was using this channel - * @DMA_DUP: client has already seen this channel, or is not using this channel - * @DMA_NAK: client does not want to see any more channels - */ -enum dma_state_client { - DMA_ACK, - DMA_DUP, - DMA_NAK, -}; - /** * typedef dma_cookie_t - an opaque DMA cookie * @@ -160,9 +148,10 @@ void dma_chan_cleanup(struct kref *kref); * When this optional parameter is specified in a call to dma_request_channel a * suitable channel is passed to this routine for further dispositioning before * being returned. Where 'suitable' indicates a non-busy channel that - * satisfies the given capability mask. + * satisfies the given capability mask. It returns 'true' to indicate that the + * channel is suitable. */ -typedef enum dma_state_client (*dma_filter_fn)(struct dma_chan *chan, void *filter_param); +typedef bool (*dma_filter_fn)(struct dma_chan *chan, void *filter_param); typedef void (*dma_async_tx_callback)(void *dma_async_param); /** -- cgit v1.2.2 From 41d5e59c1299f27983977bcfe3b360600996051c Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 6 Jan 2009 11:38:21 -0700 Subject: dmaengine: add a release for dma class devices and dependent infrastructure Resolves: WARNING: at drivers/base/core.c:122 device_release+0x4d/0x52() Device 'dma0chan0' does not have a release() function, it is broken and must be fixed. The dma_chan_dev object is introduced to gear-match sysfs kobject and dmaengine channel lifetimes. When a channel is removed access to the sysfs entries return -ENODEV until the kobject can be released. The bulk of the change is updates to existing code to handle the extra layer of indirection between a dma_chan and its struct device. Reported-by: Alexander Beregalov Acked-by: Stephen Hemminger Cc: Haavard Skinnemoen Signed-off-by: Dan Williams --- include/linux/dmaengine.h | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h index 1419a5094478..d6b6bff355f4 100644 --- a/include/linux/dmaengine.h +++ b/include/linux/dmaengine.h @@ -113,7 +113,7 @@ struct dma_chan_percpu { * @device: ptr to the dma device who supplies this channel, always !%NULL * @cookie: last cookie value returned to client * @chan_id: channel ID for sysfs - * @class_dev: class device for sysfs + * @dev: class device for sysfs * @refcount: kref, used in "bigref" slow-mode * @slow_ref: indicates that the DMA channel is free * @rcu: the DMA channel's RCU head @@ -128,7 +128,7 @@ struct dma_chan { /* sysfs */ int chan_id; - struct device dev; + struct dma_chan_dev *dev; struct list_head device_node; struct dma_chan_percpu *local; @@ -136,7 +136,20 @@ struct dma_chan { int table_count; }; -#define to_dma_chan(p) container_of(p, struct dma_chan, dev) +/** + * struct dma_chan_dev - relate sysfs device node to backing channel device + * @chan - driver channel device + * @device - sysfs device + */ +struct dma_chan_dev { + struct dma_chan *chan; + struct device device; +}; + +static inline const char *dma_chan_name(struct dma_chan *chan) +{ + return dev_name(&chan->dev->device); +} void dma_chan_cleanup(struct kref *kref); -- cgit v1.2.2 From 864498aaa9fef69ee166da023d12413a7776342d Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 6 Jan 2009 11:38:21 -0700 Subject: dmaengine: use idr for registering dma device numbers This brings some predictability to dma device numbers, i.e. an rmmod/insmod cycle may now result in /sys/class/dma/dma0chan0 being restored rather than /sys/class/dma/dma1chan0 appearing. Cc: Maciej Sosnowski Signed-off-by: Dan Williams --- include/linux/dmaengine.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h index d6b6bff355f4..64dea2ab326c 100644 --- a/include/linux/dmaengine.h +++ b/include/linux/dmaengine.h @@ -140,10 +140,14 @@ struct dma_chan { * struct dma_chan_dev - relate sysfs device node to backing channel device * @chan - driver channel device * @device - sysfs device + * @dev_id - parent dma_device dev_id + * @idr_ref - reference count to gate release of dma_device dev_id */ struct dma_chan_dev { struct dma_chan *chan; struct device device; + int dev_id; + atomic_t *idr_ref; }; static inline const char *dma_chan_name(struct dma_chan *chan) -- cgit v1.2.2 From adf094931ffb25ef4b381559918f1a34181a5273 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 6 Oct 2008 22:46:05 +0200 Subject: PM: Simplify the new suspend/hibernation framework for devices PM: Simplify the new suspend/hibernation framework for devices Following the discussion at the Kernel Summit, simplify the new device PM framework by merging 'struct pm_ops' and 'struct pm_ext_ops' and removing pointers to 'struct pm_ext_ops' from 'struct platform_driver' and 'struct pci_driver'. After this change, the suspend/hibernation callbacks will only reside in 'struct device_driver' as well as at the bus type/ device class/device type level. Accordingly, PCI and platform device drivers are now expected to put their suspend/hibernation callbacks into the 'struct device_driver' embedded in 'struct pci_driver' or 'struct platform_driver', respectively. Signed-off-by: Rafael J. Wysocki Acked-by: Pavel Machek Cc: Jesse Barnes Signed-off-by: Greg Kroah-Hartman --- include/linux/device.h | 8 ++--- include/linux/pci.h | 1 - include/linux/platform_device.h | 1 - include/linux/pm.h | 76 ++++++++++++++--------------------------- 4 files changed, 29 insertions(+), 57 deletions(-) (limited to 'include/linux') diff --git a/include/linux/device.h b/include/linux/device.h index 1a3686d15f98..4a520051c315 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -65,7 +65,7 @@ struct bus_type { int (*resume_early)(struct device *dev); int (*resume)(struct device *dev); - struct pm_ext_ops *pm; + struct dev_pm_ops *pm; struct bus_type_private *p; }; @@ -133,7 +133,7 @@ struct device_driver { int (*resume) (struct device *dev); struct attribute_group **groups; - struct pm_ops *pm; + struct dev_pm_ops *pm; struct driver_private *p; }; @@ -198,7 +198,7 @@ struct class { int (*suspend)(struct device *dev, pm_message_t state); int (*resume)(struct device *dev); - struct pm_ops *pm; + struct dev_pm_ops *pm; struct class_private *p; }; @@ -291,7 +291,7 @@ struct device_type { int (*suspend)(struct device *dev, pm_message_t state); int (*resume)(struct device *dev); - struct pm_ops *pm; + struct dev_pm_ops *pm; }; /* interface for exporting device attributes */ diff --git a/include/linux/pci.h b/include/linux/pci.h index 03b0b8c3c81b..4bb156ba854a 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -421,7 +421,6 @@ struct pci_driver { int (*resume_early) (struct pci_dev *dev); int (*resume) (struct pci_dev *dev); /* Device woken up */ void (*shutdown) (struct pci_dev *dev); - struct pm_ext_ops *pm; struct pci_error_handlers *err_handler; struct device_driver driver; struct pci_dynids dynids; diff --git a/include/linux/platform_device.h b/include/linux/platform_device.h index 4b8cc6a32479..9a342699c607 100644 --- a/include/linux/platform_device.h +++ b/include/linux/platform_device.h @@ -55,7 +55,6 @@ struct platform_driver { int (*suspend_late)(struct platform_device *, pm_message_t state); int (*resume_early)(struct platform_device *); int (*resume)(struct platform_device *); - struct pm_ext_ops *pm; struct device_driver driver; }; diff --git a/include/linux/pm.h b/include/linux/pm.h index 42de4003c4ee..5785666d0cc4 100644 --- a/include/linux/pm.h +++ b/include/linux/pm.h @@ -41,7 +41,7 @@ typedef struct pm_message { } pm_message_t; /** - * struct pm_ops - device PM callbacks + * struct dev_pm_ops - device PM callbacks * * Several driver power state transitions are externally visible, affecting * the state of pending I/O queues and (for drivers that touch hardware) @@ -126,46 +126,6 @@ typedef struct pm_message { * On most platforms, there are no restrictions on availability of * resources like clocks during @restore(). * - * All of the above callbacks, except for @complete(), return error codes. - * However, the error codes returned by the resume operations, @resume(), - * @thaw(), and @restore(), do not cause the PM core to abort the resume - * transition during which they are returned. The error codes returned in - * that cases are only printed by the PM core to the system logs for debugging - * purposes. Still, it is recommended that drivers only return error codes - * from their resume methods in case of an unrecoverable failure (i.e. when the - * device being handled refuses to resume and becomes unusable) to allow us to - * modify the PM core in the future, so that it can avoid attempting to handle - * devices that failed to resume and their children. - * - * It is allowed to unregister devices while the above callbacks are being - * executed. However, it is not allowed to unregister a device from within any - * of its own callbacks. - */ - -struct pm_ops { - int (*prepare)(struct device *dev); - void (*complete)(struct device *dev); - int (*suspend)(struct device *dev); - int (*resume)(struct device *dev); - int (*freeze)(struct device *dev); - int (*thaw)(struct device *dev); - int (*poweroff)(struct device *dev); - int (*restore)(struct device *dev); -}; - -/** - * struct pm_ext_ops - extended device PM callbacks - * - * Some devices require certain operations related to suspend and hibernation - * to be carried out with interrupts disabled. Thus, 'struct pm_ext_ops' below - * is defined, adding callbacks to be executed with interrupts disabled to - * 'struct pm_ops'. - * - * The following callbacks included in 'struct pm_ext_ops' are executed with - * the nonboot CPUs switched off and with interrupts disabled on the only - * functional CPU. They also are executed with the PM core list of devices - * locked, so they must NOT unregister any devices. - * * @suspend_noirq: Complete the operations of ->suspend() by carrying out any * actions required for suspending the device that need interrupts to be * disabled @@ -190,18 +150,32 @@ struct pm_ops { * actions required for restoring the operations of the device that need * interrupts to be disabled * - * All of the above callbacks return error codes, but the error codes returned - * by the resume operations, @resume_noirq(), @thaw_noirq(), and - * @restore_noirq(), do not cause the PM core to abort the resume transition - * during which they are returned. The error codes returned in that cases are - * only printed by the PM core to the system logs for debugging purposes. - * Still, as stated above, it is recommended that drivers only return error - * codes from their resume methods if the device being handled fails to resume - * and is not usable any more. + * All of the above callbacks, except for @complete(), return error codes. + * However, the error codes returned by the resume operations, @resume(), + * @thaw(), @restore(), @resume_noirq(), @thaw_noirq(), and @restore_noirq() do + * not cause the PM core to abort the resume transition during which they are + * returned. The error codes returned in that cases are only printed by the PM + * core to the system logs for debugging purposes. Still, it is recommended + * that drivers only return error codes from their resume methods in case of an + * unrecoverable failure (i.e. when the device being handled refuses to resume + * and becomes unusable) to allow us to modify the PM core in the future, so + * that it can avoid attempting to handle devices that failed to resume and + * their children. + * + * It is allowed to unregister devices while the above callbacks are being + * executed. However, it is not allowed to unregister a device from within any + * of its own callbacks. */ -struct pm_ext_ops { - struct pm_ops base; +struct dev_pm_ops { + int (*prepare)(struct device *dev); + void (*complete)(struct device *dev); + int (*suspend)(struct device *dev); + int (*resume)(struct device *dev); + int (*freeze)(struct device *dev); + int (*thaw)(struct device *dev); + int (*poweroff)(struct device *dev); + int (*restore)(struct device *dev); int (*suspend_noirq)(struct device *dev); int (*resume_noirq)(struct device *dev); int (*freeze_noirq)(struct device *dev); -- cgit v1.2.2 From 7f4f5d4516b441d712fa0ffe5380618fb7fc545e Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Mon, 17 Nov 2008 11:14:19 -0500 Subject: Fix misspellings in pm.h macros This patch (as1167) fixes some misspellings in various recently-added macros in pm.h. Fortunately these macros are not yet used anywhere. Signed-off-by: Alan Stern Acked-by: Rafael J. Wysocki --- include/linux/pm.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pm.h b/include/linux/pm.h index 5785666d0cc4..de2e0a8f6728 100644 --- a/include/linux/pm.h +++ b/include/linux/pm.h @@ -252,7 +252,7 @@ struct dev_pm_ops { #define PM_EVENT_SLEEP (PM_EVENT_SUSPEND | PM_EVENT_HIBERNATE) #define PM_EVENT_USER_SUSPEND (PM_EVENT_USER | PM_EVENT_SUSPEND) #define PM_EVENT_USER_RESUME (PM_EVENT_USER | PM_EVENT_RESUME) -#define PM_EVENT_REMOTE_WAKEUP (PM_EVENT_REMOTE | PM_EVENT_RESUME) +#define PM_EVENT_REMOTE_RESUME (PM_EVENT_REMOTE | PM_EVENT_RESUME) #define PM_EVENT_AUTO_SUSPEND (PM_EVENT_AUTO | PM_EVENT_SUSPEND) #define PM_EVENT_AUTO_RESUME (PM_EVENT_AUTO | PM_EVENT_RESUME) @@ -265,15 +265,15 @@ struct dev_pm_ops { #define PMSG_THAW ((struct pm_message){ .event = PM_EVENT_THAW, }) #define PMSG_RESTORE ((struct pm_message){ .event = PM_EVENT_RESTORE, }) #define PMSG_RECOVER ((struct pm_message){ .event = PM_EVENT_RECOVER, }) -#define PMSG_USER_SUSPEND ((struct pm_messge) \ +#define PMSG_USER_SUSPEND ((struct pm_message) \ { .event = PM_EVENT_USER_SUSPEND, }) -#define PMSG_USER_RESUME ((struct pm_messge) \ +#define PMSG_USER_RESUME ((struct pm_message) \ { .event = PM_EVENT_USER_RESUME, }) -#define PMSG_REMOTE_RESUME ((struct pm_messge) \ +#define PMSG_REMOTE_RESUME ((struct pm_message) \ { .event = PM_EVENT_REMOTE_RESUME, }) -#define PMSG_AUTO_SUSPEND ((struct pm_messge) \ +#define PMSG_AUTO_SUSPEND ((struct pm_message) \ { .event = PM_EVENT_AUTO_SUSPEND, }) -#define PMSG_AUTO_RESUME ((struct pm_messge) \ +#define PMSG_AUTO_RESUME ((struct pm_message) \ { .event = PM_EVENT_AUTO_RESUME, }) /** -- cgit v1.2.2 From 929d2fa5955ab27aa21fac615b23e0e92e8dc3a0 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Thu, 16 Oct 2008 15:51:35 -0600 Subject: driver core: Rearrange struct device for better packing This minor rearrangement saves 16 bytes from sizeof(struct device) according to pahole. Signed-off-by: Matthew Wilcox Signed-off-by: Greg Kroah-Hartman --- include/linux/device.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/device.h b/include/linux/device.h index 4a520051c315..4e14fad41430 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -373,9 +373,9 @@ struct device { struct kobject kobj; char bus_id[BUS_ID_SIZE]; /* position on parent bus */ + unsigned uevent_suppress:1; const char *init_name; /* initial name of the device */ struct device_type *type; - unsigned uevent_suppress:1; struct semaphore sem; /* semaphore to synchronize calls to * its driver. @@ -408,12 +408,13 @@ struct device { /* arch specific additions */ struct dev_archdata archdata; + dev_t devt; /* dev_t, creates the sysfs "dev" */ + spinlock_t devres_lock; struct list_head devres_head; struct klist_node knode_class; struct class *class; - dev_t devt; /* dev_t, creates the sysfs "dev" */ struct attribute_group **groups; /* optional groups */ void (*release)(struct device *dev); -- cgit v1.2.2 From 210272a28465a7a31bcd580d2f9529f924965aa5 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Thu, 16 Oct 2008 14:57:54 -0600 Subject: driver core: Remove completion from struct klist_node Removing the completion from klist_node reduces its size from 64 bytes to 28 on x86-64. To maintain the semantics of klist_remove(), we add a single list of klist nodes which are pending deletion and scan them. Signed-off-by: Matthew Wilcox Signed-off-by: Greg Kroah-Hartman --- include/linux/klist.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/klist.h b/include/linux/klist.h index 8ea98db223e5..d5a27af9dba5 100644 --- a/include/linux/klist.h +++ b/include/linux/klist.h @@ -13,7 +13,6 @@ #define _LINUX_KLIST_H #include -#include #include #include @@ -41,7 +40,6 @@ struct klist_node { void *n_klist; /* never access directly */ struct list_head n_node; struct kref n_ref; - struct completion n_removed; }; extern void klist_add_tail(struct klist_node *n, struct klist *k); -- cgit v1.2.2 From 2831fe6f9cc4e16c103504ee09a47a084297c0f3 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 16 Dec 2008 12:23:36 -0800 Subject: driver core: create a private portion of struct device This is to be used to move things out of struct device that no code outside of the driver core should ever touch. Cc: Kay Sievers Signed-off-by: Greg Kroah-Hartman --- include/linux/device.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/device.h b/include/linux/device.h index 4e14fad41430..d6d34084fd37 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -28,6 +28,7 @@ #define BUS_ID_SIZE 20 struct device; +struct device_private; struct device_driver; struct driver_private; struct class; @@ -371,6 +372,8 @@ struct device { struct klist_node knode_bus; struct device *parent; + struct device_private *p; + struct kobject kobj; char bus_id[BUS_ID_SIZE]; /* position on parent bus */ unsigned uevent_suppress:1; -- cgit v1.2.2 From 11c3b5c3e08f4d855cbef52883c266b9ab9df879 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 16 Dec 2008 12:24:56 -0800 Subject: driver core: move klist_children into private structure Nothing outside of the driver core should ever touch klist_children, or knode_parent, so move them out of the public eye. Cc: Kay Sievers Signed-off-by: Greg Kroah-Hartman --- include/linux/device.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/device.h b/include/linux/device.h index d6d34084fd37..60423e687205 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -366,8 +366,6 @@ struct device_dma_parameters { }; struct device { - struct klist klist_children; - struct klist_node knode_parent; /* node in sibling list */ struct klist_node knode_driver; struct klist_node knode_bus; struct device *parent; -- cgit v1.2.2 From 93e746db183b3bdbbda67900f79b5835f9cb388f Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 16 Dec 2008 12:25:49 -0800 Subject: driver core: move knode_driver into private structure Nothing outside of the driver core should ever touch knode_driver, so move it out of the public eye. Cc: Kay Sievers Signed-off-by: Greg Kroah-Hartman --- include/linux/device.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/device.h b/include/linux/device.h index 60423e687205..e3630222c3c1 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -366,7 +366,6 @@ struct device_dma_parameters { }; struct device { - struct klist_node knode_driver; struct klist_node knode_bus; struct device *parent; -- cgit v1.2.2 From b9daa99ee533578e3f88231e7a16784dcb44ec42 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 16 Dec 2008 12:26:21 -0800 Subject: driver core: move knode_bus into private structure Nothing outside of the driver core should ever touch knode_bus, so move it out of the public eye. Cc: Kay Sievers Signed-off-by: Greg Kroah-Hartman --- include/linux/device.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/device.h b/include/linux/device.h index e3630222c3c1..e21b5d69d67c 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -366,7 +366,6 @@ struct device_dma_parameters { }; struct device { - struct klist_node knode_bus; struct device *parent; struct device_private *p; -- cgit v1.2.2 From d0d85ff989222f08dd1fa66321fef5567bbc4a7b Mon Sep 17 00:00:00 2001 From: Cornelia Huck Date: Thu, 4 Dec 2008 16:55:47 +0100 Subject: Make DEBUG take precedence over DYNAMIC_PRINTK_DEBUG Statically defined DEBUG should take precedence over dynamically enabled debugging; otherwise adding DEBUG (like, for example, via CONFIG_DEBUG_KOBJECT) does not have the expected result of printing pr_debug() and dev_dbg() messages unconditionally. Signed-off-by: Cornelia Huck Acked-by: Jason Baron Signed-off-by: Greg Kroah-Hartman --- include/linux/device.h | 8 ++++---- include/linux/kernel.h | 8 ++++---- 2 files changed, 8 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/device.h b/include/linux/device.h index e21b5d69d67c..b97a0cf1eb05 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -553,13 +553,13 @@ extern const char *dev_driver_string(const struct device *dev); #define dev_info(dev, format, arg...) \ dev_printk(KERN_INFO , dev , format , ## arg) -#if defined(CONFIG_DYNAMIC_PRINTK_DEBUG) +#if defined(DEBUG) +#define dev_dbg(dev, format, arg...) \ + dev_printk(KERN_DEBUG , dev , format , ## arg) +#elif defined(CONFIG_DYNAMIC_PRINTK_DEBUG) #define dev_dbg(dev, format, ...) do { \ dynamic_dev_dbg(dev, format, ##__VA_ARGS__); \ } while (0) -#elif defined(DEBUG) -#define dev_dbg(dev, format, arg...) \ - dev_printk(KERN_DEBUG , dev , format , ## arg) #else #define dev_dbg(dev, format, arg...) \ ({ if (0) dev_printk(KERN_DEBUG, dev, format, ##arg); 0; }) diff --git a/include/linux/kernel.h b/include/linux/kernel.h index ca9ff6411dfa..d242fe1381fd 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -349,13 +349,13 @@ static inline char *pack_hex_byte(char *buf, u8 byte) printk(KERN_INFO pr_fmt(fmt), ##__VA_ARGS__) /* If you are writing a driver, please use dev_dbg instead */ -#if defined(CONFIG_DYNAMIC_PRINTK_DEBUG) +#if defined(DEBUG) +#define pr_debug(fmt, ...) \ + printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__) +#elif defined(CONFIG_DYNAMIC_PRINTK_DEBUG) #define pr_debug(fmt, ...) do { \ dynamic_pr_debug(pr_fmt(fmt), ##__VA_ARGS__); \ } while (0) -#elif defined(DEBUG) -#define pr_debug(fmt, ...) \ - printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__) #else #define pr_debug(fmt, ...) \ ({ if (0) printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__); 0; }) -- cgit v1.2.2 From 0aa0dc41bfd993491c2344870eee7a3b218551fb Mon Sep 17 00:00:00 2001 From: Mark McLoughlin Date: Mon, 15 Dec 2008 12:58:26 +0000 Subject: driver core: add root_device_register() Add support for allocating root device objects which group device objects under /sys/devices directories. Also add a sysfs 'module' symlink which points to the owner of the root device object. This symlink will be used in virtio to allow userspace to determine which virtio bus implementation a given device is associated with. [Includes suggestions from Cornelia Huck] Signed-off-by: Mark McLoughlin Signed-off-by: Greg Kroah-Hartman --- include/linux/device.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/device.h b/include/linux/device.h index b97a0cf1eb05..7d9da4b4993f 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -482,6 +482,17 @@ extern struct device *device_find_child(struct device *dev, void *data, extern int device_rename(struct device *dev, char *new_name); extern int device_move(struct device *dev, struct device *new_parent); +/* + * Root device objects for grouping under /sys/devices + */ +extern struct device *__root_device_register(const char *name, + struct module *owner); +static inline struct device *root_device_register(const char *name) +{ + return __root_device_register(name, THIS_MODULE); +} +extern void root_device_unregister(struct device *root); + /* * Manual binding of a device to driver. See drivers/base/bus.c * for information on use. -- cgit v1.2.2 From 475b44c19913b877537c8bc19799f75b0b142641 Mon Sep 17 00:00:00 2001 From: Kay Sievers Date: Tue, 6 Jan 2009 10:44:38 -0800 Subject: mtd: struct device - replace bus_id with dev_name(), dev_set_name() CC: David Woodhouse Signed-off-by: Kay Sievers Signed-off-by: Greg Kroah-Hartman --- include/linux/mtd/concat.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mtd/concat.h b/include/linux/mtd/concat.h index c02f3d264ecf..e80c674daeb3 100644 --- a/include/linux/mtd/concat.h +++ b/include/linux/mtd/concat.h @@ -13,7 +13,7 @@ struct mtd_info *mtd_concat_create( struct mtd_info *subdev[], /* subdevices to concatenate */ int num_devs, /* number of subdevices */ - char *name); /* name for the new device */ + const char *name); /* name for the new device */ void mtd_concat_destroy(struct mtd_info *mtd); -- cgit v1.2.2 From e70c412ee45332db2636a8f5a35a0685efb0e4aa Mon Sep 17 00:00:00 2001 From: "Hans J. Koch" Date: Sat, 6 Dec 2008 02:23:13 +0100 Subject: UIO: Pass information about ioports to userspace (V2) Devices sometimes have memory where all or parts of it can not be mapped to userspace. But it might still be possible to access this memory from userspace by other means. An example are PCI cards that advertise not only mappable memory but also ioport ranges. On x86 architectures, these can be accessed with ioperm, iopl, inb, outb, and friends. Mike Frysinger (CCed) reported a similar problem on Blackfin arch where it doesn't seem to be easy to mmap non-cached memory but it can still be accessed from userspace. This patch allows kernel drivers to pass information about such ports to userspace. Similar to the existing mem[] array, it adds a port[] array to struct uio_info. Each port range is described by start, size, and porttype. If a driver fills in at least one such port range, the UIO core will simply pass this information to userspace by creating a new directory "portio" underneath /sys/class/uio/uioN/. Similar to the "mem" directory, it will contain a subdirectory (portX) for each port range given. Note that UIO simply passes this information to userspace, it performs no action whatsoever with this data. It's userspace's responsibility to obtain access to these ports and to solve arch dependent issues. The "porttype" attribute tells userspace what kind of port it is dealing with. This mechanism could also be used to give userspace information about GPIOs related to a device. You frequently find such hardware in embedded devices, so I added a UIO_PORT_GPIO definition. I'm not really sure if this is a good idea since there are other solutions to this problem, but it won't hurt much anyway. Signed-off-by: Hans J. Koch Signed-off-by: Greg Kroah-Hartman --- include/linux/uio_driver.h | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) (limited to 'include/linux') diff --git a/include/linux/uio_driver.h b/include/linux/uio_driver.h index cdf338d94b7f..20be327bfbb4 100644 --- a/include/linux/uio_driver.h +++ b/include/linux/uio_driver.h @@ -38,6 +38,24 @@ struct uio_mem { #define MAX_UIO_MAPS 5 +struct uio_portio; + +/** + * struct uio_port - description of a UIO port region + * @start: start of port region + * @size: size of port region + * @porttype: type of port (see UIO_PORT_* below) + * @portio: for use by the UIO core only. + */ +struct uio_port { + unsigned long start; + unsigned long size; + int porttype; + struct uio_portio *portio; +}; + +#define MAX_UIO_PORT_REGIONS 5 + struct uio_device; /** @@ -46,6 +64,7 @@ struct uio_device; * @name: device name * @version: device driver version * @mem: list of mappable memory regions, size==0 for end of list + * @port: list of port regions, size==0 for end of list * @irq: interrupt number or UIO_IRQ_CUSTOM * @irq_flags: flags for request_irq() * @priv: optional private data @@ -60,6 +79,7 @@ struct uio_info { char *name; char *version; struct uio_mem mem[MAX_UIO_MAPS]; + struct uio_port port[MAX_UIO_PORT_REGIONS]; long irq; unsigned long irq_flags; void *priv; @@ -92,4 +112,10 @@ extern void uio_event_notify(struct uio_info *info); #define UIO_MEM_LOGICAL 2 #define UIO_MEM_VIRTUAL 3 +/* defines for uio_port->porttype */ +#define UIO_PORT_NONE 0 +#define UIO_PORT_X86 1 +#define UIO_PORT_GPIO 2 +#define UIO_PORT_OTHER 3 + #endif /* _LINUX_UIO_DRIVER_H_ */ -- cgit v1.2.2 From b8ac9fc0e8cda9f9776019c5b0464b0c6d2d4c90 Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Fri, 12 Dec 2008 11:44:21 +0100 Subject: uio: make uio_info's name and version const These are only ever assigned constant strings and never modified. This was noticed because Wolfram Sang needed to cast the result of of_get_property() in order to assign it to the name field of a struct uio_info. Signed-off-by: Stephen Rothwell Signed-off-by: Hans J. Koch Signed-off-by: Greg Kroah-Hartman --- include/linux/uio_driver.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/uio_driver.h b/include/linux/uio_driver.h index 20be327bfbb4..a0bb6bd2e5c1 100644 --- a/include/linux/uio_driver.h +++ b/include/linux/uio_driver.h @@ -76,8 +76,8 @@ struct uio_device; */ struct uio_info { struct uio_device *uio_dev; - char *name; - char *version; + const char *name; + const char *version; struct uio_mem mem[MAX_UIO_MAPS]; struct uio_port port[MAX_UIO_PORT_REGIONS]; long irq; -- cgit v1.2.2 From 96e93eab20337d063c70d537bd7bc70d90e04fa3 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 6 Jan 2009 10:49:34 -0800 Subject: gro: Add internal interfaces for VLAN Previously GRO's only entry point from the outside is through napi_gro_receive and napi_gro_frags. These interfaces are for device drivers. This patch rearranges things to provide a new set of interfaces for VLANs. These interfaces are for internal use only. The VLAN code itself can then provide a set of entry points for device drivers. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- include/linux/netdevice.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index c28bbba3c23d..114091be8872 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1373,8 +1373,14 @@ extern int netif_rx_ni(struct sk_buff *skb); #define HAVE_NETIF_RECEIVE_SKB 1 extern int netif_receive_skb(struct sk_buff *skb); extern void napi_gro_flush(struct napi_struct *napi); +extern int dev_gro_receive(struct napi_struct *napi, + struct sk_buff *skb); extern int napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb); +extern void napi_reuse_skb(struct napi_struct *napi, + struct sk_buff *skb); +extern struct sk_buff * napi_fraginfo_skb(struct napi_struct *napi, + struct napi_gro_fraginfo *info); extern int napi_gro_frags(struct napi_struct *napi, struct napi_gro_fraginfo *info); extern void netif_nit_deliver(struct sk_buff *skb); -- cgit v1.2.2 From e1c096e251e52773afeffbbcb74d0a072be47ea3 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 6 Jan 2009 10:50:09 -0800 Subject: vlan: Add GRO interfaces This patch adds GRO interfaces for hardware-assisted VLAN reception. With this in place we're now at parity with LRO as far as the interface is concerned. That is, you can now take any LRO driver and convert it over to GRO. As the CB memory clashes with GRO's use of CB, I've removed it entirely by storing dev in skb->dev. This is OK because VLAN gets called first thing in netif_receive_skb and skb->dev is not used in between us calling netif_rx and netif_receive_skb getting called. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- include/linux/if_vlan.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'include/linux') diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index a5cb0c3f6dcf..f8ff918c208f 100644 --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -115,6 +115,11 @@ extern u16 vlan_dev_vlan_id(const struct net_device *dev); extern int __vlan_hwaccel_rx(struct sk_buff *skb, struct vlan_group *grp, u16 vlan_tci, int polling); extern int vlan_hwaccel_do_receive(struct sk_buff *skb); +extern int vlan_gro_receive(struct napi_struct *napi, struct vlan_group *grp, + unsigned int vlan_tci, struct sk_buff *skb); +extern int vlan_gro_frags(struct napi_struct *napi, struct vlan_group *grp, + unsigned int vlan_tci, + struct napi_gro_fraginfo *info); #else static inline struct net_device *vlan_dev_real_dev(const struct net_device *dev) @@ -140,6 +145,20 @@ static inline int vlan_hwaccel_do_receive(struct sk_buff *skb) { return 0; } + +static inline int vlan_gro_receive(struct napi_struct *napi, + struct vlan_group *grp, + unsigned int vlan_tci, struct sk_buff *skb) +{ + return NET_RX_DROP; +} + +static inline int vlan_gro_frags(struct napi_struct *napi, + struct vlan_group *grp, unsigned int vlan_tci, + struct napi_gro_fraginfo *info) +{ + return NET_RX_DROP; +} #endif /** -- cgit v1.2.2 From 1fa17d4ba43d7e5aab5e90777b07da06524f6748 Mon Sep 17 00:00:00 2001 From: Oliver Hartkopp Date: Tue, 6 Jan 2009 11:07:54 -0800 Subject: can: omit unneeded skb_clone() calls The AF_CAN core delivered always cloned sk_buffs to the AF_CAN protocols, although this was _only_ needed by the can-raw protocol. With this (additionally documented) change, the AF_CAN core calls the callback functions of the registered AF_CAN protocols with the original (uncloned) sk_buff pointer and let's the can-raw protocol do the skb_clone() itself which omits all unneeded skb_clone() calls for other AF_CAN protocols. Signed-off-by: Oliver Hartkopp Signed-off-by: Urs Thuermann Signed-off-by: David S. Miller --- include/linux/can/core.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/can/core.h b/include/linux/can/core.h index f50785ad4781..25085cbadcfc 100644 --- a/include/linux/can/core.h +++ b/include/linux/can/core.h @@ -19,7 +19,7 @@ #include #include -#define CAN_VERSION "20081130" +#define CAN_VERSION "20090105" /* increment this number each time you change some user-space interface */ #define CAN_ABI_VERSION "8" -- cgit v1.2.2 From 29881c4502ba05f46bc12ae8053d4e08d7e2615c Mon Sep 17 00:00:00 2001 From: James Morris Date: Wed, 7 Jan 2009 09:21:54 +1100 Subject: Revert "CRED: Fix regression in cap_capable() as shown up by sys_faccessat() [ver #2]" This reverts commit 14eaddc967b16017d4a1a24d2be6c28ecbe06ed8. David has a better version to come. --- include/linux/capability.h | 17 ++-------------- include/linux/security.h | 49 +++++++++------------------------------------- 2 files changed, 11 insertions(+), 55 deletions(-) (limited to 'include/linux') diff --git a/include/linux/capability.h b/include/linux/capability.h index 5b8a13214451..e22f48c2a46f 100644 --- a/include/linux/capability.h +++ b/include/linux/capability.h @@ -529,21 +529,8 @@ extern const kernel_cap_t __cap_init_eff_set; * * Note that this does not set PF_SUPERPRIV on the task. */ -#define has_capability(t, cap) (security_task_capable((t), (cap)) == 0) - -/** - * has_capability_noaudit - Determine if a task has a superior capability available (unaudited) - * @t: The task in question - * @cap: The capability to be tested for - * - * Return true if the specified task has the given superior capability - * currently in effect, false if not, but don't write an audit message for the - * check. - * - * Note that this does not set PF_SUPERPRIV on the task. - */ -#define has_capability_noaudit(t, cap) \ - (security_task_capable_noaudit((t), (cap)) == 0) +#define has_capability(t, cap) (security_capable((t), (cap)) == 0) +#define has_capability_noaudit(t, cap) (security_capable_noaudit((t), (cap)) == 0) extern int capable(int cap); diff --git a/include/linux/security.h b/include/linux/security.h index 76989b8bc34f..3416cb85e77b 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -48,9 +48,7 @@ struct audit_krule; * These functions are in security/capability.c and are used * as the default capabilities functions */ -extern int cap_capable(int cap, int audit); -extern int cap_task_capable(struct task_struct *tsk, const struct cred *cred, - int cap, int audit); +extern int cap_capable(struct task_struct *tsk, int cap, int audit); extern int cap_settime(struct timespec *ts, struct timezone *tz); extern int cap_ptrace_may_access(struct task_struct *child, unsigned int mode); extern int cap_ptrace_traceme(struct task_struct *parent); @@ -1197,18 +1195,9 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts) * @permitted contains the permitted capability set. * Return 0 and update @new if permission is granted. * @capable: - * Check whether the current process has the @cap capability in its - * subjective/effective credentials. - * @cap contains the capability . - * @audit: Whether to write an audit message or not - * Return 0 if the capability is granted for @tsk. - * @task_capable: - * Check whether the @tsk process has the @cap capability in its - * objective/real credentials. + * Check whether the @tsk process has the @cap capability. * @tsk contains the task_struct for the process. - * @cred contains the credentials to use. * @cap contains the capability . - * @audit: Whether to write an audit message or not * Return 0 if the capability is granted for @tsk. * @acct: * Check permission before enabling or disabling process accounting. If @@ -1301,9 +1290,7 @@ struct security_operations { const kernel_cap_t *effective, const kernel_cap_t *inheritable, const kernel_cap_t *permitted); - int (*capable) (int cap, int audit); - int (*task_capable) (struct task_struct *tsk, const struct cred *cred, - int cap, int audit); + int (*capable) (struct task_struct *tsk, int cap, int audit); int (*acct) (struct file *file); int (*sysctl) (struct ctl_table *table, int op); int (*quotactl) (int cmds, int type, int id, struct super_block *sb); @@ -1569,9 +1556,8 @@ int security_capset(struct cred *new, const struct cred *old, const kernel_cap_t *effective, const kernel_cap_t *inheritable, const kernel_cap_t *permitted); -int security_capable(int cap); -int security_task_capable(struct task_struct *tsk, int cap); -int security_task_capable_noaudit(struct task_struct *tsk, int cap); +int security_capable(struct task_struct *tsk, int cap); +int security_capable_noaudit(struct task_struct *tsk, int cap); int security_acct(struct file *file); int security_sysctl(struct ctl_table *table, int op); int security_quotactl(int cmds, int type, int id, struct super_block *sb); @@ -1768,31 +1754,14 @@ static inline int security_capset(struct cred *new, return cap_capset(new, old, effective, inheritable, permitted); } -static inline int security_capable(int cap) +static inline int security_capable(struct task_struct *tsk, int cap) { - return cap_capable(cap, SECURITY_CAP_AUDIT); + return cap_capable(tsk, cap, SECURITY_CAP_AUDIT); } -static inline int security_task_capable(struct task_struct *tsk, int cap) +static inline int security_capable_noaudit(struct task_struct *tsk, int cap) { - int ret; - - rcu_read_lock(); - ret = cap_task_capable(tsk, __task_cred(tsk), cap, SECURITY_CAP_AUDIT); - rcu_read_unlock(); - return ret; -} - -static inline -int security_task_capable_noaudit(struct task_struct *tsk, int cap) -{ - int ret; - - rcu_read_lock(); - ret = cap_task_capable(tsk, __task_cred(tsk), cap, - SECURITY_CAP_NOAUDIT); - rcu_read_unlock(); - return ret; + return cap_capable(tsk, cap, SECURITY_CAP_NOAUDIT); } static inline int security_acct(struct file *file) -- cgit v1.2.2 From 3699c53c485bf0168e6500d0ed18bf931584dd7c Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 6 Jan 2009 22:27:01 +0000 Subject: CRED: Fix regression in cap_capable() as shown up by sys_faccessat() [ver #3] Fix a regression in cap_capable() due to: commit 3b11a1decef07c19443d24ae926982bc8ec9f4c0 Author: David Howells Date: Fri Nov 14 10:39:26 2008 +1100 CRED: Differentiate objective and effective subjective credentials on a task The problem is that the above patch allows a process to have two sets of credentials, and for the most part uses the subjective credentials when accessing current's creds. There is, however, one exception: cap_capable(), and thus capable(), uses the real/objective credentials of the target task, whether or not it is the current task. Ordinarily this doesn't matter, since usually the two cred pointers in current point to the same set of creds. However, sys_faccessat() makes use of this facility to override the credentials of the calling process to make its test, without affecting the creds as seen from other processes. One of the things sys_faccessat() does is to make an adjustment to the effective capabilities mask, which cap_capable(), as it stands, then ignores. The affected capability check is in generic_permission(): if (!(mask & MAY_EXEC) || execute_ok(inode)) if (capable(CAP_DAC_OVERRIDE)) return 0; This change passes the set of credentials to be tested down into the commoncap and SELinux code. The security functions called by capable() and has_capability() select the appropriate set of credentials from the process being checked. This can be tested by compiling the following program from the XFS testsuite: /* * t_access_root.c - trivial test program to show permission bug. * * Written by Michael Kerrisk - copyright ownership not pursued. * Sourced from: http://linux.derkeiler.com/Mailing-Lists/Kernel/2003-10/6030.html */ #include #include #include #include #include #include #define UID 500 #define GID 100 #define PERM 0 #define TESTPATH "/tmp/t_access" static void errExit(char *msg) { perror(msg); exit(EXIT_FAILURE); } /* errExit */ static void accessTest(char *file, int mask, char *mstr) { printf("access(%s, %s) returns %d\n", file, mstr, access(file, mask)); } /* accessTest */ int main(int argc, char *argv[]) { int fd, perm, uid, gid; char *testpath; char cmd[PATH_MAX + 20]; testpath = (argc > 1) ? argv[1] : TESTPATH; perm = (argc > 2) ? strtoul(argv[2], NULL, 8) : PERM; uid = (argc > 3) ? atoi(argv[3]) : UID; gid = (argc > 4) ? atoi(argv[4]) : GID; unlink(testpath); fd = open(testpath, O_RDWR | O_CREAT, 0); if (fd == -1) errExit("open"); if (fchown(fd, uid, gid) == -1) errExit("fchown"); if (fchmod(fd, perm) == -1) errExit("fchmod"); close(fd); snprintf(cmd, sizeof(cmd), "ls -l %s", testpath); system(cmd); if (seteuid(uid) == -1) errExit("seteuid"); accessTest(testpath, 0, "0"); accessTest(testpath, R_OK, "R_OK"); accessTest(testpath, W_OK, "W_OK"); accessTest(testpath, X_OK, "X_OK"); accessTest(testpath, R_OK | W_OK, "R_OK | W_OK"); accessTest(testpath, R_OK | X_OK, "R_OK | X_OK"); accessTest(testpath, W_OK | X_OK, "W_OK | X_OK"); accessTest(testpath, R_OK | W_OK | X_OK, "R_OK | W_OK | X_OK"); exit(EXIT_SUCCESS); } /* main */ This can be run against an Ext3 filesystem as well as against an XFS filesystem. If successful, it will show: [root@andromeda src]# ./t_access_root /tmp/xxx 0 4043 4043 ---------- 1 dhowells dhowells 0 2008-12-31 03:00 /tmp/xxx access(/tmp/xxx, 0) returns 0 access(/tmp/xxx, R_OK) returns 0 access(/tmp/xxx, W_OK) returns 0 access(/tmp/xxx, X_OK) returns -1 access(/tmp/xxx, R_OK | W_OK) returns 0 access(/tmp/xxx, R_OK | X_OK) returns -1 access(/tmp/xxx, W_OK | X_OK) returns -1 access(/tmp/xxx, R_OK | W_OK | X_OK) returns -1 If unsuccessful, it will show: [root@andromeda src]# ./t_access_root /tmp/xxx 0 4043 4043 ---------- 1 dhowells dhowells 0 2008-12-31 02:56 /tmp/xxx access(/tmp/xxx, 0) returns 0 access(/tmp/xxx, R_OK) returns -1 access(/tmp/xxx, W_OK) returns -1 access(/tmp/xxx, X_OK) returns -1 access(/tmp/xxx, R_OK | W_OK) returns -1 access(/tmp/xxx, R_OK | X_OK) returns -1 access(/tmp/xxx, W_OK | X_OK) returns -1 access(/tmp/xxx, R_OK | W_OK | X_OK) returns -1 I've also tested the fix with the SELinux and syscalls LTP testsuites. Signed-off-by: David Howells Tested-by: J. Bruce Fields Acked-by: Serge Hallyn Signed-off-by: James Morris --- include/linux/capability.h | 17 +++++++++++++++-- include/linux/security.h | 41 ++++++++++++++++++++++++++++++++--------- 2 files changed, 47 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/capability.h b/include/linux/capability.h index e22f48c2a46f..02bdb768d43b 100644 --- a/include/linux/capability.h +++ b/include/linux/capability.h @@ -529,8 +529,21 @@ extern const kernel_cap_t __cap_init_eff_set; * * Note that this does not set PF_SUPERPRIV on the task. */ -#define has_capability(t, cap) (security_capable((t), (cap)) == 0) -#define has_capability_noaudit(t, cap) (security_capable_noaudit((t), (cap)) == 0) +#define has_capability(t, cap) (security_real_capable((t), (cap)) == 0) + +/** + * has_capability_noaudit - Determine if a task has a superior capability available (unaudited) + * @t: The task in question + * @cap: The capability to be tested for + * + * Return true if the specified task has the given superior capability + * currently in effect, false if not, but don't write an audit message for the + * check. + * + * Note that this does not set PF_SUPERPRIV on the task. + */ +#define has_capability_noaudit(t, cap) \ + (security_real_capable_noaudit((t), (cap)) == 0) extern int capable(int cap); diff --git a/include/linux/security.h b/include/linux/security.h index 3416cb85e77b..f9c390494f18 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -48,7 +48,8 @@ struct audit_krule; * These functions are in security/capability.c and are used * as the default capabilities functions */ -extern int cap_capable(struct task_struct *tsk, int cap, int audit); +extern int cap_capable(struct task_struct *tsk, const struct cred *cred, + int cap, int audit); extern int cap_settime(struct timespec *ts, struct timezone *tz); extern int cap_ptrace_may_access(struct task_struct *child, unsigned int mode); extern int cap_ptrace_traceme(struct task_struct *parent); @@ -1195,9 +1196,12 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts) * @permitted contains the permitted capability set. * Return 0 and update @new if permission is granted. * @capable: - * Check whether the @tsk process has the @cap capability. + * Check whether the @tsk process has the @cap capability in the indicated + * credentials. * @tsk contains the task_struct for the process. + * @cred contains the credentials to use. * @cap contains the capability . + * @audit: Whether to write an audit message or not * Return 0 if the capability is granted for @tsk. * @acct: * Check permission before enabling or disabling process accounting. If @@ -1290,7 +1294,8 @@ struct security_operations { const kernel_cap_t *effective, const kernel_cap_t *inheritable, const kernel_cap_t *permitted); - int (*capable) (struct task_struct *tsk, int cap, int audit); + int (*capable) (struct task_struct *tsk, const struct cred *cred, + int cap, int audit); int (*acct) (struct file *file); int (*sysctl) (struct ctl_table *table, int op); int (*quotactl) (int cmds, int type, int id, struct super_block *sb); @@ -1556,8 +1561,9 @@ int security_capset(struct cred *new, const struct cred *old, const kernel_cap_t *effective, const kernel_cap_t *inheritable, const kernel_cap_t *permitted); -int security_capable(struct task_struct *tsk, int cap); -int security_capable_noaudit(struct task_struct *tsk, int cap); +int security_capable(int cap); +int security_real_capable(struct task_struct *tsk, int cap); +int security_real_capable_noaudit(struct task_struct *tsk, int cap); int security_acct(struct file *file); int security_sysctl(struct ctl_table *table, int op); int security_quotactl(int cmds, int type, int id, struct super_block *sb); @@ -1754,14 +1760,31 @@ static inline int security_capset(struct cred *new, return cap_capset(new, old, effective, inheritable, permitted); } -static inline int security_capable(struct task_struct *tsk, int cap) +static inline int security_capable(int cap) { - return cap_capable(tsk, cap, SECURITY_CAP_AUDIT); + return cap_capable(current, current_cred(), cap, SECURITY_CAP_AUDIT); } -static inline int security_capable_noaudit(struct task_struct *tsk, int cap) +static inline int security_real_capable(struct task_struct *tsk, int cap) { - return cap_capable(tsk, cap, SECURITY_CAP_NOAUDIT); + int ret; + + rcu_read_lock(); + ret = cap_capable(tsk, __task_cred(tsk), cap, SECURITY_CAP_AUDIT); + rcu_read_unlock(); + return ret; +} + +static inline +int security_real_capable_noaudit(struct task_struct *tsk, int cap) +{ + int ret; + + rcu_read_lock(); + ret = cap_capable(tsk, __task_cred(tsk), cap, + SECURITY_CAP_NOAUDIT); + rcu_read_unlock(); + return ret; } static inline int security_acct(struct file *file) -- cgit v1.2.2 From 08fba69986e20c1c9e5fe2e6064d146cc4f42480 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Tue, 6 Jan 2009 14:38:53 -0800 Subject: mm: report the pagesize backing a VMA in /proc/pid/smaps It is useful to verify a hugepage-aware application is using the expected pagesizes for its memory regions. This patch creates an entry called KernelPageSize in /proc/pid/smaps that is the size of page used by the kernel to back a VMA. The entry is not called PageSize as it is possible the MMU uses a different size. This extension should not break any sensible parser that skips lines containing unrecognised information. Signed-off-by: Mel Gorman Acked-by: "KOSAKI Motohiro" Cc: Alexey Dobriyan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/hugetlb.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index e1c8afc002c0..648e1e25979e 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -233,6 +233,8 @@ static inline unsigned long huge_page_size(struct hstate *h) return (unsigned long)PAGE_SIZE << h->order; } +extern unsigned long vma_kernel_pagesize(struct vm_area_struct *vma); + static inline unsigned long huge_page_mask(struct hstate *h) { return h->mask; @@ -273,6 +275,7 @@ struct hstate {}; #define hstate_inode(i) NULL #define huge_page_size(h) PAGE_SIZE #define huge_page_mask(h) PAGE_MASK +#define vma_kernel_pagesize(v) PAGE_SIZE #define huge_page_order(h) 0 #define huge_page_shift(h) PAGE_SHIFT static inline unsigned int pages_per_huge_page(struct hstate *h) -- cgit v1.2.2 From 3340289ddf29ca75c3acfb3a6b72f234b2f74d5c Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Tue, 6 Jan 2009 14:38:54 -0800 Subject: mm: report the MMU pagesize in /proc/pid/smaps The KernelPageSize entry in /proc/pid/smaps is the pagesize used by the kernel to back a VMA. This matches the size used by the MMU in the majority of cases. However, one counter-example occurs on PPC64 kernels whereby a kernel using 64K as a base pagesize may still use 4K pages for the MMU on older processor. To distinguish, this patch reports MMUPageSize as the pagesize used by the MMU in /proc/pid/smaps. Signed-off-by: Mel Gorman Cc: "KOSAKI Motohiro" Cc: Alexey Dobriyan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/hugetlb.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 648e1e25979e..f1d2fba19ea0 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -235,6 +235,8 @@ static inline unsigned long huge_page_size(struct hstate *h) extern unsigned long vma_kernel_pagesize(struct vm_area_struct *vma); +extern unsigned long vma_mmu_pagesize(struct vm_area_struct *vma); + static inline unsigned long huge_page_mask(struct hstate *h) { return h->mask; @@ -276,6 +278,7 @@ struct hstate {}; #define huge_page_size(h) PAGE_SIZE #define huge_page_mask(h) PAGE_MASK #define vma_kernel_pagesize(v) PAGE_SIZE +#define vma_mmu_pagesize(v) PAGE_SIZE #define huge_page_order(h) 0 #define huge_page_shift(h) PAGE_SHIFT static inline unsigned int pages_per_huge_page(struct hstate *h) -- cgit v1.2.2 From 1c0fe6e3bda0464728c23c8d84aa47567e8b716c Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Tue, 6 Jan 2009 14:38:59 -0800 Subject: mm: invoke oom-killer from page fault Rather than have the pagefault handler kill a process directly if it gets a VM_FAULT_OOM, have it call into the OOM killer. With increasingly sophisticated oom behaviour (cpusets, memory cgroups, oom killing throttling, oom priority adjustment or selective disabling, panic on oom, etc), it's silly to unconditionally kill the faulting process at page fault time. Create a hook for pagefault oom path to call into instead. Only converted x86 and uml so far. [akpm@linux-foundation.org: make __out_of_memory() static] [akpm@linux-foundation.org: fix comment] Signed-off-by: Nick Piggin Cc: Jeff Dike Acked-by: Ingo Molnar Cc: Thomas Gleixner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index aaa8b843be28..4a3d28c86443 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -717,6 +717,11 @@ static inline int page_mapped(struct page *page) #define VM_FAULT_ERROR (VM_FAULT_OOM | VM_FAULT_SIGBUS) +/* + * Can be called by the pagefault handler when it gets a VM_FAULT_OOM. + */ +extern void pagefault_out_of_memory(void); + #define offset_in_page(p) ((unsigned long)(p) & ~PAGE_MASK) extern void show_free_areas(void); -- cgit v1.2.2 From 75aa199410359dc5fbcf9025ff7af98a9d20f0d5 Mon Sep 17 00:00:00 2001 From: David Rientjes Date: Tue, 6 Jan 2009 14:39:01 -0800 Subject: oom: print triggering task's cpuset and mems allowed When cpusets are enabled, it's necessary to print the triggering task's set of allowable nodes so the subsequently printed meminfo can be interpreted correctly. We also print the task's cpuset name for informational purposes. [rientjes@google.com: task lock current before dereferencing cpuset] Cc: Paul Menage Cc: Li Zefan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/cpuset.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h index 8e540d32c9fe..51ea2bdea0f9 100644 --- a/include/linux/cpuset.h +++ b/include/linux/cpuset.h @@ -78,6 +78,8 @@ extern int current_cpuset_is_being_rebound(void); extern void rebuild_sched_domains(void); +extern void cpuset_print_task_mems_allowed(struct task_struct *p); + #else /* !CONFIG_CPUSETS */ static inline int cpuset_init_early(void) { return 0; } @@ -159,6 +161,10 @@ static inline void rebuild_sched_domains(void) partition_sched_domains(1, NULL, NULL); } +static inline void cpuset_print_task_mems_allowed(struct task_struct *p) +{ +} + #endif /* !CONFIG_CPUSETS */ #endif /* _LINUX_CPUSET_H */ -- cgit v1.2.2 From c04fc586c1a480ba198f03ae7b6cbd7b57380b91 Mon Sep 17 00:00:00 2001 From: Gary Hade Date: Tue, 6 Jan 2009 14:39:14 -0800 Subject: mm: show node to memory section relationship with symlinks in sysfs Show node to memory section relationship with symlinks in sysfs Add /sys/devices/system/node/nodeX/memoryY symlinks for all the memory sections located on nodeX. For example: /sys/devices/system/node/node1/memory135 -> ../../memory/memory135 indicates that memory section 135 resides on node1. Also revises documentation to cover this change as well as updating Documentation/ABI/testing/sysfs-devices-memory to include descriptions of memory hotremove files 'phys_device', 'phys_index', and 'state' that were previously not described there. In addition to it always being a good policy to provide users with the maximum possible amount of physical location information for resources that can be hot-added and/or hot-removed, the following are some (but likely not all) of the user benefits provided by this change. Immediate: - Provides information needed to determine the specific node on which a defective DIMM is located. This will reduce system downtime when the node or defective DIMM is swapped out. - Prevents unintended onlining of a memory section that was previously offlined due to a defective DIMM. This could happen during node hot-add when the user or node hot-add assist script onlines _all_ offlined sections due to user or script inability to identify the specific memory sections located on the hot-added node. The consequences of reintroducing the defective memory could be ugly. - Provides information needed to vary the amount and distribution of memory on specific nodes for testing or debugging purposes. Future: - Will provide information needed to identify the memory sections that need to be offlined prior to physical removal of a specific node. Symlink creation during boot was tested on 2-node x86_64, 2-node ppc64, and 2-node ia64 systems. Symlink creation during physical memory hot-add tested on a 2-node x86_64 system. Signed-off-by: Gary Hade Signed-off-by: Badari Pulavarty Acked-by: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memory.h | 6 +++--- include/linux/memory_hotplug.h | 2 +- include/linux/node.h | 13 +++++++++++++ 3 files changed, 17 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memory.h b/include/linux/memory.h index 36c82c9e6ea7..3fdc10806d31 100644 --- a/include/linux/memory.h +++ b/include/linux/memory.h @@ -79,14 +79,14 @@ static inline int memory_notify(unsigned long val, void *v) #else extern int register_memory_notifier(struct notifier_block *nb); extern void unregister_memory_notifier(struct notifier_block *nb); -extern int register_new_memory(struct mem_section *); +extern int register_new_memory(int, struct mem_section *); extern int unregister_memory_section(struct mem_section *); extern int memory_dev_init(void); extern int remove_memory_block(unsigned long, struct mem_section *, int); extern int memory_notify(unsigned long val, void *v); +extern struct memory_block *find_memory_block(struct mem_section *); #define CONFIG_MEM_BLOCK_SIZE (PAGES_PER_SECTION< Date: Tue, 6 Jan 2009 14:39:15 -0800 Subject: mm: get rid of pagevec_release_nonlru() speculative page references patch (commit: e286781d5f2e9c846e012a39653a166e9d31777d) removed last pagevec_release_nonlru() caller. So this function can be removed now. This patch doesn't have any functional change. Signed-off-by: KOSAKI Motohiro Cc: Nick Piggin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/pagevec.h | 7 ------- 1 file changed, 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pagevec.h b/include/linux/pagevec.h index e90a2cb02915..7b2886fa7fdc 100644 --- a/include/linux/pagevec.h +++ b/include/linux/pagevec.h @@ -21,7 +21,6 @@ struct pagevec { }; void __pagevec_release(struct pagevec *pvec); -void __pagevec_release_nonlru(struct pagevec *pvec); void __pagevec_free(struct pagevec *pvec); void ____pagevec_lru_add(struct pagevec *pvec, enum lru_list lru); void pagevec_strip(struct pagevec *pvec); @@ -69,12 +68,6 @@ static inline void pagevec_release(struct pagevec *pvec) __pagevec_release(pvec); } -static inline void pagevec_release_nonlru(struct pagevec *pvec) -{ - if (pagevec_count(pvec)) - __pagevec_release_nonlru(pvec); -} - static inline void pagevec_free(struct pagevec *pvec) { if (pagevec_count(pvec)) -- cgit v1.2.2 From 64cdd548ffe26849d4cd113ac640f60606063b14 Mon Sep 17 00:00:00 2001 From: KOSAKI Motohiro Date: Tue, 6 Jan 2009 14:39:16 -0800 Subject: mm: cleanup: remove #ifdef CONFIG_MIGRATION #ifdef in *.c file decrease source readability a bit. removing is better. This patch doesn't have any functional change. Signed-off-by: KOSAKI Motohiro Cc: Christoph Lameter Cc: Mel Gorman Cc: Lee Schermerhorn Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/migrate.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/migrate.h b/include/linux/migrate.h index 3f34005068d4..527602cdea1c 100644 --- a/include/linux/migrate.h +++ b/include/linux/migrate.h @@ -7,6 +7,8 @@ typedef struct page *new_page_t(struct page *, unsigned long private, int **); #ifdef CONFIG_MIGRATION +#define PAGE_MIGRATION 1 + extern int putback_lru_pages(struct list_head *l); extern int migrate_page(struct address_space *, struct page *, struct page *); @@ -20,6 +22,8 @@ extern int migrate_vmas(struct mm_struct *mm, const nodemask_t *from, const nodemask_t *to, unsigned long flags); #else +#define PAGE_MIGRATION 0 + static inline int putback_lru_pages(struct list_head *l) { return 0; } static inline int migrate_pages(struct list_head *l, new_page_t x, unsigned long private) { return -ENOSYS; } -- cgit v1.2.2 From e5991371ee0d1c0ce19e133c6f9075b49c5b4ae8 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Tue, 6 Jan 2009 14:39:22 -0800 Subject: mm: remove cgroup_mm_owner_callbacks cgroup_mm_owner_callbacks() was brought in to support the memrlimit controller, but sneaked into mainline ahead of it. That controller has now been shelved, and the mm_owner_changed() args were inadequate for it anyway (they needed an mm pointer instead of a task pointer). Remove the dead code, and restore mm_update_next_owner() locking to how it was before: taking mmap_sem there does nothing for memcontrol.c, now the only user of mm->owner. Signed-off-by: Hugh Dickins Cc: Paul Menage Cc: Balbir Singh Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/cgroup.h | 14 +------------- 1 file changed, 1 insertion(+), 13 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 1164963c3a85..08b78c09b09a 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -329,13 +329,7 @@ struct cgroup_subsys { struct cgroup *cgrp); void (*post_clone)(struct cgroup_subsys *ss, struct cgroup *cgrp); void (*bind)(struct cgroup_subsys *ss, struct cgroup *root); - /* - * This routine is called with the task_lock of mm->owner held - */ - void (*mm_owner_changed)(struct cgroup_subsys *ss, - struct cgroup *old, - struct cgroup *new, - struct task_struct *p); + int subsys_id; int active; int disabled; @@ -400,9 +394,6 @@ void cgroup_iter_end(struct cgroup *cgrp, struct cgroup_iter *it); int cgroup_scan_tasks(struct cgroup_scanner *scan); int cgroup_attach_task(struct cgroup *, struct task_struct *); -void cgroup_mm_owner_callbacks(struct task_struct *old, - struct task_struct *new); - #else /* !CONFIG_CGROUPS */ static inline int cgroup_init_early(void) { return 0; } @@ -420,9 +411,6 @@ static inline int cgroupstats_build(struct cgroupstats *stats, return -EINVAL; } -static inline void cgroup_mm_owner_callbacks(struct task_struct *old, - struct task_struct *new) {} - #endif /* !CONFIG_CGROUPS */ #endif /* _LINUX_CGROUP_H */ -- cgit v1.2.2 From 3c1d43787b48c798f44dc32a6e6deb5ca2da3e68 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Tue, 6 Jan 2009 14:39:23 -0800 Subject: mm: remove GFP_HIGHUSER_PAGECACHE GFP_HIGHUSER_PAGECACHE is just an alias for GFP_HIGHUSER_MOVABLE, making that harder to track down: remove it, and its out-of-work brothers GFP_NOFS_PAGECACHE and GFP_USER_PAGECACHE. Since we're making that improvement to hotremove_migrate_alloc(), I think we can now also remove one of the "o"s from its comment. Signed-off-by: Hugh Dickins Acked-by: Mel Gorman Cc: Nick Piggin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/gfp.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/gfp.h b/include/linux/gfp.h index e8003afeffba..dd20cd78faa8 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -69,12 +69,6 @@ struct vm_area_struct; #define GFP_HIGHUSER_MOVABLE (__GFP_WAIT | __GFP_IO | __GFP_FS | \ __GFP_HARDWALL | __GFP_HIGHMEM | \ __GFP_MOVABLE) -#define GFP_NOFS_PAGECACHE (__GFP_WAIT | __GFP_IO | __GFP_MOVABLE) -#define GFP_USER_PAGECACHE (__GFP_WAIT | __GFP_IO | __GFP_FS | \ - __GFP_HARDWALL | __GFP_MOVABLE) -#define GFP_HIGHUSER_PAGECACHE (__GFP_WAIT | __GFP_IO | __GFP_FS | \ - __GFP_HARDWALL | __GFP_HIGHMEM | \ - __GFP_MOVABLE) #ifdef CONFIG_NUMA #define GFP_THISNODE (__GFP_THISNODE | __GFP_NOWARN | __GFP_NORETRY) -- cgit v1.2.2 From 6d91add09f4bad5f4d4233b13faa392f0c4b16be Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Tue, 6 Jan 2009 14:39:24 -0800 Subject: mm: add Set,ClearPageSwapCache stubs If we add NOOP stubs for SetPageSwapCache() and ClearPageSwapCache(), then we can remove the #ifdef CONFIG_SWAPs from mm/migrate.c. Signed-off-by: Hugh Dickins Acked-by: Christoph Lameter Cc: Nick Piggin Cc: Mel Gorman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/page-flags.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index b12f93a3c345..628ec0802492 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -228,6 +228,7 @@ PAGEFLAG_FALSE(HighMem) PAGEFLAG(SwapCache, swapcache) #else PAGEFLAG_FALSE(SwapCache) + SETPAGEFLAG_NOOP(SwapCache) CLEARPAGEFLAG_NOOP(SwapCache) #endif #ifdef CONFIG_UNEVICTABLE_LRU -- cgit v1.2.2 From b5934c531849ff4a51ce0f290141efe564290e40 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Tue, 6 Jan 2009 14:39:25 -0800 Subject: mm: add_active_or_unevictable into rmap lru_cache_add_active_or_unevictable() and page_add_new_anon_rmap() always appear together. Save some symbol table space and some jumping around by removing lru_cache_add_active_or_unevictable(), folding its code into page_add_new_anon_rmap(): like how we add file pages to lru just after adding them to page cache. Remove the nearby "TODO: is this safe?" comments (yes, it is safe), and change page_add_new_anon_rmap()'s address BUG_ON to VM_BUG_ON as originally intended. Signed-off-by: Hugh Dickins Acked-by: Rik van Riel Cc: Lee Schermerhorn Cc: Nick Piggin Cc: Mel Gorman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/swap.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/swap.h b/include/linux/swap.h index a3af95b2cb6d..48f309dc5a0c 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -174,8 +174,6 @@ extern unsigned int nr_free_pagecache_pages(void); /* linux/mm/swap.c */ extern void __lru_cache_add(struct page *, enum lru_list lru); extern void lru_cache_add_lru(struct page *, enum lru_list lru); -extern void lru_cache_add_active_or_unevictable(struct page *, - struct vm_area_struct *); extern void activate_page(struct page *); extern void mark_page_accessed(struct page *); extern void lru_add_drain(void); -- cgit v1.2.2 From 2afd1c928f1132b8d0099866e75ce8ad713a1180 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Tue, 6 Jan 2009 14:39:26 -0800 Subject: mm: make page_lock_anon_vma() static page_lock_anon_vma() and page_unlock_anon_vma() were made available to show_page_path() in vmscan.c; but now that has been removed, make them static in rmap.c again, they're better kept private if possible. Signed-off-by: Hugh Dickins Reviewed-by: KOSAKI Motohiro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/rmap.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rmap.h b/include/linux/rmap.h index 89f0564b10c8..3593b18a07dd 100644 --- a/include/linux/rmap.h +++ b/include/linux/rmap.h @@ -63,9 +63,6 @@ void anon_vma_unlink(struct vm_area_struct *); void anon_vma_link(struct vm_area_struct *); void __anon_vma_link(struct vm_area_struct *); -extern struct anon_vma *page_lock_anon_vma(struct page *page); -extern void page_unlock_anon_vma(struct anon_vma *anon_vma); - /* * rmap interfaces called when adding or removing pte of page */ -- cgit v1.2.2 From 364aeb2849789b51bf4b9af2ddd02fee7285c54e Mon Sep 17 00:00:00 2001 From: David Rientjes Date: Tue, 6 Jan 2009 14:39:29 -0800 Subject: mm: change dirty limit type specifiers to unsigned long The background dirty and dirty limits are better defined with type specifiers of unsigned long since negative writeback thresholds are not possible. These values, as returned by get_dirty_limits(), are normally compared with ZVC values to determine whether writeback shall commence or be throttled. Such page counts cannot be negative, so declaring the page limits as signed is unnecessary. Acked-by: Peter Zijlstra Cc: Dave Chinner Cc: Christoph Lameter Signed-off-by: David Rientjes Cc: Andrea Righi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/writeback.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/writeback.h b/include/linux/writeback.h index e585657e9831..259e9ea58cab 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -125,8 +125,8 @@ struct file; int dirty_writeback_centisecs_handler(struct ctl_table *, int, struct file *, void __user *, size_t *, loff_t *); -void get_dirty_limits(long *pbackground, long *pdirty, long *pbdi_dirty, - struct backing_dev_info *bdi); +void get_dirty_limits(unsigned long *pbackground, unsigned long *pdirty, + unsigned long *pbdi_dirty, struct backing_dev_info *bdi); void page_writeback_init(void); void balance_dirty_pages_ratelimited_nr(struct address_space *mapping, -- cgit v1.2.2 From 2da02997e08d3efe8174c7a47696e6f7cbe69ba9 Mon Sep 17 00:00:00 2001 From: David Rientjes Date: Tue, 6 Jan 2009 14:39:31 -0800 Subject: mm: add dirty_background_bytes and dirty_bytes sysctls This change introduces two new sysctls to /proc/sys/vm: dirty_background_bytes and dirty_bytes. dirty_background_bytes is the counterpart to dirty_background_ratio and dirty_bytes is the counterpart to dirty_ratio. With growing memory capacities of individual machines, it's no longer sufficient to specify dirty thresholds as a percentage of the amount of dirtyable memory over the entire system. dirty_background_bytes and dirty_bytes specify quantities of memory, in bytes, that represent the dirty limits for the entire system. If either of these values is set, its value represents the amount of dirty memory that is needed to commence either background or direct writeback. When a `bytes' or `ratio' file is written, its counterpart becomes a function of the written value. For example, if dirty_bytes is written to be 8096, 8K of memory is required to commence direct writeback. dirty_ratio is then functionally equivalent to 8K / the amount of dirtyable memory: dirtyable_memory = free pages + mapped pages + file cache dirty_background_bytes = dirty_background_ratio * dirtyable_memory -or- dirty_background_ratio = dirty_background_bytes / dirtyable_memory AND dirty_bytes = dirty_ratio * dirtyable_memory -or- dirty_ratio = dirty_bytes / dirtyable_memory Only one of dirty_background_bytes and dirty_background_ratio may be specified at a time, and only one of dirty_bytes and dirty_ratio may be specified. When one sysctl is written, the other appears as 0 when read. The `bytes' files operate on a page size granularity since dirty limits are compared with ZVC values, which are in page units. Prior to this change, the minimum dirty_ratio was 5 as implemented by get_dirty_limits() although /proc/sys/vm/dirty_ratio would show any user written value between 0 and 100. This restriction is maintained, but dirty_bytes has a lower limit of only one page. Also prior to this change, the dirty_background_ratio could not equal or exceed dirty_ratio. This restriction is maintained in addition to restricting dirty_background_bytes. If either background threshold equals or exceeds that of the dirty threshold, it is implicitly set to half the dirty threshold. Acked-by: Peter Zijlstra Cc: Dave Chinner Cc: Christoph Lameter Signed-off-by: David Rientjes Cc: Andrea Righi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/writeback.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/writeback.h b/include/linux/writeback.h index 259e9ea58cab..bb28c975c1d7 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -107,7 +107,9 @@ void throttle_vm_writeout(gfp_t gfp_mask); /* These are exported to sysctl. */ extern int dirty_background_ratio; +extern unsigned long dirty_background_bytes; extern int vm_dirty_ratio; +extern unsigned long vm_dirty_bytes; extern int dirty_writeback_interval; extern int dirty_expire_interval; extern int vm_highmem_is_dirtyable; @@ -116,9 +118,18 @@ extern int laptop_mode; extern unsigned long determine_dirtyable_memory(void); +extern int dirty_background_ratio_handler(struct ctl_table *table, int write, + struct file *filp, void __user *buffer, size_t *lenp, + loff_t *ppos); +extern int dirty_background_bytes_handler(struct ctl_table *table, int write, + struct file *filp, void __user *buffer, size_t *lenp, + loff_t *ppos); extern int dirty_ratio_handler(struct ctl_table *table, int write, struct file *filp, void __user *buffer, size_t *lenp, loff_t *ppos); +extern int dirty_bytes_handler(struct ctl_table *table, int write, + struct file *filp, void __user *buffer, size_t *lenp, + loff_t *ppos); struct ctl_table; struct file; -- cgit v1.2.2 From 7b1fe59793e61f826bef053107b57b23954833bb Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Tue, 6 Jan 2009 14:39:34 -0800 Subject: mm: reuse_swap_page replaces can_share_swap_page A good place to free up old swap is where do_wp_page(), or do_swap_page(), is about to redirty the page: the data on disk is then stale and won't be read again; and if we do decide to write the page out later, using the previous swap location makes an unnecessary disk seek very likely. So give can_share_swap_page() the side-effect of delete_from_swap_cache() when it safely can. And can_share_swap_page() was always a misleading name, the more so if it has a side-effect: rename it reuse_swap_page(). Irrelevant cleanup nearby: remove swap_token_default_timeout definition from swap.h: it's used nowhere. Signed-off-by: Hugh Dickins Cc: Lee Schermerhorn Acked-by: Rik van Riel Cc: Nick Piggin Cc: KAMEZAWA Hiroyuki Cc: Robin Holt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/swap.h | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/swap.h b/include/linux/swap.h index 48f309dc5a0c..366556c5b148 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -304,7 +304,7 @@ extern unsigned int count_swap_pages(int, int); extern sector_t map_swap_page(struct swap_info_struct *, pgoff_t); extern sector_t swapdev_block(int, pgoff_t); extern struct swap_info_struct *get_swap_info_struct(unsigned); -extern int can_share_swap_page(struct page *); +extern int reuse_swap_page(struct page *); extern int remove_exclusive_swap_page(struct page *); extern int remove_exclusive_swap_page_ref(struct page *); struct backing_dev_info; @@ -372,8 +372,6 @@ static inline struct page *lookup_swap_cache(swp_entry_t swp) return NULL; } -#define can_share_swap_page(p) (page_mapcount(p) == 1) - static inline int add_to_swap_cache(struct page *page, swp_entry_t entry, gfp_t gfp_mask) { @@ -388,7 +386,7 @@ static inline void delete_from_swap_cache(struct page *page) { } -#define swap_token_default_timeout 0 +#define reuse_swap_page(page) (page_mapcount(page) == 1) static inline int remove_exclusive_swap_page(struct page *p) { -- cgit v1.2.2 From a2c43eed8334e878702fca713b212ae2a11d84b9 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Tue, 6 Jan 2009 14:39:36 -0800 Subject: mm: try_to_free_swap replaces remove_exclusive_swap_page remove_exclusive_swap_page(): its problem is in living up to its name. It doesn't matter if someone else has a reference to the page (raised page_count); it doesn't matter if the page is mapped into userspace (raised page_mapcount - though that hints it may be worth keeping the swap): all that matters is that there be no more references to the swap (and no writeback in progress). swapoff (try_to_unuse) has been removing pages from swapcache for years, with no concern for page count or page mapcount, and we used to have a comment in lookup_swap_cache() recognizing that: if you go for a page of swapcache, you'll get the right page, but it could have been removed from swapcache by the time you get page lock. So, give up asking for exclusivity: get rid of remove_exclusive_swap_page(), and remove_exclusive_swap_page_ref() and remove_exclusive_swap_page_count() which were spawned for the recent LRU work: replace them by the simpler try_to_free_swap() which just checks page_swapcount(). Similarly, remove the page_count limitation from free_swap_and_count(), but assume that it's worth holding on to the swap if page is mapped and swap nowhere near full. Add a vm_swap_full() test in free_swap_cache()? It would be consistent, but I think we probably have enough for now. Signed-off-by: Hugh Dickins Cc: Lee Schermerhorn Cc: Rik van Riel Cc: Nick Piggin Cc: KAMEZAWA Hiroyuki Cc: Robin Holt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/swap.h | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/swap.h b/include/linux/swap.h index 366556c5b148..c3ecd478840e 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -305,8 +305,7 @@ extern sector_t map_swap_page(struct swap_info_struct *, pgoff_t); extern sector_t swapdev_block(int, pgoff_t); extern struct swap_info_struct *get_swap_info_struct(unsigned); extern int reuse_swap_page(struct page *); -extern int remove_exclusive_swap_page(struct page *); -extern int remove_exclusive_swap_page_ref(struct page *); +extern int try_to_free_swap(struct page *); struct backing_dev_info; /* linux/mm/thrash.c */ @@ -388,12 +387,7 @@ static inline void delete_from_swap_cache(struct page *page) #define reuse_swap_page(page) (page_mapcount(page) == 1) -static inline int remove_exclusive_swap_page(struct page *p) -{ - return 0; -} - -static inline int remove_exclusive_swap_page_ref(struct page *page) +static inline int try_to_free_swap(struct page *page) { return 0; } -- cgit v1.2.2 From ac47b003d03c2a4f28aef1d505b66d24ad191c4f Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Tue, 6 Jan 2009 14:39:39 -0800 Subject: mm: remove gfp_mask from add_to_swap Remove gfp_mask argument from add_to_swap(): it's misleading because its only caller, shrink_page_list(), is not atomic at that point; and in due course (implementing discard) we'll sometimes want to allocate some memory with GFP_NOIO (as is used in swap_writepage) when allocating swap. No change to the gfp_mask passed down to add_to_swap_cache(): still use __GFP_HIGH without __GFP_WAIT (with nomemalloc and nowarn as before): though it's not obvious if that's the best combination to ask for here. Signed-off-by: Hugh Dickins Cc: Lee Schermerhorn Cc: Rik van Riel Cc: Nick Piggin Cc: KAMEZAWA Hiroyuki Cc: Robin Holt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/swap.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/swap.h b/include/linux/swap.h index c3ecd478840e..c38bd157695b 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -278,7 +278,7 @@ extern void end_swap_bio_read(struct bio *bio, int err); extern struct address_space swapper_space; #define total_swapcache_pages swapper_space.nrpages extern void show_swap_cache_info(void); -extern int add_to_swap(struct page *, gfp_t); +extern int add_to_swap(struct page *); extern int add_to_swap_cache(struct page *, swp_entry_t, gfp_t); extern void __delete_from_swap_cache(struct page *); extern void delete_from_swap_cache(struct page *); -- cgit v1.2.2 From 60371d971a3d01afd102f0bbf2681f32ecc31d78 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Tue, 6 Jan 2009 14:39:40 -0800 Subject: mm: add add_to_swap stub If we add a failing stub for add_to_swap(), then we can remove the #ifdef CONFIG_SWAP from mm/vmscan.c. This was intended as a source cleanup, but looking more closely, it turns out that the !CONFIG_SWAP case was going to keep_locked for an anonymous page, whereas now it goes to the more suitable activate_locked, like the CONFIG_SWAP nr_swap_pages 0 case. Signed-off-by: Hugh Dickins Cc: Lee Schermerhorn Acked-by: Rik van Riel Cc: Nick Piggin Cc: KAMEZAWA Hiroyuki Cc: Robin Holt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/swap.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/swap.h b/include/linux/swap.h index c38bd157695b..c0d23ac710d5 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -371,6 +371,11 @@ static inline struct page *lookup_swap_cache(swp_entry_t swp) return NULL; } +static inline int add_to_swap(struct page *page) +{ + return 0; +} + static inline int add_to_swap_cache(struct page *page, swp_entry_t entry, gfp_t gfp_mask) { -- cgit v1.2.2 From b962716b459505a8d83aea313fea0abe76749f42 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Tue, 6 Jan 2009 14:39:41 -0800 Subject: mm: optimize get_scan_ratio for no swap Rik suggests a simplified get_scan_ratio() for !CONFIG_SWAP. Yes, the gcc optimizer gives us that, when nr_swap_pages is #defined as 0L. Move usual declaration to swapfile.c: it never belonged in page_alloc.c. Signed-off-by: Hugh Dickins Cc: Lee Schermerhorn Acked-by: Rik van Riel Cc: Nick Piggin Cc: KAMEZAWA Hiroyuki Cc: Robin Holt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/swap.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/swap.h b/include/linux/swap.h index c0d23ac710d5..3a31cc25bd2c 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -163,7 +163,6 @@ struct swap_list_t { /* linux/mm/page_alloc.c */ extern unsigned long totalram_pages; extern unsigned long totalreserve_pages; -extern long nr_swap_pages; extern unsigned int nr_free_buffer_pages(void); extern unsigned int nr_free_pagecache_pages(void); @@ -291,6 +290,7 @@ extern struct page *swapin_readahead(swp_entry_t, gfp_t, struct vm_area_struct *vma, unsigned long addr); /* linux/mm/swapfile.c */ +extern long nr_swap_pages; extern long total_swap_pages; extern void si_swapinfo(struct sysinfo *); extern swp_entry_t get_swap_page(void); @@ -331,7 +331,8 @@ static inline void disable_swap_token(void) #else /* CONFIG_SWAP */ -#define total_swap_pages 0 +#define nr_swap_pages 0L +#define total_swap_pages 0L #define total_swapcache_pages 0UL #define si_swapinfo(val) \ -- cgit v1.2.2 From 69beeb1d3428424fbc7546f85e5cd7ac4119c09d Mon Sep 17 00:00:00 2001 From: KOSAKI Motohiro Date: Tue, 6 Jan 2009 14:39:46 -0800 Subject: mm: make vread() and vwrite() declaration Sparse output following warnings. mm/vmalloc.c:1436:6: warning: symbol 'vread' was not declared. Should it be static? mm/vmalloc.c:1474:6: warning: symbol 'vwrite' was not declared. Should it be static? However, it is used by /dev/kmem. fixed here. Signed-off-by: KOSAKI Motohiro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/vmalloc.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index 307b88577eaa..506e7620a986 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -97,6 +97,10 @@ extern void unmap_kernel_range(unsigned long addr, unsigned long size); extern struct vm_struct *alloc_vm_area(size_t size); extern void free_vm_area(struct vm_struct *area); +/* for /dev/kmem */ +extern long vread(char *buf, char *addr, unsigned long count); +extern long vwrite(char *buf, char *addr, unsigned long count); + /* * Internals. Dont't use.. */ -- cgit v1.2.2 From 22c6f8fdb31993cf49bdd4a47b64a7002391e1c7 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Tue, 6 Jan 2009 14:39:48 -0800 Subject: swapfile: remove SWP_ACTIVE mask Remove the SWP_ACTIVE mask: it just obscures the SWP_WRITEOK flag. Signed-off-by: Hugh Dickins Cc: KAMEZAWA Hiroyuki Cc: Nick Piggin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/swap.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/swap.h b/include/linux/swap.h index 3a31cc25bd2c..410c8e473727 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -120,7 +120,6 @@ struct swap_extent { enum { SWP_USED = (1 << 0), /* is slot in swap_info[] used? */ SWP_WRITEOK = (1 << 1), /* ok to write to this swap? */ - SWP_ACTIVE = (SWP_USED | SWP_WRITEOK), /* add others here before... */ SWP_SCANNING = (1 << 8), /* refcount in scan_swap_map */ }; -- cgit v1.2.2 From ebebbbe904634b0ca1c674457b399f68db5e05b1 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Tue, 6 Jan 2009 14:39:50 -0800 Subject: swapfile: rearrange scan and swap_info Before making functional changes, rearrange scan_swap_map() to simplify subsequent diffs. Actually, there is one functional change in there: leave cluster_nr negative while scanning for a new cluster - resetting it early increased the likelihood that when we have difficulty finding a free cluster, another task may come in and try doing exactly the same - just a waste of cpu. Before making functional changes, rearrange struct swap_info_struct slightly: flags will be needed as an unsigned long (for wait_on_bit), next is a good int to pair with prio, old_block_size is uninteresting so shift it to the end. Signed-off-by: Hugh Dickins Cc: KAMEZAWA Hiroyuki Cc: Nick Piggin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/swap.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/swap.h b/include/linux/swap.h index 410c8e473727..9cabb8b21aba 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -133,14 +133,14 @@ enum { * The in-memory structure used to track swap areas. */ struct swap_info_struct { - unsigned int flags; + unsigned long flags; int prio; /* swap priority */ + int next; /* next entry on swap list */ struct file *swap_file; struct block_device *bdev; struct list_head extent_list; struct swap_extent *curr_swap_extent; - unsigned old_block_size; - unsigned short * swap_map; + unsigned short *swap_map; unsigned int lowest_bit; unsigned int highest_bit; unsigned int cluster_next; @@ -148,7 +148,7 @@ struct swap_info_struct { unsigned int pages; unsigned int max; unsigned int inuse_pages; - int next; /* next entry on swap list */ + unsigned int old_block_size; }; struct swap_list_t { -- cgit v1.2.2 From 6a6ba83175c029c7820765bae44692266b29e67a Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Tue, 6 Jan 2009 14:39:51 -0800 Subject: swapfile: swapon use discard (trim) When adding swap, all the old data on swap can be forgotten: sys_swapon() discard all but the header page of the swap partition (or every extent but the header of the swap file), to give a solidstate swap device the opportunity to optimize its wear-levelling. If that succeeds, note SWP_DISCARDABLE for later use, and report it with a "D" at the right end of the kernel's "Adding ... swap" message. Perhaps something should be shown in /proc/swaps (swapon -s), but we have to be more cautious before making any addition to that format. Signed-off-by: Hugh Dickins Cc: KAMEZAWA Hiroyuki Cc: Nick Piggin Cc: David Woodhouse Cc: Jens Axboe Cc: Matthew Wilcox Cc: Joern Engel Cc: James Bottomley Cc: Donjun Shin Cc: Tejun Heo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/swap.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/swap.h b/include/linux/swap.h index 9cabb8b21aba..0b9210ea96c7 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -120,6 +120,7 @@ struct swap_extent { enum { SWP_USED = (1 << 0), /* is slot in swap_info[] used? */ SWP_WRITEOK = (1 << 1), /* ok to write to this swap? */ + SWP_DISCARDABLE = (1 << 2), /* blkdev supports discard */ /* add others here before... */ SWP_SCANNING = (1 << 8), /* refcount in scan_swap_map */ }; -- cgit v1.2.2 From 7992fde72ce06c73280a1939b7a1e903bc95ef85 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Tue, 6 Jan 2009 14:39:53 -0800 Subject: swapfile: swap allocation use discard When scan_swap_map() finds a free cluster of swap pages to allocate, discard the old contents of the cluster if the device supports discard. But don't bother when swap is so fragmented that we allocate single pages. Be careful about racing allocations made while we're scanning for a cluster; and hold up allocations made while we're discarding. Signed-off-by: Hugh Dickins Cc: KAMEZAWA Hiroyuki Cc: Nick Piggin Cc: David Woodhouse Cc: Jens Axboe Cc: Matthew Wilcox Cc: Joern Engel Cc: James Bottomley Cc: Donjun Shin Cc: Tejun Heo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/swap.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/swap.h b/include/linux/swap.h index 0b9210ea96c7..fe79f44c858e 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -121,6 +121,7 @@ enum { SWP_USED = (1 << 0), /* is slot in swap_info[] used? */ SWP_WRITEOK = (1 << 1), /* ok to write to this swap? */ SWP_DISCARDABLE = (1 << 2), /* blkdev supports discard */ + SWP_DISCARDING = (1 << 3), /* now discarding a free cluster */ /* add others here before... */ SWP_SCANNING = (1 << 8), /* refcount in scan_swap_map */ }; @@ -144,6 +145,8 @@ struct swap_info_struct { unsigned short *swap_map; unsigned int lowest_bit; unsigned int highest_bit; + unsigned int lowest_alloc; /* while preparing discard cluster */ + unsigned int highest_alloc; /* while preparing discard cluster */ unsigned int cluster_next; unsigned int cluster_nr; unsigned int pages; -- cgit v1.2.2 From 20137a490f397d9c01fc9fadd83a8d198bda4477 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Tue, 6 Jan 2009 14:39:54 -0800 Subject: swapfile: swapon randomize if nonrot Swap allocation has always started from the beginning of the swap area; but if we're dealing with a solidstate swap device which can only remap blocks within limited zones, that would sooner wear out the first zone. Therefore sys_swapon() test whether blk_queue is non-rotational, and if so randomize the cluster_next starting position for allocation. If blk_queue is nonrot, note SWP_SOLIDSTATE for later use, and report it with an "SS" at the right end of the kernel's "Adding ... swap" message (so that if it's both nonrot and discardable, "SSD" will be shown there). Perhaps something should be shown in /proc/swaps (swapon -s), but we have to be more cautious before making any addition to that format. Signed-off-by: Hugh Dickins Cc: KAMEZAWA Hiroyuki Cc: Nick Piggin Cc: David Woodhouse Cc: Jens Axboe Cc: Matthew Wilcox Cc: Joern Engel Cc: James Bottomley Cc: Donjun Shin Cc: Tejun Heo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/swap.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/swap.h b/include/linux/swap.h index fe79f44c858e..cbf7fbed3dfd 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -122,6 +122,7 @@ enum { SWP_WRITEOK = (1 << 1), /* ok to write to this swap? */ SWP_DISCARDABLE = (1 << 2), /* blkdev supports discard */ SWP_DISCARDING = (1 << 3), /* now discarding a free cluster */ + SWP_SOLIDSTATE = (1 << 4), /* blkdev seeks are cheap */ /* add others here before... */ SWP_SCANNING = (1 << 8), /* refcount in scan_swap_map */ }; -- cgit v1.2.2 From 79f4b7bf393e67bbffec807cc68caaefc72b82ee Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Tue, 6 Jan 2009 14:40:05 -0800 Subject: badpage: simplify page_alloc flag check+clear Simplify the PAGE_FLAGS checking and clearing when freeing and allocating a page: check the same flags as before when freeing, clear ALL the flags (unless PageReserved) when freeing, check ALL flags off when allocating. Signed-off-by: Hugh Dickins Cc: Nick Piggin Cc: Christoph Lameter Cc: Mel Gorman Cc: Rik van Riel Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/page-flags.h | 25 ++++++++----------------- 1 file changed, 8 insertions(+), 17 deletions(-) (limited to 'include/linux') diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 628ec0802492..219a523ecdb0 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -373,31 +373,22 @@ static inline void __ClearPageTail(struct page *page) #define __PG_MLOCKED 0 #endif -#define PAGE_FLAGS (1 << PG_lru | 1 << PG_private | 1 << PG_locked | \ - 1 << PG_buddy | 1 << PG_writeback | \ - 1 << PG_slab | 1 << PG_swapcache | 1 << PG_active | \ - __PG_UNEVICTABLE | __PG_MLOCKED) - -/* - * Flags checked in bad_page(). Pages on the free list should not have - * these flags set. It they are, there is a problem. - */ -#define PAGE_FLAGS_CLEAR_WHEN_BAD (PAGE_FLAGS | \ - 1 << PG_reclaim | 1 << PG_dirty | 1 << PG_swapbacked) - /* * Flags checked when a page is freed. Pages being freed should not have * these flags set. It they are, there is a problem. */ -#define PAGE_FLAGS_CHECK_AT_FREE (PAGE_FLAGS | 1 << PG_reserved) +#define PAGE_FLAGS_CHECK_AT_FREE \ + (1 << PG_lru | 1 << PG_private | 1 << PG_locked | \ + 1 << PG_buddy | 1 << PG_writeback | 1 << PG_reserved | \ + 1 << PG_slab | 1 << PG_swapcache | 1 << PG_active | \ + __PG_UNEVICTABLE | __PG_MLOCKED) /* * Flags checked when a page is prepped for return by the page allocator. - * Pages being prepped should not have these flags set. It they are, there - * is a problem. + * Pages being prepped should not have any flags set. It they are set, + * there has been a kernel bug or struct page corruption. */ -#define PAGE_FLAGS_CHECK_AT_PREP (PAGE_FLAGS | \ - 1 << PG_reserved | 1 << PG_dirty | 1 << PG_swapbacked) +#define PAGE_FLAGS_CHECK_AT_PREP ((1 << NR_PAGEFLAGS) - 1) #endif /* !__GENERATING_BOUNDS_H */ #endif /* PAGE_FLAGS_H */ -- cgit v1.2.2 From 2509ef26db4699a5d9fa876e90ddfc107afcab84 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Tue, 6 Jan 2009 14:40:10 -0800 Subject: badpage: zap print_bad_pte on swap and file Complete zap_pte_range()'s coverage of bad pagetable entries by calling print_bad_pte() on a pte_file in a linear vma and on a bad swap entry. That needs free_swap_and_cache() to tell it, which will also have shown one of those "swap_free" errors (but with much less information). Similar checks in fork's copy_one_pte()? No, that would be more noisy than helpful: we'll see them when parent and child exec or exit. Where do_nonlinear_fault() calls print_bad_pte(): omit !VM_CAN_NONLINEAR case, that could only be a bug in sys_remap_file_pages(), not a bad pte. VM_FAULT_OOM rather than VM_FAULT_SIGBUS? Well, okay, that is consistent with what happens if do_swap_page() operates a bad swap entry; but don't we have patches to be more careful about killing when VM_FAULT_OOM? Signed-off-by: Hugh Dickins Cc: Nick Piggin Cc: Christoph Lameter Cc: Mel Gorman Cc: Rik van Riel Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/swap.h | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/swap.h b/include/linux/swap.h index cbf7fbed3dfd..91dee50fe260 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -302,7 +302,7 @@ extern swp_entry_t get_swap_page_of_type(int); extern int swap_duplicate(swp_entry_t); extern int valid_swaphandles(swp_entry_t, unsigned long *); extern void swap_free(swp_entry_t); -extern void free_swap_and_cache(swp_entry_t); +extern int free_swap_and_cache(swp_entry_t); extern int swap_type_of(dev_t, sector_t, struct block_device **); extern unsigned int count_swap_pages(int, int); extern sector_t map_swap_page(struct swap_info_struct *, pgoff_t); @@ -352,14 +352,8 @@ static inline void show_swap_cache_info(void) { } -static inline void free_swap_and_cache(swp_entry_t swp) -{ -} - -static inline int swap_duplicate(swp_entry_t swp) -{ - return 0; -} +#define free_swap_and_cache(swp) is_migration_entry(swp) +#define swap_duplicate(swp) is_migration_entry(swp) static inline void swap_free(swp_entry_t swp) { -- cgit v1.2.2 From edc315fd222497ae4f4b959a9e31ada1e68a4755 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Tue, 6 Jan 2009 14:40:11 -0800 Subject: badpage: remove vma from page_remove_rmap Remove page_remove_rmap()'s vma arg, which was only for the Eeek message. And remove the BUG_ON(page_mapcount(page) == 0) from CONFIG_DEBUG_VM's page_dup_rmap(): we're trying to be more resilient about that than BUGs. Signed-off-by: Hugh Dickins Cc: Nick Piggin Cc: Christoph Lameter Cc: Mel Gorman Cc: Rik van Riel Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/rmap.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/rmap.h b/include/linux/rmap.h index 3593b18a07dd..b35bc0e19cd9 100644 --- a/include/linux/rmap.h +++ b/include/linux/rmap.h @@ -69,7 +69,7 @@ void __anon_vma_link(struct vm_area_struct *); void page_add_anon_rmap(struct page *, struct vm_area_struct *, unsigned long); void page_add_new_anon_rmap(struct page *, struct vm_area_struct *, unsigned long); void page_add_file_rmap(struct page *); -void page_remove_rmap(struct page *, struct vm_area_struct *); +void page_remove_rmap(struct page *); #ifdef CONFIG_DEBUG_VM void page_dup_rmap(struct page *page, struct vm_area_struct *vma, unsigned long address); -- cgit v1.2.2 From 4f5a99d64c17470a784a6c68064207d82e3e74a5 Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Tue, 6 Jan 2009 14:40:25 -0800 Subject: fs: remove WB_SYNC_HOLD Remove WB_SYNC_HOLD. The primary motiviation is the design of my anti-starvation code for fsync. It requires taking an inode lock over the sync operation, so we could run into lock ordering problems with multiple inodes. It is possible to take a single global lock to solve the ordering problem, but then that would prevent a future nice implementation of "sync multiple inodes" based on lock order via inode address. Seems like a backward step to remove this, but actually it is busted anyway: we can't use the inode lists for data integrity wait: an inode can be taken off the dirty lists but still be under writeback. In order to satisfy data integrity semantics, we should wait for it to finish writeback, but if we only search the dirty lists, we'll miss it. It would be possible to have a "writeback" list, for sys_sync, I suppose. But why complicate things by prematurely optimise? For unmounting, we could avoid the "livelock avoidance" code, which would be easier, but again premature IMO. Fixing the existing data integrity problem will come next. Signed-off-by: Nick Piggin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/writeback.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/writeback.h b/include/linux/writeback.h index bb28c975c1d7..7300ecdc480c 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -30,7 +30,6 @@ static inline int task_is_pdflush(struct task_struct *task) enum writeback_sync_modes { WB_SYNC_NONE, /* Don't wait on anything */ WB_SYNC_ALL, /* Wait on every mapping */ - WB_SYNC_HOLD, /* Hold the inode on sb_dirty for sys_sync() */ }; /* -- cgit v1.2.2 From 856bf4d717feb8c55d4e2f817b71ebb70cfbc67b Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Tue, 6 Jan 2009 14:40:26 -0800 Subject: fs: sys_sync fix s_syncing livelock avoidance was breaking data integrity guarantee of sys_sync, by allowing sys_sync to skip writing or waiting for superblocks if there is a concurrent sys_sync happening. This livelock avoidance is much less important now that we don't have the get_super_to_sync() call after every sb that we sync. This was replaced by __put_super_and_need_restart. Signed-off-by: Nick Piggin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/fs.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index fb59673c60b1..d7eba77f666e 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1133,7 +1133,6 @@ struct super_block { struct rw_semaphore s_umount; struct mutex s_lock; int s_count; - int s_syncing; int s_need_sync_fs; atomic_t s_active; #ifdef CONFIG_SECURITY -- cgit v1.2.2 From 901608d9045146aec6f14a7777ea4b1501c379f0 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Tue, 6 Jan 2009 14:40:29 -0800 Subject: mm: introduce get_mm_hiwater_xxx(), fix taskstats->hiwater_xxx accounting xacct_add_tsk() relies on do_exit()->update_hiwater_xxx() and uses mm->hiwater_xxx directly, this leads to 2 problems: - taskstats_user_cmd() can call fill_pid()->xacct_add_tsk() at any moment before the task exits, so we should check the current values of rss/vm anyway. - do_exit()->update_hiwater_xxx() calls are racy. An exiting thread can be preempted right before mm->hiwater_xxx = new_val, and another thread can use A_LOT of memory and exit in between. When the first thread resumes it can be the last thread in the thread group, in that case we report the wrong hiwater_xxx values which do not take A_LOT into account. Introduce get_mm_hiwater_rss() and get_mm_hiwater_vm() helpers and change xacct_add_tsk() to use them. The first helper will also be used by rusage->ru_maxrss accounting. Kill do_exit()->update_hiwater_xxx() calls. Unless we are going to decrease rss/vm there is no point to update mm->hiwater_xxx, and nobody can look at this mm_struct when exit_mmap() actually unmaps the memory. Signed-off-by: Oleg Nesterov Acked-by: Hugh Dickins Reviewed-by: KOSAKI Motohiro Acked-by: Balbir Singh Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sched.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 38a3f4b15394..ea415136ac9e 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -386,6 +386,9 @@ extern void arch_unmap_area_topdown(struct mm_struct *, unsigned long); (mm)->hiwater_vm = (mm)->total_vm; \ } while (0) +#define get_mm_hiwater_rss(mm) max((mm)->hiwater_rss, get_mm_rss(mm)) +#define get_mm_hiwater_vm(mm) max((mm)->hiwater_vm, (mm)->total_vm) + extern void set_dumpable(struct mm_struct *mm, int value); extern int get_dumpable(struct mm_struct *mm); -- cgit v1.2.2 From ea435467500612636f8f4fb639ff6e76b2496e4b Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Tue, 6 Jan 2009 14:40:39 -0800 Subject: atomic_t: unify all arch definitions The atomic_t type cannot currently be used in some header files because it would create an include loop with asm/atomic.h. Move the type definition to linux/types.h to break the loop. Signed-off-by: Matthew Wilcox Cc: Huang Ying Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/types.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/types.h b/include/linux/types.h index 121f349cb7ec..3b864f2d9560 100644 --- a/include/linux/types.h +++ b/include/linux/types.h @@ -195,6 +195,16 @@ typedef u32 phys_addr_t; typedef phys_addr_t resource_size_t; +typedef struct { + volatile int counter; +} atomic_t; + +#ifdef CONFIG_64BIT +typedef struct { + volatile long counter; +} atomic64_t; +#endif + struct ustat { __kernel_daddr_t f_tfree; __kernel_ino_t f_tinode; -- cgit v1.2.2 From f1883f86dea84fe47a71a39fc1afccc005915ed8 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 6 Jan 2009 14:40:45 -0800 Subject: Remove remaining unwinder code Signed-off-by: Alexey Dobriyan Cc: Gabor Gombas Cc: Jan Beulich Cc: Andi Kleen Cc: Ingo Molnar , Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/module.h | 3 --- include/linux/unwind.h | 68 -------------------------------------------------- 2 files changed, 71 deletions(-) delete mode 100644 include/linux/unwind.h (limited to 'include/linux') diff --git a/include/linux/module.h b/include/linux/module.h index 3bfed013350b..03cb93d1865a 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -294,9 +294,6 @@ struct module /* The size of the executable code in each section. */ unsigned int init_text_size, core_text_size; - /* The handle returned from unwind_add_table. */ - void *unwind_info; - /* Arch-specific module values */ struct mod_arch_specific arch; diff --git a/include/linux/unwind.h b/include/linux/unwind.h deleted file mode 100644 index 7760860fa170..000000000000 --- a/include/linux/unwind.h +++ /dev/null @@ -1,68 +0,0 @@ -#ifndef _LINUX_UNWIND_H -#define _LINUX_UNWIND_H - -/* - * Copyright (C) 2002-2006 Novell, Inc. - * Jan Beulich - * This code is released under version 2 of the GNU GPL. - * - * A simple API for unwinding kernel stacks. This is used for - * debugging and error reporting purposes. The kernel doesn't need - * full-blown stack unwinding with all the bells and whistles, so there - * is not much point in implementing the full Dwarf2 unwind API. - */ - -struct module; - -struct unwind_frame_info {}; - -static inline void unwind_init(void) {} -static inline void unwind_setup(void) {} - -#ifdef CONFIG_MODULES - -static inline void *unwind_add_table(struct module *mod, - const void *table_start, - unsigned long table_size) -{ - return NULL; -} - -static inline void unwind_remove_table(void *handle, int init_only) -{ -} - -#endif - -static inline int unwind_init_frame_info(struct unwind_frame_info *info, - struct task_struct *tsk, - const struct pt_regs *regs) -{ - return -ENOSYS; -} - -static inline int unwind_init_blocked(struct unwind_frame_info *info, - struct task_struct *tsk) -{ - return -ENOSYS; -} - -static inline int unwind_init_running(struct unwind_frame_info *info, - asmlinkage int (*cb)(struct unwind_frame_info *, - void *arg), - void *arg) -{ - return -ENOSYS; -} - -static inline int unwind(struct unwind_frame_info *info) -{ - return -ENOSYS; -} - -static inline int unwind_to_user(struct unwind_frame_info *info) -{ - return -ENOSYS; -} - -#endif /* _LINUX_UNWIND_H */ -- cgit v1.2.2 From 9fe06081ef145d6582c39e18203b5fffe6f3abc2 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Tue, 6 Jan 2009 14:40:51 -0800 Subject: Create a DIV_ROUND_CLOSEST macro to do division with rounding Create a helper macro to divide two numbers and round the result to the nearest whole number. This is a helper macro for hwmon drivers that want to convert incoming sysfs values per standard hwmon practice, though the macro itself can be used by anyone. Signed-off-by: Darrick J. Wong Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kernel.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/kernel.h b/include/linux/kernel.h index ca9ff6411dfa..721984844c94 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -48,6 +48,12 @@ extern const char linux_proc_banner[]; #define FIELD_SIZEOF(t, f) (sizeof(((t*)0)->f)) #define DIV_ROUND_UP(n,d) (((n) + (d) - 1) / (d)) #define roundup(x, y) ((((x) + ((y) - 1)) / (y)) * (y)) +#define DIV_ROUND_CLOSEST(x, divisor)( \ +{ \ + typeof(divisor) __divisor = divisor; \ + (((x) + ((__divisor) / 2)) / (__divisor)); \ +} \ +) #define _RET_IP_ (unsigned long)__builtin_return_address(0) #define _THIS_IP_ ({ __label__ __here; __here: (unsigned long)&&__here; }) -- cgit v1.2.2 From 5f820f648c92a5ecc771a96b3c29aa6e90013bba Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 6 Jan 2009 14:40:59 -0800 Subject: poll: allow f_op->poll to sleep f_op->poll is the only vfs operation which is not allowed to sleep. It's because poll and select implementation used task state to synchronize against wake ups, which doesn't have to be the case anymore as wait/wake interface can now use custom wake up functions. The non-sleep restriction can be a bit tricky because ->poll is not called from an atomic context and the result of accidentally sleeping in ->poll only shows up as temporary busy looping when the timing is right or rather wrong. This patch converts poll/select to use custom wake up function and use separate triggered variable to synchronize against wake up events. The only added overhead is an extra function call during wake up and negligible. This patch removes the one non-sleep exception from vfs locking rules and is beneficial to userland filesystem implementations like FUSE, 9p or peculiar fs like spufs as it's very difficult for those to implement non-sleeping poll method. While at it, make the following cosmetic changes to make poll.h and select.c checkpatch friendly. * s/type * symbol/type *symbol/ : three places in poll.h * remove blank line before EXPORT_SYMBOL() : two places in select.c Oleg: spotted missing barrier in poll_schedule_timeout() Davide: spotted missing write barrier in pollwake() Signed-off-by: Tejun Heo Cc: Eric Van Hensbergen Cc: Ron Minnich Cc: Ingo Molnar Cc: Christoph Hellwig Signed-off-by: Miklos Szeredi Cc: Davide Libenzi Cc: Brad Boyer Cc: Al Viro Cc: Roland McGrath Cc: Mauro Carvalho Chehab Signed-off-by: Andrew Morton Cc: Davide Libenzi Cc: Oleg Nesterov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/poll.h | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/poll.h b/include/linux/poll.h index badd98ab06f6..8c24ef8d9976 100644 --- a/include/linux/poll.h +++ b/include/linux/poll.h @@ -46,9 +46,9 @@ static inline void init_poll_funcptr(poll_table *pt, poll_queue_proc qproc) } struct poll_table_entry { - struct file * filp; + struct file *filp; wait_queue_t wait; - wait_queue_head_t * wait_address; + wait_queue_head_t *wait_address; }; /* @@ -56,7 +56,9 @@ struct poll_table_entry { */ struct poll_wqueues { poll_table pt; - struct poll_table_page * table; + struct poll_table_page *table; + struct task_struct *polling_task; + int triggered; int error; int inline_index; struct poll_table_entry inline_entries[N_INLINE_POLL_ENTRIES]; @@ -64,6 +66,13 @@ struct poll_wqueues { extern void poll_initwait(struct poll_wqueues *pwq); extern void poll_freewait(struct poll_wqueues *pwq); +extern int poll_schedule_timeout(struct poll_wqueues *pwq, int state, + ktime_t *expires, unsigned long slack); + +static inline int poll_schedule(struct poll_wqueues *pwq, int state) +{ + return poll_schedule_timeout(pwq, state, NULL, 0); +} /* * Scaleable version of the fd_set. -- cgit v1.2.2 From 179f7ebff6be45738c6e2fa68c8d2cc5c2c6308e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 6 Jan 2009 14:41:04 -0800 Subject: percpu_counter: FBC_BATCH should be a variable For NR_CPUS >= 16 values, FBC_BATCH is 2*NR_CPUS Considering more and more distros are using high NR_CPUS values, it makes sense to use a more sensible value for FBC_BATCH, and get rid of NR_CPUS. A sensible value is 2*num_online_cpus(), with a minimum value of 32 (This minimum value helps branch prediction in __percpu_counter_add()) We already have a hotcpu notifier, so we can adjust FBC_BATCH dynamically. We rename FBC_BATCH to percpu_counter_batch since its not a constant anymore. Signed-off-by: Eric Dumazet Acked-by: David S. Miller Acked-by: Peter Zijlstra Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/percpu_counter.h | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/percpu_counter.h b/include/linux/percpu_counter.h index 9007ccdfc112..99de7a31bab8 100644 --- a/include/linux/percpu_counter.h +++ b/include/linux/percpu_counter.h @@ -24,11 +24,7 @@ struct percpu_counter { s32 *counters; }; -#if NR_CPUS >= 16 -#define FBC_BATCH (NR_CPUS*2) -#else -#define FBC_BATCH (NR_CPUS*4) -#endif +extern int percpu_counter_batch; int percpu_counter_init(struct percpu_counter *fbc, s64 amount); int percpu_counter_init_irq(struct percpu_counter *fbc, s64 amount); @@ -39,7 +35,7 @@ s64 __percpu_counter_sum(struct percpu_counter *fbc); static inline void percpu_counter_add(struct percpu_counter *fbc, s64 amount) { - __percpu_counter_add(fbc, amount, FBC_BATCH); + __percpu_counter_add(fbc, amount, percpu_counter_batch); } static inline s64 percpu_counter_sum_positive(struct percpu_counter *fbc) -- cgit v1.2.2 From 8c3659347efb43857b2c2d7bc63a9c7d68d1a608 Mon Sep 17 00:00:00 2001 From: Jesper Juhl Date: Tue, 6 Jan 2009 14:41:14 -0800 Subject: include/linux/interrupt.h: do not include linux/irqnr.h twice Signed-off-by: Jesper Juhl Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/interrupt.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index 0702c4d7bdf0..af886b26c9d1 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -14,7 +14,6 @@ #include #include #include -#include #include #include -- cgit v1.2.2 From 5cf0cc4e670b8da2231a3375db87ec3b6cb84432 Mon Sep 17 00:00:00 2001 From: Hiroshi Shimamoto Date: Tue, 6 Jan 2009 14:41:38 -0800 Subject: binfmts.h: include list.h linux_binfmt uses list_head, so list.h is needed. [akpm@linux-foundation.org: fix `make headerscheck'] Signed-off-by: Hiroshi Shimamoto Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/binfmts.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h index 6cbfbe297180..0d0150b4901e 100644 --- a/include/linux/binfmts.h +++ b/include/linux/binfmts.h @@ -18,6 +18,7 @@ struct pt_regs; #define BINPRM_BUF_SIZE 128 #ifdef __KERNEL__ +#include #define CORENAME_MAX_SIZE 128 -- cgit v1.2.2 From d29389de0b0ee1715333bafc6ac3f22a75aa4313 Mon Sep 17 00:00:00 2001 From: David Brownell Date: Tue, 6 Jan 2009 14:41:41 -0800 Subject: spi_gpio driver Generalize the old at91rm9200 "bootstrap" bitbanging SPI master driver as "spi_gpio", so it works with arbitrary GPIOs and can be configured through platform_data. Such SPI masters support: - any number of bus instances (bus_num is the platform_device.id) - any number of chipselects (one GPIO per spi_device) - all four SPI_MODE values, and SPI_CS_HIGH - i/o word sizes from 1 to 32 bits; - devices configured as with any other spi_master controller When configured using platform_data, this provides relatively low clock rates. On platforms that support inlined GPIO calls, significantly improved transfer speeds are also possible with a semi-custom driver. (It's still painful when accessing flash memory, but less so.) Sanity checked by using this version to replace both native controllers on a board with six different SPI slaves, relying on three different SPI_MODE_* values and both SPI_CS_HIGH settings for correct operation. [akpm@linux-foundation.org: cleanups] Signed-off-by: David Brownell Acked-by: Magnus Damm Tested-by: Magnus Damm Cc: Torgil Svensson Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/spi/spi_gpio.h | 60 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 60 insertions(+) create mode 100644 include/linux/spi/spi_gpio.h (limited to 'include/linux') diff --git a/include/linux/spi/spi_gpio.h b/include/linux/spi/spi_gpio.h new file mode 100644 index 000000000000..0f01a0f1f40c --- /dev/null +++ b/include/linux/spi/spi_gpio.h @@ -0,0 +1,60 @@ +#ifndef __LINUX_SPI_GPIO_H +#define __LINUX_SPI_GPIO_H + +/* + * For each bitbanged SPI bus, set up a platform_device node with: + * - name "spi_gpio" + * - id the same as the SPI bus number it implements + * - dev.platform data pointing to a struct spi_gpio_platform_data + * + * Or, see the driver code for information about speedups that are + * possible on platforms that support inlined access for GPIOs (no + * spi_gpio_platform_data is used). + * + * Use spi_board_info with these busses in the usual way, being sure + * that the controller_data being the GPIO used for each device's + * chipselect: + * + * static struct spi_board_info ... [] = { + * ... + * // this slave uses GPIO 42 for its chipselect + * .controller_data = (void *) 42, + * ... + * // this one uses GPIO 86 for its chipselect + * .controller_data = (void *) 86, + * ... + * }; + * + * If the bitbanged bus is later switched to a "native" controller, + * that platform_device and controller_data should be removed. + */ + +/** + * struct spi_gpio_platform_data - parameter for bitbanged SPI master + * @sck: number of the GPIO used for clock output + * @mosi: number of the GPIO used for Master Output, Slave In (MOSI) data + * @miso: number of the GPIO used for Master Input, Slave Output (MISO) data + * @num_chipselect: how many slaves to allow + * + * All GPIO signals used with the SPI bus managed through this driver + * (chipselects, MOSI, MISO, SCK) must be configured as GPIOs, instead + * of some alternate function. + * + * It can be convenient to use this driver with pins that have alternate + * functions associated with a "native" SPI controller if a driver for that + * controller is not available, or is missing important functionality. + * + * On platforms which can do so, configure MISO with a weak pullup unless + * there's an external pullup on that signal. That saves power by avoiding + * floating signals. (A weak pulldown would save power too, but many + * drivers expect to see all-ones data as the no slave "response".) + */ +struct spi_gpio_platform_data { + unsigned sck; + unsigned mosi; + unsigned miso; + + u16 num_chipselect; +}; + +#endif /* __LINUX_SPI_GPIO_H */ -- cgit v1.2.2 From a06f6211ef9b1785922f9d0e8766d63ac4e66de1 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Tue, 6 Jan 2009 14:41:49 -0800 Subject: module: add within_module_core() and within_module_init() This series of patches allows kprobes to probe module's __init and __exit functions. This means, you can probe driver initialization and terminating. Currently, kprobes can't probe __init function because these functions are freed after module initialization. And it also can't probe module __exit functions because kprobe increments reference count of target module and user can't unload it. this means __exit functions never be called unless removing probes from the module. To solve both cases, this series of patches introduces GONE flag and sets it when the target code is freed(for this purpose, kprobes hooks MODULE_STATE_* events). This also removes refcount incrementing for allowing user to unload target module. Users can check which probes are GONE by debugfs interface. For taking timing of freeing module's .init text, these also include a patch which adds module's notifier of MODULE_STATE_LIVE event. This patch: Add within_module_core() and within_module_init() for checking whether an address is in the module .init.text section or .text section, and replace within() local inline functions in kernel/module.c with them. kprobes uses these functions to check where the kprobe is inserted. Signed-off-by: Masami Hiramatsu Cc: Ananth N Mavinakayanahalli Cc: Anil S Keshavamurthy Acked-by: Rusty Russell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/module.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/linux') diff --git a/include/linux/module.h b/include/linux/module.h index 03cb93d1865a..4f7ea12463d3 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -365,6 +365,18 @@ struct module *module_text_address(unsigned long addr); struct module *__module_text_address(unsigned long addr); int is_module_address(unsigned long addr); +static inline int within_module_core(unsigned long addr, struct module *mod) +{ + return (unsigned long)mod->module_core <= addr && + addr < (unsigned long)mod->module_core + mod->core_size; +} + +static inline int within_module_init(unsigned long addr, struct module *mod) +{ + return (unsigned long)mod->module_init <= addr && + addr < (unsigned long)mod->module_init + mod->init_size; +} + /* Returns 0 and fills in value, defined and namebuf, or -ERANGE if symnum out of range. */ int module_get_kallsym(unsigned int symnum, unsigned long *value, char *type, -- cgit v1.2.2 From 129415607845d4daea11ddcba706005c69dcb942 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Tue, 6 Jan 2009 14:41:50 -0800 Subject: kprobes: add kprobe_insn_mutex and cleanup arch_remove_kprobe() Add kprobe_insn_mutex for protecting kprobe_insn_pages hlist, and remove kprobe_mutex from architecture dependent code. This allows us to call arch_remove_kprobe() (and free_insn_slot) while holding kprobe_mutex. Signed-off-by: Masami Hiramatsu Acked-by: Ananth N Mavinakayanahalli Cc: Anil S Keshavamurthy Cc: Russell King Cc: "Luck, Tony" Cc: Paul Mackerras Cc: Benjamin Herrenschmidt Cc: Martin Schwidefsky Cc: Heiko Carstens Cc: Ingo Molnar Cc: Thomas Gleixner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kprobes.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h index 497b1d1f7a05..b93e44ce2284 100644 --- a/include/linux/kprobes.h +++ b/include/linux/kprobes.h @@ -201,7 +201,6 @@ static inline int init_test_probes(void) } #endif /* CONFIG_KPROBES_SANITY_TEST */ -extern struct mutex kprobe_mutex; extern int arch_prepare_kprobe(struct kprobe *p); extern void arch_arm_kprobe(struct kprobe *p); extern void arch_disarm_kprobe(struct kprobe *p); -- cgit v1.2.2 From e8386a0cb22f4a2d439384212c494ad0bda848fe Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Tue, 6 Jan 2009 14:41:52 -0800 Subject: kprobes: support probing module __exit function Allows kprobes to probe __exit routine. This adds flags member to struct kprobe. When module is freed(kprobes hooks module_notifier to get this event), kprobes which probe the functions in that module are set to "Gone" flag to the flags member. These "Gone" probes are never be enabled. Users can check the GONE flag through debugfs. This also removes mod_refcounted, because we couldn't free a module if kprobe incremented the refcount of that module. [akpm@linux-foundation.org: document some locking] [mhiramat@redhat.com: bugfix: pass aggr_kprobe to arch_remove_kprobe] [mhiramat@redhat.com: bugfix: release old_p's insn_slot before error return] Signed-off-by: Masami Hiramatsu Acked-by: Ananth N Mavinakayanahalli Cc: Anil S Keshavamurthy Signed-off-by: Masami Hiramatsu Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kprobes.h | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h index b93e44ce2284..d6ea19e314bb 100644 --- a/include/linux/kprobes.h +++ b/include/linux/kprobes.h @@ -69,9 +69,6 @@ struct kprobe { /* list of kprobes for multi-handler support */ struct list_head list; - /* Indicates that the corresponding module has been ref counted */ - unsigned int mod_refcounted; - /*count the number of times this probe was temporarily disarmed */ unsigned long nmissed; @@ -103,8 +100,19 @@ struct kprobe { /* copy of the original instruction */ struct arch_specific_insn ainsn; + + /* Indicates various status flags. Protected by kprobe_mutex. */ + u32 flags; }; +/* Kprobe status flags */ +#define KPROBE_FLAG_GONE 1 /* breakpoint has already gone */ + +static inline int kprobe_gone(struct kprobe *p) +{ + return p->flags & KPROBE_FLAG_GONE; +} + /* * Special probe type that uses setjmp-longjmp type tricks to resume * execution at a specified entry with a matching prototype corresponding -- cgit v1.2.2 From 730c9eeca9808fc2cfb506cc68c90aa330da17b0 Mon Sep 17 00:00:00 2001 From: Ian Kent Date: Tue, 6 Jan 2009 14:42:06 -0800 Subject: autofs4: improve parameter usage The parameter usage in the device node ioctl code uses arg1 and arg2 as parameter names. This patch redefines the parameter names to reflect what they actually are in an effort to make the code more readable. Signed-off-by: Ian Kent Signed-off-by: Jeff Moyer Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/auto_dev-ioctl.h | 75 +++++++++++++++++++++++++++++++++++++++--- 1 file changed, 71 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/auto_dev-ioctl.h b/include/linux/auto_dev-ioctl.h index f4d05ccd731f..91a773993a5c 100644 --- a/include/linux/auto_dev-ioctl.h +++ b/include/linux/auto_dev-ioctl.h @@ -10,6 +10,7 @@ #ifndef _LINUX_AUTO_DEV_IOCTL_H #define _LINUX_AUTO_DEV_IOCTL_H +#include #include #define AUTOFS_DEVICE_NAME "autofs" @@ -25,6 +26,60 @@ * An ioctl interface for autofs mount point control. */ +struct args_protover { + __u32 version; +}; + +struct args_protosubver { + __u32 sub_version; +}; + +struct args_openmount { + __u32 devid; +}; + +struct args_ready { + __u32 token; +}; + +struct args_fail { + __u32 token; + __s32 status; +}; + +struct args_setpipefd { + __s32 pipefd; +}; + +struct args_timeout { + __u64 timeout; +}; + +struct args_requester { + __u32 uid; + __u32 gid; +}; + +struct args_expire { + __u32 how; +}; + +struct args_askumount { + __u32 may_umount; +}; + +struct args_ismountpoint { + union { + struct args_in { + __u32 type; + } in; + struct args_out { + __u32 devid; + __u32 magic; + } out; + }; +}; + /* * All the ioctls use this structure. * When sending a path size must account for the total length @@ -39,20 +94,32 @@ struct autofs_dev_ioctl { * including this struct */ __s32 ioctlfd; /* automount command fd */ - __u32 arg1; /* Command parameters */ - __u32 arg2; + /* Command parameters */ + + union { + struct args_protover protover; + struct args_protosubver protosubver; + struct args_openmount openmount; + struct args_ready ready; + struct args_fail fail; + struct args_setpipefd setpipefd; + struct args_timeout timeout; + struct args_requester requester; + struct args_expire expire; + struct args_askumount askumount; + struct args_ismountpoint ismountpoint; + }; char path[0]; }; static inline void init_autofs_dev_ioctl(struct autofs_dev_ioctl *in) { + memset(in, 0, sizeof(struct autofs_dev_ioctl)); in->ver_major = AUTOFS_DEV_IOCTL_VERSION_MAJOR; in->ver_minor = AUTOFS_DEV_IOCTL_VERSION_MINOR; in->size = sizeof(struct autofs_dev_ioctl); in->ioctlfd = -1; - in->arg1 = 0; - in->arg2 = 0; return; } -- cgit v1.2.2 From a92daf6ba1f9ace8584edc8eb557a77aa7c2c71d Mon Sep 17 00:00:00 2001 From: Ian Kent Date: Tue, 6 Jan 2009 14:42:08 -0800 Subject: autofs4: make autofs type usage explicit - the type assigned at mount when no type is given is changed from 0 to AUTOFS_TYPE_INDIRECT. This was done because 0 and AUTOFS_TYPE_INDIRECT were being treated implicitly as the same type. - previously, an offset mount had it's type set to AUTOFS_TYPE_DIRECT|AUTOFS_TYPE_OFFSET but the mount control re-implementation needs to be able distinguish all three types. So this was changed to make the type setting explicit. - a type AUTOFS_TYPE_ANY was added for use by the re-implementation when checking if a given path is a mountpoint. It's not really a type as we use this to ask if a given path is a mountpoint in the autofs_dev_ioctl_ismountpoint() function. - functions to set and test the autofs mount types have been added to improve readability and make the type usage explicit. - the mount type is used from user space for the mount control re-implementtion so, for consistency, all the definitions have been moved to the user space include file include/linux/auto_fs4.h. Signed-off-by: Ian Kent Signed-off-by: Jeff Moyer Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/auto_fs4.h | 62 ++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 58 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/auto_fs4.h b/include/linux/auto_fs4.h index 2253716d4b92..55fa478bd639 100644 --- a/include/linux/auto_fs4.h +++ b/include/linux/auto_fs4.h @@ -29,10 +29,64 @@ #define AUTOFS_EXP_IMMEDIATE 1 #define AUTOFS_EXP_LEAVES 2 -#define AUTOFS_TYPE_ANY 0x0000 -#define AUTOFS_TYPE_INDIRECT 0x0001 -#define AUTOFS_TYPE_DIRECT 0x0002 -#define AUTOFS_TYPE_OFFSET 0x0004 +#define AUTOFS_TYPE_ANY 0U +#define AUTOFS_TYPE_INDIRECT 1U +#define AUTOFS_TYPE_DIRECT 2U +#define AUTOFS_TYPE_OFFSET 4U + +static inline void set_autofs_type_indirect(unsigned int *type) +{ + *type = AUTOFS_TYPE_INDIRECT; + return; +} + +static inline unsigned int autofs_type_indirect(unsigned int type) +{ + return (type == AUTOFS_TYPE_INDIRECT); +} + +static inline void set_autofs_type_direct(unsigned int *type) +{ + *type = AUTOFS_TYPE_DIRECT; + return; +} + +static inline unsigned int autofs_type_direct(unsigned int type) +{ + return (type == AUTOFS_TYPE_DIRECT); +} + +static inline void set_autofs_type_offset(unsigned int *type) +{ + *type = AUTOFS_TYPE_OFFSET; + return; +} + +static inline unsigned int autofs_type_offset(unsigned int type) +{ + return (type == AUTOFS_TYPE_OFFSET); +} + +static inline unsigned int autofs_type_trigger(unsigned int type) +{ + return (type == AUTOFS_TYPE_DIRECT || type == AUTOFS_TYPE_OFFSET); +} + +/* + * This isn't really a type as we use it to say "no type set" to + * indicate we want to search for "any" mount in the + * autofs_dev_ioctl_ismountpoint() device ioctl function. + */ +static inline void set_autofs_type_any(unsigned int *type) +{ + *type = AUTOFS_TYPE_ANY; + return; +} + +static inline unsigned int autofs_type_any(unsigned int type) +{ + return (type == AUTOFS_TYPE_ANY); +} /* Daemon notification packet types */ enum autofs_notify { -- cgit v1.2.2 From cabb3fc4bd1628c37c37e054960eb3e4bf30dc26 Mon Sep 17 00:00:00 2001 From: David Brownell Date: Tue, 6 Jan 2009 14:42:26 -0800 Subject: twl4030-gpio: cleanup debounce Provide a static debounce configuration mechanism for twl4030 GPIOs, replacing the previous dynamic one. The single user of that mechanism was for MMC card detect debouncing. Boards can provide a bitmask saying which GPIOs to debounce (30 msec). It's always enabled for pins with the MMC card-detect/VMMCx link active, so most boards won't need to set the debounce mask. This is a net code shrink, including runtime footprint. Signed-off-by: David Brownell Signed-off-by: Tony Lindgren Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/i2c/twl4030.h | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/i2c/twl4030.h b/include/linux/i2c/twl4030.h index a8f84c01f82e..8137f660a5cc 100644 --- a/include/linux/i2c/twl4030.h +++ b/include/linux/i2c/twl4030.h @@ -234,6 +234,9 @@ struct twl4030_gpio_platform_data { /* gpio-n should control VMMC(n+1) if BIT(n) in mmc_cd is set */ u8 mmc_cd; + /* if BIT(N) is set, or VMMC(n+1) is linked, debounce GPIO-N */ + u32 debounce; + /* For gpio-N, bit (1 << N) in "pullups" is set if that pullup * should be enabled. Else, if that bit is set in "pulldowns", * that pulldown is enabled. Don't waste power by letting any @@ -307,12 +310,6 @@ int twl4030_sih_setup(int module); #define TWL4030_VAUX3_DEV_GRP 0x1F #define TWL4030_VAUX3_DEDICATED 0x22 -/* - * Exported TWL4030 GPIO APIs - * - * WARNING -- use standard GPIO and IRQ calls instead; these will vanish. - */ -int twl4030_set_gpio_debounce(int gpio, int enable); #if defined(CONFIG_TWL4030_BCI_BATTERY) || \ defined(CONFIG_TWL4030_BCI_BATTERY_MODULE) -- cgit v1.2.2 From d3635abfee0c55ad9dcd6b6172a0c6a5455900c9 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Tue, 6 Jan 2009 14:42:41 -0800 Subject: rapidio: remove excess kernel-doc notation Remove excess kernel-doc notation from rio header and driver: Warning(include/linux/rio_drv.h:399): Excess function parameter or struct member 'buffer' description in 'rio_get_inb_message' Signed-off-by: Randy Dunlap Cc: Matt Porter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/rio_drv.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/rio_drv.h b/include/linux/rio_drv.h index 32c0547ffafc..c93a58a40033 100644 --- a/include/linux/rio_drv.h +++ b/include/linux/rio_drv.h @@ -391,7 +391,6 @@ static inline int rio_add_inb_buffer(struct rio_mport *mport, int mbox, * rio_get_inb_message - Get A RIO message from an inbound mailbox queue * @mport: Master port containing the inbound mailbox * @mbox: The inbound mailbox number - * @buffer: Pointer to the message buffer * * Get a RIO message from an inbound mailbox queue. Returns 0 on success. */ -- cgit v1.2.2 From 8cd3ac3aca3f2afe8570708066d64d893da468e8 Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Tue, 6 Jan 2009 14:42:48 -0800 Subject: fs/exec.c: make do_coredump() void No one cares do_coredump()'s return value, and also it seems that it is also not necessary. So make it void. [akpm@linux-foundation.org: coding-style fixes] Signed-off-by: WANG Cong Cc: Alexander Viro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/binfmts.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h index 0d0150b4901e..77b4a9e46004 100644 --- a/include/linux/binfmts.h +++ b/include/linux/binfmts.h @@ -107,7 +107,7 @@ extern int setup_arg_pages(struct linux_binprm * bprm, extern int bprm_mm_init(struct linux_binprm *bprm); extern int copy_strings_kernel(int argc,char ** argv,struct linux_binprm *bprm); extern void install_exec_creds(struct linux_binprm *bprm); -extern int do_coredump(long signr, int exit_code, struct pt_regs * regs); +extern void do_coredump(long signr, int exit_code, struct pt_regs *regs); extern int set_binfmt(struct linux_binfmt *new); extern void free_bprm(struct linux_binprm *); -- cgit v1.2.2 From 991c0e6d1ae3df59f0ddfe05edecec8319e35a1b Mon Sep 17 00:00:00 2001 From: Harvey Harrison Date: Tue, 6 Jan 2009 14:56:21 -0800 Subject: byteorder: only use linux/swab.h The first step to make swab.h a regular header that will include an asm/swab.h with arch overrides. Avoid the gratuitous differences introduced in the new linux/swab.h by naming the ___constant_swabXX bits and __fswabXX bits exactly as found in the old implementation in byteorder/swab[b].h Use this new swab.h in byteorder/[big|little]_endian.h and remove the two old swab headers. Although the inclusion of asm/byteorder.h looks strange in linux/swab.h, this will allow each arch to move the actual arch overrides for the swab bits in an asm file and then the includes can be cleaned up without requiring a flag day for all arches at once. Keep providing __fswabXX in case some userspace was using them directly, but the revised __swabXX should be used instead in any new code and will always do constant folding not dependent on the optimization level, which means the __constant versions can be phased out in-kernel. Arches that use the old-style arch macros will lose their optimized versions until they move to the new style, but at least they will still compile. Many arches have already moved and the patches to move the remaining arches are trivial. Signed-off-by: Harvey Harrison Signed-off-by: Linus Torvalds --- include/linux/byteorder/Kbuild | 2 - include/linux/byteorder/big_endian.h | 3 +- include/linux/byteorder/little_endian.h | 3 +- include/linux/byteorder/swab.h | 222 -------------------------------- include/linux/byteorder/swabb.h | 135 ------------------- include/linux/swab.h | 50 +++---- 6 files changed, 27 insertions(+), 388 deletions(-) delete mode 100644 include/linux/byteorder/swab.h delete mode 100644 include/linux/byteorder/swabb.h (limited to 'include/linux') diff --git a/include/linux/byteorder/Kbuild b/include/linux/byteorder/Kbuild index fbaa7f9cee32..38437225b092 100644 --- a/include/linux/byteorder/Kbuild +++ b/include/linux/byteorder/Kbuild @@ -1,4 +1,2 @@ unifdef-y += big_endian.h unifdef-y += little_endian.h -unifdef-y += swab.h -unifdef-y += swabb.h diff --git a/include/linux/byteorder/big_endian.h b/include/linux/byteorder/big_endian.h index 1cba3f3efe5f..3c80fd7e8b56 100644 --- a/include/linux/byteorder/big_endian.h +++ b/include/linux/byteorder/big_endian.h @@ -9,8 +9,7 @@ #endif #include -#include -#include +#include #define __constant_htonl(x) ((__force __be32)(__u32)(x)) #define __constant_ntohl(x) ((__force __u32)(__be32)(x)) diff --git a/include/linux/byteorder/little_endian.h b/include/linux/byteorder/little_endian.h index cedc1b5a289c..83195fb82962 100644 --- a/include/linux/byteorder/little_endian.h +++ b/include/linux/byteorder/little_endian.h @@ -9,8 +9,7 @@ #endif #include -#include -#include +#include #define __constant_htonl(x) ((__force __be32)___constant_swab32((x))) #define __constant_ntohl(x) ___constant_swab32((__force __be32)(x)) diff --git a/include/linux/byteorder/swab.h b/include/linux/byteorder/swab.h deleted file mode 100644 index 142134ff1645..000000000000 --- a/include/linux/byteorder/swab.h +++ /dev/null @@ -1,222 +0,0 @@ -#ifndef _LINUX_BYTEORDER_SWAB_H -#define _LINUX_BYTEORDER_SWAB_H - -/* - * linux/byteorder/swab.h - * Byte-swapping, independently from CPU endianness - * swabXX[ps]?(foo) - * - * Francois-Rene Rideau 19971205 - * separated swab functions from cpu_to_XX, - * to clean up support for bizarre-endian architectures. - * - * Trent Piepho 2007114 - * make constant-folding work, provide C versions that - * gcc can optimize better, explain different versions - * - * See asm-i386/byteorder.h and suches for examples of how to provide - * architecture-dependent optimized versions - * - */ - -#include - -/* Functions/macros defined, there are a lot: - * - * ___swabXX - * Generic C versions of the swab functions. - * - * ___constant_swabXX - * C versions that gcc can fold into a compile-time constant when - * the argument is a compile-time constant. - * - * __arch__swabXX[sp]? - * Architecture optimized versions of all the swab functions - * (including the s and p versions). These can be defined in - * asm-arch/byteorder.h. Any which are not, are defined here. - * __arch__swabXXs() is defined in terms of __arch__swabXXp(), which - * is defined in terms of __arch__swabXX(), which is in turn defined - * in terms of ___swabXX(x). - * These must be macros. They may be unsafe for arguments with - * side-effects. - * - * __fswabXX - * Inline function versions of the __arch__ macros. These _are_ safe - * if the arguments have side-effects. Note there are no s and p - * versions of these. - * - * __swabXX[sb] - * There are the ones you should actually use. The __swabXX versions - * will be a constant given a constant argument and use the arch - * specific code (if any) for non-constant arguments. The s and p - * versions always use the arch specific code (constant folding - * doesn't apply). They are safe to use with arguments with - * side-effects. - * - * swabXX[sb] - * Nicknames for __swabXX[sb] to use in the kernel. - */ - -/* casts are necessary for constants, because we never know how for sure - * how U/UL/ULL map to __u16, __u32, __u64. At least not in a portable way. - */ - -static __inline__ __attribute_const__ __u16 ___swab16(__u16 x) -{ - return x<<8 | x>>8; -} -static __inline__ __attribute_const__ __u32 ___swab32(__u32 x) -{ - return x<<24 | x>>24 | - (x & (__u32)0x0000ff00UL)<<8 | - (x & (__u32)0x00ff0000UL)>>8; -} -static __inline__ __attribute_const__ __u64 ___swab64(__u64 x) -{ - return x<<56 | x>>56 | - (x & (__u64)0x000000000000ff00ULL)<<40 | - (x & (__u64)0x0000000000ff0000ULL)<<24 | - (x & (__u64)0x00000000ff000000ULL)<< 8 | - (x & (__u64)0x000000ff00000000ULL)>> 8 | - (x & (__u64)0x0000ff0000000000ULL)>>24 | - (x & (__u64)0x00ff000000000000ULL)>>40; -} - -#define ___constant_swab16(x) \ - ((__u16)( \ - (((__u16)(x) & (__u16)0x00ffU) << 8) | \ - (((__u16)(x) & (__u16)0xff00U) >> 8) )) -#define ___constant_swab32(x) \ - ((__u32)( \ - (((__u32)(x) & (__u32)0x000000ffUL) << 24) | \ - (((__u32)(x) & (__u32)0x0000ff00UL) << 8) | \ - (((__u32)(x) & (__u32)0x00ff0000UL) >> 8) | \ - (((__u32)(x) & (__u32)0xff000000UL) >> 24) )) -#define ___constant_swab64(x) \ - ((__u64)( \ - (__u64)(((__u64)(x) & (__u64)0x00000000000000ffULL) << 56) | \ - (__u64)(((__u64)(x) & (__u64)0x000000000000ff00ULL) << 40) | \ - (__u64)(((__u64)(x) & (__u64)0x0000000000ff0000ULL) << 24) | \ - (__u64)(((__u64)(x) & (__u64)0x00000000ff000000ULL) << 8) | \ - (__u64)(((__u64)(x) & (__u64)0x000000ff00000000ULL) >> 8) | \ - (__u64)(((__u64)(x) & (__u64)0x0000ff0000000000ULL) >> 24) | \ - (__u64)(((__u64)(x) & (__u64)0x00ff000000000000ULL) >> 40) | \ - (__u64)(((__u64)(x) & (__u64)0xff00000000000000ULL) >> 56) )) - -/* - * provide defaults when no architecture-specific optimization is detected - */ -#ifndef __arch__swab16 -# define __arch__swab16(x) ___swab16(x) -#endif -#ifndef __arch__swab32 -# define __arch__swab32(x) ___swab32(x) -#endif -#ifndef __arch__swab64 -# define __arch__swab64(x) ___swab64(x) -#endif - -#ifndef __arch__swab16p -# define __arch__swab16p(x) __arch__swab16(*(x)) -#endif -#ifndef __arch__swab32p -# define __arch__swab32p(x) __arch__swab32(*(x)) -#endif -#ifndef __arch__swab64p -# define __arch__swab64p(x) __arch__swab64(*(x)) -#endif - -#ifndef __arch__swab16s -# define __arch__swab16s(x) ((void)(*(x) = __arch__swab16p(x))) -#endif -#ifndef __arch__swab32s -# define __arch__swab32s(x) ((void)(*(x) = __arch__swab32p(x))) -#endif -#ifndef __arch__swab64s -# define __arch__swab64s(x) ((void)(*(x) = __arch__swab64p(x))) -#endif - - -/* - * Allow constant folding - */ -#if defined(__GNUC__) && defined(__OPTIMIZE__) -# define __swab16(x) \ -(__builtin_constant_p((__u16)(x)) ? \ - ___constant_swab16((x)) : \ - __fswab16((x))) -# define __swab32(x) \ -(__builtin_constant_p((__u32)(x)) ? \ - ___constant_swab32((x)) : \ - __fswab32((x))) -# define __swab64(x) \ -(__builtin_constant_p((__u64)(x)) ? \ - ___constant_swab64((x)) : \ - __fswab64((x))) -#else -# define __swab16(x) __fswab16(x) -# define __swab32(x) __fswab32(x) -# define __swab64(x) __fswab64(x) -#endif /* OPTIMIZE */ - - -static __inline__ __attribute_const__ __u16 __fswab16(__u16 x) -{ - return __arch__swab16(x); -} -static __inline__ __u16 __swab16p(const __u16 *x) -{ - return __arch__swab16p(x); -} -static __inline__ void __swab16s(__u16 *addr) -{ - __arch__swab16s(addr); -} - -static __inline__ __attribute_const__ __u32 __fswab32(__u32 x) -{ - return __arch__swab32(x); -} -static __inline__ __u32 __swab32p(const __u32 *x) -{ - return __arch__swab32p(x); -} -static __inline__ void __swab32s(__u32 *addr) -{ - __arch__swab32s(addr); -} - -#ifdef __BYTEORDER_HAS_U64__ -static __inline__ __attribute_const__ __u64 __fswab64(__u64 x) -{ -# ifdef __SWAB_64_THRU_32__ - __u32 h = x >> 32; - __u32 l = x & ((1ULL<<32)-1); - return (((__u64)__swab32(l)) << 32) | ((__u64)(__swab32(h))); -# else - return __arch__swab64(x); -# endif -} -static __inline__ __u64 __swab64p(const __u64 *x) -{ - return __arch__swab64p(x); -} -static __inline__ void __swab64s(__u64 *addr) -{ - __arch__swab64s(addr); -} -#endif /* __BYTEORDER_HAS_U64__ */ - -#if defined(__KERNEL__) -#define swab16 __swab16 -#define swab32 __swab32 -#define swab64 __swab64 -#define swab16p __swab16p -#define swab32p __swab32p -#define swab64p __swab64p -#define swab16s __swab16s -#define swab32s __swab32s -#define swab64s __swab64s -#endif - -#endif /* _LINUX_BYTEORDER_SWAB_H */ diff --git a/include/linux/byteorder/swabb.h b/include/linux/byteorder/swabb.h deleted file mode 100644 index 8c780c7d779e..000000000000 --- a/include/linux/byteorder/swabb.h +++ /dev/null @@ -1,135 +0,0 @@ -#ifndef _LINUX_BYTEORDER_SWABB_H -#define _LINUX_BYTEORDER_SWABB_H - -/* - * linux/byteorder/swabb.h - * SWAp Bytes Bizarrely - * swaHHXX[ps]?(foo) - * - * Support for obNUXIous pdp-endian and other bizarre architectures. - * Will Linux ever run on such ancient beasts? if not, this file - * will be but a programming pearl. Still, it's a reminder that we - * shouldn't be making too many assumptions when trying to be portable. - * - */ - -/* - * Meaning of the names I chose (vaxlinux people feel free to correct them): - * swahw32 swap 16-bit half-words in a 32-bit word - * swahb32 swap 8-bit halves of each 16-bit half-word in a 32-bit word - * - * No 64-bit support yet. I don't know NUXI conventions for long longs. - * I guarantee it will be a mess when it's there, though :-> - * It will be even worse if there are conflicting 64-bit conventions. - * Hopefully, no one ever used 64-bit objects on NUXI machines. - * - */ - -#include - -#define ___swahw32(x) \ -({ \ - __u32 __x = (x); \ - ((__u32)( \ - (((__u32)(__x) & (__u32)0x0000ffffUL) << 16) | \ - (((__u32)(__x) & (__u32)0xffff0000UL) >> 16) )); \ -}) -#define ___swahb32(x) \ -({ \ - __u32 __x = (x); \ - ((__u32)( \ - (((__u32)(__x) & (__u32)0x00ff00ffUL) << 8) | \ - (((__u32)(__x) & (__u32)0xff00ff00UL) >> 8) )); \ -}) - -#define ___constant_swahw32(x) \ - ((__u32)( \ - (((__u32)(x) & (__u32)0x0000ffffUL) << 16) | \ - (((__u32)(x) & (__u32)0xffff0000UL) >> 16) )) -#define ___constant_swahb32(x) \ - ((__u32)( \ - (((__u32)(x) & (__u32)0x00ff00ffUL) << 8) | \ - (((__u32)(x) & (__u32)0xff00ff00UL) >> 8) )) - -/* - * provide defaults when no architecture-specific optimization is detected - */ -#ifndef __arch__swahw32 -# define __arch__swahw32(x) ___swahw32(x) -#endif -#ifndef __arch__swahb32 -# define __arch__swahb32(x) ___swahb32(x) -#endif - -#ifndef __arch__swahw32p -# define __arch__swahw32p(x) __swahw32(*(x)) -#endif -#ifndef __arch__swahb32p -# define __arch__swahb32p(x) __swahb32(*(x)) -#endif - -#ifndef __arch__swahw32s -# define __arch__swahw32s(x) do { *(x) = __swahw32p((x)); } while (0) -#endif -#ifndef __arch__swahb32s -# define __arch__swahb32s(x) do { *(x) = __swahb32p((x)); } while (0) -#endif - - -/* - * Allow constant folding - */ -#define __swahw32(x) \ -(__builtin_constant_p((__u32)(x)) ? \ - ___swahw32((x)) : \ - __fswahw32((x))) -#define __swahb32(x) \ -(__builtin_constant_p((__u32)(x)) ? \ - ___swahb32((x)) : \ - __fswahb32((x))) - - -static inline __u32 __fswahw32(__u32 x) -{ - return __arch__swahw32(x); -} - -static inline __u32 __swahw32p(__u32 *x) -{ - return __arch__swahw32p(x); -} - -static inline void __swahw32s(__u32 *addr) -{ - __arch__swahw32s(addr); -} - -static inline __u32 __fswahb32(__u32 x) -{ - return __arch__swahb32(x); -} - -static inline __u32 __swahb32p(__u32 *x) -{ - return __arch__swahb32p(x); -} - -static inline void __swahb32s(__u32 *addr) -{ - __arch__swahb32s(addr); -} - -#ifdef __BYTEORDER_HAS_U64__ -/* - * Not supported yet - */ -#endif /* __BYTEORDER_HAS_U64__ */ - -#define swahw32 __swahw32 -#define swahb32 __swahb32 -#define swahw32p __swahw32p -#define swahb32p __swahb32p -#define swahw32s __swahw32s -#define swahb32s __swahb32s - -#endif /* _LINUX_BYTEORDER_SWABB_H */ diff --git a/include/linux/swab.h b/include/linux/swab.h index bbed279f3b32..9a2d33e0a98a 100644 --- a/include/linux/swab.h +++ b/include/linux/swab.h @@ -9,17 +9,17 @@ * casts are necessary for constants, because we never know how for sure * how U/UL/ULL map to __u16, __u32, __u64. At least not in a portable way. */ -#define __const_swab16(x) ((__u16)( \ +#define ___constant_swab16(x) ((__u16)( \ (((__u16)(x) & (__u16)0x00ffU) << 8) | \ (((__u16)(x) & (__u16)0xff00U) >> 8))) -#define __const_swab32(x) ((__u32)( \ +#define ___constant_swab32(x) ((__u32)( \ (((__u32)(x) & (__u32)0x000000ffUL) << 24) | \ (((__u32)(x) & (__u32)0x0000ff00UL) << 8) | \ (((__u32)(x) & (__u32)0x00ff0000UL) >> 8) | \ (((__u32)(x) & (__u32)0xff000000UL) >> 24))) -#define __const_swab64(x) ((__u64)( \ +#define ___constant_swab64(x) ((__u64)( \ (((__u64)(x) & (__u64)0x00000000000000ffULL) << 56) | \ (((__u64)(x) & (__u64)0x000000000000ff00ULL) << 40) | \ (((__u64)(x) & (__u64)0x0000000000ff0000ULL) << 24) | \ @@ -29,11 +29,11 @@ (((__u64)(x) & (__u64)0x00ff000000000000ULL) >> 40) | \ (((__u64)(x) & (__u64)0xff00000000000000ULL) >> 56))) -#define __const_swahw32(x) ((__u32)( \ +#define ___constant_swahw32(x) ((__u32)( \ (((__u32)(x) & (__u32)0x0000ffffUL) << 16) | \ (((__u32)(x) & (__u32)0xffff0000UL) >> 16))) -#define __const_swahb32(x) ((__u32)( \ +#define ___constant_swahb32(x) ((__u32)( \ (((__u32)(x) & (__u32)0x00ff00ffUL) << 8) | \ (((__u32)(x) & (__u32)0xff00ff00UL) >> 8))) @@ -43,25 +43,25 @@ * ___swab16, ___swab32, ___swab64, ___swahw32, ___swahb32 */ -static inline __attribute_const__ __u16 ___swab16(__u16 val) +static inline __attribute_const__ __u16 __fswab16(__u16 val) { #ifdef __arch_swab16 return __arch_swab16(val); #else - return __const_swab16(val); + return ___constant_swab16(val); #endif } -static inline __attribute_const__ __u32 ___swab32(__u32 val) +static inline __attribute_const__ __u32 __fswab32(__u32 val) { #ifdef __arch_swab32 return __arch_swab32(val); #else - return __const_swab32(val); + return ___constant_swab32(val); #endif } -static inline __attribute_const__ __u64 ___swab64(__u64 val) +static inline __attribute_const__ __u64 __fswab64(__u64 val) { #ifdef __arch_swab64 return __arch_swab64(val); @@ -70,25 +70,25 @@ static inline __attribute_const__ __u64 ___swab64(__u64 val) __u32 l = val & ((1ULL << 32) - 1); return (((__u64)___swab32(l)) << 32) | ((__u64)(___swab32(h))); #else - return __const_swab64(val); + return ___constant_swab64(val); #endif } -static inline __attribute_const__ __u32 ___swahw32(__u32 val) +static inline __attribute_const__ __u32 __fswahw32(__u32 val) { #ifdef __arch_swahw32 return __arch_swahw32(val); #else - return __const_swahw32(val); + return ___constant_swahw32(val); #endif } -static inline __attribute_const__ __u32 ___swahb32(__u32 val) +static inline __attribute_const__ __u32 __fswahb32(__u32 val) { #ifdef __arch_swahb32 return __arch_swahb32(val); #else - return __const_swahb32(val); + return ___constant_swahb32(val); #endif } @@ -98,8 +98,8 @@ static inline __attribute_const__ __u32 ___swahb32(__u32 val) */ #define __swab16(x) \ (__builtin_constant_p((__u16)(x)) ? \ - __const_swab16((x)) : \ - ___swab16((x))) + ___constant_swab16(x) : \ + __fswab16(x)) /** * __swab32 - return a byteswapped 32-bit value @@ -107,8 +107,8 @@ static inline __attribute_const__ __u32 ___swahb32(__u32 val) */ #define __swab32(x) \ (__builtin_constant_p((__u32)(x)) ? \ - __const_swab32((x)) : \ - ___swab32((x))) + ___constant_swab32(x) : \ + __fswab32(x)) /** * __swab64 - return a byteswapped 64-bit value @@ -116,8 +116,8 @@ static inline __attribute_const__ __u32 ___swahb32(__u32 val) */ #define __swab64(x) \ (__builtin_constant_p((__u64)(x)) ? \ - __const_swab64((x)) : \ - ___swab64((x))) + ___constant_swab64(x) : \ + __fswab64(x)) /** * __swahw32 - return a word-swapped 32-bit value @@ -127,8 +127,8 @@ static inline __attribute_const__ __u32 ___swahb32(__u32 val) */ #define __swahw32(x) \ (__builtin_constant_p((__u32)(x)) ? \ - __const_swahw32((x)) : \ - ___swahw32((x))) + ___constant_swahw32(x) : \ + __fswahw32(x)) /** * __swahb32 - return a high and low byte-swapped 32-bit value @@ -138,8 +138,8 @@ static inline __attribute_const__ __u32 ___swahb32(__u32 val) */ #define __swahb32(x) \ (__builtin_constant_p((__u32)(x)) ? \ - __const_swahb32((x)) : \ - ___swahb32((x))) + ___constant_swahb32(x) : \ + __fswahb32(x)) /** * __swab16p - return a byteswapped 16-bit value from a pointer -- cgit v1.2.2 From 637b180c23313f2964e0ef20f1ee375203866968 Mon Sep 17 00:00:00 2001 From: Harvey Harrison Date: Tue, 6 Jan 2009 13:30:58 -0800 Subject: byteorder: remove the now unused byteorder.h This implementation caused problems in userspace which can, and does define _both_ __LITTLE_ENDIAN and __BIG_ENDIAN. Signed-off-by: Harvey Harrison Signed-off-by: Linus Torvalds --- include/linux/Kbuild | 1 - include/linux/byteorder.h | 372 ---------------------------------------------- 2 files changed, 373 deletions(-) delete mode 100644 include/linux/byteorder.h (limited to 'include/linux') diff --git a/include/linux/Kbuild b/include/linux/Kbuild index 39da666067b9..a3323f337e4d 100644 --- a/include/linux/Kbuild +++ b/include/linux/Kbuild @@ -179,7 +179,6 @@ unifdef-y += auto_fs.h unifdef-y += auxvec.h unifdef-y += binfmts.h unifdef-y += blktrace_api.h -unifdef-y += byteorder.h unifdef-y += capability.h unifdef-y += capi.h unifdef-y += cciss_ioctl.h diff --git a/include/linux/byteorder.h b/include/linux/byteorder.h deleted file mode 100644 index 29f002d73d98..000000000000 --- a/include/linux/byteorder.h +++ /dev/null @@ -1,372 +0,0 @@ -#ifndef _LINUX_BYTEORDER_H -#define _LINUX_BYTEORDER_H - -#include -#include - -#if defined(__LITTLE_ENDIAN) && defined(__BIG_ENDIAN) -# error Fix asm/byteorder.h to define one endianness -#endif - -#if !defined(__LITTLE_ENDIAN) && !defined(__BIG_ENDIAN) -# error Fix asm/byteorder.h to define arch endianness -#endif - -#ifdef __LITTLE_ENDIAN -# undef __LITTLE_ENDIAN -# define __LITTLE_ENDIAN 1234 -#endif - -#ifdef __BIG_ENDIAN -# undef __BIG_ENDIAN -# define __BIG_ENDIAN 4321 -#endif - -#if defined(__LITTLE_ENDIAN) && !defined(__LITTLE_ENDIAN_BITFIELD) -# define __LITTLE_ENDIAN_BITFIELD -#endif - -#if defined(__BIG_ENDIAN) && !defined(__BIG_ENDIAN_BITFIELD) -# define __BIG_ENDIAN_BITFIELD -#endif - -#ifdef __LITTLE_ENDIAN -# define __le16_to_cpu(x) ((__force __u16)(__le16)(x)) -# define __le32_to_cpu(x) ((__force __u32)(__le32)(x)) -# define __le64_to_cpu(x) ((__force __u64)(__le64)(x)) -# define __cpu_to_le16(x) ((__force __le16)(__u16)(x)) -# define __cpu_to_le32(x) ((__force __le32)(__u32)(x)) -# define __cpu_to_le64(x) ((__force __le64)(__u64)(x)) - -# define __be16_to_cpu(x) __swab16((__force __u16)(__be16)(x)) -# define __be32_to_cpu(x) __swab32((__force __u32)(__be32)(x)) -# define __be64_to_cpu(x) __swab64((__force __u64)(__be64)(x)) -# define __cpu_to_be16(x) ((__force __be16)__swab16(x)) -# define __cpu_to_be32(x) ((__force __be32)__swab32(x)) -# define __cpu_to_be64(x) ((__force __be64)__swab64(x)) -#endif - -#ifdef __BIG_ENDIAN -# define __be16_to_cpu(x) ((__force __u16)(__be16)(x)) -# define __be32_to_cpu(x) ((__force __u32)(__be32)(x)) -# define __be64_to_cpu(x) ((__force __u64)(__be64)(x)) -# define __cpu_to_be16(x) ((__force __be16)(__u16)(x)) -# define __cpu_to_be32(x) ((__force __be32)(__u32)(x)) -# define __cpu_to_be64(x) ((__force __be64)(__u64)(x)) - -# define __le16_to_cpu(x) __swab16((__force __u16)(__le16)(x)) -# define __le32_to_cpu(x) __swab32((__force __u32)(__le32)(x)) -# define __le64_to_cpu(x) __swab64((__force __u64)(__le64)(x)) -# define __cpu_to_le16(x) ((__force __le16)__swab16(x)) -# define __cpu_to_le32(x) ((__force __le32)__swab32(x)) -# define __cpu_to_le64(x) ((__force __le64)__swab64(x)) -#endif - -/* - * These helpers could be phased out over time as the base version - * handles constant folding. - */ -#define __constant_htonl(x) __cpu_to_be32(x) -#define __constant_ntohl(x) __be32_to_cpu(x) -#define __constant_htons(x) __cpu_to_be16(x) -#define __constant_ntohs(x) __be16_to_cpu(x) - -#define __constant_le16_to_cpu(x) __le16_to_cpu(x) -#define __constant_le32_to_cpu(x) __le32_to_cpu(x) -#define __constant_le64_to_cpu(x) __le64_to_cpu(x) -#define __constant_be16_to_cpu(x) __be16_to_cpu(x) -#define __constant_be32_to_cpu(x) __be32_to_cpu(x) -#define __constant_be64_to_cpu(x) __be64_to_cpu(x) - -#define __constant_cpu_to_le16(x) __cpu_to_le16(x) -#define __constant_cpu_to_le32(x) __cpu_to_le32(x) -#define __constant_cpu_to_le64(x) __cpu_to_le64(x) -#define __constant_cpu_to_be16(x) __cpu_to_be16(x) -#define __constant_cpu_to_be32(x) __cpu_to_be32(x) -#define __constant_cpu_to_be64(x) __cpu_to_be64(x) - -static inline void __le16_to_cpus(__u16 *p) -{ -#ifdef __BIG_ENDIAN - __swab16s(p); -#endif -} - -static inline void __cpu_to_le16s(__u16 *p) -{ -#ifdef __BIG_ENDIAN - __swab16s(p); -#endif -} - -static inline void __le32_to_cpus(__u32 *p) -{ -#ifdef __BIG_ENDIAN - __swab32s(p); -#endif -} - -static inline void __cpu_to_le32s(__u32 *p) -{ -#ifdef __BIG_ENDIAN - __swab32s(p); -#endif -} - -static inline void __le64_to_cpus(__u64 *p) -{ -#ifdef __BIG_ENDIAN - __swab64s(p); -#endif -} - -static inline void __cpu_to_le64s(__u64 *p) -{ -#ifdef __BIG_ENDIAN - __swab64s(p); -#endif -} - -static inline void __be16_to_cpus(__u16 *p) -{ -#ifdef __LITTLE_ENDIAN - __swab16s(p); -#endif -} - -static inline void __cpu_to_be16s(__u16 *p) -{ -#ifdef __LITTLE_ENDIAN - __swab16s(p); -#endif -} - -static inline void __be32_to_cpus(__u32 *p) -{ -#ifdef __LITTLE_ENDIAN - __swab32s(p); -#endif -} - -static inline void __cpu_to_be32s(__u32 *p) -{ -#ifdef __LITTLE_ENDIAN - __swab32s(p); -#endif -} - -static inline void __be64_to_cpus(__u64 *p) -{ -#ifdef __LITTLE_ENDIAN - __swab64s(p); -#endif -} - -static inline void __cpu_to_be64s(__u64 *p) -{ -#ifdef __LITTLE_ENDIAN - __swab64s(p); -#endif -} - -static inline __u16 __le16_to_cpup(const __le16 *p) -{ -#ifdef __LITTLE_ENDIAN - return (__force __u16)*p; -#else - return __swab16p((__force __u16 *)p); -#endif -} - -static inline __u32 __le32_to_cpup(const __le32 *p) -{ -#ifdef __LITTLE_ENDIAN - return (__force __u32)*p; -#else - return __swab32p((__force __u32 *)p); -#endif -} - -static inline __u64 __le64_to_cpup(const __le64 *p) -{ -#ifdef __LITTLE_ENDIAN - return (__force __u64)*p; -#else - return __swab64p((__force __u64 *)p); -#endif -} - -static inline __le16 __cpu_to_le16p(const __u16 *p) -{ -#ifdef __LITTLE_ENDIAN - return (__force __le16)*p; -#else - return (__force __le16)__swab16p(p); -#endif -} - -static inline __le32 __cpu_to_le32p(const __u32 *p) -{ -#ifdef __LITTLE_ENDIAN - return (__force __le32)*p; -#else - return (__force __le32)__swab32p(p); -#endif -} - -static inline __le64 __cpu_to_le64p(const __u64 *p) -{ -#ifdef __LITTLE_ENDIAN - return (__force __le64)*p; -#else - return (__force __le64)__swab64p(p); -#endif -} - -static inline __u16 __be16_to_cpup(const __be16 *p) -{ -#ifdef __BIG_ENDIAN - return (__force __u16)*p; -#else - return __swab16p((__force __u16 *)p); -#endif -} - -static inline __u32 __be32_to_cpup(const __be32 *p) -{ -#ifdef __BIG_ENDIAN - return (__force __u32)*p; -#else - return __swab32p((__force __u32 *)p); -#endif -} - -static inline __u64 __be64_to_cpup(const __be64 *p) -{ -#ifdef __BIG_ENDIAN - return (__force __u64)*p; -#else - return __swab64p((__force __u64 *)p); -#endif -} - -static inline __be16 __cpu_to_be16p(const __u16 *p) -{ -#ifdef __BIG_ENDIAN - return (__force __be16)*p; -#else - return (__force __be16)__swab16p(p); -#endif -} - -static inline __be32 __cpu_to_be32p(const __u32 *p) -{ -#ifdef __BIG_ENDIAN - return (__force __be32)*p; -#else - return (__force __be32)__swab32p(p); -#endif -} - -static inline __be64 __cpu_to_be64p(const __u64 *p) -{ -#ifdef __BIG_ENDIAN - return (__force __be64)*p; -#else - return (__force __be64)__swab64p(p); -#endif -} - -#ifdef __KERNEL__ - -# define le16_to_cpu __le16_to_cpu -# define le32_to_cpu __le32_to_cpu -# define le64_to_cpu __le64_to_cpu -# define be16_to_cpu __be16_to_cpu -# define be32_to_cpu __be32_to_cpu -# define be64_to_cpu __be64_to_cpu -# define cpu_to_le16 __cpu_to_le16 -# define cpu_to_le32 __cpu_to_le32 -# define cpu_to_le64 __cpu_to_le64 -# define cpu_to_be16 __cpu_to_be16 -# define cpu_to_be32 __cpu_to_be32 -# define cpu_to_be64 __cpu_to_be64 - -# define le16_to_cpup __le16_to_cpup -# define le32_to_cpup __le32_to_cpup -# define le64_to_cpup __le64_to_cpup -# define be16_to_cpup __be16_to_cpup -# define be32_to_cpup __be32_to_cpup -# define be64_to_cpup __be64_to_cpup -# define cpu_to_le16p __cpu_to_le16p -# define cpu_to_le32p __cpu_to_le32p -# define cpu_to_le64p __cpu_to_le64p -# define cpu_to_be16p __cpu_to_be16p -# define cpu_to_be32p __cpu_to_be32p -# define cpu_to_be64p __cpu_to_be64p - -# define le16_to_cpus __le16_to_cpus -# define le32_to_cpus __le32_to_cpus -# define le64_to_cpus __le64_to_cpus -# define be16_to_cpus __be16_to_cpus -# define be32_to_cpus __be32_to_cpus -# define be64_to_cpus __be64_to_cpus -# define cpu_to_le16s __cpu_to_le16s -# define cpu_to_le32s __cpu_to_le32s -# define cpu_to_le64s __cpu_to_le64s -# define cpu_to_be16s __cpu_to_be16s -# define cpu_to_be32s __cpu_to_be32s -# define cpu_to_be64s __cpu_to_be64s - -/* - * They have to be macros in order to do the constant folding - * correctly - if the argument passed into a inline function - * it is no longer constant according to gcc.. - */ -# undef ntohl -# undef ntohs -# undef htonl -# undef htons - -# define ___htonl(x) __cpu_to_be32(x) -# define ___htons(x) __cpu_to_be16(x) -# define ___ntohl(x) __be32_to_cpu(x) -# define ___ntohs(x) __be16_to_cpu(x) - -# define htonl(x) ___htonl(x) -# define ntohl(x) ___ntohl(x) -# define htons(x) ___htons(x) -# define ntohs(x) ___ntohs(x) - -static inline void le16_add_cpu(__le16 *var, u16 val) -{ - *var = cpu_to_le16(le16_to_cpup(var) + val); -} - -static inline void le32_add_cpu(__le32 *var, u32 val) -{ - *var = cpu_to_le32(le32_to_cpup(var) + val); -} - -static inline void le64_add_cpu(__le64 *var, u64 val) -{ - *var = cpu_to_le64(le64_to_cpup(var) + val); -} - -static inline void be16_add_cpu(__be16 *var, u16 val) -{ - *var = cpu_to_be16(be16_to_cpup(var) + val); -} - -static inline void be32_add_cpu(__be32 *var, u32 val) -{ - *var = cpu_to_be32(be32_to_cpup(var) + val); -} - -static inline void be64_add_cpu(__be64 *var, u64 val) -{ - *var = cpu_to_be64(be64_to_cpup(var) + val); -} - -#endif /* __KERNEL__ */ -#endif /* _LINUX_BYTEORDER_H */ -- cgit v1.2.2 From ede6f5aea054d3fb67c78857f7abdee602302043 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Tue, 6 Jan 2009 21:17:57 -0800 Subject: Fix up 64-bit byte swaps for most 32-bit architectures The __SWAB_64_THRU_32__ case of a 64-bit byte swap was depending on the no-longer-existant ___swab32() method (three underscores). We got rid of some of the worst indirection and complexity, and now it should just use the 32-bit swab function that was defined right above it. Reported-and-tested-by: Nicolas Pitre Reported-by: Benjamin Herrenschmidt Cc: Harvey Harrison Signed-off-by: Linus Torvalds --- include/linux/swab.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/swab.h b/include/linux/swab.h index 9a2d33e0a98a..be5284d4a053 100644 --- a/include/linux/swab.h +++ b/include/linux/swab.h @@ -68,7 +68,7 @@ static inline __attribute_const__ __u64 __fswab64(__u64 val) #elif defined(__SWAB_64_THRU_32__) __u32 h = val >> 32; __u32 l = val & ((1ULL << 32) - 1); - return (((__u64)___swab32(l)) << 32) | ((__u64)(___swab32(h))); + return (((__u64)__fswab32(l)) << 32) | ((__u64)(__fswab32(h))); #else return ___constant_swab64(val); #endif -- cgit v1.2.2 From e1995f65be0786ca201f466f049dad1e2e4c3421 Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Wed, 7 Jan 2009 14:29:16 +0100 Subject: i2c: Drop I2C_CLASS_ALL I2C_CLASS_ALL is almost never what bus driver authors really want. These i2c classes are really only about which devices must be probed, not what devices can be present. As device drivers get converted to the new i2c device driver model, only a few device types will keep relying on probing. Signed-off-by: Jean Delvare Acked-by: Sonic Zhang --- include/linux/i2c.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/i2c.h b/include/linux/i2c.h index 33a5992d4936..0184de4050b8 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -397,7 +397,6 @@ static inline void i2c_set_adapdata(struct i2c_adapter *dev, void *data) #define I2C_CLASS_CAM_DIGITAL (1<<5) /* most webcams */ #define I2C_CLASS_SOUND (1<<6) /* sound devices */ #define I2C_CLASS_SPD (1<<7) /* SPD EEPROMs and similar */ -#define I2C_CLASS_ALL (UINT_MAX) /* all of the above */ /* i2c_client_address_data is the struct for holding default client * addresses for a driver and for the parameters supplied on the -- cgit v1.2.2 From 994a075f0f2e8cdb919d8e495f98211651e3c461 Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Wed, 7 Jan 2009 14:29:17 +0100 Subject: i2c: Drop I2C_CLASS_CAM_ANALOG and I2C_CLASS_SOUND There are no users left of these two i2c probe class flags so we can drop the now. Signed-off-by: Jean Delvare --- include/linux/i2c.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/i2c.h b/include/linux/i2c.h index 0184de4050b8..4fd79d2e4021 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -393,9 +393,7 @@ static inline void i2c_set_adapdata(struct i2c_adapter *dev, void *data) #define I2C_CLASS_TV_ANALOG (1<<1) /* bttv + friends */ #define I2C_CLASS_TV_DIGITAL (1<<2) /* dvb cards */ #define I2C_CLASS_DDC (1<<3) /* DDC bus on graphics adapters */ -#define I2C_CLASS_CAM_ANALOG (1<<4) /* camera with analog CCD */ #define I2C_CLASS_CAM_DIGITAL (1<<5) /* most webcams */ -#define I2C_CLASS_SOUND (1<<6) /* sound devices */ #define I2C_CLASS_SPD (1<<7) /* SPD EEPROMs and similar */ /* i2c_client_address_data is the struct for holding default client -- cgit v1.2.2 From b305271861219f0ce162eb565f0f28f4c781299d Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Wed, 7 Jan 2009 14:29:17 +0100 Subject: i2c: Drop I2C_CLASS_CAM_DIGITAL There are a number of drivers which set their i2c bus class to I2C_CLASS_CAM_DIGITAL, however no chip driver actually checks for this flag, so we might as well drop it now. Signed-off-by: Jean Delvare --- include/linux/i2c.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/i2c.h b/include/linux/i2c.h index 4fd79d2e4021..20873d402467 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -393,7 +393,6 @@ static inline void i2c_set_adapdata(struct i2c_adapter *dev, void *data) #define I2C_CLASS_TV_ANALOG (1<<1) /* bttv + friends */ #define I2C_CLASS_TV_DIGITAL (1<<2) /* dvb cards */ #define I2C_CLASS_DDC (1<<3) /* DDC bus on graphics adapters */ -#define I2C_CLASS_CAM_DIGITAL (1<<5) /* most webcams */ #define I2C_CLASS_SPD (1<<7) /* SPD EEPROMs and similar */ /* i2c_client_address_data is the struct for holding default client -- cgit v1.2.2 From 22a9d645677feefd402befd02edd59b122289ef1 Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Wed, 7 Jan 2009 08:45:46 -0800 Subject: async: Asynchronous function calls to speed up kernel boot Right now, most of the kernel boot is strictly synchronous, such that various hardware delays are done sequentially. In order to make the kernel boot faster, this patch introduces infrastructure to allow doing some of the initialization steps asynchronously, which will hide significant portions of the hardware delays in practice. In order to not change device order and other similar observables, this patch does NOT do full parallel initialization. Rather, it operates more in the way an out of order CPU does; the work may be done out of order and asynchronous, but the observable effects (instruction retiring for the CPU) are still done in the original sequence. Signed-off-by: Arjan van de Ven --- include/linux/async.h | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) create mode 100644 include/linux/async.h (limited to 'include/linux') diff --git a/include/linux/async.h b/include/linux/async.h new file mode 100644 index 000000000000..c4ecacd0b327 --- /dev/null +++ b/include/linux/async.h @@ -0,0 +1,25 @@ +/* + * async.h: Asynchronous function calls for boot performance + * + * (C) Copyright 2009 Intel Corporation + * Author: Arjan van de Ven + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; version 2 + * of the License. + */ + +#include +#include + +typedef u64 async_cookie_t; +typedef void (async_func_ptr) (void *data, async_cookie_t cookie); + +extern async_cookie_t async_schedule(async_func_ptr *ptr, void *data); +extern async_cookie_t async_schedule_special(async_func_ptr *ptr, void *data, struct list_head *list); +extern void async_synchronize_full(void); +extern void async_synchronize_full_special(struct list_head *list); +extern void async_synchronize_cookie(async_cookie_t cookie); +extern void async_synchronize_cookie_special(async_cookie_t cookie, struct list_head *list); + -- cgit v1.2.2 From efaee192063a54749c56b7383803e16fe553630e Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Tue, 6 Jan 2009 07:20:54 -0800 Subject: async: make the final inode deletion an asynchronous event this makes "rm -rf" on a (names cached) kernel tree go from 11.6 to 8.6 seconds on an ext3 filesystem Signed-off-by: Arjan van de Ven --- include/linux/fs.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index d7eba77f666e..e38a64d71eff 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1184,6 +1184,11 @@ struct super_block { * generic_show_options() */ char *s_options; + + /* + * storage for asynchronous operations + */ + struct list_head s_async_list; }; extern struct timespec current_fs_time(struct super_block *sb); -- cgit v1.2.2 From b92a78e582b1a45649143dc86e526f5824092478 Mon Sep 17 00:00:00 2001 From: Rodolfo Giometti Date: Thu, 23 Oct 2008 10:08:07 +0200 Subject: usb host: Oxford OXU210HP HCD driver. This driver implements the support for Oxford OXU210HP USB high-speed host, no peripheral nor OTG. Signed-off-by: Rodolfo Giometti Cc: Kan Liu Signed-off-by: Greg Kroah-Hartman --- include/linux/oxu210hp.h | 7 +++++++ 1 file changed, 7 insertions(+) create mode 100644 include/linux/oxu210hp.h (limited to 'include/linux') diff --git a/include/linux/oxu210hp.h b/include/linux/oxu210hp.h new file mode 100644 index 000000000000..0bf96eae5389 --- /dev/null +++ b/include/linux/oxu210hp.h @@ -0,0 +1,7 @@ +/* platform data for the OXU210HP HCD */ + +struct oxu210hp_platform_data { + unsigned int bus16:1; + unsigned int use_hcd_otg:1; + unsigned int use_hcd_sph:1; +}; -- cgit v1.2.2 From d767d888750a8e15656b7ee15d68f90a151b8936 Mon Sep 17 00:00:00 2001 From: Harvey Harrison Date: Thu, 6 Nov 2008 22:32:15 -0800 Subject: USB: wusb: annotate association types withe proper endianness Also a trivial annotation in rh.c for: drivers/usb/wusbcore/rh.c:366:9: warning: incorrect type in assignment (different base types) drivers/usb/wusbcore/rh.c:366:9: expected unsigned short [unsigned] [short] [usertype] drivers/usb/wusbcore/rh.c:366:9: got restricted __le16 [usertype] drivers/usb/wusbcore/rh.c:367:9: warning: incorrect type in assignment (different base types) drivers/usb/wusbcore/rh.c:367:9: expected unsigned short [unsigned] [short] [usertype] drivers/usb/wusbcore/rh.c:367:9: got restricted __le16 [usertype] Association types annotation fixes piles of warnings similar to: drivers/usb/wusbcore/cbaf.c:238:30: warning: incorrect type in initializer (different base types) drivers/usb/wusbcore/cbaf.c:238:30: expected restricted __le16 [usertype] id drivers/usb/wusbcore/cbaf.c:238:30: got int drivers/usb/wusbcore/cbaf.c:238:30: warning: incorrect type in initializer (different base types) drivers/usb/wusbcore/cbaf.c:238:30: expected restricted __le16 [usertype] len drivers/usb/wusbcore/cbaf.c:238:30: got int Signed-off-by: Harvey Harrison Cc: David Vrabel Cc: Inaky Perez-Gonzalez Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/association.h | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/usb/association.h b/include/linux/usb/association.h index 07c5e3cf5898..0a4a18b3c1bb 100644 --- a/include/linux/usb/association.h +++ b/include/linux/usb/association.h @@ -28,17 +28,17 @@ struct wusb_am_attr { }; /* Different fields defined by the spec */ -#define WUSB_AR_AssociationTypeId { .id = 0x0000, .len = 2 } -#define WUSB_AR_AssociationSubTypeId { .id = 0x0001, .len = 2 } -#define WUSB_AR_Length { .id = 0x0002, .len = 4 } -#define WUSB_AR_AssociationStatus { .id = 0x0004, .len = 4 } -#define WUSB_AR_LangID { .id = 0x0008, .len = 2 } -#define WUSB_AR_DeviceFriendlyName { .id = 0x000b, .len = 64 } /* max */ -#define WUSB_AR_HostFriendlyName { .id = 0x000c, .len = 64 } /* max */ -#define WUSB_AR_CHID { .id = 0x1000, .len = 16 } -#define WUSB_AR_CDID { .id = 0x1001, .len = 16 } -#define WUSB_AR_ConnectionContext { .id = 0x1002, .len = 48 } -#define WUSB_AR_BandGroups { .id = 0x1004, .len = 2 } +#define WUSB_AR_AssociationTypeId { .id = cpu_to_le16(0x0000), .len = cpu_to_le16(2) } +#define WUSB_AR_AssociationSubTypeId { .id = cpu_to_le16(0x0001), .len = cpu_to_le16(2) } +#define WUSB_AR_Length { .id = cpu_to_le16(0x0002), .len = cpu_to_le16(4) } +#define WUSB_AR_AssociationStatus { .id = cpu_to_le16(0x0004), .len = cpu_to_le16(4) } +#define WUSB_AR_LangID { .id = cpu_to_le16(0x0008), .len = cpu_to_le16(2) } +#define WUSB_AR_DeviceFriendlyName { .id = cpu_to_le16(0x000b), .len = cpu_to_le16(64) } /* max */ +#define WUSB_AR_HostFriendlyName { .id = cpu_to_le16(0x000c), .len = cpu_to_le16(64) } /* max */ +#define WUSB_AR_CHID { .id = cpu_to_le16(0x1000), .len = cpu_to_le16(16) } +#define WUSB_AR_CDID { .id = cpu_to_le16(0x1001), .len = cpu_to_le16(16) } +#define WUSB_AR_ConnectionContext { .id = cpu_to_le16(0x1002), .len = cpu_to_le16(48) } +#define WUSB_AR_BandGroups { .id = cpu_to_le16(0x1004), .len = cpu_to_le16(2) } /* CBAF Control Requests (AMS1.0[T4-1] */ enum { -- cgit v1.2.2 From 9ac39f28b5237a629e41ccfc1f73d3a55723045c Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Wed, 12 Nov 2008 16:19:49 -0500 Subject: USB: add asynchronous autosuspend/autoresume support This patch (as1160b) adds support routines for asynchronous autosuspend and autoresume, with accompanying documentation updates. There already are several potential users of this interface, and others are likely to arise as autosuspend support becomes more widespread. Signed-off-by: Alan Stern Signed-off-by: Greg Kroah-Hartman --- include/linux/usb.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/usb.h b/include/linux/usb.h index f72aa51f7bcd..859a88e6ce9c 100644 --- a/include/linux/usb.h +++ b/include/linux/usb.h @@ -398,6 +398,7 @@ struct usb_tt; * @urbnum: number of URBs submitted for the whole device * @active_duration: total time device is not suspended * @autosuspend: for delayed autosuspends + * @autoresume: for autoresumes requested while in_interrupt * @pm_mutex: protects PM operations * @last_busy: time of last use * @autosuspend_delay: in jiffies @@ -476,6 +477,7 @@ struct usb_device { #ifdef CONFIG_PM struct delayed_work autosuspend; + struct work_struct autoresume; struct mutex pm_mutex; unsigned long last_busy; @@ -513,6 +515,8 @@ extern struct usb_device *usb_find_device(u16 vendor_id, u16 product_id); extern int usb_autopm_set_interface(struct usb_interface *intf); extern int usb_autopm_get_interface(struct usb_interface *intf); extern void usb_autopm_put_interface(struct usb_interface *intf); +extern int usb_autopm_get_interface_async(struct usb_interface *intf); +extern void usb_autopm_put_interface_async(struct usb_interface *intf); static inline void usb_autopm_enable(struct usb_interface *intf) { @@ -539,8 +543,13 @@ static inline int usb_autopm_set_interface(struct usb_interface *intf) static inline int usb_autopm_get_interface(struct usb_interface *intf) { return 0; } +static inline int usb_autopm_get_interface_async(struct usb_interface *intf) +{ return 0; } + static inline void usb_autopm_put_interface(struct usb_interface *intf) { } +static inline void usb_autopm_put_interface_async(struct usb_interface *intf) +{ } static inline void usb_autopm_enable(struct usb_interface *intf) { } static inline void usb_autopm_disable(struct usb_interface *intf) -- cgit v1.2.2 From dc023dceec861c60bc1d1a17a2c6496ddac26ee7 Mon Sep 17 00:00:00 2001 From: Inaky Perez-Gonzalez Date: Thu, 13 Nov 2008 10:31:35 -0800 Subject: USB: Introduce usb_queue_reset() to do resets from atomic contexts This patch introduces a new call to be able to do a USB reset from an atomic contect. This is quite helpful in USB callbacks to handle errors (when the only thing that can be done is to do a device reset). It is done queuing a work struct that will do the actual reset. The struct is "attached" to an interface so pending requests from an interface are removed when said interface is unbound from the driver. The call flow then becomes: usb_queue_reset_device() __usb_queue_reset_device() [workqueue] usb_reset_device() usb_probe_interface() usb_cancel_queue_reset() [error path] usb_unbind_interface() usb_cancel_queue_reset() usb_driver_release_interface() usb_cancel_queue_reset() Note usb_cancel_queue_reset() needs smarts to try not to unqueue when it is actually being executed. This happens when we run the reset from the workqueue: usb_reset_device() is called and on interface unbind time, usb_cancel_queue_reset() would be called. That would deadlock on cancel_work_sync(). To avoid that, we set (before running usb_reset_device()) usb_intf->reset_running and clear it inmediately after returning. Patch is against 2.6.28-rc2 and depends on http://marc.info/?l=linux-usb&m=122581634925308&w=2 (as submitted by Alan Stern). Signed-off-by: Inaky Perez-Gonzalez Cc: Alan Stern Signed-off-by: Greg Kroah-Hartman --- include/linux/usb.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/usb.h b/include/linux/usb.h index 859a88e6ce9c..c8e55aa979de 100644 --- a/include/linux/usb.h +++ b/include/linux/usb.h @@ -120,6 +120,11 @@ enum usb_interface_condition { * to the sysfs representation for that device. * @pm_usage_cnt: PM usage counter for this interface; autosuspend is not * allowed unless the counter is 0. + * @reset_ws: Used for scheduling resets from atomic context. + * @reset_running: set to 1 if the interface is currently running a + * queued reset so that usb_cancel_queued_reset() doesn't try to + * remove from the workqueue when running inside the worker + * thread. See __usb_queue_reset_device(). * * USB device drivers attach to interfaces on a physical device. Each * interface encapsulates a single high level function, such as feeding @@ -168,10 +173,12 @@ struct usb_interface { unsigned needs_remote_wakeup:1; /* driver requires remote wakeup */ unsigned needs_altsetting0:1; /* switch to altsetting 0 is pending */ unsigned needs_binding:1; /* needs delayed unbind/rebind */ + unsigned reset_running:1; struct device dev; /* interface specific device info */ struct device *usb_dev; int pm_usage_cnt; /* usage counter for autosuspend */ + struct work_struct reset_ws; /* for resets in atomic context */ }; #define to_usb_interface(d) container_of(d, struct usb_interface, dev) #define interface_to_usbdev(intf) \ @@ -507,6 +514,7 @@ extern int usb_lock_device_for_reset(struct usb_device *udev, /* USB port reset for device reinitialization */ extern int usb_reset_device(struct usb_device *dev); +extern void usb_queue_reset_device(struct usb_interface *dev); extern struct usb_device *usb_find_device(u16 vendor_id, u16 product_id); -- cgit v1.2.2 From f150fa1afbf69a87f54752579ff2bb769aad88b3 Mon Sep 17 00:00:00 2001 From: Pete Zaitcev Date: Thu, 13 Nov 2008 21:31:21 -0700 Subject: USB: Allow usbmon as a module even if usbcore is builtin usbmon can only be built as a module if usbcore is a module too. Trivial changes to the relevant Kconfig and Makefile (and a few trivial changes elsewhere) allow usbmon to be built as a module even if usbcore is builtin. This is verified to work in all 9 permutations (3 correctly prohibited by Kconfig, 6 build a suitable result). Signed-off-by: Paul Bolle Signed-off-by: Pete Zaitcev Signed-off-by: Greg Kroah-Hartman --- include/linux/usb.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/usb.h b/include/linux/usb.h index c8e55aa979de..8bc81bffc195 100644 --- a/include/linux/usb.h +++ b/include/linux/usb.h @@ -336,7 +336,7 @@ struct usb_bus { #endif struct device *dev; /* device for this bus */ -#if defined(CONFIG_USB_MON) +#if defined(CONFIG_USB_MON) || defined(CONFIG_USB_MON_MODULE) struct mon_bus *mon_bus; /* non-null when associated */ int monitored; /* non-zero when monitored */ #endif -- cgit v1.2.2 From 1537e0ad944acf3a4c2b311a646d7993b89499f7 Mon Sep 17 00:00:00 2001 From: Ben Efros Date: Tue, 18 Nov 2008 13:31:13 -0800 Subject: USB: storage devices and SAT Add the SANE SENSE flag to indicate that a device is capable of handling more than 18-bytes of sense data. This functionality is required for USB-ATA bridges implementing SAT. A future patch will actually enable this function for several devices. The logic behind this is that we can detect support for SANE_SENSE in a few ways: 1) ATA PASS THROUGH (12) or (16) execute successfully 2) SPC-3 or higher is in use 3) A previous CHECK CONDITION occurred with sense format 70-73 and had a length greater than 18-bytes total Signed-off-by: Ben Efros Signed-off-by: Matthew Dharm Signed-off-by: Greg Kroah-Hartman --- include/linux/usb_usual.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/usb_usual.h b/include/linux/usb_usual.h index d9a3bbe38e6b..998e5cbbf29e 100644 --- a/include/linux/usb_usual.h +++ b/include/linux/usb_usual.h @@ -52,8 +52,9 @@ US_FLAG(MAX_SECTORS_MIN,0x00002000) \ /* Sets max_sectors to arch min */ \ US_FLAG(BULK_IGNORE_TAG,0x00004000) \ - /* Ignore tag mismatch in bulk operations */ - + /* Ignore tag mismatch in bulk operations */ \ + US_FLAG(SANE_SENSE, 0x00008000) + /* Sane Sense (> 18 bytes) */ #define US_FLAG(name, value) US_FL_##name = value , enum { US_DO_ALL_FLAGS }; -- cgit v1.2.2 From 6084f1bf0c51a99cbba612ee90a4607cffb8b042 Mon Sep 17 00:00:00 2001 From: Philipp Zabel Date: Mon, 24 Nov 2008 12:00:01 -0800 Subject: USB: otg: gpio_vbus transceiver stub gpio_vbus provides simple GPIO VBUS sensing for peripheral controllers with an internal transceiver. Optionally, a second GPIO can be used to control D+ pullup. It also interfaces with the regulator framework to limit charging currents when powered via USB. gpio_vbus requests the regulator supplying "vbus_draw" and can enable/disable it or limit its current depending on USB state. [dbrownell@users.sourceforge.net: use drivers/otg, cleanups ] Signed-off-by: Philipp Zabel Signed-off-by: David Brownell Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/gpio_vbus.h | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) create mode 100644 include/linux/usb/gpio_vbus.h (limited to 'include/linux') diff --git a/include/linux/usb/gpio_vbus.h b/include/linux/usb/gpio_vbus.h new file mode 100644 index 000000000000..d9f03ccc2d60 --- /dev/null +++ b/include/linux/usb/gpio_vbus.h @@ -0,0 +1,30 @@ +/* + * A simple GPIO VBUS sensing driver for B peripheral only devices + * with internal transceivers. + * Optionally D+ pullup can be controlled by a second GPIO. + * + * Copyright (c) 2008 Philipp Zabel + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ + +/** + * struct gpio_vbus_mach_info - configuration for gpio_vbus + * @gpio_vbus: VBUS sensing GPIO + * @gpio_pullup: optional D+ or D- pullup GPIO (else negative/invalid) + * @gpio_vbus_inverted: true if gpio_vbus is active low + * @gpio_pullup_inverted: true if gpio_pullup is active low + * + * The VBUS sensing GPIO should have a pulldown, which will normally be + * part of a resistor ladder turning a 4.0V-5.25V level on VBUS into a + * value the GPIO detects as active. Some systems will use comparators. + */ +struct gpio_vbus_mach_info { + int gpio_vbus; + int gpio_pullup; + bool gpio_vbus_inverted; + bool gpio_pullup_inverted; +}; -- cgit v1.2.2 From 68144e0cc92125f41157ede7b060f83367bc4fe7 Mon Sep 17 00:00:00 2001 From: Philipp Zabel Date: Mon, 24 Nov 2008 12:01:17 -0800 Subject: USB: otg: add otg_put_transceiver() As Russell King points out, calling put_device(otg_transceiver->dev) directly in driver cleanup paths makes assumptions about otg_transceiver internals. Signed-off-by: Philipp Zabel Signed-off-by: David Brownell Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/otg.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/usb/otg.h b/include/linux/usb/otg.h index 1db25d152ad8..94df4fe6c6c0 100644 --- a/include/linux/usb/otg.h +++ b/include/linux/usb/otg.h @@ -84,6 +84,7 @@ extern int otg_set_transceiver(struct otg_transceiver *); /* for usb host and peripheral controller drivers */ extern struct otg_transceiver *otg_get_transceiver(void); +extern void otg_put_transceiver(struct otg_transceiver *); static inline int otg_start_hnp(struct otg_transceiver *otg) -- cgit v1.2.2 From 65bfd2967c906ca322a4bb69a285fe0de8916ac6 Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Tue, 25 Nov 2008 16:39:18 -0500 Subject: USB: Enhance usage of pm_message_t This patch (as1177) modifies the USB core suspend and resume routines. The resume functions now will take a pm_message_t argument, so they will know what sort of resume is occurring. The new argument is also passed to the port suspend/resume and bus suspend/resume routines (although they don't use it for anything but debugging). In addition, special pm_message_t values are used for user-initiated, device-initiated (i.e., remote wakeup), and automatic suspend/resume. By testing these values, drivers can tell whether or not a particular suspend was an autosuspend. Unfortunately, they can't do the same for resumes -- not until the pm_message_t argument is also passed to the drivers' resume methods. That will require a bigger change. IMO, the whole Power Management framework should have been set up this way in the first place. Signed-off-by: Alan Stern Signed-off-by: Greg Kroah-Hartman --- include/linux/usb.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/usb.h b/include/linux/usb.h index 8bc81bffc195..74d0b9990c73 100644 --- a/include/linux/usb.h +++ b/include/linux/usb.h @@ -1067,7 +1067,7 @@ struct usb_device_driver { void (*disconnect) (struct usb_device *udev); int (*suspend) (struct usb_device *udev, pm_message_t message); - int (*resume) (struct usb_device *udev); + int (*resume) (struct usb_device *udev, pm_message_t message); struct usbdrv_wrap drvwrap; unsigned int supports_autosuspend:1; }; -- cgit v1.2.2 From 2ffcdb3bdadaf8260986e96384df26c94a6ad42c Mon Sep 17 00:00:00 2001 From: Bryan Wu Date: Tue, 2 Dec 2008 21:33:43 +0200 Subject: USB: musb: use new platform data interface of musb to replace old one Signed-off-by: Bryan Wu Signed-off-by: Felipe Balbi Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/musb.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/usb/musb.h b/include/linux/usb/musb.h index 630962c04ca4..d6aad0ea6033 100644 --- a/include/linux/usb/musb.h +++ b/include/linux/usb/musb.h @@ -47,6 +47,11 @@ struct musb_hdrc_config { u8 ram_bits; /* ram address size */ struct musb_hdrc_eps_bits *eps_bits; +#ifdef CONFIG_BLACKFIN + /* A GPIO controlling VRSEL in Blackfin */ + unsigned int gpio_vrsel; +#endif + }; struct musb_hdrc_platform_data { -- cgit v1.2.2 From 3b23dd6f8a718e5339de4f7d86ce76a078b5f771 Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Fri, 5 Dec 2008 14:10:34 -0500 Subject: USB: utilize the bus notifiers This patch (as1185) makes usbcore take advantage of the bus notifications sent out by the driver core. Now we can create all our device and interface attribute files before the device or interface uevent is broadcast. A side effect is that we no longer create the endpoint "pseudo" devices at the same time as a device or interface is registered -- it seems like a bad idea to try registering an endpoint before the registration of its parent is complete. So the routines for creating and removing endpoint devices have been split out and renamed, and they are called explicitly when needed. A new bitflag is used for keeping track of whether or not the interface's endpoint devices have been created, since (just as with the interface attributes) they vary with the altsetting and hence can be changed at random times. Signed-off-by: Alan Stern Cc: Kay Sievers Signed-off-by: Greg Kroah-Hartman --- include/linux/usb.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/usb.h b/include/linux/usb.h index 74d0b9990c73..e9d63562325a 100644 --- a/include/linux/usb.h +++ b/include/linux/usb.h @@ -108,6 +108,7 @@ enum usb_interface_condition { * (in probe()), bound to a driver, or unbinding (in disconnect()) * @is_active: flag set when the interface is bound and not suspended. * @sysfs_files_created: sysfs attributes exist + * @ep_devs_created: endpoint child pseudo-devices exist * @unregistering: flag set when the interface is being unregistered * @needs_remote_wakeup: flag set when the driver requires remote-wakeup * capability during autosuspend. @@ -169,6 +170,7 @@ struct usb_interface { enum usb_interface_condition condition; /* state of binding */ unsigned is_active:1; /* the interface is not suspended */ unsigned sysfs_files_created:1; /* the sysfs attributes exist */ + unsigned ep_devs_created:1; /* endpoint "devices" exist */ unsigned unregistering:1; /* unregistration is in progress */ unsigned needs_remote_wakeup:1; /* driver requires remote wakeup */ unsigned needs_altsetting0:1; /* switch to altsetting 0 is pending */ -- cgit v1.2.2 From 49367d8f1d9f26482cf7089489e90f0afd0a942c Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Fri, 12 Dec 2008 21:38:45 +0800 Subject: USB: mark "reject" field of struct urb as atomic_t It is enough to protect accesses to reject field of urb by marking it as atomic_t,also it is the only reason of existence of usb_reject_lock,so remove the lock to make code more clean. Signed-off-by: Ming Lei Acked-off-by: Alan Stern Signed-off-by: Greg Kroah-Hartman --- include/linux/usb.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/usb.h b/include/linux/usb.h index e9d63562325a..4e8654a18250 100644 --- a/include/linux/usb.h +++ b/include/linux/usb.h @@ -1340,7 +1340,7 @@ struct urb { struct kref kref; /* reference count of the URB */ void *hcpriv; /* private data for host controller */ atomic_t use_count; /* concurrent submissions counter */ - u8 reject; /* submissions will fail */ + atomic_t reject; /* submissions will fail */ int unlinked; /* unlink error code */ /* public: documented fields in the urb that can be used by drivers */ -- cgit v1.2.2 From 856395d6e137b4e7194972cb7765f3de6a72ba61 Mon Sep 17 00:00:00 2001 From: Oliver Neukum Date: Thu, 18 Dec 2008 09:17:49 +0100 Subject: USB: extension of anchor API to unpoison an anchor This extension allows unpoisoning an anchor allowing drivers that resubmit URBs to reuse an anchor for methods like resume() Signed-off-by: Oliver Neukum Signed-off-by: Greg Kroah-Hartman --- include/linux/usb.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/usb.h b/include/linux/usb.h index 4e8654a18250..e89639896508 100644 --- a/include/linux/usb.h +++ b/include/linux/usb.h @@ -1485,6 +1485,7 @@ extern void usb_poison_urb(struct urb *urb); extern void usb_unpoison_urb(struct urb *urb); extern void usb_kill_anchored_urbs(struct usb_anchor *anchor); extern void usb_poison_anchored_urbs(struct usb_anchor *anchor); +extern void usb_unpoison_anchored_urbs(struct usb_anchor *anchor); extern void usb_unlink_anchored_urbs(struct usb_anchor *anchor); extern void usb_anchor_urb(struct urb *urb, struct usb_anchor *anchor); extern void usb_unanchor_urb(struct urb *urb); -- cgit v1.2.2 From 25ff1c316f6a763f1eefe7f8984b2d8c03888432 Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Mon, 15 Dec 2008 12:43:41 -0500 Subject: USB: storage: add last-sector hacks This patch (as1189b) adds some hacks to usb-storage for dealing with the growing problems involving bad capacity values and last-sector accesses: A new flag, US_FL_CAPACITY_OK, is created to indicate that the device is known to report its capacity correctly. An unusual_devs entry for Linux's own File-backed Storage Gadget is added with this flag set, since g_file_storage always reports the correct capacity and since the capacity need not be even (it is determined by the size of the backing file). An entry in unusual_devs.h which has only the CAPACITY_OK flag set shouldn't prejudice libusual, since the device will work perfectly well with either usb-storage or ub. So a new macro, COMPLIANT_DEV, is added to let libusual know about these entries. When a last-sector access succeeds and the total number of sectors is odd (the unexpected case, in which guessing that the number is even might cause trouble), a WARN is triggered. The kerneloops.org project will collect these warnings, allowing us to add CAPACITY_OK flags for the devices in question before implementing the default-to-even heuristic. If users want to prevent the stack dump produced by the WARN, they can disable the hack by adding an unusual_devs entry for their device with the CAPACITY_OK flag. When a last-sector access fails three times in a row and neither the FIX_CAPACITY nor the CAPACITY_OK flag is set, we assume the last-sector bug is present. We replace the existing status and sense data with values that will cause the SCSI core to fail the access immediately rather than retry indefinitely. This should fix the difficulties people have been having with Nokia phones. Signed-off-by: Alan Stern Cc: stable Signed-off-by: Greg Kroah-Hartman --- include/linux/usb_usual.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/usb_usual.h b/include/linux/usb_usual.h index 998e5cbbf29e..1eea1ab68dc4 100644 --- a/include/linux/usb_usual.h +++ b/include/linux/usb_usual.h @@ -53,8 +53,10 @@ /* Sets max_sectors to arch min */ \ US_FLAG(BULK_IGNORE_TAG,0x00004000) \ /* Ignore tag mismatch in bulk operations */ \ - US_FLAG(SANE_SENSE, 0x00008000) - /* Sane Sense (> 18 bytes) */ + US_FLAG(SANE_SENSE, 0x00008000) \ + /* Sane Sense (> 18 bytes) */ \ + US_FLAG(CAPACITY_OK, 0x00010000) \ + /* READ CAPACITY response is correct */ #define US_FLAG(name, value) US_FL_##name = value , enum { US_DO_ALL_FLAGS }; -- cgit v1.2.2 From 338b67b0c1a97ca705023a8189cf41aa0828d294 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 14 Aug 2008 09:37:34 -0700 Subject: USB: remove warn() macro from usb.h USB should not be having it's own printk macros, so remove warn() and use the system-wide standard of dev_warn() wherever possible. In the few places that will not work out, use a basic printk(). Now that all in-tree users are gone, remove the macro. Signed-off-by: Greg Kroah-Hartman --- include/linux/usb.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/usb.h b/include/linux/usb.h index e89639896508..28f68f587793 100644 --- a/include/linux/usb.h +++ b/include/linux/usb.h @@ -1744,8 +1744,6 @@ extern void usb_unregister_notify(struct notifier_block *nb); format "\n" , ## arg) #define info(format, arg...) printk(KERN_INFO KBUILD_MODNAME ": " \ format "\n" , ## arg) -#define warn(format, arg...) printk(KERN_WARNING KBUILD_MODNAME ": " \ - format "\n" , ## arg) #endif /* __KERNEL__ */ -- cgit v1.2.2 From 34c65d82e02147331701c7795e3144d511adf4e9 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 18 Aug 2008 13:21:04 -0700 Subject: USB: remove info() macro from usb.h USB should not be having it's own printk macros, so remove info() and use the system-wide standard of dev_info() wherever possible. No one in the tree is using the macro, so it can now be removed. Signed-off-by: Greg Kroah-Hartman --- include/linux/usb.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/usb.h b/include/linux/usb.h index 28f68f587793..85ee9be9361e 100644 --- a/include/linux/usb.h +++ b/include/linux/usb.h @@ -1742,8 +1742,6 @@ extern void usb_unregister_notify(struct notifier_block *nb); #define err(format, arg...) printk(KERN_ERR KBUILD_MODNAME ": " \ format "\n" , ## arg) -#define info(format, arg...) printk(KERN_INFO KBUILD_MODNAME ": " \ - format "\n" , ## arg) #endif /* __KERNEL__ */ -- cgit v1.2.2 From 5e07878787ad07361571150230cc3a8d522ae046 Mon Sep 17 00:00:00 2001 From: Inaky Perez-Gonzalez Date: Sat, 20 Dec 2008 16:57:39 -0800 Subject: debugfs: add helpers for exporting a size_t simple value MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In the same spirit as debugfs_create_*(), introduce helpers for exporting size_t values over debugfs. The only trick done is that the format verifier is kept at %llu instead of %zu; otherwise type warnings would pop up: format ‘%zu’ expects type ‘size_t’, but argument 2 has type ‘long long unsigned int’ There is no real way to fix this one--however, we can consider %llu and %zu to be compatible if we consider that we are using the same for validating in debugfs_create_{x,u}{8,16,32}(). Signed-off-by: Inaky Perez-Gonzalez Signed-off-by: Greg Kroah-Hartman --- include/linux/debugfs.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/debugfs.h b/include/linux/debugfs.h index e1a6c046cea3..23936b16426b 100644 --- a/include/linux/debugfs.h +++ b/include/linux/debugfs.h @@ -63,6 +63,8 @@ struct dentry *debugfs_create_x16(const char *name, mode_t mode, struct dentry *parent, u16 *value); struct dentry *debugfs_create_x32(const char *name, mode_t mode, struct dentry *parent, u32 *value); +struct dentry *debugfs_create_size_t(const char *name, mode_t mode, + struct dentry *parent, size_t *value); struct dentry *debugfs_create_bool(const char *name, mode_t mode, struct dentry *parent, u32 *value); -- cgit v1.2.2 From ace22f0881e1333d0c55ddf484e5352fe03a806a Mon Sep 17 00:00:00 2001 From: Inaky Perez-Gonzalez Date: Sat, 20 Dec 2008 16:57:33 -0800 Subject: wimax: headers for kernel API and user space interaction Definitions for the user/kernel API protocol through generic netlink. User space can copy it verbatim and use it. Kernel API definition declares the main data types and calls for the drivers to integrate into the WiMAX stack. Provides usage documentation. Signed-off-by: Inaky Perez-Gonzalez Signed-off-by: Greg Kroah-Hartman --- include/linux/wimax.h | 234 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 234 insertions(+) create mode 100644 include/linux/wimax.h (limited to 'include/linux') diff --git a/include/linux/wimax.h b/include/linux/wimax.h new file mode 100644 index 000000000000..c89de7f4e5b9 --- /dev/null +++ b/include/linux/wimax.h @@ -0,0 +1,234 @@ +/* + * Linux WiMax + * API for user space + * + * + * Copyright (C) 2007-2008 Intel Corporation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * + * Intel Corporation + * Inaky Perez-Gonzalez + * - Initial implementation + * + * + * This file declares the user/kernel protocol that is spoken over + * Generic Netlink, as well as any type declaration that is to be used + * by kernel and user space. + * + * It is intended for user space to clone it verbatim to use it as a + * primary reference for definitions. + * + * Stuff intended for kernel usage as well as full protocol and stack + * documentation is rooted in include/net/wimax.h. + */ + +#ifndef __LINUX__WIMAX_H__ +#define __LINUX__WIMAX_H__ + +#include + +enum { + /** + * Version of the interface (unsigned decimal, MMm, max 25.5) + * M - Major: change if removing or modifying an existing call. + * m - minor: change when adding a new call + */ + WIMAX_GNL_VERSION = 00, + /* Generic NetLink attributes */ + WIMAX_GNL_ATTR_INVALID = 0x00, + WIMAX_GNL_ATTR_MAX = 10, +}; + + +/* + * Generic NetLink operations + * + * Most of these map to an API call; _OP_ stands for operation, _RP_ + * for reply and _RE_ for report (aka: signal). + */ +enum { + WIMAX_GNL_OP_MSG_FROM_USER, /* User to kernel message */ + WIMAX_GNL_OP_MSG_TO_USER, /* Kernel to user message */ + WIMAX_GNL_OP_RFKILL, /* Run wimax_rfkill() */ + WIMAX_GNL_OP_RESET, /* Run wimax_rfkill() */ + WIMAX_GNL_RE_STATE_CHANGE, /* Report: status change */ +}; + + +/* Message from user / to user */ +enum { + WIMAX_GNL_MSG_IFIDX = 1, + WIMAX_GNL_MSG_PIPE_NAME, + WIMAX_GNL_MSG_DATA, +}; + + +/* + * wimax_rfkill() + * + * The state of the radio (ON/OFF) is mapped to the rfkill subsystem's + * switch state (DISABLED/ENABLED). + */ +enum wimax_rf_state { + WIMAX_RF_OFF = 0, /* Radio is off, rfkill on/enabled */ + WIMAX_RF_ON = 1, /* Radio is on, rfkill off/disabled */ + WIMAX_RF_QUERY = 2, +}; + +/* Attributes */ +enum { + WIMAX_GNL_RFKILL_IFIDX = 1, + WIMAX_GNL_RFKILL_STATE, +}; + + +/* Attributes for wimax_reset() */ +enum { + WIMAX_GNL_RESET_IFIDX = 1, +}; + + +/* + * Attributes for the Report State Change + * + * For now we just have the old and new states; new attributes might + * be added later on. + */ +enum { + WIMAX_GNL_STCH_IFIDX = 1, + WIMAX_GNL_STCH_STATE_OLD, + WIMAX_GNL_STCH_STATE_NEW, +}; + + +/** + * enum wimax_st - The different states of a WiMAX device + * @__WIMAX_ST_NULL: The device structure has been allocated and zeroed, + * but still wimax_dev_add() hasn't been called. There is no state. + * + * @WIMAX_ST_DOWN: The device has been registered with the WiMAX and + * networking stacks, but it is not initialized (normally that is + * done with 'ifconfig DEV up' [or equivalent], which can upload + * firmware and enable communications with the device). + * In this state, the device is powered down and using as less + * power as possible. + * This state is the default after a call to wimax_dev_add(). It + * is ok to have drivers move directly to %WIMAX_ST_UNINITIALIZED + * or %WIMAX_ST_RADIO_OFF in _probe() after the call to + * wimax_dev_add(). + * It is recommended that the driver leaves this state when + * calling 'ifconfig DEV up' and enters it back on 'ifconfig DEV + * down'. + * + * @__WIMAX_ST_QUIESCING: The device is being torn down, so no API + * operations are allowed to proceed except the ones needed to + * complete the device clean up process. + * + * @WIMAX_ST_UNINITIALIZED: [optional] Communication with the device + * is setup, but the device still requires some configuration + * before being operational. + * Some WiMAX API calls might work. + * + * @WIMAX_ST_RADIO_OFF: The device is fully up; radio is off (wether + * by hardware or software switches). + * It is recommended to always leave the device in this state + * after initialization. + * + * @WIMAX_ST_READY: The device is fully up and radio is on. + * + * @WIMAX_ST_SCANNING: [optional] The device has been instructed to + * scan. In this state, the device cannot be actively connected to + * a network. + * + * @WIMAX_ST_CONNECTING: The device is connecting to a network. This + * state exists because in some devices, the connect process can + * include a number of negotiations between user space, kernel + * space and the device. User space needs to know what the device + * is doing. If the connect sequence in a device is atomic and + * fast, the device can transition directly to CONNECTED + * + * @WIMAX_ST_CONNECTED: The device is connected to a network. + * + * @__WIMAX_ST_INVALID: This is an invalid state used to mark the + * maximum numeric value of states. + * + * Description: + * + * Transitions from one state to another one are atomic and can only + * be caused in kernel space with wimax_state_change(). To read the + * state, use wimax_state_get(). + * + * States starting with __ are internal and shall not be used or + * referred to by drivers or userspace. They look ugly, but that's the + * point -- if any use is made non-internal to the stack, it is easier + * to catch on review. + * + * All API operations [with well defined exceptions] will take the + * device mutex before starting and then check the state. If the state + * is %__WIMAX_ST_NULL, %WIMAX_ST_DOWN, %WIMAX_ST_UNINITIALIZED or + * %__WIMAX_ST_QUIESCING, it will drop the lock and quit with + * -%EINVAL, -%ENOMEDIUM, -%ENOTCONN or -%ESHUTDOWN. + * + * The order of the definitions is important, so we can do numerical + * comparisons (eg: < %WIMAX_ST_RADIO_OFF means the device is not ready + * to operate). + */ +/* + * The allowed state transitions are described in the table below + * (states in rows can go to states in columns where there is an X): + * + * UNINI RADIO READY SCAN CONNEC CONNEC + * NULL DOWN QUIESCING TIALIZED OFF NING TING TED + * NULL - x + * DOWN - x x x + * QUIESCING x - + * UNINITIALIZED x - x + * RADIO_OFF x - x + * READY x x - x x x + * SCANNING x x x - x x + * CONNECTING x x x x - x + * CONNECTED x x x - + * + * This table not available in kernel-doc because the formatting messes it up. + */ + enum wimax_st { + __WIMAX_ST_NULL = 0, + WIMAX_ST_DOWN, + __WIMAX_ST_QUIESCING, + WIMAX_ST_UNINITIALIZED, + WIMAX_ST_RADIO_OFF, + WIMAX_ST_READY, + WIMAX_ST_SCANNING, + WIMAX_ST_CONNECTING, + WIMAX_ST_CONNECTED, + __WIMAX_ST_INVALID /* Always keep last */ +}; + + +#endif /* #ifndef __LINUX__WIMAX_H__ */ -- cgit v1.2.2 From ea912f4e7f264981faf8665cfb63d46d7f948117 Mon Sep 17 00:00:00 2001 From: Inaky Perez-Gonzalez Date: Sat, 20 Dec 2008 16:57:35 -0800 Subject: wimax: debug macros and debug settings for the WiMAX stack This file contains a simple debug framework that is used in the stack; it allows the debug level to be controlled at compile-time (so the debug code is optimized out) and at run-time (for what wasn't compiled out). This is eventually going to be moved to use dynamic_printk(). Just need to find time to do it. Signed-off-by: Inaky Perez-Gonzalez Signed-off-by: Greg Kroah-Hartman --- include/linux/wimax/debug.h | 453 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 453 insertions(+) create mode 100644 include/linux/wimax/debug.h (limited to 'include/linux') diff --git a/include/linux/wimax/debug.h b/include/linux/wimax/debug.h new file mode 100644 index 000000000000..ba0c49399a83 --- /dev/null +++ b/include/linux/wimax/debug.h @@ -0,0 +1,453 @@ +/* + * Linux WiMAX + * Collection of tools to manage debug operations. + * + * + * Copyright (C) 2005-2007 Intel Corporation + * Inaky Perez-Gonzalez + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version + * 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. + * + * + * Don't #include this file directly, read on! + * + * + * EXECUTING DEBUGGING ACTIONS OR NOT + * + * The main thing this framework provides is decission power to take a + * debug action (like printing a message) if the current debug level + * allows it. + * + * The decission power is at two levels: at compile-time (what does + * not make it is compiled out) and at run-time. The run-time + * selection is done per-submodule (as they are declared by the user + * of the framework). + * + * A call to d_test(L) (L being the target debug level) returns true + * if the action should be taken because the current debug levels + * allow it (both compile and run time). + * + * It follows that a call to d_test() that can be determined to be + * always false at compile time will get the code depending on it + * compiled out by optimization. + * + * + * DEBUG LEVELS + * + * It is up to the caller to define how much a debugging level is. + * + * Convention sets 0 as "no debug" (so an action marked as debug level 0 + * will always be taken). The increasing debug levels are used for + * increased verbosity. + * + * + * USAGE + * + * Group the code in modules and submodules inside each module [which + * in most cases maps to Linux modules and .c files that compose + * those]. + * + * + * For each module, there is: + * + * - a MODULENAME (single word, legal C identifier) + * + * - a debug-levels.h header file that declares the list of + * submodules and that is included by all .c files that use + * the debugging tools. The file name can be anything. + * + * - some (optional) .c code to manipulate the runtime debug levels + * through debugfs. + * + * The debug-levels.h file would look like: + * + * #ifndef __debug_levels__h__ + * #define __debug_levels__h__ + * + * #define D_MODULENAME modulename + * #define D_MASTER 10 + * + * #include + * + * enum d_module { + * D_SUBMODULE_DECLARE(submodule_1), + * D_SUBMODULE_DECLARE(submodule_2), + * ... + * D_SUBMODULE_DECLARE(submodule_N) + * }; + * + * #endif + * + * D_MASTER is the maximum compile-time debug level; any debug actions + * above this will be out. D_MODULENAME is the module name (legal C + * identifier), which has to be unique for each module (to avoid + * namespace collisions during linkage). Note those #defines need to + * be done before #including debug.h + * + * We declare N different submodules whose debug level can be + * independently controlled during runtime. + * + * In a .c file of the module (and only in one of them), define the + * following code: + * + * struct d_level D_LEVEL[] = { + * D_SUBMODULE_DEFINE(submodule_1), + * D_SUBMODULE_DEFINE(submodule_2), + * ... + * D_SUBMODULE_DEFINE(submodule_N), + * }; + * size_t D_LEVEL_SIZE = ARRAY_SIZE(D_LEVEL); + * + * Externs for d_level_MODULENAME and d_level_size_MODULENAME are used + * and declared in this file using the D_LEVEL and D_LEVEL_SIZE macros + * #defined also in this file. + * + * To manipulate from user space the levels, create a debugfs dentry + * and then register each submodule with: + * + * result = d_level_register_debugfs("PREFIX_", submodule_X, parent); + * if (result < 0) + * goto error; + * + * Where PREFIX_ is a name of your chosing. This will create debugfs + * file with a single numeric value that can be use to tweak it. To + * remove the entires, just use debugfs_remove_recursive() on 'parent'. + * + * NOTE: remember that even if this will show attached to some + * particular instance of a device, the settings are *global*. + * + * + * On each submodule (for example, .c files), the debug infrastructure + * should be included like this: + * + * #define D_SUBMODULE submodule_x // matches one in debug-levels.h + * #include "debug-levels.h" + * + * after #including all your include files. + * + * + * Now you can use the d_*() macros below [d_test(), d_fnstart(), + * d_fnend(), d_printf(), d_dump()]. + * + * If their debug level is greater than D_MASTER, they will be + * compiled out. + * + * If their debug level is lower or equal than D_MASTER but greater + * than the current debug level of their submodule, they'll be + * ignored. + * + * Otherwise, the action will be performed. + */ +#ifndef __debug__h__ +#define __debug__h__ + +#include +#include + + +/* Backend stuff */ + +/* + * Debug backend: generate a message header from a 'struct device' + * + * @head: buffer where to place the header + * @head_size: length of @head + * @dev: pointer to device used to generate a header from. If NULL, + * an empty ("") header is generated. + */ +static inline +void __d_head(char *head, size_t head_size, + struct device *dev) +{ + if (dev == NULL) + head[0] = 0; + else if ((unsigned long)dev < 4096) { + printk(KERN_ERR "E: Corrupt dev %p\n", dev); + WARN_ON(1); + } else + snprintf(head, head_size, "%s %s: ", + dev_driver_string(dev), dev->bus_id); +} + + +/* + * Debug backend: log some message if debugging is enabled + * + * @l: intended debug level + * @tag: tag to prefix the message with + * @dev: 'struct device' associated to this message + * @f: printf-like format and arguments + * + * Note this is optimized out if it doesn't pass the compile-time + * check; however, it is *always* compiled. This is useful to make + * sure the printf-like formats and variables are always checked and + * they don't get bit rot if you have all the debugging disabled. + */ +#define _d_printf(l, tag, dev, f, a...) \ +do { \ + char head[64]; \ + if (!d_test(l)) \ + break; \ + __d_head(head, sizeof(head), dev); \ + printk(KERN_ERR "%s%s%s: " f, head, __func__, tag, ##a); \ +} while (0) + + +/* + * CPP sintatic sugar to generate A_B like symbol names when one of + * the arguments is a a preprocessor #define. + */ +#define __D_PASTE__(varname, modulename) varname##_##modulename +#define __D_PASTE(varname, modulename) (__D_PASTE__(varname, modulename)) +#define _D_SUBMODULE_INDEX(_name) (D_SUBMODULE_DECLARE(_name)) + + +/* + * Store a submodule's runtime debug level and name + */ +struct d_level { + u8 level; + const char *name; +}; + + +/* + * List of available submodules and their debug levels + * + * We call them d_level_MODULENAME and d_level_size_MODULENAME; the + * macros D_LEVEL and D_LEVEL_SIZE contain the name already for + * convenience. + * + * This array and the size are defined on some .c file that is part of + * the current module. + */ +#define D_LEVEL __D_PASTE(d_level, D_MODULENAME) +#define D_LEVEL_SIZE __D_PASTE(d_level_size, D_MODULENAME) + +extern struct d_level D_LEVEL[]; +extern size_t D_LEVEL_SIZE; + + +/* + * Frontend stuff + * + * + * Stuff you need to declare prior to using the actual "debug" actions + * (defined below). + */ + +#ifndef D_MODULENAME +#error D_MODULENAME is not defined in your debug-levels.h file +/** + * D_MODULE - Name of the current module + * + * #define in your module's debug-levels.h, making sure it is + * unique. This has to be a legal C identifier. + */ +#define D_MODULENAME undefined_modulename +#endif + + +#ifndef D_MASTER +#warning D_MASTER not defined, but debug.h included! [see docs] +/** + * D_MASTER - Compile time maximum debug level + * + * #define in your debug-levels.h file to the maximum debug level the + * runtime code will be allowed to have. This allows you to provide a + * main knob. + * + * Anything above that level will be optimized out of the compile. + * + * Defaults to zero (no debug code compiled in). + * + * Maximum one definition per module (at the debug-levels.h file). + */ +#define D_MASTER 0 +#endif + +#ifndef D_SUBMODULE +#error D_SUBMODULE not defined, but debug.h included! [see docs] +/** + * D_SUBMODULE - Name of the current submodule + * + * #define in your submodule .c file before #including debug-levels.h + * to the name of the current submodule as previously declared and + * defined with D_SUBMODULE_DECLARE() (in your module's + * debug-levels.h) and D_SUBMODULE_DEFINE(). + * + * This is used to provide runtime-control over the debug levels. + * + * Maximum one per .c file! Can be shared among different .c files + * (meaning they belong to the same submodule categorization). + */ +#define D_SUBMODULE undefined_module +#endif + + +/** + * D_SUBMODULE_DECLARE - Declare a submodule for runtime debug level control + * + * @_name: name of the submodule, restricted to the chars that make up a + * valid C identifier ([a-zA-Z0-9_]). + * + * Declare in the module's debug-levels.h header file as: + * + * enum d_module { + * D_SUBMODULE_DECLARE(submodule_1), + * D_SUBMODULE_DECLARE(submodule_2), + * D_SUBMODULE_DECLARE(submodule_3), + * }; + * + * Some corresponding .c file needs to have a matching + * D_SUBMODULE_DEFINE(). + */ +#define D_SUBMODULE_DECLARE(_name) __D_SUBMODULE_##_name + + +/** + * D_SUBMODULE_DEFINE - Define a submodule for runtime debug level control + * + * @_name: name of the submodule, restricted to the chars that make up a + * valid C identifier ([a-zA-Z0-9_]). + * + * Use once per module (in some .c file) as: + * + * static + * struct d_level d_level_SUBMODULENAME[] = { + * D_SUBMODULE_DEFINE(submodule_1), + * D_SUBMODULE_DEFINE(submodule_2), + * D_SUBMODULE_DEFINE(submodule_3), + * }; + * size_t d_level_size_SUBDMODULENAME = ARRAY_SIZE(d_level_SUBDMODULENAME); + * + * Matching D_SUBMODULE_DECLARE()s have to be present in a + * debug-levels.h header file. + */ +#define D_SUBMODULE_DEFINE(_name) \ +[__D_SUBMODULE_##_name] = { \ + .level = 0, \ + .name = #_name \ +} + + + +/* The actual "debug" operations */ + + +/** + * d_test - Returns true if debugging should be enabled + * + * @l: intended debug level (unsigned) + * + * If the master debug switch is enabled and the current settings are + * higher or equal to the requested level, then debugging + * output/actions should be enabled. + * + * NOTE: + * + * This needs to be coded so that it can be evaluated in compile + * time; this is why the ugly BUG_ON() is placed in there, so the + * D_MASTER evaluation compiles all out if it is compile-time false. + */ +#define d_test(l) \ +({ \ + unsigned __l = l; /* type enforcer */ \ + (D_MASTER) >= __l \ + && ({ \ + BUG_ON(_D_SUBMODULE_INDEX(D_SUBMODULE) >= D_LEVEL_SIZE);\ + D_LEVEL[_D_SUBMODULE_INDEX(D_SUBMODULE)].level >= __l; \ + }); \ +}) + + +/** + * d_fnstart - log message at function start if debugging enabled + * + * @l: intended debug level + * @_dev: 'struct device' pointer, NULL if none (for context) + * @f: printf-like format and arguments + */ +#define d_fnstart(l, _dev, f, a...) _d_printf(l, " FNSTART", _dev, f, ## a) + + +/** + * d_fnend - log message at function end if debugging enabled + * + * @l: intended debug level + * @_dev: 'struct device' pointer, NULL if none (for context) + * @f: printf-like format and arguments + */ +#define d_fnend(l, _dev, f, a...) _d_printf(l, " FNEND", _dev, f, ## a) + + +/** + * d_printf - log message if debugging enabled + * + * @l: intended debug level + * @_dev: 'struct device' pointer, NULL if none (for context) + * @f: printf-like format and arguments + */ +#define d_printf(l, _dev, f, a...) _d_printf(l, "", _dev, f, ## a) + + +/** + * d_dump - log buffer hex dump if debugging enabled + * + * @l: intended debug level + * @_dev: 'struct device' pointer, NULL if none (for context) + * @f: printf-like format and arguments + */ +#define d_dump(l, dev, ptr, size) \ +do { \ + char head[64]; \ + if (!d_test(l)) \ + break; \ + __d_head(head, sizeof(head), dev); \ + print_hex_dump(KERN_ERR, head, 0, 16, 1, \ + ((void *) ptr), (size), 0); \ +} while (0) + + +/** + * Export a submodule's debug level over debugfs as PREFIXSUBMODULE + * + * @prefix: string to prefix the name with + * @submodule: name of submodule (not a string, just the name) + * @dentry: debugfs parent dentry + * + * Returns: 0 if ok, < 0 errno on error. + * + * For removing, just use debugfs_remove_recursive() on the parent. + */ +#define d_level_register_debugfs(prefix, name, parent) \ +({ \ + int rc; \ + struct dentry *fd; \ + struct dentry *verify_parent_type = parent; \ + fd = debugfs_create_u8( \ + prefix #name, 0600, verify_parent_type, \ + &(D_LEVEL[__D_SUBMODULE_ ## name].level)); \ + rc = PTR_ERR(fd); \ + if (IS_ERR(fd) && rc != -ENODEV) \ + printk(KERN_ERR "%s: Can't create debugfs entry %s: " \ + "%d\n", __func__, prefix #name, rc); \ + else \ + rc = 0; \ + rc; \ +}) + + +#endif /* #ifndef __debug__h__ */ -- cgit v1.2.2 From ea24652d253eabfb83e955e55ce032228d9d99b9 Mon Sep 17 00:00:00 2001 From: Inaky Perez-Gonzalez Date: Sat, 20 Dec 2008 16:57:43 -0800 Subject: i2400m: host/device procotol and core driver definitions The wimax/i2400m.h defines the structures and constants for the host-device protocols: - boot / firmware upload protocol - general data transport protocol - control protocol It is done in such a way that can also be used verbatim by user space. drivers/net/wimax/i2400m.h defines all the APIs used by the core, bus-generic driver (i2400m) and the bus specific drivers (i2400m-BUSNAME). It also gives a roadmap to the driver implementation. debug-levels.h adds the core driver's debug settings. Signed-off-by: Inaky Perez-Gonzalez Signed-off-by: Greg Kroah-Hartman --- include/linux/wimax/i2400m.h | 512 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 512 insertions(+) create mode 100644 include/linux/wimax/i2400m.h (limited to 'include/linux') diff --git a/include/linux/wimax/i2400m.h b/include/linux/wimax/i2400m.h new file mode 100644 index 000000000000..74198f5bb4dc --- /dev/null +++ b/include/linux/wimax/i2400m.h @@ -0,0 +1,512 @@ +/* + * Intel Wireless WiMax Connection 2400m + * Host-Device protocol interface definitions + * + * + * Copyright (C) 2007-2008 Intel Corporation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * + * Intel Corporation + * Inaky Perez-Gonzalez + * - Initial implementation + * + * + * This header defines the data structures and constants used to + * communicate with the device. + * + * BOOTMODE/BOOTROM/FIRMWARE UPLOAD PROTOCOL + * + * The firmware upload protocol is quite simple and only requires a + * handful of commands. See drivers/net/wimax/i2400m/fw.c for more + * details. + * + * The BCF data structure is for the firmware file header. + * + * + * THE DATA / CONTROL PROTOCOL + * + * This is the normal protocol spoken with the device once the + * firmware is uploaded. It transports data payloads and control + * messages back and forth. + * + * It consists 'messages' that pack one or more payloads each. The + * format is described in detail in drivers/net/wimax/i2400m/rx.c and + * tx.c. + * + * + * THE L3L4 PROTOCOL + * + * The term L3L4 refers to Layer 3 (the device), Layer 4 (the + * driver/host software). + * + * This is the control protocol used by the host to control the i2400m + * device (scan, connect, disconnect...). This is sent to / received + * as control frames. These frames consist of a header and zero or + * more TLVs with information. We call each control frame a "message". + * + * Each message is composed of: + * + * HEADER + * [TLV0 + PAYLOAD0] + * [TLV1 + PAYLOAD1] + * [...] + * [TLVN + PAYLOADN] + * + * The HEADER is defined by 'struct i2400m_l3l4_hdr'. The payloads are + * defined by a TLV structure (Type Length Value) which is a 'header' + * (struct i2400m_tlv_hdr) and then the payload. + * + * All integers are represented as Little Endian. + * + * - REQUESTS AND EVENTS + * + * The requests can be clasified as follows: + * + * COMMAND: implies a request from the host to the device requesting + * an action being performed. The device will reply with a + * message (with the same type as the command), status and + * no (TLV) payload. Execution of a command might cause + * events (of different type) to be sent later on as + * device's state changes. + * + * GET/SET: similar to COMMAND, but will not cause other + * EVENTs. The reply, in the case of GET, will contain + * TLVs with the requested information. + * + * EVENT: asynchronous messages sent from the device, maybe as a + * consequence of previous COMMANDs but disassociated from + * them. + * + * Only one request might be pending at the same time (ie: don't + * parallelize nor post another GET request before the previous + * COMMAND has been acknowledged with it's corresponding reply by the + * device). + * + * The different requests and their formats are described below: + * + * I2400M_MT_* Message types + * I2400M_MS_* Message status (for replies, events) + * i2400m_tlv_* TLVs + * + * data types are named 'struct i2400m_msg_OPNAME', OPNAME matching the + * operation. + */ + +#ifndef __LINUX__WIMAX__I2400M_H__ +#define __LINUX__WIMAX__I2400M_H__ + +#include + + +/* + * Host Device Interface (HDI) common to all busses + */ + +/* Boot-mode (firmware upload mode) commands */ + +/* Header for the firmware file */ +struct i2400m_bcf_hdr { + __le32 module_type; + __le32 header_len; + __le32 header_version; + __le32 module_id; + __le32 module_vendor; + __le32 date; /* BCD YYYMMDD */ + __le32 size; + __le32 key_size; /* in dwords */ + __le32 modulus_size; /* in dwords */ + __le32 exponent_size; /* in dwords */ + __u8 reserved[88]; +} __attribute__ ((packed)); + +/* Boot mode opcodes */ +enum i2400m_brh_opcode { + I2400M_BRH_READ = 1, + I2400M_BRH_WRITE = 2, + I2400M_BRH_JUMP = 3, + I2400M_BRH_SIGNED_JUMP = 8, + I2400M_BRH_HASH_PAYLOAD_ONLY = 9, +}; + +/* Boot mode command masks and stuff */ +enum i2400m_brh { + I2400M_BRH_SIGNATURE = 0xcbbc0000, + I2400M_BRH_SIGNATURE_MASK = 0xffff0000, + I2400M_BRH_SIGNATURE_SHIFT = 16, + I2400M_BRH_OPCODE_MASK = 0x0000000f, + I2400M_BRH_RESPONSE_MASK = 0x000000f0, + I2400M_BRH_RESPONSE_SHIFT = 4, + I2400M_BRH_DIRECT_ACCESS = 0x00000400, + I2400M_BRH_RESPONSE_REQUIRED = 0x00000200, + I2400M_BRH_USE_CHECKSUM = 0x00000100, +}; + + +/* Constants for bcf->module_id */ +enum i2400m_bcf_mod_id { + /* Firmware file carries its own pokes -- pokes are a set of + * magical values that have to be written in certain memory + * addresses to get the device up and ready for firmware + * download when it is in non-signed boot mode. */ + I2400M_BCF_MOD_ID_POKES = 0x000000001, +}; + + +/** + * i2400m_bootrom_header - Header for a boot-mode command + * + * @cmd: the above command descriptor + * @target_addr: where on the device memory should the action be performed. + * @data_size: for read/write, amount of data to be read/written + * @block_checksum: checksum value (if applicable) + * @payload: the beginning of data attached to this header + */ +struct i2400m_bootrom_header { + __le32 command; /* Compose with enum i2400_brh */ + __le32 target_addr; + __le32 data_size; + __le32 block_checksum; + char payload[0]; +} __attribute__ ((packed)); + + +/* + * Data / control protocol + */ + +/* Packet types for the host-device interface */ +enum i2400m_pt { + I2400M_PT_DATA = 0, + I2400M_PT_CTRL, + I2400M_PT_TRACE, /* For device debug */ + I2400M_PT_RESET_WARM, /* device reset */ + I2400M_PT_RESET_COLD, /* USB[transport] reset, like reconnect */ + I2400M_PT_ILLEGAL +}; + + +/* + * Payload for a data packet + * + * This is prefixed to each and every outgoing DATA type. + */ +struct i2400m_pl_data_hdr { + __le32 reserved; +} __attribute__((packed)); + + +/* Misc constants */ +enum { + I2400M_PL_PAD = 16, /* Payload data size alignment */ + I2400M_PL_SIZE_MAX = 0x3EFF, + I2400M_MAX_PLS_IN_MSG = 60, + /* protocol barkers: sync sequences; for notifications they + * are sent in groups of four. */ + I2400M_H2D_PREVIEW_BARKER = 0xcafe900d, + I2400M_COLD_RESET_BARKER = 0xc01dc01d, + I2400M_WARM_RESET_BARKER = 0x50f750f7, + I2400M_NBOOT_BARKER = 0xdeadbeef, + I2400M_SBOOT_BARKER = 0x0ff1c1a1, + I2400M_ACK_BARKER = 0xfeedbabe, + I2400M_D2H_MSG_BARKER = 0xbeefbabe, +}; + + +/* + * Hardware payload descriptor + * + * Bitfields encoded in a struct to enforce typing semantics. + * + * Look in rx.c and tx.c for a full description of the format. + */ +struct i2400m_pld { + __le32 val; +} __attribute__ ((packed)); + +#define I2400M_PLD_SIZE_MASK 0x00003fff +#define I2400M_PLD_TYPE_SHIFT 16 +#define I2400M_PLD_TYPE_MASK 0x000f0000 + +/* + * Header for a TX message or RX message + * + * @barker: preamble + * @size: used for management of the FIFO queue buffer; before + * sending, this is converted to be a real preamble. This + * indicates the real size of the TX message that starts at this + * point. If the highest bit is set, then this message is to be + * skipped. + * @sequence: sequence number of this message + * @offset: offset where the message itself starts -- see the comments + * in the file header about message header and payload descriptor + * alignment. + * @num_pls: number of payloads in this message + * @padding: amount of padding bytes at the end of the message to make + * it be of block-size aligned + * + * Look in rx.c and tx.c for a full description of the format. + */ +struct i2400m_msg_hdr { + union { + __le32 barker; + __u32 size; /* same size type as barker!! */ + }; + union { + __le32 sequence; + __u32 offset; /* same size type as barker!! */ + }; + __le16 num_pls; + __le16 rsv1; + __le16 padding; + __le16 rsv2; + struct i2400m_pld pld[0]; +} __attribute__ ((packed)); + + + +/* + * L3/L4 control protocol + */ + +enum { + /* Interface version */ + I2400M_L3L4_VERSION = 0x0100, +}; + +/* Message types */ +enum i2400m_mt { + I2400M_MT_RESERVED = 0x0000, + I2400M_MT_INVALID = 0xffff, + I2400M_MT_REPORT_MASK = 0x8000, + + I2400M_MT_GET_SCAN_RESULT = 0x4202, + I2400M_MT_SET_SCAN_PARAM = 0x4402, + I2400M_MT_CMD_RF_CONTROL = 0x4602, + I2400M_MT_CMD_SCAN = 0x4603, + I2400M_MT_CMD_CONNECT = 0x4604, + I2400M_MT_CMD_DISCONNECT = 0x4605, + I2400M_MT_CMD_EXIT_IDLE = 0x4606, + I2400M_MT_GET_LM_VERSION = 0x5201, + I2400M_MT_GET_DEVICE_INFO = 0x5202, + I2400M_MT_GET_LINK_STATUS = 0x5203, + I2400M_MT_GET_STATISTICS = 0x5204, + I2400M_MT_GET_STATE = 0x5205, + I2400M_MT_GET_MEDIA_STATUS = 0x5206, + I2400M_MT_SET_INIT_CONFIG = 0x5404, + I2400M_MT_CMD_INIT = 0x5601, + I2400M_MT_CMD_TERMINATE = 0x5602, + I2400M_MT_CMD_MODE_OF_OP = 0x5603, + I2400M_MT_CMD_RESET_DEVICE = 0x5604, + I2400M_MT_CMD_MONITOR_CONTROL = 0x5605, + I2400M_MT_CMD_ENTER_POWERSAVE = 0x5606, + I2400M_MT_GET_TLS_OPERATION_RESULT = 0x6201, + I2400M_MT_SET_EAP_SUCCESS = 0x6402, + I2400M_MT_SET_EAP_FAIL = 0x6403, + I2400M_MT_SET_EAP_KEY = 0x6404, + I2400M_MT_CMD_SEND_EAP_RESPONSE = 0x6602, + I2400M_MT_REPORT_SCAN_RESULT = 0xc002, + I2400M_MT_REPORT_STATE = 0xd002, + I2400M_MT_REPORT_POWERSAVE_READY = 0xd005, + I2400M_MT_REPORT_EAP_REQUEST = 0xe002, + I2400M_MT_REPORT_EAP_RESTART = 0xe003, + I2400M_MT_REPORT_ALT_ACCEPT = 0xe004, + I2400M_MT_REPORT_KEY_REQUEST = 0xe005, +}; + + +/* + * Message Ack Status codes + * + * When a message is replied-to, this status is reported. + */ +enum i2400m_ms { + I2400M_MS_DONE_OK = 0, + I2400M_MS_DONE_IN_PROGRESS = 1, + I2400M_MS_INVALID_OP = 2, + I2400M_MS_BAD_STATE = 3, + I2400M_MS_ILLEGAL_VALUE = 4, + I2400M_MS_MISSING_PARAMS = 5, + I2400M_MS_VERSION_ERROR = 6, + I2400M_MS_ACCESSIBILITY_ERROR = 7, + I2400M_MS_BUSY = 8, + I2400M_MS_CORRUPTED_TLV = 9, + I2400M_MS_UNINITIALIZED = 10, + I2400M_MS_UNKNOWN_ERROR = 11, + I2400M_MS_PRODUCTION_ERROR = 12, + I2400M_MS_NO_RF = 13, + I2400M_MS_NOT_READY_FOR_POWERSAVE = 14, + I2400M_MS_THERMAL_CRITICAL = 15, + I2400M_MS_MAX +}; + + +/** + * i2400m_tlv - enumeration of the different types of TLVs + * + * TLVs stand for type-length-value and are the header for a payload + * composed of almost anything. Each payload has a type assigned + * and a length. + */ +enum i2400m_tlv { + I2400M_TLV_L4_MESSAGE_VERSIONS = 129, + I2400M_TLV_SYSTEM_STATE = 141, + I2400M_TLV_MEDIA_STATUS = 161, + I2400M_TLV_RF_OPERATION = 162, + I2400M_TLV_RF_STATUS = 163, + I2400M_TLV_DEVICE_RESET_TYPE = 132, + I2400M_TLV_CONFIG_IDLE_PARAMETERS = 601, +}; + + +struct i2400m_tlv_hdr { + __le16 type; + __le16 length; /* payload's */ + __u8 pl[0]; +} __attribute__((packed)); + + +struct i2400m_l3l4_hdr { + __le16 type; + __le16 length; /* payload's */ + __le16 version; + __le16 resv1; + __le16 status; + __le16 resv2; + struct i2400m_tlv_hdr pl[0]; +} __attribute__((packed)); + + +/** + * i2400m_system_state - different states of the device + */ +enum i2400m_system_state { + I2400M_SS_UNINITIALIZED = 1, + I2400M_SS_INIT, + I2400M_SS_READY, + I2400M_SS_SCAN, + I2400M_SS_STANDBY, + I2400M_SS_CONNECTING, + I2400M_SS_WIMAX_CONNECTED, + I2400M_SS_DATA_PATH_CONNECTED, + I2400M_SS_IDLE, + I2400M_SS_DISCONNECTING, + I2400M_SS_OUT_OF_ZONE, + I2400M_SS_SLEEPACTIVE, + I2400M_SS_PRODUCTION, + I2400M_SS_CONFIG, + I2400M_SS_RF_OFF, + I2400M_SS_RF_SHUTDOWN, + I2400M_SS_DEVICE_DISCONNECT, + I2400M_SS_MAX, +}; + + +/** + * i2400m_tlv_system_state - report on the state of the system + * + * @state: see enum i2400m_system_state + */ +struct i2400m_tlv_system_state { + struct i2400m_tlv_hdr hdr; + __le32 state; +} __attribute__((packed)); + + +struct i2400m_tlv_l4_message_versions { + struct i2400m_tlv_hdr hdr; + __le16 major; + __le16 minor; + __le16 branch; + __le16 reserved; +} __attribute__((packed)); + + +struct i2400m_tlv_detailed_device_info { + struct i2400m_tlv_hdr hdr; + __u8 reserved1[400]; + __u8 mac_address[6]; + __u8 reserved2[2]; +} __attribute__((packed)); + + +enum i2400m_rf_switch_status { + I2400M_RF_SWITCH_ON = 1, + I2400M_RF_SWITCH_OFF = 2, +}; + +struct i2400m_tlv_rf_switches_status { + struct i2400m_tlv_hdr hdr; + __u8 sw_rf_switch; /* 1 ON, 2 OFF */ + __u8 hw_rf_switch; /* 1 ON, 2 OFF */ + __u8 reserved[2]; +} __attribute__((packed)); + + +enum { + i2400m_rf_operation_on = 1, + i2400m_rf_operation_off = 2 +}; + +struct i2400m_tlv_rf_operation { + struct i2400m_tlv_hdr hdr; + __le32 status; /* 1 ON, 2 OFF */ +} __attribute__((packed)); + + +enum i2400m_tlv_reset_type { + I2400M_RESET_TYPE_COLD = 1, + I2400M_RESET_TYPE_WARM +}; + +struct i2400m_tlv_device_reset_type { + struct i2400m_tlv_hdr hdr; + __le32 reset_type; +} __attribute__((packed)); + + +struct i2400m_tlv_config_idle_parameters { + struct i2400m_tlv_hdr hdr; + __le32 idle_timeout; /* 100 to 300000 ms [5min], 100 increments + * 0 disabled */ + __le32 idle_paging_interval; /* frames */ +} __attribute__((packed)); + + +enum i2400m_media_status { + I2400M_MEDIA_STATUS_LINK_UP = 1, + I2400M_MEDIA_STATUS_LINK_DOWN, + I2400M_MEDIA_STATUS_LINK_RENEW, +}; + +struct i2400m_tlv_media_status { + struct i2400m_tlv_hdr hdr; + __le32 media_status; +} __attribute__((packed)); + +#endif /* #ifndef __LINUX__WIMAX__I2400M_H__ */ -- cgit v1.2.2 From e30698743419d20dce03d033761f203b4d847ab0 Mon Sep 17 00:00:00 2001 From: Inaky Perez-Gonzalez Date: Sat, 20 Dec 2008 16:57:59 -0800 Subject: wimax: export linux/wimax.h and linux/wimax/i2400m.h with headers_install These two files are what user space can use to establish communication with the WiMAX kernel API and to speak the Intel 2400m Wireless WiMAX connection's control protocol. Signed-off-by: Inaky Perez-Gonzalez Cc: David Woodhouse Signed-off-by: Greg Kroah-Hartman --- include/linux/Kbuild | 2 ++ include/linux/wimax/Kbuild | 1 + 2 files changed, 3 insertions(+) create mode 100644 include/linux/wimax/Kbuild (limited to 'include/linux') diff --git a/include/linux/Kbuild b/include/linux/Kbuild index a3323f337e4d..12e9a2957caf 100644 --- a/include/linux/Kbuild +++ b/include/linux/Kbuild @@ -371,3 +371,5 @@ unifdef-y += xattr.h unifdef-y += xfrm.h objhdr-y += version.h +header-y += wimax.h +header-y += wimax/ diff --git a/include/linux/wimax/Kbuild b/include/linux/wimax/Kbuild new file mode 100644 index 000000000000..3cb4f269bb09 --- /dev/null +++ b/include/linux/wimax/Kbuild @@ -0,0 +1 @@ +header-y += i2400m.h -- cgit v1.2.2 From f7b7baae6b30ff04124259ff8d7c0c0d281320e6 Mon Sep 17 00:00:00 2001 From: Sheng Yang Date: Tue, 11 Nov 2008 17:17:46 +0800 Subject: PCI: add PCI Advanced Feature Capability defines PCI Advanced Features Capability is introduced by "Conventional PCI Advanced Caps ECN" (can be downloaded in pcisig.com). Add defines for the various AF capabilities, including function level reset (FLR). Reviewed-by: Matthew Wilcox Signed-off-by: Sheng Yang Signed-off-by: Jesse Barnes --- include/linux/pci_regs.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pci_regs.h b/include/linux/pci_regs.h index e5effd47ed74..7766488470e4 100644 --- a/include/linux/pci_regs.h +++ b/include/linux/pci_regs.h @@ -210,6 +210,7 @@ #define PCI_CAP_ID_AGP3 0x0E /* AGP Target PCI-PCI bridge */ #define PCI_CAP_ID_EXP 0x10 /* PCI Express */ #define PCI_CAP_ID_MSIX 0x11 /* MSI-X */ +#define PCI_CAP_ID_AF 0x13 /* PCI Advanced Features */ #define PCI_CAP_LIST_NEXT 1 /* Next capability in the list */ #define PCI_CAP_FLAGS 2 /* Capability defined flags (16 bits) */ #define PCI_CAP_SIZEOF 4 @@ -316,6 +317,17 @@ #define PCI_CHSWP_EXT 0x40 /* ENUM# status - extraction */ #define PCI_CHSWP_INS 0x80 /* ENUM# status - insertion */ +/* PCI Advanced Feature registers */ + +#define PCI_AF_LENGTH 2 +#define PCI_AF_CAP 3 +#define PCI_AF_CAP_TP 0x01 +#define PCI_AF_CAP_FLR 0x02 +#define PCI_AF_CTRL 4 +#define PCI_AF_CTRL_FLR 0x01 +#define PCI_AF_STATUS 5 +#define PCI_AF_STATUS_TP 0x01 + /* PCI-X registers */ #define PCI_X_CMD 2 /* Modes & Features */ -- cgit v1.2.2 From 8b62091e20215730be1b94b7cd135a78a3e692ca Mon Sep 17 00:00:00 2001 From: Andrew Patterson Date: Mon, 10 Nov 2008 15:30:40 -0700 Subject: ACPI/PCI: include missing acpi.h file in pci-acpi.h. The pci-acpi.h file will not compile without including linux/acpi.h. Signed-off-by: Matthew Wilcox Signed-off-by: Jesse Barnes --- include/linux/pci-acpi.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pci-acpi.h b/include/linux/pci-acpi.h index 8837928fbf33..a9e4c34e9389 100644 --- a/include/linux/pci-acpi.h +++ b/include/linux/pci-acpi.h @@ -8,6 +8,8 @@ #ifndef _PCI_ACPI_H_ #define _PCI_ACPI_H_ +#include + #define OSC_QUERY_TYPE 0 #define OSC_SUPPORT_TYPE 1 #define OSC_CONTROL_TYPE 2 -- cgit v1.2.2 From 990a7ac5645883a833a11b900bb6f25b65dea65b Mon Sep 17 00:00:00 2001 From: Andrew Patterson Date: Mon, 10 Nov 2008 15:30:45 -0700 Subject: ACPI/PCI: call _OSC support during root bridge discovery Add pci_acpi_osc_support() and call it when a PCI bridge is added. This allows us to avoid having every individual PCI root bridge driver call _OSC support for every root bridge in their probe functions, a significant savings in boot time. Signed-off-by: Matthew Wilcox Signed-off-by: Jesse Barnes --- include/linux/pci-acpi.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/pci-acpi.h b/include/linux/pci-acpi.h index a9e4c34e9389..424f06f84cab 100644 --- a/include/linux/pci-acpi.h +++ b/include/linux/pci-acpi.h @@ -51,6 +51,7 @@ #ifdef CONFIG_ACPI extern acpi_status pci_osc_control_set(acpi_handle handle, u32 flags); extern acpi_status __pci_osc_support_set(u32 flags, const char *hid); +int pci_acpi_osc_support(acpi_handle handle, u32 flags); static inline acpi_status pci_osc_support_set(u32 flags) { return __pci_osc_support_set(flags, PCI_ROOT_HID_STRING); -- cgit v1.2.2 From 0ef5f8f6159e44b4faa997be08d1a3bcbf44ad08 Mon Sep 17 00:00:00 2001 From: Andrew Patterson Date: Mon, 10 Nov 2008 15:30:50 -0700 Subject: ACPI/PCI: PCI extended config _OSC support called when root bridge added The _OSC capability OSC_EXT_PCI_CONFIG_SUPPORT is set when the root bridge is added with pci_acpi_osc_support() if we can access PCI extended config space. This adds the function pci_ext_cfg_avail which returns true if we can access PCI extended config space (offset greater than 0xff). It currently only returns false if arch=x86 and raw_pci_ext_ops is not set (which might happen if pci=nommcfg is set on the kernel command-line). Signed-off-by: Andrew Patterson Signed-off-by: Jesse Barnes --- include/linux/pci.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index 4bb156ba854a..6fd47654ca4e 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1140,6 +1140,8 @@ static inline void pci_mmcfg_early_init(void) { } static inline void pci_mmcfg_late_init(void) { } #endif +int pci_ext_cfg_avail(struct pci_dev *dev); + #ifdef CONFIG_HAS_IOMEM static inline void __iomem *pci_ioremap_bar(struct pci_dev *pdev, int bar) { -- cgit v1.2.2 From 3e1b16002af29758b6bc9c38939d43838d9335bc Mon Sep 17 00:00:00 2001 From: Andrew Patterson Date: Mon, 10 Nov 2008 15:30:55 -0700 Subject: ACPI/PCI: PCIe ASPM _OSC support capabilities called when root bridge added The _OSC capabilities OSC_ACTIVE_STATE_PWR_SUPPORT and OSC_CLOCK_PWR_CAPABILITY_SUPPORT are set when the root bridge is added with pci_acpi_osc_support(), so we no longer need to do it in the ASPM driver. Also add the function pcie_aspm_enabled, which returns true if pcie_aspm=off is not on the kernel command-line. Signed-off-by: Andrew Patterson Signed-off-by: Jesse Barnes --- include/linux/pci.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index 6fd47654ca4e..eae97a2bf603 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -791,6 +791,15 @@ extern void msi_remove_pci_irq_vectors(struct pci_dev *dev); extern void pci_restore_msi_state(struct pci_dev *dev); #endif +#ifndef CONFIG_PCIEASPM +static inline int pcie_aspm_enabled(void) +{ + return 0; +} +#else +extern int pcie_aspm_enabled(void); +#endif + #ifdef CONFIG_HT_IRQ /* The functions a driver should call */ int ht_create_irq(struct pci_dev *dev, int idx); -- cgit v1.2.2 From 07ae95f988a34465bdcb384bfa73c03424fe2312 Mon Sep 17 00:00:00 2001 From: Andrew Patterson Date: Mon, 10 Nov 2008 15:31:05 -0700 Subject: ACPI/PCI: PCI MSI _OSC support capabilities called when root bridge added The _OSC capability OSC_MSI_SUPPORT is set when the root bridge is added with pci_acpi_osc_support(), so we no longer need to do it in the PCI MSI driver. Also adds the function pci_msi_enabled, which returns true if pci=nomsi is not on the kernel command-line. Signed-off-by: Andrew Patterson Signed-off-by: Jesse Barnes --- include/linux/pci.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index eae97a2bf603..59a3dc2059d3 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -779,6 +779,10 @@ static inline void msi_remove_pci_irq_vectors(struct pci_dev *dev) static inline void pci_restore_msi_state(struct pci_dev *dev) { } +static inline int pci_msi_enabled(void) +{ + return 0; +} #else extern int pci_enable_msi(struct pci_dev *dev); extern void pci_msi_shutdown(struct pci_dev *dev); @@ -789,6 +793,7 @@ extern void pci_msix_shutdown(struct pci_dev *dev); extern void pci_disable_msix(struct pci_dev *dev); extern void msi_remove_pci_irq_vectors(struct pci_dev *dev); extern void pci_restore_msi_state(struct pci_dev *dev); +extern int pci_msi_enabled(void); #endif #ifndef CONFIG_PCIEASPM -- cgit v1.2.2 From 23616941914917cf25b94789856b5326b68d8ee8 Mon Sep 17 00:00:00 2001 From: Andrew Patterson Date: Mon, 10 Nov 2008 15:31:10 -0700 Subject: ACPI/PCI: remove obsolete _OSC capability support functions The acpi_query_osc, __pci_osc_support_set, pci_osc_support_set, and pcie_osc_support_set functions have been obsoleted in favor of setting these capabilities during root bridge discovery with pci_acpi_osc_support. There are no longer any callers of these functions, so remove them. Signed-off-by: Andrew Patterson Signed-off-by: Jesse Barnes --- include/linux/pci-acpi.h | 11 ----------- 1 file changed, 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pci-acpi.h b/include/linux/pci-acpi.h index 424f06f84cab..871e096e0fbc 100644 --- a/include/linux/pci-acpi.h +++ b/include/linux/pci-acpi.h @@ -50,16 +50,7 @@ #ifdef CONFIG_ACPI extern acpi_status pci_osc_control_set(acpi_handle handle, u32 flags); -extern acpi_status __pci_osc_support_set(u32 flags, const char *hid); int pci_acpi_osc_support(acpi_handle handle, u32 flags); -static inline acpi_status pci_osc_support_set(u32 flags) -{ - return __pci_osc_support_set(flags, PCI_ROOT_HID_STRING); -} -static inline acpi_status pcie_osc_support_set(u32 flags) -{ - return __pci_osc_support_set(flags, PCI_EXPRESS_ROOT_HID_STRING); -} static inline acpi_handle acpi_find_root_bridge_handle(struct pci_dev *pdev) { /* Find root host bridge */ @@ -76,8 +67,6 @@ typedef u32 acpi_status; #endif static inline acpi_status pci_osc_control_set(acpi_handle handle, u32 flags) {return AE_ERROR;} -static inline acpi_status pci_osc_support_set(u32 flags) {return AE_ERROR;} -static inline acpi_status pcie_osc_support_set(u32 flags) {return AE_ERROR;} static inline acpi_handle acpi_find_root_bridge_handle(struct pci_dev *pdev) { return NULL; } #endif -- cgit v1.2.2 From e8de1481fd7126ee9e93d6889da6f00c05e1e019 Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Wed, 22 Oct 2008 19:55:31 -0700 Subject: resource: allow MMIO exclusivity for device drivers Device drivers that use pci_request_regions() (and similar APIs) have a reasonable expectation that they are the only ones accessing their device. As part of the e1000e hunt, we were afraid that some userland (X or some bootsplash stuff) was mapping the MMIO region that the driver thought it had exclusively via /dev/mem or via various sysfs resource mappings. This patch adds the option for device drivers to cause their reserved regions to the "banned from /dev/mem use" list, so now both kernel memory and device-exclusive MMIO regions are banned. NOTE: This is only active when CONFIG_STRICT_DEVMEM is set. In addition to the config option, a kernel parameter iomem=relaxed is provided for the cases where developers want to diagnose, in the field, drivers issues from userspace. Reviewed-by: Matthew Wilcox Signed-off-by: Arjan van de Ven Signed-off-by: Jesse Barnes --- include/linux/ioport.h | 11 ++++++++--- include/linux/pci.h | 3 +++ 2 files changed, 11 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ioport.h b/include/linux/ioport.h index 041e95aac2bf..f6bb2ca8e3ba 100644 --- a/include/linux/ioport.h +++ b/include/linux/ioport.h @@ -49,6 +49,7 @@ struct resource_list { #define IORESOURCE_SIZEALIGN 0x00020000 /* size indicates alignment */ #define IORESOURCE_STARTALIGN 0x00040000 /* start field is alignment */ +#define IORESOURCE_EXCLUSIVE 0x08000000 /* Userland may not map this resource */ #define IORESOURCE_DISABLED 0x10000000 #define IORESOURCE_UNSET 0x20000000 #define IORESOURCE_AUTO 0x40000000 @@ -133,13 +134,16 @@ static inline unsigned long resource_type(struct resource *res) } /* Convenience shorthand with allocation */ -#define request_region(start,n,name) __request_region(&ioport_resource, (start), (n), (name)) -#define request_mem_region(start,n,name) __request_region(&iomem_resource, (start), (n), (name)) +#define request_region(start,n,name) __request_region(&ioport_resource, (start), (n), (name), 0) +#define __request_mem_region(start,n,name, excl) __request_region(&iomem_resource, (start), (n), (name), excl) +#define request_mem_region(start,n,name) __request_region(&iomem_resource, (start), (n), (name), 0) +#define request_mem_region_exclusive(start,n,name) \ + __request_region(&iomem_resource, (start), (n), (name), IORESOURCE_EXCLUSIVE) #define rename_region(region, newname) do { (region)->name = (newname); } while (0) extern struct resource * __request_region(struct resource *, resource_size_t start, - resource_size_t n, const char *name); + resource_size_t n, const char *name, int relaxed); /* Compatibility cruft */ #define release_region(start,n) __release_region(&ioport_resource, (start), (n)) @@ -175,6 +179,7 @@ extern struct resource * __devm_request_region(struct device *dev, extern void __devm_release_region(struct device *dev, struct resource *parent, resource_size_t start, resource_size_t n); extern int iomem_map_sanity_check(resource_size_t addr, unsigned long size); +extern int iomem_is_exclusive(u64 addr); #endif /* __ASSEMBLY__ */ #endif /* _LINUX_IOPORT_H */ diff --git a/include/linux/pci.h b/include/linux/pci.h index 59a3dc2059d3..bfcb39ca8879 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -686,10 +686,13 @@ void pci_fixup_irqs(u8 (*)(struct pci_dev *, u8 *), int (*)(struct pci_dev *, u8, u8)); #define HAVE_PCI_REQ_REGIONS 2 int __must_check pci_request_regions(struct pci_dev *, const char *); +int __must_check pci_request_regions_exclusive(struct pci_dev *, const char *); void pci_release_regions(struct pci_dev *); int __must_check pci_request_region(struct pci_dev *, int, const char *); +int __must_check pci_request_region_exclusive(struct pci_dev *, int, const char *); void pci_release_region(struct pci_dev *, int); int pci_request_selected_regions(struct pci_dev *, int, const char *); +int pci_request_selected_regions_exclusive(struct pci_dev *, int, const char *); void pci_release_selected_regions(struct pci_dev *, int); /* drivers/pci/bus.c */ -- cgit v1.2.2 From 57c2cf71c12318b72ebaa5720d210476b6bac4d4 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Thu, 11 Dec 2008 11:24:23 -0700 Subject: PCI: add pci_swizzle_interrupt_pin() This patch adds pci_swizzle_interrupt_pin(), which implements the INTx swizzling algorithm specified in Table 9-1 of the "PCI-to-PCI Bridge Architecture Specification," revision 1.2. There are many architecture-specific implementations of this swizzle that can be replaced by this common one. Reviewed-by: David Howells Signed-off-by: Bjorn Helgaas Signed-off-by: Jesse Barnes --- include/linux/pci.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index bfcb39ca8879..58357d14f94c 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -532,6 +532,7 @@ int __must_check pci_bus_add_device(struct pci_dev *dev); void pci_read_bridge_bases(struct pci_bus *child); struct resource *pci_find_parent_resource(const struct pci_dev *dev, struct resource *res); +u8 pci_swizzle_interrupt_pin(struct pci_dev *dev, u8 pin); int pci_get_interrupt_pin(struct pci_dev *dev, struct pci_dev **bridge); extern struct pci_dev *pci_dev_get(struct pci_dev *dev); extern void pci_dev_put(struct pci_dev *dev); -- cgit v1.2.2 From 1684f5ddd4c0c754f52c78eaa2c5c69ad09fb18c Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Mon, 1 Dec 2008 14:30:30 -0800 Subject: PCI: uninline pci_ioremap_bar() It's too large to be inlined. Acked-by: Arjan van de Ven Signed-off-by: Andrew Morton Signed-off-by: Jesse Barnes --- include/linux/pci.h | 15 +-------------- 1 file changed, 1 insertion(+), 14 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index 58357d14f94c..0d8bc920c2e5 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1160,20 +1160,7 @@ static inline void pci_mmcfg_late_init(void) { } int pci_ext_cfg_avail(struct pci_dev *dev); -#ifdef CONFIG_HAS_IOMEM -static inline void __iomem *pci_ioremap_bar(struct pci_dev *pdev, int bar) -{ - /* - * Make sure the BAR is actually a memory resource, not an IO resource - */ - if (!(pci_resource_flags(pdev, bar) & IORESOURCE_MEM)) { - WARN_ON(1); - return NULL; - } - return ioremap_nocache(pci_resource_start(pdev, bar), - pci_resource_len(pdev, bar)); -} -#endif +void __iomem *pci_ioremap_bar(struct pci_dev *pdev, int bar); #endif /* __KERNEL__ */ #endif /* LINUX_PCI_H */ -- cgit v1.2.2 From 14add80b5120966fe0659d61815b9e9b4b68fdc5 Mon Sep 17 00:00:00 2001 From: Yu Zhao Date: Sat, 22 Nov 2008 02:38:52 +0800 Subject: PCI: remove unnecessary arg of pci_update_resource() This cleanup removes unnecessary argument 'struct resource *res' in pci_update_resource(), so it takes same arguments as other companion functions (pci_assign_resource(), etc.). Signed-off-by: Yu Zhao Signed-off-by: Jesse Barnes --- include/linux/pci.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index 0d8bc920c2e5..c5e02f324e13 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -648,7 +648,7 @@ int pcie_get_readrq(struct pci_dev *dev); int pcie_set_readrq(struct pci_dev *dev, int rq); int pci_reset_function(struct pci_dev *dev); int pci_execute_reset_function(struct pci_dev *dev); -void pci_update_resource(struct pci_dev *dev, struct resource *res, int resno); +void pci_update_resource(struct pci_dev *dev, int resno); int __must_check pci_assign_resource(struct pci_dev *dev, int i); int pci_select_bars(struct pci_dev *dev, unsigned long flags); -- cgit v1.2.2 From fde09c6d8f92de0c9f75698a75f0989f2234c517 Mon Sep 17 00:00:00 2001 From: Yu Zhao Date: Sat, 22 Nov 2008 02:39:32 +0800 Subject: PCI: define PCI resource names in an 'enum' This patch moves all definitions of the PCI resource names to an 'enum', and also replaces some hard-coded resource variables with symbol names. This change eases introduction of device specific resources. Reviewed-by: Bjorn Helgaas Signed-off-by: Yu Zhao Signed-off-by: Jesse Barnes --- include/linux/pci.h | 37 ++++++++++++++++++++++++------------- 1 file changed, 24 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index c5e02f324e13..da1c22bab40e 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -82,7 +82,30 @@ enum pci_mmap_state { #define PCI_DMA_FROMDEVICE 2 #define PCI_DMA_NONE 3 -#define DEVICE_COUNT_RESOURCE 12 +/* + * For PCI devices, the region numbers are assigned this way: + */ +enum { + /* #0-5: standard PCI resources */ + PCI_STD_RESOURCES, + PCI_STD_RESOURCE_END = 5, + + /* #6: expansion ROM resource */ + PCI_ROM_RESOURCE, + + /* resources assigned to buses behind the bridge */ +#define PCI_BRIDGE_RESOURCE_NUM 4 + + PCI_BRIDGE_RESOURCES, + PCI_BRIDGE_RESOURCE_END = PCI_BRIDGE_RESOURCES + + PCI_BRIDGE_RESOURCE_NUM - 1, + + /* total resources associated with a PCI device */ + PCI_NUM_RESOURCES, + + /* preserve this for compatibility */ + DEVICE_COUNT_RESOURCE +}; typedef int __bitwise pci_power_t; @@ -274,18 +297,6 @@ static inline void pci_add_saved_cap(struct pci_dev *pci_dev, hlist_add_head(&new_cap->next, &pci_dev->saved_cap_space); } -/* - * For PCI devices, the region numbers are assigned this way: - * - * 0-5 standard PCI regions - * 6 expansion ROM - * 7-10 bridges: address space assigned to buses behind the bridge - */ - -#define PCI_ROM_RESOURCE 6 -#define PCI_BRIDGE_RESOURCES 7 -#define PCI_NUM_RESOURCES 11 - #ifndef PCI_BUS_NUM_RESOURCES #define PCI_BUS_NUM_RESOURCES 16 #endif -- cgit v1.2.2 From e8c331e963c58b83db24b7d0e39e8c07f687dbc6 Mon Sep 17 00:00:00 2001 From: Kenji Kaneshige Date: Wed, 17 Dec 2008 12:09:12 +0900 Subject: PCI hotplug: introduce functions for ACPI slot detection Some ACPI related PCI hotplug code can be shared among PCI hotplug drivers. This patch introduces the following functions in drivers/pci/hotplug/acpi_pcihp.c to share the code, and changes acpiphp and pciehp to use them. - int acpi_pci_detect_ejectable(struct pci_bus *pbus) This checks if the specified PCI bus has ejectable slots. - int acpi_pci_check_ejectable(struct pci_bus *pbus, acpi_handle handle) This checks if the specified handle is ejectable ACPI PCI slot. The 'pbus' parameter is needed to check if 'handle' is PCI related ACPI object. This patch also introduces the following inline function in include/linux/pci-acpi.h, which is useful to get ACPI handle of the PCI bridge from struct pci_bus of the bridge's secondary bus. - static inline acpi_handle acpi_pci_get_bridge_handle(struct pci_bus *pbus) This returns ACPI handle of the PCI bridge which generates PCI bus specified by 'pbus'. Signed-off-by: Kenji Kaneshige Signed-off-by: Jesse Barnes --- include/linux/pci-acpi.h | 9 +++++++++ include/linux/pci_hotplug.h | 2 ++ 2 files changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pci-acpi.h b/include/linux/pci-acpi.h index 871e096e0fbc..042c166f65d5 100644 --- a/include/linux/pci-acpi.h +++ b/include/linux/pci-acpi.h @@ -60,6 +60,15 @@ static inline acpi_handle acpi_find_root_bridge_handle(struct pci_dev *pdev) return acpi_get_pci_rootbridge_handle(pci_domain_nr(pdev->bus), pdev->bus->number); } + +static inline acpi_handle acpi_pci_get_bridge_handle(struct pci_bus *pbus) +{ + int seg = pci_domain_nr(pbus), busnr = pbus->number; + struct pci_dev *bridge = pbus->self; + if (bridge) + return DEVICE_ACPI_HANDLE(&(bridge->dev)); + return acpi_get_pci_rootbridge_handle(seg, busnr); +} #else #if !defined(AE_ERROR) typedef u32 acpi_status; diff --git a/include/linux/pci_hotplug.h b/include/linux/pci_hotplug.h index a00bd1a0f156..f7cc204fab07 100644 --- a/include/linux/pci_hotplug.h +++ b/include/linux/pci_hotplug.h @@ -228,6 +228,8 @@ extern acpi_status acpi_get_hp_params_from_firmware(struct pci_bus *bus, struct hotplug_params *hpp); int acpi_get_hp_hw_control_from_firmware(struct pci_dev *dev, u32 flags); int acpi_root_bridge(acpi_handle handle); +int acpi_pci_check_ejectable(struct pci_bus *pbus, acpi_handle handle); +int acpi_pci_detect_ejectable(struct pci_bus *pbus); #endif #endif -- cgit v1.2.2 From 68feac87de15edfc2c700d2d81b814288c93d003 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Tue, 16 Dec 2008 21:36:55 -0700 Subject: PCI: add pci_common_swizzle() for INTx swizzling This patch adds pci_common_swizzle(), which swizzles INTx values all the way up to a root bridge. This common implementation can replace several architecture-specific ones. This should someday be combined with pci_get_interrupt_pin(), but I left it separate for now to make reviewing easier. Signed-off-by: Bjorn Helgaas Signed-off-by: Jesse Barnes --- include/linux/pci.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index da1c22bab40e..170f9ae2d8a0 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -545,6 +545,7 @@ struct resource *pci_find_parent_resource(const struct pci_dev *dev, struct resource *res); u8 pci_swizzle_interrupt_pin(struct pci_dev *dev, u8 pin); int pci_get_interrupt_pin(struct pci_dev *dev, struct pci_dev **bridge); +u8 pci_common_swizzle(struct pci_dev *dev, u8 *pinp); extern struct pci_dev *pci_dev_get(struct pci_dev *dev); extern void pci_dev_put(struct pci_dev *dev); extern void pci_remove_bus(struct pci_bus *b); -- cgit v1.2.2 From 287d19ce2e67c15e79a187b3bdcbbea1a0a51a7d Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Thu, 18 Dec 2008 09:17:16 -0800 Subject: PCI: revise VPD access interface Change PCI VPD API which was only used by sysfs to something usable in drivers. * move iteration over multiple words to the low level * use conventional types for arguments * add exportable wrapper Signed-off-by: Stephen Hemminger Signed-off-by: Jesse Barnes --- include/linux/pci.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index 170f9ae2d8a0..76079e106895 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -687,6 +687,10 @@ int pci_back_from_sleep(struct pci_dev *dev); /* Functions for PCI Hotplug drivers to use */ int pci_bus_find_capability(struct pci_bus *bus, unsigned int devfn, int cap); +/* Vital product data routines */ +ssize_t pci_read_vpd(struct pci_dev *dev, loff_t pos, size_t count, void *buf); +ssize_t pci_write_vpd(struct pci_dev *dev, loff_t pos, size_t count, const void *buf); + /* Helper functions for low-level code (drivers/pci/setup-[bus,res].c) */ void pci_bus_assign_resources(struct pci_bus *bus); void pci_bus_size_bridges(struct pci_bus *bus); -- cgit v1.2.2 From db5679437a2b938c9127480a3923633721583a4f Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Thu, 18 Dec 2008 09:17:16 -0800 Subject: PCI: add interface to set visible size of VPD The VPD on all devices may not be 32K. Unfortunately, there is no generic way to find the size, so this adds a simple API hook to reset it. Signed-off-by: Stephen Hemminger Signed-off-by: Jesse Barnes --- include/linux/pci.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index 76079e106895..7cbecef19bb6 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -690,6 +690,7 @@ int pci_bus_find_capability(struct pci_bus *bus, unsigned int devfn, int cap); /* Vital product data routines */ ssize_t pci_read_vpd(struct pci_dev *dev, loff_t pos, size_t count, void *buf); ssize_t pci_write_vpd(struct pci_dev *dev, loff_t pos, size_t count, const void *buf); +int pci_vpd_truncate(struct pci_dev *dev, size_t size); /* Helper functions for low-level code (drivers/pci/setup-[bus,res].c) */ void pci_bus_assign_resources(struct pci_bus *bus); -- cgit v1.2.2 From 322162a71bd9fc4edb1b11236e7bc8aa27ccac22 Mon Sep 17 00:00:00 2001 From: Kenji Kaneshige Date: Fri, 19 Dec 2008 15:19:02 +0900 Subject: PCI: pciehp: cleanup register and field definitions Clean up register definitions related to PCI Express Hot plug. - Add register definitions into include/linux/pci_regs.h, and use them instead of pciehp's locally definied register definitions. - Remove pciehp's locally defined register definitions - Remove unused register definitions in pciehp. - Some minor cleanups. Signed-off-by: Kenji Kaneshige Signed-off-by: Jesse Barnes --- include/linux/pci_regs.h | 64 ++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 57 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pci_regs.h b/include/linux/pci_regs.h index 7766488470e4..027815b4635e 100644 --- a/include/linux/pci_regs.h +++ b/include/linux/pci_regs.h @@ -411,20 +411,70 @@ #define PCI_EXP_DEVSTA_AUXPD 0x10 /* AUX Power Detected */ #define PCI_EXP_DEVSTA_TRPND 0x20 /* Transactions Pending */ #define PCI_EXP_LNKCAP 12 /* Link Capabilities */ -#define PCI_EXP_LNKCAP_ASPMS 0xc00 /* ASPM Support */ -#define PCI_EXP_LNKCAP_L0SEL 0x7000 /* L0s Exit Latency */ -#define PCI_EXP_LNKCAP_L1EL 0x38000 /* L1 Exit Latency */ -#define PCI_EXP_LNKCAP_CLKPM 0x40000 /* L1 Clock Power Management */ +#define PCI_EXP_LNKCAP_SLS 0x0000000f /* Supported Link Speeds */ +#define PCI_EXP_LNKCAP_MLW 0x000003f0 /* Maximum Link Width */ +#define PCI_EXP_LNKCAP_ASPMS 0x00000c00 /* ASPM Support */ +#define PCI_EXP_LNKCAP_L0SEL 0x00007000 /* L0s Exit Latency */ +#define PCI_EXP_LNKCAP_L1EL 0x00038000 /* L1 Exit Latency */ +#define PCI_EXP_LNKCAP_CLKPM 0x00040000 /* L1 Clock Power Management */ +#define PCI_EXP_LNKCAP_SDERC 0x00080000 /* Suprise Down Error Reporting Capable */ +#define PCI_EXP_LNKCAP_DLLLARC 0x00100000 /* Data Link Layer Link Active Reporting Capable */ +#define PCI_EXP_LNKCAP_LBNC 0x00200000 /* Link Bandwidth Notification Capability */ +#define PCI_EXP_LNKCAP_PN 0xff000000 /* Port Number */ #define PCI_EXP_LNKCTL 16 /* Link Control */ -#define PCI_EXP_LNKCTL_RL 0x20 /* Retrain Link */ -#define PCI_EXP_LNKCTL_CCC 0x40 /* Common Clock COnfiguration */ +#define PCI_EXP_LNKCTL_ASPMC 0x0003 /* ASPM Control */ +#define PCI_EXP_LNKCTL_RCB 0x0008 /* Read Completion Boundary */ +#define PCI_EXP_LNKCTL_LD 0x0010 /* Link Disable */ +#define PCI_EXP_LNKCTL_RL 0x0020 /* Retrain Link */ +#define PCI_EXP_LNKCTL_CCC 0x0040 /* Common Clock Configuration */ +#define PCI_EXP_LNKCTL_ES 0x0080 /* Extended Synch */ #define PCI_EXP_LNKCTL_CLKREQ_EN 0x100 /* Enable clkreq */ +#define PCI_EXP_LNKCTL_HAWD 0x0200 /* Hardware Autonomous Width Disable */ +#define PCI_EXP_LNKCTL_LBMIE 0x0400 /* Link Bandwidth Management Interrupt Enable */ +#define PCI_EXP_LNKCTL_LABIE 0x0800 /* Lnk Autonomous Bandwidth Interrupt Enable */ #define PCI_EXP_LNKSTA 18 /* Link Status */ -#define PCI_EXP_LNKSTA_LT 0x800 /* Link Training */ +#define PCI_EXP_LNKSTA_CLS 0x000f /* Current Link Speed */ +#define PCI_EXP_LNKSTA_NLW 0x03f0 /* Nogotiated Link Width */ +#define PCI_EXP_LNKSTA_LT 0x0800 /* Link Training */ #define PCI_EXP_LNKSTA_SLC 0x1000 /* Slot Clock Configuration */ +#define PCI_EXP_LNKSTA_DLLLA 0x2000 /* Data Link Layer Link Active */ +#define PCI_EXP_LNKSTA_LBMS 0x4000 /* Link Bandwidth Management Status */ +#define PCI_EXP_LNKSTA_LABS 0x8000 /* Link Autonomous Bandwidth Status */ #define PCI_EXP_SLTCAP 20 /* Slot Capabilities */ +#define PCI_EXP_SLTCAP_ABP 0x00000001 /* Attention Button Present */ +#define PCI_EXP_SLTCAP_PCP 0x00000002 /* Power Controller Present */ +#define PCI_EXP_SLTCAP_MRLSP 0x00000004 /* MRL Sensor Present */ +#define PCI_EXP_SLTCAP_AIP 0x00000008 /* Attention Indicator Present */ +#define PCI_EXP_SLTCAP_PIP 0x00000010 /* Power Indicator Present */ +#define PCI_EXP_SLTCAP_HPS 0x00000020 /* Hot-Plug Surprise */ +#define PCI_EXP_SLTCAP_HPC 0x00000040 /* Hot-Plug Capable */ +#define PCI_EXP_SLTCAP_SPLV 0x00007f80 /* Slot Power Limit Value */ +#define PCI_EXP_SLTCAP_SPLS 0x00018000 /* Slot Power Limit Scale */ +#define PCI_EXP_SLTCAP_EIP 0x00020000 /* Electromechanical Interlock Present */ +#define PCI_EXP_SLTCAP_NCCS 0x00040000 /* No Command Completed Support */ +#define PCI_EXP_SLTCAP_PSN 0xfff80000 /* Physical Slot Number */ #define PCI_EXP_SLTCTL 24 /* Slot Control */ +#define PCI_EXP_SLTCTL_ABPE 0x0001 /* Attention Button Pressed Enable */ +#define PCI_EXP_SLTCTL_PFDE 0x0002 /* Power Fault Detected Enable */ +#define PCI_EXP_SLTCTL_MRLSCE 0x0004 /* MRL Sensor Changed Enable */ +#define PCI_EXP_SLTCTL_PDCE 0x0008 /* Presence Detect Changed Enable */ +#define PCI_EXP_SLTCTL_CCIE 0x0010 /* Command Completed Interrupt Enable */ +#define PCI_EXP_SLTCTL_HPIE 0x0020 /* Hot-Plug Interrupt Enable */ +#define PCI_EXP_SLTCTL_AIC 0x00c0 /* Attention Indicator Control */ +#define PCI_EXP_SLTCTL_PIC 0x0300 /* Power Indicator Control */ +#define PCI_EXP_SLTCTL_PCC 0x0400 /* Power Controller Control */ +#define PCI_EXP_SLTCTL_EIC 0x0800 /* Electromechanical Interlock Control */ +#define PCI_EXP_SLTCTL_DLLSCE 0x1000 /* Data Link Layer State Changed Enable */ #define PCI_EXP_SLTSTA 26 /* Slot Status */ +#define PCI_EXP_SLTSTA_ABP 0x0001 /* Attention Button Pressed */ +#define PCI_EXP_SLTSTA_PFD 0x0002 /* Power Fault Detected */ +#define PCI_EXP_SLTSTA_MRLSC 0x0004 /* MRL Sensor Changed */ +#define PCI_EXP_SLTSTA_PDC 0x0008 /* Presence Detect Changed */ +#define PCI_EXP_SLTSTA_CC 0x0010 /* Command Completed */ +#define PCI_EXP_SLTSTA_MRLSS 0x0020 /* MRL Sensor State */ +#define PCI_EXP_SLTSTA_PDS 0x0040 /* Presence Detect State */ +#define PCI_EXP_SLTSTA_EIS 0x0080 /* Electromechanical Interlock Status */ +#define PCI_EXP_SLTSTA_DLLSC 0x0100 /* Data Link Layer State Changed */ #define PCI_EXP_RTCTL 28 /* Root Control */ #define PCI_EXP_RTCTL_SECEE 0x01 /* System Error on Correctable Error */ #define PCI_EXP_RTCTL_SENFEE 0x02 /* System Error on Non-Fatal Error */ -- cgit v1.2.2 From 6a479079c07211bf348ac8a79754f26bea258f26 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Tue, 23 Dec 2008 03:08:29 +0000 Subject: PCI: Add pci_clear_master() as opposite of pci_set_master() During an online device reset it may be useful to disable bus-mastering. pci_disable_device() does that, and far more besides, so is not suitable for an online reset. Add pci_clear_master() which does just this. Signed-off-by: Ben Hutchings Reviewed-by: Matthew Wilcox Signed-off-by: Jesse Barnes --- include/linux/pci.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index 7cbecef19bb6..0f6d2bb1df9c 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -642,6 +642,7 @@ static inline int pci_is_managed(struct pci_dev *pdev) void pci_disable_device(struct pci_dev *dev); void pci_set_master(struct pci_dev *dev); +void pci_clear_master(struct pci_dev *dev); int pci_set_pcie_reset_state(struct pci_dev *dev, enum pcie_reset_state state); #define HAVE_PCI_SET_MWI int __must_check pci_set_mwi(struct pci_dev *dev); -- cgit v1.2.2 From 16cf0ebc35dd63f72628ba1246132a6fd17bced2 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 5 Jan 2009 14:50:27 +0100 Subject: x86/PCI: Do not use interrupt links for devices using MSI-X pcibios_enable_device() and pcibios_disable_device() don't handle IRQs for devices that have MSI enabled and it should treat the devices with MSI-X enabled in the same way. Signed-off-by: Rafael J. Wysocki Acked-by: Ingo Molnar Signed-off-by: Jesse Barnes --- include/linux/pci.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index 0f6d2bb1df9c..80f8b8b65fde 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -336,6 +336,15 @@ struct pci_bus { #define pci_bus_b(n) list_entry(n, struct pci_bus, node) #define to_pci_bus(n) container_of(n, struct pci_bus, dev) +#ifdef CONFIG_PCI_MSI +static inline bool pci_dev_msi_enabled(struct pci_dev *pci_dev) +{ + return pci_dev->msi_enabled || pci_dev->msix_enabled; +} +#else +static inline bool pci_dev_msi_enabled(struct pci_dev *pci_dev) { return false; } +#endif + /* * Error values that may be returned by PCI functions. */ -- cgit v1.2.2 From 940fbf411e5fb42aee8ab7dd814b24080951dbfc Mon Sep 17 00:00:00 2001 From: Detlef Riekenberg Date: Wed, 7 Jan 2009 10:11:44 +0100 Subject: linux/types.h: Don't depend on __GNUC__ for __le64/__be64 The typedefs for __u64 and __s64 where fixed to be available for other compiler on May 2 2008 by H. Peter Anvin (in commit edfa5cfa3dc5) Acked-by: H. Peter Anvin Signed-off-by: Detlef Riekenberg Signed-off-by: Linus Torvalds --- include/linux/types.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/types.h b/include/linux/types.h index 3b864f2d9560..712ca53bc348 100644 --- a/include/linux/types.h +++ b/include/linux/types.h @@ -176,10 +176,9 @@ typedef __u16 __bitwise __le16; typedef __u16 __bitwise __be16; typedef __u32 __bitwise __le32; typedef __u32 __bitwise __be32; -#if defined(__GNUC__) typedef __u64 __bitwise __le64; typedef __u64 __bitwise __be64; -#endif + typedef __u16 __bitwise __sum16; typedef __u32 __bitwise __wsum; -- cgit v1.2.2 From 8d1a0a13edecfdcb47fee3238ed4a2af2a2867f9 Mon Sep 17 00:00:00 2001 From: Anders Larsen Date: Thu, 1 Jan 2009 17:17:35 +0100 Subject: qnx: include for definitions of __[us]{8,16,32,64} types On 2008-12-30 11:32:33, Sam Ravnborg wrote: > We have added a few additional validation checks of the userspace headers: ... > 3) We should include and not > 4) If we use a __[us]{8,16,32,64} type then we must include Satisfy these requirements for the linux/qnx*.h headers. Signed-off-by: Anders Larsen Signed-off-by: Sam Ravnborg --- include/linux/qnx4_fs.h | 4 +--- include/linux/qnxtypes.h | 5 ++--- 2 files changed, 3 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/qnx4_fs.h b/include/linux/qnx4_fs.h index 34a196ee7941..787d19ea9f46 100644 --- a/include/linux/qnx4_fs.h +++ b/include/linux/qnx4_fs.h @@ -2,14 +2,12 @@ * Name : qnx4_fs.h * Author : Richard Frowijn * Function : qnx4 global filesystem definitions - * Version : 1.0.2 - * Last modified : 2000-01-31 - * * History : 23-03-1998 created */ #ifndef _LINUX_QNX4_FS_H #define _LINUX_QNX4_FS_H +#include #include #include diff --git a/include/linux/qnxtypes.h b/include/linux/qnxtypes.h index a3eb1137857b..bebbe5cc4fb8 100644 --- a/include/linux/qnxtypes.h +++ b/include/linux/qnxtypes.h @@ -2,9 +2,6 @@ * Name : qnxtypes.h * Author : Richard Frowijn * Function : standard qnx types - * Version : 1.0.2 - * Last modified : 2000-01-06 - * * History : 22-03-1998 created * */ @@ -12,6 +9,8 @@ #ifndef _QNX4TYPES_H #define _QNX4TYPES_H +#include + typedef __le16 qnx4_nxtnt_t; typedef __u8 qnx4_ftype_t; -- cgit v1.2.2 From 14f0ca8eaea42a5b5a69cfcb699665dd2618db5f Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Wed, 7 Jan 2009 21:50:22 +0100 Subject: oprofile: make new cpu buffer functions part of the api This patch creates the new functions oprofile_write_reserve() oprofile_add_data() oprofile_write_commit() and makes them part of the oprofile api. Signed-off-by: Robert Richter --- include/linux/oprofile.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'include/linux') diff --git a/include/linux/oprofile.h b/include/linux/oprofile.h index 1ce9fe572e51..1d9518bc4c58 100644 --- a/include/linux/oprofile.h +++ b/include/linux/oprofile.h @@ -164,4 +164,22 @@ void oprofile_put_buff(unsigned long *buf, unsigned int start, unsigned long oprofile_get_cpu_buffer_size(void); void oprofile_cpu_buffer_inc_smpl_lost(void); +/* cpu buffer functions */ + +struct op_sample; + +struct op_entry { + struct ring_buffer_event *event; + struct op_sample *sample; + unsigned long irq_flags; + unsigned long size; + unsigned long *data; +}; + +void oprofile_write_reserve(struct op_entry *entry, + struct pt_regs * const regs, + unsigned long pc, int code, int size); +int oprofile_add_data(struct op_entry *entry, unsigned long val); +int oprofile_write_commit(struct op_entry *entry); + #endif /* OPROFILE_H */ -- cgit v1.2.2 From 87df4de8073f922a1f643b9fa6ba0412d5529ecf Mon Sep 17 00:00:00 2001 From: Benny Halevy Date: Mon, 15 Dec 2008 19:42:03 +0200 Subject: nfsd: last_byte_offset refactor the nfs4 server lock code to use last_byte_offset to compute the last byte covered by the lock. Check for overflow so that the last byte is set to NFS4_MAX_UINT64 if offset + len wraps around. Also, use NFS4_MAX_UINT64 for ~(u64)0 where appropriate. Signed-off-by: Benny Halevy Signed-off-by: J. Bruce Fields --- include/linux/nfs4.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h index ea0366769484..b912311a56b1 100644 --- a/include/linux/nfs4.h +++ b/include/linux/nfs4.h @@ -88,6 +88,8 @@ #define NFS4_ACE_GENERIC_EXECUTE 0x001200A0 #define NFS4_ACE_MASK_ALL 0x001F01FF +#define NFS4_MAX_UINT64 (~(u64)0) + enum nfs4_acl_whotype { NFS4_ACL_WHO_NAMED = 0, NFS4_ACL_WHO_OWNER, -- cgit v1.2.2 From db43910cb42285a99f45f7e0a0a32e32d0b61dcf Mon Sep 17 00:00:00 2001 From: Benny Halevy Date: Mon, 15 Dec 2008 19:42:24 +0200 Subject: nfsd: get rid of NFSD_VERSION Signed-off-by: Benny Halevy Signed-off-by: J. Bruce Fields --- include/linux/nfsd/nfsd.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/nfsd/nfsd.h b/include/linux/nfsd/nfsd.h index 21269405ffe2..e19f45991b2e 100644 --- a/include/linux/nfsd/nfsd.h +++ b/include/linux/nfsd/nfsd.h @@ -23,7 +23,6 @@ /* * nfsd version */ -#define NFSD_VERSION "0.5" #define NFSD_SUPPORTED_MINOR_VERSION 0 /* -- cgit v1.2.2 From 5886188dc7ba9a76babcd37452f44079a9a77f71 Mon Sep 17 00:00:00 2001 From: Benjamin Krill Date: Wed, 7 Jan 2009 10:32:38 +0100 Subject: serial: Add driver for the Cell Network Processor serial port NWP device Add support for the nwp serial device which is connected to a DCR bus. It uses the of_serial device driver to determine necessary properties from the device tree. The supported device is added as serial port number 85. NWP stands for network processor and it is part of the QPACE - Quantum Chromodynamics Parallel Computing on the Cell Broadband Engine project. The implementation is a lightweight uart implementation with the focus to consume as little resources as possible and it is connected to a DCR bus. Signed-off-by: Benjamin Krill Signed-off-by: Arnd Bergmann Signed-off-by: Andrew Morton Signed-off-by: Benjamin Herrenschmidt --- include/linux/nwpserial.h | 18 ++++++++++++++++++ include/linux/serial_core.h | 3 +++ 2 files changed, 21 insertions(+) create mode 100644 include/linux/nwpserial.h (limited to 'include/linux') diff --git a/include/linux/nwpserial.h b/include/linux/nwpserial.h new file mode 100644 index 000000000000..9acb21572eaf --- /dev/null +++ b/include/linux/nwpserial.h @@ -0,0 +1,18 @@ +/* + * Serial Port driver for a NWP uart device + * + * Copyright (C) 2008 IBM Corp., Benjamin Krill + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + */ +#ifndef _NWPSERIAL_H +#define _NWPSERIAL_H + +int nwpserial_register_port(struct uart_port *port); +void nwpserial_unregister_port(int line); + +#endif /* _NWPSERIAL_H */ diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h index b4199841f1fc..90bbbf0b1161 100644 --- a/include/linux/serial_core.h +++ b/include/linux/serial_core.h @@ -161,6 +161,9 @@ #define PORT_S3C6400 84 +/* NWPSERIAL */ +#define PORT_NWPSERIAL 85 + #ifdef __KERNEL__ #include -- cgit v1.2.2 From 8feae13110d60cc6287afabc2887366b0eb226c2 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 8 Jan 2009 12:04:47 +0000 Subject: NOMMU: Make VMAs per MM as for MMU-mode linux Make VMAs per mm_struct as for MMU-mode linux. This solves two problems: (1) In SYSV SHM where nattch for a segment does not reflect the number of shmat's (and forks) done. (2) In mmap() where the VMA's vm_mm is set to point to the parent mm by an exec'ing process when VM_EXECUTABLE is specified, regardless of the fact that a VMA might be shared and already have its vm_mm assigned to another process or a dead process. A new struct (vm_region) is introduced to track a mapped region and to remember the circumstances under which it may be shared and the vm_list_struct structure is discarded as it's no longer required. This patch makes the following additional changes: (1) Regions are now allocated with alloc_pages() rather than kmalloc() and with no recourse to __GFP_COMP, so the pages are not composite. Instead, each page has a reference on it held by the region. Anything else that is interested in such a page will have to get a reference on it to retain it. When the pages are released due to unmapping, each page is passed to put_page() and will be freed when the page usage count reaches zero. (2) Excess pages are trimmed after an allocation as the allocation must be made as a power-of-2 quantity of pages. (3) VMAs are added to the parent MM's R/B tree and mmap lists. As an MM may end up with overlapping VMAs within the tree, the VMA struct address is appended to the sort key. (4) Non-anonymous VMAs are now added to the backing inode's prio list. (5) Holes may be punched in anonymous VMAs with munmap(), releasing parts of the backing region. The VMA and region structs will be split if necessary. (6) sys_shmdt() only releases one attachment to a SYSV IPC shared memory segment instead of all the attachments at that addresss. Multiple shmat()'s return the same address under NOMMU-mode instead of different virtual addresses as under MMU-mode. (7) Core dumping for ELF-FDPIC requires fewer exceptions for NOMMU-mode. (8) /proc/maps is now the global list of mapped regions, and may list bits that aren't actually mapped anywhere. (9) /proc/meminfo gains a line (tagged "MmapCopy") that indicates the amount of RAM currently allocated by mmap to hold mappable regions that can't be mapped directly. These are copies of the backing device or file if not anonymous. These changes make NOMMU mode more similar to MMU mode. The downside is that NOMMU mode requires some extra memory to track things over NOMMU without this patch (VMAs are no longer shared, and there are now region structs). Signed-off-by: David Howells Tested-by: Mike Frysinger Acked-by: Paul Mundt --- include/linux/mm.h | 18 ++++++------------ include/linux/mm_types.h | 18 +++++++++++++++++- 2 files changed, 23 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 4a3d28c86443..b91a73fd1bcc 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -56,19 +56,9 @@ extern unsigned long mmap_min_addr; extern struct kmem_cache *vm_area_cachep; -/* - * This struct defines the per-mm list of VMAs for uClinux. If CONFIG_MMU is - * disabled, then there's a single shared list of VMAs maintained by the - * system, and mm's subscribe to these individually - */ -struct vm_list_struct { - struct vm_list_struct *next; - struct vm_area_struct *vma; -}; - #ifndef CONFIG_MMU -extern struct rb_root nommu_vma_tree; -extern struct rw_semaphore nommu_vma_sem; +extern struct rb_root nommu_region_tree; +extern struct rw_semaphore nommu_region_sem; extern unsigned int kobjsize(const void *objp); #endif @@ -1061,6 +1051,7 @@ extern void memmap_init_zone(unsigned long, int, unsigned long, unsigned long, enum memmap_context); extern void setup_per_zone_pages_min(void); extern void mem_init(void); +extern void __init mmap_init(void); extern void show_mem(void); extern void si_meminfo(struct sysinfo * val); extern void si_meminfo_node(struct sysinfo *val, int nid); @@ -1072,6 +1063,9 @@ extern void setup_per_cpu_pageset(void); static inline void setup_per_cpu_pageset(void) {} #endif +/* nommu.c */ +extern atomic_t mmap_pages_allocated; + /* prio_tree.c */ void vma_prio_tree_add(struct vm_area_struct *, struct vm_area_struct *old); void vma_prio_tree_insert(struct vm_area_struct *, struct prio_tree_root *); diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 9cfc9b627fdd..1c1e0d3a1714 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -96,6 +96,22 @@ struct page { #endif /* WANT_PAGE_VIRTUAL */ }; +/* + * A region containing a mapping of a non-memory backed file under NOMMU + * conditions. These are held in a global tree and are pinned by the VMAs that + * map parts of them. + */ +struct vm_region { + struct rb_node vm_rb; /* link in global region tree */ + unsigned long vm_flags; /* VMA vm_flags */ + unsigned long vm_start; /* start address of region */ + unsigned long vm_end; /* region initialised to here */ + unsigned long vm_pgoff; /* the offset in vm_file corresponding to vm_start */ + struct file *vm_file; /* the backing file or NULL */ + + atomic_t vm_usage; /* region usage count */ +}; + /* * This struct defines a memory VMM memory area. There is one of these * per VM-area/task. A VM area is any part of the process virtual memory @@ -152,7 +168,7 @@ struct vm_area_struct { unsigned long vm_truncate_count;/* truncate_count or restart_addr */ #ifndef CONFIG_MMU - atomic_t vm_usage; /* refcount (VMAs shared if !MMU) */ + struct vm_region *vm_region; /* NOMMU mapping region */ #endif #ifdef CONFIG_NUMA struct mempolicy *vm_policy; /* NUMA policy for the VMA */ -- cgit v1.2.2 From dd8632a12e500a684478fea0951f380478d56fed Mon Sep 17 00:00:00 2001 From: Paul Mundt Date: Thu, 8 Jan 2009 12:04:47 +0000 Subject: NOMMU: Make mmap allocation page trimming behaviour configurable. NOMMU mmap allocates a piece of memory for an mmap that's rounded up in size to the nearest power-of-2 number of pages. Currently it then discards the excess pages back to the page allocator, making that memory available for use by other things. This can, however, cause greater amount of fragmentation. To counter this, a sysctl is added in order to fine-tune the trimming behaviour. The default behaviour remains to trim pages aggressively, while this can either be disabled completely or set to a higher page-granular watermark in order to have finer-grained control. vm region vm_top bits taken from an earlier patch by David Howells. Signed-off-by: Paul Mundt Signed-off-by: David Howells Tested-by: Mike Frysinger --- include/linux/mm_types.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 1c1e0d3a1714..92915e81443f 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -106,6 +106,7 @@ struct vm_region { unsigned long vm_flags; /* VMA vm_flags */ unsigned long vm_start; /* start address of region */ unsigned long vm_end; /* region initialised to here */ + unsigned long vm_top; /* region allocated to here */ unsigned long vm_pgoff; /* the offset in vm_file corresponding to vm_start */ struct file *vm_file; /* the backing file or NULL */ -- cgit v1.2.2 From e2387d6c20752ccdb2895ba5de664fa39652f4cc Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Mon, 17 Nov 2008 14:35:44 +0000 Subject: leds: Make header variable naming consistent There is one place where the struct led_classdev as the function argument is named differently. Fix it. Signed-off-by: Wolfram Sang Signed-off-by: Richard Purdie --- include/linux/leds.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/leds.h b/include/linux/leds.h index d3a73f5a48c3..3c1a8ce6a5ea 100644 --- a/include/linux/leds.h +++ b/include/linux/leds.h @@ -62,7 +62,7 @@ struct led_classdev { extern int led_classdev_register(struct device *parent, struct led_classdev *led_cdev); -extern void led_classdev_unregister(struct led_classdev *lcd); +extern void led_classdev_unregister(struct led_classdev *led_cdev); extern void led_classdev_suspend(struct led_classdev *led_cdev); extern void led_classdev_resume(struct led_classdev *led_cdev); -- cgit v1.2.2 From 934cd3f979a1daacbd403398f2c7a8f6720c33aa Mon Sep 17 00:00:00 2001 From: Riku Voipio Date: Wed, 3 Dec 2008 08:21:36 +0000 Subject: leds: leds-pcs9532 - Move i2c work to a workqueque Apparently these might be called under atomic context, and i2c operations may sleep. BUG found by Ross Burton Signed-off-by: Riku Voipio Signed-off-by: Richard Purdie --- include/linux/leds-pca9532.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/leds-pca9532.h b/include/linux/leds-pca9532.h index 81b4207deb95..96eea90f01a8 100644 --- a/include/linux/leds-pca9532.h +++ b/include/linux/leds-pca9532.h @@ -15,6 +15,7 @@ #define __LINUX_PCA9532_H #include +#include enum pca9532_state { PCA9532_OFF = 0x0, @@ -31,6 +32,7 @@ struct pca9532_led { struct i2c_client *client; char *name; struct led_classdev ldev; + struct work_struct work; enum pca9532_type type; enum pca9532_state state; }; -- cgit v1.2.2 From 0081e8020ebd814a99e45720a10e869a54ee08a6 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Thu, 4 Dec 2008 16:52:33 +0000 Subject: leds: Add WM8350 LED driver The voltage and current regulators on the WM8350 AudioPlus PMIC can be used in concert to provide a power efficient LED driver. This driver implements support for this within the standard LED class. Platform initialisation code should configure the LED hardware in the init callback provided by the WM8350 core driver. The callback should use wm8350_isink_set_flash(), wm8350_dcdc25_set_mode() and wm8350_dcdc_set_slot() to configure the operating parameters of the regulators for their hardware and then then use wm8350_register_led() to instantiate the LED driver. This driver was originally written by Liam Girdwood, though it has been extensively modified since then. Signed-off-by: Mark Brown Signed-off-by: Richard Purdie --- include/linux/mfd/wm8350/pmic.h | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mfd/wm8350/pmic.h b/include/linux/mfd/wm8350/pmic.h index 96acbfc8aa12..be3264e286e0 100644 --- a/include/linux/mfd/wm8350/pmic.h +++ b/include/linux/mfd/wm8350/pmic.h @@ -13,6 +13,10 @@ #ifndef __LINUX_MFD_WM8350_PMIC_H #define __LINUX_MFD_WM8350_PMIC_H +#include +#include +#include + /* * Register values. */ @@ -700,6 +704,33 @@ struct wm8350; struct platform_device; struct regulator_init_data; +/* + * WM8350 LED platform data + */ +struct wm8350_led_platform_data { + const char *name; + const char *default_trigger; + int max_uA; +}; + +struct wm8350_led { + struct platform_device *pdev; + struct mutex mutex; + struct work_struct work; + spinlock_t value_lock; + enum led_brightness value; + struct led_classdev cdev; + int max_uA_index; + int enabled; + + struct regulator *isink; + struct regulator_consumer_supply isink_consumer; + struct regulator_init_data isink_init; + struct regulator *dcdc; + struct regulator_consumer_supply dcdc_consumer; + struct regulator_init_data dcdc_init; +}; + struct wm8350_pmic { /* Number of regulators of each type on this device */ int max_dcdc; @@ -717,10 +748,15 @@ struct wm8350_pmic { /* regulator devices */ struct platform_device *pdev[NUM_WM8350_REGULATORS]; + + /* LED devices */ + struct wm8350_led led[2]; }; int wm8350_register_regulator(struct wm8350 *wm8350, int reg, struct regulator_init_data *initdata); +int wm8350_register_led(struct wm8350 *wm8350, int lednum, int dcdc, int isink, + struct wm8350_led_platform_data *pdata); /* * Additional DCDC control not supported via regulator API -- cgit v1.2.2 From c835ee7f4154992e6cf0674d7ee136f5d36247a4 Mon Sep 17 00:00:00 2001 From: Richard Purdie Date: Tue, 6 Jan 2009 21:00:19 +0000 Subject: backlight: Add suspend/resume support to the backlight core Add suspend/resume support to the backlight core and enable use of it by appropriate drivers. Signed-off-by: Richard Purdie --- include/linux/backlight.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include/linux') diff --git a/include/linux/backlight.h b/include/linux/backlight.h index 1ee9488ca2e4..79ca2da81c87 100644 --- a/include/linux/backlight.h +++ b/include/linux/backlight.h @@ -31,6 +31,10 @@ struct backlight_device; struct fb_info; struct backlight_ops { + unsigned int options; + +#define BL_CORE_SUSPENDRESUME (1 << 0) + /* Notify the backlight driver some property has changed */ int (*update_status)(struct backlight_device *); /* Return the current backlight brightness (accounting for power, @@ -51,7 +55,19 @@ struct backlight_properties { modes; 4: full off), see FB_BLANK_XXX */ int power; /* FB Blanking active? (values as for power) */ + /* Due to be removed, please use (state & BL_CORE_FBBLANK) */ int fb_blank; + /* Flags used to signal drivers of state changes */ + /* Upper 4 bits are reserved for driver internal use */ + unsigned int state; + +#define BL_CORE_SUSPENDED (1 << 0) /* backlight is suspended */ +#define BL_CORE_FBBLANK (1 << 1) /* backlight is under an fb blank event */ +#define BL_CORE_DRIVER4 (1 << 28) /* reserved for driver specific use */ +#define BL_CORE_DRIVER3 (1 << 29) /* reserved for driver specific use */ +#define BL_CORE_DRIVER2 (1 << 30) /* reserved for driver specific use */ +#define BL_CORE_DRIVER1 (1 << 31) /* reserved for driver specific use */ + }; struct backlight_device { -- cgit v1.2.2 From 1107ba885e46964316c083d441d5dd185b6c9e49 Mon Sep 17 00:00:00 2001 From: Alex Zeffertt Date: Wed, 7 Jan 2009 18:07:11 -0800 Subject: xen: add xenfs to allow usermode <-> Xen interaction The xenfs filesystem exports various interfaces to usermode. Initially this exports a file to allow usermode to interact with xenbus/xenstore. Traditionally this appeared in /proc/xen. Rather than extending procfs, this patch adds a backward-compat mountpoint on /proc/xen, and provides a xenfs filesystem which can be mounted there. Signed-off-by: Alex Zeffertt Signed-off-by: Jeremy Fitzhardinge Cc: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/magic.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/magic.h b/include/linux/magic.h index f7f3fdddbef0..439f6f3cb0c4 100644 --- a/include/linux/magic.h +++ b/include/linux/magic.h @@ -13,6 +13,7 @@ #define EFS_SUPER_MAGIC 0x414A53 #define EXT2_SUPER_MAGIC 0xEF53 #define EXT3_SUPER_MAGIC 0xEF53 +#define XENFS_SUPER_MAGIC 0xabba1974 #define EXT4_SUPER_MAGIC 0xEF53 #define HPFS_SUPER_MAGIC 0xf995e849 #define ISOFS_SUPER_MAGIC 0x9660 -- cgit v1.2.2 From 18a82eb9f980b5e02cea651e4ecda26265d98933 Mon Sep 17 00:00:00 2001 From: Pekka J Enberg Date: Wed, 7 Jan 2009 18:07:19 -0800 Subject: ext2: allocate ->s_blockgroup_lock separately As spotted by kmemtrace, struct ext2_sb_info is 17024 bytes on 64-bit which makes it a very bad fit for SLAB allocators. The culprit of the wasted memory is ->s_blockgroup_lock which can be as big as 16 KB when NR_CPUS >= 32. To fix that, allocate ->s_blockgroup_lock, which fits nicely in a order 2 page in the worst case, separately. This shinks down struct ext2_sb_info enough to fit a 1 KB slab cache so now we allocate 16 KB + 1 KB instead of 32 KB saving 15 KB of memory. Acked-by: Andreas Dilger Signed-off-by: Pekka Enberg Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/ext2_fs_sb.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ext2_fs_sb.h b/include/linux/ext2_fs_sb.h index dc541f3653d1..1cdb66367c98 100644 --- a/include/linux/ext2_fs_sb.h +++ b/include/linux/ext2_fs_sb.h @@ -101,7 +101,7 @@ struct ext2_sb_info { struct percpu_counter s_freeblocks_counter; struct percpu_counter s_freeinodes_counter; struct percpu_counter s_dirs_counter; - struct blockgroup_lock s_blockgroup_lock; + struct blockgroup_lock *s_blockgroup_lock; /* root of the per fs reservation window tree */ spinlock_t s_rsv_window_lock; struct rb_root s_rsv_window_root; @@ -111,7 +111,7 @@ struct ext2_sb_info { static inline spinlock_t * sb_bgl_lock(struct ext2_sb_info *sbi, unsigned int block_group) { - return bgl_lock_ptr(&sbi->s_blockgroup_lock, block_group); + return bgl_lock_ptr(sbi->s_blockgroup_lock, block_group); } #endif /* _LINUX_EXT2_FS_SB */ -- cgit v1.2.2 From 0e090f1e05a563cc9acdda442767176bf1616001 Mon Sep 17 00:00:00 2001 From: Duane Griffin Date: Wed, 7 Jan 2009 18:07:20 -0800 Subject: ext2: don't inherit inappropriate inode flags from parent At present BTREE/INDEX is the only flag that new ext2 inodes do NOT inherit from their parent. In addition prevent the flags DIRTY, ECOMPR, INDEX, IMAGIC and TOPDIR from being inherited. List inheritable flags explicitly to prevent future flags from accidentally being inherited. This fixes the TOPDIR flag inheritance bug reported at http://bugzilla.kernel.org/show_bug.cgi?id=9866. Signed-off-by: Duane Griffin Acked-by: Andreas Dilger Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/ext2_fs.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ext2_fs.h b/include/linux/ext2_fs.h index 78c775a83f7c..c3a051819363 100644 --- a/include/linux/ext2_fs.h +++ b/include/linux/ext2_fs.h @@ -194,6 +194,13 @@ struct ext2_group_desc #define EXT2_FL_USER_VISIBLE FS_FL_USER_VISIBLE /* User visible flags */ #define EXT2_FL_USER_MODIFIABLE FS_FL_USER_MODIFIABLE /* User modifiable flags */ +/* Flags that should be inherited by new inodes from their parent. */ +#define EXT2_FL_INHERITED (EXT2_SECRM_FL | EXT2_UNRM_FL | EXT2_COMPR_FL |\ + EXT2_SYNC_FL | EXT2_IMMUTABLE_FL | EXT2_APPEND_FL |\ + EXT2_NODUMP_FL | EXT2_NOATIME_FL | EXT2_COMPRBLK_FL|\ + EXT2_NOCOMP_FL | EXT2_JOURNAL_DATA_FL |\ + EXT2_NOTAIL_FL | EXT2_DIRSYNC_FL) + /* * ioctl commands */ -- cgit v1.2.2 From ef8b646183868b2d042fa6cde0eef2a31263ff85 Mon Sep 17 00:00:00 2001 From: Duane Griffin Date: Wed, 7 Jan 2009 18:07:21 -0800 Subject: ext2: tighten restrictions on inode flags At the moment there are few restrictions on which flags may be set on which inodes. Specifically DIRSYNC may only be set on directories and IMMUTABLE and APPEND may not be set on links. Tighten that to disallow TOPDIR being set on non-directories and only NODUMP and NOATIME to be set on non-regular file, non-directories. Introduces a flags masking function which masks flags based on mode and use it during inode creation and when flags are set via the ioctl to facilitate future consistency. Signed-off-by: Duane Griffin Acked-by: Andreas Dilger Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/ext2_fs.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ext2_fs.h b/include/linux/ext2_fs.h index c3a051819363..121720d74e15 100644 --- a/include/linux/ext2_fs.h +++ b/include/linux/ext2_fs.h @@ -201,6 +201,23 @@ struct ext2_group_desc EXT2_NOCOMP_FL | EXT2_JOURNAL_DATA_FL |\ EXT2_NOTAIL_FL | EXT2_DIRSYNC_FL) +/* Flags that are appropriate for regular files (all but dir-specific ones). */ +#define EXT2_REG_FLMASK (~(EXT2_DIRSYNC_FL | EXT2_TOPDIR_FL)) + +/* Flags that are appropriate for non-directories/regular files. */ +#define EXT2_OTHER_FLMASK (EXT2_NODUMP_FL | EXT2_NOATIME_FL) + +/* Mask out flags that are inappropriate for the given type of inode. */ +static inline __u32 ext2_mask_flags(umode_t mode, __u32 flags) +{ + if (S_ISDIR(mode)) + return flags; + else if (S_ISREG(mode)) + return flags & EXT2_REG_FLMASK; + else + return flags & EXT2_OTHER_FLMASK; +} + /* * ioctl commands */ -- cgit v1.2.2 From f420d4dc4272fd223986762df2ad06056ddebada Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Wed, 7 Jan 2009 18:07:24 -0800 Subject: jbd: improve fsync batching There is a flaw with the way jbd handles fsync batching. If we fsync() a file and we were not the last person to run fsync() on this fs then we automatically sleep for 1 jiffie in order to wait for new writers to join into the transaction before forcing the commit. The problem with this is that with really fast storage (ie a Clariion) the time it takes to commit a transaction to disk is way faster than 1 jiffie in most cases, so sleeping means waiting longer with nothing to do than if we just committed the transaction and kept going. Ric Wheeler noticed this when using fs_mark with more than 1 thread, the throughput would plummet as he added more threads. This patch attempts to fix this problem by recording the average time in nanoseconds that it takes to commit a transaction to disk, and what time we started the transaction. If we run an fsync() and we have been running for less time than it takes to commit the transaction to disk, we sleep for the delta amount of time and then commit to disk. We acheive sub-jiffie sleeping using schedule_hrtimeout. This means that the wait time is auto-tuned to the speed of the underlying disk, instead of having this static timeout. I weighted the average according to somebody's comments (Andreas Dilger I think) in order to help normalize random outliers where we take way longer or way less time to commit than the average. I also have a min() check in there to make sure we don't sleep longer than a jiffie in case our storage is super slow, this was requested by Andrew. I unfortunately do not have access to a Clariion, so I had to use a ramdisk to represent a super fast array. I tested with a SATA drive with barrier=1 to make sure there was no regression with local disks, I tested with a 4 way multipathed Apple Xserve RAID array and of course the ramdisk. I ran the following command fs_mark -d /mnt/ext3-test -s 4096 -n 2000 -D 64 -t $i where $i was 2, 4, 8, 16 and 32. I mkfs'ed the fs each time. Here are my results type threads with patch without patch sata 2 24.6 26.3 sata 4 49.2 48.1 sata 8 70.1 67.0 sata 16 104.0 94.1 sata 32 153.6 142.7 xserve 2 246.4 222.0 xserve 4 480.0 440.8 xserve 8 829.5 730.8 xserve 16 1172.7 1026.9 xserve 32 1816.3 1650.5 ramdisk 2 2538.3 1745.6 ramdisk 4 2942.3 661.9 ramdisk 8 2882.5 999.8 ramdisk 16 2738.7 1801.9 ramdisk 32 2541.9 2394.0 Signed-off-by: Josef Bacik Cc: Andreas Dilger Cc: Arjan van de Ven Cc: Ric Wheeler Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/jbd.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include/linux') diff --git a/include/linux/jbd.h b/include/linux/jbd.h index 346e2b80be7d..6384b19efe64 100644 --- a/include/linux/jbd.h +++ b/include/linux/jbd.h @@ -542,6 +542,11 @@ struct transaction_s */ unsigned long t_expires; + /* + * When this transaction started, in nanoseconds [no locking] + */ + ktime_t t_start_time; + /* * How many handles used this transaction? [t_handle_lock] */ @@ -798,8 +803,18 @@ struct journal_s struct buffer_head **j_wbuf; int j_wbufsize; + /* + * this is the pid of the last person to run a synchronous operation + * through the journal. + */ pid_t j_last_sync_writer; + /* + * the average amount of time in nanoseconds it takes to commit a + * transaction to the disk. [j_state_lock] + */ + u64 j_average_commit_time; + /* * An opaque pointer to fs-private information. ext3 puts its * superblock pointer here -- cgit v1.2.2 From 5df096d67ec2b6578518caed7d57317a4b807aa1 Mon Sep 17 00:00:00 2001 From: Pekka Enberg Date: Wed, 7 Jan 2009 18:07:25 -0800 Subject: ext3: allocate ->s_blockgroup_lock separately As spotted by kmemtrace, struct ext3_sb_info is 17152 bytes on 64-bit which makes it a very bad fit for SLAB allocators. The culprit of the wasted memory is ->s_blockgroup_lock which can be as big as 16 KB when NR_CPUS >= 32. To fix that, allocate ->s_blockgroup_lock, which fits nicely in a order 2 page in the worst case, separately. This shinks down struct ext3_sb_info enough to fit a 1 KB slab cache so now we allocate 16 KB + 1 KB instead of 32 KB saving 15 KB of memory. Acked-by: Andreas Dilger Signed-off-by: Pekka Enberg Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/ext3_fs_sb.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ext3_fs_sb.h b/include/linux/ext3_fs_sb.h index e024e38248ff..76fdc0f4b028 100644 --- a/include/linux/ext3_fs_sb.h +++ b/include/linux/ext3_fs_sb.h @@ -60,7 +60,7 @@ struct ext3_sb_info { struct percpu_counter s_freeblocks_counter; struct percpu_counter s_freeinodes_counter; struct percpu_counter s_dirs_counter; - struct blockgroup_lock s_blockgroup_lock; + struct blockgroup_lock *s_blockgroup_lock; /* root of the per fs reservation window tree */ spinlock_t s_rsv_window_lock; @@ -86,7 +86,7 @@ struct ext3_sb_info { static inline spinlock_t * sb_bgl_lock(struct ext3_sb_info *sbi, unsigned int block_group) { - return bgl_lock_ptr(&sbi->s_blockgroup_lock, block_group); + return bgl_lock_ptr(sbi->s_blockgroup_lock, block_group); } #endif /* _LINUX_EXT3_FS_SB */ -- cgit v1.2.2 From 2e8671cb566da993425d324fc355af31edc6e7f1 Mon Sep 17 00:00:00 2001 From: Duane Griffin Date: Wed, 7 Jan 2009 18:07:26 -0800 Subject: ext3: don't inherit inappropriate inode flags from parent At present INDEX is the only flag that new ext3 inodes do NOT inherit from their parent. In addition prevent the flags DIRTY, ECOMPR, IMAGIC and TOPDIR from being inherited. List inheritable flags explicitly to prevent future flags from accidentally being inherited. This fixes the TOPDIR flag inheritance bug reported at http://bugzilla.kernel.org/show_bug.cgi?id=9866. Signed-off-by: Duane Griffin Acked-by: Andreas Dilger Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/ext3_fs.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ext3_fs.h b/include/linux/ext3_fs.h index d14f02918483..b745619a9b8e 100644 --- a/include/linux/ext3_fs.h +++ b/include/linux/ext3_fs.h @@ -178,6 +178,13 @@ struct ext3_group_desc #define EXT3_FL_USER_VISIBLE 0x0003DFFF /* User visible flags */ #define EXT3_FL_USER_MODIFIABLE 0x000380FF /* User modifiable flags */ +/* Flags that should be inherited by new inodes from their parent. */ +#define EXT3_FL_INHERITED (EXT3_SECRM_FL | EXT3_UNRM_FL | EXT3_COMPR_FL |\ + EXT3_SYNC_FL | EXT3_IMMUTABLE_FL | EXT3_APPEND_FL |\ + EXT3_NODUMP_FL | EXT3_NOATIME_FL | EXT3_COMPRBLK_FL|\ + EXT3_NOCOMPR_FL | EXT3_JOURNAL_DATA_FL |\ + EXT3_NOTAIL_FL | EXT3_DIRSYNC_FL) + /* * Inode dynamic state flags */ -- cgit v1.2.2 From 04143e2fb9d512c21e1dcfb561dbb0445dcfdc8c Mon Sep 17 00:00:00 2001 From: Duane Griffin Date: Wed, 7 Jan 2009 18:07:26 -0800 Subject: ext3: tighten restrictions on inode flags At the moment there are few restrictions on which flags may be set on which inodes. Specifically DIRSYNC may only be set on directories and IMMUTABLE and APPEND may not be set on links. Tighten that to disallow TOPDIR being set on non-directories and only NODUMP and NOATIME to be set on non-regular file, non-directories. Introduces a flags masking function which masks flags based on mode and use it during inode creation and when flags are set via the ioctl to facilitate future consistency. Signed-off-by: Duane Griffin Acked-by: Andreas Dilger Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/ext3_fs.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ext3_fs.h b/include/linux/ext3_fs.h index b745619a9b8e..d76800f6ecf0 100644 --- a/include/linux/ext3_fs.h +++ b/include/linux/ext3_fs.h @@ -185,6 +185,23 @@ struct ext3_group_desc EXT3_NOCOMPR_FL | EXT3_JOURNAL_DATA_FL |\ EXT3_NOTAIL_FL | EXT3_DIRSYNC_FL) +/* Flags that are appropriate for regular files (all but dir-specific ones). */ +#define EXT3_REG_FLMASK (~(EXT3_DIRSYNC_FL | EXT3_TOPDIR_FL)) + +/* Flags that are appropriate for non-directories/regular files. */ +#define EXT3_OTHER_FLMASK (EXT3_NODUMP_FL | EXT3_NOATIME_FL) + +/* Mask out flags that are inappropriate for the given type of inode. */ +static inline __u32 ext3_mask_flags(umode_t mode, __u32 flags) +{ + if (S_ISDIR(mode)) + return flags; + else if (S_ISREG(mode)) + return flags & EXT3_REG_FLMASK; + else + return flags & EXT3_OTHER_FLMASK; +} + /* * Inode dynamic state flags */ -- cgit v1.2.2 From b2aa30f7bb381e04c93eed106089ba55553955f1 Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Wed, 7 Jan 2009 18:07:37 -0800 Subject: cgroups: don't put struct cgroupfs_root protected by RCU We don't access struct cgroupfs_root in fast path, so we should not put struct cgroupfs_root protected by RCU But the comment in struct cgroup_subsys.root confuse us. struct cgroup_subsys.root is used in these places: 1 find_css_set(): if (ss->root->subsys_list.next == &ss->sibling) 2 rebind_subsystems(): if (ss->root != &rootnode) rcu_assign_pointer(ss->root, root); rcu_assign_pointer(subsys[i]->root, &rootnode); 3 cgroup_has_css_refs(): if (ss->root != cgrp->root) 4 cgroup_init_subsys(): ss->root = &rootnode; 5 proc_cgroupstats_show(): ss->name, ss->root->subsys_bits, ss->root->number_of_cgroups, !ss->disabled); 6 cgroup_clone(): root = subsys->root; if ((root != subsys->root) || All these place we have held cgroup_lock() or we don't dereference to struct cgroupfs_root. It's means wo don't need RCU when use struct cgroup_subsys.root, and we should not put struct cgroupfs_root protected by RCU. Signed-off-by: Lai Jiangshan Reviewed-by: Paul Menage Cc: KAMEZAWA Hiroyuki Cc: Pavel Emelyanov Cc: Balbir Singh Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/cgroup.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 08b78c09b09a..f68dfd8dd53a 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -337,7 +337,6 @@ struct cgroup_subsys { #define MAX_CGROUP_TYPE_NAMELEN 32 const char *name; - /* Protected by RCU */ struct cgroupfs_root *root; struct list_head sibling; -- cgit v1.2.2 From a47295e6bc42ad35f9c15ac66f598aa24debd4e2 Mon Sep 17 00:00:00 2001 From: Paul Menage Date: Wed, 7 Jan 2009 18:07:44 -0800 Subject: cgroups: make cgroup_path() RCU-safe Fix races between /proc/sched_debug by freeing cgroup objects via an RCU callback. Thus any cgroup reference obtained from an RCU-safe source will remain valid during the RCU section. Since dentries are also RCU-safe, this allows us to traverse up the tree safely. Additionally, make cgroup_path() check for a NULL cgrp->dentry to avoid trying to report a path for a partially-created cgroup. [lizf@cn.fujitsu.com: call deactive_super() in cgroup_diput()] Signed-off-by: Paul Menage Reviewed-by: Li Zefan Tested-by: Li Zefan Cc: Peter Zijlstra Signed-off-by: Li Zefan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/cgroup.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index f68dfd8dd53a..73d1c730c3c4 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -116,7 +116,7 @@ struct cgroup { struct list_head children; /* my children */ struct cgroup *parent; /* my parent */ - struct dentry *dentry; /* cgroup fs entry */ + struct dentry *dentry; /* cgroup fs entry, RCU protected */ /* Private pointers for each registered subsystem */ struct cgroup_subsys_state *subsys[CGROUP_SUBSYS_COUNT]; @@ -145,6 +145,9 @@ struct cgroup { int pids_use_count; /* Length of the current tasks_pids array */ int pids_length; + + /* For RCU-protected deletion */ + struct rcu_head rcu_head; }; /* A css_set is a structure holding pointers to a set of -- cgit v1.2.2 From 7a81b88cb53e335ff7d019e6398c95792c817d93 Mon Sep 17 00:00:00 2001 From: KAMEZAWA Hiroyuki Date: Wed, 7 Jan 2009 18:07:48 -0800 Subject: memcg: introduce charge-commit-cancel style of functions There is a small race in do_swap_page(). When the page swapped-in is charged, the mapcount can be greater than 0. But, at the same time some process (shares it ) call unmap and make mapcount 1->0 and the page is uncharged. CPUA CPUB mapcount == 1. (1) charge if mapcount==0 zap_pte_range() (2) mapcount 1 => 0. (3) uncharge(). (success) (4) set page's rmap() mapcount 0=>1 Then, this swap page's account is leaked. For fixing this, I added a new interface. - charge account to res_counter by PAGE_SIZE and try to free pages if necessary. - commit register page_cgroup and add to LRU if necessary. - cancel uncharge PAGE_SIZE because of do_swap_page failure. CPUA (1) charge (always) (2) set page's rmap (mapcount > 0) (3) commit charge was necessary or not after set_pte(). This protocol uses PCG_USED bit on page_cgroup for avoiding over accounting. Usual mem_cgroup_charge_common() does charge -> commit at a time. And this patch also adds following function to clarify all charges. - mem_cgroup_newpage_charge() ....replacement for mem_cgroup_charge() called against newly allocated anon pages. - mem_cgroup_charge_migrate_fixup() called only from remove_migration_ptes(). we'll have to rewrite this later.(this patch just keeps old behavior) This function will be removed by additional patch to make migration clearer. Good for clarifying "what we do" Then, we have 4 following charge points. - newpage - swap-in - add-to-cache. - migration. [akpm@linux-foundation.org: add missing inline directives to stubs] Signed-off-by: KAMEZAWA Hiroyuki Reviewed-by: Daisuke Nishimura Cc: Balbir Singh Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 36 ++++++++++++++++++++++++++++++++++-- 1 file changed, 34 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 1fbe14d39521..c592f315cd02 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -27,8 +27,17 @@ struct mm_struct; #ifdef CONFIG_CGROUP_MEM_RES_CTLR -extern int mem_cgroup_charge(struct page *page, struct mm_struct *mm, +extern int mem_cgroup_newpage_charge(struct page *page, struct mm_struct *mm, gfp_t gfp_mask); +extern int mem_cgroup_charge_migrate_fixup(struct page *page, + struct mm_struct *mm, gfp_t gfp_mask); +/* for swap handling */ +extern int mem_cgroup_try_charge(struct mm_struct *mm, + gfp_t gfp_mask, struct mem_cgroup **ptr); +extern void mem_cgroup_commit_charge_swapin(struct page *page, + struct mem_cgroup *ptr); +extern void mem_cgroup_cancel_charge_swapin(struct mem_cgroup *ptr); + extern int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm, gfp_t gfp_mask); extern void mem_cgroup_move_lists(struct page *page, enum lru_list lru); @@ -71,7 +80,9 @@ extern long mem_cgroup_calc_reclaim(struct mem_cgroup *mem, struct zone *zone, #else /* CONFIG_CGROUP_MEM_RES_CTLR */ -static inline int mem_cgroup_charge(struct page *page, +struct mem_cgroup; + +static inline int mem_cgroup_newpage_charge(struct page *page, struct mm_struct *mm, gfp_t gfp_mask) { return 0; @@ -83,6 +94,27 @@ static inline int mem_cgroup_cache_charge(struct page *page, return 0; } +static inline int mem_cgroup_charge_migrate_fixup(struct page *page, + struct mm_struct *mm, gfp_t gfp_mask) +{ + return 0; +} + +static inline int mem_cgroup_try_charge(struct mm_struct *mm, + gfp_t gfp_mask, struct mem_cgroup **ptr) +{ + return 0; +} + +static inline void mem_cgroup_commit_charge_swapin(struct page *page, + struct mem_cgroup *ptr) +{ +} + +static inline void mem_cgroup_cancel_charge_swapin(struct mem_cgroup *ptr) +{ +} + static inline void mem_cgroup_uncharge_page(struct page *page) { } -- cgit v1.2.2 From 01b1ae63c2270cbacfd43fea94578c17950eb548 Mon Sep 17 00:00:00 2001 From: KAMEZAWA Hiroyuki Date: Wed, 7 Jan 2009 18:07:50 -0800 Subject: memcg: simple migration handling Now, management of "charge" under page migration is done under following manner. (Assume migrate page contents from oldpage to newpage) before - "newpage" is charged before migration. at success. - "oldpage" is uncharged at somewhere(unmap, radix-tree-replace) at failure - "newpage" is uncharged. - "oldpage" is charged if necessary (*1) But (*1) is not reliable....because of GFP_ATOMIC. This patch tries to change behavior as following by charge/commit/cancel ops. before - charge PAGE_SIZE (no target page) success - commit charge against "newpage". failure - commit charge against "oldpage". (PCG_USED bit works effectively to avoid double-counting) - if "oldpage" is obsolete, cancel charge of PAGE_SIZE. Signed-off-by: KAMEZAWA Hiroyuki Reviewed-by: Daisuke Nishimura Cc: Balbir Singh Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 19 +++++++------------ 1 file changed, 7 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index c592f315cd02..b095f5f6ecf7 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -29,8 +29,6 @@ struct mm_struct; extern int mem_cgroup_newpage_charge(struct page *page, struct mm_struct *mm, gfp_t gfp_mask); -extern int mem_cgroup_charge_migrate_fixup(struct page *page, - struct mm_struct *mm, gfp_t gfp_mask); /* for swap handling */ extern int mem_cgroup_try_charge(struct mm_struct *mm, gfp_t gfp_mask, struct mem_cgroup **ptr); @@ -60,8 +58,9 @@ extern struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p); ((cgroup) == mem_cgroup_from_task((mm)->owner)) extern int -mem_cgroup_prepare_migration(struct page *page, struct page *newpage); -extern void mem_cgroup_end_migration(struct page *page); +mem_cgroup_prepare_migration(struct page *page, struct mem_cgroup **ptr); +extern void mem_cgroup_end_migration(struct mem_cgroup *mem, + struct page *oldpage, struct page *newpage); /* * For memory reclaim. @@ -94,12 +93,6 @@ static inline int mem_cgroup_cache_charge(struct page *page, return 0; } -static inline int mem_cgroup_charge_migrate_fixup(struct page *page, - struct mm_struct *mm, gfp_t gfp_mask) -{ - return 0; -} - static inline int mem_cgroup_try_charge(struct mm_struct *mm, gfp_t gfp_mask, struct mem_cgroup **ptr) { @@ -144,12 +137,14 @@ static inline int task_in_mem_cgroup(struct task_struct *task, } static inline int -mem_cgroup_prepare_migration(struct page *page, struct page *newpage) +mem_cgroup_prepare_migration(struct page *page, struct mem_cgroup **ptr) { return 0; } -static inline void mem_cgroup_end_migration(struct page *page) +static inline void mem_cgroup_end_migration(struct mem_cgroup *mem, + struct page *oldpage, + struct page *newpage) { } -- cgit v1.2.2 From d13d144309d2e5a3e6ad978b16c1d0226ddc9231 Mon Sep 17 00:00:00 2001 From: KAMEZAWA Hiroyuki Date: Wed, 7 Jan 2009 18:07:56 -0800 Subject: memcg: handle swap caches SwapCache support for memory resource controller (memcg) Before mem+swap controller, memcg itself should handle SwapCache in proper way. This is cut-out from it. In current memcg, SwapCache is just leaked and the user can create tons of SwapCache. This is a leak of account and should be handled. SwapCache accounting is done as following. charge (anon) - charged when it's mapped. (because of readahead, charge at add_to_swap_cache() is not sane) uncharge (anon) - uncharged when it's dropped from swapcache and fully unmapped. means it's not uncharged at unmap. Note: delete from swap cache at swap-in is done after rmap information is established. charge (shmem) - charged at swap-in. this prevents charge at add_to_page_cache(). uncharge (shmem) - uncharged when it's dropped from swapcache and not on shmem's radix-tree. at migration, check against 'old page' is modified to handle shmem. Comparing to the old version discussed (and caused troubles), we have advantages of - PCG_USED bit. - simple migrating handling. So, situation is much easier than several months ago, maybe. [hugh@veritas.com: memcg: handle swap caches build fix] Reviewed-by: Daisuke Nishimura Tested-by: Daisuke Nishimura Signed-off-by: KAMEZAWA Hiroyuki Cc: Hugh Dickins Cc: Li Zefan Cc: Balbir Singh Cc: Pavel Emelyanov Signed-off-by: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/swap.h | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) (limited to 'include/linux') diff --git a/include/linux/swap.h b/include/linux/swap.h index 91dee50fe260..f8f3907533f0 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -333,6 +333,22 @@ static inline void disable_swap_token(void) put_swap_token(swap_token_mm); } +#ifdef CONFIG_CGROUP_MEM_RES_CTLR +extern int mem_cgroup_cache_charge_swapin(struct page *page, + struct mm_struct *mm, gfp_t mask, bool locked); +extern void mem_cgroup_uncharge_swapcache(struct page *page); +#else +static inline +int mem_cgroup_cache_charge_swapin(struct page *page, + struct mm_struct *mm, gfp_t mask, bool locked) +{ + return 0; +} +static inline void mem_cgroup_uncharge_swapcache(struct page *page) +{ +} +#endif + #else /* CONFIG_SWAP */ #define nr_swap_pages 0L @@ -409,6 +425,12 @@ static inline swp_entry_t get_swap_page(void) #define has_swap_token(x) 0 #define disable_swap_token() do { } while(0) +static inline int mem_cgroup_cache_charge_swapin(struct page *page, + struct mm_struct *mm, gfp_t mask, bool locked) +{ + return 0; +} + #endif /* CONFIG_SWAP */ #endif /* __KERNEL__*/ #endif /* _LINUX_SWAP_H */ -- cgit v1.2.2 From c077719be8e9e6b55702117513d1b5f41d80404a Mon Sep 17 00:00:00 2001 From: KAMEZAWA Hiroyuki Date: Wed, 7 Jan 2009 18:07:57 -0800 Subject: memcg: mem+swap controller Kconfig Config and control variable for mem+swap controller. This patch adds CONFIG_CGROUP_MEM_RES_CTLR_SWAP (memory resource controller swap extension.) For accounting swap, it's obvious that we have to use additional memory to remember "who uses swap". This adds more overhead. So, it's better to offer "choice" to users. This patch adds 2 choices. This patch adds 2 parameters to enable swap extension or not. - CONFIG - boot option Reviewed-by: Daisuke Nishimura Signed-off-by: KAMEZAWA Hiroyuki Cc: Li Zefan Cc: Balbir Singh Cc: Pavel Emelyanov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index b095f5f6ecf7..41b46cc9d1f1 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -77,6 +77,9 @@ extern void mem_cgroup_record_reclaim_priority(struct mem_cgroup *mem, extern long mem_cgroup_calc_reclaim(struct mem_cgroup *mem, struct zone *zone, int priority, enum lru_list lru); +#ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP +extern int do_swap_account; +#endif #else /* CONFIG_CGROUP_MEM_RES_CTLR */ struct mem_cgroup; -- cgit v1.2.2 From 27a7faa0779dd13729196c1a818c294f44bbd1ee Mon Sep 17 00:00:00 2001 From: KAMEZAWA Hiroyuki Date: Wed, 7 Jan 2009 18:07:58 -0800 Subject: memcg: swap cgroup for remembering usage For accounting swap, we need a record per swap entry, at least. This patch adds following function. - swap_cgroup_swapon() .... called from swapon - swap_cgroup_swapoff() ... called at the end of swapoff. - swap_cgroup_record() .... record information of swap entry. - swap_cgroup_lookup() .... lookup information of swap entry. This patch just implements "how to record information". No actual method for limit the usage of swap. These routine uses flat table to record and lookup. "wise" lookup system like radix-tree requires requires memory allocation at new records but swap-out is usually called under memory shortage (or memcg hits limit.) So, I used static allocation. (maybe dynamic allocation is not very hard but it adds additional memory allocation in memory shortage path.) Note1: In this, we use pointer to record information and this means 8bytes per swap entry. I think we can reduce this when we create "id of cgroup" in the range of 0-65535 or 0-255. Reported-by: Daisuke Nishimura Reviewed-by: Daisuke Nishimura Tested-by: Daisuke Nishimura Reported-by: Hugh Dickins Reported-by: Balbir Singh Reported-by: Andrew Morton Signed-off-by: KAMEZAWA Hiroyuki Cc: Pavel Emelianov Cc: Li Zefan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/page_cgroup.h | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) (limited to 'include/linux') diff --git a/include/linux/page_cgroup.h b/include/linux/page_cgroup.h index 1e6d34bfa094..d754b2dfbf2d 100644 --- a/include/linux/page_cgroup.h +++ b/include/linux/page_cgroup.h @@ -104,5 +104,40 @@ static inline void page_cgroup_init(void) { } +#endif + +#ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP +#include +extern struct mem_cgroup * +swap_cgroup_record(swp_entry_t ent, struct mem_cgroup *mem); +extern struct mem_cgroup *lookup_swap_cgroup(swp_entry_t ent); +extern int swap_cgroup_swapon(int type, unsigned long max_pages); +extern void swap_cgroup_swapoff(int type); +#else +#include + +static inline +struct mem_cgroup *swap_cgroup_record(swp_entry_t ent, struct mem_cgroup *mem) +{ + return NULL; +} + +static inline +struct mem_cgroup *lookup_swap_cgroup(swp_entry_t ent) +{ + return NULL; +} + +static inline int +swap_cgroup_swapon(int type, unsigned long max_pages) +{ + return 0; +} + +static inline void swap_cgroup_swapoff(int type) +{ + return; +} + #endif #endif -- cgit v1.2.2 From 8c7c6e34a1256a5082d38c8e9bd1474476912715 Mon Sep 17 00:00:00 2001 From: KAMEZAWA Hiroyuki Date: Wed, 7 Jan 2009 18:08:00 -0800 Subject: memcg: mem+swap controller core This patch implements per cgroup limit for usage of memory+swap. However there are SwapCache, double counting of swap-cache and swap-entry is avoided. Mem+Swap controller works as following. - memory usage is limited by memory.limit_in_bytes. - memory + swap usage is limited by memory.memsw_limit_in_bytes. This has following benefits. - A user can limit total resource usage of mem+swap. Without this, because memory resource controller doesn't take care of usage of swap, a process can exhaust all the swap (by memory leak.) We can avoid this case. And Swap is shared resource but it cannot be reclaimed (goes back to memory) until it's used. This characteristic can be trouble when the memory is divided into some parts by cpuset or memcg. Assume group A and group B. After some application executes, the system can be.. Group A -- very large free memory space but occupy 99% of swap. Group B -- under memory shortage but cannot use swap...it's nearly full. Ability to set appropriate swap limit for each group is required. Maybe someone wonder "why not swap but mem+swap ?" - The global LRU(kswapd) can swap out arbitrary pages. Swap-out means to move account from memory to swap...there is no change in usage of mem+swap. In other words, when we want to limit the usage of swap without affecting global LRU, mem+swap limit is better than just limiting swap. Accounting target information is stored in swap_cgroup which is per swap entry record. Charge is done as following. map - charge page and memsw. unmap - uncharge page/memsw if not SwapCache. swap-out (__delete_from_swap_cache) - uncharge page - record mem_cgroup information to swap_cgroup. swap-in (do_swap_page) - charged as page and memsw. record in swap_cgroup is cleared. memsw accounting is decremented. swap-free (swap_free()) - if swap entry is freed, memsw is uncharged by PAGE_SIZE. There are people work under never-swap environments and consider swap as something bad. For such people, this mem+swap controller extension is just an overhead. This overhead is avoided by config or boot option. (see Kconfig. detail is not in this patch.) TODO: - maybe more optimization can be don in swap-in path. (but not very safe.) But we just do simple accounting at this stage. [nishimura@mxp.nes.nec.co.jp: make resize limit hold mutex] [hugh@veritas.com: memswap controller core swapcache fixes] Signed-off-by: KAMEZAWA Hiroyuki Cc: Li Zefan Cc: Balbir Singh Cc: Pavel Emelyanov Signed-off-by: Daisuke Nishimura Signed-off-by: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 11 +++++++++-- include/linux/swap.h | 14 +++++++++++--- 2 files changed, 20 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 41b46cc9d1f1..ca51ac72d6c0 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -32,6 +32,8 @@ extern int mem_cgroup_newpage_charge(struct page *page, struct mm_struct *mm, /* for swap handling */ extern int mem_cgroup_try_charge(struct mm_struct *mm, gfp_t gfp_mask, struct mem_cgroup **ptr); +extern int mem_cgroup_try_charge_swapin(struct mm_struct *mm, + struct page *page, gfp_t mask, struct mem_cgroup **ptr); extern void mem_cgroup_commit_charge_swapin(struct page *page, struct mem_cgroup *ptr); extern void mem_cgroup_cancel_charge_swapin(struct mem_cgroup *ptr); @@ -80,7 +82,6 @@ extern long mem_cgroup_calc_reclaim(struct mem_cgroup *mem, struct zone *zone, #ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP extern int do_swap_account; #endif - #else /* CONFIG_CGROUP_MEM_RES_CTLR */ struct mem_cgroup; @@ -97,7 +98,13 @@ static inline int mem_cgroup_cache_charge(struct page *page, } static inline int mem_cgroup_try_charge(struct mm_struct *mm, - gfp_t gfp_mask, struct mem_cgroup **ptr) + gfp_t gfp_mask, struct mem_cgroup **ptr) +{ + return 0; +} + +static inline int mem_cgroup_try_charge_swapin(struct mm_struct *mm, + struct page *page, gfp_t gfp_mask, struct mem_cgroup **ptr) { return 0; } diff --git a/include/linux/swap.h b/include/linux/swap.h index f8f3907533f0..be938ce4895a 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -214,7 +214,7 @@ static inline void lru_cache_add_active_file(struct page *page) extern unsigned long try_to_free_pages(struct zonelist *zonelist, int order, gfp_t gfp_mask); extern unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem, - gfp_t gfp_mask); + gfp_t gfp_mask, bool noswap); extern int __isolate_lru_page(struct page *page, int mode, int file); extern unsigned long shrink_all_memory(unsigned long nr_pages); extern int vm_swappiness; @@ -336,7 +336,7 @@ static inline void disable_swap_token(void) #ifdef CONFIG_CGROUP_MEM_RES_CTLR extern int mem_cgroup_cache_charge_swapin(struct page *page, struct mm_struct *mm, gfp_t mask, bool locked); -extern void mem_cgroup_uncharge_swapcache(struct page *page); +extern void mem_cgroup_uncharge_swapcache(struct page *page, swp_entry_t ent); #else static inline int mem_cgroup_cache_charge_swapin(struct page *page, @@ -344,7 +344,15 @@ int mem_cgroup_cache_charge_swapin(struct page *page, { return 0; } -static inline void mem_cgroup_uncharge_swapcache(struct page *page) +static inline void +mem_cgroup_uncharge_swapcache(struct page *page, swp_entry_t ent) +{ +} +#endif +#ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP +extern void mem_cgroup_uncharge_swap(swp_entry_t ent); +#else +static inline void mem_cgroup_uncharge_swap(swp_entry_t ent) { } #endif -- cgit v1.2.2 From 08e552c69c6930d64722de3ec18c51844d06ee28 Mon Sep 17 00:00:00 2001 From: KAMEZAWA Hiroyuki Date: Wed, 7 Jan 2009 18:08:01 -0800 Subject: memcg: synchronized LRU A big patch for changing memcg's LRU semantics. Now, - page_cgroup is linked to mem_cgroup's its own LRU (per zone). - LRU of page_cgroup is not synchronous with global LRU. - page and page_cgroup is one-to-one and statically allocated. - To find page_cgroup is on what LRU, you have to check pc->mem_cgroup as - lru = page_cgroup_zoneinfo(pc, nid_of_pc, zid_of_pc); - SwapCache is handled. And, when we handle LRU list of page_cgroup, we do following. pc = lookup_page_cgroup(page); lock_page_cgroup(pc); .....................(1) mz = page_cgroup_zoneinfo(pc); spin_lock(&mz->lru_lock); .....add to LRU spin_unlock(&mz->lru_lock); unlock_page_cgroup(pc); But (1) is spin_lock and we have to be afraid of dead-lock with zone->lru_lock. So, trylock() is used at (1), now. Without (1), we can't trust "mz" is correct. This is a trial to remove this dirty nesting of locks. This patch changes mz->lru_lock to be zone->lru_lock. Then, above sequence will be written as spin_lock(&zone->lru_lock); # in vmscan.c or swap.c via global LRU mem_cgroup_add/remove/etc_lru() { pc = lookup_page_cgroup(page); mz = page_cgroup_zoneinfo(pc); if (PageCgroupUsed(pc)) { ....add to LRU } spin_lock(&zone->lru_lock); # in vmscan.c or swap.c via global LRU This is much simpler. (*) We're safe even if we don't take lock_page_cgroup(pc). Because.. 1. When pc->mem_cgroup can be modified. - at charge. - at account_move(). 2. at charge the PCG_USED bit is not set before pc->mem_cgroup is fixed. 3. at account_move() the page is isolated and not on LRU. Pros. - easy for maintenance. - memcg can make use of laziness of pagevec. - we don't have to duplicated LRU/Active/Unevictable bit in page_cgroup. - LRU status of memcg will be synchronized with global LRU's one. - # of locks are reduced. - account_move() is simplified very much. Cons. - may increase cost of LRU rotation. (no impact if memcg is not configured.) Signed-off-by: KAMEZAWA Hiroyuki Cc: Li Zefan Cc: Balbir Singh Cc: Pavel Emelyanov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 29 +++++++++++++++++++++++++++-- include/linux/mm_inline.h | 3 +++ include/linux/page_cgroup.h | 17 ----------------- 3 files changed, 30 insertions(+), 19 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index ca51ac72d6c0..32c07b1852d6 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -40,7 +40,12 @@ extern void mem_cgroup_cancel_charge_swapin(struct mem_cgroup *ptr); extern int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm, gfp_t gfp_mask); -extern void mem_cgroup_move_lists(struct page *page, enum lru_list lru); +extern void mem_cgroup_add_lru_list(struct page *page, enum lru_list lru); +extern void mem_cgroup_del_lru_list(struct page *page, enum lru_list lru); +extern void mem_cgroup_rotate_lru_list(struct page *page, enum lru_list lru); +extern void mem_cgroup_del_lru(struct page *page); +extern void mem_cgroup_move_lists(struct page *page, + enum lru_list from, enum lru_list to); extern void mem_cgroup_uncharge_page(struct page *page); extern void mem_cgroup_uncharge_cache_page(struct page *page); extern int mem_cgroup_shrink_usage(struct mm_struct *mm, gfp_t gfp_mask); @@ -131,7 +136,27 @@ static inline int mem_cgroup_shrink_usage(struct mm_struct *mm, gfp_t gfp_mask) return 0; } -static inline void mem_cgroup_move_lists(struct page *page, bool active) +static inline void mem_cgroup_add_lru_list(struct page *page, int lru) +{ +} + +static inline void mem_cgroup_del_lru_list(struct page *page, int lru) +{ + return ; +} + +static inline void mem_cgroup_rotate_lru_list(struct page *page, int lru) +{ + return ; +} + +static inline void mem_cgroup_del_lru(struct page *page) +{ + return ; +} + +static inline void +mem_cgroup_move_lists(struct page *page, enum lru_list from, enum lru_list to) { } diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h index c948350c378e..37ef13d0f01e 100644 --- a/include/linux/mm_inline.h +++ b/include/linux/mm_inline.h @@ -28,6 +28,7 @@ add_page_to_lru_list(struct zone *zone, struct page *page, enum lru_list l) { list_add(&page->lru, &zone->lru[l].list); __inc_zone_state(zone, NR_LRU_BASE + l); + mem_cgroup_add_lru_list(page, l); } static inline void @@ -35,6 +36,7 @@ del_page_from_lru_list(struct zone *zone, struct page *page, enum lru_list l) { list_del(&page->lru); __dec_zone_state(zone, NR_LRU_BASE + l); + mem_cgroup_del_lru_list(page, l); } static inline void @@ -54,6 +56,7 @@ del_page_from_lru(struct zone *zone, struct page *page) l += page_is_file_cache(page); } __dec_zone_state(zone, NR_LRU_BASE + l); + mem_cgroup_del_lru_list(page, l); } /** diff --git a/include/linux/page_cgroup.h b/include/linux/page_cgroup.h index d754b2dfbf2d..602cc1fdee90 100644 --- a/include/linux/page_cgroup.h +++ b/include/linux/page_cgroup.h @@ -26,10 +26,6 @@ enum { PCG_LOCK, /* page cgroup is locked */ PCG_CACHE, /* charged as cache */ PCG_USED, /* this object is in use. */ - /* flags for LRU placement */ - PCG_ACTIVE, /* page is active in this cgroup */ - PCG_FILE, /* page is file system backed */ - PCG_UNEVICTABLE, /* page is unevictableable */ }; #define TESTPCGFLAG(uname, lname) \ @@ -50,19 +46,6 @@ TESTPCGFLAG(Cache, CACHE) TESTPCGFLAG(Used, USED) CLEARPCGFLAG(Used, USED) -/* LRU management flags (from global-lru definition) */ -TESTPCGFLAG(File, FILE) -SETPCGFLAG(File, FILE) -CLEARPCGFLAG(File, FILE) - -TESTPCGFLAG(Active, ACTIVE) -SETPCGFLAG(Active, ACTIVE) -CLEARPCGFLAG(Active, ACTIVE) - -TESTPCGFLAG(Unevictable, UNEVICTABLE) -SETPCGFLAG(Unevictable, UNEVICTABLE) -CLEARPCGFLAG(Unevictable, UNEVICTABLE) - static inline int page_cgroup_nid(struct page_cgroup *pc) { return page_to_nid(pc->page); -- cgit v1.2.2 From f8d665422603ee1b8ed04dcad4242f14d623c941 Mon Sep 17 00:00:00 2001 From: Hirokazu Takahashi Date: Wed, 7 Jan 2009 18:08:02 -0800 Subject: memcg: add mem_cgroup_disabled() We check mem_cgroup is disabled or not by checking mem_cgroup_subsys.disabled. I think it has more references than expected, now. replacing if (mem_cgroup_subsys.disabled) with if (mem_cgroup_disabled()) give us good look, I think. [kamezawa.hiroyu@jp.fujitsu.com: fix typo] Signed-off-by: KAMEZAWA Hiroyuki Cc: Li Zefan Cc: Balbir Singh Cc: Pavel Emelyanov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 32c07b1852d6..472efd09118c 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -19,7 +19,7 @@ #ifndef _LINUX_MEMCONTROL_H #define _LINUX_MEMCONTROL_H - +#include struct mem_cgroup; struct page_cgroup; struct page; @@ -87,6 +87,14 @@ extern long mem_cgroup_calc_reclaim(struct mem_cgroup *mem, struct zone *zone, #ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP extern int do_swap_account; #endif + +static inline bool mem_cgroup_disabled(void) +{ + if (mem_cgroup_subsys.disabled) + return true; + return false; +} + #else /* CONFIG_CGROUP_MEM_RES_CTLR */ struct mem_cgroup; @@ -214,6 +222,11 @@ static inline long mem_cgroup_calc_reclaim(struct mem_cgroup *mem, { return 0; } + +static inline bool mem_cgroup_disabled(void) +{ + return true; +} #endif /* CONFIG_CGROUP_MEM_CONT */ #endif /* _LINUX_MEMCONTROL_H */ -- cgit v1.2.2 From 28dbc4b6a01fb579a9441c7b81e3d3413dc452df Mon Sep 17 00:00:00 2001 From: Balbir Singh Date: Wed, 7 Jan 2009 18:08:05 -0800 Subject: memcg: memory cgroup resource counters for hierarchy Add support for building hierarchies in resource counters. Cgroups allows us to build a deep hierarchy, but we currently don't link the resource counters belonging to the memory controller control groups, in the same fashion as the corresponding cgroup entries in the cgroup hierarchy. This patch provides the infrastructure for resource counters that have the same hiearchy as their cgroup counter parts. These set of patches are based on the resource counter hiearchy patches posted by Pavel Emelianov. NOTE: Building hiearchies is expensive, deeper hierarchies imply charging the all the way up to the root. It is known that hiearchies are expensive, so the user needs to be careful and aware of the trade-offs before creating very deep ones. [akpm@linux-foundation.org: coding-style fixes] Signed-off-by: Balbir Singh Cc: YAMAMOTO Takashi Cc: Paul Menage Cc: Li Zefan Cc: David Rientjes Cc: Pavel Emelianov Cc: Dhaval Giani Cc: KAMEZAWA Hiroyuki Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/res_counter.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/res_counter.h b/include/linux/res_counter.h index 271c1c2c9f6f..dede0a2cfc45 100644 --- a/include/linux/res_counter.h +++ b/include/linux/res_counter.h @@ -43,6 +43,10 @@ struct res_counter { * the routines below consider this to be IRQ-safe */ spinlock_t lock; + /* + * Parent counter, used for hierarchial resource accounting + */ + struct res_counter *parent; }; /** @@ -87,7 +91,7 @@ enum { * helpers for accounting */ -void res_counter_init(struct res_counter *counter); +void res_counter_init(struct res_counter *counter, struct res_counter *parent); /* * charge - try to consume more resource. @@ -103,7 +107,7 @@ void res_counter_init(struct res_counter *counter); int __must_check res_counter_charge_locked(struct res_counter *counter, unsigned long val); int __must_check res_counter_charge(struct res_counter *counter, - unsigned long val); + unsigned long val, struct res_counter **limit_fail_at); /* * uncharge - tell that some portion of the resource is released -- cgit v1.2.2 From 2e4d40915fb85207fe48cfc31201824ec6d7426e Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Wed, 7 Jan 2009 18:08:07 -0800 Subject: memcontrol: rcu_read_lock() to protect mm_match_cgroup() mm_match_cgroup() calls cgroup_subsys_state(). We must use rcu_read_lock() to protect cgroup_subsys_state(). Signed-off-by: Lai Jiangshan Cc: Paul Menage Reviewed-by: KAMEZAWA Hiroyuki Cc: Pavel Emelyanov Cc: Balbir Singh Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 472efd09118c..2de6504e01fb 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -61,8 +61,15 @@ int task_in_mem_cgroup(struct task_struct *task, const struct mem_cgroup *mem); extern struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p); -#define mm_match_cgroup(mm, cgroup) \ - ((cgroup) == mem_cgroup_from_task((mm)->owner)) +static inline +int mm_match_cgroup(const struct mm_struct *mm, const struct mem_cgroup *cgroup) +{ + struct mem_cgroup *mem; + rcu_read_lock(); + mem = mem_cgroup_from_task((mm)->owner); + rcu_read_unlock(); + return cgroup == mem; +} extern int mem_cgroup_prepare_migration(struct page *page, struct mem_cgroup **ptr); -- cgit v1.2.2 From a636b327f731143ccc544b966cfd8de6cb6d72c6 Mon Sep 17 00:00:00 2001 From: KAMEZAWA Hiroyuki Date: Wed, 7 Jan 2009 18:08:08 -0800 Subject: memcg: avoid unnecessary system-wide-oom-killer Current mmtom has new oom function as pagefault_out_of_memory(). It's added for select bad process rathar than killing current. When memcg hit limit and calls OOM at page_fault, this handler called and system-wide-oom handling happens. (means kernel panics if panic_on_oom is true....) To avoid overkill, check memcg's recent behavior before starting system-wide-oom. And this patch also fixes to guarantee "don't accnout against process with TIF_MEMDIE". This is necessary for smooth OOM. [akpm@linux-foundation.org: build fix] Signed-off-by: KAMEZAWA Hiroyuki Cc: Li Zefan Cc: Balbir Singh Cc: Daisuke Nishimura Cc: Badari Pulavarty Cc: Jan Blunck Cc: Hirokazu Takahashi Cc: Nick Piggin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 2de6504e01fb..2fdd1380bf0a 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -102,6 +102,8 @@ static inline bool mem_cgroup_disabled(void) return false; } +extern bool mem_cgroup_oom_called(struct task_struct *task); + #else /* CONFIG_CGROUP_MEM_RES_CTLR */ struct mem_cgroup; @@ -234,6 +236,11 @@ static inline bool mem_cgroup_disabled(void) { return true; } + +static inline bool mem_cgroup_oom_called(struct task_struct *task) +{ + return false; +} #endif /* CONFIG_CGROUP_MEM_CONT */ #endif /* _LINUX_MEMCONTROL_H */ -- cgit v1.2.2 From 2c26fdd70c3094fa3e84caf9ef434911933d5477 Mon Sep 17 00:00:00 2001 From: KAMEZAWA Hiroyuki Date: Wed, 7 Jan 2009 18:08:10 -0800 Subject: memcg: revert gfp mask fix My patch, memcg-fix-gfp_mask-of-callers-of-charge.patch changed gfp_mask of callers of charge to be GFP_HIGHUSER_MOVABLE for showing what will happen at memory reclaim. But in recent discussion, it's NACKed because it sounds ugly. This patch is for reverting it and add some clean up to gfp_mask of callers of charge. No behavior change but need review before generating HUNK in deep queue. This patch also adds explanation to meaning of gfp_mask passed to charge functions in memcontrol.h. Signed-off-by: KAMEZAWA Hiroyuki Cc: Balbir Singh Cc: Daisuke Nishimura Cc: Hugh Dickins Cc: KOSAKI Motohiro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 2fdd1380bf0a..59ac95a64508 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -26,6 +26,16 @@ struct page; struct mm_struct; #ifdef CONFIG_CGROUP_MEM_RES_CTLR +/* + * All "charge" functions with gfp_mask should use GFP_KERNEL or + * (gfp_mask & GFP_RECLAIM_MASK). In current implementatin, memcg doesn't + * alloc memory but reclaims memory from all available zones. So, "where I want + * memory from" bits of gfp_mask has no meaning. So any bits of that field is + * available but adding a rule is better. charge functions' gfp_mask should + * be set to GFP_KERNEL or gfp_mask & GFP_RECLAIM_MASK for avoiding ambiguous + * codes. + * (Of course, if memcg does memory allocation in future, GFP_KERNEL is sane.) + */ extern int mem_cgroup_newpage_charge(struct page *page, struct mm_struct *mm, gfp_t gfp_mask); -- cgit v1.2.2 From f89eb90e33fd4e4e0cc1a6d20afd63c5a561885a Mon Sep 17 00:00:00 2001 From: KOSAKI Motohiro Date: Wed, 7 Jan 2009 18:08:14 -0800 Subject: inactive_anon_is_low: move to vmscan The inactive_anon_is_low() is called only vmscan. Then it can move to vmscan.c This patch doesn't have any functional change. Reviewd-by: KAMEZAWA Hiroyuki Acked-by: Rik van Riel Signed-off-by: KOSAKI Motohiro Cc: Balbir Singh Cc: Daisuke Nishimura Cc: Hugh Dickins Cc: KOSAKI Motohiro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm_inline.h | 19 ------------------- 1 file changed, 19 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h index 37ef13d0f01e..7fbb97267556 100644 --- a/include/linux/mm_inline.h +++ b/include/linux/mm_inline.h @@ -81,23 +81,4 @@ static inline enum lru_list page_lru(struct page *page) return lru; } -/** - * inactive_anon_is_low - check if anonymous pages need to be deactivated - * @zone: zone to check - * - * Returns true if the zone does not have enough inactive anon pages, - * meaning some active anon pages need to be deactivated. - */ -static inline int inactive_anon_is_low(struct zone *zone) -{ - unsigned long active, inactive; - - active = zone_page_state(zone, NR_ACTIVE_ANON); - inactive = zone_page_state(zone, NR_INACTIVE_ANON); - - if (inactive * zone->inactive_ratio < active) - return 1; - - return 0; -} #endif -- cgit v1.2.2 From 6e9015716ae9b59e9635d692fddfcfb9582c146c Mon Sep 17 00:00:00 2001 From: KOSAKI Motohiro Date: Wed, 7 Jan 2009 18:08:15 -0800 Subject: mm: introduce zone_reclaim struct Add zone_reclam_stat struct for later enhancement. A later patch uses this. This patch doesn't any behavior change (yet). Reviewed-by: KAMEZAWA Hiroyuki Signed-off-by: KOSAKI Motohiro Acked-by: Rik van Riel Cc: Balbir Singh Cc: Daisuke Nishimura Cc: Hugh Dickins Cc: KOSAKI Motohiro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mmzone.h | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 35a7b5e19465..09c14e213b63 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -263,6 +263,19 @@ enum zone_type { #error ZONES_SHIFT -- too many zones configured adjust calculation #endif +struct zone_reclaim_stat { + /* + * The pageout code in vmscan.c keeps track of how many of the + * mem/swap backed and file backed pages are refeferenced. + * The higher the rotated/scanned ratio, the more valuable + * that cache is. + * + * The anon LRU stats live in [0], file LRU stats in [1] + */ + unsigned long recent_rotated[2]; + unsigned long recent_scanned[2]; +}; + struct zone { /* Fields commonly accessed by the page allocator */ unsigned long pages_min, pages_low, pages_high; @@ -315,16 +328,7 @@ struct zone { unsigned long nr_scan; } lru[NR_LRU_LISTS]; - /* - * The pageout code in vmscan.c keeps track of how many of the - * mem/swap backed and file backed pages are refeferenced. - * The higher the rotated/scanned ratio, the more valuable - * that cache is. - * - * The anon LRU stats live in [0], file LRU stats in [1] - */ - unsigned long recent_rotated[2]; - unsigned long recent_scanned[2]; + struct zone_reclaim_stat reclaim_stat; unsigned long pages_scanned; /* since last reclaim */ unsigned long flags; /* zone flags, see below */ -- cgit v1.2.2 From 14797e2363c2b2f1ce139fd1c5a215e4e05aa1d9 Mon Sep 17 00:00:00 2001 From: KOSAKI Motohiro Date: Wed, 7 Jan 2009 18:08:18 -0800 Subject: memcg: add inactive_anon_is_low() The inactive_anon_is_low() is key component of active/inactive anon balancing on reclaim. However current inactive_anon_is_low() function only consider global reclaim. Therefore, we need following ugly scan_global_lru() condition. if (lru == LRU_ACTIVE_ANON && (!scan_global_lru(sc) || inactive_anon_is_low(zone))) { shrink_active_list(nr_to_scan, zone, sc, priority, file); return 0; it cause that memcg reclaim always deactivate pages when shrink_list() is called. To make mem_cgroup_inactive_anon_is_low() improve active/inactive anon balancing of memcgroup. Acked-by: KAMEZAWA Hiroyuki Acked-by: Rik van Riel Signed-off-by: KOSAKI Motohiro Cc: Cyrill Gorcunov Cc: "Pekka Enberg" Cc: Balbir Singh Cc: Daisuke Nishimura Cc: Hugh Dickins Cc: KOSAKI Motohiro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 59ac95a64508..aad9377c9828 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -100,6 +100,8 @@ extern void mem_cgroup_record_reclaim_priority(struct mem_cgroup *mem, extern long mem_cgroup_calc_reclaim(struct mem_cgroup *mem, struct zone *zone, int priority, enum lru_list lru); +int mem_cgroup_inactive_anon_is_low(struct mem_cgroup *memcg, + struct zone *zone); #ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP extern int do_swap_account; @@ -251,6 +253,13 @@ static inline bool mem_cgroup_oom_called(struct task_struct *task) { return false; } + +static inline int +mem_cgroup_inactive_anon_is_low(struct mem_cgroup *memcg, struct zone *zone) +{ + return 1; +} + #endif /* CONFIG_CGROUP_MEM_CONT */ #endif /* _LINUX_MEMCONTROL_H */ -- cgit v1.2.2 From a3d8e0549d913e30968fa02e505dfe02c0a23e0d Mon Sep 17 00:00:00 2001 From: KOSAKI Motohiro Date: Wed, 7 Jan 2009 18:08:19 -0800 Subject: memcg: add mem_cgroup_zone_nr_pages() Introduce mem_cgroup_zone_nr_pages(). It is called by zone_nr_pages() helper function. This patch doesn't have any behavior change. Acked-by: KAMEZAWA Hiroyuki Acked-by: Rik van Riel Signed-off-by: KOSAKI Motohiro Acked-by: Balbir Singh Cc: Daisuke Nishimura Cc: Hugh Dickins Cc: KOSAKI Motohiro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index aad9377c9828..b1defd6a2783 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -102,6 +102,9 @@ extern long mem_cgroup_calc_reclaim(struct mem_cgroup *mem, struct zone *zone, int priority, enum lru_list lru); int mem_cgroup_inactive_anon_is_low(struct mem_cgroup *memcg, struct zone *zone); +unsigned long mem_cgroup_zone_nr_pages(struct mem_cgroup *memcg, + struct zone *zone, + enum lru_list lru); #ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP extern int do_swap_account; @@ -260,6 +263,14 @@ mem_cgroup_inactive_anon_is_low(struct mem_cgroup *memcg, struct zone *zone) return 1; } +static inline unsigned long +mem_cgroup_zone_nr_pages(struct mem_cgroup *memcg, struct zone *zone, + enum lru_list lru) +{ + return 0; +} + + #endif /* CONFIG_CGROUP_MEM_CONT */ #endif /* _LINUX_MEMCONTROL_H */ -- cgit v1.2.2 From 3e2f41f1f64744f7942980d93cc93dd3e5924560 Mon Sep 17 00:00:00 2001 From: KOSAKI Motohiro Date: Wed, 7 Jan 2009 18:08:20 -0800 Subject: memcg: add zone_reclaim_stat Introduce mem_cgroup_per_zone::reclaim_stat member and its statics collecting function. Now, get_scan_ratio() can calculate correct value on memcg reclaim. [hugh@veritas.com: avoid reclaim_stat oops when disabled] Acked-by: KAMEZAWA Hiroyuki Acked-by: Rik van Riel Signed-off-by: KOSAKI Motohiro Cc: Balbir Singh Cc: Daisuke Nishimura Cc: Hugh Dickins Cc: KOSAKI Motohiro Signed-off-by: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index b1defd6a2783..36b8ebb39b82 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -105,6 +105,10 @@ int mem_cgroup_inactive_anon_is_low(struct mem_cgroup *memcg, unsigned long mem_cgroup_zone_nr_pages(struct mem_cgroup *memcg, struct zone *zone, enum lru_list lru); +struct zone_reclaim_stat *mem_cgroup_get_reclaim_stat(struct mem_cgroup *memcg, + struct zone *zone); +struct zone_reclaim_stat* +mem_cgroup_get_reclaim_stat_from_page(struct page *page); #ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP extern int do_swap_account; @@ -271,6 +275,18 @@ mem_cgroup_zone_nr_pages(struct mem_cgroup *memcg, struct zone *zone, } +static inline struct zone_reclaim_stat* +mem_cgroup_get_reclaim_stat(struct mem_cgroup *memcg, struct zone *zone) +{ + return NULL; +} + +static inline struct zone_reclaim_stat* +mem_cgroup_get_reclaim_stat_from_page(struct page *page) +{ + return NULL; +} + #endif /* CONFIG_CGROUP_MEM_CONT */ #endif /* _LINUX_MEMCONTROL_H */ -- cgit v1.2.2 From 9439c1c95b5c25b8031b2a7eb7e1590eb84be7f5 Mon Sep 17 00:00:00 2001 From: KOSAKI Motohiro Date: Wed, 7 Jan 2009 18:08:21 -0800 Subject: memcg: remove mem_cgroup_cal_reclaim() Now, get_scan_ratio() return correct value although memcg reclaim. Then, mem_cgroup_calc_reclaim() can be removed. So, memcg reclaim get the same capability of anon/file reclaim balancing as global reclaim now. Acked-by: KAMEZAWA Hiroyuki Acked-by: Rik van Riel Signed-off-by: KOSAKI Motohiro Cc: Balbir Singh Cc: Daisuke Nishimura Cc: Hugh Dickins Cc: KOSAKI Motohiro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 10 ---------- 1 file changed, 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 36b8ebb39b82..8752052da8df 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -97,9 +97,6 @@ extern void mem_cgroup_note_reclaim_priority(struct mem_cgroup *mem, int priority); extern void mem_cgroup_record_reclaim_priority(struct mem_cgroup *mem, int priority); - -extern long mem_cgroup_calc_reclaim(struct mem_cgroup *mem, struct zone *zone, - int priority, enum lru_list lru); int mem_cgroup_inactive_anon_is_low(struct mem_cgroup *memcg, struct zone *zone); unsigned long mem_cgroup_zone_nr_pages(struct mem_cgroup *memcg, @@ -244,13 +241,6 @@ static inline void mem_cgroup_record_reclaim_priority(struct mem_cgroup *mem, { } -static inline long mem_cgroup_calc_reclaim(struct mem_cgroup *mem, - struct zone *zone, int priority, - enum lru_list lru) -{ - return 0; -} - static inline bool mem_cgroup_disabled(void) { return true; -- cgit v1.2.2 From a7885eb8ad465ec9db99ac5b5e6680f0ca8e11c8 Mon Sep 17 00:00:00 2001 From: KOSAKI Motohiro Date: Wed, 7 Jan 2009 18:08:24 -0800 Subject: memcg: swappiness Currently, /proc/sys/vm/swappiness can change swappiness ratio for global reclaim. However, memcg reclaim doesn't have tuning parameter for itself. In general, the optimal swappiness depend on workload. (e.g. hpc workload need to low swappiness than the others.) Then, per cgroup swappiness improve administrator tunability. Signed-off-by: KAMEZAWA Hiroyuki Signed-off-by: KOSAKI Motohiro Cc: Balbir Singh Cc: Daisuke Nishimura Cc: Hugh Dickins Cc: KOSAKI Motohiro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/swap.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/swap.h b/include/linux/swap.h index be938ce4895a..4ccca25d0f05 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -214,7 +214,8 @@ static inline void lru_cache_add_active_file(struct page *page) extern unsigned long try_to_free_pages(struct zonelist *zonelist, int order, gfp_t gfp_mask); extern unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem, - gfp_t gfp_mask, bool noswap); + gfp_t gfp_mask, bool noswap, + unsigned int swappiness); extern int __isolate_lru_page(struct page *page, int mode, int file); extern unsigned long shrink_all_memory(unsigned long nr_pages); extern int vm_swappiness; -- cgit v1.2.2 From c772be939e078afd2505ede7d596a30f8f61de95 Mon Sep 17 00:00:00 2001 From: KOSAKI Motohiro Date: Wed, 7 Jan 2009 18:08:25 -0800 Subject: memcg: fix calculation of active_ratio Currently, inactive_ratio of memcg is calculated at setting limit. because page_alloc.c does so and current implementation is straightforward porting. However, memcg introduced hierarchy feature recently. In hierarchy restriction, memory limit is not only decided memory.limit_in_bytes of current cgroup, but also parent limit and sibling memory usage. Then, The optimal inactive_ratio is changed frequently. So, everytime calculation is better. Tested-by: KAMEZAWA Hiroyuki Acked-by: KAMEZAWA Hiroyuki Signed-off-by: KOSAKI Motohiro Cc: Balbir Singh Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 8752052da8df..056cf82c0e86 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -97,8 +97,7 @@ extern void mem_cgroup_note_reclaim_priority(struct mem_cgroup *mem, int priority); extern void mem_cgroup_record_reclaim_priority(struct mem_cgroup *mem, int priority); -int mem_cgroup_inactive_anon_is_low(struct mem_cgroup *memcg, - struct zone *zone); +int mem_cgroup_inactive_anon_is_low(struct mem_cgroup *memcg); unsigned long mem_cgroup_zone_nr_pages(struct mem_cgroup *memcg, struct zone *zone, enum lru_list lru); @@ -252,7 +251,7 @@ static inline bool mem_cgroup_oom_called(struct task_struct *task) } static inline int -mem_cgroup_inactive_anon_is_low(struct mem_cgroup *memcg, struct zone *zone) +mem_cgroup_inactive_anon_is_low(struct mem_cgroup *memcg) { return 1; } -- cgit v1.2.2 From a5e924f5f8abf97944e625d74967cc9452cfbce8 Mon Sep 17 00:00:00 2001 From: Daisuke Nishimura Date: Wed, 7 Jan 2009 18:08:28 -0800 Subject: memcg: remove mem_cgroup_try_charge After previous patch, mem_cgroup_try_charge is not used by anyone, so we can remove it. Signed-off-by: Daisuke Nishimura Acked-by: KAMEZAWA Hiroyuki Cc: Balbir Singh Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 8 -------- 1 file changed, 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 056cf82c0e86..8ae6ece8c962 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -40,8 +40,6 @@ struct mm_struct; extern int mem_cgroup_newpage_charge(struct page *page, struct mm_struct *mm, gfp_t gfp_mask); /* for swap handling */ -extern int mem_cgroup_try_charge(struct mm_struct *mm, - gfp_t gfp_mask, struct mem_cgroup **ptr); extern int mem_cgroup_try_charge_swapin(struct mm_struct *mm, struct page *page, gfp_t mask, struct mem_cgroup **ptr); extern void mem_cgroup_commit_charge_swapin(struct page *page, @@ -134,12 +132,6 @@ static inline int mem_cgroup_cache_charge(struct page *page, return 0; } -static inline int mem_cgroup_try_charge(struct mm_struct *mm, - gfp_t gfp_mask, struct mem_cgroup **ptr) -{ - return 0; -} - static inline int mem_cgroup_try_charge_swapin(struct mm_struct *mm, struct page *page, gfp_t gfp_mask, struct mem_cgroup **ptr) { -- cgit v1.2.2 From b5a84319a4343a0db753436fd8147e61eaafa7ea Mon Sep 17 00:00:00 2001 From: KAMEZAWA Hiroyuki Date: Wed, 7 Jan 2009 18:08:35 -0800 Subject: memcg: fix shmem's swap accounting Now, you can see following even when swap accounting is enabled. 1. Create Group 01, and 02. 2. allocate a "file" on tmpfs by a task under 01. 3. swap out the "file" (by memory pressure) 4. Read "file" from a task in group 02. 5. the charge of "file" is moved to group 02. This is not ideal behavior. This is because SwapCache which was loaded by read-ahead is not taken into account.. This is a patch to fix shmem's swapcache behavior. - remove mem_cgroup_cache_charge_swapin(). - Add SwapCache handler routine to mem_cgroup_cache_charge(). By this, shmem's file cache is charged at add_to_page_cache() with GFP_NOWAIT. - pass the page of swapcache to shrink_mem_cgroup. Signed-off-by: KAMEZAWA Hiroyuki Cc: Daisuke Nishimura Cc: Balbir Singh Cc: Paul Menage Cc: Li Zefan Cc: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 6 ++++-- include/linux/swap.h | 8 -------- 2 files changed, 4 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 8ae6ece8c962..326f45c86530 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -56,7 +56,8 @@ extern void mem_cgroup_move_lists(struct page *page, enum lru_list from, enum lru_list to); extern void mem_cgroup_uncharge_page(struct page *page); extern void mem_cgroup_uncharge_cache_page(struct page *page); -extern int mem_cgroup_shrink_usage(struct mm_struct *mm, gfp_t gfp_mask); +extern int mem_cgroup_shrink_usage(struct page *page, + struct mm_struct *mm, gfp_t gfp_mask); extern unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan, struct list_head *dst, @@ -155,7 +156,8 @@ static inline void mem_cgroup_uncharge_cache_page(struct page *page) { } -static inline int mem_cgroup_shrink_usage(struct mm_struct *mm, gfp_t gfp_mask) +static inline int mem_cgroup_shrink_usage(struct page *page, + struct mm_struct *mm, gfp_t gfp_mask) { return 0; } diff --git a/include/linux/swap.h b/include/linux/swap.h index 4ccca25d0f05..d30215578877 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -335,16 +335,8 @@ static inline void disable_swap_token(void) } #ifdef CONFIG_CGROUP_MEM_RES_CTLR -extern int mem_cgroup_cache_charge_swapin(struct page *page, - struct mm_struct *mm, gfp_t mask, bool locked); extern void mem_cgroup_uncharge_swapcache(struct page *page, swp_entry_t ent); #else -static inline -int mem_cgroup_cache_charge_swapin(struct page *page, - struct mm_struct *mm, gfp_t mask, bool locked) -{ - return 0; -} static inline void mem_cgroup_uncharge_swapcache(struct page *page, swp_entry_t ent) { -- cgit v1.2.2 From 999cd8a450f8f93701669a61cac4d3b19eca07e8 Mon Sep 17 00:00:00 2001 From: Paul Menage Date: Wed, 7 Jan 2009 18:08:36 -0800 Subject: cgroups: add a per-subsystem hierarchy_mutex These patches introduce new locking/refcount support for cgroups to reduce the need for subsystems to call cgroup_lock(). This will ultimately allow the atomicity of cgroup_rmdir() (which was removed recently) to be restored. These three patches give: 1/3 - introduce a per-subsystem hierarchy_mutex which a subsystem can use to prevent changes to its own cgroup tree 2/3 - use hierarchy_mutex in place of calling cgroup_lock() in the memory controller 3/3 - introduce a css_tryget() function similar to the one recently proposed by Kamezawa, but avoiding spurious refcount failures in the event of a race between a css_tryget() and an unsuccessful cgroup_rmdir() Future patches will likely involve: - using hierarchy mutex in place of cgroup_lock() in more subsystems where appropriate - restoring the atomicity of cgroup_rmdir() with respect to cgroup_create() This patch: Add a hierarchy_mutex to the cgroup_subsys object that protects changes to the hierarchy observed by that subsystem. It is taken by the cgroup subsystem (in addition to cgroup_mutex) for the following operations: - linking a cgroup into that subsystem's cgroup tree - unlinking a cgroup from that subsystem's cgroup tree - moving the subsystem to/from a hierarchy (including across the bind() callback) Thus if the subsystem holds its own hierarchy_mutex, it can safely traverse its own hierarchy. Signed-off-by: Paul Menage Tested-by: KAMEZAWA Hiroyuki Cc: Li Zefan Cc: Balbir Singh Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/cgroup.h | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 73d1c730c3c4..ce1c1f34c30c 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -340,8 +340,23 @@ struct cgroup_subsys { #define MAX_CGROUP_TYPE_NAMELEN 32 const char *name; - struct cgroupfs_root *root; + /* + * Protects sibling/children links of cgroups in this + * hierarchy, plus protects which hierarchy (or none) the + * subsystem is a part of (i.e. root/sibling). To avoid + * potential deadlocks, the following operations should not be + * undertaken while holding any hierarchy_mutex: + * + * - allocating memory + * - initiating hotplug events + */ + struct mutex hierarchy_mutex; + /* + * Link to parent, and list entry in parent's children. + * Protected by this->hierarchy_mutex and cgroup_lock() + */ + struct cgroupfs_root *root; struct list_head sibling; }; -- cgit v1.2.2 From e7c5ec9193d32b9559a3bb8893ceedbda85201ff Mon Sep 17 00:00:00 2001 From: Paul Menage Date: Wed, 7 Jan 2009 18:08:38 -0800 Subject: cgroups: add css_tryget() Add css_tryget(), that obtains a counted reference on a CSS. It is used in situations where the caller has a "weak" reference to the CSS, i.e. one that does not protect the cgroup from removal via a reference count, but would instead be cleaned up by a destroy() callback. css_tryget() will return true on success, or false if the cgroup is being removed. This is similar to Kamezawa Hiroyuki's patch from a week or two ago, but with the difference that in the event of css_tryget() racing with a cgroup_rmdir(), css_tryget() will only return false if the cgroup really does get removed. This implementation is done by biasing css->refcnt, so that a refcnt of 1 means "releasable" and 0 means "released or releasing". In the event of a race, css_tryget() distinguishes between "released" and "releasing" by checking for the CSS_REMOVED flag in css->flags. Signed-off-by: Paul Menage Tested-by: KAMEZAWA Hiroyuki Cc: Li Zefan Cc: Balbir Singh Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/cgroup.h | 38 ++++++++++++++++++++++++++++++++------ 1 file changed, 32 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index ce1c1f34c30c..e267e62827bb 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -52,9 +52,9 @@ struct cgroup_subsys_state { * hierarchy structure */ struct cgroup *cgroup; - /* State maintained by the cgroup system to allow - * subsystems to be "busy". Should be accessed via css_get() - * and css_put() */ + /* State maintained by the cgroup system to allow subsystems + * to be "busy". Should be accessed via css_get(), + * css_tryget() and and css_put(). */ atomic_t refcnt; @@ -64,11 +64,14 @@ struct cgroup_subsys_state { /* bits in struct cgroup_subsys_state flags field */ enum { CSS_ROOT, /* This CSS is the root of the subsystem */ + CSS_REMOVED, /* This CSS is dead */ }; /* - * Call css_get() to hold a reference on the cgroup; - * + * Call css_get() to hold a reference on the css; it can be used + * for a reference obtained via: + * - an existing ref-counted reference to the css + * - task->cgroups for a locked task */ static inline void css_get(struct cgroup_subsys_state *css) @@ -77,9 +80,32 @@ static inline void css_get(struct cgroup_subsys_state *css) if (!test_bit(CSS_ROOT, &css->flags)) atomic_inc(&css->refcnt); } + +static inline bool css_is_removed(struct cgroup_subsys_state *css) +{ + return test_bit(CSS_REMOVED, &css->flags); +} + +/* + * Call css_tryget() to take a reference on a css if your existing + * (known-valid) reference isn't already ref-counted. Returns false if + * the css has been destroyed. + */ + +static inline bool css_tryget(struct cgroup_subsys_state *css) +{ + if (test_bit(CSS_ROOT, &css->flags)) + return true; + while (!atomic_inc_not_zero(&css->refcnt)) { + if (test_bit(CSS_REMOVED, &css->flags)) + return false; + } + return true; +} + /* * css_put() should be called to release a reference taken by - * css_get() + * css_get() or css_tryget() */ extern void __css_put(struct cgroup_subsys_state *css); -- cgit v1.2.2 From 6af866af34a96fed24a55979a78b6f73bd4e8e87 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Wed, 7 Jan 2009 18:08:45 -0800 Subject: cpuset: remove remaining pointers to cpumask_t Impact: cleanups, use new cpumask API Final trivial cleanups: mainly s/cpumask_t/struct cpumask Note there is a FIXME in generate_sched_domains(). A future patch will change struct cpumask *doms to struct cpumask *doms[]. (I suppose Rusty will do this.) Signed-off-by: Li Zefan Cc: Ingo Molnar Cc: Rusty Russell Acked-by: Mike Travis Cc: Paul Menage Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/cpuset.h | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h index 51ea2bdea0f9..90c6074a36ca 100644 --- a/include/linux/cpuset.h +++ b/include/linux/cpuset.h @@ -20,8 +20,9 @@ extern int number_of_cpusets; /* How many cpusets are defined in system? */ extern int cpuset_init_early(void); extern int cpuset_init(void); extern void cpuset_init_smp(void); -extern void cpuset_cpus_allowed(struct task_struct *p, cpumask_t *mask); -extern void cpuset_cpus_allowed_locked(struct task_struct *p, cpumask_t *mask); +extern void cpuset_cpus_allowed(struct task_struct *p, struct cpumask *mask); +extern void cpuset_cpus_allowed_locked(struct task_struct *p, + struct cpumask *mask); extern nodemask_t cpuset_mems_allowed(struct task_struct *p); #define cpuset_current_mems_allowed (current->mems_allowed) void cpuset_init_current_mems_allowed(void); @@ -86,12 +87,13 @@ static inline int cpuset_init_early(void) { return 0; } static inline int cpuset_init(void) { return 0; } static inline void cpuset_init_smp(void) {} -static inline void cpuset_cpus_allowed(struct task_struct *p, cpumask_t *mask) +static inline void cpuset_cpus_allowed(struct task_struct *p, + struct cpumask *mask) { *mask = cpu_possible_map; } static inline void cpuset_cpus_allowed_locked(struct task_struct *p, - cpumask_t *mask) + struct cpumask *mask) { *mask = cpu_possible_map; } -- cgit v1.2.2 From f9fb860f67b9542cd78d1558dec7058092b57d8e Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 7 Jan 2009 18:08:46 -0800 Subject: pid: implement ns_of_pid A current problem with the pid namespace is that it is easy to do pid related work after exit_task_namespaces which drops the nsproxy pointer. However if we are doing pid namespace related work we are always operating on some struct pid which retains the pid_namespace pointer of the pid namespace it was allocated in. So provide ns_of_pid which allows us to find the pid namespace a pid was allocated in. Using this we have the needed infrastructure to do pid namespace related work at anytime we have a struct pid, removing the chance of accidentally having a NULL pointer dereference when accessing current->nsproxy. Signed-off-by: Eric W. Biederman Signed-off-by: Sukadev Bhattiprolu Cc: Oleg Nesterov Cc: Roland McGrath Cc: Bastian Blank Cc: Pavel Emelyanov Cc: Nadia Derbey Acked-by: Serge Hallyn Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/pid.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pid.h b/include/linux/pid.h index bb206c56d1f0..49f1c2f66e95 100644 --- a/include/linux/pid.h +++ b/include/linux/pid.h @@ -122,6 +122,24 @@ int next_pidmap(struct pid_namespace *pid_ns, int last); extern struct pid *alloc_pid(struct pid_namespace *ns); extern void free_pid(struct pid *pid); +/* + * ns_of_pid() returns the pid namespace in which the specified pid was + * allocated. + * + * NOTE: + * ns_of_pid() is expected to be called for a process (task) that has + * an attached 'struct pid' (see attach_pid(), detach_pid()) i.e @pid + * is expected to be non-NULL. If @pid is NULL, caller should handle + * the resulting NULL pid-ns. + */ +static inline struct pid_namespace *ns_of_pid(struct pid *pid) +{ + struct pid_namespace *ns = NULL; + if (pid) + ns = pid->numbers[pid->level].ns; + return ns; +} + /* * the helpers to get the pid's id seen from different namespaces * -- cgit v1.2.2 From 61bce0f1371cfff497fe85594fd39d1a0b15ebe1 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 7 Jan 2009 18:08:49 -0800 Subject: pid: generalize task_active_pid_ns Currently task_active_pid_ns is not safe to call after a task becomes a zombie and exit_task_namespaces is called, as nsproxy becomes NULL. By reading the pid namespace from the pid of the task we can trivially solve this problem at the cost of one extra memory read in what should be the same cacheline as we read the namespace from. When moving things around I have made task_active_pid_ns out of line because keeping it in pid_namespace.h would require adding includes of pid.h and sched.h that I don't think we want. This change does make task_active_pid_ns unsafe to call during copy_process until we attach a pid on the task_struct which seems to be a reasonable trade off. Signed-off-by: Eric W. Biederman Signed-off-by: Sukadev Bhattiprolu Cc: Oleg Nesterov Cc: Roland McGrath Cc: Bastian Blank Cc: Pavel Emelyanov Cc: Nadia Derbey Acked-by: Serge Hallyn Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/pid_namespace.h | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pid_namespace.h b/include/linux/pid_namespace.h index d82fe825d62f..38d10326246a 100644 --- a/include/linux/pid_namespace.h +++ b/include/linux/pid_namespace.h @@ -79,11 +79,7 @@ static inline void zap_pid_ns_processes(struct pid_namespace *ns) } #endif /* CONFIG_PID_NS */ -static inline struct pid_namespace *task_active_pid_ns(struct task_struct *tsk) -{ - return tsk->nsproxy->pid_ns; -} - +extern struct pid_namespace *task_active_pid_ns(struct task_struct *tsk); void pidhash_init(void); void pidmap_init(void); -- cgit v1.2.2 From f06295b44c296c8fb08823a3118468ae343b60f2 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 7 Jan 2009 18:08:52 -0800 Subject: ELF: implement AT_RANDOM for glibc PRNG seeding While discussing[1] the need for glibc to have access to random bytes during program load, it seems that an earlier attempt to implement AT_RANDOM got stalled. This implements a random 16 byte string, available to every ELF program via a new auxv AT_RANDOM vector. [1] http://sourceware.org/ml/libc-alpha/2008-10/msg00006.html Ulrich said: glibc needs right after startup a bit of random data for internal protections (stack canary etc). What is now in upstream glibc is that we always unconditionally open /dev/urandom, read some data, and use it. For every process startup. That's slow. ... The solution is to provide a limited amount of random data to the starting process in the aux vector. I suggested 16 bytes and this is what the patch implements. If we need only 16 bytes or less we use the data directly. If we need more we'll use the 16 bytes to see a PRNG. This avoids the costly /dev/urandom use and it allows the kernel to use the most adequate source of random data for this purpose. It might not be the same pool as that for /dev/urandom. Concerns were expressed about the depletion of the randomness pool. But this patch doesn't make the situation worse, it doesn't deplete entropy more than happens now. Signed-off-by: Kees Cook Cc: Jakub Jelinek Cc: Andi Kleen Cc: Ulrich Drepper Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/auxvec.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/auxvec.h b/include/linux/auxvec.h index d7afa9dd6635..f3b5d4e3a2ac 100644 --- a/include/linux/auxvec.h +++ b/include/linux/auxvec.h @@ -23,16 +23,16 @@ #define AT_PLATFORM 15 /* string identifying CPU for optimizations */ #define AT_HWCAP 16 /* arch dependent hints at CPU capabilities */ #define AT_CLKTCK 17 /* frequency at which times() increments */ - +/* AT_* values 18 through 22 are reserved */ #define AT_SECURE 23 /* secure mode boolean */ - #define AT_BASE_PLATFORM 24 /* string identifying real platform, may * differ from AT_PLATFORM. */ +#define AT_RANDOM 25 /* address of 16 random bytes */ #define AT_EXECFN 31 /* filename of program */ #ifdef __KERNEL__ -#define AT_VECTOR_SIZE_BASE 18 /* NEW_AUX_ENT entries in auxiliary table */ +#define AT_VECTOR_SIZE_BASE 19 /* NEW_AUX_ENT entries in auxiliary table */ /* number of "#define AT_.*" above, minus {AT_NULL, AT_IGNORE, AT_NOTELF} */ #endif -- cgit v1.2.2 From 91f68b7359144aa40bb9668124543d15284750b4 Mon Sep 17 00:00:00 2001 From: Wu Fengguang Date: Wed, 7 Jan 2009 18:09:12 -0800 Subject: generic swap(): introduce global macro swap(a, b) There have been some local definitions of swap(), it's time to replace them all with a uniform one. Signed-off-by: Wu Fengguang Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kernel.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 6b8e2027165e..343df9ef2412 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -476,6 +476,12 @@ static inline char *pack_hex_byte(char *buf, u8 byte) __val = __val < __min ? __min: __val; \ __val > __max ? __max: __val; }) + +/* + * swap - swap value of @a and @b + */ +#define swap(a, b) ({ typeof(a) __tmp = (a); (a) = (b); (b) = __tmp; }) + /** * container_of - cast a member of a structure out to the containing structure * @ptr: the pointer to the member. -- cgit v1.2.2 From 859cb7f2a4244ea6da206d3fe9cc8a6810947a68 Mon Sep 17 00:00:00 2001 From: Richard Purdie Date: Thu, 8 Jan 2009 17:55:03 +0000 Subject: leds: Add suspend/resume to the core class Add suspend/resume to the core class and remove all the now unneeded code from various drivers. Originally the class code couldn't support suspend/resume but since class_device can there is no reason for each driver doing its own suspend/resume anymore. --- include/linux/leds.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/leds.h b/include/linux/leds.h index 3c1a8ce6a5ea..24489da701e3 100644 --- a/include/linux/leds.h +++ b/include/linux/leds.h @@ -32,7 +32,10 @@ struct led_classdev { int brightness; int flags; + /* Lower 16 bits reflect status */ #define LED_SUSPENDED (1 << 0) + /* Upper 16 bits reflect control information */ +#define LED_CORE_SUSPENDRESUME (1 << 16) /* Set LED brightness level */ /* Must not sleep, use a workqueue if needed */ -- cgit v1.2.2 From 69279fb9a95051971ac03e558c4d46e7ba84ab3a Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Wed, 31 Dec 2008 12:52:41 +0000 Subject: regulator: Clean up kerneldoc warnings Remove kerneldoc warnings that don't relate to missing documentation, mostly by renaming parameters in the documentation to match their actual names. Signed-off-by: Mark Brown Signed-off-by: Liam Girdwood --- include/linux/regulator/consumer.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/regulator/consumer.h b/include/linux/regulator/consumer.h index afdc4558bb94..801bf77ff4e2 100644 --- a/include/linux/regulator/consumer.h +++ b/include/linux/regulator/consumer.h @@ -104,10 +104,10 @@ struct regulator; /** * struct regulator_bulk_data - Data used for bulk regulator operations. * - * @supply The name of the supply. Initialised by the user before - * using the bulk regulator APIs. - * @consumer The regulator consumer for the supply. This will be managed - * by the bulk API. + * @supply: The name of the supply. Initialised by the user before + * using the bulk regulator APIs. + * @consumer: The regulator consumer for the supply. This will be managed + * by the bulk API. * * The regulator APIs provide a series of regulator_bulk_() API calls as * a convenience to consumers which require multiple supplies. This -- cgit v1.2.2 From c8e7e4640facbe99d10a6e262523b25be129b9b9 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Wed, 31 Dec 2008 12:52:42 +0000 Subject: regulator: Add missing kerneldoc This is only the documentation that the kerneldoc system warns about. Signed-off-by: Mark Brown Signed-off-by: Liam Girdwood --- include/linux/regulator/driver.h | 40 +++++++++++++++++++++++++++++++++++++- include/linux/regulator/machine.h | 41 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 80 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/regulator/driver.h b/include/linux/regulator/driver.h index e37d80561985..84c3737c2d26 100644 --- a/include/linux/regulator/driver.h +++ b/include/linux/regulator/driver.h @@ -24,7 +24,37 @@ struct regulator_init_data; /** * struct regulator_ops - regulator operations. * - * This struct describes regulator operations. + * This struct describes regulator operations which can be implemented by + * regulator chip drivers. + * + * @enable: Enable the regulator. + * @disable: Disable the regulator. + * @is_enabled: Return 1 if the reguator is enabled, 0 otherwise. + * + * @set_voltage: Set the voltage for the regulator within the range specified. + * The driver should select the voltage closest to min_uV. + * @get_voltage: Return the currently configured voltage for the regulator. + * + * @set_current: Set the current for the regulator within the range specified. + * The driver should select the current closest to min_uA. + * @get_current: Return the currently configured current for the regulator. + * + * @set_current_limit: Configure a limit for a current-limited regulator. + * @get_current_limit: Get the limit for a current-limited regulator. + * + * @set_mode: Set the operating mode for the regulator. + * @get_mode: Get the current operating mode for the regulator. + * @get_optimum_mode: Get the most efficient operating mode for the regulator + * when running with the specified parameters. + * + * @set_suspend_voltage: Set the voltage for the regulator when the system + * is suspended. + * @set_suspend_enable: Mark the regulator as enabled when the system is + * suspended. + * @set_suspend_disable: Mark the regulator as disabled when the system is + * suspended. + * @set_suspend_mode: Set the operating mode for the regulator when the + * system is suspended. */ struct regulator_ops { @@ -75,6 +105,14 @@ enum regulator_type { /** * struct regulator_desc - Regulator descriptor * + * Each regulator registered with the core is described with a structure of + * this type. + * + * @name: Identifying name for the regulator. + * @id: Numerical identifier for the regulator. + * @ops: Regulator operations table. + * @type: Indicates if the regulator is a voltage or current regulator. + * @owner: Module providing the regulator, used for refcounting. */ struct regulator_desc { const char *name; diff --git a/include/linux/regulator/machine.h b/include/linux/regulator/machine.h index c6d69331a81e..3794773b23d2 100644 --- a/include/linux/regulator/machine.h +++ b/include/linux/regulator/machine.h @@ -44,6 +44,10 @@ struct regulator; * struct regulator_state - regulator state during low power syatem states * * This describes a regulators state during a system wide low power state. + * + * @uV: Operating voltage during suspend. + * @mode: Operating mode during suspend. + * @enabled: Enabled during suspend. */ struct regulator_state { int uV; /* suspend voltage */ @@ -55,6 +59,30 @@ struct regulator_state { * struct regulation_constraints - regulator operating constraints. * * This struct describes regulator and board/machine specific constraints. + * + * @name: Descriptive name for the constraints, used for display purposes. + * + * @min_uV: Smallest voltage consumers may set. + * @max_uV: Largest voltage consumers may set. + * + * @min_uA: Smallest consumers consumers may set. + * @max_uA: Largest current consumers may set. + * + * @valid_modes_mask: Mask of modes which may be configured by consumers. + * @valid_ops_mask: Operations which may be performed by consumers. + * + * @always_on: Set if the regulator should never be disabled. + * @boot_on: Set if the regulator is enabled when the system is initially + * started. + * @apply_uV: Apply the voltage constraint when initialising. + * + * @input_uV: Input voltage for regulator when supplied by another regulator. + * + * @state_disk: State for regulator when system is suspended in disk mode. + * @state_mem: State for regulator when system is suspended in mem mode. + * @state_standby: State for regulator when system is suspended in standby + * mode. + * @initial_state: Suspend state to set by default. */ struct regulation_constraints { @@ -93,6 +121,9 @@ struct regulation_constraints { * struct regulator_consumer_supply - supply -> device mapping * * This maps a supply name to a device. + * + * @dev: Device structure for the consumer. + * @supply: Name for the supply. */ struct regulator_consumer_supply { struct device *dev; /* consumer */ @@ -103,6 +134,16 @@ struct regulator_consumer_supply { * struct regulator_init_data - regulator platform initialisation data. * * Initialisation constraints, our supply and consumers supplies. + * + * @supply_regulator_dev: Parent regulator (if any). + * + * @constraints: Constraints. These must be specified for the regulator to + * be usable. + * @num_consumer_supplies: Number of consumer device supplies. + * @consumer_supplies: Consumer device supply configuration. + * + * @regulator_init: Callback invoked when the regulator has been registered. + * @driver_data: Data passed to regulator_init. */ struct regulator_init_data { struct device *supply_regulator_dev; /* or NULL for LINE */ -- cgit v1.2.2 From 0ba4887c6329043d6cee5b5b477cfe50c2b57674 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Thu, 8 Jan 2009 11:50:23 -0800 Subject: regulator: fix kernel-doc warnings Fix kernel-doc warnings in regulator/driver.h: Warning(linux-next-20090108//include/linux/regulator/driver.h:95): Excess struct/union/enum/typedef member 'set_current' description in 'regulator_ops' Warning(linux-next-20090108//include/linux/regulator/driver.h:95): Excess struct/union/enum/typedef member 'get_current' description in 'regulator_ops' Warning(linux-next-20090108//include/linux/regulator/driver.h:124): No description found for parameter 'irq' Signed-off-by: Randy Dunlap cc: Liam Girdwood cc: Mark Brown Signed-off-by: Liam Girdwood --- include/linux/regulator/driver.h | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/regulator/driver.h b/include/linux/regulator/driver.h index 84c3737c2d26..2dae05705f13 100644 --- a/include/linux/regulator/driver.h +++ b/include/linux/regulator/driver.h @@ -29,16 +29,12 @@ struct regulator_init_data; * * @enable: Enable the regulator. * @disable: Disable the regulator. - * @is_enabled: Return 1 if the reguator is enabled, 0 otherwise. + * @is_enabled: Return 1 if the regulator is enabled, 0 otherwise. * * @set_voltage: Set the voltage for the regulator within the range specified. * The driver should select the voltage closest to min_uV. * @get_voltage: Return the currently configured voltage for the regulator. * - * @set_current: Set the current for the regulator within the range specified. - * The driver should select the current closest to min_uA. - * @get_current: Return the currently configured current for the regulator. - * * @set_current_limit: Configure a limit for a current-limited regulator. * @get_current_limit: Get the limit for a current-limited regulator. * @@ -111,6 +107,7 @@ enum regulator_type { * @name: Identifying name for the regulator. * @id: Numerical identifier for the regulator. * @ops: Regulator operations table. + * @irq: Interrupt number for the regulator. * @type: Indicates if the regulator is a voltage or current regulator. * @owner: Module providing the regulator, used for refcounting. */ -- cgit v1.2.2 From f4f6bda00fc6bf995a35d8246db45aacaa9b3f09 Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Wed, 3 Dec 2008 08:48:52 +0000 Subject: backlight: add support for Toppoly TDO35S series to tdo24m lcd driver Signed-off-by: Mike Rapoport Acked-by: Eric Miao Signed-off-by: Richard Purdie --- include/linux/spi/tdo24m.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) create mode 100644 include/linux/spi/tdo24m.h (limited to 'include/linux') diff --git a/include/linux/spi/tdo24m.h b/include/linux/spi/tdo24m.h new file mode 100644 index 000000000000..7572d4e1fe76 --- /dev/null +++ b/include/linux/spi/tdo24m.h @@ -0,0 +1,13 @@ +#ifndef __TDO24M_H__ +#define __TDO24M_H__ + +enum tdo24m_model { + TDO24M, + TDO35S, +}; + +struct tdo24m_platform_data { + enum tdo24m_model model; +}; + +#endif /* __TDO24M_H__ */ -- cgit v1.2.2 From 0c3573f19d135d718264e38c46597295bd6154b7 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Fri, 9 Jan 2009 08:31:05 +1100 Subject: md: use sysfs_notify_dirent to notify changes to md/sync_action. There is no compelling need for this, but sysfs_notify_dirent is a nicer interface and the change is good for consistency. Signed-off-by: NeilBrown --- include/linux/raid/md_k.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/raid/md_k.h b/include/linux/raid/md_k.h index 8fc909ef6787..663803eaf0de 100644 --- a/include/linux/raid/md_k.h +++ b/include/linux/raid/md_k.h @@ -244,6 +244,7 @@ struct mddev_s struct sysfs_dirent *sysfs_state; /* handle for 'array_state' * file in sysfs. */ + struct sysfs_dirent *sysfs_action; /* handle for 'sync_action' */ spinlock_t write_lock; wait_queue_head_t sb_wait; /* for waiting on superblock updates */ -- cgit v1.2.2 From 019c4e2f3e02aac4b44003913b54ca4b332e4371 Mon Sep 17 00:00:00 2001 From: Andre Noll Date: Fri, 9 Jan 2009 08:31:06 +1100 Subject: md: raid0: Represent device offset in sectors. Rename zone->dev_offset to zone->dev_start to make sure all users have been converted. Signed-off-by: Andre Noll Signed-off-by: NeilBrown --- include/linux/raid/raid0.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/raid/raid0.h b/include/linux/raid/raid0.h index 1b2dda035f8e..61c3d29dc158 100644 --- a/include/linux/raid/raid0.h +++ b/include/linux/raid/raid0.h @@ -6,7 +6,7 @@ struct strip_zone { sector_t zone_offset; /* Zone offset in md_dev */ - sector_t dev_offset; /* Zone offset in real dev */ + sector_t dev_start; /* Zone offset in real dev (in sectors) */ sector_t size; /* Zone size */ int nb_dev; /* # of devices attached to the zone */ mdk_rdev_t **dev; /* Devices attached to the zone */ -- cgit v1.2.2 From 6199d3db0fc34f8ada895879d04a353a6ae632bc Mon Sep 17 00:00:00 2001 From: Andre Noll Date: Fri, 9 Jan 2009 08:31:07 +1100 Subject: md: raid0: Represent zone->zone_offset in sectors. For the same reason as in the previous patch, rename it from zone_offset to zone_start. Signed-off-by: Andre Noll Signed-off-by: NeilBrown --- include/linux/raid/raid0.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/raid/raid0.h b/include/linux/raid/raid0.h index 61c3d29dc158..eaf4f6ac55f6 100644 --- a/include/linux/raid/raid0.h +++ b/include/linux/raid/raid0.h @@ -5,7 +5,7 @@ struct strip_zone { - sector_t zone_offset; /* Zone offset in md_dev */ + sector_t zone_start; /* Zone offset in md_dev (in sectors) */ sector_t dev_start; /* Zone offset in real dev (in sectors) */ sector_t size; /* Zone size */ int nb_dev; /* # of devices attached to the zone */ -- cgit v1.2.2 From 83838ed87898e0a8ff8dbf001e54e6c017f0a011 Mon Sep 17 00:00:00 2001 From: Andre Noll Date: Fri, 9 Jan 2009 08:31:07 +1100 Subject: md: raid0: Represent the size of strip zones in sectors. This completes the block -> sector conversion of struct strip_zone. Signed-off-by: Andre Noll Signed-off-by: NeilBrown --- include/linux/raid/raid0.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/raid/raid0.h b/include/linux/raid/raid0.h index eaf4f6ac55f6..c12521d027e2 100644 --- a/include/linux/raid/raid0.h +++ b/include/linux/raid/raid0.h @@ -7,7 +7,7 @@ struct strip_zone { sector_t zone_start; /* Zone offset in md_dev (in sectors) */ sector_t dev_start; /* Zone offset in real dev (in sectors) */ - sector_t size; /* Zone size */ + sector_t sectors; /* Zone size in sectors */ int nb_dev; /* # of devices attached to the zone */ mdk_rdev_t **dev; /* Devices attached to the zone */ }; -- cgit v1.2.2 From ccacc7d2cf03114a24ab903f710118e9e5d43273 Mon Sep 17 00:00:00 2001 From: Andre Noll Date: Fri, 9 Jan 2009 08:31:08 +1100 Subject: md: raid0: make hash_spacing and preshift sector-based. This patch renames the hash_spacing and preshift members of struct raid0_private_data to spacing and sector_shift respectively and changes the semantics as follows: We always have spacing = 2 * hash_spacing. In case sizeof(sector_t) > sizeof(u32) we also have sector_shift = preshift + 1 while sector_shift = preshift = 0 otherwise. Note that the values of nb_zone and zone are unaffected by these changes because in the sector_div() preceeding the assignement of these two variables both arguments double. Signed-off-by: Andre Noll Signed-off-by: NeilBrown --- include/linux/raid/raid0.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/raid/raid0.h b/include/linux/raid/raid0.h index c12521d027e2..fd42aa87c391 100644 --- a/include/linux/raid/raid0.h +++ b/include/linux/raid/raid0.h @@ -19,8 +19,8 @@ struct raid0_private_data mdk_rdev_t **devlist; /* lists of rdevs, pointed to by strip_zone->dev */ int nr_strip_zones; - sector_t hash_spacing; - int preshift; /* shift this before divide by hash_spacing */ + sector_t spacing; + int sector_shift; /* shift this before divide by spacing */ }; typedef struct raid0_private_data raid0_conf_t; -- cgit v1.2.2 From 159ec1fc060ab22b157a62364045f5e98749c4d3 Mon Sep 17 00:00:00 2001 From: Cheng Renquan Date: Fri, 9 Jan 2009 08:31:08 +1100 Subject: md: use list_for_each_entry macro directly The rdev_for_each macro defined in is identical to list_for_each_entry_safe, from , it should be defined to use list_for_each_entry_safe, instead of reinventing the wheel. But some calls to each_entry_safe don't really need a safe version, just a direct list_for_each_entry is enough, this could save a temp variable (tmp) in every function that used rdev_for_each. In this patch, most rdev_for_each loops are replaced by list_for_each_entry, totally save many tmp vars; and only in the other situations that will call list_del to delete an entry, the safe version is used. Signed-off-by: Cheng Renquan Signed-off-by: NeilBrown --- include/linux/raid/md_k.h | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/raid/md_k.h b/include/linux/raid/md_k.h index 663803eaf0de..8f9a54c1fb0e 100644 --- a/include/linux/raid/md_k.h +++ b/include/linux/raid/md_k.h @@ -335,17 +335,14 @@ static inline char * mdname (mddev_t * mddev) * iterates through some rdev ringlist. It's safe to remove the * current 'rdev'. Dont touch 'tmp' though. */ -#define rdev_for_each_list(rdev, tmp, list) \ - \ - for ((tmp) = (list).next; \ - (rdev) = (list_entry((tmp), mdk_rdev_t, same_set)), \ - (tmp) = (tmp)->next, (tmp)->prev != &(list) \ - ; ) +#define rdev_for_each_list(rdev, tmp, head) \ + list_for_each_entry_safe(rdev, tmp, head, same_set) + /* * iterates through the 'same array disks' ringlist */ #define rdev_for_each(rdev, tmp, mddev) \ - rdev_for_each_list(rdev, tmp, (mddev)->disks) + list_for_each_entry_safe(rdev, tmp, &((mddev)->disks), same_set) #define rdev_for_each_rcu(rdev, mddev) \ list_for_each_entry_rcu(rdev, &((mddev)->disks), same_set) -- cgit v1.2.2 From cd2ac9321c26dc7a76455cd2a4df89123fa2b73e Mon Sep 17 00:00:00 2001 From: Cheng Renquan Date: Fri, 9 Jan 2009 08:31:08 +1100 Subject: md: need another print_sb for mdp_superblock_1 md_print_devices is called in two code path: MD_BUG(...), and md_ioctl with PRINT_RAID_DEBUG. it will dump out all in use md devices information; However, it wrongly processed two types of superblock in one: The header file has defined two types of superblock, struct mdp_superblock_s (typedefed with mdp_super_t) according to md with metadata 0.90, and struct mdp_superblock_1 according to md with metadata 1.0 and later, These two types of superblock are very different, The md_print_devices code processed them both in mdp_super_t, that would lead to wrong informaton dump like: [ 6742.345877] [ 6742.345887] md: ********************************** [ 6742.345890] md: * * [ 6742.345892] md: ********************************** [ 6742.345896] md1: [ 6742.345907] md: rdev ram7, SZ:00065472 F:0 S:1 DN:3 [ 6742.345909] md: rdev superblock: [ 6742.345914] md: SB: (V:0.90.0) ID:<42ef13c7.598c059a.5f9f1645.801e9ee6> CT:4919856d [ 6742.345918] md: L5 S00065472 ND:4 RD:4 md1 LO:2 CS:65536 [ 6742.345922] md: UT:4919856d ST:1 AD:4 WD:4 FD:0 SD:0 CSUM:b7992907 E:00000001 [ 6742.345924] D 0: DISK [ 6742.345930] D 1: DISK [ 6742.345933] D 2: DISK [ 6742.345937] D 3: DISK [ 6742.345942] md: THIS: DISK ... [ 6742.346058] md0: [ 6742.346067] md: rdev ram3, SZ:00065472 F:0 S:1 DN:3 [ 6742.346070] md: rdev superblock: [ 6742.346073] md: SB: (V:1.0.0) ID:<369aad81.00000000.00000000.00000000> CT:9a322a9c [ 6742.346077] md: L-1507699579 S976570180 ND:48 RD:0 md0 LO:65536 CS:196610 [ 6742.346081] md: UT:00000018 ST:0 AD:131048 WD:0 FD:8 SD:0 CSUM:00000000 E:00000000 [ 6742.346084] D 0: DISK [ 6742.346089] D 1: DISK [ 6742.346092] D 2: DISK [ 6742.346096] D 3: DISK [ 6742.346102] md: THIS: DISK ... [ 6742.346219] md: ********************************** [ 6742.346221] Here md1 is metadata 0.90.0, and md0 is metadata 1.2 After some more code to distinguish these two types of superblock, in this patch, it will generate dump information like: [ 7906.755790] [ 7906.755799] md: ********************************** [ 7906.755802] md: * * [ 7906.755804] md: ********************************** [ 7906.755808] md1: [ 7906.755819] md: rdev ram7, SZ:00065472 F:0 S:1 DN:3 [ 7906.755821] md: rdev superblock (MJ:0): [ 7906.755826] md: SB: (V:0.90.0) ID:<3fca7a0d.a612bfed.5f9f1645.801e9ee6> CT:491989f3 [ 7906.755830] md: L5 S00065472 ND:4 RD:4 md1 LO:2 CS:65536 [ 7906.755834] md: UT:491989f3 ST:1 AD:4 WD:4 FD:0 SD:0 CSUM:00fb52ad E:00000001 [ 7906.755836] D 0: DISK [ 7906.755842] D 1: DISK [ 7906.755845] D 2: DISK [ 7906.755849] D 3: DISK [ 7906.755855] md: THIS: DISK ... [ 7906.755972] md0: [ 7906.755981] md: rdev ram3, SZ:00065472 F:0 S:1 DN:3 [ 7906.755984] md: rdev superblock (MJ:1): [ 7906.755989] md: SB: (V:1) (F:0) Array-ID:<5fbcf158:55aa:5fbe:9a79:1e939880dcbd> [ 7906.755990] md: Name: "DG5:0" CT:1226410480 [ 7906.755998] md: L5 SZ130944 RD:4 LO:2 CS:128 DO:24 DS:131048 SO:8 RO:0 [ 7906.755999] md: Dev:00000003 UUID: 9194d744:87f7:a448:85f2:7497b84ce30a [ 7906.756001] md: (F:0) UT:1226410480 Events:0 ResyncOffset:-1 CSUM:0dbcd829 [ 7906.756003] md: (MaxDev:384) ... [ 7906.756113] md: ********************************** [ 7906.756116] this md0 (metadata 1.2) information dumping is exactly according to struct mdp_superblock_1. Signed-off-by: Cheng Renquan Cc: Neil Brown Cc: Dan Williams Signed-off-by: Andrew Morton Signed-off-by: NeilBrown --- include/linux/raid/md_p.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/raid/md_p.h b/include/linux/raid/md_p.h index 8b4de4a41ff1..9491026afe66 100644 --- a/include/linux/raid/md_p.h +++ b/include/linux/raid/md_p.h @@ -194,6 +194,8 @@ static inline __u64 md_event(mdp_super_t *sb) { return (ev<<32)| sb->events_lo; } +#define MD_SUPERBLOCK_1_TIME_SEC_MASK ((1ULL<<40) - 1) + /* * The version-1 superblock : * All numeric fields are little-endian. -- cgit v1.2.2 From d3374825ce57ba2214d375023979f6197ccc1385 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Fri, 9 Jan 2009 08:31:10 +1100 Subject: md: make devices disappear when they are no longer needed. Currently md devices, once created, never disappear until the module is unloaded. This is essentially because the gendisk holds a reference to the mddev, and the mddev holds a reference to the gendisk, this a circular reference. If we drop the reference from mddev to gendisk, then we need to ensure that the mddev is destroyed when the gendisk is destroyed. However it is not possible to hook into the gendisk destruction process to enable this. So we drop the reference from the gendisk to the mddev and destroy the gendisk when the mddev gets destroyed. However this has a complication. Between the call __blkdev_get->get_gendisk->kobj_lookup->md_probe and the call __blkdev_get->md_open there is no obvious way to hold a reference on the mddev any more, so unless something is done, it will disappear and gendisk will be destroyed prematurely. Also, once we decide to destroy the mddev, there will be an unlockable moment before the gendisk is unlinked (blk_unregister_region) during which a new reference to the gendisk can be created. We need to ensure that this reference can not be used. i.e. the ->open must fail. So: 1/ in md_probe we set a flag in the mddev (hold_active) which indicates that the array should be treated as active, even though there are no references, and no appearance of activity. This is cleared by md_release when the device is closed if it is no longer needed. This ensures that the gendisk will survive between md_probe and md_open. 2/ In md_open we check if the mddev we expect to open matches the gendisk that we did open. If there is a mismatch we return -ERESTARTSYS and modify __blkdev_get to retry from the top in that case. In the -ERESTARTSYS sys case we make sure to wait until the old gendisk (that we succeeded in opening) is really gone so we loop at most once. Some udev configurations will always open an md device when it first appears. If we allow an md device that was just created by an open to disappear on an immediate close, then this can race with such udev configurations and result in an infinite loop the device being opened and closed, then re-open due to the 'ADD' even from the first open, and then close and so on. So we make sure an md device, once created by an open, remains active at least until some md 'ioctl' has been made on it. This means that all normal usage of md devices will allow them to disappear promptly when not needed, but the worst that an incorrect usage will do it cause an inactive md device to be left in existence (it can easily be removed). As an array can be stopped by writing to a sysfs attribute echo clear > /sys/block/mdXXX/md/array_state we need to use scheduled work for deleting the gendisk and other kobjects. This allows us to wait for any pending gendisk deletion to complete by simply calling flush_scheduled_work(). Signed-off-by: NeilBrown --- include/linux/raid/md_k.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/raid/md_k.h b/include/linux/raid/md_k.h index 8f9a54c1fb0e..e3d17c7f954e 100644 --- a/include/linux/raid/md_k.h +++ b/include/linux/raid/md_k.h @@ -137,6 +137,8 @@ struct mddev_s struct gendisk *gendisk; struct kobject kobj; + int hold_active; +#define UNTIL_IOCTL 1 /* Superblock information */ int major_version, @@ -246,6 +248,8 @@ struct mddev_s */ struct sysfs_dirent *sysfs_action; /* handle for 'sync_action' */ + struct work_struct del_work; /* used for delayed sysfs removal */ + spinlock_t write_lock; wait_queue_head_t sb_wait; /* for waiting on superblock updates */ atomic_t pending_writes; /* number of active superblock writes */ -- cgit v1.2.2 From efeb53c0e57213e843b7ef3cc6ebcdea7d6186ac Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Fri, 9 Jan 2009 08:31:10 +1100 Subject: md: Allow md devices to be created by name. Using sequential numbers to identify md devices is somewhat artificial. Using names can be a lot more user-friendly. Also, creating md devices by opening the device special file is a bit awkward. So this patch provides a new option for creating and naming devices. Writing a name such as "md_home" to /sys/modules/md_mod/parameters/new_array will cause an array with that name to be created. It will appear in /sys/block/ /proc/partitions and /proc/mdstat as 'md_home'. It will have an arbitrary minor number allocated. md devices that a created by an open are destroyed on the last close when the device is inactive. For named md devices, they will not be destroyed until the array is explicitly stopped, either with the STOP_ARRAY ioctl or by writing 'clear' to /sys/block/md_XXXX/md/array_state. The name of the array must start 'md_' to avoid conflict with other devices. Signed-off-by: NeilBrown --- include/linux/raid/md_k.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/raid/md_k.h b/include/linux/raid/md_k.h index e3d17c7f954e..dac4217194b8 100644 --- a/include/linux/raid/md_k.h +++ b/include/linux/raid/md_k.h @@ -139,6 +139,7 @@ struct mddev_s struct kobject kobj; int hold_active; #define UNTIL_IOCTL 1 +#define UNTIL_STOP 2 /* Superblock information */ int major_version, -- cgit v1.2.2 From 4044ba58dd15cb01797c4fd034f39ef4a75f7cc3 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Fri, 9 Jan 2009 08:31:11 +1100 Subject: md: don't retry recovery of raid1 that fails due to error on source drive. If a raid1 has only one working drive and it has a sector which gives an error on read, then an attempt to recover onto a spare will fail, but as the single remaining drive is not removed from the array, the recovery will be immediately re-attempted, resulting in an infinite recovery loop. So detect this situation and don't retry recovery once an error on the lone remaining drive is detected. Allow recovery to be retried once every time a spare is added in case the problem wasn't actually a media error. Signed-off-by: NeilBrown --- include/linux/raid/md_k.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/raid/md_k.h b/include/linux/raid/md_k.h index dac4217194b8..9743e4dbc918 100644 --- a/include/linux/raid/md_k.h +++ b/include/linux/raid/md_k.h @@ -218,6 +218,9 @@ struct mddev_s #define MD_RECOVERY_FROZEN 9 unsigned long recovery; + int recovery_disabled; /* if we detect that recovery + * will always fail, set this + * so we don't loop trying */ int in_sync; /* know to not need resync */ struct mutex reconfig_mutex; -- cgit v1.2.2 From 871af1210f13966ab911ed2166e4ab2ce775b99d Mon Sep 17 00:00:00 2001 From: Alan Cox Date: Mon, 5 Jan 2009 14:16:39 +0000 Subject: libata: Add 32bit PIO support This matters for some controllers and in one or two cases almost doubles PIO performance. Add a bmdma32 operations set we can inherit and activate it for some controllers Signed-off-by: Alan Cox Signed-off-by: Jeff Garzik --- include/linux/libata.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/libata.h b/include/linux/libata.h index 3449de597eff..4f7c8fb4d3fe 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -1518,6 +1518,7 @@ extern void sata_pmp_error_handler(struct ata_port *ap); extern const struct ata_port_operations ata_sff_port_ops; extern const struct ata_port_operations ata_bmdma_port_ops; +extern const struct ata_port_operations ata_bmdma32_port_ops; /* PIO only, sg_tablesize and dma_boundary limits can be removed */ #define ATA_PIO_SHT(drv_name) \ @@ -1545,6 +1546,8 @@ extern void ata_sff_exec_command(struct ata_port *ap, const struct ata_taskfile *tf); extern unsigned int ata_sff_data_xfer(struct ata_device *dev, unsigned char *buf, unsigned int buflen, int rw); +extern unsigned int ata_sff_data_xfer32(struct ata_device *dev, + unsigned char *buf, unsigned int buflen, int rw); extern unsigned int ata_sff_data_xfer_noirq(struct ata_device *dev, unsigned char *buf, unsigned int buflen, int rw); extern u8 ata_sff_irq_on(struct ata_port *ap); -- cgit v1.2.2 From fefae48bf8caab7d56ee4f8181f06602cf73d29e Mon Sep 17 00:00:00 2001 From: Wolfgang Grandegger Date: Thu, 8 Jan 2009 19:21:27 +0100 Subject: [MTD] CFI: remove major/minor version check for command set 0x0002 The NOR Flash memory K8P2815UQB from Samsung uses the major version number '0'. Add a quirk to cope with it. Signed-off-by: Wolfgang Grandegger Signed-off-by: David Woodhouse --- include/linux/mtd/cfi.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mtd/cfi.h b/include/linux/mtd/cfi.h index 00e2b575021f..88d3d8fbf9f2 100644 --- a/include/linux/mtd/cfi.h +++ b/include/linux/mtd/cfi.h @@ -520,6 +520,7 @@ struct cfi_fixup { #define CFI_MFR_AMD 0x0001 #define CFI_MFR_ATMEL 0x001F +#define CFI_MFR_SAMSUNG 0x00EC #define CFI_MFR_ST 0x0020 /* STMicroelectronics */ void cfi_fixup(struct mtd_info *mtd, struct cfi_fixup* fixups); -- cgit v1.2.2 From 8dd2f36f317569665e454268a2677cfba3e848f1 Mon Sep 17 00:00:00 2001 From: Andreas Eversberg Date: Sat, 2 Aug 2008 22:51:52 +0200 Subject: mISDN: Add feature via MISDN_CTRL_FILL_EMPTY to fill fifo if empty This prevents underrun of fifo when filled and in case of an underrun it prevents subsequent underruns due to jitter. Improve dsp, so buffers are kept filled with a certain delay, so moderate jitter will not cause underrun all the time -> the audio quality is highly improved. tones are not interrupted by gaps anymore, except when CPU is stalling or in high load. Signed-off-by: Andreas Eversberg Signed-off-by: Karsten Keil --- include/linux/mISDNhw.h | 25 +++++++++++++------------ include/linux/mISDNif.h | 1 + 2 files changed, 14 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mISDNhw.h b/include/linux/mISDNhw.h index e794dfb87504..9384b92dfc65 100644 --- a/include/linux/mISDNhw.h +++ b/include/linux/mISDNhw.h @@ -57,20 +57,21 @@ #define FLG_L2DATA 14 /* channel use L2 DATA primitivs */ #define FLG_ORIGIN 15 /* channel is on origin site */ /* channel specific stuff */ +#define FLG_FILLEMPTY 16 /* fill fifo on first frame (empty) */ /* arcofi specific */ -#define FLG_ARCOFI_TIMER 16 -#define FLG_ARCOFI_ERROR 17 +#define FLG_ARCOFI_TIMER 17 +#define FLG_ARCOFI_ERROR 18 /* isar specific */ -#define FLG_INITIALIZED 16 -#define FLG_DLEETX 17 -#define FLG_LASTDLE 18 -#define FLG_FIRST 19 -#define FLG_LASTDATA 20 -#define FLG_NMD_DATA 21 -#define FLG_FTI_RUN 22 -#define FLG_LL_OK 23 -#define FLG_LL_CONN 24 -#define FLG_DTMFSEND 25 +#define FLG_INITIALIZED 17 +#define FLG_DLEETX 18 +#define FLG_LASTDLE 19 +#define FLG_FIRST 20 +#define FLG_LASTDATA 21 +#define FLG_NMD_DATA 22 +#define FLG_FTI_RUN 23 +#define FLG_LL_OK 24 +#define FLG_LL_CONN 25 +#define FLG_DTMFSEND 26 /* workq events */ #define FLG_RECVQUEUE 30 diff --git a/include/linux/mISDNif.h b/include/linux/mISDNif.h index 8f2d60da04e7..74c903cd7a0a 100644 --- a/include/linux/mISDNif.h +++ b/include/linux/mISDNif.h @@ -312,6 +312,7 @@ clear_channelmap(u_int nr, u_char *map) #define MISDN_CTRL_SETPEER 0x0040 #define MISDN_CTRL_UNSETPEER 0x0080 #define MISDN_CTRL_RX_OFF 0x0100 +#define MISDN_CTRL_FILL_EMPTY 0x0200 #define MISDN_CTRL_HW_FEATURES_OP 0x2000 #define MISDN_CTRL_HW_FEATURES 0x2001 #define MISDN_CTRL_HFC_OP 0x4000 -- cgit v1.2.2 From 8b6015f736125050722dbe59c4f943e78cd626f0 Mon Sep 17 00:00:00 2001 From: Matthias Urlichs Date: Tue, 12 Aug 2008 10:12:09 +0200 Subject: mISDN: Added an ioctl to change the device name To get persistent device names with hotplug we need to rename devices sometime. Signed-off-by: Matthias Urlichs Signed-off-by: Karsten Keil --- include/linux/mISDNif.h | 31 +++++++++++++++++++------------ 1 file changed, 19 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mISDNif.h b/include/linux/mISDNif.h index 74c903cd7a0a..be09476ed854 100644 --- a/include/linux/mISDNif.h +++ b/include/linux/mISDNif.h @@ -36,8 +36,8 @@ * - should be incremented on every checkin */ #define MISDN_MAJOR_VERSION 1 -#define MISDN_MINOR_VERSION 0 -#define MISDN_RELEASE 19 +#define MISDN_MINOR_VERSION 1 +#define MISDN_RELEASE 20 /* primitives for information exchange * generell format @@ -255,16 +255,6 @@ struct sockaddr_mISDN { unsigned char tei; }; -/* timer device ioctl */ -#define IMADDTIMER _IOR('I', 64, int) -#define IMDELTIMER _IOR('I', 65, int) -/* socket ioctls */ -#define IMGETVERSION _IOR('I', 66, int) -#define IMGETCOUNT _IOR('I', 67, int) -#define IMGETDEVINFO _IOR('I', 68, int) -#define IMCTRLREQ _IOR('I', 69, int) -#define IMCLEAR_L2 _IOR('I', 70, int) - struct mISDNversion { unsigned char major; unsigned char minor; @@ -281,6 +271,23 @@ struct mISDN_devinfo { char name[MISDN_MAX_IDLEN]; }; +struct mISDN_devrename { + u_int id; + char name[MISDN_MAX_IDLEN]; /* new name */ +}; + +/* timer device ioctl */ +#define IMADDTIMER _IOR('I', 64, int) +#define IMDELTIMER _IOR('I', 65, int) + +/* socket ioctls */ +#define IMGETVERSION _IOR('I', 66, int) +#define IMGETCOUNT _IOR('I', 67, int) +#define IMGETDEVINFO _IOR('I', 68, int) +#define IMCTRLREQ _IOR('I', 69, int) +#define IMCLEAR_L2 _IOR('I', 70, int) +#define IMSETDEVNAME _IOR('I', 71, struct mISDN_devrename) + static inline int test_channelmap(u_int nr, u_char *map) { -- cgit v1.2.2 From 837468d135dcc49cdabc9fa92fc9550479f60704 Mon Sep 17 00:00:00 2001 From: Matthias Urlichs Date: Sat, 16 Aug 2008 00:04:33 +0200 Subject: mISDN: Use struct device name field struct device already has a 'name' member, use it. Signed-off-by: Matthias Urlichs Signed-off-by: Karsten Keil --- include/linux/mISDNif.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mISDNif.h b/include/linux/mISDNif.h index be09476ed854..a59febb6143a 100644 --- a/include/linux/mISDNif.h +++ b/include/linux/mISDNif.h @@ -431,7 +431,6 @@ struct mISDN_sock { struct mISDNdevice { struct mISDNchannel D; u_int id; - char name[MISDN_MAX_IDLEN]; u_int Dprotocols; u_int Bprotocols; u_int nrbchan; -- cgit v1.2.2 From 1f28fa19d34c0d9186f274e61e4b3dcfc6428c5c Mon Sep 17 00:00:00 2001 From: Martin Bachem Date: Wed, 3 Sep 2008 15:17:45 +0200 Subject: mISDN: Add E-Channel logging features New prim PH_DATA_E_IND. - all E-ch frames are indicated by recv_Echannel(), which pushes E-Channel frames into dch's rqueue - if dchannel is opened with channel nr 0, no E-Channel logging is requested - if dchannel is opened with channel nr 1, E-Channel logging is requested. if layer1 does not support that, -EINVAL in return is appropriate Signed-off-by: Martin Bachem Signed-off-by: Karsten Keil --- include/linux/mISDNhw.h | 1 + include/linux/mISDNif.h | 1 + 2 files changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mISDNhw.h b/include/linux/mISDNhw.h index 9384b92dfc65..97ffdc1d3442 100644 --- a/include/linux/mISDNhw.h +++ b/include/linux/mISDNhw.h @@ -184,6 +184,7 @@ extern void queue_ch_frame(struct mISDNchannel *, u_int, extern int dchannel_senddata(struct dchannel *, struct sk_buff *); extern int bchannel_senddata(struct bchannel *, struct sk_buff *); extern void recv_Dchannel(struct dchannel *); +extern void recv_Echannel(struct dchannel *, struct dchannel *); extern void recv_Bchannel(struct bchannel *); extern void recv_Dchannel_skb(struct dchannel *, struct sk_buff *); extern void recv_Bchannel_skb(struct bchannel *, struct sk_buff *); diff --git a/include/linux/mISDNif.h b/include/linux/mISDNif.h index a59febb6143a..f75d596c5316 100644 --- a/include/linux/mISDNif.h +++ b/include/linux/mISDNif.h @@ -80,6 +80,7 @@ #define PH_DEACTIVATE_IND 0x0202 #define PH_DEACTIVATE_CNF 0x4202 #define PH_DATA_IND 0x2002 +#define PH_DATA_E_IND 0x3002 #define MPH_ACTIVATE_IND 0x0502 #define MPH_DEACTIVATE_IND 0x0602 #define MPH_INFORMATION_IND 0x0702 -- cgit v1.2.2 From 3bd69ad197a4a3d0085a5dc3b5796111bf176b12 Mon Sep 17 00:00:00 2001 From: Andreas Eversberg Date: Sat, 6 Sep 2008 09:03:46 +0200 Subject: mISDN: Add ISDN sample clock API to mISDN core Add ISDN sample clock API to mISDN core (new file clock.c) hfcmulti and mISDNdsp use clock API. Signed-off-by: Andreas Eversberg Signed-off-by: Karsten Keil --- include/linux/mISDNif.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mISDNif.h b/include/linux/mISDNif.h index f75d596c5316..364f1018f0d1 100644 --- a/include/linux/mISDNif.h +++ b/include/linux/mISDNif.h @@ -371,6 +371,7 @@ struct mISDN_ctrl_req { #define DEBUG_L2_TEI 0x00100000 #define DEBUG_L2_TEIFSM 0x00200000 #define DEBUG_TIMER 0x01000000 +#define DEBUG_CLOCK 0x02000000 #define mISDN_HEAD_P(s) ((struct mISDNhead *)&s->cb[0]) #define mISDN_HEAD_PRIM(s) (((struct mISDNhead *)&s->cb[0])->prim) @@ -384,6 +385,7 @@ struct mISDN_ctrl_req { struct mISDNchannel; struct mISDNdevice; struct mISDNstack; +struct mISDNclock; struct channel_req { u_int protocol; @@ -460,6 +462,16 @@ struct mISDNstack { #endif }; +typedef int (clockctl_func_t)(void *, int); + +struct mISDNclock { + struct list_head list; + char name[64]; + int pri; + clockctl_func_t *ctl; + void *priv; +}; + /* global alloc/queue functions */ static inline struct sk_buff * @@ -510,8 +522,13 @@ extern int mISDN_register_device(struct mISDNdevice *, char *name); extern void mISDN_unregister_device(struct mISDNdevice *); extern int mISDN_register_Bprotocol(struct Bprotocol *); extern void mISDN_unregister_Bprotocol(struct Bprotocol *); +extern struct mISDNclock *mISDN_register_clock(char *, int, clockctl_func_t *, + void *); +extern void mISDN_unregister_clock(struct mISDNclock *); extern void set_channel_address(struct mISDNchannel *, u_int, u_int); +extern void mISDN_clock_update(struct mISDNclock *, int, struct timeval *); +extern unsigned short mISDN_clock_get(void); #endif /* __KERNEL__ */ #endif /* mISDNIF_H */ -- cgit v1.2.2 From 02282eee56b75a35e6bbc42cc34c9005eb1653f4 Mon Sep 17 00:00:00 2001 From: Martin Bachem Date: Mon, 8 Sep 2008 15:57:48 +0200 Subject: mISDN: Add ISDN_P_TE_UP0 / ISDN_P_NT_UP0 - new layer1 protocols for UP0 bus - helper #defines to test for TE/NT/S0/E1/UP0 Signed-off-by: Martin Bachem Signed-off-by: Karsten Keil --- include/linux/mISDNif.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mISDNif.h b/include/linux/mISDNif.h index 364f1018f0d1..7f65aa0c1cc5 100644 --- a/include/linux/mISDNif.h +++ b/include/linux/mISDNif.h @@ -200,6 +200,18 @@ #define ISDN_P_NT_S0 0x02 #define ISDN_P_TE_E1 0x03 #define ISDN_P_NT_E1 0x04 +#define ISDN_P_TE_UP0 0x05 +#define ISDN_P_NT_UP0 0x06 + +#define IS_ISDN_P_TE(p) ((p == ISDN_P_TE_S0) || (p == ISDN_P_TE_E1) || \ + (p == ISDN_P_TE_UP0)) +#define IS_ISDN_P_NT(p) ((p == ISDN_P_NT_S0) || (p == ISDN_P_NT_E1) || \ + (p == ISDN_P_NT_UP0)) +#define IS_ISDN_P_S0(p) ((p == ISDN_P_TE_S0) || (p == ISDN_P_NT_S0)) +#define IS_ISDN_P_E1(p) ((p == ISDN_P_TE_E1) || (p == ISDN_P_NT_E1)) +#define IS_ISDN_P_UP0(p) ((p == ISDN_P_TE_UP0) || (p == ISDN_P_NT_UP0)) + + #define ISDN_P_LAPD_TE 0x10 #define ISDN_P_LAPD_NT 0x11 -- cgit v1.2.2 From 1b4d33121f1d991f6ae226cc3333428ff87627bb Mon Sep 17 00:00:00 2001 From: Andreas Eversberg Date: Sun, 14 Sep 2008 12:30:18 +0200 Subject: mISDN: Fix deactivation, if peer IP is removed from l1oip instance. Added GETPEER operation. Socket now checks if device is already busy at a differen mode. Signed-off-by: Andreas Eversberg Signed-off-by: Karsten Keil --- include/linux/mISDNif.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mISDNif.h b/include/linux/mISDNif.h index 7f65aa0c1cc5..3f9988849f32 100644 --- a/include/linux/mISDNif.h +++ b/include/linux/mISDNif.h @@ -204,9 +204,9 @@ #define ISDN_P_NT_UP0 0x06 #define IS_ISDN_P_TE(p) ((p == ISDN_P_TE_S0) || (p == ISDN_P_TE_E1) || \ - (p == ISDN_P_TE_UP0)) + (p == ISDN_P_TE_UP0) || (p == ISDN_P_LAPD_TE)) #define IS_ISDN_P_NT(p) ((p == ISDN_P_NT_S0) || (p == ISDN_P_NT_E1) || \ - (p == ISDN_P_NT_UP0)) + (p == ISDN_P_NT_UP0) || (p == ISDN_P_LAPD_NT)) #define IS_ISDN_P_S0(p) ((p == ISDN_P_TE_S0) || (p == ISDN_P_NT_S0)) #define IS_ISDN_P_E1(p) ((p == ISDN_P_TE_E1) || (p == ISDN_P_NT_E1)) #define IS_ISDN_P_UP0(p) ((p == ISDN_P_TE_UP0) || (p == ISDN_P_NT_UP0)) @@ -333,6 +333,7 @@ clear_channelmap(u_int nr, u_char *map) #define MISDN_CTRL_UNSETPEER 0x0080 #define MISDN_CTRL_RX_OFF 0x0100 #define MISDN_CTRL_FILL_EMPTY 0x0200 +#define MISDN_CTRL_GETPEER 0x0400 #define MISDN_CTRL_HW_FEATURES_OP 0x2000 #define MISDN_CTRL_HW_FEATURES 0x2001 #define MISDN_CTRL_HFC_OP 0x4000 -- cgit v1.2.2 From b36b654a7e82308cea063cdf909a7f246105c2a3 Mon Sep 17 00:00:00 2001 From: Matthias Urlichs Date: Sat, 16 Aug 2008 00:09:24 +0200 Subject: mISDN: Create /sys/class/mISDN Create /sys/class/mISDN and implement functions to handle device renames. Signed-Off-By: Matthias Urlichs Signed-off-by: Karsten Keil --- include/linux/mISDNif.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mISDNif.h b/include/linux/mISDNif.h index 3f9988849f32..d4229aebf648 100644 --- a/include/linux/mISDNif.h +++ b/include/linux/mISDNif.h @@ -531,7 +531,8 @@ _queue_data(struct mISDNchannel *ch, u_int prim, /* global register/unregister functions */ -extern int mISDN_register_device(struct mISDNdevice *, char *name); +extern int mISDN_register_device(struct mISDNdevice *, + struct device *parent, char *name); extern void mISDN_unregister_device(struct mISDNdevice *); extern int mISDN_register_Bprotocol(struct Bprotocol *); extern void mISDN_unregister_Bprotocol(struct Bprotocol *); @@ -539,6 +540,11 @@ extern struct mISDNclock *mISDN_register_clock(char *, int, clockctl_func_t *, void *); extern void mISDN_unregister_clock(struct mISDNclock *); +static inline struct mISDNdevice *dev_to_mISDN(struct device *dev) +{ + return dev_get_drvdata(dev); +} + extern void set_channel_address(struct mISDNchannel *, u_int, u_int); extern void mISDN_clock_update(struct mISDNclock *, int, struct timeval *); extern unsigned short mISDN_clock_get(void); -- cgit v1.2.2 From 3f75e84a6a697c5cffb78ee15e79498a35473e05 Mon Sep 17 00:00:00 2001 From: Martin Bachem Date: Tue, 4 Nov 2008 14:11:22 +0100 Subject: mISDN: Add layer1 prim MPH_INFORMATION_REQ MPH_INFORMATION provides full D- and B-Channel status overview - new layer1 primitive: MPF_INFORMATON_REQ - layer1 replies with MPH_INFORMATION_IND containing - dch->[state,Flags,nrbchan] - bch[]->[protocol,Flags] - hardware driver should send MPH_INFORMATION_IND on all ph state changes and BChannel state changes to MISDN_ID_ANY Signed-off-by: Martin Bachem Signed-off-by: Karsten Keil --- include/linux/mISDNif.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mISDNif.h b/include/linux/mISDNif.h index d4229aebf648..557477ac3d5b 100644 --- a/include/linux/mISDNif.h +++ b/include/linux/mISDNif.h @@ -289,6 +289,23 @@ struct mISDN_devrename { char name[MISDN_MAX_IDLEN]; /* new name */ }; +/* MPH_INFORMATION_REQ payload */ +struct ph_info_ch { + __u32 protocol; + __u64 Flags; +}; + +struct ph_info_dch { + struct ph_info_ch ch; + __u16 state; + __u16 num_bch; +}; + +struct ph_info { + struct ph_info_dch dch; + struct ph_info_ch bch[]; +}; + /* timer device ioctl */ #define IMADDTIMER _IOR('I', 64, int) #define IMDELTIMER _IOR('I', 65, int) -- cgit v1.2.2 From 4db8e282f2d1dfa43d51ce2a4817901312c9134d Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 9 Jan 2009 14:32:46 -0800 Subject: Revert "driver core: move knode_bus into private structure" This reverts commit b9daa99ee533578e3f88231e7a16784dcb44ec42. Turns out that device_initialize shouldn't fail silently. This series needs to be reworked in order to get into proper shape. Reported-by: Stefan Richter Cc: Alan Cox Cc: Kay Sievers Signed-off-by: Greg Kroah-Hartman --- include/linux/device.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/device.h b/include/linux/device.h index 7d9da4b4993f..8987f4776064 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -366,6 +366,7 @@ struct device_dma_parameters { }; struct device { + struct klist_node knode_bus; struct device *parent; struct device_private *p; -- cgit v1.2.2 From cda5e83fdea476dce9c0a9b1152cd6ca46832cc4 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 9 Jan 2009 14:44:18 -0800 Subject: Revert "driver core: move knode_driver into private structure" This reverts commit 93e746db183b3bdbbda67900f79b5835f9cb388f. Turns out that device_initialize shouldn't fail silently. This series needs to be reworked in order to get into proper shape. Reported-by: Stefan Richter Cc: Alan Cox Cc: Kay Sievers Signed-off-by: Greg Kroah-Hartman --- include/linux/device.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/device.h b/include/linux/device.h index 8987f4776064..c66ceb15acd8 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -366,6 +366,7 @@ struct device_dma_parameters { }; struct device { + struct klist_node knode_driver; struct klist_node knode_bus; struct device *parent; -- cgit v1.2.2 From e2d4077678c7ec7661003c268120582adc544897 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 9 Jan 2009 14:55:37 -0800 Subject: Revert "driver core: move klist_children into private structure" This reverts commit 11c3b5c3e08f4d855cbef52883c266b9ab9df879. Turns out that device_initialize shouldn't fail silently. This series needs to be reworked in order to get into proper shape. Reported-by: Stefan Richter Cc: Alan Cox Cc: Kay Sievers Signed-off-by: Greg Kroah-Hartman --- include/linux/device.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/device.h b/include/linux/device.h index c66ceb15acd8..2975351635d3 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -366,6 +366,8 @@ struct device_dma_parameters { }; struct device { + struct klist klist_children; + struct klist_node knode_parent; /* node in sibling list */ struct klist_node knode_driver; struct klist_node knode_bus; struct device *parent; -- cgit v1.2.2 From 926beadb3dfaddccb3348a5b9e6c2a1f8290a220 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 9 Jan 2009 15:06:12 -0800 Subject: Revert "driver core: create a private portion of struct device" This reverts commit 2831fe6f9cc4e16c103504ee09a47a084297c0f3. Turns out that device_initialize shouldn't fail silently. This series needs to be reworked in order to get into proper shape. Reported-by: Stefan Richter Cc: Alan Cox Cc: Kay Sievers Signed-off-by: Greg Kroah-Hartman --- include/linux/device.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/device.h b/include/linux/device.h index 2975351635d3..45e5b1921fbb 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -28,7 +28,6 @@ #define BUS_ID_SIZE 20 struct device; -struct device_private; struct device_driver; struct driver_private; struct class; @@ -372,8 +371,6 @@ struct device { struct klist_node knode_bus; struct device *parent; - struct device_private *p; - struct kobject kobj; char bus_id[BUS_ID_SIZE]; /* position on parent bus */ unsigned uevent_suppress:1; -- cgit v1.2.2 From 85c210edc46d602a1562aeea0fc74919349c8cf0 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Fri, 9 Jan 2009 16:40:53 -0800 Subject: compiler-gcc.h: add more comments to RELOC_HIDE Requested by C. Lameter Signed-off-by: Andi Kleen Cc: Christoph Lameter Cc: Andi Kleen Cc: Rusty Russell Cc: Stephen Rothwell Cc: Mike Travis Cc: Ingo Molnar Cc: Richard Henderson Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/compiler-gcc.h | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index af40f8eb86f0..1514d534deeb 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -11,9 +11,19 @@ /* The "volatile" is due to gcc bugs */ #define barrier() __asm__ __volatile__("": : :"memory") -/* This macro obfuscates arithmetic on a variable address so that gcc - shouldn't recognize the original var, and make assumptions about it */ /* + * This macro obfuscates arithmetic on a variable address so that gcc + * shouldn't recognize the original var, and make assumptions about it. + * + * This is needed because the C standard makes it undefined to do + * pointer arithmetic on "objects" outside their boundaries and the + * gcc optimizers assume this is the case. In particular they + * assume such arithmetic does not wrap. + * + * A miscompilation has been observed because of this on PPC. + * To work around it we hide the relationship of the pointer and the object + * using this macro. + * * Versions of the ppc64 compiler before 4.1 had a bug where use of * RELOC_HIDE could trash r30. The bug can be worked around by changing * the inline assembly constraint from =g to =r, in this particular -- cgit v1.2.2 From 69347a236b22c3962ea812511495e502dedfd50c Mon Sep 17 00:00:00 2001 From: Harvey Harrison Date: Fri, 9 Jan 2009 16:40:56 -0800 Subject: memstick: annotate endianness of attribute structs The code was shifting the endianness appropriately everywhere, annotate the structs to avoid the sparse warnings when assigning the endian types to the struct members, or passing them to be[16|32]_to_cpu: drivers/memstick/core/mspro_block.c:331:4: warning: cast to restricted __be16 drivers/memstick/core/mspro_block.c:333:4: warning: cast to restricted __be16 drivers/memstick/core/mspro_block.c:335:4: warning: cast to restricted __be16 drivers/memstick/core/mspro_block.c:337:4: warning: cast to restricted __be16 drivers/memstick/core/mspro_block.c:341:4: warning: cast to restricted __be16 drivers/memstick/core/mspro_block.c:347:4: warning: cast to restricted __be32 drivers/memstick/core/mspro_block.c:356:4: warning: cast to restricted __be16 drivers/memstick/core/mspro_block.c:358:4: warning: cast to restricted __be16 drivers/memstick/core/mspro_block.c:364:4: warning: cast to restricted __be16 drivers/memstick/core/mspro_block.c:367:4: warning: cast to restricted __be16 drivers/memstick/core/mspro_block.c:369:4: warning: cast to restricted __be16 drivers/memstick/core/mspro_block.c:371:4: warning: cast to restricted __be16 drivers/memstick/core/mspro_block.c:377:4: warning: cast to restricted __be16 drivers/memstick/core/mspro_block.c:478:4: warning: cast to restricted __be16 drivers/memstick/core/mspro_block.c:480:4: warning: cast to restricted __be16 drivers/memstick/core/mspro_block.c:482:4: warning: cast to restricted __be16 drivers/memstick/core/mspro_block.c:484:4: warning: cast to restricted __be16 drivers/memstick/core/mspro_block.c:486:4: warning: cast to restricted __be16 drivers/memstick/core/mspro_block.c:689:22: expected unsigned int [unsigned] [assigned] data_address drivers/memstick/core/mspro_block.c:689:22: got restricted __be32 [usertype] drivers/memstick/core/mspro_block.c:697:3: warning: cast to restricted __be32 drivers/memstick/core/mspro_block.c:960:17: warning: incorrect type in initializer (different base types) drivers/memstick/core/mspro_block.c:960:17: expected unsigned short [unsigned] data_count drivers/memstick/core/mspro_block.c:960:17: got restricted __be16 [usertype] drivers/memstick/core/mspro_block.c:993:6: warning: cast to restricted __be16 drivers/memstick/core/mspro_block.c:995:28: warning: cast to restricted __be16 Signed-off-by: Harvey Harrison Cc: Alex Dubov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memstick.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memstick.h b/include/linux/memstick.h index d0c37e682234..690c35a9d4cc 100644 --- a/include/linux/memstick.h +++ b/include/linux/memstick.h @@ -100,8 +100,8 @@ struct mspro_param_register { #define MEMSTICK_SYS_PAR8 0x40 #define MEMSTICK_SYS_SERIAL 0x80 - unsigned short data_count; - unsigned int data_address; + __be16 data_count; + __be32 data_address; unsigned char tpc_param; } __attribute__((packed)); -- cgit v1.2.2 From c4be0c1dc4cdc37b175579be1460f15ac6495e9a Mon Sep 17 00:00:00 2001 From: Takashi Sato Date: Fri, 9 Jan 2009 16:40:58 -0800 Subject: filesystem freeze: add error handling of write_super_lockfs/unlockfs Currently, ext3 in mainline Linux doesn't have the freeze feature which suspends write requests. So, we cannot take a backup which keeps the filesystem's consistency with the storage device's features (snapshot and replication) while it is mounted. In many case, a commercial filesystem (e.g. VxFS) has the freeze feature and it would be used to get the consistent backup. If Linux's standard filesystem ext3 has the freeze feature, we can do it without a commercial filesystem. So I have implemented the ioctls of the freeze feature. I think we can take the consistent backup with the following steps. 1. Freeze the filesystem with the freeze ioctl. 2. Separate the replication volume or create the snapshot with the storage device's feature. 3. Unfreeze the filesystem with the unfreeze ioctl. 4. Take the backup from the separated replication volume or the snapshot. This patch: VFS: Changed the type of write_super_lockfs and unlockfs from "void" to "int" so that they can return an error. Rename write_super_lockfs and unlockfs of the super block operation freeze_fs and unfreeze_fs to avoid a confusion. ext3, ext4, xfs, gfs2, jfs: Changed the type of write_super_lockfs and unlockfs from "void" to "int" so that write_super_lockfs returns an error if needed, and unlockfs always returns 0. reiserfs: Changed the type of write_super_lockfs and unlockfs from "void" to "int" so that they always return 0 (success) to keep a current behavior. Signed-off-by: Takashi Sato Signed-off-by: Masayuki Hamaguchi Cc: Cc: Cc: Christoph Hellwig Cc: Dave Kleikamp Cc: Dave Chinner Cc: Alasdair G Kergon Cc: Al Viro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/fs.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 0b87b29f4797..3e59182de9df 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1377,8 +1377,8 @@ struct super_operations { void (*put_super) (struct super_block *); void (*write_super) (struct super_block *); int (*sync_fs)(struct super_block *sb, int wait); - void (*write_super_lockfs) (struct super_block *); - void (*unlockfs) (struct super_block *); + int (*freeze_fs) (struct super_block *); + int (*unfreeze_fs) (struct super_block *); int (*statfs) (struct dentry *, struct kstatfs *); int (*remount_fs) (struct super_block *, int *, char *); void (*clear_inode) (struct inode *); -- cgit v1.2.2 From fcccf502540e3d752d33b2d8e976034dee81f9f7 Mon Sep 17 00:00:00 2001 From: Takashi Sato Date: Fri, 9 Jan 2009 16:40:59 -0800 Subject: filesystem freeze: implement generic freeze feature The ioctls for the generic freeze feature are below. o Freeze the filesystem int ioctl(int fd, int FIFREEZE, arg) fd: The file descriptor of the mountpoint FIFREEZE: request code for the freeze arg: Ignored Return value: 0 if the operation succeeds. Otherwise, -1 o Unfreeze the filesystem int ioctl(int fd, int FITHAW, arg) fd: The file descriptor of the mountpoint FITHAW: request code for unfreeze arg: Ignored Return value: 0 if the operation succeeds. Otherwise, -1 Error number: If the filesystem has already been unfrozen, errno is set to EINVAL. [akpm@linux-foundation.org: fix CONFIG_BLOCK=n] Signed-off-by: Takashi Sato Signed-off-by: Masayuki Hamaguchi Cc: Cc: Cc: Christoph Hellwig Cc: Dave Kleikamp Cc: Dave Chinner Cc: Alasdair G Kergon Cc: Al Viro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/buffer_head.h | 11 ++++++++++- include/linux/fs.h | 7 +++++++ 2 files changed, 17 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index 8605f8a74df9..bd7ac793be19 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -171,7 +171,7 @@ void __wait_on_buffer(struct buffer_head *); wait_queue_head_t *bh_waitq_head(struct buffer_head *bh); int fsync_bdev(struct block_device *); struct super_block *freeze_bdev(struct block_device *); -void thaw_bdev(struct block_device *, struct super_block *); +int thaw_bdev(struct block_device *, struct super_block *); int fsync_super(struct super_block *); int fsync_no_super(struct block_device *); struct buffer_head *__find_get_block(struct block_device *bdev, sector_t block, @@ -346,6 +346,15 @@ static inline int remove_inode_buffers(struct inode *inode) { return 1; } static inline int sync_mapping_buffers(struct address_space *mapping) { return 0; } static inline void invalidate_bdev(struct block_device *bdev) {} +static inline struct super_block *freeze_bdev(struct block_device *sb) +{ + return NULL; +} + +static inline int thaw_bdev(struct block_device *bdev, struct super_block *sb) +{ + return 0; +} #endif /* CONFIG_BLOCK */ #endif /* _LINUX_BUFFER_HEAD_H */ diff --git a/include/linux/fs.h b/include/linux/fs.h index 3e59182de9df..6022f44043f2 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -234,6 +234,8 @@ struct inodes_stat_t { #define BMAP_IOCTL 1 /* obsolete - kept for compatibility */ #define FIBMAP _IO(0x00,1) /* bmap access */ #define FIGETBSZ _IO(0x00,2) /* get the block size used for bmap */ +#define FIFREEZE _IOWR('X', 119, int) /* Freeze */ +#define FITHAW _IOWR('X', 120, int) /* Thaw */ #define FS_IOC_GETFLAGS _IOR('f', 1, long) #define FS_IOC_SETFLAGS _IOW('f', 2, long) @@ -591,6 +593,11 @@ struct block_device { * care to not mess up bd_private for that case. */ unsigned long bd_private; + + /* The counter of freeze processes */ + int bd_fsfreeze_count; + /* Mutex for freeze */ + struct mutex bd_fsfreeze_mutex; }; /* -- cgit v1.2.2 From f4b477c47332367d35686bd2b808c2156b96d7c7 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Sat, 10 Jan 2009 11:12:09 +0000 Subject: rbtree: add const qualifier to some functions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The 'rb_first()', 'rb_last()', 'rb_next()' and 'rb_prev()' calls take a pointer to an RB node or RB root. They do not change the pointed objects, so add a 'const' qualifier in order to make life of the users of these functions easier. Indeed, if I have my own constant pointer &const struct my_type *p, and I call 'rb_next(&p->rb)', I get a GCC warning: warning: passing argument 1 of ‘rb_next’ discards qualifiers from pointer target type Signed-off-by: Artem Bityutskiy Signed-off-by: David Woodhouse Signed-off-by: Linus Torvalds --- include/linux/rbtree.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rbtree.h b/include/linux/rbtree.h index 344bc3495ddb..9c295411d01f 100644 --- a/include/linux/rbtree.h +++ b/include/linux/rbtree.h @@ -140,10 +140,10 @@ extern void rb_insert_color(struct rb_node *, struct rb_root *); extern void rb_erase(struct rb_node *, struct rb_root *); /* Find logical next and previous nodes in a tree */ -extern struct rb_node *rb_next(struct rb_node *); -extern struct rb_node *rb_prev(struct rb_node *); -extern struct rb_node *rb_first(struct rb_root *); -extern struct rb_node *rb_last(struct rb_root *); +extern struct rb_node *rb_next(const struct rb_node *); +extern struct rb_node *rb_prev(const struct rb_node *); +extern struct rb_node *rb_first(const struct rb_root *); +extern struct rb_node *rb_last(const struct rb_root *); /* Fast replacement of a single node without remove/rebalance/add/rebalance */ extern void rb_replace_node(struct rb_node *victim, struct rb_node *new, -- cgit v1.2.2