From 5e1f8c9e20a92743eefc9a82c2db835213905e26 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Tue, 28 Oct 2008 13:21:55 -0400 Subject: ext3: Add support for non-native signed/unsigned htree hash algorithms The original ext3 hash algorithms assumed that variables of type char were signed, as God and K&R intended. Unfortunately, this assumption is not true on some architectures. Userspace support for marking filesystems with non-native signed/unsigned chars was added two years ago, but the kernel-side support was never added (until now). Signed-off-by: "Theodore Ts'o" Cc: akpm@linux-foundation.org Cc: linux-kernel@vger.kernel.org --- include/linux/ext3_fs.h | 28 +++++++++++++++++++++++++++- include/linux/ext3_fs_sb.h | 1 + 2 files changed, 28 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ext3_fs.h b/include/linux/ext3_fs.h index d14f02918483..9004794a35fe 100644 --- a/include/linux/ext3_fs.h +++ b/include/linux/ext3_fs.h @@ -353,6 +353,13 @@ struct ext3_inode { #define EXT3_ERROR_FS 0x0002 /* Errors detected */ #define EXT3_ORPHAN_FS 0x0004 /* Orphans being recovered */ +/* + * Misc. filesystem flags + */ +#define EXT2_FLAGS_SIGNED_HASH 0x0001 /* Signed dirhash in use */ +#define EXT2_FLAGS_UNSIGNED_HASH 0x0002 /* Unsigned dirhash in use */ +#define EXT2_FLAGS_TEST_FILESYS 0x0004 /* to test development code */ + /* * Mount flags */ @@ -489,7 +496,23 @@ struct ext3_super_block { __u16 s_reserved_word_pad; __le32 s_default_mount_opts; __le32 s_first_meta_bg; /* First metablock block group */ - __u32 s_reserved[190]; /* Padding to the end of the block */ + __le32 s_mkfs_time; /* When the filesystem was created */ + __le32 s_jnl_blocks[17]; /* Backup of the journal inode */ + /* 64bit support valid if EXT4_FEATURE_COMPAT_64BIT */ +/*150*/ __le32 s_blocks_count_hi; /* Blocks count */ + __le32 s_r_blocks_count_hi; /* Reserved blocks count */ + __le32 s_free_blocks_count_hi; /* Free blocks count */ + __le16 s_min_extra_isize; /* All inodes have at least # bytes */ + __le16 s_want_extra_isize; /* New inodes should reserve # bytes */ + __le32 s_flags; /* Miscellaneous flags */ + __le16 s_raid_stride; /* RAID stride */ + __le16 s_mmp_interval; /* # seconds to wait in MMP checking */ + __le64 s_mmp_block; /* Block for multi-mount protection */ + __le32 s_raid_stripe_width; /* blocks on all data disks (N*stride)*/ + __u8 s_log_groups_per_flex; /* FLEX_BG group size */ + __u8 s_reserved_char_pad2; + __le16 s_reserved_pad; + __u32 s_reserved[162]; /* Padding to the end of the block */ }; #ifdef __KERNEL__ @@ -694,6 +717,9 @@ static inline __le16 ext3_rec_len_to_disk(unsigned len) #define DX_HASH_LEGACY 0 #define DX_HASH_HALF_MD4 1 #define DX_HASH_TEA 2 +#define DX_HASH_LEGACY_UNSIGNED 3 +#define DX_HASH_HALF_MD4_UNSIGNED 4 +#define DX_HASH_TEA_UNSIGNED 5 #ifdef __KERNEL__ diff --git a/include/linux/ext3_fs_sb.h b/include/linux/ext3_fs_sb.h index e024e38248ff..a4e9216b3a6d 100644 --- a/include/linux/ext3_fs_sb.h +++ b/include/linux/ext3_fs_sb.h @@ -57,6 +57,7 @@ struct ext3_sb_info { u32 s_next_generation; u32 s_hash_seed[4]; int s_def_hash_version; + int s_hash_unsigned; /* 3 if hash should be signed, 0 if not */ struct percpu_counter s_freeblocks_counter; struct percpu_counter s_freeinodes_counter; struct percpu_counter s_dirs_counter; -- cgit v1.2.2 From a20c7ab570ffdce1d6f67c7acf8c1c502a3b3839 Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Thu, 16 Oct 2008 18:43:48 +0400 Subject: [MTD] sharpsl-nand: use platform_data for model-specific values Add platform_data which holds all model-specific values, like badblocks pattern, oobinfo, partitions. Signed-off-by: Dmitry Baryshkov --- include/linux/mtd/sharpsl.h | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) create mode 100644 include/linux/mtd/sharpsl.h (limited to 'include/linux') diff --git a/include/linux/mtd/sharpsl.h b/include/linux/mtd/sharpsl.h new file mode 100644 index 000000000000..25f4d2a845c1 --- /dev/null +++ b/include/linux/mtd/sharpsl.h @@ -0,0 +1,20 @@ +/* + * SharpSL NAND support + * + * Copyright (C) 2008 Dmitry Baryshkov + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include + +struct sharpsl_nand_platform_data { + struct nand_bbt_descr *badblock_pattern; + struct nand_ecclayout *ecc_layout; + struct mtd_partition *partitions; + unsigned int nr_partitions; +}; -- cgit v1.2.2 From 171bbfbeab7730031eec8025341401fabe540bd5 Mon Sep 17 00:00:00 2001 From: Mark Fasheh Date: Tue, 25 Nov 2008 17:42:31 -0500 Subject: jbd2: Add BH_JBDPrivateStart Add this so that file systems using JBD2 can safely allocate unused b_state bits. In this case, we add it so that Ocfs2 can define a single bit for tracking the validation state of a buffer. Signed-off-by: Mark Fasheh Signed-off-by: "Theodore Ts'o" --- include/linux/jbd2.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index c7d106ef22e2..f36645745489 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -329,6 +329,7 @@ enum jbd_state_bits { BH_State, /* Pins most journal_head state */ BH_JournalHead, /* Pins bh->b_private and jh->b_bh */ BH_Unshadow, /* Dummy bit, for BJ_Shadow wakeup filtering */ + BH_JBDPrivateStart, /* First bit available for private use by FS */ }; BUFFER_FNS(JBD, jbd) -- cgit v1.2.2 From e07f7183a486cf9783d1f8c9d2997b5b39eeb2d4 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Wed, 26 Nov 2008 01:14:26 -0500 Subject: jbd2: improve jbd2 fsync batching This patch removes the static sleep time in favor of a more self optimizing approach where we measure the average amount of time it takes to commit a transaction to disk and the ammount of time a transaction has been running. If somebody does a sync write or an fsync() traditionally we would sleep for 1 jiffies, which depending on the value of HZ could be a significant amount of time compared to how long it takes to commit a transaction to the underlying storage. With this patch instead of sleeping for a jiffie, we check to see if the amount of time this transaction has been running is less than the average commit time, and if it is we sleep for the delta using schedule_hrtimeout to give us a higher precision sleep time. This greatly benefits high end storage where you could end up sleeping for longer than it takes to commit the transaction and therefore sitting idle instead of allowing the transaction to be committed by keeping the sleep time to a minimum so you are sure to always be doing something. Signed-off-by: Josef Bacik Signed-off-by: "Theodore Ts'o" --- include/linux/jbd2.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include/linux') diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index f36645745489..ab8cef130c28 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -637,6 +637,11 @@ struct transaction_s */ unsigned long t_expires; + /* + * When this transaction started, in nanoseconds [no locking] + */ + ktime_t t_start_time; + /* * How many handles used this transaction? [t_handle_lock] */ @@ -939,8 +944,18 @@ struct journal_s struct buffer_head **j_wbuf; int j_wbufsize; + /* + * this is the pid of hte last person to run a synchronous operation + * through the journal + */ pid_t j_last_sync_writer; + /* + * the average amount of time in nanoseconds it takes to commit a + * transaction to disk. [j_state_lock] + */ + u64 j_average_commit_time; + /* This function is called when a transaction is closed */ void (*j_commit_callback)(journal_t *, transaction_t *); -- cgit v1.2.2 From 30773840c19cea60dcef39545960d541b1ac1cf8 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Sat, 3 Jan 2009 20:27:38 -0500 Subject: ext4: add fsync batch tuning knobs Add new mount options, min_batch_time and max_batch_time, which controls how long the jbd2 layer should wait for additional filesystem operations to get batched with a synchronous write transaction. Signed-off-by: "Theodore Ts'o" --- include/linux/jbd2.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index ab8cef130c28..a3cd647ea1bc 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -956,6 +956,14 @@ struct journal_s */ u64 j_average_commit_time; + /* + * minimum and maximum times that we should wait for + * additional filesystem operations to get batched into a + * synchronous handle in microseconds + */ + u32 j_min_batch_time; + u32 j_max_batch_time; + /* This function is called when a transaction is closed */ void (*j_commit_callback)(journal_t *, transaction_t *); -- cgit v1.2.2 From 1a0d3786dd57dbd74f340322054c3d618b999dcf Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Wed, 5 Nov 2008 00:09:22 -0500 Subject: jbd2: Remove a large array of bh's from the stack of the checkpoint routine jbd2_log_do_checkpoint()n is one of the kernel's largest stack users. Move the array of buffer head's from the stack of jbd2_log_do_checkpoint() to the in-core journal structure. Signed-off-by: "Theodore Ts'o" --- include/linux/jbd2.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index a3cd647ea1bc..004c9a8d63ed 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -687,6 +687,8 @@ jbd2_time_diff(unsigned long start, unsigned long end) return end + (MAX_JIFFY_OFFSET - start); } +#define JBD2_NR_BATCH 64 + /** * struct journal_s - The journal_s type is the concrete type associated with * journal_t. @@ -830,6 +832,14 @@ struct journal_s /* Semaphore for locking against concurrent checkpoints */ struct mutex j_checkpoint_mutex; + /* + * List of buffer heads used by the checkpoint routine. This + * was moved from jbd2_log_do_checkpoint() to reduce stack + * usage. Access to this array is controlled by the + * j_checkpoint_mutex. [j_checkpoint_mutex] + */ + struct buffer_head *j_chkpt_bhs[JBD2_NR_BATCH]; + /* * Journal head: identifies the first unused block in the journal. * [j_state_lock] -- cgit v1.2.2 From fb68407b0d9efba962c03f55009c797e22f024bc Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Thu, 6 Nov 2008 17:50:21 -0500 Subject: jbd2: Call journal commit callback without holding j_list_lock Avoid freeing the transaction in __jbd2_journal_drop_transaction() so the journal commit callback can run without holding j_list_lock, to avoid lock contention on this spinlock. Signed-off-by: Aneesh Kumar K.V Signed-off-by: "Theodore Ts'o" --- include/linux/jbd2.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index 004c9a8d63ed..9d82084a1605 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -1179,8 +1179,8 @@ int jbd2_log_wait_commit(journal_t *journal, tid_t tid); int jbd2_log_do_checkpoint(journal_t *journal); void __jbd2_log_wait_for_space(journal_t *journal); -extern void __jbd2_journal_drop_transaction(journal_t *, transaction_t *); -extern int jbd2_cleanup_journal_tail(journal_t *); +extern void __jbd2_journal_drop_transaction(journal_t *, transaction_t *); +extern int jbd2_cleanup_journal_tail(journal_t *); /* Debugging code only: */ -- cgit v1.2.2 From 69423d99fc182a81f3c5db3eb5c140acc6fc64be Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Wed, 10 Dec 2008 13:37:21 +0000 Subject: [MTD] update internal API to support 64-bit device size MTD internal API presently uses 32-bit values to represent device size. This patch updates them to 64-bits but leaves the external API unchanged. Extending the external API is a separate issue for several reasons. First, no one needs it at the moment. Secondly, whether the implementation is done with IOCTLs, sysfs or both is still debated. Thirdly external API changes require the internal API to be accepted first. Note that although the MTD API will be able to support 64-bit device sizes, existing drivers do not and are not required to do so, although NAND base has been updated. In general, changing from 32-bit to 64-bit values cause little or no changes to the majority of the code with the following exceptions: - printk message formats - division and modulus of 64-bit values - NAND base support - 32-bit local variables used by mtdpart and mtdconcat - naughtily assuming one structure maps to another in MEMERASE ioctl Signed-off-by: Adrian Hunter Signed-off-by: Artem Bityutskiy Signed-off-by: David Woodhouse --- include/linux/mtd/mtd.h | 57 ++++++++++++++++++++++++++++++++++++------ include/linux/mtd/nand.h | 2 +- include/linux/mtd/partitions.h | 4 +-- 3 files changed, 52 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h index eae26bb6430a..95e585ecc297 100644 --- a/include/linux/mtd/mtd.h +++ b/include/linux/mtd/mtd.h @@ -15,6 +15,8 @@ #include #include +#include + #define MTD_CHAR_MAJOR 90 #define MTD_BLOCK_MAJOR 31 #define MAX_MTD_DEVICES 32 @@ -25,16 +27,16 @@ #define MTD_ERASE_DONE 0x08 #define MTD_ERASE_FAILED 0x10 -#define MTD_FAIL_ADDR_UNKNOWN 0xffffffff +#define MTD_FAIL_ADDR_UNKNOWN -1LL /* If the erase fails, fail_addr might indicate exactly which block failed. If fail_addr = MTD_FAIL_ADDR_UNKNOWN, the failure was not at the device level or was not specific to any particular block. */ struct erase_info { struct mtd_info *mtd; - u_int32_t addr; - u_int32_t len; - u_int32_t fail_addr; + uint64_t addr; + uint64_t len; + uint64_t fail_addr; u_long time; u_long retries; u_int dev; @@ -46,7 +48,7 @@ struct erase_info { }; struct mtd_erase_region_info { - u_int32_t offset; /* At which this region starts, from the beginning of the MTD */ + uint64_t offset; /* At which this region starts, from the beginning of the MTD */ u_int32_t erasesize; /* For this region */ u_int32_t numblocks; /* Number of blocks of erasesize in this region */ unsigned long *lockmap; /* If keeping bitmap of locks */ @@ -101,7 +103,7 @@ struct mtd_oob_ops { struct mtd_info { u_char type; u_int32_t flags; - u_int32_t size; // Total size of the MTD + uint64_t size; // Total size of the MTD /* "Major" erase size for the device. Naïve users may take this * to be the only erase size available, or may use the more detailed @@ -120,6 +122,16 @@ struct mtd_info { u_int32_t oobsize; // Amount of OOB data per block (e.g. 16) u_int32_t oobavail; // Available OOB bytes per block + /* + * If erasesize is a power of 2 then the shift is stored in + * erasesize_shift otherwise erasesize_shift is zero. Ditto writesize. + */ + unsigned int erasesize_shift; + unsigned int writesize_shift; + /* Masks based on erasesize_shift and writesize_shift */ + unsigned int erasesize_mask; + unsigned int writesize_mask; + // Kernel-only stuff starts here. const char *name; int index; @@ -190,8 +202,8 @@ struct mtd_info { void (*sync) (struct mtd_info *mtd); /* Chip-supported device locking */ - int (*lock) (struct mtd_info *mtd, loff_t ofs, size_t len); - int (*unlock) (struct mtd_info *mtd, loff_t ofs, size_t len); + int (*lock) (struct mtd_info *mtd, loff_t ofs, uint64_t len); + int (*unlock) (struct mtd_info *mtd, loff_t ofs, uint64_t len); /* Power Management functions */ int (*suspend) (struct mtd_info *mtd); @@ -221,6 +233,35 @@ struct mtd_info { void (*put_device) (struct mtd_info *mtd); }; +static inline u_int32_t mtd_div_by_eb(uint64_t sz, struct mtd_info *mtd) +{ + if (mtd->erasesize_shift) + return sz >> mtd->erasesize_shift; + do_div(sz, mtd->erasesize); + return sz; +} + +static inline u_int32_t mtd_mod_by_eb(uint64_t sz, struct mtd_info *mtd) +{ + if (mtd->erasesize_shift) + return sz & mtd->erasesize_mask; + return do_div(sz, mtd->erasesize); +} + +static inline u_int32_t mtd_div_by_ws(uint64_t sz, struct mtd_info *mtd) +{ + if (mtd->writesize_shift) + return sz >> mtd->writesize_shift; + do_div(sz, mtd->writesize); + return sz; +} + +static inline u_int32_t mtd_mod_by_ws(uint64_t sz, struct mtd_info *mtd) +{ + if (mtd->writesize_shift) + return sz & mtd->writesize_mask; + return do_div(sz, mtd->writesize); +} /* Kernel-side ioctl definitions */ diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h index 733d3f3b4eb8..c0677b8082be 100644 --- a/include/linux/mtd/nand.h +++ b/include/linux/mtd/nand.h @@ -399,7 +399,7 @@ struct nand_chip { int bbt_erase_shift; int chip_shift; int numchips; - unsigned long chipsize; + uint64_t chipsize; int pagemask; int pagebuf; int subpagesize; diff --git a/include/linux/mtd/partitions.h b/include/linux/mtd/partitions.h index c92b4d439609..164c7d78687d 100644 --- a/include/linux/mtd/partitions.h +++ b/include/linux/mtd/partitions.h @@ -36,8 +36,8 @@ struct mtd_partition { char *name; /* identifier string */ - u_int32_t size; /* partition size */ - u_int32_t offset; /* offset within the master MTD space */ + uint64_t size; /* partition size */ + uint64_t offset; /* offset within the master MTD space */ u_int32_t mask_flags; /* master MTD flags to mask out for this partition */ struct nand_ecclayout *ecclayout; /* out of band layout for this partition (NAND only)*/ struct mtd_info **mtdp; /* pointer to store the MTD object */ -- cgit v1.2.2 From 3854be7712f7b4bdcaed14664fc7c7124b3fef0d Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Wed, 10 Dec 2008 14:06:42 +0000 Subject: [MTD] Remove strange u_int32_t types from FTL Signed-off-by: David Woodhouse --- include/linux/mtd/ftl.h | 38 +++++++++++++++++++------------------- 1 file changed, 19 insertions(+), 19 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mtd/ftl.h b/include/linux/mtd/ftl.h index 0be442f881dd..0555f7a0b9ed 100644 --- a/include/linux/mtd/ftl.h +++ b/include/linux/mtd/ftl.h @@ -32,25 +32,25 @@ #define _LINUX_FTL_H typedef struct erase_unit_header_t { - u_int8_t LinkTargetTuple[5]; - u_int8_t DataOrgTuple[10]; - u_int8_t NumTransferUnits; - u_int32_t EraseCount; - u_int16_t LogicalEUN; - u_int8_t BlockSize; - u_int8_t EraseUnitSize; - u_int16_t FirstPhysicalEUN; - u_int16_t NumEraseUnits; - u_int32_t FormattedSize; - u_int32_t FirstVMAddress; - u_int16_t NumVMPages; - u_int8_t Flags; - u_int8_t Code; - u_int32_t SerialNumber; - u_int32_t AltEUHOffset; - u_int32_t BAMOffset; - u_int8_t Reserved[12]; - u_int8_t EndTuple[2]; + uint8_t LinkTargetTuple[5]; + uint8_t DataOrgTuple[10]; + uint8_t NumTransferUnits; + uint32_t EraseCount; + uint16_t LogicalEUN; + uint8_t BlockSize; + uint8_t EraseUnitSize; + uint16_t FirstPhysicalEUN; + uint16_t NumEraseUnits; + uint32_t FormattedSize; + uint32_t FirstVMAddress; + uint16_t NumVMPages; + uint8_t Flags; + uint8_t Code; + uint32_t SerialNumber; + uint32_t AltEUHOffset; + uint32_t BAMOffset; + uint8_t Reserved[12]; + uint8_t EndTuple[2]; } erase_unit_header_t; /* Flags in erase_unit_header_t */ -- cgit v1.2.2 From 26cdb67c74aedc22367e6d0271f7f955220cca65 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Wed, 10 Dec 2008 14:08:12 +0000 Subject: [MTD] Remove more strange u_intxx_t types Signed-off-by: David Woodhouse --- include/linux/mtd/mtd.h | 26 +++++++++++++------------- include/linux/mtd/partitions.h | 2 +- 2 files changed, 14 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h index 95e585ecc297..adef674855f3 100644 --- a/include/linux/mtd/mtd.h +++ b/include/linux/mtd/mtd.h @@ -39,8 +39,8 @@ struct erase_info { uint64_t fail_addr; u_long time; u_long retries; - u_int dev; - u_int cell; + unsigned dev; + unsigned cell; void (*callback) (struct erase_info *self); u_long priv; u_char state; @@ -49,8 +49,8 @@ struct erase_info { struct mtd_erase_region_info { uint64_t offset; /* At which this region starts, from the beginning of the MTD */ - u_int32_t erasesize; /* For this region */ - u_int32_t numblocks; /* Number of blocks of erasesize in this region */ + uint32_t erasesize; /* For this region */ + uint32_t numblocks; /* Number of blocks of erasesize in this region */ unsigned long *lockmap; /* If keeping bitmap of locks */ }; @@ -102,14 +102,14 @@ struct mtd_oob_ops { struct mtd_info { u_char type; - u_int32_t flags; + uint32_t flags; uint64_t size; // Total size of the MTD /* "Major" erase size for the device. Naïve users may take this * to be the only erase size available, or may use the more detailed * information below if they desire */ - u_int32_t erasesize; + uint32_t erasesize; /* Minimal writable flash unit size. In case of NOR flash it is 1 (even * though individual bits can be cleared), in case of NAND flash it is * one NAND page (or half, or one-fourths of it), in case of ECC-ed NOR @@ -117,10 +117,10 @@ struct mtd_info { * Any driver registering a struct mtd_info must ensure a writesize of * 1 or larger. */ - u_int32_t writesize; + uint32_t writesize; - u_int32_t oobsize; // Amount of OOB data per block (e.g. 16) - u_int32_t oobavail; // Available OOB bytes per block + uint32_t oobsize; // Amount of OOB data per block (e.g. 16) + uint32_t oobavail; // Available OOB bytes per block /* * If erasesize is a power of 2 then the shift is stored in @@ -233,7 +233,7 @@ struct mtd_info { void (*put_device) (struct mtd_info *mtd); }; -static inline u_int32_t mtd_div_by_eb(uint64_t sz, struct mtd_info *mtd) +static inline uint32_t mtd_div_by_eb(uint64_t sz, struct mtd_info *mtd) { if (mtd->erasesize_shift) return sz >> mtd->erasesize_shift; @@ -241,14 +241,14 @@ static inline u_int32_t mtd_div_by_eb(uint64_t sz, struct mtd_info *mtd) return sz; } -static inline u_int32_t mtd_mod_by_eb(uint64_t sz, struct mtd_info *mtd) +static inline uint32_t mtd_mod_by_eb(uint64_t sz, struct mtd_info *mtd) { if (mtd->erasesize_shift) return sz & mtd->erasesize_mask; return do_div(sz, mtd->erasesize); } -static inline u_int32_t mtd_div_by_ws(uint64_t sz, struct mtd_info *mtd) +static inline uint32_t mtd_div_by_ws(uint64_t sz, struct mtd_info *mtd) { if (mtd->writesize_shift) return sz >> mtd->writesize_shift; @@ -256,7 +256,7 @@ static inline u_int32_t mtd_div_by_ws(uint64_t sz, struct mtd_info *mtd) return sz; } -static inline u_int32_t mtd_mod_by_ws(uint64_t sz, struct mtd_info *mtd) +static inline uint32_t mtd_mod_by_ws(uint64_t sz, struct mtd_info *mtd) { if (mtd->writesize_shift) return sz & mtd->writesize_mask; diff --git a/include/linux/mtd/partitions.h b/include/linux/mtd/partitions.h index 164c7d78687d..a45dd831b3f8 100644 --- a/include/linux/mtd/partitions.h +++ b/include/linux/mtd/partitions.h @@ -38,7 +38,7 @@ struct mtd_partition { char *name; /* identifier string */ uint64_t size; /* partition size */ uint64_t offset; /* offset within the master MTD space */ - u_int32_t mask_flags; /* master MTD flags to mask out for this partition */ + uint32_t mask_flags; /* master MTD flags to mask out for this partition */ struct nand_ecclayout *ecclayout; /* out of band layout for this partition (NAND only)*/ struct mtd_info **mtdp; /* pointer to store the MTD object */ }; -- cgit v1.2.2 From d3af0f048c114dd53713d5920c54f6d5b6b12139 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Mon, 1 Dec 2008 14:23:38 -0800 Subject: [MTD] [NAND] remove excess kernel-doc notation Delete extra kernel-doc notation for struct fields and function parameters that don't exist: Warning(include/linux/mtd/nand.h:428): Excess struct/union/enum/typedef member 'wq' description in 'nand_chip' Warning(include/linux/mtd/nand.h:428): Excess struct/union/enum/typedef member 'datbuf' description in 'nand_chip' Warning(include/linux/mtd/nand.h:428): Excess struct/union/enum/typedef member 'oobbuf' description in 'nand_chip' Warning(include/linux/mtd/nand.h:428): Excess struct/union/enum/typedef member 'oobdirty' description in 'nand_chip' Warning(include/linux/mtd/nand.h:428): Excess struct/union/enum/typedef member 'data_poi' description in 'nand_chip' Warning(drivers/mtd/nand/nand_base.c:2527): Excess function parameter 'maxchips' description in 'nand_scan_tail' Signed-off-by: Randy Dunlap Signed-off-by: Andrew Morton Signed-off-by: David Woodhouse --- include/linux/mtd/nand.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h index c0677b8082be..db5b63da2a7e 100644 --- a/include/linux/mtd/nand.h +++ b/include/linux/mtd/nand.h @@ -335,17 +335,12 @@ struct nand_buffers { * @erase_cmd: [INTERN] erase command write function, selectable due to AND support * @scan_bbt: [REPLACEABLE] function to scan bad block table * @chip_delay: [BOARDSPECIFIC] chip dependent delay for transfering data from array to read regs (tR) - * @wq: [INTERN] wait queue to sleep on if a NAND operation is in progress * @state: [INTERN] the current state of the NAND device * @oob_poi: poison value buffer * @page_shift: [INTERN] number of address bits in a page (column address bits) * @phys_erase_shift: [INTERN] number of address bits in a physical eraseblock * @bbt_erase_shift: [INTERN] number of address bits in a bbt entry * @chip_shift: [INTERN] number of address bits in one chip - * @datbuf: [INTERN] internal buffer for one page + oob - * @oobbuf: [INTERN] oob buffer for one eraseblock - * @oobdirty: [INTERN] indicates that oob_buf must be reinitialized - * @data_poi: [INTERN] pointer to a data buffer * @options: [BOARDSPECIFIC] various chip options. They can partly be set to inform nand_scan about * special functionality. See the defines for further explanation * @badblockpos: [INTERN] position of the bad block marker in the oob area -- cgit v1.2.2 From 3f4b0ef7f2899c91b1d6958779f084b44dd59d32 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sun, 26 Oct 2008 20:52:15 +0100 Subject: ACPI hibernate: Add a mechanism to save/restore ACPI NVS memory According to the ACPI Specification 3.0b, Section 15.3.2, "OSPM will call the _PTS control method some time before entering a sleeping state, to allow the platform's AML code to update this memory image before entering the sleeping state. After the system awakes from an S4 state, OSPM will restore this memory area and call the _WAK control method to enable the BIOS to reclaim its memory image." For this reason, implement a mechanism allowing us to save the NVS memory during hibernation and to restore it during the subsequent resume. Based on a patch by Zhang Rui. Signed-off-by: Rafael J. Wysocki Acked-by: Nigel Cunningham Cc: Zhang Rui Signed-off-by: Len Brown --- include/linux/suspend.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include/linux') diff --git a/include/linux/suspend.h b/include/linux/suspend.h index 2ce8207686e2..2b409c44db83 100644 --- a/include/linux/suspend.h +++ b/include/linux/suspend.h @@ -232,6 +232,11 @@ extern unsigned long get_safe_page(gfp_t gfp_mask); extern void hibernation_set_ops(struct platform_hibernation_ops *ops); extern int hibernate(void); +extern int hibernate_nvs_register(unsigned long start, unsigned long size); +extern int hibernate_nvs_alloc(void); +extern void hibernate_nvs_free(void); +extern void hibernate_nvs_save(void); +extern void hibernate_nvs_restore(void); #else /* CONFIG_HIBERNATION */ static inline int swsusp_page_is_forbidden(struct page *p) { return 0; } static inline void swsusp_set_page_free(struct page *p) {} @@ -239,6 +244,14 @@ static inline void swsusp_unset_page_free(struct page *p) {} static inline void hibernation_set_ops(struct platform_hibernation_ops *ops) {} static inline int hibernate(void) { return -ENOSYS; } +static inline int hibernate_nvs_register(unsigned long a, unsigned long b) +{ + return 0; +} +static inline int hibernate_nvs_alloc(void) { return 0; } +static inline void hibernate_nvs_free(void) {} +static inline void hibernate_nvs_save(void) {} +static inline void hibernate_nvs_restore(void) {} #endif /* CONFIG_HIBERNATION */ #ifdef CONFIG_PM_SLEEP -- cgit v1.2.2 From ba84ed9546e91348fdf3ff2bff859b0ee53b407a Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sun, 26 Oct 2008 20:56:30 +0100 Subject: ACPI hibernate: Introduce new kernel parameter acpi_sleep=s4_nonvs On some machines it may be necessary to disable the saving/restoring of the ACPI NVS memory region during hibernation/resume. For this purpose, introduce new ACPI kernel command line option acpi_sleep=s4_nonvs. Based on a patch by Zhang Rui. Signed-off-by: Rafael J. Wysocki Acked-by: Nigel Cunningham Acked-by: Pavel Machek Signed-off-by: Len Brown --- include/linux/acpi.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/acpi.h b/include/linux/acpi.h index fba8051fb297..dfa0a5356c53 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -270,6 +270,7 @@ int acpi_check_mem_region(resource_size_t start, resource_size_t n, #ifdef CONFIG_PM_SLEEP void __init acpi_no_s4_hw_signature(void); void __init acpi_old_suspend_ordering(void); +void __init acpi_s4_no_nvs(void); #endif /* CONFIG_PM_SLEEP */ #else /* CONFIG_ACPI */ -- cgit v1.2.2 From 160bbab3000dafccbe43688e48208cecf4deb879 Mon Sep 17 00:00:00 2001 From: Kay Sievers Date: Tue, 23 Dec 2008 10:00:14 +0000 Subject: [MTD] struct device - replace bus_id with dev_name(), dev_set_name() Signed-off-by: Kay Sievers Signed-off-by: Greg Kroah-Hartman Signed-off-by: David Woodhouse --- include/linux/mtd/concat.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mtd/concat.h b/include/linux/mtd/concat.h index c02f3d264ecf..e80c674daeb3 100644 --- a/include/linux/mtd/concat.h +++ b/include/linux/mtd/concat.h @@ -13,7 +13,7 @@ struct mtd_info *mtd_concat_create( struct mtd_info *subdev[], /* subdevices to concatenate */ int num_devs, /* number of subdevices */ - char *name); /* name for the new device */ + const char *name); /* name for the new device */ void mtd_concat_destroy(struct mtd_info *mtd); -- cgit v1.2.2 From f748bafa3ca1fb056e63afdeecacc1c68d8104df Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Mon, 8 Dec 2008 21:30:31 -0700 Subject: ACPI: PCI: move struct acpi_prt_entry declaration out of public header file The struct acpi_prt_entry is used only in pci_irq.c, so there's no need for the declaration to be public. This patch moves it into pci_irq.c. Signed-off-by: Bjorn Helgaas Signed-off-by: Len Brown --- include/linux/acpi.h | 16 ---------------- 1 file changed, 16 deletions(-) (limited to 'include/linux') diff --git a/include/linux/acpi.h b/include/linux/acpi.h index fba8051fb297..813f937b3ab4 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -131,22 +131,6 @@ extern int acpi_get_override_irq(int bus_irq, int *trigger, int *polarity); */ void acpi_unregister_gsi (u32 gsi); -struct acpi_prt_entry { - struct list_head node; - struct acpi_pci_id id; - u8 pin; - struct { - acpi_handle handle; - u32 index; - } link; - u32 irq; -}; - -struct acpi_prt_list { - int count; - struct list_head entries; -}; - struct pci_dev; int acpi_pci_irq_enable (struct pci_dev *dev); -- cgit v1.2.2 From ea7e96e0f2277107d9ea14c3f16c86ba82b2e560 Mon Sep 17 00:00:00 2001 From: Lin Ming Date: Tue, 16 Dec 2008 16:28:17 +0800 Subject: ACPI: remove private acpica headers from driver files External driver files should not include any private acpica headers. Signed-off-by: Lin Ming Signed-off-by: Len Brown --- include/linux/pci_hotplug.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/pci_hotplug.h b/include/linux/pci_hotplug.h index a00bd1a0f156..c2d1a7d1886a 100644 --- a/include/linux/pci_hotplug.h +++ b/include/linux/pci_hotplug.h @@ -223,7 +223,6 @@ struct hotplug_params { #ifdef CONFIG_ACPI #include #include -#include extern acpi_status acpi_get_hp_params_from_firmware(struct pci_bus *bus, struct hotplug_params *hpp); int acpi_get_hp_hw_control_from_firmware(struct pci_dev *dev, u32 flags); -- cgit v1.2.2 From 56c451f4b583ccdf80c9e676179c9cb49de86745 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Thu, 18 Dec 2008 14:49:37 +0900 Subject: [SCSI] block: fix the partial mappings with struct rq_map_data This fixes bio_copy_user_iov to properly handle the partial mappings with struct rq_map_data (which only sg uses for now but st and osst will shortly). It adds the offset member to struct rq_map_data and changes blk_rq_map_user to update it so that bio_copy_user_iov can add an appropriate page frame via bio_add_pc_page(). Signed-off-by: FUJITA Tomonori Acked-by: Jens Axboe Signed-off-by: James Bottomley --- include/linux/blkdev.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 7035cec583b6..811e5342c452 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -690,6 +690,7 @@ struct rq_map_data { struct page **pages; int page_order; int nr_entries; + unsigned long offset; }; struct req_iterator { -- cgit v1.2.2 From 97ae77a1cd332c7b011d71315c8faabce6840c72 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Thu, 18 Dec 2008 14:49:38 +0900 Subject: [SCSI] block: make blk_rq_map_user take a NULL user-space buffer for WRITE The commit 818827669d85b84241696ffef2de485db46b0b5e (block: make blk_rq_map_user take a NULL user-space buffer) extended blk_rq_map_user to accept a NULL user-space buffer with a READ command. It was necessary to convert sg to use the block layer mapping API. This patch extends blk_rq_map_user again for a WRITE command. It is necessary to convert st and osst drivers to use the block layer apping API. Signed-off-by: FUJITA Tomonori Acked-by: Jens Axboe Signed-off-by: James Bottomley --- include/linux/blkdev.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 811e5342c452..044467ef7b11 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -691,6 +691,7 @@ struct rq_map_data { int page_order; int nr_entries; unsigned long offset; + int null_mapped; }; struct req_iterator { -- cgit v1.2.2 From 87d8fe1ee6b8d2f95076142d58c440dba4e7bdc2 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Sat, 3 Jan 2009 09:47:09 -0500 Subject: add releasepage hooks to block devices which can be used by file systems Implement blkdev_releasepage() to release the buffer_heads and pages after we release private data belonging to a mounted filesystem. Cc: Toshiyuki Okajima Cc: linux-fsdevel@vger.kernel.org Signed-off-by: "Theodore Ts'o" --- include/linux/fs.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index f2a3010140e3..0f54ae0f0ccd 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -565,6 +565,7 @@ struct address_space { struct block_device { dev_t bd_dev; /* not a kdev_t - it's a search key */ struct inode * bd_inode; /* will die */ + struct super_block * bd_super; int bd_openers; struct mutex bd_mutex; /* open/close mutex */ struct semaphore bd_mount_sem; @@ -1385,6 +1386,7 @@ struct super_operations { ssize_t (*quota_read)(struct super_block *, int, char *, size_t, loff_t); ssize_t (*quota_write)(struct super_block *, int, const char *, size_t, loff_t); #endif + int (*bdev_try_to_free_page)(struct super_block*, struct page*, gfp_t); }; /* -- cgit v1.2.2 From c31910672376dfb8d020e32afa7249763bcd924a Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Tue, 6 Jan 2009 11:14:25 -0500 Subject: ext4: Remove code to create the journal inode This code has been obsolete in quite some time, since the supported method for adding a journal inode is to use tune2fs (or to creating new filesystem with a journal via mke2fs or mkfs.ext4). Signed-off-by: "Theodore Ts'o" --- include/linux/jbd2.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index 9d82084a1605..adef1c9940d3 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -1104,7 +1104,6 @@ extern int jbd2_journal_set_features (journal_t *, unsigned long, unsigned long, unsigned long); extern void jbd2_journal_clear_features (journal_t *, unsigned long, unsigned long, unsigned long); -extern int jbd2_journal_create (journal_t *); extern int jbd2_journal_load (journal_t *journal); extern int jbd2_journal_destroy (journal_t *); extern int jbd2_journal_recover (journal_t *journal); -- cgit v1.2.2 From 922ab535bbe73975ce62f71ab9bf8ec9bce71c29 Mon Sep 17 00:00:00 2001 From: Alexey Korolev Date: Tue, 16 Dec 2008 18:13:58 +0000 Subject: [MTD] LPDDR QINFO records definitions There are declaraton of structures and macros definitions necessary for operations with QINFO in this patch. Signed-off-by: Alexey Korolev Acked-by: Jared Hulbert Signed-off-by: David Woodhouse --- include/linux/mtd/qinfo.h | 91 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 91 insertions(+) create mode 100644 include/linux/mtd/qinfo.h (limited to 'include/linux') diff --git a/include/linux/mtd/qinfo.h b/include/linux/mtd/qinfo.h new file mode 100644 index 000000000000..7b3d487d8b3f --- /dev/null +++ b/include/linux/mtd/qinfo.h @@ -0,0 +1,91 @@ +#ifndef __LINUX_MTD_QINFO_H +#define __LINUX_MTD_QINFO_H + +#include +#include +#include +#include +#include +#include +#include + +/* lpddr_private describes lpddr flash chip in memory map + * @ManufactId - Chip Manufacture ID + * @DevId - Chip Device ID + * @qinfo - pointer to qinfo records describing the chip + * @numchips - number of chips including virual RWW partitions + * @chipshift - Chip/partiton size 2^chipshift + * @chips - per-chip data structure + */ +struct lpddr_private { + uint16_t ManufactId; + uint16_t DevId; + struct qinfo_chip *qinfo; + int numchips; + unsigned long chipshift; + struct flchip chips[0]; +}; + +/* qinfo_query_info structure contains request information for + * each qinfo record + * @major - major number of qinfo record + * @major - minor number of qinfo record + * @id_str - descriptive string to access the record + * @desc - detailed description for the qinfo record + */ +struct qinfo_query_info { + uint8_t major; + uint8_t minor; + char *id_str; + char *desc; +}; + +/* + * qinfo_chip structure contains necessary qinfo records data + * @DevSizeShift - Device size 2^n bytes + * @BufSizeShift - Program buffer size 2^n bytes + * @TotalBlocksNum - Total number of blocks + * @UniformBlockSizeShift - Uniform block size 2^UniformBlockSizeShift bytes + * @HWPartsNum - Number of hardware partitions + * @SuspEraseSupp - Suspend erase supported + * @SingleWordProgTime - Single word program 2^SingleWordProgTime u-sec + * @ProgBufferTime - Program buffer write 2^ProgBufferTime u-sec + * @BlockEraseTime - Block erase 2^BlockEraseTime m-sec + */ +struct qinfo_chip { + /* General device info */ + uint16_t DevSizeShift; + uint16_t BufSizeShift; + /* Erase block information */ + uint16_t TotalBlocksNum; + uint16_t UniformBlockSizeShift; + /* Partition information */ + uint16_t HWPartsNum; + /* Optional features */ + uint16_t SuspEraseSupp; + /* Operation typical time */ + uint16_t SingleWordProgTime; + uint16_t ProgBufferTime; + uint16_t BlockEraseTime; +}; + +/* defines for fixup usage */ +#define LPDDR_MFR_ANY 0xffff +#define LPDDR_ID_ANY 0xffff +#define NUMONYX_MFGR_ID 0x0089 +#define R18_DEVICE_ID_1G 0x893c + +static inline map_word lpddr_build_cmd(u_long cmd, struct map_info *map) +{ + map_word val = { {0} }; + val.x[0] = cmd; + return val; +} + +#define CMD(x) lpddr_build_cmd(x, map) +#define CMDVAL(cmd) cmd.x[0] + +struct mtd_info *lpddr_cmdset(struct map_info *); + +#endif + -- cgit v1.2.2 From eb3db27507f74b99241abfa11824d8b6d92b84ef Mon Sep 17 00:00:00 2001 From: Alexey Korolev Date: Tue, 16 Dec 2008 18:15:33 +0000 Subject: [MTD] LPDDR PFOW definition LPDDR chips use PFOW window for sending commands, reading status and capabilites requesting. This pfow.h - contains definitions for PFOW window fileds, possible commands, error flags and some common macro function to avoid code duplications. Signed-off-by: Alexey Korolev Acked-by: Jared Hulbert Signed-off-by: David Woodhouse --- include/linux/mtd/pfow.h | 159 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 159 insertions(+) create mode 100644 include/linux/mtd/pfow.h (limited to 'include/linux') diff --git a/include/linux/mtd/pfow.h b/include/linux/mtd/pfow.h new file mode 100644 index 000000000000..b730d4f84655 --- /dev/null +++ b/include/linux/mtd/pfow.h @@ -0,0 +1,159 @@ +/* Primary function overlay window definitions + * and service functions used by LPDDR chips + */ +#ifndef __LINUX_MTD_PFOW_H +#define __LINUX_MTD_PFOW_H + +#include + +/* PFOW registers addressing */ +/* Address of symbol "P" */ +#define PFOW_QUERY_STRING_P 0x0000 +/* Address of symbol "F" */ +#define PFOW_QUERY_STRING_F 0x0002 +/* Address of symbol "O" */ +#define PFOW_QUERY_STRING_O 0x0004 +/* Address of symbol "W" */ +#define PFOW_QUERY_STRING_W 0x0006 +/* Identification info for LPDDR chip */ +#define PFOW_MANUFACTURER_ID 0x0020 +#define PFOW_DEVICE_ID 0x0022 +/* Address in PFOW where prog buffer can can be found */ +#define PFOW_PROGRAM_BUFFER_OFFSET 0x0040 +/* Size of program buffer in words */ +#define PFOW_PROGRAM_BUFFER_SIZE 0x0042 +/* Address command code register */ +#define PFOW_COMMAND_CODE 0x0080 +/* command data register */ +#define PFOW_COMMAND_DATA 0x0084 +/* command address register lower address bits */ +#define PFOW_COMMAND_ADDRESS_L 0x0088 +/* command address register upper address bits */ +#define PFOW_COMMAND_ADDRESS_H 0x008a +/* number of bytes to be proggrammed lower address bits */ +#define PFOW_DATA_COUNT_L 0x0090 +/* number of bytes to be proggrammed higher address bits */ +#define PFOW_DATA_COUNT_H 0x0092 +/* command execution register, the only possible value is 0x01 */ +#define PFOW_COMMAND_EXECUTE 0x00c0 +/* 0x01 should be written at this address to clear buffer */ +#define PFOW_CLEAR_PROGRAM_BUFFER 0x00c4 +/* device program/erase suspend register */ +#define PFOW_PROGRAM_ERASE_SUSPEND 0x00c8 +/* device status register */ +#define PFOW_DSR 0x00cc + +/* LPDDR memory device command codes */ +/* They are possible values of PFOW command code register */ +#define LPDDR_WORD_PROGRAM 0x0041 +#define LPDDR_BUFF_PROGRAM 0x00E9 +#define LPDDR_BLOCK_ERASE 0x0020 +#define LPDDR_LOCK_BLOCK 0x0061 +#define LPDDR_UNLOCK_BLOCK 0x0062 +#define LPDDR_READ_BLOCK_LOCK_STATUS 0x0065 +#define LPDDR_INFO_QUERY 0x0098 +#define LPDDR_READ_OTP 0x0097 +#define LPDDR_PROG_OTP 0x00C0 +#define LPDDR_RESUME 0x00D0 + +/* Defines possible value of PFOW command execution register */ +#define LPDDR_START_EXECUTION 0x0001 + +/* Defines possible value of PFOW program/erase suspend register */ +#define LPDDR_SUSPEND 0x0001 + +/* Possible values of PFOW device status register */ +/* access R - read; RC read & clearable */ +#define DSR_DPS (1<<1) /* RC; device protect status + * 0 - not protected 1 - locked */ +#define DSR_PSS (1<<2) /* R; program suspend status; + * 0-prog in progress/completed, + * 1- prog suspended */ +#define DSR_VPPS (1<<3) /* RC; 0-Vpp OK, * 1-Vpp low */ +#define DSR_PROGRAM_STATUS (1<<4) /* RC; 0-successful, 1-error */ +#define DSR_ERASE_STATUS (1<<5) /* RC; erase or blank check status; + * 0-success erase/blank check, + * 1 blank check error */ +#define DSR_ESS (1<<6) /* R; erase suspend status; + * 0-erase in progress/complete, + * 1 erase suspended */ +#define DSR_READY_STATUS (1<<7) /* R; Device status + * 0-busy, + * 1-ready */ +#define DSR_RPS (0x3<<8) /* RC; region program status + * 00 - Success, + * 01-re-program attempt in region with + * object mode data, + * 10-object mode program w attempt in + * region with control mode data + * 11-attempt to program invalid half + * with 0x41 command */ +#define DSR_AOS (1<<12) /* RC; 1- AO related failure */ +#define DSR_AVAILABLE (1<<15) /* R; Device availbility + * 1 - Device available + * 0 - not available */ + +/* The superset of all possible error bits in DSR */ +#define DSR_ERR 0x133A + +static inline void send_pfow_command(struct map_info *map, + unsigned long cmd_code, unsigned long adr, + unsigned long len, map_word *datum) +{ + int bits_per_chip = map_bankwidth(map) * 8; + int chipnum; + struct lpddr_private *lpddr = map->fldrv_priv; + chipnum = adr >> lpddr->chipshift; + + map_write(map, CMD(cmd_code), map->pfow_base + PFOW_COMMAND_CODE); + map_write(map, CMD(adr & ((1<pfow_base + PFOW_COMMAND_ADDRESS_L); + map_write(map, CMD(adr>>bits_per_chip), + map->pfow_base + PFOW_COMMAND_ADDRESS_H); + if (len) { + map_write(map, CMD(len & ((1<pfow_base + PFOW_DATA_COUNT_L); + map_write(map, CMD(len>>bits_per_chip), + map->pfow_base + PFOW_DATA_COUNT_H); + } + if (datum) + map_write(map, *datum, map->pfow_base + PFOW_COMMAND_DATA); + + /* Command execution start */ + map_write(map, CMD(LPDDR_START_EXECUTION), + map->pfow_base + PFOW_COMMAND_EXECUTE); +} + +static inline void print_drs_error(unsigned dsr) +{ + int prog_status = (dsr & DSR_RPS) >> 8; + + if (!(dsr & DSR_AVAILABLE)) + printk(KERN_NOTICE"DSR.15: (0) Device not Available\n"); + if (prog_status & 0x03) + printk(KERN_NOTICE"DSR.9,8: (11) Attempt to program invalid " + "half with 41h command\n"); + else if (prog_status & 0x02) + printk(KERN_NOTICE"DSR.9,8: (10) Object Mode Program attempt " + "in region with Control Mode data\n"); + else if (prog_status & 0x01) + printk(KERN_NOTICE"DSR.9,8: (01) Program attempt in region " + "with Object Mode data\n"); + if (!(dsr & DSR_READY_STATUS)) + printk(KERN_NOTICE"DSR.7: (0) Device is Busy\n"); + if (dsr & DSR_ESS) + printk(KERN_NOTICE"DSR.6: (1) Erase Suspended\n"); + if (dsr & DSR_ERASE_STATUS) + printk(KERN_NOTICE"DSR.5: (1) Erase/Blank check error\n"); + if (dsr & DSR_PROGRAM_STATUS) + printk(KERN_NOTICE"DSR.4: (1) Program Error\n"); + if (dsr & DSR_VPPS) + printk(KERN_NOTICE"DSR.3: (1) Vpp low detect, operation " + "aborted\n"); + if (dsr & DSR_PSS) + printk(KERN_NOTICE"DSR.2: (1) Program suspended\n"); + if (dsr & DSR_DPS) + printk(KERN_NOTICE"DSR.1: (1) Aborted Erase/Program attempt " + "on locked block\n"); +} +#endif /* __LINUX_MTD_PFOW_H */ -- cgit v1.2.2 From d13e51e747fee301b404dffcf4a7e1bdc558969b Mon Sep 17 00:00:00 2001 From: Alexey Korolev Date: Tue, 16 Dec 2008 18:21:10 +0000 Subject: [MTD] LPDDR added new pfow_base parameter We need to supply additional parameter to mapping driver and tell LPDDR drivers where PFOW window is in chip mapping. It leads to necessity of map_info structure extendoing. Signed-off-by: Alexey Korolev Acked-by: Jared Hulbert Signed-off-by: David Woodhouse --- include/linux/mtd/map.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mtd/map.h b/include/linux/mtd/map.h index aa30244492c6..b981b8772217 100644 --- a/include/linux/mtd/map.h +++ b/include/linux/mtd/map.h @@ -223,6 +223,7 @@ struct map_info { must leave it enabled. */ void (*set_vpp)(struct map_info *, int); + unsigned long pfow_base; unsigned long map_priv_1; unsigned long map_priv_2; void *fldrv_priv; -- cgit v1.2.2 From d81408304b06a71c28417445202af9cd6673168d Mon Sep 17 00:00:00 2001 From: Alexey Korolev Date: Tue, 16 Dec 2008 18:22:39 +0000 Subject: [MTD] LPDDR extended physmap driver to support LPDDR flash Physmap is a generic map driver for different platforms and flash types. We added support of LPDDR to physmap. All changes here are related to introduction of new pfow_base parameter. This parameter is valid in case of LPDDR chips only. Signed-off-by: Alexey Korolev Acked-by: Jared Hulbert Signed-off-by: David Woodhouse --- include/linux/mtd/physmap.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mtd/physmap.h b/include/linux/mtd/physmap.h index c8e63a5ee72e..76f7cabf07d3 100644 --- a/include/linux/mtd/physmap.h +++ b/include/linux/mtd/physmap.h @@ -24,6 +24,7 @@ struct physmap_flash_data { unsigned int width; void (*set_vpp)(struct map_info *, int); unsigned int nr_parts; + unsigned int pfow_base; struct mtd_partition *parts; }; -- cgit v1.2.2 From 07f2211e4fbce6990722d78c4f04225da9c0e9cf Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 5 Jan 2009 17:14:31 -0700 Subject: dmaengine: remove dependency on async_tx async_tx.ko is a consumer of dma channels. A circular dependency arises if modules in drivers/dma rely on common code in async_tx.ko. It prevents either module from being unloaded. Move dma_wait_for_async_tx and async_tx_run_dependencies to dmaeninge.o where they should have been from the beginning. Reviewed-by: Andrew Morton Signed-off-by: Dan Williams --- include/linux/async_tx.h | 15 --------------- include/linux/dmaengine.h | 9 +++++++++ 2 files changed, 9 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/include/linux/async_tx.h b/include/linux/async_tx.h index 0f50d4cc4360..1c816775f135 100644 --- a/include/linux/async_tx.h +++ b/include/linux/async_tx.h @@ -60,8 +60,6 @@ enum async_tx_flags { #ifdef CONFIG_DMA_ENGINE void async_tx_issue_pending_all(void); -enum dma_status dma_wait_for_async_tx(struct dma_async_tx_descriptor *tx); -void async_tx_run_dependencies(struct dma_async_tx_descriptor *tx); #ifdef CONFIG_ARCH_HAS_ASYNC_TX_FIND_CHANNEL #include #else @@ -77,19 +75,6 @@ static inline void async_tx_issue_pending_all(void) do { } while (0); } -static inline enum dma_status -dma_wait_for_async_tx(struct dma_async_tx_descriptor *tx) -{ - return DMA_SUCCESS; -} - -static inline void -async_tx_run_dependencies(struct dma_async_tx_descriptor *tx, - struct dma_chan *host_chan) -{ - do { } while (0); -} - static inline struct dma_chan * async_tx_find_channel(struct dma_async_tx_descriptor *depend_tx, enum dma_transaction_type tx_type, struct page **dst, int dst_count, diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h index adb0b084eb5a..e4ec7e7b8056 100644 --- a/include/linux/dmaengine.h +++ b/include/linux/dmaengine.h @@ -475,11 +475,20 @@ static inline enum dma_status dma_async_is_complete(dma_cookie_t cookie, } enum dma_status dma_sync_wait(struct dma_chan *chan, dma_cookie_t cookie); +#ifdef CONFIG_DMA_ENGINE +enum dma_status dma_wait_for_async_tx(struct dma_async_tx_descriptor *tx); +#else +static inline enum dma_status dma_wait_for_async_tx(struct dma_async_tx_descriptor *tx) +{ + return DMA_SUCCESS; +} +#endif /* --- DMA device --- */ int dma_async_device_register(struct dma_device *device); void dma_async_device_unregister(struct dma_device *device); +void dma_run_dependencies(struct dma_async_tx_descriptor *tx); /* --- Helper iov-locking functions --- */ -- cgit v1.2.2 From b3881f74b31b7d47d0f1c4d89ac3e7f0b9c05e3e Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Mon, 5 Jan 2009 22:46:26 -0500 Subject: ext4: Add mount option to set kjournald's I/O priority Signed-off-by: "Theodore Ts'o" Cc: Jens Axboe --- include/linux/ioprio.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ioprio.h b/include/linux/ioprio.h index f98a656b17e5..76dad4808847 100644 --- a/include/linux/ioprio.h +++ b/include/linux/ioprio.h @@ -86,4 +86,6 @@ static inline int task_nice_ioclass(struct task_struct *task) */ extern int ioprio_best(unsigned short aprio, unsigned short bprio); +extern int set_task_ioprio(struct task_struct *task, int ioprio); + #endif -- cgit v1.2.2 From 835481d9bcd65720b473db6b38746a74a3964218 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Sun, 4 Jan 2009 05:18:06 -0800 Subject: cpumask: convert struct cpufreq_policy to cpumask_var_t Impact: use new cpumask API to reduce memory usage This is part of an effort to reduce structure sizes for machines configured with large NR_CPUS. cpumask_t gets replaced by cpumask_var_t, which is either struct cpumask[1] (small NR_CPUS) or struct cpumask * (large NR_CPUS). Signed-off-by: Rusty Russell Signed-off-by: Mike Travis Acked-by: Dave Jones Signed-off-by: Ingo Molnar --- include/linux/cpufreq.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 484b3abf61bb..384b38d3e8e2 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -80,8 +80,8 @@ struct cpufreq_real_policy { }; struct cpufreq_policy { - cpumask_t cpus; /* CPUs requiring sw coordination */ - cpumask_t related_cpus; /* CPUs with any coordination */ + cpumask_var_t cpus; /* CPUs requiring sw coordination */ + cpumask_var_t related_cpus; /* CPUs with any coordination */ unsigned int shared_type; /* ANY or ALL affected CPUs should set cpufreq */ unsigned int cpu; /* cpu nr of registered CPU */ -- cgit v1.2.2 From 548eaca46b3cf4419b6c2be839a106d8641ffb70 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Mon, 20 Oct 2008 17:48:43 -0400 Subject: nfsd: document new filehandle fsid types Descriptions taken from mountd code (in nfs-utils/utils/mountd/cache.c). Signed-off-by: J. Bruce Fields --- include/linux/nfsd/nfsfh.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/nfsd/nfsfh.h b/include/linux/nfsd/nfsfh.h index d1941cb965e9..b2e093870bc6 100644 --- a/include/linux/nfsd/nfsfh.h +++ b/include/linux/nfsd/nfsfh.h @@ -68,6 +68,10 @@ struct nfs_fhbase_old { * 1 - 4 byte user specified identifier * 2 - 4 byte major, 4 byte minor, 4 byte inode number - DEPRECATED * 3 - 4 byte device id, encoded for user-space, 4 byte inode number + * 4 - 4 byte inode number and 4 byte uuid + * 5 - 8 byte uuid + * 6 - 16 byte uuid + * 7 - 8 byte inode number and 16 byte uuid * * The fileid_type identified how the file within the filesystem is encoded. * This is (will be) passed to, and set by, the underlying filesystem if it supports -- cgit v1.2.2 From c9233eb7b0b11ef176d4bf68da2ce85464b6ec39 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 20 Oct 2008 11:51:57 -0400 Subject: sunrpc: add sv_maxconn field to svc_serv (try #3) svc_check_conn_limits() attempts to prevent denial of service attacks by having the service close old connections once it reaches a threshold. This threshold is based on the number of threads in the service: (serv->sv_nrthreads + 3) * 20 Once we reach this, we drop the oldest connections and a printk pops to warn the admin that they should increase the number of threads. Increasing the number of threads isn't an option however for services like lockd. We don't want to eliminate this check entirely for such services but we need some way to increase this limit. This patch adds a sv_maxconn field to the svc_serv struct. When it's set to 0, we use the current method to calculate the max number of connections. RPC services can then set this on an as-needed basis. Signed-off-by: Jeff Layton Acked-by: Neil Brown Signed-off-by: J. Bruce Fields --- include/linux/sunrpc/svc.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index 3afe7fb403b2..3435d24bfe55 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -58,10 +58,13 @@ struct svc_serv { struct svc_stat * sv_stats; /* RPC statistics */ spinlock_t sv_lock; unsigned int sv_nrthreads; /* # of server threads */ + unsigned int sv_maxconn; /* max connections allowed or + * '0' causing max to be based + * on number of threads. */ + unsigned int sv_max_payload; /* datagram payload size */ unsigned int sv_max_mesg; /* max_payload + 1 page for overheads */ unsigned int sv_xdrsize; /* XDR buffer size */ - struct list_head sv_permsocks; /* all permanent sockets */ struct list_head sv_tempsocks; /* all temporary sockets */ int sv_tmpcnt; /* count of temporary sockets */ -- cgit v1.2.2 From 7538ce1eb656a1477bedd5b1c202226e7abf5e7b Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 4 Dec 2008 14:19:45 -0500 Subject: NLM: Use modern style for pointer fields in nlm_host Clean up: I'm about to add another "char *" field to the nlm_host structure. The h_name field, for example, uses an older style of declaring a "char *" field. If I match that style for the new field, checkpatch.pl will complain. So, fix pointer fields to use the new style. Signed-off-by: Chuck Lever Signed-off-by: J. Bruce Fields --- include/linux/lockd/lockd.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h index 23da3fa69efa..3dbdd353156c 100644 --- a/include/linux/lockd/lockd.h +++ b/include/linux/lockd/lockd.h @@ -43,8 +43,8 @@ struct nlm_host { struct sockaddr_storage h_addr; /* peer address */ size_t h_addrlen; struct sockaddr_storage h_srcaddr; /* our address (optional) */ - struct rpc_clnt * h_rpcclnt; /* RPC client to talk to peer */ - char * h_name; /* remote hostname */ + struct rpc_clnt *h_rpcclnt; /* RPC client to talk to peer */ + char *h_name; /* remote hostname */ u32 h_version; /* interface version */ unsigned short h_proto; /* transport proto */ unsigned short h_reclaiming : 1, @@ -64,7 +64,7 @@ struct nlm_host { spinlock_t h_lock; struct list_head h_granted; /* Locks in GRANTED state */ struct list_head h_reclaim; /* Locks in RECLAIM state */ - struct nsm_handle * h_nsmhandle; /* NSM status handle */ + struct nsm_handle *h_nsmhandle; /* NSM status handle */ char h_addrbuf[48], /* address eyecatchers */ h_srcaddrbuf[48]; -- cgit v1.2.2 From 1df40b609ad5a622904eb652109c287fe9c93ec5 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 4 Dec 2008 14:19:53 -0500 Subject: NLM: Remove address eye-catcher buffers from nlm_host The h_name field in struct nlm_host is a just copy of h_nsmhandle->sm_name. Likewise, the contents of the h_addrbuf field should be identical to the sm_addrbuf field. The h_srcaddrbuf field is used only in one place for debugging. We can live without this until we get %pI formatting for printk(). Currently these buffers are 48 bytes, but we need to support scope IDs in IPv6 presentation addresses, which means making the buffers even larger. Instead, let's find ways to eliminate them to save space. Signed-off-by: Chuck Lever Signed-off-by: J. Bruce Fields --- include/linux/lockd/lockd.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h index 3dbdd353156c..dae22cb4c38d 100644 --- a/include/linux/lockd/lockd.h +++ b/include/linux/lockd/lockd.h @@ -65,9 +65,7 @@ struct nlm_host { struct list_head h_granted; /* Locks in GRANTED state */ struct list_head h_reclaim; /* Locks in RECLAIM state */ struct nsm_handle *h_nsmhandle; /* NSM status handle */ - - char h_addrbuf[48], /* address eyecatchers */ - h_srcaddrbuf[48]; + char *h_addrbuf; /* address eyecatcher */ }; struct nsm_handle { -- cgit v1.2.2 From bc995801a09d1fead0bec1356bfd836911c8eed7 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 4 Dec 2008 14:20:08 -0500 Subject: NLM: Support IPv6 scope IDs in nlm_display_address() Scope ID support is needed since the kernel's NSM implementation is about to use these displayed addresses as a mon_name in some cases. When nsm_use_hostnames is zero, without scope ID support NSM will fail to handle peers that contact us via a link-local address. Link-local addresses do not work without an interface ID, which is stored in the sockaddr's sin6_scope_id field. Signed-off-by: Chuck Lever Signed-off-by: J. Bruce Fields --- include/linux/lockd/lockd.h | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h index dae22cb4c38d..80a0a2cff2b8 100644 --- a/include/linux/lockd/lockd.h +++ b/include/linux/lockd/lockd.h @@ -68,6 +68,14 @@ struct nlm_host { char *h_addrbuf; /* address eyecatcher */ }; +/* + * The largest string sm_addrbuf should hold is a full-size IPv6 address + * (no "::" anywhere) with a scope ID. The buffer size is computed to + * hold eight groups of colon-separated four-hex-digit numbers, a + * percent sign, a scope id (at most 32 bits, in decimal), and NUL. + */ +#define NSM_ADDRBUF ((8 * 4 + 7) + (1 + 10) + 1) + struct nsm_handle { struct list_head sm_link; atomic_t sm_count; @@ -76,7 +84,7 @@ struct nsm_handle { size_t sm_addrlen; unsigned int sm_monitored : 1, sm_sticky : 1; /* don't unmonitor */ - char sm_addrbuf[48]; /* address eyecatcher */ + char sm_addrbuf[NSM_ADDRBUF]; }; /* -- cgit v1.2.2 From f47534f7f0ac7727e05ec4274b764b181df2cf7f Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 4 Dec 2008 14:20:38 -0500 Subject: NSM: Use modern style for sm_name field in nsm_handle Clean up: I'm about to add another "char *" field to the nsm_handle structure. The sm_name field uses an older style of declaring a "char *" field. If I match that style for the new field, checkpatch.pl will complain. So, fix the sm_name field to use the new style. Signed-off-by: Chuck Lever Signed-off-by: J. Bruce Fields --- include/linux/lockd/lockd.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h index 80a0a2cff2b8..54dbb458e73c 100644 --- a/include/linux/lockd/lockd.h +++ b/include/linux/lockd/lockd.h @@ -79,7 +79,7 @@ struct nlm_host { struct nsm_handle { struct list_head sm_link; atomic_t sm_count; - char * sm_name; + char *sm_name; struct sockaddr_storage sm_addr; size_t sm_addrlen; unsigned int sm_monitored : 1, -- cgit v1.2.2 From 29ed1407ed81086b778ebf12145b048ac3f7e10e Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 4 Dec 2008 14:20:46 -0500 Subject: NSM: Support IPv6 version of mon_name The "mon_name" argument of the NSMPROC_MON and NSMPROC_UNMON upcalls is a string that contains the hostname or IP address of the remote peer to be notified when this host has rebooted. The sm-notify command uses this identifier to contact the peer when we reboot, so it must be either a well-qualified DNS hostname or a presentation format IP address string. When the "nsm_use_hostnames" sysctl is set to zero, the kernel's NSM provides a presentation format IP address in the "mon_name" argument. Otherwise, the "caller_name" argument from NLM requests is used, which is usually just the DNS hostname of the peer. To support IPv6 addresses for the mon_name argument, we use the nsm_handle's address eye-catcher, which already contains an appropriate presentation format address string. Using the eye-catcher string obviates the need to use a large buffer on the stack to form the presentation address string for the upcall. This patch also addresses a subtle bug. An NSMPROC_MON request and the subsequent NSMPROC_UNMON request for the same peer are required to use the same value for the "mon_name" argument. Otherwise, rpc.statd's NSMPROC_UNMON processing cannot locate the database entry for that peer and remove it. If the setting of nsm_use_hostnames is changed between the time the kernel sends an NSMPROC_MON request and the time it sends the NSMPROC_UNMON request for the same peer, the "mon_name" argument for these two requests may not be the same. This is because the value of "mon_name" is currently chosen at the moment the call is made based on the setting of nsm_use_hostnames To ensure both requests pass identical contents in the "mon_name" argument, we now select which string to use for the argument in the nsm_monitor() function. A pointer to this string is saved in the nsm_handle so it can be used for a subsequent NSMPROC_UNMON upcall. NB: There are other potential problems, such as how nlm_host_rebooted() might behave if nsm_use_hostnames were changed while hosts are still being monitored. This patch does not attempt to address those problems. Signed-off-by: Chuck Lever Signed-off-by: J. Bruce Fields --- include/linux/lockd/lockd.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h index 54dbb458e73c..d3c7247d23e8 100644 --- a/include/linux/lockd/lockd.h +++ b/include/linux/lockd/lockd.h @@ -79,6 +79,7 @@ struct nlm_host { struct nsm_handle { struct list_head sm_link; atomic_t sm_count; + char *sm_mon_name; char *sm_name; struct sockaddr_storage sm_addr; size_t sm_addrlen; -- cgit v1.2.2 From 1e49323c4ab044d05bbc68cf13cadcbd4372468c Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 4 Dec 2008 14:21:24 -0500 Subject: NLM: Move the public declaration of nsm_monitor() to lockd.h Clean up. Make the nlm_host argument "const," and move the public declaration to lockd.h with other NSM public function (nsm_release, eg) and global variable declarations. Add a documenting comment. Signed-off-by: Chuck Lever Signed-off-by: J. Bruce Fields --- include/linux/lockd/lockd.h | 4 ++++ include/linux/lockd/sm_inter.h | 1 - 2 files changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h index d3c7247d23e8..f15a4f5ccbfb 100644 --- a/include/linux/lockd/lockd.h +++ b/include/linux/lockd/lockd.h @@ -242,6 +242,10 @@ extern void nlm_host_rebooted(const struct sockaddr_in *, const char *, unsigned int, u32); void nsm_release(struct nsm_handle *); +/* + * Host monitoring + */ +int nsm_monitor(const struct nlm_host *host); /* * This is used in garbage collection and resource reclaim diff --git a/include/linux/lockd/sm_inter.h b/include/linux/lockd/sm_inter.h index 5a5448bdb17d..546b6102b0d7 100644 --- a/include/linux/lockd/sm_inter.h +++ b/include/linux/lockd/sm_inter.h @@ -41,7 +41,6 @@ struct nsm_res { u32 state; }; -int nsm_monitor(struct nlm_host *); int nsm_unmonitor(struct nlm_host *); extern int nsm_local_state; -- cgit v1.2.2 From c8c23c423dec49cb439697d3dc714e1500ff1610 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 4 Dec 2008 14:21:31 -0500 Subject: NSM: Release nsmhandle in nlm_destroy_host The nsm_handle's reference count is bumped in nlm_lookup_host(). It should be decremented in nlm_destroy_host() to make it easier to see the balance of these two operations. Move the nsm_release() call to fs/lockd/host.c. The h_nsmhandle pointer is set in nlm_lookup_host(), and never cleared. The nlm_destroy_host() function is never called for the same nlm_host twice, so h_nsmhandle won't ever be NULL when nsm_unmonitor() is called. All references to the nlm_host are gone before it is freed. We can skip making h_nsmhandle NULL just before the nlm_host is deallocated. It's also likely we can remove the h_nsmhandle NULL check in nlmsvc_is_client() as well, but we can do that later when rearchitect- ing the nlm_host cache. Signed-off-by: Chuck Lever Signed-off-by: J. Bruce Fields --- include/linux/lockd/lockd.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h index f15a4f5ccbfb..30a6a9c1ce42 100644 --- a/include/linux/lockd/lockd.h +++ b/include/linux/lockd/lockd.h @@ -240,7 +240,6 @@ void nlm_release_host(struct nlm_host *); void nlm_shutdown_hosts(void); extern void nlm_host_rebooted(const struct sockaddr_in *, const char *, unsigned int, u32); -void nsm_release(struct nsm_handle *); /* * Host monitoring -- cgit v1.2.2 From 356c3eb466fd1a12afd6448d90fba3922836e5f1 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 4 Dec 2008 14:21:38 -0500 Subject: NLM: Move the public declaration of nsm_unmonitor() to lockd.h Clean up. Make the nlm_host argument "const," and move the public declaration to lockd.h. Add a documenting comment. Bruce observed that nsm_unmonitor()'s only caller doesn't care about its return code, so make nsm_unmonitor() return void. Signed-off-by: Chuck Lever Signed-off-by: J. Bruce Fields --- include/linux/lockd/lockd.h | 1 + include/linux/lockd/sm_inter.h | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h index 30a6a9c1ce42..38344bfb814a 100644 --- a/include/linux/lockd/lockd.h +++ b/include/linux/lockd/lockd.h @@ -245,6 +245,7 @@ extern void nlm_host_rebooted(const struct sockaddr_in *, const char *, * Host monitoring */ int nsm_monitor(const struct nlm_host *host); +void nsm_unmonitor(const struct nlm_host *host); /* * This is used in garbage collection and resource reclaim diff --git a/include/linux/lockd/sm_inter.h b/include/linux/lockd/sm_inter.h index 546b6102b0d7..896a5e303323 100644 --- a/include/linux/lockd/sm_inter.h +++ b/include/linux/lockd/sm_inter.h @@ -41,7 +41,6 @@ struct nsm_res { u32 state; }; -int nsm_unmonitor(struct nlm_host *); extern int nsm_local_state; #endif /* LINUX_LOCKD_SM_INTER_H */ -- cgit v1.2.2 From 9c1bfd037f7ff8badaecb47418f109148d88bf45 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 5 Dec 2008 19:01:59 -0500 Subject: NSM: Move NSM-related XDR data structures to lockd's xdr.h Clean up: NSM's XDR data structures are used only in fs/lockd/mon.c, so move them there. Signed-off-by: Chuck Lever Signed-off-by: J. Bruce Fields --- include/linux/lockd/sm_inter.h | 20 -------------------- 1 file changed, 20 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lockd/sm_inter.h b/include/linux/lockd/sm_inter.h index 896a5e303323..dd9d8a5bb316 100644 --- a/include/linux/lockd/sm_inter.h +++ b/include/linux/lockd/sm_inter.h @@ -21,26 +21,6 @@ #define SM_MAXSTRLEN 1024 #define SM_PRIV_SIZE 16 -/* - * Arguments for all calls to statd - */ -struct nsm_args { - __be32 addr; /* remote address */ - u32 prog; /* RPC callback info */ - u32 vers; - u32 proc; - - char * mon_name; -}; - -/* - * Result returned by statd - */ -struct nsm_res { - u32 status; - u32 state; -}; - extern int nsm_local_state; #endif /* LINUX_LOCKD_SM_INTER_H */ -- cgit v1.2.2 From 36e8e668d3e6a61848a8921ddeb663b417299fa5 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 5 Dec 2008 19:02:07 -0500 Subject: NSM: Move NSM program and procedure numbers to fs/lockd/mon.c Clean up: Move the RPC program and procedure numbers for NSM into the one source file that needs them: fs/lockd/mon.c. And, as with NLM, NFS, and rpcbind calls, use NSMPROC_FOO instead of SM_FOO for NSM procedure numbers. Finally, make a couple of comments more precise: what is referred to here as SM_NOTIFY is really the NLM (lockd) NLMPROC_SM_NOTIFY downcall, not NSMPROC_NOTIFY. Signed-off-by: Chuck Lever Signed-off-by: J. Bruce Fields --- include/linux/lockd/sm_inter.h | 9 --------- 1 file changed, 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lockd/sm_inter.h b/include/linux/lockd/sm_inter.h index dd9d8a5bb316..116bf38535a0 100644 --- a/include/linux/lockd/sm_inter.h +++ b/include/linux/lockd/sm_inter.h @@ -9,15 +9,6 @@ #ifndef LINUX_LOCKD_SM_INTER_H #define LINUX_LOCKD_SM_INTER_H -#define SM_PROGRAM 100024 -#define SM_VERSION 1 -#define SM_STAT 1 -#define SM_MON 2 -#define SM_UNMON 3 -#define SM_UNMON_ALL 4 -#define SM_SIMU_CRASH 5 -#define SM_NOTIFY 6 - #define SM_MAXSTRLEN 1024 #define SM_PRIV_SIZE 16 -- cgit v1.2.2 From 67c6d107a689243979a2b5f15244b5261634a924 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 5 Dec 2008 19:02:45 -0500 Subject: NSM: Move nsm_find() to fs/lockd/mon.c The nsm_find() function sets up fresh nsm_handle entries. This is where we will store the "priv" cookie used to lookup nsm_handles during reboot recovery. The cookie will be constructed when nsm_find() creates a new nsm_handle. As much as possible, I would like to keep everything that handles a "priv" cookie in fs/lockd/mon.c so that all the smarts are in one source file. That organization should make it pretty simple to see how all this works. To me, it makes more sense than the current arrangement to keep nsm_find() with nsm_monitor() and nsm_unmonitor(). So, start reorganizing by moving nsm_find() into fs/lockd/mon.c. The nsm_release() function comes along too, since it shares the nsm_lock global variable. Signed-off-by: Chuck Lever Signed-off-by: J. Bruce Fields --- include/linux/lockd/lockd.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h index 38344bfb814a..8d715363c6ac 100644 --- a/include/linux/lockd/lockd.h +++ b/include/linux/lockd/lockd.h @@ -247,6 +247,12 @@ extern void nlm_host_rebooted(const struct sockaddr_in *, const char *, int nsm_monitor(const struct nlm_host *host); void nsm_unmonitor(const struct nlm_host *host); +struct nsm_handle *nsm_find(const struct sockaddr *sap, const size_t salen, + const char *hostname, + const size_t hostname_len, + const int create); +void nsm_release(struct nsm_handle *nsm); + /* * This is used in garbage collection and resource reclaim * A return value != 0 means destroy the lock/block/share -- cgit v1.2.2 From 7e44d3bea21fbb9494930d1cd35ca92a9a4a3279 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 5 Dec 2008 19:03:16 -0500 Subject: NSM: Generate NSMPROC_MON's "priv" argument when nsm_handle is created Introduce a new data type, used by both the in-kernel NLM and NSM implementations, that is used to manage the opaque "priv" argument for the NSMPROC_MON and NLMPROC_SM_NOTIFY calls. Construct the "priv" cookie when the nsm_handle is created. The nsm_init_private() function may look a little strange, but it is roughly equivalent to how the XDR encoder formed the "priv" argument. It's going to go away soon. Signed-off-by: Chuck Lever Signed-off-by: J. Bruce Fields --- include/linux/lockd/lockd.h | 1 + include/linux/lockd/sm_inter.h | 1 - include/linux/lockd/xdr.h | 6 ++++++ 3 files changed, 7 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h index 8d715363c6ac..194fa8a66398 100644 --- a/include/linux/lockd/lockd.h +++ b/include/linux/lockd/lockd.h @@ -85,6 +85,7 @@ struct nsm_handle { size_t sm_addrlen; unsigned int sm_monitored : 1, sm_sticky : 1; /* don't unmonitor */ + struct nsm_private sm_priv; char sm_addrbuf[NSM_ADDRBUF]; }; diff --git a/include/linux/lockd/sm_inter.h b/include/linux/lockd/sm_inter.h index 116bf38535a0..5cef5a79dd94 100644 --- a/include/linux/lockd/sm_inter.h +++ b/include/linux/lockd/sm_inter.h @@ -10,7 +10,6 @@ #define LINUX_LOCKD_SM_INTER_H #define SM_MAXSTRLEN 1024 -#define SM_PRIV_SIZE 16 extern int nsm_local_state; diff --git a/include/linux/lockd/xdr.h b/include/linux/lockd/xdr.h index d6b3a802c046..6b5199263858 100644 --- a/include/linux/lockd/xdr.h +++ b/include/linux/lockd/xdr.h @@ -13,6 +13,12 @@ #include #include +#define SM_PRIV_SIZE 16 + +struct nsm_private { + unsigned char data[SM_PRIV_SIZE]; +}; + struct svc_rqst; #define NLM_MAXCOOKIELEN 32 -- cgit v1.2.2 From 7fefc9cb9d5f129c238d93166f705c96ca2e7e51 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 5 Dec 2008 19:03:31 -0500 Subject: NLM: Change nlm_host_rebooted() to take a single nlm_reboot argument Pass the nlm_reboot data structure directly from the NLMPROC_SM_NOTIFY XDR decoders to nlm_host_rebooted(). This eliminates some packing and unpacking of the NLMPROC_SM_NOTIFY results, and prepares for passing these results, including the "priv" cookie, directly to a lookup routine in fs/lockd/mon.c. This patch changes code organization but should not cause any behavioral change. Signed-off-by: Chuck Lever Signed-off-by: J. Bruce Fields --- include/linux/lockd/lockd.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h index 194fa8a66398..2a3533ea38dd 100644 --- a/include/linux/lockd/lockd.h +++ b/include/linux/lockd/lockd.h @@ -239,8 +239,7 @@ void nlm_rebind_host(struct nlm_host *); struct nlm_host * nlm_get_host(struct nlm_host *); void nlm_release_host(struct nlm_host *); void nlm_shutdown_hosts(void); -extern void nlm_host_rebooted(const struct sockaddr_in *, const char *, - unsigned int, u32); +void nlm_host_rebooted(const struct nlm_reboot *); /* * Host monitoring -- cgit v1.2.2 From 576df4634e37e46b441fefb91915184edb13bb94 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 5 Dec 2008 19:03:39 -0500 Subject: NLM: Decode "priv" argument of NLMPROC_SM_NOTIFY as an opaque The NLM XDR decoders for the NLMPROC_SM_NOTIFY procedure should treat their "priv" argument truly as an opaque, as defined by the protocol, and let the upper layers figure out what is in it. This will make it easier to modify the contents and interpretation of the "priv" argument, and keep knowledge about what's in "priv" local to fs/lockd/mon.c. For now, the NLM and NSM implementations should behave exactly as they did before. The formation of the address of the rebooted host in nlm_host_rebooted() may look a little strange, but it is the inverse of how nsm_init_private() forms the private cookie. Plus, it's going away soon anyway. Signed-off-by: Chuck Lever Signed-off-by: J. Bruce Fields --- include/linux/lockd/xdr.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lockd/xdr.h b/include/linux/lockd/xdr.h index 6b5199263858..6338866222a8 100644 --- a/include/linux/lockd/xdr.h +++ b/include/linux/lockd/xdr.h @@ -83,10 +83,10 @@ struct nlm_res { * statd callback when client has rebooted */ struct nlm_reboot { - char * mon; - unsigned int len; - u32 state; - __be32 addr; + char *mon; + unsigned int len; + u32 state; + struct nsm_private priv; }; /* -- cgit v1.2.2 From 3420a8c4359a189f7d854ed7075d151257415447 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 5 Dec 2008 19:03:46 -0500 Subject: NSM: Add nsm_lookup() function Introduce a new API to fs/lockd/mon.c that allows nlm_host_rebooted() to lookup up nsm_handles via the contents of an nlm_reboot struct. The new function is equivalent to calling nsm_find() with @create set to zero, but it takes a struct nlm_reboot instead of separate arguments. Signed-off-by: Chuck Lever Signed-off-by: J. Bruce Fields --- include/linux/lockd/lockd.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h index 2a3533ea38dd..5e3ad926de89 100644 --- a/include/linux/lockd/lockd.h +++ b/include/linux/lockd/lockd.h @@ -251,6 +251,7 @@ struct nsm_handle *nsm_find(const struct sockaddr *sap, const size_t salen, const char *hostname, const size_t hostname_len, const int create); +struct nsm_handle *nsm_reboot_lookup(const struct nlm_reboot *info); void nsm_release(struct nsm_handle *nsm); /* -- cgit v1.2.2 From 92fd91b998a5216a6d6606704e71d541a180216c Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 5 Dec 2008 19:04:01 -0500 Subject: NLM: Remove "create" argument from nsm_find() Clean up: nsm_find() now has only one caller, and that caller unconditionally sets the @create argument. Thus the @create argument is no longer needed. Since nsm_find() now has a more specific purpose, pick a more appropriate name for it. Signed-off-by: Chuck Lever Signed-off-by: J. Bruce Fields --- include/linux/lockd/lockd.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h index 5e3ad926de89..1ccd49e97a7f 100644 --- a/include/linux/lockd/lockd.h +++ b/include/linux/lockd/lockd.h @@ -247,10 +247,10 @@ void nlm_host_rebooted(const struct nlm_reboot *); int nsm_monitor(const struct nlm_host *host); void nsm_unmonitor(const struct nlm_host *host); -struct nsm_handle *nsm_find(const struct sockaddr *sap, const size_t salen, +struct nsm_handle *nsm_get_handle(const struct sockaddr *sap, + const size_t salen, const char *hostname, - const size_t hostname_len, - const int create); + const size_t hostname_len); struct nsm_handle *nsm_reboot_lookup(const struct nlm_reboot *info); void nsm_release(struct nsm_handle *nsm); -- cgit v1.2.2 From e6765b83977f07983c7a10e6bbb19d6c7bbfc3a4 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 11 Dec 2008 17:56:14 -0500 Subject: NSM: Remove include/linux/lockd/sm_inter.h Clean up: The include/linux/lockd/sm_inter.h header is nearly empty now. Remove it. Signed-off-by: Chuck Lever Signed-off-by: J. Bruce Fields --- include/linux/lockd/lockd.h | 1 + include/linux/lockd/sm_inter.h | 16 ---------------- include/linux/lockd/xdr.h | 1 + 3 files changed, 2 insertions(+), 16 deletions(-) delete mode 100644 include/linux/lockd/sm_inter.h (limited to 'include/linux') diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h index 1ccd49e97a7f..8b57467375cc 100644 --- a/include/linux/lockd/lockd.h +++ b/include/linux/lockd/lockd.h @@ -205,6 +205,7 @@ extern struct svc_procedure nlmsvc_procedures4[]; extern int nlmsvc_grace_period; extern unsigned long nlmsvc_timeout; extern int nsm_use_hostnames; +extern int nsm_local_state; /* * Lockd client functions diff --git a/include/linux/lockd/sm_inter.h b/include/linux/lockd/sm_inter.h deleted file mode 100644 index 5cef5a79dd94..000000000000 --- a/include/linux/lockd/sm_inter.h +++ /dev/null @@ -1,16 +0,0 @@ -/* - * linux/include/linux/lockd/sm_inter.h - * - * Declarations for the kernel statd client. - * - * Copyright (C) 1996, Olaf Kirch - */ - -#ifndef LINUX_LOCKD_SM_INTER_H -#define LINUX_LOCKD_SM_INTER_H - -#define SM_MAXSTRLEN 1024 - -extern int nsm_local_state; - -#endif /* LINUX_LOCKD_SM_INTER_H */ diff --git a/include/linux/lockd/xdr.h b/include/linux/lockd/xdr.h index 6338866222a8..7dc5b6cb44cd 100644 --- a/include/linux/lockd/xdr.h +++ b/include/linux/lockd/xdr.h @@ -13,6 +13,7 @@ #include #include +#define SM_MAXSTRLEN 1024 #define SM_PRIV_SIZE 16 struct nsm_private { -- cgit v1.2.2 From 8529bc51d30b8f001734b29b21a51b579c260f5b Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 11 Dec 2008 17:56:22 -0500 Subject: NSM: Move nsm_addr() to fs/lockd/mon.c Clean up: nsm_addr_in() is no longer used, and nsm_addr() is used only in fs/lockd/mon.c, so move it there. Signed-off-by: Chuck Lever Signed-off-by: J. Bruce Fields --- include/linux/lockd/lockd.h | 10 ---------- 1 file changed, 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h index 8b57467375cc..6ab0449bc828 100644 --- a/include/linux/lockd/lockd.h +++ b/include/linux/lockd/lockd.h @@ -112,16 +112,6 @@ static inline struct sockaddr *nlm_srcaddr(const struct nlm_host *host) return (struct sockaddr *)&host->h_srcaddr; } -static inline struct sockaddr_in *nsm_addr_in(const struct nsm_handle *handle) -{ - return (struct sockaddr_in *)&handle->sm_addr; -} - -static inline struct sockaddr *nsm_addr(const struct nsm_handle *handle) -{ - return (struct sockaddr *)&handle->sm_addr; -} - /* * Map an fl_owner_t into a unique 32-bit "pid" */ -- cgit v1.2.2 From d1208f70738c91f13b4eadb1b7a694082e439da2 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 11 Dec 2008 17:56:44 -0500 Subject: NLM: nlm_privileged_requester() doesn't recognize mapped loopback address Commit b85e4676 added the nlm_privileged_requester() helper to check whether an RPC request was sent from a local privileged caller. It recognizes IPv4 privileged callers (from "127.0.0.1"), and IPv6 privileged callers (from "::1"). However, IPV6_ADDR_LOOPBACK is not set for the mapped IPv4 loopback address (::ffff:7f00:0001), so the test breaks when the kernel's RPC service is IPv6-enabled but user space is calling via the IPv4 loopback address. This is actually the most common case for IPv6- enabled RPC services on Linux. Rewrite the IPv6 check to handle the mapped IPv4 loopback address as well as a normal IPv6 loopback address. Signed-off-by: Chuck Lever Signed-off-by: J. Bruce Fields --- include/linux/lockd/lockd.h | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h index 6ab0449bc828..80d7e8a8257d 100644 --- a/include/linux/lockd/lockd.h +++ b/include/linux/lockd/lockd.h @@ -299,8 +299,14 @@ static inline int __nlm_privileged_request4(const struct sockaddr *sap) static inline int __nlm_privileged_request6(const struct sockaddr *sap) { const struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)sap; - return (ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LOOPBACK) && - (ntohs(sin6->sin6_port) < 1024); + + if (ntohs(sin6->sin6_port) > 1023) + return 0; + + if (ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_MAPPED) + return ipv4_is_loopback(sin6->sin6_addr.s6_addr32[3]); + + return ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LOOPBACK; } #else /* defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) */ static inline int __nlm_privileged_request6(const struct sockaddr *sap) -- cgit v1.2.2 From 57ef692588bc225853ca3267ca5b7cea2b07e058 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 11 Dec 2008 17:56:52 -0500 Subject: NLM: Rewrite IPv4 privileged requester's check Clean up. For consistency, rewrite the IPv4 check to match the same style as the new IPv6 check. Note that ipv4_is_loopback() is somewhat broader in its interpretation of what is a loopback address than simply "127.0.0.1". Signed-off-by: Chuck Lever Signed-off-by: J. Bruce Fields --- include/linux/lockd/lockd.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h index 80d7e8a8257d..aa6fe7026de7 100644 --- a/include/linux/lockd/lockd.h +++ b/include/linux/lockd/lockd.h @@ -291,8 +291,11 @@ static inline struct inode *nlmsvc_file_inode(struct nlm_file *file) static inline int __nlm_privileged_request4(const struct sockaddr *sap) { const struct sockaddr_in *sin = (struct sockaddr_in *)sap; - return (sin->sin_addr.s_addr == htonl(INADDR_LOOPBACK)) && - (ntohs(sin->sin_port) < 1024); + + if (ntohs(sin->sin_port) > 1023) + return 0; + + return ipv4_is_loopback(sin->sin_addr.s_addr); } #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) -- cgit v1.2.2 From 6f49a57aa5a0c6d4e4e27c85f7af6c83325a12d1 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 6 Jan 2009 11:38:14 -0700 Subject: dmaengine: up-level reference counting to the module level Simply, if a client wants any dmaengine channel then prevent all dmaengine modules from being removed. Once the clients are done re-enable module removal. Why?, beyond reducing complication: 1/ Tracking reference counts per-transaction in an efficient manner, as is currently done, requires a complicated scheme to avoid cache-line bouncing effects. 2/ Per-transaction ref-counting gives the false impression that a dma-driver can be gracefully removed ahead of its user (net, md, or dma-slave) 3/ None of the in-tree dma-drivers talk to hot pluggable hardware, but if such an engine were built one day we still would not need to notify clients of remove events. The driver can simply return NULL to a ->prep() request, something that is much easier for a client to handle. Reviewed-by: Andrew Morton Acked-by: Maciej Sosnowski Signed-off-by: Dan Williams --- include/linux/dmaengine.h | 21 --------------------- 1 file changed, 21 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h index e4ec7e7b8056..d18d37d1015d 100644 --- a/include/linux/dmaengine.h +++ b/include/linux/dmaengine.h @@ -165,7 +165,6 @@ struct dma_slave { */ struct dma_chan_percpu { - local_t refcount; /* stats */ unsigned long memcpy_count; unsigned long bytes_transferred; @@ -205,26 +204,6 @@ struct dma_chan { void dma_chan_cleanup(struct kref *kref); -static inline void dma_chan_get(struct dma_chan *chan) -{ - if (unlikely(chan->slow_ref)) - kref_get(&chan->refcount); - else { - local_inc(&(per_cpu_ptr(chan->local, get_cpu())->refcount)); - put_cpu(); - } -} - -static inline void dma_chan_put(struct dma_chan *chan) -{ - if (unlikely(chan->slow_ref)) - kref_put(&chan->refcount, dma_chan_cleanup); - else { - local_dec(&(per_cpu_ptr(chan->local, get_cpu())->refcount)); - put_cpu(); - } -} - /* * typedef dma_event_callback - function pointer to a DMA event callback * For each channel added to the system this routine is called for each client. -- cgit v1.2.2 From bec085134e446577a983f17f57d642a88d1af53b Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 6 Jan 2009 11:38:14 -0700 Subject: dmaengine: centralize channel allocation, introduce dma_find_channel Allowing multiple clients to each define their own channel allocation scheme quickly leads to a pathological situation. For memory-to-memory offload all clients can share a central allocator. This simply moves the existing async_tx allocator to dmaengine with minimal fixups: * async_tx.c:get_chan_ref_by_cap --> dmaengine.c:nth_chan * async_tx.c:async_tx_rebalance --> dmaengine.c:dma_channel_rebalance * split out common code from async_tx.c:__async_tx_find_channel --> dma_find_channel Reviewed-by: Andrew Morton Signed-off-by: Dan Williams --- include/linux/dmaengine.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h index d18d37d1015d..b466f02e2433 100644 --- a/include/linux/dmaengine.h +++ b/include/linux/dmaengine.h @@ -182,6 +182,7 @@ struct dma_chan_percpu { * @device_node: used to add this to the device chan list * @local: per-cpu pointer to a struct dma_chan_percpu * @client-count: how many clients are using this channel + * @table_count: number of appearances in the mem-to-mem allocation table */ struct dma_chan { struct dma_device *device; @@ -198,6 +199,7 @@ struct dma_chan { struct list_head device_node; struct dma_chan_percpu *local; int client_count; + int table_count; }; #define to_dma_chan(p) container_of(p, struct dma_chan, dev) @@ -468,6 +470,7 @@ static inline enum dma_status dma_wait_for_async_tx(struct dma_async_tx_descript int dma_async_device_register(struct dma_device *device); void dma_async_device_unregister(struct dma_device *device); void dma_run_dependencies(struct dma_async_tx_descriptor *tx); +struct dma_chan *dma_find_channel(enum dma_transaction_type tx_type); /* --- Helper iov-locking functions --- */ -- cgit v1.2.2 From 2ba05622b8b143b0c95968ba59bddfbd6d2f2559 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 6 Jan 2009 11:38:14 -0700 Subject: dmaengine: provide a common 'issue_pending_all' implementation async_tx and net_dma each have open-coded versions of issue_pending_all, so provide a common routine in dmaengine. The implementation needs to walk the global device list, so implement rcu to allow dma_issue_pending_all to run lockless. Clients protect themselves from channel removal events by holding a dmaengine reference. Reviewed-by: Andrew Morton Signed-off-by: Dan Williams --- include/linux/async_tx.h | 2 +- include/linux/dmaengine.h | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/async_tx.h b/include/linux/async_tx.h index 1c816775f135..45f6297821bd 100644 --- a/include/linux/async_tx.h +++ b/include/linux/async_tx.h @@ -59,7 +59,7 @@ enum async_tx_flags { }; #ifdef CONFIG_DMA_ENGINE -void async_tx_issue_pending_all(void); +#define async_tx_issue_pending_all dma_issue_pending_all #ifdef CONFIG_ARCH_HAS_ASYNC_TX_FIND_CHANNEL #include #else diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h index b466f02e2433..57a43adfc39e 100644 --- a/include/linux/dmaengine.h +++ b/include/linux/dmaengine.h @@ -471,6 +471,7 @@ int dma_async_device_register(struct dma_device *device); void dma_async_device_unregister(struct dma_device *device); void dma_run_dependencies(struct dma_async_tx_descriptor *tx); struct dma_chan *dma_find_channel(enum dma_transaction_type tx_type); +void dma_issue_pending_all(void); /* --- Helper iov-locking functions --- */ -- cgit v1.2.2 From f67b45999205164958de4ec0658d51fa4bee066d Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 6 Jan 2009 11:38:15 -0700 Subject: net_dma: convert to dma_find_channel Use the general-purpose channel allocation provided by dmaengine. Reviewed-by: Andrew Morton Signed-off-by: Dan Williams --- include/linux/netdevice.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 41e1224651cf..bac2c458d9b8 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1113,9 +1113,6 @@ struct softnet_data struct sk_buff *completion_queue; struct napi_struct backlog; -#ifdef CONFIG_NET_DMA - struct dma_chan *net_dma; -#endif }; DECLARE_PER_CPU(struct softnet_data,softnet_data); -- cgit v1.2.2 From 59b5ec21446b9239d706ab237fb261d525b75e81 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 6 Jan 2009 11:38:15 -0700 Subject: dmaengine: introduce dma_request_channel and private channels This interface is primarily for device-to-memory clients which need to search for dma channels with platform-specific characteristics. The prototype is: struct dma_chan *dma_request_channel(dma_cap_mask_t mask, dma_filter_fn filter_fn, void *filter_param); When the optional 'filter_fn' parameter is set to NULL dma_request_channel simply returns the first channel that satisfies the capability mask. Otherwise, when the mask parameter is insufficient for specifying the necessary channel, the filter_fn routine can be used to disposition the available channels in the system. The filter_fn routine is called once for each free channel in the system. Upon seeing a suitable channel filter_fn returns DMA_ACK which flags that channel to be the return value from dma_request_channel. A channel allocated via this interface is exclusive to the caller, until dma_release_channel() is called. To ensure that all channels are not consumed by the general-purpose allocator the DMA_PRIVATE capability is provided to exclude a dma_device from general-purpose (memory-to-memory) consideration. Reviewed-by: Andrew Morton Acked-by: Maciej Sosnowski Signed-off-by: Dan Williams --- include/linux/dmaengine.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include/linux') diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h index 57a43adfc39e..fe40bc020af6 100644 --- a/include/linux/dmaengine.h +++ b/include/linux/dmaengine.h @@ -89,6 +89,7 @@ enum dma_transaction_type { DMA_MEMSET, DMA_MEMCPY_CRC32C, DMA_INTERRUPT, + DMA_PRIVATE, DMA_SLAVE, }; @@ -223,6 +224,18 @@ struct dma_client; typedef enum dma_state_client (*dma_event_callback) (struct dma_client *client, struct dma_chan *chan, enum dma_state state); +/** + * typedef dma_filter_fn - callback filter for dma_request_channel + * @chan: channel to be reviewed + * @filter_param: opaque parameter passed through dma_request_channel + * + * When this optional parameter is specified in a call to dma_request_channel a + * suitable channel is passed to this routine for further dispositioning before + * being returned. Where 'suitable' indicates a non-busy channel that + * satisfies the given capability mask. + */ +typedef enum dma_state_client (*dma_filter_fn)(struct dma_chan *chan, void *filter_param); + /** * struct dma_client - info on the entity making use of DMA services * @event_callback: func ptr to call when something happens @@ -472,6 +485,9 @@ void dma_async_device_unregister(struct dma_device *device); void dma_run_dependencies(struct dma_async_tx_descriptor *tx); struct dma_chan *dma_find_channel(enum dma_transaction_type tx_type); void dma_issue_pending_all(void); +#define dma_request_channel(mask, x, y) __dma_request_channel(&(mask), x, y) +struct dma_chan *__dma_request_channel(dma_cap_mask_t *mask, dma_filter_fn fn, void *fn_param); +void dma_release_channel(struct dma_chan *chan); /* --- Helper iov-locking functions --- */ -- cgit v1.2.2 From 33df8ca068123457db56c316946a3c0e4ef787d6 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 6 Jan 2009 11:38:15 -0700 Subject: dmatest: convert to dma_request_channel Replace the client registration infrastructure with a custom loop to poll for channels. Once dma_request_channel returns NULL stop asking for channels. A userspace side effect of this change if that loading the dmatest module before loading a dma driver will result in no channels being found, previously dmatest would get a callback. To facilitate testing in the built-in case dmatest_init is marked as a late_initcall. Another side effect is that channels under test can not be used for any other purpose. Cc: Haavard Skinnemoen Reviewed-by: Andrew Morton Signed-off-by: Dan Williams --- include/linux/dmaengine.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h index fe40bc020af6..6f2d070ac7f3 100644 --- a/include/linux/dmaengine.h +++ b/include/linux/dmaengine.h @@ -400,6 +400,12 @@ __dma_cap_set(enum dma_transaction_type tx_type, dma_cap_mask_t *dstp) set_bit(tx_type, dstp->bits); } +#define dma_cap_zero(mask) __dma_cap_zero(&(mask)) +static inline void __dma_cap_zero(dma_cap_mask_t *dstp) +{ + bitmap_zero(dstp->bits, DMA_TX_TYPE_END); +} + #define dma_has_cap(tx, mask) __dma_has_cap((tx), &(mask)) static inline int __dma_has_cap(enum dma_transaction_type tx_type, dma_cap_mask_t *srcp) -- cgit v1.2.2 From 74465b4ff9ac1da503025c0a0042e023bfa6505c Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 6 Jan 2009 11:38:16 -0700 Subject: atmel-mci: convert to dma_request_channel and down-level dma_slave dma_request_channel provides an exclusive channel, so we no longer need to pass slave data through dmaengine. Cc: Haavard Skinnemoen Reviewed-by: Andrew Morton Signed-off-by: Dan Williams --- include/linux/dmaengine.h | 38 -------------------------------------- include/linux/dw_dmac.h | 31 +++++++++++++++++++++++-------- 2 files changed, 23 insertions(+), 46 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h index 6f2d070ac7f3..d63544cf8a1a 100644 --- a/include/linux/dmaengine.h +++ b/include/linux/dmaengine.h @@ -96,17 +96,6 @@ enum dma_transaction_type { /* last transaction type for creation of the capabilities mask */ #define DMA_TX_TYPE_END (DMA_SLAVE + 1) -/** - * enum dma_slave_width - DMA slave register access width. - * @DMA_SLAVE_WIDTH_8BIT: Do 8-bit slave register accesses - * @DMA_SLAVE_WIDTH_16BIT: Do 16-bit slave register accesses - * @DMA_SLAVE_WIDTH_32BIT: Do 32-bit slave register accesses - */ -enum dma_slave_width { - DMA_SLAVE_WIDTH_8BIT, - DMA_SLAVE_WIDTH_16BIT, - DMA_SLAVE_WIDTH_32BIT, -}; /** * enum dma_ctrl_flags - DMA flags to augment operation preparation, @@ -132,32 +121,6 @@ enum dma_ctrl_flags { */ typedef struct { DECLARE_BITMAP(bits, DMA_TX_TYPE_END); } dma_cap_mask_t; -/** - * struct dma_slave - Information about a DMA slave - * @dev: device acting as DMA slave - * @dma_dev: required DMA master device. If non-NULL, the client can not be - * bound to other masters than this. - * @tx_reg: physical address of data register used for - * memory-to-peripheral transfers - * @rx_reg: physical address of data register used for - * peripheral-to-memory transfers - * @reg_width: peripheral register width - * - * If dma_dev is non-NULL, the client can not be bound to other DMA - * masters than the one corresponding to this device. The DMA master - * driver may use this to determine if there is controller-specific - * data wrapped around this struct. Drivers of platform code that sets - * the dma_dev field must therefore make sure to use an appropriate - * controller-specific dma slave structure wrapping this struct. - */ -struct dma_slave { - struct device *dev; - struct device *dma_dev; - dma_addr_t tx_reg; - dma_addr_t rx_reg; - enum dma_slave_width reg_width; -}; - /** * struct dma_chan_percpu - the per-CPU part of struct dma_chan * @refcount: local_t used for open-coded "bigref" counting @@ -248,7 +211,6 @@ typedef enum dma_state_client (*dma_filter_fn)(struct dma_chan *chan, void *filt struct dma_client { dma_event_callback event_callback; dma_cap_mask_t cap_mask; - struct dma_slave *slave; struct list_head global_node; }; diff --git a/include/linux/dw_dmac.h b/include/linux/dw_dmac.h index 04d217b442bf..d797dde247f7 100644 --- a/include/linux/dw_dmac.h +++ b/include/linux/dw_dmac.h @@ -21,15 +21,35 @@ struct dw_dma_platform_data { unsigned int nr_channels; }; +/** + * enum dw_dma_slave_width - DMA slave register access width. + * @DMA_SLAVE_WIDTH_8BIT: Do 8-bit slave register accesses + * @DMA_SLAVE_WIDTH_16BIT: Do 16-bit slave register accesses + * @DMA_SLAVE_WIDTH_32BIT: Do 32-bit slave register accesses + */ +enum dw_dma_slave_width { + DW_DMA_SLAVE_WIDTH_8BIT, + DW_DMA_SLAVE_WIDTH_16BIT, + DW_DMA_SLAVE_WIDTH_32BIT, +}; + /** * struct dw_dma_slave - Controller-specific information about a slave - * @slave: Generic information about the slave - * @ctl_lo: Platform-specific initializer for the CTL_LO register + * + * @dma_dev: required DMA master device + * @tx_reg: physical address of data register used for + * memory-to-peripheral transfers + * @rx_reg: physical address of data register used for + * peripheral-to-memory transfers + * @reg_width: peripheral register width * @cfg_hi: Platform-specific initializer for the CFG_HI register * @cfg_lo: Platform-specific initializer for the CFG_LO register */ struct dw_dma_slave { - struct dma_slave slave; + struct device *dma_dev; + dma_addr_t tx_reg; + dma_addr_t rx_reg; + enum dw_dma_slave_width reg_width; u32 cfg_hi; u32 cfg_lo; }; @@ -54,9 +74,4 @@ struct dw_dma_slave { #define DWC_CFGL_HS_DST_POL (1 << 18) /* dst handshake active low */ #define DWC_CFGL_HS_SRC_POL (1 << 19) /* src handshake active low */ -static inline struct dw_dma_slave *to_dw_dma_slave(struct dma_slave *slave) -{ - return container_of(slave, struct dw_dma_slave, slave); -} - #endif /* DW_DMAC_H */ -- cgit v1.2.2 From 209b84a88fe81341b4d8d465acc4a67cb7c3feb3 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 6 Jan 2009 11:38:17 -0700 Subject: dmaengine: replace dma_async_client_register with dmaengine_get Now that clients no longer need to be notified of channel arrival dma_async_client_register can simply increment the dmaengine_ref_count. Reviewed-by: Andrew Morton Signed-off-by: Dan Williams --- include/linux/dmaengine.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h index d63544cf8a1a..37d95db156d3 100644 --- a/include/linux/dmaengine.h +++ b/include/linux/dmaengine.h @@ -318,8 +318,8 @@ struct dma_device { /* --- public DMA engine API --- */ -void dma_async_client_register(struct dma_client *client); -void dma_async_client_unregister(struct dma_client *client); +void dmaengine_get(void); +void dmaengine_put(void); void dma_async_client_chan_request(struct dma_client *client); dma_cookie_t dma_async_memcpy_buf_to_buf(struct dma_chan *chan, void *dest, void *src, size_t len); -- cgit v1.2.2 From aa1e6f1a385eb2b04171ec841f3b760091e4a8ee Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 6 Jan 2009 11:38:17 -0700 Subject: dmaengine: kill struct dma_client and supporting infrastructure All users have been converted to either the general-purpose allocator, dma_find_channel, or dma_request_channel. Reviewed-by: Andrew Morton Signed-off-by: Dan Williams --- include/linux/dmaengine.h | 50 +---------------------------------------------- 1 file changed, 1 insertion(+), 49 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h index 37d95db156d3..db050e97d2b4 100644 --- a/include/linux/dmaengine.h +++ b/include/linux/dmaengine.h @@ -28,20 +28,6 @@ #include #include -/** - * enum dma_state - resource PNP/power management state - * @DMA_RESOURCE_SUSPEND: DMA device going into low power state - * @DMA_RESOURCE_RESUME: DMA device returning to full power - * @DMA_RESOURCE_AVAILABLE: DMA device available to the system - * @DMA_RESOURCE_REMOVED: DMA device removed from the system - */ -enum dma_state { - DMA_RESOURCE_SUSPEND, - DMA_RESOURCE_RESUME, - DMA_RESOURCE_AVAILABLE, - DMA_RESOURCE_REMOVED, -}; - /** * enum dma_state_client - state of the channel in the client * @DMA_ACK: client would like to use, or was using this channel @@ -170,23 +156,6 @@ struct dma_chan { void dma_chan_cleanup(struct kref *kref); -/* - * typedef dma_event_callback - function pointer to a DMA event callback - * For each channel added to the system this routine is called for each client. - * If the client would like to use the channel it returns '1' to signal (ack) - * the dmaengine core to take out a reference on the channel and its - * corresponding device. A client must not 'ack' an available channel more - * than once. When a channel is removed all clients are notified. If a client - * is using the channel it must 'ack' the removal. A client must not 'ack' a - * removed channel more than once. - * @client - 'this' pointer for the client context - * @chan - channel to be acted upon - * @state - available or removed - */ -struct dma_client; -typedef enum dma_state_client (*dma_event_callback) (struct dma_client *client, - struct dma_chan *chan, enum dma_state state); - /** * typedef dma_filter_fn - callback filter for dma_request_channel * @chan: channel to be reviewed @@ -199,21 +168,6 @@ typedef enum dma_state_client (*dma_event_callback) (struct dma_client *client, */ typedef enum dma_state_client (*dma_filter_fn)(struct dma_chan *chan, void *filter_param); -/** - * struct dma_client - info on the entity making use of DMA services - * @event_callback: func ptr to call when something happens - * @cap_mask: only return channels that satisfy the requested capabilities - * a value of zero corresponds to any capability - * @slave: data for preparing slave transfer. Must be non-NULL iff the - * DMA_SLAVE capability is requested. - * @global_node: list_head for global dma_client_list - */ -struct dma_client { - dma_event_callback event_callback; - dma_cap_mask_t cap_mask; - struct list_head global_node; -}; - typedef void (*dma_async_tx_callback)(void *dma_async_param); /** * struct dma_async_tx_descriptor - async transaction descriptor @@ -285,8 +239,7 @@ struct dma_device { int dev_id; struct device *dev; - int (*device_alloc_chan_resources)(struct dma_chan *chan, - struct dma_client *client); + int (*device_alloc_chan_resources)(struct dma_chan *chan); void (*device_free_chan_resources)(struct dma_chan *chan); struct dma_async_tx_descriptor *(*device_prep_dma_memcpy)( @@ -320,7 +273,6 @@ struct dma_device { void dmaengine_get(void); void dmaengine_put(void); -void dma_async_client_chan_request(struct dma_client *client); dma_cookie_t dma_async_memcpy_buf_to_buf(struct dma_chan *chan, void *dest, void *src, size_t len); dma_cookie_t dma_async_memcpy_buf_to_pg(struct dma_chan *chan, -- cgit v1.2.2 From f27c580c3628d79b17f38976d842a6d7f3616e2e Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 6 Jan 2009 11:38:18 -0700 Subject: dmaengine: remove 'bigref' infrastructure Reference counting is done at the module level so clients need not worry that a channel will leave while they are actively using dmaengine. Reviewed-by: Andrew Morton Signed-off-by: Dan Williams --- include/linux/dmaengine.h | 7 ------- 1 file changed, 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h index db050e97d2b4..bca2fc758894 100644 --- a/include/linux/dmaengine.h +++ b/include/linux/dmaengine.h @@ -142,10 +142,6 @@ struct dma_chan { int chan_id; struct device dev; - struct kref refcount; - int slow_ref; - struct rcu_head rcu; - struct list_head device_node; struct dma_chan_percpu *local; int client_count; @@ -233,9 +229,6 @@ struct dma_device { dma_cap_mask_t cap_mask; int max_xor; - struct kref refcount; - struct completion done; - int dev_id; struct device *dev; -- cgit v1.2.2 From 7dd602510128d7a64b11ff3b7d4f30ac8e3946ce Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 6 Jan 2009 11:38:19 -0700 Subject: dmaengine: kill enum dma_state_client DMA_NAK is now useless. We can just use a bool instead. Reviewed-by: Andrew Morton Signed-off-by: Dan Williams --- include/linux/dmaengine.h | 17 +++-------------- 1 file changed, 3 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h index bca2fc758894..1419a5094478 100644 --- a/include/linux/dmaengine.h +++ b/include/linux/dmaengine.h @@ -28,18 +28,6 @@ #include #include -/** - * enum dma_state_client - state of the channel in the client - * @DMA_ACK: client would like to use, or was using this channel - * @DMA_DUP: client has already seen this channel, or is not using this channel - * @DMA_NAK: client does not want to see any more channels - */ -enum dma_state_client { - DMA_ACK, - DMA_DUP, - DMA_NAK, -}; - /** * typedef dma_cookie_t - an opaque DMA cookie * @@ -160,9 +148,10 @@ void dma_chan_cleanup(struct kref *kref); * When this optional parameter is specified in a call to dma_request_channel a * suitable channel is passed to this routine for further dispositioning before * being returned. Where 'suitable' indicates a non-busy channel that - * satisfies the given capability mask. + * satisfies the given capability mask. It returns 'true' to indicate that the + * channel is suitable. */ -typedef enum dma_state_client (*dma_filter_fn)(struct dma_chan *chan, void *filter_param); +typedef bool (*dma_filter_fn)(struct dma_chan *chan, void *filter_param); typedef void (*dma_async_tx_callback)(void *dma_async_param); /** -- cgit v1.2.2 From 41d5e59c1299f27983977bcfe3b360600996051c Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 6 Jan 2009 11:38:21 -0700 Subject: dmaengine: add a release for dma class devices and dependent infrastructure Resolves: WARNING: at drivers/base/core.c:122 device_release+0x4d/0x52() Device 'dma0chan0' does not have a release() function, it is broken and must be fixed. The dma_chan_dev object is introduced to gear-match sysfs kobject and dmaengine channel lifetimes. When a channel is removed access to the sysfs entries return -ENODEV until the kobject can be released. The bulk of the change is updates to existing code to handle the extra layer of indirection between a dma_chan and its struct device. Reported-by: Alexander Beregalov Acked-by: Stephen Hemminger Cc: Haavard Skinnemoen Signed-off-by: Dan Williams --- include/linux/dmaengine.h | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h index 1419a5094478..d6b6bff355f4 100644 --- a/include/linux/dmaengine.h +++ b/include/linux/dmaengine.h @@ -113,7 +113,7 @@ struct dma_chan_percpu { * @device: ptr to the dma device who supplies this channel, always !%NULL * @cookie: last cookie value returned to client * @chan_id: channel ID for sysfs - * @class_dev: class device for sysfs + * @dev: class device for sysfs * @refcount: kref, used in "bigref" slow-mode * @slow_ref: indicates that the DMA channel is free * @rcu: the DMA channel's RCU head @@ -128,7 +128,7 @@ struct dma_chan { /* sysfs */ int chan_id; - struct device dev; + struct dma_chan_dev *dev; struct list_head device_node; struct dma_chan_percpu *local; @@ -136,7 +136,20 @@ struct dma_chan { int table_count; }; -#define to_dma_chan(p) container_of(p, struct dma_chan, dev) +/** + * struct dma_chan_dev - relate sysfs device node to backing channel device + * @chan - driver channel device + * @device - sysfs device + */ +struct dma_chan_dev { + struct dma_chan *chan; + struct device device; +}; + +static inline const char *dma_chan_name(struct dma_chan *chan) +{ + return dev_name(&chan->dev->device); +} void dma_chan_cleanup(struct kref *kref); -- cgit v1.2.2 From 864498aaa9fef69ee166da023d12413a7776342d Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 6 Jan 2009 11:38:21 -0700 Subject: dmaengine: use idr for registering dma device numbers This brings some predictability to dma device numbers, i.e. an rmmod/insmod cycle may now result in /sys/class/dma/dma0chan0 being restored rather than /sys/class/dma/dma1chan0 appearing. Cc: Maciej Sosnowski Signed-off-by: Dan Williams --- include/linux/dmaengine.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h index d6b6bff355f4..64dea2ab326c 100644 --- a/include/linux/dmaengine.h +++ b/include/linux/dmaengine.h @@ -140,10 +140,14 @@ struct dma_chan { * struct dma_chan_dev - relate sysfs device node to backing channel device * @chan - driver channel device * @device - sysfs device + * @dev_id - parent dma_device dev_id + * @idr_ref - reference count to gate release of dma_device dev_id */ struct dma_chan_dev { struct dma_chan *chan; struct device device; + int dev_id; + atomic_t *idr_ref; }; static inline const char *dma_chan_name(struct dma_chan *chan) -- cgit v1.2.2 From 96e93eab20337d063c70d537bd7bc70d90e04fa3 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 6 Jan 2009 10:49:34 -0800 Subject: gro: Add internal interfaces for VLAN Previously GRO's only entry point from the outside is through napi_gro_receive and napi_gro_frags. These interfaces are for device drivers. This patch rearranges things to provide a new set of interfaces for VLANs. These interfaces are for internal use only. The VLAN code itself can then provide a set of entry points for device drivers. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- include/linux/netdevice.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index c28bbba3c23d..114091be8872 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1373,8 +1373,14 @@ extern int netif_rx_ni(struct sk_buff *skb); #define HAVE_NETIF_RECEIVE_SKB 1 extern int netif_receive_skb(struct sk_buff *skb); extern void napi_gro_flush(struct napi_struct *napi); +extern int dev_gro_receive(struct napi_struct *napi, + struct sk_buff *skb); extern int napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb); +extern void napi_reuse_skb(struct napi_struct *napi, + struct sk_buff *skb); +extern struct sk_buff * napi_fraginfo_skb(struct napi_struct *napi, + struct napi_gro_fraginfo *info); extern int napi_gro_frags(struct napi_struct *napi, struct napi_gro_fraginfo *info); extern void netif_nit_deliver(struct sk_buff *skb); -- cgit v1.2.2 From e1c096e251e52773afeffbbcb74d0a072be47ea3 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 6 Jan 2009 10:50:09 -0800 Subject: vlan: Add GRO interfaces This patch adds GRO interfaces for hardware-assisted VLAN reception. With this in place we're now at parity with LRO as far as the interface is concerned. That is, you can now take any LRO driver and convert it over to GRO. As the CB memory clashes with GRO's use of CB, I've removed it entirely by storing dev in skb->dev. This is OK because VLAN gets called first thing in netif_receive_skb and skb->dev is not used in between us calling netif_rx and netif_receive_skb getting called. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- include/linux/if_vlan.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'include/linux') diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index a5cb0c3f6dcf..f8ff918c208f 100644 --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -115,6 +115,11 @@ extern u16 vlan_dev_vlan_id(const struct net_device *dev); extern int __vlan_hwaccel_rx(struct sk_buff *skb, struct vlan_group *grp, u16 vlan_tci, int polling); extern int vlan_hwaccel_do_receive(struct sk_buff *skb); +extern int vlan_gro_receive(struct napi_struct *napi, struct vlan_group *grp, + unsigned int vlan_tci, struct sk_buff *skb); +extern int vlan_gro_frags(struct napi_struct *napi, struct vlan_group *grp, + unsigned int vlan_tci, + struct napi_gro_fraginfo *info); #else static inline struct net_device *vlan_dev_real_dev(const struct net_device *dev) @@ -140,6 +145,20 @@ static inline int vlan_hwaccel_do_receive(struct sk_buff *skb) { return 0; } + +static inline int vlan_gro_receive(struct napi_struct *napi, + struct vlan_group *grp, + unsigned int vlan_tci, struct sk_buff *skb) +{ + return NET_RX_DROP; +} + +static inline int vlan_gro_frags(struct napi_struct *napi, + struct vlan_group *grp, unsigned int vlan_tci, + struct napi_gro_fraginfo *info) +{ + return NET_RX_DROP; +} #endif /** -- cgit v1.2.2 From 1fa17d4ba43d7e5aab5e90777b07da06524f6748 Mon Sep 17 00:00:00 2001 From: Oliver Hartkopp Date: Tue, 6 Jan 2009 11:07:54 -0800 Subject: can: omit unneeded skb_clone() calls The AF_CAN core delivered always cloned sk_buffs to the AF_CAN protocols, although this was _only_ needed by the can-raw protocol. With this (additionally documented) change, the AF_CAN core calls the callback functions of the registered AF_CAN protocols with the original (uncloned) sk_buff pointer and let's the can-raw protocol do the skb_clone() itself which omits all unneeded skb_clone() calls for other AF_CAN protocols. Signed-off-by: Oliver Hartkopp Signed-off-by: Urs Thuermann Signed-off-by: David S. Miller --- include/linux/can/core.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/can/core.h b/include/linux/can/core.h index f50785ad4781..25085cbadcfc 100644 --- a/include/linux/can/core.h +++ b/include/linux/can/core.h @@ -19,7 +19,7 @@ #include #include -#define CAN_VERSION "20081130" +#define CAN_VERSION "20090105" /* increment this number each time you change some user-space interface */ #define CAN_ABI_VERSION "8" -- cgit v1.2.2 From 22a9d645677feefd402befd02edd59b122289ef1 Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Wed, 7 Jan 2009 08:45:46 -0800 Subject: async: Asynchronous function calls to speed up kernel boot Right now, most of the kernel boot is strictly synchronous, such that various hardware delays are done sequentially. In order to make the kernel boot faster, this patch introduces infrastructure to allow doing some of the initialization steps asynchronously, which will hide significant portions of the hardware delays in practice. In order to not change device order and other similar observables, this patch does NOT do full parallel initialization. Rather, it operates more in the way an out of order CPU does; the work may be done out of order and asynchronous, but the observable effects (instruction retiring for the CPU) are still done in the original sequence. Signed-off-by: Arjan van de Ven --- include/linux/async.h | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) create mode 100644 include/linux/async.h (limited to 'include/linux') diff --git a/include/linux/async.h b/include/linux/async.h new file mode 100644 index 000000000000..c4ecacd0b327 --- /dev/null +++ b/include/linux/async.h @@ -0,0 +1,25 @@ +/* + * async.h: Asynchronous function calls for boot performance + * + * (C) Copyright 2009 Intel Corporation + * Author: Arjan van de Ven + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; version 2 + * of the License. + */ + +#include +#include + +typedef u64 async_cookie_t; +typedef void (async_func_ptr) (void *data, async_cookie_t cookie); + +extern async_cookie_t async_schedule(async_func_ptr *ptr, void *data); +extern async_cookie_t async_schedule_special(async_func_ptr *ptr, void *data, struct list_head *list); +extern void async_synchronize_full(void); +extern void async_synchronize_full_special(struct list_head *list); +extern void async_synchronize_cookie(async_cookie_t cookie); +extern void async_synchronize_cookie_special(async_cookie_t cookie, struct list_head *list); + -- cgit v1.2.2 From efaee192063a54749c56b7383803e16fe553630e Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Tue, 6 Jan 2009 07:20:54 -0800 Subject: async: make the final inode deletion an asynchronous event this makes "rm -rf" on a (names cached) kernel tree go from 11.6 to 8.6 seconds on an ext3 filesystem Signed-off-by: Arjan van de Ven --- include/linux/fs.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index d7eba77f666e..e38a64d71eff 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1184,6 +1184,11 @@ struct super_block { * generic_show_options() */ char *s_options; + + /* + * storage for asynchronous operations + */ + struct list_head s_async_list; }; extern struct timespec current_fs_time(struct super_block *sb); -- cgit v1.2.2 From b92a78e582b1a45649143dc86e526f5824092478 Mon Sep 17 00:00:00 2001 From: Rodolfo Giometti Date: Thu, 23 Oct 2008 10:08:07 +0200 Subject: usb host: Oxford OXU210HP HCD driver. This driver implements the support for Oxford OXU210HP USB high-speed host, no peripheral nor OTG. Signed-off-by: Rodolfo Giometti Cc: Kan Liu Signed-off-by: Greg Kroah-Hartman --- include/linux/oxu210hp.h | 7 +++++++ 1 file changed, 7 insertions(+) create mode 100644 include/linux/oxu210hp.h (limited to 'include/linux') diff --git a/include/linux/oxu210hp.h b/include/linux/oxu210hp.h new file mode 100644 index 000000000000..0bf96eae5389 --- /dev/null +++ b/include/linux/oxu210hp.h @@ -0,0 +1,7 @@ +/* platform data for the OXU210HP HCD */ + +struct oxu210hp_platform_data { + unsigned int bus16:1; + unsigned int use_hcd_otg:1; + unsigned int use_hcd_sph:1; +}; -- cgit v1.2.2 From d767d888750a8e15656b7ee15d68f90a151b8936 Mon Sep 17 00:00:00 2001 From: Harvey Harrison Date: Thu, 6 Nov 2008 22:32:15 -0800 Subject: USB: wusb: annotate association types withe proper endianness Also a trivial annotation in rh.c for: drivers/usb/wusbcore/rh.c:366:9: warning: incorrect type in assignment (different base types) drivers/usb/wusbcore/rh.c:366:9: expected unsigned short [unsigned] [short] [usertype] drivers/usb/wusbcore/rh.c:366:9: got restricted __le16 [usertype] drivers/usb/wusbcore/rh.c:367:9: warning: incorrect type in assignment (different base types) drivers/usb/wusbcore/rh.c:367:9: expected unsigned short [unsigned] [short] [usertype] drivers/usb/wusbcore/rh.c:367:9: got restricted __le16 [usertype] Association types annotation fixes piles of warnings similar to: drivers/usb/wusbcore/cbaf.c:238:30: warning: incorrect type in initializer (different base types) drivers/usb/wusbcore/cbaf.c:238:30: expected restricted __le16 [usertype] id drivers/usb/wusbcore/cbaf.c:238:30: got int drivers/usb/wusbcore/cbaf.c:238:30: warning: incorrect type in initializer (different base types) drivers/usb/wusbcore/cbaf.c:238:30: expected restricted __le16 [usertype] len drivers/usb/wusbcore/cbaf.c:238:30: got int Signed-off-by: Harvey Harrison Cc: David Vrabel Cc: Inaky Perez-Gonzalez Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/association.h | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/usb/association.h b/include/linux/usb/association.h index 07c5e3cf5898..0a4a18b3c1bb 100644 --- a/include/linux/usb/association.h +++ b/include/linux/usb/association.h @@ -28,17 +28,17 @@ struct wusb_am_attr { }; /* Different fields defined by the spec */ -#define WUSB_AR_AssociationTypeId { .id = 0x0000, .len = 2 } -#define WUSB_AR_AssociationSubTypeId { .id = 0x0001, .len = 2 } -#define WUSB_AR_Length { .id = 0x0002, .len = 4 } -#define WUSB_AR_AssociationStatus { .id = 0x0004, .len = 4 } -#define WUSB_AR_LangID { .id = 0x0008, .len = 2 } -#define WUSB_AR_DeviceFriendlyName { .id = 0x000b, .len = 64 } /* max */ -#define WUSB_AR_HostFriendlyName { .id = 0x000c, .len = 64 } /* max */ -#define WUSB_AR_CHID { .id = 0x1000, .len = 16 } -#define WUSB_AR_CDID { .id = 0x1001, .len = 16 } -#define WUSB_AR_ConnectionContext { .id = 0x1002, .len = 48 } -#define WUSB_AR_BandGroups { .id = 0x1004, .len = 2 } +#define WUSB_AR_AssociationTypeId { .id = cpu_to_le16(0x0000), .len = cpu_to_le16(2) } +#define WUSB_AR_AssociationSubTypeId { .id = cpu_to_le16(0x0001), .len = cpu_to_le16(2) } +#define WUSB_AR_Length { .id = cpu_to_le16(0x0002), .len = cpu_to_le16(4) } +#define WUSB_AR_AssociationStatus { .id = cpu_to_le16(0x0004), .len = cpu_to_le16(4) } +#define WUSB_AR_LangID { .id = cpu_to_le16(0x0008), .len = cpu_to_le16(2) } +#define WUSB_AR_DeviceFriendlyName { .id = cpu_to_le16(0x000b), .len = cpu_to_le16(64) } /* max */ +#define WUSB_AR_HostFriendlyName { .id = cpu_to_le16(0x000c), .len = cpu_to_le16(64) } /* max */ +#define WUSB_AR_CHID { .id = cpu_to_le16(0x1000), .len = cpu_to_le16(16) } +#define WUSB_AR_CDID { .id = cpu_to_le16(0x1001), .len = cpu_to_le16(16) } +#define WUSB_AR_ConnectionContext { .id = cpu_to_le16(0x1002), .len = cpu_to_le16(48) } +#define WUSB_AR_BandGroups { .id = cpu_to_le16(0x1004), .len = cpu_to_le16(2) } /* CBAF Control Requests (AMS1.0[T4-1] */ enum { -- cgit v1.2.2 From 9ac39f28b5237a629e41ccfc1f73d3a55723045c Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Wed, 12 Nov 2008 16:19:49 -0500 Subject: USB: add asynchronous autosuspend/autoresume support This patch (as1160b) adds support routines for asynchronous autosuspend and autoresume, with accompanying documentation updates. There already are several potential users of this interface, and others are likely to arise as autosuspend support becomes more widespread. Signed-off-by: Alan Stern Signed-off-by: Greg Kroah-Hartman --- include/linux/usb.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/usb.h b/include/linux/usb.h index f72aa51f7bcd..859a88e6ce9c 100644 --- a/include/linux/usb.h +++ b/include/linux/usb.h @@ -398,6 +398,7 @@ struct usb_tt; * @urbnum: number of URBs submitted for the whole device * @active_duration: total time device is not suspended * @autosuspend: for delayed autosuspends + * @autoresume: for autoresumes requested while in_interrupt * @pm_mutex: protects PM operations * @last_busy: time of last use * @autosuspend_delay: in jiffies @@ -476,6 +477,7 @@ struct usb_device { #ifdef CONFIG_PM struct delayed_work autosuspend; + struct work_struct autoresume; struct mutex pm_mutex; unsigned long last_busy; @@ -513,6 +515,8 @@ extern struct usb_device *usb_find_device(u16 vendor_id, u16 product_id); extern int usb_autopm_set_interface(struct usb_interface *intf); extern int usb_autopm_get_interface(struct usb_interface *intf); extern void usb_autopm_put_interface(struct usb_interface *intf); +extern int usb_autopm_get_interface_async(struct usb_interface *intf); +extern void usb_autopm_put_interface_async(struct usb_interface *intf); static inline void usb_autopm_enable(struct usb_interface *intf) { @@ -539,8 +543,13 @@ static inline int usb_autopm_set_interface(struct usb_interface *intf) static inline int usb_autopm_get_interface(struct usb_interface *intf) { return 0; } +static inline int usb_autopm_get_interface_async(struct usb_interface *intf) +{ return 0; } + static inline void usb_autopm_put_interface(struct usb_interface *intf) { } +static inline void usb_autopm_put_interface_async(struct usb_interface *intf) +{ } static inline void usb_autopm_enable(struct usb_interface *intf) { } static inline void usb_autopm_disable(struct usb_interface *intf) -- cgit v1.2.2 From dc023dceec861c60bc1d1a17a2c6496ddac26ee7 Mon Sep 17 00:00:00 2001 From: Inaky Perez-Gonzalez Date: Thu, 13 Nov 2008 10:31:35 -0800 Subject: USB: Introduce usb_queue_reset() to do resets from atomic contexts This patch introduces a new call to be able to do a USB reset from an atomic contect. This is quite helpful in USB callbacks to handle errors (when the only thing that can be done is to do a device reset). It is done queuing a work struct that will do the actual reset. The struct is "attached" to an interface so pending requests from an interface are removed when said interface is unbound from the driver. The call flow then becomes: usb_queue_reset_device() __usb_queue_reset_device() [workqueue] usb_reset_device() usb_probe_interface() usb_cancel_queue_reset() [error path] usb_unbind_interface() usb_cancel_queue_reset() usb_driver_release_interface() usb_cancel_queue_reset() Note usb_cancel_queue_reset() needs smarts to try not to unqueue when it is actually being executed. This happens when we run the reset from the workqueue: usb_reset_device() is called and on interface unbind time, usb_cancel_queue_reset() would be called. That would deadlock on cancel_work_sync(). To avoid that, we set (before running usb_reset_device()) usb_intf->reset_running and clear it inmediately after returning. Patch is against 2.6.28-rc2 and depends on http://marc.info/?l=linux-usb&m=122581634925308&w=2 (as submitted by Alan Stern). Signed-off-by: Inaky Perez-Gonzalez Cc: Alan Stern Signed-off-by: Greg Kroah-Hartman --- include/linux/usb.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/usb.h b/include/linux/usb.h index 859a88e6ce9c..c8e55aa979de 100644 --- a/include/linux/usb.h +++ b/include/linux/usb.h @@ -120,6 +120,11 @@ enum usb_interface_condition { * to the sysfs representation for that device. * @pm_usage_cnt: PM usage counter for this interface; autosuspend is not * allowed unless the counter is 0. + * @reset_ws: Used for scheduling resets from atomic context. + * @reset_running: set to 1 if the interface is currently running a + * queued reset so that usb_cancel_queued_reset() doesn't try to + * remove from the workqueue when running inside the worker + * thread. See __usb_queue_reset_device(). * * USB device drivers attach to interfaces on a physical device. Each * interface encapsulates a single high level function, such as feeding @@ -168,10 +173,12 @@ struct usb_interface { unsigned needs_remote_wakeup:1; /* driver requires remote wakeup */ unsigned needs_altsetting0:1; /* switch to altsetting 0 is pending */ unsigned needs_binding:1; /* needs delayed unbind/rebind */ + unsigned reset_running:1; struct device dev; /* interface specific device info */ struct device *usb_dev; int pm_usage_cnt; /* usage counter for autosuspend */ + struct work_struct reset_ws; /* for resets in atomic context */ }; #define to_usb_interface(d) container_of(d, struct usb_interface, dev) #define interface_to_usbdev(intf) \ @@ -507,6 +514,7 @@ extern int usb_lock_device_for_reset(struct usb_device *udev, /* USB port reset for device reinitialization */ extern int usb_reset_device(struct usb_device *dev); +extern void usb_queue_reset_device(struct usb_interface *dev); extern struct usb_device *usb_find_device(u16 vendor_id, u16 product_id); -- cgit v1.2.2 From f150fa1afbf69a87f54752579ff2bb769aad88b3 Mon Sep 17 00:00:00 2001 From: Pete Zaitcev Date: Thu, 13 Nov 2008 21:31:21 -0700 Subject: USB: Allow usbmon as a module even if usbcore is builtin usbmon can only be built as a module if usbcore is a module too. Trivial changes to the relevant Kconfig and Makefile (and a few trivial changes elsewhere) allow usbmon to be built as a module even if usbcore is builtin. This is verified to work in all 9 permutations (3 correctly prohibited by Kconfig, 6 build a suitable result). Signed-off-by: Paul Bolle Signed-off-by: Pete Zaitcev Signed-off-by: Greg Kroah-Hartman --- include/linux/usb.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/usb.h b/include/linux/usb.h index c8e55aa979de..8bc81bffc195 100644 --- a/include/linux/usb.h +++ b/include/linux/usb.h @@ -336,7 +336,7 @@ struct usb_bus { #endif struct device *dev; /* device for this bus */ -#if defined(CONFIG_USB_MON) +#if defined(CONFIG_USB_MON) || defined(CONFIG_USB_MON_MODULE) struct mon_bus *mon_bus; /* non-null when associated */ int monitored; /* non-zero when monitored */ #endif -- cgit v1.2.2 From 1537e0ad944acf3a4c2b311a646d7993b89499f7 Mon Sep 17 00:00:00 2001 From: Ben Efros Date: Tue, 18 Nov 2008 13:31:13 -0800 Subject: USB: storage devices and SAT Add the SANE SENSE flag to indicate that a device is capable of handling more than 18-bytes of sense data. This functionality is required for USB-ATA bridges implementing SAT. A future patch will actually enable this function for several devices. The logic behind this is that we can detect support for SANE_SENSE in a few ways: 1) ATA PASS THROUGH (12) or (16) execute successfully 2) SPC-3 or higher is in use 3) A previous CHECK CONDITION occurred with sense format 70-73 and had a length greater than 18-bytes total Signed-off-by: Ben Efros Signed-off-by: Matthew Dharm Signed-off-by: Greg Kroah-Hartman --- include/linux/usb_usual.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/usb_usual.h b/include/linux/usb_usual.h index d9a3bbe38e6b..998e5cbbf29e 100644 --- a/include/linux/usb_usual.h +++ b/include/linux/usb_usual.h @@ -52,8 +52,9 @@ US_FLAG(MAX_SECTORS_MIN,0x00002000) \ /* Sets max_sectors to arch min */ \ US_FLAG(BULK_IGNORE_TAG,0x00004000) \ - /* Ignore tag mismatch in bulk operations */ - + /* Ignore tag mismatch in bulk operations */ \ + US_FLAG(SANE_SENSE, 0x00008000) + /* Sane Sense (> 18 bytes) */ #define US_FLAG(name, value) US_FL_##name = value , enum { US_DO_ALL_FLAGS }; -- cgit v1.2.2 From 6084f1bf0c51a99cbba612ee90a4607cffb8b042 Mon Sep 17 00:00:00 2001 From: Philipp Zabel Date: Mon, 24 Nov 2008 12:00:01 -0800 Subject: USB: otg: gpio_vbus transceiver stub gpio_vbus provides simple GPIO VBUS sensing for peripheral controllers with an internal transceiver. Optionally, a second GPIO can be used to control D+ pullup. It also interfaces with the regulator framework to limit charging currents when powered via USB. gpio_vbus requests the regulator supplying "vbus_draw" and can enable/disable it or limit its current depending on USB state. [dbrownell@users.sourceforge.net: use drivers/otg, cleanups ] Signed-off-by: Philipp Zabel Signed-off-by: David Brownell Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/gpio_vbus.h | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) create mode 100644 include/linux/usb/gpio_vbus.h (limited to 'include/linux') diff --git a/include/linux/usb/gpio_vbus.h b/include/linux/usb/gpio_vbus.h new file mode 100644 index 000000000000..d9f03ccc2d60 --- /dev/null +++ b/include/linux/usb/gpio_vbus.h @@ -0,0 +1,30 @@ +/* + * A simple GPIO VBUS sensing driver for B peripheral only devices + * with internal transceivers. + * Optionally D+ pullup can be controlled by a second GPIO. + * + * Copyright (c) 2008 Philipp Zabel + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ + +/** + * struct gpio_vbus_mach_info - configuration for gpio_vbus + * @gpio_vbus: VBUS sensing GPIO + * @gpio_pullup: optional D+ or D- pullup GPIO (else negative/invalid) + * @gpio_vbus_inverted: true if gpio_vbus is active low + * @gpio_pullup_inverted: true if gpio_pullup is active low + * + * The VBUS sensing GPIO should have a pulldown, which will normally be + * part of a resistor ladder turning a 4.0V-5.25V level on VBUS into a + * value the GPIO detects as active. Some systems will use comparators. + */ +struct gpio_vbus_mach_info { + int gpio_vbus; + int gpio_pullup; + bool gpio_vbus_inverted; + bool gpio_pullup_inverted; +}; -- cgit v1.2.2 From 68144e0cc92125f41157ede7b060f83367bc4fe7 Mon Sep 17 00:00:00 2001 From: Philipp Zabel Date: Mon, 24 Nov 2008 12:01:17 -0800 Subject: USB: otg: add otg_put_transceiver() As Russell King points out, calling put_device(otg_transceiver->dev) directly in driver cleanup paths makes assumptions about otg_transceiver internals. Signed-off-by: Philipp Zabel Signed-off-by: David Brownell Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/otg.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/usb/otg.h b/include/linux/usb/otg.h index 1db25d152ad8..94df4fe6c6c0 100644 --- a/include/linux/usb/otg.h +++ b/include/linux/usb/otg.h @@ -84,6 +84,7 @@ extern int otg_set_transceiver(struct otg_transceiver *); /* for usb host and peripheral controller drivers */ extern struct otg_transceiver *otg_get_transceiver(void); +extern void otg_put_transceiver(struct otg_transceiver *); static inline int otg_start_hnp(struct otg_transceiver *otg) -- cgit v1.2.2 From 65bfd2967c906ca322a4bb69a285fe0de8916ac6 Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Tue, 25 Nov 2008 16:39:18 -0500 Subject: USB: Enhance usage of pm_message_t This patch (as1177) modifies the USB core suspend and resume routines. The resume functions now will take a pm_message_t argument, so they will know what sort of resume is occurring. The new argument is also passed to the port suspend/resume and bus suspend/resume routines (although they don't use it for anything but debugging). In addition, special pm_message_t values are used for user-initiated, device-initiated (i.e., remote wakeup), and automatic suspend/resume. By testing these values, drivers can tell whether or not a particular suspend was an autosuspend. Unfortunately, they can't do the same for resumes -- not until the pm_message_t argument is also passed to the drivers' resume methods. That will require a bigger change. IMO, the whole Power Management framework should have been set up this way in the first place. Signed-off-by: Alan Stern Signed-off-by: Greg Kroah-Hartman --- include/linux/usb.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/usb.h b/include/linux/usb.h index 8bc81bffc195..74d0b9990c73 100644 --- a/include/linux/usb.h +++ b/include/linux/usb.h @@ -1067,7 +1067,7 @@ struct usb_device_driver { void (*disconnect) (struct usb_device *udev); int (*suspend) (struct usb_device *udev, pm_message_t message); - int (*resume) (struct usb_device *udev); + int (*resume) (struct usb_device *udev, pm_message_t message); struct usbdrv_wrap drvwrap; unsigned int supports_autosuspend:1; }; -- cgit v1.2.2 From 2ffcdb3bdadaf8260986e96384df26c94a6ad42c Mon Sep 17 00:00:00 2001 From: Bryan Wu Date: Tue, 2 Dec 2008 21:33:43 +0200 Subject: USB: musb: use new platform data interface of musb to replace old one Signed-off-by: Bryan Wu Signed-off-by: Felipe Balbi Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/musb.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/usb/musb.h b/include/linux/usb/musb.h index 630962c04ca4..d6aad0ea6033 100644 --- a/include/linux/usb/musb.h +++ b/include/linux/usb/musb.h @@ -47,6 +47,11 @@ struct musb_hdrc_config { u8 ram_bits; /* ram address size */ struct musb_hdrc_eps_bits *eps_bits; +#ifdef CONFIG_BLACKFIN + /* A GPIO controlling VRSEL in Blackfin */ + unsigned int gpio_vrsel; +#endif + }; struct musb_hdrc_platform_data { -- cgit v1.2.2 From 3b23dd6f8a718e5339de4f7d86ce76a078b5f771 Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Fri, 5 Dec 2008 14:10:34 -0500 Subject: USB: utilize the bus notifiers This patch (as1185) makes usbcore take advantage of the bus notifications sent out by the driver core. Now we can create all our device and interface attribute files before the device or interface uevent is broadcast. A side effect is that we no longer create the endpoint "pseudo" devices at the same time as a device or interface is registered -- it seems like a bad idea to try registering an endpoint before the registration of its parent is complete. So the routines for creating and removing endpoint devices have been split out and renamed, and they are called explicitly when needed. A new bitflag is used for keeping track of whether or not the interface's endpoint devices have been created, since (just as with the interface attributes) they vary with the altsetting and hence can be changed at random times. Signed-off-by: Alan Stern Cc: Kay Sievers Signed-off-by: Greg Kroah-Hartman --- include/linux/usb.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/usb.h b/include/linux/usb.h index 74d0b9990c73..e9d63562325a 100644 --- a/include/linux/usb.h +++ b/include/linux/usb.h @@ -108,6 +108,7 @@ enum usb_interface_condition { * (in probe()), bound to a driver, or unbinding (in disconnect()) * @is_active: flag set when the interface is bound and not suspended. * @sysfs_files_created: sysfs attributes exist + * @ep_devs_created: endpoint child pseudo-devices exist * @unregistering: flag set when the interface is being unregistered * @needs_remote_wakeup: flag set when the driver requires remote-wakeup * capability during autosuspend. @@ -169,6 +170,7 @@ struct usb_interface { enum usb_interface_condition condition; /* state of binding */ unsigned is_active:1; /* the interface is not suspended */ unsigned sysfs_files_created:1; /* the sysfs attributes exist */ + unsigned ep_devs_created:1; /* endpoint "devices" exist */ unsigned unregistering:1; /* unregistration is in progress */ unsigned needs_remote_wakeup:1; /* driver requires remote wakeup */ unsigned needs_altsetting0:1; /* switch to altsetting 0 is pending */ -- cgit v1.2.2 From 49367d8f1d9f26482cf7089489e90f0afd0a942c Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Fri, 12 Dec 2008 21:38:45 +0800 Subject: USB: mark "reject" field of struct urb as atomic_t It is enough to protect accesses to reject field of urb by marking it as atomic_t,also it is the only reason of existence of usb_reject_lock,so remove the lock to make code more clean. Signed-off-by: Ming Lei Acked-off-by: Alan Stern Signed-off-by: Greg Kroah-Hartman --- include/linux/usb.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/usb.h b/include/linux/usb.h index e9d63562325a..4e8654a18250 100644 --- a/include/linux/usb.h +++ b/include/linux/usb.h @@ -1340,7 +1340,7 @@ struct urb { struct kref kref; /* reference count of the URB */ void *hcpriv; /* private data for host controller */ atomic_t use_count; /* concurrent submissions counter */ - u8 reject; /* submissions will fail */ + atomic_t reject; /* submissions will fail */ int unlinked; /* unlink error code */ /* public: documented fields in the urb that can be used by drivers */ -- cgit v1.2.2 From 856395d6e137b4e7194972cb7765f3de6a72ba61 Mon Sep 17 00:00:00 2001 From: Oliver Neukum Date: Thu, 18 Dec 2008 09:17:49 +0100 Subject: USB: extension of anchor API to unpoison an anchor This extension allows unpoisoning an anchor allowing drivers that resubmit URBs to reuse an anchor for methods like resume() Signed-off-by: Oliver Neukum Signed-off-by: Greg Kroah-Hartman --- include/linux/usb.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/usb.h b/include/linux/usb.h index 4e8654a18250..e89639896508 100644 --- a/include/linux/usb.h +++ b/include/linux/usb.h @@ -1485,6 +1485,7 @@ extern void usb_poison_urb(struct urb *urb); extern void usb_unpoison_urb(struct urb *urb); extern void usb_kill_anchored_urbs(struct usb_anchor *anchor); extern void usb_poison_anchored_urbs(struct usb_anchor *anchor); +extern void usb_unpoison_anchored_urbs(struct usb_anchor *anchor); extern void usb_unlink_anchored_urbs(struct usb_anchor *anchor); extern void usb_anchor_urb(struct urb *urb, struct usb_anchor *anchor); extern void usb_unanchor_urb(struct urb *urb); -- cgit v1.2.2 From 25ff1c316f6a763f1eefe7f8984b2d8c03888432 Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Mon, 15 Dec 2008 12:43:41 -0500 Subject: USB: storage: add last-sector hacks This patch (as1189b) adds some hacks to usb-storage for dealing with the growing problems involving bad capacity values and last-sector accesses: A new flag, US_FL_CAPACITY_OK, is created to indicate that the device is known to report its capacity correctly. An unusual_devs entry for Linux's own File-backed Storage Gadget is added with this flag set, since g_file_storage always reports the correct capacity and since the capacity need not be even (it is determined by the size of the backing file). An entry in unusual_devs.h which has only the CAPACITY_OK flag set shouldn't prejudice libusual, since the device will work perfectly well with either usb-storage or ub. So a new macro, COMPLIANT_DEV, is added to let libusual know about these entries. When a last-sector access succeeds and the total number of sectors is odd (the unexpected case, in which guessing that the number is even might cause trouble), a WARN is triggered. The kerneloops.org project will collect these warnings, allowing us to add CAPACITY_OK flags for the devices in question before implementing the default-to-even heuristic. If users want to prevent the stack dump produced by the WARN, they can disable the hack by adding an unusual_devs entry for their device with the CAPACITY_OK flag. When a last-sector access fails three times in a row and neither the FIX_CAPACITY nor the CAPACITY_OK flag is set, we assume the last-sector bug is present. We replace the existing status and sense data with values that will cause the SCSI core to fail the access immediately rather than retry indefinitely. This should fix the difficulties people have been having with Nokia phones. Signed-off-by: Alan Stern Cc: stable Signed-off-by: Greg Kroah-Hartman --- include/linux/usb_usual.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/usb_usual.h b/include/linux/usb_usual.h index 998e5cbbf29e..1eea1ab68dc4 100644 --- a/include/linux/usb_usual.h +++ b/include/linux/usb_usual.h @@ -53,8 +53,10 @@ /* Sets max_sectors to arch min */ \ US_FLAG(BULK_IGNORE_TAG,0x00004000) \ /* Ignore tag mismatch in bulk operations */ \ - US_FLAG(SANE_SENSE, 0x00008000) - /* Sane Sense (> 18 bytes) */ + US_FLAG(SANE_SENSE, 0x00008000) \ + /* Sane Sense (> 18 bytes) */ \ + US_FLAG(CAPACITY_OK, 0x00010000) \ + /* READ CAPACITY response is correct */ #define US_FLAG(name, value) US_FL_##name = value , enum { US_DO_ALL_FLAGS }; -- cgit v1.2.2 From 338b67b0c1a97ca705023a8189cf41aa0828d294 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 14 Aug 2008 09:37:34 -0700 Subject: USB: remove warn() macro from usb.h USB should not be having it's own printk macros, so remove warn() and use the system-wide standard of dev_warn() wherever possible. In the few places that will not work out, use a basic printk(). Now that all in-tree users are gone, remove the macro. Signed-off-by: Greg Kroah-Hartman --- include/linux/usb.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/usb.h b/include/linux/usb.h index e89639896508..28f68f587793 100644 --- a/include/linux/usb.h +++ b/include/linux/usb.h @@ -1744,8 +1744,6 @@ extern void usb_unregister_notify(struct notifier_block *nb); format "\n" , ## arg) #define info(format, arg...) printk(KERN_INFO KBUILD_MODNAME ": " \ format "\n" , ## arg) -#define warn(format, arg...) printk(KERN_WARNING KBUILD_MODNAME ": " \ - format "\n" , ## arg) #endif /* __KERNEL__ */ -- cgit v1.2.2 From 34c65d82e02147331701c7795e3144d511adf4e9 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 18 Aug 2008 13:21:04 -0700 Subject: USB: remove info() macro from usb.h USB should not be having it's own printk macros, so remove info() and use the system-wide standard of dev_info() wherever possible. No one in the tree is using the macro, so it can now be removed. Signed-off-by: Greg Kroah-Hartman --- include/linux/usb.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/usb.h b/include/linux/usb.h index 28f68f587793..85ee9be9361e 100644 --- a/include/linux/usb.h +++ b/include/linux/usb.h @@ -1742,8 +1742,6 @@ extern void usb_unregister_notify(struct notifier_block *nb); #define err(format, arg...) printk(KERN_ERR KBUILD_MODNAME ": " \ format "\n" , ## arg) -#define info(format, arg...) printk(KERN_INFO KBUILD_MODNAME ": " \ - format "\n" , ## arg) #endif /* __KERNEL__ */ -- cgit v1.2.2 From 5e07878787ad07361571150230cc3a8d522ae046 Mon Sep 17 00:00:00 2001 From: Inaky Perez-Gonzalez Date: Sat, 20 Dec 2008 16:57:39 -0800 Subject: debugfs: add helpers for exporting a size_t simple value MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In the same spirit as debugfs_create_*(), introduce helpers for exporting size_t values over debugfs. The only trick done is that the format verifier is kept at %llu instead of %zu; otherwise type warnings would pop up: format ‘%zu’ expects type ‘size_t’, but argument 2 has type ‘long long unsigned int’ There is no real way to fix this one--however, we can consider %llu and %zu to be compatible if we consider that we are using the same for validating in debugfs_create_{x,u}{8,16,32}(). Signed-off-by: Inaky Perez-Gonzalez Signed-off-by: Greg Kroah-Hartman --- include/linux/debugfs.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/debugfs.h b/include/linux/debugfs.h index e1a6c046cea3..23936b16426b 100644 --- a/include/linux/debugfs.h +++ b/include/linux/debugfs.h @@ -63,6 +63,8 @@ struct dentry *debugfs_create_x16(const char *name, mode_t mode, struct dentry *parent, u16 *value); struct dentry *debugfs_create_x32(const char *name, mode_t mode, struct dentry *parent, u32 *value); +struct dentry *debugfs_create_size_t(const char *name, mode_t mode, + struct dentry *parent, size_t *value); struct dentry *debugfs_create_bool(const char *name, mode_t mode, struct dentry *parent, u32 *value); -- cgit v1.2.2 From ace22f0881e1333d0c55ddf484e5352fe03a806a Mon Sep 17 00:00:00 2001 From: Inaky Perez-Gonzalez Date: Sat, 20 Dec 2008 16:57:33 -0800 Subject: wimax: headers for kernel API and user space interaction Definitions for the user/kernel API protocol through generic netlink. User space can copy it verbatim and use it. Kernel API definition declares the main data types and calls for the drivers to integrate into the WiMAX stack. Provides usage documentation. Signed-off-by: Inaky Perez-Gonzalez Signed-off-by: Greg Kroah-Hartman --- include/linux/wimax.h | 234 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 234 insertions(+) create mode 100644 include/linux/wimax.h (limited to 'include/linux') diff --git a/include/linux/wimax.h b/include/linux/wimax.h new file mode 100644 index 000000000000..c89de7f4e5b9 --- /dev/null +++ b/include/linux/wimax.h @@ -0,0 +1,234 @@ +/* + * Linux WiMax + * API for user space + * + * + * Copyright (C) 2007-2008 Intel Corporation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * + * Intel Corporation + * Inaky Perez-Gonzalez + * - Initial implementation + * + * + * This file declares the user/kernel protocol that is spoken over + * Generic Netlink, as well as any type declaration that is to be used + * by kernel and user space. + * + * It is intended for user space to clone it verbatim to use it as a + * primary reference for definitions. + * + * Stuff intended for kernel usage as well as full protocol and stack + * documentation is rooted in include/net/wimax.h. + */ + +#ifndef __LINUX__WIMAX_H__ +#define __LINUX__WIMAX_H__ + +#include + +enum { + /** + * Version of the interface (unsigned decimal, MMm, max 25.5) + * M - Major: change if removing or modifying an existing call. + * m - minor: change when adding a new call + */ + WIMAX_GNL_VERSION = 00, + /* Generic NetLink attributes */ + WIMAX_GNL_ATTR_INVALID = 0x00, + WIMAX_GNL_ATTR_MAX = 10, +}; + + +/* + * Generic NetLink operations + * + * Most of these map to an API call; _OP_ stands for operation, _RP_ + * for reply and _RE_ for report (aka: signal). + */ +enum { + WIMAX_GNL_OP_MSG_FROM_USER, /* User to kernel message */ + WIMAX_GNL_OP_MSG_TO_USER, /* Kernel to user message */ + WIMAX_GNL_OP_RFKILL, /* Run wimax_rfkill() */ + WIMAX_GNL_OP_RESET, /* Run wimax_rfkill() */ + WIMAX_GNL_RE_STATE_CHANGE, /* Report: status change */ +}; + + +/* Message from user / to user */ +enum { + WIMAX_GNL_MSG_IFIDX = 1, + WIMAX_GNL_MSG_PIPE_NAME, + WIMAX_GNL_MSG_DATA, +}; + + +/* + * wimax_rfkill() + * + * The state of the radio (ON/OFF) is mapped to the rfkill subsystem's + * switch state (DISABLED/ENABLED). + */ +enum wimax_rf_state { + WIMAX_RF_OFF = 0, /* Radio is off, rfkill on/enabled */ + WIMAX_RF_ON = 1, /* Radio is on, rfkill off/disabled */ + WIMAX_RF_QUERY = 2, +}; + +/* Attributes */ +enum { + WIMAX_GNL_RFKILL_IFIDX = 1, + WIMAX_GNL_RFKILL_STATE, +}; + + +/* Attributes for wimax_reset() */ +enum { + WIMAX_GNL_RESET_IFIDX = 1, +}; + + +/* + * Attributes for the Report State Change + * + * For now we just have the old and new states; new attributes might + * be added later on. + */ +enum { + WIMAX_GNL_STCH_IFIDX = 1, + WIMAX_GNL_STCH_STATE_OLD, + WIMAX_GNL_STCH_STATE_NEW, +}; + + +/** + * enum wimax_st - The different states of a WiMAX device + * @__WIMAX_ST_NULL: The device structure has been allocated and zeroed, + * but still wimax_dev_add() hasn't been called. There is no state. + * + * @WIMAX_ST_DOWN: The device has been registered with the WiMAX and + * networking stacks, but it is not initialized (normally that is + * done with 'ifconfig DEV up' [or equivalent], which can upload + * firmware and enable communications with the device). + * In this state, the device is powered down and using as less + * power as possible. + * This state is the default after a call to wimax_dev_add(). It + * is ok to have drivers move directly to %WIMAX_ST_UNINITIALIZED + * or %WIMAX_ST_RADIO_OFF in _probe() after the call to + * wimax_dev_add(). + * It is recommended that the driver leaves this state when + * calling 'ifconfig DEV up' and enters it back on 'ifconfig DEV + * down'. + * + * @__WIMAX_ST_QUIESCING: The device is being torn down, so no API + * operations are allowed to proceed except the ones needed to + * complete the device clean up process. + * + * @WIMAX_ST_UNINITIALIZED: [optional] Communication with the device + * is setup, but the device still requires some configuration + * before being operational. + * Some WiMAX API calls might work. + * + * @WIMAX_ST_RADIO_OFF: The device is fully up; radio is off (wether + * by hardware or software switches). + * It is recommended to always leave the device in this state + * after initialization. + * + * @WIMAX_ST_READY: The device is fully up and radio is on. + * + * @WIMAX_ST_SCANNING: [optional] The device has been instructed to + * scan. In this state, the device cannot be actively connected to + * a network. + * + * @WIMAX_ST_CONNECTING: The device is connecting to a network. This + * state exists because in some devices, the connect process can + * include a number of negotiations between user space, kernel + * space and the device. User space needs to know what the device + * is doing. If the connect sequence in a device is atomic and + * fast, the device can transition directly to CONNECTED + * + * @WIMAX_ST_CONNECTED: The device is connected to a network. + * + * @__WIMAX_ST_INVALID: This is an invalid state used to mark the + * maximum numeric value of states. + * + * Description: + * + * Transitions from one state to another one are atomic and can only + * be caused in kernel space with wimax_state_change(). To read the + * state, use wimax_state_get(). + * + * States starting with __ are internal and shall not be used or + * referred to by drivers or userspace. They look ugly, but that's the + * point -- if any use is made non-internal to the stack, it is easier + * to catch on review. + * + * All API operations [with well defined exceptions] will take the + * device mutex before starting and then check the state. If the state + * is %__WIMAX_ST_NULL, %WIMAX_ST_DOWN, %WIMAX_ST_UNINITIALIZED or + * %__WIMAX_ST_QUIESCING, it will drop the lock and quit with + * -%EINVAL, -%ENOMEDIUM, -%ENOTCONN or -%ESHUTDOWN. + * + * The order of the definitions is important, so we can do numerical + * comparisons (eg: < %WIMAX_ST_RADIO_OFF means the device is not ready + * to operate). + */ +/* + * The allowed state transitions are described in the table below + * (states in rows can go to states in columns where there is an X): + * + * UNINI RADIO READY SCAN CONNEC CONNEC + * NULL DOWN QUIESCING TIALIZED OFF NING TING TED + * NULL - x + * DOWN - x x x + * QUIESCING x - + * UNINITIALIZED x - x + * RADIO_OFF x - x + * READY x x - x x x + * SCANNING x x x - x x + * CONNECTING x x x x - x + * CONNECTED x x x - + * + * This table not available in kernel-doc because the formatting messes it up. + */ + enum wimax_st { + __WIMAX_ST_NULL = 0, + WIMAX_ST_DOWN, + __WIMAX_ST_QUIESCING, + WIMAX_ST_UNINITIALIZED, + WIMAX_ST_RADIO_OFF, + WIMAX_ST_READY, + WIMAX_ST_SCANNING, + WIMAX_ST_CONNECTING, + WIMAX_ST_CONNECTED, + __WIMAX_ST_INVALID /* Always keep last */ +}; + + +#endif /* #ifndef __LINUX__WIMAX_H__ */ -- cgit v1.2.2 From ea912f4e7f264981faf8665cfb63d46d7f948117 Mon Sep 17 00:00:00 2001 From: Inaky Perez-Gonzalez Date: Sat, 20 Dec 2008 16:57:35 -0800 Subject: wimax: debug macros and debug settings for the WiMAX stack This file contains a simple debug framework that is used in the stack; it allows the debug level to be controlled at compile-time (so the debug code is optimized out) and at run-time (for what wasn't compiled out). This is eventually going to be moved to use dynamic_printk(). Just need to find time to do it. Signed-off-by: Inaky Perez-Gonzalez Signed-off-by: Greg Kroah-Hartman --- include/linux/wimax/debug.h | 453 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 453 insertions(+) create mode 100644 include/linux/wimax/debug.h (limited to 'include/linux') diff --git a/include/linux/wimax/debug.h b/include/linux/wimax/debug.h new file mode 100644 index 000000000000..ba0c49399a83 --- /dev/null +++ b/include/linux/wimax/debug.h @@ -0,0 +1,453 @@ +/* + * Linux WiMAX + * Collection of tools to manage debug operations. + * + * + * Copyright (C) 2005-2007 Intel Corporation + * Inaky Perez-Gonzalez + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version + * 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. + * + * + * Don't #include this file directly, read on! + * + * + * EXECUTING DEBUGGING ACTIONS OR NOT + * + * The main thing this framework provides is decission power to take a + * debug action (like printing a message) if the current debug level + * allows it. + * + * The decission power is at two levels: at compile-time (what does + * not make it is compiled out) and at run-time. The run-time + * selection is done per-submodule (as they are declared by the user + * of the framework). + * + * A call to d_test(L) (L being the target debug level) returns true + * if the action should be taken because the current debug levels + * allow it (both compile and run time). + * + * It follows that a call to d_test() that can be determined to be + * always false at compile time will get the code depending on it + * compiled out by optimization. + * + * + * DEBUG LEVELS + * + * It is up to the caller to define how much a debugging level is. + * + * Convention sets 0 as "no debug" (so an action marked as debug level 0 + * will always be taken). The increasing debug levels are used for + * increased verbosity. + * + * + * USAGE + * + * Group the code in modules and submodules inside each module [which + * in most cases maps to Linux modules and .c files that compose + * those]. + * + * + * For each module, there is: + * + * - a MODULENAME (single word, legal C identifier) + * + * - a debug-levels.h header file that declares the list of + * submodules and that is included by all .c files that use + * the debugging tools. The file name can be anything. + * + * - some (optional) .c code to manipulate the runtime debug levels + * through debugfs. + * + * The debug-levels.h file would look like: + * + * #ifndef __debug_levels__h__ + * #define __debug_levels__h__ + * + * #define D_MODULENAME modulename + * #define D_MASTER 10 + * + * #include + * + * enum d_module { + * D_SUBMODULE_DECLARE(submodule_1), + * D_SUBMODULE_DECLARE(submodule_2), + * ... + * D_SUBMODULE_DECLARE(submodule_N) + * }; + * + * #endif + * + * D_MASTER is the maximum compile-time debug level; any debug actions + * above this will be out. D_MODULENAME is the module name (legal C + * identifier), which has to be unique for each module (to avoid + * namespace collisions during linkage). Note those #defines need to + * be done before #including debug.h + * + * We declare N different submodules whose debug level can be + * independently controlled during runtime. + * + * In a .c file of the module (and only in one of them), define the + * following code: + * + * struct d_level D_LEVEL[] = { + * D_SUBMODULE_DEFINE(submodule_1), + * D_SUBMODULE_DEFINE(submodule_2), + * ... + * D_SUBMODULE_DEFINE(submodule_N), + * }; + * size_t D_LEVEL_SIZE = ARRAY_SIZE(D_LEVEL); + * + * Externs for d_level_MODULENAME and d_level_size_MODULENAME are used + * and declared in this file using the D_LEVEL and D_LEVEL_SIZE macros + * #defined also in this file. + * + * To manipulate from user space the levels, create a debugfs dentry + * and then register each submodule with: + * + * result = d_level_register_debugfs("PREFIX_", submodule_X, parent); + * if (result < 0) + * goto error; + * + * Where PREFIX_ is a name of your chosing. This will create debugfs + * file with a single numeric value that can be use to tweak it. To + * remove the entires, just use debugfs_remove_recursive() on 'parent'. + * + * NOTE: remember that even if this will show attached to some + * particular instance of a device, the settings are *global*. + * + * + * On each submodule (for example, .c files), the debug infrastructure + * should be included like this: + * + * #define D_SUBMODULE submodule_x // matches one in debug-levels.h + * #include "debug-levels.h" + * + * after #including all your include files. + * + * + * Now you can use the d_*() macros below [d_test(), d_fnstart(), + * d_fnend(), d_printf(), d_dump()]. + * + * If their debug level is greater than D_MASTER, they will be + * compiled out. + * + * If their debug level is lower or equal than D_MASTER but greater + * than the current debug level of their submodule, they'll be + * ignored. + * + * Otherwise, the action will be performed. + */ +#ifndef __debug__h__ +#define __debug__h__ + +#include +#include + + +/* Backend stuff */ + +/* + * Debug backend: generate a message header from a 'struct device' + * + * @head: buffer where to place the header + * @head_size: length of @head + * @dev: pointer to device used to generate a header from. If NULL, + * an empty ("") header is generated. + */ +static inline +void __d_head(char *head, size_t head_size, + struct device *dev) +{ + if (dev == NULL) + head[0] = 0; + else if ((unsigned long)dev < 4096) { + printk(KERN_ERR "E: Corrupt dev %p\n", dev); + WARN_ON(1); + } else + snprintf(head, head_size, "%s %s: ", + dev_driver_string(dev), dev->bus_id); +} + + +/* + * Debug backend: log some message if debugging is enabled + * + * @l: intended debug level + * @tag: tag to prefix the message with + * @dev: 'struct device' associated to this message + * @f: printf-like format and arguments + * + * Note this is optimized out if it doesn't pass the compile-time + * check; however, it is *always* compiled. This is useful to make + * sure the printf-like formats and variables are always checked and + * they don't get bit rot if you have all the debugging disabled. + */ +#define _d_printf(l, tag, dev, f, a...) \ +do { \ + char head[64]; \ + if (!d_test(l)) \ + break; \ + __d_head(head, sizeof(head), dev); \ + printk(KERN_ERR "%s%s%s: " f, head, __func__, tag, ##a); \ +} while (0) + + +/* + * CPP sintatic sugar to generate A_B like symbol names when one of + * the arguments is a a preprocessor #define. + */ +#define __D_PASTE__(varname, modulename) varname##_##modulename +#define __D_PASTE(varname, modulename) (__D_PASTE__(varname, modulename)) +#define _D_SUBMODULE_INDEX(_name) (D_SUBMODULE_DECLARE(_name)) + + +/* + * Store a submodule's runtime debug level and name + */ +struct d_level { + u8 level; + const char *name; +}; + + +/* + * List of available submodules and their debug levels + * + * We call them d_level_MODULENAME and d_level_size_MODULENAME; the + * macros D_LEVEL and D_LEVEL_SIZE contain the name already for + * convenience. + * + * This array and the size are defined on some .c file that is part of + * the current module. + */ +#define D_LEVEL __D_PASTE(d_level, D_MODULENAME) +#define D_LEVEL_SIZE __D_PASTE(d_level_size, D_MODULENAME) + +extern struct d_level D_LEVEL[]; +extern size_t D_LEVEL_SIZE; + + +/* + * Frontend stuff + * + * + * Stuff you need to declare prior to using the actual "debug" actions + * (defined below). + */ + +#ifndef D_MODULENAME +#error D_MODULENAME is not defined in your debug-levels.h file +/** + * D_MODULE - Name of the current module + * + * #define in your module's debug-levels.h, making sure it is + * unique. This has to be a legal C identifier. + */ +#define D_MODULENAME undefined_modulename +#endif + + +#ifndef D_MASTER +#warning D_MASTER not defined, but debug.h included! [see docs] +/** + * D_MASTER - Compile time maximum debug level + * + * #define in your debug-levels.h file to the maximum debug level the + * runtime code will be allowed to have. This allows you to provide a + * main knob. + * + * Anything above that level will be optimized out of the compile. + * + * Defaults to zero (no debug code compiled in). + * + * Maximum one definition per module (at the debug-levels.h file). + */ +#define D_MASTER 0 +#endif + +#ifndef D_SUBMODULE +#error D_SUBMODULE not defined, but debug.h included! [see docs] +/** + * D_SUBMODULE - Name of the current submodule + * + * #define in your submodule .c file before #including debug-levels.h + * to the name of the current submodule as previously declared and + * defined with D_SUBMODULE_DECLARE() (in your module's + * debug-levels.h) and D_SUBMODULE_DEFINE(). + * + * This is used to provide runtime-control over the debug levels. + * + * Maximum one per .c file! Can be shared among different .c files + * (meaning they belong to the same submodule categorization). + */ +#define D_SUBMODULE undefined_module +#endif + + +/** + * D_SUBMODULE_DECLARE - Declare a submodule for runtime debug level control + * + * @_name: name of the submodule, restricted to the chars that make up a + * valid C identifier ([a-zA-Z0-9_]). + * + * Declare in the module's debug-levels.h header file as: + * + * enum d_module { + * D_SUBMODULE_DECLARE(submodule_1), + * D_SUBMODULE_DECLARE(submodule_2), + * D_SUBMODULE_DECLARE(submodule_3), + * }; + * + * Some corresponding .c file needs to have a matching + * D_SUBMODULE_DEFINE(). + */ +#define D_SUBMODULE_DECLARE(_name) __D_SUBMODULE_##_name + + +/** + * D_SUBMODULE_DEFINE - Define a submodule for runtime debug level control + * + * @_name: name of the submodule, restricted to the chars that make up a + * valid C identifier ([a-zA-Z0-9_]). + * + * Use once per module (in some .c file) as: + * + * static + * struct d_level d_level_SUBMODULENAME[] = { + * D_SUBMODULE_DEFINE(submodule_1), + * D_SUBMODULE_DEFINE(submodule_2), + * D_SUBMODULE_DEFINE(submodule_3), + * }; + * size_t d_level_size_SUBDMODULENAME = ARRAY_SIZE(d_level_SUBDMODULENAME); + * + * Matching D_SUBMODULE_DECLARE()s have to be present in a + * debug-levels.h header file. + */ +#define D_SUBMODULE_DEFINE(_name) \ +[__D_SUBMODULE_##_name] = { \ + .level = 0, \ + .name = #_name \ +} + + + +/* The actual "debug" operations */ + + +/** + * d_test - Returns true if debugging should be enabled + * + * @l: intended debug level (unsigned) + * + * If the master debug switch is enabled and the current settings are + * higher or equal to the requested level, then debugging + * output/actions should be enabled. + * + * NOTE: + * + * This needs to be coded so that it can be evaluated in compile + * time; this is why the ugly BUG_ON() is placed in there, so the + * D_MASTER evaluation compiles all out if it is compile-time false. + */ +#define d_test(l) \ +({ \ + unsigned __l = l; /* type enforcer */ \ + (D_MASTER) >= __l \ + && ({ \ + BUG_ON(_D_SUBMODULE_INDEX(D_SUBMODULE) >= D_LEVEL_SIZE);\ + D_LEVEL[_D_SUBMODULE_INDEX(D_SUBMODULE)].level >= __l; \ + }); \ +}) + + +/** + * d_fnstart - log message at function start if debugging enabled + * + * @l: intended debug level + * @_dev: 'struct device' pointer, NULL if none (for context) + * @f: printf-like format and arguments + */ +#define d_fnstart(l, _dev, f, a...) _d_printf(l, " FNSTART", _dev, f, ## a) + + +/** + * d_fnend - log message at function end if debugging enabled + * + * @l: intended debug level + * @_dev: 'struct device' pointer, NULL if none (for context) + * @f: printf-like format and arguments + */ +#define d_fnend(l, _dev, f, a...) _d_printf(l, " FNEND", _dev, f, ## a) + + +/** + * d_printf - log message if debugging enabled + * + * @l: intended debug level + * @_dev: 'struct device' pointer, NULL if none (for context) + * @f: printf-like format and arguments + */ +#define d_printf(l, _dev, f, a...) _d_printf(l, "", _dev, f, ## a) + + +/** + * d_dump - log buffer hex dump if debugging enabled + * + * @l: intended debug level + * @_dev: 'struct device' pointer, NULL if none (for context) + * @f: printf-like format and arguments + */ +#define d_dump(l, dev, ptr, size) \ +do { \ + char head[64]; \ + if (!d_test(l)) \ + break; \ + __d_head(head, sizeof(head), dev); \ + print_hex_dump(KERN_ERR, head, 0, 16, 1, \ + ((void *) ptr), (size), 0); \ +} while (0) + + +/** + * Export a submodule's debug level over debugfs as PREFIXSUBMODULE + * + * @prefix: string to prefix the name with + * @submodule: name of submodule (not a string, just the name) + * @dentry: debugfs parent dentry + * + * Returns: 0 if ok, < 0 errno on error. + * + * For removing, just use debugfs_remove_recursive() on the parent. + */ +#define d_level_register_debugfs(prefix, name, parent) \ +({ \ + int rc; \ + struct dentry *fd; \ + struct dentry *verify_parent_type = parent; \ + fd = debugfs_create_u8( \ + prefix #name, 0600, verify_parent_type, \ + &(D_LEVEL[__D_SUBMODULE_ ## name].level)); \ + rc = PTR_ERR(fd); \ + if (IS_ERR(fd) && rc != -ENODEV) \ + printk(KERN_ERR "%s: Can't create debugfs entry %s: " \ + "%d\n", __func__, prefix #name, rc); \ + else \ + rc = 0; \ + rc; \ +}) + + +#endif /* #ifndef __debug__h__ */ -- cgit v1.2.2 From ea24652d253eabfb83e955e55ce032228d9d99b9 Mon Sep 17 00:00:00 2001 From: Inaky Perez-Gonzalez Date: Sat, 20 Dec 2008 16:57:43 -0800 Subject: i2400m: host/device procotol and core driver definitions The wimax/i2400m.h defines the structures and constants for the host-device protocols: - boot / firmware upload protocol - general data transport protocol - control protocol It is done in such a way that can also be used verbatim by user space. drivers/net/wimax/i2400m.h defines all the APIs used by the core, bus-generic driver (i2400m) and the bus specific drivers (i2400m-BUSNAME). It also gives a roadmap to the driver implementation. debug-levels.h adds the core driver's debug settings. Signed-off-by: Inaky Perez-Gonzalez Signed-off-by: Greg Kroah-Hartman --- include/linux/wimax/i2400m.h | 512 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 512 insertions(+) create mode 100644 include/linux/wimax/i2400m.h (limited to 'include/linux') diff --git a/include/linux/wimax/i2400m.h b/include/linux/wimax/i2400m.h new file mode 100644 index 000000000000..74198f5bb4dc --- /dev/null +++ b/include/linux/wimax/i2400m.h @@ -0,0 +1,512 @@ +/* + * Intel Wireless WiMax Connection 2400m + * Host-Device protocol interface definitions + * + * + * Copyright (C) 2007-2008 Intel Corporation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * + * Intel Corporation + * Inaky Perez-Gonzalez + * - Initial implementation + * + * + * This header defines the data structures and constants used to + * communicate with the device. + * + * BOOTMODE/BOOTROM/FIRMWARE UPLOAD PROTOCOL + * + * The firmware upload protocol is quite simple and only requires a + * handful of commands. See drivers/net/wimax/i2400m/fw.c for more + * details. + * + * The BCF data structure is for the firmware file header. + * + * + * THE DATA / CONTROL PROTOCOL + * + * This is the normal protocol spoken with the device once the + * firmware is uploaded. It transports data payloads and control + * messages back and forth. + * + * It consists 'messages' that pack one or more payloads each. The + * format is described in detail in drivers/net/wimax/i2400m/rx.c and + * tx.c. + * + * + * THE L3L4 PROTOCOL + * + * The term L3L4 refers to Layer 3 (the device), Layer 4 (the + * driver/host software). + * + * This is the control protocol used by the host to control the i2400m + * device (scan, connect, disconnect...). This is sent to / received + * as control frames. These frames consist of a header and zero or + * more TLVs with information. We call each control frame a "message". + * + * Each message is composed of: + * + * HEADER + * [TLV0 + PAYLOAD0] + * [TLV1 + PAYLOAD1] + * [...] + * [TLVN + PAYLOADN] + * + * The HEADER is defined by 'struct i2400m_l3l4_hdr'. The payloads are + * defined by a TLV structure (Type Length Value) which is a 'header' + * (struct i2400m_tlv_hdr) and then the payload. + * + * All integers are represented as Little Endian. + * + * - REQUESTS AND EVENTS + * + * The requests can be clasified as follows: + * + * COMMAND: implies a request from the host to the device requesting + * an action being performed. The device will reply with a + * message (with the same type as the command), status and + * no (TLV) payload. Execution of a command might cause + * events (of different type) to be sent later on as + * device's state changes. + * + * GET/SET: similar to COMMAND, but will not cause other + * EVENTs. The reply, in the case of GET, will contain + * TLVs with the requested information. + * + * EVENT: asynchronous messages sent from the device, maybe as a + * consequence of previous COMMANDs but disassociated from + * them. + * + * Only one request might be pending at the same time (ie: don't + * parallelize nor post another GET request before the previous + * COMMAND has been acknowledged with it's corresponding reply by the + * device). + * + * The different requests and their formats are described below: + * + * I2400M_MT_* Message types + * I2400M_MS_* Message status (for replies, events) + * i2400m_tlv_* TLVs + * + * data types are named 'struct i2400m_msg_OPNAME', OPNAME matching the + * operation. + */ + +#ifndef __LINUX__WIMAX__I2400M_H__ +#define __LINUX__WIMAX__I2400M_H__ + +#include + + +/* + * Host Device Interface (HDI) common to all busses + */ + +/* Boot-mode (firmware upload mode) commands */ + +/* Header for the firmware file */ +struct i2400m_bcf_hdr { + __le32 module_type; + __le32 header_len; + __le32 header_version; + __le32 module_id; + __le32 module_vendor; + __le32 date; /* BCD YYYMMDD */ + __le32 size; + __le32 key_size; /* in dwords */ + __le32 modulus_size; /* in dwords */ + __le32 exponent_size; /* in dwords */ + __u8 reserved[88]; +} __attribute__ ((packed)); + +/* Boot mode opcodes */ +enum i2400m_brh_opcode { + I2400M_BRH_READ = 1, + I2400M_BRH_WRITE = 2, + I2400M_BRH_JUMP = 3, + I2400M_BRH_SIGNED_JUMP = 8, + I2400M_BRH_HASH_PAYLOAD_ONLY = 9, +}; + +/* Boot mode command masks and stuff */ +enum i2400m_brh { + I2400M_BRH_SIGNATURE = 0xcbbc0000, + I2400M_BRH_SIGNATURE_MASK = 0xffff0000, + I2400M_BRH_SIGNATURE_SHIFT = 16, + I2400M_BRH_OPCODE_MASK = 0x0000000f, + I2400M_BRH_RESPONSE_MASK = 0x000000f0, + I2400M_BRH_RESPONSE_SHIFT = 4, + I2400M_BRH_DIRECT_ACCESS = 0x00000400, + I2400M_BRH_RESPONSE_REQUIRED = 0x00000200, + I2400M_BRH_USE_CHECKSUM = 0x00000100, +}; + + +/* Constants for bcf->module_id */ +enum i2400m_bcf_mod_id { + /* Firmware file carries its own pokes -- pokes are a set of + * magical values that have to be written in certain memory + * addresses to get the device up and ready for firmware + * download when it is in non-signed boot mode. */ + I2400M_BCF_MOD_ID_POKES = 0x000000001, +}; + + +/** + * i2400m_bootrom_header - Header for a boot-mode command + * + * @cmd: the above command descriptor + * @target_addr: where on the device memory should the action be performed. + * @data_size: for read/write, amount of data to be read/written + * @block_checksum: checksum value (if applicable) + * @payload: the beginning of data attached to this header + */ +struct i2400m_bootrom_header { + __le32 command; /* Compose with enum i2400_brh */ + __le32 target_addr; + __le32 data_size; + __le32 block_checksum; + char payload[0]; +} __attribute__ ((packed)); + + +/* + * Data / control protocol + */ + +/* Packet types for the host-device interface */ +enum i2400m_pt { + I2400M_PT_DATA = 0, + I2400M_PT_CTRL, + I2400M_PT_TRACE, /* For device debug */ + I2400M_PT_RESET_WARM, /* device reset */ + I2400M_PT_RESET_COLD, /* USB[transport] reset, like reconnect */ + I2400M_PT_ILLEGAL +}; + + +/* + * Payload for a data packet + * + * This is prefixed to each and every outgoing DATA type. + */ +struct i2400m_pl_data_hdr { + __le32 reserved; +} __attribute__((packed)); + + +/* Misc constants */ +enum { + I2400M_PL_PAD = 16, /* Payload data size alignment */ + I2400M_PL_SIZE_MAX = 0x3EFF, + I2400M_MAX_PLS_IN_MSG = 60, + /* protocol barkers: sync sequences; for notifications they + * are sent in groups of four. */ + I2400M_H2D_PREVIEW_BARKER = 0xcafe900d, + I2400M_COLD_RESET_BARKER = 0xc01dc01d, + I2400M_WARM_RESET_BARKER = 0x50f750f7, + I2400M_NBOOT_BARKER = 0xdeadbeef, + I2400M_SBOOT_BARKER = 0x0ff1c1a1, + I2400M_ACK_BARKER = 0xfeedbabe, + I2400M_D2H_MSG_BARKER = 0xbeefbabe, +}; + + +/* + * Hardware payload descriptor + * + * Bitfields encoded in a struct to enforce typing semantics. + * + * Look in rx.c and tx.c for a full description of the format. + */ +struct i2400m_pld { + __le32 val; +} __attribute__ ((packed)); + +#define I2400M_PLD_SIZE_MASK 0x00003fff +#define I2400M_PLD_TYPE_SHIFT 16 +#define I2400M_PLD_TYPE_MASK 0x000f0000 + +/* + * Header for a TX message or RX message + * + * @barker: preamble + * @size: used for management of the FIFO queue buffer; before + * sending, this is converted to be a real preamble. This + * indicates the real size of the TX message that starts at this + * point. If the highest bit is set, then this message is to be + * skipped. + * @sequence: sequence number of this message + * @offset: offset where the message itself starts -- see the comments + * in the file header about message header and payload descriptor + * alignment. + * @num_pls: number of payloads in this message + * @padding: amount of padding bytes at the end of the message to make + * it be of block-size aligned + * + * Look in rx.c and tx.c for a full description of the format. + */ +struct i2400m_msg_hdr { + union { + __le32 barker; + __u32 size; /* same size type as barker!! */ + }; + union { + __le32 sequence; + __u32 offset; /* same size type as barker!! */ + }; + __le16 num_pls; + __le16 rsv1; + __le16 padding; + __le16 rsv2; + struct i2400m_pld pld[0]; +} __attribute__ ((packed)); + + + +/* + * L3/L4 control protocol + */ + +enum { + /* Interface version */ + I2400M_L3L4_VERSION = 0x0100, +}; + +/* Message types */ +enum i2400m_mt { + I2400M_MT_RESERVED = 0x0000, + I2400M_MT_INVALID = 0xffff, + I2400M_MT_REPORT_MASK = 0x8000, + + I2400M_MT_GET_SCAN_RESULT = 0x4202, + I2400M_MT_SET_SCAN_PARAM = 0x4402, + I2400M_MT_CMD_RF_CONTROL = 0x4602, + I2400M_MT_CMD_SCAN = 0x4603, + I2400M_MT_CMD_CONNECT = 0x4604, + I2400M_MT_CMD_DISCONNECT = 0x4605, + I2400M_MT_CMD_EXIT_IDLE = 0x4606, + I2400M_MT_GET_LM_VERSION = 0x5201, + I2400M_MT_GET_DEVICE_INFO = 0x5202, + I2400M_MT_GET_LINK_STATUS = 0x5203, + I2400M_MT_GET_STATISTICS = 0x5204, + I2400M_MT_GET_STATE = 0x5205, + I2400M_MT_GET_MEDIA_STATUS = 0x5206, + I2400M_MT_SET_INIT_CONFIG = 0x5404, + I2400M_MT_CMD_INIT = 0x5601, + I2400M_MT_CMD_TERMINATE = 0x5602, + I2400M_MT_CMD_MODE_OF_OP = 0x5603, + I2400M_MT_CMD_RESET_DEVICE = 0x5604, + I2400M_MT_CMD_MONITOR_CONTROL = 0x5605, + I2400M_MT_CMD_ENTER_POWERSAVE = 0x5606, + I2400M_MT_GET_TLS_OPERATION_RESULT = 0x6201, + I2400M_MT_SET_EAP_SUCCESS = 0x6402, + I2400M_MT_SET_EAP_FAIL = 0x6403, + I2400M_MT_SET_EAP_KEY = 0x6404, + I2400M_MT_CMD_SEND_EAP_RESPONSE = 0x6602, + I2400M_MT_REPORT_SCAN_RESULT = 0xc002, + I2400M_MT_REPORT_STATE = 0xd002, + I2400M_MT_REPORT_POWERSAVE_READY = 0xd005, + I2400M_MT_REPORT_EAP_REQUEST = 0xe002, + I2400M_MT_REPORT_EAP_RESTART = 0xe003, + I2400M_MT_REPORT_ALT_ACCEPT = 0xe004, + I2400M_MT_REPORT_KEY_REQUEST = 0xe005, +}; + + +/* + * Message Ack Status codes + * + * When a message is replied-to, this status is reported. + */ +enum i2400m_ms { + I2400M_MS_DONE_OK = 0, + I2400M_MS_DONE_IN_PROGRESS = 1, + I2400M_MS_INVALID_OP = 2, + I2400M_MS_BAD_STATE = 3, + I2400M_MS_ILLEGAL_VALUE = 4, + I2400M_MS_MISSING_PARAMS = 5, + I2400M_MS_VERSION_ERROR = 6, + I2400M_MS_ACCESSIBILITY_ERROR = 7, + I2400M_MS_BUSY = 8, + I2400M_MS_CORRUPTED_TLV = 9, + I2400M_MS_UNINITIALIZED = 10, + I2400M_MS_UNKNOWN_ERROR = 11, + I2400M_MS_PRODUCTION_ERROR = 12, + I2400M_MS_NO_RF = 13, + I2400M_MS_NOT_READY_FOR_POWERSAVE = 14, + I2400M_MS_THERMAL_CRITICAL = 15, + I2400M_MS_MAX +}; + + +/** + * i2400m_tlv - enumeration of the different types of TLVs + * + * TLVs stand for type-length-value and are the header for a payload + * composed of almost anything. Each payload has a type assigned + * and a length. + */ +enum i2400m_tlv { + I2400M_TLV_L4_MESSAGE_VERSIONS = 129, + I2400M_TLV_SYSTEM_STATE = 141, + I2400M_TLV_MEDIA_STATUS = 161, + I2400M_TLV_RF_OPERATION = 162, + I2400M_TLV_RF_STATUS = 163, + I2400M_TLV_DEVICE_RESET_TYPE = 132, + I2400M_TLV_CONFIG_IDLE_PARAMETERS = 601, +}; + + +struct i2400m_tlv_hdr { + __le16 type; + __le16 length; /* payload's */ + __u8 pl[0]; +} __attribute__((packed)); + + +struct i2400m_l3l4_hdr { + __le16 type; + __le16 length; /* payload's */ + __le16 version; + __le16 resv1; + __le16 status; + __le16 resv2; + struct i2400m_tlv_hdr pl[0]; +} __attribute__((packed)); + + +/** + * i2400m_system_state - different states of the device + */ +enum i2400m_system_state { + I2400M_SS_UNINITIALIZED = 1, + I2400M_SS_INIT, + I2400M_SS_READY, + I2400M_SS_SCAN, + I2400M_SS_STANDBY, + I2400M_SS_CONNECTING, + I2400M_SS_WIMAX_CONNECTED, + I2400M_SS_DATA_PATH_CONNECTED, + I2400M_SS_IDLE, + I2400M_SS_DISCONNECTING, + I2400M_SS_OUT_OF_ZONE, + I2400M_SS_SLEEPACTIVE, + I2400M_SS_PRODUCTION, + I2400M_SS_CONFIG, + I2400M_SS_RF_OFF, + I2400M_SS_RF_SHUTDOWN, + I2400M_SS_DEVICE_DISCONNECT, + I2400M_SS_MAX, +}; + + +/** + * i2400m_tlv_system_state - report on the state of the system + * + * @state: see enum i2400m_system_state + */ +struct i2400m_tlv_system_state { + struct i2400m_tlv_hdr hdr; + __le32 state; +} __attribute__((packed)); + + +struct i2400m_tlv_l4_message_versions { + struct i2400m_tlv_hdr hdr; + __le16 major; + __le16 minor; + __le16 branch; + __le16 reserved; +} __attribute__((packed)); + + +struct i2400m_tlv_detailed_device_info { + struct i2400m_tlv_hdr hdr; + __u8 reserved1[400]; + __u8 mac_address[6]; + __u8 reserved2[2]; +} __attribute__((packed)); + + +enum i2400m_rf_switch_status { + I2400M_RF_SWITCH_ON = 1, + I2400M_RF_SWITCH_OFF = 2, +}; + +struct i2400m_tlv_rf_switches_status { + struct i2400m_tlv_hdr hdr; + __u8 sw_rf_switch; /* 1 ON, 2 OFF */ + __u8 hw_rf_switch; /* 1 ON, 2 OFF */ + __u8 reserved[2]; +} __attribute__((packed)); + + +enum { + i2400m_rf_operation_on = 1, + i2400m_rf_operation_off = 2 +}; + +struct i2400m_tlv_rf_operation { + struct i2400m_tlv_hdr hdr; + __le32 status; /* 1 ON, 2 OFF */ +} __attribute__((packed)); + + +enum i2400m_tlv_reset_type { + I2400M_RESET_TYPE_COLD = 1, + I2400M_RESET_TYPE_WARM +}; + +struct i2400m_tlv_device_reset_type { + struct i2400m_tlv_hdr hdr; + __le32 reset_type; +} __attribute__((packed)); + + +struct i2400m_tlv_config_idle_parameters { + struct i2400m_tlv_hdr hdr; + __le32 idle_timeout; /* 100 to 300000 ms [5min], 100 increments + * 0 disabled */ + __le32 idle_paging_interval; /* frames */ +} __attribute__((packed)); + + +enum i2400m_media_status { + I2400M_MEDIA_STATUS_LINK_UP = 1, + I2400M_MEDIA_STATUS_LINK_DOWN, + I2400M_MEDIA_STATUS_LINK_RENEW, +}; + +struct i2400m_tlv_media_status { + struct i2400m_tlv_hdr hdr; + __le32 media_status; +} __attribute__((packed)); + +#endif /* #ifndef __LINUX__WIMAX__I2400M_H__ */ -- cgit v1.2.2 From e30698743419d20dce03d033761f203b4d847ab0 Mon Sep 17 00:00:00 2001 From: Inaky Perez-Gonzalez Date: Sat, 20 Dec 2008 16:57:59 -0800 Subject: wimax: export linux/wimax.h and linux/wimax/i2400m.h with headers_install These two files are what user space can use to establish communication with the WiMAX kernel API and to speak the Intel 2400m Wireless WiMAX connection's control protocol. Signed-off-by: Inaky Perez-Gonzalez Cc: David Woodhouse Signed-off-by: Greg Kroah-Hartman --- include/linux/Kbuild | 2 ++ include/linux/wimax/Kbuild | 1 + 2 files changed, 3 insertions(+) create mode 100644 include/linux/wimax/Kbuild (limited to 'include/linux') diff --git a/include/linux/Kbuild b/include/linux/Kbuild index a3323f337e4d..12e9a2957caf 100644 --- a/include/linux/Kbuild +++ b/include/linux/Kbuild @@ -371,3 +371,5 @@ unifdef-y += xattr.h unifdef-y += xfrm.h objhdr-y += version.h +header-y += wimax.h +header-y += wimax/ diff --git a/include/linux/wimax/Kbuild b/include/linux/wimax/Kbuild new file mode 100644 index 000000000000..3cb4f269bb09 --- /dev/null +++ b/include/linux/wimax/Kbuild @@ -0,0 +1 @@ +header-y += i2400m.h -- cgit v1.2.2 From f7b7baae6b30ff04124259ff8d7c0c0d281320e6 Mon Sep 17 00:00:00 2001 From: Sheng Yang Date: Tue, 11 Nov 2008 17:17:46 +0800 Subject: PCI: add PCI Advanced Feature Capability defines PCI Advanced Features Capability is introduced by "Conventional PCI Advanced Caps ECN" (can be downloaded in pcisig.com). Add defines for the various AF capabilities, including function level reset (FLR). Reviewed-by: Matthew Wilcox Signed-off-by: Sheng Yang Signed-off-by: Jesse Barnes --- include/linux/pci_regs.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pci_regs.h b/include/linux/pci_regs.h index e5effd47ed74..7766488470e4 100644 --- a/include/linux/pci_regs.h +++ b/include/linux/pci_regs.h @@ -210,6 +210,7 @@ #define PCI_CAP_ID_AGP3 0x0E /* AGP Target PCI-PCI bridge */ #define PCI_CAP_ID_EXP 0x10 /* PCI Express */ #define PCI_CAP_ID_MSIX 0x11 /* MSI-X */ +#define PCI_CAP_ID_AF 0x13 /* PCI Advanced Features */ #define PCI_CAP_LIST_NEXT 1 /* Next capability in the list */ #define PCI_CAP_FLAGS 2 /* Capability defined flags (16 bits) */ #define PCI_CAP_SIZEOF 4 @@ -316,6 +317,17 @@ #define PCI_CHSWP_EXT 0x40 /* ENUM# status - extraction */ #define PCI_CHSWP_INS 0x80 /* ENUM# status - insertion */ +/* PCI Advanced Feature registers */ + +#define PCI_AF_LENGTH 2 +#define PCI_AF_CAP 3 +#define PCI_AF_CAP_TP 0x01 +#define PCI_AF_CAP_FLR 0x02 +#define PCI_AF_CTRL 4 +#define PCI_AF_CTRL_FLR 0x01 +#define PCI_AF_STATUS 5 +#define PCI_AF_STATUS_TP 0x01 + /* PCI-X registers */ #define PCI_X_CMD 2 /* Modes & Features */ -- cgit v1.2.2 From 8b62091e20215730be1b94b7cd135a78a3e692ca Mon Sep 17 00:00:00 2001 From: Andrew Patterson Date: Mon, 10 Nov 2008 15:30:40 -0700 Subject: ACPI/PCI: include missing acpi.h file in pci-acpi.h. The pci-acpi.h file will not compile without including linux/acpi.h. Signed-off-by: Matthew Wilcox Signed-off-by: Jesse Barnes --- include/linux/pci-acpi.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pci-acpi.h b/include/linux/pci-acpi.h index 8837928fbf33..a9e4c34e9389 100644 --- a/include/linux/pci-acpi.h +++ b/include/linux/pci-acpi.h @@ -8,6 +8,8 @@ #ifndef _PCI_ACPI_H_ #define _PCI_ACPI_H_ +#include + #define OSC_QUERY_TYPE 0 #define OSC_SUPPORT_TYPE 1 #define OSC_CONTROL_TYPE 2 -- cgit v1.2.2 From 990a7ac5645883a833a11b900bb6f25b65dea65b Mon Sep 17 00:00:00 2001 From: Andrew Patterson Date: Mon, 10 Nov 2008 15:30:45 -0700 Subject: ACPI/PCI: call _OSC support during root bridge discovery Add pci_acpi_osc_support() and call it when a PCI bridge is added. This allows us to avoid having every individual PCI root bridge driver call _OSC support for every root bridge in their probe functions, a significant savings in boot time. Signed-off-by: Matthew Wilcox Signed-off-by: Jesse Barnes --- include/linux/pci-acpi.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/pci-acpi.h b/include/linux/pci-acpi.h index a9e4c34e9389..424f06f84cab 100644 --- a/include/linux/pci-acpi.h +++ b/include/linux/pci-acpi.h @@ -51,6 +51,7 @@ #ifdef CONFIG_ACPI extern acpi_status pci_osc_control_set(acpi_handle handle, u32 flags); extern acpi_status __pci_osc_support_set(u32 flags, const char *hid); +int pci_acpi_osc_support(acpi_handle handle, u32 flags); static inline acpi_status pci_osc_support_set(u32 flags) { return __pci_osc_support_set(flags, PCI_ROOT_HID_STRING); -- cgit v1.2.2 From 0ef5f8f6159e44b4faa997be08d1a3bcbf44ad08 Mon Sep 17 00:00:00 2001 From: Andrew Patterson Date: Mon, 10 Nov 2008 15:30:50 -0700 Subject: ACPI/PCI: PCI extended config _OSC support called when root bridge added The _OSC capability OSC_EXT_PCI_CONFIG_SUPPORT is set when the root bridge is added with pci_acpi_osc_support() if we can access PCI extended config space. This adds the function pci_ext_cfg_avail which returns true if we can access PCI extended config space (offset greater than 0xff). It currently only returns false if arch=x86 and raw_pci_ext_ops is not set (which might happen if pci=nommcfg is set on the kernel command-line). Signed-off-by: Andrew Patterson Signed-off-by: Jesse Barnes --- include/linux/pci.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index 4bb156ba854a..6fd47654ca4e 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1140,6 +1140,8 @@ static inline void pci_mmcfg_early_init(void) { } static inline void pci_mmcfg_late_init(void) { } #endif +int pci_ext_cfg_avail(struct pci_dev *dev); + #ifdef CONFIG_HAS_IOMEM static inline void __iomem *pci_ioremap_bar(struct pci_dev *pdev, int bar) { -- cgit v1.2.2 From 3e1b16002af29758b6bc9c38939d43838d9335bc Mon Sep 17 00:00:00 2001 From: Andrew Patterson Date: Mon, 10 Nov 2008 15:30:55 -0700 Subject: ACPI/PCI: PCIe ASPM _OSC support capabilities called when root bridge added The _OSC capabilities OSC_ACTIVE_STATE_PWR_SUPPORT and OSC_CLOCK_PWR_CAPABILITY_SUPPORT are set when the root bridge is added with pci_acpi_osc_support(), so we no longer need to do it in the ASPM driver. Also add the function pcie_aspm_enabled, which returns true if pcie_aspm=off is not on the kernel command-line. Signed-off-by: Andrew Patterson Signed-off-by: Jesse Barnes --- include/linux/pci.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index 6fd47654ca4e..eae97a2bf603 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -791,6 +791,15 @@ extern void msi_remove_pci_irq_vectors(struct pci_dev *dev); extern void pci_restore_msi_state(struct pci_dev *dev); #endif +#ifndef CONFIG_PCIEASPM +static inline int pcie_aspm_enabled(void) +{ + return 0; +} +#else +extern int pcie_aspm_enabled(void); +#endif + #ifdef CONFIG_HT_IRQ /* The functions a driver should call */ int ht_create_irq(struct pci_dev *dev, int idx); -- cgit v1.2.2 From 07ae95f988a34465bdcb384bfa73c03424fe2312 Mon Sep 17 00:00:00 2001 From: Andrew Patterson Date: Mon, 10 Nov 2008 15:31:05 -0700 Subject: ACPI/PCI: PCI MSI _OSC support capabilities called when root bridge added The _OSC capability OSC_MSI_SUPPORT is set when the root bridge is added with pci_acpi_osc_support(), so we no longer need to do it in the PCI MSI driver. Also adds the function pci_msi_enabled, which returns true if pci=nomsi is not on the kernel command-line. Signed-off-by: Andrew Patterson Signed-off-by: Jesse Barnes --- include/linux/pci.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index eae97a2bf603..59a3dc2059d3 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -779,6 +779,10 @@ static inline void msi_remove_pci_irq_vectors(struct pci_dev *dev) static inline void pci_restore_msi_state(struct pci_dev *dev) { } +static inline int pci_msi_enabled(void) +{ + return 0; +} #else extern int pci_enable_msi(struct pci_dev *dev); extern void pci_msi_shutdown(struct pci_dev *dev); @@ -789,6 +793,7 @@ extern void pci_msix_shutdown(struct pci_dev *dev); extern void pci_disable_msix(struct pci_dev *dev); extern void msi_remove_pci_irq_vectors(struct pci_dev *dev); extern void pci_restore_msi_state(struct pci_dev *dev); +extern int pci_msi_enabled(void); #endif #ifndef CONFIG_PCIEASPM -- cgit v1.2.2 From 23616941914917cf25b94789856b5326b68d8ee8 Mon Sep 17 00:00:00 2001 From: Andrew Patterson Date: Mon, 10 Nov 2008 15:31:10 -0700 Subject: ACPI/PCI: remove obsolete _OSC capability support functions The acpi_query_osc, __pci_osc_support_set, pci_osc_support_set, and pcie_osc_support_set functions have been obsoleted in favor of setting these capabilities during root bridge discovery with pci_acpi_osc_support. There are no longer any callers of these functions, so remove them. Signed-off-by: Andrew Patterson Signed-off-by: Jesse Barnes --- include/linux/pci-acpi.h | 11 ----------- 1 file changed, 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pci-acpi.h b/include/linux/pci-acpi.h index 424f06f84cab..871e096e0fbc 100644 --- a/include/linux/pci-acpi.h +++ b/include/linux/pci-acpi.h @@ -50,16 +50,7 @@ #ifdef CONFIG_ACPI extern acpi_status pci_osc_control_set(acpi_handle handle, u32 flags); -extern acpi_status __pci_osc_support_set(u32 flags, const char *hid); int pci_acpi_osc_support(acpi_handle handle, u32 flags); -static inline acpi_status pci_osc_support_set(u32 flags) -{ - return __pci_osc_support_set(flags, PCI_ROOT_HID_STRING); -} -static inline acpi_status pcie_osc_support_set(u32 flags) -{ - return __pci_osc_support_set(flags, PCI_EXPRESS_ROOT_HID_STRING); -} static inline acpi_handle acpi_find_root_bridge_handle(struct pci_dev *pdev) { /* Find root host bridge */ @@ -76,8 +67,6 @@ typedef u32 acpi_status; #endif static inline acpi_status pci_osc_control_set(acpi_handle handle, u32 flags) {return AE_ERROR;} -static inline acpi_status pci_osc_support_set(u32 flags) {return AE_ERROR;} -static inline acpi_status pcie_osc_support_set(u32 flags) {return AE_ERROR;} static inline acpi_handle acpi_find_root_bridge_handle(struct pci_dev *pdev) { return NULL; } #endif -- cgit v1.2.2 From e8de1481fd7126ee9e93d6889da6f00c05e1e019 Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Wed, 22 Oct 2008 19:55:31 -0700 Subject: resource: allow MMIO exclusivity for device drivers Device drivers that use pci_request_regions() (and similar APIs) have a reasonable expectation that they are the only ones accessing their device. As part of the e1000e hunt, we were afraid that some userland (X or some bootsplash stuff) was mapping the MMIO region that the driver thought it had exclusively via /dev/mem or via various sysfs resource mappings. This patch adds the option for device drivers to cause their reserved regions to the "banned from /dev/mem use" list, so now both kernel memory and device-exclusive MMIO regions are banned. NOTE: This is only active when CONFIG_STRICT_DEVMEM is set. In addition to the config option, a kernel parameter iomem=relaxed is provided for the cases where developers want to diagnose, in the field, drivers issues from userspace. Reviewed-by: Matthew Wilcox Signed-off-by: Arjan van de Ven Signed-off-by: Jesse Barnes --- include/linux/ioport.h | 11 ++++++++--- include/linux/pci.h | 3 +++ 2 files changed, 11 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ioport.h b/include/linux/ioport.h index 041e95aac2bf..f6bb2ca8e3ba 100644 --- a/include/linux/ioport.h +++ b/include/linux/ioport.h @@ -49,6 +49,7 @@ struct resource_list { #define IORESOURCE_SIZEALIGN 0x00020000 /* size indicates alignment */ #define IORESOURCE_STARTALIGN 0x00040000 /* start field is alignment */ +#define IORESOURCE_EXCLUSIVE 0x08000000 /* Userland may not map this resource */ #define IORESOURCE_DISABLED 0x10000000 #define IORESOURCE_UNSET 0x20000000 #define IORESOURCE_AUTO 0x40000000 @@ -133,13 +134,16 @@ static inline unsigned long resource_type(struct resource *res) } /* Convenience shorthand with allocation */ -#define request_region(start,n,name) __request_region(&ioport_resource, (start), (n), (name)) -#define request_mem_region(start,n,name) __request_region(&iomem_resource, (start), (n), (name)) +#define request_region(start,n,name) __request_region(&ioport_resource, (start), (n), (name), 0) +#define __request_mem_region(start,n,name, excl) __request_region(&iomem_resource, (start), (n), (name), excl) +#define request_mem_region(start,n,name) __request_region(&iomem_resource, (start), (n), (name), 0) +#define request_mem_region_exclusive(start,n,name) \ + __request_region(&iomem_resource, (start), (n), (name), IORESOURCE_EXCLUSIVE) #define rename_region(region, newname) do { (region)->name = (newname); } while (0) extern struct resource * __request_region(struct resource *, resource_size_t start, - resource_size_t n, const char *name); + resource_size_t n, const char *name, int relaxed); /* Compatibility cruft */ #define release_region(start,n) __release_region(&ioport_resource, (start), (n)) @@ -175,6 +179,7 @@ extern struct resource * __devm_request_region(struct device *dev, extern void __devm_release_region(struct device *dev, struct resource *parent, resource_size_t start, resource_size_t n); extern int iomem_map_sanity_check(resource_size_t addr, unsigned long size); +extern int iomem_is_exclusive(u64 addr); #endif /* __ASSEMBLY__ */ #endif /* _LINUX_IOPORT_H */ diff --git a/include/linux/pci.h b/include/linux/pci.h index 59a3dc2059d3..bfcb39ca8879 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -686,10 +686,13 @@ void pci_fixup_irqs(u8 (*)(struct pci_dev *, u8 *), int (*)(struct pci_dev *, u8, u8)); #define HAVE_PCI_REQ_REGIONS 2 int __must_check pci_request_regions(struct pci_dev *, const char *); +int __must_check pci_request_regions_exclusive(struct pci_dev *, const char *); void pci_release_regions(struct pci_dev *); int __must_check pci_request_region(struct pci_dev *, int, const char *); +int __must_check pci_request_region_exclusive(struct pci_dev *, int, const char *); void pci_release_region(struct pci_dev *, int); int pci_request_selected_regions(struct pci_dev *, int, const char *); +int pci_request_selected_regions_exclusive(struct pci_dev *, int, const char *); void pci_release_selected_regions(struct pci_dev *, int); /* drivers/pci/bus.c */ -- cgit v1.2.2 From 57c2cf71c12318b72ebaa5720d210476b6bac4d4 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Thu, 11 Dec 2008 11:24:23 -0700 Subject: PCI: add pci_swizzle_interrupt_pin() This patch adds pci_swizzle_interrupt_pin(), which implements the INTx swizzling algorithm specified in Table 9-1 of the "PCI-to-PCI Bridge Architecture Specification," revision 1.2. There are many architecture-specific implementations of this swizzle that can be replaced by this common one. Reviewed-by: David Howells Signed-off-by: Bjorn Helgaas Signed-off-by: Jesse Barnes --- include/linux/pci.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index bfcb39ca8879..58357d14f94c 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -532,6 +532,7 @@ int __must_check pci_bus_add_device(struct pci_dev *dev); void pci_read_bridge_bases(struct pci_bus *child); struct resource *pci_find_parent_resource(const struct pci_dev *dev, struct resource *res); +u8 pci_swizzle_interrupt_pin(struct pci_dev *dev, u8 pin); int pci_get_interrupt_pin(struct pci_dev *dev, struct pci_dev **bridge); extern struct pci_dev *pci_dev_get(struct pci_dev *dev); extern void pci_dev_put(struct pci_dev *dev); -- cgit v1.2.2 From 1684f5ddd4c0c754f52c78eaa2c5c69ad09fb18c Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Mon, 1 Dec 2008 14:30:30 -0800 Subject: PCI: uninline pci_ioremap_bar() It's too large to be inlined. Acked-by: Arjan van de Ven Signed-off-by: Andrew Morton Signed-off-by: Jesse Barnes --- include/linux/pci.h | 15 +-------------- 1 file changed, 1 insertion(+), 14 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index 58357d14f94c..0d8bc920c2e5 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1160,20 +1160,7 @@ static inline void pci_mmcfg_late_init(void) { } int pci_ext_cfg_avail(struct pci_dev *dev); -#ifdef CONFIG_HAS_IOMEM -static inline void __iomem *pci_ioremap_bar(struct pci_dev *pdev, int bar) -{ - /* - * Make sure the BAR is actually a memory resource, not an IO resource - */ - if (!(pci_resource_flags(pdev, bar) & IORESOURCE_MEM)) { - WARN_ON(1); - return NULL; - } - return ioremap_nocache(pci_resource_start(pdev, bar), - pci_resource_len(pdev, bar)); -} -#endif +void __iomem *pci_ioremap_bar(struct pci_dev *pdev, int bar); #endif /* __KERNEL__ */ #endif /* LINUX_PCI_H */ -- cgit v1.2.2 From 14add80b5120966fe0659d61815b9e9b4b68fdc5 Mon Sep 17 00:00:00 2001 From: Yu Zhao Date: Sat, 22 Nov 2008 02:38:52 +0800 Subject: PCI: remove unnecessary arg of pci_update_resource() This cleanup removes unnecessary argument 'struct resource *res' in pci_update_resource(), so it takes same arguments as other companion functions (pci_assign_resource(), etc.). Signed-off-by: Yu Zhao Signed-off-by: Jesse Barnes --- include/linux/pci.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index 0d8bc920c2e5..c5e02f324e13 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -648,7 +648,7 @@ int pcie_get_readrq(struct pci_dev *dev); int pcie_set_readrq(struct pci_dev *dev, int rq); int pci_reset_function(struct pci_dev *dev); int pci_execute_reset_function(struct pci_dev *dev); -void pci_update_resource(struct pci_dev *dev, struct resource *res, int resno); +void pci_update_resource(struct pci_dev *dev, int resno); int __must_check pci_assign_resource(struct pci_dev *dev, int i); int pci_select_bars(struct pci_dev *dev, unsigned long flags); -- cgit v1.2.2 From fde09c6d8f92de0c9f75698a75f0989f2234c517 Mon Sep 17 00:00:00 2001 From: Yu Zhao Date: Sat, 22 Nov 2008 02:39:32 +0800 Subject: PCI: define PCI resource names in an 'enum' This patch moves all definitions of the PCI resource names to an 'enum', and also replaces some hard-coded resource variables with symbol names. This change eases introduction of device specific resources. Reviewed-by: Bjorn Helgaas Signed-off-by: Yu Zhao Signed-off-by: Jesse Barnes --- include/linux/pci.h | 37 ++++++++++++++++++++++++------------- 1 file changed, 24 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index c5e02f324e13..da1c22bab40e 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -82,7 +82,30 @@ enum pci_mmap_state { #define PCI_DMA_FROMDEVICE 2 #define PCI_DMA_NONE 3 -#define DEVICE_COUNT_RESOURCE 12 +/* + * For PCI devices, the region numbers are assigned this way: + */ +enum { + /* #0-5: standard PCI resources */ + PCI_STD_RESOURCES, + PCI_STD_RESOURCE_END = 5, + + /* #6: expansion ROM resource */ + PCI_ROM_RESOURCE, + + /* resources assigned to buses behind the bridge */ +#define PCI_BRIDGE_RESOURCE_NUM 4 + + PCI_BRIDGE_RESOURCES, + PCI_BRIDGE_RESOURCE_END = PCI_BRIDGE_RESOURCES + + PCI_BRIDGE_RESOURCE_NUM - 1, + + /* total resources associated with a PCI device */ + PCI_NUM_RESOURCES, + + /* preserve this for compatibility */ + DEVICE_COUNT_RESOURCE +}; typedef int __bitwise pci_power_t; @@ -274,18 +297,6 @@ static inline void pci_add_saved_cap(struct pci_dev *pci_dev, hlist_add_head(&new_cap->next, &pci_dev->saved_cap_space); } -/* - * For PCI devices, the region numbers are assigned this way: - * - * 0-5 standard PCI regions - * 6 expansion ROM - * 7-10 bridges: address space assigned to buses behind the bridge - */ - -#define PCI_ROM_RESOURCE 6 -#define PCI_BRIDGE_RESOURCES 7 -#define PCI_NUM_RESOURCES 11 - #ifndef PCI_BUS_NUM_RESOURCES #define PCI_BUS_NUM_RESOURCES 16 #endif -- cgit v1.2.2 From e8c331e963c58b83db24b7d0e39e8c07f687dbc6 Mon Sep 17 00:00:00 2001 From: Kenji Kaneshige Date: Wed, 17 Dec 2008 12:09:12 +0900 Subject: PCI hotplug: introduce functions for ACPI slot detection Some ACPI related PCI hotplug code can be shared among PCI hotplug drivers. This patch introduces the following functions in drivers/pci/hotplug/acpi_pcihp.c to share the code, and changes acpiphp and pciehp to use them. - int acpi_pci_detect_ejectable(struct pci_bus *pbus) This checks if the specified PCI bus has ejectable slots. - int acpi_pci_check_ejectable(struct pci_bus *pbus, acpi_handle handle) This checks if the specified handle is ejectable ACPI PCI slot. The 'pbus' parameter is needed to check if 'handle' is PCI related ACPI object. This patch also introduces the following inline function in include/linux/pci-acpi.h, which is useful to get ACPI handle of the PCI bridge from struct pci_bus of the bridge's secondary bus. - static inline acpi_handle acpi_pci_get_bridge_handle(struct pci_bus *pbus) This returns ACPI handle of the PCI bridge which generates PCI bus specified by 'pbus'. Signed-off-by: Kenji Kaneshige Signed-off-by: Jesse Barnes --- include/linux/pci-acpi.h | 9 +++++++++ include/linux/pci_hotplug.h | 2 ++ 2 files changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pci-acpi.h b/include/linux/pci-acpi.h index 871e096e0fbc..042c166f65d5 100644 --- a/include/linux/pci-acpi.h +++ b/include/linux/pci-acpi.h @@ -60,6 +60,15 @@ static inline acpi_handle acpi_find_root_bridge_handle(struct pci_dev *pdev) return acpi_get_pci_rootbridge_handle(pci_domain_nr(pdev->bus), pdev->bus->number); } + +static inline acpi_handle acpi_pci_get_bridge_handle(struct pci_bus *pbus) +{ + int seg = pci_domain_nr(pbus), busnr = pbus->number; + struct pci_dev *bridge = pbus->self; + if (bridge) + return DEVICE_ACPI_HANDLE(&(bridge->dev)); + return acpi_get_pci_rootbridge_handle(seg, busnr); +} #else #if !defined(AE_ERROR) typedef u32 acpi_status; diff --git a/include/linux/pci_hotplug.h b/include/linux/pci_hotplug.h index a00bd1a0f156..f7cc204fab07 100644 --- a/include/linux/pci_hotplug.h +++ b/include/linux/pci_hotplug.h @@ -228,6 +228,8 @@ extern acpi_status acpi_get_hp_params_from_firmware(struct pci_bus *bus, struct hotplug_params *hpp); int acpi_get_hp_hw_control_from_firmware(struct pci_dev *dev, u32 flags); int acpi_root_bridge(acpi_handle handle); +int acpi_pci_check_ejectable(struct pci_bus *pbus, acpi_handle handle); +int acpi_pci_detect_ejectable(struct pci_bus *pbus); #endif #endif -- cgit v1.2.2 From 68feac87de15edfc2c700d2d81b814288c93d003 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Tue, 16 Dec 2008 21:36:55 -0700 Subject: PCI: add pci_common_swizzle() for INTx swizzling This patch adds pci_common_swizzle(), which swizzles INTx values all the way up to a root bridge. This common implementation can replace several architecture-specific ones. This should someday be combined with pci_get_interrupt_pin(), but I left it separate for now to make reviewing easier. Signed-off-by: Bjorn Helgaas Signed-off-by: Jesse Barnes --- include/linux/pci.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index da1c22bab40e..170f9ae2d8a0 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -545,6 +545,7 @@ struct resource *pci_find_parent_resource(const struct pci_dev *dev, struct resource *res); u8 pci_swizzle_interrupt_pin(struct pci_dev *dev, u8 pin); int pci_get_interrupt_pin(struct pci_dev *dev, struct pci_dev **bridge); +u8 pci_common_swizzle(struct pci_dev *dev, u8 *pinp); extern struct pci_dev *pci_dev_get(struct pci_dev *dev); extern void pci_dev_put(struct pci_dev *dev); extern void pci_remove_bus(struct pci_bus *b); -- cgit v1.2.2 From 287d19ce2e67c15e79a187b3bdcbbea1a0a51a7d Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Thu, 18 Dec 2008 09:17:16 -0800 Subject: PCI: revise VPD access interface Change PCI VPD API which was only used by sysfs to something usable in drivers. * move iteration over multiple words to the low level * use conventional types for arguments * add exportable wrapper Signed-off-by: Stephen Hemminger Signed-off-by: Jesse Barnes --- include/linux/pci.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index 170f9ae2d8a0..76079e106895 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -687,6 +687,10 @@ int pci_back_from_sleep(struct pci_dev *dev); /* Functions for PCI Hotplug drivers to use */ int pci_bus_find_capability(struct pci_bus *bus, unsigned int devfn, int cap); +/* Vital product data routines */ +ssize_t pci_read_vpd(struct pci_dev *dev, loff_t pos, size_t count, void *buf); +ssize_t pci_write_vpd(struct pci_dev *dev, loff_t pos, size_t count, const void *buf); + /* Helper functions for low-level code (drivers/pci/setup-[bus,res].c) */ void pci_bus_assign_resources(struct pci_bus *bus); void pci_bus_size_bridges(struct pci_bus *bus); -- cgit v1.2.2 From db5679437a2b938c9127480a3923633721583a4f Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Thu, 18 Dec 2008 09:17:16 -0800 Subject: PCI: add interface to set visible size of VPD The VPD on all devices may not be 32K. Unfortunately, there is no generic way to find the size, so this adds a simple API hook to reset it. Signed-off-by: Stephen Hemminger Signed-off-by: Jesse Barnes --- include/linux/pci.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index 76079e106895..7cbecef19bb6 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -690,6 +690,7 @@ int pci_bus_find_capability(struct pci_bus *bus, unsigned int devfn, int cap); /* Vital product data routines */ ssize_t pci_read_vpd(struct pci_dev *dev, loff_t pos, size_t count, void *buf); ssize_t pci_write_vpd(struct pci_dev *dev, loff_t pos, size_t count, const void *buf); +int pci_vpd_truncate(struct pci_dev *dev, size_t size); /* Helper functions for low-level code (drivers/pci/setup-[bus,res].c) */ void pci_bus_assign_resources(struct pci_bus *bus); -- cgit v1.2.2 From 322162a71bd9fc4edb1b11236e7bc8aa27ccac22 Mon Sep 17 00:00:00 2001 From: Kenji Kaneshige Date: Fri, 19 Dec 2008 15:19:02 +0900 Subject: PCI: pciehp: cleanup register and field definitions Clean up register definitions related to PCI Express Hot plug. - Add register definitions into include/linux/pci_regs.h, and use them instead of pciehp's locally definied register definitions. - Remove pciehp's locally defined register definitions - Remove unused register definitions in pciehp. - Some minor cleanups. Signed-off-by: Kenji Kaneshige Signed-off-by: Jesse Barnes --- include/linux/pci_regs.h | 64 ++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 57 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pci_regs.h b/include/linux/pci_regs.h index 7766488470e4..027815b4635e 100644 --- a/include/linux/pci_regs.h +++ b/include/linux/pci_regs.h @@ -411,20 +411,70 @@ #define PCI_EXP_DEVSTA_AUXPD 0x10 /* AUX Power Detected */ #define PCI_EXP_DEVSTA_TRPND 0x20 /* Transactions Pending */ #define PCI_EXP_LNKCAP 12 /* Link Capabilities */ -#define PCI_EXP_LNKCAP_ASPMS 0xc00 /* ASPM Support */ -#define PCI_EXP_LNKCAP_L0SEL 0x7000 /* L0s Exit Latency */ -#define PCI_EXP_LNKCAP_L1EL 0x38000 /* L1 Exit Latency */ -#define PCI_EXP_LNKCAP_CLKPM 0x40000 /* L1 Clock Power Management */ +#define PCI_EXP_LNKCAP_SLS 0x0000000f /* Supported Link Speeds */ +#define PCI_EXP_LNKCAP_MLW 0x000003f0 /* Maximum Link Width */ +#define PCI_EXP_LNKCAP_ASPMS 0x00000c00 /* ASPM Support */ +#define PCI_EXP_LNKCAP_L0SEL 0x00007000 /* L0s Exit Latency */ +#define PCI_EXP_LNKCAP_L1EL 0x00038000 /* L1 Exit Latency */ +#define PCI_EXP_LNKCAP_CLKPM 0x00040000 /* L1 Clock Power Management */ +#define PCI_EXP_LNKCAP_SDERC 0x00080000 /* Suprise Down Error Reporting Capable */ +#define PCI_EXP_LNKCAP_DLLLARC 0x00100000 /* Data Link Layer Link Active Reporting Capable */ +#define PCI_EXP_LNKCAP_LBNC 0x00200000 /* Link Bandwidth Notification Capability */ +#define PCI_EXP_LNKCAP_PN 0xff000000 /* Port Number */ #define PCI_EXP_LNKCTL 16 /* Link Control */ -#define PCI_EXP_LNKCTL_RL 0x20 /* Retrain Link */ -#define PCI_EXP_LNKCTL_CCC 0x40 /* Common Clock COnfiguration */ +#define PCI_EXP_LNKCTL_ASPMC 0x0003 /* ASPM Control */ +#define PCI_EXP_LNKCTL_RCB 0x0008 /* Read Completion Boundary */ +#define PCI_EXP_LNKCTL_LD 0x0010 /* Link Disable */ +#define PCI_EXP_LNKCTL_RL 0x0020 /* Retrain Link */ +#define PCI_EXP_LNKCTL_CCC 0x0040 /* Common Clock Configuration */ +#define PCI_EXP_LNKCTL_ES 0x0080 /* Extended Synch */ #define PCI_EXP_LNKCTL_CLKREQ_EN 0x100 /* Enable clkreq */ +#define PCI_EXP_LNKCTL_HAWD 0x0200 /* Hardware Autonomous Width Disable */ +#define PCI_EXP_LNKCTL_LBMIE 0x0400 /* Link Bandwidth Management Interrupt Enable */ +#define PCI_EXP_LNKCTL_LABIE 0x0800 /* Lnk Autonomous Bandwidth Interrupt Enable */ #define PCI_EXP_LNKSTA 18 /* Link Status */ -#define PCI_EXP_LNKSTA_LT 0x800 /* Link Training */ +#define PCI_EXP_LNKSTA_CLS 0x000f /* Current Link Speed */ +#define PCI_EXP_LNKSTA_NLW 0x03f0 /* Nogotiated Link Width */ +#define PCI_EXP_LNKSTA_LT 0x0800 /* Link Training */ #define PCI_EXP_LNKSTA_SLC 0x1000 /* Slot Clock Configuration */ +#define PCI_EXP_LNKSTA_DLLLA 0x2000 /* Data Link Layer Link Active */ +#define PCI_EXP_LNKSTA_LBMS 0x4000 /* Link Bandwidth Management Status */ +#define PCI_EXP_LNKSTA_LABS 0x8000 /* Link Autonomous Bandwidth Status */ #define PCI_EXP_SLTCAP 20 /* Slot Capabilities */ +#define PCI_EXP_SLTCAP_ABP 0x00000001 /* Attention Button Present */ +#define PCI_EXP_SLTCAP_PCP 0x00000002 /* Power Controller Present */ +#define PCI_EXP_SLTCAP_MRLSP 0x00000004 /* MRL Sensor Present */ +#define PCI_EXP_SLTCAP_AIP 0x00000008 /* Attention Indicator Present */ +#define PCI_EXP_SLTCAP_PIP 0x00000010 /* Power Indicator Present */ +#define PCI_EXP_SLTCAP_HPS 0x00000020 /* Hot-Plug Surprise */ +#define PCI_EXP_SLTCAP_HPC 0x00000040 /* Hot-Plug Capable */ +#define PCI_EXP_SLTCAP_SPLV 0x00007f80 /* Slot Power Limit Value */ +#define PCI_EXP_SLTCAP_SPLS 0x00018000 /* Slot Power Limit Scale */ +#define PCI_EXP_SLTCAP_EIP 0x00020000 /* Electromechanical Interlock Present */ +#define PCI_EXP_SLTCAP_NCCS 0x00040000 /* No Command Completed Support */ +#define PCI_EXP_SLTCAP_PSN 0xfff80000 /* Physical Slot Number */ #define PCI_EXP_SLTCTL 24 /* Slot Control */ +#define PCI_EXP_SLTCTL_ABPE 0x0001 /* Attention Button Pressed Enable */ +#define PCI_EXP_SLTCTL_PFDE 0x0002 /* Power Fault Detected Enable */ +#define PCI_EXP_SLTCTL_MRLSCE 0x0004 /* MRL Sensor Changed Enable */ +#define PCI_EXP_SLTCTL_PDCE 0x0008 /* Presence Detect Changed Enable */ +#define PCI_EXP_SLTCTL_CCIE 0x0010 /* Command Completed Interrupt Enable */ +#define PCI_EXP_SLTCTL_HPIE 0x0020 /* Hot-Plug Interrupt Enable */ +#define PCI_EXP_SLTCTL_AIC 0x00c0 /* Attention Indicator Control */ +#define PCI_EXP_SLTCTL_PIC 0x0300 /* Power Indicator Control */ +#define PCI_EXP_SLTCTL_PCC 0x0400 /* Power Controller Control */ +#define PCI_EXP_SLTCTL_EIC 0x0800 /* Electromechanical Interlock Control */ +#define PCI_EXP_SLTCTL_DLLSCE 0x1000 /* Data Link Layer State Changed Enable */ #define PCI_EXP_SLTSTA 26 /* Slot Status */ +#define PCI_EXP_SLTSTA_ABP 0x0001 /* Attention Button Pressed */ +#define PCI_EXP_SLTSTA_PFD 0x0002 /* Power Fault Detected */ +#define PCI_EXP_SLTSTA_MRLSC 0x0004 /* MRL Sensor Changed */ +#define PCI_EXP_SLTSTA_PDC 0x0008 /* Presence Detect Changed */ +#define PCI_EXP_SLTSTA_CC 0x0010 /* Command Completed */ +#define PCI_EXP_SLTSTA_MRLSS 0x0020 /* MRL Sensor State */ +#define PCI_EXP_SLTSTA_PDS 0x0040 /* Presence Detect State */ +#define PCI_EXP_SLTSTA_EIS 0x0080 /* Electromechanical Interlock Status */ +#define PCI_EXP_SLTSTA_DLLSC 0x0100 /* Data Link Layer State Changed */ #define PCI_EXP_RTCTL 28 /* Root Control */ #define PCI_EXP_RTCTL_SECEE 0x01 /* System Error on Correctable Error */ #define PCI_EXP_RTCTL_SENFEE 0x02 /* System Error on Non-Fatal Error */ -- cgit v1.2.2 From 6a479079c07211bf348ac8a79754f26bea258f26 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Tue, 23 Dec 2008 03:08:29 +0000 Subject: PCI: Add pci_clear_master() as opposite of pci_set_master() During an online device reset it may be useful to disable bus-mastering. pci_disable_device() does that, and far more besides, so is not suitable for an online reset. Add pci_clear_master() which does just this. Signed-off-by: Ben Hutchings Reviewed-by: Matthew Wilcox Signed-off-by: Jesse Barnes --- include/linux/pci.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index 7cbecef19bb6..0f6d2bb1df9c 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -642,6 +642,7 @@ static inline int pci_is_managed(struct pci_dev *pdev) void pci_disable_device(struct pci_dev *dev); void pci_set_master(struct pci_dev *dev); +void pci_clear_master(struct pci_dev *dev); int pci_set_pcie_reset_state(struct pci_dev *dev, enum pcie_reset_state state); #define HAVE_PCI_SET_MWI int __must_check pci_set_mwi(struct pci_dev *dev); -- cgit v1.2.2 From 16cf0ebc35dd63f72628ba1246132a6fd17bced2 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 5 Jan 2009 14:50:27 +0100 Subject: x86/PCI: Do not use interrupt links for devices using MSI-X pcibios_enable_device() and pcibios_disable_device() don't handle IRQs for devices that have MSI enabled and it should treat the devices with MSI-X enabled in the same way. Signed-off-by: Rafael J. Wysocki Acked-by: Ingo Molnar Signed-off-by: Jesse Barnes --- include/linux/pci.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index 0f6d2bb1df9c..80f8b8b65fde 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -336,6 +336,15 @@ struct pci_bus { #define pci_bus_b(n) list_entry(n, struct pci_bus, node) #define to_pci_bus(n) container_of(n, struct pci_bus, dev) +#ifdef CONFIG_PCI_MSI +static inline bool pci_dev_msi_enabled(struct pci_dev *pci_dev) +{ + return pci_dev->msi_enabled || pci_dev->msix_enabled; +} +#else +static inline bool pci_dev_msi_enabled(struct pci_dev *pci_dev) { return false; } +#endif + /* * Error values that may be returned by PCI functions. */ -- cgit v1.2.2 From 8d1a0a13edecfdcb47fee3238ed4a2af2a2867f9 Mon Sep 17 00:00:00 2001 From: Anders Larsen Date: Thu, 1 Jan 2009 17:17:35 +0100 Subject: qnx: include for definitions of __[us]{8,16,32,64} types On 2008-12-30 11:32:33, Sam Ravnborg wrote: > We have added a few additional validation checks of the userspace headers: ... > 3) We should include and not > 4) If we use a __[us]{8,16,32,64} type then we must include Satisfy these requirements for the linux/qnx*.h headers. Signed-off-by: Anders Larsen Signed-off-by: Sam Ravnborg --- include/linux/qnx4_fs.h | 4 +--- include/linux/qnxtypes.h | 5 ++--- 2 files changed, 3 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/qnx4_fs.h b/include/linux/qnx4_fs.h index 34a196ee7941..787d19ea9f46 100644 --- a/include/linux/qnx4_fs.h +++ b/include/linux/qnx4_fs.h @@ -2,14 +2,12 @@ * Name : qnx4_fs.h * Author : Richard Frowijn * Function : qnx4 global filesystem definitions - * Version : 1.0.2 - * Last modified : 2000-01-31 - * * History : 23-03-1998 created */ #ifndef _LINUX_QNX4_FS_H #define _LINUX_QNX4_FS_H +#include #include #include diff --git a/include/linux/qnxtypes.h b/include/linux/qnxtypes.h index a3eb1137857b..bebbe5cc4fb8 100644 --- a/include/linux/qnxtypes.h +++ b/include/linux/qnxtypes.h @@ -2,9 +2,6 @@ * Name : qnxtypes.h * Author : Richard Frowijn * Function : standard qnx types - * Version : 1.0.2 - * Last modified : 2000-01-06 - * * History : 22-03-1998 created * */ @@ -12,6 +9,8 @@ #ifndef _QNX4TYPES_H #define _QNX4TYPES_H +#include + typedef __le16 qnx4_nxtnt_t; typedef __u8 qnx4_ftype_t; -- cgit v1.2.2 From 14f0ca8eaea42a5b5a69cfcb699665dd2618db5f Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Wed, 7 Jan 2009 21:50:22 +0100 Subject: oprofile: make new cpu buffer functions part of the api This patch creates the new functions oprofile_write_reserve() oprofile_add_data() oprofile_write_commit() and makes them part of the oprofile api. Signed-off-by: Robert Richter --- include/linux/oprofile.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'include/linux') diff --git a/include/linux/oprofile.h b/include/linux/oprofile.h index 1ce9fe572e51..1d9518bc4c58 100644 --- a/include/linux/oprofile.h +++ b/include/linux/oprofile.h @@ -164,4 +164,22 @@ void oprofile_put_buff(unsigned long *buf, unsigned int start, unsigned long oprofile_get_cpu_buffer_size(void); void oprofile_cpu_buffer_inc_smpl_lost(void); +/* cpu buffer functions */ + +struct op_sample; + +struct op_entry { + struct ring_buffer_event *event; + struct op_sample *sample; + unsigned long irq_flags; + unsigned long size; + unsigned long *data; +}; + +void oprofile_write_reserve(struct op_entry *entry, + struct pt_regs * const regs, + unsigned long pc, int code, int size); +int oprofile_add_data(struct op_entry *entry, unsigned long val); +int oprofile_write_commit(struct op_entry *entry); + #endif /* OPROFILE_H */ -- cgit v1.2.2 From 87df4de8073f922a1f643b9fa6ba0412d5529ecf Mon Sep 17 00:00:00 2001 From: Benny Halevy Date: Mon, 15 Dec 2008 19:42:03 +0200 Subject: nfsd: last_byte_offset refactor the nfs4 server lock code to use last_byte_offset to compute the last byte covered by the lock. Check for overflow so that the last byte is set to NFS4_MAX_UINT64 if offset + len wraps around. Also, use NFS4_MAX_UINT64 for ~(u64)0 where appropriate. Signed-off-by: Benny Halevy Signed-off-by: J. Bruce Fields --- include/linux/nfs4.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h index ea0366769484..b912311a56b1 100644 --- a/include/linux/nfs4.h +++ b/include/linux/nfs4.h @@ -88,6 +88,8 @@ #define NFS4_ACE_GENERIC_EXECUTE 0x001200A0 #define NFS4_ACE_MASK_ALL 0x001F01FF +#define NFS4_MAX_UINT64 (~(u64)0) + enum nfs4_acl_whotype { NFS4_ACL_WHO_NAMED = 0, NFS4_ACL_WHO_OWNER, -- cgit v1.2.2 From db43910cb42285a99f45f7e0a0a32e32d0b61dcf Mon Sep 17 00:00:00 2001 From: Benny Halevy Date: Mon, 15 Dec 2008 19:42:24 +0200 Subject: nfsd: get rid of NFSD_VERSION Signed-off-by: Benny Halevy Signed-off-by: J. Bruce Fields --- include/linux/nfsd/nfsd.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/nfsd/nfsd.h b/include/linux/nfsd/nfsd.h index 21269405ffe2..e19f45991b2e 100644 --- a/include/linux/nfsd/nfsd.h +++ b/include/linux/nfsd/nfsd.h @@ -23,7 +23,6 @@ /* * nfsd version */ -#define NFSD_VERSION "0.5" #define NFSD_SUPPORTED_MINOR_VERSION 0 /* -- cgit v1.2.2 From 5886188dc7ba9a76babcd37452f44079a9a77f71 Mon Sep 17 00:00:00 2001 From: Benjamin Krill Date: Wed, 7 Jan 2009 10:32:38 +0100 Subject: serial: Add driver for the Cell Network Processor serial port NWP device Add support for the nwp serial device which is connected to a DCR bus. It uses the of_serial device driver to determine necessary properties from the device tree. The supported device is added as serial port number 85. NWP stands for network processor and it is part of the QPACE - Quantum Chromodynamics Parallel Computing on the Cell Broadband Engine project. The implementation is a lightweight uart implementation with the focus to consume as little resources as possible and it is connected to a DCR bus. Signed-off-by: Benjamin Krill Signed-off-by: Arnd Bergmann Signed-off-by: Andrew Morton Signed-off-by: Benjamin Herrenschmidt --- include/linux/nwpserial.h | 18 ++++++++++++++++++ include/linux/serial_core.h | 3 +++ 2 files changed, 21 insertions(+) create mode 100644 include/linux/nwpserial.h (limited to 'include/linux') diff --git a/include/linux/nwpserial.h b/include/linux/nwpserial.h new file mode 100644 index 000000000000..9acb21572eaf --- /dev/null +++ b/include/linux/nwpserial.h @@ -0,0 +1,18 @@ +/* + * Serial Port driver for a NWP uart device + * + * Copyright (C) 2008 IBM Corp., Benjamin Krill + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + */ +#ifndef _NWPSERIAL_H +#define _NWPSERIAL_H + +int nwpserial_register_port(struct uart_port *port); +void nwpserial_unregister_port(int line); + +#endif /* _NWPSERIAL_H */ diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h index b4199841f1fc..90bbbf0b1161 100644 --- a/include/linux/serial_core.h +++ b/include/linux/serial_core.h @@ -161,6 +161,9 @@ #define PORT_S3C6400 84 +/* NWPSERIAL */ +#define PORT_NWPSERIAL 85 + #ifdef __KERNEL__ #include -- cgit v1.2.2 From 8feae13110d60cc6287afabc2887366b0eb226c2 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 8 Jan 2009 12:04:47 +0000 Subject: NOMMU: Make VMAs per MM as for MMU-mode linux Make VMAs per mm_struct as for MMU-mode linux. This solves two problems: (1) In SYSV SHM where nattch for a segment does not reflect the number of shmat's (and forks) done. (2) In mmap() where the VMA's vm_mm is set to point to the parent mm by an exec'ing process when VM_EXECUTABLE is specified, regardless of the fact that a VMA might be shared and already have its vm_mm assigned to another process or a dead process. A new struct (vm_region) is introduced to track a mapped region and to remember the circumstances under which it may be shared and the vm_list_struct structure is discarded as it's no longer required. This patch makes the following additional changes: (1) Regions are now allocated with alloc_pages() rather than kmalloc() and with no recourse to __GFP_COMP, so the pages are not composite. Instead, each page has a reference on it held by the region. Anything else that is interested in such a page will have to get a reference on it to retain it. When the pages are released due to unmapping, each page is passed to put_page() and will be freed when the page usage count reaches zero. (2) Excess pages are trimmed after an allocation as the allocation must be made as a power-of-2 quantity of pages. (3) VMAs are added to the parent MM's R/B tree and mmap lists. As an MM may end up with overlapping VMAs within the tree, the VMA struct address is appended to the sort key. (4) Non-anonymous VMAs are now added to the backing inode's prio list. (5) Holes may be punched in anonymous VMAs with munmap(), releasing parts of the backing region. The VMA and region structs will be split if necessary. (6) sys_shmdt() only releases one attachment to a SYSV IPC shared memory segment instead of all the attachments at that addresss. Multiple shmat()'s return the same address under NOMMU-mode instead of different virtual addresses as under MMU-mode. (7) Core dumping for ELF-FDPIC requires fewer exceptions for NOMMU-mode. (8) /proc/maps is now the global list of mapped regions, and may list bits that aren't actually mapped anywhere. (9) /proc/meminfo gains a line (tagged "MmapCopy") that indicates the amount of RAM currently allocated by mmap to hold mappable regions that can't be mapped directly. These are copies of the backing device or file if not anonymous. These changes make NOMMU mode more similar to MMU mode. The downside is that NOMMU mode requires some extra memory to track things over NOMMU without this patch (VMAs are no longer shared, and there are now region structs). Signed-off-by: David Howells Tested-by: Mike Frysinger Acked-by: Paul Mundt --- include/linux/mm.h | 18 ++++++------------ include/linux/mm_types.h | 18 +++++++++++++++++- 2 files changed, 23 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 4a3d28c86443..b91a73fd1bcc 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -56,19 +56,9 @@ extern unsigned long mmap_min_addr; extern struct kmem_cache *vm_area_cachep; -/* - * This struct defines the per-mm list of VMAs for uClinux. If CONFIG_MMU is - * disabled, then there's a single shared list of VMAs maintained by the - * system, and mm's subscribe to these individually - */ -struct vm_list_struct { - struct vm_list_struct *next; - struct vm_area_struct *vma; -}; - #ifndef CONFIG_MMU -extern struct rb_root nommu_vma_tree; -extern struct rw_semaphore nommu_vma_sem; +extern struct rb_root nommu_region_tree; +extern struct rw_semaphore nommu_region_sem; extern unsigned int kobjsize(const void *objp); #endif @@ -1061,6 +1051,7 @@ extern void memmap_init_zone(unsigned long, int, unsigned long, unsigned long, enum memmap_context); extern void setup_per_zone_pages_min(void); extern void mem_init(void); +extern void __init mmap_init(void); extern void show_mem(void); extern void si_meminfo(struct sysinfo * val); extern void si_meminfo_node(struct sysinfo *val, int nid); @@ -1072,6 +1063,9 @@ extern void setup_per_cpu_pageset(void); static inline void setup_per_cpu_pageset(void) {} #endif +/* nommu.c */ +extern atomic_t mmap_pages_allocated; + /* prio_tree.c */ void vma_prio_tree_add(struct vm_area_struct *, struct vm_area_struct *old); void vma_prio_tree_insert(struct vm_area_struct *, struct prio_tree_root *); diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 9cfc9b627fdd..1c1e0d3a1714 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -96,6 +96,22 @@ struct page { #endif /* WANT_PAGE_VIRTUAL */ }; +/* + * A region containing a mapping of a non-memory backed file under NOMMU + * conditions. These are held in a global tree and are pinned by the VMAs that + * map parts of them. + */ +struct vm_region { + struct rb_node vm_rb; /* link in global region tree */ + unsigned long vm_flags; /* VMA vm_flags */ + unsigned long vm_start; /* start address of region */ + unsigned long vm_end; /* region initialised to here */ + unsigned long vm_pgoff; /* the offset in vm_file corresponding to vm_start */ + struct file *vm_file; /* the backing file or NULL */ + + atomic_t vm_usage; /* region usage count */ +}; + /* * This struct defines a memory VMM memory area. There is one of these * per VM-area/task. A VM area is any part of the process virtual memory @@ -152,7 +168,7 @@ struct vm_area_struct { unsigned long vm_truncate_count;/* truncate_count or restart_addr */ #ifndef CONFIG_MMU - atomic_t vm_usage; /* refcount (VMAs shared if !MMU) */ + struct vm_region *vm_region; /* NOMMU mapping region */ #endif #ifdef CONFIG_NUMA struct mempolicy *vm_policy; /* NUMA policy for the VMA */ -- cgit v1.2.2 From dd8632a12e500a684478fea0951f380478d56fed Mon Sep 17 00:00:00 2001 From: Paul Mundt Date: Thu, 8 Jan 2009 12:04:47 +0000 Subject: NOMMU: Make mmap allocation page trimming behaviour configurable. NOMMU mmap allocates a piece of memory for an mmap that's rounded up in size to the nearest power-of-2 number of pages. Currently it then discards the excess pages back to the page allocator, making that memory available for use by other things. This can, however, cause greater amount of fragmentation. To counter this, a sysctl is added in order to fine-tune the trimming behaviour. The default behaviour remains to trim pages aggressively, while this can either be disabled completely or set to a higher page-granular watermark in order to have finer-grained control. vm region vm_top bits taken from an earlier patch by David Howells. Signed-off-by: Paul Mundt Signed-off-by: David Howells Tested-by: Mike Frysinger --- include/linux/mm_types.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 1c1e0d3a1714..92915e81443f 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -106,6 +106,7 @@ struct vm_region { unsigned long vm_flags; /* VMA vm_flags */ unsigned long vm_start; /* start address of region */ unsigned long vm_end; /* region initialised to here */ + unsigned long vm_top; /* region allocated to here */ unsigned long vm_pgoff; /* the offset in vm_file corresponding to vm_start */ struct file *vm_file; /* the backing file or NULL */ -- cgit v1.2.2 From e2387d6c20752ccdb2895ba5de664fa39652f4cc Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Mon, 17 Nov 2008 14:35:44 +0000 Subject: leds: Make header variable naming consistent There is one place where the struct led_classdev as the function argument is named differently. Fix it. Signed-off-by: Wolfram Sang Signed-off-by: Richard Purdie --- include/linux/leds.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/leds.h b/include/linux/leds.h index d3a73f5a48c3..3c1a8ce6a5ea 100644 --- a/include/linux/leds.h +++ b/include/linux/leds.h @@ -62,7 +62,7 @@ struct led_classdev { extern int led_classdev_register(struct device *parent, struct led_classdev *led_cdev); -extern void led_classdev_unregister(struct led_classdev *lcd); +extern void led_classdev_unregister(struct led_classdev *led_cdev); extern void led_classdev_suspend(struct led_classdev *led_cdev); extern void led_classdev_resume(struct led_classdev *led_cdev); -- cgit v1.2.2 From 934cd3f979a1daacbd403398f2c7a8f6720c33aa Mon Sep 17 00:00:00 2001 From: Riku Voipio Date: Wed, 3 Dec 2008 08:21:36 +0000 Subject: leds: leds-pcs9532 - Move i2c work to a workqueque Apparently these might be called under atomic context, and i2c operations may sleep. BUG found by Ross Burton Signed-off-by: Riku Voipio Signed-off-by: Richard Purdie --- include/linux/leds-pca9532.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/leds-pca9532.h b/include/linux/leds-pca9532.h index 81b4207deb95..96eea90f01a8 100644 --- a/include/linux/leds-pca9532.h +++ b/include/linux/leds-pca9532.h @@ -15,6 +15,7 @@ #define __LINUX_PCA9532_H #include +#include enum pca9532_state { PCA9532_OFF = 0x0, @@ -31,6 +32,7 @@ struct pca9532_led { struct i2c_client *client; char *name; struct led_classdev ldev; + struct work_struct work; enum pca9532_type type; enum pca9532_state state; }; -- cgit v1.2.2 From 0081e8020ebd814a99e45720a10e869a54ee08a6 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Thu, 4 Dec 2008 16:52:33 +0000 Subject: leds: Add WM8350 LED driver The voltage and current regulators on the WM8350 AudioPlus PMIC can be used in concert to provide a power efficient LED driver. This driver implements support for this within the standard LED class. Platform initialisation code should configure the LED hardware in the init callback provided by the WM8350 core driver. The callback should use wm8350_isink_set_flash(), wm8350_dcdc25_set_mode() and wm8350_dcdc_set_slot() to configure the operating parameters of the regulators for their hardware and then then use wm8350_register_led() to instantiate the LED driver. This driver was originally written by Liam Girdwood, though it has been extensively modified since then. Signed-off-by: Mark Brown Signed-off-by: Richard Purdie --- include/linux/mfd/wm8350/pmic.h | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mfd/wm8350/pmic.h b/include/linux/mfd/wm8350/pmic.h index 96acbfc8aa12..be3264e286e0 100644 --- a/include/linux/mfd/wm8350/pmic.h +++ b/include/linux/mfd/wm8350/pmic.h @@ -13,6 +13,10 @@ #ifndef __LINUX_MFD_WM8350_PMIC_H #define __LINUX_MFD_WM8350_PMIC_H +#include +#include +#include + /* * Register values. */ @@ -700,6 +704,33 @@ struct wm8350; struct platform_device; struct regulator_init_data; +/* + * WM8350 LED platform data + */ +struct wm8350_led_platform_data { + const char *name; + const char *default_trigger; + int max_uA; +}; + +struct wm8350_led { + struct platform_device *pdev; + struct mutex mutex; + struct work_struct work; + spinlock_t value_lock; + enum led_brightness value; + struct led_classdev cdev; + int max_uA_index; + int enabled; + + struct regulator *isink; + struct regulator_consumer_supply isink_consumer; + struct regulator_init_data isink_init; + struct regulator *dcdc; + struct regulator_consumer_supply dcdc_consumer; + struct regulator_init_data dcdc_init; +}; + struct wm8350_pmic { /* Number of regulators of each type on this device */ int max_dcdc; @@ -717,10 +748,15 @@ struct wm8350_pmic { /* regulator devices */ struct platform_device *pdev[NUM_WM8350_REGULATORS]; + + /* LED devices */ + struct wm8350_led led[2]; }; int wm8350_register_regulator(struct wm8350 *wm8350, int reg, struct regulator_init_data *initdata); +int wm8350_register_led(struct wm8350 *wm8350, int lednum, int dcdc, int isink, + struct wm8350_led_platform_data *pdata); /* * Additional DCDC control not supported via regulator API -- cgit v1.2.2 From c835ee7f4154992e6cf0674d7ee136f5d36247a4 Mon Sep 17 00:00:00 2001 From: Richard Purdie Date: Tue, 6 Jan 2009 21:00:19 +0000 Subject: backlight: Add suspend/resume support to the backlight core Add suspend/resume support to the backlight core and enable use of it by appropriate drivers. Signed-off-by: Richard Purdie --- include/linux/backlight.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include/linux') diff --git a/include/linux/backlight.h b/include/linux/backlight.h index 1ee9488ca2e4..79ca2da81c87 100644 --- a/include/linux/backlight.h +++ b/include/linux/backlight.h @@ -31,6 +31,10 @@ struct backlight_device; struct fb_info; struct backlight_ops { + unsigned int options; + +#define BL_CORE_SUSPENDRESUME (1 << 0) + /* Notify the backlight driver some property has changed */ int (*update_status)(struct backlight_device *); /* Return the current backlight brightness (accounting for power, @@ -51,7 +55,19 @@ struct backlight_properties { modes; 4: full off), see FB_BLANK_XXX */ int power; /* FB Blanking active? (values as for power) */ + /* Due to be removed, please use (state & BL_CORE_FBBLANK) */ int fb_blank; + /* Flags used to signal drivers of state changes */ + /* Upper 4 bits are reserved for driver internal use */ + unsigned int state; + +#define BL_CORE_SUSPENDED (1 << 0) /* backlight is suspended */ +#define BL_CORE_FBBLANK (1 << 1) /* backlight is under an fb blank event */ +#define BL_CORE_DRIVER4 (1 << 28) /* reserved for driver specific use */ +#define BL_CORE_DRIVER3 (1 << 29) /* reserved for driver specific use */ +#define BL_CORE_DRIVER2 (1 << 30) /* reserved for driver specific use */ +#define BL_CORE_DRIVER1 (1 << 31) /* reserved for driver specific use */ + }; struct backlight_device { -- cgit v1.2.2 From 1107ba885e46964316c083d441d5dd185b6c9e49 Mon Sep 17 00:00:00 2001 From: Alex Zeffertt Date: Wed, 7 Jan 2009 18:07:11 -0800 Subject: xen: add xenfs to allow usermode <-> Xen interaction The xenfs filesystem exports various interfaces to usermode. Initially this exports a file to allow usermode to interact with xenbus/xenstore. Traditionally this appeared in /proc/xen. Rather than extending procfs, this patch adds a backward-compat mountpoint on /proc/xen, and provides a xenfs filesystem which can be mounted there. Signed-off-by: Alex Zeffertt Signed-off-by: Jeremy Fitzhardinge Cc: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/magic.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/magic.h b/include/linux/magic.h index f7f3fdddbef0..439f6f3cb0c4 100644 --- a/include/linux/magic.h +++ b/include/linux/magic.h @@ -13,6 +13,7 @@ #define EFS_SUPER_MAGIC 0x414A53 #define EXT2_SUPER_MAGIC 0xEF53 #define EXT3_SUPER_MAGIC 0xEF53 +#define XENFS_SUPER_MAGIC 0xabba1974 #define EXT4_SUPER_MAGIC 0xEF53 #define HPFS_SUPER_MAGIC 0xf995e849 #define ISOFS_SUPER_MAGIC 0x9660 -- cgit v1.2.2 From 18a82eb9f980b5e02cea651e4ecda26265d98933 Mon Sep 17 00:00:00 2001 From: Pekka J Enberg Date: Wed, 7 Jan 2009 18:07:19 -0800 Subject: ext2: allocate ->s_blockgroup_lock separately As spotted by kmemtrace, struct ext2_sb_info is 17024 bytes on 64-bit which makes it a very bad fit for SLAB allocators. The culprit of the wasted memory is ->s_blockgroup_lock which can be as big as 16 KB when NR_CPUS >= 32. To fix that, allocate ->s_blockgroup_lock, which fits nicely in a order 2 page in the worst case, separately. This shinks down struct ext2_sb_info enough to fit a 1 KB slab cache so now we allocate 16 KB + 1 KB instead of 32 KB saving 15 KB of memory. Acked-by: Andreas Dilger Signed-off-by: Pekka Enberg Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/ext2_fs_sb.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ext2_fs_sb.h b/include/linux/ext2_fs_sb.h index dc541f3653d1..1cdb66367c98 100644 --- a/include/linux/ext2_fs_sb.h +++ b/include/linux/ext2_fs_sb.h @@ -101,7 +101,7 @@ struct ext2_sb_info { struct percpu_counter s_freeblocks_counter; struct percpu_counter s_freeinodes_counter; struct percpu_counter s_dirs_counter; - struct blockgroup_lock s_blockgroup_lock; + struct blockgroup_lock *s_blockgroup_lock; /* root of the per fs reservation window tree */ spinlock_t s_rsv_window_lock; struct rb_root s_rsv_window_root; @@ -111,7 +111,7 @@ struct ext2_sb_info { static inline spinlock_t * sb_bgl_lock(struct ext2_sb_info *sbi, unsigned int block_group) { - return bgl_lock_ptr(&sbi->s_blockgroup_lock, block_group); + return bgl_lock_ptr(sbi->s_blockgroup_lock, block_group); } #endif /* _LINUX_EXT2_FS_SB */ -- cgit v1.2.2 From 0e090f1e05a563cc9acdda442767176bf1616001 Mon Sep 17 00:00:00 2001 From: Duane Griffin Date: Wed, 7 Jan 2009 18:07:20 -0800 Subject: ext2: don't inherit inappropriate inode flags from parent At present BTREE/INDEX is the only flag that new ext2 inodes do NOT inherit from their parent. In addition prevent the flags DIRTY, ECOMPR, INDEX, IMAGIC and TOPDIR from being inherited. List inheritable flags explicitly to prevent future flags from accidentally being inherited. This fixes the TOPDIR flag inheritance bug reported at http://bugzilla.kernel.org/show_bug.cgi?id=9866. Signed-off-by: Duane Griffin Acked-by: Andreas Dilger Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/ext2_fs.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ext2_fs.h b/include/linux/ext2_fs.h index 78c775a83f7c..c3a051819363 100644 --- a/include/linux/ext2_fs.h +++ b/include/linux/ext2_fs.h @@ -194,6 +194,13 @@ struct ext2_group_desc #define EXT2_FL_USER_VISIBLE FS_FL_USER_VISIBLE /* User visible flags */ #define EXT2_FL_USER_MODIFIABLE FS_FL_USER_MODIFIABLE /* User modifiable flags */ +/* Flags that should be inherited by new inodes from their parent. */ +#define EXT2_FL_INHERITED (EXT2_SECRM_FL | EXT2_UNRM_FL | EXT2_COMPR_FL |\ + EXT2_SYNC_FL | EXT2_IMMUTABLE_FL | EXT2_APPEND_FL |\ + EXT2_NODUMP_FL | EXT2_NOATIME_FL | EXT2_COMPRBLK_FL|\ + EXT2_NOCOMP_FL | EXT2_JOURNAL_DATA_FL |\ + EXT2_NOTAIL_FL | EXT2_DIRSYNC_FL) + /* * ioctl commands */ -- cgit v1.2.2 From ef8b646183868b2d042fa6cde0eef2a31263ff85 Mon Sep 17 00:00:00 2001 From: Duane Griffin Date: Wed, 7 Jan 2009 18:07:21 -0800 Subject: ext2: tighten restrictions on inode flags At the moment there are few restrictions on which flags may be set on which inodes. Specifically DIRSYNC may only be set on directories and IMMUTABLE and APPEND may not be set on links. Tighten that to disallow TOPDIR being set on non-directories and only NODUMP and NOATIME to be set on non-regular file, non-directories. Introduces a flags masking function which masks flags based on mode and use it during inode creation and when flags are set via the ioctl to facilitate future consistency. Signed-off-by: Duane Griffin Acked-by: Andreas Dilger Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/ext2_fs.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ext2_fs.h b/include/linux/ext2_fs.h index c3a051819363..121720d74e15 100644 --- a/include/linux/ext2_fs.h +++ b/include/linux/ext2_fs.h @@ -201,6 +201,23 @@ struct ext2_group_desc EXT2_NOCOMP_FL | EXT2_JOURNAL_DATA_FL |\ EXT2_NOTAIL_FL | EXT2_DIRSYNC_FL) +/* Flags that are appropriate for regular files (all but dir-specific ones). */ +#define EXT2_REG_FLMASK (~(EXT2_DIRSYNC_FL | EXT2_TOPDIR_FL)) + +/* Flags that are appropriate for non-directories/regular files. */ +#define EXT2_OTHER_FLMASK (EXT2_NODUMP_FL | EXT2_NOATIME_FL) + +/* Mask out flags that are inappropriate for the given type of inode. */ +static inline __u32 ext2_mask_flags(umode_t mode, __u32 flags) +{ + if (S_ISDIR(mode)) + return flags; + else if (S_ISREG(mode)) + return flags & EXT2_REG_FLMASK; + else + return flags & EXT2_OTHER_FLMASK; +} + /* * ioctl commands */ -- cgit v1.2.2 From f420d4dc4272fd223986762df2ad06056ddebada Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Wed, 7 Jan 2009 18:07:24 -0800 Subject: jbd: improve fsync batching There is a flaw with the way jbd handles fsync batching. If we fsync() a file and we were not the last person to run fsync() on this fs then we automatically sleep for 1 jiffie in order to wait for new writers to join into the transaction before forcing the commit. The problem with this is that with really fast storage (ie a Clariion) the time it takes to commit a transaction to disk is way faster than 1 jiffie in most cases, so sleeping means waiting longer with nothing to do than if we just committed the transaction and kept going. Ric Wheeler noticed this when using fs_mark with more than 1 thread, the throughput would plummet as he added more threads. This patch attempts to fix this problem by recording the average time in nanoseconds that it takes to commit a transaction to disk, and what time we started the transaction. If we run an fsync() and we have been running for less time than it takes to commit the transaction to disk, we sleep for the delta amount of time and then commit to disk. We acheive sub-jiffie sleeping using schedule_hrtimeout. This means that the wait time is auto-tuned to the speed of the underlying disk, instead of having this static timeout. I weighted the average according to somebody's comments (Andreas Dilger I think) in order to help normalize random outliers where we take way longer or way less time to commit than the average. I also have a min() check in there to make sure we don't sleep longer than a jiffie in case our storage is super slow, this was requested by Andrew. I unfortunately do not have access to a Clariion, so I had to use a ramdisk to represent a super fast array. I tested with a SATA drive with barrier=1 to make sure there was no regression with local disks, I tested with a 4 way multipathed Apple Xserve RAID array and of course the ramdisk. I ran the following command fs_mark -d /mnt/ext3-test -s 4096 -n 2000 -D 64 -t $i where $i was 2, 4, 8, 16 and 32. I mkfs'ed the fs each time. Here are my results type threads with patch without patch sata 2 24.6 26.3 sata 4 49.2 48.1 sata 8 70.1 67.0 sata 16 104.0 94.1 sata 32 153.6 142.7 xserve 2 246.4 222.0 xserve 4 480.0 440.8 xserve 8 829.5 730.8 xserve 16 1172.7 1026.9 xserve 32 1816.3 1650.5 ramdisk 2 2538.3 1745.6 ramdisk 4 2942.3 661.9 ramdisk 8 2882.5 999.8 ramdisk 16 2738.7 1801.9 ramdisk 32 2541.9 2394.0 Signed-off-by: Josef Bacik Cc: Andreas Dilger Cc: Arjan van de Ven Cc: Ric Wheeler Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/jbd.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include/linux') diff --git a/include/linux/jbd.h b/include/linux/jbd.h index 346e2b80be7d..6384b19efe64 100644 --- a/include/linux/jbd.h +++ b/include/linux/jbd.h @@ -542,6 +542,11 @@ struct transaction_s */ unsigned long t_expires; + /* + * When this transaction started, in nanoseconds [no locking] + */ + ktime_t t_start_time; + /* * How many handles used this transaction? [t_handle_lock] */ @@ -798,8 +803,18 @@ struct journal_s struct buffer_head **j_wbuf; int j_wbufsize; + /* + * this is the pid of the last person to run a synchronous operation + * through the journal. + */ pid_t j_last_sync_writer; + /* + * the average amount of time in nanoseconds it takes to commit a + * transaction to the disk. [j_state_lock] + */ + u64 j_average_commit_time; + /* * An opaque pointer to fs-private information. ext3 puts its * superblock pointer here -- cgit v1.2.2 From 5df096d67ec2b6578518caed7d57317a4b807aa1 Mon Sep 17 00:00:00 2001 From: Pekka Enberg Date: Wed, 7 Jan 2009 18:07:25 -0800 Subject: ext3: allocate ->s_blockgroup_lock separately As spotted by kmemtrace, struct ext3_sb_info is 17152 bytes on 64-bit which makes it a very bad fit for SLAB allocators. The culprit of the wasted memory is ->s_blockgroup_lock which can be as big as 16 KB when NR_CPUS >= 32. To fix that, allocate ->s_blockgroup_lock, which fits nicely in a order 2 page in the worst case, separately. This shinks down struct ext3_sb_info enough to fit a 1 KB slab cache so now we allocate 16 KB + 1 KB instead of 32 KB saving 15 KB of memory. Acked-by: Andreas Dilger Signed-off-by: Pekka Enberg Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/ext3_fs_sb.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ext3_fs_sb.h b/include/linux/ext3_fs_sb.h index e024e38248ff..76fdc0f4b028 100644 --- a/include/linux/ext3_fs_sb.h +++ b/include/linux/ext3_fs_sb.h @@ -60,7 +60,7 @@ struct ext3_sb_info { struct percpu_counter s_freeblocks_counter; struct percpu_counter s_freeinodes_counter; struct percpu_counter s_dirs_counter; - struct blockgroup_lock s_blockgroup_lock; + struct blockgroup_lock *s_blockgroup_lock; /* root of the per fs reservation window tree */ spinlock_t s_rsv_window_lock; @@ -86,7 +86,7 @@ struct ext3_sb_info { static inline spinlock_t * sb_bgl_lock(struct ext3_sb_info *sbi, unsigned int block_group) { - return bgl_lock_ptr(&sbi->s_blockgroup_lock, block_group); + return bgl_lock_ptr(sbi->s_blockgroup_lock, block_group); } #endif /* _LINUX_EXT3_FS_SB */ -- cgit v1.2.2 From 2e8671cb566da993425d324fc355af31edc6e7f1 Mon Sep 17 00:00:00 2001 From: Duane Griffin Date: Wed, 7 Jan 2009 18:07:26 -0800 Subject: ext3: don't inherit inappropriate inode flags from parent At present INDEX is the only flag that new ext3 inodes do NOT inherit from their parent. In addition prevent the flags DIRTY, ECOMPR, IMAGIC and TOPDIR from being inherited. List inheritable flags explicitly to prevent future flags from accidentally being inherited. This fixes the TOPDIR flag inheritance bug reported at http://bugzilla.kernel.org/show_bug.cgi?id=9866. Signed-off-by: Duane Griffin Acked-by: Andreas Dilger Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/ext3_fs.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ext3_fs.h b/include/linux/ext3_fs.h index d14f02918483..b745619a9b8e 100644 --- a/include/linux/ext3_fs.h +++ b/include/linux/ext3_fs.h @@ -178,6 +178,13 @@ struct ext3_group_desc #define EXT3_FL_USER_VISIBLE 0x0003DFFF /* User visible flags */ #define EXT3_FL_USER_MODIFIABLE 0x000380FF /* User modifiable flags */ +/* Flags that should be inherited by new inodes from their parent. */ +#define EXT3_FL_INHERITED (EXT3_SECRM_FL | EXT3_UNRM_FL | EXT3_COMPR_FL |\ + EXT3_SYNC_FL | EXT3_IMMUTABLE_FL | EXT3_APPEND_FL |\ + EXT3_NODUMP_FL | EXT3_NOATIME_FL | EXT3_COMPRBLK_FL|\ + EXT3_NOCOMPR_FL | EXT3_JOURNAL_DATA_FL |\ + EXT3_NOTAIL_FL | EXT3_DIRSYNC_FL) + /* * Inode dynamic state flags */ -- cgit v1.2.2 From 04143e2fb9d512c21e1dcfb561dbb0445dcfdc8c Mon Sep 17 00:00:00 2001 From: Duane Griffin Date: Wed, 7 Jan 2009 18:07:26 -0800 Subject: ext3: tighten restrictions on inode flags At the moment there are few restrictions on which flags may be set on which inodes. Specifically DIRSYNC may only be set on directories and IMMUTABLE and APPEND may not be set on links. Tighten that to disallow TOPDIR being set on non-directories and only NODUMP and NOATIME to be set on non-regular file, non-directories. Introduces a flags masking function which masks flags based on mode and use it during inode creation and when flags are set via the ioctl to facilitate future consistency. Signed-off-by: Duane Griffin Acked-by: Andreas Dilger Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/ext3_fs.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ext3_fs.h b/include/linux/ext3_fs.h index b745619a9b8e..d76800f6ecf0 100644 --- a/include/linux/ext3_fs.h +++ b/include/linux/ext3_fs.h @@ -185,6 +185,23 @@ struct ext3_group_desc EXT3_NOCOMPR_FL | EXT3_JOURNAL_DATA_FL |\ EXT3_NOTAIL_FL | EXT3_DIRSYNC_FL) +/* Flags that are appropriate for regular files (all but dir-specific ones). */ +#define EXT3_REG_FLMASK (~(EXT3_DIRSYNC_FL | EXT3_TOPDIR_FL)) + +/* Flags that are appropriate for non-directories/regular files. */ +#define EXT3_OTHER_FLMASK (EXT3_NODUMP_FL | EXT3_NOATIME_FL) + +/* Mask out flags that are inappropriate for the given type of inode. */ +static inline __u32 ext3_mask_flags(umode_t mode, __u32 flags) +{ + if (S_ISDIR(mode)) + return flags; + else if (S_ISREG(mode)) + return flags & EXT3_REG_FLMASK; + else + return flags & EXT3_OTHER_FLMASK; +} + /* * Inode dynamic state flags */ -- cgit v1.2.2 From b2aa30f7bb381e04c93eed106089ba55553955f1 Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Wed, 7 Jan 2009 18:07:37 -0800 Subject: cgroups: don't put struct cgroupfs_root protected by RCU We don't access struct cgroupfs_root in fast path, so we should not put struct cgroupfs_root protected by RCU But the comment in struct cgroup_subsys.root confuse us. struct cgroup_subsys.root is used in these places: 1 find_css_set(): if (ss->root->subsys_list.next == &ss->sibling) 2 rebind_subsystems(): if (ss->root != &rootnode) rcu_assign_pointer(ss->root, root); rcu_assign_pointer(subsys[i]->root, &rootnode); 3 cgroup_has_css_refs(): if (ss->root != cgrp->root) 4 cgroup_init_subsys(): ss->root = &rootnode; 5 proc_cgroupstats_show(): ss->name, ss->root->subsys_bits, ss->root->number_of_cgroups, !ss->disabled); 6 cgroup_clone(): root = subsys->root; if ((root != subsys->root) || All these place we have held cgroup_lock() or we don't dereference to struct cgroupfs_root. It's means wo don't need RCU when use struct cgroup_subsys.root, and we should not put struct cgroupfs_root protected by RCU. Signed-off-by: Lai Jiangshan Reviewed-by: Paul Menage Cc: KAMEZAWA Hiroyuki Cc: Pavel Emelyanov Cc: Balbir Singh Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/cgroup.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 08b78c09b09a..f68dfd8dd53a 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -337,7 +337,6 @@ struct cgroup_subsys { #define MAX_CGROUP_TYPE_NAMELEN 32 const char *name; - /* Protected by RCU */ struct cgroupfs_root *root; struct list_head sibling; -- cgit v1.2.2 From a47295e6bc42ad35f9c15ac66f598aa24debd4e2 Mon Sep 17 00:00:00 2001 From: Paul Menage Date: Wed, 7 Jan 2009 18:07:44 -0800 Subject: cgroups: make cgroup_path() RCU-safe Fix races between /proc/sched_debug by freeing cgroup objects via an RCU callback. Thus any cgroup reference obtained from an RCU-safe source will remain valid during the RCU section. Since dentries are also RCU-safe, this allows us to traverse up the tree safely. Additionally, make cgroup_path() check for a NULL cgrp->dentry to avoid trying to report a path for a partially-created cgroup. [lizf@cn.fujitsu.com: call deactive_super() in cgroup_diput()] Signed-off-by: Paul Menage Reviewed-by: Li Zefan Tested-by: Li Zefan Cc: Peter Zijlstra Signed-off-by: Li Zefan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/cgroup.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index f68dfd8dd53a..73d1c730c3c4 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -116,7 +116,7 @@ struct cgroup { struct list_head children; /* my children */ struct cgroup *parent; /* my parent */ - struct dentry *dentry; /* cgroup fs entry */ + struct dentry *dentry; /* cgroup fs entry, RCU protected */ /* Private pointers for each registered subsystem */ struct cgroup_subsys_state *subsys[CGROUP_SUBSYS_COUNT]; @@ -145,6 +145,9 @@ struct cgroup { int pids_use_count; /* Length of the current tasks_pids array */ int pids_length; + + /* For RCU-protected deletion */ + struct rcu_head rcu_head; }; /* A css_set is a structure holding pointers to a set of -- cgit v1.2.2 From 7a81b88cb53e335ff7d019e6398c95792c817d93 Mon Sep 17 00:00:00 2001 From: KAMEZAWA Hiroyuki Date: Wed, 7 Jan 2009 18:07:48 -0800 Subject: memcg: introduce charge-commit-cancel style of functions There is a small race in do_swap_page(). When the page swapped-in is charged, the mapcount can be greater than 0. But, at the same time some process (shares it ) call unmap and make mapcount 1->0 and the page is uncharged. CPUA CPUB mapcount == 1. (1) charge if mapcount==0 zap_pte_range() (2) mapcount 1 => 0. (3) uncharge(). (success) (4) set page's rmap() mapcount 0=>1 Then, this swap page's account is leaked. For fixing this, I added a new interface. - charge account to res_counter by PAGE_SIZE and try to free pages if necessary. - commit register page_cgroup and add to LRU if necessary. - cancel uncharge PAGE_SIZE because of do_swap_page failure. CPUA (1) charge (always) (2) set page's rmap (mapcount > 0) (3) commit charge was necessary or not after set_pte(). This protocol uses PCG_USED bit on page_cgroup for avoiding over accounting. Usual mem_cgroup_charge_common() does charge -> commit at a time. And this patch also adds following function to clarify all charges. - mem_cgroup_newpage_charge() ....replacement for mem_cgroup_charge() called against newly allocated anon pages. - mem_cgroup_charge_migrate_fixup() called only from remove_migration_ptes(). we'll have to rewrite this later.(this patch just keeps old behavior) This function will be removed by additional patch to make migration clearer. Good for clarifying "what we do" Then, we have 4 following charge points. - newpage - swap-in - add-to-cache. - migration. [akpm@linux-foundation.org: add missing inline directives to stubs] Signed-off-by: KAMEZAWA Hiroyuki Reviewed-by: Daisuke Nishimura Cc: Balbir Singh Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 36 ++++++++++++++++++++++++++++++++++-- 1 file changed, 34 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 1fbe14d39521..c592f315cd02 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -27,8 +27,17 @@ struct mm_struct; #ifdef CONFIG_CGROUP_MEM_RES_CTLR -extern int mem_cgroup_charge(struct page *page, struct mm_struct *mm, +extern int mem_cgroup_newpage_charge(struct page *page, struct mm_struct *mm, gfp_t gfp_mask); +extern int mem_cgroup_charge_migrate_fixup(struct page *page, + struct mm_struct *mm, gfp_t gfp_mask); +/* for swap handling */ +extern int mem_cgroup_try_charge(struct mm_struct *mm, + gfp_t gfp_mask, struct mem_cgroup **ptr); +extern void mem_cgroup_commit_charge_swapin(struct page *page, + struct mem_cgroup *ptr); +extern void mem_cgroup_cancel_charge_swapin(struct mem_cgroup *ptr); + extern int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm, gfp_t gfp_mask); extern void mem_cgroup_move_lists(struct page *page, enum lru_list lru); @@ -71,7 +80,9 @@ extern long mem_cgroup_calc_reclaim(struct mem_cgroup *mem, struct zone *zone, #else /* CONFIG_CGROUP_MEM_RES_CTLR */ -static inline int mem_cgroup_charge(struct page *page, +struct mem_cgroup; + +static inline int mem_cgroup_newpage_charge(struct page *page, struct mm_struct *mm, gfp_t gfp_mask) { return 0; @@ -83,6 +94,27 @@ static inline int mem_cgroup_cache_charge(struct page *page, return 0; } +static inline int mem_cgroup_charge_migrate_fixup(struct page *page, + struct mm_struct *mm, gfp_t gfp_mask) +{ + return 0; +} + +static inline int mem_cgroup_try_charge(struct mm_struct *mm, + gfp_t gfp_mask, struct mem_cgroup **ptr) +{ + return 0; +} + +static inline void mem_cgroup_commit_charge_swapin(struct page *page, + struct mem_cgroup *ptr) +{ +} + +static inline void mem_cgroup_cancel_charge_swapin(struct mem_cgroup *ptr) +{ +} + static inline void mem_cgroup_uncharge_page(struct page *page) { } -- cgit v1.2.2 From 01b1ae63c2270cbacfd43fea94578c17950eb548 Mon Sep 17 00:00:00 2001 From: KAMEZAWA Hiroyuki Date: Wed, 7 Jan 2009 18:07:50 -0800 Subject: memcg: simple migration handling Now, management of "charge" under page migration is done under following manner. (Assume migrate page contents from oldpage to newpage) before - "newpage" is charged before migration. at success. - "oldpage" is uncharged at somewhere(unmap, radix-tree-replace) at failure - "newpage" is uncharged. - "oldpage" is charged if necessary (*1) But (*1) is not reliable....because of GFP_ATOMIC. This patch tries to change behavior as following by charge/commit/cancel ops. before - charge PAGE_SIZE (no target page) success - commit charge against "newpage". failure - commit charge against "oldpage". (PCG_USED bit works effectively to avoid double-counting) - if "oldpage" is obsolete, cancel charge of PAGE_SIZE. Signed-off-by: KAMEZAWA Hiroyuki Reviewed-by: Daisuke Nishimura Cc: Balbir Singh Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 19 +++++++------------ 1 file changed, 7 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index c592f315cd02..b095f5f6ecf7 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -29,8 +29,6 @@ struct mm_struct; extern int mem_cgroup_newpage_charge(struct page *page, struct mm_struct *mm, gfp_t gfp_mask); -extern int mem_cgroup_charge_migrate_fixup(struct page *page, - struct mm_struct *mm, gfp_t gfp_mask); /* for swap handling */ extern int mem_cgroup_try_charge(struct mm_struct *mm, gfp_t gfp_mask, struct mem_cgroup **ptr); @@ -60,8 +58,9 @@ extern struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p); ((cgroup) == mem_cgroup_from_task((mm)->owner)) extern int -mem_cgroup_prepare_migration(struct page *page, struct page *newpage); -extern void mem_cgroup_end_migration(struct page *page); +mem_cgroup_prepare_migration(struct page *page, struct mem_cgroup **ptr); +extern void mem_cgroup_end_migration(struct mem_cgroup *mem, + struct page *oldpage, struct page *newpage); /* * For memory reclaim. @@ -94,12 +93,6 @@ static inline int mem_cgroup_cache_charge(struct page *page, return 0; } -static inline int mem_cgroup_charge_migrate_fixup(struct page *page, - struct mm_struct *mm, gfp_t gfp_mask) -{ - return 0; -} - static inline int mem_cgroup_try_charge(struct mm_struct *mm, gfp_t gfp_mask, struct mem_cgroup **ptr) { @@ -144,12 +137,14 @@ static inline int task_in_mem_cgroup(struct task_struct *task, } static inline int -mem_cgroup_prepare_migration(struct page *page, struct page *newpage) +mem_cgroup_prepare_migration(struct page *page, struct mem_cgroup **ptr) { return 0; } -static inline void mem_cgroup_end_migration(struct page *page) +static inline void mem_cgroup_end_migration(struct mem_cgroup *mem, + struct page *oldpage, + struct page *newpage) { } -- cgit v1.2.2 From d13d144309d2e5a3e6ad978b16c1d0226ddc9231 Mon Sep 17 00:00:00 2001 From: KAMEZAWA Hiroyuki Date: Wed, 7 Jan 2009 18:07:56 -0800 Subject: memcg: handle swap caches SwapCache support for memory resource controller (memcg) Before mem+swap controller, memcg itself should handle SwapCache in proper way. This is cut-out from it. In current memcg, SwapCache is just leaked and the user can create tons of SwapCache. This is a leak of account and should be handled. SwapCache accounting is done as following. charge (anon) - charged when it's mapped. (because of readahead, charge at add_to_swap_cache() is not sane) uncharge (anon) - uncharged when it's dropped from swapcache and fully unmapped. means it's not uncharged at unmap. Note: delete from swap cache at swap-in is done after rmap information is established. charge (shmem) - charged at swap-in. this prevents charge at add_to_page_cache(). uncharge (shmem) - uncharged when it's dropped from swapcache and not on shmem's radix-tree. at migration, check against 'old page' is modified to handle shmem. Comparing to the old version discussed (and caused troubles), we have advantages of - PCG_USED bit. - simple migrating handling. So, situation is much easier than several months ago, maybe. [hugh@veritas.com: memcg: handle swap caches build fix] Reviewed-by: Daisuke Nishimura Tested-by: Daisuke Nishimura Signed-off-by: KAMEZAWA Hiroyuki Cc: Hugh Dickins Cc: Li Zefan Cc: Balbir Singh Cc: Pavel Emelyanov Signed-off-by: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/swap.h | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) (limited to 'include/linux') diff --git a/include/linux/swap.h b/include/linux/swap.h index 91dee50fe260..f8f3907533f0 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -333,6 +333,22 @@ static inline void disable_swap_token(void) put_swap_token(swap_token_mm); } +#ifdef CONFIG_CGROUP_MEM_RES_CTLR +extern int mem_cgroup_cache_charge_swapin(struct page *page, + struct mm_struct *mm, gfp_t mask, bool locked); +extern void mem_cgroup_uncharge_swapcache(struct page *page); +#else +static inline +int mem_cgroup_cache_charge_swapin(struct page *page, + struct mm_struct *mm, gfp_t mask, bool locked) +{ + return 0; +} +static inline void mem_cgroup_uncharge_swapcache(struct page *page) +{ +} +#endif + #else /* CONFIG_SWAP */ #define nr_swap_pages 0L @@ -409,6 +425,12 @@ static inline swp_entry_t get_swap_page(void) #define has_swap_token(x) 0 #define disable_swap_token() do { } while(0) +static inline int mem_cgroup_cache_charge_swapin(struct page *page, + struct mm_struct *mm, gfp_t mask, bool locked) +{ + return 0; +} + #endif /* CONFIG_SWAP */ #endif /* __KERNEL__*/ #endif /* _LINUX_SWAP_H */ -- cgit v1.2.2 From c077719be8e9e6b55702117513d1b5f41d80404a Mon Sep 17 00:00:00 2001 From: KAMEZAWA Hiroyuki Date: Wed, 7 Jan 2009 18:07:57 -0800 Subject: memcg: mem+swap controller Kconfig Config and control variable for mem+swap controller. This patch adds CONFIG_CGROUP_MEM_RES_CTLR_SWAP (memory resource controller swap extension.) For accounting swap, it's obvious that we have to use additional memory to remember "who uses swap". This adds more overhead. So, it's better to offer "choice" to users. This patch adds 2 choices. This patch adds 2 parameters to enable swap extension or not. - CONFIG - boot option Reviewed-by: Daisuke Nishimura Signed-off-by: KAMEZAWA Hiroyuki Cc: Li Zefan Cc: Balbir Singh Cc: Pavel Emelyanov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index b095f5f6ecf7..41b46cc9d1f1 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -77,6 +77,9 @@ extern void mem_cgroup_record_reclaim_priority(struct mem_cgroup *mem, extern long mem_cgroup_calc_reclaim(struct mem_cgroup *mem, struct zone *zone, int priority, enum lru_list lru); +#ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP +extern int do_swap_account; +#endif #else /* CONFIG_CGROUP_MEM_RES_CTLR */ struct mem_cgroup; -- cgit v1.2.2 From 27a7faa0779dd13729196c1a818c294f44bbd1ee Mon Sep 17 00:00:00 2001 From: KAMEZAWA Hiroyuki Date: Wed, 7 Jan 2009 18:07:58 -0800 Subject: memcg: swap cgroup for remembering usage For accounting swap, we need a record per swap entry, at least. This patch adds following function. - swap_cgroup_swapon() .... called from swapon - swap_cgroup_swapoff() ... called at the end of swapoff. - swap_cgroup_record() .... record information of swap entry. - swap_cgroup_lookup() .... lookup information of swap entry. This patch just implements "how to record information". No actual method for limit the usage of swap. These routine uses flat table to record and lookup. "wise" lookup system like radix-tree requires requires memory allocation at new records but swap-out is usually called under memory shortage (or memcg hits limit.) So, I used static allocation. (maybe dynamic allocation is not very hard but it adds additional memory allocation in memory shortage path.) Note1: In this, we use pointer to record information and this means 8bytes per swap entry. I think we can reduce this when we create "id of cgroup" in the range of 0-65535 or 0-255. Reported-by: Daisuke Nishimura Reviewed-by: Daisuke Nishimura Tested-by: Daisuke Nishimura Reported-by: Hugh Dickins Reported-by: Balbir Singh Reported-by: Andrew Morton Signed-off-by: KAMEZAWA Hiroyuki Cc: Pavel Emelianov Cc: Li Zefan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/page_cgroup.h | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) (limited to 'include/linux') diff --git a/include/linux/page_cgroup.h b/include/linux/page_cgroup.h index 1e6d34bfa094..d754b2dfbf2d 100644 --- a/include/linux/page_cgroup.h +++ b/include/linux/page_cgroup.h @@ -104,5 +104,40 @@ static inline void page_cgroup_init(void) { } +#endif + +#ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP +#include +extern struct mem_cgroup * +swap_cgroup_record(swp_entry_t ent, struct mem_cgroup *mem); +extern struct mem_cgroup *lookup_swap_cgroup(swp_entry_t ent); +extern int swap_cgroup_swapon(int type, unsigned long max_pages); +extern void swap_cgroup_swapoff(int type); +#else +#include + +static inline +struct mem_cgroup *swap_cgroup_record(swp_entry_t ent, struct mem_cgroup *mem) +{ + return NULL; +} + +static inline +struct mem_cgroup *lookup_swap_cgroup(swp_entry_t ent) +{ + return NULL; +} + +static inline int +swap_cgroup_swapon(int type, unsigned long max_pages) +{ + return 0; +} + +static inline void swap_cgroup_swapoff(int type) +{ + return; +} + #endif #endif -- cgit v1.2.2 From 8c7c6e34a1256a5082d38c8e9bd1474476912715 Mon Sep 17 00:00:00 2001 From: KAMEZAWA Hiroyuki Date: Wed, 7 Jan 2009 18:08:00 -0800 Subject: memcg: mem+swap controller core This patch implements per cgroup limit for usage of memory+swap. However there are SwapCache, double counting of swap-cache and swap-entry is avoided. Mem+Swap controller works as following. - memory usage is limited by memory.limit_in_bytes. - memory + swap usage is limited by memory.memsw_limit_in_bytes. This has following benefits. - A user can limit total resource usage of mem+swap. Without this, because memory resource controller doesn't take care of usage of swap, a process can exhaust all the swap (by memory leak.) We can avoid this case. And Swap is shared resource but it cannot be reclaimed (goes back to memory) until it's used. This characteristic can be trouble when the memory is divided into some parts by cpuset or memcg. Assume group A and group B. After some application executes, the system can be.. Group A -- very large free memory space but occupy 99% of swap. Group B -- under memory shortage but cannot use swap...it's nearly full. Ability to set appropriate swap limit for each group is required. Maybe someone wonder "why not swap but mem+swap ?" - The global LRU(kswapd) can swap out arbitrary pages. Swap-out means to move account from memory to swap...there is no change in usage of mem+swap. In other words, when we want to limit the usage of swap without affecting global LRU, mem+swap limit is better than just limiting swap. Accounting target information is stored in swap_cgroup which is per swap entry record. Charge is done as following. map - charge page and memsw. unmap - uncharge page/memsw if not SwapCache. swap-out (__delete_from_swap_cache) - uncharge page - record mem_cgroup information to swap_cgroup. swap-in (do_swap_page) - charged as page and memsw. record in swap_cgroup is cleared. memsw accounting is decremented. swap-free (swap_free()) - if swap entry is freed, memsw is uncharged by PAGE_SIZE. There are people work under never-swap environments and consider swap as something bad. For such people, this mem+swap controller extension is just an overhead. This overhead is avoided by config or boot option. (see Kconfig. detail is not in this patch.) TODO: - maybe more optimization can be don in swap-in path. (but not very safe.) But we just do simple accounting at this stage. [nishimura@mxp.nes.nec.co.jp: make resize limit hold mutex] [hugh@veritas.com: memswap controller core swapcache fixes] Signed-off-by: KAMEZAWA Hiroyuki Cc: Li Zefan Cc: Balbir Singh Cc: Pavel Emelyanov Signed-off-by: Daisuke Nishimura Signed-off-by: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 11 +++++++++-- include/linux/swap.h | 14 +++++++++++--- 2 files changed, 20 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 41b46cc9d1f1..ca51ac72d6c0 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -32,6 +32,8 @@ extern int mem_cgroup_newpage_charge(struct page *page, struct mm_struct *mm, /* for swap handling */ extern int mem_cgroup_try_charge(struct mm_struct *mm, gfp_t gfp_mask, struct mem_cgroup **ptr); +extern int mem_cgroup_try_charge_swapin(struct mm_struct *mm, + struct page *page, gfp_t mask, struct mem_cgroup **ptr); extern void mem_cgroup_commit_charge_swapin(struct page *page, struct mem_cgroup *ptr); extern void mem_cgroup_cancel_charge_swapin(struct mem_cgroup *ptr); @@ -80,7 +82,6 @@ extern long mem_cgroup_calc_reclaim(struct mem_cgroup *mem, struct zone *zone, #ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP extern int do_swap_account; #endif - #else /* CONFIG_CGROUP_MEM_RES_CTLR */ struct mem_cgroup; @@ -97,7 +98,13 @@ static inline int mem_cgroup_cache_charge(struct page *page, } static inline int mem_cgroup_try_charge(struct mm_struct *mm, - gfp_t gfp_mask, struct mem_cgroup **ptr) + gfp_t gfp_mask, struct mem_cgroup **ptr) +{ + return 0; +} + +static inline int mem_cgroup_try_charge_swapin(struct mm_struct *mm, + struct page *page, gfp_t gfp_mask, struct mem_cgroup **ptr) { return 0; } diff --git a/include/linux/swap.h b/include/linux/swap.h index f8f3907533f0..be938ce4895a 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -214,7 +214,7 @@ static inline void lru_cache_add_active_file(struct page *page) extern unsigned long try_to_free_pages(struct zonelist *zonelist, int order, gfp_t gfp_mask); extern unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem, - gfp_t gfp_mask); + gfp_t gfp_mask, bool noswap); extern int __isolate_lru_page(struct page *page, int mode, int file); extern unsigned long shrink_all_memory(unsigned long nr_pages); extern int vm_swappiness; @@ -336,7 +336,7 @@ static inline void disable_swap_token(void) #ifdef CONFIG_CGROUP_MEM_RES_CTLR extern int mem_cgroup_cache_charge_swapin(struct page *page, struct mm_struct *mm, gfp_t mask, bool locked); -extern void mem_cgroup_uncharge_swapcache(struct page *page); +extern void mem_cgroup_uncharge_swapcache(struct page *page, swp_entry_t ent); #else static inline int mem_cgroup_cache_charge_swapin(struct page *page, @@ -344,7 +344,15 @@ int mem_cgroup_cache_charge_swapin(struct page *page, { return 0; } -static inline void mem_cgroup_uncharge_swapcache(struct page *page) +static inline void +mem_cgroup_uncharge_swapcache(struct page *page, swp_entry_t ent) +{ +} +#endif +#ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP +extern void mem_cgroup_uncharge_swap(swp_entry_t ent); +#else +static inline void mem_cgroup_uncharge_swap(swp_entry_t ent) { } #endif -- cgit v1.2.2 From 08e552c69c6930d64722de3ec18c51844d06ee28 Mon Sep 17 00:00:00 2001 From: KAMEZAWA Hiroyuki Date: Wed, 7 Jan 2009 18:08:01 -0800 Subject: memcg: synchronized LRU A big patch for changing memcg's LRU semantics. Now, - page_cgroup is linked to mem_cgroup's its own LRU (per zone). - LRU of page_cgroup is not synchronous with global LRU. - page and page_cgroup is one-to-one and statically allocated. - To find page_cgroup is on what LRU, you have to check pc->mem_cgroup as - lru = page_cgroup_zoneinfo(pc, nid_of_pc, zid_of_pc); - SwapCache is handled. And, when we handle LRU list of page_cgroup, we do following. pc = lookup_page_cgroup(page); lock_page_cgroup(pc); .....................(1) mz = page_cgroup_zoneinfo(pc); spin_lock(&mz->lru_lock); .....add to LRU spin_unlock(&mz->lru_lock); unlock_page_cgroup(pc); But (1) is spin_lock and we have to be afraid of dead-lock with zone->lru_lock. So, trylock() is used at (1), now. Without (1), we can't trust "mz" is correct. This is a trial to remove this dirty nesting of locks. This patch changes mz->lru_lock to be zone->lru_lock. Then, above sequence will be written as spin_lock(&zone->lru_lock); # in vmscan.c or swap.c via global LRU mem_cgroup_add/remove/etc_lru() { pc = lookup_page_cgroup(page); mz = page_cgroup_zoneinfo(pc); if (PageCgroupUsed(pc)) { ....add to LRU } spin_lock(&zone->lru_lock); # in vmscan.c or swap.c via global LRU This is much simpler. (*) We're safe even if we don't take lock_page_cgroup(pc). Because.. 1. When pc->mem_cgroup can be modified. - at charge. - at account_move(). 2. at charge the PCG_USED bit is not set before pc->mem_cgroup is fixed. 3. at account_move() the page is isolated and not on LRU. Pros. - easy for maintenance. - memcg can make use of laziness of pagevec. - we don't have to duplicated LRU/Active/Unevictable bit in page_cgroup. - LRU status of memcg will be synchronized with global LRU's one. - # of locks are reduced. - account_move() is simplified very much. Cons. - may increase cost of LRU rotation. (no impact if memcg is not configured.) Signed-off-by: KAMEZAWA Hiroyuki Cc: Li Zefan Cc: Balbir Singh Cc: Pavel Emelyanov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 29 +++++++++++++++++++++++++++-- include/linux/mm_inline.h | 3 +++ include/linux/page_cgroup.h | 17 ----------------- 3 files changed, 30 insertions(+), 19 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index ca51ac72d6c0..32c07b1852d6 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -40,7 +40,12 @@ extern void mem_cgroup_cancel_charge_swapin(struct mem_cgroup *ptr); extern int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm, gfp_t gfp_mask); -extern void mem_cgroup_move_lists(struct page *page, enum lru_list lru); +extern void mem_cgroup_add_lru_list(struct page *page, enum lru_list lru); +extern void mem_cgroup_del_lru_list(struct page *page, enum lru_list lru); +extern void mem_cgroup_rotate_lru_list(struct page *page, enum lru_list lru); +extern void mem_cgroup_del_lru(struct page *page); +extern void mem_cgroup_move_lists(struct page *page, + enum lru_list from, enum lru_list to); extern void mem_cgroup_uncharge_page(struct page *page); extern void mem_cgroup_uncharge_cache_page(struct page *page); extern int mem_cgroup_shrink_usage(struct mm_struct *mm, gfp_t gfp_mask); @@ -131,7 +136,27 @@ static inline int mem_cgroup_shrink_usage(struct mm_struct *mm, gfp_t gfp_mask) return 0; } -static inline void mem_cgroup_move_lists(struct page *page, bool active) +static inline void mem_cgroup_add_lru_list(struct page *page, int lru) +{ +} + +static inline void mem_cgroup_del_lru_list(struct page *page, int lru) +{ + return ; +} + +static inline void mem_cgroup_rotate_lru_list(struct page *page, int lru) +{ + return ; +} + +static inline void mem_cgroup_del_lru(struct page *page) +{ + return ; +} + +static inline void +mem_cgroup_move_lists(struct page *page, enum lru_list from, enum lru_list to) { } diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h index c948350c378e..37ef13d0f01e 100644 --- a/include/linux/mm_inline.h +++ b/include/linux/mm_inline.h @@ -28,6 +28,7 @@ add_page_to_lru_list(struct zone *zone, struct page *page, enum lru_list l) { list_add(&page->lru, &zone->lru[l].list); __inc_zone_state(zone, NR_LRU_BASE + l); + mem_cgroup_add_lru_list(page, l); } static inline void @@ -35,6 +36,7 @@ del_page_from_lru_list(struct zone *zone, struct page *page, enum lru_list l) { list_del(&page->lru); __dec_zone_state(zone, NR_LRU_BASE + l); + mem_cgroup_del_lru_list(page, l); } static inline void @@ -54,6 +56,7 @@ del_page_from_lru(struct zone *zone, struct page *page) l += page_is_file_cache(page); } __dec_zone_state(zone, NR_LRU_BASE + l); + mem_cgroup_del_lru_list(page, l); } /** diff --git a/include/linux/page_cgroup.h b/include/linux/page_cgroup.h index d754b2dfbf2d..602cc1fdee90 100644 --- a/include/linux/page_cgroup.h +++ b/include/linux/page_cgroup.h @@ -26,10 +26,6 @@ enum { PCG_LOCK, /* page cgroup is locked */ PCG_CACHE, /* charged as cache */ PCG_USED, /* this object is in use. */ - /* flags for LRU placement */ - PCG_ACTIVE, /* page is active in this cgroup */ - PCG_FILE, /* page is file system backed */ - PCG_UNEVICTABLE, /* page is unevictableable */ }; #define TESTPCGFLAG(uname, lname) \ @@ -50,19 +46,6 @@ TESTPCGFLAG(Cache, CACHE) TESTPCGFLAG(Used, USED) CLEARPCGFLAG(Used, USED) -/* LRU management flags (from global-lru definition) */ -TESTPCGFLAG(File, FILE) -SETPCGFLAG(File, FILE) -CLEARPCGFLAG(File, FILE) - -TESTPCGFLAG(Active, ACTIVE) -SETPCGFLAG(Active, ACTIVE) -CLEARPCGFLAG(Active, ACTIVE) - -TESTPCGFLAG(Unevictable, UNEVICTABLE) -SETPCGFLAG(Unevictable, UNEVICTABLE) -CLEARPCGFLAG(Unevictable, UNEVICTABLE) - static inline int page_cgroup_nid(struct page_cgroup *pc) { return page_to_nid(pc->page); -- cgit v1.2.2 From f8d665422603ee1b8ed04dcad4242f14d623c941 Mon Sep 17 00:00:00 2001 From: Hirokazu Takahashi Date: Wed, 7 Jan 2009 18:08:02 -0800 Subject: memcg: add mem_cgroup_disabled() We check mem_cgroup is disabled or not by checking mem_cgroup_subsys.disabled. I think it has more references than expected, now. replacing if (mem_cgroup_subsys.disabled) with if (mem_cgroup_disabled()) give us good look, I think. [kamezawa.hiroyu@jp.fujitsu.com: fix typo] Signed-off-by: KAMEZAWA Hiroyuki Cc: Li Zefan Cc: Balbir Singh Cc: Pavel Emelyanov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 32c07b1852d6..472efd09118c 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -19,7 +19,7 @@ #ifndef _LINUX_MEMCONTROL_H #define _LINUX_MEMCONTROL_H - +#include struct mem_cgroup; struct page_cgroup; struct page; @@ -87,6 +87,14 @@ extern long mem_cgroup_calc_reclaim(struct mem_cgroup *mem, struct zone *zone, #ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP extern int do_swap_account; #endif + +static inline bool mem_cgroup_disabled(void) +{ + if (mem_cgroup_subsys.disabled) + return true; + return false; +} + #else /* CONFIG_CGROUP_MEM_RES_CTLR */ struct mem_cgroup; @@ -214,6 +222,11 @@ static inline long mem_cgroup_calc_reclaim(struct mem_cgroup *mem, { return 0; } + +static inline bool mem_cgroup_disabled(void) +{ + return true; +} #endif /* CONFIG_CGROUP_MEM_CONT */ #endif /* _LINUX_MEMCONTROL_H */ -- cgit v1.2.2 From 28dbc4b6a01fb579a9441c7b81e3d3413dc452df Mon Sep 17 00:00:00 2001 From: Balbir Singh Date: Wed, 7 Jan 2009 18:08:05 -0800 Subject: memcg: memory cgroup resource counters for hierarchy Add support for building hierarchies in resource counters. Cgroups allows us to build a deep hierarchy, but we currently don't link the resource counters belonging to the memory controller control groups, in the same fashion as the corresponding cgroup entries in the cgroup hierarchy. This patch provides the infrastructure for resource counters that have the same hiearchy as their cgroup counter parts. These set of patches are based on the resource counter hiearchy patches posted by Pavel Emelianov. NOTE: Building hiearchies is expensive, deeper hierarchies imply charging the all the way up to the root. It is known that hiearchies are expensive, so the user needs to be careful and aware of the trade-offs before creating very deep ones. [akpm@linux-foundation.org: coding-style fixes] Signed-off-by: Balbir Singh Cc: YAMAMOTO Takashi Cc: Paul Menage Cc: Li Zefan Cc: David Rientjes Cc: Pavel Emelianov Cc: Dhaval Giani Cc: KAMEZAWA Hiroyuki Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/res_counter.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/res_counter.h b/include/linux/res_counter.h index 271c1c2c9f6f..dede0a2cfc45 100644 --- a/include/linux/res_counter.h +++ b/include/linux/res_counter.h @@ -43,6 +43,10 @@ struct res_counter { * the routines below consider this to be IRQ-safe */ spinlock_t lock; + /* + * Parent counter, used for hierarchial resource accounting + */ + struct res_counter *parent; }; /** @@ -87,7 +91,7 @@ enum { * helpers for accounting */ -void res_counter_init(struct res_counter *counter); +void res_counter_init(struct res_counter *counter, struct res_counter *parent); /* * charge - try to consume more resource. @@ -103,7 +107,7 @@ void res_counter_init(struct res_counter *counter); int __must_check res_counter_charge_locked(struct res_counter *counter, unsigned long val); int __must_check res_counter_charge(struct res_counter *counter, - unsigned long val); + unsigned long val, struct res_counter **limit_fail_at); /* * uncharge - tell that some portion of the resource is released -- cgit v1.2.2 From 2e4d40915fb85207fe48cfc31201824ec6d7426e Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Wed, 7 Jan 2009 18:08:07 -0800 Subject: memcontrol: rcu_read_lock() to protect mm_match_cgroup() mm_match_cgroup() calls cgroup_subsys_state(). We must use rcu_read_lock() to protect cgroup_subsys_state(). Signed-off-by: Lai Jiangshan Cc: Paul Menage Reviewed-by: KAMEZAWA Hiroyuki Cc: Pavel Emelyanov Cc: Balbir Singh Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 472efd09118c..2de6504e01fb 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -61,8 +61,15 @@ int task_in_mem_cgroup(struct task_struct *task, const struct mem_cgroup *mem); extern struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p); -#define mm_match_cgroup(mm, cgroup) \ - ((cgroup) == mem_cgroup_from_task((mm)->owner)) +static inline +int mm_match_cgroup(const struct mm_struct *mm, const struct mem_cgroup *cgroup) +{ + struct mem_cgroup *mem; + rcu_read_lock(); + mem = mem_cgroup_from_task((mm)->owner); + rcu_read_unlock(); + return cgroup == mem; +} extern int mem_cgroup_prepare_migration(struct page *page, struct mem_cgroup **ptr); -- cgit v1.2.2 From a636b327f731143ccc544b966cfd8de6cb6d72c6 Mon Sep 17 00:00:00 2001 From: KAMEZAWA Hiroyuki Date: Wed, 7 Jan 2009 18:08:08 -0800 Subject: memcg: avoid unnecessary system-wide-oom-killer Current mmtom has new oom function as pagefault_out_of_memory(). It's added for select bad process rathar than killing current. When memcg hit limit and calls OOM at page_fault, this handler called and system-wide-oom handling happens. (means kernel panics if panic_on_oom is true....) To avoid overkill, check memcg's recent behavior before starting system-wide-oom. And this patch also fixes to guarantee "don't accnout against process with TIF_MEMDIE". This is necessary for smooth OOM. [akpm@linux-foundation.org: build fix] Signed-off-by: KAMEZAWA Hiroyuki Cc: Li Zefan Cc: Balbir Singh Cc: Daisuke Nishimura Cc: Badari Pulavarty Cc: Jan Blunck Cc: Hirokazu Takahashi Cc: Nick Piggin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 2de6504e01fb..2fdd1380bf0a 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -102,6 +102,8 @@ static inline bool mem_cgroup_disabled(void) return false; } +extern bool mem_cgroup_oom_called(struct task_struct *task); + #else /* CONFIG_CGROUP_MEM_RES_CTLR */ struct mem_cgroup; @@ -234,6 +236,11 @@ static inline bool mem_cgroup_disabled(void) { return true; } + +static inline bool mem_cgroup_oom_called(struct task_struct *task) +{ + return false; +} #endif /* CONFIG_CGROUP_MEM_CONT */ #endif /* _LINUX_MEMCONTROL_H */ -- cgit v1.2.2 From 2c26fdd70c3094fa3e84caf9ef434911933d5477 Mon Sep 17 00:00:00 2001 From: KAMEZAWA Hiroyuki Date: Wed, 7 Jan 2009 18:08:10 -0800 Subject: memcg: revert gfp mask fix My patch, memcg-fix-gfp_mask-of-callers-of-charge.patch changed gfp_mask of callers of charge to be GFP_HIGHUSER_MOVABLE for showing what will happen at memory reclaim. But in recent discussion, it's NACKed because it sounds ugly. This patch is for reverting it and add some clean up to gfp_mask of callers of charge. No behavior change but need review before generating HUNK in deep queue. This patch also adds explanation to meaning of gfp_mask passed to charge functions in memcontrol.h. Signed-off-by: KAMEZAWA Hiroyuki Cc: Balbir Singh Cc: Daisuke Nishimura Cc: Hugh Dickins Cc: KOSAKI Motohiro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 2fdd1380bf0a..59ac95a64508 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -26,6 +26,16 @@ struct page; struct mm_struct; #ifdef CONFIG_CGROUP_MEM_RES_CTLR +/* + * All "charge" functions with gfp_mask should use GFP_KERNEL or + * (gfp_mask & GFP_RECLAIM_MASK). In current implementatin, memcg doesn't + * alloc memory but reclaims memory from all available zones. So, "where I want + * memory from" bits of gfp_mask has no meaning. So any bits of that field is + * available but adding a rule is better. charge functions' gfp_mask should + * be set to GFP_KERNEL or gfp_mask & GFP_RECLAIM_MASK for avoiding ambiguous + * codes. + * (Of course, if memcg does memory allocation in future, GFP_KERNEL is sane.) + */ extern int mem_cgroup_newpage_charge(struct page *page, struct mm_struct *mm, gfp_t gfp_mask); -- cgit v1.2.2 From f89eb90e33fd4e4e0cc1a6d20afd63c5a561885a Mon Sep 17 00:00:00 2001 From: KOSAKI Motohiro Date: Wed, 7 Jan 2009 18:08:14 -0800 Subject: inactive_anon_is_low: move to vmscan The inactive_anon_is_low() is called only vmscan. Then it can move to vmscan.c This patch doesn't have any functional change. Reviewd-by: KAMEZAWA Hiroyuki Acked-by: Rik van Riel Signed-off-by: KOSAKI Motohiro Cc: Balbir Singh Cc: Daisuke Nishimura Cc: Hugh Dickins Cc: KOSAKI Motohiro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm_inline.h | 19 ------------------- 1 file changed, 19 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h index 37ef13d0f01e..7fbb97267556 100644 --- a/include/linux/mm_inline.h +++ b/include/linux/mm_inline.h @@ -81,23 +81,4 @@ static inline enum lru_list page_lru(struct page *page) return lru; } -/** - * inactive_anon_is_low - check if anonymous pages need to be deactivated - * @zone: zone to check - * - * Returns true if the zone does not have enough inactive anon pages, - * meaning some active anon pages need to be deactivated. - */ -static inline int inactive_anon_is_low(struct zone *zone) -{ - unsigned long active, inactive; - - active = zone_page_state(zone, NR_ACTIVE_ANON); - inactive = zone_page_state(zone, NR_INACTIVE_ANON); - - if (inactive * zone->inactive_ratio < active) - return 1; - - return 0; -} #endif -- cgit v1.2.2 From 6e9015716ae9b59e9635d692fddfcfb9582c146c Mon Sep 17 00:00:00 2001 From: KOSAKI Motohiro Date: Wed, 7 Jan 2009 18:08:15 -0800 Subject: mm: introduce zone_reclaim struct Add zone_reclam_stat struct for later enhancement. A later patch uses this. This patch doesn't any behavior change (yet). Reviewed-by: KAMEZAWA Hiroyuki Signed-off-by: KOSAKI Motohiro Acked-by: Rik van Riel Cc: Balbir Singh Cc: Daisuke Nishimura Cc: Hugh Dickins Cc: KOSAKI Motohiro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mmzone.h | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 35a7b5e19465..09c14e213b63 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -263,6 +263,19 @@ enum zone_type { #error ZONES_SHIFT -- too many zones configured adjust calculation #endif +struct zone_reclaim_stat { + /* + * The pageout code in vmscan.c keeps track of how many of the + * mem/swap backed and file backed pages are refeferenced. + * The higher the rotated/scanned ratio, the more valuable + * that cache is. + * + * The anon LRU stats live in [0], file LRU stats in [1] + */ + unsigned long recent_rotated[2]; + unsigned long recent_scanned[2]; +}; + struct zone { /* Fields commonly accessed by the page allocator */ unsigned long pages_min, pages_low, pages_high; @@ -315,16 +328,7 @@ struct zone { unsigned long nr_scan; } lru[NR_LRU_LISTS]; - /* - * The pageout code in vmscan.c keeps track of how many of the - * mem/swap backed and file backed pages are refeferenced. - * The higher the rotated/scanned ratio, the more valuable - * that cache is. - * - * The anon LRU stats live in [0], file LRU stats in [1] - */ - unsigned long recent_rotated[2]; - unsigned long recent_scanned[2]; + struct zone_reclaim_stat reclaim_stat; unsigned long pages_scanned; /* since last reclaim */ unsigned long flags; /* zone flags, see below */ -- cgit v1.2.2 From 14797e2363c2b2f1ce139fd1c5a215e4e05aa1d9 Mon Sep 17 00:00:00 2001 From: KOSAKI Motohiro Date: Wed, 7 Jan 2009 18:08:18 -0800 Subject: memcg: add inactive_anon_is_low() The inactive_anon_is_low() is key component of active/inactive anon balancing on reclaim. However current inactive_anon_is_low() function only consider global reclaim. Therefore, we need following ugly scan_global_lru() condition. if (lru == LRU_ACTIVE_ANON && (!scan_global_lru(sc) || inactive_anon_is_low(zone))) { shrink_active_list(nr_to_scan, zone, sc, priority, file); return 0; it cause that memcg reclaim always deactivate pages when shrink_list() is called. To make mem_cgroup_inactive_anon_is_low() improve active/inactive anon balancing of memcgroup. Acked-by: KAMEZAWA Hiroyuki Acked-by: Rik van Riel Signed-off-by: KOSAKI Motohiro Cc: Cyrill Gorcunov Cc: "Pekka Enberg" Cc: Balbir Singh Cc: Daisuke Nishimura Cc: Hugh Dickins Cc: KOSAKI Motohiro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 59ac95a64508..aad9377c9828 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -100,6 +100,8 @@ extern void mem_cgroup_record_reclaim_priority(struct mem_cgroup *mem, extern long mem_cgroup_calc_reclaim(struct mem_cgroup *mem, struct zone *zone, int priority, enum lru_list lru); +int mem_cgroup_inactive_anon_is_low(struct mem_cgroup *memcg, + struct zone *zone); #ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP extern int do_swap_account; @@ -251,6 +253,13 @@ static inline bool mem_cgroup_oom_called(struct task_struct *task) { return false; } + +static inline int +mem_cgroup_inactive_anon_is_low(struct mem_cgroup *memcg, struct zone *zone) +{ + return 1; +} + #endif /* CONFIG_CGROUP_MEM_CONT */ #endif /* _LINUX_MEMCONTROL_H */ -- cgit v1.2.2 From a3d8e0549d913e30968fa02e505dfe02c0a23e0d Mon Sep 17 00:00:00 2001 From: KOSAKI Motohiro Date: Wed, 7 Jan 2009 18:08:19 -0800 Subject: memcg: add mem_cgroup_zone_nr_pages() Introduce mem_cgroup_zone_nr_pages(). It is called by zone_nr_pages() helper function. This patch doesn't have any behavior change. Acked-by: KAMEZAWA Hiroyuki Acked-by: Rik van Riel Signed-off-by: KOSAKI Motohiro Acked-by: Balbir Singh Cc: Daisuke Nishimura Cc: Hugh Dickins Cc: KOSAKI Motohiro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index aad9377c9828..b1defd6a2783 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -102,6 +102,9 @@ extern long mem_cgroup_calc_reclaim(struct mem_cgroup *mem, struct zone *zone, int priority, enum lru_list lru); int mem_cgroup_inactive_anon_is_low(struct mem_cgroup *memcg, struct zone *zone); +unsigned long mem_cgroup_zone_nr_pages(struct mem_cgroup *memcg, + struct zone *zone, + enum lru_list lru); #ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP extern int do_swap_account; @@ -260,6 +263,14 @@ mem_cgroup_inactive_anon_is_low(struct mem_cgroup *memcg, struct zone *zone) return 1; } +static inline unsigned long +mem_cgroup_zone_nr_pages(struct mem_cgroup *memcg, struct zone *zone, + enum lru_list lru) +{ + return 0; +} + + #endif /* CONFIG_CGROUP_MEM_CONT */ #endif /* _LINUX_MEMCONTROL_H */ -- cgit v1.2.2 From 3e2f41f1f64744f7942980d93cc93dd3e5924560 Mon Sep 17 00:00:00 2001 From: KOSAKI Motohiro Date: Wed, 7 Jan 2009 18:08:20 -0800 Subject: memcg: add zone_reclaim_stat Introduce mem_cgroup_per_zone::reclaim_stat member and its statics collecting function. Now, get_scan_ratio() can calculate correct value on memcg reclaim. [hugh@veritas.com: avoid reclaim_stat oops when disabled] Acked-by: KAMEZAWA Hiroyuki Acked-by: Rik van Riel Signed-off-by: KOSAKI Motohiro Cc: Balbir Singh Cc: Daisuke Nishimura Cc: Hugh Dickins Cc: KOSAKI Motohiro Signed-off-by: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index b1defd6a2783..36b8ebb39b82 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -105,6 +105,10 @@ int mem_cgroup_inactive_anon_is_low(struct mem_cgroup *memcg, unsigned long mem_cgroup_zone_nr_pages(struct mem_cgroup *memcg, struct zone *zone, enum lru_list lru); +struct zone_reclaim_stat *mem_cgroup_get_reclaim_stat(struct mem_cgroup *memcg, + struct zone *zone); +struct zone_reclaim_stat* +mem_cgroup_get_reclaim_stat_from_page(struct page *page); #ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP extern int do_swap_account; @@ -271,6 +275,18 @@ mem_cgroup_zone_nr_pages(struct mem_cgroup *memcg, struct zone *zone, } +static inline struct zone_reclaim_stat* +mem_cgroup_get_reclaim_stat(struct mem_cgroup *memcg, struct zone *zone) +{ + return NULL; +} + +static inline struct zone_reclaim_stat* +mem_cgroup_get_reclaim_stat_from_page(struct page *page) +{ + return NULL; +} + #endif /* CONFIG_CGROUP_MEM_CONT */ #endif /* _LINUX_MEMCONTROL_H */ -- cgit v1.2.2 From 9439c1c95b5c25b8031b2a7eb7e1590eb84be7f5 Mon Sep 17 00:00:00 2001 From: KOSAKI Motohiro Date: Wed, 7 Jan 2009 18:08:21 -0800 Subject: memcg: remove mem_cgroup_cal_reclaim() Now, get_scan_ratio() return correct value although memcg reclaim. Then, mem_cgroup_calc_reclaim() can be removed. So, memcg reclaim get the same capability of anon/file reclaim balancing as global reclaim now. Acked-by: KAMEZAWA Hiroyuki Acked-by: Rik van Riel Signed-off-by: KOSAKI Motohiro Cc: Balbir Singh Cc: Daisuke Nishimura Cc: Hugh Dickins Cc: KOSAKI Motohiro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 10 ---------- 1 file changed, 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 36b8ebb39b82..8752052da8df 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -97,9 +97,6 @@ extern void mem_cgroup_note_reclaim_priority(struct mem_cgroup *mem, int priority); extern void mem_cgroup_record_reclaim_priority(struct mem_cgroup *mem, int priority); - -extern long mem_cgroup_calc_reclaim(struct mem_cgroup *mem, struct zone *zone, - int priority, enum lru_list lru); int mem_cgroup_inactive_anon_is_low(struct mem_cgroup *memcg, struct zone *zone); unsigned long mem_cgroup_zone_nr_pages(struct mem_cgroup *memcg, @@ -244,13 +241,6 @@ static inline void mem_cgroup_record_reclaim_priority(struct mem_cgroup *mem, { } -static inline long mem_cgroup_calc_reclaim(struct mem_cgroup *mem, - struct zone *zone, int priority, - enum lru_list lru) -{ - return 0; -} - static inline bool mem_cgroup_disabled(void) { return true; -- cgit v1.2.2 From a7885eb8ad465ec9db99ac5b5e6680f0ca8e11c8 Mon Sep 17 00:00:00 2001 From: KOSAKI Motohiro Date: Wed, 7 Jan 2009 18:08:24 -0800 Subject: memcg: swappiness Currently, /proc/sys/vm/swappiness can change swappiness ratio for global reclaim. However, memcg reclaim doesn't have tuning parameter for itself. In general, the optimal swappiness depend on workload. (e.g. hpc workload need to low swappiness than the others.) Then, per cgroup swappiness improve administrator tunability. Signed-off-by: KAMEZAWA Hiroyuki Signed-off-by: KOSAKI Motohiro Cc: Balbir Singh Cc: Daisuke Nishimura Cc: Hugh Dickins Cc: KOSAKI Motohiro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/swap.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/swap.h b/include/linux/swap.h index be938ce4895a..4ccca25d0f05 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -214,7 +214,8 @@ static inline void lru_cache_add_active_file(struct page *page) extern unsigned long try_to_free_pages(struct zonelist *zonelist, int order, gfp_t gfp_mask); extern unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem, - gfp_t gfp_mask, bool noswap); + gfp_t gfp_mask, bool noswap, + unsigned int swappiness); extern int __isolate_lru_page(struct page *page, int mode, int file); extern unsigned long shrink_all_memory(unsigned long nr_pages); extern int vm_swappiness; -- cgit v1.2.2 From c772be939e078afd2505ede7d596a30f8f61de95 Mon Sep 17 00:00:00 2001 From: KOSAKI Motohiro Date: Wed, 7 Jan 2009 18:08:25 -0800 Subject: memcg: fix calculation of active_ratio Currently, inactive_ratio of memcg is calculated at setting limit. because page_alloc.c does so and current implementation is straightforward porting. However, memcg introduced hierarchy feature recently. In hierarchy restriction, memory limit is not only decided memory.limit_in_bytes of current cgroup, but also parent limit and sibling memory usage. Then, The optimal inactive_ratio is changed frequently. So, everytime calculation is better. Tested-by: KAMEZAWA Hiroyuki Acked-by: KAMEZAWA Hiroyuki Signed-off-by: KOSAKI Motohiro Cc: Balbir Singh Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 8752052da8df..056cf82c0e86 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -97,8 +97,7 @@ extern void mem_cgroup_note_reclaim_priority(struct mem_cgroup *mem, int priority); extern void mem_cgroup_record_reclaim_priority(struct mem_cgroup *mem, int priority); -int mem_cgroup_inactive_anon_is_low(struct mem_cgroup *memcg, - struct zone *zone); +int mem_cgroup_inactive_anon_is_low(struct mem_cgroup *memcg); unsigned long mem_cgroup_zone_nr_pages(struct mem_cgroup *memcg, struct zone *zone, enum lru_list lru); @@ -252,7 +251,7 @@ static inline bool mem_cgroup_oom_called(struct task_struct *task) } static inline int -mem_cgroup_inactive_anon_is_low(struct mem_cgroup *memcg, struct zone *zone) +mem_cgroup_inactive_anon_is_low(struct mem_cgroup *memcg) { return 1; } -- cgit v1.2.2 From a5e924f5f8abf97944e625d74967cc9452cfbce8 Mon Sep 17 00:00:00 2001 From: Daisuke Nishimura Date: Wed, 7 Jan 2009 18:08:28 -0800 Subject: memcg: remove mem_cgroup_try_charge After previous patch, mem_cgroup_try_charge is not used by anyone, so we can remove it. Signed-off-by: Daisuke Nishimura Acked-by: KAMEZAWA Hiroyuki Cc: Balbir Singh Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 8 -------- 1 file changed, 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 056cf82c0e86..8ae6ece8c962 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -40,8 +40,6 @@ struct mm_struct; extern int mem_cgroup_newpage_charge(struct page *page, struct mm_struct *mm, gfp_t gfp_mask); /* for swap handling */ -extern int mem_cgroup_try_charge(struct mm_struct *mm, - gfp_t gfp_mask, struct mem_cgroup **ptr); extern int mem_cgroup_try_charge_swapin(struct mm_struct *mm, struct page *page, gfp_t mask, struct mem_cgroup **ptr); extern void mem_cgroup_commit_charge_swapin(struct page *page, @@ -134,12 +132,6 @@ static inline int mem_cgroup_cache_charge(struct page *page, return 0; } -static inline int mem_cgroup_try_charge(struct mm_struct *mm, - gfp_t gfp_mask, struct mem_cgroup **ptr) -{ - return 0; -} - static inline int mem_cgroup_try_charge_swapin(struct mm_struct *mm, struct page *page, gfp_t gfp_mask, struct mem_cgroup **ptr) { -- cgit v1.2.2 From b5a84319a4343a0db753436fd8147e61eaafa7ea Mon Sep 17 00:00:00 2001 From: KAMEZAWA Hiroyuki Date: Wed, 7 Jan 2009 18:08:35 -0800 Subject: memcg: fix shmem's swap accounting Now, you can see following even when swap accounting is enabled. 1. Create Group 01, and 02. 2. allocate a "file" on tmpfs by a task under 01. 3. swap out the "file" (by memory pressure) 4. Read "file" from a task in group 02. 5. the charge of "file" is moved to group 02. This is not ideal behavior. This is because SwapCache which was loaded by read-ahead is not taken into account.. This is a patch to fix shmem's swapcache behavior. - remove mem_cgroup_cache_charge_swapin(). - Add SwapCache handler routine to mem_cgroup_cache_charge(). By this, shmem's file cache is charged at add_to_page_cache() with GFP_NOWAIT. - pass the page of swapcache to shrink_mem_cgroup. Signed-off-by: KAMEZAWA Hiroyuki Cc: Daisuke Nishimura Cc: Balbir Singh Cc: Paul Menage Cc: Li Zefan Cc: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 6 ++++-- include/linux/swap.h | 8 -------- 2 files changed, 4 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 8ae6ece8c962..326f45c86530 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -56,7 +56,8 @@ extern void mem_cgroup_move_lists(struct page *page, enum lru_list from, enum lru_list to); extern void mem_cgroup_uncharge_page(struct page *page); extern void mem_cgroup_uncharge_cache_page(struct page *page); -extern int mem_cgroup_shrink_usage(struct mm_struct *mm, gfp_t gfp_mask); +extern int mem_cgroup_shrink_usage(struct page *page, + struct mm_struct *mm, gfp_t gfp_mask); extern unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan, struct list_head *dst, @@ -155,7 +156,8 @@ static inline void mem_cgroup_uncharge_cache_page(struct page *page) { } -static inline int mem_cgroup_shrink_usage(struct mm_struct *mm, gfp_t gfp_mask) +static inline int mem_cgroup_shrink_usage(struct page *page, + struct mm_struct *mm, gfp_t gfp_mask) { return 0; } diff --git a/include/linux/swap.h b/include/linux/swap.h index 4ccca25d0f05..d30215578877 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -335,16 +335,8 @@ static inline void disable_swap_token(void) } #ifdef CONFIG_CGROUP_MEM_RES_CTLR -extern int mem_cgroup_cache_charge_swapin(struct page *page, - struct mm_struct *mm, gfp_t mask, bool locked); extern void mem_cgroup_uncharge_swapcache(struct page *page, swp_entry_t ent); #else -static inline -int mem_cgroup_cache_charge_swapin(struct page *page, - struct mm_struct *mm, gfp_t mask, bool locked) -{ - return 0; -} static inline void mem_cgroup_uncharge_swapcache(struct page *page, swp_entry_t ent) { -- cgit v1.2.2 From 999cd8a450f8f93701669a61cac4d3b19eca07e8 Mon Sep 17 00:00:00 2001 From: Paul Menage Date: Wed, 7 Jan 2009 18:08:36 -0800 Subject: cgroups: add a per-subsystem hierarchy_mutex These patches introduce new locking/refcount support for cgroups to reduce the need for subsystems to call cgroup_lock(). This will ultimately allow the atomicity of cgroup_rmdir() (which was removed recently) to be restored. These three patches give: 1/3 - introduce a per-subsystem hierarchy_mutex which a subsystem can use to prevent changes to its own cgroup tree 2/3 - use hierarchy_mutex in place of calling cgroup_lock() in the memory controller 3/3 - introduce a css_tryget() function similar to the one recently proposed by Kamezawa, but avoiding spurious refcount failures in the event of a race between a css_tryget() and an unsuccessful cgroup_rmdir() Future patches will likely involve: - using hierarchy mutex in place of cgroup_lock() in more subsystems where appropriate - restoring the atomicity of cgroup_rmdir() with respect to cgroup_create() This patch: Add a hierarchy_mutex to the cgroup_subsys object that protects changes to the hierarchy observed by that subsystem. It is taken by the cgroup subsystem (in addition to cgroup_mutex) for the following operations: - linking a cgroup into that subsystem's cgroup tree - unlinking a cgroup from that subsystem's cgroup tree - moving the subsystem to/from a hierarchy (including across the bind() callback) Thus if the subsystem holds its own hierarchy_mutex, it can safely traverse its own hierarchy. Signed-off-by: Paul Menage Tested-by: KAMEZAWA Hiroyuki Cc: Li Zefan Cc: Balbir Singh Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/cgroup.h | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 73d1c730c3c4..ce1c1f34c30c 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -340,8 +340,23 @@ struct cgroup_subsys { #define MAX_CGROUP_TYPE_NAMELEN 32 const char *name; - struct cgroupfs_root *root; + /* + * Protects sibling/children links of cgroups in this + * hierarchy, plus protects which hierarchy (or none) the + * subsystem is a part of (i.e. root/sibling). To avoid + * potential deadlocks, the following operations should not be + * undertaken while holding any hierarchy_mutex: + * + * - allocating memory + * - initiating hotplug events + */ + struct mutex hierarchy_mutex; + /* + * Link to parent, and list entry in parent's children. + * Protected by this->hierarchy_mutex and cgroup_lock() + */ + struct cgroupfs_root *root; struct list_head sibling; }; -- cgit v1.2.2 From e7c5ec9193d32b9559a3bb8893ceedbda85201ff Mon Sep 17 00:00:00 2001 From: Paul Menage Date: Wed, 7 Jan 2009 18:08:38 -0800 Subject: cgroups: add css_tryget() Add css_tryget(), that obtains a counted reference on a CSS. It is used in situations where the caller has a "weak" reference to the CSS, i.e. one that does not protect the cgroup from removal via a reference count, but would instead be cleaned up by a destroy() callback. css_tryget() will return true on success, or false if the cgroup is being removed. This is similar to Kamezawa Hiroyuki's patch from a week or two ago, but with the difference that in the event of css_tryget() racing with a cgroup_rmdir(), css_tryget() will only return false if the cgroup really does get removed. This implementation is done by biasing css->refcnt, so that a refcnt of 1 means "releasable" and 0 means "released or releasing". In the event of a race, css_tryget() distinguishes between "released" and "releasing" by checking for the CSS_REMOVED flag in css->flags. Signed-off-by: Paul Menage Tested-by: KAMEZAWA Hiroyuki Cc: Li Zefan Cc: Balbir Singh Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/cgroup.h | 38 ++++++++++++++++++++++++++++++++------ 1 file changed, 32 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index ce1c1f34c30c..e267e62827bb 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -52,9 +52,9 @@ struct cgroup_subsys_state { * hierarchy structure */ struct cgroup *cgroup; - /* State maintained by the cgroup system to allow - * subsystems to be "busy". Should be accessed via css_get() - * and css_put() */ + /* State maintained by the cgroup system to allow subsystems + * to be "busy". Should be accessed via css_get(), + * css_tryget() and and css_put(). */ atomic_t refcnt; @@ -64,11 +64,14 @@ struct cgroup_subsys_state { /* bits in struct cgroup_subsys_state flags field */ enum { CSS_ROOT, /* This CSS is the root of the subsystem */ + CSS_REMOVED, /* This CSS is dead */ }; /* - * Call css_get() to hold a reference on the cgroup; - * + * Call css_get() to hold a reference on the css; it can be used + * for a reference obtained via: + * - an existing ref-counted reference to the css + * - task->cgroups for a locked task */ static inline void css_get(struct cgroup_subsys_state *css) @@ -77,9 +80,32 @@ static inline void css_get(struct cgroup_subsys_state *css) if (!test_bit(CSS_ROOT, &css->flags)) atomic_inc(&css->refcnt); } + +static inline bool css_is_removed(struct cgroup_subsys_state *css) +{ + return test_bit(CSS_REMOVED, &css->flags); +} + +/* + * Call css_tryget() to take a reference on a css if your existing + * (known-valid) reference isn't already ref-counted. Returns false if + * the css has been destroyed. + */ + +static inline bool css_tryget(struct cgroup_subsys_state *css) +{ + if (test_bit(CSS_ROOT, &css->flags)) + return true; + while (!atomic_inc_not_zero(&css->refcnt)) { + if (test_bit(CSS_REMOVED, &css->flags)) + return false; + } + return true; +} + /* * css_put() should be called to release a reference taken by - * css_get() + * css_get() or css_tryget() */ extern void __css_put(struct cgroup_subsys_state *css); -- cgit v1.2.2 From 6af866af34a96fed24a55979a78b6f73bd4e8e87 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Wed, 7 Jan 2009 18:08:45 -0800 Subject: cpuset: remove remaining pointers to cpumask_t Impact: cleanups, use new cpumask API Final trivial cleanups: mainly s/cpumask_t/struct cpumask Note there is a FIXME in generate_sched_domains(). A future patch will change struct cpumask *doms to struct cpumask *doms[]. (I suppose Rusty will do this.) Signed-off-by: Li Zefan Cc: Ingo Molnar Cc: Rusty Russell Acked-by: Mike Travis Cc: Paul Menage Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/cpuset.h | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h index 51ea2bdea0f9..90c6074a36ca 100644 --- a/include/linux/cpuset.h +++ b/include/linux/cpuset.h @@ -20,8 +20,9 @@ extern int number_of_cpusets; /* How many cpusets are defined in system? */ extern int cpuset_init_early(void); extern int cpuset_init(void); extern void cpuset_init_smp(void); -extern void cpuset_cpus_allowed(struct task_struct *p, cpumask_t *mask); -extern void cpuset_cpus_allowed_locked(struct task_struct *p, cpumask_t *mask); +extern void cpuset_cpus_allowed(struct task_struct *p, struct cpumask *mask); +extern void cpuset_cpus_allowed_locked(struct task_struct *p, + struct cpumask *mask); extern nodemask_t cpuset_mems_allowed(struct task_struct *p); #define cpuset_current_mems_allowed (current->mems_allowed) void cpuset_init_current_mems_allowed(void); @@ -86,12 +87,13 @@ static inline int cpuset_init_early(void) { return 0; } static inline int cpuset_init(void) { return 0; } static inline void cpuset_init_smp(void) {} -static inline void cpuset_cpus_allowed(struct task_struct *p, cpumask_t *mask) +static inline void cpuset_cpus_allowed(struct task_struct *p, + struct cpumask *mask) { *mask = cpu_possible_map; } static inline void cpuset_cpus_allowed_locked(struct task_struct *p, - cpumask_t *mask) + struct cpumask *mask) { *mask = cpu_possible_map; } -- cgit v1.2.2 From f9fb860f67b9542cd78d1558dec7058092b57d8e Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 7 Jan 2009 18:08:46 -0800 Subject: pid: implement ns_of_pid A current problem with the pid namespace is that it is easy to do pid related work after exit_task_namespaces which drops the nsproxy pointer. However if we are doing pid namespace related work we are always operating on some struct pid which retains the pid_namespace pointer of the pid namespace it was allocated in. So provide ns_of_pid which allows us to find the pid namespace a pid was allocated in. Using this we have the needed infrastructure to do pid namespace related work at anytime we have a struct pid, removing the chance of accidentally having a NULL pointer dereference when accessing current->nsproxy. Signed-off-by: Eric W. Biederman Signed-off-by: Sukadev Bhattiprolu Cc: Oleg Nesterov Cc: Roland McGrath Cc: Bastian Blank Cc: Pavel Emelyanov Cc: Nadia Derbey Acked-by: Serge Hallyn Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/pid.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pid.h b/include/linux/pid.h index bb206c56d1f0..49f1c2f66e95 100644 --- a/include/linux/pid.h +++ b/include/linux/pid.h @@ -122,6 +122,24 @@ int next_pidmap(struct pid_namespace *pid_ns, int last); extern struct pid *alloc_pid(struct pid_namespace *ns); extern void free_pid(struct pid *pid); +/* + * ns_of_pid() returns the pid namespace in which the specified pid was + * allocated. + * + * NOTE: + * ns_of_pid() is expected to be called for a process (task) that has + * an attached 'struct pid' (see attach_pid(), detach_pid()) i.e @pid + * is expected to be non-NULL. If @pid is NULL, caller should handle + * the resulting NULL pid-ns. + */ +static inline struct pid_namespace *ns_of_pid(struct pid *pid) +{ + struct pid_namespace *ns = NULL; + if (pid) + ns = pid->numbers[pid->level].ns; + return ns; +} + /* * the helpers to get the pid's id seen from different namespaces * -- cgit v1.2.2 From 61bce0f1371cfff497fe85594fd39d1a0b15ebe1 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 7 Jan 2009 18:08:49 -0800 Subject: pid: generalize task_active_pid_ns Currently task_active_pid_ns is not safe to call after a task becomes a zombie and exit_task_namespaces is called, as nsproxy becomes NULL. By reading the pid namespace from the pid of the task we can trivially solve this problem at the cost of one extra memory read in what should be the same cacheline as we read the namespace from. When moving things around I have made task_active_pid_ns out of line because keeping it in pid_namespace.h would require adding includes of pid.h and sched.h that I don't think we want. This change does make task_active_pid_ns unsafe to call during copy_process until we attach a pid on the task_struct which seems to be a reasonable trade off. Signed-off-by: Eric W. Biederman Signed-off-by: Sukadev Bhattiprolu Cc: Oleg Nesterov Cc: Roland McGrath Cc: Bastian Blank Cc: Pavel Emelyanov Cc: Nadia Derbey Acked-by: Serge Hallyn Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/pid_namespace.h | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pid_namespace.h b/include/linux/pid_namespace.h index d82fe825d62f..38d10326246a 100644 --- a/include/linux/pid_namespace.h +++ b/include/linux/pid_namespace.h @@ -79,11 +79,7 @@ static inline void zap_pid_ns_processes(struct pid_namespace *ns) } #endif /* CONFIG_PID_NS */ -static inline struct pid_namespace *task_active_pid_ns(struct task_struct *tsk) -{ - return tsk->nsproxy->pid_ns; -} - +extern struct pid_namespace *task_active_pid_ns(struct task_struct *tsk); void pidhash_init(void); void pidmap_init(void); -- cgit v1.2.2 From f06295b44c296c8fb08823a3118468ae343b60f2 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 7 Jan 2009 18:08:52 -0800 Subject: ELF: implement AT_RANDOM for glibc PRNG seeding While discussing[1] the need for glibc to have access to random bytes during program load, it seems that an earlier attempt to implement AT_RANDOM got stalled. This implements a random 16 byte string, available to every ELF program via a new auxv AT_RANDOM vector. [1] http://sourceware.org/ml/libc-alpha/2008-10/msg00006.html Ulrich said: glibc needs right after startup a bit of random data for internal protections (stack canary etc). What is now in upstream glibc is that we always unconditionally open /dev/urandom, read some data, and use it. For every process startup. That's slow. ... The solution is to provide a limited amount of random data to the starting process in the aux vector. I suggested 16 bytes and this is what the patch implements. If we need only 16 bytes or less we use the data directly. If we need more we'll use the 16 bytes to see a PRNG. This avoids the costly /dev/urandom use and it allows the kernel to use the most adequate source of random data for this purpose. It might not be the same pool as that for /dev/urandom. Concerns were expressed about the depletion of the randomness pool. But this patch doesn't make the situation worse, it doesn't deplete entropy more than happens now. Signed-off-by: Kees Cook Cc: Jakub Jelinek Cc: Andi Kleen Cc: Ulrich Drepper Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/auxvec.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/auxvec.h b/include/linux/auxvec.h index d7afa9dd6635..f3b5d4e3a2ac 100644 --- a/include/linux/auxvec.h +++ b/include/linux/auxvec.h @@ -23,16 +23,16 @@ #define AT_PLATFORM 15 /* string identifying CPU for optimizations */ #define AT_HWCAP 16 /* arch dependent hints at CPU capabilities */ #define AT_CLKTCK 17 /* frequency at which times() increments */ - +/* AT_* values 18 through 22 are reserved */ #define AT_SECURE 23 /* secure mode boolean */ - #define AT_BASE_PLATFORM 24 /* string identifying real platform, may * differ from AT_PLATFORM. */ +#define AT_RANDOM 25 /* address of 16 random bytes */ #define AT_EXECFN 31 /* filename of program */ #ifdef __KERNEL__ -#define AT_VECTOR_SIZE_BASE 18 /* NEW_AUX_ENT entries in auxiliary table */ +#define AT_VECTOR_SIZE_BASE 19 /* NEW_AUX_ENT entries in auxiliary table */ /* number of "#define AT_.*" above, minus {AT_NULL, AT_IGNORE, AT_NOTELF} */ #endif -- cgit v1.2.2 From 91f68b7359144aa40bb9668124543d15284750b4 Mon Sep 17 00:00:00 2001 From: Wu Fengguang Date: Wed, 7 Jan 2009 18:09:12 -0800 Subject: generic swap(): introduce global macro swap(a, b) There have been some local definitions of swap(), it's time to replace them all with a uniform one. Signed-off-by: Wu Fengguang Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kernel.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 6b8e2027165e..343df9ef2412 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -476,6 +476,12 @@ static inline char *pack_hex_byte(char *buf, u8 byte) __val = __val < __min ? __min: __val; \ __val > __max ? __max: __val; }) + +/* + * swap - swap value of @a and @b + */ +#define swap(a, b) ({ typeof(a) __tmp = (a); (a) = (b); (b) = __tmp; }) + /** * container_of - cast a member of a structure out to the containing structure * @ptr: the pointer to the member. -- cgit v1.2.2 From 859cb7f2a4244ea6da206d3fe9cc8a6810947a68 Mon Sep 17 00:00:00 2001 From: Richard Purdie Date: Thu, 8 Jan 2009 17:55:03 +0000 Subject: leds: Add suspend/resume to the core class Add suspend/resume to the core class and remove all the now unneeded code from various drivers. Originally the class code couldn't support suspend/resume but since class_device can there is no reason for each driver doing its own suspend/resume anymore. --- include/linux/leds.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/leds.h b/include/linux/leds.h index 3c1a8ce6a5ea..24489da701e3 100644 --- a/include/linux/leds.h +++ b/include/linux/leds.h @@ -32,7 +32,10 @@ struct led_classdev { int brightness; int flags; + /* Lower 16 bits reflect status */ #define LED_SUSPENDED (1 << 0) + /* Upper 16 bits reflect control information */ +#define LED_CORE_SUSPENDRESUME (1 << 16) /* Set LED brightness level */ /* Must not sleep, use a workqueue if needed */ -- cgit v1.2.2 From 69279fb9a95051971ac03e558c4d46e7ba84ab3a Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Wed, 31 Dec 2008 12:52:41 +0000 Subject: regulator: Clean up kerneldoc warnings Remove kerneldoc warnings that don't relate to missing documentation, mostly by renaming parameters in the documentation to match their actual names. Signed-off-by: Mark Brown Signed-off-by: Liam Girdwood --- include/linux/regulator/consumer.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/regulator/consumer.h b/include/linux/regulator/consumer.h index afdc4558bb94..801bf77ff4e2 100644 --- a/include/linux/regulator/consumer.h +++ b/include/linux/regulator/consumer.h @@ -104,10 +104,10 @@ struct regulator; /** * struct regulator_bulk_data - Data used for bulk regulator operations. * - * @supply The name of the supply. Initialised by the user before - * using the bulk regulator APIs. - * @consumer The regulator consumer for the supply. This will be managed - * by the bulk API. + * @supply: The name of the supply. Initialised by the user before + * using the bulk regulator APIs. + * @consumer: The regulator consumer for the supply. This will be managed + * by the bulk API. * * The regulator APIs provide a series of regulator_bulk_() API calls as * a convenience to consumers which require multiple supplies. This -- cgit v1.2.2 From c8e7e4640facbe99d10a6e262523b25be129b9b9 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Wed, 31 Dec 2008 12:52:42 +0000 Subject: regulator: Add missing kerneldoc This is only the documentation that the kerneldoc system warns about. Signed-off-by: Mark Brown Signed-off-by: Liam Girdwood --- include/linux/regulator/driver.h | 40 +++++++++++++++++++++++++++++++++++++- include/linux/regulator/machine.h | 41 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 80 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/regulator/driver.h b/include/linux/regulator/driver.h index e37d80561985..84c3737c2d26 100644 --- a/include/linux/regulator/driver.h +++ b/include/linux/regulator/driver.h @@ -24,7 +24,37 @@ struct regulator_init_data; /** * struct regulator_ops - regulator operations. * - * This struct describes regulator operations. + * This struct describes regulator operations which can be implemented by + * regulator chip drivers. + * + * @enable: Enable the regulator. + * @disable: Disable the regulator. + * @is_enabled: Return 1 if the reguator is enabled, 0 otherwise. + * + * @set_voltage: Set the voltage for the regulator within the range specified. + * The driver should select the voltage closest to min_uV. + * @get_voltage: Return the currently configured voltage for the regulator. + * + * @set_current: Set the current for the regulator within the range specified. + * The driver should select the current closest to min_uA. + * @get_current: Return the currently configured current for the regulator. + * + * @set_current_limit: Configure a limit for a current-limited regulator. + * @get_current_limit: Get the limit for a current-limited regulator. + * + * @set_mode: Set the operating mode for the regulator. + * @get_mode: Get the current operating mode for the regulator. + * @get_optimum_mode: Get the most efficient operating mode for the regulator + * when running with the specified parameters. + * + * @set_suspend_voltage: Set the voltage for the regulator when the system + * is suspended. + * @set_suspend_enable: Mark the regulator as enabled when the system is + * suspended. + * @set_suspend_disable: Mark the regulator as disabled when the system is + * suspended. + * @set_suspend_mode: Set the operating mode for the regulator when the + * system is suspended. */ struct regulator_ops { @@ -75,6 +105,14 @@ enum regulator_type { /** * struct regulator_desc - Regulator descriptor * + * Each regulator registered with the core is described with a structure of + * this type. + * + * @name: Identifying name for the regulator. + * @id: Numerical identifier for the regulator. + * @ops: Regulator operations table. + * @type: Indicates if the regulator is a voltage or current regulator. + * @owner: Module providing the regulator, used for refcounting. */ struct regulator_desc { const char *name; diff --git a/include/linux/regulator/machine.h b/include/linux/regulator/machine.h index c6d69331a81e..3794773b23d2 100644 --- a/include/linux/regulator/machine.h +++ b/include/linux/regulator/machine.h @@ -44,6 +44,10 @@ struct regulator; * struct regulator_state - regulator state during low power syatem states * * This describes a regulators state during a system wide low power state. + * + * @uV: Operating voltage during suspend. + * @mode: Operating mode during suspend. + * @enabled: Enabled during suspend. */ struct regulator_state { int uV; /* suspend voltage */ @@ -55,6 +59,30 @@ struct regulator_state { * struct regulation_constraints - regulator operating constraints. * * This struct describes regulator and board/machine specific constraints. + * + * @name: Descriptive name for the constraints, used for display purposes. + * + * @min_uV: Smallest voltage consumers may set. + * @max_uV: Largest voltage consumers may set. + * + * @min_uA: Smallest consumers consumers may set. + * @max_uA: Largest current consumers may set. + * + * @valid_modes_mask: Mask of modes which may be configured by consumers. + * @valid_ops_mask: Operations which may be performed by consumers. + * + * @always_on: Set if the regulator should never be disabled. + * @boot_on: Set if the regulator is enabled when the system is initially + * started. + * @apply_uV: Apply the voltage constraint when initialising. + * + * @input_uV: Input voltage for regulator when supplied by another regulator. + * + * @state_disk: State for regulator when system is suspended in disk mode. + * @state_mem: State for regulator when system is suspended in mem mode. + * @state_standby: State for regulator when system is suspended in standby + * mode. + * @initial_state: Suspend state to set by default. */ struct regulation_constraints { @@ -93,6 +121,9 @@ struct regulation_constraints { * struct regulator_consumer_supply - supply -> device mapping * * This maps a supply name to a device. + * + * @dev: Device structure for the consumer. + * @supply: Name for the supply. */ struct regulator_consumer_supply { struct device *dev; /* consumer */ @@ -103,6 +134,16 @@ struct regulator_consumer_supply { * struct regulator_init_data - regulator platform initialisation data. * * Initialisation constraints, our supply and consumers supplies. + * + * @supply_regulator_dev: Parent regulator (if any). + * + * @constraints: Constraints. These must be specified for the regulator to + * be usable. + * @num_consumer_supplies: Number of consumer device supplies. + * @consumer_supplies: Consumer device supply configuration. + * + * @regulator_init: Callback invoked when the regulator has been registered. + * @driver_data: Data passed to regulator_init. */ struct regulator_init_data { struct device *supply_regulator_dev; /* or NULL for LINE */ -- cgit v1.2.2 From 0ba4887c6329043d6cee5b5b477cfe50c2b57674 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Thu, 8 Jan 2009 11:50:23 -0800 Subject: regulator: fix kernel-doc warnings Fix kernel-doc warnings in regulator/driver.h: Warning(linux-next-20090108//include/linux/regulator/driver.h:95): Excess struct/union/enum/typedef member 'set_current' description in 'regulator_ops' Warning(linux-next-20090108//include/linux/regulator/driver.h:95): Excess struct/union/enum/typedef member 'get_current' description in 'regulator_ops' Warning(linux-next-20090108//include/linux/regulator/driver.h:124): No description found for parameter 'irq' Signed-off-by: Randy Dunlap cc: Liam Girdwood cc: Mark Brown Signed-off-by: Liam Girdwood --- include/linux/regulator/driver.h | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/regulator/driver.h b/include/linux/regulator/driver.h index 84c3737c2d26..2dae05705f13 100644 --- a/include/linux/regulator/driver.h +++ b/include/linux/regulator/driver.h @@ -29,16 +29,12 @@ struct regulator_init_data; * * @enable: Enable the regulator. * @disable: Disable the regulator. - * @is_enabled: Return 1 if the reguator is enabled, 0 otherwise. + * @is_enabled: Return 1 if the regulator is enabled, 0 otherwise. * * @set_voltage: Set the voltage for the regulator within the range specified. * The driver should select the voltage closest to min_uV. * @get_voltage: Return the currently configured voltage for the regulator. * - * @set_current: Set the current for the regulator within the range specified. - * The driver should select the current closest to min_uA. - * @get_current: Return the currently configured current for the regulator. - * * @set_current_limit: Configure a limit for a current-limited regulator. * @get_current_limit: Get the limit for a current-limited regulator. * @@ -111,6 +107,7 @@ enum regulator_type { * @name: Identifying name for the regulator. * @id: Numerical identifier for the regulator. * @ops: Regulator operations table. + * @irq: Interrupt number for the regulator. * @type: Indicates if the regulator is a voltage or current regulator. * @owner: Module providing the regulator, used for refcounting. */ -- cgit v1.2.2 From f4f6bda00fc6bf995a35d8246db45aacaa9b3f09 Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Wed, 3 Dec 2008 08:48:52 +0000 Subject: backlight: add support for Toppoly TDO35S series to tdo24m lcd driver Signed-off-by: Mike Rapoport Acked-by: Eric Miao Signed-off-by: Richard Purdie --- include/linux/spi/tdo24m.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) create mode 100644 include/linux/spi/tdo24m.h (limited to 'include/linux') diff --git a/include/linux/spi/tdo24m.h b/include/linux/spi/tdo24m.h new file mode 100644 index 000000000000..7572d4e1fe76 --- /dev/null +++ b/include/linux/spi/tdo24m.h @@ -0,0 +1,13 @@ +#ifndef __TDO24M_H__ +#define __TDO24M_H__ + +enum tdo24m_model { + TDO24M, + TDO35S, +}; + +struct tdo24m_platform_data { + enum tdo24m_model model; +}; + +#endif /* __TDO24M_H__ */ -- cgit v1.2.2 From 0c3573f19d135d718264e38c46597295bd6154b7 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Fri, 9 Jan 2009 08:31:05 +1100 Subject: md: use sysfs_notify_dirent to notify changes to md/sync_action. There is no compelling need for this, but sysfs_notify_dirent is a nicer interface and the change is good for consistency. Signed-off-by: NeilBrown --- include/linux/raid/md_k.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/raid/md_k.h b/include/linux/raid/md_k.h index 8fc909ef6787..663803eaf0de 100644 --- a/include/linux/raid/md_k.h +++ b/include/linux/raid/md_k.h @@ -244,6 +244,7 @@ struct mddev_s struct sysfs_dirent *sysfs_state; /* handle for 'array_state' * file in sysfs. */ + struct sysfs_dirent *sysfs_action; /* handle for 'sync_action' */ spinlock_t write_lock; wait_queue_head_t sb_wait; /* for waiting on superblock updates */ -- cgit v1.2.2 From 019c4e2f3e02aac4b44003913b54ca4b332e4371 Mon Sep 17 00:00:00 2001 From: Andre Noll Date: Fri, 9 Jan 2009 08:31:06 +1100 Subject: md: raid0: Represent device offset in sectors. Rename zone->dev_offset to zone->dev_start to make sure all users have been converted. Signed-off-by: Andre Noll Signed-off-by: NeilBrown --- include/linux/raid/raid0.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/raid/raid0.h b/include/linux/raid/raid0.h index 1b2dda035f8e..61c3d29dc158 100644 --- a/include/linux/raid/raid0.h +++ b/include/linux/raid/raid0.h @@ -6,7 +6,7 @@ struct strip_zone { sector_t zone_offset; /* Zone offset in md_dev */ - sector_t dev_offset; /* Zone offset in real dev */ + sector_t dev_start; /* Zone offset in real dev (in sectors) */ sector_t size; /* Zone size */ int nb_dev; /* # of devices attached to the zone */ mdk_rdev_t **dev; /* Devices attached to the zone */ -- cgit v1.2.2 From 6199d3db0fc34f8ada895879d04a353a6ae632bc Mon Sep 17 00:00:00 2001 From: Andre Noll Date: Fri, 9 Jan 2009 08:31:07 +1100 Subject: md: raid0: Represent zone->zone_offset in sectors. For the same reason as in the previous patch, rename it from zone_offset to zone_start. Signed-off-by: Andre Noll Signed-off-by: NeilBrown --- include/linux/raid/raid0.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/raid/raid0.h b/include/linux/raid/raid0.h index 61c3d29dc158..eaf4f6ac55f6 100644 --- a/include/linux/raid/raid0.h +++ b/include/linux/raid/raid0.h @@ -5,7 +5,7 @@ struct strip_zone { - sector_t zone_offset; /* Zone offset in md_dev */ + sector_t zone_start; /* Zone offset in md_dev (in sectors) */ sector_t dev_start; /* Zone offset in real dev (in sectors) */ sector_t size; /* Zone size */ int nb_dev; /* # of devices attached to the zone */ -- cgit v1.2.2 From 83838ed87898e0a8ff8dbf001e54e6c017f0a011 Mon Sep 17 00:00:00 2001 From: Andre Noll Date: Fri, 9 Jan 2009 08:31:07 +1100 Subject: md: raid0: Represent the size of strip zones in sectors. This completes the block -> sector conversion of struct strip_zone. Signed-off-by: Andre Noll Signed-off-by: NeilBrown --- include/linux/raid/raid0.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/raid/raid0.h b/include/linux/raid/raid0.h index eaf4f6ac55f6..c12521d027e2 100644 --- a/include/linux/raid/raid0.h +++ b/include/linux/raid/raid0.h @@ -7,7 +7,7 @@ struct strip_zone { sector_t zone_start; /* Zone offset in md_dev (in sectors) */ sector_t dev_start; /* Zone offset in real dev (in sectors) */ - sector_t size; /* Zone size */ + sector_t sectors; /* Zone size in sectors */ int nb_dev; /* # of devices attached to the zone */ mdk_rdev_t **dev; /* Devices attached to the zone */ }; -- cgit v1.2.2 From ccacc7d2cf03114a24ab903f710118e9e5d43273 Mon Sep 17 00:00:00 2001 From: Andre Noll Date: Fri, 9 Jan 2009 08:31:08 +1100 Subject: md: raid0: make hash_spacing and preshift sector-based. This patch renames the hash_spacing and preshift members of struct raid0_private_data to spacing and sector_shift respectively and changes the semantics as follows: We always have spacing = 2 * hash_spacing. In case sizeof(sector_t) > sizeof(u32) we also have sector_shift = preshift + 1 while sector_shift = preshift = 0 otherwise. Note that the values of nb_zone and zone are unaffected by these changes because in the sector_div() preceeding the assignement of these two variables both arguments double. Signed-off-by: Andre Noll Signed-off-by: NeilBrown --- include/linux/raid/raid0.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/raid/raid0.h b/include/linux/raid/raid0.h index c12521d027e2..fd42aa87c391 100644 --- a/include/linux/raid/raid0.h +++ b/include/linux/raid/raid0.h @@ -19,8 +19,8 @@ struct raid0_private_data mdk_rdev_t **devlist; /* lists of rdevs, pointed to by strip_zone->dev */ int nr_strip_zones; - sector_t hash_spacing; - int preshift; /* shift this before divide by hash_spacing */ + sector_t spacing; + int sector_shift; /* shift this before divide by spacing */ }; typedef struct raid0_private_data raid0_conf_t; -- cgit v1.2.2 From 159ec1fc060ab22b157a62364045f5e98749c4d3 Mon Sep 17 00:00:00 2001 From: Cheng Renquan Date: Fri, 9 Jan 2009 08:31:08 +1100 Subject: md: use list_for_each_entry macro directly The rdev_for_each macro defined in is identical to list_for_each_entry_safe, from , it should be defined to use list_for_each_entry_safe, instead of reinventing the wheel. But some calls to each_entry_safe don't really need a safe version, just a direct list_for_each_entry is enough, this could save a temp variable (tmp) in every function that used rdev_for_each. In this patch, most rdev_for_each loops are replaced by list_for_each_entry, totally save many tmp vars; and only in the other situations that will call list_del to delete an entry, the safe version is used. Signed-off-by: Cheng Renquan Signed-off-by: NeilBrown --- include/linux/raid/md_k.h | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/raid/md_k.h b/include/linux/raid/md_k.h index 663803eaf0de..8f9a54c1fb0e 100644 --- a/include/linux/raid/md_k.h +++ b/include/linux/raid/md_k.h @@ -335,17 +335,14 @@ static inline char * mdname (mddev_t * mddev) * iterates through some rdev ringlist. It's safe to remove the * current 'rdev'. Dont touch 'tmp' though. */ -#define rdev_for_each_list(rdev, tmp, list) \ - \ - for ((tmp) = (list).next; \ - (rdev) = (list_entry((tmp), mdk_rdev_t, same_set)), \ - (tmp) = (tmp)->next, (tmp)->prev != &(list) \ - ; ) +#define rdev_for_each_list(rdev, tmp, head) \ + list_for_each_entry_safe(rdev, tmp, head, same_set) + /* * iterates through the 'same array disks' ringlist */ #define rdev_for_each(rdev, tmp, mddev) \ - rdev_for_each_list(rdev, tmp, (mddev)->disks) + list_for_each_entry_safe(rdev, tmp, &((mddev)->disks), same_set) #define rdev_for_each_rcu(rdev, mddev) \ list_for_each_entry_rcu(rdev, &((mddev)->disks), same_set) -- cgit v1.2.2 From cd2ac9321c26dc7a76455cd2a4df89123fa2b73e Mon Sep 17 00:00:00 2001 From: Cheng Renquan Date: Fri, 9 Jan 2009 08:31:08 +1100 Subject: md: need another print_sb for mdp_superblock_1 md_print_devices is called in two code path: MD_BUG(...), and md_ioctl with PRINT_RAID_DEBUG. it will dump out all in use md devices information; However, it wrongly processed two types of superblock in one: The header file has defined two types of superblock, struct mdp_superblock_s (typedefed with mdp_super_t) according to md with metadata 0.90, and struct mdp_superblock_1 according to md with metadata 1.0 and later, These two types of superblock are very different, The md_print_devices code processed them both in mdp_super_t, that would lead to wrong informaton dump like: [ 6742.345877] [ 6742.345887] md: ********************************** [ 6742.345890] md: * * [ 6742.345892] md: ********************************** [ 6742.345896] md1: [ 6742.345907] md: rdev ram7, SZ:00065472 F:0 S:1 DN:3 [ 6742.345909] md: rdev superblock: [ 6742.345914] md: SB: (V:0.90.0) ID:<42ef13c7.598c059a.5f9f1645.801e9ee6> CT:4919856d [ 6742.345918] md: L5 S00065472 ND:4 RD:4 md1 LO:2 CS:65536 [ 6742.345922] md: UT:4919856d ST:1 AD:4 WD:4 FD:0 SD:0 CSUM:b7992907 E:00000001 [ 6742.345924] D 0: DISK [ 6742.345930] D 1: DISK [ 6742.345933] D 2: DISK [ 6742.345937] D 3: DISK [ 6742.345942] md: THIS: DISK ... [ 6742.346058] md0: [ 6742.346067] md: rdev ram3, SZ:00065472 F:0 S:1 DN:3 [ 6742.346070] md: rdev superblock: [ 6742.346073] md: SB: (V:1.0.0) ID:<369aad81.00000000.00000000.00000000> CT:9a322a9c [ 6742.346077] md: L-1507699579 S976570180 ND:48 RD:0 md0 LO:65536 CS:196610 [ 6742.346081] md: UT:00000018 ST:0 AD:131048 WD:0 FD:8 SD:0 CSUM:00000000 E:00000000 [ 6742.346084] D 0: DISK [ 6742.346089] D 1: DISK [ 6742.346092] D 2: DISK [ 6742.346096] D 3: DISK [ 6742.346102] md: THIS: DISK ... [ 6742.346219] md: ********************************** [ 6742.346221] Here md1 is metadata 0.90.0, and md0 is metadata 1.2 After some more code to distinguish these two types of superblock, in this patch, it will generate dump information like: [ 7906.755790] [ 7906.755799] md: ********************************** [ 7906.755802] md: * * [ 7906.755804] md: ********************************** [ 7906.755808] md1: [ 7906.755819] md: rdev ram7, SZ:00065472 F:0 S:1 DN:3 [ 7906.755821] md: rdev superblock (MJ:0): [ 7906.755826] md: SB: (V:0.90.0) ID:<3fca7a0d.a612bfed.5f9f1645.801e9ee6> CT:491989f3 [ 7906.755830] md: L5 S00065472 ND:4 RD:4 md1 LO:2 CS:65536 [ 7906.755834] md: UT:491989f3 ST:1 AD:4 WD:4 FD:0 SD:0 CSUM:00fb52ad E:00000001 [ 7906.755836] D 0: DISK [ 7906.755842] D 1: DISK [ 7906.755845] D 2: DISK [ 7906.755849] D 3: DISK [ 7906.755855] md: THIS: DISK ... [ 7906.755972] md0: [ 7906.755981] md: rdev ram3, SZ:00065472 F:0 S:1 DN:3 [ 7906.755984] md: rdev superblock (MJ:1): [ 7906.755989] md: SB: (V:1) (F:0) Array-ID:<5fbcf158:55aa:5fbe:9a79:1e939880dcbd> [ 7906.755990] md: Name: "DG5:0" CT:1226410480 [ 7906.755998] md: L5 SZ130944 RD:4 LO:2 CS:128 DO:24 DS:131048 SO:8 RO:0 [ 7906.755999] md: Dev:00000003 UUID: 9194d744:87f7:a448:85f2:7497b84ce30a [ 7906.756001] md: (F:0) UT:1226410480 Events:0 ResyncOffset:-1 CSUM:0dbcd829 [ 7906.756003] md: (MaxDev:384) ... [ 7906.756113] md: ********************************** [ 7906.756116] this md0 (metadata 1.2) information dumping is exactly according to struct mdp_superblock_1. Signed-off-by: Cheng Renquan Cc: Neil Brown Cc: Dan Williams Signed-off-by: Andrew Morton Signed-off-by: NeilBrown --- include/linux/raid/md_p.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/raid/md_p.h b/include/linux/raid/md_p.h index 8b4de4a41ff1..9491026afe66 100644 --- a/include/linux/raid/md_p.h +++ b/include/linux/raid/md_p.h @@ -194,6 +194,8 @@ static inline __u64 md_event(mdp_super_t *sb) { return (ev<<32)| sb->events_lo; } +#define MD_SUPERBLOCK_1_TIME_SEC_MASK ((1ULL<<40) - 1) + /* * The version-1 superblock : * All numeric fields are little-endian. -- cgit v1.2.2 From d3374825ce57ba2214d375023979f6197ccc1385 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Fri, 9 Jan 2009 08:31:10 +1100 Subject: md: make devices disappear when they are no longer needed. Currently md devices, once created, never disappear until the module is unloaded. This is essentially because the gendisk holds a reference to the mddev, and the mddev holds a reference to the gendisk, this a circular reference. If we drop the reference from mddev to gendisk, then we need to ensure that the mddev is destroyed when the gendisk is destroyed. However it is not possible to hook into the gendisk destruction process to enable this. So we drop the reference from the gendisk to the mddev and destroy the gendisk when the mddev gets destroyed. However this has a complication. Between the call __blkdev_get->get_gendisk->kobj_lookup->md_probe and the call __blkdev_get->md_open there is no obvious way to hold a reference on the mddev any more, so unless something is done, it will disappear and gendisk will be destroyed prematurely. Also, once we decide to destroy the mddev, there will be an unlockable moment before the gendisk is unlinked (blk_unregister_region) during which a new reference to the gendisk can be created. We need to ensure that this reference can not be used. i.e. the ->open must fail. So: 1/ in md_probe we set a flag in the mddev (hold_active) which indicates that the array should be treated as active, even though there are no references, and no appearance of activity. This is cleared by md_release when the device is closed if it is no longer needed. This ensures that the gendisk will survive between md_probe and md_open. 2/ In md_open we check if the mddev we expect to open matches the gendisk that we did open. If there is a mismatch we return -ERESTARTSYS and modify __blkdev_get to retry from the top in that case. In the -ERESTARTSYS sys case we make sure to wait until the old gendisk (that we succeeded in opening) is really gone so we loop at most once. Some udev configurations will always open an md device when it first appears. If we allow an md device that was just created by an open to disappear on an immediate close, then this can race with such udev configurations and result in an infinite loop the device being opened and closed, then re-open due to the 'ADD' even from the first open, and then close and so on. So we make sure an md device, once created by an open, remains active at least until some md 'ioctl' has been made on it. This means that all normal usage of md devices will allow them to disappear promptly when not needed, but the worst that an incorrect usage will do it cause an inactive md device to be left in existence (it can easily be removed). As an array can be stopped by writing to a sysfs attribute echo clear > /sys/block/mdXXX/md/array_state we need to use scheduled work for deleting the gendisk and other kobjects. This allows us to wait for any pending gendisk deletion to complete by simply calling flush_scheduled_work(). Signed-off-by: NeilBrown --- include/linux/raid/md_k.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/raid/md_k.h b/include/linux/raid/md_k.h index 8f9a54c1fb0e..e3d17c7f954e 100644 --- a/include/linux/raid/md_k.h +++ b/include/linux/raid/md_k.h @@ -137,6 +137,8 @@ struct mddev_s struct gendisk *gendisk; struct kobject kobj; + int hold_active; +#define UNTIL_IOCTL 1 /* Superblock information */ int major_version, @@ -246,6 +248,8 @@ struct mddev_s */ struct sysfs_dirent *sysfs_action; /* handle for 'sync_action' */ + struct work_struct del_work; /* used for delayed sysfs removal */ + spinlock_t write_lock; wait_queue_head_t sb_wait; /* for waiting on superblock updates */ atomic_t pending_writes; /* number of active superblock writes */ -- cgit v1.2.2 From efeb53c0e57213e843b7ef3cc6ebcdea7d6186ac Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Fri, 9 Jan 2009 08:31:10 +1100 Subject: md: Allow md devices to be created by name. Using sequential numbers to identify md devices is somewhat artificial. Using names can be a lot more user-friendly. Also, creating md devices by opening the device special file is a bit awkward. So this patch provides a new option for creating and naming devices. Writing a name such as "md_home" to /sys/modules/md_mod/parameters/new_array will cause an array with that name to be created. It will appear in /sys/block/ /proc/partitions and /proc/mdstat as 'md_home'. It will have an arbitrary minor number allocated. md devices that a created by an open are destroyed on the last close when the device is inactive. For named md devices, they will not be destroyed until the array is explicitly stopped, either with the STOP_ARRAY ioctl or by writing 'clear' to /sys/block/md_XXXX/md/array_state. The name of the array must start 'md_' to avoid conflict with other devices. Signed-off-by: NeilBrown --- include/linux/raid/md_k.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/raid/md_k.h b/include/linux/raid/md_k.h index e3d17c7f954e..dac4217194b8 100644 --- a/include/linux/raid/md_k.h +++ b/include/linux/raid/md_k.h @@ -139,6 +139,7 @@ struct mddev_s struct kobject kobj; int hold_active; #define UNTIL_IOCTL 1 +#define UNTIL_STOP 2 /* Superblock information */ int major_version, -- cgit v1.2.2 From 4044ba58dd15cb01797c4fd034f39ef4a75f7cc3 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Fri, 9 Jan 2009 08:31:11 +1100 Subject: md: don't retry recovery of raid1 that fails due to error on source drive. If a raid1 has only one working drive and it has a sector which gives an error on read, then an attempt to recover onto a spare will fail, but as the single remaining drive is not removed from the array, the recovery will be immediately re-attempted, resulting in an infinite recovery loop. So detect this situation and don't retry recovery once an error on the lone remaining drive is detected. Allow recovery to be retried once every time a spare is added in case the problem wasn't actually a media error. Signed-off-by: NeilBrown --- include/linux/raid/md_k.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/raid/md_k.h b/include/linux/raid/md_k.h index dac4217194b8..9743e4dbc918 100644 --- a/include/linux/raid/md_k.h +++ b/include/linux/raid/md_k.h @@ -218,6 +218,9 @@ struct mddev_s #define MD_RECOVERY_FROZEN 9 unsigned long recovery; + int recovery_disabled; /* if we detect that recovery + * will always fail, set this + * so we don't loop trying */ int in_sync; /* know to not need resync */ struct mutex reconfig_mutex; -- cgit v1.2.2 From 871af1210f13966ab911ed2166e4ab2ce775b99d Mon Sep 17 00:00:00 2001 From: Alan Cox Date: Mon, 5 Jan 2009 14:16:39 +0000 Subject: libata: Add 32bit PIO support This matters for some controllers and in one or two cases almost doubles PIO performance. Add a bmdma32 operations set we can inherit and activate it for some controllers Signed-off-by: Alan Cox Signed-off-by: Jeff Garzik --- include/linux/libata.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/libata.h b/include/linux/libata.h index 3449de597eff..4f7c8fb4d3fe 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -1518,6 +1518,7 @@ extern void sata_pmp_error_handler(struct ata_port *ap); extern const struct ata_port_operations ata_sff_port_ops; extern const struct ata_port_operations ata_bmdma_port_ops; +extern const struct ata_port_operations ata_bmdma32_port_ops; /* PIO only, sg_tablesize and dma_boundary limits can be removed */ #define ATA_PIO_SHT(drv_name) \ @@ -1545,6 +1546,8 @@ extern void ata_sff_exec_command(struct ata_port *ap, const struct ata_taskfile *tf); extern unsigned int ata_sff_data_xfer(struct ata_device *dev, unsigned char *buf, unsigned int buflen, int rw); +extern unsigned int ata_sff_data_xfer32(struct ata_device *dev, + unsigned char *buf, unsigned int buflen, int rw); extern unsigned int ata_sff_data_xfer_noirq(struct ata_device *dev, unsigned char *buf, unsigned int buflen, int rw); extern u8 ata_sff_irq_on(struct ata_port *ap); -- cgit v1.2.2 From fefae48bf8caab7d56ee4f8181f06602cf73d29e Mon Sep 17 00:00:00 2001 From: Wolfgang Grandegger Date: Thu, 8 Jan 2009 19:21:27 +0100 Subject: [MTD] CFI: remove major/minor version check for command set 0x0002 The NOR Flash memory K8P2815UQB from Samsung uses the major version number '0'. Add a quirk to cope with it. Signed-off-by: Wolfgang Grandegger Signed-off-by: David Woodhouse --- include/linux/mtd/cfi.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mtd/cfi.h b/include/linux/mtd/cfi.h index 00e2b575021f..88d3d8fbf9f2 100644 --- a/include/linux/mtd/cfi.h +++ b/include/linux/mtd/cfi.h @@ -520,6 +520,7 @@ struct cfi_fixup { #define CFI_MFR_AMD 0x0001 #define CFI_MFR_ATMEL 0x001F +#define CFI_MFR_SAMSUNG 0x00EC #define CFI_MFR_ST 0x0020 /* STMicroelectronics */ void cfi_fixup(struct mtd_info *mtd, struct cfi_fixup* fixups); -- cgit v1.2.2 From 8dd2f36f317569665e454268a2677cfba3e848f1 Mon Sep 17 00:00:00 2001 From: Andreas Eversberg Date: Sat, 2 Aug 2008 22:51:52 +0200 Subject: mISDN: Add feature via MISDN_CTRL_FILL_EMPTY to fill fifo if empty This prevents underrun of fifo when filled and in case of an underrun it prevents subsequent underruns due to jitter. Improve dsp, so buffers are kept filled with a certain delay, so moderate jitter will not cause underrun all the time -> the audio quality is highly improved. tones are not interrupted by gaps anymore, except when CPU is stalling or in high load. Signed-off-by: Andreas Eversberg Signed-off-by: Karsten Keil --- include/linux/mISDNhw.h | 25 +++++++++++++------------ include/linux/mISDNif.h | 1 + 2 files changed, 14 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mISDNhw.h b/include/linux/mISDNhw.h index e794dfb87504..9384b92dfc65 100644 --- a/include/linux/mISDNhw.h +++ b/include/linux/mISDNhw.h @@ -57,20 +57,21 @@ #define FLG_L2DATA 14 /* channel use L2 DATA primitivs */ #define FLG_ORIGIN 15 /* channel is on origin site */ /* channel specific stuff */ +#define FLG_FILLEMPTY 16 /* fill fifo on first frame (empty) */ /* arcofi specific */ -#define FLG_ARCOFI_TIMER 16 -#define FLG_ARCOFI_ERROR 17 +#define FLG_ARCOFI_TIMER 17 +#define FLG_ARCOFI_ERROR 18 /* isar specific */ -#define FLG_INITIALIZED 16 -#define FLG_DLEETX 17 -#define FLG_LASTDLE 18 -#define FLG_FIRST 19 -#define FLG_LASTDATA 20 -#define FLG_NMD_DATA 21 -#define FLG_FTI_RUN 22 -#define FLG_LL_OK 23 -#define FLG_LL_CONN 24 -#define FLG_DTMFSEND 25 +#define FLG_INITIALIZED 17 +#define FLG_DLEETX 18 +#define FLG_LASTDLE 19 +#define FLG_FIRST 20 +#define FLG_LASTDATA 21 +#define FLG_NMD_DATA 22 +#define FLG_FTI_RUN 23 +#define FLG_LL_OK 24 +#define FLG_LL_CONN 25 +#define FLG_DTMFSEND 26 /* workq events */ #define FLG_RECVQUEUE 30 diff --git a/include/linux/mISDNif.h b/include/linux/mISDNif.h index 8f2d60da04e7..74c903cd7a0a 100644 --- a/include/linux/mISDNif.h +++ b/include/linux/mISDNif.h @@ -312,6 +312,7 @@ clear_channelmap(u_int nr, u_char *map) #define MISDN_CTRL_SETPEER 0x0040 #define MISDN_CTRL_UNSETPEER 0x0080 #define MISDN_CTRL_RX_OFF 0x0100 +#define MISDN_CTRL_FILL_EMPTY 0x0200 #define MISDN_CTRL_HW_FEATURES_OP 0x2000 #define MISDN_CTRL_HW_FEATURES 0x2001 #define MISDN_CTRL_HFC_OP 0x4000 -- cgit v1.2.2 From 8b6015f736125050722dbe59c4f943e78cd626f0 Mon Sep 17 00:00:00 2001 From: Matthias Urlichs Date: Tue, 12 Aug 2008 10:12:09 +0200 Subject: mISDN: Added an ioctl to change the device name To get persistent device names with hotplug we need to rename devices sometime. Signed-off-by: Matthias Urlichs Signed-off-by: Karsten Keil --- include/linux/mISDNif.h | 31 +++++++++++++++++++------------ 1 file changed, 19 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mISDNif.h b/include/linux/mISDNif.h index 74c903cd7a0a..be09476ed854 100644 --- a/include/linux/mISDNif.h +++ b/include/linux/mISDNif.h @@ -36,8 +36,8 @@ * - should be incremented on every checkin */ #define MISDN_MAJOR_VERSION 1 -#define MISDN_MINOR_VERSION 0 -#define MISDN_RELEASE 19 +#define MISDN_MINOR_VERSION 1 +#define MISDN_RELEASE 20 /* primitives for information exchange * generell format @@ -255,16 +255,6 @@ struct sockaddr_mISDN { unsigned char tei; }; -/* timer device ioctl */ -#define IMADDTIMER _IOR('I', 64, int) -#define IMDELTIMER _IOR('I', 65, int) -/* socket ioctls */ -#define IMGETVERSION _IOR('I', 66, int) -#define IMGETCOUNT _IOR('I', 67, int) -#define IMGETDEVINFO _IOR('I', 68, int) -#define IMCTRLREQ _IOR('I', 69, int) -#define IMCLEAR_L2 _IOR('I', 70, int) - struct mISDNversion { unsigned char major; unsigned char minor; @@ -281,6 +271,23 @@ struct mISDN_devinfo { char name[MISDN_MAX_IDLEN]; }; +struct mISDN_devrename { + u_int id; + char name[MISDN_MAX_IDLEN]; /* new name */ +}; + +/* timer device ioctl */ +#define IMADDTIMER _IOR('I', 64, int) +#define IMDELTIMER _IOR('I', 65, int) + +/* socket ioctls */ +#define IMGETVERSION _IOR('I', 66, int) +#define IMGETCOUNT _IOR('I', 67, int) +#define IMGETDEVINFO _IOR('I', 68, int) +#define IMCTRLREQ _IOR('I', 69, int) +#define IMCLEAR_L2 _IOR('I', 70, int) +#define IMSETDEVNAME _IOR('I', 71, struct mISDN_devrename) + static inline int test_channelmap(u_int nr, u_char *map) { -- cgit v1.2.2 From 837468d135dcc49cdabc9fa92fc9550479f60704 Mon Sep 17 00:00:00 2001 From: Matthias Urlichs Date: Sat, 16 Aug 2008 00:04:33 +0200 Subject: mISDN: Use struct device name field struct device already has a 'name' member, use it. Signed-off-by: Matthias Urlichs Signed-off-by: Karsten Keil --- include/linux/mISDNif.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mISDNif.h b/include/linux/mISDNif.h index be09476ed854..a59febb6143a 100644 --- a/include/linux/mISDNif.h +++ b/include/linux/mISDNif.h @@ -431,7 +431,6 @@ struct mISDN_sock { struct mISDNdevice { struct mISDNchannel D; u_int id; - char name[MISDN_MAX_IDLEN]; u_int Dprotocols; u_int Bprotocols; u_int nrbchan; -- cgit v1.2.2 From 1f28fa19d34c0d9186f274e61e4b3dcfc6428c5c Mon Sep 17 00:00:00 2001 From: Martin Bachem Date: Wed, 3 Sep 2008 15:17:45 +0200 Subject: mISDN: Add E-Channel logging features New prim PH_DATA_E_IND. - all E-ch frames are indicated by recv_Echannel(), which pushes E-Channel frames into dch's rqueue - if dchannel is opened with channel nr 0, no E-Channel logging is requested - if dchannel is opened with channel nr 1, E-Channel logging is requested. if layer1 does not support that, -EINVAL in return is appropriate Signed-off-by: Martin Bachem Signed-off-by: Karsten Keil --- include/linux/mISDNhw.h | 1 + include/linux/mISDNif.h | 1 + 2 files changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mISDNhw.h b/include/linux/mISDNhw.h index 9384b92dfc65..97ffdc1d3442 100644 --- a/include/linux/mISDNhw.h +++ b/include/linux/mISDNhw.h @@ -184,6 +184,7 @@ extern void queue_ch_frame(struct mISDNchannel *, u_int, extern int dchannel_senddata(struct dchannel *, struct sk_buff *); extern int bchannel_senddata(struct bchannel *, struct sk_buff *); extern void recv_Dchannel(struct dchannel *); +extern void recv_Echannel(struct dchannel *, struct dchannel *); extern void recv_Bchannel(struct bchannel *); extern void recv_Dchannel_skb(struct dchannel *, struct sk_buff *); extern void recv_Bchannel_skb(struct bchannel *, struct sk_buff *); diff --git a/include/linux/mISDNif.h b/include/linux/mISDNif.h index a59febb6143a..f75d596c5316 100644 --- a/include/linux/mISDNif.h +++ b/include/linux/mISDNif.h @@ -80,6 +80,7 @@ #define PH_DEACTIVATE_IND 0x0202 #define PH_DEACTIVATE_CNF 0x4202 #define PH_DATA_IND 0x2002 +#define PH_DATA_E_IND 0x3002 #define MPH_ACTIVATE_IND 0x0502 #define MPH_DEACTIVATE_IND 0x0602 #define MPH_INFORMATION_IND 0x0702 -- cgit v1.2.2 From 3bd69ad197a4a3d0085a5dc3b5796111bf176b12 Mon Sep 17 00:00:00 2001 From: Andreas Eversberg Date: Sat, 6 Sep 2008 09:03:46 +0200 Subject: mISDN: Add ISDN sample clock API to mISDN core Add ISDN sample clock API to mISDN core (new file clock.c) hfcmulti and mISDNdsp use clock API. Signed-off-by: Andreas Eversberg Signed-off-by: Karsten Keil --- include/linux/mISDNif.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mISDNif.h b/include/linux/mISDNif.h index f75d596c5316..364f1018f0d1 100644 --- a/include/linux/mISDNif.h +++ b/include/linux/mISDNif.h @@ -371,6 +371,7 @@ struct mISDN_ctrl_req { #define DEBUG_L2_TEI 0x00100000 #define DEBUG_L2_TEIFSM 0x00200000 #define DEBUG_TIMER 0x01000000 +#define DEBUG_CLOCK 0x02000000 #define mISDN_HEAD_P(s) ((struct mISDNhead *)&s->cb[0]) #define mISDN_HEAD_PRIM(s) (((struct mISDNhead *)&s->cb[0])->prim) @@ -384,6 +385,7 @@ struct mISDN_ctrl_req { struct mISDNchannel; struct mISDNdevice; struct mISDNstack; +struct mISDNclock; struct channel_req { u_int protocol; @@ -460,6 +462,16 @@ struct mISDNstack { #endif }; +typedef int (clockctl_func_t)(void *, int); + +struct mISDNclock { + struct list_head list; + char name[64]; + int pri; + clockctl_func_t *ctl; + void *priv; +}; + /* global alloc/queue functions */ static inline struct sk_buff * @@ -510,8 +522,13 @@ extern int mISDN_register_device(struct mISDNdevice *, char *name); extern void mISDN_unregister_device(struct mISDNdevice *); extern int mISDN_register_Bprotocol(struct Bprotocol *); extern void mISDN_unregister_Bprotocol(struct Bprotocol *); +extern struct mISDNclock *mISDN_register_clock(char *, int, clockctl_func_t *, + void *); +extern void mISDN_unregister_clock(struct mISDNclock *); extern void set_channel_address(struct mISDNchannel *, u_int, u_int); +extern void mISDN_clock_update(struct mISDNclock *, int, struct timeval *); +extern unsigned short mISDN_clock_get(void); #endif /* __KERNEL__ */ #endif /* mISDNIF_H */ -- cgit v1.2.2 From 02282eee56b75a35e6bbc42cc34c9005eb1653f4 Mon Sep 17 00:00:00 2001 From: Martin Bachem Date: Mon, 8 Sep 2008 15:57:48 +0200 Subject: mISDN: Add ISDN_P_TE_UP0 / ISDN_P_NT_UP0 - new layer1 protocols for UP0 bus - helper #defines to test for TE/NT/S0/E1/UP0 Signed-off-by: Martin Bachem Signed-off-by: Karsten Keil --- include/linux/mISDNif.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mISDNif.h b/include/linux/mISDNif.h index 364f1018f0d1..7f65aa0c1cc5 100644 --- a/include/linux/mISDNif.h +++ b/include/linux/mISDNif.h @@ -200,6 +200,18 @@ #define ISDN_P_NT_S0 0x02 #define ISDN_P_TE_E1 0x03 #define ISDN_P_NT_E1 0x04 +#define ISDN_P_TE_UP0 0x05 +#define ISDN_P_NT_UP0 0x06 + +#define IS_ISDN_P_TE(p) ((p == ISDN_P_TE_S0) || (p == ISDN_P_TE_E1) || \ + (p == ISDN_P_TE_UP0)) +#define IS_ISDN_P_NT(p) ((p == ISDN_P_NT_S0) || (p == ISDN_P_NT_E1) || \ + (p == ISDN_P_NT_UP0)) +#define IS_ISDN_P_S0(p) ((p == ISDN_P_TE_S0) || (p == ISDN_P_NT_S0)) +#define IS_ISDN_P_E1(p) ((p == ISDN_P_TE_E1) || (p == ISDN_P_NT_E1)) +#define IS_ISDN_P_UP0(p) ((p == ISDN_P_TE_UP0) || (p == ISDN_P_NT_UP0)) + + #define ISDN_P_LAPD_TE 0x10 #define ISDN_P_LAPD_NT 0x11 -- cgit v1.2.2 From 1b4d33121f1d991f6ae226cc3333428ff87627bb Mon Sep 17 00:00:00 2001 From: Andreas Eversberg Date: Sun, 14 Sep 2008 12:30:18 +0200 Subject: mISDN: Fix deactivation, if peer IP is removed from l1oip instance. Added GETPEER operation. Socket now checks if device is already busy at a differen mode. Signed-off-by: Andreas Eversberg Signed-off-by: Karsten Keil --- include/linux/mISDNif.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mISDNif.h b/include/linux/mISDNif.h index 7f65aa0c1cc5..3f9988849f32 100644 --- a/include/linux/mISDNif.h +++ b/include/linux/mISDNif.h @@ -204,9 +204,9 @@ #define ISDN_P_NT_UP0 0x06 #define IS_ISDN_P_TE(p) ((p == ISDN_P_TE_S0) || (p == ISDN_P_TE_E1) || \ - (p == ISDN_P_TE_UP0)) + (p == ISDN_P_TE_UP0) || (p == ISDN_P_LAPD_TE)) #define IS_ISDN_P_NT(p) ((p == ISDN_P_NT_S0) || (p == ISDN_P_NT_E1) || \ - (p == ISDN_P_NT_UP0)) + (p == ISDN_P_NT_UP0) || (p == ISDN_P_LAPD_NT)) #define IS_ISDN_P_S0(p) ((p == ISDN_P_TE_S0) || (p == ISDN_P_NT_S0)) #define IS_ISDN_P_E1(p) ((p == ISDN_P_TE_E1) || (p == ISDN_P_NT_E1)) #define IS_ISDN_P_UP0(p) ((p == ISDN_P_TE_UP0) || (p == ISDN_P_NT_UP0)) @@ -333,6 +333,7 @@ clear_channelmap(u_int nr, u_char *map) #define MISDN_CTRL_UNSETPEER 0x0080 #define MISDN_CTRL_RX_OFF 0x0100 #define MISDN_CTRL_FILL_EMPTY 0x0200 +#define MISDN_CTRL_GETPEER 0x0400 #define MISDN_CTRL_HW_FEATURES_OP 0x2000 #define MISDN_CTRL_HW_FEATURES 0x2001 #define MISDN_CTRL_HFC_OP 0x4000 -- cgit v1.2.2 From b36b654a7e82308cea063cdf909a7f246105c2a3 Mon Sep 17 00:00:00 2001 From: Matthias Urlichs Date: Sat, 16 Aug 2008 00:09:24 +0200 Subject: mISDN: Create /sys/class/mISDN Create /sys/class/mISDN and implement functions to handle device renames. Signed-Off-By: Matthias Urlichs Signed-off-by: Karsten Keil --- include/linux/mISDNif.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mISDNif.h b/include/linux/mISDNif.h index 3f9988849f32..d4229aebf648 100644 --- a/include/linux/mISDNif.h +++ b/include/linux/mISDNif.h @@ -531,7 +531,8 @@ _queue_data(struct mISDNchannel *ch, u_int prim, /* global register/unregister functions */ -extern int mISDN_register_device(struct mISDNdevice *, char *name); +extern int mISDN_register_device(struct mISDNdevice *, + struct device *parent, char *name); extern void mISDN_unregister_device(struct mISDNdevice *); extern int mISDN_register_Bprotocol(struct Bprotocol *); extern void mISDN_unregister_Bprotocol(struct Bprotocol *); @@ -539,6 +540,11 @@ extern struct mISDNclock *mISDN_register_clock(char *, int, clockctl_func_t *, void *); extern void mISDN_unregister_clock(struct mISDNclock *); +static inline struct mISDNdevice *dev_to_mISDN(struct device *dev) +{ + return dev_get_drvdata(dev); +} + extern void set_channel_address(struct mISDNchannel *, u_int, u_int); extern void mISDN_clock_update(struct mISDNclock *, int, struct timeval *); extern unsigned short mISDN_clock_get(void); -- cgit v1.2.2 From 3f75e84a6a697c5cffb78ee15e79498a35473e05 Mon Sep 17 00:00:00 2001 From: Martin Bachem Date: Tue, 4 Nov 2008 14:11:22 +0100 Subject: mISDN: Add layer1 prim MPH_INFORMATION_REQ MPH_INFORMATION provides full D- and B-Channel status overview - new layer1 primitive: MPF_INFORMATON_REQ - layer1 replies with MPH_INFORMATION_IND containing - dch->[state,Flags,nrbchan] - bch[]->[protocol,Flags] - hardware driver should send MPH_INFORMATION_IND on all ph state changes and BChannel state changes to MISDN_ID_ANY Signed-off-by: Martin Bachem Signed-off-by: Karsten Keil --- include/linux/mISDNif.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mISDNif.h b/include/linux/mISDNif.h index d4229aebf648..557477ac3d5b 100644 --- a/include/linux/mISDNif.h +++ b/include/linux/mISDNif.h @@ -289,6 +289,23 @@ struct mISDN_devrename { char name[MISDN_MAX_IDLEN]; /* new name */ }; +/* MPH_INFORMATION_REQ payload */ +struct ph_info_ch { + __u32 protocol; + __u64 Flags; +}; + +struct ph_info_dch { + struct ph_info_ch ch; + __u16 state; + __u16 num_bch; +}; + +struct ph_info { + struct ph_info_dch dch; + struct ph_info_ch bch[]; +}; + /* timer device ioctl */ #define IMADDTIMER _IOR('I', 64, int) #define IMDELTIMER _IOR('I', 65, int) -- cgit v1.2.2 From 4db8e282f2d1dfa43d51ce2a4817901312c9134d Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 9 Jan 2009 14:32:46 -0800 Subject: Revert "driver core: move knode_bus into private structure" This reverts commit b9daa99ee533578e3f88231e7a16784dcb44ec42. Turns out that device_initialize shouldn't fail silently. This series needs to be reworked in order to get into proper shape. Reported-by: Stefan Richter Cc: Alan Cox Cc: Kay Sievers Signed-off-by: Greg Kroah-Hartman --- include/linux/device.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/device.h b/include/linux/device.h index 7d9da4b4993f..8987f4776064 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -366,6 +366,7 @@ struct device_dma_parameters { }; struct device { + struct klist_node knode_bus; struct device *parent; struct device_private *p; -- cgit v1.2.2 From cda5e83fdea476dce9c0a9b1152cd6ca46832cc4 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 9 Jan 2009 14:44:18 -0800 Subject: Revert "driver core: move knode_driver into private structure" This reverts commit 93e746db183b3bdbbda67900f79b5835f9cb388f. Turns out that device_initialize shouldn't fail silently. This series needs to be reworked in order to get into proper shape. Reported-by: Stefan Richter Cc: Alan Cox Cc: Kay Sievers Signed-off-by: Greg Kroah-Hartman --- include/linux/device.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/device.h b/include/linux/device.h index 8987f4776064..c66ceb15acd8 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -366,6 +366,7 @@ struct device_dma_parameters { }; struct device { + struct klist_node knode_driver; struct klist_node knode_bus; struct device *parent; -- cgit v1.2.2 From e2d4077678c7ec7661003c268120582adc544897 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 9 Jan 2009 14:55:37 -0800 Subject: Revert "driver core: move klist_children into private structure" This reverts commit 11c3b5c3e08f4d855cbef52883c266b9ab9df879. Turns out that device_initialize shouldn't fail silently. This series needs to be reworked in order to get into proper shape. Reported-by: Stefan Richter Cc: Alan Cox Cc: Kay Sievers Signed-off-by: Greg Kroah-Hartman --- include/linux/device.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/device.h b/include/linux/device.h index c66ceb15acd8..2975351635d3 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -366,6 +366,8 @@ struct device_dma_parameters { }; struct device { + struct klist klist_children; + struct klist_node knode_parent; /* node in sibling list */ struct klist_node knode_driver; struct klist_node knode_bus; struct device *parent; -- cgit v1.2.2 From 926beadb3dfaddccb3348a5b9e6c2a1f8290a220 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 9 Jan 2009 15:06:12 -0800 Subject: Revert "driver core: create a private portion of struct device" This reverts commit 2831fe6f9cc4e16c103504ee09a47a084297c0f3. Turns out that device_initialize shouldn't fail silently. This series needs to be reworked in order to get into proper shape. Reported-by: Stefan Richter Cc: Alan Cox Cc: Kay Sievers Signed-off-by: Greg Kroah-Hartman --- include/linux/device.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/device.h b/include/linux/device.h index 2975351635d3..45e5b1921fbb 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -28,7 +28,6 @@ #define BUS_ID_SIZE 20 struct device; -struct device_private; struct device_driver; struct driver_private; struct class; @@ -372,8 +371,6 @@ struct device { struct klist_node knode_bus; struct device *parent; - struct device_private *p; - struct kobject kobj; char bus_id[BUS_ID_SIZE]; /* position on parent bus */ unsigned uevent_suppress:1; -- cgit v1.2.2 From 85c210edc46d602a1562aeea0fc74919349c8cf0 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Fri, 9 Jan 2009 16:40:53 -0800 Subject: compiler-gcc.h: add more comments to RELOC_HIDE Requested by C. Lameter Signed-off-by: Andi Kleen Cc: Christoph Lameter Cc: Andi Kleen Cc: Rusty Russell Cc: Stephen Rothwell Cc: Mike Travis Cc: Ingo Molnar Cc: Richard Henderson Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/compiler-gcc.h | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index af40f8eb86f0..1514d534deeb 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -11,9 +11,19 @@ /* The "volatile" is due to gcc bugs */ #define barrier() __asm__ __volatile__("": : :"memory") -/* This macro obfuscates arithmetic on a variable address so that gcc - shouldn't recognize the original var, and make assumptions about it */ /* + * This macro obfuscates arithmetic on a variable address so that gcc + * shouldn't recognize the original var, and make assumptions about it. + * + * This is needed because the C standard makes it undefined to do + * pointer arithmetic on "objects" outside their boundaries and the + * gcc optimizers assume this is the case. In particular they + * assume such arithmetic does not wrap. + * + * A miscompilation has been observed because of this on PPC. + * To work around it we hide the relationship of the pointer and the object + * using this macro. + * * Versions of the ppc64 compiler before 4.1 had a bug where use of * RELOC_HIDE could trash r30. The bug can be worked around by changing * the inline assembly constraint from =g to =r, in this particular -- cgit v1.2.2 From 69347a236b22c3962ea812511495e502dedfd50c Mon Sep 17 00:00:00 2001 From: Harvey Harrison Date: Fri, 9 Jan 2009 16:40:56 -0800 Subject: memstick: annotate endianness of attribute structs The code was shifting the endianness appropriately everywhere, annotate the structs to avoid the sparse warnings when assigning the endian types to the struct members, or passing them to be[16|32]_to_cpu: drivers/memstick/core/mspro_block.c:331:4: warning: cast to restricted __be16 drivers/memstick/core/mspro_block.c:333:4: warning: cast to restricted __be16 drivers/memstick/core/mspro_block.c:335:4: warning: cast to restricted __be16 drivers/memstick/core/mspro_block.c:337:4: warning: cast to restricted __be16 drivers/memstick/core/mspro_block.c:341:4: warning: cast to restricted __be16 drivers/memstick/core/mspro_block.c:347:4: warning: cast to restricted __be32 drivers/memstick/core/mspro_block.c:356:4: warning: cast to restricted __be16 drivers/memstick/core/mspro_block.c:358:4: warning: cast to restricted __be16 drivers/memstick/core/mspro_block.c:364:4: warning: cast to restricted __be16 drivers/memstick/core/mspro_block.c:367:4: warning: cast to restricted __be16 drivers/memstick/core/mspro_block.c:369:4: warning: cast to restricted __be16 drivers/memstick/core/mspro_block.c:371:4: warning: cast to restricted __be16 drivers/memstick/core/mspro_block.c:377:4: warning: cast to restricted __be16 drivers/memstick/core/mspro_block.c:478:4: warning: cast to restricted __be16 drivers/memstick/core/mspro_block.c:480:4: warning: cast to restricted __be16 drivers/memstick/core/mspro_block.c:482:4: warning: cast to restricted __be16 drivers/memstick/core/mspro_block.c:484:4: warning: cast to restricted __be16 drivers/memstick/core/mspro_block.c:486:4: warning: cast to restricted __be16 drivers/memstick/core/mspro_block.c:689:22: expected unsigned int [unsigned] [assigned] data_address drivers/memstick/core/mspro_block.c:689:22: got restricted __be32 [usertype] drivers/memstick/core/mspro_block.c:697:3: warning: cast to restricted __be32 drivers/memstick/core/mspro_block.c:960:17: warning: incorrect type in initializer (different base types) drivers/memstick/core/mspro_block.c:960:17: expected unsigned short [unsigned] data_count drivers/memstick/core/mspro_block.c:960:17: got restricted __be16 [usertype] drivers/memstick/core/mspro_block.c:993:6: warning: cast to restricted __be16 drivers/memstick/core/mspro_block.c:995:28: warning: cast to restricted __be16 Signed-off-by: Harvey Harrison Cc: Alex Dubov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memstick.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memstick.h b/include/linux/memstick.h index d0c37e682234..690c35a9d4cc 100644 --- a/include/linux/memstick.h +++ b/include/linux/memstick.h @@ -100,8 +100,8 @@ struct mspro_param_register { #define MEMSTICK_SYS_PAR8 0x40 #define MEMSTICK_SYS_SERIAL 0x80 - unsigned short data_count; - unsigned int data_address; + __be16 data_count; + __be32 data_address; unsigned char tpc_param; } __attribute__((packed)); -- cgit v1.2.2 From c4be0c1dc4cdc37b175579be1460f15ac6495e9a Mon Sep 17 00:00:00 2001 From: Takashi Sato Date: Fri, 9 Jan 2009 16:40:58 -0800 Subject: filesystem freeze: add error handling of write_super_lockfs/unlockfs Currently, ext3 in mainline Linux doesn't have the freeze feature which suspends write requests. So, we cannot take a backup which keeps the filesystem's consistency with the storage device's features (snapshot and replication) while it is mounted. In many case, a commercial filesystem (e.g. VxFS) has the freeze feature and it would be used to get the consistent backup. If Linux's standard filesystem ext3 has the freeze feature, we can do it without a commercial filesystem. So I have implemented the ioctls of the freeze feature. I think we can take the consistent backup with the following steps. 1. Freeze the filesystem with the freeze ioctl. 2. Separate the replication volume or create the snapshot with the storage device's feature. 3. Unfreeze the filesystem with the unfreeze ioctl. 4. Take the backup from the separated replication volume or the snapshot. This patch: VFS: Changed the type of write_super_lockfs and unlockfs from "void" to "int" so that they can return an error. Rename write_super_lockfs and unlockfs of the super block operation freeze_fs and unfreeze_fs to avoid a confusion. ext3, ext4, xfs, gfs2, jfs: Changed the type of write_super_lockfs and unlockfs from "void" to "int" so that write_super_lockfs returns an error if needed, and unlockfs always returns 0. reiserfs: Changed the type of write_super_lockfs and unlockfs from "void" to "int" so that they always return 0 (success) to keep a current behavior. Signed-off-by: Takashi Sato Signed-off-by: Masayuki Hamaguchi Cc: Cc: Cc: Christoph Hellwig Cc: Dave Kleikamp Cc: Dave Chinner Cc: Alasdair G Kergon Cc: Al Viro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/fs.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 0b87b29f4797..3e59182de9df 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1377,8 +1377,8 @@ struct super_operations { void (*put_super) (struct super_block *); void (*write_super) (struct super_block *); int (*sync_fs)(struct super_block *sb, int wait); - void (*write_super_lockfs) (struct super_block *); - void (*unlockfs) (struct super_block *); + int (*freeze_fs) (struct super_block *); + int (*unfreeze_fs) (struct super_block *); int (*statfs) (struct dentry *, struct kstatfs *); int (*remount_fs) (struct super_block *, int *, char *); void (*clear_inode) (struct inode *); -- cgit v1.2.2 From fcccf502540e3d752d33b2d8e976034dee81f9f7 Mon Sep 17 00:00:00 2001 From: Takashi Sato Date: Fri, 9 Jan 2009 16:40:59 -0800 Subject: filesystem freeze: implement generic freeze feature The ioctls for the generic freeze feature are below. o Freeze the filesystem int ioctl(int fd, int FIFREEZE, arg) fd: The file descriptor of the mountpoint FIFREEZE: request code for the freeze arg: Ignored Return value: 0 if the operation succeeds. Otherwise, -1 o Unfreeze the filesystem int ioctl(int fd, int FITHAW, arg) fd: The file descriptor of the mountpoint FITHAW: request code for unfreeze arg: Ignored Return value: 0 if the operation succeeds. Otherwise, -1 Error number: If the filesystem has already been unfrozen, errno is set to EINVAL. [akpm@linux-foundation.org: fix CONFIG_BLOCK=n] Signed-off-by: Takashi Sato Signed-off-by: Masayuki Hamaguchi Cc: Cc: Cc: Christoph Hellwig Cc: Dave Kleikamp Cc: Dave Chinner Cc: Alasdair G Kergon Cc: Al Viro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/buffer_head.h | 11 ++++++++++- include/linux/fs.h | 7 +++++++ 2 files changed, 17 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index 8605f8a74df9..bd7ac793be19 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -171,7 +171,7 @@ void __wait_on_buffer(struct buffer_head *); wait_queue_head_t *bh_waitq_head(struct buffer_head *bh); int fsync_bdev(struct block_device *); struct super_block *freeze_bdev(struct block_device *); -void thaw_bdev(struct block_device *, struct super_block *); +int thaw_bdev(struct block_device *, struct super_block *); int fsync_super(struct super_block *); int fsync_no_super(struct block_device *); struct buffer_head *__find_get_block(struct block_device *bdev, sector_t block, @@ -346,6 +346,15 @@ static inline int remove_inode_buffers(struct inode *inode) { return 1; } static inline int sync_mapping_buffers(struct address_space *mapping) { return 0; } static inline void invalidate_bdev(struct block_device *bdev) {} +static inline struct super_block *freeze_bdev(struct block_device *sb) +{ + return NULL; +} + +static inline int thaw_bdev(struct block_device *bdev, struct super_block *sb) +{ + return 0; +} #endif /* CONFIG_BLOCK */ #endif /* _LINUX_BUFFER_HEAD_H */ diff --git a/include/linux/fs.h b/include/linux/fs.h index 3e59182de9df..6022f44043f2 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -234,6 +234,8 @@ struct inodes_stat_t { #define BMAP_IOCTL 1 /* obsolete - kept for compatibility */ #define FIBMAP _IO(0x00,1) /* bmap access */ #define FIGETBSZ _IO(0x00,2) /* get the block size used for bmap */ +#define FIFREEZE _IOWR('X', 119, int) /* Freeze */ +#define FITHAW _IOWR('X', 120, int) /* Thaw */ #define FS_IOC_GETFLAGS _IOR('f', 1, long) #define FS_IOC_SETFLAGS _IOW('f', 2, long) @@ -591,6 +593,11 @@ struct block_device { * care to not mess up bd_private for that case. */ unsigned long bd_private; + + /* The counter of freeze processes */ + int bd_fsfreeze_count; + /* Mutex for freeze */ + struct mutex bd_fsfreeze_mutex; }; /* -- cgit v1.2.2 From f4b477c47332367d35686bd2b808c2156b96d7c7 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Sat, 10 Jan 2009 11:12:09 +0000 Subject: rbtree: add const qualifier to some functions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The 'rb_first()', 'rb_last()', 'rb_next()' and 'rb_prev()' calls take a pointer to an RB node or RB root. They do not change the pointed objects, so add a 'const' qualifier in order to make life of the users of these functions easier. Indeed, if I have my own constant pointer &const struct my_type *p, and I call 'rb_next(&p->rb)', I get a GCC warning: warning: passing argument 1 of ‘rb_next’ discards qualifiers from pointer target type Signed-off-by: Artem Bityutskiy Signed-off-by: David Woodhouse Signed-off-by: Linus Torvalds --- include/linux/rbtree.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rbtree.h b/include/linux/rbtree.h index 344bc3495ddb..9c295411d01f 100644 --- a/include/linux/rbtree.h +++ b/include/linux/rbtree.h @@ -140,10 +140,10 @@ extern void rb_insert_color(struct rb_node *, struct rb_root *); extern void rb_erase(struct rb_node *, struct rb_root *); /* Find logical next and previous nodes in a tree */ -extern struct rb_node *rb_next(struct rb_node *); -extern struct rb_node *rb_prev(struct rb_node *); -extern struct rb_node *rb_first(struct rb_root *); -extern struct rb_node *rb_last(struct rb_root *); +extern struct rb_node *rb_next(const struct rb_node *); +extern struct rb_node *rb_prev(const struct rb_node *); +extern struct rb_node *rb_first(const struct rb_root *); +extern struct rb_node *rb_last(const struct rb_root *); /* Fast replacement of a single node without remove/rebalance/add/rebalance */ extern void rb_replace_node(struct rb_node *victim, struct rb_node *new, -- cgit v1.2.2 From 886ad09fc83342aa1c5a02a0b6d3298b78a8067f Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Fri, 9 Jan 2009 15:54:07 -0800 Subject: libata: Add a per-host flag to opt-in into parallel port probes This patch adds a per host flag that allows drivers to opt in into having its busses scanned in parallel. Drivers that do not set this flag get their ports scanned in the "original" sequence. Signed-off-by: Arjan van de Ven Signed-off-by: Linus Torvalds --- include/linux/libata.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/libata.h b/include/linux/libata.h index 4f7c8fb4d3fe..b6b8a7f3ec66 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -239,6 +239,7 @@ enum { /* host set flags */ ATA_HOST_SIMPLEX = (1 << 0), /* Host is simplex, one DMA channel per host only */ ATA_HOST_STARTED = (1 << 1), /* Host started */ + ATA_HOST_PARALLEL_SCAN = (1 << 2), /* Ports on this host can be scanned in parallel */ /* bits 24:31 of host->flags are reserved for LLD specific flags */ -- cgit v1.2.2