aboutsummaryrefslogtreecommitdiffstats
path: root/Documentation
diff options
context:
space:
mode:
Diffstat (limited to 'Documentation')
-rw-r--r--Documentation/ABI/testing/sysfs-bus-rbd83
-rw-r--r--Documentation/ABI/testing/sysfs-platform-asus-laptop16
-rw-r--r--Documentation/ABI/testing/sysfs-platform-eeepc-wmi10
-rw-r--r--Documentation/accounting/getdelays.c1
-rw-r--r--Documentation/driver-model/interface.txt129
-rw-r--r--Documentation/feature-removal-schedule.txt11
-rw-r--r--Documentation/filesystems/Locking219
-rw-r--r--Documentation/filesystems/vfs.txt16
-rw-r--r--Documentation/kernel-parameters.txt7
-rw-r--r--Documentation/power/runtime_pm.txt4
-rw-r--r--Documentation/scsi/scsi_mid_low_api.txt59
-rw-r--r--Documentation/trace/postprocess/trace-vmscan-postprocess.pl11
12 files changed, 281 insertions, 285 deletions
diff --git a/Documentation/ABI/testing/sysfs-bus-rbd b/Documentation/ABI/testing/sysfs-bus-rbd
new file mode 100644
index 000000000000..90a87e2a572b
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-bus-rbd
@@ -0,0 +1,83 @@
1What: /sys/bus/rbd/
2Date: November 2010
3Contact: Yehuda Sadeh <yehuda@hq.newdream.net>,
4 Sage Weil <sage@newdream.net>
5Description:
6
7Used for adding and removing rbd block devices.
8
9Usage: <mon ip addr> <options> <pool name> <rbd image name> [snap name]
10
11 $ echo "192.168.0.1 name=admin rbd foo" > /sys/bus/rbd/add
12
13The snapshot name can be "-" or omitted to map the image read/write. A <dev-id>
14will be assigned for any registered block device. If snapshot is used, it will
15be mapped read-only.
16
17Removal of a device:
18
19 $ echo <dev-id> > /sys/bus/rbd/remove
20
21Entries under /sys/bus/rbd/devices/<dev-id>/
22--------------------------------------------
23
24client_id
25
26 The ceph unique client id that was assigned for this specific session.
27
28major
29
30 The block device major number.
31
32name
33
34 The name of the rbd image.
35
36pool
37
38 The pool where this rbd image resides. The pool-name pair is unique
39 per rados system.
40
41size
42
43 The size (in bytes) of the mapped block device.
44
45refresh
46
47 Writing to this file will reread the image header data and set
48 all relevant data structures accordingly.
49
50current_snap
51
52 The current snapshot for which the device is mapped.
53
54create_snap
55
56 Create a snapshot:
57
58 $ echo <snap-name> > /sys/bus/rbd/devices/<dev-id>/create_snap
59
60rollback_snap
61
62 Rolls back data to the specified snapshot. This goes over the entire
63 list of rados blocks and sends a rollback command to each.
64
65 $ echo <snap-name> > /sys/bus/rbd/devices/<dev-id>/rollback_snap
66
67snap_*
68
69 A directory for each snapshot.
70
71
72Entries under /sys/bus/rbd/devices/<dev-id>/snap_<snap-name>
73-------------------------------------------------------------
74
75id
76
77 The rados internal snapshot id assigned for this snapshot
78
79size
80
81 The size of the image when this snapshot was taken.
82
83
diff --git a/Documentation/ABI/testing/sysfs-platform-asus-laptop b/Documentation/ABI/testing/sysfs-platform-asus-laptop
index 1d775390e856..41ff8ae4dee0 100644
--- a/Documentation/ABI/testing/sysfs-platform-asus-laptop
+++ b/Documentation/ABI/testing/sysfs-platform-asus-laptop
@@ -47,6 +47,20 @@ Date: January 2007
47KernelVersion: 2.6.20 47KernelVersion: 2.6.20
48Contact: "Corentin Chary" <corentincj@iksaif.net> 48Contact: "Corentin Chary" <corentincj@iksaif.net>
49Description: 49Description:
50 Control the bluetooth device. 1 means on, 0 means off. 50 Control the wlan device. 1 means on, 0 means off.
51 This may control the led, the device or both. 51 This may control the led, the device or both.
52Users: Lapsus 52Users: Lapsus
53
54What: /sys/devices/platform/asus_laptop/wimax
55Date: October 2010
56KernelVersion: 2.6.37
57Contact: "Corentin Chary" <corentincj@iksaif.net>
58Description:
59 Control the wimax device. 1 means on, 0 means off.
60
61What: /sys/devices/platform/asus_laptop/wwan
62Date: October 2010
63KernelVersion: 2.6.37
64Contact: "Corentin Chary" <corentincj@iksaif.net>
65Description:
66 Control the wwan (3G) device. 1 means on, 0 means off.
diff --git a/Documentation/ABI/testing/sysfs-platform-eeepc-wmi b/Documentation/ABI/testing/sysfs-platform-eeepc-wmi
new file mode 100644
index 000000000000..e4b5fef5fadd
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-platform-eeepc-wmi
@@ -0,0 +1,10 @@
1What: /sys/devices/platform/eeepc-wmi/cpufv
2Date: Oct 2010
3KernelVersion: 2.6.37
4Contact: "Corentin Chary" <corentincj@iksaif.net>
5Description:
6 Change CPU clock configuration (write-only).
7 There are three available clock configurations:
8 * 0 -> Super Performance Mode
9 * 1 -> High Performance Mode
10 * 2 -> Power Saving Mode
diff --git a/Documentation/accounting/getdelays.c b/Documentation/accounting/getdelays.c
index a2976a6de033..e9c77788a39d 100644
--- a/Documentation/accounting/getdelays.c
+++ b/Documentation/accounting/getdelays.c
@@ -516,6 +516,7 @@ int main(int argc, char *argv[])
516 default: 516 default:
517 fprintf(stderr, "Unknown nla_type %d\n", 517 fprintf(stderr, "Unknown nla_type %d\n",
518 na->nla_type); 518 na->nla_type);
519 case TASKSTATS_TYPE_NULL:
519 break; 520 break;
520 } 521 }
521 na = (struct nlattr *) (GENLMSG_DATA(&msg) + len); 522 na = (struct nlattr *) (GENLMSG_DATA(&msg) + len);
diff --git a/Documentation/driver-model/interface.txt b/Documentation/driver-model/interface.txt
deleted file mode 100644
index c66912bfe866..000000000000
--- a/Documentation/driver-model/interface.txt
+++ /dev/null
@@ -1,129 +0,0 @@
1
2Device Interfaces
3
4Introduction
5~~~~~~~~~~~~
6
7Device interfaces are the logical interfaces of device classes that correlate
8directly to userspace interfaces, like device nodes.
9
10Each device class may have multiple interfaces through which you can
11access the same device. An input device may support the mouse interface,
12the 'evdev' interface, and the touchscreen interface. A SCSI disk would
13support the disk interface, the SCSI generic interface, and possibly a raw
14device interface.
15
16Device interfaces are registered with the class they belong to. As devices
17are added to the class, they are added to each interface registered with
18the class. The interface is responsible for determining whether the device
19supports the interface or not.
20
21
22Programming Interface
23~~~~~~~~~~~~~~~~~~~~~
24
25struct device_interface {
26 char * name;
27 rwlock_t lock;
28 u32 devnum;
29 struct device_class * devclass;
30
31 struct list_head node;
32 struct driver_dir_entry dir;
33
34 int (*add_device)(struct device *);
35 int (*remove_device)(struct intf_data *);
36};
37
38int interface_register(struct device_interface *);
39void interface_unregister(struct device_interface *);
40
41
42An interface must specify the device class it belongs to. It is added
43to that class's list of interfaces on registration.
44
45
46Interfaces can be added to a device class at any time. Whenever it is
47added, each device in the class is passed to the interface's
48add_device callback. When an interface is removed, each device is
49removed from the interface.
50
51
52Devices
53~~~~~~~
54Once a device is added to a device class, it is added to each
55interface that is registered with the device class. The class
56is expected to place a class-specific data structure in
57struct device::class_data. The interface can use that (along with
58other fields of struct device) to determine whether or not the driver
59and/or device support that particular interface.
60
61
62Data
63~~~~
64
65struct intf_data {
66 struct list_head node;
67 struct device_interface * intf;
68 struct device * dev;
69 u32 intf_num;
70};
71
72int interface_add_data(struct intf_data *);
73
74The interface is responsible for allocating and initializing a struct
75intf_data and calling interface_add_data() to add it to the device's list
76of interfaces it belongs to. This list will be iterated over when the device
77is removed from the class (instead of all possible interfaces for a class).
78This structure should probably be embedded in whatever per-device data
79structure the interface is allocating anyway.
80
81Devices are enumerated within the interface. This happens in interface_add_data()
82and the enumerated value is stored in the struct intf_data for that device.
83
84sysfs
85~~~~~
86Each interface is given a directory in the directory of the device
87class it belongs to:
88
89Interfaces get a directory in the class's directory as well:
90
91 class/
92 `-- input
93 |-- devices
94 |-- drivers
95 |-- mouse
96 `-- evdev
97
98When a device is added to the interface, a symlink is created that points
99to the device's directory in the physical hierarchy:
100
101 class/
102 `-- input
103 |-- devices
104 | `-- 1 -> ../../../root/pci0/00:1f.0/usb_bus/00:1f.2-1:0/
105 |-- drivers
106 | `-- usb:usb_mouse -> ../../../bus/drivers/usb_mouse/
107 |-- mouse
108 | `-- 1 -> ../../../root/pci0/00:1f.0/usb_bus/00:1f.2-1:0/
109 `-- evdev
110 `-- 1 -> ../../../root/pci0/00:1f.0/usb_bus/00:1f.2-1:0/
111
112
113Future Plans
114~~~~~~~~~~~~
115A device interface is correlated directly with a userspace interface
116for a device, specifically a device node. For instance, a SCSI disk
117exposes at least two interfaces to userspace: the standard SCSI disk
118interface and the SCSI generic interface. It might also export a raw
119device interface.
120
121Many interfaces have a major number associated with them and each
122device gets a minor number. Or, multiple interfaces might share one
123major number, and each will receive a range of minor numbers (like in
124the case of input devices).
125
126These major and minor numbers could be stored in the interface
127structure. Major and minor allocations could happen when the interface
128is registered with the class, or via a helper function.
129
diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt
index 6c2f55e05f13..f281532a15ce 100644
--- a/Documentation/feature-removal-schedule.txt
+++ b/Documentation/feature-removal-schedule.txt
@@ -232,6 +232,17 @@ Who: Zhang Rui <rui.zhang@intel.com>
232 232
233--------------------------- 233---------------------------
234 234
235What: CONFIG_ACPI_PROCFS_POWER
236When: 2.6.39
237Why: sysfs I/F for ACPI power devices, including AC and Battery,
238 has been working in upstream kernel since 2.6.24, Sep 2007.
239 In 2.6.37, we make the sysfs I/F always built in and this option
240 disabled by default.
241 Remove this option and the ACPI power procfs interface in 2.6.39.
242Who: Zhang Rui <rui.zhang@intel.com>
243
244---------------------------
245
235What: /proc/acpi/button 246What: /proc/acpi/button
236When: August 2007 247When: August 2007
237Why: /proc/acpi/button has been replaced by events to the input layer 248Why: /proc/acpi/button has been replaced by events to the input layer
diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking
index a91f30890011..33fa3e5d38fd 100644
--- a/Documentation/filesystems/Locking
+++ b/Documentation/filesystems/Locking
@@ -18,7 +18,6 @@ prototypes:
18 char *(*d_dname)(struct dentry *dentry, char *buffer, int buflen); 18 char *(*d_dname)(struct dentry *dentry, char *buffer, int buflen);
19 19
20locking rules: 20locking rules:
21 none have BKL
22 dcache_lock rename_lock ->d_lock may block 21 dcache_lock rename_lock ->d_lock may block
23d_revalidate: no no no yes 22d_revalidate: no no no yes
24d_hash no no no yes 23d_hash no no no yes
@@ -42,18 +41,23 @@ ata *);
42 int (*rename) (struct inode *, struct dentry *, 41 int (*rename) (struct inode *, struct dentry *,
43 struct inode *, struct dentry *); 42 struct inode *, struct dentry *);
44 int (*readlink) (struct dentry *, char __user *,int); 43 int (*readlink) (struct dentry *, char __user *,int);
45 int (*follow_link) (struct dentry *, struct nameidata *); 44 void * (*follow_link) (struct dentry *, struct nameidata *);
45 void (*put_link) (struct dentry *, struct nameidata *, void *);
46 void (*truncate) (struct inode *); 46 void (*truncate) (struct inode *);
47 int (*permission) (struct inode *, int, struct nameidata *); 47 int (*permission) (struct inode *, int, struct nameidata *);
48 int (*check_acl)(struct inode *, int);
48 int (*setattr) (struct dentry *, struct iattr *); 49 int (*setattr) (struct dentry *, struct iattr *);
49 int (*getattr) (struct vfsmount *, struct dentry *, struct kstat *); 50 int (*getattr) (struct vfsmount *, struct dentry *, struct kstat *);
50 int (*setxattr) (struct dentry *, const char *,const void *,size_t,int); 51 int (*setxattr) (struct dentry *, const char *,const void *,size_t,int);
51 ssize_t (*getxattr) (struct dentry *, const char *, void *, size_t); 52 ssize_t (*getxattr) (struct dentry *, const char *, void *, size_t);
52 ssize_t (*listxattr) (struct dentry *, char *, size_t); 53 ssize_t (*listxattr) (struct dentry *, char *, size_t);
53 int (*removexattr) (struct dentry *, const char *); 54 int (*removexattr) (struct dentry *, const char *);
55 void (*truncate_range)(struct inode *, loff_t, loff_t);
56 long (*fallocate)(struct inode *inode, int mode, loff_t offset, loff_t len);
57 int (*fiemap)(struct inode *, struct fiemap_extent_info *, u64 start, u64 len);
54 58
55locking rules: 59locking rules:
56 all may block, none have BKL 60 all may block
57 i_mutex(inode) 61 i_mutex(inode)
58lookup: yes 62lookup: yes
59create: yes 63create: yes
@@ -66,19 +70,24 @@ rmdir: yes (both) (see below)
66rename: yes (all) (see below) 70rename: yes (all) (see below)
67readlink: no 71readlink: no
68follow_link: no 72follow_link: no
73put_link: no
69truncate: yes (see below) 74truncate: yes (see below)
70setattr: yes 75setattr: yes
71permission: no 76permission: no
77check_acl: no
72getattr: no 78getattr: no
73setxattr: yes 79setxattr: yes
74getxattr: no 80getxattr: no
75listxattr: no 81listxattr: no
76removexattr: yes 82removexattr: yes
83truncate_range: yes
84fallocate: no
85fiemap: no
77 Additionally, ->rmdir(), ->unlink() and ->rename() have ->i_mutex on 86 Additionally, ->rmdir(), ->unlink() and ->rename() have ->i_mutex on
78victim. 87victim.
79 cross-directory ->rename() has (per-superblock) ->s_vfs_rename_sem. 88 cross-directory ->rename() has (per-superblock) ->s_vfs_rename_sem.
80 ->truncate() is never called directly - it's a callback, not a 89 ->truncate() is never called directly - it's a callback, not a
81method. It's called by vmtruncate() - library function normally used by 90method. It's called by vmtruncate() - deprecated library function used by
82->setattr(). Locking information above applies to that call (i.e. is 91->setattr(). Locking information above applies to that call (i.e. is
83inherited from ->setattr() - vmtruncate() is used when ATTR_SIZE had been 92inherited from ->setattr() - vmtruncate() is used when ATTR_SIZE had been
84passed). 93passed).
@@ -91,7 +100,7 @@ prototypes:
91 struct inode *(*alloc_inode)(struct super_block *sb); 100 struct inode *(*alloc_inode)(struct super_block *sb);
92 void (*destroy_inode)(struct inode *); 101 void (*destroy_inode)(struct inode *);
93 void (*dirty_inode) (struct inode *); 102 void (*dirty_inode) (struct inode *);
94 int (*write_inode) (struct inode *, int); 103 int (*write_inode) (struct inode *, struct writeback_control *wbc);
95 int (*drop_inode) (struct inode *); 104 int (*drop_inode) (struct inode *);
96 void (*evict_inode) (struct inode *); 105 void (*evict_inode) (struct inode *);
97 void (*put_super) (struct super_block *); 106 void (*put_super) (struct super_block *);
@@ -105,10 +114,10 @@ prototypes:
105 int (*show_options)(struct seq_file *, struct vfsmount *); 114 int (*show_options)(struct seq_file *, struct vfsmount *);
106 ssize_t (*quota_read)(struct super_block *, int, char *, size_t, loff_t); 115 ssize_t (*quota_read)(struct super_block *, int, char *, size_t, loff_t);
107 ssize_t (*quota_write)(struct super_block *, int, const char *, size_t, loff_t); 116 ssize_t (*quota_write)(struct super_block *, int, const char *, size_t, loff_t);
117 int (*bdev_try_to_free_page)(struct super_block*, struct page*, gfp_t);
108 118
109locking rules: 119locking rules:
110 All may block [not true, see below] 120 All may block [not true, see below]
111 None have BKL
112 s_umount 121 s_umount
113alloc_inode: 122alloc_inode:
114destroy_inode: 123destroy_inode:
@@ -127,6 +136,7 @@ umount_begin: no
127show_options: no (namespace_sem) 136show_options: no (namespace_sem)
128quota_read: no (see below) 137quota_read: no (see below)
129quota_write: no (see below) 138quota_write: no (see below)
139bdev_try_to_free_page: no (see below)
130 140
131->statfs() has s_umount (shared) when called by ustat(2) (native or 141->statfs() has s_umount (shared) when called by ustat(2) (native or
132compat), but that's an accident of bad API; s_umount is used to pin 142compat), but that's an accident of bad API; s_umount is used to pin
@@ -139,19 +149,25 @@ be the only ones operating on the quota file by the quota code (via
139dqio_sem) (unless an admin really wants to screw up something and 149dqio_sem) (unless an admin really wants to screw up something and
140writes to quota files with quotas on). For other details about locking 150writes to quota files with quotas on). For other details about locking
141see also dquot_operations section. 151see also dquot_operations section.
152->bdev_try_to_free_page is called from the ->releasepage handler of
153the block device inode. See there for more details.
142 154
143--------------------------- file_system_type --------------------------- 155--------------------------- file_system_type ---------------------------
144prototypes: 156prototypes:
145 int (*get_sb) (struct file_system_type *, int, 157 int (*get_sb) (struct file_system_type *, int,
146 const char *, void *, struct vfsmount *); 158 const char *, void *, struct vfsmount *);
159 struct dentry *(*mount) (struct file_system_type *, int,
160 const char *, void *);
147 void (*kill_sb) (struct super_block *); 161 void (*kill_sb) (struct super_block *);
148locking rules: 162locking rules:
149 may block BKL 163 may block
150get_sb yes no 164get_sb yes
151kill_sb yes no 165mount yes
166kill_sb yes
152 167
153->get_sb() returns error or 0 with locked superblock attached to the vfsmount 168->get_sb() returns error or 0 with locked superblock attached to the vfsmount
154(exclusive on ->s_umount). 169(exclusive on ->s_umount).
170->mount() returns ERR_PTR or the root dentry.
155->kill_sb() takes a write-locked superblock, does all shutdown work on it, 171->kill_sb() takes a write-locked superblock, does all shutdown work on it,
156unlocks and drops the reference. 172unlocks and drops the reference.
157 173
@@ -173,28 +189,38 @@ prototypes:
173 sector_t (*bmap)(struct address_space *, sector_t); 189 sector_t (*bmap)(struct address_space *, sector_t);
174 int (*invalidatepage) (struct page *, unsigned long); 190 int (*invalidatepage) (struct page *, unsigned long);
175 int (*releasepage) (struct page *, int); 191 int (*releasepage) (struct page *, int);
192 void (*freepage)(struct page *);
176 int (*direct_IO)(int, struct kiocb *, const struct iovec *iov, 193 int (*direct_IO)(int, struct kiocb *, const struct iovec *iov,
177 loff_t offset, unsigned long nr_segs); 194 loff_t offset, unsigned long nr_segs);
178 int (*launder_page) (struct page *); 195 int (*get_xip_mem)(struct address_space *, pgoff_t, int, void **,
196 unsigned long *);
197 int (*migratepage)(struct address_space *, struct page *, struct page *);
198 int (*launder_page)(struct page *);
199 int (*is_partially_uptodate)(struct page *, read_descriptor_t *, unsigned long);
200 int (*error_remove_page)(struct address_space *, struct page *);
179 201
180locking rules: 202locking rules:
181 All except set_page_dirty may block 203 All except set_page_dirty and freepage may block
182 204
183 BKL PageLocked(page) i_mutex 205 PageLocked(page) i_mutex
184writepage: no yes, unlocks (see below) 206writepage: yes, unlocks (see below)
185readpage: no yes, unlocks 207readpage: yes, unlocks
186sync_page: no maybe 208sync_page: maybe
187writepages: no 209writepages:
188set_page_dirty no no 210set_page_dirty no
189readpages: no 211readpages:
190write_begin: no locks the page yes 212write_begin: locks the page yes
191write_end: no yes, unlocks yes 213write_end: yes, unlocks yes
192perform_write: no n/a yes 214bmap:
193bmap: no 215invalidatepage: yes
194invalidatepage: no yes 216releasepage: yes
195releasepage: no yes 217freepage: yes
196direct_IO: no 218direct_IO:
197launder_page: no yes 219get_xip_mem: maybe
220migratepage: yes (both)
221launder_page: yes
222is_partially_uptodate: yes
223error_remove_page: yes
198 224
199 ->write_begin(), ->write_end(), ->sync_page() and ->readpage() 225 ->write_begin(), ->write_end(), ->sync_page() and ->readpage()
200may be called from the request handler (/dev/loop). 226may be called from the request handler (/dev/loop).
@@ -274,9 +300,8 @@ under spinlock (it cannot block) and is sometimes called with the page
274not locked. 300not locked.
275 301
276 ->bmap() is currently used by legacy ioctl() (FIBMAP) provided by some 302 ->bmap() is currently used by legacy ioctl() (FIBMAP) provided by some
277filesystems and by the swapper. The latter will eventually go away. All 303filesystems and by the swapper. The latter will eventually go away. Please,
278instances do not actually need the BKL. Please, keep it that way and don't 304keep it that way and don't breed new callers.
279breed new callers.
280 305
281 ->invalidatepage() is called when the filesystem must attempt to drop 306 ->invalidatepage() is called when the filesystem must attempt to drop
282some or all of the buffers from the page when it is being truncated. It 307some or all of the buffers from the page when it is being truncated. It
@@ -288,53 +313,46 @@ buffers from the page in preparation for freeing it. It returns zero to
288indicate that the buffers are (or may be) freeable. If ->releasepage is zero, 313indicate that the buffers are (or may be) freeable. If ->releasepage is zero,
289the kernel assumes that the fs has no private interest in the buffers. 314the kernel assumes that the fs has no private interest in the buffers.
290 315
316 ->freepage() is called when the kernel is done dropping the page
317from the page cache.
318
291 ->launder_page() may be called prior to releasing a page if 319 ->launder_page() may be called prior to releasing a page if
292it is still found to be dirty. It returns zero if the page was successfully 320it is still found to be dirty. It returns zero if the page was successfully
293cleaned, or an error value if not. Note that in order to prevent the page 321cleaned, or an error value if not. Note that in order to prevent the page
294getting mapped back in and redirtied, it needs to be kept locked 322getting mapped back in and redirtied, it needs to be kept locked
295across the entire operation. 323across the entire operation.
296 324
297 Note: currently almost all instances of address_space methods are
298using BKL for internal serialization and that's one of the worst sources
299of contention. Normally they are calling library functions (in fs/buffer.c)
300and pass foo_get_block() as a callback (on local block-based filesystems,
301indeed). BKL is not needed for library stuff and is usually taken by
302foo_get_block(). It's an overkill, since block bitmaps can be protected by
303internal fs locking and real critical areas are much smaller than the areas
304filesystems protect now.
305
306----------------------- file_lock_operations ------------------------------ 325----------------------- file_lock_operations ------------------------------
307prototypes: 326prototypes:
308 void (*fl_insert)(struct file_lock *); /* lock insertion callback */
309 void (*fl_remove)(struct file_lock *); /* lock removal callback */
310 void (*fl_copy_lock)(struct file_lock *, struct file_lock *); 327 void (*fl_copy_lock)(struct file_lock *, struct file_lock *);
311 void (*fl_release_private)(struct file_lock *); 328 void (*fl_release_private)(struct file_lock *);
312 329
313 330
314locking rules: 331locking rules:
315 BKL may block 332 file_lock_lock may block
316fl_insert: yes no 333fl_copy_lock: yes no
317fl_remove: yes no 334fl_release_private: maybe no
318fl_copy_lock: yes no
319fl_release_private: yes yes
320 335
321----------------------- lock_manager_operations --------------------------- 336----------------------- lock_manager_operations ---------------------------
322prototypes: 337prototypes:
323 int (*fl_compare_owner)(struct file_lock *, struct file_lock *); 338 int (*fl_compare_owner)(struct file_lock *, struct file_lock *);
324 void (*fl_notify)(struct file_lock *); /* unblock callback */ 339 void (*fl_notify)(struct file_lock *); /* unblock callback */
340 int (*fl_grant)(struct file_lock *, struct file_lock *, int);
325 void (*fl_release_private)(struct file_lock *); 341 void (*fl_release_private)(struct file_lock *);
326 void (*fl_break)(struct file_lock *); /* break_lease callback */ 342 void (*fl_break)(struct file_lock *); /* break_lease callback */
343 int (*fl_mylease)(struct file_lock *, struct file_lock *);
344 int (*fl_change)(struct file_lock **, int);
327 345
328locking rules: 346locking rules:
329 BKL may block 347 file_lock_lock may block
330fl_compare_owner: yes no 348fl_compare_owner: yes no
331fl_notify: yes no 349fl_notify: yes no
332fl_release_private: yes yes 350fl_grant: no no
333fl_break: yes no 351fl_release_private: maybe no
334 352fl_break: yes no
335 Currently only NFSD and NLM provide instances of this class. None of the 353fl_mylease: yes no
336them block. If you have out-of-tree instances - please, show up. Locking 354fl_change yes no
337in that area will change. 355
338--------------------------- buffer_head ----------------------------------- 356--------------------------- buffer_head -----------------------------------
339prototypes: 357prototypes:
340 void (*b_end_io)(struct buffer_head *bh, int uptodate); 358 void (*b_end_io)(struct buffer_head *bh, int uptodate);
@@ -359,17 +377,17 @@ prototypes:
359 void (*swap_slot_free_notify) (struct block_device *, unsigned long); 377 void (*swap_slot_free_notify) (struct block_device *, unsigned long);
360 378
361locking rules: 379locking rules:
362 BKL bd_mutex 380 bd_mutex
363open: no yes 381open: yes
364release: no yes 382release: yes
365ioctl: no no 383ioctl: no
366compat_ioctl: no no 384compat_ioctl: no
367direct_access: no no 385direct_access: no
368media_changed: no no 386media_changed: no
369unlock_native_capacity: no no 387unlock_native_capacity: no
370revalidate_disk: no no 388revalidate_disk: no
371getgeo: no no 389getgeo: no
372swap_slot_free_notify: no no (see below) 390swap_slot_free_notify: no (see below)
373 391
374media_changed, unlock_native_capacity and revalidate_disk are called only from 392media_changed, unlock_native_capacity and revalidate_disk are called only from
375check_disk_change(). 393check_disk_change().
@@ -408,34 +426,21 @@ prototypes:
408 unsigned long (*get_unmapped_area)(struct file *, unsigned long, 426 unsigned long (*get_unmapped_area)(struct file *, unsigned long,
409 unsigned long, unsigned long, unsigned long); 427 unsigned long, unsigned long, unsigned long);
410 int (*check_flags)(int); 428 int (*check_flags)(int);
429 int (*flock) (struct file *, int, struct file_lock *);
430 ssize_t (*splice_write)(struct pipe_inode_info *, struct file *, loff_t *,
431 size_t, unsigned int);
432 ssize_t (*splice_read)(struct file *, loff_t *, struct pipe_inode_info *,
433 size_t, unsigned int);
434 int (*setlease)(struct file *, long, struct file_lock **);
411}; 435};
412 436
413locking rules: 437locking rules:
414 All may block. 438 All may block except for ->setlease.
415 BKL 439 No VFS locks held on entry except for ->fsync and ->setlease.
416llseek: no (see below) 440
417read: no 441->fsync() has i_mutex on inode.
418aio_read: no 442
419write: no 443->setlease has the file_list_lock held and must not sleep.
420aio_write: no
421readdir: no
422poll: no
423unlocked_ioctl: no
424compat_ioctl: no
425mmap: no
426open: no
427flush: no
428release: no
429fsync: no (see below)
430aio_fsync: no
431fasync: no
432lock: yes
433readv: no
434writev: no
435sendfile: no
436sendpage: no
437get_unmapped_area: no
438check_flags: no
439 444
440->llseek() locking has moved from llseek to the individual llseek 445->llseek() locking has moved from llseek to the individual llseek
441implementations. If your fs is not using generic_file_llseek, you 446implementations. If your fs is not using generic_file_llseek, you
@@ -445,17 +450,10 @@ mutex or just to use i_size_read() instead.
445Note: this does not protect the file->f_pos against concurrent modifications 450Note: this does not protect the file->f_pos against concurrent modifications
446since this is something the userspace has to take care about. 451since this is something the userspace has to take care about.
447 452
448Note: ext2_release() was *the* source of contention on fs-intensive 453->fasync() is responsible for maintaining the FASYNC bit in filp->f_flags.
449loads and dropping BKL on ->release() helps to get rid of that (we still 454Most instances call fasync_helper(), which does that maintenance, so it's
450grab BKL for cases when we close a file that had been opened r/w, but that 455not normally something one needs to worry about. Return values > 0 will be
451can and should be done using the internal locking with smaller critical areas). 456mapped to zero in the VFS layer.
452Current worst offender is ext2_get_block()...
453
454->fasync() is called without BKL protection, and is responsible for
455maintaining the FASYNC bit in filp->f_flags. Most instances call
456fasync_helper(), which does that maintenance, so it's not normally
457something one needs to worry about. Return values > 0 will be mapped to
458zero in the VFS layer.
459 457
460->readdir() and ->ioctl() on directories must be changed. Ideally we would 458->readdir() and ->ioctl() on directories must be changed. Ideally we would
461move ->readdir() to inode_operations and use a separate method for directory 459move ->readdir() to inode_operations and use a separate method for directory
@@ -466,8 +464,6 @@ components. And there are other reasons why the current interface is a mess...
466->read on directories probably must go away - we should just enforce -EISDIR 464->read on directories probably must go away - we should just enforce -EISDIR
467in sys_read() and friends. 465in sys_read() and friends.
468 466
469->fsync() has i_mutex on inode.
470
471--------------------------- dquot_operations ------------------------------- 467--------------------------- dquot_operations -------------------------------
472prototypes: 468prototypes:
473 int (*write_dquot) (struct dquot *); 469 int (*write_dquot) (struct dquot *);
@@ -502,12 +498,12 @@ prototypes:
502 int (*access)(struct vm_area_struct *, unsigned long, void*, int, int); 498 int (*access)(struct vm_area_struct *, unsigned long, void*, int, int);
503 499
504locking rules: 500locking rules:
505 BKL mmap_sem PageLocked(page) 501 mmap_sem PageLocked(page)
506open: no yes 502open: yes
507close: no yes 503close: yes
508fault: no yes can return with page locked 504fault: yes can return with page locked
509page_mkwrite: no yes can return with page locked 505page_mkwrite: yes can return with page locked
510access: no yes 506access: yes
511 507
512 ->fault() is called when a previously not present pte is about 508 ->fault() is called when a previously not present pte is about
513to be faulted in. The filesystem must find and return the page associated 509to be faulted in. The filesystem must find and return the page associated
@@ -534,6 +530,3 @@ VM_IO | VM_PFNMAP VMAs.
534 530
535(if you break something or notice that it is broken and do not fix it yourself 531(if you break something or notice that it is broken and do not fix it yourself
536- at least put it here) 532- at least put it here)
537
538ipc/shm.c::shm_delete() - may need BKL.
539->read() and ->write() in many drivers are (probably) missing BKL.
diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt
index ed7e5efc06d8..20899e095e7e 100644
--- a/Documentation/filesystems/vfs.txt
+++ b/Documentation/filesystems/vfs.txt
@@ -534,6 +534,7 @@ struct address_space_operations {
534 sector_t (*bmap)(struct address_space *, sector_t); 534 sector_t (*bmap)(struct address_space *, sector_t);
535 int (*invalidatepage) (struct page *, unsigned long); 535 int (*invalidatepage) (struct page *, unsigned long);
536 int (*releasepage) (struct page *, int); 536 int (*releasepage) (struct page *, int);
537 void (*freepage)(struct page *);
537 ssize_t (*direct_IO)(int, struct kiocb *, const struct iovec *iov, 538 ssize_t (*direct_IO)(int, struct kiocb *, const struct iovec *iov,
538 loff_t offset, unsigned long nr_segs); 539 loff_t offset, unsigned long nr_segs);
539 struct page* (*get_xip_page)(struct address_space *, sector_t, 540 struct page* (*get_xip_page)(struct address_space *, sector_t,
@@ -660,11 +661,10 @@ struct address_space_operations {
660 releasepage: releasepage is called on PagePrivate pages to indicate 661 releasepage: releasepage is called on PagePrivate pages to indicate
661 that the page should be freed if possible. ->releasepage 662 that the page should be freed if possible. ->releasepage
662 should remove any private data from the page and clear the 663 should remove any private data from the page and clear the
663 PagePrivate flag. It may also remove the page from the 664 PagePrivate flag. If releasepage() fails for some reason, it must
664 address_space. If this fails for some reason, it may indicate 665 indicate failure with a 0 return value.
665 failure with a 0 return value. 666 releasepage() is used in two distinct though related cases. The
666 This is used in two distinct though related cases. The first 667 first is when the VM finds a clean page with no active users and
667 is when the VM finds a clean page with no active users and
668 wants to make it a free page. If ->releasepage succeeds, the 668 wants to make it a free page. If ->releasepage succeeds, the
669 page will be removed from the address_space and become free. 669 page will be removed from the address_space and become free.
670 670
@@ -679,6 +679,12 @@ struct address_space_operations {
679 need to ensure this. Possibly it can clear the PageUptodate 679 need to ensure this. Possibly it can clear the PageUptodate
680 bit if it cannot free private data yet. 680 bit if it cannot free private data yet.
681 681
682 freepage: freepage is called once the page is no longer visible in
683 the page cache in order to allow the cleanup of any private
684 data. Since it may be called by the memory reclaimer, it
685 should not assume that the original address_space mapping still
686 exists, and it should not block.
687
682 direct_IO: called by the generic read/write routines to perform 688 direct_IO: called by the generic read/write routines to perform
683 direct_IO - that is IO requests which bypass the page cache 689 direct_IO - that is IO requests which bypass the page cache
684 and transfer data directly between the storage and the 690 and transfer data directly between the storage and the
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index cdd2a6e8a3b7..01ece1b9213e 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -1759,7 +1759,7 @@ and is between 256 and 4096 characters. It is defined in the file
1759 1759
1760 nousb [USB] Disable the USB subsystem 1760 nousb [USB] Disable the USB subsystem
1761 1761
1762 nowatchdog [KNL] Disable the lockup detector. 1762 nowatchdog [KNL] Disable the lockup detector (NMI watchdog).
1763 1763
1764 nowb [ARM] 1764 nowb [ARM]
1765 1765
@@ -2175,11 +2175,6 @@ and is between 256 and 4096 characters. It is defined in the file
2175 reset_devices [KNL] Force drivers to reset the underlying device 2175 reset_devices [KNL] Force drivers to reset the underlying device
2176 during initialization. 2176 during initialization.
2177 2177
2178 resource_alloc_from_bottom
2179 Allocate new resources from the beginning of available
2180 space, not the end. If you need to use this, please
2181 report a bug.
2182
2183 resume= [SWSUSP] 2178 resume= [SWSUSP]
2184 Specify the partition device for software suspend 2179 Specify the partition device for software suspend
2185 2180
diff --git a/Documentation/power/runtime_pm.txt b/Documentation/power/runtime_pm.txt
index 489e9bacd165..41cc7b30d7dd 100644
--- a/Documentation/power/runtime_pm.txt
+++ b/Documentation/power/runtime_pm.txt
@@ -379,8 +379,8 @@ drivers/base/power/runtime.c and include/linux/pm_runtime.h:
379 zero) 379 zero)
380 380
381 bool pm_runtime_suspended(struct device *dev); 381 bool pm_runtime_suspended(struct device *dev);
382 - return true if the device's runtime PM status is 'suspended', or false 382 - return true if the device's runtime PM status is 'suspended' and its
383 otherwise 383 'power.disable_depth' field is equal to zero, or false otherwise
384 384
385 void pm_runtime_allow(struct device *dev); 385 void pm_runtime_allow(struct device *dev);
386 - set the power.runtime_auto flag for the device and decrease its usage 386 - set the power.runtime_auto flag for the device and decrease its usage
diff --git a/Documentation/scsi/scsi_mid_low_api.txt b/Documentation/scsi/scsi_mid_low_api.txt
index 570ef2b3d79b..df322c103466 100644
--- a/Documentation/scsi/scsi_mid_low_api.txt
+++ b/Documentation/scsi/scsi_mid_low_api.txt
@@ -1044,9 +1044,9 @@ Details:
1044 1044
1045 1045
1046/** 1046/**
1047 * queuecommand - queue scsi command, invoke 'done' on completion 1047 * queuecommand - queue scsi command, invoke scp->scsi_done on completion
1048 * @shost: pointer to the scsi host object
1048 * @scp: pointer to scsi command object 1049 * @scp: pointer to scsi command object
1049 * @done: function pointer to be invoked on completion
1050 * 1050 *
1051 * Returns 0 on success. 1051 * Returns 0 on success.
1052 * 1052 *
@@ -1074,42 +1074,45 @@ Details:
1074 * 1074 *
1075 * Other types of errors that are detected immediately may be 1075 * Other types of errors that are detected immediately may be
1076 * flagged by setting scp->result to an appropriate value, 1076 * flagged by setting scp->result to an appropriate value,
1077 * invoking the 'done' callback, and then returning 0 from this 1077 * invoking the scp->scsi_done callback, and then returning 0
1078 * function. If the command is not performed immediately (and the 1078 * from this function. If the command is not performed
1079 * LLD is starting (or will start) the given command) then this 1079 * immediately (and the LLD is starting (or will start) the given
1080 * function should place 0 in scp->result and return 0. 1080 * command) then this function should place 0 in scp->result and
1081 * return 0.
1081 * 1082 *
1082 * Command ownership. If the driver returns zero, it owns the 1083 * Command ownership. If the driver returns zero, it owns the
1083 * command and must take responsibility for ensuring the 'done' 1084 * command and must take responsibility for ensuring the
1084 * callback is executed. Note: the driver may call done before 1085 * scp->scsi_done callback is executed. Note: the driver may
1085 * returning zero, but after it has called done, it may not 1086 * call scp->scsi_done before returning zero, but after it has
1086 * return any value other than zero. If the driver makes a 1087 * called scp->scsi_done, it may not return any value other than
1087 * non-zero return, it must not execute the command's done 1088 * zero. If the driver makes a non-zero return, it must not
1088 * callback at any time. 1089 * execute the command's scsi_done callback at any time.
1089 * 1090 *
1090 * Locks: struct Scsi_Host::host_lock held on entry (with "irqsave") 1091 * Locks: up to and including 2.6.36, struct Scsi_Host::host_lock
1091 * and is expected to be held on return. 1092 * held on entry (with "irqsave") and is expected to be
1093 * held on return. From 2.6.37 onwards, queuecommand is
1094 * called without any locks held.
1092 * 1095 *
1093 * Calling context: in interrupt (soft irq) or process context 1096 * Calling context: in interrupt (soft irq) or process context
1094 * 1097 *
1095 * Notes: This function should be relatively fast. Normally it will 1098 * Notes: This function should be relatively fast. Normally it
1096 * not wait for IO to complete. Hence the 'done' callback is invoked 1099 * will not wait for IO to complete. Hence the scp->scsi_done
1097 * (often directly from an interrupt service routine) some time after 1100 * callback is invoked (often directly from an interrupt service
1098 * this function has returned. In some cases (e.g. pseudo adapter 1101 * routine) some time after this function has returned. In some
1099 * drivers that manufacture the response to a SCSI INQUIRY) 1102 * cases (e.g. pseudo adapter drivers that manufacture the
1100 * the 'done' callback may be invoked before this function returns. 1103 * response to a SCSI INQUIRY) the scp->scsi_done callback may be
1101 * If the 'done' callback is not invoked within a certain period 1104 * invoked before this function returns. If the scp->scsi_done
1102 * the SCSI mid level will commence error processing. 1105 * callback is not invoked within a certain period the SCSI mid
1103 * If a status of CHECK CONDITION is placed in "result" when the 1106 * level will commence error processing. If a status of CHECK
1104 * 'done' callback is invoked, then the LLD driver should 1107 * CONDITION is placed in "result" when the scp->scsi_done
1105 * perform autosense and fill in the struct scsi_cmnd::sense_buffer 1108 * callback is invoked, then the LLD driver should perform
1109 * autosense and fill in the struct scsi_cmnd::sense_buffer
1106 * array. The scsi_cmnd::sense_buffer array is zeroed prior to 1110 * array. The scsi_cmnd::sense_buffer array is zeroed prior to
1107 * the mid level queuing a command to an LLD. 1111 * the mid level queuing a command to an LLD.
1108 * 1112 *
1109 * Defined in: LLD 1113 * Defined in: LLD
1110 **/ 1114 **/
1111 int queuecommand(struct scsi_cmnd * scp, 1115 int queuecommand(struct Scsi_Host *shost, struct scsi_cmnd * scp)
1112 void (*done)(struct scsi_cmnd *))
1113 1116
1114 1117
1115/** 1118/**
diff --git a/Documentation/trace/postprocess/trace-vmscan-postprocess.pl b/Documentation/trace/postprocess/trace-vmscan-postprocess.pl
index b3e73ddb1567..12cecc83cd91 100644
--- a/Documentation/trace/postprocess/trace-vmscan-postprocess.pl
+++ b/Documentation/trace/postprocess/trace-vmscan-postprocess.pl
@@ -373,9 +373,18 @@ EVENT_PROCESS:
373 print " $regex_lru_isolate/o\n"; 373 print " $regex_lru_isolate/o\n";
374 next; 374 next;
375 } 375 }
376 my $isolate_mode = $1;
376 my $nr_scanned = $4; 377 my $nr_scanned = $4;
377 my $nr_contig_dirty = $7; 378 my $nr_contig_dirty = $7;
378 $perprocesspid{$process_pid}->{HIGH_NR_SCANNED} += $nr_scanned; 379
380 # To more closely match vmstat scanning statistics, only count isolate_both
381 # and isolate_inactive as scanning; isolate_active is rotation.
382 # isolate_inactive == 0
383 # isolate_active == 1
384 # isolate_both == 2
385 if ($isolate_mode != 1) {
386 $perprocesspid{$process_pid}->{HIGH_NR_SCANNED} += $nr_scanned;
387 }
379 $perprocesspid{$process_pid}->{HIGH_NR_CONTIG_DIRTY} += $nr_contig_dirty; 388 $perprocesspid{$process_pid}->{HIGH_NR_CONTIG_DIRTY} += $nr_contig_dirty;
380 } elsif ($tracepoint eq "mm_vmscan_lru_shrink_inactive") { 389 } elsif ($tracepoint eq "mm_vmscan_lru_shrink_inactive") {
381 $details = $5; 390 $details = $5;