diff options
author | Ingo Molnar <mingo@elte.hu> | 2010-08-12 15:38:56 -0400 |
---|---|---|
committer | Ingo Molnar <mingo@elte.hu> | 2010-08-12 15:39:04 -0400 |
commit | f46a6804135795f77d096ab0128f27531c7d051c (patch) | |
tree | 7cd33f69e3661327739ae4c96e5a8389e7fc912e /Documentation/filesystems | |
parent | b3e84ffa21f916e3354a12a7f19169c9febe96d0 (diff) | |
parent | ad41a1e0cab07c5125456e8d38e5b1ab148d04aa (diff) |
Merge branch 'linus' into perf/urgent
Merge reason: Fix upstream breakage introduced by:
de5d9bf: Move list types from <linux/list.h> to <linux/types.h>.
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'Documentation/filesystems')
-rw-r--r-- | Documentation/filesystems/9p.txt | 2 | ||||
-rw-r--r-- | Documentation/filesystems/Locking | 22 | ||||
-rw-r--r-- | Documentation/filesystems/affs.txt | 2 | ||||
-rw-r--r-- | Documentation/filesystems/befs.txt | 4 | ||||
-rw-r--r-- | Documentation/filesystems/caching/fscache.txt | 10 | ||||
-rw-r--r-- | Documentation/filesystems/isofs.txt | 2 | ||||
-rw-r--r-- | Documentation/filesystems/nfs/nfsroot.txt | 2 | ||||
-rw-r--r-- | Documentation/filesystems/nilfs2.txt | 12 | ||||
-rw-r--r-- | Documentation/filesystems/porting | 45 | ||||
-rw-r--r-- | Documentation/filesystems/proc.txt | 101 | ||||
-rw-r--r-- | Documentation/filesystems/squashfs.txt | 2 | ||||
-rw-r--r-- | Documentation/filesystems/sysfs-pci.txt | 7 | ||||
-rw-r--r-- | Documentation/filesystems/sysfs.txt | 46 | ||||
-rw-r--r-- | Documentation/filesystems/vfat.txt | 3 | ||||
-rw-r--r-- | Documentation/filesystems/xfs.txt | 11 |
15 files changed, 173 insertions, 98 deletions
diff --git a/Documentation/filesystems/9p.txt b/Documentation/filesystems/9p.txt index c0236e753bc8..f9765e8cf086 100644 --- a/Documentation/filesystems/9p.txt +++ b/Documentation/filesystems/9p.txt | |||
@@ -128,7 +128,7 @@ OPTIONS | |||
128 | RESOURCES | 128 | RESOURCES |
129 | ========= | 129 | ========= |
130 | 130 | ||
131 | Our current recommendation is to use Inferno (http://www.vitanuova.com/inferno) | 131 | Our current recommendation is to use Inferno (http://www.vitanuova.com/nferno/index.html) |
132 | as the 9p server. You can start a 9p server under Inferno by issuing the | 132 | as the 9p server. You can start a 9p server under Inferno by issuing the |
133 | following command: | 133 | following command: |
134 | ; styxlisten -A tcp!*!564 export '#U*' | 134 | ; styxlisten -A tcp!*!564 export '#U*' |
diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking index 96d4293607ec..bbcc15651a21 100644 --- a/Documentation/filesystems/Locking +++ b/Documentation/filesystems/Locking | |||
@@ -92,8 +92,8 @@ prototypes: | |||
92 | void (*destroy_inode)(struct inode *); | 92 | void (*destroy_inode)(struct inode *); |
93 | void (*dirty_inode) (struct inode *); | 93 | void (*dirty_inode) (struct inode *); |
94 | int (*write_inode) (struct inode *, int); | 94 | int (*write_inode) (struct inode *, int); |
95 | void (*drop_inode) (struct inode *); | 95 | int (*drop_inode) (struct inode *); |
96 | void (*delete_inode) (struct inode *); | 96 | void (*evict_inode) (struct inode *); |
97 | void (*put_super) (struct super_block *); | 97 | void (*put_super) (struct super_block *); |
98 | void (*write_super) (struct super_block *); | 98 | void (*write_super) (struct super_block *); |
99 | int (*sync_fs)(struct super_block *sb, int wait); | 99 | int (*sync_fs)(struct super_block *sb, int wait); |
@@ -101,14 +101,13 @@ prototypes: | |||
101 | int (*unfreeze_fs) (struct super_block *); | 101 | int (*unfreeze_fs) (struct super_block *); |
102 | int (*statfs) (struct dentry *, struct kstatfs *); | 102 | int (*statfs) (struct dentry *, struct kstatfs *); |
103 | int (*remount_fs) (struct super_block *, int *, char *); | 103 | int (*remount_fs) (struct super_block *, int *, char *); |
104 | void (*clear_inode) (struct inode *); | ||
105 | void (*umount_begin) (struct super_block *); | 104 | void (*umount_begin) (struct super_block *); |
106 | int (*show_options)(struct seq_file *, struct vfsmount *); | 105 | int (*show_options)(struct seq_file *, struct vfsmount *); |
107 | ssize_t (*quota_read)(struct super_block *, int, char *, size_t, loff_t); | 106 | ssize_t (*quota_read)(struct super_block *, int, char *, size_t, loff_t); |
108 | ssize_t (*quota_write)(struct super_block *, int, const char *, size_t, loff_t); | 107 | ssize_t (*quota_write)(struct super_block *, int, const char *, size_t, loff_t); |
109 | 108 | ||
110 | locking rules: | 109 | locking rules: |
111 | All may block. | 110 | All may block [not true, see below] |
112 | None have BKL | 111 | None have BKL |
113 | s_umount | 112 | s_umount |
114 | alloc_inode: | 113 | alloc_inode: |
@@ -116,22 +115,25 @@ destroy_inode: | |||
116 | dirty_inode: (must not sleep) | 115 | dirty_inode: (must not sleep) |
117 | write_inode: | 116 | write_inode: |
118 | drop_inode: !!!inode_lock!!! | 117 | drop_inode: !!!inode_lock!!! |
119 | delete_inode: | 118 | evict_inode: |
120 | put_super: write | 119 | put_super: write |
121 | write_super: read | 120 | write_super: read |
122 | sync_fs: read | 121 | sync_fs: read |
123 | freeze_fs: read | 122 | freeze_fs: read |
124 | unfreeze_fs: read | 123 | unfreeze_fs: read |
125 | statfs: no | 124 | statfs: maybe(read) (see below) |
126 | remount_fs: maybe (see below) | 125 | remount_fs: write |
127 | clear_inode: | ||
128 | umount_begin: no | 126 | umount_begin: no |
129 | show_options: no (namespace_sem) | 127 | show_options: no (namespace_sem) |
130 | quota_read: no (see below) | 128 | quota_read: no (see below) |
131 | quota_write: no (see below) | 129 | quota_write: no (see below) |
132 | 130 | ||
133 | ->remount_fs() will have the s_umount exclusive lock if it's already mounted. | 131 | ->statfs() has s_umount (shared) when called by ustat(2) (native or |
134 | When called from get_sb_single, it does NOT have the s_umount lock. | 132 | compat), but that's an accident of bad API; s_umount is used to pin |
133 | the superblock down when we only have dev_t given us by userland to | ||
134 | identify the superblock. Everything else (statfs(), fstatfs(), etc.) | ||
135 | doesn't hold it when calling ->statfs() - superblock is pinned down | ||
136 | by resolving the pathname passed to syscall. | ||
135 | ->quota_read() and ->quota_write() functions are both guaranteed to | 137 | ->quota_read() and ->quota_write() functions are both guaranteed to |
136 | be the only ones operating on the quota file by the quota code (via | 138 | be the only ones operating on the quota file by the quota code (via |
137 | dqio_sem) (unless an admin really wants to screw up something and | 139 | dqio_sem) (unless an admin really wants to screw up something and |
diff --git a/Documentation/filesystems/affs.txt b/Documentation/filesystems/affs.txt index 2d1524469c25..81ac488e3758 100644 --- a/Documentation/filesystems/affs.txt +++ b/Documentation/filesystems/affs.txt | |||
@@ -216,4 +216,4 @@ due to an incompatibility with the Amiga floppy controller. | |||
216 | 216 | ||
217 | If you are interested in an Amiga Emulator for Linux, look at | 217 | If you are interested in an Amiga Emulator for Linux, look at |
218 | 218 | ||
219 | http://www.freiburg.linux.de/~uae/ | 219 | http://web.archive.org/web/*/http://www.freiburg.linux.de/~uae/ |
diff --git a/Documentation/filesystems/befs.txt b/Documentation/filesystems/befs.txt index 67391a15949a..6e49c363938e 100644 --- a/Documentation/filesystems/befs.txt +++ b/Documentation/filesystems/befs.txt | |||
@@ -31,7 +31,7 @@ Current maintainer: Sergey S. Kostyliov <rathamahata@php4.ru> | |||
31 | 31 | ||
32 | WHAT IS THIS DRIVER? | 32 | WHAT IS THIS DRIVER? |
33 | ================== | 33 | ================== |
34 | This module implements the native filesystem of BeOS <http://www.be.com/> | 34 | This module implements the native filesystem of BeOS http://www.beincorporated.com/ |
35 | for the linux 2.4.1 and later kernels. Currently it is a read-only | 35 | for the linux 2.4.1 and later kernels. Currently it is a read-only |
36 | implementation. | 36 | implementation. |
37 | 37 | ||
@@ -61,7 +61,7 @@ step 2. Configuration & make kernel | |||
61 | 61 | ||
62 | The linux kernel has many compile-time options. Most of them are beyond the | 62 | The linux kernel has many compile-time options. Most of them are beyond the |
63 | scope of this document. I suggest the Kernel-HOWTO document as a good general | 63 | scope of this document. I suggest the Kernel-HOWTO document as a good general |
64 | reference on this topic. <http://www.linux.com/howto/Kernel-HOWTO.html> | 64 | reference on this topic. http://www.linuxdocs.org/HOWTOs/Kernel-HOWTO-4.html |
65 | 65 | ||
66 | However, to use the BeFS module, you must enable it at configure time. | 66 | However, to use the BeFS module, you must enable it at configure time. |
67 | 67 | ||
diff --git a/Documentation/filesystems/caching/fscache.txt b/Documentation/filesystems/caching/fscache.txt index a91e2e2095b0..770267af5b3e 100644 --- a/Documentation/filesystems/caching/fscache.txt +++ b/Documentation/filesystems/caching/fscache.txt | |||
@@ -343,8 +343,8 @@ This will look something like: | |||
343 | [root@andromeda ~]# head /proc/fs/fscache/objects | 343 | [root@andromeda ~]# head /proc/fs/fscache/objects |
344 | OBJECT PARENT STAT CHLDN OPS OOP IPR EX READS EM EV F S | NETFS_COOKIE_DEF TY FL NETFS_DATA OBJECT_KEY, AUX_DATA | 344 | OBJECT PARENT STAT CHLDN OPS OOP IPR EX READS EM EV F S | NETFS_COOKIE_DEF TY FL NETFS_DATA OBJECT_KEY, AUX_DATA |
345 | ======== ======== ==== ===== === === === == ===== == == = = | ================ == == ================ ================ | 345 | ======== ======== ==== ===== === === === == ===== == == = = | ================ == == ================ ================ |
346 | 17e4b 2 ACTV 0 0 0 0 0 0 7b 4 0 8 | NFS.fh DT 0 ffff88001dd82820 010006017edcf8bbc93b43298fdfbe71e50b57b13a172c0117f38472, e567634700000000000000000000000063f2404a000000000000000000000000c9030000000000000000000063f2404a | 346 | 17e4b 2 ACTV 0 0 0 0 0 0 7b 4 0 0 | NFS.fh DT 0 ffff88001dd82820 010006017edcf8bbc93b43298fdfbe71e50b57b13a172c0117f38472, e567634700000000000000000000000063f2404a000000000000000000000000c9030000000000000000000063f2404a |
347 | 1693a 2 ACTV 0 0 0 0 0 0 7b 4 0 8 | NFS.fh DT 0 ffff88002db23380 010006017edcf8bbc93b43298fdfbe71e50b57b1e0162c01a2df0ea6, 420ebc4a000000000000000000000000420ebc4a0000000000000000000000000e1801000000000000000000420ebc4a | 347 | 1693a 2 ACTV 0 0 0 0 0 0 7b 4 0 0 | NFS.fh DT 0 ffff88002db23380 010006017edcf8bbc93b43298fdfbe71e50b57b1e0162c01a2df0ea6, 420ebc4a000000000000000000000000420ebc4a0000000000000000000000000e1801000000000000000000420ebc4a |
348 | 348 | ||
349 | where the first set of columns before the '|' describe the object: | 349 | where the first set of columns before the '|' describe the object: |
350 | 350 | ||
@@ -362,7 +362,7 @@ where the first set of columns before the '|' describe the object: | |||
362 | EM Object's event mask | 362 | EM Object's event mask |
363 | EV Events raised on this object | 363 | EV Events raised on this object |
364 | F Object flags | 364 | F Object flags |
365 | S Object slow-work work item flags | 365 | S Object work item busy state mask (1:pending 2:running) |
366 | 366 | ||
367 | and the second set of columns describe the object's cookie, if present: | 367 | and the second set of columns describe the object's cookie, if present: |
368 | 368 | ||
@@ -395,8 +395,8 @@ and the following paired letters: | |||
395 | w Show objects that don't have pending writes | 395 | w Show objects that don't have pending writes |
396 | R Show objects that have outstanding reads | 396 | R Show objects that have outstanding reads |
397 | r Show objects that don't have outstanding reads | 397 | r Show objects that don't have outstanding reads |
398 | S Show objects that have slow work queued | 398 | S Show objects that have work queued |
399 | s Show objects that don't have slow work queued | 399 | s Show objects that don't have work queued |
400 | 400 | ||
401 | If neither side of a letter pair is given, then both are implied. For example: | 401 | If neither side of a letter pair is given, then both are implied. For example: |
402 | 402 | ||
diff --git a/Documentation/filesystems/isofs.txt b/Documentation/filesystems/isofs.txt index 3c367c3b3608..ba0a93384de0 100644 --- a/Documentation/filesystems/isofs.txt +++ b/Documentation/filesystems/isofs.txt | |||
@@ -41,7 +41,7 @@ Mount options unique to the isofs filesystem. | |||
41 | sbsector=xxx Session begins from sector xxx | 41 | sbsector=xxx Session begins from sector xxx |
42 | 42 | ||
43 | Recommended documents about ISO 9660 standard are located at: | 43 | Recommended documents about ISO 9660 standard are located at: |
44 | http://www.y-adagio.com/public/standards/iso_cdromr/tocont.htm | 44 | http://www.y-adagio.com/ |
45 | ftp://ftp.ecma.ch/ecma-st/Ecma-119.pdf | 45 | ftp://ftp.ecma.ch/ecma-st/Ecma-119.pdf |
46 | Quoting from the PDF "This 2nd Edition of Standard ECMA-119 is technically | 46 | Quoting from the PDF "This 2nd Edition of Standard ECMA-119 is technically |
47 | identical with ISO 9660.", so it is a valid and gratis substitute of the | 47 | identical with ISO 9660.", so it is a valid and gratis substitute of the |
diff --git a/Documentation/filesystems/nfs/nfsroot.txt b/Documentation/filesystems/nfs/nfsroot.txt index 3ba0b945aaf8..f2430a7974e1 100644 --- a/Documentation/filesystems/nfs/nfsroot.txt +++ b/Documentation/filesystems/nfs/nfsroot.txt | |||
@@ -124,6 +124,8 @@ ip=<client-ip>:<server-ip>:<gw-ip>:<netmask>:<hostname>:<device>:<autoconf> | |||
124 | 124 | ||
125 | <hostname> Name of the client. May be supplied by autoconfiguration, | 125 | <hostname> Name of the client. May be supplied by autoconfiguration, |
126 | but its absence will not trigger autoconfiguration. | 126 | but its absence will not trigger autoconfiguration. |
127 | If specified and DHCP is used, the user provided hostname will | ||
128 | be carried in the DHCP request to hopefully update DNS record. | ||
127 | 129 | ||
128 | Default: Client IP address is used in ASCII notation. | 130 | Default: Client IP address is used in ASCII notation. |
129 | 131 | ||
diff --git a/Documentation/filesystems/nilfs2.txt b/Documentation/filesystems/nilfs2.txt index d3e7673995eb..d5c0cef38a71 100644 --- a/Documentation/filesystems/nilfs2.txt +++ b/Documentation/filesystems/nilfs2.txt | |||
@@ -49,7 +49,10 @@ Mount options | |||
49 | NILFS2 supports the following mount options: | 49 | NILFS2 supports the following mount options: |
50 | (*) == default | 50 | (*) == default |
51 | 51 | ||
52 | nobarrier Disables barriers. | 52 | barrier(*) This enables/disables the use of write barriers. This |
53 | nobarrier requires an IO stack which can support barriers, and | ||
54 | if nilfs gets an error on a barrier write, it will | ||
55 | disable again with a warning. | ||
53 | errors=continue Keep going on a filesystem error. | 56 | errors=continue Keep going on a filesystem error. |
54 | errors=remount-ro(*) Remount the filesystem read-only on an error. | 57 | errors=remount-ro(*) Remount the filesystem read-only on an error. |
55 | errors=panic Panic and halt the machine if an error occurs. | 58 | errors=panic Panic and halt the machine if an error occurs. |
@@ -74,9 +77,10 @@ norecovery Disable recovery of the filesystem on mount. | |||
74 | This disables every write access on the device for | 77 | This disables every write access on the device for |
75 | read-only mounts or snapshots. This option will fail | 78 | read-only mounts or snapshots. This option will fail |
76 | for r/w mounts on an unclean volume. | 79 | for r/w mounts on an unclean volume. |
77 | discard Issue discard/TRIM commands to the underlying block | 80 | discard This enables/disables the use of discard/TRIM commands. |
78 | device when blocks are freed. This is useful for SSD | 81 | nodiscard(*) The discard/TRIM commands are sent to the underlying |
79 | devices and sparse/thinly-provisioned LUNs. | 82 | block device when blocks are freed. This is useful |
83 | for SSD devices and sparse/thinly-provisioned LUNs. | ||
80 | 84 | ||
81 | NILFS2 usage | 85 | NILFS2 usage |
82 | ============ | 86 | ============ |
diff --git a/Documentation/filesystems/porting b/Documentation/filesystems/porting index a7e9746ee7ea..b12c89538680 100644 --- a/Documentation/filesystems/porting +++ b/Documentation/filesystems/porting | |||
@@ -273,3 +273,48 @@ it's safe to remove it. If you don't need it, remove it. | |||
273 | deliberate; as soon as struct block_device * is propagated in a reasonable | 273 | deliberate; as soon as struct block_device * is propagated in a reasonable |
274 | way by that code fixing will become trivial; until then nothing can be | 274 | way by that code fixing will become trivial; until then nothing can be |
275 | done. | 275 | done. |
276 | |||
277 | [mandatory] | ||
278 | |||
279 | block truncatation on error exit from ->write_begin, and ->direct_IO | ||
280 | moved from generic methods (block_write_begin, cont_write_begin, | ||
281 | nobh_write_begin, blockdev_direct_IO*) to callers. Take a look at | ||
282 | ext2_write_failed and callers for an example. | ||
283 | |||
284 | [mandatory] | ||
285 | |||
286 | ->truncate is going away. The whole truncate sequence needs to be | ||
287 | implemented in ->setattr, which is now mandatory for filesystems | ||
288 | implementing on-disk size changes. Start with a copy of the old inode_setattr | ||
289 | and vmtruncate, and the reorder the vmtruncate + foofs_vmtruncate sequence to | ||
290 | be in order of zeroing blocks using block_truncate_page or similar helpers, | ||
291 | size update and on finally on-disk truncation which should not fail. | ||
292 | inode_change_ok now includes the size checks for ATTR_SIZE and must be called | ||
293 | in the beginning of ->setattr unconditionally. | ||
294 | |||
295 | [mandatory] | ||
296 | |||
297 | ->clear_inode() and ->delete_inode() are gone; ->evict_inode() should | ||
298 | be used instead. It gets called whenever the inode is evicted, whether it has | ||
299 | remaining links or not. Caller does *not* evict the pagecache or inode-associated | ||
300 | metadata buffers; getting rid of those is responsibility of method, as it had | ||
301 | been for ->delete_inode(). | ||
302 | ->drop_inode() returns int now; it's called on final iput() with inode_lock | ||
303 | held and it returns true if filesystems wants the inode to be dropped. As before, | ||
304 | generic_drop_inode() is still the default and it's been updated appropriately. | ||
305 | generic_delete_inode() is also alive and it consists simply of return 1. Note that | ||
306 | all actual eviction work is done by caller after ->drop_inode() returns. | ||
307 | clear_inode() is gone; use end_writeback() instead. As before, it must | ||
308 | be called exactly once on each call of ->evict_inode() (as it used to be for | ||
309 | each call of ->delete_inode()). Unlike before, if you are using inode-associated | ||
310 | metadata buffers (i.e. mark_buffer_dirty_inode()), it's your responsibility to | ||
311 | call invalidate_inode_buffers() before end_writeback(). | ||
312 | No async writeback (and thus no calls of ->write_inode()) will happen | ||
313 | after end_writeback() returns, so actions that should not overlap with ->write_inode() | ||
314 | (e.g. freeing on-disk inode if i_nlink is 0) ought to be done after that call. | ||
315 | |||
316 | NOTE: checking i_nlink in the beginning of ->write_inode() and bailing out | ||
317 | if it's zero is not *and* *never* *had* *been* enough. Final unlink() and iput() | ||
318 | may happen while the inode is in the middle of ->write_inode(); e.g. if you blindly | ||
319 | free the on-disk inode, you may end up doing that while ->write_inode() is writing | ||
320 | to it. | ||
diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt index 9fb6cbe70bde..a6aca8740883 100644 --- a/Documentation/filesystems/proc.txt +++ b/Documentation/filesystems/proc.txt | |||
@@ -33,7 +33,8 @@ Table of Contents | |||
33 | 2 Modifying System Parameters | 33 | 2 Modifying System Parameters |
34 | 34 | ||
35 | 3 Per-Process Parameters | 35 | 3 Per-Process Parameters |
36 | 3.1 /proc/<pid>/oom_adj - Adjust the oom-killer score | 36 | 3.1 /proc/<pid>/oom_adj & /proc/<pid>/oom_score_adj - Adjust the oom-killer |
37 | score | ||
37 | 3.2 /proc/<pid>/oom_score - Display current oom-killer score | 38 | 3.2 /proc/<pid>/oom_score - Display current oom-killer score |
38 | 3.3 /proc/<pid>/io - Display the IO accounting fields | 39 | 3.3 /proc/<pid>/io - Display the IO accounting fields |
39 | 3.4 /proc/<pid>/coredump_filter - Core dump filtering settings | 40 | 3.4 /proc/<pid>/coredump_filter - Core dump filtering settings |
@@ -73,9 +74,9 @@ contact Bodo Bauer at bb@ricochet.net. We'll be happy to add them to this | |||
73 | document. | 74 | document. |
74 | 75 | ||
75 | The latest version of this document is available online at | 76 | The latest version of this document is available online at |
76 | http://skaro.nightcrawler.com/~bb/Docs/Proc as HTML version. | 77 | http://tldp.org/LDP/Linux-Filesystem-Hierarchy/html/proc.html |
77 | 78 | ||
78 | If the above direction does not works for you, ypu could try the kernel | 79 | If the above direction does not works for you, you could try the kernel |
79 | mailing list at linux-kernel@vger.kernel.org and/or try to reach me at | 80 | mailing list at linux-kernel@vger.kernel.org and/or try to reach me at |
80 | comandante@zaralinux.com. | 81 | comandante@zaralinux.com. |
81 | 82 | ||
@@ -1234,42 +1235,64 @@ of the kernel. | |||
1234 | CHAPTER 3: PER-PROCESS PARAMETERS | 1235 | CHAPTER 3: PER-PROCESS PARAMETERS |
1235 | ------------------------------------------------------------------------------ | 1236 | ------------------------------------------------------------------------------ |
1236 | 1237 | ||
1237 | 3.1 /proc/<pid>/oom_adj - Adjust the oom-killer score | 1238 | 3.1 /proc/<pid>/oom_adj & /proc/<pid>/oom_score_adj- Adjust the oom-killer score |
1238 | ------------------------------------------------------ | 1239 | -------------------------------------------------------------------------------- |
1239 | 1240 | ||
1240 | This file can be used to adjust the score used to select which processes | 1241 | These file can be used to adjust the badness heuristic used to select which |
1241 | should be killed in an out-of-memory situation. Giving it a high score will | 1242 | process gets killed in out of memory conditions. |
1242 | increase the likelihood of this process being killed by the oom-killer. Valid | 1243 | |
1243 | values are in the range -16 to +15, plus the special value -17, which disables | 1244 | The badness heuristic assigns a value to each candidate task ranging from 0 |
1244 | oom-killing altogether for this process. | 1245 | (never kill) to 1000 (always kill) to determine which process is targeted. The |
1245 | 1246 | units are roughly a proportion along that range of allowed memory the process | |
1246 | The process to be killed in an out-of-memory situation is selected among all others | 1247 | may allocate from based on an estimation of its current memory and swap use. |
1247 | based on its badness score. This value equals the original memory size of the process | 1248 | For example, if a task is using all allowed memory, its badness score will be |
1248 | and is then updated according to its CPU time (utime + stime) and the | 1249 | 1000. If it is using half of its allowed memory, its score will be 500. |
1249 | run time (uptime - start time). The longer it runs the smaller is the score. | 1250 | |
1250 | Badness score is divided by the square root of the CPU time and then by | 1251 | There is an additional factor included in the badness score: root |
1251 | the double square root of the run time. | 1252 | processes are given 3% extra memory over other tasks. |
1252 | 1253 | ||
1253 | Swapped out tasks are killed first. Half of each child's memory size is added to | 1254 | The amount of "allowed" memory depends on the context in which the oom killer |
1254 | the parent's score if they do not share the same memory. Thus forking servers | 1255 | was called. If it is due to the memory assigned to the allocating task's cpuset |
1255 | are the prime candidates to be killed. Having only one 'hungry' child will make | 1256 | being exhausted, the allowed memory represents the set of mems assigned to that |
1256 | parent less preferable than the child. | 1257 | cpuset. If it is due to a mempolicy's node(s) being exhausted, the allowed |
1257 | 1258 | memory represents the set of mempolicy nodes. If it is due to a memory | |
1258 | /proc/<pid>/oom_score shows process' current badness score. | 1259 | limit (or swap limit) being reached, the allowed memory is that configured |
1259 | 1260 | limit. Finally, if it is due to the entire system being out of memory, the | |
1260 | The following heuristics are then applied: | 1261 | allowed memory represents all allocatable resources. |
1261 | * if the task was reniced, its score doubles | 1262 | |
1262 | * superuser or direct hardware access tasks (CAP_SYS_ADMIN, CAP_SYS_RESOURCE | 1263 | The value of /proc/<pid>/oom_score_adj is added to the badness score before it |
1263 | or CAP_SYS_RAWIO) have their score divided by 4 | 1264 | is used to determine which task to kill. Acceptable values range from -1000 |
1264 | * if oom condition happened in one cpuset and checked process does not belong | 1265 | (OOM_SCORE_ADJ_MIN) to +1000 (OOM_SCORE_ADJ_MAX). This allows userspace to |
1265 | to it, its score is divided by 8 | 1266 | polarize the preference for oom killing either by always preferring a certain |
1266 | * the resulting score is multiplied by two to the power of oom_adj, i.e. | 1267 | task or completely disabling it. The lowest possible value, -1000, is |
1267 | points <<= oom_adj when it is positive and | 1268 | equivalent to disabling oom killing entirely for that task since it will always |
1268 | points >>= -(oom_adj) otherwise | 1269 | report a badness score of 0. |
1269 | 1270 | ||
1270 | The task with the highest badness score is then selected and its children | 1271 | Consequently, it is very simple for userspace to define the amount of memory to |
1271 | are killed, process itself will be killed in an OOM situation when it does | 1272 | consider for each task. Setting a /proc/<pid>/oom_score_adj value of +500, for |
1272 | not have children or some of them disabled oom like described above. | 1273 | example, is roughly equivalent to allowing the remainder of tasks sharing the |
1274 | same system, cpuset, mempolicy, or memory controller resources to use at least | ||
1275 | 50% more memory. A value of -500, on the other hand, would be roughly | ||
1276 | equivalent to discounting 50% of the task's allowed memory from being considered | ||
1277 | as scoring against the task. | ||
1278 | |||
1279 | For backwards compatibility with previous kernels, /proc/<pid>/oom_adj may also | ||
1280 | be used to tune the badness score. Its acceptable values range from -16 | ||
1281 | (OOM_ADJUST_MIN) to +15 (OOM_ADJUST_MAX) and a special value of -17 | ||
1282 | (OOM_DISABLE) to disable oom killing entirely for that task. Its value is | ||
1283 | scaled linearly with /proc/<pid>/oom_score_adj. | ||
1284 | |||
1285 | Writing to /proc/<pid>/oom_score_adj or /proc/<pid>/oom_adj will change the | ||
1286 | other with its scaled value. | ||
1287 | |||
1288 | NOTICE: /proc/<pid>/oom_adj is deprecated and will be removed, please see | ||
1289 | Documentation/feature-removal-schedule.txt. | ||
1290 | |||
1291 | Caveat: when a parent task is selected, the oom killer will sacrifice any first | ||
1292 | generation children with seperate address spaces instead, if possible. This | ||
1293 | avoids servers and important system daemons from being killed and loses the | ||
1294 | minimal amount of work. | ||
1295 | |||
1273 | 1296 | ||
1274 | 3.2 /proc/<pid>/oom_score - Display current oom-killer score | 1297 | 3.2 /proc/<pid>/oom_score - Display current oom-killer score |
1275 | ------------------------------------------------------------- | 1298 | ------------------------------------------------------------- |
diff --git a/Documentation/filesystems/squashfs.txt b/Documentation/filesystems/squashfs.txt index 203f7202cc9e..66699afd66ca 100644 --- a/Documentation/filesystems/squashfs.txt +++ b/Documentation/filesystems/squashfs.txt | |||
@@ -2,7 +2,7 @@ SQUASHFS 4.0 FILESYSTEM | |||
2 | ======================= | 2 | ======================= |
3 | 3 | ||
4 | Squashfs is a compressed read-only filesystem for Linux. | 4 | Squashfs is a compressed read-only filesystem for Linux. |
5 | It uses zlib compression to compress files, inodes and directories. | 5 | It uses zlib/lzo compression to compress files, inodes and directories. |
6 | Inodes in the system are very small and all blocks are packed to minimise | 6 | Inodes in the system are very small and all blocks are packed to minimise |
7 | data overhead. Block sizes greater than 4K are supported up to a maximum | 7 | data overhead. Block sizes greater than 4K are supported up to a maximum |
8 | of 1Mbytes (default block size 128K). | 8 | of 1Mbytes (default block size 128K). |
diff --git a/Documentation/filesystems/sysfs-pci.txt b/Documentation/filesystems/sysfs-pci.txt index 85354b32d731..74eaac26f8b8 100644 --- a/Documentation/filesystems/sysfs-pci.txt +++ b/Documentation/filesystems/sysfs-pci.txt | |||
@@ -39,7 +39,7 @@ files, each with their own function. | |||
39 | local_cpus nearby CPU mask (cpumask, ro) | 39 | local_cpus nearby CPU mask (cpumask, ro) |
40 | remove remove device from kernel's list (ascii, wo) | 40 | remove remove device from kernel's list (ascii, wo) |
41 | resource PCI resource host addresses (ascii, ro) | 41 | resource PCI resource host addresses (ascii, ro) |
42 | resource0..N PCI resource N, if present (binary, mmap) | 42 | resource0..N PCI resource N, if present (binary, mmap, rw[1]) |
43 | resource0_wc..N_wc PCI WC map resource N, if prefetchable (binary, mmap) | 43 | resource0_wc..N_wc PCI WC map resource N, if prefetchable (binary, mmap) |
44 | rom PCI ROM resource, if present (binary, ro) | 44 | rom PCI ROM resource, if present (binary, ro) |
45 | subsystem_device PCI subsystem device (ascii, ro) | 45 | subsystem_device PCI subsystem device (ascii, ro) |
@@ -54,13 +54,16 @@ files, each with their own function. | |||
54 | binary - file contains binary data | 54 | binary - file contains binary data |
55 | cpumask - file contains a cpumask type | 55 | cpumask - file contains a cpumask type |
56 | 56 | ||
57 | [1] rw for RESOURCE_IO (I/O port) regions only | ||
58 | |||
57 | The read only files are informational, writes to them will be ignored, with | 59 | The read only files are informational, writes to them will be ignored, with |
58 | the exception of the 'rom' file. Writable files can be used to perform | 60 | the exception of the 'rom' file. Writable files can be used to perform |
59 | actions on the device (e.g. changing config space, detaching a device). | 61 | actions on the device (e.g. changing config space, detaching a device). |
60 | mmapable files are available via an mmap of the file at offset 0 and can be | 62 | mmapable files are available via an mmap of the file at offset 0 and can be |
61 | used to do actual device programming from userspace. Note that some platforms | 63 | used to do actual device programming from userspace. Note that some platforms |
62 | don't support mmapping of certain resources, so be sure to check the return | 64 | don't support mmapping of certain resources, so be sure to check the return |
63 | value from any attempted mmap. | 65 | value from any attempted mmap. The most notable of these are I/O port |
66 | resources, which also provide read/write access. | ||
64 | 67 | ||
65 | The 'enable' file provides a counter that indicates how many times the device | 68 | The 'enable' file provides a counter that indicates how many times the device |
66 | has been enabled. If the 'enable' file currently returns '4', and a '1' is | 69 | has been enabled. If the 'enable' file currently returns '4', and a '1' is |
diff --git a/Documentation/filesystems/sysfs.txt b/Documentation/filesystems/sysfs.txt index 931c806642c5..5d1335faec2d 100644 --- a/Documentation/filesystems/sysfs.txt +++ b/Documentation/filesystems/sysfs.txt | |||
@@ -4,7 +4,7 @@ sysfs - _The_ filesystem for exporting kernel objects. | |||
4 | Patrick Mochel <mochel@osdl.org> | 4 | Patrick Mochel <mochel@osdl.org> |
5 | Mike Murphy <mamurph@cs.clemson.edu> | 5 | Mike Murphy <mamurph@cs.clemson.edu> |
6 | 6 | ||
7 | Revised: 22 February 2009 | 7 | Revised: 15 July 2010 |
8 | Original: 10 January 2003 | 8 | Original: 10 January 2003 |
9 | 9 | ||
10 | 10 | ||
@@ -124,7 +124,7 @@ show and store methods of the attribute owners. | |||
124 | 124 | ||
125 | struct sysfs_ops { | 125 | struct sysfs_ops { |
126 | ssize_t (*show)(struct kobject *, struct attribute *, char *); | 126 | ssize_t (*show)(struct kobject *, struct attribute *, char *); |
127 | ssize_t (*store)(struct kobject *, struct attribute *, const char *); | 127 | ssize_t (*store)(struct kobject *, struct attribute *, const char *, size_t); |
128 | }; | 128 | }; |
129 | 129 | ||
130 | [ Subsystems should have already defined a struct kobj_type as a | 130 | [ Subsystems should have already defined a struct kobj_type as a |
@@ -139,18 +139,22 @@ calls the associated methods. | |||
139 | 139 | ||
140 | To illustrate: | 140 | To illustrate: |
141 | 141 | ||
142 | #define to_dev(obj) container_of(obj, struct device, kobj) | ||
142 | #define to_dev_attr(_attr) container_of(_attr, struct device_attribute, attr) | 143 | #define to_dev_attr(_attr) container_of(_attr, struct device_attribute, attr) |
143 | #define to_dev(d) container_of(d, struct device, kobj) | ||
144 | 144 | ||
145 | static ssize_t | 145 | static ssize_t dev_attr_show(struct kobject *kobj, struct attribute *attr, |
146 | dev_attr_show(struct kobject * kobj, struct attribute * attr, char * buf) | 146 | char *buf) |
147 | { | 147 | { |
148 | struct device_attribute * dev_attr = to_dev_attr(attr); | 148 | struct device_attribute *dev_attr = to_dev_attr(attr); |
149 | struct device * dev = to_dev(kobj); | 149 | struct device *dev = to_dev(kobj); |
150 | ssize_t ret = 0; | 150 | ssize_t ret = -EIO; |
151 | 151 | ||
152 | if (dev_attr->show) | 152 | if (dev_attr->show) |
153 | ret = dev_attr->show(dev, buf); | 153 | ret = dev_attr->show(dev, dev_attr, buf); |
154 | if (ret >= (ssize_t)PAGE_SIZE) { | ||
155 | print_symbol("dev_attr_show: %s returned bad count\n", | ||
156 | (unsigned long)dev_attr->show); | ||
157 | } | ||
154 | return ret; | 158 | return ret; |
155 | } | 159 | } |
156 | 160 | ||
@@ -163,10 +167,9 @@ To read or write attributes, show() or store() methods must be | |||
163 | specified when declaring the attribute. The method types should be as | 167 | specified when declaring the attribute. The method types should be as |
164 | simple as those defined for device attributes: | 168 | simple as those defined for device attributes: |
165 | 169 | ||
166 | ssize_t (*show)(struct device * dev, struct device_attribute * attr, | 170 | ssize_t (*show)(struct device *dev, struct device_attribute *attr, char *buf); |
167 | char * buf); | 171 | ssize_t (*store)(struct device *dev, struct device_attribute *attr, |
168 | ssize_t (*store)(struct device * dev, struct device_attribute * attr, | 172 | const char *buf, size_t count); |
169 | const char * buf); | ||
170 | 173 | ||
171 | IOW, they should take only an object, an attribute, and a buffer as parameters. | 174 | IOW, they should take only an object, an attribute, and a buffer as parameters. |
172 | 175 | ||
@@ -209,8 +212,8 @@ Other notes: | |||
209 | 212 | ||
210 | - show() should always use snprintf(). | 213 | - show() should always use snprintf(). |
211 | 214 | ||
212 | - store() should return the number of bytes used from the buffer. This | 215 | - store() should return the number of bytes used from the buffer. If the |
213 | can be done using strlen(). | 216 | entire buffer has been used, just return the count argument. |
214 | 217 | ||
215 | - show() or store() can always return errors. If a bad value comes | 218 | - show() or store() can always return errors. If a bad value comes |
216 | through, be sure to return an error. | 219 | through, be sure to return an error. |
@@ -223,15 +226,18 @@ Other notes: | |||
223 | 226 | ||
224 | A very simple (and naive) implementation of a device attribute is: | 227 | A very simple (and naive) implementation of a device attribute is: |
225 | 228 | ||
226 | static ssize_t show_name(struct device *dev, struct device_attribute *attr, char *buf) | 229 | static ssize_t show_name(struct device *dev, struct device_attribute *attr, |
230 | char *buf) | ||
227 | { | 231 | { |
228 | return snprintf(buf, PAGE_SIZE, "%s\n", dev->name); | 232 | return snprintf(buf, PAGE_SIZE, "%s\n", dev->name); |
229 | } | 233 | } |
230 | 234 | ||
231 | static ssize_t store_name(struct device * dev, const char * buf) | 235 | static ssize_t store_name(struct device *dev, struct device_attribute *attr, |
236 | const char *buf, size_t count) | ||
232 | { | 237 | { |
233 | sscanf(buf, "%20s", dev->name); | 238 | snprintf(dev->name, sizeof(dev->name), "%.*s", |
234 | return strnlen(buf, PAGE_SIZE); | 239 | (int)min(count, sizeof(dev->name) - 1), buf); |
240 | return count; | ||
235 | } | 241 | } |
236 | 242 | ||
237 | static DEVICE_ATTR(name, S_IRUGO, show_name, store_name); | 243 | static DEVICE_ATTR(name, S_IRUGO, show_name, store_name); |
@@ -327,7 +333,7 @@ Structure: | |||
327 | struct bus_attribute { | 333 | struct bus_attribute { |
328 | struct attribute attr; | 334 | struct attribute attr; |
329 | ssize_t (*show)(struct bus_type *, char * buf); | 335 | ssize_t (*show)(struct bus_type *, char * buf); |
330 | ssize_t (*store)(struct bus_type *, const char * buf); | 336 | ssize_t (*store)(struct bus_type *, const char * buf, size_t count); |
331 | }; | 337 | }; |
332 | 338 | ||
333 | Declaring: | 339 | Declaring: |
diff --git a/Documentation/filesystems/vfat.txt b/Documentation/filesystems/vfat.txt index eed520fd0c8e..ead764b2728f 100644 --- a/Documentation/filesystems/vfat.txt +++ b/Documentation/filesystems/vfat.txt | |||
@@ -165,7 +165,8 @@ TEST SUITE | |||
165 | If you plan to make any modifications to the vfat filesystem, please | 165 | If you plan to make any modifications to the vfat filesystem, please |
166 | get the test suite that comes with the vfat distribution at | 166 | get the test suite that comes with the vfat distribution at |
167 | 167 | ||
168 | http://bmrc.berkeley.edu/people/chaffee/vfat.html | 168 | http://web.archive.org/web/*/http://bmrc.berkeley.edu/ |
169 | people/chaffee/vfat.html | ||
169 | 170 | ||
170 | This tests quite a few parts of the vfat filesystem and additional | 171 | This tests quite a few parts of the vfat filesystem and additional |
171 | tests for new features or untested features would be appreciated. | 172 | tests for new features or untested features would be appreciated. |
diff --git a/Documentation/filesystems/xfs.txt b/Documentation/filesystems/xfs.txt index 9878f50d6ed6..7bff3e4f35df 100644 --- a/Documentation/filesystems/xfs.txt +++ b/Documentation/filesystems/xfs.txt | |||
@@ -131,17 +131,6 @@ When mounting an XFS filesystem, the following options are accepted. | |||
131 | Don't check for double mounted file systems using the file system uuid. | 131 | Don't check for double mounted file systems using the file system uuid. |
132 | This is useful to mount LVM snapshot volumes. | 132 | This is useful to mount LVM snapshot volumes. |
133 | 133 | ||
134 | osyncisosync | ||
135 | Make O_SYNC writes implement true O_SYNC. WITHOUT this option, | ||
136 | Linux XFS behaves as if an "osyncisdsync" option is used, | ||
137 | which will make writes to files opened with the O_SYNC flag set | ||
138 | behave as if the O_DSYNC flag had been used instead. | ||
139 | This can result in better performance without compromising | ||
140 | data safety. | ||
141 | However if this option is not in effect, timestamp updates from | ||
142 | O_SYNC writes can be lost if the system crashes. | ||
143 | If timestamp updates are critical, use the osyncisosync option. | ||
144 | |||
145 | uquota/usrquota/uqnoenforce/quota | 134 | uquota/usrquota/uqnoenforce/quota |
146 | User disk quota accounting enabled, and limits (optionally) | 135 | User disk quota accounting enabled, and limits (optionally) |
147 | enforced. Refer to xfs_quota(8) for further details. | 136 | enforced. Refer to xfs_quota(8) for further details. |