diff options
Diffstat (limited to 'Documentation/filesystems')
| -rw-r--r-- | Documentation/filesystems/Locking | 32 | ||||
| -rw-r--r-- | Documentation/filesystems/dax.txt | 6 | ||||
| -rw-r--r-- | Documentation/filesystems/f2fs.txt | 7 | ||||
| -rw-r--r-- | Documentation/filesystems/ocfs2-online-filecheck.txt | 10 | ||||
| -rw-r--r-- | Documentation/filesystems/proc.txt | 11 | ||||
| -rw-r--r-- | Documentation/filesystems/vfs.txt | 62 |
6 files changed, 84 insertions, 44 deletions
diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking index d9c37ec4c760..1b3c39a7de62 100644 --- a/Documentation/filesystems/Locking +++ b/Documentation/filesystems/Locking | |||
| @@ -15,11 +15,14 @@ prototypes: | |||
| 15 | int (*d_compare)(const struct dentry *, const struct dentry *, | 15 | int (*d_compare)(const struct dentry *, const struct dentry *, |
| 16 | unsigned int, const char *, const struct qstr *); | 16 | unsigned int, const char *, const struct qstr *); |
| 17 | int (*d_delete)(struct dentry *); | 17 | int (*d_delete)(struct dentry *); |
| 18 | int (*d_init)(struct dentry *); | ||
| 18 | void (*d_release)(struct dentry *); | 19 | void (*d_release)(struct dentry *); |
| 19 | void (*d_iput)(struct dentry *, struct inode *); | 20 | void (*d_iput)(struct dentry *, struct inode *); |
| 20 | char *(*d_dname)(struct dentry *dentry, char *buffer, int buflen); | 21 | char *(*d_dname)(struct dentry *dentry, char *buffer, int buflen); |
| 21 | struct vfsmount *(*d_automount)(struct path *path); | 22 | struct vfsmount *(*d_automount)(struct path *path); |
| 22 | int (*d_manage)(struct dentry *, bool); | 23 | int (*d_manage)(struct dentry *, bool); |
| 24 | struct dentry *(*d_real)(struct dentry *, const struct inode *, | ||
| 25 | unsigned int); | ||
| 23 | 26 | ||
| 24 | locking rules: | 27 | locking rules: |
| 25 | rename_lock ->d_lock may block rcu-walk | 28 | rename_lock ->d_lock may block rcu-walk |
| @@ -28,12 +31,14 @@ d_weak_revalidate:no no yes no | |||
| 28 | d_hash no no no maybe | 31 | d_hash no no no maybe |
| 29 | d_compare: yes no no maybe | 32 | d_compare: yes no no maybe |
| 30 | d_delete: no yes no no | 33 | d_delete: no yes no no |
| 34 | d_init: no no yes no | ||
| 31 | d_release: no no yes no | 35 | d_release: no no yes no |
| 32 | d_prune: no yes no no | 36 | d_prune: no yes no no |
| 33 | d_iput: no no yes no | 37 | d_iput: no no yes no |
| 34 | d_dname: no no no no | 38 | d_dname: no no no no |
| 35 | d_automount: no no yes no | 39 | d_automount: no no yes no |
| 36 | d_manage: no no yes (ref-walk) maybe | 40 | d_manage: no no yes (ref-walk) maybe |
| 41 | d_real no no yes no | ||
| 37 | 42 | ||
| 38 | --------------------------- inode_operations --------------------------- | 43 | --------------------------- inode_operations --------------------------- |
| 39 | prototypes: | 44 | prototypes: |
| @@ -66,7 +71,6 @@ prototypes: | |||
| 66 | struct file *, unsigned open_flag, | 71 | struct file *, unsigned open_flag, |
| 67 | umode_t create_mode, int *opened); | 72 | umode_t create_mode, int *opened); |
| 68 | int (*tmpfile) (struct inode *, struct dentry *, umode_t); | 73 | int (*tmpfile) (struct inode *, struct dentry *, umode_t); |
| 69 | int (*dentry_open)(struct dentry *, struct file *, const struct cred *); | ||
| 70 | 74 | ||
| 71 | locking rules: | 75 | locking rules: |
| 72 | all may block | 76 | all may block |
| @@ -95,7 +99,6 @@ fiemap: no | |||
| 95 | update_time: no | 99 | update_time: no |
| 96 | atomic_open: yes | 100 | atomic_open: yes |
| 97 | tmpfile: no | 101 | tmpfile: no |
| 98 | dentry_open: no | ||
| 99 | 102 | ||
| 100 | Additionally, ->rmdir(), ->unlink() and ->rename() have ->i_mutex on | 103 | Additionally, ->rmdir(), ->unlink() and ->rename() have ->i_mutex on |
| 101 | victim. | 104 | victim. |
| @@ -179,7 +182,6 @@ unlocks and drops the reference. | |||
| 179 | prototypes: | 182 | prototypes: |
| 180 | int (*writepage)(struct page *page, struct writeback_control *wbc); | 183 | int (*writepage)(struct page *page, struct writeback_control *wbc); |
| 181 | int (*readpage)(struct file *, struct page *); | 184 | int (*readpage)(struct file *, struct page *); |
| 182 | int (*sync_page)(struct page *); | ||
| 183 | int (*writepages)(struct address_space *, struct writeback_control *); | 185 | int (*writepages)(struct address_space *, struct writeback_control *); |
| 184 | int (*set_page_dirty)(struct page *page); | 186 | int (*set_page_dirty)(struct page *page); |
| 185 | int (*readpages)(struct file *filp, struct address_space *mapping, | 187 | int (*readpages)(struct file *filp, struct address_space *mapping, |
| @@ -195,7 +197,9 @@ prototypes: | |||
| 195 | int (*releasepage) (struct page *, int); | 197 | int (*releasepage) (struct page *, int); |
| 196 | void (*freepage)(struct page *); | 198 | void (*freepage)(struct page *); |
| 197 | int (*direct_IO)(struct kiocb *, struct iov_iter *iter); | 199 | int (*direct_IO)(struct kiocb *, struct iov_iter *iter); |
| 200 | bool (*isolate_page) (struct page *, isolate_mode_t); | ||
| 198 | int (*migratepage)(struct address_space *, struct page *, struct page *); | 201 | int (*migratepage)(struct address_space *, struct page *, struct page *); |
| 202 | void (*putback_page) (struct page *); | ||
| 199 | int (*launder_page)(struct page *); | 203 | int (*launder_page)(struct page *); |
| 200 | int (*is_partially_uptodate)(struct page *, unsigned long, unsigned long); | 204 | int (*is_partially_uptodate)(struct page *, unsigned long, unsigned long); |
| 201 | int (*error_remove_page)(struct address_space *, struct page *); | 205 | int (*error_remove_page)(struct address_space *, struct page *); |
| @@ -208,7 +212,6 @@ locking rules: | |||
| 208 | PageLocked(page) i_mutex | 212 | PageLocked(page) i_mutex |
| 209 | writepage: yes, unlocks (see below) | 213 | writepage: yes, unlocks (see below) |
| 210 | readpage: yes, unlocks | 214 | readpage: yes, unlocks |
| 211 | sync_page: maybe | ||
| 212 | writepages: | 215 | writepages: |
| 213 | set_page_dirty no | 216 | set_page_dirty no |
| 214 | readpages: | 217 | readpages: |
| @@ -219,15 +222,17 @@ invalidatepage: yes | |||
| 219 | releasepage: yes | 222 | releasepage: yes |
| 220 | freepage: yes | 223 | freepage: yes |
| 221 | direct_IO: | 224 | direct_IO: |
| 225 | isolate_page: yes | ||
| 222 | migratepage: yes (both) | 226 | migratepage: yes (both) |
| 227 | putback_page: yes | ||
| 223 | launder_page: yes | 228 | launder_page: yes |
| 224 | is_partially_uptodate: yes | 229 | is_partially_uptodate: yes |
| 225 | error_remove_page: yes | 230 | error_remove_page: yes |
| 226 | swap_activate: no | 231 | swap_activate: no |
| 227 | swap_deactivate: no | 232 | swap_deactivate: no |
| 228 | 233 | ||
| 229 | ->write_begin(), ->write_end(), ->sync_page() and ->readpage() | 234 | ->write_begin(), ->write_end() and ->readpage() may be called from |
| 230 | may be called from the request handler (/dev/loop). | 235 | the request handler (/dev/loop). |
| 231 | 236 | ||
| 232 | ->readpage() unlocks the page, either synchronously or via I/O | 237 | ->readpage() unlocks the page, either synchronously or via I/O |
| 233 | completion. | 238 | completion. |
| @@ -283,11 +288,6 @@ will leave the page itself marked clean but it will be tagged as dirty in the | |||
| 283 | radix tree. This incoherency can lead to all sorts of hard-to-debug problems | 288 | radix tree. This incoherency can lead to all sorts of hard-to-debug problems |
| 284 | in the filesystem like having dirty inodes at umount and losing written data. | 289 | in the filesystem like having dirty inodes at umount and losing written data. |
| 285 | 290 | ||
| 286 | ->sync_page() locking rules are not well-defined - usually it is called | ||
| 287 | with lock on page, but that is not guaranteed. Considering the currently | ||
| 288 | existing instances of this method ->sync_page() itself doesn't look | ||
| 289 | well-defined... | ||
| 290 | |||
| 291 | ->writepages() is used for periodic writeback and for syscall-initiated | 291 | ->writepages() is used for periodic writeback and for syscall-initiated |
| 292 | sync operations. The address_space should start I/O against at least | 292 | sync operations. The address_space should start I/O against at least |
| 293 | *nr_to_write pages. *nr_to_write must be decremented for each page which is | 293 | *nr_to_write pages. *nr_to_write must be decremented for each page which is |
| @@ -544,13 +544,13 @@ subsequent truncate), and then return with VM_FAULT_LOCKED, and the page | |||
| 544 | locked. The VM will unlock the page. | 544 | locked. The VM will unlock the page. |
| 545 | 545 | ||
| 546 | ->map_pages() is called when VM asks to map easily accessible pages. | 546 | ->map_pages() is called when VM asks to map easily accessible pages. |
| 547 | Filesystem should find and map pages associated with offsets from "pgoff" | 547 | Filesystem should find and map pages associated with offsets from "start_pgoff" |
| 548 | till "max_pgoff". ->map_pages() is called with page table locked and must | 548 | till "end_pgoff". ->map_pages() is called with page table locked and must |
| 549 | not block. If it's not possible to reach a page without blocking, | 549 | not block. If it's not possible to reach a page without blocking, |
| 550 | filesystem should skip it. Filesystem should use do_set_pte() to setup | 550 | filesystem should skip it. Filesystem should use do_set_pte() to setup |
| 551 | page table entry. Pointer to entry associated with offset "pgoff" is | 551 | page table entry. Pointer to entry associated with the page is passed in |
| 552 | passed in "pte" field in vm_fault structure. Pointers to entries for other | 552 | "pte" field in fault_env structure. Pointers to entries for other offsets |
| 553 | offsets should be calculated relative to "pte". | 553 | should be calculated relative to "pte". |
| 554 | 554 | ||
| 555 | ->page_mkwrite() is called when a previously read-only pte is | 555 | ->page_mkwrite() is called when a previously read-only pte is |
| 556 | about to become writeable. The filesystem again must ensure that there are | 556 | about to become writeable. The filesystem again must ensure that there are |
diff --git a/Documentation/filesystems/dax.txt b/Documentation/filesystems/dax.txt index ce4587d257d2..0c16a22521a8 100644 --- a/Documentation/filesystems/dax.txt +++ b/Documentation/filesystems/dax.txt | |||
| @@ -49,6 +49,7 @@ These block devices may be used for inspiration: | |||
| 49 | - axonram: Axon DDR2 device driver | 49 | - axonram: Axon DDR2 device driver |
| 50 | - brd: RAM backed block device driver | 50 | - brd: RAM backed block device driver |
| 51 | - dcssblk: s390 dcss block device driver | 51 | - dcssblk: s390 dcss block device driver |
| 52 | - pmem: NVDIMM persistent memory driver | ||
| 52 | 53 | ||
| 53 | 54 | ||
| 54 | Implementation Tips for Filesystem Writers | 55 | Implementation Tips for Filesystem Writers |
| @@ -75,8 +76,9 @@ calls to get_block() (for example by a page-fault racing with a read() | |||
| 75 | or a write()) work correctly. | 76 | or a write()) work correctly. |
| 76 | 77 | ||
| 77 | These filesystems may be used for inspiration: | 78 | These filesystems may be used for inspiration: |
| 78 | - ext2: the second extended filesystem, see Documentation/filesystems/ext2.txt | 79 | - ext2: see Documentation/filesystems/ext2.txt |
| 79 | - ext4: the fourth extended filesystem, see Documentation/filesystems/ext4.txt | 80 | - ext4: see Documentation/filesystems/ext4.txt |
| 81 | - xfs: see Documentation/filesystems/xfs.txt | ||
| 80 | 82 | ||
| 81 | 83 | ||
| 82 | Handling Media Errors | 84 | Handling Media Errors |
diff --git a/Documentation/filesystems/f2fs.txt b/Documentation/filesystems/f2fs.txt index e1c9f0849da6..ecd808088362 100644 --- a/Documentation/filesystems/f2fs.txt +++ b/Documentation/filesystems/f2fs.txt | |||
| @@ -109,7 +109,9 @@ background_gc=%s Turn on/off cleaning operations, namely garbage | |||
| 109 | disable_roll_forward Disable the roll-forward recovery routine | 109 | disable_roll_forward Disable the roll-forward recovery routine |
| 110 | norecovery Disable the roll-forward recovery routine, mounted read- | 110 | norecovery Disable the roll-forward recovery routine, mounted read- |
| 111 | only (i.e., -o ro,disable_roll_forward) | 111 | only (i.e., -o ro,disable_roll_forward) |
| 112 | discard Issue discard/TRIM commands when a segment is cleaned. | 112 | discard/nodiscard Enable/disable real-time discard in f2fs, if discard is |
| 113 | enabled, f2fs will issue discard/TRIM commands when a | ||
| 114 | segment is cleaned. | ||
| 113 | no_heap Disable heap-style segment allocation which finds free | 115 | no_heap Disable heap-style segment allocation which finds free |
| 114 | segments for data from the beginning of main area, while | 116 | segments for data from the beginning of main area, while |
| 115 | for node from the end of main area. | 117 | for node from the end of main area. |
| @@ -151,6 +153,9 @@ noinline_data Disable the inline data feature, inline data feature is | |||
| 151 | enabled by default. | 153 | enabled by default. |
| 152 | data_flush Enable data flushing before checkpoint in order to | 154 | data_flush Enable data flushing before checkpoint in order to |
| 153 | persist data of regular and symlink. | 155 | persist data of regular and symlink. |
| 156 | mode=%s Control block allocation mode which supports "adaptive" | ||
| 157 | and "lfs". In "lfs" mode, there should be no random | ||
| 158 | writes towards main area. | ||
| 154 | 159 | ||
| 155 | ================================================================================ | 160 | ================================================================================ |
| 156 | DEBUGFS ENTRIES | 161 | DEBUGFS ENTRIES |
diff --git a/Documentation/filesystems/ocfs2-online-filecheck.txt b/Documentation/filesystems/ocfs2-online-filecheck.txt index 1ab07860430d..139fab175c8a 100644 --- a/Documentation/filesystems/ocfs2-online-filecheck.txt +++ b/Documentation/filesystems/ocfs2-online-filecheck.txt | |||
| @@ -5,12 +5,12 @@ This document will describe OCFS2 online file check feature. | |||
| 5 | 5 | ||
| 6 | Introduction | 6 | Introduction |
| 7 | ============ | 7 | ============ |
| 8 | OCFS2 is often used in high-availaibility systems. However, OCFS2 usually | 8 | OCFS2 is often used in high-availability systems. However, OCFS2 usually |
| 9 | converts the filesystem to read-only when it encounters an error. This may not be | 9 | converts the filesystem to read-only when it encounters an error. This may not be |
| 10 | necessary, since turning the filesystem read-only would affect other running | 10 | necessary, since turning the filesystem read-only would affect other running |
| 11 | processes as well, decreasing availability. | 11 | processes as well, decreasing availability. |
| 12 | Then, a mount option (errors=continue) is introduced, which would return the | 12 | Then, a mount option (errors=continue) is introduced, which would return the |
| 13 | -EIO errno to the calling process and terminate furhter processing so that the | 13 | -EIO errno to the calling process and terminate further processing so that the |
| 14 | filesystem is not corrupted further. The filesystem is not converted to | 14 | filesystem is not corrupted further. The filesystem is not converted to |
| 15 | read-only, and the problematic file's inode number is reported in the kernel | 15 | read-only, and the problematic file's inode number is reported in the kernel |
| 16 | log. The user can try to check/fix this file via online filecheck feature. | 16 | log. The user can try to check/fix this file via online filecheck feature. |
| @@ -44,7 +44,7 @@ There is a sysfs directory for each OCFS2 file system mounting: | |||
| 44 | 44 | ||
| 45 | /sys/fs/ocfs2/<devname>/filecheck | 45 | /sys/fs/ocfs2/<devname>/filecheck |
| 46 | 46 | ||
| 47 | Here, <devname> indicates the name of OCFS2 volumn device which has been already | 47 | Here, <devname> indicates the name of OCFS2 volume device which has been already |
| 48 | mounted. The file above would accept inode numbers. This could be used to | 48 | mounted. The file above would accept inode numbers. This could be used to |
| 49 | communicate with kernel space, tell which file(inode number) will be checked or | 49 | communicate with kernel space, tell which file(inode number) will be checked or |
| 50 | fixed. Currently, three operations are supported, which includes checking | 50 | fixed. Currently, three operations are supported, which includes checking |
| @@ -76,14 +76,14 @@ The output is like this: | |||
| 76 | This time, the <ERROR> column indicates whether this fix is successful or not. | 76 | This time, the <ERROR> column indicates whether this fix is successful or not. |
| 77 | 77 | ||
| 78 | 3. The record cache is used to store the history of check/fix results. Its | 78 | 3. The record cache is used to store the history of check/fix results. Its |
| 79 | defalut size is 10, and can be adjusted within the range of 10 ~ 100. You can | 79 | default size is 10, and can be adjusted within the range of 10 ~ 100. You can |
| 80 | adjust the size like this: | 80 | adjust the size like this: |
| 81 | 81 | ||
| 82 | # echo "<size>" > /sys/fs/ocfs2/<devname>/filecheck/set | 82 | # echo "<size>" > /sys/fs/ocfs2/<devname>/filecheck/set |
| 83 | 83 | ||
| 84 | Fixing stuff | 84 | Fixing stuff |
| 85 | ============ | 85 | ============ |
| 86 | On receivng the inode, the filesystem would read the inode and the | 86 | On receiving the inode, the filesystem would read the inode and the |
| 87 | file metadata. In case of errors, the filesystem would fix the errors | 87 | file metadata. In case of errors, the filesystem would fix the errors |
| 88 | and report the problems it fixed in the kernel log. As a precautionary measure, | 88 | and report the problems it fixed in the kernel log. As a precautionary measure, |
| 89 | the inode must first be checked for errors before performing a final fix. | 89 | the inode must first be checked for errors before performing a final fix. |
diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt index e8d00759bfa5..68080ad6a75e 100644 --- a/Documentation/filesystems/proc.txt +++ b/Documentation/filesystems/proc.txt | |||
| @@ -436,6 +436,7 @@ Private_Dirty: 0 kB | |||
| 436 | Referenced: 892 kB | 436 | Referenced: 892 kB |
| 437 | Anonymous: 0 kB | 437 | Anonymous: 0 kB |
| 438 | AnonHugePages: 0 kB | 438 | AnonHugePages: 0 kB |
| 439 | ShmemPmdMapped: 0 kB | ||
| 439 | Shared_Hugetlb: 0 kB | 440 | Shared_Hugetlb: 0 kB |
| 440 | Private_Hugetlb: 0 kB | 441 | Private_Hugetlb: 0 kB |
| 441 | Swap: 0 kB | 442 | Swap: 0 kB |
| @@ -464,6 +465,8 @@ accessed. | |||
| 464 | a mapping associated with a file may contain anonymous pages: when MAP_PRIVATE | 465 | a mapping associated with a file may contain anonymous pages: when MAP_PRIVATE |
| 465 | and a page is modified, the file page is replaced by a private anonymous copy. | 466 | and a page is modified, the file page is replaced by a private anonymous copy. |
| 466 | "AnonHugePages" shows the amount of memory backed by transparent hugepage. | 467 | "AnonHugePages" shows the amount of memory backed by transparent hugepage. |
| 468 | "ShmemPmdMapped" shows the amount of shared (shmem/tmpfs) memory backed by | ||
| 469 | huge pages. | ||
| 467 | "Shared_Hugetlb" and "Private_Hugetlb" show the amounts of memory backed by | 470 | "Shared_Hugetlb" and "Private_Hugetlb" show the amounts of memory backed by |
| 468 | hugetlbfs page which is *not* counted in "RSS" or "PSS" field for historical | 471 | hugetlbfs page which is *not* counted in "RSS" or "PSS" field for historical |
| 469 | reasons. And these are not included in {Shared,Private}_{Clean,Dirty} field. | 472 | reasons. And these are not included in {Shared,Private}_{Clean,Dirty} field. |
| @@ -725,7 +728,7 @@ IRQ, you can set it by doing: | |||
| 725 | > echo 1 > /proc/irq/10/smp_affinity | 728 | > echo 1 > /proc/irq/10/smp_affinity |
| 726 | 729 | ||
| 727 | This means that only the first CPU will handle the IRQ, but you can also echo | 730 | This means that only the first CPU will handle the IRQ, but you can also echo |
| 728 | 5 which means that only the first and fourth CPU can handle the IRQ. | 731 | 5 which means that only the first and third CPU can handle the IRQ. |
| 729 | 732 | ||
| 730 | The contents of each smp_affinity file is the same by default: | 733 | The contents of each smp_affinity file is the same by default: |
| 731 | 734 | ||
| @@ -868,6 +871,9 @@ VmallocTotal: 112216 kB | |||
| 868 | VmallocUsed: 428 kB | 871 | VmallocUsed: 428 kB |
| 869 | VmallocChunk: 111088 kB | 872 | VmallocChunk: 111088 kB |
| 870 | AnonHugePages: 49152 kB | 873 | AnonHugePages: 49152 kB |
| 874 | ShmemHugePages: 0 kB | ||
| 875 | ShmemPmdMapped: 0 kB | ||
| 876 | |||
| 871 | 877 | ||
| 872 | MemTotal: Total usable ram (i.e. physical ram minus a few reserved | 878 | MemTotal: Total usable ram (i.e. physical ram minus a few reserved |
| 873 | bits and the kernel binary code) | 879 | bits and the kernel binary code) |
| @@ -912,6 +918,9 @@ MemAvailable: An estimate of how much memory is available for starting new | |||
| 912 | AnonHugePages: Non-file backed huge pages mapped into userspace page tables | 918 | AnonHugePages: Non-file backed huge pages mapped into userspace page tables |
| 913 | Mapped: files which have been mmaped, such as libraries | 919 | Mapped: files which have been mmaped, such as libraries |
| 914 | Shmem: Total memory used by shared memory (shmem) and tmpfs | 920 | Shmem: Total memory used by shared memory (shmem) and tmpfs |
| 921 | ShmemHugePages: Memory used by shared memory (shmem) and tmpfs allocated | ||
| 922 | with huge pages | ||
| 923 | ShmemPmdMapped: Shared memory mapped into userspace with huge pages | ||
| 915 | Slab: in-kernel data structures cache | 924 | Slab: in-kernel data structures cache |
| 916 | SReclaimable: Part of Slab, that might be reclaimed, such as caches | 925 | SReclaimable: Part of Slab, that might be reclaimed, such as caches |
| 917 | SUnreclaim: Part of Slab, that cannot be reclaimed on memory pressure | 926 | SUnreclaim: Part of Slab, that cannot be reclaimed on memory pressure |
diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt index c61a223ef3ff..8a196851f01d 100644 --- a/Documentation/filesystems/vfs.txt +++ b/Documentation/filesystems/vfs.txt | |||
| @@ -364,7 +364,6 @@ struct inode_operations { | |||
| 364 | int (*atomic_open)(struct inode *, struct dentry *, struct file *, | 364 | int (*atomic_open)(struct inode *, struct dentry *, struct file *, |
| 365 | unsigned open_flag, umode_t create_mode, int *opened); | 365 | unsigned open_flag, umode_t create_mode, int *opened); |
| 366 | int (*tmpfile) (struct inode *, struct dentry *, umode_t); | 366 | int (*tmpfile) (struct inode *, struct dentry *, umode_t); |
| 367 | int (*dentry_open)(struct dentry *, struct file *, const struct cred *); | ||
| 368 | }; | 367 | }; |
| 369 | 368 | ||
| 370 | Again, all methods are called without any locks being held, unless | 369 | Again, all methods are called without any locks being held, unless |
| @@ -534,9 +533,7 @@ __sync_single_inode) to check if ->writepages has been successful in | |||
| 534 | writing out the whole address_space. | 533 | writing out the whole address_space. |
| 535 | 534 | ||
| 536 | The Writeback tag is used by filemap*wait* and sync_page* functions, | 535 | The Writeback tag is used by filemap*wait* and sync_page* functions, |
| 537 | via filemap_fdatawait_range, to wait for all writeback to | 536 | via filemap_fdatawait_range, to wait for all writeback to complete. |
| 538 | complete. While waiting ->sync_page (if defined) will be called on | ||
| 539 | each page that is found to require writeback. | ||
| 540 | 537 | ||
| 541 | An address_space handler may attach extra information to a page, | 538 | An address_space handler may attach extra information to a page, |
| 542 | typically using the 'private' field in the 'struct page'. If such | 539 | typically using the 'private' field in the 'struct page'. If such |
| @@ -554,8 +551,8 @@ address_space has finer control of write sizes. | |||
| 554 | 551 | ||
| 555 | The read process essentially only requires 'readpage'. The write | 552 | The read process essentially only requires 'readpage'. The write |
| 556 | process is more complicated and uses write_begin/write_end or | 553 | process is more complicated and uses write_begin/write_end or |
| 557 | set_page_dirty to write data into the address_space, and writepage, | 554 | set_page_dirty to write data into the address_space, and writepage |
| 558 | sync_page, and writepages to writeback data to storage. | 555 | and writepages to writeback data to storage. |
| 559 | 556 | ||
| 560 | Adding and removing pages to/from an address_space is protected by the | 557 | Adding and removing pages to/from an address_space is protected by the |
| 561 | inode's i_mutex. | 558 | inode's i_mutex. |
| @@ -592,9 +589,14 @@ struct address_space_operations { | |||
| 592 | int (*releasepage) (struct page *, int); | 589 | int (*releasepage) (struct page *, int); |
| 593 | void (*freepage)(struct page *); | 590 | void (*freepage)(struct page *); |
| 594 | ssize_t (*direct_IO)(struct kiocb *, struct iov_iter *iter); | 591 | ssize_t (*direct_IO)(struct kiocb *, struct iov_iter *iter); |
| 592 | /* isolate a page for migration */ | ||
| 593 | bool (*isolate_page) (struct page *, isolate_mode_t); | ||
| 595 | /* migrate the contents of a page to the specified target */ | 594 | /* migrate the contents of a page to the specified target */ |
| 596 | int (*migratepage) (struct page *, struct page *); | 595 | int (*migratepage) (struct page *, struct page *); |
| 596 | /* put migration-failed page back to right list */ | ||
| 597 | void (*putback_page) (struct page *); | ||
| 597 | int (*launder_page) (struct page *); | 598 | int (*launder_page) (struct page *); |
| 599 | |||
| 598 | int (*is_partially_uptodate) (struct page *, unsigned long, | 600 | int (*is_partially_uptodate) (struct page *, unsigned long, |
| 599 | unsigned long); | 601 | unsigned long); |
| 600 | void (*is_dirty_writeback) (struct page *, bool *, bool *); | 602 | void (*is_dirty_writeback) (struct page *, bool *, bool *); |
| @@ -696,13 +698,6 @@ struct address_space_operations { | |||
| 696 | but instead uses bmap to find out where the blocks in the file | 698 | but instead uses bmap to find out where the blocks in the file |
| 697 | are and uses those addresses directly. | 699 | are and uses those addresses directly. |
| 698 | 700 | ||
| 699 | dentry_open: *WARNING: probably going away soon, do not use!* This is an | ||
| 700 | alternative to f_op->open(), the difference is that this method may open | ||
| 701 | a file not necessarily originating from the same filesystem as the one | ||
| 702 | i_op->open() was called on. It may be useful for stacking filesystems | ||
| 703 | which want to allow native I/O directly on underlying files. | ||
| 704 | |||
| 705 | |||
| 706 | invalidatepage: If a page has PagePrivate set, then invalidatepage | 701 | invalidatepage: If a page has PagePrivate set, then invalidatepage |
| 707 | will be called when part or all of the page is to be removed | 702 | will be called when part or all of the page is to be removed |
| 708 | from the address space. This generally corresponds to either a | 703 | from the address space. This generally corresponds to either a |
| @@ -747,6 +742,10 @@ struct address_space_operations { | |||
| 747 | and transfer data directly between the storage and the | 742 | and transfer data directly between the storage and the |
| 748 | application's address space. | 743 | application's address space. |
| 749 | 744 | ||
| 745 | isolate_page: Called by the VM when isolating a movable non-lru page. | ||
| 746 | If page is successfully isolated, VM marks the page as PG_isolated | ||
| 747 | via __SetPageIsolated. | ||
| 748 | |||
| 750 | migratepage: This is used to compact the physical memory usage. | 749 | migratepage: This is used to compact the physical memory usage. |
| 751 | If the VM wants to relocate a page (maybe off a memory card | 750 | If the VM wants to relocate a page (maybe off a memory card |
| 752 | that is signalling imminent failure) it will pass a new page | 751 | that is signalling imminent failure) it will pass a new page |
| @@ -754,6 +753,8 @@ struct address_space_operations { | |||
| 754 | transfer any private data across and update any references | 753 | transfer any private data across and update any references |
| 755 | that it has to the page. | 754 | that it has to the page. |
| 756 | 755 | ||
| 756 | putback_page: Called by the VM when isolated page's migration fails. | ||
| 757 | |||
| 757 | launder_page: Called before freeing a page - it writes back the dirty page. To | 758 | launder_page: Called before freeing a page - it writes back the dirty page. To |
| 758 | prevent redirtying the page, it is kept locked during the whole | 759 | prevent redirtying the page, it is kept locked during the whole |
| 759 | operation. | 760 | operation. |
| @@ -933,11 +934,14 @@ struct dentry_operations { | |||
| 933 | int (*d_compare)(const struct dentry *, const struct dentry *, | 934 | int (*d_compare)(const struct dentry *, const struct dentry *, |
| 934 | unsigned int, const char *, const struct qstr *); | 935 | unsigned int, const char *, const struct qstr *); |
| 935 | int (*d_delete)(const struct dentry *); | 936 | int (*d_delete)(const struct dentry *); |
| 937 | int (*d_init)(struct dentry *); | ||
| 936 | void (*d_release)(struct dentry *); | 938 | void (*d_release)(struct dentry *); |
| 937 | void (*d_iput)(struct dentry *, struct inode *); | 939 | void (*d_iput)(struct dentry *, struct inode *); |
| 938 | char *(*d_dname)(struct dentry *, char *, int); | 940 | char *(*d_dname)(struct dentry *, char *, int); |
| 939 | struct vfsmount *(*d_automount)(struct path *); | 941 | struct vfsmount *(*d_automount)(struct path *); |
| 940 | int (*d_manage)(struct dentry *, bool); | 942 | int (*d_manage)(struct dentry *, bool); |
| 943 | struct dentry *(*d_real)(struct dentry *, const struct inode *, | ||
| 944 | unsigned int); | ||
| 941 | }; | 945 | }; |
| 942 | 946 | ||
| 943 | d_revalidate: called when the VFS needs to revalidate a dentry. This | 947 | d_revalidate: called when the VFS needs to revalidate a dentry. This |
| @@ -1003,6 +1007,8 @@ struct dentry_operations { | |||
| 1003 | always cache a reachable dentry. d_delete must be constant and | 1007 | always cache a reachable dentry. d_delete must be constant and |
| 1004 | idempotent. | 1008 | idempotent. |
| 1005 | 1009 | ||
| 1010 | d_init: called when a dentry is allocated | ||
| 1011 | |||
| 1006 | d_release: called when a dentry is really deallocated | 1012 | d_release: called when a dentry is really deallocated |
| 1007 | 1013 | ||
| 1008 | d_iput: called when a dentry loses its inode (just prior to its | 1014 | d_iput: called when a dentry loses its inode (just prior to its |
| @@ -1022,6 +1028,14 @@ struct dentry_operations { | |||
| 1022 | at the end of the buffer, and returns a pointer to the first char. | 1028 | at the end of the buffer, and returns a pointer to the first char. |
| 1023 | dynamic_dname() helper function is provided to take care of this. | 1029 | dynamic_dname() helper function is provided to take care of this. |
| 1024 | 1030 | ||
| 1031 | Example : | ||
| 1032 | |||
| 1033 | static char *pipefs_dname(struct dentry *dent, char *buffer, int buflen) | ||
| 1034 | { | ||
| 1035 | return dynamic_dname(dentry, buffer, buflen, "pipe:[%lu]", | ||
| 1036 | dentry->d_inode->i_ino); | ||
| 1037 | } | ||
| 1038 | |||
| 1025 | d_automount: called when an automount dentry is to be traversed (optional). | 1039 | d_automount: called when an automount dentry is to be traversed (optional). |
| 1026 | This should create a new VFS mount record and return the record to the | 1040 | This should create a new VFS mount record and return the record to the |
| 1027 | caller. The caller is supplied with a path parameter giving the | 1041 | caller. The caller is supplied with a path parameter giving the |
| @@ -1060,13 +1074,23 @@ struct dentry_operations { | |||
| 1060 | This function is only used if DCACHE_MANAGE_TRANSIT is set on the | 1074 | This function is only used if DCACHE_MANAGE_TRANSIT is set on the |
| 1061 | dentry being transited from. | 1075 | dentry being transited from. |
| 1062 | 1076 | ||
| 1063 | Example : | 1077 | d_real: overlay/union type filesystems implement this method to return one of |
| 1078 | the underlying dentries hidden by the overlay. It is used in three | ||
| 1079 | different modes: | ||
| 1064 | 1080 | ||
| 1065 | static char *pipefs_dname(struct dentry *dent, char *buffer, int buflen) | 1081 | Called from open it may need to copy-up the file depending on the |
| 1066 | { | 1082 | supplied open flags. This mode is selected with a non-zero flags |
| 1067 | return dynamic_dname(dentry, buffer, buflen, "pipe:[%lu]", | 1083 | argument. In this mode the d_real method can return an error. |
| 1068 | dentry->d_inode->i_ino); | 1084 | |
| 1069 | } | 1085 | Called from file_dentry() it returns the real dentry matching the inode |
| 1086 | argument. The real dentry may be from a lower layer already copied up, | ||
| 1087 | but still referenced from the file. This mode is selected with a | ||
| 1088 | non-NULL inode argument. This will always succeed. | ||
| 1089 | |||
| 1090 | With NULL inode and zero flags the topmost real underlying dentry is | ||
| 1091 | returned. This will always succeed. | ||
| 1092 | |||
| 1093 | This method is never called with both non-NULL inode and non-zero flags. | ||
| 1070 | 1094 | ||
| 1071 | Each dentry has a pointer to its parent dentry, as well as a hash list | 1095 | Each dentry has a pointer to its parent dentry, as well as a hash list |
| 1072 | of child dentries. Child dentries are basically like files in a | 1096 | of child dentries. Child dentries are basically like files in a |
