aboutsummaryrefslogtreecommitdiffstats
path: root/Documentation/filesystems
diff options
context:
space:
mode:
author: Mel Gorman <mgorman@suse.de> 2012-07-31 19:44:55 -0400
committer: Linus Torvalds <torvalds@linux-foundation.org> 2012-07-31 21:42:47 -0400
commit: 62c230bc1790923a1b35da03596a68a6c9b5b100 (patch)
tree: 46b300a00929087627e029b0f4d53e486a97ba7f /Documentation/filesystems
parent: 18022c5d8627a7a9ba8097a0f238b513fae6f5b8 (diff)
mm: add support for a filesystem to activate swap files and use direct_IO for writing swap pages
Currently swapfiles are managed entirely by the core VM by using ->bmap to allocate space and write to the blocks directly. This effectively ensures that the underlying blocks are allocated and avoids the need for the swap subsystem to locate what physical blocks store offsets within a file. If the swap subsystem is to use the filesystem information to locate the blocks, it is critical that information such as block groups, block bitmaps and the block descriptor table that map the swap file be resident in memory. This patch adds address_space_operations that the VM can call when activating or deactivating swap backed by a file. int swap_activate(struct file *); int swap_deactivate(struct file *); The ->swap_activate() method is used to communicate to the file that the VM relies on it, and the address_space should take adequate measures such as reserving space in the underlying device, reserving memory for mempools and pinning information such as the block descriptor table in memory. The ->swap_deactivate() method is called on sys_swapoff() if ->swap_activate() returned success. After a successful swapfile ->swap_activate, the swapfile is marked SWP_FILE and swapper_space.a_ops will proxy to sis->swap_file->f_mapping->a_ops using ->direct_IO to write swapcache pages and ->readpage to read. It is perfectly possible that direct_IO be used to read the swap pages but it is an unnecessary complication. Similarly, it is possible that ->writepage be used instead of direct_IO to write the pages but filesystem developers have stated that calling writepage from the VM is undesirable for a variety of reasons and using direct_IO opens up the possibility of writing back batches of swap pages in the future. [a.p.zijlstra@chello.nl: Original patch] Signed-off-by: Mel Gorman <mgorman@suse.de> Acked-by: Rik van Riel <riel@redhat.com> Cc: Christoph Hellwig <hch@infradead.org> Cc: David S. 
Miller <davem@davemloft.net> Cc: Eric B Munson <emunson@mgebm.net> Cc: Eric Paris <eparis@redhat.com> Cc: James Morris <jmorris@namei.org> Cc: Mel Gorman <mgorman@suse.de> Cc: Mike Christie <michaelc@cs.wisc.edu> Cc: Neil Brown <neilb@suse.de> Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Sebastian Andrzej Siewior <sebastian@breakpoint.cc> Cc: Trond Myklebust <Trond.Myklebust@netapp.com> Cc: Xiaotian Feng <dfeng@redhat.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'Documentation/filesystems')
-rw-r--r-- Documentation/filesystems/Locking | 13
-rw-r--r-- Documentation/filesystems/vfs.txt | 12
2 files changed, 25 insertions, 0 deletions
diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking
index e0cce2a5f820..2db1900d7538 100644
--- a/Documentation/filesystems/Locking
+++ b/Documentation/filesystems/Locking
@@ -206,6 +206,8 @@ prototypes:
206 int (*launder_page)(struct page *); 206 int (*launder_page)(struct page *);
207 int (*is_partially_uptodate)(struct page *, read_descriptor_t *, unsigned long); 207 int (*is_partially_uptodate)(struct page *, read_descriptor_t *, unsigned long);
208 int (*error_remove_page)(struct address_space *, struct page *); 208 int (*error_remove_page)(struct address_space *, struct page *);
209 int (*swap_activate)(struct file *);
210 int (*swap_deactivate)(struct file *);
209 211
210locking rules: 212locking rules:
211 All except set_page_dirty and freepage may block 213 All except set_page_dirty and freepage may block
@@ -229,6 +231,8 @@ migratepage: yes (both)
229launder_page: yes 231launder_page: yes
230is_partially_uptodate: yes 232is_partially_uptodate: yes
231error_remove_page: yes 233error_remove_page: yes
234swap_activate: no
235swap_deactivate: no
232 236
233 ->write_begin(), ->write_end(), ->sync_page() and ->readpage() 237 ->write_begin(), ->write_end(), ->sync_page() and ->readpage()
234may be called from the request handler (/dev/loop). 238may be called from the request handler (/dev/loop).
@@ -330,6 +334,15 @@ cleaned, or an error value if not. Note that in order to prevent the page
330getting mapped back in and redirtied, it needs to be kept locked 334getting mapped back in and redirtied, it needs to be kept locked
331across the entire operation. 335across the entire operation.
332 336
337 ->swap_activate will be called with a non-zero argument on
338files backing (non block device backed) swapfiles. A return value
339of zero indicates success, in which case this file can be used for
340backing swapspace. The swapspace operations will be proxied to the
341address space operations.
342
343 ->swap_deactivate() will be called in the sys_swapoff()
344path after ->swap_activate() returned success.
345
333----------------------- file_lock_operations ------------------------------ 346----------------------- file_lock_operations ------------------------------
334prototypes: 347prototypes:
335 void (*fl_copy_lock)(struct file_lock *, struct file_lock *); 348 void (*fl_copy_lock)(struct file_lock *, struct file_lock *);
diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt
index aa754e01464e..065aa2dc0835 100644
--- a/Documentation/filesystems/vfs.txt
+++ b/Documentation/filesystems/vfs.txt
@@ -592,6 +592,8 @@ struct address_space_operations {
592 int (*migratepage) (struct page *, struct page *); 592 int (*migratepage) (struct page *, struct page *);
593 int (*launder_page) (struct page *); 593 int (*launder_page) (struct page *);
594 int (*error_remove_page) (struct mapping *mapping, struct page *page); 594 int (*error_remove_page) (struct mapping *mapping, struct page *page);
595 int (*swap_activate)(struct file *);
596 int (*swap_deactivate)(struct file *);
595}; 597};
596 598
597 writepage: called by the VM to write a dirty page to backing store. 599 writepage: called by the VM to write a dirty page to backing store.
@@ -760,6 +762,16 @@ struct address_space_operations {
760 Setting this implies you deal with pages going away under you, 762 Setting this implies you deal with pages going away under you,
761 unless you have them locked or reference counts increased. 763 unless you have them locked or reference counts increased.
762 764
765 swap_activate: Called when swapon is used on a file to allocate
766 space if necessary and pin the block lookup information in
767 memory. A return value of zero indicates success,
768 in which case this file can be used to back swapspace. The
769 swapspace operations will be proxied to this address space's
770 ->swap_{out,in} methods.
771
772 swap_deactivate: Called during swapoff on files where swap_activate
773 was successful.
774
763 775
764The File Object 776The File Object
765=============== 777===============