diff options
author | Mark Fasheh <mark.fasheh@oracle.com> | 2007-05-09 18:16:19 -0400 |
---|---|---|
committer | Mark Fasheh <mark.fasheh@oracle.com> | 2007-07-10 20:31:51 -0400 |
commit | 7307de80510a70e5e5aa98de1e80ccbb7d90a3a8 (patch) | |
tree | ba45bef3e0b875feb67b97aebe8295159852ef97 /fs/ocfs2/mmap.c | |
parent | 607d44aa3fa6f40b0facaf1028886ed362b92682 (diff) |
ocfs2: shared writeable mmap
Implement cluster consistent shared writeable mappings using the
->page_mkwrite() callback.
Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
Diffstat (limited to 'fs/ocfs2/mmap.c')
-rw-r--r-- | fs/ocfs2/mmap.c | 167 |
1 files changed, 143 insertions, 24 deletions
diff --git a/fs/ocfs2/mmap.c b/fs/ocfs2/mmap.c index af01158b39f5..d79aa12137d2 100644 --- a/fs/ocfs2/mmap.c +++ b/fs/ocfs2/mmap.c | |||
@@ -37,11 +37,29 @@ | |||
37 | 37 | ||
38 | #include "ocfs2.h" | 38 | #include "ocfs2.h" |
39 | 39 | ||
40 | #include "aops.h" | ||
40 | #include "dlmglue.h" | 41 | #include "dlmglue.h" |
41 | #include "file.h" | 42 | #include "file.h" |
42 | #include "inode.h" | 43 | #include "inode.h" |
43 | #include "mmap.h" | 44 | #include "mmap.h" |
44 | 45 | ||
46 | static inline int ocfs2_vm_op_block_sigs(sigset_t *blocked, sigset_t *oldset) | ||
47 | { | ||
48 | /* The best way to deal with signals in the vm path is | ||
49 | * to block them upfront, rather than allowing the | ||
50 | * locking paths to return -ERESTARTSYS. */ | ||
51 | sigfillset(blocked); | ||
52 | |||
53 | /* We should technically never get a bad return value | ||
54 | * from sigprocmask */ | ||
55 | return sigprocmask(SIG_BLOCK, blocked, oldset); | ||
56 | } | ||
57 | |||
58 | static inline int ocfs2_vm_op_unblock_sigs(sigset_t *oldset) | ||
59 | { | ||
60 | return sigprocmask(SIG_SETMASK, oldset, NULL); | ||
61 | } | ||
62 | |||
45 | static struct page *ocfs2_nopage(struct vm_area_struct * area, | 63 | static struct page *ocfs2_nopage(struct vm_area_struct * area, |
46 | unsigned long address, | 64 | unsigned long address, |
47 | int *type) | 65 | int *type) |
@@ -53,14 +71,7 @@ static struct page *ocfs2_nopage(struct vm_area_struct * area, | |||
53 | mlog_entry("(area=%p, address=%lu, type=%p)\n", area, address, | 71 | mlog_entry("(area=%p, address=%lu, type=%p)\n", area, address, |
54 | type); | 72 | type); |
55 | 73 | ||
56 | /* The best way to deal with signals in this path is | 74 | ret = ocfs2_vm_op_block_sigs(&blocked, &oldset); |
57 | * to block them upfront, rather than allowing the | ||
58 | * locking paths to return -ERESTARTSYS. */ | ||
59 | sigfillset(&blocked); | ||
60 | |||
61 | /* We should technically never get a bad ret return | ||
62 | * from sigprocmask */ | ||
63 | ret = sigprocmask(SIG_BLOCK, &blocked, &oldset); | ||
64 | if (ret < 0) { | 75 | if (ret < 0) { |
65 | mlog_errno(ret); | 76 | mlog_errno(ret); |
66 | goto out; | 77 | goto out; |
@@ -68,7 +79,7 @@ static struct page *ocfs2_nopage(struct vm_area_struct * area, | |||
68 | 79 | ||
69 | page = filemap_nopage(area, address, type); | 80 | page = filemap_nopage(area, address, type); |
70 | 81 | ||
71 | ret = sigprocmask(SIG_SETMASK, &oldset, NULL); | 82 | ret = ocfs2_vm_op_unblock_sigs(&oldset); |
72 | if (ret < 0) | 83 | if (ret < 0) |
73 | mlog_errno(ret); | 84 | mlog_errno(ret); |
74 | out: | 85 | out: |
@@ -76,28 +87,136 @@ out: | |||
76 | return page; | 87 | return page; |
77 | } | 88 | } |
78 | 89 | ||
79 | static struct vm_operations_struct ocfs2_file_vm_ops = { | 90 | static int __ocfs2_page_mkwrite(struct inode *inode, struct buffer_head *di_bh, |
80 | .nopage = ocfs2_nopage, | 91 | struct page *page) |
81 | }; | 92 | { |
93 | int ret; | ||
94 | struct address_space *mapping = inode->i_mapping; | ||
95 | loff_t pos = page->index << PAGE_CACHE_SHIFT; | ||
96 | unsigned int len = PAGE_CACHE_SIZE; | ||
97 | pgoff_t last_index; | ||
98 | struct page *locked_page = NULL; | ||
99 | void *fsdata; | ||
100 | loff_t size = i_size_read(inode); | ||
82 | 101 | ||
83 | int ocfs2_mmap(struct file *file, struct vm_area_struct *vma) | 102 | /* |
103 | * Another node might have truncated while we were waiting on | ||
104 | * cluster locks. | ||
105 | */ | ||
106 | last_index = size >> PAGE_CACHE_SHIFT; | ||
107 | if (page->index > last_index) { | ||
108 | ret = -EINVAL; | ||
109 | goto out; | ||
110 | } | ||
111 | |||
112 | /* | ||
113 | * The i_size check above doesn't catch the case where nodes | ||
114 | * truncated and then re-extended the file. We'll re-check the | ||
115 | * page mapping after taking the page lock inside of | ||
116 | * ocfs2_write_begin_nolock(). | ||
117 | */ | ||
118 | if (!PageUptodate(page) || page->mapping != inode->i_mapping) { | ||
119 | ret = -EINVAL; | ||
120 | goto out; | ||
121 | } | ||
122 | |||
123 | /* | ||
124 | * Call ocfs2_write_begin() and ocfs2_write_end() to take | ||
125 | * advantage of the allocation code there. We pass a write | ||
126 | * length of the whole page (chopped to i_size) to make sure | ||
127 | * the whole thing is allocated. | ||
128 | * | ||
129 | * Since we know the page is up to date, we don't have to | ||
130 | * worry about ocfs2_write_begin() skipping some buffer reads | ||
131 | * because the "write" would invalidate their data. | ||
132 | */ | ||
133 | if (page->index == last_index) | ||
134 | len = size & ~PAGE_CACHE_MASK; | ||
135 | |||
136 | ret = ocfs2_write_begin_nolock(mapping, pos, len, 0, &locked_page, | ||
137 | &fsdata, di_bh, page); | ||
138 | if (ret) { | ||
139 | if (ret != -ENOSPC) | ||
140 | mlog_errno(ret); | ||
141 | goto out; | ||
142 | } | ||
143 | |||
144 | ret = ocfs2_write_end_nolock(mapping, pos, len, len, locked_page, | ||
145 | fsdata); | ||
146 | if (ret < 0) { | ||
147 | mlog_errno(ret); | ||
148 | goto out; | ||
149 | } | ||
150 | BUG_ON(ret != len); | ||
151 | ret = 0; | ||
152 | out: | ||
153 | return ret; | ||
154 | } | ||
155 | |||
156 | static int ocfs2_page_mkwrite(struct vm_area_struct *vma, struct page *page) | ||
84 | { | 157 | { |
85 | int ret = 0, lock_level = 0; | 158 | struct inode *inode = vma->vm_file->f_path.dentry->d_inode; |
86 | struct ocfs2_super *osb = OCFS2_SB(file->f_dentry->d_inode->i_sb); | 159 | struct buffer_head *di_bh = NULL; |
160 | sigset_t blocked, oldset; | ||
161 | int ret, ret2; | ||
162 | |||
163 | ret = ocfs2_vm_op_block_sigs(&blocked, &oldset); | ||
164 | if (ret < 0) { | ||
165 | mlog_errno(ret); | ||
166 | return ret; | ||
167 | } | ||
168 | |||
169 | /* | ||
170 | * The cluster locks taken will block a truncate from another | ||
171 | * node. Taking the data lock will also ensure that we don't | ||
172 | * attempt page truncation as part of a downconvert. | ||
173 | */ | ||
174 | ret = ocfs2_meta_lock(inode, &di_bh, 1); | ||
175 | if (ret < 0) { | ||
176 | mlog_errno(ret); | ||
177 | goto out; | ||
178 | } | ||
87 | 179 | ||
88 | /* | 180 | /* |
89 | * Only support shared writeable mmap for local mounts which | 181 | * The alloc sem should be enough to serialize with |
90 | * don't know about holes. | 182 | * ocfs2_truncate_file() changing i_size as well as any thread |
183 | * modifying the inode btree. | ||
91 | */ | 184 | */ |
92 | if ((!ocfs2_mount_local(osb) || ocfs2_sparse_alloc(osb)) && | 185 | down_write(&OCFS2_I(inode)->ip_alloc_sem); |
93 | ((vma->vm_flags & VM_SHARED) || (vma->vm_flags & VM_MAYSHARE)) && | 186 | |
94 | ((vma->vm_flags & VM_WRITE) || (vma->vm_flags & VM_MAYWRITE))) { | 187 | ret = ocfs2_data_lock(inode, 1); |
95 | mlog(0, "disallow shared writable mmaps %lx\n", vma->vm_flags); | 188 | if (ret < 0) { |
96 | /* This is -EINVAL because generic_file_readonly_mmap | 189 | mlog_errno(ret); |
97 | * returns it in a similar situation. */ | 190 | goto out_meta_unlock; |
98 | return -EINVAL; | ||
99 | } | 191 | } |
100 | 192 | ||
193 | ret = __ocfs2_page_mkwrite(inode, di_bh, page); | ||
194 | |||
195 | ocfs2_data_unlock(inode, 1); | ||
196 | |||
197 | out_meta_unlock: | ||
198 | up_write(&OCFS2_I(inode)->ip_alloc_sem); | ||
199 | |||
200 | brelse(di_bh); | ||
201 | ocfs2_meta_unlock(inode, 1); | ||
202 | |||
203 | out: | ||
204 | ret2 = ocfs2_vm_op_unblock_sigs(&oldset); | ||
205 | if (ret2 < 0) | ||
206 | mlog_errno(ret2); | ||
207 | |||
208 | return ret; | ||
209 | } | ||
210 | |||
211 | static struct vm_operations_struct ocfs2_file_vm_ops = { | ||
212 | .nopage = ocfs2_nopage, | ||
213 | .page_mkwrite = ocfs2_page_mkwrite, | ||
214 | }; | ||
215 | |||
216 | int ocfs2_mmap(struct file *file, struct vm_area_struct *vma) | ||
217 | { | ||
218 | int ret = 0, lock_level = 0; | ||
219 | |||
101 | ret = ocfs2_meta_lock_atime(file->f_dentry->d_inode, | 220 | ret = ocfs2_meta_lock_atime(file->f_dentry->d_inode, |
102 | file->f_vfsmnt, &lock_level); | 221 | file->f_vfsmnt, &lock_level); |
103 | if (ret < 0) { | 222 | if (ret < 0) { |