about summary refs log tree commit diff stats
path: root/fs/ceph/file.c
diff options
context:
space:
mode:
author Jonathan Herman <hermanjl@cs.unc.edu> 2013-01-17 16:15:55 -0500
committer Jonathan Herman <hermanjl@cs.unc.edu> 2013-01-17 16:15:55 -0500
commit 8dea78da5cee153b8af9c07a2745f6c55057fe12 (patch)
tree a8f4d49d63b1ecc92f2fddceba0655b2472c5bd9 /fs/ceph/file.c
parent 406089d01562f1e2bf9f089fd7637009ebaad589 (diff)
Patched in Tegra support.
Diffstat (limited to 'fs/ceph/file.c')
-rw-r--r-- fs/ceph/file.c 175
1 files changed, 90 insertions, 85 deletions
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index e51558fca3a..ce549d31eeb 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -4,7 +4,6 @@
4#include <linux/sched.h> 4#include <linux/sched.h>
5#include <linux/slab.h> 5#include <linux/slab.h>
6#include <linux/file.h> 6#include <linux/file.h>
7#include <linux/mount.h>
8#include <linux/namei.h> 7#include <linux/namei.h>
9#include <linux/writeback.h> 8#include <linux/writeback.h>
10 9
@@ -55,6 +54,7 @@ prepare_open_request(struct super_block *sb, int flags, int create_mode)
55 req->r_fmode = ceph_flags_to_mode(flags); 54 req->r_fmode = ceph_flags_to_mode(flags);
56 req->r_args.open.flags = cpu_to_le32(flags); 55 req->r_args.open.flags = cpu_to_le32(flags);
57 req->r_args.open.mode = cpu_to_le32(create_mode); 56 req->r_args.open.mode = cpu_to_le32(create_mode);
57 req->r_args.open.preferred = cpu_to_le32(-1);
58out: 58out:
59 return req; 59 return req;
60} 60}
@@ -107,6 +107,9 @@ static int ceph_init_file(struct inode *inode, struct file *file, int fmode)
107} 107}
108 108
109/* 109/*
110 * If the filp already has private_data, that means the file was
111 * already opened by intent during lookup, and we do nothing.
112 *
110 * If we already have the requisite capabilities, we can satisfy 113 * If we already have the requisite capabilities, we can satisfy
111 * the open request locally (no need to request new caps from the 114 * the open request locally (no need to request new caps from the
112 * MDS). We do, however, need to inform the MDS (asynchronously) 115 * MDS). We do, however, need to inform the MDS (asynchronously)
@@ -144,9 +147,9 @@ int ceph_open(struct inode *inode, struct file *file)
144 147
145 /* trivially open snapdir */ 148 /* trivially open snapdir */
146 if (ceph_snap(inode) == CEPH_SNAPDIR) { 149 if (ceph_snap(inode) == CEPH_SNAPDIR) {
147 spin_lock(&ci->i_ceph_lock); 150 spin_lock(&inode->i_lock);
148 __ceph_get_fmode(ci, fmode); 151 __ceph_get_fmode(ci, fmode);
149 spin_unlock(&ci->i_ceph_lock); 152 spin_unlock(&inode->i_lock);
150 return ceph_init_file(inode, file, fmode); 153 return ceph_init_file(inode, file, fmode);
151 } 154 }
152 155
@@ -155,7 +158,7 @@ int ceph_open(struct inode *inode, struct file *file)
155 * write) or any MDS (for read). Update wanted set 158 * write) or any MDS (for read). Update wanted set
156 * asynchronously. 159 * asynchronously.
157 */ 160 */
158 spin_lock(&ci->i_ceph_lock); 161 spin_lock(&inode->i_lock);
159 if (__ceph_is_any_real_caps(ci) && 162 if (__ceph_is_any_real_caps(ci) &&
160 (((fmode & CEPH_FILE_MODE_WR) == 0) || ci->i_auth_cap)) { 163 (((fmode & CEPH_FILE_MODE_WR) == 0) || ci->i_auth_cap)) {
161 int mds_wanted = __ceph_caps_mds_wanted(ci); 164 int mds_wanted = __ceph_caps_mds_wanted(ci);
@@ -165,7 +168,7 @@ int ceph_open(struct inode *inode, struct file *file)
165 inode, fmode, ceph_cap_string(wanted), 168 inode, fmode, ceph_cap_string(wanted),
166 ceph_cap_string(issued)); 169 ceph_cap_string(issued));
167 __ceph_get_fmode(ci, fmode); 170 __ceph_get_fmode(ci, fmode);
168 spin_unlock(&ci->i_ceph_lock); 171 spin_unlock(&inode->i_lock);
169 172
170 /* adjust wanted? */ 173 /* adjust wanted? */
171 if ((issued & wanted) != wanted && 174 if ((issued & wanted) != wanted &&
@@ -177,10 +180,10 @@ int ceph_open(struct inode *inode, struct file *file)
177 } else if (ceph_snap(inode) != CEPH_NOSNAP && 180 } else if (ceph_snap(inode) != CEPH_NOSNAP &&
178 (ci->i_snap_caps & wanted) == wanted) { 181 (ci->i_snap_caps & wanted) == wanted) {
179 __ceph_get_fmode(ci, fmode); 182 __ceph_get_fmode(ci, fmode);
180 spin_unlock(&ci->i_ceph_lock); 183 spin_unlock(&inode->i_lock);
181 return ceph_init_file(inode, file, fmode); 184 return ceph_init_file(inode, file, fmode);
182 } 185 }
183 spin_unlock(&ci->i_ceph_lock); 186 spin_unlock(&inode->i_lock);
184 187
185 dout("open fmode %d wants %s\n", fmode, ceph_cap_string(wanted)); 188 dout("open fmode %d wants %s\n", fmode, ceph_cap_string(wanted));
186 req = prepare_open_request(inode->i_sb, flags, 0); 189 req = prepare_open_request(inode->i_sb, flags, 0);
@@ -205,34 +208,36 @@ out:
205 208
206 209
207/* 210/*
208 * Do a lookup + open with a single request. If we get a non-existent 211 * Do a lookup + open with a single request.
209 * file or symlink, return 1 so the VFS can retry. 212 *
213 * If this succeeds, but some subsequent check in the vfs
214 * may_open() fails, the struct *file gets cleaned up (i.e.
215 * ceph_release gets called). So fear not!
216 */
217/*
218 * flags
219 * path_lookup_open -> LOOKUP_OPEN
220 * path_lookup_create -> LOOKUP_OPEN|LOOKUP_CREATE
210 */ 221 */
211int ceph_atomic_open(struct inode *dir, struct dentry *dentry, 222struct dentry *ceph_lookup_open(struct inode *dir, struct dentry *dentry,
212 struct file *file, unsigned flags, umode_t mode, 223 struct nameidata *nd, int mode,
213 int *opened) 224 int locked_dir)
214{ 225{
215 struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb); 226 struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb);
216 struct ceph_mds_client *mdsc = fsc->mdsc; 227 struct ceph_mds_client *mdsc = fsc->mdsc;
228 struct file *file;
217 struct ceph_mds_request *req; 229 struct ceph_mds_request *req;
218 struct dentry *dn; 230 struct dentry *ret;
219 int err; 231 int err;
232 int flags = nd->intent.open.flags;
220 233
221 dout("atomic_open %p dentry %p '%.*s' %s flags %d mode 0%o\n", 234 dout("ceph_lookup_open dentry %p '%.*s' flags %d mode 0%o\n",
222 dir, dentry, dentry->d_name.len, dentry->d_name.name, 235 dentry, dentry->d_name.len, dentry->d_name.name, flags, mode);
223 d_unhashed(dentry) ? "unhashed" : "hashed", flags, mode);
224
225 if (dentry->d_name.len > NAME_MAX)
226 return -ENAMETOOLONG;
227
228 err = ceph_init_dentry(dentry);
229 if (err < 0)
230 return err;
231 236
232 /* do the open */ 237 /* do the open */
233 req = prepare_open_request(dir->i_sb, flags, mode); 238 req = prepare_open_request(dir->i_sb, flags, mode);
234 if (IS_ERR(req)) 239 if (IS_ERR(req))
235 return PTR_ERR(req); 240 return ERR_CAST(req);
236 req->r_dentry = dget(dentry); 241 req->r_dentry = dget(dentry);
237 req->r_num_caps = 2; 242 req->r_num_caps = 2;
238 if (flags & O_CREAT) { 243 if (flags & O_CREAT) {
@@ -244,32 +249,20 @@ int ceph_atomic_open(struct inode *dir, struct dentry *dentry,
244 (flags & (O_CREAT|O_TRUNC)) ? dir : NULL, 249 (flags & (O_CREAT|O_TRUNC)) ? dir : NULL,
245 req); 250 req);
246 err = ceph_handle_snapdir(req, dentry, err); 251 err = ceph_handle_snapdir(req, dentry, err);
247 if (err == 0 && (flags & O_CREAT) && !req->r_reply_info.head->is_dentry) 252 if (err)
253 goto out;
254 if ((flags & O_CREAT) && !req->r_reply_info.head->is_dentry)
248 err = ceph_handle_notrace_create(dir, dentry); 255 err = ceph_handle_notrace_create(dir, dentry);
249
250 if (d_unhashed(dentry)) {
251 dn = ceph_finish_lookup(req, dentry, err);
252 if (IS_ERR(dn))
253 err = PTR_ERR(dn);
254 } else {
255 /* we were given a hashed negative dentry */
256 dn = NULL;
257 }
258 if (err) 256 if (err)
259 goto out_err; 257 goto out;
260 if (dn || dentry->d_inode == NULL || S_ISLNK(dentry->d_inode->i_mode)) { 258 file = lookup_instantiate_filp(nd, req->r_dentry, ceph_open);
261 /* make vfs retry on splice, ENOENT, or symlink */ 259 if (IS_ERR(file))
262 dout("atomic_open finish_no_open on dn %p\n", dn); 260 err = PTR_ERR(file);
263 err = finish_no_open(file, dn); 261out:
264 } else { 262 ret = ceph_finish_lookup(req, dentry, err);
265 dout("atomic_open finish_open on dn %p\n", dn);
266 err = finish_open(file, dentry, ceph_open, opened);
267 }
268
269out_err:
270 ceph_mdsc_put_request(req); 263 ceph_mdsc_put_request(req);
271 dout("atomic_open result=%d\n", err); 264 dout("ceph_lookup_open result=%p\n", ret);
272 return err; 265 return ret;
273} 266}
274 267
275int ceph_release(struct inode *inode, struct file *file) 268int ceph_release(struct inode *inode, struct file *file)
@@ -536,8 +529,8 @@ more:
536 do_sync, 529 do_sync,
537 ci->i_truncate_seq, ci->i_truncate_size, 530 ci->i_truncate_seq, ci->i_truncate_size,
538 &mtime, false, 2, page_align); 531 &mtime, false, 2, page_align);
539 if (IS_ERR(req)) 532 if (!req)
540 return PTR_ERR(req); 533 return -ENOMEM;
541 534
542 if (file->f_flags & O_DIRECT) { 535 if (file->f_flags & O_DIRECT) {
543 pages = ceph_get_direct_page_vector(data, num_pages, false); 536 pages = ceph_get_direct_page_vector(data, num_pages, false);
@@ -712,65 +705,78 @@ static ssize_t ceph_aio_write(struct kiocb *iocb, const struct iovec *iov,
712 struct ceph_osd_client *osdc = 705 struct ceph_osd_client *osdc =
713 &ceph_sb_to_client(inode->i_sb)->client->osdc; 706 &ceph_sb_to_client(inode->i_sb)->client->osdc;
714 loff_t endoff = pos + iov->iov_len; 707 loff_t endoff = pos + iov->iov_len;
715 int got = 0; 708 int want, got = 0;
716 int ret, err, written; 709 int ret, err;
717 710
718 if (ceph_snap(inode) != CEPH_NOSNAP) 711 if (ceph_snap(inode) != CEPH_NOSNAP)
719 return -EROFS; 712 return -EROFS;
720 713
721retry_snap: 714retry_snap:
722 written = 0;
723 if (ceph_osdmap_flag(osdc->osdmap, CEPH_OSDMAP_FULL)) 715 if (ceph_osdmap_flag(osdc->osdmap, CEPH_OSDMAP_FULL))
724 return -ENOSPC; 716 return -ENOSPC;
725 __ceph_do_pending_vmtruncate(inode); 717 __ceph_do_pending_vmtruncate(inode);
718 dout("aio_write %p %llx.%llx %llu~%u getting caps. i_size %llu\n",
719 inode, ceph_vinop(inode), pos, (unsigned)iov->iov_len,
720 inode->i_size);
721 if (fi->fmode & CEPH_FILE_MODE_LAZY)
722 want = CEPH_CAP_FILE_BUFFER | CEPH_CAP_FILE_LAZYIO;
723 else
724 want = CEPH_CAP_FILE_BUFFER;
725 ret = ceph_get_caps(ci, CEPH_CAP_FILE_WR, want, &got, endoff);
726 if (ret < 0)
727 goto out_put;
726 728
727 /* 729 dout("aio_write %p %llx.%llx %llu~%u got cap refs on %s\n",
728 * try to do a buffered write. if we don't have sufficient 730 inode, ceph_vinop(inode), pos, (unsigned)iov->iov_len,
729 * caps, we'll get -EAGAIN from generic_file_aio_write, or a 731 ceph_cap_string(got));
730 * short write if we only get caps for some pages.
731 */
732 if (!(iocb->ki_filp->f_flags & O_DIRECT) &&
733 !(inode->i_sb->s_flags & MS_SYNCHRONOUS) &&
734 !(fi->flags & CEPH_F_SYNC)) {
735 ret = generic_file_aio_write(iocb, iov, nr_segs, pos);
736 if (ret >= 0)
737 written = ret;
738 732
733 if ((got & (CEPH_CAP_FILE_BUFFER|CEPH_CAP_FILE_LAZYIO)) == 0 ||
734 (iocb->ki_filp->f_flags & O_DIRECT) ||
735 (inode->i_sb->s_flags & MS_SYNCHRONOUS) ||
736 (fi->flags & CEPH_F_SYNC)) {
737 ret = ceph_sync_write(file, iov->iov_base, iov->iov_len,
738 &iocb->ki_pos);
739 } else {
740 /*
741 * buffered write; drop Fw early to avoid slow
742 * revocation if we get stuck on balance_dirty_pages
743 */
744 int dirty;
745
746 spin_lock(&inode->i_lock);
747 dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR);
748 spin_unlock(&inode->i_lock);
749 ceph_put_cap_refs(ci, got);
750
751 ret = generic_file_aio_write(iocb, iov, nr_segs, pos);
739 if ((ret >= 0 || ret == -EIOCBQUEUED) && 752 if ((ret >= 0 || ret == -EIOCBQUEUED) &&
740 ((file->f_flags & O_SYNC) || IS_SYNC(file->f_mapping->host) 753 ((file->f_flags & O_SYNC) || IS_SYNC(file->f_mapping->host)
741 || ceph_osdmap_flag(osdc->osdmap, CEPH_OSDMAP_NEARFULL))) { 754 || ceph_osdmap_flag(osdc->osdmap, CEPH_OSDMAP_NEARFULL))) {
742 err = vfs_fsync_range(file, pos, pos + written - 1, 1); 755 err = vfs_fsync_range(file, pos, pos + ret - 1, 1);
743 if (err < 0) 756 if (err < 0)
744 ret = err; 757 ret = err;
745 } 758 }
746 if ((ret < 0 && ret != -EAGAIN) || pos + written >= endoff)
747 goto out;
748 }
749 759
750 dout("aio_write %p %llx.%llx %llu~%u getting caps. i_size %llu\n", 760 if (dirty)
751 inode, ceph_vinop(inode), pos + written, 761 __mark_inode_dirty(inode, dirty);
752 (unsigned)iov->iov_len - written, inode->i_size);
753 ret = ceph_get_caps(ci, CEPH_CAP_FILE_WR, 0, &got, endoff);
754 if (ret < 0)
755 goto out; 762 goto out;
763 }
756 764
757 dout("aio_write %p %llx.%llx %llu~%u got cap refs on %s\n",
758 inode, ceph_vinop(inode), pos + written,
759 (unsigned)iov->iov_len - written, ceph_cap_string(got));
760 ret = ceph_sync_write(file, iov->iov_base + written,
761 iov->iov_len - written, &iocb->ki_pos);
762 if (ret >= 0) { 765 if (ret >= 0) {
763 int dirty; 766 int dirty;
764 spin_lock(&ci->i_ceph_lock); 767 spin_lock(&inode->i_lock);
765 dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR); 768 dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR);
766 spin_unlock(&ci->i_ceph_lock); 769 spin_unlock(&inode->i_lock);
767 if (dirty) 770 if (dirty)
768 __mark_inode_dirty(inode, dirty); 771 __mark_inode_dirty(inode, dirty);
769 } 772 }
773
774out_put:
770 dout("aio_write %p %llx.%llx %llu~%u dropping cap refs on %s\n", 775 dout("aio_write %p %llx.%llx %llu~%u dropping cap refs on %s\n",
771 inode, ceph_vinop(inode), pos + written, 776 inode, ceph_vinop(inode), pos, (unsigned)iov->iov_len,
772 (unsigned)iov->iov_len - written, ceph_cap_string(got)); 777 ceph_cap_string(got));
773 ceph_put_cap_refs(ci, got); 778 ceph_put_cap_refs(ci, got);
779
774out: 780out:
775 if (ret == -EOLDSNAPC) { 781 if (ret == -EOLDSNAPC) {
776 dout("aio_write %p %llx.%llx %llu~%u got EOLDSNAPC, retrying\n", 782 dout("aio_write %p %llx.%llx %llu~%u got EOLDSNAPC, retrying\n",
@@ -784,15 +790,14 @@ out:
784/* 790/*
785 * llseek. be sure to verify file size on SEEK_END. 791 * llseek. be sure to verify file size on SEEK_END.
786 */ 792 */
787static loff_t ceph_llseek(struct file *file, loff_t offset, int whence) 793static loff_t ceph_llseek(struct file *file, loff_t offset, int origin)
788{ 794{
789 struct inode *inode = file->f_mapping->host; 795 struct inode *inode = file->f_mapping->host;
790 int ret; 796 int ret;
791 797
792 mutex_lock(&inode->i_mutex); 798 mutex_lock(&inode->i_mutex);
793 __ceph_do_pending_vmtruncate(inode); 799 __ceph_do_pending_vmtruncate(inode);
794 800 if (origin != SEEK_CUR || origin != SEEK_SET) {
795 if (whence == SEEK_END || whence == SEEK_DATA || whence == SEEK_HOLE) {
796 ret = ceph_do_getattr(inode, CEPH_STAT_CAP_SIZE); 801 ret = ceph_do_getattr(inode, CEPH_STAT_CAP_SIZE);
797 if (ret < 0) { 802 if (ret < 0) {
798 offset = ret; 803 offset = ret;
@@ -800,7 +805,7 @@ static loff_t ceph_llseek(struct file *file, loff_t offset, int whence)
800 } 805 }
801 } 806 }
802 807
803 switch (whence) { 808 switch (origin) {
804 case SEEK_END: 809 case SEEK_END:
805 offset += inode->i_size; 810 offset += inode->i_size;
806 break; 811 break;