aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--fs/ceph/addr.c12
-rw-r--r--fs/ceph/auth_x.c15
-rw-r--r--fs/ceph/caps.c32
-rw-r--r--fs/ceph/debugfs.c4
-rw-r--r--fs/ceph/dir.c2
-rw-r--r--fs/ceph/inode.c5
-rw-r--r--fs/ceph/locks.c14
-rw-r--r--fs/ceph/mds_client.c101
-rw-r--r--fs/ceph/mds_client.h3
-rw-r--r--fs/ceph/osd_client.c2
-rw-r--r--fs/ceph/snap.c89
-rw-r--r--fs/ceph/super.h11
-rw-r--r--fs/ceph/xattr.c1
-rw-r--r--mm/page-writeback.c1
14 files changed, 185 insertions, 107 deletions
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index 5598a0d02295..4cfce1ee31fa 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -87,7 +87,7 @@ static int ceph_set_page_dirty(struct page *page)
87 87
88 /* dirty the head */ 88 /* dirty the head */
89 spin_lock(&inode->i_lock); 89 spin_lock(&inode->i_lock);
90 if (ci->i_wrbuffer_ref_head == 0) 90 if (ci->i_head_snapc == NULL)
91 ci->i_head_snapc = ceph_get_snap_context(snapc); 91 ci->i_head_snapc = ceph_get_snap_context(snapc);
92 ++ci->i_wrbuffer_ref_head; 92 ++ci->i_wrbuffer_ref_head;
93 if (ci->i_wrbuffer_ref == 0) 93 if (ci->i_wrbuffer_ref == 0)
@@ -105,13 +105,7 @@ static int ceph_set_page_dirty(struct page *page)
105 spin_lock_irq(&mapping->tree_lock); 105 spin_lock_irq(&mapping->tree_lock);
106 if (page->mapping) { /* Race with truncate? */ 106 if (page->mapping) { /* Race with truncate? */
107 WARN_ON_ONCE(!PageUptodate(page)); 107 WARN_ON_ONCE(!PageUptodate(page));
108 108 account_page_dirtied(page, page->mapping);
109 if (mapping_cap_account_dirty(mapping)) {
110 __inc_zone_page_state(page, NR_FILE_DIRTY);
111 __inc_bdi_stat(mapping->backing_dev_info,
112 BDI_RECLAIMABLE);
113 task_io_account_write(PAGE_CACHE_SIZE);
114 }
115 radix_tree_tag_set(&mapping->page_tree, 109 radix_tree_tag_set(&mapping->page_tree,
116 page_index(page), PAGECACHE_TAG_DIRTY); 110 page_index(page), PAGECACHE_TAG_DIRTY);
117 111
@@ -352,7 +346,7 @@ static struct ceph_snap_context *get_oldest_context(struct inode *inode,
352 break; 346 break;
353 } 347 }
354 } 348 }
355 if (!snapc && ci->i_head_snapc) { 349 if (!snapc && ci->i_wrbuffer_ref_head) {
356 snapc = ceph_get_snap_context(ci->i_head_snapc); 350 snapc = ceph_get_snap_context(ci->i_head_snapc);
357 dout(" head snapc %p has %d dirty pages\n", 351 dout(" head snapc %p has %d dirty pages\n",
358 snapc, ci->i_wrbuffer_ref_head); 352 snapc, ci->i_wrbuffer_ref_head);
diff --git a/fs/ceph/auth_x.c b/fs/ceph/auth_x.c
index 582e0b2caf8a..a2d002cbdec2 100644
--- a/fs/ceph/auth_x.c
+++ b/fs/ceph/auth_x.c
@@ -376,7 +376,7 @@ static void ceph_x_validate_tickets(struct ceph_auth_client *ac, int *pneed)
376 376
377 th = get_ticket_handler(ac, service); 377 th = get_ticket_handler(ac, service);
378 378
379 if (!th) { 379 if (IS_ERR(th)) {
380 *pneed |= service; 380 *pneed |= service;
381 continue; 381 continue;
382 } 382 }
@@ -399,6 +399,9 @@ static int ceph_x_build_request(struct ceph_auth_client *ac,
399 struct ceph_x_ticket_handler *th = 399 struct ceph_x_ticket_handler *th =
400 get_ticket_handler(ac, CEPH_ENTITY_TYPE_AUTH); 400 get_ticket_handler(ac, CEPH_ENTITY_TYPE_AUTH);
401 401
402 if (IS_ERR(th))
403 return PTR_ERR(th);
404
402 ceph_x_validate_tickets(ac, &need); 405 ceph_x_validate_tickets(ac, &need);
403 406
404 dout("build_request want %x have %x need %x\n", 407 dout("build_request want %x have %x need %x\n",
@@ -450,7 +453,6 @@ static int ceph_x_build_request(struct ceph_auth_client *ac,
450 return -ERANGE; 453 return -ERANGE;
451 head->op = cpu_to_le16(CEPHX_GET_PRINCIPAL_SESSION_KEY); 454 head->op = cpu_to_le16(CEPHX_GET_PRINCIPAL_SESSION_KEY);
452 455
453 BUG_ON(!th);
454 ret = ceph_x_build_authorizer(ac, th, &xi->auth_authorizer); 456 ret = ceph_x_build_authorizer(ac, th, &xi->auth_authorizer);
455 if (ret) 457 if (ret)
456 return ret; 458 return ret;
@@ -505,7 +507,8 @@ static int ceph_x_handle_reply(struct ceph_auth_client *ac, int result,
505 507
506 case CEPHX_GET_PRINCIPAL_SESSION_KEY: 508 case CEPHX_GET_PRINCIPAL_SESSION_KEY:
507 th = get_ticket_handler(ac, CEPH_ENTITY_TYPE_AUTH); 509 th = get_ticket_handler(ac, CEPH_ENTITY_TYPE_AUTH);
508 BUG_ON(!th); 510 if (IS_ERR(th))
511 return PTR_ERR(th);
509 ret = ceph_x_proc_ticket_reply(ac, &th->session_key, 512 ret = ceph_x_proc_ticket_reply(ac, &th->session_key,
510 buf + sizeof(*head), end); 513 buf + sizeof(*head), end);
511 break; 514 break;
@@ -563,8 +566,8 @@ static int ceph_x_verify_authorizer_reply(struct ceph_auth_client *ac,
563 void *end = p + sizeof(au->reply_buf); 566 void *end = p + sizeof(au->reply_buf);
564 567
565 th = get_ticket_handler(ac, au->service); 568 th = get_ticket_handler(ac, au->service);
566 if (!th) 569 if (IS_ERR(th))
567 return -EIO; /* hrm! */ 570 return PTR_ERR(th);
568 ret = ceph_x_decrypt(&th->session_key, &p, end, &reply, sizeof(reply)); 571 ret = ceph_x_decrypt(&th->session_key, &p, end, &reply, sizeof(reply));
569 if (ret < 0) 572 if (ret < 0)
570 return ret; 573 return ret;
@@ -626,7 +629,7 @@ static void ceph_x_invalidate_authorizer(struct ceph_auth_client *ac,
626 struct ceph_x_ticket_handler *th; 629 struct ceph_x_ticket_handler *th;
627 630
628 th = get_ticket_handler(ac, peer_type); 631 th = get_ticket_handler(ac, peer_type);
629 if (th && !IS_ERR(th)) 632 if (!IS_ERR(th))
630 remove_ticket_handler(ac, th); 633 remove_ticket_handler(ac, th);
631} 634}
632 635
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 7bf182b03973..a2069b6680ae 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -1082,6 +1082,7 @@ static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap,
1082 gid_t gid; 1082 gid_t gid;
1083 struct ceph_mds_session *session; 1083 struct ceph_mds_session *session;
1084 u64 xattr_version = 0; 1084 u64 xattr_version = 0;
1085 struct ceph_buffer *xattr_blob = NULL;
1085 int delayed = 0; 1086 int delayed = 0;
1086 u64 flush_tid = 0; 1087 u64 flush_tid = 0;
1087 int i; 1088 int i;
@@ -1142,6 +1143,10 @@ static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap,
1142 for (i = 0; i < CEPH_CAP_BITS; i++) 1143 for (i = 0; i < CEPH_CAP_BITS; i++)
1143 if (flushing & (1 << i)) 1144 if (flushing & (1 << i))
1144 ci->i_cap_flush_tid[i] = flush_tid; 1145 ci->i_cap_flush_tid[i] = flush_tid;
1146
1147 follows = ci->i_head_snapc->seq;
1148 } else {
1149 follows = 0;
1145 } 1150 }
1146 1151
1147 keep = cap->implemented; 1152 keep = cap->implemented;
@@ -1155,14 +1160,14 @@ static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap,
1155 mtime = inode->i_mtime; 1160 mtime = inode->i_mtime;
1156 atime = inode->i_atime; 1161 atime = inode->i_atime;
1157 time_warp_seq = ci->i_time_warp_seq; 1162 time_warp_seq = ci->i_time_warp_seq;
1158 follows = ci->i_snap_realm->cached_context->seq;
1159 uid = inode->i_uid; 1163 uid = inode->i_uid;
1160 gid = inode->i_gid; 1164 gid = inode->i_gid;
1161 mode = inode->i_mode; 1165 mode = inode->i_mode;
1162 1166
1163 if (dropping & CEPH_CAP_XATTR_EXCL) { 1167 if (flushing & CEPH_CAP_XATTR_EXCL) {
1164 __ceph_build_xattrs_blob(ci); 1168 __ceph_build_xattrs_blob(ci);
1165 xattr_version = ci->i_xattrs.version + 1; 1169 xattr_blob = ci->i_xattrs.blob;
1170 xattr_version = ci->i_xattrs.version;
1166 } 1171 }
1167 1172
1168 spin_unlock(&inode->i_lock); 1173 spin_unlock(&inode->i_lock);
@@ -1170,9 +1175,7 @@ static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap,
1170 ret = send_cap_msg(session, ceph_vino(inode).ino, cap_id, 1175 ret = send_cap_msg(session, ceph_vino(inode).ino, cap_id,
1171 op, keep, want, flushing, seq, flush_tid, issue_seq, mseq, 1176 op, keep, want, flushing, seq, flush_tid, issue_seq, mseq,
1172 size, max_size, &mtime, &atime, time_warp_seq, 1177 size, max_size, &mtime, &atime, time_warp_seq,
1173 uid, gid, mode, 1178 uid, gid, mode, xattr_version, xattr_blob,
1174 xattr_version,
1175 (flushing & CEPH_CAP_XATTR_EXCL) ? ci->i_xattrs.blob : NULL,
1176 follows); 1179 follows);
1177 if (ret < 0) { 1180 if (ret < 0) {
1178 dout("error sending cap msg, must requeue %p\n", inode); 1181 dout("error sending cap msg, must requeue %p\n", inode);
@@ -1282,7 +1285,7 @@ retry:
1282 &capsnap->mtime, &capsnap->atime, 1285 &capsnap->mtime, &capsnap->atime,
1283 capsnap->time_warp_seq, 1286 capsnap->time_warp_seq,
1284 capsnap->uid, capsnap->gid, capsnap->mode, 1287 capsnap->uid, capsnap->gid, capsnap->mode,
1285 0, NULL, 1288 capsnap->xattr_version, capsnap->xattr_blob,
1286 capsnap->follows); 1289 capsnap->follows);
1287 1290
1288 next_follows = capsnap->follows + 1; 1291 next_follows = capsnap->follows + 1;
@@ -1332,7 +1335,11 @@ void __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask)
1332 ceph_cap_string(was | mask)); 1335 ceph_cap_string(was | mask));
1333 ci->i_dirty_caps |= mask; 1336 ci->i_dirty_caps |= mask;
1334 if (was == 0) { 1337 if (was == 0) {
1335 dout(" inode %p now dirty\n", &ci->vfs_inode); 1338 if (!ci->i_head_snapc)
1339 ci->i_head_snapc = ceph_get_snap_context(
1340 ci->i_snap_realm->cached_context);
1341 dout(" inode %p now dirty snapc %p\n", &ci->vfs_inode,
1342 ci->i_head_snapc);
1336 BUG_ON(!list_empty(&ci->i_dirty_item)); 1343 BUG_ON(!list_empty(&ci->i_dirty_item));
1337 spin_lock(&mdsc->cap_dirty_lock); 1344 spin_lock(&mdsc->cap_dirty_lock);
1338 list_add(&ci->i_dirty_item, &mdsc->cap_dirty); 1345 list_add(&ci->i_dirty_item, &mdsc->cap_dirty);
@@ -2190,7 +2197,9 @@ void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr,
2190 2197
2191 if (ci->i_head_snapc == snapc) { 2198 if (ci->i_head_snapc == snapc) {
2192 ci->i_wrbuffer_ref_head -= nr; 2199 ci->i_wrbuffer_ref_head -= nr;
2193 if (!ci->i_wrbuffer_ref_head) { 2200 if (ci->i_wrbuffer_ref_head == 0 &&
2201 ci->i_dirty_caps == 0 && ci->i_flushing_caps == 0) {
2202 BUG_ON(!ci->i_head_snapc);
2194 ceph_put_snap_context(ci->i_head_snapc); 2203 ceph_put_snap_context(ci->i_head_snapc);
2195 ci->i_head_snapc = NULL; 2204 ci->i_head_snapc = NULL;
2196 } 2205 }
@@ -2483,6 +2492,11 @@ static void handle_cap_flush_ack(struct inode *inode, u64 flush_tid,
2483 dout(" inode %p now clean\n", inode); 2492 dout(" inode %p now clean\n", inode);
2484 BUG_ON(!list_empty(&ci->i_dirty_item)); 2493 BUG_ON(!list_empty(&ci->i_dirty_item));
2485 drop = 1; 2494 drop = 1;
2495 if (ci->i_wrbuffer_ref_head == 0) {
2496 BUG_ON(!ci->i_head_snapc);
2497 ceph_put_snap_context(ci->i_head_snapc);
2498 ci->i_head_snapc = NULL;
2499 }
2486 } else { 2500 } else {
2487 BUG_ON(list_empty(&ci->i_dirty_item)); 2501 BUG_ON(list_empty(&ci->i_dirty_item));
2488 } 2502 }
diff --git a/fs/ceph/debugfs.c b/fs/ceph/debugfs.c
index 360c4f22718d..6fd8b20a8611 100644
--- a/fs/ceph/debugfs.c
+++ b/fs/ceph/debugfs.c
@@ -171,6 +171,8 @@ static int mdsc_show(struct seq_file *s, void *p)
171 } else if (req->r_dentry) { 171 } else if (req->r_dentry) {
172 path = ceph_mdsc_build_path(req->r_dentry, &pathlen, 172 path = ceph_mdsc_build_path(req->r_dentry, &pathlen,
173 &pathbase, 0); 173 &pathbase, 0);
174 if (IS_ERR(path))
175 path = NULL;
174 spin_lock(&req->r_dentry->d_lock); 176 spin_lock(&req->r_dentry->d_lock);
175 seq_printf(s, " #%llx/%.*s (%s)", 177 seq_printf(s, " #%llx/%.*s (%s)",
176 ceph_ino(req->r_dentry->d_parent->d_inode), 178 ceph_ino(req->r_dentry->d_parent->d_inode),
@@ -187,6 +189,8 @@ static int mdsc_show(struct seq_file *s, void *p)
187 if (req->r_old_dentry) { 189 if (req->r_old_dentry) {
188 path = ceph_mdsc_build_path(req->r_old_dentry, &pathlen, 190 path = ceph_mdsc_build_path(req->r_old_dentry, &pathlen,
189 &pathbase, 0); 191 &pathbase, 0);
192 if (IS_ERR(path))
193 path = NULL;
190 spin_lock(&req->r_old_dentry->d_lock); 194 spin_lock(&req->r_old_dentry->d_lock);
191 seq_printf(s, " #%llx/%.*s (%s)", 195 seq_printf(s, " #%llx/%.*s (%s)",
192 ceph_ino(req->r_old_dentry->d_parent->d_inode), 196 ceph_ino(req->r_old_dentry->d_parent->d_inode),
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index 67bbb41d5526..6e4f43ff23ec 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -46,7 +46,7 @@ int ceph_init_dentry(struct dentry *dentry)
46 else 46 else
47 dentry->d_op = &ceph_snap_dentry_ops; 47 dentry->d_op = &ceph_snap_dentry_ops;
48 48
49 di = kmem_cache_alloc(ceph_dentry_cachep, GFP_NOFS); 49 di = kmem_cache_alloc(ceph_dentry_cachep, GFP_NOFS | __GFP_ZERO);
50 if (!di) 50 if (!di)
51 return -ENOMEM; /* oh well */ 51 return -ENOMEM; /* oh well */
52 52
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index 5d893d31e399..e7cca414da03 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -677,6 +677,7 @@ static int fill_inode(struct inode *inode,
677 if (ci->i_files == 0 && ci->i_subdirs == 0 && 677 if (ci->i_files == 0 && ci->i_subdirs == 0 &&
678 ceph_snap(inode) == CEPH_NOSNAP && 678 ceph_snap(inode) == CEPH_NOSNAP &&
679 (le32_to_cpu(info->cap.caps) & CEPH_CAP_FILE_SHARED) && 679 (le32_to_cpu(info->cap.caps) & CEPH_CAP_FILE_SHARED) &&
680 (issued & CEPH_CAP_FILE_EXCL) == 0 &&
680 (ci->i_ceph_flags & CEPH_I_COMPLETE) == 0) { 681 (ci->i_ceph_flags & CEPH_I_COMPLETE) == 0) {
681 dout(" marking %p complete (empty)\n", inode); 682 dout(" marking %p complete (empty)\n", inode);
682 ci->i_ceph_flags |= CEPH_I_COMPLETE; 683 ci->i_ceph_flags |= CEPH_I_COMPLETE;
@@ -1229,11 +1230,11 @@ retry_lookup:
1229 in = dn->d_inode; 1230 in = dn->d_inode;
1230 } else { 1231 } else {
1231 in = ceph_get_inode(parent->d_sb, vino); 1232 in = ceph_get_inode(parent->d_sb, vino);
1232 if (in == NULL) { 1233 if (IS_ERR(in)) {
1233 dout("new_inode badness\n"); 1234 dout("new_inode badness\n");
1234 d_delete(dn); 1235 d_delete(dn);
1235 dput(dn); 1236 dput(dn);
1236 err = -ENOMEM; 1237 err = PTR_ERR(in);
1237 goto out; 1238 goto out;
1238 } 1239 }
1239 dn = splice_dentry(dn, in, NULL); 1240 dn = splice_dentry(dn, in, NULL);
diff --git a/fs/ceph/locks.c b/fs/ceph/locks.c
index ae85af06454f..ff4e753aae92 100644
--- a/fs/ceph/locks.c
+++ b/fs/ceph/locks.c
@@ -82,7 +82,8 @@ int ceph_lock(struct file *file, int cmd, struct file_lock *fl)
82 length = fl->fl_end - fl->fl_start + 1; 82 length = fl->fl_end - fl->fl_start + 1;
83 83
84 err = ceph_lock_message(CEPH_LOCK_FCNTL, op, file, 84 err = ceph_lock_message(CEPH_LOCK_FCNTL, op, file,
85 (u64)fl->fl_pid, (u64)fl->fl_nspid, 85 (u64)fl->fl_pid,
86 (u64)(unsigned long)fl->fl_nspid,
86 lock_cmd, fl->fl_start, 87 lock_cmd, fl->fl_start,
87 length, wait); 88 length, wait);
88 if (!err) { 89 if (!err) {
@@ -92,7 +93,8 @@ int ceph_lock(struct file *file, int cmd, struct file_lock *fl)
92 /* undo! This should only happen if the kernel detects 93 /* undo! This should only happen if the kernel detects
93 * local deadlock. */ 94 * local deadlock. */
94 ceph_lock_message(CEPH_LOCK_FCNTL, op, file, 95 ceph_lock_message(CEPH_LOCK_FCNTL, op, file,
95 (u64)fl->fl_pid, (u64)fl->fl_nspid, 96 (u64)fl->fl_pid,
97 (u64)(unsigned long)fl->fl_nspid,
96 CEPH_LOCK_UNLOCK, fl->fl_start, 98 CEPH_LOCK_UNLOCK, fl->fl_start,
97 length, 0); 99 length, 0);
98 dout("got %d on posix_lock_file, undid lock", err); 100 dout("got %d on posix_lock_file, undid lock", err);
@@ -132,7 +134,8 @@ int ceph_flock(struct file *file, int cmd, struct file_lock *fl)
132 length = fl->fl_end - fl->fl_start + 1; 134 length = fl->fl_end - fl->fl_start + 1;
133 135
134 err = ceph_lock_message(CEPH_LOCK_FLOCK, CEPH_MDS_OP_SETFILELOCK, 136 err = ceph_lock_message(CEPH_LOCK_FLOCK, CEPH_MDS_OP_SETFILELOCK,
135 file, (u64)fl->fl_pid, (u64)fl->fl_nspid, 137 file, (u64)fl->fl_pid,
138 (u64)(unsigned long)fl->fl_nspid,
136 lock_cmd, fl->fl_start, 139 lock_cmd, fl->fl_start,
137 length, wait); 140 length, wait);
138 if (!err) { 141 if (!err) {
@@ -141,7 +144,7 @@ int ceph_flock(struct file *file, int cmd, struct file_lock *fl)
141 ceph_lock_message(CEPH_LOCK_FLOCK, 144 ceph_lock_message(CEPH_LOCK_FLOCK,
142 CEPH_MDS_OP_SETFILELOCK, 145 CEPH_MDS_OP_SETFILELOCK,
143 file, (u64)fl->fl_pid, 146 file, (u64)fl->fl_pid,
144 (u64)fl->fl_nspid, 147 (u64)(unsigned long)fl->fl_nspid,
145 CEPH_LOCK_UNLOCK, fl->fl_start, 148 CEPH_LOCK_UNLOCK, fl->fl_start,
146 length, 0); 149 length, 0);
147 dout("got %d on flock_lock_file_wait, undid lock", err); 150 dout("got %d on flock_lock_file_wait, undid lock", err);
@@ -235,7 +238,8 @@ int lock_to_ceph_filelock(struct file_lock *lock,
235 cephlock->length = cpu_to_le64(lock->fl_end - lock->fl_start + 1); 238 cephlock->length = cpu_to_le64(lock->fl_end - lock->fl_start + 1);
236 cephlock->client = cpu_to_le64(0); 239 cephlock->client = cpu_to_le64(0);
237 cephlock->pid = cpu_to_le64(lock->fl_pid); 240 cephlock->pid = cpu_to_le64(lock->fl_pid);
238 cephlock->pid_namespace = cpu_to_le64((u64)lock->fl_nspid); 241 cephlock->pid_namespace =
242 cpu_to_le64((u64)(unsigned long)lock->fl_nspid);
239 243
240 switch (lock->fl_type) { 244 switch (lock->fl_type) {
241 case F_RDLCK: 245 case F_RDLCK:
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index a75ddbf9fe37..f091b1351786 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -560,6 +560,13 @@ static void __unregister_request(struct ceph_mds_client *mdsc,
560 * 560 *
561 * Called under mdsc->mutex. 561 * Called under mdsc->mutex.
562 */ 562 */
563struct dentry *get_nonsnap_parent(struct dentry *dentry)
564{
565 while (!IS_ROOT(dentry) && ceph_snap(dentry->d_inode) != CEPH_NOSNAP)
566 dentry = dentry->d_parent;
567 return dentry;
568}
569
563static int __choose_mds(struct ceph_mds_client *mdsc, 570static int __choose_mds(struct ceph_mds_client *mdsc,
564 struct ceph_mds_request *req) 571 struct ceph_mds_request *req)
565{ 572{
@@ -590,14 +597,29 @@ static int __choose_mds(struct ceph_mds_client *mdsc,
590 if (req->r_inode) { 597 if (req->r_inode) {
591 inode = req->r_inode; 598 inode = req->r_inode;
592 } else if (req->r_dentry) { 599 } else if (req->r_dentry) {
593 if (req->r_dentry->d_inode) { 600 struct inode *dir = req->r_dentry->d_parent->d_inode;
601
602 if (dir->i_sb != mdsc->client->sb) {
603 /* not this fs! */
604 inode = req->r_dentry->d_inode;
605 } else if (ceph_snap(dir) != CEPH_NOSNAP) {
606 /* direct snapped/virtual snapdir requests
607 * based on parent dir inode */
608 struct dentry *dn =
609 get_nonsnap_parent(req->r_dentry->d_parent);
610 inode = dn->d_inode;
611 dout("__choose_mds using nonsnap parent %p\n", inode);
612 } else if (req->r_dentry->d_inode) {
613 /* dentry target */
594 inode = req->r_dentry->d_inode; 614 inode = req->r_dentry->d_inode;
595 } else { 615 } else {
596 inode = req->r_dentry->d_parent->d_inode; 616 /* dir + name */
617 inode = dir;
597 hash = req->r_dentry->d_name.hash; 618 hash = req->r_dentry->d_name.hash;
598 is_hash = true; 619 is_hash = true;
599 } 620 }
600 } 621 }
622
601 dout("__choose_mds %p is_hash=%d (%d) mode %d\n", inode, (int)is_hash, 623 dout("__choose_mds %p is_hash=%d (%d) mode %d\n", inode, (int)is_hash,
602 (int)hash, mode); 624 (int)hash, mode);
603 if (!inode) 625 if (!inode)
@@ -2208,7 +2230,7 @@ static void handle_session(struct ceph_mds_session *session,
2208 pr_info("mds%d reconnect denied\n", session->s_mds); 2230 pr_info("mds%d reconnect denied\n", session->s_mds);
2209 remove_session_caps(session); 2231 remove_session_caps(session);
2210 wake = 1; /* for good measure */ 2232 wake = 1; /* for good measure */
2211 complete_all(&mdsc->session_close_waiters); 2233 wake_up_all(&mdsc->session_close_wq);
2212 kick_requests(mdsc, mds); 2234 kick_requests(mdsc, mds);
2213 break; 2235 break;
2214 2236
@@ -2302,7 +2324,7 @@ static int encode_caps_cb(struct inode *inode, struct ceph_cap *cap,
2302 path = ceph_mdsc_build_path(dentry, &pathlen, &pathbase, 0); 2324 path = ceph_mdsc_build_path(dentry, &pathlen, &pathbase, 0);
2303 if (IS_ERR(path)) { 2325 if (IS_ERR(path)) {
2304 err = PTR_ERR(path); 2326 err = PTR_ERR(path);
2305 BUG_ON(err); 2327 goto out_dput;
2306 } 2328 }
2307 } else { 2329 } else {
2308 path = NULL; 2330 path = NULL;
@@ -2310,7 +2332,7 @@ static int encode_caps_cb(struct inode *inode, struct ceph_cap *cap,
2310 } 2332 }
2311 err = ceph_pagelist_encode_string(pagelist, path, pathlen); 2333 err = ceph_pagelist_encode_string(pagelist, path, pathlen);
2312 if (err) 2334 if (err)
2313 goto out; 2335 goto out_free;
2314 2336
2315 spin_lock(&inode->i_lock); 2337 spin_lock(&inode->i_lock);
2316 cap->seq = 0; /* reset cap seq */ 2338 cap->seq = 0; /* reset cap seq */
@@ -2354,8 +2376,9 @@ static int encode_caps_cb(struct inode *inode, struct ceph_cap *cap,
2354 unlock_kernel(); 2376 unlock_kernel();
2355 } 2377 }
2356 2378
2357out: 2379out_free:
2358 kfree(path); 2380 kfree(path);
2381out_dput:
2359 dput(dentry); 2382 dput(dentry);
2360 return err; 2383 return err;
2361} 2384}
@@ -2876,7 +2899,7 @@ int ceph_mdsc_init(struct ceph_mds_client *mdsc, struct ceph_client *client)
2876 return -ENOMEM; 2899 return -ENOMEM;
2877 2900
2878 init_completion(&mdsc->safe_umount_waiters); 2901 init_completion(&mdsc->safe_umount_waiters);
2879 init_completion(&mdsc->session_close_waiters); 2902 init_waitqueue_head(&mdsc->session_close_wq);
2880 INIT_LIST_HEAD(&mdsc->waiting_for_map); 2903 INIT_LIST_HEAD(&mdsc->waiting_for_map);
2881 mdsc->sessions = NULL; 2904 mdsc->sessions = NULL;
2882 mdsc->max_sessions = 0; 2905 mdsc->max_sessions = 0;
@@ -3021,6 +3044,23 @@ void ceph_mdsc_sync(struct ceph_mds_client *mdsc)
3021 wait_event(mdsc->cap_flushing_wq, check_cap_flush(mdsc, want_flush)); 3044 wait_event(mdsc->cap_flushing_wq, check_cap_flush(mdsc, want_flush));
3022} 3045}
3023 3046
3047/*
3048 * true if all sessions are closed, or we force unmount
3049 */
3050bool done_closing_sessions(struct ceph_mds_client *mdsc)
3051{
3052 int i, n = 0;
3053
3054 if (mdsc->client->mount_state == CEPH_MOUNT_SHUTDOWN)
3055 return true;
3056
3057 mutex_lock(&mdsc->mutex);
3058 for (i = 0; i < mdsc->max_sessions; i++)
3059 if (mdsc->sessions[i])
3060 n++;
3061 mutex_unlock(&mdsc->mutex);
3062 return n == 0;
3063}
3024 3064
3025/* 3065/*
3026 * called after sb is ro. 3066 * called after sb is ro.
@@ -3029,45 +3069,32 @@ void ceph_mdsc_close_sessions(struct ceph_mds_client *mdsc)
3029{ 3069{
3030 struct ceph_mds_session *session; 3070 struct ceph_mds_session *session;
3031 int i; 3071 int i;
3032 int n;
3033 struct ceph_client *client = mdsc->client; 3072 struct ceph_client *client = mdsc->client;
3034 unsigned long started, timeout = client->mount_args->mount_timeout * HZ; 3073 unsigned long timeout = client->mount_args->mount_timeout * HZ;
3035 3074
3036 dout("close_sessions\n"); 3075 dout("close_sessions\n");
3037 3076
3038 mutex_lock(&mdsc->mutex);
3039
3040 /* close sessions */ 3077 /* close sessions */
3041 started = jiffies; 3078 mutex_lock(&mdsc->mutex);
3042 while (time_before(jiffies, started + timeout)) { 3079 for (i = 0; i < mdsc->max_sessions; i++) {
3043 dout("closing sessions\n"); 3080 session = __ceph_lookup_mds_session(mdsc, i);
3044 n = 0; 3081 if (!session)
3045 for (i = 0; i < mdsc->max_sessions; i++) { 3082 continue;
3046 session = __ceph_lookup_mds_session(mdsc, i);
3047 if (!session)
3048 continue;
3049 mutex_unlock(&mdsc->mutex);
3050 mutex_lock(&session->s_mutex);
3051 __close_session(mdsc, session);
3052 mutex_unlock(&session->s_mutex);
3053 ceph_put_mds_session(session);
3054 mutex_lock(&mdsc->mutex);
3055 n++;
3056 }
3057 if (n == 0)
3058 break;
3059
3060 if (client->mount_state == CEPH_MOUNT_SHUTDOWN)
3061 break;
3062
3063 dout("waiting for sessions to close\n");
3064 mutex_unlock(&mdsc->mutex); 3083 mutex_unlock(&mdsc->mutex);
3065 wait_for_completion_timeout(&mdsc->session_close_waiters, 3084 mutex_lock(&session->s_mutex);
3066 timeout); 3085 __close_session(mdsc, session);
3086 mutex_unlock(&session->s_mutex);
3087 ceph_put_mds_session(session);
3067 mutex_lock(&mdsc->mutex); 3088 mutex_lock(&mdsc->mutex);
3068 } 3089 }
3090 mutex_unlock(&mdsc->mutex);
3091
3092 dout("waiting for sessions to close\n");
3093 wait_event_timeout(mdsc->session_close_wq, done_closing_sessions(mdsc),
3094 timeout);
3069 3095
3070 /* tear down remaining sessions */ 3096 /* tear down remaining sessions */
3097 mutex_lock(&mdsc->mutex);
3071 for (i = 0; i < mdsc->max_sessions; i++) { 3098 for (i = 0; i < mdsc->max_sessions; i++) {
3072 if (mdsc->sessions[i]) { 3099 if (mdsc->sessions[i]) {
3073 session = get_session(mdsc->sessions[i]); 3100 session = get_session(mdsc->sessions[i]);
@@ -3080,9 +3107,7 @@ void ceph_mdsc_close_sessions(struct ceph_mds_client *mdsc)
3080 mutex_lock(&mdsc->mutex); 3107 mutex_lock(&mdsc->mutex);
3081 } 3108 }
3082 } 3109 }
3083
3084 WARN_ON(!list_empty(&mdsc->cap_delay_list)); 3110 WARN_ON(!list_empty(&mdsc->cap_delay_list));
3085
3086 mutex_unlock(&mdsc->mutex); 3111 mutex_unlock(&mdsc->mutex);
3087 3112
3088 ceph_cleanup_empty_realms(mdsc); 3113 ceph_cleanup_empty_realms(mdsc);
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
index ab7e89f5e344..c98267ce6d2a 100644
--- a/fs/ceph/mds_client.h
+++ b/fs/ceph/mds_client.h
@@ -234,7 +234,8 @@ struct ceph_mds_client {
234 struct mutex mutex; /* all nested structures */ 234 struct mutex mutex; /* all nested structures */
235 235
236 struct ceph_mdsmap *mdsmap; 236 struct ceph_mdsmap *mdsmap;
237 struct completion safe_umount_waiters, session_close_waiters; 237 struct completion safe_umount_waiters;
238 wait_queue_head_t session_close_wq;
238 struct list_head waiting_for_map; 239 struct list_head waiting_for_map;
239 240
240 struct ceph_mds_session **sessions; /* NULL for mds if no session */ 241 struct ceph_mds_session **sessions; /* NULL for mds if no session */
diff --git a/fs/ceph/osd_client.c b/fs/ceph/osd_client.c
index bed6391e52c7..dfced1dacbcd 100644
--- a/fs/ceph/osd_client.c
+++ b/fs/ceph/osd_client.c
@@ -661,7 +661,7 @@ static int __send_request(struct ceph_osd_client *osdc,
661 reqhead->reassert_version = req->r_reassert_version; 661 reqhead->reassert_version = req->r_reassert_version;
662 662
663 req->r_stamp = jiffies; 663 req->r_stamp = jiffies;
664 list_move_tail(&osdc->req_lru, &req->r_req_lru_item); 664 list_move_tail(&req->r_req_lru_item, &osdc->req_lru);
665 665
666 ceph_msg_get(req->r_request); /* send consumes a ref */ 666 ceph_msg_get(req->r_request); /* send consumes a ref */
667 ceph_con_send(&req->r_osd->o_con, req->r_request); 667 ceph_con_send(&req->r_osd->o_con, req->r_request);
diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c
index c0b26b6badba..4868b9dcac5a 100644
--- a/fs/ceph/snap.c
+++ b/fs/ceph/snap.c
@@ -435,7 +435,7 @@ void ceph_queue_cap_snap(struct ceph_inode_info *ci)
435{ 435{
436 struct inode *inode = &ci->vfs_inode; 436 struct inode *inode = &ci->vfs_inode;
437 struct ceph_cap_snap *capsnap; 437 struct ceph_cap_snap *capsnap;
438 int used; 438 int used, dirty;
439 439
440 capsnap = kzalloc(sizeof(*capsnap), GFP_NOFS); 440 capsnap = kzalloc(sizeof(*capsnap), GFP_NOFS);
441 if (!capsnap) { 441 if (!capsnap) {
@@ -445,6 +445,7 @@ void ceph_queue_cap_snap(struct ceph_inode_info *ci)
445 445
446 spin_lock(&inode->i_lock); 446 spin_lock(&inode->i_lock);
447 used = __ceph_caps_used(ci); 447 used = __ceph_caps_used(ci);
448 dirty = __ceph_caps_dirty(ci);
448 if (__ceph_have_pending_cap_snap(ci)) { 449 if (__ceph_have_pending_cap_snap(ci)) {
449 /* there is no point in queuing multiple "pending" cap_snaps, 450 /* there is no point in queuing multiple "pending" cap_snaps,
450 as no new writes are allowed to start when pending, so any 451 as no new writes are allowed to start when pending, so any
@@ -452,11 +453,15 @@ void ceph_queue_cap_snap(struct ceph_inode_info *ci)
452 cap_snap. lucky us. */ 453 cap_snap. lucky us. */
453 dout("queue_cap_snap %p already pending\n", inode); 454 dout("queue_cap_snap %p already pending\n", inode);
454 kfree(capsnap); 455 kfree(capsnap);
455 } else if (ci->i_wrbuffer_ref_head || (used & CEPH_CAP_FILE_WR)) { 456 } else if (ci->i_wrbuffer_ref_head || (used & CEPH_CAP_FILE_WR) ||
457 (dirty & (CEPH_CAP_AUTH_EXCL|CEPH_CAP_XATTR_EXCL|
458 CEPH_CAP_FILE_EXCL|CEPH_CAP_FILE_WR))) {
456 struct ceph_snap_context *snapc = ci->i_head_snapc; 459 struct ceph_snap_context *snapc = ci->i_head_snapc;
457 460
461 dout("queue_cap_snap %p cap_snap %p queuing under %p\n", inode,
462 capsnap, snapc);
458 igrab(inode); 463 igrab(inode);
459 464
460 atomic_set(&capsnap->nref, 1); 465 atomic_set(&capsnap->nref, 1);
461 capsnap->ci = ci; 466 capsnap->ci = ci;
462 INIT_LIST_HEAD(&capsnap->ci_item); 467 INIT_LIST_HEAD(&capsnap->ci_item);
@@ -464,15 +469,21 @@ void ceph_queue_cap_snap(struct ceph_inode_info *ci)
464 469
465 capsnap->follows = snapc->seq - 1; 470 capsnap->follows = snapc->seq - 1;
466 capsnap->issued = __ceph_caps_issued(ci, NULL); 471 capsnap->issued = __ceph_caps_issued(ci, NULL);
467 capsnap->dirty = __ceph_caps_dirty(ci); 472 capsnap->dirty = dirty;
468 473
469 capsnap->mode = inode->i_mode; 474 capsnap->mode = inode->i_mode;
470 capsnap->uid = inode->i_uid; 475 capsnap->uid = inode->i_uid;
471 capsnap->gid = inode->i_gid; 476 capsnap->gid = inode->i_gid;
472 477
473 /* fixme? */ 478 if (dirty & CEPH_CAP_XATTR_EXCL) {
474 capsnap->xattr_blob = NULL; 479 __ceph_build_xattrs_blob(ci);
475 capsnap->xattr_len = 0; 480 capsnap->xattr_blob =
481 ceph_buffer_get(ci->i_xattrs.blob);
482 capsnap->xattr_version = ci->i_xattrs.version;
483 } else {
484 capsnap->xattr_blob = NULL;
485 capsnap->xattr_version = 0;
486 }
476 487
477 /* dirty page count moved from _head to this cap_snap; 488 /* dirty page count moved from _head to this cap_snap;
478 all subsequent writes page dirties occur _after_ this 489 all subsequent writes page dirties occur _after_ this
@@ -480,7 +491,9 @@ void ceph_queue_cap_snap(struct ceph_inode_info *ci)
480 capsnap->dirty_pages = ci->i_wrbuffer_ref_head; 491 capsnap->dirty_pages = ci->i_wrbuffer_ref_head;
481 ci->i_wrbuffer_ref_head = 0; 492 ci->i_wrbuffer_ref_head = 0;
482 capsnap->context = snapc; 493 capsnap->context = snapc;
483 ci->i_head_snapc = NULL; 494 ci->i_head_snapc =
495 ceph_get_snap_context(ci->i_snap_realm->cached_context);
496 dout(" new snapc is %p\n", ci->i_head_snapc);
484 list_add_tail(&capsnap->ci_item, &ci->i_cap_snaps); 497 list_add_tail(&capsnap->ci_item, &ci->i_cap_snaps);
485 498
486 if (used & CEPH_CAP_FILE_WR) { 499 if (used & CEPH_CAP_FILE_WR) {
@@ -539,6 +552,41 @@ int __ceph_finish_cap_snap(struct ceph_inode_info *ci,
539 return 1; /* caller may want to ceph_flush_snaps */ 552 return 1; /* caller may want to ceph_flush_snaps */
540} 553}
541 554
555/*
556 * Queue cap_snaps for snap writeback for this realm and its children.
557 * Called under snap_rwsem, so realm topology won't change.
558 */
559static void queue_realm_cap_snaps(struct ceph_snap_realm *realm)
560{
561 struct ceph_inode_info *ci;
562 struct inode *lastinode = NULL;
563 struct ceph_snap_realm *child;
564
565 dout("queue_realm_cap_snaps %p %llx inodes\n", realm, realm->ino);
566
567 spin_lock(&realm->inodes_with_caps_lock);
568 list_for_each_entry(ci, &realm->inodes_with_caps,
569 i_snap_realm_item) {
570 struct inode *inode = igrab(&ci->vfs_inode);
571 if (!inode)
572 continue;
573 spin_unlock(&realm->inodes_with_caps_lock);
574 if (lastinode)
575 iput(lastinode);
576 lastinode = inode;
577 ceph_queue_cap_snap(ci);
578 spin_lock(&realm->inodes_with_caps_lock);
579 }
580 spin_unlock(&realm->inodes_with_caps_lock);
581 if (lastinode)
582 iput(lastinode);
583
584 dout("queue_realm_cap_snaps %p %llx children\n", realm, realm->ino);
585 list_for_each_entry(child, &realm->children, child_item)
586 queue_realm_cap_snaps(child);
587
588 dout("queue_realm_cap_snaps %p %llx done\n", realm, realm->ino);
589}
542 590
543/* 591/*
544 * Parse and apply a snapblob "snap trace" from the MDS. This specifies 592 * Parse and apply a snapblob "snap trace" from the MDS. This specifies
@@ -589,29 +637,8 @@ more:
589 * 637 *
590 * ...unless it's a snap deletion! 638 * ...unless it's a snap deletion!
591 */ 639 */
592 if (!deletion) { 640 if (!deletion)
593 struct ceph_inode_info *ci; 641 queue_realm_cap_snaps(realm);
594 struct inode *lastinode = NULL;
595
596 spin_lock(&realm->inodes_with_caps_lock);
597 list_for_each_entry(ci, &realm->inodes_with_caps,
598 i_snap_realm_item) {
599 struct inode *inode = igrab(&ci->vfs_inode);
600 if (!inode)
601 continue;
602 spin_unlock(&realm->inodes_with_caps_lock);
603 if (lastinode)
604 iput(lastinode);
605 lastinode = inode;
606 ceph_queue_cap_snap(ci);
607 spin_lock(&realm->inodes_with_caps_lock);
608 }
609 spin_unlock(&realm->inodes_with_caps_lock);
610 if (lastinode)
611 iput(lastinode);
612 dout("update_snap_trace cap_snaps queued\n");
613 }
614
615 } else { 642 } else {
616 dout("update_snap_trace %llx %p seq %lld unchanged\n", 643 dout("update_snap_trace %llx %p seq %lld unchanged\n",
617 realm->ino, realm, realm->seq); 644 realm->ino, realm, realm->seq);
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index 2482d696f0de..c33897ae5725 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -216,8 +216,7 @@ struct ceph_cap_snap {
216 uid_t uid; 216 uid_t uid;
217 gid_t gid; 217 gid_t gid;
218 218
219 void *xattr_blob; 219 struct ceph_buffer *xattr_blob;
220 int xattr_len;
221 u64 xattr_version; 220 u64 xattr_version;
222 221
223 u64 size; 222 u64 size;
@@ -229,8 +228,11 @@ struct ceph_cap_snap {
229 228
230static inline void ceph_put_cap_snap(struct ceph_cap_snap *capsnap) 229static inline void ceph_put_cap_snap(struct ceph_cap_snap *capsnap)
231{ 230{
232 if (atomic_dec_and_test(&capsnap->nref)) 231 if (atomic_dec_and_test(&capsnap->nref)) {
232 if (capsnap->xattr_blob)
233 ceph_buffer_put(capsnap->xattr_blob);
233 kfree(capsnap); 234 kfree(capsnap);
235 }
234} 236}
235 237
236/* 238/*
@@ -342,7 +344,8 @@ struct ceph_inode_info {
342 unsigned i_cap_exporting_issued; 344 unsigned i_cap_exporting_issued;
343 struct ceph_cap_reservation i_cap_migration_resv; 345 struct ceph_cap_reservation i_cap_migration_resv;
344 struct list_head i_cap_snaps; /* snapped state pending flush to mds */ 346 struct list_head i_cap_snaps; /* snapped state pending flush to mds */
345 struct ceph_snap_context *i_head_snapc; /* set if wr_buffer_head > 0 */ 347 struct ceph_snap_context *i_head_snapc; /* set if wr_buffer_head > 0 or
348 dirty|flushing caps */
346 unsigned i_snap_caps; /* cap bits for snapped files */ 349 unsigned i_snap_caps; /* cap bits for snapped files */
347 350
348 int i_nr_by_mode[CEPH_FILE_MODE_NUM]; /* open file counts */ 351 int i_nr_by_mode[CEPH_FILE_MODE_NUM]; /* open file counts */
diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c
index 097a2654c00f..9578af610b73 100644
--- a/fs/ceph/xattr.c
+++ b/fs/ceph/xattr.c
@@ -485,6 +485,7 @@ void __ceph_build_xattrs_blob(struct ceph_inode_info *ci)
485 ci->i_xattrs.blob = ci->i_xattrs.prealloc_blob; 485 ci->i_xattrs.blob = ci->i_xattrs.prealloc_blob;
486 ci->i_xattrs.prealloc_blob = NULL; 486 ci->i_xattrs.prealloc_blob = NULL;
487 ci->i_xattrs.dirty = false; 487 ci->i_xattrs.dirty = false;
488 ci->i_xattrs.version++;
488 } 489 }
489} 490}
490 491
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index a803f5e33471..e3bccac1f025 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -1126,6 +1126,7 @@ void account_page_dirtied(struct page *page, struct address_space *mapping)
1126 task_io_account_write(PAGE_CACHE_SIZE); 1126 task_io_account_write(PAGE_CACHE_SIZE);
1127 } 1127 }
1128} 1128}
1129EXPORT_SYMBOL(account_page_dirtied);
1129 1130
1130/* 1131/*
1131 * For address_spaces which do not use buffers. Just tag the page as dirty in 1132 * For address_spaces which do not use buffers. Just tag the page as dirty in