aboutsummaryrefslogtreecommitdiffstats
path: root/fs/ceph
diff options
context:
space:
mode:
author: Chengguang Xu <cgxu519@gmx.com> 2018-03-12 22:42:44 -0400
committer: Ilya Dryomov <idryomov@gmail.com> 2018-04-02 04:12:49 -0400
commit: bb48bd4dc45f9ee1e44d8e9fcb01023e0d0ba80d (patch)
tree: b844653552011036270e965137f7af0ea4345291 /fs/ceph
parent: 47474d0b011bb385719e91a60bb9ff7649d66526 (diff)
ceph: optimize memory usage
In the current code, regular files and directories use the same struct ceph_file_info to store fs-specific data, so the struct has to include some fields which are only used for directories (e.g., readdir-related info). When there are plenty of regular files, this leads to memory waste. This patch introduces a dedicated ceph_dir_file_info cache for readdir-related things, so that regular files no longer include those unused fields. Signed-off-by: Chengguang Xu <cgxu519@gmx.com> Reviewed-by: "Yan, Zheng" <zyan@redhat.com> Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
Diffstat (limited to 'fs/ceph')
-rw-r--r-- fs/ceph/dir.c 185
-rw-r--r-- fs/ceph/file.c 88
-rw-r--r-- fs/ceph/super.c 8
-rw-r--r-- fs/ceph/super.h 4
4 files changed, 168 insertions, 117 deletions
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index 1aa3bfc9ef35..16405e0774a6 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -102,18 +102,18 @@ static int fpos_cmp(loff_t l, loff_t r)
102 * regardless of what dir changes take place on the 102 * regardless of what dir changes take place on the
103 * server. 103 * server.
104 */ 104 */
105static int note_last_dentry(struct ceph_file_info *fi, const char *name, 105static int note_last_dentry(struct ceph_dir_file_info *dfi, const char *name,
106 int len, unsigned next_offset) 106 int len, unsigned next_offset)
107{ 107{
108 char *buf = kmalloc(len+1, GFP_KERNEL); 108 char *buf = kmalloc(len+1, GFP_KERNEL);
109 if (!buf) 109 if (!buf)
110 return -ENOMEM; 110 return -ENOMEM;
111 kfree(fi->last_name); 111 kfree(dfi->last_name);
112 fi->last_name = buf; 112 dfi->last_name = buf;
113 memcpy(fi->last_name, name, len); 113 memcpy(dfi->last_name, name, len);
114 fi->last_name[len] = 0; 114 dfi->last_name[len] = 0;
115 fi->next_offset = next_offset; 115 dfi->next_offset = next_offset;
116 dout("note_last_dentry '%s'\n", fi->last_name); 116 dout("note_last_dentry '%s'\n", dfi->last_name);
117 return 0; 117 return 0;
118} 118}
119 119
@@ -175,7 +175,7 @@ __dcache_find_get_entry(struct dentry *parent, u64 idx,
175static int __dcache_readdir(struct file *file, struct dir_context *ctx, 175static int __dcache_readdir(struct file *file, struct dir_context *ctx,
176 int shared_gen) 176 int shared_gen)
177{ 177{
178 struct ceph_file_info *fi = file->private_data; 178 struct ceph_dir_file_info *dfi = file->private_data;
179 struct dentry *parent = file->f_path.dentry; 179 struct dentry *parent = file->f_path.dentry;
180 struct inode *dir = d_inode(parent); 180 struct inode *dir = d_inode(parent);
181 struct dentry *dentry, *last = NULL; 181 struct dentry *dentry, *last = NULL;
@@ -222,7 +222,7 @@ static int __dcache_readdir(struct file *file, struct dir_context *ctx,
222 bool emit_dentry = false; 222 bool emit_dentry = false;
223 dentry = __dcache_find_get_entry(parent, idx++, &cache_ctl); 223 dentry = __dcache_find_get_entry(parent, idx++, &cache_ctl);
224 if (!dentry) { 224 if (!dentry) {
225 fi->flags |= CEPH_F_ATEND; 225 dfi->file_info.flags |= CEPH_F_ATEND;
226 err = 0; 226 err = 0;
227 break; 227 break;
228 } 228 }
@@ -273,33 +273,33 @@ out:
273 if (last) { 273 if (last) {
274 int ret; 274 int ret;
275 di = ceph_dentry(last); 275 di = ceph_dentry(last);
276 ret = note_last_dentry(fi, last->d_name.name, last->d_name.len, 276 ret = note_last_dentry(dfi, last->d_name.name, last->d_name.len,
277 fpos_off(di->offset) + 1); 277 fpos_off(di->offset) + 1);
278 if (ret < 0) 278 if (ret < 0)
279 err = ret; 279 err = ret;
280 dput(last); 280 dput(last);
281 /* last_name no longer match cache index */ 281 /* last_name no longer match cache index */
282 if (fi->readdir_cache_idx >= 0) { 282 if (dfi->readdir_cache_idx >= 0) {
283 fi->readdir_cache_idx = -1; 283 dfi->readdir_cache_idx = -1;
284 fi->dir_release_count = 0; 284 dfi->dir_release_count = 0;
285 } 285 }
286 } 286 }
287 return err; 287 return err;
288} 288}
289 289
290static bool need_send_readdir(struct ceph_file_info *fi, loff_t pos) 290static bool need_send_readdir(struct ceph_dir_file_info *dfi, loff_t pos)
291{ 291{
292 if (!fi->last_readdir) 292 if (!dfi->last_readdir)
293 return true; 293 return true;
294 if (is_hash_order(pos)) 294 if (is_hash_order(pos))
295 return !ceph_frag_contains_value(fi->frag, fpos_hash(pos)); 295 return !ceph_frag_contains_value(dfi->frag, fpos_hash(pos));
296 else 296 else
297 return fi->frag != fpos_frag(pos); 297 return dfi->frag != fpos_frag(pos);
298} 298}
299 299
300static int ceph_readdir(struct file *file, struct dir_context *ctx) 300static int ceph_readdir(struct file *file, struct dir_context *ctx)
301{ 301{
302 struct ceph_file_info *fi = file->private_data; 302 struct ceph_dir_file_info *dfi = file->private_data;
303 struct inode *inode = file_inode(file); 303 struct inode *inode = file_inode(file);
304 struct ceph_inode_info *ci = ceph_inode(inode); 304 struct ceph_inode_info *ci = ceph_inode(inode);
305 struct ceph_fs_client *fsc = ceph_inode_to_client(inode); 305 struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
@@ -310,7 +310,7 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)
310 struct ceph_mds_reply_info_parsed *rinfo; 310 struct ceph_mds_reply_info_parsed *rinfo;
311 311
312 dout("readdir %p file %p pos %llx\n", inode, file, ctx->pos); 312 dout("readdir %p file %p pos %llx\n", inode, file, ctx->pos);
313 if (fi->flags & CEPH_F_ATEND) 313 if (dfi->file_info.flags & CEPH_F_ATEND)
314 return 0; 314 return 0;
315 315
316 /* always start with . and .. */ 316 /* always start with . and .. */
@@ -351,15 +351,15 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)
351 /* proceed with a normal readdir */ 351 /* proceed with a normal readdir */
352more: 352more:
353 /* do we have the correct frag content buffered? */ 353 /* do we have the correct frag content buffered? */
354 if (need_send_readdir(fi, ctx->pos)) { 354 if (need_send_readdir(dfi, ctx->pos)) {
355 struct ceph_mds_request *req; 355 struct ceph_mds_request *req;
356 int op = ceph_snap(inode) == CEPH_SNAPDIR ? 356 int op = ceph_snap(inode) == CEPH_SNAPDIR ?
357 CEPH_MDS_OP_LSSNAP : CEPH_MDS_OP_READDIR; 357 CEPH_MDS_OP_LSSNAP : CEPH_MDS_OP_READDIR;
358 358
359 /* discard old result, if any */ 359 /* discard old result, if any */
360 if (fi->last_readdir) { 360 if (dfi->last_readdir) {
361 ceph_mdsc_put_request(fi->last_readdir); 361 ceph_mdsc_put_request(dfi->last_readdir);
362 fi->last_readdir = NULL; 362 dfi->last_readdir = NULL;
363 } 363 }
364 364
365 if (is_hash_order(ctx->pos)) { 365 if (is_hash_order(ctx->pos)) {
@@ -373,7 +373,7 @@ more:
373 } 373 }
374 374
375 dout("readdir fetching %llx.%llx frag %x offset '%s'\n", 375 dout("readdir fetching %llx.%llx frag %x offset '%s'\n",
376 ceph_vinop(inode), frag, fi->last_name); 376 ceph_vinop(inode), frag, dfi->last_name);
377 req = ceph_mdsc_create_request(mdsc, op, USE_AUTH_MDS); 377 req = ceph_mdsc_create_request(mdsc, op, USE_AUTH_MDS);
378 if (IS_ERR(req)) 378 if (IS_ERR(req))
379 return PTR_ERR(req); 379 return PTR_ERR(req);
@@ -389,8 +389,8 @@ more:
389 __set_bit(CEPH_MDS_R_DIRECT_IS_HASH, &req->r_req_flags); 389 __set_bit(CEPH_MDS_R_DIRECT_IS_HASH, &req->r_req_flags);
390 req->r_inode_drop = CEPH_CAP_FILE_EXCL; 390 req->r_inode_drop = CEPH_CAP_FILE_EXCL;
391 } 391 }
392 if (fi->last_name) { 392 if (dfi->last_name) {
393 req->r_path2 = kstrdup(fi->last_name, GFP_KERNEL); 393 req->r_path2 = kstrdup(dfi->last_name, GFP_KERNEL);
394 if (!req->r_path2) { 394 if (!req->r_path2) {
395 ceph_mdsc_put_request(req); 395 ceph_mdsc_put_request(req);
396 return -ENOMEM; 396 return -ENOMEM;
@@ -400,10 +400,10 @@ more:
400 cpu_to_le32(fpos_hash(ctx->pos)); 400 cpu_to_le32(fpos_hash(ctx->pos));
401 } 401 }
402 402
403 req->r_dir_release_cnt = fi->dir_release_count; 403 req->r_dir_release_cnt = dfi->dir_release_count;
404 req->r_dir_ordered_cnt = fi->dir_ordered_count; 404 req->r_dir_ordered_cnt = dfi->dir_ordered_count;
405 req->r_readdir_cache_idx = fi->readdir_cache_idx; 405 req->r_readdir_cache_idx = dfi->readdir_cache_idx;
406 req->r_readdir_offset = fi->next_offset; 406 req->r_readdir_offset = dfi->next_offset;
407 req->r_args.readdir.frag = cpu_to_le32(frag); 407 req->r_args.readdir.frag = cpu_to_le32(frag);
408 req->r_args.readdir.flags = 408 req->r_args.readdir.flags =
409 cpu_to_le16(CEPH_READDIR_REPLY_BITFLAGS); 409 cpu_to_le16(CEPH_READDIR_REPLY_BITFLAGS);
@@ -427,35 +427,35 @@ more:
427 if (le32_to_cpu(rinfo->dir_dir->frag) != frag) { 427 if (le32_to_cpu(rinfo->dir_dir->frag) != frag) {
428 frag = le32_to_cpu(rinfo->dir_dir->frag); 428 frag = le32_to_cpu(rinfo->dir_dir->frag);
429 if (!rinfo->hash_order) { 429 if (!rinfo->hash_order) {
430 fi->next_offset = req->r_readdir_offset; 430 dfi->next_offset = req->r_readdir_offset;
431 /* adjust ctx->pos to beginning of frag */ 431 /* adjust ctx->pos to beginning of frag */
432 ctx->pos = ceph_make_fpos(frag, 432 ctx->pos = ceph_make_fpos(frag,
433 fi->next_offset, 433 dfi->next_offset,
434 false); 434 false);
435 } 435 }
436 } 436 }
437 437
438 fi->frag = frag; 438 dfi->frag = frag;
439 fi->last_readdir = req; 439 dfi->last_readdir = req;
440 440
441 if (test_bit(CEPH_MDS_R_DID_PREPOPULATE, &req->r_req_flags)) { 441 if (test_bit(CEPH_MDS_R_DID_PREPOPULATE, &req->r_req_flags)) {
442 fi->readdir_cache_idx = req->r_readdir_cache_idx; 442 dfi->readdir_cache_idx = req->r_readdir_cache_idx;
443 if (fi->readdir_cache_idx < 0) { 443 if (dfi->readdir_cache_idx < 0) {
444 /* preclude from marking dir ordered */ 444 /* preclude from marking dir ordered */
445 fi->dir_ordered_count = 0; 445 dfi->dir_ordered_count = 0;
446 } else if (ceph_frag_is_leftmost(frag) && 446 } else if (ceph_frag_is_leftmost(frag) &&
447 fi->next_offset == 2) { 447 dfi->next_offset == 2) {
448 /* note dir version at start of readdir so 448 /* note dir version at start of readdir so
449 * we can tell if any dentries get dropped */ 449 * we can tell if any dentries get dropped */
450 fi->dir_release_count = req->r_dir_release_cnt; 450 dfi->dir_release_count = req->r_dir_release_cnt;
451 fi->dir_ordered_count = req->r_dir_ordered_cnt; 451 dfi->dir_ordered_count = req->r_dir_ordered_cnt;
452 } 452 }
453 } else { 453 } else {
454 dout("readdir !did_prepopulate\n"); 454 dout("readdir !did_prepopulate\n");
455 /* disable readdir cache */ 455 /* disable readdir cache */
456 fi->readdir_cache_idx = -1; 456 dfi->readdir_cache_idx = -1;
457 /* preclude from marking dir complete */ 457 /* preclude from marking dir complete */
458 fi->dir_release_count = 0; 458 dfi->dir_release_count = 0;
459 } 459 }
460 460
461 /* note next offset and last dentry name */ 461 /* note next offset and last dentry name */
@@ -464,19 +464,19 @@ more:
464 rinfo->dir_entries + (rinfo->dir_nr-1); 464 rinfo->dir_entries + (rinfo->dir_nr-1);
465 unsigned next_offset = req->r_reply_info.dir_end ? 465 unsigned next_offset = req->r_reply_info.dir_end ?
466 2 : (fpos_off(rde->offset) + 1); 466 2 : (fpos_off(rde->offset) + 1);
467 err = note_last_dentry(fi, rde->name, rde->name_len, 467 err = note_last_dentry(dfi, rde->name, rde->name_len,
468 next_offset); 468 next_offset);
469 if (err) 469 if (err)
470 return err; 470 return err;
471 } else if (req->r_reply_info.dir_end) { 471 } else if (req->r_reply_info.dir_end) {
472 fi->next_offset = 2; 472 dfi->next_offset = 2;
473 /* keep last name */ 473 /* keep last name */
474 } 474 }
475 } 475 }
476 476
477 rinfo = &fi->last_readdir->r_reply_info; 477 rinfo = &dfi->last_readdir->r_reply_info;
478 dout("readdir frag %x num %d pos %llx chunk first %llx\n", 478 dout("readdir frag %x num %d pos %llx chunk first %llx\n",
479 fi->frag, rinfo->dir_nr, ctx->pos, 479 dfi->frag, rinfo->dir_nr, ctx->pos,
480 rinfo->dir_nr ? rinfo->dir_entries[0].offset : 0LL); 480 rinfo->dir_nr ? rinfo->dir_entries[0].offset : 0LL);
481 481
482 i = 0; 482 i = 0;
@@ -520,52 +520,55 @@ more:
520 ctx->pos++; 520 ctx->pos++;
521 } 521 }
522 522
523 ceph_mdsc_put_request(fi->last_readdir); 523 ceph_mdsc_put_request(dfi->last_readdir);
524 fi->last_readdir = NULL; 524 dfi->last_readdir = NULL;
525 525
526 if (fi->next_offset > 2) { 526 if (dfi->next_offset > 2) {
527 frag = fi->frag; 527 frag = dfi->frag;
528 goto more; 528 goto more;
529 } 529 }
530 530
531 /* more frags? */ 531 /* more frags? */
532 if (!ceph_frag_is_rightmost(fi->frag)) { 532 if (!ceph_frag_is_rightmost(dfi->frag)) {
533 frag = ceph_frag_next(fi->frag); 533 frag = ceph_frag_next(dfi->frag);
534 if (is_hash_order(ctx->pos)) { 534 if (is_hash_order(ctx->pos)) {
535 loff_t new_pos = ceph_make_fpos(ceph_frag_value(frag), 535 loff_t new_pos = ceph_make_fpos(ceph_frag_value(frag),
536 fi->next_offset, true); 536 dfi->next_offset, true);
537 if (new_pos > ctx->pos) 537 if (new_pos > ctx->pos)
538 ctx->pos = new_pos; 538 ctx->pos = new_pos;
539 /* keep last_name */ 539 /* keep last_name */
540 } else { 540 } else {
541 ctx->pos = ceph_make_fpos(frag, fi->next_offset, false); 541 ctx->pos = ceph_make_fpos(frag, dfi->next_offset,
542 kfree(fi->last_name); 542 false);
543 fi->last_name = NULL; 543 kfree(dfi->last_name);
544 dfi->last_name = NULL;
544 } 545 }
545 dout("readdir next frag is %x\n", frag); 546 dout("readdir next frag is %x\n", frag);
546 goto more; 547 goto more;
547 } 548 }
548 fi->flags |= CEPH_F_ATEND; 549 dfi->file_info.flags |= CEPH_F_ATEND;
549 550
550 /* 551 /*
551 * if dir_release_count still matches the dir, no dentries 552 * if dir_release_count still matches the dir, no dentries
552 * were released during the whole readdir, and we should have 553 * were released during the whole readdir, and we should have
553 * the complete dir contents in our cache. 554 * the complete dir contents in our cache.
554 */ 555 */
555 if (atomic64_read(&ci->i_release_count) == fi->dir_release_count) { 556 if (atomic64_read(&ci->i_release_count) ==
557 dfi->dir_release_count) {
556 spin_lock(&ci->i_ceph_lock); 558 spin_lock(&ci->i_ceph_lock);
557 if (fi->dir_ordered_count == atomic64_read(&ci->i_ordered_count)) { 559 if (dfi->dir_ordered_count ==
560 atomic64_read(&ci->i_ordered_count)) {
558 dout(" marking %p complete and ordered\n", inode); 561 dout(" marking %p complete and ordered\n", inode);
559 /* use i_size to track number of entries in 562 /* use i_size to track number of entries in
560 * readdir cache */ 563 * readdir cache */
561 BUG_ON(fi->readdir_cache_idx < 0); 564 BUG_ON(dfi->readdir_cache_idx < 0);
562 i_size_write(inode, fi->readdir_cache_idx * 565 i_size_write(inode, dfi->readdir_cache_idx *
563 sizeof(struct dentry*)); 566 sizeof(struct dentry*));
564 } else { 567 } else {
565 dout(" marking %p complete\n", inode); 568 dout(" marking %p complete\n", inode);
566 } 569 }
567 __ceph_dir_set_complete(ci, fi->dir_release_count, 570 __ceph_dir_set_complete(ci, dfi->dir_release_count,
568 fi->dir_ordered_count); 571 dfi->dir_ordered_count);
569 spin_unlock(&ci->i_ceph_lock); 572 spin_unlock(&ci->i_ceph_lock);
570 } 573 }
571 574
@@ -573,25 +576,25 @@ more:
573 return 0; 576 return 0;
574} 577}
575 578
576static void reset_readdir(struct ceph_file_info *fi) 579static void reset_readdir(struct ceph_dir_file_info *dfi)
577{ 580{
578 if (fi->last_readdir) { 581 if (dfi->last_readdir) {
579 ceph_mdsc_put_request(fi->last_readdir); 582 ceph_mdsc_put_request(dfi->last_readdir);
580 fi->last_readdir = NULL; 583 dfi->last_readdir = NULL;
581 } 584 }
582 kfree(fi->last_name); 585 kfree(dfi->last_name);
583 fi->last_name = NULL; 586 dfi->last_name = NULL;
584 fi->dir_release_count = 0; 587 dfi->dir_release_count = 0;
585 fi->readdir_cache_idx = -1; 588 dfi->readdir_cache_idx = -1;
586 fi->next_offset = 2; /* compensate for . and .. */ 589 dfi->next_offset = 2; /* compensate for . and .. */
587 fi->flags &= ~CEPH_F_ATEND; 590 dfi->file_info.flags &= ~CEPH_F_ATEND;
588} 591}
589 592
590/* 593/*
591 * discard buffered readdir content on seekdir(0), or seek to new frag, 594 * discard buffered readdir content on seekdir(0), or seek to new frag,
592 * or seek prior to current chunk 595 * or seek prior to current chunk
593 */ 596 */
594static bool need_reset_readdir(struct ceph_file_info *fi, loff_t new_pos) 597static bool need_reset_readdir(struct ceph_dir_file_info *dfi, loff_t new_pos)
595{ 598{
596 struct ceph_mds_reply_info_parsed *rinfo; 599 struct ceph_mds_reply_info_parsed *rinfo;
597 loff_t chunk_offset; 600 loff_t chunk_offset;
@@ -600,10 +603,10 @@ static bool need_reset_readdir(struct ceph_file_info *fi, loff_t new_pos)
600 if (is_hash_order(new_pos)) { 603 if (is_hash_order(new_pos)) {
601 /* no need to reset last_name for a forward seek when 604 /* no need to reset last_name for a forward seek when
602 * dentries are sotred in hash order */ 605 * dentries are sotred in hash order */
603 } else if (fi->frag != fpos_frag(new_pos)) { 606 } else if (dfi->frag != fpos_frag(new_pos)) {
604 return true; 607 return true;
605 } 608 }
606 rinfo = fi->last_readdir ? &fi->last_readdir->r_reply_info : NULL; 609 rinfo = dfi->last_readdir ? &dfi->last_readdir->r_reply_info : NULL;
607 if (!rinfo || !rinfo->dir_nr) 610 if (!rinfo || !rinfo->dir_nr)
608 return true; 611 return true;
609 chunk_offset = rinfo->dir_entries[0].offset; 612 chunk_offset = rinfo->dir_entries[0].offset;
@@ -613,7 +616,7 @@ static bool need_reset_readdir(struct ceph_file_info *fi, loff_t new_pos)
613 616
614static loff_t ceph_dir_llseek(struct file *file, loff_t offset, int whence) 617static loff_t ceph_dir_llseek(struct file *file, loff_t offset, int whence)
615{ 618{
616 struct ceph_file_info *fi = file->private_data; 619 struct ceph_dir_file_info *dfi = file->private_data;
617 struct inode *inode = file->f_mapping->host; 620 struct inode *inode = file->f_mapping->host;
618 loff_t retval; 621 loff_t retval;
619 622
@@ -631,20 +634,20 @@ static loff_t ceph_dir_llseek(struct file *file, loff_t offset, int whence)
631 } 634 }
632 635
633 if (offset >= 0) { 636 if (offset >= 0) {
634 if (need_reset_readdir(fi, offset)) { 637 if (need_reset_readdir(dfi, offset)) {
635 dout("dir_llseek dropping %p content\n", file); 638 dout("dir_llseek dropping %p content\n", file);
636 reset_readdir(fi); 639 reset_readdir(dfi);
637 } else if (is_hash_order(offset) && offset > file->f_pos) { 640 } else if (is_hash_order(offset) && offset > file->f_pos) {
638 /* for hash offset, we don't know if a forward seek 641 /* for hash offset, we don't know if a forward seek
639 * is within same frag */ 642 * is within same frag */
640 fi->dir_release_count = 0; 643 dfi->dir_release_count = 0;
641 fi->readdir_cache_idx = -1; 644 dfi->readdir_cache_idx = -1;
642 } 645 }
643 646
644 if (offset != file->f_pos) { 647 if (offset != file->f_pos) {
645 file->f_pos = offset; 648 file->f_pos = offset;
646 file->f_version = 0; 649 file->f_version = 0;
647 fi->flags &= ~CEPH_F_ATEND; 650 dfi->file_info.flags &= ~CEPH_F_ATEND;
648 } 651 }
649 retval = offset; 652 retval = offset;
650 } 653 }
@@ -1352,7 +1355,7 @@ static void ceph_d_prune(struct dentry *dentry)
1352static ssize_t ceph_read_dir(struct file *file, char __user *buf, size_t size, 1355static ssize_t ceph_read_dir(struct file *file, char __user *buf, size_t size,
1353 loff_t *ppos) 1356 loff_t *ppos)
1354{ 1357{
1355 struct ceph_file_info *fi = file->private_data; 1358 struct ceph_dir_file_info *dfi = file->private_data;
1356 struct inode *inode = file_inode(file); 1359 struct inode *inode = file_inode(file);
1357 struct ceph_inode_info *ci = ceph_inode(inode); 1360 struct ceph_inode_info *ci = ceph_inode(inode);
1358 int left; 1361 int left;
@@ -1361,12 +1364,12 @@ static ssize_t ceph_read_dir(struct file *file, char __user *buf, size_t size,
1361 if (!ceph_test_mount_opt(ceph_sb_to_client(inode->i_sb), DIRSTAT)) 1364 if (!ceph_test_mount_opt(ceph_sb_to_client(inode->i_sb), DIRSTAT))
1362 return -EISDIR; 1365 return -EISDIR;
1363 1366
1364 if (!fi->dir_info) { 1367 if (!dfi->dir_info) {
1365 fi->dir_info = kmalloc(bufsize, GFP_KERNEL); 1368 dfi->dir_info = kmalloc(bufsize, GFP_KERNEL);
1366 if (!fi->dir_info) 1369 if (!dfi->dir_info)
1367 return -ENOMEM; 1370 return -ENOMEM;
1368 fi->dir_info_len = 1371 dfi->dir_info_len =
1369 snprintf(fi->dir_info, bufsize, 1372 snprintf(dfi->dir_info, bufsize,
1370 "entries: %20lld\n" 1373 "entries: %20lld\n"
1371 " files: %20lld\n" 1374 " files: %20lld\n"
1372 " subdirs: %20lld\n" 1375 " subdirs: %20lld\n"
@@ -1386,10 +1389,10 @@ static ssize_t ceph_read_dir(struct file *file, char __user *buf, size_t size,
1386 (long)ci->i_rctime.tv_nsec); 1389 (long)ci->i_rctime.tv_nsec);
1387 } 1390 }
1388 1391
1389 if (*ppos >= fi->dir_info_len) 1392 if (*ppos >= dfi->dir_info_len)
1390 return 0; 1393 return 0;
1391 size = min_t(unsigned, size, fi->dir_info_len-*ppos); 1394 size = min_t(unsigned, size, dfi->dir_info_len-*ppos);
1392 left = copy_to_user(buf, fi->dir_info + *ppos, size); 1395 left = copy_to_user(buf, dfi->dir_info + *ppos, size);
1393 if (left == size) 1396 if (left == size)
1394 return -EFAULT; 1397 return -EFAULT;
1395 *ppos += (size - left); 1398 *ppos += (size - left);
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index a1f0aee29c27..4a92acba1e9c 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -161,13 +161,50 @@ out:
161 return req; 161 return req;
162} 162}
163 163
164static int ceph_init_file_info(struct inode *inode, struct file *file,
165 int fmode, bool isdir)
166{
167 struct ceph_file_info *fi;
168
169 dout("%s %p %p 0%o (%s)\n", __func__, inode, file,
170 inode->i_mode, isdir ? "dir" : "regular");
171 BUG_ON(inode->i_fop->release != ceph_release);
172
173 if (isdir) {
174 struct ceph_dir_file_info *dfi =
175 kmem_cache_zalloc(ceph_dir_file_cachep, GFP_KERNEL);
176 if (!dfi) {
177 ceph_put_fmode(ceph_inode(inode), fmode); /* clean up */
178 return -ENOMEM;
179 }
180
181 file->private_data = dfi;
182 fi = &dfi->file_info;
183 dfi->next_offset = 2;
184 dfi->readdir_cache_idx = -1;
185 } else {
186 fi = kmem_cache_zalloc(ceph_file_cachep, GFP_KERNEL);
187 if (!fi) {
188 ceph_put_fmode(ceph_inode(inode), fmode); /* clean up */
189 return -ENOMEM;
190 }
191
192 file->private_data = fi;
193 }
194
195 fi->fmode = fmode;
196 spin_lock_init(&fi->rw_contexts_lock);
197 INIT_LIST_HEAD(&fi->rw_contexts);
198
199 return 0;
200}
201
164/* 202/*
165 * initialize private struct file data. 203 * initialize private struct file data.
166 * if we fail, clean up by dropping fmode reference on the ceph_inode 204 * if we fail, clean up by dropping fmode reference on the ceph_inode
167 */ 205 */
168static int ceph_init_file(struct inode *inode, struct file *file, int fmode) 206static int ceph_init_file(struct inode *inode, struct file *file, int fmode)
169{ 207{
170 struct ceph_file_info *fi;
171 int ret = 0; 208 int ret = 0;
172 209
173 switch (inode->i_mode & S_IFMT) { 210 switch (inode->i_mode & S_IFMT) {
@@ -175,22 +212,10 @@ static int ceph_init_file(struct inode *inode, struct file *file, int fmode)
175 ceph_fscache_register_inode_cookie(inode); 212 ceph_fscache_register_inode_cookie(inode);
176 ceph_fscache_file_set_cookie(inode, file); 213 ceph_fscache_file_set_cookie(inode, file);
177 case S_IFDIR: 214 case S_IFDIR:
178 dout("init_file %p %p 0%o (regular)\n", inode, file, 215 ret = ceph_init_file_info(inode, file, fmode,
179 inode->i_mode); 216 S_ISDIR(inode->i_mode));
180 fi = kmem_cache_zalloc(ceph_file_cachep, GFP_KERNEL); 217 if (ret)
181 if (!fi) { 218 return ret;
182 ceph_put_fmode(ceph_inode(inode), fmode); /* clean up */
183 return -ENOMEM;
184 }
185 fi->fmode = fmode;
186
187 spin_lock_init(&fi->rw_contexts_lock);
188 INIT_LIST_HEAD(&fi->rw_contexts);
189
190 fi->next_offset = 2;
191 fi->readdir_cache_idx = -1;
192 file->private_data = fi;
193 BUG_ON(inode->i_fop->release != ceph_release);
194 break; 219 break;
195 220
196 case S_IFLNK: 221 case S_IFLNK:
@@ -462,16 +487,27 @@ out_acl:
462int ceph_release(struct inode *inode, struct file *file) 487int ceph_release(struct inode *inode, struct file *file)
463{ 488{
464 struct ceph_inode_info *ci = ceph_inode(inode); 489 struct ceph_inode_info *ci = ceph_inode(inode);
465 struct ceph_file_info *fi = file->private_data;
466 490
467 dout("release inode %p file %p\n", inode, file); 491 if (S_ISDIR(inode->i_mode)) {
468 ceph_put_fmode(ci, fi->fmode); 492 struct ceph_dir_file_info *dfi = file->private_data;
469 if (fi->last_readdir) 493 dout("release inode %p dir file %p\n", inode, file);
470 ceph_mdsc_put_request(fi->last_readdir); 494 WARN_ON(!list_empty(&dfi->file_info.rw_contexts));
471 kfree(fi->last_name); 495
472 kfree(fi->dir_info); 496 ceph_put_fmode(ci, dfi->file_info.fmode);
473 WARN_ON(!list_empty(&fi->rw_contexts)); 497
474 kmem_cache_free(ceph_file_cachep, fi); 498 if (dfi->last_readdir)
499 ceph_mdsc_put_request(dfi->last_readdir);
500 kfree(dfi->last_name);
501 kfree(dfi->dir_info);
502 kmem_cache_free(ceph_dir_file_cachep, dfi);
503 } else {
504 struct ceph_file_info *fi = file->private_data;
505 dout("release inode %p regular file %p\n", inode, file);
506 WARN_ON(!list_empty(&fi->rw_contexts));
507
508 ceph_put_fmode(ci, fi->fmode);
509 kmem_cache_free(ceph_file_cachep, fi);
510 }
475 511
476 /* wake up anyone waiting for caps on this inode */ 512 /* wake up anyone waiting for caps on this inode */
477 wake_up_all(&ci->i_cap_wq); 513 wake_up_all(&ci->i_cap_wq);
diff --git a/fs/ceph/super.c b/fs/ceph/super.c
index 9bf9e54259dd..0fc03c456c50 100644
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c
@@ -679,6 +679,7 @@ struct kmem_cache *ceph_cap_cachep;
679struct kmem_cache *ceph_cap_flush_cachep; 679struct kmem_cache *ceph_cap_flush_cachep;
680struct kmem_cache *ceph_dentry_cachep; 680struct kmem_cache *ceph_dentry_cachep;
681struct kmem_cache *ceph_file_cachep; 681struct kmem_cache *ceph_file_cachep;
682struct kmem_cache *ceph_dir_file_cachep;
682 683
683static void ceph_inode_init_once(void *foo) 684static void ceph_inode_init_once(void *foo)
684{ 685{
@@ -715,6 +716,10 @@ static int __init init_caches(void)
715 if (!ceph_file_cachep) 716 if (!ceph_file_cachep)
716 goto bad_file; 717 goto bad_file;
717 718
719 ceph_dir_file_cachep = KMEM_CACHE(ceph_dir_file_info, SLAB_MEM_SPREAD);
720 if (!ceph_dir_file_cachep)
721 goto bad_dir_file;
722
718 error = ceph_fscache_register(); 723 error = ceph_fscache_register();
719 if (error) 724 if (error)
720 goto bad_fscache; 725 goto bad_fscache;
@@ -722,6 +727,8 @@ static int __init init_caches(void)
722 return 0; 727 return 0;
723 728
724bad_fscache: 729bad_fscache:
730 kmem_cache_destroy(ceph_dir_file_cachep);
731bad_dir_file:
725 kmem_cache_destroy(ceph_file_cachep); 732 kmem_cache_destroy(ceph_file_cachep);
726bad_file: 733bad_file:
727 kmem_cache_destroy(ceph_dentry_cachep); 734 kmem_cache_destroy(ceph_dentry_cachep);
@@ -747,6 +754,7 @@ static void destroy_caches(void)
747 kmem_cache_destroy(ceph_cap_flush_cachep); 754 kmem_cache_destroy(ceph_cap_flush_cachep);
748 kmem_cache_destroy(ceph_dentry_cachep); 755 kmem_cache_destroy(ceph_dentry_cachep);
749 kmem_cache_destroy(ceph_file_cachep); 756 kmem_cache_destroy(ceph_file_cachep);
757 kmem_cache_destroy(ceph_dir_file_cachep);
750 758
751 ceph_fscache_unregister(); 759 ceph_fscache_unregister();
752} 760}
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index 1c2086e0fec2..ff49433014e9 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -671,6 +671,10 @@ struct ceph_file_info {
671 671
672 spinlock_t rw_contexts_lock; 672 spinlock_t rw_contexts_lock;
673 struct list_head rw_contexts; 673 struct list_head rw_contexts;
674};
675
676struct ceph_dir_file_info {
677 struct ceph_file_info file_info;
674 678
675 /* readdir: position within the dir */ 679 /* readdir: position within the dir */
676 u32 frag; 680 u32 frag;