about | summary | refs | log | tree | commit | diff | stats
path: root/fs
diff options
context:
space:
mode:
author David Woodhouse <David.Woodhouse@intel.com> 2008-08-12 06:28:00 -0400
committer David Woodhouse <David.Woodhouse@intel.com> 2008-08-12 06:28:00 -0400
commit 742c52533b05d8ae83c794bd6811100675b85ce5 (patch)
tree de89a81d88c19504d1dc4f023a4b480c9022b3b5 /fs
parent 36cd4fb5d277f34fe9e4db0deac2d4efd7dff735 (diff)
parent 10fec20ef5eec1c91913baec1225400f0d02df40 (diff)
Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux-2.6
Conflicts: include/asm-arm/arch-omap/onenand.h
Diffstat (limited to 'fs')
-rw-r--r-- fs/afs/write.c 2
-rw-r--r-- fs/bio.c 9
-rw-r--r-- fs/buffer.c 4
-rw-r--r-- fs/cifs/CHANGES 8
-rw-r--r-- fs/cifs/asn1.c 258
-rw-r--r-- fs/cifs/cifs_debug.c 53
-rw-r--r-- fs/cifs/cifs_spnego.c 18
-rw-r--r-- fs/cifs/cifsfs.c 71
-rw-r--r-- fs/cifs/cifsfs.h 2
-rw-r--r-- fs/cifs/cifspdu.h 2
-rw-r--r-- fs/cifs/cifsproto.h 24
-rw-r--r-- fs/cifs/cifssmb.c 46
-rw-r--r-- fs/cifs/connect.c 173
-rw-r--r-- fs/cifs/dir.c 67
-rw-r--r-- fs/cifs/file.c 21
-rw-r--r-- fs/cifs/inode.c 419
-rw-r--r-- fs/cifs/transport.c 1
-rw-r--r-- fs/configfs/configfs_internal.h 3
-rw-r--r-- fs/configfs/dir.c 210
-rw-r--r-- fs/configfs/symlink.c 26
-rw-r--r-- fs/dcache.c 102
-rw-r--r-- fs/ext4/acl.c 188
-rw-r--r-- fs/ext4/balloc.c 11
-rw-r--r-- fs/ext4/ext4.h 1
-rw-r--r-- fs/ext4/extents.c 55
-rw-r--r-- fs/ext4/ialloc.c 58
-rw-r--r-- fs/ext4/inode.c 164
-rw-r--r-- fs/ext4/mballoc.c 254
-rw-r--r-- fs/ext4/mballoc.h 10
-rw-r--r-- fs/ext4/resize.c 79
-rw-r--r-- fs/ext4/super.c 316
-rw-r--r-- fs/ext4/xattr.c 2
-rw-r--r-- fs/fat/file.c 6
-rw-r--r-- fs/jbd/commit.c 6
-rw-r--r-- fs/jbd2/commit.c 26
-rw-r--r-- fs/jbd2/journal.c 1
-rw-r--r-- fs/nfs/nfsroot.c 2
-rw-r--r-- fs/nfsd/nfs4proc.c 5
-rw-r--r-- fs/ntfs/aops.c 2
-rw-r--r-- fs/ntfs/compress.c 2
-rw-r--r-- fs/ntfs/mft.c 4
-rw-r--r-- fs/ocfs2/aops.c 29
-rw-r--r-- fs/ocfs2/file.c 2
-rw-r--r-- fs/ocfs2/journal.c 173
-rw-r--r-- fs/ocfs2/journal.h 3
-rw-r--r-- fs/ocfs2/ocfs2.h 2
-rw-r--r-- fs/ocfs2/ocfs2_fs.h 5
-rw-r--r-- fs/ocfs2/super.c 12
-rw-r--r-- fs/omfs/inode.c 3
-rw-r--r-- fs/proc/base.c 11
-rw-r--r-- fs/reiserfs/inode.c 2
-rw-r--r-- fs/reiserfs/journal.c 6
-rw-r--r-- fs/splice.c 2
-rw-r--r-- fs/ufs/super.c 2
-rw-r--r-- fs/xfs/Makefile 3
-rw-r--r-- fs/xfs/linux-2.6/kmem.c 6
-rw-r--r-- fs/xfs/linux-2.6/kmem.h 4
-rw-r--r-- fs/xfs/linux-2.6/xfs_aops.c 11
-rw-r--r-- fs/xfs/linux-2.6/xfs_buf.c 14
-rw-r--r-- fs/xfs/linux-2.6/xfs_buf.h 2
-rw-r--r-- fs/xfs/linux-2.6/xfs_export.c 2
-rw-r--r-- fs/xfs/linux-2.6/xfs_ioctl.c 376
-rw-r--r-- fs/xfs/linux-2.6/xfs_iops.c 345
-rw-r--r-- fs/xfs/linux-2.6/xfs_iops.h 2
-rw-r--r-- fs/xfs/linux-2.6/xfs_linux.h 8
-rw-r--r-- fs/xfs/linux-2.6/xfs_stats.c 15
-rw-r--r-- fs/xfs/linux-2.6/xfs_stats.h 12
-rw-r--r-- fs/xfs/linux-2.6/xfs_super.c 938
-rw-r--r-- fs/xfs/linux-2.6/xfs_super.h 4
-rw-r--r-- fs/xfs/linux-2.6/xfs_sysctl.c 8
-rw-r--r-- fs/xfs/linux-2.6/xfs_sysctl.h 4
-rw-r--r-- fs/xfs/linux-2.6/xfs_vnode.c 50
-rw-r--r-- fs/xfs/linux-2.6/xfs_vnode.h 89
-rw-r--r-- fs/xfs/linux-2.6/xfs_xattr.c 330
-rw-r--r-- fs/xfs/quota/xfs_dquot.c 3
-rw-r--r-- fs/xfs/quota/xfs_dquot.h 2
-rw-r--r-- fs/xfs/quota/xfs_dquot_item.c 4
-rw-r--r-- fs/xfs/quota/xfs_qm.c 24
-rw-r--r-- fs/xfs/quota/xfs_qm_syscalls.c 12
-rw-r--r-- fs/xfs/quota/xfs_quota_priv.h 3
-rw-r--r-- fs/xfs/support/ktrace.c 4
-rw-r--r-- fs/xfs/support/uuid.c 8
-rw-r--r-- fs/xfs/support/uuid.h 1
-rw-r--r-- fs/xfs/xfs_acl.c 21
-rw-r--r-- fs/xfs/xfs_acl.h 4
-rw-r--r-- fs/xfs/xfs_attr.c 608
-rw-r--r-- fs/xfs/xfs_attr.h 90
-rw-r--r-- fs/xfs/xfs_attr_leaf.c 99
-rw-r--r-- fs/xfs/xfs_attr_leaf.h 29
-rw-r--r-- fs/xfs/xfs_attr_sf.h 10
-rw-r--r-- fs/xfs/xfs_bmap.c 118
-rw-r--r-- fs/xfs/xfs_bmap.h 13
-rw-r--r-- fs/xfs/xfs_bmap_btree.c 76
-rw-r--r-- fs/xfs/xfs_buf_item.c 8
-rw-r--r-- fs/xfs/xfs_clnt.h 1
-rw-r--r-- fs/xfs/xfs_da_btree.c 48
-rw-r--r-- fs/xfs/xfs_da_btree.h 36
-rw-r--r-- fs/xfs/xfs_dfrag.c 4
-rw-r--r-- fs/xfs/xfs_dir2.c 125
-rw-r--r-- fs/xfs/xfs_dir2.h 6
-rw-r--r-- fs/xfs/xfs_dir2_block.c 56
-rw-r--r-- fs/xfs/xfs_dir2_data.c 5
-rw-r--r-- fs/xfs/xfs_dir2_leaf.c 93
-rw-r--r-- fs/xfs/xfs_dir2_node.c 402
-rw-r--r-- fs/xfs/xfs_dir2_sf.c 83
-rw-r--r-- fs/xfs/xfs_dir2_sf.h 6
-rw-r--r-- fs/xfs/xfs_dir2_trace.c 20
-rw-r--r-- fs/xfs/xfs_dmapi.h 2
-rw-r--r-- fs/xfs/xfs_error.c 13
-rw-r--r-- fs/xfs/xfs_error.h 1
-rw-r--r-- fs/xfs/xfs_extfree_item.c 6
-rw-r--r-- fs/xfs/xfs_filestream.c 4
-rw-r--r-- fs/xfs/xfs_fs.h 4
-rw-r--r-- fs/xfs/xfs_fsops.c 4
-rw-r--r-- fs/xfs/xfs_inode.c 165
-rw-r--r-- fs/xfs/xfs_inode.h 3
-rw-r--r-- fs/xfs/xfs_inode_item.c 7
-rw-r--r-- fs/xfs/xfs_iomap.c 10
-rw-r--r-- fs/xfs/xfs_itable.c 6
-rw-r--r-- fs/xfs/xfs_log.c 49
-rw-r--r-- fs/xfs/xfs_log_priv.h 6
-rw-r--r-- fs/xfs/xfs_log_recover.c 21
-rw-r--r-- fs/xfs/xfs_mount.c 118
-rw-r--r-- fs/xfs/xfs_mount.h 17
-rw-r--r-- fs/xfs/xfs_mru_cache.c 21
-rw-r--r-- fs/xfs/xfs_rename.c 22
-rw-r--r-- fs/xfs/xfs_rtalloc.c 2
-rw-r--r-- fs/xfs/xfs_sb.h 17
-rw-r--r-- fs/xfs/xfs_trans.c 4
-rw-r--r-- fs/xfs/xfs_trans_inode.c 2
-rw-r--r-- fs/xfs/xfs_trans_item.c 8
-rw-r--r-- fs/xfs/xfs_vfsops.c 610
-rw-r--r-- fs/xfs/xfs_vfsops.h 5
-rw-r--r-- fs/xfs/xfs_vnodeops.c 722
-rw-r--r-- fs/xfs/xfs_vnodeops.h 12
135 files changed, 4892 insertions, 4177 deletions
diff --git a/fs/afs/write.c b/fs/afs/write.c
index 9a849ad3c489..065b4e10681a 100644
--- a/fs/afs/write.c
+++ b/fs/afs/write.c
@@ -404,7 +404,7 @@ static int afs_write_back_from_locked_page(struct afs_writeback *wb,
404 page = pages[loop]; 404 page = pages[loop];
405 if (page->index > wb->last) 405 if (page->index > wb->last)
406 break; 406 break;
407 if (TestSetPageLocked(page)) 407 if (!trylock_page(page))
408 break; 408 break;
409 if (!PageDirty(page) || 409 if (!PageDirty(page) ||
410 page_private(page) != (unsigned long) wb) { 410 page_private(page) != (unsigned long) wb) {
diff --git a/fs/bio.c b/fs/bio.c
index 25f1af0d81e5..8000e2fa16cb 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -77,11 +77,8 @@ struct bio_vec *bvec_alloc_bs(gfp_t gfp_mask, int nr, unsigned long *idx, struct
77 */ 77 */
78 78
79 bvl = mempool_alloc(bs->bvec_pools[*idx], gfp_mask); 79 bvl = mempool_alloc(bs->bvec_pools[*idx], gfp_mask);
80 if (bvl) { 80 if (bvl)
81 struct biovec_slab *bp = bvec_slabs + *idx; 81 memset(bvl, 0, bvec_nr_vecs(*idx) * sizeof(struct bio_vec));
82
83 memset(bvl, 0, bp->nr_vecs * sizeof(struct bio_vec));
84 }
85 82
86 return bvl; 83 return bvl;
87} 84}
@@ -149,7 +146,7 @@ struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs)
149 goto out; 146 goto out;
150 } 147 }
151 bio->bi_flags |= idx << BIO_POOL_OFFSET; 148 bio->bi_flags |= idx << BIO_POOL_OFFSET;
152 bio->bi_max_vecs = bvec_slabs[idx].nr_vecs; 149 bio->bi_max_vecs = bvec_nr_vecs(idx);
153 } 150 }
154 bio->bi_io_vec = bvl; 151 bio->bi_io_vec = bvl;
155 } 152 }
diff --git a/fs/buffer.c b/fs/buffer.c
index 4dbe52948e8f..38653e36e225 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -1720,7 +1720,7 @@ static int __block_write_full_page(struct inode *inode, struct page *page,
1720 */ 1720 */
1721 if (wbc->sync_mode != WB_SYNC_NONE || !wbc->nonblocking) { 1721 if (wbc->sync_mode != WB_SYNC_NONE || !wbc->nonblocking) {
1722 lock_buffer(bh); 1722 lock_buffer(bh);
1723 } else if (test_set_buffer_locked(bh)) { 1723 } else if (!trylock_buffer(bh)) {
1724 redirty_page_for_writepage(wbc, page); 1724 redirty_page_for_writepage(wbc, page);
1725 continue; 1725 continue;
1726 } 1726 }
@@ -3000,7 +3000,7 @@ void ll_rw_block(int rw, int nr, struct buffer_head *bhs[])
3000 3000
3001 if (rw == SWRITE || rw == SWRITE_SYNC) 3001 if (rw == SWRITE || rw == SWRITE_SYNC)
3002 lock_buffer(bh); 3002 lock_buffer(bh);
3003 else if (test_set_buffer_locked(bh)) 3003 else if (!trylock_buffer(bh))
3004 continue; 3004 continue;
3005 3005
3006 if (rw == WRITE || rw == SWRITE || rw == SWRITE_SYNC) { 3006 if (rw == WRITE || rw == SWRITE || rw == SWRITE_SYNC) {
diff --git a/fs/cifs/CHANGES b/fs/cifs/CHANGES
index 1f3465201fdf..f5d0083e09fa 100644
--- a/fs/cifs/CHANGES
+++ b/fs/cifs/CHANGES
@@ -1,3 +1,11 @@
1Version 1.54
2------------
3Fix premature write failure on congested networks (we would give up
4on EAGAIN from the socket too quickly on large writes).
5Cifs_mkdir and cifs_create now respect the setgid bit on parent dir.
6Fix endian problems in acl (mode from/to cifs acl) on bigendian
7architectures.
8
1Version 1.53 9Version 1.53
2------------ 10------------
3DFS support added (Microsoft Distributed File System client support needed 11DFS support added (Microsoft Distributed File System client support needed
diff --git a/fs/cifs/asn1.c b/fs/cifs/asn1.c
index 6bb440b257b0..5fabd2caf93c 100644
--- a/fs/cifs/asn1.c
+++ b/fs/cifs/asn1.c
@@ -483,6 +483,7 @@ decode_negTokenInit(unsigned char *security_blob, int length,
483 483
484 asn1_open(&ctx, security_blob, length); 484 asn1_open(&ctx, security_blob, length);
485 485
486 /* GSSAPI header */
486 if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) { 487 if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) {
487 cFYI(1, ("Error decoding negTokenInit header")); 488 cFYI(1, ("Error decoding negTokenInit header"));
488 return 0; 489 return 0;
@@ -490,153 +491,142 @@ decode_negTokenInit(unsigned char *security_blob, int length,
490 || (tag != ASN1_EOC)) { 491 || (tag != ASN1_EOC)) {
491 cFYI(1, ("cls = %d con = %d tag = %d", cls, con, tag)); 492 cFYI(1, ("cls = %d con = %d tag = %d", cls, con, tag));
492 return 0; 493 return 0;
493 } else { 494 }
494 /* remember to free obj->oid */
495 rc = asn1_header_decode(&ctx, &end, &cls, &con, &tag);
496 if (rc) {
497 if ((tag == ASN1_OJI) && (con == ASN1_PRI)) {
498 rc = asn1_oid_decode(&ctx, end, &oid, &oidlen);
499 if (rc) {
500 rc = compare_oid(oid, oidlen,
501 SPNEGO_OID,
502 SPNEGO_OID_LEN);
503 kfree(oid);
504 }
505 } else
506 rc = 0;
507 }
508 495
509 if (!rc) { 496 /* Check for SPNEGO OID -- remember to free obj->oid */
510 cFYI(1, ("Error decoding negTokenInit header")); 497 rc = asn1_header_decode(&ctx, &end, &cls, &con, &tag);
511 return 0; 498 if (rc) {
512 } 499 if ((tag == ASN1_OJI) && (con == ASN1_PRI) &&
500 (cls == ASN1_UNI)) {
501 rc = asn1_oid_decode(&ctx, end, &oid, &oidlen);
502 if (rc) {
503 rc = compare_oid(oid, oidlen, SPNEGO_OID,
504 SPNEGO_OID_LEN);
505 kfree(oid);
506 }
507 } else
508 rc = 0;
509 }
513 510
514 if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) { 511 /* SPNEGO OID not present or garbled -- bail out */
515 cFYI(1, ("Error decoding negTokenInit")); 512 if (!rc) {
516 return 0; 513 cFYI(1, ("Error decoding negTokenInit header"));
517 } else if ((cls != ASN1_CTX) || (con != ASN1_CON) 514 return 0;
518 || (tag != ASN1_EOC)) { 515 }
519 cFYI(1,
520 ("cls = %d con = %d tag = %d end = %p (%d) exit 0",
521 cls, con, tag, end, *end));
522 return 0;
523 }
524 516
525 if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) { 517 if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) {
526 cFYI(1, ("Error decoding negTokenInit")); 518 cFYI(1, ("Error decoding negTokenInit"));
527 return 0; 519 return 0;
528 } else if ((cls != ASN1_UNI) || (con != ASN1_CON) 520 } else if ((cls != ASN1_CTX) || (con != ASN1_CON)
529 || (tag != ASN1_SEQ)) { 521 || (tag != ASN1_EOC)) {
530 cFYI(1, 522 cFYI(1,
531 ("cls = %d con = %d tag = %d end = %p (%d) exit 1", 523 ("cls = %d con = %d tag = %d end = %p (%d) exit 0",
532 cls, con, tag, end, *end)); 524 cls, con, tag, end, *end));
533 return 0; 525 return 0;
534 } 526 }
535 527
536 if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) { 528 if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) {
537 cFYI(1, ("Error decoding 2nd part of negTokenInit")); 529 cFYI(1, ("Error decoding negTokenInit"));
538 return 0; 530 return 0;
539 } else if ((cls != ASN1_CTX) || (con != ASN1_CON) 531 } else if ((cls != ASN1_UNI) || (con != ASN1_CON)
540 || (tag != ASN1_EOC)) { 532 || (tag != ASN1_SEQ)) {
541 cFYI(1, 533 cFYI(1,
542 ("cls = %d con = %d tag = %d end = %p (%d) exit 0", 534 ("cls = %d con = %d tag = %d end = %p (%d) exit 1",
543 cls, con, tag, end, *end)); 535 cls, con, tag, end, *end));
544 return 0; 536 return 0;
545 } 537 }
546 538
547 if (asn1_header_decode 539 if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) {
548 (&ctx, &sequence_end, &cls, &con, &tag) == 0) { 540 cFYI(1, ("Error decoding 2nd part of negTokenInit"));
549 cFYI(1, ("Error decoding 2nd part of negTokenInit")); 541 return 0;
550 return 0; 542 } else if ((cls != ASN1_CTX) || (con != ASN1_CON)
551 } else if ((cls != ASN1_UNI) || (con != ASN1_CON) 543 || (tag != ASN1_EOC)) {
552 || (tag != ASN1_SEQ)) { 544 cFYI(1,
553 cFYI(1, 545 ("cls = %d con = %d tag = %d end = %p (%d) exit 0",
554 ("cls = %d con = %d tag = %d end = %p (%d) exit 1", 546 cls, con, tag, end, *end));
555 cls, con, tag, end, *end)); 547 return 0;
556 return 0; 548 }
557 }
558 549
559 while (!asn1_eoc_decode(&ctx, sequence_end)) { 550 if (asn1_header_decode
560 rc = asn1_header_decode(&ctx, &end, &cls, &con, &tag); 551 (&ctx, &sequence_end, &cls, &con, &tag) == 0) {
561 if (!rc) { 552 cFYI(1, ("Error decoding 2nd part of negTokenInit"));
562 cFYI(1, 553 return 0;
563 ("Error decoding negTokenInit hdr exit2")); 554 } else if ((cls != ASN1_UNI) || (con != ASN1_CON)
564 return 0; 555 || (tag != ASN1_SEQ)) {
565 } 556 cFYI(1,
566 if ((tag == ASN1_OJI) && (con == ASN1_PRI)) { 557 ("cls = %d con = %d tag = %d end = %p (%d) exit 1",
567 if (asn1_oid_decode(&ctx, end, &oid, &oidlen)) { 558 cls, con, tag, end, *end));
568 559 return 0;
569 cFYI(1, 560 }
570 ("OID len = %d oid = 0x%lx 0x%lx "
571 "0x%lx 0x%lx",
572 oidlen, *oid, *(oid + 1),
573 *(oid + 2), *(oid + 3)));
574
575 if (compare_oid(oid, oidlen,
576 MSKRB5_OID,
577 MSKRB5_OID_LEN))
578 use_kerberos = true;
579 else if (compare_oid(oid, oidlen,
580 KRB5_OID,
581 KRB5_OID_LEN))
582 use_kerberos = true;
583 else if (compare_oid(oid, oidlen,
584 NTLMSSP_OID,
585 NTLMSSP_OID_LEN))
586 use_ntlmssp = true;
587
588 kfree(oid);
589 }
590 } else {
591 cFYI(1, ("Should be an oid what is going on?"));
592 }
593 }
594 561
595 if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) { 562 while (!asn1_eoc_decode(&ctx, sequence_end)) {
596 cFYI(1, 563 rc = asn1_header_decode(&ctx, &end, &cls, &con, &tag);
597 ("Error decoding last part negTokenInit exit3")); 564 if (!rc) {
598 return 0;
599 } else if ((cls != ASN1_CTX) || (con != ASN1_CON)) {
600 /* tag = 3 indicating mechListMIC */
601 cFYI(1, 565 cFYI(1,
602 ("Exit 4 cls = %d con = %d tag = %d end = %p (%d)", 566 ("Error decoding negTokenInit hdr exit2"));
603 cls, con, tag, end, *end));
604 return 0; 567 return 0;
605 } 568 }
606 if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) { 569 if ((tag == ASN1_OJI) && (con == ASN1_PRI)) {
607 cFYI(1, 570 if (asn1_oid_decode(&ctx, end, &oid, &oidlen)) {
608 ("Error decoding last part negTokenInit exit5")); 571
609 return 0; 572 cFYI(1, ("OID len = %d oid = 0x%lx 0x%lx "
610 } else if ((cls != ASN1_UNI) || (con != ASN1_CON) 573 "0x%lx 0x%lx", oidlen, *oid,
611 || (tag != ASN1_SEQ)) { 574 *(oid + 1), *(oid + 2), *(oid + 3)));
612 cFYI(1, ("cls = %d con = %d tag = %d end = %p (%d)", 575
613 cls, con, tag, end, *end)); 576 if (compare_oid(oid, oidlen, MSKRB5_OID,
577 MSKRB5_OID_LEN))
578 use_kerberos = true;
579 else if (compare_oid(oid, oidlen, KRB5_OID,
580 KRB5_OID_LEN))
581 use_kerberos = true;
582 else if (compare_oid(oid, oidlen, NTLMSSP_OID,
583 NTLMSSP_OID_LEN))
584 use_ntlmssp = true;
585
586 kfree(oid);
587 }
588 } else {
589 cFYI(1, ("Should be an oid what is going on?"));
614 } 590 }
591 }
615 592
616 if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) { 593 if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) {
617 cFYI(1, 594 cFYI(1, ("Error decoding last part negTokenInit exit3"));
618 ("Error decoding last part negTokenInit exit 7")); 595 return 0;
619 return 0; 596 } else if ((cls != ASN1_CTX) || (con != ASN1_CON)) {
620 } else if ((cls != ASN1_CTX) || (con != ASN1_CON)) { 597 /* tag = 3 indicating mechListMIC */
621 cFYI(1, 598 cFYI(1, ("Exit 4 cls = %d con = %d tag = %d end = %p (%d)",
622 ("Exit 8 cls = %d con = %d tag = %d end = %p (%d)", 599 cls, con, tag, end, *end));
623 cls, con, tag, end, *end)); 600 return 0;
624 return 0; 601 }
625 } 602 if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) {
626 if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) { 603 cFYI(1, ("Error decoding last part negTokenInit exit5"));
627 cFYI(1, 604 return 0;
628 ("Error decoding last part negTokenInit exit9")); 605 } else if ((cls != ASN1_UNI) || (con != ASN1_CON)
629 return 0; 606 || (tag != ASN1_SEQ)) {
630 } else if ((cls != ASN1_UNI) || (con != ASN1_PRI) 607 cFYI(1, ("cls = %d con = %d tag = %d end = %p (%d)",
631 || (tag != ASN1_GENSTR)) { 608 cls, con, tag, end, *end));
632 cFYI(1, 609 }
633 ("Exit10 cls = %d con = %d tag = %d end = %p (%d)", 610
634 cls, con, tag, end, *end)); 611 if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) {
635 return 0; 612 cFYI(1, ("Error decoding last part negTokenInit exit 7"));
636 } 613 return 0;
637 cFYI(1, ("Need to call asn1_octets_decode() function for %s", 614 } else if ((cls != ASN1_CTX) || (con != ASN1_CON)) {
638 ctx.pointer)); /* is this UTF-8 or ASCII? */ 615 cFYI(1, ("Exit 8 cls = %d con = %d tag = %d end = %p (%d)",
616 cls, con, tag, end, *end));
617 return 0;
618 }
619 if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) {
620 cFYI(1, ("Error decoding last part negTokenInit exit9"));
621 return 0;
622 } else if ((cls != ASN1_UNI) || (con != ASN1_PRI)
623 || (tag != ASN1_GENSTR)) {
624 cFYI(1, ("Exit10 cls = %d con = %d tag = %d end = %p (%d)",
625 cls, con, tag, end, *end));
626 return 0;
639 } 627 }
628 cFYI(1, ("Need to call asn1_octets_decode() function for %s",
629 ctx.pointer)); /* is this UTF-8 or ASCII? */
640 630
641 if (use_kerberos) 631 if (use_kerberos)
642 *secType = Kerberos; 632 *secType = Kerberos;
diff --git a/fs/cifs/cifs_debug.c b/fs/cifs/cifs_debug.c
index 688a2d42153f..69a12aae91d3 100644
--- a/fs/cifs/cifs_debug.c
+++ b/fs/cifs/cifs_debug.c
@@ -79,27 +79,25 @@ void cifs_dump_mids(struct TCP_Server_Info *server)
79 spin_lock(&GlobalMid_Lock); 79 spin_lock(&GlobalMid_Lock);
80 list_for_each(tmp, &server->pending_mid_q) { 80 list_for_each(tmp, &server->pending_mid_q) {
81 mid_entry = list_entry(tmp, struct mid_q_entry, qhead); 81 mid_entry = list_entry(tmp, struct mid_q_entry, qhead);
82 if (mid_entry) { 82 cERROR(1, ("State: %d Cmd: %d Pid: %d Tsk: %p Mid %d",
83 cERROR(1, ("State: %d Cmd: %d Pid: %d Tsk: %p Mid %d", 83 mid_entry->midState,
84 mid_entry->midState, 84 (int)mid_entry->command,
85 (int)mid_entry->command, 85 mid_entry->pid,
86 mid_entry->pid, 86 mid_entry->tsk,
87 mid_entry->tsk, 87 mid_entry->mid));
88 mid_entry->mid));
89#ifdef CONFIG_CIFS_STATS2 88#ifdef CONFIG_CIFS_STATS2
90 cERROR(1, ("IsLarge: %d buf: %p time rcv: %ld now: %ld", 89 cERROR(1, ("IsLarge: %d buf: %p time rcv: %ld now: %ld",
91 mid_entry->largeBuf, 90 mid_entry->largeBuf,
92 mid_entry->resp_buf, 91 mid_entry->resp_buf,
93 mid_entry->when_received, 92 mid_entry->when_received,
94 jiffies)); 93 jiffies));
95#endif /* STATS2 */ 94#endif /* STATS2 */
96 cERROR(1, ("IsMult: %d IsEnd: %d", mid_entry->multiRsp, 95 cERROR(1, ("IsMult: %d IsEnd: %d", mid_entry->multiRsp,
97 mid_entry->multiEnd)); 96 mid_entry->multiEnd));
98 if (mid_entry->resp_buf) { 97 if (mid_entry->resp_buf) {
99 cifs_dump_detail(mid_entry->resp_buf); 98 cifs_dump_detail(mid_entry->resp_buf);
100 cifs_dump_mem("existing buf: ", 99 cifs_dump_mem("existing buf: ",
101 mid_entry->resp_buf, 62); 100 mid_entry->resp_buf, 62);
102 }
103 } 101 }
104 } 102 }
105 spin_unlock(&GlobalMid_Lock); 103 spin_unlock(&GlobalMid_Lock);
@@ -163,16 +161,13 @@ static int cifs_debug_data_proc_show(struct seq_file *m, void *v)
163 mid_entry = list_entry(tmp1, struct 161 mid_entry = list_entry(tmp1, struct
164 mid_q_entry, 162 mid_q_entry,
165 qhead); 163 qhead);
166 if (mid_entry) { 164 seq_printf(m, "State: %d com: %d pid:"
167 seq_printf(m, 165 " %d tsk: %p mid %d\n",
168 "State: %d com: %d pid:" 166 mid_entry->midState,
169 " %d tsk: %p mid %d\n", 167 (int)mid_entry->command,
170 mid_entry->midState, 168 mid_entry->pid,
171 (int)mid_entry->command, 169 mid_entry->tsk,
172 mid_entry->pid, 170 mid_entry->mid);
173 mid_entry->tsk,
174 mid_entry->mid);
175 }
176 } 171 }
177 spin_unlock(&GlobalMid_Lock); 172 spin_unlock(&GlobalMid_Lock);
178 } 173 }
diff --git a/fs/cifs/cifs_spnego.c b/fs/cifs/cifs_spnego.c
index 7013aaff6aed..2434ab0e8791 100644
--- a/fs/cifs/cifs_spnego.c
+++ b/fs/cifs/cifs_spnego.c
@@ -66,8 +66,8 @@ struct key_type cifs_spnego_key_type = {
66 .describe = user_describe, 66 .describe = user_describe,
67}; 67};
68 68
69#define MAX_VER_STR_LEN 9 /* length of longest version string e.g. 69#define MAX_VER_STR_LEN 8 /* length of longest version string e.g.
70 strlen(";ver=0xFF") */ 70 strlen("ver=0xFF") */
71#define MAX_MECH_STR_LEN 13 /* length of longest security mechanism name, eg 71#define MAX_MECH_STR_LEN 13 /* length of longest security mechanism name, eg
72 in future could have strlen(";sec=ntlmsspi") */ 72 in future could have strlen(";sec=ntlmsspi") */
73#define MAX_IPV6_ADDR_LEN 42 /* eg FEDC:BA98:7654:3210:FEDC:BA98:7654:3210/60 */ 73#define MAX_IPV6_ADDR_LEN 42 /* eg FEDC:BA98:7654:3210:FEDC:BA98:7654:3210/60 */
@@ -81,11 +81,15 @@ cifs_get_spnego_key(struct cifsSesInfo *sesInfo)
81 struct key *spnego_key; 81 struct key *spnego_key;
82 const char *hostname = server->hostname; 82 const char *hostname = server->hostname;
83 83
84 /* BB: come up with better scheme for determining length */ 84 /* length of fields (with semicolons): ver=0xyz ip4=ipaddress
85 /* length of fields (with semicolons): ver=0xyz ipv4= ipaddress host= 85 host=hostname sec=mechanism uid=0xFF user=username */
86 hostname sec=mechanism uid=0x uid */ 86 desc_len = MAX_VER_STR_LEN +
87 desc_len = MAX_VER_STR_LEN + 5 + MAX_IPV6_ADDR_LEN + 1 + 6 + 87 6 /* len of "host=" */ + strlen(hostname) +
88 strlen(hostname) + MAX_MECH_STR_LEN + 8 + (sizeof(uid_t) * 2); 88 5 /* len of ";ipv4=" */ + MAX_IPV6_ADDR_LEN +
89 MAX_MECH_STR_LEN +
90 7 /* len of ";uid=0x" */ + (sizeof(uid_t) * 2) +
91 6 /* len of ";user=" */ + strlen(sesInfo->userName) + 1;
92
89 spnego_key = ERR_PTR(-ENOMEM); 93 spnego_key = ERR_PTR(-ENOMEM);
90 description = kzalloc(desc_len, GFP_KERNEL); 94 description = kzalloc(desc_len, GFP_KERNEL);
91 if (description == NULL) 95 if (description == NULL)
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 1ec7076f7b24..e8da4ee761b5 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -930,36 +930,34 @@ static int cifs_oplock_thread(void *dummyarg)
930 schedule_timeout(39*HZ); 930 schedule_timeout(39*HZ);
931 } else { 931 } else {
932 oplock_item = list_entry(GlobalOplock_Q.next, 932 oplock_item = list_entry(GlobalOplock_Q.next,
933 struct oplock_q_entry, qhead); 933 struct oplock_q_entry, qhead);
934 if (oplock_item) { 934 cFYI(1, ("found oplock item to write out"));
935 cFYI(1, ("found oplock item to write out")); 935 pTcon = oplock_item->tcon;
936 pTcon = oplock_item->tcon; 936 inode = oplock_item->pinode;
937 inode = oplock_item->pinode; 937 netfid = oplock_item->netfid;
938 netfid = oplock_item->netfid; 938 spin_unlock(&GlobalMid_Lock);
939 spin_unlock(&GlobalMid_Lock); 939 DeleteOplockQEntry(oplock_item);
940 DeleteOplockQEntry(oplock_item); 940 /* can not grab inode sem here since it would
941 /* can not grab inode sem here since it would
942 deadlock when oplock received on delete 941 deadlock when oplock received on delete
943 since vfs_unlink holds the i_mutex across 942 since vfs_unlink holds the i_mutex across
944 the call */ 943 the call */
945 /* mutex_lock(&inode->i_mutex);*/ 944 /* mutex_lock(&inode->i_mutex);*/
946 if (S_ISREG(inode->i_mode)) { 945 if (S_ISREG(inode->i_mode)) {
947 rc = 946 rc = filemap_fdatawrite(inode->i_mapping);
948 filemap_fdatawrite(inode->i_mapping); 947 if (CIFS_I(inode)->clientCanCacheRead == 0) {
949 if (CIFS_I(inode)->clientCanCacheRead 948 waitrc = filemap_fdatawait(
950 == 0) { 949 inode->i_mapping);
951 waitrc = filemap_fdatawait(inode->i_mapping); 950 invalidate_remote_inode(inode);
952 invalidate_remote_inode(inode); 951 }
953 } 952 if (rc == 0)
954 if (rc == 0) 953 rc = waitrc;
955 rc = waitrc; 954 } else
956 } else 955 rc = 0;
957 rc = 0; 956 /* mutex_unlock(&inode->i_mutex);*/
958 /* mutex_unlock(&inode->i_mutex);*/ 957 if (rc)
959 if (rc) 958 CIFS_I(inode)->write_behind_rc = rc;
960 CIFS_I(inode)->write_behind_rc = rc; 959 cFYI(1, ("Oplock flush inode %p rc %d",
961 cFYI(1, ("Oplock flush inode %p rc %d", 960 inode, rc));
962 inode, rc));
963 961
964 /* releasing stale oplock after recent reconnect 962 /* releasing stale oplock after recent reconnect
965 of smb session using a now incorrect file 963 of smb session using a now incorrect file
@@ -967,15 +965,13 @@ static int cifs_oplock_thread(void *dummyarg)
967 not bother sending an oplock release if session 965 not bother sending an oplock release if session
968 to server still is disconnected since oplock 966 to server still is disconnected since oplock
969 already released by the server in that case */ 967 already released by the server in that case */
970 if (pTcon->tidStatus != CifsNeedReconnect) { 968 if (pTcon->tidStatus != CifsNeedReconnect) {
971 rc = CIFSSMBLock(0, pTcon, netfid, 969 rc = CIFSSMBLock(0, pTcon, netfid,
972 0 /* len */ , 0 /* offset */, 0, 970 0 /* len */ , 0 /* offset */, 0,
973 0, LOCKING_ANDX_OPLOCK_RELEASE, 971 0, LOCKING_ANDX_OPLOCK_RELEASE,
974 false /* wait flag */); 972 false /* wait flag */);
975 cFYI(1, ("Oplock release rc = %d", rc)); 973 cFYI(1, ("Oplock release rc = %d", rc));
976 } 974 }
977 } else
978 spin_unlock(&GlobalMid_Lock);
979 set_current_state(TASK_INTERRUPTIBLE); 975 set_current_state(TASK_INTERRUPTIBLE);
980 schedule_timeout(1); /* yield in case q were corrupt */ 976 schedule_timeout(1); /* yield in case q were corrupt */
981 } 977 }
@@ -1001,8 +997,7 @@ static int cifs_dnotify_thread(void *dummyarg)
1001 list_for_each(tmp, &GlobalSMBSessionList) { 997 list_for_each(tmp, &GlobalSMBSessionList) {
1002 ses = list_entry(tmp, struct cifsSesInfo, 998 ses = list_entry(tmp, struct cifsSesInfo,
1003 cifsSessionList); 999 cifsSessionList);
1004 if (ses && ses->server && 1000 if (ses->server && atomic_read(&ses->server->inFlight))
1005 atomic_read(&ses->server->inFlight))
1006 wake_up_all(&ses->server->response_q); 1001 wake_up_all(&ses->server->response_q);
1007 } 1002 }
1008 read_unlock(&GlobalSMBSeslock); 1003 read_unlock(&GlobalSMBSeslock);
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index 25a6cbd15529..135c965c4137 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -101,5 +101,5 @@ extern long cifs_ioctl(struct file *filep, unsigned int cmd, unsigned long arg);
101extern const struct export_operations cifs_export_ops; 101extern const struct export_operations cifs_export_ops;
102#endif /* EXPERIMENTAL */ 102#endif /* EXPERIMENTAL */
103 103
104#define CIFS_VERSION "1.53" 104#define CIFS_VERSION "1.54"
105#endif /* _CIFSFS_H */ 105#endif /* _CIFSFS_H */
diff --git a/fs/cifs/cifspdu.h b/fs/cifs/cifspdu.h
index 409abce12732..d2a073edd1b8 100644
--- a/fs/cifs/cifspdu.h
+++ b/fs/cifs/cifspdu.h
@@ -262,7 +262,7 @@
262 */ 262 */
263#define CIFS_NO_HANDLE 0xFFFF 263#define CIFS_NO_HANDLE 0xFFFF
264 264
265#define NO_CHANGE_64 cpu_to_le64(0xFFFFFFFFFFFFFFFFULL) 265#define NO_CHANGE_64 0xFFFFFFFFFFFFFFFFULL
266#define NO_CHANGE_32 0xFFFFFFFFUL 266#define NO_CHANGE_32 0xFFFFFFFFUL
267 267
268/* IPC$ in ASCII */ 268/* IPC$ in ASCII */
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index b9f5e935f821..a729d083e6f4 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -172,12 +172,13 @@ extern int CIFSSMBQFSUnixInfo(const int xid, struct cifsTconInfo *tcon);
172extern int CIFSSMBQFSPosixInfo(const int xid, struct cifsTconInfo *tcon, 172extern int CIFSSMBQFSPosixInfo(const int xid, struct cifsTconInfo *tcon,
173 struct kstatfs *FSData); 173 struct kstatfs *FSData);
174 174
175extern int CIFSSMBSetTimes(const int xid, struct cifsTconInfo *tcon, 175extern int CIFSSMBSetPathInfo(const int xid, struct cifsTconInfo *tcon,
176 const char *fileName, const FILE_BASIC_INFO *data, 176 const char *fileName, const FILE_BASIC_INFO *data,
177 const struct nls_table *nls_codepage, 177 const struct nls_table *nls_codepage,
178 int remap_special_chars); 178 int remap_special_chars);
179extern int CIFSSMBSetFileTimes(const int xid, struct cifsTconInfo *tcon, 179extern int CIFSSMBSetFileInfo(const int xid, struct cifsTconInfo *tcon,
180 const FILE_BASIC_INFO *data, __u16 fid); 180 const FILE_BASIC_INFO *data, __u16 fid,
181 __u32 pid_of_opener);
181#if 0 182#if 0
182extern int CIFSSMBSetAttrLegacy(int xid, struct cifsTconInfo *tcon, 183extern int CIFSSMBSetAttrLegacy(int xid, struct cifsTconInfo *tcon,
183 char *fileName, __u16 dos_attributes, 184 char *fileName, __u16 dos_attributes,
@@ -191,9 +192,20 @@ extern int CIFSSMBSetEOF(const int xid, struct cifsTconInfo *tcon,
191extern int CIFSSMBSetFileSize(const int xid, struct cifsTconInfo *tcon, 192extern int CIFSSMBSetFileSize(const int xid, struct cifsTconInfo *tcon,
192 __u64 size, __u16 fileHandle, __u32 opener_pid, 193 __u64 size, __u16 fileHandle, __u32 opener_pid,
193 bool AllocSizeFlag); 194 bool AllocSizeFlag);
194extern int CIFSSMBUnixSetPerms(const int xid, struct cifsTconInfo *pTcon, 195
195 char *full_path, __u64 mode, __u64 uid, 196struct cifs_unix_set_info_args {
196 __u64 gid, dev_t dev, 197 __u64 ctime;
198 __u64 atime;
199 __u64 mtime;
200 __u64 mode;
201 __u64 uid;
202 __u64 gid;
203 dev_t device;
204};
205
206extern int CIFSSMBUnixSetInfo(const int xid, struct cifsTconInfo *pTcon,
207 char *fileName,
208 const struct cifs_unix_set_info_args *args,
197 const struct nls_table *nls_codepage, 209 const struct nls_table *nls_codepage,
198 int remap_special_chars); 210 int remap_special_chars);
199 211
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index c621ffa2ca90..994de7c90474 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -128,8 +128,7 @@ static void mark_open_files_invalid(struct cifsTconInfo *pTcon)
128 write_lock(&GlobalSMBSeslock); 128 write_lock(&GlobalSMBSeslock);
129 list_for_each_safe(tmp, tmp1, &pTcon->openFileList) { 129 list_for_each_safe(tmp, tmp1, &pTcon->openFileList) {
130 open_file = list_entry(tmp, struct cifsFileInfo, tlist); 130 open_file = list_entry(tmp, struct cifsFileInfo, tlist);
131 if (open_file) 131 open_file->invalidHandle = true;
132 open_file->invalidHandle = true;
133 } 132 }
134 write_unlock(&GlobalSMBSeslock); 133 write_unlock(&GlobalSMBSeslock);
135 /* BB Add call to invalidate_inodes(sb) for all superblocks mounted 134 /* BB Add call to invalidate_inodes(sb) for all superblocks mounted
@@ -4816,8 +4815,8 @@ CIFSSMBSetFileSize(const int xid, struct cifsTconInfo *tcon, __u64 size,
4816 time and resort to the original setpathinfo level which takes the ancient 4815 time and resort to the original setpathinfo level which takes the ancient
4817 DOS time format with 2 second granularity */ 4816 DOS time format with 2 second granularity */
4818int 4817int
4819CIFSSMBSetFileTimes(const int xid, struct cifsTconInfo *tcon, 4818CIFSSMBSetFileInfo(const int xid, struct cifsTconInfo *tcon,
4820 const FILE_BASIC_INFO *data, __u16 fid) 4819 const FILE_BASIC_INFO *data, __u16 fid, __u32 pid_of_opener)
4821{ 4820{
4822 struct smb_com_transaction2_sfi_req *pSMB = NULL; 4821 struct smb_com_transaction2_sfi_req *pSMB = NULL;
4823 char *data_offset; 4822 char *data_offset;
@@ -4830,11 +4829,8 @@ CIFSSMBSetFileTimes(const int xid, struct cifsTconInfo *tcon,
4830 if (rc) 4829 if (rc)
4831 return rc; 4830 return rc;
4832 4831
4833 /* At this point there is no need to override the current pid 4832 pSMB->hdr.Pid = cpu_to_le16((__u16)pid_of_opener);
4834 with the pid of the opener, but that could change if we someday 4833 pSMB->hdr.PidHigh = cpu_to_le16((__u16)(pid_of_opener >> 16));
4835 use an existing handle (rather than opening one on the fly) */
4836 /* pSMB->hdr.Pid = cpu_to_le16((__u16)pid_of_opener);
4837 pSMB->hdr.PidHigh = cpu_to_le16((__u16)(pid_of_opener >> 16));*/
4838 4834
4839 params = 6; 4835 params = 6;
4840 pSMB->MaxSetupCount = 0; 4836 pSMB->MaxSetupCount = 0;
@@ -4882,9 +4878,9 @@ CIFSSMBSetFileTimes(const int xid, struct cifsTconInfo *tcon,
4882 4878
4883 4879
4884int 4880int
4885CIFSSMBSetTimes(const int xid, struct cifsTconInfo *tcon, const char *fileName, 4881CIFSSMBSetPathInfo(const int xid, struct cifsTconInfo *tcon,
4886 const FILE_BASIC_INFO *data, 4882 const char *fileName, const FILE_BASIC_INFO *data,
4887 const struct nls_table *nls_codepage, int remap) 4883 const struct nls_table *nls_codepage, int remap)
4888{ 4884{
4889 TRANSACTION2_SPI_REQ *pSMB = NULL; 4885 TRANSACTION2_SPI_REQ *pSMB = NULL;
4890 TRANSACTION2_SPI_RSP *pSMBr = NULL; 4886 TRANSACTION2_SPI_RSP *pSMBr = NULL;
@@ -5013,10 +5009,9 @@ SetAttrLgcyRetry:
5013#endif /* temporarily unneeded SetAttr legacy function */ 5009#endif /* temporarily unneeded SetAttr legacy function */
5014 5010
5015int 5011int
5016CIFSSMBUnixSetPerms(const int xid, struct cifsTconInfo *tcon, 5012CIFSSMBUnixSetInfo(const int xid, struct cifsTconInfo *tcon, char *fileName,
5017 char *fileName, __u64 mode, __u64 uid, __u64 gid, 5013 const struct cifs_unix_set_info_args *args,
5018 dev_t device, const struct nls_table *nls_codepage, 5014 const struct nls_table *nls_codepage, int remap)
5019 int remap)
5020{ 5015{
5021 TRANSACTION2_SPI_REQ *pSMB = NULL; 5016 TRANSACTION2_SPI_REQ *pSMB = NULL;
5022 TRANSACTION2_SPI_RSP *pSMBr = NULL; 5017 TRANSACTION2_SPI_RSP *pSMBr = NULL;
@@ -5025,6 +5020,7 @@ CIFSSMBUnixSetPerms(const int xid, struct cifsTconInfo *tcon,
5025 int bytes_returned = 0; 5020 int bytes_returned = 0;
5026 FILE_UNIX_BASIC_INFO *data_offset; 5021 FILE_UNIX_BASIC_INFO *data_offset;
5027 __u16 params, param_offset, offset, count, byte_count; 5022 __u16 params, param_offset, offset, count, byte_count;
5023 __u64 mode = args->mode;
5028 5024
5029 cFYI(1, ("In SetUID/GID/Mode")); 5025 cFYI(1, ("In SetUID/GID/Mode"));
5030setPermsRetry: 5026setPermsRetry:
@@ -5080,16 +5076,16 @@ setPermsRetry:
5080 set file size and do not want to truncate file size to zero 5076 set file size and do not want to truncate file size to zero
5081 accidently as happened on one Samba server beta by putting 5077 accidently as happened on one Samba server beta by putting
5082 zero instead of -1 here */ 5078 zero instead of -1 here */
5083 data_offset->EndOfFile = NO_CHANGE_64; 5079 data_offset->EndOfFile = cpu_to_le64(NO_CHANGE_64);
5084 data_offset->NumOfBytes = NO_CHANGE_64; 5080 data_offset->NumOfBytes = cpu_to_le64(NO_CHANGE_64);
5085 data_offset->LastStatusChange = NO_CHANGE_64; 5081 data_offset->LastStatusChange = cpu_to_le64(args->ctime);
5086 data_offset->LastAccessTime = NO_CHANGE_64; 5082 data_offset->LastAccessTime = cpu_to_le64(args->atime);
5087 data_offset->LastModificationTime = NO_CHANGE_64; 5083 data_offset->LastModificationTime = cpu_to_le64(args->mtime);
5088 data_offset->Uid = cpu_to_le64(uid); 5084 data_offset->Uid = cpu_to_le64(args->uid);
5089 data_offset->Gid = cpu_to_le64(gid); 5085 data_offset->Gid = cpu_to_le64(args->gid);
5090 /* better to leave device as zero when it is */ 5086 /* better to leave device as zero when it is */
5091 data_offset->DevMajor = cpu_to_le64(MAJOR(device)); 5087 data_offset->DevMajor = cpu_to_le64(MAJOR(args->device));
5092 data_offset->DevMinor = cpu_to_le64(MINOR(device)); 5088 data_offset->DevMinor = cpu_to_le64(MINOR(args->device));
5093 data_offset->Permissions = cpu_to_le64(mode); 5089 data_offset->Permissions = cpu_to_le64(mode);
5094 5090
5095 if (S_ISREG(mode)) 5091 if (S_ISREG(mode))
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index b51d5777cde6..0711db65afe8 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -151,7 +151,7 @@ cifs_reconnect(struct TCP_Server_Info *server)
151 } 151 }
152 list_for_each(tmp, &GlobalTreeConnectionList) { 152 list_for_each(tmp, &GlobalTreeConnectionList) {
153 tcon = list_entry(tmp, struct cifsTconInfo, cifsConnectionList); 153 tcon = list_entry(tmp, struct cifsTconInfo, cifsConnectionList);
154 if ((tcon) && (tcon->ses) && (tcon->ses->server == server)) 154 if ((tcon->ses) && (tcon->ses->server == server))
155 tcon->tidStatus = CifsNeedReconnect; 155 tcon->tidStatus = CifsNeedReconnect;
156 } 156 }
157 read_unlock(&GlobalSMBSeslock); 157 read_unlock(&GlobalSMBSeslock);
@@ -173,14 +173,12 @@ cifs_reconnect(struct TCP_Server_Info *server)
173 mid_entry = list_entry(tmp, struct 173 mid_entry = list_entry(tmp, struct
174 mid_q_entry, 174 mid_q_entry,
175 qhead); 175 qhead);
176 if (mid_entry) { 176 if (mid_entry->midState == MID_REQUEST_SUBMITTED) {
177 if (mid_entry->midState == MID_REQUEST_SUBMITTED) {
178 /* Mark other intransit requests as needing 177 /* Mark other intransit requests as needing
179 retry so we do not immediately mark the 178 retry so we do not immediately mark the
180 session bad again (ie after we reconnect 179 session bad again (ie after we reconnect
181 below) as they timeout too */ 180 below) as they timeout too */
182 mid_entry->midState = MID_RETRY_NEEDED; 181 mid_entry->midState = MID_RETRY_NEEDED;
183 }
184 } 182 }
185 } 183 }
186 spin_unlock(&GlobalMid_Lock); 184 spin_unlock(&GlobalMid_Lock);
@@ -351,11 +349,9 @@ cifs_demultiplex_thread(struct TCP_Server_Info *server)
351 349
352 current->flags |= PF_MEMALLOC; 350 current->flags |= PF_MEMALLOC;
353 cFYI(1, ("Demultiplex PID: %d", task_pid_nr(current))); 351 cFYI(1, ("Demultiplex PID: %d", task_pid_nr(current)));
354 write_lock(&GlobalSMBSeslock); 352
355 atomic_inc(&tcpSesAllocCount); 353 length = atomic_inc_return(&tcpSesAllocCount);
356 length = tcpSesAllocCount.counter; 354 if (length > 1)
357 write_unlock(&GlobalSMBSeslock);
358 if (length > 1)
359 mempool_resize(cifs_req_poolp, length + cifs_min_rcv, 355 mempool_resize(cifs_req_poolp, length + cifs_min_rcv,
360 GFP_KERNEL); 356 GFP_KERNEL);
361 357
@@ -745,14 +741,11 @@ multi_t2_fnd:
745 coming home not much else we can do but free the memory */ 741 coming home not much else we can do but free the memory */
746 } 742 }
747 743
748 write_lock(&GlobalSMBSeslock);
749 atomic_dec(&tcpSesAllocCount);
750 length = tcpSesAllocCount.counter;
751
752 /* last chance to mark ses pointers invalid 744 /* last chance to mark ses pointers invalid
753 if there are any pointing to this (e.g 745 if there are any pointing to this (e.g
754 if a crazy root user tried to kill cifsd 746 if a crazy root user tried to kill cifsd
755 kernel thread explicitly this might happen) */ 747 kernel thread explicitly this might happen) */
748 write_lock(&GlobalSMBSeslock);
756 list_for_each(tmp, &GlobalSMBSessionList) { 749 list_for_each(tmp, &GlobalSMBSessionList) {
757 ses = list_entry(tmp, struct cifsSesInfo, 750 ses = list_entry(tmp, struct cifsSesInfo,
758 cifsSessionList); 751 cifsSessionList);
@@ -763,6 +756,8 @@ multi_t2_fnd:
763 756
764 kfree(server->hostname); 757 kfree(server->hostname);
765 kfree(server); 758 kfree(server);
759
760 length = atomic_dec_return(&tcpSesAllocCount);
766 if (length > 0) 761 if (length > 0)
767 mempool_resize(cifs_req_poolp, length + cifs_min_rcv, 762 mempool_resize(cifs_req_poolp, length + cifs_min_rcv,
768 GFP_KERNEL); 763 GFP_KERNEL);
@@ -3623,97 +3618,91 @@ int cifs_setup_session(unsigned int xid, struct cifsSesInfo *pSesInfo,
3623 } 3618 }
3624 first_time = 1; 3619 first_time = 1;
3625 } 3620 }
3626 if (!rc) { 3621
3627 pSesInfo->flags = 0; 3622 if (rc)
3628 pSesInfo->capabilities = pSesInfo->server->capabilities; 3623 goto ss_err_exit;
3629 if (linuxExtEnabled == 0) 3624
3630 pSesInfo->capabilities &= (~CAP_UNIX); 3625 pSesInfo->flags = 0;
3626 pSesInfo->capabilities = pSesInfo->server->capabilities;
3627 if (linuxExtEnabled == 0)
3628 pSesInfo->capabilities &= (~CAP_UNIX);
3631 /* pSesInfo->sequence_number = 0;*/ 3629 /* pSesInfo->sequence_number = 0;*/
3632 cFYI(1, 3630 cFYI(1, ("Security Mode: 0x%x Capabilities: 0x%x TimeAdjust: %d",
3633 ("Security Mode: 0x%x Capabilities: 0x%x TimeAdjust: %d", 3631 pSesInfo->server->secMode,
3634 pSesInfo->server->secMode, 3632 pSesInfo->server->capabilities,
3635 pSesInfo->server->capabilities, 3633 pSesInfo->server->timeAdj));
3636 pSesInfo->server->timeAdj)); 3634 if (experimEnabled < 2)
3637 if (experimEnabled < 2) 3635 rc = CIFS_SessSetup(xid, pSesInfo, first_time, nls_info);
3638 rc = CIFS_SessSetup(xid, pSesInfo, 3636 else if (extended_security
3639 first_time, nls_info); 3637 && (pSesInfo->capabilities & CAP_EXTENDED_SECURITY)
3640 else if (extended_security 3638 && (pSesInfo->server->secType == NTLMSSP)) {
3641 && (pSesInfo->capabilities 3639 rc = -EOPNOTSUPP;
3642 & CAP_EXTENDED_SECURITY) 3640 } else if (extended_security
3643 && (pSesInfo->server->secType == NTLMSSP)) { 3641 && (pSesInfo->capabilities & CAP_EXTENDED_SECURITY)
3644 rc = -EOPNOTSUPP; 3642 && (pSesInfo->server->secType == RawNTLMSSP)) {
3645 } else if (extended_security 3643 cFYI(1, ("NTLMSSP sesssetup"));
3646 && (pSesInfo->capabilities & CAP_EXTENDED_SECURITY) 3644 rc = CIFSNTLMSSPNegotiateSessSetup(xid, pSesInfo, &ntlmv2_flag,
3647 && (pSesInfo->server->secType == RawNTLMSSP)) { 3645 nls_info);
3648 cFYI(1, ("NTLMSSP sesssetup")); 3646 if (!rc) {
3649 rc = CIFSNTLMSSPNegotiateSessSetup(xid, 3647 if (ntlmv2_flag) {
3650 pSesInfo, 3648 char *v2_response;
3651 &ntlmv2_flag, 3649 cFYI(1, ("more secure NTLM ver2 hash"));
3652 nls_info); 3650 if (CalcNTLMv2_partial_mac_key(pSesInfo,
3653 if (!rc) { 3651 nls_info)) {
3654 if (ntlmv2_flag) { 3652 rc = -ENOMEM;
3655 char *v2_response; 3653 goto ss_err_exit;
3656 cFYI(1, ("more secure NTLM ver2 hash")); 3654 } else
3657 if (CalcNTLMv2_partial_mac_key(pSesInfo, 3655 v2_response = kmalloc(16 + 64 /* blob*/,
3658 nls_info)) { 3656 GFP_KERNEL);
3659 rc = -ENOMEM; 3657 if (v2_response) {
3660 goto ss_err_exit; 3658 CalcNTLMv2_response(pSesInfo,
3661 } else 3659 v2_response);
3662 v2_response = kmalloc(16 + 64 /* blob */, GFP_KERNEL); 3660 /* if (first_time)
3663 if (v2_response) { 3661 cifs_calculate_ntlmv2_mac_key */
3664 CalcNTLMv2_response(pSesInfo, 3662 kfree(v2_response);
3665 v2_response);
3666 /* if (first_time)
3667 cifs_calculate_ntlmv2_mac_key(
3668 pSesInfo->server->mac_signing_key,
3669 response, ntlm_session_key,*/
3670 kfree(v2_response);
3671 /* BB Put dummy sig in SessSetup PDU? */ 3663 /* BB Put dummy sig in SessSetup PDU? */
3672 } else {
3673 rc = -ENOMEM;
3674 goto ss_err_exit;
3675 }
3676
3677 } else { 3664 } else {
3678 SMBNTencrypt(pSesInfo->password, 3665 rc = -ENOMEM;
3679 pSesInfo->server->cryptKey, 3666 goto ss_err_exit;
3680 ntlm_session_key);
3681
3682 if (first_time)
3683 cifs_calculate_mac_key(
3684 &pSesInfo->server->mac_signing_key,
3685 ntlm_session_key,
3686 pSesInfo->password);
3687 } 3667 }
3668
3669 } else {
3670 SMBNTencrypt(pSesInfo->password,
3671 pSesInfo->server->cryptKey,
3672 ntlm_session_key);
3673
3674 if (first_time)
3675 cifs_calculate_mac_key(
3676 &pSesInfo->server->mac_signing_key,
3677 ntlm_session_key,
3678 pSesInfo->password);
3679 }
3688 /* for better security the weaker lanman hash not sent 3680 /* for better security the weaker lanman hash not sent
3689 in AuthSessSetup so we no longer calculate it */ 3681 in AuthSessSetup so we no longer calculate it */
3690 3682
3691 rc = CIFSNTLMSSPAuthSessSetup(xid, 3683 rc = CIFSNTLMSSPAuthSessSetup(xid, pSesInfo,
3692 pSesInfo, 3684 ntlm_session_key,
3693 ntlm_session_key, 3685 ntlmv2_flag,
3694 ntlmv2_flag, 3686 nls_info);
3695 nls_info); 3687 }
3696 } 3688 } else { /* old style NTLM 0.12 session setup */
3697 } else { /* old style NTLM 0.12 session setup */ 3689 SMBNTencrypt(pSesInfo->password, pSesInfo->server->cryptKey,
3698 SMBNTencrypt(pSesInfo->password, 3690 ntlm_session_key);
3699 pSesInfo->server->cryptKey,
3700 ntlm_session_key);
3701 3691
3702 if (first_time) 3692 if (first_time)
3703 cifs_calculate_mac_key( 3693 cifs_calculate_mac_key(
3704 &pSesInfo->server->mac_signing_key, 3694 &pSesInfo->server->mac_signing_key,
3705 ntlm_session_key, pSesInfo->password); 3695 ntlm_session_key, pSesInfo->password);
3706 3696
3707 rc = CIFSSessSetup(xid, pSesInfo, 3697 rc = CIFSSessSetup(xid, pSesInfo, ntlm_session_key, nls_info);
3708 ntlm_session_key, nls_info); 3698 }
3709 } 3699 if (rc) {
3710 if (rc) { 3700 cERROR(1, ("Send error in SessSetup = %d", rc));
3711 cERROR(1, ("Send error in SessSetup = %d", rc)); 3701 } else {
3712 } else { 3702 cFYI(1, ("CIFS Session Established successfully"));
3713 cFYI(1, ("CIFS Session Established successfully"));
3714 pSesInfo->status = CifsGood; 3703 pSesInfo->status = CifsGood;
3715 }
3716 } 3704 }
3705
3717ss_err_exit: 3706ss_err_exit:
3718 return rc; 3707 return rc;
3719} 3708}
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c
index fb69c1fa85c9..e962e75e6f7b 100644
--- a/fs/cifs/dir.c
+++ b/fs/cifs/dir.c
@@ -226,23 +226,28 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode,
226 /* If Open reported that we actually created a file 226 /* If Open reported that we actually created a file
227 then we now have to set the mode if possible */ 227 then we now have to set the mode if possible */
228 if ((pTcon->unix_ext) && (oplock & CIFS_CREATE_ACTION)) { 228 if ((pTcon->unix_ext) && (oplock & CIFS_CREATE_ACTION)) {
229 struct cifs_unix_set_info_args args = {
230 .mode = mode,
231 .ctime = NO_CHANGE_64,
232 .atime = NO_CHANGE_64,
233 .mtime = NO_CHANGE_64,
234 .device = 0,
235 };
236
229 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID) { 237 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID) {
230 CIFSSMBUnixSetPerms(xid, pTcon, full_path, mode, 238 args.uid = (__u64) current->fsuid;
231 (__u64)current->fsuid, 239 if (inode->i_mode & S_ISGID)
232 (__u64)current->fsgid, 240 args.gid = (__u64) inode->i_gid;
233 0 /* dev */, 241 else
234 cifs_sb->local_nls, 242 args.gid = (__u64) current->fsgid;
235 cifs_sb->mnt_cifs_flags &
236 CIFS_MOUNT_MAP_SPECIAL_CHR);
237 } else { 243 } else {
238 CIFSSMBUnixSetPerms(xid, pTcon, full_path, mode, 244 args.uid = NO_CHANGE_64;
239 (__u64)-1, 245 args.gid = NO_CHANGE_64;
240 (__u64)-1,
241 0 /* dev */,
242 cifs_sb->local_nls,
243 cifs_sb->mnt_cifs_flags &
244 CIFS_MOUNT_MAP_SPECIAL_CHR);
245 } 246 }
247 CIFSSMBUnixSetInfo(xid, pTcon, full_path, &args,
248 cifs_sb->local_nls,
249 cifs_sb->mnt_cifs_flags &
250 CIFS_MOUNT_MAP_SPECIAL_CHR);
246 } else { 251 } else {
247 /* BB implement mode setting via Windows security 252 /* BB implement mode setting via Windows security
248 descriptors e.g. */ 253 descriptors e.g. */
@@ -267,7 +272,12 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode,
267 (cifs_sb->mnt_cifs_flags & 272 (cifs_sb->mnt_cifs_flags &
268 CIFS_MOUNT_SET_UID)) { 273 CIFS_MOUNT_SET_UID)) {
269 newinode->i_uid = current->fsuid; 274 newinode->i_uid = current->fsuid;
270 newinode->i_gid = current->fsgid; 275 if (inode->i_mode & S_ISGID)
276 newinode->i_gid =
277 inode->i_gid;
278 else
279 newinode->i_gid =
280 current->fsgid;
271 } 281 }
272 } 282 }
273 } 283 }
@@ -357,21 +367,24 @@ int cifs_mknod(struct inode *inode, struct dentry *direntry, int mode,
357 if (full_path == NULL) 367 if (full_path == NULL)
358 rc = -ENOMEM; 368 rc = -ENOMEM;
359 else if (pTcon->unix_ext) { 369 else if (pTcon->unix_ext) {
360 mode &= ~current->fs->umask; 370 struct cifs_unix_set_info_args args = {
371 .mode = mode & ~current->fs->umask,
372 .ctime = NO_CHANGE_64,
373 .atime = NO_CHANGE_64,
374 .mtime = NO_CHANGE_64,
375 .device = device_number,
376 };
361 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID) { 377 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID) {
362 rc = CIFSSMBUnixSetPerms(xid, pTcon, full_path, 378 args.uid = (__u64) current->fsuid;
363 mode, (__u64)current->fsuid, 379 args.gid = (__u64) current->fsgid;
364 (__u64)current->fsgid,
365 device_number, cifs_sb->local_nls,
366 cifs_sb->mnt_cifs_flags &
367 CIFS_MOUNT_MAP_SPECIAL_CHR);
368 } else { 380 } else {
369 rc = CIFSSMBUnixSetPerms(xid, pTcon, 381 args.uid = NO_CHANGE_64;
370 full_path, mode, (__u64)-1, (__u64)-1, 382 args.gid = NO_CHANGE_64;
371 device_number, cifs_sb->local_nls,
372 cifs_sb->mnt_cifs_flags &
373 CIFS_MOUNT_MAP_SPECIAL_CHR);
374 } 383 }
384 rc = CIFSSMBUnixSetInfo(xid, pTcon, full_path,
385 &args, cifs_sb->local_nls,
386 cifs_sb->mnt_cifs_flags &
387 CIFS_MOUNT_MAP_SPECIAL_CHR);
375 388
376 if (!rc) { 389 if (!rc) {
377 rc = cifs_get_inode_info_unix(&newinode, full_path, 390 rc = cifs_get_inode_info_unix(&newinode, full_path,
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 0aac824371a5..ff14d14903a0 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -310,18 +310,19 @@ int cifs_open(struct inode *inode, struct file *file)
310 /* time to set mode which we can not set earlier due to 310 /* time to set mode which we can not set earlier due to
311 problems creating new read-only files */ 311 problems creating new read-only files */
312 if (pTcon->unix_ext) { 312 if (pTcon->unix_ext) {
313 CIFSSMBUnixSetPerms(xid, pTcon, full_path, 313 struct cifs_unix_set_info_args args = {
314 inode->i_mode, 314 .mode = inode->i_mode,
315 (__u64)-1, (__u64)-1, 0 /* dev */, 315 .uid = NO_CHANGE_64,
316 .gid = NO_CHANGE_64,
317 .ctime = NO_CHANGE_64,
318 .atime = NO_CHANGE_64,
319 .mtime = NO_CHANGE_64,
320 .device = 0,
321 };
322 CIFSSMBUnixSetInfo(xid, pTcon, full_path, &args,
316 cifs_sb->local_nls, 323 cifs_sb->local_nls,
317 cifs_sb->mnt_cifs_flags & 324 cifs_sb->mnt_cifs_flags &
318 CIFS_MOUNT_MAP_SPECIAL_CHR); 325 CIFS_MOUNT_MAP_SPECIAL_CHR);
319 } else {
320 /* BB implement via Windows security descriptors eg
321 CIFSSMBWinSetPerms(xid, pTcon, full_path, mode,
322 -1, -1, local_nls);
323 in the meantime could set r/o dos attribute when
324 perms are eg: mode & 0222 == 0 */
325 } 326 }
326 } 327 }
327 328
@@ -1280,7 +1281,7 @@ retry:
1280 1281
1281 if (first < 0) 1282 if (first < 0)
1282 lock_page(page); 1283 lock_page(page);
1283 else if (TestSetPageLocked(page)) 1284 else if (!trylock_page(page))
1284 break; 1285 break;
1285 1286
1286 if (unlikely(page->mapping != mapping)) { 1287 if (unlikely(page->mapping != mapping)) {
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 46e54d39461d..28a22092d450 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -737,7 +737,7 @@ psx_del_no_retry:
737 /* ATTRS set to normal clears r/o bit */ 737 /* ATTRS set to normal clears r/o bit */
738 pinfo_buf->Attributes = cpu_to_le32(ATTR_NORMAL); 738 pinfo_buf->Attributes = cpu_to_le32(ATTR_NORMAL);
739 if (!(pTcon->ses->flags & CIFS_SES_NT4)) 739 if (!(pTcon->ses->flags & CIFS_SES_NT4))
740 rc = CIFSSMBSetTimes(xid, pTcon, full_path, 740 rc = CIFSSMBSetPathInfo(xid, pTcon, full_path,
741 pinfo_buf, 741 pinfo_buf,
742 cifs_sb->local_nls, 742 cifs_sb->local_nls,
743 cifs_sb->mnt_cifs_flags & 743 cifs_sb->mnt_cifs_flags &
@@ -767,9 +767,10 @@ psx_del_no_retry:
767 cifs_sb->mnt_cifs_flags & 767 cifs_sb->mnt_cifs_flags &
768 CIFS_MOUNT_MAP_SPECIAL_CHR); 768 CIFS_MOUNT_MAP_SPECIAL_CHR);
769 if (rc == 0) { 769 if (rc == 0) {
770 rc = CIFSSMBSetFileTimes(xid, pTcon, 770 rc = CIFSSMBSetFileInfo(xid, pTcon,
771 pinfo_buf, 771 pinfo_buf,
772 netfid); 772 netfid,
773 current->tgid);
773 CIFSSMBClose(xid, pTcon, netfid); 774 CIFSSMBClose(xid, pTcon, netfid);
774 } 775 }
775 } 776 }
@@ -984,32 +985,41 @@ mkdir_get_info:
984 * failed to get it from the server or was set bogus */ 985 * failed to get it from the server or was set bogus */
985 if ((direntry->d_inode) && (direntry->d_inode->i_nlink < 2)) 986 if ((direntry->d_inode) && (direntry->d_inode->i_nlink < 2))
986 direntry->d_inode->i_nlink = 2; 987 direntry->d_inode->i_nlink = 2;
988
987 mode &= ~current->fs->umask; 989 mode &= ~current->fs->umask;
990 /* must turn on setgid bit if parent dir has it */
991 if (inode->i_mode & S_ISGID)
992 mode |= S_ISGID;
993
988 if (pTcon->unix_ext) { 994 if (pTcon->unix_ext) {
995 struct cifs_unix_set_info_args args = {
996 .mode = mode,
997 .ctime = NO_CHANGE_64,
998 .atime = NO_CHANGE_64,
999 .mtime = NO_CHANGE_64,
1000 .device = 0,
1001 };
989 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID) { 1002 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID) {
990 CIFSSMBUnixSetPerms(xid, pTcon, full_path, 1003 args.uid = (__u64)current->fsuid;
991 mode, 1004 if (inode->i_mode & S_ISGID)
992 (__u64)current->fsuid, 1005 args.gid = (__u64)inode->i_gid;
993 (__u64)current->fsgid, 1006 else
994 0 /* dev_t */, 1007 args.gid = (__u64)current->fsgid;
995 cifs_sb->local_nls,
996 cifs_sb->mnt_cifs_flags &
997 CIFS_MOUNT_MAP_SPECIAL_CHR);
998 } else { 1008 } else {
999 CIFSSMBUnixSetPerms(xid, pTcon, full_path, 1009 args.uid = NO_CHANGE_64;
1000 mode, (__u64)-1, 1010 args.gid = NO_CHANGE_64;
1001 (__u64)-1, 0 /* dev_t */,
1002 cifs_sb->local_nls,
1003 cifs_sb->mnt_cifs_flags &
1004 CIFS_MOUNT_MAP_SPECIAL_CHR);
1005 } 1011 }
1012 CIFSSMBUnixSetInfo(xid, pTcon, full_path, &args,
1013 cifs_sb->local_nls,
1014 cifs_sb->mnt_cifs_flags &
1015 CIFS_MOUNT_MAP_SPECIAL_CHR);
1006 } else { 1016 } else {
1007 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL) && 1017 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL) &&
1008 (mode & S_IWUGO) == 0) { 1018 (mode & S_IWUGO) == 0) {
1009 FILE_BASIC_INFO pInfo; 1019 FILE_BASIC_INFO pInfo;
1010 memset(&pInfo, 0, sizeof(pInfo)); 1020 memset(&pInfo, 0, sizeof(pInfo));
1011 pInfo.Attributes = cpu_to_le32(ATTR_READONLY); 1021 pInfo.Attributes = cpu_to_le32(ATTR_READONLY);
1012 CIFSSMBSetTimes(xid, pTcon, full_path, 1022 CIFSSMBSetPathInfo(xid, pTcon, full_path,
1013 &pInfo, cifs_sb->local_nls, 1023 &pInfo, cifs_sb->local_nls,
1014 cifs_sb->mnt_cifs_flags & 1024 cifs_sb->mnt_cifs_flags &
1015 CIFS_MOUNT_MAP_SPECIAL_CHR); 1025 CIFS_MOUNT_MAP_SPECIAL_CHR);
@@ -1024,8 +1034,12 @@ mkdir_get_info:
1024 CIFS_MOUNT_SET_UID) { 1034 CIFS_MOUNT_SET_UID) {
1025 direntry->d_inode->i_uid = 1035 direntry->d_inode->i_uid =
1026 current->fsuid; 1036 current->fsuid;
1027 direntry->d_inode->i_gid = 1037 if (inode->i_mode & S_ISGID)
1028 current->fsgid; 1038 direntry->d_inode->i_gid =
1039 inode->i_gid;
1040 else
1041 direntry->d_inode->i_gid =
1042 current->fsgid;
1029 } 1043 }
1030 } 1044 }
1031 } 1045 }
@@ -1310,10 +1324,11 @@ int cifs_revalidate(struct dentry *direntry)
1310/* if (S_ISDIR(direntry->d_inode->i_mode)) 1324/* if (S_ISDIR(direntry->d_inode->i_mode))
1311 shrink_dcache_parent(direntry); */ 1325 shrink_dcache_parent(direntry); */
1312 if (S_ISREG(direntry->d_inode->i_mode)) { 1326 if (S_ISREG(direntry->d_inode->i_mode)) {
1313 if (direntry->d_inode->i_mapping) 1327 if (direntry->d_inode->i_mapping) {
1314 wbrc = filemap_fdatawait(direntry->d_inode->i_mapping); 1328 wbrc = filemap_fdatawait(direntry->d_inode->i_mapping);
1315 if (wbrc) 1329 if (wbrc)
1316 CIFS_I(direntry->d_inode)->write_behind_rc = wbrc; 1330 CIFS_I(direntry->d_inode)->write_behind_rc = wbrc;
1331 }
1317 /* may eventually have to do this for open files too */ 1332 /* may eventually have to do this for open files too */
1318 if (list_empty(&(cifsInode->openFileList))) { 1333 if (list_empty(&(cifsInode->openFileList))) {
1319 /* changed on server - flush read ahead pages */ 1334 /* changed on server - flush read ahead pages */
@@ -1489,30 +1504,228 @@ cifs_set_file_size(struct inode *inode, struct iattr *attrs,
1489 return rc; 1504 return rc;
1490} 1505}
1491 1506
1492int cifs_setattr(struct dentry *direntry, struct iattr *attrs) 1507static int
1508cifs_set_file_info(struct inode *inode, struct iattr *attrs, int xid,
1509 char *full_path, __u32 dosattr)
1510{
1511 int rc;
1512 int oplock = 0;
1513 __u16 netfid;
1514 __u32 netpid;
1515 bool set_time = false;
1516 struct cifsFileInfo *open_file;
1517 struct cifsInodeInfo *cifsInode = CIFS_I(inode);
1518 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
1519 struct cifsTconInfo *pTcon = cifs_sb->tcon;
1520 FILE_BASIC_INFO info_buf;
1521
1522 if (attrs->ia_valid & ATTR_ATIME) {
1523 set_time = true;
1524 info_buf.LastAccessTime =
1525 cpu_to_le64(cifs_UnixTimeToNT(attrs->ia_atime));
1526 } else
1527 info_buf.LastAccessTime = 0;
1528
1529 if (attrs->ia_valid & ATTR_MTIME) {
1530 set_time = true;
1531 info_buf.LastWriteTime =
1532 cpu_to_le64(cifs_UnixTimeToNT(attrs->ia_mtime));
1533 } else
1534 info_buf.LastWriteTime = 0;
1535
1536 /*
1537 * Samba throws this field away, but windows may actually use it.
1538 * Do not set ctime unless other time stamps are changed explicitly
1539 * (i.e. by utimes()) since we would then have a mix of client and
1540 * server times.
1541 */
1542 if (set_time && (attrs->ia_valid & ATTR_CTIME)) {
1543 cFYI(1, ("CIFS - CTIME changed"));
1544 info_buf.ChangeTime =
1545 cpu_to_le64(cifs_UnixTimeToNT(attrs->ia_ctime));
1546 } else
1547 info_buf.ChangeTime = 0;
1548
1549 info_buf.CreationTime = 0; /* don't change */
1550 info_buf.Attributes = cpu_to_le32(dosattr);
1551
1552 /*
1553 * If the file is already open for write, just use that fileid
1554 */
1555 open_file = find_writable_file(cifsInode);
1556 if (open_file) {
1557 netfid = open_file->netfid;
1558 netpid = open_file->pid;
1559 goto set_via_filehandle;
1560 }
1561
1562 /*
1563 * NT4 apparently returns success on this call, but it doesn't
1564 * really work.
1565 */
1566 if (!(pTcon->ses->flags & CIFS_SES_NT4)) {
1567 rc = CIFSSMBSetPathInfo(xid, pTcon, full_path,
1568 &info_buf, cifs_sb->local_nls,
1569 cifs_sb->mnt_cifs_flags &
1570 CIFS_MOUNT_MAP_SPECIAL_CHR);
1571 if (rc != -EOPNOTSUPP && rc != -EINVAL)
1572 goto out;
1573 }
1574
1575 cFYI(1, ("calling SetFileInfo since SetPathInfo for "
1576 "times not supported by this server"));
1577 rc = CIFSSMBOpen(xid, pTcon, full_path, FILE_OPEN,
1578 SYNCHRONIZE | FILE_WRITE_ATTRIBUTES,
1579 CREATE_NOT_DIR, &netfid, &oplock,
1580 NULL, cifs_sb->local_nls,
1581 cifs_sb->mnt_cifs_flags &
1582 CIFS_MOUNT_MAP_SPECIAL_CHR);
1583
1584 if (rc != 0) {
1585 if (rc == -EIO)
1586 rc = -EINVAL;
1587 goto out;
1588 }
1589
1590 netpid = current->tgid;
1591
1592set_via_filehandle:
1593 rc = CIFSSMBSetFileInfo(xid, pTcon, &info_buf, netfid, netpid);
1594 if (open_file == NULL)
1595 CIFSSMBClose(xid, pTcon, netfid);
1596 else
1597 atomic_dec(&open_file->wrtPending);
1598out:
1599 return rc;
1600}
1601
1602static int
1603cifs_setattr_unix(struct dentry *direntry, struct iattr *attrs)
1493{ 1604{
1605 int rc;
1494 int xid; 1606 int xid;
1495 struct cifs_sb_info *cifs_sb;
1496 struct cifsTconInfo *pTcon;
1497 char *full_path = NULL; 1607 char *full_path = NULL;
1498 int rc = -EACCES;
1499 FILE_BASIC_INFO time_buf;
1500 bool set_time = false;
1501 bool set_dosattr = false;
1502 __u64 mode = 0xFFFFFFFFFFFFFFFFULL;
1503 __u64 uid = 0xFFFFFFFFFFFFFFFFULL;
1504 __u64 gid = 0xFFFFFFFFFFFFFFFFULL;
1505 struct cifsInodeInfo *cifsInode;
1506 struct inode *inode = direntry->d_inode; 1608 struct inode *inode = direntry->d_inode;
1609 struct cifsInodeInfo *cifsInode = CIFS_I(inode);
1610 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
1611 struct cifsTconInfo *pTcon = cifs_sb->tcon;
1612 struct cifs_unix_set_info_args *args = NULL;
1613
1614 cFYI(1, ("setattr_unix on file %s attrs->ia_valid=0x%x",
1615 direntry->d_name.name, attrs->ia_valid));
1616
1617 xid = GetXid();
1618
1619 if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_PERM) == 0) {
1620 /* check if we have permission to change attrs */
1621 rc = inode_change_ok(inode, attrs);
1622 if (rc < 0)
1623 goto out;
1624 else
1625 rc = 0;
1626 }
1627
1628 full_path = build_path_from_dentry(direntry);
1629 if (full_path == NULL) {
1630 rc = -ENOMEM;
1631 goto out;
1632 }
1633
1634 if ((attrs->ia_valid & ATTR_MTIME) || (attrs->ia_valid & ATTR_SIZE)) {
1635 /*
1636 Flush data before changing file size or changing the last
1637 write time of the file on the server. If the
1638 flush returns error, store it to report later and continue.
1639 BB: This should be smarter. Why bother flushing pages that
1640 will be truncated anyway? Also, should we error out here if
1641 the flush returns error?
1642 */
1643 rc = filemap_write_and_wait(inode->i_mapping);
1644 if (rc != 0) {
1645 cifsInode->write_behind_rc = rc;
1646 rc = 0;
1647 }
1648 }
1649
1650 if (attrs->ia_valid & ATTR_SIZE) {
1651 rc = cifs_set_file_size(inode, attrs, xid, full_path);
1652 if (rc != 0)
1653 goto out;
1654 }
1655
1656 /* skip mode change if it's just for clearing setuid/setgid */
1657 if (attrs->ia_valid & (ATTR_KILL_SUID|ATTR_KILL_SGID))
1658 attrs->ia_valid &= ~ATTR_MODE;
1659
1660 args = kmalloc(sizeof(*args), GFP_KERNEL);
1661 if (args == NULL) {
1662 rc = -ENOMEM;
1663 goto out;
1664 }
1665
1666 /* set up the struct */
1667 if (attrs->ia_valid & ATTR_MODE)
1668 args->mode = attrs->ia_mode;
1669 else
1670 args->mode = NO_CHANGE_64;
1671
1672 if (attrs->ia_valid & ATTR_UID)
1673 args->uid = attrs->ia_uid;
1674 else
1675 args->uid = NO_CHANGE_64;
1676
1677 if (attrs->ia_valid & ATTR_GID)
1678 args->gid = attrs->ia_gid;
1679 else
1680 args->gid = NO_CHANGE_64;
1681
1682 if (attrs->ia_valid & ATTR_ATIME)
1683 args->atime = cifs_UnixTimeToNT(attrs->ia_atime);
1684 else
1685 args->atime = NO_CHANGE_64;
1686
1687 if (attrs->ia_valid & ATTR_MTIME)
1688 args->mtime = cifs_UnixTimeToNT(attrs->ia_mtime);
1689 else
1690 args->mtime = NO_CHANGE_64;
1691
1692 if (attrs->ia_valid & ATTR_CTIME)
1693 args->ctime = cifs_UnixTimeToNT(attrs->ia_ctime);
1694 else
1695 args->ctime = NO_CHANGE_64;
1696
1697 args->device = 0;
1698 rc = CIFSSMBUnixSetInfo(xid, pTcon, full_path, args,
1699 cifs_sb->local_nls,
1700 cifs_sb->mnt_cifs_flags &
1701 CIFS_MOUNT_MAP_SPECIAL_CHR);
1702
1703 if (!rc)
1704 rc = inode_setattr(inode, attrs);
1705out:
1706 kfree(args);
1707 kfree(full_path);
1708 FreeXid(xid);
1709 return rc;
1710}
1711
1712static int
1713cifs_setattr_nounix(struct dentry *direntry, struct iattr *attrs)
1714{
1715 int xid;
1716 struct inode *inode = direntry->d_inode;
1717 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
1718 struct cifsInodeInfo *cifsInode = CIFS_I(inode);
1719 char *full_path = NULL;
1720 int rc = -EACCES;
1721 __u32 dosattr = 0;
1722 __u64 mode = NO_CHANGE_64;
1507 1723
1508 xid = GetXid(); 1724 xid = GetXid();
1509 1725
1510 cFYI(1, ("setattr on file %s attrs->iavalid 0x%x", 1726 cFYI(1, ("setattr on file %s attrs->iavalid 0x%x",
1511 direntry->d_name.name, attrs->ia_valid)); 1727 direntry->d_name.name, attrs->ia_valid));
1512 1728
1513 cifs_sb = CIFS_SB(inode->i_sb);
1514 pTcon = cifs_sb->tcon;
1515
1516 if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_PERM) == 0) { 1729 if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_PERM) == 0) {
1517 /* check if we have permission to change attrs */ 1730 /* check if we have permission to change attrs */
1518 rc = inode_change_ok(inode, attrs); 1731 rc = inode_change_ok(inode, attrs);
@@ -1528,7 +1741,6 @@ int cifs_setattr(struct dentry *direntry, struct iattr *attrs)
1528 FreeXid(xid); 1741 FreeXid(xid);
1529 return -ENOMEM; 1742 return -ENOMEM;
1530 } 1743 }
1531 cifsInode = CIFS_I(inode);
1532 1744
1533 if ((attrs->ia_valid & ATTR_MTIME) || (attrs->ia_valid & ATTR_SIZE)) { 1745 if ((attrs->ia_valid & ATTR_MTIME) || (attrs->ia_valid & ATTR_SIZE)) {
1534 /* 1746 /*
@@ -1559,21 +1771,8 @@ int cifs_setattr(struct dentry *direntry, struct iattr *attrs)
1559 * CIFSACL support + proper Windows to Unix idmapping, we may be 1771 * CIFSACL support + proper Windows to Unix idmapping, we may be
1560 * able to support this in the future. 1772 * able to support this in the future.
1561 */ 1773 */
1562 if (!pTcon->unix_ext && 1774 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID))
1563 !(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID)) {
1564 attrs->ia_valid &= ~(ATTR_UID | ATTR_GID); 1775 attrs->ia_valid &= ~(ATTR_UID | ATTR_GID);
1565 } else {
1566 if (attrs->ia_valid & ATTR_UID) {
1567 cFYI(1, ("UID changed to %d", attrs->ia_uid));
1568 uid = attrs->ia_uid;
1569 }
1570 if (attrs->ia_valid & ATTR_GID) {
1571 cFYI(1, ("GID changed to %d", attrs->ia_gid));
1572 gid = attrs->ia_gid;
1573 }
1574 }
1575
1576 time_buf.Attributes = 0;
1577 1776
1578 /* skip mode change if it's just for clearing setuid/setgid */ 1777 /* skip mode change if it's just for clearing setuid/setgid */
1579 if (attrs->ia_valid & (ATTR_KILL_SUID|ATTR_KILL_SGID)) 1778 if (attrs->ia_valid & (ATTR_KILL_SUID|ATTR_KILL_SGID))
@@ -1584,13 +1783,7 @@ int cifs_setattr(struct dentry *direntry, struct iattr *attrs)
1584 mode = attrs->ia_mode; 1783 mode = attrs->ia_mode;
1585 } 1784 }
1586 1785
1587 if ((pTcon->unix_ext) 1786 if (attrs->ia_valid & ATTR_MODE) {
1588 && (attrs->ia_valid & (ATTR_MODE | ATTR_GID | ATTR_UID)))
1589 rc = CIFSSMBUnixSetPerms(xid, pTcon, full_path, mode, uid, gid,
1590 0 /* dev_t */, cifs_sb->local_nls,
1591 cifs_sb->mnt_cifs_flags &
1592 CIFS_MOUNT_MAP_SPECIAL_CHR);
1593 else if (attrs->ia_valid & ATTR_MODE) {
1594 rc = 0; 1787 rc = 0;
1595#ifdef CONFIG_CIFS_EXPERIMENTAL 1788#ifdef CONFIG_CIFS_EXPERIMENTAL
1596 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL) 1789 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL)
@@ -1599,24 +1792,19 @@ int cifs_setattr(struct dentry *direntry, struct iattr *attrs)
1599#endif 1792#endif
1600 if (((mode & S_IWUGO) == 0) && 1793 if (((mode & S_IWUGO) == 0) &&
1601 (cifsInode->cifsAttrs & ATTR_READONLY) == 0) { 1794 (cifsInode->cifsAttrs & ATTR_READONLY) == 0) {
1602 set_dosattr = true; 1795
1603 time_buf.Attributes = cpu_to_le32(cifsInode->cifsAttrs | 1796 dosattr = cifsInode->cifsAttrs | ATTR_READONLY;
1604 ATTR_READONLY); 1797
1605 /* fix up mode if we're not using dynperm */ 1798 /* fix up mode if we're not using dynperm */
1606 if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DYNPERM) == 0) 1799 if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DYNPERM) == 0)
1607 attrs->ia_mode = inode->i_mode & ~S_IWUGO; 1800 attrs->ia_mode = inode->i_mode & ~S_IWUGO;
1608 } else if ((mode & S_IWUGO) && 1801 } else if ((mode & S_IWUGO) &&
1609 (cifsInode->cifsAttrs & ATTR_READONLY)) { 1802 (cifsInode->cifsAttrs & ATTR_READONLY)) {
1610 /* If file is readonly on server, we would 1803
1611 not be able to write to it - so if any write 1804 dosattr = cifsInode->cifsAttrs & ~ATTR_READONLY;
1612 bit is enabled for user or group or other we 1805 /* Attributes of 0 are ignored */
1613 need to at least try to remove r/o dos attr */ 1806 if (dosattr == 0)
1614 set_dosattr = true; 1807 dosattr |= ATTR_NORMAL;
1615 time_buf.Attributes = cpu_to_le32(cifsInode->cifsAttrs &
1616 (~ATTR_READONLY));
1617 /* Windows ignores set to zero */
1618 if (time_buf.Attributes == 0)
1619 time_buf.Attributes |= cpu_to_le32(ATTR_NORMAL);
1620 1808
1621 /* reset local inode permissions to normal */ 1809 /* reset local inode permissions to normal */
1622 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DYNPERM)) { 1810 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DYNPERM)) {
@@ -1634,82 +1822,18 @@ int cifs_setattr(struct dentry *direntry, struct iattr *attrs)
1634 } 1822 }
1635 } 1823 }
1636 1824
1637 if (attrs->ia_valid & ATTR_ATIME) { 1825 if (attrs->ia_valid & (ATTR_MTIME|ATTR_ATIME|ATTR_CTIME) ||
1638 set_time = true; 1826 ((attrs->ia_valid & ATTR_MODE) && dosattr)) {
1639 time_buf.LastAccessTime = 1827 rc = cifs_set_file_info(inode, attrs, xid, full_path, dosattr);
1640 cpu_to_le64(cifs_UnixTimeToNT(attrs->ia_atime)); 1828 /* BB: check for rc = -EOPNOTSUPP and switch to legacy mode */
1641 } else
1642 time_buf.LastAccessTime = 0;
1643
1644 if (attrs->ia_valid & ATTR_MTIME) {
1645 set_time = true;
1646 time_buf.LastWriteTime =
1647 cpu_to_le64(cifs_UnixTimeToNT(attrs->ia_mtime));
1648 } else
1649 time_buf.LastWriteTime = 0;
1650 /* Do not set ctime explicitly unless other time
1651 stamps are changed explicitly (i.e. by utime()
1652 since we would then have a mix of client and
1653 server times */
1654 1829
1655 if (set_time && (attrs->ia_valid & ATTR_CTIME)) {
1656 set_time = true;
1657 /* Although Samba throws this field away
1658 it may be useful to Windows - but we do
1659 not want to set ctime unless some other
1660 timestamp is changing */
1661 cFYI(1, ("CIFS - CTIME changed"));
1662 time_buf.ChangeTime =
1663 cpu_to_le64(cifs_UnixTimeToNT(attrs->ia_ctime));
1664 } else
1665 time_buf.ChangeTime = 0;
1666
1667 if (set_time || set_dosattr) {
1668 time_buf.CreationTime = 0; /* do not change */
1669 /* In the future we should experiment - try setting timestamps
1670 via Handle (SetFileInfo) instead of by path */
1671 if (!(pTcon->ses->flags & CIFS_SES_NT4))
1672 rc = CIFSSMBSetTimes(xid, pTcon, full_path, &time_buf,
1673 cifs_sb->local_nls,
1674 cifs_sb->mnt_cifs_flags &
1675 CIFS_MOUNT_MAP_SPECIAL_CHR);
1676 else
1677 rc = -EOPNOTSUPP;
1678
1679 if (rc == -EOPNOTSUPP) {
1680 int oplock = 0;
1681 __u16 netfid;
1682
1683 cFYI(1, ("calling SetFileInfo since SetPathInfo for "
1684 "times not supported by this server"));
1685 /* BB we could scan to see if we already have it open
1686 and pass in pid of opener to function */
1687 rc = CIFSSMBOpen(xid, pTcon, full_path, FILE_OPEN,
1688 SYNCHRONIZE | FILE_WRITE_ATTRIBUTES,
1689 CREATE_NOT_DIR, &netfid, &oplock,
1690 NULL, cifs_sb->local_nls,
1691 cifs_sb->mnt_cifs_flags &
1692 CIFS_MOUNT_MAP_SPECIAL_CHR);
1693 if (rc == 0) {
1694 rc = CIFSSMBSetFileTimes(xid, pTcon, &time_buf,
1695 netfid);
1696 CIFSSMBClose(xid, pTcon, netfid);
1697 } else {
1698 /* BB For even older servers we could convert time_buf
1699 into old DOS style which uses two second
1700 granularity */
1701
1702 /* rc = CIFSSMBSetTimesLegacy(xid, pTcon, full_path,
1703 &time_buf, cifs_sb->local_nls); */
1704 }
1705 }
1706 /* Even if error on time set, no sense failing the call if 1830 /* Even if error on time set, no sense failing the call if
1707 the server would set the time to a reasonable value anyway, 1831 the server would set the time to a reasonable value anyway,
1708 and this check ensures that we are not being called from 1832 and this check ensures that we are not being called from
1709 sys_utimes in which case we ought to fail the call back to 1833 sys_utimes in which case we ought to fail the call back to
1710 the user when the server rejects the call */ 1834 the user when the server rejects the call */
1711 if ((rc) && (attrs->ia_valid & 1835 if ((rc) && (attrs->ia_valid &
1712 (ATTR_MODE | ATTR_GID | ATTR_UID | ATTR_SIZE))) 1836 (ATTR_MODE | ATTR_GID | ATTR_UID | ATTR_SIZE)))
1713 rc = 0; 1837 rc = 0;
1714 } 1838 }
1715 1839
@@ -1723,6 +1847,21 @@ cifs_setattr_exit:
1723 return rc; 1847 return rc;
1724} 1848}
1725 1849
1850int
1851cifs_setattr(struct dentry *direntry, struct iattr *attrs)
1852{
1853 struct inode *inode = direntry->d_inode;
1854 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
1855 struct cifsTconInfo *pTcon = cifs_sb->tcon;
1856
1857 if (pTcon->unix_ext)
1858 return cifs_setattr_unix(direntry, attrs);
1859
1860 return cifs_setattr_nounix(direntry, attrs);
1861
1862 /* BB: add cifs_setattr_legacy for really old servers */
1863}
1864
1726#if 0 1865#if 0
1727void cifs_delete_inode(struct inode *inode) 1866void cifs_delete_inode(struct inode *inode)
1728{ 1867{
diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c
index 000ac509c98a..e286db9f5ee2 100644
--- a/fs/cifs/transport.c
+++ b/fs/cifs/transport.c
@@ -265,6 +265,7 @@ smb_send2(struct socket *ssocket, struct kvec *iov, int n_vec,
265 cFYI(1, ("Sending smb: total_len %d", total_len)); 265 cFYI(1, ("Sending smb: total_len %d", total_len));
266 dump_smb(smb_buffer, len); 266 dump_smb(smb_buffer, len);
267 267
268 i = 0;
268 while (total_len) { 269 while (total_len) {
269 rc = kernel_sendmsg(ssocket, &smb_msg, &iov[first_vec], 270 rc = kernel_sendmsg(ssocket, &smb_msg, &iov[first_vec],
270 n_vec - first_vec, total_len); 271 n_vec - first_vec, total_len);
diff --git a/fs/configfs/configfs_internal.h b/fs/configfs/configfs_internal.h
index da015c12e3ea..762d287123ca 100644
--- a/fs/configfs/configfs_internal.h
+++ b/fs/configfs/configfs_internal.h
@@ -49,8 +49,10 @@ struct configfs_dirent {
49#define CONFIGFS_USET_DEFAULT 0x0080 49#define CONFIGFS_USET_DEFAULT 0x0080
50#define CONFIGFS_USET_DROPPING 0x0100 50#define CONFIGFS_USET_DROPPING 0x0100
51#define CONFIGFS_USET_IN_MKDIR 0x0200 51#define CONFIGFS_USET_IN_MKDIR 0x0200
52#define CONFIGFS_USET_CREATING 0x0400
52#define CONFIGFS_NOT_PINNED (CONFIGFS_ITEM_ATTR) 53#define CONFIGFS_NOT_PINNED (CONFIGFS_ITEM_ATTR)
53 54
55extern struct mutex configfs_symlink_mutex;
54extern spinlock_t configfs_dirent_lock; 56extern spinlock_t configfs_dirent_lock;
55 57
56extern struct vfsmount * configfs_mount; 58extern struct vfsmount * configfs_mount;
@@ -66,6 +68,7 @@ extern void configfs_inode_exit(void);
66extern int configfs_create_file(struct config_item *, const struct configfs_attribute *); 68extern int configfs_create_file(struct config_item *, const struct configfs_attribute *);
67extern int configfs_make_dirent(struct configfs_dirent *, 69extern int configfs_make_dirent(struct configfs_dirent *,
68 struct dentry *, void *, umode_t, int); 70 struct dentry *, void *, umode_t, int);
71extern int configfs_dirent_is_ready(struct configfs_dirent *);
69 72
70extern int configfs_add_file(struct dentry *, const struct configfs_attribute *, int); 73extern int configfs_add_file(struct dentry *, const struct configfs_attribute *, int);
71extern void configfs_hash_and_remove(struct dentry * dir, const char * name); 74extern void configfs_hash_and_remove(struct dentry * dir, const char * name);
diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c
index 179589be063a..7a8db78a91d2 100644
--- a/fs/configfs/dir.c
+++ b/fs/configfs/dir.c
@@ -185,7 +185,7 @@ static int create_dir(struct config_item * k, struct dentry * p,
185 error = configfs_dirent_exists(p->d_fsdata, d->d_name.name); 185 error = configfs_dirent_exists(p->d_fsdata, d->d_name.name);
186 if (!error) 186 if (!error)
187 error = configfs_make_dirent(p->d_fsdata, d, k, mode, 187 error = configfs_make_dirent(p->d_fsdata, d, k, mode,
188 CONFIGFS_DIR); 188 CONFIGFS_DIR | CONFIGFS_USET_CREATING);
189 if (!error) { 189 if (!error) {
190 error = configfs_create(d, mode, init_dir); 190 error = configfs_create(d, mode, init_dir);
191 if (!error) { 191 if (!error) {
@@ -209,6 +209,9 @@ static int create_dir(struct config_item * k, struct dentry * p,
209 * configfs_create_dir - create a directory for an config_item. 209 * configfs_create_dir - create a directory for an config_item.
210 * @item: config_itemwe're creating directory for. 210 * @item: config_itemwe're creating directory for.
211 * @dentry: config_item's dentry. 211 * @dentry: config_item's dentry.
212 *
213 * Note: user-created entries won't be allowed under this new directory
214 * until it is validated by configfs_dir_set_ready()
212 */ 215 */
213 216
214static int configfs_create_dir(struct config_item * item, struct dentry *dentry) 217static int configfs_create_dir(struct config_item * item, struct dentry *dentry)
@@ -231,6 +234,44 @@ static int configfs_create_dir(struct config_item * item, struct dentry *dentry)
231 return error; 234 return error;
232} 235}
233 236
237/*
238 * Allow userspace to create new entries under a new directory created with
239 * configfs_create_dir(), and under all of its chidlren directories recursively.
240 * @sd configfs_dirent of the new directory to validate
241 *
242 * Caller must hold configfs_dirent_lock.
243 */
244static void configfs_dir_set_ready(struct configfs_dirent *sd)
245{
246 struct configfs_dirent *child_sd;
247
248 sd->s_type &= ~CONFIGFS_USET_CREATING;
249 list_for_each_entry(child_sd, &sd->s_children, s_sibling)
250 if (child_sd->s_type & CONFIGFS_USET_CREATING)
251 configfs_dir_set_ready(child_sd);
252}
253
254/*
255 * Check that a directory does not belong to a directory hierarchy being
256 * attached and not validated yet.
257 * @sd configfs_dirent of the directory to check
258 *
259 * @return non-zero iff the directory was validated
260 *
261 * Note: takes configfs_dirent_lock, so the result may change from false to true
262 * in two consecutive calls, but never from true to false.
263 */
264int configfs_dirent_is_ready(struct configfs_dirent *sd)
265{
266 int ret;
267
268 spin_lock(&configfs_dirent_lock);
269 ret = !(sd->s_type & CONFIGFS_USET_CREATING);
270 spin_unlock(&configfs_dirent_lock);
271
272 return ret;
273}
274
234int configfs_create_link(struct configfs_symlink *sl, 275int configfs_create_link(struct configfs_symlink *sl,
235 struct dentry *parent, 276 struct dentry *parent,
236 struct dentry *dentry) 277 struct dentry *dentry)
@@ -283,6 +324,8 @@ static void remove_dir(struct dentry * d)
283 * The only thing special about this is that we remove any files in 324 * The only thing special about this is that we remove any files in
284 * the directory before we remove the directory, and we've inlined 325 * the directory before we remove the directory, and we've inlined
285 * what used to be configfs_rmdir() below, instead of calling separately. 326 * what used to be configfs_rmdir() below, instead of calling separately.
327 *
328 * Caller holds the mutex of the item's inode
286 */ 329 */
287 330
288static void configfs_remove_dir(struct config_item * item) 331static void configfs_remove_dir(struct config_item * item)
@@ -330,7 +373,19 @@ static struct dentry * configfs_lookup(struct inode *dir,
330 struct configfs_dirent * parent_sd = dentry->d_parent->d_fsdata; 373 struct configfs_dirent * parent_sd = dentry->d_parent->d_fsdata;
331 struct configfs_dirent * sd; 374 struct configfs_dirent * sd;
332 int found = 0; 375 int found = 0;
333 int err = 0; 376 int err;
377
378 /*
379 * Fake invisibility if dir belongs to a group/default groups hierarchy
380 * being attached
381 *
382 * This forbids userspace to read/write attributes of items which may
383 * not complete their initialization, since the dentries of the
384 * attributes won't be instantiated.
385 */
386 err = -ENOENT;
387 if (!configfs_dirent_is_ready(parent_sd))
388 goto out;
334 389
335 list_for_each_entry(sd, &parent_sd->s_children, s_sibling) { 390 list_for_each_entry(sd, &parent_sd->s_children, s_sibling) {
336 if (sd->s_type & CONFIGFS_NOT_PINNED) { 391 if (sd->s_type & CONFIGFS_NOT_PINNED) {
@@ -353,6 +408,7 @@ static struct dentry * configfs_lookup(struct inode *dir,
353 return simple_lookup(dir, dentry, nd); 408 return simple_lookup(dir, dentry, nd);
354 } 409 }
355 410
411out:
356 return ERR_PTR(err); 412 return ERR_PTR(err);
357} 413}
358 414
@@ -370,13 +426,17 @@ static int configfs_detach_prep(struct dentry *dentry, struct mutex **wait_mutex
370 struct configfs_dirent *sd; 426 struct configfs_dirent *sd;
371 int ret; 427 int ret;
372 428
429 /* Mark that we're trying to drop the group */
430 parent_sd->s_type |= CONFIGFS_USET_DROPPING;
431
373 ret = -EBUSY; 432 ret = -EBUSY;
374 if (!list_empty(&parent_sd->s_links)) 433 if (!list_empty(&parent_sd->s_links))
375 goto out; 434 goto out;
376 435
377 ret = 0; 436 ret = 0;
378 list_for_each_entry(sd, &parent_sd->s_children, s_sibling) { 437 list_for_each_entry(sd, &parent_sd->s_children, s_sibling) {
379 if (sd->s_type & CONFIGFS_NOT_PINNED) 438 if (!sd->s_element ||
439 (sd->s_type & CONFIGFS_NOT_PINNED))
380 continue; 440 continue;
381 if (sd->s_type & CONFIGFS_USET_DEFAULT) { 441 if (sd->s_type & CONFIGFS_USET_DEFAULT) {
382 /* Abort if racing with mkdir() */ 442 /* Abort if racing with mkdir() */
@@ -385,8 +445,6 @@ static int configfs_detach_prep(struct dentry *dentry, struct mutex **wait_mutex
385 *wait_mutex = &sd->s_dentry->d_inode->i_mutex; 445 *wait_mutex = &sd->s_dentry->d_inode->i_mutex;
386 return -EAGAIN; 446 return -EAGAIN;
387 } 447 }
388 /* Mark that we're trying to drop the group */
389 sd->s_type |= CONFIGFS_USET_DROPPING;
390 448
391 /* 449 /*
392 * Yup, recursive. If there's a problem, blame 450 * Yup, recursive. If there's a problem, blame
@@ -414,12 +472,11 @@ static void configfs_detach_rollback(struct dentry *dentry)
414 struct configfs_dirent *parent_sd = dentry->d_fsdata; 472 struct configfs_dirent *parent_sd = dentry->d_fsdata;
415 struct configfs_dirent *sd; 473 struct configfs_dirent *sd;
416 474
417 list_for_each_entry(sd, &parent_sd->s_children, s_sibling) { 475 parent_sd->s_type &= ~CONFIGFS_USET_DROPPING;
418 if (sd->s_type & CONFIGFS_USET_DEFAULT) { 476
477 list_for_each_entry(sd, &parent_sd->s_children, s_sibling)
478 if (sd->s_type & CONFIGFS_USET_DEFAULT)
419 configfs_detach_rollback(sd->s_dentry); 479 configfs_detach_rollback(sd->s_dentry);
420 sd->s_type &= ~CONFIGFS_USET_DROPPING;
421 }
422 }
423} 480}
424 481
425static void detach_attrs(struct config_item * item) 482static void detach_attrs(struct config_item * item)
@@ -558,36 +615,21 @@ static int create_default_group(struct config_group *parent_group,
558static int populate_groups(struct config_group *group) 615static int populate_groups(struct config_group *group)
559{ 616{
560 struct config_group *new_group; 617 struct config_group *new_group;
561 struct dentry *dentry = group->cg_item.ci_dentry;
562 int ret = 0; 618 int ret = 0;
563 int i; 619 int i;
564 620
565 if (group->default_groups) { 621 if (group->default_groups) {
566 /*
567 * FYI, we're faking mkdir here
568 * I'm not sure we need this semaphore, as we're called
569 * from our parent's mkdir. That holds our parent's
570 * i_mutex, so afaik lookup cannot continue through our
571 * parent to find us, let alone mess with our tree.
572 * That said, taking our i_mutex is closer to mkdir
573 * emulation, and shouldn't hurt.
574 */
575 mutex_lock_nested(&dentry->d_inode->i_mutex, I_MUTEX_CHILD);
576
577 for (i = 0; group->default_groups[i]; i++) { 622 for (i = 0; group->default_groups[i]; i++) {
578 new_group = group->default_groups[i]; 623 new_group = group->default_groups[i];
579 624
580 ret = create_default_group(group, new_group); 625 ret = create_default_group(group, new_group);
581 if (ret) 626 if (ret) {
627 detach_groups(group);
582 break; 628 break;
629 }
583 } 630 }
584
585 mutex_unlock(&dentry->d_inode->i_mutex);
586 } 631 }
587 632
588 if (ret)
589 detach_groups(group);
590
591 return ret; 633 return ret;
592} 634}
593 635
@@ -702,7 +744,15 @@ static int configfs_attach_item(struct config_item *parent_item,
702 if (!ret) { 744 if (!ret) {
703 ret = populate_attrs(item); 745 ret = populate_attrs(item);
704 if (ret) { 746 if (ret) {
747 /*
748 * We are going to remove an inode and its dentry but
749 * the VFS may already have hit and used them. Thus,
750 * we must lock them as rmdir() would.
751 */
752 mutex_lock(&dentry->d_inode->i_mutex);
705 configfs_remove_dir(item); 753 configfs_remove_dir(item);
754 dentry->d_inode->i_flags |= S_DEAD;
755 mutex_unlock(&dentry->d_inode->i_mutex);
706 d_delete(dentry); 756 d_delete(dentry);
707 } 757 }
708 } 758 }
@@ -710,6 +760,7 @@ static int configfs_attach_item(struct config_item *parent_item,
710 return ret; 760 return ret;
711} 761}
712 762
763/* Caller holds the mutex of the item's inode */
713static void configfs_detach_item(struct config_item *item) 764static void configfs_detach_item(struct config_item *item)
714{ 765{
715 detach_attrs(item); 766 detach_attrs(item);
@@ -728,16 +779,30 @@ static int configfs_attach_group(struct config_item *parent_item,
728 sd = dentry->d_fsdata; 779 sd = dentry->d_fsdata;
729 sd->s_type |= CONFIGFS_USET_DIR; 780 sd->s_type |= CONFIGFS_USET_DIR;
730 781
782 /*
783 * FYI, we're faking mkdir in populate_groups()
784 * We must lock the group's inode to avoid races with the VFS
785 * which can already hit the inode and try to add/remove entries
786 * under it.
787 *
788 * We must also lock the inode to remove it safely in case of
789 * error, as rmdir() would.
790 */
791 mutex_lock_nested(&dentry->d_inode->i_mutex, I_MUTEX_CHILD);
731 ret = populate_groups(to_config_group(item)); 792 ret = populate_groups(to_config_group(item));
732 if (ret) { 793 if (ret) {
733 configfs_detach_item(item); 794 configfs_detach_item(item);
734 d_delete(dentry); 795 dentry->d_inode->i_flags |= S_DEAD;
735 } 796 }
797 mutex_unlock(&dentry->d_inode->i_mutex);
798 if (ret)
799 d_delete(dentry);
736 } 800 }
737 801
738 return ret; 802 return ret;
739} 803}
740 804
805/* Caller holds the mutex of the group's inode */
741static void configfs_detach_group(struct config_item *item) 806static void configfs_detach_group(struct config_item *item)
742{ 807{
743 detach_groups(to_config_group(item)); 808 detach_groups(to_config_group(item));
@@ -1035,7 +1100,7 @@ static int configfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
1035 struct configfs_subsystem *subsys; 1100 struct configfs_subsystem *subsys;
1036 struct configfs_dirent *sd; 1101 struct configfs_dirent *sd;
1037 struct config_item_type *type; 1102 struct config_item_type *type;
1038 struct module *owner = NULL; 1103 struct module *subsys_owner = NULL, *new_item_owner = NULL;
1039 char *name; 1104 char *name;
1040 1105
1041 if (dentry->d_parent == configfs_sb->s_root) { 1106 if (dentry->d_parent == configfs_sb->s_root) {
@@ -1044,6 +1109,16 @@ static int configfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
1044 } 1109 }
1045 1110
1046 sd = dentry->d_parent->d_fsdata; 1111 sd = dentry->d_parent->d_fsdata;
1112
1113 /*
1114 * Fake invisibility if dir belongs to a group/default groups hierarchy
1115 * being attached
1116 */
1117 if (!configfs_dirent_is_ready(sd)) {
1118 ret = -ENOENT;
1119 goto out;
1120 }
1121
1047 if (!(sd->s_type & CONFIGFS_USET_DIR)) { 1122 if (!(sd->s_type & CONFIGFS_USET_DIR)) {
1048 ret = -EPERM; 1123 ret = -EPERM;
1049 goto out; 1124 goto out;
@@ -1062,10 +1137,25 @@ static int configfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
1062 goto out_put; 1137 goto out_put;
1063 } 1138 }
1064 1139
1140 /*
1141 * The subsystem may belong to a different module than the item
1142 * being created. We don't want to safely pin the new item but
1143 * fail to pin the subsystem it sits under.
1144 */
1145 if (!subsys->su_group.cg_item.ci_type) {
1146 ret = -EINVAL;
1147 goto out_put;
1148 }
1149 subsys_owner = subsys->su_group.cg_item.ci_type->ct_owner;
1150 if (!try_module_get(subsys_owner)) {
1151 ret = -EINVAL;
1152 goto out_put;
1153 }
1154
1065 name = kmalloc(dentry->d_name.len + 1, GFP_KERNEL); 1155 name = kmalloc(dentry->d_name.len + 1, GFP_KERNEL);
1066 if (!name) { 1156 if (!name) {
1067 ret = -ENOMEM; 1157 ret = -ENOMEM;
1068 goto out_put; 1158 goto out_subsys_put;
1069 } 1159 }
1070 1160
1071 snprintf(name, dentry->d_name.len + 1, "%s", dentry->d_name.name); 1161 snprintf(name, dentry->d_name.len + 1, "%s", dentry->d_name.name);
@@ -1094,10 +1184,10 @@ static int configfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
1094 kfree(name); 1184 kfree(name);
1095 if (ret) { 1185 if (ret) {
1096 /* 1186 /*
1097 * If item == NULL, then link_obj() was never called. 1187 * If ret != 0, then link_obj() was never called.
1098 * There are no extra references to clean up. 1188 * There are no extra references to clean up.
1099 */ 1189 */
1100 goto out_put; 1190 goto out_subsys_put;
1101 } 1191 }
1102 1192
1103 /* 1193 /*
@@ -1111,8 +1201,8 @@ static int configfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
1111 goto out_unlink; 1201 goto out_unlink;
1112 } 1202 }
1113 1203
1114 owner = type->ct_owner; 1204 new_item_owner = type->ct_owner;
1115 if (!try_module_get(owner)) { 1205 if (!try_module_get(new_item_owner)) {
1116 ret = -EINVAL; 1206 ret = -EINVAL;
1117 goto out_unlink; 1207 goto out_unlink;
1118 } 1208 }
@@ -1142,6 +1232,8 @@ static int configfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
1142 1232
1143 spin_lock(&configfs_dirent_lock); 1233 spin_lock(&configfs_dirent_lock);
1144 sd->s_type &= ~CONFIGFS_USET_IN_MKDIR; 1234 sd->s_type &= ~CONFIGFS_USET_IN_MKDIR;
1235 if (!ret)
1236 configfs_dir_set_ready(dentry->d_fsdata);
1145 spin_unlock(&configfs_dirent_lock); 1237 spin_unlock(&configfs_dirent_lock);
1146 1238
1147out_unlink: 1239out_unlink:
@@ -1159,9 +1251,13 @@ out_unlink:
1159 mutex_unlock(&subsys->su_mutex); 1251 mutex_unlock(&subsys->su_mutex);
1160 1252
1161 if (module_got) 1253 if (module_got)
1162 module_put(owner); 1254 module_put(new_item_owner);
1163 } 1255 }
1164 1256
1257out_subsys_put:
1258 if (ret)
1259 module_put(subsys_owner);
1260
1165out_put: 1261out_put:
1166 /* 1262 /*
1167 * link_obj()/link_group() took a reference from child->parent, 1263 * link_obj()/link_group() took a reference from child->parent,
@@ -1180,7 +1276,7 @@ static int configfs_rmdir(struct inode *dir, struct dentry *dentry)
1180 struct config_item *item; 1276 struct config_item *item;
1181 struct configfs_subsystem *subsys; 1277 struct configfs_subsystem *subsys;
1182 struct configfs_dirent *sd; 1278 struct configfs_dirent *sd;
1183 struct module *owner = NULL; 1279 struct module *subsys_owner = NULL, *dead_item_owner = NULL;
1184 int ret; 1280 int ret;
1185 1281
1186 if (dentry->d_parent == configfs_sb->s_root) 1282 if (dentry->d_parent == configfs_sb->s_root)
@@ -1207,6 +1303,15 @@ static int configfs_rmdir(struct inode *dir, struct dentry *dentry)
1207 return -EINVAL; 1303 return -EINVAL;
1208 } 1304 }
1209 1305
1306 /* configfs_mkdir() shouldn't have allowed this */
1307 BUG_ON(!subsys->su_group.cg_item.ci_type);
1308 subsys_owner = subsys->su_group.cg_item.ci_type->ct_owner;
1309
1310 /*
1311 * Ensure that no racing symlink() will make detach_prep() fail while
1312 * the new link is temporarily attached
1313 */
1314 mutex_lock(&configfs_symlink_mutex);
1210 spin_lock(&configfs_dirent_lock); 1315 spin_lock(&configfs_dirent_lock);
1211 do { 1316 do {
1212 struct mutex *wait_mutex; 1317 struct mutex *wait_mutex;
@@ -1215,6 +1320,7 @@ static int configfs_rmdir(struct inode *dir, struct dentry *dentry)
1215 if (ret) { 1320 if (ret) {
1216 configfs_detach_rollback(dentry); 1321 configfs_detach_rollback(dentry);
1217 spin_unlock(&configfs_dirent_lock); 1322 spin_unlock(&configfs_dirent_lock);
1323 mutex_unlock(&configfs_symlink_mutex);
1218 if (ret != -EAGAIN) { 1324 if (ret != -EAGAIN) {
1219 config_item_put(parent_item); 1325 config_item_put(parent_item);
1220 return ret; 1326 return ret;
@@ -1224,10 +1330,12 @@ static int configfs_rmdir(struct inode *dir, struct dentry *dentry)
1224 mutex_lock(wait_mutex); 1330 mutex_lock(wait_mutex);
1225 mutex_unlock(wait_mutex); 1331 mutex_unlock(wait_mutex);
1226 1332
1333 mutex_lock(&configfs_symlink_mutex);
1227 spin_lock(&configfs_dirent_lock); 1334 spin_lock(&configfs_dirent_lock);
1228 } 1335 }
1229 } while (ret == -EAGAIN); 1336 } while (ret == -EAGAIN);
1230 spin_unlock(&configfs_dirent_lock); 1337 spin_unlock(&configfs_dirent_lock);
1338 mutex_unlock(&configfs_symlink_mutex);
1231 1339
1232 /* Get a working ref for the duration of this function */ 1340 /* Get a working ref for the duration of this function */
1233 item = configfs_get_config_item(dentry); 1341 item = configfs_get_config_item(dentry);
@@ -1236,7 +1344,7 @@ static int configfs_rmdir(struct inode *dir, struct dentry *dentry)
1236 config_item_put(parent_item); 1344 config_item_put(parent_item);
1237 1345
1238 if (item->ci_type) 1346 if (item->ci_type)
1239 owner = item->ci_type->ct_owner; 1347 dead_item_owner = item->ci_type->ct_owner;
1240 1348
1241 if (sd->s_type & CONFIGFS_USET_DIR) { 1349 if (sd->s_type & CONFIGFS_USET_DIR) {
1242 configfs_detach_group(item); 1350 configfs_detach_group(item);
@@ -1258,7 +1366,8 @@ static int configfs_rmdir(struct inode *dir, struct dentry *dentry)
1258 /* Drop our reference from above */ 1366 /* Drop our reference from above */
1259 config_item_put(item); 1367 config_item_put(item);
1260 1368
1261 module_put(owner); 1369 module_put(dead_item_owner);
1370 module_put(subsys_owner);
1262 1371
1263 return 0; 1372 return 0;
1264} 1373}
@@ -1314,13 +1423,24 @@ static int configfs_dir_open(struct inode *inode, struct file *file)
1314{ 1423{
1315 struct dentry * dentry = file->f_path.dentry; 1424 struct dentry * dentry = file->f_path.dentry;
1316 struct configfs_dirent * parent_sd = dentry->d_fsdata; 1425 struct configfs_dirent * parent_sd = dentry->d_fsdata;
1426 int err;
1317 1427
1318 mutex_lock(&dentry->d_inode->i_mutex); 1428 mutex_lock(&dentry->d_inode->i_mutex);
1319 file->private_data = configfs_new_dirent(parent_sd, NULL); 1429 /*
1430 * Fake invisibility if dir belongs to a group/default groups hierarchy
1431 * being attached
1432 */
1433 err = -ENOENT;
1434 if (configfs_dirent_is_ready(parent_sd)) {
1435 file->private_data = configfs_new_dirent(parent_sd, NULL);
1436 if (IS_ERR(file->private_data))
1437 err = PTR_ERR(file->private_data);
1438 else
1439 err = 0;
1440 }
1320 mutex_unlock(&dentry->d_inode->i_mutex); 1441 mutex_unlock(&dentry->d_inode->i_mutex);
1321 1442
1322 return IS_ERR(file->private_data) ? PTR_ERR(file->private_data) : 0; 1443 return err;
1323
1324} 1444}
1325 1445
1326static int configfs_dir_close(struct inode *inode, struct file *file) 1446static int configfs_dir_close(struct inode *inode, struct file *file)
@@ -1491,6 +1611,10 @@ int configfs_register_subsystem(struct configfs_subsystem *subsys)
1491 if (err) { 1611 if (err) {
1492 d_delete(dentry); 1612 d_delete(dentry);
1493 dput(dentry); 1613 dput(dentry);
1614 } else {
1615 spin_lock(&configfs_dirent_lock);
1616 configfs_dir_set_ready(dentry->d_fsdata);
1617 spin_unlock(&configfs_dirent_lock);
1494 } 1618 }
1495 } 1619 }
1496 1620
@@ -1517,11 +1641,13 @@ void configfs_unregister_subsystem(struct configfs_subsystem *subsys)
1517 mutex_lock_nested(&configfs_sb->s_root->d_inode->i_mutex, 1641 mutex_lock_nested(&configfs_sb->s_root->d_inode->i_mutex,
1518 I_MUTEX_PARENT); 1642 I_MUTEX_PARENT);
1519 mutex_lock_nested(&dentry->d_inode->i_mutex, I_MUTEX_CHILD); 1643 mutex_lock_nested(&dentry->d_inode->i_mutex, I_MUTEX_CHILD);
1644 mutex_lock(&configfs_symlink_mutex);
1520 spin_lock(&configfs_dirent_lock); 1645 spin_lock(&configfs_dirent_lock);
1521 if (configfs_detach_prep(dentry, NULL)) { 1646 if (configfs_detach_prep(dentry, NULL)) {
1522 printk(KERN_ERR "configfs: Tried to unregister non-empty subsystem!\n"); 1647 printk(KERN_ERR "configfs: Tried to unregister non-empty subsystem!\n");
1523 } 1648 }
1524 spin_unlock(&configfs_dirent_lock); 1649 spin_unlock(&configfs_dirent_lock);
1650 mutex_unlock(&configfs_symlink_mutex);
1525 configfs_detach_group(&group->cg_item); 1651 configfs_detach_group(&group->cg_item);
1526 dentry->d_inode->i_flags |= S_DEAD; 1652 dentry->d_inode->i_flags |= S_DEAD;
1527 mutex_unlock(&dentry->d_inode->i_mutex); 1653 mutex_unlock(&dentry->d_inode->i_mutex);
diff --git a/fs/configfs/symlink.c b/fs/configfs/symlink.c
index 0004d18c40ac..bf74973b0492 100644
--- a/fs/configfs/symlink.c
+++ b/fs/configfs/symlink.c
@@ -31,6 +31,9 @@
31#include <linux/configfs.h> 31#include <linux/configfs.h>
32#include "configfs_internal.h" 32#include "configfs_internal.h"
33 33
34/* Protects attachments of new symlinks */
35DEFINE_MUTEX(configfs_symlink_mutex);
36
34static int item_depth(struct config_item * item) 37static int item_depth(struct config_item * item)
35{ 38{
36 struct config_item * p = item; 39 struct config_item * p = item;
@@ -73,11 +76,20 @@ static int create_link(struct config_item *parent_item,
73 struct configfs_symlink *sl; 76 struct configfs_symlink *sl;
74 int ret; 77 int ret;
75 78
79 ret = -ENOENT;
80 if (!configfs_dirent_is_ready(target_sd))
81 goto out;
76 ret = -ENOMEM; 82 ret = -ENOMEM;
77 sl = kmalloc(sizeof(struct configfs_symlink), GFP_KERNEL); 83 sl = kmalloc(sizeof(struct configfs_symlink), GFP_KERNEL);
78 if (sl) { 84 if (sl) {
79 sl->sl_target = config_item_get(item); 85 sl->sl_target = config_item_get(item);
80 spin_lock(&configfs_dirent_lock); 86 spin_lock(&configfs_dirent_lock);
87 if (target_sd->s_type & CONFIGFS_USET_DROPPING) {
88 spin_unlock(&configfs_dirent_lock);
89 config_item_put(item);
90 kfree(sl);
91 return -ENOENT;
92 }
81 list_add(&sl->sl_list, &target_sd->s_links); 93 list_add(&sl->sl_list, &target_sd->s_links);
82 spin_unlock(&configfs_dirent_lock); 94 spin_unlock(&configfs_dirent_lock);
83 ret = configfs_create_link(sl, parent_item->ci_dentry, 95 ret = configfs_create_link(sl, parent_item->ci_dentry,
@@ -91,6 +103,7 @@ static int create_link(struct config_item *parent_item,
91 } 103 }
92 } 104 }
93 105
106out:
94 return ret; 107 return ret;
95} 108}
96 109
@@ -120,6 +133,7 @@ int configfs_symlink(struct inode *dir, struct dentry *dentry, const char *symna
120{ 133{
121 int ret; 134 int ret;
122 struct nameidata nd; 135 struct nameidata nd;
136 struct configfs_dirent *sd;
123 struct config_item *parent_item; 137 struct config_item *parent_item;
124 struct config_item *target_item; 138 struct config_item *target_item;
125 struct config_item_type *type; 139 struct config_item_type *type;
@@ -128,9 +142,19 @@ int configfs_symlink(struct inode *dir, struct dentry *dentry, const char *symna
128 if (dentry->d_parent == configfs_sb->s_root) 142 if (dentry->d_parent == configfs_sb->s_root)
129 goto out; 143 goto out;
130 144
145 sd = dentry->d_parent->d_fsdata;
146 /*
147 * Fake invisibility if dir belongs to a group/default groups hierarchy
148 * being attached
149 */
150 ret = -ENOENT;
151 if (!configfs_dirent_is_ready(sd))
152 goto out;
153
131 parent_item = configfs_get_config_item(dentry->d_parent); 154 parent_item = configfs_get_config_item(dentry->d_parent);
132 type = parent_item->ci_type; 155 type = parent_item->ci_type;
133 156
157 ret = -EPERM;
134 if (!type || !type->ct_item_ops || 158 if (!type || !type->ct_item_ops ||
135 !type->ct_item_ops->allow_link) 159 !type->ct_item_ops->allow_link)
136 goto out_put; 160 goto out_put;
@@ -141,7 +165,9 @@ int configfs_symlink(struct inode *dir, struct dentry *dentry, const char *symna
141 165
142 ret = type->ct_item_ops->allow_link(parent_item, target_item); 166 ret = type->ct_item_ops->allow_link(parent_item, target_item);
143 if (!ret) { 167 if (!ret) {
168 mutex_lock(&configfs_symlink_mutex);
144 ret = create_link(parent_item, target_item, dentry); 169 ret = create_link(parent_item, target_item, dentry);
170 mutex_unlock(&configfs_symlink_mutex);
145 if (ret && type->ct_item_ops->drop_link) 171 if (ret && type->ct_item_ops->drop_link)
146 type->ct_item_ops->drop_link(parent_item, 172 type->ct_item_ops->drop_link(parent_item,
147 target_item); 173 target_item);
diff --git a/fs/dcache.c b/fs/dcache.c
index f2584d22cb45..101663d15e9f 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -1220,6 +1220,107 @@ struct dentry *d_splice_alias(struct inode *inode, struct dentry *dentry)
1220 return new; 1220 return new;
1221} 1221}
1222 1222
1223/**
1224 * d_add_ci - lookup or allocate new dentry with case-exact name
1225 * @inode: the inode case-insensitive lookup has found
1226 * @dentry: the negative dentry that was passed to the parent's lookup func
1227 * @name: the case-exact name to be associated with the returned dentry
1228 *
1229 * This is to avoid filling the dcache with case-insensitive names to the
1230 * same inode, only the actual correct case is stored in the dcache for
1231 * case-insensitive filesystems.
1232 *
1233 * For a case-insensitive lookup match and if the case-exact dentry
1234 * already exists in the dcache, use it and return it.
1235 *
1236 * If no entry exists with the exact case name, allocate new dentry with
1237 * the exact case, and return the spliced entry.
1238 */
1239struct dentry *d_add_ci(struct inode *inode, struct dentry *dentry,
1240 struct qstr *name)
1241{
1242 int error;
1243 struct dentry *found;
1244 struct dentry *new;
1245
1246 /* Does a dentry matching the name exist already? */
1247 found = d_hash_and_lookup(dentry->d_parent, name);
1248 /* If not, create it now and return */
1249 if (!found) {
1250 new = d_alloc(dentry->d_parent, name);
1251 if (!new) {
1252 error = -ENOMEM;
1253 goto err_out;
1254 }
1255 found = d_splice_alias(inode, new);
1256 if (found) {
1257 dput(new);
1258 return found;
1259 }
1260 return new;
1261 }
1262 /* Matching dentry exists, check if it is negative. */
1263 if (found->d_inode) {
1264 if (unlikely(found->d_inode != inode)) {
1265 /* This can't happen because bad inodes are unhashed. */
1266 BUG_ON(!is_bad_inode(inode));
1267 BUG_ON(!is_bad_inode(found->d_inode));
1268 }
1269 /*
1270 * Already have the inode and the dentry attached, decrement
1271 * the reference count to balance the iget() done
1272 * earlier on. We found the dentry using d_lookup() so it
1273 * cannot be disconnected and thus we do not need to worry
1274 * about any NFS/disconnectedness issues here.
1275 */
1276 iput(inode);
1277 return found;
1278 }
1279 /*
1280 * Negative dentry: instantiate it unless the inode is a directory and
1281 * has a 'disconnected' dentry (i.e. IS_ROOT and DCACHE_DISCONNECTED),
1282 * in which case d_move() that in place of the found dentry.
1283 */
1284 if (!S_ISDIR(inode->i_mode)) {
1285 /* Not a directory; everything is easy. */
1286 d_instantiate(found, inode);
1287 return found;
1288 }
1289 spin_lock(&dcache_lock);
1290 if (list_empty(&inode->i_dentry)) {
1291 /*
1292 * Directory without a 'disconnected' dentry; we need to do
1293 * d_instantiate() by hand because it takes dcache_lock which
1294 * we already hold.
1295 */
1296 list_add(&found->d_alias, &inode->i_dentry);
1297 found->d_inode = inode;
1298 spin_unlock(&dcache_lock);
1299 security_d_instantiate(found, inode);
1300 return found;
1301 }
1302 /*
1303 * Directory with a 'disconnected' dentry; get a reference to the
1304 * 'disconnected' dentry.
1305 */
1306 new = list_entry(inode->i_dentry.next, struct dentry, d_alias);
1307 dget_locked(new);
1308 spin_unlock(&dcache_lock);
1309 /* Do security voodoo. */
1310 security_d_instantiate(found, inode);
1311 /* Move new in place of found. */
1312 d_move(new, found);
1313 /* Balance the iget() we did above. */
1314 iput(inode);
1315 /* Throw away found. */
1316 dput(found);
1317 /* Use new as the actual dentry. */
1318 return new;
1319
1320err_out:
1321 iput(inode);
1322 return ERR_PTR(error);
1323}
1223 1324
1224/** 1325/**
1225 * d_lookup - search for a dentry 1326 * d_lookup - search for a dentry
@@ -2254,6 +2355,7 @@ EXPORT_SYMBOL(d_path);
2254EXPORT_SYMBOL(d_prune_aliases); 2355EXPORT_SYMBOL(d_prune_aliases);
2255EXPORT_SYMBOL(d_rehash); 2356EXPORT_SYMBOL(d_rehash);
2256EXPORT_SYMBOL(d_splice_alias); 2357EXPORT_SYMBOL(d_splice_alias);
2358EXPORT_SYMBOL(d_add_ci);
2257EXPORT_SYMBOL(d_validate); 2359EXPORT_SYMBOL(d_validate);
2258EXPORT_SYMBOL(dget_locked); 2360EXPORT_SYMBOL(dget_locked);
2259EXPORT_SYMBOL(dput); 2361EXPORT_SYMBOL(dput);
diff --git a/fs/ext4/acl.c b/fs/ext4/acl.c
index c7d04e165446..694ed6fadcc8 100644
--- a/fs/ext4/acl.c
+++ b/fs/ext4/acl.c
@@ -40,34 +40,35 @@ ext4_acl_from_disk(const void *value, size_t size)
40 acl = posix_acl_alloc(count, GFP_NOFS); 40 acl = posix_acl_alloc(count, GFP_NOFS);
41 if (!acl) 41 if (!acl)
42 return ERR_PTR(-ENOMEM); 42 return ERR_PTR(-ENOMEM);
43 for (n=0; n < count; n++) { 43 for (n = 0; n < count; n++) {
44 ext4_acl_entry *entry = 44 ext4_acl_entry *entry =
45 (ext4_acl_entry *)value; 45 (ext4_acl_entry *)value;
46 if ((char *)value + sizeof(ext4_acl_entry_short) > end) 46 if ((char *)value + sizeof(ext4_acl_entry_short) > end)
47 goto fail; 47 goto fail;
48 acl->a_entries[n].e_tag = le16_to_cpu(entry->e_tag); 48 acl->a_entries[n].e_tag = le16_to_cpu(entry->e_tag);
49 acl->a_entries[n].e_perm = le16_to_cpu(entry->e_perm); 49 acl->a_entries[n].e_perm = le16_to_cpu(entry->e_perm);
50 switch(acl->a_entries[n].e_tag) { 50
51 case ACL_USER_OBJ: 51 switch (acl->a_entries[n].e_tag) {
52 case ACL_GROUP_OBJ: 52 case ACL_USER_OBJ:
53 case ACL_MASK: 53 case ACL_GROUP_OBJ:
54 case ACL_OTHER: 54 case ACL_MASK:
55 value = (char *)value + 55 case ACL_OTHER:
56 sizeof(ext4_acl_entry_short); 56 value = (char *)value +
57 acl->a_entries[n].e_id = ACL_UNDEFINED_ID; 57 sizeof(ext4_acl_entry_short);
58 break; 58 acl->a_entries[n].e_id = ACL_UNDEFINED_ID;
59 59 break;
60 case ACL_USER: 60
61 case ACL_GROUP: 61 case ACL_USER:
62 value = (char *)value + sizeof(ext4_acl_entry); 62 case ACL_GROUP:
63 if ((char *)value > end) 63 value = (char *)value + sizeof(ext4_acl_entry);
64 goto fail; 64 if ((char *)value > end)
65 acl->a_entries[n].e_id =
66 le32_to_cpu(entry->e_id);
67 break;
68
69 default:
70 goto fail; 65 goto fail;
66 acl->a_entries[n].e_id =
67 le32_to_cpu(entry->e_id);
68 break;
69
70 default:
71 goto fail;
71 } 72 }
72 } 73 }
73 if (value != end) 74 if (value != end)
@@ -96,27 +97,26 @@ ext4_acl_to_disk(const struct posix_acl *acl, size_t *size)
96 return ERR_PTR(-ENOMEM); 97 return ERR_PTR(-ENOMEM);
97 ext_acl->a_version = cpu_to_le32(EXT4_ACL_VERSION); 98 ext_acl->a_version = cpu_to_le32(EXT4_ACL_VERSION);
98 e = (char *)ext_acl + sizeof(ext4_acl_header); 99 e = (char *)ext_acl + sizeof(ext4_acl_header);
99 for (n=0; n < acl->a_count; n++) { 100 for (n = 0; n < acl->a_count; n++) {
100 ext4_acl_entry *entry = (ext4_acl_entry *)e; 101 ext4_acl_entry *entry = (ext4_acl_entry *)e;
101 entry->e_tag = cpu_to_le16(acl->a_entries[n].e_tag); 102 entry->e_tag = cpu_to_le16(acl->a_entries[n].e_tag);
102 entry->e_perm = cpu_to_le16(acl->a_entries[n].e_perm); 103 entry->e_perm = cpu_to_le16(acl->a_entries[n].e_perm);
103 switch(acl->a_entries[n].e_tag) { 104 switch (acl->a_entries[n].e_tag) {
104 case ACL_USER: 105 case ACL_USER:
105 case ACL_GROUP: 106 case ACL_GROUP:
106 entry->e_id = 107 entry->e_id = cpu_to_le32(acl->a_entries[n].e_id);
107 cpu_to_le32(acl->a_entries[n].e_id); 108 e += sizeof(ext4_acl_entry);
108 e += sizeof(ext4_acl_entry); 109 break;
109 break; 110
110 111 case ACL_USER_OBJ:
111 case ACL_USER_OBJ: 112 case ACL_GROUP_OBJ:
112 case ACL_GROUP_OBJ: 113 case ACL_MASK:
113 case ACL_MASK: 114 case ACL_OTHER:
114 case ACL_OTHER: 115 e += sizeof(ext4_acl_entry_short);
115 e += sizeof(ext4_acl_entry_short); 116 break;
116 break; 117
117 118 default:
118 default: 119 goto fail;
119 goto fail;
120 } 120 }
121 } 121 }
122 return (char *)ext_acl; 122 return (char *)ext_acl;
@@ -167,23 +167,23 @@ ext4_get_acl(struct inode *inode, int type)
167 if (!test_opt(inode->i_sb, POSIX_ACL)) 167 if (!test_opt(inode->i_sb, POSIX_ACL))
168 return NULL; 168 return NULL;
169 169
170 switch(type) { 170 switch (type) {
171 case ACL_TYPE_ACCESS: 171 case ACL_TYPE_ACCESS:
172 acl = ext4_iget_acl(inode, &ei->i_acl); 172 acl = ext4_iget_acl(inode, &ei->i_acl);
173 if (acl != EXT4_ACL_NOT_CACHED) 173 if (acl != EXT4_ACL_NOT_CACHED)
174 return acl; 174 return acl;
175 name_index = EXT4_XATTR_INDEX_POSIX_ACL_ACCESS; 175 name_index = EXT4_XATTR_INDEX_POSIX_ACL_ACCESS;
176 break; 176 break;
177 177
178 case ACL_TYPE_DEFAULT: 178 case ACL_TYPE_DEFAULT:
179 acl = ext4_iget_acl(inode, &ei->i_default_acl); 179 acl = ext4_iget_acl(inode, &ei->i_default_acl);
180 if (acl != EXT4_ACL_NOT_CACHED) 180 if (acl != EXT4_ACL_NOT_CACHED)
181 return acl; 181 return acl;
182 name_index = EXT4_XATTR_INDEX_POSIX_ACL_DEFAULT; 182 name_index = EXT4_XATTR_INDEX_POSIX_ACL_DEFAULT;
183 break; 183 break;
184 184
185 default: 185 default:
186 return ERR_PTR(-EINVAL); 186 return ERR_PTR(-EINVAL);
187 } 187 }
188 retval = ext4_xattr_get(inode, name_index, "", NULL, 0); 188 retval = ext4_xattr_get(inode, name_index, "", NULL, 0);
189 if (retval > 0) { 189 if (retval > 0) {
@@ -201,14 +201,14 @@ ext4_get_acl(struct inode *inode, int type)
201 kfree(value); 201 kfree(value);
202 202
203 if (!IS_ERR(acl)) { 203 if (!IS_ERR(acl)) {
204 switch(type) { 204 switch (type) {
205 case ACL_TYPE_ACCESS: 205 case ACL_TYPE_ACCESS:
206 ext4_iset_acl(inode, &ei->i_acl, acl); 206 ext4_iset_acl(inode, &ei->i_acl, acl);
207 break; 207 break;
208 208
209 case ACL_TYPE_DEFAULT: 209 case ACL_TYPE_DEFAULT:
210 ext4_iset_acl(inode, &ei->i_default_acl, acl); 210 ext4_iset_acl(inode, &ei->i_default_acl, acl);
211 break; 211 break;
212 } 212 }
213 } 213 }
214 return acl; 214 return acl;
@@ -232,31 +232,31 @@ ext4_set_acl(handle_t *handle, struct inode *inode, int type,
232 if (S_ISLNK(inode->i_mode)) 232 if (S_ISLNK(inode->i_mode))
233 return -EOPNOTSUPP; 233 return -EOPNOTSUPP;
234 234
235 switch(type) { 235 switch (type) {
236 case ACL_TYPE_ACCESS: 236 case ACL_TYPE_ACCESS:
237 name_index = EXT4_XATTR_INDEX_POSIX_ACL_ACCESS; 237 name_index = EXT4_XATTR_INDEX_POSIX_ACL_ACCESS;
238 if (acl) { 238 if (acl) {
239 mode_t mode = inode->i_mode; 239 mode_t mode = inode->i_mode;
240 error = posix_acl_equiv_mode(acl, &mode); 240 error = posix_acl_equiv_mode(acl, &mode);
241 if (error < 0) 241 if (error < 0)
242 return error; 242 return error;
243 else { 243 else {
244 inode->i_mode = mode; 244 inode->i_mode = mode;
245 ext4_mark_inode_dirty(handle, inode); 245 ext4_mark_inode_dirty(handle, inode);
246 if (error == 0) 246 if (error == 0)
247 acl = NULL; 247 acl = NULL;
248 }
249 } 248 }
250 break; 249 }
250 break;
251 251
252 case ACL_TYPE_DEFAULT: 252 case ACL_TYPE_DEFAULT:
253 name_index = EXT4_XATTR_INDEX_POSIX_ACL_DEFAULT; 253 name_index = EXT4_XATTR_INDEX_POSIX_ACL_DEFAULT;
254 if (!S_ISDIR(inode->i_mode)) 254 if (!S_ISDIR(inode->i_mode))
255 return acl ? -EACCES : 0; 255 return acl ? -EACCES : 0;
256 break; 256 break;
257 257
258 default: 258 default:
259 return -EINVAL; 259 return -EINVAL;
260 } 260 }
261 if (acl) { 261 if (acl) {
262 value = ext4_acl_to_disk(acl, &size); 262 value = ext4_acl_to_disk(acl, &size);
@@ -269,14 +269,14 @@ ext4_set_acl(handle_t *handle, struct inode *inode, int type,
269 269
270 kfree(value); 270 kfree(value);
271 if (!error) { 271 if (!error) {
272 switch(type) { 272 switch (type) {
273 case ACL_TYPE_ACCESS: 273 case ACL_TYPE_ACCESS:
274 ext4_iset_acl(inode, &ei->i_acl, acl); 274 ext4_iset_acl(inode, &ei->i_acl, acl);
275 break; 275 break;
276 276
277 case ACL_TYPE_DEFAULT: 277 case ACL_TYPE_DEFAULT:
278 ext4_iset_acl(inode, &ei->i_default_acl, acl); 278 ext4_iset_acl(inode, &ei->i_default_acl, acl);
279 break; 279 break;
280 } 280 }
281 } 281 }
282 return error; 282 return error;
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index 495ab21b9832..1ae5004e93fc 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -314,25 +314,28 @@ ext4_read_block_bitmap(struct super_block *sb, ext4_group_t block_group)
314 if (unlikely(!bh)) { 314 if (unlikely(!bh)) {
315 ext4_error(sb, __func__, 315 ext4_error(sb, __func__,
316 "Cannot read block bitmap - " 316 "Cannot read block bitmap - "
317 "block_group = %d, block_bitmap = %llu", 317 "block_group = %lu, block_bitmap = %llu",
318 (int)block_group, (unsigned long long)bitmap_blk); 318 block_group, bitmap_blk);
319 return NULL; 319 return NULL;
320 } 320 }
321 if (bh_uptodate_or_lock(bh)) 321 if (bh_uptodate_or_lock(bh))
322 return bh; 322 return bh;
323 323
324 spin_lock(sb_bgl_lock(EXT4_SB(sb), block_group));
324 if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { 325 if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
325 ext4_init_block_bitmap(sb, bh, block_group, desc); 326 ext4_init_block_bitmap(sb, bh, block_group, desc);
326 set_buffer_uptodate(bh); 327 set_buffer_uptodate(bh);
327 unlock_buffer(bh); 328 unlock_buffer(bh);
329 spin_unlock(sb_bgl_lock(EXT4_SB(sb), block_group));
328 return bh; 330 return bh;
329 } 331 }
332 spin_unlock(sb_bgl_lock(EXT4_SB(sb), block_group));
330 if (bh_submit_read(bh) < 0) { 333 if (bh_submit_read(bh) < 0) {
331 put_bh(bh); 334 put_bh(bh);
332 ext4_error(sb, __func__, 335 ext4_error(sb, __func__,
333 "Cannot read block bitmap - " 336 "Cannot read block bitmap - "
334 "block_group = %d, block_bitmap = %llu", 337 "block_group = %lu, block_bitmap = %llu",
335 (int)block_group, (unsigned long long)bitmap_blk); 338 block_group, bitmap_blk);
336 return NULL; 339 return NULL;
337 } 340 }
338 ext4_valid_block_bitmap(sb, desc, block_group, bh); 341 ext4_valid_block_bitmap(sb, desc, block_group, bh);
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 303e41cf7b14..6c7924d9e358 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -1044,7 +1044,6 @@ extern void ext4_mb_update_group_info(struct ext4_group_info *grp,
1044 1044
1045 1045
1046/* inode.c */ 1046/* inode.c */
1047void ext4_da_release_space(struct inode *inode, int used, int to_free);
1048int ext4_forget(handle_t *handle, int is_metadata, struct inode *inode, 1047int ext4_forget(handle_t *handle, int is_metadata, struct inode *inode,
1049 struct buffer_head *bh, ext4_fsblk_t blocknr); 1048 struct buffer_head *bh, ext4_fsblk_t blocknr);
1050struct buffer_head *ext4_getblk(handle_t *, struct inode *, 1049struct buffer_head *ext4_getblk(handle_t *, struct inode *,
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 42c4c0c892ed..612c3d2c3824 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -99,7 +99,7 @@ static int ext4_ext_journal_restart(handle_t *handle, int needed)
99 if (handle->h_buffer_credits > needed) 99 if (handle->h_buffer_credits > needed)
100 return 0; 100 return 0;
101 err = ext4_journal_extend(handle, needed); 101 err = ext4_journal_extend(handle, needed);
102 if (err) 102 if (err <= 0)
103 return err; 103 return err;
104 return ext4_journal_restart(handle, needed); 104 return ext4_journal_restart(handle, needed);
105} 105}
@@ -1441,7 +1441,7 @@ unsigned int ext4_ext_check_overlap(struct inode *inode,
1441 1441
1442 /* 1442 /*
1443 * get the next allocated block if the extent in the path 1443 * get the next allocated block if the extent in the path
1444 * is before the requested block(s) 1444 * is before the requested block(s)
1445 */ 1445 */
1446 if (b2 < b1) { 1446 if (b2 < b1) {
1447 b2 = ext4_ext_next_allocated_block(path); 1447 b2 = ext4_ext_next_allocated_block(path);
@@ -1910,9 +1910,13 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
1910 BUG_ON(b != ex_ee_block + ex_ee_len - 1); 1910 BUG_ON(b != ex_ee_block + ex_ee_len - 1);
1911 } 1911 }
1912 1912
1913 /* at present, extent can't cross block group: */ 1913 /*
1914 /* leaf + bitmap + group desc + sb + inode */ 1914 * 3 for leaf, sb, and inode plus 2 (bmap and group
1915 credits = 5; 1915 * descriptor) for each block group; assume two block
1916 * groups plus ex_ee_len/blocks_per_block_group for
1917 * the worst case
1918 */
1919 credits = 7 + 2*(ex_ee_len/EXT4_BLOCKS_PER_GROUP(inode->i_sb));
1916 if (ex == EXT_FIRST_EXTENT(eh)) { 1920 if (ex == EXT_FIRST_EXTENT(eh)) {
1917 correct_index = 1; 1921 correct_index = 1;
1918 credits += (ext_depth(inode)) + 1; 1922 credits += (ext_depth(inode)) + 1;
@@ -2323,7 +2327,10 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
2323 unsigned int newdepth; 2327 unsigned int newdepth;
2324 /* If extent has less than EXT4_EXT_ZERO_LEN zerout directly */ 2328 /* If extent has less than EXT4_EXT_ZERO_LEN zerout directly */
2325 if (allocated <= EXT4_EXT_ZERO_LEN) { 2329 if (allocated <= EXT4_EXT_ZERO_LEN) {
2326 /* Mark first half uninitialized. 2330 /*
2331 * iblock == ee_block is handled by the zeroout
2332 * at the beginning.
2333 * Mark first half uninitialized.
2327 * Mark second half initialized and zero out the 2334 * Mark second half initialized and zero out the
2328 * initialized extent 2335 * initialized extent
2329 */ 2336 */
@@ -2346,7 +2353,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
2346 ex->ee_len = orig_ex.ee_len; 2353 ex->ee_len = orig_ex.ee_len;
2347 ext4_ext_store_pblock(ex, ext_pblock(&orig_ex)); 2354 ext4_ext_store_pblock(ex, ext_pblock(&orig_ex));
2348 ext4_ext_dirty(handle, inode, path + depth); 2355 ext4_ext_dirty(handle, inode, path + depth);
2349 /* zeroed the full extent */ 2356 /* blocks available from iblock */
2350 return allocated; 2357 return allocated;
2351 2358
2352 } else if (err) 2359 } else if (err)
@@ -2374,6 +2381,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
2374 err = PTR_ERR(path); 2381 err = PTR_ERR(path);
2375 return err; 2382 return err;
2376 } 2383 }
2384 /* get the second half extent details */
2377 ex = path[depth].p_ext; 2385 ex = path[depth].p_ext;
2378 err = ext4_ext_get_access(handle, inode, 2386 err = ext4_ext_get_access(handle, inode,
2379 path + depth); 2387 path + depth);
@@ -2403,6 +2411,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
2403 ext4_ext_store_pblock(ex, ext_pblock(&orig_ex)); 2411 ext4_ext_store_pblock(ex, ext_pblock(&orig_ex));
2404 ext4_ext_dirty(handle, inode, path + depth); 2412 ext4_ext_dirty(handle, inode, path + depth);
2405 /* zeroed the full extent */ 2413 /* zeroed the full extent */
2414 /* blocks available from iblock */
2406 return allocated; 2415 return allocated;
2407 2416
2408 } else if (err) 2417 } else if (err)
@@ -2418,23 +2427,22 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
2418 */ 2427 */
2419 orig_ex.ee_len = cpu_to_le16(ee_len - 2428 orig_ex.ee_len = cpu_to_le16(ee_len -
2420 ext4_ext_get_actual_len(ex3)); 2429 ext4_ext_get_actual_len(ex3));
2421 if (newdepth != depth) { 2430 depth = newdepth;
2422 depth = newdepth; 2431 ext4_ext_drop_refs(path);
2423 ext4_ext_drop_refs(path); 2432 path = ext4_ext_find_extent(inode, iblock, path);
2424 path = ext4_ext_find_extent(inode, iblock, path); 2433 if (IS_ERR(path)) {
2425 if (IS_ERR(path)) { 2434 err = PTR_ERR(path);
2426 err = PTR_ERR(path); 2435 goto out;
2427 goto out;
2428 }
2429 eh = path[depth].p_hdr;
2430 ex = path[depth].p_ext;
2431 if (ex2 != &newex)
2432 ex2 = ex;
2433
2434 err = ext4_ext_get_access(handle, inode, path + depth);
2435 if (err)
2436 goto out;
2437 } 2436 }
2437 eh = path[depth].p_hdr;
2438 ex = path[depth].p_ext;
2439 if (ex2 != &newex)
2440 ex2 = ex;
2441
2442 err = ext4_ext_get_access(handle, inode, path + depth);
2443 if (err)
2444 goto out;
2445
2438 allocated = max_blocks; 2446 allocated = max_blocks;
2439 2447
2440 /* If extent has less than EXT4_EXT_ZERO_LEN and we are trying 2448 /* If extent has less than EXT4_EXT_ZERO_LEN and we are trying
@@ -2452,6 +2460,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
2452 ext4_ext_store_pblock(ex, ext_pblock(&orig_ex)); 2460 ext4_ext_store_pblock(ex, ext_pblock(&orig_ex));
2453 ext4_ext_dirty(handle, inode, path + depth); 2461 ext4_ext_dirty(handle, inode, path + depth);
2454 /* zero out the first half */ 2462 /* zero out the first half */
2463 /* blocks available from iblock */
2455 return allocated; 2464 return allocated;
2456 } 2465 }
2457 } 2466 }
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index a92eb305344f..655e760212b8 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -97,34 +97,44 @@ unsigned ext4_init_inode_bitmap(struct super_block *sb, struct buffer_head *bh,
97 * Return buffer_head of bitmap on success or NULL. 97 * Return buffer_head of bitmap on success or NULL.
98 */ 98 */
99static struct buffer_head * 99static struct buffer_head *
100read_inode_bitmap(struct super_block *sb, ext4_group_t block_group) 100ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group)
101{ 101{
102 struct ext4_group_desc *desc; 102 struct ext4_group_desc *desc;
103 struct buffer_head *bh = NULL; 103 struct buffer_head *bh = NULL;
104 ext4_fsblk_t bitmap_blk;
104 105
105 desc = ext4_get_group_desc(sb, block_group, NULL); 106 desc = ext4_get_group_desc(sb, block_group, NULL);
106 if (!desc) 107 if (!desc)
107 goto error_out; 108 return NULL;
108 if (desc->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)) { 109 bitmap_blk = ext4_inode_bitmap(sb, desc);
109 bh = sb_getblk(sb, ext4_inode_bitmap(sb, desc)); 110 bh = sb_getblk(sb, bitmap_blk);
110 if (!buffer_uptodate(bh)) { 111 if (unlikely(!bh)) {
111 lock_buffer(bh); 112 ext4_error(sb, __func__,
112 if (!buffer_uptodate(bh)) { 113 "Cannot read inode bitmap - "
113 ext4_init_inode_bitmap(sb, bh, block_group, 114 "block_group = %lu, inode_bitmap = %llu",
114 desc); 115 block_group, bitmap_blk);
115 set_buffer_uptodate(bh); 116 return NULL;
116 }
117 unlock_buffer(bh);
118 }
119 } else {
120 bh = sb_bread(sb, ext4_inode_bitmap(sb, desc));
121 } 117 }
122 if (!bh) 118 if (bh_uptodate_or_lock(bh))
123 ext4_error(sb, "read_inode_bitmap", 119 return bh;
120
121 spin_lock(sb_bgl_lock(EXT4_SB(sb), block_group));
122 if (desc->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)) {
123 ext4_init_inode_bitmap(sb, bh, block_group, desc);
124 set_buffer_uptodate(bh);
125 unlock_buffer(bh);
126 spin_unlock(sb_bgl_lock(EXT4_SB(sb), block_group));
127 return bh;
128 }
129 spin_unlock(sb_bgl_lock(EXT4_SB(sb), block_group));
130 if (bh_submit_read(bh) < 0) {
131 put_bh(bh);
132 ext4_error(sb, __func__,
124 "Cannot read inode bitmap - " 133 "Cannot read inode bitmap - "
125 "block_group = %lu, inode_bitmap = %llu", 134 "block_group = %lu, inode_bitmap = %llu",
126 block_group, ext4_inode_bitmap(sb, desc)); 135 block_group, bitmap_blk);
127error_out: 136 return NULL;
137 }
128 return bh; 138 return bh;
129} 139}
130 140
@@ -200,7 +210,7 @@ void ext4_free_inode (handle_t *handle, struct inode * inode)
200 } 210 }
201 block_group = (ino - 1) / EXT4_INODES_PER_GROUP(sb); 211 block_group = (ino - 1) / EXT4_INODES_PER_GROUP(sb);
202 bit = (ino - 1) % EXT4_INODES_PER_GROUP(sb); 212 bit = (ino - 1) % EXT4_INODES_PER_GROUP(sb);
203 bitmap_bh = read_inode_bitmap(sb, block_group); 213 bitmap_bh = ext4_read_inode_bitmap(sb, block_group);
204 if (!bitmap_bh) 214 if (!bitmap_bh)
205 goto error_return; 215 goto error_return;
206 216
@@ -623,7 +633,7 @@ got_group:
623 goto fail; 633 goto fail;
624 634
625 brelse(bitmap_bh); 635 brelse(bitmap_bh);
626 bitmap_bh = read_inode_bitmap(sb, group); 636 bitmap_bh = ext4_read_inode_bitmap(sb, group);
627 if (!bitmap_bh) 637 if (!bitmap_bh)
628 goto fail; 638 goto fail;
629 639
@@ -728,7 +738,7 @@ got:
728 738
729 /* When marking the block group with 739 /* When marking the block group with
730 * ~EXT4_BG_INODE_UNINIT we don't want to depend 740 * ~EXT4_BG_INODE_UNINIT we don't want to depend
731 * on the value of bg_itable_unsed even though 741 * on the value of bg_itable_unused even though
732 * mke2fs could have initialized the same for us. 742 * mke2fs could have initialized the same for us.
733 * Instead we calculated the value below 743 * Instead we calculated the value below
734 */ 744 */
@@ -891,7 +901,7 @@ struct inode *ext4_orphan_get(struct super_block *sb, unsigned long ino)
891 901
892 block_group = (ino - 1) / EXT4_INODES_PER_GROUP(sb); 902 block_group = (ino - 1) / EXT4_INODES_PER_GROUP(sb);
893 bit = (ino - 1) % EXT4_INODES_PER_GROUP(sb); 903 bit = (ino - 1) % EXT4_INODES_PER_GROUP(sb);
894 bitmap_bh = read_inode_bitmap(sb, block_group); 904 bitmap_bh = ext4_read_inode_bitmap(sb, block_group);
895 if (!bitmap_bh) { 905 if (!bitmap_bh) {
896 ext4_warning(sb, __func__, 906 ext4_warning(sb, __func__,
897 "inode bitmap error for orphan %lu", ino); 907 "inode bitmap error for orphan %lu", ino);
@@ -969,7 +979,7 @@ unsigned long ext4_count_free_inodes (struct super_block * sb)
969 continue; 979 continue;
970 desc_count += le16_to_cpu(gdp->bg_free_inodes_count); 980 desc_count += le16_to_cpu(gdp->bg_free_inodes_count);
971 brelse(bitmap_bh); 981 brelse(bitmap_bh);
972 bitmap_bh = read_inode_bitmap(sb, i); 982 bitmap_bh = ext4_read_inode_bitmap(sb, i);
973 if (!bitmap_bh) 983 if (!bitmap_bh)
974 continue; 984 continue;
975 985
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 9843b046c235..59fbbe899acc 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -191,6 +191,7 @@ static int ext4_journal_test_restart(handle_t *handle, struct inode *inode)
191void ext4_delete_inode (struct inode * inode) 191void ext4_delete_inode (struct inode * inode)
192{ 192{
193 handle_t *handle; 193 handle_t *handle;
194 int err;
194 195
195 if (ext4_should_order_data(inode)) 196 if (ext4_should_order_data(inode))
196 ext4_begin_ordered_truncate(inode, 0); 197 ext4_begin_ordered_truncate(inode, 0);
@@ -199,8 +200,9 @@ void ext4_delete_inode (struct inode * inode)
199 if (is_bad_inode(inode)) 200 if (is_bad_inode(inode))
200 goto no_delete; 201 goto no_delete;
201 202
202 handle = start_transaction(inode); 203 handle = ext4_journal_start(inode, blocks_for_truncate(inode)+3);
203 if (IS_ERR(handle)) { 204 if (IS_ERR(handle)) {
205 ext4_std_error(inode->i_sb, PTR_ERR(handle));
204 /* 206 /*
205 * If we're going to skip the normal cleanup, we still need to 207 * If we're going to skip the normal cleanup, we still need to
206 * make sure that the in-core orphan linked list is properly 208 * make sure that the in-core orphan linked list is properly
@@ -213,8 +215,34 @@ void ext4_delete_inode (struct inode * inode)
213 if (IS_SYNC(inode)) 215 if (IS_SYNC(inode))
214 handle->h_sync = 1; 216 handle->h_sync = 1;
215 inode->i_size = 0; 217 inode->i_size = 0;
218 err = ext4_mark_inode_dirty(handle, inode);
219 if (err) {
220 ext4_warning(inode->i_sb, __func__,
221 "couldn't mark inode dirty (err %d)", err);
222 goto stop_handle;
223 }
216 if (inode->i_blocks) 224 if (inode->i_blocks)
217 ext4_truncate(inode); 225 ext4_truncate(inode);
226
227 /*
228 * ext4_ext_truncate() doesn't reserve any slop when it
229 * restarts journal transactions; therefore there may not be
230 * enough credits left in the handle to remove the inode from
231 * the orphan list and set the dtime field.
232 */
233 if (handle->h_buffer_credits < 3) {
234 err = ext4_journal_extend(handle, 3);
235 if (err > 0)
236 err = ext4_journal_restart(handle, 3);
237 if (err != 0) {
238 ext4_warning(inode->i_sb, __func__,
239 "couldn't extend journal (err %d)", err);
240 stop_handle:
241 ext4_journal_stop(handle);
242 goto no_delete;
243 }
244 }
245
218 /* 246 /*
219 * Kill off the orphan record which ext4_truncate created. 247 * Kill off the orphan record which ext4_truncate created.
220 * AKPM: I think this can be inside the above `if'. 248 * AKPM: I think this can be inside the above `if'.
@@ -952,6 +980,67 @@ out:
952 return err; 980 return err;
953} 981}
954 982
983/*
984 * Calculate the number of metadata blocks need to reserve
985 * to allocate @blocks for non extent file based file
986 */
987static int ext4_indirect_calc_metadata_amount(struct inode *inode, int blocks)
988{
989 int icap = EXT4_ADDR_PER_BLOCK(inode->i_sb);
990 int ind_blks, dind_blks, tind_blks;
991
992 /* number of new indirect blocks needed */
993 ind_blks = (blocks + icap - 1) / icap;
994
995 dind_blks = (ind_blks + icap - 1) / icap;
996
997 tind_blks = 1;
998
999 return ind_blks + dind_blks + tind_blks;
1000}
1001
1002/*
1003 * Calculate the number of metadata blocks need to reserve
1004 * to allocate given number of blocks
1005 */
1006static int ext4_calc_metadata_amount(struct inode *inode, int blocks)
1007{
1008 if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL)
1009 return ext4_ext_calc_metadata_amount(inode, blocks);
1010
1011 return ext4_indirect_calc_metadata_amount(inode, blocks);
1012}
1013
1014static void ext4_da_update_reserve_space(struct inode *inode, int used)
1015{
1016 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
1017 int total, mdb, mdb_free;
1018
1019 spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
1020 /* recalculate the number of metablocks that still need to be reserved */
1021 total = EXT4_I(inode)->i_reserved_data_blocks - used;
1022 mdb = ext4_calc_metadata_amount(inode, total);
1023
1024 /* figure out how many metablocks to release */
1025 BUG_ON(mdb > EXT4_I(inode)->i_reserved_meta_blocks);
1026 mdb_free = EXT4_I(inode)->i_reserved_meta_blocks - mdb;
1027
1028 /* Account for allocated meta_blocks */
1029 mdb_free -= EXT4_I(inode)->i_allocated_meta_blocks;
1030
1031 /* update fs free blocks counter for truncate case */
1032 percpu_counter_add(&sbi->s_freeblocks_counter, mdb_free);
1033
1034 /* update per-inode reservations */
1035 BUG_ON(used > EXT4_I(inode)->i_reserved_data_blocks);
1036 EXT4_I(inode)->i_reserved_data_blocks -= used;
1037
1038 BUG_ON(mdb > EXT4_I(inode)->i_reserved_meta_blocks);
1039 EXT4_I(inode)->i_reserved_meta_blocks = mdb;
1040 EXT4_I(inode)->i_allocated_meta_blocks = 0;
1041 spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
1042}
1043
955/* Maximum number of blocks we map for direct IO at once. */ 1044/* Maximum number of blocks we map for direct IO at once. */
956#define DIO_MAX_BLOCKS 4096 1045#define DIO_MAX_BLOCKS 4096
957/* 1046/*
@@ -965,10 +1054,9 @@ out:
965 1054
966 1055
967/* 1056/*
1057 * The ext4_get_blocks_wrap() function tries to look up the requested blocks,
1058 * and returns if the blocks are already mapped.
968 * 1059 *
969 *
970 * ext4_ext4 get_block() wrapper function
971 * It will do a look up first, and returns if the blocks already mapped.
972 * Otherwise it takes the write lock of the i_data_sem and allocate blocks 1060 * Otherwise it takes the write lock of the i_data_sem and allocate blocks
973 * and store the allocated blocks in the result buffer head and mark it 1061 * and store the allocated blocks in the result buffer head and mark it
974 * mapped. 1062 * mapped.
@@ -1069,7 +1157,7 @@ int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, sector_t block,
1069 * which were deferred till now 1157 * which were deferred till now
1070 */ 1158 */
1071 if ((retval > 0) && buffer_delay(bh)) 1159 if ((retval > 0) && buffer_delay(bh))
1072 ext4_da_release_space(inode, retval, 0); 1160 ext4_da_update_reserve_space(inode, retval);
1073 } 1161 }
1074 1162
1075 up_write((&EXT4_I(inode)->i_data_sem)); 1163 up_write((&EXT4_I(inode)->i_data_sem));
@@ -1336,12 +1424,8 @@ static int ext4_ordered_write_end(struct file *file,
1336{ 1424{
1337 handle_t *handle = ext4_journal_current_handle(); 1425 handle_t *handle = ext4_journal_current_handle();
1338 struct inode *inode = mapping->host; 1426 struct inode *inode = mapping->host;
1339 unsigned from, to;
1340 int ret = 0, ret2; 1427 int ret = 0, ret2;
1341 1428
1342 from = pos & (PAGE_CACHE_SIZE - 1);
1343 to = from + len;
1344
1345 ret = ext4_jbd2_file_inode(handle, inode); 1429 ret = ext4_jbd2_file_inode(handle, inode);
1346 1430
1347 if (ret == 0) { 1431 if (ret == 0) {
@@ -1437,36 +1521,6 @@ static int ext4_journalled_write_end(struct file *file,
1437 1521
1438 return ret ? ret : copied; 1522 return ret ? ret : copied;
1439} 1523}
1440/*
1441 * Calculate the number of metadata blocks need to reserve
1442 * to allocate @blocks for non extent file based file
1443 */
1444static int ext4_indirect_calc_metadata_amount(struct inode *inode, int blocks)
1445{
1446 int icap = EXT4_ADDR_PER_BLOCK(inode->i_sb);
1447 int ind_blks, dind_blks, tind_blks;
1448
1449 /* number of new indirect blocks needed */
1450 ind_blks = (blocks + icap - 1) / icap;
1451
1452 dind_blks = (ind_blks + icap - 1) / icap;
1453
1454 tind_blks = 1;
1455
1456 return ind_blks + dind_blks + tind_blks;
1457}
1458
1459/*
1460 * Calculate the number of metadata blocks need to reserve
1461 * to allocate given number of blocks
1462 */
1463static int ext4_calc_metadata_amount(struct inode *inode, int blocks)
1464{
1465 if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL)
1466 return ext4_ext_calc_metadata_amount(inode, blocks);
1467
1468 return ext4_indirect_calc_metadata_amount(inode, blocks);
1469}
1470 1524
1471static int ext4_da_reserve_space(struct inode *inode, int nrblocks) 1525static int ext4_da_reserve_space(struct inode *inode, int nrblocks)
1472{ 1526{
@@ -1490,7 +1544,6 @@ static int ext4_da_reserve_space(struct inode *inode, int nrblocks)
1490 spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); 1544 spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
1491 return -ENOSPC; 1545 return -ENOSPC;
1492 } 1546 }
1493
1494 /* reduce fs free blocks counter */ 1547 /* reduce fs free blocks counter */
1495 percpu_counter_sub(&sbi->s_freeblocks_counter, total); 1548 percpu_counter_sub(&sbi->s_freeblocks_counter, total);
1496 1549
@@ -1501,35 +1554,31 @@ static int ext4_da_reserve_space(struct inode *inode, int nrblocks)
1501 return 0; /* success */ 1554 return 0; /* success */
1502} 1555}
1503 1556
1504void ext4_da_release_space(struct inode *inode, int used, int to_free) 1557static void ext4_da_release_space(struct inode *inode, int to_free)
1505{ 1558{
1506 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); 1559 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
1507 int total, mdb, mdb_free, release; 1560 int total, mdb, mdb_free, release;
1508 1561
1509 spin_lock(&EXT4_I(inode)->i_block_reservation_lock); 1562 spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
1510 /* recalculate the number of metablocks still need to be reserved */ 1563 /* recalculate the number of metablocks still need to be reserved */
1511 total = EXT4_I(inode)->i_reserved_data_blocks - used - to_free; 1564 total = EXT4_I(inode)->i_reserved_data_blocks - to_free;
1512 mdb = ext4_calc_metadata_amount(inode, total); 1565 mdb = ext4_calc_metadata_amount(inode, total);
1513 1566
1514 /* figure out how many metablocks to release */ 1567 /* figure out how many metablocks to release */
1515 BUG_ON(mdb > EXT4_I(inode)->i_reserved_meta_blocks); 1568 BUG_ON(mdb > EXT4_I(inode)->i_reserved_meta_blocks);
1516 mdb_free = EXT4_I(inode)->i_reserved_meta_blocks - mdb; 1569 mdb_free = EXT4_I(inode)->i_reserved_meta_blocks - mdb;
1517 1570
1518 /* Account for allocated meta_blocks */
1519 mdb_free -= EXT4_I(inode)->i_allocated_meta_blocks;
1520
1521 release = to_free + mdb_free; 1571 release = to_free + mdb_free;
1522 1572
1523 /* update fs free blocks counter for truncate case */ 1573 /* update fs free blocks counter for truncate case */
1524 percpu_counter_add(&sbi->s_freeblocks_counter, release); 1574 percpu_counter_add(&sbi->s_freeblocks_counter, release);
1525 1575
1526 /* update per-inode reservations */ 1576 /* update per-inode reservations */
1527 BUG_ON(used + to_free > EXT4_I(inode)->i_reserved_data_blocks); 1577 BUG_ON(to_free > EXT4_I(inode)->i_reserved_data_blocks);
1528 EXT4_I(inode)->i_reserved_data_blocks -= (used + to_free); 1578 EXT4_I(inode)->i_reserved_data_blocks -= to_free;
1529 1579
1530 BUG_ON(mdb > EXT4_I(inode)->i_reserved_meta_blocks); 1580 BUG_ON(mdb > EXT4_I(inode)->i_reserved_meta_blocks);
1531 EXT4_I(inode)->i_reserved_meta_blocks = mdb; 1581 EXT4_I(inode)->i_reserved_meta_blocks = mdb;
1532 EXT4_I(inode)->i_allocated_meta_blocks = 0;
1533 spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); 1582 spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
1534} 1583}
1535 1584
@@ -1551,7 +1600,7 @@ static void ext4_da_page_release_reservation(struct page *page,
1551 } 1600 }
1552 curr_off = next_off; 1601 curr_off = next_off;
1553 } while ((bh = bh->b_this_page) != head); 1602 } while ((bh = bh->b_this_page) != head);
1554 ext4_da_release_space(page->mapping->host, 0, to_release); 1603 ext4_da_release_space(page->mapping->host, to_release);
1555} 1604}
1556 1605
1557/* 1606/*
@@ -2280,8 +2329,11 @@ retry:
2280 } 2329 }
2281 2330
2282 page = __grab_cache_page(mapping, index); 2331 page = __grab_cache_page(mapping, index);
2283 if (!page) 2332 if (!page) {
2284 return -ENOMEM; 2333 ext4_journal_stop(handle);
2334 ret = -ENOMEM;
2335 goto out;
2336 }
2285 *pagep = page; 2337 *pagep = page;
2286 2338
2287 ret = block_write_begin(file, mapping, pos, len, flags, pagep, fsdata, 2339 ret = block_write_begin(file, mapping, pos, len, flags, pagep, fsdata,
@@ -3590,6 +3642,16 @@ static int __ext4_get_inode_loc(struct inode *inode,
3590 } 3642 }
3591 if (!buffer_uptodate(bh)) { 3643 if (!buffer_uptodate(bh)) {
3592 lock_buffer(bh); 3644 lock_buffer(bh);
3645
3646 /*
3647 * If the buffer has the write error flag, we have failed
3648 * to write out another inode in the same block. In this
3649 * case, we don't have to read the block because we may
3650 * read the old inode data successfully.
3651 */
3652 if (buffer_write_io_error(bh) && !buffer_uptodate(bh))
3653 set_buffer_uptodate(bh);
3654
3593 if (buffer_uptodate(bh)) { 3655 if (buffer_uptodate(bh)) {
3594 /* someone brought it uptodate while we waited */ 3656 /* someone brought it uptodate while we waited */
3595 unlock_buffer(bh); 3657 unlock_buffer(bh);
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 8d141a25bbee..865e9ddb44d4 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -787,13 +787,16 @@ static int ext4_mb_init_cache(struct page *page, char *incore)
787 if (bh_uptodate_or_lock(bh[i])) 787 if (bh_uptodate_or_lock(bh[i]))
788 continue; 788 continue;
789 789
790 spin_lock(sb_bgl_lock(EXT4_SB(sb), first_group + i));
790 if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { 791 if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
791 ext4_init_block_bitmap(sb, bh[i], 792 ext4_init_block_bitmap(sb, bh[i],
792 first_group + i, desc); 793 first_group + i, desc);
793 set_buffer_uptodate(bh[i]); 794 set_buffer_uptodate(bh[i]);
794 unlock_buffer(bh[i]); 795 unlock_buffer(bh[i]);
796 spin_unlock(sb_bgl_lock(EXT4_SB(sb), first_group + i));
795 continue; 797 continue;
796 } 798 }
799 spin_unlock(sb_bgl_lock(EXT4_SB(sb), first_group + i));
797 get_bh(bh[i]); 800 get_bh(bh[i]);
798 bh[i]->b_end_io = end_buffer_read_sync; 801 bh[i]->b_end_io = end_buffer_read_sync;
799 submit_bh(READ, bh[i]); 802 submit_bh(READ, bh[i]);
@@ -2477,7 +2480,7 @@ err_freesgi:
2477int ext4_mb_init(struct super_block *sb, int needs_recovery) 2480int ext4_mb_init(struct super_block *sb, int needs_recovery)
2478{ 2481{
2479 struct ext4_sb_info *sbi = EXT4_SB(sb); 2482 struct ext4_sb_info *sbi = EXT4_SB(sb);
2480 unsigned i; 2483 unsigned i, j;
2481 unsigned offset; 2484 unsigned offset;
2482 unsigned max; 2485 unsigned max;
2483 int ret; 2486 int ret;
@@ -2537,7 +2540,7 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery)
2537 sbi->s_mb_history_filter = EXT4_MB_HISTORY_DEFAULT; 2540 sbi->s_mb_history_filter = EXT4_MB_HISTORY_DEFAULT;
2538 sbi->s_mb_group_prealloc = MB_DEFAULT_GROUP_PREALLOC; 2541 sbi->s_mb_group_prealloc = MB_DEFAULT_GROUP_PREALLOC;
2539 2542
2540 i = sizeof(struct ext4_locality_group) * NR_CPUS; 2543 i = sizeof(struct ext4_locality_group) * nr_cpu_ids;
2541 sbi->s_locality_groups = kmalloc(i, GFP_KERNEL); 2544 sbi->s_locality_groups = kmalloc(i, GFP_KERNEL);
2542 if (sbi->s_locality_groups == NULL) { 2545 if (sbi->s_locality_groups == NULL) {
2543 clear_opt(sbi->s_mount_opt, MBALLOC); 2546 clear_opt(sbi->s_mount_opt, MBALLOC);
@@ -2545,11 +2548,12 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery)
2545 kfree(sbi->s_mb_maxs); 2548 kfree(sbi->s_mb_maxs);
2546 return -ENOMEM; 2549 return -ENOMEM;
2547 } 2550 }
2548 for (i = 0; i < NR_CPUS; i++) { 2551 for (i = 0; i < nr_cpu_ids; i++) {
2549 struct ext4_locality_group *lg; 2552 struct ext4_locality_group *lg;
2550 lg = &sbi->s_locality_groups[i]; 2553 lg = &sbi->s_locality_groups[i];
2551 mutex_init(&lg->lg_mutex); 2554 mutex_init(&lg->lg_mutex);
2552 INIT_LIST_HEAD(&lg->lg_prealloc_list); 2555 for (j = 0; j < PREALLOC_TB_SIZE; j++)
2556 INIT_LIST_HEAD(&lg->lg_prealloc_list[j]);
2553 spin_lock_init(&lg->lg_prealloc_lock); 2557 spin_lock_init(&lg->lg_prealloc_lock);
2554 } 2558 }
2555 2559
@@ -3260,6 +3264,7 @@ static void ext4_mb_use_group_pa(struct ext4_allocation_context *ac,
3260 struct ext4_prealloc_space *pa) 3264 struct ext4_prealloc_space *pa)
3261{ 3265{
3262 unsigned int len = ac->ac_o_ex.fe_len; 3266 unsigned int len = ac->ac_o_ex.fe_len;
3267
3263 ext4_get_group_no_and_offset(ac->ac_sb, pa->pa_pstart, 3268 ext4_get_group_no_and_offset(ac->ac_sb, pa->pa_pstart,
3264 &ac->ac_b_ex.fe_group, 3269 &ac->ac_b_ex.fe_group,
3265 &ac->ac_b_ex.fe_start); 3270 &ac->ac_b_ex.fe_start);
@@ -3282,6 +3287,7 @@ static void ext4_mb_use_group_pa(struct ext4_allocation_context *ac,
3282static noinline_for_stack int 3287static noinline_for_stack int
3283ext4_mb_use_preallocated(struct ext4_allocation_context *ac) 3288ext4_mb_use_preallocated(struct ext4_allocation_context *ac)
3284{ 3289{
3290 int order, i;
3285 struct ext4_inode_info *ei = EXT4_I(ac->ac_inode); 3291 struct ext4_inode_info *ei = EXT4_I(ac->ac_inode);
3286 struct ext4_locality_group *lg; 3292 struct ext4_locality_group *lg;
3287 struct ext4_prealloc_space *pa; 3293 struct ext4_prealloc_space *pa;
@@ -3322,22 +3328,29 @@ ext4_mb_use_preallocated(struct ext4_allocation_context *ac)
3322 lg = ac->ac_lg; 3328 lg = ac->ac_lg;
3323 if (lg == NULL) 3329 if (lg == NULL)
3324 return 0; 3330 return 0;
3325 3331 order = fls(ac->ac_o_ex.fe_len) - 1;
3326 rcu_read_lock(); 3332 if (order > PREALLOC_TB_SIZE - 1)
3327 list_for_each_entry_rcu(pa, &lg->lg_prealloc_list, pa_inode_list) { 3333 /* The max size of hash table is PREALLOC_TB_SIZE */
3328 spin_lock(&pa->pa_lock); 3334 order = PREALLOC_TB_SIZE - 1;
3329 if (pa->pa_deleted == 0 && pa->pa_free >= ac->ac_o_ex.fe_len) { 3335
3330 atomic_inc(&pa->pa_count); 3336 for (i = order; i < PREALLOC_TB_SIZE; i++) {
3331 ext4_mb_use_group_pa(ac, pa); 3337 rcu_read_lock();
3338 list_for_each_entry_rcu(pa, &lg->lg_prealloc_list[i],
3339 pa_inode_list) {
3340 spin_lock(&pa->pa_lock);
3341 if (pa->pa_deleted == 0 &&
3342 pa->pa_free >= ac->ac_o_ex.fe_len) {
3343 atomic_inc(&pa->pa_count);
3344 ext4_mb_use_group_pa(ac, pa);
3345 spin_unlock(&pa->pa_lock);
3346 ac->ac_criteria = 20;
3347 rcu_read_unlock();
3348 return 1;
3349 }
3332 spin_unlock(&pa->pa_lock); 3350 spin_unlock(&pa->pa_lock);
3333 ac->ac_criteria = 20;
3334 rcu_read_unlock();
3335 return 1;
3336 } 3351 }
3337 spin_unlock(&pa->pa_lock); 3352 rcu_read_unlock();
3338 } 3353 }
3339 rcu_read_unlock();
3340
3341 return 0; 3354 return 0;
3342} 3355}
3343 3356
@@ -3560,6 +3573,7 @@ ext4_mb_new_group_pa(struct ext4_allocation_context *ac)
3560 pa->pa_free = pa->pa_len; 3573 pa->pa_free = pa->pa_len;
3561 atomic_set(&pa->pa_count, 1); 3574 atomic_set(&pa->pa_count, 1);
3562 spin_lock_init(&pa->pa_lock); 3575 spin_lock_init(&pa->pa_lock);
3576 INIT_LIST_HEAD(&pa->pa_inode_list);
3563 pa->pa_deleted = 0; 3577 pa->pa_deleted = 0;
3564 pa->pa_linear = 1; 3578 pa->pa_linear = 1;
3565 3579
@@ -3580,10 +3594,10 @@ ext4_mb_new_group_pa(struct ext4_allocation_context *ac)
3580 list_add(&pa->pa_group_list, &grp->bb_prealloc_list); 3594 list_add(&pa->pa_group_list, &grp->bb_prealloc_list);
3581 ext4_unlock_group(sb, ac->ac_b_ex.fe_group); 3595 ext4_unlock_group(sb, ac->ac_b_ex.fe_group);
3582 3596
3583 spin_lock(pa->pa_obj_lock); 3597 /*
3584 list_add_tail_rcu(&pa->pa_inode_list, &lg->lg_prealloc_list); 3598 * We will later add the new pa to the right bucket
3585 spin_unlock(pa->pa_obj_lock); 3599 * after updating the pa_free in ext4_mb_release_context
3586 3600 */
3587 return 0; 3601 return 0;
3588} 3602}
3589 3603
@@ -3733,20 +3747,23 @@ ext4_mb_discard_group_preallocations(struct super_block *sb,
3733 3747
3734 bitmap_bh = ext4_read_block_bitmap(sb, group); 3748 bitmap_bh = ext4_read_block_bitmap(sb, group);
3735 if (bitmap_bh == NULL) { 3749 if (bitmap_bh == NULL) {
3736 /* error handling here */ 3750 ext4_error(sb, __func__, "Error in reading block "
3737 ext4_mb_release_desc(&e4b); 3751 "bitmap for %lu\n", group);
3738 BUG_ON(bitmap_bh == NULL); 3752 return 0;
3739 } 3753 }
3740 3754
3741 err = ext4_mb_load_buddy(sb, group, &e4b); 3755 err = ext4_mb_load_buddy(sb, group, &e4b);
3742 BUG_ON(err != 0); /* error handling here */ 3756 if (err) {
3757 ext4_error(sb, __func__, "Error in loading buddy "
3758 "information for %lu\n", group);
3759 put_bh(bitmap_bh);
3760 return 0;
3761 }
3743 3762
3744 if (needed == 0) 3763 if (needed == 0)
3745 needed = EXT4_BLOCKS_PER_GROUP(sb) + 1; 3764 needed = EXT4_BLOCKS_PER_GROUP(sb) + 1;
3746 3765
3747 grp = ext4_get_group_info(sb, group);
3748 INIT_LIST_HEAD(&list); 3766 INIT_LIST_HEAD(&list);
3749
3750 ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS); 3767 ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS);
3751repeat: 3768repeat:
3752 ext4_lock_group(sb, group); 3769 ext4_lock_group(sb, group);
@@ -3903,13 +3920,18 @@ repeat:
3903 ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, NULL); 3920 ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, NULL);
3904 3921
3905 err = ext4_mb_load_buddy(sb, group, &e4b); 3922 err = ext4_mb_load_buddy(sb, group, &e4b);
3906 BUG_ON(err != 0); /* error handling here */ 3923 if (err) {
3924 ext4_error(sb, __func__, "Error in loading buddy "
3925 "information for %lu\n", group);
3926 continue;
3927 }
3907 3928
3908 bitmap_bh = ext4_read_block_bitmap(sb, group); 3929 bitmap_bh = ext4_read_block_bitmap(sb, group);
3909 if (bitmap_bh == NULL) { 3930 if (bitmap_bh == NULL) {
3910 /* error handling here */ 3931 ext4_error(sb, __func__, "Error in reading block "
3932 "bitmap for %lu\n", group);
3911 ext4_mb_release_desc(&e4b); 3933 ext4_mb_release_desc(&e4b);
3912 BUG_ON(bitmap_bh == NULL); 3934 continue;
3913 } 3935 }
3914 3936
3915 ext4_lock_group(sb, group); 3937 ext4_lock_group(sb, group);
@@ -4112,22 +4134,168 @@ ext4_mb_initialize_context(struct ext4_allocation_context *ac,
4112 4134
4113} 4135}
4114 4136
4137static noinline_for_stack void
4138ext4_mb_discard_lg_preallocations(struct super_block *sb,
4139 struct ext4_locality_group *lg,
4140 int order, int total_entries)
4141{
4142 ext4_group_t group = 0;
4143 struct ext4_buddy e4b;
4144 struct list_head discard_list;
4145 struct ext4_prealloc_space *pa, *tmp;
4146 struct ext4_allocation_context *ac;
4147
4148 mb_debug("discard locality group preallocation\n");
4149
4150 INIT_LIST_HEAD(&discard_list);
4151 ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS);
4152
4153 spin_lock(&lg->lg_prealloc_lock);
4154 list_for_each_entry_rcu(pa, &lg->lg_prealloc_list[order],
4155 pa_inode_list) {
4156 spin_lock(&pa->pa_lock);
4157 if (atomic_read(&pa->pa_count)) {
4158 /*
4159 * This is the pa that we just used
4160 * for block allocation. So don't
4161 * free that
4162 */
4163 spin_unlock(&pa->pa_lock);
4164 continue;
4165 }
4166 if (pa->pa_deleted) {
4167 spin_unlock(&pa->pa_lock);
4168 continue;
4169 }
4170 /* only lg prealloc space */
4171 BUG_ON(!pa->pa_linear);
4172
4173 /* seems this one can be freed ... */
4174 pa->pa_deleted = 1;
4175 spin_unlock(&pa->pa_lock);
4176
4177 list_del_rcu(&pa->pa_inode_list);
4178 list_add(&pa->u.pa_tmp_list, &discard_list);
4179
4180 total_entries--;
4181 if (total_entries <= 5) {
4182 /*
4183 * we want to keep only 5 entries
4184 * allowing it to grow to 8. This
4185 * mak sure we don't call discard
4186 * soon for this list.
4187 */
4188 break;
4189 }
4190 }
4191 spin_unlock(&lg->lg_prealloc_lock);
4192
4193 list_for_each_entry_safe(pa, tmp, &discard_list, u.pa_tmp_list) {
4194
4195 ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, NULL);
4196 if (ext4_mb_load_buddy(sb, group, &e4b)) {
4197 ext4_error(sb, __func__, "Error in loading buddy "
4198 "information for %lu\n", group);
4199 continue;
4200 }
4201 ext4_lock_group(sb, group);
4202 list_del(&pa->pa_group_list);
4203 ext4_mb_release_group_pa(&e4b, pa, ac);
4204 ext4_unlock_group(sb, group);
4205
4206 ext4_mb_release_desc(&e4b);
4207 list_del(&pa->u.pa_tmp_list);
4208 call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback);
4209 }
4210 if (ac)
4211 kmem_cache_free(ext4_ac_cachep, ac);
4212}
4213
4214/*
4215 * We have incremented pa_count. So it cannot be freed at this
4216 * point. Also we hold lg_mutex. So no parallel allocation is
4217 * possible from this lg. That means pa_free cannot be updated.
4218 *
4219 * A parallel ext4_mb_discard_group_preallocations is possible.
4220 * which can cause the lg_prealloc_list to be updated.
4221 */
4222
4223static void ext4_mb_add_n_trim(struct ext4_allocation_context *ac)
4224{
4225 int order, added = 0, lg_prealloc_count = 1;
4226 struct super_block *sb = ac->ac_sb;
4227 struct ext4_locality_group *lg = ac->ac_lg;
4228 struct ext4_prealloc_space *tmp_pa, *pa = ac->ac_pa;
4229
4230 order = fls(pa->pa_free) - 1;
4231 if (order > PREALLOC_TB_SIZE - 1)
4232 /* The max size of hash table is PREALLOC_TB_SIZE */
4233 order = PREALLOC_TB_SIZE - 1;
4234 /* Add the prealloc space to lg */
4235 rcu_read_lock();
4236 list_for_each_entry_rcu(tmp_pa, &lg->lg_prealloc_list[order],
4237 pa_inode_list) {
4238 spin_lock(&tmp_pa->pa_lock);
4239 if (tmp_pa->pa_deleted) {
4240 spin_unlock(&pa->pa_lock);
4241 continue;
4242 }
4243 if (!added && pa->pa_free < tmp_pa->pa_free) {
4244 /* Add to the tail of the previous entry */
4245 list_add_tail_rcu(&pa->pa_inode_list,
4246 &tmp_pa->pa_inode_list);
4247 added = 1;
4248 /*
4249 * we want to count the total
4250 * number of entries in the list
4251 */
4252 }
4253 spin_unlock(&tmp_pa->pa_lock);
4254 lg_prealloc_count++;
4255 }
4256 if (!added)
4257 list_add_tail_rcu(&pa->pa_inode_list,
4258 &lg->lg_prealloc_list[order]);
4259 rcu_read_unlock();
4260
4261 /* Now trim the list to be not more than 8 elements */
4262 if (lg_prealloc_count > 8) {
4263 ext4_mb_discard_lg_preallocations(sb, lg,
4264 order, lg_prealloc_count);
4265 return;
4266 }
4267 return ;
4268}
4269
4115/* 4270/*
4116 * release all resource we used in allocation 4271 * release all resource we used in allocation
4117 */ 4272 */
4118static int ext4_mb_release_context(struct ext4_allocation_context *ac) 4273static int ext4_mb_release_context(struct ext4_allocation_context *ac)
4119{ 4274{
4120 if (ac->ac_pa) { 4275 struct ext4_prealloc_space *pa = ac->ac_pa;
4121 if (ac->ac_pa->pa_linear) { 4276 if (pa) {
4277 if (pa->pa_linear) {
4122 /* see comment in ext4_mb_use_group_pa() */ 4278 /* see comment in ext4_mb_use_group_pa() */
4123 spin_lock(&ac->ac_pa->pa_lock); 4279 spin_lock(&pa->pa_lock);
4124 ac->ac_pa->pa_pstart += ac->ac_b_ex.fe_len; 4280 pa->pa_pstart += ac->ac_b_ex.fe_len;
4125 ac->ac_pa->pa_lstart += ac->ac_b_ex.fe_len; 4281 pa->pa_lstart += ac->ac_b_ex.fe_len;
4126 ac->ac_pa->pa_free -= ac->ac_b_ex.fe_len; 4282 pa->pa_free -= ac->ac_b_ex.fe_len;
4127 ac->ac_pa->pa_len -= ac->ac_b_ex.fe_len; 4283 pa->pa_len -= ac->ac_b_ex.fe_len;
4128 spin_unlock(&ac->ac_pa->pa_lock); 4284 spin_unlock(&pa->pa_lock);
4285 /*
4286 * We want to add the pa to the right bucket.
4287 * Remove it from the list and while adding
4288 * make sure the list to which we are adding
4289 * doesn't grow big.
4290 */
4291 if (likely(pa->pa_free)) {
4292 spin_lock(pa->pa_obj_lock);
4293 list_del_rcu(&pa->pa_inode_list);
4294 spin_unlock(pa->pa_obj_lock);
4295 ext4_mb_add_n_trim(ac);
4296 }
4129 } 4297 }
4130 ext4_mb_put_pa(ac, ac->ac_sb, ac->ac_pa); 4298 ext4_mb_put_pa(ac, ac->ac_sb, pa);
4131 } 4299 }
4132 if (ac->ac_bitmap_page) 4300 if (ac->ac_bitmap_page)
4133 page_cache_release(ac->ac_bitmap_page); 4301 page_cache_release(ac->ac_bitmap_page);
@@ -4420,11 +4588,15 @@ do_more:
4420 count -= overflow; 4588 count -= overflow;
4421 } 4589 }
4422 bitmap_bh = ext4_read_block_bitmap(sb, block_group); 4590 bitmap_bh = ext4_read_block_bitmap(sb, block_group);
4423 if (!bitmap_bh) 4591 if (!bitmap_bh) {
4592 err = -EIO;
4424 goto error_return; 4593 goto error_return;
4594 }
4425 gdp = ext4_get_group_desc(sb, block_group, &gd_bh); 4595 gdp = ext4_get_group_desc(sb, block_group, &gd_bh);
4426 if (!gdp) 4596 if (!gdp) {
4597 err = -EIO;
4427 goto error_return; 4598 goto error_return;
4599 }
4428 4600
4429 if (in_range(ext4_block_bitmap(sb, gdp), block, count) || 4601 if (in_range(ext4_block_bitmap(sb, gdp), block, count) ||
4430 in_range(ext4_inode_bitmap(sb, gdp), block, count) || 4602 in_range(ext4_inode_bitmap(sb, gdp), block, count) ||
diff --git a/fs/ext4/mballoc.h b/fs/ext4/mballoc.h
index bfe6add46bcf..c7c9906c2a75 100644
--- a/fs/ext4/mballoc.h
+++ b/fs/ext4/mballoc.h
@@ -164,11 +164,17 @@ struct ext4_free_extent {
164 * Locality group: 164 * Locality group:
165 * we try to group all related changes together 165 * we try to group all related changes together
166 * so that writeback can flush/allocate them together as well 166 * so that writeback can flush/allocate them together as well
167 * Size of lg_prealloc_list hash is determined by MB_DEFAULT_GROUP_PREALLOC
168 * (512). We store prealloc space into the hash based on the pa_free blocks
169 * order value, i.e. fls(pa_free) - 1.
167 */ 170 */
171#define PREALLOC_TB_SIZE 10
168struct ext4_locality_group { 172struct ext4_locality_group {
169 /* for allocator */ 173 /* for allocator */
170 struct mutex lg_mutex; /* to serialize allocates */ 174 /* to serialize allocates */
171 struct list_head lg_prealloc_list;/* list of preallocations */ 175 struct mutex lg_mutex;
176 /* list of preallocations */
177 struct list_head lg_prealloc_list[PREALLOC_TB_SIZE];
172 spinlock_t lg_prealloc_lock; 178 spinlock_t lg_prealloc_lock;
173}; 179};
174 180
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index f000fbe2cd93..0a9265164265 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -73,7 +73,7 @@ static int verify_group_input(struct super_block *sb,
73 "Inode bitmap not in group (block %llu)", 73 "Inode bitmap not in group (block %llu)",
74 (unsigned long long)input->inode_bitmap); 74 (unsigned long long)input->inode_bitmap);
75 else if (outside(input->inode_table, start, end) || 75 else if (outside(input->inode_table, start, end) ||
76 outside(itend - 1, start, end)) 76 outside(itend - 1, start, end))
77 ext4_warning(sb, __func__, 77 ext4_warning(sb, __func__,
78 "Inode table not in group (blocks %llu-%llu)", 78 "Inode table not in group (blocks %llu-%llu)",
79 (unsigned long long)input->inode_table, itend - 1); 79 (unsigned long long)input->inode_table, itend - 1);
@@ -104,7 +104,7 @@ static int verify_group_input(struct super_block *sb,
104 (unsigned long long)input->inode_bitmap, 104 (unsigned long long)input->inode_bitmap,
105 start, metaend - 1); 105 start, metaend - 1);
106 else if (inside(input->inode_table, start, metaend) || 106 else if (inside(input->inode_table, start, metaend) ||
107 inside(itend - 1, start, metaend)) 107 inside(itend - 1, start, metaend))
108 ext4_warning(sb, __func__, 108 ext4_warning(sb, __func__,
109 "Inode table (%llu-%llu) overlaps" 109 "Inode table (%llu-%llu) overlaps"
110 "GDT table (%llu-%llu)", 110 "GDT table (%llu-%llu)",
@@ -158,9 +158,9 @@ static int extend_or_restart_transaction(handle_t *handle, int thresh,
158 if (err) { 158 if (err) {
159 if ((err = ext4_journal_restart(handle, EXT4_MAX_TRANS_DATA))) 159 if ((err = ext4_journal_restart(handle, EXT4_MAX_TRANS_DATA)))
160 return err; 160 return err;
161 if ((err = ext4_journal_get_write_access(handle, bh))) 161 if ((err = ext4_journal_get_write_access(handle, bh)))
162 return err; 162 return err;
163 } 163 }
164 164
165 return 0; 165 return 0;
166} 166}
@@ -416,11 +416,11 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
416 "EXT4-fs: ext4_add_new_gdb: adding group block %lu\n", 416 "EXT4-fs: ext4_add_new_gdb: adding group block %lu\n",
417 gdb_num); 417 gdb_num);
418 418
419 /* 419 /*
420 * If we are not using the primary superblock/GDT copy don't resize, 420 * If we are not using the primary superblock/GDT copy don't resize,
421 * because the user tools have no way of handling this. Probably a 421 * because the user tools have no way of handling this. Probably a
422 * bad time to do it anyways. 422 * bad time to do it anyways.
423 */ 423 */
424 if (EXT4_SB(sb)->s_sbh->b_blocknr != 424 if (EXT4_SB(sb)->s_sbh->b_blocknr !=
425 le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block)) { 425 le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block)) {
426 ext4_warning(sb, __func__, 426 ext4_warning(sb, __func__,
@@ -507,14 +507,14 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
507 return 0; 507 return 0;
508 508
509exit_inode: 509exit_inode:
510 //ext4_journal_release_buffer(handle, iloc.bh); 510 /* ext4_journal_release_buffer(handle, iloc.bh); */
511 brelse(iloc.bh); 511 brelse(iloc.bh);
512exit_dindj: 512exit_dindj:
513 //ext4_journal_release_buffer(handle, dind); 513 /* ext4_journal_release_buffer(handle, dind); */
514exit_primary: 514exit_primary:
515 //ext4_journal_release_buffer(handle, *primary); 515 /* ext4_journal_release_buffer(handle, *primary); */
516exit_sbh: 516exit_sbh:
517 //ext4_journal_release_buffer(handle, *primary); 517 /* ext4_journal_release_buffer(handle, *primary); */
518exit_dind: 518exit_dind:
519 brelse(dind); 519 brelse(dind);
520exit_bh: 520exit_bh:
@@ -818,12 +818,12 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
818 if ((err = ext4_journal_get_write_access(handle, sbi->s_sbh))) 818 if ((err = ext4_journal_get_write_access(handle, sbi->s_sbh)))
819 goto exit_journal; 819 goto exit_journal;
820 820
821 /* 821 /*
822 * We will only either add reserved group blocks to a backup group 822 * We will only either add reserved group blocks to a backup group
823 * or remove reserved blocks for the first group in a new group block. 823 * or remove reserved blocks for the first group in a new group block.
824 * Doing both would be mean more complex code, and sane people don't 824 * Doing both would be mean more complex code, and sane people don't
825 * use non-sparse filesystems anymore. This is already checked above. 825 * use non-sparse filesystems anymore. This is already checked above.
826 */ 826 */
827 if (gdb_off) { 827 if (gdb_off) {
828 primary = sbi->s_group_desc[gdb_num]; 828 primary = sbi->s_group_desc[gdb_num];
829 if ((err = ext4_journal_get_write_access(handle, primary))) 829 if ((err = ext4_journal_get_write_access(handle, primary)))
@@ -835,24 +835,24 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
835 } else if ((err = add_new_gdb(handle, inode, input, &primary))) 835 } else if ((err = add_new_gdb(handle, inode, input, &primary)))
836 goto exit_journal; 836 goto exit_journal;
837 837
838 /* 838 /*
839 * OK, now we've set up the new group. Time to make it active. 839 * OK, now we've set up the new group. Time to make it active.
840 * 840 *
841 * Current kernels don't lock all allocations via lock_super(), 841 * Current kernels don't lock all allocations via lock_super(),
842 * so we have to be safe wrt. concurrent accesses the group 842 * so we have to be safe wrt. concurrent accesses the group
843 * data. So we need to be careful to set all of the relevant 843 * data. So we need to be careful to set all of the relevant
844 * group descriptor data etc. *before* we enable the group. 844 * group descriptor data etc. *before* we enable the group.
845 * 845 *
846 * The key field here is sbi->s_groups_count: as long as 846 * The key field here is sbi->s_groups_count: as long as
847 * that retains its old value, nobody is going to access the new 847 * that retains its old value, nobody is going to access the new
848 * group. 848 * group.
849 * 849 *
850 * So first we update all the descriptor metadata for the new 850 * So first we update all the descriptor metadata for the new
851 * group; then we update the total disk blocks count; then we 851 * group; then we update the total disk blocks count; then we
852 * update the groups count to enable the group; then finally we 852 * update the groups count to enable the group; then finally we
853 * update the free space counts so that the system can start 853 * update the free space counts so that the system can start
854 * using the new disk blocks. 854 * using the new disk blocks.
855 */ 855 */
856 856
857 /* Update group descriptor block for new group */ 857 /* Update group descriptor block for new group */
858 gdp = (struct ext4_group_desc *)((char *)primary->b_data + 858 gdp = (struct ext4_group_desc *)((char *)primary->b_data +
@@ -946,7 +946,8 @@ exit_put:
946 return err; 946 return err;
947} /* ext4_group_add */ 947} /* ext4_group_add */
948 948
949/* Extend the filesystem to the new number of blocks specified. This entry 949/*
950 * Extend the filesystem to the new number of blocks specified. This entry
950 * point is only used to extend the current filesystem to the end of the last 951 * point is only used to extend the current filesystem to the end of the last
951 * existing group. It can be accessed via ioctl, or by "remount,resize=<size>" 952 * existing group. It can be accessed via ioctl, or by "remount,resize=<size>"
952 * for emergencies (because it has no dependencies on reserved blocks). 953 * for emergencies (because it has no dependencies on reserved blocks).
@@ -1024,7 +1025,7 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es,
1024 o_blocks_count + add, add); 1025 o_blocks_count + add, add);
1025 1026
1026 /* See if the device is actually as big as what was requested */ 1027 /* See if the device is actually as big as what was requested */
1027 bh = sb_bread(sb, o_blocks_count + add -1); 1028 bh = sb_bread(sb, o_blocks_count + add - 1);
1028 if (!bh) { 1029 if (!bh) {
1029 ext4_warning(sb, __func__, 1030 ext4_warning(sb, __func__,
1030 "can't read last block, resize aborted"); 1031 "can't read last block, resize aborted");
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 1e69f29a8c55..d5d77958b861 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -49,20 +49,19 @@ static int ext4_load_journal(struct super_block *, struct ext4_super_block *,
49 unsigned long journal_devnum); 49 unsigned long journal_devnum);
50static int ext4_create_journal(struct super_block *, struct ext4_super_block *, 50static int ext4_create_journal(struct super_block *, struct ext4_super_block *,
51 unsigned int); 51 unsigned int);
52static void ext4_commit_super (struct super_block * sb, 52static void ext4_commit_super(struct super_block *sb,
53 struct ext4_super_block * es, 53 struct ext4_super_block *es, int sync);
54 int sync); 54static void ext4_mark_recovery_complete(struct super_block *sb,
55static void ext4_mark_recovery_complete(struct super_block * sb, 55 struct ext4_super_block *es);
56 struct ext4_super_block * es); 56static void ext4_clear_journal_err(struct super_block *sb,
57static void ext4_clear_journal_err(struct super_block * sb, 57 struct ext4_super_block *es);
58 struct ext4_super_block * es);
59static int ext4_sync_fs(struct super_block *sb, int wait); 58static int ext4_sync_fs(struct super_block *sb, int wait);
60static const char *ext4_decode_error(struct super_block * sb, int errno, 59static const char *ext4_decode_error(struct super_block *sb, int errno,
61 char nbuf[16]); 60 char nbuf[16]);
62static int ext4_remount (struct super_block * sb, int * flags, char * data); 61static int ext4_remount(struct super_block *sb, int *flags, char *data);
63static int ext4_statfs (struct dentry * dentry, struct kstatfs * buf); 62static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf);
64static void ext4_unlockfs(struct super_block *sb); 63static void ext4_unlockfs(struct super_block *sb);
65static void ext4_write_super (struct super_block * sb); 64static void ext4_write_super(struct super_block *sb);
66static void ext4_write_super_lockfs(struct super_block *sb); 65static void ext4_write_super_lockfs(struct super_block *sb);
67 66
68 67
@@ -211,15 +210,15 @@ static void ext4_handle_error(struct super_block *sb)
211 if (sb->s_flags & MS_RDONLY) 210 if (sb->s_flags & MS_RDONLY)
212 return; 211 return;
213 212
214 if (!test_opt (sb, ERRORS_CONT)) { 213 if (!test_opt(sb, ERRORS_CONT)) {
215 journal_t *journal = EXT4_SB(sb)->s_journal; 214 journal_t *journal = EXT4_SB(sb)->s_journal;
216 215
217 EXT4_SB(sb)->s_mount_opt |= EXT4_MOUNT_ABORT; 216 EXT4_SB(sb)->s_mount_opt |= EXT4_MOUNT_ABORT;
218 if (journal) 217 if (journal)
219 jbd2_journal_abort(journal, -EIO); 218 jbd2_journal_abort(journal, -EIO);
220 } 219 }
221 if (test_opt (sb, ERRORS_RO)) { 220 if (test_opt(sb, ERRORS_RO)) {
222 printk (KERN_CRIT "Remounting filesystem read-only\n"); 221 printk(KERN_CRIT "Remounting filesystem read-only\n");
223 sb->s_flags |= MS_RDONLY; 222 sb->s_flags |= MS_RDONLY;
224 } 223 }
225 ext4_commit_super(sb, es, 1); 224 ext4_commit_super(sb, es, 1);
@@ -228,13 +227,13 @@ static void ext4_handle_error(struct super_block *sb)
228 sb->s_id); 227 sb->s_id);
229} 228}
230 229
231void ext4_error (struct super_block * sb, const char * function, 230void ext4_error(struct super_block *sb, const char *function,
232 const char * fmt, ...) 231 const char *fmt, ...)
233{ 232{
234 va_list args; 233 va_list args;
235 234
236 va_start(args, fmt); 235 va_start(args, fmt);
237 printk(KERN_CRIT "EXT4-fs error (device %s): %s: ",sb->s_id, function); 236 printk(KERN_CRIT "EXT4-fs error (device %s): %s: ", sb->s_id, function);
238 vprintk(fmt, args); 237 vprintk(fmt, args);
239 printk("\n"); 238 printk("\n");
240 va_end(args); 239 va_end(args);
@@ -242,7 +241,7 @@ void ext4_error (struct super_block * sb, const char * function,
242 ext4_handle_error(sb); 241 ext4_handle_error(sb);
243} 242}
244 243
245static const char *ext4_decode_error(struct super_block * sb, int errno, 244static const char *ext4_decode_error(struct super_block *sb, int errno,
246 char nbuf[16]) 245 char nbuf[16])
247{ 246{
248 char *errstr = NULL; 247 char *errstr = NULL;
@@ -278,8 +277,7 @@ static const char *ext4_decode_error(struct super_block * sb, int errno,
278/* __ext4_std_error decodes expected errors from journaling functions 277/* __ext4_std_error decodes expected errors from journaling functions
279 * automatically and invokes the appropriate error response. */ 278 * automatically and invokes the appropriate error response. */
280 279
281void __ext4_std_error (struct super_block * sb, const char * function, 280void __ext4_std_error(struct super_block *sb, const char *function, int errno)
282 int errno)
283{ 281{
284 char nbuf[16]; 282 char nbuf[16];
285 const char *errstr; 283 const char *errstr;
@@ -292,8 +290,8 @@ void __ext4_std_error (struct super_block * sb, const char * function,
292 return; 290 return;
293 291
294 errstr = ext4_decode_error(sb, errno, nbuf); 292 errstr = ext4_decode_error(sb, errno, nbuf);
295 printk (KERN_CRIT "EXT4-fs error (device %s) in %s: %s\n", 293 printk(KERN_CRIT "EXT4-fs error (device %s) in %s: %s\n",
296 sb->s_id, function, errstr); 294 sb->s_id, function, errstr);
297 295
298 ext4_handle_error(sb); 296 ext4_handle_error(sb);
299} 297}
@@ -308,15 +306,15 @@ void __ext4_std_error (struct super_block * sb, const char * function,
308 * case we take the easy way out and panic immediately. 306 * case we take the easy way out and panic immediately.
309 */ 307 */
310 308
311void ext4_abort (struct super_block * sb, const char * function, 309void ext4_abort(struct super_block *sb, const char *function,
312 const char * fmt, ...) 310 const char *fmt, ...)
313{ 311{
314 va_list args; 312 va_list args;
315 313
316 printk (KERN_CRIT "ext4_abort called.\n"); 314 printk(KERN_CRIT "ext4_abort called.\n");
317 315
318 va_start(args, fmt); 316 va_start(args, fmt);
319 printk(KERN_CRIT "EXT4-fs error (device %s): %s: ",sb->s_id, function); 317 printk(KERN_CRIT "EXT4-fs error (device %s): %s: ", sb->s_id, function);
320 vprintk(fmt, args); 318 vprintk(fmt, args);
321 printk("\n"); 319 printk("\n");
322 va_end(args); 320 va_end(args);
@@ -334,8 +332,8 @@ void ext4_abort (struct super_block * sb, const char * function,
334 jbd2_journal_abort(EXT4_SB(sb)->s_journal, -EIO); 332 jbd2_journal_abort(EXT4_SB(sb)->s_journal, -EIO);
335} 333}
336 334
337void ext4_warning (struct super_block * sb, const char * function, 335void ext4_warning(struct super_block *sb, const char *function,
338 const char * fmt, ...) 336 const char *fmt, ...)
339{ 337{
340 va_list args; 338 va_list args;
341 339
@@ -496,7 +494,7 @@ static void dump_orphan_list(struct super_block *sb, struct ext4_sb_info *sbi)
496 } 494 }
497} 495}
498 496
499static void ext4_put_super (struct super_block * sb) 497static void ext4_put_super(struct super_block *sb)
500{ 498{
501 struct ext4_sb_info *sbi = EXT4_SB(sb); 499 struct ext4_sb_info *sbi = EXT4_SB(sb);
502 struct ext4_super_block *es = sbi->s_es; 500 struct ext4_super_block *es = sbi->s_es;
@@ -647,7 +645,8 @@ static void ext4_clear_inode(struct inode *inode)
647 &EXT4_I(inode)->jinode); 645 &EXT4_I(inode)->jinode);
648} 646}
649 647
650static inline void ext4_show_quota_options(struct seq_file *seq, struct super_block *sb) 648static inline void ext4_show_quota_options(struct seq_file *seq,
649 struct super_block *sb)
651{ 650{
652#if defined(CONFIG_QUOTA) 651#if defined(CONFIG_QUOTA)
653 struct ext4_sb_info *sbi = EXT4_SB(sb); 652 struct ext4_sb_info *sbi = EXT4_SB(sb);
@@ -822,8 +821,8 @@ static struct dentry *ext4_fh_to_parent(struct super_block *sb, struct fid *fid,
822} 821}
823 822
824#ifdef CONFIG_QUOTA 823#ifdef CONFIG_QUOTA
825#define QTYPE2NAME(t) ((t)==USRQUOTA?"user":"group") 824#define QTYPE2NAME(t) ((t) == USRQUOTA?"user":"group")
826#define QTYPE2MOPT(on, t) ((t)==USRQUOTA?((on)##USRJQUOTA):((on)##GRPJQUOTA)) 825#define QTYPE2MOPT(on, t) ((t) == USRQUOTA?((on)##USRJQUOTA):((on)##GRPJQUOTA))
827 826
828static int ext4_dquot_initialize(struct inode *inode, int type); 827static int ext4_dquot_initialize(struct inode *inode, int type);
829static int ext4_dquot_drop(struct inode *inode); 828static int ext4_dquot_drop(struct inode *inode);
@@ -991,12 +990,12 @@ static ext4_fsblk_t get_sb_block(void **data)
991 return sb_block; 990 return sb_block;
992} 991}
993 992
994static int parse_options (char *options, struct super_block *sb, 993static int parse_options(char *options, struct super_block *sb,
995 unsigned int *inum, unsigned long *journal_devnum, 994 unsigned int *inum, unsigned long *journal_devnum,
996 ext4_fsblk_t *n_blocks_count, int is_remount) 995 ext4_fsblk_t *n_blocks_count, int is_remount)
997{ 996{
998 struct ext4_sb_info *sbi = EXT4_SB(sb); 997 struct ext4_sb_info *sbi = EXT4_SB(sb);
999 char * p; 998 char *p;
1000 substring_t args[MAX_OPT_ARGS]; 999 substring_t args[MAX_OPT_ARGS];
1001 int data_opt = 0; 1000 int data_opt = 0;
1002 int option; 1001 int option;
@@ -1009,7 +1008,7 @@ static int parse_options (char *options, struct super_block *sb,
1009 if (!options) 1008 if (!options)
1010 return 1; 1009 return 1;
1011 1010
1012 while ((p = strsep (&options, ",")) != NULL) { 1011 while ((p = strsep(&options, ",")) != NULL) {
1013 int token; 1012 int token;
1014 if (!*p) 1013 if (!*p)
1015 continue; 1014 continue;
@@ -1017,16 +1016,16 @@ static int parse_options (char *options, struct super_block *sb,
1017 token = match_token(p, tokens, args); 1016 token = match_token(p, tokens, args);
1018 switch (token) { 1017 switch (token) {
1019 case Opt_bsd_df: 1018 case Opt_bsd_df:
1020 clear_opt (sbi->s_mount_opt, MINIX_DF); 1019 clear_opt(sbi->s_mount_opt, MINIX_DF);
1021 break; 1020 break;
1022 case Opt_minix_df: 1021 case Opt_minix_df:
1023 set_opt (sbi->s_mount_opt, MINIX_DF); 1022 set_opt(sbi->s_mount_opt, MINIX_DF);
1024 break; 1023 break;
1025 case Opt_grpid: 1024 case Opt_grpid:
1026 set_opt (sbi->s_mount_opt, GRPID); 1025 set_opt(sbi->s_mount_opt, GRPID);
1027 break; 1026 break;
1028 case Opt_nogrpid: 1027 case Opt_nogrpid:
1029 clear_opt (sbi->s_mount_opt, GRPID); 1028 clear_opt(sbi->s_mount_opt, GRPID);
1030 break; 1029 break;
1031 case Opt_resuid: 1030 case Opt_resuid:
1032 if (match_int(&args[0], &option)) 1031 if (match_int(&args[0], &option))
@@ -1043,41 +1042,41 @@ static int parse_options (char *options, struct super_block *sb,
1043 /* *sb_block = match_int(&args[0]); */ 1042 /* *sb_block = match_int(&args[0]); */
1044 break; 1043 break;
1045 case Opt_err_panic: 1044 case Opt_err_panic:
1046 clear_opt (sbi->s_mount_opt, ERRORS_CONT); 1045 clear_opt(sbi->s_mount_opt, ERRORS_CONT);
1047 clear_opt (sbi->s_mount_opt, ERRORS_RO); 1046 clear_opt(sbi->s_mount_opt, ERRORS_RO);
1048 set_opt (sbi->s_mount_opt, ERRORS_PANIC); 1047 set_opt(sbi->s_mount_opt, ERRORS_PANIC);
1049 break; 1048 break;
1050 case Opt_err_ro: 1049 case Opt_err_ro:
1051 clear_opt (sbi->s_mount_opt, ERRORS_CONT); 1050 clear_opt(sbi->s_mount_opt, ERRORS_CONT);
1052 clear_opt (sbi->s_mount_opt, ERRORS_PANIC); 1051 clear_opt(sbi->s_mount_opt, ERRORS_PANIC);
1053 set_opt (sbi->s_mount_opt, ERRORS_RO); 1052 set_opt(sbi->s_mount_opt, ERRORS_RO);
1054 break; 1053 break;
1055 case Opt_err_cont: 1054 case Opt_err_cont:
1056 clear_opt (sbi->s_mount_opt, ERRORS_RO); 1055 clear_opt(sbi->s_mount_opt, ERRORS_RO);
1057 clear_opt (sbi->s_mount_opt, ERRORS_PANIC); 1056 clear_opt(sbi->s_mount_opt, ERRORS_PANIC);
1058 set_opt (sbi->s_mount_opt, ERRORS_CONT); 1057 set_opt(sbi->s_mount_opt, ERRORS_CONT);
1059 break; 1058 break;
1060 case Opt_nouid32: 1059 case Opt_nouid32:
1061 set_opt (sbi->s_mount_opt, NO_UID32); 1060 set_opt(sbi->s_mount_opt, NO_UID32);
1062 break; 1061 break;
1063 case Opt_nocheck: 1062 case Opt_nocheck:
1064 clear_opt (sbi->s_mount_opt, CHECK); 1063 clear_opt(sbi->s_mount_opt, CHECK);
1065 break; 1064 break;
1066 case Opt_debug: 1065 case Opt_debug:
1067 set_opt (sbi->s_mount_opt, DEBUG); 1066 set_opt(sbi->s_mount_opt, DEBUG);
1068 break; 1067 break;
1069 case Opt_oldalloc: 1068 case Opt_oldalloc:
1070 set_opt (sbi->s_mount_opt, OLDALLOC); 1069 set_opt(sbi->s_mount_opt, OLDALLOC);
1071 break; 1070 break;
1072 case Opt_orlov: 1071 case Opt_orlov:
1073 clear_opt (sbi->s_mount_opt, OLDALLOC); 1072 clear_opt(sbi->s_mount_opt, OLDALLOC);
1074 break; 1073 break;
1075#ifdef CONFIG_EXT4DEV_FS_XATTR 1074#ifdef CONFIG_EXT4DEV_FS_XATTR
1076 case Opt_user_xattr: 1075 case Opt_user_xattr:
1077 set_opt (sbi->s_mount_opt, XATTR_USER); 1076 set_opt(sbi->s_mount_opt, XATTR_USER);
1078 break; 1077 break;
1079 case Opt_nouser_xattr: 1078 case Opt_nouser_xattr:
1080 clear_opt (sbi->s_mount_opt, XATTR_USER); 1079 clear_opt(sbi->s_mount_opt, XATTR_USER);
1081 break; 1080 break;
1082#else 1081#else
1083 case Opt_user_xattr: 1082 case Opt_user_xattr:
@@ -1115,7 +1114,7 @@ static int parse_options (char *options, struct super_block *sb,
1115 "journal on remount\n"); 1114 "journal on remount\n");
1116 return 0; 1115 return 0;
1117 } 1116 }
1118 set_opt (sbi->s_mount_opt, UPDATE_JOURNAL); 1117 set_opt(sbi->s_mount_opt, UPDATE_JOURNAL);
1119 break; 1118 break;
1120 case Opt_journal_inum: 1119 case Opt_journal_inum:
1121 if (is_remount) { 1120 if (is_remount) {
@@ -1145,7 +1144,7 @@ static int parse_options (char *options, struct super_block *sb,
1145 set_opt(sbi->s_mount_opt, JOURNAL_CHECKSUM); 1144 set_opt(sbi->s_mount_opt, JOURNAL_CHECKSUM);
1146 break; 1145 break;
1147 case Opt_noload: 1146 case Opt_noload:
1148 set_opt (sbi->s_mount_opt, NOLOAD); 1147 set_opt(sbi->s_mount_opt, NOLOAD);
1149 break; 1148 break;
1150 case Opt_commit: 1149 case Opt_commit:
1151 if (match_int(&args[0], &option)) 1150 if (match_int(&args[0], &option))
@@ -1331,7 +1330,7 @@ set_qf_format:
1331 "on this filesystem, use tune2fs\n"); 1330 "on this filesystem, use tune2fs\n");
1332 return 0; 1331 return 0;
1333 } 1332 }
1334 set_opt (sbi->s_mount_opt, EXTENTS); 1333 set_opt(sbi->s_mount_opt, EXTENTS);
1335 break; 1334 break;
1336 case Opt_noextents: 1335 case Opt_noextents:
1337 /* 1336 /*
@@ -1348,7 +1347,7 @@ set_qf_format:
1348 "-o noextents options\n"); 1347 "-o noextents options\n");
1349 return 0; 1348 return 0;
1350 } 1349 }
1351 clear_opt (sbi->s_mount_opt, EXTENTS); 1350 clear_opt(sbi->s_mount_opt, EXTENTS);
1352 break; 1351 break;
1353 case Opt_i_version: 1352 case Opt_i_version:
1354 set_opt(sbi->s_mount_opt, I_VERSION); 1353 set_opt(sbi->s_mount_opt, I_VERSION);
@@ -1374,9 +1373,9 @@ set_qf_format:
1374 set_opt(sbi->s_mount_opt, DELALLOC); 1373 set_opt(sbi->s_mount_opt, DELALLOC);
1375 break; 1374 break;
1376 default: 1375 default:
1377 printk (KERN_ERR 1376 printk(KERN_ERR
1378 "EXT4-fs: Unrecognized mount option \"%s\" " 1377 "EXT4-fs: Unrecognized mount option \"%s\" "
1379 "or missing value\n", p); 1378 "or missing value\n", p);
1380 return 0; 1379 return 0;
1381 } 1380 }
1382 } 1381 }
@@ -1423,31 +1422,31 @@ static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es,
1423 int res = 0; 1422 int res = 0;
1424 1423
1425 if (le32_to_cpu(es->s_rev_level) > EXT4_MAX_SUPP_REV) { 1424 if (le32_to_cpu(es->s_rev_level) > EXT4_MAX_SUPP_REV) {
1426 printk (KERN_ERR "EXT4-fs warning: revision level too high, " 1425 printk(KERN_ERR "EXT4-fs warning: revision level too high, "
1427 "forcing read-only mode\n"); 1426 "forcing read-only mode\n");
1428 res = MS_RDONLY; 1427 res = MS_RDONLY;
1429 } 1428 }
1430 if (read_only) 1429 if (read_only)
1431 return res; 1430 return res;
1432 if (!(sbi->s_mount_state & EXT4_VALID_FS)) 1431 if (!(sbi->s_mount_state & EXT4_VALID_FS))
1433 printk (KERN_WARNING "EXT4-fs warning: mounting unchecked fs, " 1432 printk(KERN_WARNING "EXT4-fs warning: mounting unchecked fs, "
1434 "running e2fsck is recommended\n"); 1433 "running e2fsck is recommended\n");
1435 else if ((sbi->s_mount_state & EXT4_ERROR_FS)) 1434 else if ((sbi->s_mount_state & EXT4_ERROR_FS))
1436 printk (KERN_WARNING 1435 printk(KERN_WARNING
1437 "EXT4-fs warning: mounting fs with errors, " 1436 "EXT4-fs warning: mounting fs with errors, "
1438 "running e2fsck is recommended\n"); 1437 "running e2fsck is recommended\n");
1439 else if ((__s16) le16_to_cpu(es->s_max_mnt_count) >= 0 && 1438 else if ((__s16) le16_to_cpu(es->s_max_mnt_count) >= 0 &&
1440 le16_to_cpu(es->s_mnt_count) >= 1439 le16_to_cpu(es->s_mnt_count) >=
1441 (unsigned short) (__s16) le16_to_cpu(es->s_max_mnt_count)) 1440 (unsigned short) (__s16) le16_to_cpu(es->s_max_mnt_count))
1442 printk (KERN_WARNING 1441 printk(KERN_WARNING
1443 "EXT4-fs warning: maximal mount count reached, " 1442 "EXT4-fs warning: maximal mount count reached, "
1444 "running e2fsck is recommended\n"); 1443 "running e2fsck is recommended\n");
1445 else if (le32_to_cpu(es->s_checkinterval) && 1444 else if (le32_to_cpu(es->s_checkinterval) &&
1446 (le32_to_cpu(es->s_lastcheck) + 1445 (le32_to_cpu(es->s_lastcheck) +
1447 le32_to_cpu(es->s_checkinterval) <= get_seconds())) 1446 le32_to_cpu(es->s_checkinterval) <= get_seconds()))
1448 printk (KERN_WARNING 1447 printk(KERN_WARNING
1449 "EXT4-fs warning: checktime reached, " 1448 "EXT4-fs warning: checktime reached, "
1450 "running e2fsck is recommended\n"); 1449 "running e2fsck is recommended\n");
1451#if 0 1450#if 0
1452 /* @@@ We _will_ want to clear the valid bit if we find 1451 /* @@@ We _will_ want to clear the valid bit if we find
1453 * inconsistencies, to force a fsck at reboot. But for 1452 * inconsistencies, to force a fsck at reboot. But for
@@ -1506,14 +1505,13 @@ static int ext4_fill_flex_info(struct super_block *sb)
1506 1505
1507 flex_group_count = (sbi->s_groups_count + groups_per_flex - 1) / 1506 flex_group_count = (sbi->s_groups_count + groups_per_flex - 1) /
1508 groups_per_flex; 1507 groups_per_flex;
1509 sbi->s_flex_groups = kmalloc(flex_group_count * 1508 sbi->s_flex_groups = kzalloc(flex_group_count *
1510 sizeof(struct flex_groups), GFP_KERNEL); 1509 sizeof(struct flex_groups), GFP_KERNEL);
1511 if (sbi->s_flex_groups == NULL) { 1510 if (sbi->s_flex_groups == NULL) {
1512 printk(KERN_ERR "EXT4-fs: not enough memory\n"); 1511 printk(KERN_ERR "EXT4-fs: not enough memory for "
1512 "%lu flex groups\n", flex_group_count);
1513 goto failed; 1513 goto failed;
1514 } 1514 }
1515 memset(sbi->s_flex_groups, 0, flex_group_count *
1516 sizeof(struct flex_groups));
1517 1515
1518 gdp = ext4_get_group_desc(sb, 1, &bh); 1516 gdp = ext4_get_group_desc(sb, 1, &bh);
1519 block_bitmap = ext4_block_bitmap(sb, gdp) - 1; 1517 block_bitmap = ext4_block_bitmap(sb, gdp) - 1;
@@ -1597,16 +1595,14 @@ static int ext4_check_descriptors(struct super_block *sb)
1597 (EXT4_BLOCKS_PER_GROUP(sb) - 1); 1595 (EXT4_BLOCKS_PER_GROUP(sb) - 1);
1598 1596
1599 block_bitmap = ext4_block_bitmap(sb, gdp); 1597 block_bitmap = ext4_block_bitmap(sb, gdp);
1600 if (block_bitmap < first_block || block_bitmap > last_block) 1598 if (block_bitmap < first_block || block_bitmap > last_block) {
1601 {
1602 printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: " 1599 printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: "
1603 "Block bitmap for group %lu not in group " 1600 "Block bitmap for group %lu not in group "
1604 "(block %llu)!", i, block_bitmap); 1601 "(block %llu)!", i, block_bitmap);
1605 return 0; 1602 return 0;
1606 } 1603 }
1607 inode_bitmap = ext4_inode_bitmap(sb, gdp); 1604 inode_bitmap = ext4_inode_bitmap(sb, gdp);
1608 if (inode_bitmap < first_block || inode_bitmap > last_block) 1605 if (inode_bitmap < first_block || inode_bitmap > last_block) {
1609 {
1610 printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: " 1606 printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: "
1611 "Inode bitmap for group %lu not in group " 1607 "Inode bitmap for group %lu not in group "
1612 "(block %llu)!", i, inode_bitmap); 1608 "(block %llu)!", i, inode_bitmap);
@@ -1614,26 +1610,28 @@ static int ext4_check_descriptors(struct super_block *sb)
1614 } 1610 }
1615 inode_table = ext4_inode_table(sb, gdp); 1611 inode_table = ext4_inode_table(sb, gdp);
1616 if (inode_table < first_block || 1612 if (inode_table < first_block ||
1617 inode_table + sbi->s_itb_per_group - 1 > last_block) 1613 inode_table + sbi->s_itb_per_group - 1 > last_block) {
1618 {
1619 printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: " 1614 printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: "
1620 "Inode table for group %lu not in group " 1615 "Inode table for group %lu not in group "
1621 "(block %llu)!", i, inode_table); 1616 "(block %llu)!", i, inode_table);
1622 return 0; 1617 return 0;
1623 } 1618 }
1619 spin_lock(sb_bgl_lock(sbi, i));
1624 if (!ext4_group_desc_csum_verify(sbi, i, gdp)) { 1620 if (!ext4_group_desc_csum_verify(sbi, i, gdp)) {
1625 printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: " 1621 printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: "
1626 "Checksum for group %lu failed (%u!=%u)\n", 1622 "Checksum for group %lu failed (%u!=%u)\n",
1627 i, le16_to_cpu(ext4_group_desc_csum(sbi, i, 1623 i, le16_to_cpu(ext4_group_desc_csum(sbi, i,
1628 gdp)), le16_to_cpu(gdp->bg_checksum)); 1624 gdp)), le16_to_cpu(gdp->bg_checksum));
1629 return 0; 1625 if (!(sb->s_flags & MS_RDONLY))
1626 return 0;
1630 } 1627 }
1628 spin_unlock(sb_bgl_lock(sbi, i));
1631 if (!flexbg_flag) 1629 if (!flexbg_flag)
1632 first_block += EXT4_BLOCKS_PER_GROUP(sb); 1630 first_block += EXT4_BLOCKS_PER_GROUP(sb);
1633 } 1631 }
1634 1632
1635 ext4_free_blocks_count_set(sbi->s_es, ext4_count_free_blocks(sb)); 1633 ext4_free_blocks_count_set(sbi->s_es, ext4_count_free_blocks(sb));
1636 sbi->s_es->s_free_inodes_count=cpu_to_le32(ext4_count_free_inodes(sb)); 1634 sbi->s_es->s_free_inodes_count = cpu_to_le32(ext4_count_free_inodes(sb));
1637 return 1; 1635 return 1;
1638} 1636}
1639 1637
@@ -1654,8 +1652,8 @@ static int ext4_check_descriptors(struct super_block *sb)
1654 * e2fsck was run on this filesystem, and it must have already done the orphan 1652 * e2fsck was run on this filesystem, and it must have already done the orphan
1655 * inode cleanup for us, so we can safely abort without any further action. 1653 * inode cleanup for us, so we can safely abort without any further action.
1656 */ 1654 */
1657static void ext4_orphan_cleanup (struct super_block * sb, 1655static void ext4_orphan_cleanup(struct super_block *sb,
1658 struct ext4_super_block * es) 1656 struct ext4_super_block *es)
1659{ 1657{
1660 unsigned int s_flags = sb->s_flags; 1658 unsigned int s_flags = sb->s_flags;
1661 int nr_orphans = 0, nr_truncates = 0; 1659 int nr_orphans = 0, nr_truncates = 0;
@@ -1732,7 +1730,7 @@ static void ext4_orphan_cleanup (struct super_block * sb,
1732 iput(inode); /* The delete magic happens here! */ 1730 iput(inode); /* The delete magic happens here! */
1733 } 1731 }
1734 1732
1735#define PLURAL(x) (x), ((x)==1) ? "" : "s" 1733#define PLURAL(x) (x), ((x) == 1) ? "" : "s"
1736 1734
1737 if (nr_orphans) 1735 if (nr_orphans)
1738 printk(KERN_INFO "EXT4-fs: %s: %d orphan inode%s deleted\n", 1736 printk(KERN_INFO "EXT4-fs: %s: %d orphan inode%s deleted\n",
@@ -1899,12 +1897,12 @@ static unsigned long ext4_get_stripe_size(struct ext4_sb_info *sbi)
1899 return 0; 1897 return 0;
1900} 1898}
1901 1899
1902static int ext4_fill_super (struct super_block *sb, void *data, int silent) 1900static int ext4_fill_super(struct super_block *sb, void *data, int silent)
1903 __releases(kernel_lock) 1901 __releases(kernel_lock)
1904 __acquires(kernel_lock) 1902 __acquires(kernel_lock)
1905 1903
1906{ 1904{
1907 struct buffer_head * bh; 1905 struct buffer_head *bh;
1908 struct ext4_super_block *es = NULL; 1906 struct ext4_super_block *es = NULL;
1909 struct ext4_sb_info *sbi; 1907 struct ext4_sb_info *sbi;
1910 ext4_fsblk_t block; 1908 ext4_fsblk_t block;
@@ -1953,7 +1951,7 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent)
1953 } 1951 }
1954 1952
1955 if (!(bh = sb_bread(sb, logical_sb_block))) { 1953 if (!(bh = sb_bread(sb, logical_sb_block))) {
1956 printk (KERN_ERR "EXT4-fs: unable to read superblock\n"); 1954 printk(KERN_ERR "EXT4-fs: unable to read superblock\n");
1957 goto out_fail; 1955 goto out_fail;
1958 } 1956 }
1959 /* 1957 /*
@@ -2026,8 +2024,8 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent)
2026 set_opt(sbi->s_mount_opt, DELALLOC); 2024 set_opt(sbi->s_mount_opt, DELALLOC);
2027 2025
2028 2026
2029 if (!parse_options ((char *) data, sb, &journal_inum, &journal_devnum, 2027 if (!parse_options((char *) data, sb, &journal_inum, &journal_devnum,
2030 NULL, 0)) 2028 NULL, 0))
2031 goto failed_mount; 2029 goto failed_mount;
2032 2030
2033 sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | 2031 sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
@@ -2102,7 +2100,7 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent)
2102 goto failed_mount; 2100 goto failed_mount;
2103 } 2101 }
2104 2102
2105 brelse (bh); 2103 brelse(bh);
2106 logical_sb_block = sb_block * EXT4_MIN_BLOCK_SIZE; 2104 logical_sb_block = sb_block * EXT4_MIN_BLOCK_SIZE;
2107 offset = do_div(logical_sb_block, blocksize); 2105 offset = do_div(logical_sb_block, blocksize);
2108 bh = sb_bread(sb, logical_sb_block); 2106 bh = sb_bread(sb, logical_sb_block);
@@ -2114,8 +2112,8 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent)
2114 es = (struct ext4_super_block *)(((char *)bh->b_data) + offset); 2112 es = (struct ext4_super_block *)(((char *)bh->b_data) + offset);
2115 sbi->s_es = es; 2113 sbi->s_es = es;
2116 if (es->s_magic != cpu_to_le16(EXT4_SUPER_MAGIC)) { 2114 if (es->s_magic != cpu_to_le16(EXT4_SUPER_MAGIC)) {
2117 printk (KERN_ERR 2115 printk(KERN_ERR
2118 "EXT4-fs: Magic mismatch, very weird !\n"); 2116 "EXT4-fs: Magic mismatch, very weird !\n");
2119 goto failed_mount; 2117 goto failed_mount;
2120 } 2118 }
2121 } 2119 }
@@ -2132,9 +2130,9 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent)
2132 if ((sbi->s_inode_size < EXT4_GOOD_OLD_INODE_SIZE) || 2130 if ((sbi->s_inode_size < EXT4_GOOD_OLD_INODE_SIZE) ||
2133 (!is_power_of_2(sbi->s_inode_size)) || 2131 (!is_power_of_2(sbi->s_inode_size)) ||
2134 (sbi->s_inode_size > blocksize)) { 2132 (sbi->s_inode_size > blocksize)) {
2135 printk (KERN_ERR 2133 printk(KERN_ERR
2136 "EXT4-fs: unsupported inode size: %d\n", 2134 "EXT4-fs: unsupported inode size: %d\n",
2137 sbi->s_inode_size); 2135 sbi->s_inode_size);
2138 goto failed_mount; 2136 goto failed_mount;
2139 } 2137 }
2140 if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE) 2138 if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE)
@@ -2166,20 +2164,20 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent)
2166 sbi->s_mount_state = le16_to_cpu(es->s_state); 2164 sbi->s_mount_state = le16_to_cpu(es->s_state);
2167 sbi->s_addr_per_block_bits = ilog2(EXT4_ADDR_PER_BLOCK(sb)); 2165 sbi->s_addr_per_block_bits = ilog2(EXT4_ADDR_PER_BLOCK(sb));
2168 sbi->s_desc_per_block_bits = ilog2(EXT4_DESC_PER_BLOCK(sb)); 2166 sbi->s_desc_per_block_bits = ilog2(EXT4_DESC_PER_BLOCK(sb));
2169 for (i=0; i < 4; i++) 2167 for (i = 0; i < 4; i++)
2170 sbi->s_hash_seed[i] = le32_to_cpu(es->s_hash_seed[i]); 2168 sbi->s_hash_seed[i] = le32_to_cpu(es->s_hash_seed[i]);
2171 sbi->s_def_hash_version = es->s_def_hash_version; 2169 sbi->s_def_hash_version = es->s_def_hash_version;
2172 2170
2173 if (sbi->s_blocks_per_group > blocksize * 8) { 2171 if (sbi->s_blocks_per_group > blocksize * 8) {
2174 printk (KERN_ERR 2172 printk(KERN_ERR
2175 "EXT4-fs: #blocks per group too big: %lu\n", 2173 "EXT4-fs: #blocks per group too big: %lu\n",
2176 sbi->s_blocks_per_group); 2174 sbi->s_blocks_per_group);
2177 goto failed_mount; 2175 goto failed_mount;
2178 } 2176 }
2179 if (sbi->s_inodes_per_group > blocksize * 8) { 2177 if (sbi->s_inodes_per_group > blocksize * 8) {
2180 printk (KERN_ERR 2178 printk(KERN_ERR
2181 "EXT4-fs: #inodes per group too big: %lu\n", 2179 "EXT4-fs: #inodes per group too big: %lu\n",
2182 sbi->s_inodes_per_group); 2180 sbi->s_inodes_per_group);
2183 goto failed_mount; 2181 goto failed_mount;
2184 } 2182 }
2185 2183
@@ -2213,10 +2211,10 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent)
2213 sbi->s_groups_count = blocks_count; 2211 sbi->s_groups_count = blocks_count;
2214 db_count = (sbi->s_groups_count + EXT4_DESC_PER_BLOCK(sb) - 1) / 2212 db_count = (sbi->s_groups_count + EXT4_DESC_PER_BLOCK(sb) - 1) /
2215 EXT4_DESC_PER_BLOCK(sb); 2213 EXT4_DESC_PER_BLOCK(sb);
2216 sbi->s_group_desc = kmalloc(db_count * sizeof (struct buffer_head *), 2214 sbi->s_group_desc = kmalloc(db_count * sizeof(struct buffer_head *),
2217 GFP_KERNEL); 2215 GFP_KERNEL);
2218 if (sbi->s_group_desc == NULL) { 2216 if (sbi->s_group_desc == NULL) {
2219 printk (KERN_ERR "EXT4-fs: not enough memory\n"); 2217 printk(KERN_ERR "EXT4-fs: not enough memory\n");
2220 goto failed_mount; 2218 goto failed_mount;
2221 } 2219 }
2222 2220
@@ -2226,13 +2224,13 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent)
2226 block = descriptor_loc(sb, logical_sb_block, i); 2224 block = descriptor_loc(sb, logical_sb_block, i);
2227 sbi->s_group_desc[i] = sb_bread(sb, block); 2225 sbi->s_group_desc[i] = sb_bread(sb, block);
2228 if (!sbi->s_group_desc[i]) { 2226 if (!sbi->s_group_desc[i]) {
2229 printk (KERN_ERR "EXT4-fs: " 2227 printk(KERN_ERR "EXT4-fs: "
2230 "can't read group descriptor %d\n", i); 2228 "can't read group descriptor %d\n", i);
2231 db_count = i; 2229 db_count = i;
2232 goto failed_mount2; 2230 goto failed_mount2;
2233 } 2231 }
2234 } 2232 }
2235 if (!ext4_check_descriptors (sb)) { 2233 if (!ext4_check_descriptors(sb)) {
2236 printk(KERN_ERR "EXT4-fs: group descriptors corrupted!\n"); 2234 printk(KERN_ERR "EXT4-fs: group descriptors corrupted!\n");
2237 goto failed_mount2; 2235 goto failed_mount2;
2238 } 2236 }
@@ -2308,11 +2306,11 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent)
2308 EXT4_SB(sb)->s_journal->j_failed_commit) { 2306 EXT4_SB(sb)->s_journal->j_failed_commit) {
2309 printk(KERN_CRIT "EXT4-fs error (device %s): " 2307 printk(KERN_CRIT "EXT4-fs error (device %s): "
2310 "ext4_fill_super: Journal transaction " 2308 "ext4_fill_super: Journal transaction "
2311 "%u is corrupt\n", sb->s_id, 2309 "%u is corrupt\n", sb->s_id,
2312 EXT4_SB(sb)->s_journal->j_failed_commit); 2310 EXT4_SB(sb)->s_journal->j_failed_commit);
2313 if (test_opt (sb, ERRORS_RO)) { 2311 if (test_opt(sb, ERRORS_RO)) {
2314 printk (KERN_CRIT 2312 printk(KERN_CRIT
2315 "Mounting filesystem read-only\n"); 2313 "Mounting filesystem read-only\n");
2316 sb->s_flags |= MS_RDONLY; 2314 sb->s_flags |= MS_RDONLY;
2317 EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS; 2315 EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
2318 es->s_state |= cpu_to_le16(EXT4_ERROR_FS); 2316 es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
@@ -2332,9 +2330,9 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent)
2332 goto failed_mount3; 2330 goto failed_mount3;
2333 } else { 2331 } else {
2334 if (!silent) 2332 if (!silent)
2335 printk (KERN_ERR 2333 printk(KERN_ERR
2336 "ext4: No journal on filesystem on %s\n", 2334 "ext4: No journal on filesystem on %s\n",
2337 sb->s_id); 2335 sb->s_id);
2338 goto failed_mount3; 2336 goto failed_mount3;
2339 } 2337 }
2340 2338
@@ -2418,7 +2416,7 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent)
2418 goto failed_mount4; 2416 goto failed_mount4;
2419 } 2417 }
2420 2418
2421 ext4_setup_super (sb, es, sb->s_flags & MS_RDONLY); 2419 ext4_setup_super(sb, es, sb->s_flags & MS_RDONLY);
2422 2420
2423 /* determine the minimum size of new large inodes, if present */ 2421 /* determine the minimum size of new large inodes, if present */
2424 if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE) { 2422 if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE) {
@@ -2457,12 +2455,12 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent)
2457 ext4_orphan_cleanup(sb, es); 2455 ext4_orphan_cleanup(sb, es);
2458 EXT4_SB(sb)->s_mount_state &= ~EXT4_ORPHAN_FS; 2456 EXT4_SB(sb)->s_mount_state &= ~EXT4_ORPHAN_FS;
2459 if (needs_recovery) 2457 if (needs_recovery)
2460 printk (KERN_INFO "EXT4-fs: recovery complete.\n"); 2458 printk(KERN_INFO "EXT4-fs: recovery complete.\n");
2461 ext4_mark_recovery_complete(sb, es); 2459 ext4_mark_recovery_complete(sb, es);
2462 printk (KERN_INFO "EXT4-fs: mounted filesystem with %s data mode.\n", 2460 printk(KERN_INFO "EXT4-fs: mounted filesystem with %s data mode.\n",
2463 test_opt(sb,DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA ? "journal": 2461 test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA ? "journal":
2464 test_opt(sb,DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA ? "ordered": 2462 test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA ? "ordered":
2465 "writeback"); 2463 "writeback");
2466 2464
2467 if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) { 2465 if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) {
2468 printk(KERN_WARNING "EXT4-fs: Ignoring delalloc option - " 2466 printk(KERN_WARNING "EXT4-fs: Ignoring delalloc option - "
@@ -2575,14 +2573,14 @@ static journal_t *ext4_get_journal(struct super_block *sb,
2575static journal_t *ext4_get_dev_journal(struct super_block *sb, 2573static journal_t *ext4_get_dev_journal(struct super_block *sb,
2576 dev_t j_dev) 2574 dev_t j_dev)
2577{ 2575{
2578 struct buffer_head * bh; 2576 struct buffer_head *bh;
2579 journal_t *journal; 2577 journal_t *journal;
2580 ext4_fsblk_t start; 2578 ext4_fsblk_t start;
2581 ext4_fsblk_t len; 2579 ext4_fsblk_t len;
2582 int hblock, blocksize; 2580 int hblock, blocksize;
2583 ext4_fsblk_t sb_block; 2581 ext4_fsblk_t sb_block;
2584 unsigned long offset; 2582 unsigned long offset;
2585 struct ext4_super_block * es; 2583 struct ext4_super_block *es;
2586 struct block_device *bdev; 2584 struct block_device *bdev;
2587 2585
2588 bdev = ext4_blkdev_get(j_dev); 2586 bdev = ext4_blkdev_get(j_dev);
@@ -2697,8 +2695,8 @@ static int ext4_load_journal(struct super_block *sb,
2697 "unavailable, cannot proceed.\n"); 2695 "unavailable, cannot proceed.\n");
2698 return -EROFS; 2696 return -EROFS;
2699 } 2697 }
2700 printk (KERN_INFO "EXT4-fs: write access will " 2698 printk(KERN_INFO "EXT4-fs: write access will "
2701 "be enabled during recovery.\n"); 2699 "be enabled during recovery.\n");
2702 } 2700 }
2703 } 2701 }
2704 2702
@@ -2751,8 +2749,8 @@ static int ext4_load_journal(struct super_block *sb,
2751 return 0; 2749 return 0;
2752} 2750}
2753 2751
2754static int ext4_create_journal(struct super_block * sb, 2752static int ext4_create_journal(struct super_block *sb,
2755 struct ext4_super_block * es, 2753 struct ext4_super_block *es,
2756 unsigned int journal_inum) 2754 unsigned int journal_inum)
2757{ 2755{
2758 journal_t *journal; 2756 journal_t *journal;
@@ -2793,9 +2791,8 @@ static int ext4_create_journal(struct super_block * sb,
2793 return 0; 2791 return 0;
2794} 2792}
2795 2793
2796static void ext4_commit_super (struct super_block * sb, 2794static void ext4_commit_super(struct super_block *sb,
2797 struct ext4_super_block * es, 2795 struct ext4_super_block *es, int sync)
2798 int sync)
2799{ 2796{
2800 struct buffer_head *sbh = EXT4_SB(sb)->s_sbh; 2797 struct buffer_head *sbh = EXT4_SB(sb)->s_sbh;
2801 2798
@@ -2816,8 +2813,8 @@ static void ext4_commit_super (struct super_block * sb,
2816 * remounting) the filesystem readonly, then we will end up with a 2813 * remounting) the filesystem readonly, then we will end up with a
2817 * consistent fs on disk. Record that fact. 2814 * consistent fs on disk. Record that fact.
2818 */ 2815 */
2819static void ext4_mark_recovery_complete(struct super_block * sb, 2816static void ext4_mark_recovery_complete(struct super_block *sb,
2820 struct ext4_super_block * es) 2817 struct ext4_super_block *es)
2821{ 2818{
2822 journal_t *journal = EXT4_SB(sb)->s_journal; 2819 journal_t *journal = EXT4_SB(sb)->s_journal;
2823 2820
@@ -2839,8 +2836,8 @@ static void ext4_mark_recovery_complete(struct super_block * sb,
2839 * has recorded an error from a previous lifetime, move that error to the 2836 * has recorded an error from a previous lifetime, move that error to the
2840 * main filesystem now. 2837 * main filesystem now.
2841 */ 2838 */
2842static void ext4_clear_journal_err(struct super_block * sb, 2839static void ext4_clear_journal_err(struct super_block *sb,
2843 struct ext4_super_block * es) 2840 struct ext4_super_block *es)
2844{ 2841{
2845 journal_t *journal; 2842 journal_t *journal;
2846 int j_errno; 2843 int j_errno;
@@ -2865,7 +2862,7 @@ static void ext4_clear_journal_err(struct super_block * sb,
2865 2862
2866 EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS; 2863 EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
2867 es->s_state |= cpu_to_le16(EXT4_ERROR_FS); 2864 es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
2868 ext4_commit_super (sb, es, 1); 2865 ext4_commit_super(sb, es, 1);
2869 2866
2870 jbd2_journal_clear_err(journal); 2867 jbd2_journal_clear_err(journal);
2871 } 2868 }
@@ -2898,7 +2895,7 @@ int ext4_force_commit(struct super_block *sb)
2898 * This implicitly triggers the writebehind on sync(). 2895 * This implicitly triggers the writebehind on sync().
2899 */ 2896 */
2900 2897
2901static void ext4_write_super (struct super_block * sb) 2898static void ext4_write_super(struct super_block *sb)
2902{ 2899{
2903 if (mutex_trylock(&sb->s_lock) != 0) 2900 if (mutex_trylock(&sb->s_lock) != 0)
2904 BUG(); 2901 BUG();
@@ -2954,13 +2951,14 @@ static void ext4_unlockfs(struct super_block *sb)
2954 } 2951 }
2955} 2952}
2956 2953
2957static int ext4_remount (struct super_block * sb, int * flags, char * data) 2954static int ext4_remount(struct super_block *sb, int *flags, char *data)
2958{ 2955{
2959 struct ext4_super_block * es; 2956 struct ext4_super_block *es;
2960 struct ext4_sb_info *sbi = EXT4_SB(sb); 2957 struct ext4_sb_info *sbi = EXT4_SB(sb);
2961 ext4_fsblk_t n_blocks_count = 0; 2958 ext4_fsblk_t n_blocks_count = 0;
2962 unsigned long old_sb_flags; 2959 unsigned long old_sb_flags;
2963 struct ext4_mount_options old_opts; 2960 struct ext4_mount_options old_opts;
2961 ext4_group_t g;
2964 int err; 2962 int err;
2965#ifdef CONFIG_QUOTA 2963#ifdef CONFIG_QUOTA
2966 int i; 2964 int i;
@@ -3039,6 +3037,26 @@ static int ext4_remount (struct super_block * sb, int * flags, char * data)
3039 } 3037 }
3040 3038
3041 /* 3039 /*
3040 * Make sure the group descriptor checksums
3041 * are sane. If they aren't, refuse to
3042 * remount r/w.
3043 */
3044 for (g = 0; g < sbi->s_groups_count; g++) {
3045 struct ext4_group_desc *gdp =
3046 ext4_get_group_desc(sb, g, NULL);
3047
3048 if (!ext4_group_desc_csum_verify(sbi, g, gdp)) {
3049 printk(KERN_ERR
3050 "EXT4-fs: ext4_remount: "
3051 "Checksum for group %lu failed (%u!=%u)\n",
3052 g, le16_to_cpu(ext4_group_desc_csum(sbi, g, gdp)),
3053 le16_to_cpu(gdp->bg_checksum));
3054 err = -EINVAL;
3055 goto restore_opts;
3056 }
3057 }
3058
3059 /*
3042 * If we have an unprocessed orphan list hanging 3060 * If we have an unprocessed orphan list hanging
3043 * around from a previously readonly bdev mount, 3061 * around from a previously readonly bdev mount,
3044 * require a full umount/remount for now. 3062 * require a full umount/remount for now.
@@ -3063,7 +3081,7 @@ static int ext4_remount (struct super_block * sb, int * flags, char * data)
3063 sbi->s_mount_state = le16_to_cpu(es->s_state); 3081 sbi->s_mount_state = le16_to_cpu(es->s_state);
3064 if ((err = ext4_group_extend(sb, es, n_blocks_count))) 3082 if ((err = ext4_group_extend(sb, es, n_blocks_count)))
3065 goto restore_opts; 3083 goto restore_opts;
3066 if (!ext4_setup_super (sb, es, 0)) 3084 if (!ext4_setup_super(sb, es, 0))
3067 sb->s_flags &= ~MS_RDONLY; 3085 sb->s_flags &= ~MS_RDONLY;
3068 } 3086 }
3069 } 3087 }
@@ -3093,7 +3111,7 @@ restore_opts:
3093 return err; 3111 return err;
3094} 3112}
3095 3113
3096static int ext4_statfs (struct dentry * dentry, struct kstatfs * buf) 3114static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf)
3097{ 3115{
3098 struct super_block *sb = dentry->d_sb; 3116 struct super_block *sb = dentry->d_sb;
3099 struct ext4_sb_info *sbi = EXT4_SB(sb); 3117 struct ext4_sb_info *sbi = EXT4_SB(sb);
@@ -3331,12 +3349,12 @@ static int ext4_quota_on(struct super_block *sb, int type, int format_id,
3331 } 3349 }
3332 /* Journaling quota? */ 3350 /* Journaling quota? */
3333 if (EXT4_SB(sb)->s_qf_names[type]) { 3351 if (EXT4_SB(sb)->s_qf_names[type]) {
3334 /* Quotafile not of fs root? */ 3352 /* Quotafile not in fs root? */
3335 if (nd.path.dentry->d_parent->d_inode != sb->s_root->d_inode) 3353 if (nd.path.dentry->d_parent->d_inode != sb->s_root->d_inode)
3336 printk(KERN_WARNING 3354 printk(KERN_WARNING
3337 "EXT4-fs: Quota file not on filesystem root. " 3355 "EXT4-fs: Quota file not on filesystem root. "
3338 "Journaled quota will not work.\n"); 3356 "Journaled quota will not work.\n");
3339 } 3357 }
3340 3358
3341 /* 3359 /*
3342 * When we journal data on quota file, we have to flush journal to see 3360 * When we journal data on quota file, we have to flush journal to see
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index 93c5fdcdad2e..8954208b4893 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -1512,7 +1512,7 @@ static inline void ext4_xattr_hash_entry(struct ext4_xattr_header *header,
1512 char *name = entry->e_name; 1512 char *name = entry->e_name;
1513 int n; 1513 int n;
1514 1514
1515 for (n=0; n < entry->e_name_len; n++) { 1515 for (n = 0; n < entry->e_name_len; n++) {
1516 hash = (hash << NAME_HASH_SHIFT) ^ 1516 hash = (hash << NAME_HASH_SHIFT) ^
1517 (hash >> (8*sizeof(hash) - NAME_HASH_SHIFT)) ^ 1517 (hash >> (8*sizeof(hash) - NAME_HASH_SHIFT)) ^
1518 *name++; 1518 *name++;
diff --git a/fs/fat/file.c b/fs/fat/file.c
index 8707a8cfa02c..ddde37025ca6 100644
--- a/fs/fat/file.c
+++ b/fs/fat/file.c
@@ -313,6 +313,8 @@ static int fat_allow_set_time(struct msdos_sb_info *sbi, struct inode *inode)
313 return 0; 313 return 0;
314} 314}
315 315
316#define TIMES_SET_FLAGS (ATTR_MTIME_SET | ATTR_ATIME_SET | ATTR_TIMES_SET)
317
316int fat_setattr(struct dentry *dentry, struct iattr *attr) 318int fat_setattr(struct dentry *dentry, struct iattr *attr)
317{ 319{
318 struct msdos_sb_info *sbi = MSDOS_SB(dentry->d_sb); 320 struct msdos_sb_info *sbi = MSDOS_SB(dentry->d_sb);
@@ -336,9 +338,9 @@ int fat_setattr(struct dentry *dentry, struct iattr *attr)
336 338
337 /* Check for setting the inode time. */ 339 /* Check for setting the inode time. */
338 ia_valid = attr->ia_valid; 340 ia_valid = attr->ia_valid;
339 if (ia_valid & (ATTR_MTIME_SET | ATTR_ATIME_SET)) { 341 if (ia_valid & TIMES_SET_FLAGS) {
340 if (fat_allow_set_time(sbi, inode)) 342 if (fat_allow_set_time(sbi, inode))
341 attr->ia_valid &= ~(ATTR_MTIME_SET | ATTR_ATIME_SET); 343 attr->ia_valid &= ~TIMES_SET_FLAGS;
342 } 344 }
343 345
344 error = inode_change_ok(inode, attr); 346 error = inode_change_ok(inode, attr);
diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c
index 2eccbfaa1d48..ae08c057e751 100644
--- a/fs/jbd/commit.c
+++ b/fs/jbd/commit.c
@@ -63,7 +63,7 @@ static void release_buffer_page(struct buffer_head *bh)
63 goto nope; 63 goto nope;
64 64
65 /* OK, it's a truncated page */ 65 /* OK, it's a truncated page */
66 if (TestSetPageLocked(page)) 66 if (!trylock_page(page))
67 goto nope; 67 goto nope;
68 68
69 page_cache_get(page); 69 page_cache_get(page);
@@ -221,7 +221,7 @@ write_out_data:
221 * blocking lock_buffer(). 221 * blocking lock_buffer().
222 */ 222 */
223 if (buffer_dirty(bh)) { 223 if (buffer_dirty(bh)) {
224 if (test_set_buffer_locked(bh)) { 224 if (!trylock_buffer(bh)) {
225 BUFFER_TRACE(bh, "needs blocking lock"); 225 BUFFER_TRACE(bh, "needs blocking lock");
226 spin_unlock(&journal->j_list_lock); 226 spin_unlock(&journal->j_list_lock);
227 /* Write out all data to prevent deadlocks */ 227 /* Write out all data to prevent deadlocks */
@@ -446,7 +446,7 @@ void journal_commit_transaction(journal_t *journal)
446 spin_lock(&journal->j_list_lock); 446 spin_lock(&journal->j_list_lock);
447 } 447 }
448 if (unlikely(!buffer_uptodate(bh))) { 448 if (unlikely(!buffer_uptodate(bh))) {
449 if (TestSetPageLocked(bh->b_page)) { 449 if (!trylock_page(bh->b_page)) {
450 spin_unlock(&journal->j_list_lock); 450 spin_unlock(&journal->j_list_lock);
451 lock_page(bh->b_page); 451 lock_page(bh->b_page);
452 spin_lock(&journal->j_list_lock); 452 spin_lock(&journal->j_list_lock);
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index f8b3be873226..f2ad061e95ec 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -67,7 +67,7 @@ static void release_buffer_page(struct buffer_head *bh)
67 goto nope; 67 goto nope;
68 68
69 /* OK, it's a truncated page */ 69 /* OK, it's a truncated page */
70 if (TestSetPageLocked(page)) 70 if (!trylock_page(page))
71 goto nope; 71 goto nope;
72 72
73 page_cache_get(page); 73 page_cache_get(page);
@@ -262,8 +262,18 @@ static int journal_finish_inode_data_buffers(journal_t *journal,
262 jinode->i_flags |= JI_COMMIT_RUNNING; 262 jinode->i_flags |= JI_COMMIT_RUNNING;
263 spin_unlock(&journal->j_list_lock); 263 spin_unlock(&journal->j_list_lock);
264 err = filemap_fdatawait(jinode->i_vfs_inode->i_mapping); 264 err = filemap_fdatawait(jinode->i_vfs_inode->i_mapping);
265 if (!ret) 265 if (err) {
266 ret = err; 266 /*
267 * Because AS_EIO is cleared by
268 * wait_on_page_writeback_range(), set it again so
269 * that user process can get -EIO from fsync().
270 */
271 set_bit(AS_EIO,
272 &jinode->i_vfs_inode->i_mapping->flags);
273
274 if (!ret)
275 ret = err;
276 }
267 spin_lock(&journal->j_list_lock); 277 spin_lock(&journal->j_list_lock);
268 jinode->i_flags &= ~JI_COMMIT_RUNNING; 278 jinode->i_flags &= ~JI_COMMIT_RUNNING;
269 wake_up_bit(&jinode->i_flags, __JI_COMMIT_RUNNING); 279 wake_up_bit(&jinode->i_flags, __JI_COMMIT_RUNNING);
@@ -670,8 +680,14 @@ start_journal_io:
670 * commit block, which happens below in such setting. 680 * commit block, which happens below in such setting.
671 */ 681 */
672 err = journal_finish_inode_data_buffers(journal, commit_transaction); 682 err = journal_finish_inode_data_buffers(journal, commit_transaction);
673 if (err) 683 if (err) {
674 jbd2_journal_abort(journal, err); 684 char b[BDEVNAME_SIZE];
685
686 printk(KERN_WARNING
687 "JBD2: Detected IO errors while flushing file data "
688 "on %s\n", bdevname(journal->j_fs_dev, b));
689 err = 0;
690 }
675 691
676 /* Lo and behold: we have just managed to send a transaction to 692 /* Lo and behold: we have just managed to send a transaction to
677 the log. Before we can commit it, wait for the IO so far to 693 the log. Before we can commit it, wait for the IO so far to
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index b26c6d9fe6ae..8207a01c4edb 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -68,7 +68,6 @@ EXPORT_SYMBOL(jbd2_journal_set_features);
68EXPORT_SYMBOL(jbd2_journal_create); 68EXPORT_SYMBOL(jbd2_journal_create);
69EXPORT_SYMBOL(jbd2_journal_load); 69EXPORT_SYMBOL(jbd2_journal_load);
70EXPORT_SYMBOL(jbd2_journal_destroy); 70EXPORT_SYMBOL(jbd2_journal_destroy);
71EXPORT_SYMBOL(jbd2_journal_update_superblock);
72EXPORT_SYMBOL(jbd2_journal_abort); 71EXPORT_SYMBOL(jbd2_journal_abort);
73EXPORT_SYMBOL(jbd2_journal_errno); 72EXPORT_SYMBOL(jbd2_journal_errno);
74EXPORT_SYMBOL(jbd2_journal_ack_err); 73EXPORT_SYMBOL(jbd2_journal_ack_err);
diff --git a/fs/nfs/nfsroot.c b/fs/nfs/nfsroot.c
index 8478fc25daee..46763d1cd397 100644
--- a/fs/nfs/nfsroot.c
+++ b/fs/nfs/nfsroot.c
@@ -127,7 +127,7 @@ enum {
127 Opt_err 127 Opt_err
128}; 128};
129 129
130static match_table_t __initconst tokens = { 130static match_table_t __initdata tokens = {
131 {Opt_port, "port=%u"}, 131 {Opt_port, "port=%u"},
132 {Opt_rsize, "rsize=%u"}, 132 {Opt_rsize, "rsize=%u"},
133 {Opt_wsize, "wsize=%u"}, 133 {Opt_wsize, "wsize=%u"},
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index eef1629806f5..2e51adac65de 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -851,7 +851,7 @@ struct nfsd4_operation {
851 851
852static struct nfsd4_operation nfsd4_ops[]; 852static struct nfsd4_operation nfsd4_ops[];
853 853
854static inline char *nfsd4_op_name(unsigned opnum); 854static const char *nfsd4_op_name(unsigned opnum);
855 855
856/* 856/*
857 * COMPOUND call. 857 * COMPOUND call.
@@ -1116,8 +1116,7 @@ static struct nfsd4_operation nfsd4_ops[OP_RELEASE_LOCKOWNER+1] = {
1116 }, 1116 },
1117}; 1117};
1118 1118
1119static inline char * 1119static const char *nfsd4_op_name(unsigned opnum)
1120nfsd4_op_name(unsigned opnum)
1121{ 1120{
1122 if (opnum < ARRAY_SIZE(nfsd4_ops)) 1121 if (opnum < ARRAY_SIZE(nfsd4_ops))
1123 return nfsd4_ops[opnum].op_name; 1122 return nfsd4_ops[opnum].op_name;
diff --git a/fs/ntfs/aops.c b/fs/ntfs/aops.c
index 00e9ccde8e42..b38f944f0667 100644
--- a/fs/ntfs/aops.c
+++ b/fs/ntfs/aops.c
@@ -1194,7 +1194,7 @@ lock_retry_remap:
1194 tbh = bhs[i]; 1194 tbh = bhs[i];
1195 if (!tbh) 1195 if (!tbh)
1196 continue; 1196 continue;
1197 if (unlikely(test_set_buffer_locked(tbh))) 1197 if (!trylock_buffer(tbh))
1198 BUG(); 1198 BUG();
1199 /* The buffer dirty state is now irrelevant, just clean it. */ 1199 /* The buffer dirty state is now irrelevant, just clean it. */
1200 clear_buffer_dirty(tbh); 1200 clear_buffer_dirty(tbh);
diff --git a/fs/ntfs/compress.c b/fs/ntfs/compress.c
index 33ff314cc507..9669541d0119 100644
--- a/fs/ntfs/compress.c
+++ b/fs/ntfs/compress.c
@@ -665,7 +665,7 @@ lock_retry_remap:
665 for (i = 0; i < nr_bhs; i++) { 665 for (i = 0; i < nr_bhs; i++) {
666 struct buffer_head *tbh = bhs[i]; 666 struct buffer_head *tbh = bhs[i];
667 667
668 if (unlikely(test_set_buffer_locked(tbh))) 668 if (!trylock_buffer(tbh))
669 continue; 669 continue;
670 if (unlikely(buffer_uptodate(tbh))) { 670 if (unlikely(buffer_uptodate(tbh))) {
671 unlock_buffer(tbh); 671 unlock_buffer(tbh);
diff --git a/fs/ntfs/mft.c b/fs/ntfs/mft.c
index 790defb847e7..17d32ca6bc35 100644
--- a/fs/ntfs/mft.c
+++ b/fs/ntfs/mft.c
@@ -586,7 +586,7 @@ int ntfs_sync_mft_mirror(ntfs_volume *vol, const unsigned long mft_no,
586 for (i_bhs = 0; i_bhs < nr_bhs; i_bhs++) { 586 for (i_bhs = 0; i_bhs < nr_bhs; i_bhs++) {
587 struct buffer_head *tbh = bhs[i_bhs]; 587 struct buffer_head *tbh = bhs[i_bhs];
588 588
589 if (unlikely(test_set_buffer_locked(tbh))) 589 if (!trylock_buffer(tbh))
590 BUG(); 590 BUG();
591 BUG_ON(!buffer_uptodate(tbh)); 591 BUG_ON(!buffer_uptodate(tbh));
592 clear_buffer_dirty(tbh); 592 clear_buffer_dirty(tbh);
@@ -779,7 +779,7 @@ int write_mft_record_nolock(ntfs_inode *ni, MFT_RECORD *m, int sync)
779 for (i_bhs = 0; i_bhs < nr_bhs; i_bhs++) { 779 for (i_bhs = 0; i_bhs < nr_bhs; i_bhs++) {
780 struct buffer_head *tbh = bhs[i_bhs]; 780 struct buffer_head *tbh = bhs[i_bhs];
781 781
782 if (unlikely(test_set_buffer_locked(tbh))) 782 if (!trylock_buffer(tbh))
783 BUG(); 783 BUG();
784 BUG_ON(!buffer_uptodate(tbh)); 784 BUG_ON(!buffer_uptodate(tbh));
785 clear_buffer_dirty(tbh); 785 clear_buffer_dirty(tbh);
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index 1db080135c6d..506c24fb5078 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -1073,12 +1073,15 @@ static void ocfs2_write_failure(struct inode *inode,
1073 for(i = 0; i < wc->w_num_pages; i++) { 1073 for(i = 0; i < wc->w_num_pages; i++) {
1074 tmppage = wc->w_pages[i]; 1074 tmppage = wc->w_pages[i];
1075 1075
1076 if (ocfs2_should_order_data(inode)) 1076 if (page_has_buffers(tmppage)) {
1077 walk_page_buffers(wc->w_handle, page_buffers(tmppage), 1077 if (ocfs2_should_order_data(inode))
1078 from, to, NULL, 1078 walk_page_buffers(wc->w_handle,
1079 ocfs2_journal_dirty_data); 1079 page_buffers(tmppage),
1080 1080 from, to, NULL,
1081 block_commit_write(tmppage, from, to); 1081 ocfs2_journal_dirty_data);
1082
1083 block_commit_write(tmppage, from, to);
1084 }
1082 } 1085 }
1083} 1086}
1084 1087
@@ -1901,12 +1904,14 @@ int ocfs2_write_end_nolock(struct address_space *mapping,
1901 to = PAGE_CACHE_SIZE; 1904 to = PAGE_CACHE_SIZE;
1902 } 1905 }
1903 1906
1904 if (ocfs2_should_order_data(inode)) 1907 if (page_has_buffers(tmppage)) {
1905 walk_page_buffers(wc->w_handle, page_buffers(tmppage), 1908 if (ocfs2_should_order_data(inode))
1906 from, to, NULL, 1909 walk_page_buffers(wc->w_handle,
1907 ocfs2_journal_dirty_data); 1910 page_buffers(tmppage),
1908 1911 from, to, NULL,
1909 block_commit_write(tmppage, from, to); 1912 ocfs2_journal_dirty_data);
1913 block_commit_write(tmppage, from, to);
1914 }
1910 } 1915 }
1911 1916
1912out_write_size: 1917out_write_size:
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index be2dd95d3a1d..ec2ed15c3daa 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -1766,8 +1766,8 @@ out_inode_unlock:
1766out_rw_unlock: 1766out_rw_unlock:
1767 ocfs2_rw_unlock(inode, 1); 1767 ocfs2_rw_unlock(inode, 1);
1768 1768
1769 mutex_unlock(&inode->i_mutex);
1770out: 1769out:
1770 mutex_unlock(&inode->i_mutex);
1771 return ret; 1771 return ret;
1772} 1772}
1773 1773
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
index a8c19cb3cfdd..7a37240f7a31 100644
--- a/fs/ocfs2/journal.c
+++ b/fs/ocfs2/journal.c
@@ -57,7 +57,7 @@ static int __ocfs2_recovery_thread(void *arg);
57static int ocfs2_commit_cache(struct ocfs2_super *osb); 57static int ocfs2_commit_cache(struct ocfs2_super *osb);
58static int ocfs2_wait_on_mount(struct ocfs2_super *osb); 58static int ocfs2_wait_on_mount(struct ocfs2_super *osb);
59static int ocfs2_journal_toggle_dirty(struct ocfs2_super *osb, 59static int ocfs2_journal_toggle_dirty(struct ocfs2_super *osb,
60 int dirty); 60 int dirty, int replayed);
61static int ocfs2_trylock_journal(struct ocfs2_super *osb, 61static int ocfs2_trylock_journal(struct ocfs2_super *osb,
62 int slot_num); 62 int slot_num);
63static int ocfs2_recover_orphans(struct ocfs2_super *osb, 63static int ocfs2_recover_orphans(struct ocfs2_super *osb,
@@ -562,8 +562,18 @@ done:
562 return status; 562 return status;
563} 563}
564 564
565static void ocfs2_bump_recovery_generation(struct ocfs2_dinode *di)
566{
567 le32_add_cpu(&(di->id1.journal1.ij_recovery_generation), 1);
568}
569
570static u32 ocfs2_get_recovery_generation(struct ocfs2_dinode *di)
571{
572 return le32_to_cpu(di->id1.journal1.ij_recovery_generation);
573}
574
565static int ocfs2_journal_toggle_dirty(struct ocfs2_super *osb, 575static int ocfs2_journal_toggle_dirty(struct ocfs2_super *osb,
566 int dirty) 576 int dirty, int replayed)
567{ 577{
568 int status; 578 int status;
569 unsigned int flags; 579 unsigned int flags;
@@ -593,6 +603,9 @@ static int ocfs2_journal_toggle_dirty(struct ocfs2_super *osb,
593 flags &= ~OCFS2_JOURNAL_DIRTY_FL; 603 flags &= ~OCFS2_JOURNAL_DIRTY_FL;
594 fe->id1.journal1.ij_flags = cpu_to_le32(flags); 604 fe->id1.journal1.ij_flags = cpu_to_le32(flags);
595 605
606 if (replayed)
607 ocfs2_bump_recovery_generation(fe);
608
596 status = ocfs2_write_block(osb, bh, journal->j_inode); 609 status = ocfs2_write_block(osb, bh, journal->j_inode);
597 if (status < 0) 610 if (status < 0)
598 mlog_errno(status); 611 mlog_errno(status);
@@ -667,7 +680,7 @@ void ocfs2_journal_shutdown(struct ocfs2_super *osb)
667 * Do not toggle if flush was unsuccessful otherwise 680 * Do not toggle if flush was unsuccessful otherwise
668 * will leave dirty metadata in a "clean" journal 681 * will leave dirty metadata in a "clean" journal
669 */ 682 */
670 status = ocfs2_journal_toggle_dirty(osb, 0); 683 status = ocfs2_journal_toggle_dirty(osb, 0, 0);
671 if (status < 0) 684 if (status < 0)
672 mlog_errno(status); 685 mlog_errno(status);
673 } 686 }
@@ -710,7 +723,7 @@ static void ocfs2_clear_journal_error(struct super_block *sb,
710 } 723 }
711} 724}
712 725
713int ocfs2_journal_load(struct ocfs2_journal *journal, int local) 726int ocfs2_journal_load(struct ocfs2_journal *journal, int local, int replayed)
714{ 727{
715 int status = 0; 728 int status = 0;
716 struct ocfs2_super *osb; 729 struct ocfs2_super *osb;
@@ -729,7 +742,7 @@ int ocfs2_journal_load(struct ocfs2_journal *journal, int local)
729 742
730 ocfs2_clear_journal_error(osb->sb, journal->j_journal, osb->slot_num); 743 ocfs2_clear_journal_error(osb->sb, journal->j_journal, osb->slot_num);
731 744
732 status = ocfs2_journal_toggle_dirty(osb, 1); 745 status = ocfs2_journal_toggle_dirty(osb, 1, replayed);
733 if (status < 0) { 746 if (status < 0) {
734 mlog_errno(status); 747 mlog_errno(status);
735 goto done; 748 goto done;
@@ -771,7 +784,7 @@ int ocfs2_journal_wipe(struct ocfs2_journal *journal, int full)
771 goto bail; 784 goto bail;
772 } 785 }
773 786
774 status = ocfs2_journal_toggle_dirty(journal->j_osb, 0); 787 status = ocfs2_journal_toggle_dirty(journal->j_osb, 0, 0);
775 if (status < 0) 788 if (status < 0)
776 mlog_errno(status); 789 mlog_errno(status);
777 790
@@ -1034,6 +1047,12 @@ restart:
1034 spin_unlock(&osb->osb_lock); 1047 spin_unlock(&osb->osb_lock);
1035 mlog(0, "All nodes recovered\n"); 1048 mlog(0, "All nodes recovered\n");
1036 1049
1050 /* Refresh all journal recovery generations from disk */
1051 status = ocfs2_check_journals_nolocks(osb);
1052 status = (status == -EROFS) ? 0 : status;
1053 if (status < 0)
1054 mlog_errno(status);
1055
1037 ocfs2_super_unlock(osb, 1); 1056 ocfs2_super_unlock(osb, 1);
1038 1057
1039 /* We always run recovery on our own orphan dir - the dead 1058 /* We always run recovery on our own orphan dir - the dead
@@ -1096,6 +1115,42 @@ out:
1096 mlog_exit_void(); 1115 mlog_exit_void();
1097} 1116}
1098 1117
1118static int ocfs2_read_journal_inode(struct ocfs2_super *osb,
1119 int slot_num,
1120 struct buffer_head **bh,
1121 struct inode **ret_inode)
1122{
1123 int status = -EACCES;
1124 struct inode *inode = NULL;
1125
1126 BUG_ON(slot_num >= osb->max_slots);
1127
1128 inode = ocfs2_get_system_file_inode(osb, JOURNAL_SYSTEM_INODE,
1129 slot_num);
1130 if (!inode || is_bad_inode(inode)) {
1131 mlog_errno(status);
1132 goto bail;
1133 }
1134 SET_INODE_JOURNAL(inode);
1135
1136 status = ocfs2_read_block(osb, OCFS2_I(inode)->ip_blkno, bh, 0, inode);
1137 if (status < 0) {
1138 mlog_errno(status);
1139 goto bail;
1140 }
1141
1142 status = 0;
1143
1144bail:
1145 if (inode) {
1146 if (status || !ret_inode)
1147 iput(inode);
1148 else
1149 *ret_inode = inode;
1150 }
1151 return status;
1152}
1153
1099/* Does the actual journal replay and marks the journal inode as 1154/* Does the actual journal replay and marks the journal inode as
1100 * clean. Will only replay if the journal inode is marked dirty. */ 1155 * clean. Will only replay if the journal inode is marked dirty. */
1101static int ocfs2_replay_journal(struct ocfs2_super *osb, 1156static int ocfs2_replay_journal(struct ocfs2_super *osb,
@@ -1109,22 +1164,36 @@ static int ocfs2_replay_journal(struct ocfs2_super *osb,
1109 struct ocfs2_dinode *fe; 1164 struct ocfs2_dinode *fe;
1110 journal_t *journal = NULL; 1165 journal_t *journal = NULL;
1111 struct buffer_head *bh = NULL; 1166 struct buffer_head *bh = NULL;
1167 u32 slot_reco_gen;
1112 1168
1113 inode = ocfs2_get_system_file_inode(osb, JOURNAL_SYSTEM_INODE, 1169 status = ocfs2_read_journal_inode(osb, slot_num, &bh, &inode);
1114 slot_num); 1170 if (status) {
1115 if (inode == NULL) {
1116 status = -EACCES;
1117 mlog_errno(status); 1171 mlog_errno(status);
1118 goto done; 1172 goto done;
1119 } 1173 }
1120 if (is_bad_inode(inode)) { 1174
1121 status = -EACCES; 1175 fe = (struct ocfs2_dinode *)bh->b_data;
1122 iput(inode); 1176 slot_reco_gen = ocfs2_get_recovery_generation(fe);
1123 inode = NULL; 1177 brelse(bh);
1124 mlog_errno(status); 1178 bh = NULL;
1179
1180 /*
1181 * As the fs recovery is asynchronous, there is a small chance that
1182 * another node mounted (and recovered) the slot before the recovery
1183 * thread could get the lock. To handle that, we dirty read the journal
1184 * inode for that slot to get the recovery generation. If it is
1185 * different than what we expected, the slot has been recovered.
1186 * If not, it needs recovery.
1187 */
1188 if (osb->slot_recovery_generations[slot_num] != slot_reco_gen) {
1189 mlog(0, "Slot %u already recovered (old/new=%u/%u)\n", slot_num,
1190 osb->slot_recovery_generations[slot_num], slot_reco_gen);
1191 osb->slot_recovery_generations[slot_num] = slot_reco_gen;
1192 status = -EBUSY;
1125 goto done; 1193 goto done;
1126 } 1194 }
1127 SET_INODE_JOURNAL(inode); 1195
1196 /* Continue with recovery as the journal has not yet been recovered */
1128 1197
1129 status = ocfs2_inode_lock_full(inode, &bh, 1, OCFS2_META_LOCK_RECOVERY); 1198 status = ocfs2_inode_lock_full(inode, &bh, 1, OCFS2_META_LOCK_RECOVERY);
1130 if (status < 0) { 1199 if (status < 0) {
@@ -1138,9 +1207,12 @@ static int ocfs2_replay_journal(struct ocfs2_super *osb,
1138 fe = (struct ocfs2_dinode *) bh->b_data; 1207 fe = (struct ocfs2_dinode *) bh->b_data;
1139 1208
1140 flags = le32_to_cpu(fe->id1.journal1.ij_flags); 1209 flags = le32_to_cpu(fe->id1.journal1.ij_flags);
1210 slot_reco_gen = ocfs2_get_recovery_generation(fe);
1141 1211
1142 if (!(flags & OCFS2_JOURNAL_DIRTY_FL)) { 1212 if (!(flags & OCFS2_JOURNAL_DIRTY_FL)) {
1143 mlog(0, "No recovery required for node %d\n", node_num); 1213 mlog(0, "No recovery required for node %d\n", node_num);
1214 /* Refresh recovery generation for the slot */
1215 osb->slot_recovery_generations[slot_num] = slot_reco_gen;
1144 goto done; 1216 goto done;
1145 } 1217 }
1146 1218
@@ -1188,6 +1260,11 @@ static int ocfs2_replay_journal(struct ocfs2_super *osb,
1188 flags &= ~OCFS2_JOURNAL_DIRTY_FL; 1260 flags &= ~OCFS2_JOURNAL_DIRTY_FL;
1189 fe->id1.journal1.ij_flags = cpu_to_le32(flags); 1261 fe->id1.journal1.ij_flags = cpu_to_le32(flags);
1190 1262
1263 /* Increment recovery generation to indicate successful recovery */
1264 ocfs2_bump_recovery_generation(fe);
1265 osb->slot_recovery_generations[slot_num] =
1266 ocfs2_get_recovery_generation(fe);
1267
1191 status = ocfs2_write_block(osb, bh, inode); 1268 status = ocfs2_write_block(osb, bh, inode);
1192 if (status < 0) 1269 if (status < 0)
1193 mlog_errno(status); 1270 mlog_errno(status);
@@ -1252,6 +1329,13 @@ static int ocfs2_recover_node(struct ocfs2_super *osb,
1252 1329
1253 status = ocfs2_replay_journal(osb, node_num, slot_num); 1330 status = ocfs2_replay_journal(osb, node_num, slot_num);
1254 if (status < 0) { 1331 if (status < 0) {
1332 if (status == -EBUSY) {
1333 mlog(0, "Skipping recovery for slot %u (node %u) "
1334 "as another node has recovered it\n", slot_num,
1335 node_num);
1336 status = 0;
1337 goto done;
1338 }
1255 mlog_errno(status); 1339 mlog_errno(status);
1256 goto done; 1340 goto done;
1257 } 1341 }
@@ -1334,12 +1418,29 @@ int ocfs2_mark_dead_nodes(struct ocfs2_super *osb)
1334{ 1418{
1335 unsigned int node_num; 1419 unsigned int node_num;
1336 int status, i; 1420 int status, i;
1421 struct buffer_head *bh = NULL;
1422 struct ocfs2_dinode *di;
1337 1423
1338 /* This is called with the super block cluster lock, so we 1424 /* This is called with the super block cluster lock, so we
1339 * know that the slot map can't change underneath us. */ 1425 * know that the slot map can't change underneath us. */
1340 1426
1341 spin_lock(&osb->osb_lock); 1427 spin_lock(&osb->osb_lock);
1342 for (i = 0; i < osb->max_slots; i++) { 1428 for (i = 0; i < osb->max_slots; i++) {
1429 /* Read journal inode to get the recovery generation */
1430 status = ocfs2_read_journal_inode(osb, i, &bh, NULL);
1431 if (status) {
1432 mlog_errno(status);
1433 goto bail;
1434 }
1435 di = (struct ocfs2_dinode *)bh->b_data;
1436 osb->slot_recovery_generations[i] =
1437 ocfs2_get_recovery_generation(di);
1438 brelse(bh);
1439 bh = NULL;
1440
1441 mlog(0, "Slot %u recovery generation is %u\n", i,
1442 osb->slot_recovery_generations[i]);
1443
1343 if (i == osb->slot_num) 1444 if (i == osb->slot_num)
1344 continue; 1445 continue;
1345 1446
@@ -1603,49 +1704,41 @@ static int ocfs2_commit_thread(void *arg)
1603 return 0; 1704 return 0;
1604} 1705}
1605 1706
1606/* Look for a dirty journal without taking any cluster locks. Used for 1707/* Reads all the journal inodes without taking any cluster locks. Used
1607 * hard readonly access to determine whether the file system journals 1708 * for hard readonly access to determine whether any journal requires
1608 * require recovery. */ 1709 * recovery. Also used to refresh the recovery generation numbers after
1710 * a journal has been recovered by another node.
1711 */
1609int ocfs2_check_journals_nolocks(struct ocfs2_super *osb) 1712int ocfs2_check_journals_nolocks(struct ocfs2_super *osb)
1610{ 1713{
1611 int ret = 0; 1714 int ret = 0;
1612 unsigned int slot; 1715 unsigned int slot;
1613 struct buffer_head *di_bh; 1716 struct buffer_head *di_bh = NULL;
1614 struct ocfs2_dinode *di; 1717 struct ocfs2_dinode *di;
1615 struct inode *journal = NULL; 1718 int journal_dirty = 0;
1616 1719
1617 for(slot = 0; slot < osb->max_slots; slot++) { 1720 for(slot = 0; slot < osb->max_slots; slot++) {
1618 journal = ocfs2_get_system_file_inode(osb, 1721 ret = ocfs2_read_journal_inode(osb, slot, &di_bh, NULL);
1619 JOURNAL_SYSTEM_INODE, 1722 if (ret) {
1620 slot);
1621 if (!journal || is_bad_inode(journal)) {
1622 ret = -EACCES;
1623 mlog_errno(ret);
1624 goto out;
1625 }
1626
1627 di_bh = NULL;
1628 ret = ocfs2_read_block(osb, OCFS2_I(journal)->ip_blkno, &di_bh,
1629 0, journal);
1630 if (ret < 0) {
1631 mlog_errno(ret); 1723 mlog_errno(ret);
1632 goto out; 1724 goto out;
1633 } 1725 }
1634 1726
1635 di = (struct ocfs2_dinode *) di_bh->b_data; 1727 di = (struct ocfs2_dinode *) di_bh->b_data;
1636 1728
1729 osb->slot_recovery_generations[slot] =
1730 ocfs2_get_recovery_generation(di);
1731
1637 if (le32_to_cpu(di->id1.journal1.ij_flags) & 1732 if (le32_to_cpu(di->id1.journal1.ij_flags) &
1638 OCFS2_JOURNAL_DIRTY_FL) 1733 OCFS2_JOURNAL_DIRTY_FL)
1639 ret = -EROFS; 1734 journal_dirty = 1;
1640 1735
1641 brelse(di_bh); 1736 brelse(di_bh);
1642 if (ret) 1737 di_bh = NULL;
1643 break;
1644 } 1738 }
1645 1739
1646out: 1740out:
1647 if (journal) 1741 if (journal_dirty)
1648 iput(journal); 1742 ret = -EROFS;
1649
1650 return ret; 1743 return ret;
1651} 1744}
diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h
index db82be2532ed..2178ebffa05f 100644
--- a/fs/ocfs2/journal.h
+++ b/fs/ocfs2/journal.h
@@ -161,7 +161,8 @@ int ocfs2_journal_init(struct ocfs2_journal *journal,
161void ocfs2_journal_shutdown(struct ocfs2_super *osb); 161void ocfs2_journal_shutdown(struct ocfs2_super *osb);
162int ocfs2_journal_wipe(struct ocfs2_journal *journal, 162int ocfs2_journal_wipe(struct ocfs2_journal *journal,
163 int full); 163 int full);
164int ocfs2_journal_load(struct ocfs2_journal *journal, int local); 164int ocfs2_journal_load(struct ocfs2_journal *journal, int local,
165 int replayed);
165int ocfs2_check_journals_nolocks(struct ocfs2_super *osb); 166int ocfs2_check_journals_nolocks(struct ocfs2_super *osb);
166void ocfs2_recovery_thread(struct ocfs2_super *osb, 167void ocfs2_recovery_thread(struct ocfs2_super *osb,
167 int node_num); 168 int node_num);
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h
index 1cb814be8ef1..7f625f2b1117 100644
--- a/fs/ocfs2/ocfs2.h
+++ b/fs/ocfs2/ocfs2.h
@@ -204,6 +204,8 @@ struct ocfs2_super
204 204
205 struct ocfs2_slot_info *slot_info; 205 struct ocfs2_slot_info *slot_info;
206 206
207 u32 *slot_recovery_generations;
208
207 spinlock_t node_map_lock; 209 spinlock_t node_map_lock;
208 210
209 u64 root_blkno; 211 u64 root_blkno;
diff --git a/fs/ocfs2/ocfs2_fs.h b/fs/ocfs2/ocfs2_fs.h
index 3f1945177629..4f619850ccf7 100644
--- a/fs/ocfs2/ocfs2_fs.h
+++ b/fs/ocfs2/ocfs2_fs.h
@@ -660,7 +660,10 @@ struct ocfs2_dinode {
660 struct { /* Info for journal system 660 struct { /* Info for journal system
661 inodes */ 661 inodes */
662 __le32 ij_flags; /* Mounted, version, etc. */ 662 __le32 ij_flags; /* Mounted, version, etc. */
663 __le32 ij_pad; 663 __le32 ij_recovery_generation; /* Incremented when the
664 journal is recovered
665 after an unclean
666 shutdown */
664 } journal1; 667 } journal1;
665 } id1; /* Inode type dependant 1 */ 668 } id1; /* Inode type dependant 1 */
666/*C0*/ union { 669/*C0*/ union {
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 2560b33889aa..88255d3f52b4 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -1442,6 +1442,15 @@ static int ocfs2_initialize_super(struct super_block *sb,
1442 } 1442 }
1443 mlog(0, "max_slots for this device: %u\n", osb->max_slots); 1443 mlog(0, "max_slots for this device: %u\n", osb->max_slots);
1444 1444
1445 osb->slot_recovery_generations =
1446 kcalloc(osb->max_slots, sizeof(*osb->slot_recovery_generations),
1447 GFP_KERNEL);
1448 if (!osb->slot_recovery_generations) {
1449 status = -ENOMEM;
1450 mlog_errno(status);
1451 goto bail;
1452 }
1453
1445 init_waitqueue_head(&osb->osb_wipe_event); 1454 init_waitqueue_head(&osb->osb_wipe_event);
1446 osb->osb_orphan_wipes = kcalloc(osb->max_slots, 1455 osb->osb_orphan_wipes = kcalloc(osb->max_slots,
1447 sizeof(*osb->osb_orphan_wipes), 1456 sizeof(*osb->osb_orphan_wipes),
@@ -1703,7 +1712,7 @@ static int ocfs2_check_volume(struct ocfs2_super *osb)
1703 local = ocfs2_mount_local(osb); 1712 local = ocfs2_mount_local(osb);
1704 1713
1705 /* will play back anything left in the journal. */ 1714 /* will play back anything left in the journal. */
1706 status = ocfs2_journal_load(osb->journal, local); 1715 status = ocfs2_journal_load(osb->journal, local, dirty);
1707 if (status < 0) { 1716 if (status < 0) {
1708 mlog(ML_ERROR, "ocfs2 journal load failed! %d\n", status); 1717 mlog(ML_ERROR, "ocfs2 journal load failed! %d\n", status);
1709 goto finally; 1718 goto finally;
@@ -1768,6 +1777,7 @@ static void ocfs2_delete_osb(struct ocfs2_super *osb)
1768 ocfs2_free_slot_info(osb); 1777 ocfs2_free_slot_info(osb);
1769 1778
1770 kfree(osb->osb_orphan_wipes); 1779 kfree(osb->osb_orphan_wipes);
1780 kfree(osb->slot_recovery_generations);
1771 /* FIXME 1781 /* FIXME
1772 * This belongs in journal shutdown, but because we have to 1782 * This belongs in journal shutdown, but because we have to
1773 * allocate osb->journal at the start of ocfs2_initalize_osb(), 1783 * allocate osb->journal at the start of ocfs2_initalize_osb(),
diff --git a/fs/omfs/inode.c b/fs/omfs/inode.c
index d865f5535436..a95fe5984f4b 100644
--- a/fs/omfs/inode.c
+++ b/fs/omfs/inode.c
@@ -492,7 +492,8 @@ static int omfs_fill_super(struct super_block *sb, void *data, int silent)
492 if (sbi->s_num_blocks != be64_to_cpu(omfs_rb->r_num_blocks)) { 492 if (sbi->s_num_blocks != be64_to_cpu(omfs_rb->r_num_blocks)) {
493 printk(KERN_ERR "omfs: block count discrepancy between " 493 printk(KERN_ERR "omfs: block count discrepancy between "
494 "super and root blocks (%llx, %llx)\n", 494 "super and root blocks (%llx, %llx)\n",
495 sbi->s_num_blocks, be64_to_cpu(omfs_rb->r_num_blocks)); 495 (unsigned long long)sbi->s_num_blocks,
496 (unsigned long long)be64_to_cpu(omfs_rb->r_num_blocks));
496 goto out_brelse_bh2; 497 goto out_brelse_bh2;
497 } 498 }
498 499
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 01ed610f9b87..a28840b11b89 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -2423,10 +2423,13 @@ static int do_io_accounting(struct task_struct *task, char *buffer, int whole)
2423 "read_bytes: %llu\n" 2423 "read_bytes: %llu\n"
2424 "write_bytes: %llu\n" 2424 "write_bytes: %llu\n"
2425 "cancelled_write_bytes: %llu\n", 2425 "cancelled_write_bytes: %llu\n",
2426 acct.rchar, acct.wchar, 2426 (unsigned long long)acct.rchar,
2427 acct.syscr, acct.syscw, 2427 (unsigned long long)acct.wchar,
2428 acct.read_bytes, acct.write_bytes, 2428 (unsigned long long)acct.syscr,
2429 acct.cancelled_write_bytes); 2429 (unsigned long long)acct.syscw,
2430 (unsigned long long)acct.read_bytes,
2431 (unsigned long long)acct.write_bytes,
2432 (unsigned long long)acct.cancelled_write_bytes);
2430} 2433}
2431 2434
2432static int proc_tid_io_accounting(struct task_struct *task, char *buffer) 2435static int proc_tid_io_accounting(struct task_struct *task, char *buffer)
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index 192269698a8a..5699171212ae 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -2435,7 +2435,7 @@ static int reiserfs_write_full_page(struct page *page,
2435 if (wbc->sync_mode != WB_SYNC_NONE || !wbc->nonblocking) { 2435 if (wbc->sync_mode != WB_SYNC_NONE || !wbc->nonblocking) {
2436 lock_buffer(bh); 2436 lock_buffer(bh);
2437 } else { 2437 } else {
2438 if (test_set_buffer_locked(bh)) { 2438 if (!trylock_buffer(bh)) {
2439 redirty_page_for_writepage(wbc, page); 2439 redirty_page_for_writepage(wbc, page);
2440 continue; 2440 continue;
2441 } 2441 }
diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c
index c8f60ee183b5..c21df71943a6 100644
--- a/fs/reiserfs/journal.c
+++ b/fs/reiserfs/journal.c
@@ -627,7 +627,7 @@ static int journal_list_still_alive(struct super_block *s,
627static void release_buffer_page(struct buffer_head *bh) 627static void release_buffer_page(struct buffer_head *bh)
628{ 628{
629 struct page *page = bh->b_page; 629 struct page *page = bh->b_page;
630 if (!page->mapping && !TestSetPageLocked(page)) { 630 if (!page->mapping && trylock_page(page)) {
631 page_cache_get(page); 631 page_cache_get(page);
632 put_bh(bh); 632 put_bh(bh);
633 if (!page->mapping) 633 if (!page->mapping)
@@ -855,7 +855,7 @@ static int write_ordered_buffers(spinlock_t * lock,
855 jh = JH_ENTRY(list->next); 855 jh = JH_ENTRY(list->next);
856 bh = jh->bh; 856 bh = jh->bh;
857 get_bh(bh); 857 get_bh(bh);
858 if (test_set_buffer_locked(bh)) { 858 if (!trylock_buffer(bh)) {
859 if (!buffer_dirty(bh)) { 859 if (!buffer_dirty(bh)) {
860 list_move(&jh->list, &tmp); 860 list_move(&jh->list, &tmp);
861 goto loop_next; 861 goto loop_next;
@@ -3871,7 +3871,7 @@ int reiserfs_prepare_for_journal(struct super_block *p_s_sb,
3871{ 3871{
3872 PROC_INFO_INC(p_s_sb, journal.prepare); 3872 PROC_INFO_INC(p_s_sb, journal.prepare);
3873 3873
3874 if (test_set_buffer_locked(bh)) { 3874 if (!trylock_buffer(bh)) {
3875 if (!wait) 3875 if (!wait)
3876 return 0; 3876 return 0;
3877 lock_buffer(bh); 3877 lock_buffer(bh);
diff --git a/fs/splice.c b/fs/splice.c
index b30311ba8af6..1bbc6f4bb09c 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -371,7 +371,7 @@ __generic_file_splice_read(struct file *in, loff_t *ppos,
371 * for an in-flight io page 371 * for an in-flight io page
372 */ 372 */
373 if (flags & SPLICE_F_NONBLOCK) { 373 if (flags & SPLICE_F_NONBLOCK) {
374 if (TestSetPageLocked(page)) { 374 if (!trylock_page(page)) {
375 error = -EAGAIN; 375 error = -EAGAIN;
376 break; 376 break;
377 } 377 }
diff --git a/fs/ufs/super.c b/fs/ufs/super.c
index 3e30e40aa24d..3141969b456d 100644
--- a/fs/ufs/super.c
+++ b/fs/ufs/super.c
@@ -1233,7 +1233,7 @@ static int ufs_show_options(struct seq_file *seq, struct vfsmount *vfs)
1233{ 1233{
1234 struct ufs_sb_info *sbi = UFS_SB(vfs->mnt_sb); 1234 struct ufs_sb_info *sbi = UFS_SB(vfs->mnt_sb);
1235 unsigned mval = sbi->s_mount_opt & UFS_MOUNT_UFSTYPE; 1235 unsigned mval = sbi->s_mount_opt & UFS_MOUNT_UFSTYPE;
1236 const struct match_token *tp = tokens; 1236 struct match_token *tp = tokens;
1237 1237
1238 while (tp->token != Opt_onerror_panic && tp->token != mval) 1238 while (tp->token != Opt_onerror_panic && tp->token != mval)
1239 ++tp; 1239 ++tp;
diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile
index 36ec614e699a..737c9a425361 100644
--- a/fs/xfs/Makefile
+++ b/fs/xfs/Makefile
@@ -106,7 +106,8 @@ xfs-y += $(addprefix $(XFS_LINUX)/, \
106 xfs_iops.o \ 106 xfs_iops.o \
107 xfs_lrw.o \ 107 xfs_lrw.o \
108 xfs_super.o \ 108 xfs_super.o \
109 xfs_vnode.o) 109 xfs_vnode.o \
110 xfs_xattr.o)
110 111
111# Objects in support/ 112# Objects in support/
112xfs-y += $(addprefix support/, \ 113xfs-y += $(addprefix support/, \
diff --git a/fs/xfs/linux-2.6/kmem.c b/fs/xfs/linux-2.6/kmem.c
index 9b1bb17a0501..1cd3b55ee3d2 100644
--- a/fs/xfs/linux-2.6/kmem.c
+++ b/fs/xfs/linux-2.6/kmem.c
@@ -90,7 +90,7 @@ kmem_zalloc_greedy(size_t *size, size_t minsize, size_t maxsize,
90} 90}
91 91
92void 92void
93kmem_free(void *ptr, size_t size) 93kmem_free(const void *ptr)
94{ 94{
95 if (!is_vmalloc_addr(ptr)) { 95 if (!is_vmalloc_addr(ptr)) {
96 kfree(ptr); 96 kfree(ptr);
@@ -100,7 +100,7 @@ kmem_free(void *ptr, size_t size)
100} 100}
101 101
102void * 102void *
103kmem_realloc(void *ptr, size_t newsize, size_t oldsize, 103kmem_realloc(const void *ptr, size_t newsize, size_t oldsize,
104 unsigned int __nocast flags) 104 unsigned int __nocast flags)
105{ 105{
106 void *new; 106 void *new;
@@ -110,7 +110,7 @@ kmem_realloc(void *ptr, size_t newsize, size_t oldsize,
110 if (new) 110 if (new)
111 memcpy(new, ptr, 111 memcpy(new, ptr,
112 ((oldsize < newsize) ? oldsize : newsize)); 112 ((oldsize < newsize) ? oldsize : newsize));
113 kmem_free(ptr, oldsize); 113 kmem_free(ptr);
114 } 114 }
115 return new; 115 return new;
116} 116}
diff --git a/fs/xfs/linux-2.6/kmem.h b/fs/xfs/linux-2.6/kmem.h
index a20683cf74dd..af6843c7ee4b 100644
--- a/fs/xfs/linux-2.6/kmem.h
+++ b/fs/xfs/linux-2.6/kmem.h
@@ -57,8 +57,8 @@ kmem_flags_convert(unsigned int __nocast flags)
57extern void *kmem_alloc(size_t, unsigned int __nocast); 57extern void *kmem_alloc(size_t, unsigned int __nocast);
58extern void *kmem_zalloc(size_t, unsigned int __nocast); 58extern void *kmem_zalloc(size_t, unsigned int __nocast);
59extern void *kmem_zalloc_greedy(size_t *, size_t, size_t, unsigned int __nocast); 59extern void *kmem_zalloc_greedy(size_t *, size_t, size_t, unsigned int __nocast);
60extern void *kmem_realloc(void *, size_t, size_t, unsigned int __nocast); 60extern void *kmem_realloc(const void *, size_t, size_t, unsigned int __nocast);
61extern void kmem_free(void *, size_t); 61extern void kmem_free(const void *);
62 62
63/* 63/*
64 * Zone interfaces 64 * Zone interfaces
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c
index a55c3b26d840..fa47e43b8b41 100644
--- a/fs/xfs/linux-2.6/xfs_aops.c
+++ b/fs/xfs/linux-2.6/xfs_aops.c
@@ -409,7 +409,6 @@ xfs_start_buffer_writeback(
409STATIC void 409STATIC void
410xfs_start_page_writeback( 410xfs_start_page_writeback(
411 struct page *page, 411 struct page *page,
412 struct writeback_control *wbc,
413 int clear_dirty, 412 int clear_dirty,
414 int buffers) 413 int buffers)
415{ 414{
@@ -676,7 +675,7 @@ xfs_probe_cluster(
676 } else 675 } else
677 pg_offset = PAGE_CACHE_SIZE; 676 pg_offset = PAGE_CACHE_SIZE;
678 677
679 if (page->index == tindex && !TestSetPageLocked(page)) { 678 if (page->index == tindex && trylock_page(page)) {
680 pg_len = xfs_probe_page(page, pg_offset, mapped); 679 pg_len = xfs_probe_page(page, pg_offset, mapped);
681 unlock_page(page); 680 unlock_page(page);
682 } 681 }
@@ -760,7 +759,7 @@ xfs_convert_page(
760 759
761 if (page->index != tindex) 760 if (page->index != tindex)
762 goto fail; 761 goto fail;
763 if (TestSetPageLocked(page)) 762 if (!trylock_page(page))
764 goto fail; 763 goto fail;
765 if (PageWriteback(page)) 764 if (PageWriteback(page))
766 goto fail_unlock_page; 765 goto fail_unlock_page;
@@ -858,7 +857,7 @@ xfs_convert_page(
858 done = 1; 857 done = 1;
859 } 858 }
860 } 859 }
861 xfs_start_page_writeback(page, wbc, !page_dirty, count); 860 xfs_start_page_writeback(page, !page_dirty, count);
862 } 861 }
863 862
864 return done; 863 return done;
@@ -1105,7 +1104,7 @@ xfs_page_state_convert(
1105 * that we are writing into for the first time. 1104 * that we are writing into for the first time.
1106 */ 1105 */
1107 type = IOMAP_NEW; 1106 type = IOMAP_NEW;
1108 if (!test_and_set_bit(BH_Lock, &bh->b_state)) { 1107 if (trylock_buffer(bh)) {
1109 ASSERT(buffer_mapped(bh)); 1108 ASSERT(buffer_mapped(bh));
1110 if (iomap_valid) 1109 if (iomap_valid)
1111 all_bh = 1; 1110 all_bh = 1;
@@ -1130,7 +1129,7 @@ xfs_page_state_convert(
1130 SetPageUptodate(page); 1129 SetPageUptodate(page);
1131 1130
1132 if (startio) 1131 if (startio)
1133 xfs_start_page_writeback(page, wbc, 1, count); 1132 xfs_start_page_writeback(page, 1, count);
1134 1133
1135 if (ioend && iomap_valid) { 1134 if (ioend && iomap_valid) {
1136 offset = (iomap.iomap_offset + iomap.iomap_bsize - 1) >> 1135 offset = (iomap.iomap_offset + iomap.iomap_bsize - 1) >>
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index 98e0e86093b4..9cc8f0213095 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -310,8 +310,7 @@ _xfs_buf_free_pages(
310 xfs_buf_t *bp) 310 xfs_buf_t *bp)
311{ 311{
312 if (bp->b_pages != bp->b_page_array) { 312 if (bp->b_pages != bp->b_page_array) {
313 kmem_free(bp->b_pages, 313 kmem_free(bp->b_pages);
314 bp->b_page_count * sizeof(struct page *));
315 } 314 }
316} 315}
317 316
@@ -1398,7 +1397,7 @@ STATIC void
1398xfs_free_bufhash( 1397xfs_free_bufhash(
1399 xfs_buftarg_t *btp) 1398 xfs_buftarg_t *btp)
1400{ 1399{
1401 kmem_free(btp->bt_hash, (1<<btp->bt_hashshift) * sizeof(xfs_bufhash_t)); 1400 kmem_free(btp->bt_hash);
1402 btp->bt_hash = NULL; 1401 btp->bt_hash = NULL;
1403} 1402}
1404 1403
@@ -1428,13 +1427,10 @@ xfs_unregister_buftarg(
1428 1427
1429void 1428void
1430xfs_free_buftarg( 1429xfs_free_buftarg(
1431 xfs_buftarg_t *btp, 1430 xfs_buftarg_t *btp)
1432 int external)
1433{ 1431{
1434 xfs_flush_buftarg(btp, 1); 1432 xfs_flush_buftarg(btp, 1);
1435 xfs_blkdev_issue_flush(btp); 1433 xfs_blkdev_issue_flush(btp);
1436 if (external)
1437 xfs_blkdev_put(btp->bt_bdev);
1438 xfs_free_bufhash(btp); 1434 xfs_free_bufhash(btp);
1439 iput(btp->bt_mapping->host); 1435 iput(btp->bt_mapping->host);
1440 1436
@@ -1444,7 +1440,7 @@ xfs_free_buftarg(
1444 xfs_unregister_buftarg(btp); 1440 xfs_unregister_buftarg(btp);
1445 kthread_stop(btp->bt_task); 1441 kthread_stop(btp->bt_task);
1446 1442
1447 kmem_free(btp, sizeof(*btp)); 1443 kmem_free(btp);
1448} 1444}
1449 1445
1450STATIC int 1446STATIC int
@@ -1575,7 +1571,7 @@ xfs_alloc_buftarg(
1575 return btp; 1571 return btp;
1576 1572
1577error: 1573error:
1578 kmem_free(btp, sizeof(*btp)); 1574 kmem_free(btp);
1579 return NULL; 1575 return NULL;
1580} 1576}
1581 1577
diff --git a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/linux-2.6/xfs_buf.h
index f948ec7ba9a4..29d1d4adc078 100644
--- a/fs/xfs/linux-2.6/xfs_buf.h
+++ b/fs/xfs/linux-2.6/xfs_buf.h
@@ -429,7 +429,7 @@ static inline void xfs_bdwrite(void *mp, xfs_buf_t *bp)
429 * Handling of buftargs. 429 * Handling of buftargs.
430 */ 430 */
431extern xfs_buftarg_t *xfs_alloc_buftarg(struct block_device *, int); 431extern xfs_buftarg_t *xfs_alloc_buftarg(struct block_device *, int);
432extern void xfs_free_buftarg(xfs_buftarg_t *, int); 432extern void xfs_free_buftarg(xfs_buftarg_t *);
433extern void xfs_wait_buftarg(xfs_buftarg_t *); 433extern void xfs_wait_buftarg(xfs_buftarg_t *);
434extern int xfs_setsize_buftarg(xfs_buftarg_t *, unsigned int, unsigned int); 434extern int xfs_setsize_buftarg(xfs_buftarg_t *, unsigned int, unsigned int);
435extern int xfs_flush_buftarg(xfs_buftarg_t *, int); 435extern int xfs_flush_buftarg(xfs_buftarg_t *, int);
diff --git a/fs/xfs/linux-2.6/xfs_export.c b/fs/xfs/linux-2.6/xfs_export.c
index c672b3238b14..987fe84f7b13 100644
--- a/fs/xfs/linux-2.6/xfs_export.c
+++ b/fs/xfs/linux-2.6/xfs_export.c
@@ -215,7 +215,7 @@ xfs_fs_get_parent(
215 struct xfs_inode *cip; 215 struct xfs_inode *cip;
216 struct dentry *parent; 216 struct dentry *parent;
217 217
218 error = xfs_lookup(XFS_I(child->d_inode), &xfs_name_dotdot, &cip); 218 error = xfs_lookup(XFS_I(child->d_inode), &xfs_name_dotdot, &cip, NULL);
219 if (unlikely(error)) 219 if (unlikely(error))
220 return ERR_PTR(-error); 220 return ERR_PTR(-error);
221 221
diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c
index 01939ba2d8de..acb978d9d085 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl.c
@@ -48,6 +48,8 @@
48#include "xfs_dfrag.h" 48#include "xfs_dfrag.h"
49#include "xfs_fsops.h" 49#include "xfs_fsops.h"
50#include "xfs_vnodeops.h" 50#include "xfs_vnodeops.h"
51#include "xfs_quota.h"
52#include "xfs_inode_item.h"
51 53
52#include <linux/capability.h> 54#include <linux/capability.h>
53#include <linux/dcache.h> 55#include <linux/dcache.h>
@@ -468,6 +470,12 @@ xfs_attrlist_by_handle(
468 if (al_hreq.buflen > XATTR_LIST_MAX) 470 if (al_hreq.buflen > XATTR_LIST_MAX)
469 return -XFS_ERROR(EINVAL); 471 return -XFS_ERROR(EINVAL);
470 472
473 /*
474 * Reject flags, only allow namespaces.
475 */
476 if (al_hreq.flags & ~(ATTR_ROOT | ATTR_SECURE))
477 return -XFS_ERROR(EINVAL);
478
471 error = xfs_vget_fsop_handlereq(mp, parinode, &al_hreq.hreq, &inode); 479 error = xfs_vget_fsop_handlereq(mp, parinode, &al_hreq.hreq, &inode);
472 if (error) 480 if (error)
473 goto out; 481 goto out;
@@ -587,7 +595,7 @@ xfs_attrmulti_by_handle(
587 goto out; 595 goto out;
588 596
589 error = E2BIG; 597 error = E2BIG;
590 size = am_hreq.opcount * sizeof(attr_multiop_t); 598 size = am_hreq.opcount * sizeof(xfs_attr_multiop_t);
591 if (!size || size > 16 * PAGE_SIZE) 599 if (!size || size > 16 * PAGE_SIZE)
592 goto out_vn_rele; 600 goto out_vn_rele;
593 601
@@ -680,9 +688,9 @@ xfs_ioc_space(
680 return -XFS_ERROR(EFAULT); 688 return -XFS_ERROR(EFAULT);
681 689
682 if (filp->f_flags & (O_NDELAY|O_NONBLOCK)) 690 if (filp->f_flags & (O_NDELAY|O_NONBLOCK))
683 attr_flags |= ATTR_NONBLOCK; 691 attr_flags |= XFS_ATTR_NONBLOCK;
684 if (ioflags & IO_INVIS) 692 if (ioflags & IO_INVIS)
685 attr_flags |= ATTR_DMI; 693 attr_flags |= XFS_ATTR_DMI;
686 694
687 error = xfs_change_file_space(ip, cmd, &bf, filp->f_pos, 695 error = xfs_change_file_space(ip, cmd, &bf, filp->f_pos,
688 NULL, attr_flags); 696 NULL, attr_flags);
@@ -873,6 +881,322 @@ xfs_ioc_fsgetxattr(
873 return 0; 881 return 0;
874} 882}
875 883
884STATIC void
885xfs_set_diflags(
886 struct xfs_inode *ip,
887 unsigned int xflags)
888{
889 unsigned int di_flags;
890
891 /* can't set PREALLOC this way, just preserve it */
892 di_flags = (ip->i_d.di_flags & XFS_DIFLAG_PREALLOC);
893 if (xflags & XFS_XFLAG_IMMUTABLE)
894 di_flags |= XFS_DIFLAG_IMMUTABLE;
895 if (xflags & XFS_XFLAG_APPEND)
896 di_flags |= XFS_DIFLAG_APPEND;
897 if (xflags & XFS_XFLAG_SYNC)
898 di_flags |= XFS_DIFLAG_SYNC;
899 if (xflags & XFS_XFLAG_NOATIME)
900 di_flags |= XFS_DIFLAG_NOATIME;
901 if (xflags & XFS_XFLAG_NODUMP)
902 di_flags |= XFS_DIFLAG_NODUMP;
903 if (xflags & XFS_XFLAG_PROJINHERIT)
904 di_flags |= XFS_DIFLAG_PROJINHERIT;
905 if (xflags & XFS_XFLAG_NODEFRAG)
906 di_flags |= XFS_DIFLAG_NODEFRAG;
907 if (xflags & XFS_XFLAG_FILESTREAM)
908 di_flags |= XFS_DIFLAG_FILESTREAM;
909 if ((ip->i_d.di_mode & S_IFMT) == S_IFDIR) {
910 if (xflags & XFS_XFLAG_RTINHERIT)
911 di_flags |= XFS_DIFLAG_RTINHERIT;
912 if (xflags & XFS_XFLAG_NOSYMLINKS)
913 di_flags |= XFS_DIFLAG_NOSYMLINKS;
914 if (xflags & XFS_XFLAG_EXTSZINHERIT)
915 di_flags |= XFS_DIFLAG_EXTSZINHERIT;
916 } else if ((ip->i_d.di_mode & S_IFMT) == S_IFREG) {
917 if (xflags & XFS_XFLAG_REALTIME)
918 di_flags |= XFS_DIFLAG_REALTIME;
919 if (xflags & XFS_XFLAG_EXTSIZE)
920 di_flags |= XFS_DIFLAG_EXTSIZE;
921 }
922
923 ip->i_d.di_flags = di_flags;
924}
925
926STATIC void
927xfs_diflags_to_linux(
928 struct xfs_inode *ip)
929{
930 struct inode *inode = XFS_ITOV(ip);
931 unsigned int xflags = xfs_ip2xflags(ip);
932
933 if (xflags & XFS_XFLAG_IMMUTABLE)
934 inode->i_flags |= S_IMMUTABLE;
935 else
936 inode->i_flags &= ~S_IMMUTABLE;
937 if (xflags & XFS_XFLAG_APPEND)
938 inode->i_flags |= S_APPEND;
939 else
940 inode->i_flags &= ~S_APPEND;
941 if (xflags & XFS_XFLAG_SYNC)
942 inode->i_flags |= S_SYNC;
943 else
944 inode->i_flags &= ~S_SYNC;
945 if (xflags & XFS_XFLAG_NOATIME)
946 inode->i_flags |= S_NOATIME;
947 else
948 inode->i_flags &= ~S_NOATIME;
949}
950
951#define FSX_PROJID 1
952#define FSX_EXTSIZE 2
953#define FSX_XFLAGS 4
954#define FSX_NONBLOCK 8
955
956STATIC int
957xfs_ioctl_setattr(
958 xfs_inode_t *ip,
959 struct fsxattr *fa,
960 int mask)
961{
962 struct xfs_mount *mp = ip->i_mount;
963 struct xfs_trans *tp;
964 unsigned int lock_flags = 0;
965 struct xfs_dquot *udqp = NULL, *gdqp = NULL;
966 struct xfs_dquot *olddquot = NULL;
967 int code;
968
969 xfs_itrace_entry(ip);
970
971 if (mp->m_flags & XFS_MOUNT_RDONLY)
972 return XFS_ERROR(EROFS);
973 if (XFS_FORCED_SHUTDOWN(mp))
974 return XFS_ERROR(EIO);
975
976 /*
977 * If disk quotas is on, we make sure that the dquots do exist on disk,
978 * before we start any other transactions. Trying to do this later
979 * is messy. We don't care to take a readlock to look at the ids
980 * in inode here, because we can't hold it across the trans_reserve.
981 * If the IDs do change before we take the ilock, we're covered
982 * because the i_*dquot fields will get updated anyway.
983 */
984 if (XFS_IS_QUOTA_ON(mp) && (mask & FSX_PROJID)) {
985 code = XFS_QM_DQVOPALLOC(mp, ip, ip->i_d.di_uid,
986 ip->i_d.di_gid, fa->fsx_projid,
987 XFS_QMOPT_PQUOTA, &udqp, &gdqp);
988 if (code)
989 return code;
990 }
991
992 /*
993 * For the other attributes, we acquire the inode lock and
994 * first do an error checking pass.
995 */
996 tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_NOT_SIZE);
997 code = xfs_trans_reserve(tp, 0, XFS_ICHANGE_LOG_RES(mp), 0, 0, 0);
998 if (code)
999 goto error_return;
1000
1001 lock_flags = XFS_ILOCK_EXCL;
1002 xfs_ilock(ip, lock_flags);
1003
1004 /*
1005 * CAP_FOWNER overrides the following restrictions:
1006 *
1007 * The user ID of the calling process must be equal
1008 * to the file owner ID, except in cases where the
1009 * CAP_FSETID capability is applicable.
1010 */
1011 if (current->fsuid != ip->i_d.di_uid && !capable(CAP_FOWNER)) {
1012 code = XFS_ERROR(EPERM);
1013 goto error_return;
1014 }
1015
1016 /*
1017 * Do a quota reservation only if projid is actually going to change.
1018 */
1019 if (mask & FSX_PROJID) {
1020 if (XFS_IS_PQUOTA_ON(mp) &&
1021 ip->i_d.di_projid != fa->fsx_projid) {
1022 ASSERT(tp);
1023 code = XFS_QM_DQVOPCHOWNRESV(mp, tp, ip, udqp, gdqp,
1024 capable(CAP_FOWNER) ?
1025 XFS_QMOPT_FORCE_RES : 0);
1026 if (code) /* out of quota */
1027 goto error_return;
1028 }
1029 }
1030
1031 if (mask & FSX_EXTSIZE) {
1032 /*
1033 * Can't change extent size if any extents are allocated.
1034 */
1035 if (ip->i_d.di_nextents &&
1036 ((ip->i_d.di_extsize << mp->m_sb.sb_blocklog) !=
1037 fa->fsx_extsize)) {
1038 code = XFS_ERROR(EINVAL); /* EFBIG? */
1039 goto error_return;
1040 }
1041
1042 /*
1043 * Extent size must be a multiple of the appropriate block
1044 * size, if set at all.
1045 */
1046 if (fa->fsx_extsize != 0) {
1047 xfs_extlen_t size;
1048
1049 if (XFS_IS_REALTIME_INODE(ip) ||
1050 ((mask & FSX_XFLAGS) &&
1051 (fa->fsx_xflags & XFS_XFLAG_REALTIME))) {
1052 size = mp->m_sb.sb_rextsize <<
1053 mp->m_sb.sb_blocklog;
1054 } else {
1055 size = mp->m_sb.sb_blocksize;
1056 }
1057
1058 if (fa->fsx_extsize % size) {
1059 code = XFS_ERROR(EINVAL);
1060 goto error_return;
1061 }
1062 }
1063 }
1064
1065
1066 if (mask & FSX_XFLAGS) {
1067 /*
1068 * Can't change realtime flag if any extents are allocated.
1069 */
1070 if ((ip->i_d.di_nextents || ip->i_delayed_blks) &&
1071 (XFS_IS_REALTIME_INODE(ip)) !=
1072 (fa->fsx_xflags & XFS_XFLAG_REALTIME)) {
1073 code = XFS_ERROR(EINVAL); /* EFBIG? */
1074 goto error_return;
1075 }
1076
1077 /*
1078 * If realtime flag is set then must have realtime data.
1079 */
1080 if ((fa->fsx_xflags & XFS_XFLAG_REALTIME)) {
1081 if ((mp->m_sb.sb_rblocks == 0) ||
1082 (mp->m_sb.sb_rextsize == 0) ||
1083 (ip->i_d.di_extsize % mp->m_sb.sb_rextsize)) {
1084 code = XFS_ERROR(EINVAL);
1085 goto error_return;
1086 }
1087 }
1088
1089 /*
1090 * Can't modify an immutable/append-only file unless
1091 * we have appropriate permission.
1092 */
1093 if ((ip->i_d.di_flags &
1094 (XFS_DIFLAG_IMMUTABLE|XFS_DIFLAG_APPEND) ||
1095 (fa->fsx_xflags &
1096 (XFS_XFLAG_IMMUTABLE | XFS_XFLAG_APPEND))) &&
1097 !capable(CAP_LINUX_IMMUTABLE)) {
1098 code = XFS_ERROR(EPERM);
1099 goto error_return;
1100 }
1101 }
1102
1103 xfs_trans_ijoin(tp, ip, lock_flags);
1104 xfs_trans_ihold(tp, ip);
1105
1106 /*
1107 * Change file ownership. Must be the owner or privileged.
1108 * If the system was configured with the "restricted_chown"
1109 * option, the owner is not permitted to give away the file,
1110 * and can change the group id only to a group of which he
1111 * or she is a member.
1112 */
1113 if (mask & FSX_PROJID) {
1114 /*
1115 * CAP_FSETID overrides the following restrictions:
1116 *
1117 * The set-user-ID and set-group-ID bits of a file will be
1118 * cleared upon successful return from chown()
1119 */
1120 if ((ip->i_d.di_mode & (S_ISUID|S_ISGID)) &&
1121 !capable(CAP_FSETID))
1122 ip->i_d.di_mode &= ~(S_ISUID|S_ISGID);
1123
1124 /*
1125 * Change the ownerships and register quota modifications
1126 * in the transaction.
1127 */
1128 if (ip->i_d.di_projid != fa->fsx_projid) {
1129 if (XFS_IS_PQUOTA_ON(mp)) {
1130 olddquot = XFS_QM_DQVOPCHOWN(mp, tp, ip,
1131 &ip->i_gdquot, gdqp);
1132 }
1133 ip->i_d.di_projid = fa->fsx_projid;
1134
1135 /*
1136 * We may have to rev the inode as well as
1137 * the superblock version number since projids didn't
1138 * exist before DINODE_VERSION_2 and SB_VERSION_NLINK.
1139 */
1140 if (ip->i_d.di_version == XFS_DINODE_VERSION_1)
1141 xfs_bump_ino_vers2(tp, ip);
1142 }
1143
1144 }
1145
1146 if (mask & FSX_EXTSIZE)
1147 ip->i_d.di_extsize = fa->fsx_extsize >> mp->m_sb.sb_blocklog;
1148 if (mask & FSX_XFLAGS) {
1149 xfs_set_diflags(ip, fa->fsx_xflags);
1150 xfs_diflags_to_linux(ip);
1151 }
1152
1153 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
1154 xfs_ichgtime(ip, XFS_ICHGTIME_CHG);
1155
1156 XFS_STATS_INC(xs_ig_attrchg);
1157
1158 /*
1159 * If this is a synchronous mount, make sure that the
1160 * transaction goes to disk before returning to the user.
1161 * This is slightly sub-optimal in that truncates require
1162 * two sync transactions instead of one for wsync filesystems.
1163 * One for the truncate and one for the timestamps since we
1164 * don't want to change the timestamps unless we're sure the
1165 * truncate worked. Truncates are less than 1% of the laddis
1166 * mix so this probably isn't worth the trouble to optimize.
1167 */
1168 if (mp->m_flags & XFS_MOUNT_WSYNC)
1169 xfs_trans_set_sync(tp);
1170 code = xfs_trans_commit(tp, 0);
1171 xfs_iunlock(ip, lock_flags);
1172
1173 /*
1174 * Release any dquot(s) the inode had kept before chown.
1175 */
1176 XFS_QM_DQRELE(mp, olddquot);
1177 XFS_QM_DQRELE(mp, udqp);
1178 XFS_QM_DQRELE(mp, gdqp);
1179
1180 if (code)
1181 return code;
1182
1183 if (DM_EVENT_ENABLED(ip, DM_EVENT_ATTRIBUTE)) {
1184 XFS_SEND_NAMESP(mp, DM_EVENT_ATTRIBUTE, ip, DM_RIGHT_NULL,
1185 NULL, DM_RIGHT_NULL, NULL, NULL, 0, 0,
1186 (mask & FSX_NONBLOCK) ? DM_FLAGS_NDELAY : 0);
1187 }
1188
1189 return 0;
1190
1191 error_return:
1192 XFS_QM_DQRELE(mp, udqp);
1193 XFS_QM_DQRELE(mp, gdqp);
1194 xfs_trans_cancel(tp, 0);
1195 if (lock_flags)
1196 xfs_iunlock(ip, lock_flags);
1197 return code;
1198}
1199
876STATIC int 1200STATIC int
877xfs_ioc_fssetxattr( 1201xfs_ioc_fssetxattr(
878 xfs_inode_t *ip, 1202 xfs_inode_t *ip,
@@ -880,31 +1204,16 @@ xfs_ioc_fssetxattr(
880 void __user *arg) 1204 void __user *arg)
881{ 1205{
882 struct fsxattr fa; 1206 struct fsxattr fa;
883 struct bhv_vattr *vattr; 1207 unsigned int mask;
884 int error;
885 int attr_flags;
886 1208
887 if (copy_from_user(&fa, arg, sizeof(fa))) 1209 if (copy_from_user(&fa, arg, sizeof(fa)))
888 return -EFAULT; 1210 return -EFAULT;
889 1211
890 vattr = kmalloc(sizeof(*vattr), GFP_KERNEL); 1212 mask = FSX_XFLAGS | FSX_EXTSIZE | FSX_PROJID;
891 if (unlikely(!vattr))
892 return -ENOMEM;
893
894 attr_flags = 0;
895 if (filp->f_flags & (O_NDELAY|O_NONBLOCK)) 1213 if (filp->f_flags & (O_NDELAY|O_NONBLOCK))
896 attr_flags |= ATTR_NONBLOCK; 1214 mask |= FSX_NONBLOCK;
897
898 vattr->va_mask = XFS_AT_XFLAGS | XFS_AT_EXTSIZE | XFS_AT_PROJID;
899 vattr->va_xflags = fa.fsx_xflags;
900 vattr->va_extsize = fa.fsx_extsize;
901 vattr->va_projid = fa.fsx_projid;
902 1215
903 error = -xfs_setattr(ip, vattr, attr_flags, NULL); 1216 return -xfs_ioctl_setattr(ip, &fa, mask);
904 if (!error)
905 vn_revalidate(XFS_ITOV(ip)); /* update flags */
906 kfree(vattr);
907 return 0;
908} 1217}
909 1218
910STATIC int 1219STATIC int
@@ -926,10 +1235,9 @@ xfs_ioc_setxflags(
926 struct file *filp, 1235 struct file *filp,
927 void __user *arg) 1236 void __user *arg)
928{ 1237{
929 struct bhv_vattr *vattr; 1238 struct fsxattr fa;
930 unsigned int flags; 1239 unsigned int flags;
931 int attr_flags; 1240 unsigned int mask;
932 int error;
933 1241
934 if (copy_from_user(&flags, arg, sizeof(flags))) 1242 if (copy_from_user(&flags, arg, sizeof(flags)))
935 return -EFAULT; 1243 return -EFAULT;
@@ -939,22 +1247,12 @@ xfs_ioc_setxflags(
939 FS_SYNC_FL)) 1247 FS_SYNC_FL))
940 return -EOPNOTSUPP; 1248 return -EOPNOTSUPP;
941 1249
942 vattr = kmalloc(sizeof(*vattr), GFP_KERNEL); 1250 mask = FSX_XFLAGS;
943 if (unlikely(!vattr))
944 return -ENOMEM;
945
946 attr_flags = 0;
947 if (filp->f_flags & (O_NDELAY|O_NONBLOCK)) 1251 if (filp->f_flags & (O_NDELAY|O_NONBLOCK))
948 attr_flags |= ATTR_NONBLOCK; 1252 mask |= FSX_NONBLOCK;
949 1253 fa.fsx_xflags = xfs_merge_ioc_xflags(flags, xfs_ip2xflags(ip));
950 vattr->va_mask = XFS_AT_XFLAGS;
951 vattr->va_xflags = xfs_merge_ioc_xflags(flags, xfs_ip2xflags(ip));
952 1254
953 error = -xfs_setattr(ip, vattr, attr_flags, NULL); 1255 return -xfs_ioctl_setattr(ip, &fa, mask);
954 if (likely(!error))
955 vn_revalidate(XFS_ITOV(ip)); /* update flags */
956 kfree(vattr);
957 return error;
958} 1256}
959 1257
960STATIC int 1258STATIC int
diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c
index 5fc61c824bb9..e88f51028086 100644
--- a/fs/xfs/linux-2.6/xfs_iops.c
+++ b/fs/xfs/linux-2.6/xfs_iops.c
@@ -181,23 +181,6 @@ xfs_ichgtime_fast(
181 mark_inode_dirty_sync(inode); 181 mark_inode_dirty_sync(inode);
182} 182}
183 183
184
185/*
186 * Pull the link count and size up from the xfs inode to the linux inode
187 */
188STATIC void
189xfs_validate_fields(
190 struct inode *inode)
191{
192 struct xfs_inode *ip = XFS_I(inode);
193 loff_t size;
194
195 /* we're under i_sem so i_size can't change under us */
196 size = XFS_ISIZE(ip);
197 if (i_size_read(inode) != size)
198 i_size_write(inode, size);
199}
200
201/* 184/*
202 * Hook in SELinux. This is not quite correct yet, what we really need 185 * Hook in SELinux. This is not quite correct yet, what we really need
203 * here (as we do for default ACLs) is a mechanism by which creation of 186 * here (as we do for default ACLs) is a mechanism by which creation of
@@ -245,8 +228,7 @@ STATIC void
245xfs_cleanup_inode( 228xfs_cleanup_inode(
246 struct inode *dir, 229 struct inode *dir,
247 struct inode *inode, 230 struct inode *inode,
248 struct dentry *dentry, 231 struct dentry *dentry)
249 int mode)
250{ 232{
251 struct xfs_name teardown; 233 struct xfs_name teardown;
252 234
@@ -257,10 +239,7 @@ xfs_cleanup_inode(
257 */ 239 */
258 xfs_dentry_to_name(&teardown, dentry); 240 xfs_dentry_to_name(&teardown, dentry);
259 241
260 if (S_ISDIR(mode)) 242 xfs_remove(XFS_I(dir), &teardown, XFS_I(inode));
261 xfs_rmdir(XFS_I(dir), &teardown, XFS_I(inode));
262 else
263 xfs_remove(XFS_I(dir), &teardown, XFS_I(inode));
264 iput(inode); 243 iput(inode);
265} 244}
266 245
@@ -275,7 +254,7 @@ xfs_vn_mknod(
275 struct xfs_inode *ip = NULL; 254 struct xfs_inode *ip = NULL;
276 xfs_acl_t *default_acl = NULL; 255 xfs_acl_t *default_acl = NULL;
277 struct xfs_name name; 256 struct xfs_name name;
278 attrexists_t test_default_acl = _ACL_DEFAULT_EXISTS; 257 int (*test_default_acl)(struct inode *) = _ACL_DEFAULT_EXISTS;
279 int error; 258 int error;
280 259
281 /* 260 /*
@@ -335,14 +314,11 @@ xfs_vn_mknod(
335 } 314 }
336 315
337 316
338 if (S_ISDIR(mode))
339 xfs_validate_fields(inode);
340 d_instantiate(dentry, inode); 317 d_instantiate(dentry, inode);
341 xfs_validate_fields(dir);
342 return -error; 318 return -error;
343 319
344 out_cleanup_inode: 320 out_cleanup_inode:
345 xfs_cleanup_inode(dir, inode, dentry, mode); 321 xfs_cleanup_inode(dir, inode, dentry);
346 out_free_acl: 322 out_free_acl:
347 if (default_acl) 323 if (default_acl)
348 _ACL_FREE(default_acl); 324 _ACL_FREE(default_acl);
@@ -382,7 +358,7 @@ xfs_vn_lookup(
382 return ERR_PTR(-ENAMETOOLONG); 358 return ERR_PTR(-ENAMETOOLONG);
383 359
384 xfs_dentry_to_name(&name, dentry); 360 xfs_dentry_to_name(&name, dentry);
385 error = xfs_lookup(XFS_I(dir), &name, &cip); 361 error = xfs_lookup(XFS_I(dir), &name, &cip, NULL);
386 if (unlikely(error)) { 362 if (unlikely(error)) {
387 if (unlikely(error != ENOENT)) 363 if (unlikely(error != ENOENT))
388 return ERR_PTR(-error); 364 return ERR_PTR(-error);
@@ -393,6 +369,46 @@ xfs_vn_lookup(
393 return d_splice_alias(cip->i_vnode, dentry); 369 return d_splice_alias(cip->i_vnode, dentry);
394} 370}
395 371
372STATIC struct dentry *
373xfs_vn_ci_lookup(
374 struct inode *dir,
375 struct dentry *dentry,
376 struct nameidata *nd)
377{
378 struct xfs_inode *ip;
379 struct xfs_name xname;
380 struct xfs_name ci_name;
381 struct qstr dname;
382 int error;
383
384 if (dentry->d_name.len >= MAXNAMELEN)
385 return ERR_PTR(-ENAMETOOLONG);
386
387 xfs_dentry_to_name(&xname, dentry);
388 error = xfs_lookup(XFS_I(dir), &xname, &ip, &ci_name);
389 if (unlikely(error)) {
390 if (unlikely(error != ENOENT))
391 return ERR_PTR(-error);
392 /*
393 * call d_add(dentry, NULL) here when d_drop_negative_children
394 * is called in xfs_vn_mknod (ie. allow negative dentries
395 * with CI filesystems).
396 */
397 return NULL;
398 }
399
400 /* if exact match, just splice and exit */
401 if (!ci_name.name)
402 return d_splice_alias(ip->i_vnode, dentry);
403
404 /* else case-insensitive match... */
405 dname.name = ci_name.name;
406 dname.len = ci_name.len;
407 dentry = d_add_ci(ip->i_vnode, dentry, &dname);
408 kmem_free(ci_name.name);
409 return dentry;
410}
411
396STATIC int 412STATIC int
397xfs_vn_link( 413xfs_vn_link(
398 struct dentry *old_dentry, 414 struct dentry *old_dentry,
@@ -414,7 +430,6 @@ xfs_vn_link(
414 } 430 }
415 431
416 xfs_iflags_set(XFS_I(dir), XFS_IMODIFIED); 432 xfs_iflags_set(XFS_I(dir), XFS_IMODIFIED);
417 xfs_validate_fields(inode);
418 d_instantiate(dentry, inode); 433 d_instantiate(dentry, inode);
419 return 0; 434 return 0;
420} 435}
@@ -424,19 +439,23 @@ xfs_vn_unlink(
424 struct inode *dir, 439 struct inode *dir,
425 struct dentry *dentry) 440 struct dentry *dentry)
426{ 441{
427 struct inode *inode;
428 struct xfs_name name; 442 struct xfs_name name;
429 int error; 443 int error;
430 444
431 inode = dentry->d_inode;
432 xfs_dentry_to_name(&name, dentry); 445 xfs_dentry_to_name(&name, dentry);
433 446
434 error = xfs_remove(XFS_I(dir), &name, XFS_I(inode)); 447 error = -xfs_remove(XFS_I(dir), &name, XFS_I(dentry->d_inode));
435 if (likely(!error)) { 448 if (error)
436 xfs_validate_fields(dir); /* size needs update */ 449 return error;
437 xfs_validate_fields(inode); 450
438 } 451 /*
439 return -error; 452 * With unlink, the VFS makes the dentry "negative": no inode,
453 * but still hashed. This is incompatible with case-insensitive
454 * mode, so invalidate (unhash) the dentry in CI-mode.
455 */
456 if (xfs_sb_version_hasasciici(&XFS_M(dir->i_sb)->m_sb))
457 d_invalidate(dentry);
458 return 0;
440} 459}
441 460
442STATIC int 461STATIC int
@@ -466,36 +485,15 @@ xfs_vn_symlink(
466 goto out_cleanup_inode; 485 goto out_cleanup_inode;
467 486
468 d_instantiate(dentry, inode); 487 d_instantiate(dentry, inode);
469 xfs_validate_fields(dir);
470 xfs_validate_fields(inode);
471 return 0; 488 return 0;
472 489
473 out_cleanup_inode: 490 out_cleanup_inode:
474 xfs_cleanup_inode(dir, inode, dentry, 0); 491 xfs_cleanup_inode(dir, inode, dentry);
475 out: 492 out:
476 return -error; 493 return -error;
477} 494}
478 495
479STATIC int 496STATIC int
480xfs_vn_rmdir(
481 struct inode *dir,
482 struct dentry *dentry)
483{
484 struct inode *inode = dentry->d_inode;
485 struct xfs_name name;
486 int error;
487
488 xfs_dentry_to_name(&name, dentry);
489
490 error = xfs_rmdir(XFS_I(dir), &name, XFS_I(inode));
491 if (likely(!error)) {
492 xfs_validate_fields(inode);
493 xfs_validate_fields(dir);
494 }
495 return -error;
496}
497
498STATIC int
499xfs_vn_rename( 497xfs_vn_rename(
500 struct inode *odir, 498 struct inode *odir,
501 struct dentry *odentry, 499 struct dentry *odentry,
@@ -505,22 +503,13 @@ xfs_vn_rename(
505 struct inode *new_inode = ndentry->d_inode; 503 struct inode *new_inode = ndentry->d_inode;
506 struct xfs_name oname; 504 struct xfs_name oname;
507 struct xfs_name nname; 505 struct xfs_name nname;
508 int error;
509 506
510 xfs_dentry_to_name(&oname, odentry); 507 xfs_dentry_to_name(&oname, odentry);
511 xfs_dentry_to_name(&nname, ndentry); 508 xfs_dentry_to_name(&nname, ndentry);
512 509
513 error = xfs_rename(XFS_I(odir), &oname, XFS_I(odentry->d_inode), 510 return -xfs_rename(XFS_I(odir), &oname, XFS_I(odentry->d_inode),
514 XFS_I(ndir), &nname, new_inode ? 511 XFS_I(ndir), &nname, new_inode ?
515 XFS_I(new_inode) : NULL); 512 XFS_I(new_inode) : NULL);
516 if (likely(!error)) {
517 if (new_inode)
518 xfs_validate_fields(new_inode);
519 xfs_validate_fields(odir);
520 if (ndir != odir)
521 xfs_validate_fields(ndir);
522 }
523 return -error;
524} 513}
525 514
526/* 515/*
@@ -659,57 +648,9 @@ xfs_vn_getattr(
659STATIC int 648STATIC int
660xfs_vn_setattr( 649xfs_vn_setattr(
661 struct dentry *dentry, 650 struct dentry *dentry,
662 struct iattr *attr) 651 struct iattr *iattr)
663{ 652{
664 struct inode *inode = dentry->d_inode; 653 return -xfs_setattr(XFS_I(dentry->d_inode), iattr, 0, NULL);
665 unsigned int ia_valid = attr->ia_valid;
666 bhv_vattr_t vattr = { 0 };
667 int flags = 0;
668 int error;
669
670 if (ia_valid & ATTR_UID) {
671 vattr.va_mask |= XFS_AT_UID;
672 vattr.va_uid = attr->ia_uid;
673 }
674 if (ia_valid & ATTR_GID) {
675 vattr.va_mask |= XFS_AT_GID;
676 vattr.va_gid = attr->ia_gid;
677 }
678 if (ia_valid & ATTR_SIZE) {
679 vattr.va_mask |= XFS_AT_SIZE;
680 vattr.va_size = attr->ia_size;
681 }
682 if (ia_valid & ATTR_ATIME) {
683 vattr.va_mask |= XFS_AT_ATIME;
684 vattr.va_atime = attr->ia_atime;
685 inode->i_atime = attr->ia_atime;
686 }
687 if (ia_valid & ATTR_MTIME) {
688 vattr.va_mask |= XFS_AT_MTIME;
689 vattr.va_mtime = attr->ia_mtime;
690 }
691 if (ia_valid & ATTR_CTIME) {
692 vattr.va_mask |= XFS_AT_CTIME;
693 vattr.va_ctime = attr->ia_ctime;
694 }
695 if (ia_valid & ATTR_MODE) {
696 vattr.va_mask |= XFS_AT_MODE;
697 vattr.va_mode = attr->ia_mode;
698 if (!in_group_p(inode->i_gid) && !capable(CAP_FSETID))
699 inode->i_mode &= ~S_ISGID;
700 }
701
702 if (ia_valid & (ATTR_MTIME_SET | ATTR_ATIME_SET))
703 flags |= ATTR_UTIME;
704#ifdef ATTR_NO_BLOCK
705 if ((ia_valid & ATTR_NO_BLOCK))
706 flags |= ATTR_NONBLOCK;
707#endif
708
709 error = xfs_setattr(XFS_I(inode), &vattr, flags, NULL);
710 if (likely(!error))
711 vn_revalidate(vn_from_inode(inode));
712 return -error;
713} 654}
714 655
715/* 656/*
@@ -727,109 +668,6 @@ xfs_vn_truncate(
727 WARN_ON(error); 668 WARN_ON(error);
728} 669}
729 670
730STATIC int
731xfs_vn_setxattr(
732 struct dentry *dentry,
733 const char *name,
734 const void *data,
735 size_t size,
736 int flags)
737{
738 bhv_vnode_t *vp = vn_from_inode(dentry->d_inode);
739 char *attr = (char *)name;
740 attrnames_t *namesp;
741 int xflags = 0;
742 int error;
743
744 namesp = attr_lookup_namespace(attr, attr_namespaces, ATTR_NAMECOUNT);
745 if (!namesp)
746 return -EOPNOTSUPP;
747 attr += namesp->attr_namelen;
748 error = namesp->attr_capable(vp, NULL);
749 if (error)
750 return error;
751
752 /* Convert Linux syscall to XFS internal ATTR flags */
753 if (flags & XATTR_CREATE)
754 xflags |= ATTR_CREATE;
755 if (flags & XATTR_REPLACE)
756 xflags |= ATTR_REPLACE;
757 xflags |= namesp->attr_flag;
758 return namesp->attr_set(vp, attr, (void *)data, size, xflags);
759}
760
761STATIC ssize_t
762xfs_vn_getxattr(
763 struct dentry *dentry,
764 const char *name,
765 void *data,
766 size_t size)
767{
768 bhv_vnode_t *vp = vn_from_inode(dentry->d_inode);
769 char *attr = (char *)name;
770 attrnames_t *namesp;
771 int xflags = 0;
772 ssize_t error;
773
774 namesp = attr_lookup_namespace(attr, attr_namespaces, ATTR_NAMECOUNT);
775 if (!namesp)
776 return -EOPNOTSUPP;
777 attr += namesp->attr_namelen;
778 error = namesp->attr_capable(vp, NULL);
779 if (error)
780 return error;
781
782 /* Convert Linux syscall to XFS internal ATTR flags */
783 if (!size) {
784 xflags |= ATTR_KERNOVAL;
785 data = NULL;
786 }
787 xflags |= namesp->attr_flag;
788 return namesp->attr_get(vp, attr, (void *)data, size, xflags);
789}
790
791STATIC ssize_t
792xfs_vn_listxattr(
793 struct dentry *dentry,
794 char *data,
795 size_t size)
796{
797 bhv_vnode_t *vp = vn_from_inode(dentry->d_inode);
798 int error, xflags = ATTR_KERNAMELS;
799 ssize_t result;
800
801 if (!size)
802 xflags |= ATTR_KERNOVAL;
803 xflags |= capable(CAP_SYS_ADMIN) ? ATTR_KERNFULLS : ATTR_KERNORMALS;
804
805 error = attr_generic_list(vp, data, size, xflags, &result);
806 if (error < 0)
807 return error;
808 return result;
809}
810
811STATIC int
812xfs_vn_removexattr(
813 struct dentry *dentry,
814 const char *name)
815{
816 bhv_vnode_t *vp = vn_from_inode(dentry->d_inode);
817 char *attr = (char *)name;
818 attrnames_t *namesp;
819 int xflags = 0;
820 int error;
821
822 namesp = attr_lookup_namespace(attr, attr_namespaces, ATTR_NAMECOUNT);
823 if (!namesp)
824 return -EOPNOTSUPP;
825 attr += namesp->attr_namelen;
826 error = namesp->attr_capable(vp, NULL);
827 if (error)
828 return error;
829 xflags |= namesp->attr_flag;
830 return namesp->attr_remove(vp, attr, xflags);
831}
832
833STATIC long 671STATIC long
834xfs_vn_fallocate( 672xfs_vn_fallocate(
835 struct inode *inode, 673 struct inode *inode,
@@ -853,18 +691,18 @@ xfs_vn_fallocate(
853 691
854 xfs_ilock(ip, XFS_IOLOCK_EXCL); 692 xfs_ilock(ip, XFS_IOLOCK_EXCL);
855 error = xfs_change_file_space(ip, XFS_IOC_RESVSP, &bf, 693 error = xfs_change_file_space(ip, XFS_IOC_RESVSP, &bf,
856 0, NULL, ATTR_NOLOCK); 694 0, NULL, XFS_ATTR_NOLOCK);
857 if (!error && !(mode & FALLOC_FL_KEEP_SIZE) && 695 if (!error && !(mode & FALLOC_FL_KEEP_SIZE) &&
858 offset + len > i_size_read(inode)) 696 offset + len > i_size_read(inode))
859 new_size = offset + len; 697 new_size = offset + len;
860 698
861 /* Change file size if needed */ 699 /* Change file size if needed */
862 if (new_size) { 700 if (new_size) {
863 bhv_vattr_t va; 701 struct iattr iattr;
864 702
865 va.va_mask = XFS_AT_SIZE; 703 iattr.ia_valid = ATTR_SIZE;
866 va.va_size = new_size; 704 iattr.ia_size = new_size;
867 error = xfs_setattr(ip, &va, ATTR_NOLOCK, NULL); 705 error = xfs_setattr(ip, &iattr, XFS_ATTR_NOLOCK, NULL);
868 } 706 }
869 707
870 xfs_iunlock(ip, XFS_IOLOCK_EXCL); 708 xfs_iunlock(ip, XFS_IOLOCK_EXCL);
@@ -877,10 +715,10 @@ const struct inode_operations xfs_inode_operations = {
877 .truncate = xfs_vn_truncate, 715 .truncate = xfs_vn_truncate,
878 .getattr = xfs_vn_getattr, 716 .getattr = xfs_vn_getattr,
879 .setattr = xfs_vn_setattr, 717 .setattr = xfs_vn_setattr,
880 .setxattr = xfs_vn_setxattr, 718 .setxattr = generic_setxattr,
881 .getxattr = xfs_vn_getxattr, 719 .getxattr = generic_getxattr,
720 .removexattr = generic_removexattr,
882 .listxattr = xfs_vn_listxattr, 721 .listxattr = xfs_vn_listxattr,
883 .removexattr = xfs_vn_removexattr,
884 .fallocate = xfs_vn_fallocate, 722 .fallocate = xfs_vn_fallocate,
885}; 723};
886 724
@@ -891,16 +729,47 @@ const struct inode_operations xfs_dir_inode_operations = {
891 .unlink = xfs_vn_unlink, 729 .unlink = xfs_vn_unlink,
892 .symlink = xfs_vn_symlink, 730 .symlink = xfs_vn_symlink,
893 .mkdir = xfs_vn_mkdir, 731 .mkdir = xfs_vn_mkdir,
894 .rmdir = xfs_vn_rmdir, 732 /*
733 * Yes, XFS uses the same method for rmdir and unlink.
734 *
735 * There are some subtle differences deeper in the code,
736 * but we use S_ISDIR to check for those.
737 */
738 .rmdir = xfs_vn_unlink,
739 .mknod = xfs_vn_mknod,
740 .rename = xfs_vn_rename,
741 .permission = xfs_vn_permission,
742 .getattr = xfs_vn_getattr,
743 .setattr = xfs_vn_setattr,
744 .setxattr = generic_setxattr,
745 .getxattr = generic_getxattr,
746 .removexattr = generic_removexattr,
747 .listxattr = xfs_vn_listxattr,
748};
749
750const struct inode_operations xfs_dir_ci_inode_operations = {
751 .create = xfs_vn_create,
752 .lookup = xfs_vn_ci_lookup,
753 .link = xfs_vn_link,
754 .unlink = xfs_vn_unlink,
755 .symlink = xfs_vn_symlink,
756 .mkdir = xfs_vn_mkdir,
757 /*
758 * Yes, XFS uses the same method for rmdir and unlink.
759 *
760 * There are some subtle differences deeper in the code,
761 * but we use S_ISDIR to check for those.
762 */
763 .rmdir = xfs_vn_unlink,
895 .mknod = xfs_vn_mknod, 764 .mknod = xfs_vn_mknod,
896 .rename = xfs_vn_rename, 765 .rename = xfs_vn_rename,
897 .permission = xfs_vn_permission, 766 .permission = xfs_vn_permission,
898 .getattr = xfs_vn_getattr, 767 .getattr = xfs_vn_getattr,
899 .setattr = xfs_vn_setattr, 768 .setattr = xfs_vn_setattr,
900 .setxattr = xfs_vn_setxattr, 769 .setxattr = generic_setxattr,
901 .getxattr = xfs_vn_getxattr, 770 .getxattr = generic_getxattr,
771 .removexattr = generic_removexattr,
902 .listxattr = xfs_vn_listxattr, 772 .listxattr = xfs_vn_listxattr,
903 .removexattr = xfs_vn_removexattr,
904}; 773};
905 774
906const struct inode_operations xfs_symlink_inode_operations = { 775const struct inode_operations xfs_symlink_inode_operations = {
@@ -910,8 +779,8 @@ const struct inode_operations xfs_symlink_inode_operations = {
910 .permission = xfs_vn_permission, 779 .permission = xfs_vn_permission,
911 .getattr = xfs_vn_getattr, 780 .getattr = xfs_vn_getattr,
912 .setattr = xfs_vn_setattr, 781 .setattr = xfs_vn_setattr,
913 .setxattr = xfs_vn_setxattr, 782 .setxattr = generic_setxattr,
914 .getxattr = xfs_vn_getxattr, 783 .getxattr = generic_getxattr,
784 .removexattr = generic_removexattr,
915 .listxattr = xfs_vn_listxattr, 785 .listxattr = xfs_vn_listxattr,
916 .removexattr = xfs_vn_removexattr,
917}; 786};
diff --git a/fs/xfs/linux-2.6/xfs_iops.h b/fs/xfs/linux-2.6/xfs_iops.h
index 14d0deb7afff..d97ba934a2ac 100644
--- a/fs/xfs/linux-2.6/xfs_iops.h
+++ b/fs/xfs/linux-2.6/xfs_iops.h
@@ -20,12 +20,14 @@
20 20
21extern const struct inode_operations xfs_inode_operations; 21extern const struct inode_operations xfs_inode_operations;
22extern const struct inode_operations xfs_dir_inode_operations; 22extern const struct inode_operations xfs_dir_inode_operations;
23extern const struct inode_operations xfs_dir_ci_inode_operations;
23extern const struct inode_operations xfs_symlink_inode_operations; 24extern const struct inode_operations xfs_symlink_inode_operations;
24 25
25extern const struct file_operations xfs_file_operations; 26extern const struct file_operations xfs_file_operations;
26extern const struct file_operations xfs_dir_file_operations; 27extern const struct file_operations xfs_dir_file_operations;
27extern const struct file_operations xfs_invis_file_operations; 28extern const struct file_operations xfs_invis_file_operations;
28 29
30extern ssize_t xfs_vn_listxattr(struct dentry *, char *data, size_t size);
29 31
30struct xfs_inode; 32struct xfs_inode;
31extern void xfs_ichgtime(struct xfs_inode *, int); 33extern void xfs_ichgtime(struct xfs_inode *, int);
diff --git a/fs/xfs/linux-2.6/xfs_linux.h b/fs/xfs/linux-2.6/xfs_linux.h
index 4edc46915b57..4d45d9351a6c 100644
--- a/fs/xfs/linux-2.6/xfs_linux.h
+++ b/fs/xfs/linux-2.6/xfs_linux.h
@@ -76,6 +76,7 @@
76#include <linux/log2.h> 76#include <linux/log2.h>
77#include <linux/spinlock.h> 77#include <linux/spinlock.h>
78#include <linux/random.h> 78#include <linux/random.h>
79#include <linux/ctype.h>
79 80
80#include <asm/page.h> 81#include <asm/page.h>
81#include <asm/div64.h> 82#include <asm/div64.h>
@@ -299,4 +300,11 @@ static inline __uint64_t howmany_64(__uint64_t x, __uint32_t y)
299 return x; 300 return x;
300} 301}
301 302
303/* ARM old ABI has some weird alignment/padding */
304#if defined(__arm__) && !defined(__ARM_EABI__)
305#define __arch_pack __attribute__((packed))
306#else
307#define __arch_pack
308#endif
309
302#endif /* __XFS_LINUX__ */ 310#endif /* __XFS_LINUX__ */
diff --git a/fs/xfs/linux-2.6/xfs_stats.c b/fs/xfs/linux-2.6/xfs_stats.c
index e480b6102051..3d5b67c075c7 100644
--- a/fs/xfs/linux-2.6/xfs_stats.c
+++ b/fs/xfs/linux-2.6/xfs_stats.c
@@ -98,12 +98,21 @@ xfs_read_xfsstats(
98 return len; 98 return len;
99} 99}
100 100
101void 101int
102xfs_init_procfs(void) 102xfs_init_procfs(void)
103{ 103{
104 if (!proc_mkdir("fs/xfs", NULL)) 104 if (!proc_mkdir("fs/xfs", NULL))
105 return; 105 goto out;
106 create_proc_read_entry("fs/xfs/stat", 0, NULL, xfs_read_xfsstats, NULL); 106
107 if (!create_proc_read_entry("fs/xfs/stat", 0, NULL,
108 xfs_read_xfsstats, NULL))
109 goto out_remove_entry;
110 return 0;
111
112 out_remove_entry:
113 remove_proc_entry("fs/xfs", NULL);
114 out:
115 return -ENOMEM;
107} 116}
108 117
109void 118void
diff --git a/fs/xfs/linux-2.6/xfs_stats.h b/fs/xfs/linux-2.6/xfs_stats.h
index afd0b0d5fdb2..e83820febc9f 100644
--- a/fs/xfs/linux-2.6/xfs_stats.h
+++ b/fs/xfs/linux-2.6/xfs_stats.h
@@ -134,7 +134,7 @@ DECLARE_PER_CPU(struct xfsstats, xfsstats);
134#define XFS_STATS_DEC(v) (per_cpu(xfsstats, current_cpu()).v--) 134#define XFS_STATS_DEC(v) (per_cpu(xfsstats, current_cpu()).v--)
135#define XFS_STATS_ADD(v, inc) (per_cpu(xfsstats, current_cpu()).v += (inc)) 135#define XFS_STATS_ADD(v, inc) (per_cpu(xfsstats, current_cpu()).v += (inc))
136 136
137extern void xfs_init_procfs(void); 137extern int xfs_init_procfs(void);
138extern void xfs_cleanup_procfs(void); 138extern void xfs_cleanup_procfs(void);
139 139
140 140
@@ -144,8 +144,14 @@ extern void xfs_cleanup_procfs(void);
144# define XFS_STATS_DEC(count) 144# define XFS_STATS_DEC(count)
145# define XFS_STATS_ADD(count, inc) 145# define XFS_STATS_ADD(count, inc)
146 146
147static inline void xfs_init_procfs(void) { }; 147static inline int xfs_init_procfs(void)
148static inline void xfs_cleanup_procfs(void) { }; 148{
149 return 0;
150}
151
152static inline void xfs_cleanup_procfs(void)
153{
154}
149 155
150#endif /* !CONFIG_PROC_FS */ 156#endif /* !CONFIG_PROC_FS */
151 157
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index 943381284e2e..30ae96397e31 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -52,6 +52,12 @@
52#include "xfs_version.h" 52#include "xfs_version.h"
53#include "xfs_log_priv.h" 53#include "xfs_log_priv.h"
54#include "xfs_trans_priv.h" 54#include "xfs_trans_priv.h"
55#include "xfs_filestream.h"
56#include "xfs_da_btree.h"
57#include "xfs_dir2_trace.h"
58#include "xfs_extfree_item.h"
59#include "xfs_mru_cache.h"
60#include "xfs_inode_item.h"
55 61
56#include <linux/namei.h> 62#include <linux/namei.h>
57#include <linux/init.h> 63#include <linux/init.h>
@@ -60,6 +66,7 @@
60#include <linux/writeback.h> 66#include <linux/writeback.h>
61#include <linux/kthread.h> 67#include <linux/kthread.h>
62#include <linux/freezer.h> 68#include <linux/freezer.h>
69#include <linux/parser.h>
63 70
64static struct quotactl_ops xfs_quotactl_operations; 71static struct quotactl_ops xfs_quotactl_operations;
65static struct super_operations xfs_super_operations; 72static struct super_operations xfs_super_operations;
@@ -74,7 +81,10 @@ xfs_args_allocate(
74{ 81{
75 struct xfs_mount_args *args; 82 struct xfs_mount_args *args;
76 83
77 args = kmem_zalloc(sizeof(struct xfs_mount_args), KM_SLEEP); 84 args = kzalloc(sizeof(struct xfs_mount_args), GFP_KERNEL);
85 if (!args)
86 return NULL;
87
78 args->logbufs = args->logbufsize = -1; 88 args->logbufs = args->logbufsize = -1;
79 strncpy(args->fsname, sb->s_id, MAXNAMELEN); 89 strncpy(args->fsname, sb->s_id, MAXNAMELEN);
80 90
@@ -138,6 +148,23 @@ xfs_args_allocate(
138#define MNTOPT_XDSM "xdsm" /* DMI enabled (DMAPI / XDSM) */ 148#define MNTOPT_XDSM "xdsm" /* DMI enabled (DMAPI / XDSM) */
139#define MNTOPT_DMI "dmi" /* DMI enabled (DMAPI / XDSM) */ 149#define MNTOPT_DMI "dmi" /* DMI enabled (DMAPI / XDSM) */
140 150
151/*
152 * Table driven mount option parser.
153 *
154 * Currently only used for remount, but it will be used for mount
155 * in the future, too.
156 */
157enum {
158 Opt_barrier, Opt_nobarrier, Opt_err
159};
160
161static match_table_t tokens = {
162 {Opt_barrier, "barrier"},
163 {Opt_nobarrier, "nobarrier"},
164 {Opt_err, NULL}
165};
166
167
141STATIC unsigned long 168STATIC unsigned long
142suffix_strtoul(char *s, char **endp, unsigned int base) 169suffix_strtoul(char *s, char **endp, unsigned int base)
143{ 170{
@@ -314,6 +341,7 @@ xfs_parseargs(
314 args->flags |= XFSMNT_ATTR2; 341 args->flags |= XFSMNT_ATTR2;
315 } else if (!strcmp(this_char, MNTOPT_NOATTR2)) { 342 } else if (!strcmp(this_char, MNTOPT_NOATTR2)) {
316 args->flags &= ~XFSMNT_ATTR2; 343 args->flags &= ~XFSMNT_ATTR2;
344 args->flags |= XFSMNT_NOATTR2;
317 } else if (!strcmp(this_char, MNTOPT_FILESTREAM)) { 345 } else if (!strcmp(this_char, MNTOPT_FILESTREAM)) {
318 args->flags2 |= XFSMNT2_FILESTREAMS; 346 args->flags2 |= XFSMNT2_FILESTREAMS;
319 } else if (!strcmp(this_char, MNTOPT_NOQUOTA)) { 347 } else if (!strcmp(this_char, MNTOPT_NOQUOTA)) {
@@ -564,7 +592,10 @@ xfs_set_inodeops(
564 inode->i_mapping->a_ops = &xfs_address_space_operations; 592 inode->i_mapping->a_ops = &xfs_address_space_operations;
565 break; 593 break;
566 case S_IFDIR: 594 case S_IFDIR:
567 inode->i_op = &xfs_dir_inode_operations; 595 if (xfs_sb_version_hasasciici(&XFS_M(inode->i_sb)->m_sb))
596 inode->i_op = &xfs_dir_ci_inode_operations;
597 else
598 inode->i_op = &xfs_dir_inode_operations;
568 inode->i_fop = &xfs_dir_file_operations; 599 inode->i_fop = &xfs_dir_file_operations;
569 break; 600 break;
570 case S_IFLNK: 601 case S_IFLNK:
@@ -733,14 +764,6 @@ xfs_mountfs_check_barriers(xfs_mount_t *mp)
733 return; 764 return;
734 } 765 }
735 766
736 if (mp->m_ddev_targp->bt_bdev->bd_disk->queue->ordered ==
737 QUEUE_ORDERED_NONE) {
738 xfs_fs_cmn_err(CE_NOTE, mp,
739 "Disabling barriers, not supported by the underlying device");
740 mp->m_flags &= ~XFS_MOUNT_BARRIER;
741 return;
742 }
743
744 if (xfs_readonly_buftarg(mp->m_ddev_targp)) { 767 if (xfs_readonly_buftarg(mp->m_ddev_targp)) {
745 xfs_fs_cmn_err(CE_NOTE, mp, 768 xfs_fs_cmn_err(CE_NOTE, mp,
746 "Disabling barriers, underlying device is readonly"); 769 "Disabling barriers, underlying device is readonly");
@@ -764,6 +787,139 @@ xfs_blkdev_issue_flush(
764 blkdev_issue_flush(buftarg->bt_bdev, NULL); 787 blkdev_issue_flush(buftarg->bt_bdev, NULL);
765} 788}
766 789
790STATIC void
791xfs_close_devices(
792 struct xfs_mount *mp)
793{
794 if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp) {
795 struct block_device *logdev = mp->m_logdev_targp->bt_bdev;
796 xfs_free_buftarg(mp->m_logdev_targp);
797 xfs_blkdev_put(logdev);
798 }
799 if (mp->m_rtdev_targp) {
800 struct block_device *rtdev = mp->m_rtdev_targp->bt_bdev;
801 xfs_free_buftarg(mp->m_rtdev_targp);
802 xfs_blkdev_put(rtdev);
803 }
804 xfs_free_buftarg(mp->m_ddev_targp);
805}
806
807/*
808 * The file system configurations are:
809 * (1) device (partition) with data and internal log
810 * (2) logical volume with data and log subvolumes.
811 * (3) logical volume with data, log, and realtime subvolumes.
812 *
813 * We only have to handle opening the log and realtime volumes here if
814 * they are present. The data subvolume has already been opened by
815 * get_sb_bdev() and is stored in sb->s_bdev.
816 */
817STATIC int
818xfs_open_devices(
819 struct xfs_mount *mp,
820 struct xfs_mount_args *args)
821{
822 struct block_device *ddev = mp->m_super->s_bdev;
823 struct block_device *logdev = NULL, *rtdev = NULL;
824 int error;
825
826 /*
827 * Open real time and log devices - order is important.
828 */
829 if (args->logname[0]) {
830 error = xfs_blkdev_get(mp, args->logname, &logdev);
831 if (error)
832 goto out;
833 }
834
835 if (args->rtname[0]) {
836 error = xfs_blkdev_get(mp, args->rtname, &rtdev);
837 if (error)
838 goto out_close_logdev;
839
840 if (rtdev == ddev || rtdev == logdev) {
841 cmn_err(CE_WARN,
842 "XFS: Cannot mount filesystem with identical rtdev and ddev/logdev.");
843 error = EINVAL;
844 goto out_close_rtdev;
845 }
846 }
847
848 /*
849 * Setup xfs_mount buffer target pointers
850 */
851 error = ENOMEM;
852 mp->m_ddev_targp = xfs_alloc_buftarg(ddev, 0);
853 if (!mp->m_ddev_targp)
854 goto out_close_rtdev;
855
856 if (rtdev) {
857 mp->m_rtdev_targp = xfs_alloc_buftarg(rtdev, 1);
858 if (!mp->m_rtdev_targp)
859 goto out_free_ddev_targ;
860 }
861
862 if (logdev && logdev != ddev) {
863 mp->m_logdev_targp = xfs_alloc_buftarg(logdev, 1);
864 if (!mp->m_logdev_targp)
865 goto out_free_rtdev_targ;
866 } else {
867 mp->m_logdev_targp = mp->m_ddev_targp;
868 }
869
870 return 0;
871
872 out_free_rtdev_targ:
873 if (mp->m_rtdev_targp)
874 xfs_free_buftarg(mp->m_rtdev_targp);
875 out_free_ddev_targ:
876 xfs_free_buftarg(mp->m_ddev_targp);
877 out_close_rtdev:
878 if (rtdev)
879 xfs_blkdev_put(rtdev);
880 out_close_logdev:
881 if (logdev && logdev != ddev)
882 xfs_blkdev_put(logdev);
883 out:
884 return error;
885}
886
887/*
888 * Setup xfs_mount buffer target pointers based on superblock
889 */
890STATIC int
891xfs_setup_devices(
892 struct xfs_mount *mp)
893{
894 int error;
895
896 error = xfs_setsize_buftarg(mp->m_ddev_targp, mp->m_sb.sb_blocksize,
897 mp->m_sb.sb_sectsize);
898 if (error)
899 return error;
900
901 if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp) {
902 unsigned int log_sector_size = BBSIZE;
903
904 if (xfs_sb_version_hassector(&mp->m_sb))
905 log_sector_size = mp->m_sb.sb_logsectsize;
906 error = xfs_setsize_buftarg(mp->m_logdev_targp,
907 mp->m_sb.sb_blocksize,
908 log_sector_size);
909 if (error)
910 return error;
911 }
912 if (mp->m_rtdev_targp) {
913 error = xfs_setsize_buftarg(mp->m_rtdev_targp,
914 mp->m_sb.sb_blocksize,
915 mp->m_sb.sb_sectsize);
916 if (error)
917 return error;
918 }
919
920 return 0;
921}
922
767/* 923/*
768 * XFS AIL push thread support 924 * XFS AIL push thread support
769 */ 925 */
@@ -848,42 +1004,6 @@ xfs_fs_inode_init_once(
848 inode_init_once(vn_to_inode((bhv_vnode_t *)vnode)); 1004 inode_init_once(vn_to_inode((bhv_vnode_t *)vnode));
849} 1005}
850 1006
851STATIC int __init
852xfs_init_zones(void)
853{
854 xfs_vnode_zone = kmem_zone_init_flags(sizeof(bhv_vnode_t), "xfs_vnode",
855 KM_ZONE_HWALIGN | KM_ZONE_RECLAIM |
856 KM_ZONE_SPREAD,
857 xfs_fs_inode_init_once);
858 if (!xfs_vnode_zone)
859 goto out;
860
861 xfs_ioend_zone = kmem_zone_init(sizeof(xfs_ioend_t), "xfs_ioend");
862 if (!xfs_ioend_zone)
863 goto out_destroy_vnode_zone;
864
865 xfs_ioend_pool = mempool_create_slab_pool(4 * MAX_BUF_PER_PAGE,
866 xfs_ioend_zone);
867 if (!xfs_ioend_pool)
868 goto out_free_ioend_zone;
869 return 0;
870
871 out_free_ioend_zone:
872 kmem_zone_destroy(xfs_ioend_zone);
873 out_destroy_vnode_zone:
874 kmem_zone_destroy(xfs_vnode_zone);
875 out:
876 return -ENOMEM;
877}
878
879STATIC void
880xfs_destroy_zones(void)
881{
882 mempool_destroy(xfs_ioend_pool);
883 kmem_zone_destroy(xfs_vnode_zone);
884 kmem_zone_destroy(xfs_ioend_zone);
885}
886
887/* 1007/*
888 * Attempt to flush the inode, this will actually fail 1008 * Attempt to flush the inode, this will actually fail
889 * if the inode is pinned, but we dirty the inode again 1009 * if the inode is pinned, but we dirty the inode again
@@ -1073,7 +1193,7 @@ xfssyncd(
1073 list_del(&work->w_list); 1193 list_del(&work->w_list);
1074 if (work == &mp->m_sync_work) 1194 if (work == &mp->m_sync_work)
1075 continue; 1195 continue;
1076 kmem_free(work, sizeof(struct bhv_vfs_sync_work)); 1196 kmem_free(work);
1077 } 1197 }
1078 } 1198 }
1079 1199
@@ -1085,14 +1205,63 @@ xfs_fs_put_super(
1085 struct super_block *sb) 1205 struct super_block *sb)
1086{ 1206{
1087 struct xfs_mount *mp = XFS_M(sb); 1207 struct xfs_mount *mp = XFS_M(sb);
1208 struct xfs_inode *rip = mp->m_rootip;
1209 int unmount_event_flags = 0;
1088 int error; 1210 int error;
1089 1211
1090 kthread_stop(mp->m_sync_task); 1212 kthread_stop(mp->m_sync_task);
1091 1213
1092 xfs_sync(mp, SYNC_ATTR | SYNC_DELWRI); 1214 xfs_sync(mp, SYNC_ATTR | SYNC_DELWRI);
1093 error = xfs_unmount(mp, 0, NULL); 1215
1094 if (error) 1216#ifdef HAVE_DMAPI
1095 printk("XFS: unmount got error=%d\n", error); 1217 if (mp->m_flags & XFS_MOUNT_DMAPI) {
1218 unmount_event_flags =
1219 (mp->m_dmevmask & (1 << DM_EVENT_UNMOUNT)) ?
1220 0 : DM_FLAGS_UNWANTED;
1221 /*
1222 * Ignore error from dmapi here, first unmount is not allowed
1223 * to fail anyway, and second we wouldn't want to fail a
1224 * unmount because of dmapi.
1225 */
1226 XFS_SEND_PREUNMOUNT(mp, rip, DM_RIGHT_NULL, rip, DM_RIGHT_NULL,
1227 NULL, NULL, 0, 0, unmount_event_flags);
1228 }
1229#endif
1230
1231 /*
1232 * Blow away any referenced inode in the filestreams cache.
1233 * This can and will cause log traffic as inodes go inactive
1234 * here.
1235 */
1236 xfs_filestream_unmount(mp);
1237
1238 XFS_bflush(mp->m_ddev_targp);
1239 error = xfs_unmount_flush(mp, 0);
1240 WARN_ON(error);
1241
1242 IRELE(rip);
1243
1244 /*
1245 * If we're forcing a shutdown, typically because of a media error,
1246 * we want to make sure we invalidate dirty pages that belong to
1247 * referenced vnodes as well.
1248 */
1249 if (XFS_FORCED_SHUTDOWN(mp)) {
1250 error = xfs_sync(mp, SYNC_WAIT | SYNC_CLOSE);
1251 ASSERT(error != EFSCORRUPTED);
1252 }
1253
1254 if (mp->m_flags & XFS_MOUNT_DMAPI) {
1255 XFS_SEND_UNMOUNT(mp, rip, DM_RIGHT_NULL, 0, 0,
1256 unmount_event_flags);
1257 }
1258
1259 xfs_unmountfs(mp);
1260 xfs_icsb_destroy_counters(mp);
1261 xfs_close_devices(mp);
1262 xfs_qmops_put(mp);
1263 xfs_dmops_put(mp);
1264 kfree(mp);
1096} 1265}
1097 1266
1098STATIC void 1267STATIC void
@@ -1215,14 +1384,54 @@ xfs_fs_remount(
1215 char *options) 1384 char *options)
1216{ 1385{
1217 struct xfs_mount *mp = XFS_M(sb); 1386 struct xfs_mount *mp = XFS_M(sb);
1218 struct xfs_mount_args *args = xfs_args_allocate(sb, 0); 1387 substring_t args[MAX_OPT_ARGS];
1219 int error; 1388 char *p;
1220 1389
1221 error = xfs_parseargs(mp, options, args, 1); 1390 while ((p = strsep(&options, ",")) != NULL) {
1222 if (!error) 1391 int token;
1223 error = xfs_mntupdate(mp, flags, args); 1392
1224 kmem_free(args, sizeof(*args)); 1393 if (!*p)
1225 return -error; 1394 continue;
1395
1396 token = match_token(p, tokens, args);
1397 switch (token) {
1398 case Opt_barrier:
1399 mp->m_flags |= XFS_MOUNT_BARRIER;
1400
1401 /*
1402 * Test if barriers are actually working if we can,
1403 * else delay this check until the filesystem is
1404 * marked writeable.
1405 */
1406 if (!(mp->m_flags & XFS_MOUNT_RDONLY))
1407 xfs_mountfs_check_barriers(mp);
1408 break;
1409 case Opt_nobarrier:
1410 mp->m_flags &= ~XFS_MOUNT_BARRIER;
1411 break;
1412 default:
1413 printk(KERN_INFO
1414 "XFS: mount option \"%s\" not supported for remount\n", p);
1415 return -EINVAL;
1416 }
1417 }
1418
1419 /* rw/ro -> rw */
1420 if ((mp->m_flags & XFS_MOUNT_RDONLY) && !(*flags & MS_RDONLY)) {
1421 mp->m_flags &= ~XFS_MOUNT_RDONLY;
1422 if (mp->m_flags & XFS_MOUNT_BARRIER)
1423 xfs_mountfs_check_barriers(mp);
1424 }
1425
1426 /* rw -> ro */
1427 if (!(mp->m_flags & XFS_MOUNT_RDONLY) && (*flags & MS_RDONLY)) {
1428 xfs_filestream_flush(mp);
1429 xfs_sync(mp, SYNC_DATA_QUIESCE);
1430 xfs_attr_quiesce(mp);
1431 mp->m_flags |= XFS_MOUNT_RDONLY;
1432 }
1433
1434 return 0;
1226} 1435}
1227 1436
1228/* 1437/*
@@ -1299,6 +1508,225 @@ xfs_fs_setxquota(
1299 Q_XSETPQLIM), id, (caddr_t)fdq); 1508 Q_XSETPQLIM), id, (caddr_t)fdq);
1300} 1509}
1301 1510
1511/*
1512 * This function fills in xfs_mount_t fields based on mount args.
1513 * Note: the superblock has _not_ yet been read in.
1514 */
1515STATIC int
1516xfs_start_flags(
1517 struct xfs_mount_args *ap,
1518 struct xfs_mount *mp)
1519{
1520 /* Values are in BBs */
1521 if ((ap->flags & XFSMNT_NOALIGN) != XFSMNT_NOALIGN) {
1522 /*
1523 * At this point the superblock has not been read
1524 * in, therefore we do not know the block size.
1525 * Before the mount call ends we will convert
1526 * these to FSBs.
1527 */
1528 mp->m_dalign = ap->sunit;
1529 mp->m_swidth = ap->swidth;
1530 }
1531
1532 if (ap->logbufs != -1 &&
1533 ap->logbufs != 0 &&
1534 (ap->logbufs < XLOG_MIN_ICLOGS ||
1535 ap->logbufs > XLOG_MAX_ICLOGS)) {
1536 cmn_err(CE_WARN,
1537 "XFS: invalid logbufs value: %d [not %d-%d]",
1538 ap->logbufs, XLOG_MIN_ICLOGS, XLOG_MAX_ICLOGS);
1539 return XFS_ERROR(EINVAL);
1540 }
1541 mp->m_logbufs = ap->logbufs;
1542 if (ap->logbufsize != -1 &&
1543 ap->logbufsize != 0 &&
1544 (ap->logbufsize < XLOG_MIN_RECORD_BSIZE ||
1545 ap->logbufsize > XLOG_MAX_RECORD_BSIZE ||
1546 !is_power_of_2(ap->logbufsize))) {
1547 cmn_err(CE_WARN,
1548 "XFS: invalid logbufsize: %d [not 16k,32k,64k,128k or 256k]",
1549 ap->logbufsize);
1550 return XFS_ERROR(EINVAL);
1551 }
1552 mp->m_logbsize = ap->logbufsize;
1553 mp->m_fsname_len = strlen(ap->fsname) + 1;
1554 mp->m_fsname = kmem_alloc(mp->m_fsname_len, KM_SLEEP);
1555 strcpy(mp->m_fsname, ap->fsname);
1556 if (ap->rtname[0]) {
1557 mp->m_rtname = kmem_alloc(strlen(ap->rtname) + 1, KM_SLEEP);
1558 strcpy(mp->m_rtname, ap->rtname);
1559 }
1560 if (ap->logname[0]) {
1561 mp->m_logname = kmem_alloc(strlen(ap->logname) + 1, KM_SLEEP);
1562 strcpy(mp->m_logname, ap->logname);
1563 }
1564
1565 if (ap->flags & XFSMNT_WSYNC)
1566 mp->m_flags |= XFS_MOUNT_WSYNC;
1567#if XFS_BIG_INUMS
1568 if (ap->flags & XFSMNT_INO64) {
1569 mp->m_flags |= XFS_MOUNT_INO64;
1570 mp->m_inoadd = XFS_INO64_OFFSET;
1571 }
1572#endif
1573 if (ap->flags & XFSMNT_RETERR)
1574 mp->m_flags |= XFS_MOUNT_RETERR;
1575 if (ap->flags & XFSMNT_NOALIGN)
1576 mp->m_flags |= XFS_MOUNT_NOALIGN;
1577 if (ap->flags & XFSMNT_SWALLOC)
1578 mp->m_flags |= XFS_MOUNT_SWALLOC;
1579 if (ap->flags & XFSMNT_OSYNCISOSYNC)
1580 mp->m_flags |= XFS_MOUNT_OSYNCISOSYNC;
1581 if (ap->flags & XFSMNT_32BITINODES)
1582 mp->m_flags |= XFS_MOUNT_32BITINODES;
1583
1584 if (ap->flags & XFSMNT_IOSIZE) {
1585 if (ap->iosizelog > XFS_MAX_IO_LOG ||
1586 ap->iosizelog < XFS_MIN_IO_LOG) {
1587 cmn_err(CE_WARN,
1588 "XFS: invalid log iosize: %d [not %d-%d]",
1589 ap->iosizelog, XFS_MIN_IO_LOG,
1590 XFS_MAX_IO_LOG);
1591 return XFS_ERROR(EINVAL);
1592 }
1593
1594 mp->m_flags |= XFS_MOUNT_DFLT_IOSIZE;
1595 mp->m_readio_log = mp->m_writeio_log = ap->iosizelog;
1596 }
1597
1598 if (ap->flags & XFSMNT_IKEEP)
1599 mp->m_flags |= XFS_MOUNT_IKEEP;
1600 if (ap->flags & XFSMNT_DIRSYNC)
1601 mp->m_flags |= XFS_MOUNT_DIRSYNC;
1602 if (ap->flags & XFSMNT_ATTR2)
1603 mp->m_flags |= XFS_MOUNT_ATTR2;
1604 if (ap->flags & XFSMNT_NOATTR2)
1605 mp->m_flags |= XFS_MOUNT_NOATTR2;
1606
1607 if (ap->flags2 & XFSMNT2_COMPAT_IOSIZE)
1608 mp->m_flags |= XFS_MOUNT_COMPAT_IOSIZE;
1609
1610 /*
1611 * no recovery flag requires a read-only mount
1612 */
1613 if (ap->flags & XFSMNT_NORECOVERY) {
1614 if (!(mp->m_flags & XFS_MOUNT_RDONLY)) {
1615 cmn_err(CE_WARN,
1616 "XFS: tried to mount a FS read-write without recovery!");
1617 return XFS_ERROR(EINVAL);
1618 }
1619 mp->m_flags |= XFS_MOUNT_NORECOVERY;
1620 }
1621
1622 if (ap->flags & XFSMNT_NOUUID)
1623 mp->m_flags |= XFS_MOUNT_NOUUID;
1624 if (ap->flags & XFSMNT_BARRIER)
1625 mp->m_flags |= XFS_MOUNT_BARRIER;
1626 else
1627 mp->m_flags &= ~XFS_MOUNT_BARRIER;
1628
1629 if (ap->flags2 & XFSMNT2_FILESTREAMS)
1630 mp->m_flags |= XFS_MOUNT_FILESTREAMS;
1631
1632 if (ap->flags & XFSMNT_DMAPI)
1633 mp->m_flags |= XFS_MOUNT_DMAPI;
1634 return 0;
1635}
1636
1637/*
1638 * This function fills in xfs_mount_t fields based on mount args.
1639 * Note: the superblock _has_ now been read in.
1640 */
1641STATIC int
1642xfs_finish_flags(
1643 struct xfs_mount_args *ap,
1644 struct xfs_mount *mp)
1645{
1646 int ronly = (mp->m_flags & XFS_MOUNT_RDONLY);
1647
1648 /* Fail a mount where the logbuf is smaller then the log stripe */
1649 if (xfs_sb_version_haslogv2(&mp->m_sb)) {
1650 if ((ap->logbufsize <= 0) &&
1651 (mp->m_sb.sb_logsunit > XLOG_BIG_RECORD_BSIZE)) {
1652 mp->m_logbsize = mp->m_sb.sb_logsunit;
1653 } else if (ap->logbufsize > 0 &&
1654 ap->logbufsize < mp->m_sb.sb_logsunit) {
1655 cmn_err(CE_WARN,
1656 "XFS: logbuf size must be greater than or equal to log stripe size");
1657 return XFS_ERROR(EINVAL);
1658 }
1659 } else {
1660 /* Fail a mount if the logbuf is larger than 32K */
1661 if (ap->logbufsize > XLOG_BIG_RECORD_BSIZE) {
1662 cmn_err(CE_WARN,
1663 "XFS: logbuf size for version 1 logs must be 16K or 32K");
1664 return XFS_ERROR(EINVAL);
1665 }
1666 }
1667
1668 /*
1669 * mkfs'ed attr2 will turn on attr2 mount unless explicitly
1670 * told by noattr2 to turn it off
1671 */
1672 if (xfs_sb_version_hasattr2(&mp->m_sb) &&
1673 !(ap->flags & XFSMNT_NOATTR2))
1674 mp->m_flags |= XFS_MOUNT_ATTR2;
1675
1676 /*
1677 * prohibit r/w mounts of read-only filesystems
1678 */
1679 if ((mp->m_sb.sb_flags & XFS_SBF_READONLY) && !ronly) {
1680 cmn_err(CE_WARN,
1681 "XFS: cannot mount a read-only filesystem as read-write");
1682 return XFS_ERROR(EROFS);
1683 }
1684
1685 /*
1686 * check for shared mount.
1687 */
1688 if (ap->flags & XFSMNT_SHARED) {
1689 if (!xfs_sb_version_hasshared(&mp->m_sb))
1690 return XFS_ERROR(EINVAL);
1691
1692 /*
1693 * For IRIX 6.5, shared mounts must have the shared
1694 * version bit set, have the persistent readonly
1695 * field set, must be version 0 and can only be mounted
1696 * read-only.
1697 */
1698 if (!ronly || !(mp->m_sb.sb_flags & XFS_SBF_READONLY) ||
1699 (mp->m_sb.sb_shared_vn != 0))
1700 return XFS_ERROR(EINVAL);
1701
1702 mp->m_flags |= XFS_MOUNT_SHARED;
1703
1704 /*
1705 * Shared XFS V0 can't deal with DMI. Return EINVAL.
1706 */
1707 if (mp->m_sb.sb_shared_vn == 0 && (ap->flags & XFSMNT_DMAPI))
1708 return XFS_ERROR(EINVAL);
1709 }
1710
1711 if (ap->flags & XFSMNT_UQUOTA) {
1712 mp->m_qflags |= (XFS_UQUOTA_ACCT | XFS_UQUOTA_ACTIVE);
1713 if (ap->flags & XFSMNT_UQUOTAENF)
1714 mp->m_qflags |= XFS_UQUOTA_ENFD;
1715 }
1716
1717 if (ap->flags & XFSMNT_GQUOTA) {
1718 mp->m_qflags |= (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE);
1719 if (ap->flags & XFSMNT_GQUOTAENF)
1720 mp->m_qflags |= XFS_OQUOTA_ENFD;
1721 } else if (ap->flags & XFSMNT_PQUOTA) {
1722 mp->m_qflags |= (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE);
1723 if (ap->flags & XFSMNT_PQUOTAENF)
1724 mp->m_qflags |= XFS_OQUOTA_ENFD;
1725 }
1726
1727 return 0;
1728}
1729
1302STATIC int 1730STATIC int
1303xfs_fs_fill_super( 1731xfs_fs_fill_super(
1304 struct super_block *sb, 1732 struct super_block *sb,
@@ -1307,11 +1735,21 @@ xfs_fs_fill_super(
1307{ 1735{
1308 struct inode *root; 1736 struct inode *root;
1309 struct xfs_mount *mp = NULL; 1737 struct xfs_mount *mp = NULL;
1310 struct xfs_mount_args *args = xfs_args_allocate(sb, silent); 1738 struct xfs_mount_args *args;
1311 int error; 1739 int flags = 0, error = ENOMEM;
1740
1741 args = xfs_args_allocate(sb, silent);
1742 if (!args)
1743 return -ENOMEM;
1312 1744
1313 mp = xfs_mount_init(); 1745 mp = kzalloc(sizeof(struct xfs_mount), GFP_KERNEL);
1746 if (!mp)
1747 goto out_free_args;
1314 1748
1749 spin_lock_init(&mp->m_sb_lock);
1750 mutex_init(&mp->m_ilock);
1751 mutex_init(&mp->m_growlock);
1752 atomic_set(&mp->m_active_trans, 0);
1315 INIT_LIST_HEAD(&mp->m_sync_list); 1753 INIT_LIST_HEAD(&mp->m_sync_list);
1316 spin_lock_init(&mp->m_sync_lock); 1754 spin_lock_init(&mp->m_sync_lock);
1317 init_waitqueue_head(&mp->m_wait_single_sync_task); 1755 init_waitqueue_head(&mp->m_wait_single_sync_task);
@@ -1324,16 +1762,60 @@ xfs_fs_fill_super(
1324 1762
1325 error = xfs_parseargs(mp, (char *)data, args, 0); 1763 error = xfs_parseargs(mp, (char *)data, args, 0);
1326 if (error) 1764 if (error)
1327 goto fail_vfsop; 1765 goto out_free_mp;
1328 1766
1329 sb_min_blocksize(sb, BBSIZE); 1767 sb_min_blocksize(sb, BBSIZE);
1768 sb->s_xattr = xfs_xattr_handlers;
1330 sb->s_export_op = &xfs_export_operations; 1769 sb->s_export_op = &xfs_export_operations;
1331 sb->s_qcop = &xfs_quotactl_operations; 1770 sb->s_qcop = &xfs_quotactl_operations;
1332 sb->s_op = &xfs_super_operations; 1771 sb->s_op = &xfs_super_operations;
1333 1772
1334 error = xfs_mount(mp, args, NULL); 1773 error = xfs_dmops_get(mp, args);
1335 if (error) 1774 if (error)
1336 goto fail_vfsop; 1775 goto out_free_mp;
1776 error = xfs_qmops_get(mp, args);
1777 if (error)
1778 goto out_put_dmops;
1779
1780 if (args->flags & XFSMNT_QUIET)
1781 flags |= XFS_MFSI_QUIET;
1782
1783 error = xfs_open_devices(mp, args);
1784 if (error)
1785 goto out_put_qmops;
1786
1787 if (xfs_icsb_init_counters(mp))
1788 mp->m_flags |= XFS_MOUNT_NO_PERCPU_SB;
1789
1790 /*
1791 * Setup flags based on mount(2) options and then the superblock
1792 */
1793 error = xfs_start_flags(args, mp);
1794 if (error)
1795 goto out_destroy_counters;
1796 error = xfs_readsb(mp, flags);
1797 if (error)
1798 goto out_destroy_counters;
1799 error = xfs_finish_flags(args, mp);
1800 if (error)
1801 goto out_free_sb;
1802
1803 error = xfs_setup_devices(mp);
1804 if (error)
1805 goto out_free_sb;
1806
1807 if (mp->m_flags & XFS_MOUNT_BARRIER)
1808 xfs_mountfs_check_barriers(mp);
1809
1810 error = xfs_filestream_mount(mp);
1811 if (error)
1812 goto out_free_sb;
1813
1814 error = xfs_mountfs(mp, flags);
1815 if (error)
1816 goto out_filestream_unmount;
1817
1818 XFS_SEND_MOUNT(mp, DM_RIGHT_NULL, args->mtpt, args->fsname);
1337 1819
1338 sb->s_dirt = 1; 1820 sb->s_dirt = 1;
1339 sb->s_magic = XFS_SB_MAGIC; 1821 sb->s_magic = XFS_SB_MAGIC;
@@ -1368,10 +1850,27 @@ xfs_fs_fill_super(
1368 1850
1369 xfs_itrace_exit(XFS_I(sb->s_root->d_inode)); 1851 xfs_itrace_exit(XFS_I(sb->s_root->d_inode));
1370 1852
1371 kmem_free(args, sizeof(*args)); 1853 kfree(args);
1372 return 0; 1854 return 0;
1373 1855
1374fail_vnrele: 1856 out_filestream_unmount:
1857 xfs_filestream_unmount(mp);
1858 out_free_sb:
1859 xfs_freesb(mp);
1860 out_destroy_counters:
1861 xfs_icsb_destroy_counters(mp);
1862 xfs_close_devices(mp);
1863 out_put_qmops:
1864 xfs_qmops_put(mp);
1865 out_put_dmops:
1866 xfs_dmops_put(mp);
1867 out_free_mp:
1868 kfree(mp);
1869 out_free_args:
1870 kfree(args);
1871 return -error;
1872
1873 fail_vnrele:
1375 if (sb->s_root) { 1874 if (sb->s_root) {
1376 dput(sb->s_root); 1875 dput(sb->s_root);
1377 sb->s_root = NULL; 1876 sb->s_root = NULL;
@@ -1379,12 +1878,22 @@ fail_vnrele:
1379 iput(root); 1878 iput(root);
1380 } 1879 }
1381 1880
1382fail_unmount: 1881 fail_unmount:
1383 xfs_unmount(mp, 0, NULL); 1882 /*
1883 * Blow away any referenced inode in the filestreams cache.
1884 * This can and will cause log traffic as inodes go inactive
1885 * here.
1886 */
1887 xfs_filestream_unmount(mp);
1384 1888
1385fail_vfsop: 1889 XFS_bflush(mp->m_ddev_targp);
1386 kmem_free(args, sizeof(*args)); 1890 error = xfs_unmount_flush(mp, 0);
1387 return -error; 1891 WARN_ON(error);
1892
1893 IRELE(mp->m_rootip);
1894
1895 xfs_unmountfs(mp);
1896 goto out_destroy_counters;
1388} 1897}
1389 1898
1390STATIC int 1899STATIC int
@@ -1429,9 +1938,235 @@ static struct file_system_type xfs_fs_type = {
1429 .fs_flags = FS_REQUIRES_DEV, 1938 .fs_flags = FS_REQUIRES_DEV,
1430}; 1939};
1431 1940
1941STATIC int __init
1942xfs_alloc_trace_bufs(void)
1943{
1944#ifdef XFS_ALLOC_TRACE
1945 xfs_alloc_trace_buf = ktrace_alloc(XFS_ALLOC_TRACE_SIZE, KM_MAYFAIL);
1946 if (!xfs_alloc_trace_buf)
1947 goto out;
1948#endif
1949#ifdef XFS_BMAP_TRACE
1950 xfs_bmap_trace_buf = ktrace_alloc(XFS_BMAP_TRACE_SIZE, KM_MAYFAIL);
1951 if (!xfs_bmap_trace_buf)
1952 goto out_free_alloc_trace;
1953#endif
1954#ifdef XFS_BMBT_TRACE
1955 xfs_bmbt_trace_buf = ktrace_alloc(XFS_BMBT_TRACE_SIZE, KM_MAYFAIL);
1956 if (!xfs_bmbt_trace_buf)
1957 goto out_free_bmap_trace;
1958#endif
1959#ifdef XFS_ATTR_TRACE
1960 xfs_attr_trace_buf = ktrace_alloc(XFS_ATTR_TRACE_SIZE, KM_MAYFAIL);
1961 if (!xfs_attr_trace_buf)
1962 goto out_free_bmbt_trace;
1963#endif
1964#ifdef XFS_DIR2_TRACE
1965 xfs_dir2_trace_buf = ktrace_alloc(XFS_DIR2_GTRACE_SIZE, KM_MAYFAIL);
1966 if (!xfs_dir2_trace_buf)
1967 goto out_free_attr_trace;
1968#endif
1969
1970 return 0;
1971
1972#ifdef XFS_DIR2_TRACE
1973 out_free_attr_trace:
1974#endif
1975#ifdef XFS_ATTR_TRACE
1976 ktrace_free(xfs_attr_trace_buf);
1977 out_free_bmbt_trace:
1978#endif
1979#ifdef XFS_BMBT_TRACE
1980 ktrace_free(xfs_bmbt_trace_buf);
1981 out_free_bmap_trace:
1982#endif
1983#ifdef XFS_BMAP_TRACE
1984 ktrace_free(xfs_bmap_trace_buf);
1985 out_free_alloc_trace:
1986#endif
1987#ifdef XFS_ALLOC_TRACE
1988 ktrace_free(xfs_alloc_trace_buf);
1989 out:
1990#endif
1991 return -ENOMEM;
1992}
1993
1994STATIC void
1995xfs_free_trace_bufs(void)
1996{
1997#ifdef XFS_DIR2_TRACE
1998 ktrace_free(xfs_dir2_trace_buf);
1999#endif
2000#ifdef XFS_ATTR_TRACE
2001 ktrace_free(xfs_attr_trace_buf);
2002#endif
2003#ifdef XFS_BMBT_TRACE
2004 ktrace_free(xfs_bmbt_trace_buf);
2005#endif
2006#ifdef XFS_BMAP_TRACE
2007 ktrace_free(xfs_bmap_trace_buf);
2008#endif
2009#ifdef XFS_ALLOC_TRACE
2010 ktrace_free(xfs_alloc_trace_buf);
2011#endif
2012}
1432 2013
1433STATIC int __init 2014STATIC int __init
1434init_xfs_fs( void ) 2015xfs_init_zones(void)
2016{
2017 xfs_vnode_zone = kmem_zone_init_flags(sizeof(bhv_vnode_t), "xfs_vnode",
2018 KM_ZONE_HWALIGN | KM_ZONE_RECLAIM |
2019 KM_ZONE_SPREAD,
2020 xfs_fs_inode_init_once);
2021 if (!xfs_vnode_zone)
2022 goto out;
2023
2024 xfs_ioend_zone = kmem_zone_init(sizeof(xfs_ioend_t), "xfs_ioend");
2025 if (!xfs_ioend_zone)
2026 goto out_destroy_vnode_zone;
2027
2028 xfs_ioend_pool = mempool_create_slab_pool(4 * MAX_BUF_PER_PAGE,
2029 xfs_ioend_zone);
2030 if (!xfs_ioend_pool)
2031 goto out_destroy_ioend_zone;
2032
2033 xfs_log_ticket_zone = kmem_zone_init(sizeof(xlog_ticket_t),
2034 "xfs_log_ticket");
2035 if (!xfs_log_ticket_zone)
2036 goto out_destroy_ioend_pool;
2037
2038 xfs_bmap_free_item_zone = kmem_zone_init(sizeof(xfs_bmap_free_item_t),
2039 "xfs_bmap_free_item");
2040 if (!xfs_bmap_free_item_zone)
2041 goto out_destroy_log_ticket_zone;
2042 xfs_btree_cur_zone = kmem_zone_init(sizeof(xfs_btree_cur_t),
2043 "xfs_btree_cur");
2044 if (!xfs_btree_cur_zone)
2045 goto out_destroy_bmap_free_item_zone;
2046
2047 xfs_da_state_zone = kmem_zone_init(sizeof(xfs_da_state_t),
2048 "xfs_da_state");
2049 if (!xfs_da_state_zone)
2050 goto out_destroy_btree_cur_zone;
2051
2052 xfs_dabuf_zone = kmem_zone_init(sizeof(xfs_dabuf_t), "xfs_dabuf");
2053 if (!xfs_dabuf_zone)
2054 goto out_destroy_da_state_zone;
2055
2056 xfs_ifork_zone = kmem_zone_init(sizeof(xfs_ifork_t), "xfs_ifork");
2057 if (!xfs_ifork_zone)
2058 goto out_destroy_dabuf_zone;
2059
2060 xfs_trans_zone = kmem_zone_init(sizeof(xfs_trans_t), "xfs_trans");
2061 if (!xfs_trans_zone)
2062 goto out_destroy_ifork_zone;
2063
2064 /*
2065 * The size of the zone allocated buf log item is the maximum
2066 * size possible under XFS. This wastes a little bit of memory,
2067 * but it is much faster.
2068 */
2069 xfs_buf_item_zone = kmem_zone_init((sizeof(xfs_buf_log_item_t) +
2070 (((XFS_MAX_BLOCKSIZE / XFS_BLI_CHUNK) /
2071 NBWORD) * sizeof(int))), "xfs_buf_item");
2072 if (!xfs_buf_item_zone)
2073 goto out_destroy_trans_zone;
2074
2075 xfs_efd_zone = kmem_zone_init((sizeof(xfs_efd_log_item_t) +
2076 ((XFS_EFD_MAX_FAST_EXTENTS - 1) *
2077 sizeof(xfs_extent_t))), "xfs_efd_item");
2078 if (!xfs_efd_zone)
2079 goto out_destroy_buf_item_zone;
2080
2081 xfs_efi_zone = kmem_zone_init((sizeof(xfs_efi_log_item_t) +
2082 ((XFS_EFI_MAX_FAST_EXTENTS - 1) *
2083 sizeof(xfs_extent_t))), "xfs_efi_item");
2084 if (!xfs_efi_zone)
2085 goto out_destroy_efd_zone;
2086
2087 xfs_inode_zone =
2088 kmem_zone_init_flags(sizeof(xfs_inode_t), "xfs_inode",
2089 KM_ZONE_HWALIGN | KM_ZONE_RECLAIM |
2090 KM_ZONE_SPREAD, NULL);
2091 if (!xfs_inode_zone)
2092 goto out_destroy_efi_zone;
2093
2094 xfs_ili_zone =
2095 kmem_zone_init_flags(sizeof(xfs_inode_log_item_t), "xfs_ili",
2096 KM_ZONE_SPREAD, NULL);
2097 if (!xfs_ili_zone)
2098 goto out_destroy_inode_zone;
2099
2100#ifdef CONFIG_XFS_POSIX_ACL
2101 xfs_acl_zone = kmem_zone_init(sizeof(xfs_acl_t), "xfs_acl");
2102 if (!xfs_acl_zone)
2103 goto out_destroy_ili_zone;
2104#endif
2105
2106 return 0;
2107
2108#ifdef CONFIG_XFS_POSIX_ACL
2109 out_destroy_ili_zone:
2110#endif
2111 kmem_zone_destroy(xfs_ili_zone);
2112 out_destroy_inode_zone:
2113 kmem_zone_destroy(xfs_inode_zone);
2114 out_destroy_efi_zone:
2115 kmem_zone_destroy(xfs_efi_zone);
2116 out_destroy_efd_zone:
2117 kmem_zone_destroy(xfs_efd_zone);
2118 out_destroy_buf_item_zone:
2119 kmem_zone_destroy(xfs_buf_item_zone);
2120 out_destroy_trans_zone:
2121 kmem_zone_destroy(xfs_trans_zone);
2122 out_destroy_ifork_zone:
2123 kmem_zone_destroy(xfs_ifork_zone);
2124 out_destroy_dabuf_zone:
2125 kmem_zone_destroy(xfs_dabuf_zone);
2126 out_destroy_da_state_zone:
2127 kmem_zone_destroy(xfs_da_state_zone);
2128 out_destroy_btree_cur_zone:
2129 kmem_zone_destroy(xfs_btree_cur_zone);
2130 out_destroy_bmap_free_item_zone:
2131 kmem_zone_destroy(xfs_bmap_free_item_zone);
2132 out_destroy_log_ticket_zone:
2133 kmem_zone_destroy(xfs_log_ticket_zone);
2134 out_destroy_ioend_pool:
2135 mempool_destroy(xfs_ioend_pool);
2136 out_destroy_ioend_zone:
2137 kmem_zone_destroy(xfs_ioend_zone);
2138 out_destroy_vnode_zone:
2139 kmem_zone_destroy(xfs_vnode_zone);
2140 out:
2141 return -ENOMEM;
2142}
2143
2144STATIC void
2145xfs_destroy_zones(void)
2146{
2147#ifdef CONFIG_XFS_POSIX_ACL
2148 kmem_zone_destroy(xfs_acl_zone);
2149#endif
2150 kmem_zone_destroy(xfs_ili_zone);
2151 kmem_zone_destroy(xfs_inode_zone);
2152 kmem_zone_destroy(xfs_efi_zone);
2153 kmem_zone_destroy(xfs_efd_zone);
2154 kmem_zone_destroy(xfs_buf_item_zone);
2155 kmem_zone_destroy(xfs_trans_zone);
2156 kmem_zone_destroy(xfs_ifork_zone);
2157 kmem_zone_destroy(xfs_dabuf_zone);
2158 kmem_zone_destroy(xfs_da_state_zone);
2159 kmem_zone_destroy(xfs_btree_cur_zone);
2160 kmem_zone_destroy(xfs_bmap_free_item_zone);
2161 kmem_zone_destroy(xfs_log_ticket_zone);
2162 mempool_destroy(xfs_ioend_pool);
2163 kmem_zone_destroy(xfs_ioend_zone);
2164 kmem_zone_destroy(xfs_vnode_zone);
2165
2166}
2167
2168STATIC int __init
2169init_xfs_fs(void)
1435{ 2170{
1436 int error; 2171 int error;
1437 static char message[] __initdata = KERN_INFO \ 2172 static char message[] __initdata = KERN_INFO \
@@ -1440,42 +2175,73 @@ init_xfs_fs( void )
1440 printk(message); 2175 printk(message);
1441 2176
1442 ktrace_init(64); 2177 ktrace_init(64);
2178 vn_init();
2179 xfs_dir_startup();
1443 2180
1444 error = xfs_init_zones(); 2181 error = xfs_init_zones();
1445 if (error < 0) 2182 if (error)
1446 goto undo_zones; 2183 goto out;
2184
2185 error = xfs_alloc_trace_bufs();
2186 if (error)
2187 goto out_destroy_zones;
2188
2189 error = xfs_mru_cache_init();
2190 if (error)
2191 goto out_free_trace_buffers;
2192
2193 error = xfs_filestream_init();
2194 if (error)
2195 goto out_mru_cache_uninit;
1447 2196
1448 error = xfs_buf_init(); 2197 error = xfs_buf_init();
1449 if (error < 0) 2198 if (error)
1450 goto undo_buffers; 2199 goto out_filestream_uninit;
2200
2201 error = xfs_init_procfs();
2202 if (error)
2203 goto out_buf_terminate;
2204
2205 error = xfs_sysctl_register();
2206 if (error)
2207 goto out_cleanup_procfs;
1451 2208
1452 vn_init();
1453 xfs_init();
1454 uuid_init();
1455 vfs_initquota(); 2209 vfs_initquota();
1456 2210
1457 error = register_filesystem(&xfs_fs_type); 2211 error = register_filesystem(&xfs_fs_type);
1458 if (error) 2212 if (error)
1459 goto undo_register; 2213 goto out_sysctl_unregister;
1460 return 0; 2214 return 0;
1461 2215
1462undo_register: 2216 out_sysctl_unregister:
2217 xfs_sysctl_unregister();
2218 out_cleanup_procfs:
2219 xfs_cleanup_procfs();
2220 out_buf_terminate:
1463 xfs_buf_terminate(); 2221 xfs_buf_terminate();
1464 2222 out_filestream_uninit:
1465undo_buffers: 2223 xfs_filestream_uninit();
2224 out_mru_cache_uninit:
2225 xfs_mru_cache_uninit();
2226 out_free_trace_buffers:
2227 xfs_free_trace_bufs();
2228 out_destroy_zones:
1466 xfs_destroy_zones(); 2229 xfs_destroy_zones();
1467 2230 out:
1468undo_zones:
1469 return error; 2231 return error;
1470} 2232}
1471 2233
1472STATIC void __exit 2234STATIC void __exit
1473exit_xfs_fs( void ) 2235exit_xfs_fs(void)
1474{ 2236{
1475 vfs_exitquota(); 2237 vfs_exitquota();
1476 unregister_filesystem(&xfs_fs_type); 2238 unregister_filesystem(&xfs_fs_type);
1477 xfs_cleanup(); 2239 xfs_sysctl_unregister();
2240 xfs_cleanup_procfs();
1478 xfs_buf_terminate(); 2241 xfs_buf_terminate();
2242 xfs_filestream_uninit();
2243 xfs_mru_cache_uninit();
2244 xfs_free_trace_bufs();
1479 xfs_destroy_zones(); 2245 xfs_destroy_zones();
1480 ktrace_uninit(); 2246 ktrace_uninit();
1481} 2247}
diff --git a/fs/xfs/linux-2.6/xfs_super.h b/fs/xfs/linux-2.6/xfs_super.h
index 3efb7c6d3303..b7d13da01bd6 100644
--- a/fs/xfs/linux-2.6/xfs_super.h
+++ b/fs/xfs/linux-2.6/xfs_super.h
@@ -107,12 +107,10 @@ extern void xfs_initialize_vnode(struct xfs_mount *mp, bhv_vnode_t *vp,
107extern void xfs_flush_inode(struct xfs_inode *); 107extern void xfs_flush_inode(struct xfs_inode *);
108extern void xfs_flush_device(struct xfs_inode *); 108extern void xfs_flush_device(struct xfs_inode *);
109 109
110extern int xfs_blkdev_get(struct xfs_mount *, const char *,
111 struct block_device **);
112extern void xfs_blkdev_put(struct block_device *);
113extern void xfs_blkdev_issue_flush(struct xfs_buftarg *); 110extern void xfs_blkdev_issue_flush(struct xfs_buftarg *);
114 111
115extern const struct export_operations xfs_export_operations; 112extern const struct export_operations xfs_export_operations;
113extern struct xattr_handler *xfs_xattr_handlers[];
116 114
117#define XFS_M(sb) ((struct xfs_mount *)((sb)->s_fs_info)) 115#define XFS_M(sb) ((struct xfs_mount *)((sb)->s_fs_info))
118 116
diff --git a/fs/xfs/linux-2.6/xfs_sysctl.c b/fs/xfs/linux-2.6/xfs_sysctl.c
index bb997d75c05c..7dacb5bbde3f 100644
--- a/fs/xfs/linux-2.6/xfs_sysctl.c
+++ b/fs/xfs/linux-2.6/xfs_sysctl.c
@@ -259,15 +259,17 @@ static ctl_table xfs_root_table[] = {
259 {} 259 {}
260}; 260};
261 261
262void 262int
263xfs_sysctl_register(void) 263xfs_sysctl_register(void)
264{ 264{
265 xfs_table_header = register_sysctl_table(xfs_root_table); 265 xfs_table_header = register_sysctl_table(xfs_root_table);
266 if (!xfs_table_header)
267 return -ENOMEM;
268 return 0;
266} 269}
267 270
268void 271void
269xfs_sysctl_unregister(void) 272xfs_sysctl_unregister(void)
270{ 273{
271 if (xfs_table_header) 274 unregister_sysctl_table(xfs_table_header);
272 unregister_sysctl_table(xfs_table_header);
273} 275}
diff --git a/fs/xfs/linux-2.6/xfs_sysctl.h b/fs/xfs/linux-2.6/xfs_sysctl.h
index 98b97e399d6f..4aadb8056c37 100644
--- a/fs/xfs/linux-2.6/xfs_sysctl.h
+++ b/fs/xfs/linux-2.6/xfs_sysctl.h
@@ -93,10 +93,10 @@ enum {
93extern xfs_param_t xfs_params; 93extern xfs_param_t xfs_params;
94 94
95#ifdef CONFIG_SYSCTL 95#ifdef CONFIG_SYSCTL
96extern void xfs_sysctl_register(void); 96extern int xfs_sysctl_register(void);
97extern void xfs_sysctl_unregister(void); 97extern void xfs_sysctl_unregister(void);
98#else 98#else
99# define xfs_sysctl_register() do { } while (0) 99# define xfs_sysctl_register() (0)
100# define xfs_sysctl_unregister() do { } while (0) 100# define xfs_sysctl_unregister() do { } while (0)
101#endif /* CONFIG_SYSCTL */ 101#endif /* CONFIG_SYSCTL */
102 102
diff --git a/fs/xfs/linux-2.6/xfs_vnode.c b/fs/xfs/linux-2.6/xfs_vnode.c
index bc7afe007338..25488b6d9881 100644
--- a/fs/xfs/linux-2.6/xfs_vnode.c
+++ b/fs/xfs/linux-2.6/xfs_vnode.c
@@ -82,56 +82,6 @@ vn_ioerror(
82 xfs_do_force_shutdown(ip->i_mount, SHUTDOWN_DEVICE_REQ, f, l); 82 xfs_do_force_shutdown(ip->i_mount, SHUTDOWN_DEVICE_REQ, f, l);
83} 83}
84 84
85/*
86 * Revalidate the Linux inode from the XFS inode.
87 * Note: i_size _not_ updated; we must hold the inode
88 * semaphore when doing that - callers responsibility.
89 */
90int
91vn_revalidate(
92 bhv_vnode_t *vp)
93{
94 struct inode *inode = vn_to_inode(vp);
95 struct xfs_inode *ip = XFS_I(inode);
96 struct xfs_mount *mp = ip->i_mount;
97 unsigned long xflags;
98
99 xfs_itrace_entry(ip);
100
101 if (XFS_FORCED_SHUTDOWN(mp))
102 return -EIO;
103
104 xfs_ilock(ip, XFS_ILOCK_SHARED);
105 inode->i_mode = ip->i_d.di_mode;
106 inode->i_uid = ip->i_d.di_uid;
107 inode->i_gid = ip->i_d.di_gid;
108 inode->i_mtime.tv_sec = ip->i_d.di_mtime.t_sec;
109 inode->i_mtime.tv_nsec = ip->i_d.di_mtime.t_nsec;
110 inode->i_ctime.tv_sec = ip->i_d.di_ctime.t_sec;
111 inode->i_ctime.tv_nsec = ip->i_d.di_ctime.t_nsec;
112
113 xflags = xfs_ip2xflags(ip);
114 if (xflags & XFS_XFLAG_IMMUTABLE)
115 inode->i_flags |= S_IMMUTABLE;
116 else
117 inode->i_flags &= ~S_IMMUTABLE;
118 if (xflags & XFS_XFLAG_APPEND)
119 inode->i_flags |= S_APPEND;
120 else
121 inode->i_flags &= ~S_APPEND;
122 if (xflags & XFS_XFLAG_SYNC)
123 inode->i_flags |= S_SYNC;
124 else
125 inode->i_flags &= ~S_SYNC;
126 if (xflags & XFS_XFLAG_NOATIME)
127 inode->i_flags |= S_NOATIME;
128 else
129 inode->i_flags &= ~S_NOATIME;
130 xfs_iunlock(ip, XFS_ILOCK_SHARED);
131
132 xfs_iflags_clear(ip, XFS_IMODIFIED);
133 return 0;
134}
135 85
136/* 86/*
137 * Add a reference to a referenced vnode. 87 * Add a reference to a referenced vnode.
diff --git a/fs/xfs/linux-2.6/xfs_vnode.h b/fs/xfs/linux-2.6/xfs_vnode.h
index 25eb2a9e8d9b..41ca2cec5d31 100644
--- a/fs/xfs/linux-2.6/xfs_vnode.h
+++ b/fs/xfs/linux-2.6/xfs_vnode.h
@@ -19,7 +19,6 @@
19#define __XFS_VNODE_H__ 19#define __XFS_VNODE_H__
20 20
21struct file; 21struct file;
22struct bhv_vattr;
23struct xfs_iomap; 22struct xfs_iomap;
24struct attrlist_cursor_kern; 23struct attrlist_cursor_kern;
25 24
@@ -66,87 +65,8 @@ static inline struct inode *vn_to_inode(bhv_vnode_t *vnode)
66 Prevent VM access to the pages until 65 Prevent VM access to the pages until
67 the operation completes. */ 66 the operation completes. */
68 67
69/*
70 * Vnode attributes. va_mask indicates those attributes the caller
71 * wants to set or extract.
72 */
73typedef struct bhv_vattr {
74 int va_mask; /* bit-mask of attributes present */
75 mode_t va_mode; /* file access mode and type */
76 xfs_nlink_t va_nlink; /* number of references to file */
77 uid_t va_uid; /* owner user id */
78 gid_t va_gid; /* owner group id */
79 xfs_ino_t va_nodeid; /* file id */
80 xfs_off_t va_size; /* file size in bytes */
81 u_long va_blocksize; /* blocksize preferred for i/o */
82 struct timespec va_atime; /* time of last access */
83 struct timespec va_mtime; /* time of last modification */
84 struct timespec va_ctime; /* time file changed */
85 u_int va_gen; /* generation number of file */
86 xfs_dev_t va_rdev; /* device the special file represents */
87 __int64_t va_nblocks; /* number of blocks allocated */
88 u_long va_xflags; /* random extended file flags */
89 u_long va_extsize; /* file extent size */
90 u_long va_nextents; /* number of extents in file */
91 u_long va_anextents; /* number of attr extents in file */
92 prid_t va_projid; /* project id */
93} bhv_vattr_t;
94
95/*
96 * setattr or getattr attributes
97 */
98#define XFS_AT_TYPE 0x00000001
99#define XFS_AT_MODE 0x00000002
100#define XFS_AT_UID 0x00000004
101#define XFS_AT_GID 0x00000008
102#define XFS_AT_FSID 0x00000010
103#define XFS_AT_NODEID 0x00000020
104#define XFS_AT_NLINK 0x00000040
105#define XFS_AT_SIZE 0x00000080
106#define XFS_AT_ATIME 0x00000100
107#define XFS_AT_MTIME 0x00000200
108#define XFS_AT_CTIME 0x00000400
109#define XFS_AT_RDEV 0x00000800
110#define XFS_AT_BLKSIZE 0x00001000
111#define XFS_AT_NBLOCKS 0x00002000
112#define XFS_AT_VCODE 0x00004000
113#define XFS_AT_MAC 0x00008000
114#define XFS_AT_UPDATIME 0x00010000
115#define XFS_AT_UPDMTIME 0x00020000
116#define XFS_AT_UPDCTIME 0x00040000
117#define XFS_AT_ACL 0x00080000
118#define XFS_AT_CAP 0x00100000
119#define XFS_AT_INF 0x00200000
120#define XFS_AT_XFLAGS 0x00400000
121#define XFS_AT_EXTSIZE 0x00800000
122#define XFS_AT_NEXTENTS 0x01000000
123#define XFS_AT_ANEXTENTS 0x02000000
124#define XFS_AT_PROJID 0x04000000
125#define XFS_AT_SIZE_NOPERM 0x08000000
126#define XFS_AT_GENCOUNT 0x10000000
127
128#define XFS_AT_ALL (XFS_AT_TYPE|XFS_AT_MODE|XFS_AT_UID|XFS_AT_GID|\
129 XFS_AT_FSID|XFS_AT_NODEID|XFS_AT_NLINK|XFS_AT_SIZE|\
130 XFS_AT_ATIME|XFS_AT_MTIME|XFS_AT_CTIME|XFS_AT_RDEV|\
131 XFS_AT_BLKSIZE|XFS_AT_NBLOCKS|XFS_AT_VCODE|XFS_AT_MAC|\
132 XFS_AT_ACL|XFS_AT_CAP|XFS_AT_INF|XFS_AT_XFLAGS|XFS_AT_EXTSIZE|\
133 XFS_AT_NEXTENTS|XFS_AT_ANEXTENTS|XFS_AT_PROJID|XFS_AT_GENCOUNT)
134
135#define XFS_AT_STAT (XFS_AT_TYPE|XFS_AT_MODE|XFS_AT_UID|XFS_AT_GID|\
136 XFS_AT_FSID|XFS_AT_NODEID|XFS_AT_NLINK|XFS_AT_SIZE|\
137 XFS_AT_ATIME|XFS_AT_MTIME|XFS_AT_CTIME|XFS_AT_RDEV|\
138 XFS_AT_BLKSIZE|XFS_AT_NBLOCKS|XFS_AT_PROJID)
139
140#define XFS_AT_TIMES (XFS_AT_ATIME|XFS_AT_MTIME|XFS_AT_CTIME)
141
142#define XFS_AT_UPDTIMES (XFS_AT_UPDATIME|XFS_AT_UPDMTIME|XFS_AT_UPDCTIME)
143
144#define XFS_AT_NOSET (XFS_AT_NLINK|XFS_AT_RDEV|XFS_AT_FSID|XFS_AT_NODEID|\
145 XFS_AT_TYPE|XFS_AT_BLKSIZE|XFS_AT_NBLOCKS|XFS_AT_VCODE|\
146 XFS_AT_NEXTENTS|XFS_AT_ANEXTENTS|XFS_AT_GENCOUNT)
147 68
148extern void vn_init(void); 69extern void vn_init(void);
149extern int vn_revalidate(bhv_vnode_t *);
150 70
151/* 71/*
152 * Yeah, these don't take vnode anymore at all, all this should be 72 * Yeah, these don't take vnode anymore at all, all this should be
@@ -219,15 +139,6 @@ static inline void vn_atime_to_time_t(bhv_vnode_t *vp, time_t *tt)
219#define VN_DIRTY(vp) mapping_tagged(vn_to_inode(vp)->i_mapping, \ 139#define VN_DIRTY(vp) mapping_tagged(vn_to_inode(vp)->i_mapping, \
220 PAGECACHE_TAG_DIRTY) 140 PAGECACHE_TAG_DIRTY)
221 141
222/*
223 * Flags to vop_setattr/getattr.
224 */
225#define ATTR_UTIME 0x01 /* non-default utime(2) request */
226#define ATTR_DMI 0x08 /* invocation from a DMI function */
227#define ATTR_LAZY 0x80 /* set/get attributes lazily */
228#define ATTR_NONBLOCK 0x100 /* return EAGAIN if operation would block */
229#define ATTR_NOLOCK 0x200 /* Don't grab any conflicting locks */
230#define ATTR_NOSIZETOK 0x400 /* Don't get the SIZE token */
231 142
232/* 143/*
233 * Tracking vnode activity. 144 * Tracking vnode activity.
diff --git a/fs/xfs/linux-2.6/xfs_xattr.c b/fs/xfs/linux-2.6/xfs_xattr.c
new file mode 100644
index 000000000000..964621fde6ed
--- /dev/null
+++ b/fs/xfs/linux-2.6/xfs_xattr.c
@@ -0,0 +1,330 @@
1/*
2 * Copyright (C) 2008 Christoph Hellwig.
3 * Portions Copyright (C) 2000-2008 Silicon Graphics, Inc.
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as
7 * published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */
18
19#include "xfs.h"
20#include "xfs_da_btree.h"
21#include "xfs_bmap_btree.h"
22#include "xfs_inode.h"
23#include "xfs_attr.h"
24#include "xfs_attr_leaf.h"
25#include "xfs_acl.h"
26#include "xfs_vnodeops.h"
27
28#include <linux/posix_acl_xattr.h>
29#include <linux/xattr.h>
30
31
32/*
33 * ACL handling. Should eventually be moved into xfs_acl.c
34 */
35
36static int
37xfs_decode_acl(const char *name)
38{
39 if (strcmp(name, "posix_acl_access") == 0)
40 return _ACL_TYPE_ACCESS;
41 else if (strcmp(name, "posix_acl_default") == 0)
42 return _ACL_TYPE_DEFAULT;
43 return -EINVAL;
44}
45
/*
 * Read a system extended attribute.  At the moment Posix ACLs are the
 * only attributes handled here.
 *
 * Returns the negative error from xfs_decode_acl() when @name is not a
 * recognised ACL name, otherwise the result of xfs_acl_vget().
 */
static int
xfs_xattr_system_get(struct inode *inode, const char *name,
		void *buffer, size_t size)
{
	int	type = xfs_decode_acl(name);

	return type < 0 ? type : xfs_acl_vget(inode, buffer, size, type);
}
62
63static int
64xfs_xattr_system_set(struct inode *inode, const char *name,
65 const void *value, size_t size, int flags)
66{
67 int acl;
68
69 acl = xfs_decode_acl(name);
70 if (acl < 0)
71 return acl;
72 if (flags & XATTR_CREATE)
73 return -EINVAL;
74
75 if (!value)
76 return xfs_acl_vremove(inode, acl);
77
78 return xfs_acl_vset(inode, (void *)value, size, acl);
79}
80
81static struct xattr_handler xfs_xattr_system_handler = {
82 .prefix = XATTR_SYSTEM_PREFIX,
83 .get = xfs_xattr_system_get,
84 .set = xfs_xattr_system_set,
85};
86
87
88/*
89 * Real xattr handling. The only difference between the namespaces is
90 * a flag passed to the low-level attr code.
91 */
92
93static int
94__xfs_xattr_get(struct inode *inode, const char *name,
95 void *value, size_t size, int xflags)
96{
97 struct xfs_inode *ip = XFS_I(inode);
98 int error, asize = size;
99
100 if (strcmp(name, "") == 0)
101 return -EINVAL;
102
103 /* Convert Linux syscall to XFS internal ATTR flags */
104 if (!size) {
105 xflags |= ATTR_KERNOVAL;
106 value = NULL;
107 }
108
109 error = -xfs_attr_get(ip, name, value, &asize, xflags);
110 if (error)
111 return error;
112 return asize;
113}
114
115static int
116__xfs_xattr_set(struct inode *inode, const char *name, const void *value,
117 size_t size, int flags, int xflags)
118{
119 struct xfs_inode *ip = XFS_I(inode);
120
121 if (strcmp(name, "") == 0)
122 return -EINVAL;
123
124 /* Convert Linux syscall to XFS internal ATTR flags */
125 if (flags & XATTR_CREATE)
126 xflags |= ATTR_CREATE;
127 if (flags & XATTR_REPLACE)
128 xflags |= ATTR_REPLACE;
129
130 if (!value)
131 return -xfs_attr_remove(ip, name, xflags);
132 return -xfs_attr_set(ip, name, (void *)value, size, xflags);
133}
134
135static int
136xfs_xattr_user_get(struct inode *inode, const char *name,
137 void *value, size_t size)
138{
139 return __xfs_xattr_get(inode, name, value, size, 0);
140}
141
142static int
143xfs_xattr_user_set(struct inode *inode, const char *name,
144 const void *value, size_t size, int flags)
145{
146 return __xfs_xattr_set(inode, name, value, size, flags, 0);
147}
148
149static struct xattr_handler xfs_xattr_user_handler = {
150 .prefix = XATTR_USER_PREFIX,
151 .get = xfs_xattr_user_get,
152 .set = xfs_xattr_user_set,
153};
154
155
156static int
157xfs_xattr_trusted_get(struct inode *inode, const char *name,
158 void *value, size_t size)
159{
160 return __xfs_xattr_get(inode, name, value, size, ATTR_ROOT);
161}
162
163static int
164xfs_xattr_trusted_set(struct inode *inode, const char *name,
165 const void *value, size_t size, int flags)
166{
167 return __xfs_xattr_set(inode, name, value, size, flags, ATTR_ROOT);
168}
169
170static struct xattr_handler xfs_xattr_trusted_handler = {
171 .prefix = XATTR_TRUSTED_PREFIX,
172 .get = xfs_xattr_trusted_get,
173 .set = xfs_xattr_trusted_set,
174};
175
176
177static int
178xfs_xattr_secure_get(struct inode *inode, const char *name,
179 void *value, size_t size)
180{
181 return __xfs_xattr_get(inode, name, value, size, ATTR_SECURE);
182}
183
184static int
185xfs_xattr_secure_set(struct inode *inode, const char *name,
186 const void *value, size_t size, int flags)
187{
188 return __xfs_xattr_set(inode, name, value, size, flags, ATTR_SECURE);
189}
190
191static struct xattr_handler xfs_xattr_security_handler = {
192 .prefix = XATTR_SECURITY_PREFIX,
193 .get = xfs_xattr_secure_get,
194 .set = xfs_xattr_secure_set,
195};
196
197
198struct xattr_handler *xfs_xattr_handlers[] = {
199 &xfs_xattr_user_handler,
200 &xfs_xattr_trusted_handler,
201 &xfs_xattr_security_handler,
202 &xfs_xattr_system_handler,
203 NULL
204};
205
206static unsigned int xfs_xattr_prefix_len(int flags)
207{
208 if (flags & XFS_ATTR_SECURE)
209 return sizeof("security");
210 else if (flags & XFS_ATTR_ROOT)
211 return sizeof("trusted");
212 else
213 return sizeof("user");
214}
215
216static const char *xfs_xattr_prefix(int flags)
217{
218 if (flags & XFS_ATTR_SECURE)
219 return xfs_xattr_security_handler.prefix;
220 else if (flags & XFS_ATTR_ROOT)
221 return xfs_xattr_trusted_handler.prefix;
222 else
223 return xfs_xattr_user_handler.prefix;
224}
225
226static int
227xfs_xattr_put_listent(struct xfs_attr_list_context *context, int flags,
228 char *name, int namelen, int valuelen, char *value)
229{
230 unsigned int prefix_len = xfs_xattr_prefix_len(flags);
231 char *offset;
232 int arraytop;
233
234 ASSERT(context->count >= 0);
235
236 /*
237 * Only show root namespace entries if we are actually allowed to
238 * see them.
239 */
240 if ((flags & XFS_ATTR_ROOT) && !capable(CAP_SYS_ADMIN))
241 return 0;
242
243 arraytop = context->count + prefix_len + namelen + 1;
244 if (arraytop > context->firstu) {
245 context->count = -1; /* insufficient space */
246 return 1;
247 }
248 offset = (char *)context->alist + context->count;
249 strncpy(offset, xfs_xattr_prefix(flags), prefix_len);
250 offset += prefix_len;
251 strncpy(offset, name, namelen); /* real name */
252 offset += namelen;
253 *offset = '\0';
254 context->count += prefix_len + namelen + 1;
255 return 0;
256}
257
258static int
259xfs_xattr_put_listent_sizes(struct xfs_attr_list_context *context, int flags,
260 char *name, int namelen, int valuelen, char *value)
261{
262 context->count += xfs_xattr_prefix_len(flags) + namelen + 1;
263 return 0;
264}
265
/*
 * Account for, and if a buffer was supplied store, one synthetic
 * attribute name.
 *
 * @len is the space the name needs (callers pass strlen + 1) and is
 * always added to *@result.  With @size == 0 nothing is copied.
 * Otherwise @name is copied to @data at the previous *@result offset,
 * unless the new total exceeds @size, in which case -ERANGE is returned.
 *
 * Returns 0 on success or -ERANGE when the buffer is too small.
 */
static int
list_one_attr(const char *name, const size_t len, void *data,
		size_t size, ssize_t *result)
{
	const ssize_t	start = *result;

	*result = start + len;
	if (size == 0)
		return 0;
	if (*result > size)
		return -ERANGE;

	strcpy((char *)data + start, name);
	return 0;
}
281
282ssize_t
283xfs_vn_listxattr(struct dentry *dentry, char *data, size_t size)
284{
285 struct xfs_attr_list_context context;
286 struct attrlist_cursor_kern cursor = { 0 };
287 struct inode *inode = dentry->d_inode;
288 int error;
289
290 /*
291 * First read the regular on-disk attributes.
292 */
293 memset(&context, 0, sizeof(context));
294 context.dp = XFS_I(inode);
295 context.cursor = &cursor;
296 context.resynch = 1;
297 context.alist = data;
298 context.bufsize = size;
299 context.firstu = context.bufsize;
300
301 if (size)
302 context.put_listent = xfs_xattr_put_listent;
303 else
304 context.put_listent = xfs_xattr_put_listent_sizes;
305
306 xfs_attr_list_int(&context);
307 if (context.count < 0)
308 return -ERANGE;
309
310 /*
311 * Then add the two synthetic ACL attributes.
312 */
313 if (xfs_acl_vhasacl_access(inode)) {
314 error = list_one_attr(POSIX_ACL_XATTR_ACCESS,
315 strlen(POSIX_ACL_XATTR_ACCESS) + 1,
316 data, size, &context.count);
317 if (error)
318 return error;
319 }
320
321 if (xfs_acl_vhasacl_default(inode)) {
322 error = list_one_attr(POSIX_ACL_XATTR_DEFAULT,
323 strlen(POSIX_ACL_XATTR_DEFAULT) + 1,
324 data, size, &context.count);
325 if (error)
326 return error;
327 }
328
329 return context.count;
330}
diff --git a/fs/xfs/quota/xfs_dquot.c b/fs/xfs/quota/xfs_dquot.c
index 85df3288efd5..fc9f3fb39b7b 100644
--- a/fs/xfs/quota/xfs_dquot.c
+++ b/fs/xfs/quota/xfs_dquot.c
@@ -1435,8 +1435,7 @@ xfs_dqlock2(
1435/* ARGSUSED */ 1435/* ARGSUSED */
1436int 1436int
1437xfs_qm_dqpurge( 1437xfs_qm_dqpurge(
1438 xfs_dquot_t *dqp, 1438 xfs_dquot_t *dqp)
1439 uint flags)
1440{ 1439{
1441 xfs_dqhash_t *thishash; 1440 xfs_dqhash_t *thishash;
1442 xfs_mount_t *mp = dqp->q_mount; 1441 xfs_mount_t *mp = dqp->q_mount;
diff --git a/fs/xfs/quota/xfs_dquot.h b/fs/xfs/quota/xfs_dquot.h
index 5c371a92e3e2..f7393bba4e95 100644
--- a/fs/xfs/quota/xfs_dquot.h
+++ b/fs/xfs/quota/xfs_dquot.h
@@ -164,7 +164,7 @@ extern void xfs_qm_dqprint(xfs_dquot_t *);
164 164
165extern void xfs_qm_dqdestroy(xfs_dquot_t *); 165extern void xfs_qm_dqdestroy(xfs_dquot_t *);
166extern int xfs_qm_dqflush(xfs_dquot_t *, uint); 166extern int xfs_qm_dqflush(xfs_dquot_t *, uint);
167extern int xfs_qm_dqpurge(xfs_dquot_t *, uint); 167extern int xfs_qm_dqpurge(xfs_dquot_t *);
168extern void xfs_qm_dqunpin_wait(xfs_dquot_t *); 168extern void xfs_qm_dqunpin_wait(xfs_dquot_t *);
169extern int xfs_qm_dqlock_nowait(xfs_dquot_t *); 169extern int xfs_qm_dqlock_nowait(xfs_dquot_t *);
170extern int xfs_qm_dqflock_nowait(xfs_dquot_t *); 170extern int xfs_qm_dqflock_nowait(xfs_dquot_t *);
diff --git a/fs/xfs/quota/xfs_dquot_item.c b/fs/xfs/quota/xfs_dquot_item.c
index 36e05ca78412..08d2fc89e6a1 100644
--- a/fs/xfs/quota/xfs_dquot_item.c
+++ b/fs/xfs/quota/xfs_dquot_item.c
@@ -576,8 +576,8 @@ xfs_qm_qoffend_logitem_committed(
576 * xfs_trans_delete_ail() drops the AIL lock. 576 * xfs_trans_delete_ail() drops the AIL lock.
577 */ 577 */
578 xfs_trans_delete_ail(qfs->qql_item.li_mountp, (xfs_log_item_t *)qfs); 578 xfs_trans_delete_ail(qfs->qql_item.li_mountp, (xfs_log_item_t *)qfs);
579 kmem_free(qfs, sizeof(xfs_qoff_logitem_t)); 579 kmem_free(qfs);
580 kmem_free(qfe, sizeof(xfs_qoff_logitem_t)); 580 kmem_free(qfe);
581 return (xfs_lsn_t)-1; 581 return (xfs_lsn_t)-1;
582} 582}
583 583
diff --git a/fs/xfs/quota/xfs_qm.c b/fs/xfs/quota/xfs_qm.c
index d31cce1165c5..021934a3d456 100644
--- a/fs/xfs/quota/xfs_qm.c
+++ b/fs/xfs/quota/xfs_qm.c
@@ -192,8 +192,8 @@ xfs_qm_destroy(
192 xfs_qm_list_destroy(&(xqm->qm_usr_dqhtable[i])); 192 xfs_qm_list_destroy(&(xqm->qm_usr_dqhtable[i]));
193 xfs_qm_list_destroy(&(xqm->qm_grp_dqhtable[i])); 193 xfs_qm_list_destroy(&(xqm->qm_grp_dqhtable[i]));
194 } 194 }
195 kmem_free(xqm->qm_usr_dqhtable, hsize * sizeof(xfs_dqhash_t)); 195 kmem_free(xqm->qm_usr_dqhtable);
196 kmem_free(xqm->qm_grp_dqhtable, hsize * sizeof(xfs_dqhash_t)); 196 kmem_free(xqm->qm_grp_dqhtable);
197 xqm->qm_usr_dqhtable = NULL; 197 xqm->qm_usr_dqhtable = NULL;
198 xqm->qm_grp_dqhtable = NULL; 198 xqm->qm_grp_dqhtable = NULL;
199 xqm->qm_dqhashmask = 0; 199 xqm->qm_dqhashmask = 0;
@@ -201,7 +201,7 @@ xfs_qm_destroy(
201#ifdef DEBUG 201#ifdef DEBUG
202 mutex_destroy(&qcheck_lock); 202 mutex_destroy(&qcheck_lock);
203#endif 203#endif
204 kmem_free(xqm, sizeof(xfs_qm_t)); 204 kmem_free(xqm);
205} 205}
206 206
207/* 207/*
@@ -445,11 +445,11 @@ xfs_qm_unmount_quotas(
445 } 445 }
446 } 446 }
447 if (uqp) { 447 if (uqp) {
448 XFS_PURGE_INODE(uqp); 448 IRELE(uqp);
449 mp->m_quotainfo->qi_uquotaip = NULL; 449 mp->m_quotainfo->qi_uquotaip = NULL;
450 } 450 }
451 if (gqp) { 451 if (gqp) {
452 XFS_PURGE_INODE(gqp); 452 IRELE(gqp);
453 mp->m_quotainfo->qi_gquotaip = NULL; 453 mp->m_quotainfo->qi_gquotaip = NULL;
454 } 454 }
455out: 455out:
@@ -631,7 +631,7 @@ xfs_qm_dqpurge_int(
631 * freelist in INACTIVE state. 631 * freelist in INACTIVE state.
632 */ 632 */
633 nextdqp = dqp->MPL_NEXT; 633 nextdqp = dqp->MPL_NEXT;
634 nmisses += xfs_qm_dqpurge(dqp, flags); 634 nmisses += xfs_qm_dqpurge(dqp);
635 dqp = nextdqp; 635 dqp = nextdqp;
636 } 636 }
637 xfs_qm_mplist_unlock(mp); 637 xfs_qm_mplist_unlock(mp);
@@ -1134,7 +1134,7 @@ xfs_qm_init_quotainfo(
1134 * and change the superblock accordingly. 1134 * and change the superblock accordingly.
1135 */ 1135 */
1136 if ((error = xfs_qm_init_quotainos(mp))) { 1136 if ((error = xfs_qm_init_quotainos(mp))) {
1137 kmem_free(qinf, sizeof(xfs_quotainfo_t)); 1137 kmem_free(qinf);
1138 mp->m_quotainfo = NULL; 1138 mp->m_quotainfo = NULL;
1139 return error; 1139 return error;
1140 } 1140 }
@@ -1240,15 +1240,15 @@ xfs_qm_destroy_quotainfo(
1240 xfs_qm_list_destroy(&qi->qi_dqlist); 1240 xfs_qm_list_destroy(&qi->qi_dqlist);
1241 1241
1242 if (qi->qi_uquotaip) { 1242 if (qi->qi_uquotaip) {
1243 XFS_PURGE_INODE(qi->qi_uquotaip); 1243 IRELE(qi->qi_uquotaip);
1244 qi->qi_uquotaip = NULL; /* paranoia */ 1244 qi->qi_uquotaip = NULL; /* paranoia */
1245 } 1245 }
1246 if (qi->qi_gquotaip) { 1246 if (qi->qi_gquotaip) {
1247 XFS_PURGE_INODE(qi->qi_gquotaip); 1247 IRELE(qi->qi_gquotaip);
1248 qi->qi_gquotaip = NULL; 1248 qi->qi_gquotaip = NULL;
1249 } 1249 }
1250 mutex_destroy(&qi->qi_quotaofflock); 1250 mutex_destroy(&qi->qi_quotaofflock);
1251 kmem_free(qi, sizeof(xfs_quotainfo_t)); 1251 kmem_free(qi);
1252 mp->m_quotainfo = NULL; 1252 mp->m_quotainfo = NULL;
1253} 1253}
1254 1254
@@ -1394,7 +1394,7 @@ xfs_qm_qino_alloc(
1394 * locked exclusively and joined to the transaction already. 1394 * locked exclusively and joined to the transaction already.
1395 */ 1395 */
1396 ASSERT(xfs_isilocked(*ip, XFS_ILOCK_EXCL)); 1396 ASSERT(xfs_isilocked(*ip, XFS_ILOCK_EXCL));
1397 VN_HOLD(XFS_ITOV((*ip))); 1397 IHOLD(*ip);
1398 1398
1399 /* 1399 /*
1400 * Make the changes in the superblock, and log those too. 1400 * Make the changes in the superblock, and log those too.
@@ -1623,7 +1623,7 @@ xfs_qm_dqiterate(
1623 break; 1623 break;
1624 } while (nmaps > 0); 1624 } while (nmaps > 0);
1625 1625
1626 kmem_free(map, XFS_DQITER_MAP_SIZE * sizeof(*map)); 1626 kmem_free(map);
1627 1627
1628 return error; 1628 return error;
1629} 1629}
diff --git a/fs/xfs/quota/xfs_qm_syscalls.c b/fs/xfs/quota/xfs_qm_syscalls.c
index 768a3b27d2b6..adfb8723f65a 100644
--- a/fs/xfs/quota/xfs_qm_syscalls.c
+++ b/fs/xfs/quota/xfs_qm_syscalls.c
@@ -362,11 +362,11 @@ xfs_qm_scall_quotaoff(
362 * if we don't need them anymore. 362 * if we don't need them anymore.
363 */ 363 */
364 if ((dqtype & XFS_QMOPT_UQUOTA) && XFS_QI_UQIP(mp)) { 364 if ((dqtype & XFS_QMOPT_UQUOTA) && XFS_QI_UQIP(mp)) {
365 XFS_PURGE_INODE(XFS_QI_UQIP(mp)); 365 IRELE(XFS_QI_UQIP(mp));
366 XFS_QI_UQIP(mp) = NULL; 366 XFS_QI_UQIP(mp) = NULL;
367 } 367 }
368 if ((dqtype & (XFS_QMOPT_GQUOTA|XFS_QMOPT_PQUOTA)) && XFS_QI_GQIP(mp)) { 368 if ((dqtype & (XFS_QMOPT_GQUOTA|XFS_QMOPT_PQUOTA)) && XFS_QI_GQIP(mp)) {
369 XFS_PURGE_INODE(XFS_QI_GQIP(mp)); 369 IRELE(XFS_QI_GQIP(mp));
370 XFS_QI_GQIP(mp) = NULL; 370 XFS_QI_GQIP(mp) = NULL;
371 } 371 }
372out_error: 372out_error:
@@ -1449,14 +1449,14 @@ xfs_qm_internalqcheck(
1449 for (d = (xfs_dqtest_t *) h1->qh_next; d != NULL; ) { 1449 for (d = (xfs_dqtest_t *) h1->qh_next; d != NULL; ) {
1450 xfs_dqtest_cmp(d); 1450 xfs_dqtest_cmp(d);
1451 e = (xfs_dqtest_t *) d->HL_NEXT; 1451 e = (xfs_dqtest_t *) d->HL_NEXT;
1452 kmem_free(d, sizeof(xfs_dqtest_t)); 1452 kmem_free(d);
1453 d = e; 1453 d = e;
1454 } 1454 }
1455 h1 = &qmtest_gdqtab[i]; 1455 h1 = &qmtest_gdqtab[i];
1456 for (d = (xfs_dqtest_t *) h1->qh_next; d != NULL; ) { 1456 for (d = (xfs_dqtest_t *) h1->qh_next; d != NULL; ) {
1457 xfs_dqtest_cmp(d); 1457 xfs_dqtest_cmp(d);
1458 e = (xfs_dqtest_t *) d->HL_NEXT; 1458 e = (xfs_dqtest_t *) d->HL_NEXT;
1459 kmem_free(d, sizeof(xfs_dqtest_t)); 1459 kmem_free(d);
1460 d = e; 1460 d = e;
1461 } 1461 }
1462 } 1462 }
@@ -1467,8 +1467,8 @@ xfs_qm_internalqcheck(
1467 } else { 1467 } else {
1468 cmn_err(CE_DEBUG, "******** quotacheck successful! ********"); 1468 cmn_err(CE_DEBUG, "******** quotacheck successful! ********");
1469 } 1469 }
1470 kmem_free(qmtest_udqtab, qmtest_hashmask * sizeof(xfs_dqhash_t)); 1470 kmem_free(qmtest_udqtab);
1471 kmem_free(qmtest_gdqtab, qmtest_hashmask * sizeof(xfs_dqhash_t)); 1471 kmem_free(qmtest_gdqtab);
1472 mutex_unlock(&qcheck_lock); 1472 mutex_unlock(&qcheck_lock);
1473 return (qmtest_nfails); 1473 return (qmtest_nfails);
1474} 1474}
diff --git a/fs/xfs/quota/xfs_quota_priv.h b/fs/xfs/quota/xfs_quota_priv.h
index 5e4a40b1c565..c4fcea600bc2 100644
--- a/fs/xfs/quota/xfs_quota_priv.h
+++ b/fs/xfs/quota/xfs_quota_priv.h
@@ -158,9 +158,6 @@ for ((dqp) = (qlist)->qh_next; (dqp) != (xfs_dquot_t *)(qlist); \
158#define XFS_IS_SUSER_DQUOT(dqp) \ 158#define XFS_IS_SUSER_DQUOT(dqp) \
159 (!((dqp)->q_core.d_id)) 159 (!((dqp)->q_core.d_id))
160 160
161#define XFS_PURGE_INODE(ip) \
162 IRELE(ip);
163
164#define DQFLAGTO_TYPESTR(d) (((d)->dq_flags & XFS_DQ_USER) ? "USR" : \ 161#define DQFLAGTO_TYPESTR(d) (((d)->dq_flags & XFS_DQ_USER) ? "USR" : \
165 (((d)->dq_flags & XFS_DQ_GROUP) ? "GRP" : \ 162 (((d)->dq_flags & XFS_DQ_GROUP) ? "GRP" : \
166 (((d)->dq_flags & XFS_DQ_PROJ) ? "PRJ":"???"))) 163 (((d)->dq_flags & XFS_DQ_PROJ) ? "PRJ":"???")))
diff --git a/fs/xfs/support/ktrace.c b/fs/xfs/support/ktrace.c
index 0b75d302508f..a34ef05489b1 100644
--- a/fs/xfs/support/ktrace.c
+++ b/fs/xfs/support/ktrace.c
@@ -89,7 +89,7 @@ ktrace_alloc(int nentries, unsigned int __nocast sleep)
89 if (sleep & KM_SLEEP) 89 if (sleep & KM_SLEEP)
90 panic("ktrace_alloc: NULL memory on KM_SLEEP request!"); 90 panic("ktrace_alloc: NULL memory on KM_SLEEP request!");
91 91
92 kmem_free(ktp, sizeof(*ktp)); 92 kmem_free(ktp);
93 93
94 return NULL; 94 return NULL;
95 } 95 }
@@ -126,7 +126,7 @@ ktrace_free(ktrace_t *ktp)
126 } else { 126 } else {
127 entries_size = (int)(ktp->kt_nentries * sizeof(ktrace_entry_t)); 127 entries_size = (int)(ktp->kt_nentries * sizeof(ktrace_entry_t));
128 128
129 kmem_free(ktp->kt_entries, entries_size); 129 kmem_free(ktp->kt_entries);
130 } 130 }
131 131
132 kmem_zone_free(ktrace_hdr_zone, ktp); 132 kmem_zone_free(ktrace_hdr_zone, ktp);
diff --git a/fs/xfs/support/uuid.c b/fs/xfs/support/uuid.c
index 493a6ecf8590..5830c040ea7e 100644
--- a/fs/xfs/support/uuid.c
+++ b/fs/xfs/support/uuid.c
@@ -17,7 +17,7 @@
17 */ 17 */
18#include <xfs.h> 18#include <xfs.h>
19 19
20static mutex_t uuid_monitor; 20static DEFINE_MUTEX(uuid_monitor);
21static int uuid_table_size; 21static int uuid_table_size;
22static uuid_t *uuid_table; 22static uuid_t *uuid_table;
23 23
@@ -132,9 +132,3 @@ uuid_table_remove(uuid_t *uuid)
132 ASSERT(i < uuid_table_size); 132 ASSERT(i < uuid_table_size);
133 mutex_unlock(&uuid_monitor); 133 mutex_unlock(&uuid_monitor);
134} 134}
135
136void __init
137uuid_init(void)
138{
139 mutex_init(&uuid_monitor);
140}
diff --git a/fs/xfs/support/uuid.h b/fs/xfs/support/uuid.h
index b6f5922199ba..cff5b607d445 100644
--- a/fs/xfs/support/uuid.h
+++ b/fs/xfs/support/uuid.h
@@ -22,7 +22,6 @@ typedef struct {
22 unsigned char __u_bits[16]; 22 unsigned char __u_bits[16];
23} uuid_t; 23} uuid_t;
24 24
25extern void uuid_init(void);
26extern void uuid_create_nil(uuid_t *uuid); 25extern void uuid_create_nil(uuid_t *uuid);
27extern int uuid_is_nil(uuid_t *uuid); 26extern int uuid_is_nil(uuid_t *uuid);
28extern int uuid_equal(uuid_t *uuid1, uuid_t *uuid2); 27extern int uuid_equal(uuid_t *uuid1, uuid_t *uuid2);
diff --git a/fs/xfs/xfs_acl.c b/fs/xfs/xfs_acl.c
index ebee3a4f703a..3e4648ad9cfc 100644
--- a/fs/xfs/xfs_acl.c
+++ b/fs/xfs/xfs_acl.c
@@ -341,8 +341,7 @@ xfs_acl_iaccess(
341 341
342 /* If the file has no ACL return -1. */ 342 /* If the file has no ACL return -1. */
343 rval = sizeof(xfs_acl_t); 343 rval = sizeof(xfs_acl_t);
344 if (xfs_attr_fetch(ip, &acl_name, (char *)acl, &rval, 344 if (xfs_attr_fetch(ip, &acl_name, (char *)acl, &rval, ATTR_ROOT)) {
345 ATTR_ROOT | ATTR_KERNACCESS)) {
346 _ACL_FREE(acl); 345 _ACL_FREE(acl);
347 return -1; 346 return -1;
348 } 347 }
@@ -720,7 +719,7 @@ xfs_acl_setmode(
720 xfs_acl_t *acl, 719 xfs_acl_t *acl,
721 int *basicperms) 720 int *basicperms)
722{ 721{
723 bhv_vattr_t va; 722 struct iattr iattr;
724 xfs_acl_entry_t *ap; 723 xfs_acl_entry_t *ap;
725 xfs_acl_entry_t *gap = NULL; 724 xfs_acl_entry_t *gap = NULL;
726 int i, nomask = 1; 725 int i, nomask = 1;
@@ -734,25 +733,25 @@ xfs_acl_setmode(
734 * Copy the u::, g::, o::, and m:: bits from the ACL into the 733 * Copy the u::, g::, o::, and m:: bits from the ACL into the
735 * mode. The m:: bits take precedence over the g:: bits. 734 * mode. The m:: bits take precedence over the g:: bits.
736 */ 735 */
737 va.va_mask = XFS_AT_MODE; 736 iattr.ia_valid = ATTR_MODE;
738 va.va_mode = xfs_vtoi(vp)->i_d.di_mode; 737 iattr.ia_mode = xfs_vtoi(vp)->i_d.di_mode;
739 va.va_mode &= ~(S_IRWXU|S_IRWXG|S_IRWXO); 738 iattr.ia_mode &= ~(S_IRWXU|S_IRWXG|S_IRWXO);
740 ap = acl->acl_entry; 739 ap = acl->acl_entry;
741 for (i = 0; i < acl->acl_cnt; ++i) { 740 for (i = 0; i < acl->acl_cnt; ++i) {
742 switch (ap->ae_tag) { 741 switch (ap->ae_tag) {
743 case ACL_USER_OBJ: 742 case ACL_USER_OBJ:
744 va.va_mode |= ap->ae_perm << 6; 743 iattr.ia_mode |= ap->ae_perm << 6;
745 break; 744 break;
746 case ACL_GROUP_OBJ: 745 case ACL_GROUP_OBJ:
747 gap = ap; 746 gap = ap;
748 break; 747 break;
749 case ACL_MASK: /* more than just standard modes */ 748 case ACL_MASK: /* more than just standard modes */
750 nomask = 0; 749 nomask = 0;
751 va.va_mode |= ap->ae_perm << 3; 750 iattr.ia_mode |= ap->ae_perm << 3;
752 *basicperms = 0; 751 *basicperms = 0;
753 break; 752 break;
754 case ACL_OTHER: 753 case ACL_OTHER:
755 va.va_mode |= ap->ae_perm; 754 iattr.ia_mode |= ap->ae_perm;
756 break; 755 break;
757 default: /* more than just standard modes */ 756 default: /* more than just standard modes */
758 *basicperms = 0; 757 *basicperms = 0;
@@ -763,9 +762,9 @@ xfs_acl_setmode(
763 762
764 /* Set the group bits from ACL_GROUP_OBJ if there's no ACL_MASK */ 763 /* Set the group bits from ACL_GROUP_OBJ if there's no ACL_MASK */
765 if (gap && nomask) 764 if (gap && nomask)
766 va.va_mode |= gap->ae_perm << 3; 765 iattr.ia_mode |= gap->ae_perm << 3;
767 766
768 return xfs_setattr(xfs_vtoi(vp), &va, 0, sys_cred); 767 return xfs_setattr(xfs_vtoi(vp), &iattr, 0, sys_cred);
769} 768}
770 769
771/* 770/*
diff --git a/fs/xfs/xfs_acl.h b/fs/xfs/xfs_acl.h
index 332a772461c4..323ee94cf831 100644
--- a/fs/xfs/xfs_acl.h
+++ b/fs/xfs/xfs_acl.h
@@ -46,6 +46,8 @@ typedef struct xfs_acl {
46#define SGI_ACL_FILE_SIZE (sizeof(SGI_ACL_FILE)-1) 46#define SGI_ACL_FILE_SIZE (sizeof(SGI_ACL_FILE)-1)
47#define SGI_ACL_DEFAULT_SIZE (sizeof(SGI_ACL_DEFAULT)-1) 47#define SGI_ACL_DEFAULT_SIZE (sizeof(SGI_ACL_DEFAULT)-1)
48 48
49#define _ACL_TYPE_ACCESS 1
50#define _ACL_TYPE_DEFAULT 2
49 51
50#ifdef CONFIG_XFS_POSIX_ACL 52#ifdef CONFIG_XFS_POSIX_ACL
51 53
@@ -66,8 +68,6 @@ extern int xfs_acl_vset(bhv_vnode_t *, void *, size_t, int);
66extern int xfs_acl_vget(bhv_vnode_t *, void *, size_t, int); 68extern int xfs_acl_vget(bhv_vnode_t *, void *, size_t, int);
67extern int xfs_acl_vremove(bhv_vnode_t *, int); 69extern int xfs_acl_vremove(bhv_vnode_t *, int);
68 70
69#define _ACL_TYPE_ACCESS 1
70#define _ACL_TYPE_DEFAULT 2
71#define _ACL_PERM_INVALID(perm) ((perm) & ~(ACL_READ|ACL_WRITE|ACL_EXECUTE)) 71#define _ACL_PERM_INVALID(perm) ((perm) & ~(ACL_READ|ACL_WRITE|ACL_EXECUTE))
72 72
73#define _ACL_INHERIT(c,m,d) (xfs_acl_inherit(c,m,d)) 73#define _ACL_INHERIT(c,m,d) (xfs_acl_inherit(c,m,d))
diff --git a/fs/xfs/xfs_attr.c b/fs/xfs/xfs_attr.c
index df151a859186..78de80e3caa2 100644
--- a/fs/xfs/xfs_attr.c
+++ b/fs/xfs/xfs_attr.c
@@ -16,8 +16,6 @@
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */ 17 */
18 18
19#include <linux/capability.h>
20
21#include "xfs.h" 19#include "xfs.h"
22#include "xfs_fs.h" 20#include "xfs_fs.h"
23#include "xfs_types.h" 21#include "xfs_types.h"
@@ -57,11 +55,6 @@
57 * Provide the external interfaces to manage attribute lists. 55 * Provide the external interfaces to manage attribute lists.
58 */ 56 */
59 57
60#define ATTR_SYSCOUNT 2
61static struct attrnames posix_acl_access;
62static struct attrnames posix_acl_default;
63static struct attrnames *attr_system_names[ATTR_SYSCOUNT];
64
65/*======================================================================== 58/*========================================================================
66 * Function prototypes for the kernel. 59 * Function prototypes for the kernel.
67 *========================================================================*/ 60 *========================================================================*/
@@ -116,6 +109,17 @@ xfs_attr_name_to_xname(
116 return 0; 109 return 0;
117} 110}
118 111
112STATIC int
113xfs_inode_hasattr(
114 struct xfs_inode *ip)
115{
116 if (!XFS_IFORK_Q(ip) ||
117 (ip->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS &&
118 ip->i_d.di_anextents == 0))
119 return 0;
120 return 1;
121}
122
119/*======================================================================== 123/*========================================================================
120 * Overall external interface routines. 124 * Overall external interface routines.
121 *========================================================================*/ 125 *========================================================================*/
@@ -127,10 +131,8 @@ xfs_attr_fetch(xfs_inode_t *ip, struct xfs_name *name,
127 xfs_da_args_t args; 131 xfs_da_args_t args;
128 int error; 132 int error;
129 133
130 if ((XFS_IFORK_Q(ip) == 0) || 134 if (!xfs_inode_hasattr(ip))
131 (ip->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS && 135 return ENOATTR;
132 ip->i_d.di_anextents == 0))
133 return(ENOATTR);
134 136
135 /* 137 /*
136 * Fill in the arg structure for this request. 138 * Fill in the arg structure for this request.
@@ -148,11 +150,7 @@ xfs_attr_fetch(xfs_inode_t *ip, struct xfs_name *name,
148 /* 150 /*
149 * Decide on what work routines to call based on the inode size. 151 * Decide on what work routines to call based on the inode size.
150 */ 152 */
151 if (XFS_IFORK_Q(ip) == 0 || 153 if (ip->i_d.di_aformat == XFS_DINODE_FMT_LOCAL) {
152 (ip->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS &&
153 ip->i_d.di_anextents == 0)) {
154 error = XFS_ERROR(ENOATTR);
155 } else if (ip->i_d.di_aformat == XFS_DINODE_FMT_LOCAL) {
156 error = xfs_attr_shortform_getvalue(&args); 154 error = xfs_attr_shortform_getvalue(&args);
157 } else if (xfs_bmap_one_block(ip, XFS_ATTR_FORK)) { 155 } else if (xfs_bmap_one_block(ip, XFS_ATTR_FORK)) {
158 error = xfs_attr_leaf_get(&args); 156 error = xfs_attr_leaf_get(&args);
@@ -241,8 +239,7 @@ xfs_attr_set_int(xfs_inode_t *dp, struct xfs_name *name,
241 args.firstblock = &firstblock; 239 args.firstblock = &firstblock;
242 args.flist = &flist; 240 args.flist = &flist;
243 args.whichfork = XFS_ATTR_FORK; 241 args.whichfork = XFS_ATTR_FORK;
244 args.addname = 1; 242 args.op_flags = XFS_DA_OP_ADDNAME | XFS_DA_OP_OKNOENT;
245 args.oknoent = 1;
246 243
247 /* 244 /*
248 * Determine space new attribute will use, and if it would be 245 * Determine space new attribute will use, and if it would be
@@ -529,9 +526,7 @@ xfs_attr_remove_int(xfs_inode_t *dp, struct xfs_name *name, int flags)
529 /* 526 /*
530 * Decide on what work routines to call based on the inode size. 527 * Decide on what work routines to call based on the inode size.
531 */ 528 */
532 if (XFS_IFORK_Q(dp) == 0 || 529 if (!xfs_inode_hasattr(dp)) {
533 (dp->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS &&
534 dp->i_d.di_anextents == 0)) {
535 error = XFS_ERROR(ENOATTR); 530 error = XFS_ERROR(ENOATTR);
536 goto out; 531 goto out;
537 } 532 }
@@ -601,29 +596,33 @@ xfs_attr_remove(
601 return error; 596 return error;
602 597
603 xfs_ilock(dp, XFS_ILOCK_SHARED); 598 xfs_ilock(dp, XFS_ILOCK_SHARED);
604 if (XFS_IFORK_Q(dp) == 0 || 599 if (!xfs_inode_hasattr(dp)) {
605 (dp->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS &&
606 dp->i_d.di_anextents == 0)) {
607 xfs_iunlock(dp, XFS_ILOCK_SHARED); 600 xfs_iunlock(dp, XFS_ILOCK_SHARED);
608 return(XFS_ERROR(ENOATTR)); 601 return XFS_ERROR(ENOATTR);
609 } 602 }
610 xfs_iunlock(dp, XFS_ILOCK_SHARED); 603 xfs_iunlock(dp, XFS_ILOCK_SHARED);
611 604
612 return xfs_attr_remove_int(dp, &xname, flags); 605 return xfs_attr_remove_int(dp, &xname, flags);
613} 606}
614 607
615STATIC int 608int
616xfs_attr_list_int(xfs_attr_list_context_t *context) 609xfs_attr_list_int(xfs_attr_list_context_t *context)
617{ 610{
618 int error; 611 int error;
619 xfs_inode_t *dp = context->dp; 612 xfs_inode_t *dp = context->dp;
620 613
614 XFS_STATS_INC(xs_attr_list);
615
616 if (XFS_FORCED_SHUTDOWN(dp->i_mount))
617 return EIO;
618
619 xfs_ilock(dp, XFS_ILOCK_SHARED);
620 xfs_attr_trace_l_c("syscall start", context);
621
621 /* 622 /*
622 * Decide on what work routines to call based on the inode size. 623 * Decide on what work routines to call based on the inode size.
623 */ 624 */
624 if (XFS_IFORK_Q(dp) == 0 || 625 if (!xfs_inode_hasattr(dp)) {
625 (dp->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS &&
626 dp->i_d.di_anextents == 0)) {
627 error = 0; 626 error = 0;
628 } else if (dp->i_d.di_aformat == XFS_DINODE_FMT_LOCAL) { 627 } else if (dp->i_d.di_aformat == XFS_DINODE_FMT_LOCAL) {
629 error = xfs_attr_shortform_list(context); 628 error = xfs_attr_shortform_list(context);
@@ -632,6 +631,10 @@ xfs_attr_list_int(xfs_attr_list_context_t *context)
632 } else { 631 } else {
633 error = xfs_attr_node_list(context); 632 error = xfs_attr_node_list(context);
634 } 633 }
634
635 xfs_iunlock(dp, XFS_ILOCK_SHARED);
636 xfs_attr_trace_l_c("syscall end", context);
637
635 return error; 638 return error;
636} 639}
637 640
@@ -648,74 +651,50 @@ xfs_attr_list_int(xfs_attr_list_context_t *context)
648 */ 651 */
649/*ARGSUSED*/ 652/*ARGSUSED*/
650STATIC int 653STATIC int
651xfs_attr_put_listent(xfs_attr_list_context_t *context, attrnames_t *namesp, 654xfs_attr_put_listent(xfs_attr_list_context_t *context, int flags,
652 char *name, int namelen, 655 char *name, int namelen,
653 int valuelen, char *value) 656 int valuelen, char *value)
654{ 657{
658 struct attrlist *alist = (struct attrlist *)context->alist;
655 attrlist_ent_t *aep; 659 attrlist_ent_t *aep;
656 int arraytop; 660 int arraytop;
657 661
658 ASSERT(!(context->flags & ATTR_KERNOVAL)); 662 ASSERT(!(context->flags & ATTR_KERNOVAL));
659 ASSERT(context->count >= 0); 663 ASSERT(context->count >= 0);
660 ASSERT(context->count < (ATTR_MAX_VALUELEN/8)); 664 ASSERT(context->count < (ATTR_MAX_VALUELEN/8));
661 ASSERT(context->firstu >= sizeof(*context->alist)); 665 ASSERT(context->firstu >= sizeof(*alist));
662 ASSERT(context->firstu <= context->bufsize); 666 ASSERT(context->firstu <= context->bufsize);
663 667
664 arraytop = sizeof(*context->alist) + 668 /*
665 context->count * sizeof(context->alist->al_offset[0]); 669 * Only list entries in the right namespace.
670 */
671 if (((context->flags & ATTR_SECURE) == 0) !=
672 ((flags & XFS_ATTR_SECURE) == 0))
673 return 0;
674 if (((context->flags & ATTR_ROOT) == 0) !=
675 ((flags & XFS_ATTR_ROOT) == 0))
676 return 0;
677
678 arraytop = sizeof(*alist) +
679 context->count * sizeof(alist->al_offset[0]);
666 context->firstu -= ATTR_ENTSIZE(namelen); 680 context->firstu -= ATTR_ENTSIZE(namelen);
667 if (context->firstu < arraytop) { 681 if (context->firstu < arraytop) {
668 xfs_attr_trace_l_c("buffer full", context); 682 xfs_attr_trace_l_c("buffer full", context);
669 context->alist->al_more = 1; 683 alist->al_more = 1;
670 context->seen_enough = 1; 684 context->seen_enough = 1;
671 return 1; 685 return 1;
672 } 686 }
673 687
674 aep = (attrlist_ent_t *)&(((char *)context->alist)[ context->firstu ]); 688 aep = (attrlist_ent_t *)&context->alist[context->firstu];
675 aep->a_valuelen = valuelen; 689 aep->a_valuelen = valuelen;
676 memcpy(aep->a_name, name, namelen); 690 memcpy(aep->a_name, name, namelen);
677 aep->a_name[ namelen ] = 0; 691 aep->a_name[namelen] = 0;
678 context->alist->al_offset[ context->count++ ] = context->firstu; 692 alist->al_offset[context->count++] = context->firstu;
679 context->alist->al_count = context->count; 693 alist->al_count = context->count;
680 xfs_attr_trace_l_c("add", context); 694 xfs_attr_trace_l_c("add", context);
681 return 0; 695 return 0;
682} 696}
683 697
684STATIC int
685xfs_attr_kern_list(xfs_attr_list_context_t *context, attrnames_t *namesp,
686 char *name, int namelen,
687 int valuelen, char *value)
688{
689 char *offset;
690 int arraytop;
691
692 ASSERT(context->count >= 0);
693
694 arraytop = context->count + namesp->attr_namelen + namelen + 1;
695 if (arraytop > context->firstu) {
696 context->count = -1; /* insufficient space */
697 return 1;
698 }
699 offset = (char *)context->alist + context->count;
700 strncpy(offset, namesp->attr_name, namesp->attr_namelen);
701 offset += namesp->attr_namelen;
702 strncpy(offset, name, namelen); /* real name */
703 offset += namelen;
704 *offset = '\0';
705 context->count += namesp->attr_namelen + namelen + 1;
706 return 0;
707}
708
709/*ARGSUSED*/
710STATIC int
711xfs_attr_kern_list_sizes(xfs_attr_list_context_t *context, attrnames_t *namesp,
712 char *name, int namelen,
713 int valuelen, char *value)
714{
715 context->count += namesp->attr_namelen + namelen + 1;
716 return 0;
717}
718
719/* 698/*
720 * Generate a list of extended attribute names and optionally 699 * Generate a list of extended attribute names and optionally
721 * also value lengths. Positive return value follows the XFS 700 * also value lengths. Positive return value follows the XFS
@@ -732,10 +711,9 @@ xfs_attr_list(
732 attrlist_cursor_kern_t *cursor) 711 attrlist_cursor_kern_t *cursor)
733{ 712{
734 xfs_attr_list_context_t context; 713 xfs_attr_list_context_t context;
714 struct attrlist *alist;
735 int error; 715 int error;
736 716
737 XFS_STATS_INC(xs_attr_list);
738
739 /* 717 /*
740 * Validate the cursor. 718 * Validate the cursor.
741 */ 719 */
@@ -756,52 +734,23 @@ xfs_attr_list(
756 /* 734 /*
757 * Initialize the output buffer. 735 * Initialize the output buffer.
758 */ 736 */
737 memset(&context, 0, sizeof(context));
759 context.dp = dp; 738 context.dp = dp;
760 context.cursor = cursor; 739 context.cursor = cursor;
761 context.count = 0;
762 context.dupcnt = 0;
763 context.resynch = 1; 740 context.resynch = 1;
764 context.flags = flags; 741 context.flags = flags;
765 context.seen_enough = 0; 742 context.alist = buffer;
766 context.alist = (attrlist_t *)buffer; 743 context.bufsize = (bufsize & ~(sizeof(int)-1)); /* align */
767 context.put_value = 0; 744 context.firstu = context.bufsize;
768 745 context.put_listent = xfs_attr_put_listent;
769 if (flags & ATTR_KERNAMELS) {
770 context.bufsize = bufsize;
771 context.firstu = context.bufsize;
772 if (flags & ATTR_KERNOVAL)
773 context.put_listent = xfs_attr_kern_list_sizes;
774 else
775 context.put_listent = xfs_attr_kern_list;
776 } else {
777 context.bufsize = (bufsize & ~(sizeof(int)-1)); /* align */
778 context.firstu = context.bufsize;
779 context.alist->al_count = 0;
780 context.alist->al_more = 0;
781 context.alist->al_offset[0] = context.bufsize;
782 context.put_listent = xfs_attr_put_listent;
783 }
784 746
785 if (XFS_FORCED_SHUTDOWN(dp->i_mount)) 747 alist = (struct attrlist *)context.alist;
786 return EIO; 748 alist->al_count = 0;
787 749 alist->al_more = 0;
788 xfs_ilock(dp, XFS_ILOCK_SHARED); 750 alist->al_offset[0] = context.bufsize;
789 xfs_attr_trace_l_c("syscall start", &context);
790 751
791 error = xfs_attr_list_int(&context); 752 error = xfs_attr_list_int(&context);
792 753 ASSERT(error >= 0);
793 xfs_iunlock(dp, XFS_ILOCK_SHARED);
794 xfs_attr_trace_l_c("syscall end", &context);
795
796 if (context.flags & (ATTR_KERNOVAL|ATTR_KERNAMELS)) {
797 /* must return negated buffer size or the error */
798 if (context.count < 0)
799 error = XFS_ERROR(ERANGE);
800 else
801 error = -context.count;
802 } else
803 ASSERT(error >= 0);
804
805 return error; 754 return error;
806} 755}
807 756
@@ -816,12 +765,10 @@ xfs_attr_inactive(xfs_inode_t *dp)
816 ASSERT(! XFS_NOT_DQATTACHED(mp, dp)); 765 ASSERT(! XFS_NOT_DQATTACHED(mp, dp));
817 766
818 xfs_ilock(dp, XFS_ILOCK_SHARED); 767 xfs_ilock(dp, XFS_ILOCK_SHARED);
819 if ((XFS_IFORK_Q(dp) == 0) || 768 if (!xfs_inode_hasattr(dp) ||
820 (dp->i_d.di_aformat == XFS_DINODE_FMT_LOCAL) || 769 dp->i_d.di_aformat == XFS_DINODE_FMT_LOCAL) {
821 (dp->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS &&
822 dp->i_d.di_anextents == 0)) {
823 xfs_iunlock(dp, XFS_ILOCK_SHARED); 770 xfs_iunlock(dp, XFS_ILOCK_SHARED);
824 return(0); 771 return 0;
825 } 772 }
826 xfs_iunlock(dp, XFS_ILOCK_SHARED); 773 xfs_iunlock(dp, XFS_ILOCK_SHARED);
827 774
@@ -854,10 +801,8 @@ xfs_attr_inactive(xfs_inode_t *dp)
854 /* 801 /*
855 * Decide on what work routines to call based on the inode size. 802 * Decide on what work routines to call based on the inode size.
856 */ 803 */
857 if ((XFS_IFORK_Q(dp) == 0) || 804 if (!xfs_inode_hasattr(dp) ||
858 (dp->i_d.di_aformat == XFS_DINODE_FMT_LOCAL) || 805 dp->i_d.di_aformat == XFS_DINODE_FMT_LOCAL) {
859 (dp->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS &&
860 dp->i_d.di_anextents == 0)) {
861 error = 0; 806 error = 0;
862 goto out; 807 goto out;
863 } 808 }
@@ -974,7 +919,7 @@ xfs_attr_leaf_addname(xfs_da_args_t *args)
974 xfs_da_brelse(args->trans, bp); 919 xfs_da_brelse(args->trans, bp);
975 return(retval); 920 return(retval);
976 } 921 }
977 args->rename = 1; /* an atomic rename */ 922 args->op_flags |= XFS_DA_OP_RENAME; /* an atomic rename */
978 args->blkno2 = args->blkno; /* set 2nd entry info*/ 923 args->blkno2 = args->blkno; /* set 2nd entry info*/
979 args->index2 = args->index; 924 args->index2 = args->index;
980 args->rmtblkno2 = args->rmtblkno; 925 args->rmtblkno2 = args->rmtblkno;
@@ -1054,7 +999,7 @@ xfs_attr_leaf_addname(xfs_da_args_t *args)
1054 * so that one disappears and one appears atomically. Then we 999 * so that one disappears and one appears atomically. Then we
1055 * must remove the "old" attribute/value pair. 1000 * must remove the "old" attribute/value pair.
1056 */ 1001 */
1057 if (args->rename) { 1002 if (args->op_flags & XFS_DA_OP_RENAME) {
1058 /* 1003 /*
1059 * In a separate transaction, set the incomplete flag on the 1004 * In a separate transaction, set the incomplete flag on the
1060 * "old" attr and clear the incomplete flag on the "new" attr. 1005 * "old" attr and clear the incomplete flag on the "new" attr.
@@ -1307,7 +1252,7 @@ restart:
1307 } else if (retval == EEXIST) { 1252 } else if (retval == EEXIST) {
1308 if (args->flags & ATTR_CREATE) 1253 if (args->flags & ATTR_CREATE)
1309 goto out; 1254 goto out;
1310 args->rename = 1; /* atomic rename op */ 1255 args->op_flags |= XFS_DA_OP_RENAME; /* atomic rename op */
1311 args->blkno2 = args->blkno; /* set 2nd entry info*/ 1256 args->blkno2 = args->blkno; /* set 2nd entry info*/
1312 args->index2 = args->index; 1257 args->index2 = args->index;
1313 args->rmtblkno2 = args->rmtblkno; 1258 args->rmtblkno2 = args->rmtblkno;
@@ -1425,7 +1370,7 @@ restart:
1425 * so that one disappears and one appears atomically. Then we 1370 * so that one disappears and one appears atomically. Then we
1426 * must remove the "old" attribute/value pair. 1371 * must remove the "old" attribute/value pair.
1427 */ 1372 */
1428 if (args->rename) { 1373 if (args->op_flags & XFS_DA_OP_RENAME) {
1429 /* 1374 /*
1430 * In a separate transaction, set the incomplete flag on the 1375 * In a separate transaction, set the incomplete flag on the
1431 * "old" attr and clear the incomplete flag on the "new" attr. 1376 * "old" attr and clear the incomplete flag on the "new" attr.
@@ -2300,23 +2245,7 @@ xfs_attr_rmtval_remove(xfs_da_args_t *args)
2300void 2245void
2301xfs_attr_trace_l_c(char *where, struct xfs_attr_list_context *context) 2246xfs_attr_trace_l_c(char *where, struct xfs_attr_list_context *context)
2302{ 2247{
2303 xfs_attr_trace_enter(XFS_ATTR_KTRACE_L_C, where, 2248 xfs_attr_trace_enter(XFS_ATTR_KTRACE_L_C, where, context,
2304 (__psunsigned_t)context->dp,
2305 (__psunsigned_t)context->cursor->hashval,
2306 (__psunsigned_t)context->cursor->blkno,
2307 (__psunsigned_t)context->cursor->offset,
2308 (__psunsigned_t)context->alist,
2309 (__psunsigned_t)context->bufsize,
2310 (__psunsigned_t)context->count,
2311 (__psunsigned_t)context->firstu,
2312 (__psunsigned_t)
2313 ((context->count > 0) &&
2314 !(context->flags & (ATTR_KERNAMELS|ATTR_KERNOVAL)))
2315 ? (ATTR_ENTRY(context->alist,
2316 context->count-1)->a_valuelen)
2317 : 0,
2318 (__psunsigned_t)context->dupcnt,
2319 (__psunsigned_t)context->flags,
2320 (__psunsigned_t)NULL, 2249 (__psunsigned_t)NULL,
2321 (__psunsigned_t)NULL, 2250 (__psunsigned_t)NULL,
2322 (__psunsigned_t)NULL); 2251 (__psunsigned_t)NULL);
@@ -2329,23 +2258,7 @@ void
2329xfs_attr_trace_l_cn(char *where, struct xfs_attr_list_context *context, 2258xfs_attr_trace_l_cn(char *where, struct xfs_attr_list_context *context,
2330 struct xfs_da_intnode *node) 2259 struct xfs_da_intnode *node)
2331{ 2260{
2332 xfs_attr_trace_enter(XFS_ATTR_KTRACE_L_CN, where, 2261 xfs_attr_trace_enter(XFS_ATTR_KTRACE_L_CN, where, context,
2333 (__psunsigned_t)context->dp,
2334 (__psunsigned_t)context->cursor->hashval,
2335 (__psunsigned_t)context->cursor->blkno,
2336 (__psunsigned_t)context->cursor->offset,
2337 (__psunsigned_t)context->alist,
2338 (__psunsigned_t)context->bufsize,
2339 (__psunsigned_t)context->count,
2340 (__psunsigned_t)context->firstu,
2341 (__psunsigned_t)
2342 ((context->count > 0) &&
2343 !(context->flags & (ATTR_KERNAMELS|ATTR_KERNOVAL)))
2344 ? (ATTR_ENTRY(context->alist,
2345 context->count-1)->a_valuelen)
2346 : 0,
2347 (__psunsigned_t)context->dupcnt,
2348 (__psunsigned_t)context->flags,
2349 (__psunsigned_t)be16_to_cpu(node->hdr.count), 2262 (__psunsigned_t)be16_to_cpu(node->hdr.count),
2350 (__psunsigned_t)be32_to_cpu(node->btree[0].hashval), 2263 (__psunsigned_t)be32_to_cpu(node->btree[0].hashval),
2351 (__psunsigned_t)be32_to_cpu(node->btree[ 2264 (__psunsigned_t)be32_to_cpu(node->btree[
@@ -2359,23 +2272,7 @@ void
2359xfs_attr_trace_l_cb(char *where, struct xfs_attr_list_context *context, 2272xfs_attr_trace_l_cb(char *where, struct xfs_attr_list_context *context,
2360 struct xfs_da_node_entry *btree) 2273 struct xfs_da_node_entry *btree)
2361{ 2274{
2362 xfs_attr_trace_enter(XFS_ATTR_KTRACE_L_CB, where, 2275 xfs_attr_trace_enter(XFS_ATTR_KTRACE_L_CB, where, context,
2363 (__psunsigned_t)context->dp,
2364 (__psunsigned_t)context->cursor->hashval,
2365 (__psunsigned_t)context->cursor->blkno,
2366 (__psunsigned_t)context->cursor->offset,
2367 (__psunsigned_t)context->alist,
2368 (__psunsigned_t)context->bufsize,
2369 (__psunsigned_t)context->count,
2370 (__psunsigned_t)context->firstu,
2371 (__psunsigned_t)
2372 ((context->count > 0) &&
2373 !(context->flags & (ATTR_KERNAMELS|ATTR_KERNOVAL)))
2374 ? (ATTR_ENTRY(context->alist,
2375 context->count-1)->a_valuelen)
2376 : 0,
2377 (__psunsigned_t)context->dupcnt,
2378 (__psunsigned_t)context->flags,
2379 (__psunsigned_t)be32_to_cpu(btree->hashval), 2276 (__psunsigned_t)be32_to_cpu(btree->hashval),
2380 (__psunsigned_t)be32_to_cpu(btree->before), 2277 (__psunsigned_t)be32_to_cpu(btree->before),
2381 (__psunsigned_t)NULL); 2278 (__psunsigned_t)NULL);
@@ -2388,23 +2285,7 @@ void
2388xfs_attr_trace_l_cl(char *where, struct xfs_attr_list_context *context, 2285xfs_attr_trace_l_cl(char *where, struct xfs_attr_list_context *context,
2389 struct xfs_attr_leafblock *leaf) 2286 struct xfs_attr_leafblock *leaf)
2390{ 2287{
2391 xfs_attr_trace_enter(XFS_ATTR_KTRACE_L_CL, where, 2288 xfs_attr_trace_enter(XFS_ATTR_KTRACE_L_CL, where, context,
2392 (__psunsigned_t)context->dp,
2393 (__psunsigned_t)context->cursor->hashval,
2394 (__psunsigned_t)context->cursor->blkno,
2395 (__psunsigned_t)context->cursor->offset,
2396 (__psunsigned_t)context->alist,
2397 (__psunsigned_t)context->bufsize,
2398 (__psunsigned_t)context->count,
2399 (__psunsigned_t)context->firstu,
2400 (__psunsigned_t)
2401 ((context->count > 0) &&
2402 !(context->flags & (ATTR_KERNAMELS|ATTR_KERNOVAL)))
2403 ? (ATTR_ENTRY(context->alist,
2404 context->count-1)->a_valuelen)
2405 : 0,
2406 (__psunsigned_t)context->dupcnt,
2407 (__psunsigned_t)context->flags,
2408 (__psunsigned_t)be16_to_cpu(leaf->hdr.count), 2289 (__psunsigned_t)be16_to_cpu(leaf->hdr.count),
2409 (__psunsigned_t)be32_to_cpu(leaf->entries[0].hashval), 2290 (__psunsigned_t)be32_to_cpu(leaf->entries[0].hashval),
2410 (__psunsigned_t)be32_to_cpu(leaf->entries[ 2291 (__psunsigned_t)be32_to_cpu(leaf->entries[
@@ -2417,329 +2298,24 @@ xfs_attr_trace_l_cl(char *where, struct xfs_attr_list_context *context,
2417 */ 2298 */
2418void 2299void
2419xfs_attr_trace_enter(int type, char *where, 2300xfs_attr_trace_enter(int type, char *where,
2420 __psunsigned_t a2, __psunsigned_t a3, 2301 struct xfs_attr_list_context *context,
2421 __psunsigned_t a4, __psunsigned_t a5, 2302 __psunsigned_t a13, __psunsigned_t a14,
2422 __psunsigned_t a6, __psunsigned_t a7, 2303 __psunsigned_t a15)
2423 __psunsigned_t a8, __psunsigned_t a9,
2424 __psunsigned_t a10, __psunsigned_t a11,
2425 __psunsigned_t a12, __psunsigned_t a13,
2426 __psunsigned_t a14, __psunsigned_t a15)
2427{ 2304{
2428 ASSERT(xfs_attr_trace_buf); 2305 ASSERT(xfs_attr_trace_buf);
2429 ktrace_enter(xfs_attr_trace_buf, (void *)((__psunsigned_t)type), 2306 ktrace_enter(xfs_attr_trace_buf, (void *)((__psunsigned_t)type),
2430 (void *)where, 2307 (void *)((__psunsigned_t)where),
2431 (void *)a2, (void *)a3, (void *)a4, 2308 (void *)((__psunsigned_t)context->dp),
2432 (void *)a5, (void *)a6, (void *)a7, 2309 (void *)((__psunsigned_t)context->cursor->hashval),
2433 (void *)a8, (void *)a9, (void *)a10, 2310 (void *)((__psunsigned_t)context->cursor->blkno),
2434 (void *)a11, (void *)a12, (void *)a13, 2311 (void *)((__psunsigned_t)context->cursor->offset),
2435 (void *)a14, (void *)a15); 2312 (void *)((__psunsigned_t)context->alist),
2313 (void *)((__psunsigned_t)context->bufsize),
2314 (void *)((__psunsigned_t)context->count),
2315 (void *)((__psunsigned_t)context->firstu),
2316 NULL,
2317 (void *)((__psunsigned_t)context->dupcnt),
2318 (void *)((__psunsigned_t)context->flags),
2319 (void *)a13, (void *)a14, (void *)a15);
2436} 2320}
2437#endif /* XFS_ATTR_TRACE */ 2321#endif /* XFS_ATTR_TRACE */
2438
2439
2440/*========================================================================
2441 * System (pseudo) namespace attribute interface routines.
2442 *========================================================================*/
2443
2444STATIC int
2445posix_acl_access_set(
2446 bhv_vnode_t *vp, char *name, void *data, size_t size, int xflags)
2447{
2448 return xfs_acl_vset(vp, data, size, _ACL_TYPE_ACCESS);
2449}
2450
2451STATIC int
2452posix_acl_access_remove(
2453 bhv_vnode_t *vp, char *name, int xflags)
2454{
2455 return xfs_acl_vremove(vp, _ACL_TYPE_ACCESS);
2456}
2457
2458STATIC int
2459posix_acl_access_get(
2460 bhv_vnode_t *vp, char *name, void *data, size_t size, int xflags)
2461{
2462 return xfs_acl_vget(vp, data, size, _ACL_TYPE_ACCESS);
2463}
2464
2465STATIC int
2466posix_acl_access_exists(
2467 bhv_vnode_t *vp)
2468{
2469 return xfs_acl_vhasacl_access(vp);
2470}
2471
2472STATIC int
2473posix_acl_default_set(
2474 bhv_vnode_t *vp, char *name, void *data, size_t size, int xflags)
2475{
2476 return xfs_acl_vset(vp, data, size, _ACL_TYPE_DEFAULT);
2477}
2478
2479STATIC int
2480posix_acl_default_get(
2481 bhv_vnode_t *vp, char *name, void *data, size_t size, int xflags)
2482{
2483 return xfs_acl_vget(vp, data, size, _ACL_TYPE_DEFAULT);
2484}
2485
2486STATIC int
2487posix_acl_default_remove(
2488 bhv_vnode_t *vp, char *name, int xflags)
2489{
2490 return xfs_acl_vremove(vp, _ACL_TYPE_DEFAULT);
2491}
2492
2493STATIC int
2494posix_acl_default_exists(
2495 bhv_vnode_t *vp)
2496{
2497 return xfs_acl_vhasacl_default(vp);
2498}
2499
2500static struct attrnames posix_acl_access = {
2501 .attr_name = "posix_acl_access",
2502 .attr_namelen = sizeof("posix_acl_access") - 1,
2503 .attr_get = posix_acl_access_get,
2504 .attr_set = posix_acl_access_set,
2505 .attr_remove = posix_acl_access_remove,
2506 .attr_exists = posix_acl_access_exists,
2507};
2508
2509static struct attrnames posix_acl_default = {
2510 .attr_name = "posix_acl_default",
2511 .attr_namelen = sizeof("posix_acl_default") - 1,
2512 .attr_get = posix_acl_default_get,
2513 .attr_set = posix_acl_default_set,
2514 .attr_remove = posix_acl_default_remove,
2515 .attr_exists = posix_acl_default_exists,
2516};
2517
2518static struct attrnames *attr_system_names[] =
2519 { &posix_acl_access, &posix_acl_default };
2520
2521
2522/*========================================================================
2523 * Namespace-prefix-style attribute name interface routines.
2524 *========================================================================*/
2525
2526STATIC int
2527attr_generic_set(
2528 bhv_vnode_t *vp, char *name, void *data, size_t size, int xflags)
2529{
2530 return -xfs_attr_set(xfs_vtoi(vp), name, data, size, xflags);
2531}
2532
2533STATIC int
2534attr_generic_get(
2535 bhv_vnode_t *vp, char *name, void *data, size_t size, int xflags)
2536{
2537 int error, asize = size;
2538
2539 error = xfs_attr_get(xfs_vtoi(vp), name, data, &asize, xflags);
2540 if (!error)
2541 return asize;
2542 return -error;
2543}
2544
2545STATIC int
2546attr_generic_remove(
2547 bhv_vnode_t *vp, char *name, int xflags)
2548{
2549 return -xfs_attr_remove(xfs_vtoi(vp), name, xflags);
2550}
2551
2552STATIC int
2553attr_generic_listadd(
2554 attrnames_t *prefix,
2555 attrnames_t *namesp,
2556 void *data,
2557 size_t size,
2558 ssize_t *result)
2559{
2560 char *p = data + *result;
2561
2562 *result += prefix->attr_namelen;
2563 *result += namesp->attr_namelen + 1;
2564 if (!size)
2565 return 0;
2566 if (*result > size)
2567 return -ERANGE;
2568 strcpy(p, prefix->attr_name);
2569 p += prefix->attr_namelen;
2570 strcpy(p, namesp->attr_name);
2571 p += namesp->attr_namelen + 1;
2572 return 0;
2573}
2574
2575STATIC int
2576attr_system_list(
2577 bhv_vnode_t *vp,
2578 void *data,
2579 size_t size,
2580 ssize_t *result)
2581{
2582 attrnames_t *namesp;
2583 int i, error = 0;
2584
2585 for (i = 0; i < ATTR_SYSCOUNT; i++) {
2586 namesp = attr_system_names[i];
2587 if (!namesp->attr_exists || !namesp->attr_exists(vp))
2588 continue;
2589 error = attr_generic_listadd(&attr_system, namesp,
2590 data, size, result);
2591 if (error)
2592 break;
2593 }
2594 return error;
2595}
2596
2597int
2598attr_generic_list(
2599 bhv_vnode_t *vp, void *data, size_t size, int xflags, ssize_t *result)
2600{
2601 attrlist_cursor_kern_t cursor = { 0 };
2602 int error;
2603
2604 error = xfs_attr_list(xfs_vtoi(vp), data, size, xflags, &cursor);
2605 if (error > 0)
2606 return -error;
2607 *result = -error;
2608 return attr_system_list(vp, data, size, result);
2609}
2610
2611attrnames_t *
2612attr_lookup_namespace(
2613 char *name,
2614 struct attrnames **names,
2615 int nnames)
2616{
2617 int i;
2618
2619 for (i = 0; i < nnames; i++)
2620 if (!strncmp(name, names[i]->attr_name, names[i]->attr_namelen))
2621 return names[i];
2622 return NULL;
2623}
2624
2625/*
2626 * Some checks to prevent people abusing EAs to get over quota:
2627 * - Don't allow modifying user EAs on devices/symlinks;
2628 * - Don't allow modifying user EAs if sticky bit set;
2629 */
2630STATIC int
2631attr_user_capable(
2632 bhv_vnode_t *vp,
2633 cred_t *cred)
2634{
2635 struct inode *inode = vn_to_inode(vp);
2636
2637 if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
2638 return -EPERM;
2639 if (!S_ISREG(inode->i_mode) && !S_ISDIR(inode->i_mode) &&
2640 !capable(CAP_SYS_ADMIN))
2641 return -EPERM;
2642 if (S_ISDIR(inode->i_mode) && (inode->i_mode & S_ISVTX) &&
2643 (current_fsuid(cred) != inode->i_uid) && !capable(CAP_FOWNER))
2644 return -EPERM;
2645 return 0;
2646}
2647
2648STATIC int
2649attr_trusted_capable(
2650 bhv_vnode_t *vp,
2651 cred_t *cred)
2652{
2653 struct inode *inode = vn_to_inode(vp);
2654
2655 if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
2656 return -EPERM;
2657 if (!capable(CAP_SYS_ADMIN))
2658 return -EPERM;
2659 return 0;
2660}
2661
2662STATIC int
2663attr_system_set(
2664 bhv_vnode_t *vp, char *name, void *data, size_t size, int xflags)
2665{
2666 attrnames_t *namesp;
2667 int error;
2668
2669 if (xflags & ATTR_CREATE)
2670 return -EINVAL;
2671
2672 namesp = attr_lookup_namespace(name, attr_system_names, ATTR_SYSCOUNT);
2673 if (!namesp)
2674 return -EOPNOTSUPP;
2675 error = namesp->attr_set(vp, name, data, size, xflags);
2676 if (!error)
2677 error = vn_revalidate(vp);
2678 return error;
2679}
2680
2681STATIC int
2682attr_system_get(
2683 bhv_vnode_t *vp, char *name, void *data, size_t size, int xflags)
2684{
2685 attrnames_t *namesp;
2686
2687 namesp = attr_lookup_namespace(name, attr_system_names, ATTR_SYSCOUNT);
2688 if (!namesp)
2689 return -EOPNOTSUPP;
2690 return namesp->attr_get(vp, name, data, size, xflags);
2691}
2692
2693STATIC int
2694attr_system_remove(
2695 bhv_vnode_t *vp, char *name, int xflags)
2696{
2697 attrnames_t *namesp;
2698
2699 namesp = attr_lookup_namespace(name, attr_system_names, ATTR_SYSCOUNT);
2700 if (!namesp)
2701 return -EOPNOTSUPP;
2702 return namesp->attr_remove(vp, name, xflags);
2703}
2704
2705struct attrnames attr_system = {
2706 .attr_name = "system.",
2707 .attr_namelen = sizeof("system.") - 1,
2708 .attr_flag = ATTR_SYSTEM,
2709 .attr_get = attr_system_get,
2710 .attr_set = attr_system_set,
2711 .attr_remove = attr_system_remove,
2712 .attr_capable = (attrcapable_t)fs_noerr,
2713};
2714
2715struct attrnames attr_trusted = {
2716 .attr_name = "trusted.",
2717 .attr_namelen = sizeof("trusted.") - 1,
2718 .attr_flag = ATTR_ROOT,
2719 .attr_get = attr_generic_get,
2720 .attr_set = attr_generic_set,
2721 .attr_remove = attr_generic_remove,
2722 .attr_capable = attr_trusted_capable,
2723};
2724
2725struct attrnames attr_secure = {
2726 .attr_name = "security.",
2727 .attr_namelen = sizeof("security.") - 1,
2728 .attr_flag = ATTR_SECURE,
2729 .attr_get = attr_generic_get,
2730 .attr_set = attr_generic_set,
2731 .attr_remove = attr_generic_remove,
2732 .attr_capable = (attrcapable_t)fs_noerr,
2733};
2734
2735struct attrnames attr_user = {
2736 .attr_name = "user.",
2737 .attr_namelen = sizeof("user.") - 1,
2738 .attr_get = attr_generic_get,
2739 .attr_set = attr_generic_set,
2740 .attr_remove = attr_generic_remove,
2741 .attr_capable = attr_user_capable,
2742};
2743
2744struct attrnames *attr_namespaces[] =
2745 { &attr_system, &attr_trusted, &attr_secure, &attr_user };
diff --git a/fs/xfs/xfs_attr.h b/fs/xfs/xfs_attr.h
index 6cfc9384fe35..8b2d31c19e4d 100644
--- a/fs/xfs/xfs_attr.h
+++ b/fs/xfs/xfs_attr.h
@@ -18,9 +18,11 @@
18#ifndef __XFS_ATTR_H__ 18#ifndef __XFS_ATTR_H__
19#define __XFS_ATTR_H__ 19#define __XFS_ATTR_H__
20 20
21struct xfs_inode;
22struct xfs_da_args;
23struct xfs_attr_list_context;
24
21/* 25/*
22 * xfs_attr.h
23 *
24 * Large attribute lists are structured around Btrees where all the data 26 * Large attribute lists are structured around Btrees where all the data
25 * elements are in the leaf nodes. Attribute names are hashed into an int, 27 * elements are in the leaf nodes. Attribute names are hashed into an int,
26 * then that int is used as the index into the Btree. Since the hashval 28 * then that int is used as the index into the Btree. Since the hashval
@@ -35,35 +37,6 @@
35 * External interfaces 37 * External interfaces
36 *========================================================================*/ 38 *========================================================================*/
37 39
38struct cred;
39struct xfs_attr_list_context;
40
41typedef int (*attrset_t)(bhv_vnode_t *, char *, void *, size_t, int);
42typedef int (*attrget_t)(bhv_vnode_t *, char *, void *, size_t, int);
43typedef int (*attrremove_t)(bhv_vnode_t *, char *, int);
44typedef int (*attrexists_t)(bhv_vnode_t *);
45typedef int (*attrcapable_t)(bhv_vnode_t *, struct cred *);
46
47typedef struct attrnames {
48 char * attr_name;
49 unsigned int attr_namelen;
50 unsigned int attr_flag;
51 attrget_t attr_get;
52 attrset_t attr_set;
53 attrremove_t attr_remove;
54 attrexists_t attr_exists;
55 attrcapable_t attr_capable;
56} attrnames_t;
57
58#define ATTR_NAMECOUNT 4
59extern struct attrnames attr_user;
60extern struct attrnames attr_secure;
61extern struct attrnames attr_system;
62extern struct attrnames attr_trusted;
63extern struct attrnames *attr_namespaces[ATTR_NAMECOUNT];
64
65extern attrnames_t *attr_lookup_namespace(char *, attrnames_t **, int);
66extern int attr_generic_list(bhv_vnode_t *, void *, size_t, int, ssize_t *);
67 40
68#define ATTR_DONTFOLLOW 0x0001 /* -- unused, from IRIX -- */ 41#define ATTR_DONTFOLLOW 0x0001 /* -- unused, from IRIX -- */
69#define ATTR_ROOT 0x0002 /* use attrs in root (trusted) namespace */ 42#define ATTR_ROOT 0x0002 /* use attrs in root (trusted) namespace */
@@ -71,16 +44,9 @@ extern int attr_generic_list(bhv_vnode_t *, void *, size_t, int, ssize_t *);
71#define ATTR_SECURE 0x0008 /* use attrs in security namespace */ 44#define ATTR_SECURE 0x0008 /* use attrs in security namespace */
72#define ATTR_CREATE 0x0010 /* pure create: fail if attr already exists */ 45#define ATTR_CREATE 0x0010 /* pure create: fail if attr already exists */
73#define ATTR_REPLACE 0x0020 /* pure set: fail if attr does not exist */ 46#define ATTR_REPLACE 0x0020 /* pure set: fail if attr does not exist */
74#define ATTR_SYSTEM 0x0100 /* use attrs in system (pseudo) namespace */
75 47
76#define ATTR_KERNACCESS 0x0400 /* [kernel] iaccess, inode held io-locked */
77#define ATTR_KERNOTIME 0x1000 /* [kernel] don't update inode timestamps */ 48#define ATTR_KERNOTIME 0x1000 /* [kernel] don't update inode timestamps */
78#define ATTR_KERNOVAL 0x2000 /* [kernel] get attr size only, not value */ 49#define ATTR_KERNOVAL 0x2000 /* [kernel] get attr size only, not value */
79#define ATTR_KERNAMELS 0x4000 /* [kernel] list attr names (simple list) */
80
81#define ATTR_KERNORMALS 0x0800 /* [kernel] normal attr list: user+secure */
82#define ATTR_KERNROOTLS 0x8000 /* [kernel] include root in the attr list */
83#define ATTR_KERNFULLS (ATTR_KERNORMALS|ATTR_KERNROOTLS)
84 50
85/* 51/*
86 * The maximum size (into the kernel or returned from the kernel) of an 52 * The maximum size (into the kernel or returned from the kernel) of an
@@ -119,22 +85,6 @@ typedef struct attrlist_ent { /* data from attr_list() */
119 &((char *)buffer)[ ((attrlist_t *)(buffer))->al_offset[index] ]) 85 &((char *)buffer)[ ((attrlist_t *)(buffer))->al_offset[index] ])
120 86
121/* 87/*
122 * Multi-attribute operation vector.
123 */
124typedef struct attr_multiop {
125 int am_opcode; /* operation to perform (ATTR_OP_GET, etc.) */
126 int am_error; /* [out arg] result of this sub-op (an errno) */
127 char *am_attrname; /* attribute name to work with */
128 char *am_attrvalue; /* [in/out arg] attribute value (raw bytes) */
129 int am_length; /* [in/out arg] length of value */
130 int am_flags; /* bitwise OR of attr API flags defined above */
131} attr_multiop_t;
132
133#define ATTR_OP_GET 1 /* return the indicated attr's value */
134#define ATTR_OP_SET 2 /* set/create the indicated attr/value pair */
135#define ATTR_OP_REMOVE 3 /* remove the indicated attr */
136
137/*
138 * Kernel-internal version of the attrlist cursor. 88 * Kernel-internal version of the attrlist cursor.
139 */ 89 */
140typedef struct attrlist_cursor_kern { 90typedef struct attrlist_cursor_kern {
@@ -148,20 +98,40 @@ typedef struct attrlist_cursor_kern {
148 98
149 99
150/*======================================================================== 100/*========================================================================
151 * Function prototypes for the kernel. 101 * Structure used to pass context around among the routines.
152 *========================================================================*/ 102 *========================================================================*/
153 103
154struct xfs_inode; 104
155struct attrlist_cursor_kern; 105typedef int (*put_listent_func_t)(struct xfs_attr_list_context *, int,
156struct xfs_da_args; 106 char *, int, int, char *);
107
108typedef struct xfs_attr_list_context {
109 struct xfs_inode *dp; /* inode */
110 struct attrlist_cursor_kern *cursor; /* position in list */
111 char *alist; /* output buffer */
112 int seen_enough; /* T/F: seen enough of list? */
113 ssize_t count; /* num used entries */
114 int dupcnt; /* count dup hashvals seen */
115 int bufsize; /* total buffer size */
116 int firstu; /* first used byte in buffer */
117 int flags; /* from VOP call */
118 int resynch; /* T/F: resynch with cursor */
119 int put_value; /* T/F: need value for listent */
120 put_listent_func_t put_listent; /* list output fmt function */
121 int index; /* index into output buffer */
122} xfs_attr_list_context_t;
123
124
125/*========================================================================
126 * Function prototypes for the kernel.
127 *========================================================================*/
157 128
158/* 129/*
159 * Overall external interface routines. 130 * Overall external interface routines.
160 */ 131 */
161int xfs_attr_inactive(struct xfs_inode *dp); 132int xfs_attr_inactive(struct xfs_inode *dp);
162
163int xfs_attr_shortform_getvalue(struct xfs_da_args *);
164int xfs_attr_fetch(struct xfs_inode *, struct xfs_name *, char *, int *, int); 133int xfs_attr_fetch(struct xfs_inode *, struct xfs_name *, char *, int *, int);
165int xfs_attr_rmtval_get(struct xfs_da_args *args); 134int xfs_attr_rmtval_get(struct xfs_da_args *args);
135int xfs_attr_list_int(struct xfs_attr_list_context *);
166 136
167#endif /* __XFS_ATTR_H__ */ 137#endif /* __XFS_ATTR_H__ */
diff --git a/fs/xfs/xfs_attr_leaf.c b/fs/xfs/xfs_attr_leaf.c
index 303d41e4217b..23ef5d7c87e1 100644
--- a/fs/xfs/xfs_attr_leaf.c
+++ b/fs/xfs/xfs_attr_leaf.c
@@ -94,13 +94,6 @@ STATIC int xfs_attr_leaf_entsize(xfs_attr_leafblock_t *leaf, int index);
94 * Namespace helper routines 94 * Namespace helper routines
95 *========================================================================*/ 95 *========================================================================*/
96 96
97STATIC_INLINE attrnames_t *
98xfs_attr_flags_namesp(int flags)
99{
100 return ((flags & XFS_ATTR_SECURE) ? &attr_secure:
101 ((flags & XFS_ATTR_ROOT) ? &attr_trusted : &attr_user));
102}
103
104/* 97/*
105 * If namespace bits don't match return 0. 98 * If namespace bits don't match return 0.
106 * If all match then return 1. 99 * If all match then return 1.
@@ -111,25 +104,6 @@ xfs_attr_namesp_match(int arg_flags, int ondisk_flags)
111 return XFS_ATTR_NSP_ONDISK(ondisk_flags) == XFS_ATTR_NSP_ARGS_TO_ONDISK(arg_flags); 104 return XFS_ATTR_NSP_ONDISK(ondisk_flags) == XFS_ATTR_NSP_ARGS_TO_ONDISK(arg_flags);
112} 105}
113 106
114/*
115 * If namespace bits don't match and we don't have an override for it
116 * then return 0.
117 * If all match or are overridable then return 1.
118 */
119STATIC_INLINE int
120xfs_attr_namesp_match_overrides(int arg_flags, int ondisk_flags)
121{
122 if (((arg_flags & ATTR_SECURE) == 0) !=
123 ((ondisk_flags & XFS_ATTR_SECURE) == 0) &&
124 !(arg_flags & ATTR_KERNORMALS))
125 return 0;
126 if (((arg_flags & ATTR_ROOT) == 0) !=
127 ((ondisk_flags & XFS_ATTR_ROOT) == 0) &&
128 !(arg_flags & ATTR_KERNROOTLS))
129 return 0;
130 return 1;
131}
132
133 107
134/*======================================================================== 108/*========================================================================
135 * External routines when attribute fork size < XFS_LITINO(mp). 109 * External routines when attribute fork size < XFS_LITINO(mp).
@@ -369,9 +343,10 @@ xfs_attr_shortform_remove(xfs_da_args_t *args)
369 * Fix up the start offset of the attribute fork 343 * Fix up the start offset of the attribute fork
370 */ 344 */
371 totsize -= size; 345 totsize -= size;
372 if (totsize == sizeof(xfs_attr_sf_hdr_t) && !args->addname && 346 if (totsize == sizeof(xfs_attr_sf_hdr_t) &&
373 (mp->m_flags & XFS_MOUNT_ATTR2) && 347 !(args->op_flags & XFS_DA_OP_ADDNAME) &&
374 (dp->i_d.di_format != XFS_DINODE_FMT_BTREE)) { 348 (mp->m_flags & XFS_MOUNT_ATTR2) &&
349 (dp->i_d.di_format != XFS_DINODE_FMT_BTREE)) {
375 /* 350 /*
376 * Last attribute now removed, revert to original 351 * Last attribute now removed, revert to original
377 * inode format making all literal area available 352 * inode format making all literal area available
@@ -389,9 +364,10 @@ xfs_attr_shortform_remove(xfs_da_args_t *args)
389 xfs_idata_realloc(dp, -size, XFS_ATTR_FORK); 364 xfs_idata_realloc(dp, -size, XFS_ATTR_FORK);
390 dp->i_d.di_forkoff = xfs_attr_shortform_bytesfit(dp, totsize); 365 dp->i_d.di_forkoff = xfs_attr_shortform_bytesfit(dp, totsize);
391 ASSERT(dp->i_d.di_forkoff); 366 ASSERT(dp->i_d.di_forkoff);
392 ASSERT(totsize > sizeof(xfs_attr_sf_hdr_t) || args->addname || 367 ASSERT(totsize > sizeof(xfs_attr_sf_hdr_t) ||
393 !(mp->m_flags & XFS_MOUNT_ATTR2) || 368 (args->op_flags & XFS_DA_OP_ADDNAME) ||
394 dp->i_d.di_format == XFS_DINODE_FMT_BTREE); 369 !(mp->m_flags & XFS_MOUNT_ATTR2) ||
370 dp->i_d.di_format == XFS_DINODE_FMT_BTREE);
395 dp->i_afp->if_ext_max = 371 dp->i_afp->if_ext_max =
396 XFS_IFORK_ASIZE(dp) / (uint)sizeof(xfs_bmbt_rec_t); 372 XFS_IFORK_ASIZE(dp) / (uint)sizeof(xfs_bmbt_rec_t);
397 dp->i_df.if_ext_max = 373 dp->i_df.if_ext_max =
@@ -531,7 +507,7 @@ xfs_attr_shortform_to_leaf(xfs_da_args_t *args)
531 nargs.total = args->total; 507 nargs.total = args->total;
532 nargs.whichfork = XFS_ATTR_FORK; 508 nargs.whichfork = XFS_ATTR_FORK;
533 nargs.trans = args->trans; 509 nargs.trans = args->trans;
534 nargs.oknoent = 1; 510 nargs.op_flags = XFS_DA_OP_OKNOENT;
535 511
536 sfe = &sf->list[0]; 512 sfe = &sf->list[0];
537 for (i = 0; i < sf->hdr.count; i++) { 513 for (i = 0; i < sf->hdr.count; i++) {
@@ -555,7 +531,7 @@ xfs_attr_shortform_to_leaf(xfs_da_args_t *args)
555out: 531out:
556 if(bp) 532 if(bp)
557 xfs_da_buf_done(bp); 533 xfs_da_buf_done(bp);
558 kmem_free(tmpbuffer, size); 534 kmem_free(tmpbuffer);
559 return(error); 535 return(error);
560} 536}
561 537
@@ -624,15 +600,8 @@ xfs_attr_shortform_list(xfs_attr_list_context_t *context)
624 (XFS_ISRESET_CURSOR(cursor) && 600 (XFS_ISRESET_CURSOR(cursor) &&
625 (dp->i_afp->if_bytes + sf->hdr.count * 16) < context->bufsize)) { 601 (dp->i_afp->if_bytes + sf->hdr.count * 16) < context->bufsize)) {
626 for (i = 0, sfe = &sf->list[0]; i < sf->hdr.count; i++) { 602 for (i = 0, sfe = &sf->list[0]; i < sf->hdr.count; i++) {
627 attrnames_t *namesp;
628
629 if (!xfs_attr_namesp_match_overrides(context->flags, sfe->flags)) {
630 sfe = XFS_ATTR_SF_NEXTENTRY(sfe);
631 continue;
632 }
633 namesp = xfs_attr_flags_namesp(sfe->flags);
634 error = context->put_listent(context, 603 error = context->put_listent(context,
635 namesp, 604 sfe->flags,
636 (char *)sfe->nameval, 605 (char *)sfe->nameval,
637 (int)sfe->namelen, 606 (int)sfe->namelen,
638 (int)sfe->valuelen, 607 (int)sfe->valuelen,
@@ -676,13 +645,10 @@ xfs_attr_shortform_list(xfs_attr_list_context_t *context)
676 XFS_ERRLEVEL_LOW, 645 XFS_ERRLEVEL_LOW,
677 context->dp->i_mount, sfe); 646 context->dp->i_mount, sfe);
678 xfs_attr_trace_l_c("sf corrupted", context); 647 xfs_attr_trace_l_c("sf corrupted", context);
679 kmem_free(sbuf, sbsize); 648 kmem_free(sbuf);
680 return XFS_ERROR(EFSCORRUPTED); 649 return XFS_ERROR(EFSCORRUPTED);
681 } 650 }
682 if (!xfs_attr_namesp_match_overrides(context->flags, sfe->flags)) { 651
683 sfe = XFS_ATTR_SF_NEXTENTRY(sfe);
684 continue;
685 }
686 sbp->entno = i; 652 sbp->entno = i;
687 sbp->hash = xfs_da_hashname((char *)sfe->nameval, sfe->namelen); 653 sbp->hash = xfs_da_hashname((char *)sfe->nameval, sfe->namelen);
688 sbp->name = (char *)sfe->nameval; 654 sbp->name = (char *)sfe->nameval;
@@ -717,7 +683,7 @@ xfs_attr_shortform_list(xfs_attr_list_context_t *context)
717 } 683 }
718 } 684 }
719 if (i == nsbuf) { 685 if (i == nsbuf) {
720 kmem_free(sbuf, sbsize); 686 kmem_free(sbuf);
721 xfs_attr_trace_l_c("blk end", context); 687 xfs_attr_trace_l_c("blk end", context);
722 return(0); 688 return(0);
723 } 689 }
@@ -726,16 +692,12 @@ xfs_attr_shortform_list(xfs_attr_list_context_t *context)
726 * Loop putting entries into the user buffer. 692 * Loop putting entries into the user buffer.
727 */ 693 */
728 for ( ; i < nsbuf; i++, sbp++) { 694 for ( ; i < nsbuf; i++, sbp++) {
729 attrnames_t *namesp;
730
731 namesp = xfs_attr_flags_namesp(sbp->flags);
732
733 if (cursor->hashval != sbp->hash) { 695 if (cursor->hashval != sbp->hash) {
734 cursor->hashval = sbp->hash; 696 cursor->hashval = sbp->hash;
735 cursor->offset = 0; 697 cursor->offset = 0;
736 } 698 }
737 error = context->put_listent(context, 699 error = context->put_listent(context,
738 namesp, 700 sbp->flags,
739 sbp->name, 701 sbp->name,
740 sbp->namelen, 702 sbp->namelen,
741 sbp->valuelen, 703 sbp->valuelen,
@@ -747,7 +709,7 @@ xfs_attr_shortform_list(xfs_attr_list_context_t *context)
747 cursor->offset++; 709 cursor->offset++;
748 } 710 }
749 711
750 kmem_free(sbuf, sbsize); 712 kmem_free(sbuf);
751 xfs_attr_trace_l_c("sf E-O-F", context); 713 xfs_attr_trace_l_c("sf E-O-F", context);
752 return(0); 714 return(0);
753} 715}
@@ -853,7 +815,7 @@ xfs_attr_leaf_to_shortform(xfs_dabuf_t *bp, xfs_da_args_t *args, int forkoff)
853 nargs.total = args->total; 815 nargs.total = args->total;
854 nargs.whichfork = XFS_ATTR_FORK; 816 nargs.whichfork = XFS_ATTR_FORK;
855 nargs.trans = args->trans; 817 nargs.trans = args->trans;
856 nargs.oknoent = 1; 818 nargs.op_flags = XFS_DA_OP_OKNOENT;
857 entry = &leaf->entries[0]; 819 entry = &leaf->entries[0];
858 for (i = 0; i < be16_to_cpu(leaf->hdr.count); entry++, i++) { 820 for (i = 0; i < be16_to_cpu(leaf->hdr.count); entry++, i++) {
859 if (entry->flags & XFS_ATTR_INCOMPLETE) 821 if (entry->flags & XFS_ATTR_INCOMPLETE)
@@ -873,7 +835,7 @@ xfs_attr_leaf_to_shortform(xfs_dabuf_t *bp, xfs_da_args_t *args, int forkoff)
873 error = 0; 835 error = 0;
874 836
875out: 837out:
876 kmem_free(tmpbuffer, XFS_LBSIZE(dp->i_mount)); 838 kmem_free(tmpbuffer);
877 return(error); 839 return(error);
878} 840}
879 841
@@ -1155,7 +1117,7 @@ xfs_attr_leaf_add_work(xfs_dabuf_t *bp, xfs_da_args_t *args, int mapindex)
1155 entry->hashval = cpu_to_be32(args->hashval); 1117 entry->hashval = cpu_to_be32(args->hashval);
1156 entry->flags = tmp ? XFS_ATTR_LOCAL : 0; 1118 entry->flags = tmp ? XFS_ATTR_LOCAL : 0;
1157 entry->flags |= XFS_ATTR_NSP_ARGS_TO_ONDISK(args->flags); 1119 entry->flags |= XFS_ATTR_NSP_ARGS_TO_ONDISK(args->flags);
1158 if (args->rename) { 1120 if (args->op_flags & XFS_DA_OP_RENAME) {
1159 entry->flags |= XFS_ATTR_INCOMPLETE; 1121 entry->flags |= XFS_ATTR_INCOMPLETE;
1160 if ((args->blkno2 == args->blkno) && 1122 if ((args->blkno2 == args->blkno) &&
1161 (args->index2 <= args->index)) { 1123 (args->index2 <= args->index)) {
@@ -1271,7 +1233,7 @@ xfs_attr_leaf_compact(xfs_trans_t *trans, xfs_dabuf_t *bp)
1271 be16_to_cpu(hdr_s->count), mp); 1233 be16_to_cpu(hdr_s->count), mp);
1272 xfs_da_log_buf(trans, bp, 0, XFS_LBSIZE(mp) - 1); 1234 xfs_da_log_buf(trans, bp, 0, XFS_LBSIZE(mp) - 1);
1273 1235
1274 kmem_free(tmpbuffer, XFS_LBSIZE(mp)); 1236 kmem_free(tmpbuffer);
1275} 1237}
1276 1238
1277/* 1239/*
@@ -1921,7 +1883,7 @@ xfs_attr_leaf_unbalance(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk,
1921 be16_to_cpu(drop_hdr->count), mp); 1883 be16_to_cpu(drop_hdr->count), mp);
1922 } 1884 }
1923 memcpy((char *)save_leaf, (char *)tmp_leaf, state->blocksize); 1885 memcpy((char *)save_leaf, (char *)tmp_leaf, state->blocksize);
1924 kmem_free(tmpbuffer, state->blocksize); 1886 kmem_free(tmpbuffer);
1925 } 1887 }
1926 1888
1927 xfs_da_log_buf(state->args->trans, save_blk->bp, 0, 1889 xfs_da_log_buf(state->args->trans, save_blk->bp, 0,
@@ -2400,8 +2362,6 @@ xfs_attr_leaf_list_int(xfs_dabuf_t *bp, xfs_attr_list_context_t *context)
2400 */ 2362 */
2401 retval = 0; 2363 retval = 0;
2402 for ( ; (i < be16_to_cpu(leaf->hdr.count)); entry++, i++) { 2364 for ( ; (i < be16_to_cpu(leaf->hdr.count)); entry++, i++) {
2403 attrnames_t *namesp;
2404
2405 if (be32_to_cpu(entry->hashval) != cursor->hashval) { 2365 if (be32_to_cpu(entry->hashval) != cursor->hashval) {
2406 cursor->hashval = be32_to_cpu(entry->hashval); 2366 cursor->hashval = be32_to_cpu(entry->hashval);
2407 cursor->offset = 0; 2367 cursor->offset = 0;
@@ -2409,17 +2369,13 @@ xfs_attr_leaf_list_int(xfs_dabuf_t *bp, xfs_attr_list_context_t *context)
2409 2369
2410 if (entry->flags & XFS_ATTR_INCOMPLETE) 2370 if (entry->flags & XFS_ATTR_INCOMPLETE)
2411 continue; /* skip incomplete entries */ 2371 continue; /* skip incomplete entries */
2412 if (!xfs_attr_namesp_match_overrides(context->flags, entry->flags))
2413 continue;
2414
2415 namesp = xfs_attr_flags_namesp(entry->flags);
2416 2372
2417 if (entry->flags & XFS_ATTR_LOCAL) { 2373 if (entry->flags & XFS_ATTR_LOCAL) {
2418 xfs_attr_leaf_name_local_t *name_loc = 2374 xfs_attr_leaf_name_local_t *name_loc =
2419 XFS_ATTR_LEAF_NAME_LOCAL(leaf, i); 2375 XFS_ATTR_LEAF_NAME_LOCAL(leaf, i);
2420 2376
2421 retval = context->put_listent(context, 2377 retval = context->put_listent(context,
2422 namesp, 2378 entry->flags,
2423 (char *)name_loc->nameval, 2379 (char *)name_loc->nameval,
2424 (int)name_loc->namelen, 2380 (int)name_loc->namelen,
2425 be16_to_cpu(name_loc->valuelen), 2381 be16_to_cpu(name_loc->valuelen),
@@ -2446,16 +2402,15 @@ xfs_attr_leaf_list_int(xfs_dabuf_t *bp, xfs_attr_list_context_t *context)
2446 if (retval) 2402 if (retval)
2447 return retval; 2403 return retval;
2448 retval = context->put_listent(context, 2404 retval = context->put_listent(context,
2449 namesp, 2405 entry->flags,
2450 (char *)name_rmt->name, 2406 (char *)name_rmt->name,
2451 (int)name_rmt->namelen, 2407 (int)name_rmt->namelen,
2452 valuelen, 2408 valuelen,
2453 (char*)args.value); 2409 (char*)args.value);
2454 kmem_free(args.value, valuelen); 2410 kmem_free(args.value);
2455 } 2411 } else {
2456 else {
2457 retval = context->put_listent(context, 2412 retval = context->put_listent(context,
2458 namesp, 2413 entry->flags,
2459 (char *)name_rmt->name, 2414 (char *)name_rmt->name,
2460 (int)name_rmt->namelen, 2415 (int)name_rmt->namelen,
2461 valuelen, 2416 valuelen,
@@ -2954,7 +2909,7 @@ xfs_attr_leaf_inactive(xfs_trans_t **trans, xfs_inode_t *dp, xfs_dabuf_t *bp)
2954 error = tmp; /* save only the 1st errno */ 2909 error = tmp; /* save only the 1st errno */
2955 } 2910 }
2956 2911
2957 kmem_free((xfs_caddr_t)list, size); 2912 kmem_free((xfs_caddr_t)list);
2958 return(error); 2913 return(error);
2959} 2914}
2960 2915
diff --git a/fs/xfs/xfs_attr_leaf.h b/fs/xfs/xfs_attr_leaf.h
index 040f732ce1e2..5ecf437b7825 100644
--- a/fs/xfs/xfs_attr_leaf.h
+++ b/fs/xfs/xfs_attr_leaf.h
@@ -30,7 +30,7 @@
30 30
31struct attrlist; 31struct attrlist;
32struct attrlist_cursor_kern; 32struct attrlist_cursor_kern;
33struct attrnames; 33struct xfs_attr_list_context;
34struct xfs_dabuf; 34struct xfs_dabuf;
35struct xfs_da_args; 35struct xfs_da_args;
36struct xfs_da_state; 36struct xfs_da_state;
@@ -204,33 +204,6 @@ static inline int xfs_attr_leaf_entsize_local_max(int bsize)
204 return (((bsize) >> 1) + ((bsize) >> 2)); 204 return (((bsize) >> 1) + ((bsize) >> 2));
205} 205}
206 206
207
208/*========================================================================
209 * Structure used to pass context around among the routines.
210 *========================================================================*/
211
212
213struct xfs_attr_list_context;
214
215typedef int (*put_listent_func_t)(struct xfs_attr_list_context *, struct attrnames *,
216 char *, int, int, char *);
217
218typedef struct xfs_attr_list_context {
219 struct xfs_inode *dp; /* inode */
220 struct attrlist_cursor_kern *cursor; /* position in list */
221 struct attrlist *alist; /* output buffer */
222 int seen_enough; /* T/F: seen enough of list? */
223 int count; /* num used entries */
224 int dupcnt; /* count dup hashvals seen */
225 int bufsize; /* total buffer size */
226 int firstu; /* first used byte in buffer */
227 int flags; /* from VOP call */
228 int resynch; /* T/F: resynch with cursor */
229 int put_value; /* T/F: need value for listent */
230 put_listent_func_t put_listent; /* list output fmt function */
231 int index; /* index into output buffer */
232} xfs_attr_list_context_t;
233
234/* 207/*
235 * Used to keep a list of "remote value" extents when unlinking an inode. 208 * Used to keep a list of "remote value" extents when unlinking an inode.
236 */ 209 */
diff --git a/fs/xfs/xfs_attr_sf.h b/fs/xfs/xfs_attr_sf.h
index f67f917803b1..ea22839caed2 100644
--- a/fs/xfs/xfs_attr_sf.h
+++ b/fs/xfs/xfs_attr_sf.h
@@ -97,13 +97,9 @@ void xfs_attr_trace_l_cb(char *where, struct xfs_attr_list_context *context,
97void xfs_attr_trace_l_cl(char *where, struct xfs_attr_list_context *context, 97void xfs_attr_trace_l_cl(char *where, struct xfs_attr_list_context *context,
98 struct xfs_attr_leafblock *leaf); 98 struct xfs_attr_leafblock *leaf);
99void xfs_attr_trace_enter(int type, char *where, 99void xfs_attr_trace_enter(int type, char *where,
100 __psunsigned_t a2, __psunsigned_t a3, 100 struct xfs_attr_list_context *context,
101 __psunsigned_t a4, __psunsigned_t a5, 101 __psunsigned_t a13, __psunsigned_t a14,
102 __psunsigned_t a6, __psunsigned_t a7, 102 __psunsigned_t a15);
103 __psunsigned_t a8, __psunsigned_t a9,
104 __psunsigned_t a10, __psunsigned_t a11,
105 __psunsigned_t a12, __psunsigned_t a13,
106 __psunsigned_t a14, __psunsigned_t a15);
107#else 103#else
108#define xfs_attr_trace_l_c(w,c) 104#define xfs_attr_trace_l_c(w,c)
109#define xfs_attr_trace_l_cn(w,c,n) 105#define xfs_attr_trace_l_cn(w,c,n)
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c
index 53c259f5a5af..3c4beb3a4326 100644
--- a/fs/xfs/xfs_bmap.c
+++ b/fs/xfs/xfs_bmap.c
@@ -428,7 +428,8 @@ xfs_bmap_add_attrfork_btree(
428 cur->bc_private.b.firstblock = *firstblock; 428 cur->bc_private.b.firstblock = *firstblock;
429 if ((error = xfs_bmbt_lookup_ge(cur, 0, 0, 0, &stat))) 429 if ((error = xfs_bmbt_lookup_ge(cur, 0, 0, 0, &stat)))
430 goto error0; 430 goto error0;
431 ASSERT(stat == 1); /* must be at least one entry */ 431 /* must be at least one entry */
432 XFS_WANT_CORRUPTED_GOTO(stat == 1, error0);
432 if ((error = xfs_bmbt_newroot(cur, flags, &stat))) 433 if ((error = xfs_bmbt_newroot(cur, flags, &stat)))
433 goto error0; 434 goto error0;
434 if (stat == 0) { 435 if (stat == 0) {
@@ -816,13 +817,13 @@ xfs_bmap_add_extent_delay_real(
816 RIGHT.br_startblock, 817 RIGHT.br_startblock,
817 RIGHT.br_blockcount, &i))) 818 RIGHT.br_blockcount, &i)))
818 goto done; 819 goto done;
819 ASSERT(i == 1); 820 XFS_WANT_CORRUPTED_GOTO(i == 1, done);
820 if ((error = xfs_bmbt_delete(cur, &i))) 821 if ((error = xfs_bmbt_delete(cur, &i)))
821 goto done; 822 goto done;
822 ASSERT(i == 1); 823 XFS_WANT_CORRUPTED_GOTO(i == 1, done);
823 if ((error = xfs_bmbt_decrement(cur, 0, &i))) 824 if ((error = xfs_bmbt_decrement(cur, 0, &i)))
824 goto done; 825 goto done;
825 ASSERT(i == 1); 826 XFS_WANT_CORRUPTED_GOTO(i == 1, done);
826 if ((error = xfs_bmbt_update(cur, LEFT.br_startoff, 827 if ((error = xfs_bmbt_update(cur, LEFT.br_startoff,
827 LEFT.br_startblock, 828 LEFT.br_startblock,
828 LEFT.br_blockcount + 829 LEFT.br_blockcount +
@@ -860,7 +861,7 @@ xfs_bmap_add_extent_delay_real(
860 LEFT.br_startblock, LEFT.br_blockcount, 861 LEFT.br_startblock, LEFT.br_blockcount,
861 &i))) 862 &i)))
862 goto done; 863 goto done;
863 ASSERT(i == 1); 864 XFS_WANT_CORRUPTED_GOTO(i == 1, done);
864 if ((error = xfs_bmbt_update(cur, LEFT.br_startoff, 865 if ((error = xfs_bmbt_update(cur, LEFT.br_startoff,
865 LEFT.br_startblock, 866 LEFT.br_startblock,
866 LEFT.br_blockcount + 867 LEFT.br_blockcount +
@@ -895,7 +896,7 @@ xfs_bmap_add_extent_delay_real(
895 RIGHT.br_startblock, 896 RIGHT.br_startblock,
896 RIGHT.br_blockcount, &i))) 897 RIGHT.br_blockcount, &i)))
897 goto done; 898 goto done;
898 ASSERT(i == 1); 899 XFS_WANT_CORRUPTED_GOTO(i == 1, done);
899 if ((error = xfs_bmbt_update(cur, PREV.br_startoff, 900 if ((error = xfs_bmbt_update(cur, PREV.br_startoff,
900 new->br_startblock, 901 new->br_startblock,
901 PREV.br_blockcount + 902 PREV.br_blockcount +
@@ -928,11 +929,11 @@ xfs_bmap_add_extent_delay_real(
928 new->br_startblock, new->br_blockcount, 929 new->br_startblock, new->br_blockcount,
929 &i))) 930 &i)))
930 goto done; 931 goto done;
931 ASSERT(i == 0); 932 XFS_WANT_CORRUPTED_GOTO(i == 0, done);
932 cur->bc_rec.b.br_state = XFS_EXT_NORM; 933 cur->bc_rec.b.br_state = XFS_EXT_NORM;
933 if ((error = xfs_bmbt_insert(cur, &i))) 934 if ((error = xfs_bmbt_insert(cur, &i)))
934 goto done; 935 goto done;
935 ASSERT(i == 1); 936 XFS_WANT_CORRUPTED_GOTO(i == 1, done);
936 } 937 }
937 *dnew = 0; 938 *dnew = 0;
938 /* DELTA: The in-core extent described by new changed type. */ 939 /* DELTA: The in-core extent described by new changed type. */
@@ -963,7 +964,7 @@ xfs_bmap_add_extent_delay_real(
963 LEFT.br_startblock, LEFT.br_blockcount, 964 LEFT.br_startblock, LEFT.br_blockcount,
964 &i))) 965 &i)))
965 goto done; 966 goto done;
966 ASSERT(i == 1); 967 XFS_WANT_CORRUPTED_GOTO(i == 1, done);
967 if ((error = xfs_bmbt_update(cur, LEFT.br_startoff, 968 if ((error = xfs_bmbt_update(cur, LEFT.br_startoff,
968 LEFT.br_startblock, 969 LEFT.br_startblock,
969 LEFT.br_blockcount + 970 LEFT.br_blockcount +
@@ -1004,11 +1005,11 @@ xfs_bmap_add_extent_delay_real(
1004 new->br_startblock, new->br_blockcount, 1005 new->br_startblock, new->br_blockcount,
1005 &i))) 1006 &i)))
1006 goto done; 1007 goto done;
1007 ASSERT(i == 0); 1008 XFS_WANT_CORRUPTED_GOTO(i == 0, done);
1008 cur->bc_rec.b.br_state = XFS_EXT_NORM; 1009 cur->bc_rec.b.br_state = XFS_EXT_NORM;
1009 if ((error = xfs_bmbt_insert(cur, &i))) 1010 if ((error = xfs_bmbt_insert(cur, &i)))
1010 goto done; 1011 goto done;
1011 ASSERT(i == 1); 1012 XFS_WANT_CORRUPTED_GOTO(i == 1, done);
1012 } 1013 }
1013 if (ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS && 1014 if (ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS &&
1014 ip->i_d.di_nextents > ip->i_df.if_ext_max) { 1015 ip->i_d.di_nextents > ip->i_df.if_ext_max) {
@@ -1054,7 +1055,7 @@ xfs_bmap_add_extent_delay_real(
1054 RIGHT.br_startblock, 1055 RIGHT.br_startblock,
1055 RIGHT.br_blockcount, &i))) 1056 RIGHT.br_blockcount, &i)))
1056 goto done; 1057 goto done;
1057 ASSERT(i == 1); 1058 XFS_WANT_CORRUPTED_GOTO(i == 1, done);
1058 if ((error = xfs_bmbt_update(cur, new->br_startoff, 1059 if ((error = xfs_bmbt_update(cur, new->br_startoff,
1059 new->br_startblock, 1060 new->br_startblock,
1060 new->br_blockcount + 1061 new->br_blockcount +
@@ -1094,11 +1095,11 @@ xfs_bmap_add_extent_delay_real(
1094 new->br_startblock, new->br_blockcount, 1095 new->br_startblock, new->br_blockcount,
1095 &i))) 1096 &i)))
1096 goto done; 1097 goto done;
1097 ASSERT(i == 0); 1098 XFS_WANT_CORRUPTED_GOTO(i == 0, done);
1098 cur->bc_rec.b.br_state = XFS_EXT_NORM; 1099 cur->bc_rec.b.br_state = XFS_EXT_NORM;
1099 if ((error = xfs_bmbt_insert(cur, &i))) 1100 if ((error = xfs_bmbt_insert(cur, &i)))
1100 goto done; 1101 goto done;
1101 ASSERT(i == 1); 1102 XFS_WANT_CORRUPTED_GOTO(i == 1, done);
1102 } 1103 }
1103 if (ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS && 1104 if (ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS &&
1104 ip->i_d.di_nextents > ip->i_df.if_ext_max) { 1105 ip->i_d.di_nextents > ip->i_df.if_ext_max) {
@@ -1149,11 +1150,11 @@ xfs_bmap_add_extent_delay_real(
1149 new->br_startblock, new->br_blockcount, 1150 new->br_startblock, new->br_blockcount,
1150 &i))) 1151 &i)))
1151 goto done; 1152 goto done;
1152 ASSERT(i == 0); 1153 XFS_WANT_CORRUPTED_GOTO(i == 0, done);
1153 cur->bc_rec.b.br_state = XFS_EXT_NORM; 1154 cur->bc_rec.b.br_state = XFS_EXT_NORM;
1154 if ((error = xfs_bmbt_insert(cur, &i))) 1155 if ((error = xfs_bmbt_insert(cur, &i)))
1155 goto done; 1156 goto done;
1156 ASSERT(i == 1); 1157 XFS_WANT_CORRUPTED_GOTO(i == 1, done);
1157 } 1158 }
1158 if (ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS && 1159 if (ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS &&
1159 ip->i_d.di_nextents > ip->i_df.if_ext_max) { 1160 ip->i_d.di_nextents > ip->i_df.if_ext_max) {
@@ -1377,19 +1378,19 @@ xfs_bmap_add_extent_unwritten_real(
1377 RIGHT.br_startblock, 1378 RIGHT.br_startblock,
1378 RIGHT.br_blockcount, &i))) 1379 RIGHT.br_blockcount, &i)))
1379 goto done; 1380 goto done;
1380 ASSERT(i == 1); 1381 XFS_WANT_CORRUPTED_GOTO(i == 1, done);
1381 if ((error = xfs_bmbt_delete(cur, &i))) 1382 if ((error = xfs_bmbt_delete(cur, &i)))
1382 goto done; 1383 goto done;
1383 ASSERT(i == 1); 1384 XFS_WANT_CORRUPTED_GOTO(i == 1, done);
1384 if ((error = xfs_bmbt_decrement(cur, 0, &i))) 1385 if ((error = xfs_bmbt_decrement(cur, 0, &i)))
1385 goto done; 1386 goto done;
1386 ASSERT(i == 1); 1387 XFS_WANT_CORRUPTED_GOTO(i == 1, done);
1387 if ((error = xfs_bmbt_delete(cur, &i))) 1388 if ((error = xfs_bmbt_delete(cur, &i)))
1388 goto done; 1389 goto done;
1389 ASSERT(i == 1); 1390 XFS_WANT_CORRUPTED_GOTO(i == 1, done);
1390 if ((error = xfs_bmbt_decrement(cur, 0, &i))) 1391 if ((error = xfs_bmbt_decrement(cur, 0, &i)))
1391 goto done; 1392 goto done;
1392 ASSERT(i == 1); 1393 XFS_WANT_CORRUPTED_GOTO(i == 1, done);
1393 if ((error = xfs_bmbt_update(cur, LEFT.br_startoff, 1394 if ((error = xfs_bmbt_update(cur, LEFT.br_startoff,
1394 LEFT.br_startblock, 1395 LEFT.br_startblock,
1395 LEFT.br_blockcount + PREV.br_blockcount + 1396 LEFT.br_blockcount + PREV.br_blockcount +
@@ -1426,13 +1427,13 @@ xfs_bmap_add_extent_unwritten_real(
1426 PREV.br_startblock, PREV.br_blockcount, 1427 PREV.br_startblock, PREV.br_blockcount,
1427 &i))) 1428 &i)))
1428 goto done; 1429 goto done;
1429 ASSERT(i == 1); 1430 XFS_WANT_CORRUPTED_GOTO(i == 1, done);
1430 if ((error = xfs_bmbt_delete(cur, &i))) 1431 if ((error = xfs_bmbt_delete(cur, &i)))
1431 goto done; 1432 goto done;
1432 ASSERT(i == 1); 1433 XFS_WANT_CORRUPTED_GOTO(i == 1, done);
1433 if ((error = xfs_bmbt_decrement(cur, 0, &i))) 1434 if ((error = xfs_bmbt_decrement(cur, 0, &i)))
1434 goto done; 1435 goto done;
1435 ASSERT(i == 1); 1436 XFS_WANT_CORRUPTED_GOTO(i == 1, done);
1436 if ((error = xfs_bmbt_update(cur, LEFT.br_startoff, 1437 if ((error = xfs_bmbt_update(cur, LEFT.br_startoff,
1437 LEFT.br_startblock, 1438 LEFT.br_startblock,
1438 LEFT.br_blockcount + PREV.br_blockcount, 1439 LEFT.br_blockcount + PREV.br_blockcount,
@@ -1469,13 +1470,13 @@ xfs_bmap_add_extent_unwritten_real(
1469 RIGHT.br_startblock, 1470 RIGHT.br_startblock,
1470 RIGHT.br_blockcount, &i))) 1471 RIGHT.br_blockcount, &i)))
1471 goto done; 1472 goto done;
1472 ASSERT(i == 1); 1473 XFS_WANT_CORRUPTED_GOTO(i == 1, done);
1473 if ((error = xfs_bmbt_delete(cur, &i))) 1474 if ((error = xfs_bmbt_delete(cur, &i)))
1474 goto done; 1475 goto done;
1475 ASSERT(i == 1); 1476 XFS_WANT_CORRUPTED_GOTO(i == 1, done);
1476 if ((error = xfs_bmbt_decrement(cur, 0, &i))) 1477 if ((error = xfs_bmbt_decrement(cur, 0, &i)))
1477 goto done; 1478 goto done;
1478 ASSERT(i == 1); 1479 XFS_WANT_CORRUPTED_GOTO(i == 1, done);
1479 if ((error = xfs_bmbt_update(cur, new->br_startoff, 1480 if ((error = xfs_bmbt_update(cur, new->br_startoff,
1480 new->br_startblock, 1481 new->br_startblock,
1481 new->br_blockcount + RIGHT.br_blockcount, 1482 new->br_blockcount + RIGHT.br_blockcount,
@@ -1508,7 +1509,7 @@ xfs_bmap_add_extent_unwritten_real(
1508 new->br_startblock, new->br_blockcount, 1509 new->br_startblock, new->br_blockcount,
1509 &i))) 1510 &i)))
1510 goto done; 1511 goto done;
1511 ASSERT(i == 1); 1512 XFS_WANT_CORRUPTED_GOTO(i == 1, done);
1512 if ((error = xfs_bmbt_update(cur, new->br_startoff, 1513 if ((error = xfs_bmbt_update(cur, new->br_startoff,
1513 new->br_startblock, new->br_blockcount, 1514 new->br_startblock, new->br_blockcount,
1514 newext))) 1515 newext)))
@@ -1549,7 +1550,7 @@ xfs_bmap_add_extent_unwritten_real(
1549 PREV.br_startblock, PREV.br_blockcount, 1550 PREV.br_startblock, PREV.br_blockcount,
1550 &i))) 1551 &i)))
1551 goto done; 1552 goto done;
1552 ASSERT(i == 1); 1553 XFS_WANT_CORRUPTED_GOTO(i == 1, done);
1553 if ((error = xfs_bmbt_update(cur, 1554 if ((error = xfs_bmbt_update(cur,
1554 PREV.br_startoff + new->br_blockcount, 1555 PREV.br_startoff + new->br_blockcount,
1555 PREV.br_startblock + new->br_blockcount, 1556 PREV.br_startblock + new->br_blockcount,
@@ -1596,7 +1597,7 @@ xfs_bmap_add_extent_unwritten_real(
1596 PREV.br_startblock, PREV.br_blockcount, 1597 PREV.br_startblock, PREV.br_blockcount,
1597 &i))) 1598 &i)))
1598 goto done; 1599 goto done;
1599 ASSERT(i == 1); 1600 XFS_WANT_CORRUPTED_GOTO(i == 1, done);
1600 if ((error = xfs_bmbt_update(cur, 1601 if ((error = xfs_bmbt_update(cur,
1601 PREV.br_startoff + new->br_blockcount, 1602 PREV.br_startoff + new->br_blockcount,
1602 PREV.br_startblock + new->br_blockcount, 1603 PREV.br_startblock + new->br_blockcount,
@@ -1606,7 +1607,7 @@ xfs_bmap_add_extent_unwritten_real(
1606 cur->bc_rec.b = *new; 1607 cur->bc_rec.b = *new;
1607 if ((error = xfs_bmbt_insert(cur, &i))) 1608 if ((error = xfs_bmbt_insert(cur, &i)))
1608 goto done; 1609 goto done;
1609 ASSERT(i == 1); 1610 XFS_WANT_CORRUPTED_GOTO(i == 1, done);
1610 } 1611 }
1611 /* DELTA: One in-core extent is split in two. */ 1612 /* DELTA: One in-core extent is split in two. */
1612 temp = PREV.br_startoff; 1613 temp = PREV.br_startoff;
@@ -1640,7 +1641,7 @@ xfs_bmap_add_extent_unwritten_real(
1640 PREV.br_startblock, 1641 PREV.br_startblock,
1641 PREV.br_blockcount, &i))) 1642 PREV.br_blockcount, &i)))
1642 goto done; 1643 goto done;
1643 ASSERT(i == 1); 1644 XFS_WANT_CORRUPTED_GOTO(i == 1, done);
1644 if ((error = xfs_bmbt_update(cur, PREV.br_startoff, 1645 if ((error = xfs_bmbt_update(cur, PREV.br_startoff,
1645 PREV.br_startblock, 1646 PREV.br_startblock,
1646 PREV.br_blockcount - new->br_blockcount, 1647 PREV.br_blockcount - new->br_blockcount,
@@ -1682,7 +1683,7 @@ xfs_bmap_add_extent_unwritten_real(
1682 PREV.br_startblock, PREV.br_blockcount, 1683 PREV.br_startblock, PREV.br_blockcount,
1683 &i))) 1684 &i)))
1684 goto done; 1685 goto done;
1685 ASSERT(i == 1); 1686 XFS_WANT_CORRUPTED_GOTO(i == 1, done);
1686 if ((error = xfs_bmbt_update(cur, PREV.br_startoff, 1687 if ((error = xfs_bmbt_update(cur, PREV.br_startoff,
1687 PREV.br_startblock, 1688 PREV.br_startblock,
1688 PREV.br_blockcount - new->br_blockcount, 1689 PREV.br_blockcount - new->br_blockcount,
@@ -1692,11 +1693,11 @@ xfs_bmap_add_extent_unwritten_real(
1692 new->br_startblock, new->br_blockcount, 1693 new->br_startblock, new->br_blockcount,
1693 &i))) 1694 &i)))
1694 goto done; 1695 goto done;
1695 ASSERT(i == 0); 1696 XFS_WANT_CORRUPTED_GOTO(i == 0, done);
1696 cur->bc_rec.b.br_state = XFS_EXT_NORM; 1697 cur->bc_rec.b.br_state = XFS_EXT_NORM;
1697 if ((error = xfs_bmbt_insert(cur, &i))) 1698 if ((error = xfs_bmbt_insert(cur, &i)))
1698 goto done; 1699 goto done;
1699 ASSERT(i == 1); 1700 XFS_WANT_CORRUPTED_GOTO(i == 1, done);
1700 } 1701 }
1701 /* DELTA: One in-core extent is split in two. */ 1702 /* DELTA: One in-core extent is split in two. */
1702 temp = PREV.br_startoff; 1703 temp = PREV.br_startoff;
@@ -1732,27 +1733,34 @@ xfs_bmap_add_extent_unwritten_real(
1732 PREV.br_startblock, PREV.br_blockcount, 1733 PREV.br_startblock, PREV.br_blockcount,
1733 &i))) 1734 &i)))
1734 goto done; 1735 goto done;
1735 ASSERT(i == 1); 1736 XFS_WANT_CORRUPTED_GOTO(i == 1, done);
1736 /* new right extent - oldext */ 1737 /* new right extent - oldext */
1737 if ((error = xfs_bmbt_update(cur, r[1].br_startoff, 1738 if ((error = xfs_bmbt_update(cur, r[1].br_startoff,
1738 r[1].br_startblock, r[1].br_blockcount, 1739 r[1].br_startblock, r[1].br_blockcount,
1739 r[1].br_state))) 1740 r[1].br_state)))
1740 goto done; 1741 goto done;
1741 /* new left extent - oldext */ 1742 /* new left extent - oldext */
1742 PREV.br_blockcount =
1743 new->br_startoff - PREV.br_startoff;
1744 cur->bc_rec.b = PREV; 1743 cur->bc_rec.b = PREV;
1744 cur->bc_rec.b.br_blockcount =
1745 new->br_startoff - PREV.br_startoff;
1745 if ((error = xfs_bmbt_insert(cur, &i))) 1746 if ((error = xfs_bmbt_insert(cur, &i)))
1746 goto done; 1747 goto done;
1747 ASSERT(i == 1); 1748 XFS_WANT_CORRUPTED_GOTO(i == 1, done);
1748 if ((error = xfs_bmbt_increment(cur, 0, &i))) 1749 /*
1750 * Reset the cursor to the position of the new extent
1751 * we are about to insert as we can't trust it after
1752 * the previous insert.
1753 */
1754 if ((error = xfs_bmbt_lookup_eq(cur, new->br_startoff,
1755 new->br_startblock, new->br_blockcount,
1756 &i)))
1749 goto done; 1757 goto done;
1750 ASSERT(i == 1); 1758 XFS_WANT_CORRUPTED_GOTO(i == 0, done);
1751 /* new middle extent - newext */ 1759 /* new middle extent - newext */
1752 cur->bc_rec.b = *new; 1760 cur->bc_rec.b.br_state = new->br_state;
1753 if ((error = xfs_bmbt_insert(cur, &i))) 1761 if ((error = xfs_bmbt_insert(cur, &i)))
1754 goto done; 1762 goto done;
1755 ASSERT(i == 1); 1763 XFS_WANT_CORRUPTED_GOTO(i == 1, done);
1756 } 1764 }
1757 /* DELTA: One in-core extent is split in three. */ 1765 /* DELTA: One in-core extent is split in three. */
1758 temp = PREV.br_startoff; 1766 temp = PREV.br_startoff;
@@ -2097,13 +2105,13 @@ xfs_bmap_add_extent_hole_real(
2097 right.br_startblock, 2105 right.br_startblock,
2098 right.br_blockcount, &i))) 2106 right.br_blockcount, &i)))
2099 goto done; 2107 goto done;
2100 ASSERT(i == 1); 2108 XFS_WANT_CORRUPTED_GOTO(i == 1, done);
2101 if ((error = xfs_bmbt_delete(cur, &i))) 2109 if ((error = xfs_bmbt_delete(cur, &i)))
2102 goto done; 2110 goto done;
2103 ASSERT(i == 1); 2111 XFS_WANT_CORRUPTED_GOTO(i == 1, done);
2104 if ((error = xfs_bmbt_decrement(cur, 0, &i))) 2112 if ((error = xfs_bmbt_decrement(cur, 0, &i)))
2105 goto done; 2113 goto done;
2106 ASSERT(i == 1); 2114 XFS_WANT_CORRUPTED_GOTO(i == 1, done);
2107 if ((error = xfs_bmbt_update(cur, left.br_startoff, 2115 if ((error = xfs_bmbt_update(cur, left.br_startoff,
2108 left.br_startblock, 2116 left.br_startblock,
2109 left.br_blockcount + 2117 left.br_blockcount +
@@ -2139,7 +2147,7 @@ xfs_bmap_add_extent_hole_real(
2139 left.br_startblock, 2147 left.br_startblock,
2140 left.br_blockcount, &i))) 2148 left.br_blockcount, &i)))
2141 goto done; 2149 goto done;
2142 ASSERT(i == 1); 2150 XFS_WANT_CORRUPTED_GOTO(i == 1, done);
2143 if ((error = xfs_bmbt_update(cur, left.br_startoff, 2151 if ((error = xfs_bmbt_update(cur, left.br_startoff,
2144 left.br_startblock, 2152 left.br_startblock,
2145 left.br_blockcount + 2153 left.br_blockcount +
@@ -2174,7 +2182,7 @@ xfs_bmap_add_extent_hole_real(
2174 right.br_startblock, 2182 right.br_startblock,
2175 right.br_blockcount, &i))) 2183 right.br_blockcount, &i)))
2176 goto done; 2184 goto done;
2177 ASSERT(i == 1); 2185 XFS_WANT_CORRUPTED_GOTO(i == 1, done);
2178 if ((error = xfs_bmbt_update(cur, new->br_startoff, 2186 if ((error = xfs_bmbt_update(cur, new->br_startoff,
2179 new->br_startblock, 2187 new->br_startblock,
2180 new->br_blockcount + 2188 new->br_blockcount +
@@ -2208,11 +2216,11 @@ xfs_bmap_add_extent_hole_real(
2208 new->br_startblock, 2216 new->br_startblock,
2209 new->br_blockcount, &i))) 2217 new->br_blockcount, &i)))
2210 goto done; 2218 goto done;
2211 ASSERT(i == 0); 2219 XFS_WANT_CORRUPTED_GOTO(i == 0, done);
2212 cur->bc_rec.b.br_state = new->br_state; 2220 cur->bc_rec.b.br_state = new->br_state;
2213 if ((error = xfs_bmbt_insert(cur, &i))) 2221 if ((error = xfs_bmbt_insert(cur, &i)))
2214 goto done; 2222 goto done;
2215 ASSERT(i == 1); 2223 XFS_WANT_CORRUPTED_GOTO(i == 1, done);
2216 } 2224 }
2217 /* DELTA: A new extent was added in a hole. */ 2225 /* DELTA: A new extent was added in a hole. */
2218 temp = new->br_startoff; 2226 temp = new->br_startoff;
@@ -3131,7 +3139,7 @@ xfs_bmap_del_extent(
3131 got.br_startblock, got.br_blockcount, 3139 got.br_startblock, got.br_blockcount,
3132 &i))) 3140 &i)))
3133 goto done; 3141 goto done;
3134 ASSERT(i == 1); 3142 XFS_WANT_CORRUPTED_GOTO(i == 1, done);
3135 } 3143 }
3136 da_old = da_new = 0; 3144 da_old = da_new = 0;
3137 } else { 3145 } else {
@@ -3164,7 +3172,7 @@ xfs_bmap_del_extent(
3164 } 3172 }
3165 if ((error = xfs_bmbt_delete(cur, &i))) 3173 if ((error = xfs_bmbt_delete(cur, &i)))
3166 goto done; 3174 goto done;
3167 ASSERT(i == 1); 3175 XFS_WANT_CORRUPTED_GOTO(i == 1, done);
3168 break; 3176 break;
3169 3177
3170 case 2: 3178 case 2:
@@ -3268,7 +3276,7 @@ xfs_bmap_del_extent(
3268 got.br_startblock, 3276 got.br_startblock,
3269 temp, &i))) 3277 temp, &i)))
3270 goto done; 3278 goto done;
3271 ASSERT(i == 1); 3279 XFS_WANT_CORRUPTED_GOTO(i == 1, done);
3272 /* 3280 /*
3273 * Update the btree record back 3281 * Update the btree record back
3274 * to the original value. 3282 * to the original value.
@@ -3289,7 +3297,7 @@ xfs_bmap_del_extent(
3289 error = XFS_ERROR(ENOSPC); 3297 error = XFS_ERROR(ENOSPC);
3290 goto done; 3298 goto done;
3291 } 3299 }
3292 ASSERT(i == 1); 3300 XFS_WANT_CORRUPTED_GOTO(i == 1, done);
3293 } else 3301 } else
3294 flags |= XFS_ILOG_FEXT(whichfork); 3302 flags |= XFS_ILOG_FEXT(whichfork);
3295 XFS_IFORK_NEXT_SET(ip, whichfork, 3303 XFS_IFORK_NEXT_SET(ip, whichfork,
@@ -5970,7 +5978,7 @@ unlock_and_return:
5970 xfs_iunlock_map_shared(ip, lock); 5978 xfs_iunlock_map_shared(ip, lock);
5971 xfs_iunlock(ip, XFS_IOLOCK_SHARED); 5979 xfs_iunlock(ip, XFS_IOLOCK_SHARED);
5972 5980
5973 kmem_free(map, subnex * sizeof(*map)); 5981 kmem_free(map);
5974 5982
5975 return error; 5983 return error;
5976} 5984}
diff --git a/fs/xfs/xfs_bmap.h b/fs/xfs/xfs_bmap.h
index 6ff70cda451c..9f3e3a836d15 100644
--- a/fs/xfs/xfs_bmap.h
+++ b/fs/xfs/xfs_bmap.h
@@ -54,12 +54,23 @@ typedef struct xfs_bmap_free_item
54 54
55/* 55/*
56 * Header for free extent list. 56 * Header for free extent list.
57 *
58 * xbf_low is used by the allocator to activate the lowspace algorithm -
59 * when free space is running low the extent allocator may choose to
60 * allocate an extent from an AG without leaving sufficient space for
61 * a btree split when inserting the new extent. In this case the allocator
62 * will enable the lowspace algorithm which is supposed to allow further
63 * allocations (such as btree splits and newroots) to allocate from
64 * sequential AGs. In order to avoid locking AGs out of order the lowspace
65 * algorithm will start searching for free space from AG 0. If the correct
66 * transaction reservations have been made then this algorithm will eventually
67 * find all the space it needs.
57 */ 68 */
58typedef struct xfs_bmap_free 69typedef struct xfs_bmap_free
59{ 70{
60 xfs_bmap_free_item_t *xbf_first; /* list of to-be-free extents */ 71 xfs_bmap_free_item_t *xbf_first; /* list of to-be-free extents */
61 int xbf_count; /* count of items on list */ 72 int xbf_count; /* count of items on list */
62 int xbf_low; /* kludge: alloc in low mode */ 73 int xbf_low; /* alloc in low mode */
63} xfs_bmap_free_t; 74} xfs_bmap_free_t;
64 75
65#define XFS_BMAP_MAX_NMAP 4 76#define XFS_BMAP_MAX_NMAP 4
diff --git a/fs/xfs/xfs_bmap_btree.c b/fs/xfs/xfs_bmap_btree.c
index 4f0e849d973e..23efad29a5cd 100644
--- a/fs/xfs/xfs_bmap_btree.c
+++ b/fs/xfs/xfs_bmap_btree.c
@@ -1493,12 +1493,27 @@ xfs_bmbt_split(
1493 left = XFS_BUF_TO_BMBT_BLOCK(lbp); 1493 left = XFS_BUF_TO_BMBT_BLOCK(lbp);
1494 args.fsbno = cur->bc_private.b.firstblock; 1494 args.fsbno = cur->bc_private.b.firstblock;
1495 args.firstblock = args.fsbno; 1495 args.firstblock = args.fsbno;
1496 args.minleft = 0;
1496 if (args.fsbno == NULLFSBLOCK) { 1497 if (args.fsbno == NULLFSBLOCK) {
1497 args.fsbno = lbno; 1498 args.fsbno = lbno;
1498 args.type = XFS_ALLOCTYPE_START_BNO; 1499 args.type = XFS_ALLOCTYPE_START_BNO;
1499 } else 1500 /*
1501 * Make sure there is sufficient room left in the AG to
1502 * complete a full tree split for an extent insert. If
1503 * we are converting the middle part of an extent then
1504 * we may need space for two tree splits.
1505 *
1506 * We are relying on the caller to make the correct block
1507 * reservation for this operation to succeed. If the
1508 * reservation amount is insufficient then we may fail a
1509 * block allocation here and corrupt the filesystem.
1510 */
1511 args.minleft = xfs_trans_get_block_res(args.tp);
1512 } else if (cur->bc_private.b.flist->xbf_low)
1513 args.type = XFS_ALLOCTYPE_START_BNO;
1514 else
1500 args.type = XFS_ALLOCTYPE_NEAR_BNO; 1515 args.type = XFS_ALLOCTYPE_NEAR_BNO;
1501 args.mod = args.minleft = args.alignment = args.total = args.isfl = 1516 args.mod = args.alignment = args.total = args.isfl =
1502 args.userdata = args.minalignslop = 0; 1517 args.userdata = args.minalignslop = 0;
1503 args.minlen = args.maxlen = args.prod = 1; 1518 args.minlen = args.maxlen = args.prod = 1;
1504 args.wasdel = cur->bc_private.b.flags & XFS_BTCUR_BPRV_WASDEL; 1519 args.wasdel = cur->bc_private.b.flags & XFS_BTCUR_BPRV_WASDEL;
@@ -1510,6 +1525,21 @@ xfs_bmbt_split(
1510 XFS_BMBT_TRACE_CURSOR(cur, ERROR); 1525 XFS_BMBT_TRACE_CURSOR(cur, ERROR);
1511 return error; 1526 return error;
1512 } 1527 }
1528 if (args.fsbno == NULLFSBLOCK && args.minleft) {
1529 /*
1530 * Could not find an AG with enough free space to satisfy
1531 * a full btree split. Try again without minleft and if
1532 * successful activate the lowspace algorithm.
1533 */
1534 args.fsbno = 0;
1535 args.type = XFS_ALLOCTYPE_FIRST_AG;
1536 args.minleft = 0;
1537 if ((error = xfs_alloc_vextent(&args))) {
1538 XFS_BMBT_TRACE_CURSOR(cur, ERROR);
1539 return error;
1540 }
1541 cur->bc_private.b.flist->xbf_low = 1;
1542 }
1513 if (args.fsbno == NULLFSBLOCK) { 1543 if (args.fsbno == NULLFSBLOCK) {
1514 XFS_BMBT_TRACE_CURSOR(cur, EXIT); 1544 XFS_BMBT_TRACE_CURSOR(cur, EXIT);
1515 *stat = 0; 1545 *stat = 0;
@@ -2029,22 +2059,8 @@ xfs_bmbt_increment(
2029 * Insert the current record at the point referenced by cur. 2059 * Insert the current record at the point referenced by cur.
2030 * 2060 *
2031 * A multi-level split of the tree on insert will invalidate the original 2061 * A multi-level split of the tree on insert will invalidate the original
2032 * cursor. It appears, however, that some callers assume that the cursor is 2062 * cursor. All callers of this function should assume that the cursor is
2033 * always valid. Hence if we do a multi-level split we need to revalidate the 2063 * no longer valid and revalidate it.
2034 * cursor.
2035 *
2036 * When a split occurs, we will see a new cursor returned. Use that as a
2037 * trigger to determine if we need to revalidate the original cursor. If we get
2038 * a split, then use the original irec to lookup up the path of the record we
2039 * just inserted.
2040 *
2041 * Note that the fact that the btree root is in the inode means that we can
2042 * have the level of the tree change without a "split" occurring at the root
2043 * level. What happens is that the root is migrated to an allocated block and
2044 * the inode root is pointed to it. This means a single split can change the
2045 * level of the tree (level 2 -> level 3) and invalidate the old cursor. Hence
2046 * the level change should be accounted as a split so as to correctly trigger a
2047 * revalidation of the old cursor.
2048 */ 2064 */
2049int /* error */ 2065int /* error */
2050xfs_bmbt_insert( 2066xfs_bmbt_insert(
@@ -2057,14 +2073,11 @@ xfs_bmbt_insert(
2057 xfs_fsblock_t nbno; 2073 xfs_fsblock_t nbno;
2058 xfs_btree_cur_t *ncur; 2074 xfs_btree_cur_t *ncur;
2059 xfs_bmbt_rec_t nrec; 2075 xfs_bmbt_rec_t nrec;
2060 xfs_bmbt_irec_t oirec; /* original irec */
2061 xfs_btree_cur_t *pcur; 2076 xfs_btree_cur_t *pcur;
2062 int splits = 0;
2063 2077
2064 XFS_BMBT_TRACE_CURSOR(cur, ENTRY); 2078 XFS_BMBT_TRACE_CURSOR(cur, ENTRY);
2065 level = 0; 2079 level = 0;
2066 nbno = NULLFSBLOCK; 2080 nbno = NULLFSBLOCK;
2067 oirec = cur->bc_rec.b;
2068 xfs_bmbt_disk_set_all(&nrec, &cur->bc_rec.b); 2081 xfs_bmbt_disk_set_all(&nrec, &cur->bc_rec.b);
2069 ncur = NULL; 2082 ncur = NULL;
2070 pcur = cur; 2083 pcur = cur;
@@ -2073,13 +2086,11 @@ xfs_bmbt_insert(
2073 &i))) { 2086 &i))) {
2074 if (pcur != cur) 2087 if (pcur != cur)
2075 xfs_btree_del_cursor(pcur, XFS_BTREE_ERROR); 2088 xfs_btree_del_cursor(pcur, XFS_BTREE_ERROR);
2076 goto error0; 2089 XFS_BMBT_TRACE_CURSOR(cur, ERROR);
2090 return error;
2077 } 2091 }
2078 XFS_WANT_CORRUPTED_GOTO(i == 1, error0); 2092 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
2079 if (pcur != cur && (ncur || nbno == NULLFSBLOCK)) { 2093 if (pcur != cur && (ncur || nbno == NULLFSBLOCK)) {
2080 /* allocating a new root is effectively a split */
2081 if (cur->bc_nlevels != pcur->bc_nlevels)
2082 splits++;
2083 cur->bc_nlevels = pcur->bc_nlevels; 2094 cur->bc_nlevels = pcur->bc_nlevels;
2084 cur->bc_private.b.allocated += 2095 cur->bc_private.b.allocated +=
2085 pcur->bc_private.b.allocated; 2096 pcur->bc_private.b.allocated;
@@ -2093,21 +2104,10 @@ xfs_bmbt_insert(
2093 xfs_btree_del_cursor(pcur, XFS_BTREE_NOERROR); 2104 xfs_btree_del_cursor(pcur, XFS_BTREE_NOERROR);
2094 } 2105 }
2095 if (ncur) { 2106 if (ncur) {
2096 splits++;
2097 pcur = ncur; 2107 pcur = ncur;
2098 ncur = NULL; 2108 ncur = NULL;
2099 } 2109 }
2100 } while (nbno != NULLFSBLOCK); 2110 } while (nbno != NULLFSBLOCK);
2101
2102 if (splits > 1) {
2103 /* revalidate the old cursor as we had a multi-level split */
2104 error = xfs_bmbt_lookup_eq(cur, oirec.br_startoff,
2105 oirec.br_startblock, oirec.br_blockcount, &i);
2106 if (error)
2107 goto error0;
2108 ASSERT(i == 1);
2109 }
2110
2111 XFS_BMBT_TRACE_CURSOR(cur, EXIT); 2111 XFS_BMBT_TRACE_CURSOR(cur, EXIT);
2112 *stat = i; 2112 *stat = i;
2113 return 0; 2113 return 0;
@@ -2254,7 +2254,9 @@ xfs_bmbt_newroot(
2254#endif 2254#endif
2255 args.fsbno = be64_to_cpu(*pp); 2255 args.fsbno = be64_to_cpu(*pp);
2256 args.type = XFS_ALLOCTYPE_START_BNO; 2256 args.type = XFS_ALLOCTYPE_START_BNO;
2257 } else 2257 } else if (cur->bc_private.b.flist->xbf_low)
2258 args.type = XFS_ALLOCTYPE_START_BNO;
2259 else
2258 args.type = XFS_ALLOCTYPE_NEAR_BNO; 2260 args.type = XFS_ALLOCTYPE_NEAR_BNO;
2259 if ((error = xfs_alloc_vextent(&args))) { 2261 if ((error = xfs_alloc_vextent(&args))) {
2260 XFS_BMBT_TRACE_CURSOR(cur, ERROR); 2262 XFS_BMBT_TRACE_CURSOR(cur, ERROR);
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c
index 53a71c62025d..d86ca2c03a70 100644
--- a/fs/xfs/xfs_buf_item.c
+++ b/fs/xfs/xfs_buf_item.c
@@ -889,9 +889,9 @@ xfs_buf_item_relse(
889 } 889 }
890 890
891#ifdef XFS_TRANS_DEBUG 891#ifdef XFS_TRANS_DEBUG
892 kmem_free(bip->bli_orig, XFS_BUF_COUNT(bp)); 892 kmem_free(bip->bli_orig);
893 bip->bli_orig = NULL; 893 bip->bli_orig = NULL;
894 kmem_free(bip->bli_logged, XFS_BUF_COUNT(bp) / NBBY); 894 kmem_free(bip->bli_logged);
895 bip->bli_logged = NULL; 895 bip->bli_logged = NULL;
896#endif /* XFS_TRANS_DEBUG */ 896#endif /* XFS_TRANS_DEBUG */
897 897
@@ -1138,9 +1138,9 @@ xfs_buf_iodone(
1138 xfs_trans_delete_ail(mp, (xfs_log_item_t *)bip); 1138 xfs_trans_delete_ail(mp, (xfs_log_item_t *)bip);
1139 1139
1140#ifdef XFS_TRANS_DEBUG 1140#ifdef XFS_TRANS_DEBUG
1141 kmem_free(bip->bli_orig, XFS_BUF_COUNT(bp)); 1141 kmem_free(bip->bli_orig);
1142 bip->bli_orig = NULL; 1142 bip->bli_orig = NULL;
1143 kmem_free(bip->bli_logged, XFS_BUF_COUNT(bp) / NBBY); 1143 kmem_free(bip->bli_logged);
1144 bip->bli_logged = NULL; 1144 bip->bli_logged = NULL;
1145#endif /* XFS_TRANS_DEBUG */ 1145#endif /* XFS_TRANS_DEBUG */
1146 1146
diff --git a/fs/xfs/xfs_clnt.h b/fs/xfs/xfs_clnt.h
index d5d1e60ee224..d2ce5dd70d87 100644
--- a/fs/xfs/xfs_clnt.h
+++ b/fs/xfs/xfs_clnt.h
@@ -78,6 +78,7 @@ struct xfs_mount_args {
78#define XFSMNT_IOSIZE 0x00002000 /* optimize for I/O size */ 78#define XFSMNT_IOSIZE 0x00002000 /* optimize for I/O size */
79#define XFSMNT_OSYNCISOSYNC 0x00004000 /* o_sync is REALLY o_sync */ 79#define XFSMNT_OSYNCISOSYNC 0x00004000 /* o_sync is REALLY o_sync */
80 /* (osyncisdsync is default) */ 80 /* (osyncisdsync is default) */
81#define XFSMNT_NOATTR2 0x00008000 /* turn off ATTR2 EA format */
81#define XFSMNT_32BITINODES 0x00200000 /* restrict inodes to 32 82#define XFSMNT_32BITINODES 0x00200000 /* restrict inodes to 32
82 * bits of address space */ 83 * bits of address space */
83#define XFSMNT_GQUOTA 0x00400000 /* group quota accounting */ 84#define XFSMNT_GQUOTA 0x00400000 /* group quota accounting */
diff --git a/fs/xfs/xfs_da_btree.c b/fs/xfs/xfs_da_btree.c
index 021a8f7e563f..9e561a9cefca 100644
--- a/fs/xfs/xfs_da_btree.c
+++ b/fs/xfs/xfs_da_btree.c
@@ -1431,7 +1431,7 @@ xfs_da_path_shift(xfs_da_state_t *state, xfs_da_state_path_t *path,
1431 } 1431 }
1432 if (level < 0) { 1432 if (level < 0) {
1433 *result = XFS_ERROR(ENOENT); /* we're out of our tree */ 1433 *result = XFS_ERROR(ENOENT); /* we're out of our tree */
1434 ASSERT(args->oknoent); 1434 ASSERT(args->op_flags & XFS_DA_OP_OKNOENT);
1435 return(0); 1435 return(0);
1436 } 1436 }
1437 1437
@@ -1530,6 +1530,28 @@ xfs_da_hashname(const uchar_t *name, int namelen)
1530 } 1530 }
1531} 1531}
1532 1532
1533enum xfs_dacmp
1534xfs_da_compname(
1535 struct xfs_da_args *args,
1536 const char *name,
1537 int len)
1538{
1539 return (args->namelen == len && memcmp(args->name, name, len) == 0) ?
1540 XFS_CMP_EXACT : XFS_CMP_DIFFERENT;
1541}
1542
1543static xfs_dahash_t
1544xfs_default_hashname(
1545 struct xfs_name *name)
1546{
1547 return xfs_da_hashname(name->name, name->len);
1548}
1549
1550const struct xfs_nameops xfs_default_nameops = {
1551 .hashname = xfs_default_hashname,
1552 .compname = xfs_da_compname
1553};
1554
1533/* 1555/*
1534 * Add a block to the btree ahead of the file. 1556 * Add a block to the btree ahead of the file.
1535 * Return the new block number to the caller. 1557 * Return the new block number to the caller.
@@ -1598,7 +1620,7 @@ xfs_da_grow_inode(xfs_da_args_t *args, xfs_dablk_t *new_blkno)
1598 args->firstblock, args->total, 1620 args->firstblock, args->total,
1599 &mapp[mapi], &nmap, args->flist, 1621 &mapp[mapi], &nmap, args->flist,
1600 NULL))) { 1622 NULL))) {
1601 kmem_free(mapp, sizeof(*mapp) * count); 1623 kmem_free(mapp);
1602 return error; 1624 return error;
1603 } 1625 }
1604 if (nmap < 1) 1626 if (nmap < 1)
@@ -1620,11 +1642,11 @@ xfs_da_grow_inode(xfs_da_args_t *args, xfs_dablk_t *new_blkno)
1620 mapp[mapi - 1].br_startoff + mapp[mapi - 1].br_blockcount != 1642 mapp[mapi - 1].br_startoff + mapp[mapi - 1].br_blockcount !=
1621 bno + count) { 1643 bno + count) {
1622 if (mapp != &map) 1644 if (mapp != &map)
1623 kmem_free(mapp, sizeof(*mapp) * count); 1645 kmem_free(mapp);
1624 return XFS_ERROR(ENOSPC); 1646 return XFS_ERROR(ENOSPC);
1625 } 1647 }
1626 if (mapp != &map) 1648 if (mapp != &map)
1627 kmem_free(mapp, sizeof(*mapp) * count); 1649 kmem_free(mapp);
1628 *new_blkno = (xfs_dablk_t)bno; 1650 *new_blkno = (xfs_dablk_t)bno;
1629 return 0; 1651 return 0;
1630} 1652}
@@ -2090,10 +2112,10 @@ xfs_da_do_buf(
2090 } 2112 }
2091 } 2113 }
2092 if (bplist) { 2114 if (bplist) {
2093 kmem_free(bplist, sizeof(*bplist) * nmap); 2115 kmem_free(bplist);
2094 } 2116 }
2095 if (mapp != &map) { 2117 if (mapp != &map) {
2096 kmem_free(mapp, sizeof(*mapp) * nfsb); 2118 kmem_free(mapp);
2097 } 2119 }
2098 if (bpp) 2120 if (bpp)
2099 *bpp = rbp; 2121 *bpp = rbp;
@@ -2102,11 +2124,11 @@ exit1:
2102 if (bplist) { 2124 if (bplist) {
2103 for (i = 0; i < nbplist; i++) 2125 for (i = 0; i < nbplist; i++)
2104 xfs_trans_brelse(trans, bplist[i]); 2126 xfs_trans_brelse(trans, bplist[i]);
2105 kmem_free(bplist, sizeof(*bplist) * nmap); 2127 kmem_free(bplist);
2106 } 2128 }
2107exit0: 2129exit0:
2108 if (mapp != &map) 2130 if (mapp != &map)
2109 kmem_free(mapp, sizeof(*mapp) * nfsb); 2131 kmem_free(mapp);
2110 if (bpp) 2132 if (bpp)
2111 *bpp = NULL; 2133 *bpp = NULL;
2112 return error; 2134 return error;
@@ -2218,7 +2240,7 @@ xfs_da_state_free(xfs_da_state_t *state)
2218 2240
2219#ifdef XFS_DABUF_DEBUG 2241#ifdef XFS_DABUF_DEBUG
2220xfs_dabuf_t *xfs_dabuf_global_list; 2242xfs_dabuf_t *xfs_dabuf_global_list;
2221spinlock_t xfs_dabuf_global_lock; 2243static DEFINE_SPINLOCK(xfs_dabuf_global_lock);
2222#endif 2244#endif
2223 2245
2224/* 2246/*
@@ -2315,7 +2337,7 @@ xfs_da_buf_done(xfs_dabuf_t *dabuf)
2315 if (dabuf->dirty) 2337 if (dabuf->dirty)
2316 xfs_da_buf_clean(dabuf); 2338 xfs_da_buf_clean(dabuf);
2317 if (dabuf->nbuf > 1) 2339 if (dabuf->nbuf > 1)
2318 kmem_free(dabuf->data, BBTOB(dabuf->bbcount)); 2340 kmem_free(dabuf->data);
2319#ifdef XFS_DABUF_DEBUG 2341#ifdef XFS_DABUF_DEBUG
2320 { 2342 {
2321 spin_lock(&xfs_dabuf_global_lock); 2343 spin_lock(&xfs_dabuf_global_lock);
@@ -2332,7 +2354,7 @@ xfs_da_buf_done(xfs_dabuf_t *dabuf)
2332 if (dabuf->nbuf == 1) 2354 if (dabuf->nbuf == 1)
2333 kmem_zone_free(xfs_dabuf_zone, dabuf); 2355 kmem_zone_free(xfs_dabuf_zone, dabuf);
2334 else 2356 else
2335 kmem_free(dabuf, XFS_DA_BUF_SIZE(dabuf->nbuf)); 2357 kmem_free(dabuf);
2336} 2358}
2337 2359
2338/* 2360/*
@@ -2403,7 +2425,7 @@ xfs_da_brelse(xfs_trans_t *tp, xfs_dabuf_t *dabuf)
2403 for (i = 0; i < nbuf; i++) 2425 for (i = 0; i < nbuf; i++)
2404 xfs_trans_brelse(tp, bplist[i]); 2426 xfs_trans_brelse(tp, bplist[i]);
2405 if (bplist != &bp) 2427 if (bplist != &bp)
2406 kmem_free(bplist, nbuf * sizeof(*bplist)); 2428 kmem_free(bplist);
2407} 2429}
2408 2430
2409/* 2431/*
@@ -2429,7 +2451,7 @@ xfs_da_binval(xfs_trans_t *tp, xfs_dabuf_t *dabuf)
2429 for (i = 0; i < nbuf; i++) 2451 for (i = 0; i < nbuf; i++)
2430 xfs_trans_binval(tp, bplist[i]); 2452 xfs_trans_binval(tp, bplist[i]);
2431 if (bplist != &bp) 2453 if (bplist != &bp)
2432 kmem_free(bplist, nbuf * sizeof(*bplist)); 2454 kmem_free(bplist);
2433} 2455}
2434 2456
2435/* 2457/*
diff --git a/fs/xfs/xfs_da_btree.h b/fs/xfs/xfs_da_btree.h
index 7facf86f74f9..8be0b00ede9a 100644
--- a/fs/xfs/xfs_da_btree.h
+++ b/fs/xfs/xfs_da_btree.h
@@ -99,6 +99,15 @@ typedef struct xfs_da_node_entry xfs_da_node_entry_t;
99 *========================================================================*/ 99 *========================================================================*/
100 100
101/* 101/*
102 * Search comparison results
103 */
104enum xfs_dacmp {
105 XFS_CMP_DIFFERENT, /* names are completely different */
106 XFS_CMP_EXACT, /* names are exactly the same */
107 XFS_CMP_CASE /* names are same but differ in case */
108};
109
110/*
102 * Structure to ease passing around component names. 111 * Structure to ease passing around component names.
103 */ 112 */
104typedef struct xfs_da_args { 113typedef struct xfs_da_args {
@@ -123,13 +132,20 @@ typedef struct xfs_da_args {
123 int index2; /* index of 2nd attr in blk */ 132 int index2; /* index of 2nd attr in blk */
124 xfs_dablk_t rmtblkno2; /* remote attr value starting blkno */ 133 xfs_dablk_t rmtblkno2; /* remote attr value starting blkno */
125 int rmtblkcnt2; /* remote attr value block count */ 134 int rmtblkcnt2; /* remote attr value block count */
126 unsigned char justcheck; /* T/F: check for ok with no space */ 135 int op_flags; /* operation flags */
127 unsigned char rename; /* T/F: this is an atomic rename op */ 136 enum xfs_dacmp cmpresult; /* name compare result for lookups */
128 unsigned char addname; /* T/F: this is an add operation */
129 unsigned char oknoent; /* T/F: ok to return ENOENT, else die */
130} xfs_da_args_t; 137} xfs_da_args_t;
131 138
132/* 139/*
140 * Operation flags:
141 */
142#define XFS_DA_OP_JUSTCHECK 0x0001 /* check for ok with no space */
143#define XFS_DA_OP_RENAME 0x0002 /* this is an atomic rename op */
144#define XFS_DA_OP_ADDNAME 0x0004 /* this is an add operation */
145#define XFS_DA_OP_OKNOENT 0x0008 /* lookup/add op, ENOENT ok, else die */
146#define XFS_DA_OP_CILOOKUP 0x0010 /* lookup to return CI name if found */
147
148/*
133 * Structure to describe buffer(s) for a block. 149 * Structure to describe buffer(s) for a block.
134 * This is needed in the directory version 2 format case, when 150 * This is needed in the directory version 2 format case, when
135 * multiple non-contiguous fsblocks might be needed to cover one 151 * multiple non-contiguous fsblocks might be needed to cover one
@@ -201,6 +217,14 @@ typedef struct xfs_da_state {
201 (uint)(XFS_DA_LOGOFF(BASE, ADDR)), \ 217 (uint)(XFS_DA_LOGOFF(BASE, ADDR)), \
202 (uint)(XFS_DA_LOGOFF(BASE, ADDR)+(SIZE)-1) 218 (uint)(XFS_DA_LOGOFF(BASE, ADDR)+(SIZE)-1)
203 219
220/*
221 * Name ops for directory and/or attr name operations
222 */
223struct xfs_nameops {
224 xfs_dahash_t (*hashname)(struct xfs_name *);
225 enum xfs_dacmp (*compname)(struct xfs_da_args *, const char *, int);
226};
227
204 228
205#ifdef __KERNEL__ 229#ifdef __KERNEL__
206/*======================================================================== 230/*========================================================================
@@ -249,6 +273,10 @@ int xfs_da_shrink_inode(xfs_da_args_t *args, xfs_dablk_t dead_blkno,
249 xfs_dabuf_t *dead_buf); 273 xfs_dabuf_t *dead_buf);
250 274
251uint xfs_da_hashname(const uchar_t *name_string, int name_length); 275uint xfs_da_hashname(const uchar_t *name_string, int name_length);
276enum xfs_dacmp xfs_da_compname(struct xfs_da_args *args,
277 const char *name, int len);
278
279
252xfs_da_state_t *xfs_da_state_alloc(void); 280xfs_da_state_t *xfs_da_state_alloc(void);
253void xfs_da_state_free(xfs_da_state_t *state); 281void xfs_da_state_free(xfs_da_state_t *state);
254 282
diff --git a/fs/xfs/xfs_dfrag.c b/fs/xfs/xfs_dfrag.c
index 5f3647cb9885..2211e885ef24 100644
--- a/fs/xfs/xfs_dfrag.c
+++ b/fs/xfs/xfs_dfrag.c
@@ -116,7 +116,7 @@ xfs_swapext(
116 out_put_file: 116 out_put_file:
117 fput(file); 117 fput(file);
118 out_free_sxp: 118 out_free_sxp:
119 kmem_free(sxp, sizeof(xfs_swapext_t)); 119 kmem_free(sxp);
120 out: 120 out:
121 return error; 121 return error;
122} 122}
@@ -381,6 +381,6 @@ xfs_swap_extents(
381 xfs_iunlock(tip, lock_flags); 381 xfs_iunlock(tip, lock_flags);
382 } 382 }
383 if (tempifp != NULL) 383 if (tempifp != NULL)
384 kmem_free(tempifp, sizeof(xfs_ifork_t)); 384 kmem_free(tempifp);
385 return error; 385 return error;
386} 386}
diff --git a/fs/xfs/xfs_dir2.c b/fs/xfs/xfs_dir2.c
index 7cb26529766b..80e0dc51361c 100644
--- a/fs/xfs/xfs_dir2.c
+++ b/fs/xfs/xfs_dir2.c
@@ -46,6 +46,54 @@
46 46
47struct xfs_name xfs_name_dotdot = {"..", 2}; 47struct xfs_name xfs_name_dotdot = {"..", 2};
48 48
49extern const struct xfs_nameops xfs_default_nameops;
50
51/*
52 * ASCII case-insensitive (ie. A-Z) support for directories that was
53 * used in IRIX.
54 */
55STATIC xfs_dahash_t
56xfs_ascii_ci_hashname(
57 struct xfs_name *name)
58{
59 xfs_dahash_t hash;
60 int i;
61
62 for (i = 0, hash = 0; i < name->len; i++)
63 hash = tolower(name->name[i]) ^ rol32(hash, 7);
64
65 return hash;
66}
67
68STATIC enum xfs_dacmp
69xfs_ascii_ci_compname(
70 struct xfs_da_args *args,
71 const char *name,
72 int len)
73{
74 enum xfs_dacmp result;
75 int i;
76
77 if (args->namelen != len)
78 return XFS_CMP_DIFFERENT;
79
80 result = XFS_CMP_EXACT;
81 for (i = 0; i < len; i++) {
82 if (args->name[i] == name[i])
83 continue;
84 if (tolower(args->name[i]) != tolower(name[i]))
85 return XFS_CMP_DIFFERENT;
86 result = XFS_CMP_CASE;
87 }
88
89 return result;
90}
91
92static struct xfs_nameops xfs_ascii_ci_nameops = {
93 .hashname = xfs_ascii_ci_hashname,
94 .compname = xfs_ascii_ci_compname,
95};
96
49void 97void
50xfs_dir_mount( 98xfs_dir_mount(
51 xfs_mount_t *mp) 99 xfs_mount_t *mp)
@@ -65,6 +113,10 @@ xfs_dir_mount(
65 (mp->m_dirblksize - (uint)sizeof(xfs_da_node_hdr_t)) / 113 (mp->m_dirblksize - (uint)sizeof(xfs_da_node_hdr_t)) /
66 (uint)sizeof(xfs_da_node_entry_t); 114 (uint)sizeof(xfs_da_node_entry_t);
67 mp->m_dir_magicpct = (mp->m_dirblksize * 37) / 100; 115 mp->m_dir_magicpct = (mp->m_dirblksize * 37) / 100;
116 if (xfs_sb_version_hasasciici(&mp->m_sb))
117 mp->m_dirnameops = &xfs_ascii_ci_nameops;
118 else
119 mp->m_dirnameops = &xfs_default_nameops;
68} 120}
69 121
70/* 122/*
@@ -162,9 +214,10 @@ xfs_dir_createname(
162 return rval; 214 return rval;
163 XFS_STATS_INC(xs_dir_create); 215 XFS_STATS_INC(xs_dir_create);
164 216
217 memset(&args, 0, sizeof(xfs_da_args_t));
165 args.name = name->name; 218 args.name = name->name;
166 args.namelen = name->len; 219 args.namelen = name->len;
167 args.hashval = xfs_da_hashname(name->name, name->len); 220 args.hashval = dp->i_mount->m_dirnameops->hashname(name);
168 args.inumber = inum; 221 args.inumber = inum;
169 args.dp = dp; 222 args.dp = dp;
170 args.firstblock = first; 223 args.firstblock = first;
@@ -172,8 +225,7 @@ xfs_dir_createname(
172 args.total = total; 225 args.total = total;
173 args.whichfork = XFS_DATA_FORK; 226 args.whichfork = XFS_DATA_FORK;
174 args.trans = tp; 227 args.trans = tp;
175 args.justcheck = 0; 228 args.op_flags = XFS_DA_OP_ADDNAME | XFS_DA_OP_OKNOENT;
176 args.addname = args.oknoent = 1;
177 229
178 if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) 230 if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL)
179 rval = xfs_dir2_sf_addname(&args); 231 rval = xfs_dir2_sf_addname(&args);
@@ -191,14 +243,43 @@ xfs_dir_createname(
191} 243}
192 244
193/* 245/*
246 * If doing a CI lookup and case-insensitive match, dup actual name into
247 * args.value. Return EEXIST for success (ie. name found) or an error.
248 */
249int
250xfs_dir_cilookup_result(
251 struct xfs_da_args *args,
252 const char *name,
253 int len)
254{
255 if (args->cmpresult == XFS_CMP_DIFFERENT)
256 return ENOENT;
257 if (args->cmpresult != XFS_CMP_CASE ||
258 !(args->op_flags & XFS_DA_OP_CILOOKUP))
259 return EEXIST;
260
261 args->value = kmem_alloc(len, KM_MAYFAIL);
262 if (!args->value)
263 return ENOMEM;
264
265 memcpy(args->value, name, len);
266 args->valuelen = len;
267 return EEXIST;
268}
269
270/*
194 * Lookup a name in a directory, give back the inode number. 271 * Lookup a name in a directory, give back the inode number.
272 * If ci_name is not NULL, returns the actual name in ci_name if it differs
273 * to name, or ci_name->name is set to NULL for an exact match.
195 */ 274 */
275
196int 276int
197xfs_dir_lookup( 277xfs_dir_lookup(
198 xfs_trans_t *tp, 278 xfs_trans_t *tp,
199 xfs_inode_t *dp, 279 xfs_inode_t *dp,
200 struct xfs_name *name, 280 struct xfs_name *name,
201 xfs_ino_t *inum) /* out: inode number */ 281 xfs_ino_t *inum, /* out: inode number */
282 struct xfs_name *ci_name) /* out: actual name if CI match */
202{ 283{
203 xfs_da_args_t args; 284 xfs_da_args_t args;
204 int rval; 285 int rval;
@@ -206,15 +287,17 @@ xfs_dir_lookup(
206 287
207 ASSERT((dp->i_d.di_mode & S_IFMT) == S_IFDIR); 288 ASSERT((dp->i_d.di_mode & S_IFMT) == S_IFDIR);
208 XFS_STATS_INC(xs_dir_lookup); 289 XFS_STATS_INC(xs_dir_lookup);
209 memset(&args, 0, sizeof(xfs_da_args_t));
210 290
291 memset(&args, 0, sizeof(xfs_da_args_t));
211 args.name = name->name; 292 args.name = name->name;
212 args.namelen = name->len; 293 args.namelen = name->len;
213 args.hashval = xfs_da_hashname(name->name, name->len); 294 args.hashval = dp->i_mount->m_dirnameops->hashname(name);
214 args.dp = dp; 295 args.dp = dp;
215 args.whichfork = XFS_DATA_FORK; 296 args.whichfork = XFS_DATA_FORK;
216 args.trans = tp; 297 args.trans = tp;
217 args.oknoent = 1; 298 args.op_flags = XFS_DA_OP_OKNOENT;
299 if (ci_name)
300 args.op_flags |= XFS_DA_OP_CILOOKUP;
218 301
219 if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) 302 if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL)
220 rval = xfs_dir2_sf_lookup(&args); 303 rval = xfs_dir2_sf_lookup(&args);
@@ -230,8 +313,13 @@ xfs_dir_lookup(
230 rval = xfs_dir2_node_lookup(&args); 313 rval = xfs_dir2_node_lookup(&args);
231 if (rval == EEXIST) 314 if (rval == EEXIST)
232 rval = 0; 315 rval = 0;
233 if (rval == 0) 316 if (!rval) {
234 *inum = args.inumber; 317 *inum = args.inumber;
318 if (ci_name) {
319 ci_name->name = args.value;
320 ci_name->len = args.valuelen;
321 }
322 }
235 return rval; 323 return rval;
236} 324}
237 325
@@ -255,9 +343,10 @@ xfs_dir_removename(
255 ASSERT((dp->i_d.di_mode & S_IFMT) == S_IFDIR); 343 ASSERT((dp->i_d.di_mode & S_IFMT) == S_IFDIR);
256 XFS_STATS_INC(xs_dir_remove); 344 XFS_STATS_INC(xs_dir_remove);
257 345
346 memset(&args, 0, sizeof(xfs_da_args_t));
258 args.name = name->name; 347 args.name = name->name;
259 args.namelen = name->len; 348 args.namelen = name->len;
260 args.hashval = xfs_da_hashname(name->name, name->len); 349 args.hashval = dp->i_mount->m_dirnameops->hashname(name);
261 args.inumber = ino; 350 args.inumber = ino;
262 args.dp = dp; 351 args.dp = dp;
263 args.firstblock = first; 352 args.firstblock = first;
@@ -265,7 +354,6 @@ xfs_dir_removename(
265 args.total = total; 354 args.total = total;
266 args.whichfork = XFS_DATA_FORK; 355 args.whichfork = XFS_DATA_FORK;
267 args.trans = tp; 356 args.trans = tp;
268 args.justcheck = args.addname = args.oknoent = 0;
269 357
270 if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) 358 if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL)
271 rval = xfs_dir2_sf_removename(&args); 359 rval = xfs_dir2_sf_removename(&args);
@@ -338,9 +426,10 @@ xfs_dir_replace(
338 if ((rval = xfs_dir_ino_validate(tp->t_mountp, inum))) 426 if ((rval = xfs_dir_ino_validate(tp->t_mountp, inum)))
339 return rval; 427 return rval;
340 428
429 memset(&args, 0, sizeof(xfs_da_args_t));
341 args.name = name->name; 430 args.name = name->name;
342 args.namelen = name->len; 431 args.namelen = name->len;
343 args.hashval = xfs_da_hashname(name->name, name->len); 432 args.hashval = dp->i_mount->m_dirnameops->hashname(name);
344 args.inumber = inum; 433 args.inumber = inum;
345 args.dp = dp; 434 args.dp = dp;
346 args.firstblock = first; 435 args.firstblock = first;
@@ -348,7 +437,6 @@ xfs_dir_replace(
348 args.total = total; 437 args.total = total;
349 args.whichfork = XFS_DATA_FORK; 438 args.whichfork = XFS_DATA_FORK;
350 args.trans = tp; 439 args.trans = tp;
351 args.justcheck = args.addname = args.oknoent = 0;
352 440
353 if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) 441 if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL)
354 rval = xfs_dir2_sf_replace(&args); 442 rval = xfs_dir2_sf_replace(&args);
@@ -384,15 +472,16 @@ xfs_dir_canenter(
384 return 0; 472 return 0;
385 473
386 ASSERT((dp->i_d.di_mode & S_IFMT) == S_IFDIR); 474 ASSERT((dp->i_d.di_mode & S_IFMT) == S_IFDIR);
387 memset(&args, 0, sizeof(xfs_da_args_t));
388 475
476 memset(&args, 0, sizeof(xfs_da_args_t));
389 args.name = name->name; 477 args.name = name->name;
390 args.namelen = name->len; 478 args.namelen = name->len;
391 args.hashval = xfs_da_hashname(name->name, name->len); 479 args.hashval = dp->i_mount->m_dirnameops->hashname(name);
392 args.dp = dp; 480 args.dp = dp;
393 args.whichfork = XFS_DATA_FORK; 481 args.whichfork = XFS_DATA_FORK;
394 args.trans = tp; 482 args.trans = tp;
395 args.justcheck = args.addname = args.oknoent = 1; 483 args.op_flags = XFS_DA_OP_JUSTCHECK | XFS_DA_OP_ADDNAME |
484 XFS_DA_OP_OKNOENT;
396 485
397 if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) 486 if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL)
398 rval = xfs_dir2_sf_addname(&args); 487 rval = xfs_dir2_sf_addname(&args);
@@ -493,7 +582,7 @@ xfs_dir2_grow_inode(
493 args->firstblock, args->total, 582 args->firstblock, args->total,
494 &mapp[mapi], &nmap, args->flist, 583 &mapp[mapi], &nmap, args->flist,
495 NULL))) { 584 NULL))) {
496 kmem_free(mapp, sizeof(*mapp) * count); 585 kmem_free(mapp);
497 return error; 586 return error;
498 } 587 }
499 if (nmap < 1) 588 if (nmap < 1)
@@ -525,14 +614,14 @@ xfs_dir2_grow_inode(
525 mapp[mapi - 1].br_startoff + mapp[mapi - 1].br_blockcount != 614 mapp[mapi - 1].br_startoff + mapp[mapi - 1].br_blockcount !=
526 bno + count) { 615 bno + count) {
527 if (mapp != &map) 616 if (mapp != &map)
528 kmem_free(mapp, sizeof(*mapp) * count); 617 kmem_free(mapp);
529 return XFS_ERROR(ENOSPC); 618 return XFS_ERROR(ENOSPC);
530 } 619 }
531 /* 620 /*
532 * Done with the temporary mapping table. 621 * Done with the temporary mapping table.
533 */ 622 */
534 if (mapp != &map) 623 if (mapp != &map)
535 kmem_free(mapp, sizeof(*mapp) * count); 624 kmem_free(mapp);
536 *dbp = xfs_dir2_da_to_db(mp, (xfs_dablk_t)bno); 625 *dbp = xfs_dir2_da_to_db(mp, (xfs_dablk_t)bno);
537 /* 626 /*
538 * Update file's size if this is the data space and it grew. 627 * Update file's size if this is the data space and it grew.
diff --git a/fs/xfs/xfs_dir2.h b/fs/xfs/xfs_dir2.h
index 6392f939029f..1d9ef96f33aa 100644
--- a/fs/xfs/xfs_dir2.h
+++ b/fs/xfs/xfs_dir2.h
@@ -74,7 +74,8 @@ extern int xfs_dir_createname(struct xfs_trans *tp, struct xfs_inode *dp,
74 xfs_fsblock_t *first, 74 xfs_fsblock_t *first,
75 struct xfs_bmap_free *flist, xfs_extlen_t tot); 75 struct xfs_bmap_free *flist, xfs_extlen_t tot);
76extern int xfs_dir_lookup(struct xfs_trans *tp, struct xfs_inode *dp, 76extern int xfs_dir_lookup(struct xfs_trans *tp, struct xfs_inode *dp,
77 struct xfs_name *name, xfs_ino_t *inum); 77 struct xfs_name *name, xfs_ino_t *inum,
78 struct xfs_name *ci_name);
78extern int xfs_dir_removename(struct xfs_trans *tp, struct xfs_inode *dp, 79extern int xfs_dir_removename(struct xfs_trans *tp, struct xfs_inode *dp,
79 struct xfs_name *name, xfs_ino_t ino, 80 struct xfs_name *name, xfs_ino_t ino,
80 xfs_fsblock_t *first, 81 xfs_fsblock_t *first,
@@ -99,4 +100,7 @@ extern int xfs_dir2_isleaf(struct xfs_trans *tp, struct xfs_inode *dp,
99extern int xfs_dir2_shrink_inode(struct xfs_da_args *args, xfs_dir2_db_t db, 100extern int xfs_dir2_shrink_inode(struct xfs_da_args *args, xfs_dir2_db_t db,
100 struct xfs_dabuf *bp); 101 struct xfs_dabuf *bp);
101 102
103extern int xfs_dir_cilookup_result(struct xfs_da_args *args, const char *name,
104 int len);
105
102#endif /* __XFS_DIR2_H__ */ 106#endif /* __XFS_DIR2_H__ */
diff --git a/fs/xfs/xfs_dir2_block.c b/fs/xfs/xfs_dir2_block.c
index fb5a556725b3..e2fa0a1d8e96 100644
--- a/fs/xfs/xfs_dir2_block.c
+++ b/fs/xfs/xfs_dir2_block.c
@@ -215,7 +215,7 @@ xfs_dir2_block_addname(
215 /* 215 /*
216 * If this isn't a real add, we're done with the buffer. 216 * If this isn't a real add, we're done with the buffer.
217 */ 217 */
218 if (args->justcheck) 218 if (args->op_flags & XFS_DA_OP_JUSTCHECK)
219 xfs_da_brelse(tp, bp); 219 xfs_da_brelse(tp, bp);
220 /* 220 /*
221 * If we don't have space for the new entry & leaf ... 221 * If we don't have space for the new entry & leaf ...
@@ -225,7 +225,7 @@ xfs_dir2_block_addname(
225 * Not trying to actually do anything, or don't have 225 * Not trying to actually do anything, or don't have
226 * a space reservation: return no-space. 226 * a space reservation: return no-space.
227 */ 227 */
228 if (args->justcheck || args->total == 0) 228 if ((args->op_flags & XFS_DA_OP_JUSTCHECK) || args->total == 0)
229 return XFS_ERROR(ENOSPC); 229 return XFS_ERROR(ENOSPC);
230 /* 230 /*
231 * Convert to the next larger format. 231 * Convert to the next larger format.
@@ -240,7 +240,7 @@ xfs_dir2_block_addname(
240 /* 240 /*
241 * Just checking, and it would work, so say so. 241 * Just checking, and it would work, so say so.
242 */ 242 */
243 if (args->justcheck) 243 if (args->op_flags & XFS_DA_OP_JUSTCHECK)
244 return 0; 244 return 0;
245 needlog = needscan = 0; 245 needlog = needscan = 0;
246 /* 246 /*
@@ -610,14 +610,15 @@ xfs_dir2_block_lookup(
610 /* 610 /*
611 * Get the offset from the leaf entry, to point to the data. 611 * Get the offset from the leaf entry, to point to the data.
612 */ 612 */
613 dep = (xfs_dir2_data_entry_t *) 613 dep = (xfs_dir2_data_entry_t *)((char *)block +
614 ((char *)block + xfs_dir2_dataptr_to_off(mp, be32_to_cpu(blp[ent].address))); 614 xfs_dir2_dataptr_to_off(mp, be32_to_cpu(blp[ent].address)));
615 /* 615 /*
616 * Fill in inode number, release the block. 616 * Fill in inode number, CI name if appropriate, release the block.
617 */ 617 */
618 args->inumber = be64_to_cpu(dep->inumber); 618 args->inumber = be64_to_cpu(dep->inumber);
619 error = xfs_dir_cilookup_result(args, dep->name, dep->namelen);
619 xfs_da_brelse(args->trans, bp); 620 xfs_da_brelse(args->trans, bp);
620 return XFS_ERROR(EEXIST); 621 return XFS_ERROR(error);
621} 622}
622 623
623/* 624/*
@@ -643,6 +644,7 @@ xfs_dir2_block_lookup_int(
643 int mid; /* binary search current idx */ 644 int mid; /* binary search current idx */
644 xfs_mount_t *mp; /* filesystem mount point */ 645 xfs_mount_t *mp; /* filesystem mount point */
645 xfs_trans_t *tp; /* transaction pointer */ 646 xfs_trans_t *tp; /* transaction pointer */
647 enum xfs_dacmp cmp; /* comparison result */
646 648
647 dp = args->dp; 649 dp = args->dp;
648 tp = args->trans; 650 tp = args->trans;
@@ -673,7 +675,7 @@ xfs_dir2_block_lookup_int(
673 else 675 else
674 high = mid - 1; 676 high = mid - 1;
675 if (low > high) { 677 if (low > high) {
676 ASSERT(args->oknoent); 678 ASSERT(args->op_flags & XFS_DA_OP_OKNOENT);
677 xfs_da_brelse(tp, bp); 679 xfs_da_brelse(tp, bp);
678 return XFS_ERROR(ENOENT); 680 return XFS_ERROR(ENOENT);
679 } 681 }
@@ -697,20 +699,31 @@ xfs_dir2_block_lookup_int(
697 dep = (xfs_dir2_data_entry_t *) 699 dep = (xfs_dir2_data_entry_t *)
698 ((char *)block + xfs_dir2_dataptr_to_off(mp, addr)); 700 ((char *)block + xfs_dir2_dataptr_to_off(mp, addr));
699 /* 701 /*
700 * Compare, if it's right give back buffer & entry number. 702 * Compare name and if it's an exact match, return the index
703 * and buffer. If it's the first case-insensitive match, store
704 * the index and buffer and continue looking for an exact match.
701 */ 705 */
702 if (dep->namelen == args->namelen && 706 cmp = mp->m_dirnameops->compname(args, dep->name, dep->namelen);
703 dep->name[0] == args->name[0] && 707 if (cmp != XFS_CMP_DIFFERENT && cmp != args->cmpresult) {
704 memcmp(dep->name, args->name, args->namelen) == 0) { 708 args->cmpresult = cmp;
705 *bpp = bp; 709 *bpp = bp;
706 *entno = mid; 710 *entno = mid;
707 return 0; 711 if (cmp == XFS_CMP_EXACT)
712 return 0;
708 } 713 }
709 } while (++mid < be32_to_cpu(btp->count) && be32_to_cpu(blp[mid].hashval) == hash); 714 } while (++mid < be32_to_cpu(btp->count) &&
715 be32_to_cpu(blp[mid].hashval) == hash);
716
717 ASSERT(args->op_flags & XFS_DA_OP_OKNOENT);
718 /*
719 * Here, we can only be doing a lookup (not a rename or replace).
720 * If a case-insensitive match was found earlier, return success.
721 */
722 if (args->cmpresult == XFS_CMP_CASE)
723 return 0;
710 /* 724 /*
711 * No match, release the buffer and return ENOENT. 725 * No match, release the buffer and return ENOENT.
712 */ 726 */
713 ASSERT(args->oknoent);
714 xfs_da_brelse(tp, bp); 727 xfs_da_brelse(tp, bp);
715 return XFS_ERROR(ENOENT); 728 return XFS_ERROR(ENOENT);
716} 729}
@@ -1033,6 +1046,7 @@ xfs_dir2_sf_to_block(
1033 xfs_dir2_sf_t *sfp; /* shortform structure */ 1046 xfs_dir2_sf_t *sfp; /* shortform structure */
1034 __be16 *tagp; /* end of data entry */ 1047 __be16 *tagp; /* end of data entry */
1035 xfs_trans_t *tp; /* transaction pointer */ 1048 xfs_trans_t *tp; /* transaction pointer */
1049 struct xfs_name name;
1036 1050
1037 xfs_dir2_trace_args("sf_to_block", args); 1051 xfs_dir2_trace_args("sf_to_block", args);
1038 dp = args->dp; 1052 dp = args->dp;
@@ -1071,7 +1085,7 @@ xfs_dir2_sf_to_block(
1071 */ 1085 */
1072 error = xfs_dir2_grow_inode(args, XFS_DIR2_DATA_SPACE, &blkno); 1086 error = xfs_dir2_grow_inode(args, XFS_DIR2_DATA_SPACE, &blkno);
1073 if (error) { 1087 if (error) {
1074 kmem_free(buf, buf_len); 1088 kmem_free(buf);
1075 return error; 1089 return error;
1076 } 1090 }
1077 /* 1091 /*
@@ -1079,7 +1093,7 @@ xfs_dir2_sf_to_block(
1079 */ 1093 */
1080 error = xfs_dir2_data_init(args, blkno, &bp); 1094 error = xfs_dir2_data_init(args, blkno, &bp);
1081 if (error) { 1095 if (error) {
1082 kmem_free(buf, buf_len); 1096 kmem_free(buf);
1083 return error; 1097 return error;
1084 } 1098 }
1085 block = bp->data; 1099 block = bp->data;
@@ -1187,8 +1201,10 @@ xfs_dir2_sf_to_block(
1187 tagp = xfs_dir2_data_entry_tag_p(dep); 1201 tagp = xfs_dir2_data_entry_tag_p(dep);
1188 *tagp = cpu_to_be16((char *)dep - (char *)block); 1202 *tagp = cpu_to_be16((char *)dep - (char *)block);
1189 xfs_dir2_data_log_entry(tp, bp, dep); 1203 xfs_dir2_data_log_entry(tp, bp, dep);
1190 blp[2 + i].hashval = cpu_to_be32(xfs_da_hashname( 1204 name.name = sfep->name;
1191 (char *)sfep->name, sfep->namelen)); 1205 name.len = sfep->namelen;
1206 blp[2 + i].hashval = cpu_to_be32(mp->m_dirnameops->
1207 hashname(&name));
1192 blp[2 + i].address = cpu_to_be32(xfs_dir2_byte_to_dataptr(mp, 1208 blp[2 + i].address = cpu_to_be32(xfs_dir2_byte_to_dataptr(mp,
1193 (char *)dep - (char *)block)); 1209 (char *)dep - (char *)block));
1194 offset = (int)((char *)(tagp + 1) - (char *)block); 1210 offset = (int)((char *)(tagp + 1) - (char *)block);
@@ -1198,7 +1214,7 @@ xfs_dir2_sf_to_block(
1198 sfep = xfs_dir2_sf_nextentry(sfp, sfep); 1214 sfep = xfs_dir2_sf_nextentry(sfp, sfep);
1199 } 1215 }
1200 /* Done with the temporary buffer */ 1216 /* Done with the temporary buffer */
1201 kmem_free(buf, buf_len); 1217 kmem_free(buf);
1202 /* 1218 /*
1203 * Sort the leaf entries by hash value. 1219 * Sort the leaf entries by hash value.
1204 */ 1220 */
diff --git a/fs/xfs/xfs_dir2_data.c b/fs/xfs/xfs_dir2_data.c
index fb8c9e08b23d..498f8d694330 100644
--- a/fs/xfs/xfs_dir2_data.c
+++ b/fs/xfs/xfs_dir2_data.c
@@ -65,6 +65,7 @@ xfs_dir2_data_check(
65 xfs_mount_t *mp; /* filesystem mount point */ 65 xfs_mount_t *mp; /* filesystem mount point */
66 char *p; /* current data position */ 66 char *p; /* current data position */
67 int stale; /* count of stale leaves */ 67 int stale; /* count of stale leaves */
68 struct xfs_name name;
68 69
69 mp = dp->i_mount; 70 mp = dp->i_mount;
70 d = bp->data; 71 d = bp->data;
@@ -140,7 +141,9 @@ xfs_dir2_data_check(
140 addr = xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk, 141 addr = xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk,
141 (xfs_dir2_data_aoff_t) 142 (xfs_dir2_data_aoff_t)
142 ((char *)dep - (char *)d)); 143 ((char *)dep - (char *)d));
143 hash = xfs_da_hashname((char *)dep->name, dep->namelen); 144 name.name = dep->name;
145 name.len = dep->namelen;
146 hash = mp->m_dirnameops->hashname(&name);
144 for (i = 0; i < be32_to_cpu(btp->count); i++) { 147 for (i = 0; i < be32_to_cpu(btp->count); i++) {
145 if (be32_to_cpu(lep[i].address) == addr && 148 if (be32_to_cpu(lep[i].address) == addr &&
146 be32_to_cpu(lep[i].hashval) == hash) 149 be32_to_cpu(lep[i].hashval) == hash)
diff --git a/fs/xfs/xfs_dir2_leaf.c b/fs/xfs/xfs_dir2_leaf.c
index bc52b803d79b..93535992cb60 100644
--- a/fs/xfs/xfs_dir2_leaf.c
+++ b/fs/xfs/xfs_dir2_leaf.c
@@ -263,20 +263,21 @@ xfs_dir2_leaf_addname(
263 * If we don't have enough free bytes but we can make enough 263 * If we don't have enough free bytes but we can make enough
264 * by compacting out stale entries, we'll do that. 264 * by compacting out stale entries, we'll do that.
265 */ 265 */
266 if ((char *)bestsp - (char *)&leaf->ents[be16_to_cpu(leaf->hdr.count)] < needbytes && 266 if ((char *)bestsp - (char *)&leaf->ents[be16_to_cpu(leaf->hdr.count)] <
267 be16_to_cpu(leaf->hdr.stale) > 1) { 267 needbytes && be16_to_cpu(leaf->hdr.stale) > 1) {
268 compact = 1; 268 compact = 1;
269 } 269 }
270 /* 270 /*
271 * Otherwise if we don't have enough free bytes we need to 271 * Otherwise if we don't have enough free bytes we need to
272 * convert to node form. 272 * convert to node form.
273 */ 273 */
274 else if ((char *)bestsp - (char *)&leaf->ents[be16_to_cpu(leaf->hdr.count)] < 274 else if ((char *)bestsp - (char *)&leaf->ents[be16_to_cpu(
275 needbytes) { 275 leaf->hdr.count)] < needbytes) {
276 /* 276 /*
277 * Just checking or no space reservation, give up. 277 * Just checking or no space reservation, give up.
278 */ 278 */
279 if (args->justcheck || args->total == 0) { 279 if ((args->op_flags & XFS_DA_OP_JUSTCHECK) ||
280 args->total == 0) {
280 xfs_da_brelse(tp, lbp); 281 xfs_da_brelse(tp, lbp);
281 return XFS_ERROR(ENOSPC); 282 return XFS_ERROR(ENOSPC);
282 } 283 }
@@ -301,7 +302,7 @@ xfs_dir2_leaf_addname(
301 * If just checking, then it will fit unless we needed to allocate 302 * If just checking, then it will fit unless we needed to allocate
302 * a new data block. 303 * a new data block.
303 */ 304 */
304 if (args->justcheck) { 305 if (args->op_flags & XFS_DA_OP_JUSTCHECK) {
305 xfs_da_brelse(tp, lbp); 306 xfs_da_brelse(tp, lbp);
306 return use_block == -1 ? XFS_ERROR(ENOSPC) : 0; 307 return use_block == -1 ? XFS_ERROR(ENOSPC) : 0;
307 } 308 }
@@ -1110,7 +1111,7 @@ xfs_dir2_leaf_getdents(
1110 *offset = XFS_DIR2_MAX_DATAPTR; 1111 *offset = XFS_DIR2_MAX_DATAPTR;
1111 else 1112 else
1112 *offset = xfs_dir2_byte_to_dataptr(mp, curoff); 1113 *offset = xfs_dir2_byte_to_dataptr(mp, curoff);
1113 kmem_free(map, map_size * sizeof(*map)); 1114 kmem_free(map);
1114 if (bp) 1115 if (bp)
1115 xfs_da_brelse(NULL, bp); 1116 xfs_da_brelse(NULL, bp);
1116 return error; 1117 return error;
@@ -1298,12 +1299,13 @@ xfs_dir2_leaf_lookup(
1298 ((char *)dbp->data + 1299 ((char *)dbp->data +
1299 xfs_dir2_dataptr_to_off(dp->i_mount, be32_to_cpu(lep->address))); 1300 xfs_dir2_dataptr_to_off(dp->i_mount, be32_to_cpu(lep->address)));
1300 /* 1301 /*
1301 * Return the found inode number. 1302 * Return the found inode number & CI name if appropriate
1302 */ 1303 */
1303 args->inumber = be64_to_cpu(dep->inumber); 1304 args->inumber = be64_to_cpu(dep->inumber);
1305 error = xfs_dir_cilookup_result(args, dep->name, dep->namelen);
1304 xfs_da_brelse(tp, dbp); 1306 xfs_da_brelse(tp, dbp);
1305 xfs_da_brelse(tp, lbp); 1307 xfs_da_brelse(tp, lbp);
1306 return XFS_ERROR(EEXIST); 1308 return XFS_ERROR(error);
1307} 1309}
1308 1310
1309/* 1311/*
@@ -1319,8 +1321,8 @@ xfs_dir2_leaf_lookup_int(
1319 int *indexp, /* out: index in leaf block */ 1321 int *indexp, /* out: index in leaf block */
1320 xfs_dabuf_t **dbpp) /* out: data buffer */ 1322 xfs_dabuf_t **dbpp) /* out: data buffer */
1321{ 1323{
1322 xfs_dir2_db_t curdb; /* current data block number */ 1324 xfs_dir2_db_t curdb = -1; /* current data block number */
1323 xfs_dabuf_t *dbp; /* data buffer */ 1325 xfs_dabuf_t *dbp = NULL; /* data buffer */
1324 xfs_dir2_data_entry_t *dep; /* data entry */ 1326 xfs_dir2_data_entry_t *dep; /* data entry */
1325 xfs_inode_t *dp; /* incore directory inode */ 1327 xfs_inode_t *dp; /* incore directory inode */
1326 int error; /* error return code */ 1328 int error; /* error return code */
@@ -1331,6 +1333,8 @@ xfs_dir2_leaf_lookup_int(
1331 xfs_mount_t *mp; /* filesystem mount point */ 1333 xfs_mount_t *mp; /* filesystem mount point */
1332 xfs_dir2_db_t newdb; /* new data block number */ 1334 xfs_dir2_db_t newdb; /* new data block number */
1333 xfs_trans_t *tp; /* transaction pointer */ 1335 xfs_trans_t *tp; /* transaction pointer */
1336 xfs_dir2_db_t cidb = -1; /* case match data block no. */
1337 enum xfs_dacmp cmp; /* name compare result */
1334 1338
1335 dp = args->dp; 1339 dp = args->dp;
1336 tp = args->trans; 1340 tp = args->trans;
@@ -1338,11 +1342,10 @@ xfs_dir2_leaf_lookup_int(
1338 /* 1342 /*
1339 * Read the leaf block into the buffer. 1343 * Read the leaf block into the buffer.
1340 */ 1344 */
1341 if ((error = 1345 error = xfs_da_read_buf(tp, dp, mp->m_dirleafblk, -1, &lbp,
1342 xfs_da_read_buf(tp, dp, mp->m_dirleafblk, -1, &lbp, 1346 XFS_DATA_FORK);
1343 XFS_DATA_FORK))) { 1347 if (error)
1344 return error; 1348 return error;
1345 }
1346 *lbpp = lbp; 1349 *lbpp = lbp;
1347 leaf = lbp->data; 1350 leaf = lbp->data;
1348 xfs_dir2_leaf_check(dp, lbp); 1351 xfs_dir2_leaf_check(dp, lbp);
@@ -1354,9 +1357,9 @@ xfs_dir2_leaf_lookup_int(
1354 * Loop over all the entries with the right hash value 1357 * Loop over all the entries with the right hash value
1355 * looking to match the name. 1358 * looking to match the name.
1356 */ 1359 */
1357 for (lep = &leaf->ents[index], dbp = NULL, curdb = -1; 1360 for (lep = &leaf->ents[index]; index < be16_to_cpu(leaf->hdr.count) &&
1358 index < be16_to_cpu(leaf->hdr.count) && be32_to_cpu(lep->hashval) == args->hashval; 1361 be32_to_cpu(lep->hashval) == args->hashval;
1359 lep++, index++) { 1362 lep++, index++) {
1360 /* 1363 /*
1361 * Skip over stale leaf entries. 1364 * Skip over stale leaf entries.
1362 */ 1365 */
@@ -1373,10 +1376,10 @@ xfs_dir2_leaf_lookup_int(
1373 if (newdb != curdb) { 1376 if (newdb != curdb) {
1374 if (dbp) 1377 if (dbp)
1375 xfs_da_brelse(tp, dbp); 1378 xfs_da_brelse(tp, dbp);
1376 if ((error = 1379 error = xfs_da_read_buf(tp, dp,
1377 xfs_da_read_buf(tp, dp, 1380 xfs_dir2_db_to_da(mp, newdb),
1378 xfs_dir2_db_to_da(mp, newdb), -1, &dbp, 1381 -1, &dbp, XFS_DATA_FORK);
1379 XFS_DATA_FORK))) { 1382 if (error) {
1380 xfs_da_brelse(tp, lbp); 1383 xfs_da_brelse(tp, lbp);
1381 return error; 1384 return error;
1382 } 1385 }
@@ -1386,24 +1389,50 @@ xfs_dir2_leaf_lookup_int(
1386 /* 1389 /*
1387 * Point to the data entry. 1390 * Point to the data entry.
1388 */ 1391 */
1389 dep = (xfs_dir2_data_entry_t *) 1392 dep = (xfs_dir2_data_entry_t *)((char *)dbp->data +
1390 ((char *)dbp->data + 1393 xfs_dir2_dataptr_to_off(mp, be32_to_cpu(lep->address)));
1391 xfs_dir2_dataptr_to_off(mp, be32_to_cpu(lep->address)));
1392 /* 1394 /*
1393 * If it matches then return it. 1395 * Compare name and if it's an exact match, return the index
1396 * and buffer. If it's the first case-insensitive match, store
1397 * the index and buffer and continue looking for an exact match.
1394 */ 1398 */
1395 if (dep->namelen == args->namelen && 1399 cmp = mp->m_dirnameops->compname(args, dep->name, dep->namelen);
1396 dep->name[0] == args->name[0] && 1400 if (cmp != XFS_CMP_DIFFERENT && cmp != args->cmpresult) {
1397 memcmp(dep->name, args->name, args->namelen) == 0) { 1401 args->cmpresult = cmp;
1398 *dbpp = dbp;
1399 *indexp = index; 1402 *indexp = index;
1400 return 0; 1403 /* case exact match: return the current buffer. */
1404 if (cmp == XFS_CMP_EXACT) {
1405 *dbpp = dbp;
1406 return 0;
1407 }
1408 cidb = curdb;
1401 } 1409 }
1402 } 1410 }
1411 ASSERT(args->op_flags & XFS_DA_OP_OKNOENT);
1412 /*
1413 * Here, we can only be doing a lookup (not a rename or remove).
1414 * If a case-insensitive match was found earlier, re-read the
1415 * appropriate data block if required and return it.
1416 */
1417 if (args->cmpresult == XFS_CMP_CASE) {
1418 ASSERT(cidb != -1);
1419 if (cidb != curdb) {
1420 xfs_da_brelse(tp, dbp);
1421 error = xfs_da_read_buf(tp, dp,
1422 xfs_dir2_db_to_da(mp, cidb),
1423 -1, &dbp, XFS_DATA_FORK);
1424 if (error) {
1425 xfs_da_brelse(tp, lbp);
1426 return error;
1427 }
1428 }
1429 *dbpp = dbp;
1430 return 0;
1431 }
1403 /* 1432 /*
1404 * No match found, return ENOENT. 1433 * No match found, return ENOENT.
1405 */ 1434 */
1406 ASSERT(args->oknoent); 1435 ASSERT(cidb == -1);
1407 if (dbp) 1436 if (dbp)
1408 xfs_da_brelse(tp, dbp); 1437 xfs_da_brelse(tp, dbp);
1409 xfs_da_brelse(tp, lbp); 1438 xfs_da_brelse(tp, lbp);
diff --git a/fs/xfs/xfs_dir2_node.c b/fs/xfs/xfs_dir2_node.c
index 8dade711f099..fa6c3a5ddbc6 100644
--- a/fs/xfs/xfs_dir2_node.c
+++ b/fs/xfs/xfs_dir2_node.c
@@ -226,7 +226,7 @@ xfs_dir2_leafn_add(
226 ASSERT(index == be16_to_cpu(leaf->hdr.count) || 226 ASSERT(index == be16_to_cpu(leaf->hdr.count) ||
227 be32_to_cpu(leaf->ents[index].hashval) >= args->hashval); 227 be32_to_cpu(leaf->ents[index].hashval) >= args->hashval);
228 228
229 if (args->justcheck) 229 if (args->op_flags & XFS_DA_OP_JUSTCHECK)
230 return 0; 230 return 0;
231 231
232 /* 232 /*
@@ -387,28 +387,26 @@ xfs_dir2_leafn_lasthash(
387} 387}
388 388
389/* 389/*
390 * Look up a leaf entry in a node-format leaf block. 390 * Look up a leaf entry for space to add a name in a node-format leaf block.
391 * If this is an addname then the extrablk in state is a freespace block, 391 * The extrablk in state is a freespace block.
392 * otherwise it's a data block.
393 */ 392 */
394int 393STATIC int
395xfs_dir2_leafn_lookup_int( 394xfs_dir2_leafn_lookup_for_addname(
396 xfs_dabuf_t *bp, /* leaf buffer */ 395 xfs_dabuf_t *bp, /* leaf buffer */
397 xfs_da_args_t *args, /* operation arguments */ 396 xfs_da_args_t *args, /* operation arguments */
398 int *indexp, /* out: leaf entry index */ 397 int *indexp, /* out: leaf entry index */
399 xfs_da_state_t *state) /* state to fill in */ 398 xfs_da_state_t *state) /* state to fill in */
400{ 399{
401 xfs_dabuf_t *curbp; /* current data/free buffer */ 400 xfs_dabuf_t *curbp = NULL; /* current data/free buffer */
402 xfs_dir2_db_t curdb; /* current data block number */ 401 xfs_dir2_db_t curdb = -1; /* current data block number */
403 xfs_dir2_db_t curfdb; /* current free block number */ 402 xfs_dir2_db_t curfdb = -1; /* current free block number */
404 xfs_dir2_data_entry_t *dep; /* data block entry */
405 xfs_inode_t *dp; /* incore directory inode */ 403 xfs_inode_t *dp; /* incore directory inode */
406 int error; /* error return value */ 404 int error; /* error return value */
407 int fi; /* free entry index */ 405 int fi; /* free entry index */
408 xfs_dir2_free_t *free=NULL; /* free block structure */ 406 xfs_dir2_free_t *free = NULL; /* free block structure */
409 int index; /* leaf entry index */ 407 int index; /* leaf entry index */
410 xfs_dir2_leaf_t *leaf; /* leaf structure */ 408 xfs_dir2_leaf_t *leaf; /* leaf structure */
411 int length=0; /* length of new data entry */ 409 int length; /* length of new data entry */
412 xfs_dir2_leaf_entry_t *lep; /* leaf entry */ 410 xfs_dir2_leaf_entry_t *lep; /* leaf entry */
413 xfs_mount_t *mp; /* filesystem mount point */ 411 xfs_mount_t *mp; /* filesystem mount point */
414 xfs_dir2_db_t newdb; /* new data block number */ 412 xfs_dir2_db_t newdb; /* new data block number */
@@ -431,33 +429,20 @@ xfs_dir2_leafn_lookup_int(
431 /* 429 /*
432 * Do we have a buffer coming in? 430 * Do we have a buffer coming in?
433 */ 431 */
434 if (state->extravalid) 432 if (state->extravalid) {
433 /* If so, it's a free block buffer, get the block number. */
435 curbp = state->extrablk.bp; 434 curbp = state->extrablk.bp;
436 else 435 curfdb = state->extrablk.blkno;
437 curbp = NULL; 436 free = curbp->data;
438 /* 437 ASSERT(be32_to_cpu(free->hdr.magic) == XFS_DIR2_FREE_MAGIC);
439 * For addname, it's a free block buffer, get the block number.
440 */
441 if (args->addname) {
442 curfdb = curbp ? state->extrablk.blkno : -1;
443 curdb = -1;
444 length = xfs_dir2_data_entsize(args->namelen);
445 if ((free = (curbp ? curbp->data : NULL)))
446 ASSERT(be32_to_cpu(free->hdr.magic) == XFS_DIR2_FREE_MAGIC);
447 }
448 /*
449 * For others, it's a data block buffer, get the block number.
450 */
451 else {
452 curfdb = -1;
453 curdb = curbp ? state->extrablk.blkno : -1;
454 } 438 }
439 length = xfs_dir2_data_entsize(args->namelen);
455 /* 440 /*
456 * Loop over leaf entries with the right hash value. 441 * Loop over leaf entries with the right hash value.
457 */ 442 */
458 for (lep = &leaf->ents[index]; 443 for (lep = &leaf->ents[index]; index < be16_to_cpu(leaf->hdr.count) &&
459 index < be16_to_cpu(leaf->hdr.count) && be32_to_cpu(lep->hashval) == args->hashval; 444 be32_to_cpu(lep->hashval) == args->hashval;
460 lep++, index++) { 445 lep++, index++) {
461 /* 446 /*
462 * Skip stale leaf entries. 447 * Skip stale leaf entries.
463 */ 448 */
@@ -471,161 +456,244 @@ xfs_dir2_leafn_lookup_int(
471 * For addname, we're looking for a place to put the new entry. 456 * For addname, we're looking for a place to put the new entry.
472 * We want to use a data block with an entry of equal 457 * We want to use a data block with an entry of equal
473 * hash value to ours if there is one with room. 458 * hash value to ours if there is one with room.
459 *
460 * If this block isn't the data block we already have
461 * in hand, take a look at it.
474 */ 462 */
475 if (args->addname) { 463 if (newdb != curdb) {
464 curdb = newdb;
476 /* 465 /*
477 * If this block isn't the data block we already have 466 * Convert the data block to the free block
478 * in hand, take a look at it. 467 * holding its freespace information.
479 */ 468 */
480 if (newdb != curdb) { 469 newfdb = xfs_dir2_db_to_fdb(mp, newdb);
481 curdb = newdb;
482 /*
483 * Convert the data block to the free block
484 * holding its freespace information.
485 */
486 newfdb = xfs_dir2_db_to_fdb(mp, newdb);
487 /*
488 * If it's not the one we have in hand,
489 * read it in.
490 */
491 if (newfdb != curfdb) {
492 /*
493 * If we had one before, drop it.
494 */
495 if (curbp)
496 xfs_da_brelse(tp, curbp);
497 /*
498 * Read the free block.
499 */
500 if ((error = xfs_da_read_buf(tp, dp,
501 xfs_dir2_db_to_da(mp,
502 newfdb),
503 -1, &curbp,
504 XFS_DATA_FORK))) {
505 return error;
506 }
507 free = curbp->data;
508 ASSERT(be32_to_cpu(free->hdr.magic) ==
509 XFS_DIR2_FREE_MAGIC);
510 ASSERT((be32_to_cpu(free->hdr.firstdb) %
511 XFS_DIR2_MAX_FREE_BESTS(mp)) ==
512 0);
513 ASSERT(be32_to_cpu(free->hdr.firstdb) <= curdb);
514 ASSERT(curdb <
515 be32_to_cpu(free->hdr.firstdb) +
516 be32_to_cpu(free->hdr.nvalid));
517 }
518 /*
519 * Get the index for our entry.
520 */
521 fi = xfs_dir2_db_to_fdindex(mp, curdb);
522 /*
523 * If it has room, return it.
524 */
525 if (unlikely(be16_to_cpu(free->bests[fi]) == NULLDATAOFF)) {
526 XFS_ERROR_REPORT("xfs_dir2_leafn_lookup_int",
527 XFS_ERRLEVEL_LOW, mp);
528 if (curfdb != newfdb)
529 xfs_da_brelse(tp, curbp);
530 return XFS_ERROR(EFSCORRUPTED);
531 }
532 curfdb = newfdb;
533 if (be16_to_cpu(free->bests[fi]) >= length) {
534 *indexp = index;
535 state->extravalid = 1;
536 state->extrablk.bp = curbp;
537 state->extrablk.blkno = curfdb;
538 state->extrablk.index = fi;
539 state->extrablk.magic =
540 XFS_DIR2_FREE_MAGIC;
541 ASSERT(args->oknoent);
542 return XFS_ERROR(ENOENT);
543 }
544 }
545 }
546 /*
547 * Not adding a new entry, so we really want to find
548 * the name given to us.
549 */
550 else {
551 /* 470 /*
552 * If it's a different data block, go get it. 471 * If it's not the one we have in hand, read it in.
553 */ 472 */
554 if (newdb != curdb) { 473 if (newfdb != curfdb) {
555 /* 474 /*
556 * If we had a block before, drop it. 475 * If we had one before, drop it.
557 */ 476 */
558 if (curbp) 477 if (curbp)
559 xfs_da_brelse(tp, curbp); 478 xfs_da_brelse(tp, curbp);
560 /* 479 /*
561 * Read the data block. 480 * Read the free block.
562 */ 481 */
563 if ((error = 482 error = xfs_da_read_buf(tp, dp,
564 xfs_da_read_buf(tp, dp, 483 xfs_dir2_db_to_da(mp, newfdb),
565 xfs_dir2_db_to_da(mp, newdb), -1, 484 -1, &curbp, XFS_DATA_FORK);
566 &curbp, XFS_DATA_FORK))) { 485 if (error)
567 return error; 486 return error;
568 } 487 free = curbp->data;
569 xfs_dir2_data_check(dp, curbp); 488 ASSERT(be32_to_cpu(free->hdr.magic) ==
570 curdb = newdb; 489 XFS_DIR2_FREE_MAGIC);
490 ASSERT((be32_to_cpu(free->hdr.firstdb) %
491 XFS_DIR2_MAX_FREE_BESTS(mp)) == 0);
492 ASSERT(be32_to_cpu(free->hdr.firstdb) <= curdb);
493 ASSERT(curdb < be32_to_cpu(free->hdr.firstdb) +
494 be32_to_cpu(free->hdr.nvalid));
571 } 495 }
572 /* 496 /*
573 * Point to the data entry. 497 * Get the index for our entry.
574 */ 498 */
575 dep = (xfs_dir2_data_entry_t *) 499 fi = xfs_dir2_db_to_fdindex(mp, curdb);
576 ((char *)curbp->data +
577 xfs_dir2_dataptr_to_off(mp, be32_to_cpu(lep->address)));
578 /* 500 /*
579 * Compare the entry, return it if it matches. 501 * If it has room, return it.
580 */ 502 */
581 if (dep->namelen == args->namelen && 503 if (unlikely(be16_to_cpu(free->bests[fi]) == NULLDATAOFF)) {
582 dep->name[0] == args->name[0] && 504 XFS_ERROR_REPORT("xfs_dir2_leafn_lookup_int",
583 memcmp(dep->name, args->name, args->namelen) == 0) { 505 XFS_ERRLEVEL_LOW, mp);
584 args->inumber = be64_to_cpu(dep->inumber); 506 if (curfdb != newfdb)
585 *indexp = index; 507 xfs_da_brelse(tp, curbp);
586 state->extravalid = 1; 508 return XFS_ERROR(EFSCORRUPTED);
587 state->extrablk.bp = curbp;
588 state->extrablk.blkno = curdb;
589 state->extrablk.index =
590 (int)((char *)dep -
591 (char *)curbp->data);
592 state->extrablk.magic = XFS_DIR2_DATA_MAGIC;
593 return XFS_ERROR(EEXIST);
594 } 509 }
510 curfdb = newfdb;
511 if (be16_to_cpu(free->bests[fi]) >= length)
512 goto out;
595 } 513 }
596 } 514 }
515 /* Didn't find any space */
516 fi = -1;
517out:
518 ASSERT(args->op_flags & XFS_DA_OP_OKNOENT);
519 if (curbp) {
520 /* Giving back a free block. */
521 state->extravalid = 1;
522 state->extrablk.bp = curbp;
523 state->extrablk.index = fi;
524 state->extrablk.blkno = curfdb;
525 state->extrablk.magic = XFS_DIR2_FREE_MAGIC;
526 } else {
527 state->extravalid = 0;
528 }
597 /* 529 /*
598 * Didn't find a match. 530 * Return the index, that will be the insertion point.
599 * If we are holding a buffer, give it back in case our caller
600 * finds it useful.
601 */ 531 */
602 if ((state->extravalid = (curbp != NULL))) { 532 *indexp = index;
603 state->extrablk.bp = curbp; 533 return XFS_ERROR(ENOENT);
604 state->extrablk.index = -1; 534}
535
536/*
537 * Look up a leaf entry in a node-format leaf block.
538 * The extrablk in state a data block.
539 */
540STATIC int
541xfs_dir2_leafn_lookup_for_entry(
542 xfs_dabuf_t *bp, /* leaf buffer */
543 xfs_da_args_t *args, /* operation arguments */
544 int *indexp, /* out: leaf entry index */
545 xfs_da_state_t *state) /* state to fill in */
546{
547 xfs_dabuf_t *curbp = NULL; /* current data/free buffer */
548 xfs_dir2_db_t curdb = -1; /* current data block number */
549 xfs_dir2_data_entry_t *dep; /* data block entry */
550 xfs_inode_t *dp; /* incore directory inode */
551 int error; /* error return value */
552 int index; /* leaf entry index */
553 xfs_dir2_leaf_t *leaf; /* leaf structure */
554 xfs_dir2_leaf_entry_t *lep; /* leaf entry */
555 xfs_mount_t *mp; /* filesystem mount point */
556 xfs_dir2_db_t newdb; /* new data block number */
557 xfs_trans_t *tp; /* transaction pointer */
558 enum xfs_dacmp cmp; /* comparison result */
559
560 dp = args->dp;
561 tp = args->trans;
562 mp = dp->i_mount;
563 leaf = bp->data;
564 ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAFN_MAGIC);
565#ifdef __KERNEL__
566 ASSERT(be16_to_cpu(leaf->hdr.count) > 0);
567#endif
568 xfs_dir2_leafn_check(dp, bp);
569 /*
570 * Look up the hash value in the leaf entries.
571 */
572 index = xfs_dir2_leaf_search_hash(args, bp);
573 /*
574 * Do we have a buffer coming in?
575 */
576 if (state->extravalid) {
577 curbp = state->extrablk.bp;
578 curdb = state->extrablk.blkno;
579 }
580 /*
581 * Loop over leaf entries with the right hash value.
582 */
583 for (lep = &leaf->ents[index]; index < be16_to_cpu(leaf->hdr.count) &&
584 be32_to_cpu(lep->hashval) == args->hashval;
585 lep++, index++) {
605 /* 586 /*
606 * For addname, giving back a free block. 587 * Skip stale leaf entries.
607 */ 588 */
608 if (args->addname) { 589 if (be32_to_cpu(lep->address) == XFS_DIR2_NULL_DATAPTR)
609 state->extrablk.blkno = curfdb; 590 continue;
610 state->extrablk.magic = XFS_DIR2_FREE_MAGIC; 591 /*
592 * Pull the data block number from the entry.
593 */
594 newdb = xfs_dir2_dataptr_to_db(mp, be32_to_cpu(lep->address));
595 /*
596 * Not adding a new entry, so we really want to find
597 * the name given to us.
598 *
599 * If it's a different data block, go get it.
600 */
601 if (newdb != curdb) {
602 /*
603 * If we had a block before that we aren't saving
604 * for a CI name, drop it
605 */
606 if (curbp && (args->cmpresult == XFS_CMP_DIFFERENT ||
607 curdb != state->extrablk.blkno))
608 xfs_da_brelse(tp, curbp);
609 /*
610 * If needing the block that is saved with a CI match,
611 * use it otherwise read in the new data block.
612 */
613 if (args->cmpresult != XFS_CMP_DIFFERENT &&
614 newdb == state->extrablk.blkno) {
615 ASSERT(state->extravalid);
616 curbp = state->extrablk.bp;
617 } else {
618 error = xfs_da_read_buf(tp, dp,
619 xfs_dir2_db_to_da(mp, newdb),
620 -1, &curbp, XFS_DATA_FORK);
621 if (error)
622 return error;
623 }
624 xfs_dir2_data_check(dp, curbp);
625 curdb = newdb;
611 } 626 }
612 /* 627 /*
613 * For other callers, giving back a data block. 628 * Point to the data entry.
614 */ 629 */
615 else { 630 dep = (xfs_dir2_data_entry_t *)((char *)curbp->data +
631 xfs_dir2_dataptr_to_off(mp, be32_to_cpu(lep->address)));
632 /*
633 * Compare the entry and if it's an exact match, return
634 * EEXIST immediately. If it's the first case-insensitive
635 * match, store the block & inode number and continue looking.
636 */
637 cmp = mp->m_dirnameops->compname(args, dep->name, dep->namelen);
638 if (cmp != XFS_CMP_DIFFERENT && cmp != args->cmpresult) {
639 /* If there is a CI match block, drop it */
640 if (args->cmpresult != XFS_CMP_DIFFERENT &&
641 curdb != state->extrablk.blkno)
642 xfs_da_brelse(tp, state->extrablk.bp);
643 args->cmpresult = cmp;
644 args->inumber = be64_to_cpu(dep->inumber);
645 *indexp = index;
646 state->extravalid = 1;
647 state->extrablk.bp = curbp;
616 state->extrablk.blkno = curdb; 648 state->extrablk.blkno = curdb;
649 state->extrablk.index = (int)((char *)dep -
650 (char *)curbp->data);
617 state->extrablk.magic = XFS_DIR2_DATA_MAGIC; 651 state->extrablk.magic = XFS_DIR2_DATA_MAGIC;
652 if (cmp == XFS_CMP_EXACT)
653 return XFS_ERROR(EEXIST);
618 } 654 }
619 } 655 }
620 /* 656 ASSERT(index == be16_to_cpu(leaf->hdr.count) ||
621 * Return the final index, that will be the insertion point. 657 (args->op_flags & XFS_DA_OP_OKNOENT));
622 */ 658 if (curbp) {
659 if (args->cmpresult == XFS_CMP_DIFFERENT) {
660 /* Giving back last used data block. */
661 state->extravalid = 1;
662 state->extrablk.bp = curbp;
663 state->extrablk.index = -1;
664 state->extrablk.blkno = curdb;
665 state->extrablk.magic = XFS_DIR2_DATA_MAGIC;
666 } else {
667 /* If the curbp is not the CI match block, drop it */
668 if (state->extrablk.bp != curbp)
669 xfs_da_brelse(tp, curbp);
670 }
671 } else {
672 state->extravalid = 0;
673 }
623 *indexp = index; 674 *indexp = index;
624 ASSERT(index == be16_to_cpu(leaf->hdr.count) || args->oknoent);
625 return XFS_ERROR(ENOENT); 675 return XFS_ERROR(ENOENT);
626} 676}
627 677
628/* 678/*
679 * Look up a leaf entry in a node-format leaf block.
680 * If this is an addname then the extrablk in state is a freespace block,
681 * otherwise it's a data block.
682 */
683int
684xfs_dir2_leafn_lookup_int(
685 xfs_dabuf_t *bp, /* leaf buffer */
686 xfs_da_args_t *args, /* operation arguments */
687 int *indexp, /* out: leaf entry index */
688 xfs_da_state_t *state) /* state to fill in */
689{
690 if (args->op_flags & XFS_DA_OP_ADDNAME)
691 return xfs_dir2_leafn_lookup_for_addname(bp, args, indexp,
692 state);
693 return xfs_dir2_leafn_lookup_for_entry(bp, args, indexp, state);
694}
695
696/*
629 * Move count leaf entries from source to destination leaf. 697 * Move count leaf entries from source to destination leaf.
630 * Log entries and headers. Stale entries are preserved. 698 * Log entries and headers. Stale entries are preserved.
631 */ 699 */
@@ -823,9 +891,10 @@ xfs_dir2_leafn_rebalance(
823 */ 891 */
824 if (!state->inleaf) 892 if (!state->inleaf)
825 blk2->index = blk1->index - be16_to_cpu(leaf1->hdr.count); 893 blk2->index = blk1->index - be16_to_cpu(leaf1->hdr.count);
826 894
827 /* 895 /*
828 * Finally sanity check just to make sure we are not returning a negative index 896 * Finally sanity check just to make sure we are not returning a
897 * negative index
829 */ 898 */
830 if(blk2->index < 0) { 899 if(blk2->index < 0) {
831 state->inleaf = 1; 900 state->inleaf = 1;
@@ -1332,7 +1401,7 @@ xfs_dir2_node_addname(
1332 /* 1401 /*
1333 * It worked, fix the hash values up the btree. 1402 * It worked, fix the hash values up the btree.
1334 */ 1403 */
1335 if (!args->justcheck) 1404 if (!(args->op_flags & XFS_DA_OP_JUSTCHECK))
1336 xfs_da_fixhashpath(state, &state->path); 1405 xfs_da_fixhashpath(state, &state->path);
1337 } else { 1406 } else {
1338 /* 1407 /*
@@ -1515,7 +1584,8 @@ xfs_dir2_node_addname_int(
1515 /* 1584 /*
1516 * Not allowed to allocate, return failure. 1585 * Not allowed to allocate, return failure.
1517 */ 1586 */
1518 if (args->justcheck || args->total == 0) { 1587 if ((args->op_flags & XFS_DA_OP_JUSTCHECK) ||
1588 args->total == 0) {
1519 /* 1589 /*
1520 * Drop the freespace buffer unless it came from our 1590 * Drop the freespace buffer unless it came from our
1521 * caller. 1591 * caller.
@@ -1661,7 +1731,7 @@ xfs_dir2_node_addname_int(
1661 /* 1731 /*
1662 * If just checking, we succeeded. 1732 * If just checking, we succeeded.
1663 */ 1733 */
1664 if (args->justcheck) { 1734 if (args->op_flags & XFS_DA_OP_JUSTCHECK) {
1665 if ((fblk == NULL || fblk->bp == NULL) && fbp != NULL) 1735 if ((fblk == NULL || fblk->bp == NULL) && fbp != NULL)
1666 xfs_da_buf_done(fbp); 1736 xfs_da_buf_done(fbp);
1667 return 0; 1737 return 0;
@@ -1767,6 +1837,14 @@ xfs_dir2_node_lookup(
1767 error = xfs_da_node_lookup_int(state, &rval); 1837 error = xfs_da_node_lookup_int(state, &rval);
1768 if (error) 1838 if (error)
1769 rval = error; 1839 rval = error;
1840 else if (rval == ENOENT && args->cmpresult == XFS_CMP_CASE) {
1841 /* If a CI match, dup the actual name and return EEXIST */
1842 xfs_dir2_data_entry_t *dep;
1843
1844 dep = (xfs_dir2_data_entry_t *)((char *)state->extrablk.bp->
1845 data + state->extrablk.index);
1846 rval = xfs_dir_cilookup_result(args, dep->name, dep->namelen);
1847 }
1770 /* 1848 /*
1771 * Release the btree blocks and leaf block. 1849 * Release the btree blocks and leaf block.
1772 */ 1850 */
@@ -1810,9 +1888,8 @@ xfs_dir2_node_removename(
1810 * Look up the entry we're deleting, set up the cursor. 1888 * Look up the entry we're deleting, set up the cursor.
1811 */ 1889 */
1812 error = xfs_da_node_lookup_int(state, &rval); 1890 error = xfs_da_node_lookup_int(state, &rval);
1813 if (error) { 1891 if (error)
1814 rval = error; 1892 rval = error;
1815 }
1816 /* 1893 /*
1817 * Didn't find it, upper layer screwed up. 1894 * Didn't find it, upper layer screwed up.
1818 */ 1895 */
@@ -1829,9 +1906,8 @@ xfs_dir2_node_removename(
1829 */ 1906 */
1830 error = xfs_dir2_leafn_remove(args, blk->bp, blk->index, 1907 error = xfs_dir2_leafn_remove(args, blk->bp, blk->index,
1831 &state->extrablk, &rval); 1908 &state->extrablk, &rval);
1832 if (error) { 1909 if (error)
1833 return error; 1910 return error;
1834 }
1835 /* 1911 /*
1836 * Fix the hash values up the btree. 1912 * Fix the hash values up the btree.
1837 */ 1913 */
diff --git a/fs/xfs/xfs_dir2_sf.c b/fs/xfs/xfs_dir2_sf.c
index 919d275a1cef..b46af0013ec9 100644
--- a/fs/xfs/xfs_dir2_sf.c
+++ b/fs/xfs/xfs_dir2_sf.c
@@ -255,7 +255,7 @@ xfs_dir2_block_to_sf(
255 xfs_dir2_sf_check(args); 255 xfs_dir2_sf_check(args);
256out: 256out:
257 xfs_trans_log_inode(args->trans, dp, logflags); 257 xfs_trans_log_inode(args->trans, dp, logflags);
258 kmem_free(block, mp->m_dirblksize); 258 kmem_free(block);
259 return error; 259 return error;
260} 260}
261 261
@@ -332,7 +332,7 @@ xfs_dir2_sf_addname(
332 /* 332 /*
333 * Just checking or no space reservation, it doesn't fit. 333 * Just checking or no space reservation, it doesn't fit.
334 */ 334 */
335 if (args->justcheck || args->total == 0) 335 if ((args->op_flags & XFS_DA_OP_JUSTCHECK) || args->total == 0)
336 return XFS_ERROR(ENOSPC); 336 return XFS_ERROR(ENOSPC);
337 /* 337 /*
338 * Convert to block form then add the name. 338 * Convert to block form then add the name.
@@ -345,7 +345,7 @@ xfs_dir2_sf_addname(
345 /* 345 /*
346 * Just checking, it fits. 346 * Just checking, it fits.
347 */ 347 */
348 if (args->justcheck) 348 if (args->op_flags & XFS_DA_OP_JUSTCHECK)
349 return 0; 349 return 0;
350 /* 350 /*
351 * Do it the easy way - just add it at the end. 351 * Do it the easy way - just add it at the end.
@@ -512,7 +512,7 @@ xfs_dir2_sf_addname_hard(
512 sfep = xfs_dir2_sf_nextentry(sfp, sfep); 512 sfep = xfs_dir2_sf_nextentry(sfp, sfep);
513 memcpy(sfep, oldsfep, old_isize - nbytes); 513 memcpy(sfep, oldsfep, old_isize - nbytes);
514 } 514 }
515 kmem_free(buf, old_isize); 515 kmem_free(buf);
516 dp->i_d.di_size = new_isize; 516 dp->i_d.di_size = new_isize;
517 xfs_dir2_sf_check(args); 517 xfs_dir2_sf_check(args);
518} 518}
@@ -812,8 +812,11 @@ xfs_dir2_sf_lookup(
812{ 812{
813 xfs_inode_t *dp; /* incore directory inode */ 813 xfs_inode_t *dp; /* incore directory inode */
814 int i; /* entry index */ 814 int i; /* entry index */
815 int error;
815 xfs_dir2_sf_entry_t *sfep; /* shortform directory entry */ 816 xfs_dir2_sf_entry_t *sfep; /* shortform directory entry */
816 xfs_dir2_sf_t *sfp; /* shortform structure */ 817 xfs_dir2_sf_t *sfp; /* shortform structure */
818 enum xfs_dacmp cmp; /* comparison result */
819 xfs_dir2_sf_entry_t *ci_sfep; /* case-insens. entry */
817 820
818 xfs_dir2_trace_args("sf_lookup", args); 821 xfs_dir2_trace_args("sf_lookup", args);
819 xfs_dir2_sf_check(args); 822 xfs_dir2_sf_check(args);
@@ -836,6 +839,7 @@ xfs_dir2_sf_lookup(
836 */ 839 */
837 if (args->namelen == 1 && args->name[0] == '.') { 840 if (args->namelen == 1 && args->name[0] == '.') {
838 args->inumber = dp->i_ino; 841 args->inumber = dp->i_ino;
842 args->cmpresult = XFS_CMP_EXACT;
839 return XFS_ERROR(EEXIST); 843 return XFS_ERROR(EEXIST);
840 } 844 }
841 /* 845 /*
@@ -844,28 +848,41 @@ xfs_dir2_sf_lookup(
844 if (args->namelen == 2 && 848 if (args->namelen == 2 &&
845 args->name[0] == '.' && args->name[1] == '.') { 849 args->name[0] == '.' && args->name[1] == '.') {
846 args->inumber = xfs_dir2_sf_get_inumber(sfp, &sfp->hdr.parent); 850 args->inumber = xfs_dir2_sf_get_inumber(sfp, &sfp->hdr.parent);
851 args->cmpresult = XFS_CMP_EXACT;
847 return XFS_ERROR(EEXIST); 852 return XFS_ERROR(EEXIST);
848 } 853 }
849 /* 854 /*
850 * Loop over all the entries trying to match ours. 855 * Loop over all the entries trying to match ours.
851 */ 856 */
852 for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp); 857 ci_sfep = NULL;
853 i < sfp->hdr.count; 858 for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp); i < sfp->hdr.count;
854 i++, sfep = xfs_dir2_sf_nextentry(sfp, sfep)) { 859 i++, sfep = xfs_dir2_sf_nextentry(sfp, sfep)) {
855 if (sfep->namelen == args->namelen && 860 /*
856 sfep->name[0] == args->name[0] && 861 * Compare name and if it's an exact match, return the inode
857 memcmp(args->name, sfep->name, args->namelen) == 0) { 862 * number. If it's the first case-insensitive match, store the
858 args->inumber = 863 * inode number and continue looking for an exact match.
859 xfs_dir2_sf_get_inumber(sfp, 864 */
860 xfs_dir2_sf_inumberp(sfep)); 865 cmp = dp->i_mount->m_dirnameops->compname(args, sfep->name,
861 return XFS_ERROR(EEXIST); 866 sfep->namelen);
867 if (cmp != XFS_CMP_DIFFERENT && cmp != args->cmpresult) {
868 args->cmpresult = cmp;
869 args->inumber = xfs_dir2_sf_get_inumber(sfp,
870 xfs_dir2_sf_inumberp(sfep));
871 if (cmp == XFS_CMP_EXACT)
872 return XFS_ERROR(EEXIST);
873 ci_sfep = sfep;
862 } 874 }
863 } 875 }
876 ASSERT(args->op_flags & XFS_DA_OP_OKNOENT);
864 /* 877 /*
865 * Didn't find it. 878 * Here, we can only be doing a lookup (not a rename or replace).
879 * If a case-insensitive match was not found, return ENOENT.
866 */ 880 */
867 ASSERT(args->oknoent); 881 if (!ci_sfep)
868 return XFS_ERROR(ENOENT); 882 return XFS_ERROR(ENOENT);
883 /* otherwise process the CI match as required by the caller */
884 error = xfs_dir_cilookup_result(args, ci_sfep->name, ci_sfep->namelen);
885 return XFS_ERROR(error);
869} 886}
870 887
871/* 888/*
@@ -904,24 +921,21 @@ xfs_dir2_sf_removename(
904 * Loop over the old directory entries. 921 * Loop over the old directory entries.
905 * Find the one we're deleting. 922 * Find the one we're deleting.
906 */ 923 */
907 for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp); 924 for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp); i < sfp->hdr.count;
908 i < sfp->hdr.count; 925 i++, sfep = xfs_dir2_sf_nextentry(sfp, sfep)) {
909 i++, sfep = xfs_dir2_sf_nextentry(sfp, sfep)) { 926 if (xfs_da_compname(args, sfep->name, sfep->namelen) ==
910 if (sfep->namelen == args->namelen && 927 XFS_CMP_EXACT) {
911 sfep->name[0] == args->name[0] &&
912 memcmp(sfep->name, args->name, args->namelen) == 0) {
913 ASSERT(xfs_dir2_sf_get_inumber(sfp, 928 ASSERT(xfs_dir2_sf_get_inumber(sfp,
914 xfs_dir2_sf_inumberp(sfep)) == 929 xfs_dir2_sf_inumberp(sfep)) ==
915 args->inumber); 930 args->inumber);
916 break; 931 break;
917 } 932 }
918 } 933 }
919 /* 934 /*
920 * Didn't find it. 935 * Didn't find it.
921 */ 936 */
922 if (i == sfp->hdr.count) { 937 if (i == sfp->hdr.count)
923 return XFS_ERROR(ENOENT); 938 return XFS_ERROR(ENOENT);
924 }
925 /* 939 /*
926 * Calculate sizes. 940 * Calculate sizes.
927 */ 941 */
@@ -1042,11 +1056,10 @@ xfs_dir2_sf_replace(
1042 */ 1056 */
1043 else { 1057 else {
1044 for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp); 1058 for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp);
1045 i < sfp->hdr.count; 1059 i < sfp->hdr.count;
1046 i++, sfep = xfs_dir2_sf_nextentry(sfp, sfep)) { 1060 i++, sfep = xfs_dir2_sf_nextentry(sfp, sfep)) {
1047 if (sfep->namelen == args->namelen && 1061 if (xfs_da_compname(args, sfep->name, sfep->namelen) ==
1048 sfep->name[0] == args->name[0] && 1062 XFS_CMP_EXACT) {
1049 memcmp(args->name, sfep->name, args->namelen) == 0) {
1050#if XFS_BIG_INUMS || defined(DEBUG) 1063#if XFS_BIG_INUMS || defined(DEBUG)
1051 ino = xfs_dir2_sf_get_inumber(sfp, 1064 ino = xfs_dir2_sf_get_inumber(sfp,
1052 xfs_dir2_sf_inumberp(sfep)); 1065 xfs_dir2_sf_inumberp(sfep));
@@ -1061,7 +1074,7 @@ xfs_dir2_sf_replace(
1061 * Didn't find it. 1074 * Didn't find it.
1062 */ 1075 */
1063 if (i == sfp->hdr.count) { 1076 if (i == sfp->hdr.count) {
1064 ASSERT(args->oknoent); 1077 ASSERT(args->op_flags & XFS_DA_OP_OKNOENT);
1065#if XFS_BIG_INUMS 1078#if XFS_BIG_INUMS
1066 if (i8elevated) 1079 if (i8elevated)
1067 xfs_dir2_sf_toino4(args); 1080 xfs_dir2_sf_toino4(args);
@@ -1174,7 +1187,7 @@ xfs_dir2_sf_toino4(
1174 /* 1187 /*
1175 * Clean up the inode. 1188 * Clean up the inode.
1176 */ 1189 */
1177 kmem_free(buf, oldsize); 1190 kmem_free(buf);
1178 dp->i_d.di_size = newsize; 1191 dp->i_d.di_size = newsize;
1179 xfs_trans_log_inode(args->trans, dp, XFS_ILOG_CORE | XFS_ILOG_DDATA); 1192 xfs_trans_log_inode(args->trans, dp, XFS_ILOG_CORE | XFS_ILOG_DDATA);
1180} 1193}
@@ -1251,7 +1264,7 @@ xfs_dir2_sf_toino8(
1251 /* 1264 /*
1252 * Clean up the inode. 1265 * Clean up the inode.
1253 */ 1266 */
1254 kmem_free(buf, oldsize); 1267 kmem_free(buf);
1255 dp->i_d.di_size = newsize; 1268 dp->i_d.di_size = newsize;
1256 xfs_trans_log_inode(args->trans, dp, XFS_ILOG_CORE | XFS_ILOG_DDATA); 1269 xfs_trans_log_inode(args->trans, dp, XFS_ILOG_CORE | XFS_ILOG_DDATA);
1257} 1270}
diff --git a/fs/xfs/xfs_dir2_sf.h b/fs/xfs/xfs_dir2_sf.h
index 005629d702d2..deecc9d238f8 100644
--- a/fs/xfs/xfs_dir2_sf.h
+++ b/fs/xfs/xfs_dir2_sf.h
@@ -62,7 +62,7 @@ typedef union {
62 * Normalized offset (in a data block) of the entry, really xfs_dir2_data_off_t. 62 * Normalized offset (in a data block) of the entry, really xfs_dir2_data_off_t.
63 * Only need 16 bits, this is the byte offset into the single block form. 63 * Only need 16 bits, this is the byte offset into the single block form.
64 */ 64 */
65typedef struct { __uint8_t i[2]; } xfs_dir2_sf_off_t; 65typedef struct { __uint8_t i[2]; } __arch_pack xfs_dir2_sf_off_t;
66 66
67/* 67/*
68 * The parent directory has a dedicated field, and the self-pointer must 68 * The parent directory has a dedicated field, and the self-pointer must
@@ -76,14 +76,14 @@ typedef struct xfs_dir2_sf_hdr {
76 __uint8_t count; /* count of entries */ 76 __uint8_t count; /* count of entries */
77 __uint8_t i8count; /* count of 8-byte inode #s */ 77 __uint8_t i8count; /* count of 8-byte inode #s */
78 xfs_dir2_inou_t parent; /* parent dir inode number */ 78 xfs_dir2_inou_t parent; /* parent dir inode number */
79} xfs_dir2_sf_hdr_t; 79} __arch_pack xfs_dir2_sf_hdr_t;
80 80
81typedef struct xfs_dir2_sf_entry { 81typedef struct xfs_dir2_sf_entry {
82 __uint8_t namelen; /* actual name length */ 82 __uint8_t namelen; /* actual name length */
83 xfs_dir2_sf_off_t offset; /* saved offset */ 83 xfs_dir2_sf_off_t offset; /* saved offset */
84 __uint8_t name[1]; /* name, variable size */ 84 __uint8_t name[1]; /* name, variable size */
85 xfs_dir2_inou_t inumber; /* inode number, var. offset */ 85 xfs_dir2_inou_t inumber; /* inode number, var. offset */
86} xfs_dir2_sf_entry_t; 86} __arch_pack xfs_dir2_sf_entry_t;
87 87
88typedef struct xfs_dir2_sf { 88typedef struct xfs_dir2_sf {
89 xfs_dir2_sf_hdr_t hdr; /* shortform header */ 89 xfs_dir2_sf_hdr_t hdr; /* shortform header */
diff --git a/fs/xfs/xfs_dir2_trace.c b/fs/xfs/xfs_dir2_trace.c
index f3fb2ffd6f5c..6cc7c0c681ac 100644
--- a/fs/xfs/xfs_dir2_trace.c
+++ b/fs/xfs/xfs_dir2_trace.c
@@ -85,7 +85,8 @@ xfs_dir2_trace_args(
85 (void *)((unsigned long)(args->inumber >> 32)), 85 (void *)((unsigned long)(args->inumber >> 32)),
86 (void *)((unsigned long)(args->inumber & 0xFFFFFFFF)), 86 (void *)((unsigned long)(args->inumber & 0xFFFFFFFF)),
87 (void *)args->dp, (void *)args->trans, 87 (void *)args->dp, (void *)args->trans,
88 (void *)(unsigned long)args->justcheck, NULL, NULL); 88 (void *)(unsigned long)(args->op_flags & XFS_DA_OP_JUSTCHECK),
89 NULL, NULL);
89} 90}
90 91
91void 92void
@@ -100,7 +101,7 @@ xfs_dir2_trace_args_b(
100 (void *)((unsigned long)(args->inumber >> 32)), 101 (void *)((unsigned long)(args->inumber >> 32)),
101 (void *)((unsigned long)(args->inumber & 0xFFFFFFFF)), 102 (void *)((unsigned long)(args->inumber & 0xFFFFFFFF)),
102 (void *)args->dp, (void *)args->trans, 103 (void *)args->dp, (void *)args->trans,
103 (void *)(unsigned long)args->justcheck, 104 (void *)(unsigned long)(args->op_flags & XFS_DA_OP_JUSTCHECK),
104 (void *)(bp ? bp->bps[0] : NULL), NULL); 105 (void *)(bp ? bp->bps[0] : NULL), NULL);
105} 106}
106 107
@@ -117,7 +118,7 @@ xfs_dir2_trace_args_bb(
117 (void *)((unsigned long)(args->inumber >> 32)), 118 (void *)((unsigned long)(args->inumber >> 32)),
118 (void *)((unsigned long)(args->inumber & 0xFFFFFFFF)), 119 (void *)((unsigned long)(args->inumber & 0xFFFFFFFF)),
119 (void *)args->dp, (void *)args->trans, 120 (void *)args->dp, (void *)args->trans,
120 (void *)(unsigned long)args->justcheck, 121 (void *)(unsigned long)(args->op_flags & XFS_DA_OP_JUSTCHECK),
121 (void *)(lbp ? lbp->bps[0] : NULL), 122 (void *)(lbp ? lbp->bps[0] : NULL),
122 (void *)(dbp ? dbp->bps[0] : NULL)); 123 (void *)(dbp ? dbp->bps[0] : NULL));
123} 124}
@@ -157,8 +158,8 @@ xfs_dir2_trace_args_db(
157 (void *)((unsigned long)(args->inumber >> 32)), 158 (void *)((unsigned long)(args->inumber >> 32)),
158 (void *)((unsigned long)(args->inumber & 0xFFFFFFFF)), 159 (void *)((unsigned long)(args->inumber & 0xFFFFFFFF)),
159 (void *)args->dp, (void *)args->trans, 160 (void *)args->dp, (void *)args->trans,
160 (void *)(unsigned long)args->justcheck, (void *)(long)db, 161 (void *)(unsigned long)(args->op_flags & XFS_DA_OP_JUSTCHECK),
161 (void *)dbp); 162 (void *)(long)db, (void *)dbp);
162} 163}
163 164
164void 165void
@@ -173,7 +174,7 @@ xfs_dir2_trace_args_i(
173 (void *)((unsigned long)(args->inumber >> 32)), 174 (void *)((unsigned long)(args->inumber >> 32)),
174 (void *)((unsigned long)(args->inumber & 0xFFFFFFFF)), 175 (void *)((unsigned long)(args->inumber & 0xFFFFFFFF)),
175 (void *)args->dp, (void *)args->trans, 176 (void *)args->dp, (void *)args->trans,
176 (void *)(unsigned long)args->justcheck, 177 (void *)(unsigned long)(args->op_flags & XFS_DA_OP_JUSTCHECK),
177 (void *)((unsigned long)(i >> 32)), 178 (void *)((unsigned long)(i >> 32)),
178 (void *)((unsigned long)(i & 0xFFFFFFFF))); 179 (void *)((unsigned long)(i & 0xFFFFFFFF)));
179} 180}
@@ -190,7 +191,8 @@ xfs_dir2_trace_args_s(
190 (void *)((unsigned long)(args->inumber >> 32)), 191 (void *)((unsigned long)(args->inumber >> 32)),
191 (void *)((unsigned long)(args->inumber & 0xFFFFFFFF)), 192 (void *)((unsigned long)(args->inumber & 0xFFFFFFFF)),
192 (void *)args->dp, (void *)args->trans, 193 (void *)args->dp, (void *)args->trans,
193 (void *)(unsigned long)args->justcheck, (void *)(long)s, NULL); 194 (void *)(unsigned long)(args->op_flags & XFS_DA_OP_JUSTCHECK),
195 (void *)(long)s, NULL);
194} 196}
195 197
196void 198void
@@ -208,7 +210,7 @@ xfs_dir2_trace_args_sb(
208 (void *)((unsigned long)(args->inumber >> 32)), 210 (void *)((unsigned long)(args->inumber >> 32)),
209 (void *)((unsigned long)(args->inumber & 0xFFFFFFFF)), 211 (void *)((unsigned long)(args->inumber & 0xFFFFFFFF)),
210 (void *)args->dp, (void *)args->trans, 212 (void *)args->dp, (void *)args->trans,
211 (void *)(unsigned long)args->justcheck, (void *)(long)s, 213 (void *)(unsigned long)(args->op_flags & XFS_DA_OP_JUSTCHECK),
212 (void *)dbp); 214 (void *)(long)s, (void *)dbp);
213} 215}
214#endif /* XFS_DIR2_TRACE */ 216#endif /* XFS_DIR2_TRACE */
diff --git a/fs/xfs/xfs_dmapi.h b/fs/xfs/xfs_dmapi.h
index f71784ab6a60..cdc2d3464a1a 100644
--- a/fs/xfs/xfs_dmapi.h
+++ b/fs/xfs/xfs_dmapi.h
@@ -166,6 +166,6 @@ typedef enum {
166 166
167#define FILP_DELAY_FLAG(filp) ((filp->f_flags&(O_NDELAY|O_NONBLOCK)) ? \ 167#define FILP_DELAY_FLAG(filp) ((filp->f_flags&(O_NDELAY|O_NONBLOCK)) ? \
168 DM_FLAGS_NDELAY : 0) 168 DM_FLAGS_NDELAY : 0)
169#define AT_DELAY_FLAG(f) ((f&ATTR_NONBLOCK) ? DM_FLAGS_NDELAY : 0) 169#define AT_DELAY_FLAG(f) ((f & XFS_ATTR_NONBLOCK) ? DM_FLAGS_NDELAY : 0)
170 170
171#endif /* __XFS_DMAPI_H__ */ 171#endif /* __XFS_DMAPI_H__ */
diff --git a/fs/xfs/xfs_error.c b/fs/xfs/xfs_error.c
index 05e5365d3c31..f66756cfb5e8 100644
--- a/fs/xfs/xfs_error.c
+++ b/fs/xfs/xfs_error.c
@@ -66,14 +66,6 @@ int xfs_etest[XFS_NUM_INJECT_ERROR];
66int64_t xfs_etest_fsid[XFS_NUM_INJECT_ERROR]; 66int64_t xfs_etest_fsid[XFS_NUM_INJECT_ERROR];
67char * xfs_etest_fsname[XFS_NUM_INJECT_ERROR]; 67char * xfs_etest_fsname[XFS_NUM_INJECT_ERROR];
68 68
69void
70xfs_error_test_init(void)
71{
72 memset(xfs_etest, 0, sizeof(xfs_etest));
73 memset(xfs_etest_fsid, 0, sizeof(xfs_etest_fsid));
74 memset(xfs_etest_fsname, 0, sizeof(xfs_etest_fsname));
75}
76
77int 69int
78xfs_error_test(int error_tag, int *fsidp, char *expression, 70xfs_error_test(int error_tag, int *fsidp, char *expression,
79 int line, char *file, unsigned long randfactor) 71 int line, char *file, unsigned long randfactor)
@@ -150,8 +142,7 @@ xfs_errortag_clearall(xfs_mount_t *mp, int loud)
150 xfs_etest[i]); 142 xfs_etest[i]);
151 xfs_etest[i] = 0; 143 xfs_etest[i] = 0;
152 xfs_etest_fsid[i] = 0LL; 144 xfs_etest_fsid[i] = 0LL;
153 kmem_free(xfs_etest_fsname[i], 145 kmem_free(xfs_etest_fsname[i]);
154 strlen(xfs_etest_fsname[i]) + 1);
155 xfs_etest_fsname[i] = NULL; 146 xfs_etest_fsname[i] = NULL;
156 } 147 }
157 } 148 }
@@ -175,7 +166,7 @@ xfs_fs_vcmn_err(int level, xfs_mount_t *mp, char *fmt, va_list ap)
175 newfmt = kmem_alloc(len, KM_SLEEP); 166 newfmt = kmem_alloc(len, KM_SLEEP);
176 sprintf(newfmt, "Filesystem \"%s\": %s", mp->m_fsname, fmt); 167 sprintf(newfmt, "Filesystem \"%s\": %s", mp->m_fsname, fmt);
177 icmn_err(level, newfmt, ap); 168 icmn_err(level, newfmt, ap);
178 kmem_free(newfmt, len); 169 kmem_free(newfmt);
179 } else { 170 } else {
180 icmn_err(level, fmt, ap); 171 icmn_err(level, fmt, ap);
181 } 172 }
diff --git a/fs/xfs/xfs_error.h b/fs/xfs/xfs_error.h
index 6490d2a9f8e1..d8559d132efa 100644
--- a/fs/xfs/xfs_error.h
+++ b/fs/xfs/xfs_error.h
@@ -127,7 +127,6 @@ extern void xfs_corruption_error(char *tag, int level, struct xfs_mount *mp,
127 127
128#if (defined(DEBUG) || defined(INDUCE_IO_ERROR)) 128#if (defined(DEBUG) || defined(INDUCE_IO_ERROR))
129extern int xfs_error_test(int, int *, char *, int, char *, unsigned long); 129extern int xfs_error_test(int, int *, char *, int, char *, unsigned long);
130extern void xfs_error_test_init(void);
131 130
132#define XFS_NUM_INJECT_ERROR 10 131#define XFS_NUM_INJECT_ERROR 10
133 132
diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c
index 132bd07b9bb8..8aa28f751b2a 100644
--- a/fs/xfs/xfs_extfree_item.c
+++ b/fs/xfs/xfs_extfree_item.c
@@ -41,8 +41,7 @@ xfs_efi_item_free(xfs_efi_log_item_t *efip)
41 int nexts = efip->efi_format.efi_nextents; 41 int nexts = efip->efi_format.efi_nextents;
42 42
43 if (nexts > XFS_EFI_MAX_FAST_EXTENTS) { 43 if (nexts > XFS_EFI_MAX_FAST_EXTENTS) {
44 kmem_free(efip, sizeof(xfs_efi_log_item_t) + 44 kmem_free(efip);
45 (nexts - 1) * sizeof(xfs_extent_t));
46 } else { 45 } else {
47 kmem_zone_free(xfs_efi_zone, efip); 46 kmem_zone_free(xfs_efi_zone, efip);
48 } 47 }
@@ -374,8 +373,7 @@ xfs_efd_item_free(xfs_efd_log_item_t *efdp)
374 int nexts = efdp->efd_format.efd_nextents; 373 int nexts = efdp->efd_format.efd_nextents;
375 374
376 if (nexts > XFS_EFD_MAX_FAST_EXTENTS) { 375 if (nexts > XFS_EFD_MAX_FAST_EXTENTS) {
377 kmem_free(efdp, sizeof(xfs_efd_log_item_t) + 376 kmem_free(efdp);
378 (nexts - 1) * sizeof(xfs_extent_t));
379 } else { 377 } else {
380 kmem_zone_free(xfs_efd_zone, efdp); 378 kmem_zone_free(xfs_efd_zone, efdp);
381 } 379 }
diff --git a/fs/xfs/xfs_filestream.c b/fs/xfs/xfs_filestream.c
index 3f3785b10804..c38fd14fca29 100644
--- a/fs/xfs/xfs_filestream.c
+++ b/fs/xfs/xfs_filestream.c
@@ -397,10 +397,12 @@ int
397xfs_filestream_init(void) 397xfs_filestream_init(void)
398{ 398{
399 item_zone = kmem_zone_init(sizeof(fstrm_item_t), "fstrm_item"); 399 item_zone = kmem_zone_init(sizeof(fstrm_item_t), "fstrm_item");
400 if (!item_zone)
401 return -ENOMEM;
400#ifdef XFS_FILESTREAMS_TRACE 402#ifdef XFS_FILESTREAMS_TRACE
401 xfs_filestreams_trace_buf = ktrace_alloc(XFS_FSTRM_KTRACE_SIZE, KM_SLEEP); 403 xfs_filestreams_trace_buf = ktrace_alloc(XFS_FSTRM_KTRACE_SIZE, KM_SLEEP);
402#endif 404#endif
403 return item_zone ? 0 : -ENOMEM; 405 return 0;
404} 406}
405 407
406/* 408/*
diff --git a/fs/xfs/xfs_fs.h b/fs/xfs/xfs_fs.h
index 3bed6433d050..01c0cc88d3f3 100644
--- a/fs/xfs/xfs_fs.h
+++ b/fs/xfs/xfs_fs.h
@@ -239,6 +239,7 @@ typedef struct xfs_fsop_resblks {
239#define XFS_FSOP_GEOM_FLAGS_LOGV2 0x0100 /* log format version 2 */ 239#define XFS_FSOP_GEOM_FLAGS_LOGV2 0x0100 /* log format version 2 */
240#define XFS_FSOP_GEOM_FLAGS_SECTOR 0x0200 /* sector sizes >1BB */ 240#define XFS_FSOP_GEOM_FLAGS_SECTOR 0x0200 /* sector sizes >1BB */
241#define XFS_FSOP_GEOM_FLAGS_ATTR2 0x0400 /* inline attributes rework */ 241#define XFS_FSOP_GEOM_FLAGS_ATTR2 0x0400 /* inline attributes rework */
242#define XFS_FSOP_GEOM_FLAGS_DIRV2CI 0x1000 /* ASCII only CI names */
242#define XFS_FSOP_GEOM_FLAGS_LAZYSB 0x4000 /* lazy superblock counters */ 243#define XFS_FSOP_GEOM_FLAGS_LAZYSB 0x4000 /* lazy superblock counters */
243 244
244 245
@@ -371,6 +372,9 @@ typedef struct xfs_fsop_attrlist_handlereq {
371 372
372typedef struct xfs_attr_multiop { 373typedef struct xfs_attr_multiop {
373 __u32 am_opcode; 374 __u32 am_opcode;
375#define ATTR_OP_GET 1 /* return the indicated attr's value */
376#define ATTR_OP_SET 2 /* set/create the indicated attr/value pair */
377#define ATTR_OP_REMOVE 3 /* remove the indicated attr */
374 __s32 am_error; 378 __s32 am_error;
375 void __user *am_attrname; 379 void __user *am_attrname;
376 void __user *am_attrvalue; 380 void __user *am_attrvalue;
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
index 381ebda4f7bc..84583cf73db3 100644
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -95,6 +95,8 @@ xfs_fs_geometry(
95 XFS_FSOP_GEOM_FLAGS_DIRV2 : 0) | 95 XFS_FSOP_GEOM_FLAGS_DIRV2 : 0) |
96 (xfs_sb_version_hassector(&mp->m_sb) ? 96 (xfs_sb_version_hassector(&mp->m_sb) ?
97 XFS_FSOP_GEOM_FLAGS_SECTOR : 0) | 97 XFS_FSOP_GEOM_FLAGS_SECTOR : 0) |
98 (xfs_sb_version_hasasciici(&mp->m_sb) ?
99 XFS_FSOP_GEOM_FLAGS_DIRV2CI : 0) |
98 (xfs_sb_version_haslazysbcount(&mp->m_sb) ? 100 (xfs_sb_version_haslazysbcount(&mp->m_sb) ?
99 XFS_FSOP_GEOM_FLAGS_LAZYSB : 0) | 101 XFS_FSOP_GEOM_FLAGS_LAZYSB : 0) |
100 (xfs_sb_version_hasattr2(&mp->m_sb) ? 102 (xfs_sb_version_hasattr2(&mp->m_sb) ?
@@ -625,7 +627,7 @@ xfs_fs_goingdown(
625 xfs_force_shutdown(mp, SHUTDOWN_FORCE_UMOUNT); 627 xfs_force_shutdown(mp, SHUTDOWN_FORCE_UMOUNT);
626 thaw_bdev(sb->s_bdev, sb); 628 thaw_bdev(sb->s_bdev, sb);
627 } 629 }
628 630
629 break; 631 break;
630 } 632 }
631 case XFS_FSOP_GOING_FLAGS_LOGFLUSH: 633 case XFS_FSOP_GOING_FLAGS_LOGFLUSH:
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index e569bf5d6cf0..bedc66163176 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -1763,67 +1763,6 @@ xfs_itruncate_finish(
1763 return 0; 1763 return 0;
1764} 1764}
1765 1765
1766
1767/*
1768 * xfs_igrow_start
1769 *
1770 * Do the first part of growing a file: zero any data in the last
1771 * block that is beyond the old EOF. We need to do this before
1772 * the inode is joined to the transaction to modify the i_size.
1773 * That way we can drop the inode lock and call into the buffer
1774 * cache to get the buffer mapping the EOF.
1775 */
1776int
1777xfs_igrow_start(
1778 xfs_inode_t *ip,
1779 xfs_fsize_t new_size,
1780 cred_t *credp)
1781{
1782 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL));
1783 ASSERT(new_size > ip->i_size);
1784
1785 /*
1786 * Zero any pages that may have been created by
1787 * xfs_write_file() beyond the end of the file
1788 * and any blocks between the old and new file sizes.
1789 */
1790 return xfs_zero_eof(ip, new_size, ip->i_size);
1791}
1792
1793/*
1794 * xfs_igrow_finish
1795 *
1796 * This routine is called to extend the size of a file.
1797 * The inode must have both the iolock and the ilock locked
1798 * for update and it must be a part of the current transaction.
1799 * The xfs_igrow_start() function must have been called previously.
1800 * If the change_flag is not zero, the inode change timestamp will
1801 * be updated.
1802 */
1803void
1804xfs_igrow_finish(
1805 xfs_trans_t *tp,
1806 xfs_inode_t *ip,
1807 xfs_fsize_t new_size,
1808 int change_flag)
1809{
1810 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL));
1811 ASSERT(ip->i_transp == tp);
1812 ASSERT(new_size > ip->i_size);
1813
1814 /*
1815 * Update the file size. Update the inode change timestamp
1816 * if change_flag set.
1817 */
1818 ip->i_d.di_size = new_size;
1819 ip->i_size = new_size;
1820 if (change_flag)
1821 xfs_ichgtime(ip, XFS_ICHGTIME_CHG);
1822 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
1823
1824}
1825
1826
1827/* 1766/*
1828 * This is called when the inode's link count goes to 0. 1767 * This is called when the inode's link count goes to 0.
1829 * We place the on-disk inode on a list in the AGI. It 1768 * We place the on-disk inode on a list in the AGI. It
@@ -2258,7 +2197,7 @@ xfs_ifree_cluster(
2258 xfs_trans_binval(tp, bp); 2197 xfs_trans_binval(tp, bp);
2259 } 2198 }
2260 2199
2261 kmem_free(ip_found, ninodes * sizeof(xfs_inode_t *)); 2200 kmem_free(ip_found);
2262 xfs_put_perag(mp, pag); 2201 xfs_put_perag(mp, pag);
2263} 2202}
2264 2203
@@ -2470,7 +2409,7 @@ xfs_iroot_realloc(
2470 (int)new_size); 2409 (int)new_size);
2471 memcpy(np, op, new_max * (uint)sizeof(xfs_dfsbno_t)); 2410 memcpy(np, op, new_max * (uint)sizeof(xfs_dfsbno_t));
2472 } 2411 }
2473 kmem_free(ifp->if_broot, ifp->if_broot_bytes); 2412 kmem_free(ifp->if_broot);
2474 ifp->if_broot = new_broot; 2413 ifp->if_broot = new_broot;
2475 ifp->if_broot_bytes = (int)new_size; 2414 ifp->if_broot_bytes = (int)new_size;
2476 ASSERT(ifp->if_broot_bytes <= 2415 ASSERT(ifp->if_broot_bytes <=
@@ -2514,7 +2453,7 @@ xfs_idata_realloc(
2514 2453
2515 if (new_size == 0) { 2454 if (new_size == 0) {
2516 if (ifp->if_u1.if_data != ifp->if_u2.if_inline_data) { 2455 if (ifp->if_u1.if_data != ifp->if_u2.if_inline_data) {
2517 kmem_free(ifp->if_u1.if_data, ifp->if_real_bytes); 2456 kmem_free(ifp->if_u1.if_data);
2518 } 2457 }
2519 ifp->if_u1.if_data = NULL; 2458 ifp->if_u1.if_data = NULL;
2520 real_size = 0; 2459 real_size = 0;
@@ -2529,7 +2468,7 @@ xfs_idata_realloc(
2529 ASSERT(ifp->if_real_bytes != 0); 2468 ASSERT(ifp->if_real_bytes != 0);
2530 memcpy(ifp->if_u2.if_inline_data, ifp->if_u1.if_data, 2469 memcpy(ifp->if_u2.if_inline_data, ifp->if_u1.if_data,
2531 new_size); 2470 new_size);
2532 kmem_free(ifp->if_u1.if_data, ifp->if_real_bytes); 2471 kmem_free(ifp->if_u1.if_data);
2533 ifp->if_u1.if_data = ifp->if_u2.if_inline_data; 2472 ifp->if_u1.if_data = ifp->if_u2.if_inline_data;
2534 } 2473 }
2535 real_size = 0; 2474 real_size = 0;
@@ -2636,7 +2575,7 @@ xfs_idestroy_fork(
2636 2575
2637 ifp = XFS_IFORK_PTR(ip, whichfork); 2576 ifp = XFS_IFORK_PTR(ip, whichfork);
2638 if (ifp->if_broot != NULL) { 2577 if (ifp->if_broot != NULL) {
2639 kmem_free(ifp->if_broot, ifp->if_broot_bytes); 2578 kmem_free(ifp->if_broot);
2640 ifp->if_broot = NULL; 2579 ifp->if_broot = NULL;
2641 } 2580 }
2642 2581
@@ -2650,7 +2589,7 @@ xfs_idestroy_fork(
2650 if ((ifp->if_u1.if_data != ifp->if_u2.if_inline_data) && 2589 if ((ifp->if_u1.if_data != ifp->if_u2.if_inline_data) &&
2651 (ifp->if_u1.if_data != NULL)) { 2590 (ifp->if_u1.if_data != NULL)) {
2652 ASSERT(ifp->if_real_bytes != 0); 2591 ASSERT(ifp->if_real_bytes != 0);
2653 kmem_free(ifp->if_u1.if_data, ifp->if_real_bytes); 2592 kmem_free(ifp->if_u1.if_data);
2654 ifp->if_u1.if_data = NULL; 2593 ifp->if_u1.if_data = NULL;
2655 ifp->if_real_bytes = 0; 2594 ifp->if_real_bytes = 0;
2656 } 2595 }
@@ -3058,7 +2997,7 @@ xfs_iflush_cluster(
3058 2997
3059out_free: 2998out_free:
3060 read_unlock(&pag->pag_ici_lock); 2999 read_unlock(&pag->pag_ici_lock);
3061 kmem_free(ilist, ilist_size); 3000 kmem_free(ilist);
3062 return 0; 3001 return 0;
3063 3002
3064 3003
@@ -3102,7 +3041,7 @@ cluster_corrupt_out:
3102 * Unlocks the flush lock 3041 * Unlocks the flush lock
3103 */ 3042 */
3104 xfs_iflush_abort(iq); 3043 xfs_iflush_abort(iq);
3105 kmem_free(ilist, ilist_size); 3044 kmem_free(ilist);
3106 return XFS_ERROR(EFSCORRUPTED); 3045 return XFS_ERROR(EFSCORRUPTED);
3107} 3046}
3108 3047
@@ -3143,8 +3082,6 @@ xfs_iflush(
3143 * flush lock and do nothing. 3082 * flush lock and do nothing.
3144 */ 3083 */
3145 if (xfs_inode_clean(ip)) { 3084 if (xfs_inode_clean(ip)) {
3146 ASSERT((iip != NULL) ?
3147 !(iip->ili_item.li_flags & XFS_LI_IN_AIL) : 1);
3148 xfs_ifunlock(ip); 3085 xfs_ifunlock(ip);
3149 return 0; 3086 return 0;
3150 } 3087 }
@@ -3836,7 +3773,7 @@ xfs_iext_add_indirect_multi(
3836 erp = xfs_iext_irec_new(ifp, erp_idx); 3773 erp = xfs_iext_irec_new(ifp, erp_idx);
3837 } 3774 }
3838 memmove(&erp->er_extbuf[i], nex2_ep, byte_diff); 3775 memmove(&erp->er_extbuf[i], nex2_ep, byte_diff);
3839 kmem_free(nex2_ep, byte_diff); 3776 kmem_free(nex2_ep);
3840 erp->er_extcount += nex2; 3777 erp->er_extcount += nex2;
3841 xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, nex2); 3778 xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, nex2);
3842 } 3779 }
@@ -4112,7 +4049,7 @@ xfs_iext_direct_to_inline(
4112 */ 4049 */
4113 memcpy(ifp->if_u2.if_inline_ext, ifp->if_u1.if_extents, 4050 memcpy(ifp->if_u2.if_inline_ext, ifp->if_u1.if_extents,
4114 nextents * sizeof(xfs_bmbt_rec_t)); 4051 nextents * sizeof(xfs_bmbt_rec_t));
4115 kmem_free(ifp->if_u1.if_extents, ifp->if_real_bytes); 4052 kmem_free(ifp->if_u1.if_extents);
4116 ifp->if_u1.if_extents = ifp->if_u2.if_inline_ext; 4053 ifp->if_u1.if_extents = ifp->if_u2.if_inline_ext;
4117 ifp->if_real_bytes = 0; 4054 ifp->if_real_bytes = 0;
4118} 4055}
@@ -4186,7 +4123,7 @@ xfs_iext_indirect_to_direct(
4186 ASSERT(ifp->if_real_bytes == XFS_IEXT_BUFSZ); 4123 ASSERT(ifp->if_real_bytes == XFS_IEXT_BUFSZ);
4187 4124
4188 ep = ifp->if_u1.if_ext_irec->er_extbuf; 4125 ep = ifp->if_u1.if_ext_irec->er_extbuf;
4189 kmem_free(ifp->if_u1.if_ext_irec, sizeof(xfs_ext_irec_t)); 4126 kmem_free(ifp->if_u1.if_ext_irec);
4190 ifp->if_flags &= ~XFS_IFEXTIREC; 4127 ifp->if_flags &= ~XFS_IFEXTIREC;
4191 ifp->if_u1.if_extents = ep; 4128 ifp->if_u1.if_extents = ep;
4192 ifp->if_bytes = size; 4129 ifp->if_bytes = size;
@@ -4212,7 +4149,7 @@ xfs_iext_destroy(
4212 } 4149 }
4213 ifp->if_flags &= ~XFS_IFEXTIREC; 4150 ifp->if_flags &= ~XFS_IFEXTIREC;
4214 } else if (ifp->if_real_bytes) { 4151 } else if (ifp->if_real_bytes) {
4215 kmem_free(ifp->if_u1.if_extents, ifp->if_real_bytes); 4152 kmem_free(ifp->if_u1.if_extents);
4216 } else if (ifp->if_bytes) { 4153 } else if (ifp->if_bytes) {
4217 memset(ifp->if_u2.if_inline_ext, 0, XFS_INLINE_EXTS * 4154 memset(ifp->if_u2.if_inline_ext, 0, XFS_INLINE_EXTS *
4218 sizeof(xfs_bmbt_rec_t)); 4155 sizeof(xfs_bmbt_rec_t));
@@ -4483,7 +4420,7 @@ xfs_iext_irec_remove(
4483 if (erp->er_extbuf) { 4420 if (erp->er_extbuf) {
4484 xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, 4421 xfs_iext_irec_update_extoffs(ifp, erp_idx + 1,
4485 -erp->er_extcount); 4422 -erp->er_extcount);
4486 kmem_free(erp->er_extbuf, XFS_IEXT_BUFSZ); 4423 kmem_free(erp->er_extbuf);
4487 } 4424 }
4488 /* Compact extent records */ 4425 /* Compact extent records */
4489 erp = ifp->if_u1.if_ext_irec; 4426 erp = ifp->if_u1.if_ext_irec;
@@ -4501,8 +4438,7 @@ xfs_iext_irec_remove(
4501 xfs_iext_realloc_indirect(ifp, 4438 xfs_iext_realloc_indirect(ifp,
4502 nlists * sizeof(xfs_ext_irec_t)); 4439 nlists * sizeof(xfs_ext_irec_t));
4503 } else { 4440 } else {
4504 kmem_free(ifp->if_u1.if_ext_irec, 4441 kmem_free(ifp->if_u1.if_ext_irec);
4505 sizeof(xfs_ext_irec_t));
4506 } 4442 }
4507 ifp->if_real_bytes = nlists * XFS_IEXT_BUFSZ; 4443 ifp->if_real_bytes = nlists * XFS_IEXT_BUFSZ;
4508} 4444}
@@ -4571,7 +4507,7 @@ xfs_iext_irec_compact_pages(
4571 * so er_extoffs don't get modified in 4507 * so er_extoffs don't get modified in
4572 * xfs_iext_irec_remove. 4508 * xfs_iext_irec_remove.
4573 */ 4509 */
4574 kmem_free(erp_next->er_extbuf, XFS_IEXT_BUFSZ); 4510 kmem_free(erp_next->er_extbuf);
4575 erp_next->er_extbuf = NULL; 4511 erp_next->er_extbuf = NULL;
4576 xfs_iext_irec_remove(ifp, erp_idx + 1); 4512 xfs_iext_irec_remove(ifp, erp_idx + 1);
4577 nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; 4513 nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
@@ -4596,40 +4532,63 @@ xfs_iext_irec_compact_full(
4596 int nlists; /* number of irec's (ex lists) */ 4532 int nlists; /* number of irec's (ex lists) */
4597 4533
4598 ASSERT(ifp->if_flags & XFS_IFEXTIREC); 4534 ASSERT(ifp->if_flags & XFS_IFEXTIREC);
4535
4599 nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; 4536 nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
4600 erp = ifp->if_u1.if_ext_irec; 4537 erp = ifp->if_u1.if_ext_irec;
4601 ep = &erp->er_extbuf[erp->er_extcount]; 4538 ep = &erp->er_extbuf[erp->er_extcount];
4602 erp_next = erp + 1; 4539 erp_next = erp + 1;
4603 ep_next = erp_next->er_extbuf; 4540 ep_next = erp_next->er_extbuf;
4541
4604 while (erp_idx < nlists - 1) { 4542 while (erp_idx < nlists - 1) {
4543 /*
4544 * Check how many extent records are available in this irec.
4545 * If there is none skip the whole exercise.
4546 */
4605 ext_avail = XFS_LINEAR_EXTS - erp->er_extcount; 4547 ext_avail = XFS_LINEAR_EXTS - erp->er_extcount;
4606 ext_diff = MIN(ext_avail, erp_next->er_extcount); 4548 if (ext_avail) {
4607 memcpy(ep, ep_next, ext_diff * sizeof(xfs_bmbt_rec_t)); 4549
4608 erp->er_extcount += ext_diff;
4609 erp_next->er_extcount -= ext_diff;
4610 /* Remove next page */
4611 if (erp_next->er_extcount == 0) {
4612 /* 4550 /*
4613 * Free page before removing extent record 4551 * Copy over as many as possible extent records into
4614 * so er_extoffs don't get modified in 4552 * the previous page.
4615 * xfs_iext_irec_remove.
4616 */ 4553 */
4617 kmem_free(erp_next->er_extbuf, 4554 ext_diff = MIN(ext_avail, erp_next->er_extcount);
4618 erp_next->er_extcount * sizeof(xfs_bmbt_rec_t)); 4555 memcpy(ep, ep_next, ext_diff * sizeof(xfs_bmbt_rec_t));
4619 erp_next->er_extbuf = NULL; 4556 erp->er_extcount += ext_diff;
4620 xfs_iext_irec_remove(ifp, erp_idx + 1); 4557 erp_next->er_extcount -= ext_diff;
4621 erp = &ifp->if_u1.if_ext_irec[erp_idx]; 4558
4622 nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; 4559 /*
4623 /* Update next page */ 4560 * If the next irec is empty now we can simply
4624 } else { 4561 * remove it.
4625 /* Move rest of page up to become next new page */ 4562 */
4626 memmove(erp_next->er_extbuf, ep_next, 4563 if (erp_next->er_extcount == 0) {
4627 erp_next->er_extcount * sizeof(xfs_bmbt_rec_t)); 4564 /*
4628 ep_next = erp_next->er_extbuf; 4565 * Free page before removing extent record
4629 memset(&ep_next[erp_next->er_extcount], 0, 4566 * so er_extoffs don't get modified in
4630 (XFS_LINEAR_EXTS - erp_next->er_extcount) * 4567 * xfs_iext_irec_remove.
4631 sizeof(xfs_bmbt_rec_t)); 4568 */
4569 kmem_free(erp_next->er_extbuf);
4570 erp_next->er_extbuf = NULL;
4571 xfs_iext_irec_remove(ifp, erp_idx + 1);
4572 erp = &ifp->if_u1.if_ext_irec[erp_idx];
4573 nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
4574
4575 /*
4576 * If the next irec is not empty move up the content
4577 * that has not been copied to the previous page to
4578 * the beggining of this one.
4579 */
4580 } else {
4581 memmove(erp_next->er_extbuf, &ep_next[ext_diff],
4582 erp_next->er_extcount *
4583 sizeof(xfs_bmbt_rec_t));
4584 ep_next = erp_next->er_extbuf;
4585 memset(&ep_next[erp_next->er_extcount], 0,
4586 (XFS_LINEAR_EXTS -
4587 erp_next->er_extcount) *
4588 sizeof(xfs_bmbt_rec_t));
4589 }
4632 } 4590 }
4591
4633 if (erp->er_extcount == XFS_LINEAR_EXTS) { 4592 if (erp->er_extcount == XFS_LINEAR_EXTS) {
4634 erp_idx++; 4593 erp_idx++;
4635 if (erp_idx < nlists) 4594 if (erp_idx < nlists)
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index 0a999fee4f03..17a04b6321ed 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -507,9 +507,6 @@ int xfs_itruncate_start(xfs_inode_t *, uint, xfs_fsize_t);
507int xfs_itruncate_finish(struct xfs_trans **, xfs_inode_t *, 507int xfs_itruncate_finish(struct xfs_trans **, xfs_inode_t *,
508 xfs_fsize_t, int, int); 508 xfs_fsize_t, int, int);
509int xfs_iunlink(struct xfs_trans *, xfs_inode_t *); 509int xfs_iunlink(struct xfs_trans *, xfs_inode_t *);
510int xfs_igrow_start(xfs_inode_t *, xfs_fsize_t, struct cred *);
511void xfs_igrow_finish(struct xfs_trans *, xfs_inode_t *,
512 xfs_fsize_t, int);
513 510
514void xfs_idestroy_fork(xfs_inode_t *, int); 511void xfs_idestroy_fork(xfs_inode_t *, int);
515void xfs_idestroy(xfs_inode_t *); 512void xfs_idestroy(xfs_inode_t *);
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c
index 167b33f15772..0eee08a32c26 100644
--- a/fs/xfs/xfs_inode_item.c
+++ b/fs/xfs/xfs_inode_item.c
@@ -686,7 +686,7 @@ xfs_inode_item_unlock(
686 ASSERT(ip->i_d.di_nextents > 0); 686 ASSERT(ip->i_d.di_nextents > 0);
687 ASSERT(iip->ili_format.ilf_fields & XFS_ILOG_DEXT); 687 ASSERT(iip->ili_format.ilf_fields & XFS_ILOG_DEXT);
688 ASSERT(ip->i_df.if_bytes > 0); 688 ASSERT(ip->i_df.if_bytes > 0);
689 kmem_free(iip->ili_extents_buf, ip->i_df.if_bytes); 689 kmem_free(iip->ili_extents_buf);
690 iip->ili_extents_buf = NULL; 690 iip->ili_extents_buf = NULL;
691 } 691 }
692 if (iip->ili_aextents_buf != NULL) { 692 if (iip->ili_aextents_buf != NULL) {
@@ -694,7 +694,7 @@ xfs_inode_item_unlock(
694 ASSERT(ip->i_d.di_anextents > 0); 694 ASSERT(ip->i_d.di_anextents > 0);
695 ASSERT(iip->ili_format.ilf_fields & XFS_ILOG_AEXT); 695 ASSERT(iip->ili_format.ilf_fields & XFS_ILOG_AEXT);
696 ASSERT(ip->i_afp->if_bytes > 0); 696 ASSERT(ip->i_afp->if_bytes > 0);
697 kmem_free(iip->ili_aextents_buf, ip->i_afp->if_bytes); 697 kmem_free(iip->ili_aextents_buf);
698 iip->ili_aextents_buf = NULL; 698 iip->ili_aextents_buf = NULL;
699 } 699 }
700 700
@@ -957,8 +957,7 @@ xfs_inode_item_destroy(
957{ 957{
958#ifdef XFS_TRANS_DEBUG 958#ifdef XFS_TRANS_DEBUG
959 if (ip->i_itemp->ili_root_size != 0) { 959 if (ip->i_itemp->ili_root_size != 0) {
960 kmem_free(ip->i_itemp->ili_orig_root, 960 kmem_free(ip->i_itemp->ili_orig_root);
961 ip->i_itemp->ili_root_size);
962 } 961 }
963#endif 962#endif
964 kmem_zone_free(xfs_ili_zone, ip->i_itemp); 963 kmem_zone_free(xfs_ili_zone, ip->i_itemp);
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 7edcde691d1a..67f22b2b44b3 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -889,6 +889,16 @@ xfs_iomap_write_unwritten(
889 count_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + count); 889 count_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + count);
890 count_fsb = (xfs_filblks_t)(count_fsb - offset_fsb); 890 count_fsb = (xfs_filblks_t)(count_fsb - offset_fsb);
891 891
892 /*
893 * Reserve enough blocks in this transaction for two complete extent
894 * btree splits. We may be converting the middle part of an unwritten
895 * extent and in this case we will insert two new extents in the btree
896 * each of which could cause a full split.
897 *
898 * This reservation amount will be used in the first call to
899 * xfs_bmbt_split() to select an AG with enough space to satisfy the
900 * rest of the operation.
901 */
892 resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0) << 1; 902 resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0) << 1;
893 903
894 do { 904 do {
diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c
index 419de15aeb43..9a3ef9dcaeb9 100644
--- a/fs/xfs/xfs_itable.c
+++ b/fs/xfs/xfs_itable.c
@@ -257,7 +257,7 @@ xfs_bulkstat_one(
257 *ubused = error; 257 *ubused = error;
258 258
259 out_free: 259 out_free:
260 kmem_free(buf, sizeof(*buf)); 260 kmem_free(buf);
261 return error; 261 return error;
262} 262}
263 263
@@ -708,7 +708,7 @@ xfs_bulkstat(
708 /* 708 /*
709 * Done, we're either out of filesystem or space to put the data. 709 * Done, we're either out of filesystem or space to put the data.
710 */ 710 */
711 kmem_free(irbuf, irbsize); 711 kmem_free(irbuf);
712 *ubcountp = ubelem; 712 *ubcountp = ubelem;
713 /* 713 /*
714 * Found some inodes, return them now and return the error next time. 714 * Found some inodes, return them now and return the error next time.
@@ -914,7 +914,7 @@ xfs_inumbers(
914 } 914 }
915 *lastino = XFS_AGINO_TO_INO(mp, agno, agino); 915 *lastino = XFS_AGINO_TO_INO(mp, agno, agino);
916 } 916 }
917 kmem_free(buffer, bcount * sizeof(*buffer)); 917 kmem_free(buffer);
918 if (cur) 918 if (cur)
919 xfs_btree_del_cursor(cur, (error ? XFS_BTREE_ERROR : 919 xfs_btree_del_cursor(cur, (error ? XFS_BTREE_ERROR :
920 XFS_BTREE_NOERROR)); 920 XFS_BTREE_NOERROR));
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index ad3d26ddfe31..91b00a5686cd 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -226,20 +226,24 @@ xlog_grant_sub_space(struct log *log, int bytes)
226static void 226static void
227xlog_grant_add_space_write(struct log *log, int bytes) 227xlog_grant_add_space_write(struct log *log, int bytes)
228{ 228{
229 log->l_grant_write_bytes += bytes; 229 int tmp = log->l_logsize - log->l_grant_write_bytes;
230 if (log->l_grant_write_bytes > log->l_logsize) { 230 if (tmp > bytes)
231 log->l_grant_write_bytes -= log->l_logsize; 231 log->l_grant_write_bytes += bytes;
232 else {
232 log->l_grant_write_cycle++; 233 log->l_grant_write_cycle++;
234 log->l_grant_write_bytes = bytes - tmp;
233 } 235 }
234} 236}
235 237
236static void 238static void
237xlog_grant_add_space_reserve(struct log *log, int bytes) 239xlog_grant_add_space_reserve(struct log *log, int bytes)
238{ 240{
239 log->l_grant_reserve_bytes += bytes; 241 int tmp = log->l_logsize - log->l_grant_reserve_bytes;
240 if (log->l_grant_reserve_bytes > log->l_logsize) { 242 if (tmp > bytes)
241 log->l_grant_reserve_bytes -= log->l_logsize; 243 log->l_grant_reserve_bytes += bytes;
244 else {
242 log->l_grant_reserve_cycle++; 245 log->l_grant_reserve_cycle++;
246 log->l_grant_reserve_bytes = bytes - tmp;
243 } 247 }
244} 248}
245 249
@@ -1228,7 +1232,7 @@ xlog_alloc_log(xfs_mount_t *mp,
1228 1232
1229 spin_lock_init(&log->l_icloglock); 1233 spin_lock_init(&log->l_icloglock);
1230 spin_lock_init(&log->l_grant_lock); 1234 spin_lock_init(&log->l_grant_lock);
1231 initnsema(&log->l_flushsema, 0, "ic-flush"); 1235 sv_init(&log->l_flush_wait, 0, "flush_wait");
1232 1236
1233 /* log record size must be multiple of BBSIZE; see xlog_rec_header_t */ 1237 /* log record size must be multiple of BBSIZE; see xlog_rec_header_t */
1234 ASSERT((XFS_BUF_SIZE(bp) & BBMASK) == 0); 1238 ASSERT((XFS_BUF_SIZE(bp) & BBMASK) == 0);
@@ -1570,10 +1574,9 @@ xlog_dealloc_log(xlog_t *log)
1570 } 1574 }
1571#endif 1575#endif
1572 next_iclog = iclog->ic_next; 1576 next_iclog = iclog->ic_next;
1573 kmem_free(iclog, sizeof(xlog_in_core_t)); 1577 kmem_free(iclog);
1574 iclog = next_iclog; 1578 iclog = next_iclog;
1575 } 1579 }
1576 freesema(&log->l_flushsema);
1577 spinlock_destroy(&log->l_icloglock); 1580 spinlock_destroy(&log->l_icloglock);
1578 spinlock_destroy(&log->l_grant_lock); 1581 spinlock_destroy(&log->l_grant_lock);
1579 1582
@@ -1587,7 +1590,7 @@ xlog_dealloc_log(xlog_t *log)
1587 } 1590 }
1588#endif 1591#endif
1589 log->l_mp->m_log = NULL; 1592 log->l_mp->m_log = NULL;
1590 kmem_free(log, sizeof(xlog_t)); 1593 kmem_free(log);
1591} /* xlog_dealloc_log */ 1594} /* xlog_dealloc_log */
1592 1595
1593/* 1596/*
@@ -2097,6 +2100,7 @@ xlog_state_do_callback(
2097 int funcdidcallbacks; /* flag: function did callbacks */ 2100 int funcdidcallbacks; /* flag: function did callbacks */
2098 int repeats; /* for issuing console warnings if 2101 int repeats; /* for issuing console warnings if
2099 * looping too many times */ 2102 * looping too many times */
2103 int wake = 0;
2100 2104
2101 spin_lock(&log->l_icloglock); 2105 spin_lock(&log->l_icloglock);
2102 first_iclog = iclog = log->l_iclog; 2106 first_iclog = iclog = log->l_iclog;
@@ -2278,15 +2282,13 @@ xlog_state_do_callback(
2278 } 2282 }
2279#endif 2283#endif
2280 2284
2281 flushcnt = 0; 2285 if (log->l_iclog->ic_state & (XLOG_STATE_ACTIVE|XLOG_STATE_IOERROR))
2282 if (log->l_iclog->ic_state & (XLOG_STATE_ACTIVE|XLOG_STATE_IOERROR)) { 2286 wake = 1;
2283 flushcnt = log->l_flushcnt;
2284 log->l_flushcnt = 0;
2285 }
2286 spin_unlock(&log->l_icloglock); 2287 spin_unlock(&log->l_icloglock);
2287 while (flushcnt--) 2288
2288 vsema(&log->l_flushsema); 2289 if (wake)
2289} /* xlog_state_do_callback */ 2290 sv_broadcast(&log->l_flush_wait);
2291}
2290 2292
2291 2293
2292/* 2294/*
@@ -2384,16 +2386,15 @@ restart:
2384 } 2386 }
2385 2387
2386 iclog = log->l_iclog; 2388 iclog = log->l_iclog;
2387 if (! (iclog->ic_state == XLOG_STATE_ACTIVE)) { 2389 if (iclog->ic_state != XLOG_STATE_ACTIVE) {
2388 log->l_flushcnt++;
2389 spin_unlock(&log->l_icloglock);
2390 xlog_trace_iclog(iclog, XLOG_TRACE_SLEEP_FLUSH); 2390 xlog_trace_iclog(iclog, XLOG_TRACE_SLEEP_FLUSH);
2391 XFS_STATS_INC(xs_log_noiclogs); 2391 XFS_STATS_INC(xs_log_noiclogs);
2392 /* Ensure that log writes happen */ 2392
2393 psema(&log->l_flushsema, PINOD); 2393 /* Wait for log writes to have flushed */
2394 sv_wait(&log->l_flush_wait, 0, &log->l_icloglock, 0);
2394 goto restart; 2395 goto restart;
2395 } 2396 }
2396 ASSERT(iclog->ic_state == XLOG_STATE_ACTIVE); 2397
2397 head = &iclog->ic_header; 2398 head = &iclog->ic_header;
2398 2399
2399 atomic_inc(&iclog->ic_refcnt); /* prevents sync */ 2400 atomic_inc(&iclog->ic_refcnt); /* prevents sync */
diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h
index 8952a392b5f3..6245913196b4 100644
--- a/fs/xfs/xfs_log_priv.h
+++ b/fs/xfs/xfs_log_priv.h
@@ -423,10 +423,8 @@ typedef struct log {
423 int l_logBBsize; /* size of log in BB chunks */ 423 int l_logBBsize; /* size of log in BB chunks */
424 424
425 /* The following block of fields are changed while holding icloglock */ 425 /* The following block of fields are changed while holding icloglock */
426 sema_t l_flushsema ____cacheline_aligned_in_smp; 426 sv_t l_flush_wait ____cacheline_aligned_in_smp;
427 /* iclog flushing semaphore */ 427 /* waiting for iclog flush */
428 int l_flushcnt; /* # of procs waiting on this
429 * sema */
430 int l_covered_state;/* state of "covering disk 428 int l_covered_state;/* state of "covering disk
431 * log entries" */ 429 * log entries" */
432 xlog_in_core_t *l_iclog; /* head log queue */ 430 xlog_in_core_t *l_iclog; /* head log queue */
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index e65ab4af0955..9eb722ec744e 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -1715,8 +1715,7 @@ xlog_check_buffer_cancelled(
1715 } else { 1715 } else {
1716 prevp->bc_next = bcp->bc_next; 1716 prevp->bc_next = bcp->bc_next;
1717 } 1717 }
1718 kmem_free(bcp, 1718 kmem_free(bcp);
1719 sizeof(xfs_buf_cancel_t));
1720 } 1719 }
1721 } 1720 }
1722 return 1; 1721 return 1;
@@ -2519,7 +2518,7 @@ write_inode_buffer:
2519 2518
2520error: 2519error:
2521 if (need_free) 2520 if (need_free)
2522 kmem_free(in_f, sizeof(*in_f)); 2521 kmem_free(in_f);
2523 return XFS_ERROR(error); 2522 return XFS_ERROR(error);
2524} 2523}
2525 2524
@@ -2830,16 +2829,14 @@ xlog_recover_free_trans(
2830 item = item->ri_next; 2829 item = item->ri_next;
2831 /* Free the regions in the item. */ 2830 /* Free the regions in the item. */
2832 for (i = 0; i < free_item->ri_cnt; i++) { 2831 for (i = 0; i < free_item->ri_cnt; i++) {
2833 kmem_free(free_item->ri_buf[i].i_addr, 2832 kmem_free(free_item->ri_buf[i].i_addr);
2834 free_item->ri_buf[i].i_len);
2835 } 2833 }
2836 /* Free the item itself */ 2834 /* Free the item itself */
2837 kmem_free(free_item->ri_buf, 2835 kmem_free(free_item->ri_buf);
2838 (free_item->ri_total * sizeof(xfs_log_iovec_t))); 2836 kmem_free(free_item);
2839 kmem_free(free_item, sizeof(xlog_recover_item_t));
2840 } while (first_item != item); 2837 } while (first_item != item);
2841 /* Free the transaction recover structure */ 2838 /* Free the transaction recover structure */
2842 kmem_free(trans, sizeof(xlog_recover_t)); 2839 kmem_free(trans);
2843} 2840}
2844 2841
2845STATIC int 2842STATIC int
@@ -3786,8 +3783,7 @@ xlog_do_log_recovery(
3786 error = xlog_do_recovery_pass(log, head_blk, tail_blk, 3783 error = xlog_do_recovery_pass(log, head_blk, tail_blk,
3787 XLOG_RECOVER_PASS1); 3784 XLOG_RECOVER_PASS1);
3788 if (error != 0) { 3785 if (error != 0) {
3789 kmem_free(log->l_buf_cancel_table, 3786 kmem_free(log->l_buf_cancel_table);
3790 XLOG_BC_TABLE_SIZE * sizeof(xfs_buf_cancel_t*));
3791 log->l_buf_cancel_table = NULL; 3787 log->l_buf_cancel_table = NULL;
3792 return error; 3788 return error;
3793 } 3789 }
@@ -3806,8 +3802,7 @@ xlog_do_log_recovery(
3806 } 3802 }
3807#endif /* DEBUG */ 3803#endif /* DEBUG */
3808 3804
3809 kmem_free(log->l_buf_cancel_table, 3805 kmem_free(log->l_buf_cancel_table);
3810 XLOG_BC_TABLE_SIZE * sizeof(xfs_buf_cancel_t*));
3811 log->l_buf_cancel_table = NULL; 3806 log->l_buf_cancel_table = NULL;
3812 3807
3813 return error; 3808 return error;
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index da3988453b71..6c5d1325e7f6 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -47,12 +47,10 @@
47 47
48STATIC int xfs_mount_log_sb(xfs_mount_t *, __int64_t); 48STATIC int xfs_mount_log_sb(xfs_mount_t *, __int64_t);
49STATIC int xfs_uuid_mount(xfs_mount_t *); 49STATIC int xfs_uuid_mount(xfs_mount_t *);
50STATIC void xfs_uuid_unmount(xfs_mount_t *mp);
51STATIC void xfs_unmountfs_wait(xfs_mount_t *); 50STATIC void xfs_unmountfs_wait(xfs_mount_t *);
52 51
53 52
54#ifdef HAVE_PERCPU_SB 53#ifdef HAVE_PERCPU_SB
55STATIC void xfs_icsb_destroy_counters(xfs_mount_t *);
56STATIC void xfs_icsb_balance_counter(xfs_mount_t *, xfs_sb_field_t, 54STATIC void xfs_icsb_balance_counter(xfs_mount_t *, xfs_sb_field_t,
57 int); 55 int);
58STATIC void xfs_icsb_balance_counter_locked(xfs_mount_t *, xfs_sb_field_t, 56STATIC void xfs_icsb_balance_counter_locked(xfs_mount_t *, xfs_sb_field_t,
@@ -63,7 +61,6 @@ STATIC void xfs_icsb_disable_counter(xfs_mount_t *, xfs_sb_field_t);
63 61
64#else 62#else
65 63
66#define xfs_icsb_destroy_counters(mp) do { } while (0)
67#define xfs_icsb_balance_counter(mp, a, b) do { } while (0) 64#define xfs_icsb_balance_counter(mp, a, b) do { } while (0)
68#define xfs_icsb_balance_counter_locked(mp, a, b) do { } while (0) 65#define xfs_icsb_balance_counter_locked(mp, a, b) do { } while (0)
69#define xfs_icsb_modify_counters(mp, a, b, c) do { } while (0) 66#define xfs_icsb_modify_counters(mp, a, b, c) do { } while (0)
@@ -126,33 +123,11 @@ static const struct {
126}; 123};
127 124
128/* 125/*
129 * Return a pointer to an initialized xfs_mount structure.
130 */
131xfs_mount_t *
132xfs_mount_init(void)
133{
134 xfs_mount_t *mp;
135
136 mp = kmem_zalloc(sizeof(xfs_mount_t), KM_SLEEP);
137
138 if (xfs_icsb_init_counters(mp)) {
139 mp->m_flags |= XFS_MOUNT_NO_PERCPU_SB;
140 }
141
142 spin_lock_init(&mp->m_sb_lock);
143 mutex_init(&mp->m_ilock);
144 mutex_init(&mp->m_growlock);
145 atomic_set(&mp->m_active_trans, 0);
146
147 return mp;
148}
149
150/*
151 * Free up the resources associated with a mount structure. Assume that 126 * Free up the resources associated with a mount structure. Assume that
152 * the structure was initially zeroed, so we can tell which fields got 127 * the structure was initially zeroed, so we can tell which fields got
153 * initialized. 128 * initialized.
154 */ 129 */
155void 130STATIC void
156xfs_mount_free( 131xfs_mount_free(
157 xfs_mount_t *mp) 132 xfs_mount_t *mp)
158{ 133{
@@ -161,11 +136,8 @@ xfs_mount_free(
161 136
162 for (agno = 0; agno < mp->m_maxagi; agno++) 137 for (agno = 0; agno < mp->m_maxagi; agno++)
163 if (mp->m_perag[agno].pagb_list) 138 if (mp->m_perag[agno].pagb_list)
164 kmem_free(mp->m_perag[agno].pagb_list, 139 kmem_free(mp->m_perag[agno].pagb_list);
165 sizeof(xfs_perag_busy_t) * 140 kmem_free(mp->m_perag);
166 XFS_PAGB_NUM_SLOTS);
167 kmem_free(mp->m_perag,
168 sizeof(xfs_perag_t) * mp->m_sb.sb_agcount);
169 } 141 }
170 142
171 spinlock_destroy(&mp->m_ail_lock); 143 spinlock_destroy(&mp->m_ail_lock);
@@ -176,13 +148,11 @@ xfs_mount_free(
176 XFS_QM_DONE(mp); 148 XFS_QM_DONE(mp);
177 149
178 if (mp->m_fsname != NULL) 150 if (mp->m_fsname != NULL)
179 kmem_free(mp->m_fsname, mp->m_fsname_len); 151 kmem_free(mp->m_fsname);
180 if (mp->m_rtname != NULL) 152 if (mp->m_rtname != NULL)
181 kmem_free(mp->m_rtname, strlen(mp->m_rtname) + 1); 153 kmem_free(mp->m_rtname);
182 if (mp->m_logname != NULL) 154 if (mp->m_logname != NULL)
183 kmem_free(mp->m_logname, strlen(mp->m_logname) + 1); 155 kmem_free(mp->m_logname);
184
185 xfs_icsb_destroy_counters(mp);
186} 156}
187 157
188/* 158/*
@@ -288,6 +258,19 @@ xfs_mount_validate_sb(
288 return XFS_ERROR(EFSCORRUPTED); 258 return XFS_ERROR(EFSCORRUPTED);
289 } 259 }
290 260
261 /*
262 * Until this is fixed only page-sized or smaller data blocks work.
263 */
264 if (unlikely(sbp->sb_blocksize > PAGE_SIZE)) {
265 xfs_fs_mount_cmn_err(flags,
266 "file system with blocksize %d bytes",
267 sbp->sb_blocksize);
268 xfs_fs_mount_cmn_err(flags,
269 "only pagesize (%ld) or less will currently work.",
270 PAGE_SIZE);
271 return XFS_ERROR(ENOSYS);
272 }
273
291 if (xfs_sb_validate_fsb_count(sbp, sbp->sb_dblocks) || 274 if (xfs_sb_validate_fsb_count(sbp, sbp->sb_dblocks) ||
292 xfs_sb_validate_fsb_count(sbp, sbp->sb_rblocks)) { 275 xfs_sb_validate_fsb_count(sbp, sbp->sb_rblocks)) {
293 xfs_fs_mount_cmn_err(flags, 276 xfs_fs_mount_cmn_err(flags,
@@ -309,19 +292,6 @@ xfs_mount_validate_sb(
309 return XFS_ERROR(ENOSYS); 292 return XFS_ERROR(ENOSYS);
310 } 293 }
311 294
312 /*
313 * Until this is fixed only page-sized or smaller data blocks work.
314 */
315 if (unlikely(sbp->sb_blocksize > PAGE_SIZE)) {
316 xfs_fs_mount_cmn_err(flags,
317 "file system with blocksize %d bytes",
318 sbp->sb_blocksize);
319 xfs_fs_mount_cmn_err(flags,
320 "only pagesize (%ld) or less will currently work.",
321 PAGE_SIZE);
322 return XFS_ERROR(ENOSYS);
323 }
324
325 return 0; 295 return 0;
326} 296}
327 297
@@ -994,9 +964,19 @@ xfs_mountfs(
994 * Re-check for ATTR2 in case it was found in bad_features2 964 * Re-check for ATTR2 in case it was found in bad_features2
995 * slot. 965 * slot.
996 */ 966 */
997 if (xfs_sb_version_hasattr2(&mp->m_sb)) 967 if (xfs_sb_version_hasattr2(&mp->m_sb) &&
968 !(mp->m_flags & XFS_MOUNT_NOATTR2))
998 mp->m_flags |= XFS_MOUNT_ATTR2; 969 mp->m_flags |= XFS_MOUNT_ATTR2;
970 }
971
972 if (xfs_sb_version_hasattr2(&mp->m_sb) &&
973 (mp->m_flags & XFS_MOUNT_NOATTR2)) {
974 xfs_sb_version_removeattr2(&mp->m_sb);
975 update_flags |= XFS_SB_FEATURES2;
999 976
977 /* update sb_versionnum for the clearing of the morebits */
978 if (!sbp->sb_features2)
979 update_flags |= XFS_SB_VERSIONNUM;
1000 } 980 }
1001 981
1002 /* 982 /*
@@ -1255,15 +1235,13 @@ xfs_mountfs(
1255 error2: 1235 error2:
1256 for (agno = 0; agno < sbp->sb_agcount; agno++) 1236 for (agno = 0; agno < sbp->sb_agcount; agno++)
1257 if (mp->m_perag[agno].pagb_list) 1237 if (mp->m_perag[agno].pagb_list)
1258 kmem_free(mp->m_perag[agno].pagb_list, 1238 kmem_free(mp->m_perag[agno].pagb_list);
1259 sizeof(xfs_perag_busy_t) * XFS_PAGB_NUM_SLOTS); 1239 kmem_free(mp->m_perag);
1260 kmem_free(mp->m_perag, sbp->sb_agcount * sizeof(xfs_perag_t));
1261 mp->m_perag = NULL; 1240 mp->m_perag = NULL;
1262 /* FALLTHROUGH */ 1241 /* FALLTHROUGH */
1263 error1: 1242 error1:
1264 if (uuid_mounted) 1243 if (uuid_mounted)
1265 xfs_uuid_unmount(mp); 1244 uuid_table_remove(&mp->m_sb.sb_uuid);
1266 xfs_freesb(mp);
1267 return error; 1245 return error;
1268} 1246}
1269 1247
@@ -1274,7 +1252,7 @@ xfs_mountfs(
1274 * log and makes sure that incore structures are freed. 1252 * log and makes sure that incore structures are freed.
1275 */ 1253 */
1276int 1254int
1277xfs_unmountfs(xfs_mount_t *mp, struct cred *cr) 1255xfs_unmountfs(xfs_mount_t *mp)
1278{ 1256{
1279 __uint64_t resblks; 1257 __uint64_t resblks;
1280 int error = 0; 1258 int error = 0;
@@ -1341,9 +1319,8 @@ xfs_unmountfs(xfs_mount_t *mp, struct cred *cr)
1341 */ 1319 */
1342 ASSERT(mp->m_inodes == NULL); 1320 ASSERT(mp->m_inodes == NULL);
1343 1321
1344 xfs_unmountfs_close(mp, cr);
1345 if ((mp->m_flags & XFS_MOUNT_NOUUID) == 0) 1322 if ((mp->m_flags & XFS_MOUNT_NOUUID) == 0)
1346 xfs_uuid_unmount(mp); 1323 uuid_table_remove(&mp->m_sb.sb_uuid);
1347 1324
1348#if defined(DEBUG) || defined(INDUCE_IO_ERROR) 1325#if defined(DEBUG) || defined(INDUCE_IO_ERROR)
1349 xfs_errortag_clearall(mp, 0); 1326 xfs_errortag_clearall(mp, 0);
@@ -1352,16 +1329,6 @@ xfs_unmountfs(xfs_mount_t *mp, struct cred *cr)
1352 return 0; 1329 return 0;
1353} 1330}
1354 1331
1355void
1356xfs_unmountfs_close(xfs_mount_t *mp, struct cred *cr)
1357{
1358 if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp)
1359 xfs_free_buftarg(mp->m_logdev_targp, 1);
1360 if (mp->m_rtdev_targp)
1361 xfs_free_buftarg(mp->m_rtdev_targp, 1);
1362 xfs_free_buftarg(mp->m_ddev_targp, 0);
1363}
1364
1365STATIC void 1332STATIC void
1366xfs_unmountfs_wait(xfs_mount_t *mp) 1333xfs_unmountfs_wait(xfs_mount_t *mp)
1367{ 1334{
@@ -1905,16 +1872,6 @@ xfs_uuid_mount(
1905} 1872}
1906 1873
1907/* 1874/*
1908 * Remove filesystem from the UUID table.
1909 */
1910STATIC void
1911xfs_uuid_unmount(
1912 xfs_mount_t *mp)
1913{
1914 uuid_table_remove(&mp->m_sb.sb_uuid);
1915}
1916
1917/*
1918 * Used to log changes to the superblock unit and width fields which could 1875 * Used to log changes to the superblock unit and width fields which could
1919 * be altered by the mount options, as well as any potential sb_features2 1876 * be altered by the mount options, as well as any potential sb_features2
1920 * fixup. Only the first superblock is updated. 1877 * fixup. Only the first superblock is updated.
@@ -1928,7 +1885,8 @@ xfs_mount_log_sb(
1928 int error; 1885 int error;
1929 1886
1930 ASSERT(fields & (XFS_SB_UNIT | XFS_SB_WIDTH | XFS_SB_UUID | 1887 ASSERT(fields & (XFS_SB_UNIT | XFS_SB_WIDTH | XFS_SB_UUID |
1931 XFS_SB_FEATURES2 | XFS_SB_BAD_FEATURES2)); 1888 XFS_SB_FEATURES2 | XFS_SB_BAD_FEATURES2 |
1889 XFS_SB_VERSIONNUM));
1932 1890
1933 tp = xfs_trans_alloc(mp, XFS_TRANS_SB_UNIT); 1891 tp = xfs_trans_alloc(mp, XFS_TRANS_SB_UNIT);
1934 error = xfs_trans_reserve(tp, 0, mp->m_sb.sb_sectsize + 128, 0, 0, 1892 error = xfs_trans_reserve(tp, 0, mp->m_sb.sb_sectsize + 128, 0, 0,
@@ -2109,7 +2067,7 @@ xfs_icsb_reinit_counters(
2109 xfs_icsb_unlock(mp); 2067 xfs_icsb_unlock(mp);
2110} 2068}
2111 2069
2112STATIC void 2070void
2113xfs_icsb_destroy_counters( 2071xfs_icsb_destroy_counters(
2114 xfs_mount_t *mp) 2072 xfs_mount_t *mp)
2115{ 2073{
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index 63e0693a358a..5269bd6e3df0 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -61,6 +61,7 @@ struct xfs_bmap_free;
61struct xfs_extdelta; 61struct xfs_extdelta;
62struct xfs_swapext; 62struct xfs_swapext;
63struct xfs_mru_cache; 63struct xfs_mru_cache;
64struct xfs_nameops;
64 65
65/* 66/*
66 * Prototypes and functions for the Data Migration subsystem. 67 * Prototypes and functions for the Data Migration subsystem.
@@ -210,12 +211,14 @@ typedef struct xfs_icsb_cnts {
210 211
211extern int xfs_icsb_init_counters(struct xfs_mount *); 212extern int xfs_icsb_init_counters(struct xfs_mount *);
212extern void xfs_icsb_reinit_counters(struct xfs_mount *); 213extern void xfs_icsb_reinit_counters(struct xfs_mount *);
214extern void xfs_icsb_destroy_counters(struct xfs_mount *);
213extern void xfs_icsb_sync_counters(struct xfs_mount *, int); 215extern void xfs_icsb_sync_counters(struct xfs_mount *, int);
214extern void xfs_icsb_sync_counters_locked(struct xfs_mount *, int); 216extern void xfs_icsb_sync_counters_locked(struct xfs_mount *, int);
215 217
216#else 218#else
217#define xfs_icsb_init_counters(mp) (0) 219#define xfs_icsb_init_counters(mp) (0)
218#define xfs_icsb_reinit_counters(mp) do { } while (0) 220#define xfs_icsb_destroy_counters(mp) do { } while (0)
221#define xfs_icsb_reinit_counters(mp) do { } while (0)
219#define xfs_icsb_sync_counters(mp, flags) do { } while (0) 222#define xfs_icsb_sync_counters(mp, flags) do { } while (0)
220#define xfs_icsb_sync_counters_locked(mp, flags) do { } while (0) 223#define xfs_icsb_sync_counters_locked(mp, flags) do { } while (0)
221#endif 224#endif
@@ -313,6 +316,7 @@ typedef struct xfs_mount {
313 __uint8_t m_inode_quiesce;/* call quiesce on new inodes. 316 __uint8_t m_inode_quiesce;/* call quiesce on new inodes.
314 field governed by m_ilock */ 317 field governed by m_ilock */
315 __uint8_t m_sectbb_log; /* sectlog - BBSHIFT */ 318 __uint8_t m_sectbb_log; /* sectlog - BBSHIFT */
319 const struct xfs_nameops *m_dirnameops; /* vector of dir name ops */
316 int m_dirblksize; /* directory block sz--bytes */ 320 int m_dirblksize; /* directory block sz--bytes */
317 int m_dirblkfsbs; /* directory block sz--fsbs */ 321 int m_dirblkfsbs; /* directory block sz--fsbs */
318 xfs_dablk_t m_dirdatablk; /* blockno of dir data v2 */ 322 xfs_dablk_t m_dirdatablk; /* blockno of dir data v2 */
@@ -378,6 +382,7 @@ typedef struct xfs_mount {
378 counters */ 382 counters */
379#define XFS_MOUNT_FILESTREAMS (1ULL << 24) /* enable the filestreams 383#define XFS_MOUNT_FILESTREAMS (1ULL << 24) /* enable the filestreams
380 allocator */ 384 allocator */
385#define XFS_MOUNT_NOATTR2 (1ULL << 25) /* disable use of attr2 format */
381 386
382 387
383/* 388/*
@@ -510,15 +515,12 @@ typedef struct xfs_mod_sb {
510#define XFS_MOUNT_ILOCK(mp) mutex_lock(&((mp)->m_ilock)) 515#define XFS_MOUNT_ILOCK(mp) mutex_lock(&((mp)->m_ilock))
511#define XFS_MOUNT_IUNLOCK(mp) mutex_unlock(&((mp)->m_ilock)) 516#define XFS_MOUNT_IUNLOCK(mp) mutex_unlock(&((mp)->m_ilock))
512 517
513extern xfs_mount_t *xfs_mount_init(void);
514extern void xfs_mod_sb(xfs_trans_t *, __int64_t); 518extern void xfs_mod_sb(xfs_trans_t *, __int64_t);
515extern int xfs_log_sbcount(xfs_mount_t *, uint); 519extern int xfs_log_sbcount(xfs_mount_t *, uint);
516extern void xfs_mount_free(xfs_mount_t *mp);
517extern int xfs_mountfs(xfs_mount_t *mp, int); 520extern int xfs_mountfs(xfs_mount_t *mp, int);
518extern void xfs_mountfs_check_barriers(xfs_mount_t *mp); 521extern void xfs_mountfs_check_barriers(xfs_mount_t *mp);
519 522
520extern int xfs_unmountfs(xfs_mount_t *, struct cred *); 523extern int xfs_unmountfs(xfs_mount_t *);
521extern void xfs_unmountfs_close(xfs_mount_t *, struct cred *);
522extern int xfs_unmountfs_writesb(xfs_mount_t *); 524extern int xfs_unmountfs_writesb(xfs_mount_t *);
523extern int xfs_unmount_flush(xfs_mount_t *, int); 525extern int xfs_unmount_flush(xfs_mount_t *, int);
524extern int xfs_mod_incore_sb(xfs_mount_t *, xfs_sb_field_t, int64_t, int); 526extern int xfs_mod_incore_sb(xfs_mount_t *, xfs_sb_field_t, int64_t, int);
@@ -544,9 +546,6 @@ extern void xfs_qmops_put(struct xfs_mount *);
544 546
545extern struct xfs_dmops xfs_dmcore_xfs; 547extern struct xfs_dmops xfs_dmcore_xfs;
546 548
547extern int xfs_init(void);
548extern void xfs_cleanup(void);
549
550#endif /* __KERNEL__ */ 549#endif /* __KERNEL__ */
551 550
552#endif /* __XFS_MOUNT_H__ */ 551#endif /* __XFS_MOUNT_H__ */
diff --git a/fs/xfs/xfs_mru_cache.c b/fs/xfs/xfs_mru_cache.c
index a0b2c0a2589a..afee7eb24323 100644
--- a/fs/xfs/xfs_mru_cache.c
+++ b/fs/xfs/xfs_mru_cache.c
@@ -307,15 +307,18 @@ xfs_mru_cache_init(void)
307 xfs_mru_elem_zone = kmem_zone_init(sizeof(xfs_mru_cache_elem_t), 307 xfs_mru_elem_zone = kmem_zone_init(sizeof(xfs_mru_cache_elem_t),
308 "xfs_mru_cache_elem"); 308 "xfs_mru_cache_elem");
309 if (!xfs_mru_elem_zone) 309 if (!xfs_mru_elem_zone)
310 return ENOMEM; 310 goto out;
311 311
312 xfs_mru_reap_wq = create_singlethread_workqueue("xfs_mru_cache"); 312 xfs_mru_reap_wq = create_singlethread_workqueue("xfs_mru_cache");
313 if (!xfs_mru_reap_wq) { 313 if (!xfs_mru_reap_wq)
314 kmem_zone_destroy(xfs_mru_elem_zone); 314 goto out_destroy_mru_elem_zone;
315 return ENOMEM;
316 }
317 315
318 return 0; 316 return 0;
317
318 out_destroy_mru_elem_zone:
319 kmem_zone_destroy(xfs_mru_elem_zone);
320 out:
321 return -ENOMEM;
319} 322}
320 323
321void 324void
@@ -382,9 +385,9 @@ xfs_mru_cache_create(
382 385
383exit: 386exit:
384 if (err && mru && mru->lists) 387 if (err && mru && mru->lists)
385 kmem_free(mru->lists, mru->grp_count * sizeof(*mru->lists)); 388 kmem_free(mru->lists);
386 if (err && mru) 389 if (err && mru)
387 kmem_free(mru, sizeof(*mru)); 390 kmem_free(mru);
388 391
389 return err; 392 return err;
390} 393}
@@ -424,8 +427,8 @@ xfs_mru_cache_destroy(
424 427
425 xfs_mru_cache_flush(mru); 428 xfs_mru_cache_flush(mru);
426 429
427 kmem_free(mru->lists, mru->grp_count * sizeof(*mru->lists)); 430 kmem_free(mru->lists);
428 kmem_free(mru, sizeof(*mru)); 431 kmem_free(mru);
429} 432}
430 433
431/* 434/*
diff --git a/fs/xfs/xfs_rename.c b/fs/xfs/xfs_rename.c
index d8063e1ad298..d700dacdb10e 100644
--- a/fs/xfs/xfs_rename.c
+++ b/fs/xfs/xfs_rename.c
@@ -336,22 +336,18 @@ xfs_rename(
336 ASSERT(error != EEXIST); 336 ASSERT(error != EEXIST);
337 if (error) 337 if (error)
338 goto abort_return; 338 goto abort_return;
339 xfs_ichgtime(src_ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
340
341 } else {
342 /*
343 * We always want to hit the ctime on the source inode.
344 * We do it in the if clause above for the 'new_parent &&
345 * src_is_directory' case, and here we get all the other
346 * cases. This isn't strictly required by the standards
347 * since the source inode isn't really being changed,
348 * but old unix file systems did it and some incremental
349 * backup programs won't work without it.
350 */
351 xfs_ichgtime(src_ip, XFS_ICHGTIME_CHG);
352 } 339 }
353 340
354 /* 341 /*
342 * We always want to hit the ctime on the source inode.
343 *
344 * This isn't strictly required by the standards since the source
345 * inode isn't really being changed, but old unix file systems did
346 * it and some incremental backup programs won't work without it.
347 */
348 xfs_ichgtime(src_ip, XFS_ICHGTIME_CHG);
349
350 /*
355 * Adjust the link count on src_dp. This is necessary when 351 * Adjust the link count on src_dp. This is necessary when
356 * renaming a directory, either within one parent when 352 * renaming a directory, either within one parent when
357 * the target existed, or across two parent directories. 353 * the target existed, or across two parent directories.
diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c
index a0dc6e5bc5b9..bf87a5913504 100644
--- a/fs/xfs/xfs_rtalloc.c
+++ b/fs/xfs/xfs_rtalloc.c
@@ -2062,7 +2062,7 @@ xfs_growfs_rt(
2062 /* 2062 /*
2063 * Free the fake mp structure. 2063 * Free the fake mp structure.
2064 */ 2064 */
2065 kmem_free(nmp, sizeof(*nmp)); 2065 kmem_free(nmp);
2066 2066
2067 return error; 2067 return error;
2068} 2068}
diff --git a/fs/xfs/xfs_sb.h b/fs/xfs/xfs_sb.h
index d904efe7f871..3f8cf1587f4c 100644
--- a/fs/xfs/xfs_sb.h
+++ b/fs/xfs/xfs_sb.h
@@ -46,10 +46,12 @@ struct xfs_mount;
46#define XFS_SB_VERSION_SECTORBIT 0x0800 46#define XFS_SB_VERSION_SECTORBIT 0x0800
47#define XFS_SB_VERSION_EXTFLGBIT 0x1000 47#define XFS_SB_VERSION_EXTFLGBIT 0x1000
48#define XFS_SB_VERSION_DIRV2BIT 0x2000 48#define XFS_SB_VERSION_DIRV2BIT 0x2000
49#define XFS_SB_VERSION_BORGBIT 0x4000 /* ASCII only case-insens. */
49#define XFS_SB_VERSION_MOREBITSBIT 0x8000 50#define XFS_SB_VERSION_MOREBITSBIT 0x8000
50#define XFS_SB_VERSION_OKSASHFBITS \ 51#define XFS_SB_VERSION_OKSASHFBITS \
51 (XFS_SB_VERSION_EXTFLGBIT | \ 52 (XFS_SB_VERSION_EXTFLGBIT | \
52 XFS_SB_VERSION_DIRV2BIT) 53 XFS_SB_VERSION_DIRV2BIT | \
54 XFS_SB_VERSION_BORGBIT)
53#define XFS_SB_VERSION_OKREALFBITS \ 55#define XFS_SB_VERSION_OKREALFBITS \
54 (XFS_SB_VERSION_ATTRBIT | \ 56 (XFS_SB_VERSION_ATTRBIT | \
55 XFS_SB_VERSION_NLINKBIT | \ 57 XFS_SB_VERSION_NLINKBIT | \
@@ -437,6 +439,12 @@ static inline int xfs_sb_version_hassector(xfs_sb_t *sbp)
437 ((sbp)->sb_versionnum & XFS_SB_VERSION_SECTORBIT); 439 ((sbp)->sb_versionnum & XFS_SB_VERSION_SECTORBIT);
438} 440}
439 441
442static inline int xfs_sb_version_hasasciici(xfs_sb_t *sbp)
443{
444 return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4) && \
445 (sbp->sb_versionnum & XFS_SB_VERSION_BORGBIT);
446}
447
440static inline int xfs_sb_version_hasmorebits(xfs_sb_t *sbp) 448static inline int xfs_sb_version_hasmorebits(xfs_sb_t *sbp)
441{ 449{
442 return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4) && \ 450 return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4) && \
@@ -473,6 +481,13 @@ static inline void xfs_sb_version_addattr2(xfs_sb_t *sbp)
473 ((sbp)->sb_features2 | XFS_SB_VERSION2_ATTR2BIT))); 481 ((sbp)->sb_features2 | XFS_SB_VERSION2_ATTR2BIT)));
474} 482}
475 483
484static inline void xfs_sb_version_removeattr2(xfs_sb_t *sbp)
485{
486 sbp->sb_features2 &= ~XFS_SB_VERSION2_ATTR2BIT;
487 if (!sbp->sb_features2)
488 sbp->sb_versionnum &= ~XFS_SB_VERSION_MOREBITSBIT;
489}
490
476/* 491/*
477 * end of superblock version macros 492 * end of superblock version macros
478 */ 493 */
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index 140386434aa3..e4ebddd3c500 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -889,7 +889,7 @@ shut_us_down:
889 889
890 tp->t_commit_lsn = commit_lsn; 890 tp->t_commit_lsn = commit_lsn;
891 if (nvec > XFS_TRANS_LOGVEC_COUNT) { 891 if (nvec > XFS_TRANS_LOGVEC_COUNT) {
892 kmem_free(log_vector, nvec * sizeof(xfs_log_iovec_t)); 892 kmem_free(log_vector);
893 } 893 }
894 894
895 /* 895 /*
@@ -1265,7 +1265,7 @@ xfs_trans_committed(
1265 ASSERT(!XFS_LIC_ARE_ALL_FREE(licp)); 1265 ASSERT(!XFS_LIC_ARE_ALL_FREE(licp));
1266 xfs_trans_chunk_committed(licp, tp->t_lsn, abortflag); 1266 xfs_trans_chunk_committed(licp, tp->t_lsn, abortflag);
1267 next_licp = licp->lic_next; 1267 next_licp = licp->lic_next;
1268 kmem_free(licp, sizeof(xfs_log_item_chunk_t)); 1268 kmem_free(licp);
1269 licp = next_licp; 1269 licp = next_licp;
1270 } 1270 }
1271 1271
diff --git a/fs/xfs/xfs_trans_inode.c b/fs/xfs/xfs_trans_inode.c
index 4c70bf5e9985..2a1c0f071f91 100644
--- a/fs/xfs/xfs_trans_inode.c
+++ b/fs/xfs/xfs_trans_inode.c
@@ -291,7 +291,7 @@ xfs_trans_inode_broot_debug(
291 iip = ip->i_itemp; 291 iip = ip->i_itemp;
292 if (iip->ili_root_size != 0) { 292 if (iip->ili_root_size != 0) {
293 ASSERT(iip->ili_orig_root != NULL); 293 ASSERT(iip->ili_orig_root != NULL);
294 kmem_free(iip->ili_orig_root, iip->ili_root_size); 294 kmem_free(iip->ili_orig_root);
295 iip->ili_root_size = 0; 295 iip->ili_root_size = 0;
296 iip->ili_orig_root = NULL; 296 iip->ili_orig_root = NULL;
297 } 297 }
diff --git a/fs/xfs/xfs_trans_item.c b/fs/xfs/xfs_trans_item.c
index 66a09f0d894b..db5c83595526 100644
--- a/fs/xfs/xfs_trans_item.c
+++ b/fs/xfs/xfs_trans_item.c
@@ -161,7 +161,7 @@ xfs_trans_free_item(xfs_trans_t *tp, xfs_log_item_desc_t *lidp)
161 licpp = &((*licpp)->lic_next); 161 licpp = &((*licpp)->lic_next);
162 } 162 }
163 *licpp = licp->lic_next; 163 *licpp = licp->lic_next;
164 kmem_free(licp, sizeof(xfs_log_item_chunk_t)); 164 kmem_free(licp);
165 tp->t_items_free -= XFS_LIC_NUM_SLOTS; 165 tp->t_items_free -= XFS_LIC_NUM_SLOTS;
166 } 166 }
167} 167}
@@ -314,7 +314,7 @@ xfs_trans_free_items(
314 ASSERT(!XFS_LIC_ARE_ALL_FREE(licp)); 314 ASSERT(!XFS_LIC_ARE_ALL_FREE(licp));
315 (void) xfs_trans_unlock_chunk(licp, 1, abort, NULLCOMMITLSN); 315 (void) xfs_trans_unlock_chunk(licp, 1, abort, NULLCOMMITLSN);
316 next_licp = licp->lic_next; 316 next_licp = licp->lic_next;
317 kmem_free(licp, sizeof(xfs_log_item_chunk_t)); 317 kmem_free(licp);
318 licp = next_licp; 318 licp = next_licp;
319 } 319 }
320 320
@@ -363,7 +363,7 @@ xfs_trans_unlock_items(xfs_trans_t *tp, xfs_lsn_t commit_lsn)
363 next_licp = licp->lic_next; 363 next_licp = licp->lic_next;
364 if (XFS_LIC_ARE_ALL_FREE(licp)) { 364 if (XFS_LIC_ARE_ALL_FREE(licp)) {
365 *licpp = next_licp; 365 *licpp = next_licp;
366 kmem_free(licp, sizeof(xfs_log_item_chunk_t)); 366 kmem_free(licp);
367 freed -= XFS_LIC_NUM_SLOTS; 367 freed -= XFS_LIC_NUM_SLOTS;
368 } else { 368 } else {
369 licpp = &(licp->lic_next); 369 licpp = &(licp->lic_next);
@@ -530,7 +530,7 @@ xfs_trans_free_busy(xfs_trans_t *tp)
530 lbcp = tp->t_busy.lbc_next; 530 lbcp = tp->t_busy.lbc_next;
531 while (lbcp != NULL) { 531 while (lbcp != NULL) {
532 lbcq = lbcp->lbc_next; 532 lbcq = lbcp->lbc_next;
533 kmem_free(lbcp, sizeof(xfs_log_busy_chunk_t)); 533 kmem_free(lbcp);
534 lbcp = lbcq; 534 lbcp = lbcq;
535 } 535 }
536 536
diff --git a/fs/xfs/xfs_vfsops.c b/fs/xfs/xfs_vfsops.c
index 30bacd8bb0e5..4a9a43315a86 100644
--- a/fs/xfs/xfs_vfsops.c
+++ b/fs/xfs/xfs_vfsops.c
@@ -58,586 +58,6 @@
58#include "xfs_utils.h" 58#include "xfs_utils.h"
59 59
60 60
61int __init
62xfs_init(void)
63{
64#ifdef XFS_DABUF_DEBUG
65 extern spinlock_t xfs_dabuf_global_lock;
66 spin_lock_init(&xfs_dabuf_global_lock);
67#endif
68
69 /*
70 * Initialize all of the zone allocators we use.
71 */
72 xfs_log_ticket_zone = kmem_zone_init(sizeof(xlog_ticket_t),
73 "xfs_log_ticket");
74 xfs_bmap_free_item_zone = kmem_zone_init(sizeof(xfs_bmap_free_item_t),
75 "xfs_bmap_free_item");
76 xfs_btree_cur_zone = kmem_zone_init(sizeof(xfs_btree_cur_t),
77 "xfs_btree_cur");
78 xfs_da_state_zone = kmem_zone_init(sizeof(xfs_da_state_t),
79 "xfs_da_state");
80 xfs_dabuf_zone = kmem_zone_init(sizeof(xfs_dabuf_t), "xfs_dabuf");
81 xfs_ifork_zone = kmem_zone_init(sizeof(xfs_ifork_t), "xfs_ifork");
82 xfs_trans_zone = kmem_zone_init(sizeof(xfs_trans_t), "xfs_trans");
83 xfs_acl_zone_init(xfs_acl_zone, "xfs_acl");
84 xfs_mru_cache_init();
85 xfs_filestream_init();
86
87 /*
88 * The size of the zone allocated buf log item is the maximum
89 * size possible under XFS. This wastes a little bit of memory,
90 * but it is much faster.
91 */
92 xfs_buf_item_zone =
93 kmem_zone_init((sizeof(xfs_buf_log_item_t) +
94 (((XFS_MAX_BLOCKSIZE / XFS_BLI_CHUNK) /
95 NBWORD) * sizeof(int))),
96 "xfs_buf_item");
97 xfs_efd_zone =
98 kmem_zone_init((sizeof(xfs_efd_log_item_t) +
99 ((XFS_EFD_MAX_FAST_EXTENTS - 1) *
100 sizeof(xfs_extent_t))),
101 "xfs_efd_item");
102 xfs_efi_zone =
103 kmem_zone_init((sizeof(xfs_efi_log_item_t) +
104 ((XFS_EFI_MAX_FAST_EXTENTS - 1) *
105 sizeof(xfs_extent_t))),
106 "xfs_efi_item");
107
108 /*
109 * These zones warrant special memory allocator hints
110 */
111 xfs_inode_zone =
112 kmem_zone_init_flags(sizeof(xfs_inode_t), "xfs_inode",
113 KM_ZONE_HWALIGN | KM_ZONE_RECLAIM |
114 KM_ZONE_SPREAD, NULL);
115 xfs_ili_zone =
116 kmem_zone_init_flags(sizeof(xfs_inode_log_item_t), "xfs_ili",
117 KM_ZONE_SPREAD, NULL);
118
119 /*
120 * Allocate global trace buffers.
121 */
122#ifdef XFS_ALLOC_TRACE
123 xfs_alloc_trace_buf = ktrace_alloc(XFS_ALLOC_TRACE_SIZE, KM_SLEEP);
124#endif
125#ifdef XFS_BMAP_TRACE
126 xfs_bmap_trace_buf = ktrace_alloc(XFS_BMAP_TRACE_SIZE, KM_SLEEP);
127#endif
128#ifdef XFS_BMBT_TRACE
129 xfs_bmbt_trace_buf = ktrace_alloc(XFS_BMBT_TRACE_SIZE, KM_SLEEP);
130#endif
131#ifdef XFS_ATTR_TRACE
132 xfs_attr_trace_buf = ktrace_alloc(XFS_ATTR_TRACE_SIZE, KM_SLEEP);
133#endif
134#ifdef XFS_DIR2_TRACE
135 xfs_dir2_trace_buf = ktrace_alloc(XFS_DIR2_GTRACE_SIZE, KM_SLEEP);
136#endif
137
138 xfs_dir_startup();
139
140#if (defined(DEBUG) || defined(INDUCE_IO_ERROR))
141 xfs_error_test_init();
142#endif /* DEBUG || INDUCE_IO_ERROR */
143
144 xfs_init_procfs();
145 xfs_sysctl_register();
146 return 0;
147}
148
149void __exit
150xfs_cleanup(void)
151{
152 extern kmem_zone_t *xfs_inode_zone;
153 extern kmem_zone_t *xfs_efd_zone;
154 extern kmem_zone_t *xfs_efi_zone;
155
156 xfs_cleanup_procfs();
157 xfs_sysctl_unregister();
158 xfs_filestream_uninit();
159 xfs_mru_cache_uninit();
160 xfs_acl_zone_destroy(xfs_acl_zone);
161
162#ifdef XFS_DIR2_TRACE
163 ktrace_free(xfs_dir2_trace_buf);
164#endif
165#ifdef XFS_ATTR_TRACE
166 ktrace_free(xfs_attr_trace_buf);
167#endif
168#ifdef XFS_BMBT_TRACE
169 ktrace_free(xfs_bmbt_trace_buf);
170#endif
171#ifdef XFS_BMAP_TRACE
172 ktrace_free(xfs_bmap_trace_buf);
173#endif
174#ifdef XFS_ALLOC_TRACE
175 ktrace_free(xfs_alloc_trace_buf);
176#endif
177
178 kmem_zone_destroy(xfs_bmap_free_item_zone);
179 kmem_zone_destroy(xfs_btree_cur_zone);
180 kmem_zone_destroy(xfs_inode_zone);
181 kmem_zone_destroy(xfs_trans_zone);
182 kmem_zone_destroy(xfs_da_state_zone);
183 kmem_zone_destroy(xfs_dabuf_zone);
184 kmem_zone_destroy(xfs_buf_item_zone);
185 kmem_zone_destroy(xfs_efd_zone);
186 kmem_zone_destroy(xfs_efi_zone);
187 kmem_zone_destroy(xfs_ifork_zone);
188 kmem_zone_destroy(xfs_ili_zone);
189 kmem_zone_destroy(xfs_log_ticket_zone);
190}
191
192/*
193 * xfs_start_flags
194 *
195 * This function fills in xfs_mount_t fields based on mount args.
196 * Note: the superblock has _not_ yet been read in.
197 */
198STATIC int
199xfs_start_flags(
200 struct xfs_mount_args *ap,
201 struct xfs_mount *mp)
202{
203 /* Values are in BBs */
204 if ((ap->flags & XFSMNT_NOALIGN) != XFSMNT_NOALIGN) {
205 /*
206 * At this point the superblock has not been read
207 * in, therefore we do not know the block size.
208 * Before the mount call ends we will convert
209 * these to FSBs.
210 */
211 mp->m_dalign = ap->sunit;
212 mp->m_swidth = ap->swidth;
213 }
214
215 if (ap->logbufs != -1 &&
216 ap->logbufs != 0 &&
217 (ap->logbufs < XLOG_MIN_ICLOGS ||
218 ap->logbufs > XLOG_MAX_ICLOGS)) {
219 cmn_err(CE_WARN,
220 "XFS: invalid logbufs value: %d [not %d-%d]",
221 ap->logbufs, XLOG_MIN_ICLOGS, XLOG_MAX_ICLOGS);
222 return XFS_ERROR(EINVAL);
223 }
224 mp->m_logbufs = ap->logbufs;
225 if (ap->logbufsize != -1 &&
226 ap->logbufsize != 0 &&
227 (ap->logbufsize < XLOG_MIN_RECORD_BSIZE ||
228 ap->logbufsize > XLOG_MAX_RECORD_BSIZE ||
229 !is_power_of_2(ap->logbufsize))) {
230 cmn_err(CE_WARN,
231 "XFS: invalid logbufsize: %d [not 16k,32k,64k,128k or 256k]",
232 ap->logbufsize);
233 return XFS_ERROR(EINVAL);
234 }
235 mp->m_logbsize = ap->logbufsize;
236 mp->m_fsname_len = strlen(ap->fsname) + 1;
237 mp->m_fsname = kmem_alloc(mp->m_fsname_len, KM_SLEEP);
238 strcpy(mp->m_fsname, ap->fsname);
239 if (ap->rtname[0]) {
240 mp->m_rtname = kmem_alloc(strlen(ap->rtname) + 1, KM_SLEEP);
241 strcpy(mp->m_rtname, ap->rtname);
242 }
243 if (ap->logname[0]) {
244 mp->m_logname = kmem_alloc(strlen(ap->logname) + 1, KM_SLEEP);
245 strcpy(mp->m_logname, ap->logname);
246 }
247
248 if (ap->flags & XFSMNT_WSYNC)
249 mp->m_flags |= XFS_MOUNT_WSYNC;
250#if XFS_BIG_INUMS
251 if (ap->flags & XFSMNT_INO64) {
252 mp->m_flags |= XFS_MOUNT_INO64;
253 mp->m_inoadd = XFS_INO64_OFFSET;
254 }
255#endif
256 if (ap->flags & XFSMNT_RETERR)
257 mp->m_flags |= XFS_MOUNT_RETERR;
258 if (ap->flags & XFSMNT_NOALIGN)
259 mp->m_flags |= XFS_MOUNT_NOALIGN;
260 if (ap->flags & XFSMNT_SWALLOC)
261 mp->m_flags |= XFS_MOUNT_SWALLOC;
262 if (ap->flags & XFSMNT_OSYNCISOSYNC)
263 mp->m_flags |= XFS_MOUNT_OSYNCISOSYNC;
264 if (ap->flags & XFSMNT_32BITINODES)
265 mp->m_flags |= XFS_MOUNT_32BITINODES;
266
267 if (ap->flags & XFSMNT_IOSIZE) {
268 if (ap->iosizelog > XFS_MAX_IO_LOG ||
269 ap->iosizelog < XFS_MIN_IO_LOG) {
270 cmn_err(CE_WARN,
271 "XFS: invalid log iosize: %d [not %d-%d]",
272 ap->iosizelog, XFS_MIN_IO_LOG,
273 XFS_MAX_IO_LOG);
274 return XFS_ERROR(EINVAL);
275 }
276
277 mp->m_flags |= XFS_MOUNT_DFLT_IOSIZE;
278 mp->m_readio_log = mp->m_writeio_log = ap->iosizelog;
279 }
280
281 if (ap->flags & XFSMNT_IKEEP)
282 mp->m_flags |= XFS_MOUNT_IKEEP;
283 if (ap->flags & XFSMNT_DIRSYNC)
284 mp->m_flags |= XFS_MOUNT_DIRSYNC;
285 if (ap->flags & XFSMNT_ATTR2)
286 mp->m_flags |= XFS_MOUNT_ATTR2;
287
288 if (ap->flags2 & XFSMNT2_COMPAT_IOSIZE)
289 mp->m_flags |= XFS_MOUNT_COMPAT_IOSIZE;
290
291 /*
292 * no recovery flag requires a read-only mount
293 */
294 if (ap->flags & XFSMNT_NORECOVERY) {
295 if (!(mp->m_flags & XFS_MOUNT_RDONLY)) {
296 cmn_err(CE_WARN,
297 "XFS: tried to mount a FS read-write without recovery!");
298 return XFS_ERROR(EINVAL);
299 }
300 mp->m_flags |= XFS_MOUNT_NORECOVERY;
301 }
302
303 if (ap->flags & XFSMNT_NOUUID)
304 mp->m_flags |= XFS_MOUNT_NOUUID;
305 if (ap->flags & XFSMNT_BARRIER)
306 mp->m_flags |= XFS_MOUNT_BARRIER;
307 else
308 mp->m_flags &= ~XFS_MOUNT_BARRIER;
309
310 if (ap->flags2 & XFSMNT2_FILESTREAMS)
311 mp->m_flags |= XFS_MOUNT_FILESTREAMS;
312
313 if (ap->flags & XFSMNT_DMAPI)
314 mp->m_flags |= XFS_MOUNT_DMAPI;
315 return 0;
316}
317
318/*
319 * This function fills in xfs_mount_t fields based on mount args.
320 * Note: the superblock _has_ now been read in.
321 */
322STATIC int
323xfs_finish_flags(
324 struct xfs_mount_args *ap,
325 struct xfs_mount *mp)
326{
327 int ronly = (mp->m_flags & XFS_MOUNT_RDONLY);
328
329 /* Fail a mount where the logbuf is smaller then the log stripe */
330 if (xfs_sb_version_haslogv2(&mp->m_sb)) {
331 if ((ap->logbufsize <= 0) &&
332 (mp->m_sb.sb_logsunit > XLOG_BIG_RECORD_BSIZE)) {
333 mp->m_logbsize = mp->m_sb.sb_logsunit;
334 } else if (ap->logbufsize > 0 &&
335 ap->logbufsize < mp->m_sb.sb_logsunit) {
336 cmn_err(CE_WARN,
337 "XFS: logbuf size must be greater than or equal to log stripe size");
338 return XFS_ERROR(EINVAL);
339 }
340 } else {
341 /* Fail a mount if the logbuf is larger than 32K */
342 if (ap->logbufsize > XLOG_BIG_RECORD_BSIZE) {
343 cmn_err(CE_WARN,
344 "XFS: logbuf size for version 1 logs must be 16K or 32K");
345 return XFS_ERROR(EINVAL);
346 }
347 }
348
349 if (xfs_sb_version_hasattr2(&mp->m_sb))
350 mp->m_flags |= XFS_MOUNT_ATTR2;
351
352 /*
353 * prohibit r/w mounts of read-only filesystems
354 */
355 if ((mp->m_sb.sb_flags & XFS_SBF_READONLY) && !ronly) {
356 cmn_err(CE_WARN,
357 "XFS: cannot mount a read-only filesystem as read-write");
358 return XFS_ERROR(EROFS);
359 }
360
361 /*
362 * check for shared mount.
363 */
364 if (ap->flags & XFSMNT_SHARED) {
365 if (!xfs_sb_version_hasshared(&mp->m_sb))
366 return XFS_ERROR(EINVAL);
367
368 /*
369 * For IRIX 6.5, shared mounts must have the shared
370 * version bit set, have the persistent readonly
371 * field set, must be version 0 and can only be mounted
372 * read-only.
373 */
374 if (!ronly || !(mp->m_sb.sb_flags & XFS_SBF_READONLY) ||
375 (mp->m_sb.sb_shared_vn != 0))
376 return XFS_ERROR(EINVAL);
377
378 mp->m_flags |= XFS_MOUNT_SHARED;
379
380 /*
381 * Shared XFS V0 can't deal with DMI. Return EINVAL.
382 */
383 if (mp->m_sb.sb_shared_vn == 0 && (ap->flags & XFSMNT_DMAPI))
384 return XFS_ERROR(EINVAL);
385 }
386
387 if (ap->flags & XFSMNT_UQUOTA) {
388 mp->m_qflags |= (XFS_UQUOTA_ACCT | XFS_UQUOTA_ACTIVE);
389 if (ap->flags & XFSMNT_UQUOTAENF)
390 mp->m_qflags |= XFS_UQUOTA_ENFD;
391 }
392
393 if (ap->flags & XFSMNT_GQUOTA) {
394 mp->m_qflags |= (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE);
395 if (ap->flags & XFSMNT_GQUOTAENF)
396 mp->m_qflags |= XFS_OQUOTA_ENFD;
397 } else if (ap->flags & XFSMNT_PQUOTA) {
398 mp->m_qflags |= (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE);
399 if (ap->flags & XFSMNT_PQUOTAENF)
400 mp->m_qflags |= XFS_OQUOTA_ENFD;
401 }
402
403 return 0;
404}
405
406/*
407 * xfs_mount
408 *
409 * The file system configurations are:
410 * (1) device (partition) with data and internal log
411 * (2) logical volume with data and log subvolumes.
412 * (3) logical volume with data, log, and realtime subvolumes.
413 *
414 * We only have to handle opening the log and realtime volumes here if
415 * they are present. The data subvolume has already been opened by
416 * get_sb_bdev() and is stored in vfsp->vfs_super->s_bdev.
417 */
418int
419xfs_mount(
420 struct xfs_mount *mp,
421 struct xfs_mount_args *args,
422 cred_t *credp)
423{
424 struct block_device *ddev, *logdev, *rtdev;
425 int flags = 0, error;
426
427 ddev = mp->m_super->s_bdev;
428 logdev = rtdev = NULL;
429
430 error = xfs_dmops_get(mp, args);
431 if (error)
432 return error;
433 error = xfs_qmops_get(mp, args);
434 if (error)
435 return error;
436
437 if (args->flags & XFSMNT_QUIET)
438 flags |= XFS_MFSI_QUIET;
439
440 /*
441 * Open real time and log devices - order is important.
442 */
443 if (args->logname[0]) {
444 error = xfs_blkdev_get(mp, args->logname, &logdev);
445 if (error)
446 return error;
447 }
448 if (args->rtname[0]) {
449 error = xfs_blkdev_get(mp, args->rtname, &rtdev);
450 if (error) {
451 xfs_blkdev_put(logdev);
452 return error;
453 }
454
455 if (rtdev == ddev || rtdev == logdev) {
456 cmn_err(CE_WARN,
457 "XFS: Cannot mount filesystem with identical rtdev and ddev/logdev.");
458 xfs_blkdev_put(logdev);
459 xfs_blkdev_put(rtdev);
460 return EINVAL;
461 }
462 }
463
464 /*
465 * Setup xfs_mount buffer target pointers
466 */
467 error = ENOMEM;
468 mp->m_ddev_targp = xfs_alloc_buftarg(ddev, 0);
469 if (!mp->m_ddev_targp) {
470 xfs_blkdev_put(logdev);
471 xfs_blkdev_put(rtdev);
472 return error;
473 }
474 if (rtdev) {
475 mp->m_rtdev_targp = xfs_alloc_buftarg(rtdev, 1);
476 if (!mp->m_rtdev_targp) {
477 xfs_blkdev_put(logdev);
478 xfs_blkdev_put(rtdev);
479 goto error0;
480 }
481 }
482 mp->m_logdev_targp = (logdev && logdev != ddev) ?
483 xfs_alloc_buftarg(logdev, 1) : mp->m_ddev_targp;
484 if (!mp->m_logdev_targp) {
485 xfs_blkdev_put(logdev);
486 xfs_blkdev_put(rtdev);
487 goto error0;
488 }
489
490 /*
491 * Setup flags based on mount(2) options and then the superblock
492 */
493 error = xfs_start_flags(args, mp);
494 if (error)
495 goto error1;
496 error = xfs_readsb(mp, flags);
497 if (error)
498 goto error1;
499 error = xfs_finish_flags(args, mp);
500 if (error)
501 goto error2;
502
503 /*
504 * Setup xfs_mount buffer target pointers based on superblock
505 */
506 error = xfs_setsize_buftarg(mp->m_ddev_targp, mp->m_sb.sb_blocksize,
507 mp->m_sb.sb_sectsize);
508 if (!error && logdev && logdev != ddev) {
509 unsigned int log_sector_size = BBSIZE;
510
511 if (xfs_sb_version_hassector(&mp->m_sb))
512 log_sector_size = mp->m_sb.sb_logsectsize;
513 error = xfs_setsize_buftarg(mp->m_logdev_targp,
514 mp->m_sb.sb_blocksize,
515 log_sector_size);
516 }
517 if (!error && rtdev)
518 error = xfs_setsize_buftarg(mp->m_rtdev_targp,
519 mp->m_sb.sb_blocksize,
520 mp->m_sb.sb_sectsize);
521 if (error)
522 goto error2;
523
524 if (mp->m_flags & XFS_MOUNT_BARRIER)
525 xfs_mountfs_check_barriers(mp);
526
527 if ((error = xfs_filestream_mount(mp)))
528 goto error2;
529
530 error = xfs_mountfs(mp, flags);
531 if (error)
532 goto error2;
533
534 XFS_SEND_MOUNT(mp, DM_RIGHT_NULL, args->mtpt, args->fsname);
535
536 return 0;
537
538error2:
539 if (mp->m_sb_bp)
540 xfs_freesb(mp);
541error1:
542 xfs_binval(mp->m_ddev_targp);
543 if (logdev && logdev != ddev)
544 xfs_binval(mp->m_logdev_targp);
545 if (rtdev)
546 xfs_binval(mp->m_rtdev_targp);
547error0:
548 xfs_unmountfs_close(mp, credp);
549 xfs_qmops_put(mp);
550 xfs_dmops_put(mp);
551 return error;
552}
553
554int
555xfs_unmount(
556 xfs_mount_t *mp,
557 int flags,
558 cred_t *credp)
559{
560 xfs_inode_t *rip;
561 bhv_vnode_t *rvp;
562 int unmount_event_wanted = 0;
563 int unmount_event_flags = 0;
564 int xfs_unmountfs_needed = 0;
565 int error;
566
567 rip = mp->m_rootip;
568 rvp = XFS_ITOV(rip);
569
570#ifdef HAVE_DMAPI
571 if (mp->m_flags & XFS_MOUNT_DMAPI) {
572 error = XFS_SEND_PREUNMOUNT(mp,
573 rip, DM_RIGHT_NULL, rip, DM_RIGHT_NULL,
574 NULL, NULL, 0, 0,
575 (mp->m_dmevmask & (1<<DM_EVENT_PREUNMOUNT))?
576 0:DM_FLAGS_UNWANTED);
577 if (error)
578 return XFS_ERROR(error);
579 unmount_event_wanted = 1;
580 unmount_event_flags = (mp->m_dmevmask & (1<<DM_EVENT_UNMOUNT))?
581 0 : DM_FLAGS_UNWANTED;
582 }
583#endif
584
585 /*
586 * Blow away any referenced inode in the filestreams cache.
587 * This can and will cause log traffic as inodes go inactive
588 * here.
589 */
590 xfs_filestream_unmount(mp);
591
592 XFS_bflush(mp->m_ddev_targp);
593 error = xfs_unmount_flush(mp, 0);
594 if (error)
595 goto out;
596
597 ASSERT(vn_count(rvp) == 1);
598
599 /*
600 * Drop the reference count
601 */
602 IRELE(rip);
603
604 /*
605 * If we're forcing a shutdown, typically because of a media error,
606 * we want to make sure we invalidate dirty pages that belong to
607 * referenced vnodes as well.
608 */
609 if (XFS_FORCED_SHUTDOWN(mp)) {
610 error = xfs_sync(mp, SYNC_WAIT | SYNC_CLOSE);
611 ASSERT(error != EFSCORRUPTED);
612 }
613 xfs_unmountfs_needed = 1;
614
615out:
616 /* Send DMAPI event, if required.
617 * Then do xfs_unmountfs() if needed.
618 * Then return error (or zero).
619 */
620 if (unmount_event_wanted) {
621 /* Note: mp structure must still exist for
622 * XFS_SEND_UNMOUNT() call.
623 */
624 XFS_SEND_UNMOUNT(mp, error == 0 ? rip : NULL,
625 DM_RIGHT_NULL, 0, error, unmount_event_flags);
626 }
627 if (xfs_unmountfs_needed) {
628 /*
629 * Call common unmount function to flush to disk
630 * and free the super block buffer & mount structures.
631 */
632 xfs_unmountfs(mp, credp);
633 xfs_qmops_put(mp);
634 xfs_dmops_put(mp);
635 kmem_free(mp, sizeof(xfs_mount_t));
636 }
637
638 return XFS_ERROR(error);
639}
640
641STATIC void 61STATIC void
642xfs_quiesce_fs( 62xfs_quiesce_fs(
643 xfs_mount_t *mp) 63 xfs_mount_t *mp)
@@ -694,30 +114,6 @@ xfs_attr_quiesce(
694 xfs_unmountfs_writesb(mp); 114 xfs_unmountfs_writesb(mp);
695} 115}
696 116
697int
698xfs_mntupdate(
699 struct xfs_mount *mp,
700 int *flags,
701 struct xfs_mount_args *args)
702{
703 if (!(*flags & MS_RDONLY)) { /* rw/ro -> rw */
704 if (mp->m_flags & XFS_MOUNT_RDONLY)
705 mp->m_flags &= ~XFS_MOUNT_RDONLY;
706 if (args->flags & XFSMNT_BARRIER) {
707 mp->m_flags |= XFS_MOUNT_BARRIER;
708 xfs_mountfs_check_barriers(mp);
709 } else {
710 mp->m_flags &= ~XFS_MOUNT_BARRIER;
711 }
712 } else if (!(mp->m_flags & XFS_MOUNT_RDONLY)) { /* rw -> ro */
713 xfs_filestream_flush(mp);
714 xfs_sync(mp, SYNC_DATA_QUIESCE);
715 xfs_attr_quiesce(mp);
716 mp->m_flags |= XFS_MOUNT_RDONLY;
717 }
718 return 0;
719}
720
721/* 117/*
722 * xfs_unmount_flush implements a set of flush operation on special 118 * xfs_unmount_flush implements a set of flush operation on special
723 * inodes, which are needed as a separate set of operations so that 119 * inodes, which are needed as a separate set of operations so that
@@ -1048,7 +444,7 @@ xfs_sync_inodes(
1048 444
1049 if (XFS_FORCED_SHUTDOWN(mp) && !(flags & SYNC_CLOSE)) { 445 if (XFS_FORCED_SHUTDOWN(mp) && !(flags & SYNC_CLOSE)) {
1050 XFS_MOUNT_IUNLOCK(mp); 446 XFS_MOUNT_IUNLOCK(mp);
1051 kmem_free(ipointer, sizeof(xfs_iptr_t)); 447 kmem_free(ipointer);
1052 return 0; 448 return 0;
1053 } 449 }
1054 450
@@ -1194,7 +590,7 @@ xfs_sync_inodes(
1194 } 590 }
1195 XFS_MOUNT_IUNLOCK(mp); 591 XFS_MOUNT_IUNLOCK(mp);
1196 ASSERT(ipointer_in == B_FALSE); 592 ASSERT(ipointer_in == B_FALSE);
1197 kmem_free(ipointer, sizeof(xfs_iptr_t)); 593 kmem_free(ipointer);
1198 return XFS_ERROR(error); 594 return XFS_ERROR(error);
1199 } 595 }
1200 596
@@ -1224,7 +620,7 @@ xfs_sync_inodes(
1224 620
1225 ASSERT(ipointer_in == B_FALSE); 621 ASSERT(ipointer_in == B_FALSE);
1226 622
1227 kmem_free(ipointer, sizeof(xfs_iptr_t)); 623 kmem_free(ipointer);
1228 return XFS_ERROR(last_error); 624 return XFS_ERROR(last_error);
1229} 625}
1230 626
diff --git a/fs/xfs/xfs_vfsops.h b/fs/xfs/xfs_vfsops.h
index 1688817c55ed..a74b05087da4 100644
--- a/fs/xfs/xfs_vfsops.h
+++ b/fs/xfs/xfs_vfsops.h
@@ -8,11 +8,6 @@ struct kstatfs;
8struct xfs_mount; 8struct xfs_mount;
9struct xfs_mount_args; 9struct xfs_mount_args;
10 10
11int xfs_mount(struct xfs_mount *mp, struct xfs_mount_args *args,
12 struct cred *credp);
13int xfs_unmount(struct xfs_mount *mp, int flags, struct cred *credp);
14int xfs_mntupdate(struct xfs_mount *mp, int *flags,
15 struct xfs_mount_args *args);
16int xfs_sync(struct xfs_mount *mp, int flags); 11int xfs_sync(struct xfs_mount *mp, int flags);
17void xfs_do_force_shutdown(struct xfs_mount *mp, int flags, char *fname, 12void xfs_do_force_shutdown(struct xfs_mount *mp, int flags, char *fname,
18 int lnnum); 13 int lnnum);
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index e475e3717eb3..76a1166af822 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -75,26 +75,23 @@ xfs_open(
75 return 0; 75 return 0;
76} 76}
77 77
78/*
79 * xfs_setattr
80 */
81int 78int
82xfs_setattr( 79xfs_setattr(
83 xfs_inode_t *ip, 80 struct xfs_inode *ip,
84 bhv_vattr_t *vap, 81 struct iattr *iattr,
85 int flags, 82 int flags,
86 cred_t *credp) 83 cred_t *credp)
87{ 84{
88 xfs_mount_t *mp = ip->i_mount; 85 xfs_mount_t *mp = ip->i_mount;
86 struct inode *inode = XFS_ITOV(ip);
87 int mask = iattr->ia_valid;
89 xfs_trans_t *tp; 88 xfs_trans_t *tp;
90 int mask;
91 int code; 89 int code;
92 uint lock_flags; 90 uint lock_flags;
93 uint commit_flags=0; 91 uint commit_flags=0;
94 uid_t uid=0, iuid=0; 92 uid_t uid=0, iuid=0;
95 gid_t gid=0, igid=0; 93 gid_t gid=0, igid=0;
96 int timeflags = 0; 94 int timeflags = 0;
97 xfs_prid_t projid=0, iprojid=0;
98 struct xfs_dquot *udqp, *gdqp, *olddquot1, *olddquot2; 95 struct xfs_dquot *udqp, *gdqp, *olddquot1, *olddquot2;
99 int file_owner; 96 int file_owner;
100 int need_iolock = 1; 97 int need_iolock = 1;
@@ -104,30 +101,9 @@ xfs_setattr(
104 if (mp->m_flags & XFS_MOUNT_RDONLY) 101 if (mp->m_flags & XFS_MOUNT_RDONLY)
105 return XFS_ERROR(EROFS); 102 return XFS_ERROR(EROFS);
106 103
107 /*
108 * Cannot set certain attributes.
109 */
110 mask = vap->va_mask;
111 if (mask & XFS_AT_NOSET) {
112 return XFS_ERROR(EINVAL);
113 }
114
115 if (XFS_FORCED_SHUTDOWN(mp)) 104 if (XFS_FORCED_SHUTDOWN(mp))
116 return XFS_ERROR(EIO); 105 return XFS_ERROR(EIO);
117 106
118 /*
119 * Timestamps do not need to be logged and hence do not
120 * need to be done within a transaction.
121 */
122 if (mask & XFS_AT_UPDTIMES) {
123 ASSERT((mask & ~XFS_AT_UPDTIMES) == 0);
124 timeflags = ((mask & XFS_AT_UPDATIME) ? XFS_ICHGTIME_ACC : 0) |
125 ((mask & XFS_AT_UPDCTIME) ? XFS_ICHGTIME_CHG : 0) |
126 ((mask & XFS_AT_UPDMTIME) ? XFS_ICHGTIME_MOD : 0);
127 xfs_ichgtime(ip, timeflags);
128 return 0;
129 }
130
131 olddquot1 = olddquot2 = NULL; 107 olddquot1 = olddquot2 = NULL;
132 udqp = gdqp = NULL; 108 udqp = gdqp = NULL;
133 109
@@ -139,28 +115,22 @@ xfs_setattr(
139 * If the IDs do change before we take the ilock, we're covered 115 * If the IDs do change before we take the ilock, we're covered
140 * because the i_*dquot fields will get updated anyway. 116 * because the i_*dquot fields will get updated anyway.
141 */ 117 */
142 if (XFS_IS_QUOTA_ON(mp) && 118 if (XFS_IS_QUOTA_ON(mp) && (mask & (ATTR_UID|ATTR_GID))) {
143 (mask & (XFS_AT_UID|XFS_AT_GID|XFS_AT_PROJID))) {
144 uint qflags = 0; 119 uint qflags = 0;
145 120
146 if ((mask & XFS_AT_UID) && XFS_IS_UQUOTA_ON(mp)) { 121 if ((mask & ATTR_UID) && XFS_IS_UQUOTA_ON(mp)) {
147 uid = vap->va_uid; 122 uid = iattr->ia_uid;
148 qflags |= XFS_QMOPT_UQUOTA; 123 qflags |= XFS_QMOPT_UQUOTA;
149 } else { 124 } else {
150 uid = ip->i_d.di_uid; 125 uid = ip->i_d.di_uid;
151 } 126 }
152 if ((mask & XFS_AT_GID) && XFS_IS_GQUOTA_ON(mp)) { 127 if ((mask & ATTR_GID) && XFS_IS_GQUOTA_ON(mp)) {
153 gid = vap->va_gid; 128 gid = iattr->ia_gid;
154 qflags |= XFS_QMOPT_GQUOTA; 129 qflags |= XFS_QMOPT_GQUOTA;
155 } else { 130 } else {
156 gid = ip->i_d.di_gid; 131 gid = ip->i_d.di_gid;
157 } 132 }
158 if ((mask & XFS_AT_PROJID) && XFS_IS_PQUOTA_ON(mp)) { 133
159 projid = vap->va_projid;
160 qflags |= XFS_QMOPT_PQUOTA;
161 } else {
162 projid = ip->i_d.di_projid;
163 }
164 /* 134 /*
165 * We take a reference when we initialize udqp and gdqp, 135 * We take a reference when we initialize udqp and gdqp,
166 * so it is important that we never blindly double trip on 136 * so it is important that we never blindly double trip on
@@ -168,8 +138,8 @@ xfs_setattr(
168 */ 138 */
169 ASSERT(udqp == NULL); 139 ASSERT(udqp == NULL);
170 ASSERT(gdqp == NULL); 140 ASSERT(gdqp == NULL);
171 code = XFS_QM_DQVOPALLOC(mp, ip, uid, gid, projid, qflags, 141 code = XFS_QM_DQVOPALLOC(mp, ip, uid, gid, ip->i_d.di_projid,
172 &udqp, &gdqp); 142 qflags, &udqp, &gdqp);
173 if (code) 143 if (code)
174 return code; 144 return code;
175 } 145 }
@@ -180,10 +150,10 @@ xfs_setattr(
180 */ 150 */
181 tp = NULL; 151 tp = NULL;
182 lock_flags = XFS_ILOCK_EXCL; 152 lock_flags = XFS_ILOCK_EXCL;
183 if (flags & ATTR_NOLOCK) 153 if (flags & XFS_ATTR_NOLOCK)
184 need_iolock = 0; 154 need_iolock = 0;
185 if (!(mask & XFS_AT_SIZE)) { 155 if (!(mask & ATTR_SIZE)) {
186 if ((mask != (XFS_AT_CTIME|XFS_AT_ATIME|XFS_AT_MTIME)) || 156 if ((mask != (ATTR_CTIME|ATTR_ATIME|ATTR_MTIME)) ||
187 (mp->m_flags & XFS_MOUNT_WSYNC)) { 157 (mp->m_flags & XFS_MOUNT_WSYNC)) {
188 tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_NOT_SIZE); 158 tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_NOT_SIZE);
189 commit_flags = 0; 159 commit_flags = 0;
@@ -196,10 +166,10 @@ xfs_setattr(
196 } 166 }
197 } else { 167 } else {
198 if (DM_EVENT_ENABLED(ip, DM_EVENT_TRUNCATE) && 168 if (DM_EVENT_ENABLED(ip, DM_EVENT_TRUNCATE) &&
199 !(flags & ATTR_DMI)) { 169 !(flags & XFS_ATTR_DMI)) {
200 int dmflags = AT_DELAY_FLAG(flags) | DM_SEM_FLAG_WR; 170 int dmflags = AT_DELAY_FLAG(flags) | DM_SEM_FLAG_WR;
201 code = XFS_SEND_DATA(mp, DM_EVENT_TRUNCATE, ip, 171 code = XFS_SEND_DATA(mp, DM_EVENT_TRUNCATE, ip,
202 vap->va_size, 0, dmflags, NULL); 172 iattr->ia_size, 0, dmflags, NULL);
203 if (code) { 173 if (code) {
204 lock_flags = 0; 174 lock_flags = 0;
205 goto error_return; 175 goto error_return;
@@ -219,9 +189,7 @@ xfs_setattr(
219 * Only the owner or users with CAP_FOWNER 189 * Only the owner or users with CAP_FOWNER
220 * capability may do these things. 190 * capability may do these things.
221 */ 191 */
222 if (mask & 192 if (mask & (ATTR_MODE|ATTR_UID|ATTR_GID)) {
223 (XFS_AT_MODE|XFS_AT_XFLAGS|XFS_AT_EXTSIZE|XFS_AT_UID|
224 XFS_AT_GID|XFS_AT_PROJID)) {
225 /* 193 /*
226 * CAP_FOWNER overrides the following restrictions: 194 * CAP_FOWNER overrides the following restrictions:
227 * 195 *
@@ -245,21 +213,21 @@ xfs_setattr(
245 * IDs of the calling process shall match the group owner of 213 * IDs of the calling process shall match the group owner of
246 * the file when setting the set-group-ID bit on that file 214 * the file when setting the set-group-ID bit on that file
247 */ 215 */
248 if (mask & XFS_AT_MODE) { 216 if (mask & ATTR_MODE) {
249 mode_t m = 0; 217 mode_t m = 0;
250 218
251 if ((vap->va_mode & S_ISUID) && !file_owner) 219 if ((iattr->ia_mode & S_ISUID) && !file_owner)
252 m |= S_ISUID; 220 m |= S_ISUID;
253 if ((vap->va_mode & S_ISGID) && 221 if ((iattr->ia_mode & S_ISGID) &&
254 !in_group_p((gid_t)ip->i_d.di_gid)) 222 !in_group_p((gid_t)ip->i_d.di_gid))
255 m |= S_ISGID; 223 m |= S_ISGID;
256#if 0 224#if 0
257 /* Linux allows this, Irix doesn't. */ 225 /* Linux allows this, Irix doesn't. */
258 if ((vap->va_mode & S_ISVTX) && !S_ISDIR(ip->i_d.di_mode)) 226 if ((iattr->ia_mode & S_ISVTX) && !S_ISDIR(ip->i_d.di_mode))
259 m |= S_ISVTX; 227 m |= S_ISVTX;
260#endif 228#endif
261 if (m && !capable(CAP_FSETID)) 229 if (m && !capable(CAP_FSETID))
262 vap->va_mode &= ~m; 230 iattr->ia_mode &= ~m;
263 } 231 }
264 } 232 }
265 233
@@ -270,7 +238,7 @@ xfs_setattr(
270 * and can change the group id only to a group of which he 238 * and can change the group id only to a group of which he
271 * or she is a member. 239 * or she is a member.
272 */ 240 */
273 if (mask & (XFS_AT_UID|XFS_AT_GID|XFS_AT_PROJID)) { 241 if (mask & (ATTR_UID|ATTR_GID)) {
274 /* 242 /*
275 * These IDs could have changed since we last looked at them. 243 * These IDs could have changed since we last looked at them.
276 * But, we're assured that if the ownership did change 244 * But, we're assured that if the ownership did change
@@ -278,12 +246,9 @@ xfs_setattr(
278 * would have changed also. 246 * would have changed also.
279 */ 247 */
280 iuid = ip->i_d.di_uid; 248 iuid = ip->i_d.di_uid;
281 iprojid = ip->i_d.di_projid;
282 igid = ip->i_d.di_gid; 249 igid = ip->i_d.di_gid;
283 gid = (mask & XFS_AT_GID) ? vap->va_gid : igid; 250 gid = (mask & ATTR_GID) ? iattr->ia_gid : igid;
284 uid = (mask & XFS_AT_UID) ? vap->va_uid : iuid; 251 uid = (mask & ATTR_UID) ? iattr->ia_uid : iuid;
285 projid = (mask & XFS_AT_PROJID) ? (xfs_prid_t)vap->va_projid :
286 iprojid;
287 252
288 /* 253 /*
289 * CAP_CHOWN overrides the following restrictions: 254 * CAP_CHOWN overrides the following restrictions:
@@ -303,11 +268,10 @@ xfs_setattr(
303 goto error_return; 268 goto error_return;
304 } 269 }
305 /* 270 /*
306 * Do a quota reservation only if uid/projid/gid is actually 271 * Do a quota reservation only if uid/gid is actually
307 * going to change. 272 * going to change.
308 */ 273 */
309 if ((XFS_IS_UQUOTA_ON(mp) && iuid != uid) || 274 if ((XFS_IS_UQUOTA_ON(mp) && iuid != uid) ||
310 (XFS_IS_PQUOTA_ON(mp) && iprojid != projid) ||
311 (XFS_IS_GQUOTA_ON(mp) && igid != gid)) { 275 (XFS_IS_GQUOTA_ON(mp) && igid != gid)) {
312 ASSERT(tp); 276 ASSERT(tp);
313 code = XFS_QM_DQVOPCHOWNRESV(mp, tp, ip, udqp, gdqp, 277 code = XFS_QM_DQVOPCHOWNRESV(mp, tp, ip, udqp, gdqp,
@@ -321,13 +285,13 @@ xfs_setattr(
321 /* 285 /*
322 * Truncate file. Must have write permission and not be a directory. 286 * Truncate file. Must have write permission and not be a directory.
323 */ 287 */
324 if (mask & XFS_AT_SIZE) { 288 if (mask & ATTR_SIZE) {
325 /* Short circuit the truncate case for zero length files */ 289 /* Short circuit the truncate case for zero length files */
326 if ((vap->va_size == 0) && 290 if (iattr->ia_size == 0 &&
327 (ip->i_size == 0) && (ip->i_d.di_nextents == 0)) { 291 ip->i_size == 0 && ip->i_d.di_nextents == 0) {
328 xfs_iunlock(ip, XFS_ILOCK_EXCL); 292 xfs_iunlock(ip, XFS_ILOCK_EXCL);
329 lock_flags &= ~XFS_ILOCK_EXCL; 293 lock_flags &= ~XFS_ILOCK_EXCL;
330 if (mask & XFS_AT_CTIME) 294 if (mask & ATTR_CTIME)
331 xfs_ichgtime(ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); 295 xfs_ichgtime(ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
332 code = 0; 296 code = 0;
333 goto error_return; 297 goto error_return;
@@ -350,9 +314,9 @@ xfs_setattr(
350 /* 314 /*
351 * Change file access or modified times. 315 * Change file access or modified times.
352 */ 316 */
353 if (mask & (XFS_AT_ATIME|XFS_AT_MTIME)) { 317 if (mask & (ATTR_ATIME|ATTR_MTIME)) {
354 if (!file_owner) { 318 if (!file_owner) {
355 if ((flags & ATTR_UTIME) && 319 if ((mask & (ATTR_MTIME_SET|ATTR_ATIME_SET)) &&
356 !capable(CAP_FOWNER)) { 320 !capable(CAP_FOWNER)) {
357 code = XFS_ERROR(EPERM); 321 code = XFS_ERROR(EPERM);
358 goto error_return; 322 goto error_return;
@@ -361,90 +325,23 @@ xfs_setattr(
361 } 325 }
362 326
363 /* 327 /*
364 * Change extent size or realtime flag.
365 */
366 if (mask & (XFS_AT_EXTSIZE|XFS_AT_XFLAGS)) {
367 /*
368 * Can't change extent size if any extents are allocated.
369 */
370 if (ip->i_d.di_nextents && (mask & XFS_AT_EXTSIZE) &&
371 ((ip->i_d.di_extsize << mp->m_sb.sb_blocklog) !=
372 vap->va_extsize) ) {
373 code = XFS_ERROR(EINVAL); /* EFBIG? */
374 goto error_return;
375 }
376
377 /*
378 * Can't change realtime flag if any extents are allocated.
379 */
380 if ((ip->i_d.di_nextents || ip->i_delayed_blks) &&
381 (mask & XFS_AT_XFLAGS) &&
382 (XFS_IS_REALTIME_INODE(ip)) !=
383 (vap->va_xflags & XFS_XFLAG_REALTIME)) {
384 code = XFS_ERROR(EINVAL); /* EFBIG? */
385 goto error_return;
386 }
387 /*
388 * Extent size must be a multiple of the appropriate block
389 * size, if set at all.
390 */
391 if ((mask & XFS_AT_EXTSIZE) && vap->va_extsize != 0) {
392 xfs_extlen_t size;
393
394 if (XFS_IS_REALTIME_INODE(ip) ||
395 ((mask & XFS_AT_XFLAGS) &&
396 (vap->va_xflags & XFS_XFLAG_REALTIME))) {
397 size = mp->m_sb.sb_rextsize <<
398 mp->m_sb.sb_blocklog;
399 } else {
400 size = mp->m_sb.sb_blocksize;
401 }
402 if (vap->va_extsize % size) {
403 code = XFS_ERROR(EINVAL);
404 goto error_return;
405 }
406 }
407 /*
408 * If realtime flag is set then must have realtime data.
409 */
410 if ((mask & XFS_AT_XFLAGS) &&
411 (vap->va_xflags & XFS_XFLAG_REALTIME)) {
412 if ((mp->m_sb.sb_rblocks == 0) ||
413 (mp->m_sb.sb_rextsize == 0) ||
414 (ip->i_d.di_extsize % mp->m_sb.sb_rextsize)) {
415 code = XFS_ERROR(EINVAL);
416 goto error_return;
417 }
418 }
419
420 /*
421 * Can't modify an immutable/append-only file unless
422 * we have appropriate permission.
423 */
424 if ((mask & XFS_AT_XFLAGS) &&
425 (ip->i_d.di_flags &
426 (XFS_DIFLAG_IMMUTABLE|XFS_DIFLAG_APPEND) ||
427 (vap->va_xflags &
428 (XFS_XFLAG_IMMUTABLE | XFS_XFLAG_APPEND))) &&
429 !capable(CAP_LINUX_IMMUTABLE)) {
430 code = XFS_ERROR(EPERM);
431 goto error_return;
432 }
433 }
434
435 /*
436 * Now we can make the changes. Before we join the inode 328 * Now we can make the changes. Before we join the inode
437 * to the transaction, if XFS_AT_SIZE is set then take care of 329 * to the transaction, if ATTR_SIZE is set then take care of
438 * the part of the truncation that must be done without the 330 * the part of the truncation that must be done without the
439 * inode lock. This needs to be done before joining the inode 331 * inode lock. This needs to be done before joining the inode
440 * to the transaction, because the inode cannot be unlocked 332 * to the transaction, because the inode cannot be unlocked
441 * once it is a part of the transaction. 333 * once it is a part of the transaction.
442 */ 334 */
443 if (mask & XFS_AT_SIZE) { 335 if (mask & ATTR_SIZE) {
444 code = 0; 336 code = 0;
445 if ((vap->va_size > ip->i_size) && 337 if (iattr->ia_size > ip->i_size) {
446 (flags & ATTR_NOSIZETOK) == 0) { 338 /*
447 code = xfs_igrow_start(ip, vap->va_size, credp); 339 * Do the first part of growing a file: zero any data
340 * in the last block that is beyond the old EOF. We
341 * need to do this before the inode is joined to the
342 * transaction to modify the i_size.
343 */
344 code = xfs_zero_eof(ip, iattr->ia_size, ip->i_size);
448 } 345 }
449 xfs_iunlock(ip, XFS_ILOCK_EXCL); 346 xfs_iunlock(ip, XFS_ILOCK_EXCL);
450 347
@@ -461,10 +358,10 @@ xfs_setattr(
461 * not within the range we care about here. 358 * not within the range we care about here.
462 */ 359 */
463 if (!code && 360 if (!code &&
464 (ip->i_size != ip->i_d.di_size) && 361 ip->i_size != ip->i_d.di_size &&
465 (vap->va_size > ip->i_d.di_size)) { 362 iattr->ia_size > ip->i_d.di_size) {
466 code = xfs_flush_pages(ip, 363 code = xfs_flush_pages(ip,
467 ip->i_d.di_size, vap->va_size, 364 ip->i_d.di_size, iattr->ia_size,
468 XFS_B_ASYNC, FI_NONE); 365 XFS_B_ASYNC, FI_NONE);
469 } 366 }
470 367
@@ -472,7 +369,7 @@ xfs_setattr(
472 vn_iowait(ip); 369 vn_iowait(ip);
473 370
474 if (!code) 371 if (!code)
475 code = xfs_itruncate_data(ip, vap->va_size); 372 code = xfs_itruncate_data(ip, iattr->ia_size);
476 if (code) { 373 if (code) {
477 ASSERT(tp == NULL); 374 ASSERT(tp == NULL);
478 lock_flags &= ~XFS_ILOCK_EXCL; 375 lock_flags &= ~XFS_ILOCK_EXCL;
@@ -501,28 +398,30 @@ xfs_setattr(
501 /* 398 /*
502 * Truncate file. Must have write permission and not be a directory. 399 * Truncate file. Must have write permission and not be a directory.
503 */ 400 */
504 if (mask & XFS_AT_SIZE) { 401 if (mask & ATTR_SIZE) {
505 /* 402 /*
506 * Only change the c/mtime if we are changing the size 403 * Only change the c/mtime if we are changing the size
507 * or we are explicitly asked to change it. This handles 404 * or we are explicitly asked to change it. This handles
508 * the semantic difference between truncate() and ftruncate() 405 * the semantic difference between truncate() and ftruncate()
509 * as implemented in the VFS. 406 * as implemented in the VFS.
510 */ 407 */
511 if (vap->va_size != ip->i_size || (mask & XFS_AT_CTIME)) 408 if (iattr->ia_size != ip->i_size || (mask & ATTR_CTIME))
512 timeflags |= XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG; 409 timeflags |= XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG;
513 410
514 if (vap->va_size > ip->i_size) { 411 if (iattr->ia_size > ip->i_size) {
515 xfs_igrow_finish(tp, ip, vap->va_size, 412 ip->i_d.di_size = iattr->ia_size;
516 !(flags & ATTR_DMI)); 413 ip->i_size = iattr->ia_size;
517 } else if ((vap->va_size <= ip->i_size) || 414 if (!(flags & XFS_ATTR_DMI))
518 ((vap->va_size == 0) && ip->i_d.di_nextents)) { 415 xfs_ichgtime(ip, XFS_ICHGTIME_CHG);
416 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
417 } else if (iattr->ia_size <= ip->i_size ||
418 (iattr->ia_size == 0 && ip->i_d.di_nextents)) {
519 /* 419 /*
520 * signal a sync transaction unless 420 * signal a sync transaction unless
521 * we're truncating an already unlinked 421 * we're truncating an already unlinked
522 * file on a wsync filesystem 422 * file on a wsync filesystem
523 */ 423 */
524 code = xfs_itruncate_finish(&tp, ip, 424 code = xfs_itruncate_finish(&tp, ip, iattr->ia_size,
525 (xfs_fsize_t)vap->va_size,
526 XFS_DATA_FORK, 425 XFS_DATA_FORK,
527 ((ip->i_d.di_nlink != 0 || 426 ((ip->i_d.di_nlink != 0 ||
528 !(mp->m_flags & XFS_MOUNT_WSYNC)) 427 !(mp->m_flags & XFS_MOUNT_WSYNC))
@@ -544,9 +443,12 @@ xfs_setattr(
544 /* 443 /*
545 * Change file access modes. 444 * Change file access modes.
546 */ 445 */
547 if (mask & XFS_AT_MODE) { 446 if (mask & ATTR_MODE) {
548 ip->i_d.di_mode &= S_IFMT; 447 ip->i_d.di_mode &= S_IFMT;
549 ip->i_d.di_mode |= vap->va_mode & ~S_IFMT; 448 ip->i_d.di_mode |= iattr->ia_mode & ~S_IFMT;
449
450 inode->i_mode &= S_IFMT;
451 inode->i_mode |= iattr->ia_mode & ~S_IFMT;
550 452
551 xfs_trans_log_inode (tp, ip, XFS_ILOG_CORE); 453 xfs_trans_log_inode (tp, ip, XFS_ILOG_CORE);
552 timeflags |= XFS_ICHGTIME_CHG; 454 timeflags |= XFS_ICHGTIME_CHG;
@@ -559,7 +461,7 @@ xfs_setattr(
559 * and can change the group id only to a group of which he 461 * and can change the group id only to a group of which he
560 * or she is a member. 462 * or she is a member.
561 */ 463 */
562 if (mask & (XFS_AT_UID|XFS_AT_GID|XFS_AT_PROJID)) { 464 if (mask & (ATTR_UID|ATTR_GID)) {
563 /* 465 /*
564 * CAP_FSETID overrides the following restrictions: 466 * CAP_FSETID overrides the following restrictions:
565 * 467 *
@@ -577,39 +479,24 @@ xfs_setattr(
577 */ 479 */
578 if (iuid != uid) { 480 if (iuid != uid) {
579 if (XFS_IS_UQUOTA_ON(mp)) { 481 if (XFS_IS_UQUOTA_ON(mp)) {
580 ASSERT(mask & XFS_AT_UID); 482 ASSERT(mask & ATTR_UID);
581 ASSERT(udqp); 483 ASSERT(udqp);
582 olddquot1 = XFS_QM_DQVOPCHOWN(mp, tp, ip, 484 olddquot1 = XFS_QM_DQVOPCHOWN(mp, tp, ip,
583 &ip->i_udquot, udqp); 485 &ip->i_udquot, udqp);
584 } 486 }
585 ip->i_d.di_uid = uid; 487 ip->i_d.di_uid = uid;
488 inode->i_uid = uid;
586 } 489 }
587 if (igid != gid) { 490 if (igid != gid) {
588 if (XFS_IS_GQUOTA_ON(mp)) { 491 if (XFS_IS_GQUOTA_ON(mp)) {
589 ASSERT(!XFS_IS_PQUOTA_ON(mp)); 492 ASSERT(!XFS_IS_PQUOTA_ON(mp));
590 ASSERT(mask & XFS_AT_GID); 493 ASSERT(mask & ATTR_GID);
591 ASSERT(gdqp); 494 ASSERT(gdqp);
592 olddquot2 = XFS_QM_DQVOPCHOWN(mp, tp, ip, 495 olddquot2 = XFS_QM_DQVOPCHOWN(mp, tp, ip,
593 &ip->i_gdquot, gdqp); 496 &ip->i_gdquot, gdqp);
594 } 497 }
595 ip->i_d.di_gid = gid; 498 ip->i_d.di_gid = gid;
596 } 499 inode->i_gid = gid;
597 if (iprojid != projid) {
598 if (XFS_IS_PQUOTA_ON(mp)) {
599 ASSERT(!XFS_IS_GQUOTA_ON(mp));
600 ASSERT(mask & XFS_AT_PROJID);
601 ASSERT(gdqp);
602 olddquot2 = XFS_QM_DQVOPCHOWN(mp, tp, ip,
603 &ip->i_gdquot, gdqp);
604 }
605 ip->i_d.di_projid = projid;
606 /*
607 * We may have to rev the inode as well as
608 * the superblock version number since projids didn't
609 * exist before DINODE_VERSION_2 and SB_VERSION_NLINK.
610 */
611 if (ip->i_d.di_version == XFS_DINODE_VERSION_1)
612 xfs_bump_ino_vers2(tp, ip);
613 } 500 }
614 501
615 xfs_trans_log_inode (tp, ip, XFS_ILOG_CORE); 502 xfs_trans_log_inode (tp, ip, XFS_ILOG_CORE);
@@ -620,82 +507,34 @@ xfs_setattr(
620 /* 507 /*
621 * Change file access or modified times. 508 * Change file access or modified times.
622 */ 509 */
623 if (mask & (XFS_AT_ATIME|XFS_AT_MTIME)) { 510 if (mask & (ATTR_ATIME|ATTR_MTIME)) {
624 if (mask & XFS_AT_ATIME) { 511 if (mask & ATTR_ATIME) {
625 ip->i_d.di_atime.t_sec = vap->va_atime.tv_sec; 512 inode->i_atime = iattr->ia_atime;
626 ip->i_d.di_atime.t_nsec = vap->va_atime.tv_nsec; 513 ip->i_d.di_atime.t_sec = iattr->ia_atime.tv_sec;
514 ip->i_d.di_atime.t_nsec = iattr->ia_atime.tv_nsec;
627 ip->i_update_core = 1; 515 ip->i_update_core = 1;
628 timeflags &= ~XFS_ICHGTIME_ACC; 516 timeflags &= ~XFS_ICHGTIME_ACC;
629 } 517 }
630 if (mask & XFS_AT_MTIME) { 518 if (mask & ATTR_MTIME) {
631 ip->i_d.di_mtime.t_sec = vap->va_mtime.tv_sec; 519 inode->i_mtime = iattr->ia_mtime;
632 ip->i_d.di_mtime.t_nsec = vap->va_mtime.tv_nsec; 520 ip->i_d.di_mtime.t_sec = iattr->ia_mtime.tv_sec;
521 ip->i_d.di_mtime.t_nsec = iattr->ia_mtime.tv_nsec;
633 timeflags &= ~XFS_ICHGTIME_MOD; 522 timeflags &= ~XFS_ICHGTIME_MOD;
634 timeflags |= XFS_ICHGTIME_CHG; 523 timeflags |= XFS_ICHGTIME_CHG;
635 } 524 }
636 if (tp && (flags & ATTR_UTIME)) 525 if (tp && (mask & (ATTR_MTIME_SET|ATTR_ATIME_SET)))
637 xfs_trans_log_inode (tp, ip, XFS_ILOG_CORE); 526 xfs_trans_log_inode (tp, ip, XFS_ILOG_CORE);
638 } 527 }
639 528
640 /* 529 /*
641 * Change XFS-added attributes. 530 * Change file inode change time only if ATTR_CTIME set
642 */
643 if (mask & (XFS_AT_EXTSIZE|XFS_AT_XFLAGS)) {
644 if (mask & XFS_AT_EXTSIZE) {
645 /*
646 * Converting bytes to fs blocks.
647 */
648 ip->i_d.di_extsize = vap->va_extsize >>
649 mp->m_sb.sb_blocklog;
650 }
651 if (mask & XFS_AT_XFLAGS) {
652 uint di_flags;
653
654 /* can't set PREALLOC this way, just preserve it */
655 di_flags = (ip->i_d.di_flags & XFS_DIFLAG_PREALLOC);
656 if (vap->va_xflags & XFS_XFLAG_IMMUTABLE)
657 di_flags |= XFS_DIFLAG_IMMUTABLE;
658 if (vap->va_xflags & XFS_XFLAG_APPEND)
659 di_flags |= XFS_DIFLAG_APPEND;
660 if (vap->va_xflags & XFS_XFLAG_SYNC)
661 di_flags |= XFS_DIFLAG_SYNC;
662 if (vap->va_xflags & XFS_XFLAG_NOATIME)
663 di_flags |= XFS_DIFLAG_NOATIME;
664 if (vap->va_xflags & XFS_XFLAG_NODUMP)
665 di_flags |= XFS_DIFLAG_NODUMP;
666 if (vap->va_xflags & XFS_XFLAG_PROJINHERIT)
667 di_flags |= XFS_DIFLAG_PROJINHERIT;
668 if (vap->va_xflags & XFS_XFLAG_NODEFRAG)
669 di_flags |= XFS_DIFLAG_NODEFRAG;
670 if (vap->va_xflags & XFS_XFLAG_FILESTREAM)
671 di_flags |= XFS_DIFLAG_FILESTREAM;
672 if ((ip->i_d.di_mode & S_IFMT) == S_IFDIR) {
673 if (vap->va_xflags & XFS_XFLAG_RTINHERIT)
674 di_flags |= XFS_DIFLAG_RTINHERIT;
675 if (vap->va_xflags & XFS_XFLAG_NOSYMLINKS)
676 di_flags |= XFS_DIFLAG_NOSYMLINKS;
677 if (vap->va_xflags & XFS_XFLAG_EXTSZINHERIT)
678 di_flags |= XFS_DIFLAG_EXTSZINHERIT;
679 } else if ((ip->i_d.di_mode & S_IFMT) == S_IFREG) {
680 if (vap->va_xflags & XFS_XFLAG_REALTIME)
681 di_flags |= XFS_DIFLAG_REALTIME;
682 if (vap->va_xflags & XFS_XFLAG_EXTSIZE)
683 di_flags |= XFS_DIFLAG_EXTSIZE;
684 }
685 ip->i_d.di_flags = di_flags;
686 }
687 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
688 timeflags |= XFS_ICHGTIME_CHG;
689 }
690
691 /*
692 * Change file inode change time only if XFS_AT_CTIME set
693 * AND we have been called by a DMI function. 531 * AND we have been called by a DMI function.
694 */ 532 */
695 533
696 if ( (flags & ATTR_DMI) && (mask & XFS_AT_CTIME) ) { 534 if ((flags & XFS_ATTR_DMI) && (mask & ATTR_CTIME)) {
697 ip->i_d.di_ctime.t_sec = vap->va_ctime.tv_sec; 535 inode->i_ctime = iattr->ia_ctime;
698 ip->i_d.di_ctime.t_nsec = vap->va_ctime.tv_nsec; 536 ip->i_d.di_ctime.t_sec = iattr->ia_ctime.tv_sec;
537 ip->i_d.di_ctime.t_nsec = iattr->ia_ctime.tv_nsec;
699 ip->i_update_core = 1; 538 ip->i_update_core = 1;
700 timeflags &= ~XFS_ICHGTIME_CHG; 539 timeflags &= ~XFS_ICHGTIME_CHG;
701 } 540 }
@@ -704,7 +543,7 @@ xfs_setattr(
704 * Send out timestamp changes that need to be set to the 543 * Send out timestamp changes that need to be set to the
705 * current time. Not done when called by a DMI function. 544 * current time. Not done when called by a DMI function.
706 */ 545 */
707 if (timeflags && !(flags & ATTR_DMI)) 546 if (timeflags && !(flags & XFS_ATTR_DMI))
708 xfs_ichgtime(ip, timeflags); 547 xfs_ichgtime(ip, timeflags);
709 548
710 XFS_STATS_INC(xs_ig_attrchg); 549 XFS_STATS_INC(xs_ig_attrchg);
@@ -742,7 +581,7 @@ xfs_setattr(
742 } 581 }
743 582
744 if (DM_EVENT_ENABLED(ip, DM_EVENT_ATTRIBUTE) && 583 if (DM_EVENT_ENABLED(ip, DM_EVENT_ATTRIBUTE) &&
745 !(flags & ATTR_DMI)) { 584 !(flags & XFS_ATTR_DMI)) {
746 (void) XFS_SEND_NAMESP(mp, DM_EVENT_ATTRIBUTE, ip, DM_RIGHT_NULL, 585 (void) XFS_SEND_NAMESP(mp, DM_EVENT_ATTRIBUTE, ip, DM_RIGHT_NULL,
747 NULL, DM_RIGHT_NULL, NULL, NULL, 586 NULL, DM_RIGHT_NULL, NULL, NULL,
748 0, 0, AT_DELAY_FLAG(flags)); 587 0, 0, AT_DELAY_FLAG(flags));
@@ -1601,12 +1440,18 @@ xfs_inactive(
1601 return VN_INACTIVE_CACHE; 1440 return VN_INACTIVE_CACHE;
1602} 1441}
1603 1442
1604 1443/*
 1444 * Looks up an inode from "name". If ci_name is not NULL, then a CI match
 1445 * is allowed, otherwise it has to be an exact match. If a CI match is found,
 1446 * ci_name->name will point to the actual name (caller must free) or
 1447 * will be set to NULL if an exact match is found.
1448 */
1605int 1449int
1606xfs_lookup( 1450xfs_lookup(
1607 xfs_inode_t *dp, 1451 xfs_inode_t *dp,
1608 struct xfs_name *name, 1452 struct xfs_name *name,
1609 xfs_inode_t **ipp) 1453 xfs_inode_t **ipp,
1454 struct xfs_name *ci_name)
1610{ 1455{
1611 xfs_ino_t inum; 1456 xfs_ino_t inum;
1612 int error; 1457 int error;
@@ -1618,7 +1463,7 @@ xfs_lookup(
1618 return XFS_ERROR(EIO); 1463 return XFS_ERROR(EIO);
1619 1464
1620 lock_mode = xfs_ilock_map_shared(dp); 1465 lock_mode = xfs_ilock_map_shared(dp);
1621 error = xfs_dir_lookup(NULL, dp, name, &inum); 1466 error = xfs_dir_lookup(NULL, dp, name, &inum, ci_name);
1622 xfs_iunlock_map_shared(dp, lock_mode); 1467 xfs_iunlock_map_shared(dp, lock_mode);
1623 1468
1624 if (error) 1469 if (error)
@@ -1626,12 +1471,15 @@ xfs_lookup(
1626 1471
1627 error = xfs_iget(dp->i_mount, NULL, inum, 0, 0, ipp, 0); 1472 error = xfs_iget(dp->i_mount, NULL, inum, 0, 0, ipp, 0);
1628 if (error) 1473 if (error)
1629 goto out; 1474 goto out_free_name;
1630 1475
1631 xfs_itrace_ref(*ipp); 1476 xfs_itrace_ref(*ipp);
1632 return 0; 1477 return 0;
1633 1478
1634 out: 1479out_free_name:
1480 if (ci_name)
1481 kmem_free(ci_name->name);
1482out:
1635 *ipp = NULL; 1483 *ipp = NULL;
1636 return error; 1484 return error;
1637} 1485}
@@ -2098,13 +1946,6 @@ again:
2098#endif 1946#endif
2099} 1947}
2100 1948
2101#ifdef DEBUG
2102#define REMOVE_DEBUG_TRACE(x) {remove_which_error_return = (x);}
2103int remove_which_error_return = 0;
2104#else /* ! DEBUG */
2105#define REMOVE_DEBUG_TRACE(x)
2106#endif /* ! DEBUG */
2107
2108int 1949int
2109xfs_remove( 1950xfs_remove(
2110 xfs_inode_t *dp, 1951 xfs_inode_t *dp,
@@ -2113,6 +1954,7 @@ xfs_remove(
2113{ 1954{
2114 xfs_mount_t *mp = dp->i_mount; 1955 xfs_mount_t *mp = dp->i_mount;
2115 xfs_trans_t *tp = NULL; 1956 xfs_trans_t *tp = NULL;
1957 int is_dir = S_ISDIR(ip->i_d.di_mode);
2116 int error = 0; 1958 int error = 0;
2117 xfs_bmap_free_t free_list; 1959 xfs_bmap_free_t free_list;
2118 xfs_fsblock_t first_block; 1960 xfs_fsblock_t first_block;
@@ -2120,8 +1962,10 @@ xfs_remove(
2120 int committed; 1962 int committed;
2121 int link_zero; 1963 int link_zero;
2122 uint resblks; 1964 uint resblks;
1965 uint log_count;
2123 1966
2124 xfs_itrace_entry(dp); 1967 xfs_itrace_entry(dp);
1968 xfs_itrace_entry(ip);
2125 1969
2126 if (XFS_FORCED_SHUTDOWN(mp)) 1970 if (XFS_FORCED_SHUTDOWN(mp))
2127 return XFS_ERROR(EIO); 1971 return XFS_ERROR(EIO);
@@ -2134,19 +1978,23 @@ xfs_remove(
2134 return error; 1978 return error;
2135 } 1979 }
2136 1980
2137 xfs_itrace_entry(ip);
2138 xfs_itrace_ref(ip);
2139
2140 error = XFS_QM_DQATTACH(mp, dp, 0); 1981 error = XFS_QM_DQATTACH(mp, dp, 0);
2141 if (!error) 1982 if (error)
2142 error = XFS_QM_DQATTACH(mp, ip, 0); 1983 goto std_return;
2143 if (error) { 1984
2144 REMOVE_DEBUG_TRACE(__LINE__); 1985 error = XFS_QM_DQATTACH(mp, ip, 0);
1986 if (error)
2145 goto std_return; 1987 goto std_return;
2146 }
2147 1988
2148 tp = xfs_trans_alloc(mp, XFS_TRANS_REMOVE); 1989 if (is_dir) {
1990 tp = xfs_trans_alloc(mp, XFS_TRANS_RMDIR);
1991 log_count = XFS_DEFAULT_LOG_COUNT;
1992 } else {
1993 tp = xfs_trans_alloc(mp, XFS_TRANS_REMOVE);
1994 log_count = XFS_REMOVE_LOG_COUNT;
1995 }
2149 cancel_flags = XFS_TRANS_RELEASE_LOG_RES; 1996 cancel_flags = XFS_TRANS_RELEASE_LOG_RES;
1997
2150 /* 1998 /*
2151 * We try to get the real space reservation first, 1999 * We try to get the real space reservation first,
2152 * allowing for directory btree deletion(s) implying 2000 * allowing for directory btree deletion(s) implying
@@ -2158,25 +2006,21 @@ xfs_remove(
2158 */ 2006 */
2159 resblks = XFS_REMOVE_SPACE_RES(mp); 2007 resblks = XFS_REMOVE_SPACE_RES(mp);
2160 error = xfs_trans_reserve(tp, resblks, XFS_REMOVE_LOG_RES(mp), 0, 2008 error = xfs_trans_reserve(tp, resblks, XFS_REMOVE_LOG_RES(mp), 0,
2161 XFS_TRANS_PERM_LOG_RES, XFS_REMOVE_LOG_COUNT); 2009 XFS_TRANS_PERM_LOG_RES, log_count);
2162 if (error == ENOSPC) { 2010 if (error == ENOSPC) {
2163 resblks = 0; 2011 resblks = 0;
2164 error = xfs_trans_reserve(tp, 0, XFS_REMOVE_LOG_RES(mp), 0, 2012 error = xfs_trans_reserve(tp, 0, XFS_REMOVE_LOG_RES(mp), 0,
2165 XFS_TRANS_PERM_LOG_RES, XFS_REMOVE_LOG_COUNT); 2013 XFS_TRANS_PERM_LOG_RES, log_count);
2166 } 2014 }
2167 if (error) { 2015 if (error) {
2168 ASSERT(error != ENOSPC); 2016 ASSERT(error != ENOSPC);
2169 REMOVE_DEBUG_TRACE(__LINE__); 2017 cancel_flags = 0;
2170 xfs_trans_cancel(tp, 0); 2018 goto out_trans_cancel;
2171 return error;
2172 } 2019 }
2173 2020
2174 error = xfs_lock_dir_and_entry(dp, ip); 2021 error = xfs_lock_dir_and_entry(dp, ip);
2175 if (error) { 2022 if (error)
2176 REMOVE_DEBUG_TRACE(__LINE__); 2023 goto out_trans_cancel;
2177 xfs_trans_cancel(tp, cancel_flags);
2178 goto std_return;
2179 }
2180 2024
2181 /* 2025 /*
2182 * At this point, we've gotten both the directory and the entry 2026 * At this point, we've gotten both the directory and the entry
@@ -2189,6 +2033,21 @@ xfs_remove(
2189 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); 2033 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
2190 2034
2191 /* 2035 /*
2036 * If we're removing a directory perform some additional validation.
2037 */
2038 if (is_dir) {
2039 ASSERT(ip->i_d.di_nlink >= 2);
2040 if (ip->i_d.di_nlink != 2) {
2041 error = XFS_ERROR(ENOTEMPTY);
2042 goto out_trans_cancel;
2043 }
2044 if (!xfs_dir_isempty(ip)) {
2045 error = XFS_ERROR(ENOTEMPTY);
2046 goto out_trans_cancel;
2047 }
2048 }
2049
2050 /*
2192 * Entry must exist since we did a lookup in xfs_lock_dir_and_entry. 2051 * Entry must exist since we did a lookup in xfs_lock_dir_and_entry.
2193 */ 2052 */
2194 XFS_BMAP_INIT(&free_list, &first_block); 2053 XFS_BMAP_INIT(&free_list, &first_block);
@@ -2196,39 +2055,64 @@ xfs_remove(
2196 &first_block, &free_list, resblks); 2055 &first_block, &free_list, resblks);
2197 if (error) { 2056 if (error) {
2198 ASSERT(error != ENOENT); 2057 ASSERT(error != ENOENT);
2199 REMOVE_DEBUG_TRACE(__LINE__); 2058 goto out_bmap_cancel;
2200 goto error1;
2201 } 2059 }
2202 xfs_ichgtime(dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); 2060 xfs_ichgtime(dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
2203 2061
2062 /*
2063 * Bump the in memory generation count on the parent
2064 * directory so that other can know that it has changed.
2065 */
2204 dp->i_gen++; 2066 dp->i_gen++;
2205 xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE); 2067 xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE);
2206 2068
2207 error = xfs_droplink(tp, ip); 2069 if (is_dir) {
2208 if (error) { 2070 /*
2209 REMOVE_DEBUG_TRACE(__LINE__); 2071 * Drop the link from ip's "..".
2210 goto error1; 2072 */
2073 error = xfs_droplink(tp, dp);
2074 if (error)
2075 goto out_bmap_cancel;
2076
2077 /*
2078 * Drop the link from dp to ip.
2079 */
2080 error = xfs_droplink(tp, ip);
2081 if (error)
2082 goto out_bmap_cancel;
2083 } else {
2084 /*
2085 * When removing a non-directory we need to log the parent
2086 * inode here for the i_gen update. For a directory this is
2087 * done implicitly by the xfs_droplink call for the ".." entry.
2088 */
2089 xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE);
2211 } 2090 }
2212 2091
2213 /* Determine if this is the last link while 2092 /*
2093 * Drop the "." link from ip to self.
2094 */
2095 error = xfs_droplink(tp, ip);
2096 if (error)
2097 goto out_bmap_cancel;
2098
2099 /*
2100 * Determine if this is the last link while
2214 * we are in the transaction. 2101 * we are in the transaction.
2215 */ 2102 */
2216 link_zero = (ip)->i_d.di_nlink==0; 2103 link_zero = (ip->i_d.di_nlink == 0);
2217 2104
2218 /* 2105 /*
2219 * If this is a synchronous mount, make sure that the 2106 * If this is a synchronous mount, make sure that the
2220 * remove transaction goes to disk before returning to 2107 * remove transaction goes to disk before returning to
2221 * the user. 2108 * the user.
2222 */ 2109 */
2223 if (mp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC)) { 2110 if (mp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC))
2224 xfs_trans_set_sync(tp); 2111 xfs_trans_set_sync(tp);
2225 }
2226 2112
2227 error = xfs_bmap_finish(&tp, &free_list, &committed); 2113 error = xfs_bmap_finish(&tp, &free_list, &committed);
2228 if (error) { 2114 if (error)
2229 REMOVE_DEBUG_TRACE(__LINE__); 2115 goto out_bmap_cancel;
2230 goto error_rele;
2231 }
2232 2116
2233 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); 2117 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
2234 if (error) 2118 if (error)
@@ -2240,38 +2124,26 @@ xfs_remove(
2240 * will get killed on last close in xfs_close() so we don't 2124 * will get killed on last close in xfs_close() so we don't
2241 * have to worry about that. 2125 * have to worry about that.
2242 */ 2126 */
2243 if (link_zero && xfs_inode_is_filestream(ip)) 2127 if (!is_dir && link_zero && xfs_inode_is_filestream(ip))
2244 xfs_filestream_deassociate(ip); 2128 xfs_filestream_deassociate(ip);
2245 2129
2246 xfs_itrace_exit(ip); 2130 xfs_itrace_exit(ip);
2131 xfs_itrace_exit(dp);
2247 2132
2248/* Fall through to std_return with error = 0 */
2249 std_return: 2133 std_return:
2250 if (DM_EVENT_ENABLED(dp, DM_EVENT_POSTREMOVE)) { 2134 if (DM_EVENT_ENABLED(dp, DM_EVENT_POSTREMOVE)) {
2251 (void) XFS_SEND_NAMESP(mp, DM_EVENT_POSTREMOVE, 2135 XFS_SEND_NAMESP(mp, DM_EVENT_POSTREMOVE, dp, DM_RIGHT_NULL,
2252 dp, DM_RIGHT_NULL, 2136 NULL, DM_RIGHT_NULL, name->name, NULL,
2253 NULL, DM_RIGHT_NULL, 2137 ip->i_d.di_mode, error, 0);
2254 name->name, NULL, ip->i_d.di_mode, error, 0);
2255 } 2138 }
2256 return error;
2257 2139
2258 error1: 2140 return error;
2259 xfs_bmap_cancel(&free_list);
2260 cancel_flags |= XFS_TRANS_ABORT;
2261 xfs_trans_cancel(tp, cancel_flags);
2262 goto std_return;
2263 2141
2264 error_rele: 2142 out_bmap_cancel:
2265 /*
2266 * In this case make sure to not release the inode until after
2267 * the current transaction is aborted. Releasing it beforehand
2268 * can cause us to go to xfs_inactive and start a recursive
2269 * transaction which can easily deadlock with the current one.
2270 */
2271 xfs_bmap_cancel(&free_list); 2143 xfs_bmap_cancel(&free_list);
2272 cancel_flags |= XFS_TRANS_ABORT; 2144 cancel_flags |= XFS_TRANS_ABORT;
2145 out_trans_cancel:
2273 xfs_trans_cancel(tp, cancel_flags); 2146 xfs_trans_cancel(tp, cancel_flags);
2274
2275 goto std_return; 2147 goto std_return;
2276} 2148}
2277 2149
@@ -2638,186 +2510,6 @@ std_return:
2638} 2510}
2639 2511
2640int 2512int
2641xfs_rmdir(
2642 xfs_inode_t *dp,
2643 struct xfs_name *name,
2644 xfs_inode_t *cdp)
2645{
2646 xfs_mount_t *mp = dp->i_mount;
2647 xfs_trans_t *tp;
2648 int error;
2649 xfs_bmap_free_t free_list;
2650 xfs_fsblock_t first_block;
2651 int cancel_flags;
2652 int committed;
2653 int last_cdp_link;
2654 uint resblks;
2655
2656 xfs_itrace_entry(dp);
2657
2658 if (XFS_FORCED_SHUTDOWN(mp))
2659 return XFS_ERROR(EIO);
2660
2661 if (DM_EVENT_ENABLED(dp, DM_EVENT_REMOVE)) {
2662 error = XFS_SEND_NAMESP(mp, DM_EVENT_REMOVE,
2663 dp, DM_RIGHT_NULL,
2664 NULL, DM_RIGHT_NULL, name->name,
2665 NULL, cdp->i_d.di_mode, 0, 0);
2666 if (error)
2667 return XFS_ERROR(error);
2668 }
2669
2670 /*
2671 * Get the dquots for the inodes.
2672 */
2673 error = XFS_QM_DQATTACH(mp, dp, 0);
2674 if (!error)
2675 error = XFS_QM_DQATTACH(mp, cdp, 0);
2676 if (error) {
2677 REMOVE_DEBUG_TRACE(__LINE__);
2678 goto std_return;
2679 }
2680
2681 tp = xfs_trans_alloc(mp, XFS_TRANS_RMDIR);
2682 cancel_flags = XFS_TRANS_RELEASE_LOG_RES;
2683 /*
2684 * We try to get the real space reservation first,
2685 * allowing for directory btree deletion(s) implying
2686 * possible bmap insert(s). If we can't get the space
2687 * reservation then we use 0 instead, and avoid the bmap
2688 * btree insert(s) in the directory code by, if the bmap
2689 * insert tries to happen, instead trimming the LAST
2690 * block from the directory.
2691 */
2692 resblks = XFS_REMOVE_SPACE_RES(mp);
2693 error = xfs_trans_reserve(tp, resblks, XFS_REMOVE_LOG_RES(mp), 0,
2694 XFS_TRANS_PERM_LOG_RES, XFS_DEFAULT_LOG_COUNT);
2695 if (error == ENOSPC) {
2696 resblks = 0;
2697 error = xfs_trans_reserve(tp, 0, XFS_REMOVE_LOG_RES(mp), 0,
2698 XFS_TRANS_PERM_LOG_RES, XFS_DEFAULT_LOG_COUNT);
2699 }
2700 if (error) {
2701 ASSERT(error != ENOSPC);
2702 cancel_flags = 0;
2703 goto error_return;
2704 }
2705 XFS_BMAP_INIT(&free_list, &first_block);
2706
2707 /*
2708 * Now lock the child directory inode and the parent directory
2709 * inode in the proper order. This will take care of validating
2710 * that the directory entry for the child directory inode has
2711 * not changed while we were obtaining a log reservation.
2712 */
2713 error = xfs_lock_dir_and_entry(dp, cdp);
2714 if (error) {
2715 xfs_trans_cancel(tp, cancel_flags);
2716 goto std_return;
2717 }
2718
2719 IHOLD(dp);
2720 xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL);
2721
2722 IHOLD(cdp);
2723 xfs_trans_ijoin(tp, cdp, XFS_ILOCK_EXCL);
2724
2725 ASSERT(cdp->i_d.di_nlink >= 2);
2726 if (cdp->i_d.di_nlink != 2) {
2727 error = XFS_ERROR(ENOTEMPTY);
2728 goto error_return;
2729 }
2730 if (!xfs_dir_isempty(cdp)) {
2731 error = XFS_ERROR(ENOTEMPTY);
2732 goto error_return;
2733 }
2734
2735 error = xfs_dir_removename(tp, dp, name, cdp->i_ino,
2736 &first_block, &free_list, resblks);
2737 if (error)
2738 goto error1;
2739
2740 xfs_ichgtime(dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
2741
2742 /*
2743 * Bump the in memory generation count on the parent
2744 * directory so that other can know that it has changed.
2745 */
2746 dp->i_gen++;
2747
2748 /*
2749 * Drop the link from cdp's "..".
2750 */
2751 error = xfs_droplink(tp, dp);
2752 if (error) {
2753 goto error1;
2754 }
2755
2756 /*
2757 * Drop the link from dp to cdp.
2758 */
2759 error = xfs_droplink(tp, cdp);
2760 if (error) {
2761 goto error1;
2762 }
2763
2764 /*
2765 * Drop the "." link from cdp to self.
2766 */
2767 error = xfs_droplink(tp, cdp);
2768 if (error) {
2769 goto error1;
2770 }
2771
2772 /* Determine these before committing transaction */
2773 last_cdp_link = (cdp)->i_d.di_nlink==0;
2774
2775 /*
2776 * If this is a synchronous mount, make sure that the
2777 * rmdir transaction goes to disk before returning to
2778 * the user.
2779 */
2780 if (mp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC)) {
2781 xfs_trans_set_sync(tp);
2782 }
2783
2784 error = xfs_bmap_finish (&tp, &free_list, &committed);
2785 if (error) {
2786 xfs_bmap_cancel(&free_list);
2787 xfs_trans_cancel(tp, (XFS_TRANS_RELEASE_LOG_RES |
2788 XFS_TRANS_ABORT));
2789 goto std_return;
2790 }
2791
2792 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
2793 if (error) {
2794 goto std_return;
2795 }
2796
2797
2798 /* Fall through to std_return with error = 0 or the errno
2799 * from xfs_trans_commit. */
2800 std_return:
2801 if (DM_EVENT_ENABLED(dp, DM_EVENT_POSTREMOVE)) {
2802 (void) XFS_SEND_NAMESP(mp, DM_EVENT_POSTREMOVE,
2803 dp, DM_RIGHT_NULL,
2804 NULL, DM_RIGHT_NULL,
2805 name->name, NULL, cdp->i_d.di_mode,
2806 error, 0);
2807 }
2808 return error;
2809
2810 error1:
2811 xfs_bmap_cancel(&free_list);
2812 cancel_flags |= XFS_TRANS_ABORT;
2813 /* FALLTHROUGH */
2814
2815 error_return:
2816 xfs_trans_cancel(tp, cancel_flags);
2817 goto std_return;
2818}
2819
2820int
2821xfs_symlink( 2513xfs_symlink(
2822 xfs_inode_t *dp, 2514 xfs_inode_t *dp,
2823 struct xfs_name *link_name, 2515 struct xfs_name *link_name,
@@ -3242,7 +2934,6 @@ xfs_finish_reclaim(
3242{ 2934{
3243 xfs_perag_t *pag = xfs_get_perag(ip->i_mount, ip->i_ino); 2935 xfs_perag_t *pag = xfs_get_perag(ip->i_mount, ip->i_ino);
3244 bhv_vnode_t *vp = XFS_ITOV_NULL(ip); 2936 bhv_vnode_t *vp = XFS_ITOV_NULL(ip);
3245 int error;
3246 2937
3247 if (vp && VN_BAD(vp)) 2938 if (vp && VN_BAD(vp))
3248 goto reclaim; 2939 goto reclaim;
@@ -3285,29 +2976,16 @@ xfs_finish_reclaim(
3285 xfs_iflock(ip); 2976 xfs_iflock(ip);
3286 } 2977 }
3287 2978
3288 if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) { 2979 /*
3289 if (ip->i_update_core || 2980 * In the case of a forced shutdown we rely on xfs_iflush() to
3290 ((ip->i_itemp != NULL) && 2981 * wait for the inode to be unpinned before returning an error.
3291 (ip->i_itemp->ili_format.ilf_fields != 0))) { 2982 */
3292 error = xfs_iflush(ip, sync_mode); 2983 if (xfs_iflush(ip, sync_mode) == 0) {
3293 /* 2984 /* synchronize with xfs_iflush_done */
3294 * If we hit an error, typically because of filesystem 2985 xfs_iflock(ip);
3295 * shutdown, we don't need to let vn_reclaim to know 2986 xfs_ifunlock(ip);
3296 * because we're gonna reclaim the inode anyway.
3297 */
3298 if (error) {
3299 xfs_iunlock(ip, XFS_ILOCK_EXCL);
3300 goto reclaim;
3301 }
3302 xfs_iflock(ip); /* synchronize with xfs_iflush_done */
3303 }
3304
3305 ASSERT(ip->i_update_core == 0);
3306 ASSERT(ip->i_itemp == NULL ||
3307 ip->i_itemp->ili_format.ilf_fields == 0);
3308 } 2987 }
3309 2988
3310 xfs_ifunlock(ip);
3311 xfs_iunlock(ip, XFS_ILOCK_EXCL); 2989 xfs_iunlock(ip, XFS_ILOCK_EXCL);
3312 2990
3313 reclaim: 2991 reclaim:
@@ -3418,7 +3096,7 @@ xfs_alloc_file_space(
3418 3096
3419 /* Generate a DMAPI event if needed. */ 3097 /* Generate a DMAPI event if needed. */
3420 if (alloc_type != 0 && offset < ip->i_size && 3098 if (alloc_type != 0 && offset < ip->i_size &&
3421 (attr_flags&ATTR_DMI) == 0 && 3099 (attr_flags & XFS_ATTR_DMI) == 0 &&
3422 DM_EVENT_ENABLED(ip, DM_EVENT_WRITE)) { 3100 DM_EVENT_ENABLED(ip, DM_EVENT_WRITE)) {
3423 xfs_off_t end_dmi_offset; 3101 xfs_off_t end_dmi_offset;
3424 3102
@@ -3532,7 +3210,7 @@ retry:
3532 allocatesize_fsb -= allocated_fsb; 3210 allocatesize_fsb -= allocated_fsb;
3533 } 3211 }
3534dmapi_enospc_check: 3212dmapi_enospc_check:
3535 if (error == ENOSPC && (attr_flags & ATTR_DMI) == 0 && 3213 if (error == ENOSPC && (attr_flags & XFS_ATTR_DMI) == 0 &&
3536 DM_EVENT_ENABLED(ip, DM_EVENT_NOSPACE)) { 3214 DM_EVENT_ENABLED(ip, DM_EVENT_NOSPACE)) {
3537 error = XFS_SEND_NAMESP(mp, DM_EVENT_NOSPACE, 3215 error = XFS_SEND_NAMESP(mp, DM_EVENT_NOSPACE,
3538 ip, DM_RIGHT_NULL, 3216 ip, DM_RIGHT_NULL,
@@ -3679,7 +3357,7 @@ xfs_free_file_space(
3679 end_dmi_offset = offset + len; 3357 end_dmi_offset = offset + len;
3680 endoffset_fsb = XFS_B_TO_FSBT(mp, end_dmi_offset); 3358 endoffset_fsb = XFS_B_TO_FSBT(mp, end_dmi_offset);
3681 3359
3682 if (offset < ip->i_size && (attr_flags & ATTR_DMI) == 0 && 3360 if (offset < ip->i_size && (attr_flags & XFS_ATTR_DMI) == 0 &&
3683 DM_EVENT_ENABLED(ip, DM_EVENT_WRITE)) { 3361 DM_EVENT_ENABLED(ip, DM_EVENT_WRITE)) {
3684 if (end_dmi_offset > ip->i_size) 3362 if (end_dmi_offset > ip->i_size)
3685 end_dmi_offset = ip->i_size; 3363 end_dmi_offset = ip->i_size;
@@ -3690,7 +3368,7 @@ xfs_free_file_space(
3690 return error; 3368 return error;
3691 } 3369 }
3692 3370
3693 if (attr_flags & ATTR_NOLOCK) 3371 if (attr_flags & XFS_ATTR_NOLOCK)
3694 need_iolock = 0; 3372 need_iolock = 0;
3695 if (need_iolock) { 3373 if (need_iolock) {
3696 xfs_ilock(ip, XFS_IOLOCK_EXCL); 3374 xfs_ilock(ip, XFS_IOLOCK_EXCL);
@@ -3867,7 +3545,7 @@ xfs_change_file_space(
3867 xfs_off_t startoffset; 3545 xfs_off_t startoffset;
3868 xfs_off_t llen; 3546 xfs_off_t llen;
3869 xfs_trans_t *tp; 3547 xfs_trans_t *tp;
3870 bhv_vattr_t va; 3548 struct iattr iattr;
3871 3549
3872 xfs_itrace_entry(ip); 3550 xfs_itrace_entry(ip);
3873 3551
@@ -3941,10 +3619,10 @@ xfs_change_file_space(
3941 break; 3619 break;
3942 } 3620 }
3943 3621
3944 va.va_mask = XFS_AT_SIZE; 3622 iattr.ia_valid = ATTR_SIZE;
3945 va.va_size = startoffset; 3623 iattr.ia_size = startoffset;
3946 3624
3947 error = xfs_setattr(ip, &va, attr_flags, credp); 3625 error = xfs_setattr(ip, &iattr, attr_flags, credp);
3948 3626
3949 if (error) 3627 if (error)
3950 return error; 3628 return error;
@@ -3974,7 +3652,7 @@ xfs_change_file_space(
3974 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); 3652 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
3975 xfs_trans_ihold(tp, ip); 3653 xfs_trans_ihold(tp, ip);
3976 3654
3977 if ((attr_flags & ATTR_DMI) == 0) { 3655 if ((attr_flags & XFS_ATTR_DMI) == 0) {
3978 ip->i_d.di_mode &= ~S_ISUID; 3656 ip->i_d.di_mode &= ~S_ISUID;
3979 3657
3980 /* 3658 /*
diff --git a/fs/xfs/xfs_vnodeops.h b/fs/xfs/xfs_vnodeops.h
index 57335ba4ce53..e932a96bec54 100644
--- a/fs/xfs/xfs_vnodeops.h
+++ b/fs/xfs/xfs_vnodeops.h
@@ -2,9 +2,9 @@
2#define _XFS_VNODEOPS_H 1 2#define _XFS_VNODEOPS_H 1
3 3
4struct attrlist_cursor_kern; 4struct attrlist_cursor_kern;
5struct bhv_vattr;
6struct cred; 5struct cred;
7struct file; 6struct file;
7struct iattr;
8struct inode; 8struct inode;
9struct iovec; 9struct iovec;
10struct kiocb; 10struct kiocb;
@@ -15,14 +15,18 @@ struct xfs_iomap;
15 15
16 16
17int xfs_open(struct xfs_inode *ip); 17int xfs_open(struct xfs_inode *ip);
18int xfs_setattr(struct xfs_inode *ip, struct bhv_vattr *vap, int flags, 18int xfs_setattr(struct xfs_inode *ip, struct iattr *vap, int flags,
19 struct cred *credp); 19 struct cred *credp);
20#define XFS_ATTR_DMI 0x01 /* invocation from a DMI function */
21#define XFS_ATTR_NONBLOCK 0x02 /* return EAGAIN if operation would block */
22#define XFS_ATTR_NOLOCK 0x04 /* Don't grab any conflicting locks */
23
20int xfs_readlink(struct xfs_inode *ip, char *link); 24int xfs_readlink(struct xfs_inode *ip, char *link);
21int xfs_fsync(struct xfs_inode *ip); 25int xfs_fsync(struct xfs_inode *ip);
22int xfs_release(struct xfs_inode *ip); 26int xfs_release(struct xfs_inode *ip);
23int xfs_inactive(struct xfs_inode *ip); 27int xfs_inactive(struct xfs_inode *ip);
24int xfs_lookup(struct xfs_inode *dp, struct xfs_name *name, 28int xfs_lookup(struct xfs_inode *dp, struct xfs_name *name,
25 struct xfs_inode **ipp); 29 struct xfs_inode **ipp, struct xfs_name *ci_name);
26int xfs_create(struct xfs_inode *dp, struct xfs_name *name, mode_t mode, 30int xfs_create(struct xfs_inode *dp, struct xfs_name *name, mode_t mode,
27 xfs_dev_t rdev, struct xfs_inode **ipp, struct cred *credp); 31 xfs_dev_t rdev, struct xfs_inode **ipp, struct cred *credp);
28int xfs_remove(struct xfs_inode *dp, struct xfs_name *name, 32int xfs_remove(struct xfs_inode *dp, struct xfs_name *name,
@@ -31,8 +35,6 @@ int xfs_link(struct xfs_inode *tdp, struct xfs_inode *sip,
31 struct xfs_name *target_name); 35 struct xfs_name *target_name);
32int xfs_mkdir(struct xfs_inode *dp, struct xfs_name *dir_name, 36int xfs_mkdir(struct xfs_inode *dp, struct xfs_name *dir_name,
33 mode_t mode, struct xfs_inode **ipp, struct cred *credp); 37 mode_t mode, struct xfs_inode **ipp, struct cred *credp);
34int xfs_rmdir(struct xfs_inode *dp, struct xfs_name *name,
35 struct xfs_inode *cdp);
36int xfs_readdir(struct xfs_inode *dp, void *dirent, size_t bufsize, 38int xfs_readdir(struct xfs_inode *dp, void *dirent, size_t bufsize,
37 xfs_off_t *offset, filldir_t filldir); 39 xfs_off_t *offset, filldir_t filldir);
38int xfs_symlink(struct xfs_inode *dp, struct xfs_name *link_name, 40int xfs_symlink(struct xfs_inode *dp, struct xfs_name *link_name,