aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/Makefile6
-rw-r--r--fs/binfmt_elf.c14
-rw-r--r--fs/bio-integrity.c26
-rw-r--r--fs/bio.c5
-rw-r--r--fs/buffer.c5
-rw-r--r--fs/cifs/CHANGES17
-rw-r--r--fs/cifs/cifsencrypt.c18
-rw-r--r--fs/cifs/cifsfs.h2
-rw-r--r--fs/cifs/cifsglob.h6
-rw-r--r--fs/cifs/cifsproto.h8
-rw-r--r--fs/cifs/cifssmb.c7
-rw-r--r--fs/cifs/connect.c75
-rw-r--r--fs/cifs/dir.c337
-rw-r--r--fs/cifs/inode.c109
-rw-r--r--fs/cifs/md5.c38
-rw-r--r--fs/cifs/md5.h6
-rw-r--r--fs/cifs/readdir.c58
-rw-r--r--fs/cifs/sess.c91
-rw-r--r--fs/cifs/transport.c127
-rw-r--r--fs/compat.c2
-rw-r--r--fs/compat_ioctl.c14
-rw-r--r--fs/dcache.c2
-rw-r--r--fs/ecryptfs/crypto.c4
-rw-r--r--fs/eventpoll.c22
-rw-r--r--fs/exec.c28
-rw-r--r--fs/ext2/super.c9
-rw-r--r--fs/ext3/namei.c20
-rw-r--r--fs/ext3/super.c11
-rw-r--r--fs/ext4/balloc.c10
-rw-r--r--fs/ext4/ext4.h9
-rw-r--r--fs/ext4/extents.c2
-rw-r--r--fs/ext4/ialloc.c15
-rw-r--r--fs/ext4/inode.c47
-rw-r--r--fs/ext4/mballoc.c34
-rw-r--r--fs/ext4/migrate.c8
-rw-r--r--fs/ext4/namei.c21
-rw-r--r--fs/ext4/resize.c3
-rw-r--r--fs/ext4/super.c12
-rw-r--r--fs/hugetlbfs/inode.c8
-rw-r--r--fs/internal.h2
-rw-r--r--fs/jbd/journal.c17
-rw-r--r--fs/jbd2/journal.c23
-rw-r--r--fs/jbd2/transaction.c42
-rw-r--r--fs/jffs2/background.c18
-rw-r--r--fs/jffs2/readinode.c42
-rw-r--r--fs/lockd/svclock.c6
-rw-r--r--fs/namespace.c6
-rw-r--r--fs/notify/inotify/inotify.c2
-rw-r--r--fs/ocfs2/alloc.c30
-rw-r--r--fs/ocfs2/dcache.c42
-rw-r--r--fs/ocfs2/dcache.h9
-rw-r--r--fs/ocfs2/dlm/dlmmaster.c12
-rw-r--r--fs/ocfs2/dlm/dlmthread.c3
-rw-r--r--fs/ocfs2/dlm/dlmunlock.c4
-rw-r--r--fs/ocfs2/dlmglue.c15
-rw-r--r--fs/ocfs2/journal.h6
-rw-r--r--fs/ocfs2/ocfs2.h9
-rw-r--r--fs/ocfs2/quota_global.c4
-rw-r--r--fs/ocfs2/super.c11
-rw-r--r--fs/ocfs2/xattr.c44
-rw-r--r--fs/proc/inode.c4
-rw-r--r--fs/proc/page.c2
-rw-r--r--fs/seq_file.c151
-rw-r--r--fs/squashfs/block.c13
-rw-r--r--fs/squashfs/cache.c4
-rw-r--r--fs/squashfs/inode.c6
-rw-r--r--fs/squashfs/squashfs.h2
-rw-r--r--fs/squashfs/super.c2
-rw-r--r--fs/super.c21
-rw-r--r--fs/timerfd.c12
-rw-r--r--fs/ubifs/budget.c35
-rw-r--r--fs/ubifs/debug.c122
-rw-r--r--fs/ubifs/debug.h36
-rw-r--r--fs/ubifs/dir.c96
-rw-r--r--fs/ubifs/file.c9
-rw-r--r--fs/ubifs/gc.c28
-rw-r--r--fs/ubifs/io.c22
-rw-r--r--fs/ubifs/journal.c2
-rw-r--r--fs/ubifs/lprops.c12
-rw-r--r--fs/ubifs/lpt_commit.c44
-rw-r--r--fs/ubifs/master.c2
-rw-r--r--fs/ubifs/orphan.c38
-rw-r--r--fs/ubifs/super.c195
-rw-r--r--fs/ubifs/tnc.c12
-rw-r--r--fs/ubifs/ubifs.h26
-rw-r--r--fs/xfs/linux-2.6/xfs_buf.c79
-rw-r--r--fs/xfs/linux-2.6/xfs_sync.c6
-rw-r--r--fs/xfs/xfs_dfrag.c10
-rw-r--r--fs/xfs/xfs_log_recover.c31
89 files changed, 1680 insertions, 925 deletions
diff --git a/fs/Makefile b/fs/Makefile
index 38bc735c67ad..dc20db348679 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -69,10 +69,12 @@ obj-$(CONFIG_DLM) += dlm/
69# Do not add any filesystems before this line 69# Do not add any filesystems before this line
70obj-$(CONFIG_REISERFS_FS) += reiserfs/ 70obj-$(CONFIG_REISERFS_FS) += reiserfs/
71obj-$(CONFIG_EXT3_FS) += ext3/ # Before ext2 so root fs can be ext3 71obj-$(CONFIG_EXT3_FS) += ext3/ # Before ext2 so root fs can be ext3
72obj-$(CONFIG_EXT4_FS) += ext4/ # Before ext2 so root fs can be ext4 72obj-$(CONFIG_EXT2_FS) += ext2/
73# We place ext4 after ext2 so plain ext2 root fs's are mounted using ext2
74# unless explicitly requested by rootfstype
75obj-$(CONFIG_EXT4_FS) += ext4/
73obj-$(CONFIG_JBD) += jbd/ 76obj-$(CONFIG_JBD) += jbd/
74obj-$(CONFIG_JBD2) += jbd2/ 77obj-$(CONFIG_JBD2) += jbd2/
75obj-$(CONFIG_EXT2_FS) += ext2/
76obj-$(CONFIG_CRAMFS) += cramfs/ 78obj-$(CONFIG_CRAMFS) += cramfs/
77obj-$(CONFIG_SQUASHFS) += squashfs/ 79obj-$(CONFIG_SQUASHFS) += squashfs/
78obj-y += ramfs/ 80obj-y += ramfs/
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index e3ff2b9e602f..33b7235f853b 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -1208,9 +1208,11 @@ static unsigned long vma_dump_size(struct vm_area_struct *vma,
1208 * check for an ELF header. If we find one, dump the first page to 1208 * check for an ELF header. If we find one, dump the first page to
1209 * aid in determining what was mapped here. 1209 * aid in determining what was mapped here.
1210 */ 1210 */
1211 if (FILTER(ELF_HEADERS) && vma->vm_file != NULL && vma->vm_pgoff == 0) { 1211 if (FILTER(ELF_HEADERS) &&
1212 vma->vm_pgoff == 0 && (vma->vm_flags & VM_READ)) {
1212 u32 __user *header = (u32 __user *) vma->vm_start; 1213 u32 __user *header = (u32 __user *) vma->vm_start;
1213 u32 word; 1214 u32 word;
1215 mm_segment_t fs = get_fs();
1214 /* 1216 /*
1215 * Doing it this way gets the constant folded by GCC. 1217 * Doing it this way gets the constant folded by GCC.
1216 */ 1218 */
@@ -1223,7 +1225,15 @@ static unsigned long vma_dump_size(struct vm_area_struct *vma,
1223 magic.elfmag[EI_MAG1] = ELFMAG1; 1225 magic.elfmag[EI_MAG1] = ELFMAG1;
1224 magic.elfmag[EI_MAG2] = ELFMAG2; 1226 magic.elfmag[EI_MAG2] = ELFMAG2;
1225 magic.elfmag[EI_MAG3] = ELFMAG3; 1227 magic.elfmag[EI_MAG3] = ELFMAG3;
1226 if (get_user(word, header) == 0 && word == magic.cmp) 1228 /*
1229 * Switch to the user "segment" for get_user(),
1230 * then put back what elf_core_dump() had in place.
1231 */
1232 set_fs(USER_DS);
1233 if (unlikely(get_user(word, header)))
1234 word = 0;
1235 set_fs(fs);
1236 if (word == magic.cmp)
1227 return PAGE_SIZE; 1237 return PAGE_SIZE;
1228 } 1238 }
1229 1239
diff --git a/fs/bio-integrity.c b/fs/bio-integrity.c
index 77ebc3c263d6..549b0144da11 100644
--- a/fs/bio-integrity.c
+++ b/fs/bio-integrity.c
@@ -140,7 +140,6 @@ int bio_integrity_add_page(struct bio *bio, struct page *page,
140 140
141 iv = bip_vec_idx(bip, bip->bip_vcnt); 141 iv = bip_vec_idx(bip, bip->bip_vcnt);
142 BUG_ON(iv == NULL); 142 BUG_ON(iv == NULL);
143 BUG_ON(iv->bv_page != NULL);
144 143
145 iv->bv_page = page; 144 iv->bv_page = page;
146 iv->bv_len = len; 145 iv->bv_len = len;
@@ -465,7 +464,7 @@ static int bio_integrity_verify(struct bio *bio)
465 464
466 if (ret) { 465 if (ret) {
467 kunmap_atomic(kaddr, KM_USER0); 466 kunmap_atomic(kaddr, KM_USER0);
468 break; 467 return ret;
469 } 468 }
470 469
471 sectors = bv->bv_len / bi->sector_size; 470 sectors = bv->bv_len / bi->sector_size;
@@ -493,18 +492,13 @@ static void bio_integrity_verify_fn(struct work_struct *work)
493 struct bio_integrity_payload *bip = 492 struct bio_integrity_payload *bip =
494 container_of(work, struct bio_integrity_payload, bip_work); 493 container_of(work, struct bio_integrity_payload, bip_work);
495 struct bio *bio = bip->bip_bio; 494 struct bio *bio = bip->bip_bio;
496 int error = bip->bip_error; 495 int error;
497 496
498 if (bio_integrity_verify(bio)) { 497 error = bio_integrity_verify(bio);
499 clear_bit(BIO_UPTODATE, &bio->bi_flags);
500 error = -EIO;
501 }
502 498
503 /* Restore original bio completion handler */ 499 /* Restore original bio completion handler */
504 bio->bi_end_io = bip->bip_end_io; 500 bio->bi_end_io = bip->bip_end_io;
505 501 bio_endio(bio, error);
506 if (bio->bi_end_io)
507 bio->bi_end_io(bio, error);
508} 502}
509 503
510/** 504/**
@@ -525,7 +519,17 @@ void bio_integrity_endio(struct bio *bio, int error)
525 519
526 BUG_ON(bip->bip_bio != bio); 520 BUG_ON(bip->bip_bio != bio);
527 521
528 bip->bip_error = error; 522 /* In case of an I/O error there is no point in verifying the
523 * integrity metadata. Restore original bio end_io handler
524 * and run it.
525 */
526 if (error) {
527 bio->bi_end_io = bip->bip_end_io;
528 bio_endio(bio, error);
529
530 return;
531 }
532
529 INIT_WORK(&bip->bip_work, bio_integrity_verify_fn); 533 INIT_WORK(&bip->bip_work, bio_integrity_verify_fn);
530 queue_work(kintegrityd_wq, &bip->bip_work); 534 queue_work(kintegrityd_wq, &bip->bip_work);
531} 535}
diff --git a/fs/bio.c b/fs/bio.c
index 062299acbccd..124b95c4d582 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -302,9 +302,10 @@ void bio_init(struct bio *bio)
302struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs) 302struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs)
303{ 303{
304 struct bio *bio = NULL; 304 struct bio *bio = NULL;
305 void *uninitialized_var(p);
305 306
306 if (bs) { 307 if (bs) {
307 void *p = mempool_alloc(bs->bio_pool, gfp_mask); 308 p = mempool_alloc(bs->bio_pool, gfp_mask);
308 309
309 if (p) 310 if (p)
310 bio = p + bs->front_pad; 311 bio = p + bs->front_pad;
@@ -329,7 +330,7 @@ struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs)
329 } 330 }
330 if (unlikely(!bvl)) { 331 if (unlikely(!bvl)) {
331 if (bs) 332 if (bs)
332 mempool_free(bio, bs->bio_pool); 333 mempool_free(p, bs->bio_pool);
333 else 334 else
334 kfree(bio); 335 kfree(bio);
335 bio = NULL; 336 bio = NULL;
diff --git a/fs/buffer.c b/fs/buffer.c
index b58208f1640a..9f697419ed8e 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -777,6 +777,7 @@ static int __set_page_dirty(struct page *page,
777 __inc_zone_page_state(page, NR_FILE_DIRTY); 777 __inc_zone_page_state(page, NR_FILE_DIRTY);
778 __inc_bdi_stat(mapping->backing_dev_info, 778 __inc_bdi_stat(mapping->backing_dev_info,
779 BDI_RECLAIMABLE); 779 BDI_RECLAIMABLE);
780 task_dirty_inc(current);
780 task_io_account_write(PAGE_CACHE_SIZE); 781 task_io_account_write(PAGE_CACHE_SIZE);
781 } 782 }
782 radix_tree_tag_set(&mapping->page_tree, 783 radix_tree_tag_set(&mapping->page_tree,
@@ -2688,7 +2689,7 @@ int nobh_write_end(struct file *file, struct address_space *mapping,
2688 struct buffer_head *bh; 2689 struct buffer_head *bh;
2689 BUG_ON(fsdata != NULL && page_has_buffers(page)); 2690 BUG_ON(fsdata != NULL && page_has_buffers(page));
2690 2691
2691 if (unlikely(copied < len) && !page_has_buffers(page)) 2692 if (unlikely(copied < len) && head)
2692 attach_nobh_buffers(page, head); 2693 attach_nobh_buffers(page, head);
2693 if (page_has_buffers(page)) 2694 if (page_has_buffers(page))
2694 return generic_write_end(file, mapping, pos, len, 2695 return generic_write_end(file, mapping, pos, len,
@@ -3108,7 +3109,7 @@ int sync_dirty_buffer(struct buffer_head *bh)
3108 if (test_clear_buffer_dirty(bh)) { 3109 if (test_clear_buffer_dirty(bh)) {
3109 get_bh(bh); 3110 get_bh(bh);
3110 bh->b_end_io = end_buffer_write_sync; 3111 bh->b_end_io = end_buffer_write_sync;
3111 ret = submit_bh(WRITE_SYNC, bh); 3112 ret = submit_bh(WRITE, bh);
3112 wait_on_buffer(bh); 3113 wait_on_buffer(bh);
3113 if (buffer_eopnotsupp(bh)) { 3114 if (buffer_eopnotsupp(bh)) {
3114 clear_buffer_eopnotsupp(bh); 3115 clear_buffer_eopnotsupp(bh);
diff --git a/fs/cifs/CHANGES b/fs/cifs/CHANGES
index 080703a15f44..851388fafc73 100644
--- a/fs/cifs/CHANGES
+++ b/fs/cifs/CHANGES
@@ -1,3 +1,13 @@
1Version 1.57
2------------
3Improve support for multiple security contexts to the same server. We
4used to use the same "vcnumber" for all connections which could cause
5the server to treat subsequent connections, especially those that
6are authenticated as guest, as reconnections, invalidating the earlier
7user's smb session. This fix allows cifs to mount multiple times to the
8same server with different userids without risking invalidating earlier
9established security contexts.
10
1Version 1.56 11Version 1.56
2------------ 12------------
3Add "forcemandatorylock" mount option to allow user to use mandatory 13Add "forcemandatorylock" mount option to allow user to use mandatory
@@ -5,7 +15,12 @@ rather than posix (advisory) byte range locks, even though server would
5support posix byte range locks. Fix query of root inode when prefixpath 15support posix byte range locks. Fix query of root inode when prefixpath
6specified and user does not have access to query information about the 16specified and user does not have access to query information about the
7top of the share. Fix problem in 2.6.28 resolving DFS paths to 17top of the share. Fix problem in 2.6.28 resolving DFS paths to
8Samba servers (worked to Windows). 18Samba servers (worked to Windows). Fix rmdir so that pending search
19(readdir) requests do not get invalid results which include the now
20removed directory. Fix oops in cifs_dfs_ref.c when prefixpath is not reachable
21when using DFS. Add better file create support to servers which support
22the CIFS POSIX protocol extensions (this adds support for new flags
23on create, and improves semantics for write of locked ranges).
9 24
10Version 1.55 25Version 1.55
11------------ 26------------
diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c
index d4839cf0cb2c..7c9809523f42 100644
--- a/fs/cifs/cifsencrypt.c
+++ b/fs/cifs/cifsencrypt.c
@@ -48,11 +48,11 @@ static int cifs_calculate_signature(const struct smb_hdr *cifs_pdu,
48 if ((cifs_pdu == NULL) || (signature == NULL) || (key == NULL)) 48 if ((cifs_pdu == NULL) || (signature == NULL) || (key == NULL))
49 return -EINVAL; 49 return -EINVAL;
50 50
51 MD5Init(&context); 51 cifs_MD5_init(&context);
52 MD5Update(&context, (char *)&key->data, key->len); 52 cifs_MD5_update(&context, (char *)&key->data, key->len);
53 MD5Update(&context, cifs_pdu->Protocol, cifs_pdu->smb_buf_length); 53 cifs_MD5_update(&context, cifs_pdu->Protocol, cifs_pdu->smb_buf_length);
54 54
55 MD5Final(signature, &context); 55 cifs_MD5_final(signature, &context);
56 return 0; 56 return 0;
57} 57}
58 58
@@ -96,8 +96,8 @@ static int cifs_calc_signature2(const struct kvec *iov, int n_vec,
96 if ((iov == NULL) || (signature == NULL) || (key == NULL)) 96 if ((iov == NULL) || (signature == NULL) || (key == NULL))
97 return -EINVAL; 97 return -EINVAL;
98 98
99 MD5Init(&context); 99 cifs_MD5_init(&context);
100 MD5Update(&context, (char *)&key->data, key->len); 100 cifs_MD5_update(&context, (char *)&key->data, key->len);
101 for (i = 0; i < n_vec; i++) { 101 for (i = 0; i < n_vec; i++) {
102 if (iov[i].iov_len == 0) 102 if (iov[i].iov_len == 0)
103 continue; 103 continue;
@@ -110,13 +110,13 @@ static int cifs_calc_signature2(const struct kvec *iov, int n_vec,
110 if (i == 0) { 110 if (i == 0) {
111 if (iov[0].iov_len <= 8) /* cmd field at offset 9 */ 111 if (iov[0].iov_len <= 8) /* cmd field at offset 9 */
112 break; /* nothing to sign or corrupt header */ 112 break; /* nothing to sign or corrupt header */
113 MD5Update(&context, iov[0].iov_base+4, 113 cifs_MD5_update(&context, iov[0].iov_base+4,
114 iov[0].iov_len-4); 114 iov[0].iov_len-4);
115 } else 115 } else
116 MD5Update(&context, iov[i].iov_base, iov[i].iov_len); 116 cifs_MD5_update(&context, iov[i].iov_base, iov[i].iov_len);
117 } 117 }
118 118
119 MD5Final(signature, &context); 119 cifs_MD5_final(signature, &context);
120 120
121 return 0; 121 return 0;
122} 122}
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index 7ac481841f87..2b1d28a9ee28 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -100,5 +100,5 @@ extern long cifs_ioctl(struct file *filep, unsigned int cmd, unsigned long arg);
100extern const struct export_operations cifs_export_ops; 100extern const struct export_operations cifs_export_ops;
101#endif /* EXPERIMENTAL */ 101#endif /* EXPERIMENTAL */
102 102
103#define CIFS_VERSION "1.56" 103#define CIFS_VERSION "1.57"
104#endif /* _CIFSFS_H */ 104#endif /* _CIFSFS_H */
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index 94c1ca0ec953..e004f6db5fc8 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -164,9 +164,12 @@ struct TCP_Server_Info {
164 /* multiplexed reads or writes */ 164 /* multiplexed reads or writes */
165 unsigned int maxBuf; /* maxBuf specifies the maximum */ 165 unsigned int maxBuf; /* maxBuf specifies the maximum */
166 /* message size the server can send or receive for non-raw SMBs */ 166 /* message size the server can send or receive for non-raw SMBs */
167 unsigned int maxRw; /* maxRw specifies the maximum */ 167 unsigned int max_rw; /* maxRw specifies the maximum */
168 /* message size the server can send or receive for */ 168 /* message size the server can send or receive for */
169 /* SMB_COM_WRITE_RAW or SMB_COM_READ_RAW. */ 169 /* SMB_COM_WRITE_RAW or SMB_COM_READ_RAW. */
170 unsigned int max_vcs; /* maximum number of smb sessions, at least
171 those that can be specified uniquely with
172 vcnumbers */
170 char sessid[4]; /* unique token id for this session */ 173 char sessid[4]; /* unique token id for this session */
171 /* (returned on Negotiate */ 174 /* (returned on Negotiate */
172 int capabilities; /* allow selective disabling of caps by smb sess */ 175 int capabilities; /* allow selective disabling of caps by smb sess */
@@ -210,6 +213,7 @@ struct cifsSesInfo {
210 unsigned overrideSecFlg; /* if non-zero override global sec flags */ 213 unsigned overrideSecFlg; /* if non-zero override global sec flags */
211 __u16 ipc_tid; /* special tid for connection to IPC share */ 214 __u16 ipc_tid; /* special tid for connection to IPC share */
212 __u16 flags; 215 __u16 flags;
216 __u16 vcnum;
213 char *serverOS; /* name of operating system underlying server */ 217 char *serverOS; /* name of operating system underlying server */
214 char *serverNOS; /* name of network operating system of server */ 218 char *serverNOS; /* name of network operating system of server */
215 char *serverDomain; /* security realm of server */ 219 char *serverDomain; /* security realm of server */
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index 06f6779988bf..083dfc57c7a3 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -35,13 +35,14 @@ extern struct smb_hdr *cifs_buf_get(void);
35extern void cifs_buf_release(void *); 35extern void cifs_buf_release(void *);
36extern struct smb_hdr *cifs_small_buf_get(void); 36extern struct smb_hdr *cifs_small_buf_get(void);
37extern void cifs_small_buf_release(void *); 37extern void cifs_small_buf_release(void *);
38extern int smb_send(struct socket *, struct smb_hdr *, 38extern int smb_send(struct TCP_Server_Info *, struct smb_hdr *,
39 unsigned int /* length */ , struct sockaddr *, bool); 39 unsigned int /* length */);
40extern unsigned int _GetXid(void); 40extern unsigned int _GetXid(void);
41extern void _FreeXid(unsigned int); 41extern void _FreeXid(unsigned int);
42#define GetXid() (int)_GetXid(); cFYI(1,("CIFS VFS: in %s as Xid: %d with uid: %d",__func__, xid,current_fsuid())); 42#define GetXid() (int)_GetXid(); cFYI(1,("CIFS VFS: in %s as Xid: %d with uid: %d",__func__, xid,current_fsuid()));
43#define FreeXid(curr_xid) {_FreeXid(curr_xid); cFYI(1,("CIFS VFS: leaving %s (xid = %d) rc = %d",__func__,curr_xid,(int)rc));} 43#define FreeXid(curr_xid) {_FreeXid(curr_xid); cFYI(1,("CIFS VFS: leaving %s (xid = %d) rc = %d",__func__,curr_xid,(int)rc));}
44extern char *build_path_from_dentry(struct dentry *); 44extern char *build_path_from_dentry(struct dentry *);
45extern char *cifs_build_path_to_root(struct cifs_sb_info *cifs_sb);
45extern char *build_wildcard_path_from_dentry(struct dentry *direntry); 46extern char *build_wildcard_path_from_dentry(struct dentry *direntry);
46/* extern void renew_parental_timestamps(struct dentry *direntry);*/ 47/* extern void renew_parental_timestamps(struct dentry *direntry);*/
47extern int SendReceive(const unsigned int /* xid */ , struct cifsSesInfo *, 48extern int SendReceive(const unsigned int /* xid */ , struct cifsSesInfo *,
@@ -91,6 +92,9 @@ extern u64 cifs_UnixTimeToNT(struct timespec);
91extern __le64 cnvrtDosCifsTm(__u16 date, __u16 time); 92extern __le64 cnvrtDosCifsTm(__u16 date, __u16 time);
92extern struct timespec cnvrtDosUnixTm(__u16 date, __u16 time); 93extern struct timespec cnvrtDosUnixTm(__u16 date, __u16 time);
93 94
95extern void posix_fill_in_inode(struct inode *tmp_inode,
96 FILE_UNIX_BASIC_INFO *pData, int isNewInode);
97extern struct inode *cifs_new_inode(struct super_block *sb, __u64 *inum);
94extern int cifs_get_inode_info(struct inode **pinode, 98extern int cifs_get_inode_info(struct inode **pinode,
95 const unsigned char *search_path, 99 const unsigned char *search_path,
96 FILE_ALL_INFO *pfile_info, 100 FILE_ALL_INFO *pfile_info,
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index 552642a507c4..939e2f76b959 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -528,14 +528,15 @@ CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses)
528 server->maxReq = le16_to_cpu(rsp->MaxMpxCount); 528 server->maxReq = le16_to_cpu(rsp->MaxMpxCount);
529 server->maxBuf = min((__u32)le16_to_cpu(rsp->MaxBufSize), 529 server->maxBuf = min((__u32)le16_to_cpu(rsp->MaxBufSize),
530 (__u32)CIFSMaxBufSize + MAX_CIFS_HDR_SIZE); 530 (__u32)CIFSMaxBufSize + MAX_CIFS_HDR_SIZE);
531 server->max_vcs = le16_to_cpu(rsp->MaxNumberVcs);
531 GETU32(server->sessid) = le32_to_cpu(rsp->SessionKey); 532 GETU32(server->sessid) = le32_to_cpu(rsp->SessionKey);
532 /* even though we do not use raw we might as well set this 533 /* even though we do not use raw we might as well set this
533 accurately, in case we ever find a need for it */ 534 accurately, in case we ever find a need for it */
534 if ((le16_to_cpu(rsp->RawMode) & RAW_ENABLE) == RAW_ENABLE) { 535 if ((le16_to_cpu(rsp->RawMode) & RAW_ENABLE) == RAW_ENABLE) {
535 server->maxRw = 0xFF00; 536 server->max_rw = 0xFF00;
536 server->capabilities = CAP_MPX_MODE | CAP_RAW_MODE; 537 server->capabilities = CAP_MPX_MODE | CAP_RAW_MODE;
537 } else { 538 } else {
538 server->maxRw = 0;/* we do not need to use raw anyway */ 539 server->max_rw = 0;/* do not need to use raw anyway */
539 server->capabilities = CAP_MPX_MODE; 540 server->capabilities = CAP_MPX_MODE;
540 } 541 }
541 tmp = (__s16)le16_to_cpu(rsp->ServerTimeZone); 542 tmp = (__s16)le16_to_cpu(rsp->ServerTimeZone);
@@ -638,7 +639,7 @@ CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses)
638 /* probably no need to store and check maxvcs */ 639 /* probably no need to store and check maxvcs */
639 server->maxBuf = min(le32_to_cpu(pSMBr->MaxBufferSize), 640 server->maxBuf = min(le32_to_cpu(pSMBr->MaxBufferSize),
640 (__u32) CIFSMaxBufSize + MAX_CIFS_HDR_SIZE); 641 (__u32) CIFSMaxBufSize + MAX_CIFS_HDR_SIZE);
641 server->maxRw = le32_to_cpu(pSMBr->MaxRawSize); 642 server->max_rw = le32_to_cpu(pSMBr->MaxRawSize);
642 cFYI(DBG2, ("Max buf = %d", ses->server->maxBuf)); 643 cFYI(DBG2, ("Max buf = %d", ses->server->maxBuf));
643 GETU32(ses->server->sessid) = le32_to_cpu(pSMBr->SessionKey); 644 GETU32(ses->server->sessid) = le32_to_cpu(pSMBr->SessionKey);
644 server->capabilities = le32_to_cpu(pSMBr->Capabilities); 645 server->capabilities = le32_to_cpu(pSMBr->Capabilities);
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index e9ea394ee075..da0f4ffa0613 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -23,7 +23,6 @@
23#include <linux/string.h> 23#include <linux/string.h>
24#include <linux/list.h> 24#include <linux/list.h>
25#include <linux/wait.h> 25#include <linux/wait.h>
26#include <linux/ipv6.h>
27#include <linux/pagemap.h> 26#include <linux/pagemap.h>
28#include <linux/ctype.h> 27#include <linux/ctype.h>
29#include <linux/utsname.h> 28#include <linux/utsname.h>
@@ -35,6 +34,7 @@
35#include <linux/freezer.h> 34#include <linux/freezer.h>
36#include <asm/uaccess.h> 35#include <asm/uaccess.h>
37#include <asm/processor.h> 36#include <asm/processor.h>
37#include <net/ipv6.h>
38#include "cifspdu.h" 38#include "cifspdu.h"
39#include "cifsglob.h" 39#include "cifsglob.h"
40#include "cifsproto.h" 40#include "cifsproto.h"
@@ -1354,7 +1354,7 @@ cifs_parse_mount_options(char *options, const char *devname,
1354} 1354}
1355 1355
1356static struct TCP_Server_Info * 1356static struct TCP_Server_Info *
1357cifs_find_tcp_session(struct sockaddr *addr) 1357cifs_find_tcp_session(struct sockaddr_storage *addr)
1358{ 1358{
1359 struct list_head *tmp; 1359 struct list_head *tmp;
1360 struct TCP_Server_Info *server; 1360 struct TCP_Server_Info *server;
@@ -1374,13 +1374,13 @@ cifs_find_tcp_session(struct sockaddr *addr)
1374 if (server->tcpStatus == CifsNew) 1374 if (server->tcpStatus == CifsNew)
1375 continue; 1375 continue;
1376 1376
1377 if (addr->sa_family == AF_INET && 1377 if (addr->ss_family == AF_INET &&
1378 (addr4->sin_addr.s_addr != 1378 (addr4->sin_addr.s_addr !=
1379 server->addr.sockAddr.sin_addr.s_addr)) 1379 server->addr.sockAddr.sin_addr.s_addr))
1380 continue; 1380 continue;
1381 else if (addr->sa_family == AF_INET6 && 1381 else if (addr->ss_family == AF_INET6 &&
1382 memcmp(&server->addr.sockAddr6.sin6_addr, 1382 !ipv6_addr_equal(&server->addr.sockAddr6.sin6_addr,
1383 &addr6->sin6_addr, sizeof(addr6->sin6_addr))) 1383 &addr6->sin6_addr))
1384 continue; 1384 continue;
1385 1385
1386 ++server->srv_count; 1386 ++server->srv_count;
@@ -1419,12 +1419,12 @@ static struct TCP_Server_Info *
1419cifs_get_tcp_session(struct smb_vol *volume_info) 1419cifs_get_tcp_session(struct smb_vol *volume_info)
1420{ 1420{
1421 struct TCP_Server_Info *tcp_ses = NULL; 1421 struct TCP_Server_Info *tcp_ses = NULL;
1422 struct sockaddr addr; 1422 struct sockaddr_storage addr;
1423 struct sockaddr_in *sin_server = (struct sockaddr_in *) &addr; 1423 struct sockaddr_in *sin_server = (struct sockaddr_in *) &addr;
1424 struct sockaddr_in6 *sin_server6 = (struct sockaddr_in6 *) &addr; 1424 struct sockaddr_in6 *sin_server6 = (struct sockaddr_in6 *) &addr;
1425 int rc; 1425 int rc;
1426 1426
1427 memset(&addr, 0, sizeof(struct sockaddr)); 1427 memset(&addr, 0, sizeof(struct sockaddr_storage));
1428 1428
1429 if (volume_info->UNCip && volume_info->UNC) { 1429 if (volume_info->UNCip && volume_info->UNC) {
1430 rc = cifs_inet_pton(AF_INET, volume_info->UNCip, 1430 rc = cifs_inet_pton(AF_INET, volume_info->UNCip,
@@ -1435,9 +1435,9 @@ cifs_get_tcp_session(struct smb_vol *volume_info)
1435 rc = cifs_inet_pton(AF_INET6, volume_info->UNCip, 1435 rc = cifs_inet_pton(AF_INET6, volume_info->UNCip,
1436 &sin_server6->sin6_addr.in6_u); 1436 &sin_server6->sin6_addr.in6_u);
1437 if (rc > 0) 1437 if (rc > 0)
1438 addr.sa_family = AF_INET6; 1438 addr.ss_family = AF_INET6;
1439 } else { 1439 } else {
1440 addr.sa_family = AF_INET; 1440 addr.ss_family = AF_INET;
1441 } 1441 }
1442 1442
1443 if (rc <= 0) { 1443 if (rc <= 0) {
@@ -1502,7 +1502,7 @@ cifs_get_tcp_session(struct smb_vol *volume_info)
1502 tcp_ses->tcpStatus = CifsNew; 1502 tcp_ses->tcpStatus = CifsNew;
1503 ++tcp_ses->srv_count; 1503 ++tcp_ses->srv_count;
1504 1504
1505 if (addr.sa_family == AF_INET6) { 1505 if (addr.ss_family == AF_INET6) {
1506 cFYI(1, ("attempting ipv6 connect")); 1506 cFYI(1, ("attempting ipv6 connect"));
1507 /* BB should we allow ipv6 on port 139? */ 1507 /* BB should we allow ipv6 on port 139? */
1508 /* other OS never observed in Wild doing 139 with v6 */ 1508 /* other OS never observed in Wild doing 139 with v6 */
@@ -1802,7 +1802,7 @@ ipv4_connect(struct TCP_Server_Info *server)
1802 * user space buffer 1802 * user space buffer
1803 */ 1803 */
1804 socket->sk->sk_rcvtimeo = 7 * HZ; 1804 socket->sk->sk_rcvtimeo = 7 * HZ;
1805 socket->sk->sk_sndtimeo = 3 * HZ; 1805 socket->sk->sk_sndtimeo = 5 * HZ;
1806 1806
1807 /* make the bufsizes depend on wsize/rsize and max requests */ 1807 /* make the bufsizes depend on wsize/rsize and max requests */
1808 if (server->noautotune) { 1808 if (server->noautotune) {
@@ -1860,9 +1860,7 @@ ipv4_connect(struct TCP_Server_Info *server)
1860 smb_buf = (struct smb_hdr *)ses_init_buf; 1860 smb_buf = (struct smb_hdr *)ses_init_buf;
1861 /* sizeof RFC1002_SESSION_REQUEST with no scope */ 1861 /* sizeof RFC1002_SESSION_REQUEST with no scope */
1862 smb_buf->smb_buf_length = 0x81000044; 1862 smb_buf->smb_buf_length = 0x81000044;
1863 rc = smb_send(socket, smb_buf, 0x44, 1863 rc = smb_send(server, smb_buf, 0x44);
1864 (struct sockaddr *) &server->addr.sockAddr,
1865 server->noblocksnd);
1866 kfree(ses_init_buf); 1864 kfree(ses_init_buf);
1867 msleep(1); /* RFC1001 layer in at least one server 1865 msleep(1); /* RFC1001 layer in at least one server
1868 requires very short break before negprot 1866 requires very short break before negprot
@@ -1955,7 +1953,7 @@ ipv6_connect(struct TCP_Server_Info *server)
1955 * user space buffer 1953 * user space buffer
1956 */ 1954 */
1957 socket->sk->sk_rcvtimeo = 7 * HZ; 1955 socket->sk->sk_rcvtimeo = 7 * HZ;
1958 socket->sk->sk_sndtimeo = 3 * HZ; 1956 socket->sk->sk_sndtimeo = 5 * HZ;
1959 server->ssocket = socket; 1957 server->ssocket = socket;
1960 1958
1961 return rc; 1959 return rc;
@@ -2182,6 +2180,33 @@ static void setup_cifs_sb(struct smb_vol *pvolume_info,
2182 "mount option supported")); 2180 "mount option supported"));
2183} 2181}
2184 2182
2183static int
2184is_path_accessible(int xid, struct cifsTconInfo *tcon,
2185 struct cifs_sb_info *cifs_sb, const char *full_path)
2186{
2187 int rc;
2188 __u64 inode_num;
2189 FILE_ALL_INFO *pfile_info;
2190
2191 rc = CIFSGetSrvInodeNumber(xid, tcon, full_path, &inode_num,
2192 cifs_sb->local_nls,
2193 cifs_sb->mnt_cifs_flags &
2194 CIFS_MOUNT_MAP_SPECIAL_CHR);
2195 if (rc != -EOPNOTSUPP)
2196 return rc;
2197
2198 pfile_info = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL);
2199 if (pfile_info == NULL)
2200 return -ENOMEM;
2201
2202 rc = CIFSSMBQPathInfo(xid, tcon, full_path, pfile_info,
2203 0 /* not legacy */, cifs_sb->local_nls,
2204 cifs_sb->mnt_cifs_flags &
2205 CIFS_MOUNT_MAP_SPECIAL_CHR);
2206 kfree(pfile_info);
2207 return rc;
2208}
2209
2185int 2210int
2186cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb, 2211cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
2187 char *mount_data, const char *devname) 2212 char *mount_data, const char *devname)
@@ -2192,6 +2217,7 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
2192 struct cifsSesInfo *pSesInfo = NULL; 2217 struct cifsSesInfo *pSesInfo = NULL;
2193 struct cifsTconInfo *tcon = NULL; 2218 struct cifsTconInfo *tcon = NULL;
2194 struct TCP_Server_Info *srvTcp = NULL; 2219 struct TCP_Server_Info *srvTcp = NULL;
2220 char *full_path;
2195 2221
2196 xid = GetXid(); 2222 xid = GetXid();
2197 2223
@@ -2428,6 +2454,23 @@ mount_fail_check:
2428 cifs_sb->rsize = min(cifs_sb->rsize, 2454 cifs_sb->rsize = min(cifs_sb->rsize,
2429 (tcon->ses->server->maxBuf - MAX_CIFS_HDR_SIZE)); 2455 (tcon->ses->server->maxBuf - MAX_CIFS_HDR_SIZE));
2430 2456
2457 if (!rc && cifs_sb->prepathlen) {
2458 /* build_path_to_root works only when we have a valid tcon */
2459 full_path = cifs_build_path_to_root(cifs_sb);
2460 if (full_path == NULL) {
2461 rc = -ENOMEM;
2462 goto mount_fail_check;
2463 }
2464 rc = is_path_accessible(xid, tcon, cifs_sb, full_path);
2465 if (rc) {
2466 cERROR(1, ("Path %s in not accessible: %d",
2467 full_path, rc));
2468 kfree(full_path);
2469 goto mount_fail_check;
2470 }
2471 kfree(full_path);
2472 }
2473
2431 /* volume_info->password is freed above when existing session found 2474 /* volume_info->password is freed above when existing session found
2432 (in which case it is not needed anymore) but when new sesion is created 2475 (in which case it is not needed anymore) but when new sesion is created
2433 the password ptr is put in the new session structure (in which case the 2476 the password ptr is put in the new session structure (in which case the
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c
index 838d9c720a5c..89fb72832652 100644
--- a/fs/cifs/dir.c
+++ b/fs/cifs/dir.c
@@ -3,7 +3,7 @@
3 * 3 *
4 * vfs operations that deal with dentries 4 * vfs operations that deal with dentries
5 * 5 *
6 * Copyright (C) International Business Machines Corp., 2002,2008 6 * Copyright (C) International Business Machines Corp., 2002,2009
7 * Author(s): Steve French (sfrench@us.ibm.com) 7 * Author(s): Steve French (sfrench@us.ibm.com)
8 * 8 *
9 * This library is free software; you can redistribute it and/or modify 9 * This library is free software; you can redistribute it and/or modify
@@ -129,6 +129,89 @@ cifs_bp_rename_retry:
129 return full_path; 129 return full_path;
130} 130}
131 131
132static int cifs_posix_open(char *full_path, struct inode **pinode,
133 struct super_block *sb, int mode, int oflags,
134 int *poplock, __u16 *pnetfid, int xid)
135{
136 int rc;
137 __u32 oplock;
138 FILE_UNIX_BASIC_INFO *presp_data;
139 __u32 posix_flags = 0;
140 struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
141
142 cFYI(1, ("posix open %s", full_path));
143
144 presp_data = kzalloc(sizeof(FILE_UNIX_BASIC_INFO), GFP_KERNEL);
145 if (presp_data == NULL)
146 return -ENOMEM;
147
148/* So far cifs posix extensions can only map the following flags.
149 There are other valid fmode oflags such as FMODE_LSEEK, FMODE_PREAD, but
150 so far we do not seem to need them, and we can treat them as local only */
151 if ((oflags & (FMODE_READ | FMODE_WRITE)) ==
152 (FMODE_READ | FMODE_WRITE))
153 posix_flags = SMB_O_RDWR;
154 else if (oflags & FMODE_READ)
155 posix_flags = SMB_O_RDONLY;
156 else if (oflags & FMODE_WRITE)
157 posix_flags = SMB_O_WRONLY;
158 if (oflags & O_CREAT)
159 posix_flags |= SMB_O_CREAT;
160 if (oflags & O_EXCL)
161 posix_flags |= SMB_O_EXCL;
162 if (oflags & O_TRUNC)
163 posix_flags |= SMB_O_TRUNC;
164 if (oflags & O_APPEND)
165 posix_flags |= SMB_O_APPEND;
166 if (oflags & O_SYNC)
167 posix_flags |= SMB_O_SYNC;
168 if (oflags & O_DIRECTORY)
169 posix_flags |= SMB_O_DIRECTORY;
170 if (oflags & O_NOFOLLOW)
171 posix_flags |= SMB_O_NOFOLLOW;
172 if (oflags & O_DIRECT)
173 posix_flags |= SMB_O_DIRECT;
174
175
176 rc = CIFSPOSIXCreate(xid, cifs_sb->tcon, posix_flags, mode,
177 pnetfid, presp_data, &oplock, full_path,
178 cifs_sb->local_nls, cifs_sb->mnt_cifs_flags &
179 CIFS_MOUNT_MAP_SPECIAL_CHR);
180 if (rc)
181 goto posix_open_ret;
182
183 if (presp_data->Type == cpu_to_le32(-1))
184 goto posix_open_ret; /* open ok, caller does qpathinfo */
185
186 /* get new inode and set it up */
187 if (!pinode)
188 goto posix_open_ret; /* caller does not need info */
189
190 *pinode = cifs_new_inode(sb, &presp_data->UniqueId);
191
192 /* We do not need to close the file if new_inode fails since
193 the caller will retry qpathinfo as long as inode is null */
194 if (*pinode == NULL)
195 goto posix_open_ret;
196
197 posix_fill_in_inode(*pinode, presp_data, 1);
198
199posix_open_ret:
200 kfree(presp_data);
201 return rc;
202}
203
204static void setup_cifs_dentry(struct cifsTconInfo *tcon,
205 struct dentry *direntry,
206 struct inode *newinode)
207{
208 if (tcon->nocase)
209 direntry->d_op = &cifs_ci_dentry_ops;
210 else
211 direntry->d_op = &cifs_dentry_ops;
212 d_instantiate(direntry, newinode);
213}
214
132/* Inode operations in similar order to how they appear in Linux file fs.h */ 215/* Inode operations in similar order to how they appear in Linux file fs.h */
133 216
134int 217int
@@ -139,14 +222,21 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode,
139 int xid; 222 int xid;
140 int create_options = CREATE_NOT_DIR; 223 int create_options = CREATE_NOT_DIR;
141 int oplock = 0; 224 int oplock = 0;
225 int oflags;
226 /*
227 * BB below access is probably too much for mknod to request
228 * but we have to do query and setpathinfo so requesting
229 * less could fail (unless we want to request getatr and setatr
230 * permissions (only). At least for POSIX we do not have to
231 * request so much.
232 */
142 int desiredAccess = GENERIC_READ | GENERIC_WRITE; 233 int desiredAccess = GENERIC_READ | GENERIC_WRITE;
143 __u16 fileHandle; 234 __u16 fileHandle;
144 struct cifs_sb_info *cifs_sb; 235 struct cifs_sb_info *cifs_sb;
145 struct cifsTconInfo *pTcon; 236 struct cifsTconInfo *tcon;
146 char *full_path = NULL; 237 char *full_path = NULL;
147 FILE_ALL_INFO *buf = NULL; 238 FILE_ALL_INFO *buf = NULL;
148 struct inode *newinode = NULL; 239 struct inode *newinode = NULL;
149 struct cifsFileInfo *pCifsFile = NULL;
150 struct cifsInodeInfo *pCifsInode; 240 struct cifsInodeInfo *pCifsInode;
151 int disposition = FILE_OVERWRITE_IF; 241 int disposition = FILE_OVERWRITE_IF;
152 bool write_only = false; 242 bool write_only = false;
@@ -154,7 +244,7 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode,
154 xid = GetXid(); 244 xid = GetXid();
155 245
156 cifs_sb = CIFS_SB(inode->i_sb); 246 cifs_sb = CIFS_SB(inode->i_sb);
157 pTcon = cifs_sb->tcon; 247 tcon = cifs_sb->tcon;
158 248
159 full_path = build_path_from_dentry(direntry); 249 full_path = build_path_from_dentry(direntry);
160 if (full_path == NULL) { 250 if (full_path == NULL) {
@@ -162,12 +252,44 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode,
162 return -ENOMEM; 252 return -ENOMEM;
163 } 253 }
164 254
165 if (nd && (nd->flags & LOOKUP_OPEN)) { 255 mode &= ~current->fs->umask;
166 int oflags = nd->intent.open.flags; 256 if (oplockEnabled)
257 oplock = REQ_OPLOCK;
258
259 if (nd && (nd->flags & LOOKUP_OPEN))
260 oflags = nd->intent.open.flags;
261 else
262 oflags = FMODE_READ;
263
264 if (tcon->unix_ext && (tcon->ses->capabilities & CAP_UNIX) &&
265 (CIFS_UNIX_POSIX_PATH_OPS_CAP &
266 le64_to_cpu(tcon->fsUnixInfo.Capability))) {
267 rc = cifs_posix_open(full_path, &newinode, inode->i_sb,
268 mode, oflags, &oplock, &fileHandle, xid);
269 /* EIO could indicate that (posix open) operation is not
270 supported, despite what server claimed in capability
271 negotation. EREMOTE indicates DFS junction, which is not
272 handled in posix open */
273
274 if ((rc == 0) && (newinode == NULL))
275 goto cifs_create_get_file_info; /* query inode info */
276 else if (rc == 0) /* success, no need to query */
277 goto cifs_create_set_dentry;
278 else if ((rc != -EIO) && (rc != -EREMOTE) &&
279 (rc != -EOPNOTSUPP)) /* path not found or net err */
280 goto cifs_create_out;
281 /* else fallthrough to retry, using older open call, this is
282 case where server does not support this SMB level, and
283 falsely claims capability (also get here for DFS case
284 which should be rare for path not covered on files) */
285 }
167 286
287 if (nd && (nd->flags & LOOKUP_OPEN)) {
288 /* if the file is going to stay open, then we
289 need to set the desired access properly */
168 desiredAccess = 0; 290 desiredAccess = 0;
169 if (oflags & FMODE_READ) 291 if (oflags & FMODE_READ)
170 desiredAccess |= GENERIC_READ; 292 desiredAccess |= GENERIC_READ; /* is this too little? */
171 if (oflags & FMODE_WRITE) { 293 if (oflags & FMODE_WRITE) {
172 desiredAccess |= GENERIC_WRITE; 294 desiredAccess |= GENERIC_WRITE;
173 if (!(oflags & FMODE_READ)) 295 if (!(oflags & FMODE_READ))
@@ -186,8 +308,6 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode,
186 308
187 /* BB add processing to set equivalent of mode - e.g. via CreateX with 309 /* BB add processing to set equivalent of mode - e.g. via CreateX with
188 ACLs */ 310 ACLs */
189 if (oplockEnabled)
190 oplock = REQ_OPLOCK;
191 311
192 buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL); 312 buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL);
193 if (buf == NULL) { 313 if (buf == NULL) {
@@ -196,17 +316,15 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode,
196 return -ENOMEM; 316 return -ENOMEM;
197 } 317 }
198 318
199 mode &= ~current->fs->umask;
200
201 /* 319 /*
202 * if we're not using unix extensions, see if we need to set 320 * if we're not using unix extensions, see if we need to set
203 * ATTR_READONLY on the create call 321 * ATTR_READONLY on the create call
204 */ 322 */
205 if (!pTcon->unix_ext && (mode & S_IWUGO) == 0) 323 if (!tcon->unix_ext && (mode & S_IWUGO) == 0)
206 create_options |= CREATE_OPTION_READONLY; 324 create_options |= CREATE_OPTION_READONLY;
207 325
208 if (cifs_sb->tcon->ses->capabilities & CAP_NT_SMBS) 326 if (cifs_sb->tcon->ses->capabilities & CAP_NT_SMBS)
209 rc = CIFSSMBOpen(xid, pTcon, full_path, disposition, 327 rc = CIFSSMBOpen(xid, tcon, full_path, disposition,
210 desiredAccess, create_options, 328 desiredAccess, create_options,
211 &fileHandle, &oplock, buf, cifs_sb->local_nls, 329 &fileHandle, &oplock, buf, cifs_sb->local_nls,
212 cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); 330 cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR);
@@ -215,128 +333,119 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode,
215 333
216 if (rc == -EIO) { 334 if (rc == -EIO) {
217 /* old server, retry the open legacy style */ 335 /* old server, retry the open legacy style */
218 rc = SMBLegacyOpen(xid, pTcon, full_path, disposition, 336 rc = SMBLegacyOpen(xid, tcon, full_path, disposition,
219 desiredAccess, create_options, 337 desiredAccess, create_options,
220 &fileHandle, &oplock, buf, cifs_sb->local_nls, 338 &fileHandle, &oplock, buf, cifs_sb->local_nls,
221 cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); 339 cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR);
222 } 340 }
223 if (rc) { 341 if (rc) {
224 cFYI(1, ("cifs_create returned 0x%x", rc)); 342 cFYI(1, ("cifs_create returned 0x%x", rc));
225 } else { 343 goto cifs_create_out;
226 /* If Open reported that we actually created a file 344 }
227 then we now have to set the mode if possible */ 345
228 if ((pTcon->unix_ext) && (oplock & CIFS_CREATE_ACTION)) { 346 /* If Open reported that we actually created a file
229 struct cifs_unix_set_info_args args = { 347 then we now have to set the mode if possible */
348 if ((tcon->unix_ext) && (oplock & CIFS_CREATE_ACTION)) {
349 struct cifs_unix_set_info_args args = {
230 .mode = mode, 350 .mode = mode,
231 .ctime = NO_CHANGE_64, 351 .ctime = NO_CHANGE_64,
232 .atime = NO_CHANGE_64, 352 .atime = NO_CHANGE_64,
233 .mtime = NO_CHANGE_64, 353 .mtime = NO_CHANGE_64,
234 .device = 0, 354 .device = 0,
235 }; 355 };
236 356
237 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID) { 357 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID) {
238 args.uid = (__u64) current_fsuid(); 358 args.uid = (__u64) current_fsuid();
239 if (inode->i_mode & S_ISGID) 359 if (inode->i_mode & S_ISGID)
240 args.gid = (__u64) inode->i_gid; 360 args.gid = (__u64) inode->i_gid;
241 else 361 else
242 args.gid = (__u64) current_fsgid(); 362 args.gid = (__u64) current_fsgid();
243 } else {
244 args.uid = NO_CHANGE_64;
245 args.gid = NO_CHANGE_64;
246 }
247 CIFSSMBUnixSetInfo(xid, pTcon, full_path, &args,
248 cifs_sb->local_nls,
249 cifs_sb->mnt_cifs_flags &
250 CIFS_MOUNT_MAP_SPECIAL_CHR);
251 } else { 363 } else {
252 /* BB implement mode setting via Windows security 364 args.uid = NO_CHANGE_64;
253 descriptors e.g. */ 365 args.gid = NO_CHANGE_64;
254 /* CIFSSMBWinSetPerms(xid,pTcon,path,mode,-1,-1,nls);*/
255
256 /* Could set r/o dos attribute if mode & 0222 == 0 */
257 } 366 }
367 CIFSSMBUnixSetInfo(xid, tcon, full_path, &args,
368 cifs_sb->local_nls,
369 cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR);
370 } else {
371 /* BB implement mode setting via Windows security
372 descriptors e.g. */
373 /* CIFSSMBWinSetPerms(xid,tcon,path,mode,-1,-1,nls);*/
258 374
259 /* server might mask mode so we have to query for it */ 375 /* Could set r/o dos attribute if mode & 0222 == 0 */
260 if (pTcon->unix_ext) 376 }
261 rc = cifs_get_inode_info_unix(&newinode, full_path, 377
262 inode->i_sb, xid); 378cifs_create_get_file_info:
263 else { 379 /* server might mask mode so we have to query for it */
264 rc = cifs_get_inode_info(&newinode, full_path, 380 if (tcon->unix_ext)
265 buf, inode->i_sb, xid, 381 rc = cifs_get_inode_info_unix(&newinode, full_path,
266 &fileHandle); 382 inode->i_sb, xid);
267 if (newinode) { 383 else {
268 if (cifs_sb->mnt_cifs_flags & 384 rc = cifs_get_inode_info(&newinode, full_path, buf,
269 CIFS_MOUNT_DYNPERM) 385 inode->i_sb, xid, &fileHandle);
270 newinode->i_mode = mode; 386 if (newinode) {
271 if ((oplock & CIFS_CREATE_ACTION) && 387 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DYNPERM)
272 (cifs_sb->mnt_cifs_flags & 388 newinode->i_mode = mode;
273 CIFS_MOUNT_SET_UID)) { 389 if ((oplock & CIFS_CREATE_ACTION) &&
274 newinode->i_uid = current_fsuid(); 390 (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID)) {
275 if (inode->i_mode & S_ISGID) 391 newinode->i_uid = current_fsuid();
276 newinode->i_gid = 392 if (inode->i_mode & S_ISGID)
277 inode->i_gid; 393 newinode->i_gid = inode->i_gid;
278 else 394 else
279 newinode->i_gid = 395 newinode->i_gid = current_fsgid();
280 current_fsgid();
281 }
282 } 396 }
283 } 397 }
398 }
284 399
285 if (rc != 0) { 400cifs_create_set_dentry:
286 cFYI(1, 401 if (rc == 0)
287 ("Create worked but get_inode_info failed rc = %d", 402 setup_cifs_dentry(tcon, direntry, newinode);
288 rc)); 403 else
289 } else { 404 cFYI(1, ("Create worked, get_inode_info failed rc = %d", rc));
290 if (pTcon->nocase) 405
291 direntry->d_op = &cifs_ci_dentry_ops; 406 /* nfsd case - nfs srv does not set nd */
292 else 407 if ((nd == NULL) || (!(nd->flags & LOOKUP_OPEN))) {
293 direntry->d_op = &cifs_dentry_ops; 408 /* mknod case - do not leave file open */
294 d_instantiate(direntry, newinode); 409 CIFSSMBClose(xid, tcon, fileHandle);
295 } 410 } else if (newinode) {
296 if ((nd == NULL /* nfsd case - nfs srv does not set nd */) || 411 struct cifsFileInfo *pCifsFile =
297 (!(nd->flags & LOOKUP_OPEN))) { 412 kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL);
298 /* mknod case - do not leave file open */ 413
299 CIFSSMBClose(xid, pTcon, fileHandle); 414 if (pCifsFile == NULL)
300 } else if (newinode) { 415 goto cifs_create_out;
301 pCifsFile = 416 pCifsFile->netfid = fileHandle;
302 kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL); 417 pCifsFile->pid = current->tgid;
303 418 pCifsFile->pInode = newinode;
304 if (pCifsFile == NULL) 419 pCifsFile->invalidHandle = false;
305 goto cifs_create_out; 420 pCifsFile->closePend = false;
306 pCifsFile->netfid = fileHandle; 421 init_MUTEX(&pCifsFile->fh_sem);
307 pCifsFile->pid = current->tgid; 422 mutex_init(&pCifsFile->lock_mutex);
308 pCifsFile->pInode = newinode; 423 INIT_LIST_HEAD(&pCifsFile->llist);
309 pCifsFile->invalidHandle = false; 424 atomic_set(&pCifsFile->wrtPending, 0);
310 pCifsFile->closePend = false; 425
311 init_MUTEX(&pCifsFile->fh_sem); 426 /* set the following in open now
312 mutex_init(&pCifsFile->lock_mutex);
313 INIT_LIST_HEAD(&pCifsFile->llist);
314 atomic_set(&pCifsFile->wrtPending, 0);
315
316 /* set the following in open now
317 pCifsFile->pfile = file; */ 427 pCifsFile->pfile = file; */
318 write_lock(&GlobalSMBSeslock); 428 write_lock(&GlobalSMBSeslock);
319 list_add(&pCifsFile->tlist, &pTcon->openFileList); 429 list_add(&pCifsFile->tlist, &tcon->openFileList);
320 pCifsInode = CIFS_I(newinode); 430 pCifsInode = CIFS_I(newinode);
321 if (pCifsInode) { 431 if (pCifsInode) {
322 /* if readable file instance put first in list*/ 432 /* if readable file instance put first in list*/
323 if (write_only) { 433 if (write_only) {
324 list_add_tail(&pCifsFile->flist, 434 list_add_tail(&pCifsFile->flist,
325 &pCifsInode->openFileList); 435 &pCifsInode->openFileList);
326 } else { 436 } else {
327 list_add(&pCifsFile->flist, 437 list_add(&pCifsFile->flist,
328 &pCifsInode->openFileList); 438 &pCifsInode->openFileList);
329 }
330 if ((oplock & 0xF) == OPLOCK_EXCLUSIVE) {
331 pCifsInode->clientCanCacheAll = true;
332 pCifsInode->clientCanCacheRead = true;
333 cFYI(1, ("Exclusive Oplock inode %p",
334 newinode));
335 } else if ((oplock & 0xF) == OPLOCK_READ)
336 pCifsInode->clientCanCacheRead = true;
337 } 439 }
338 write_unlock(&GlobalSMBSeslock); 440 if ((oplock & 0xF) == OPLOCK_EXCLUSIVE) {
441 pCifsInode->clientCanCacheAll = true;
442 pCifsInode->clientCanCacheRead = true;
443 cFYI(1, ("Exclusive Oplock inode %p",
444 newinode));
445 } else if ((oplock & 0xF) == OPLOCK_READ)
446 pCifsInode->clientCanCacheRead = true;
339 } 447 }
448 write_unlock(&GlobalSMBSeslock);
340 } 449 }
341cifs_create_out: 450cifs_create_out:
342 kfree(buf); 451 kfree(buf);
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 5ab9896fdcb2..4690a360c855 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -199,6 +199,49 @@ static void fill_fake_finddataunix(FILE_UNIX_BASIC_INFO *pfnd_dat,
199 pfnd_dat->Gid = cpu_to_le64(pinode->i_gid); 199 pfnd_dat->Gid = cpu_to_le64(pinode->i_gid);
200} 200}
201 201
202/**
203 * cifs_new inode - create new inode, initialize, and hash it
204 * @sb - pointer to superblock
205 * @inum - if valid pointer and serverino is enabled, replace i_ino with val
206 *
207 * Create a new inode, initialize it for CIFS and hash it. Returns the new
208 * inode or NULL if one couldn't be allocated.
209 *
210 * If the share isn't mounted with "serverino" or inum is a NULL pointer then
211 * we'll just use the inode number assigned by new_inode(). Note that this can
212 * mean i_ino collisions since the i_ino assigned by new_inode is not
213 * guaranteed to be unique.
214 */
215struct inode *
216cifs_new_inode(struct super_block *sb, __u64 *inum)
217{
218 struct inode *inode;
219
220 inode = new_inode(sb);
221 if (inode == NULL)
222 return NULL;
223
224 /*
225 * BB: Is i_ino == 0 legal? Here, we assume that it is. If it isn't we
226 * stop passing inum as ptr. Are there sanity checks we can use to
227 * ensure that the server is really filling in that field? Also,
228 * if serverino is disabled, perhaps we should be using iunique()?
229 */
230 if (inum && (CIFS_SB(sb)->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM))
231 inode->i_ino = (unsigned long) *inum;
232
233 /*
234 * must set this here instead of cifs_alloc_inode since VFS will
235 * clobber i_flags
236 */
237 if (sb->s_flags & MS_NOATIME)
238 inode->i_flags |= S_NOATIME | S_NOCMTIME;
239
240 insert_inode_hash(inode);
241
242 return inode;
243}
244
202int cifs_get_inode_info_unix(struct inode **pinode, 245int cifs_get_inode_info_unix(struct inode **pinode,
203 const unsigned char *full_path, struct super_block *sb, int xid) 246 const unsigned char *full_path, struct super_block *sb, int xid)
204{ 247{
@@ -233,22 +276,11 @@ int cifs_get_inode_info_unix(struct inode **pinode,
233 276
234 /* get new inode */ 277 /* get new inode */
235 if (*pinode == NULL) { 278 if (*pinode == NULL) {
236 *pinode = new_inode(sb); 279 *pinode = cifs_new_inode(sb, &find_data.UniqueId);
237 if (*pinode == NULL) { 280 if (*pinode == NULL) {
238 rc = -ENOMEM; 281 rc = -ENOMEM;
239 goto cgiiu_exit; 282 goto cgiiu_exit;
240 } 283 }
241 /* Is an i_ino of zero legal? */
242 /* note ino incremented to unique num in new_inode */
243 /* Are there sanity checks we can use to ensure that
244 the server is really filling in that field? */
245 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM)
246 (*pinode)->i_ino = (unsigned long)find_data.UniqueId;
247
248 if (sb->s_flags & MS_NOATIME)
249 (*pinode)->i_flags |= S_NOATIME | S_NOCMTIME;
250
251 insert_inode_hash(*pinode);
252 } 284 }
253 285
254 inode = *pinode; 286 inode = *pinode;
@@ -465,11 +497,9 @@ int cifs_get_inode_info(struct inode **pinode,
465 497
466 /* get new inode */ 498 /* get new inode */
467 if (*pinode == NULL) { 499 if (*pinode == NULL) {
468 *pinode = new_inode(sb); 500 __u64 inode_num;
469 if (*pinode == NULL) { 501 __u64 *pinum = &inode_num;
470 rc = -ENOMEM; 502
471 goto cgii_exit;
472 }
473 /* Is an i_ino of zero legal? Can we use that to check 503 /* Is an i_ino of zero legal? Can we use that to check
474 if the server supports returning inode numbers? Are 504 if the server supports returning inode numbers? Are
475 there other sanity checks we can use to ensure that 505 there other sanity checks we can use to ensure that
@@ -486,22 +516,26 @@ int cifs_get_inode_info(struct inode **pinode,
486 516
487 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM) { 517 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM) {
488 int rc1 = 0; 518 int rc1 = 0;
489 __u64 inode_num;
490 519
491 rc1 = CIFSGetSrvInodeNumber(xid, pTcon, 520 rc1 = CIFSGetSrvInodeNumber(xid, pTcon,
492 full_path, &inode_num, 521 full_path, pinum,
493 cifs_sb->local_nls, 522 cifs_sb->local_nls,
494 cifs_sb->mnt_cifs_flags & 523 cifs_sb->mnt_cifs_flags &
495 CIFS_MOUNT_MAP_SPECIAL_CHR); 524 CIFS_MOUNT_MAP_SPECIAL_CHR);
496 if (rc1) { 525 if (rc1) {
497 cFYI(1, ("GetSrvInodeNum rc %d", rc1)); 526 cFYI(1, ("GetSrvInodeNum rc %d", rc1));
527 pinum = NULL;
498 /* BB EOPNOSUPP disable SERVER_INUM? */ 528 /* BB EOPNOSUPP disable SERVER_INUM? */
499 } else /* do we need cast or hash to ino? */ 529 }
500 (*pinode)->i_ino = inode_num; 530 } else {
501 } /* else ino incremented to unique num in new_inode*/ 531 pinum = NULL;
502 if (sb->s_flags & MS_NOATIME) 532 }
503 (*pinode)->i_flags |= S_NOATIME | S_NOCMTIME; 533
504 insert_inode_hash(*pinode); 534 *pinode = cifs_new_inode(sb, pinum);
535 if (*pinode == NULL) {
536 rc = -ENOMEM;
537 goto cgii_exit;
538 }
505 } 539 }
506 inode = *pinode; 540 inode = *pinode;
507 cifsInfo = CIFS_I(inode); 541 cifsInfo = CIFS_I(inode);
@@ -621,7 +655,7 @@ static const struct inode_operations cifs_ipc_inode_ops = {
621 .lookup = cifs_lookup, 655 .lookup = cifs_lookup,
622}; 656};
623 657
624static char *build_path_to_root(struct cifs_sb_info *cifs_sb) 658char *cifs_build_path_to_root(struct cifs_sb_info *cifs_sb)
625{ 659{
626 int pplen = cifs_sb->prepathlen; 660 int pplen = cifs_sb->prepathlen;
627 int dfsplen; 661 int dfsplen;
@@ -678,7 +712,7 @@ struct inode *cifs_iget(struct super_block *sb, unsigned long ino)
678 return inode; 712 return inode;
679 713
680 cifs_sb = CIFS_SB(inode->i_sb); 714 cifs_sb = CIFS_SB(inode->i_sb);
681 full_path = build_path_to_root(cifs_sb); 715 full_path = cifs_build_path_to_root(cifs_sb);
682 if (full_path == NULL) 716 if (full_path == NULL)
683 return ERR_PTR(-ENOMEM); 717 return ERR_PTR(-ENOMEM);
684 718
@@ -1017,7 +1051,7 @@ out_reval:
1017 return rc; 1051 return rc;
1018} 1052}
1019 1053
1020static void posix_fill_in_inode(struct inode *tmp_inode, 1054void posix_fill_in_inode(struct inode *tmp_inode,
1021 FILE_UNIX_BASIC_INFO *pData, int isNewInode) 1055 FILE_UNIX_BASIC_INFO *pData, int isNewInode)
1022{ 1056{
1023 struct cifsInodeInfo *cifsInfo = CIFS_I(tmp_inode); 1057 struct cifsInodeInfo *cifsInfo = CIFS_I(tmp_inode);
@@ -1114,24 +1148,14 @@ int cifs_mkdir(struct inode *inode, struct dentry *direntry, int mode)
1114 else 1148 else
1115 direntry->d_op = &cifs_dentry_ops; 1149 direntry->d_op = &cifs_dentry_ops;
1116 1150
1117 newinode = new_inode(inode->i_sb); 1151 newinode = cifs_new_inode(inode->i_sb,
1152 &pInfo->UniqueId);
1118 if (newinode == NULL) { 1153 if (newinode == NULL) {
1119 kfree(pInfo); 1154 kfree(pInfo);
1120 goto mkdir_get_info; 1155 goto mkdir_get_info;
1121 } 1156 }
1122 1157
1123 /* Is an i_ino of zero legal? */
1124 /* Are there sanity checks we can use to ensure that
1125 the server is really filling in that field? */
1126 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM) {
1127 newinode->i_ino =
1128 (unsigned long)pInfo->UniqueId;
1129 } /* note ino incremented to unique num in new_inode */
1130 if (inode->i_sb->s_flags & MS_NOATIME)
1131 newinode->i_flags |= S_NOATIME | S_NOCMTIME;
1132 newinode->i_nlink = 2; 1158 newinode->i_nlink = 2;
1133
1134 insert_inode_hash(newinode);
1135 d_instantiate(direntry, newinode); 1159 d_instantiate(direntry, newinode);
1136 1160
1137 /* we already checked in POSIXCreate whether 1161 /* we already checked in POSIXCreate whether
@@ -1285,6 +1309,11 @@ int cifs_rmdir(struct inode *inode, struct dentry *direntry)
1285 cifsInode = CIFS_I(direntry->d_inode); 1309 cifsInode = CIFS_I(direntry->d_inode);
1286 cifsInode->time = 0; /* force revalidate to go get info when 1310 cifsInode->time = 0; /* force revalidate to go get info when
1287 needed */ 1311 needed */
1312
1313 cifsInode = CIFS_I(inode);
1314 cifsInode->time = 0; /* force revalidate to get parent dir info
1315 since cached search results now invalid */
1316
1288 direntry->d_inode->i_ctime = inode->i_ctime = inode->i_mtime = 1317 direntry->d_inode->i_ctime = inode->i_ctime = inode->i_mtime =
1289 current_fs_time(inode->i_sb); 1318 current_fs_time(inode->i_sb);
1290 1319
diff --git a/fs/cifs/md5.c b/fs/cifs/md5.c
index 462bbfefd4b6..98b66a54c319 100644
--- a/fs/cifs/md5.c
+++ b/fs/cifs/md5.c
@@ -10,8 +10,8 @@
10 * with every copy. 10 * with every copy.
11 * 11 *
12 * To compute the message digest of a chunk of bytes, declare an 12 * To compute the message digest of a chunk of bytes, declare an
13 * MD5Context structure, pass it to MD5Init, call MD5Update as 13 * MD5Context structure, pass it to cifs_MD5_init, call cifs_MD5_update as
14 * needed on buffers full of bytes, and then call MD5Final, which 14 * needed on buffers full of bytes, and then call cifs_MD5_final, which
15 * will fill a supplied 16-byte array with the digest. 15 * will fill a supplied 16-byte array with the digest.
16 */ 16 */
17 17
@@ -45,7 +45,7 @@ byteReverse(unsigned char *buf, unsigned longs)
45 * initialization constants. 45 * initialization constants.
46 */ 46 */
47void 47void
48MD5Init(struct MD5Context *ctx) 48cifs_MD5_init(struct MD5Context *ctx)
49{ 49{
50 ctx->buf[0] = 0x67452301; 50 ctx->buf[0] = 0x67452301;
51 ctx->buf[1] = 0xefcdab89; 51 ctx->buf[1] = 0xefcdab89;
@@ -61,7 +61,7 @@ MD5Init(struct MD5Context *ctx)
61 * of bytes. 61 * of bytes.
62 */ 62 */
63void 63void
64MD5Update(struct MD5Context *ctx, unsigned char const *buf, unsigned len) 64cifs_MD5_update(struct MD5Context *ctx, unsigned char const *buf, unsigned len)
65{ 65{
66 register __u32 t; 66 register __u32 t;
67 67
@@ -110,7 +110,7 @@ MD5Update(struct MD5Context *ctx, unsigned char const *buf, unsigned len)
110 * 1 0* (64-bit count of bits processed, MSB-first) 110 * 1 0* (64-bit count of bits processed, MSB-first)
111 */ 111 */
112void 112void
113MD5Final(unsigned char digest[16], struct MD5Context *ctx) 113cifs_MD5_final(unsigned char digest[16], struct MD5Context *ctx)
114{ 114{
115 unsigned int count; 115 unsigned int count;
116 unsigned char *p; 116 unsigned char *p;
@@ -165,7 +165,7 @@ MD5Final(unsigned char digest[16], struct MD5Context *ctx)
165 165
166/* 166/*
167 * The core of the MD5 algorithm, this alters an existing MD5 hash to 167 * The core of the MD5 algorithm, this alters an existing MD5 hash to
168 * reflect the addition of 16 longwords of new data. MD5Update blocks 168 * reflect the addition of 16 longwords of new data. cifs_MD5_update blocks
169 * the data and converts bytes into longwords for this routine. 169 * the data and converts bytes into longwords for this routine.
170 */ 170 */
171static void 171static void
@@ -267,9 +267,9 @@ hmac_md5_init_rfc2104(unsigned char *key, int key_len,
267 unsigned char tk[16]; 267 unsigned char tk[16];
268 struct MD5Context tctx; 268 struct MD5Context tctx;
269 269
270 MD5Init(&tctx); 270 cifs_MD5_init(&tctx);
271 MD5Update(&tctx, key, key_len); 271 cifs_MD5_update(&tctx, key, key_len);
272 MD5Final(tk, &tctx); 272 cifs_MD5_final(tk, &tctx);
273 273
274 key = tk; 274 key = tk;
275 key_len = 16; 275 key_len = 16;
@@ -287,8 +287,8 @@ hmac_md5_init_rfc2104(unsigned char *key, int key_len,
287 ctx->k_opad[i] ^= 0x5c; 287 ctx->k_opad[i] ^= 0x5c;
288 } 288 }
289 289
290 MD5Init(&ctx->ctx); 290 cifs_MD5_init(&ctx->ctx);
291 MD5Update(&ctx->ctx, ctx->k_ipad, 64); 291 cifs_MD5_update(&ctx->ctx, ctx->k_ipad, 64);
292} 292}
293#endif 293#endif
294 294
@@ -317,8 +317,8 @@ hmac_md5_init_limK_to_64(const unsigned char *key, int key_len,
317 ctx->k_opad[i] ^= 0x5c; 317 ctx->k_opad[i] ^= 0x5c;
318 } 318 }
319 319
320 MD5Init(&ctx->ctx); 320 cifs_MD5_init(&ctx->ctx);
321 MD5Update(&ctx->ctx, ctx->k_ipad, 64); 321 cifs_MD5_update(&ctx->ctx, ctx->k_ipad, 64);
322} 322}
323 323
324/*********************************************************************** 324/***********************************************************************
@@ -328,7 +328,7 @@ void
328hmac_md5_update(const unsigned char *text, int text_len, 328hmac_md5_update(const unsigned char *text, int text_len,
329 struct HMACMD5Context *ctx) 329 struct HMACMD5Context *ctx)
330{ 330{
331 MD5Update(&ctx->ctx, text, text_len); /* then text of datagram */ 331 cifs_MD5_update(&ctx->ctx, text, text_len); /* then text of datagram */
332} 332}
333 333
334/*********************************************************************** 334/***********************************************************************
@@ -339,12 +339,12 @@ hmac_md5_final(unsigned char *digest, struct HMACMD5Context *ctx)
339{ 339{
340 struct MD5Context ctx_o; 340 struct MD5Context ctx_o;
341 341
342 MD5Final(digest, &ctx->ctx); 342 cifs_MD5_final(digest, &ctx->ctx);
343 343
344 MD5Init(&ctx_o); 344 cifs_MD5_init(&ctx_o);
345 MD5Update(&ctx_o, ctx->k_opad, 64); 345 cifs_MD5_update(&ctx_o, ctx->k_opad, 64);
346 MD5Update(&ctx_o, digest, 16); 346 cifs_MD5_update(&ctx_o, digest, 16);
347 MD5Final(digest, &ctx_o); 347 cifs_MD5_final(digest, &ctx_o);
348} 348}
349 349
350/*********************************************************** 350/***********************************************************
diff --git a/fs/cifs/md5.h b/fs/cifs/md5.h
index f7d4f4197bac..6fba8cb402fd 100644
--- a/fs/cifs/md5.h
+++ b/fs/cifs/md5.h
@@ -20,10 +20,10 @@ struct HMACMD5Context {
20}; 20};
21#endif /* _HMAC_MD5_H */ 21#endif /* _HMAC_MD5_H */
22 22
23void MD5Init(struct MD5Context *context); 23void cifs_MD5_init(struct MD5Context *context);
24void MD5Update(struct MD5Context *context, unsigned char const *buf, 24void cifs_MD5_update(struct MD5Context *context, unsigned char const *buf,
25 unsigned len); 25 unsigned len);
26void MD5Final(unsigned char digest[16], struct MD5Context *context); 26void cifs_MD5_final(unsigned char digest[16], struct MD5Context *context);
27 27
28/* The following definitions come from lib/hmacmd5.c */ 28/* The following definitions come from lib/hmacmd5.c */
29 29
diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c
index 9f51f9bf0292..c2c01ff4c32c 100644
--- a/fs/cifs/readdir.c
+++ b/fs/cifs/readdir.c
@@ -56,35 +56,34 @@ static inline void dump_cifs_file_struct(struct file *file, char *label)
56} 56}
57#endif /* DEBUG2 */ 57#endif /* DEBUG2 */
58 58
59/* Returns one if new inode created (which therefore needs to be hashed) */ 59/* Returns 1 if new inode created, 2 if both dentry and inode were */
60/* Might check in the future if inode number changed so we can rehash inode */ 60/* Might check in the future if inode number changed so we can rehash inode */
61static int construct_dentry(struct qstr *qstring, struct file *file, 61static int
62 struct inode **ptmp_inode, struct dentry **pnew_dentry) 62construct_dentry(struct qstr *qstring, struct file *file,
63 struct inode **ptmp_inode, struct dentry **pnew_dentry,
64 __u64 *inum)
63{ 65{
64 struct dentry *tmp_dentry; 66 struct dentry *tmp_dentry = NULL;
65 struct cifs_sb_info *cifs_sb; 67 struct super_block *sb = file->f_path.dentry->d_sb;
66 struct cifsTconInfo *pTcon;
67 int rc = 0; 68 int rc = 0;
68 69
69 cFYI(1, ("For %s", qstring->name)); 70 cFYI(1, ("For %s", qstring->name));
70 cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
71 pTcon = cifs_sb->tcon;
72 71
73 qstring->hash = full_name_hash(qstring->name, qstring->len); 72 qstring->hash = full_name_hash(qstring->name, qstring->len);
74 tmp_dentry = d_lookup(file->f_path.dentry, qstring); 73 tmp_dentry = d_lookup(file->f_path.dentry, qstring);
75 if (tmp_dentry) { 74 if (tmp_dentry) {
75 /* BB: overwrite old name? i.e. tmp_dentry->d_name and
76 * tmp_dentry->d_name.len??
77 */
76 cFYI(0, ("existing dentry with inode 0x%p", 78 cFYI(0, ("existing dentry with inode 0x%p",
77 tmp_dentry->d_inode)); 79 tmp_dentry->d_inode));
78 *ptmp_inode = tmp_dentry->d_inode; 80 *ptmp_inode = tmp_dentry->d_inode;
79/* BB overwrite old name? i.e. tmp_dentry->d_name and tmp_dentry->d_name.len??*/
80 if (*ptmp_inode == NULL) { 81 if (*ptmp_inode == NULL) {
81 *ptmp_inode = new_inode(file->f_path.dentry->d_sb); 82 *ptmp_inode = cifs_new_inode(sb, inum);
82 if (*ptmp_inode == NULL) 83 if (*ptmp_inode == NULL)
83 return rc; 84 return rc;
84 rc = 1; 85 rc = 1;
85 } 86 }
86 if (file->f_path.dentry->d_sb->s_flags & MS_NOATIME)
87 (*ptmp_inode)->i_flags |= S_NOATIME | S_NOCMTIME;
88 } else { 87 } else {
89 tmp_dentry = d_alloc(file->f_path.dentry, qstring); 88 tmp_dentry = d_alloc(file->f_path.dentry, qstring);
90 if (tmp_dentry == NULL) { 89 if (tmp_dentry == NULL) {
@@ -93,15 +92,14 @@ static int construct_dentry(struct qstr *qstring, struct file *file,
93 return rc; 92 return rc;
94 } 93 }
95 94
96 *ptmp_inode = new_inode(file->f_path.dentry->d_sb); 95 if (CIFS_SB(sb)->tcon->nocase)
97 if (pTcon->nocase)
98 tmp_dentry->d_op = &cifs_ci_dentry_ops; 96 tmp_dentry->d_op = &cifs_ci_dentry_ops;
99 else 97 else
100 tmp_dentry->d_op = &cifs_dentry_ops; 98 tmp_dentry->d_op = &cifs_dentry_ops;
99
100 *ptmp_inode = cifs_new_inode(sb, inum);
101 if (*ptmp_inode == NULL) 101 if (*ptmp_inode == NULL)
102 return rc; 102 return rc;
103 if (file->f_path.dentry->d_sb->s_flags & MS_NOATIME)
104 (*ptmp_inode)->i_flags |= S_NOATIME | S_NOCMTIME;
105 rc = 2; 103 rc = 2;
106 } 104 }
107 105
@@ -822,7 +820,7 @@ static int find_cifs_entry(const int xid, struct cifsTconInfo *pTcon,
822/* inode num, inode type and filename returned */ 820/* inode num, inode type and filename returned */
823static int cifs_get_name_from_search_buf(struct qstr *pqst, 821static int cifs_get_name_from_search_buf(struct qstr *pqst,
824 char *current_entry, __u16 level, unsigned int unicode, 822 char *current_entry, __u16 level, unsigned int unicode,
825 struct cifs_sb_info *cifs_sb, int max_len, ino_t *pinum) 823 struct cifs_sb_info *cifs_sb, int max_len, __u64 *pinum)
826{ 824{
827 int rc = 0; 825 int rc = 0;
828 unsigned int len = 0; 826 unsigned int len = 0;
@@ -842,9 +840,7 @@ static int cifs_get_name_from_search_buf(struct qstr *pqst,
842 len = strnlen(filename, PATH_MAX); 840 len = strnlen(filename, PATH_MAX);
843 } 841 }
844 842
845 /* BB fixme - hash low and high 32 bits if not 64 bit arch BB */ 843 *pinum = pFindData->UniqueId;
846 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM)
847 *pinum = pFindData->UniqueId;
848 } else if (level == SMB_FIND_FILE_DIRECTORY_INFO) { 844 } else if (level == SMB_FIND_FILE_DIRECTORY_INFO) {
849 FILE_DIRECTORY_INFO *pFindData = 845 FILE_DIRECTORY_INFO *pFindData =
850 (FILE_DIRECTORY_INFO *)current_entry; 846 (FILE_DIRECTORY_INFO *)current_entry;
@@ -907,7 +903,7 @@ static int cifs_filldir(char *pfindEntry, struct file *file,
907 struct qstr qstring; 903 struct qstr qstring;
908 struct cifsFileInfo *pCifsF; 904 struct cifsFileInfo *pCifsF;
909 unsigned int obj_type; 905 unsigned int obj_type;
910 ino_t inum; 906 __u64 inum;
911 struct cifs_sb_info *cifs_sb; 907 struct cifs_sb_info *cifs_sb;
912 struct inode *tmp_inode; 908 struct inode *tmp_inode;
913 struct dentry *tmp_dentry; 909 struct dentry *tmp_dentry;
@@ -940,20 +936,18 @@ static int cifs_filldir(char *pfindEntry, struct file *file,
940 if (rc) 936 if (rc)
941 return rc; 937 return rc;
942 938
943 rc = construct_dentry(&qstring, file, &tmp_inode, &tmp_dentry); 939 /* only these two infolevels return valid inode numbers */
940 if (pCifsF->srch_inf.info_level == SMB_FIND_FILE_UNIX ||
941 pCifsF->srch_inf.info_level == SMB_FIND_FILE_ID_FULL_DIR_INFO)
942 rc = construct_dentry(&qstring, file, &tmp_inode, &tmp_dentry,
943 &inum);
944 else
945 rc = construct_dentry(&qstring, file, &tmp_inode, &tmp_dentry,
946 NULL);
947
944 if ((tmp_inode == NULL) || (tmp_dentry == NULL)) 948 if ((tmp_inode == NULL) || (tmp_dentry == NULL))
945 return -ENOMEM; 949 return -ENOMEM;
946 950
947 if (rc) {
948 /* inode created, we need to hash it with right inode number */
949 if (inum != 0) {
950 /* BB fixme - hash the 2 32 quantities bits together if
951 * necessary BB */
952 tmp_inode->i_ino = inum;
953 }
954 insert_inode_hash(tmp_inode);
955 }
956
957 /* we pass in rc below, indicating whether it is a new inode, 951 /* we pass in rc below, indicating whether it is a new inode,
958 so we can figure out whether to invalidate the inode cached 952 so we can figure out whether to invalidate the inode cached
959 data if the file has changed */ 953 data if the file has changed */
diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c
index 5f22de7b79a9..5c68b4282be9 100644
--- a/fs/cifs/sess.c
+++ b/fs/cifs/sess.c
@@ -34,15 +34,99 @@
34extern void SMBNTencrypt(unsigned char *passwd, unsigned char *c8, 34extern void SMBNTencrypt(unsigned char *passwd, unsigned char *c8,
35 unsigned char *p24); 35 unsigned char *p24);
36 36
37/* Checks if this is the first smb session to be reconnected after
38 the socket has been reestablished (so we know whether to use vc 0).
39 Called while holding the cifs_tcp_ses_lock, so do not block */
40static bool is_first_ses_reconnect(struct cifsSesInfo *ses)
41{
42 struct list_head *tmp;
43 struct cifsSesInfo *tmp_ses;
44
45 list_for_each(tmp, &ses->server->smb_ses_list) {
46 tmp_ses = list_entry(tmp, struct cifsSesInfo,
47 smb_ses_list);
48 if (tmp_ses->need_reconnect == false)
49 return false;
50 }
51 /* could not find a session that was already connected,
52 this must be the first one we are reconnecting */
53 return true;
54}
55
56/*
57 * vc number 0 is treated specially by some servers, and should be the
58 * first one we request. After that we can use vcnumbers up to maxvcs,
59 * one for each smb session (some Windows versions set maxvcs incorrectly
60 * so maxvc=1 can be ignored). If we have too many vcs, we can reuse
61 * any vc but zero (some servers reset the connection on vcnum zero)
62 *
63 */
64static __le16 get_next_vcnum(struct cifsSesInfo *ses)
65{
66 __u16 vcnum = 0;
67 struct list_head *tmp;
68 struct cifsSesInfo *tmp_ses;
69 __u16 max_vcs = ses->server->max_vcs;
70 __u16 i;
71 int free_vc_found = 0;
72
73 /* Quoting the MS-SMB specification: "Windows-based SMB servers set this
74 field to one but do not enforce this limit, which allows an SMB client
75 to establish more virtual circuits than allowed by this value ... but
76 other server implementations can enforce this limit." */
77 if (max_vcs < 2)
78 max_vcs = 0xFFFF;
79
80 write_lock(&cifs_tcp_ses_lock);
81 if ((ses->need_reconnect) && is_first_ses_reconnect(ses))
82 goto get_vc_num_exit; /* vcnum will be zero */
83 for (i = ses->server->srv_count - 1; i < max_vcs; i++) {
84 if (i == 0) /* this is the only connection, use vc 0 */
85 break;
86
87 free_vc_found = 1;
88
89 list_for_each(tmp, &ses->server->smb_ses_list) {
90 tmp_ses = list_entry(tmp, struct cifsSesInfo,
91 smb_ses_list);
92 if (tmp_ses->vcnum == i) {
93 free_vc_found = 0;
94 break; /* found duplicate, try next vcnum */
95 }
96 }
97 if (free_vc_found)
98 break; /* we found a vcnumber that will work - use it */
99 }
100
101 if (i == 0)
102 vcnum = 0; /* for most common case, ie if one smb session, use
103 vc zero. Also for case when no free vcnum, zero
104 is safest to send (some clients only send zero) */
105 else if (free_vc_found == 0)
106 vcnum = 1; /* we can not reuse vc=0 safely, since some servers
107 reset all uids on that, but 1 is ok. */
108 else
109 vcnum = i;
110 ses->vcnum = vcnum;
111get_vc_num_exit:
112 write_unlock(&cifs_tcp_ses_lock);
113
114 return le16_to_cpu(vcnum);
115}
116
37static __u32 cifs_ssetup_hdr(struct cifsSesInfo *ses, SESSION_SETUP_ANDX *pSMB) 117static __u32 cifs_ssetup_hdr(struct cifsSesInfo *ses, SESSION_SETUP_ANDX *pSMB)
38{ 118{
39 __u32 capabilities = 0; 119 __u32 capabilities = 0;
40 120
41 /* init fields common to all four types of SessSetup */ 121 /* init fields common to all four types of SessSetup */
42 /* note that header is initialized to zero in header_assemble */ 122 /* Note that offsets for first seven fields in req struct are same */
123 /* in CIFS Specs so does not matter which of 3 forms of struct */
124 /* that we use in next few lines */
125 /* Note that header is initialized to zero in header_assemble */
43 pSMB->req.AndXCommand = 0xFF; 126 pSMB->req.AndXCommand = 0xFF;
44 pSMB->req.MaxBufferSize = cpu_to_le16(ses->server->maxBuf); 127 pSMB->req.MaxBufferSize = cpu_to_le16(ses->server->maxBuf);
45 pSMB->req.MaxMpxCount = cpu_to_le16(ses->server->maxReq); 128 pSMB->req.MaxMpxCount = cpu_to_le16(ses->server->maxReq);
129 pSMB->req.VcNumber = get_next_vcnum(ses);
46 130
47 /* Now no need to set SMBFLG_CASELESS or obsolete CANONICAL PATH */ 131 /* Now no need to set SMBFLG_CASELESS or obsolete CANONICAL PATH */
48 132
@@ -71,7 +155,6 @@ static __u32 cifs_ssetup_hdr(struct cifsSesInfo *ses, SESSION_SETUP_ANDX *pSMB)
71 if (ses->capabilities & CAP_UNIX) 155 if (ses->capabilities & CAP_UNIX)
72 capabilities |= CAP_UNIX; 156 capabilities |= CAP_UNIX;
73 157
74 /* BB check whether to init vcnum BB */
75 return capabilities; 158 return capabilities;
76} 159}
77 160
@@ -228,7 +311,7 @@ static int decode_unicode_ssetup(char **pbcc_area, int bleft,
228 311
229 kfree(ses->serverOS); 312 kfree(ses->serverOS);
230 /* UTF-8 string will not grow more than four times as big as UCS-16 */ 313 /* UTF-8 string will not grow more than four times as big as UCS-16 */
231 ses->serverOS = kzalloc(4 * len, GFP_KERNEL); 314 ses->serverOS = kzalloc((4 * len) + 2 /* trailing null */, GFP_KERNEL);
232 if (ses->serverOS != NULL) 315 if (ses->serverOS != NULL)
233 cifs_strfromUCS_le(ses->serverOS, (__le16 *)data, len, nls_cp); 316 cifs_strfromUCS_le(ses->serverOS, (__le16 *)data, len, nls_cp);
234 data += 2 * (len + 1); 317 data += 2 * (len + 1);
@@ -241,7 +324,7 @@ static int decode_unicode_ssetup(char **pbcc_area, int bleft,
241 return rc; 324 return rc;
242 325
243 kfree(ses->serverNOS); 326 kfree(ses->serverNOS);
244 ses->serverNOS = kzalloc(4 * len, GFP_KERNEL); /* BB this is wrong length FIXME BB */ 327 ses->serverNOS = kzalloc((4 * len) + 2 /* trailing null */, GFP_KERNEL);
245 if (ses->serverNOS != NULL) { 328 if (ses->serverNOS != NULL) {
246 cifs_strfromUCS_le(ses->serverNOS, (__le16 *)data, len, 329 cifs_strfromUCS_le(ses->serverNOS, (__le16 *)data, len,
247 nls_cp); 330 nls_cp);
diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c
index 7ebe6599ed3a..0ad3e2d116a6 100644
--- a/fs/cifs/transport.c
+++ b/fs/cifs/transport.c
@@ -154,81 +154,8 @@ void DeleteTconOplockQEntries(struct cifsTconInfo *tcon)
154 spin_unlock(&GlobalMid_Lock); 154 spin_unlock(&GlobalMid_Lock);
155} 155}
156 156
157int
158smb_send(struct socket *ssocket, struct smb_hdr *smb_buffer,
159 unsigned int smb_buf_length, struct sockaddr *sin, bool noblocksnd)
160{
161 int rc = 0;
162 int i = 0;
163 struct msghdr smb_msg;
164 struct kvec iov;
165 unsigned len = smb_buf_length + 4;
166
167 if (ssocket == NULL)
168 return -ENOTSOCK; /* BB eventually add reconnect code here */
169 iov.iov_base = smb_buffer;
170 iov.iov_len = len;
171
172 smb_msg.msg_name = sin;
173 smb_msg.msg_namelen = sizeof(struct sockaddr);
174 smb_msg.msg_control = NULL;
175 smb_msg.msg_controllen = 0;
176 if (noblocksnd)
177 smb_msg.msg_flags = MSG_DONTWAIT + MSG_NOSIGNAL;
178 else
179 smb_msg.msg_flags = MSG_NOSIGNAL;
180
181 /* smb header is converted in header_assemble. bcc and rest of SMB word
182 area, and byte area if necessary, is converted to littleendian in
183 cifssmb.c and RFC1001 len is converted to bigendian in smb_send
184 Flags2 is converted in SendReceive */
185
186 smb_buffer->smb_buf_length = cpu_to_be32(smb_buffer->smb_buf_length);
187 cFYI(1, ("Sending smb of length %d", smb_buf_length));
188 dump_smb(smb_buffer, len);
189
190 while (len > 0) {
191 rc = kernel_sendmsg(ssocket, &smb_msg, &iov, 1, len);
192 if ((rc == -ENOSPC) || (rc == -EAGAIN)) {
193 i++;
194 /* smaller timeout here than send2 since smaller size */
195 /* Although it may not be required, this also is smaller
196 oplock break time */
197 if (i > 12) {
198 cERROR(1,
199 ("sends on sock %p stuck for 7 seconds",
200 ssocket));
201 rc = -EAGAIN;
202 break;
203 }
204 msleep(1 << i);
205 continue;
206 }
207 if (rc < 0)
208 break;
209 else
210 i = 0; /* reset i after each successful send */
211 iov.iov_base += rc;
212 iov.iov_len -= rc;
213 len -= rc;
214 }
215
216 if (rc < 0) {
217 cERROR(1, ("Error %d sending data on socket to server", rc));
218 } else {
219 rc = 0;
220 }
221
222 /* Don't want to modify the buffer as a
223 side effect of this call. */
224 smb_buffer->smb_buf_length = smb_buf_length;
225
226 return rc;
227}
228
229static int 157static int
230smb_send2(struct TCP_Server_Info *server, struct kvec *iov, int n_vec, 158smb_sendv(struct TCP_Server_Info *server, struct kvec *iov, int n_vec)
231 struct sockaddr *sin, bool noblocksnd)
232{ 159{
233 int rc = 0; 160 int rc = 0;
234 int i = 0; 161 int i = 0;
@@ -243,11 +170,11 @@ smb_send2(struct TCP_Server_Info *server, struct kvec *iov, int n_vec,
243 if (ssocket == NULL) 170 if (ssocket == NULL)
244 return -ENOTSOCK; /* BB eventually add reconnect code here */ 171 return -ENOTSOCK; /* BB eventually add reconnect code here */
245 172
246 smb_msg.msg_name = sin; 173 smb_msg.msg_name = (struct sockaddr *) &server->addr.sockAddr;
247 smb_msg.msg_namelen = sizeof(struct sockaddr); 174 smb_msg.msg_namelen = sizeof(struct sockaddr);
248 smb_msg.msg_control = NULL; 175 smb_msg.msg_control = NULL;
249 smb_msg.msg_controllen = 0; 176 smb_msg.msg_controllen = 0;
250 if (noblocksnd) 177 if (server->noblocksnd)
251 smb_msg.msg_flags = MSG_DONTWAIT + MSG_NOSIGNAL; 178 smb_msg.msg_flags = MSG_DONTWAIT + MSG_NOSIGNAL;
252 else 179 else
253 smb_msg.msg_flags = MSG_NOSIGNAL; 180 smb_msg.msg_flags = MSG_NOSIGNAL;
@@ -272,7 +199,25 @@ smb_send2(struct TCP_Server_Info *server, struct kvec *iov, int n_vec,
272 n_vec - first_vec, total_len); 199 n_vec - first_vec, total_len);
273 if ((rc == -ENOSPC) || (rc == -EAGAIN)) { 200 if ((rc == -ENOSPC) || (rc == -EAGAIN)) {
274 i++; 201 i++;
275 if (i >= 14) { 202 /* if blocking send we try 3 times, since each can block
203 for 5 seconds. For nonblocking we have to try more
204 but wait increasing amounts of time allowing time for
205 socket to clear. The overall time we wait in either
206 case to send on the socket is about 15 seconds.
207 Similarly we wait for 15 seconds for
208 a response from the server in SendReceive[2]
209 for the server to send a response back for
210 most types of requests (except SMB Write
211 past end of file which can be slow, and
212 blocking lock operations). NFS waits slightly longer
213 than CIFS, but this can make it take longer for
214 nonresponsive servers to be detected and 15 seconds
215 is more than enough time for modern networks to
216 send a packet. In most cases if we fail to send
217 after the retries we will kill the socket and
218 reconnect which may clear the network problem.
219 */
220 if ((i >= 14) || (!server->noblocksnd && (i > 2))) {
276 cERROR(1, 221 cERROR(1,
277 ("sends on sock %p stuck for 15 seconds", 222 ("sends on sock %p stuck for 15 seconds",
278 ssocket)); 223 ssocket));
@@ -339,6 +284,18 @@ smb_send2(struct TCP_Server_Info *server, struct kvec *iov, int n_vec,
339 return rc; 284 return rc;
340} 285}
341 286
287int
288smb_send(struct TCP_Server_Info *server, struct smb_hdr *smb_buffer,
289 unsigned int smb_buf_length)
290{
291 struct kvec iov;
292
293 iov.iov_base = smb_buffer;
294 iov.iov_len = smb_buf_length + 4;
295
296 return smb_sendv(server, &iov, 1);
297}
298
342static int wait_for_free_request(struct cifsSesInfo *ses, const int long_op) 299static int wait_for_free_request(struct cifsSesInfo *ses, const int long_op)
343{ 300{
344 if (long_op == CIFS_ASYNC_OP) { 301 if (long_op == CIFS_ASYNC_OP) {
@@ -540,9 +497,7 @@ SendReceive2(const unsigned int xid, struct cifsSesInfo *ses,
540#ifdef CONFIG_CIFS_STATS2 497#ifdef CONFIG_CIFS_STATS2
541 atomic_inc(&ses->server->inSend); 498 atomic_inc(&ses->server->inSend);
542#endif 499#endif
543 rc = smb_send2(ses->server, iov, n_vec, 500 rc = smb_sendv(ses->server, iov, n_vec);
544 (struct sockaddr *) &(ses->server->addr.sockAddr),
545 ses->server->noblocksnd);
546#ifdef CONFIG_CIFS_STATS2 501#ifdef CONFIG_CIFS_STATS2
547 atomic_dec(&ses->server->inSend); 502 atomic_dec(&ses->server->inSend);
548 midQ->when_sent = jiffies; 503 midQ->when_sent = jiffies;
@@ -736,9 +691,7 @@ SendReceive(const unsigned int xid, struct cifsSesInfo *ses,
736#ifdef CONFIG_CIFS_STATS2 691#ifdef CONFIG_CIFS_STATS2
737 atomic_inc(&ses->server->inSend); 692 atomic_inc(&ses->server->inSend);
738#endif 693#endif
739 rc = smb_send(ses->server->ssocket, in_buf, in_buf->smb_buf_length, 694 rc = smb_send(ses->server, in_buf, in_buf->smb_buf_length);
740 (struct sockaddr *) &(ses->server->addr.sockAddr),
741 ses->server->noblocksnd);
742#ifdef CONFIG_CIFS_STATS2 695#ifdef CONFIG_CIFS_STATS2
743 atomic_dec(&ses->server->inSend); 696 atomic_dec(&ses->server->inSend);
744 midQ->when_sent = jiffies; 697 midQ->when_sent = jiffies;
@@ -879,9 +832,7 @@ send_nt_cancel(struct cifsTconInfo *tcon, struct smb_hdr *in_buf,
879 mutex_unlock(&ses->server->srv_mutex); 832 mutex_unlock(&ses->server->srv_mutex);
880 return rc; 833 return rc;
881 } 834 }
882 rc = smb_send(ses->server->ssocket, in_buf, in_buf->smb_buf_length, 835 rc = smb_send(ses->server, in_buf, in_buf->smb_buf_length);
883 (struct sockaddr *) &(ses->server->addr.sockAddr),
884 ses->server->noblocksnd);
885 mutex_unlock(&ses->server->srv_mutex); 836 mutex_unlock(&ses->server->srv_mutex);
886 return rc; 837 return rc;
887} 838}
@@ -973,9 +924,7 @@ SendReceiveBlockingLock(const unsigned int xid, struct cifsTconInfo *tcon,
973#ifdef CONFIG_CIFS_STATS2 924#ifdef CONFIG_CIFS_STATS2
974 atomic_inc(&ses->server->inSend); 925 atomic_inc(&ses->server->inSend);
975#endif 926#endif
976 rc = smb_send(ses->server->ssocket, in_buf, in_buf->smb_buf_length, 927 rc = smb_send(ses->server, in_buf, in_buf->smb_buf_length);
977 (struct sockaddr *) &(ses->server->addr.sockAddr),
978 ses->server->noblocksnd);
979#ifdef CONFIG_CIFS_STATS2 928#ifdef CONFIG_CIFS_STATS2
980 atomic_dec(&ses->server->inSend); 929 atomic_dec(&ses->server->inSend);
981 midQ->when_sent = jiffies; 930 midQ->when_sent = jiffies;
diff --git a/fs/compat.c b/fs/compat.c
index 65a070e705ab..d0145ca27572 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -1407,7 +1407,7 @@ int compat_do_execve(char * filename,
1407 bprm->cred = prepare_exec_creds(); 1407 bprm->cred = prepare_exec_creds();
1408 if (!bprm->cred) 1408 if (!bprm->cred)
1409 goto out_unlock; 1409 goto out_unlock;
1410 check_unsafe_exec(bprm); 1410 check_unsafe_exec(bprm, current->files);
1411 1411
1412 file = open_exec(filename); 1412 file = open_exec(filename);
1413 retval = PTR_ERR(file); 1413 retval = PTR_ERR(file);
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index 5235c67e7594..45e59d3c7f1f 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -538,6 +538,7 @@ static int dev_ifsioc(unsigned int fd, unsigned int cmd, unsigned long arg)
538 * cannot be fixed without breaking all existing apps. 538 * cannot be fixed without breaking all existing apps.
539 */ 539 */
540 case TUNSETIFF: 540 case TUNSETIFF:
541 case TUNGETIFF:
541 case SIOCGIFFLAGS: 542 case SIOCGIFFLAGS:
542 case SIOCGIFMETRIC: 543 case SIOCGIFMETRIC:
543 case SIOCGIFMTU: 544 case SIOCGIFMTU:
@@ -784,7 +785,7 @@ static int sg_ioctl_trans(unsigned int fd, unsigned int cmd, unsigned long arg)
784 785
785 if (copy_in_user(&sgio->status, &sgio32->status, 786 if (copy_in_user(&sgio->status, &sgio32->status,
786 (4 * sizeof(unsigned char)) + 787 (4 * sizeof(unsigned char)) +
787 (2 * sizeof(unsigned (short))) + 788 (2 * sizeof(unsigned short)) +
788 (3 * sizeof(int)))) 789 (3 * sizeof(int))))
789 return -EFAULT; 790 return -EFAULT;
790 791
@@ -1912,6 +1913,9 @@ COMPATIBLE_IOCTL(FIONREAD) /* This is also TIOCINQ */
1912/* 0x00 */ 1913/* 0x00 */
1913COMPATIBLE_IOCTL(FIBMAP) 1914COMPATIBLE_IOCTL(FIBMAP)
1914COMPATIBLE_IOCTL(FIGETBSZ) 1915COMPATIBLE_IOCTL(FIGETBSZ)
1916/* 'X' - originally XFS but some now in the VFS */
1917COMPATIBLE_IOCTL(FIFREEZE)
1918COMPATIBLE_IOCTL(FITHAW)
1915/* RAID */ 1919/* RAID */
1916COMPATIBLE_IOCTL(RAID_VERSION) 1920COMPATIBLE_IOCTL(RAID_VERSION)
1917COMPATIBLE_IOCTL(GET_ARRAY_INFO) 1921COMPATIBLE_IOCTL(GET_ARRAY_INFO)
@@ -1937,6 +1941,8 @@ ULONG_IOCTL(SET_BITMAP_FILE)
1937/* Big K */ 1941/* Big K */
1938COMPATIBLE_IOCTL(PIO_FONT) 1942COMPATIBLE_IOCTL(PIO_FONT)
1939COMPATIBLE_IOCTL(GIO_FONT) 1943COMPATIBLE_IOCTL(GIO_FONT)
1944COMPATIBLE_IOCTL(PIO_CMAP)
1945COMPATIBLE_IOCTL(GIO_CMAP)
1940ULONG_IOCTL(KDSIGACCEPT) 1946ULONG_IOCTL(KDSIGACCEPT)
1941COMPATIBLE_IOCTL(KDGETKEYCODE) 1947COMPATIBLE_IOCTL(KDGETKEYCODE)
1942COMPATIBLE_IOCTL(KDSETKEYCODE) 1948COMPATIBLE_IOCTL(KDSETKEYCODE)
@@ -1982,6 +1988,11 @@ COMPATIBLE_IOCTL(TUNSETNOCSUM)
1982COMPATIBLE_IOCTL(TUNSETDEBUG) 1988COMPATIBLE_IOCTL(TUNSETDEBUG)
1983COMPATIBLE_IOCTL(TUNSETPERSIST) 1989COMPATIBLE_IOCTL(TUNSETPERSIST)
1984COMPATIBLE_IOCTL(TUNSETOWNER) 1990COMPATIBLE_IOCTL(TUNSETOWNER)
1991COMPATIBLE_IOCTL(TUNSETLINK)
1992COMPATIBLE_IOCTL(TUNSETGROUP)
1993COMPATIBLE_IOCTL(TUNGETFEATURES)
1994COMPATIBLE_IOCTL(TUNSETOFFLOAD)
1995COMPATIBLE_IOCTL(TUNSETTXFILTER)
1985/* Big V */ 1996/* Big V */
1986COMPATIBLE_IOCTL(VT_SETMODE) 1997COMPATIBLE_IOCTL(VT_SETMODE)
1987COMPATIBLE_IOCTL(VT_GETMODE) 1998COMPATIBLE_IOCTL(VT_GETMODE)
@@ -2573,6 +2584,7 @@ HANDLE_IOCTL(SIOCGIFPFLAGS, dev_ifsioc)
2573HANDLE_IOCTL(SIOCGIFTXQLEN, dev_ifsioc) 2584HANDLE_IOCTL(SIOCGIFTXQLEN, dev_ifsioc)
2574HANDLE_IOCTL(SIOCSIFTXQLEN, dev_ifsioc) 2585HANDLE_IOCTL(SIOCSIFTXQLEN, dev_ifsioc)
2575HANDLE_IOCTL(TUNSETIFF, dev_ifsioc) 2586HANDLE_IOCTL(TUNSETIFF, dev_ifsioc)
2587HANDLE_IOCTL(TUNGETIFF, dev_ifsioc)
2576HANDLE_IOCTL(SIOCETHTOOL, ethtool_ioctl) 2588HANDLE_IOCTL(SIOCETHTOOL, ethtool_ioctl)
2577HANDLE_IOCTL(SIOCBONDENSLAVE, bond_ioctl) 2589HANDLE_IOCTL(SIOCBONDENSLAVE, bond_ioctl)
2578HANDLE_IOCTL(SIOCBONDRELEASE, bond_ioctl) 2590HANDLE_IOCTL(SIOCBONDRELEASE, bond_ioctl)
diff --git a/fs/dcache.c b/fs/dcache.c
index 937df0fb0da5..07e2d4a44bda 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -1180,7 +1180,7 @@ struct dentry *d_obtain_alias(struct inode *inode)
1180 iput(inode); 1180 iput(inode);
1181 return res; 1181 return res;
1182} 1182}
1183EXPORT_SYMBOL_GPL(d_obtain_alias); 1183EXPORT_SYMBOL(d_obtain_alias);
1184 1184
1185/** 1185/**
1186 * d_splice_alias - splice a disconnected dentry into the tree if one exists 1186 * d_splice_alias - splice a disconnected dentry into the tree if one exists
diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c
index c01e043670e2..f6caeb1d1106 100644
--- a/fs/ecryptfs/crypto.c
+++ b/fs/ecryptfs/crypto.c
@@ -1716,7 +1716,7 @@ static int ecryptfs_copy_filename(char **copied_name, size_t *copied_name_size,
1716{ 1716{
1717 int rc = 0; 1717 int rc = 0;
1718 1718
1719 (*copied_name) = kmalloc((name_size + 2), GFP_KERNEL); 1719 (*copied_name) = kmalloc((name_size + 1), GFP_KERNEL);
1720 if (!(*copied_name)) { 1720 if (!(*copied_name)) {
1721 rc = -ENOMEM; 1721 rc = -ENOMEM;
1722 goto out; 1722 goto out;
@@ -1726,7 +1726,7 @@ static int ecryptfs_copy_filename(char **copied_name, size_t *copied_name_size,
1726 * in printing out the 1726 * in printing out the
1727 * string in debug 1727 * string in debug
1728 * messages */ 1728 * messages */
1729 (*copied_name_size) = (name_size + 1); 1729 (*copied_name_size) = name_size;
1730out: 1730out:
1731 return rc; 1731 return rc;
1732} 1732}
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index ba2f9ec71192..011b9b8c90c6 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -234,8 +234,6 @@ struct ep_pqueue {
234/* 234/*
235 * Configuration options available inside /proc/sys/fs/epoll/ 235 * Configuration options available inside /proc/sys/fs/epoll/
236 */ 236 */
237/* Maximum number of epoll devices, per user */
238static int max_user_instances __read_mostly;
239/* Maximum number of epoll watched descriptors, per user */ 237/* Maximum number of epoll watched descriptors, per user */
240static int max_user_watches __read_mostly; 238static int max_user_watches __read_mostly;
241 239
@@ -261,14 +259,6 @@ static int zero;
261 259
262ctl_table epoll_table[] = { 260ctl_table epoll_table[] = {
263 { 261 {
264 .procname = "max_user_instances",
265 .data = &max_user_instances,
266 .maxlen = sizeof(int),
267 .mode = 0644,
268 .proc_handler = &proc_dointvec_minmax,
269 .extra1 = &zero,
270 },
271 {
272 .procname = "max_user_watches", 262 .procname = "max_user_watches",
273 .data = &max_user_watches, 263 .data = &max_user_watches,
274 .maxlen = sizeof(int), 264 .maxlen = sizeof(int),
@@ -491,7 +481,6 @@ static void ep_free(struct eventpoll *ep)
491 481
492 mutex_unlock(&epmutex); 482 mutex_unlock(&epmutex);
493 mutex_destroy(&ep->mtx); 483 mutex_destroy(&ep->mtx);
494 atomic_dec(&ep->user->epoll_devs);
495 free_uid(ep->user); 484 free_uid(ep->user);
496 kfree(ep); 485 kfree(ep);
497} 486}
@@ -581,10 +570,6 @@ static int ep_alloc(struct eventpoll **pep)
581 struct eventpoll *ep; 570 struct eventpoll *ep;
582 571
583 user = get_current_user(); 572 user = get_current_user();
584 error = -EMFILE;
585 if (unlikely(atomic_read(&user->epoll_devs) >=
586 max_user_instances))
587 goto free_uid;
588 error = -ENOMEM; 573 error = -ENOMEM;
589 ep = kzalloc(sizeof(*ep), GFP_KERNEL); 574 ep = kzalloc(sizeof(*ep), GFP_KERNEL);
590 if (unlikely(!ep)) 575 if (unlikely(!ep))
@@ -1141,7 +1126,6 @@ SYSCALL_DEFINE1(epoll_create1, int, flags)
1141 flags & O_CLOEXEC); 1126 flags & O_CLOEXEC);
1142 if (fd < 0) 1127 if (fd < 0)
1143 ep_free(ep); 1128 ep_free(ep);
1144 atomic_inc(&ep->user->epoll_devs);
1145 1129
1146error_return: 1130error_return:
1147 DNPRINTK(3, (KERN_INFO "[%p] eventpoll: sys_epoll_create(%d) = %d\n", 1131 DNPRINTK(3, (KERN_INFO "[%p] eventpoll: sys_epoll_create(%d) = %d\n",
@@ -1366,8 +1350,10 @@ static int __init eventpoll_init(void)
1366 struct sysinfo si; 1350 struct sysinfo si;
1367 1351
1368 si_meminfo(&si); 1352 si_meminfo(&si);
1369 max_user_instances = 128; 1353 /*
1370 max_user_watches = (((si.totalram - si.totalhigh) / 32) << PAGE_SHIFT) / 1354 * Allows top 4% of lomem to be allocated for epoll watches (per user).
1355 */
1356 max_user_watches = (((si.totalram - si.totalhigh) / 25) << PAGE_SHIFT) /
1371 EP_ITEM_COST; 1357 EP_ITEM_COST;
1372 1358
1373 /* Initialize the structure used to perform safe poll wait head wake ups */ 1359 /* Initialize the structure used to perform safe poll wait head wake ups */
diff --git a/fs/exec.c b/fs/exec.c
index 0dd60a01f1b4..929b58004b7e 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1049,16 +1049,32 @@ EXPORT_SYMBOL(install_exec_creds);
1049 * - the caller must hold current->cred_exec_mutex to protect against 1049 * - the caller must hold current->cred_exec_mutex to protect against
1050 * PTRACE_ATTACH 1050 * PTRACE_ATTACH
1051 */ 1051 */
1052void check_unsafe_exec(struct linux_binprm *bprm) 1052void check_unsafe_exec(struct linux_binprm *bprm, struct files_struct *files)
1053{ 1053{
1054 struct task_struct *p = current; 1054 struct task_struct *p = current, *t;
1055 unsigned long flags;
1056 unsigned n_fs, n_files, n_sighand;
1055 1057
1056 bprm->unsafe = tracehook_unsafe_exec(p); 1058 bprm->unsafe = tracehook_unsafe_exec(p);
1057 1059
1058 if (atomic_read(&p->fs->count) > 1 || 1060 n_fs = 1;
1059 atomic_read(&p->files->count) > 1 || 1061 n_files = 1;
1060 atomic_read(&p->sighand->count) > 1) 1062 n_sighand = 1;
1063 lock_task_sighand(p, &flags);
1064 for (t = next_thread(p); t != p; t = next_thread(t)) {
1065 if (t->fs == p->fs)
1066 n_fs++;
1067 if (t->files == files)
1068 n_files++;
1069 n_sighand++;
1070 }
1071
1072 if (atomic_read(&p->fs->count) > n_fs ||
1073 atomic_read(&p->files->count) > n_files ||
1074 atomic_read(&p->sighand->count) > n_sighand)
1061 bprm->unsafe |= LSM_UNSAFE_SHARE; 1075 bprm->unsafe |= LSM_UNSAFE_SHARE;
1076
1077 unlock_task_sighand(p, &flags);
1062} 1078}
1063 1079
1064/* 1080/*
@@ -1273,7 +1289,7 @@ int do_execve(char * filename,
1273 bprm->cred = prepare_exec_creds(); 1289 bprm->cred = prepare_exec_creds();
1274 if (!bprm->cred) 1290 if (!bprm->cred)
1275 goto out_unlock; 1291 goto out_unlock;
1276 check_unsafe_exec(bprm); 1292 check_unsafe_exec(bprm, displaced);
1277 1293
1278 file = open_exec(filename); 1294 file = open_exec(filename);
1279 retval = PTR_ERR(file); 1295 retval = PTR_ERR(file);
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index da8bdeaa2e6d..7c6e3606f0ec 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -1185,9 +1185,12 @@ static int ext2_remount (struct super_block * sb, int * flags, char * data)
1185 es = sbi->s_es; 1185 es = sbi->s_es;
1186 if (((sbi->s_mount_opt & EXT2_MOUNT_XIP) != 1186 if (((sbi->s_mount_opt & EXT2_MOUNT_XIP) !=
1187 (old_mount_opt & EXT2_MOUNT_XIP)) && 1187 (old_mount_opt & EXT2_MOUNT_XIP)) &&
1188 invalidate_inodes(sb)) 1188 invalidate_inodes(sb)) {
1189 ext2_warning(sb, __func__, "busy inodes while remounting "\ 1189 ext2_warning(sb, __func__, "refusing change of xip flag "
1190 "xip remain in cache (no functional problem)"); 1190 "with busy inodes while remounting");
1191 sbi->s_mount_opt &= ~EXT2_MOUNT_XIP;
1192 sbi->s_mount_opt |= old_mount_opt & EXT2_MOUNT_XIP;
1193 }
1191 if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY)) 1194 if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY))
1192 return 0; 1195 return 0;
1193 if (*flags & MS_RDONLY) { 1196 if (*flags & MS_RDONLY) {
diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c
index 69a3d19ca9fd..4db4ffa1edad 100644
--- a/fs/ext3/namei.c
+++ b/fs/ext3/namei.c
@@ -1358,7 +1358,7 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry,
1358 struct fake_dirent *fde; 1358 struct fake_dirent *fde;
1359 1359
1360 blocksize = dir->i_sb->s_blocksize; 1360 blocksize = dir->i_sb->s_blocksize;
1361 dxtrace(printk("Creating index\n")); 1361 dxtrace(printk(KERN_DEBUG "Creating index: inode %lu\n", dir->i_ino));
1362 retval = ext3_journal_get_write_access(handle, bh); 1362 retval = ext3_journal_get_write_access(handle, bh);
1363 if (retval) { 1363 if (retval) {
1364 ext3_std_error(dir->i_sb, retval); 1364 ext3_std_error(dir->i_sb, retval);
@@ -1367,6 +1367,19 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry,
1367 } 1367 }
1368 root = (struct dx_root *) bh->b_data; 1368 root = (struct dx_root *) bh->b_data;
1369 1369
1370 /* The 0th block becomes the root, move the dirents out */
1371 fde = &root->dotdot;
1372 de = (struct ext3_dir_entry_2 *)((char *)fde +
1373 ext3_rec_len_from_disk(fde->rec_len));
1374 if ((char *) de >= (((char *) root) + blocksize)) {
1375 ext3_error(dir->i_sb, __func__,
1376 "invalid rec_len for '..' in inode %lu",
1377 dir->i_ino);
1378 brelse(bh);
1379 return -EIO;
1380 }
1381 len = ((char *) root) + blocksize - (char *) de;
1382
1370 bh2 = ext3_append (handle, dir, &block, &retval); 1383 bh2 = ext3_append (handle, dir, &block, &retval);
1371 if (!(bh2)) { 1384 if (!(bh2)) {
1372 brelse(bh); 1385 brelse(bh);
@@ -1375,11 +1388,6 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry,
1375 EXT3_I(dir)->i_flags |= EXT3_INDEX_FL; 1388 EXT3_I(dir)->i_flags |= EXT3_INDEX_FL;
1376 data1 = bh2->b_data; 1389 data1 = bh2->b_data;
1377 1390
1378 /* The 0th block becomes the root, move the dirents out */
1379 fde = &root->dotdot;
1380 de = (struct ext3_dir_entry_2 *)((char *)fde +
1381 ext3_rec_len_from_disk(fde->rec_len));
1382 len = ((char *) root) + blocksize - (char *) de;
1383 memcpy (data1, de, len); 1391 memcpy (data1, de, len);
1384 de = (struct ext3_dir_entry_2 *) data1; 1392 de = (struct ext3_dir_entry_2 *) data1;
1385 top = data1 + len; 1393 top = data1 + len;
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index b70d90e08a3c..4a970411a458 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -2428,12 +2428,13 @@ static void ext3_write_super (struct super_block * sb)
2428 2428
2429static int ext3_sync_fs(struct super_block *sb, int wait) 2429static int ext3_sync_fs(struct super_block *sb, int wait)
2430{ 2430{
2431 sb->s_dirt = 0; 2431 tid_t target;
2432 if (wait)
2433 ext3_force_commit(sb);
2434 else
2435 journal_start_commit(EXT3_SB(sb)->s_journal, NULL);
2436 2432
2433 sb->s_dirt = 0;
2434 if (journal_start_commit(EXT3_SB(sb)->s_journal, &target)) {
2435 if (wait)
2436 log_wait_commit(EXT3_SB(sb)->s_journal, target);
2437 }
2437 return 0; 2438 return 0;
2438} 2439}
2439 2440
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index 6bba06b09dd1..de9459b4cb94 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -609,7 +609,9 @@ int ext4_claim_free_blocks(struct ext4_sb_info *sbi,
609 */ 609 */
610int ext4_should_retry_alloc(struct super_block *sb, int *retries) 610int ext4_should_retry_alloc(struct super_block *sb, int *retries)
611{ 611{
612 if (!ext4_has_free_blocks(EXT4_SB(sb), 1) || (*retries)++ > 3) 612 if (!ext4_has_free_blocks(EXT4_SB(sb), 1) ||
613 (*retries)++ > 3 ||
614 !EXT4_SB(sb)->s_journal)
613 return 0; 615 return 0;
614 616
615 jbd_debug(1, "%s: retrying operation after ENOSPC\n", sb->s_id); 617 jbd_debug(1, "%s: retrying operation after ENOSPC\n", sb->s_id);
@@ -684,15 +686,15 @@ ext4_fsblk_t ext4_count_free_blocks(struct super_block *sb)
684 gdp = ext4_get_group_desc(sb, i, NULL); 686 gdp = ext4_get_group_desc(sb, i, NULL);
685 if (!gdp) 687 if (!gdp)
686 continue; 688 continue;
687 desc_count += le16_to_cpu(gdp->bg_free_blocks_count); 689 desc_count += ext4_free_blks_count(sb, gdp);
688 brelse(bitmap_bh); 690 brelse(bitmap_bh);
689 bitmap_bh = ext4_read_block_bitmap(sb, i); 691 bitmap_bh = ext4_read_block_bitmap(sb, i);
690 if (bitmap_bh == NULL) 692 if (bitmap_bh == NULL)
691 continue; 693 continue;
692 694
693 x = ext4_count_free(bitmap_bh, sb->s_blocksize); 695 x = ext4_count_free(bitmap_bh, sb->s_blocksize);
694 printk(KERN_DEBUG "group %lu: stored = %d, counted = %u\n", 696 printk(KERN_DEBUG "group %u: stored = %d, counted = %u\n",
695 i, le16_to_cpu(gdp->bg_free_blocks_count), x); 697 i, ext4_free_blks_count(sb, gdp), x);
696 bitmap_count += x; 698 bitmap_count += x;
697 } 699 }
698 brelse(bitmap_bh); 700 brelse(bitmap_bh);
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index c668e4377d76..b0c87dce66a3 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -868,7 +868,7 @@ static inline unsigned ext4_rec_len_from_disk(__le16 dlen)
868{ 868{
869 unsigned len = le16_to_cpu(dlen); 869 unsigned len = le16_to_cpu(dlen);
870 870
871 if (len == EXT4_MAX_REC_LEN) 871 if (len == EXT4_MAX_REC_LEN || len == 0)
872 return 1 << 16; 872 return 1 << 16;
873 return len; 873 return len;
874} 874}
@@ -1206,8 +1206,11 @@ static inline void ext4_r_blocks_count_set(struct ext4_super_block *es,
1206 1206
1207static inline loff_t ext4_isize(struct ext4_inode *raw_inode) 1207static inline loff_t ext4_isize(struct ext4_inode *raw_inode)
1208{ 1208{
1209 return ((loff_t)le32_to_cpu(raw_inode->i_size_high) << 32) | 1209 if (S_ISREG(le16_to_cpu(raw_inode->i_mode)))
1210 le32_to_cpu(raw_inode->i_size_lo); 1210 return ((loff_t)le32_to_cpu(raw_inode->i_size_high) << 32) |
1211 le32_to_cpu(raw_inode->i_size_lo);
1212 else
1213 return (loff_t) le32_to_cpu(raw_inode->i_size_lo);
1211} 1214}
1212 1215
1213static inline void ext4_isize_set(struct ext4_inode *raw_inode, loff_t i_size) 1216static inline void ext4_isize_set(struct ext4_inode *raw_inode, loff_t i_size)
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 54bf0623a9ae..e2eab196875f 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -3048,7 +3048,7 @@ retry:
3048 WARN_ON(ret <= 0); 3048 WARN_ON(ret <= 0);
3049 printk(KERN_ERR "%s: ext4_ext_get_blocks " 3049 printk(KERN_ERR "%s: ext4_ext_get_blocks "
3050 "returned error inode#%lu, block=%u, " 3050 "returned error inode#%lu, block=%u, "
3051 "max_blocks=%lu", __func__, 3051 "max_blocks=%u", __func__,
3052 inode->i_ino, block, max_blocks); 3052 inode->i_ino, block, max_blocks);
3053#endif 3053#endif
3054 ext4_mark_inode_dirty(handle, inode); 3054 ext4_mark_inode_dirty(handle, inode);
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index 4fb86a0061d0..627f8c3337a3 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -188,7 +188,7 @@ void ext4_free_inode(handle_t *handle, struct inode *inode)
188 struct ext4_group_desc *gdp; 188 struct ext4_group_desc *gdp;
189 struct ext4_super_block *es; 189 struct ext4_super_block *es;
190 struct ext4_sb_info *sbi; 190 struct ext4_sb_info *sbi;
191 int fatal = 0, err, count; 191 int fatal = 0, err, count, cleared;
192 ext4_group_t flex_group; 192 ext4_group_t flex_group;
193 193
194 if (atomic_read(&inode->i_count) > 1) { 194 if (atomic_read(&inode->i_count) > 1) {
@@ -248,8 +248,10 @@ void ext4_free_inode(handle_t *handle, struct inode *inode)
248 goto error_return; 248 goto error_return;
249 249
250 /* Ok, now we can actually update the inode bitmaps.. */ 250 /* Ok, now we can actually update the inode bitmaps.. */
251 if (!ext4_clear_bit_atomic(sb_bgl_lock(sbi, block_group), 251 spin_lock(sb_bgl_lock(sbi, block_group));
252 bit, bitmap_bh->b_data)) 252 cleared = ext4_clear_bit(bit, bitmap_bh->b_data);
253 spin_unlock(sb_bgl_lock(sbi, block_group));
254 if (!cleared)
253 ext4_error(sb, "ext4_free_inode", 255 ext4_error(sb, "ext4_free_inode",
254 "bit already cleared for inode %lu", ino); 256 "bit already cleared for inode %lu", ino);
255 else { 257 else {
@@ -715,6 +717,13 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode)
715 717
716 if (sbi->s_log_groups_per_flex) { 718 if (sbi->s_log_groups_per_flex) {
717 ret2 = find_group_flex(sb, dir, &group); 719 ret2 = find_group_flex(sb, dir, &group);
720 if (ret2 == -1) {
721 ret2 = find_group_other(sb, dir, &group);
722 if (ret2 == 0 && printk_ratelimit())
723 printk(KERN_NOTICE "ext4: find_group_flex "
724 "failed, fallback succeeded dir %lu\n",
725 dir->i_ino);
726 }
718 goto got_group; 727 goto got_group;
719 } 728 }
720 729
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index a6444cee0c7e..c7fed5b18745 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -47,8 +47,10 @@
47static inline int ext4_begin_ordered_truncate(struct inode *inode, 47static inline int ext4_begin_ordered_truncate(struct inode *inode,
48 loff_t new_size) 48 loff_t new_size)
49{ 49{
50 return jbd2_journal_begin_ordered_truncate(&EXT4_I(inode)->jinode, 50 return jbd2_journal_begin_ordered_truncate(
51 new_size); 51 EXT4_SB(inode->i_sb)->s_journal,
52 &EXT4_I(inode)->jinode,
53 new_size);
52} 54}
53 55
54static void ext4_invalidatepage(struct page *page, unsigned long offset); 56static void ext4_invalidatepage(struct page *page, unsigned long offset);
@@ -360,9 +362,9 @@ static int ext4_block_to_path(struct inode *inode,
360 final = ptrs; 362 final = ptrs;
361 } else { 363 } else {
362 ext4_warning(inode->i_sb, "ext4_block_to_path", 364 ext4_warning(inode->i_sb, "ext4_block_to_path",
363 "block %lu > max", 365 "block %lu > max in inode %lu",
364 i_block + direct_blocks + 366 i_block + direct_blocks +
365 indirect_blocks + double_blocks); 367 indirect_blocks + double_blocks, inode->i_ino);
366 } 368 }
367 if (boundary) 369 if (boundary)
368 *boundary = final - 1 - (i_block & (ptrs - 1)); 370 *boundary = final - 1 - (i_block & (ptrs - 1));
@@ -1366,6 +1368,10 @@ retry:
1366 goto out; 1368 goto out;
1367 } 1369 }
1368 1370
1371 /* We cannot recurse into the filesystem as the transaction is already
1372 * started */
1373 flags |= AOP_FLAG_NOFS;
1374
1369 page = grab_cache_page_write_begin(mapping, index, flags); 1375 page = grab_cache_page_write_begin(mapping, index, flags);
1370 if (!page) { 1376 if (!page) {
1371 ext4_journal_stop(handle); 1377 ext4_journal_stop(handle);
@@ -1375,7 +1381,7 @@ retry:
1375 *pagep = page; 1381 *pagep = page;
1376 1382
1377 ret = block_write_begin(file, mapping, pos, len, flags, pagep, fsdata, 1383 ret = block_write_begin(file, mapping, pos, len, flags, pagep, fsdata,
1378 ext4_get_block); 1384 ext4_get_block);
1379 1385
1380 if (!ret && ext4_should_journal_data(inode)) { 1386 if (!ret && ext4_should_journal_data(inode)) {
1381 ret = walk_page_buffers(handle, page_buffers(page), 1387 ret = walk_page_buffers(handle, page_buffers(page),
@@ -2437,6 +2443,7 @@ static int ext4_da_writepages(struct address_space *mapping,
2437 int no_nrwrite_index_update; 2443 int no_nrwrite_index_update;
2438 int pages_written = 0; 2444 int pages_written = 0;
2439 long pages_skipped; 2445 long pages_skipped;
2446 int range_cyclic, cycled = 1, io_done = 0;
2440 int needed_blocks, ret = 0, nr_to_writebump = 0; 2447 int needed_blocks, ret = 0, nr_to_writebump = 0;
2441 struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb); 2448 struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb);
2442 2449
@@ -2488,9 +2495,15 @@ static int ext4_da_writepages(struct address_space *mapping,
2488 if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX) 2495 if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
2489 range_whole = 1; 2496 range_whole = 1;
2490 2497
2491 if (wbc->range_cyclic) 2498 range_cyclic = wbc->range_cyclic;
2499 if (wbc->range_cyclic) {
2492 index = mapping->writeback_index; 2500 index = mapping->writeback_index;
2493 else 2501 if (index)
2502 cycled = 0;
2503 wbc->range_start = index << PAGE_CACHE_SHIFT;
2504 wbc->range_end = LLONG_MAX;
2505 wbc->range_cyclic = 0;
2506 } else
2494 index = wbc->range_start >> PAGE_CACHE_SHIFT; 2507 index = wbc->range_start >> PAGE_CACHE_SHIFT;
2495 2508
2496 mpd.wbc = wbc; 2509 mpd.wbc = wbc;
@@ -2504,6 +2517,7 @@ static int ext4_da_writepages(struct address_space *mapping,
2504 wbc->no_nrwrite_index_update = 1; 2517 wbc->no_nrwrite_index_update = 1;
2505 pages_skipped = wbc->pages_skipped; 2518 pages_skipped = wbc->pages_skipped;
2506 2519
2520retry:
2507 while (!ret && wbc->nr_to_write > 0) { 2521 while (!ret && wbc->nr_to_write > 0) {
2508 2522
2509 /* 2523 /*
@@ -2530,7 +2544,7 @@ static int ext4_da_writepages(struct address_space *mapping,
2530 2544
2531 ext4_journal_stop(handle); 2545 ext4_journal_stop(handle);
2532 2546
2533 if (mpd.retval == -ENOSPC) { 2547 if ((mpd.retval == -ENOSPC) && sbi->s_journal) {
2534 /* commit the transaction which would 2548 /* commit the transaction which would
2535 * free blocks released in the transaction 2549 * free blocks released in the transaction
2536 * and try again 2550 * and try again
@@ -2546,6 +2560,7 @@ static int ext4_da_writepages(struct address_space *mapping,
2546 pages_written += mpd.pages_written; 2560 pages_written += mpd.pages_written;
2547 wbc->pages_skipped = pages_skipped; 2561 wbc->pages_skipped = pages_skipped;
2548 ret = 0; 2562 ret = 0;
2563 io_done = 1;
2549 } else if (wbc->nr_to_write) 2564 } else if (wbc->nr_to_write)
2550 /* 2565 /*
2551 * There is no more writeout needed 2566 * There is no more writeout needed
@@ -2554,6 +2569,13 @@ static int ext4_da_writepages(struct address_space *mapping,
2554 */ 2569 */
2555 break; 2570 break;
2556 } 2571 }
2572 if (!io_done && !cycled) {
2573 cycled = 1;
2574 index = 0;
2575 wbc->range_start = index << PAGE_CACHE_SHIFT;
2576 wbc->range_end = mapping->writeback_index - 1;
2577 goto retry;
2578 }
2557 if (pages_skipped != wbc->pages_skipped) 2579 if (pages_skipped != wbc->pages_skipped)
2558 printk(KERN_EMERG "This should not happen leaving %s " 2580 printk(KERN_EMERG "This should not happen leaving %s "
2559 "with nr_to_write = %ld ret = %d\n", 2581 "with nr_to_write = %ld ret = %d\n",
@@ -2561,6 +2583,7 @@ static int ext4_da_writepages(struct address_space *mapping,
2561 2583
2562 /* Update index */ 2584 /* Update index */
2563 index += pages_written; 2585 index += pages_written;
2586 wbc->range_cyclic = range_cyclic;
2564 if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0)) 2587 if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
2565 /* 2588 /*
2566 * set the writeback_index so that range_cyclic 2589 * set the writeback_index so that range_cyclic
@@ -2648,6 +2671,9 @@ retry:
2648 ret = PTR_ERR(handle); 2671 ret = PTR_ERR(handle);
2649 goto out; 2672 goto out;
2650 } 2673 }
2674 /* We cannot recurse into the filesystem as the transaction is already
2675 * started */
2676 flags |= AOP_FLAG_NOFS;
2651 2677
2652 page = grab_cache_page_write_begin(mapping, index, flags); 2678 page = grab_cache_page_write_begin(mapping, index, flags);
2653 if (!page) { 2679 if (!page) {
@@ -2821,9 +2847,6 @@ static sector_t ext4_bmap(struct address_space *mapping, sector_t block)
2821 filemap_write_and_wait(mapping); 2847 filemap_write_and_wait(mapping);
2822 } 2848 }
2823 2849
2824 BUG_ON(!EXT4_JOURNAL(inode) &&
2825 EXT4_I(inode)->i_state & EXT4_STATE_JDATA);
2826
2827 if (EXT4_JOURNAL(inode) && EXT4_I(inode)->i_state & EXT4_STATE_JDATA) { 2850 if (EXT4_JOURNAL(inode) && EXT4_I(inode)->i_state & EXT4_STATE_JDATA) {
2828 /* 2851 /*
2829 * This is a REALLY heavyweight approach, but the use of 2852 * This is a REALLY heavyweight approach, but the use of
@@ -3622,7 +3645,7 @@ static void ext4_free_data(handle_t *handle, struct inode *inode,
3622 * block pointed to itself, it would have been detached when 3645 * block pointed to itself, it would have been detached when
3623 * the block was cleared. Check for this instead of OOPSing. 3646 * the block was cleared. Check for this instead of OOPSing.
3624 */ 3647 */
3625 if (bh2jh(this_bh)) 3648 if ((EXT4_JOURNAL(inode) == NULL) || bh2jh(this_bh))
3626 ext4_handle_dirty_metadata(handle, inode, this_bh); 3649 ext4_handle_dirty_metadata(handle, inode, this_bh);
3627 else 3650 else
3628 ext4_error(inode->i_sb, __func__, 3651 ext4_error(inode->i_sb, __func__,
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 918aec0c8a11..4415beeb0b62 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -3025,7 +3025,7 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
3025 goto out_err; 3025 goto out_err;
3026 3026
3027 ext4_debug("using block group %u(%d)\n", ac->ac_b_ex.fe_group, 3027 ext4_debug("using block group %u(%d)\n", ac->ac_b_ex.fe_group,
3028 gdp->bg_free_blocks_count); 3028 ext4_free_blks_count(sb, gdp));
3029 3029
3030 err = ext4_journal_get_write_access(handle, gdp_bh); 3030 err = ext4_journal_get_write_access(handle, gdp_bh);
3031 if (err) 3031 if (err)
@@ -3693,6 +3693,8 @@ ext4_mb_new_inode_pa(struct ext4_allocation_context *ac)
3693 pa->pa_free = pa->pa_len; 3693 pa->pa_free = pa->pa_len;
3694 atomic_set(&pa->pa_count, 1); 3694 atomic_set(&pa->pa_count, 1);
3695 spin_lock_init(&pa->pa_lock); 3695 spin_lock_init(&pa->pa_lock);
3696 INIT_LIST_HEAD(&pa->pa_inode_list);
3697 INIT_LIST_HEAD(&pa->pa_group_list);
3696 pa->pa_deleted = 0; 3698 pa->pa_deleted = 0;
3697 pa->pa_linear = 0; 3699 pa->pa_linear = 0;
3698 3700
@@ -3755,6 +3757,7 @@ ext4_mb_new_group_pa(struct ext4_allocation_context *ac)
3755 atomic_set(&pa->pa_count, 1); 3757 atomic_set(&pa->pa_count, 1);
3756 spin_lock_init(&pa->pa_lock); 3758 spin_lock_init(&pa->pa_lock);
3757 INIT_LIST_HEAD(&pa->pa_inode_list); 3759 INIT_LIST_HEAD(&pa->pa_inode_list);
3760 INIT_LIST_HEAD(&pa->pa_group_list);
3758 pa->pa_deleted = 0; 3761 pa->pa_deleted = 0;
3759 pa->pa_linear = 1; 3762 pa->pa_linear = 1;
3760 3763
@@ -4476,23 +4479,26 @@ static int ext4_mb_release_context(struct ext4_allocation_context *ac)
4476 pa->pa_free -= ac->ac_b_ex.fe_len; 4479 pa->pa_free -= ac->ac_b_ex.fe_len;
4477 pa->pa_len -= ac->ac_b_ex.fe_len; 4480 pa->pa_len -= ac->ac_b_ex.fe_len;
4478 spin_unlock(&pa->pa_lock); 4481 spin_unlock(&pa->pa_lock);
4479 /*
4480 * We want to add the pa to the right bucket.
4481 * Remove it from the list and while adding
4482 * make sure the list to which we are adding
4483 * doesn't grow big.
4484 */
4485 if (likely(pa->pa_free)) {
4486 spin_lock(pa->pa_obj_lock);
4487 list_del_rcu(&pa->pa_inode_list);
4488 spin_unlock(pa->pa_obj_lock);
4489 ext4_mb_add_n_trim(ac);
4490 }
4491 } 4482 }
4492 ext4_mb_put_pa(ac, ac->ac_sb, pa);
4493 } 4483 }
4494 if (ac->alloc_semp) 4484 if (ac->alloc_semp)
4495 up_read(ac->alloc_semp); 4485 up_read(ac->alloc_semp);
4486 if (pa) {
4487 /*
4488 * We want to add the pa to the right bucket.
4489 * Remove it from the list and while adding
4490 * make sure the list to which we are adding
4491 * doesn't grow big. We need to release
4492 * alloc_semp before calling ext4_mb_add_n_trim()
4493 */
4494 if (pa->pa_linear && likely(pa->pa_free)) {
4495 spin_lock(pa->pa_obj_lock);
4496 list_del_rcu(&pa->pa_inode_list);
4497 spin_unlock(pa->pa_obj_lock);
4498 ext4_mb_add_n_trim(ac);
4499 }
4500 ext4_mb_put_pa(ac, ac->ac_sb, pa);
4501 }
4496 if (ac->ac_bitmap_page) 4502 if (ac->ac_bitmap_page)
4497 page_cache_release(ac->ac_bitmap_page); 4503 page_cache_release(ac->ac_bitmap_page);
4498 if (ac->ac_buddy_page) 4504 if (ac->ac_buddy_page)
diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c
index 734abca25e35..fe64d9f79852 100644
--- a/fs/ext4/migrate.c
+++ b/fs/ext4/migrate.c
@@ -481,7 +481,7 @@ int ext4_ext_migrate(struct inode *inode)
481 + 1); 481 + 1);
482 if (IS_ERR(handle)) { 482 if (IS_ERR(handle)) {
483 retval = PTR_ERR(handle); 483 retval = PTR_ERR(handle);
484 goto err_out; 484 return retval;
485 } 485 }
486 tmp_inode = ext4_new_inode(handle, 486 tmp_inode = ext4_new_inode(handle,
487 inode->i_sb->s_root->d_inode, 487 inode->i_sb->s_root->d_inode,
@@ -489,8 +489,7 @@ int ext4_ext_migrate(struct inode *inode)
489 if (IS_ERR(tmp_inode)) { 489 if (IS_ERR(tmp_inode)) {
490 retval = -ENOMEM; 490 retval = -ENOMEM;
491 ext4_journal_stop(handle); 491 ext4_journal_stop(handle);
492 tmp_inode = NULL; 492 return retval;
493 goto err_out;
494 } 493 }
495 i_size_write(tmp_inode, i_size_read(inode)); 494 i_size_write(tmp_inode, i_size_read(inode));
496 /* 495 /*
@@ -618,8 +617,7 @@ err_out:
618 617
619 ext4_journal_stop(handle); 618 ext4_journal_stop(handle);
620 619
621 if (tmp_inode) 620 iput(tmp_inode);
622 iput(tmp_inode);
623 621
624 return retval; 622 return retval;
625} 623}
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index fec0b4c2f5f1..ba702bd7910d 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -1368,7 +1368,7 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry,
1368 struct fake_dirent *fde; 1368 struct fake_dirent *fde;
1369 1369
1370 blocksize = dir->i_sb->s_blocksize; 1370 blocksize = dir->i_sb->s_blocksize;
1371 dxtrace(printk(KERN_DEBUG "Creating index\n")); 1371 dxtrace(printk(KERN_DEBUG "Creating index: inode %lu\n", dir->i_ino));
1372 retval = ext4_journal_get_write_access(handle, bh); 1372 retval = ext4_journal_get_write_access(handle, bh);
1373 if (retval) { 1373 if (retval) {
1374 ext4_std_error(dir->i_sb, retval); 1374 ext4_std_error(dir->i_sb, retval);
@@ -1377,6 +1377,20 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry,
1377 } 1377 }
1378 root = (struct dx_root *) bh->b_data; 1378 root = (struct dx_root *) bh->b_data;
1379 1379
1380 /* The 0th block becomes the root, move the dirents out */
1381 fde = &root->dotdot;
1382 de = (struct ext4_dir_entry_2 *)((char *)fde +
1383 ext4_rec_len_from_disk(fde->rec_len));
1384 if ((char *) de >= (((char *) root) + blocksize)) {
1385 ext4_error(dir->i_sb, __func__,
1386 "invalid rec_len for '..' in inode %lu",
1387 dir->i_ino);
1388 brelse(bh);
1389 return -EIO;
1390 }
1391 len = ((char *) root) + blocksize - (char *) de;
1392
1393 /* Allocate new block for the 0th block's dirents */
1380 bh2 = ext4_append(handle, dir, &block, &retval); 1394 bh2 = ext4_append(handle, dir, &block, &retval);
1381 if (!(bh2)) { 1395 if (!(bh2)) {
1382 brelse(bh); 1396 brelse(bh);
@@ -1385,11 +1399,6 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry,
1385 EXT4_I(dir)->i_flags |= EXT4_INDEX_FL; 1399 EXT4_I(dir)->i_flags |= EXT4_INDEX_FL;
1386 data1 = bh2->b_data; 1400 data1 = bh2->b_data;
1387 1401
1388 /* The 0th block becomes the root, move the dirents out */
1389 fde = &root->dotdot;
1390 de = (struct ext4_dir_entry_2 *)((char *)fde +
1391 ext4_rec_len_from_disk(fde->rec_len));
1392 len = ((char *) root) + blocksize - (char *) de;
1393 memcpy (data1, de, len); 1402 memcpy (data1, de, len);
1394 de = (struct ext4_dir_entry_2 *) data1; 1403 de = (struct ext4_dir_entry_2 *) data1;
1395 top = data1 + len; 1404 top = data1 + len;
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index c328be5d6885..c06886abd658 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -861,12 +861,13 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
861 gdp = (struct ext4_group_desc *)((char *)primary->b_data + 861 gdp = (struct ext4_group_desc *)((char *)primary->b_data +
862 gdb_off * EXT4_DESC_SIZE(sb)); 862 gdb_off * EXT4_DESC_SIZE(sb));
863 863
864 memset(gdp, 0, EXT4_DESC_SIZE(sb));
864 ext4_block_bitmap_set(sb, gdp, input->block_bitmap); /* LV FIXME */ 865 ext4_block_bitmap_set(sb, gdp, input->block_bitmap); /* LV FIXME */
865 ext4_inode_bitmap_set(sb, gdp, input->inode_bitmap); /* LV FIXME */ 866 ext4_inode_bitmap_set(sb, gdp, input->inode_bitmap); /* LV FIXME */
866 ext4_inode_table_set(sb, gdp, input->inode_table); /* LV FIXME */ 867 ext4_inode_table_set(sb, gdp, input->inode_table); /* LV FIXME */
867 ext4_free_blks_set(sb, gdp, input->free_blocks_count); 868 ext4_free_blks_set(sb, gdp, input->free_blocks_count);
868 ext4_free_inodes_set(sb, gdp, EXT4_INODES_PER_GROUP(sb)); 869 ext4_free_inodes_set(sb, gdp, EXT4_INODES_PER_GROUP(sb));
869 gdp->bg_flags |= cpu_to_le16(EXT4_BG_INODE_ZEROED); 870 gdp->bg_flags = cpu_to_le16(EXT4_BG_INODE_ZEROED);
870 gdp->bg_checksum = ext4_group_desc_csum(sbi, input->group, gdp); 871 gdp->bg_checksum = ext4_group_desc_csum(sbi, input->group, gdp);
871 872
872 /* 873 /*
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index e5f06a5f045e..39d1993cfa13 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -3046,14 +3046,17 @@ static void ext4_write_super(struct super_block *sb)
3046static int ext4_sync_fs(struct super_block *sb, int wait) 3046static int ext4_sync_fs(struct super_block *sb, int wait)
3047{ 3047{
3048 int ret = 0; 3048 int ret = 0;
3049 tid_t target;
3049 3050
3050 trace_mark(ext4_sync_fs, "dev %s wait %d", sb->s_id, wait); 3051 trace_mark(ext4_sync_fs, "dev %s wait %d", sb->s_id, wait);
3051 sb->s_dirt = 0; 3052 sb->s_dirt = 0;
3052 if (EXT4_SB(sb)->s_journal) { 3053 if (EXT4_SB(sb)->s_journal) {
3053 if (wait) 3054 if (jbd2_journal_start_commit(EXT4_SB(sb)->s_journal,
3054 ret = ext4_force_commit(sb); 3055 &target)) {
3055 else 3056 if (wait)
3056 jbd2_journal_start_commit(EXT4_SB(sb)->s_journal, NULL); 3057 jbd2_log_wait_commit(EXT4_SB(sb)->s_journal,
3058 target);
3059 }
3057 } else { 3060 } else {
3058 ext4_commit_super(sb, EXT4_SB(sb)->s_es, wait); 3061 ext4_commit_super(sb, EXT4_SB(sb)->s_es, wait);
3059 } 3062 }
@@ -3088,7 +3091,6 @@ static int ext4_freeze(struct super_block *sb)
3088 3091
3089 /* Journal blocked and flushed, clear needs_recovery flag. */ 3092 /* Journal blocked and flushed, clear needs_recovery flag. */
3090 EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); 3093 EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
3091 ext4_commit_super(sb, EXT4_SB(sb)->s_es, 1);
3092 error = ext4_commit_super(sb, EXT4_SB(sb)->s_es, 1); 3094 error = ext4_commit_super(sb, EXT4_SB(sb)->s_es, 1);
3093 if (error) 3095 if (error)
3094 goto out; 3096 goto out;
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 6903d37af037..9b800d97a687 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -108,7 +108,8 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
108 108
109 if (hugetlb_reserve_pages(inode, 109 if (hugetlb_reserve_pages(inode,
110 vma->vm_pgoff >> huge_page_order(h), 110 vma->vm_pgoff >> huge_page_order(h),
111 len >> huge_page_shift(h), vma)) 111 len >> huge_page_shift(h), vma,
112 vma->vm_flags))
112 goto out; 113 goto out;
113 114
114 ret = 0; 115 ret = 0;
@@ -947,7 +948,7 @@ static int can_do_hugetlb_shm(void)
947 can_do_mlock()); 948 can_do_mlock());
948} 949}
949 950
950struct file *hugetlb_file_setup(const char *name, size_t size) 951struct file *hugetlb_file_setup(const char *name, size_t size, int acctflag)
951{ 952{
952 int error = -ENOMEM; 953 int error = -ENOMEM;
953 struct file *file; 954 struct file *file;
@@ -981,7 +982,8 @@ struct file *hugetlb_file_setup(const char *name, size_t size)
981 982
982 error = -ENOMEM; 983 error = -ENOMEM;
983 if (hugetlb_reserve_pages(inode, 0, 984 if (hugetlb_reserve_pages(inode, 0,
984 size >> huge_page_shift(hstate_inode(inode)), NULL)) 985 size >> huge_page_shift(hstate_inode(inode)), NULL,
986 acctflag))
985 goto out_inode; 987 goto out_inode;
986 988
987 d_instantiate(dentry, inode); 989 d_instantiate(dentry, inode);
diff --git a/fs/internal.h b/fs/internal.h
index 53af885f1732..0d8ac497b3d5 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -43,7 +43,7 @@ extern void __init chrdev_init(void);
43/* 43/*
44 * exec.c 44 * exec.c
45 */ 45 */
46extern void check_unsafe_exec(struct linux_binprm *); 46extern void check_unsafe_exec(struct linux_binprm *, struct files_struct *);
47 47
48/* 48/*
49 * namespace.c 49 * namespace.c
diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c
index 9e4fa52d7dc8..e79c07812afa 100644
--- a/fs/jbd/journal.c
+++ b/fs/jbd/journal.c
@@ -427,7 +427,7 @@ int __log_space_left(journal_t *journal)
427} 427}
428 428
429/* 429/*
430 * Called under j_state_lock. Returns true if a transaction was started. 430 * Called under j_state_lock. Returns true if a transaction commit was started.
431 */ 431 */
432int __log_start_commit(journal_t *journal, tid_t target) 432int __log_start_commit(journal_t *journal, tid_t target)
433{ 433{
@@ -495,7 +495,8 @@ int journal_force_commit_nested(journal_t *journal)
495 495
496/* 496/*
497 * Start a commit of the current running transaction (if any). Returns true 497 * Start a commit of the current running transaction (if any). Returns true
498 * if a transaction was started, and fills its tid in at *ptid 498 * if a transaction is going to be committed (or is currently already
499 * committing), and fills its tid in at *ptid
499 */ 500 */
500int journal_start_commit(journal_t *journal, tid_t *ptid) 501int journal_start_commit(journal_t *journal, tid_t *ptid)
501{ 502{
@@ -505,15 +506,19 @@ int journal_start_commit(journal_t *journal, tid_t *ptid)
505 if (journal->j_running_transaction) { 506 if (journal->j_running_transaction) {
506 tid_t tid = journal->j_running_transaction->t_tid; 507 tid_t tid = journal->j_running_transaction->t_tid;
507 508
508 ret = __log_start_commit(journal, tid); 509 __log_start_commit(journal, tid);
509 if (ret && ptid) 510 /* There's a running transaction and we've just made sure
511 * it's commit has been scheduled. */
512 if (ptid)
510 *ptid = tid; 513 *ptid = tid;
511 } else if (journal->j_committing_transaction && ptid) { 514 ret = 1;
515 } else if (journal->j_committing_transaction) {
512 /* 516 /*
513 * If ext3_write_super() recently started a commit, then we 517 * If ext3_write_super() recently started a commit, then we
514 * have to wait for completion of that transaction 518 * have to wait for completion of that transaction
515 */ 519 */
516 *ptid = journal->j_committing_transaction->t_tid; 520 if (ptid)
521 *ptid = journal->j_committing_transaction->t_tid;
517 ret = 1; 522 ret = 1;
518 } 523 }
519 spin_unlock(&journal->j_state_lock); 524 spin_unlock(&journal->j_state_lock);
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index 56675306ed81..58144102bf25 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -37,10 +37,10 @@
37#include <linux/proc_fs.h> 37#include <linux/proc_fs.h>
38#include <linux/debugfs.h> 38#include <linux/debugfs.h>
39#include <linux/seq_file.h> 39#include <linux/seq_file.h>
40#include <linux/math64.h>
40 41
41#include <asm/uaccess.h> 42#include <asm/uaccess.h>
42#include <asm/page.h> 43#include <asm/page.h>
43#include <asm/div64.h>
44 44
45EXPORT_SYMBOL(jbd2_journal_start); 45EXPORT_SYMBOL(jbd2_journal_start);
46EXPORT_SYMBOL(jbd2_journal_restart); 46EXPORT_SYMBOL(jbd2_journal_restart);
@@ -450,7 +450,7 @@ int __jbd2_log_space_left(journal_t *journal)
450} 450}
451 451
452/* 452/*
453 * Called under j_state_lock. Returns true if a transaction was started. 453 * Called under j_state_lock. Returns true if a transaction commit was started.
454 */ 454 */
455int __jbd2_log_start_commit(journal_t *journal, tid_t target) 455int __jbd2_log_start_commit(journal_t *journal, tid_t target)
456{ 456{
@@ -518,7 +518,8 @@ int jbd2_journal_force_commit_nested(journal_t *journal)
518 518
519/* 519/*
520 * Start a commit of the current running transaction (if any). Returns true 520 * Start a commit of the current running transaction (if any). Returns true
521 * if a transaction was started, and fills its tid in at *ptid 521 * if a transaction is going to be committed (or is currently already
522 * committing), and fills its tid in at *ptid
522 */ 523 */
523int jbd2_journal_start_commit(journal_t *journal, tid_t *ptid) 524int jbd2_journal_start_commit(journal_t *journal, tid_t *ptid)
524{ 525{
@@ -528,15 +529,19 @@ int jbd2_journal_start_commit(journal_t *journal, tid_t *ptid)
528 if (journal->j_running_transaction) { 529 if (journal->j_running_transaction) {
529 tid_t tid = journal->j_running_transaction->t_tid; 530 tid_t tid = journal->j_running_transaction->t_tid;
530 531
531 ret = __jbd2_log_start_commit(journal, tid); 532 __jbd2_log_start_commit(journal, tid);
532 if (ret && ptid) 533 /* There's a running transaction and we've just made sure
534 * it's commit has been scheduled. */
535 if (ptid)
533 *ptid = tid; 536 *ptid = tid;
534 } else if (journal->j_committing_transaction && ptid) { 537 ret = 1;
538 } else if (journal->j_committing_transaction) {
535 /* 539 /*
536 * If ext3_write_super() recently started a commit, then we 540 * If ext3_write_super() recently started a commit, then we
537 * have to wait for completion of that transaction 541 * have to wait for completion of that transaction
538 */ 542 */
539 *ptid = journal->j_committing_transaction->t_tid; 543 if (ptid)
544 *ptid = journal->j_committing_transaction->t_tid;
540 ret = 1; 545 ret = 1;
541 } 546 }
542 spin_unlock(&journal->j_state_lock); 547 spin_unlock(&journal->j_state_lock);
@@ -846,8 +851,8 @@ static int jbd2_seq_info_show(struct seq_file *seq, void *v)
846 jiffies_to_msecs(s->stats->u.run.rs_flushing / s->stats->ts_tid)); 851 jiffies_to_msecs(s->stats->u.run.rs_flushing / s->stats->ts_tid));
847 seq_printf(seq, " %ums logging transaction\n", 852 seq_printf(seq, " %ums logging transaction\n",
848 jiffies_to_msecs(s->stats->u.run.rs_logging / s->stats->ts_tid)); 853 jiffies_to_msecs(s->stats->u.run.rs_logging / s->stats->ts_tid));
849 seq_printf(seq, " %luus average transaction commit time\n", 854 seq_printf(seq, " %lluus average transaction commit time\n",
850 do_div(s->journal->j_average_commit_time, 1000)); 855 div_u64(s->journal->j_average_commit_time, 1000));
851 seq_printf(seq, " %lu handles per transaction\n", 856 seq_printf(seq, " %lu handles per transaction\n",
852 s->stats->u.run.rs_handle_count / s->stats->ts_tid); 857 s->stats->u.run.rs_handle_count / s->stats->ts_tid);
853 seq_printf(seq, " %lu blocks per transaction\n", 858 seq_printf(seq, " %lu blocks per transaction\n",
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index 46b4e347ed7d..28ce21d8598e 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -2129,26 +2129,46 @@ done:
2129} 2129}
2130 2130
2131/* 2131/*
2132 * This function must be called when inode is journaled in ordered mode 2132 * File truncate and transaction commit interact with each other in a
2133 * before truncation happens. It starts writeout of truncated part in 2133 * non-trivial way. If a transaction writing data block A is
2134 * case it is in the committing transaction so that we stand to ordered 2134 * committing, we cannot discard the data by truncate until we have
2135 * mode consistency guarantees. 2135 * written them. Otherwise if we crashed after the transaction with
2136 * write has committed but before the transaction with truncate has
2137 * committed, we could see stale data in block A. This function is a
2138 * helper to solve this problem. It starts writeout of the truncated
2139 * part in case it is in the committing transaction.
2140 *
2141 * Filesystem code must call this function when inode is journaled in
2142 * ordered mode before truncation happens and after the inode has been
2143 * placed on orphan list with the new inode size. The second condition
2144 * avoids the race that someone writes new data and we start
2145 * committing the transaction after this function has been called but
2146 * before a transaction for truncate is started (and furthermore it
2147 * allows us to optimize the case where the addition to orphan list
2148 * happens in the same transaction as write --- we don't have to write
2149 * any data in such case).
2136 */ 2150 */
2137int jbd2_journal_begin_ordered_truncate(struct jbd2_inode *inode, 2151int jbd2_journal_begin_ordered_truncate(journal_t *journal,
2152 struct jbd2_inode *jinode,
2138 loff_t new_size) 2153 loff_t new_size)
2139{ 2154{
2140 journal_t *journal; 2155 transaction_t *inode_trans, *commit_trans;
2141 transaction_t *commit_trans;
2142 int ret = 0; 2156 int ret = 0;
2143 2157
2144 if (!inode->i_transaction && !inode->i_next_transaction) 2158 /* This is a quick check to avoid locking if not necessary */
2159 if (!jinode->i_transaction)
2145 goto out; 2160 goto out;
2146 journal = inode->i_transaction->t_journal; 2161 /* Locks are here just to force reading of recent values, it is
2162 * enough that the transaction was not committing before we started
2163 * a transaction adding the inode to orphan list */
2147 spin_lock(&journal->j_state_lock); 2164 spin_lock(&journal->j_state_lock);
2148 commit_trans = journal->j_committing_transaction; 2165 commit_trans = journal->j_committing_transaction;
2149 spin_unlock(&journal->j_state_lock); 2166 spin_unlock(&journal->j_state_lock);
2150 if (inode->i_transaction == commit_trans) { 2167 spin_lock(&journal->j_list_lock);
2151 ret = filemap_fdatawrite_range(inode->i_vfs_inode->i_mapping, 2168 inode_trans = jinode->i_transaction;
2169 spin_unlock(&journal->j_list_lock);
2170 if (inode_trans == commit_trans) {
2171 ret = filemap_fdatawrite_range(jinode->i_vfs_inode->i_mapping,
2152 new_size, LLONG_MAX); 2172 new_size, LLONG_MAX);
2153 if (ret) 2173 if (ret)
2154 jbd2_journal_abort(journal, ret); 2174 jbd2_journal_abort(journal, ret);
diff --git a/fs/jffs2/background.c b/fs/jffs2/background.c
index 3cceef4ad2b7..e9580104b6ba 100644
--- a/fs/jffs2/background.c
+++ b/fs/jffs2/background.c
@@ -95,13 +95,17 @@ static int jffs2_garbage_collect_thread(void *_c)
95 spin_unlock(&c->erase_completion_lock); 95 spin_unlock(&c->erase_completion_lock);
96 96
97 97
98 /* This thread is purely an optimisation. But if it runs when 98 /* Problem - immediately after bootup, the GCD spends a lot
99 other things could be running, it actually makes things a 99 * of time in places like jffs2_kill_fragtree(); so much so
100 lot worse. Use yield() and put it at the back of the runqueue 100 * that userspace processes (like gdm and X) are starved
101 every time. Especially during boot, pulling an inode in 101 * despite plenty of cond_resched()s and renicing. Yield()
102 with read_inode() is much preferable to having the GC thread 102 * doesn't help, either (presumably because userspace and GCD
103 get there first. */ 103 * are generally competing for a higher latency resource -
104 yield(); 104 * disk).
105 * This forces the GCD to slow the hell down. Pulling an
106 * inode in with read_inode() is much preferable to having
107 * the GC thread get there first. */
108 schedule_timeout_interruptible(msecs_to_jiffies(50));
105 109
106 /* Put_super will send a SIGKILL and then wait on the sem. 110 /* Put_super will send a SIGKILL and then wait on the sem.
107 */ 111 */
diff --git a/fs/jffs2/readinode.c b/fs/jffs2/readinode.c
index 6ca08ad887c0..1fc1e92356ee 100644
--- a/fs/jffs2/readinode.c
+++ b/fs/jffs2/readinode.c
@@ -220,7 +220,7 @@ static int jffs2_add_tn_to_tree(struct jffs2_sb_info *c,
220 struct jffs2_tmp_dnode_info *tn) 220 struct jffs2_tmp_dnode_info *tn)
221{ 221{
222 uint32_t fn_end = tn->fn->ofs + tn->fn->size; 222 uint32_t fn_end = tn->fn->ofs + tn->fn->size;
223 struct jffs2_tmp_dnode_info *this; 223 struct jffs2_tmp_dnode_info *this, *ptn;
224 224
225 dbg_readinode("insert fragment %#04x-%#04x, ver %u at %08x\n", tn->fn->ofs, fn_end, tn->version, ref_offset(tn->fn->raw)); 225 dbg_readinode("insert fragment %#04x-%#04x, ver %u at %08x\n", tn->fn->ofs, fn_end, tn->version, ref_offset(tn->fn->raw));
226 226
@@ -251,11 +251,18 @@ static int jffs2_add_tn_to_tree(struct jffs2_sb_info *c,
251 if (this) { 251 if (this) {
252 /* If the node is coincident with another at a lower address, 252 /* If the node is coincident with another at a lower address,
253 back up until the other node is found. It may be relevant */ 253 back up until the other node is found. It may be relevant */
254 while (this->overlapped) 254 while (this->overlapped) {
255 this = tn_prev(this); 255 ptn = tn_prev(this);
256 256 if (!ptn) {
257 /* First node should never be marked overlapped */ 257 /*
258 BUG_ON(!this); 258 * We killed a node which set the overlapped
259 * flags during the scan. Fix it up.
260 */
261 this->overlapped = 0;
262 break;
263 }
264 this = ptn;
265 }
259 dbg_readinode("'this' found %#04x-%#04x (%s)\n", this->fn->ofs, this->fn->ofs + this->fn->size, this->fn ? "data" : "hole"); 266 dbg_readinode("'this' found %#04x-%#04x (%s)\n", this->fn->ofs, this->fn->ofs + this->fn->size, this->fn ? "data" : "hole");
260 } 267 }
261 268
@@ -360,7 +367,17 @@ static int jffs2_add_tn_to_tree(struct jffs2_sb_info *c,
360 } 367 }
361 if (!this->overlapped) 368 if (!this->overlapped)
362 break; 369 break;
363 this = tn_prev(this); 370
371 ptn = tn_prev(this);
372 if (!ptn) {
373 /*
374 * We killed a node which set the overlapped
375 * flags during the scan. Fix it up.
376 */
377 this->overlapped = 0;
378 break;
379 }
380 this = ptn;
364 } 381 }
365 } 382 }
366 383
@@ -456,8 +473,15 @@ static int jffs2_build_inode_fragtree(struct jffs2_sb_info *c,
456 eat_last(&rii->tn_root, &last->rb); 473 eat_last(&rii->tn_root, &last->rb);
457 ver_insert(&ver_root, last); 474 ver_insert(&ver_root, last);
458 475
459 if (unlikely(last->overlapped)) 476 if (unlikely(last->overlapped)) {
460 continue; 477 if (pen)
478 continue;
479 /*
480 * We killed a node which set the overlapped
481 * flags during the scan. Fix it up.
482 */
483 last->overlapped = 0;
484 }
461 485
462 /* Now we have a bunch of nodes in reverse version 486 /* Now we have a bunch of nodes in reverse version
463 order, in the tree at ver_root. Most of the time, 487 order, in the tree at ver_root. Most of the time,
diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c
index 6063a8e4b9f3..763b78a6e9de 100644
--- a/fs/lockd/svclock.c
+++ b/fs/lockd/svclock.c
@@ -427,7 +427,7 @@ nlmsvc_lock(struct svc_rqst *rqstp, struct nlm_file *file,
427 goto out; 427 goto out;
428 case -EAGAIN: 428 case -EAGAIN:
429 ret = nlm_lck_denied; 429 ret = nlm_lck_denied;
430 goto out; 430 break;
431 case FILE_LOCK_DEFERRED: 431 case FILE_LOCK_DEFERRED:
432 if (wait) 432 if (wait)
433 break; 433 break;
@@ -443,6 +443,10 @@ nlmsvc_lock(struct svc_rqst *rqstp, struct nlm_file *file,
443 goto out; 443 goto out;
444 } 444 }
445 445
446 ret = nlm_lck_denied;
447 if (!wait)
448 goto out;
449
446 ret = nlm_lck_blocked; 450 ret = nlm_lck_blocked;
447 451
448 /* Append to list of blocked */ 452 /* Append to list of blocked */
diff --git a/fs/namespace.c b/fs/namespace.c
index 228d8c4bfd18..06f8e63f6cb1 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -614,9 +614,11 @@ static inline void __mntput(struct vfsmount *mnt)
614 */ 614 */
615 for_each_possible_cpu(cpu) { 615 for_each_possible_cpu(cpu) {
616 struct mnt_writer *cpu_writer = &per_cpu(mnt_writers, cpu); 616 struct mnt_writer *cpu_writer = &per_cpu(mnt_writers, cpu);
617 if (cpu_writer->mnt != mnt)
618 continue;
619 spin_lock(&cpu_writer->lock); 617 spin_lock(&cpu_writer->lock);
618 if (cpu_writer->mnt != mnt) {
619 spin_unlock(&cpu_writer->lock);
620 continue;
621 }
620 atomic_add(cpu_writer->count, &mnt->__mnt_writers); 622 atomic_add(cpu_writer->count, &mnt->__mnt_writers);
621 cpu_writer->count = 0; 623 cpu_writer->count = 0;
622 /* 624 /*
diff --git a/fs/notify/inotify/inotify.c b/fs/notify/inotify/inotify.c
index dae3f28f30d4..331f2e88e284 100644
--- a/fs/notify/inotify/inotify.c
+++ b/fs/notify/inotify/inotify.c
@@ -156,7 +156,7 @@ static int inotify_handle_get_wd(struct inotify_handle *ih,
156 int ret; 156 int ret;
157 157
158 do { 158 do {
159 if (unlikely(!idr_pre_get(&ih->idr, GFP_KERNEL))) 159 if (unlikely(!idr_pre_get(&ih->idr, GFP_NOFS)))
160 return -ENOSPC; 160 return -ENOSPC;
161 ret = idr_get_new_above(&ih->idr, watch, ih->last_wd+1, &watch->wd); 161 ret = idr_get_new_above(&ih->idr, watch, ih->last_wd+1, &watch->wd);
162 } while (ret == -EAGAIN); 162 } while (ret == -EAGAIN);
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index d861096c9d81..3a9e5deed74d 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -4796,6 +4796,29 @@ out:
4796 return ret; 4796 return ret;
4797} 4797}
4798 4798
4799static int ocfs2_replace_extent_rec(struct inode *inode,
4800 handle_t *handle,
4801 struct ocfs2_path *path,
4802 struct ocfs2_extent_list *el,
4803 int split_index,
4804 struct ocfs2_extent_rec *split_rec)
4805{
4806 int ret;
4807
4808 ret = ocfs2_path_bh_journal_access(handle, inode, path,
4809 path_num_items(path) - 1);
4810 if (ret) {
4811 mlog_errno(ret);
4812 goto out;
4813 }
4814
4815 el->l_recs[split_index] = *split_rec;
4816
4817 ocfs2_journal_dirty(handle, path_leaf_bh(path));
4818out:
4819 return ret;
4820}
4821
4799/* 4822/*
4800 * Mark part or all of the extent record at split_index in the leaf 4823 * Mark part or all of the extent record at split_index in the leaf
4801 * pointed to by path as written. This removes the unwritten 4824 * pointed to by path as written. This removes the unwritten
@@ -4885,7 +4908,9 @@ static int __ocfs2_mark_extent_written(struct inode *inode,
4885 4908
4886 if (ctxt.c_contig_type == CONTIG_NONE) { 4909 if (ctxt.c_contig_type == CONTIG_NONE) {
4887 if (ctxt.c_split_covers_rec) 4910 if (ctxt.c_split_covers_rec)
4888 el->l_recs[split_index] = *split_rec; 4911 ret = ocfs2_replace_extent_rec(inode, handle,
4912 path, el,
4913 split_index, split_rec);
4889 else 4914 else
4890 ret = ocfs2_split_and_insert(inode, handle, path, et, 4915 ret = ocfs2_split_and_insert(inode, handle, path, et,
4891 &last_eb_bh, split_index, 4916 &last_eb_bh, split_index,
@@ -5390,6 +5415,9 @@ int ocfs2_remove_btree_range(struct inode *inode,
5390 goto out; 5415 goto out;
5391 } 5416 }
5392 5417
5418 vfs_dq_free_space_nodirty(inode,
5419 ocfs2_clusters_to_bytes(inode->i_sb, len));
5420
5393 ret = ocfs2_remove_extent(inode, et, cpos, len, handle, meta_ac, 5421 ret = ocfs2_remove_extent(inode, et, cpos, len, handle, meta_ac,
5394 dealloc); 5422 dealloc);
5395 if (ret) { 5423 if (ret) {
diff --git a/fs/ocfs2/dcache.c b/fs/ocfs2/dcache.c
index b1cc7c381e88..e9d7c2038c0f 100644
--- a/fs/ocfs2/dcache.c
+++ b/fs/ocfs2/dcache.c
@@ -38,6 +38,7 @@
38#include "dlmglue.h" 38#include "dlmglue.h"
39#include "file.h" 39#include "file.h"
40#include "inode.h" 40#include "inode.h"
41#include "super.h"
41 42
42 43
43static int ocfs2_dentry_revalidate(struct dentry *dentry, 44static int ocfs2_dentry_revalidate(struct dentry *dentry,
@@ -294,6 +295,34 @@ out_attach:
294 return ret; 295 return ret;
295} 296}
296 297
298static DEFINE_SPINLOCK(dentry_list_lock);
299
300/* We limit the number of dentry locks to drop in one go. We have
301 * this limit so that we don't starve other users of ocfs2_wq. */
302#define DL_INODE_DROP_COUNT 64
303
304/* Drop inode references from dentry locks */
305void ocfs2_drop_dl_inodes(struct work_struct *work)
306{
307 struct ocfs2_super *osb = container_of(work, struct ocfs2_super,
308 dentry_lock_work);
309 struct ocfs2_dentry_lock *dl;
310 int drop_count = DL_INODE_DROP_COUNT;
311
312 spin_lock(&dentry_list_lock);
313 while (osb->dentry_lock_list && drop_count--) {
314 dl = osb->dentry_lock_list;
315 osb->dentry_lock_list = dl->dl_next;
316 spin_unlock(&dentry_list_lock);
317 iput(dl->dl_inode);
318 kfree(dl);
319 spin_lock(&dentry_list_lock);
320 }
321 if (osb->dentry_lock_list)
322 queue_work(ocfs2_wq, &osb->dentry_lock_work);
323 spin_unlock(&dentry_list_lock);
324}
325
297/* 326/*
298 * ocfs2_dentry_iput() and friends. 327 * ocfs2_dentry_iput() and friends.
299 * 328 *
@@ -318,16 +347,23 @@ out_attach:
318static void ocfs2_drop_dentry_lock(struct ocfs2_super *osb, 347static void ocfs2_drop_dentry_lock(struct ocfs2_super *osb,
319 struct ocfs2_dentry_lock *dl) 348 struct ocfs2_dentry_lock *dl)
320{ 349{
321 iput(dl->dl_inode);
322 ocfs2_simple_drop_lockres(osb, &dl->dl_lockres); 350 ocfs2_simple_drop_lockres(osb, &dl->dl_lockres);
323 ocfs2_lock_res_free(&dl->dl_lockres); 351 ocfs2_lock_res_free(&dl->dl_lockres);
324 kfree(dl); 352
353 /* We leave dropping of inode reference to ocfs2_wq as that can
354 * possibly lead to inode deletion which gets tricky */
355 spin_lock(&dentry_list_lock);
356 if (!osb->dentry_lock_list)
357 queue_work(ocfs2_wq, &osb->dentry_lock_work);
358 dl->dl_next = osb->dentry_lock_list;
359 osb->dentry_lock_list = dl;
360 spin_unlock(&dentry_list_lock);
325} 361}
326 362
327void ocfs2_dentry_lock_put(struct ocfs2_super *osb, 363void ocfs2_dentry_lock_put(struct ocfs2_super *osb,
328 struct ocfs2_dentry_lock *dl) 364 struct ocfs2_dentry_lock *dl)
329{ 365{
330 int unlock = 0; 366 int unlock;
331 367
332 BUG_ON(dl->dl_count == 0); 368 BUG_ON(dl->dl_count == 0);
333 369
diff --git a/fs/ocfs2/dcache.h b/fs/ocfs2/dcache.h
index c091c34d9883..d06e16c06640 100644
--- a/fs/ocfs2/dcache.h
+++ b/fs/ocfs2/dcache.h
@@ -29,8 +29,13 @@
29extern struct dentry_operations ocfs2_dentry_ops; 29extern struct dentry_operations ocfs2_dentry_ops;
30 30
31struct ocfs2_dentry_lock { 31struct ocfs2_dentry_lock {
32 /* Use count of dentry lock */
32 unsigned int dl_count; 33 unsigned int dl_count;
33 u64 dl_parent_blkno; 34 union {
35 /* Linked list of dentry locks to release */
36 struct ocfs2_dentry_lock *dl_next;
37 u64 dl_parent_blkno;
38 };
34 39
35 /* 40 /*
36 * The ocfs2_dentry_lock keeps an inode reference until 41 * The ocfs2_dentry_lock keeps an inode reference until
@@ -47,6 +52,8 @@ int ocfs2_dentry_attach_lock(struct dentry *dentry, struct inode *inode,
47void ocfs2_dentry_lock_put(struct ocfs2_super *osb, 52void ocfs2_dentry_lock_put(struct ocfs2_super *osb,
48 struct ocfs2_dentry_lock *dl); 53 struct ocfs2_dentry_lock *dl);
49 54
55void ocfs2_drop_dl_inodes(struct work_struct *work);
56
50struct dentry *ocfs2_find_local_alias(struct inode *inode, u64 parent_blkno, 57struct dentry *ocfs2_find_local_alias(struct inode *inode, u64 parent_blkno,
51 int skip_unhashed); 58 int skip_unhashed);
52 59
diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c
index 54e182a27caf..0a2813947853 100644
--- a/fs/ocfs2/dlm/dlmmaster.c
+++ b/fs/ocfs2/dlm/dlmmaster.c
@@ -1849,12 +1849,12 @@ int dlm_assert_master_handler(struct o2net_msg *msg, u32 len, void *data,
1849 if (!mle) { 1849 if (!mle) {
1850 if (res->owner != DLM_LOCK_RES_OWNER_UNKNOWN && 1850 if (res->owner != DLM_LOCK_RES_OWNER_UNKNOWN &&
1851 res->owner != assert->node_idx) { 1851 res->owner != assert->node_idx) {
1852 mlog(ML_ERROR, "assert_master from " 1852 mlog(ML_ERROR, "DIE! Mastery assert from %u, "
1853 "%u, but current owner is " 1853 "but current owner is %u! (%.*s)\n",
1854 "%u! (%.*s)\n", 1854 assert->node_idx, res->owner, namelen,
1855 assert->node_idx, res->owner, 1855 name);
1856 namelen, name); 1856 __dlm_print_one_lock_resource(res);
1857 goto kill; 1857 BUG();
1858 } 1858 }
1859 } else if (mle->type != DLM_MLE_MIGRATION) { 1859 } else if (mle->type != DLM_MLE_MIGRATION) {
1860 if (res->owner != DLM_LOCK_RES_OWNER_UNKNOWN) { 1860 if (res->owner != DLM_LOCK_RES_OWNER_UNKNOWN) {
diff --git a/fs/ocfs2/dlm/dlmthread.c b/fs/ocfs2/dlm/dlmthread.c
index d1295203029f..4060bb328bc8 100644
--- a/fs/ocfs2/dlm/dlmthread.c
+++ b/fs/ocfs2/dlm/dlmthread.c
@@ -181,8 +181,7 @@ static int dlm_purge_lockres(struct dlm_ctxt *dlm,
181 181
182 spin_lock(&res->spinlock); 182 spin_lock(&res->spinlock);
183 /* This ensures that clear refmap is sent after the set */ 183 /* This ensures that clear refmap is sent after the set */
184 __dlm_wait_on_lockres_flags(res, (DLM_LOCK_RES_SETREF_INPROG | 184 __dlm_wait_on_lockres_flags(res, DLM_LOCK_RES_SETREF_INPROG);
185 DLM_LOCK_RES_MIGRATING));
186 spin_unlock(&res->spinlock); 185 spin_unlock(&res->spinlock);
187 186
188 /* clear our bit from the master's refmap, ignore errors */ 187 /* clear our bit from the master's refmap, ignore errors */
diff --git a/fs/ocfs2/dlm/dlmunlock.c b/fs/ocfs2/dlm/dlmunlock.c
index 86ca085ef324..fcf879ed6930 100644
--- a/fs/ocfs2/dlm/dlmunlock.c
+++ b/fs/ocfs2/dlm/dlmunlock.c
@@ -117,11 +117,11 @@ static enum dlm_status dlmunlock_common(struct dlm_ctxt *dlm,
117 else 117 else
118 BUG_ON(res->owner == dlm->node_num); 118 BUG_ON(res->owner == dlm->node_num);
119 119
120 spin_lock(&dlm->spinlock); 120 spin_lock(&dlm->ast_lock);
121 /* We want to be sure that we're not freeing a lock 121 /* We want to be sure that we're not freeing a lock
122 * that still has AST's pending... */ 122 * that still has AST's pending... */
123 in_use = !list_empty(&lock->ast_list); 123 in_use = !list_empty(&lock->ast_list);
124 spin_unlock(&dlm->spinlock); 124 spin_unlock(&dlm->ast_lock);
125 if (in_use) { 125 if (in_use) {
126 mlog(ML_ERROR, "lockres %.*s: Someone is calling dlmunlock " 126 mlog(ML_ERROR, "lockres %.*s: Someone is calling dlmunlock "
127 "while waiting for an ast!", res->lockname.len, 127 "while waiting for an ast!", res->lockname.len,
diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c
index b0c4cadd4c45..7219a86d34cc 100644
--- a/fs/ocfs2/dlmglue.c
+++ b/fs/ocfs2/dlmglue.c
@@ -320,9 +320,14 @@ static void ocfs2_schedule_blocked_lock(struct ocfs2_super *osb,
320 struct ocfs2_lock_res *lockres); 320 struct ocfs2_lock_res *lockres);
321static inline void ocfs2_recover_from_dlm_error(struct ocfs2_lock_res *lockres, 321static inline void ocfs2_recover_from_dlm_error(struct ocfs2_lock_res *lockres,
322 int convert); 322 int convert);
323#define ocfs2_log_dlm_error(_func, _err, _lockres) do { \ 323#define ocfs2_log_dlm_error(_func, _err, _lockres) do { \
324 mlog(ML_ERROR, "DLM error %d while calling %s on resource %s\n", \ 324 if ((_lockres)->l_type != OCFS2_LOCK_TYPE_DENTRY) \
325 _err, _func, _lockres->l_name); \ 325 mlog(ML_ERROR, "DLM error %d while calling %s on resource %s\n", \
326 _err, _func, _lockres->l_name); \
327 else \
328 mlog(ML_ERROR, "DLM error %d while calling %s on resource %.*s%08x\n", \
329 _err, _func, OCFS2_DENTRY_LOCK_INO_START - 1, (_lockres)->l_name, \
330 (unsigned int)ocfs2_get_dentry_lock_ino(_lockres)); \
326} while (0) 331} while (0)
327static int ocfs2_downconvert_thread(void *arg); 332static int ocfs2_downconvert_thread(void *arg);
328static void ocfs2_downconvert_on_unlock(struct ocfs2_super *osb, 333static void ocfs2_downconvert_on_unlock(struct ocfs2_super *osb,
@@ -2860,6 +2865,10 @@ static void ocfs2_unlock_ast(void *opaque, int error)
2860 case OCFS2_UNLOCK_CANCEL_CONVERT: 2865 case OCFS2_UNLOCK_CANCEL_CONVERT:
2861 mlog(0, "Cancel convert success for %s\n", lockres->l_name); 2866 mlog(0, "Cancel convert success for %s\n", lockres->l_name);
2862 lockres->l_action = OCFS2_AST_INVALID; 2867 lockres->l_action = OCFS2_AST_INVALID;
2868 /* Downconvert thread may have requeued this lock, we
2869 * need to wake it. */
2870 if (lockres->l_flags & OCFS2_LOCK_BLOCKED)
2871 ocfs2_wake_downconvert_thread(ocfs2_get_lockres_osb(lockres));
2863 break; 2872 break;
2864 case OCFS2_UNLOCK_DROP_LOCK: 2873 case OCFS2_UNLOCK_DROP_LOCK:
2865 lockres->l_level = DLM_LOCK_IV; 2874 lockres->l_level = DLM_LOCK_IV;
diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h
index 3c3532e1307c..172850a9a12a 100644
--- a/fs/ocfs2/journal.h
+++ b/fs/ocfs2/journal.h
@@ -513,8 +513,10 @@ static inline int ocfs2_jbd2_file_inode(handle_t *handle, struct inode *inode)
513static inline int ocfs2_begin_ordered_truncate(struct inode *inode, 513static inline int ocfs2_begin_ordered_truncate(struct inode *inode,
514 loff_t new_size) 514 loff_t new_size)
515{ 515{
516 return jbd2_journal_begin_ordered_truncate(&OCFS2_I(inode)->ip_jinode, 516 return jbd2_journal_begin_ordered_truncate(
517 new_size); 517 OCFS2_SB(inode->i_sb)->journal->j_journal,
518 &OCFS2_I(inode)->ip_jinode,
519 new_size);
518} 520}
519 521
520#endif /* OCFS2_JOURNAL_H */ 522#endif /* OCFS2_JOURNAL_H */
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h
index ad5c24a29edd..946d3c34b90b 100644
--- a/fs/ocfs2/ocfs2.h
+++ b/fs/ocfs2/ocfs2.h
@@ -210,6 +210,7 @@ struct ocfs2_journal;
210struct ocfs2_slot_info; 210struct ocfs2_slot_info;
211struct ocfs2_recovery_map; 211struct ocfs2_recovery_map;
212struct ocfs2_quota_recovery; 212struct ocfs2_quota_recovery;
213struct ocfs2_dentry_lock;
213struct ocfs2_super 214struct ocfs2_super
214{ 215{
215 struct task_struct *commit_task; 216 struct task_struct *commit_task;
@@ -325,6 +326,11 @@ struct ocfs2_super
325 struct list_head blocked_lock_list; 326 struct list_head blocked_lock_list;
326 unsigned long blocked_lock_count; 327 unsigned long blocked_lock_count;
327 328
329 /* List of dentry locks to release. Anyone can add locks to
330 * the list, ocfs2_wq processes the list */
331 struct ocfs2_dentry_lock *dentry_lock_list;
332 struct work_struct dentry_lock_work;
333
328 wait_queue_head_t osb_mount_event; 334 wait_queue_head_t osb_mount_event;
329 335
330 /* Truncate log info */ 336 /* Truncate log info */
@@ -335,6 +341,9 @@ struct ocfs2_super
335 struct ocfs2_node_map osb_recovering_orphan_dirs; 341 struct ocfs2_node_map osb_recovering_orphan_dirs;
336 unsigned int *osb_orphan_wipes; 342 unsigned int *osb_orphan_wipes;
337 wait_queue_head_t osb_wipe_event; 343 wait_queue_head_t osb_wipe_event;
344
345 /* used to protect metaecc calculation check of xattr. */
346 spinlock_t osb_xattr_lock;
338}; 347};
339 348
340#define OCFS2_SB(sb) ((struct ocfs2_super *)(sb)->s_fs_info) 349#define OCFS2_SB(sb) ((struct ocfs2_super *)(sb)->s_fs_info)
diff --git a/fs/ocfs2/quota_global.c b/fs/ocfs2/quota_global.c
index f4efa89baee5..1ed0f7c86869 100644
--- a/fs/ocfs2/quota_global.c
+++ b/fs/ocfs2/quota_global.c
@@ -754,7 +754,9 @@ static int ocfs2_mark_dquot_dirty(struct dquot *dquot)
754 if (dquot->dq_flags & mask) 754 if (dquot->dq_flags & mask)
755 sync = 1; 755 sync = 1;
756 spin_unlock(&dq_data_lock); 756 spin_unlock(&dq_data_lock);
757 if (!sync) { 757 /* This is a slight hack but we can't afford getting global quota
758 * lock if we already have a transaction started. */
759 if (!sync || journal_current_handle()) {
758 status = ocfs2_write_dquot(dquot); 760 status = ocfs2_write_dquot(dquot);
759 goto out; 761 goto out;
760 } 762 }
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 43ed11345b59..7ac83a81ee55 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -1537,6 +1537,13 @@ static int ocfs2_get_sector(struct super_block *sb,
1537 unlock_buffer(*bh); 1537 unlock_buffer(*bh);
1538 ll_rw_block(READ, 1, bh); 1538 ll_rw_block(READ, 1, bh);
1539 wait_on_buffer(*bh); 1539 wait_on_buffer(*bh);
1540 if (!buffer_uptodate(*bh)) {
1541 mlog_errno(-EIO);
1542 brelse(*bh);
1543 *bh = NULL;
1544 return -EIO;
1545 }
1546
1540 return 0; 1547 return 0;
1541} 1548}
1542 1549
@@ -1747,6 +1754,7 @@ static int ocfs2_initialize_super(struct super_block *sb,
1747 INIT_LIST_HEAD(&osb->blocked_lock_list); 1754 INIT_LIST_HEAD(&osb->blocked_lock_list);
1748 osb->blocked_lock_count = 0; 1755 osb->blocked_lock_count = 0;
1749 spin_lock_init(&osb->osb_lock); 1756 spin_lock_init(&osb->osb_lock);
1757 spin_lock_init(&osb->osb_xattr_lock);
1750 ocfs2_init_inode_steal_slot(osb); 1758 ocfs2_init_inode_steal_slot(osb);
1751 1759
1752 atomic_set(&osb->alloc_stats.moves, 0); 1760 atomic_set(&osb->alloc_stats.moves, 0);
@@ -1887,6 +1895,9 @@ static int ocfs2_initialize_super(struct super_block *sb,
1887 INIT_WORK(&journal->j_recovery_work, ocfs2_complete_recovery); 1895 INIT_WORK(&journal->j_recovery_work, ocfs2_complete_recovery);
1888 journal->j_state = OCFS2_JOURNAL_FREE; 1896 journal->j_state = OCFS2_JOURNAL_FREE;
1889 1897
1898 INIT_WORK(&osb->dentry_lock_work, ocfs2_drop_dl_inodes);
1899 osb->dentry_lock_list = NULL;
1900
1890 /* get some pseudo constants for clustersize bits */ 1901 /* get some pseudo constants for clustersize bits */
1891 osb->s_clustersize_bits = 1902 osb->s_clustersize_bits =
1892 le32_to_cpu(di->id2.i_super.s_clustersize_bits); 1903 le32_to_cpu(di->id2.i_super.s_clustersize_bits);
diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index e1d638af6ac3..4ddd788add67 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -82,13 +82,14 @@ struct ocfs2_xattr_set_ctxt {
82 82
83#define OCFS2_XATTR_ROOT_SIZE (sizeof(struct ocfs2_xattr_def_value_root)) 83#define OCFS2_XATTR_ROOT_SIZE (sizeof(struct ocfs2_xattr_def_value_root))
84#define OCFS2_XATTR_INLINE_SIZE 80 84#define OCFS2_XATTR_INLINE_SIZE 80
85#define OCFS2_XATTR_HEADER_GAP 4
85#define OCFS2_XATTR_FREE_IN_IBODY (OCFS2_MIN_XATTR_INLINE_SIZE \ 86#define OCFS2_XATTR_FREE_IN_IBODY (OCFS2_MIN_XATTR_INLINE_SIZE \
86 - sizeof(struct ocfs2_xattr_header) \ 87 - sizeof(struct ocfs2_xattr_header) \
87 - sizeof(__u32)) 88 - OCFS2_XATTR_HEADER_GAP)
88#define OCFS2_XATTR_FREE_IN_BLOCK(ptr) ((ptr)->i_sb->s_blocksize \ 89#define OCFS2_XATTR_FREE_IN_BLOCK(ptr) ((ptr)->i_sb->s_blocksize \
89 - sizeof(struct ocfs2_xattr_block) \ 90 - sizeof(struct ocfs2_xattr_block) \
90 - sizeof(struct ocfs2_xattr_header) \ 91 - sizeof(struct ocfs2_xattr_header) \
91 - sizeof(__u32)) 92 - OCFS2_XATTR_HEADER_GAP)
92 93
93static struct ocfs2_xattr_def_value_root def_xv = { 94static struct ocfs2_xattr_def_value_root def_xv = {
94 .xv.xr_list.l_count = cpu_to_le16(1), 95 .xv.xr_list.l_count = cpu_to_le16(1),
@@ -274,10 +275,12 @@ static int ocfs2_read_xattr_bucket(struct ocfs2_xattr_bucket *bucket,
274 bucket->bu_blocks, bucket->bu_bhs, 0, 275 bucket->bu_blocks, bucket->bu_bhs, 0,
275 NULL); 276 NULL);
276 if (!rc) { 277 if (!rc) {
278 spin_lock(&OCFS2_SB(bucket->bu_inode->i_sb)->osb_xattr_lock);
277 rc = ocfs2_validate_meta_ecc_bhs(bucket->bu_inode->i_sb, 279 rc = ocfs2_validate_meta_ecc_bhs(bucket->bu_inode->i_sb,
278 bucket->bu_bhs, 280 bucket->bu_bhs,
279 bucket->bu_blocks, 281 bucket->bu_blocks,
280 &bucket_xh(bucket)->xh_check); 282 &bucket_xh(bucket)->xh_check);
283 spin_unlock(&OCFS2_SB(bucket->bu_inode->i_sb)->osb_xattr_lock);
281 if (rc) 284 if (rc)
282 mlog_errno(rc); 285 mlog_errno(rc);
283 } 286 }
@@ -310,9 +313,11 @@ static void ocfs2_xattr_bucket_journal_dirty(handle_t *handle,
310{ 313{
311 int i; 314 int i;
312 315
316 spin_lock(&OCFS2_SB(bucket->bu_inode->i_sb)->osb_xattr_lock);
313 ocfs2_compute_meta_ecc_bhs(bucket->bu_inode->i_sb, 317 ocfs2_compute_meta_ecc_bhs(bucket->bu_inode->i_sb,
314 bucket->bu_bhs, bucket->bu_blocks, 318 bucket->bu_bhs, bucket->bu_blocks,
315 &bucket_xh(bucket)->xh_check); 319 &bucket_xh(bucket)->xh_check);
320 spin_unlock(&OCFS2_SB(bucket->bu_inode->i_sb)->osb_xattr_lock);
316 321
317 for (i = 0; i < bucket->bu_blocks; i++) 322 for (i = 0; i < bucket->bu_blocks; i++)
318 ocfs2_journal_dirty(handle, bucket->bu_bhs[i]); 323 ocfs2_journal_dirty(handle, bucket->bu_bhs[i]);
@@ -1507,7 +1512,7 @@ static int ocfs2_xattr_set_entry(struct inode *inode,
1507 last += 1; 1512 last += 1;
1508 } 1513 }
1509 1514
1510 free = min_offs - ((void *)last - xs->base) - sizeof(__u32); 1515 free = min_offs - ((void *)last - xs->base) - OCFS2_XATTR_HEADER_GAP;
1511 if (free < 0) 1516 if (free < 0)
1512 return -EIO; 1517 return -EIO;
1513 1518
@@ -2190,7 +2195,7 @@ static int ocfs2_xattr_can_be_in_inode(struct inode *inode,
2190 last += 1; 2195 last += 1;
2191 } 2196 }
2192 2197
2193 free = min_offs - ((void *)last - xs->base) - sizeof(__u32); 2198 free = min_offs - ((void *)last - xs->base) - OCFS2_XATTR_HEADER_GAP;
2194 if (free < 0) 2199 if (free < 0)
2195 return 0; 2200 return 0;
2196 2201
@@ -2592,8 +2597,9 @@ static int __ocfs2_xattr_set_handle(struct inode *inode,
2592 2597
2593 if (!ret) { 2598 if (!ret) {
2594 /* Update inode ctime. */ 2599 /* Update inode ctime. */
2595 ret = ocfs2_journal_access(ctxt->handle, inode, xis->inode_bh, 2600 ret = ocfs2_journal_access_di(ctxt->handle, inode,
2596 OCFS2_JOURNAL_ACCESS_WRITE); 2601 xis->inode_bh,
2602 OCFS2_JOURNAL_ACCESS_WRITE);
2597 if (ret) { 2603 if (ret) {
2598 mlog_errno(ret); 2604 mlog_errno(ret);
2599 goto out; 2605 goto out;
@@ -4729,13 +4735,6 @@ static int ocfs2_xattr_bucket_value_truncate(struct inode *inode,
4729 vb.vb_xv = (struct ocfs2_xattr_value_root *) 4735 vb.vb_xv = (struct ocfs2_xattr_value_root *)
4730 (vb.vb_bh->b_data + offset % blocksize); 4736 (vb.vb_bh->b_data + offset % blocksize);
4731 4737
4732 ret = ocfs2_xattr_bucket_journal_access(ctxt->handle, bucket,
4733 OCFS2_JOURNAL_ACCESS_WRITE);
4734 if (ret) {
4735 mlog_errno(ret);
4736 goto out;
4737 }
4738
4739 /* 4738 /*
4740 * From here on out we have to dirty the bucket. The generic 4739 * From here on out we have to dirty the bucket. The generic
4741 * value calls only modify one of the bucket's bhs, but we need 4740 * value calls only modify one of the bucket's bhs, but we need
@@ -4748,12 +4747,18 @@ static int ocfs2_xattr_bucket_value_truncate(struct inode *inode,
4748 ret = ocfs2_xattr_value_truncate(inode, &vb, len, ctxt); 4747 ret = ocfs2_xattr_value_truncate(inode, &vb, len, ctxt);
4749 if (ret) { 4748 if (ret) {
4750 mlog_errno(ret); 4749 mlog_errno(ret);
4751 goto out_dirty; 4750 goto out;
4751 }
4752
4753 ret = ocfs2_xattr_bucket_journal_access(ctxt->handle, bucket,
4754 OCFS2_JOURNAL_ACCESS_WRITE);
4755 if (ret) {
4756 mlog_errno(ret);
4757 goto out;
4752 } 4758 }
4753 4759
4754 xe->xe_value_size = cpu_to_le64(len); 4760 xe->xe_value_size = cpu_to_le64(len);
4755 4761
4756out_dirty:
4757 ocfs2_xattr_bucket_journal_dirty(ctxt->handle, bucket); 4762 ocfs2_xattr_bucket_journal_dirty(ctxt->handle, bucket);
4758 4763
4759out: 4764out:
@@ -5061,8 +5066,8 @@ try_again:
5061 xh_free_start = le16_to_cpu(xh->xh_free_start); 5066 xh_free_start = le16_to_cpu(xh->xh_free_start);
5062 header_size = sizeof(struct ocfs2_xattr_header) + 5067 header_size = sizeof(struct ocfs2_xattr_header) +
5063 count * sizeof(struct ocfs2_xattr_entry); 5068 count * sizeof(struct ocfs2_xattr_entry);
5064 max_free = OCFS2_XATTR_BUCKET_SIZE - 5069 max_free = OCFS2_XATTR_BUCKET_SIZE - header_size -
5065 le16_to_cpu(xh->xh_name_value_len) - header_size; 5070 le16_to_cpu(xh->xh_name_value_len) - OCFS2_XATTR_HEADER_GAP;
5066 5071
5067 mlog_bug_on_msg(header_size > blocksize, "bucket %llu has header size " 5072 mlog_bug_on_msg(header_size > blocksize, "bucket %llu has header size "
5068 "of %u which exceed block size\n", 5073 "of %u which exceed block size\n",
@@ -5095,7 +5100,7 @@ try_again:
5095 need = 0; 5100 need = 0;
5096 } 5101 }
5097 5102
5098 free = xh_free_start - header_size; 5103 free = xh_free_start - header_size - OCFS2_XATTR_HEADER_GAP;
5099 /* 5104 /*
5100 * We need to make sure the new name/value pair 5105 * We need to make sure the new name/value pair
5101 * can exist in the same block. 5106 * can exist in the same block.
@@ -5128,7 +5133,8 @@ try_again:
5128 } 5133 }
5129 5134
5130 xh_free_start = le16_to_cpu(xh->xh_free_start); 5135 xh_free_start = le16_to_cpu(xh->xh_free_start);
5131 free = xh_free_start - header_size; 5136 free = xh_free_start - header_size
5137 - OCFS2_XATTR_HEADER_GAP;
5132 if (xh_free_start % blocksize < need) 5138 if (xh_free_start % blocksize < need)
5133 free -= xh_free_start % blocksize; 5139 free -= xh_free_start % blocksize;
5134 5140
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index 3e76bb9b3ad6..d8bb5c671f42 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -485,8 +485,10 @@ struct inode *proc_get_inode(struct super_block *sb, unsigned int ino,
485 } 485 }
486 } 486 }
487 unlock_new_inode(inode); 487 unlock_new_inode(inode);
488 } else 488 } else {
489 module_put(de->owner); 489 module_put(de->owner);
490 de_put(de);
491 }
490 return inode; 492 return inode;
491 493
492out_ino: 494out_ino:
diff --git a/fs/proc/page.c b/fs/proc/page.c
index 767d95a6d1b1..2d1345112a42 100644
--- a/fs/proc/page.c
+++ b/fs/proc/page.c
@@ -107,7 +107,7 @@ static ssize_t kpageflags_read(struct file *file, char __user *buf,
107 else 107 else
108 kflags = ppage->flags; 108 kflags = ppage->flags;
109 109
110 uflags = kpf_copy_bit(KPF_LOCKED, PG_locked, kflags) | 110 uflags = kpf_copy_bit(kflags, KPF_LOCKED, PG_locked) |
111 kpf_copy_bit(kflags, KPF_ERROR, PG_error) | 111 kpf_copy_bit(kflags, KPF_ERROR, PG_error) |
112 kpf_copy_bit(kflags, KPF_REFERENCED, PG_referenced) | 112 kpf_copy_bit(kflags, KPF_REFERENCED, PG_referenced) |
113 kpf_copy_bit(kflags, KPF_UPTODATE, PG_uptodate) | 113 kpf_copy_bit(kflags, KPF_UPTODATE, PG_uptodate) |
diff --git a/fs/seq_file.c b/fs/seq_file.c
index b569ff1c4dc8..a1a4cfe19210 100644
--- a/fs/seq_file.c
+++ b/fs/seq_file.c
@@ -48,12 +48,78 @@ int seq_open(struct file *file, const struct seq_operations *op)
48 */ 48 */
49 file->f_version = 0; 49 file->f_version = 0;
50 50
51 /* SEQ files support lseek, but not pread/pwrite */ 51 /*
52 file->f_mode &= ~(FMODE_PREAD | FMODE_PWRITE); 52 * seq_files support lseek() and pread(). They do not implement
53 * write() at all, but we clear FMODE_PWRITE here for historical
54 * reasons.
55 *
56 * If a client of seq_files a) implements file.write() and b) wishes to
57 * support pwrite() then that client will need to implement its own
58 * file.open() which calls seq_open() and then sets FMODE_PWRITE.
59 */
60 file->f_mode &= ~FMODE_PWRITE;
53 return 0; 61 return 0;
54} 62}
55EXPORT_SYMBOL(seq_open); 63EXPORT_SYMBOL(seq_open);
56 64
65static int traverse(struct seq_file *m, loff_t offset)
66{
67 loff_t pos = 0, index;
68 int error = 0;
69 void *p;
70
71 m->version = 0;
72 index = 0;
73 m->count = m->from = 0;
74 if (!offset) {
75 m->index = index;
76 return 0;
77 }
78 if (!m->buf) {
79 m->buf = kmalloc(m->size = PAGE_SIZE, GFP_KERNEL);
80 if (!m->buf)
81 return -ENOMEM;
82 }
83 p = m->op->start(m, &index);
84 while (p) {
85 error = PTR_ERR(p);
86 if (IS_ERR(p))
87 break;
88 error = m->op->show(m, p);
89 if (error < 0)
90 break;
91 if (unlikely(error)) {
92 error = 0;
93 m->count = 0;
94 }
95 if (m->count == m->size)
96 goto Eoverflow;
97 if (pos + m->count > offset) {
98 m->from = offset - pos;
99 m->count -= m->from;
100 m->index = index;
101 break;
102 }
103 pos += m->count;
104 m->count = 0;
105 if (pos == offset) {
106 index++;
107 m->index = index;
108 break;
109 }
110 p = m->op->next(m, p, &index);
111 }
112 m->op->stop(m, p);
113 m->index = index;
114 return error;
115
116Eoverflow:
117 m->op->stop(m, p);
118 kfree(m->buf);
119 m->buf = kmalloc(m->size <<= 1, GFP_KERNEL);
120 return !m->buf ? -ENOMEM : -EAGAIN;
121}
122
57/** 123/**
58 * seq_read - ->read() method for sequential files. 124 * seq_read - ->read() method for sequential files.
59 * @file: the file to read from 125 * @file: the file to read from
@@ -73,6 +139,22 @@ ssize_t seq_read(struct file *file, char __user *buf, size_t size, loff_t *ppos)
73 int err = 0; 139 int err = 0;
74 140
75 mutex_lock(&m->lock); 141 mutex_lock(&m->lock);
142
143 /* Don't assume *ppos is where we left it */
144 if (unlikely(*ppos != m->read_pos)) {
145 m->read_pos = *ppos;
146 while ((err = traverse(m, *ppos)) == -EAGAIN)
147 ;
148 if (err) {
149 /* With prejudice... */
150 m->read_pos = 0;
151 m->version = 0;
152 m->index = 0;
153 m->count = 0;
154 goto Done;
155 }
156 }
157
76 /* 158 /*
77 * seq_file->op->..m_start/m_stop/m_next may do special actions 159 * seq_file->op->..m_start/m_stop/m_next may do special actions
78 * or optimisations based on the file->f_version, so we want to 160 * or optimisations based on the file->f_version, so we want to
@@ -172,8 +254,10 @@ Fill:
172Done: 254Done:
173 if (!copied) 255 if (!copied)
174 copied = err; 256 copied = err;
175 else 257 else {
176 *ppos += copied; 258 *ppos += copied;
259 m->read_pos += copied;
260 }
177 file->f_version = m->version; 261 file->f_version = m->version;
178 mutex_unlock(&m->lock); 262 mutex_unlock(&m->lock);
179 return copied; 263 return copied;
@@ -186,63 +270,6 @@ Efault:
186} 270}
187EXPORT_SYMBOL(seq_read); 271EXPORT_SYMBOL(seq_read);
188 272
189static int traverse(struct seq_file *m, loff_t offset)
190{
191 loff_t pos = 0, index;
192 int error = 0;
193 void *p;
194
195 m->version = 0;
196 index = 0;
197 m->count = m->from = 0;
198 if (!offset) {
199 m->index = index;
200 return 0;
201 }
202 if (!m->buf) {
203 m->buf = kmalloc(m->size = PAGE_SIZE, GFP_KERNEL);
204 if (!m->buf)
205 return -ENOMEM;
206 }
207 p = m->op->start(m, &index);
208 while (p) {
209 error = PTR_ERR(p);
210 if (IS_ERR(p))
211 break;
212 error = m->op->show(m, p);
213 if (error < 0)
214 break;
215 if (unlikely(error)) {
216 error = 0;
217 m->count = 0;
218 }
219 if (m->count == m->size)
220 goto Eoverflow;
221 if (pos + m->count > offset) {
222 m->from = offset - pos;
223 m->count -= m->from;
224 m->index = index;
225 break;
226 }
227 pos += m->count;
228 m->count = 0;
229 if (pos == offset) {
230 index++;
231 m->index = index;
232 break;
233 }
234 p = m->op->next(m, p, &index);
235 }
236 m->op->stop(m, p);
237 return error;
238
239Eoverflow:
240 m->op->stop(m, p);
241 kfree(m->buf);
242 m->buf = kmalloc(m->size <<= 1, GFP_KERNEL);
243 return !m->buf ? -ENOMEM : -EAGAIN;
244}
245
246/** 273/**
247 * seq_lseek - ->llseek() method for sequential files. 274 * seq_lseek - ->llseek() method for sequential files.
248 * @file: the file in question 275 * @file: the file in question
@@ -265,16 +292,18 @@ loff_t seq_lseek(struct file *file, loff_t offset, int origin)
265 if (offset < 0) 292 if (offset < 0)
266 break; 293 break;
267 retval = offset; 294 retval = offset;
268 if (offset != file->f_pos) { 295 if (offset != m->read_pos) {
269 while ((retval=traverse(m, offset)) == -EAGAIN) 296 while ((retval=traverse(m, offset)) == -EAGAIN)
270 ; 297 ;
271 if (retval) { 298 if (retval) {
272 /* with extreme prejudice... */ 299 /* with extreme prejudice... */
273 file->f_pos = 0; 300 file->f_pos = 0;
301 m->read_pos = 0;
274 m->version = 0; 302 m->version = 0;
275 m->index = 0; 303 m->index = 0;
276 m->count = 0; 304 m->count = 0;
277 } else { 305 } else {
306 m->read_pos = offset;
278 retval = file->f_pos = offset; 307 retval = file->f_pos = offset;
279 } 308 }
280 } 309 }
diff --git a/fs/squashfs/block.c b/fs/squashfs/block.c
index c837dfc2b3c6..321728f48f2d 100644
--- a/fs/squashfs/block.c
+++ b/fs/squashfs/block.c
@@ -80,7 +80,7 @@ static struct buffer_head *get_block_length(struct super_block *sb,
80 * generated a larger block - this does occasionally happen with zlib). 80 * generated a larger block - this does occasionally happen with zlib).
81 */ 81 */
82int squashfs_read_data(struct super_block *sb, void **buffer, u64 index, 82int squashfs_read_data(struct super_block *sb, void **buffer, u64 index,
83 int length, u64 *next_index, int srclength) 83 int length, u64 *next_index, int srclength, int pages)
84{ 84{
85 struct squashfs_sb_info *msblk = sb->s_fs_info; 85 struct squashfs_sb_info *msblk = sb->s_fs_info;
86 struct buffer_head **bh; 86 struct buffer_head **bh;
@@ -185,6 +185,14 @@ int squashfs_read_data(struct super_block *sb, void **buffer, u64 index,
185 } 185 }
186 186
187 if (msblk->stream.avail_out == 0) { 187 if (msblk->stream.avail_out == 0) {
188 if (page == pages) {
189 ERROR("zlib_inflate tried to "
190 "decompress too much data, "
191 "expected %d bytes. Zlib "
192 "data probably corrupt\n",
193 srclength);
194 goto release_mutex;
195 }
188 msblk->stream.next_out = buffer[page++]; 196 msblk->stream.next_out = buffer[page++];
189 msblk->stream.avail_out = PAGE_CACHE_SIZE; 197 msblk->stream.avail_out = PAGE_CACHE_SIZE;
190 } 198 }
@@ -268,7 +276,8 @@ block_release:
268 put_bh(bh[k]); 276 put_bh(bh[k]);
269 277
270read_failure: 278read_failure:
271 ERROR("sb_bread failed reading block 0x%llx\n", cur_index); 279 ERROR("squashfs_read_data failed to read block 0x%llx\n",
280 (unsigned long long) index);
272 kfree(bh); 281 kfree(bh);
273 return -EIO; 282 return -EIO;
274} 283}
diff --git a/fs/squashfs/cache.c b/fs/squashfs/cache.c
index f29eda16d25e..1c4739e33af6 100644
--- a/fs/squashfs/cache.c
+++ b/fs/squashfs/cache.c
@@ -119,7 +119,7 @@ struct squashfs_cache_entry *squashfs_cache_get(struct super_block *sb,
119 119
120 entry->length = squashfs_read_data(sb, entry->data, 120 entry->length = squashfs_read_data(sb, entry->data,
121 block, length, &entry->next_index, 121 block, length, &entry->next_index,
122 cache->block_size); 122 cache->block_size, cache->pages);
123 123
124 spin_lock(&cache->lock); 124 spin_lock(&cache->lock);
125 125
@@ -406,7 +406,7 @@ int squashfs_read_table(struct super_block *sb, void *buffer, u64 block,
406 for (i = 0; i < pages; i++, buffer += PAGE_CACHE_SIZE) 406 for (i = 0; i < pages; i++, buffer += PAGE_CACHE_SIZE)
407 data[i] = buffer; 407 data[i] = buffer;
408 res = squashfs_read_data(sb, data, block, length | 408 res = squashfs_read_data(sb, data, block, length |
409 SQUASHFS_COMPRESSED_BIT_BLOCK, NULL, length); 409 SQUASHFS_COMPRESSED_BIT_BLOCK, NULL, length, pages);
410 kfree(data); 410 kfree(data);
411 return res; 411 return res;
412} 412}
diff --git a/fs/squashfs/inode.c b/fs/squashfs/inode.c
index 7a63398bb855..9101dbde39ec 100644
--- a/fs/squashfs/inode.c
+++ b/fs/squashfs/inode.c
@@ -133,7 +133,8 @@ int squashfs_read_inode(struct inode *inode, long long ino)
133 type = le16_to_cpu(sqshb_ino->inode_type); 133 type = le16_to_cpu(sqshb_ino->inode_type);
134 switch (type) { 134 switch (type) {
135 case SQUASHFS_REG_TYPE: { 135 case SQUASHFS_REG_TYPE: {
136 unsigned int frag_offset, frag_size, frag; 136 unsigned int frag_offset, frag;
137 int frag_size;
137 u64 frag_blk; 138 u64 frag_blk;
138 struct squashfs_reg_inode *sqsh_ino = &squashfs_ino.reg; 139 struct squashfs_reg_inode *sqsh_ino = &squashfs_ino.reg;
139 140
@@ -175,7 +176,8 @@ int squashfs_read_inode(struct inode *inode, long long ino)
175 break; 176 break;
176 } 177 }
177 case SQUASHFS_LREG_TYPE: { 178 case SQUASHFS_LREG_TYPE: {
178 unsigned int frag_offset, frag_size, frag; 179 unsigned int frag_offset, frag;
180 int frag_size;
179 u64 frag_blk; 181 u64 frag_blk;
180 struct squashfs_lreg_inode *sqsh_ino = &squashfs_ino.lreg; 182 struct squashfs_lreg_inode *sqsh_ino = &squashfs_ino.lreg;
181 183
diff --git a/fs/squashfs/squashfs.h b/fs/squashfs/squashfs.h
index 6b2515d027d5..0e9feb6adf7e 100644
--- a/fs/squashfs/squashfs.h
+++ b/fs/squashfs/squashfs.h
@@ -34,7 +34,7 @@ static inline struct squashfs_inode_info *squashfs_i(struct inode *inode)
34 34
35/* block.c */ 35/* block.c */
36extern int squashfs_read_data(struct super_block *, void **, u64, int, u64 *, 36extern int squashfs_read_data(struct super_block *, void **, u64, int, u64 *,
37 int); 37 int, int);
38 38
39/* cache.c */ 39/* cache.c */
40extern struct squashfs_cache *squashfs_cache_init(char *, int, int); 40extern struct squashfs_cache *squashfs_cache_init(char *, int, int);
diff --git a/fs/squashfs/super.c b/fs/squashfs/super.c
index 071df5b5b491..681ec0d83799 100644
--- a/fs/squashfs/super.c
+++ b/fs/squashfs/super.c
@@ -389,7 +389,7 @@ static int __init init_squashfs_fs(void)
389 return err; 389 return err;
390 } 390 }
391 391
392 printk(KERN_INFO "squashfs: version 4.0 (2009/01/03) " 392 printk(KERN_INFO "squashfs: version 4.0 (2009/01/31) "
393 "Phillip Lougher\n"); 393 "Phillip Lougher\n");
394 394
395 return 0; 395 return 0;
diff --git a/fs/super.c b/fs/super.c
index 645e5403f2a0..8349ed6b1412 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -82,7 +82,22 @@ static struct super_block *alloc_super(struct file_system_type *type)
82 * lock ordering than usbfs: 82 * lock ordering than usbfs:
83 */ 83 */
84 lockdep_set_class(&s->s_lock, &type->s_lock_key); 84 lockdep_set_class(&s->s_lock, &type->s_lock_key);
85 down_write(&s->s_umount); 85 /*
86 * sget() can have s_umount recursion.
87 *
88 * When it cannot find a suitable sb, it allocates a new
89 * one (this one), and tries again to find a suitable old
90 * one.
91 *
92 * In case that succeeds, it will acquire the s_umount
93 * lock of the old one. Since these are clearly distrinct
94 * locks, and this object isn't exposed yet, there's no
95 * risk of deadlocks.
96 *
97 * Annotate this by putting this lock in a different
98 * subclass.
99 */
100 down_write_nested(&s->s_umount, SINGLE_DEPTH_NESTING);
86 s->s_count = S_BIAS; 101 s->s_count = S_BIAS;
87 atomic_set(&s->s_active, 1); 102 atomic_set(&s->s_active, 1);
88 mutex_init(&s->s_vfs_rename_mutex); 103 mutex_init(&s->s_vfs_rename_mutex);
@@ -301,7 +316,7 @@ void generic_shutdown_super(struct super_block *sb)
301 /* 316 /*
302 * wait for asynchronous fs operations to finish before going further 317 * wait for asynchronous fs operations to finish before going further
303 */ 318 */
304 async_synchronize_full_special(&sb->s_async_list); 319 async_synchronize_full_domain(&sb->s_async_list);
305 320
306 /* bad name - it should be evict_inodes() */ 321 /* bad name - it should be evict_inodes() */
307 invalidate_inodes(sb); 322 invalidate_inodes(sb);
@@ -470,7 +485,7 @@ restart:
470 sb->s_count++; 485 sb->s_count++;
471 spin_unlock(&sb_lock); 486 spin_unlock(&sb_lock);
472 down_read(&sb->s_umount); 487 down_read(&sb->s_umount);
473 async_synchronize_full_special(&sb->s_async_list); 488 async_synchronize_full_domain(&sb->s_async_list);
474 if (sb->s_root && (wait || sb->s_dirt)) 489 if (sb->s_root && (wait || sb->s_dirt))
475 sb->s_op->sync_fs(sb, wait); 490 sb->s_op->sync_fs(sb, wait);
476 up_read(&sb->s_umount); 491 up_read(&sb->s_umount);
diff --git a/fs/timerfd.c b/fs/timerfd.c
index 6a123b8ff3f5..b042bd7034b1 100644
--- a/fs/timerfd.c
+++ b/fs/timerfd.c
@@ -186,10 +186,9 @@ SYSCALL_DEFINE2(timerfd_create, int, clockid, int, flags)
186 BUILD_BUG_ON(TFD_CLOEXEC != O_CLOEXEC); 186 BUILD_BUG_ON(TFD_CLOEXEC != O_CLOEXEC);
187 BUILD_BUG_ON(TFD_NONBLOCK != O_NONBLOCK); 187 BUILD_BUG_ON(TFD_NONBLOCK != O_NONBLOCK);
188 188
189 if (flags & ~(TFD_CLOEXEC | TFD_NONBLOCK)) 189 if ((flags & ~TFD_CREATE_FLAGS) ||
190 return -EINVAL; 190 (clockid != CLOCK_MONOTONIC &&
191 if (clockid != CLOCK_MONOTONIC && 191 clockid != CLOCK_REALTIME))
192 clockid != CLOCK_REALTIME)
193 return -EINVAL; 192 return -EINVAL;
194 193
195 ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); 194 ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
@@ -201,7 +200,7 @@ SYSCALL_DEFINE2(timerfd_create, int, clockid, int, flags)
201 hrtimer_init(&ctx->tmr, clockid, HRTIMER_MODE_ABS); 200 hrtimer_init(&ctx->tmr, clockid, HRTIMER_MODE_ABS);
202 201
203 ufd = anon_inode_getfd("[timerfd]", &timerfd_fops, ctx, 202 ufd = anon_inode_getfd("[timerfd]", &timerfd_fops, ctx,
204 flags & (O_CLOEXEC | O_NONBLOCK)); 203 flags & TFD_SHARED_FCNTL_FLAGS);
205 if (ufd < 0) 204 if (ufd < 0)
206 kfree(ctx); 205 kfree(ctx);
207 206
@@ -219,7 +218,8 @@ SYSCALL_DEFINE4(timerfd_settime, int, ufd, int, flags,
219 if (copy_from_user(&ktmr, utmr, sizeof(ktmr))) 218 if (copy_from_user(&ktmr, utmr, sizeof(ktmr)))
220 return -EFAULT; 219 return -EFAULT;
221 220
222 if (!timespec_valid(&ktmr.it_value) || 221 if ((flags & ~TFD_SETTIME_FLAGS) ||
222 !timespec_valid(&ktmr.it_value) ||
223 !timespec_valid(&ktmr.it_interval)) 223 !timespec_valid(&ktmr.it_interval))
224 return -EINVAL; 224 return -EINVAL;
225 225
diff --git a/fs/ubifs/budget.c b/fs/ubifs/budget.c
index 175f9c590b77..f393620890ee 100644
--- a/fs/ubifs/budget.c
+++ b/fs/ubifs/budget.c
@@ -689,7 +689,7 @@ long long ubifs_reported_space(const struct ubifs_info *c, long long free)
689} 689}
690 690
691/** 691/**
692 * ubifs_get_free_space - return amount of free space. 692 * ubifs_get_free_space_nolock - return amount of free space.
693 * @c: UBIFS file-system description object 693 * @c: UBIFS file-system description object
694 * 694 *
695 * This function calculates amount of free space to report to user-space. 695 * This function calculates amount of free space to report to user-space.
@@ -704,16 +704,14 @@ long long ubifs_reported_space(const struct ubifs_info *c, long long free)
704 * traditional file-systems, because they have way less overhead than UBIFS. 704 * traditional file-systems, because they have way less overhead than UBIFS.
705 * So, to keep users happy, UBIFS tries to take the overhead into account. 705 * So, to keep users happy, UBIFS tries to take the overhead into account.
706 */ 706 */
707long long ubifs_get_free_space(struct ubifs_info *c) 707long long ubifs_get_free_space_nolock(struct ubifs_info *c)
708{ 708{
709 int min_idx_lebs, rsvd_idx_lebs, lebs; 709 int rsvd_idx_lebs, lebs;
710 long long available, outstanding, free; 710 long long available, outstanding, free;
711 711
712 spin_lock(&c->space_lock); 712 ubifs_assert(c->min_idx_lebs == ubifs_calc_min_idx_lebs(c));
713 min_idx_lebs = c->min_idx_lebs;
714 ubifs_assert(min_idx_lebs == ubifs_calc_min_idx_lebs(c));
715 outstanding = c->budg_data_growth + c->budg_dd_growth; 713 outstanding = c->budg_data_growth + c->budg_dd_growth;
716 available = ubifs_calc_available(c, min_idx_lebs); 714 available = ubifs_calc_available(c, c->min_idx_lebs);
717 715
718 /* 716 /*
719 * When reporting free space to user-space, UBIFS guarantees that it is 717 * When reporting free space to user-space, UBIFS guarantees that it is
@@ -726,15 +724,14 @@ long long ubifs_get_free_space(struct ubifs_info *c)
726 * Note, the calculations below are similar to what we have in 724 * Note, the calculations below are similar to what we have in
727 * 'do_budget_space()', so refer there for comments. 725 * 'do_budget_space()', so refer there for comments.
728 */ 726 */
729 if (min_idx_lebs > c->lst.idx_lebs) 727 if (c->min_idx_lebs > c->lst.idx_lebs)
730 rsvd_idx_lebs = min_idx_lebs - c->lst.idx_lebs; 728 rsvd_idx_lebs = c->min_idx_lebs - c->lst.idx_lebs;
731 else 729 else
732 rsvd_idx_lebs = 0; 730 rsvd_idx_lebs = 0;
733 lebs = c->lst.empty_lebs + c->freeable_cnt + c->idx_gc_cnt - 731 lebs = c->lst.empty_lebs + c->freeable_cnt + c->idx_gc_cnt -
734 c->lst.taken_empty_lebs; 732 c->lst.taken_empty_lebs;
735 lebs -= rsvd_idx_lebs; 733 lebs -= rsvd_idx_lebs;
736 available += lebs * (c->dark_wm - c->leb_overhead); 734 available += lebs * (c->dark_wm - c->leb_overhead);
737 spin_unlock(&c->space_lock);
738 735
739 if (available > outstanding) 736 if (available > outstanding)
740 free = ubifs_reported_space(c, available - outstanding); 737 free = ubifs_reported_space(c, available - outstanding);
@@ -742,3 +739,21 @@ long long ubifs_get_free_space(struct ubifs_info *c)
742 free = 0; 739 free = 0;
743 return free; 740 return free;
744} 741}
742
743/**
744 * ubifs_get_free_space - return amount of free space.
745 * @c: UBIFS file-system description object
746 *
747 * This function calculates and retuns amount of free space to report to
748 * user-space.
749 */
750long long ubifs_get_free_space(struct ubifs_info *c)
751{
752 long long free;
753
754 spin_lock(&c->space_lock);
755 free = ubifs_get_free_space_nolock(c);
756 spin_unlock(&c->space_lock);
757
758 return free;
759}
diff --git a/fs/ubifs/debug.c b/fs/ubifs/debug.c
index 792c5a16c182..e975bd82f38b 100644
--- a/fs/ubifs/debug.c
+++ b/fs/ubifs/debug.c
@@ -620,9 +620,11 @@ void dbg_dump_budg(struct ubifs_info *c)
620 c->dark_wm, c->dead_wm, c->max_idx_node_sz); 620 c->dark_wm, c->dead_wm, c->max_idx_node_sz);
621 printk(KERN_DEBUG "\tgc_lnum %d, ihead_lnum %d\n", 621 printk(KERN_DEBUG "\tgc_lnum %d, ihead_lnum %d\n",
622 c->gc_lnum, c->ihead_lnum); 622 c->gc_lnum, c->ihead_lnum);
623 for (i = 0; i < c->jhead_cnt; i++) 623 /* If we are in R/O mode, journal heads do not exist */
624 printk(KERN_DEBUG "\tjhead %d\t LEB %d\n", 624 if (c->jheads)
625 c->jheads[i].wbuf.jhead, c->jheads[i].wbuf.lnum); 625 for (i = 0; i < c->jhead_cnt; i++)
626 printk(KERN_DEBUG "\tjhead %d\t LEB %d\n",
627 c->jheads[i].wbuf.jhead, c->jheads[i].wbuf.lnum);
626 for (rb = rb_first(&c->buds); rb; rb = rb_next(rb)) { 628 for (rb = rb_first(&c->buds); rb; rb = rb_next(rb)) {
627 bud = rb_entry(rb, struct ubifs_bud, rb); 629 bud = rb_entry(rb, struct ubifs_bud, rb);
628 printk(KERN_DEBUG "\tbud LEB %d\n", bud->lnum); 630 printk(KERN_DEBUG "\tbud LEB %d\n", bud->lnum);
@@ -637,10 +639,7 @@ void dbg_dump_budg(struct ubifs_info *c)
637 /* Print budgeting predictions */ 639 /* Print budgeting predictions */
638 available = ubifs_calc_available(c, c->min_idx_lebs); 640 available = ubifs_calc_available(c, c->min_idx_lebs);
639 outstanding = c->budg_data_growth + c->budg_dd_growth; 641 outstanding = c->budg_data_growth + c->budg_dd_growth;
640 if (available > outstanding) 642 free = ubifs_get_free_space_nolock(c);
641 free = ubifs_reported_space(c, available - outstanding);
642 else
643 free = 0;
644 printk(KERN_DEBUG "Budgeting predictions:\n"); 643 printk(KERN_DEBUG "Budgeting predictions:\n");
645 printk(KERN_DEBUG "\tavailable: %lld, outstanding %lld, free %lld\n", 644 printk(KERN_DEBUG "\tavailable: %lld, outstanding %lld, free %lld\n",
646 available, outstanding, free); 645 available, outstanding, free);
@@ -861,6 +860,65 @@ void dbg_dump_index(struct ubifs_info *c)
861} 860}
862 861
863/** 862/**
863 * dbg_save_space_info - save information about flash space.
864 * @c: UBIFS file-system description object
865 *
866 * This function saves information about UBIFS free space, dirty space, etc, in
867 * order to check it later.
868 */
869void dbg_save_space_info(struct ubifs_info *c)
870{
871 struct ubifs_debug_info *d = c->dbg;
872
873 ubifs_get_lp_stats(c, &d->saved_lst);
874
875 spin_lock(&c->space_lock);
876 d->saved_free = ubifs_get_free_space_nolock(c);
877 spin_unlock(&c->space_lock);
878}
879
880/**
881 * dbg_check_space_info - check flash space information.
882 * @c: UBIFS file-system description object
883 *
884 * This function compares current flash space information with the information
885 * which was saved when the 'dbg_save_space_info()' function was called.
886 * Returns zero if the information has not changed, and %-EINVAL it it has
887 * changed.
888 */
889int dbg_check_space_info(struct ubifs_info *c)
890{
891 struct ubifs_debug_info *d = c->dbg;
892 struct ubifs_lp_stats lst;
893 long long avail, free;
894
895 spin_lock(&c->space_lock);
896 avail = ubifs_calc_available(c, c->min_idx_lebs);
897 spin_unlock(&c->space_lock);
898 free = ubifs_get_free_space(c);
899
900 if (free != d->saved_free) {
901 ubifs_err("free space changed from %lld to %lld",
902 d->saved_free, free);
903 goto out;
904 }
905
906 return 0;
907
908out:
909 ubifs_msg("saved lprops statistics dump");
910 dbg_dump_lstats(&d->saved_lst);
911 ubifs_get_lp_stats(c, &lst);
912 ubifs_msg("current lprops statistics dump");
913 dbg_dump_lstats(&d->saved_lst);
914 spin_lock(&c->space_lock);
915 dbg_dump_budg(c);
916 spin_unlock(&c->space_lock);
917 dump_stack();
918 return -EINVAL;
919}
920
921/**
864 * dbg_check_synced_i_size - check synchronized inode size. 922 * dbg_check_synced_i_size - check synchronized inode size.
865 * @inode: inode to check 923 * @inode: inode to check
866 * 924 *
@@ -1349,7 +1407,7 @@ int dbg_check_tnc(struct ubifs_info *c, int extra)
1349 * @c: UBIFS file-system description object 1407 * @c: UBIFS file-system description object
1350 * @leaf_cb: called for each leaf node 1408 * @leaf_cb: called for each leaf node
1351 * @znode_cb: called for each indexing node 1409 * @znode_cb: called for each indexing node
1352 * @priv: private date which is passed to callbacks 1410 * @priv: private data which is passed to callbacks
1353 * 1411 *
1354 * This function walks the UBIFS index and calls the @leaf_cb for each leaf 1412 * This function walks the UBIFS index and calls the @leaf_cb for each leaf
1355 * node and @znode_cb for each indexing node. Returns zero in case of success 1413 * node and @znode_cb for each indexing node. Returns zero in case of success
@@ -2409,7 +2467,7 @@ void ubifs_debugging_exit(struct ubifs_info *c)
2409 * Root directory for UBIFS stuff in debugfs. Contains sub-directories which 2467 * Root directory for UBIFS stuff in debugfs. Contains sub-directories which
2410 * contain the stuff specific to particular file-system mounts. 2468 * contain the stuff specific to particular file-system mounts.
2411 */ 2469 */
2412static struct dentry *debugfs_rootdir; 2470static struct dentry *dfs_rootdir;
2413 2471
2414/** 2472/**
2415 * dbg_debugfs_init - initialize debugfs file-system. 2473 * dbg_debugfs_init - initialize debugfs file-system.
@@ -2421,9 +2479,9 @@ static struct dentry *debugfs_rootdir;
2421 */ 2479 */
2422int dbg_debugfs_init(void) 2480int dbg_debugfs_init(void)
2423{ 2481{
2424 debugfs_rootdir = debugfs_create_dir("ubifs", NULL); 2482 dfs_rootdir = debugfs_create_dir("ubifs", NULL);
2425 if (IS_ERR(debugfs_rootdir)) { 2483 if (IS_ERR(dfs_rootdir)) {
2426 int err = PTR_ERR(debugfs_rootdir); 2484 int err = PTR_ERR(dfs_rootdir);
2427 ubifs_err("cannot create \"ubifs\" debugfs directory, " 2485 ubifs_err("cannot create \"ubifs\" debugfs directory, "
2428 "error %d\n", err); 2486 "error %d\n", err);
2429 return err; 2487 return err;
@@ -2437,7 +2495,7 @@ int dbg_debugfs_init(void)
2437 */ 2495 */
2438void dbg_debugfs_exit(void) 2496void dbg_debugfs_exit(void)
2439{ 2497{
2440 debugfs_remove(debugfs_rootdir); 2498 debugfs_remove(dfs_rootdir);
2441} 2499}
2442 2500
2443static int open_debugfs_file(struct inode *inode, struct file *file) 2501static int open_debugfs_file(struct inode *inode, struct file *file)
@@ -2452,13 +2510,13 @@ static ssize_t write_debugfs_file(struct file *file, const char __user *buf,
2452 struct ubifs_info *c = file->private_data; 2510 struct ubifs_info *c = file->private_data;
2453 struct ubifs_debug_info *d = c->dbg; 2511 struct ubifs_debug_info *d = c->dbg;
2454 2512
2455 if (file->f_path.dentry == d->dump_lprops) 2513 if (file->f_path.dentry == d->dfs_dump_lprops)
2456 dbg_dump_lprops(c); 2514 dbg_dump_lprops(c);
2457 else if (file->f_path.dentry == d->dump_budg) { 2515 else if (file->f_path.dentry == d->dfs_dump_budg) {
2458 spin_lock(&c->space_lock); 2516 spin_lock(&c->space_lock);
2459 dbg_dump_budg(c); 2517 dbg_dump_budg(c);
2460 spin_unlock(&c->space_lock); 2518 spin_unlock(&c->space_lock);
2461 } else if (file->f_path.dentry == d->dump_tnc) { 2519 } else if (file->f_path.dentry == d->dfs_dump_tnc) {
2462 mutex_lock(&c->tnc_mutex); 2520 mutex_lock(&c->tnc_mutex);
2463 dbg_dump_tnc(c); 2521 dbg_dump_tnc(c);
2464 mutex_unlock(&c->tnc_mutex); 2522 mutex_unlock(&c->tnc_mutex);
@@ -2469,7 +2527,7 @@ static ssize_t write_debugfs_file(struct file *file, const char __user *buf,
2469 return count; 2527 return count;
2470} 2528}
2471 2529
2472static const struct file_operations debugfs_fops = { 2530static const struct file_operations dfs_fops = {
2473 .open = open_debugfs_file, 2531 .open = open_debugfs_file,
2474 .write = write_debugfs_file, 2532 .write = write_debugfs_file,
2475 .owner = THIS_MODULE, 2533 .owner = THIS_MODULE,
@@ -2494,36 +2552,32 @@ int dbg_debugfs_init_fs(struct ubifs_info *c)
2494 struct dentry *dent; 2552 struct dentry *dent;
2495 struct ubifs_debug_info *d = c->dbg; 2553 struct ubifs_debug_info *d = c->dbg;
2496 2554
2497 sprintf(d->debugfs_dir_name, "ubi%d_%d", c->vi.ubi_num, c->vi.vol_id); 2555 sprintf(d->dfs_dir_name, "ubi%d_%d", c->vi.ubi_num, c->vi.vol_id);
2498 d->debugfs_dir = debugfs_create_dir(d->debugfs_dir_name, 2556 d->dfs_dir = debugfs_create_dir(d->dfs_dir_name, dfs_rootdir);
2499 debugfs_rootdir); 2557 if (IS_ERR(d->dfs_dir)) {
2500 if (IS_ERR(d->debugfs_dir)) { 2558 err = PTR_ERR(d->dfs_dir);
2501 err = PTR_ERR(d->debugfs_dir);
2502 ubifs_err("cannot create \"%s\" debugfs directory, error %d\n", 2559 ubifs_err("cannot create \"%s\" debugfs directory, error %d\n",
2503 d->debugfs_dir_name, err); 2560 d->dfs_dir_name, err);
2504 goto out; 2561 goto out;
2505 } 2562 }
2506 2563
2507 fname = "dump_lprops"; 2564 fname = "dump_lprops";
2508 dent = debugfs_create_file(fname, S_IWUGO, d->debugfs_dir, c, 2565 dent = debugfs_create_file(fname, S_IWUGO, d->dfs_dir, c, &dfs_fops);
2509 &debugfs_fops);
2510 if (IS_ERR(dent)) 2566 if (IS_ERR(dent))
2511 goto out_remove; 2567 goto out_remove;
2512 d->dump_lprops = dent; 2568 d->dfs_dump_lprops = dent;
2513 2569
2514 fname = "dump_budg"; 2570 fname = "dump_budg";
2515 dent = debugfs_create_file(fname, S_IWUGO, d->debugfs_dir, c, 2571 dent = debugfs_create_file(fname, S_IWUGO, d->dfs_dir, c, &dfs_fops);
2516 &debugfs_fops);
2517 if (IS_ERR(dent)) 2572 if (IS_ERR(dent))
2518 goto out_remove; 2573 goto out_remove;
2519 d->dump_budg = dent; 2574 d->dfs_dump_budg = dent;
2520 2575
2521 fname = "dump_tnc"; 2576 fname = "dump_tnc";
2522 dent = debugfs_create_file(fname, S_IWUGO, d->debugfs_dir, c, 2577 dent = debugfs_create_file(fname, S_IWUGO, d->dfs_dir, c, &dfs_fops);
2523 &debugfs_fops);
2524 if (IS_ERR(dent)) 2578 if (IS_ERR(dent))
2525 goto out_remove; 2579 goto out_remove;
2526 d->dump_tnc = dent; 2580 d->dfs_dump_tnc = dent;
2527 2581
2528 return 0; 2582 return 0;
2529 2583
@@ -2531,7 +2585,7 @@ out_remove:
2531 err = PTR_ERR(dent); 2585 err = PTR_ERR(dent);
2532 ubifs_err("cannot create \"%s\" debugfs directory, error %d\n", 2586 ubifs_err("cannot create \"%s\" debugfs directory, error %d\n",
2533 fname, err); 2587 fname, err);
2534 debugfs_remove_recursive(d->debugfs_dir); 2588 debugfs_remove_recursive(d->dfs_dir);
2535out: 2589out:
2536 return err; 2590 return err;
2537} 2591}
@@ -2542,7 +2596,7 @@ out:
2542 */ 2596 */
2543void dbg_debugfs_exit_fs(struct ubifs_info *c) 2597void dbg_debugfs_exit_fs(struct ubifs_info *c)
2544{ 2598{
2545 debugfs_remove_recursive(c->dbg->debugfs_dir); 2599 debugfs_remove_recursive(c->dbg->dfs_dir);
2546} 2600}
2547 2601
2548#endif /* CONFIG_UBIFS_FS_DEBUG */ 2602#endif /* CONFIG_UBIFS_FS_DEBUG */
diff --git a/fs/ubifs/debug.h b/fs/ubifs/debug.h
index 9820d6999f7e..c1cd73b2e06e 100644
--- a/fs/ubifs/debug.h
+++ b/fs/ubifs/debug.h
@@ -41,15 +41,17 @@
41 * @chk_lpt_wastage: used by LPT tree size checker 41 * @chk_lpt_wastage: used by LPT tree size checker
42 * @chk_lpt_lebs: used by LPT tree size checker 42 * @chk_lpt_lebs: used by LPT tree size checker
43 * @new_nhead_offs: used by LPT tree size checker 43 * @new_nhead_offs: used by LPT tree size checker
44 * @new_ihead_lnum: used by debugging to check ihead_lnum 44 * @new_ihead_lnum: used by debugging to check @c->ihead_lnum
45 * @new_ihead_offs: used by debugging to check ihead_offs 45 * @new_ihead_offs: used by debugging to check @c->ihead_offs
46 * 46 *
47 * debugfs_dir_name: name of debugfs directory containing this file-system's 47 * @saved_lst: saved lprops statistics (used by 'dbg_save_space_info()')
48 * files 48 * @saved_free: saved free space (used by 'dbg_save_space_info()')
49 * debugfs_dir: direntry object of the file-system debugfs directory 49 *
50 * dump_lprops: "dump lprops" debugfs knob 50 * dfs_dir_name: name of debugfs directory containing this file-system's files
51 * dump_budg: "dump budgeting information" debugfs knob 51 * dfs_dir: direntry object of the file-system debugfs directory
52 * dump_tnc: "dump TNC" debugfs knob 52 * dfs_dump_lprops: "dump lprops" debugfs knob
53 * dfs_dump_budg: "dump budgeting information" debugfs knob
54 * dfs_dump_tnc: "dump TNC" debugfs knob
53 */ 55 */
54struct ubifs_debug_info { 56struct ubifs_debug_info {
55 void *buf; 57 void *buf;
@@ -69,11 +71,14 @@ struct ubifs_debug_info {
69 int new_ihead_lnum; 71 int new_ihead_lnum;
70 int new_ihead_offs; 72 int new_ihead_offs;
71 73
72 char debugfs_dir_name[100]; 74 struct ubifs_lp_stats saved_lst;
73 struct dentry *debugfs_dir; 75 long long saved_free;
74 struct dentry *dump_lprops; 76
75 struct dentry *dump_budg; 77 char dfs_dir_name[100];
76 struct dentry *dump_tnc; 78 struct dentry *dfs_dir;
79 struct dentry *dfs_dump_lprops;
80 struct dentry *dfs_dump_budg;
81 struct dentry *dfs_dump_tnc;
77}; 82};
78 83
79#define ubifs_assert(expr) do { \ 84#define ubifs_assert(expr) do { \
@@ -297,7 +302,8 @@ int dbg_walk_index(struct ubifs_info *c, dbg_leaf_callback leaf_cb,
297 dbg_znode_callback znode_cb, void *priv); 302 dbg_znode_callback znode_cb, void *priv);
298 303
299/* Checking functions */ 304/* Checking functions */
300 305void dbg_save_space_info(struct ubifs_info *c);
306int dbg_check_space_info(struct ubifs_info *c);
301int dbg_check_lprops(struct ubifs_info *c); 307int dbg_check_lprops(struct ubifs_info *c);
302int dbg_old_index_check_init(struct ubifs_info *c, struct ubifs_zbranch *zroot); 308int dbg_old_index_check_init(struct ubifs_info *c, struct ubifs_zbranch *zroot);
303int dbg_check_old_index(struct ubifs_info *c, struct ubifs_zbranch *zroot); 309int dbg_check_old_index(struct ubifs_info *c, struct ubifs_zbranch *zroot);
@@ -439,6 +445,8 @@ void dbg_debugfs_exit_fs(struct ubifs_info *c);
439 445
440#define dbg_walk_index(c, leaf_cb, znode_cb, priv) 0 446#define dbg_walk_index(c, leaf_cb, znode_cb, priv) 0
441#define dbg_old_index_check_init(c, zroot) 0 447#define dbg_old_index_check_init(c, zroot) 0
448#define dbg_save_space_info(c) ({})
449#define dbg_check_space_info(c) 0
442#define dbg_check_old_index(c, zroot) 0 450#define dbg_check_old_index(c, zroot) 0
443#define dbg_check_cats(c) 0 451#define dbg_check_cats(c) 0
444#define dbg_check_ltab(c) 0 452#define dbg_check_ltab(c) 0
diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c
index f448ab1f9c38..f55d523c52bb 100644
--- a/fs/ubifs/dir.c
+++ b/fs/ubifs/dir.c
@@ -482,30 +482,29 @@ static int ubifs_dir_release(struct inode *dir, struct file *file)
482} 482}
483 483
484/** 484/**
485 * lock_2_inodes - lock two UBIFS inodes. 485 * lock_2_inodes - a wrapper for locking two UBIFS inodes.
486 * @inode1: first inode 486 * @inode1: first inode
487 * @inode2: second inode 487 * @inode2: second inode
488 *
489 * We do not implement any tricks to guarantee strict lock ordering, because
490 * VFS has already done it for us on the @i_mutex. So this is just a simple
491 * wrapper function.
488 */ 492 */
489static void lock_2_inodes(struct inode *inode1, struct inode *inode2) 493static void lock_2_inodes(struct inode *inode1, struct inode *inode2)
490{ 494{
491 if (inode1->i_ino < inode2->i_ino) { 495 mutex_lock_nested(&ubifs_inode(inode1)->ui_mutex, WB_MUTEX_1);
492 mutex_lock_nested(&ubifs_inode(inode1)->ui_mutex, WB_MUTEX_2); 496 mutex_lock_nested(&ubifs_inode(inode2)->ui_mutex, WB_MUTEX_2);
493 mutex_lock_nested(&ubifs_inode(inode2)->ui_mutex, WB_MUTEX_3);
494 } else {
495 mutex_lock_nested(&ubifs_inode(inode2)->ui_mutex, WB_MUTEX_2);
496 mutex_lock_nested(&ubifs_inode(inode1)->ui_mutex, WB_MUTEX_3);
497 }
498} 497}
499 498
500/** 499/**
501 * unlock_2_inodes - unlock two UBIFS inodes inodes. 500 * unlock_2_inodes - a wrapper for unlocking two UBIFS inodes.
502 * @inode1: first inode 501 * @inode1: first inode
503 * @inode2: second inode 502 * @inode2: second inode
504 */ 503 */
505static void unlock_2_inodes(struct inode *inode1, struct inode *inode2) 504static void unlock_2_inodes(struct inode *inode1, struct inode *inode2)
506{ 505{
507 mutex_unlock(&ubifs_inode(inode1)->ui_mutex);
508 mutex_unlock(&ubifs_inode(inode2)->ui_mutex); 506 mutex_unlock(&ubifs_inode(inode2)->ui_mutex);
507 mutex_unlock(&ubifs_inode(inode1)->ui_mutex);
509} 508}
510 509
511static int ubifs_link(struct dentry *old_dentry, struct inode *dir, 510static int ubifs_link(struct dentry *old_dentry, struct inode *dir,
@@ -527,6 +526,8 @@ static int ubifs_link(struct dentry *old_dentry, struct inode *dir,
527 dbg_gen("dent '%.*s' to ino %lu (nlink %d) in dir ino %lu", 526 dbg_gen("dent '%.*s' to ino %lu (nlink %d) in dir ino %lu",
528 dentry->d_name.len, dentry->d_name.name, inode->i_ino, 527 dentry->d_name.len, dentry->d_name.name, inode->i_ino,
529 inode->i_nlink, dir->i_ino); 528 inode->i_nlink, dir->i_ino);
529 ubifs_assert(mutex_is_locked(&dir->i_mutex));
530 ubifs_assert(mutex_is_locked(&inode->i_mutex));
530 err = dbg_check_synced_i_size(inode); 531 err = dbg_check_synced_i_size(inode);
531 if (err) 532 if (err)
532 return err; 533 return err;
@@ -580,6 +581,8 @@ static int ubifs_unlink(struct inode *dir, struct dentry *dentry)
580 dbg_gen("dent '%.*s' from ino %lu (nlink %d) in dir ino %lu", 581 dbg_gen("dent '%.*s' from ino %lu (nlink %d) in dir ino %lu",
581 dentry->d_name.len, dentry->d_name.name, inode->i_ino, 582 dentry->d_name.len, dentry->d_name.name, inode->i_ino,
582 inode->i_nlink, dir->i_ino); 583 inode->i_nlink, dir->i_ino);
584 ubifs_assert(mutex_is_locked(&dir->i_mutex));
585 ubifs_assert(mutex_is_locked(&inode->i_mutex));
583 err = dbg_check_synced_i_size(inode); 586 err = dbg_check_synced_i_size(inode);
584 if (err) 587 if (err)
585 return err; 588 return err;
@@ -667,7 +670,8 @@ static int ubifs_rmdir(struct inode *dir, struct dentry *dentry)
667 670
668 dbg_gen("directory '%.*s', ino %lu in dir ino %lu", dentry->d_name.len, 671 dbg_gen("directory '%.*s', ino %lu in dir ino %lu", dentry->d_name.len,
669 dentry->d_name.name, inode->i_ino, dir->i_ino); 672 dentry->d_name.name, inode->i_ino, dir->i_ino);
670 673 ubifs_assert(mutex_is_locked(&dir->i_mutex));
674 ubifs_assert(mutex_is_locked(&inode->i_mutex));
671 err = check_dir_empty(c, dentry->d_inode); 675 err = check_dir_empty(c, dentry->d_inode);
672 if (err) 676 if (err)
673 return err; 677 return err;
@@ -922,59 +926,30 @@ out_budg:
922} 926}
923 927
924/** 928/**
925 * lock_3_inodes - lock three UBIFS inodes for rename. 929 * lock_3_inodes - a wrapper for locking three UBIFS inodes.
926 * @inode1: first inode 930 * @inode1: first inode
927 * @inode2: second inode 931 * @inode2: second inode
928 * @inode3: third inode 932 * @inode3: third inode
929 * 933 *
930 * For 'ubifs_rename()', @inode1 may be the same as @inode2 whereas @inode3 may 934 * This function is used for 'ubifs_rename()' and @inode1 may be the same as
931 * be null. 935 * @inode2 whereas @inode3 may be %NULL.
936 *
937 * We do not implement any tricks to guarantee strict lock ordering, because
938 * VFS has already done it for us on the @i_mutex. So this is just a simple
939 * wrapper function.
932 */ 940 */
933static void lock_3_inodes(struct inode *inode1, struct inode *inode2, 941static void lock_3_inodes(struct inode *inode1, struct inode *inode2,
934 struct inode *inode3) 942 struct inode *inode3)
935{ 943{
936 struct inode *i1, *i2, *i3; 944 mutex_lock_nested(&ubifs_inode(inode1)->ui_mutex, WB_MUTEX_1);
937 945 if (inode2 != inode1)
938 if (!inode3) { 946 mutex_lock_nested(&ubifs_inode(inode2)->ui_mutex, WB_MUTEX_2);
939 if (inode1 != inode2) { 947 if (inode3)
940 lock_2_inodes(inode1, inode2); 948 mutex_lock_nested(&ubifs_inode(inode3)->ui_mutex, WB_MUTEX_3);
941 return;
942 }
943 mutex_lock_nested(&ubifs_inode(inode1)->ui_mutex, WB_MUTEX_1);
944 return;
945 }
946
947 if (inode1 == inode2) {
948 lock_2_inodes(inode1, inode3);
949 return;
950 }
951
952 /* 3 different inodes */
953 if (inode1 < inode2) {
954 i3 = inode2;
955 if (inode1 < inode3) {
956 i1 = inode1;
957 i2 = inode3;
958 } else {
959 i1 = inode3;
960 i2 = inode1;
961 }
962 } else {
963 i3 = inode1;
964 if (inode2 < inode3) {
965 i1 = inode2;
966 i2 = inode3;
967 } else {
968 i1 = inode3;
969 i2 = inode2;
970 }
971 }
972 mutex_lock_nested(&ubifs_inode(i1)->ui_mutex, WB_MUTEX_1);
973 lock_2_inodes(i2, i3);
974} 949}
975 950
976/** 951/**
977 * unlock_3_inodes - unlock three UBIFS inodes for rename. 952 * unlock_3_inodes - a wrapper for unlocking three UBIFS inodes for rename.
978 * @inode1: first inode 953 * @inode1: first inode
979 * @inode2: second inode 954 * @inode2: second inode
980 * @inode3: third inode 955 * @inode3: third inode
@@ -982,11 +957,11 @@ static void lock_3_inodes(struct inode *inode1, struct inode *inode2,
982static void unlock_3_inodes(struct inode *inode1, struct inode *inode2, 957static void unlock_3_inodes(struct inode *inode1, struct inode *inode2,
983 struct inode *inode3) 958 struct inode *inode3)
984{ 959{
985 mutex_unlock(&ubifs_inode(inode1)->ui_mutex);
986 if (inode1 != inode2)
987 mutex_unlock(&ubifs_inode(inode2)->ui_mutex);
988 if (inode3) 960 if (inode3)
989 mutex_unlock(&ubifs_inode(inode3)->ui_mutex); 961 mutex_unlock(&ubifs_inode(inode3)->ui_mutex);
962 if (inode1 != inode2)
963 mutex_unlock(&ubifs_inode(inode2)->ui_mutex);
964 mutex_unlock(&ubifs_inode(inode1)->ui_mutex);
990} 965}
991 966
992static int ubifs_rename(struct inode *old_dir, struct dentry *old_dentry, 967static int ubifs_rename(struct inode *old_dir, struct dentry *old_dentry,
@@ -1020,6 +995,11 @@ static int ubifs_rename(struct inode *old_dir, struct dentry *old_dentry,
1020 "dir ino %lu", old_dentry->d_name.len, old_dentry->d_name.name, 995 "dir ino %lu", old_dentry->d_name.len, old_dentry->d_name.name,
1021 old_inode->i_ino, old_dir->i_ino, new_dentry->d_name.len, 996 old_inode->i_ino, old_dir->i_ino, new_dentry->d_name.len,
1022 new_dentry->d_name.name, new_dir->i_ino); 997 new_dentry->d_name.name, new_dir->i_ino);
998 ubifs_assert(mutex_is_locked(&old_dir->i_mutex));
999 ubifs_assert(mutex_is_locked(&new_dir->i_mutex));
1000 if (unlink)
1001 ubifs_assert(mutex_is_locked(&new_inode->i_mutex));
1002
1023 1003
1024 if (unlink && is_dir) { 1004 if (unlink && is_dir) {
1025 err = check_dir_empty(c, new_inode); 1005 err = check_dir_empty(c, new_inode);
@@ -1199,7 +1179,7 @@ int ubifs_getattr(struct vfsmount *mnt, struct dentry *dentry,
1199 return 0; 1179 return 0;
1200} 1180}
1201 1181
1202struct inode_operations ubifs_dir_inode_operations = { 1182const struct inode_operations ubifs_dir_inode_operations = {
1203 .lookup = ubifs_lookup, 1183 .lookup = ubifs_lookup,
1204 .create = ubifs_create, 1184 .create = ubifs_create,
1205 .link = ubifs_link, 1185 .link = ubifs_link,
@@ -1219,7 +1199,7 @@ struct inode_operations ubifs_dir_inode_operations = {
1219#endif 1199#endif
1220}; 1200};
1221 1201
1222struct file_operations ubifs_dir_operations = { 1202const struct file_operations ubifs_dir_operations = {
1223 .llseek = ubifs_dir_llseek, 1203 .llseek = ubifs_dir_llseek,
1224 .release = ubifs_dir_release, 1204 .release = ubifs_dir_release,
1225 .read = generic_read_dir, 1205 .read = generic_read_dir,
diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c
index bf37374567fa..93b6de51f261 100644
--- a/fs/ubifs/file.c
+++ b/fs/ubifs/file.c
@@ -432,7 +432,6 @@ static int ubifs_write_begin(struct file *file, struct address_space *mapping,
432 int uninitialized_var(err), appending = !!(pos + len > inode->i_size); 432 int uninitialized_var(err), appending = !!(pos + len > inode->i_size);
433 struct page *page; 433 struct page *page;
434 434
435
436 ubifs_assert(ubifs_inode(inode)->ui_size == inode->i_size); 435 ubifs_assert(ubifs_inode(inode)->ui_size == inode->i_size);
437 436
438 if (unlikely(c->ro_media)) 437 if (unlikely(c->ro_media))
@@ -1541,7 +1540,7 @@ static int ubifs_file_mmap(struct file *file, struct vm_area_struct *vma)
1541 return 0; 1540 return 0;
1542} 1541}
1543 1542
1544struct address_space_operations ubifs_file_address_operations = { 1543const struct address_space_operations ubifs_file_address_operations = {
1545 .readpage = ubifs_readpage, 1544 .readpage = ubifs_readpage,
1546 .writepage = ubifs_writepage, 1545 .writepage = ubifs_writepage,
1547 .write_begin = ubifs_write_begin, 1546 .write_begin = ubifs_write_begin,
@@ -1551,7 +1550,7 @@ struct address_space_operations ubifs_file_address_operations = {
1551 .releasepage = ubifs_releasepage, 1550 .releasepage = ubifs_releasepage,
1552}; 1551};
1553 1552
1554struct inode_operations ubifs_file_inode_operations = { 1553const struct inode_operations ubifs_file_inode_operations = {
1555 .setattr = ubifs_setattr, 1554 .setattr = ubifs_setattr,
1556 .getattr = ubifs_getattr, 1555 .getattr = ubifs_getattr,
1557#ifdef CONFIG_UBIFS_FS_XATTR 1556#ifdef CONFIG_UBIFS_FS_XATTR
@@ -1562,14 +1561,14 @@ struct inode_operations ubifs_file_inode_operations = {
1562#endif 1561#endif
1563}; 1562};
1564 1563
1565struct inode_operations ubifs_symlink_inode_operations = { 1564const struct inode_operations ubifs_symlink_inode_operations = {
1566 .readlink = generic_readlink, 1565 .readlink = generic_readlink,
1567 .follow_link = ubifs_follow_link, 1566 .follow_link = ubifs_follow_link,
1568 .setattr = ubifs_setattr, 1567 .setattr = ubifs_setattr,
1569 .getattr = ubifs_getattr, 1568 .getattr = ubifs_getattr,
1570}; 1569};
1571 1570
1572struct file_operations ubifs_file_operations = { 1571const struct file_operations ubifs_file_operations = {
1573 .llseek = generic_file_llseek, 1572 .llseek = generic_file_llseek,
1574 .read = do_sync_read, 1573 .read = do_sync_read,
1575 .write = do_sync_write, 1574 .write = do_sync_write,
diff --git a/fs/ubifs/gc.c b/fs/ubifs/gc.c
index 9832f9abe28e..a711d33b3d3e 100644
--- a/fs/ubifs/gc.c
+++ b/fs/ubifs/gc.c
@@ -31,6 +31,26 @@
31 * to be reused. Garbage collection will cause the number of dirty index nodes 31 * to be reused. Garbage collection will cause the number of dirty index nodes
32 * to grow, however sufficient space is reserved for the index to ensure the 32 * to grow, however sufficient space is reserved for the index to ensure the
33 * commit will never run out of space. 33 * commit will never run out of space.
34 *
35 * Notes about dead watermark. At current UBIFS implementation we assume that
36 * LEBs which have less than @c->dead_wm bytes of free + dirty space are full
37 * and not worth garbage-collecting. The dead watermark is one min. I/O unit
38 * size, or min. UBIFS node size, depending on what is greater. Indeed, UBIFS
39 * Garbage Collector has to synchronize the GC head's write buffer before
40 * returning, so this is about wasting one min. I/O unit. However, UBIFS GC can
41 * actually reclaim even very small pieces of dirty space by garbage collecting
42 * enough dirty LEBs, but we do not bother doing this at this implementation.
43 *
44 * Notes about dark watermark. The results of GC work depends on how big are
45 * the UBIFS nodes GC deals with. Large nodes make GC waste more space. Indeed,
46 * if GC move data from LEB A to LEB B and nodes in LEB A are large, GC would
47 * have to waste large pieces of free space at the end of LEB B, because nodes
48 * from LEB A would not fit. And the worst situation is when all nodes are of
49 * maximum size. So dark watermark is the amount of free + dirty space in LEB
50 * which are guaranteed to be reclaimable. If LEB has less space, the GC migh
51 * be unable to reclaim it. So, LEBs with free + dirty greater than dark
52 * watermark are "good" LEBs from GC's point of few. The other LEBs are not so
53 * good, and GC takes extra care when moving them.
34 */ 54 */
35 55
36#include <linux/pagemap.h> 56#include <linux/pagemap.h>
@@ -381,7 +401,7 @@ int ubifs_garbage_collect_leb(struct ubifs_info *c, struct ubifs_lprops *lp)
381 401
382 /* 402 /*
383 * Don't release the LEB until after the next commit, because 403 * Don't release the LEB until after the next commit, because
384 * it may contain date which is needed for recovery. So 404 * it may contain data which is needed for recovery. So
385 * although we freed this LEB, it will become usable only after 405 * although we freed this LEB, it will become usable only after
386 * the commit. 406 * the commit.
387 */ 407 */
@@ -810,8 +830,9 @@ out:
810 * ubifs_destroy_idx_gc - destroy idx_gc list. 830 * ubifs_destroy_idx_gc - destroy idx_gc list.
811 * @c: UBIFS file-system description object 831 * @c: UBIFS file-system description object
812 * 832 *
813 * This function destroys the idx_gc list. It is called when unmounting or 833 * This function destroys the @c->idx_gc list. It is called when unmounting
814 * remounting read-only so locks are not needed. 834 * so locks are not needed. Returns zero in case of success and a negative
835 * error code in case of failure.
815 */ 836 */
816void ubifs_destroy_idx_gc(struct ubifs_info *c) 837void ubifs_destroy_idx_gc(struct ubifs_info *c)
817{ 838{
@@ -824,7 +845,6 @@ void ubifs_destroy_idx_gc(struct ubifs_info *c)
824 list_del(&idx_gc->list); 845 list_del(&idx_gc->list);
825 kfree(idx_gc); 846 kfree(idx_gc);
826 } 847 }
827
828} 848}
829 849
830/** 850/**
diff --git a/fs/ubifs/io.c b/fs/ubifs/io.c
index 01682713af69..e8e632a1dcdf 100644
--- a/fs/ubifs/io.c
+++ b/fs/ubifs/io.c
@@ -29,7 +29,7 @@
29 * would have been wasted for padding to the nearest minimal I/O unit boundary. 29 * would have been wasted for padding to the nearest minimal I/O unit boundary.
30 * Instead, data first goes to the write-buffer and is flushed when the 30 * Instead, data first goes to the write-buffer and is flushed when the
31 * buffer is full or when it is not used for some time (by timer). This is 31 * buffer is full or when it is not used for some time (by timer). This is
32 * similarto the mechanism is used by JFFS2. 32 * similar to the mechanism is used by JFFS2.
33 * 33 *
34 * Write-buffers are defined by 'struct ubifs_wbuf' objects and protected by 34 * Write-buffers are defined by 'struct ubifs_wbuf' objects and protected by
35 * mutexes defined inside these objects. Since sometimes upper-level code 35 * mutexes defined inside these objects. Since sometimes upper-level code
@@ -75,7 +75,7 @@ void ubifs_ro_mode(struct ubifs_info *c, int err)
75 * @lnum: logical eraseblock number 75 * @lnum: logical eraseblock number
76 * @offs: offset within the logical eraseblock 76 * @offs: offset within the logical eraseblock
77 * @quiet: print no messages 77 * @quiet: print no messages
78 * @chk_crc: indicates whether to always check the CRC 78 * @must_chk_crc: indicates whether to always check the CRC
79 * 79 *
80 * This function checks node magic number and CRC checksum. This function also 80 * This function checks node magic number and CRC checksum. This function also
81 * validates node length to prevent UBIFS from becoming crazy when an attacker 81 * validates node length to prevent UBIFS from becoming crazy when an attacker
@@ -83,11 +83,17 @@ void ubifs_ro_mode(struct ubifs_info *c, int err)
83 * node length in the common header could cause UBIFS to read memory outside of 83 * node length in the common header could cause UBIFS to read memory outside of
84 * allocated buffer when checking the CRC checksum. 84 * allocated buffer when checking the CRC checksum.
85 * 85 *
86 * This function returns zero in case of success %-EUCLEAN in case of bad CRC 86 * This function may skip data nodes CRC checking if @c->no_chk_data_crc is
87 * or magic. 87 * true, which is controlled by corresponding UBIFS mount option. However, if
88 * @must_chk_crc is true, then @c->no_chk_data_crc is ignored and CRC is
89 * checked. Similarly, if @c->always_chk_crc is true, @c->no_chk_data_crc is
90 * ignored and CRC is checked.
91 *
92 * This function returns zero in case of success and %-EUCLEAN in case of bad
93 * CRC or magic.
88 */ 94 */
89int ubifs_check_node(const struct ubifs_info *c, const void *buf, int lnum, 95int ubifs_check_node(const struct ubifs_info *c, const void *buf, int lnum,
90 int offs, int quiet, int chk_crc) 96 int offs, int quiet, int must_chk_crc)
91{ 97{
92 int err = -EINVAL, type, node_len; 98 int err = -EINVAL, type, node_len;
93 uint32_t crc, node_crc, magic; 99 uint32_t crc, node_crc, magic;
@@ -123,9 +129,9 @@ int ubifs_check_node(const struct ubifs_info *c, const void *buf, int lnum,
123 node_len > c->ranges[type].max_len) 129 node_len > c->ranges[type].max_len)
124 goto out_len; 130 goto out_len;
125 131
126 if (!chk_crc && type == UBIFS_DATA_NODE && !c->always_chk_crc) 132 if (!must_chk_crc && type == UBIFS_DATA_NODE && !c->always_chk_crc &&
127 if (c->no_chk_data_crc) 133 c->no_chk_data_crc)
128 return 0; 134 return 0;
129 135
130 crc = crc32(UBIFS_CRC32_INIT, buf + 8, node_len - 8); 136 crc = crc32(UBIFS_CRC32_INIT, buf + 8, node_len - 8);
131 node_crc = le32_to_cpu(ch->crc); 137 node_crc = le32_to_cpu(ch->crc);
diff --git a/fs/ubifs/journal.c b/fs/ubifs/journal.c
index 9b7c54e0cd2a..a11ca0958a23 100644
--- a/fs/ubifs/journal.c
+++ b/fs/ubifs/journal.c
@@ -208,7 +208,7 @@ again:
208 offs = 0; 208 offs = 0;
209 209
210out: 210out:
211 err = ubifs_wbuf_seek_nolock(wbuf, lnum, offs, UBI_SHORTTERM); 211 err = ubifs_wbuf_seek_nolock(wbuf, lnum, offs, wbuf->dtype);
212 if (err) 212 if (err)
213 goto out_unlock; 213 goto out_unlock;
214 214
diff --git a/fs/ubifs/lprops.c b/fs/ubifs/lprops.c
index dfd2bcece27a..4cdd284dea56 100644
--- a/fs/ubifs/lprops.c
+++ b/fs/ubifs/lprops.c
@@ -635,10 +635,10 @@ const struct ubifs_lprops *ubifs_change_lp(struct ubifs_info *c,
635 * @c: UBIFS file-system description object 635 * @c: UBIFS file-system description object
636 * @st: return statistics 636 * @st: return statistics
637 */ 637 */
638void ubifs_get_lp_stats(struct ubifs_info *c, struct ubifs_lp_stats *st) 638void ubifs_get_lp_stats(struct ubifs_info *c, struct ubifs_lp_stats *lst)
639{ 639{
640 spin_lock(&c->space_lock); 640 spin_lock(&c->space_lock);
641 memcpy(st, &c->lst, sizeof(struct ubifs_lp_stats)); 641 memcpy(lst, &c->lst, sizeof(struct ubifs_lp_stats));
642 spin_unlock(&c->space_lock); 642 spin_unlock(&c->space_lock);
643} 643}
644 644
@@ -678,6 +678,9 @@ int ubifs_change_one_lp(struct ubifs_info *c, int lnum, int free, int dirty,
678 678
679out: 679out:
680 ubifs_release_lprops(c); 680 ubifs_release_lprops(c);
681 if (err)
682 ubifs_err("cannot change properties of LEB %d, error %d",
683 lnum, err);
681 return err; 684 return err;
682} 685}
683 686
@@ -714,6 +717,9 @@ int ubifs_update_one_lp(struct ubifs_info *c, int lnum, int free, int dirty,
714 717
715out: 718out:
716 ubifs_release_lprops(c); 719 ubifs_release_lprops(c);
720 if (err)
721 ubifs_err("cannot update properties of LEB %d, error %d",
722 lnum, err);
717 return err; 723 return err;
718} 724}
719 725
@@ -737,6 +743,8 @@ int ubifs_read_one_lp(struct ubifs_info *c, int lnum, struct ubifs_lprops *lp)
737 lpp = ubifs_lpt_lookup(c, lnum); 743 lpp = ubifs_lpt_lookup(c, lnum);
738 if (IS_ERR(lpp)) { 744 if (IS_ERR(lpp)) {
739 err = PTR_ERR(lpp); 745 err = PTR_ERR(lpp);
746 ubifs_err("cannot read properties of LEB %d, error %d",
747 lnum, err);
740 goto out; 748 goto out;
741 } 749 }
742 750
diff --git a/fs/ubifs/lpt_commit.c b/fs/ubifs/lpt_commit.c
index 96ca95707175..3216a1f277f8 100644
--- a/fs/ubifs/lpt_commit.c
+++ b/fs/ubifs/lpt_commit.c
@@ -556,23 +556,23 @@ no_space:
556} 556}
557 557
558/** 558/**
559 * next_pnode - find next pnode. 559 * next_pnode_to_dirty - find next pnode to dirty.
560 * @c: UBIFS file-system description object 560 * @c: UBIFS file-system description object
561 * @pnode: pnode 561 * @pnode: pnode
562 * 562 *
563 * This function returns the next pnode or %NULL if there are no more pnodes. 563 * This function returns the next pnode to dirty or %NULL if there are no more
564 * pnodes. Note that pnodes that have never been written (lnum == 0) are
565 * skipped.
564 */ 566 */
565static struct ubifs_pnode *next_pnode(struct ubifs_info *c, 567static struct ubifs_pnode *next_pnode_to_dirty(struct ubifs_info *c,
566 struct ubifs_pnode *pnode) 568 struct ubifs_pnode *pnode)
567{ 569{
568 struct ubifs_nnode *nnode; 570 struct ubifs_nnode *nnode;
569 int iip; 571 int iip;
570 572
571 /* Try to go right */ 573 /* Try to go right */
572 nnode = pnode->parent; 574 nnode = pnode->parent;
573 iip = pnode->iip + 1; 575 for (iip = pnode->iip + 1; iip < UBIFS_LPT_FANOUT; iip++) {
574 if (iip < UBIFS_LPT_FANOUT) {
575 /* We assume here that LEB zero is never an LPT LEB */
576 if (nnode->nbranch[iip].lnum) 576 if (nnode->nbranch[iip].lnum)
577 return ubifs_get_pnode(c, nnode, iip); 577 return ubifs_get_pnode(c, nnode, iip);
578 } 578 }
@@ -583,8 +583,11 @@ static struct ubifs_pnode *next_pnode(struct ubifs_info *c,
583 nnode = nnode->parent; 583 nnode = nnode->parent;
584 if (!nnode) 584 if (!nnode)
585 return NULL; 585 return NULL;
586 /* We assume here that LEB zero is never an LPT LEB */ 586 for (; iip < UBIFS_LPT_FANOUT; iip++) {
587 } while (iip >= UBIFS_LPT_FANOUT || !nnode->nbranch[iip].lnum); 587 if (nnode->nbranch[iip].lnum)
588 break;
589 }
590 } while (iip >= UBIFS_LPT_FANOUT);
588 591
589 /* Go right */ 592 /* Go right */
590 nnode = ubifs_get_nnode(c, nnode, iip); 593 nnode = ubifs_get_nnode(c, nnode, iip);
@@ -593,12 +596,29 @@ static struct ubifs_pnode *next_pnode(struct ubifs_info *c,
593 596
594 /* Go down to level 1 */ 597 /* Go down to level 1 */
595 while (nnode->level > 1) { 598 while (nnode->level > 1) {
596 nnode = ubifs_get_nnode(c, nnode, 0); 599 for (iip = 0; iip < UBIFS_LPT_FANOUT; iip++) {
600 if (nnode->nbranch[iip].lnum)
601 break;
602 }
603 if (iip >= UBIFS_LPT_FANOUT) {
604 /*
605 * Should not happen, but we need to keep going
606 * if it does.
607 */
608 iip = 0;
609 }
610 nnode = ubifs_get_nnode(c, nnode, iip);
597 if (IS_ERR(nnode)) 611 if (IS_ERR(nnode))
598 return (void *)nnode; 612 return (void *)nnode;
599 } 613 }
600 614
601 return ubifs_get_pnode(c, nnode, 0); 615 for (iip = 0; iip < UBIFS_LPT_FANOUT; iip++)
616 if (nnode->nbranch[iip].lnum)
617 break;
618 if (iip >= UBIFS_LPT_FANOUT)
619 /* Should not happen, but we need to keep going if it does */
620 iip = 0;
621 return ubifs_get_pnode(c, nnode, iip);
602} 622}
603 623
604/** 624/**
@@ -688,7 +708,7 @@ static int make_tree_dirty(struct ubifs_info *c)
688 pnode = pnode_lookup(c, 0); 708 pnode = pnode_lookup(c, 0);
689 while (pnode) { 709 while (pnode) {
690 do_make_pnode_dirty(c, pnode); 710 do_make_pnode_dirty(c, pnode);
691 pnode = next_pnode(c, pnode); 711 pnode = next_pnode_to_dirty(c, pnode);
692 if (IS_ERR(pnode)) 712 if (IS_ERR(pnode))
693 return PTR_ERR(pnode); 713 return PTR_ERR(pnode);
694 } 714 }
diff --git a/fs/ubifs/master.c b/fs/ubifs/master.c
index 71d5493bf565..a88f33801b98 100644
--- a/fs/ubifs/master.c
+++ b/fs/ubifs/master.c
@@ -354,7 +354,7 @@ int ubifs_write_master(struct ubifs_info *c)
354 int err, lnum, offs, len; 354 int err, lnum, offs, len;
355 355
356 if (c->ro_media) 356 if (c->ro_media)
357 return -EINVAL; 357 return -EROFS;
358 358
359 lnum = UBIFS_MST_LNUM; 359 lnum = UBIFS_MST_LNUM;
360 offs = c->mst_offs + c->mst_node_alsz; 360 offs = c->mst_offs + c->mst_node_alsz;
diff --git a/fs/ubifs/orphan.c b/fs/ubifs/orphan.c
index 9e6f403f170e..152a7b34a141 100644
--- a/fs/ubifs/orphan.c
+++ b/fs/ubifs/orphan.c
@@ -46,7 +46,7 @@
46 * Orphans are accumulated in a rb-tree. When an inode's link count drops to 46 * Orphans are accumulated in a rb-tree. When an inode's link count drops to
47 * zero, the inode number is added to the rb-tree. It is removed from the tree 47 * zero, the inode number is added to the rb-tree. It is removed from the tree
48 * when the inode is deleted. Any new orphans that are in the orphan tree when 48 * when the inode is deleted. Any new orphans that are in the orphan tree when
49 * the commit is run, are written to the orphan area in 1 or more orph nodes. 49 * the commit is run, are written to the orphan area in 1 or more orphan nodes.
50 * If the orphan area is full, it is consolidated to make space. There is 50 * If the orphan area is full, it is consolidated to make space. There is
51 * always enough space because validation prevents the user from creating more 51 * always enough space because validation prevents the user from creating more
52 * than the maximum number of orphans allowed. 52 * than the maximum number of orphans allowed.
@@ -231,7 +231,7 @@ static int tot_avail_orphs(struct ubifs_info *c)
231} 231}
232 232
233/** 233/**
234 * do_write_orph_node - write a node 234 * do_write_orph_node - write a node to the orphan head.
235 * @c: UBIFS file-system description object 235 * @c: UBIFS file-system description object
236 * @len: length of node 236 * @len: length of node
237 * @atomic: write atomically 237 * @atomic: write atomically
@@ -264,11 +264,11 @@ static int do_write_orph_node(struct ubifs_info *c, int len, int atomic)
264} 264}
265 265
266/** 266/**
267 * write_orph_node - write an orph node 267 * write_orph_node - write an orphan node.
268 * @c: UBIFS file-system description object 268 * @c: UBIFS file-system description object
269 * @atomic: write atomically 269 * @atomic: write atomically
270 * 270 *
271 * This function builds an orph node from the cnext list and writes it to the 271 * This function builds an orphan node from the cnext list and writes it to the
272 * orphan head. On success, %0 is returned, otherwise a negative error code 272 * orphan head. On success, %0 is returned, otherwise a negative error code
273 * is returned. 273 * is returned.
274 */ 274 */
@@ -326,11 +326,11 @@ static int write_orph_node(struct ubifs_info *c, int atomic)
326} 326}
327 327
328/** 328/**
329 * write_orph_nodes - write orph nodes until there are no more to commit 329 * write_orph_nodes - write orphan nodes until there are no more to commit.
330 * @c: UBIFS file-system description object 330 * @c: UBIFS file-system description object
331 * @atomic: write atomically 331 * @atomic: write atomically
332 * 332 *
333 * This function writes orph nodes for all the orphans to commit. On success, 333 * This function writes orphan nodes for all the orphans to commit. On success,
334 * %0 is returned, otherwise a negative error code is returned. 334 * %0 is returned, otherwise a negative error code is returned.
335 */ 335 */
336static int write_orph_nodes(struct ubifs_info *c, int atomic) 336static int write_orph_nodes(struct ubifs_info *c, int atomic)
@@ -478,14 +478,14 @@ int ubifs_orphan_end_commit(struct ubifs_info *c)
478} 478}
479 479
480/** 480/**
481 * clear_orphans - erase all LEBs used for orphans. 481 * ubifs_clear_orphans - erase all LEBs used for orphans.
482 * @c: UBIFS file-system description object 482 * @c: UBIFS file-system description object
483 * 483 *
484 * If recovery is not required, then the orphans from the previous session 484 * If recovery is not required, then the orphans from the previous session
485 * are not needed. This function locates the LEBs used to record 485 * are not needed. This function locates the LEBs used to record
486 * orphans, and un-maps them. 486 * orphans, and un-maps them.
487 */ 487 */
488static int clear_orphans(struct ubifs_info *c) 488int ubifs_clear_orphans(struct ubifs_info *c)
489{ 489{
490 int lnum, err; 490 int lnum, err;
491 491
@@ -547,9 +547,9 @@ static int insert_dead_orphan(struct ubifs_info *c, ino_t inum)
547 * do_kill_orphans - remove orphan inodes from the index. 547 * do_kill_orphans - remove orphan inodes from the index.
548 * @c: UBIFS file-system description object 548 * @c: UBIFS file-system description object
549 * @sleb: scanned LEB 549 * @sleb: scanned LEB
550 * @last_cmt_no: cmt_no of last orph node read is passed and returned here 550 * @last_cmt_no: cmt_no of last orphan node read is passed and returned here
551 * @outofdate: whether the LEB is out of date is returned here 551 * @outofdate: whether the LEB is out of date is returned here
552 * @last_flagged: whether the end orph node is encountered 552 * @last_flagged: whether the end orphan node is encountered
553 * 553 *
554 * This function is a helper to the 'kill_orphans()' function. It goes through 554 * This function is a helper to the 'kill_orphans()' function. It goes through
555 * every orphan node in a LEB and for every inode number recorded, removes 555 * every orphan node in a LEB and for every inode number recorded, removes
@@ -580,8 +580,8 @@ static int do_kill_orphans(struct ubifs_info *c, struct ubifs_scan_leb *sleb,
580 /* 580 /*
581 * The commit number on the master node may be less, because 581 * The commit number on the master node may be less, because
582 * of a failed commit. If there are several failed commits in a 582 * of a failed commit. If there are several failed commits in a
583 * row, the commit number written on orph nodes will continue to 583 * row, the commit number written on orphan nodes will continue
584 * increase (because the commit number is adjusted here) even 584 * to increase (because the commit number is adjusted here) even
585 * though the commit number on the master node stays the same 585 * though the commit number on the master node stays the same
586 * because the master node has not been re-written. 586 * because the master node has not been re-written.
587 */ 587 */
@@ -589,9 +589,9 @@ static int do_kill_orphans(struct ubifs_info *c, struct ubifs_scan_leb *sleb,
589 c->cmt_no = cmt_no; 589 c->cmt_no = cmt_no;
590 if (cmt_no < *last_cmt_no && *last_flagged) { 590 if (cmt_no < *last_cmt_no && *last_flagged) {
591 /* 591 /*
592 * The last orph node had a higher commit number and was 592 * The last orphan node had a higher commit number and
593 * flagged as the last written for that commit number. 593 * was flagged as the last written for that commit
594 * That makes this orph node, out of date. 594 * number. That makes this orphan node, out of date.
595 */ 595 */
596 if (!first) { 596 if (!first) {
597 ubifs_err("out of order commit number %llu in " 597 ubifs_err("out of order commit number %llu in "
@@ -658,10 +658,10 @@ static int kill_orphans(struct ubifs_info *c)
658 /* 658 /*
659 * Orph nodes always start at c->orph_first and are written to each 659 * Orph nodes always start at c->orph_first and are written to each
660 * successive LEB in turn. Generally unused LEBs will have been unmapped 660 * successive LEB in turn. Generally unused LEBs will have been unmapped
661 * but may contain out of date orph nodes if the unmap didn't go 661 * but may contain out of date orphan nodes if the unmap didn't go
662 * through. In addition, the last orph node written for each commit is 662 * through. In addition, the last orphan node written for each commit is
663 * marked (top bit of orph->cmt_no is set to 1). It is possible that 663 * marked (top bit of orph->cmt_no is set to 1). It is possible that
664 * there are orph nodes from the next commit (i.e. the commit did not 664 * there are orphan nodes from the next commit (i.e. the commit did not
665 * complete successfully). In that case, no orphans will have been lost 665 * complete successfully). In that case, no orphans will have been lost
666 * due to the way that orphans are written, and any orphans added will 666 * due to the way that orphans are written, and any orphans added will
667 * be valid orphans anyway and so can be deleted. 667 * be valid orphans anyway and so can be deleted.
@@ -718,7 +718,7 @@ int ubifs_mount_orphans(struct ubifs_info *c, int unclean, int read_only)
718 if (unclean) 718 if (unclean)
719 err = kill_orphans(c); 719 err = kill_orphans(c);
720 else if (!read_only) 720 else if (!read_only)
721 err = clear_orphans(c); 721 err = ubifs_clear_orphans(c);
722 722
723 return err; 723 return err;
724} 724}
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index 89556ee72518..1182b66a5491 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -397,6 +397,7 @@ static int ubifs_statfs(struct dentry *dentry, struct kstatfs *buf)
397 buf->f_namelen = UBIFS_MAX_NLEN; 397 buf->f_namelen = UBIFS_MAX_NLEN;
398 buf->f_fsid.val[0] = le32_to_cpu(uuid[0]) ^ le32_to_cpu(uuid[2]); 398 buf->f_fsid.val[0] = le32_to_cpu(uuid[0]) ^ le32_to_cpu(uuid[2]);
399 buf->f_fsid.val[1] = le32_to_cpu(uuid[1]) ^ le32_to_cpu(uuid[3]); 399 buf->f_fsid.val[1] = le32_to_cpu(uuid[1]) ^ le32_to_cpu(uuid[3]);
400 ubifs_assert(buf->f_bfree <= c->block_cnt);
400 return 0; 401 return 0;
401} 402}
402 403
@@ -432,33 +433,24 @@ static int ubifs_sync_fs(struct super_block *sb, int wait)
432 int i, err; 433 int i, err;
433 struct ubifs_info *c = sb->s_fs_info; 434 struct ubifs_info *c = sb->s_fs_info;
434 struct writeback_control wbc = { 435 struct writeback_control wbc = {
435 .sync_mode = wait ? WB_SYNC_ALL : WB_SYNC_NONE, 436 .sync_mode = WB_SYNC_ALL,
436 .range_start = 0, 437 .range_start = 0,
437 .range_end = LLONG_MAX, 438 .range_end = LLONG_MAX,
438 .nr_to_write = LONG_MAX, 439 .nr_to_write = LONG_MAX,
439 }; 440 };
440 441
441 /* 442 /*
442 * Note by akpm about WB_SYNC_NONE used above: zero @wait is just an 443 * Zero @wait is just an advisory thing to help the file system shove
443 * advisory thing to help the file system shove lots of data into the 444 * lots of data into the queues, and there will be the second
444 * queues. If some gets missed then it'll be picked up on the second
445 * '->sync_fs()' call, with non-zero @wait. 445 * '->sync_fs()' call, with non-zero @wait.
446 */ 446 */
447 if (!wait)
448 return 0;
447 449
448 if (sb->s_flags & MS_RDONLY) 450 if (sb->s_flags & MS_RDONLY)
449 return 0; 451 return 0;
450 452
451 /* 453 /*
452 * Synchronize write buffers, because 'ubifs_run_commit()' does not
453 * do this if it waits for an already running commit.
454 */
455 for (i = 0; i < c->jhead_cnt; i++) {
456 err = ubifs_wbuf_sync(&c->jheads[i].wbuf);
457 if (err)
458 return err;
459 }
460
461 /*
462 * VFS calls '->sync_fs()' before synchronizing all dirty inodes and 454 * VFS calls '->sync_fs()' before synchronizing all dirty inodes and
463 * pages, so synchronize them first, then commit the journal. Strictly 455 * pages, so synchronize them first, then commit the journal. Strictly
464 * speaking, it is not necessary to commit the journal here, 456 * speaking, it is not necessary to commit the journal here,
@@ -469,6 +461,16 @@ static int ubifs_sync_fs(struct super_block *sb, int wait)
469 */ 461 */
470 generic_sync_sb_inodes(sb, &wbc); 462 generic_sync_sb_inodes(sb, &wbc);
471 463
464 /*
465 * Synchronize write buffers, because 'ubifs_run_commit()' does not
466 * do this if it waits for an already running commit.
467 */
468 for (i = 0; i < c->jhead_cnt; i++) {
469 err = ubifs_wbuf_sync(&c->jheads[i].wbuf);
470 if (err)
471 return err;
472 }
473
472 err = ubifs_run_commit(c); 474 err = ubifs_run_commit(c);
473 if (err) 475 if (err)
474 return err; 476 return err;
@@ -572,15 +574,8 @@ static int init_constants_early(struct ubifs_info *c)
572 c->ranges[UBIFS_IDX_NODE].max_len = INT_MAX; 574 c->ranges[UBIFS_IDX_NODE].max_len = INT_MAX;
573 575
574 /* 576 /*
575 * Initialize dead and dark LEB space watermarks. 577 * Initialize dead and dark LEB space watermarks. See gc.c for comments
576 * 578 * about these values.
577 * Dead space is the space which cannot be used. Its watermark is
578 * equivalent to min. I/O unit or minimum node size if it is greater
579 * then min. I/O unit.
580 *
581 * Dark space is the space which might be used, or might not, depending
582 * on which node should be written to the LEB. Its watermark is
583 * equivalent to maximum UBIFS node size.
584 */ 579 */
585 c->dead_wm = ALIGN(MIN_WRITE_SZ, c->min_io_size); 580 c->dead_wm = ALIGN(MIN_WRITE_SZ, c->min_io_size);
586 c->dark_wm = ALIGN(UBIFS_MAX_NODE_SZ, c->min_io_size); 581 c->dark_wm = ALIGN(UBIFS_MAX_NODE_SZ, c->min_io_size);
@@ -741,12 +736,12 @@ static void init_constants_master(struct ubifs_info *c)
741 * take_gc_lnum - reserve GC LEB. 736 * take_gc_lnum - reserve GC LEB.
742 * @c: UBIFS file-system description object 737 * @c: UBIFS file-system description object
743 * 738 *
744 * This function ensures that the LEB reserved for garbage collection is 739 * This function ensures that the LEB reserved for garbage collection is marked
745 * unmapped and is marked as "taken" in lprops. We also have to set free space 740 * as "taken" in lprops. We also have to set free space to LEB size and dirty
746 * to LEB size and dirty space to zero, because lprops may contain out-of-date 741 * space to zero, because lprops may contain out-of-date information if the
747 * information if the file-system was un-mounted before it has been committed. 742 * file-system was un-mounted before it has been committed. This function
748 * This function returns zero in case of success and a negative error code in 743 * returns zero in case of success and a negative error code in case of
749 * case of failure. 744 * failure.
750 */ 745 */
751static int take_gc_lnum(struct ubifs_info *c) 746static int take_gc_lnum(struct ubifs_info *c)
752{ 747{
@@ -757,10 +752,6 @@ static int take_gc_lnum(struct ubifs_info *c)
757 return -EINVAL; 752 return -EINVAL;
758 } 753 }
759 754
760 err = ubifs_leb_unmap(c, c->gc_lnum);
761 if (err)
762 return err;
763
764 /* And we have to tell lprops that this LEB is taken */ 755 /* And we have to tell lprops that this LEB is taken */
765 err = ubifs_change_one_lp(c, c->gc_lnum, c->leb_size, 0, 756 err = ubifs_change_one_lp(c, c->gc_lnum, c->leb_size, 0,
766 LPROPS_TAKEN, 0, 0); 757 LPROPS_TAKEN, 0, 0);
@@ -966,13 +957,16 @@ static int ubifs_parse_options(struct ubifs_info *c, char *options,
966 957
967 token = match_token(p, tokens, args); 958 token = match_token(p, tokens, args);
968 switch (token) { 959 switch (token) {
960 /*
961 * %Opt_fast_unmount and %Opt_norm_unmount options are ignored.
962 * We accepte them in order to be backware-compatible. But this
963 * should be removed at some point.
964 */
969 case Opt_fast_unmount: 965 case Opt_fast_unmount:
970 c->mount_opts.unmount_mode = 2; 966 c->mount_opts.unmount_mode = 2;
971 c->fast_unmount = 1;
972 break; 967 break;
973 case Opt_norm_unmount: 968 case Opt_norm_unmount:
974 c->mount_opts.unmount_mode = 1; 969 c->mount_opts.unmount_mode = 1;
975 c->fast_unmount = 0;
976 break; 970 break;
977 case Opt_bulk_read: 971 case Opt_bulk_read:
978 c->mount_opts.bulk_read = 2; 972 c->mount_opts.bulk_read = 2;
@@ -1094,12 +1088,7 @@ static int check_free_space(struct ubifs_info *c)
1094 ubifs_err("insufficient free space to mount in read/write mode"); 1088 ubifs_err("insufficient free space to mount in read/write mode");
1095 dbg_dump_budg(c); 1089 dbg_dump_budg(c);
1096 dbg_dump_lprops(c); 1090 dbg_dump_lprops(c);
1097 /* 1091 return -ENOSPC;
1098 * We return %-EINVAL instead of %-ENOSPC because it seems to
1099 * be the closest error code mentioned in the mount function
1100 * documentation.
1101 */
1102 return -EINVAL;
1103 } 1092 }
1104 return 0; 1093 return 0;
1105} 1094}
@@ -1286,10 +1275,19 @@ static int mount_ubifs(struct ubifs_info *c)
1286 if (err) 1275 if (err)
1287 goto out_orphans; 1276 goto out_orphans;
1288 err = ubifs_rcvry_gc_commit(c); 1277 err = ubifs_rcvry_gc_commit(c);
1289 } else 1278 } else {
1290 err = take_gc_lnum(c); 1279 err = take_gc_lnum(c);
1291 if (err) 1280 if (err)
1292 goto out_orphans; 1281 goto out_orphans;
1282
1283 /*
1284 * GC LEB may contain garbage if there was an unclean
1285 * reboot, and it should be un-mapped.
1286 */
1287 err = ubifs_leb_unmap(c, c->gc_lnum);
1288 if (err)
1289 return err;
1290 }
1293 1291
1294 err = dbg_check_lprops(c); 1292 err = dbg_check_lprops(c);
1295 if (err) 1293 if (err)
@@ -1298,6 +1296,16 @@ static int mount_ubifs(struct ubifs_info *c)
1298 err = ubifs_recover_size(c); 1296 err = ubifs_recover_size(c);
1299 if (err) 1297 if (err)
1300 goto out_orphans; 1298 goto out_orphans;
1299 } else {
1300 /*
1301 * Even if we mount read-only, we have to set space in GC LEB
1302 * to proper value because this affects UBIFS free space
1303 * reporting. We do not want to have a situation when
1304 * re-mounting from R/O to R/W changes amount of free space.
1305 */
1306 err = take_gc_lnum(c);
1307 if (err)
1308 goto out_orphans;
1301 } 1309 }
1302 1310
1303 spin_lock(&ubifs_infos_lock); 1311 spin_lock(&ubifs_infos_lock);
@@ -1310,14 +1318,17 @@ static int mount_ubifs(struct ubifs_info *c)
1310 else { 1318 else {
1311 c->need_recovery = 0; 1319 c->need_recovery = 0;
1312 ubifs_msg("recovery completed"); 1320 ubifs_msg("recovery completed");
1321 /* GC LEB has to be empty and taken at this point */
1322 ubifs_assert(c->lst.taken_empty_lebs == 1);
1313 } 1323 }
1314 } 1324 } else
1325 ubifs_assert(c->lst.taken_empty_lebs == 1);
1315 1326
1316 err = dbg_debugfs_init_fs(c); 1327 err = dbg_check_filesystem(c);
1317 if (err) 1328 if (err)
1318 goto out_infos; 1329 goto out_infos;
1319 1330
1320 err = dbg_check_filesystem(c); 1331 err = dbg_debugfs_init_fs(c);
1321 if (err) 1332 if (err)
1322 goto out_infos; 1333 goto out_infos;
1323 1334
@@ -1351,7 +1362,6 @@ static int mount_ubifs(struct ubifs_info *c)
1351 c->uuid[4], c->uuid[5], c->uuid[6], c->uuid[7], 1362 c->uuid[4], c->uuid[5], c->uuid[6], c->uuid[7],
1352 c->uuid[8], c->uuid[9], c->uuid[10], c->uuid[11], 1363 c->uuid[8], c->uuid[9], c->uuid[10], c->uuid[11],
1353 c->uuid[12], c->uuid[13], c->uuid[14], c->uuid[15]); 1364 c->uuid[12], c->uuid[13], c->uuid[14], c->uuid[15]);
1354 dbg_msg("fast unmount: %d", c->fast_unmount);
1355 dbg_msg("big_lpt %d", c->big_lpt); 1365 dbg_msg("big_lpt %d", c->big_lpt);
1356 dbg_msg("log LEBs: %d (%d - %d)", 1366 dbg_msg("log LEBs: %d (%d - %d)",
1357 c->log_lebs, UBIFS_LOG_LNUM, c->log_last); 1367 c->log_lebs, UBIFS_LOG_LNUM, c->log_last);
@@ -1475,10 +1485,8 @@ static int ubifs_remount_rw(struct ubifs_info *c)
1475{ 1485{
1476 int err, lnum; 1486 int err, lnum;
1477 1487
1478 if (c->ro_media)
1479 return -EINVAL;
1480
1481 mutex_lock(&c->umount_mutex); 1488 mutex_lock(&c->umount_mutex);
1489 dbg_save_space_info(c);
1482 c->remounting_rw = 1; 1490 c->remounting_rw = 1;
1483 c->always_chk_crc = 1; 1491 c->always_chk_crc = 1;
1484 1492
@@ -1514,6 +1522,12 @@ static int ubifs_remount_rw(struct ubifs_info *c)
1514 err = ubifs_recover_inl_heads(c, c->sbuf); 1522 err = ubifs_recover_inl_heads(c, c->sbuf);
1515 if (err) 1523 if (err)
1516 goto out; 1524 goto out;
1525 } else {
1526 /* A readonly mount is not allowed to have orphans */
1527 ubifs_assert(c->tot_orphans == 0);
1528 err = ubifs_clear_orphans(c);
1529 if (err)
1530 goto out;
1517 } 1531 }
1518 1532
1519 if (!(c->mst_node->flags & cpu_to_le32(UBIFS_MST_DIRTY))) { 1533 if (!(c->mst_node->flags & cpu_to_le32(UBIFS_MST_DIRTY))) {
@@ -1569,7 +1583,7 @@ static int ubifs_remount_rw(struct ubifs_info *c)
1569 if (c->need_recovery) 1583 if (c->need_recovery)
1570 err = ubifs_rcvry_gc_commit(c); 1584 err = ubifs_rcvry_gc_commit(c);
1571 else 1585 else
1572 err = take_gc_lnum(c); 1586 err = ubifs_leb_unmap(c, c->gc_lnum);
1573 if (err) 1587 if (err)
1574 goto out; 1588 goto out;
1575 1589
@@ -1582,8 +1596,9 @@ static int ubifs_remount_rw(struct ubifs_info *c)
1582 c->vfs_sb->s_flags &= ~MS_RDONLY; 1596 c->vfs_sb->s_flags &= ~MS_RDONLY;
1583 c->remounting_rw = 0; 1597 c->remounting_rw = 0;
1584 c->always_chk_crc = 0; 1598 c->always_chk_crc = 0;
1599 err = dbg_check_space_info(c);
1585 mutex_unlock(&c->umount_mutex); 1600 mutex_unlock(&c->umount_mutex);
1586 return 0; 1601 return err;
1587 1602
1588out: 1603out:
1589 vfree(c->orph_buf); 1604 vfree(c->orph_buf);
@@ -1603,43 +1618,18 @@ out:
1603} 1618}
1604 1619
1605/** 1620/**
1606 * commit_on_unmount - commit the journal when un-mounting.
1607 * @c: UBIFS file-system description object
1608 *
1609 * This function is called during un-mounting and re-mounting, and it commits
1610 * the journal unless the "fast unmount" mode is enabled.
1611 */
1612static void commit_on_unmount(struct ubifs_info *c)
1613{
1614 struct super_block *sb = c->vfs_sb;
1615 long long bud_bytes;
1616
1617 /*
1618 * This function is called before the background thread is stopped, so
1619 * we may race with ongoing commit, which means we have to take
1620 * @c->bud_lock to access @c->bud_bytes.
1621 */
1622 spin_lock(&c->buds_lock);
1623 bud_bytes = c->bud_bytes;
1624 spin_unlock(&c->buds_lock);
1625
1626 if (!c->fast_unmount && !(sb->s_flags & MS_RDONLY) && bud_bytes)
1627 ubifs_run_commit(c);
1628}
1629
1630/**
1631 * ubifs_remount_ro - re-mount in read-only mode. 1621 * ubifs_remount_ro - re-mount in read-only mode.
1632 * @c: UBIFS file-system description object 1622 * @c: UBIFS file-system description object
1633 * 1623 *
1634 * We rely on VFS to have stopped writing. Possibly the background thread could 1624 * We assume VFS has stopped writing. Possibly the background thread could be
1635 * be running a commit, however kthread_stop will wait in that case. 1625 * running a commit, however kthread_stop will wait in that case.
1636 */ 1626 */
1637static void ubifs_remount_ro(struct ubifs_info *c) 1627static void ubifs_remount_ro(struct ubifs_info *c)
1638{ 1628{
1639 int i, err; 1629 int i, err;
1640 1630
1641 ubifs_assert(!c->need_recovery); 1631 ubifs_assert(!c->need_recovery);
1642 commit_on_unmount(c); 1632 ubifs_assert(!(c->vfs_sb->s_flags & MS_RDONLY));
1643 1633
1644 mutex_lock(&c->umount_mutex); 1634 mutex_lock(&c->umount_mutex);
1645 if (c->bgt) { 1635 if (c->bgt) {
@@ -1647,27 +1637,29 @@ static void ubifs_remount_ro(struct ubifs_info *c)
1647 c->bgt = NULL; 1637 c->bgt = NULL;
1648 } 1638 }
1649 1639
1640 dbg_save_space_info(c);
1641
1650 for (i = 0; i < c->jhead_cnt; i++) { 1642 for (i = 0; i < c->jhead_cnt; i++) {
1651 ubifs_wbuf_sync(&c->jheads[i].wbuf); 1643 ubifs_wbuf_sync(&c->jheads[i].wbuf);
1652 del_timer_sync(&c->jheads[i].wbuf.timer); 1644 del_timer_sync(&c->jheads[i].wbuf.timer);
1653 } 1645 }
1654 1646
1655 if (!c->ro_media) { 1647 c->mst_node->flags &= ~cpu_to_le32(UBIFS_MST_DIRTY);
1656 c->mst_node->flags &= ~cpu_to_le32(UBIFS_MST_DIRTY); 1648 c->mst_node->flags |= cpu_to_le32(UBIFS_MST_NO_ORPHS);
1657 c->mst_node->flags |= cpu_to_le32(UBIFS_MST_NO_ORPHS); 1649 c->mst_node->gc_lnum = cpu_to_le32(c->gc_lnum);
1658 c->mst_node->gc_lnum = cpu_to_le32(c->gc_lnum); 1650 err = ubifs_write_master(c);
1659 err = ubifs_write_master(c); 1651 if (err)
1660 if (err) 1652 ubifs_ro_mode(c, err);
1661 ubifs_ro_mode(c, err);
1662 }
1663 1653
1664 ubifs_destroy_idx_gc(c);
1665 free_wbufs(c); 1654 free_wbufs(c);
1666 vfree(c->orph_buf); 1655 vfree(c->orph_buf);
1667 c->orph_buf = NULL; 1656 c->orph_buf = NULL;
1668 vfree(c->ileb_buf); 1657 vfree(c->ileb_buf);
1669 c->ileb_buf = NULL; 1658 c->ileb_buf = NULL;
1670 ubifs_lpt_free(c, 1); 1659 ubifs_lpt_free(c, 1);
1660 err = dbg_check_space_info(c);
1661 if (err)
1662 ubifs_ro_mode(c, err);
1671 mutex_unlock(&c->umount_mutex); 1663 mutex_unlock(&c->umount_mutex);
1672} 1664}
1673 1665
@@ -1760,11 +1752,20 @@ static int ubifs_remount_fs(struct super_block *sb, int *flags, char *data)
1760 } 1752 }
1761 1753
1762 if ((sb->s_flags & MS_RDONLY) && !(*flags & MS_RDONLY)) { 1754 if ((sb->s_flags & MS_RDONLY) && !(*flags & MS_RDONLY)) {
1755 if (c->ro_media) {
1756 ubifs_msg("cannot re-mount due to prior errors");
1757 return -EROFS;
1758 }
1763 err = ubifs_remount_rw(c); 1759 err = ubifs_remount_rw(c);
1764 if (err) 1760 if (err)
1765 return err; 1761 return err;
1766 } else if (!(sb->s_flags & MS_RDONLY) && (*flags & MS_RDONLY)) 1762 } else if (!(sb->s_flags & MS_RDONLY) && (*flags & MS_RDONLY)) {
1763 if (c->ro_media) {
1764 ubifs_msg("cannot re-mount due to prior errors");
1765 return -EROFS;
1766 }
1767 ubifs_remount_ro(c); 1767 ubifs_remount_ro(c);
1768 }
1768 1769
1769 if (c->bulk_read == 1) 1770 if (c->bulk_read == 1)
1770 bu_init(c); 1771 bu_init(c);
@@ -1774,10 +1775,11 @@ static int ubifs_remount_fs(struct super_block *sb, int *flags, char *data)
1774 c->bu.buf = NULL; 1775 c->bu.buf = NULL;
1775 } 1776 }
1776 1777
1778 ubifs_assert(c->lst.taken_empty_lebs == 1);
1777 return 0; 1779 return 0;
1778} 1780}
1779 1781
1780struct super_operations ubifs_super_operations = { 1782const struct super_operations ubifs_super_operations = {
1781 .alloc_inode = ubifs_alloc_inode, 1783 .alloc_inode = ubifs_alloc_inode,
1782 .destroy_inode = ubifs_destroy_inode, 1784 .destroy_inode = ubifs_destroy_inode,
1783 .put_super = ubifs_put_super, 1785 .put_super = ubifs_put_super,
@@ -2044,15 +2046,6 @@ out_close:
2044 2046
2045static void ubifs_kill_sb(struct super_block *sb) 2047static void ubifs_kill_sb(struct super_block *sb)
2046{ 2048{
2047 struct ubifs_info *c = sb->s_fs_info;
2048
2049 /*
2050 * We do 'commit_on_unmount()' here instead of 'ubifs_put_super()'
2051 * in order to be outside BKL.
2052 */
2053 if (sb->s_root)
2054 commit_on_unmount(c);
2055 /* The un-mount routine is actually done in put_super() */
2056 generic_shutdown_super(sb); 2049 generic_shutdown_super(sb);
2057} 2050}
2058 2051
diff --git a/fs/ubifs/tnc.c b/fs/ubifs/tnc.c
index f7e36f545527..fa28a84c6a1b 100644
--- a/fs/ubifs/tnc.c
+++ b/fs/ubifs/tnc.c
@@ -443,6 +443,11 @@ static int tnc_read_node_nm(struct ubifs_info *c, struct ubifs_zbranch *zbr,
443 * This function performs that same function as ubifs_read_node except that 443 * This function performs that same function as ubifs_read_node except that
444 * it does not require that there is actually a node present and instead 444 * it does not require that there is actually a node present and instead
445 * the return code indicates if a node was read. 445 * the return code indicates if a node was read.
446 *
447 * Note, this function does not check CRC of data nodes if @c->no_chk_data_crc
448 * is true (it is controlled by corresponding mount option). However, if
449 * @c->always_chk_crc is true, @c->no_chk_data_crc is ignored and CRC is always
450 * checked.
446 */ 451 */
447static int try_read_node(const struct ubifs_info *c, void *buf, int type, 452static int try_read_node(const struct ubifs_info *c, void *buf, int type,
448 int len, int lnum, int offs) 453 int len, int lnum, int offs)
@@ -470,9 +475,8 @@ static int try_read_node(const struct ubifs_info *c, void *buf, int type,
470 if (node_len != len) 475 if (node_len != len)
471 return 0; 476 return 0;
472 477
473 if (type == UBIFS_DATA_NODE && !c->always_chk_crc) 478 if (type == UBIFS_DATA_NODE && !c->always_chk_crc && c->no_chk_data_crc)
474 if (c->no_chk_data_crc) 479 return 1;
475 return 0;
476 480
477 crc = crc32(UBIFS_CRC32_INIT, buf + 8, node_len - 8); 481 crc = crc32(UBIFS_CRC32_INIT, buf + 8, node_len - 8);
478 node_crc = le32_to_cpu(ch->crc); 482 node_crc = le32_to_cpu(ch->crc);
@@ -1506,7 +1510,7 @@ out:
1506 * 1510 *
1507 * Note, if the bulk-read buffer length (@bu->buf_len) is known, this function 1511 * Note, if the bulk-read buffer length (@bu->buf_len) is known, this function
1508 * makes sure bulk-read nodes fit the buffer. Otherwise, this function prepares 1512 * makes sure bulk-read nodes fit the buffer. Otherwise, this function prepares
1509 * maxumum possible amount of nodes for bulk-read. 1513 * maximum possible amount of nodes for bulk-read.
1510 */ 1514 */
1511int ubifs_tnc_get_bu_keys(struct ubifs_info *c, struct bu_info *bu) 1515int ubifs_tnc_get_bu_keys(struct ubifs_info *c, struct bu_info *bu)
1512{ 1516{
diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h
index fc2a4cc66d03..039a68bee29a 100644
--- a/fs/ubifs/ubifs.h
+++ b/fs/ubifs/ubifs.h
@@ -426,9 +426,9 @@ struct ubifs_unclean_leb {
426 * LEB properties flags. 426 * LEB properties flags.
427 * 427 *
428 * LPROPS_UNCAT: not categorized 428 * LPROPS_UNCAT: not categorized
429 * LPROPS_DIRTY: dirty > 0, not index 429 * LPROPS_DIRTY: dirty > free, dirty >= @c->dead_wm, not index
430 * LPROPS_DIRTY_IDX: dirty + free > @c->min_idx_node_sze and index 430 * LPROPS_DIRTY_IDX: dirty + free > @c->min_idx_node_sze and index
431 * LPROPS_FREE: free > 0, not empty, not index 431 * LPROPS_FREE: free > 0, dirty < @c->dead_wm, not empty, not index
432 * LPROPS_HEAP_CNT: number of heaps used for storing categorized LEBs 432 * LPROPS_HEAP_CNT: number of heaps used for storing categorized LEBs
433 * LPROPS_EMPTY: LEB is empty, not taken 433 * LPROPS_EMPTY: LEB is empty, not taken
434 * LPROPS_FREEABLE: free + dirty == leb_size, not index, not taken 434 * LPROPS_FREEABLE: free + dirty == leb_size, not index, not taken
@@ -961,7 +961,6 @@ struct ubifs_debug_info;
961 * @cs_lock: commit state lock 961 * @cs_lock: commit state lock
962 * @cmt_wq: wait queue to sleep on if the log is full and a commit is running 962 * @cmt_wq: wait queue to sleep on if the log is full and a commit is running
963 * 963 *
964 * @fast_unmount: do not run journal commit before un-mounting
965 * @big_lpt: flag that LPT is too big to write whole during commit 964 * @big_lpt: flag that LPT is too big to write whole during commit
966 * @no_chk_data_crc: do not check CRCs when reading data nodes (except during 965 * @no_chk_data_crc: do not check CRCs when reading data nodes (except during
967 * recovery) 966 * recovery)
@@ -1202,7 +1201,6 @@ struct ubifs_info {
1202 spinlock_t cs_lock; 1201 spinlock_t cs_lock;
1203 wait_queue_head_t cmt_wq; 1202 wait_queue_head_t cmt_wq;
1204 1203
1205 unsigned int fast_unmount:1;
1206 unsigned int big_lpt:1; 1204 unsigned int big_lpt:1;
1207 unsigned int no_chk_data_crc:1; 1205 unsigned int no_chk_data_crc:1;
1208 unsigned int bulk_read:1; 1206 unsigned int bulk_read:1;
@@ -1405,13 +1403,13 @@ extern struct list_head ubifs_infos;
1405extern spinlock_t ubifs_infos_lock; 1403extern spinlock_t ubifs_infos_lock;
1406extern atomic_long_t ubifs_clean_zn_cnt; 1404extern atomic_long_t ubifs_clean_zn_cnt;
1407extern struct kmem_cache *ubifs_inode_slab; 1405extern struct kmem_cache *ubifs_inode_slab;
1408extern struct super_operations ubifs_super_operations; 1406extern const struct super_operations ubifs_super_operations;
1409extern struct address_space_operations ubifs_file_address_operations; 1407extern const struct address_space_operations ubifs_file_address_operations;
1410extern struct file_operations ubifs_file_operations; 1408extern const struct file_operations ubifs_file_operations;
1411extern struct inode_operations ubifs_file_inode_operations; 1409extern const struct inode_operations ubifs_file_inode_operations;
1412extern struct file_operations ubifs_dir_operations; 1410extern const struct file_operations ubifs_dir_operations;
1413extern struct inode_operations ubifs_dir_inode_operations; 1411extern const struct inode_operations ubifs_dir_inode_operations;
1414extern struct inode_operations ubifs_symlink_inode_operations; 1412extern const struct inode_operations ubifs_symlink_inode_operations;
1415extern struct backing_dev_info ubifs_backing_dev_info; 1413extern struct backing_dev_info ubifs_backing_dev_info;
1416extern struct ubifs_compressor *ubifs_compressors[UBIFS_COMPR_TYPES_CNT]; 1414extern struct ubifs_compressor *ubifs_compressors[UBIFS_COMPR_TYPES_CNT];
1417 1415
@@ -1428,7 +1426,7 @@ int ubifs_read_node_wbuf(struct ubifs_wbuf *wbuf, void *buf, int type, int len,
1428int ubifs_write_node(struct ubifs_info *c, void *node, int len, int lnum, 1426int ubifs_write_node(struct ubifs_info *c, void *node, int len, int lnum,
1429 int offs, int dtype); 1427 int offs, int dtype);
1430int ubifs_check_node(const struct ubifs_info *c, const void *buf, int lnum, 1428int ubifs_check_node(const struct ubifs_info *c, const void *buf, int lnum,
1431 int offs, int quiet, int chk_crc); 1429 int offs, int quiet, int must_chk_crc);
1432void ubifs_prepare_node(struct ubifs_info *c, void *buf, int len, int pad); 1430void ubifs_prepare_node(struct ubifs_info *c, void *buf, int len, int pad);
1433void ubifs_prep_grp_node(struct ubifs_info *c, void *node, int len, int last); 1431void ubifs_prep_grp_node(struct ubifs_info *c, void *node, int len, int last);
1434int ubifs_io_init(struct ubifs_info *c); 1432int ubifs_io_init(struct ubifs_info *c);
@@ -1495,6 +1493,7 @@ void ubifs_release_ino_dirty(struct ubifs_info *c, struct inode *inode,
1495void ubifs_cancel_ino_op(struct ubifs_info *c, struct inode *inode, 1493void ubifs_cancel_ino_op(struct ubifs_info *c, struct inode *inode,
1496 struct ubifs_budget_req *req); 1494 struct ubifs_budget_req *req);
1497long long ubifs_get_free_space(struct ubifs_info *c); 1495long long ubifs_get_free_space(struct ubifs_info *c);
1496long long ubifs_get_free_space_nolock(struct ubifs_info *c);
1498int ubifs_calc_min_idx_lebs(struct ubifs_info *c); 1497int ubifs_calc_min_idx_lebs(struct ubifs_info *c);
1499void ubifs_convert_page_budget(struct ubifs_info *c); 1498void ubifs_convert_page_budget(struct ubifs_info *c);
1500long long ubifs_reported_space(const struct ubifs_info *c, long long free); 1499long long ubifs_reported_space(const struct ubifs_info *c, long long free);
@@ -1603,6 +1602,7 @@ void ubifs_delete_orphan(struct ubifs_info *c, ino_t inum);
1603int ubifs_orphan_start_commit(struct ubifs_info *c); 1602int ubifs_orphan_start_commit(struct ubifs_info *c);
1604int ubifs_orphan_end_commit(struct ubifs_info *c); 1603int ubifs_orphan_end_commit(struct ubifs_info *c);
1605int ubifs_mount_orphans(struct ubifs_info *c, int unclean, int read_only); 1604int ubifs_mount_orphans(struct ubifs_info *c, int unclean, int read_only);
1605int ubifs_clear_orphans(struct ubifs_info *c);
1606 1606
1607/* lpt.c */ 1607/* lpt.c */
1608int ubifs_calc_lpt_geom(struct ubifs_info *c); 1608int ubifs_calc_lpt_geom(struct ubifs_info *c);
@@ -1646,7 +1646,7 @@ const struct ubifs_lprops *ubifs_change_lp(struct ubifs_info *c,
1646 const struct ubifs_lprops *lp, 1646 const struct ubifs_lprops *lp,
1647 int free, int dirty, int flags, 1647 int free, int dirty, int flags,
1648 int idx_gc_cnt); 1648 int idx_gc_cnt);
1649void ubifs_get_lp_stats(struct ubifs_info *c, struct ubifs_lp_stats *stats); 1649void ubifs_get_lp_stats(struct ubifs_info *c, struct ubifs_lp_stats *lst);
1650void ubifs_add_to_cat(struct ubifs_info *c, struct ubifs_lprops *lprops, 1650void ubifs_add_to_cat(struct ubifs_info *c, struct ubifs_lprops *lprops,
1651 int cat); 1651 int cat);
1652void ubifs_replace_cat(struct ubifs_info *c, struct ubifs_lprops *old_lprops, 1652void ubifs_replace_cat(struct ubifs_info *c, struct ubifs_lprops *old_lprops,
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index d71dc44e21ed..cb329edc925b 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -166,6 +166,75 @@ test_page_region(
166} 166}
167 167
168/* 168/*
169 * Mapping of multi-page buffers into contiguous virtual space
170 */
171
172typedef struct a_list {
173 void *vm_addr;
174 struct a_list *next;
175} a_list_t;
176
177static a_list_t *as_free_head;
178static int as_list_len;
179static DEFINE_SPINLOCK(as_lock);
180
181/*
182 * Try to batch vunmaps because they are costly.
183 */
184STATIC void
185free_address(
186 void *addr)
187{
188 a_list_t *aentry;
189
190#ifdef CONFIG_XEN
191 /*
192 * Xen needs to be able to make sure it can get an exclusive
193 * RO mapping of pages it wants to turn into a pagetable. If
194 * a newly allocated page is also still being vmap()ed by xfs,
195 * it will cause pagetable construction to fail. This is a
196 * quick workaround to always eagerly unmap pages so that Xen
197 * is happy.
198 */
199 vunmap(addr);
200 return;
201#endif
202
203 aentry = kmalloc(sizeof(a_list_t), GFP_NOWAIT);
204 if (likely(aentry)) {
205 spin_lock(&as_lock);
206 aentry->next = as_free_head;
207 aentry->vm_addr = addr;
208 as_free_head = aentry;
209 as_list_len++;
210 spin_unlock(&as_lock);
211 } else {
212 vunmap(addr);
213 }
214}
215
216STATIC void
217purge_addresses(void)
218{
219 a_list_t *aentry, *old;
220
221 if (as_free_head == NULL)
222 return;
223
224 spin_lock(&as_lock);
225 aentry = as_free_head;
226 as_free_head = NULL;
227 as_list_len = 0;
228 spin_unlock(&as_lock);
229
230 while ((old = aentry) != NULL) {
231 vunmap(aentry->vm_addr);
232 aentry = aentry->next;
233 kfree(old);
234 }
235}
236
237/*
169 * Internal xfs_buf_t object manipulation 238 * Internal xfs_buf_t object manipulation
170 */ 239 */
171 240
@@ -264,7 +333,7 @@ xfs_buf_free(
264 uint i; 333 uint i;
265 334
266 if ((bp->b_flags & XBF_MAPPED) && (bp->b_page_count > 1)) 335 if ((bp->b_flags & XBF_MAPPED) && (bp->b_page_count > 1))
267 vm_unmap_ram(bp->b_addr - bp->b_offset, bp->b_page_count); 336 free_address(bp->b_addr - bp->b_offset);
268 337
269 for (i = 0; i < bp->b_page_count; i++) { 338 for (i = 0; i < bp->b_page_count; i++) {
270 struct page *page = bp->b_pages[i]; 339 struct page *page = bp->b_pages[i];
@@ -386,8 +455,10 @@ _xfs_buf_map_pages(
386 bp->b_addr = page_address(bp->b_pages[0]) + bp->b_offset; 455 bp->b_addr = page_address(bp->b_pages[0]) + bp->b_offset;
387 bp->b_flags |= XBF_MAPPED; 456 bp->b_flags |= XBF_MAPPED;
388 } else if (flags & XBF_MAPPED) { 457 } else if (flags & XBF_MAPPED) {
389 bp->b_addr = vm_map_ram(bp->b_pages, bp->b_page_count, 458 if (as_list_len > 64)
390 -1, PAGE_KERNEL); 459 purge_addresses();
460 bp->b_addr = vmap(bp->b_pages, bp->b_page_count,
461 VM_MAP, PAGE_KERNEL);
391 if (unlikely(bp->b_addr == NULL)) 462 if (unlikely(bp->b_addr == NULL))
392 return -ENOMEM; 463 return -ENOMEM;
393 bp->b_addr += bp->b_offset; 464 bp->b_addr += bp->b_offset;
@@ -1672,6 +1743,8 @@ xfsbufd(
1672 count++; 1743 count++;
1673 } 1744 }
1674 1745
1746 if (as_list_len > 0)
1747 purge_addresses();
1675 if (count) 1748 if (count)
1676 blk_run_address_space(target->bt_mapping); 1749 blk_run_address_space(target->bt_mapping);
1677 1750
diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c
index 2ed035354c26..a608e72fa405 100644
--- a/fs/xfs/linux-2.6/xfs_sync.c
+++ b/fs/xfs/linux-2.6/xfs_sync.c
@@ -371,7 +371,11 @@ xfs_quiesce_attr(
371 /* flush inodes and push all remaining buffers out to disk */ 371 /* flush inodes and push all remaining buffers out to disk */
372 xfs_quiesce_fs(mp); 372 xfs_quiesce_fs(mp);
373 373
374 ASSERT_ALWAYS(atomic_read(&mp->m_active_trans) == 0); 374 /*
375 * Just warn here till VFS can correctly support
376 * read-only remount without racing.
377 */
378 WARN_ON(atomic_read(&mp->m_active_trans) != 0);
375 379
376 /* Push the superblock and write an unmount record */ 380 /* Push the superblock and write an unmount record */
377 error = xfs_log_sbcount(mp, 1); 381 error = xfs_log_sbcount(mp, 1);
diff --git a/fs/xfs/xfs_dfrag.c b/fs/xfs/xfs_dfrag.c
index b4c1ee713492..f8278cfcc1d3 100644
--- a/fs/xfs/xfs_dfrag.c
+++ b/fs/xfs/xfs_dfrag.c
@@ -55,17 +55,11 @@ xfs_swapext(
55 struct file *file, *target_file; 55 struct file *file, *target_file;
56 int error = 0; 56 int error = 0;
57 57
58 sxp = kmem_alloc(sizeof(xfs_swapext_t), KM_MAYFAIL);
59 if (!sxp) {
60 error = XFS_ERROR(ENOMEM);
61 goto out;
62 }
63
64 /* Pull information for the target fd */ 58 /* Pull information for the target fd */
65 file = fget((int)sxp->sx_fdtarget); 59 file = fget((int)sxp->sx_fdtarget);
66 if (!file) { 60 if (!file) {
67 error = XFS_ERROR(EINVAL); 61 error = XFS_ERROR(EINVAL);
68 goto out_free_sxp; 62 goto out;
69 } 63 }
70 64
71 if (!(file->f_mode & FMODE_WRITE) || (file->f_flags & O_APPEND)) { 65 if (!(file->f_mode & FMODE_WRITE) || (file->f_flags & O_APPEND)) {
@@ -109,8 +103,6 @@ xfs_swapext(
109 fput(target_file); 103 fput(target_file);
110 out_put_file: 104 out_put_file:
111 fput(file); 105 fput(file);
112 out_free_sxp:
113 kmem_free(sxp);
114 out: 106 out:
115 return error; 107 return error;
116} 108}
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 35cca98bd94c..b1047de2fffd 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -70,16 +70,21 @@ STATIC void xlog_recover_check_summary(xlog_t *);
70xfs_buf_t * 70xfs_buf_t *
71xlog_get_bp( 71xlog_get_bp(
72 xlog_t *log, 72 xlog_t *log,
73 int num_bblks) 73 int nbblks)
74{ 74{
75 ASSERT(num_bblks > 0); 75 if (nbblks <= 0 || nbblks > log->l_logBBsize) {
76 xlog_warn("XFS: Invalid block length (0x%x) given for buffer", nbblks);
77 XFS_ERROR_REPORT("xlog_get_bp(1)",
78 XFS_ERRLEVEL_HIGH, log->l_mp);
79 return NULL;
80 }
76 81
77 if (log->l_sectbb_log) { 82 if (log->l_sectbb_log) {
78 if (num_bblks > 1) 83 if (nbblks > 1)
79 num_bblks += XLOG_SECTOR_ROUNDUP_BBCOUNT(log, 1); 84 nbblks += XLOG_SECTOR_ROUNDUP_BBCOUNT(log, 1);
80 num_bblks = XLOG_SECTOR_ROUNDUP_BBCOUNT(log, num_bblks); 85 nbblks = XLOG_SECTOR_ROUNDUP_BBCOUNT(log, nbblks);
81 } 86 }
82 return xfs_buf_get_noaddr(BBTOB(num_bblks), log->l_mp->m_logdev_targp); 87 return xfs_buf_get_noaddr(BBTOB(nbblks), log->l_mp->m_logdev_targp);
83} 88}
84 89
85void 90void
@@ -102,6 +107,13 @@ xlog_bread(
102{ 107{
103 int error; 108 int error;
104 109
110 if (nbblks <= 0 || nbblks > log->l_logBBsize) {
111 xlog_warn("XFS: Invalid block length (0x%x) given for buffer", nbblks);
112 XFS_ERROR_REPORT("xlog_bread(1)",
113 XFS_ERRLEVEL_HIGH, log->l_mp);
114 return EFSCORRUPTED;
115 }
116
105 if (log->l_sectbb_log) { 117 if (log->l_sectbb_log) {
106 blk_no = XLOG_SECTOR_ROUNDDOWN_BLKNO(log, blk_no); 118 blk_no = XLOG_SECTOR_ROUNDDOWN_BLKNO(log, blk_no);
107 nbblks = XLOG_SECTOR_ROUNDUP_BBCOUNT(log, nbblks); 119 nbblks = XLOG_SECTOR_ROUNDUP_BBCOUNT(log, nbblks);
@@ -139,6 +151,13 @@ xlog_bwrite(
139{ 151{
140 int error; 152 int error;
141 153
154 if (nbblks <= 0 || nbblks > log->l_logBBsize) {
155 xlog_warn("XFS: Invalid block length (0x%x) given for buffer", nbblks);
156 XFS_ERROR_REPORT("xlog_bwrite(1)",
157 XFS_ERRLEVEL_HIGH, log->l_mp);
158 return EFSCORRUPTED;
159 }
160
142 if (log->l_sectbb_log) { 161 if (log->l_sectbb_log) {
143 blk_no = XLOG_SECTOR_ROUNDDOWN_BLKNO(log, blk_no); 162 blk_no = XLOG_SECTOR_ROUNDDOWN_BLKNO(log, blk_no);
144 nbblks = XLOG_SECTOR_ROUNDUP_BBCOUNT(log, nbblks); 163 nbblks = XLOG_SECTOR_ROUNDUP_BBCOUNT(log, nbblks);