diff options
Diffstat (limited to 'fs')
118 files changed, 4835 insertions, 4574 deletions
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c index c95295c65045..e83aa5ebe861 100644 --- a/fs/9p/vfs_inode.c +++ b/fs/9p/vfs_inode.c | |||
@@ -626,8 +626,7 @@ static struct dentry *v9fs_vfs_lookup(struct inode *dir, struct dentry *dentry, | |||
626 | return NULL; | 626 | return NULL; |
627 | 627 | ||
628 | error: | 628 | error: |
629 | if (fid) | 629 | p9_client_clunk(fid); |
630 | p9_client_clunk(fid); | ||
631 | 630 | ||
632 | return ERR_PTR(result); | 631 | return ERR_PTR(result); |
633 | } | 632 | } |
diff --git a/fs/Kconfig b/fs/Kconfig index abccb5dab9a8..f54a157a0296 100644 --- a/fs/Kconfig +++ b/fs/Kconfig | |||
@@ -136,37 +136,51 @@ config EXT3_FS_SECURITY | |||
136 | If you are not using a security module that requires using | 136 | If you are not using a security module that requires using |
137 | extended attributes for file security labels, say N. | 137 | extended attributes for file security labels, say N. |
138 | 138 | ||
139 | config EXT4DEV_FS | 139 | config EXT4_FS |
140 | tristate "Ext4dev/ext4 extended fs support development (EXPERIMENTAL)" | 140 | tristate "The Extended 4 (ext4) filesystem" |
141 | depends on EXPERIMENTAL | ||
142 | select JBD2 | 141 | select JBD2 |
143 | select CRC16 | 142 | select CRC16 |
144 | help | 143 | help |
145 | Ext4dev is a predecessor filesystem of the next generation | 144 | This is the next generation of the ext3 filesystem. |
146 | extended fs ext4, based on ext3 filesystem code. It will be | ||
147 | renamed ext4 fs later, once ext4dev is mature and stabilized. | ||
148 | 145 | ||
149 | Unlike the change from ext2 filesystem to ext3 filesystem, | 146 | Unlike the change from ext2 filesystem to ext3 filesystem, |
150 | the on-disk format of ext4dev is not the same as ext3 any more: | 147 | the on-disk format of ext4 is not forwards compatible with |
151 | it is based on extent maps and it supports 48-bit physical block | 148 | ext3; it is based on extent maps and it supports 48-bit |
152 | numbers. These combined on-disk format changes will allow | 149 | physical block numbers. The ext4 filesystem also supports delayed |
153 | ext4dev/ext4 to handle more than 16 TB filesystem volumes -- | 150 | allocation, persistent preallocation, high resolution time stamps, |
154 | a hard limit that ext3 cannot overcome without changing the | 151 | and a number of other features to improve performance and speed |
155 | on-disk format. | 152 | up fsck time. For more information, please see the web pages at |
156 | 153 | http://ext4.wiki.kernel.org. | |
157 | Other than extent maps and 48-bit block numbers, ext4dev also is | 154 | |
158 | likely to have other new features such as persistent preallocation, | 155 | The ext4 filesystem will support mounting an ext3 |
159 | high resolution time stamps, and larger file support etc. These | 156 | filesystem; while there will be some performance gains from |
160 | features will be added to ext4dev gradually. | 157 | the delayed allocation and inode table readahead, the best |
158 | performance gains will require enabling ext4 features in the | ||
159 | filesystem, or formating a new filesystem as an ext4 | ||
160 | filesystem initially. | ||
161 | 161 | ||
162 | To compile this file system support as a module, choose M here. The | 162 | To compile this file system support as a module, choose M here. The |
163 | module will be called ext4dev. | 163 | module will be called ext4dev. |
164 | 164 | ||
165 | If unsure, say N. | 165 | If unsure, say N. |
166 | 166 | ||
167 | config EXT4DEV_FS_XATTR | 167 | config EXT4DEV_COMPAT |
168 | bool "Ext4dev extended attributes" | 168 | bool "Enable ext4dev compatibility" |
169 | depends on EXT4DEV_FS | 169 | depends on EXT4_FS |
170 | help | ||
171 | Starting with 2.6.28, the name of the ext4 filesystem was | ||
172 | renamed from ext4dev to ext4. Unfortunately there are some | ||
173 | legacy userspace programs (such as klibc's fstype) have | ||
174 | "ext4dev" hardcoded. | ||
175 | |||
176 | To enable backwards compatibility so that systems that are | ||
177 | still expecting to mount ext4 filesystems using ext4dev, | ||
178 | chose Y here. This feature will go away by 2.6.31, so | ||
179 | please arrange to get your userspace programs fixed! | ||
180 | |||
181 | config EXT4_FS_XATTR | ||
182 | bool "Ext4 extended attributes" | ||
183 | depends on EXT4_FS | ||
170 | default y | 184 | default y |
171 | help | 185 | help |
172 | Extended attributes are name:value pairs associated with inodes by | 186 | Extended attributes are name:value pairs associated with inodes by |
@@ -175,11 +189,11 @@ config EXT4DEV_FS_XATTR | |||
175 | 189 | ||
176 | If unsure, say N. | 190 | If unsure, say N. |
177 | 191 | ||
178 | You need this for POSIX ACL support on ext4dev/ext4. | 192 | You need this for POSIX ACL support on ext4. |
179 | 193 | ||
180 | config EXT4DEV_FS_POSIX_ACL | 194 | config EXT4_FS_POSIX_ACL |
181 | bool "Ext4dev POSIX Access Control Lists" | 195 | bool "Ext4 POSIX Access Control Lists" |
182 | depends on EXT4DEV_FS_XATTR | 196 | depends on EXT4_FS_XATTR |
183 | select FS_POSIX_ACL | 197 | select FS_POSIX_ACL |
184 | help | 198 | help |
185 | POSIX Access Control Lists (ACLs) support permissions for users and | 199 | POSIX Access Control Lists (ACLs) support permissions for users and |
@@ -190,14 +204,14 @@ config EXT4DEV_FS_POSIX_ACL | |||
190 | 204 | ||
191 | If you don't know what Access Control Lists are, say N | 205 | If you don't know what Access Control Lists are, say N |
192 | 206 | ||
193 | config EXT4DEV_FS_SECURITY | 207 | config EXT4_FS_SECURITY |
194 | bool "Ext4dev Security Labels" | 208 | bool "Ext4 Security Labels" |
195 | depends on EXT4DEV_FS_XATTR | 209 | depends on EXT4_FS_XATTR |
196 | help | 210 | help |
197 | Security labels support alternative access control models | 211 | Security labels support alternative access control models |
198 | implemented by security modules like SELinux. This option | 212 | implemented by security modules like SELinux. This option |
199 | enables an extended attribute handler for file security | 213 | enables an extended attribute handler for file security |
200 | labels in the ext4dev/ext4 filesystem. | 214 | labels in the ext4 filesystem. |
201 | 215 | ||
202 | If you are not using a security module that requires using | 216 | If you are not using a security module that requires using |
203 | extended attributes for file security labels, say N. | 217 | extended attributes for file security labels, say N. |
@@ -240,22 +254,22 @@ config JBD2 | |||
240 | help | 254 | help |
241 | This is a generic journaling layer for block devices that support | 255 | This is a generic journaling layer for block devices that support |
242 | both 32-bit and 64-bit block numbers. It is currently used by | 256 | both 32-bit and 64-bit block numbers. It is currently used by |
243 | the ext4dev/ext4 filesystem, but it could also be used to add | 257 | the ext4 filesystem, but it could also be used to add |
244 | journal support to other file systems or block devices such | 258 | journal support to other file systems or block devices such |
245 | as RAID or LVM. | 259 | as RAID or LVM. |
246 | 260 | ||
247 | If you are using ext4dev/ext4, you need to say Y here. If you are not | 261 | If you are using ext4, you need to say Y here. If you are not |
248 | using ext4dev/ext4 then you will probably want to say N. | 262 | using ext4 then you will probably want to say N. |
249 | 263 | ||
250 | To compile this device as a module, choose M here. The module will be | 264 | To compile this device as a module, choose M here. The module will be |
251 | called jbd2. If you are compiling ext4dev/ext4 into the kernel, | 265 | called jbd2. If you are compiling ext4 into the kernel, |
252 | you cannot compile this code as a module. | 266 | you cannot compile this code as a module. |
253 | 267 | ||
254 | config JBD2_DEBUG | 268 | config JBD2_DEBUG |
255 | bool "JBD2 (ext4dev/ext4) debugging support" | 269 | bool "JBD2 (ext4) debugging support" |
256 | depends on JBD2 && DEBUG_FS | 270 | depends on JBD2 && DEBUG_FS |
257 | help | 271 | help |
258 | If you are using the ext4dev/ext4 journaled file system (or | 272 | If you are using the ext4 journaled file system (or |
259 | potentially any other filesystem/device using JBD2), this option | 273 | potentially any other filesystem/device using JBD2), this option |
260 | allows you to enable debugging output while the system is running, | 274 | allows you to enable debugging output while the system is running, |
261 | in order to help track down any problems you are having. | 275 | in order to help track down any problems you are having. |
@@ -270,9 +284,9 @@ config JBD2_DEBUG | |||
270 | config FS_MBCACHE | 284 | config FS_MBCACHE |
271 | # Meta block cache for Extended Attributes (ext2/ext3/ext4) | 285 | # Meta block cache for Extended Attributes (ext2/ext3/ext4) |
272 | tristate | 286 | tristate |
273 | depends on EXT2_FS_XATTR || EXT3_FS_XATTR || EXT4DEV_FS_XATTR | 287 | depends on EXT2_FS_XATTR || EXT3_FS_XATTR || EXT4_FS_XATTR |
274 | default y if EXT2_FS=y || EXT3_FS=y || EXT4DEV_FS=y | 288 | default y if EXT2_FS=y || EXT3_FS=y || EXT4_FS=y |
275 | default m if EXT2_FS=m || EXT3_FS=m || EXT4DEV_FS=m | 289 | default m if EXT2_FS=m || EXT3_FS=m || EXT4_FS=m |
276 | 290 | ||
277 | config REISERFS_FS | 291 | config REISERFS_FS |
278 | tristate "Reiserfs support" | 292 | tristate "Reiserfs support" |
diff --git a/fs/Makefile b/fs/Makefile index a1482a5eff15..de404b00eb0c 100644 --- a/fs/Makefile +++ b/fs/Makefile | |||
@@ -69,7 +69,7 @@ obj-$(CONFIG_DLM) += dlm/ | |||
69 | # Do not add any filesystems before this line | 69 | # Do not add any filesystems before this line |
70 | obj-$(CONFIG_REISERFS_FS) += reiserfs/ | 70 | obj-$(CONFIG_REISERFS_FS) += reiserfs/ |
71 | obj-$(CONFIG_EXT3_FS) += ext3/ # Before ext2 so root fs can be ext3 | 71 | obj-$(CONFIG_EXT3_FS) += ext3/ # Before ext2 so root fs can be ext3 |
72 | obj-$(CONFIG_EXT4DEV_FS) += ext4/ # Before ext2 so root fs can be ext4dev | 72 | obj-$(CONFIG_EXT4_FS) += ext4/ # Before ext2 so root fs can be ext4dev |
73 | obj-$(CONFIG_JBD) += jbd/ | 73 | obj-$(CONFIG_JBD) += jbd/ |
74 | obj-$(CONFIG_JBD2) += jbd2/ | 74 | obj-$(CONFIG_JBD2) += jbd2/ |
75 | obj-$(CONFIG_EXT2_FS) += ext2/ | 75 | obj-$(CONFIG_EXT2_FS) += ext2/ |
diff --git a/fs/bfs/dir.c b/fs/bfs/dir.c index 87ee5ccee348..ed8feb052df9 100644 --- a/fs/bfs/dir.c +++ b/fs/bfs/dir.c | |||
@@ -125,8 +125,8 @@ static int bfs_create(struct inode *dir, struct dentry *dentry, int mode, | |||
125 | inode->i_ino); | 125 | inode->i_ino); |
126 | if (err) { | 126 | if (err) { |
127 | inode_dec_link_count(inode); | 127 | inode_dec_link_count(inode); |
128 | iput(inode); | ||
129 | mutex_unlock(&info->bfs_lock); | 128 | mutex_unlock(&info->bfs_lock); |
129 | iput(inode); | ||
130 | return err; | 130 | return err; |
131 | } | 131 | } |
132 | mutex_unlock(&info->bfs_lock); | 132 | mutex_unlock(&info->bfs_lock); |
diff --git a/fs/bio-integrity.c b/fs/bio-integrity.c index c3e174b35fe6..19caf7c962ac 100644 --- a/fs/bio-integrity.c +++ b/fs/bio-integrity.c | |||
@@ -107,7 +107,8 @@ void bio_integrity_free(struct bio *bio, struct bio_set *bs) | |||
107 | BUG_ON(bip == NULL); | 107 | BUG_ON(bip == NULL); |
108 | 108 | ||
109 | /* A cloned bio doesn't own the integrity metadata */ | 109 | /* A cloned bio doesn't own the integrity metadata */ |
110 | if (!bio_flagged(bio, BIO_CLONED) && bip->bip_buf != NULL) | 110 | if (!bio_flagged(bio, BIO_CLONED) && !bio_flagged(bio, BIO_FS_INTEGRITY) |
111 | && bip->bip_buf != NULL) | ||
111 | kfree(bip->bip_buf); | 112 | kfree(bip->bip_buf); |
112 | 113 | ||
113 | mempool_free(bip->bip_vec, bs->bvec_pools[bip->bip_pool]); | 114 | mempool_free(bip->bip_vec, bs->bvec_pools[bip->bip_pool]); |
@@ -150,6 +151,24 @@ int bio_integrity_add_page(struct bio *bio, struct page *page, | |||
150 | } | 151 | } |
151 | EXPORT_SYMBOL(bio_integrity_add_page); | 152 | EXPORT_SYMBOL(bio_integrity_add_page); |
152 | 153 | ||
154 | static int bdev_integrity_enabled(struct block_device *bdev, int rw) | ||
155 | { | ||
156 | struct blk_integrity *bi = bdev_get_integrity(bdev); | ||
157 | |||
158 | if (bi == NULL) | ||
159 | return 0; | ||
160 | |||
161 | if (rw == READ && bi->verify_fn != NULL && | ||
162 | (bi->flags & INTEGRITY_FLAG_READ)) | ||
163 | return 1; | ||
164 | |||
165 | if (rw == WRITE && bi->generate_fn != NULL && | ||
166 | (bi->flags & INTEGRITY_FLAG_WRITE)) | ||
167 | return 1; | ||
168 | |||
169 | return 0; | ||
170 | } | ||
171 | |||
153 | /** | 172 | /** |
154 | * bio_integrity_enabled - Check whether integrity can be passed | 173 | * bio_integrity_enabled - Check whether integrity can be passed |
155 | * @bio: bio to check | 174 | * @bio: bio to check |
@@ -313,6 +332,14 @@ static void bio_integrity_generate(struct bio *bio) | |||
313 | } | 332 | } |
314 | } | 333 | } |
315 | 334 | ||
335 | static inline unsigned short blk_integrity_tuple_size(struct blk_integrity *bi) | ||
336 | { | ||
337 | if (bi) | ||
338 | return bi->tuple_size; | ||
339 | |||
340 | return 0; | ||
341 | } | ||
342 | |||
316 | /** | 343 | /** |
317 | * bio_integrity_prep - Prepare bio for integrity I/O | 344 | * bio_integrity_prep - Prepare bio for integrity I/O |
318 | * @bio: bio to prepare | 345 | * @bio: bio to prepare |
@@ -30,7 +30,7 @@ | |||
30 | 30 | ||
31 | static struct kmem_cache *bio_slab __read_mostly; | 31 | static struct kmem_cache *bio_slab __read_mostly; |
32 | 32 | ||
33 | mempool_t *bio_split_pool __read_mostly; | 33 | static mempool_t *bio_split_pool __read_mostly; |
34 | 34 | ||
35 | /* | 35 | /* |
36 | * if you change this list, also change bvec_alloc or things will | 36 | * if you change this list, also change bvec_alloc or things will |
@@ -60,25 +60,46 @@ struct bio_vec *bvec_alloc_bs(gfp_t gfp_mask, int nr, unsigned long *idx, struct | |||
60 | struct bio_vec *bvl; | 60 | struct bio_vec *bvl; |
61 | 61 | ||
62 | /* | 62 | /* |
63 | * see comment near bvec_array define! | 63 | * If 'bs' is given, lookup the pool and do the mempool alloc. |
64 | * If not, this is a bio_kmalloc() allocation and just do a | ||
65 | * kzalloc() for the exact number of vecs right away. | ||
64 | */ | 66 | */ |
65 | switch (nr) { | 67 | if (bs) { |
66 | case 1 : *idx = 0; break; | 68 | /* |
67 | case 2 ... 4: *idx = 1; break; | 69 | * see comment near bvec_array define! |
68 | case 5 ... 16: *idx = 2; break; | 70 | */ |
69 | case 17 ... 64: *idx = 3; break; | 71 | switch (nr) { |
70 | case 65 ... 128: *idx = 4; break; | 72 | case 1: |
71 | case 129 ... BIO_MAX_PAGES: *idx = 5; break; | 73 | *idx = 0; |
74 | break; | ||
75 | case 2 ... 4: | ||
76 | *idx = 1; | ||
77 | break; | ||
78 | case 5 ... 16: | ||
79 | *idx = 2; | ||
80 | break; | ||
81 | case 17 ... 64: | ||
82 | *idx = 3; | ||
83 | break; | ||
84 | case 65 ... 128: | ||
85 | *idx = 4; | ||
86 | break; | ||
87 | case 129 ... BIO_MAX_PAGES: | ||
88 | *idx = 5; | ||
89 | break; | ||
72 | default: | 90 | default: |
73 | return NULL; | 91 | return NULL; |
74 | } | 92 | } |
75 | /* | ||
76 | * idx now points to the pool we want to allocate from | ||
77 | */ | ||
78 | 93 | ||
79 | bvl = mempool_alloc(bs->bvec_pools[*idx], gfp_mask); | 94 | /* |
80 | if (bvl) | 95 | * idx now points to the pool we want to allocate from |
81 | memset(bvl, 0, bvec_nr_vecs(*idx) * sizeof(struct bio_vec)); | 96 | */ |
97 | bvl = mempool_alloc(bs->bvec_pools[*idx], gfp_mask); | ||
98 | if (bvl) | ||
99 | memset(bvl, 0, | ||
100 | bvec_nr_vecs(*idx) * sizeof(struct bio_vec)); | ||
101 | } else | ||
102 | bvl = kzalloc(nr * sizeof(struct bio_vec), gfp_mask); | ||
82 | 103 | ||
83 | return bvl; | 104 | return bvl; |
84 | } | 105 | } |
@@ -107,10 +128,17 @@ static void bio_fs_destructor(struct bio *bio) | |||
107 | bio_free(bio, fs_bio_set); | 128 | bio_free(bio, fs_bio_set); |
108 | } | 129 | } |
109 | 130 | ||
131 | static void bio_kmalloc_destructor(struct bio *bio) | ||
132 | { | ||
133 | kfree(bio->bi_io_vec); | ||
134 | kfree(bio); | ||
135 | } | ||
136 | |||
110 | void bio_init(struct bio *bio) | 137 | void bio_init(struct bio *bio) |
111 | { | 138 | { |
112 | memset(bio, 0, sizeof(*bio)); | 139 | memset(bio, 0, sizeof(*bio)); |
113 | bio->bi_flags = 1 << BIO_UPTODATE; | 140 | bio->bi_flags = 1 << BIO_UPTODATE; |
141 | bio->bi_comp_cpu = -1; | ||
114 | atomic_set(&bio->bi_cnt, 1); | 142 | atomic_set(&bio->bi_cnt, 1); |
115 | } | 143 | } |
116 | 144 | ||
@@ -118,19 +146,25 @@ void bio_init(struct bio *bio) | |||
118 | * bio_alloc_bioset - allocate a bio for I/O | 146 | * bio_alloc_bioset - allocate a bio for I/O |
119 | * @gfp_mask: the GFP_ mask given to the slab allocator | 147 | * @gfp_mask: the GFP_ mask given to the slab allocator |
120 | * @nr_iovecs: number of iovecs to pre-allocate | 148 | * @nr_iovecs: number of iovecs to pre-allocate |
121 | * @bs: the bio_set to allocate from | 149 | * @bs: the bio_set to allocate from. If %NULL, just use kmalloc |
122 | * | 150 | * |
123 | * Description: | 151 | * Description: |
124 | * bio_alloc_bioset will first try it's on mempool to satisfy the allocation. | 152 | * bio_alloc_bioset will first try its own mempool to satisfy the allocation. |
125 | * If %__GFP_WAIT is set then we will block on the internal pool waiting | 153 | * If %__GFP_WAIT is set then we will block on the internal pool waiting |
126 | * for a &struct bio to become free. | 154 | * for a &struct bio to become free. If a %NULL @bs is passed in, we will |
155 | * fall back to just using @kmalloc to allocate the required memory. | ||
127 | * | 156 | * |
128 | * allocate bio and iovecs from the memory pools specified by the | 157 | * allocate bio and iovecs from the memory pools specified by the |
129 | * bio_set structure. | 158 | * bio_set structure, or @kmalloc if none given. |
130 | **/ | 159 | **/ |
131 | struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs) | 160 | struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs) |
132 | { | 161 | { |
133 | struct bio *bio = mempool_alloc(bs->bio_pool, gfp_mask); | 162 | struct bio *bio; |
163 | |||
164 | if (bs) | ||
165 | bio = mempool_alloc(bs->bio_pool, gfp_mask); | ||
166 | else | ||
167 | bio = kmalloc(sizeof(*bio), gfp_mask); | ||
134 | 168 | ||
135 | if (likely(bio)) { | 169 | if (likely(bio)) { |
136 | struct bio_vec *bvl = NULL; | 170 | struct bio_vec *bvl = NULL; |
@@ -141,7 +175,10 @@ struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs) | |||
141 | 175 | ||
142 | bvl = bvec_alloc_bs(gfp_mask, nr_iovecs, &idx, bs); | 176 | bvl = bvec_alloc_bs(gfp_mask, nr_iovecs, &idx, bs); |
143 | if (unlikely(!bvl)) { | 177 | if (unlikely(!bvl)) { |
144 | mempool_free(bio, bs->bio_pool); | 178 | if (bs) |
179 | mempool_free(bio, bs->bio_pool); | ||
180 | else | ||
181 | kfree(bio); | ||
145 | bio = NULL; | 182 | bio = NULL; |
146 | goto out; | 183 | goto out; |
147 | } | 184 | } |
@@ -164,6 +201,23 @@ struct bio *bio_alloc(gfp_t gfp_mask, int nr_iovecs) | |||
164 | return bio; | 201 | return bio; |
165 | } | 202 | } |
166 | 203 | ||
204 | /* | ||
205 | * Like bio_alloc(), but doesn't use a mempool backing. This means that | ||
206 | * it CAN fail, but while bio_alloc() can only be used for allocations | ||
207 | * that have a short (finite) life span, bio_kmalloc() should be used | ||
208 | * for more permanent bio allocations (like allocating some bio's for | ||
209 | * initalization or setup purposes). | ||
210 | */ | ||
211 | struct bio *bio_kmalloc(gfp_t gfp_mask, int nr_iovecs) | ||
212 | { | ||
213 | struct bio *bio = bio_alloc_bioset(gfp_mask, nr_iovecs, NULL); | ||
214 | |||
215 | if (bio) | ||
216 | bio->bi_destructor = bio_kmalloc_destructor; | ||
217 | |||
218 | return bio; | ||
219 | } | ||
220 | |||
167 | void zero_fill_bio(struct bio *bio) | 221 | void zero_fill_bio(struct bio *bio) |
168 | { | 222 | { |
169 | unsigned long flags; | 223 | unsigned long flags; |
@@ -208,14 +262,6 @@ inline int bio_phys_segments(struct request_queue *q, struct bio *bio) | |||
208 | return bio->bi_phys_segments; | 262 | return bio->bi_phys_segments; |
209 | } | 263 | } |
210 | 264 | ||
211 | inline int bio_hw_segments(struct request_queue *q, struct bio *bio) | ||
212 | { | ||
213 | if (unlikely(!bio_flagged(bio, BIO_SEG_VALID))) | ||
214 | blk_recount_segments(q, bio); | ||
215 | |||
216 | return bio->bi_hw_segments; | ||
217 | } | ||
218 | |||
219 | /** | 265 | /** |
220 | * __bio_clone - clone a bio | 266 | * __bio_clone - clone a bio |
221 | * @bio: destination bio | 267 | * @bio: destination bio |
@@ -350,8 +396,7 @@ static int __bio_add_page(struct request_queue *q, struct bio *bio, struct page | |||
350 | */ | 396 | */ |
351 | 397 | ||
352 | while (bio->bi_phys_segments >= q->max_phys_segments | 398 | while (bio->bi_phys_segments >= q->max_phys_segments |
353 | || bio->bi_hw_segments >= q->max_hw_segments | 399 | || bio->bi_phys_segments >= q->max_hw_segments) { |
354 | || BIOVEC_VIRT_OVERSIZE(bio->bi_size)) { | ||
355 | 400 | ||
356 | if (retried_segments) | 401 | if (retried_segments) |
357 | return 0; | 402 | return 0; |
@@ -395,13 +440,11 @@ static int __bio_add_page(struct request_queue *q, struct bio *bio, struct page | |||
395 | } | 440 | } |
396 | 441 | ||
397 | /* If we may be able to merge these biovecs, force a recount */ | 442 | /* If we may be able to merge these biovecs, force a recount */ |
398 | if (bio->bi_vcnt && (BIOVEC_PHYS_MERGEABLE(bvec-1, bvec) || | 443 | if (bio->bi_vcnt && (BIOVEC_PHYS_MERGEABLE(bvec-1, bvec))) |
399 | BIOVEC_VIRT_MERGEABLE(bvec-1, bvec))) | ||
400 | bio->bi_flags &= ~(1 << BIO_SEG_VALID); | 444 | bio->bi_flags &= ~(1 << BIO_SEG_VALID); |
401 | 445 | ||
402 | bio->bi_vcnt++; | 446 | bio->bi_vcnt++; |
403 | bio->bi_phys_segments++; | 447 | bio->bi_phys_segments++; |
404 | bio->bi_hw_segments++; | ||
405 | done: | 448 | done: |
406 | bio->bi_size += len; | 449 | bio->bi_size += len; |
407 | return len; | 450 | return len; |
@@ -449,16 +492,19 @@ int bio_add_page(struct bio *bio, struct page *page, unsigned int len, | |||
449 | 492 | ||
450 | struct bio_map_data { | 493 | struct bio_map_data { |
451 | struct bio_vec *iovecs; | 494 | struct bio_vec *iovecs; |
452 | int nr_sgvecs; | ||
453 | struct sg_iovec *sgvecs; | 495 | struct sg_iovec *sgvecs; |
496 | int nr_sgvecs; | ||
497 | int is_our_pages; | ||
454 | }; | 498 | }; |
455 | 499 | ||
456 | static void bio_set_map_data(struct bio_map_data *bmd, struct bio *bio, | 500 | static void bio_set_map_data(struct bio_map_data *bmd, struct bio *bio, |
457 | struct sg_iovec *iov, int iov_count) | 501 | struct sg_iovec *iov, int iov_count, |
502 | int is_our_pages) | ||
458 | { | 503 | { |
459 | memcpy(bmd->iovecs, bio->bi_io_vec, sizeof(struct bio_vec) * bio->bi_vcnt); | 504 | memcpy(bmd->iovecs, bio->bi_io_vec, sizeof(struct bio_vec) * bio->bi_vcnt); |
460 | memcpy(bmd->sgvecs, iov, sizeof(struct sg_iovec) * iov_count); | 505 | memcpy(bmd->sgvecs, iov, sizeof(struct sg_iovec) * iov_count); |
461 | bmd->nr_sgvecs = iov_count; | 506 | bmd->nr_sgvecs = iov_count; |
507 | bmd->is_our_pages = is_our_pages; | ||
462 | bio->bi_private = bmd; | 508 | bio->bi_private = bmd; |
463 | } | 509 | } |
464 | 510 | ||
@@ -493,7 +539,8 @@ static struct bio_map_data *bio_alloc_map_data(int nr_segs, int iov_count, | |||
493 | } | 539 | } |
494 | 540 | ||
495 | static int __bio_copy_iov(struct bio *bio, struct bio_vec *iovecs, | 541 | static int __bio_copy_iov(struct bio *bio, struct bio_vec *iovecs, |
496 | struct sg_iovec *iov, int iov_count, int uncopy) | 542 | struct sg_iovec *iov, int iov_count, int uncopy, |
543 | int do_free_page) | ||
497 | { | 544 | { |
498 | int ret = 0, i; | 545 | int ret = 0, i; |
499 | struct bio_vec *bvec; | 546 | struct bio_vec *bvec; |
@@ -536,7 +583,7 @@ static int __bio_copy_iov(struct bio *bio, struct bio_vec *iovecs, | |||
536 | } | 583 | } |
537 | } | 584 | } |
538 | 585 | ||
539 | if (uncopy) | 586 | if (do_free_page) |
540 | __free_page(bvec->bv_page); | 587 | __free_page(bvec->bv_page); |
541 | } | 588 | } |
542 | 589 | ||
@@ -553,10 +600,11 @@ static int __bio_copy_iov(struct bio *bio, struct bio_vec *iovecs, | |||
553 | int bio_uncopy_user(struct bio *bio) | 600 | int bio_uncopy_user(struct bio *bio) |
554 | { | 601 | { |
555 | struct bio_map_data *bmd = bio->bi_private; | 602 | struct bio_map_data *bmd = bio->bi_private; |
556 | int ret; | 603 | int ret = 0; |
557 | |||
558 | ret = __bio_copy_iov(bio, bmd->iovecs, bmd->sgvecs, bmd->nr_sgvecs, 1); | ||
559 | 604 | ||
605 | if (!bio_flagged(bio, BIO_NULL_MAPPED)) | ||
606 | ret = __bio_copy_iov(bio, bmd->iovecs, bmd->sgvecs, | ||
607 | bmd->nr_sgvecs, 1, bmd->is_our_pages); | ||
560 | bio_free_map_data(bmd); | 608 | bio_free_map_data(bmd); |
561 | bio_put(bio); | 609 | bio_put(bio); |
562 | return ret; | 610 | return ret; |
@@ -565,16 +613,20 @@ int bio_uncopy_user(struct bio *bio) | |||
565 | /** | 613 | /** |
566 | * bio_copy_user_iov - copy user data to bio | 614 | * bio_copy_user_iov - copy user data to bio |
567 | * @q: destination block queue | 615 | * @q: destination block queue |
616 | * @map_data: pointer to the rq_map_data holding pages (if necessary) | ||
568 | * @iov: the iovec. | 617 | * @iov: the iovec. |
569 | * @iov_count: number of elements in the iovec | 618 | * @iov_count: number of elements in the iovec |
570 | * @write_to_vm: bool indicating writing to pages or not | 619 | * @write_to_vm: bool indicating writing to pages or not |
620 | * @gfp_mask: memory allocation flags | ||
571 | * | 621 | * |
572 | * Prepares and returns a bio for indirect user io, bouncing data | 622 | * Prepares and returns a bio for indirect user io, bouncing data |
573 | * to/from kernel pages as necessary. Must be paired with | 623 | * to/from kernel pages as necessary. Must be paired with |
574 | * call bio_uncopy_user() on io completion. | 624 | * call bio_uncopy_user() on io completion. |
575 | */ | 625 | */ |
576 | struct bio *bio_copy_user_iov(struct request_queue *q, struct sg_iovec *iov, | 626 | struct bio *bio_copy_user_iov(struct request_queue *q, |
577 | int iov_count, int write_to_vm) | 627 | struct rq_map_data *map_data, |
628 | struct sg_iovec *iov, int iov_count, | ||
629 | int write_to_vm, gfp_t gfp_mask) | ||
578 | { | 630 | { |
579 | struct bio_map_data *bmd; | 631 | struct bio_map_data *bmd; |
580 | struct bio_vec *bvec; | 632 | struct bio_vec *bvec; |
@@ -597,25 +649,38 @@ struct bio *bio_copy_user_iov(struct request_queue *q, struct sg_iovec *iov, | |||
597 | len += iov[i].iov_len; | 649 | len += iov[i].iov_len; |
598 | } | 650 | } |
599 | 651 | ||
600 | bmd = bio_alloc_map_data(nr_pages, iov_count, GFP_KERNEL); | 652 | bmd = bio_alloc_map_data(nr_pages, iov_count, gfp_mask); |
601 | if (!bmd) | 653 | if (!bmd) |
602 | return ERR_PTR(-ENOMEM); | 654 | return ERR_PTR(-ENOMEM); |
603 | 655 | ||
604 | ret = -ENOMEM; | 656 | ret = -ENOMEM; |
605 | bio = bio_alloc(GFP_KERNEL, nr_pages); | 657 | bio = bio_alloc(gfp_mask, nr_pages); |
606 | if (!bio) | 658 | if (!bio) |
607 | goto out_bmd; | 659 | goto out_bmd; |
608 | 660 | ||
609 | bio->bi_rw |= (!write_to_vm << BIO_RW); | 661 | bio->bi_rw |= (!write_to_vm << BIO_RW); |
610 | 662 | ||
611 | ret = 0; | 663 | ret = 0; |
664 | i = 0; | ||
612 | while (len) { | 665 | while (len) { |
613 | unsigned int bytes = PAGE_SIZE; | 666 | unsigned int bytes; |
667 | |||
668 | if (map_data) | ||
669 | bytes = 1U << (PAGE_SHIFT + map_data->page_order); | ||
670 | else | ||
671 | bytes = PAGE_SIZE; | ||
614 | 672 | ||
615 | if (bytes > len) | 673 | if (bytes > len) |
616 | bytes = len; | 674 | bytes = len; |
617 | 675 | ||
618 | page = alloc_page(q->bounce_gfp | GFP_KERNEL); | 676 | if (map_data) { |
677 | if (i == map_data->nr_entries) { | ||
678 | ret = -ENOMEM; | ||
679 | break; | ||
680 | } | ||
681 | page = map_data->pages[i++]; | ||
682 | } else | ||
683 | page = alloc_page(q->bounce_gfp | gfp_mask); | ||
619 | if (!page) { | 684 | if (!page) { |
620 | ret = -ENOMEM; | 685 | ret = -ENOMEM; |
621 | break; | 686 | break; |
@@ -634,16 +699,17 @@ struct bio *bio_copy_user_iov(struct request_queue *q, struct sg_iovec *iov, | |||
634 | * success | 699 | * success |
635 | */ | 700 | */ |
636 | if (!write_to_vm) { | 701 | if (!write_to_vm) { |
637 | ret = __bio_copy_iov(bio, bio->bi_io_vec, iov, iov_count, 0); | 702 | ret = __bio_copy_iov(bio, bio->bi_io_vec, iov, iov_count, 0, 0); |
638 | if (ret) | 703 | if (ret) |
639 | goto cleanup; | 704 | goto cleanup; |
640 | } | 705 | } |
641 | 706 | ||
642 | bio_set_map_data(bmd, bio, iov, iov_count); | 707 | bio_set_map_data(bmd, bio, iov, iov_count, map_data ? 0 : 1); |
643 | return bio; | 708 | return bio; |
644 | cleanup: | 709 | cleanup: |
645 | bio_for_each_segment(bvec, bio, i) | 710 | if (!map_data) |
646 | __free_page(bvec->bv_page); | 711 | bio_for_each_segment(bvec, bio, i) |
712 | __free_page(bvec->bv_page); | ||
647 | 713 | ||
648 | bio_put(bio); | 714 | bio_put(bio); |
649 | out_bmd: | 715 | out_bmd: |
@@ -654,29 +720,32 @@ out_bmd: | |||
654 | /** | 720 | /** |
655 | * bio_copy_user - copy user data to bio | 721 | * bio_copy_user - copy user data to bio |
656 | * @q: destination block queue | 722 | * @q: destination block queue |
723 | * @map_data: pointer to the rq_map_data holding pages (if necessary) | ||
657 | * @uaddr: start of user address | 724 | * @uaddr: start of user address |
658 | * @len: length in bytes | 725 | * @len: length in bytes |
659 | * @write_to_vm: bool indicating writing to pages or not | 726 | * @write_to_vm: bool indicating writing to pages or not |
727 | * @gfp_mask: memory allocation flags | ||
660 | * | 728 | * |
661 | * Prepares and returns a bio for indirect user io, bouncing data | 729 | * Prepares and returns a bio for indirect user io, bouncing data |
662 | * to/from kernel pages as necessary. Must be paired with | 730 | * to/from kernel pages as necessary. Must be paired with |
663 | * call bio_uncopy_user() on io completion. | 731 | * call bio_uncopy_user() on io completion. |
664 | */ | 732 | */ |
665 | struct bio *bio_copy_user(struct request_queue *q, unsigned long uaddr, | 733 | struct bio *bio_copy_user(struct request_queue *q, struct rq_map_data *map_data, |
666 | unsigned int len, int write_to_vm) | 734 | unsigned long uaddr, unsigned int len, |
735 | int write_to_vm, gfp_t gfp_mask) | ||
667 | { | 736 | { |
668 | struct sg_iovec iov; | 737 | struct sg_iovec iov; |
669 | 738 | ||
670 | iov.iov_base = (void __user *)uaddr; | 739 | iov.iov_base = (void __user *)uaddr; |
671 | iov.iov_len = len; | 740 | iov.iov_len = len; |
672 | 741 | ||
673 | return bio_copy_user_iov(q, &iov, 1, write_to_vm); | 742 | return bio_copy_user_iov(q, map_data, &iov, 1, write_to_vm, gfp_mask); |
674 | } | 743 | } |
675 | 744 | ||
676 | static struct bio *__bio_map_user_iov(struct request_queue *q, | 745 | static struct bio *__bio_map_user_iov(struct request_queue *q, |
677 | struct block_device *bdev, | 746 | struct block_device *bdev, |
678 | struct sg_iovec *iov, int iov_count, | 747 | struct sg_iovec *iov, int iov_count, |
679 | int write_to_vm) | 748 | int write_to_vm, gfp_t gfp_mask) |
680 | { | 749 | { |
681 | int i, j; | 750 | int i, j; |
682 | int nr_pages = 0; | 751 | int nr_pages = 0; |
@@ -702,12 +771,12 @@ static struct bio *__bio_map_user_iov(struct request_queue *q, | |||
702 | if (!nr_pages) | 771 | if (!nr_pages) |
703 | return ERR_PTR(-EINVAL); | 772 | return ERR_PTR(-EINVAL); |
704 | 773 | ||
705 | bio = bio_alloc(GFP_KERNEL, nr_pages); | 774 | bio = bio_alloc(gfp_mask, nr_pages); |
706 | if (!bio) | 775 | if (!bio) |
707 | return ERR_PTR(-ENOMEM); | 776 | return ERR_PTR(-ENOMEM); |
708 | 777 | ||
709 | ret = -ENOMEM; | 778 | ret = -ENOMEM; |
710 | pages = kcalloc(nr_pages, sizeof(struct page *), GFP_KERNEL); | 779 | pages = kcalloc(nr_pages, sizeof(struct page *), gfp_mask); |
711 | if (!pages) | 780 | if (!pages) |
712 | goto out; | 781 | goto out; |
713 | 782 | ||
@@ -786,19 +855,21 @@ static struct bio *__bio_map_user_iov(struct request_queue *q, | |||
786 | * @uaddr: start of user address | 855 | * @uaddr: start of user address |
787 | * @len: length in bytes | 856 | * @len: length in bytes |
788 | * @write_to_vm: bool indicating writing to pages or not | 857 | * @write_to_vm: bool indicating writing to pages or not |
858 | * @gfp_mask: memory allocation flags | ||
789 | * | 859 | * |
790 | * Map the user space address into a bio suitable for io to a block | 860 | * Map the user space address into a bio suitable for io to a block |
791 | * device. Returns an error pointer in case of error. | 861 | * device. Returns an error pointer in case of error. |
792 | */ | 862 | */ |
793 | struct bio *bio_map_user(struct request_queue *q, struct block_device *bdev, | 863 | struct bio *bio_map_user(struct request_queue *q, struct block_device *bdev, |
794 | unsigned long uaddr, unsigned int len, int write_to_vm) | 864 | unsigned long uaddr, unsigned int len, int write_to_vm, |
865 | gfp_t gfp_mask) | ||
795 | { | 866 | { |
796 | struct sg_iovec iov; | 867 | struct sg_iovec iov; |
797 | 868 | ||
798 | iov.iov_base = (void __user *)uaddr; | 869 | iov.iov_base = (void __user *)uaddr; |
799 | iov.iov_len = len; | 870 | iov.iov_len = len; |
800 | 871 | ||
801 | return bio_map_user_iov(q, bdev, &iov, 1, write_to_vm); | 872 | return bio_map_user_iov(q, bdev, &iov, 1, write_to_vm, gfp_mask); |
802 | } | 873 | } |
803 | 874 | ||
804 | /** | 875 | /** |
@@ -808,18 +879,19 @@ struct bio *bio_map_user(struct request_queue *q, struct block_device *bdev, | |||
808 | * @iov: the iovec. | 879 | * @iov: the iovec. |
809 | * @iov_count: number of elements in the iovec | 880 | * @iov_count: number of elements in the iovec |
810 | * @write_to_vm: bool indicating writing to pages or not | 881 | * @write_to_vm: bool indicating writing to pages or not |
882 | * @gfp_mask: memory allocation flags | ||
811 | * | 883 | * |
812 | * Map the user space address into a bio suitable for io to a block | 884 | * Map the user space address into a bio suitable for io to a block |
813 | * device. Returns an error pointer in case of error. | 885 | * device. Returns an error pointer in case of error. |
814 | */ | 886 | */ |
815 | struct bio *bio_map_user_iov(struct request_queue *q, struct block_device *bdev, | 887 | struct bio *bio_map_user_iov(struct request_queue *q, struct block_device *bdev, |
816 | struct sg_iovec *iov, int iov_count, | 888 | struct sg_iovec *iov, int iov_count, |
817 | int write_to_vm) | 889 | int write_to_vm, gfp_t gfp_mask) |
818 | { | 890 | { |
819 | struct bio *bio; | 891 | struct bio *bio; |
820 | 892 | ||
821 | bio = __bio_map_user_iov(q, bdev, iov, iov_count, write_to_vm); | 893 | bio = __bio_map_user_iov(q, bdev, iov, iov_count, write_to_vm, |
822 | 894 | gfp_mask); | |
823 | if (IS_ERR(bio)) | 895 | if (IS_ERR(bio)) |
824 | return bio; | 896 | return bio; |
825 | 897 | ||
@@ -976,48 +1048,13 @@ static void bio_copy_kern_endio(struct bio *bio, int err) | |||
976 | struct bio *bio_copy_kern(struct request_queue *q, void *data, unsigned int len, | 1048 | struct bio *bio_copy_kern(struct request_queue *q, void *data, unsigned int len, |
977 | gfp_t gfp_mask, int reading) | 1049 | gfp_t gfp_mask, int reading) |
978 | { | 1050 | { |
979 | unsigned long kaddr = (unsigned long)data; | ||
980 | unsigned long end = (kaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT; | ||
981 | unsigned long start = kaddr >> PAGE_SHIFT; | ||
982 | const int nr_pages = end - start; | ||
983 | struct bio *bio; | 1051 | struct bio *bio; |
984 | struct bio_vec *bvec; | 1052 | struct bio_vec *bvec; |
985 | struct bio_map_data *bmd; | 1053 | int i; |
986 | int i, ret; | ||
987 | struct sg_iovec iov; | ||
988 | |||
989 | iov.iov_base = data; | ||
990 | iov.iov_len = len; | ||
991 | |||
992 | bmd = bio_alloc_map_data(nr_pages, 1, gfp_mask); | ||
993 | if (!bmd) | ||
994 | return ERR_PTR(-ENOMEM); | ||
995 | |||
996 | ret = -ENOMEM; | ||
997 | bio = bio_alloc(gfp_mask, nr_pages); | ||
998 | if (!bio) | ||
999 | goto out_bmd; | ||
1000 | |||
1001 | while (len) { | ||
1002 | struct page *page; | ||
1003 | unsigned int bytes = PAGE_SIZE; | ||
1004 | |||
1005 | if (bytes > len) | ||
1006 | bytes = len; | ||
1007 | |||
1008 | page = alloc_page(q->bounce_gfp | gfp_mask); | ||
1009 | if (!page) { | ||
1010 | ret = -ENOMEM; | ||
1011 | goto cleanup; | ||
1012 | } | ||
1013 | |||
1014 | if (bio_add_pc_page(q, bio, page, bytes, 0) < bytes) { | ||
1015 | ret = -EINVAL; | ||
1016 | goto cleanup; | ||
1017 | } | ||
1018 | 1054 | ||
1019 | len -= bytes; | 1055 | bio = bio_copy_user(q, NULL, (unsigned long)data, len, 1, gfp_mask); |
1020 | } | 1056 | if (IS_ERR(bio)) |
1057 | return bio; | ||
1021 | 1058 | ||
1022 | if (!reading) { | 1059 | if (!reading) { |
1023 | void *p = data; | 1060 | void *p = data; |
@@ -1030,20 +1067,9 @@ struct bio *bio_copy_kern(struct request_queue *q, void *data, unsigned int len, | |||
1030 | } | 1067 | } |
1031 | } | 1068 | } |
1032 | 1069 | ||
1033 | bio->bi_private = bmd; | ||
1034 | bio->bi_end_io = bio_copy_kern_endio; | 1070 | bio->bi_end_io = bio_copy_kern_endio; |
1035 | 1071 | ||
1036 | bio_set_map_data(bmd, bio, &iov, 1); | ||
1037 | return bio; | 1072 | return bio; |
1038 | cleanup: | ||
1039 | bio_for_each_segment(bvec, bio, i) | ||
1040 | __free_page(bvec->bv_page); | ||
1041 | |||
1042 | bio_put(bio); | ||
1043 | out_bmd: | ||
1044 | bio_free_map_data(bmd); | ||
1045 | |||
1046 | return ERR_PTR(ret); | ||
1047 | } | 1073 | } |
1048 | 1074 | ||
1049 | /* | 1075 | /* |
@@ -1230,9 +1256,9 @@ static void bio_pair_end_2(struct bio *bi, int err) | |||
1230 | * split a bio - only worry about a bio with a single page | 1256 | * split a bio - only worry about a bio with a single page |
1231 | * in it's iovec | 1257 | * in it's iovec |
1232 | */ | 1258 | */ |
1233 | struct bio_pair *bio_split(struct bio *bi, mempool_t *pool, int first_sectors) | 1259 | struct bio_pair *bio_split(struct bio *bi, int first_sectors) |
1234 | { | 1260 | { |
1235 | struct bio_pair *bp = mempool_alloc(pool, GFP_NOIO); | 1261 | struct bio_pair *bp = mempool_alloc(bio_split_pool, GFP_NOIO); |
1236 | 1262 | ||
1237 | if (!bp) | 1263 | if (!bp) |
1238 | return bp; | 1264 | return bp; |
@@ -1266,7 +1292,7 @@ struct bio_pair *bio_split(struct bio *bi, mempool_t *pool, int first_sectors) | |||
1266 | bp->bio2.bi_end_io = bio_pair_end_2; | 1292 | bp->bio2.bi_end_io = bio_pair_end_2; |
1267 | 1293 | ||
1268 | bp->bio1.bi_private = bi; | 1294 | bp->bio1.bi_private = bi; |
1269 | bp->bio2.bi_private = pool; | 1295 | bp->bio2.bi_private = bio_split_pool; |
1270 | 1296 | ||
1271 | if (bio_integrity(bi)) | 1297 | if (bio_integrity(bi)) |
1272 | bio_integrity_split(bi, bp, first_sectors); | 1298 | bio_integrity_split(bi, bp, first_sectors); |
@@ -1274,6 +1300,42 @@ struct bio_pair *bio_split(struct bio *bi, mempool_t *pool, int first_sectors) | |||
1274 | return bp; | 1300 | return bp; |
1275 | } | 1301 | } |
1276 | 1302 | ||
1303 | /** | ||
1304 | * bio_sector_offset - Find hardware sector offset in bio | ||
1305 | * @bio: bio to inspect | ||
1306 | * @index: bio_vec index | ||
1307 | * @offset: offset in bv_page | ||
1308 | * | ||
1309 | * Return the number of hardware sectors between beginning of bio | ||
1310 | * and an end point indicated by a bio_vec index and an offset | ||
1311 | * within that vector's page. | ||
1312 | */ | ||
1313 | sector_t bio_sector_offset(struct bio *bio, unsigned short index, | ||
1314 | unsigned int offset) | ||
1315 | { | ||
1316 | unsigned int sector_sz = queue_hardsect_size(bio->bi_bdev->bd_disk->queue); | ||
1317 | struct bio_vec *bv; | ||
1318 | sector_t sectors; | ||
1319 | int i; | ||
1320 | |||
1321 | sectors = 0; | ||
1322 | |||
1323 | if (index >= bio->bi_idx) | ||
1324 | index = bio->bi_vcnt - 1; | ||
1325 | |||
1326 | __bio_for_each_segment(bv, bio, i, 0) { | ||
1327 | if (i == index) { | ||
1328 | if (offset > bv->bv_offset) | ||
1329 | sectors += (offset - bv->bv_offset) / sector_sz; | ||
1330 | break; | ||
1331 | } | ||
1332 | |||
1333 | sectors += bv->bv_len / sector_sz; | ||
1334 | } | ||
1335 | |||
1336 | return sectors; | ||
1337 | } | ||
1338 | EXPORT_SYMBOL(bio_sector_offset); | ||
1277 | 1339 | ||
1278 | /* | 1340 | /* |
1279 | * create memory pools for biovec's in a bio_set. | 1341 | * create memory pools for biovec's in a bio_set. |
@@ -1376,6 +1438,7 @@ static int __init init_bio(void) | |||
1376 | subsys_initcall(init_bio); | 1438 | subsys_initcall(init_bio); |
1377 | 1439 | ||
1378 | EXPORT_SYMBOL(bio_alloc); | 1440 | EXPORT_SYMBOL(bio_alloc); |
1441 | EXPORT_SYMBOL(bio_kmalloc); | ||
1379 | EXPORT_SYMBOL(bio_put); | 1442 | EXPORT_SYMBOL(bio_put); |
1380 | EXPORT_SYMBOL(bio_free); | 1443 | EXPORT_SYMBOL(bio_free); |
1381 | EXPORT_SYMBOL(bio_endio); | 1444 | EXPORT_SYMBOL(bio_endio); |
@@ -1383,7 +1446,6 @@ EXPORT_SYMBOL(bio_init); | |||
1383 | EXPORT_SYMBOL(__bio_clone); | 1446 | EXPORT_SYMBOL(__bio_clone); |
1384 | EXPORT_SYMBOL(bio_clone); | 1447 | EXPORT_SYMBOL(bio_clone); |
1385 | EXPORT_SYMBOL(bio_phys_segments); | 1448 | EXPORT_SYMBOL(bio_phys_segments); |
1386 | EXPORT_SYMBOL(bio_hw_segments); | ||
1387 | EXPORT_SYMBOL(bio_add_page); | 1449 | EXPORT_SYMBOL(bio_add_page); |
1388 | EXPORT_SYMBOL(bio_add_pc_page); | 1450 | EXPORT_SYMBOL(bio_add_pc_page); |
1389 | EXPORT_SYMBOL(bio_get_nr_vecs); | 1451 | EXPORT_SYMBOL(bio_get_nr_vecs); |
@@ -1393,7 +1455,6 @@ EXPORT_SYMBOL(bio_map_kern); | |||
1393 | EXPORT_SYMBOL(bio_copy_kern); | 1455 | EXPORT_SYMBOL(bio_copy_kern); |
1394 | EXPORT_SYMBOL(bio_pair_release); | 1456 | EXPORT_SYMBOL(bio_pair_release); |
1395 | EXPORT_SYMBOL(bio_split); | 1457 | EXPORT_SYMBOL(bio_split); |
1396 | EXPORT_SYMBOL(bio_split_pool); | ||
1397 | EXPORT_SYMBOL(bio_copy_user); | 1458 | EXPORT_SYMBOL(bio_copy_user); |
1398 | EXPORT_SYMBOL(bio_uncopy_user); | 1459 | EXPORT_SYMBOL(bio_uncopy_user); |
1399 | EXPORT_SYMBOL(bioset_create); | 1460 | EXPORT_SYMBOL(bioset_create); |
diff --git a/fs/block_dev.c b/fs/block_dev.c index aff54219e049..d84f0469a016 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c | |||
@@ -540,22 +540,6 @@ EXPORT_SYMBOL(bd_release); | |||
540 | * /sys/block/sda/holders/dm-0 --> /sys/block/dm-0 | 540 | * /sys/block/sda/holders/dm-0 --> /sys/block/dm-0 |
541 | */ | 541 | */ |
542 | 542 | ||
543 | static struct kobject *bdev_get_kobj(struct block_device *bdev) | ||
544 | { | ||
545 | if (bdev->bd_contains != bdev) | ||
546 | return kobject_get(&bdev->bd_part->dev.kobj); | ||
547 | else | ||
548 | return kobject_get(&bdev->bd_disk->dev.kobj); | ||
549 | } | ||
550 | |||
551 | static struct kobject *bdev_get_holder(struct block_device *bdev) | ||
552 | { | ||
553 | if (bdev->bd_contains != bdev) | ||
554 | return kobject_get(bdev->bd_part->holder_dir); | ||
555 | else | ||
556 | return kobject_get(bdev->bd_disk->holder_dir); | ||
557 | } | ||
558 | |||
559 | static int add_symlink(struct kobject *from, struct kobject *to) | 543 | static int add_symlink(struct kobject *from, struct kobject *to) |
560 | { | 544 | { |
561 | if (!from || !to) | 545 | if (!from || !to) |
@@ -604,11 +588,11 @@ static int bd_holder_grab_dirs(struct block_device *bdev, | |||
604 | if (!bo->hdev) | 588 | if (!bo->hdev) |
605 | goto fail_put_sdir; | 589 | goto fail_put_sdir; |
606 | 590 | ||
607 | bo->sdev = bdev_get_kobj(bdev); | 591 | bo->sdev = kobject_get(&part_to_dev(bdev->bd_part)->kobj); |
608 | if (!bo->sdev) | 592 | if (!bo->sdev) |
609 | goto fail_put_hdev; | 593 | goto fail_put_hdev; |
610 | 594 | ||
611 | bo->hdir = bdev_get_holder(bdev); | 595 | bo->hdir = kobject_get(bdev->bd_part->holder_dir); |
612 | if (!bo->hdir) | 596 | if (!bo->hdir) |
613 | goto fail_put_sdev; | 597 | goto fail_put_sdev; |
614 | 598 | ||
@@ -868,6 +852,87 @@ struct block_device *open_by_devnum(dev_t dev, unsigned mode) | |||
868 | 852 | ||
869 | EXPORT_SYMBOL(open_by_devnum); | 853 | EXPORT_SYMBOL(open_by_devnum); |
870 | 854 | ||
855 | /** | ||
856 | * flush_disk - invalidates all buffer-cache entries on a disk | ||
857 | * | ||
858 | * @bdev: struct block device to be flushed | ||
859 | * | ||
860 | * Invalidates all buffer-cache entries on a disk. It should be called | ||
861 | * when a disk has been changed -- either by a media change or online | ||
862 | * resize. | ||
863 | */ | ||
864 | static void flush_disk(struct block_device *bdev) | ||
865 | { | ||
866 | if (__invalidate_device(bdev)) { | ||
867 | char name[BDEVNAME_SIZE] = ""; | ||
868 | |||
869 | if (bdev->bd_disk) | ||
870 | disk_name(bdev->bd_disk, 0, name); | ||
871 | printk(KERN_WARNING "VFS: busy inodes on changed media or " | ||
872 | "resized disk %s\n", name); | ||
873 | } | ||
874 | |||
875 | if (!bdev->bd_disk) | ||
876 | return; | ||
877 | if (disk_partitionable(bdev->bd_disk)) | ||
878 | bdev->bd_invalidated = 1; | ||
879 | } | ||
880 | |||
881 | /** | ||
882 | * check_disk_size_change - checks for disk size change and adjusts bdev size. | ||
883 | * @disk: struct gendisk to check | ||
884 | * @bdev: struct bdev to adjust. | ||
885 | * | ||
886 | * This routine checks to see if the bdev size does not match the disk size | ||
887 | * and adjusts it if it differs. | ||
888 | */ | ||
889 | void check_disk_size_change(struct gendisk *disk, struct block_device *bdev) | ||
890 | { | ||
891 | loff_t disk_size, bdev_size; | ||
892 | |||
893 | disk_size = (loff_t)get_capacity(disk) << 9; | ||
894 | bdev_size = i_size_read(bdev->bd_inode); | ||
895 | if (disk_size != bdev_size) { | ||
896 | char name[BDEVNAME_SIZE]; | ||
897 | |||
898 | disk_name(disk, 0, name); | ||
899 | printk(KERN_INFO | ||
900 | "%s: detected capacity change from %lld to %lld\n", | ||
901 | name, bdev_size, disk_size); | ||
902 | i_size_write(bdev->bd_inode, disk_size); | ||
903 | flush_disk(bdev); | ||
904 | } | ||
905 | } | ||
906 | EXPORT_SYMBOL(check_disk_size_change); | ||
907 | |||
908 | /** | ||
909 | * revalidate_disk - wrapper for lower-level driver's revalidate_disk call-back | ||
910 | * @disk: struct gendisk to be revalidated | ||
911 | * | ||
912 | * This routine is a wrapper for lower-level driver's revalidate_disk | ||
913 | * call-backs. It is used to do common pre and post operations needed | ||
914 | * for all revalidate_disk operations. | ||
915 | */ | ||
916 | int revalidate_disk(struct gendisk *disk) | ||
917 | { | ||
918 | struct block_device *bdev; | ||
919 | int ret = 0; | ||
920 | |||
921 | if (disk->fops->revalidate_disk) | ||
922 | ret = disk->fops->revalidate_disk(disk); | ||
923 | |||
924 | bdev = bdget_disk(disk, 0); | ||
925 | if (!bdev) | ||
926 | return ret; | ||
927 | |||
928 | mutex_lock(&bdev->bd_mutex); | ||
929 | check_disk_size_change(disk, bdev); | ||
930 | mutex_unlock(&bdev->bd_mutex); | ||
931 | bdput(bdev); | ||
932 | return ret; | ||
933 | } | ||
934 | EXPORT_SYMBOL(revalidate_disk); | ||
935 | |||
871 | /* | 936 | /* |
872 | * This routine checks whether a removable media has been changed, | 937 | * This routine checks whether a removable media has been changed, |
873 | * and invalidates all buffer-cache-entries in that case. This | 938 | * and invalidates all buffer-cache-entries in that case. This |
@@ -887,13 +952,9 @@ int check_disk_change(struct block_device *bdev) | |||
887 | if (!bdops->media_changed(bdev->bd_disk)) | 952 | if (!bdops->media_changed(bdev->bd_disk)) |
888 | return 0; | 953 | return 0; |
889 | 954 | ||
890 | if (__invalidate_device(bdev)) | 955 | flush_disk(bdev); |
891 | printk("VFS: busy inodes on changed media.\n"); | ||
892 | |||
893 | if (bdops->revalidate_disk) | 956 | if (bdops->revalidate_disk) |
894 | bdops->revalidate_disk(bdev->bd_disk); | 957 | bdops->revalidate_disk(bdev->bd_disk); |
895 | if (bdev->bd_disk->minors > 1) | ||
896 | bdev->bd_invalidated = 1; | ||
897 | return 1; | 958 | return 1; |
898 | } | 959 | } |
899 | 960 | ||
@@ -927,10 +988,10 @@ static int __blkdev_put(struct block_device *bdev, int for_part); | |||
927 | 988 | ||
928 | static int do_open(struct block_device *bdev, struct file *file, int for_part) | 989 | static int do_open(struct block_device *bdev, struct file *file, int for_part) |
929 | { | 990 | { |
930 | struct module *owner = NULL; | ||
931 | struct gendisk *disk; | 991 | struct gendisk *disk; |
992 | struct hd_struct *part = NULL; | ||
932 | int ret; | 993 | int ret; |
933 | int part; | 994 | int partno; |
934 | int perm = 0; | 995 | int perm = 0; |
935 | 996 | ||
936 | if (file->f_mode & FMODE_READ) | 997 | if (file->f_mode & FMODE_READ) |
@@ -948,25 +1009,27 @@ static int do_open(struct block_device *bdev, struct file *file, int for_part) | |||
948 | 1009 | ||
949 | ret = -ENXIO; | 1010 | ret = -ENXIO; |
950 | file->f_mapping = bdev->bd_inode->i_mapping; | 1011 | file->f_mapping = bdev->bd_inode->i_mapping; |
1012 | |||
951 | lock_kernel(); | 1013 | lock_kernel(); |
952 | disk = get_gendisk(bdev->bd_dev, &part); | 1014 | |
953 | if (!disk) { | 1015 | disk = get_gendisk(bdev->bd_dev, &partno); |
954 | unlock_kernel(); | 1016 | if (!disk) |
955 | bdput(bdev); | 1017 | goto out_unlock_kernel; |
956 | return ret; | 1018 | part = disk_get_part(disk, partno); |
957 | } | 1019 | if (!part) |
958 | owner = disk->fops->owner; | 1020 | goto out_unlock_kernel; |
959 | 1021 | ||
960 | mutex_lock_nested(&bdev->bd_mutex, for_part); | 1022 | mutex_lock_nested(&bdev->bd_mutex, for_part); |
961 | if (!bdev->bd_openers) { | 1023 | if (!bdev->bd_openers) { |
962 | bdev->bd_disk = disk; | 1024 | bdev->bd_disk = disk; |
1025 | bdev->bd_part = part; | ||
963 | bdev->bd_contains = bdev; | 1026 | bdev->bd_contains = bdev; |
964 | if (!part) { | 1027 | if (!partno) { |
965 | struct backing_dev_info *bdi; | 1028 | struct backing_dev_info *bdi; |
966 | if (disk->fops->open) { | 1029 | if (disk->fops->open) { |
967 | ret = disk->fops->open(bdev->bd_inode, file); | 1030 | ret = disk->fops->open(bdev->bd_inode, file); |
968 | if (ret) | 1031 | if (ret) |
969 | goto out_first; | 1032 | goto out_clear; |
970 | } | 1033 | } |
971 | if (!bdev->bd_openers) { | 1034 | if (!bdev->bd_openers) { |
972 | bd_set_size(bdev,(loff_t)get_capacity(disk)<<9); | 1035 | bd_set_size(bdev,(loff_t)get_capacity(disk)<<9); |
@@ -978,36 +1041,36 @@ static int do_open(struct block_device *bdev, struct file *file, int for_part) | |||
978 | if (bdev->bd_invalidated) | 1041 | if (bdev->bd_invalidated) |
979 | rescan_partitions(disk, bdev); | 1042 | rescan_partitions(disk, bdev); |
980 | } else { | 1043 | } else { |
981 | struct hd_struct *p; | ||
982 | struct block_device *whole; | 1044 | struct block_device *whole; |
983 | whole = bdget_disk(disk, 0); | 1045 | whole = bdget_disk(disk, 0); |
984 | ret = -ENOMEM; | 1046 | ret = -ENOMEM; |
985 | if (!whole) | 1047 | if (!whole) |
986 | goto out_first; | 1048 | goto out_clear; |
987 | BUG_ON(for_part); | 1049 | BUG_ON(for_part); |
988 | ret = __blkdev_get(whole, file->f_mode, file->f_flags, 1); | 1050 | ret = __blkdev_get(whole, file->f_mode, file->f_flags, 1); |
989 | if (ret) | 1051 | if (ret) |
990 | goto out_first; | 1052 | goto out_clear; |
991 | bdev->bd_contains = whole; | 1053 | bdev->bd_contains = whole; |
992 | p = disk->part[part - 1]; | ||
993 | bdev->bd_inode->i_data.backing_dev_info = | 1054 | bdev->bd_inode->i_data.backing_dev_info = |
994 | whole->bd_inode->i_data.backing_dev_info; | 1055 | whole->bd_inode->i_data.backing_dev_info; |
995 | if (!(disk->flags & GENHD_FL_UP) || !p || !p->nr_sects) { | 1056 | if (!(disk->flags & GENHD_FL_UP) || |
1057 | !part || !part->nr_sects) { | ||
996 | ret = -ENXIO; | 1058 | ret = -ENXIO; |
997 | goto out_first; | 1059 | goto out_clear; |
998 | } | 1060 | } |
999 | kobject_get(&p->dev.kobj); | 1061 | bd_set_size(bdev, (loff_t)part->nr_sects << 9); |
1000 | bdev->bd_part = p; | ||
1001 | bd_set_size(bdev, (loff_t) p->nr_sects << 9); | ||
1002 | } | 1062 | } |
1003 | } else { | 1063 | } else { |
1064 | disk_put_part(part); | ||
1004 | put_disk(disk); | 1065 | put_disk(disk); |
1005 | module_put(owner); | 1066 | module_put(disk->fops->owner); |
1067 | part = NULL; | ||
1068 | disk = NULL; | ||
1006 | if (bdev->bd_contains == bdev) { | 1069 | if (bdev->bd_contains == bdev) { |
1007 | if (bdev->bd_disk->fops->open) { | 1070 | if (bdev->bd_disk->fops->open) { |
1008 | ret = bdev->bd_disk->fops->open(bdev->bd_inode, file); | 1071 | ret = bdev->bd_disk->fops->open(bdev->bd_inode, file); |
1009 | if (ret) | 1072 | if (ret) |
1010 | goto out; | 1073 | goto out_unlock_bdev; |
1011 | } | 1074 | } |
1012 | if (bdev->bd_invalidated) | 1075 | if (bdev->bd_invalidated) |
1013 | rescan_partitions(bdev->bd_disk, bdev); | 1076 | rescan_partitions(bdev->bd_disk, bdev); |
@@ -1020,19 +1083,24 @@ static int do_open(struct block_device *bdev, struct file *file, int for_part) | |||
1020 | unlock_kernel(); | 1083 | unlock_kernel(); |
1021 | return 0; | 1084 | return 0; |
1022 | 1085 | ||
1023 | out_first: | 1086 | out_clear: |
1024 | bdev->bd_disk = NULL; | 1087 | bdev->bd_disk = NULL; |
1088 | bdev->bd_part = NULL; | ||
1025 | bdev->bd_inode->i_data.backing_dev_info = &default_backing_dev_info; | 1089 | bdev->bd_inode->i_data.backing_dev_info = &default_backing_dev_info; |
1026 | if (bdev != bdev->bd_contains) | 1090 | if (bdev != bdev->bd_contains) |
1027 | __blkdev_put(bdev->bd_contains, 1); | 1091 | __blkdev_put(bdev->bd_contains, 1); |
1028 | bdev->bd_contains = NULL; | 1092 | bdev->bd_contains = NULL; |
1029 | put_disk(disk); | 1093 | out_unlock_bdev: |
1030 | module_put(owner); | ||
1031 | out: | ||
1032 | mutex_unlock(&bdev->bd_mutex); | 1094 | mutex_unlock(&bdev->bd_mutex); |
1095 | out_unlock_kernel: | ||
1033 | unlock_kernel(); | 1096 | unlock_kernel(); |
1034 | if (ret) | 1097 | |
1035 | bdput(bdev); | 1098 | disk_put_part(part); |
1099 | if (disk) | ||
1100 | module_put(disk->fops->owner); | ||
1101 | put_disk(disk); | ||
1102 | bdput(bdev); | ||
1103 | |||
1036 | return ret; | 1104 | return ret; |
1037 | } | 1105 | } |
1038 | 1106 | ||
@@ -1117,11 +1185,8 @@ static int __blkdev_put(struct block_device *bdev, int for_part) | |||
1117 | 1185 | ||
1118 | put_disk(disk); | 1186 | put_disk(disk); |
1119 | module_put(owner); | 1187 | module_put(owner); |
1120 | 1188 | disk_put_part(bdev->bd_part); | |
1121 | if (bdev->bd_contains != bdev) { | 1189 | bdev->bd_part = NULL; |
1122 | kobject_put(&bdev->bd_part->dev.kobj); | ||
1123 | bdev->bd_part = NULL; | ||
1124 | } | ||
1125 | bdev->bd_disk = NULL; | 1190 | bdev->bd_disk = NULL; |
1126 | bdev->bd_inode->i_data.backing_dev_info = &default_backing_dev_info; | 1191 | bdev->bd_inode->i_data.backing_dev_info = &default_backing_dev_info; |
1127 | if (bdev != bdev->bd_contains) | 1192 | if (bdev != bdev->bd_contains) |
@@ -1197,10 +1262,9 @@ EXPORT_SYMBOL(ioctl_by_bdev); | |||
1197 | 1262 | ||
1198 | /** | 1263 | /** |
1199 | * lookup_bdev - lookup a struct block_device by name | 1264 | * lookup_bdev - lookup a struct block_device by name |
1265 | * @pathname: special file representing the block device | ||
1200 | * | 1266 | * |
1201 | * @path: special file representing the block device | 1267 | * Get a reference to the blockdevice at @pathname in the current |
1202 | * | ||
1203 | * Get a reference to the blockdevice at @path in the current | ||
1204 | * namespace if possible and return it. Return ERR_PTR(error) | 1268 | * namespace if possible and return it. Return ERR_PTR(error) |
1205 | * otherwise. | 1269 | * otherwise. |
1206 | */ | 1270 | */ |
diff --git a/fs/cifs/cifs_spnego.c b/fs/cifs/cifs_spnego.c index 117ef4bba68e..fcee9298b620 100644 --- a/fs/cifs/cifs_spnego.c +++ b/fs/cifs/cifs_spnego.c | |||
@@ -66,11 +66,28 @@ struct key_type cifs_spnego_key_type = { | |||
66 | .describe = user_describe, | 66 | .describe = user_describe, |
67 | }; | 67 | }; |
68 | 68 | ||
69 | #define MAX_VER_STR_LEN 8 /* length of longest version string e.g. | 69 | /* length of longest version string e.g. strlen("ver=0xFF") */ |
70 | strlen("ver=0xFF") */ | 70 | #define MAX_VER_STR_LEN 8 |
71 | #define MAX_MECH_STR_LEN 13 /* length of longest security mechanism name, eg | 71 | |
72 | in future could have strlen(";sec=ntlmsspi") */ | 72 | /* length of longest security mechanism name, eg in future could have |
73 | #define MAX_IPV6_ADDR_LEN 42 /* eg FEDC:BA98:7654:3210:FEDC:BA98:7654:3210/60 */ | 73 | * strlen(";sec=ntlmsspi") */ |
74 | #define MAX_MECH_STR_LEN 13 | ||
75 | |||
76 | /* max possible addr len eg FEDC:BA98:7654:3210:FEDC:BA98:7654:3210/60 */ | ||
77 | #define MAX_IPV6_ADDR_LEN 42 | ||
78 | |||
79 | /* strlen of "host=" */ | ||
80 | #define HOST_KEY_LEN 5 | ||
81 | |||
82 | /* strlen of ";ip4=" or ";ip6=" */ | ||
83 | #define IP_KEY_LEN 5 | ||
84 | |||
85 | /* strlen of ";uid=0x" */ | ||
86 | #define UID_KEY_LEN 7 | ||
87 | |||
88 | /* strlen of ";user=" */ | ||
89 | #define USER_KEY_LEN 6 | ||
90 | |||
74 | /* get a key struct with a SPNEGO security blob, suitable for session setup */ | 91 | /* get a key struct with a SPNEGO security blob, suitable for session setup */ |
75 | struct key * | 92 | struct key * |
76 | cifs_get_spnego_key(struct cifsSesInfo *sesInfo) | 93 | cifs_get_spnego_key(struct cifsSesInfo *sesInfo) |
@@ -84,11 +101,11 @@ cifs_get_spnego_key(struct cifsSesInfo *sesInfo) | |||
84 | /* length of fields (with semicolons): ver=0xyz ip4=ipaddress | 101 | /* length of fields (with semicolons): ver=0xyz ip4=ipaddress |
85 | host=hostname sec=mechanism uid=0xFF user=username */ | 102 | host=hostname sec=mechanism uid=0xFF user=username */ |
86 | desc_len = MAX_VER_STR_LEN + | 103 | desc_len = MAX_VER_STR_LEN + |
87 | 6 /* len of "host=" */ + strlen(hostname) + | 104 | HOST_KEY_LEN + strlen(hostname) + |
88 | 5 /* len of ";ipv4=" */ + MAX_IPV6_ADDR_LEN + | 105 | IP_KEY_LEN + MAX_IPV6_ADDR_LEN + |
89 | MAX_MECH_STR_LEN + | 106 | MAX_MECH_STR_LEN + |
90 | 7 /* len of ";uid=0x" */ + (sizeof(uid_t) * 2) + | 107 | UID_KEY_LEN + (sizeof(uid_t) * 2) + |
91 | 6 /* len of ";user=" */ + strlen(sesInfo->userName) + 1; | 108 | USER_KEY_LEN + strlen(sesInfo->userName) + 1; |
92 | 109 | ||
93 | spnego_key = ERR_PTR(-ENOMEM); | 110 | spnego_key = ERR_PTR(-ENOMEM); |
94 | description = kzalloc(desc_len, GFP_KERNEL); | 111 | description = kzalloc(desc_len, GFP_KERNEL); |
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h index 135c965c4137..f7b4a5cd837b 100644 --- a/fs/cifs/cifsfs.h +++ b/fs/cifs/cifsfs.h | |||
@@ -41,7 +41,7 @@ extern int cifs_create(struct inode *, struct dentry *, int, | |||
41 | struct nameidata *); | 41 | struct nameidata *); |
42 | extern struct dentry *cifs_lookup(struct inode *, struct dentry *, | 42 | extern struct dentry *cifs_lookup(struct inode *, struct dentry *, |
43 | struct nameidata *); | 43 | struct nameidata *); |
44 | extern int cifs_unlink(struct inode *, struct dentry *); | 44 | extern int cifs_unlink(struct inode *dir, struct dentry *dentry); |
45 | extern int cifs_hardlink(struct dentry *, struct inode *, struct dentry *); | 45 | extern int cifs_hardlink(struct dentry *, struct inode *, struct dentry *); |
46 | extern int cifs_mknod(struct inode *, struct dentry *, int, dev_t); | 46 | extern int cifs_mknod(struct inode *, struct dentry *, int, dev_t); |
47 | extern int cifs_mkdir(struct inode *, struct dentry *, int); | 47 | extern int cifs_mkdir(struct inode *, struct dentry *, int); |
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index 8dfd6f24d488..0d22479d99b7 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h | |||
@@ -309,6 +309,7 @@ struct cifs_search_info { | |||
309 | __u32 resume_key; | 309 | __u32 resume_key; |
310 | char *ntwrk_buf_start; | 310 | char *ntwrk_buf_start; |
311 | char *srch_entries_start; | 311 | char *srch_entries_start; |
312 | char *last_entry; | ||
312 | char *presume_name; | 313 | char *presume_name; |
313 | unsigned int resume_name_len; | 314 | unsigned int resume_name_len; |
314 | bool endOfSearch:1; | 315 | bool endOfSearch:1; |
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h index a729d083e6f4..0cff7fe986e8 100644 --- a/fs/cifs/cifsproto.h +++ b/fs/cifs/cifsproto.h | |||
@@ -179,6 +179,8 @@ extern int CIFSSMBSetPathInfo(const int xid, struct cifsTconInfo *tcon, | |||
179 | extern int CIFSSMBSetFileInfo(const int xid, struct cifsTconInfo *tcon, | 179 | extern int CIFSSMBSetFileInfo(const int xid, struct cifsTconInfo *tcon, |
180 | const FILE_BASIC_INFO *data, __u16 fid, | 180 | const FILE_BASIC_INFO *data, __u16 fid, |
181 | __u32 pid_of_opener); | 181 | __u32 pid_of_opener); |
182 | extern int CIFSSMBSetFileDisposition(const int xid, struct cifsTconInfo *tcon, | ||
183 | bool delete_file, __u16 fid, __u32 pid_of_opener); | ||
182 | #if 0 | 184 | #if 0 |
183 | extern int CIFSSMBSetAttrLegacy(int xid, struct cifsTconInfo *tcon, | 185 | extern int CIFSSMBSetAttrLegacy(int xid, struct cifsTconInfo *tcon, |
184 | char *fileName, __u16 dos_attributes, | 186 | char *fileName, __u16 dos_attributes, |
@@ -229,7 +231,7 @@ extern int CIFSSMBRename(const int xid, struct cifsTconInfo *tcon, | |||
229 | const struct nls_table *nls_codepage, | 231 | const struct nls_table *nls_codepage, |
230 | int remap_special_chars); | 232 | int remap_special_chars); |
231 | extern int CIFSSMBRenameOpenFile(const int xid, struct cifsTconInfo *pTcon, | 233 | extern int CIFSSMBRenameOpenFile(const int xid, struct cifsTconInfo *pTcon, |
232 | int netfid, char *target_name, | 234 | int netfid, const char *target_name, |
233 | const struct nls_table *nls_codepage, | 235 | const struct nls_table *nls_codepage, |
234 | int remap_special_chars); | 236 | int remap_special_chars); |
235 | extern int CIFSCreateHardLink(const int xid, | 237 | extern int CIFSCreateHardLink(const int xid, |
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index 994de7c90474..6f4ffe15d68d 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c | |||
@@ -2017,7 +2017,7 @@ renameRetry: | |||
2017 | } | 2017 | } |
2018 | 2018 | ||
2019 | int CIFSSMBRenameOpenFile(const int xid, struct cifsTconInfo *pTcon, | 2019 | int CIFSSMBRenameOpenFile(const int xid, struct cifsTconInfo *pTcon, |
2020 | int netfid, char *target_name, | 2020 | int netfid, const char *target_name, |
2021 | const struct nls_table *nls_codepage, int remap) | 2021 | const struct nls_table *nls_codepage, int remap) |
2022 | { | 2022 | { |
2023 | struct smb_com_transaction2_sfi_req *pSMB = NULL; | 2023 | struct smb_com_transaction2_sfi_req *pSMB = NULL; |
@@ -2071,7 +2071,7 @@ int CIFSSMBRenameOpenFile(const int xid, struct cifsTconInfo *pTcon, | |||
2071 | remap); | 2071 | remap); |
2072 | } | 2072 | } |
2073 | rename_info->target_name_len = cpu_to_le32(2 * len_of_str); | 2073 | rename_info->target_name_len = cpu_to_le32(2 * len_of_str); |
2074 | count = 12 /* sizeof(struct set_file_rename) */ + (2 * len_of_str) + 2; | 2074 | count = 12 /* sizeof(struct set_file_rename) */ + (2 * len_of_str); |
2075 | byte_count += count; | 2075 | byte_count += count; |
2076 | pSMB->DataCount = cpu_to_le16(count); | 2076 | pSMB->DataCount = cpu_to_le16(count); |
2077 | pSMB->TotalDataCount = pSMB->DataCount; | 2077 | pSMB->TotalDataCount = pSMB->DataCount; |
@@ -3614,6 +3614,8 @@ findFirstRetry: | |||
3614 | /* BB remember to free buffer if error BB */ | 3614 | /* BB remember to free buffer if error BB */ |
3615 | rc = validate_t2((struct smb_t2_rsp *)pSMBr); | 3615 | rc = validate_t2((struct smb_t2_rsp *)pSMBr); |
3616 | if (rc == 0) { | 3616 | if (rc == 0) { |
3617 | unsigned int lnoff; | ||
3618 | |||
3617 | if (pSMBr->hdr.Flags2 & SMBFLG2_UNICODE) | 3619 | if (pSMBr->hdr.Flags2 & SMBFLG2_UNICODE) |
3618 | psrch_inf->unicode = true; | 3620 | psrch_inf->unicode = true; |
3619 | else | 3621 | else |
@@ -3636,6 +3638,17 @@ findFirstRetry: | |||
3636 | le16_to_cpu(parms->SearchCount); | 3638 | le16_to_cpu(parms->SearchCount); |
3637 | psrch_inf->index_of_last_entry = 2 /* skip . and .. */ + | 3639 | psrch_inf->index_of_last_entry = 2 /* skip . and .. */ + |
3638 | psrch_inf->entries_in_buffer; | 3640 | psrch_inf->entries_in_buffer; |
3641 | lnoff = le16_to_cpu(parms->LastNameOffset); | ||
3642 | if (tcon->ses->server->maxBuf - MAX_CIFS_HDR_SIZE < | ||
3643 | lnoff) { | ||
3644 | cERROR(1, ("ignoring corrupt resume name")); | ||
3645 | psrch_inf->last_entry = NULL; | ||
3646 | return rc; | ||
3647 | } | ||
3648 | |||
3649 | psrch_inf->last_entry = psrch_inf->srch_entries_start + | ||
3650 | lnoff; | ||
3651 | |||
3639 | *pnetfid = parms->SearchHandle; | 3652 | *pnetfid = parms->SearchHandle; |
3640 | } else { | 3653 | } else { |
3641 | cifs_buf_release(pSMB); | 3654 | cifs_buf_release(pSMB); |
@@ -3725,6 +3738,8 @@ int CIFSFindNext(const int xid, struct cifsTconInfo *tcon, | |||
3725 | rc = validate_t2((struct smb_t2_rsp *)pSMBr); | 3738 | rc = validate_t2((struct smb_t2_rsp *)pSMBr); |
3726 | 3739 | ||
3727 | if (rc == 0) { | 3740 | if (rc == 0) { |
3741 | unsigned int lnoff; | ||
3742 | |||
3728 | /* BB fixme add lock for file (srch_info) struct here */ | 3743 | /* BB fixme add lock for file (srch_info) struct here */ |
3729 | if (pSMBr->hdr.Flags2 & SMBFLG2_UNICODE) | 3744 | if (pSMBr->hdr.Flags2 & SMBFLG2_UNICODE) |
3730 | psrch_inf->unicode = true; | 3745 | psrch_inf->unicode = true; |
@@ -3751,6 +3766,16 @@ int CIFSFindNext(const int xid, struct cifsTconInfo *tcon, | |||
3751 | le16_to_cpu(parms->SearchCount); | 3766 | le16_to_cpu(parms->SearchCount); |
3752 | psrch_inf->index_of_last_entry += | 3767 | psrch_inf->index_of_last_entry += |
3753 | psrch_inf->entries_in_buffer; | 3768 | psrch_inf->entries_in_buffer; |
3769 | lnoff = le16_to_cpu(parms->LastNameOffset); | ||
3770 | if (tcon->ses->server->maxBuf - MAX_CIFS_HDR_SIZE < | ||
3771 | lnoff) { | ||
3772 | cERROR(1, ("ignoring corrupt resume name")); | ||
3773 | psrch_inf->last_entry = NULL; | ||
3774 | return rc; | ||
3775 | } else | ||
3776 | psrch_inf->last_entry = | ||
3777 | psrch_inf->srch_entries_start + lnoff; | ||
3778 | |||
3754 | /* cFYI(1,("fnxt2 entries in buf %d index_of_last %d", | 3779 | /* cFYI(1,("fnxt2 entries in buf %d index_of_last %d", |
3755 | psrch_inf->entries_in_buffer, psrch_inf->index_of_last_entry)); */ | 3780 | psrch_inf->entries_in_buffer, psrch_inf->index_of_last_entry)); */ |
3756 | 3781 | ||
@@ -4876,6 +4901,61 @@ CIFSSMBSetFileInfo(const int xid, struct cifsTconInfo *tcon, | |||
4876 | return rc; | 4901 | return rc; |
4877 | } | 4902 | } |
4878 | 4903 | ||
4904 | int | ||
4905 | CIFSSMBSetFileDisposition(const int xid, struct cifsTconInfo *tcon, | ||
4906 | bool delete_file, __u16 fid, __u32 pid_of_opener) | ||
4907 | { | ||
4908 | struct smb_com_transaction2_sfi_req *pSMB = NULL; | ||
4909 | char *data_offset; | ||
4910 | int rc = 0; | ||
4911 | __u16 params, param_offset, offset, byte_count, count; | ||
4912 | |||
4913 | cFYI(1, ("Set File Disposition (via SetFileInfo)")); | ||
4914 | rc = small_smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB); | ||
4915 | |||
4916 | if (rc) | ||
4917 | return rc; | ||
4918 | |||
4919 | pSMB->hdr.Pid = cpu_to_le16((__u16)pid_of_opener); | ||
4920 | pSMB->hdr.PidHigh = cpu_to_le16((__u16)(pid_of_opener >> 16)); | ||
4921 | |||
4922 | params = 6; | ||
4923 | pSMB->MaxSetupCount = 0; | ||
4924 | pSMB->Reserved = 0; | ||
4925 | pSMB->Flags = 0; | ||
4926 | pSMB->Timeout = 0; | ||
4927 | pSMB->Reserved2 = 0; | ||
4928 | param_offset = offsetof(struct smb_com_transaction2_sfi_req, Fid) - 4; | ||
4929 | offset = param_offset + params; | ||
4930 | |||
4931 | data_offset = (char *) (&pSMB->hdr.Protocol) + offset; | ||
4932 | |||
4933 | count = 1; | ||
4934 | pSMB->MaxParameterCount = cpu_to_le16(2); | ||
4935 | /* BB find max SMB PDU from sess */ | ||
4936 | pSMB->MaxDataCount = cpu_to_le16(1000); | ||
4937 | pSMB->SetupCount = 1; | ||
4938 | pSMB->Reserved3 = 0; | ||
4939 | pSMB->SubCommand = cpu_to_le16(TRANS2_SET_FILE_INFORMATION); | ||
4940 | byte_count = 3 /* pad */ + params + count; | ||
4941 | pSMB->DataCount = cpu_to_le16(count); | ||
4942 | pSMB->ParameterCount = cpu_to_le16(params); | ||
4943 | pSMB->TotalDataCount = pSMB->DataCount; | ||
4944 | pSMB->TotalParameterCount = pSMB->ParameterCount; | ||
4945 | pSMB->ParameterOffset = cpu_to_le16(param_offset); | ||
4946 | pSMB->DataOffset = cpu_to_le16(offset); | ||
4947 | pSMB->Fid = fid; | ||
4948 | pSMB->InformationLevel = cpu_to_le16(SMB_SET_FILE_DISPOSITION_INFO); | ||
4949 | pSMB->Reserved4 = 0; | ||
4950 | pSMB->hdr.smb_buf_length += byte_count; | ||
4951 | pSMB->ByteCount = cpu_to_le16(byte_count); | ||
4952 | *data_offset = delete_file ? 1 : 0; | ||
4953 | rc = SendReceiveNoRsp(xid, tcon->ses, (struct smb_hdr *) pSMB, 0); | ||
4954 | if (rc) | ||
4955 | cFYI(1, ("Send error in SetFileDisposition = %d", rc)); | ||
4956 | |||
4957 | return rc; | ||
4958 | } | ||
4879 | 4959 | ||
4880 | int | 4960 | int |
4881 | CIFSSMBSetPathInfo(const int xid, struct cifsTconInfo *tcon, | 4961 | CIFSSMBSetPathInfo(const int xid, struct cifsTconInfo *tcon, |
diff --git a/fs/cifs/dns_resolve.c b/fs/cifs/dns_resolve.c index a2e0673e1b08..1e0c1bd8f2e4 100644 --- a/fs/cifs/dns_resolve.c +++ b/fs/cifs/dns_resolve.c | |||
@@ -29,19 +29,55 @@ | |||
29 | #include "cifsproto.h" | 29 | #include "cifsproto.h" |
30 | #include "cifs_debug.h" | 30 | #include "cifs_debug.h" |
31 | 31 | ||
32 | static int dns_resolver_instantiate(struct key *key, const void *data, | 32 | /* Checks if supplied name is IP address |
33 | * returns: | ||
34 | * 1 - name is IP | ||
35 | * 0 - name is not IP | ||
36 | */ | ||
37 | static int | ||
38 | is_ip(const char *name) | ||
39 | { | ||
40 | int rc; | ||
41 | struct sockaddr_in sin_server; | ||
42 | struct sockaddr_in6 sin_server6; | ||
43 | |||
44 | rc = cifs_inet_pton(AF_INET, name, | ||
45 | &sin_server.sin_addr.s_addr); | ||
46 | |||
47 | if (rc <= 0) { | ||
48 | /* not ipv4 address, try ipv6 */ | ||
49 | rc = cifs_inet_pton(AF_INET6, name, | ||
50 | &sin_server6.sin6_addr.in6_u); | ||
51 | if (rc > 0) | ||
52 | return 1; | ||
53 | } else { | ||
54 | return 1; | ||
55 | } | ||
56 | /* we failed translating address */ | ||
57 | return 0; | ||
58 | } | ||
59 | |||
60 | static int | ||
61 | dns_resolver_instantiate(struct key *key, const void *data, | ||
33 | size_t datalen) | 62 | size_t datalen) |
34 | { | 63 | { |
35 | int rc = 0; | 64 | int rc = 0; |
36 | char *ip; | 65 | char *ip; |
37 | 66 | ||
38 | ip = kmalloc(datalen+1, GFP_KERNEL); | 67 | ip = kmalloc(datalen + 1, GFP_KERNEL); |
39 | if (!ip) | 68 | if (!ip) |
40 | return -ENOMEM; | 69 | return -ENOMEM; |
41 | 70 | ||
42 | memcpy(ip, data, datalen); | 71 | memcpy(ip, data, datalen); |
43 | ip[datalen] = '\0'; | 72 | ip[datalen] = '\0'; |
44 | 73 | ||
74 | /* make sure this looks like an address */ | ||
75 | if (!is_ip((const char *) ip)) { | ||
76 | kfree(ip); | ||
77 | return -EINVAL; | ||
78 | } | ||
79 | |||
80 | key->type_data.x[0] = datalen; | ||
45 | rcu_assign_pointer(key->payload.data, ip); | 81 | rcu_assign_pointer(key->payload.data, ip); |
46 | 82 | ||
47 | return rc; | 83 | return rc; |
@@ -62,33 +98,6 @@ struct key_type key_type_dns_resolver = { | |||
62 | .match = user_match, | 98 | .match = user_match, |
63 | }; | 99 | }; |
64 | 100 | ||
65 | /* Checks if supplied name is IP address | ||
66 | * returns: | ||
67 | * 1 - name is IP | ||
68 | * 0 - name is not IP | ||
69 | */ | ||
70 | static int is_ip(const char *name) | ||
71 | { | ||
72 | int rc; | ||
73 | struct sockaddr_in sin_server; | ||
74 | struct sockaddr_in6 sin_server6; | ||
75 | |||
76 | rc = cifs_inet_pton(AF_INET, name, | ||
77 | &sin_server.sin_addr.s_addr); | ||
78 | |||
79 | if (rc <= 0) { | ||
80 | /* not ipv4 address, try ipv6 */ | ||
81 | rc = cifs_inet_pton(AF_INET6, name, | ||
82 | &sin_server6.sin6_addr.in6_u); | ||
83 | if (rc > 0) | ||
84 | return 1; | ||
85 | } else { | ||
86 | return 1; | ||
87 | } | ||
88 | /* we failed translating address */ | ||
89 | return 0; | ||
90 | } | ||
91 | |||
92 | /* Resolves server name to ip address. | 101 | /* Resolves server name to ip address. |
93 | * input: | 102 | * input: |
94 | * unc - server UNC | 103 | * unc - server UNC |
@@ -140,6 +149,7 @@ dns_resolve_server_name_to_ip(const char *unc, char **ip_addr) | |||
140 | 149 | ||
141 | rkey = request_key(&key_type_dns_resolver, name, ""); | 150 | rkey = request_key(&key_type_dns_resolver, name, ""); |
142 | if (!IS_ERR(rkey)) { | 151 | if (!IS_ERR(rkey)) { |
152 | len = rkey->type_data.x[0]; | ||
143 | data = rkey->payload.data; | 153 | data = rkey->payload.data; |
144 | } else { | 154 | } else { |
145 | cERROR(1, ("%s: unable to resolve: %s", __func__, name)); | 155 | cERROR(1, ("%s: unable to resolve: %s", __func__, name)); |
@@ -148,11 +158,9 @@ dns_resolve_server_name_to_ip(const char *unc, char **ip_addr) | |||
148 | 158 | ||
149 | skip_upcall: | 159 | skip_upcall: |
150 | if (data) { | 160 | if (data) { |
151 | len = strlen(data); | 161 | *ip_addr = kmalloc(len + 1, GFP_KERNEL); |
152 | *ip_addr = kmalloc(len+1, GFP_KERNEL); | ||
153 | if (*ip_addr) { | 162 | if (*ip_addr) { |
154 | memcpy(*ip_addr, data, len); | 163 | memcpy(*ip_addr, data, len + 1); |
155 | (*ip_addr)[len] = '\0'; | ||
156 | if (!IS_ERR(rkey)) | 164 | if (!IS_ERR(rkey)) |
157 | cFYI(1, ("%s: resolved: %s to %s", __func__, | 165 | cFYI(1, ("%s: resolved: %s to %s", __func__, |
158 | name, | 166 | name, |
diff --git a/fs/cifs/file.c b/fs/cifs/file.c index cbefe1f1f9fe..c4a8a0605125 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c | |||
@@ -107,7 +107,7 @@ static inline int cifs_open_inode_helper(struct inode *inode, struct file *file, | |||
107 | 107 | ||
108 | /* want handles we can use to read with first | 108 | /* want handles we can use to read with first |
109 | in the list so we do not have to walk the | 109 | in the list so we do not have to walk the |
110 | list to search for one in prepare_write */ | 110 | list to search for one in write_begin */ |
111 | if ((file->f_flags & O_ACCMODE) == O_WRONLY) { | 111 | if ((file->f_flags & O_ACCMODE) == O_WRONLY) { |
112 | list_add_tail(&pCifsFile->flist, | 112 | list_add_tail(&pCifsFile->flist, |
113 | &pCifsInode->openFileList); | 113 | &pCifsInode->openFileList); |
@@ -915,7 +915,7 @@ ssize_t cifs_user_write(struct file *file, const char __user *write_data, | |||
915 | } | 915 | } |
916 | 916 | ||
917 | static ssize_t cifs_write(struct file *file, const char *write_data, | 917 | static ssize_t cifs_write(struct file *file, const char *write_data, |
918 | size_t write_size, loff_t *poffset) | 918 | size_t write_size, loff_t *poffset) |
919 | { | 919 | { |
920 | int rc = 0; | 920 | int rc = 0; |
921 | unsigned int bytes_written = 0; | 921 | unsigned int bytes_written = 0; |
@@ -1065,6 +1065,7 @@ struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode) | |||
1065 | struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *cifs_inode) | 1065 | struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *cifs_inode) |
1066 | { | 1066 | { |
1067 | struct cifsFileInfo *open_file; | 1067 | struct cifsFileInfo *open_file; |
1068 | bool any_available = false; | ||
1068 | int rc; | 1069 | int rc; |
1069 | 1070 | ||
1070 | /* Having a null inode here (because mapping->host was set to zero by | 1071 | /* Having a null inode here (because mapping->host was set to zero by |
@@ -1080,8 +1081,10 @@ struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *cifs_inode) | |||
1080 | read_lock(&GlobalSMBSeslock); | 1081 | read_lock(&GlobalSMBSeslock); |
1081 | refind_writable: | 1082 | refind_writable: |
1082 | list_for_each_entry(open_file, &cifs_inode->openFileList, flist) { | 1083 | list_for_each_entry(open_file, &cifs_inode->openFileList, flist) { |
1083 | if (open_file->closePend) | 1084 | if (open_file->closePend || |
1085 | (!any_available && open_file->pid != current->tgid)) | ||
1084 | continue; | 1086 | continue; |
1087 | |||
1085 | if (open_file->pfile && | 1088 | if (open_file->pfile && |
1086 | ((open_file->pfile->f_flags & O_RDWR) || | 1089 | ((open_file->pfile->f_flags & O_RDWR) || |
1087 | (open_file->pfile->f_flags & O_WRONLY))) { | 1090 | (open_file->pfile->f_flags & O_WRONLY))) { |
@@ -1131,6 +1134,11 @@ refind_writable: | |||
1131 | of the loop here. */ | 1134 | of the loop here. */ |
1132 | } | 1135 | } |
1133 | } | 1136 | } |
1137 | /* couldn't find useable FH with same pid, try any available */ | ||
1138 | if (!any_available) { | ||
1139 | any_available = true; | ||
1140 | goto refind_writable; | ||
1141 | } | ||
1134 | read_unlock(&GlobalSMBSeslock); | 1142 | read_unlock(&GlobalSMBSeslock); |
1135 | return NULL; | 1143 | return NULL; |
1136 | } | 1144 | } |
@@ -1447,49 +1455,52 @@ static int cifs_writepage(struct page *page, struct writeback_control *wbc) | |||
1447 | return rc; | 1455 | return rc; |
1448 | } | 1456 | } |
1449 | 1457 | ||
1450 | static int cifs_commit_write(struct file *file, struct page *page, | 1458 | static int cifs_write_end(struct file *file, struct address_space *mapping, |
1451 | unsigned offset, unsigned to) | 1459 | loff_t pos, unsigned len, unsigned copied, |
1460 | struct page *page, void *fsdata) | ||
1452 | { | 1461 | { |
1453 | int xid; | 1462 | int rc; |
1454 | int rc = 0; | 1463 | struct inode *inode = mapping->host; |
1455 | struct inode *inode = page->mapping->host; | ||
1456 | loff_t position = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to; | ||
1457 | char *page_data; | ||
1458 | 1464 | ||
1459 | xid = GetXid(); | 1465 | cFYI(1, ("write_end for page %p from pos %lld with %d bytes", |
1460 | cFYI(1, ("commit write for page %p up to position %lld for %d", | 1466 | page, pos, copied)); |
1461 | page, position, to)); | 1467 | |
1462 | spin_lock(&inode->i_lock); | 1468 | if (!PageUptodate(page) && copied == PAGE_CACHE_SIZE) |
1463 | if (position > inode->i_size) | 1469 | SetPageUptodate(page); |
1464 | i_size_write(inode, position); | ||
1465 | 1470 | ||
1466 | spin_unlock(&inode->i_lock); | ||
1467 | if (!PageUptodate(page)) { | 1471 | if (!PageUptodate(page)) { |
1468 | position = ((loff_t)page->index << PAGE_CACHE_SHIFT) + offset; | 1472 | char *page_data; |
1469 | /* can not rely on (or let) writepage write this data */ | 1473 | unsigned offset = pos & (PAGE_CACHE_SIZE - 1); |
1470 | if (to < offset) { | 1474 | int xid; |
1471 | cFYI(1, ("Illegal offsets, can not copy from %d to %d", | 1475 | |
1472 | offset, to)); | 1476 | xid = GetXid(); |
1473 | FreeXid(xid); | ||
1474 | return rc; | ||
1475 | } | ||
1476 | /* this is probably better than directly calling | 1477 | /* this is probably better than directly calling |
1477 | partialpage_write since in this function the file handle is | 1478 | partialpage_write since in this function the file handle is |
1478 | known which we might as well leverage */ | 1479 | known which we might as well leverage */ |
1479 | /* BB check if anything else missing out of ppw | 1480 | /* BB check if anything else missing out of ppw |
1480 | such as updating last write time */ | 1481 | such as updating last write time */ |
1481 | page_data = kmap(page); | 1482 | page_data = kmap(page); |
1482 | rc = cifs_write(file, page_data + offset, to-offset, | 1483 | rc = cifs_write(file, page_data + offset, copied, &pos); |
1483 | &position); | 1484 | /* if (rc < 0) should we set writebehind rc? */ |
1484 | if (rc > 0) | ||
1485 | rc = 0; | ||
1486 | /* else if (rc < 0) should we set writebehind rc? */ | ||
1487 | kunmap(page); | 1485 | kunmap(page); |
1486 | |||
1487 | FreeXid(xid); | ||
1488 | } else { | 1488 | } else { |
1489 | rc = copied; | ||
1490 | pos += copied; | ||
1489 | set_page_dirty(page); | 1491 | set_page_dirty(page); |
1490 | } | 1492 | } |
1491 | 1493 | ||
1492 | FreeXid(xid); | 1494 | if (rc > 0) { |
1495 | spin_lock(&inode->i_lock); | ||
1496 | if (pos > inode->i_size) | ||
1497 | i_size_write(inode, pos); | ||
1498 | spin_unlock(&inode->i_lock); | ||
1499 | } | ||
1500 | |||
1501 | unlock_page(page); | ||
1502 | page_cache_release(page); | ||
1503 | |||
1493 | return rc; | 1504 | return rc; |
1494 | } | 1505 | } |
1495 | 1506 | ||
@@ -2035,49 +2046,44 @@ bool is_size_safe_to_change(struct cifsInodeInfo *cifsInode, __u64 end_of_file) | |||
2035 | return true; | 2046 | return true; |
2036 | } | 2047 | } |
2037 | 2048 | ||
2038 | static int cifs_prepare_write(struct file *file, struct page *page, | 2049 | static int cifs_write_begin(struct file *file, struct address_space *mapping, |
2039 | unsigned from, unsigned to) | 2050 | loff_t pos, unsigned len, unsigned flags, |
2051 | struct page **pagep, void **fsdata) | ||
2040 | { | 2052 | { |
2041 | int rc = 0; | 2053 | pgoff_t index = pos >> PAGE_CACHE_SHIFT; |
2042 | loff_t i_size; | 2054 | loff_t offset = pos & (PAGE_CACHE_SIZE - 1); |
2043 | loff_t offset; | ||
2044 | 2055 | ||
2045 | cFYI(1, ("prepare write for page %p from %d to %d", page, from, to)); | 2056 | cFYI(1, ("write_begin from %lld len %d", (long long)pos, len)); |
2046 | if (PageUptodate(page)) | 2057 | |
2058 | *pagep = __grab_cache_page(mapping, index); | ||
2059 | if (!*pagep) | ||
2060 | return -ENOMEM; | ||
2061 | |||
2062 | if (PageUptodate(*pagep)) | ||
2047 | return 0; | 2063 | return 0; |
2048 | 2064 | ||
2049 | /* If we are writing a full page it will be up to date, | 2065 | /* If we are writing a full page it will be up to date, |
2050 | no need to read from the server */ | 2066 | no need to read from the server */ |
2051 | if ((to == PAGE_CACHE_SIZE) && (from == 0)) { | 2067 | if (len == PAGE_CACHE_SIZE && flags & AOP_FLAG_UNINTERRUPTIBLE) |
2052 | SetPageUptodate(page); | ||
2053 | return 0; | 2068 | return 0; |
2054 | } | ||
2055 | 2069 | ||
2056 | offset = (loff_t)page->index << PAGE_CACHE_SHIFT; | 2070 | if ((file->f_flags & O_ACCMODE) != O_WRONLY) { |
2057 | i_size = i_size_read(page->mapping->host); | 2071 | int rc; |
2058 | 2072 | ||
2059 | if ((offset >= i_size) || | ||
2060 | ((from == 0) && (offset + to) >= i_size)) { | ||
2061 | /* | ||
2062 | * We don't need to read data beyond the end of the file. | ||
2063 | * zero it, and set the page uptodate | ||
2064 | */ | ||
2065 | simple_prepare_write(file, page, from, to); | ||
2066 | SetPageUptodate(page); | ||
2067 | } else if ((file->f_flags & O_ACCMODE) != O_WRONLY) { | ||
2068 | /* might as well read a page, it is fast enough */ | 2073 | /* might as well read a page, it is fast enough */ |
2069 | rc = cifs_readpage_worker(file, page, &offset); | 2074 | rc = cifs_readpage_worker(file, *pagep, &offset); |
2075 | |||
2076 | /* we do not need to pass errors back | ||
2077 | e.g. if we do not have read access to the file | ||
2078 | because cifs_write_end will attempt synchronous writes | ||
2079 | -- shaggy */ | ||
2070 | } else { | 2080 | } else { |
2071 | /* we could try using another file handle if there is one - | 2081 | /* we could try using another file handle if there is one - |
2072 | but how would we lock it to prevent close of that handle | 2082 | but how would we lock it to prevent close of that handle |
2073 | racing with this read? In any case | 2083 | racing with this read? In any case |
2074 | this will be written out by commit_write so is fine */ | 2084 | this will be written out by write_end so is fine */ |
2075 | } | 2085 | } |
2076 | 2086 | ||
2077 | /* we do not need to pass errors back | ||
2078 | e.g. if we do not have read access to the file | ||
2079 | because cifs_commit_write will do the right thing. -- shaggy */ | ||
2080 | |||
2081 | return 0; | 2087 | return 0; |
2082 | } | 2088 | } |
2083 | 2089 | ||
@@ -2086,8 +2092,8 @@ const struct address_space_operations cifs_addr_ops = { | |||
2086 | .readpages = cifs_readpages, | 2092 | .readpages = cifs_readpages, |
2087 | .writepage = cifs_writepage, | 2093 | .writepage = cifs_writepage, |
2088 | .writepages = cifs_writepages, | 2094 | .writepages = cifs_writepages, |
2089 | .prepare_write = cifs_prepare_write, | 2095 | .write_begin = cifs_write_begin, |
2090 | .commit_write = cifs_commit_write, | 2096 | .write_end = cifs_write_end, |
2091 | .set_page_dirty = __set_page_dirty_nobuffers, | 2097 | .set_page_dirty = __set_page_dirty_nobuffers, |
2092 | /* .sync_page = cifs_sync_page, */ | 2098 | /* .sync_page = cifs_sync_page, */ |
2093 | /* .direct_IO = */ | 2099 | /* .direct_IO = */ |
@@ -2102,8 +2108,8 @@ const struct address_space_operations cifs_addr_ops_smallbuf = { | |||
2102 | .readpage = cifs_readpage, | 2108 | .readpage = cifs_readpage, |
2103 | .writepage = cifs_writepage, | 2109 | .writepage = cifs_writepage, |
2104 | .writepages = cifs_writepages, | 2110 | .writepages = cifs_writepages, |
2105 | .prepare_write = cifs_prepare_write, | 2111 | .write_begin = cifs_write_begin, |
2106 | .commit_write = cifs_commit_write, | 2112 | .write_end = cifs_write_end, |
2107 | .set_page_dirty = __set_page_dirty_nobuffers, | 2113 | .set_page_dirty = __set_page_dirty_nobuffers, |
2108 | /* .sync_page = cifs_sync_page, */ | 2114 | /* .sync_page = cifs_sync_page, */ |
2109 | /* .direct_IO = */ | 2115 | /* .direct_IO = */ |
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index 9c548f110102..a8c833345fc9 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c | |||
@@ -665,40 +665,201 @@ struct inode *cifs_iget(struct super_block *sb, unsigned long ino) | |||
665 | return inode; | 665 | return inode; |
666 | } | 666 | } |
667 | 667 | ||
668 | int cifs_unlink(struct inode *inode, struct dentry *direntry) | 668 | static int |
669 | cifs_set_file_info(struct inode *inode, struct iattr *attrs, int xid, | ||
670 | char *full_path, __u32 dosattr) | ||
671 | { | ||
672 | int rc; | ||
673 | int oplock = 0; | ||
674 | __u16 netfid; | ||
675 | __u32 netpid; | ||
676 | bool set_time = false; | ||
677 | struct cifsFileInfo *open_file; | ||
678 | struct cifsInodeInfo *cifsInode = CIFS_I(inode); | ||
679 | struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); | ||
680 | struct cifsTconInfo *pTcon = cifs_sb->tcon; | ||
681 | FILE_BASIC_INFO info_buf; | ||
682 | |||
683 | if (attrs->ia_valid & ATTR_ATIME) { | ||
684 | set_time = true; | ||
685 | info_buf.LastAccessTime = | ||
686 | cpu_to_le64(cifs_UnixTimeToNT(attrs->ia_atime)); | ||
687 | } else | ||
688 | info_buf.LastAccessTime = 0; | ||
689 | |||
690 | if (attrs->ia_valid & ATTR_MTIME) { | ||
691 | set_time = true; | ||
692 | info_buf.LastWriteTime = | ||
693 | cpu_to_le64(cifs_UnixTimeToNT(attrs->ia_mtime)); | ||
694 | } else | ||
695 | info_buf.LastWriteTime = 0; | ||
696 | |||
697 | /* | ||
698 | * Samba throws this field away, but windows may actually use it. | ||
699 | * Do not set ctime unless other time stamps are changed explicitly | ||
700 | * (i.e. by utimes()) since we would then have a mix of client and | ||
701 | * server times. | ||
702 | */ | ||
703 | if (set_time && (attrs->ia_valid & ATTR_CTIME)) { | ||
704 | cFYI(1, ("CIFS - CTIME changed")); | ||
705 | info_buf.ChangeTime = | ||
706 | cpu_to_le64(cifs_UnixTimeToNT(attrs->ia_ctime)); | ||
707 | } else | ||
708 | info_buf.ChangeTime = 0; | ||
709 | |||
710 | info_buf.CreationTime = 0; /* don't change */ | ||
711 | info_buf.Attributes = cpu_to_le32(dosattr); | ||
712 | |||
713 | /* | ||
714 | * If the file is already open for write, just use that fileid | ||
715 | */ | ||
716 | open_file = find_writable_file(cifsInode); | ||
717 | if (open_file) { | ||
718 | netfid = open_file->netfid; | ||
719 | netpid = open_file->pid; | ||
720 | goto set_via_filehandle; | ||
721 | } | ||
722 | |||
723 | /* | ||
724 | * NT4 apparently returns success on this call, but it doesn't | ||
725 | * really work. | ||
726 | */ | ||
727 | if (!(pTcon->ses->flags & CIFS_SES_NT4)) { | ||
728 | rc = CIFSSMBSetPathInfo(xid, pTcon, full_path, | ||
729 | &info_buf, cifs_sb->local_nls, | ||
730 | cifs_sb->mnt_cifs_flags & | ||
731 | CIFS_MOUNT_MAP_SPECIAL_CHR); | ||
732 | if (rc == 0) { | ||
733 | cifsInode->cifsAttrs = dosattr; | ||
734 | goto out; | ||
735 | } else if (rc != -EOPNOTSUPP && rc != -EINVAL) | ||
736 | goto out; | ||
737 | } | ||
738 | |||
739 | cFYI(1, ("calling SetFileInfo since SetPathInfo for " | ||
740 | "times not supported by this server")); | ||
741 | rc = CIFSSMBOpen(xid, pTcon, full_path, FILE_OPEN, | ||
742 | SYNCHRONIZE | FILE_WRITE_ATTRIBUTES, | ||
743 | CREATE_NOT_DIR, &netfid, &oplock, | ||
744 | NULL, cifs_sb->local_nls, | ||
745 | cifs_sb->mnt_cifs_flags & | ||
746 | CIFS_MOUNT_MAP_SPECIAL_CHR); | ||
747 | |||
748 | if (rc != 0) { | ||
749 | if (rc == -EIO) | ||
750 | rc = -EINVAL; | ||
751 | goto out; | ||
752 | } | ||
753 | |||
754 | netpid = current->tgid; | ||
755 | |||
756 | set_via_filehandle: | ||
757 | rc = CIFSSMBSetFileInfo(xid, pTcon, &info_buf, netfid, netpid); | ||
758 | if (!rc) | ||
759 | cifsInode->cifsAttrs = dosattr; | ||
760 | |||
761 | if (open_file == NULL) | ||
762 | CIFSSMBClose(xid, pTcon, netfid); | ||
763 | else | ||
764 | atomic_dec(&open_file->wrtPending); | ||
765 | out: | ||
766 | return rc; | ||
767 | } | ||
768 | |||
769 | /* | ||
770 | * open the given file (if it isn't already), set the DELETE_ON_CLOSE bit | ||
771 | * and rename it to a random name that hopefully won't conflict with | ||
772 | * anything else. | ||
773 | */ | ||
774 | static int | ||
775 | cifs_rename_pending_delete(char *full_path, struct inode *inode, int xid) | ||
776 | { | ||
777 | int oplock = 0; | ||
778 | int rc; | ||
779 | __u16 netfid; | ||
780 | struct cifsInodeInfo *cifsInode = CIFS_I(inode); | ||
781 | struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); | ||
782 | struct cifsTconInfo *tcon = cifs_sb->tcon; | ||
783 | __u32 dosattr; | ||
784 | FILE_BASIC_INFO *info_buf; | ||
785 | |||
786 | rc = CIFSSMBOpen(xid, tcon, full_path, FILE_OPEN, | ||
787 | DELETE|FILE_WRITE_ATTRIBUTES, | ||
788 | CREATE_NOT_DIR|CREATE_DELETE_ON_CLOSE, | ||
789 | &netfid, &oplock, NULL, cifs_sb->local_nls, | ||
790 | cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); | ||
791 | if (rc != 0) | ||
792 | goto out; | ||
793 | |||
794 | /* set ATTR_HIDDEN and clear ATTR_READONLY */ | ||
795 | cifsInode = CIFS_I(inode); | ||
796 | dosattr = cifsInode->cifsAttrs & ~ATTR_READONLY; | ||
797 | if (dosattr == 0) | ||
798 | dosattr |= ATTR_NORMAL; | ||
799 | dosattr |= ATTR_HIDDEN; | ||
800 | |||
801 | info_buf = kzalloc(sizeof(*info_buf), GFP_KERNEL); | ||
802 | if (info_buf == NULL) { | ||
803 | rc = -ENOMEM; | ||
804 | goto out_close; | ||
805 | } | ||
806 | info_buf->Attributes = cpu_to_le32(dosattr); | ||
807 | rc = CIFSSMBSetFileInfo(xid, tcon, info_buf, netfid, current->tgid); | ||
808 | kfree(info_buf); | ||
809 | if (rc != 0) | ||
810 | goto out_close; | ||
811 | cifsInode->cifsAttrs = dosattr; | ||
812 | |||
813 | /* silly-rename the file */ | ||
814 | CIFSSMBRenameOpenFile(xid, tcon, netfid, NULL, cifs_sb->local_nls, | ||
815 | cifs_sb->mnt_cifs_flags & | ||
816 | CIFS_MOUNT_MAP_SPECIAL_CHR); | ||
817 | |||
818 | /* set DELETE_ON_CLOSE */ | ||
819 | rc = CIFSSMBSetFileDisposition(xid, tcon, true, netfid, current->tgid); | ||
820 | |||
821 | /* | ||
822 | * some samba versions return -ENOENT when we try to set the file | ||
823 | * disposition here. Likely a samba bug, but work around it for now | ||
824 | */ | ||
825 | if (rc == -ENOENT) | ||
826 | rc = 0; | ||
827 | |||
828 | out_close: | ||
829 | CIFSSMBClose(xid, tcon, netfid); | ||
830 | out: | ||
831 | return rc; | ||
832 | } | ||
833 | |||
834 | int cifs_unlink(struct inode *dir, struct dentry *dentry) | ||
669 | { | 835 | { |
670 | int rc = 0; | 836 | int rc = 0; |
671 | int xid; | 837 | int xid; |
672 | struct cifs_sb_info *cifs_sb; | ||
673 | struct cifsTconInfo *pTcon; | ||
674 | char *full_path = NULL; | 838 | char *full_path = NULL; |
675 | struct cifsInodeInfo *cifsInode; | 839 | struct inode *inode = dentry->d_inode; |
676 | FILE_BASIC_INFO *pinfo_buf; | 840 | struct cifsInodeInfo *cifsInode = CIFS_I(inode); |
841 | struct super_block *sb = dir->i_sb; | ||
842 | struct cifs_sb_info *cifs_sb = CIFS_SB(sb); | ||
843 | struct cifsTconInfo *tcon = cifs_sb->tcon; | ||
844 | struct iattr *attrs = NULL; | ||
845 | __u32 dosattr = 0, origattr = 0; | ||
677 | 846 | ||
678 | cFYI(1, ("cifs_unlink, inode = 0x%p", inode)); | 847 | cFYI(1, ("cifs_unlink, dir=0x%p, dentry=0x%p", dir, dentry)); |
679 | 848 | ||
680 | xid = GetXid(); | 849 | xid = GetXid(); |
681 | 850 | ||
682 | if (inode) | 851 | /* Unlink can be called from rename so we can not take the |
683 | cifs_sb = CIFS_SB(inode->i_sb); | 852 | * sb->s_vfs_rename_mutex here */ |
684 | else | 853 | full_path = build_path_from_dentry(dentry); |
685 | cifs_sb = CIFS_SB(direntry->d_sb); | ||
686 | pTcon = cifs_sb->tcon; | ||
687 | |||
688 | /* Unlink can be called from rename so we can not grab the sem here | ||
689 | since we deadlock otherwise */ | ||
690 | /* mutex_lock(&direntry->d_sb->s_vfs_rename_mutex);*/ | ||
691 | full_path = build_path_from_dentry(direntry); | ||
692 | /* mutex_unlock(&direntry->d_sb->s_vfs_rename_mutex);*/ | ||
693 | if (full_path == NULL) { | 854 | if (full_path == NULL) { |
694 | FreeXid(xid); | 855 | FreeXid(xid); |
695 | return -ENOMEM; | 856 | return -ENOMEM; |
696 | } | 857 | } |
697 | 858 | ||
698 | if ((pTcon->ses->capabilities & CAP_UNIX) && | 859 | if ((tcon->ses->capabilities & CAP_UNIX) && |
699 | (CIFS_UNIX_POSIX_PATH_OPS_CAP & | 860 | (CIFS_UNIX_POSIX_PATH_OPS_CAP & |
700 | le64_to_cpu(pTcon->fsUnixInfo.Capability))) { | 861 | le64_to_cpu(tcon->fsUnixInfo.Capability))) { |
701 | rc = CIFSPOSIXDelFile(xid, pTcon, full_path, | 862 | rc = CIFSPOSIXDelFile(xid, tcon, full_path, |
702 | SMB_POSIX_UNLINK_FILE_TARGET, cifs_sb->local_nls, | 863 | SMB_POSIX_UNLINK_FILE_TARGET, cifs_sb->local_nls, |
703 | cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); | 864 | cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); |
704 | cFYI(1, ("posix del rc %d", rc)); | 865 | cFYI(1, ("posix del rc %d", rc)); |
@@ -706,125 +867,60 @@ int cifs_unlink(struct inode *inode, struct dentry *direntry) | |||
706 | goto psx_del_no_retry; | 867 | goto psx_del_no_retry; |
707 | } | 868 | } |
708 | 869 | ||
709 | rc = CIFSSMBDelFile(xid, pTcon, full_path, cifs_sb->local_nls, | 870 | retry_std_delete: |
871 | rc = CIFSSMBDelFile(xid, tcon, full_path, cifs_sb->local_nls, | ||
710 | cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); | 872 | cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); |
873 | |||
711 | psx_del_no_retry: | 874 | psx_del_no_retry: |
712 | if (!rc) { | 875 | if (!rc) { |
713 | if (direntry->d_inode) | 876 | if (inode) |
714 | drop_nlink(direntry->d_inode); | 877 | drop_nlink(inode); |
715 | } else if (rc == -ENOENT) { | 878 | } else if (rc == -ENOENT) { |
716 | d_drop(direntry); | 879 | d_drop(dentry); |
717 | } else if (rc == -ETXTBSY) { | 880 | } else if (rc == -ETXTBSY) { |
718 | int oplock = 0; | 881 | rc = cifs_rename_pending_delete(full_path, inode, xid); |
719 | __u16 netfid; | 882 | if (rc == 0) |
720 | 883 | drop_nlink(inode); | |
721 | rc = CIFSSMBOpen(xid, pTcon, full_path, FILE_OPEN, DELETE, | 884 | } else if (rc == -EACCES && dosattr == 0) { |
722 | CREATE_NOT_DIR | CREATE_DELETE_ON_CLOSE, | 885 | attrs = kzalloc(sizeof(*attrs), GFP_KERNEL); |
723 | &netfid, &oplock, NULL, cifs_sb->local_nls, | 886 | if (attrs == NULL) { |
724 | cifs_sb->mnt_cifs_flags & | 887 | rc = -ENOMEM; |
725 | CIFS_MOUNT_MAP_SPECIAL_CHR); | 888 | goto out_reval; |
726 | if (rc == 0) { | ||
727 | CIFSSMBRenameOpenFile(xid, pTcon, netfid, NULL, | ||
728 | cifs_sb->local_nls, | ||
729 | cifs_sb->mnt_cifs_flags & | ||
730 | CIFS_MOUNT_MAP_SPECIAL_CHR); | ||
731 | CIFSSMBClose(xid, pTcon, netfid); | ||
732 | if (direntry->d_inode) | ||
733 | drop_nlink(direntry->d_inode); | ||
734 | } | 889 | } |
735 | } else if (rc == -EACCES) { | ||
736 | /* try only if r/o attribute set in local lookup data? */ | ||
737 | pinfo_buf = kzalloc(sizeof(FILE_BASIC_INFO), GFP_KERNEL); | ||
738 | if (pinfo_buf) { | ||
739 | /* ATTRS set to normal clears r/o bit */ | ||
740 | pinfo_buf->Attributes = cpu_to_le32(ATTR_NORMAL); | ||
741 | if (!(pTcon->ses->flags & CIFS_SES_NT4)) | ||
742 | rc = CIFSSMBSetPathInfo(xid, pTcon, full_path, | ||
743 | pinfo_buf, | ||
744 | cifs_sb->local_nls, | ||
745 | cifs_sb->mnt_cifs_flags & | ||
746 | CIFS_MOUNT_MAP_SPECIAL_CHR); | ||
747 | else | ||
748 | rc = -EOPNOTSUPP; | ||
749 | 890 | ||
750 | if (rc == -EOPNOTSUPP) { | 891 | /* try to reset dos attributes */ |
751 | int oplock = 0; | 892 | origattr = cifsInode->cifsAttrs; |
752 | __u16 netfid; | 893 | if (origattr == 0) |
753 | /* rc = CIFSSMBSetAttrLegacy(xid, pTcon, | 894 | origattr |= ATTR_NORMAL; |
754 | full_path, | 895 | dosattr = origattr & ~ATTR_READONLY; |
755 | (__u16)ATTR_NORMAL, | 896 | if (dosattr == 0) |
756 | cifs_sb->local_nls); | 897 | dosattr |= ATTR_NORMAL; |
757 | For some strange reason it seems that NT4 eats the | 898 | dosattr |= ATTR_HIDDEN; |
758 | old setattr call without actually setting the | 899 | |
759 | attributes so on to the third attempted workaround | 900 | rc = cifs_set_file_info(inode, attrs, xid, full_path, dosattr); |
760 | */ | 901 | if (rc != 0) |
761 | 902 | goto out_reval; | |
762 | /* BB could scan to see if we already have it open | 903 | |
763 | and pass in pid of opener to function */ | 904 | goto retry_std_delete; |
764 | rc = CIFSSMBOpen(xid, pTcon, full_path, | ||
765 | FILE_OPEN, SYNCHRONIZE | | ||
766 | FILE_WRITE_ATTRIBUTES, 0, | ||
767 | &netfid, &oplock, NULL, | ||
768 | cifs_sb->local_nls, | ||
769 | cifs_sb->mnt_cifs_flags & | ||
770 | CIFS_MOUNT_MAP_SPECIAL_CHR); | ||
771 | if (rc == 0) { | ||
772 | rc = CIFSSMBSetFileInfo(xid, pTcon, | ||
773 | pinfo_buf, | ||
774 | netfid, | ||
775 | current->tgid); | ||
776 | CIFSSMBClose(xid, pTcon, netfid); | ||
777 | } | ||
778 | } | ||
779 | kfree(pinfo_buf); | ||
780 | } | ||
781 | if (rc == 0) { | ||
782 | rc = CIFSSMBDelFile(xid, pTcon, full_path, | ||
783 | cifs_sb->local_nls, | ||
784 | cifs_sb->mnt_cifs_flags & | ||
785 | CIFS_MOUNT_MAP_SPECIAL_CHR); | ||
786 | if (!rc) { | ||
787 | if (direntry->d_inode) | ||
788 | drop_nlink(direntry->d_inode); | ||
789 | } else if (rc == -ETXTBSY) { | ||
790 | int oplock = 0; | ||
791 | __u16 netfid; | ||
792 | |||
793 | rc = CIFSSMBOpen(xid, pTcon, full_path, | ||
794 | FILE_OPEN, DELETE, | ||
795 | CREATE_NOT_DIR | | ||
796 | CREATE_DELETE_ON_CLOSE, | ||
797 | &netfid, &oplock, NULL, | ||
798 | cifs_sb->local_nls, | ||
799 | cifs_sb->mnt_cifs_flags & | ||
800 | CIFS_MOUNT_MAP_SPECIAL_CHR); | ||
801 | if (rc == 0) { | ||
802 | CIFSSMBRenameOpenFile(xid, pTcon, | ||
803 | netfid, NULL, | ||
804 | cifs_sb->local_nls, | ||
805 | cifs_sb->mnt_cifs_flags & | ||
806 | CIFS_MOUNT_MAP_SPECIAL_CHR); | ||
807 | CIFSSMBClose(xid, pTcon, netfid); | ||
808 | if (direntry->d_inode) | ||
809 | drop_nlink(direntry->d_inode); | ||
810 | } | ||
811 | /* BB if rc = -ETXTBUSY goto the rename logic BB */ | ||
812 | } | ||
813 | } | ||
814 | } | ||
815 | if (direntry->d_inode) { | ||
816 | cifsInode = CIFS_I(direntry->d_inode); | ||
817 | cifsInode->time = 0; /* will force revalidate to get info | ||
818 | when needed */ | ||
819 | direntry->d_inode->i_ctime = current_fs_time(inode->i_sb); | ||
820 | } | 905 | } |
906 | |||
907 | /* undo the setattr if we errored out and it's needed */ | ||
908 | if (rc != 0 && dosattr != 0) | ||
909 | cifs_set_file_info(inode, attrs, xid, full_path, origattr); | ||
910 | |||
911 | out_reval: | ||
821 | if (inode) { | 912 | if (inode) { |
822 | inode->i_ctime = inode->i_mtime = current_fs_time(inode->i_sb); | ||
823 | cifsInode = CIFS_I(inode); | 913 | cifsInode = CIFS_I(inode); |
824 | cifsInode->time = 0; /* force revalidate of dir as well */ | 914 | cifsInode->time = 0; /* will force revalidate to get info |
915 | when needed */ | ||
916 | inode->i_ctime = current_fs_time(sb); | ||
825 | } | 917 | } |
918 | dir->i_ctime = dir->i_mtime = current_fs_time(sb); | ||
919 | cifsInode = CIFS_I(dir); | ||
920 | CIFS_I(dir)->time = 0; /* force revalidate of dir as well */ | ||
826 | 921 | ||
827 | kfree(full_path); | 922 | kfree(full_path); |
923 | kfree(attrs); | ||
828 | FreeXid(xid); | 924 | FreeXid(xid); |
829 | return rc; | 925 | return rc; |
830 | } | 926 | } |
@@ -869,7 +965,7 @@ static void posix_fill_in_inode(struct inode *tmp_inode, | |||
869 | 965 | ||
870 | int cifs_mkdir(struct inode *inode, struct dentry *direntry, int mode) | 966 | int cifs_mkdir(struct inode *inode, struct dentry *direntry, int mode) |
871 | { | 967 | { |
872 | int rc = 0; | 968 | int rc = 0, tmprc; |
873 | int xid; | 969 | int xid; |
874 | struct cifs_sb_info *cifs_sb; | 970 | struct cifs_sb_info *cifs_sb; |
875 | struct cifsTconInfo *pTcon; | 971 | struct cifsTconInfo *pTcon; |
@@ -931,6 +1027,7 @@ int cifs_mkdir(struct inode *inode, struct dentry *direntry, int mode) | |||
931 | kfree(pInfo); | 1027 | kfree(pInfo); |
932 | goto mkdir_get_info; | 1028 | goto mkdir_get_info; |
933 | } | 1029 | } |
1030 | |||
934 | /* Is an i_ino of zero legal? */ | 1031 | /* Is an i_ino of zero legal? */ |
935 | /* Are there sanity checks we can use to ensure that | 1032 | /* Are there sanity checks we can use to ensure that |
936 | the server is really filling in that field? */ | 1033 | the server is really filling in that field? */ |
@@ -1019,12 +1116,20 @@ mkdir_get_info: | |||
1019 | if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL) && | 1116 | if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL) && |
1020 | (mode & S_IWUGO) == 0) { | 1117 | (mode & S_IWUGO) == 0) { |
1021 | FILE_BASIC_INFO pInfo; | 1118 | FILE_BASIC_INFO pInfo; |
1119 | struct cifsInodeInfo *cifsInode; | ||
1120 | u32 dosattrs; | ||
1121 | |||
1022 | memset(&pInfo, 0, sizeof(pInfo)); | 1122 | memset(&pInfo, 0, sizeof(pInfo)); |
1023 | pInfo.Attributes = cpu_to_le32(ATTR_READONLY); | 1123 | cifsInode = CIFS_I(newinode); |
1024 | CIFSSMBSetPathInfo(xid, pTcon, full_path, | 1124 | dosattrs = cifsInode->cifsAttrs|ATTR_READONLY; |
1025 | &pInfo, cifs_sb->local_nls, | 1125 | pInfo.Attributes = cpu_to_le32(dosattrs); |
1126 | tmprc = CIFSSMBSetPathInfo(xid, pTcon, | ||
1127 | full_path, &pInfo, | ||
1128 | cifs_sb->local_nls, | ||
1026 | cifs_sb->mnt_cifs_flags & | 1129 | cifs_sb->mnt_cifs_flags & |
1027 | CIFS_MOUNT_MAP_SPECIAL_CHR); | 1130 | CIFS_MOUNT_MAP_SPECIAL_CHR); |
1131 | if (tmprc == 0) | ||
1132 | cifsInode->cifsAttrs = dosattrs; | ||
1028 | } | 1133 | } |
1029 | if (direntry->d_inode) { | 1134 | if (direntry->d_inode) { |
1030 | if (cifs_sb->mnt_cifs_flags & | 1135 | if (cifs_sb->mnt_cifs_flags & |
@@ -1096,117 +1201,141 @@ int cifs_rmdir(struct inode *inode, struct dentry *direntry) | |||
1096 | return rc; | 1201 | return rc; |
1097 | } | 1202 | } |
1098 | 1203 | ||
1204 | static int | ||
1205 | cifs_do_rename(int xid, struct dentry *from_dentry, const char *fromPath, | ||
1206 | struct dentry *to_dentry, const char *toPath) | ||
1207 | { | ||
1208 | struct cifs_sb_info *cifs_sb = CIFS_SB(from_dentry->d_sb); | ||
1209 | struct cifsTconInfo *pTcon = cifs_sb->tcon; | ||
1210 | __u16 srcfid; | ||
1211 | int oplock, rc; | ||
1212 | |||
1213 | /* try path-based rename first */ | ||
1214 | rc = CIFSSMBRename(xid, pTcon, fromPath, toPath, cifs_sb->local_nls, | ||
1215 | cifs_sb->mnt_cifs_flags & | ||
1216 | CIFS_MOUNT_MAP_SPECIAL_CHR); | ||
1217 | |||
1218 | /* | ||
1219 | * don't bother with rename by filehandle unless file is busy and | ||
1220 | * source Note that cross directory moves do not work with | ||
1221 | * rename by filehandle to various Windows servers. | ||
1222 | */ | ||
1223 | if (rc == 0 || rc != -ETXTBSY) | ||
1224 | return rc; | ||
1225 | |||
1226 | /* open the file to be renamed -- we need DELETE perms */ | ||
1227 | rc = CIFSSMBOpen(xid, pTcon, fromPath, FILE_OPEN, DELETE, | ||
1228 | CREATE_NOT_DIR, &srcfid, &oplock, NULL, | ||
1229 | cifs_sb->local_nls, cifs_sb->mnt_cifs_flags & | ||
1230 | CIFS_MOUNT_MAP_SPECIAL_CHR); | ||
1231 | |||
1232 | if (rc == 0) { | ||
1233 | rc = CIFSSMBRenameOpenFile(xid, pTcon, srcfid, | ||
1234 | (const char *) to_dentry->d_name.name, | ||
1235 | cifs_sb->local_nls, cifs_sb->mnt_cifs_flags & | ||
1236 | CIFS_MOUNT_MAP_SPECIAL_CHR); | ||
1237 | |||
1238 | CIFSSMBClose(xid, pTcon, srcfid); | ||
1239 | } | ||
1240 | |||
1241 | return rc; | ||
1242 | } | ||
1243 | |||
1099 | int cifs_rename(struct inode *source_inode, struct dentry *source_direntry, | 1244 | int cifs_rename(struct inode *source_inode, struct dentry *source_direntry, |
1100 | struct inode *target_inode, struct dentry *target_direntry) | 1245 | struct inode *target_inode, struct dentry *target_direntry) |
1101 | { | 1246 | { |
1102 | char *fromName; | 1247 | char *fromName = NULL; |
1103 | char *toName; | 1248 | char *toName = NULL; |
1104 | struct cifs_sb_info *cifs_sb_source; | 1249 | struct cifs_sb_info *cifs_sb_source; |
1105 | struct cifs_sb_info *cifs_sb_target; | 1250 | struct cifs_sb_info *cifs_sb_target; |
1106 | struct cifsTconInfo *pTcon; | 1251 | struct cifsTconInfo *pTcon; |
1252 | FILE_UNIX_BASIC_INFO *info_buf_source = NULL; | ||
1253 | FILE_UNIX_BASIC_INFO *info_buf_target; | ||
1107 | int xid; | 1254 | int xid; |
1108 | int rc = 0; | 1255 | int rc; |
1109 | |||
1110 | xid = GetXid(); | ||
1111 | 1256 | ||
1112 | cifs_sb_target = CIFS_SB(target_inode->i_sb); | 1257 | cifs_sb_target = CIFS_SB(target_inode->i_sb); |
1113 | cifs_sb_source = CIFS_SB(source_inode->i_sb); | 1258 | cifs_sb_source = CIFS_SB(source_inode->i_sb); |
1114 | pTcon = cifs_sb_source->tcon; | 1259 | pTcon = cifs_sb_source->tcon; |
1115 | 1260 | ||
1261 | xid = GetXid(); | ||
1262 | |||
1263 | /* | ||
1264 | * BB: this might be allowed if same server, but different share. | ||
1265 | * Consider adding support for this | ||
1266 | */ | ||
1116 | if (pTcon != cifs_sb_target->tcon) { | 1267 | if (pTcon != cifs_sb_target->tcon) { |
1117 | FreeXid(xid); | 1268 | rc = -EXDEV; |
1118 | return -EXDEV; /* BB actually could be allowed if same server, | 1269 | goto cifs_rename_exit; |
1119 | but different share. | ||
1120 | Might eventually add support for this */ | ||
1121 | } | 1270 | } |
1122 | 1271 | ||
1123 | /* we already have the rename sem so we do not need to grab it again | 1272 | /* |
1124 | here to protect the path integrity */ | 1273 | * we already have the rename sem so we do not need to |
1274 | * grab it again here to protect the path integrity | ||
1275 | */ | ||
1125 | fromName = build_path_from_dentry(source_direntry); | 1276 | fromName = build_path_from_dentry(source_direntry); |
1277 | if (fromName == NULL) { | ||
1278 | rc = -ENOMEM; | ||
1279 | goto cifs_rename_exit; | ||
1280 | } | ||
1281 | |||
1126 | toName = build_path_from_dentry(target_direntry); | 1282 | toName = build_path_from_dentry(target_direntry); |
1127 | if ((fromName == NULL) || (toName == NULL)) { | 1283 | if (toName == NULL) { |
1128 | rc = -ENOMEM; | 1284 | rc = -ENOMEM; |
1129 | goto cifs_rename_exit; | 1285 | goto cifs_rename_exit; |
1130 | } | 1286 | } |
1131 | 1287 | ||
1132 | rc = CIFSSMBRename(xid, pTcon, fromName, toName, | 1288 | rc = cifs_do_rename(xid, source_direntry, fromName, |
1133 | cifs_sb_source->local_nls, | 1289 | target_direntry, toName); |
1134 | cifs_sb_source->mnt_cifs_flags & | 1290 | |
1135 | CIFS_MOUNT_MAP_SPECIAL_CHR); | ||
1136 | if (rc == -EEXIST) { | 1291 | if (rc == -EEXIST) { |
1137 | /* check if they are the same file because rename of hardlinked | 1292 | if (pTcon->unix_ext) { |
1138 | files is a noop */ | 1293 | /* |
1139 | FILE_UNIX_BASIC_INFO *info_buf_source; | 1294 | * Are src and dst hardlinks of same inode? We can |
1140 | FILE_UNIX_BASIC_INFO *info_buf_target; | 1295 | * only tell with unix extensions enabled |
1141 | 1296 | */ | |
1142 | info_buf_source = | 1297 | info_buf_source = |
1143 | kmalloc(2 * sizeof(FILE_UNIX_BASIC_INFO), GFP_KERNEL); | 1298 | kmalloc(2 * sizeof(FILE_UNIX_BASIC_INFO), |
1144 | if (info_buf_source != NULL) { | 1299 | GFP_KERNEL); |
1300 | if (info_buf_source == NULL) | ||
1301 | goto unlink_target; | ||
1302 | |||
1145 | info_buf_target = info_buf_source + 1; | 1303 | info_buf_target = info_buf_source + 1; |
1146 | if (pTcon->unix_ext) | 1304 | rc = CIFSSMBUnixQPathInfo(xid, pTcon, fromName, |
1147 | rc = CIFSSMBUnixQPathInfo(xid, pTcon, fromName, | 1305 | info_buf_source, |
1148 | info_buf_source, | 1306 | cifs_sb_source->local_nls, |
1149 | cifs_sb_source->local_nls, | 1307 | cifs_sb_source->mnt_cifs_flags & |
1150 | cifs_sb_source->mnt_cifs_flags & | ||
1151 | CIFS_MOUNT_MAP_SPECIAL_CHR); | 1308 | CIFS_MOUNT_MAP_SPECIAL_CHR); |
1152 | /* else rc is still EEXIST so will fall through to | 1309 | if (rc != 0) |
1153 | unlink the target and retry rename */ | 1310 | goto unlink_target; |
1154 | if (rc == 0) { | 1311 | |
1155 | rc = CIFSSMBUnixQPathInfo(xid, pTcon, toName, | 1312 | rc = CIFSSMBUnixQPathInfo(xid, pTcon, |
1156 | info_buf_target, | 1313 | toName, info_buf_target, |
1157 | cifs_sb_target->local_nls, | 1314 | cifs_sb_target->local_nls, |
1158 | /* remap based on source sb */ | 1315 | /* remap based on source sb */ |
1159 | cifs_sb_source->mnt_cifs_flags & | 1316 | cifs_sb_source->mnt_cifs_flags & |
1160 | CIFS_MOUNT_MAP_SPECIAL_CHR); | ||
1161 | } | ||
1162 | if ((rc == 0) && | ||
1163 | (info_buf_source->UniqueId == | ||
1164 | info_buf_target->UniqueId)) { | ||
1165 | /* do not rename since the files are hardlinked which | ||
1166 | is a noop */ | ||
1167 | } else { | ||
1168 | /* we either can not tell the files are hardlinked | ||
1169 | (as with Windows servers) or files are not | ||
1170 | hardlinked so delete the target manually before | ||
1171 | renaming to follow POSIX rather than Windows | ||
1172 | semantics */ | ||
1173 | cifs_unlink(target_inode, target_direntry); | ||
1174 | rc = CIFSSMBRename(xid, pTcon, fromName, | ||
1175 | toName, | ||
1176 | cifs_sb_source->local_nls, | ||
1177 | cifs_sb_source->mnt_cifs_flags | ||
1178 | & CIFS_MOUNT_MAP_SPECIAL_CHR); | ||
1179 | } | ||
1180 | kfree(info_buf_source); | ||
1181 | } /* if we can not get memory just leave rc as EEXIST */ | ||
1182 | } | ||
1183 | |||
1184 | if (rc) | ||
1185 | cFYI(1, ("rename rc %d", rc)); | ||
1186 | |||
1187 | if ((rc == -EIO) || (rc == -EEXIST)) { | ||
1188 | int oplock = 0; | ||
1189 | __u16 netfid; | ||
1190 | |||
1191 | /* BB FIXME Is Generic Read correct for rename? */ | ||
1192 | /* if renaming directory - we should not say CREATE_NOT_DIR, | ||
1193 | need to test renaming open directory, also GENERIC_READ | ||
1194 | might not right be right access to request */ | ||
1195 | rc = CIFSSMBOpen(xid, pTcon, fromName, FILE_OPEN, GENERIC_READ, | ||
1196 | CREATE_NOT_DIR, &netfid, &oplock, NULL, | ||
1197 | cifs_sb_source->local_nls, | ||
1198 | cifs_sb_source->mnt_cifs_flags & | ||
1199 | CIFS_MOUNT_MAP_SPECIAL_CHR); | ||
1200 | if (rc == 0) { | ||
1201 | rc = CIFSSMBRenameOpenFile(xid, pTcon, netfid, toName, | ||
1202 | cifs_sb_source->local_nls, | ||
1203 | cifs_sb_source->mnt_cifs_flags & | ||
1204 | CIFS_MOUNT_MAP_SPECIAL_CHR); | 1317 | CIFS_MOUNT_MAP_SPECIAL_CHR); |
1205 | CIFSSMBClose(xid, pTcon, netfid); | 1318 | |
1206 | } | 1319 | if (rc == 0 && (info_buf_source->UniqueId == |
1320 | info_buf_target->UniqueId)) | ||
1321 | /* same file, POSIX says that this is a noop */ | ||
1322 | goto cifs_rename_exit; | ||
1323 | } /* else ... BB we could add the same check for Windows by | ||
1324 | checking the UniqueId via FILE_INTERNAL_INFO */ | ||
1325 | unlink_target: | ||
1326 | /* | ||
1327 | * we either can not tell the files are hardlinked (as with | ||
1328 | * Windows servers) or files are not hardlinked. Delete the | ||
1329 | * target manually before renaming to follow POSIX rather than | ||
1330 | * Windows semantics | ||
1331 | */ | ||
1332 | cifs_unlink(target_inode, target_direntry); | ||
1333 | rc = cifs_do_rename(xid, source_direntry, fromName, | ||
1334 | target_direntry, toName); | ||
1207 | } | 1335 | } |
1208 | 1336 | ||
1209 | cifs_rename_exit: | 1337 | cifs_rename_exit: |
1338 | kfree(info_buf_source); | ||
1210 | kfree(fromName); | 1339 | kfree(fromName); |
1211 | kfree(toName); | 1340 | kfree(toName); |
1212 | FreeXid(xid); | 1341 | FreeXid(xid); |
@@ -1507,101 +1636,6 @@ cifs_set_file_size(struct inode *inode, struct iattr *attrs, | |||
1507 | } | 1636 | } |
1508 | 1637 | ||
1509 | static int | 1638 | static int |
1510 | cifs_set_file_info(struct inode *inode, struct iattr *attrs, int xid, | ||
1511 | char *full_path, __u32 dosattr) | ||
1512 | { | ||
1513 | int rc; | ||
1514 | int oplock = 0; | ||
1515 | __u16 netfid; | ||
1516 | __u32 netpid; | ||
1517 | bool set_time = false; | ||
1518 | struct cifsFileInfo *open_file; | ||
1519 | struct cifsInodeInfo *cifsInode = CIFS_I(inode); | ||
1520 | struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); | ||
1521 | struct cifsTconInfo *pTcon = cifs_sb->tcon; | ||
1522 | FILE_BASIC_INFO info_buf; | ||
1523 | |||
1524 | if (attrs->ia_valid & ATTR_ATIME) { | ||
1525 | set_time = true; | ||
1526 | info_buf.LastAccessTime = | ||
1527 | cpu_to_le64(cifs_UnixTimeToNT(attrs->ia_atime)); | ||
1528 | } else | ||
1529 | info_buf.LastAccessTime = 0; | ||
1530 | |||
1531 | if (attrs->ia_valid & ATTR_MTIME) { | ||
1532 | set_time = true; | ||
1533 | info_buf.LastWriteTime = | ||
1534 | cpu_to_le64(cifs_UnixTimeToNT(attrs->ia_mtime)); | ||
1535 | } else | ||
1536 | info_buf.LastWriteTime = 0; | ||
1537 | |||
1538 | /* | ||
1539 | * Samba throws this field away, but windows may actually use it. | ||
1540 | * Do not set ctime unless other time stamps are changed explicitly | ||
1541 | * (i.e. by utimes()) since we would then have a mix of client and | ||
1542 | * server times. | ||
1543 | */ | ||
1544 | if (set_time && (attrs->ia_valid & ATTR_CTIME)) { | ||
1545 | cFYI(1, ("CIFS - CTIME changed")); | ||
1546 | info_buf.ChangeTime = | ||
1547 | cpu_to_le64(cifs_UnixTimeToNT(attrs->ia_ctime)); | ||
1548 | } else | ||
1549 | info_buf.ChangeTime = 0; | ||
1550 | |||
1551 | info_buf.CreationTime = 0; /* don't change */ | ||
1552 | info_buf.Attributes = cpu_to_le32(dosattr); | ||
1553 | |||
1554 | /* | ||
1555 | * If the file is already open for write, just use that fileid | ||
1556 | */ | ||
1557 | open_file = find_writable_file(cifsInode); | ||
1558 | if (open_file) { | ||
1559 | netfid = open_file->netfid; | ||
1560 | netpid = open_file->pid; | ||
1561 | goto set_via_filehandle; | ||
1562 | } | ||
1563 | |||
1564 | /* | ||
1565 | * NT4 apparently returns success on this call, but it doesn't | ||
1566 | * really work. | ||
1567 | */ | ||
1568 | if (!(pTcon->ses->flags & CIFS_SES_NT4)) { | ||
1569 | rc = CIFSSMBSetPathInfo(xid, pTcon, full_path, | ||
1570 | &info_buf, cifs_sb->local_nls, | ||
1571 | cifs_sb->mnt_cifs_flags & | ||
1572 | CIFS_MOUNT_MAP_SPECIAL_CHR); | ||
1573 | if (rc != -EOPNOTSUPP && rc != -EINVAL) | ||
1574 | goto out; | ||
1575 | } | ||
1576 | |||
1577 | cFYI(1, ("calling SetFileInfo since SetPathInfo for " | ||
1578 | "times not supported by this server")); | ||
1579 | rc = CIFSSMBOpen(xid, pTcon, full_path, FILE_OPEN, | ||
1580 | SYNCHRONIZE | FILE_WRITE_ATTRIBUTES, | ||
1581 | CREATE_NOT_DIR, &netfid, &oplock, | ||
1582 | NULL, cifs_sb->local_nls, | ||
1583 | cifs_sb->mnt_cifs_flags & | ||
1584 | CIFS_MOUNT_MAP_SPECIAL_CHR); | ||
1585 | |||
1586 | if (rc != 0) { | ||
1587 | if (rc == -EIO) | ||
1588 | rc = -EINVAL; | ||
1589 | goto out; | ||
1590 | } | ||
1591 | |||
1592 | netpid = current->tgid; | ||
1593 | |||
1594 | set_via_filehandle: | ||
1595 | rc = CIFSSMBSetFileInfo(xid, pTcon, &info_buf, netfid, netpid); | ||
1596 | if (open_file == NULL) | ||
1597 | CIFSSMBClose(xid, pTcon, netfid); | ||
1598 | else | ||
1599 | atomic_dec(&open_file->wrtPending); | ||
1600 | out: | ||
1601 | return rc; | ||
1602 | } | ||
1603 | |||
1604 | static int | ||
1605 | cifs_setattr_unix(struct dentry *direntry, struct iattr *attrs) | 1639 | cifs_setattr_unix(struct dentry *direntry, struct iattr *attrs) |
1606 | { | 1640 | { |
1607 | int rc; | 1641 | int rc; |
diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c index 4b17f8fe3157..88786ba02d27 100644 --- a/fs/cifs/misc.c +++ b/fs/cifs/misc.c | |||
@@ -150,8 +150,7 @@ cifs_buf_get(void) | |||
150 | but it may be more efficient to always alloc same size | 150 | but it may be more efficient to always alloc same size |
151 | albeit slightly larger than necessary and maxbuffersize | 151 | albeit slightly larger than necessary and maxbuffersize |
152 | defaults to this and can not be bigger */ | 152 | defaults to this and can not be bigger */ |
153 | ret_buf = (struct smb_hdr *) mempool_alloc(cifs_req_poolp, | 153 | ret_buf = mempool_alloc(cifs_req_poolp, GFP_NOFS); |
154 | GFP_KERNEL | GFP_NOFS); | ||
155 | 154 | ||
156 | /* clear the first few header bytes */ | 155 | /* clear the first few header bytes */ |
157 | /* for most paths, more is cleared in header_assemble */ | 156 | /* for most paths, more is cleared in header_assemble */ |
@@ -188,8 +187,7 @@ cifs_small_buf_get(void) | |||
188 | but it may be more efficient to always alloc same size | 187 | but it may be more efficient to always alloc same size |
189 | albeit slightly larger than necessary and maxbuffersize | 188 | albeit slightly larger than necessary and maxbuffersize |
190 | defaults to this and can not be bigger */ | 189 | defaults to this and can not be bigger */ |
191 | ret_buf = (struct smb_hdr *) mempool_alloc(cifs_sm_req_poolp, | 190 | ret_buf = mempool_alloc(cifs_sm_req_poolp, GFP_NOFS); |
192 | GFP_KERNEL | GFP_NOFS); | ||
193 | if (ret_buf) { | 191 | if (ret_buf) { |
194 | /* No need to clear memory here, cleared in header assemble */ | 192 | /* No need to clear memory here, cleared in header assemble */ |
195 | /* memset(ret_buf, 0, sizeof(struct smb_hdr) + 27);*/ | 193 | /* memset(ret_buf, 0, sizeof(struct smb_hdr) + 27);*/ |
@@ -313,8 +311,6 @@ header_assemble(struct smb_hdr *buffer, char smb_command /* command */ , | |||
313 | buffer->Flags2 = SMBFLG2_KNOWS_LONG_NAMES; | 311 | buffer->Flags2 = SMBFLG2_KNOWS_LONG_NAMES; |
314 | buffer->Pid = cpu_to_le16((__u16)current->tgid); | 312 | buffer->Pid = cpu_to_le16((__u16)current->tgid); |
315 | buffer->PidHigh = cpu_to_le16((__u16)(current->tgid >> 16)); | 313 | buffer->PidHigh = cpu_to_le16((__u16)(current->tgid >> 16)); |
316 | spin_lock(&GlobalMid_Lock); | ||
317 | spin_unlock(&GlobalMid_Lock); | ||
318 | if (treeCon) { | 314 | if (treeCon) { |
319 | buffer->Tid = treeCon->tid; | 315 | buffer->Tid = treeCon->tid; |
320 | if (treeCon->ses) { | 316 | if (treeCon->ses) { |
diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c index 5f40ed3473f5..765adf12d54f 100644 --- a/fs/cifs/readdir.c +++ b/fs/cifs/readdir.c | |||
@@ -640,6 +640,70 @@ static int is_dir_changed(struct file *file) | |||
640 | 640 | ||
641 | } | 641 | } |
642 | 642 | ||
643 | static int cifs_save_resume_key(const char *current_entry, | ||
644 | struct cifsFileInfo *cifsFile) | ||
645 | { | ||
646 | int rc = 0; | ||
647 | unsigned int len = 0; | ||
648 | __u16 level; | ||
649 | char *filename; | ||
650 | |||
651 | if ((cifsFile == NULL) || (current_entry == NULL)) | ||
652 | return -EINVAL; | ||
653 | |||
654 | level = cifsFile->srch_inf.info_level; | ||
655 | |||
656 | if (level == SMB_FIND_FILE_UNIX) { | ||
657 | FILE_UNIX_INFO *pFindData = (FILE_UNIX_INFO *)current_entry; | ||
658 | |||
659 | filename = &pFindData->FileName[0]; | ||
660 | if (cifsFile->srch_inf.unicode) { | ||
661 | len = cifs_unicode_bytelen(filename); | ||
662 | } else { | ||
663 | /* BB should we make this strnlen of PATH_MAX? */ | ||
664 | len = strnlen(filename, PATH_MAX); | ||
665 | } | ||
666 | cifsFile->srch_inf.resume_key = pFindData->ResumeKey; | ||
667 | } else if (level == SMB_FIND_FILE_DIRECTORY_INFO) { | ||
668 | FILE_DIRECTORY_INFO *pFindData = | ||
669 | (FILE_DIRECTORY_INFO *)current_entry; | ||
670 | filename = &pFindData->FileName[0]; | ||
671 | len = le32_to_cpu(pFindData->FileNameLength); | ||
672 | cifsFile->srch_inf.resume_key = pFindData->FileIndex; | ||
673 | } else if (level == SMB_FIND_FILE_FULL_DIRECTORY_INFO) { | ||
674 | FILE_FULL_DIRECTORY_INFO *pFindData = | ||
675 | (FILE_FULL_DIRECTORY_INFO *)current_entry; | ||
676 | filename = &pFindData->FileName[0]; | ||
677 | len = le32_to_cpu(pFindData->FileNameLength); | ||
678 | cifsFile->srch_inf.resume_key = pFindData->FileIndex; | ||
679 | } else if (level == SMB_FIND_FILE_ID_FULL_DIR_INFO) { | ||
680 | SEARCH_ID_FULL_DIR_INFO *pFindData = | ||
681 | (SEARCH_ID_FULL_DIR_INFO *)current_entry; | ||
682 | filename = &pFindData->FileName[0]; | ||
683 | len = le32_to_cpu(pFindData->FileNameLength); | ||
684 | cifsFile->srch_inf.resume_key = pFindData->FileIndex; | ||
685 | } else if (level == SMB_FIND_FILE_BOTH_DIRECTORY_INFO) { | ||
686 | FILE_BOTH_DIRECTORY_INFO *pFindData = | ||
687 | (FILE_BOTH_DIRECTORY_INFO *)current_entry; | ||
688 | filename = &pFindData->FileName[0]; | ||
689 | len = le32_to_cpu(pFindData->FileNameLength); | ||
690 | cifsFile->srch_inf.resume_key = pFindData->FileIndex; | ||
691 | } else if (level == SMB_FIND_FILE_INFO_STANDARD) { | ||
692 | FIND_FILE_STANDARD_INFO *pFindData = | ||
693 | (FIND_FILE_STANDARD_INFO *)current_entry; | ||
694 | filename = &pFindData->FileName[0]; | ||
695 | /* one byte length, no name conversion */ | ||
696 | len = (unsigned int)pFindData->FileNameLength; | ||
697 | cifsFile->srch_inf.resume_key = pFindData->ResumeKey; | ||
698 | } else { | ||
699 | cFYI(1, ("Unknown findfirst level %d", level)); | ||
700 | return -EINVAL; | ||
701 | } | ||
702 | cifsFile->srch_inf.resume_name_len = len; | ||
703 | cifsFile->srch_inf.presume_name = filename; | ||
704 | return rc; | ||
705 | } | ||
706 | |||
643 | /* find the corresponding entry in the search */ | 707 | /* find the corresponding entry in the search */ |
644 | /* Note that the SMB server returns search entries for . and .. which | 708 | /* Note that the SMB server returns search entries for . and .. which |
645 | complicates logic here if we choose to parse for them and we do not | 709 | complicates logic here if we choose to parse for them and we do not |
@@ -703,6 +767,7 @@ static int find_cifs_entry(const int xid, struct cifsTconInfo *pTcon, | |||
703 | while ((index_to_find >= cifsFile->srch_inf.index_of_last_entry) && | 767 | while ((index_to_find >= cifsFile->srch_inf.index_of_last_entry) && |
704 | (rc == 0) && !cifsFile->srch_inf.endOfSearch) { | 768 | (rc == 0) && !cifsFile->srch_inf.endOfSearch) { |
705 | cFYI(1, ("calling findnext2")); | 769 | cFYI(1, ("calling findnext2")); |
770 | cifs_save_resume_key(cifsFile->srch_inf.last_entry, cifsFile); | ||
706 | rc = CIFSFindNext(xid, pTcon, cifsFile->netfid, | 771 | rc = CIFSFindNext(xid, pTcon, cifsFile->netfid, |
707 | &cifsFile->srch_inf); | 772 | &cifsFile->srch_inf); |
708 | if (rc) | 773 | if (rc) |
@@ -919,69 +984,6 @@ static int cifs_filldir(char *pfindEntry, struct file *file, | |||
919 | return rc; | 984 | return rc; |
920 | } | 985 | } |
921 | 986 | ||
922 | static int cifs_save_resume_key(const char *current_entry, | ||
923 | struct cifsFileInfo *cifsFile) | ||
924 | { | ||
925 | int rc = 0; | ||
926 | unsigned int len = 0; | ||
927 | __u16 level; | ||
928 | char *filename; | ||
929 | |||
930 | if ((cifsFile == NULL) || (current_entry == NULL)) | ||
931 | return -EINVAL; | ||
932 | |||
933 | level = cifsFile->srch_inf.info_level; | ||
934 | |||
935 | if (level == SMB_FIND_FILE_UNIX) { | ||
936 | FILE_UNIX_INFO *pFindData = (FILE_UNIX_INFO *)current_entry; | ||
937 | |||
938 | filename = &pFindData->FileName[0]; | ||
939 | if (cifsFile->srch_inf.unicode) { | ||
940 | len = cifs_unicode_bytelen(filename); | ||
941 | } else { | ||
942 | /* BB should we make this strnlen of PATH_MAX? */ | ||
943 | len = strnlen(filename, PATH_MAX); | ||
944 | } | ||
945 | cifsFile->srch_inf.resume_key = pFindData->ResumeKey; | ||
946 | } else if (level == SMB_FIND_FILE_DIRECTORY_INFO) { | ||
947 | FILE_DIRECTORY_INFO *pFindData = | ||
948 | (FILE_DIRECTORY_INFO *)current_entry; | ||
949 | filename = &pFindData->FileName[0]; | ||
950 | len = le32_to_cpu(pFindData->FileNameLength); | ||
951 | cifsFile->srch_inf.resume_key = pFindData->FileIndex; | ||
952 | } else if (level == SMB_FIND_FILE_FULL_DIRECTORY_INFO) { | ||
953 | FILE_FULL_DIRECTORY_INFO *pFindData = | ||
954 | (FILE_FULL_DIRECTORY_INFO *)current_entry; | ||
955 | filename = &pFindData->FileName[0]; | ||
956 | len = le32_to_cpu(pFindData->FileNameLength); | ||
957 | cifsFile->srch_inf.resume_key = pFindData->FileIndex; | ||
958 | } else if (level == SMB_FIND_FILE_ID_FULL_DIR_INFO) { | ||
959 | SEARCH_ID_FULL_DIR_INFO *pFindData = | ||
960 | (SEARCH_ID_FULL_DIR_INFO *)current_entry; | ||
961 | filename = &pFindData->FileName[0]; | ||
962 | len = le32_to_cpu(pFindData->FileNameLength); | ||
963 | cifsFile->srch_inf.resume_key = pFindData->FileIndex; | ||
964 | } else if (level == SMB_FIND_FILE_BOTH_DIRECTORY_INFO) { | ||
965 | FILE_BOTH_DIRECTORY_INFO *pFindData = | ||
966 | (FILE_BOTH_DIRECTORY_INFO *)current_entry; | ||
967 | filename = &pFindData->FileName[0]; | ||
968 | len = le32_to_cpu(pFindData->FileNameLength); | ||
969 | cifsFile->srch_inf.resume_key = pFindData->FileIndex; | ||
970 | } else if (level == SMB_FIND_FILE_INFO_STANDARD) { | ||
971 | FIND_FILE_STANDARD_INFO *pFindData = | ||
972 | (FIND_FILE_STANDARD_INFO *)current_entry; | ||
973 | filename = &pFindData->FileName[0]; | ||
974 | /* one byte length, no name conversion */ | ||
975 | len = (unsigned int)pFindData->FileNameLength; | ||
976 | cifsFile->srch_inf.resume_key = pFindData->ResumeKey; | ||
977 | } else { | ||
978 | cFYI(1, ("Unknown findfirst level %d", level)); | ||
979 | return -EINVAL; | ||
980 | } | ||
981 | cifsFile->srch_inf.resume_name_len = len; | ||
982 | cifsFile->srch_inf.presume_name = filename; | ||
983 | return rc; | ||
984 | } | ||
985 | 987 | ||
986 | int cifs_readdir(struct file *file, void *direntry, filldir_t filldir) | 988 | int cifs_readdir(struct file *file, void *direntry, filldir_t filldir) |
987 | { | 989 | { |
diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c index 252fdc0567f1..2851d5da0c8c 100644 --- a/fs/cifs/sess.c +++ b/fs/cifs/sess.c | |||
@@ -624,8 +624,10 @@ CIFS_SessSetup(unsigned int xid, struct cifsSesInfo *ses, int first_time, | |||
624 | ses, nls_cp); | 624 | ses, nls_cp); |
625 | 625 | ||
626 | ssetup_exit: | 626 | ssetup_exit: |
627 | if (spnego_key) | 627 | if (spnego_key) { |
628 | key_revoke(spnego_key); | ||
628 | key_put(spnego_key); | 629 | key_put(spnego_key); |
630 | } | ||
629 | kfree(str_area); | 631 | kfree(str_area); |
630 | if (resp_buf_type == CIFS_SMALL_BUFFER) { | 632 | if (resp_buf_type == CIFS_SMALL_BUFFER) { |
631 | cFYI(1, ("ssetup freeing small buf %p", iov[0].iov_base)); | 633 | cFYI(1, ("ssetup freeing small buf %p", iov[0].iov_base)); |
diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c index e286db9f5ee2..bf0e6d8e382a 100644 --- a/fs/cifs/transport.c +++ b/fs/cifs/transport.c | |||
@@ -50,8 +50,7 @@ AllocMidQEntry(const struct smb_hdr *smb_buffer, struct cifsSesInfo *ses) | |||
50 | return NULL; | 50 | return NULL; |
51 | } | 51 | } |
52 | 52 | ||
53 | temp = (struct mid_q_entry *) mempool_alloc(cifs_mid_poolp, | 53 | temp = mempool_alloc(cifs_mid_poolp, GFP_NOFS); |
54 | GFP_KERNEL | GFP_NOFS); | ||
55 | if (temp == NULL) | 54 | if (temp == NULL) |
56 | return temp; | 55 | return temp; |
57 | else { | 56 | else { |
diff --git a/fs/dcache.c b/fs/dcache.c index 80e93956aced..e7a1a99b7464 100644 --- a/fs/dcache.c +++ b/fs/dcache.c | |||
@@ -1395,6 +1395,10 @@ struct dentry * __d_lookup(struct dentry * parent, struct qstr * name) | |||
1395 | if (dentry->d_parent != parent) | 1395 | if (dentry->d_parent != parent) |
1396 | goto next; | 1396 | goto next; |
1397 | 1397 | ||
1398 | /* non-existing due to RCU? */ | ||
1399 | if (d_unhashed(dentry)) | ||
1400 | goto next; | ||
1401 | |||
1398 | /* | 1402 | /* |
1399 | * It is safe to compare names since d_move() cannot | 1403 | * It is safe to compare names since d_move() cannot |
1400 | * change the qstr (protected by d_lock). | 1404 | * change the qstr (protected by d_lock). |
@@ -1410,10 +1414,8 @@ struct dentry * __d_lookup(struct dentry * parent, struct qstr * name) | |||
1410 | goto next; | 1414 | goto next; |
1411 | } | 1415 | } |
1412 | 1416 | ||
1413 | if (!d_unhashed(dentry)) { | 1417 | atomic_inc(&dentry->d_count); |
1414 | atomic_inc(&dentry->d_count); | 1418 | found = dentry; |
1415 | found = dentry; | ||
1416 | } | ||
1417 | spin_unlock(&dentry->d_lock); | 1419 | spin_unlock(&dentry->d_lock); |
1418 | break; | 1420 | break; |
1419 | next: | 1421 | next: |
diff --git a/fs/dlm/config.c b/fs/dlm/config.c index 89d2fb7b991a..fd9859f92fad 100644 --- a/fs/dlm/config.c +++ b/fs/dlm/config.c | |||
@@ -14,6 +14,9 @@ | |||
14 | #include <linux/kernel.h> | 14 | #include <linux/kernel.h> |
15 | #include <linux/module.h> | 15 | #include <linux/module.h> |
16 | #include <linux/configfs.h> | 16 | #include <linux/configfs.h> |
17 | #include <linux/in.h> | ||
18 | #include <linux/in6.h> | ||
19 | #include <net/ipv6.h> | ||
17 | #include <net/sock.h> | 20 | #include <net/sock.h> |
18 | 21 | ||
19 | #include "config.h" | 22 | #include "config.h" |
@@ -377,24 +380,24 @@ static struct config_item_type node_type = { | |||
377 | .ct_owner = THIS_MODULE, | 380 | .ct_owner = THIS_MODULE, |
378 | }; | 381 | }; |
379 | 382 | ||
380 | static struct dlm_cluster *to_cluster(struct config_item *i) | 383 | static struct dlm_cluster *config_item_to_cluster(struct config_item *i) |
381 | { | 384 | { |
382 | return i ? container_of(to_config_group(i), struct dlm_cluster, group) : | 385 | return i ? container_of(to_config_group(i), struct dlm_cluster, group) : |
383 | NULL; | 386 | NULL; |
384 | } | 387 | } |
385 | 388 | ||
386 | static struct dlm_space *to_space(struct config_item *i) | 389 | static struct dlm_space *config_item_to_space(struct config_item *i) |
387 | { | 390 | { |
388 | return i ? container_of(to_config_group(i), struct dlm_space, group) : | 391 | return i ? container_of(to_config_group(i), struct dlm_space, group) : |
389 | NULL; | 392 | NULL; |
390 | } | 393 | } |
391 | 394 | ||
392 | static struct dlm_comm *to_comm(struct config_item *i) | 395 | static struct dlm_comm *config_item_to_comm(struct config_item *i) |
393 | { | 396 | { |
394 | return i ? container_of(i, struct dlm_comm, item) : NULL; | 397 | return i ? container_of(i, struct dlm_comm, item) : NULL; |
395 | } | 398 | } |
396 | 399 | ||
397 | static struct dlm_node *to_node(struct config_item *i) | 400 | static struct dlm_node *config_item_to_node(struct config_item *i) |
398 | { | 401 | { |
399 | return i ? container_of(i, struct dlm_node, item) : NULL; | 402 | return i ? container_of(i, struct dlm_node, item) : NULL; |
400 | } | 403 | } |
@@ -450,7 +453,7 @@ static struct config_group *make_cluster(struct config_group *g, | |||
450 | 453 | ||
451 | static void drop_cluster(struct config_group *g, struct config_item *i) | 454 | static void drop_cluster(struct config_group *g, struct config_item *i) |
452 | { | 455 | { |
453 | struct dlm_cluster *cl = to_cluster(i); | 456 | struct dlm_cluster *cl = config_item_to_cluster(i); |
454 | struct config_item *tmp; | 457 | struct config_item *tmp; |
455 | int j; | 458 | int j; |
456 | 459 | ||
@@ -468,7 +471,7 @@ static void drop_cluster(struct config_group *g, struct config_item *i) | |||
468 | 471 | ||
469 | static void release_cluster(struct config_item *i) | 472 | static void release_cluster(struct config_item *i) |
470 | { | 473 | { |
471 | struct dlm_cluster *cl = to_cluster(i); | 474 | struct dlm_cluster *cl = config_item_to_cluster(i); |
472 | kfree(cl->group.default_groups); | 475 | kfree(cl->group.default_groups); |
473 | kfree(cl); | 476 | kfree(cl); |
474 | } | 477 | } |
@@ -507,7 +510,7 @@ static struct config_group *make_space(struct config_group *g, const char *name) | |||
507 | 510 | ||
508 | static void drop_space(struct config_group *g, struct config_item *i) | 511 | static void drop_space(struct config_group *g, struct config_item *i) |
509 | { | 512 | { |
510 | struct dlm_space *sp = to_space(i); | 513 | struct dlm_space *sp = config_item_to_space(i); |
511 | struct config_item *tmp; | 514 | struct config_item *tmp; |
512 | int j; | 515 | int j; |
513 | 516 | ||
@@ -524,7 +527,7 @@ static void drop_space(struct config_group *g, struct config_item *i) | |||
524 | 527 | ||
525 | static void release_space(struct config_item *i) | 528 | static void release_space(struct config_item *i) |
526 | { | 529 | { |
527 | struct dlm_space *sp = to_space(i); | 530 | struct dlm_space *sp = config_item_to_space(i); |
528 | kfree(sp->group.default_groups); | 531 | kfree(sp->group.default_groups); |
529 | kfree(sp); | 532 | kfree(sp); |
530 | } | 533 | } |
@@ -546,7 +549,7 @@ static struct config_item *make_comm(struct config_group *g, const char *name) | |||
546 | 549 | ||
547 | static void drop_comm(struct config_group *g, struct config_item *i) | 550 | static void drop_comm(struct config_group *g, struct config_item *i) |
548 | { | 551 | { |
549 | struct dlm_comm *cm = to_comm(i); | 552 | struct dlm_comm *cm = config_item_to_comm(i); |
550 | if (local_comm == cm) | 553 | if (local_comm == cm) |
551 | local_comm = NULL; | 554 | local_comm = NULL; |
552 | dlm_lowcomms_close(cm->nodeid); | 555 | dlm_lowcomms_close(cm->nodeid); |
@@ -557,13 +560,13 @@ static void drop_comm(struct config_group *g, struct config_item *i) | |||
557 | 560 | ||
558 | static void release_comm(struct config_item *i) | 561 | static void release_comm(struct config_item *i) |
559 | { | 562 | { |
560 | struct dlm_comm *cm = to_comm(i); | 563 | struct dlm_comm *cm = config_item_to_comm(i); |
561 | kfree(cm); | 564 | kfree(cm); |
562 | } | 565 | } |
563 | 566 | ||
564 | static struct config_item *make_node(struct config_group *g, const char *name) | 567 | static struct config_item *make_node(struct config_group *g, const char *name) |
565 | { | 568 | { |
566 | struct dlm_space *sp = to_space(g->cg_item.ci_parent); | 569 | struct dlm_space *sp = config_item_to_space(g->cg_item.ci_parent); |
567 | struct dlm_node *nd; | 570 | struct dlm_node *nd; |
568 | 571 | ||
569 | nd = kzalloc(sizeof(struct dlm_node), GFP_KERNEL); | 572 | nd = kzalloc(sizeof(struct dlm_node), GFP_KERNEL); |
@@ -585,8 +588,8 @@ static struct config_item *make_node(struct config_group *g, const char *name) | |||
585 | 588 | ||
586 | static void drop_node(struct config_group *g, struct config_item *i) | 589 | static void drop_node(struct config_group *g, struct config_item *i) |
587 | { | 590 | { |
588 | struct dlm_space *sp = to_space(g->cg_item.ci_parent); | 591 | struct dlm_space *sp = config_item_to_space(g->cg_item.ci_parent); |
589 | struct dlm_node *nd = to_node(i); | 592 | struct dlm_node *nd = config_item_to_node(i); |
590 | 593 | ||
591 | mutex_lock(&sp->members_lock); | 594 | mutex_lock(&sp->members_lock); |
592 | list_del(&nd->list); | 595 | list_del(&nd->list); |
@@ -598,7 +601,7 @@ static void drop_node(struct config_group *g, struct config_item *i) | |||
598 | 601 | ||
599 | static void release_node(struct config_item *i) | 602 | static void release_node(struct config_item *i) |
600 | { | 603 | { |
601 | struct dlm_node *nd = to_node(i); | 604 | struct dlm_node *nd = config_item_to_node(i); |
602 | kfree(nd); | 605 | kfree(nd); |
603 | } | 606 | } |
604 | 607 | ||
@@ -632,7 +635,7 @@ void dlm_config_exit(void) | |||
632 | static ssize_t show_cluster(struct config_item *i, struct configfs_attribute *a, | 635 | static ssize_t show_cluster(struct config_item *i, struct configfs_attribute *a, |
633 | char *buf) | 636 | char *buf) |
634 | { | 637 | { |
635 | struct dlm_cluster *cl = to_cluster(i); | 638 | struct dlm_cluster *cl = config_item_to_cluster(i); |
636 | struct cluster_attribute *cla = | 639 | struct cluster_attribute *cla = |
637 | container_of(a, struct cluster_attribute, attr); | 640 | container_of(a, struct cluster_attribute, attr); |
638 | return cla->show ? cla->show(cl, buf) : 0; | 641 | return cla->show ? cla->show(cl, buf) : 0; |
@@ -642,7 +645,7 @@ static ssize_t store_cluster(struct config_item *i, | |||
642 | struct configfs_attribute *a, | 645 | struct configfs_attribute *a, |
643 | const char *buf, size_t len) | 646 | const char *buf, size_t len) |
644 | { | 647 | { |
645 | struct dlm_cluster *cl = to_cluster(i); | 648 | struct dlm_cluster *cl = config_item_to_cluster(i); |
646 | struct cluster_attribute *cla = | 649 | struct cluster_attribute *cla = |
647 | container_of(a, struct cluster_attribute, attr); | 650 | container_of(a, struct cluster_attribute, attr); |
648 | return cla->store ? cla->store(cl, buf, len) : -EINVAL; | 651 | return cla->store ? cla->store(cl, buf, len) : -EINVAL; |
@@ -651,7 +654,7 @@ static ssize_t store_cluster(struct config_item *i, | |||
651 | static ssize_t show_comm(struct config_item *i, struct configfs_attribute *a, | 654 | static ssize_t show_comm(struct config_item *i, struct configfs_attribute *a, |
652 | char *buf) | 655 | char *buf) |
653 | { | 656 | { |
654 | struct dlm_comm *cm = to_comm(i); | 657 | struct dlm_comm *cm = config_item_to_comm(i); |
655 | struct comm_attribute *cma = | 658 | struct comm_attribute *cma = |
656 | container_of(a, struct comm_attribute, attr); | 659 | container_of(a, struct comm_attribute, attr); |
657 | return cma->show ? cma->show(cm, buf) : 0; | 660 | return cma->show ? cma->show(cm, buf) : 0; |
@@ -660,7 +663,7 @@ static ssize_t show_comm(struct config_item *i, struct configfs_attribute *a, | |||
660 | static ssize_t store_comm(struct config_item *i, struct configfs_attribute *a, | 663 | static ssize_t store_comm(struct config_item *i, struct configfs_attribute *a, |
661 | const char *buf, size_t len) | 664 | const char *buf, size_t len) |
662 | { | 665 | { |
663 | struct dlm_comm *cm = to_comm(i); | 666 | struct dlm_comm *cm = config_item_to_comm(i); |
664 | struct comm_attribute *cma = | 667 | struct comm_attribute *cma = |
665 | container_of(a, struct comm_attribute, attr); | 668 | container_of(a, struct comm_attribute, attr); |
666 | return cma->store ? cma->store(cm, buf, len) : -EINVAL; | 669 | return cma->store ? cma->store(cm, buf, len) : -EINVAL; |
@@ -714,7 +717,7 @@ static ssize_t comm_addr_write(struct dlm_comm *cm, const char *buf, size_t len) | |||
714 | static ssize_t show_node(struct config_item *i, struct configfs_attribute *a, | 717 | static ssize_t show_node(struct config_item *i, struct configfs_attribute *a, |
715 | char *buf) | 718 | char *buf) |
716 | { | 719 | { |
717 | struct dlm_node *nd = to_node(i); | 720 | struct dlm_node *nd = config_item_to_node(i); |
718 | struct node_attribute *nda = | 721 | struct node_attribute *nda = |
719 | container_of(a, struct node_attribute, attr); | 722 | container_of(a, struct node_attribute, attr); |
720 | return nda->show ? nda->show(nd, buf) : 0; | 723 | return nda->show ? nda->show(nd, buf) : 0; |
@@ -723,7 +726,7 @@ static ssize_t show_node(struct config_item *i, struct configfs_attribute *a, | |||
723 | static ssize_t store_node(struct config_item *i, struct configfs_attribute *a, | 726 | static ssize_t store_node(struct config_item *i, struct configfs_attribute *a, |
724 | const char *buf, size_t len) | 727 | const char *buf, size_t len) |
725 | { | 728 | { |
726 | struct dlm_node *nd = to_node(i); | 729 | struct dlm_node *nd = config_item_to_node(i); |
727 | struct node_attribute *nda = | 730 | struct node_attribute *nda = |
728 | container_of(a, struct node_attribute, attr); | 731 | container_of(a, struct node_attribute, attr); |
729 | return nda->store ? nda->store(nd, buf, len) : -EINVAL; | 732 | return nda->store ? nda->store(nd, buf, len) : -EINVAL; |
@@ -768,7 +771,7 @@ static struct dlm_space *get_space(char *name) | |||
768 | i = config_group_find_item(space_list, name); | 771 | i = config_group_find_item(space_list, name); |
769 | mutex_unlock(&space_list->cg_subsys->su_mutex); | 772 | mutex_unlock(&space_list->cg_subsys->su_mutex); |
770 | 773 | ||
771 | return to_space(i); | 774 | return config_item_to_space(i); |
772 | } | 775 | } |
773 | 776 | ||
774 | static void put_space(struct dlm_space *sp) | 777 | static void put_space(struct dlm_space *sp) |
@@ -776,6 +779,33 @@ static void put_space(struct dlm_space *sp) | |||
776 | config_item_put(&sp->group.cg_item); | 779 | config_item_put(&sp->group.cg_item); |
777 | } | 780 | } |
778 | 781 | ||
782 | static int addr_compare(struct sockaddr_storage *x, struct sockaddr_storage *y) | ||
783 | { | ||
784 | switch (x->ss_family) { | ||
785 | case AF_INET: { | ||
786 | struct sockaddr_in *sinx = (struct sockaddr_in *)x; | ||
787 | struct sockaddr_in *siny = (struct sockaddr_in *)y; | ||
788 | if (sinx->sin_addr.s_addr != siny->sin_addr.s_addr) | ||
789 | return 0; | ||
790 | if (sinx->sin_port != siny->sin_port) | ||
791 | return 0; | ||
792 | break; | ||
793 | } | ||
794 | case AF_INET6: { | ||
795 | struct sockaddr_in6 *sinx = (struct sockaddr_in6 *)x; | ||
796 | struct sockaddr_in6 *siny = (struct sockaddr_in6 *)y; | ||
797 | if (!ipv6_addr_equal(&sinx->sin6_addr, &siny->sin6_addr)) | ||
798 | return 0; | ||
799 | if (sinx->sin6_port != siny->sin6_port) | ||
800 | return 0; | ||
801 | break; | ||
802 | } | ||
803 | default: | ||
804 | return 0; | ||
805 | } | ||
806 | return 1; | ||
807 | } | ||
808 | |||
779 | static struct dlm_comm *get_comm(int nodeid, struct sockaddr_storage *addr) | 809 | static struct dlm_comm *get_comm(int nodeid, struct sockaddr_storage *addr) |
780 | { | 810 | { |
781 | struct config_item *i; | 811 | struct config_item *i; |
@@ -788,7 +818,7 @@ static struct dlm_comm *get_comm(int nodeid, struct sockaddr_storage *addr) | |||
788 | mutex_lock(&clusters_root.subsys.su_mutex); | 818 | mutex_lock(&clusters_root.subsys.su_mutex); |
789 | 819 | ||
790 | list_for_each_entry(i, &comm_list->cg_children, ci_entry) { | 820 | list_for_each_entry(i, &comm_list->cg_children, ci_entry) { |
791 | cm = to_comm(i); | 821 | cm = config_item_to_comm(i); |
792 | 822 | ||
793 | if (nodeid) { | 823 | if (nodeid) { |
794 | if (cm->nodeid != nodeid) | 824 | if (cm->nodeid != nodeid) |
@@ -797,8 +827,7 @@ static struct dlm_comm *get_comm(int nodeid, struct sockaddr_storage *addr) | |||
797 | config_item_get(i); | 827 | config_item_get(i); |
798 | break; | 828 | break; |
799 | } else { | 829 | } else { |
800 | if (!cm->addr_count || | 830 | if (!cm->addr_count || !addr_compare(cm->addr[0], addr)) |
801 | memcmp(cm->addr[0], addr, sizeof(*addr))) | ||
802 | continue; | 831 | continue; |
803 | found = 1; | 832 | found = 1; |
804 | config_item_get(i); | 833 | config_item_get(i); |
diff --git a/fs/dlm/dlm_internal.h b/fs/dlm/dlm_internal.h index 5a7ac33b629c..868e4c9ef127 100644 --- a/fs/dlm/dlm_internal.h +++ b/fs/dlm/dlm_internal.h | |||
@@ -2,7 +2,7 @@ | |||
2 | ******************************************************************************* | 2 | ******************************************************************************* |
3 | ** | 3 | ** |
4 | ** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | 4 | ** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. |
5 | ** Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved. | 5 | ** Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved. |
6 | ** | 6 | ** |
7 | ** This copyrighted material is made available to anyone wishing to use, | 7 | ** This copyrighted material is made available to anyone wishing to use, |
8 | ** modify, copy, or redistribute it subject to the terms and conditions | 8 | ** modify, copy, or redistribute it subject to the terms and conditions |
@@ -441,8 +441,11 @@ struct dlm_ls { | |||
441 | uint32_t ls_global_id; /* global unique lockspace ID */ | 441 | uint32_t ls_global_id; /* global unique lockspace ID */ |
442 | uint32_t ls_exflags; | 442 | uint32_t ls_exflags; |
443 | int ls_lvblen; | 443 | int ls_lvblen; |
444 | int ls_count; /* reference count */ | 444 | int ls_count; /* refcount of processes in |
445 | the dlm using this ls */ | ||
446 | int ls_create_count; /* create/release refcount */ | ||
445 | unsigned long ls_flags; /* LSFL_ */ | 447 | unsigned long ls_flags; /* LSFL_ */ |
448 | unsigned long ls_scan_time; | ||
446 | struct kobject ls_kobj; | 449 | struct kobject ls_kobj; |
447 | 450 | ||
448 | struct dlm_rsbtable *ls_rsbtbl; | 451 | struct dlm_rsbtable *ls_rsbtbl; |
diff --git a/fs/dlm/lockspace.c b/fs/dlm/lockspace.c index 499e16759e96..d910501de6d2 100644 --- a/fs/dlm/lockspace.c +++ b/fs/dlm/lockspace.c | |||
@@ -2,7 +2,7 @@ | |||
2 | ******************************************************************************* | 2 | ******************************************************************************* |
3 | ** | 3 | ** |
4 | ** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | 4 | ** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. |
5 | ** Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved. | 5 | ** Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved. |
6 | ** | 6 | ** |
7 | ** This copyrighted material is made available to anyone wishing to use, | 7 | ** This copyrighted material is made available to anyone wishing to use, |
8 | ** modify, copy, or redistribute it subject to the terms and conditions | 8 | ** modify, copy, or redistribute it subject to the terms and conditions |
@@ -23,6 +23,7 @@ | |||
23 | #include "lock.h" | 23 | #include "lock.h" |
24 | #include "recover.h" | 24 | #include "recover.h" |
25 | #include "requestqueue.h" | 25 | #include "requestqueue.h" |
26 | #include "user.h" | ||
26 | 27 | ||
27 | static int ls_count; | 28 | static int ls_count; |
28 | static struct mutex ls_lock; | 29 | static struct mutex ls_lock; |
@@ -211,19 +212,41 @@ void dlm_lockspace_exit(void) | |||
211 | kset_unregister(dlm_kset); | 212 | kset_unregister(dlm_kset); |
212 | } | 213 | } |
213 | 214 | ||
215 | static struct dlm_ls *find_ls_to_scan(void) | ||
216 | { | ||
217 | struct dlm_ls *ls; | ||
218 | |||
219 | spin_lock(&lslist_lock); | ||
220 | list_for_each_entry(ls, &lslist, ls_list) { | ||
221 | if (time_after_eq(jiffies, ls->ls_scan_time + | ||
222 | dlm_config.ci_scan_secs * HZ)) { | ||
223 | spin_unlock(&lslist_lock); | ||
224 | return ls; | ||
225 | } | ||
226 | } | ||
227 | spin_unlock(&lslist_lock); | ||
228 | return NULL; | ||
229 | } | ||
230 | |||
214 | static int dlm_scand(void *data) | 231 | static int dlm_scand(void *data) |
215 | { | 232 | { |
216 | struct dlm_ls *ls; | 233 | struct dlm_ls *ls; |
234 | int timeout_jiffies = dlm_config.ci_scan_secs * HZ; | ||
217 | 235 | ||
218 | while (!kthread_should_stop()) { | 236 | while (!kthread_should_stop()) { |
219 | list_for_each_entry(ls, &lslist, ls_list) { | 237 | ls = find_ls_to_scan(); |
238 | if (ls) { | ||
220 | if (dlm_lock_recovery_try(ls)) { | 239 | if (dlm_lock_recovery_try(ls)) { |
240 | ls->ls_scan_time = jiffies; | ||
221 | dlm_scan_rsbs(ls); | 241 | dlm_scan_rsbs(ls); |
222 | dlm_scan_timeout(ls); | 242 | dlm_scan_timeout(ls); |
223 | dlm_unlock_recovery(ls); | 243 | dlm_unlock_recovery(ls); |
244 | } else { | ||
245 | ls->ls_scan_time += HZ; | ||
224 | } | 246 | } |
247 | } else { | ||
248 | schedule_timeout_interruptible(timeout_jiffies); | ||
225 | } | 249 | } |
226 | schedule_timeout_interruptible(dlm_config.ci_scan_secs * HZ); | ||
227 | } | 250 | } |
228 | return 0; | 251 | return 0; |
229 | } | 252 | } |
@@ -246,23 +269,6 @@ static void dlm_scand_stop(void) | |||
246 | kthread_stop(scand_task); | 269 | kthread_stop(scand_task); |
247 | } | 270 | } |
248 | 271 | ||
249 | static struct dlm_ls *dlm_find_lockspace_name(char *name, int namelen) | ||
250 | { | ||
251 | struct dlm_ls *ls; | ||
252 | |||
253 | spin_lock(&lslist_lock); | ||
254 | |||
255 | list_for_each_entry(ls, &lslist, ls_list) { | ||
256 | if (ls->ls_namelen == namelen && | ||
257 | memcmp(ls->ls_name, name, namelen) == 0) | ||
258 | goto out; | ||
259 | } | ||
260 | ls = NULL; | ||
261 | out: | ||
262 | spin_unlock(&lslist_lock); | ||
263 | return ls; | ||
264 | } | ||
265 | |||
266 | struct dlm_ls *dlm_find_lockspace_global(uint32_t id) | 272 | struct dlm_ls *dlm_find_lockspace_global(uint32_t id) |
267 | { | 273 | { |
268 | struct dlm_ls *ls; | 274 | struct dlm_ls *ls; |
@@ -327,6 +333,7 @@ static void remove_lockspace(struct dlm_ls *ls) | |||
327 | for (;;) { | 333 | for (;;) { |
328 | spin_lock(&lslist_lock); | 334 | spin_lock(&lslist_lock); |
329 | if (ls->ls_count == 0) { | 335 | if (ls->ls_count == 0) { |
336 | WARN_ON(ls->ls_create_count != 0); | ||
330 | list_del(&ls->ls_list); | 337 | list_del(&ls->ls_list); |
331 | spin_unlock(&lslist_lock); | 338 | spin_unlock(&lslist_lock); |
332 | return; | 339 | return; |
@@ -381,7 +388,7 @@ static int new_lockspace(char *name, int namelen, void **lockspace, | |||
381 | uint32_t flags, int lvblen) | 388 | uint32_t flags, int lvblen) |
382 | { | 389 | { |
383 | struct dlm_ls *ls; | 390 | struct dlm_ls *ls; |
384 | int i, size, error = -ENOMEM; | 391 | int i, size, error; |
385 | int do_unreg = 0; | 392 | int do_unreg = 0; |
386 | 393 | ||
387 | if (namelen > DLM_LOCKSPACE_LEN) | 394 | if (namelen > DLM_LOCKSPACE_LEN) |
@@ -393,12 +400,37 @@ static int new_lockspace(char *name, int namelen, void **lockspace, | |||
393 | if (!try_module_get(THIS_MODULE)) | 400 | if (!try_module_get(THIS_MODULE)) |
394 | return -EINVAL; | 401 | return -EINVAL; |
395 | 402 | ||
396 | ls = dlm_find_lockspace_name(name, namelen); | 403 | if (!dlm_user_daemon_available()) { |
397 | if (ls) { | 404 | module_put(THIS_MODULE); |
398 | *lockspace = ls; | 405 | return -EUNATCH; |
406 | } | ||
407 | |||
408 | error = 0; | ||
409 | |||
410 | spin_lock(&lslist_lock); | ||
411 | list_for_each_entry(ls, &lslist, ls_list) { | ||
412 | WARN_ON(ls->ls_create_count <= 0); | ||
413 | if (ls->ls_namelen != namelen) | ||
414 | continue; | ||
415 | if (memcmp(ls->ls_name, name, namelen)) | ||
416 | continue; | ||
417 | if (flags & DLM_LSFL_NEWEXCL) { | ||
418 | error = -EEXIST; | ||
419 | break; | ||
420 | } | ||
421 | ls->ls_create_count++; | ||
399 | module_put(THIS_MODULE); | 422 | module_put(THIS_MODULE); |
400 | return -EEXIST; | 423 | error = 1; /* not an error, return 0 */ |
424 | break; | ||
401 | } | 425 | } |
426 | spin_unlock(&lslist_lock); | ||
427 | |||
428 | if (error < 0) | ||
429 | goto out; | ||
430 | if (error) | ||
431 | goto ret_zero; | ||
432 | |||
433 | error = -ENOMEM; | ||
402 | 434 | ||
403 | ls = kzalloc(sizeof(struct dlm_ls) + namelen, GFP_KERNEL); | 435 | ls = kzalloc(sizeof(struct dlm_ls) + namelen, GFP_KERNEL); |
404 | if (!ls) | 436 | if (!ls) |
@@ -408,6 +440,7 @@ static int new_lockspace(char *name, int namelen, void **lockspace, | |||
408 | ls->ls_lvblen = lvblen; | 440 | ls->ls_lvblen = lvblen; |
409 | ls->ls_count = 0; | 441 | ls->ls_count = 0; |
410 | ls->ls_flags = 0; | 442 | ls->ls_flags = 0; |
443 | ls->ls_scan_time = jiffies; | ||
411 | 444 | ||
412 | if (flags & DLM_LSFL_TIMEWARN) | 445 | if (flags & DLM_LSFL_TIMEWARN) |
413 | set_bit(LSFL_TIMEWARN, &ls->ls_flags); | 446 | set_bit(LSFL_TIMEWARN, &ls->ls_flags); |
@@ -418,8 +451,9 @@ static int new_lockspace(char *name, int namelen, void **lockspace, | |||
418 | ls->ls_allocation = GFP_KERNEL; | 451 | ls->ls_allocation = GFP_KERNEL; |
419 | 452 | ||
420 | /* ls_exflags are forced to match among nodes, and we don't | 453 | /* ls_exflags are forced to match among nodes, and we don't |
421 | need to require all nodes to have TIMEWARN or FS set */ | 454 | need to require all nodes to have some flags set */ |
422 | ls->ls_exflags = (flags & ~(DLM_LSFL_TIMEWARN | DLM_LSFL_FS)); | 455 | ls->ls_exflags = (flags & ~(DLM_LSFL_TIMEWARN | DLM_LSFL_FS | |
456 | DLM_LSFL_NEWEXCL)); | ||
423 | 457 | ||
424 | size = dlm_config.ci_rsbtbl_size; | 458 | size = dlm_config.ci_rsbtbl_size; |
425 | ls->ls_rsbtbl_size = size; | 459 | ls->ls_rsbtbl_size = size; |
@@ -510,6 +544,7 @@ static int new_lockspace(char *name, int namelen, void **lockspace, | |||
510 | down_write(&ls->ls_in_recovery); | 544 | down_write(&ls->ls_in_recovery); |
511 | 545 | ||
512 | spin_lock(&lslist_lock); | 546 | spin_lock(&lslist_lock); |
547 | ls->ls_create_count = 1; | ||
513 | list_add(&ls->ls_list, &lslist); | 548 | list_add(&ls->ls_list, &lslist); |
514 | spin_unlock(&lslist_lock); | 549 | spin_unlock(&lslist_lock); |
515 | 550 | ||
@@ -548,7 +583,7 @@ static int new_lockspace(char *name, int namelen, void **lockspace, | |||
548 | dlm_create_debug_file(ls); | 583 | dlm_create_debug_file(ls); |
549 | 584 | ||
550 | log_debug(ls, "join complete"); | 585 | log_debug(ls, "join complete"); |
551 | 586 | ret_zero: | |
552 | *lockspace = ls; | 587 | *lockspace = ls; |
553 | return 0; | 588 | return 0; |
554 | 589 | ||
@@ -635,13 +670,34 @@ static int release_lockspace(struct dlm_ls *ls, int force) | |||
635 | struct dlm_lkb *lkb; | 670 | struct dlm_lkb *lkb; |
636 | struct dlm_rsb *rsb; | 671 | struct dlm_rsb *rsb; |
637 | struct list_head *head; | 672 | struct list_head *head; |
638 | int i; | 673 | int i, busy, rv; |
639 | int busy = lockspace_busy(ls); | 674 | |
675 | busy = lockspace_busy(ls); | ||
676 | |||
677 | spin_lock(&lslist_lock); | ||
678 | if (ls->ls_create_count == 1) { | ||
679 | if (busy > force) | ||
680 | rv = -EBUSY; | ||
681 | else { | ||
682 | /* remove_lockspace takes ls off lslist */ | ||
683 | ls->ls_create_count = 0; | ||
684 | rv = 0; | ||
685 | } | ||
686 | } else if (ls->ls_create_count > 1) { | ||
687 | rv = --ls->ls_create_count; | ||
688 | } else { | ||
689 | rv = -EINVAL; | ||
690 | } | ||
691 | spin_unlock(&lslist_lock); | ||
640 | 692 | ||
641 | if (busy > force) | 693 | if (rv) { |
642 | return -EBUSY; | 694 | log_debug(ls, "release_lockspace no remove %d", rv); |
695 | return rv; | ||
696 | } | ||
697 | |||
698 | dlm_device_deregister(ls); | ||
643 | 699 | ||
644 | if (force < 3) | 700 | if (force < 3 && dlm_user_daemon_available()) |
645 | do_uevent(ls, 0); | 701 | do_uevent(ls, 0); |
646 | 702 | ||
647 | dlm_recoverd_stop(ls); | 703 | dlm_recoverd_stop(ls); |
@@ -720,15 +776,10 @@ static int release_lockspace(struct dlm_ls *ls, int force) | |||
720 | dlm_clear_members(ls); | 776 | dlm_clear_members(ls); |
721 | dlm_clear_members_gone(ls); | 777 | dlm_clear_members_gone(ls); |
722 | kfree(ls->ls_node_array); | 778 | kfree(ls->ls_node_array); |
779 | log_debug(ls, "release_lockspace final free"); | ||
723 | kobject_put(&ls->ls_kobj); | 780 | kobject_put(&ls->ls_kobj); |
724 | /* The ls structure will be freed when the kobject is done with */ | 781 | /* The ls structure will be freed when the kobject is done with */ |
725 | 782 | ||
726 | mutex_lock(&ls_lock); | ||
727 | ls_count--; | ||
728 | if (!ls_count) | ||
729 | threads_stop(); | ||
730 | mutex_unlock(&ls_lock); | ||
731 | |||
732 | module_put(THIS_MODULE); | 783 | module_put(THIS_MODULE); |
733 | return 0; | 784 | return 0; |
734 | } | 785 | } |
@@ -750,11 +801,38 @@ static int release_lockspace(struct dlm_ls *ls, int force) | |||
750 | int dlm_release_lockspace(void *lockspace, int force) | 801 | int dlm_release_lockspace(void *lockspace, int force) |
751 | { | 802 | { |
752 | struct dlm_ls *ls; | 803 | struct dlm_ls *ls; |
804 | int error; | ||
753 | 805 | ||
754 | ls = dlm_find_lockspace_local(lockspace); | 806 | ls = dlm_find_lockspace_local(lockspace); |
755 | if (!ls) | 807 | if (!ls) |
756 | return -EINVAL; | 808 | return -EINVAL; |
757 | dlm_put_lockspace(ls); | 809 | dlm_put_lockspace(ls); |
758 | return release_lockspace(ls, force); | 810 | |
811 | mutex_lock(&ls_lock); | ||
812 | error = release_lockspace(ls, force); | ||
813 | if (!error) | ||
814 | ls_count--; | ||
815 | else if (!ls_count) | ||
816 | threads_stop(); | ||
817 | mutex_unlock(&ls_lock); | ||
818 | |||
819 | return error; | ||
820 | } | ||
821 | |||
822 | void dlm_stop_lockspaces(void) | ||
823 | { | ||
824 | struct dlm_ls *ls; | ||
825 | |||
826 | restart: | ||
827 | spin_lock(&lslist_lock); | ||
828 | list_for_each_entry(ls, &lslist, ls_list) { | ||
829 | if (!test_bit(LSFL_RUNNING, &ls->ls_flags)) | ||
830 | continue; | ||
831 | spin_unlock(&lslist_lock); | ||
832 | log_error(ls, "no userland control daemon, stopping lockspace"); | ||
833 | dlm_ls_stop(ls); | ||
834 | goto restart; | ||
835 | } | ||
836 | spin_unlock(&lslist_lock); | ||
759 | } | 837 | } |
760 | 838 | ||
diff --git a/fs/dlm/lockspace.h b/fs/dlm/lockspace.h index 891eabbdd021..f879f87901f8 100644 --- a/fs/dlm/lockspace.h +++ b/fs/dlm/lockspace.h | |||
@@ -20,6 +20,7 @@ struct dlm_ls *dlm_find_lockspace_global(uint32_t id); | |||
20 | struct dlm_ls *dlm_find_lockspace_local(void *id); | 20 | struct dlm_ls *dlm_find_lockspace_local(void *id); |
21 | struct dlm_ls *dlm_find_lockspace_device(int minor); | 21 | struct dlm_ls *dlm_find_lockspace_device(int minor); |
22 | void dlm_put_lockspace(struct dlm_ls *ls); | 22 | void dlm_put_lockspace(struct dlm_ls *ls); |
23 | void dlm_stop_lockspaces(void); | ||
23 | 24 | ||
24 | #endif /* __LOCKSPACE_DOT_H__ */ | 25 | #endif /* __LOCKSPACE_DOT_H__ */ |
25 | 26 | ||
diff --git a/fs/dlm/user.c b/fs/dlm/user.c index 34f14a14fb4e..b3832c67194a 100644 --- a/fs/dlm/user.c +++ b/fs/dlm/user.c | |||
@@ -1,5 +1,5 @@ | |||
1 | /* | 1 | /* |
2 | * Copyright (C) 2006-2007 Red Hat, Inc. All rights reserved. | 2 | * Copyright (C) 2006-2008 Red Hat, Inc. All rights reserved. |
3 | * | 3 | * |
4 | * This copyrighted material is made available to anyone wishing to use, | 4 | * This copyrighted material is made available to anyone wishing to use, |
5 | * modify, copy, or redistribute it subject to the terms and conditions | 5 | * modify, copy, or redistribute it subject to the terms and conditions |
@@ -15,7 +15,6 @@ | |||
15 | #include <linux/poll.h> | 15 | #include <linux/poll.h> |
16 | #include <linux/signal.h> | 16 | #include <linux/signal.h> |
17 | #include <linux/spinlock.h> | 17 | #include <linux/spinlock.h> |
18 | #include <linux/smp_lock.h> | ||
19 | #include <linux/dlm.h> | 18 | #include <linux/dlm.h> |
20 | #include <linux/dlm_device.h> | 19 | #include <linux/dlm_device.h> |
21 | 20 | ||
@@ -27,6 +26,8 @@ | |||
27 | 26 | ||
28 | static const char name_prefix[] = "dlm"; | 27 | static const char name_prefix[] = "dlm"; |
29 | static const struct file_operations device_fops; | 28 | static const struct file_operations device_fops; |
29 | static atomic_t dlm_monitor_opened; | ||
30 | static int dlm_monitor_unused = 1; | ||
30 | 31 | ||
31 | #ifdef CONFIG_COMPAT | 32 | #ifdef CONFIG_COMPAT |
32 | 33 | ||
@@ -340,10 +341,15 @@ static int device_user_deadlock(struct dlm_user_proc *proc, | |||
340 | return error; | 341 | return error; |
341 | } | 342 | } |
342 | 343 | ||
343 | static int create_misc_device(struct dlm_ls *ls, char *name) | 344 | static int dlm_device_register(struct dlm_ls *ls, char *name) |
344 | { | 345 | { |
345 | int error, len; | 346 | int error, len; |
346 | 347 | ||
348 | /* The device is already registered. This happens when the | ||
349 | lockspace is created multiple times from userspace. */ | ||
350 | if (ls->ls_device.name) | ||
351 | return 0; | ||
352 | |||
347 | error = -ENOMEM; | 353 | error = -ENOMEM; |
348 | len = strlen(name) + strlen(name_prefix) + 2; | 354 | len = strlen(name) + strlen(name_prefix) + 2; |
349 | ls->ls_device.name = kzalloc(len, GFP_KERNEL); | 355 | ls->ls_device.name = kzalloc(len, GFP_KERNEL); |
@@ -363,6 +369,22 @@ fail: | |||
363 | return error; | 369 | return error; |
364 | } | 370 | } |
365 | 371 | ||
372 | int dlm_device_deregister(struct dlm_ls *ls) | ||
373 | { | ||
374 | int error; | ||
375 | |||
376 | /* The device is not registered. This happens when the lockspace | ||
377 | was never used from userspace, or when device_create_lockspace() | ||
378 | calls dlm_release_lockspace() after the register fails. */ | ||
379 | if (!ls->ls_device.name) | ||
380 | return 0; | ||
381 | |||
382 | error = misc_deregister(&ls->ls_device); | ||
383 | if (!error) | ||
384 | kfree(ls->ls_device.name); | ||
385 | return error; | ||
386 | } | ||
387 | |||
366 | static int device_user_purge(struct dlm_user_proc *proc, | 388 | static int device_user_purge(struct dlm_user_proc *proc, |
367 | struct dlm_purge_params *params) | 389 | struct dlm_purge_params *params) |
368 | { | 390 | { |
@@ -397,7 +419,7 @@ static int device_create_lockspace(struct dlm_lspace_params *params) | |||
397 | if (!ls) | 419 | if (!ls) |
398 | return -ENOENT; | 420 | return -ENOENT; |
399 | 421 | ||
400 | error = create_misc_device(ls, params->name); | 422 | error = dlm_device_register(ls, params->name); |
401 | dlm_put_lockspace(ls); | 423 | dlm_put_lockspace(ls); |
402 | 424 | ||
403 | if (error) | 425 | if (error) |
@@ -421,31 +443,22 @@ static int device_remove_lockspace(struct dlm_lspace_params *params) | |||
421 | if (!ls) | 443 | if (!ls) |
422 | return -ENOENT; | 444 | return -ENOENT; |
423 | 445 | ||
424 | /* Deregister the misc device first, so we don't have | ||
425 | * a device that's not attached to a lockspace. If | ||
426 | * dlm_release_lockspace fails then we can recreate it | ||
427 | */ | ||
428 | error = misc_deregister(&ls->ls_device); | ||
429 | if (error) { | ||
430 | dlm_put_lockspace(ls); | ||
431 | goto out; | ||
432 | } | ||
433 | kfree(ls->ls_device.name); | ||
434 | |||
435 | if (params->flags & DLM_USER_LSFLG_FORCEFREE) | 446 | if (params->flags & DLM_USER_LSFLG_FORCEFREE) |
436 | force = 2; | 447 | force = 2; |
437 | 448 | ||
438 | lockspace = ls->ls_local_handle; | 449 | lockspace = ls->ls_local_handle; |
450 | dlm_put_lockspace(ls); | ||
439 | 451 | ||
440 | /* dlm_release_lockspace waits for references to go to zero, | 452 | /* The final dlm_release_lockspace waits for references to go to |
441 | so all processes will need to close their device for the ls | 453 | zero, so all processes will need to close their device for the |
442 | before the release will procede */ | 454 | ls before the release will proceed. release also calls the |
455 | device_deregister above. Converting a positive return value | ||
456 | from release to zero means that userspace won't know when its | ||
457 | release was the final one, but it shouldn't need to know. */ | ||
443 | 458 | ||
444 | dlm_put_lockspace(ls); | ||
445 | error = dlm_release_lockspace(lockspace, force); | 459 | error = dlm_release_lockspace(lockspace, force); |
446 | if (error) | 460 | if (error > 0) |
447 | create_misc_device(ls, ls->ls_name); | 461 | error = 0; |
448 | out: | ||
449 | return error; | 462 | return error; |
450 | } | 463 | } |
451 | 464 | ||
@@ -623,17 +636,13 @@ static int device_open(struct inode *inode, struct file *file) | |||
623 | struct dlm_user_proc *proc; | 636 | struct dlm_user_proc *proc; |
624 | struct dlm_ls *ls; | 637 | struct dlm_ls *ls; |
625 | 638 | ||
626 | lock_kernel(); | ||
627 | ls = dlm_find_lockspace_device(iminor(inode)); | 639 | ls = dlm_find_lockspace_device(iminor(inode)); |
628 | if (!ls) { | 640 | if (!ls) |
629 | unlock_kernel(); | ||
630 | return -ENOENT; | 641 | return -ENOENT; |
631 | } | ||
632 | 642 | ||
633 | proc = kzalloc(sizeof(struct dlm_user_proc), GFP_KERNEL); | 643 | proc = kzalloc(sizeof(struct dlm_user_proc), GFP_KERNEL); |
634 | if (!proc) { | 644 | if (!proc) { |
635 | dlm_put_lockspace(ls); | 645 | dlm_put_lockspace(ls); |
636 | unlock_kernel(); | ||
637 | return -ENOMEM; | 646 | return -ENOMEM; |
638 | } | 647 | } |
639 | 648 | ||
@@ -645,7 +654,6 @@ static int device_open(struct inode *inode, struct file *file) | |||
645 | spin_lock_init(&proc->locks_spin); | 654 | spin_lock_init(&proc->locks_spin); |
646 | init_waitqueue_head(&proc->wait); | 655 | init_waitqueue_head(&proc->wait); |
647 | file->private_data = proc; | 656 | file->private_data = proc; |
648 | unlock_kernel(); | ||
649 | 657 | ||
650 | return 0; | 658 | return 0; |
651 | } | 659 | } |
@@ -878,9 +886,28 @@ static unsigned int device_poll(struct file *file, poll_table *wait) | |||
878 | return 0; | 886 | return 0; |
879 | } | 887 | } |
880 | 888 | ||
889 | int dlm_user_daemon_available(void) | ||
890 | { | ||
891 | /* dlm_controld hasn't started (or, has started, but not | ||
892 | properly populated configfs) */ | ||
893 | |||
894 | if (!dlm_our_nodeid()) | ||
895 | return 0; | ||
896 | |||
897 | /* This is to deal with versions of dlm_controld that don't | ||
898 | know about the monitor device. We assume that if the | ||
899 | dlm_controld was started (above), but the monitor device | ||
900 | was never opened, that it's an old version. dlm_controld | ||
901 | should open the monitor device before populating configfs. */ | ||
902 | |||
903 | if (dlm_monitor_unused) | ||
904 | return 1; | ||
905 | |||
906 | return atomic_read(&dlm_monitor_opened) ? 1 : 0; | ||
907 | } | ||
908 | |||
881 | static int ctl_device_open(struct inode *inode, struct file *file) | 909 | static int ctl_device_open(struct inode *inode, struct file *file) |
882 | { | 910 | { |
883 | cycle_kernel_lock(); | ||
884 | file->private_data = NULL; | 911 | file->private_data = NULL; |
885 | return 0; | 912 | return 0; |
886 | } | 913 | } |
@@ -890,6 +917,20 @@ static int ctl_device_close(struct inode *inode, struct file *file) | |||
890 | return 0; | 917 | return 0; |
891 | } | 918 | } |
892 | 919 | ||
920 | static int monitor_device_open(struct inode *inode, struct file *file) | ||
921 | { | ||
922 | atomic_inc(&dlm_monitor_opened); | ||
923 | dlm_monitor_unused = 0; | ||
924 | return 0; | ||
925 | } | ||
926 | |||
927 | static int monitor_device_close(struct inode *inode, struct file *file) | ||
928 | { | ||
929 | if (atomic_dec_and_test(&dlm_monitor_opened)) | ||
930 | dlm_stop_lockspaces(); | ||
931 | return 0; | ||
932 | } | ||
933 | |||
893 | static const struct file_operations device_fops = { | 934 | static const struct file_operations device_fops = { |
894 | .open = device_open, | 935 | .open = device_open, |
895 | .release = device_close, | 936 | .release = device_close, |
@@ -913,19 +954,42 @@ static struct miscdevice ctl_device = { | |||
913 | .minor = MISC_DYNAMIC_MINOR, | 954 | .minor = MISC_DYNAMIC_MINOR, |
914 | }; | 955 | }; |
915 | 956 | ||
957 | static const struct file_operations monitor_device_fops = { | ||
958 | .open = monitor_device_open, | ||
959 | .release = monitor_device_close, | ||
960 | .owner = THIS_MODULE, | ||
961 | }; | ||
962 | |||
963 | static struct miscdevice monitor_device = { | ||
964 | .name = "dlm-monitor", | ||
965 | .fops = &monitor_device_fops, | ||
966 | .minor = MISC_DYNAMIC_MINOR, | ||
967 | }; | ||
968 | |||
916 | int __init dlm_user_init(void) | 969 | int __init dlm_user_init(void) |
917 | { | 970 | { |
918 | int error; | 971 | int error; |
919 | 972 | ||
973 | atomic_set(&dlm_monitor_opened, 0); | ||
974 | |||
920 | error = misc_register(&ctl_device); | 975 | error = misc_register(&ctl_device); |
921 | if (error) | 976 | if (error) { |
922 | log_print("misc_register failed for control device"); | 977 | log_print("misc_register failed for control device"); |
978 | goto out; | ||
979 | } | ||
923 | 980 | ||
981 | error = misc_register(&monitor_device); | ||
982 | if (error) { | ||
983 | log_print("misc_register failed for monitor device"); | ||
984 | misc_deregister(&ctl_device); | ||
985 | } | ||
986 | out: | ||
924 | return error; | 987 | return error; |
925 | } | 988 | } |
926 | 989 | ||
927 | void dlm_user_exit(void) | 990 | void dlm_user_exit(void) |
928 | { | 991 | { |
929 | misc_deregister(&ctl_device); | 992 | misc_deregister(&ctl_device); |
993 | misc_deregister(&monitor_device); | ||
930 | } | 994 | } |
931 | 995 | ||
diff --git a/fs/dlm/user.h b/fs/dlm/user.h index d38e9f3e4151..35eb6a13d616 100644 --- a/fs/dlm/user.h +++ b/fs/dlm/user.h | |||
@@ -1,5 +1,5 @@ | |||
1 | /* | 1 | /* |
2 | * Copyright (C) 2006 Red Hat, Inc. All rights reserved. | 2 | * Copyright (C) 2006-2008 Red Hat, Inc. All rights reserved. |
3 | * | 3 | * |
4 | * This copyrighted material is made available to anyone wishing to use, | 4 | * This copyrighted material is made available to anyone wishing to use, |
5 | * modify, copy, or redistribute it subject to the terms and conditions | 5 | * modify, copy, or redistribute it subject to the terms and conditions |
@@ -12,5 +12,7 @@ | |||
12 | void dlm_user_add_ast(struct dlm_lkb *lkb, int type); | 12 | void dlm_user_add_ast(struct dlm_lkb *lkb, int type); |
13 | int dlm_user_init(void); | 13 | int dlm_user_init(void); |
14 | void dlm_user_exit(void); | 14 | void dlm_user_exit(void); |
15 | int dlm_device_deregister(struct dlm_ls *ls); | ||
16 | int dlm_user_daemon_available(void); | ||
15 | 17 | ||
16 | #endif | 18 | #endif |
@@ -752,11 +752,11 @@ static int exec_mmap(struct mm_struct *mm) | |||
752 | tsk->active_mm = mm; | 752 | tsk->active_mm = mm; |
753 | activate_mm(active_mm, mm); | 753 | activate_mm(active_mm, mm); |
754 | task_unlock(tsk); | 754 | task_unlock(tsk); |
755 | mm_update_next_owner(old_mm); | ||
756 | arch_pick_mmap_layout(mm); | 755 | arch_pick_mmap_layout(mm); |
757 | if (old_mm) { | 756 | if (old_mm) { |
758 | up_read(&old_mm->mmap_sem); | 757 | up_read(&old_mm->mmap_sem); |
759 | BUG_ON(active_mm != old_mm); | 758 | BUG_ON(active_mm != old_mm); |
759 | mm_update_next_owner(old_mm); | ||
760 | mmput(old_mm); | 760 | mmput(old_mm); |
761 | return 0; | 761 | return 0; |
762 | } | 762 | } |
diff --git a/fs/ext2/ext2.h b/fs/ext2/ext2.h index 47d88da2d33b..bae998c1e44e 100644 --- a/fs/ext2/ext2.h +++ b/fs/ext2/ext2.h | |||
@@ -133,6 +133,8 @@ extern void ext2_truncate (struct inode *); | |||
133 | extern int ext2_setattr (struct dentry *, struct iattr *); | 133 | extern int ext2_setattr (struct dentry *, struct iattr *); |
134 | extern void ext2_set_inode_flags(struct inode *inode); | 134 | extern void ext2_set_inode_flags(struct inode *inode); |
135 | extern void ext2_get_inode_flags(struct ext2_inode_info *); | 135 | extern void ext2_get_inode_flags(struct ext2_inode_info *); |
136 | extern int ext2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, | ||
137 | u64 start, u64 len); | ||
136 | int __ext2_write_begin(struct file *file, struct address_space *mapping, | 138 | int __ext2_write_begin(struct file *file, struct address_space *mapping, |
137 | loff_t pos, unsigned len, unsigned flags, | 139 | loff_t pos, unsigned len, unsigned flags, |
138 | struct page **pagep, void **fsdata); | 140 | struct page **pagep, void **fsdata); |
diff --git a/fs/ext2/file.c b/fs/ext2/file.c index 5f2fa9c36293..45ed07122182 100644 --- a/fs/ext2/file.c +++ b/fs/ext2/file.c | |||
@@ -86,4 +86,5 @@ const struct inode_operations ext2_file_inode_operations = { | |||
86 | #endif | 86 | #endif |
87 | .setattr = ext2_setattr, | 87 | .setattr = ext2_setattr, |
88 | .permission = ext2_permission, | 88 | .permission = ext2_permission, |
89 | .fiemap = ext2_fiemap, | ||
89 | }; | 90 | }; |
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c index 991d6dfeb51f..7658b33e2653 100644 --- a/fs/ext2/inode.c +++ b/fs/ext2/inode.c | |||
@@ -31,6 +31,7 @@ | |||
31 | #include <linux/writeback.h> | 31 | #include <linux/writeback.h> |
32 | #include <linux/buffer_head.h> | 32 | #include <linux/buffer_head.h> |
33 | #include <linux/mpage.h> | 33 | #include <linux/mpage.h> |
34 | #include <linux/fiemap.h> | ||
34 | #include "ext2.h" | 35 | #include "ext2.h" |
35 | #include "acl.h" | 36 | #include "acl.h" |
36 | #include "xip.h" | 37 | #include "xip.h" |
@@ -704,6 +705,13 @@ int ext2_get_block(struct inode *inode, sector_t iblock, struct buffer_head *bh_ | |||
704 | 705 | ||
705 | } | 706 | } |
706 | 707 | ||
708 | int ext2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, | ||
709 | u64 start, u64 len) | ||
710 | { | ||
711 | return generic_block_fiemap(inode, fieinfo, start, len, | ||
712 | ext2_get_block); | ||
713 | } | ||
714 | |||
707 | static int ext2_writepage(struct page *page, struct writeback_control *wbc) | 715 | static int ext2_writepage(struct page *page, struct writeback_control *wbc) |
708 | { | 716 | { |
709 | return block_write_full_page(page, ext2_get_block, wbc); | 717 | return block_write_full_page(page, ext2_get_block, wbc); |
diff --git a/fs/ext3/file.c b/fs/ext3/file.c index acc4913d3019..3be1e0689c9a 100644 --- a/fs/ext3/file.c +++ b/fs/ext3/file.c | |||
@@ -134,5 +134,6 @@ const struct inode_operations ext3_file_inode_operations = { | |||
134 | .removexattr = generic_removexattr, | 134 | .removexattr = generic_removexattr, |
135 | #endif | 135 | #endif |
136 | .permission = ext3_permission, | 136 | .permission = ext3_permission, |
137 | .fiemap = ext3_fiemap, | ||
137 | }; | 138 | }; |
138 | 139 | ||
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c index 507d8689b111..ebfec4d0148e 100644 --- a/fs/ext3/inode.c +++ b/fs/ext3/inode.c | |||
@@ -36,6 +36,7 @@ | |||
36 | #include <linux/mpage.h> | 36 | #include <linux/mpage.h> |
37 | #include <linux/uio.h> | 37 | #include <linux/uio.h> |
38 | #include <linux/bio.h> | 38 | #include <linux/bio.h> |
39 | #include <linux/fiemap.h> | ||
39 | #include "xattr.h" | 40 | #include "xattr.h" |
40 | #include "acl.h" | 41 | #include "acl.h" |
41 | 42 | ||
@@ -981,6 +982,13 @@ out: | |||
981 | return ret; | 982 | return ret; |
982 | } | 983 | } |
983 | 984 | ||
985 | int ext3_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, | ||
986 | u64 start, u64 len) | ||
987 | { | ||
988 | return generic_block_fiemap(inode, fieinfo, start, len, | ||
989 | ext3_get_block); | ||
990 | } | ||
991 | |||
984 | /* | 992 | /* |
985 | * `handle' can be NULL if create is zero | 993 | * `handle' can be NULL if create is zero |
986 | */ | 994 | */ |
diff --git a/fs/ext4/Makefile b/fs/ext4/Makefile index ac6fa8ca0a2f..a8ff003a00f7 100644 --- a/fs/ext4/Makefile +++ b/fs/ext4/Makefile | |||
@@ -2,12 +2,12 @@ | |||
2 | # Makefile for the linux ext4-filesystem routines. | 2 | # Makefile for the linux ext4-filesystem routines. |
3 | # | 3 | # |
4 | 4 | ||
5 | obj-$(CONFIG_EXT4DEV_FS) += ext4dev.o | 5 | obj-$(CONFIG_EXT4_FS) += ext4.o |
6 | 6 | ||
7 | ext4dev-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \ | 7 | ext4-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \ |
8 | ioctl.o namei.o super.o symlink.o hash.o resize.o extents.o \ | 8 | ioctl.o namei.o super.o symlink.o hash.o resize.o extents.o \ |
9 | ext4_jbd2.o migrate.o mballoc.o | 9 | ext4_jbd2.o migrate.o mballoc.o |
10 | 10 | ||
11 | ext4dev-$(CONFIG_EXT4DEV_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o | 11 | ext4-$(CONFIG_EXT4_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o |
12 | ext4dev-$(CONFIG_EXT4DEV_FS_POSIX_ACL) += acl.o | 12 | ext4-$(CONFIG_EXT4_FS_POSIX_ACL) += acl.o |
13 | ext4dev-$(CONFIG_EXT4DEV_FS_SECURITY) += xattr_security.o | 13 | ext4-$(CONFIG_EXT4_FS_SECURITY) += xattr_security.o |
diff --git a/fs/ext4/acl.h b/fs/ext4/acl.h index cd2b855a07d6..cb45257a246e 100644 --- a/fs/ext4/acl.h +++ b/fs/ext4/acl.h | |||
@@ -51,18 +51,18 @@ static inline int ext4_acl_count(size_t size) | |||
51 | } | 51 | } |
52 | } | 52 | } |
53 | 53 | ||
54 | #ifdef CONFIG_EXT4DEV_FS_POSIX_ACL | 54 | #ifdef CONFIG_EXT4_FS_POSIX_ACL |
55 | 55 | ||
56 | /* Value for inode->u.ext4_i.i_acl and inode->u.ext4_i.i_default_acl | 56 | /* Value for inode->u.ext4_i.i_acl and inode->u.ext4_i.i_default_acl |
57 | if the ACL has not been cached */ | 57 | if the ACL has not been cached */ |
58 | #define EXT4_ACL_NOT_CACHED ((void *)-1) | 58 | #define EXT4_ACL_NOT_CACHED ((void *)-1) |
59 | 59 | ||
60 | /* acl.c */ | 60 | /* acl.c */ |
61 | extern int ext4_permission (struct inode *, int); | 61 | extern int ext4_permission(struct inode *, int); |
62 | extern int ext4_acl_chmod (struct inode *); | 62 | extern int ext4_acl_chmod(struct inode *); |
63 | extern int ext4_init_acl (handle_t *, struct inode *, struct inode *); | 63 | extern int ext4_init_acl(handle_t *, struct inode *, struct inode *); |
64 | 64 | ||
65 | #else /* CONFIG_EXT4DEV_FS_POSIX_ACL */ | 65 | #else /* CONFIG_EXT4_FS_POSIX_ACL */ |
66 | #include <linux/sched.h> | 66 | #include <linux/sched.h> |
67 | #define ext4_permission NULL | 67 | #define ext4_permission NULL |
68 | 68 | ||
@@ -77,5 +77,5 @@ ext4_init_acl(handle_t *handle, struct inode *inode, struct inode *dir) | |||
77 | { | 77 | { |
78 | return 0; | 78 | return 0; |
79 | } | 79 | } |
80 | #endif /* CONFIG_EXT4DEV_FS_POSIX_ACL */ | 80 | #endif /* CONFIG_EXT4_FS_POSIX_ACL */ |
81 | 81 | ||
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index e9fa960ba6da..bd2ece228827 100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c | |||
@@ -83,6 +83,7 @@ static int ext4_group_used_meta_blocks(struct super_block *sb, | |||
83 | } | 83 | } |
84 | return used_blocks; | 84 | return used_blocks; |
85 | } | 85 | } |
86 | |||
86 | /* Initializes an uninitialized block bitmap if given, and returns the | 87 | /* Initializes an uninitialized block bitmap if given, and returns the |
87 | * number of blocks free in the group. */ | 88 | * number of blocks free in the group. */ |
88 | unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh, | 89 | unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh, |
@@ -132,7 +133,7 @@ unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh, | |||
132 | */ | 133 | */ |
133 | group_blocks = ext4_blocks_count(sbi->s_es) - | 134 | group_blocks = ext4_blocks_count(sbi->s_es) - |
134 | le32_to_cpu(sbi->s_es->s_first_data_block) - | 135 | le32_to_cpu(sbi->s_es->s_first_data_block) - |
135 | (EXT4_BLOCKS_PER_GROUP(sb) * (sbi->s_groups_count -1)); | 136 | (EXT4_BLOCKS_PER_GROUP(sb) * (sbi->s_groups_count - 1)); |
136 | } else { | 137 | } else { |
137 | group_blocks = EXT4_BLOCKS_PER_GROUP(sb); | 138 | group_blocks = EXT4_BLOCKS_PER_GROUP(sb); |
138 | } | 139 | } |
@@ -200,20 +201,20 @@ unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh, | |||
200 | * @bh: pointer to the buffer head to store the block | 201 | * @bh: pointer to the buffer head to store the block |
201 | * group descriptor | 202 | * group descriptor |
202 | */ | 203 | */ |
203 | struct ext4_group_desc * ext4_get_group_desc(struct super_block * sb, | 204 | struct ext4_group_desc * ext4_get_group_desc(struct super_block *sb, |
204 | ext4_group_t block_group, | 205 | ext4_group_t block_group, |
205 | struct buffer_head ** bh) | 206 | struct buffer_head **bh) |
206 | { | 207 | { |
207 | unsigned long group_desc; | 208 | unsigned long group_desc; |
208 | unsigned long offset; | 209 | unsigned long offset; |
209 | struct ext4_group_desc * desc; | 210 | struct ext4_group_desc *desc; |
210 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 211 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
211 | 212 | ||
212 | if (block_group >= sbi->s_groups_count) { | 213 | if (block_group >= sbi->s_groups_count) { |
213 | ext4_error (sb, "ext4_get_group_desc", | 214 | ext4_error(sb, "ext4_get_group_desc", |
214 | "block_group >= groups_count - " | 215 | "block_group >= groups_count - " |
215 | "block_group = %lu, groups_count = %lu", | 216 | "block_group = %lu, groups_count = %lu", |
216 | block_group, sbi->s_groups_count); | 217 | block_group, sbi->s_groups_count); |
217 | 218 | ||
218 | return NULL; | 219 | return NULL; |
219 | } | 220 | } |
@@ -222,10 +223,10 @@ struct ext4_group_desc * ext4_get_group_desc(struct super_block * sb, | |||
222 | group_desc = block_group >> EXT4_DESC_PER_BLOCK_BITS(sb); | 223 | group_desc = block_group >> EXT4_DESC_PER_BLOCK_BITS(sb); |
223 | offset = block_group & (EXT4_DESC_PER_BLOCK(sb) - 1); | 224 | offset = block_group & (EXT4_DESC_PER_BLOCK(sb) - 1); |
224 | if (!sbi->s_group_desc[group_desc]) { | 225 | if (!sbi->s_group_desc[group_desc]) { |
225 | ext4_error (sb, "ext4_get_group_desc", | 226 | ext4_error(sb, "ext4_get_group_desc", |
226 | "Group descriptor not loaded - " | 227 | "Group descriptor not loaded - " |
227 | "block_group = %lu, group_desc = %lu, desc = %lu", | 228 | "block_group = %lu, group_desc = %lu, desc = %lu", |
228 | block_group, group_desc, offset); | 229 | block_group, group_desc, offset); |
229 | return NULL; | 230 | return NULL; |
230 | } | 231 | } |
231 | 232 | ||
@@ -302,8 +303,8 @@ err_out: | |||
302 | struct buffer_head * | 303 | struct buffer_head * |
303 | ext4_read_block_bitmap(struct super_block *sb, ext4_group_t block_group) | 304 | ext4_read_block_bitmap(struct super_block *sb, ext4_group_t block_group) |
304 | { | 305 | { |
305 | struct ext4_group_desc * desc; | 306 | struct ext4_group_desc *desc; |
306 | struct buffer_head * bh = NULL; | 307 | struct buffer_head *bh = NULL; |
307 | ext4_fsblk_t bitmap_blk; | 308 | ext4_fsblk_t bitmap_blk; |
308 | 309 | ||
309 | desc = ext4_get_group_desc(sb, block_group, NULL); | 310 | desc = ext4_get_group_desc(sb, block_group, NULL); |
@@ -318,9 +319,11 @@ ext4_read_block_bitmap(struct super_block *sb, ext4_group_t block_group) | |||
318 | block_group, bitmap_blk); | 319 | block_group, bitmap_blk); |
319 | return NULL; | 320 | return NULL; |
320 | } | 321 | } |
321 | if (bh_uptodate_or_lock(bh)) | 322 | if (buffer_uptodate(bh) && |
323 | !(desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT))) | ||
322 | return bh; | 324 | return bh; |
323 | 325 | ||
326 | lock_buffer(bh); | ||
324 | spin_lock(sb_bgl_lock(EXT4_SB(sb), block_group)); | 327 | spin_lock(sb_bgl_lock(EXT4_SB(sb), block_group)); |
325 | if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { | 328 | if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { |
326 | ext4_init_block_bitmap(sb, bh, block_group, desc); | 329 | ext4_init_block_bitmap(sb, bh, block_group, desc); |
@@ -345,301 +348,6 @@ ext4_read_block_bitmap(struct super_block *sb, ext4_group_t block_group) | |||
345 | */ | 348 | */ |
346 | return bh; | 349 | return bh; |
347 | } | 350 | } |
348 | /* | ||
349 | * The reservation window structure operations | ||
350 | * -------------------------------------------- | ||
351 | * Operations include: | ||
352 | * dump, find, add, remove, is_empty, find_next_reservable_window, etc. | ||
353 | * | ||
354 | * We use a red-black tree to represent per-filesystem reservation | ||
355 | * windows. | ||
356 | * | ||
357 | */ | ||
358 | |||
359 | /** | ||
360 | * __rsv_window_dump() -- Dump the filesystem block allocation reservation map | ||
361 | * @rb_root: root of per-filesystem reservation rb tree | ||
362 | * @verbose: verbose mode | ||
363 | * @fn: function which wishes to dump the reservation map | ||
364 | * | ||
365 | * If verbose is turned on, it will print the whole block reservation | ||
366 | * windows(start, end). Otherwise, it will only print out the "bad" windows, | ||
367 | * those windows that overlap with their immediate neighbors. | ||
368 | */ | ||
369 | #if 1 | ||
370 | static void __rsv_window_dump(struct rb_root *root, int verbose, | ||
371 | const char *fn) | ||
372 | { | ||
373 | struct rb_node *n; | ||
374 | struct ext4_reserve_window_node *rsv, *prev; | ||
375 | int bad; | ||
376 | |||
377 | restart: | ||
378 | n = rb_first(root); | ||
379 | bad = 0; | ||
380 | prev = NULL; | ||
381 | |||
382 | printk("Block Allocation Reservation Windows Map (%s):\n", fn); | ||
383 | while (n) { | ||
384 | rsv = rb_entry(n, struct ext4_reserve_window_node, rsv_node); | ||
385 | if (verbose) | ||
386 | printk("reservation window 0x%p " | ||
387 | "start: %llu, end: %llu\n", | ||
388 | rsv, rsv->rsv_start, rsv->rsv_end); | ||
389 | if (rsv->rsv_start && rsv->rsv_start >= rsv->rsv_end) { | ||
390 | printk("Bad reservation %p (start >= end)\n", | ||
391 | rsv); | ||
392 | bad = 1; | ||
393 | } | ||
394 | if (prev && prev->rsv_end >= rsv->rsv_start) { | ||
395 | printk("Bad reservation %p (prev->end >= start)\n", | ||
396 | rsv); | ||
397 | bad = 1; | ||
398 | } | ||
399 | if (bad) { | ||
400 | if (!verbose) { | ||
401 | printk("Restarting reservation walk in verbose mode\n"); | ||
402 | verbose = 1; | ||
403 | goto restart; | ||
404 | } | ||
405 | } | ||
406 | n = rb_next(n); | ||
407 | prev = rsv; | ||
408 | } | ||
409 | printk("Window map complete.\n"); | ||
410 | BUG_ON(bad); | ||
411 | } | ||
412 | #define rsv_window_dump(root, verbose) \ | ||
413 | __rsv_window_dump((root), (verbose), __func__) | ||
414 | #else | ||
415 | #define rsv_window_dump(root, verbose) do {} while (0) | ||
416 | #endif | ||
417 | |||
418 | /** | ||
419 | * goal_in_my_reservation() | ||
420 | * @rsv: inode's reservation window | ||
421 | * @grp_goal: given goal block relative to the allocation block group | ||
422 | * @group: the current allocation block group | ||
423 | * @sb: filesystem super block | ||
424 | * | ||
425 | * Test if the given goal block (group relative) is within the file's | ||
426 | * own block reservation window range. | ||
427 | * | ||
428 | * If the reservation window is outside the goal allocation group, return 0; | ||
429 | * grp_goal (given goal block) could be -1, which means no specific | ||
430 | * goal block. In this case, always return 1. | ||
431 | * If the goal block is within the reservation window, return 1; | ||
432 | * otherwise, return 0; | ||
433 | */ | ||
434 | static int | ||
435 | goal_in_my_reservation(struct ext4_reserve_window *rsv, ext4_grpblk_t grp_goal, | ||
436 | ext4_group_t group, struct super_block *sb) | ||
437 | { | ||
438 | ext4_fsblk_t group_first_block, group_last_block; | ||
439 | |||
440 | group_first_block = ext4_group_first_block_no(sb, group); | ||
441 | group_last_block = group_first_block + (EXT4_BLOCKS_PER_GROUP(sb) - 1); | ||
442 | |||
443 | if ((rsv->_rsv_start > group_last_block) || | ||
444 | (rsv->_rsv_end < group_first_block)) | ||
445 | return 0; | ||
446 | if ((grp_goal >= 0) && ((grp_goal + group_first_block < rsv->_rsv_start) | ||
447 | || (grp_goal + group_first_block > rsv->_rsv_end))) | ||
448 | return 0; | ||
449 | return 1; | ||
450 | } | ||
451 | |||
452 | /** | ||
453 | * search_reserve_window() | ||
454 | * @rb_root: root of reservation tree | ||
455 | * @goal: target allocation block | ||
456 | * | ||
457 | * Find the reserved window which includes the goal, or the previous one | ||
458 | * if the goal is not in any window. | ||
459 | * Returns NULL if there are no windows or if all windows start after the goal. | ||
460 | */ | ||
461 | static struct ext4_reserve_window_node * | ||
462 | search_reserve_window(struct rb_root *root, ext4_fsblk_t goal) | ||
463 | { | ||
464 | struct rb_node *n = root->rb_node; | ||
465 | struct ext4_reserve_window_node *rsv; | ||
466 | |||
467 | if (!n) | ||
468 | return NULL; | ||
469 | |||
470 | do { | ||
471 | rsv = rb_entry(n, struct ext4_reserve_window_node, rsv_node); | ||
472 | |||
473 | if (goal < rsv->rsv_start) | ||
474 | n = n->rb_left; | ||
475 | else if (goal > rsv->rsv_end) | ||
476 | n = n->rb_right; | ||
477 | else | ||
478 | return rsv; | ||
479 | } while (n); | ||
480 | /* | ||
481 | * We've fallen off the end of the tree: the goal wasn't inside | ||
482 | * any particular node. OK, the previous node must be to one | ||
483 | * side of the interval containing the goal. If it's the RHS, | ||
484 | * we need to back up one. | ||
485 | */ | ||
486 | if (rsv->rsv_start > goal) { | ||
487 | n = rb_prev(&rsv->rsv_node); | ||
488 | rsv = rb_entry(n, struct ext4_reserve_window_node, rsv_node); | ||
489 | } | ||
490 | return rsv; | ||
491 | } | ||
492 | |||
493 | /** | ||
494 | * ext4_rsv_window_add() -- Insert a window to the block reservation rb tree. | ||
495 | * @sb: super block | ||
496 | * @rsv: reservation window to add | ||
497 | * | ||
498 | * Must be called with rsv_lock hold. | ||
499 | */ | ||
500 | void ext4_rsv_window_add(struct super_block *sb, | ||
501 | struct ext4_reserve_window_node *rsv) | ||
502 | { | ||
503 | struct rb_root *root = &EXT4_SB(sb)->s_rsv_window_root; | ||
504 | struct rb_node *node = &rsv->rsv_node; | ||
505 | ext4_fsblk_t start = rsv->rsv_start; | ||
506 | |||
507 | struct rb_node ** p = &root->rb_node; | ||
508 | struct rb_node * parent = NULL; | ||
509 | struct ext4_reserve_window_node *this; | ||
510 | |||
511 | while (*p) | ||
512 | { | ||
513 | parent = *p; | ||
514 | this = rb_entry(parent, struct ext4_reserve_window_node, rsv_node); | ||
515 | |||
516 | if (start < this->rsv_start) | ||
517 | p = &(*p)->rb_left; | ||
518 | else if (start > this->rsv_end) | ||
519 | p = &(*p)->rb_right; | ||
520 | else { | ||
521 | rsv_window_dump(root, 1); | ||
522 | BUG(); | ||
523 | } | ||
524 | } | ||
525 | |||
526 | rb_link_node(node, parent, p); | ||
527 | rb_insert_color(node, root); | ||
528 | } | ||
529 | |||
530 | /** | ||
531 | * ext4_rsv_window_remove() -- unlink a window from the reservation rb tree | ||
532 | * @sb: super block | ||
533 | * @rsv: reservation window to remove | ||
534 | * | ||
535 | * Mark the block reservation window as not allocated, and unlink it | ||
536 | * from the filesystem reservation window rb tree. Must be called with | ||
537 | * rsv_lock hold. | ||
538 | */ | ||
539 | static void rsv_window_remove(struct super_block *sb, | ||
540 | struct ext4_reserve_window_node *rsv) | ||
541 | { | ||
542 | rsv->rsv_start = EXT4_RESERVE_WINDOW_NOT_ALLOCATED; | ||
543 | rsv->rsv_end = EXT4_RESERVE_WINDOW_NOT_ALLOCATED; | ||
544 | rsv->rsv_alloc_hit = 0; | ||
545 | rb_erase(&rsv->rsv_node, &EXT4_SB(sb)->s_rsv_window_root); | ||
546 | } | ||
547 | |||
548 | /* | ||
549 | * rsv_is_empty() -- Check if the reservation window is allocated. | ||
550 | * @rsv: given reservation window to check | ||
551 | * | ||
552 | * returns 1 if the end block is EXT4_RESERVE_WINDOW_NOT_ALLOCATED. | ||
553 | */ | ||
554 | static inline int rsv_is_empty(struct ext4_reserve_window *rsv) | ||
555 | { | ||
556 | /* a valid reservation end block could not be 0 */ | ||
557 | return rsv->_rsv_end == EXT4_RESERVE_WINDOW_NOT_ALLOCATED; | ||
558 | } | ||
559 | |||
560 | /** | ||
561 | * ext4_init_block_alloc_info() | ||
562 | * @inode: file inode structure | ||
563 | * | ||
564 | * Allocate and initialize the reservation window structure, and | ||
565 | * link the window to the ext4 inode structure at last | ||
566 | * | ||
567 | * The reservation window structure is only dynamically allocated | ||
568 | * and linked to ext4 inode the first time the open file | ||
569 | * needs a new block. So, before every ext4_new_block(s) call, for | ||
570 | * regular files, we should check whether the reservation window | ||
571 | * structure exists or not. In the latter case, this function is called. | ||
572 | * Fail to do so will result in block reservation being turned off for that | ||
573 | * open file. | ||
574 | * | ||
575 | * This function is called from ext4_get_blocks_handle(), also called | ||
576 | * when setting the reservation window size through ioctl before the file | ||
577 | * is open for write (needs block allocation). | ||
578 | * | ||
579 | * Needs down_write(i_data_sem) protection prior to call this function. | ||
580 | */ | ||
581 | void ext4_init_block_alloc_info(struct inode *inode) | ||
582 | { | ||
583 | struct ext4_inode_info *ei = EXT4_I(inode); | ||
584 | struct ext4_block_alloc_info *block_i = ei->i_block_alloc_info; | ||
585 | struct super_block *sb = inode->i_sb; | ||
586 | |||
587 | block_i = kmalloc(sizeof(*block_i), GFP_NOFS); | ||
588 | if (block_i) { | ||
589 | struct ext4_reserve_window_node *rsv = &block_i->rsv_window_node; | ||
590 | |||
591 | rsv->rsv_start = EXT4_RESERVE_WINDOW_NOT_ALLOCATED; | ||
592 | rsv->rsv_end = EXT4_RESERVE_WINDOW_NOT_ALLOCATED; | ||
593 | |||
594 | /* | ||
595 | * if filesystem is mounted with NORESERVATION, the goal | ||
596 | * reservation window size is set to zero to indicate | ||
597 | * block reservation is off | ||
598 | */ | ||
599 | if (!test_opt(sb, RESERVATION)) | ||
600 | rsv->rsv_goal_size = 0; | ||
601 | else | ||
602 | rsv->rsv_goal_size = EXT4_DEFAULT_RESERVE_BLOCKS; | ||
603 | rsv->rsv_alloc_hit = 0; | ||
604 | block_i->last_alloc_logical_block = 0; | ||
605 | block_i->last_alloc_physical_block = 0; | ||
606 | } | ||
607 | ei->i_block_alloc_info = block_i; | ||
608 | } | ||
609 | |||
610 | /** | ||
611 | * ext4_discard_reservation() | ||
612 | * @inode: inode | ||
613 | * | ||
614 | * Discard(free) block reservation window on last file close, or truncate | ||
615 | * or at last iput(). | ||
616 | * | ||
617 | * It is being called in three cases: | ||
618 | * ext4_release_file(): last writer close the file | ||
619 | * ext4_clear_inode(): last iput(), when nobody link to this file. | ||
620 | * ext4_truncate(): when the block indirect map is about to change. | ||
621 | * | ||
622 | */ | ||
623 | void ext4_discard_reservation(struct inode *inode) | ||
624 | { | ||
625 | struct ext4_inode_info *ei = EXT4_I(inode); | ||
626 | struct ext4_block_alloc_info *block_i = ei->i_block_alloc_info; | ||
627 | struct ext4_reserve_window_node *rsv; | ||
628 | spinlock_t *rsv_lock = &EXT4_SB(inode->i_sb)->s_rsv_window_lock; | ||
629 | |||
630 | ext4_mb_discard_inode_preallocations(inode); | ||
631 | |||
632 | if (!block_i) | ||
633 | return; | ||
634 | |||
635 | rsv = &block_i->rsv_window_node; | ||
636 | if (!rsv_is_empty(&rsv->rsv_window)) { | ||
637 | spin_lock(rsv_lock); | ||
638 | if (!rsv_is_empty(&rsv->rsv_window)) | ||
639 | rsv_window_remove(inode->i_sb, rsv); | ||
640 | spin_unlock(rsv_lock); | ||
641 | } | ||
642 | } | ||
643 | 351 | ||
644 | /** | 352 | /** |
645 | * ext4_free_blocks_sb() -- Free given blocks and update quota | 353 | * ext4_free_blocks_sb() -- Free given blocks and update quota |
@@ -648,6 +356,13 @@ void ext4_discard_reservation(struct inode *inode) | |||
648 | * @block: start physcial block to free | 356 | * @block: start physcial block to free |
649 | * @count: number of blocks to free | 357 | * @count: number of blocks to free |
650 | * @pdquot_freed_blocks: pointer to quota | 358 | * @pdquot_freed_blocks: pointer to quota |
359 | * | ||
360 | * XXX This function is only used by the on-line resizing code, which | ||
361 | * should probably be fixed up to call the mballoc variant. There | ||
362 | * this needs to be cleaned up later; in fact, I'm not convinced this | ||
363 | * is 100% correct in the face of the mballoc code. The online resizing | ||
364 | * code needs to be fixed up to more tightly (and correctly) interlock | ||
365 | * with the mballoc code. | ||
651 | */ | 366 | */ |
652 | void ext4_free_blocks_sb(handle_t *handle, struct super_block *sb, | 367 | void ext4_free_blocks_sb(handle_t *handle, struct super_block *sb, |
653 | ext4_fsblk_t block, unsigned long count, | 368 | ext4_fsblk_t block, unsigned long count, |
@@ -659,8 +374,8 @@ void ext4_free_blocks_sb(handle_t *handle, struct super_block *sb, | |||
659 | ext4_grpblk_t bit; | 374 | ext4_grpblk_t bit; |
660 | unsigned long i; | 375 | unsigned long i; |
661 | unsigned long overflow; | 376 | unsigned long overflow; |
662 | struct ext4_group_desc * desc; | 377 | struct ext4_group_desc *desc; |
663 | struct ext4_super_block * es; | 378 | struct ext4_super_block *es; |
664 | struct ext4_sb_info *sbi; | 379 | struct ext4_sb_info *sbi; |
665 | int err = 0, ret; | 380 | int err = 0, ret; |
666 | ext4_grpblk_t group_freed; | 381 | ext4_grpblk_t group_freed; |
@@ -671,13 +386,13 @@ void ext4_free_blocks_sb(handle_t *handle, struct super_block *sb, | |||
671 | if (block < le32_to_cpu(es->s_first_data_block) || | 386 | if (block < le32_to_cpu(es->s_first_data_block) || |
672 | block + count < block || | 387 | block + count < block || |
673 | block + count > ext4_blocks_count(es)) { | 388 | block + count > ext4_blocks_count(es)) { |
674 | ext4_error (sb, "ext4_free_blocks", | 389 | ext4_error(sb, "ext4_free_blocks", |
675 | "Freeing blocks not in datazone - " | 390 | "Freeing blocks not in datazone - " |
676 | "block = %llu, count = %lu", block, count); | 391 | "block = %llu, count = %lu", block, count); |
677 | goto error_return; | 392 | goto error_return; |
678 | } | 393 | } |
679 | 394 | ||
680 | ext4_debug ("freeing block(s) %llu-%llu\n", block, block + count - 1); | 395 | ext4_debug("freeing block(s) %llu-%llu\n", block, block + count - 1); |
681 | 396 | ||
682 | do_more: | 397 | do_more: |
683 | overflow = 0; | 398 | overflow = 0; |
@@ -694,7 +409,7 @@ do_more: | |||
694 | bitmap_bh = ext4_read_block_bitmap(sb, block_group); | 409 | bitmap_bh = ext4_read_block_bitmap(sb, block_group); |
695 | if (!bitmap_bh) | 410 | if (!bitmap_bh) |
696 | goto error_return; | 411 | goto error_return; |
697 | desc = ext4_get_group_desc (sb, block_group, &gd_bh); | 412 | desc = ext4_get_group_desc(sb, block_group, &gd_bh); |
698 | if (!desc) | 413 | if (!desc) |
699 | goto error_return; | 414 | goto error_return; |
700 | 415 | ||
@@ -703,10 +418,10 @@ do_more: | |||
703 | in_range(block, ext4_inode_table(sb, desc), sbi->s_itb_per_group) || | 418 | in_range(block, ext4_inode_table(sb, desc), sbi->s_itb_per_group) || |
704 | in_range(block + count - 1, ext4_inode_table(sb, desc), | 419 | in_range(block + count - 1, ext4_inode_table(sb, desc), |
705 | sbi->s_itb_per_group)) { | 420 | sbi->s_itb_per_group)) { |
706 | ext4_error (sb, "ext4_free_blocks", | 421 | ext4_error(sb, "ext4_free_blocks", |
707 | "Freeing blocks in system zones - " | 422 | "Freeing blocks in system zones - " |
708 | "Block = %llu, count = %lu", | 423 | "Block = %llu, count = %lu", |
709 | block, count); | 424 | block, count); |
710 | goto error_return; | 425 | goto error_return; |
711 | } | 426 | } |
712 | 427 | ||
@@ -848,7 +563,7 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode, | |||
848 | ext4_fsblk_t block, unsigned long count, | 563 | ext4_fsblk_t block, unsigned long count, |
849 | int metadata) | 564 | int metadata) |
850 | { | 565 | { |
851 | struct super_block * sb; | 566 | struct super_block *sb; |
852 | unsigned long dquot_freed_blocks; | 567 | unsigned long dquot_freed_blocks; |
853 | 568 | ||
854 | /* this isn't the right place to decide whether block is metadata | 569 | /* this isn't the right place to decide whether block is metadata |
@@ -859,748 +574,52 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode, | |||
859 | 574 | ||
860 | sb = inode->i_sb; | 575 | sb = inode->i_sb; |
861 | 576 | ||
862 | if (!test_opt(sb, MBALLOC) || !EXT4_SB(sb)->s_group_info) | 577 | ext4_mb_free_blocks(handle, inode, block, count, |
863 | ext4_free_blocks_sb(handle, sb, block, count, | 578 | metadata, &dquot_freed_blocks); |
864 | &dquot_freed_blocks); | ||
865 | else | ||
866 | ext4_mb_free_blocks(handle, inode, block, count, | ||
867 | metadata, &dquot_freed_blocks); | ||
868 | if (dquot_freed_blocks) | 579 | if (dquot_freed_blocks) |
869 | DQUOT_FREE_BLOCK(inode, dquot_freed_blocks); | 580 | DQUOT_FREE_BLOCK(inode, dquot_freed_blocks); |
870 | return; | 581 | return; |
871 | } | 582 | } |
872 | 583 | ||
873 | /** | 584 | int ext4_claim_free_blocks(struct ext4_sb_info *sbi, |
874 | * ext4_test_allocatable() | 585 | s64 nblocks) |
875 | * @nr: given allocation block group | ||
876 | * @bh: bufferhead contains the bitmap of the given block group | ||
877 | * | ||
878 | * For ext4 allocations, we must not reuse any blocks which are | ||
879 | * allocated in the bitmap buffer's "last committed data" copy. This | ||
880 | * prevents deletes from freeing up the page for reuse until we have | ||
881 | * committed the delete transaction. | ||
882 | * | ||
883 | * If we didn't do this, then deleting something and reallocating it as | ||
884 | * data would allow the old block to be overwritten before the | ||
885 | * transaction committed (because we force data to disk before commit). | ||
886 | * This would lead to corruption if we crashed between overwriting the | ||
887 | * data and committing the delete. | ||
888 | * | ||
889 | * @@@ We may want to make this allocation behaviour conditional on | ||
890 | * data-writes at some point, and disable it for metadata allocations or | ||
891 | * sync-data inodes. | ||
892 | */ | ||
893 | static int ext4_test_allocatable(ext4_grpblk_t nr, struct buffer_head *bh) | ||
894 | { | ||
895 | int ret; | ||
896 | struct journal_head *jh = bh2jh(bh); | ||
897 | |||
898 | if (ext4_test_bit(nr, bh->b_data)) | ||
899 | return 0; | ||
900 | |||
901 | jbd_lock_bh_state(bh); | ||
902 | if (!jh->b_committed_data) | ||
903 | ret = 1; | ||
904 | else | ||
905 | ret = !ext4_test_bit(nr, jh->b_committed_data); | ||
906 | jbd_unlock_bh_state(bh); | ||
907 | return ret; | ||
908 | } | ||
909 | |||
910 | /** | ||
911 | * bitmap_search_next_usable_block() | ||
912 | * @start: the starting block (group relative) of the search | ||
913 | * @bh: bufferhead contains the block group bitmap | ||
914 | * @maxblocks: the ending block (group relative) of the reservation | ||
915 | * | ||
916 | * The bitmap search --- search forward alternately through the actual | ||
917 | * bitmap on disk and the last-committed copy in journal, until we find a | ||
918 | * bit free in both bitmaps. | ||
919 | */ | ||
920 | static ext4_grpblk_t | ||
921 | bitmap_search_next_usable_block(ext4_grpblk_t start, struct buffer_head *bh, | ||
922 | ext4_grpblk_t maxblocks) | ||
923 | { | 586 | { |
924 | ext4_grpblk_t next; | 587 | s64 free_blocks, dirty_blocks; |
925 | struct journal_head *jh = bh2jh(bh); | 588 | s64 root_blocks = 0; |
926 | 589 | struct percpu_counter *fbc = &sbi->s_freeblocks_counter; | |
927 | while (start < maxblocks) { | 590 | struct percpu_counter *dbc = &sbi->s_dirtyblocks_counter; |
928 | next = ext4_find_next_zero_bit(bh->b_data, maxblocks, start); | ||
929 | if (next >= maxblocks) | ||
930 | return -1; | ||
931 | if (ext4_test_allocatable(next, bh)) | ||
932 | return next; | ||
933 | jbd_lock_bh_state(bh); | ||
934 | if (jh->b_committed_data) | ||
935 | start = ext4_find_next_zero_bit(jh->b_committed_data, | ||
936 | maxblocks, next); | ||
937 | jbd_unlock_bh_state(bh); | ||
938 | } | ||
939 | return -1; | ||
940 | } | ||
941 | 591 | ||
942 | /** | 592 | free_blocks = percpu_counter_read_positive(fbc); |
943 | * find_next_usable_block() | 593 | dirty_blocks = percpu_counter_read_positive(dbc); |
944 | * @start: the starting block (group relative) to find next | ||
945 | * allocatable block in bitmap. | ||
946 | * @bh: bufferhead contains the block group bitmap | ||
947 | * @maxblocks: the ending block (group relative) for the search | ||
948 | * | ||
949 | * Find an allocatable block in a bitmap. We honor both the bitmap and | ||
950 | * its last-committed copy (if that exists), and perform the "most | ||
951 | * appropriate allocation" algorithm of looking for a free block near | ||
952 | * the initial goal; then for a free byte somewhere in the bitmap; then | ||
953 | * for any free bit in the bitmap. | ||
954 | */ | ||
955 | static ext4_grpblk_t | ||
956 | find_next_usable_block(ext4_grpblk_t start, struct buffer_head *bh, | ||
957 | ext4_grpblk_t maxblocks) | ||
958 | { | ||
959 | ext4_grpblk_t here, next; | ||
960 | char *p, *r; | ||
961 | |||
962 | if (start > 0) { | ||
963 | /* | ||
964 | * The goal was occupied; search forward for a free | ||
965 | * block within the next XX blocks. | ||
966 | * | ||
967 | * end_goal is more or less random, but it has to be | ||
968 | * less than EXT4_BLOCKS_PER_GROUP. Aligning up to the | ||
969 | * next 64-bit boundary is simple.. | ||
970 | */ | ||
971 | ext4_grpblk_t end_goal = (start + 63) & ~63; | ||
972 | if (end_goal > maxblocks) | ||
973 | end_goal = maxblocks; | ||
974 | here = ext4_find_next_zero_bit(bh->b_data, end_goal, start); | ||
975 | if (here < end_goal && ext4_test_allocatable(here, bh)) | ||
976 | return here; | ||
977 | ext4_debug("Bit not found near goal\n"); | ||
978 | } | ||
979 | |||
980 | here = start; | ||
981 | if (here < 0) | ||
982 | here = 0; | ||
983 | |||
984 | p = ((char *)bh->b_data) + (here >> 3); | ||
985 | r = memscan(p, 0, ((maxblocks + 7) >> 3) - (here >> 3)); | ||
986 | next = (r - ((char *)bh->b_data)) << 3; | ||
987 | |||
988 | if (next < maxblocks && next >= start && ext4_test_allocatable(next, bh)) | ||
989 | return next; | ||
990 | |||
991 | /* | ||
992 | * The bitmap search --- search forward alternately through the actual | ||
993 | * bitmap and the last-committed copy until we find a bit free in | ||
994 | * both | ||
995 | */ | ||
996 | here = bitmap_search_next_usable_block(here, bh, maxblocks); | ||
997 | return here; | ||
998 | } | ||
999 | |||
1000 | /** | ||
1001 | * claim_block() | ||
1002 | * @block: the free block (group relative) to allocate | ||
1003 | * @bh: the bufferhead containts the block group bitmap | ||
1004 | * | ||
1005 | * We think we can allocate this block in this bitmap. Try to set the bit. | ||
1006 | * If that succeeds then check that nobody has allocated and then freed the | ||
1007 | * block since we saw that is was not marked in b_committed_data. If it _was_ | ||
1008 | * allocated and freed then clear the bit in the bitmap again and return | ||
1009 | * zero (failure). | ||
1010 | */ | ||
1011 | static inline int | ||
1012 | claim_block(spinlock_t *lock, ext4_grpblk_t block, struct buffer_head *bh) | ||
1013 | { | ||
1014 | struct journal_head *jh = bh2jh(bh); | ||
1015 | int ret; | ||
1016 | |||
1017 | if (ext4_set_bit_atomic(lock, block, bh->b_data)) | ||
1018 | return 0; | ||
1019 | jbd_lock_bh_state(bh); | ||
1020 | if (jh->b_committed_data && ext4_test_bit(block,jh->b_committed_data)) { | ||
1021 | ext4_clear_bit_atomic(lock, block, bh->b_data); | ||
1022 | ret = 0; | ||
1023 | } else { | ||
1024 | ret = 1; | ||
1025 | } | ||
1026 | jbd_unlock_bh_state(bh); | ||
1027 | return ret; | ||
1028 | } | ||
1029 | 594 | ||
1030 | /** | 595 | if (!capable(CAP_SYS_RESOURCE) && |
1031 | * ext4_try_to_allocate() | 596 | sbi->s_resuid != current->fsuid && |
1032 | * @sb: superblock | 597 | (sbi->s_resgid == 0 || !in_group_p(sbi->s_resgid))) |
1033 | * @handle: handle to this transaction | 598 | root_blocks = ext4_r_blocks_count(sbi->s_es); |
1034 | * @group: given allocation block group | ||
1035 | * @bitmap_bh: bufferhead holds the block bitmap | ||
1036 | * @grp_goal: given target block within the group | ||
1037 | * @count: target number of blocks to allocate | ||
1038 | * @my_rsv: reservation window | ||
1039 | * | ||
1040 | * Attempt to allocate blocks within a give range. Set the range of allocation | ||
1041 | * first, then find the first free bit(s) from the bitmap (within the range), | ||
1042 | * and at last, allocate the blocks by claiming the found free bit as allocated. | ||
1043 | * | ||
1044 | * To set the range of this allocation: | ||
1045 | * if there is a reservation window, only try to allocate block(s) from the | ||
1046 | * file's own reservation window; | ||
1047 | * Otherwise, the allocation range starts from the give goal block, ends at | ||
1048 | * the block group's last block. | ||
1049 | * | ||
1050 | * If we failed to allocate the desired block then we may end up crossing to a | ||
1051 | * new bitmap. In that case we must release write access to the old one via | ||
1052 | * ext4_journal_release_buffer(), else we'll run out of credits. | ||
1053 | */ | ||
1054 | static ext4_grpblk_t | ||
1055 | ext4_try_to_allocate(struct super_block *sb, handle_t *handle, | ||
1056 | ext4_group_t group, struct buffer_head *bitmap_bh, | ||
1057 | ext4_grpblk_t grp_goal, unsigned long *count, | ||
1058 | struct ext4_reserve_window *my_rsv) | ||
1059 | { | ||
1060 | ext4_fsblk_t group_first_block; | ||
1061 | ext4_grpblk_t start, end; | ||
1062 | unsigned long num = 0; | ||
1063 | |||
1064 | /* we do allocation within the reservation window if we have a window */ | ||
1065 | if (my_rsv) { | ||
1066 | group_first_block = ext4_group_first_block_no(sb, group); | ||
1067 | if (my_rsv->_rsv_start >= group_first_block) | ||
1068 | start = my_rsv->_rsv_start - group_first_block; | ||
1069 | else | ||
1070 | /* reservation window cross group boundary */ | ||
1071 | start = 0; | ||
1072 | end = my_rsv->_rsv_end - group_first_block + 1; | ||
1073 | if (end > EXT4_BLOCKS_PER_GROUP(sb)) | ||
1074 | /* reservation window crosses group boundary */ | ||
1075 | end = EXT4_BLOCKS_PER_GROUP(sb); | ||
1076 | if ((start <= grp_goal) && (grp_goal < end)) | ||
1077 | start = grp_goal; | ||
1078 | else | ||
1079 | grp_goal = -1; | ||
1080 | } else { | ||
1081 | if (grp_goal > 0) | ||
1082 | start = grp_goal; | ||
1083 | else | ||
1084 | start = 0; | ||
1085 | end = EXT4_BLOCKS_PER_GROUP(sb); | ||
1086 | } | ||
1087 | |||
1088 | BUG_ON(start > EXT4_BLOCKS_PER_GROUP(sb)); | ||
1089 | |||
1090 | repeat: | ||
1091 | if (grp_goal < 0 || !ext4_test_allocatable(grp_goal, bitmap_bh)) { | ||
1092 | grp_goal = find_next_usable_block(start, bitmap_bh, end); | ||
1093 | if (grp_goal < 0) | ||
1094 | goto fail_access; | ||
1095 | if (!my_rsv) { | ||
1096 | int i; | ||
1097 | |||
1098 | for (i = 0; i < 7 && grp_goal > start && | ||
1099 | ext4_test_allocatable(grp_goal - 1, | ||
1100 | bitmap_bh); | ||
1101 | i++, grp_goal--) | ||
1102 | ; | ||
1103 | } | ||
1104 | } | ||
1105 | start = grp_goal; | ||
1106 | |||
1107 | if (!claim_block(sb_bgl_lock(EXT4_SB(sb), group), | ||
1108 | grp_goal, bitmap_bh)) { | ||
1109 | /* | ||
1110 | * The block was allocated by another thread, or it was | ||
1111 | * allocated and then freed by another thread | ||
1112 | */ | ||
1113 | start++; | ||
1114 | grp_goal++; | ||
1115 | if (start >= end) | ||
1116 | goto fail_access; | ||
1117 | goto repeat; | ||
1118 | } | ||
1119 | num++; | ||
1120 | grp_goal++; | ||
1121 | while (num < *count && grp_goal < end | ||
1122 | && ext4_test_allocatable(grp_goal, bitmap_bh) | ||
1123 | && claim_block(sb_bgl_lock(EXT4_SB(sb), group), | ||
1124 | grp_goal, bitmap_bh)) { | ||
1125 | num++; | ||
1126 | grp_goal++; | ||
1127 | } | ||
1128 | *count = num; | ||
1129 | return grp_goal - num; | ||
1130 | fail_access: | ||
1131 | *count = num; | ||
1132 | return -1; | ||
1133 | } | ||
1134 | |||
1135 | /** | ||
1136 | * find_next_reservable_window(): | ||
1137 | * find a reservable space within the given range. | ||
1138 | * It does not allocate the reservation window for now: | ||
1139 | * alloc_new_reservation() will do the work later. | ||
1140 | * | ||
1141 | * @search_head: the head of the searching list; | ||
1142 | * This is not necessarily the list head of the whole filesystem | ||
1143 | * | ||
1144 | * We have both head and start_block to assist the search | ||
1145 | * for the reservable space. The list starts from head, | ||
1146 | * but we will shift to the place where start_block is, | ||
1147 | * then start from there, when looking for a reservable space. | ||
1148 | * | ||
1149 | * @size: the target new reservation window size | ||
1150 | * | ||
1151 | * @group_first_block: the first block we consider to start | ||
1152 | * the real search from | ||
1153 | * | ||
1154 | * @last_block: | ||
1155 | * the maximum block number that our goal reservable space | ||
1156 | * could start from. This is normally the last block in this | ||
1157 | * group. The search will end when we found the start of next | ||
1158 | * possible reservable space is out of this boundary. | ||
1159 | * This could handle the cross boundary reservation window | ||
1160 | * request. | ||
1161 | * | ||
1162 | * basically we search from the given range, rather than the whole | ||
1163 | * reservation double linked list, (start_block, last_block) | ||
1164 | * to find a free region that is of my size and has not | ||
1165 | * been reserved. | ||
1166 | * | ||
1167 | */ | ||
1168 | static int find_next_reservable_window( | ||
1169 | struct ext4_reserve_window_node *search_head, | ||
1170 | struct ext4_reserve_window_node *my_rsv, | ||
1171 | struct super_block * sb, | ||
1172 | ext4_fsblk_t start_block, | ||
1173 | ext4_fsblk_t last_block) | ||
1174 | { | ||
1175 | struct rb_node *next; | ||
1176 | struct ext4_reserve_window_node *rsv, *prev; | ||
1177 | ext4_fsblk_t cur; | ||
1178 | int size = my_rsv->rsv_goal_size; | ||
1179 | |||
1180 | /* TODO: make the start of the reservation window byte-aligned */ | ||
1181 | /* cur = *start_block & ~7;*/ | ||
1182 | cur = start_block; | ||
1183 | rsv = search_head; | ||
1184 | if (!rsv) | ||
1185 | return -1; | ||
1186 | |||
1187 | while (1) { | ||
1188 | if (cur <= rsv->rsv_end) | ||
1189 | cur = rsv->rsv_end + 1; | ||
1190 | |||
1191 | /* TODO? | ||
1192 | * in the case we could not find a reservable space | ||
1193 | * that is what is expected, during the re-search, we could | ||
1194 | * remember what's the largest reservable space we could have | ||
1195 | * and return that one. | ||
1196 | * | ||
1197 | * For now it will fail if we could not find the reservable | ||
1198 | * space with expected-size (or more)... | ||
1199 | */ | ||
1200 | if (cur > last_block) | ||
1201 | return -1; /* fail */ | ||
1202 | |||
1203 | prev = rsv; | ||
1204 | next = rb_next(&rsv->rsv_node); | ||
1205 | rsv = rb_entry(next,struct ext4_reserve_window_node,rsv_node); | ||
1206 | 599 | ||
1207 | /* | 600 | if (free_blocks - (nblocks + root_blocks + dirty_blocks) < |
1208 | * Reached the last reservation, we can just append to the | 601 | EXT4_FREEBLOCKS_WATERMARK) { |
1209 | * previous one. | 602 | free_blocks = percpu_counter_sum(fbc); |
1210 | */ | 603 | dirty_blocks = percpu_counter_sum(dbc); |
1211 | if (!next) | 604 | if (dirty_blocks < 0) { |
1212 | break; | 605 | printk(KERN_CRIT "Dirty block accounting " |
1213 | 606 | "went wrong %lld\n", | |
1214 | if (cur + size <= rsv->rsv_start) { | 607 | dirty_blocks); |
1215 | /* | ||
1216 | * Found a reserveable space big enough. We could | ||
1217 | * have a reservation across the group boundary here | ||
1218 | */ | ||
1219 | break; | ||
1220 | } | 608 | } |
1221 | } | 609 | } |
1222 | /* | 610 | /* Check whether we have space after |
1223 | * we come here either : | 611 | * accounting for current dirty blocks |
1224 | * when we reach the end of the whole list, | ||
1225 | * and there is empty reservable space after last entry in the list. | ||
1226 | * append it to the end of the list. | ||
1227 | * | ||
1228 | * or we found one reservable space in the middle of the list, | ||
1229 | * return the reservation window that we could append to. | ||
1230 | * succeed. | ||
1231 | */ | 612 | */ |
613 | if (free_blocks < ((root_blocks + nblocks) + dirty_blocks)) | ||
614 | /* we don't have free space */ | ||
615 | return -ENOSPC; | ||
1232 | 616 | ||
1233 | if ((prev != my_rsv) && (!rsv_is_empty(&my_rsv->rsv_window))) | 617 | /* Add the blocks to nblocks */ |
1234 | rsv_window_remove(sb, my_rsv); | 618 | percpu_counter_add(dbc, nblocks); |
1235 | |||
1236 | /* | ||
1237 | * Let's book the whole avaliable window for now. We will check the | ||
1238 | * disk bitmap later and then, if there are free blocks then we adjust | ||
1239 | * the window size if it's larger than requested. | ||
1240 | * Otherwise, we will remove this node from the tree next time | ||
1241 | * call find_next_reservable_window. | ||
1242 | */ | ||
1243 | my_rsv->rsv_start = cur; | ||
1244 | my_rsv->rsv_end = cur + size - 1; | ||
1245 | my_rsv->rsv_alloc_hit = 0; | ||
1246 | |||
1247 | if (prev != my_rsv) | ||
1248 | ext4_rsv_window_add(sb, my_rsv); | ||
1249 | |||
1250 | return 0; | 619 | return 0; |
1251 | } | 620 | } |
1252 | 621 | ||
1253 | /** | 622 | /** |
1254 | * alloc_new_reservation()--allocate a new reservation window | ||
1255 | * | ||
1256 | * To make a new reservation, we search part of the filesystem | ||
1257 | * reservation list (the list that inside the group). We try to | ||
1258 | * allocate a new reservation window near the allocation goal, | ||
1259 | * or the beginning of the group, if there is no goal. | ||
1260 | * | ||
1261 | * We first find a reservable space after the goal, then from | ||
1262 | * there, we check the bitmap for the first free block after | ||
1263 | * it. If there is no free block until the end of group, then the | ||
1264 | * whole group is full, we failed. Otherwise, check if the free | ||
1265 | * block is inside the expected reservable space, if so, we | ||
1266 | * succeed. | ||
1267 | * If the first free block is outside the reservable space, then | ||
1268 | * start from the first free block, we search for next available | ||
1269 | * space, and go on. | ||
1270 | * | ||
1271 | * on succeed, a new reservation will be found and inserted into the list | ||
1272 | * It contains at least one free block, and it does not overlap with other | ||
1273 | * reservation windows. | ||
1274 | * | ||
1275 | * failed: we failed to find a reservation window in this group | ||
1276 | * | ||
1277 | * @rsv: the reservation | ||
1278 | * | ||
1279 | * @grp_goal: The goal (group-relative). It is where the search for a | ||
1280 | * free reservable space should start from. | ||
1281 | * if we have a grp_goal(grp_goal >0 ), then start from there, | ||
1282 | * no grp_goal(grp_goal = -1), we start from the first block | ||
1283 | * of the group. | ||
1284 | * | ||
1285 | * @sb: the super block | ||
1286 | * @group: the group we are trying to allocate in | ||
1287 | * @bitmap_bh: the block group block bitmap | ||
1288 | * | ||
1289 | */ | ||
1290 | static int alloc_new_reservation(struct ext4_reserve_window_node *my_rsv, | ||
1291 | ext4_grpblk_t grp_goal, struct super_block *sb, | ||
1292 | ext4_group_t group, struct buffer_head *bitmap_bh) | ||
1293 | { | ||
1294 | struct ext4_reserve_window_node *search_head; | ||
1295 | ext4_fsblk_t group_first_block, group_end_block, start_block; | ||
1296 | ext4_grpblk_t first_free_block; | ||
1297 | struct rb_root *fs_rsv_root = &EXT4_SB(sb)->s_rsv_window_root; | ||
1298 | unsigned long size; | ||
1299 | int ret; | ||
1300 | spinlock_t *rsv_lock = &EXT4_SB(sb)->s_rsv_window_lock; | ||
1301 | |||
1302 | group_first_block = ext4_group_first_block_no(sb, group); | ||
1303 | group_end_block = group_first_block + (EXT4_BLOCKS_PER_GROUP(sb) - 1); | ||
1304 | |||
1305 | if (grp_goal < 0) | ||
1306 | start_block = group_first_block; | ||
1307 | else | ||
1308 | start_block = grp_goal + group_first_block; | ||
1309 | |||
1310 | size = my_rsv->rsv_goal_size; | ||
1311 | |||
1312 | if (!rsv_is_empty(&my_rsv->rsv_window)) { | ||
1313 | /* | ||
1314 | * if the old reservation is cross group boundary | ||
1315 | * and if the goal is inside the old reservation window, | ||
1316 | * we will come here when we just failed to allocate from | ||
1317 | * the first part of the window. We still have another part | ||
1318 | * that belongs to the next group. In this case, there is no | ||
1319 | * point to discard our window and try to allocate a new one | ||
1320 | * in this group(which will fail). we should | ||
1321 | * keep the reservation window, just simply move on. | ||
1322 | * | ||
1323 | * Maybe we could shift the start block of the reservation | ||
1324 | * window to the first block of next group. | ||
1325 | */ | ||
1326 | |||
1327 | if ((my_rsv->rsv_start <= group_end_block) && | ||
1328 | (my_rsv->rsv_end > group_end_block) && | ||
1329 | (start_block >= my_rsv->rsv_start)) | ||
1330 | return -1; | ||
1331 | |||
1332 | if ((my_rsv->rsv_alloc_hit > | ||
1333 | (my_rsv->rsv_end - my_rsv->rsv_start + 1) / 2)) { | ||
1334 | /* | ||
1335 | * if the previously allocation hit ratio is | ||
1336 | * greater than 1/2, then we double the size of | ||
1337 | * the reservation window the next time, | ||
1338 | * otherwise we keep the same size window | ||
1339 | */ | ||
1340 | size = size * 2; | ||
1341 | if (size > EXT4_MAX_RESERVE_BLOCKS) | ||
1342 | size = EXT4_MAX_RESERVE_BLOCKS; | ||
1343 | my_rsv->rsv_goal_size= size; | ||
1344 | } | ||
1345 | } | ||
1346 | |||
1347 | spin_lock(rsv_lock); | ||
1348 | /* | ||
1349 | * shift the search start to the window near the goal block | ||
1350 | */ | ||
1351 | search_head = search_reserve_window(fs_rsv_root, start_block); | ||
1352 | |||
1353 | /* | ||
1354 | * find_next_reservable_window() simply finds a reservable window | ||
1355 | * inside the given range(start_block, group_end_block). | ||
1356 | * | ||
1357 | * To make sure the reservation window has a free bit inside it, we | ||
1358 | * need to check the bitmap after we found a reservable window. | ||
1359 | */ | ||
1360 | retry: | ||
1361 | ret = find_next_reservable_window(search_head, my_rsv, sb, | ||
1362 | start_block, group_end_block); | ||
1363 | |||
1364 | if (ret == -1) { | ||
1365 | if (!rsv_is_empty(&my_rsv->rsv_window)) | ||
1366 | rsv_window_remove(sb, my_rsv); | ||
1367 | spin_unlock(rsv_lock); | ||
1368 | return -1; | ||
1369 | } | ||
1370 | |||
1371 | /* | ||
1372 | * On success, find_next_reservable_window() returns the | ||
1373 | * reservation window where there is a reservable space after it. | ||
1374 | * Before we reserve this reservable space, we need | ||
1375 | * to make sure there is at least a free block inside this region. | ||
1376 | * | ||
1377 | * searching the first free bit on the block bitmap and copy of | ||
1378 | * last committed bitmap alternatively, until we found a allocatable | ||
1379 | * block. Search start from the start block of the reservable space | ||
1380 | * we just found. | ||
1381 | */ | ||
1382 | spin_unlock(rsv_lock); | ||
1383 | first_free_block = bitmap_search_next_usable_block( | ||
1384 | my_rsv->rsv_start - group_first_block, | ||
1385 | bitmap_bh, group_end_block - group_first_block + 1); | ||
1386 | |||
1387 | if (first_free_block < 0) { | ||
1388 | /* | ||
1389 | * no free block left on the bitmap, no point | ||
1390 | * to reserve the space. return failed. | ||
1391 | */ | ||
1392 | spin_lock(rsv_lock); | ||
1393 | if (!rsv_is_empty(&my_rsv->rsv_window)) | ||
1394 | rsv_window_remove(sb, my_rsv); | ||
1395 | spin_unlock(rsv_lock); | ||
1396 | return -1; /* failed */ | ||
1397 | } | ||
1398 | |||
1399 | start_block = first_free_block + group_first_block; | ||
1400 | /* | ||
1401 | * check if the first free block is within the | ||
1402 | * free space we just reserved | ||
1403 | */ | ||
1404 | if (start_block >= my_rsv->rsv_start && start_block <= my_rsv->rsv_end) | ||
1405 | return 0; /* success */ | ||
1406 | /* | ||
1407 | * if the first free bit we found is out of the reservable space | ||
1408 | * continue search for next reservable space, | ||
1409 | * start from where the free block is, | ||
1410 | * we also shift the list head to where we stopped last time | ||
1411 | */ | ||
1412 | search_head = my_rsv; | ||
1413 | spin_lock(rsv_lock); | ||
1414 | goto retry; | ||
1415 | } | ||
1416 | |||
1417 | /** | ||
1418 | * try_to_extend_reservation() | ||
1419 | * @my_rsv: given reservation window | ||
1420 | * @sb: super block | ||
1421 | * @size: the delta to extend | ||
1422 | * | ||
1423 | * Attempt to expand the reservation window large enough to have | ||
1424 | * required number of free blocks | ||
1425 | * | ||
1426 | * Since ext4_try_to_allocate() will always allocate blocks within | ||
1427 | * the reservation window range, if the window size is too small, | ||
1428 | * multiple blocks allocation has to stop at the end of the reservation | ||
1429 | * window. To make this more efficient, given the total number of | ||
1430 | * blocks needed and the current size of the window, we try to | ||
1431 | * expand the reservation window size if necessary on a best-effort | ||
1432 | * basis before ext4_new_blocks() tries to allocate blocks, | ||
1433 | */ | ||
1434 | static void try_to_extend_reservation(struct ext4_reserve_window_node *my_rsv, | ||
1435 | struct super_block *sb, int size) | ||
1436 | { | ||
1437 | struct ext4_reserve_window_node *next_rsv; | ||
1438 | struct rb_node *next; | ||
1439 | spinlock_t *rsv_lock = &EXT4_SB(sb)->s_rsv_window_lock; | ||
1440 | |||
1441 | if (!spin_trylock(rsv_lock)) | ||
1442 | return; | ||
1443 | |||
1444 | next = rb_next(&my_rsv->rsv_node); | ||
1445 | |||
1446 | if (!next) | ||
1447 | my_rsv->rsv_end += size; | ||
1448 | else { | ||
1449 | next_rsv = rb_entry(next, struct ext4_reserve_window_node, rsv_node); | ||
1450 | |||
1451 | if ((next_rsv->rsv_start - my_rsv->rsv_end - 1) >= size) | ||
1452 | my_rsv->rsv_end += size; | ||
1453 | else | ||
1454 | my_rsv->rsv_end = next_rsv->rsv_start - 1; | ||
1455 | } | ||
1456 | spin_unlock(rsv_lock); | ||
1457 | } | ||
1458 | |||
1459 | /** | ||
1460 | * ext4_try_to_allocate_with_rsv() | ||
1461 | * @sb: superblock | ||
1462 | * @handle: handle to this transaction | ||
1463 | * @group: given allocation block group | ||
1464 | * @bitmap_bh: bufferhead holds the block bitmap | ||
1465 | * @grp_goal: given target block within the group | ||
1466 | * @count: target number of blocks to allocate | ||
1467 | * @my_rsv: reservation window | ||
1468 | * @errp: pointer to store the error code | ||
1469 | * | ||
1470 | * This is the main function used to allocate a new block and its reservation | ||
1471 | * window. | ||
1472 | * | ||
1473 | * Each time when a new block allocation is need, first try to allocate from | ||
1474 | * its own reservation. If it does not have a reservation window, instead of | ||
1475 | * looking for a free bit on bitmap first, then look up the reservation list to | ||
1476 | * see if it is inside somebody else's reservation window, we try to allocate a | ||
1477 | * reservation window for it starting from the goal first. Then do the block | ||
1478 | * allocation within the reservation window. | ||
1479 | * | ||
1480 | * This will avoid keeping on searching the reservation list again and | ||
1481 | * again when somebody is looking for a free block (without | ||
1482 | * reservation), and there are lots of free blocks, but they are all | ||
1483 | * being reserved. | ||
1484 | * | ||
1485 | * We use a red-black tree for the per-filesystem reservation list. | ||
1486 | * | ||
1487 | */ | ||
1488 | static ext4_grpblk_t | ||
1489 | ext4_try_to_allocate_with_rsv(struct super_block *sb, handle_t *handle, | ||
1490 | ext4_group_t group, struct buffer_head *bitmap_bh, | ||
1491 | ext4_grpblk_t grp_goal, | ||
1492 | struct ext4_reserve_window_node * my_rsv, | ||
1493 | unsigned long *count, int *errp) | ||
1494 | { | ||
1495 | ext4_fsblk_t group_first_block, group_last_block; | ||
1496 | ext4_grpblk_t ret = 0; | ||
1497 | int fatal; | ||
1498 | unsigned long num = *count; | ||
1499 | |||
1500 | *errp = 0; | ||
1501 | |||
1502 | /* | ||
1503 | * Make sure we use undo access for the bitmap, because it is critical | ||
1504 | * that we do the frozen_data COW on bitmap buffers in all cases even | ||
1505 | * if the buffer is in BJ_Forget state in the committing transaction. | ||
1506 | */ | ||
1507 | BUFFER_TRACE(bitmap_bh, "get undo access for new block"); | ||
1508 | fatal = ext4_journal_get_undo_access(handle, bitmap_bh); | ||
1509 | if (fatal) { | ||
1510 | *errp = fatal; | ||
1511 | return -1; | ||
1512 | } | ||
1513 | |||
1514 | /* | ||
1515 | * we don't deal with reservation when | ||
1516 | * filesystem is mounted without reservation | ||
1517 | * or the file is not a regular file | ||
1518 | * or last attempt to allocate a block with reservation turned on failed | ||
1519 | */ | ||
1520 | if (my_rsv == NULL ) { | ||
1521 | ret = ext4_try_to_allocate(sb, handle, group, bitmap_bh, | ||
1522 | grp_goal, count, NULL); | ||
1523 | goto out; | ||
1524 | } | ||
1525 | /* | ||
1526 | * grp_goal is a group relative block number (if there is a goal) | ||
1527 | * 0 <= grp_goal < EXT4_BLOCKS_PER_GROUP(sb) | ||
1528 | * first block is a filesystem wide block number | ||
1529 | * first block is the block number of the first block in this group | ||
1530 | */ | ||
1531 | group_first_block = ext4_group_first_block_no(sb, group); | ||
1532 | group_last_block = group_first_block + (EXT4_BLOCKS_PER_GROUP(sb) - 1); | ||
1533 | |||
1534 | /* | ||
1535 | * Basically we will allocate a new block from inode's reservation | ||
1536 | * window. | ||
1537 | * | ||
1538 | * We need to allocate a new reservation window, if: | ||
1539 | * a) inode does not have a reservation window; or | ||
1540 | * b) last attempt to allocate a block from existing reservation | ||
1541 | * failed; or | ||
1542 | * c) we come here with a goal and with a reservation window | ||
1543 | * | ||
1544 | * We do not need to allocate a new reservation window if we come here | ||
1545 | * at the beginning with a goal and the goal is inside the window, or | ||
1546 | * we don't have a goal but already have a reservation window. | ||
1547 | * then we could go to allocate from the reservation window directly. | ||
1548 | */ | ||
1549 | while (1) { | ||
1550 | if (rsv_is_empty(&my_rsv->rsv_window) || (ret < 0) || | ||
1551 | !goal_in_my_reservation(&my_rsv->rsv_window, | ||
1552 | grp_goal, group, sb)) { | ||
1553 | if (my_rsv->rsv_goal_size < *count) | ||
1554 | my_rsv->rsv_goal_size = *count; | ||
1555 | ret = alloc_new_reservation(my_rsv, grp_goal, sb, | ||
1556 | group, bitmap_bh); | ||
1557 | if (ret < 0) | ||
1558 | break; /* failed */ | ||
1559 | |||
1560 | if (!goal_in_my_reservation(&my_rsv->rsv_window, | ||
1561 | grp_goal, group, sb)) | ||
1562 | grp_goal = -1; | ||
1563 | } else if (grp_goal >= 0) { | ||
1564 | int curr = my_rsv->rsv_end - | ||
1565 | (grp_goal + group_first_block) + 1; | ||
1566 | |||
1567 | if (curr < *count) | ||
1568 | try_to_extend_reservation(my_rsv, sb, | ||
1569 | *count - curr); | ||
1570 | } | ||
1571 | |||
1572 | if ((my_rsv->rsv_start > group_last_block) || | ||
1573 | (my_rsv->rsv_end < group_first_block)) { | ||
1574 | rsv_window_dump(&EXT4_SB(sb)->s_rsv_window_root, 1); | ||
1575 | BUG(); | ||
1576 | } | ||
1577 | ret = ext4_try_to_allocate(sb, handle, group, bitmap_bh, | ||
1578 | grp_goal, &num, &my_rsv->rsv_window); | ||
1579 | if (ret >= 0) { | ||
1580 | my_rsv->rsv_alloc_hit += num; | ||
1581 | *count = num; | ||
1582 | break; /* succeed */ | ||
1583 | } | ||
1584 | num = *count; | ||
1585 | } | ||
1586 | out: | ||
1587 | if (ret >= 0) { | ||
1588 | BUFFER_TRACE(bitmap_bh, "journal_dirty_metadata for " | ||
1589 | "bitmap block"); | ||
1590 | fatal = ext4_journal_dirty_metadata(handle, bitmap_bh); | ||
1591 | if (fatal) { | ||
1592 | *errp = fatal; | ||
1593 | return -1; | ||
1594 | } | ||
1595 | return ret; | ||
1596 | } | ||
1597 | |||
1598 | BUFFER_TRACE(bitmap_bh, "journal_release_buffer"); | ||
1599 | ext4_journal_release_buffer(handle, bitmap_bh); | ||
1600 | return ret; | ||
1601 | } | ||
1602 | |||
1603 | /** | ||
1604 | * ext4_has_free_blocks() | 623 | * ext4_has_free_blocks() |
1605 | * @sbi: in-core super block structure. | 624 | * @sbi: in-core super block structure. |
1606 | * @nblocks: number of neeed blocks | 625 | * @nblocks: number of neeed blocks |
@@ -1610,29 +629,34 @@ out: | |||
1610 | * On success, return nblocks | 629 | * On success, return nblocks |
1611 | */ | 630 | */ |
1612 | ext4_fsblk_t ext4_has_free_blocks(struct ext4_sb_info *sbi, | 631 | ext4_fsblk_t ext4_has_free_blocks(struct ext4_sb_info *sbi, |
1613 | ext4_fsblk_t nblocks) | 632 | s64 nblocks) |
1614 | { | 633 | { |
1615 | ext4_fsblk_t free_blocks; | 634 | s64 free_blocks, dirty_blocks; |
1616 | ext4_fsblk_t root_blocks = 0; | 635 | s64 root_blocks = 0; |
636 | struct percpu_counter *fbc = &sbi->s_freeblocks_counter; | ||
637 | struct percpu_counter *dbc = &sbi->s_dirtyblocks_counter; | ||
1617 | 638 | ||
1618 | free_blocks = percpu_counter_read_positive(&sbi->s_freeblocks_counter); | 639 | free_blocks = percpu_counter_read_positive(fbc); |
640 | dirty_blocks = percpu_counter_read_positive(dbc); | ||
1619 | 641 | ||
1620 | if (!capable(CAP_SYS_RESOURCE) && | 642 | if (!capable(CAP_SYS_RESOURCE) && |
1621 | sbi->s_resuid != current->fsuid && | 643 | sbi->s_resuid != current->fsuid && |
1622 | (sbi->s_resgid == 0 || !in_group_p(sbi->s_resgid))) | 644 | (sbi->s_resgid == 0 || !in_group_p(sbi->s_resgid))) |
1623 | root_blocks = ext4_r_blocks_count(sbi->s_es); | 645 | root_blocks = ext4_r_blocks_count(sbi->s_es); |
1624 | #ifdef CONFIG_SMP | 646 | |
1625 | if (free_blocks - root_blocks < FBC_BATCH) | 647 | if (free_blocks - (nblocks + root_blocks + dirty_blocks) < |
1626 | free_blocks = | 648 | EXT4_FREEBLOCKS_WATERMARK) { |
1627 | percpu_counter_sum_and_set(&sbi->s_freeblocks_counter); | 649 | free_blocks = percpu_counter_sum(fbc); |
1628 | #endif | 650 | dirty_blocks = percpu_counter_sum(dbc); |
1629 | if (free_blocks <= root_blocks) | 651 | } |
652 | if (free_blocks <= (root_blocks + dirty_blocks)) | ||
1630 | /* we don't have free space */ | 653 | /* we don't have free space */ |
1631 | return 0; | 654 | return 0; |
1632 | if (free_blocks - root_blocks < nblocks) | 655 | |
1633 | return free_blocks - root_blocks; | 656 | if (free_blocks - (root_blocks + dirty_blocks) < nblocks) |
657 | return free_blocks - (root_blocks + dirty_blocks); | ||
1634 | return nblocks; | 658 | return nblocks; |
1635 | } | 659 | } |
1636 | 660 | ||
1637 | 661 | ||
1638 | /** | 662 | /** |
@@ -1657,303 +681,6 @@ int ext4_should_retry_alloc(struct super_block *sb, int *retries) | |||
1657 | return jbd2_journal_force_commit_nested(EXT4_SB(sb)->s_journal); | 681 | return jbd2_journal_force_commit_nested(EXT4_SB(sb)->s_journal); |
1658 | } | 682 | } |
1659 | 683 | ||
1660 | /** | ||
1661 | * ext4_old_new_blocks() -- core block bitmap based block allocation function | ||
1662 | * | ||
1663 | * @handle: handle to this transaction | ||
1664 | * @inode: file inode | ||
1665 | * @goal: given target block(filesystem wide) | ||
1666 | * @count: target number of blocks to allocate | ||
1667 | * @errp: error code | ||
1668 | * | ||
1669 | * ext4_old_new_blocks uses a goal block to assist allocation and look up | ||
1670 | * the block bitmap directly to do block allocation. It tries to | ||
1671 | * allocate block(s) from the block group contains the goal block first. If | ||
1672 | * that fails, it will try to allocate block(s) from other block groups | ||
1673 | * without any specific goal block. | ||
1674 | * | ||
1675 | * This function is called when -o nomballoc mount option is enabled | ||
1676 | * | ||
1677 | */ | ||
1678 | ext4_fsblk_t ext4_old_new_blocks(handle_t *handle, struct inode *inode, | ||
1679 | ext4_fsblk_t goal, unsigned long *count, int *errp) | ||
1680 | { | ||
1681 | struct buffer_head *bitmap_bh = NULL; | ||
1682 | struct buffer_head *gdp_bh; | ||
1683 | ext4_group_t group_no; | ||
1684 | ext4_group_t goal_group; | ||
1685 | ext4_grpblk_t grp_target_blk; /* blockgroup relative goal block */ | ||
1686 | ext4_grpblk_t grp_alloc_blk; /* blockgroup-relative allocated block*/ | ||
1687 | ext4_fsblk_t ret_block; /* filesyetem-wide allocated block */ | ||
1688 | ext4_group_t bgi; /* blockgroup iteration index */ | ||
1689 | int fatal = 0, err; | ||
1690 | int performed_allocation = 0; | ||
1691 | ext4_grpblk_t free_blocks; /* number of free blocks in a group */ | ||
1692 | struct super_block *sb; | ||
1693 | struct ext4_group_desc *gdp; | ||
1694 | struct ext4_super_block *es; | ||
1695 | struct ext4_sb_info *sbi; | ||
1696 | struct ext4_reserve_window_node *my_rsv = NULL; | ||
1697 | struct ext4_block_alloc_info *block_i; | ||
1698 | unsigned short windowsz = 0; | ||
1699 | ext4_group_t ngroups; | ||
1700 | unsigned long num = *count; | ||
1701 | |||
1702 | sb = inode->i_sb; | ||
1703 | if (!sb) { | ||
1704 | *errp = -ENODEV; | ||
1705 | printk("ext4_new_block: nonexistent device"); | ||
1706 | return 0; | ||
1707 | } | ||
1708 | |||
1709 | sbi = EXT4_SB(sb); | ||
1710 | if (!EXT4_I(inode)->i_delalloc_reserved_flag) { | ||
1711 | /* | ||
1712 | * With delalloc we already reserved the blocks | ||
1713 | */ | ||
1714 | *count = ext4_has_free_blocks(sbi, *count); | ||
1715 | } | ||
1716 | if (*count == 0) { | ||
1717 | *errp = -ENOSPC; | ||
1718 | return 0; /*return with ENOSPC error */ | ||
1719 | } | ||
1720 | num = *count; | ||
1721 | |||
1722 | /* | ||
1723 | * Check quota for allocation of this block. | ||
1724 | */ | ||
1725 | if (DQUOT_ALLOC_BLOCK(inode, num)) { | ||
1726 | *errp = -EDQUOT; | ||
1727 | return 0; | ||
1728 | } | ||
1729 | |||
1730 | sbi = EXT4_SB(sb); | ||
1731 | es = EXT4_SB(sb)->s_es; | ||
1732 | ext4_debug("goal=%llu.\n", goal); | ||
1733 | /* | ||
1734 | * Allocate a block from reservation only when | ||
1735 | * filesystem is mounted with reservation(default,-o reservation), and | ||
1736 | * it's a regular file, and | ||
1737 | * the desired window size is greater than 0 (One could use ioctl | ||
1738 | * command EXT4_IOC_SETRSVSZ to set the window size to 0 to turn off | ||
1739 | * reservation on that particular file) | ||
1740 | */ | ||
1741 | block_i = EXT4_I(inode)->i_block_alloc_info; | ||
1742 | if (block_i && ((windowsz = block_i->rsv_window_node.rsv_goal_size) > 0)) | ||
1743 | my_rsv = &block_i->rsv_window_node; | ||
1744 | |||
1745 | /* | ||
1746 | * First, test whether the goal block is free. | ||
1747 | */ | ||
1748 | if (goal < le32_to_cpu(es->s_first_data_block) || | ||
1749 | goal >= ext4_blocks_count(es)) | ||
1750 | goal = le32_to_cpu(es->s_first_data_block); | ||
1751 | ext4_get_group_no_and_offset(sb, goal, &group_no, &grp_target_blk); | ||
1752 | goal_group = group_no; | ||
1753 | retry_alloc: | ||
1754 | gdp = ext4_get_group_desc(sb, group_no, &gdp_bh); | ||
1755 | if (!gdp) | ||
1756 | goto io_error; | ||
1757 | |||
1758 | free_blocks = le16_to_cpu(gdp->bg_free_blocks_count); | ||
1759 | /* | ||
1760 | * if there is not enough free blocks to make a new resevation | ||
1761 | * turn off reservation for this allocation | ||
1762 | */ | ||
1763 | if (my_rsv && (free_blocks < windowsz) | ||
1764 | && (rsv_is_empty(&my_rsv->rsv_window))) | ||
1765 | my_rsv = NULL; | ||
1766 | |||
1767 | if (free_blocks > 0) { | ||
1768 | bitmap_bh = ext4_read_block_bitmap(sb, group_no); | ||
1769 | if (!bitmap_bh) | ||
1770 | goto io_error; | ||
1771 | grp_alloc_blk = ext4_try_to_allocate_with_rsv(sb, handle, | ||
1772 | group_no, bitmap_bh, grp_target_blk, | ||
1773 | my_rsv, &num, &fatal); | ||
1774 | if (fatal) | ||
1775 | goto out; | ||
1776 | if (grp_alloc_blk >= 0) | ||
1777 | goto allocated; | ||
1778 | } | ||
1779 | |||
1780 | ngroups = EXT4_SB(sb)->s_groups_count; | ||
1781 | smp_rmb(); | ||
1782 | |||
1783 | /* | ||
1784 | * Now search the rest of the groups. We assume that | ||
1785 | * group_no and gdp correctly point to the last group visited. | ||
1786 | */ | ||
1787 | for (bgi = 0; bgi < ngroups; bgi++) { | ||
1788 | group_no++; | ||
1789 | if (group_no >= ngroups) | ||
1790 | group_no = 0; | ||
1791 | gdp = ext4_get_group_desc(sb, group_no, &gdp_bh); | ||
1792 | if (!gdp) | ||
1793 | goto io_error; | ||
1794 | free_blocks = le16_to_cpu(gdp->bg_free_blocks_count); | ||
1795 | /* | ||
1796 | * skip this group if the number of | ||
1797 | * free blocks is less than half of the reservation | ||
1798 | * window size. | ||
1799 | */ | ||
1800 | if (free_blocks <= (windowsz/2)) | ||
1801 | continue; | ||
1802 | |||
1803 | brelse(bitmap_bh); | ||
1804 | bitmap_bh = ext4_read_block_bitmap(sb, group_no); | ||
1805 | if (!bitmap_bh) | ||
1806 | goto io_error; | ||
1807 | /* | ||
1808 | * try to allocate block(s) from this group, without a goal(-1). | ||
1809 | */ | ||
1810 | grp_alloc_blk = ext4_try_to_allocate_with_rsv(sb, handle, | ||
1811 | group_no, bitmap_bh, -1, my_rsv, | ||
1812 | &num, &fatal); | ||
1813 | if (fatal) | ||
1814 | goto out; | ||
1815 | if (grp_alloc_blk >= 0) | ||
1816 | goto allocated; | ||
1817 | } | ||
1818 | /* | ||
1819 | * We may end up a bogus ealier ENOSPC error due to | ||
1820 | * filesystem is "full" of reservations, but | ||
1821 | * there maybe indeed free blocks avaliable on disk | ||
1822 | * In this case, we just forget about the reservations | ||
1823 | * just do block allocation as without reservations. | ||
1824 | */ | ||
1825 | if (my_rsv) { | ||
1826 | my_rsv = NULL; | ||
1827 | windowsz = 0; | ||
1828 | group_no = goal_group; | ||
1829 | goto retry_alloc; | ||
1830 | } | ||
1831 | /* No space left on the device */ | ||
1832 | *errp = -ENOSPC; | ||
1833 | goto out; | ||
1834 | |||
1835 | allocated: | ||
1836 | |||
1837 | ext4_debug("using block group %lu(%d)\n", | ||
1838 | group_no, gdp->bg_free_blocks_count); | ||
1839 | |||
1840 | BUFFER_TRACE(gdp_bh, "get_write_access"); | ||
1841 | fatal = ext4_journal_get_write_access(handle, gdp_bh); | ||
1842 | if (fatal) | ||
1843 | goto out; | ||
1844 | |||
1845 | ret_block = grp_alloc_blk + ext4_group_first_block_no(sb, group_no); | ||
1846 | |||
1847 | if (in_range(ext4_block_bitmap(sb, gdp), ret_block, num) || | ||
1848 | in_range(ext4_inode_bitmap(sb, gdp), ret_block, num) || | ||
1849 | in_range(ret_block, ext4_inode_table(sb, gdp), | ||
1850 | EXT4_SB(sb)->s_itb_per_group) || | ||
1851 | in_range(ret_block + num - 1, ext4_inode_table(sb, gdp), | ||
1852 | EXT4_SB(sb)->s_itb_per_group)) { | ||
1853 | ext4_error(sb, "ext4_new_block", | ||
1854 | "Allocating block in system zone - " | ||
1855 | "blocks from %llu, length %lu", | ||
1856 | ret_block, num); | ||
1857 | /* | ||
1858 | * claim_block marked the blocks we allocated | ||
1859 | * as in use. So we may want to selectively | ||
1860 | * mark some of the blocks as free | ||
1861 | */ | ||
1862 | goto retry_alloc; | ||
1863 | } | ||
1864 | |||
1865 | performed_allocation = 1; | ||
1866 | |||
1867 | #ifdef CONFIG_JBD2_DEBUG | ||
1868 | { | ||
1869 | struct buffer_head *debug_bh; | ||
1870 | |||
1871 | /* Record bitmap buffer state in the newly allocated block */ | ||
1872 | debug_bh = sb_find_get_block(sb, ret_block); | ||
1873 | if (debug_bh) { | ||
1874 | BUFFER_TRACE(debug_bh, "state when allocated"); | ||
1875 | BUFFER_TRACE2(debug_bh, bitmap_bh, "bitmap state"); | ||
1876 | brelse(debug_bh); | ||
1877 | } | ||
1878 | } | ||
1879 | jbd_lock_bh_state(bitmap_bh); | ||
1880 | spin_lock(sb_bgl_lock(sbi, group_no)); | ||
1881 | if (buffer_jbd(bitmap_bh) && bh2jh(bitmap_bh)->b_committed_data) { | ||
1882 | int i; | ||
1883 | |||
1884 | for (i = 0; i < num; i++) { | ||
1885 | if (ext4_test_bit(grp_alloc_blk+i, | ||
1886 | bh2jh(bitmap_bh)->b_committed_data)) { | ||
1887 | printk("%s: block was unexpectedly set in " | ||
1888 | "b_committed_data\n", __func__); | ||
1889 | } | ||
1890 | } | ||
1891 | } | ||
1892 | ext4_debug("found bit %d\n", grp_alloc_blk); | ||
1893 | spin_unlock(sb_bgl_lock(sbi, group_no)); | ||
1894 | jbd_unlock_bh_state(bitmap_bh); | ||
1895 | #endif | ||
1896 | |||
1897 | if (ret_block + num - 1 >= ext4_blocks_count(es)) { | ||
1898 | ext4_error(sb, "ext4_new_block", | ||
1899 | "block(%llu) >= blocks count(%llu) - " | ||
1900 | "block_group = %lu, es == %p ", ret_block, | ||
1901 | ext4_blocks_count(es), group_no, es); | ||
1902 | goto out; | ||
1903 | } | ||
1904 | |||
1905 | /* | ||
1906 | * It is up to the caller to add the new buffer to a journal | ||
1907 | * list of some description. We don't know in advance whether | ||
1908 | * the caller wants to use it as metadata or data. | ||
1909 | */ | ||
1910 | spin_lock(sb_bgl_lock(sbi, group_no)); | ||
1911 | if (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) | ||
1912 | gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT); | ||
1913 | le16_add_cpu(&gdp->bg_free_blocks_count, -num); | ||
1914 | gdp->bg_checksum = ext4_group_desc_csum(sbi, group_no, gdp); | ||
1915 | spin_unlock(sb_bgl_lock(sbi, group_no)); | ||
1916 | if (!EXT4_I(inode)->i_delalloc_reserved_flag) | ||
1917 | percpu_counter_sub(&sbi->s_freeblocks_counter, num); | ||
1918 | |||
1919 | if (sbi->s_log_groups_per_flex) { | ||
1920 | ext4_group_t flex_group = ext4_flex_group(sbi, group_no); | ||
1921 | spin_lock(sb_bgl_lock(sbi, flex_group)); | ||
1922 | sbi->s_flex_groups[flex_group].free_blocks -= num; | ||
1923 | spin_unlock(sb_bgl_lock(sbi, flex_group)); | ||
1924 | } | ||
1925 | |||
1926 | BUFFER_TRACE(gdp_bh, "journal_dirty_metadata for group descriptor"); | ||
1927 | err = ext4_journal_dirty_metadata(handle, gdp_bh); | ||
1928 | if (!fatal) | ||
1929 | fatal = err; | ||
1930 | |||
1931 | sb->s_dirt = 1; | ||
1932 | if (fatal) | ||
1933 | goto out; | ||
1934 | |||
1935 | *errp = 0; | ||
1936 | brelse(bitmap_bh); | ||
1937 | DQUOT_FREE_BLOCK(inode, *count-num); | ||
1938 | *count = num; | ||
1939 | return ret_block; | ||
1940 | |||
1941 | io_error: | ||
1942 | *errp = -EIO; | ||
1943 | out: | ||
1944 | if (fatal) { | ||
1945 | *errp = fatal; | ||
1946 | ext4_std_error(sb, fatal); | ||
1947 | } | ||
1948 | /* | ||
1949 | * Undo the block allocation | ||
1950 | */ | ||
1951 | if (!performed_allocation) | ||
1952 | DQUOT_FREE_BLOCK(inode, *count); | ||
1953 | brelse(bitmap_bh); | ||
1954 | return 0; | ||
1955 | } | ||
1956 | |||
1957 | #define EXT4_META_BLOCK 0x1 | 684 | #define EXT4_META_BLOCK 0x1 |
1958 | 685 | ||
1959 | static ext4_fsblk_t do_blk_alloc(handle_t *handle, struct inode *inode, | 686 | static ext4_fsblk_t do_blk_alloc(handle_t *handle, struct inode *inode, |
@@ -1963,10 +690,6 @@ static ext4_fsblk_t do_blk_alloc(handle_t *handle, struct inode *inode, | |||
1963 | struct ext4_allocation_request ar; | 690 | struct ext4_allocation_request ar; |
1964 | ext4_fsblk_t ret; | 691 | ext4_fsblk_t ret; |
1965 | 692 | ||
1966 | if (!test_opt(inode->i_sb, MBALLOC)) { | ||
1967 | return ext4_old_new_blocks(handle, inode, goal, count, errp); | ||
1968 | } | ||
1969 | |||
1970 | memset(&ar, 0, sizeof(ar)); | 693 | memset(&ar, 0, sizeof(ar)); |
1971 | /* Fill with neighbour allocated blocks */ | 694 | /* Fill with neighbour allocated blocks */ |
1972 | 695 | ||
@@ -2008,7 +731,7 @@ ext4_fsblk_t ext4_new_meta_blocks(handle_t *handle, struct inode *inode, | |||
2008 | /* | 731 | /* |
2009 | * Account for the allocated meta blocks | 732 | * Account for the allocated meta blocks |
2010 | */ | 733 | */ |
2011 | if (!(*errp)) { | 734 | if (!(*errp) && EXT4_I(inode)->i_delalloc_reserved_flag) { |
2012 | spin_lock(&EXT4_I(inode)->i_block_reservation_lock); | 735 | spin_lock(&EXT4_I(inode)->i_block_reservation_lock); |
2013 | EXT4_I(inode)->i_allocated_meta_blocks += *count; | 736 | EXT4_I(inode)->i_allocated_meta_blocks += *count; |
2014 | spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); | 737 | spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); |
@@ -2093,10 +816,9 @@ ext4_fsblk_t ext4_count_free_blocks(struct super_block *sb) | |||
2093 | bitmap_count += x; | 816 | bitmap_count += x; |
2094 | } | 817 | } |
2095 | brelse(bitmap_bh); | 818 | brelse(bitmap_bh); |
2096 | printk("ext4_count_free_blocks: stored = %llu" | 819 | printk(KERN_DEBUG "ext4_count_free_blocks: stored = %llu" |
2097 | ", computed = %llu, %llu\n", | 820 | ", computed = %llu, %llu\n", ext4_free_blocks_count(es), |
2098 | ext4_free_blocks_count(es), | 821 | desc_count, bitmap_count); |
2099 | desc_count, bitmap_count); | ||
2100 | return bitmap_count; | 822 | return bitmap_count; |
2101 | #else | 823 | #else |
2102 | desc_count = 0; | 824 | desc_count = 0; |
@@ -2183,8 +905,9 @@ unsigned long ext4_bg_num_gdb(struct super_block *sb, ext4_group_t group) | |||
2183 | 905 | ||
2184 | if (!EXT4_HAS_INCOMPAT_FEATURE(sb,EXT4_FEATURE_INCOMPAT_META_BG) || | 906 | if (!EXT4_HAS_INCOMPAT_FEATURE(sb,EXT4_FEATURE_INCOMPAT_META_BG) || |
2185 | metagroup < first_meta_bg) | 907 | metagroup < first_meta_bg) |
2186 | return ext4_bg_num_gdb_nometa(sb,group); | 908 | return ext4_bg_num_gdb_nometa(sb, group); |
2187 | 909 | ||
2188 | return ext4_bg_num_gdb_meta(sb,group); | 910 | return ext4_bg_num_gdb_meta(sb,group); |
2189 | 911 | ||
2190 | } | 912 | } |
913 | |||
diff --git a/fs/ext4/bitmap.c b/fs/ext4/bitmap.c index d37ea6750454..0a7a6663c190 100644 --- a/fs/ext4/bitmap.c +++ b/fs/ext4/bitmap.c | |||
@@ -15,17 +15,17 @@ | |||
15 | 15 | ||
16 | static const int nibblemap[] = {4, 3, 3, 2, 3, 2, 2, 1, 3, 2, 2, 1, 2, 1, 1, 0}; | 16 | static const int nibblemap[] = {4, 3, 3, 2, 3, 2, 2, 1, 3, 2, 2, 1, 2, 1, 1, 0}; |
17 | 17 | ||
18 | unsigned long ext4_count_free (struct buffer_head * map, unsigned int numchars) | 18 | unsigned long ext4_count_free(struct buffer_head *map, unsigned int numchars) |
19 | { | 19 | { |
20 | unsigned int i; | 20 | unsigned int i; |
21 | unsigned long sum = 0; | 21 | unsigned long sum = 0; |
22 | 22 | ||
23 | if (!map) | 23 | if (!map) |
24 | return (0); | 24 | return 0; |
25 | for (i = 0; i < numchars; i++) | 25 | for (i = 0; i < numchars; i++) |
26 | sum += nibblemap[map->b_data[i] & 0xf] + | 26 | sum += nibblemap[map->b_data[i] & 0xf] + |
27 | nibblemap[(map->b_data[i] >> 4) & 0xf]; | 27 | nibblemap[(map->b_data[i] >> 4) & 0xf]; |
28 | return (sum); | 28 | return sum; |
29 | } | 29 | } |
30 | 30 | ||
31 | #endif /* EXT4FS_DEBUG */ | 31 | #endif /* EXT4FS_DEBUG */ |
diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c index ec8e33b45219..3ca6a2b7632d 100644 --- a/fs/ext4/dir.c +++ b/fs/ext4/dir.c | |||
@@ -33,10 +33,10 @@ static unsigned char ext4_filetype_table[] = { | |||
33 | }; | 33 | }; |
34 | 34 | ||
35 | static int ext4_readdir(struct file *, void *, filldir_t); | 35 | static int ext4_readdir(struct file *, void *, filldir_t); |
36 | static int ext4_dx_readdir(struct file * filp, | 36 | static int ext4_dx_readdir(struct file *filp, |
37 | void * dirent, filldir_t filldir); | 37 | void *dirent, filldir_t filldir); |
38 | static int ext4_release_dir (struct inode * inode, | 38 | static int ext4_release_dir(struct inode *inode, |
39 | struct file * filp); | 39 | struct file *filp); |
40 | 40 | ||
41 | const struct file_operations ext4_dir_operations = { | 41 | const struct file_operations ext4_dir_operations = { |
42 | .llseek = generic_file_llseek, | 42 | .llseek = generic_file_llseek, |
@@ -61,12 +61,12 @@ static unsigned char get_dtype(struct super_block *sb, int filetype) | |||
61 | } | 61 | } |
62 | 62 | ||
63 | 63 | ||
64 | int ext4_check_dir_entry (const char * function, struct inode * dir, | 64 | int ext4_check_dir_entry(const char *function, struct inode *dir, |
65 | struct ext4_dir_entry_2 * de, | 65 | struct ext4_dir_entry_2 *de, |
66 | struct buffer_head * bh, | 66 | struct buffer_head *bh, |
67 | unsigned long offset) | 67 | unsigned long offset) |
68 | { | 68 | { |
69 | const char * error_msg = NULL; | 69 | const char *error_msg = NULL; |
70 | const int rlen = ext4_rec_len_from_disk(de->rec_len); | 70 | const int rlen = ext4_rec_len_from_disk(de->rec_len); |
71 | 71 | ||
72 | if (rlen < EXT4_DIR_REC_LEN(1)) | 72 | if (rlen < EXT4_DIR_REC_LEN(1)) |
@@ -82,7 +82,7 @@ int ext4_check_dir_entry (const char * function, struct inode * dir, | |||
82 | error_msg = "inode out of bounds"; | 82 | error_msg = "inode out of bounds"; |
83 | 83 | ||
84 | if (error_msg != NULL) | 84 | if (error_msg != NULL) |
85 | ext4_error (dir->i_sb, function, | 85 | ext4_error(dir->i_sb, function, |
86 | "bad entry in directory #%lu: %s - " | 86 | "bad entry in directory #%lu: %s - " |
87 | "offset=%lu, inode=%lu, rec_len=%d, name_len=%d", | 87 | "offset=%lu, inode=%lu, rec_len=%d, name_len=%d", |
88 | dir->i_ino, error_msg, offset, | 88 | dir->i_ino, error_msg, offset, |
@@ -91,8 +91,8 @@ int ext4_check_dir_entry (const char * function, struct inode * dir, | |||
91 | return error_msg == NULL ? 1 : 0; | 91 | return error_msg == NULL ? 1 : 0; |
92 | } | 92 | } |
93 | 93 | ||
94 | static int ext4_readdir(struct file * filp, | 94 | static int ext4_readdir(struct file *filp, |
95 | void * dirent, filldir_t filldir) | 95 | void *dirent, filldir_t filldir) |
96 | { | 96 | { |
97 | int error = 0; | 97 | int error = 0; |
98 | unsigned long offset; | 98 | unsigned long offset; |
@@ -102,6 +102,7 @@ static int ext4_readdir(struct file * filp, | |||
102 | int err; | 102 | int err; |
103 | struct inode *inode = filp->f_path.dentry->d_inode; | 103 | struct inode *inode = filp->f_path.dentry->d_inode; |
104 | int ret = 0; | 104 | int ret = 0; |
105 | int dir_has_error = 0; | ||
105 | 106 | ||
106 | sb = inode->i_sb; | 107 | sb = inode->i_sb; |
107 | 108 | ||
@@ -148,9 +149,13 @@ static int ext4_readdir(struct file * filp, | |||
148 | * of recovering data when there's a bad sector | 149 | * of recovering data when there's a bad sector |
149 | */ | 150 | */ |
150 | if (!bh) { | 151 | if (!bh) { |
151 | ext4_error (sb, "ext4_readdir", | 152 | if (!dir_has_error) { |
152 | "directory #%lu contains a hole at offset %lu", | 153 | ext4_error(sb, __func__, "directory #%lu " |
153 | inode->i_ino, (unsigned long)filp->f_pos); | 154 | "contains a hole at offset %Lu", |
155 | inode->i_ino, | ||
156 | (unsigned long long) filp->f_pos); | ||
157 | dir_has_error = 1; | ||
158 | } | ||
154 | /* corrupt size? Maybe no more blocks to read */ | 159 | /* corrupt size? Maybe no more blocks to read */ |
155 | if (filp->f_pos > inode->i_blocks << 9) | 160 | if (filp->f_pos > inode->i_blocks << 9) |
156 | break; | 161 | break; |
@@ -187,14 +192,14 @@ revalidate: | |||
187 | while (!error && filp->f_pos < inode->i_size | 192 | while (!error && filp->f_pos < inode->i_size |
188 | && offset < sb->s_blocksize) { | 193 | && offset < sb->s_blocksize) { |
189 | de = (struct ext4_dir_entry_2 *) (bh->b_data + offset); | 194 | de = (struct ext4_dir_entry_2 *) (bh->b_data + offset); |
190 | if (!ext4_check_dir_entry ("ext4_readdir", inode, de, | 195 | if (!ext4_check_dir_entry("ext4_readdir", inode, de, |
191 | bh, offset)) { | 196 | bh, offset)) { |
192 | /* | 197 | /* |
193 | * On error, skip the f_pos to the next block | 198 | * On error, skip the f_pos to the next block |
194 | */ | 199 | */ |
195 | filp->f_pos = (filp->f_pos | | 200 | filp->f_pos = (filp->f_pos | |
196 | (sb->s_blocksize - 1)) + 1; | 201 | (sb->s_blocksize - 1)) + 1; |
197 | brelse (bh); | 202 | brelse(bh); |
198 | ret = stored; | 203 | ret = stored; |
199 | goto out; | 204 | goto out; |
200 | } | 205 | } |
@@ -218,12 +223,12 @@ revalidate: | |||
218 | break; | 223 | break; |
219 | if (version != filp->f_version) | 224 | if (version != filp->f_version) |
220 | goto revalidate; | 225 | goto revalidate; |
221 | stored ++; | 226 | stored++; |
222 | } | 227 | } |
223 | filp->f_pos += ext4_rec_len_from_disk(de->rec_len); | 228 | filp->f_pos += ext4_rec_len_from_disk(de->rec_len); |
224 | } | 229 | } |
225 | offset = 0; | 230 | offset = 0; |
226 | brelse (bh); | 231 | brelse(bh); |
227 | } | 232 | } |
228 | out: | 233 | out: |
229 | return ret; | 234 | return ret; |
@@ -290,9 +295,9 @@ static void free_rb_tree_fname(struct rb_root *root) | |||
290 | parent = rb_parent(n); | 295 | parent = rb_parent(n); |
291 | fname = rb_entry(n, struct fname, rb_hash); | 296 | fname = rb_entry(n, struct fname, rb_hash); |
292 | while (fname) { | 297 | while (fname) { |
293 | struct fname * old = fname; | 298 | struct fname *old = fname; |
294 | fname = fname->next; | 299 | fname = fname->next; |
295 | kfree (old); | 300 | kfree(old); |
296 | } | 301 | } |
297 | if (!parent) | 302 | if (!parent) |
298 | root->rb_node = NULL; | 303 | root->rb_node = NULL; |
@@ -331,7 +336,7 @@ int ext4_htree_store_dirent(struct file *dir_file, __u32 hash, | |||
331 | struct ext4_dir_entry_2 *dirent) | 336 | struct ext4_dir_entry_2 *dirent) |
332 | { | 337 | { |
333 | struct rb_node **p, *parent = NULL; | 338 | struct rb_node **p, *parent = NULL; |
334 | struct fname * fname, *new_fn; | 339 | struct fname *fname, *new_fn; |
335 | struct dir_private_info *info; | 340 | struct dir_private_info *info; |
336 | int len; | 341 | int len; |
337 | 342 | ||
@@ -388,19 +393,20 @@ int ext4_htree_store_dirent(struct file *dir_file, __u32 hash, | |||
388 | * for all entres on the fname linked list. (Normally there is only | 393 | * for all entres on the fname linked list. (Normally there is only |
389 | * one entry on the linked list, unless there are 62 bit hash collisions.) | 394 | * one entry on the linked list, unless there are 62 bit hash collisions.) |
390 | */ | 395 | */ |
391 | static int call_filldir(struct file * filp, void * dirent, | 396 | static int call_filldir(struct file *filp, void *dirent, |
392 | filldir_t filldir, struct fname *fname) | 397 | filldir_t filldir, struct fname *fname) |
393 | { | 398 | { |
394 | struct dir_private_info *info = filp->private_data; | 399 | struct dir_private_info *info = filp->private_data; |
395 | loff_t curr_pos; | 400 | loff_t curr_pos; |
396 | struct inode *inode = filp->f_path.dentry->d_inode; | 401 | struct inode *inode = filp->f_path.dentry->d_inode; |
397 | struct super_block * sb; | 402 | struct super_block *sb; |
398 | int error; | 403 | int error; |
399 | 404 | ||
400 | sb = inode->i_sb; | 405 | sb = inode->i_sb; |
401 | 406 | ||
402 | if (!fname) { | 407 | if (!fname) { |
403 | printk("call_filldir: called with null fname?!?\n"); | 408 | printk(KERN_ERR "ext4: call_filldir: called with " |
409 | "null fname?!?\n"); | ||
404 | return 0; | 410 | return 0; |
405 | } | 411 | } |
406 | curr_pos = hash2pos(fname->hash, fname->minor_hash); | 412 | curr_pos = hash2pos(fname->hash, fname->minor_hash); |
@@ -419,8 +425,8 @@ static int call_filldir(struct file * filp, void * dirent, | |||
419 | return 0; | 425 | return 0; |
420 | } | 426 | } |
421 | 427 | ||
422 | static int ext4_dx_readdir(struct file * filp, | 428 | static int ext4_dx_readdir(struct file *filp, |
423 | void * dirent, filldir_t filldir) | 429 | void *dirent, filldir_t filldir) |
424 | { | 430 | { |
425 | struct dir_private_info *info = filp->private_data; | 431 | struct dir_private_info *info = filp->private_data; |
426 | struct inode *inode = filp->f_path.dentry->d_inode; | 432 | struct inode *inode = filp->f_path.dentry->d_inode; |
@@ -511,7 +517,7 @@ finished: | |||
511 | return 0; | 517 | return 0; |
512 | } | 518 | } |
513 | 519 | ||
514 | static int ext4_release_dir (struct inode * inode, struct file * filp) | 520 | static int ext4_release_dir(struct inode *inode, struct file *filp) |
515 | { | 521 | { |
516 | if (filp->private_data) | 522 | if (filp->private_data) |
517 | ext4_htree_free_dir_info(filp->private_data); | 523 | ext4_htree_free_dir_info(filp->private_data); |
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 295003241d3d..6690a41cdd9f 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h | |||
@@ -44,9 +44,9 @@ | |||
44 | #ifdef EXT4FS_DEBUG | 44 | #ifdef EXT4FS_DEBUG |
45 | #define ext4_debug(f, a...) \ | 45 | #define ext4_debug(f, a...) \ |
46 | do { \ | 46 | do { \ |
47 | printk (KERN_DEBUG "EXT4-fs DEBUG (%s, %d): %s:", \ | 47 | printk(KERN_DEBUG "EXT4-fs DEBUG (%s, %d): %s:", \ |
48 | __FILE__, __LINE__, __func__); \ | 48 | __FILE__, __LINE__, __func__); \ |
49 | printk (KERN_DEBUG f, ## a); \ | 49 | printk(KERN_DEBUG f, ## a); \ |
50 | } while (0) | 50 | } while (0) |
51 | #else | 51 | #else |
52 | #define ext4_debug(f, a...) do {} while (0) | 52 | #define ext4_debug(f, a...) do {} while (0) |
@@ -128,7 +128,7 @@ struct ext4_allocation_request { | |||
128 | #else | 128 | #else |
129 | # define EXT4_BLOCK_SIZE(s) (EXT4_MIN_BLOCK_SIZE << (s)->s_log_block_size) | 129 | # define EXT4_BLOCK_SIZE(s) (EXT4_MIN_BLOCK_SIZE << (s)->s_log_block_size) |
130 | #endif | 130 | #endif |
131 | #define EXT4_ADDR_PER_BLOCK(s) (EXT4_BLOCK_SIZE(s) / sizeof (__u32)) | 131 | #define EXT4_ADDR_PER_BLOCK(s) (EXT4_BLOCK_SIZE(s) / sizeof(__u32)) |
132 | #ifdef __KERNEL__ | 132 | #ifdef __KERNEL__ |
133 | # define EXT4_BLOCK_SIZE_BITS(s) ((s)->s_blocksize_bits) | 133 | # define EXT4_BLOCK_SIZE_BITS(s) ((s)->s_blocksize_bits) |
134 | #else | 134 | #else |
@@ -245,7 +245,7 @@ struct flex_groups { | |||
245 | #define EXT4_RESERVED_FL 0x80000000 /* reserved for ext4 lib */ | 245 | #define EXT4_RESERVED_FL 0x80000000 /* reserved for ext4 lib */ |
246 | 246 | ||
247 | #define EXT4_FL_USER_VISIBLE 0x000BDFFF /* User visible flags */ | 247 | #define EXT4_FL_USER_VISIBLE 0x000BDFFF /* User visible flags */ |
248 | #define EXT4_FL_USER_MODIFIABLE 0x000380FF /* User modifiable flags */ | 248 | #define EXT4_FL_USER_MODIFIABLE 0x000B80FF /* User modifiable flags */ |
249 | 249 | ||
250 | /* | 250 | /* |
251 | * Inode dynamic state flags | 251 | * Inode dynamic state flags |
@@ -291,8 +291,6 @@ struct ext4_new_group_data { | |||
291 | #define EXT4_IOC_SETFLAGS FS_IOC_SETFLAGS | 291 | #define EXT4_IOC_SETFLAGS FS_IOC_SETFLAGS |
292 | #define EXT4_IOC_GETVERSION _IOR('f', 3, long) | 292 | #define EXT4_IOC_GETVERSION _IOR('f', 3, long) |
293 | #define EXT4_IOC_SETVERSION _IOW('f', 4, long) | 293 | #define EXT4_IOC_SETVERSION _IOW('f', 4, long) |
294 | #define EXT4_IOC_GROUP_EXTEND _IOW('f', 7, unsigned long) | ||
295 | #define EXT4_IOC_GROUP_ADD _IOW('f', 8,struct ext4_new_group_input) | ||
296 | #define EXT4_IOC_GETVERSION_OLD FS_IOC_GETVERSION | 294 | #define EXT4_IOC_GETVERSION_OLD FS_IOC_GETVERSION |
297 | #define EXT4_IOC_SETVERSION_OLD FS_IOC_SETVERSION | 295 | #define EXT4_IOC_SETVERSION_OLD FS_IOC_SETVERSION |
298 | #ifdef CONFIG_JBD2_DEBUG | 296 | #ifdef CONFIG_JBD2_DEBUG |
@@ -300,7 +298,10 @@ struct ext4_new_group_data { | |||
300 | #endif | 298 | #endif |
301 | #define EXT4_IOC_GETRSVSZ _IOR('f', 5, long) | 299 | #define EXT4_IOC_GETRSVSZ _IOR('f', 5, long) |
302 | #define EXT4_IOC_SETRSVSZ _IOW('f', 6, long) | 300 | #define EXT4_IOC_SETRSVSZ _IOW('f', 6, long) |
303 | #define EXT4_IOC_MIGRATE _IO('f', 7) | 301 | #define EXT4_IOC_GROUP_EXTEND _IOW('f', 7, unsigned long) |
302 | #define EXT4_IOC_GROUP_ADD _IOW('f', 8, struct ext4_new_group_input) | ||
303 | #define EXT4_IOC_MIGRATE _IO('f', 9) | ||
304 | /* note ioctl 11 reserved for filesystem-independent FIEMAP ioctl */ | ||
304 | 305 | ||
305 | /* | 306 | /* |
306 | * ioctl commands in 32 bit emulation | 307 | * ioctl commands in 32 bit emulation |
@@ -538,8 +539,9 @@ do { \ | |||
538 | #define EXT4_MOUNT_JOURNAL_CHECKSUM 0x800000 /* Journal checksums */ | 539 | #define EXT4_MOUNT_JOURNAL_CHECKSUM 0x800000 /* Journal checksums */ |
539 | #define EXT4_MOUNT_JOURNAL_ASYNC_COMMIT 0x1000000 /* Journal Async Commit */ | 540 | #define EXT4_MOUNT_JOURNAL_ASYNC_COMMIT 0x1000000 /* Journal Async Commit */ |
540 | #define EXT4_MOUNT_I_VERSION 0x2000000 /* i_version support */ | 541 | #define EXT4_MOUNT_I_VERSION 0x2000000 /* i_version support */ |
541 | #define EXT4_MOUNT_MBALLOC 0x4000000 /* Buddy allocation support */ | ||
542 | #define EXT4_MOUNT_DELALLOC 0x8000000 /* Delalloc support */ | 542 | #define EXT4_MOUNT_DELALLOC 0x8000000 /* Delalloc support */ |
543 | #define EXT4_MOUNT_DATA_ERR_ABORT 0x10000000 /* Abort on file data write */ | ||
544 | |||
543 | /* Compatibility, for having both ext2_fs.h and ext4_fs.h included at once */ | 545 | /* Compatibility, for having both ext2_fs.h and ext4_fs.h included at once */ |
544 | #ifndef _LINUX_EXT2_FS_H | 546 | #ifndef _LINUX_EXT2_FS_H |
545 | #define clear_opt(o, opt) o &= ~EXT4_MOUNT_##opt | 547 | #define clear_opt(o, opt) o &= ~EXT4_MOUNT_##opt |
@@ -667,7 +669,7 @@ struct ext4_super_block { | |||
667 | }; | 669 | }; |
668 | 670 | ||
669 | #ifdef __KERNEL__ | 671 | #ifdef __KERNEL__ |
670 | static inline struct ext4_sb_info * EXT4_SB(struct super_block *sb) | 672 | static inline struct ext4_sb_info *EXT4_SB(struct super_block *sb) |
671 | { | 673 | { |
672 | return sb->s_fs_info; | 674 | return sb->s_fs_info; |
673 | } | 675 | } |
@@ -725,11 +727,11 @@ static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino) | |||
725 | */ | 727 | */ |
726 | 728 | ||
727 | #define EXT4_HAS_COMPAT_FEATURE(sb,mask) \ | 729 | #define EXT4_HAS_COMPAT_FEATURE(sb,mask) \ |
728 | ( EXT4_SB(sb)->s_es->s_feature_compat & cpu_to_le32(mask) ) | 730 | (EXT4_SB(sb)->s_es->s_feature_compat & cpu_to_le32(mask)) |
729 | #define EXT4_HAS_RO_COMPAT_FEATURE(sb,mask) \ | 731 | #define EXT4_HAS_RO_COMPAT_FEATURE(sb,mask) \ |
730 | ( EXT4_SB(sb)->s_es->s_feature_ro_compat & cpu_to_le32(mask) ) | 732 | (EXT4_SB(sb)->s_es->s_feature_ro_compat & cpu_to_le32(mask)) |
731 | #define EXT4_HAS_INCOMPAT_FEATURE(sb,mask) \ | 733 | #define EXT4_HAS_INCOMPAT_FEATURE(sb,mask) \ |
732 | ( EXT4_SB(sb)->s_es->s_feature_incompat & cpu_to_le32(mask) ) | 734 | (EXT4_SB(sb)->s_es->s_feature_incompat & cpu_to_le32(mask)) |
733 | #define EXT4_SET_COMPAT_FEATURE(sb,mask) \ | 735 | #define EXT4_SET_COMPAT_FEATURE(sb,mask) \ |
734 | EXT4_SB(sb)->s_es->s_feature_compat |= cpu_to_le32(mask) | 736 | EXT4_SB(sb)->s_es->s_feature_compat |= cpu_to_le32(mask) |
735 | #define EXT4_SET_RO_COMPAT_FEATURE(sb,mask) \ | 737 | #define EXT4_SET_RO_COMPAT_FEATURE(sb,mask) \ |
@@ -789,6 +791,8 @@ static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino) | |||
789 | #define EXT4_DEF_RESUID 0 | 791 | #define EXT4_DEF_RESUID 0 |
790 | #define EXT4_DEF_RESGID 0 | 792 | #define EXT4_DEF_RESGID 0 |
791 | 793 | ||
794 | #define EXT4_DEF_INODE_READAHEAD_BLKS 32 | ||
795 | |||
792 | /* | 796 | /* |
793 | * Default mount options | 797 | * Default mount options |
794 | */ | 798 | */ |
@@ -954,6 +958,24 @@ ext4_group_first_block_no(struct super_block *sb, ext4_group_t group_no) | |||
954 | void ext4_get_group_no_and_offset(struct super_block *sb, ext4_fsblk_t blocknr, | 958 | void ext4_get_group_no_and_offset(struct super_block *sb, ext4_fsblk_t blocknr, |
955 | unsigned long *blockgrpp, ext4_grpblk_t *offsetp); | 959 | unsigned long *blockgrpp, ext4_grpblk_t *offsetp); |
956 | 960 | ||
961 | extern struct proc_dir_entry *ext4_proc_root; | ||
962 | |||
963 | #ifdef CONFIG_PROC_FS | ||
964 | extern const struct file_operations ext4_ui_proc_fops; | ||
965 | |||
966 | #define EXT4_PROC_HANDLER(name, var) \ | ||
967 | do { \ | ||
968 | proc = proc_create_data(name, mode, sbi->s_proc, \ | ||
969 | &ext4_ui_proc_fops, &sbi->s_##var); \ | ||
970 | if (proc == NULL) { \ | ||
971 | printk(KERN_ERR "EXT4-fs: can't create %s\n", name); \ | ||
972 | goto err_out; \ | ||
973 | } \ | ||
974 | } while (0) | ||
975 | #else | ||
976 | #define EXT4_PROC_HANDLER(name, var) | ||
977 | #endif | ||
978 | |||
957 | /* | 979 | /* |
958 | * Function prototypes | 980 | * Function prototypes |
959 | */ | 981 | */ |
@@ -981,23 +1003,20 @@ extern ext4_fsblk_t ext4_new_meta_blocks(handle_t *handle, struct inode *inode, | |||
981 | extern ext4_fsblk_t ext4_new_blocks(handle_t *handle, struct inode *inode, | 1003 | extern ext4_fsblk_t ext4_new_blocks(handle_t *handle, struct inode *inode, |
982 | ext4_lblk_t iblock, ext4_fsblk_t goal, | 1004 | ext4_lblk_t iblock, ext4_fsblk_t goal, |
983 | unsigned long *count, int *errp); | 1005 | unsigned long *count, int *errp); |
984 | extern ext4_fsblk_t ext4_old_new_blocks(handle_t *handle, struct inode *inode, | 1006 | extern int ext4_claim_free_blocks(struct ext4_sb_info *sbi, s64 nblocks); |
985 | ext4_fsblk_t goal, unsigned long *count, int *errp); | ||
986 | extern ext4_fsblk_t ext4_has_free_blocks(struct ext4_sb_info *sbi, | 1007 | extern ext4_fsblk_t ext4_has_free_blocks(struct ext4_sb_info *sbi, |
987 | ext4_fsblk_t nblocks); | 1008 | s64 nblocks); |
988 | extern void ext4_free_blocks (handle_t *handle, struct inode *inode, | 1009 | extern void ext4_free_blocks(handle_t *handle, struct inode *inode, |
989 | ext4_fsblk_t block, unsigned long count, int metadata); | 1010 | ext4_fsblk_t block, unsigned long count, int metadata); |
990 | extern void ext4_free_blocks_sb (handle_t *handle, struct super_block *sb, | 1011 | extern void ext4_free_blocks_sb(handle_t *handle, struct super_block *sb, |
991 | ext4_fsblk_t block, unsigned long count, | 1012 | ext4_fsblk_t block, unsigned long count, |
992 | unsigned long *pdquot_freed_blocks); | 1013 | unsigned long *pdquot_freed_blocks); |
993 | extern ext4_fsblk_t ext4_count_free_blocks (struct super_block *); | 1014 | extern ext4_fsblk_t ext4_count_free_blocks(struct super_block *); |
994 | extern void ext4_check_blocks_bitmap (struct super_block *); | 1015 | extern void ext4_check_blocks_bitmap(struct super_block *); |
995 | extern struct ext4_group_desc * ext4_get_group_desc(struct super_block * sb, | 1016 | extern struct ext4_group_desc * ext4_get_group_desc(struct super_block * sb, |
996 | ext4_group_t block_group, | 1017 | ext4_group_t block_group, |
997 | struct buffer_head ** bh); | 1018 | struct buffer_head ** bh); |
998 | extern int ext4_should_retry_alloc(struct super_block *sb, int *retries); | 1019 | extern int ext4_should_retry_alloc(struct super_block *sb, int *retries); |
999 | extern void ext4_init_block_alloc_info(struct inode *); | ||
1000 | extern void ext4_rsv_window_add(struct super_block *sb, struct ext4_reserve_window_node *rsv); | ||
1001 | 1020 | ||
1002 | /* dir.c */ | 1021 | /* dir.c */ |
1003 | extern int ext4_check_dir_entry(const char *, struct inode *, | 1022 | extern int ext4_check_dir_entry(const char *, struct inode *, |
@@ -1009,20 +1028,20 @@ extern int ext4_htree_store_dirent(struct file *dir_file, __u32 hash, | |||
1009 | extern void ext4_htree_free_dir_info(struct dir_private_info *p); | 1028 | extern void ext4_htree_free_dir_info(struct dir_private_info *p); |
1010 | 1029 | ||
1011 | /* fsync.c */ | 1030 | /* fsync.c */ |
1012 | extern int ext4_sync_file (struct file *, struct dentry *, int); | 1031 | extern int ext4_sync_file(struct file *, struct dentry *, int); |
1013 | 1032 | ||
1014 | /* hash.c */ | 1033 | /* hash.c */ |
1015 | extern int ext4fs_dirhash(const char *name, int len, struct | 1034 | extern int ext4fs_dirhash(const char *name, int len, struct |
1016 | dx_hash_info *hinfo); | 1035 | dx_hash_info *hinfo); |
1017 | 1036 | ||
1018 | /* ialloc.c */ | 1037 | /* ialloc.c */ |
1019 | extern struct inode * ext4_new_inode (handle_t *, struct inode *, int); | 1038 | extern struct inode * ext4_new_inode(handle_t *, struct inode *, int); |
1020 | extern void ext4_free_inode (handle_t *, struct inode *); | 1039 | extern void ext4_free_inode(handle_t *, struct inode *); |
1021 | extern struct inode * ext4_orphan_get (struct super_block *, unsigned long); | 1040 | extern struct inode * ext4_orphan_get(struct super_block *, unsigned long); |
1022 | extern unsigned long ext4_count_free_inodes (struct super_block *); | 1041 | extern unsigned long ext4_count_free_inodes(struct super_block *); |
1023 | extern unsigned long ext4_count_dirs (struct super_block *); | 1042 | extern unsigned long ext4_count_dirs(struct super_block *); |
1024 | extern void ext4_check_inodes_bitmap (struct super_block *); | 1043 | extern void ext4_check_inodes_bitmap(struct super_block *); |
1025 | extern unsigned long ext4_count_free (struct buffer_head *, unsigned); | 1044 | extern unsigned long ext4_count_free(struct buffer_head *, unsigned); |
1026 | 1045 | ||
1027 | /* mballoc.c */ | 1046 | /* mballoc.c */ |
1028 | extern long ext4_mb_stats; | 1047 | extern long ext4_mb_stats; |
@@ -1032,7 +1051,7 @@ extern int ext4_mb_release(struct super_block *); | |||
1032 | extern ext4_fsblk_t ext4_mb_new_blocks(handle_t *, | 1051 | extern ext4_fsblk_t ext4_mb_new_blocks(handle_t *, |
1033 | struct ext4_allocation_request *, int *); | 1052 | struct ext4_allocation_request *, int *); |
1034 | extern int ext4_mb_reserve_blocks(struct super_block *, int); | 1053 | extern int ext4_mb_reserve_blocks(struct super_block *, int); |
1035 | extern void ext4_mb_discard_inode_preallocations(struct inode *); | 1054 | extern void ext4_discard_preallocations(struct inode *); |
1036 | extern int __init init_ext4_mballoc(void); | 1055 | extern int __init init_ext4_mballoc(void); |
1037 | extern void exit_ext4_mballoc(void); | 1056 | extern void exit_ext4_mballoc(void); |
1038 | extern void ext4_mb_free_blocks(handle_t *, struct inode *, | 1057 | extern void ext4_mb_free_blocks(handle_t *, struct inode *, |
@@ -1050,24 +1069,25 @@ struct buffer_head *ext4_getblk(handle_t *, struct inode *, | |||
1050 | ext4_lblk_t, int, int *); | 1069 | ext4_lblk_t, int, int *); |
1051 | struct buffer_head *ext4_bread(handle_t *, struct inode *, | 1070 | struct buffer_head *ext4_bread(handle_t *, struct inode *, |
1052 | ext4_lblk_t, int, int *); | 1071 | ext4_lblk_t, int, int *); |
1072 | int ext4_get_block(struct inode *inode, sector_t iblock, | ||
1073 | struct buffer_head *bh_result, int create); | ||
1053 | int ext4_get_blocks_handle(handle_t *handle, struct inode *inode, | 1074 | int ext4_get_blocks_handle(handle_t *handle, struct inode *inode, |
1054 | ext4_lblk_t iblock, unsigned long maxblocks, | 1075 | ext4_lblk_t iblock, unsigned long maxblocks, |
1055 | struct buffer_head *bh_result, | 1076 | struct buffer_head *bh_result, |
1056 | int create, int extend_disksize); | 1077 | int create, int extend_disksize); |
1057 | 1078 | ||
1058 | extern struct inode *ext4_iget(struct super_block *, unsigned long); | 1079 | extern struct inode *ext4_iget(struct super_block *, unsigned long); |
1059 | extern int ext4_write_inode (struct inode *, int); | 1080 | extern int ext4_write_inode(struct inode *, int); |
1060 | extern int ext4_setattr (struct dentry *, struct iattr *); | 1081 | extern int ext4_setattr(struct dentry *, struct iattr *); |
1061 | extern int ext4_getattr(struct vfsmount *mnt, struct dentry *dentry, | 1082 | extern int ext4_getattr(struct vfsmount *mnt, struct dentry *dentry, |
1062 | struct kstat *stat); | 1083 | struct kstat *stat); |
1063 | extern void ext4_delete_inode (struct inode *); | 1084 | extern void ext4_delete_inode(struct inode *); |
1064 | extern int ext4_sync_inode (handle_t *, struct inode *); | 1085 | extern int ext4_sync_inode(handle_t *, struct inode *); |
1065 | extern void ext4_discard_reservation (struct inode *); | ||
1066 | extern void ext4_dirty_inode(struct inode *); | 1086 | extern void ext4_dirty_inode(struct inode *); |
1067 | extern int ext4_change_inode_journal_flag(struct inode *, int); | 1087 | extern int ext4_change_inode_journal_flag(struct inode *, int); |
1068 | extern int ext4_get_inode_loc(struct inode *, struct ext4_iloc *); | 1088 | extern int ext4_get_inode_loc(struct inode *, struct ext4_iloc *); |
1069 | extern int ext4_can_truncate(struct inode *inode); | 1089 | extern int ext4_can_truncate(struct inode *inode); |
1070 | extern void ext4_truncate (struct inode *); | 1090 | extern void ext4_truncate(struct inode *); |
1071 | extern void ext4_set_inode_flags(struct inode *); | 1091 | extern void ext4_set_inode_flags(struct inode *); |
1072 | extern void ext4_get_inode_flags(struct ext4_inode_info *); | 1092 | extern void ext4_get_inode_flags(struct ext4_inode_info *); |
1073 | extern void ext4_set_aops(struct inode *inode); | 1093 | extern void ext4_set_aops(struct inode *inode); |
@@ -1080,11 +1100,10 @@ extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct page *page); | |||
1080 | 1100 | ||
1081 | /* ioctl.c */ | 1101 | /* ioctl.c */ |
1082 | extern long ext4_ioctl(struct file *, unsigned int, unsigned long); | 1102 | extern long ext4_ioctl(struct file *, unsigned int, unsigned long); |
1083 | extern long ext4_compat_ioctl (struct file *, unsigned int, unsigned long); | 1103 | extern long ext4_compat_ioctl(struct file *, unsigned int, unsigned long); |
1084 | 1104 | ||
1085 | /* migrate.c */ | 1105 | /* migrate.c */ |
1086 | extern int ext4_ext_migrate(struct inode *, struct file *, unsigned int, | 1106 | extern int ext4_ext_migrate(struct inode *); |
1087 | unsigned long); | ||
1088 | /* namei.c */ | 1107 | /* namei.c */ |
1089 | extern int ext4_orphan_add(handle_t *, struct inode *); | 1108 | extern int ext4_orphan_add(handle_t *, struct inode *); |
1090 | extern int ext4_orphan_del(handle_t *, struct inode *); | 1109 | extern int ext4_orphan_del(handle_t *, struct inode *); |
@@ -1099,14 +1118,14 @@ extern int ext4_group_extend(struct super_block *sb, | |||
1099 | ext4_fsblk_t n_blocks_count); | 1118 | ext4_fsblk_t n_blocks_count); |
1100 | 1119 | ||
1101 | /* super.c */ | 1120 | /* super.c */ |
1102 | extern void ext4_error (struct super_block *, const char *, const char *, ...) | 1121 | extern void ext4_error(struct super_block *, const char *, const char *, ...) |
1103 | __attribute__ ((format (printf, 3, 4))); | 1122 | __attribute__ ((format (printf, 3, 4))); |
1104 | extern void __ext4_std_error (struct super_block *, const char *, int); | 1123 | extern void __ext4_std_error(struct super_block *, const char *, int); |
1105 | extern void ext4_abort (struct super_block *, const char *, const char *, ...) | 1124 | extern void ext4_abort(struct super_block *, const char *, const char *, ...) |
1106 | __attribute__ ((format (printf, 3, 4))); | 1125 | __attribute__ ((format (printf, 3, 4))); |
1107 | extern void ext4_warning (struct super_block *, const char *, const char *, ...) | 1126 | extern void ext4_warning(struct super_block *, const char *, const char *, ...) |
1108 | __attribute__ ((format (printf, 3, 4))); | 1127 | __attribute__ ((format (printf, 3, 4))); |
1109 | extern void ext4_update_dynamic_rev (struct super_block *sb); | 1128 | extern void ext4_update_dynamic_rev(struct super_block *sb); |
1110 | extern int ext4_update_compat_feature(handle_t *handle, struct super_block *sb, | 1129 | extern int ext4_update_compat_feature(handle_t *handle, struct super_block *sb, |
1111 | __u32 compat); | 1130 | __u32 compat); |
1112 | extern int ext4_update_rocompat_feature(handle_t *handle, | 1131 | extern int ext4_update_rocompat_feature(handle_t *handle, |
@@ -1179,7 +1198,7 @@ static inline void ext4_isize_set(struct ext4_inode *raw_inode, loff_t i_size) | |||
1179 | 1198 | ||
1180 | static inline | 1199 | static inline |
1181 | struct ext4_group_info *ext4_get_group_info(struct super_block *sb, | 1200 | struct ext4_group_info *ext4_get_group_info(struct super_block *sb, |
1182 | ext4_group_t group) | 1201 | ext4_group_t group) |
1183 | { | 1202 | { |
1184 | struct ext4_group_info ***grp_info; | 1203 | struct ext4_group_info ***grp_info; |
1185 | long indexv, indexh; | 1204 | long indexv, indexh; |
@@ -1207,6 +1226,28 @@ do { \ | |||
1207 | __ext4_std_error((sb), __func__, (errno)); \ | 1226 | __ext4_std_error((sb), __func__, (errno)); \ |
1208 | } while (0) | 1227 | } while (0) |
1209 | 1228 | ||
1229 | #ifdef CONFIG_SMP | ||
1230 | /* Each CPU can accumulate FBC_BATCH blocks in their local | ||
1231 | * counters. So we need to make sure we have free blocks more | ||
1232 | * than FBC_BATCH * nr_cpu_ids. Also add a window of 4 times. | ||
1233 | */ | ||
1234 | #define EXT4_FREEBLOCKS_WATERMARK (4 * (FBC_BATCH * nr_cpu_ids)) | ||
1235 | #else | ||
1236 | #define EXT4_FREEBLOCKS_WATERMARK 0 | ||
1237 | #endif | ||
1238 | |||
1239 | static inline void ext4_update_i_disksize(struct inode *inode, loff_t newsize) | ||
1240 | { | ||
1241 | /* | ||
1242 | * XXX: replace with spinlock if seen contended -bzzz | ||
1243 | */ | ||
1244 | down_write(&EXT4_I(inode)->i_data_sem); | ||
1245 | if (newsize > EXT4_I(inode)->i_disksize) | ||
1246 | EXT4_I(inode)->i_disksize = newsize; | ||
1247 | up_write(&EXT4_I(inode)->i_data_sem); | ||
1248 | return ; | ||
1249 | } | ||
1250 | |||
1210 | /* | 1251 | /* |
1211 | * Inodes and files operations | 1252 | * Inodes and files operations |
1212 | */ | 1253 | */ |
diff --git a/fs/ext4/ext4_extents.h b/fs/ext4/ext4_extents.h index d33dc56d6986..bec7ce59fc0d 100644 --- a/fs/ext4/ext4_extents.h +++ b/fs/ext4/ext4_extents.h | |||
@@ -124,6 +124,19 @@ struct ext4_ext_path { | |||
124 | #define EXT4_EXT_CACHE_GAP 1 | 124 | #define EXT4_EXT_CACHE_GAP 1 |
125 | #define EXT4_EXT_CACHE_EXTENT 2 | 125 | #define EXT4_EXT_CACHE_EXTENT 2 |
126 | 126 | ||
127 | /* | ||
128 | * to be called by ext4_ext_walk_space() | ||
129 | * negative retcode - error | ||
130 | * positive retcode - signal for ext4_ext_walk_space(), see below | ||
131 | * callback must return valid extent (passed or newly created) | ||
132 | */ | ||
133 | typedef int (*ext_prepare_callback)(struct inode *, struct ext4_ext_path *, | ||
134 | struct ext4_ext_cache *, | ||
135 | struct ext4_extent *, void *); | ||
136 | |||
137 | #define EXT_CONTINUE 0 | ||
138 | #define EXT_BREAK 1 | ||
139 | #define EXT_REPEAT 2 | ||
127 | 140 | ||
128 | #define EXT_MAX_BLOCK 0xffffffff | 141 | #define EXT_MAX_BLOCK 0xffffffff |
129 | 142 | ||
@@ -224,6 +237,8 @@ extern int ext4_ext_try_to_merge(struct inode *inode, | |||
224 | struct ext4_extent *); | 237 | struct ext4_extent *); |
225 | extern unsigned int ext4_ext_check_overlap(struct inode *, struct ext4_extent *, struct ext4_ext_path *); | 238 | extern unsigned int ext4_ext_check_overlap(struct inode *, struct ext4_extent *, struct ext4_ext_path *); |
226 | extern int ext4_ext_insert_extent(handle_t *, struct inode *, struct ext4_ext_path *, struct ext4_extent *); | 239 | extern int ext4_ext_insert_extent(handle_t *, struct inode *, struct ext4_ext_path *, struct ext4_extent *); |
240 | extern int ext4_ext_walk_space(struct inode *, ext4_lblk_t, ext4_lblk_t, | ||
241 | ext_prepare_callback, void *); | ||
227 | extern struct ext4_ext_path *ext4_ext_find_extent(struct inode *, ext4_lblk_t, | 242 | extern struct ext4_ext_path *ext4_ext_find_extent(struct inode *, ext4_lblk_t, |
228 | struct ext4_ext_path *); | 243 | struct ext4_ext_path *); |
229 | extern int ext4_ext_search_left(struct inode *, struct ext4_ext_path *, | 244 | extern int ext4_ext_search_left(struct inode *, struct ext4_ext_path *, |
diff --git a/fs/ext4/ext4_i.h b/fs/ext4/ext4_i.h index ef7409f0e7e4..5c124c0ac6d3 100644 --- a/fs/ext4/ext4_i.h +++ b/fs/ext4/ext4_i.h | |||
@@ -33,38 +33,6 @@ typedef __u32 ext4_lblk_t; | |||
33 | /* data type for block group number */ | 33 | /* data type for block group number */ |
34 | typedef unsigned long ext4_group_t; | 34 | typedef unsigned long ext4_group_t; |
35 | 35 | ||
36 | struct ext4_reserve_window { | ||
37 | ext4_fsblk_t _rsv_start; /* First byte reserved */ | ||
38 | ext4_fsblk_t _rsv_end; /* Last byte reserved or 0 */ | ||
39 | }; | ||
40 | |||
41 | struct ext4_reserve_window_node { | ||
42 | struct rb_node rsv_node; | ||
43 | __u32 rsv_goal_size; | ||
44 | __u32 rsv_alloc_hit; | ||
45 | struct ext4_reserve_window rsv_window; | ||
46 | }; | ||
47 | |||
48 | struct ext4_block_alloc_info { | ||
49 | /* information about reservation window */ | ||
50 | struct ext4_reserve_window_node rsv_window_node; | ||
51 | /* | ||
52 | * was i_next_alloc_block in ext4_inode_info | ||
53 | * is the logical (file-relative) number of the | ||
54 | * most-recently-allocated block in this file. | ||
55 | * We use this for detecting linearly ascending allocation requests. | ||
56 | */ | ||
57 | ext4_lblk_t last_alloc_logical_block; | ||
58 | /* | ||
59 | * Was i_next_alloc_goal in ext4_inode_info | ||
60 | * is the *physical* companion to i_next_alloc_block. | ||
61 | * it the physical block number of the block which was most-recentl | ||
62 | * allocated to this file. This give us the goal (target) for the next | ||
63 | * allocation when we detect linearly ascending requests. | ||
64 | */ | ||
65 | ext4_fsblk_t last_alloc_physical_block; | ||
66 | }; | ||
67 | |||
68 | #define rsv_start rsv_window._rsv_start | 36 | #define rsv_start rsv_window._rsv_start |
69 | #define rsv_end rsv_window._rsv_end | 37 | #define rsv_end rsv_window._rsv_end |
70 | 38 | ||
@@ -97,11 +65,8 @@ struct ext4_inode_info { | |||
97 | ext4_group_t i_block_group; | 65 | ext4_group_t i_block_group; |
98 | __u32 i_state; /* Dynamic state flags for ext4 */ | 66 | __u32 i_state; /* Dynamic state flags for ext4 */ |
99 | 67 | ||
100 | /* block reservation info */ | ||
101 | struct ext4_block_alloc_info *i_block_alloc_info; | ||
102 | |||
103 | ext4_lblk_t i_dir_start_lookup; | 68 | ext4_lblk_t i_dir_start_lookup; |
104 | #ifdef CONFIG_EXT4DEV_FS_XATTR | 69 | #ifdef CONFIG_EXT4_FS_XATTR |
105 | /* | 70 | /* |
106 | * Extended attributes can be read independently of the main file | 71 | * Extended attributes can be read independently of the main file |
107 | * data. Taking i_mutex even when reading would cause contention | 72 | * data. Taking i_mutex even when reading would cause contention |
@@ -111,7 +76,7 @@ struct ext4_inode_info { | |||
111 | */ | 76 | */ |
112 | struct rw_semaphore xattr_sem; | 77 | struct rw_semaphore xattr_sem; |
113 | #endif | 78 | #endif |
114 | #ifdef CONFIG_EXT4DEV_FS_POSIX_ACL | 79 | #ifdef CONFIG_EXT4_FS_POSIX_ACL |
115 | struct posix_acl *i_acl; | 80 | struct posix_acl *i_acl; |
116 | struct posix_acl *i_default_acl; | 81 | struct posix_acl *i_default_acl; |
117 | #endif | 82 | #endif |
diff --git a/fs/ext4/ext4_sb.h b/fs/ext4/ext4_sb.h index 6300226d5531..6a0b40d43264 100644 --- a/fs/ext4/ext4_sb.h +++ b/fs/ext4/ext4_sb.h | |||
@@ -40,8 +40,8 @@ struct ext4_sb_info { | |||
40 | unsigned long s_blocks_last; /* Last seen block count */ | 40 | unsigned long s_blocks_last; /* Last seen block count */ |
41 | loff_t s_bitmap_maxbytes; /* max bytes for bitmap files */ | 41 | loff_t s_bitmap_maxbytes; /* max bytes for bitmap files */ |
42 | struct buffer_head * s_sbh; /* Buffer containing the super block */ | 42 | struct buffer_head * s_sbh; /* Buffer containing the super block */ |
43 | struct ext4_super_block * s_es; /* Pointer to the super block in the buffer */ | 43 | struct ext4_super_block *s_es; /* Pointer to the super block in the buffer */ |
44 | struct buffer_head ** s_group_desc; | 44 | struct buffer_head **s_group_desc; |
45 | unsigned long s_mount_opt; | 45 | unsigned long s_mount_opt; |
46 | ext4_fsblk_t s_sb_block; | 46 | ext4_fsblk_t s_sb_block; |
47 | uid_t s_resuid; | 47 | uid_t s_resuid; |
@@ -52,6 +52,7 @@ struct ext4_sb_info { | |||
52 | int s_desc_per_block_bits; | 52 | int s_desc_per_block_bits; |
53 | int s_inode_size; | 53 | int s_inode_size; |
54 | int s_first_ino; | 54 | int s_first_ino; |
55 | unsigned int s_inode_readahead_blks; | ||
55 | spinlock_t s_next_gen_lock; | 56 | spinlock_t s_next_gen_lock; |
56 | u32 s_next_generation; | 57 | u32 s_next_generation; |
57 | u32 s_hash_seed[4]; | 58 | u32 s_hash_seed[4]; |
@@ -59,16 +60,17 @@ struct ext4_sb_info { | |||
59 | struct percpu_counter s_freeblocks_counter; | 60 | struct percpu_counter s_freeblocks_counter; |
60 | struct percpu_counter s_freeinodes_counter; | 61 | struct percpu_counter s_freeinodes_counter; |
61 | struct percpu_counter s_dirs_counter; | 62 | struct percpu_counter s_dirs_counter; |
63 | struct percpu_counter s_dirtyblocks_counter; | ||
62 | struct blockgroup_lock s_blockgroup_lock; | 64 | struct blockgroup_lock s_blockgroup_lock; |
65 | struct proc_dir_entry *s_proc; | ||
63 | 66 | ||
64 | /* root of the per fs reservation window tree */ | 67 | /* root of the per fs reservation window tree */ |
65 | spinlock_t s_rsv_window_lock; | 68 | spinlock_t s_rsv_window_lock; |
66 | struct rb_root s_rsv_window_root; | 69 | struct rb_root s_rsv_window_root; |
67 | struct ext4_reserve_window_node s_rsv_window_head; | ||
68 | 70 | ||
69 | /* Journaling */ | 71 | /* Journaling */ |
70 | struct inode * s_journal_inode; | 72 | struct inode *s_journal_inode; |
71 | struct journal_s * s_journal; | 73 | struct journal_s *s_journal; |
72 | struct list_head s_orphan; | 74 | struct list_head s_orphan; |
73 | unsigned long s_commit_interval; | 75 | unsigned long s_commit_interval; |
74 | struct block_device *journal_bdev; | 76 | struct block_device *journal_bdev; |
@@ -106,12 +108,12 @@ struct ext4_sb_info { | |||
106 | 108 | ||
107 | /* tunables */ | 109 | /* tunables */ |
108 | unsigned long s_stripe; | 110 | unsigned long s_stripe; |
109 | unsigned long s_mb_stream_request; | 111 | unsigned int s_mb_stream_request; |
110 | unsigned long s_mb_max_to_scan; | 112 | unsigned int s_mb_max_to_scan; |
111 | unsigned long s_mb_min_to_scan; | 113 | unsigned int s_mb_min_to_scan; |
112 | unsigned long s_mb_stats; | 114 | unsigned int s_mb_stats; |
113 | unsigned long s_mb_order2_reqs; | 115 | unsigned int s_mb_order2_reqs; |
114 | unsigned long s_mb_group_prealloc; | 116 | unsigned int s_mb_group_prealloc; |
115 | /* where last allocation was done - for stream allocation */ | 117 | /* where last allocation was done - for stream allocation */ |
116 | unsigned long s_mb_last_group; | 118 | unsigned long s_mb_last_group; |
117 | unsigned long s_mb_last_start; | 119 | unsigned long s_mb_last_start; |
@@ -121,7 +123,6 @@ struct ext4_sb_info { | |||
121 | int s_mb_history_cur; | 123 | int s_mb_history_cur; |
122 | int s_mb_history_max; | 124 | int s_mb_history_max; |
123 | int s_mb_history_num; | 125 | int s_mb_history_num; |
124 | struct proc_dir_entry *s_mb_proc; | ||
125 | spinlock_t s_mb_history_lock; | 126 | spinlock_t s_mb_history_lock; |
126 | int s_mb_history_filter; | 127 | int s_mb_history_filter; |
127 | 128 | ||
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index b24d3c53f20c..ea2ce3c0ae66 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c | |||
@@ -40,6 +40,7 @@ | |||
40 | #include <linux/slab.h> | 40 | #include <linux/slab.h> |
41 | #include <linux/falloc.h> | 41 | #include <linux/falloc.h> |
42 | #include <asm/uaccess.h> | 42 | #include <asm/uaccess.h> |
43 | #include <linux/fiemap.h> | ||
43 | #include "ext4_jbd2.h" | 44 | #include "ext4_jbd2.h" |
44 | #include "ext4_extents.h" | 45 | #include "ext4_extents.h" |
45 | 46 | ||
@@ -383,8 +384,8 @@ static void ext4_ext_show_leaf(struct inode *inode, struct ext4_ext_path *path) | |||
383 | ext_debug("\n"); | 384 | ext_debug("\n"); |
384 | } | 385 | } |
385 | #else | 386 | #else |
386 | #define ext4_ext_show_path(inode,path) | 387 | #define ext4_ext_show_path(inode, path) |
387 | #define ext4_ext_show_leaf(inode,path) | 388 | #define ext4_ext_show_leaf(inode, path) |
388 | #endif | 389 | #endif |
389 | 390 | ||
390 | void ext4_ext_drop_refs(struct ext4_ext_path *path) | 391 | void ext4_ext_drop_refs(struct ext4_ext_path *path) |
@@ -440,9 +441,10 @@ ext4_ext_binsearch_idx(struct inode *inode, | |||
440 | for (k = 0; k < le16_to_cpu(eh->eh_entries); k++, ix++) { | 441 | for (k = 0; k < le16_to_cpu(eh->eh_entries); k++, ix++) { |
441 | if (k != 0 && | 442 | if (k != 0 && |
442 | le32_to_cpu(ix->ei_block) <= le32_to_cpu(ix[-1].ei_block)) { | 443 | le32_to_cpu(ix->ei_block) <= le32_to_cpu(ix[-1].ei_block)) { |
443 | printk("k=%d, ix=0x%p, first=0x%p\n", k, | 444 | printk(KERN_DEBUG "k=%d, ix=0x%p, " |
444 | ix, EXT_FIRST_INDEX(eh)); | 445 | "first=0x%p\n", k, |
445 | printk("%u <= %u\n", | 446 | ix, EXT_FIRST_INDEX(eh)); |
447 | printk(KERN_DEBUG "%u <= %u\n", | ||
446 | le32_to_cpu(ix->ei_block), | 448 | le32_to_cpu(ix->ei_block), |
447 | le32_to_cpu(ix[-1].ei_block)); | 449 | le32_to_cpu(ix[-1].ei_block)); |
448 | } | 450 | } |
@@ -1475,7 +1477,7 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode, | |||
1475 | struct ext4_ext_path *path, | 1477 | struct ext4_ext_path *path, |
1476 | struct ext4_extent *newext) | 1478 | struct ext4_extent *newext) |
1477 | { | 1479 | { |
1478 | struct ext4_extent_header * eh; | 1480 | struct ext4_extent_header *eh; |
1479 | struct ext4_extent *ex, *fex; | 1481 | struct ext4_extent *ex, *fex; |
1480 | struct ext4_extent *nearex; /* nearest extent */ | 1482 | struct ext4_extent *nearex; /* nearest extent */ |
1481 | struct ext4_ext_path *npath = NULL; | 1483 | struct ext4_ext_path *npath = NULL; |
@@ -1625,6 +1627,113 @@ cleanup: | |||
1625 | return err; | 1627 | return err; |
1626 | } | 1628 | } |
1627 | 1629 | ||
1630 | int ext4_ext_walk_space(struct inode *inode, ext4_lblk_t block, | ||
1631 | ext4_lblk_t num, ext_prepare_callback func, | ||
1632 | void *cbdata) | ||
1633 | { | ||
1634 | struct ext4_ext_path *path = NULL; | ||
1635 | struct ext4_ext_cache cbex; | ||
1636 | struct ext4_extent *ex; | ||
1637 | ext4_lblk_t next, start = 0, end = 0; | ||
1638 | ext4_lblk_t last = block + num; | ||
1639 | int depth, exists, err = 0; | ||
1640 | |||
1641 | BUG_ON(func == NULL); | ||
1642 | BUG_ON(inode == NULL); | ||
1643 | |||
1644 | while (block < last && block != EXT_MAX_BLOCK) { | ||
1645 | num = last - block; | ||
1646 | /* find extent for this block */ | ||
1647 | path = ext4_ext_find_extent(inode, block, path); | ||
1648 | if (IS_ERR(path)) { | ||
1649 | err = PTR_ERR(path); | ||
1650 | path = NULL; | ||
1651 | break; | ||
1652 | } | ||
1653 | |||
1654 | depth = ext_depth(inode); | ||
1655 | BUG_ON(path[depth].p_hdr == NULL); | ||
1656 | ex = path[depth].p_ext; | ||
1657 | next = ext4_ext_next_allocated_block(path); | ||
1658 | |||
1659 | exists = 0; | ||
1660 | if (!ex) { | ||
1661 | /* there is no extent yet, so try to allocate | ||
1662 | * all requested space */ | ||
1663 | start = block; | ||
1664 | end = block + num; | ||
1665 | } else if (le32_to_cpu(ex->ee_block) > block) { | ||
1666 | /* need to allocate space before found extent */ | ||
1667 | start = block; | ||
1668 | end = le32_to_cpu(ex->ee_block); | ||
1669 | if (block + num < end) | ||
1670 | end = block + num; | ||
1671 | } else if (block >= le32_to_cpu(ex->ee_block) | ||
1672 | + ext4_ext_get_actual_len(ex)) { | ||
1673 | /* need to allocate space after found extent */ | ||
1674 | start = block; | ||
1675 | end = block + num; | ||
1676 | if (end >= next) | ||
1677 | end = next; | ||
1678 | } else if (block >= le32_to_cpu(ex->ee_block)) { | ||
1679 | /* | ||
1680 | * some part of requested space is covered | ||
1681 | * by found extent | ||
1682 | */ | ||
1683 | start = block; | ||
1684 | end = le32_to_cpu(ex->ee_block) | ||
1685 | + ext4_ext_get_actual_len(ex); | ||
1686 | if (block + num < end) | ||
1687 | end = block + num; | ||
1688 | exists = 1; | ||
1689 | } else { | ||
1690 | BUG(); | ||
1691 | } | ||
1692 | BUG_ON(end <= start); | ||
1693 | |||
1694 | if (!exists) { | ||
1695 | cbex.ec_block = start; | ||
1696 | cbex.ec_len = end - start; | ||
1697 | cbex.ec_start = 0; | ||
1698 | cbex.ec_type = EXT4_EXT_CACHE_GAP; | ||
1699 | } else { | ||
1700 | cbex.ec_block = le32_to_cpu(ex->ee_block); | ||
1701 | cbex.ec_len = ext4_ext_get_actual_len(ex); | ||
1702 | cbex.ec_start = ext_pblock(ex); | ||
1703 | cbex.ec_type = EXT4_EXT_CACHE_EXTENT; | ||
1704 | } | ||
1705 | |||
1706 | BUG_ON(cbex.ec_len == 0); | ||
1707 | err = func(inode, path, &cbex, ex, cbdata); | ||
1708 | ext4_ext_drop_refs(path); | ||
1709 | |||
1710 | if (err < 0) | ||
1711 | break; | ||
1712 | |||
1713 | if (err == EXT_REPEAT) | ||
1714 | continue; | ||
1715 | else if (err == EXT_BREAK) { | ||
1716 | err = 0; | ||
1717 | break; | ||
1718 | } | ||
1719 | |||
1720 | if (ext_depth(inode) != depth) { | ||
1721 | /* depth was changed. we have to realloc path */ | ||
1722 | kfree(path); | ||
1723 | path = NULL; | ||
1724 | } | ||
1725 | |||
1726 | block = cbex.ec_block + cbex.ec_len; | ||
1727 | } | ||
1728 | |||
1729 | if (path) { | ||
1730 | ext4_ext_drop_refs(path); | ||
1731 | kfree(path); | ||
1732 | } | ||
1733 | |||
1734 | return err; | ||
1735 | } | ||
1736 | |||
1628 | static void | 1737 | static void |
1629 | ext4_ext_put_in_cache(struct inode *inode, ext4_lblk_t block, | 1738 | ext4_ext_put_in_cache(struct inode *inode, ext4_lblk_t block, |
1630 | __u32 len, ext4_fsblk_t start, int type) | 1739 | __u32 len, ext4_fsblk_t start, int type) |
@@ -2142,7 +2251,7 @@ void ext4_ext_init(struct super_block *sb) | |||
2142 | */ | 2251 | */ |
2143 | 2252 | ||
2144 | if (test_opt(sb, EXTENTS)) { | 2253 | if (test_opt(sb, EXTENTS)) { |
2145 | printk("EXT4-fs: file extents enabled"); | 2254 | printk(KERN_INFO "EXT4-fs: file extents enabled"); |
2146 | #ifdef AGGRESSIVE_TEST | 2255 | #ifdef AGGRESSIVE_TEST |
2147 | printk(", aggressive tests"); | 2256 | printk(", aggressive tests"); |
2148 | #endif | 2257 | #endif |
@@ -2696,11 +2805,8 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, | |||
2696 | goto out2; | 2805 | goto out2; |
2697 | } | 2806 | } |
2698 | /* | 2807 | /* |
2699 | * Okay, we need to do block allocation. Lazily initialize the block | 2808 | * Okay, we need to do block allocation. |
2700 | * allocation info here if necessary. | ||
2701 | */ | 2809 | */ |
2702 | if (S_ISREG(inode->i_mode) && (!EXT4_I(inode)->i_block_alloc_info)) | ||
2703 | ext4_init_block_alloc_info(inode); | ||
2704 | 2810 | ||
2705 | /* find neighbour allocated blocks */ | 2811 | /* find neighbour allocated blocks */ |
2706 | ar.lleft = iblock; | 2812 | ar.lleft = iblock; |
@@ -2760,7 +2866,7 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, | |||
2760 | /* free data blocks we just allocated */ | 2866 | /* free data blocks we just allocated */ |
2761 | /* not a good idea to call discard here directly, | 2867 | /* not a good idea to call discard here directly, |
2762 | * but otherwise we'd need to call it every free() */ | 2868 | * but otherwise we'd need to call it every free() */ |
2763 | ext4_mb_discard_inode_preallocations(inode); | 2869 | ext4_discard_preallocations(inode); |
2764 | ext4_free_blocks(handle, inode, ext_pblock(&newex), | 2870 | ext4_free_blocks(handle, inode, ext_pblock(&newex), |
2765 | ext4_ext_get_actual_len(&newex), 0); | 2871 | ext4_ext_get_actual_len(&newex), 0); |
2766 | goto out2; | 2872 | goto out2; |
@@ -2824,7 +2930,7 @@ void ext4_ext_truncate(struct inode *inode) | |||
2824 | down_write(&EXT4_I(inode)->i_data_sem); | 2930 | down_write(&EXT4_I(inode)->i_data_sem); |
2825 | ext4_ext_invalidate_cache(inode); | 2931 | ext4_ext_invalidate_cache(inode); |
2826 | 2932 | ||
2827 | ext4_discard_reservation(inode); | 2933 | ext4_discard_preallocations(inode); |
2828 | 2934 | ||
2829 | /* | 2935 | /* |
2830 | * TODO: optimization is possible here. | 2936 | * TODO: optimization is possible here. |
@@ -2877,10 +2983,11 @@ static void ext4_falloc_update_inode(struct inode *inode, | |||
2877 | * Update only when preallocation was requested beyond | 2983 | * Update only when preallocation was requested beyond |
2878 | * the file size. | 2984 | * the file size. |
2879 | */ | 2985 | */ |
2880 | if (!(mode & FALLOC_FL_KEEP_SIZE) && | 2986 | if (!(mode & FALLOC_FL_KEEP_SIZE)) { |
2881 | new_size > i_size_read(inode)) { | 2987 | if (new_size > i_size_read(inode)) |
2882 | i_size_write(inode, new_size); | 2988 | i_size_write(inode, new_size); |
2883 | EXT4_I(inode)->i_disksize = new_size; | 2989 | if (new_size > EXT4_I(inode)->i_disksize) |
2990 | ext4_update_i_disksize(inode, new_size); | ||
2884 | } | 2991 | } |
2885 | 2992 | ||
2886 | } | 2993 | } |
@@ -2972,3 +3079,143 @@ retry: | |||
2972 | mutex_unlock(&inode->i_mutex); | 3079 | mutex_unlock(&inode->i_mutex); |
2973 | return ret > 0 ? ret2 : ret; | 3080 | return ret > 0 ? ret2 : ret; |
2974 | } | 3081 | } |
3082 | |||
3083 | /* | ||
3084 | * Callback function called for each extent to gather FIEMAP information. | ||
3085 | */ | ||
3086 | int ext4_ext_fiemap_cb(struct inode *inode, struct ext4_ext_path *path, | ||
3087 | struct ext4_ext_cache *newex, struct ext4_extent *ex, | ||
3088 | void *data) | ||
3089 | { | ||
3090 | struct fiemap_extent_info *fieinfo = data; | ||
3091 | unsigned long blksize_bits = inode->i_sb->s_blocksize_bits; | ||
3092 | __u64 logical; | ||
3093 | __u64 physical; | ||
3094 | __u64 length; | ||
3095 | __u32 flags = 0; | ||
3096 | int error; | ||
3097 | |||
3098 | logical = (__u64)newex->ec_block << blksize_bits; | ||
3099 | |||
3100 | if (newex->ec_type == EXT4_EXT_CACHE_GAP) { | ||
3101 | pgoff_t offset; | ||
3102 | struct page *page; | ||
3103 | struct buffer_head *bh = NULL; | ||
3104 | |||
3105 | offset = logical >> PAGE_SHIFT; | ||
3106 | page = find_get_page(inode->i_mapping, offset); | ||
3107 | if (!page || !page_has_buffers(page)) | ||
3108 | return EXT_CONTINUE; | ||
3109 | |||
3110 | bh = page_buffers(page); | ||
3111 | |||
3112 | if (!bh) | ||
3113 | return EXT_CONTINUE; | ||
3114 | |||
3115 | if (buffer_delay(bh)) { | ||
3116 | flags |= FIEMAP_EXTENT_DELALLOC; | ||
3117 | page_cache_release(page); | ||
3118 | } else { | ||
3119 | page_cache_release(page); | ||
3120 | return EXT_CONTINUE; | ||
3121 | } | ||
3122 | } | ||
3123 | |||
3124 | physical = (__u64)newex->ec_start << blksize_bits; | ||
3125 | length = (__u64)newex->ec_len << blksize_bits; | ||
3126 | |||
3127 | if (ex && ext4_ext_is_uninitialized(ex)) | ||
3128 | flags |= FIEMAP_EXTENT_UNWRITTEN; | ||
3129 | |||
3130 | /* | ||
3131 | * If this extent reaches EXT_MAX_BLOCK, it must be last. | ||
3132 | * | ||
3133 | * Or if ext4_ext_next_allocated_block is EXT_MAX_BLOCK, | ||
3134 | * this also indicates no more allocated blocks. | ||
3135 | * | ||
3136 | * XXX this might miss a single-block extent at EXT_MAX_BLOCK | ||
3137 | */ | ||
3138 | if (logical + length - 1 == EXT_MAX_BLOCK || | ||
3139 | ext4_ext_next_allocated_block(path) == EXT_MAX_BLOCK) | ||
3140 | flags |= FIEMAP_EXTENT_LAST; | ||
3141 | |||
3142 | error = fiemap_fill_next_extent(fieinfo, logical, physical, | ||
3143 | length, flags); | ||
3144 | if (error < 0) | ||
3145 | return error; | ||
3146 | if (error == 1) | ||
3147 | return EXT_BREAK; | ||
3148 | |||
3149 | return EXT_CONTINUE; | ||
3150 | } | ||
3151 | |||
3152 | /* fiemap flags we can handle specified here */ | ||
3153 | #define EXT4_FIEMAP_FLAGS (FIEMAP_FLAG_SYNC|FIEMAP_FLAG_XATTR) | ||
3154 | |||
3155 | int ext4_xattr_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo) | ||
3156 | { | ||
3157 | __u64 physical = 0; | ||
3158 | __u64 length; | ||
3159 | __u32 flags = FIEMAP_EXTENT_LAST; | ||
3160 | int blockbits = inode->i_sb->s_blocksize_bits; | ||
3161 | int error = 0; | ||
3162 | |||
3163 | /* in-inode? */ | ||
3164 | if (EXT4_I(inode)->i_state & EXT4_STATE_XATTR) { | ||
3165 | struct ext4_iloc iloc; | ||
3166 | int offset; /* offset of xattr in inode */ | ||
3167 | |||
3168 | error = ext4_get_inode_loc(inode, &iloc); | ||
3169 | if (error) | ||
3170 | return error; | ||
3171 | physical = iloc.bh->b_blocknr << blockbits; | ||
3172 | offset = EXT4_GOOD_OLD_INODE_SIZE + | ||
3173 | EXT4_I(inode)->i_extra_isize; | ||
3174 | physical += offset; | ||
3175 | length = EXT4_SB(inode->i_sb)->s_inode_size - offset; | ||
3176 | flags |= FIEMAP_EXTENT_DATA_INLINE; | ||
3177 | } else { /* external block */ | ||
3178 | physical = EXT4_I(inode)->i_file_acl << blockbits; | ||
3179 | length = inode->i_sb->s_blocksize; | ||
3180 | } | ||
3181 | |||
3182 | if (physical) | ||
3183 | error = fiemap_fill_next_extent(fieinfo, 0, physical, | ||
3184 | length, flags); | ||
3185 | return (error < 0 ? error : 0); | ||
3186 | } | ||
3187 | |||
3188 | int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, | ||
3189 | __u64 start, __u64 len) | ||
3190 | { | ||
3191 | ext4_lblk_t start_blk; | ||
3192 | ext4_lblk_t len_blks; | ||
3193 | int error = 0; | ||
3194 | |||
3195 | /* fallback to generic here if not in extents fmt */ | ||
3196 | if (!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL)) | ||
3197 | return generic_block_fiemap(inode, fieinfo, start, len, | ||
3198 | ext4_get_block); | ||
3199 | |||
3200 | if (fiemap_check_flags(fieinfo, EXT4_FIEMAP_FLAGS)) | ||
3201 | return -EBADR; | ||
3202 | |||
3203 | if (fieinfo->fi_flags & FIEMAP_FLAG_XATTR) { | ||
3204 | error = ext4_xattr_fiemap(inode, fieinfo); | ||
3205 | } else { | ||
3206 | start_blk = start >> inode->i_sb->s_blocksize_bits; | ||
3207 | len_blks = len >> inode->i_sb->s_blocksize_bits; | ||
3208 | |||
3209 | /* | ||
3210 | * Walk the extent tree gathering extent information. | ||
3211 | * ext4_ext_fiemap_cb will push extents back to user. | ||
3212 | */ | ||
3213 | down_write(&EXT4_I(inode)->i_data_sem); | ||
3214 | error = ext4_ext_walk_space(inode, start_blk, len_blks, | ||
3215 | ext4_ext_fiemap_cb, fieinfo); | ||
3216 | up_write(&EXT4_I(inode)->i_data_sem); | ||
3217 | } | ||
3218 | |||
3219 | return error; | ||
3220 | } | ||
3221 | |||
diff --git a/fs/ext4/file.c b/fs/ext4/file.c index 430eb7978db4..6bd11fba71f7 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c | |||
@@ -31,14 +31,14 @@ | |||
31 | * from ext4_file_open: open gets called at every open, but release | 31 | * from ext4_file_open: open gets called at every open, but release |
32 | * gets called only when /all/ the files are closed. | 32 | * gets called only when /all/ the files are closed. |
33 | */ | 33 | */ |
34 | static int ext4_release_file (struct inode * inode, struct file * filp) | 34 | static int ext4_release_file(struct inode *inode, struct file *filp) |
35 | { | 35 | { |
36 | /* if we are the last writer on the inode, drop the block reservation */ | 36 | /* if we are the last writer on the inode, drop the block reservation */ |
37 | if ((filp->f_mode & FMODE_WRITE) && | 37 | if ((filp->f_mode & FMODE_WRITE) && |
38 | (atomic_read(&inode->i_writecount) == 1)) | 38 | (atomic_read(&inode->i_writecount) == 1)) |
39 | { | 39 | { |
40 | down_write(&EXT4_I(inode)->i_data_sem); | 40 | down_write(&EXT4_I(inode)->i_data_sem); |
41 | ext4_discard_reservation(inode); | 41 | ext4_discard_preallocations(inode); |
42 | up_write(&EXT4_I(inode)->i_data_sem); | 42 | up_write(&EXT4_I(inode)->i_data_sem); |
43 | } | 43 | } |
44 | if (is_dx(inode) && filp->private_data) | 44 | if (is_dx(inode) && filp->private_data) |
@@ -140,6 +140,9 @@ static int ext4_file_mmap(struct file *file, struct vm_area_struct *vma) | |||
140 | return 0; | 140 | return 0; |
141 | } | 141 | } |
142 | 142 | ||
143 | extern int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, | ||
144 | __u64 start, __u64 len); | ||
145 | |||
143 | const struct file_operations ext4_file_operations = { | 146 | const struct file_operations ext4_file_operations = { |
144 | .llseek = generic_file_llseek, | 147 | .llseek = generic_file_llseek, |
145 | .read = do_sync_read, | 148 | .read = do_sync_read, |
@@ -162,7 +165,7 @@ const struct inode_operations ext4_file_inode_operations = { | |||
162 | .truncate = ext4_truncate, | 165 | .truncate = ext4_truncate, |
163 | .setattr = ext4_setattr, | 166 | .setattr = ext4_setattr, |
164 | .getattr = ext4_getattr, | 167 | .getattr = ext4_getattr, |
165 | #ifdef CONFIG_EXT4DEV_FS_XATTR | 168 | #ifdef CONFIG_EXT4_FS_XATTR |
166 | .setxattr = generic_setxattr, | 169 | .setxattr = generic_setxattr, |
167 | .getxattr = generic_getxattr, | 170 | .getxattr = generic_getxattr, |
168 | .listxattr = ext4_listxattr, | 171 | .listxattr = ext4_listxattr, |
@@ -170,5 +173,6 @@ const struct inode_operations ext4_file_inode_operations = { | |||
170 | #endif | 173 | #endif |
171 | .permission = ext4_permission, | 174 | .permission = ext4_permission, |
172 | .fallocate = ext4_fallocate, | 175 | .fallocate = ext4_fallocate, |
176 | .fiemap = ext4_fiemap, | ||
173 | }; | 177 | }; |
174 | 178 | ||
diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c index a45c3737ad31..5afe4370840b 100644 --- a/fs/ext4/fsync.c +++ b/fs/ext4/fsync.c | |||
@@ -28,6 +28,7 @@ | |||
28 | #include <linux/writeback.h> | 28 | #include <linux/writeback.h> |
29 | #include <linux/jbd2.h> | 29 | #include <linux/jbd2.h> |
30 | #include <linux/blkdev.h> | 30 | #include <linux/blkdev.h> |
31 | #include <linux/marker.h> | ||
31 | #include "ext4.h" | 32 | #include "ext4.h" |
32 | #include "ext4_jbd2.h" | 33 | #include "ext4_jbd2.h" |
33 | 34 | ||
@@ -43,7 +44,7 @@ | |||
43 | * inode to disk. | 44 | * inode to disk. |
44 | */ | 45 | */ |
45 | 46 | ||
46 | int ext4_sync_file(struct file * file, struct dentry *dentry, int datasync) | 47 | int ext4_sync_file(struct file *file, struct dentry *dentry, int datasync) |
47 | { | 48 | { |
48 | struct inode *inode = dentry->d_inode; | 49 | struct inode *inode = dentry->d_inode; |
49 | journal_t *journal = EXT4_SB(inode->i_sb)->s_journal; | 50 | journal_t *journal = EXT4_SB(inode->i_sb)->s_journal; |
@@ -51,6 +52,10 @@ int ext4_sync_file(struct file * file, struct dentry *dentry, int datasync) | |||
51 | 52 | ||
52 | J_ASSERT(ext4_journal_current_handle() == NULL); | 53 | J_ASSERT(ext4_journal_current_handle() == NULL); |
53 | 54 | ||
55 | trace_mark(ext4_sync_file, "dev %s datasync %d ino %ld parent %ld", | ||
56 | inode->i_sb->s_id, datasync, inode->i_ino, | ||
57 | dentry->d_parent->d_inode->i_ino); | ||
58 | |||
54 | /* | 59 | /* |
55 | * data=writeback: | 60 | * data=writeback: |
56 | * The caller's filemap_fdatawrite()/wait will sync the data. | 61 | * The caller's filemap_fdatawrite()/wait will sync the data. |
diff --git a/fs/ext4/hash.c b/fs/ext4/hash.c index 1d6329dbe390..556ca8eba3db 100644 --- a/fs/ext4/hash.c +++ b/fs/ext4/hash.c | |||
@@ -27,7 +27,7 @@ static void TEA_transform(__u32 buf[4], __u32 const in[]) | |||
27 | sum += DELTA; | 27 | sum += DELTA; |
28 | b0 += ((b1 << 4)+a) ^ (b1+sum) ^ ((b1 >> 5)+b); | 28 | b0 += ((b1 << 4)+a) ^ (b1+sum) ^ ((b1 >> 5)+b); |
29 | b1 += ((b0 << 4)+c) ^ (b0+sum) ^ ((b0 >> 5)+d); | 29 | b1 += ((b0 << 4)+c) ^ (b0+sum) ^ ((b0 >> 5)+d); |
30 | } while(--n); | 30 | } while (--n); |
31 | 31 | ||
32 | buf[0] += b0; | 32 | buf[0] += b0; |
33 | buf[1] += b1; | 33 | buf[1] += b1; |
@@ -35,7 +35,7 @@ static void TEA_transform(__u32 buf[4], __u32 const in[]) | |||
35 | 35 | ||
36 | 36 | ||
37 | /* The old legacy hash */ | 37 | /* The old legacy hash */ |
38 | static __u32 dx_hack_hash (const char *name, int len) | 38 | static __u32 dx_hack_hash(const char *name, int len) |
39 | { | 39 | { |
40 | __u32 hash0 = 0x12a3fe2d, hash1 = 0x37abe8f9; | 40 | __u32 hash0 = 0x12a3fe2d, hash1 = 0x37abe8f9; |
41 | while (len--) { | 41 | while (len--) { |
@@ -59,7 +59,7 @@ static void str2hashbuf(const char *msg, int len, __u32 *buf, int num) | |||
59 | val = pad; | 59 | val = pad; |
60 | if (len > num*4) | 60 | if (len > num*4) |
61 | len = num * 4; | 61 | len = num * 4; |
62 | for (i=0; i < len; i++) { | 62 | for (i = 0; i < len; i++) { |
63 | if ((i % 4) == 0) | 63 | if ((i % 4) == 0) |
64 | val = pad; | 64 | val = pad; |
65 | val = msg[i] + (val << 8); | 65 | val = msg[i] + (val << 8); |
@@ -104,7 +104,7 @@ int ext4fs_dirhash(const char *name, int len, struct dx_hash_info *hinfo) | |||
104 | 104 | ||
105 | /* Check to see if the seed is all zero's */ | 105 | /* Check to see if the seed is all zero's */ |
106 | if (hinfo->seed) { | 106 | if (hinfo->seed) { |
107 | for (i=0; i < 4; i++) { | 107 | for (i = 0; i < 4; i++) { |
108 | if (hinfo->seed[i]) | 108 | if (hinfo->seed[i]) |
109 | break; | 109 | break; |
110 | } | 110 | } |
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index f344834bbf58..fe34d74cfb19 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c | |||
@@ -115,9 +115,11 @@ ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group) | |||
115 | block_group, bitmap_blk); | 115 | block_group, bitmap_blk); |
116 | return NULL; | 116 | return NULL; |
117 | } | 117 | } |
118 | if (bh_uptodate_or_lock(bh)) | 118 | if (buffer_uptodate(bh) && |
119 | !(desc->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT))) | ||
119 | return bh; | 120 | return bh; |
120 | 121 | ||
122 | lock_buffer(bh); | ||
121 | spin_lock(sb_bgl_lock(EXT4_SB(sb), block_group)); | 123 | spin_lock(sb_bgl_lock(EXT4_SB(sb), block_group)); |
122 | if (desc->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)) { | 124 | if (desc->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)) { |
123 | ext4_init_inode_bitmap(sb, bh, block_group, desc); | 125 | ext4_init_inode_bitmap(sb, bh, block_group, desc); |
@@ -154,39 +156,40 @@ ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group) | |||
154 | * though), and then we'd have two inodes sharing the | 156 | * though), and then we'd have two inodes sharing the |
155 | * same inode number and space on the harddisk. | 157 | * same inode number and space on the harddisk. |
156 | */ | 158 | */ |
157 | void ext4_free_inode (handle_t *handle, struct inode * inode) | 159 | void ext4_free_inode(handle_t *handle, struct inode *inode) |
158 | { | 160 | { |
159 | struct super_block * sb = inode->i_sb; | 161 | struct super_block *sb = inode->i_sb; |
160 | int is_directory; | 162 | int is_directory; |
161 | unsigned long ino; | 163 | unsigned long ino; |
162 | struct buffer_head *bitmap_bh = NULL; | 164 | struct buffer_head *bitmap_bh = NULL; |
163 | struct buffer_head *bh2; | 165 | struct buffer_head *bh2; |
164 | ext4_group_t block_group; | 166 | ext4_group_t block_group; |
165 | unsigned long bit; | 167 | unsigned long bit; |
166 | struct ext4_group_desc * gdp; | 168 | struct ext4_group_desc *gdp; |
167 | struct ext4_super_block * es; | 169 | struct ext4_super_block *es; |
168 | struct ext4_sb_info *sbi; | 170 | struct ext4_sb_info *sbi; |
169 | int fatal = 0, err; | 171 | int fatal = 0, err; |
170 | ext4_group_t flex_group; | 172 | ext4_group_t flex_group; |
171 | 173 | ||
172 | if (atomic_read(&inode->i_count) > 1) { | 174 | if (atomic_read(&inode->i_count) > 1) { |
173 | printk ("ext4_free_inode: inode has count=%d\n", | 175 | printk(KERN_ERR "ext4_free_inode: inode has count=%d\n", |
174 | atomic_read(&inode->i_count)); | 176 | atomic_read(&inode->i_count)); |
175 | return; | 177 | return; |
176 | } | 178 | } |
177 | if (inode->i_nlink) { | 179 | if (inode->i_nlink) { |
178 | printk ("ext4_free_inode: inode has nlink=%d\n", | 180 | printk(KERN_ERR "ext4_free_inode: inode has nlink=%d\n", |
179 | inode->i_nlink); | 181 | inode->i_nlink); |
180 | return; | 182 | return; |
181 | } | 183 | } |
182 | if (!sb) { | 184 | if (!sb) { |
183 | printk("ext4_free_inode: inode on nonexistent device\n"); | 185 | printk(KERN_ERR "ext4_free_inode: inode on " |
186 | "nonexistent device\n"); | ||
184 | return; | 187 | return; |
185 | } | 188 | } |
186 | sbi = EXT4_SB(sb); | 189 | sbi = EXT4_SB(sb); |
187 | 190 | ||
188 | ino = inode->i_ino; | 191 | ino = inode->i_ino; |
189 | ext4_debug ("freeing inode %lu\n", ino); | 192 | ext4_debug("freeing inode %lu\n", ino); |
190 | 193 | ||
191 | /* | 194 | /* |
192 | * Note: we must free any quota before locking the superblock, | 195 | * Note: we must free any quota before locking the superblock, |
@@ -200,12 +203,12 @@ void ext4_free_inode (handle_t *handle, struct inode * inode) | |||
200 | is_directory = S_ISDIR(inode->i_mode); | 203 | is_directory = S_ISDIR(inode->i_mode); |
201 | 204 | ||
202 | /* Do this BEFORE marking the inode not in use or returning an error */ | 205 | /* Do this BEFORE marking the inode not in use or returning an error */ |
203 | clear_inode (inode); | 206 | clear_inode(inode); |
204 | 207 | ||
205 | es = EXT4_SB(sb)->s_es; | 208 | es = EXT4_SB(sb)->s_es; |
206 | if (ino < EXT4_FIRST_INO(sb) || ino > le32_to_cpu(es->s_inodes_count)) { | 209 | if (ino < EXT4_FIRST_INO(sb) || ino > le32_to_cpu(es->s_inodes_count)) { |
207 | ext4_error (sb, "ext4_free_inode", | 210 | ext4_error(sb, "ext4_free_inode", |
208 | "reserved or nonexistent inode %lu", ino); | 211 | "reserved or nonexistent inode %lu", ino); |
209 | goto error_return; | 212 | goto error_return; |
210 | } | 213 | } |
211 | block_group = (ino - 1) / EXT4_INODES_PER_GROUP(sb); | 214 | block_group = (ino - 1) / EXT4_INODES_PER_GROUP(sb); |
@@ -222,10 +225,10 @@ void ext4_free_inode (handle_t *handle, struct inode * inode) | |||
222 | /* Ok, now we can actually update the inode bitmaps.. */ | 225 | /* Ok, now we can actually update the inode bitmaps.. */ |
223 | if (!ext4_clear_bit_atomic(sb_bgl_lock(sbi, block_group), | 226 | if (!ext4_clear_bit_atomic(sb_bgl_lock(sbi, block_group), |
224 | bit, bitmap_bh->b_data)) | 227 | bit, bitmap_bh->b_data)) |
225 | ext4_error (sb, "ext4_free_inode", | 228 | ext4_error(sb, "ext4_free_inode", |
226 | "bit already cleared for inode %lu", ino); | 229 | "bit already cleared for inode %lu", ino); |
227 | else { | 230 | else { |
228 | gdp = ext4_get_group_desc (sb, block_group, &bh2); | 231 | gdp = ext4_get_group_desc(sb, block_group, &bh2); |
229 | 232 | ||
230 | BUFFER_TRACE(bh2, "get_write_access"); | 233 | BUFFER_TRACE(bh2, "get_write_access"); |
231 | fatal = ext4_journal_get_write_access(handle, bh2); | 234 | fatal = ext4_journal_get_write_access(handle, bh2); |
@@ -287,7 +290,7 @@ static int find_group_dir(struct super_block *sb, struct inode *parent, | |||
287 | avefreei = freei / ngroups; | 290 | avefreei = freei / ngroups; |
288 | 291 | ||
289 | for (group = 0; group < ngroups; group++) { | 292 | for (group = 0; group < ngroups; group++) { |
290 | desc = ext4_get_group_desc (sb, group, NULL); | 293 | desc = ext4_get_group_desc(sb, group, NULL); |
291 | if (!desc || !desc->bg_free_inodes_count) | 294 | if (!desc || !desc->bg_free_inodes_count) |
292 | continue; | 295 | continue; |
293 | if (le16_to_cpu(desc->bg_free_inodes_count) < avefreei) | 296 | if (le16_to_cpu(desc->bg_free_inodes_count) < avefreei) |
@@ -576,16 +579,16 @@ static int find_group_other(struct super_block *sb, struct inode *parent, | |||
576 | * For other inodes, search forward from the parent directory's block | 579 | * For other inodes, search forward from the parent directory's block |
577 | * group to find a free inode. | 580 | * group to find a free inode. |
578 | */ | 581 | */ |
579 | struct inode *ext4_new_inode(handle_t *handle, struct inode * dir, int mode) | 582 | struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode) |
580 | { | 583 | { |
581 | struct super_block *sb; | 584 | struct super_block *sb; |
582 | struct buffer_head *bitmap_bh = NULL; | 585 | struct buffer_head *bitmap_bh = NULL; |
583 | struct buffer_head *bh2; | 586 | struct buffer_head *bh2; |
584 | ext4_group_t group = 0; | 587 | ext4_group_t group = 0; |
585 | unsigned long ino = 0; | 588 | unsigned long ino = 0; |
586 | struct inode * inode; | 589 | struct inode *inode; |
587 | struct ext4_group_desc * gdp = NULL; | 590 | struct ext4_group_desc *gdp = NULL; |
588 | struct ext4_super_block * es; | 591 | struct ext4_super_block *es; |
589 | struct ext4_inode_info *ei; | 592 | struct ext4_inode_info *ei; |
590 | struct ext4_sb_info *sbi; | 593 | struct ext4_sb_info *sbi; |
591 | int ret2, err = 0; | 594 | int ret2, err = 0; |
@@ -613,7 +616,7 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode * dir, int mode) | |||
613 | } | 616 | } |
614 | 617 | ||
615 | if (S_ISDIR(mode)) { | 618 | if (S_ISDIR(mode)) { |
616 | if (test_opt (sb, OLDALLOC)) | 619 | if (test_opt(sb, OLDALLOC)) |
617 | ret2 = find_group_dir(sb, dir, &group); | 620 | ret2 = find_group_dir(sb, dir, &group); |
618 | else | 621 | else |
619 | ret2 = find_group_orlov(sb, dir, &group); | 622 | ret2 = find_group_orlov(sb, dir, &group); |
@@ -783,7 +786,7 @@ got: | |||
783 | } | 786 | } |
784 | 787 | ||
785 | inode->i_uid = current->fsuid; | 788 | inode->i_uid = current->fsuid; |
786 | if (test_opt (sb, GRPID)) | 789 | if (test_opt(sb, GRPID)) |
787 | inode->i_gid = dir->i_gid; | 790 | inode->i_gid = dir->i_gid; |
788 | else if (dir->i_mode & S_ISGID) { | 791 | else if (dir->i_mode & S_ISGID) { |
789 | inode->i_gid = dir->i_gid; | 792 | inode->i_gid = dir->i_gid; |
@@ -816,7 +819,6 @@ got: | |||
816 | ei->i_flags &= ~EXT4_DIRSYNC_FL; | 819 | ei->i_flags &= ~EXT4_DIRSYNC_FL; |
817 | ei->i_file_acl = 0; | 820 | ei->i_file_acl = 0; |
818 | ei->i_dtime = 0; | 821 | ei->i_dtime = 0; |
819 | ei->i_block_alloc_info = NULL; | ||
820 | ei->i_block_group = group; | 822 | ei->i_block_group = group; |
821 | 823 | ||
822 | ext4_set_inode_flags(inode); | 824 | ext4_set_inode_flags(inode); |
@@ -832,7 +834,7 @@ got: | |||
832 | ei->i_extra_isize = EXT4_SB(sb)->s_want_extra_isize; | 834 | ei->i_extra_isize = EXT4_SB(sb)->s_want_extra_isize; |
833 | 835 | ||
834 | ret = inode; | 836 | ret = inode; |
835 | if(DQUOT_ALLOC_INODE(inode)) { | 837 | if (DQUOT_ALLOC_INODE(inode)) { |
836 | err = -EDQUOT; | 838 | err = -EDQUOT; |
837 | goto fail_drop; | 839 | goto fail_drop; |
838 | } | 840 | } |
@@ -841,7 +843,7 @@ got: | |||
841 | if (err) | 843 | if (err) |
842 | goto fail_free_drop; | 844 | goto fail_free_drop; |
843 | 845 | ||
844 | err = ext4_init_security(handle,inode, dir); | 846 | err = ext4_init_security(handle, inode, dir); |
845 | if (err) | 847 | if (err) |
846 | goto fail_free_drop; | 848 | goto fail_free_drop; |
847 | 849 | ||
@@ -959,7 +961,7 @@ error: | |||
959 | return ERR_PTR(err); | 961 | return ERR_PTR(err); |
960 | } | 962 | } |
961 | 963 | ||
962 | unsigned long ext4_count_free_inodes (struct super_block * sb) | 964 | unsigned long ext4_count_free_inodes(struct super_block *sb) |
963 | { | 965 | { |
964 | unsigned long desc_count; | 966 | unsigned long desc_count; |
965 | struct ext4_group_desc *gdp; | 967 | struct ext4_group_desc *gdp; |
@@ -974,7 +976,7 @@ unsigned long ext4_count_free_inodes (struct super_block * sb) | |||
974 | bitmap_count = 0; | 976 | bitmap_count = 0; |
975 | gdp = NULL; | 977 | gdp = NULL; |
976 | for (i = 0; i < EXT4_SB(sb)->s_groups_count; i++) { | 978 | for (i = 0; i < EXT4_SB(sb)->s_groups_count; i++) { |
977 | gdp = ext4_get_group_desc (sb, i, NULL); | 979 | gdp = ext4_get_group_desc(sb, i, NULL); |
978 | if (!gdp) | 980 | if (!gdp) |
979 | continue; | 981 | continue; |
980 | desc_count += le16_to_cpu(gdp->bg_free_inodes_count); | 982 | desc_count += le16_to_cpu(gdp->bg_free_inodes_count); |
@@ -989,13 +991,14 @@ unsigned long ext4_count_free_inodes (struct super_block * sb) | |||
989 | bitmap_count += x; | 991 | bitmap_count += x; |
990 | } | 992 | } |
991 | brelse(bitmap_bh); | 993 | brelse(bitmap_bh); |
992 | printk("ext4_count_free_inodes: stored = %u, computed = %lu, %lu\n", | 994 | printk(KERN_DEBUG "ext4_count_free_inodes: " |
993 | le32_to_cpu(es->s_free_inodes_count), desc_count, bitmap_count); | 995 | "stored = %u, computed = %lu, %lu\n", |
996 | le32_to_cpu(es->s_free_inodes_count), desc_count, bitmap_count); | ||
994 | return desc_count; | 997 | return desc_count; |
995 | #else | 998 | #else |
996 | desc_count = 0; | 999 | desc_count = 0; |
997 | for (i = 0; i < EXT4_SB(sb)->s_groups_count; i++) { | 1000 | for (i = 0; i < EXT4_SB(sb)->s_groups_count; i++) { |
998 | gdp = ext4_get_group_desc (sb, i, NULL); | 1001 | gdp = ext4_get_group_desc(sb, i, NULL); |
999 | if (!gdp) | 1002 | if (!gdp) |
1000 | continue; | 1003 | continue; |
1001 | desc_count += le16_to_cpu(gdp->bg_free_inodes_count); | 1004 | desc_count += le16_to_cpu(gdp->bg_free_inodes_count); |
@@ -1006,13 +1009,13 @@ unsigned long ext4_count_free_inodes (struct super_block * sb) | |||
1006 | } | 1009 | } |
1007 | 1010 | ||
1008 | /* Called at mount-time, super-block is locked */ | 1011 | /* Called at mount-time, super-block is locked */ |
1009 | unsigned long ext4_count_dirs (struct super_block * sb) | 1012 | unsigned long ext4_count_dirs(struct super_block * sb) |
1010 | { | 1013 | { |
1011 | unsigned long count = 0; | 1014 | unsigned long count = 0; |
1012 | ext4_group_t i; | 1015 | ext4_group_t i; |
1013 | 1016 | ||
1014 | for (i = 0; i < EXT4_SB(sb)->s_groups_count; i++) { | 1017 | for (i = 0; i < EXT4_SB(sb)->s_groups_count; i++) { |
1015 | struct ext4_group_desc *gdp = ext4_get_group_desc (sb, i, NULL); | 1018 | struct ext4_group_desc *gdp = ext4_get_group_desc(sb, i, NULL); |
1016 | if (!gdp) | 1019 | if (!gdp) |
1017 | continue; | 1020 | continue; |
1018 | count += le16_to_cpu(gdp->bg_used_dirs_count); | 1021 | count += le16_to_cpu(gdp->bg_used_dirs_count); |
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 7e91913e325b..9b4ec9decfd1 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c | |||
@@ -190,7 +190,7 @@ static int ext4_journal_test_restart(handle_t *handle, struct inode *inode) | |||
190 | /* | 190 | /* |
191 | * Called at the last iput() if i_nlink is zero. | 191 | * Called at the last iput() if i_nlink is zero. |
192 | */ | 192 | */ |
193 | void ext4_delete_inode (struct inode * inode) | 193 | void ext4_delete_inode(struct inode *inode) |
194 | { | 194 | { |
195 | handle_t *handle; | 195 | handle_t *handle; |
196 | int err; | 196 | int err; |
@@ -330,11 +330,11 @@ static int ext4_block_to_path(struct inode *inode, | |||
330 | int final = 0; | 330 | int final = 0; |
331 | 331 | ||
332 | if (i_block < 0) { | 332 | if (i_block < 0) { |
333 | ext4_warning (inode->i_sb, "ext4_block_to_path", "block < 0"); | 333 | ext4_warning(inode->i_sb, "ext4_block_to_path", "block < 0"); |
334 | } else if (i_block < direct_blocks) { | 334 | } else if (i_block < direct_blocks) { |
335 | offsets[n++] = i_block; | 335 | offsets[n++] = i_block; |
336 | final = direct_blocks; | 336 | final = direct_blocks; |
337 | } else if ( (i_block -= direct_blocks) < indirect_blocks) { | 337 | } else if ((i_block -= direct_blocks) < indirect_blocks) { |
338 | offsets[n++] = EXT4_IND_BLOCK; | 338 | offsets[n++] = EXT4_IND_BLOCK; |
339 | offsets[n++] = i_block; | 339 | offsets[n++] = i_block; |
340 | final = ptrs; | 340 | final = ptrs; |
@@ -400,14 +400,14 @@ static Indirect *ext4_get_branch(struct inode *inode, int depth, | |||
400 | 400 | ||
401 | *err = 0; | 401 | *err = 0; |
402 | /* i_data is not going away, no lock needed */ | 402 | /* i_data is not going away, no lock needed */ |
403 | add_chain (chain, NULL, EXT4_I(inode)->i_data + *offsets); | 403 | add_chain(chain, NULL, EXT4_I(inode)->i_data + *offsets); |
404 | if (!p->key) | 404 | if (!p->key) |
405 | goto no_block; | 405 | goto no_block; |
406 | while (--depth) { | 406 | while (--depth) { |
407 | bh = sb_bread(sb, le32_to_cpu(p->key)); | 407 | bh = sb_bread(sb, le32_to_cpu(p->key)); |
408 | if (!bh) | 408 | if (!bh) |
409 | goto failure; | 409 | goto failure; |
410 | add_chain(++p, bh, (__le32*)bh->b_data + *++offsets); | 410 | add_chain(++p, bh, (__le32 *)bh->b_data + *++offsets); |
411 | /* Reader: end */ | 411 | /* Reader: end */ |
412 | if (!p->key) | 412 | if (!p->key) |
413 | goto no_block; | 413 | goto no_block; |
@@ -443,7 +443,7 @@ no_block: | |||
443 | static ext4_fsblk_t ext4_find_near(struct inode *inode, Indirect *ind) | 443 | static ext4_fsblk_t ext4_find_near(struct inode *inode, Indirect *ind) |
444 | { | 444 | { |
445 | struct ext4_inode_info *ei = EXT4_I(inode); | 445 | struct ext4_inode_info *ei = EXT4_I(inode); |
446 | __le32 *start = ind->bh ? (__le32*) ind->bh->b_data : ei->i_data; | 446 | __le32 *start = ind->bh ? (__le32 *) ind->bh->b_data : ei->i_data; |
447 | __le32 *p; | 447 | __le32 *p; |
448 | ext4_fsblk_t bg_start; | 448 | ext4_fsblk_t bg_start; |
449 | ext4_fsblk_t last_block; | 449 | ext4_fsblk_t last_block; |
@@ -486,18 +486,9 @@ static ext4_fsblk_t ext4_find_near(struct inode *inode, Indirect *ind) | |||
486 | static ext4_fsblk_t ext4_find_goal(struct inode *inode, ext4_lblk_t block, | 486 | static ext4_fsblk_t ext4_find_goal(struct inode *inode, ext4_lblk_t block, |
487 | Indirect *partial) | 487 | Indirect *partial) |
488 | { | 488 | { |
489 | struct ext4_block_alloc_info *block_i; | ||
490 | |||
491 | block_i = EXT4_I(inode)->i_block_alloc_info; | ||
492 | |||
493 | /* | 489 | /* |
494 | * try the heuristic for sequential allocation, | 490 | * XXX need to get goal block from mballoc's data structures |
495 | * failing that at least try to get decent locality. | ||
496 | */ | 491 | */ |
497 | if (block_i && (block == block_i->last_alloc_logical_block + 1) | ||
498 | && (block_i->last_alloc_physical_block != 0)) { | ||
499 | return block_i->last_alloc_physical_block + 1; | ||
500 | } | ||
501 | 492 | ||
502 | return ext4_find_near(inode, partial); | 493 | return ext4_find_near(inode, partial); |
503 | } | 494 | } |
@@ -630,7 +621,7 @@ allocated: | |||
630 | *err = 0; | 621 | *err = 0; |
631 | return ret; | 622 | return ret; |
632 | failed_out: | 623 | failed_out: |
633 | for (i = 0; i <index; i++) | 624 | for (i = 0; i < index; i++) |
634 | ext4_free_blocks(handle, inode, new_blocks[i], 1, 0); | 625 | ext4_free_blocks(handle, inode, new_blocks[i], 1, 0); |
635 | return ret; | 626 | return ret; |
636 | } | 627 | } |
@@ -703,7 +694,7 @@ static int ext4_alloc_branch(handle_t *handle, struct inode *inode, | |||
703 | branch[n].p = (__le32 *) bh->b_data + offsets[n]; | 694 | branch[n].p = (__le32 *) bh->b_data + offsets[n]; |
704 | branch[n].key = cpu_to_le32(new_blocks[n]); | 695 | branch[n].key = cpu_to_le32(new_blocks[n]); |
705 | *branch[n].p = branch[n].key; | 696 | *branch[n].p = branch[n].key; |
706 | if ( n == indirect_blks) { | 697 | if (n == indirect_blks) { |
707 | current_block = new_blocks[n]; | 698 | current_block = new_blocks[n]; |
708 | /* | 699 | /* |
709 | * End of chain, update the last new metablock of | 700 | * End of chain, update the last new metablock of |
@@ -730,7 +721,7 @@ failed: | |||
730 | BUFFER_TRACE(branch[i].bh, "call jbd2_journal_forget"); | 721 | BUFFER_TRACE(branch[i].bh, "call jbd2_journal_forget"); |
731 | ext4_journal_forget(handle, branch[i].bh); | 722 | ext4_journal_forget(handle, branch[i].bh); |
732 | } | 723 | } |
733 | for (i = 0; i <indirect_blks; i++) | 724 | for (i = 0; i < indirect_blks; i++) |
734 | ext4_free_blocks(handle, inode, new_blocks[i], 1, 0); | 725 | ext4_free_blocks(handle, inode, new_blocks[i], 1, 0); |
735 | 726 | ||
736 | ext4_free_blocks(handle, inode, new_blocks[i], num, 0); | 727 | ext4_free_blocks(handle, inode, new_blocks[i], num, 0); |
@@ -757,10 +748,8 @@ static int ext4_splice_branch(handle_t *handle, struct inode *inode, | |||
757 | { | 748 | { |
758 | int i; | 749 | int i; |
759 | int err = 0; | 750 | int err = 0; |
760 | struct ext4_block_alloc_info *block_i; | ||
761 | ext4_fsblk_t current_block; | 751 | ext4_fsblk_t current_block; |
762 | 752 | ||
763 | block_i = EXT4_I(inode)->i_block_alloc_info; | ||
764 | /* | 753 | /* |
765 | * If we're splicing into a [td]indirect block (as opposed to the | 754 | * If we're splicing into a [td]indirect block (as opposed to the |
766 | * inode) then we need to get write access to the [td]indirect block | 755 | * inode) then we need to get write access to the [td]indirect block |
@@ -783,18 +772,7 @@ static int ext4_splice_branch(handle_t *handle, struct inode *inode, | |||
783 | if (num == 0 && blks > 1) { | 772 | if (num == 0 && blks > 1) { |
784 | current_block = le32_to_cpu(where->key) + 1; | 773 | current_block = le32_to_cpu(where->key) + 1; |
785 | for (i = 1; i < blks; i++) | 774 | for (i = 1; i < blks; i++) |
786 | *(where->p + i ) = cpu_to_le32(current_block++); | 775 | *(where->p + i) = cpu_to_le32(current_block++); |
787 | } | ||
788 | |||
789 | /* | ||
790 | * update the most recently allocated logical & physical block | ||
791 | * in i_block_alloc_info, to assist find the proper goal block for next | ||
792 | * allocation | ||
793 | */ | ||
794 | if (block_i) { | ||
795 | block_i->last_alloc_logical_block = block + blks - 1; | ||
796 | block_i->last_alloc_physical_block = | ||
797 | le32_to_cpu(where[num].key) + blks - 1; | ||
798 | } | 776 | } |
799 | 777 | ||
800 | /* We are done with atomic stuff, now do the rest of housekeeping */ | 778 | /* We are done with atomic stuff, now do the rest of housekeeping */ |
@@ -914,12 +892,8 @@ int ext4_get_blocks_handle(handle_t *handle, struct inode *inode, | |||
914 | goto cleanup; | 892 | goto cleanup; |
915 | 893 | ||
916 | /* | 894 | /* |
917 | * Okay, we need to do block allocation. Lazily initialize the block | 895 | * Okay, we need to do block allocation. |
918 | * allocation info here if necessary | ||
919 | */ | 896 | */ |
920 | if (S_ISREG(inode->i_mode) && (!ei->i_block_alloc_info)) | ||
921 | ext4_init_block_alloc_info(inode); | ||
922 | |||
923 | goal = ext4_find_goal(inode, iblock, partial); | 897 | goal = ext4_find_goal(inode, iblock, partial); |
924 | 898 | ||
925 | /* the number of blocks need to allocate for [d,t]indirect blocks */ | 899 | /* the number of blocks need to allocate for [d,t]indirect blocks */ |
@@ -1030,19 +1004,20 @@ static void ext4_da_update_reserve_space(struct inode *inode, int used) | |||
1030 | BUG_ON(mdb > EXT4_I(inode)->i_reserved_meta_blocks); | 1004 | BUG_ON(mdb > EXT4_I(inode)->i_reserved_meta_blocks); |
1031 | mdb_free = EXT4_I(inode)->i_reserved_meta_blocks - mdb; | 1005 | mdb_free = EXT4_I(inode)->i_reserved_meta_blocks - mdb; |
1032 | 1006 | ||
1033 | /* Account for allocated meta_blocks */ | 1007 | if (mdb_free) { |
1034 | mdb_free -= EXT4_I(inode)->i_allocated_meta_blocks; | 1008 | /* Account for allocated meta_blocks */ |
1009 | mdb_free -= EXT4_I(inode)->i_allocated_meta_blocks; | ||
1035 | 1010 | ||
1036 | /* update fs free blocks counter for truncate case */ | 1011 | /* update fs dirty blocks counter */ |
1037 | percpu_counter_add(&sbi->s_freeblocks_counter, mdb_free); | 1012 | percpu_counter_sub(&sbi->s_dirtyblocks_counter, mdb_free); |
1013 | EXT4_I(inode)->i_allocated_meta_blocks = 0; | ||
1014 | EXT4_I(inode)->i_reserved_meta_blocks = mdb; | ||
1015 | } | ||
1038 | 1016 | ||
1039 | /* update per-inode reservations */ | 1017 | /* update per-inode reservations */ |
1040 | BUG_ON(used > EXT4_I(inode)->i_reserved_data_blocks); | 1018 | BUG_ON(used > EXT4_I(inode)->i_reserved_data_blocks); |
1041 | EXT4_I(inode)->i_reserved_data_blocks -= used; | 1019 | EXT4_I(inode)->i_reserved_data_blocks -= used; |
1042 | 1020 | ||
1043 | BUG_ON(mdb > EXT4_I(inode)->i_reserved_meta_blocks); | ||
1044 | EXT4_I(inode)->i_reserved_meta_blocks = mdb; | ||
1045 | EXT4_I(inode)->i_allocated_meta_blocks = 0; | ||
1046 | spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); | 1021 | spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); |
1047 | } | 1022 | } |
1048 | 1023 | ||
@@ -1160,8 +1135,8 @@ int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, sector_t block, | |||
1160 | /* Maximum number of blocks we map for direct IO at once. */ | 1135 | /* Maximum number of blocks we map for direct IO at once. */ |
1161 | #define DIO_MAX_BLOCKS 4096 | 1136 | #define DIO_MAX_BLOCKS 4096 |
1162 | 1137 | ||
1163 | static int ext4_get_block(struct inode *inode, sector_t iblock, | 1138 | int ext4_get_block(struct inode *inode, sector_t iblock, |
1164 | struct buffer_head *bh_result, int create) | 1139 | struct buffer_head *bh_result, int create) |
1165 | { | 1140 | { |
1166 | handle_t *handle = ext4_journal_current_handle(); | 1141 | handle_t *handle = ext4_journal_current_handle(); |
1167 | int ret = 0, started = 0; | 1142 | int ret = 0, started = 0; |
@@ -1241,7 +1216,7 @@ struct buffer_head *ext4_getblk(handle_t *handle, struct inode *inode, | |||
1241 | BUFFER_TRACE(bh, "call get_create_access"); | 1216 | BUFFER_TRACE(bh, "call get_create_access"); |
1242 | fatal = ext4_journal_get_create_access(handle, bh); | 1217 | fatal = ext4_journal_get_create_access(handle, bh); |
1243 | if (!fatal && !buffer_uptodate(bh)) { | 1218 | if (!fatal && !buffer_uptodate(bh)) { |
1244 | memset(bh->b_data,0,inode->i_sb->s_blocksize); | 1219 | memset(bh->b_data, 0, inode->i_sb->s_blocksize); |
1245 | set_buffer_uptodate(bh); | 1220 | set_buffer_uptodate(bh); |
1246 | } | 1221 | } |
1247 | unlock_buffer(bh); | 1222 | unlock_buffer(bh); |
@@ -1266,7 +1241,7 @@ err: | |||
1266 | struct buffer_head *ext4_bread(handle_t *handle, struct inode *inode, | 1241 | struct buffer_head *ext4_bread(handle_t *handle, struct inode *inode, |
1267 | ext4_lblk_t block, int create, int *err) | 1242 | ext4_lblk_t block, int create, int *err) |
1268 | { | 1243 | { |
1269 | struct buffer_head * bh; | 1244 | struct buffer_head *bh; |
1270 | 1245 | ||
1271 | bh = ext4_getblk(handle, inode, block, create, err); | 1246 | bh = ext4_getblk(handle, inode, block, create, err); |
1272 | if (!bh) | 1247 | if (!bh) |
@@ -1282,13 +1257,13 @@ struct buffer_head *ext4_bread(handle_t *handle, struct inode *inode, | |||
1282 | return NULL; | 1257 | return NULL; |
1283 | } | 1258 | } |
1284 | 1259 | ||
1285 | static int walk_page_buffers( handle_t *handle, | 1260 | static int walk_page_buffers(handle_t *handle, |
1286 | struct buffer_head *head, | 1261 | struct buffer_head *head, |
1287 | unsigned from, | 1262 | unsigned from, |
1288 | unsigned to, | 1263 | unsigned to, |
1289 | int *partial, | 1264 | int *partial, |
1290 | int (*fn)( handle_t *handle, | 1265 | int (*fn)(handle_t *handle, |
1291 | struct buffer_head *bh)) | 1266 | struct buffer_head *bh)) |
1292 | { | 1267 | { |
1293 | struct buffer_head *bh; | 1268 | struct buffer_head *bh; |
1294 | unsigned block_start, block_end; | 1269 | unsigned block_start, block_end; |
@@ -1296,9 +1271,9 @@ static int walk_page_buffers( handle_t *handle, | |||
1296 | int err, ret = 0; | 1271 | int err, ret = 0; |
1297 | struct buffer_head *next; | 1272 | struct buffer_head *next; |
1298 | 1273 | ||
1299 | for ( bh = head, block_start = 0; | 1274 | for (bh = head, block_start = 0; |
1300 | ret == 0 && (bh != head || !block_start); | 1275 | ret == 0 && (bh != head || !block_start); |
1301 | block_start = block_end, bh = next) | 1276 | block_start = block_end, bh = next) |
1302 | { | 1277 | { |
1303 | next = bh->b_this_page; | 1278 | next = bh->b_this_page; |
1304 | block_end = block_start + blocksize; | 1279 | block_end = block_start + blocksize; |
@@ -1351,23 +1326,23 @@ static int ext4_write_begin(struct file *file, struct address_space *mapping, | |||
1351 | loff_t pos, unsigned len, unsigned flags, | 1326 | loff_t pos, unsigned len, unsigned flags, |
1352 | struct page **pagep, void **fsdata) | 1327 | struct page **pagep, void **fsdata) |
1353 | { | 1328 | { |
1354 | struct inode *inode = mapping->host; | 1329 | struct inode *inode = mapping->host; |
1355 | int ret, needed_blocks = ext4_writepage_trans_blocks(inode); | 1330 | int ret, needed_blocks = ext4_writepage_trans_blocks(inode); |
1356 | handle_t *handle; | 1331 | handle_t *handle; |
1357 | int retries = 0; | 1332 | int retries = 0; |
1358 | struct page *page; | 1333 | struct page *page; |
1359 | pgoff_t index; | 1334 | pgoff_t index; |
1360 | unsigned from, to; | 1335 | unsigned from, to; |
1361 | 1336 | ||
1362 | index = pos >> PAGE_CACHE_SHIFT; | 1337 | index = pos >> PAGE_CACHE_SHIFT; |
1363 | from = pos & (PAGE_CACHE_SIZE - 1); | 1338 | from = pos & (PAGE_CACHE_SIZE - 1); |
1364 | to = from + len; | 1339 | to = from + len; |
1365 | 1340 | ||
1366 | retry: | 1341 | retry: |
1367 | handle = ext4_journal_start(inode, needed_blocks); | 1342 | handle = ext4_journal_start(inode, needed_blocks); |
1368 | if (IS_ERR(handle)) { | 1343 | if (IS_ERR(handle)) { |
1369 | ret = PTR_ERR(handle); | 1344 | ret = PTR_ERR(handle); |
1370 | goto out; | 1345 | goto out; |
1371 | } | 1346 | } |
1372 | 1347 | ||
1373 | page = __grab_cache_page(mapping, index); | 1348 | page = __grab_cache_page(mapping, index); |
@@ -1387,9 +1362,16 @@ retry: | |||
1387 | } | 1362 | } |
1388 | 1363 | ||
1389 | if (ret) { | 1364 | if (ret) { |
1390 | unlock_page(page); | 1365 | unlock_page(page); |
1391 | ext4_journal_stop(handle); | 1366 | ext4_journal_stop(handle); |
1392 | page_cache_release(page); | 1367 | page_cache_release(page); |
1368 | /* | ||
1369 | * block_write_begin may have instantiated a few blocks | ||
1370 | * outside i_size. Trim these off again. Don't need | ||
1371 | * i_size_read because we hold i_mutex. | ||
1372 | */ | ||
1373 | if (pos + len > inode->i_size) | ||
1374 | vmtruncate(inode, inode->i_size); | ||
1393 | } | 1375 | } |
1394 | 1376 | ||
1395 | if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) | 1377 | if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) |
@@ -1426,16 +1408,18 @@ static int ext4_ordered_write_end(struct file *file, | |||
1426 | ret = ext4_jbd2_file_inode(handle, inode); | 1408 | ret = ext4_jbd2_file_inode(handle, inode); |
1427 | 1409 | ||
1428 | if (ret == 0) { | 1410 | if (ret == 0) { |
1429 | /* | ||
1430 | * generic_write_end() will run mark_inode_dirty() if i_size | ||
1431 | * changes. So let's piggyback the i_disksize mark_inode_dirty | ||
1432 | * into that. | ||
1433 | */ | ||
1434 | loff_t new_i_size; | 1411 | loff_t new_i_size; |
1435 | 1412 | ||
1436 | new_i_size = pos + copied; | 1413 | new_i_size = pos + copied; |
1437 | if (new_i_size > EXT4_I(inode)->i_disksize) | 1414 | if (new_i_size > EXT4_I(inode)->i_disksize) { |
1438 | EXT4_I(inode)->i_disksize = new_i_size; | 1415 | ext4_update_i_disksize(inode, new_i_size); |
1416 | /* We need to mark inode dirty even if | ||
1417 | * new_i_size is less that inode->i_size | ||
1418 | * bu greater than i_disksize.(hint delalloc) | ||
1419 | */ | ||
1420 | ext4_mark_inode_dirty(handle, inode); | ||
1421 | } | ||
1422 | |||
1439 | ret2 = generic_write_end(file, mapping, pos, len, copied, | 1423 | ret2 = generic_write_end(file, mapping, pos, len, copied, |
1440 | page, fsdata); | 1424 | page, fsdata); |
1441 | copied = ret2; | 1425 | copied = ret2; |
@@ -1460,8 +1444,14 @@ static int ext4_writeback_write_end(struct file *file, | |||
1460 | loff_t new_i_size; | 1444 | loff_t new_i_size; |
1461 | 1445 | ||
1462 | new_i_size = pos + copied; | 1446 | new_i_size = pos + copied; |
1463 | if (new_i_size > EXT4_I(inode)->i_disksize) | 1447 | if (new_i_size > EXT4_I(inode)->i_disksize) { |
1464 | EXT4_I(inode)->i_disksize = new_i_size; | 1448 | ext4_update_i_disksize(inode, new_i_size); |
1449 | /* We need to mark inode dirty even if | ||
1450 | * new_i_size is less that inode->i_size | ||
1451 | * bu greater than i_disksize.(hint delalloc) | ||
1452 | */ | ||
1453 | ext4_mark_inode_dirty(handle, inode); | ||
1454 | } | ||
1465 | 1455 | ||
1466 | ret2 = generic_write_end(file, mapping, pos, len, copied, | 1456 | ret2 = generic_write_end(file, mapping, pos, len, copied, |
1467 | page, fsdata); | 1457 | page, fsdata); |
@@ -1486,6 +1476,7 @@ static int ext4_journalled_write_end(struct file *file, | |||
1486 | int ret = 0, ret2; | 1476 | int ret = 0, ret2; |
1487 | int partial = 0; | 1477 | int partial = 0; |
1488 | unsigned from, to; | 1478 | unsigned from, to; |
1479 | loff_t new_i_size; | ||
1489 | 1480 | ||
1490 | from = pos & (PAGE_CACHE_SIZE - 1); | 1481 | from = pos & (PAGE_CACHE_SIZE - 1); |
1491 | to = from + len; | 1482 | to = from + len; |
@@ -1500,11 +1491,12 @@ static int ext4_journalled_write_end(struct file *file, | |||
1500 | to, &partial, write_end_fn); | 1491 | to, &partial, write_end_fn); |
1501 | if (!partial) | 1492 | if (!partial) |
1502 | SetPageUptodate(page); | 1493 | SetPageUptodate(page); |
1503 | if (pos+copied > inode->i_size) | 1494 | new_i_size = pos + copied; |
1495 | if (new_i_size > inode->i_size) | ||
1504 | i_size_write(inode, pos+copied); | 1496 | i_size_write(inode, pos+copied); |
1505 | EXT4_I(inode)->i_state |= EXT4_STATE_JDATA; | 1497 | EXT4_I(inode)->i_state |= EXT4_STATE_JDATA; |
1506 | if (inode->i_size > EXT4_I(inode)->i_disksize) { | 1498 | if (new_i_size > EXT4_I(inode)->i_disksize) { |
1507 | EXT4_I(inode)->i_disksize = inode->i_size; | 1499 | ext4_update_i_disksize(inode, new_i_size); |
1508 | ret2 = ext4_mark_inode_dirty(handle, inode); | 1500 | ret2 = ext4_mark_inode_dirty(handle, inode); |
1509 | if (!ret) | 1501 | if (!ret) |
1510 | ret = ret2; | 1502 | ret = ret2; |
@@ -1521,6 +1513,7 @@ static int ext4_journalled_write_end(struct file *file, | |||
1521 | 1513 | ||
1522 | static int ext4_da_reserve_space(struct inode *inode, int nrblocks) | 1514 | static int ext4_da_reserve_space(struct inode *inode, int nrblocks) |
1523 | { | 1515 | { |
1516 | int retries = 0; | ||
1524 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); | 1517 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); |
1525 | unsigned long md_needed, mdblocks, total = 0; | 1518 | unsigned long md_needed, mdblocks, total = 0; |
1526 | 1519 | ||
@@ -1529,6 +1522,7 @@ static int ext4_da_reserve_space(struct inode *inode, int nrblocks) | |||
1529 | * in order to allocate nrblocks | 1522 | * in order to allocate nrblocks |
1530 | * worse case is one extent per block | 1523 | * worse case is one extent per block |
1531 | */ | 1524 | */ |
1525 | repeat: | ||
1532 | spin_lock(&EXT4_I(inode)->i_block_reservation_lock); | 1526 | spin_lock(&EXT4_I(inode)->i_block_reservation_lock); |
1533 | total = EXT4_I(inode)->i_reserved_data_blocks + nrblocks; | 1527 | total = EXT4_I(inode)->i_reserved_data_blocks + nrblocks; |
1534 | mdblocks = ext4_calc_metadata_amount(inode, total); | 1528 | mdblocks = ext4_calc_metadata_amount(inode, total); |
@@ -1537,13 +1531,14 @@ static int ext4_da_reserve_space(struct inode *inode, int nrblocks) | |||
1537 | md_needed = mdblocks - EXT4_I(inode)->i_reserved_meta_blocks; | 1531 | md_needed = mdblocks - EXT4_I(inode)->i_reserved_meta_blocks; |
1538 | total = md_needed + nrblocks; | 1532 | total = md_needed + nrblocks; |
1539 | 1533 | ||
1540 | if (ext4_has_free_blocks(sbi, total) < total) { | 1534 | if (ext4_claim_free_blocks(sbi, total)) { |
1541 | spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); | 1535 | spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); |
1536 | if (ext4_should_retry_alloc(inode->i_sb, &retries)) { | ||
1537 | yield(); | ||
1538 | goto repeat; | ||
1539 | } | ||
1542 | return -ENOSPC; | 1540 | return -ENOSPC; |
1543 | } | 1541 | } |
1544 | /* reduce fs free blocks counter */ | ||
1545 | percpu_counter_sub(&sbi->s_freeblocks_counter, total); | ||
1546 | |||
1547 | EXT4_I(inode)->i_reserved_data_blocks += nrblocks; | 1542 | EXT4_I(inode)->i_reserved_data_blocks += nrblocks; |
1548 | EXT4_I(inode)->i_reserved_meta_blocks = mdblocks; | 1543 | EXT4_I(inode)->i_reserved_meta_blocks = mdblocks; |
1549 | 1544 | ||
@@ -1585,8 +1580,8 @@ static void ext4_da_release_space(struct inode *inode, int to_free) | |||
1585 | 1580 | ||
1586 | release = to_free + mdb_free; | 1581 | release = to_free + mdb_free; |
1587 | 1582 | ||
1588 | /* update fs free blocks counter for truncate case */ | 1583 | /* update fs dirty blocks counter for truncate case */ |
1589 | percpu_counter_add(&sbi->s_freeblocks_counter, release); | 1584 | percpu_counter_sub(&sbi->s_dirtyblocks_counter, release); |
1590 | 1585 | ||
1591 | /* update per-inode reservations */ | 1586 | /* update per-inode reservations */ |
1592 | BUG_ON(to_free > EXT4_I(inode)->i_reserved_data_blocks); | 1587 | BUG_ON(to_free > EXT4_I(inode)->i_reserved_data_blocks); |
@@ -1630,6 +1625,7 @@ struct mpage_da_data { | |||
1630 | struct writeback_control *wbc; | 1625 | struct writeback_control *wbc; |
1631 | int io_done; | 1626 | int io_done; |
1632 | long pages_written; | 1627 | long pages_written; |
1628 | int retval; | ||
1633 | }; | 1629 | }; |
1634 | 1630 | ||
1635 | /* | 1631 | /* |
@@ -1783,6 +1779,57 @@ static inline void __unmap_underlying_blocks(struct inode *inode, | |||
1783 | unmap_underlying_metadata(bdev, bh->b_blocknr + i); | 1779 | unmap_underlying_metadata(bdev, bh->b_blocknr + i); |
1784 | } | 1780 | } |
1785 | 1781 | ||
1782 | static void ext4_da_block_invalidatepages(struct mpage_da_data *mpd, | ||
1783 | sector_t logical, long blk_cnt) | ||
1784 | { | ||
1785 | int nr_pages, i; | ||
1786 | pgoff_t index, end; | ||
1787 | struct pagevec pvec; | ||
1788 | struct inode *inode = mpd->inode; | ||
1789 | struct address_space *mapping = inode->i_mapping; | ||
1790 | |||
1791 | index = logical >> (PAGE_CACHE_SHIFT - inode->i_blkbits); | ||
1792 | end = (logical + blk_cnt - 1) >> | ||
1793 | (PAGE_CACHE_SHIFT - inode->i_blkbits); | ||
1794 | while (index <= end) { | ||
1795 | nr_pages = pagevec_lookup(&pvec, mapping, index, PAGEVEC_SIZE); | ||
1796 | if (nr_pages == 0) | ||
1797 | break; | ||
1798 | for (i = 0; i < nr_pages; i++) { | ||
1799 | struct page *page = pvec.pages[i]; | ||
1800 | index = page->index; | ||
1801 | if (index > end) | ||
1802 | break; | ||
1803 | index++; | ||
1804 | |||
1805 | BUG_ON(!PageLocked(page)); | ||
1806 | BUG_ON(PageWriteback(page)); | ||
1807 | block_invalidatepage(page, 0); | ||
1808 | ClearPageUptodate(page); | ||
1809 | unlock_page(page); | ||
1810 | } | ||
1811 | } | ||
1812 | return; | ||
1813 | } | ||
1814 | |||
1815 | static void ext4_print_free_blocks(struct inode *inode) | ||
1816 | { | ||
1817 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); | ||
1818 | printk(KERN_EMERG "Total free blocks count %lld\n", | ||
1819 | ext4_count_free_blocks(inode->i_sb)); | ||
1820 | printk(KERN_EMERG "Free/Dirty block details\n"); | ||
1821 | printk(KERN_EMERG "free_blocks=%lld\n", | ||
1822 | percpu_counter_sum(&sbi->s_freeblocks_counter)); | ||
1823 | printk(KERN_EMERG "dirty_blocks=%lld\n", | ||
1824 | percpu_counter_sum(&sbi->s_dirtyblocks_counter)); | ||
1825 | printk(KERN_EMERG "Block reservation details\n"); | ||
1826 | printk(KERN_EMERG "i_reserved_data_blocks=%lu\n", | ||
1827 | EXT4_I(inode)->i_reserved_data_blocks); | ||
1828 | printk(KERN_EMERG "i_reserved_meta_blocks=%lu\n", | ||
1829 | EXT4_I(inode)->i_reserved_meta_blocks); | ||
1830 | return; | ||
1831 | } | ||
1832 | |||
1786 | /* | 1833 | /* |
1787 | * mpage_da_map_blocks - go through given space | 1834 | * mpage_da_map_blocks - go through given space |
1788 | * | 1835 | * |
@@ -1792,32 +1839,69 @@ static inline void __unmap_underlying_blocks(struct inode *inode, | |||
1792 | * The function skips space we know is already mapped to disk blocks. | 1839 | * The function skips space we know is already mapped to disk blocks. |
1793 | * | 1840 | * |
1794 | */ | 1841 | */ |
1795 | static void mpage_da_map_blocks(struct mpage_da_data *mpd) | 1842 | static int mpage_da_map_blocks(struct mpage_da_data *mpd) |
1796 | { | 1843 | { |
1797 | int err = 0; | 1844 | int err = 0; |
1798 | struct buffer_head *lbh = &mpd->lbh; | ||
1799 | sector_t next = lbh->b_blocknr; | ||
1800 | struct buffer_head new; | 1845 | struct buffer_head new; |
1846 | struct buffer_head *lbh = &mpd->lbh; | ||
1847 | sector_t next; | ||
1801 | 1848 | ||
1802 | /* | 1849 | /* |
1803 | * We consider only non-mapped and non-allocated blocks | 1850 | * We consider only non-mapped and non-allocated blocks |
1804 | */ | 1851 | */ |
1805 | if (buffer_mapped(lbh) && !buffer_delay(lbh)) | 1852 | if (buffer_mapped(lbh) && !buffer_delay(lbh)) |
1806 | return; | 1853 | return 0; |
1807 | |||
1808 | new.b_state = lbh->b_state; | 1854 | new.b_state = lbh->b_state; |
1809 | new.b_blocknr = 0; | 1855 | new.b_blocknr = 0; |
1810 | new.b_size = lbh->b_size; | 1856 | new.b_size = lbh->b_size; |
1811 | 1857 | next = lbh->b_blocknr; | |
1812 | /* | 1858 | /* |
1813 | * If we didn't accumulate anything | 1859 | * If we didn't accumulate anything |
1814 | * to write simply return | 1860 | * to write simply return |
1815 | */ | 1861 | */ |
1816 | if (!new.b_size) | 1862 | if (!new.b_size) |
1817 | return; | 1863 | return 0; |
1818 | err = mpd->get_block(mpd->inode, next, &new, 1); | 1864 | err = mpd->get_block(mpd->inode, next, &new, 1); |
1819 | if (err) | 1865 | if (err) { |
1820 | return; | 1866 | |
1867 | /* If get block returns with error | ||
1868 | * we simply return. Later writepage | ||
1869 | * will redirty the page and writepages | ||
1870 | * will find the dirty page again | ||
1871 | */ | ||
1872 | if (err == -EAGAIN) | ||
1873 | return 0; | ||
1874 | |||
1875 | if (err == -ENOSPC && | ||
1876 | ext4_count_free_blocks(mpd->inode->i_sb)) { | ||
1877 | mpd->retval = err; | ||
1878 | return 0; | ||
1879 | } | ||
1880 | |||
1881 | /* | ||
1882 | * get block failure will cause us | ||
1883 | * to loop in writepages. Because | ||
1884 | * a_ops->writepage won't be able to | ||
1885 | * make progress. The page will be redirtied | ||
1886 | * by writepage and writepages will again | ||
1887 | * try to write the same. | ||
1888 | */ | ||
1889 | printk(KERN_EMERG "%s block allocation failed for inode %lu " | ||
1890 | "at logical offset %llu with max blocks " | ||
1891 | "%zd with error %d\n", | ||
1892 | __func__, mpd->inode->i_ino, | ||
1893 | (unsigned long long)next, | ||
1894 | lbh->b_size >> mpd->inode->i_blkbits, err); | ||
1895 | printk(KERN_EMERG "This should not happen.!! " | ||
1896 | "Data will be lost\n"); | ||
1897 | if (err == -ENOSPC) { | ||
1898 | ext4_print_free_blocks(mpd->inode); | ||
1899 | } | ||
1900 | /* invlaidate all the pages */ | ||
1901 | ext4_da_block_invalidatepages(mpd, next, | ||
1902 | lbh->b_size >> mpd->inode->i_blkbits); | ||
1903 | return err; | ||
1904 | } | ||
1821 | BUG_ON(new.b_size == 0); | 1905 | BUG_ON(new.b_size == 0); |
1822 | 1906 | ||
1823 | if (buffer_new(&new)) | 1907 | if (buffer_new(&new)) |
@@ -1830,7 +1914,7 @@ static void mpage_da_map_blocks(struct mpage_da_data *mpd) | |||
1830 | if (buffer_delay(lbh) || buffer_unwritten(lbh)) | 1914 | if (buffer_delay(lbh) || buffer_unwritten(lbh)) |
1831 | mpage_put_bnr_to_bhs(mpd, next, &new); | 1915 | mpage_put_bnr_to_bhs(mpd, next, &new); |
1832 | 1916 | ||
1833 | return; | 1917 | return 0; |
1834 | } | 1918 | } |
1835 | 1919 | ||
1836 | #define BH_FLAGS ((1 << BH_Uptodate) | (1 << BH_Mapped) | \ | 1920 | #define BH_FLAGS ((1 << BH_Uptodate) | (1 << BH_Mapped) | \ |
@@ -1899,8 +1983,8 @@ flush_it: | |||
1899 | * We couldn't merge the block to our extent, so we | 1983 | * We couldn't merge the block to our extent, so we |
1900 | * need to flush current extent and start new one | 1984 | * need to flush current extent and start new one |
1901 | */ | 1985 | */ |
1902 | mpage_da_map_blocks(mpd); | 1986 | if (mpage_da_map_blocks(mpd) == 0) |
1903 | mpage_da_submit_io(mpd); | 1987 | mpage_da_submit_io(mpd); |
1904 | mpd->io_done = 1; | 1988 | mpd->io_done = 1; |
1905 | return; | 1989 | return; |
1906 | } | 1990 | } |
@@ -1942,8 +2026,8 @@ static int __mpage_da_writepage(struct page *page, | |||
1942 | * and start IO on them using writepage() | 2026 | * and start IO on them using writepage() |
1943 | */ | 2027 | */ |
1944 | if (mpd->next_page != mpd->first_page) { | 2028 | if (mpd->next_page != mpd->first_page) { |
1945 | mpage_da_map_blocks(mpd); | 2029 | if (mpage_da_map_blocks(mpd) == 0) |
1946 | mpage_da_submit_io(mpd); | 2030 | mpage_da_submit_io(mpd); |
1947 | /* | 2031 | /* |
1948 | * skip rest of the page in the page_vec | 2032 | * skip rest of the page in the page_vec |
1949 | */ | 2033 | */ |
@@ -2018,39 +2102,36 @@ static int __mpage_da_writepage(struct page *page, | |||
2018 | */ | 2102 | */ |
2019 | static int mpage_da_writepages(struct address_space *mapping, | 2103 | static int mpage_da_writepages(struct address_space *mapping, |
2020 | struct writeback_control *wbc, | 2104 | struct writeback_control *wbc, |
2021 | get_block_t get_block) | 2105 | struct mpage_da_data *mpd) |
2022 | { | 2106 | { |
2023 | struct mpage_da_data mpd; | ||
2024 | long to_write; | 2107 | long to_write; |
2025 | int ret; | 2108 | int ret; |
2026 | 2109 | ||
2027 | if (!get_block) | 2110 | if (!mpd->get_block) |
2028 | return generic_writepages(mapping, wbc); | 2111 | return generic_writepages(mapping, wbc); |
2029 | 2112 | ||
2030 | mpd.wbc = wbc; | 2113 | mpd->lbh.b_size = 0; |
2031 | mpd.inode = mapping->host; | 2114 | mpd->lbh.b_state = 0; |
2032 | mpd.lbh.b_size = 0; | 2115 | mpd->lbh.b_blocknr = 0; |
2033 | mpd.lbh.b_state = 0; | 2116 | mpd->first_page = 0; |
2034 | mpd.lbh.b_blocknr = 0; | 2117 | mpd->next_page = 0; |
2035 | mpd.first_page = 0; | 2118 | mpd->io_done = 0; |
2036 | mpd.next_page = 0; | 2119 | mpd->pages_written = 0; |
2037 | mpd.get_block = get_block; | 2120 | mpd->retval = 0; |
2038 | mpd.io_done = 0; | ||
2039 | mpd.pages_written = 0; | ||
2040 | 2121 | ||
2041 | to_write = wbc->nr_to_write; | 2122 | to_write = wbc->nr_to_write; |
2042 | 2123 | ||
2043 | ret = write_cache_pages(mapping, wbc, __mpage_da_writepage, &mpd); | 2124 | ret = write_cache_pages(mapping, wbc, __mpage_da_writepage, mpd); |
2044 | 2125 | ||
2045 | /* | 2126 | /* |
2046 | * Handle last extent of pages | 2127 | * Handle last extent of pages |
2047 | */ | 2128 | */ |
2048 | if (!mpd.io_done && mpd.next_page != mpd.first_page) { | 2129 | if (!mpd->io_done && mpd->next_page != mpd->first_page) { |
2049 | mpage_da_map_blocks(&mpd); | 2130 | if (mpage_da_map_blocks(mpd) == 0) |
2050 | mpage_da_submit_io(&mpd); | 2131 | mpage_da_submit_io(mpd); |
2051 | } | 2132 | } |
2052 | 2133 | ||
2053 | wbc->nr_to_write = to_write - mpd.pages_written; | 2134 | wbc->nr_to_write = to_write - mpd->pages_written; |
2054 | return ret; | 2135 | return ret; |
2055 | } | 2136 | } |
2056 | 2137 | ||
@@ -2103,18 +2184,24 @@ static int ext4_da_get_block_write(struct inode *inode, sector_t iblock, | |||
2103 | handle_t *handle = NULL; | 2184 | handle_t *handle = NULL; |
2104 | 2185 | ||
2105 | handle = ext4_journal_current_handle(); | 2186 | handle = ext4_journal_current_handle(); |
2106 | if (!handle) { | 2187 | BUG_ON(!handle); |
2107 | ret = ext4_get_blocks_wrap(handle, inode, iblock, max_blocks, | 2188 | ret = ext4_get_blocks_wrap(handle, inode, iblock, max_blocks, |
2108 | bh_result, 0, 0, 0); | 2189 | bh_result, create, 0, EXT4_DELALLOC_RSVED); |
2109 | BUG_ON(!ret); | ||
2110 | } else { | ||
2111 | ret = ext4_get_blocks_wrap(handle, inode, iblock, max_blocks, | ||
2112 | bh_result, create, 0, EXT4_DELALLOC_RSVED); | ||
2113 | } | ||
2114 | |||
2115 | if (ret > 0) { | 2190 | if (ret > 0) { |
2191 | |||
2116 | bh_result->b_size = (ret << inode->i_blkbits); | 2192 | bh_result->b_size = (ret << inode->i_blkbits); |
2117 | 2193 | ||
2194 | if (ext4_should_order_data(inode)) { | ||
2195 | int retval; | ||
2196 | retval = ext4_jbd2_file_inode(handle, inode); | ||
2197 | if (retval) | ||
2198 | /* | ||
2199 | * Failed to add inode for ordered | ||
2200 | * mode. Don't update file size | ||
2201 | */ | ||
2202 | return retval; | ||
2203 | } | ||
2204 | |||
2118 | /* | 2205 | /* |
2119 | * Update on-disk size along with block allocation | 2206 | * Update on-disk size along with block allocation |
2120 | * we don't use 'extend_disksize' as size may change | 2207 | * we don't use 'extend_disksize' as size may change |
@@ -2124,18 +2211,9 @@ static int ext4_da_get_block_write(struct inode *inode, sector_t iblock, | |||
2124 | if (disksize > i_size_read(inode)) | 2211 | if (disksize > i_size_read(inode)) |
2125 | disksize = i_size_read(inode); | 2212 | disksize = i_size_read(inode); |
2126 | if (disksize > EXT4_I(inode)->i_disksize) { | 2213 | if (disksize > EXT4_I(inode)->i_disksize) { |
2127 | /* | 2214 | ext4_update_i_disksize(inode, disksize); |
2128 | * XXX: replace with spinlock if seen contended -bzzz | 2215 | ret = ext4_mark_inode_dirty(handle, inode); |
2129 | */ | 2216 | return ret; |
2130 | down_write(&EXT4_I(inode)->i_data_sem); | ||
2131 | if (disksize > EXT4_I(inode)->i_disksize) | ||
2132 | EXT4_I(inode)->i_disksize = disksize; | ||
2133 | up_write(&EXT4_I(inode)->i_data_sem); | ||
2134 | |||
2135 | if (EXT4_I(inode)->i_disksize == disksize) { | ||
2136 | ret = ext4_mark_inode_dirty(handle, inode); | ||
2137 | return ret; | ||
2138 | } | ||
2139 | } | 2217 | } |
2140 | ret = 0; | 2218 | ret = 0; |
2141 | } | 2219 | } |
@@ -2284,6 +2362,7 @@ static int ext4_da_writepages(struct address_space *mapping, | |||
2284 | { | 2362 | { |
2285 | handle_t *handle = NULL; | 2363 | handle_t *handle = NULL; |
2286 | loff_t range_start = 0; | 2364 | loff_t range_start = 0; |
2365 | struct mpage_da_data mpd; | ||
2287 | struct inode *inode = mapping->host; | 2366 | struct inode *inode = mapping->host; |
2288 | int needed_blocks, ret = 0, nr_to_writebump = 0; | 2367 | int needed_blocks, ret = 0, nr_to_writebump = 0; |
2289 | long to_write, pages_skipped = 0; | 2368 | long to_write, pages_skipped = 0; |
@@ -2317,6 +2396,9 @@ static int ext4_da_writepages(struct address_space *mapping, | |||
2317 | range_start = wbc->range_start; | 2396 | range_start = wbc->range_start; |
2318 | pages_skipped = wbc->pages_skipped; | 2397 | pages_skipped = wbc->pages_skipped; |
2319 | 2398 | ||
2399 | mpd.wbc = wbc; | ||
2400 | mpd.inode = mapping->host; | ||
2401 | |||
2320 | restart_loop: | 2402 | restart_loop: |
2321 | to_write = wbc->nr_to_write; | 2403 | to_write = wbc->nr_to_write; |
2322 | while (!ret && to_write > 0) { | 2404 | while (!ret && to_write > 0) { |
@@ -2340,23 +2422,17 @@ restart_loop: | |||
2340 | dump_stack(); | 2422 | dump_stack(); |
2341 | goto out_writepages; | 2423 | goto out_writepages; |
2342 | } | 2424 | } |
2343 | if (ext4_should_order_data(inode)) { | ||
2344 | /* | ||
2345 | * With ordered mode we need to add | ||
2346 | * the inode to the journal handl | ||
2347 | * when we do block allocation. | ||
2348 | */ | ||
2349 | ret = ext4_jbd2_file_inode(handle, inode); | ||
2350 | if (ret) { | ||
2351 | ext4_journal_stop(handle); | ||
2352 | goto out_writepages; | ||
2353 | } | ||
2354 | } | ||
2355 | |||
2356 | to_write -= wbc->nr_to_write; | 2425 | to_write -= wbc->nr_to_write; |
2357 | ret = mpage_da_writepages(mapping, wbc, | 2426 | |
2358 | ext4_da_get_block_write); | 2427 | mpd.get_block = ext4_da_get_block_write; |
2428 | ret = mpage_da_writepages(mapping, wbc, &mpd); | ||
2429 | |||
2359 | ext4_journal_stop(handle); | 2430 | ext4_journal_stop(handle); |
2431 | |||
2432 | if (mpd.retval == -ENOSPC) | ||
2433 | jbd2_journal_force_commit_nested(sbi->s_journal); | ||
2434 | |||
2435 | /* reset the retry count */ | ||
2360 | if (ret == MPAGE_DA_EXTENT_TAIL) { | 2436 | if (ret == MPAGE_DA_EXTENT_TAIL) { |
2361 | /* | 2437 | /* |
2362 | * got one extent now try with | 2438 | * got one extent now try with |
@@ -2391,6 +2467,33 @@ out_writepages: | |||
2391 | return ret; | 2467 | return ret; |
2392 | } | 2468 | } |
2393 | 2469 | ||
2470 | #define FALL_BACK_TO_NONDELALLOC 1 | ||
2471 | static int ext4_nonda_switch(struct super_block *sb) | ||
2472 | { | ||
2473 | s64 free_blocks, dirty_blocks; | ||
2474 | struct ext4_sb_info *sbi = EXT4_SB(sb); | ||
2475 | |||
2476 | /* | ||
2477 | * switch to non delalloc mode if we are running low | ||
2478 | * on free block. The free block accounting via percpu | ||
2479 | * counters can get slightly wrong with FBC_BATCH getting | ||
2480 | * accumulated on each CPU without updating global counters | ||
2481 | * Delalloc need an accurate free block accounting. So switch | ||
2482 | * to non delalloc when we are near to error range. | ||
2483 | */ | ||
2484 | free_blocks = percpu_counter_read_positive(&sbi->s_freeblocks_counter); | ||
2485 | dirty_blocks = percpu_counter_read_positive(&sbi->s_dirtyblocks_counter); | ||
2486 | if (2 * free_blocks < 3 * dirty_blocks || | ||
2487 | free_blocks < (dirty_blocks + EXT4_FREEBLOCKS_WATERMARK)) { | ||
2488 | /* | ||
2489 | * free block count is less that 150% of dirty blocks | ||
2490 | * or free blocks is less that watermark | ||
2491 | */ | ||
2492 | return 1; | ||
2493 | } | ||
2494 | return 0; | ||
2495 | } | ||
2496 | |||
2394 | static int ext4_da_write_begin(struct file *file, struct address_space *mapping, | 2497 | static int ext4_da_write_begin(struct file *file, struct address_space *mapping, |
2395 | loff_t pos, unsigned len, unsigned flags, | 2498 | loff_t pos, unsigned len, unsigned flags, |
2396 | struct page **pagep, void **fsdata) | 2499 | struct page **pagep, void **fsdata) |
@@ -2406,6 +2509,12 @@ static int ext4_da_write_begin(struct file *file, struct address_space *mapping, | |||
2406 | from = pos & (PAGE_CACHE_SIZE - 1); | 2509 | from = pos & (PAGE_CACHE_SIZE - 1); |
2407 | to = from + len; | 2510 | to = from + len; |
2408 | 2511 | ||
2512 | if (ext4_nonda_switch(inode->i_sb)) { | ||
2513 | *fsdata = (void *)FALL_BACK_TO_NONDELALLOC; | ||
2514 | return ext4_write_begin(file, mapping, pos, | ||
2515 | len, flags, pagep, fsdata); | ||
2516 | } | ||
2517 | *fsdata = (void *)0; | ||
2409 | retry: | 2518 | retry: |
2410 | /* | 2519 | /* |
2411 | * With delayed allocation, we don't log the i_disksize update | 2520 | * With delayed allocation, we don't log the i_disksize update |
@@ -2433,6 +2542,13 @@ retry: | |||
2433 | unlock_page(page); | 2542 | unlock_page(page); |
2434 | ext4_journal_stop(handle); | 2543 | ext4_journal_stop(handle); |
2435 | page_cache_release(page); | 2544 | page_cache_release(page); |
2545 | /* | ||
2546 | * block_write_begin may have instantiated a few blocks | ||
2547 | * outside i_size. Trim these off again. Don't need | ||
2548 | * i_size_read because we hold i_mutex. | ||
2549 | */ | ||
2550 | if (pos + len > inode->i_size) | ||
2551 | vmtruncate(inode, inode->i_size); | ||
2436 | } | 2552 | } |
2437 | 2553 | ||
2438 | if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) | 2554 | if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) |
@@ -2456,7 +2572,7 @@ static int ext4_da_should_update_i_disksize(struct page *page, | |||
2456 | bh = page_buffers(page); | 2572 | bh = page_buffers(page); |
2457 | idx = offset >> inode->i_blkbits; | 2573 | idx = offset >> inode->i_blkbits; |
2458 | 2574 | ||
2459 | for (i=0; i < idx; i++) | 2575 | for (i = 0; i < idx; i++) |
2460 | bh = bh->b_this_page; | 2576 | bh = bh->b_this_page; |
2461 | 2577 | ||
2462 | if (!buffer_mapped(bh) || (buffer_delay(bh))) | 2578 | if (!buffer_mapped(bh) || (buffer_delay(bh))) |
@@ -2474,9 +2590,22 @@ static int ext4_da_write_end(struct file *file, | |||
2474 | handle_t *handle = ext4_journal_current_handle(); | 2590 | handle_t *handle = ext4_journal_current_handle(); |
2475 | loff_t new_i_size; | 2591 | loff_t new_i_size; |
2476 | unsigned long start, end; | 2592 | unsigned long start, end; |
2593 | int write_mode = (int)(unsigned long)fsdata; | ||
2594 | |||
2595 | if (write_mode == FALL_BACK_TO_NONDELALLOC) { | ||
2596 | if (ext4_should_order_data(inode)) { | ||
2597 | return ext4_ordered_write_end(file, mapping, pos, | ||
2598 | len, copied, page, fsdata); | ||
2599 | } else if (ext4_should_writeback_data(inode)) { | ||
2600 | return ext4_writeback_write_end(file, mapping, pos, | ||
2601 | len, copied, page, fsdata); | ||
2602 | } else { | ||
2603 | BUG(); | ||
2604 | } | ||
2605 | } | ||
2477 | 2606 | ||
2478 | start = pos & (PAGE_CACHE_SIZE - 1); | 2607 | start = pos & (PAGE_CACHE_SIZE - 1); |
2479 | end = start + copied -1; | 2608 | end = start + copied - 1; |
2480 | 2609 | ||
2481 | /* | 2610 | /* |
2482 | * generic_write_end() will run mark_inode_dirty() if i_size | 2611 | * generic_write_end() will run mark_inode_dirty() if i_size |
@@ -2500,6 +2629,11 @@ static int ext4_da_write_end(struct file *file, | |||
2500 | EXT4_I(inode)->i_disksize = new_i_size; | 2629 | EXT4_I(inode)->i_disksize = new_i_size; |
2501 | } | 2630 | } |
2502 | up_write(&EXT4_I(inode)->i_data_sem); | 2631 | up_write(&EXT4_I(inode)->i_data_sem); |
2632 | /* We need to mark inode dirty even if | ||
2633 | * new_i_size is less that inode->i_size | ||
2634 | * bu greater than i_disksize.(hint delalloc) | ||
2635 | */ | ||
2636 | ext4_mark_inode_dirty(handle, inode); | ||
2503 | } | 2637 | } |
2504 | } | 2638 | } |
2505 | ret2 = generic_write_end(file, mapping, pos, len, copied, | 2639 | ret2 = generic_write_end(file, mapping, pos, len, copied, |
@@ -2591,7 +2725,7 @@ static sector_t ext4_bmap(struct address_space *mapping, sector_t block) | |||
2591 | return 0; | 2725 | return 0; |
2592 | } | 2726 | } |
2593 | 2727 | ||
2594 | return generic_block_bmap(mapping,block,ext4_get_block); | 2728 | return generic_block_bmap(mapping, block, ext4_get_block); |
2595 | } | 2729 | } |
2596 | 2730 | ||
2597 | static int bget_one(handle_t *handle, struct buffer_head *bh) | 2731 | static int bget_one(handle_t *handle, struct buffer_head *bh) |
@@ -3197,7 +3331,7 @@ static Indirect *ext4_find_shared(struct inode *inode, int depth, | |||
3197 | if (!partial->key && *partial->p) | 3331 | if (!partial->key && *partial->p) |
3198 | /* Writer: end */ | 3332 | /* Writer: end */ |
3199 | goto no_top; | 3333 | goto no_top; |
3200 | for (p=partial; p>chain && all_zeroes((__le32*)p->bh->b_data,p->p); p--) | 3334 | for (p = partial; (p > chain) && all_zeroes((__le32 *) p->bh->b_data, p->p); p--) |
3201 | ; | 3335 | ; |
3202 | /* | 3336 | /* |
3203 | * OK, we've found the last block that must survive. The rest of our | 3337 | * OK, we've found the last block that must survive. The rest of our |
@@ -3216,7 +3350,7 @@ static Indirect *ext4_find_shared(struct inode *inode, int depth, | |||
3216 | } | 3350 | } |
3217 | /* Writer: end */ | 3351 | /* Writer: end */ |
3218 | 3352 | ||
3219 | while(partial > p) { | 3353 | while (partial > p) { |
3220 | brelse(partial->bh); | 3354 | brelse(partial->bh); |
3221 | partial--; | 3355 | partial--; |
3222 | } | 3356 | } |
@@ -3408,9 +3542,9 @@ static void ext4_free_branches(handle_t *handle, struct inode *inode, | |||
3408 | /* This zaps the entire block. Bottom up. */ | 3542 | /* This zaps the entire block. Bottom up. */ |
3409 | BUFFER_TRACE(bh, "free child branches"); | 3543 | BUFFER_TRACE(bh, "free child branches"); |
3410 | ext4_free_branches(handle, inode, bh, | 3544 | ext4_free_branches(handle, inode, bh, |
3411 | (__le32*)bh->b_data, | 3545 | (__le32 *) bh->b_data, |
3412 | (__le32*)bh->b_data + addr_per_block, | 3546 | (__le32 *) bh->b_data + addr_per_block, |
3413 | depth); | 3547 | depth); |
3414 | 3548 | ||
3415 | /* | 3549 | /* |
3416 | * We've probably journalled the indirect block several | 3550 | * We've probably journalled the indirect block several |
@@ -3578,7 +3712,7 @@ void ext4_truncate(struct inode *inode) | |||
3578 | */ | 3712 | */ |
3579 | down_write(&ei->i_data_sem); | 3713 | down_write(&ei->i_data_sem); |
3580 | 3714 | ||
3581 | ext4_discard_reservation(inode); | 3715 | ext4_discard_preallocations(inode); |
3582 | 3716 | ||
3583 | /* | 3717 | /* |
3584 | * The orphan list entry will now protect us from any crash which | 3718 | * The orphan list entry will now protect us from any crash which |
@@ -3673,41 +3807,6 @@ out_stop: | |||
3673 | ext4_journal_stop(handle); | 3807 | ext4_journal_stop(handle); |
3674 | } | 3808 | } |
3675 | 3809 | ||
3676 | static ext4_fsblk_t ext4_get_inode_block(struct super_block *sb, | ||
3677 | unsigned long ino, struct ext4_iloc *iloc) | ||
3678 | { | ||
3679 | ext4_group_t block_group; | ||
3680 | unsigned long offset; | ||
3681 | ext4_fsblk_t block; | ||
3682 | struct ext4_group_desc *gdp; | ||
3683 | |||
3684 | if (!ext4_valid_inum(sb, ino)) { | ||
3685 | /* | ||
3686 | * This error is already checked for in namei.c unless we are | ||
3687 | * looking at an NFS filehandle, in which case no error | ||
3688 | * report is needed | ||
3689 | */ | ||
3690 | return 0; | ||
3691 | } | ||
3692 | |||
3693 | block_group = (ino - 1) / EXT4_INODES_PER_GROUP(sb); | ||
3694 | gdp = ext4_get_group_desc(sb, block_group, NULL); | ||
3695 | if (!gdp) | ||
3696 | return 0; | ||
3697 | |||
3698 | /* | ||
3699 | * Figure out the offset within the block group inode table | ||
3700 | */ | ||
3701 | offset = ((ino - 1) % EXT4_INODES_PER_GROUP(sb)) * | ||
3702 | EXT4_INODE_SIZE(sb); | ||
3703 | block = ext4_inode_table(sb, gdp) + | ||
3704 | (offset >> EXT4_BLOCK_SIZE_BITS(sb)); | ||
3705 | |||
3706 | iloc->block_group = block_group; | ||
3707 | iloc->offset = offset & (EXT4_BLOCK_SIZE(sb) - 1); | ||
3708 | return block; | ||
3709 | } | ||
3710 | |||
3711 | /* | 3810 | /* |
3712 | * ext4_get_inode_loc returns with an extra refcount against the inode's | 3811 | * ext4_get_inode_loc returns with an extra refcount against the inode's |
3713 | * underlying buffer_head on success. If 'in_mem' is true, we have all | 3812 | * underlying buffer_head on success. If 'in_mem' is true, we have all |
@@ -3717,19 +3816,35 @@ static ext4_fsblk_t ext4_get_inode_block(struct super_block *sb, | |||
3717 | static int __ext4_get_inode_loc(struct inode *inode, | 3816 | static int __ext4_get_inode_loc(struct inode *inode, |
3718 | struct ext4_iloc *iloc, int in_mem) | 3817 | struct ext4_iloc *iloc, int in_mem) |
3719 | { | 3818 | { |
3720 | ext4_fsblk_t block; | 3819 | struct ext4_group_desc *gdp; |
3721 | struct buffer_head *bh; | 3820 | struct buffer_head *bh; |
3821 | struct super_block *sb = inode->i_sb; | ||
3822 | ext4_fsblk_t block; | ||
3823 | int inodes_per_block, inode_offset; | ||
3824 | |||
3825 | iloc->bh = 0; | ||
3826 | if (!ext4_valid_inum(sb, inode->i_ino)) | ||
3827 | return -EIO; | ||
3722 | 3828 | ||
3723 | block = ext4_get_inode_block(inode->i_sb, inode->i_ino, iloc); | 3829 | iloc->block_group = (inode->i_ino - 1) / EXT4_INODES_PER_GROUP(sb); |
3724 | if (!block) | 3830 | gdp = ext4_get_group_desc(sb, iloc->block_group, NULL); |
3831 | if (!gdp) | ||
3725 | return -EIO; | 3832 | return -EIO; |
3726 | 3833 | ||
3727 | bh = sb_getblk(inode->i_sb, block); | 3834 | /* |
3835 | * Figure out the offset within the block group inode table | ||
3836 | */ | ||
3837 | inodes_per_block = (EXT4_BLOCK_SIZE(sb) / EXT4_INODE_SIZE(sb)); | ||
3838 | inode_offset = ((inode->i_ino - 1) % | ||
3839 | EXT4_INODES_PER_GROUP(sb)); | ||
3840 | block = ext4_inode_table(sb, gdp) + (inode_offset / inodes_per_block); | ||
3841 | iloc->offset = (inode_offset % inodes_per_block) * EXT4_INODE_SIZE(sb); | ||
3842 | |||
3843 | bh = sb_getblk(sb, block); | ||
3728 | if (!bh) { | 3844 | if (!bh) { |
3729 | ext4_error (inode->i_sb, "ext4_get_inode_loc", | 3845 | ext4_error(sb, "ext4_get_inode_loc", "unable to read " |
3730 | "unable to read inode block - " | 3846 | "inode block - inode=%lu, block=%llu", |
3731 | "inode=%lu, block=%llu", | 3847 | inode->i_ino, block); |
3732 | inode->i_ino, block); | ||
3733 | return -EIO; | 3848 | return -EIO; |
3734 | } | 3849 | } |
3735 | if (!buffer_uptodate(bh)) { | 3850 | if (!buffer_uptodate(bh)) { |
@@ -3757,28 +3872,12 @@ static int __ext4_get_inode_loc(struct inode *inode, | |||
3757 | */ | 3872 | */ |
3758 | if (in_mem) { | 3873 | if (in_mem) { |
3759 | struct buffer_head *bitmap_bh; | 3874 | struct buffer_head *bitmap_bh; |
3760 | struct ext4_group_desc *desc; | 3875 | int i, start; |
3761 | int inodes_per_buffer; | ||
3762 | int inode_offset, i; | ||
3763 | ext4_group_t block_group; | ||
3764 | int start; | ||
3765 | |||
3766 | block_group = (inode->i_ino - 1) / | ||
3767 | EXT4_INODES_PER_GROUP(inode->i_sb); | ||
3768 | inodes_per_buffer = bh->b_size / | ||
3769 | EXT4_INODE_SIZE(inode->i_sb); | ||
3770 | inode_offset = ((inode->i_ino - 1) % | ||
3771 | EXT4_INODES_PER_GROUP(inode->i_sb)); | ||
3772 | start = inode_offset & ~(inodes_per_buffer - 1); | ||
3773 | 3876 | ||
3774 | /* Is the inode bitmap in cache? */ | 3877 | start = inode_offset & ~(inodes_per_block - 1); |
3775 | desc = ext4_get_group_desc(inode->i_sb, | ||
3776 | block_group, NULL); | ||
3777 | if (!desc) | ||
3778 | goto make_io; | ||
3779 | 3878 | ||
3780 | bitmap_bh = sb_getblk(inode->i_sb, | 3879 | /* Is the inode bitmap in cache? */ |
3781 | ext4_inode_bitmap(inode->i_sb, desc)); | 3880 | bitmap_bh = sb_getblk(sb, ext4_inode_bitmap(sb, gdp)); |
3782 | if (!bitmap_bh) | 3881 | if (!bitmap_bh) |
3783 | goto make_io; | 3882 | goto make_io; |
3784 | 3883 | ||
@@ -3791,14 +3890,14 @@ static int __ext4_get_inode_loc(struct inode *inode, | |||
3791 | brelse(bitmap_bh); | 3890 | brelse(bitmap_bh); |
3792 | goto make_io; | 3891 | goto make_io; |
3793 | } | 3892 | } |
3794 | for (i = start; i < start + inodes_per_buffer; i++) { | 3893 | for (i = start; i < start + inodes_per_block; i++) { |
3795 | if (i == inode_offset) | 3894 | if (i == inode_offset) |
3796 | continue; | 3895 | continue; |
3797 | if (ext4_test_bit(i, bitmap_bh->b_data)) | 3896 | if (ext4_test_bit(i, bitmap_bh->b_data)) |
3798 | break; | 3897 | break; |
3799 | } | 3898 | } |
3800 | brelse(bitmap_bh); | 3899 | brelse(bitmap_bh); |
3801 | if (i == start + inodes_per_buffer) { | 3900 | if (i == start + inodes_per_block) { |
3802 | /* all other inodes are free, so skip I/O */ | 3901 | /* all other inodes are free, so skip I/O */ |
3803 | memset(bh->b_data, 0, bh->b_size); | 3902 | memset(bh->b_data, 0, bh->b_size); |
3804 | set_buffer_uptodate(bh); | 3903 | set_buffer_uptodate(bh); |
@@ -3809,6 +3908,36 @@ static int __ext4_get_inode_loc(struct inode *inode, | |||
3809 | 3908 | ||
3810 | make_io: | 3909 | make_io: |
3811 | /* | 3910 | /* |
3911 | * If we need to do any I/O, try to pre-readahead extra | ||
3912 | * blocks from the inode table. | ||
3913 | */ | ||
3914 | if (EXT4_SB(sb)->s_inode_readahead_blks) { | ||
3915 | ext4_fsblk_t b, end, table; | ||
3916 | unsigned num; | ||
3917 | |||
3918 | table = ext4_inode_table(sb, gdp); | ||
3919 | /* Make sure s_inode_readahead_blks is a power of 2 */ | ||
3920 | while (EXT4_SB(sb)->s_inode_readahead_blks & | ||
3921 | (EXT4_SB(sb)->s_inode_readahead_blks-1)) | ||
3922 | EXT4_SB(sb)->s_inode_readahead_blks = | ||
3923 | (EXT4_SB(sb)->s_inode_readahead_blks & | ||
3924 | (EXT4_SB(sb)->s_inode_readahead_blks-1)); | ||
3925 | b = block & ~(EXT4_SB(sb)->s_inode_readahead_blks-1); | ||
3926 | if (table > b) | ||
3927 | b = table; | ||
3928 | end = b + EXT4_SB(sb)->s_inode_readahead_blks; | ||
3929 | num = EXT4_INODES_PER_GROUP(sb); | ||
3930 | if (EXT4_HAS_RO_COMPAT_FEATURE(sb, | ||
3931 | EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) | ||
3932 | num -= le16_to_cpu(gdp->bg_itable_unused); | ||
3933 | table += num / inodes_per_block; | ||
3934 | if (end > table) | ||
3935 | end = table; | ||
3936 | while (b <= end) | ||
3937 | sb_breadahead(sb, b++); | ||
3938 | } | ||
3939 | |||
3940 | /* | ||
3812 | * There are other valid inodes in the buffer, this inode | 3941 | * There are other valid inodes in the buffer, this inode |
3813 | * has in-inode xattrs, or we don't have this inode in memory. | 3942 | * has in-inode xattrs, or we don't have this inode in memory. |
3814 | * Read the block from disk. | 3943 | * Read the block from disk. |
@@ -3818,10 +3947,9 @@ make_io: | |||
3818 | submit_bh(READ_META, bh); | 3947 | submit_bh(READ_META, bh); |
3819 | wait_on_buffer(bh); | 3948 | wait_on_buffer(bh); |
3820 | if (!buffer_uptodate(bh)) { | 3949 | if (!buffer_uptodate(bh)) { |
3821 | ext4_error(inode->i_sb, "ext4_get_inode_loc", | 3950 | ext4_error(sb, __func__, |
3822 | "unable to read inode block - " | 3951 | "unable to read inode block - inode=%lu, " |
3823 | "inode=%lu, block=%llu", | 3952 | "block=%llu", inode->i_ino, block); |
3824 | inode->i_ino, block); | ||
3825 | brelse(bh); | 3953 | brelse(bh); |
3826 | return -EIO; | 3954 | return -EIO; |
3827 | } | 3955 | } |
@@ -3913,11 +4041,10 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) | |||
3913 | return inode; | 4041 | return inode; |
3914 | 4042 | ||
3915 | ei = EXT4_I(inode); | 4043 | ei = EXT4_I(inode); |
3916 | #ifdef CONFIG_EXT4DEV_FS_POSIX_ACL | 4044 | #ifdef CONFIG_EXT4_FS_POSIX_ACL |
3917 | ei->i_acl = EXT4_ACL_NOT_CACHED; | 4045 | ei->i_acl = EXT4_ACL_NOT_CACHED; |
3918 | ei->i_default_acl = EXT4_ACL_NOT_CACHED; | 4046 | ei->i_default_acl = EXT4_ACL_NOT_CACHED; |
3919 | #endif | 4047 | #endif |
3920 | ei->i_block_alloc_info = NULL; | ||
3921 | 4048 | ||
3922 | ret = __ext4_get_inode_loc(inode, &iloc, 0); | 4049 | ret = __ext4_get_inode_loc(inode, &iloc, 0); |
3923 | if (ret < 0) | 4050 | if (ret < 0) |
@@ -3927,7 +4054,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) | |||
3927 | inode->i_mode = le16_to_cpu(raw_inode->i_mode); | 4054 | inode->i_mode = le16_to_cpu(raw_inode->i_mode); |
3928 | inode->i_uid = (uid_t)le16_to_cpu(raw_inode->i_uid_low); | 4055 | inode->i_uid = (uid_t)le16_to_cpu(raw_inode->i_uid_low); |
3929 | inode->i_gid = (gid_t)le16_to_cpu(raw_inode->i_gid_low); | 4056 | inode->i_gid = (gid_t)le16_to_cpu(raw_inode->i_gid_low); |
3930 | if(!(test_opt (inode->i_sb, NO_UID32))) { | 4057 | if (!(test_opt(inode->i_sb, NO_UID32))) { |
3931 | inode->i_uid |= le16_to_cpu(raw_inode->i_uid_high) << 16; | 4058 | inode->i_uid |= le16_to_cpu(raw_inode->i_uid_high) << 16; |
3932 | inode->i_gid |= le16_to_cpu(raw_inode->i_gid_high) << 16; | 4059 | inode->i_gid |= le16_to_cpu(raw_inode->i_gid_high) << 16; |
3933 | } | 4060 | } |
@@ -3945,7 +4072,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) | |||
3945 | if (inode->i_mode == 0 || | 4072 | if (inode->i_mode == 0 || |
3946 | !(EXT4_SB(inode->i_sb)->s_mount_state & EXT4_ORPHAN_FS)) { | 4073 | !(EXT4_SB(inode->i_sb)->s_mount_state & EXT4_ORPHAN_FS)) { |
3947 | /* this inode is deleted */ | 4074 | /* this inode is deleted */ |
3948 | brelse (bh); | 4075 | brelse(bh); |
3949 | ret = -ESTALE; | 4076 | ret = -ESTALE; |
3950 | goto bad_inode; | 4077 | goto bad_inode; |
3951 | } | 4078 | } |
@@ -3978,7 +4105,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) | |||
3978 | ei->i_extra_isize = le16_to_cpu(raw_inode->i_extra_isize); | 4105 | ei->i_extra_isize = le16_to_cpu(raw_inode->i_extra_isize); |
3979 | if (EXT4_GOOD_OLD_INODE_SIZE + ei->i_extra_isize > | 4106 | if (EXT4_GOOD_OLD_INODE_SIZE + ei->i_extra_isize > |
3980 | EXT4_INODE_SIZE(inode->i_sb)) { | 4107 | EXT4_INODE_SIZE(inode->i_sb)) { |
3981 | brelse (bh); | 4108 | brelse(bh); |
3982 | ret = -EIO; | 4109 | ret = -EIO; |
3983 | goto bad_inode; | 4110 | goto bad_inode; |
3984 | } | 4111 | } |
@@ -4031,7 +4158,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) | |||
4031 | init_special_inode(inode, inode->i_mode, | 4158 | init_special_inode(inode, inode->i_mode, |
4032 | new_decode_dev(le32_to_cpu(raw_inode->i_block[1]))); | 4159 | new_decode_dev(le32_to_cpu(raw_inode->i_block[1]))); |
4033 | } | 4160 | } |
4034 | brelse (iloc.bh); | 4161 | brelse(iloc.bh); |
4035 | ext4_set_inode_flags(inode); | 4162 | ext4_set_inode_flags(inode); |
4036 | unlock_new_inode(inode); | 4163 | unlock_new_inode(inode); |
4037 | return inode; | 4164 | return inode; |
@@ -4113,14 +4240,14 @@ static int ext4_do_update_inode(handle_t *handle, | |||
4113 | 4240 | ||
4114 | ext4_get_inode_flags(ei); | 4241 | ext4_get_inode_flags(ei); |
4115 | raw_inode->i_mode = cpu_to_le16(inode->i_mode); | 4242 | raw_inode->i_mode = cpu_to_le16(inode->i_mode); |
4116 | if(!(test_opt(inode->i_sb, NO_UID32))) { | 4243 | if (!(test_opt(inode->i_sb, NO_UID32))) { |
4117 | raw_inode->i_uid_low = cpu_to_le16(low_16_bits(inode->i_uid)); | 4244 | raw_inode->i_uid_low = cpu_to_le16(low_16_bits(inode->i_uid)); |
4118 | raw_inode->i_gid_low = cpu_to_le16(low_16_bits(inode->i_gid)); | 4245 | raw_inode->i_gid_low = cpu_to_le16(low_16_bits(inode->i_gid)); |
4119 | /* | 4246 | /* |
4120 | * Fix up interoperability with old kernels. Otherwise, old inodes get | 4247 | * Fix up interoperability with old kernels. Otherwise, old inodes get |
4121 | * re-used with the upper 16 bits of the uid/gid intact | 4248 | * re-used with the upper 16 bits of the uid/gid intact |
4122 | */ | 4249 | */ |
4123 | if(!ei->i_dtime) { | 4250 | if (!ei->i_dtime) { |
4124 | raw_inode->i_uid_high = | 4251 | raw_inode->i_uid_high = |
4125 | cpu_to_le16(high_16_bits(inode->i_uid)); | 4252 | cpu_to_le16(high_16_bits(inode->i_uid)); |
4126 | raw_inode->i_gid_high = | 4253 | raw_inode->i_gid_high = |
@@ -4208,7 +4335,7 @@ static int ext4_do_update_inode(handle_t *handle, | |||
4208 | ei->i_state &= ~EXT4_STATE_NEW; | 4335 | ei->i_state &= ~EXT4_STATE_NEW; |
4209 | 4336 | ||
4210 | out_brelse: | 4337 | out_brelse: |
4211 | brelse (bh); | 4338 | brelse(bh); |
4212 | ext4_std_error(inode->i_sb, err); | 4339 | ext4_std_error(inode->i_sb, err); |
4213 | return err; | 4340 | return err; |
4214 | } | 4341 | } |
@@ -4811,6 +4938,7 @@ int ext4_page_mkwrite(struct vm_area_struct *vma, struct page *page) | |||
4811 | loff_t size; | 4938 | loff_t size; |
4812 | unsigned long len; | 4939 | unsigned long len; |
4813 | int ret = -EINVAL; | 4940 | int ret = -EINVAL; |
4941 | void *fsdata; | ||
4814 | struct file *file = vma->vm_file; | 4942 | struct file *file = vma->vm_file; |
4815 | struct inode *inode = file->f_path.dentry->d_inode; | 4943 | struct inode *inode = file->f_path.dentry->d_inode; |
4816 | struct address_space *mapping = inode->i_mapping; | 4944 | struct address_space *mapping = inode->i_mapping; |
@@ -4849,11 +4977,11 @@ int ext4_page_mkwrite(struct vm_area_struct *vma, struct page *page) | |||
4849 | * on the same page though | 4977 | * on the same page though |
4850 | */ | 4978 | */ |
4851 | ret = mapping->a_ops->write_begin(file, mapping, page_offset(page), | 4979 | ret = mapping->a_ops->write_begin(file, mapping, page_offset(page), |
4852 | len, AOP_FLAG_UNINTERRUPTIBLE, &page, NULL); | 4980 | len, AOP_FLAG_UNINTERRUPTIBLE, &page, &fsdata); |
4853 | if (ret < 0) | 4981 | if (ret < 0) |
4854 | goto out_unlock; | 4982 | goto out_unlock; |
4855 | ret = mapping->a_ops->write_end(file, mapping, page_offset(page), | 4983 | ret = mapping->a_ops->write_end(file, mapping, page_offset(page), |
4856 | len, len, page, NULL); | 4984 | len, len, page, fsdata); |
4857 | if (ret < 0) | 4985 | if (ret < 0) |
4858 | goto out_unlock; | 4986 | goto out_unlock; |
4859 | ret = 0; | 4987 | ret = 0; |
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index 7a6c2f1faba6..dc99b4776d58 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c | |||
@@ -23,9 +23,8 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) | |||
23 | struct inode *inode = filp->f_dentry->d_inode; | 23 | struct inode *inode = filp->f_dentry->d_inode; |
24 | struct ext4_inode_info *ei = EXT4_I(inode); | 24 | struct ext4_inode_info *ei = EXT4_I(inode); |
25 | unsigned int flags; | 25 | unsigned int flags; |
26 | unsigned short rsv_window_size; | ||
27 | 26 | ||
28 | ext4_debug ("cmd = %u, arg = %lu\n", cmd, arg); | 27 | ext4_debug("cmd = %u, arg = %lu\n", cmd, arg); |
29 | 28 | ||
30 | switch (cmd) { | 29 | switch (cmd) { |
31 | case EXT4_IOC_GETFLAGS: | 30 | case EXT4_IOC_GETFLAGS: |
@@ -34,7 +33,7 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) | |||
34 | return put_user(flags, (int __user *) arg); | 33 | return put_user(flags, (int __user *) arg); |
35 | case EXT4_IOC_SETFLAGS: { | 34 | case EXT4_IOC_SETFLAGS: { |
36 | handle_t *handle = NULL; | 35 | handle_t *handle = NULL; |
37 | int err; | 36 | int err, migrate = 0; |
38 | struct ext4_iloc iloc; | 37 | struct ext4_iloc iloc; |
39 | unsigned int oldflags; | 38 | unsigned int oldflags; |
40 | unsigned int jflag; | 39 | unsigned int jflag; |
@@ -82,6 +81,17 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) | |||
82 | if (!capable(CAP_SYS_RESOURCE)) | 81 | if (!capable(CAP_SYS_RESOURCE)) |
83 | goto flags_out; | 82 | goto flags_out; |
84 | } | 83 | } |
84 | if (oldflags & EXT4_EXTENTS_FL) { | ||
85 | /* We don't support clearning extent flags */ | ||
86 | if (!(flags & EXT4_EXTENTS_FL)) { | ||
87 | err = -EOPNOTSUPP; | ||
88 | goto flags_out; | ||
89 | } | ||
90 | } else if (flags & EXT4_EXTENTS_FL) { | ||
91 | /* migrate the file */ | ||
92 | migrate = 1; | ||
93 | flags &= ~EXT4_EXTENTS_FL; | ||
94 | } | ||
85 | 95 | ||
86 | handle = ext4_journal_start(inode, 1); | 96 | handle = ext4_journal_start(inode, 1); |
87 | if (IS_ERR(handle)) { | 97 | if (IS_ERR(handle)) { |
@@ -109,6 +119,10 @@ flags_err: | |||
109 | 119 | ||
110 | if ((jflag ^ oldflags) & (EXT4_JOURNAL_DATA_FL)) | 120 | if ((jflag ^ oldflags) & (EXT4_JOURNAL_DATA_FL)) |
111 | err = ext4_change_inode_journal_flag(inode, jflag); | 121 | err = ext4_change_inode_journal_flag(inode, jflag); |
122 | if (err) | ||
123 | goto flags_out; | ||
124 | if (migrate) | ||
125 | err = ext4_ext_migrate(inode); | ||
112 | flags_out: | 126 | flags_out: |
113 | mutex_unlock(&inode->i_mutex); | 127 | mutex_unlock(&inode->i_mutex); |
114 | mnt_drop_write(filp->f_path.mnt); | 128 | mnt_drop_write(filp->f_path.mnt); |
@@ -175,53 +189,10 @@ setversion_out: | |||
175 | return ret; | 189 | return ret; |
176 | } | 190 | } |
177 | #endif | 191 | #endif |
178 | case EXT4_IOC_GETRSVSZ: | ||
179 | if (test_opt(inode->i_sb, RESERVATION) | ||
180 | && S_ISREG(inode->i_mode) | ||
181 | && ei->i_block_alloc_info) { | ||
182 | rsv_window_size = ei->i_block_alloc_info->rsv_window_node.rsv_goal_size; | ||
183 | return put_user(rsv_window_size, (int __user *)arg); | ||
184 | } | ||
185 | return -ENOTTY; | ||
186 | case EXT4_IOC_SETRSVSZ: { | ||
187 | int err; | ||
188 | |||
189 | if (!test_opt(inode->i_sb, RESERVATION) ||!S_ISREG(inode->i_mode)) | ||
190 | return -ENOTTY; | ||
191 | |||
192 | if (!is_owner_or_cap(inode)) | ||
193 | return -EACCES; | ||
194 | |||
195 | if (get_user(rsv_window_size, (int __user *)arg)) | ||
196 | return -EFAULT; | ||
197 | |||
198 | err = mnt_want_write(filp->f_path.mnt); | ||
199 | if (err) | ||
200 | return err; | ||
201 | |||
202 | if (rsv_window_size > EXT4_MAX_RESERVE_BLOCKS) | ||
203 | rsv_window_size = EXT4_MAX_RESERVE_BLOCKS; | ||
204 | |||
205 | /* | ||
206 | * need to allocate reservation structure for this inode | ||
207 | * before set the window size | ||
208 | */ | ||
209 | down_write(&ei->i_data_sem); | ||
210 | if (!ei->i_block_alloc_info) | ||
211 | ext4_init_block_alloc_info(inode); | ||
212 | |||
213 | if (ei->i_block_alloc_info){ | ||
214 | struct ext4_reserve_window_node *rsv = &ei->i_block_alloc_info->rsv_window_node; | ||
215 | rsv->rsv_goal_size = rsv_window_size; | ||
216 | } | ||
217 | up_write(&ei->i_data_sem); | ||
218 | mnt_drop_write(filp->f_path.mnt); | ||
219 | return 0; | ||
220 | } | ||
221 | case EXT4_IOC_GROUP_EXTEND: { | 192 | case EXT4_IOC_GROUP_EXTEND: { |
222 | ext4_fsblk_t n_blocks_count; | 193 | ext4_fsblk_t n_blocks_count; |
223 | struct super_block *sb = inode->i_sb; | 194 | struct super_block *sb = inode->i_sb; |
224 | int err; | 195 | int err, err2; |
225 | 196 | ||
226 | if (!capable(CAP_SYS_RESOURCE)) | 197 | if (!capable(CAP_SYS_RESOURCE)) |
227 | return -EPERM; | 198 | return -EPERM; |
@@ -235,8 +206,10 @@ setversion_out: | |||
235 | 206 | ||
236 | err = ext4_group_extend(sb, EXT4_SB(sb)->s_es, n_blocks_count); | 207 | err = ext4_group_extend(sb, EXT4_SB(sb)->s_es, n_blocks_count); |
237 | jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal); | 208 | jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal); |
238 | jbd2_journal_flush(EXT4_SB(sb)->s_journal); | 209 | err2 = jbd2_journal_flush(EXT4_SB(sb)->s_journal); |
239 | jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal); | 210 | jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal); |
211 | if (err == 0) | ||
212 | err = err2; | ||
240 | mnt_drop_write(filp->f_path.mnt); | 213 | mnt_drop_write(filp->f_path.mnt); |
241 | 214 | ||
242 | return err; | 215 | return err; |
@@ -244,7 +217,7 @@ setversion_out: | |||
244 | case EXT4_IOC_GROUP_ADD: { | 217 | case EXT4_IOC_GROUP_ADD: { |
245 | struct ext4_new_group_data input; | 218 | struct ext4_new_group_data input; |
246 | struct super_block *sb = inode->i_sb; | 219 | struct super_block *sb = inode->i_sb; |
247 | int err; | 220 | int err, err2; |
248 | 221 | ||
249 | if (!capable(CAP_SYS_RESOURCE)) | 222 | if (!capable(CAP_SYS_RESOURCE)) |
250 | return -EPERM; | 223 | return -EPERM; |
@@ -259,15 +232,36 @@ setversion_out: | |||
259 | 232 | ||
260 | err = ext4_group_add(sb, &input); | 233 | err = ext4_group_add(sb, &input); |
261 | jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal); | 234 | jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal); |
262 | jbd2_journal_flush(EXT4_SB(sb)->s_journal); | 235 | err2 = jbd2_journal_flush(EXT4_SB(sb)->s_journal); |
263 | jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal); | 236 | jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal); |
237 | if (err == 0) | ||
238 | err = err2; | ||
264 | mnt_drop_write(filp->f_path.mnt); | 239 | mnt_drop_write(filp->f_path.mnt); |
265 | 240 | ||
266 | return err; | 241 | return err; |
267 | } | 242 | } |
268 | 243 | ||
269 | case EXT4_IOC_MIGRATE: | 244 | case EXT4_IOC_MIGRATE: |
270 | return ext4_ext_migrate(inode, filp, cmd, arg); | 245 | { |
246 | int err; | ||
247 | if (!is_owner_or_cap(inode)) | ||
248 | return -EACCES; | ||
249 | |||
250 | err = mnt_want_write(filp->f_path.mnt); | ||
251 | if (err) | ||
252 | return err; | ||
253 | /* | ||
254 | * inode_mutex prevent write and truncate on the file. | ||
255 | * Read still goes through. We take i_data_sem in | ||
256 | * ext4_ext_swap_inode_data before we switch the | ||
257 | * inode format to prevent read. | ||
258 | */ | ||
259 | mutex_lock(&(inode->i_mutex)); | ||
260 | err = ext4_ext_migrate(inode); | ||
261 | mutex_unlock(&(inode->i_mutex)); | ||
262 | mnt_drop_write(filp->f_path.mnt); | ||
263 | return err; | ||
264 | } | ||
271 | 265 | ||
272 | default: | 266 | default: |
273 | return -ENOTTY; | 267 | return -ENOTTY; |
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index e0e3a5eb1ddb..b580714f0d85 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c | |||
@@ -477,9 +477,10 @@ static void mb_cmp_bitmaps(struct ext4_buddy *e4b, void *bitmap) | |||
477 | b2 = (unsigned char *) bitmap; | 477 | b2 = (unsigned char *) bitmap; |
478 | for (i = 0; i < e4b->bd_sb->s_blocksize; i++) { | 478 | for (i = 0; i < e4b->bd_sb->s_blocksize; i++) { |
479 | if (b1[i] != b2[i]) { | 479 | if (b1[i] != b2[i]) { |
480 | printk("corruption in group %lu at byte %u(%u):" | 480 | printk(KERN_ERR "corruption in group %lu " |
481 | " %x in copy != %x on disk/prealloc\n", | 481 | "at byte %u(%u): %x in copy != %x " |
482 | e4b->bd_group, i, i * 8, b1[i], b2[i]); | 482 | "on disk/prealloc\n", |
483 | e4b->bd_group, i, i * 8, b1[i], b2[i]); | ||
483 | BUG(); | 484 | BUG(); |
484 | } | 485 | } |
485 | } | 486 | } |
@@ -533,9 +534,6 @@ static int __mb_check_buddy(struct ext4_buddy *e4b, char *file, | |||
533 | void *buddy; | 534 | void *buddy; |
534 | void *buddy2; | 535 | void *buddy2; |
535 | 536 | ||
536 | if (!test_opt(sb, MBALLOC)) | ||
537 | return 0; | ||
538 | |||
539 | { | 537 | { |
540 | static int mb_check_counter; | 538 | static int mb_check_counter; |
541 | if (mb_check_counter++ % 100 != 0) | 539 | if (mb_check_counter++ % 100 != 0) |
@@ -784,9 +782,11 @@ static int ext4_mb_init_cache(struct page *page, char *incore) | |||
784 | if (bh[i] == NULL) | 782 | if (bh[i] == NULL) |
785 | goto out; | 783 | goto out; |
786 | 784 | ||
787 | if (bh_uptodate_or_lock(bh[i])) | 785 | if (buffer_uptodate(bh[i]) && |
786 | !(desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT))) | ||
788 | continue; | 787 | continue; |
789 | 788 | ||
789 | lock_buffer(bh[i]); | ||
790 | spin_lock(sb_bgl_lock(EXT4_SB(sb), first_group + i)); | 790 | spin_lock(sb_bgl_lock(EXT4_SB(sb), first_group + i)); |
791 | if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { | 791 | if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { |
792 | ext4_init_block_bitmap(sb, bh[i], | 792 | ext4_init_block_bitmap(sb, bh[i], |
@@ -2169,9 +2169,10 @@ static void ext4_mb_history_release(struct super_block *sb) | |||
2169 | { | 2169 | { |
2170 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 2170 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
2171 | 2171 | ||
2172 | remove_proc_entry("mb_groups", sbi->s_mb_proc); | 2172 | if (sbi->s_proc != NULL) { |
2173 | remove_proc_entry("mb_history", sbi->s_mb_proc); | 2173 | remove_proc_entry("mb_groups", sbi->s_proc); |
2174 | 2174 | remove_proc_entry("mb_history", sbi->s_proc); | |
2175 | } | ||
2175 | kfree(sbi->s_mb_history); | 2176 | kfree(sbi->s_mb_history); |
2176 | } | 2177 | } |
2177 | 2178 | ||
@@ -2180,10 +2181,10 @@ static void ext4_mb_history_init(struct super_block *sb) | |||
2180 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 2181 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
2181 | int i; | 2182 | int i; |
2182 | 2183 | ||
2183 | if (sbi->s_mb_proc != NULL) { | 2184 | if (sbi->s_proc != NULL) { |
2184 | proc_create_data("mb_history", S_IRUGO, sbi->s_mb_proc, | 2185 | proc_create_data("mb_history", S_IRUGO, sbi->s_proc, |
2185 | &ext4_mb_seq_history_fops, sb); | 2186 | &ext4_mb_seq_history_fops, sb); |
2186 | proc_create_data("mb_groups", S_IRUGO, sbi->s_mb_proc, | 2187 | proc_create_data("mb_groups", S_IRUGO, sbi->s_proc, |
2187 | &ext4_mb_seq_groups_fops, sb); | 2188 | &ext4_mb_seq_groups_fops, sb); |
2188 | } | 2189 | } |
2189 | 2190 | ||
@@ -2485,19 +2486,14 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery) | |||
2485 | unsigned max; | 2486 | unsigned max; |
2486 | int ret; | 2487 | int ret; |
2487 | 2488 | ||
2488 | if (!test_opt(sb, MBALLOC)) | ||
2489 | return 0; | ||
2490 | |||
2491 | i = (sb->s_blocksize_bits + 2) * sizeof(unsigned short); | 2489 | i = (sb->s_blocksize_bits + 2) * sizeof(unsigned short); |
2492 | 2490 | ||
2493 | sbi->s_mb_offsets = kmalloc(i, GFP_KERNEL); | 2491 | sbi->s_mb_offsets = kmalloc(i, GFP_KERNEL); |
2494 | if (sbi->s_mb_offsets == NULL) { | 2492 | if (sbi->s_mb_offsets == NULL) { |
2495 | clear_opt(sbi->s_mount_opt, MBALLOC); | ||
2496 | return -ENOMEM; | 2493 | return -ENOMEM; |
2497 | } | 2494 | } |
2498 | sbi->s_mb_maxs = kmalloc(i, GFP_KERNEL); | 2495 | sbi->s_mb_maxs = kmalloc(i, GFP_KERNEL); |
2499 | if (sbi->s_mb_maxs == NULL) { | 2496 | if (sbi->s_mb_maxs == NULL) { |
2500 | clear_opt(sbi->s_mount_opt, MBALLOC); | ||
2501 | kfree(sbi->s_mb_maxs); | 2497 | kfree(sbi->s_mb_maxs); |
2502 | return -ENOMEM; | 2498 | return -ENOMEM; |
2503 | } | 2499 | } |
@@ -2520,7 +2516,6 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery) | |||
2520 | /* init file for buddy data */ | 2516 | /* init file for buddy data */ |
2521 | ret = ext4_mb_init_backend(sb); | 2517 | ret = ext4_mb_init_backend(sb); |
2522 | if (ret != 0) { | 2518 | if (ret != 0) { |
2523 | clear_opt(sbi->s_mount_opt, MBALLOC); | ||
2524 | kfree(sbi->s_mb_offsets); | 2519 | kfree(sbi->s_mb_offsets); |
2525 | kfree(sbi->s_mb_maxs); | 2520 | kfree(sbi->s_mb_maxs); |
2526 | return ret; | 2521 | return ret; |
@@ -2540,17 +2535,15 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery) | |||
2540 | sbi->s_mb_history_filter = EXT4_MB_HISTORY_DEFAULT; | 2535 | sbi->s_mb_history_filter = EXT4_MB_HISTORY_DEFAULT; |
2541 | sbi->s_mb_group_prealloc = MB_DEFAULT_GROUP_PREALLOC; | 2536 | sbi->s_mb_group_prealloc = MB_DEFAULT_GROUP_PREALLOC; |
2542 | 2537 | ||
2543 | i = sizeof(struct ext4_locality_group) * nr_cpu_ids; | 2538 | sbi->s_locality_groups = alloc_percpu(struct ext4_locality_group); |
2544 | sbi->s_locality_groups = kmalloc(i, GFP_KERNEL); | ||
2545 | if (sbi->s_locality_groups == NULL) { | 2539 | if (sbi->s_locality_groups == NULL) { |
2546 | clear_opt(sbi->s_mount_opt, MBALLOC); | ||
2547 | kfree(sbi->s_mb_offsets); | 2540 | kfree(sbi->s_mb_offsets); |
2548 | kfree(sbi->s_mb_maxs); | 2541 | kfree(sbi->s_mb_maxs); |
2549 | return -ENOMEM; | 2542 | return -ENOMEM; |
2550 | } | 2543 | } |
2551 | for (i = 0; i < nr_cpu_ids; i++) { | 2544 | for_each_possible_cpu(i) { |
2552 | struct ext4_locality_group *lg; | 2545 | struct ext4_locality_group *lg; |
2553 | lg = &sbi->s_locality_groups[i]; | 2546 | lg = per_cpu_ptr(sbi->s_locality_groups, i); |
2554 | mutex_init(&lg->lg_mutex); | 2547 | mutex_init(&lg->lg_mutex); |
2555 | for (j = 0; j < PREALLOC_TB_SIZE; j++) | 2548 | for (j = 0; j < PREALLOC_TB_SIZE; j++) |
2556 | INIT_LIST_HEAD(&lg->lg_prealloc_list[j]); | 2549 | INIT_LIST_HEAD(&lg->lg_prealloc_list[j]); |
@@ -2560,7 +2553,7 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery) | |||
2560 | ext4_mb_init_per_dev_proc(sb); | 2553 | ext4_mb_init_per_dev_proc(sb); |
2561 | ext4_mb_history_init(sb); | 2554 | ext4_mb_history_init(sb); |
2562 | 2555 | ||
2563 | printk("EXT4-fs: mballoc enabled\n"); | 2556 | printk(KERN_INFO "EXT4-fs: mballoc enabled\n"); |
2564 | return 0; | 2557 | return 0; |
2565 | } | 2558 | } |
2566 | 2559 | ||
@@ -2589,9 +2582,6 @@ int ext4_mb_release(struct super_block *sb) | |||
2589 | struct ext4_group_info *grinfo; | 2582 | struct ext4_group_info *grinfo; |
2590 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 2583 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
2591 | 2584 | ||
2592 | if (!test_opt(sb, MBALLOC)) | ||
2593 | return 0; | ||
2594 | |||
2595 | /* release freed, non-committed blocks */ | 2585 | /* release freed, non-committed blocks */ |
2596 | spin_lock(&sbi->s_md_lock); | 2586 | spin_lock(&sbi->s_md_lock); |
2597 | list_splice_init(&sbi->s_closed_transaction, | 2587 | list_splice_init(&sbi->s_closed_transaction, |
@@ -2647,8 +2637,7 @@ int ext4_mb_release(struct super_block *sb) | |||
2647 | atomic_read(&sbi->s_mb_discarded)); | 2637 | atomic_read(&sbi->s_mb_discarded)); |
2648 | } | 2638 | } |
2649 | 2639 | ||
2650 | kfree(sbi->s_locality_groups); | 2640 | free_percpu(sbi->s_locality_groups); |
2651 | |||
2652 | ext4_mb_history_release(sb); | 2641 | ext4_mb_history_release(sb); |
2653 | ext4_mb_destroy_per_dev_proc(sb); | 2642 | ext4_mb_destroy_per_dev_proc(sb); |
2654 | 2643 | ||
@@ -2721,118 +2710,46 @@ ext4_mb_free_committed_blocks(struct super_block *sb) | |||
2721 | #define EXT4_MB_STREAM_REQ "stream_req" | 2710 | #define EXT4_MB_STREAM_REQ "stream_req" |
2722 | #define EXT4_MB_GROUP_PREALLOC "group_prealloc" | 2711 | #define EXT4_MB_GROUP_PREALLOC "group_prealloc" |
2723 | 2712 | ||
2724 | |||
2725 | |||
2726 | #define MB_PROC_FOPS(name) \ | ||
2727 | static int ext4_mb_##name##_proc_show(struct seq_file *m, void *v) \ | ||
2728 | { \ | ||
2729 | struct ext4_sb_info *sbi = m->private; \ | ||
2730 | \ | ||
2731 | seq_printf(m, "%ld\n", sbi->s_mb_##name); \ | ||
2732 | return 0; \ | ||
2733 | } \ | ||
2734 | \ | ||
2735 | static int ext4_mb_##name##_proc_open(struct inode *inode, struct file *file)\ | ||
2736 | { \ | ||
2737 | return single_open(file, ext4_mb_##name##_proc_show, PDE(inode)->data);\ | ||
2738 | } \ | ||
2739 | \ | ||
2740 | static ssize_t ext4_mb_##name##_proc_write(struct file *file, \ | ||
2741 | const char __user *buf, size_t cnt, loff_t *ppos) \ | ||
2742 | { \ | ||
2743 | struct ext4_sb_info *sbi = PDE(file->f_path.dentry->d_inode)->data;\ | ||
2744 | char str[32]; \ | ||
2745 | long value; \ | ||
2746 | if (cnt >= sizeof(str)) \ | ||
2747 | return -EINVAL; \ | ||
2748 | if (copy_from_user(str, buf, cnt)) \ | ||
2749 | return -EFAULT; \ | ||
2750 | value = simple_strtol(str, NULL, 0); \ | ||
2751 | if (value <= 0) \ | ||
2752 | return -ERANGE; \ | ||
2753 | sbi->s_mb_##name = value; \ | ||
2754 | return cnt; \ | ||
2755 | } \ | ||
2756 | \ | ||
2757 | static const struct file_operations ext4_mb_##name##_proc_fops = { \ | ||
2758 | .owner = THIS_MODULE, \ | ||
2759 | .open = ext4_mb_##name##_proc_open, \ | ||
2760 | .read = seq_read, \ | ||
2761 | .llseek = seq_lseek, \ | ||
2762 | .release = single_release, \ | ||
2763 | .write = ext4_mb_##name##_proc_write, \ | ||
2764 | }; | ||
2765 | |||
2766 | MB_PROC_FOPS(stats); | ||
2767 | MB_PROC_FOPS(max_to_scan); | ||
2768 | MB_PROC_FOPS(min_to_scan); | ||
2769 | MB_PROC_FOPS(order2_reqs); | ||
2770 | MB_PROC_FOPS(stream_request); | ||
2771 | MB_PROC_FOPS(group_prealloc); | ||
2772 | |||
2773 | #define MB_PROC_HANDLER(name, var) \ | ||
2774 | do { \ | ||
2775 | proc = proc_create_data(name, mode, sbi->s_mb_proc, \ | ||
2776 | &ext4_mb_##var##_proc_fops, sbi); \ | ||
2777 | if (proc == NULL) { \ | ||
2778 | printk(KERN_ERR "EXT4-fs: can't to create %s\n", name); \ | ||
2779 | goto err_out; \ | ||
2780 | } \ | ||
2781 | } while (0) | ||
2782 | |||
2783 | static int ext4_mb_init_per_dev_proc(struct super_block *sb) | 2713 | static int ext4_mb_init_per_dev_proc(struct super_block *sb) |
2784 | { | 2714 | { |
2785 | mode_t mode = S_IFREG | S_IRUGO | S_IWUSR; | 2715 | mode_t mode = S_IFREG | S_IRUGO | S_IWUSR; |
2786 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 2716 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
2787 | struct proc_dir_entry *proc; | 2717 | struct proc_dir_entry *proc; |
2788 | char devname[64]; | ||
2789 | 2718 | ||
2790 | if (proc_root_ext4 == NULL) { | 2719 | if (sbi->s_proc == NULL) |
2791 | sbi->s_mb_proc = NULL; | ||
2792 | return -EINVAL; | 2720 | return -EINVAL; |
2793 | } | ||
2794 | bdevname(sb->s_bdev, devname); | ||
2795 | sbi->s_mb_proc = proc_mkdir(devname, proc_root_ext4); | ||
2796 | |||
2797 | MB_PROC_HANDLER(EXT4_MB_STATS_NAME, stats); | ||
2798 | MB_PROC_HANDLER(EXT4_MB_MAX_TO_SCAN_NAME, max_to_scan); | ||
2799 | MB_PROC_HANDLER(EXT4_MB_MIN_TO_SCAN_NAME, min_to_scan); | ||
2800 | MB_PROC_HANDLER(EXT4_MB_ORDER2_REQ, order2_reqs); | ||
2801 | MB_PROC_HANDLER(EXT4_MB_STREAM_REQ, stream_request); | ||
2802 | MB_PROC_HANDLER(EXT4_MB_GROUP_PREALLOC, group_prealloc); | ||
2803 | 2721 | ||
2722 | EXT4_PROC_HANDLER(EXT4_MB_STATS_NAME, mb_stats); | ||
2723 | EXT4_PROC_HANDLER(EXT4_MB_MAX_TO_SCAN_NAME, mb_max_to_scan); | ||
2724 | EXT4_PROC_HANDLER(EXT4_MB_MIN_TO_SCAN_NAME, mb_min_to_scan); | ||
2725 | EXT4_PROC_HANDLER(EXT4_MB_ORDER2_REQ, mb_order2_reqs); | ||
2726 | EXT4_PROC_HANDLER(EXT4_MB_STREAM_REQ, mb_stream_request); | ||
2727 | EXT4_PROC_HANDLER(EXT4_MB_GROUP_PREALLOC, mb_group_prealloc); | ||
2804 | return 0; | 2728 | return 0; |
2805 | 2729 | ||
2806 | err_out: | 2730 | err_out: |
2807 | printk(KERN_ERR "EXT4-fs: Unable to create %s\n", devname); | 2731 | remove_proc_entry(EXT4_MB_GROUP_PREALLOC, sbi->s_proc); |
2808 | remove_proc_entry(EXT4_MB_GROUP_PREALLOC, sbi->s_mb_proc); | 2732 | remove_proc_entry(EXT4_MB_STREAM_REQ, sbi->s_proc); |
2809 | remove_proc_entry(EXT4_MB_STREAM_REQ, sbi->s_mb_proc); | 2733 | remove_proc_entry(EXT4_MB_ORDER2_REQ, sbi->s_proc); |
2810 | remove_proc_entry(EXT4_MB_ORDER2_REQ, sbi->s_mb_proc); | 2734 | remove_proc_entry(EXT4_MB_MIN_TO_SCAN_NAME, sbi->s_proc); |
2811 | remove_proc_entry(EXT4_MB_MIN_TO_SCAN_NAME, sbi->s_mb_proc); | 2735 | remove_proc_entry(EXT4_MB_MAX_TO_SCAN_NAME, sbi->s_proc); |
2812 | remove_proc_entry(EXT4_MB_MAX_TO_SCAN_NAME, sbi->s_mb_proc); | 2736 | remove_proc_entry(EXT4_MB_STATS_NAME, sbi->s_proc); |
2813 | remove_proc_entry(EXT4_MB_STATS_NAME, sbi->s_mb_proc); | ||
2814 | remove_proc_entry(devname, proc_root_ext4); | ||
2815 | sbi->s_mb_proc = NULL; | ||
2816 | |||
2817 | return -ENOMEM; | 2737 | return -ENOMEM; |
2818 | } | 2738 | } |
2819 | 2739 | ||
2820 | static int ext4_mb_destroy_per_dev_proc(struct super_block *sb) | 2740 | static int ext4_mb_destroy_per_dev_proc(struct super_block *sb) |
2821 | { | 2741 | { |
2822 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 2742 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
2823 | char devname[64]; | ||
2824 | 2743 | ||
2825 | if (sbi->s_mb_proc == NULL) | 2744 | if (sbi->s_proc == NULL) |
2826 | return -EINVAL; | 2745 | return -EINVAL; |
2827 | 2746 | ||
2828 | bdevname(sb->s_bdev, devname); | 2747 | remove_proc_entry(EXT4_MB_GROUP_PREALLOC, sbi->s_proc); |
2829 | remove_proc_entry(EXT4_MB_GROUP_PREALLOC, sbi->s_mb_proc); | 2748 | remove_proc_entry(EXT4_MB_STREAM_REQ, sbi->s_proc); |
2830 | remove_proc_entry(EXT4_MB_STREAM_REQ, sbi->s_mb_proc); | 2749 | remove_proc_entry(EXT4_MB_ORDER2_REQ, sbi->s_proc); |
2831 | remove_proc_entry(EXT4_MB_ORDER2_REQ, sbi->s_mb_proc); | 2750 | remove_proc_entry(EXT4_MB_MIN_TO_SCAN_NAME, sbi->s_proc); |
2832 | remove_proc_entry(EXT4_MB_MIN_TO_SCAN_NAME, sbi->s_mb_proc); | 2751 | remove_proc_entry(EXT4_MB_MAX_TO_SCAN_NAME, sbi->s_proc); |
2833 | remove_proc_entry(EXT4_MB_MAX_TO_SCAN_NAME, sbi->s_mb_proc); | 2752 | remove_proc_entry(EXT4_MB_STATS_NAME, sbi->s_proc); |
2834 | remove_proc_entry(EXT4_MB_STATS_NAME, sbi->s_mb_proc); | ||
2835 | remove_proc_entry(devname, proc_root_ext4); | ||
2836 | 2753 | ||
2837 | return 0; | 2754 | return 0; |
2838 | } | 2755 | } |
@@ -2854,11 +2771,6 @@ int __init init_ext4_mballoc(void) | |||
2854 | kmem_cache_destroy(ext4_pspace_cachep); | 2771 | kmem_cache_destroy(ext4_pspace_cachep); |
2855 | return -ENOMEM; | 2772 | return -ENOMEM; |
2856 | } | 2773 | } |
2857 | #ifdef CONFIG_PROC_FS | ||
2858 | proc_root_ext4 = proc_mkdir("fs/ext4", NULL); | ||
2859 | if (proc_root_ext4 == NULL) | ||
2860 | printk(KERN_ERR "EXT4-fs: Unable to create fs/ext4\n"); | ||
2861 | #endif | ||
2862 | return 0; | 2774 | return 0; |
2863 | } | 2775 | } |
2864 | 2776 | ||
@@ -2867,9 +2779,6 @@ void exit_ext4_mballoc(void) | |||
2867 | /* XXX: synchronize_rcu(); */ | 2779 | /* XXX: synchronize_rcu(); */ |
2868 | kmem_cache_destroy(ext4_pspace_cachep); | 2780 | kmem_cache_destroy(ext4_pspace_cachep); |
2869 | kmem_cache_destroy(ext4_ac_cachep); | 2781 | kmem_cache_destroy(ext4_ac_cachep); |
2870 | #ifdef CONFIG_PROC_FS | ||
2871 | remove_proc_entry("fs/ext4", NULL); | ||
2872 | #endif | ||
2873 | } | 2782 | } |
2874 | 2783 | ||
2875 | 2784 | ||
@@ -2879,7 +2788,7 @@ void exit_ext4_mballoc(void) | |||
2879 | */ | 2788 | */ |
2880 | static noinline_for_stack int | 2789 | static noinline_for_stack int |
2881 | ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac, | 2790 | ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac, |
2882 | handle_t *handle) | 2791 | handle_t *handle, unsigned long reserv_blks) |
2883 | { | 2792 | { |
2884 | struct buffer_head *bitmap_bh = NULL; | 2793 | struct buffer_head *bitmap_bh = NULL; |
2885 | struct ext4_super_block *es; | 2794 | struct ext4_super_block *es; |
@@ -2968,15 +2877,16 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac, | |||
2968 | le16_add_cpu(&gdp->bg_free_blocks_count, -ac->ac_b_ex.fe_len); | 2877 | le16_add_cpu(&gdp->bg_free_blocks_count, -ac->ac_b_ex.fe_len); |
2969 | gdp->bg_checksum = ext4_group_desc_csum(sbi, ac->ac_b_ex.fe_group, gdp); | 2878 | gdp->bg_checksum = ext4_group_desc_csum(sbi, ac->ac_b_ex.fe_group, gdp); |
2970 | spin_unlock(sb_bgl_lock(sbi, ac->ac_b_ex.fe_group)); | 2879 | spin_unlock(sb_bgl_lock(sbi, ac->ac_b_ex.fe_group)); |
2971 | 2880 | percpu_counter_sub(&sbi->s_freeblocks_counter, ac->ac_b_ex.fe_len); | |
2972 | /* | 2881 | /* |
2973 | * free blocks account has already be reduced/reserved | 2882 | * Now reduce the dirty block count also. Should not go negative |
2974 | * at write_begin() time for delayed allocation | ||
2975 | * do not double accounting | ||
2976 | */ | 2883 | */ |
2977 | if (!(ac->ac_flags & EXT4_MB_DELALLOC_RESERVED)) | 2884 | if (!(ac->ac_flags & EXT4_MB_DELALLOC_RESERVED)) |
2978 | percpu_counter_sub(&sbi->s_freeblocks_counter, | 2885 | /* release all the reserved blocks if non delalloc */ |
2979 | ac->ac_b_ex.fe_len); | 2886 | percpu_counter_sub(&sbi->s_dirtyblocks_counter, reserv_blks); |
2887 | else | ||
2888 | percpu_counter_sub(&sbi->s_dirtyblocks_counter, | ||
2889 | ac->ac_b_ex.fe_len); | ||
2980 | 2890 | ||
2981 | if (sbi->s_log_groups_per_flex) { | 2891 | if (sbi->s_log_groups_per_flex) { |
2982 | ext4_group_t flex_group = ext4_flex_group(sbi, | 2892 | ext4_group_t flex_group = ext4_flex_group(sbi, |
@@ -3884,7 +3794,7 @@ out: | |||
3884 | * | 3794 | * |
3885 | * FIXME!! Make sure it is valid at all the call sites | 3795 | * FIXME!! Make sure it is valid at all the call sites |
3886 | */ | 3796 | */ |
3887 | void ext4_mb_discard_inode_preallocations(struct inode *inode) | 3797 | void ext4_discard_preallocations(struct inode *inode) |
3888 | { | 3798 | { |
3889 | struct ext4_inode_info *ei = EXT4_I(inode); | 3799 | struct ext4_inode_info *ei = EXT4_I(inode); |
3890 | struct super_block *sb = inode->i_sb; | 3800 | struct super_block *sb = inode->i_sb; |
@@ -3896,7 +3806,7 @@ void ext4_mb_discard_inode_preallocations(struct inode *inode) | |||
3896 | struct ext4_buddy e4b; | 3806 | struct ext4_buddy e4b; |
3897 | int err; | 3807 | int err; |
3898 | 3808 | ||
3899 | if (!test_opt(sb, MBALLOC) || !S_ISREG(inode->i_mode)) { | 3809 | if (!S_ISREG(inode->i_mode)) { |
3900 | /*BUG_ON(!list_empty(&ei->i_prealloc_list));*/ | 3810 | /*BUG_ON(!list_empty(&ei->i_prealloc_list));*/ |
3901 | return; | 3811 | return; |
3902 | } | 3812 | } |
@@ -4094,8 +4004,7 @@ static void ext4_mb_group_or_file(struct ext4_allocation_context *ac) | |||
4094 | * per cpu locality group is to reduce the contention between block | 4004 | * per cpu locality group is to reduce the contention between block |
4095 | * request from multiple CPUs. | 4005 | * request from multiple CPUs. |
4096 | */ | 4006 | */ |
4097 | ac->ac_lg = &sbi->s_locality_groups[get_cpu()]; | 4007 | ac->ac_lg = per_cpu_ptr(sbi->s_locality_groups, raw_smp_processor_id()); |
4098 | put_cpu(); | ||
4099 | 4008 | ||
4100 | /* we're going to use group allocation */ | 4009 | /* we're going to use group allocation */ |
4101 | ac->ac_flags |= EXT4_MB_HINT_GROUP_ALLOC; | 4010 | ac->ac_flags |= EXT4_MB_HINT_GROUP_ALLOC; |
@@ -4369,33 +4278,32 @@ static int ext4_mb_discard_preallocations(struct super_block *sb, int needed) | |||
4369 | ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle, | 4278 | ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle, |
4370 | struct ext4_allocation_request *ar, int *errp) | 4279 | struct ext4_allocation_request *ar, int *errp) |
4371 | { | 4280 | { |
4281 | int freed; | ||
4372 | struct ext4_allocation_context *ac = NULL; | 4282 | struct ext4_allocation_context *ac = NULL; |
4373 | struct ext4_sb_info *sbi; | 4283 | struct ext4_sb_info *sbi; |
4374 | struct super_block *sb; | 4284 | struct super_block *sb; |
4375 | ext4_fsblk_t block = 0; | 4285 | ext4_fsblk_t block = 0; |
4376 | int freed; | 4286 | unsigned long inquota; |
4377 | int inquota; | 4287 | unsigned long reserv_blks = 0; |
4378 | 4288 | ||
4379 | sb = ar->inode->i_sb; | 4289 | sb = ar->inode->i_sb; |
4380 | sbi = EXT4_SB(sb); | 4290 | sbi = EXT4_SB(sb); |
4381 | 4291 | ||
4382 | if (!test_opt(sb, MBALLOC)) { | ||
4383 | block = ext4_old_new_blocks(handle, ar->inode, ar->goal, | ||
4384 | &(ar->len), errp); | ||
4385 | return block; | ||
4386 | } | ||
4387 | if (!EXT4_I(ar->inode)->i_delalloc_reserved_flag) { | 4292 | if (!EXT4_I(ar->inode)->i_delalloc_reserved_flag) { |
4388 | /* | 4293 | /* |
4389 | * With delalloc we already reserved the blocks | 4294 | * With delalloc we already reserved the blocks |
4390 | */ | 4295 | */ |
4391 | ar->len = ext4_has_free_blocks(sbi, ar->len); | 4296 | while (ar->len && ext4_claim_free_blocks(sbi, ar->len)) { |
4392 | } | 4297 | /* let others to free the space */ |
4393 | 4298 | yield(); | |
4394 | if (ar->len == 0) { | 4299 | ar->len = ar->len >> 1; |
4395 | *errp = -ENOSPC; | 4300 | } |
4396 | return 0; | 4301 | if (!ar->len) { |
4302 | *errp = -ENOSPC; | ||
4303 | return 0; | ||
4304 | } | ||
4305 | reserv_blks = ar->len; | ||
4397 | } | 4306 | } |
4398 | |||
4399 | while (ar->len && DQUOT_ALLOC_BLOCK(ar->inode, ar->len)) { | 4307 | while (ar->len && DQUOT_ALLOC_BLOCK(ar->inode, ar->len)) { |
4400 | ar->flags |= EXT4_MB_HINT_NOPREALLOC; | 4308 | ar->flags |= EXT4_MB_HINT_NOPREALLOC; |
4401 | ar->len--; | 4309 | ar->len--; |
@@ -4441,7 +4349,7 @@ repeat: | |||
4441 | } | 4349 | } |
4442 | 4350 | ||
4443 | if (likely(ac->ac_status == AC_STATUS_FOUND)) { | 4351 | if (likely(ac->ac_status == AC_STATUS_FOUND)) { |
4444 | *errp = ext4_mb_mark_diskspace_used(ac, handle); | 4352 | *errp = ext4_mb_mark_diskspace_used(ac, handle, reserv_blks); |
4445 | if (*errp == -EAGAIN) { | 4353 | if (*errp == -EAGAIN) { |
4446 | ac->ac_b_ex.fe_group = 0; | 4354 | ac->ac_b_ex.fe_group = 0; |
4447 | ac->ac_b_ex.fe_start = 0; | 4355 | ac->ac_b_ex.fe_start = 0; |
diff --git a/fs/ext4/mballoc.h b/fs/ext4/mballoc.h index c7c9906c2a75..b3b4828f8b89 100644 --- a/fs/ext4/mballoc.h +++ b/fs/ext4/mballoc.h | |||
@@ -257,7 +257,6 @@ static void ext4_mb_store_history(struct ext4_allocation_context *ac); | |||
257 | 257 | ||
258 | #define in_range(b, first, len) ((b) >= (first) && (b) <= (first) + (len) - 1) | 258 | #define in_range(b, first, len) ((b) >= (first) && (b) <= (first) + (len) - 1) |
259 | 259 | ||
260 | static struct proc_dir_entry *proc_root_ext4; | ||
261 | struct buffer_head *read_block_bitmap(struct super_block *, ext4_group_t); | 260 | struct buffer_head *read_block_bitmap(struct super_block *, ext4_group_t); |
262 | 261 | ||
263 | static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap, | 262 | static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap, |
diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c index 46fc0b5b12ba..f2a9cf498ecd 100644 --- a/fs/ext4/migrate.c +++ b/fs/ext4/migrate.c | |||
@@ -447,8 +447,7 @@ static int free_ext_block(handle_t *handle, struct inode *inode) | |||
447 | 447 | ||
448 | } | 448 | } |
449 | 449 | ||
450 | int ext4_ext_migrate(struct inode *inode, struct file *filp, | 450 | int ext4_ext_migrate(struct inode *inode) |
451 | unsigned int cmd, unsigned long arg) | ||
452 | { | 451 | { |
453 | handle_t *handle; | 452 | handle_t *handle; |
454 | int retval = 0, i; | 453 | int retval = 0, i; |
@@ -516,12 +515,6 @@ int ext4_ext_migrate(struct inode *inode, struct file *filp, | |||
516 | * when we add extents we extent the journal | 515 | * when we add extents we extent the journal |
517 | */ | 516 | */ |
518 | /* | 517 | /* |
519 | * inode_mutex prevent write and truncate on the file. Read still goes | ||
520 | * through. We take i_data_sem in ext4_ext_swap_inode_data before we | ||
521 | * switch the inode format to prevent read. | ||
522 | */ | ||
523 | mutex_lock(&(inode->i_mutex)); | ||
524 | /* | ||
525 | * Even though we take i_mutex we can still cause block allocation | 518 | * Even though we take i_mutex we can still cause block allocation |
526 | * via mmap write to holes. If we have allocated new blocks we fail | 519 | * via mmap write to holes. If we have allocated new blocks we fail |
527 | * migrate. New block allocation will clear EXT4_EXT_MIGRATE flag. | 520 | * migrate. New block allocation will clear EXT4_EXT_MIGRATE flag. |
@@ -623,7 +616,6 @@ err_out: | |||
623 | tmp_inode->i_nlink = 0; | 616 | tmp_inode->i_nlink = 0; |
624 | 617 | ||
625 | ext4_journal_stop(handle); | 618 | ext4_journal_stop(handle); |
626 | mutex_unlock(&(inode->i_mutex)); | ||
627 | 619 | ||
628 | if (tmp_inode) | 620 | if (tmp_inode) |
629 | iput(tmp_inode); | 621 | iput(tmp_inode); |
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 387ad98350c3..92db9e945147 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c | |||
@@ -151,34 +151,36 @@ struct dx_map_entry | |||
151 | 151 | ||
152 | static inline ext4_lblk_t dx_get_block(struct dx_entry *entry); | 152 | static inline ext4_lblk_t dx_get_block(struct dx_entry *entry); |
153 | static void dx_set_block(struct dx_entry *entry, ext4_lblk_t value); | 153 | static void dx_set_block(struct dx_entry *entry, ext4_lblk_t value); |
154 | static inline unsigned dx_get_hash (struct dx_entry *entry); | 154 | static inline unsigned dx_get_hash(struct dx_entry *entry); |
155 | static void dx_set_hash (struct dx_entry *entry, unsigned value); | 155 | static void dx_set_hash(struct dx_entry *entry, unsigned value); |
156 | static unsigned dx_get_count (struct dx_entry *entries); | 156 | static unsigned dx_get_count(struct dx_entry *entries); |
157 | static unsigned dx_get_limit (struct dx_entry *entries); | 157 | static unsigned dx_get_limit(struct dx_entry *entries); |
158 | static void dx_set_count (struct dx_entry *entries, unsigned value); | 158 | static void dx_set_count(struct dx_entry *entries, unsigned value); |
159 | static void dx_set_limit (struct dx_entry *entries, unsigned value); | 159 | static void dx_set_limit(struct dx_entry *entries, unsigned value); |
160 | static unsigned dx_root_limit (struct inode *dir, unsigned infosize); | 160 | static unsigned dx_root_limit(struct inode *dir, unsigned infosize); |
161 | static unsigned dx_node_limit (struct inode *dir); | 161 | static unsigned dx_node_limit(struct inode *dir); |
162 | static struct dx_frame *dx_probe(struct dentry *dentry, | 162 | static struct dx_frame *dx_probe(const struct qstr *d_name, |
163 | struct inode *dir, | 163 | struct inode *dir, |
164 | struct dx_hash_info *hinfo, | 164 | struct dx_hash_info *hinfo, |
165 | struct dx_frame *frame, | 165 | struct dx_frame *frame, |
166 | int *err); | 166 | int *err); |
167 | static void dx_release (struct dx_frame *frames); | 167 | static void dx_release(struct dx_frame *frames); |
168 | static int dx_make_map (struct ext4_dir_entry_2 *de, int size, | 168 | static int dx_make_map(struct ext4_dir_entry_2 *de, int size, |
169 | struct dx_hash_info *hinfo, struct dx_map_entry map[]); | 169 | struct dx_hash_info *hinfo, struct dx_map_entry map[]); |
170 | static void dx_sort_map(struct dx_map_entry *map, unsigned count); | 170 | static void dx_sort_map(struct dx_map_entry *map, unsigned count); |
171 | static struct ext4_dir_entry_2 *dx_move_dirents (char *from, char *to, | 171 | static struct ext4_dir_entry_2 *dx_move_dirents(char *from, char *to, |
172 | struct dx_map_entry *offsets, int count); | 172 | struct dx_map_entry *offsets, int count); |
173 | static struct ext4_dir_entry_2* dx_pack_dirents (char *base, int size); | 173 | static struct ext4_dir_entry_2* dx_pack_dirents(char *base, int size); |
174 | static void dx_insert_block(struct dx_frame *frame, | 174 | static void dx_insert_block(struct dx_frame *frame, |
175 | u32 hash, ext4_lblk_t block); | 175 | u32 hash, ext4_lblk_t block); |
176 | static int ext4_htree_next_block(struct inode *dir, __u32 hash, | 176 | static int ext4_htree_next_block(struct inode *dir, __u32 hash, |
177 | struct dx_frame *frame, | 177 | struct dx_frame *frame, |
178 | struct dx_frame *frames, | 178 | struct dx_frame *frames, |
179 | __u32 *start_hash); | 179 | __u32 *start_hash); |
180 | static struct buffer_head * ext4_dx_find_entry(struct dentry *dentry, | 180 | static struct buffer_head * ext4_dx_find_entry(struct inode *dir, |
181 | struct ext4_dir_entry_2 **res_dir, int *err); | 181 | const struct qstr *d_name, |
182 | struct ext4_dir_entry_2 **res_dir, | ||
183 | int *err); | ||
182 | static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry, | 184 | static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry, |
183 | struct inode *inode); | 185 | struct inode *inode); |
184 | 186 | ||
@@ -207,44 +209,44 @@ static inline void dx_set_block(struct dx_entry *entry, ext4_lblk_t value) | |||
207 | entry->block = cpu_to_le32(value); | 209 | entry->block = cpu_to_le32(value); |
208 | } | 210 | } |
209 | 211 | ||
210 | static inline unsigned dx_get_hash (struct dx_entry *entry) | 212 | static inline unsigned dx_get_hash(struct dx_entry *entry) |
211 | { | 213 | { |
212 | return le32_to_cpu(entry->hash); | 214 | return le32_to_cpu(entry->hash); |
213 | } | 215 | } |
214 | 216 | ||
215 | static inline void dx_set_hash (struct dx_entry *entry, unsigned value) | 217 | static inline void dx_set_hash(struct dx_entry *entry, unsigned value) |
216 | { | 218 | { |
217 | entry->hash = cpu_to_le32(value); | 219 | entry->hash = cpu_to_le32(value); |
218 | } | 220 | } |
219 | 221 | ||
220 | static inline unsigned dx_get_count (struct dx_entry *entries) | 222 | static inline unsigned dx_get_count(struct dx_entry *entries) |
221 | { | 223 | { |
222 | return le16_to_cpu(((struct dx_countlimit *) entries)->count); | 224 | return le16_to_cpu(((struct dx_countlimit *) entries)->count); |
223 | } | 225 | } |
224 | 226 | ||
225 | static inline unsigned dx_get_limit (struct dx_entry *entries) | 227 | static inline unsigned dx_get_limit(struct dx_entry *entries) |
226 | { | 228 | { |
227 | return le16_to_cpu(((struct dx_countlimit *) entries)->limit); | 229 | return le16_to_cpu(((struct dx_countlimit *) entries)->limit); |
228 | } | 230 | } |
229 | 231 | ||
230 | static inline void dx_set_count (struct dx_entry *entries, unsigned value) | 232 | static inline void dx_set_count(struct dx_entry *entries, unsigned value) |
231 | { | 233 | { |
232 | ((struct dx_countlimit *) entries)->count = cpu_to_le16(value); | 234 | ((struct dx_countlimit *) entries)->count = cpu_to_le16(value); |
233 | } | 235 | } |
234 | 236 | ||
235 | static inline void dx_set_limit (struct dx_entry *entries, unsigned value) | 237 | static inline void dx_set_limit(struct dx_entry *entries, unsigned value) |
236 | { | 238 | { |
237 | ((struct dx_countlimit *) entries)->limit = cpu_to_le16(value); | 239 | ((struct dx_countlimit *) entries)->limit = cpu_to_le16(value); |
238 | } | 240 | } |
239 | 241 | ||
240 | static inline unsigned dx_root_limit (struct inode *dir, unsigned infosize) | 242 | static inline unsigned dx_root_limit(struct inode *dir, unsigned infosize) |
241 | { | 243 | { |
242 | unsigned entry_space = dir->i_sb->s_blocksize - EXT4_DIR_REC_LEN(1) - | 244 | unsigned entry_space = dir->i_sb->s_blocksize - EXT4_DIR_REC_LEN(1) - |
243 | EXT4_DIR_REC_LEN(2) - infosize; | 245 | EXT4_DIR_REC_LEN(2) - infosize; |
244 | return entry_space / sizeof(struct dx_entry); | 246 | return entry_space / sizeof(struct dx_entry); |
245 | } | 247 | } |
246 | 248 | ||
247 | static inline unsigned dx_node_limit (struct inode *dir) | 249 | static inline unsigned dx_node_limit(struct inode *dir) |
248 | { | 250 | { |
249 | unsigned entry_space = dir->i_sb->s_blocksize - EXT4_DIR_REC_LEN(0); | 251 | unsigned entry_space = dir->i_sb->s_blocksize - EXT4_DIR_REC_LEN(0); |
250 | return entry_space / sizeof(struct dx_entry); | 252 | return entry_space / sizeof(struct dx_entry); |
@@ -254,12 +256,12 @@ static inline unsigned dx_node_limit (struct inode *dir) | |||
254 | * Debug | 256 | * Debug |
255 | */ | 257 | */ |
256 | #ifdef DX_DEBUG | 258 | #ifdef DX_DEBUG |
257 | static void dx_show_index (char * label, struct dx_entry *entries) | 259 | static void dx_show_index(char * label, struct dx_entry *entries) |
258 | { | 260 | { |
259 | int i, n = dx_get_count (entries); | 261 | int i, n = dx_get_count (entries); |
260 | printk("%s index ", label); | 262 | printk(KERN_DEBUG "%s index ", label); |
261 | for (i = 0; i < n; i++) { | 263 | for (i = 0; i < n; i++) { |
262 | printk("%x->%lu ", i? dx_get_hash(entries + i) : | 264 | printk("%x->%lu ", i ? dx_get_hash(entries + i) : |
263 | 0, (unsigned long)dx_get_block(entries + i)); | 265 | 0, (unsigned long)dx_get_block(entries + i)); |
264 | } | 266 | } |
265 | printk("\n"); | 267 | printk("\n"); |
@@ -306,7 +308,7 @@ struct stats dx_show_entries(struct dx_hash_info *hinfo, struct inode *dir, | |||
306 | struct dx_entry *entries, int levels) | 308 | struct dx_entry *entries, int levels) |
307 | { | 309 | { |
308 | unsigned blocksize = dir->i_sb->s_blocksize; | 310 | unsigned blocksize = dir->i_sb->s_blocksize; |
309 | unsigned count = dx_get_count (entries), names = 0, space = 0, i; | 311 | unsigned count = dx_get_count(entries), names = 0, space = 0, i; |
310 | unsigned bcount = 0; | 312 | unsigned bcount = 0; |
311 | struct buffer_head *bh; | 313 | struct buffer_head *bh; |
312 | int err; | 314 | int err; |
@@ -325,11 +327,12 @@ struct stats dx_show_entries(struct dx_hash_info *hinfo, struct inode *dir, | |||
325 | names += stats.names; | 327 | names += stats.names; |
326 | space += stats.space; | 328 | space += stats.space; |
327 | bcount += stats.bcount; | 329 | bcount += stats.bcount; |
328 | brelse (bh); | 330 | brelse(bh); |
329 | } | 331 | } |
330 | if (bcount) | 332 | if (bcount) |
331 | printk("%snames %u, fullness %u (%u%%)\n", levels?"":" ", | 333 | printk(KERN_DEBUG "%snames %u, fullness %u (%u%%)\n", |
332 | names, space/bcount,(space/bcount)*100/blocksize); | 334 | levels ? "" : " ", names, space/bcount, |
335 | (space/bcount)*100/blocksize); | ||
333 | return (struct stats) { names, space, bcount}; | 336 | return (struct stats) { names, space, bcount}; |
334 | } | 337 | } |
335 | #endif /* DX_DEBUG */ | 338 | #endif /* DX_DEBUG */ |
@@ -344,7 +347,7 @@ struct stats dx_show_entries(struct dx_hash_info *hinfo, struct inode *dir, | |||
344 | * back to userspace. | 347 | * back to userspace. |
345 | */ | 348 | */ |
346 | static struct dx_frame * | 349 | static struct dx_frame * |
347 | dx_probe(struct dentry *dentry, struct inode *dir, | 350 | dx_probe(const struct qstr *d_name, struct inode *dir, |
348 | struct dx_hash_info *hinfo, struct dx_frame *frame_in, int *err) | 351 | struct dx_hash_info *hinfo, struct dx_frame *frame_in, int *err) |
349 | { | 352 | { |
350 | unsigned count, indirect; | 353 | unsigned count, indirect; |
@@ -355,8 +358,6 @@ dx_probe(struct dentry *dentry, struct inode *dir, | |||
355 | u32 hash; | 358 | u32 hash; |
356 | 359 | ||
357 | frame->bh = NULL; | 360 | frame->bh = NULL; |
358 | if (dentry) | ||
359 | dir = dentry->d_parent->d_inode; | ||
360 | if (!(bh = ext4_bread (NULL,dir, 0, 0, err))) | 361 | if (!(bh = ext4_bread (NULL,dir, 0, 0, err))) |
361 | goto fail; | 362 | goto fail; |
362 | root = (struct dx_root *) bh->b_data; | 363 | root = (struct dx_root *) bh->b_data; |
@@ -372,8 +373,8 @@ dx_probe(struct dentry *dentry, struct inode *dir, | |||
372 | } | 373 | } |
373 | hinfo->hash_version = root->info.hash_version; | 374 | hinfo->hash_version = root->info.hash_version; |
374 | hinfo->seed = EXT4_SB(dir->i_sb)->s_hash_seed; | 375 | hinfo->seed = EXT4_SB(dir->i_sb)->s_hash_seed; |
375 | if (dentry) | 376 | if (d_name) |
376 | ext4fs_dirhash(dentry->d_name.name, dentry->d_name.len, hinfo); | 377 | ext4fs_dirhash(d_name->name, d_name->len, hinfo); |
377 | hash = hinfo->hash; | 378 | hash = hinfo->hash; |
378 | 379 | ||
379 | if (root->info.unused_flags & 1) { | 380 | if (root->info.unused_flags & 1) { |
@@ -406,7 +407,7 @@ dx_probe(struct dentry *dentry, struct inode *dir, | |||
406 | goto fail; | 407 | goto fail; |
407 | } | 408 | } |
408 | 409 | ||
409 | dxtrace (printk("Look up %x", hash)); | 410 | dxtrace(printk("Look up %x", hash)); |
410 | while (1) | 411 | while (1) |
411 | { | 412 | { |
412 | count = dx_get_count(entries); | 413 | count = dx_get_count(entries); |
@@ -555,7 +556,7 @@ static int ext4_htree_next_block(struct inode *dir, __u32 hash, | |||
555 | 0, &err))) | 556 | 0, &err))) |
556 | return err; /* Failure */ | 557 | return err; /* Failure */ |
557 | p++; | 558 | p++; |
558 | brelse (p->bh); | 559 | brelse(p->bh); |
559 | p->bh = bh; | 560 | p->bh = bh; |
560 | p->at = p->entries = ((struct dx_node *) bh->b_data)->entries; | 561 | p->at = p->entries = ((struct dx_node *) bh->b_data)->entries; |
561 | } | 562 | } |
@@ -593,7 +594,7 @@ static int htree_dirblock_to_tree(struct file *dir_file, | |||
593 | /* On error, skip the f_pos to the next block. */ | 594 | /* On error, skip the f_pos to the next block. */ |
594 | dir_file->f_pos = (dir_file->f_pos | | 595 | dir_file->f_pos = (dir_file->f_pos | |
595 | (dir->i_sb->s_blocksize - 1)) + 1; | 596 | (dir->i_sb->s_blocksize - 1)) + 1; |
596 | brelse (bh); | 597 | brelse(bh); |
597 | return count; | 598 | return count; |
598 | } | 599 | } |
599 | ext4fs_dirhash(de->name, de->name_len, hinfo); | 600 | ext4fs_dirhash(de->name, de->name_len, hinfo); |
@@ -635,8 +636,8 @@ int ext4_htree_fill_tree(struct file *dir_file, __u32 start_hash, | |||
635 | int ret, err; | 636 | int ret, err; |
636 | __u32 hashval; | 637 | __u32 hashval; |
637 | 638 | ||
638 | dxtrace(printk("In htree_fill_tree, start hash: %x:%x\n", start_hash, | 639 | dxtrace(printk(KERN_DEBUG "In htree_fill_tree, start hash: %x:%x\n", |
639 | start_minor_hash)); | 640 | start_hash, start_minor_hash)); |
640 | dir = dir_file->f_path.dentry->d_inode; | 641 | dir = dir_file->f_path.dentry->d_inode; |
641 | if (!(EXT4_I(dir)->i_flags & EXT4_INDEX_FL)) { | 642 | if (!(EXT4_I(dir)->i_flags & EXT4_INDEX_FL)) { |
642 | hinfo.hash_version = EXT4_SB(dir->i_sb)->s_def_hash_version; | 643 | hinfo.hash_version = EXT4_SB(dir->i_sb)->s_def_hash_version; |
@@ -648,7 +649,7 @@ int ext4_htree_fill_tree(struct file *dir_file, __u32 start_hash, | |||
648 | } | 649 | } |
649 | hinfo.hash = start_hash; | 650 | hinfo.hash = start_hash; |
650 | hinfo.minor_hash = 0; | 651 | hinfo.minor_hash = 0; |
651 | frame = dx_probe(NULL, dir_file->f_path.dentry->d_inode, &hinfo, frames, &err); | 652 | frame = dx_probe(NULL, dir, &hinfo, frames, &err); |
652 | if (!frame) | 653 | if (!frame) |
653 | return err; | 654 | return err; |
654 | 655 | ||
@@ -694,8 +695,8 @@ int ext4_htree_fill_tree(struct file *dir_file, __u32 start_hash, | |||
694 | break; | 695 | break; |
695 | } | 696 | } |
696 | dx_release(frames); | 697 | dx_release(frames); |
697 | dxtrace(printk("Fill tree: returned %d entries, next hash: %x\n", | 698 | dxtrace(printk(KERN_DEBUG "Fill tree: returned %d entries, " |
698 | count, *next_hash)); | 699 | "next hash: %x\n", count, *next_hash)); |
699 | return count; | 700 | return count; |
700 | errout: | 701 | errout: |
701 | dx_release(frames); | 702 | dx_release(frames); |
@@ -802,17 +803,17 @@ static inline int ext4_match (int len, const char * const name, | |||
802 | /* | 803 | /* |
803 | * Returns 0 if not found, -1 on failure, and 1 on success | 804 | * Returns 0 if not found, -1 on failure, and 1 on success |
804 | */ | 805 | */ |
805 | static inline int search_dirblock(struct buffer_head * bh, | 806 | static inline int search_dirblock(struct buffer_head *bh, |
806 | struct inode *dir, | 807 | struct inode *dir, |
807 | struct dentry *dentry, | 808 | const struct qstr *d_name, |
808 | unsigned long offset, | 809 | unsigned long offset, |
809 | struct ext4_dir_entry_2 ** res_dir) | 810 | struct ext4_dir_entry_2 ** res_dir) |
810 | { | 811 | { |
811 | struct ext4_dir_entry_2 * de; | 812 | struct ext4_dir_entry_2 * de; |
812 | char * dlimit; | 813 | char * dlimit; |
813 | int de_len; | 814 | int de_len; |
814 | const char *name = dentry->d_name.name; | 815 | const char *name = d_name->name; |
815 | int namelen = dentry->d_name.len; | 816 | int namelen = d_name->len; |
816 | 817 | ||
817 | de = (struct ext4_dir_entry_2 *) bh->b_data; | 818 | de = (struct ext4_dir_entry_2 *) bh->b_data; |
818 | dlimit = bh->b_data + dir->i_sb->s_blocksize; | 819 | dlimit = bh->b_data + dir->i_sb->s_blocksize; |
@@ -851,12 +852,13 @@ static inline int search_dirblock(struct buffer_head * bh, | |||
851 | * The returned buffer_head has ->b_count elevated. The caller is expected | 852 | * The returned buffer_head has ->b_count elevated. The caller is expected |
852 | * to brelse() it when appropriate. | 853 | * to brelse() it when appropriate. |
853 | */ | 854 | */ |
854 | static struct buffer_head * ext4_find_entry (struct dentry *dentry, | 855 | static struct buffer_head * ext4_find_entry (struct inode *dir, |
856 | const struct qstr *d_name, | ||
855 | struct ext4_dir_entry_2 ** res_dir) | 857 | struct ext4_dir_entry_2 ** res_dir) |
856 | { | 858 | { |
857 | struct super_block * sb; | 859 | struct super_block *sb; |
858 | struct buffer_head * bh_use[NAMEI_RA_SIZE]; | 860 | struct buffer_head *bh_use[NAMEI_RA_SIZE]; |
859 | struct buffer_head * bh, *ret = NULL; | 861 | struct buffer_head *bh, *ret = NULL; |
860 | ext4_lblk_t start, block, b; | 862 | ext4_lblk_t start, block, b; |
861 | int ra_max = 0; /* Number of bh's in the readahead | 863 | int ra_max = 0; /* Number of bh's in the readahead |
862 | buffer, bh_use[] */ | 864 | buffer, bh_use[] */ |
@@ -865,16 +867,15 @@ static struct buffer_head * ext4_find_entry (struct dentry *dentry, | |||
865 | int num = 0; | 867 | int num = 0; |
866 | ext4_lblk_t nblocks; | 868 | ext4_lblk_t nblocks; |
867 | int i, err; | 869 | int i, err; |
868 | struct inode *dir = dentry->d_parent->d_inode; | ||
869 | int namelen; | 870 | int namelen; |
870 | 871 | ||
871 | *res_dir = NULL; | 872 | *res_dir = NULL; |
872 | sb = dir->i_sb; | 873 | sb = dir->i_sb; |
873 | namelen = dentry->d_name.len; | 874 | namelen = d_name->len; |
874 | if (namelen > EXT4_NAME_LEN) | 875 | if (namelen > EXT4_NAME_LEN) |
875 | return NULL; | 876 | return NULL; |
876 | if (is_dx(dir)) { | 877 | if (is_dx(dir)) { |
877 | bh = ext4_dx_find_entry(dentry, res_dir, &err); | 878 | bh = ext4_dx_find_entry(dir, d_name, res_dir, &err); |
878 | /* | 879 | /* |
879 | * On success, or if the error was file not found, | 880 | * On success, or if the error was file not found, |
880 | * return. Otherwise, fall back to doing a search the | 881 | * return. Otherwise, fall back to doing a search the |
@@ -882,7 +883,8 @@ static struct buffer_head * ext4_find_entry (struct dentry *dentry, | |||
882 | */ | 883 | */ |
883 | if (bh || (err != ERR_BAD_DX_DIR)) | 884 | if (bh || (err != ERR_BAD_DX_DIR)) |
884 | return bh; | 885 | return bh; |
885 | dxtrace(printk("ext4_find_entry: dx failed, falling back\n")); | 886 | dxtrace(printk(KERN_DEBUG "ext4_find_entry: dx failed, " |
887 | "falling back\n")); | ||
886 | } | 888 | } |
887 | nblocks = dir->i_size >> EXT4_BLOCK_SIZE_BITS(sb); | 889 | nblocks = dir->i_size >> EXT4_BLOCK_SIZE_BITS(sb); |
888 | start = EXT4_I(dir)->i_dir_start_lookup; | 890 | start = EXT4_I(dir)->i_dir_start_lookup; |
@@ -926,7 +928,7 @@ restart: | |||
926 | brelse(bh); | 928 | brelse(bh); |
927 | goto next; | 929 | goto next; |
928 | } | 930 | } |
929 | i = search_dirblock(bh, dir, dentry, | 931 | i = search_dirblock(bh, dir, d_name, |
930 | block << EXT4_BLOCK_SIZE_BITS(sb), res_dir); | 932 | block << EXT4_BLOCK_SIZE_BITS(sb), res_dir); |
931 | if (i == 1) { | 933 | if (i == 1) { |
932 | EXT4_I(dir)->i_dir_start_lookup = block; | 934 | EXT4_I(dir)->i_dir_start_lookup = block; |
@@ -956,11 +958,11 @@ restart: | |||
956 | cleanup_and_exit: | 958 | cleanup_and_exit: |
957 | /* Clean up the read-ahead blocks */ | 959 | /* Clean up the read-ahead blocks */ |
958 | for (; ra_ptr < ra_max; ra_ptr++) | 960 | for (; ra_ptr < ra_max; ra_ptr++) |
959 | brelse (bh_use[ra_ptr]); | 961 | brelse(bh_use[ra_ptr]); |
960 | return ret; | 962 | return ret; |
961 | } | 963 | } |
962 | 964 | ||
963 | static struct buffer_head * ext4_dx_find_entry(struct dentry *dentry, | 965 | static struct buffer_head * ext4_dx_find_entry(struct inode *dir, const struct qstr *d_name, |
964 | struct ext4_dir_entry_2 **res_dir, int *err) | 966 | struct ext4_dir_entry_2 **res_dir, int *err) |
965 | { | 967 | { |
966 | struct super_block * sb; | 968 | struct super_block * sb; |
@@ -971,14 +973,13 @@ static struct buffer_head * ext4_dx_find_entry(struct dentry *dentry, | |||
971 | struct buffer_head *bh; | 973 | struct buffer_head *bh; |
972 | ext4_lblk_t block; | 974 | ext4_lblk_t block; |
973 | int retval; | 975 | int retval; |
974 | int namelen = dentry->d_name.len; | 976 | int namelen = d_name->len; |
975 | const u8 *name = dentry->d_name.name; | 977 | const u8 *name = d_name->name; |
976 | struct inode *dir = dentry->d_parent->d_inode; | ||
977 | 978 | ||
978 | sb = dir->i_sb; | 979 | sb = dir->i_sb; |
979 | /* NFS may look up ".." - look at dx_root directory block */ | 980 | /* NFS may look up ".." - look at dx_root directory block */ |
980 | if (namelen > 2 || name[0] != '.'||(name[1] != '.' && name[1] != '\0')){ | 981 | if (namelen > 2 || name[0] != '.'||(name[1] != '.' && name[1] != '\0')){ |
981 | if (!(frame = dx_probe(dentry, NULL, &hinfo, frames, err))) | 982 | if (!(frame = dx_probe(d_name, dir, &hinfo, frames, err))) |
982 | return NULL; | 983 | return NULL; |
983 | } else { | 984 | } else { |
984 | frame = frames; | 985 | frame = frames; |
@@ -1010,7 +1011,7 @@ static struct buffer_head * ext4_dx_find_entry(struct dentry *dentry, | |||
1010 | return bh; | 1011 | return bh; |
1011 | } | 1012 | } |
1012 | } | 1013 | } |
1013 | brelse (bh); | 1014 | brelse(bh); |
1014 | /* Check to see if we should continue to search */ | 1015 | /* Check to see if we should continue to search */ |
1015 | retval = ext4_htree_next_block(dir, hash, frame, | 1016 | retval = ext4_htree_next_block(dir, hash, frame, |
1016 | frames, NULL); | 1017 | frames, NULL); |
@@ -1025,25 +1026,25 @@ static struct buffer_head * ext4_dx_find_entry(struct dentry *dentry, | |||
1025 | 1026 | ||
1026 | *err = -ENOENT; | 1027 | *err = -ENOENT; |
1027 | errout: | 1028 | errout: |
1028 | dxtrace(printk("%s not found\n", name)); | 1029 | dxtrace(printk(KERN_DEBUG "%s not found\n", name)); |
1029 | dx_release (frames); | 1030 | dx_release (frames); |
1030 | return NULL; | 1031 | return NULL; |
1031 | } | 1032 | } |
1032 | 1033 | ||
1033 | static struct dentry *ext4_lookup(struct inode * dir, struct dentry *dentry, struct nameidata *nd) | 1034 | static struct dentry *ext4_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) |
1034 | { | 1035 | { |
1035 | struct inode * inode; | 1036 | struct inode *inode; |
1036 | struct ext4_dir_entry_2 * de; | 1037 | struct ext4_dir_entry_2 *de; |
1037 | struct buffer_head * bh; | 1038 | struct buffer_head *bh; |
1038 | 1039 | ||
1039 | if (dentry->d_name.len > EXT4_NAME_LEN) | 1040 | if (dentry->d_name.len > EXT4_NAME_LEN) |
1040 | return ERR_PTR(-ENAMETOOLONG); | 1041 | return ERR_PTR(-ENAMETOOLONG); |
1041 | 1042 | ||
1042 | bh = ext4_find_entry(dentry, &de); | 1043 | bh = ext4_find_entry(dir, &dentry->d_name, &de); |
1043 | inode = NULL; | 1044 | inode = NULL; |
1044 | if (bh) { | 1045 | if (bh) { |
1045 | unsigned long ino = le32_to_cpu(de->inode); | 1046 | unsigned long ino = le32_to_cpu(de->inode); |
1046 | brelse (bh); | 1047 | brelse(bh); |
1047 | if (!ext4_valid_inum(dir->i_sb, ino)) { | 1048 | if (!ext4_valid_inum(dir->i_sb, ino)) { |
1048 | ext4_error(dir->i_sb, "ext4_lookup", | 1049 | ext4_error(dir->i_sb, "ext4_lookup", |
1049 | "bad inode number: %lu", ino); | 1050 | "bad inode number: %lu", ino); |
@@ -1062,15 +1063,14 @@ struct dentry *ext4_get_parent(struct dentry *child) | |||
1062 | unsigned long ino; | 1063 | unsigned long ino; |
1063 | struct dentry *parent; | 1064 | struct dentry *parent; |
1064 | struct inode *inode; | 1065 | struct inode *inode; |
1065 | struct dentry dotdot; | 1066 | static const struct qstr dotdot = { |
1067 | .name = "..", | ||
1068 | .len = 2, | ||
1069 | }; | ||
1066 | struct ext4_dir_entry_2 * de; | 1070 | struct ext4_dir_entry_2 * de; |
1067 | struct buffer_head *bh; | 1071 | struct buffer_head *bh; |
1068 | 1072 | ||
1069 | dotdot.d_name.name = ".."; | 1073 | bh = ext4_find_entry(child->d_inode, &dotdot, &de); |
1070 | dotdot.d_name.len = 2; | ||
1071 | dotdot.d_parent = child; /* confusing, isn't it! */ | ||
1072 | |||
1073 | bh = ext4_find_entry(&dotdot, &de); | ||
1074 | inode = NULL; | 1074 | inode = NULL; |
1075 | if (!bh) | 1075 | if (!bh) |
1076 | return ERR_PTR(-ENOENT); | 1076 | return ERR_PTR(-ENOENT); |
@@ -1201,10 +1201,10 @@ static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir, | |||
1201 | 1201 | ||
1202 | /* create map in the end of data2 block */ | 1202 | /* create map in the end of data2 block */ |
1203 | map = (struct dx_map_entry *) (data2 + blocksize); | 1203 | map = (struct dx_map_entry *) (data2 + blocksize); |
1204 | count = dx_make_map ((struct ext4_dir_entry_2 *) data1, | 1204 | count = dx_make_map((struct ext4_dir_entry_2 *) data1, |
1205 | blocksize, hinfo, map); | 1205 | blocksize, hinfo, map); |
1206 | map -= count; | 1206 | map -= count; |
1207 | dx_sort_map (map, count); | 1207 | dx_sort_map(map, count); |
1208 | /* Split the existing block in the middle, size-wise */ | 1208 | /* Split the existing block in the middle, size-wise */ |
1209 | size = 0; | 1209 | size = 0; |
1210 | move = 0; | 1210 | move = 0; |
@@ -1225,7 +1225,7 @@ static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir, | |||
1225 | 1225 | ||
1226 | /* Fancy dance to stay within two buffers */ | 1226 | /* Fancy dance to stay within two buffers */ |
1227 | de2 = dx_move_dirents(data1, data2, map + split, count - split); | 1227 | de2 = dx_move_dirents(data1, data2, map + split, count - split); |
1228 | de = dx_pack_dirents(data1,blocksize); | 1228 | de = dx_pack_dirents(data1, blocksize); |
1229 | de->rec_len = ext4_rec_len_to_disk(data1 + blocksize - (char *) de); | 1229 | de->rec_len = ext4_rec_len_to_disk(data1 + blocksize - (char *) de); |
1230 | de2->rec_len = ext4_rec_len_to_disk(data2 + blocksize - (char *) de2); | 1230 | de2->rec_len = ext4_rec_len_to_disk(data2 + blocksize - (char *) de2); |
1231 | dxtrace(dx_show_leaf (hinfo, (struct ext4_dir_entry_2 *) data1, blocksize, 1)); | 1231 | dxtrace(dx_show_leaf (hinfo, (struct ext4_dir_entry_2 *) data1, blocksize, 1)); |
@@ -1237,15 +1237,15 @@ static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir, | |||
1237 | swap(*bh, bh2); | 1237 | swap(*bh, bh2); |
1238 | de = de2; | 1238 | de = de2; |
1239 | } | 1239 | } |
1240 | dx_insert_block (frame, hash2 + continued, newblock); | 1240 | dx_insert_block(frame, hash2 + continued, newblock); |
1241 | err = ext4_journal_dirty_metadata (handle, bh2); | 1241 | err = ext4_journal_dirty_metadata(handle, bh2); |
1242 | if (err) | 1242 | if (err) |
1243 | goto journal_error; | 1243 | goto journal_error; |
1244 | err = ext4_journal_dirty_metadata (handle, frame->bh); | 1244 | err = ext4_journal_dirty_metadata(handle, frame->bh); |
1245 | if (err) | 1245 | if (err) |
1246 | goto journal_error; | 1246 | goto journal_error; |
1247 | brelse (bh2); | 1247 | brelse(bh2); |
1248 | dxtrace(dx_show_index ("frame", frame->entries)); | 1248 | dxtrace(dx_show_index("frame", frame->entries)); |
1249 | return de; | 1249 | return de; |
1250 | 1250 | ||
1251 | journal_error: | 1251 | journal_error: |
@@ -1271,7 +1271,7 @@ errout: | |||
1271 | */ | 1271 | */ |
1272 | static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry, | 1272 | static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry, |
1273 | struct inode *inode, struct ext4_dir_entry_2 *de, | 1273 | struct inode *inode, struct ext4_dir_entry_2 *de, |
1274 | struct buffer_head * bh) | 1274 | struct buffer_head *bh) |
1275 | { | 1275 | { |
1276 | struct inode *dir = dentry->d_parent->d_inode; | 1276 | struct inode *dir = dentry->d_parent->d_inode; |
1277 | const char *name = dentry->d_name.name; | 1277 | const char *name = dentry->d_name.name; |
@@ -1288,11 +1288,11 @@ static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry, | |||
1288 | while ((char *) de <= top) { | 1288 | while ((char *) de <= top) { |
1289 | if (!ext4_check_dir_entry("ext4_add_entry", dir, de, | 1289 | if (!ext4_check_dir_entry("ext4_add_entry", dir, de, |
1290 | bh, offset)) { | 1290 | bh, offset)) { |
1291 | brelse (bh); | 1291 | brelse(bh); |
1292 | return -EIO; | 1292 | return -EIO; |
1293 | } | 1293 | } |
1294 | if (ext4_match (namelen, name, de)) { | 1294 | if (ext4_match(namelen, name, de)) { |
1295 | brelse (bh); | 1295 | brelse(bh); |
1296 | return -EEXIST; | 1296 | return -EEXIST; |
1297 | } | 1297 | } |
1298 | nlen = EXT4_DIR_REC_LEN(de->name_len); | 1298 | nlen = EXT4_DIR_REC_LEN(de->name_len); |
@@ -1329,7 +1329,7 @@ static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry, | |||
1329 | } else | 1329 | } else |
1330 | de->inode = 0; | 1330 | de->inode = 0; |
1331 | de->name_len = namelen; | 1331 | de->name_len = namelen; |
1332 | memcpy (de->name, name, namelen); | 1332 | memcpy(de->name, name, namelen); |
1333 | /* | 1333 | /* |
1334 | * XXX shouldn't update any times until successful | 1334 | * XXX shouldn't update any times until successful |
1335 | * completion of syscall, but too many callers depend | 1335 | * completion of syscall, but too many callers depend |
@@ -1377,7 +1377,7 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry, | |||
1377 | struct fake_dirent *fde; | 1377 | struct fake_dirent *fde; |
1378 | 1378 | ||
1379 | blocksize = dir->i_sb->s_blocksize; | 1379 | blocksize = dir->i_sb->s_blocksize; |
1380 | dxtrace(printk("Creating index\n")); | 1380 | dxtrace(printk(KERN_DEBUG "Creating index\n")); |
1381 | retval = ext4_journal_get_write_access(handle, bh); | 1381 | retval = ext4_journal_get_write_access(handle, bh); |
1382 | if (retval) { | 1382 | if (retval) { |
1383 | ext4_std_error(dir->i_sb, retval); | 1383 | ext4_std_error(dir->i_sb, retval); |
@@ -1386,7 +1386,7 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry, | |||
1386 | } | 1386 | } |
1387 | root = (struct dx_root *) bh->b_data; | 1387 | root = (struct dx_root *) bh->b_data; |
1388 | 1388 | ||
1389 | bh2 = ext4_append (handle, dir, &block, &retval); | 1389 | bh2 = ext4_append(handle, dir, &block, &retval); |
1390 | if (!(bh2)) { | 1390 | if (!(bh2)) { |
1391 | brelse(bh); | 1391 | brelse(bh); |
1392 | return retval; | 1392 | return retval; |
@@ -1412,9 +1412,9 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry, | |||
1412 | root->info.info_length = sizeof(root->info); | 1412 | root->info.info_length = sizeof(root->info); |
1413 | root->info.hash_version = EXT4_SB(dir->i_sb)->s_def_hash_version; | 1413 | root->info.hash_version = EXT4_SB(dir->i_sb)->s_def_hash_version; |
1414 | entries = root->entries; | 1414 | entries = root->entries; |
1415 | dx_set_block (entries, 1); | 1415 | dx_set_block(entries, 1); |
1416 | dx_set_count (entries, 1); | 1416 | dx_set_count(entries, 1); |
1417 | dx_set_limit (entries, dx_root_limit(dir, sizeof(root->info))); | 1417 | dx_set_limit(entries, dx_root_limit(dir, sizeof(root->info))); |
1418 | 1418 | ||
1419 | /* Initialize as for dx_probe */ | 1419 | /* Initialize as for dx_probe */ |
1420 | hinfo.hash_version = root->info.hash_version; | 1420 | hinfo.hash_version = root->info.hash_version; |
@@ -1443,14 +1443,14 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry, | |||
1443 | * may not sleep between calling this and putting something into | 1443 | * may not sleep between calling this and putting something into |
1444 | * the entry, as someone else might have used it while you slept. | 1444 | * the entry, as someone else might have used it while you slept. |
1445 | */ | 1445 | */ |
1446 | static int ext4_add_entry (handle_t *handle, struct dentry *dentry, | 1446 | static int ext4_add_entry(handle_t *handle, struct dentry *dentry, |
1447 | struct inode *inode) | 1447 | struct inode *inode) |
1448 | { | 1448 | { |
1449 | struct inode *dir = dentry->d_parent->d_inode; | 1449 | struct inode *dir = dentry->d_parent->d_inode; |
1450 | unsigned long offset; | 1450 | unsigned long offset; |
1451 | struct buffer_head * bh; | 1451 | struct buffer_head *bh; |
1452 | struct ext4_dir_entry_2 *de; | 1452 | struct ext4_dir_entry_2 *de; |
1453 | struct super_block * sb; | 1453 | struct super_block *sb; |
1454 | int retval; | 1454 | int retval; |
1455 | int dx_fallback=0; | 1455 | int dx_fallback=0; |
1456 | unsigned blocksize; | 1456 | unsigned blocksize; |
@@ -1500,13 +1500,13 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry, | |||
1500 | struct dx_frame frames[2], *frame; | 1500 | struct dx_frame frames[2], *frame; |
1501 | struct dx_entry *entries, *at; | 1501 | struct dx_entry *entries, *at; |
1502 | struct dx_hash_info hinfo; | 1502 | struct dx_hash_info hinfo; |
1503 | struct buffer_head * bh; | 1503 | struct buffer_head *bh; |
1504 | struct inode *dir = dentry->d_parent->d_inode; | 1504 | struct inode *dir = dentry->d_parent->d_inode; |
1505 | struct super_block * sb = dir->i_sb; | 1505 | struct super_block *sb = dir->i_sb; |
1506 | struct ext4_dir_entry_2 *de; | 1506 | struct ext4_dir_entry_2 *de; |
1507 | int err; | 1507 | int err; |
1508 | 1508 | ||
1509 | frame = dx_probe(dentry, NULL, &hinfo, frames, &err); | 1509 | frame = dx_probe(&dentry->d_name, dir, &hinfo, frames, &err); |
1510 | if (!frame) | 1510 | if (!frame) |
1511 | return err; | 1511 | return err; |
1512 | entries = frame->entries; | 1512 | entries = frame->entries; |
@@ -1527,7 +1527,7 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry, | |||
1527 | } | 1527 | } |
1528 | 1528 | ||
1529 | /* Block full, should compress but for now just split */ | 1529 | /* Block full, should compress but for now just split */ |
1530 | dxtrace(printk("using %u of %u node entries\n", | 1530 | dxtrace(printk(KERN_DEBUG "using %u of %u node entries\n", |
1531 | dx_get_count(entries), dx_get_limit(entries))); | 1531 | dx_get_count(entries), dx_get_limit(entries))); |
1532 | /* Need to split index? */ | 1532 | /* Need to split index? */ |
1533 | if (dx_get_count(entries) == dx_get_limit(entries)) { | 1533 | if (dx_get_count(entries) == dx_get_limit(entries)) { |
@@ -1559,7 +1559,8 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry, | |||
1559 | if (levels) { | 1559 | if (levels) { |
1560 | unsigned icount1 = icount/2, icount2 = icount - icount1; | 1560 | unsigned icount1 = icount/2, icount2 = icount - icount1; |
1561 | unsigned hash2 = dx_get_hash(entries + icount1); | 1561 | unsigned hash2 = dx_get_hash(entries + icount1); |
1562 | dxtrace(printk("Split index %i/%i\n", icount1, icount2)); | 1562 | dxtrace(printk(KERN_DEBUG "Split index %i/%i\n", |
1563 | icount1, icount2)); | ||
1563 | 1564 | ||
1564 | BUFFER_TRACE(frame->bh, "get_write_access"); /* index root */ | 1565 | BUFFER_TRACE(frame->bh, "get_write_access"); /* index root */ |
1565 | err = ext4_journal_get_write_access(handle, | 1566 | err = ext4_journal_get_write_access(handle, |
@@ -1567,11 +1568,11 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry, | |||
1567 | if (err) | 1568 | if (err) |
1568 | goto journal_error; | 1569 | goto journal_error; |
1569 | 1570 | ||
1570 | memcpy ((char *) entries2, (char *) (entries + icount1), | 1571 | memcpy((char *) entries2, (char *) (entries + icount1), |
1571 | icount2 * sizeof(struct dx_entry)); | 1572 | icount2 * sizeof(struct dx_entry)); |
1572 | dx_set_count (entries, icount1); | 1573 | dx_set_count(entries, icount1); |
1573 | dx_set_count (entries2, icount2); | 1574 | dx_set_count(entries2, icount2); |
1574 | dx_set_limit (entries2, dx_node_limit(dir)); | 1575 | dx_set_limit(entries2, dx_node_limit(dir)); |
1575 | 1576 | ||
1576 | /* Which index block gets the new entry? */ | 1577 | /* Which index block gets the new entry? */ |
1577 | if (at - entries >= icount1) { | 1578 | if (at - entries >= icount1) { |
@@ -1579,16 +1580,17 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry, | |||
1579 | frame->entries = entries = entries2; | 1580 | frame->entries = entries = entries2; |
1580 | swap(frame->bh, bh2); | 1581 | swap(frame->bh, bh2); |
1581 | } | 1582 | } |
1582 | dx_insert_block (frames + 0, hash2, newblock); | 1583 | dx_insert_block(frames + 0, hash2, newblock); |
1583 | dxtrace(dx_show_index ("node", frames[1].entries)); | 1584 | dxtrace(dx_show_index("node", frames[1].entries)); |
1584 | dxtrace(dx_show_index ("node", | 1585 | dxtrace(dx_show_index("node", |
1585 | ((struct dx_node *) bh2->b_data)->entries)); | 1586 | ((struct dx_node *) bh2->b_data)->entries)); |
1586 | err = ext4_journal_dirty_metadata(handle, bh2); | 1587 | err = ext4_journal_dirty_metadata(handle, bh2); |
1587 | if (err) | 1588 | if (err) |
1588 | goto journal_error; | 1589 | goto journal_error; |
1589 | brelse (bh2); | 1590 | brelse (bh2); |
1590 | } else { | 1591 | } else { |
1591 | dxtrace(printk("Creating second level index...\n")); | 1592 | dxtrace(printk(KERN_DEBUG |
1593 | "Creating second level index...\n")); | ||
1592 | memcpy((char *) entries2, (char *) entries, | 1594 | memcpy((char *) entries2, (char *) entries, |
1593 | icount * sizeof(struct dx_entry)); | 1595 | icount * sizeof(struct dx_entry)); |
1594 | dx_set_limit(entries2, dx_node_limit(dir)); | 1596 | dx_set_limit(entries2, dx_node_limit(dir)); |
@@ -1630,12 +1632,12 @@ cleanup: | |||
1630 | * ext4_delete_entry deletes a directory entry by merging it with the | 1632 | * ext4_delete_entry deletes a directory entry by merging it with the |
1631 | * previous entry | 1633 | * previous entry |
1632 | */ | 1634 | */ |
1633 | static int ext4_delete_entry (handle_t *handle, | 1635 | static int ext4_delete_entry(handle_t *handle, |
1634 | struct inode * dir, | 1636 | struct inode *dir, |
1635 | struct ext4_dir_entry_2 * de_del, | 1637 | struct ext4_dir_entry_2 *de_del, |
1636 | struct buffer_head * bh) | 1638 | struct buffer_head *bh) |
1637 | { | 1639 | { |
1638 | struct ext4_dir_entry_2 * de, * pde; | 1640 | struct ext4_dir_entry_2 *de, *pde; |
1639 | int i; | 1641 | int i; |
1640 | 1642 | ||
1641 | i = 0; | 1643 | i = 0; |
@@ -1716,11 +1718,11 @@ static int ext4_add_nondir(handle_t *handle, | |||
1716 | * If the create succeeds, we fill in the inode information | 1718 | * If the create succeeds, we fill in the inode information |
1717 | * with d_instantiate(). | 1719 | * with d_instantiate(). |
1718 | */ | 1720 | */ |
1719 | static int ext4_create (struct inode * dir, struct dentry * dentry, int mode, | 1721 | static int ext4_create(struct inode *dir, struct dentry *dentry, int mode, |
1720 | struct nameidata *nd) | 1722 | struct nameidata *nd) |
1721 | { | 1723 | { |
1722 | handle_t *handle; | 1724 | handle_t *handle; |
1723 | struct inode * inode; | 1725 | struct inode *inode; |
1724 | int err, retries = 0; | 1726 | int err, retries = 0; |
1725 | 1727 | ||
1726 | retry: | 1728 | retry: |
@@ -1747,8 +1749,8 @@ retry: | |||
1747 | return err; | 1749 | return err; |
1748 | } | 1750 | } |
1749 | 1751 | ||
1750 | static int ext4_mknod (struct inode * dir, struct dentry *dentry, | 1752 | static int ext4_mknod(struct inode *dir, struct dentry *dentry, |
1751 | int mode, dev_t rdev) | 1753 | int mode, dev_t rdev) |
1752 | { | 1754 | { |
1753 | handle_t *handle; | 1755 | handle_t *handle; |
1754 | struct inode *inode; | 1756 | struct inode *inode; |
@@ -1767,11 +1769,11 @@ retry: | |||
1767 | if (IS_DIRSYNC(dir)) | 1769 | if (IS_DIRSYNC(dir)) |
1768 | handle->h_sync = 1; | 1770 | handle->h_sync = 1; |
1769 | 1771 | ||
1770 | inode = ext4_new_inode (handle, dir, mode); | 1772 | inode = ext4_new_inode(handle, dir, mode); |
1771 | err = PTR_ERR(inode); | 1773 | err = PTR_ERR(inode); |
1772 | if (!IS_ERR(inode)) { | 1774 | if (!IS_ERR(inode)) { |
1773 | init_special_inode(inode, inode->i_mode, rdev); | 1775 | init_special_inode(inode, inode->i_mode, rdev); |
1774 | #ifdef CONFIG_EXT4DEV_FS_XATTR | 1776 | #ifdef CONFIG_EXT4_FS_XATTR |
1775 | inode->i_op = &ext4_special_inode_operations; | 1777 | inode->i_op = &ext4_special_inode_operations; |
1776 | #endif | 1778 | #endif |
1777 | err = ext4_add_nondir(handle, dentry, inode); | 1779 | err = ext4_add_nondir(handle, dentry, inode); |
@@ -1782,12 +1784,12 @@ retry: | |||
1782 | return err; | 1784 | return err; |
1783 | } | 1785 | } |
1784 | 1786 | ||
1785 | static int ext4_mkdir(struct inode * dir, struct dentry * dentry, int mode) | 1787 | static int ext4_mkdir(struct inode *dir, struct dentry *dentry, int mode) |
1786 | { | 1788 | { |
1787 | handle_t *handle; | 1789 | handle_t *handle; |
1788 | struct inode * inode; | 1790 | struct inode *inode; |
1789 | struct buffer_head * dir_block; | 1791 | struct buffer_head *dir_block; |
1790 | struct ext4_dir_entry_2 * de; | 1792 | struct ext4_dir_entry_2 *de; |
1791 | int err, retries = 0; | 1793 | int err, retries = 0; |
1792 | 1794 | ||
1793 | if (EXT4_DIR_LINK_MAX(dir)) | 1795 | if (EXT4_DIR_LINK_MAX(dir)) |
@@ -1803,7 +1805,7 @@ retry: | |||
1803 | if (IS_DIRSYNC(dir)) | 1805 | if (IS_DIRSYNC(dir)) |
1804 | handle->h_sync = 1; | 1806 | handle->h_sync = 1; |
1805 | 1807 | ||
1806 | inode = ext4_new_inode (handle, dir, S_IFDIR | mode); | 1808 | inode = ext4_new_inode(handle, dir, S_IFDIR | mode); |
1807 | err = PTR_ERR(inode); | 1809 | err = PTR_ERR(inode); |
1808 | if (IS_ERR(inode)) | 1810 | if (IS_ERR(inode)) |
1809 | goto out_stop; | 1811 | goto out_stop; |
@@ -1811,7 +1813,7 @@ retry: | |||
1811 | inode->i_op = &ext4_dir_inode_operations; | 1813 | inode->i_op = &ext4_dir_inode_operations; |
1812 | inode->i_fop = &ext4_dir_operations; | 1814 | inode->i_fop = &ext4_dir_operations; |
1813 | inode->i_size = EXT4_I(inode)->i_disksize = inode->i_sb->s_blocksize; | 1815 | inode->i_size = EXT4_I(inode)->i_disksize = inode->i_sb->s_blocksize; |
1814 | dir_block = ext4_bread (handle, inode, 0, 1, &err); | 1816 | dir_block = ext4_bread(handle, inode, 0, 1, &err); |
1815 | if (!dir_block) | 1817 | if (!dir_block) |
1816 | goto out_clear_inode; | 1818 | goto out_clear_inode; |
1817 | BUFFER_TRACE(dir_block, "get_write_access"); | 1819 | BUFFER_TRACE(dir_block, "get_write_access"); |
@@ -1820,26 +1822,26 @@ retry: | |||
1820 | de->inode = cpu_to_le32(inode->i_ino); | 1822 | de->inode = cpu_to_le32(inode->i_ino); |
1821 | de->name_len = 1; | 1823 | de->name_len = 1; |
1822 | de->rec_len = ext4_rec_len_to_disk(EXT4_DIR_REC_LEN(de->name_len)); | 1824 | de->rec_len = ext4_rec_len_to_disk(EXT4_DIR_REC_LEN(de->name_len)); |
1823 | strcpy (de->name, "."); | 1825 | strcpy(de->name, "."); |
1824 | ext4_set_de_type(dir->i_sb, de, S_IFDIR); | 1826 | ext4_set_de_type(dir->i_sb, de, S_IFDIR); |
1825 | de = ext4_next_entry(de); | 1827 | de = ext4_next_entry(de); |
1826 | de->inode = cpu_to_le32(dir->i_ino); | 1828 | de->inode = cpu_to_le32(dir->i_ino); |
1827 | de->rec_len = ext4_rec_len_to_disk(inode->i_sb->s_blocksize - | 1829 | de->rec_len = ext4_rec_len_to_disk(inode->i_sb->s_blocksize - |
1828 | EXT4_DIR_REC_LEN(1)); | 1830 | EXT4_DIR_REC_LEN(1)); |
1829 | de->name_len = 2; | 1831 | de->name_len = 2; |
1830 | strcpy (de->name, ".."); | 1832 | strcpy(de->name, ".."); |
1831 | ext4_set_de_type(dir->i_sb, de, S_IFDIR); | 1833 | ext4_set_de_type(dir->i_sb, de, S_IFDIR); |
1832 | inode->i_nlink = 2; | 1834 | inode->i_nlink = 2; |
1833 | BUFFER_TRACE(dir_block, "call ext4_journal_dirty_metadata"); | 1835 | BUFFER_TRACE(dir_block, "call ext4_journal_dirty_metadata"); |
1834 | ext4_journal_dirty_metadata(handle, dir_block); | 1836 | ext4_journal_dirty_metadata(handle, dir_block); |
1835 | brelse (dir_block); | 1837 | brelse(dir_block); |
1836 | ext4_mark_inode_dirty(handle, inode); | 1838 | ext4_mark_inode_dirty(handle, inode); |
1837 | err = ext4_add_entry (handle, dentry, inode); | 1839 | err = ext4_add_entry(handle, dentry, inode); |
1838 | if (err) { | 1840 | if (err) { |
1839 | out_clear_inode: | 1841 | out_clear_inode: |
1840 | clear_nlink(inode); | 1842 | clear_nlink(inode); |
1841 | ext4_mark_inode_dirty(handle, inode); | 1843 | ext4_mark_inode_dirty(handle, inode); |
1842 | iput (inode); | 1844 | iput(inode); |
1843 | goto out_stop; | 1845 | goto out_stop; |
1844 | } | 1846 | } |
1845 | ext4_inc_count(handle, dir); | 1847 | ext4_inc_count(handle, dir); |
@@ -1856,17 +1858,17 @@ out_stop: | |||
1856 | /* | 1858 | /* |
1857 | * routine to check that the specified directory is empty (for rmdir) | 1859 | * routine to check that the specified directory is empty (for rmdir) |
1858 | */ | 1860 | */ |
1859 | static int empty_dir (struct inode * inode) | 1861 | static int empty_dir(struct inode *inode) |
1860 | { | 1862 | { |
1861 | unsigned long offset; | 1863 | unsigned long offset; |
1862 | struct buffer_head * bh; | 1864 | struct buffer_head *bh; |
1863 | struct ext4_dir_entry_2 * de, * de1; | 1865 | struct ext4_dir_entry_2 *de, *de1; |
1864 | struct super_block * sb; | 1866 | struct super_block *sb; |
1865 | int err = 0; | 1867 | int err = 0; |
1866 | 1868 | ||
1867 | sb = inode->i_sb; | 1869 | sb = inode->i_sb; |
1868 | if (inode->i_size < EXT4_DIR_REC_LEN(1) + EXT4_DIR_REC_LEN(2) || | 1870 | if (inode->i_size < EXT4_DIR_REC_LEN(1) + EXT4_DIR_REC_LEN(2) || |
1869 | !(bh = ext4_bread (NULL, inode, 0, 0, &err))) { | 1871 | !(bh = ext4_bread(NULL, inode, 0, 0, &err))) { |
1870 | if (err) | 1872 | if (err) |
1871 | ext4_error(inode->i_sb, __func__, | 1873 | ext4_error(inode->i_sb, __func__, |
1872 | "error %d reading directory #%lu offset 0", | 1874 | "error %d reading directory #%lu offset 0", |
@@ -1881,23 +1883,23 @@ static int empty_dir (struct inode * inode) | |||
1881 | de1 = ext4_next_entry(de); | 1883 | de1 = ext4_next_entry(de); |
1882 | if (le32_to_cpu(de->inode) != inode->i_ino || | 1884 | if (le32_to_cpu(de->inode) != inode->i_ino || |
1883 | !le32_to_cpu(de1->inode) || | 1885 | !le32_to_cpu(de1->inode) || |
1884 | strcmp (".", de->name) || | 1886 | strcmp(".", de->name) || |
1885 | strcmp ("..", de1->name)) { | 1887 | strcmp("..", de1->name)) { |
1886 | ext4_warning (inode->i_sb, "empty_dir", | 1888 | ext4_warning(inode->i_sb, "empty_dir", |
1887 | "bad directory (dir #%lu) - no `.' or `..'", | 1889 | "bad directory (dir #%lu) - no `.' or `..'", |
1888 | inode->i_ino); | 1890 | inode->i_ino); |
1889 | brelse (bh); | 1891 | brelse(bh); |
1890 | return 1; | 1892 | return 1; |
1891 | } | 1893 | } |
1892 | offset = ext4_rec_len_from_disk(de->rec_len) + | 1894 | offset = ext4_rec_len_from_disk(de->rec_len) + |
1893 | ext4_rec_len_from_disk(de1->rec_len); | 1895 | ext4_rec_len_from_disk(de1->rec_len); |
1894 | de = ext4_next_entry(de1); | 1896 | de = ext4_next_entry(de1); |
1895 | while (offset < inode->i_size ) { | 1897 | while (offset < inode->i_size) { |
1896 | if (!bh || | 1898 | if (!bh || |
1897 | (void *) de >= (void *) (bh->b_data+sb->s_blocksize)) { | 1899 | (void *) de >= (void *) (bh->b_data+sb->s_blocksize)) { |
1898 | err = 0; | 1900 | err = 0; |
1899 | brelse (bh); | 1901 | brelse(bh); |
1900 | bh = ext4_bread (NULL, inode, | 1902 | bh = ext4_bread(NULL, inode, |
1901 | offset >> EXT4_BLOCK_SIZE_BITS(sb), 0, &err); | 1903 | offset >> EXT4_BLOCK_SIZE_BITS(sb), 0, &err); |
1902 | if (!bh) { | 1904 | if (!bh) { |
1903 | if (err) | 1905 | if (err) |
@@ -1917,13 +1919,13 @@ static int empty_dir (struct inode * inode) | |||
1917 | continue; | 1919 | continue; |
1918 | } | 1920 | } |
1919 | if (le32_to_cpu(de->inode)) { | 1921 | if (le32_to_cpu(de->inode)) { |
1920 | brelse (bh); | 1922 | brelse(bh); |
1921 | return 0; | 1923 | return 0; |
1922 | } | 1924 | } |
1923 | offset += ext4_rec_len_from_disk(de->rec_len); | 1925 | offset += ext4_rec_len_from_disk(de->rec_len); |
1924 | de = ext4_next_entry(de); | 1926 | de = ext4_next_entry(de); |
1925 | } | 1927 | } |
1926 | brelse (bh); | 1928 | brelse(bh); |
1927 | return 1; | 1929 | return 1; |
1928 | } | 1930 | } |
1929 | 1931 | ||
@@ -1954,8 +1956,8 @@ int ext4_orphan_add(handle_t *handle, struct inode *inode) | |||
1954 | * ->i_nlink. For, say it, character device. Not a regular file, | 1956 | * ->i_nlink. For, say it, character device. Not a regular file, |
1955 | * not a directory, not a symlink and ->i_nlink > 0. | 1957 | * not a directory, not a symlink and ->i_nlink > 0. |
1956 | */ | 1958 | */ |
1957 | J_ASSERT ((S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || | 1959 | J_ASSERT((S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || |
1958 | S_ISLNK(inode->i_mode)) || inode->i_nlink == 0); | 1960 | S_ISLNK(inode->i_mode)) || inode->i_nlink == 0); |
1959 | 1961 | ||
1960 | BUFFER_TRACE(EXT4_SB(sb)->s_sbh, "get_write_access"); | 1962 | BUFFER_TRACE(EXT4_SB(sb)->s_sbh, "get_write_access"); |
1961 | err = ext4_journal_get_write_access(handle, EXT4_SB(sb)->s_sbh); | 1963 | err = ext4_journal_get_write_access(handle, EXT4_SB(sb)->s_sbh); |
@@ -2069,12 +2071,12 @@ out_brelse: | |||
2069 | goto out_err; | 2071 | goto out_err; |
2070 | } | 2072 | } |
2071 | 2073 | ||
2072 | static int ext4_rmdir (struct inode * dir, struct dentry *dentry) | 2074 | static int ext4_rmdir(struct inode *dir, struct dentry *dentry) |
2073 | { | 2075 | { |
2074 | int retval; | 2076 | int retval; |
2075 | struct inode * inode; | 2077 | struct inode *inode; |
2076 | struct buffer_head * bh; | 2078 | struct buffer_head *bh; |
2077 | struct ext4_dir_entry_2 * de; | 2079 | struct ext4_dir_entry_2 *de; |
2078 | handle_t *handle; | 2080 | handle_t *handle; |
2079 | 2081 | ||
2080 | /* Initialize quotas before so that eventual writes go in | 2082 | /* Initialize quotas before so that eventual writes go in |
@@ -2085,7 +2087,7 @@ static int ext4_rmdir (struct inode * dir, struct dentry *dentry) | |||
2085 | return PTR_ERR(handle); | 2087 | return PTR_ERR(handle); |
2086 | 2088 | ||
2087 | retval = -ENOENT; | 2089 | retval = -ENOENT; |
2088 | bh = ext4_find_entry (dentry, &de); | 2090 | bh = ext4_find_entry(dir, &dentry->d_name, &de); |
2089 | if (!bh) | 2091 | if (!bh) |
2090 | goto end_rmdir; | 2092 | goto end_rmdir; |
2091 | 2093 | ||
@@ -2099,16 +2101,16 @@ static int ext4_rmdir (struct inode * dir, struct dentry *dentry) | |||
2099 | goto end_rmdir; | 2101 | goto end_rmdir; |
2100 | 2102 | ||
2101 | retval = -ENOTEMPTY; | 2103 | retval = -ENOTEMPTY; |
2102 | if (!empty_dir (inode)) | 2104 | if (!empty_dir(inode)) |
2103 | goto end_rmdir; | 2105 | goto end_rmdir; |
2104 | 2106 | ||
2105 | retval = ext4_delete_entry(handle, dir, de, bh); | 2107 | retval = ext4_delete_entry(handle, dir, de, bh); |
2106 | if (retval) | 2108 | if (retval) |
2107 | goto end_rmdir; | 2109 | goto end_rmdir; |
2108 | if (!EXT4_DIR_LINK_EMPTY(inode)) | 2110 | if (!EXT4_DIR_LINK_EMPTY(inode)) |
2109 | ext4_warning (inode->i_sb, "ext4_rmdir", | 2111 | ext4_warning(inode->i_sb, "ext4_rmdir", |
2110 | "empty directory has too many links (%d)", | 2112 | "empty directory has too many links (%d)", |
2111 | inode->i_nlink); | 2113 | inode->i_nlink); |
2112 | inode->i_version++; | 2114 | inode->i_version++; |
2113 | clear_nlink(inode); | 2115 | clear_nlink(inode); |
2114 | /* There's no need to set i_disksize: the fact that i_nlink is | 2116 | /* There's no need to set i_disksize: the fact that i_nlink is |
@@ -2124,16 +2126,16 @@ static int ext4_rmdir (struct inode * dir, struct dentry *dentry) | |||
2124 | 2126 | ||
2125 | end_rmdir: | 2127 | end_rmdir: |
2126 | ext4_journal_stop(handle); | 2128 | ext4_journal_stop(handle); |
2127 | brelse (bh); | 2129 | brelse(bh); |
2128 | return retval; | 2130 | return retval; |
2129 | } | 2131 | } |
2130 | 2132 | ||
2131 | static int ext4_unlink(struct inode * dir, struct dentry *dentry) | 2133 | static int ext4_unlink(struct inode *dir, struct dentry *dentry) |
2132 | { | 2134 | { |
2133 | int retval; | 2135 | int retval; |
2134 | struct inode * inode; | 2136 | struct inode *inode; |
2135 | struct buffer_head * bh; | 2137 | struct buffer_head *bh; |
2136 | struct ext4_dir_entry_2 * de; | 2138 | struct ext4_dir_entry_2 *de; |
2137 | handle_t *handle; | 2139 | handle_t *handle; |
2138 | 2140 | ||
2139 | /* Initialize quotas before so that eventual writes go | 2141 | /* Initialize quotas before so that eventual writes go |
@@ -2147,7 +2149,7 @@ static int ext4_unlink(struct inode * dir, struct dentry *dentry) | |||
2147 | handle->h_sync = 1; | 2149 | handle->h_sync = 1; |
2148 | 2150 | ||
2149 | retval = -ENOENT; | 2151 | retval = -ENOENT; |
2150 | bh = ext4_find_entry (dentry, &de); | 2152 | bh = ext4_find_entry(dir, &dentry->d_name, &de); |
2151 | if (!bh) | 2153 | if (!bh) |
2152 | goto end_unlink; | 2154 | goto end_unlink; |
2153 | 2155 | ||
@@ -2158,9 +2160,9 @@ static int ext4_unlink(struct inode * dir, struct dentry *dentry) | |||
2158 | goto end_unlink; | 2160 | goto end_unlink; |
2159 | 2161 | ||
2160 | if (!inode->i_nlink) { | 2162 | if (!inode->i_nlink) { |
2161 | ext4_warning (inode->i_sb, "ext4_unlink", | 2163 | ext4_warning(inode->i_sb, "ext4_unlink", |
2162 | "Deleting nonexistent file (%lu), %d", | 2164 | "Deleting nonexistent file (%lu), %d", |
2163 | inode->i_ino, inode->i_nlink); | 2165 | inode->i_ino, inode->i_nlink); |
2164 | inode->i_nlink = 1; | 2166 | inode->i_nlink = 1; |
2165 | } | 2167 | } |
2166 | retval = ext4_delete_entry(handle, dir, de, bh); | 2168 | retval = ext4_delete_entry(handle, dir, de, bh); |
@@ -2178,15 +2180,15 @@ static int ext4_unlink(struct inode * dir, struct dentry *dentry) | |||
2178 | 2180 | ||
2179 | end_unlink: | 2181 | end_unlink: |
2180 | ext4_journal_stop(handle); | 2182 | ext4_journal_stop(handle); |
2181 | brelse (bh); | 2183 | brelse(bh); |
2182 | return retval; | 2184 | return retval; |
2183 | } | 2185 | } |
2184 | 2186 | ||
2185 | static int ext4_symlink (struct inode * dir, | 2187 | static int ext4_symlink(struct inode *dir, |
2186 | struct dentry *dentry, const char * symname) | 2188 | struct dentry *dentry, const char *symname) |
2187 | { | 2189 | { |
2188 | handle_t *handle; | 2190 | handle_t *handle; |
2189 | struct inode * inode; | 2191 | struct inode *inode; |
2190 | int l, err, retries = 0; | 2192 | int l, err, retries = 0; |
2191 | 2193 | ||
2192 | l = strlen(symname)+1; | 2194 | l = strlen(symname)+1; |
@@ -2203,12 +2205,12 @@ retry: | |||
2203 | if (IS_DIRSYNC(dir)) | 2205 | if (IS_DIRSYNC(dir)) |
2204 | handle->h_sync = 1; | 2206 | handle->h_sync = 1; |
2205 | 2207 | ||
2206 | inode = ext4_new_inode (handle, dir, S_IFLNK|S_IRWXUGO); | 2208 | inode = ext4_new_inode(handle, dir, S_IFLNK|S_IRWXUGO); |
2207 | err = PTR_ERR(inode); | 2209 | err = PTR_ERR(inode); |
2208 | if (IS_ERR(inode)) | 2210 | if (IS_ERR(inode)) |
2209 | goto out_stop; | 2211 | goto out_stop; |
2210 | 2212 | ||
2211 | if (l > sizeof (EXT4_I(inode)->i_data)) { | 2213 | if (l > sizeof(EXT4_I(inode)->i_data)) { |
2212 | inode->i_op = &ext4_symlink_inode_operations; | 2214 | inode->i_op = &ext4_symlink_inode_operations; |
2213 | ext4_set_aops(inode); | 2215 | ext4_set_aops(inode); |
2214 | /* | 2216 | /* |
@@ -2221,14 +2223,14 @@ retry: | |||
2221 | if (err) { | 2223 | if (err) { |
2222 | clear_nlink(inode); | 2224 | clear_nlink(inode); |
2223 | ext4_mark_inode_dirty(handle, inode); | 2225 | ext4_mark_inode_dirty(handle, inode); |
2224 | iput (inode); | 2226 | iput(inode); |
2225 | goto out_stop; | 2227 | goto out_stop; |
2226 | } | 2228 | } |
2227 | } else { | 2229 | } else { |
2228 | /* clear the extent format for fast symlink */ | 2230 | /* clear the extent format for fast symlink */ |
2229 | EXT4_I(inode)->i_flags &= ~EXT4_EXTENTS_FL; | 2231 | EXT4_I(inode)->i_flags &= ~EXT4_EXTENTS_FL; |
2230 | inode->i_op = &ext4_fast_symlink_inode_operations; | 2232 | inode->i_op = &ext4_fast_symlink_inode_operations; |
2231 | memcpy((char*)&EXT4_I(inode)->i_data,symname,l); | 2233 | memcpy((char *)&EXT4_I(inode)->i_data, symname, l); |
2232 | inode->i_size = l-1; | 2234 | inode->i_size = l-1; |
2233 | } | 2235 | } |
2234 | EXT4_I(inode)->i_disksize = inode->i_size; | 2236 | EXT4_I(inode)->i_disksize = inode->i_size; |
@@ -2240,8 +2242,8 @@ out_stop: | |||
2240 | return err; | 2242 | return err; |
2241 | } | 2243 | } |
2242 | 2244 | ||
2243 | static int ext4_link (struct dentry * old_dentry, | 2245 | static int ext4_link(struct dentry *old_dentry, |
2244 | struct inode * dir, struct dentry *dentry) | 2246 | struct inode *dir, struct dentry *dentry) |
2245 | { | 2247 | { |
2246 | handle_t *handle; | 2248 | handle_t *handle; |
2247 | struct inode *inode = old_dentry->d_inode; | 2249 | struct inode *inode = old_dentry->d_inode; |
@@ -2284,13 +2286,13 @@ retry: | |||
2284 | * Anybody can rename anything with this: the permission checks are left to the | 2286 | * Anybody can rename anything with this: the permission checks are left to the |
2285 | * higher-level routines. | 2287 | * higher-level routines. |
2286 | */ | 2288 | */ |
2287 | static int ext4_rename (struct inode * old_dir, struct dentry *old_dentry, | 2289 | static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry, |
2288 | struct inode * new_dir,struct dentry *new_dentry) | 2290 | struct inode *new_dir, struct dentry *new_dentry) |
2289 | { | 2291 | { |
2290 | handle_t *handle; | 2292 | handle_t *handle; |
2291 | struct inode * old_inode, * new_inode; | 2293 | struct inode *old_inode, *new_inode; |
2292 | struct buffer_head * old_bh, * new_bh, * dir_bh; | 2294 | struct buffer_head *old_bh, *new_bh, *dir_bh; |
2293 | struct ext4_dir_entry_2 * old_de, * new_de; | 2295 | struct ext4_dir_entry_2 *old_de, *new_de; |
2294 | int retval; | 2296 | int retval; |
2295 | 2297 | ||
2296 | old_bh = new_bh = dir_bh = NULL; | 2298 | old_bh = new_bh = dir_bh = NULL; |
@@ -2308,7 +2310,7 @@ static int ext4_rename (struct inode * old_dir, struct dentry *old_dentry, | |||
2308 | if (IS_DIRSYNC(old_dir) || IS_DIRSYNC(new_dir)) | 2310 | if (IS_DIRSYNC(old_dir) || IS_DIRSYNC(new_dir)) |
2309 | handle->h_sync = 1; | 2311 | handle->h_sync = 1; |
2310 | 2312 | ||
2311 | old_bh = ext4_find_entry (old_dentry, &old_de); | 2313 | old_bh = ext4_find_entry(old_dir, &old_dentry->d_name, &old_de); |
2312 | /* | 2314 | /* |
2313 | * Check for inode number is _not_ due to possible IO errors. | 2315 | * Check for inode number is _not_ due to possible IO errors. |
2314 | * We might rmdir the source, keep it as pwd of some process | 2316 | * We might rmdir the source, keep it as pwd of some process |
@@ -2321,32 +2323,32 @@ static int ext4_rename (struct inode * old_dir, struct dentry *old_dentry, | |||
2321 | goto end_rename; | 2323 | goto end_rename; |
2322 | 2324 | ||
2323 | new_inode = new_dentry->d_inode; | 2325 | new_inode = new_dentry->d_inode; |
2324 | new_bh = ext4_find_entry (new_dentry, &new_de); | 2326 | new_bh = ext4_find_entry(new_dir, &new_dentry->d_name, &new_de); |
2325 | if (new_bh) { | 2327 | if (new_bh) { |
2326 | if (!new_inode) { | 2328 | if (!new_inode) { |
2327 | brelse (new_bh); | 2329 | brelse(new_bh); |
2328 | new_bh = NULL; | 2330 | new_bh = NULL; |
2329 | } | 2331 | } |
2330 | } | 2332 | } |
2331 | if (S_ISDIR(old_inode->i_mode)) { | 2333 | if (S_ISDIR(old_inode->i_mode)) { |
2332 | if (new_inode) { | 2334 | if (new_inode) { |
2333 | retval = -ENOTEMPTY; | 2335 | retval = -ENOTEMPTY; |
2334 | if (!empty_dir (new_inode)) | 2336 | if (!empty_dir(new_inode)) |
2335 | goto end_rename; | 2337 | goto end_rename; |
2336 | } | 2338 | } |
2337 | retval = -EIO; | 2339 | retval = -EIO; |
2338 | dir_bh = ext4_bread (handle, old_inode, 0, 0, &retval); | 2340 | dir_bh = ext4_bread(handle, old_inode, 0, 0, &retval); |
2339 | if (!dir_bh) | 2341 | if (!dir_bh) |
2340 | goto end_rename; | 2342 | goto end_rename; |
2341 | if (le32_to_cpu(PARENT_INO(dir_bh->b_data)) != old_dir->i_ino) | 2343 | if (le32_to_cpu(PARENT_INO(dir_bh->b_data)) != old_dir->i_ino) |
2342 | goto end_rename; | 2344 | goto end_rename; |
2343 | retval = -EMLINK; | 2345 | retval = -EMLINK; |
2344 | if (!new_inode && new_dir!=old_dir && | 2346 | if (!new_inode && new_dir != old_dir && |
2345 | new_dir->i_nlink >= EXT4_LINK_MAX) | 2347 | new_dir->i_nlink >= EXT4_LINK_MAX) |
2346 | goto end_rename; | 2348 | goto end_rename; |
2347 | } | 2349 | } |
2348 | if (!new_bh) { | 2350 | if (!new_bh) { |
2349 | retval = ext4_add_entry (handle, new_dentry, old_inode); | 2351 | retval = ext4_add_entry(handle, new_dentry, old_inode); |
2350 | if (retval) | 2352 | if (retval) |
2351 | goto end_rename; | 2353 | goto end_rename; |
2352 | } else { | 2354 | } else { |
@@ -2388,7 +2390,7 @@ static int ext4_rename (struct inode * old_dir, struct dentry *old_dentry, | |||
2388 | struct buffer_head *old_bh2; | 2390 | struct buffer_head *old_bh2; |
2389 | struct ext4_dir_entry_2 *old_de2; | 2391 | struct ext4_dir_entry_2 *old_de2; |
2390 | 2392 | ||
2391 | old_bh2 = ext4_find_entry(old_dentry, &old_de2); | 2393 | old_bh2 = ext4_find_entry(old_dir, &old_dentry->d_name, &old_de2); |
2392 | if (old_bh2) { | 2394 | if (old_bh2) { |
2393 | retval = ext4_delete_entry(handle, old_dir, | 2395 | retval = ext4_delete_entry(handle, old_dir, |
2394 | old_de2, old_bh2); | 2396 | old_de2, old_bh2); |
@@ -2433,9 +2435,9 @@ static int ext4_rename (struct inode * old_dir, struct dentry *old_dentry, | |||
2433 | retval = 0; | 2435 | retval = 0; |
2434 | 2436 | ||
2435 | end_rename: | 2437 | end_rename: |
2436 | brelse (dir_bh); | 2438 | brelse(dir_bh); |
2437 | brelse (old_bh); | 2439 | brelse(old_bh); |
2438 | brelse (new_bh); | 2440 | brelse(new_bh); |
2439 | ext4_journal_stop(handle); | 2441 | ext4_journal_stop(handle); |
2440 | return retval; | 2442 | return retval; |
2441 | } | 2443 | } |
@@ -2454,7 +2456,7 @@ const struct inode_operations ext4_dir_inode_operations = { | |||
2454 | .mknod = ext4_mknod, | 2456 | .mknod = ext4_mknod, |
2455 | .rename = ext4_rename, | 2457 | .rename = ext4_rename, |
2456 | .setattr = ext4_setattr, | 2458 | .setattr = ext4_setattr, |
2457 | #ifdef CONFIG_EXT4DEV_FS_XATTR | 2459 | #ifdef CONFIG_EXT4_FS_XATTR |
2458 | .setxattr = generic_setxattr, | 2460 | .setxattr = generic_setxattr, |
2459 | .getxattr = generic_getxattr, | 2461 | .getxattr = generic_getxattr, |
2460 | .listxattr = ext4_listxattr, | 2462 | .listxattr = ext4_listxattr, |
@@ -2465,7 +2467,7 @@ const struct inode_operations ext4_dir_inode_operations = { | |||
2465 | 2467 | ||
2466 | const struct inode_operations ext4_special_inode_operations = { | 2468 | const struct inode_operations ext4_special_inode_operations = { |
2467 | .setattr = ext4_setattr, | 2469 | .setattr = ext4_setattr, |
2468 | #ifdef CONFIG_EXT4DEV_FS_XATTR | 2470 | #ifdef CONFIG_EXT4_FS_XATTR |
2469 | .setxattr = generic_setxattr, | 2471 | .setxattr = generic_setxattr, |
2470 | .getxattr = generic_getxattr, | 2472 | .getxattr = generic_getxattr, |
2471 | .listxattr = ext4_listxattr, | 2473 | .listxattr = ext4_listxattr, |
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index b3d35604ea18..b6ec1843a015 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c | |||
@@ -416,8 +416,8 @@ static int add_new_gdb(handle_t *handle, struct inode *inode, | |||
416 | "EXT4-fs: ext4_add_new_gdb: adding group block %lu\n", | 416 | "EXT4-fs: ext4_add_new_gdb: adding group block %lu\n", |
417 | gdb_num); | 417 | gdb_num); |
418 | 418 | ||
419 | /* | 419 | /* |
420 | * If we are not using the primary superblock/GDT copy don't resize, | 420 | * If we are not using the primary superblock/GDT copy don't resize, |
421 | * because the user tools have no way of handling this. Probably a | 421 | * because the user tools have no way of handling this. Probably a |
422 | * bad time to do it anyways. | 422 | * bad time to do it anyways. |
423 | */ | 423 | */ |
@@ -870,11 +870,10 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input) | |||
870 | * We can allocate memory for mb_alloc based on the new group | 870 | * We can allocate memory for mb_alloc based on the new group |
871 | * descriptor | 871 | * descriptor |
872 | */ | 872 | */ |
873 | if (test_opt(sb, MBALLOC)) { | 873 | err = ext4_mb_add_more_groupinfo(sb, input->group, gdp); |
874 | err = ext4_mb_add_more_groupinfo(sb, input->group, gdp); | 874 | if (err) |
875 | if (err) | 875 | goto exit_journal; |
876 | goto exit_journal; | 876 | |
877 | } | ||
878 | /* | 877 | /* |
879 | * Make the new blocks and inodes valid next. We do this before | 878 | * Make the new blocks and inodes valid next. We do this before |
880 | * increasing the group count so that once the group is enabled, | 879 | * increasing the group count so that once the group is enabled, |
@@ -929,6 +928,15 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input) | |||
929 | percpu_counter_add(&sbi->s_freeinodes_counter, | 928 | percpu_counter_add(&sbi->s_freeinodes_counter, |
930 | EXT4_INODES_PER_GROUP(sb)); | 929 | EXT4_INODES_PER_GROUP(sb)); |
931 | 930 | ||
931 | if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG)) { | ||
932 | ext4_group_t flex_group; | ||
933 | flex_group = ext4_flex_group(sbi, input->group); | ||
934 | sbi->s_flex_groups[flex_group].free_blocks += | ||
935 | input->free_blocks_count; | ||
936 | sbi->s_flex_groups[flex_group].free_inodes += | ||
937 | EXT4_INODES_PER_GROUP(sb); | ||
938 | } | ||
939 | |||
932 | ext4_journal_dirty_metadata(handle, sbi->s_sbh); | 940 | ext4_journal_dirty_metadata(handle, sbi->s_sbh); |
933 | sb->s_dirt = 1; | 941 | sb->s_dirt = 1; |
934 | 942 | ||
@@ -964,7 +972,7 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es, | |||
964 | ext4_group_t o_groups_count; | 972 | ext4_group_t o_groups_count; |
965 | ext4_grpblk_t last; | 973 | ext4_grpblk_t last; |
966 | ext4_grpblk_t add; | 974 | ext4_grpblk_t add; |
967 | struct buffer_head * bh; | 975 | struct buffer_head *bh; |
968 | handle_t *handle; | 976 | handle_t *handle; |
969 | int err; | 977 | int err; |
970 | unsigned long freed_blocks; | 978 | unsigned long freed_blocks; |
@@ -1077,8 +1085,15 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es, | |||
1077 | /* | 1085 | /* |
1078 | * Mark mballoc pages as not up to date so that they will be updated | 1086 | * Mark mballoc pages as not up to date so that they will be updated |
1079 | * next time they are loaded by ext4_mb_load_buddy. | 1087 | * next time they are loaded by ext4_mb_load_buddy. |
1088 | * | ||
1089 | * XXX Bad, Bad, BAD!!! We should not be overloading the | ||
1090 | * Uptodate flag, particularly on thte bitmap bh, as way of | ||
1091 | * hinting to ext4_mb_load_buddy() that it needs to be | ||
1092 | * overloaded. A user could take a LVM snapshot, then do an | ||
1093 | * on-line fsck, and clear the uptodate flag, and this would | ||
1094 | * not be a bug in userspace, but a bug in the kernel. FIXME!!! | ||
1080 | */ | 1095 | */ |
1081 | if (test_opt(sb, MBALLOC)) { | 1096 | { |
1082 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 1097 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
1083 | struct inode *inode = sbi->s_buddy_cache; | 1098 | struct inode *inode = sbi->s_buddy_cache; |
1084 | int blocks_per_page; | 1099 | int blocks_per_page; |
diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 566344b926b7..fb940c22ab0d 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c | |||
@@ -34,6 +34,8 @@ | |||
34 | #include <linux/namei.h> | 34 | #include <linux/namei.h> |
35 | #include <linux/quotaops.h> | 35 | #include <linux/quotaops.h> |
36 | #include <linux/seq_file.h> | 36 | #include <linux/seq_file.h> |
37 | #include <linux/proc_fs.h> | ||
38 | #include <linux/marker.h> | ||
37 | #include <linux/log2.h> | 39 | #include <linux/log2.h> |
38 | #include <linux/crc16.h> | 40 | #include <linux/crc16.h> |
39 | #include <asm/uaccess.h> | 41 | #include <asm/uaccess.h> |
@@ -45,6 +47,8 @@ | |||
45 | #include "namei.h" | 47 | #include "namei.h" |
46 | #include "group.h" | 48 | #include "group.h" |
47 | 49 | ||
50 | struct proc_dir_entry *ext4_proc_root; | ||
51 | |||
48 | static int ext4_load_journal(struct super_block *, struct ext4_super_block *, | 52 | static int ext4_load_journal(struct super_block *, struct ext4_super_block *, |
49 | unsigned long journal_devnum); | 53 | unsigned long journal_devnum); |
50 | static int ext4_create_journal(struct super_block *, struct ext4_super_block *, | 54 | static int ext4_create_journal(struct super_block *, struct ext4_super_block *, |
@@ -503,15 +507,18 @@ static void ext4_put_super(struct super_block *sb) | |||
503 | ext4_mb_release(sb); | 507 | ext4_mb_release(sb); |
504 | ext4_ext_release(sb); | 508 | ext4_ext_release(sb); |
505 | ext4_xattr_put_super(sb); | 509 | ext4_xattr_put_super(sb); |
506 | jbd2_journal_destroy(sbi->s_journal); | 510 | if (jbd2_journal_destroy(sbi->s_journal) < 0) |
511 | ext4_abort(sb, __func__, "Couldn't clean up the journal"); | ||
507 | sbi->s_journal = NULL; | 512 | sbi->s_journal = NULL; |
508 | if (!(sb->s_flags & MS_RDONLY)) { | 513 | if (!(sb->s_flags & MS_RDONLY)) { |
509 | EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); | 514 | EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); |
510 | es->s_state = cpu_to_le16(sbi->s_mount_state); | 515 | es->s_state = cpu_to_le16(sbi->s_mount_state); |
511 | BUFFER_TRACE(sbi->s_sbh, "marking dirty"); | ||
512 | mark_buffer_dirty(sbi->s_sbh); | ||
513 | ext4_commit_super(sb, es, 1); | 516 | ext4_commit_super(sb, es, 1); |
514 | } | 517 | } |
518 | if (sbi->s_proc) { | ||
519 | remove_proc_entry("inode_readahead_blks", sbi->s_proc); | ||
520 | remove_proc_entry(sb->s_id, ext4_proc_root); | ||
521 | } | ||
515 | 522 | ||
516 | for (i = 0; i < sbi->s_gdb_count; i++) | 523 | for (i = 0; i < sbi->s_gdb_count; i++) |
517 | brelse(sbi->s_group_desc[i]); | 524 | brelse(sbi->s_group_desc[i]); |
@@ -520,6 +527,7 @@ static void ext4_put_super(struct super_block *sb) | |||
520 | percpu_counter_destroy(&sbi->s_freeblocks_counter); | 527 | percpu_counter_destroy(&sbi->s_freeblocks_counter); |
521 | percpu_counter_destroy(&sbi->s_freeinodes_counter); | 528 | percpu_counter_destroy(&sbi->s_freeinodes_counter); |
522 | percpu_counter_destroy(&sbi->s_dirs_counter); | 529 | percpu_counter_destroy(&sbi->s_dirs_counter); |
530 | percpu_counter_destroy(&sbi->s_dirtyblocks_counter); | ||
523 | brelse(sbi->s_sbh); | 531 | brelse(sbi->s_sbh); |
524 | #ifdef CONFIG_QUOTA | 532 | #ifdef CONFIG_QUOTA |
525 | for (i = 0; i < MAXQUOTAS; i++) | 533 | for (i = 0; i < MAXQUOTAS; i++) |
@@ -562,11 +570,10 @@ static struct inode *ext4_alloc_inode(struct super_block *sb) | |||
562 | ei = kmem_cache_alloc(ext4_inode_cachep, GFP_NOFS); | 570 | ei = kmem_cache_alloc(ext4_inode_cachep, GFP_NOFS); |
563 | if (!ei) | 571 | if (!ei) |
564 | return NULL; | 572 | return NULL; |
565 | #ifdef CONFIG_EXT4DEV_FS_POSIX_ACL | 573 | #ifdef CONFIG_EXT4_FS_POSIX_ACL |
566 | ei->i_acl = EXT4_ACL_NOT_CACHED; | 574 | ei->i_acl = EXT4_ACL_NOT_CACHED; |
567 | ei->i_default_acl = EXT4_ACL_NOT_CACHED; | 575 | ei->i_default_acl = EXT4_ACL_NOT_CACHED; |
568 | #endif | 576 | #endif |
569 | ei->i_block_alloc_info = NULL; | ||
570 | ei->vfs_inode.i_version = 1; | 577 | ei->vfs_inode.i_version = 1; |
571 | ei->vfs_inode.i_data.writeback_index = 0; | 578 | ei->vfs_inode.i_data.writeback_index = 0; |
572 | memset(&ei->i_cached_extent, 0, sizeof(struct ext4_ext_cache)); | 579 | memset(&ei->i_cached_extent, 0, sizeof(struct ext4_ext_cache)); |
@@ -599,7 +606,7 @@ static void init_once(void *foo) | |||
599 | struct ext4_inode_info *ei = (struct ext4_inode_info *) foo; | 606 | struct ext4_inode_info *ei = (struct ext4_inode_info *) foo; |
600 | 607 | ||
601 | INIT_LIST_HEAD(&ei->i_orphan); | 608 | INIT_LIST_HEAD(&ei->i_orphan); |
602 | #ifdef CONFIG_EXT4DEV_FS_XATTR | 609 | #ifdef CONFIG_EXT4_FS_XATTR |
603 | init_rwsem(&ei->xattr_sem); | 610 | init_rwsem(&ei->xattr_sem); |
604 | #endif | 611 | #endif |
605 | init_rwsem(&ei->i_data_sem); | 612 | init_rwsem(&ei->i_data_sem); |
@@ -625,8 +632,7 @@ static void destroy_inodecache(void) | |||
625 | 632 | ||
626 | static void ext4_clear_inode(struct inode *inode) | 633 | static void ext4_clear_inode(struct inode *inode) |
627 | { | 634 | { |
628 | struct ext4_block_alloc_info *rsv = EXT4_I(inode)->i_block_alloc_info; | 635 | #ifdef CONFIG_EXT4_FS_POSIX_ACL |
629 | #ifdef CONFIG_EXT4DEV_FS_POSIX_ACL | ||
630 | if (EXT4_I(inode)->i_acl && | 636 | if (EXT4_I(inode)->i_acl && |
631 | EXT4_I(inode)->i_acl != EXT4_ACL_NOT_CACHED) { | 637 | EXT4_I(inode)->i_acl != EXT4_ACL_NOT_CACHED) { |
632 | posix_acl_release(EXT4_I(inode)->i_acl); | 638 | posix_acl_release(EXT4_I(inode)->i_acl); |
@@ -638,10 +644,7 @@ static void ext4_clear_inode(struct inode *inode) | |||
638 | EXT4_I(inode)->i_default_acl = EXT4_ACL_NOT_CACHED; | 644 | EXT4_I(inode)->i_default_acl = EXT4_ACL_NOT_CACHED; |
639 | } | 645 | } |
640 | #endif | 646 | #endif |
641 | ext4_discard_reservation(inode); | 647 | ext4_discard_preallocations(inode); |
642 | EXT4_I(inode)->i_block_alloc_info = NULL; | ||
643 | if (unlikely(rsv)) | ||
644 | kfree(rsv); | ||
645 | jbd2_journal_release_jbd_inode(EXT4_SB(inode->i_sb)->s_journal, | 648 | jbd2_journal_release_jbd_inode(EXT4_SB(inode->i_sb)->s_journal, |
646 | &EXT4_I(inode)->jinode); | 649 | &EXT4_I(inode)->jinode); |
647 | } | 650 | } |
@@ -654,7 +657,7 @@ static inline void ext4_show_quota_options(struct seq_file *seq, | |||
654 | 657 | ||
655 | if (sbi->s_jquota_fmt) | 658 | if (sbi->s_jquota_fmt) |
656 | seq_printf(seq, ",jqfmt=%s", | 659 | seq_printf(seq, ",jqfmt=%s", |
657 | (sbi->s_jquota_fmt == QFMT_VFS_OLD) ? "vfsold": "vfsv0"); | 660 | (sbi->s_jquota_fmt == QFMT_VFS_OLD) ? "vfsold" : "vfsv0"); |
658 | 661 | ||
659 | if (sbi->s_qf_names[USRQUOTA]) | 662 | if (sbi->s_qf_names[USRQUOTA]) |
660 | seq_printf(seq, ",usrjquota=%s", sbi->s_qf_names[USRQUOTA]); | 663 | seq_printf(seq, ",usrjquota=%s", sbi->s_qf_names[USRQUOTA]); |
@@ -718,7 +721,7 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs) | |||
718 | seq_puts(seq, ",debug"); | 721 | seq_puts(seq, ",debug"); |
719 | if (test_opt(sb, OLDALLOC)) | 722 | if (test_opt(sb, OLDALLOC)) |
720 | seq_puts(seq, ",oldalloc"); | 723 | seq_puts(seq, ",oldalloc"); |
721 | #ifdef CONFIG_EXT4DEV_FS_XATTR | 724 | #ifdef CONFIG_EXT4_FS_XATTR |
722 | if (test_opt(sb, XATTR_USER) && | 725 | if (test_opt(sb, XATTR_USER) && |
723 | !(def_mount_opts & EXT4_DEFM_XATTR_USER)) | 726 | !(def_mount_opts & EXT4_DEFM_XATTR_USER)) |
724 | seq_puts(seq, ",user_xattr"); | 727 | seq_puts(seq, ",user_xattr"); |
@@ -727,7 +730,7 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs) | |||
727 | seq_puts(seq, ",nouser_xattr"); | 730 | seq_puts(seq, ",nouser_xattr"); |
728 | } | 731 | } |
729 | #endif | 732 | #endif |
730 | #ifdef CONFIG_EXT4DEV_FS_POSIX_ACL | 733 | #ifdef CONFIG_EXT4_FS_POSIX_ACL |
731 | if (test_opt(sb, POSIX_ACL) && !(def_mount_opts & EXT4_DEFM_ACL)) | 734 | if (test_opt(sb, POSIX_ACL) && !(def_mount_opts & EXT4_DEFM_ACL)) |
732 | seq_puts(seq, ",acl"); | 735 | seq_puts(seq, ",acl"); |
733 | if (!test_opt(sb, POSIX_ACL) && (def_mount_opts & EXT4_DEFM_ACL)) | 736 | if (!test_opt(sb, POSIX_ACL) && (def_mount_opts & EXT4_DEFM_ACL)) |
@@ -752,8 +755,6 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs) | |||
752 | seq_puts(seq, ",nobh"); | 755 | seq_puts(seq, ",nobh"); |
753 | if (!test_opt(sb, EXTENTS)) | 756 | if (!test_opt(sb, EXTENTS)) |
754 | seq_puts(seq, ",noextents"); | 757 | seq_puts(seq, ",noextents"); |
755 | if (!test_opt(sb, MBALLOC)) | ||
756 | seq_puts(seq, ",nomballoc"); | ||
757 | if (test_opt(sb, I_VERSION)) | 758 | if (test_opt(sb, I_VERSION)) |
758 | seq_puts(seq, ",i_version"); | 759 | seq_puts(seq, ",i_version"); |
759 | if (!test_opt(sb, DELALLOC)) | 760 | if (!test_opt(sb, DELALLOC)) |
@@ -773,6 +774,13 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs) | |||
773 | else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA) | 774 | else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA) |
774 | seq_puts(seq, ",data=writeback"); | 775 | seq_puts(seq, ",data=writeback"); |
775 | 776 | ||
777 | if (sbi->s_inode_readahead_blks != EXT4_DEF_INODE_READAHEAD_BLKS) | ||
778 | seq_printf(seq, ",inode_readahead_blks=%u", | ||
779 | sbi->s_inode_readahead_blks); | ||
780 | |||
781 | if (test_opt(sb, DATA_ERR_ABORT)) | ||
782 | seq_puts(seq, ",data_err=abort"); | ||
783 | |||
776 | ext4_show_quota_options(seq, sb); | 784 | ext4_show_quota_options(seq, sb); |
777 | return 0; | 785 | return 0; |
778 | } | 786 | } |
@@ -822,7 +830,7 @@ static struct dentry *ext4_fh_to_parent(struct super_block *sb, struct fid *fid, | |||
822 | } | 830 | } |
823 | 831 | ||
824 | #ifdef CONFIG_QUOTA | 832 | #ifdef CONFIG_QUOTA |
825 | #define QTYPE2NAME(t) ((t) == USRQUOTA?"user":"group") | 833 | #define QTYPE2NAME(t) ((t) == USRQUOTA ? "user" : "group") |
826 | #define QTYPE2MOPT(on, t) ((t) == USRQUOTA?((on)##USRJQUOTA):((on)##GRPJQUOTA)) | 834 | #define QTYPE2MOPT(on, t) ((t) == USRQUOTA?((on)##USRJQUOTA):((on)##GRPJQUOTA)) |
827 | 835 | ||
828 | static int ext4_dquot_initialize(struct inode *inode, int type); | 836 | static int ext4_dquot_initialize(struct inode *inode, int type); |
@@ -902,11 +910,13 @@ enum { | |||
902 | Opt_commit, Opt_journal_update, Opt_journal_inum, Opt_journal_dev, | 910 | Opt_commit, Opt_journal_update, Opt_journal_inum, Opt_journal_dev, |
903 | Opt_journal_checksum, Opt_journal_async_commit, | 911 | Opt_journal_checksum, Opt_journal_async_commit, |
904 | Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback, | 912 | Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback, |
913 | Opt_data_err_abort, Opt_data_err_ignore, | ||
905 | Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota, | 914 | Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota, |
906 | Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_quota, Opt_noquota, | 915 | Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_quota, Opt_noquota, |
907 | Opt_ignore, Opt_barrier, Opt_err, Opt_resize, Opt_usrquota, | 916 | Opt_ignore, Opt_barrier, Opt_err, Opt_resize, Opt_usrquota, |
908 | Opt_grpquota, Opt_extents, Opt_noextents, Opt_i_version, | 917 | Opt_grpquota, Opt_extents, Opt_noextents, Opt_i_version, |
909 | Opt_mballoc, Opt_nomballoc, Opt_stripe, Opt_delalloc, Opt_nodelalloc, | 918 | Opt_mballoc, Opt_nomballoc, Opt_stripe, Opt_delalloc, Opt_nodelalloc, |
919 | Opt_inode_readahead_blks | ||
910 | }; | 920 | }; |
911 | 921 | ||
912 | static match_table_t tokens = { | 922 | static match_table_t tokens = { |
@@ -947,6 +957,8 @@ static match_table_t tokens = { | |||
947 | {Opt_data_journal, "data=journal"}, | 957 | {Opt_data_journal, "data=journal"}, |
948 | {Opt_data_ordered, "data=ordered"}, | 958 | {Opt_data_ordered, "data=ordered"}, |
949 | {Opt_data_writeback, "data=writeback"}, | 959 | {Opt_data_writeback, "data=writeback"}, |
960 | {Opt_data_err_abort, "data_err=abort"}, | ||
961 | {Opt_data_err_ignore, "data_err=ignore"}, | ||
950 | {Opt_offusrjquota, "usrjquota="}, | 962 | {Opt_offusrjquota, "usrjquota="}, |
951 | {Opt_usrjquota, "usrjquota=%s"}, | 963 | {Opt_usrjquota, "usrjquota=%s"}, |
952 | {Opt_offgrpjquota, "grpjquota="}, | 964 | {Opt_offgrpjquota, "grpjquota="}, |
@@ -967,6 +979,7 @@ static match_table_t tokens = { | |||
967 | {Opt_resize, "resize"}, | 979 | {Opt_resize, "resize"}, |
968 | {Opt_delalloc, "delalloc"}, | 980 | {Opt_delalloc, "delalloc"}, |
969 | {Opt_nodelalloc, "nodelalloc"}, | 981 | {Opt_nodelalloc, "nodelalloc"}, |
982 | {Opt_inode_readahead_blks, "inode_readahead_blks=%u"}, | ||
970 | {Opt_err, NULL}, | 983 | {Opt_err, NULL}, |
971 | }; | 984 | }; |
972 | 985 | ||
@@ -981,7 +994,7 @@ static ext4_fsblk_t get_sb_block(void **data) | |||
981 | /*todo: use simple_strtoll with >32bit ext4 */ | 994 | /*todo: use simple_strtoll with >32bit ext4 */ |
982 | sb_block = simple_strtoul(options, &options, 0); | 995 | sb_block = simple_strtoul(options, &options, 0); |
983 | if (*options && *options != ',') { | 996 | if (*options && *options != ',') { |
984 | printk("EXT4-fs: Invalid sb specification: %s\n", | 997 | printk(KERN_ERR "EXT4-fs: Invalid sb specification: %s\n", |
985 | (char *) *data); | 998 | (char *) *data); |
986 | return 1; | 999 | return 1; |
987 | } | 1000 | } |
@@ -1072,7 +1085,7 @@ static int parse_options(char *options, struct super_block *sb, | |||
1072 | case Opt_orlov: | 1085 | case Opt_orlov: |
1073 | clear_opt(sbi->s_mount_opt, OLDALLOC); | 1086 | clear_opt(sbi->s_mount_opt, OLDALLOC); |
1074 | break; | 1087 | break; |
1075 | #ifdef CONFIG_EXT4DEV_FS_XATTR | 1088 | #ifdef CONFIG_EXT4_FS_XATTR |
1076 | case Opt_user_xattr: | 1089 | case Opt_user_xattr: |
1077 | set_opt(sbi->s_mount_opt, XATTR_USER); | 1090 | set_opt(sbi->s_mount_opt, XATTR_USER); |
1078 | break; | 1091 | break; |
@@ -1082,10 +1095,11 @@ static int parse_options(char *options, struct super_block *sb, | |||
1082 | #else | 1095 | #else |
1083 | case Opt_user_xattr: | 1096 | case Opt_user_xattr: |
1084 | case Opt_nouser_xattr: | 1097 | case Opt_nouser_xattr: |
1085 | printk("EXT4 (no)user_xattr options not supported\n"); | 1098 | printk(KERN_ERR "EXT4 (no)user_xattr options " |
1099 | "not supported\n"); | ||
1086 | break; | 1100 | break; |
1087 | #endif | 1101 | #endif |
1088 | #ifdef CONFIG_EXT4DEV_FS_POSIX_ACL | 1102 | #ifdef CONFIG_EXT4_FS_POSIX_ACL |
1089 | case Opt_acl: | 1103 | case Opt_acl: |
1090 | set_opt(sbi->s_mount_opt, POSIX_ACL); | 1104 | set_opt(sbi->s_mount_opt, POSIX_ACL); |
1091 | break; | 1105 | break; |
@@ -1095,7 +1109,8 @@ static int parse_options(char *options, struct super_block *sb, | |||
1095 | #else | 1109 | #else |
1096 | case Opt_acl: | 1110 | case Opt_acl: |
1097 | case Opt_noacl: | 1111 | case Opt_noacl: |
1098 | printk("EXT4 (no)acl options not supported\n"); | 1112 | printk(KERN_ERR "EXT4 (no)acl options " |
1113 | "not supported\n"); | ||
1099 | break; | 1114 | break; |
1100 | #endif | 1115 | #endif |
1101 | case Opt_reservation: | 1116 | case Opt_reservation: |
@@ -1178,6 +1193,12 @@ static int parse_options(char *options, struct super_block *sb, | |||
1178 | sbi->s_mount_opt |= data_opt; | 1193 | sbi->s_mount_opt |= data_opt; |
1179 | } | 1194 | } |
1180 | break; | 1195 | break; |
1196 | case Opt_data_err_abort: | ||
1197 | set_opt(sbi->s_mount_opt, DATA_ERR_ABORT); | ||
1198 | break; | ||
1199 | case Opt_data_err_ignore: | ||
1200 | clear_opt(sbi->s_mount_opt, DATA_ERR_ABORT); | ||
1201 | break; | ||
1181 | #ifdef CONFIG_QUOTA | 1202 | #ifdef CONFIG_QUOTA |
1182 | case Opt_usrjquota: | 1203 | case Opt_usrjquota: |
1183 | qtype = USRQUOTA; | 1204 | qtype = USRQUOTA; |
@@ -1189,8 +1210,8 @@ set_qf_name: | |||
1189 | sb_any_quota_suspended(sb)) && | 1210 | sb_any_quota_suspended(sb)) && |
1190 | !sbi->s_qf_names[qtype]) { | 1211 | !sbi->s_qf_names[qtype]) { |
1191 | printk(KERN_ERR | 1212 | printk(KERN_ERR |
1192 | "EXT4-fs: Cannot change journaled " | 1213 | "EXT4-fs: Cannot change journaled " |
1193 | "quota options when quota turned on.\n"); | 1214 | "quota options when quota turned on.\n"); |
1194 | return 0; | 1215 | return 0; |
1195 | } | 1216 | } |
1196 | qname = match_strdup(&args[0]); | 1217 | qname = match_strdup(&args[0]); |
@@ -1357,12 +1378,6 @@ set_qf_format: | |||
1357 | case Opt_nodelalloc: | 1378 | case Opt_nodelalloc: |
1358 | clear_opt(sbi->s_mount_opt, DELALLOC); | 1379 | clear_opt(sbi->s_mount_opt, DELALLOC); |
1359 | break; | 1380 | break; |
1360 | case Opt_mballoc: | ||
1361 | set_opt(sbi->s_mount_opt, MBALLOC); | ||
1362 | break; | ||
1363 | case Opt_nomballoc: | ||
1364 | clear_opt(sbi->s_mount_opt, MBALLOC); | ||
1365 | break; | ||
1366 | case Opt_stripe: | 1381 | case Opt_stripe: |
1367 | if (match_int(&args[0], &option)) | 1382 | if (match_int(&args[0], &option)) |
1368 | return 0; | 1383 | return 0; |
@@ -1373,6 +1388,13 @@ set_qf_format: | |||
1373 | case Opt_delalloc: | 1388 | case Opt_delalloc: |
1374 | set_opt(sbi->s_mount_opt, DELALLOC); | 1389 | set_opt(sbi->s_mount_opt, DELALLOC); |
1375 | break; | 1390 | break; |
1391 | case Opt_inode_readahead_blks: | ||
1392 | if (match_int(&args[0], &option)) | ||
1393 | return 0; | ||
1394 | if (option < 0 || option > (1 << 30)) | ||
1395 | return 0; | ||
1396 | sbi->s_inode_readahead_blks = option; | ||
1397 | break; | ||
1376 | default: | 1398 | default: |
1377 | printk(KERN_ERR | 1399 | printk(KERN_ERR |
1378 | "EXT4-fs: Unrecognized mount option \"%s\" " | 1400 | "EXT4-fs: Unrecognized mount option \"%s\" " |
@@ -1473,15 +1495,9 @@ static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es, | |||
1473 | EXT4_INODES_PER_GROUP(sb), | 1495 | EXT4_INODES_PER_GROUP(sb), |
1474 | sbi->s_mount_opt); | 1496 | sbi->s_mount_opt); |
1475 | 1497 | ||
1476 | printk(KERN_INFO "EXT4 FS on %s, ", sb->s_id); | 1498 | printk(KERN_INFO "EXT4 FS on %s, %s journal on %s\n", |
1477 | if (EXT4_SB(sb)->s_journal->j_inode == NULL) { | 1499 | sb->s_id, EXT4_SB(sb)->s_journal->j_inode ? "internal" : |
1478 | char b[BDEVNAME_SIZE]; | 1500 | "external", EXT4_SB(sb)->s_journal->j_devname); |
1479 | |||
1480 | printk("external journal on %s\n", | ||
1481 | bdevname(EXT4_SB(sb)->s_journal->j_dev, b)); | ||
1482 | } else { | ||
1483 | printk("internal journal\n"); | ||
1484 | } | ||
1485 | return res; | 1501 | return res; |
1486 | } | 1502 | } |
1487 | 1503 | ||
@@ -1504,8 +1520,11 @@ static int ext4_fill_flex_info(struct super_block *sb) | |||
1504 | sbi->s_log_groups_per_flex = sbi->s_es->s_log_groups_per_flex; | 1520 | sbi->s_log_groups_per_flex = sbi->s_es->s_log_groups_per_flex; |
1505 | groups_per_flex = 1 << sbi->s_log_groups_per_flex; | 1521 | groups_per_flex = 1 << sbi->s_log_groups_per_flex; |
1506 | 1522 | ||
1507 | flex_group_count = (sbi->s_groups_count + groups_per_flex - 1) / | 1523 | /* We allocate both existing and potentially added groups */ |
1508 | groups_per_flex; | 1524 | flex_group_count = ((sbi->s_groups_count + groups_per_flex - 1) + |
1525 | ((sbi->s_es->s_reserved_gdt_blocks +1 ) << | ||
1526 | EXT4_DESC_PER_BLOCK_BITS(sb))) / | ||
1527 | groups_per_flex; | ||
1509 | sbi->s_flex_groups = kzalloc(flex_group_count * | 1528 | sbi->s_flex_groups = kzalloc(flex_group_count * |
1510 | sizeof(struct flex_groups), GFP_KERNEL); | 1529 | sizeof(struct flex_groups), GFP_KERNEL); |
1511 | if (sbi->s_flex_groups == NULL) { | 1530 | if (sbi->s_flex_groups == NULL) { |
@@ -1584,7 +1603,7 @@ static int ext4_check_descriptors(struct super_block *sb) | |||
1584 | if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG)) | 1603 | if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG)) |
1585 | flexbg_flag = 1; | 1604 | flexbg_flag = 1; |
1586 | 1605 | ||
1587 | ext4_debug ("Checking group descriptors"); | 1606 | ext4_debug("Checking group descriptors"); |
1588 | 1607 | ||
1589 | for (i = 0; i < sbi->s_groups_count; i++) { | 1608 | for (i = 0; i < sbi->s_groups_count; i++) { |
1590 | struct ext4_group_desc *gdp = ext4_get_group_desc(sb, i, NULL); | 1609 | struct ext4_group_desc *gdp = ext4_get_group_desc(sb, i, NULL); |
@@ -1623,8 +1642,10 @@ static int ext4_check_descriptors(struct super_block *sb) | |||
1623 | "Checksum for group %lu failed (%u!=%u)\n", | 1642 | "Checksum for group %lu failed (%u!=%u)\n", |
1624 | i, le16_to_cpu(ext4_group_desc_csum(sbi, i, | 1643 | i, le16_to_cpu(ext4_group_desc_csum(sbi, i, |
1625 | gdp)), le16_to_cpu(gdp->bg_checksum)); | 1644 | gdp)), le16_to_cpu(gdp->bg_checksum)); |
1626 | if (!(sb->s_flags & MS_RDONLY)) | 1645 | if (!(sb->s_flags & MS_RDONLY)) { |
1646 | spin_unlock(sb_bgl_lock(sbi, i)); | ||
1627 | return 0; | 1647 | return 0; |
1648 | } | ||
1628 | } | 1649 | } |
1629 | spin_unlock(sb_bgl_lock(sbi, i)); | 1650 | spin_unlock(sb_bgl_lock(sbi, i)); |
1630 | if (!flexbg_flag) | 1651 | if (!flexbg_flag) |
@@ -1714,9 +1735,9 @@ static void ext4_orphan_cleanup(struct super_block *sb, | |||
1714 | DQUOT_INIT(inode); | 1735 | DQUOT_INIT(inode); |
1715 | if (inode->i_nlink) { | 1736 | if (inode->i_nlink) { |
1716 | printk(KERN_DEBUG | 1737 | printk(KERN_DEBUG |
1717 | "%s: truncating inode %lu to %Ld bytes\n", | 1738 | "%s: truncating inode %lu to %lld bytes\n", |
1718 | __func__, inode->i_ino, inode->i_size); | 1739 | __func__, inode->i_ino, inode->i_size); |
1719 | jbd_debug(2, "truncating inode %lu to %Ld bytes\n", | 1740 | jbd_debug(2, "truncating inode %lu to %lld bytes\n", |
1720 | inode->i_ino, inode->i_size); | 1741 | inode->i_ino, inode->i_size); |
1721 | ext4_truncate(inode); | 1742 | ext4_truncate(inode); |
1722 | nr_truncates++; | 1743 | nr_truncates++; |
@@ -1914,6 +1935,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
1914 | unsigned long journal_devnum = 0; | 1935 | unsigned long journal_devnum = 0; |
1915 | unsigned long def_mount_opts; | 1936 | unsigned long def_mount_opts; |
1916 | struct inode *root; | 1937 | struct inode *root; |
1938 | char *cp; | ||
1917 | int ret = -EINVAL; | 1939 | int ret = -EINVAL; |
1918 | int blocksize; | 1940 | int blocksize; |
1919 | int db_count; | 1941 | int db_count; |
@@ -1930,10 +1952,15 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
1930 | sbi->s_mount_opt = 0; | 1952 | sbi->s_mount_opt = 0; |
1931 | sbi->s_resuid = EXT4_DEF_RESUID; | 1953 | sbi->s_resuid = EXT4_DEF_RESUID; |
1932 | sbi->s_resgid = EXT4_DEF_RESGID; | 1954 | sbi->s_resgid = EXT4_DEF_RESGID; |
1955 | sbi->s_inode_readahead_blks = EXT4_DEF_INODE_READAHEAD_BLKS; | ||
1933 | sbi->s_sb_block = sb_block; | 1956 | sbi->s_sb_block = sb_block; |
1934 | 1957 | ||
1935 | unlock_kernel(); | 1958 | unlock_kernel(); |
1936 | 1959 | ||
1960 | /* Cleanup superblock name */ | ||
1961 | for (cp = sb->s_id; (cp = strchr(cp, '/'));) | ||
1962 | *cp = '!'; | ||
1963 | |||
1937 | blocksize = sb_min_blocksize(sb, EXT4_MIN_BLOCK_SIZE); | 1964 | blocksize = sb_min_blocksize(sb, EXT4_MIN_BLOCK_SIZE); |
1938 | if (!blocksize) { | 1965 | if (!blocksize) { |
1939 | printk(KERN_ERR "EXT4-fs: unable to set blocksize\n"); | 1966 | printk(KERN_ERR "EXT4-fs: unable to set blocksize\n"); |
@@ -1973,11 +2000,11 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
1973 | set_opt(sbi->s_mount_opt, GRPID); | 2000 | set_opt(sbi->s_mount_opt, GRPID); |
1974 | if (def_mount_opts & EXT4_DEFM_UID16) | 2001 | if (def_mount_opts & EXT4_DEFM_UID16) |
1975 | set_opt(sbi->s_mount_opt, NO_UID32); | 2002 | set_opt(sbi->s_mount_opt, NO_UID32); |
1976 | #ifdef CONFIG_EXT4DEV_FS_XATTR | 2003 | #ifdef CONFIG_EXT4_FS_XATTR |
1977 | if (def_mount_opts & EXT4_DEFM_XATTR_USER) | 2004 | if (def_mount_opts & EXT4_DEFM_XATTR_USER) |
1978 | set_opt(sbi->s_mount_opt, XATTR_USER); | 2005 | set_opt(sbi->s_mount_opt, XATTR_USER); |
1979 | #endif | 2006 | #endif |
1980 | #ifdef CONFIG_EXT4DEV_FS_POSIX_ACL | 2007 | #ifdef CONFIG_EXT4_FS_POSIX_ACL |
1981 | if (def_mount_opts & EXT4_DEFM_ACL) | 2008 | if (def_mount_opts & EXT4_DEFM_ACL) |
1982 | set_opt(sbi->s_mount_opt, POSIX_ACL); | 2009 | set_opt(sbi->s_mount_opt, POSIX_ACL); |
1983 | #endif | 2010 | #endif |
@@ -2012,11 +2039,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
2012 | ext4_warning(sb, __func__, | 2039 | ext4_warning(sb, __func__, |
2013 | "extents feature not enabled on this filesystem, " | 2040 | "extents feature not enabled on this filesystem, " |
2014 | "use tune2fs.\n"); | 2041 | "use tune2fs.\n"); |
2015 | /* | ||
2016 | * turn on mballoc code by default in ext4 filesystem | ||
2017 | * Use -o nomballoc to turn it off | ||
2018 | */ | ||
2019 | set_opt(sbi->s_mount_opt, MBALLOC); | ||
2020 | 2042 | ||
2021 | /* | 2043 | /* |
2022 | * enable delayed allocation by default | 2044 | * enable delayed allocation by default |
@@ -2041,16 +2063,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
2041 | "running e2fsck is recommended\n"); | 2063 | "running e2fsck is recommended\n"); |
2042 | 2064 | ||
2043 | /* | 2065 | /* |
2044 | * Since ext4 is still considered development code, we require | ||
2045 | * that the TEST_FILESYS flag in s->flags be set. | ||
2046 | */ | ||
2047 | if (!(le32_to_cpu(es->s_flags) & EXT2_FLAGS_TEST_FILESYS)) { | ||
2048 | printk(KERN_WARNING "EXT4-fs: %s: not marked " | ||
2049 | "OK to use with test code.\n", sb->s_id); | ||
2050 | goto failed_mount; | ||
2051 | } | ||
2052 | |||
2053 | /* | ||
2054 | * Check feature flags regardless of the revision level, since we | 2066 | * Check feature flags regardless of the revision level, since we |
2055 | * previously didn't change the revision level when setting the flags, | 2067 | * previously didn't change the revision level when setting the flags, |
2056 | * so there is a chance incompat flags are set on a rev 0 filesystem. | 2068 | * so there is a chance incompat flags are set on a rev 0 filesystem. |
@@ -2219,6 +2231,16 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
2219 | goto failed_mount; | 2231 | goto failed_mount; |
2220 | } | 2232 | } |
2221 | 2233 | ||
2234 | #ifdef CONFIG_PROC_FS | ||
2235 | if (ext4_proc_root) | ||
2236 | sbi->s_proc = proc_mkdir(sb->s_id, ext4_proc_root); | ||
2237 | |||
2238 | if (sbi->s_proc) | ||
2239 | proc_create_data("inode_readahead_blks", 0644, sbi->s_proc, | ||
2240 | &ext4_ui_proc_fops, | ||
2241 | &sbi->s_inode_readahead_blks); | ||
2242 | #endif | ||
2243 | |||
2222 | bgl_lock_init(&sbi->s_blockgroup_lock); | 2244 | bgl_lock_init(&sbi->s_blockgroup_lock); |
2223 | 2245 | ||
2224 | for (i = 0; i < db_count; i++) { | 2246 | for (i = 0; i < db_count; i++) { |
@@ -2257,24 +2279,14 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
2257 | err = percpu_counter_init(&sbi->s_dirs_counter, | 2279 | err = percpu_counter_init(&sbi->s_dirs_counter, |
2258 | ext4_count_dirs(sb)); | 2280 | ext4_count_dirs(sb)); |
2259 | } | 2281 | } |
2282 | if (!err) { | ||
2283 | err = percpu_counter_init(&sbi->s_dirtyblocks_counter, 0); | ||
2284 | } | ||
2260 | if (err) { | 2285 | if (err) { |
2261 | printk(KERN_ERR "EXT4-fs: insufficient memory\n"); | 2286 | printk(KERN_ERR "EXT4-fs: insufficient memory\n"); |
2262 | goto failed_mount3; | 2287 | goto failed_mount3; |
2263 | } | 2288 | } |
2264 | 2289 | ||
2265 | /* per fileystem reservation list head & lock */ | ||
2266 | spin_lock_init(&sbi->s_rsv_window_lock); | ||
2267 | sbi->s_rsv_window_root = RB_ROOT; | ||
2268 | /* Add a single, static dummy reservation to the start of the | ||
2269 | * reservation window list --- it gives us a placeholder for | ||
2270 | * append-at-start-of-list which makes the allocation logic | ||
2271 | * _much_ simpler. */ | ||
2272 | sbi->s_rsv_window_head.rsv_start = EXT4_RESERVE_WINDOW_NOT_ALLOCATED; | ||
2273 | sbi->s_rsv_window_head.rsv_end = EXT4_RESERVE_WINDOW_NOT_ALLOCATED; | ||
2274 | sbi->s_rsv_window_head.rsv_alloc_hit = 0; | ||
2275 | sbi->s_rsv_window_head.rsv_goal_size = 0; | ||
2276 | ext4_rsv_window_add(sb, &sbi->s_rsv_window_head); | ||
2277 | |||
2278 | sbi->s_stripe = ext4_get_stripe_size(sbi); | 2290 | sbi->s_stripe = ext4_get_stripe_size(sbi); |
2279 | 2291 | ||
2280 | /* | 2292 | /* |
@@ -2471,7 +2483,12 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
2471 | printk(KERN_INFO "EXT4-fs: delayed allocation enabled\n"); | 2483 | printk(KERN_INFO "EXT4-fs: delayed allocation enabled\n"); |
2472 | 2484 | ||
2473 | ext4_ext_init(sb); | 2485 | ext4_ext_init(sb); |
2474 | ext4_mb_init(sb, needs_recovery); | 2486 | err = ext4_mb_init(sb, needs_recovery); |
2487 | if (err) { | ||
2488 | printk(KERN_ERR "EXT4-fs: failed to initalize mballoc (%d)\n", | ||
2489 | err); | ||
2490 | goto failed_mount4; | ||
2491 | } | ||
2475 | 2492 | ||
2476 | lock_kernel(); | 2493 | lock_kernel(); |
2477 | return 0; | 2494 | return 0; |
@@ -2489,11 +2506,16 @@ failed_mount3: | |||
2489 | percpu_counter_destroy(&sbi->s_freeblocks_counter); | 2506 | percpu_counter_destroy(&sbi->s_freeblocks_counter); |
2490 | percpu_counter_destroy(&sbi->s_freeinodes_counter); | 2507 | percpu_counter_destroy(&sbi->s_freeinodes_counter); |
2491 | percpu_counter_destroy(&sbi->s_dirs_counter); | 2508 | percpu_counter_destroy(&sbi->s_dirs_counter); |
2509 | percpu_counter_destroy(&sbi->s_dirtyblocks_counter); | ||
2492 | failed_mount2: | 2510 | failed_mount2: |
2493 | for (i = 0; i < db_count; i++) | 2511 | for (i = 0; i < db_count; i++) |
2494 | brelse(sbi->s_group_desc[i]); | 2512 | brelse(sbi->s_group_desc[i]); |
2495 | kfree(sbi->s_group_desc); | 2513 | kfree(sbi->s_group_desc); |
2496 | failed_mount: | 2514 | failed_mount: |
2515 | if (sbi->s_proc) { | ||
2516 | remove_proc_entry("inode_readahead_blks", sbi->s_proc); | ||
2517 | remove_proc_entry(sb->s_id, ext4_proc_root); | ||
2518 | } | ||
2497 | #ifdef CONFIG_QUOTA | 2519 | #ifdef CONFIG_QUOTA |
2498 | for (i = 0; i < MAXQUOTAS; i++) | 2520 | for (i = 0; i < MAXQUOTAS; i++) |
2499 | kfree(sbi->s_qf_names[i]); | 2521 | kfree(sbi->s_qf_names[i]); |
@@ -2527,6 +2549,10 @@ static void ext4_init_journal_params(struct super_block *sb, journal_t *journal) | |||
2527 | journal->j_flags |= JBD2_BARRIER; | 2549 | journal->j_flags |= JBD2_BARRIER; |
2528 | else | 2550 | else |
2529 | journal->j_flags &= ~JBD2_BARRIER; | 2551 | journal->j_flags &= ~JBD2_BARRIER; |
2552 | if (test_opt(sb, DATA_ERR_ABORT)) | ||
2553 | journal->j_flags |= JBD2_ABORT_ON_SYNCDATA_ERR; | ||
2554 | else | ||
2555 | journal->j_flags &= ~JBD2_ABORT_ON_SYNCDATA_ERR; | ||
2530 | spin_unlock(&journal->j_state_lock); | 2556 | spin_unlock(&journal->j_state_lock); |
2531 | } | 2557 | } |
2532 | 2558 | ||
@@ -2552,7 +2578,7 @@ static journal_t *ext4_get_journal(struct super_block *sb, | |||
2552 | return NULL; | 2578 | return NULL; |
2553 | } | 2579 | } |
2554 | 2580 | ||
2555 | jbd_debug(2, "Journal inode found at %p: %Ld bytes\n", | 2581 | jbd_debug(2, "Journal inode found at %p: %lld bytes\n", |
2556 | journal_inode, journal_inode->i_size); | 2582 | journal_inode, journal_inode->i_size); |
2557 | if (!S_ISREG(journal_inode->i_mode)) { | 2583 | if (!S_ISREG(journal_inode->i_mode)) { |
2558 | printk(KERN_ERR "EXT4-fs: invalid journal inode.\n"); | 2584 | printk(KERN_ERR "EXT4-fs: invalid journal inode.\n"); |
@@ -2715,6 +2741,11 @@ static int ext4_load_journal(struct super_block *sb, | |||
2715 | return -EINVAL; | 2741 | return -EINVAL; |
2716 | } | 2742 | } |
2717 | 2743 | ||
2744 | if (journal->j_flags & JBD2_BARRIER) | ||
2745 | printk(KERN_INFO "EXT4-fs: barriers enabled\n"); | ||
2746 | else | ||
2747 | printk(KERN_INFO "EXT4-fs: barriers disabled\n"); | ||
2748 | |||
2718 | if (!really_read_only && test_opt(sb, UPDATE_JOURNAL)) { | 2749 | if (!really_read_only && test_opt(sb, UPDATE_JOURNAL)) { |
2719 | err = jbd2_journal_update_format(journal); | 2750 | err = jbd2_journal_update_format(journal); |
2720 | if (err) { | 2751 | if (err) { |
@@ -2799,13 +2830,34 @@ static void ext4_commit_super(struct super_block *sb, | |||
2799 | 2830 | ||
2800 | if (!sbh) | 2831 | if (!sbh) |
2801 | return; | 2832 | return; |
2833 | if (buffer_write_io_error(sbh)) { | ||
2834 | /* | ||
2835 | * Oh, dear. A previous attempt to write the | ||
2836 | * superblock failed. This could happen because the | ||
2837 | * USB device was yanked out. Or it could happen to | ||
2838 | * be a transient write error and maybe the block will | ||
2839 | * be remapped. Nothing we can do but to retry the | ||
2840 | * write and hope for the best. | ||
2841 | */ | ||
2842 | printk(KERN_ERR "ext4: previous I/O error to " | ||
2843 | "superblock detected for %s.\n", sb->s_id); | ||
2844 | clear_buffer_write_io_error(sbh); | ||
2845 | set_buffer_uptodate(sbh); | ||
2846 | } | ||
2802 | es->s_wtime = cpu_to_le32(get_seconds()); | 2847 | es->s_wtime = cpu_to_le32(get_seconds()); |
2803 | ext4_free_blocks_count_set(es, ext4_count_free_blocks(sb)); | 2848 | ext4_free_blocks_count_set(es, ext4_count_free_blocks(sb)); |
2804 | es->s_free_inodes_count = cpu_to_le32(ext4_count_free_inodes(sb)); | 2849 | es->s_free_inodes_count = cpu_to_le32(ext4_count_free_inodes(sb)); |
2805 | BUFFER_TRACE(sbh, "marking dirty"); | 2850 | BUFFER_TRACE(sbh, "marking dirty"); |
2806 | mark_buffer_dirty(sbh); | 2851 | mark_buffer_dirty(sbh); |
2807 | if (sync) | 2852 | if (sync) { |
2808 | sync_dirty_buffer(sbh); | 2853 | sync_dirty_buffer(sbh); |
2854 | if (buffer_write_io_error(sbh)) { | ||
2855 | printk(KERN_ERR "ext4: I/O error while writing " | ||
2856 | "superblock for %s.\n", sb->s_id); | ||
2857 | clear_buffer_write_io_error(sbh); | ||
2858 | set_buffer_uptodate(sbh); | ||
2859 | } | ||
2860 | } | ||
2809 | } | 2861 | } |
2810 | 2862 | ||
2811 | 2863 | ||
@@ -2820,7 +2872,9 @@ static void ext4_mark_recovery_complete(struct super_block *sb, | |||
2820 | journal_t *journal = EXT4_SB(sb)->s_journal; | 2872 | journal_t *journal = EXT4_SB(sb)->s_journal; |
2821 | 2873 | ||
2822 | jbd2_journal_lock_updates(journal); | 2874 | jbd2_journal_lock_updates(journal); |
2823 | jbd2_journal_flush(journal); | 2875 | if (jbd2_journal_flush(journal) < 0) |
2876 | goto out; | ||
2877 | |||
2824 | lock_super(sb); | 2878 | lock_super(sb); |
2825 | if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER) && | 2879 | if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER) && |
2826 | sb->s_flags & MS_RDONLY) { | 2880 | sb->s_flags & MS_RDONLY) { |
@@ -2829,6 +2883,8 @@ static void ext4_mark_recovery_complete(struct super_block *sb, | |||
2829 | ext4_commit_super(sb, es, 1); | 2883 | ext4_commit_super(sb, es, 1); |
2830 | } | 2884 | } |
2831 | unlock_super(sb); | 2885 | unlock_super(sb); |
2886 | |||
2887 | out: | ||
2832 | jbd2_journal_unlock_updates(journal); | 2888 | jbd2_journal_unlock_updates(journal); |
2833 | } | 2889 | } |
2834 | 2890 | ||
@@ -2907,6 +2963,7 @@ static int ext4_sync_fs(struct super_block *sb, int wait) | |||
2907 | { | 2963 | { |
2908 | tid_t target; | 2964 | tid_t target; |
2909 | 2965 | ||
2966 | trace_mark(ext4_sync_fs, "dev %s wait %d", sb->s_id, wait); | ||
2910 | sb->s_dirt = 0; | 2967 | sb->s_dirt = 0; |
2911 | if (jbd2_journal_start_commit(EXT4_SB(sb)->s_journal, &target)) { | 2968 | if (jbd2_journal_start_commit(EXT4_SB(sb)->s_journal, &target)) { |
2912 | if (wait) | 2969 | if (wait) |
@@ -2928,7 +2985,13 @@ static void ext4_write_super_lockfs(struct super_block *sb) | |||
2928 | 2985 | ||
2929 | /* Now we set up the journal barrier. */ | 2986 | /* Now we set up the journal barrier. */ |
2930 | jbd2_journal_lock_updates(journal); | 2987 | jbd2_journal_lock_updates(journal); |
2931 | jbd2_journal_flush(journal); | 2988 | |
2989 | /* | ||
2990 | * We don't want to clear needs_recovery flag when we failed | ||
2991 | * to flush the journal. | ||
2992 | */ | ||
2993 | if (jbd2_journal_flush(journal) < 0) | ||
2994 | return; | ||
2932 | 2995 | ||
2933 | /* Journal blocked and flushed, clear needs_recovery flag. */ | 2996 | /* Journal blocked and flushed, clear needs_recovery flag. */ |
2934 | EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); | 2997 | EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); |
@@ -3162,7 +3225,8 @@ static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf) | |||
3162 | buf->f_type = EXT4_SUPER_MAGIC; | 3225 | buf->f_type = EXT4_SUPER_MAGIC; |
3163 | buf->f_bsize = sb->s_blocksize; | 3226 | buf->f_bsize = sb->s_blocksize; |
3164 | buf->f_blocks = ext4_blocks_count(es) - sbi->s_overhead_last; | 3227 | buf->f_blocks = ext4_blocks_count(es) - sbi->s_overhead_last; |
3165 | buf->f_bfree = percpu_counter_sum_positive(&sbi->s_freeblocks_counter); | 3228 | buf->f_bfree = percpu_counter_sum_positive(&sbi->s_freeblocks_counter) - |
3229 | percpu_counter_sum_positive(&sbi->s_dirtyblocks_counter); | ||
3166 | ext4_free_blocks_count_set(es, buf->f_bfree); | 3230 | ext4_free_blocks_count_set(es, buf->f_bfree); |
3167 | buf->f_bavail = buf->f_bfree - ext4_r_blocks_count(es); | 3231 | buf->f_bavail = buf->f_bfree - ext4_r_blocks_count(es); |
3168 | if (buf->f_bfree < ext4_r_blocks_count(es)) | 3232 | if (buf->f_bfree < ext4_r_blocks_count(es)) |
@@ -3367,8 +3431,12 @@ static int ext4_quota_on(struct super_block *sb, int type, int format_id, | |||
3367 | * otherwise be livelocked... | 3431 | * otherwise be livelocked... |
3368 | */ | 3432 | */ |
3369 | jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal); | 3433 | jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal); |
3370 | jbd2_journal_flush(EXT4_SB(sb)->s_journal); | 3434 | err = jbd2_journal_flush(EXT4_SB(sb)->s_journal); |
3371 | jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal); | 3435 | jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal); |
3436 | if (err) { | ||
3437 | path_put(&nd.path); | ||
3438 | return err; | ||
3439 | } | ||
3372 | } | 3440 | } |
3373 | 3441 | ||
3374 | err = vfs_quota_on_path(sb, type, format_id, &nd.path); | 3442 | err = vfs_quota_on_path(sb, type, format_id, &nd.path); |
@@ -3432,7 +3500,7 @@ static ssize_t ext4_quota_write(struct super_block *sb, int type, | |||
3432 | handle_t *handle = journal_current_handle(); | 3500 | handle_t *handle = journal_current_handle(); |
3433 | 3501 | ||
3434 | if (!handle) { | 3502 | if (!handle) { |
3435 | printk(KERN_WARNING "EXT4-fs: Quota write (off=%Lu, len=%Lu)" | 3503 | printk(KERN_WARNING "EXT4-fs: Quota write (off=%llu, len=%llu)" |
3436 | " cancelled because transaction is not started.\n", | 3504 | " cancelled because transaction is not started.\n", |
3437 | (unsigned long long)off, (unsigned long long)len); | 3505 | (unsigned long long)off, (unsigned long long)len); |
3438 | return -EIO; | 3506 | return -EIO; |
@@ -3493,18 +3561,82 @@ static int ext4_get_sb(struct file_system_type *fs_type, | |||
3493 | return get_sb_bdev(fs_type, flags, dev_name, data, ext4_fill_super, mnt); | 3561 | return get_sb_bdev(fs_type, flags, dev_name, data, ext4_fill_super, mnt); |
3494 | } | 3562 | } |
3495 | 3563 | ||
3564 | #ifdef CONFIG_PROC_FS | ||
3565 | static int ext4_ui_proc_show(struct seq_file *m, void *v) | ||
3566 | { | ||
3567 | unsigned int *p = m->private; | ||
3568 | |||
3569 | seq_printf(m, "%u\n", *p); | ||
3570 | return 0; | ||
3571 | } | ||
3572 | |||
3573 | static int ext4_ui_proc_open(struct inode *inode, struct file *file) | ||
3574 | { | ||
3575 | return single_open(file, ext4_ui_proc_show, PDE(inode)->data); | ||
3576 | } | ||
3577 | |||
3578 | static ssize_t ext4_ui_proc_write(struct file *file, const char __user *buf, | ||
3579 | size_t cnt, loff_t *ppos) | ||
3580 | { | ||
3581 | unsigned int *p = PDE(file->f_path.dentry->d_inode)->data; | ||
3582 | char str[32]; | ||
3583 | unsigned long value; | ||
3584 | |||
3585 | if (cnt >= sizeof(str)) | ||
3586 | return -EINVAL; | ||
3587 | if (copy_from_user(str, buf, cnt)) | ||
3588 | return -EFAULT; | ||
3589 | value = simple_strtol(str, NULL, 0); | ||
3590 | if (value < 0) | ||
3591 | return -ERANGE; | ||
3592 | *p = value; | ||
3593 | return cnt; | ||
3594 | } | ||
3595 | |||
3596 | const struct file_operations ext4_ui_proc_fops = { | ||
3597 | .owner = THIS_MODULE, | ||
3598 | .open = ext4_ui_proc_open, | ||
3599 | .read = seq_read, | ||
3600 | .llseek = seq_lseek, | ||
3601 | .release = single_release, | ||
3602 | .write = ext4_ui_proc_write, | ||
3603 | }; | ||
3604 | #endif | ||
3605 | |||
3606 | static struct file_system_type ext4_fs_type = { | ||
3607 | .owner = THIS_MODULE, | ||
3608 | .name = "ext4", | ||
3609 | .get_sb = ext4_get_sb, | ||
3610 | .kill_sb = kill_block_super, | ||
3611 | .fs_flags = FS_REQUIRES_DEV, | ||
3612 | }; | ||
3613 | |||
3614 | #ifdef CONFIG_EXT4DEV_COMPAT | ||
3615 | static int ext4dev_get_sb(struct file_system_type *fs_type, | ||
3616 | int flags, const char *dev_name, void *data, struct vfsmount *mnt) | ||
3617 | { | ||
3618 | printk(KERN_WARNING "EXT4-fs: Update your userspace programs " | ||
3619 | "to mount using ext4\n"); | ||
3620 | printk(KERN_WARNING "EXT4-fs: ext4dev backwards compatibility " | ||
3621 | "will go away by 2.6.31\n"); | ||
3622 | return get_sb_bdev(fs_type, flags, dev_name, data, ext4_fill_super, mnt); | ||
3623 | } | ||
3624 | |||
3496 | static struct file_system_type ext4dev_fs_type = { | 3625 | static struct file_system_type ext4dev_fs_type = { |
3497 | .owner = THIS_MODULE, | 3626 | .owner = THIS_MODULE, |
3498 | .name = "ext4dev", | 3627 | .name = "ext4dev", |
3499 | .get_sb = ext4_get_sb, | 3628 | .get_sb = ext4dev_get_sb, |
3500 | .kill_sb = kill_block_super, | 3629 | .kill_sb = kill_block_super, |
3501 | .fs_flags = FS_REQUIRES_DEV, | 3630 | .fs_flags = FS_REQUIRES_DEV, |
3502 | }; | 3631 | }; |
3632 | MODULE_ALIAS("ext4dev"); | ||
3633 | #endif | ||
3503 | 3634 | ||
3504 | static int __init init_ext4_fs(void) | 3635 | static int __init init_ext4_fs(void) |
3505 | { | 3636 | { |
3506 | int err; | 3637 | int err; |
3507 | 3638 | ||
3639 | ext4_proc_root = proc_mkdir("fs/ext4", NULL); | ||
3508 | err = init_ext4_mballoc(); | 3640 | err = init_ext4_mballoc(); |
3509 | if (err) | 3641 | if (err) |
3510 | return err; | 3642 | return err; |
@@ -3515,9 +3647,16 @@ static int __init init_ext4_fs(void) | |||
3515 | err = init_inodecache(); | 3647 | err = init_inodecache(); |
3516 | if (err) | 3648 | if (err) |
3517 | goto out1; | 3649 | goto out1; |
3518 | err = register_filesystem(&ext4dev_fs_type); | 3650 | err = register_filesystem(&ext4_fs_type); |
3519 | if (err) | 3651 | if (err) |
3520 | goto out; | 3652 | goto out; |
3653 | #ifdef CONFIG_EXT4DEV_COMPAT | ||
3654 | err = register_filesystem(&ext4dev_fs_type); | ||
3655 | if (err) { | ||
3656 | unregister_filesystem(&ext4_fs_type); | ||
3657 | goto out; | ||
3658 | } | ||
3659 | #endif | ||
3521 | return 0; | 3660 | return 0; |
3522 | out: | 3661 | out: |
3523 | destroy_inodecache(); | 3662 | destroy_inodecache(); |
@@ -3530,10 +3669,14 @@ out2: | |||
3530 | 3669 | ||
3531 | static void __exit exit_ext4_fs(void) | 3670 | static void __exit exit_ext4_fs(void) |
3532 | { | 3671 | { |
3672 | unregister_filesystem(&ext4_fs_type); | ||
3673 | #ifdef CONFIG_EXT4DEV_COMPAT | ||
3533 | unregister_filesystem(&ext4dev_fs_type); | 3674 | unregister_filesystem(&ext4dev_fs_type); |
3675 | #endif | ||
3534 | destroy_inodecache(); | 3676 | destroy_inodecache(); |
3535 | exit_ext4_xattr(); | 3677 | exit_ext4_xattr(); |
3536 | exit_ext4_mballoc(); | 3678 | exit_ext4_mballoc(); |
3679 | remove_proc_entry("fs/ext4", NULL); | ||
3537 | } | 3680 | } |
3538 | 3681 | ||
3539 | MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others"); | 3682 | MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others"); |
diff --git a/fs/ext4/symlink.c b/fs/ext4/symlink.c index e9178643dc01..00740cb32be3 100644 --- a/fs/ext4/symlink.c +++ b/fs/ext4/symlink.c | |||
@@ -23,10 +23,10 @@ | |||
23 | #include "ext4.h" | 23 | #include "ext4.h" |
24 | #include "xattr.h" | 24 | #include "xattr.h" |
25 | 25 | ||
26 | static void * ext4_follow_link(struct dentry *dentry, struct nameidata *nd) | 26 | static void *ext4_follow_link(struct dentry *dentry, struct nameidata *nd) |
27 | { | 27 | { |
28 | struct ext4_inode_info *ei = EXT4_I(dentry->d_inode); | 28 | struct ext4_inode_info *ei = EXT4_I(dentry->d_inode); |
29 | nd_set_link(nd, (char*)ei->i_data); | 29 | nd_set_link(nd, (char *) ei->i_data); |
30 | return NULL; | 30 | return NULL; |
31 | } | 31 | } |
32 | 32 | ||
@@ -34,7 +34,7 @@ const struct inode_operations ext4_symlink_inode_operations = { | |||
34 | .readlink = generic_readlink, | 34 | .readlink = generic_readlink, |
35 | .follow_link = page_follow_link_light, | 35 | .follow_link = page_follow_link_light, |
36 | .put_link = page_put_link, | 36 | .put_link = page_put_link, |
37 | #ifdef CONFIG_EXT4DEV_FS_XATTR | 37 | #ifdef CONFIG_EXT4_FS_XATTR |
38 | .setxattr = generic_setxattr, | 38 | .setxattr = generic_setxattr, |
39 | .getxattr = generic_getxattr, | 39 | .getxattr = generic_getxattr, |
40 | .listxattr = ext4_listxattr, | 40 | .listxattr = ext4_listxattr, |
@@ -45,7 +45,7 @@ const struct inode_operations ext4_symlink_inode_operations = { | |||
45 | const struct inode_operations ext4_fast_symlink_inode_operations = { | 45 | const struct inode_operations ext4_fast_symlink_inode_operations = { |
46 | .readlink = generic_readlink, | 46 | .readlink = generic_readlink, |
47 | .follow_link = ext4_follow_link, | 47 | .follow_link = ext4_follow_link, |
48 | #ifdef CONFIG_EXT4DEV_FS_XATTR | 48 | #ifdef CONFIG_EXT4_FS_XATTR |
49 | .setxattr = generic_setxattr, | 49 | .setxattr = generic_setxattr, |
50 | .getxattr = generic_getxattr, | 50 | .getxattr = generic_getxattr, |
51 | .listxattr = ext4_listxattr, | 51 | .listxattr = ext4_listxattr, |
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index 8954208b4893..80626d516fee 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c | |||
@@ -99,12 +99,12 @@ static struct mb_cache *ext4_xattr_cache; | |||
99 | 99 | ||
100 | static struct xattr_handler *ext4_xattr_handler_map[] = { | 100 | static struct xattr_handler *ext4_xattr_handler_map[] = { |
101 | [EXT4_XATTR_INDEX_USER] = &ext4_xattr_user_handler, | 101 | [EXT4_XATTR_INDEX_USER] = &ext4_xattr_user_handler, |
102 | #ifdef CONFIG_EXT4DEV_FS_POSIX_ACL | 102 | #ifdef CONFIG_EXT4_FS_POSIX_ACL |
103 | [EXT4_XATTR_INDEX_POSIX_ACL_ACCESS] = &ext4_xattr_acl_access_handler, | 103 | [EXT4_XATTR_INDEX_POSIX_ACL_ACCESS] = &ext4_xattr_acl_access_handler, |
104 | [EXT4_XATTR_INDEX_POSIX_ACL_DEFAULT] = &ext4_xattr_acl_default_handler, | 104 | [EXT4_XATTR_INDEX_POSIX_ACL_DEFAULT] = &ext4_xattr_acl_default_handler, |
105 | #endif | 105 | #endif |
106 | [EXT4_XATTR_INDEX_TRUSTED] = &ext4_xattr_trusted_handler, | 106 | [EXT4_XATTR_INDEX_TRUSTED] = &ext4_xattr_trusted_handler, |
107 | #ifdef CONFIG_EXT4DEV_FS_SECURITY | 107 | #ifdef CONFIG_EXT4_FS_SECURITY |
108 | [EXT4_XATTR_INDEX_SECURITY] = &ext4_xattr_security_handler, | 108 | [EXT4_XATTR_INDEX_SECURITY] = &ext4_xattr_security_handler, |
109 | #endif | 109 | #endif |
110 | }; | 110 | }; |
@@ -112,11 +112,11 @@ static struct xattr_handler *ext4_xattr_handler_map[] = { | |||
112 | struct xattr_handler *ext4_xattr_handlers[] = { | 112 | struct xattr_handler *ext4_xattr_handlers[] = { |
113 | &ext4_xattr_user_handler, | 113 | &ext4_xattr_user_handler, |
114 | &ext4_xattr_trusted_handler, | 114 | &ext4_xattr_trusted_handler, |
115 | #ifdef CONFIG_EXT4DEV_FS_POSIX_ACL | 115 | #ifdef CONFIG_EXT4_FS_POSIX_ACL |
116 | &ext4_xattr_acl_access_handler, | 116 | &ext4_xattr_acl_access_handler, |
117 | &ext4_xattr_acl_default_handler, | 117 | &ext4_xattr_acl_default_handler, |
118 | #endif | 118 | #endif |
119 | #ifdef CONFIG_EXT4DEV_FS_SECURITY | 119 | #ifdef CONFIG_EXT4_FS_SECURITY |
120 | &ext4_xattr_security_handler, | 120 | &ext4_xattr_security_handler, |
121 | #endif | 121 | #endif |
122 | NULL | 122 | NULL |
@@ -959,6 +959,7 @@ ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index, | |||
959 | struct ext4_xattr_block_find bs = { | 959 | struct ext4_xattr_block_find bs = { |
960 | .s = { .not_found = -ENODATA, }, | 960 | .s = { .not_found = -ENODATA, }, |
961 | }; | 961 | }; |
962 | unsigned long no_expand; | ||
962 | int error; | 963 | int error; |
963 | 964 | ||
964 | if (!name) | 965 | if (!name) |
@@ -966,6 +967,9 @@ ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index, | |||
966 | if (strlen(name) > 255) | 967 | if (strlen(name) > 255) |
967 | return -ERANGE; | 968 | return -ERANGE; |
968 | down_write(&EXT4_I(inode)->xattr_sem); | 969 | down_write(&EXT4_I(inode)->xattr_sem); |
970 | no_expand = EXT4_I(inode)->i_state & EXT4_STATE_NO_EXPAND; | ||
971 | EXT4_I(inode)->i_state |= EXT4_STATE_NO_EXPAND; | ||
972 | |||
969 | error = ext4_get_inode_loc(inode, &is.iloc); | 973 | error = ext4_get_inode_loc(inode, &is.iloc); |
970 | if (error) | 974 | if (error) |
971 | goto cleanup; | 975 | goto cleanup; |
@@ -1042,6 +1046,8 @@ ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index, | |||
1042 | cleanup: | 1046 | cleanup: |
1043 | brelse(is.iloc.bh); | 1047 | brelse(is.iloc.bh); |
1044 | brelse(bs.bh); | 1048 | brelse(bs.bh); |
1049 | if (no_expand == 0) | ||
1050 | EXT4_I(inode)->i_state &= ~EXT4_STATE_NO_EXPAND; | ||
1045 | up_write(&EXT4_I(inode)->xattr_sem); | 1051 | up_write(&EXT4_I(inode)->xattr_sem); |
1046 | return error; | 1052 | return error; |
1047 | } | 1053 | } |
diff --git a/fs/ext4/xattr.h b/fs/ext4/xattr.h index 5992fe979bb9..8ede88b18c29 100644 --- a/fs/ext4/xattr.h +++ b/fs/ext4/xattr.h | |||
@@ -51,8 +51,8 @@ struct ext4_xattr_entry { | |||
51 | (((name_len) + EXT4_XATTR_ROUND + \ | 51 | (((name_len) + EXT4_XATTR_ROUND + \ |
52 | sizeof(struct ext4_xattr_entry)) & ~EXT4_XATTR_ROUND) | 52 | sizeof(struct ext4_xattr_entry)) & ~EXT4_XATTR_ROUND) |
53 | #define EXT4_XATTR_NEXT(entry) \ | 53 | #define EXT4_XATTR_NEXT(entry) \ |
54 | ( (struct ext4_xattr_entry *)( \ | 54 | ((struct ext4_xattr_entry *)( \ |
55 | (char *)(entry) + EXT4_XATTR_LEN((entry)->e_name_len)) ) | 55 | (char *)(entry) + EXT4_XATTR_LEN((entry)->e_name_len))) |
56 | #define EXT4_XATTR_SIZE(size) \ | 56 | #define EXT4_XATTR_SIZE(size) \ |
57 | (((size) + EXT4_XATTR_ROUND) & ~EXT4_XATTR_ROUND) | 57 | (((size) + EXT4_XATTR_ROUND) & ~EXT4_XATTR_ROUND) |
58 | 58 | ||
@@ -63,7 +63,7 @@ struct ext4_xattr_entry { | |||
63 | EXT4_I(inode)->i_extra_isize)) | 63 | EXT4_I(inode)->i_extra_isize)) |
64 | #define IFIRST(hdr) ((struct ext4_xattr_entry *)((hdr)+1)) | 64 | #define IFIRST(hdr) ((struct ext4_xattr_entry *)((hdr)+1)) |
65 | 65 | ||
66 | # ifdef CONFIG_EXT4DEV_FS_XATTR | 66 | # ifdef CONFIG_EXT4_FS_XATTR |
67 | 67 | ||
68 | extern struct xattr_handler ext4_xattr_user_handler; | 68 | extern struct xattr_handler ext4_xattr_user_handler; |
69 | extern struct xattr_handler ext4_xattr_trusted_handler; | 69 | extern struct xattr_handler ext4_xattr_trusted_handler; |
@@ -88,7 +88,7 @@ extern void exit_ext4_xattr(void); | |||
88 | 88 | ||
89 | extern struct xattr_handler *ext4_xattr_handlers[]; | 89 | extern struct xattr_handler *ext4_xattr_handlers[]; |
90 | 90 | ||
91 | # else /* CONFIG_EXT4DEV_FS_XATTR */ | 91 | # else /* CONFIG_EXT4_FS_XATTR */ |
92 | 92 | ||
93 | static inline int | 93 | static inline int |
94 | ext4_xattr_get(struct inode *inode, int name_index, const char *name, | 94 | ext4_xattr_get(struct inode *inode, int name_index, const char *name, |
@@ -141,9 +141,9 @@ ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize, | |||
141 | 141 | ||
142 | #define ext4_xattr_handlers NULL | 142 | #define ext4_xattr_handlers NULL |
143 | 143 | ||
144 | # endif /* CONFIG_EXT4DEV_FS_XATTR */ | 144 | # endif /* CONFIG_EXT4_FS_XATTR */ |
145 | 145 | ||
146 | #ifdef CONFIG_EXT4DEV_FS_SECURITY | 146 | #ifdef CONFIG_EXT4_FS_SECURITY |
147 | extern int ext4_init_security(handle_t *handle, struct inode *inode, | 147 | extern int ext4_init_security(handle_t *handle, struct inode *inode, |
148 | struct inode *dir); | 148 | struct inode *dir); |
149 | #else | 149 | #else |
diff --git a/fs/fat/fatent.c b/fs/fat/fatent.c index 302e95c4af7e..fb98b3d847ed 100644 --- a/fs/fat/fatent.c +++ b/fs/fat/fatent.c | |||
@@ -6,6 +6,7 @@ | |||
6 | #include <linux/module.h> | 6 | #include <linux/module.h> |
7 | #include <linux/fs.h> | 7 | #include <linux/fs.h> |
8 | #include <linux/msdos_fs.h> | 8 | #include <linux/msdos_fs.h> |
9 | #include <linux/blkdev.h> | ||
9 | 10 | ||
10 | struct fatent_operations { | 11 | struct fatent_operations { |
11 | void (*ent_blocknr)(struct super_block *, int, int *, sector_t *); | 12 | void (*ent_blocknr)(struct super_block *, int, int *, sector_t *); |
@@ -535,6 +536,7 @@ int fat_free_clusters(struct inode *inode, int cluster) | |||
535 | struct fat_entry fatent; | 536 | struct fat_entry fatent; |
536 | struct buffer_head *bhs[MAX_BUF_PER_PAGE]; | 537 | struct buffer_head *bhs[MAX_BUF_PER_PAGE]; |
537 | int i, err, nr_bhs; | 538 | int i, err, nr_bhs; |
539 | int first_cl = cluster; | ||
538 | 540 | ||
539 | nr_bhs = 0; | 541 | nr_bhs = 0; |
540 | fatent_init(&fatent); | 542 | fatent_init(&fatent); |
@@ -551,6 +553,18 @@ int fat_free_clusters(struct inode *inode, int cluster) | |||
551 | goto error; | 553 | goto error; |
552 | } | 554 | } |
553 | 555 | ||
556 | /* | ||
557 | * Issue discard for the sectors we no longer care about, | ||
558 | * batching contiguous clusters into one request | ||
559 | */ | ||
560 | if (cluster != fatent.entry + 1) { | ||
561 | int nr_clus = fatent.entry - first_cl + 1; | ||
562 | |||
563 | sb_issue_discard(sb, fat_clus_to_blknr(sbi, first_cl), | ||
564 | nr_clus * sbi->sec_per_clus); | ||
565 | first_cl = cluster; | ||
566 | } | ||
567 | |||
554 | ops->ent_put(&fatent, FAT_ENT_FREE); | 568 | ops->ent_put(&fatent, FAT_ENT_FREE); |
555 | if (sbi->free_clusters != -1) { | 569 | if (sbi->free_clusters != -1) { |
556 | sbi->free_clusters++; | 570 | sbi->free_clusters++; |
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index 13391e546616..c962283d4e7f 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c | |||
@@ -1265,6 +1265,8 @@ static void blocking_cb(struct gfs2_sbd *sdp, struct lm_lockname *name, | |||
1265 | holdtime = gl->gl_tchange + gl->gl_ops->go_min_hold_time; | 1265 | holdtime = gl->gl_tchange + gl->gl_ops->go_min_hold_time; |
1266 | if (time_before(now, holdtime)) | 1266 | if (time_before(now, holdtime)) |
1267 | delay = holdtime - now; | 1267 | delay = holdtime - now; |
1268 | if (test_bit(GLF_REPLY_PENDING, &gl->gl_flags)) | ||
1269 | delay = gl->gl_ops->go_min_hold_time; | ||
1268 | 1270 | ||
1269 | spin_lock(&gl->gl_spin); | 1271 | spin_lock(&gl->gl_spin); |
1270 | handle_callback(gl, state, 1, delay); | 1272 | handle_callback(gl, state, 1, delay); |
@@ -1578,8 +1580,6 @@ static const char *hflags2str(char *buf, unsigned flags, unsigned long iflags) | |||
1578 | *p++ = 'a'; | 1580 | *p++ = 'a'; |
1579 | if (flags & GL_EXACT) | 1581 | if (flags & GL_EXACT) |
1580 | *p++ = 'E'; | 1582 | *p++ = 'E'; |
1581 | if (flags & GL_ATIME) | ||
1582 | *p++ = 'a'; | ||
1583 | if (flags & GL_NOCACHE) | 1583 | if (flags & GL_NOCACHE) |
1584 | *p++ = 'c'; | 1584 | *p++ = 'c'; |
1585 | if (test_bit(HIF_HOLDER, &iflags)) | 1585 | if (test_bit(HIF_HOLDER, &iflags)) |
@@ -1816,15 +1816,17 @@ restart: | |||
1816 | if (gl) { | 1816 | if (gl) { |
1817 | gi->gl = hlist_entry(gl->gl_list.next, | 1817 | gi->gl = hlist_entry(gl->gl_list.next, |
1818 | struct gfs2_glock, gl_list); | 1818 | struct gfs2_glock, gl_list); |
1819 | if (gi->gl) | 1819 | } else { |
1820 | gfs2_glock_hold(gi->gl); | 1820 | gi->gl = hlist_entry(gl_hash_table[gi->hash].hb_list.first, |
1821 | struct gfs2_glock, gl_list); | ||
1821 | } | 1822 | } |
1823 | if (gi->gl) | ||
1824 | gfs2_glock_hold(gi->gl); | ||
1822 | read_unlock(gl_lock_addr(gi->hash)); | 1825 | read_unlock(gl_lock_addr(gi->hash)); |
1823 | if (gl) | 1826 | if (gl) |
1824 | gfs2_glock_put(gl); | 1827 | gfs2_glock_put(gl); |
1825 | if (gl && gi->gl == NULL) | ||
1826 | gi->hash++; | ||
1827 | while (gi->gl == NULL) { | 1828 | while (gi->gl == NULL) { |
1829 | gi->hash++; | ||
1828 | if (gi->hash >= GFS2_GL_HASH_SIZE) | 1830 | if (gi->hash >= GFS2_GL_HASH_SIZE) |
1829 | return 1; | 1831 | return 1; |
1830 | read_lock(gl_lock_addr(gi->hash)); | 1832 | read_lock(gl_lock_addr(gi->hash)); |
@@ -1833,7 +1835,6 @@ restart: | |||
1833 | if (gi->gl) | 1835 | if (gi->gl) |
1834 | gfs2_glock_hold(gi->gl); | 1836 | gfs2_glock_hold(gi->gl); |
1835 | read_unlock(gl_lock_addr(gi->hash)); | 1837 | read_unlock(gl_lock_addr(gi->hash)); |
1836 | gi->hash++; | ||
1837 | } | 1838 | } |
1838 | 1839 | ||
1839 | if (gi->sdp != gi->gl->gl_sbd) | 1840 | if (gi->sdp != gi->gl->gl_sbd) |
diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h index 971d92af70fc..695c6b193611 100644 --- a/fs/gfs2/glock.h +++ b/fs/gfs2/glock.h | |||
@@ -24,7 +24,6 @@ | |||
24 | #define GL_ASYNC 0x00000040 | 24 | #define GL_ASYNC 0x00000040 |
25 | #define GL_EXACT 0x00000080 | 25 | #define GL_EXACT 0x00000080 |
26 | #define GL_SKIP 0x00000100 | 26 | #define GL_SKIP 0x00000100 |
27 | #define GL_ATIME 0x00000200 | ||
28 | #define GL_NOCACHE 0x00000400 | 27 | #define GL_NOCACHE 0x00000400 |
29 | 28 | ||
30 | #define GLR_TRYFAILED 13 | 29 | #define GLR_TRYFAILED 13 |
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index 448697a5c462..f566ec1b4e8e 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h | |||
@@ -386,20 +386,21 @@ struct gfs2_statfs_change_host { | |||
386 | #define GFS2_DATA_ORDERED 2 | 386 | #define GFS2_DATA_ORDERED 2 |
387 | 387 | ||
388 | struct gfs2_args { | 388 | struct gfs2_args { |
389 | char ar_lockproto[GFS2_LOCKNAME_LEN]; /* Name of the Lock Protocol */ | 389 | char ar_lockproto[GFS2_LOCKNAME_LEN]; /* Name of the Lock Protocol */ |
390 | char ar_locktable[GFS2_LOCKNAME_LEN]; /* Name of the Lock Table */ | 390 | char ar_locktable[GFS2_LOCKNAME_LEN]; /* Name of the Lock Table */ |
391 | char ar_hostdata[GFS2_LOCKNAME_LEN]; /* Host specific data */ | 391 | char ar_hostdata[GFS2_LOCKNAME_LEN]; /* Host specific data */ |
392 | int ar_spectator; /* Don't get a journal because we're always RO */ | 392 | unsigned int ar_spectator:1; /* Don't get a journal */ |
393 | int ar_ignore_local_fs; /* Don't optimize even if local_fs is 1 */ | 393 | unsigned int ar_ignore_local_fs:1; /* Ignore optimisations */ |
394 | int ar_localflocks; /* Let the VFS do flock|fcntl locks for us */ | 394 | unsigned int ar_localflocks:1; /* Let the VFS do flock|fcntl */ |
395 | int ar_localcaching; /* Local-style caching (dangerous on multihost) */ | 395 | unsigned int ar_localcaching:1; /* Local caching */ |
396 | int ar_debug; /* Oops on errors instead of trying to be graceful */ | 396 | unsigned int ar_debug:1; /* Oops on errors */ |
397 | int ar_upgrade; /* Upgrade ondisk/multihost format */ | 397 | unsigned int ar_upgrade:1; /* Upgrade ondisk format */ |
398 | unsigned int ar_num_glockd; /* Number of glockd threads */ | 398 | unsigned int ar_posix_acl:1; /* Enable posix acls */ |
399 | int ar_posix_acl; /* Enable posix acls */ | 399 | unsigned int ar_quota:2; /* off/account/on */ |
400 | int ar_quota; /* off/account/on */ | 400 | unsigned int ar_suiddir:1; /* suiddir support */ |
401 | int ar_suiddir; /* suiddir support */ | 401 | unsigned int ar_data:2; /* ordered/writeback */ |
402 | int ar_data; /* ordered/writeback */ | 402 | unsigned int ar_meta:1; /* mount metafs */ |
403 | unsigned int ar_num_glockd; /* Number of glockd threads */ | ||
403 | }; | 404 | }; |
404 | 405 | ||
405 | struct gfs2_tune { | 406 | struct gfs2_tune { |
@@ -419,7 +420,6 @@ struct gfs2_tune { | |||
419 | unsigned int gt_quota_scale_den; /* Denominator */ | 420 | unsigned int gt_quota_scale_den; /* Denominator */ |
420 | unsigned int gt_quota_cache_secs; | 421 | unsigned int gt_quota_cache_secs; |
421 | unsigned int gt_quota_quantum; /* Secs between syncs to quota file */ | 422 | unsigned int gt_quota_quantum; /* Secs between syncs to quota file */ |
422 | unsigned int gt_atime_quantum; /* Min secs between atime updates */ | ||
423 | unsigned int gt_new_files_jdata; | 423 | unsigned int gt_new_files_jdata; |
424 | unsigned int gt_max_readahead; /* Max bytes to read-ahead from disk */ | 424 | unsigned int gt_max_readahead; /* Max bytes to read-ahead from disk */ |
425 | unsigned int gt_stall_secs; /* Detects trouble! */ | 425 | unsigned int gt_stall_secs; /* Detects trouble! */ |
@@ -432,7 +432,7 @@ enum { | |||
432 | SDF_JOURNAL_CHECKED = 0, | 432 | SDF_JOURNAL_CHECKED = 0, |
433 | SDF_JOURNAL_LIVE = 1, | 433 | SDF_JOURNAL_LIVE = 1, |
434 | SDF_SHUTDOWN = 2, | 434 | SDF_SHUTDOWN = 2, |
435 | SDF_NOATIME = 3, | 435 | SDF_NOBARRIERS = 3, |
436 | }; | 436 | }; |
437 | 437 | ||
438 | #define GFS2_FSNAME_LEN 256 | 438 | #define GFS2_FSNAME_LEN 256 |
@@ -461,7 +461,6 @@ struct gfs2_sb_host { | |||
461 | 461 | ||
462 | struct gfs2_sbd { | 462 | struct gfs2_sbd { |
463 | struct super_block *sd_vfs; | 463 | struct super_block *sd_vfs; |
464 | struct super_block *sd_vfs_meta; | ||
465 | struct kobject sd_kobj; | 464 | struct kobject sd_kobj; |
466 | unsigned long sd_flags; /* SDF_... */ | 465 | unsigned long sd_flags; /* SDF_... */ |
467 | struct gfs2_sb_host sd_sb; | 466 | struct gfs2_sb_host sd_sb; |
@@ -499,7 +498,9 @@ struct gfs2_sbd { | |||
499 | 498 | ||
500 | /* Inode Stuff */ | 499 | /* Inode Stuff */ |
501 | 500 | ||
502 | struct inode *sd_master_dir; | 501 | struct dentry *sd_master_dir; |
502 | struct dentry *sd_root_dir; | ||
503 | |||
503 | struct inode *sd_jindex; | 504 | struct inode *sd_jindex; |
504 | struct inode *sd_inum_inode; | 505 | struct inode *sd_inum_inode; |
505 | struct inode *sd_statfs_inode; | 506 | struct inode *sd_statfs_inode; |
@@ -634,7 +635,6 @@ struct gfs2_sbd { | |||
634 | /* Debugging crud */ | 635 | /* Debugging crud */ |
635 | 636 | ||
636 | unsigned long sd_last_warning; | 637 | unsigned long sd_last_warning; |
637 | struct vfsmount *sd_gfs2mnt; | ||
638 | struct dentry *debugfs_dir; /* debugfs directory */ | 638 | struct dentry *debugfs_dir; /* debugfs directory */ |
639 | struct dentry *debugfs_dentry_glocks; /* for debugfs */ | 639 | struct dentry *debugfs_dentry_glocks; /* for debugfs */ |
640 | }; | 640 | }; |
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index 8b0806a32948..7cee695fa441 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c | |||
@@ -18,6 +18,7 @@ | |||
18 | #include <linux/crc32.h> | 18 | #include <linux/crc32.h> |
19 | #include <linux/lm_interface.h> | 19 | #include <linux/lm_interface.h> |
20 | #include <linux/security.h> | 20 | #include <linux/security.h> |
21 | #include <linux/time.h> | ||
21 | 22 | ||
22 | #include "gfs2.h" | 23 | #include "gfs2.h" |
23 | #include "incore.h" | 24 | #include "incore.h" |
@@ -249,6 +250,7 @@ static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf) | |||
249 | { | 250 | { |
250 | struct gfs2_dinode_host *di = &ip->i_di; | 251 | struct gfs2_dinode_host *di = &ip->i_di; |
251 | const struct gfs2_dinode *str = buf; | 252 | const struct gfs2_dinode *str = buf; |
253 | struct timespec atime; | ||
252 | u16 height, depth; | 254 | u16 height, depth; |
253 | 255 | ||
254 | if (unlikely(ip->i_no_addr != be64_to_cpu(str->di_num.no_addr))) | 256 | if (unlikely(ip->i_no_addr != be64_to_cpu(str->di_num.no_addr))) |
@@ -275,8 +277,10 @@ static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf) | |||
275 | di->di_size = be64_to_cpu(str->di_size); | 277 | di->di_size = be64_to_cpu(str->di_size); |
276 | i_size_write(&ip->i_inode, di->di_size); | 278 | i_size_write(&ip->i_inode, di->di_size); |
277 | gfs2_set_inode_blocks(&ip->i_inode, be64_to_cpu(str->di_blocks)); | 279 | gfs2_set_inode_blocks(&ip->i_inode, be64_to_cpu(str->di_blocks)); |
278 | ip->i_inode.i_atime.tv_sec = be64_to_cpu(str->di_atime); | 280 | atime.tv_sec = be64_to_cpu(str->di_atime); |
279 | ip->i_inode.i_atime.tv_nsec = be32_to_cpu(str->di_atime_nsec); | 281 | atime.tv_nsec = be32_to_cpu(str->di_atime_nsec); |
282 | if (timespec_compare(&ip->i_inode.i_atime, &atime) < 0) | ||
283 | ip->i_inode.i_atime = atime; | ||
280 | ip->i_inode.i_mtime.tv_sec = be64_to_cpu(str->di_mtime); | 284 | ip->i_inode.i_mtime.tv_sec = be64_to_cpu(str->di_mtime); |
281 | ip->i_inode.i_mtime.tv_nsec = be32_to_cpu(str->di_mtime_nsec); | 285 | ip->i_inode.i_mtime.tv_nsec = be32_to_cpu(str->di_mtime_nsec); |
282 | ip->i_inode.i_ctime.tv_sec = be64_to_cpu(str->di_ctime); | 286 | ip->i_inode.i_ctime.tv_sec = be64_to_cpu(str->di_ctime); |
@@ -1033,13 +1037,11 @@ struct inode *gfs2_createi(struct gfs2_holder *ghs, const struct qstr *name, | |||
1033 | 1037 | ||
1034 | if (bh) | 1038 | if (bh) |
1035 | brelse(bh); | 1039 | brelse(bh); |
1036 | if (!inode) | ||
1037 | return ERR_PTR(-ENOMEM); | ||
1038 | return inode; | 1040 | return inode; |
1039 | 1041 | ||
1040 | fail_gunlock2: | 1042 | fail_gunlock2: |
1041 | gfs2_glock_dq_uninit(ghs + 1); | 1043 | gfs2_glock_dq_uninit(ghs + 1); |
1042 | if (inode) | 1044 | if (inode && !IS_ERR(inode)) |
1043 | iput(inode); | 1045 | iput(inode); |
1044 | fail_gunlock: | 1046 | fail_gunlock: |
1045 | gfs2_glock_dq(ghs); | 1047 | gfs2_glock_dq(ghs); |
@@ -1140,54 +1142,6 @@ int gfs2_unlink_ok(struct gfs2_inode *dip, const struct qstr *name, | |||
1140 | return 0; | 1142 | return 0; |
1141 | } | 1143 | } |
1142 | 1144 | ||
1143 | /* | ||
1144 | * gfs2_ok_to_move - check if it's ok to move a directory to another directory | ||
1145 | * @this: move this | ||
1146 | * @to: to here | ||
1147 | * | ||
1148 | * Follow @to back to the root and make sure we don't encounter @this | ||
1149 | * Assumes we already hold the rename lock. | ||
1150 | * | ||
1151 | * Returns: errno | ||
1152 | */ | ||
1153 | |||
1154 | int gfs2_ok_to_move(struct gfs2_inode *this, struct gfs2_inode *to) | ||
1155 | { | ||
1156 | struct inode *dir = &to->i_inode; | ||
1157 | struct super_block *sb = dir->i_sb; | ||
1158 | struct inode *tmp; | ||
1159 | struct qstr dotdot; | ||
1160 | int error = 0; | ||
1161 | |||
1162 | gfs2_str2qstr(&dotdot, ".."); | ||
1163 | |||
1164 | igrab(dir); | ||
1165 | |||
1166 | for (;;) { | ||
1167 | if (dir == &this->i_inode) { | ||
1168 | error = -EINVAL; | ||
1169 | break; | ||
1170 | } | ||
1171 | if (dir == sb->s_root->d_inode) { | ||
1172 | error = 0; | ||
1173 | break; | ||
1174 | } | ||
1175 | |||
1176 | tmp = gfs2_lookupi(dir, &dotdot, 1); | ||
1177 | if (IS_ERR(tmp)) { | ||
1178 | error = PTR_ERR(tmp); | ||
1179 | break; | ||
1180 | } | ||
1181 | |||
1182 | iput(dir); | ||
1183 | dir = tmp; | ||
1184 | } | ||
1185 | |||
1186 | iput(dir); | ||
1187 | |||
1188 | return error; | ||
1189 | } | ||
1190 | |||
1191 | /** | 1145 | /** |
1192 | * gfs2_readlinki - return the contents of a symlink | 1146 | * gfs2_readlinki - return the contents of a symlink |
1193 | * @ip: the symlink's inode | 1147 | * @ip: the symlink's inode |
@@ -1207,8 +1161,8 @@ int gfs2_readlinki(struct gfs2_inode *ip, char **buf, unsigned int *len) | |||
1207 | unsigned int x; | 1161 | unsigned int x; |
1208 | int error; | 1162 | int error; |
1209 | 1163 | ||
1210 | gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME, &i_gh); | 1164 | gfs2_holder_init(ip->i_gl, LM_ST_SHARED, 0, &i_gh); |
1211 | error = gfs2_glock_nq_atime(&i_gh); | 1165 | error = gfs2_glock_nq(&i_gh); |
1212 | if (error) { | 1166 | if (error) { |
1213 | gfs2_holder_uninit(&i_gh); | 1167 | gfs2_holder_uninit(&i_gh); |
1214 | return error; | 1168 | return error; |
@@ -1243,101 +1197,6 @@ out: | |||
1243 | return error; | 1197 | return error; |
1244 | } | 1198 | } |
1245 | 1199 | ||
1246 | /** | ||
1247 | * gfs2_glock_nq_atime - Acquire a hold on an inode's glock, and | ||
1248 | * conditionally update the inode's atime | ||
1249 | * @gh: the holder to acquire | ||
1250 | * | ||
1251 | * Tests atime (access time) for gfs2_read, gfs2_readdir and gfs2_mmap | ||
1252 | * Update if the difference between the current time and the inode's current | ||
1253 | * atime is greater than an interval specified at mount. | ||
1254 | * | ||
1255 | * Returns: errno | ||
1256 | */ | ||
1257 | |||
1258 | int gfs2_glock_nq_atime(struct gfs2_holder *gh) | ||
1259 | { | ||
1260 | struct gfs2_glock *gl = gh->gh_gl; | ||
1261 | struct gfs2_sbd *sdp = gl->gl_sbd; | ||
1262 | struct gfs2_inode *ip = gl->gl_object; | ||
1263 | s64 quantum = gfs2_tune_get(sdp, gt_atime_quantum); | ||
1264 | unsigned int state; | ||
1265 | int flags; | ||
1266 | int error; | ||
1267 | struct timespec tv = CURRENT_TIME; | ||
1268 | |||
1269 | if (gfs2_assert_warn(sdp, gh->gh_flags & GL_ATIME) || | ||
1270 | gfs2_assert_warn(sdp, !(gh->gh_flags & GL_ASYNC)) || | ||
1271 | gfs2_assert_warn(sdp, gl->gl_ops == &gfs2_inode_glops)) | ||
1272 | return -EINVAL; | ||
1273 | |||
1274 | state = gh->gh_state; | ||
1275 | flags = gh->gh_flags; | ||
1276 | |||
1277 | error = gfs2_glock_nq(gh); | ||
1278 | if (error) | ||
1279 | return error; | ||
1280 | |||
1281 | if (test_bit(SDF_NOATIME, &sdp->sd_flags) || | ||
1282 | (sdp->sd_vfs->s_flags & MS_RDONLY)) | ||
1283 | return 0; | ||
1284 | |||
1285 | if (tv.tv_sec - ip->i_inode.i_atime.tv_sec >= quantum) { | ||
1286 | gfs2_glock_dq(gh); | ||
1287 | gfs2_holder_reinit(LM_ST_EXCLUSIVE, gh->gh_flags & ~LM_FLAG_ANY, | ||
1288 | gh); | ||
1289 | error = gfs2_glock_nq(gh); | ||
1290 | if (error) | ||
1291 | return error; | ||
1292 | |||
1293 | /* Verify that atime hasn't been updated while we were | ||
1294 | trying to get exclusive lock. */ | ||
1295 | |||
1296 | tv = CURRENT_TIME; | ||
1297 | if (tv.tv_sec - ip->i_inode.i_atime.tv_sec >= quantum) { | ||
1298 | struct buffer_head *dibh; | ||
1299 | struct gfs2_dinode *di; | ||
1300 | |||
1301 | error = gfs2_trans_begin(sdp, RES_DINODE, 0); | ||
1302 | if (error == -EROFS) | ||
1303 | return 0; | ||
1304 | if (error) | ||
1305 | goto fail; | ||
1306 | |||
1307 | error = gfs2_meta_inode_buffer(ip, &dibh); | ||
1308 | if (error) | ||
1309 | goto fail_end_trans; | ||
1310 | |||
1311 | ip->i_inode.i_atime = tv; | ||
1312 | |||
1313 | gfs2_trans_add_bh(ip->i_gl, dibh, 1); | ||
1314 | di = (struct gfs2_dinode *)dibh->b_data; | ||
1315 | di->di_atime = cpu_to_be64(ip->i_inode.i_atime.tv_sec); | ||
1316 | di->di_atime_nsec = cpu_to_be32(ip->i_inode.i_atime.tv_nsec); | ||
1317 | brelse(dibh); | ||
1318 | |||
1319 | gfs2_trans_end(sdp); | ||
1320 | } | ||
1321 | |||
1322 | /* If someone else has asked for the glock, | ||
1323 | unlock and let them have it. Then reacquire | ||
1324 | in the original state. */ | ||
1325 | if (gfs2_glock_is_blocking(gl)) { | ||
1326 | gfs2_glock_dq(gh); | ||
1327 | gfs2_holder_reinit(state, flags, gh); | ||
1328 | return gfs2_glock_nq(gh); | ||
1329 | } | ||
1330 | } | ||
1331 | |||
1332 | return 0; | ||
1333 | |||
1334 | fail_end_trans: | ||
1335 | gfs2_trans_end(sdp); | ||
1336 | fail: | ||
1337 | gfs2_glock_dq(gh); | ||
1338 | return error; | ||
1339 | } | ||
1340 | |||
1341 | static int | 1200 | static int |
1342 | __gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr) | 1201 | __gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr) |
1343 | { | 1202 | { |
diff --git a/fs/gfs2/inode.h b/fs/gfs2/inode.h index 58f9607d6a86..2d43f69610a0 100644 --- a/fs/gfs2/inode.h +++ b/fs/gfs2/inode.h | |||
@@ -91,9 +91,7 @@ int gfs2_rmdiri(struct gfs2_inode *dip, const struct qstr *name, | |||
91 | int gfs2_unlink_ok(struct gfs2_inode *dip, const struct qstr *name, | 91 | int gfs2_unlink_ok(struct gfs2_inode *dip, const struct qstr *name, |
92 | const struct gfs2_inode *ip); | 92 | const struct gfs2_inode *ip); |
93 | int gfs2_permission(struct inode *inode, int mask); | 93 | int gfs2_permission(struct inode *inode, int mask); |
94 | int gfs2_ok_to_move(struct gfs2_inode *this, struct gfs2_inode *to); | ||
95 | int gfs2_readlinki(struct gfs2_inode *ip, char **buf, unsigned int *len); | 94 | int gfs2_readlinki(struct gfs2_inode *ip, char **buf, unsigned int *len); |
96 | int gfs2_glock_nq_atime(struct gfs2_holder *gh); | ||
97 | int gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr); | 95 | int gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr); |
98 | struct inode *gfs2_lookup_simple(struct inode *dip, const char *name); | 96 | struct inode *gfs2_lookup_simple(struct inode *dip, const char *name); |
99 | void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf); | 97 | void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf); |
diff --git a/fs/gfs2/locking/dlm/mount.c b/fs/gfs2/locking/dlm/mount.c index 09d78c216f48..0c4cbe6c8285 100644 --- a/fs/gfs2/locking/dlm/mount.c +++ b/fs/gfs2/locking/dlm/mount.c | |||
@@ -144,7 +144,8 @@ static int gdlm_mount(char *table_name, char *host_data, | |||
144 | 144 | ||
145 | error = dlm_new_lockspace(ls->fsname, strlen(ls->fsname), | 145 | error = dlm_new_lockspace(ls->fsname, strlen(ls->fsname), |
146 | &ls->dlm_lockspace, | 146 | &ls->dlm_lockspace, |
147 | DLM_LSFL_FS | (nodir ? DLM_LSFL_NODIR : 0), | 147 | DLM_LSFL_FS | DLM_LSFL_NEWEXCL | |
148 | (nodir ? DLM_LSFL_NODIR : 0), | ||
148 | GDLM_LVB_SIZE); | 149 | GDLM_LVB_SIZE); |
149 | if (error) { | 150 | if (error) { |
150 | log_error("dlm_new_lockspace error %d", error); | 151 | log_error("dlm_new_lockspace error %d", error); |
diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c index 6c6af9f5e3ab..ad305854bdc6 100644 --- a/fs/gfs2/log.c +++ b/fs/gfs2/log.c | |||
@@ -18,6 +18,7 @@ | |||
18 | #include <linux/delay.h> | 18 | #include <linux/delay.h> |
19 | #include <linux/kthread.h> | 19 | #include <linux/kthread.h> |
20 | #include <linux/freezer.h> | 20 | #include <linux/freezer.h> |
21 | #include <linux/bio.h> | ||
21 | 22 | ||
22 | #include "gfs2.h" | 23 | #include "gfs2.h" |
23 | #include "incore.h" | 24 | #include "incore.h" |
@@ -584,7 +585,6 @@ static void log_write_header(struct gfs2_sbd *sdp, u32 flags, int pull) | |||
584 | memset(bh->b_data, 0, bh->b_size); | 585 | memset(bh->b_data, 0, bh->b_size); |
585 | set_buffer_uptodate(bh); | 586 | set_buffer_uptodate(bh); |
586 | clear_buffer_dirty(bh); | 587 | clear_buffer_dirty(bh); |
587 | unlock_buffer(bh); | ||
588 | 588 | ||
589 | gfs2_ail1_empty(sdp, 0); | 589 | gfs2_ail1_empty(sdp, 0); |
590 | tail = current_tail(sdp); | 590 | tail = current_tail(sdp); |
@@ -601,8 +601,23 @@ static void log_write_header(struct gfs2_sbd *sdp, u32 flags, int pull) | |||
601 | hash = gfs2_disk_hash(bh->b_data, sizeof(struct gfs2_log_header)); | 601 | hash = gfs2_disk_hash(bh->b_data, sizeof(struct gfs2_log_header)); |
602 | lh->lh_hash = cpu_to_be32(hash); | 602 | lh->lh_hash = cpu_to_be32(hash); |
603 | 603 | ||
604 | set_buffer_dirty(bh); | 604 | bh->b_end_io = end_buffer_write_sync; |
605 | if (sync_dirty_buffer(bh)) | 605 | if (test_bit(SDF_NOBARRIERS, &sdp->sd_flags)) |
606 | goto skip_barrier; | ||
607 | get_bh(bh); | ||
608 | submit_bh(WRITE_BARRIER | (1 << BIO_RW_META), bh); | ||
609 | wait_on_buffer(bh); | ||
610 | if (buffer_eopnotsupp(bh)) { | ||
611 | clear_buffer_eopnotsupp(bh); | ||
612 | set_buffer_uptodate(bh); | ||
613 | set_bit(SDF_NOBARRIERS, &sdp->sd_flags); | ||
614 | lock_buffer(bh); | ||
615 | skip_barrier: | ||
616 | get_bh(bh); | ||
617 | submit_bh(WRITE_SYNC | (1 << BIO_RW_META), bh); | ||
618 | wait_on_buffer(bh); | ||
619 | } | ||
620 | if (!buffer_uptodate(bh)) | ||
606 | gfs2_io_error_bh(sdp, bh); | 621 | gfs2_io_error_bh(sdp, bh); |
607 | brelse(bh); | 622 | brelse(bh); |
608 | 623 | ||
diff --git a/fs/gfs2/mount.c b/fs/gfs2/mount.c index b941f9f9f958..df48333e6f01 100644 --- a/fs/gfs2/mount.c +++ b/fs/gfs2/mount.c | |||
@@ -42,6 +42,7 @@ enum { | |||
42 | Opt_nosuiddir, | 42 | Opt_nosuiddir, |
43 | Opt_data_writeback, | 43 | Opt_data_writeback, |
44 | Opt_data_ordered, | 44 | Opt_data_ordered, |
45 | Opt_meta, | ||
45 | Opt_err, | 46 | Opt_err, |
46 | }; | 47 | }; |
47 | 48 | ||
@@ -66,6 +67,7 @@ static match_table_t tokens = { | |||
66 | {Opt_nosuiddir, "nosuiddir"}, | 67 | {Opt_nosuiddir, "nosuiddir"}, |
67 | {Opt_data_writeback, "data=writeback"}, | 68 | {Opt_data_writeback, "data=writeback"}, |
68 | {Opt_data_ordered, "data=ordered"}, | 69 | {Opt_data_ordered, "data=ordered"}, |
70 | {Opt_meta, "meta"}, | ||
69 | {Opt_err, NULL} | 71 | {Opt_err, NULL} |
70 | }; | 72 | }; |
71 | 73 | ||
@@ -239,6 +241,11 @@ int gfs2_mount_args(struct gfs2_sbd *sdp, char *data_arg, int remount) | |||
239 | case Opt_data_ordered: | 241 | case Opt_data_ordered: |
240 | args->ar_data = GFS2_DATA_ORDERED; | 242 | args->ar_data = GFS2_DATA_ORDERED; |
241 | break; | 243 | break; |
244 | case Opt_meta: | ||
245 | if (remount && args->ar_meta != 1) | ||
246 | goto cant_remount; | ||
247 | args->ar_meta = 1; | ||
248 | break; | ||
242 | case Opt_err: | 249 | case Opt_err: |
243 | default: | 250 | default: |
244 | fs_info(sdp, "unknown option: %s\n", o); | 251 | fs_info(sdp, "unknown option: %s\n", o); |
diff --git a/fs/gfs2/ops_address.c b/fs/gfs2/ops_address.c index e64a1b04117a..27563816e1c5 100644 --- a/fs/gfs2/ops_address.c +++ b/fs/gfs2/ops_address.c | |||
@@ -512,8 +512,8 @@ static int gfs2_readpage(struct file *file, struct page *page) | |||
512 | int error; | 512 | int error; |
513 | 513 | ||
514 | unlock_page(page); | 514 | unlock_page(page); |
515 | gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME, &gh); | 515 | gfs2_holder_init(ip->i_gl, LM_ST_SHARED, 0, &gh); |
516 | error = gfs2_glock_nq_atime(&gh); | 516 | error = gfs2_glock_nq(&gh); |
517 | if (unlikely(error)) | 517 | if (unlikely(error)) |
518 | goto out; | 518 | goto out; |
519 | error = AOP_TRUNCATED_PAGE; | 519 | error = AOP_TRUNCATED_PAGE; |
@@ -594,8 +594,8 @@ static int gfs2_readpages(struct file *file, struct address_space *mapping, | |||
594 | struct gfs2_holder gh; | 594 | struct gfs2_holder gh; |
595 | int ret; | 595 | int ret; |
596 | 596 | ||
597 | gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME, &gh); | 597 | gfs2_holder_init(ip->i_gl, LM_ST_SHARED, 0, &gh); |
598 | ret = gfs2_glock_nq_atime(&gh); | 598 | ret = gfs2_glock_nq(&gh); |
599 | if (unlikely(ret)) | 599 | if (unlikely(ret)) |
600 | goto out_uninit; | 600 | goto out_uninit; |
601 | if (!gfs2_is_stuffed(ip)) | 601 | if (!gfs2_is_stuffed(ip)) |
@@ -636,8 +636,8 @@ static int gfs2_write_begin(struct file *file, struct address_space *mapping, | |||
636 | unsigned to = from + len; | 636 | unsigned to = from + len; |
637 | struct page *page; | 637 | struct page *page; |
638 | 638 | ||
639 | gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, GL_ATIME, &ip->i_gh); | 639 | gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &ip->i_gh); |
640 | error = gfs2_glock_nq_atime(&ip->i_gh); | 640 | error = gfs2_glock_nq(&ip->i_gh); |
641 | if (unlikely(error)) | 641 | if (unlikely(error)) |
642 | goto out_uninit; | 642 | goto out_uninit; |
643 | 643 | ||
@@ -975,7 +975,7 @@ static int gfs2_ok_for_dio(struct gfs2_inode *ip, int rw, loff_t offset) | |||
975 | if (gfs2_is_stuffed(ip)) | 975 | if (gfs2_is_stuffed(ip)) |
976 | return 0; | 976 | return 0; |
977 | 977 | ||
978 | if (offset > i_size_read(&ip->i_inode)) | 978 | if (offset >= i_size_read(&ip->i_inode)) |
979 | return 0; | 979 | return 0; |
980 | return 1; | 980 | return 1; |
981 | } | 981 | } |
@@ -1000,8 +1000,8 @@ static ssize_t gfs2_direct_IO(int rw, struct kiocb *iocb, | |||
1000 | * unfortunately have the option of only flushing a range like | 1000 | * unfortunately have the option of only flushing a range like |
1001 | * the VFS does. | 1001 | * the VFS does. |
1002 | */ | 1002 | */ |
1003 | gfs2_holder_init(ip->i_gl, LM_ST_DEFERRED, GL_ATIME, &gh); | 1003 | gfs2_holder_init(ip->i_gl, LM_ST_DEFERRED, 0, &gh); |
1004 | rv = gfs2_glock_nq_atime(&gh); | 1004 | rv = gfs2_glock_nq(&gh); |
1005 | if (rv) | 1005 | if (rv) |
1006 | return rv; | 1006 | return rv; |
1007 | rv = gfs2_ok_for_dio(ip, rw, offset); | 1007 | rv = gfs2_ok_for_dio(ip, rw, offset); |
diff --git a/fs/gfs2/ops_file.c b/fs/gfs2/ops_file.c index e9a366d4411c..3a747f8e2188 100644 --- a/fs/gfs2/ops_file.c +++ b/fs/gfs2/ops_file.c | |||
@@ -89,8 +89,8 @@ static int gfs2_readdir(struct file *file, void *dirent, filldir_t filldir) | |||
89 | u64 offset = file->f_pos; | 89 | u64 offset = file->f_pos; |
90 | int error; | 90 | int error; |
91 | 91 | ||
92 | gfs2_holder_init(dip->i_gl, LM_ST_SHARED, GL_ATIME, &d_gh); | 92 | gfs2_holder_init(dip->i_gl, LM_ST_SHARED, 0, &d_gh); |
93 | error = gfs2_glock_nq_atime(&d_gh); | 93 | error = gfs2_glock_nq(&d_gh); |
94 | if (error) { | 94 | if (error) { |
95 | gfs2_holder_uninit(&d_gh); | 95 | gfs2_holder_uninit(&d_gh); |
96 | return error; | 96 | return error; |
@@ -153,8 +153,8 @@ static int gfs2_get_flags(struct file *filp, u32 __user *ptr) | |||
153 | int error; | 153 | int error; |
154 | u32 fsflags; | 154 | u32 fsflags; |
155 | 155 | ||
156 | gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME, &gh); | 156 | gfs2_holder_init(ip->i_gl, LM_ST_SHARED, 0, &gh); |
157 | error = gfs2_glock_nq_atime(&gh); | 157 | error = gfs2_glock_nq(&gh); |
158 | if (error) | 158 | if (error) |
159 | return error; | 159 | return error; |
160 | 160 | ||
@@ -351,8 +351,8 @@ static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct page *page) | |||
351 | struct gfs2_alloc *al; | 351 | struct gfs2_alloc *al; |
352 | int ret; | 352 | int ret; |
353 | 353 | ||
354 | gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, GL_ATIME, &gh); | 354 | gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh); |
355 | ret = gfs2_glock_nq_atime(&gh); | 355 | ret = gfs2_glock_nq(&gh); |
356 | if (ret) | 356 | if (ret) |
357 | goto out; | 357 | goto out; |
358 | 358 | ||
@@ -434,8 +434,8 @@ static int gfs2_mmap(struct file *file, struct vm_area_struct *vma) | |||
434 | struct gfs2_holder i_gh; | 434 | struct gfs2_holder i_gh; |
435 | int error; | 435 | int error; |
436 | 436 | ||
437 | gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME, &i_gh); | 437 | gfs2_holder_init(ip->i_gl, LM_ST_SHARED, 0, &i_gh); |
438 | error = gfs2_glock_nq_atime(&i_gh); | 438 | error = gfs2_glock_nq(&i_gh); |
439 | if (error) { | 439 | if (error) { |
440 | gfs2_holder_uninit(&i_gh); | 440 | gfs2_holder_uninit(&i_gh); |
441 | return error; | 441 | return error; |
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index b4d1d6490633..b117fcf2c4f5 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c | |||
@@ -40,6 +40,44 @@ | |||
40 | #define DO 0 | 40 | #define DO 0 |
41 | #define UNDO 1 | 41 | #define UNDO 1 |
42 | 42 | ||
43 | static const u32 gfs2_old_fs_formats[] = { | ||
44 | 0 | ||
45 | }; | ||
46 | |||
47 | static const u32 gfs2_old_multihost_formats[] = { | ||
48 | 0 | ||
49 | }; | ||
50 | |||
51 | /** | ||
52 | * gfs2_tune_init - Fill a gfs2_tune structure with default values | ||
53 | * @gt: tune | ||
54 | * | ||
55 | */ | ||
56 | |||
57 | static void gfs2_tune_init(struct gfs2_tune *gt) | ||
58 | { | ||
59 | spin_lock_init(>->gt_spin); | ||
60 | |||
61 | gt->gt_demote_secs = 300; | ||
62 | gt->gt_incore_log_blocks = 1024; | ||
63 | gt->gt_log_flush_secs = 60; | ||
64 | gt->gt_recoverd_secs = 60; | ||
65 | gt->gt_logd_secs = 1; | ||
66 | gt->gt_quotad_secs = 5; | ||
67 | gt->gt_quota_simul_sync = 64; | ||
68 | gt->gt_quota_warn_period = 10; | ||
69 | gt->gt_quota_scale_num = 1; | ||
70 | gt->gt_quota_scale_den = 1; | ||
71 | gt->gt_quota_cache_secs = 300; | ||
72 | gt->gt_quota_quantum = 60; | ||
73 | gt->gt_new_files_jdata = 0; | ||
74 | gt->gt_max_readahead = 1 << 18; | ||
75 | gt->gt_stall_secs = 600; | ||
76 | gt->gt_complain_secs = 10; | ||
77 | gt->gt_statfs_quantum = 30; | ||
78 | gt->gt_statfs_slow = 0; | ||
79 | } | ||
80 | |||
43 | static struct gfs2_sbd *init_sbd(struct super_block *sb) | 81 | static struct gfs2_sbd *init_sbd(struct super_block *sb) |
44 | { | 82 | { |
45 | struct gfs2_sbd *sdp; | 83 | struct gfs2_sbd *sdp; |
@@ -96,21 +134,271 @@ static struct gfs2_sbd *init_sbd(struct super_block *sb) | |||
96 | return sdp; | 134 | return sdp; |
97 | } | 135 | } |
98 | 136 | ||
99 | static void init_vfs(struct super_block *sb, unsigned noatime) | 137 | |
138 | /** | ||
139 | * gfs2_check_sb - Check superblock | ||
140 | * @sdp: the filesystem | ||
141 | * @sb: The superblock | ||
142 | * @silent: Don't print a message if the check fails | ||
143 | * | ||
144 | * Checks the version code of the FS is one that we understand how to | ||
145 | * read and that the sizes of the various on-disk structures have not | ||
146 | * changed. | ||
147 | */ | ||
148 | |||
149 | static int gfs2_check_sb(struct gfs2_sbd *sdp, struct gfs2_sb_host *sb, int silent) | ||
100 | { | 150 | { |
101 | struct gfs2_sbd *sdp = sb->s_fs_info; | 151 | unsigned int x; |
102 | 152 | ||
103 | sb->s_magic = GFS2_MAGIC; | 153 | if (sb->sb_magic != GFS2_MAGIC || |
104 | sb->s_op = &gfs2_super_ops; | 154 | sb->sb_type != GFS2_METATYPE_SB) { |
105 | sb->s_export_op = &gfs2_export_ops; | 155 | if (!silent) |
106 | sb->s_time_gran = 1; | 156 | printk(KERN_WARNING "GFS2: not a GFS2 filesystem\n"); |
107 | sb->s_maxbytes = MAX_LFS_FILESIZE; | 157 | return -EINVAL; |
158 | } | ||
159 | |||
160 | /* If format numbers match exactly, we're done. */ | ||
161 | |||
162 | if (sb->sb_fs_format == GFS2_FORMAT_FS && | ||
163 | sb->sb_multihost_format == GFS2_FORMAT_MULTI) | ||
164 | return 0; | ||
165 | |||
166 | if (sb->sb_fs_format != GFS2_FORMAT_FS) { | ||
167 | for (x = 0; gfs2_old_fs_formats[x]; x++) | ||
168 | if (gfs2_old_fs_formats[x] == sb->sb_fs_format) | ||
169 | break; | ||
170 | |||
171 | if (!gfs2_old_fs_formats[x]) { | ||
172 | printk(KERN_WARNING | ||
173 | "GFS2: code version (%u, %u) is incompatible " | ||
174 | "with ondisk format (%u, %u)\n", | ||
175 | GFS2_FORMAT_FS, GFS2_FORMAT_MULTI, | ||
176 | sb->sb_fs_format, sb->sb_multihost_format); | ||
177 | printk(KERN_WARNING | ||
178 | "GFS2: I don't know how to upgrade this FS\n"); | ||
179 | return -EINVAL; | ||
180 | } | ||
181 | } | ||
182 | |||
183 | if (sb->sb_multihost_format != GFS2_FORMAT_MULTI) { | ||
184 | for (x = 0; gfs2_old_multihost_formats[x]; x++) | ||
185 | if (gfs2_old_multihost_formats[x] == | ||
186 | sb->sb_multihost_format) | ||
187 | break; | ||
188 | |||
189 | if (!gfs2_old_multihost_formats[x]) { | ||
190 | printk(KERN_WARNING | ||
191 | "GFS2: code version (%u, %u) is incompatible " | ||
192 | "with ondisk format (%u, %u)\n", | ||
193 | GFS2_FORMAT_FS, GFS2_FORMAT_MULTI, | ||
194 | sb->sb_fs_format, sb->sb_multihost_format); | ||
195 | printk(KERN_WARNING | ||
196 | "GFS2: I don't know how to upgrade this FS\n"); | ||
197 | return -EINVAL; | ||
198 | } | ||
199 | } | ||
200 | |||
201 | if (!sdp->sd_args.ar_upgrade) { | ||
202 | printk(KERN_WARNING | ||
203 | "GFS2: code version (%u, %u) is incompatible " | ||
204 | "with ondisk format (%u, %u)\n", | ||
205 | GFS2_FORMAT_FS, GFS2_FORMAT_MULTI, | ||
206 | sb->sb_fs_format, sb->sb_multihost_format); | ||
207 | printk(KERN_INFO | ||
208 | "GFS2: Use the \"upgrade\" mount option to upgrade " | ||
209 | "the FS\n"); | ||
210 | printk(KERN_INFO "GFS2: See the manual for more details\n"); | ||
211 | return -EINVAL; | ||
212 | } | ||
213 | |||
214 | return 0; | ||
215 | } | ||
216 | |||
217 | static void end_bio_io_page(struct bio *bio, int error) | ||
218 | { | ||
219 | struct page *page = bio->bi_private; | ||
108 | 220 | ||
109 | if (sb->s_flags & (MS_NOATIME | MS_NODIRATIME)) | 221 | if (!error) |
110 | set_bit(noatime, &sdp->sd_flags); | 222 | SetPageUptodate(page); |
223 | else | ||
224 | printk(KERN_WARNING "gfs2: error %d reading superblock\n", error); | ||
225 | unlock_page(page); | ||
226 | } | ||
227 | |||
228 | static void gfs2_sb_in(struct gfs2_sb_host *sb, const void *buf) | ||
229 | { | ||
230 | const struct gfs2_sb *str = buf; | ||
231 | |||
232 | sb->sb_magic = be32_to_cpu(str->sb_header.mh_magic); | ||
233 | sb->sb_type = be32_to_cpu(str->sb_header.mh_type); | ||
234 | sb->sb_format = be32_to_cpu(str->sb_header.mh_format); | ||
235 | sb->sb_fs_format = be32_to_cpu(str->sb_fs_format); | ||
236 | sb->sb_multihost_format = be32_to_cpu(str->sb_multihost_format); | ||
237 | sb->sb_bsize = be32_to_cpu(str->sb_bsize); | ||
238 | sb->sb_bsize_shift = be32_to_cpu(str->sb_bsize_shift); | ||
239 | sb->sb_master_dir.no_addr = be64_to_cpu(str->sb_master_dir.no_addr); | ||
240 | sb->sb_master_dir.no_formal_ino = be64_to_cpu(str->sb_master_dir.no_formal_ino); | ||
241 | sb->sb_root_dir.no_addr = be64_to_cpu(str->sb_root_dir.no_addr); | ||
242 | sb->sb_root_dir.no_formal_ino = be64_to_cpu(str->sb_root_dir.no_formal_ino); | ||
243 | |||
244 | memcpy(sb->sb_lockproto, str->sb_lockproto, GFS2_LOCKNAME_LEN); | ||
245 | memcpy(sb->sb_locktable, str->sb_locktable, GFS2_LOCKNAME_LEN); | ||
246 | } | ||
247 | |||
248 | /** | ||
249 | * gfs2_read_super - Read the gfs2 super block from disk | ||
250 | * @sdp: The GFS2 super block | ||
251 | * @sector: The location of the super block | ||
252 | * @error: The error code to return | ||
253 | * | ||
254 | * This uses the bio functions to read the super block from disk | ||
255 | * because we want to be 100% sure that we never read cached data. | ||
256 | * A super block is read twice only during each GFS2 mount and is | ||
257 | * never written to by the filesystem. The first time its read no | ||
258 | * locks are held, and the only details which are looked at are those | ||
259 | * relating to the locking protocol. Once locking is up and working, | ||
260 | * the sb is read again under the lock to establish the location of | ||
261 | * the master directory (contains pointers to journals etc) and the | ||
262 | * root directory. | ||
263 | * | ||
264 | * Returns: 0 on success or error | ||
265 | */ | ||
266 | |||
267 | static int gfs2_read_super(struct gfs2_sbd *sdp, sector_t sector) | ||
268 | { | ||
269 | struct super_block *sb = sdp->sd_vfs; | ||
270 | struct gfs2_sb *p; | ||
271 | struct page *page; | ||
272 | struct bio *bio; | ||
273 | |||
274 | page = alloc_page(GFP_NOFS); | ||
275 | if (unlikely(!page)) | ||
276 | return -ENOBUFS; | ||
277 | |||
278 | ClearPageUptodate(page); | ||
279 | ClearPageDirty(page); | ||
280 | lock_page(page); | ||
281 | |||
282 | bio = bio_alloc(GFP_NOFS, 1); | ||
283 | if (unlikely(!bio)) { | ||
284 | __free_page(page); | ||
285 | return -ENOBUFS; | ||
286 | } | ||
111 | 287 | ||
112 | /* Don't let the VFS update atimes. GFS2 handles this itself. */ | 288 | bio->bi_sector = sector * (sb->s_blocksize >> 9); |
113 | sb->s_flags |= MS_NOATIME | MS_NODIRATIME; | 289 | bio->bi_bdev = sb->s_bdev; |
290 | bio_add_page(bio, page, PAGE_SIZE, 0); | ||
291 | |||
292 | bio->bi_end_io = end_bio_io_page; | ||
293 | bio->bi_private = page; | ||
294 | submit_bio(READ_SYNC | (1 << BIO_RW_META), bio); | ||
295 | wait_on_page_locked(page); | ||
296 | bio_put(bio); | ||
297 | if (!PageUptodate(page)) { | ||
298 | __free_page(page); | ||
299 | return -EIO; | ||
300 | } | ||
301 | p = kmap(page); | ||
302 | gfs2_sb_in(&sdp->sd_sb, p); | ||
303 | kunmap(page); | ||
304 | __free_page(page); | ||
305 | return 0; | ||
306 | } | ||
307 | /** | ||
308 | * gfs2_read_sb - Read super block | ||
309 | * @sdp: The GFS2 superblock | ||
310 | * @gl: the glock for the superblock (assumed to be held) | ||
311 | * @silent: Don't print message if mount fails | ||
312 | * | ||
313 | */ | ||
314 | |||
315 | static int gfs2_read_sb(struct gfs2_sbd *sdp, struct gfs2_glock *gl, int silent) | ||
316 | { | ||
317 | u32 hash_blocks, ind_blocks, leaf_blocks; | ||
318 | u32 tmp_blocks; | ||
319 | unsigned int x; | ||
320 | int error; | ||
321 | |||
322 | error = gfs2_read_super(sdp, GFS2_SB_ADDR >> sdp->sd_fsb2bb_shift); | ||
323 | if (error) { | ||
324 | if (!silent) | ||
325 | fs_err(sdp, "can't read superblock\n"); | ||
326 | return error; | ||
327 | } | ||
328 | |||
329 | error = gfs2_check_sb(sdp, &sdp->sd_sb, silent); | ||
330 | if (error) | ||
331 | return error; | ||
332 | |||
333 | sdp->sd_fsb2bb_shift = sdp->sd_sb.sb_bsize_shift - | ||
334 | GFS2_BASIC_BLOCK_SHIFT; | ||
335 | sdp->sd_fsb2bb = 1 << sdp->sd_fsb2bb_shift; | ||
336 | sdp->sd_diptrs = (sdp->sd_sb.sb_bsize - | ||
337 | sizeof(struct gfs2_dinode)) / sizeof(u64); | ||
338 | sdp->sd_inptrs = (sdp->sd_sb.sb_bsize - | ||
339 | sizeof(struct gfs2_meta_header)) / sizeof(u64); | ||
340 | sdp->sd_jbsize = sdp->sd_sb.sb_bsize - sizeof(struct gfs2_meta_header); | ||
341 | sdp->sd_hash_bsize = sdp->sd_sb.sb_bsize / 2; | ||
342 | sdp->sd_hash_bsize_shift = sdp->sd_sb.sb_bsize_shift - 1; | ||
343 | sdp->sd_hash_ptrs = sdp->sd_hash_bsize / sizeof(u64); | ||
344 | sdp->sd_qc_per_block = (sdp->sd_sb.sb_bsize - | ||
345 | sizeof(struct gfs2_meta_header)) / | ||
346 | sizeof(struct gfs2_quota_change); | ||
347 | |||
348 | /* Compute maximum reservation required to add a entry to a directory */ | ||
349 | |||
350 | hash_blocks = DIV_ROUND_UP(sizeof(u64) * (1 << GFS2_DIR_MAX_DEPTH), | ||
351 | sdp->sd_jbsize); | ||
352 | |||
353 | ind_blocks = 0; | ||
354 | for (tmp_blocks = hash_blocks; tmp_blocks > sdp->sd_diptrs;) { | ||
355 | tmp_blocks = DIV_ROUND_UP(tmp_blocks, sdp->sd_inptrs); | ||
356 | ind_blocks += tmp_blocks; | ||
357 | } | ||
358 | |||
359 | leaf_blocks = 2 + GFS2_DIR_MAX_DEPTH; | ||
360 | |||
361 | sdp->sd_max_dirres = hash_blocks + ind_blocks + leaf_blocks; | ||
362 | |||
363 | sdp->sd_heightsize[0] = sdp->sd_sb.sb_bsize - | ||
364 | sizeof(struct gfs2_dinode); | ||
365 | sdp->sd_heightsize[1] = sdp->sd_sb.sb_bsize * sdp->sd_diptrs; | ||
366 | for (x = 2;; x++) { | ||
367 | u64 space, d; | ||
368 | u32 m; | ||
369 | |||
370 | space = sdp->sd_heightsize[x - 1] * sdp->sd_inptrs; | ||
371 | d = space; | ||
372 | m = do_div(d, sdp->sd_inptrs); | ||
373 | |||
374 | if (d != sdp->sd_heightsize[x - 1] || m) | ||
375 | break; | ||
376 | sdp->sd_heightsize[x] = space; | ||
377 | } | ||
378 | sdp->sd_max_height = x; | ||
379 | sdp->sd_heightsize[x] = ~0; | ||
380 | gfs2_assert(sdp, sdp->sd_max_height <= GFS2_MAX_META_HEIGHT); | ||
381 | |||
382 | sdp->sd_jheightsize[0] = sdp->sd_sb.sb_bsize - | ||
383 | sizeof(struct gfs2_dinode); | ||
384 | sdp->sd_jheightsize[1] = sdp->sd_jbsize * sdp->sd_diptrs; | ||
385 | for (x = 2;; x++) { | ||
386 | u64 space, d; | ||
387 | u32 m; | ||
388 | |||
389 | space = sdp->sd_jheightsize[x - 1] * sdp->sd_inptrs; | ||
390 | d = space; | ||
391 | m = do_div(d, sdp->sd_inptrs); | ||
392 | |||
393 | if (d != sdp->sd_jheightsize[x - 1] || m) | ||
394 | break; | ||
395 | sdp->sd_jheightsize[x] = space; | ||
396 | } | ||
397 | sdp->sd_max_jheight = x; | ||
398 | sdp->sd_jheightsize[x] = ~0; | ||
399 | gfs2_assert(sdp, sdp->sd_max_jheight <= GFS2_MAX_META_HEIGHT); | ||
400 | |||
401 | return 0; | ||
114 | } | 402 | } |
115 | 403 | ||
116 | static int init_names(struct gfs2_sbd *sdp, int silent) | 404 | static int init_names(struct gfs2_sbd *sdp, int silent) |
@@ -224,51 +512,59 @@ fail: | |||
224 | return error; | 512 | return error; |
225 | } | 513 | } |
226 | 514 | ||
227 | static inline struct inode *gfs2_lookup_root(struct super_block *sb, | 515 | static int gfs2_lookup_root(struct super_block *sb, struct dentry **dptr, |
228 | u64 no_addr) | 516 | u64 no_addr, const char *name) |
229 | { | 517 | { |
230 | return gfs2_inode_lookup(sb, DT_DIR, no_addr, 0, 0); | 518 | struct gfs2_sbd *sdp = sb->s_fs_info; |
519 | struct dentry *dentry; | ||
520 | struct inode *inode; | ||
521 | |||
522 | inode = gfs2_inode_lookup(sb, DT_DIR, no_addr, 0, 0); | ||
523 | if (IS_ERR(inode)) { | ||
524 | fs_err(sdp, "can't read in %s inode: %ld\n", name, PTR_ERR(inode)); | ||
525 | return PTR_ERR(inode); | ||
526 | } | ||
527 | dentry = d_alloc_root(inode); | ||
528 | if (!dentry) { | ||
529 | fs_err(sdp, "can't alloc %s dentry\n", name); | ||
530 | iput(inode); | ||
531 | return -ENOMEM; | ||
532 | } | ||
533 | dentry->d_op = &gfs2_dops; | ||
534 | *dptr = dentry; | ||
535 | return 0; | ||
231 | } | 536 | } |
232 | 537 | ||
233 | static int init_sb(struct gfs2_sbd *sdp, int silent, int undo) | 538 | static int init_sb(struct gfs2_sbd *sdp, int silent) |
234 | { | 539 | { |
235 | struct super_block *sb = sdp->sd_vfs; | 540 | struct super_block *sb = sdp->sd_vfs; |
236 | struct gfs2_holder sb_gh; | 541 | struct gfs2_holder sb_gh; |
237 | u64 no_addr; | 542 | u64 no_addr; |
238 | struct inode *inode; | 543 | int ret; |
239 | int error = 0; | ||
240 | 544 | ||
241 | if (undo) { | 545 | ret = gfs2_glock_nq_num(sdp, GFS2_SB_LOCK, &gfs2_meta_glops, |
242 | if (sb->s_root) { | 546 | LM_ST_SHARED, 0, &sb_gh); |
243 | dput(sb->s_root); | 547 | if (ret) { |
244 | sb->s_root = NULL; | 548 | fs_err(sdp, "can't acquire superblock glock: %d\n", ret); |
245 | } | 549 | return ret; |
246 | return 0; | ||
247 | } | 550 | } |
248 | 551 | ||
249 | error = gfs2_glock_nq_num(sdp, GFS2_SB_LOCK, &gfs2_meta_glops, | 552 | ret = gfs2_read_sb(sdp, sb_gh.gh_gl, silent); |
250 | LM_ST_SHARED, 0, &sb_gh); | 553 | if (ret) { |
251 | if (error) { | 554 | fs_err(sdp, "can't read superblock: %d\n", ret); |
252 | fs_err(sdp, "can't acquire superblock glock: %d\n", error); | ||
253 | return error; | ||
254 | } | ||
255 | |||
256 | error = gfs2_read_sb(sdp, sb_gh.gh_gl, silent); | ||
257 | if (error) { | ||
258 | fs_err(sdp, "can't read superblock: %d\n", error); | ||
259 | goto out; | 555 | goto out; |
260 | } | 556 | } |
261 | 557 | ||
262 | /* Set up the buffer cache and SB for real */ | 558 | /* Set up the buffer cache and SB for real */ |
263 | if (sdp->sd_sb.sb_bsize < bdev_hardsect_size(sb->s_bdev)) { | 559 | if (sdp->sd_sb.sb_bsize < bdev_hardsect_size(sb->s_bdev)) { |
264 | error = -EINVAL; | 560 | ret = -EINVAL; |
265 | fs_err(sdp, "FS block size (%u) is too small for device " | 561 | fs_err(sdp, "FS block size (%u) is too small for device " |
266 | "block size (%u)\n", | 562 | "block size (%u)\n", |
267 | sdp->sd_sb.sb_bsize, bdev_hardsect_size(sb->s_bdev)); | 563 | sdp->sd_sb.sb_bsize, bdev_hardsect_size(sb->s_bdev)); |
268 | goto out; | 564 | goto out; |
269 | } | 565 | } |
270 | if (sdp->sd_sb.sb_bsize > PAGE_SIZE) { | 566 | if (sdp->sd_sb.sb_bsize > PAGE_SIZE) { |
271 | error = -EINVAL; | 567 | ret = -EINVAL; |
272 | fs_err(sdp, "FS block size (%u) is too big for machine " | 568 | fs_err(sdp, "FS block size (%u) is too big for machine " |
273 | "page size (%u)\n", | 569 | "page size (%u)\n", |
274 | sdp->sd_sb.sb_bsize, (unsigned int)PAGE_SIZE); | 570 | sdp->sd_sb.sb_bsize, (unsigned int)PAGE_SIZE); |
@@ -278,26 +574,21 @@ static int init_sb(struct gfs2_sbd *sdp, int silent, int undo) | |||
278 | 574 | ||
279 | /* Get the root inode */ | 575 | /* Get the root inode */ |
280 | no_addr = sdp->sd_sb.sb_root_dir.no_addr; | 576 | no_addr = sdp->sd_sb.sb_root_dir.no_addr; |
281 | if (sb->s_type == &gfs2meta_fs_type) | 577 | ret = gfs2_lookup_root(sb, &sdp->sd_root_dir, no_addr, "root"); |
282 | no_addr = sdp->sd_sb.sb_master_dir.no_addr; | 578 | if (ret) |
283 | inode = gfs2_lookup_root(sb, no_addr); | ||
284 | if (IS_ERR(inode)) { | ||
285 | error = PTR_ERR(inode); | ||
286 | fs_err(sdp, "can't read in root inode: %d\n", error); | ||
287 | goto out; | 579 | goto out; |
288 | } | ||
289 | 580 | ||
290 | sb->s_root = d_alloc_root(inode); | 581 | /* Get the master inode */ |
291 | if (!sb->s_root) { | 582 | no_addr = sdp->sd_sb.sb_master_dir.no_addr; |
292 | fs_err(sdp, "can't get root dentry\n"); | 583 | ret = gfs2_lookup_root(sb, &sdp->sd_master_dir, no_addr, "master"); |
293 | error = -ENOMEM; | 584 | if (ret) { |
294 | iput(inode); | 585 | dput(sdp->sd_root_dir); |
295 | } else | 586 | goto out; |
296 | sb->s_root->d_op = &gfs2_dops; | 587 | } |
297 | 588 | sb->s_root = dget(sdp->sd_args.ar_meta ? sdp->sd_master_dir : sdp->sd_root_dir); | |
298 | out: | 589 | out: |
299 | gfs2_glock_dq_uninit(&sb_gh); | 590 | gfs2_glock_dq_uninit(&sb_gh); |
300 | return error; | 591 | return ret; |
301 | } | 592 | } |
302 | 593 | ||
303 | /** | 594 | /** |
@@ -372,6 +663,7 @@ static void gfs2_lm_others_may_mount(struct gfs2_sbd *sdp) | |||
372 | 663 | ||
373 | static int init_journal(struct gfs2_sbd *sdp, int undo) | 664 | static int init_journal(struct gfs2_sbd *sdp, int undo) |
374 | { | 665 | { |
666 | struct inode *master = sdp->sd_master_dir->d_inode; | ||
375 | struct gfs2_holder ji_gh; | 667 | struct gfs2_holder ji_gh; |
376 | struct task_struct *p; | 668 | struct task_struct *p; |
377 | struct gfs2_inode *ip; | 669 | struct gfs2_inode *ip; |
@@ -383,7 +675,7 @@ static int init_journal(struct gfs2_sbd *sdp, int undo) | |||
383 | goto fail_recoverd; | 675 | goto fail_recoverd; |
384 | } | 676 | } |
385 | 677 | ||
386 | sdp->sd_jindex = gfs2_lookup_simple(sdp->sd_master_dir, "jindex"); | 678 | sdp->sd_jindex = gfs2_lookup_simple(master, "jindex"); |
387 | if (IS_ERR(sdp->sd_jindex)) { | 679 | if (IS_ERR(sdp->sd_jindex)) { |
388 | fs_err(sdp, "can't lookup journal index: %d\n", error); | 680 | fs_err(sdp, "can't lookup journal index: %d\n", error); |
389 | return PTR_ERR(sdp->sd_jindex); | 681 | return PTR_ERR(sdp->sd_jindex); |
@@ -506,25 +798,17 @@ static int init_inodes(struct gfs2_sbd *sdp, int undo) | |||
506 | { | 798 | { |
507 | int error = 0; | 799 | int error = 0; |
508 | struct gfs2_inode *ip; | 800 | struct gfs2_inode *ip; |
509 | struct inode *inode; | 801 | struct inode *master = sdp->sd_master_dir->d_inode; |
510 | 802 | ||
511 | if (undo) | 803 | if (undo) |
512 | goto fail_qinode; | 804 | goto fail_qinode; |
513 | 805 | ||
514 | inode = gfs2_lookup_root(sdp->sd_vfs, sdp->sd_sb.sb_master_dir.no_addr); | ||
515 | if (IS_ERR(inode)) { | ||
516 | error = PTR_ERR(inode); | ||
517 | fs_err(sdp, "can't read in master directory: %d\n", error); | ||
518 | goto fail; | ||
519 | } | ||
520 | sdp->sd_master_dir = inode; | ||
521 | |||
522 | error = init_journal(sdp, undo); | 806 | error = init_journal(sdp, undo); |
523 | if (error) | 807 | if (error) |
524 | goto fail_master; | 808 | goto fail; |
525 | 809 | ||
526 | /* Read in the master inode number inode */ | 810 | /* Read in the master inode number inode */ |
527 | sdp->sd_inum_inode = gfs2_lookup_simple(sdp->sd_master_dir, "inum"); | 811 | sdp->sd_inum_inode = gfs2_lookup_simple(master, "inum"); |
528 | if (IS_ERR(sdp->sd_inum_inode)) { | 812 | if (IS_ERR(sdp->sd_inum_inode)) { |
529 | error = PTR_ERR(sdp->sd_inum_inode); | 813 | error = PTR_ERR(sdp->sd_inum_inode); |
530 | fs_err(sdp, "can't read in inum inode: %d\n", error); | 814 | fs_err(sdp, "can't read in inum inode: %d\n", error); |
@@ -533,7 +817,7 @@ static int init_inodes(struct gfs2_sbd *sdp, int undo) | |||
533 | 817 | ||
534 | 818 | ||
535 | /* Read in the master statfs inode */ | 819 | /* Read in the master statfs inode */ |
536 | sdp->sd_statfs_inode = gfs2_lookup_simple(sdp->sd_master_dir, "statfs"); | 820 | sdp->sd_statfs_inode = gfs2_lookup_simple(master, "statfs"); |
537 | if (IS_ERR(sdp->sd_statfs_inode)) { | 821 | if (IS_ERR(sdp->sd_statfs_inode)) { |
538 | error = PTR_ERR(sdp->sd_statfs_inode); | 822 | error = PTR_ERR(sdp->sd_statfs_inode); |
539 | fs_err(sdp, "can't read in statfs inode: %d\n", error); | 823 | fs_err(sdp, "can't read in statfs inode: %d\n", error); |
@@ -541,7 +825,7 @@ static int init_inodes(struct gfs2_sbd *sdp, int undo) | |||
541 | } | 825 | } |
542 | 826 | ||
543 | /* Read in the resource index inode */ | 827 | /* Read in the resource index inode */ |
544 | sdp->sd_rindex = gfs2_lookup_simple(sdp->sd_master_dir, "rindex"); | 828 | sdp->sd_rindex = gfs2_lookup_simple(master, "rindex"); |
545 | if (IS_ERR(sdp->sd_rindex)) { | 829 | if (IS_ERR(sdp->sd_rindex)) { |
546 | error = PTR_ERR(sdp->sd_rindex); | 830 | error = PTR_ERR(sdp->sd_rindex); |
547 | fs_err(sdp, "can't get resource index inode: %d\n", error); | 831 | fs_err(sdp, "can't get resource index inode: %d\n", error); |
@@ -552,7 +836,7 @@ static int init_inodes(struct gfs2_sbd *sdp, int undo) | |||
552 | sdp->sd_rindex_uptodate = 0; | 836 | sdp->sd_rindex_uptodate = 0; |
553 | 837 | ||
554 | /* Read in the quota inode */ | 838 | /* Read in the quota inode */ |
555 | sdp->sd_quota_inode = gfs2_lookup_simple(sdp->sd_master_dir, "quota"); | 839 | sdp->sd_quota_inode = gfs2_lookup_simple(master, "quota"); |
556 | if (IS_ERR(sdp->sd_quota_inode)) { | 840 | if (IS_ERR(sdp->sd_quota_inode)) { |
557 | error = PTR_ERR(sdp->sd_quota_inode); | 841 | error = PTR_ERR(sdp->sd_quota_inode); |
558 | fs_err(sdp, "can't get quota file inode: %d\n", error); | 842 | fs_err(sdp, "can't get quota file inode: %d\n", error); |
@@ -571,8 +855,6 @@ fail_inum: | |||
571 | iput(sdp->sd_inum_inode); | 855 | iput(sdp->sd_inum_inode); |
572 | fail_journal: | 856 | fail_journal: |
573 | init_journal(sdp, UNDO); | 857 | init_journal(sdp, UNDO); |
574 | fail_master: | ||
575 | iput(sdp->sd_master_dir); | ||
576 | fail: | 858 | fail: |
577 | return error; | 859 | return error; |
578 | } | 860 | } |
@@ -583,6 +865,7 @@ static int init_per_node(struct gfs2_sbd *sdp, int undo) | |||
583 | char buf[30]; | 865 | char buf[30]; |
584 | int error = 0; | 866 | int error = 0; |
585 | struct gfs2_inode *ip; | 867 | struct gfs2_inode *ip; |
868 | struct inode *master = sdp->sd_master_dir->d_inode; | ||
586 | 869 | ||
587 | if (sdp->sd_args.ar_spectator) | 870 | if (sdp->sd_args.ar_spectator) |
588 | return 0; | 871 | return 0; |
@@ -590,7 +873,7 @@ static int init_per_node(struct gfs2_sbd *sdp, int undo) | |||
590 | if (undo) | 873 | if (undo) |
591 | goto fail_qc_gh; | 874 | goto fail_qc_gh; |
592 | 875 | ||
593 | pn = gfs2_lookup_simple(sdp->sd_master_dir, "per_node"); | 876 | pn = gfs2_lookup_simple(master, "per_node"); |
594 | if (IS_ERR(pn)) { | 877 | if (IS_ERR(pn)) { |
595 | error = PTR_ERR(pn); | 878 | error = PTR_ERR(pn); |
596 | fs_err(sdp, "can't find per_node directory: %d\n", error); | 879 | fs_err(sdp, "can't find per_node directory: %d\n", error); |
@@ -800,7 +1083,11 @@ static int fill_super(struct super_block *sb, void *data, int silent) | |||
800 | goto fail; | 1083 | goto fail; |
801 | } | 1084 | } |
802 | 1085 | ||
803 | init_vfs(sb, SDF_NOATIME); | 1086 | sb->s_magic = GFS2_MAGIC; |
1087 | sb->s_op = &gfs2_super_ops; | ||
1088 | sb->s_export_op = &gfs2_export_ops; | ||
1089 | sb->s_time_gran = 1; | ||
1090 | sb->s_maxbytes = MAX_LFS_FILESIZE; | ||
804 | 1091 | ||
805 | /* Set up the buffer cache and fill in some fake block size values | 1092 | /* Set up the buffer cache and fill in some fake block size values |
806 | to allow us to read-in the on-disk superblock. */ | 1093 | to allow us to read-in the on-disk superblock. */ |
@@ -828,7 +1115,7 @@ static int fill_super(struct super_block *sb, void *data, int silent) | |||
828 | if (error) | 1115 | if (error) |
829 | goto fail_lm; | 1116 | goto fail_lm; |
830 | 1117 | ||
831 | error = init_sb(sdp, silent, DO); | 1118 | error = init_sb(sdp, silent); |
832 | if (error) | 1119 | if (error) |
833 | goto fail_locking; | 1120 | goto fail_locking; |
834 | 1121 | ||
@@ -869,7 +1156,11 @@ fail_per_node: | |||
869 | fail_inodes: | 1156 | fail_inodes: |
870 | init_inodes(sdp, UNDO); | 1157 | init_inodes(sdp, UNDO); |
871 | fail_sb: | 1158 | fail_sb: |
872 | init_sb(sdp, 0, UNDO); | 1159 | if (sdp->sd_root_dir) |
1160 | dput(sdp->sd_root_dir); | ||
1161 | if (sdp->sd_master_dir) | ||
1162 | dput(sdp->sd_master_dir); | ||
1163 | sb->s_root = NULL; | ||
873 | fail_locking: | 1164 | fail_locking: |
874 | init_locking(sdp, &mount_gh, UNDO); | 1165 | init_locking(sdp, &mount_gh, UNDO); |
875 | fail_lm: | 1166 | fail_lm: |
@@ -887,151 +1178,63 @@ fail: | |||
887 | } | 1178 | } |
888 | 1179 | ||
889 | static int gfs2_get_sb(struct file_system_type *fs_type, int flags, | 1180 | static int gfs2_get_sb(struct file_system_type *fs_type, int flags, |
890 | const char *dev_name, void *data, struct vfsmount *mnt) | 1181 | const char *dev_name, void *data, struct vfsmount *mnt) |
891 | { | 1182 | { |
892 | struct super_block *sb; | 1183 | return get_sb_bdev(fs_type, flags, dev_name, data, fill_super, mnt); |
893 | struct gfs2_sbd *sdp; | ||
894 | int error = get_sb_bdev(fs_type, flags, dev_name, data, fill_super, mnt); | ||
895 | if (error) | ||
896 | goto out; | ||
897 | sb = mnt->mnt_sb; | ||
898 | sdp = sb->s_fs_info; | ||
899 | sdp->sd_gfs2mnt = mnt; | ||
900 | out: | ||
901 | return error; | ||
902 | } | 1184 | } |
903 | 1185 | ||
904 | static int fill_super_meta(struct super_block *sb, struct super_block *new, | 1186 | static struct super_block *get_gfs2_sb(const char *dev_name) |
905 | void *data, int silent) | ||
906 | { | 1187 | { |
907 | struct gfs2_sbd *sdp = sb->s_fs_info; | 1188 | struct super_block *sb; |
908 | struct inode *inode; | ||
909 | int error = 0; | ||
910 | |||
911 | new->s_fs_info = sdp; | ||
912 | sdp->sd_vfs_meta = sb; | ||
913 | |||
914 | init_vfs(new, SDF_NOATIME); | ||
915 | |||
916 | /* Get the master inode */ | ||
917 | inode = igrab(sdp->sd_master_dir); | ||
918 | |||
919 | new->s_root = d_alloc_root(inode); | ||
920 | if (!new->s_root) { | ||
921 | fs_err(sdp, "can't get root dentry\n"); | ||
922 | error = -ENOMEM; | ||
923 | iput(inode); | ||
924 | } else | ||
925 | new->s_root->d_op = &gfs2_dops; | ||
926 | |||
927 | return error; | ||
928 | } | ||
929 | |||
930 | static int set_bdev_super(struct super_block *s, void *data) | ||
931 | { | ||
932 | s->s_bdev = data; | ||
933 | s->s_dev = s->s_bdev->bd_dev; | ||
934 | return 0; | ||
935 | } | ||
936 | |||
937 | static int test_bdev_super(struct super_block *s, void *data) | ||
938 | { | ||
939 | return s->s_bdev == data; | ||
940 | } | ||
941 | |||
942 | static struct super_block* get_gfs2_sb(const char *dev_name) | ||
943 | { | ||
944 | struct kstat stat; | ||
945 | struct nameidata nd; | 1189 | struct nameidata nd; |
946 | struct super_block *sb = NULL, *s; | ||
947 | int error; | 1190 | int error; |
948 | 1191 | ||
949 | error = path_lookup(dev_name, LOOKUP_FOLLOW, &nd); | 1192 | error = path_lookup(dev_name, LOOKUP_FOLLOW, &nd); |
950 | if (error) { | 1193 | if (error) { |
951 | printk(KERN_WARNING "GFS2: path_lookup on %s returned error\n", | 1194 | printk(KERN_WARNING "GFS2: path_lookup on %s returned error %d\n", |
952 | dev_name); | 1195 | dev_name, error); |
953 | goto out; | 1196 | return NULL; |
954 | } | ||
955 | error = vfs_getattr(nd.path.mnt, nd.path.dentry, &stat); | ||
956 | |||
957 | list_for_each_entry(s, &gfs2_fs_type.fs_supers, s_instances) { | ||
958 | if ((S_ISBLK(stat.mode) && s->s_dev == stat.rdev) || | ||
959 | (S_ISDIR(stat.mode) && | ||
960 | s == nd.path.dentry->d_inode->i_sb)) { | ||
961 | sb = s; | ||
962 | goto free_nd; | ||
963 | } | ||
964 | } | 1197 | } |
965 | 1198 | sb = nd.path.dentry->d_inode->i_sb; | |
966 | printk(KERN_WARNING "GFS2: Unrecognized block device or " | 1199 | if (sb && (sb->s_type == &gfs2_fs_type)) |
967 | "mount point %s\n", dev_name); | 1200 | atomic_inc(&sb->s_active); |
968 | 1201 | else | |
969 | free_nd: | 1202 | sb = NULL; |
970 | path_put(&nd.path); | 1203 | path_put(&nd.path); |
971 | out: | ||
972 | return sb; | 1204 | return sb; |
973 | } | 1205 | } |
974 | 1206 | ||
975 | static int gfs2_get_sb_meta(struct file_system_type *fs_type, int flags, | 1207 | static int gfs2_get_sb_meta(struct file_system_type *fs_type, int flags, |
976 | const char *dev_name, void *data, struct vfsmount *mnt) | 1208 | const char *dev_name, void *data, struct vfsmount *mnt) |
977 | { | 1209 | { |
978 | int error = 0; | 1210 | struct super_block *sb = NULL; |
979 | struct super_block *sb = NULL, *new; | ||
980 | struct gfs2_sbd *sdp; | 1211 | struct gfs2_sbd *sdp; |
981 | 1212 | ||
982 | sb = get_gfs2_sb(dev_name); | 1213 | sb = get_gfs2_sb(dev_name); |
983 | if (!sb) { | 1214 | if (!sb) { |
984 | printk(KERN_WARNING "GFS2: gfs2 mount does not exist\n"); | 1215 | printk(KERN_WARNING "GFS2: gfs2 mount does not exist\n"); |
985 | error = -ENOENT; | 1216 | return -ENOENT; |
986 | goto error; | ||
987 | } | 1217 | } |
988 | sdp = sb->s_fs_info; | 1218 | sdp = sb->s_fs_info; |
989 | if (sdp->sd_vfs_meta) { | 1219 | mnt->mnt_sb = sb; |
990 | printk(KERN_WARNING "GFS2: gfs2meta mount already exists\n"); | 1220 | mnt->mnt_root = dget(sdp->sd_master_dir); |
991 | error = -EBUSY; | 1221 | return 0; |
992 | goto error; | ||
993 | } | ||
994 | down(&sb->s_bdev->bd_mount_sem); | ||
995 | new = sget(fs_type, test_bdev_super, set_bdev_super, sb->s_bdev); | ||
996 | up(&sb->s_bdev->bd_mount_sem); | ||
997 | if (IS_ERR(new)) { | ||
998 | error = PTR_ERR(new); | ||
999 | goto error; | ||
1000 | } | ||
1001 | new->s_flags = flags; | ||
1002 | strlcpy(new->s_id, sb->s_id, sizeof(new->s_id)); | ||
1003 | sb_set_blocksize(new, sb->s_blocksize); | ||
1004 | error = fill_super_meta(sb, new, data, flags & MS_SILENT ? 1 : 0); | ||
1005 | if (error) { | ||
1006 | up_write(&new->s_umount); | ||
1007 | deactivate_super(new); | ||
1008 | goto error; | ||
1009 | } | ||
1010 | |||
1011 | new->s_flags |= MS_ACTIVE; | ||
1012 | |||
1013 | /* Grab a reference to the gfs2 mount point */ | ||
1014 | atomic_inc(&sdp->sd_gfs2mnt->mnt_count); | ||
1015 | return simple_set_mnt(mnt, new); | ||
1016 | error: | ||
1017 | return error; | ||
1018 | } | 1222 | } |
1019 | 1223 | ||
1020 | static void gfs2_kill_sb(struct super_block *sb) | 1224 | static void gfs2_kill_sb(struct super_block *sb) |
1021 | { | 1225 | { |
1022 | if (sb->s_fs_info) { | 1226 | struct gfs2_sbd *sdp = sb->s_fs_info; |
1023 | gfs2_delete_debugfs_file(sb->s_fs_info); | 1227 | if (sdp) { |
1024 | gfs2_meta_syncfs(sb->s_fs_info); | 1228 | gfs2_meta_syncfs(sdp); |
1229 | dput(sdp->sd_root_dir); | ||
1230 | dput(sdp->sd_master_dir); | ||
1231 | sdp->sd_root_dir = NULL; | ||
1232 | sdp->sd_master_dir = NULL; | ||
1025 | } | 1233 | } |
1234 | shrink_dcache_sb(sb); | ||
1026 | kill_block_super(sb); | 1235 | kill_block_super(sb); |
1027 | } | 1236 | if (sdp) |
1028 | 1237 | gfs2_delete_debugfs_file(sdp); | |
1029 | static void gfs2_kill_sb_meta(struct super_block *sb) | ||
1030 | { | ||
1031 | struct gfs2_sbd *sdp = sb->s_fs_info; | ||
1032 | generic_shutdown_super(sb); | ||
1033 | sdp->sd_vfs_meta = NULL; | ||
1034 | atomic_dec(&sdp->sd_gfs2mnt->mnt_count); | ||
1035 | } | 1238 | } |
1036 | 1239 | ||
1037 | struct file_system_type gfs2_fs_type = { | 1240 | struct file_system_type gfs2_fs_type = { |
@@ -1046,7 +1249,6 @@ struct file_system_type gfs2meta_fs_type = { | |||
1046 | .name = "gfs2meta", | 1249 | .name = "gfs2meta", |
1047 | .fs_flags = FS_REQUIRES_DEV, | 1250 | .fs_flags = FS_REQUIRES_DEV, |
1048 | .get_sb = gfs2_get_sb_meta, | 1251 | .get_sb = gfs2_get_sb_meta, |
1049 | .kill_sb = gfs2_kill_sb_meta, | ||
1050 | .owner = THIS_MODULE, | 1252 | .owner = THIS_MODULE, |
1051 | }; | 1253 | }; |
1052 | 1254 | ||
diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c index e2c62f73a778..534e1e2c65ca 100644 --- a/fs/gfs2/ops_inode.c +++ b/fs/gfs2/ops_inode.c | |||
@@ -159,9 +159,13 @@ static int gfs2_link(struct dentry *old_dentry, struct inode *dir, | |||
159 | gfs2_holder_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs); | 159 | gfs2_holder_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs); |
160 | gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + 1); | 160 | gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + 1); |
161 | 161 | ||
162 | error = gfs2_glock_nq_m(2, ghs); | 162 | error = gfs2_glock_nq(ghs); /* parent */ |
163 | if (error) | 163 | if (error) |
164 | goto out; | 164 | goto out_parent; |
165 | |||
166 | error = gfs2_glock_nq(ghs + 1); /* child */ | ||
167 | if (error) | ||
168 | goto out_child; | ||
165 | 169 | ||
166 | error = gfs2_permission(dir, MAY_WRITE | MAY_EXEC); | 170 | error = gfs2_permission(dir, MAY_WRITE | MAY_EXEC); |
167 | if (error) | 171 | if (error) |
@@ -245,8 +249,10 @@ out_alloc: | |||
245 | if (alloc_required) | 249 | if (alloc_required) |
246 | gfs2_alloc_put(dip); | 250 | gfs2_alloc_put(dip); |
247 | out_gunlock: | 251 | out_gunlock: |
248 | gfs2_glock_dq_m(2, ghs); | 252 | gfs2_glock_dq(ghs + 1); |
249 | out: | 253 | out_child: |
254 | gfs2_glock_dq(ghs); | ||
255 | out_parent: | ||
250 | gfs2_holder_uninit(ghs); | 256 | gfs2_holder_uninit(ghs); |
251 | gfs2_holder_uninit(ghs + 1); | 257 | gfs2_holder_uninit(ghs + 1); |
252 | if (!error) { | 258 | if (!error) { |
@@ -302,7 +308,7 @@ static int gfs2_unlink(struct inode *dir, struct dentry *dentry) | |||
302 | 308 | ||
303 | error = gfs2_unlink_ok(dip, &dentry->d_name, ip); | 309 | error = gfs2_unlink_ok(dip, &dentry->d_name, ip); |
304 | if (error) | 310 | if (error) |
305 | goto out_rgrp; | 311 | goto out_gunlock; |
306 | 312 | ||
307 | error = gfs2_trans_begin(sdp, 2*RES_DINODE + RES_LEAF + RES_RG_BIT, 0); | 313 | error = gfs2_trans_begin(sdp, 2*RES_DINODE + RES_LEAF + RES_RG_BIT, 0); |
308 | if (error) | 314 | if (error) |
@@ -316,6 +322,7 @@ static int gfs2_unlink(struct inode *dir, struct dentry *dentry) | |||
316 | 322 | ||
317 | out_end_trans: | 323 | out_end_trans: |
318 | gfs2_trans_end(sdp); | 324 | gfs2_trans_end(sdp); |
325 | out_gunlock: | ||
319 | gfs2_glock_dq(ghs + 2); | 326 | gfs2_glock_dq(ghs + 2); |
320 | out_rgrp: | 327 | out_rgrp: |
321 | gfs2_holder_uninit(ghs + 2); | 328 | gfs2_holder_uninit(ghs + 2); |
@@ -485,7 +492,6 @@ static int gfs2_rmdir(struct inode *dir, struct dentry *dentry) | |||
485 | struct gfs2_holder ri_gh; | 492 | struct gfs2_holder ri_gh; |
486 | int error; | 493 | int error; |
487 | 494 | ||
488 | |||
489 | error = gfs2_rindex_hold(sdp, &ri_gh); | 495 | error = gfs2_rindex_hold(sdp, &ri_gh); |
490 | if (error) | 496 | if (error) |
491 | return error; | 497 | return error; |
@@ -495,9 +501,17 @@ static int gfs2_rmdir(struct inode *dir, struct dentry *dentry) | |||
495 | rgd = gfs2_blk2rgrpd(sdp, ip->i_no_addr); | 501 | rgd = gfs2_blk2rgrpd(sdp, ip->i_no_addr); |
496 | gfs2_holder_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, ghs + 2); | 502 | gfs2_holder_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, ghs + 2); |
497 | 503 | ||
498 | error = gfs2_glock_nq_m(3, ghs); | 504 | error = gfs2_glock_nq(ghs); /* parent */ |
499 | if (error) | 505 | if (error) |
500 | goto out; | 506 | goto out_parent; |
507 | |||
508 | error = gfs2_glock_nq(ghs + 1); /* child */ | ||
509 | if (error) | ||
510 | goto out_child; | ||
511 | |||
512 | error = gfs2_glock_nq(ghs + 2); /* rgrp */ | ||
513 | if (error) | ||
514 | goto out_rgrp; | ||
501 | 515 | ||
502 | error = gfs2_unlink_ok(dip, &dentry->d_name, ip); | 516 | error = gfs2_unlink_ok(dip, &dentry->d_name, ip); |
503 | if (error) | 517 | if (error) |
@@ -523,11 +537,15 @@ static int gfs2_rmdir(struct inode *dir, struct dentry *dentry) | |||
523 | gfs2_trans_end(sdp); | 537 | gfs2_trans_end(sdp); |
524 | 538 | ||
525 | out_gunlock: | 539 | out_gunlock: |
526 | gfs2_glock_dq_m(3, ghs); | 540 | gfs2_glock_dq(ghs + 2); |
527 | out: | 541 | out_rgrp: |
528 | gfs2_holder_uninit(ghs); | ||
529 | gfs2_holder_uninit(ghs + 1); | ||
530 | gfs2_holder_uninit(ghs + 2); | 542 | gfs2_holder_uninit(ghs + 2); |
543 | gfs2_glock_dq(ghs + 1); | ||
544 | out_child: | ||
545 | gfs2_holder_uninit(ghs + 1); | ||
546 | gfs2_glock_dq(ghs); | ||
547 | out_parent: | ||
548 | gfs2_holder_uninit(ghs); | ||
531 | gfs2_glock_dq_uninit(&ri_gh); | 549 | gfs2_glock_dq_uninit(&ri_gh); |
532 | return error; | 550 | return error; |
533 | } | 551 | } |
@@ -571,6 +589,54 @@ static int gfs2_mknod(struct inode *dir, struct dentry *dentry, int mode, | |||
571 | return 0; | 589 | return 0; |
572 | } | 590 | } |
573 | 591 | ||
592 | /* | ||
593 | * gfs2_ok_to_move - check if it's ok to move a directory to another directory | ||
594 | * @this: move this | ||
595 | * @to: to here | ||
596 | * | ||
597 | * Follow @to back to the root and make sure we don't encounter @this | ||
598 | * Assumes we already hold the rename lock. | ||
599 | * | ||
600 | * Returns: errno | ||
601 | */ | ||
602 | |||
603 | static int gfs2_ok_to_move(struct gfs2_inode *this, struct gfs2_inode *to) | ||
604 | { | ||
605 | struct inode *dir = &to->i_inode; | ||
606 | struct super_block *sb = dir->i_sb; | ||
607 | struct inode *tmp; | ||
608 | struct qstr dotdot; | ||
609 | int error = 0; | ||
610 | |||
611 | gfs2_str2qstr(&dotdot, ".."); | ||
612 | |||
613 | igrab(dir); | ||
614 | |||
615 | for (;;) { | ||
616 | if (dir == &this->i_inode) { | ||
617 | error = -EINVAL; | ||
618 | break; | ||
619 | } | ||
620 | if (dir == sb->s_root->d_inode) { | ||
621 | error = 0; | ||
622 | break; | ||
623 | } | ||
624 | |||
625 | tmp = gfs2_lookupi(dir, &dotdot, 1); | ||
626 | if (IS_ERR(tmp)) { | ||
627 | error = PTR_ERR(tmp); | ||
628 | break; | ||
629 | } | ||
630 | |||
631 | iput(dir); | ||
632 | dir = tmp; | ||
633 | } | ||
634 | |||
635 | iput(dir); | ||
636 | |||
637 | return error; | ||
638 | } | ||
639 | |||
574 | /** | 640 | /** |
575 | * gfs2_rename - Rename a file | 641 | * gfs2_rename - Rename a file |
576 | * @odir: Parent directory of old file name | 642 | * @odir: Parent directory of old file name |
@@ -589,7 +655,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry, | |||
589 | struct gfs2_inode *ip = GFS2_I(odentry->d_inode); | 655 | struct gfs2_inode *ip = GFS2_I(odentry->d_inode); |
590 | struct gfs2_inode *nip = NULL; | 656 | struct gfs2_inode *nip = NULL; |
591 | struct gfs2_sbd *sdp = GFS2_SB(odir); | 657 | struct gfs2_sbd *sdp = GFS2_SB(odir); |
592 | struct gfs2_holder ghs[5], r_gh; | 658 | struct gfs2_holder ghs[5], r_gh = { .gh_gl = NULL, }; |
593 | struct gfs2_rgrpd *nrgd; | 659 | struct gfs2_rgrpd *nrgd; |
594 | unsigned int num_gh; | 660 | unsigned int num_gh; |
595 | int dir_rename = 0; | 661 | int dir_rename = 0; |
@@ -603,19 +669,20 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry, | |||
603 | return 0; | 669 | return 0; |
604 | } | 670 | } |
605 | 671 | ||
606 | /* Make sure we aren't trying to move a dirctory into it's subdir */ | ||
607 | |||
608 | if (S_ISDIR(ip->i_inode.i_mode) && odip != ndip) { | ||
609 | dir_rename = 1; | ||
610 | 672 | ||
611 | error = gfs2_glock_nq_init(sdp->sd_rename_gl, LM_ST_EXCLUSIVE, 0, | 673 | if (odip != ndip) { |
612 | &r_gh); | 674 | error = gfs2_glock_nq_init(sdp->sd_rename_gl, LM_ST_EXCLUSIVE, |
675 | 0, &r_gh); | ||
613 | if (error) | 676 | if (error) |
614 | goto out; | 677 | goto out; |
615 | 678 | ||
616 | error = gfs2_ok_to_move(ip, ndip); | 679 | if (S_ISDIR(ip->i_inode.i_mode)) { |
617 | if (error) | 680 | dir_rename = 1; |
618 | goto out_gunlock_r; | 681 | /* don't move a dirctory into it's subdir */ |
682 | error = gfs2_ok_to_move(ip, ndip); | ||
683 | if (error) | ||
684 | goto out_gunlock_r; | ||
685 | } | ||
619 | } | 686 | } |
620 | 687 | ||
621 | num_gh = 1; | 688 | num_gh = 1; |
@@ -639,9 +706,11 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry, | |||
639 | gfs2_holder_init(nrgd->rd_gl, LM_ST_EXCLUSIVE, 0, ghs + num_gh++); | 706 | gfs2_holder_init(nrgd->rd_gl, LM_ST_EXCLUSIVE, 0, ghs + num_gh++); |
640 | } | 707 | } |
641 | 708 | ||
642 | error = gfs2_glock_nq_m(num_gh, ghs); | 709 | for (x = 0; x < num_gh; x++) { |
643 | if (error) | 710 | error = gfs2_glock_nq(ghs + x); |
644 | goto out_uninit; | 711 | if (error) |
712 | goto out_gunlock; | ||
713 | } | ||
645 | 714 | ||
646 | /* Check out the old directory */ | 715 | /* Check out the old directory */ |
647 | 716 | ||
@@ -804,12 +873,12 @@ out_alloc: | |||
804 | if (alloc_required) | 873 | if (alloc_required) |
805 | gfs2_alloc_put(ndip); | 874 | gfs2_alloc_put(ndip); |
806 | out_gunlock: | 875 | out_gunlock: |
807 | gfs2_glock_dq_m(num_gh, ghs); | 876 | while (x--) { |
808 | out_uninit: | 877 | gfs2_glock_dq(ghs + x); |
809 | for (x = 0; x < num_gh; x++) | ||
810 | gfs2_holder_uninit(ghs + x); | 878 | gfs2_holder_uninit(ghs + x); |
879 | } | ||
811 | out_gunlock_r: | 880 | out_gunlock_r: |
812 | if (dir_rename) | 881 | if (r_gh.gh_gl) |
813 | gfs2_glock_dq_uninit(&r_gh); | 882 | gfs2_glock_dq_uninit(&r_gh); |
814 | out: | 883 | out: |
815 | return error; | 884 | return error; |
diff --git a/fs/gfs2/ops_super.c b/fs/gfs2/ops_super.c index f66ea0f7a356..d5355d9b5926 100644 --- a/fs/gfs2/ops_super.c +++ b/fs/gfs2/ops_super.c | |||
@@ -20,6 +20,7 @@ | |||
20 | #include <linux/gfs2_ondisk.h> | 20 | #include <linux/gfs2_ondisk.h> |
21 | #include <linux/crc32.h> | 21 | #include <linux/crc32.h> |
22 | #include <linux/lm_interface.h> | 22 | #include <linux/lm_interface.h> |
23 | #include <linux/time.h> | ||
23 | 24 | ||
24 | #include "gfs2.h" | 25 | #include "gfs2.h" |
25 | #include "incore.h" | 26 | #include "incore.h" |
@@ -38,6 +39,7 @@ | |||
38 | #include "dir.h" | 39 | #include "dir.h" |
39 | #include "eattr.h" | 40 | #include "eattr.h" |
40 | #include "bmap.h" | 41 | #include "bmap.h" |
42 | #include "meta_io.h" | ||
41 | 43 | ||
42 | /** | 44 | /** |
43 | * gfs2_write_inode - Make sure the inode is stable on the disk | 45 | * gfs2_write_inode - Make sure the inode is stable on the disk |
@@ -50,16 +52,74 @@ | |||
50 | static int gfs2_write_inode(struct inode *inode, int sync) | 52 | static int gfs2_write_inode(struct inode *inode, int sync) |
51 | { | 53 | { |
52 | struct gfs2_inode *ip = GFS2_I(inode); | 54 | struct gfs2_inode *ip = GFS2_I(inode); |
53 | 55 | struct gfs2_sbd *sdp = GFS2_SB(inode); | |
54 | /* Check this is a "normal" inode */ | 56 | struct gfs2_holder gh; |
55 | if (test_bit(GIF_USER, &ip->i_flags)) { | 57 | struct buffer_head *bh; |
56 | if (current->flags & PF_MEMALLOC) | 58 | struct timespec atime; |
57 | return 0; | 59 | struct gfs2_dinode *di; |
58 | if (sync) | 60 | int ret = 0; |
59 | gfs2_log_flush(GFS2_SB(inode), ip->i_gl); | 61 | |
62 | /* Check this is a "normal" inode, etc */ | ||
63 | if (!test_bit(GIF_USER, &ip->i_flags) || | ||
64 | (current->flags & PF_MEMALLOC)) | ||
65 | return 0; | ||
66 | ret = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh); | ||
67 | if (ret) | ||
68 | goto do_flush; | ||
69 | ret = gfs2_trans_begin(sdp, RES_DINODE, 0); | ||
70 | if (ret) | ||
71 | goto do_unlock; | ||
72 | ret = gfs2_meta_inode_buffer(ip, &bh); | ||
73 | if (ret == 0) { | ||
74 | di = (struct gfs2_dinode *)bh->b_data; | ||
75 | atime.tv_sec = be64_to_cpu(di->di_atime); | ||
76 | atime.tv_nsec = be32_to_cpu(di->di_atime_nsec); | ||
77 | if (timespec_compare(&inode->i_atime, &atime) > 0) { | ||
78 | gfs2_trans_add_bh(ip->i_gl, bh, 1); | ||
79 | gfs2_dinode_out(ip, bh->b_data); | ||
80 | } | ||
81 | brelse(bh); | ||
60 | } | 82 | } |
83 | gfs2_trans_end(sdp); | ||
84 | do_unlock: | ||
85 | gfs2_glock_dq_uninit(&gh); | ||
86 | do_flush: | ||
87 | if (sync != 0) | ||
88 | gfs2_log_flush(GFS2_SB(inode), ip->i_gl); | ||
89 | return ret; | ||
90 | } | ||
61 | 91 | ||
62 | return 0; | 92 | /** |
93 | * gfs2_make_fs_ro - Turn a Read-Write FS into a Read-Only one | ||
94 | * @sdp: the filesystem | ||
95 | * | ||
96 | * Returns: errno | ||
97 | */ | ||
98 | |||
99 | static int gfs2_make_fs_ro(struct gfs2_sbd *sdp) | ||
100 | { | ||
101 | struct gfs2_holder t_gh; | ||
102 | int error; | ||
103 | |||
104 | gfs2_quota_sync(sdp); | ||
105 | gfs2_statfs_sync(sdp); | ||
106 | |||
107 | error = gfs2_glock_nq_init(sdp->sd_trans_gl, LM_ST_SHARED, GL_NOCACHE, | ||
108 | &t_gh); | ||
109 | if (error && !test_bit(SDF_SHUTDOWN, &sdp->sd_flags)) | ||
110 | return error; | ||
111 | |||
112 | gfs2_meta_syncfs(sdp); | ||
113 | gfs2_log_shutdown(sdp); | ||
114 | |||
115 | clear_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags); | ||
116 | |||
117 | if (t_gh.gh_gl) | ||
118 | gfs2_glock_dq_uninit(&t_gh); | ||
119 | |||
120 | gfs2_quota_cleanup(sdp); | ||
121 | |||
122 | return error; | ||
63 | } | 123 | } |
64 | 124 | ||
65 | /** | 125 | /** |
@@ -73,12 +133,6 @@ static void gfs2_put_super(struct super_block *sb) | |||
73 | struct gfs2_sbd *sdp = sb->s_fs_info; | 133 | struct gfs2_sbd *sdp = sb->s_fs_info; |
74 | int error; | 134 | int error; |
75 | 135 | ||
76 | if (!sdp) | ||
77 | return; | ||
78 | |||
79 | if (!strncmp(sb->s_type->name, "gfs2meta", 8)) | ||
80 | return; /* Nothing to do */ | ||
81 | |||
82 | /* Unfreeze the filesystem, if we need to */ | 136 | /* Unfreeze the filesystem, if we need to */ |
83 | 137 | ||
84 | mutex_lock(&sdp->sd_freeze_lock); | 138 | mutex_lock(&sdp->sd_freeze_lock); |
@@ -101,7 +155,6 @@ static void gfs2_put_super(struct super_block *sb) | |||
101 | 155 | ||
102 | /* Release stuff */ | 156 | /* Release stuff */ |
103 | 157 | ||
104 | iput(sdp->sd_master_dir); | ||
105 | iput(sdp->sd_jindex); | 158 | iput(sdp->sd_jindex); |
106 | iput(sdp->sd_inum_inode); | 159 | iput(sdp->sd_inum_inode); |
107 | iput(sdp->sd_statfs_inode); | 160 | iput(sdp->sd_statfs_inode); |
@@ -152,6 +205,7 @@ static void gfs2_write_super(struct super_block *sb) | |||
152 | * | 205 | * |
153 | * Flushes the log to disk. | 206 | * Flushes the log to disk. |
154 | */ | 207 | */ |
208 | |||
155 | static int gfs2_sync_fs(struct super_block *sb, int wait) | 209 | static int gfs2_sync_fs(struct super_block *sb, int wait) |
156 | { | 210 | { |
157 | sb->s_dirt = 0; | 211 | sb->s_dirt = 0; |
@@ -270,14 +324,6 @@ static int gfs2_remount_fs(struct super_block *sb, int *flags, char *data) | |||
270 | } | 324 | } |
271 | } | 325 | } |
272 | 326 | ||
273 | if (*flags & (MS_NOATIME | MS_NODIRATIME)) | ||
274 | set_bit(SDF_NOATIME, &sdp->sd_flags); | ||
275 | else | ||
276 | clear_bit(SDF_NOATIME, &sdp->sd_flags); | ||
277 | |||
278 | /* Don't let the VFS update atimes. GFS2 handles this itself. */ | ||
279 | *flags |= MS_NOATIME | MS_NODIRATIME; | ||
280 | |||
281 | return error; | 327 | return error; |
282 | } | 328 | } |
283 | 329 | ||
@@ -295,6 +341,7 @@ static int gfs2_remount_fs(struct super_block *sb, int *flags, char *data) | |||
295 | * inode's blocks, or alternatively pass the baton on to another | 341 | * inode's blocks, or alternatively pass the baton on to another |
296 | * node for later deallocation. | 342 | * node for later deallocation. |
297 | */ | 343 | */ |
344 | |||
298 | static void gfs2_drop_inode(struct inode *inode) | 345 | static void gfs2_drop_inode(struct inode *inode) |
299 | { | 346 | { |
300 | struct gfs2_inode *ip = GFS2_I(inode); | 347 | struct gfs2_inode *ip = GFS2_I(inode); |
@@ -333,6 +380,16 @@ static void gfs2_clear_inode(struct inode *inode) | |||
333 | } | 380 | } |
334 | } | 381 | } |
335 | 382 | ||
383 | static int is_ancestor(const struct dentry *d1, const struct dentry *d2) | ||
384 | { | ||
385 | do { | ||
386 | if (d1 == d2) | ||
387 | return 1; | ||
388 | d1 = d1->d_parent; | ||
389 | } while (!IS_ROOT(d1)); | ||
390 | return 0; | ||
391 | } | ||
392 | |||
336 | /** | 393 | /** |
337 | * gfs2_show_options - Show mount options for /proc/mounts | 394 | * gfs2_show_options - Show mount options for /proc/mounts |
338 | * @s: seq_file structure | 395 | * @s: seq_file structure |
@@ -346,6 +403,8 @@ static int gfs2_show_options(struct seq_file *s, struct vfsmount *mnt) | |||
346 | struct gfs2_sbd *sdp = mnt->mnt_sb->s_fs_info; | 403 | struct gfs2_sbd *sdp = mnt->mnt_sb->s_fs_info; |
347 | struct gfs2_args *args = &sdp->sd_args; | 404 | struct gfs2_args *args = &sdp->sd_args; |
348 | 405 | ||
406 | if (is_ancestor(mnt->mnt_root, sdp->sd_master_dir)) | ||
407 | seq_printf(s, ",meta"); | ||
349 | if (args->ar_lockproto[0]) | 408 | if (args->ar_lockproto[0]) |
350 | seq_printf(s, ",lockproto=%s", args->ar_lockproto); | 409 | seq_printf(s, ",lockproto=%s", args->ar_lockproto); |
351 | if (args->ar_locktable[0]) | 410 | if (args->ar_locktable[0]) |
@@ -414,6 +473,7 @@ static int gfs2_show_options(struct seq_file *s, struct vfsmount *mnt) | |||
414 | * conversion on the iopen lock, but we can change that later. This | 473 | * conversion on the iopen lock, but we can change that later. This |
415 | * is safe, just less efficient. | 474 | * is safe, just less efficient. |
416 | */ | 475 | */ |
476 | |||
417 | static void gfs2_delete_inode(struct inode *inode) | 477 | static void gfs2_delete_inode(struct inode *inode) |
418 | { | 478 | { |
419 | struct gfs2_sbd *sdp = inode->i_sb->s_fs_info; | 479 | struct gfs2_sbd *sdp = inode->i_sb->s_fs_info; |
@@ -478,8 +538,6 @@ out: | |||
478 | clear_inode(inode); | 538 | clear_inode(inode); |
479 | } | 539 | } |
480 | 540 | ||
481 | |||
482 | |||
483 | static struct inode *gfs2_alloc_inode(struct super_block *sb) | 541 | static struct inode *gfs2_alloc_inode(struct super_block *sb) |
484 | { | 542 | { |
485 | struct gfs2_inode *ip; | 543 | struct gfs2_inode *ip; |
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index ca831991cbc2..c3ba3d9d0aac 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c | |||
@@ -33,313 +33,6 @@ | |||
33 | #include "trans.h" | 33 | #include "trans.h" |
34 | #include "util.h" | 34 | #include "util.h" |
35 | 35 | ||
36 | static const u32 gfs2_old_fs_formats[] = { | ||
37 | 0 | ||
38 | }; | ||
39 | |||
40 | static const u32 gfs2_old_multihost_formats[] = { | ||
41 | 0 | ||
42 | }; | ||
43 | |||
44 | /** | ||
45 | * gfs2_tune_init - Fill a gfs2_tune structure with default values | ||
46 | * @gt: tune | ||
47 | * | ||
48 | */ | ||
49 | |||
50 | void gfs2_tune_init(struct gfs2_tune *gt) | ||
51 | { | ||
52 | spin_lock_init(>->gt_spin); | ||
53 | |||
54 | gt->gt_demote_secs = 300; | ||
55 | gt->gt_incore_log_blocks = 1024; | ||
56 | gt->gt_log_flush_secs = 60; | ||
57 | gt->gt_recoverd_secs = 60; | ||
58 | gt->gt_logd_secs = 1; | ||
59 | gt->gt_quotad_secs = 5; | ||
60 | gt->gt_quota_simul_sync = 64; | ||
61 | gt->gt_quota_warn_period = 10; | ||
62 | gt->gt_quota_scale_num = 1; | ||
63 | gt->gt_quota_scale_den = 1; | ||
64 | gt->gt_quota_cache_secs = 300; | ||
65 | gt->gt_quota_quantum = 60; | ||
66 | gt->gt_atime_quantum = 3600; | ||
67 | gt->gt_new_files_jdata = 0; | ||
68 | gt->gt_max_readahead = 1 << 18; | ||
69 | gt->gt_stall_secs = 600; | ||
70 | gt->gt_complain_secs = 10; | ||
71 | gt->gt_statfs_quantum = 30; | ||
72 | gt->gt_statfs_slow = 0; | ||
73 | } | ||
74 | |||
75 | /** | ||
76 | * gfs2_check_sb - Check superblock | ||
77 | * @sdp: the filesystem | ||
78 | * @sb: The superblock | ||
79 | * @silent: Don't print a message if the check fails | ||
80 | * | ||
81 | * Checks the version code of the FS is one that we understand how to | ||
82 | * read and that the sizes of the various on-disk structures have not | ||
83 | * changed. | ||
84 | */ | ||
85 | |||
86 | int gfs2_check_sb(struct gfs2_sbd *sdp, struct gfs2_sb_host *sb, int silent) | ||
87 | { | ||
88 | unsigned int x; | ||
89 | |||
90 | if (sb->sb_magic != GFS2_MAGIC || | ||
91 | sb->sb_type != GFS2_METATYPE_SB) { | ||
92 | if (!silent) | ||
93 | printk(KERN_WARNING "GFS2: not a GFS2 filesystem\n"); | ||
94 | return -EINVAL; | ||
95 | } | ||
96 | |||
97 | /* If format numbers match exactly, we're done. */ | ||
98 | |||
99 | if (sb->sb_fs_format == GFS2_FORMAT_FS && | ||
100 | sb->sb_multihost_format == GFS2_FORMAT_MULTI) | ||
101 | return 0; | ||
102 | |||
103 | if (sb->sb_fs_format != GFS2_FORMAT_FS) { | ||
104 | for (x = 0; gfs2_old_fs_formats[x]; x++) | ||
105 | if (gfs2_old_fs_formats[x] == sb->sb_fs_format) | ||
106 | break; | ||
107 | |||
108 | if (!gfs2_old_fs_formats[x]) { | ||
109 | printk(KERN_WARNING | ||
110 | "GFS2: code version (%u, %u) is incompatible " | ||
111 | "with ondisk format (%u, %u)\n", | ||
112 | GFS2_FORMAT_FS, GFS2_FORMAT_MULTI, | ||
113 | sb->sb_fs_format, sb->sb_multihost_format); | ||
114 | printk(KERN_WARNING | ||
115 | "GFS2: I don't know how to upgrade this FS\n"); | ||
116 | return -EINVAL; | ||
117 | } | ||
118 | } | ||
119 | |||
120 | if (sb->sb_multihost_format != GFS2_FORMAT_MULTI) { | ||
121 | for (x = 0; gfs2_old_multihost_formats[x]; x++) | ||
122 | if (gfs2_old_multihost_formats[x] == | ||
123 | sb->sb_multihost_format) | ||
124 | break; | ||
125 | |||
126 | if (!gfs2_old_multihost_formats[x]) { | ||
127 | printk(KERN_WARNING | ||
128 | "GFS2: code version (%u, %u) is incompatible " | ||
129 | "with ondisk format (%u, %u)\n", | ||
130 | GFS2_FORMAT_FS, GFS2_FORMAT_MULTI, | ||
131 | sb->sb_fs_format, sb->sb_multihost_format); | ||
132 | printk(KERN_WARNING | ||
133 | "GFS2: I don't know how to upgrade this FS\n"); | ||
134 | return -EINVAL; | ||
135 | } | ||
136 | } | ||
137 | |||
138 | if (!sdp->sd_args.ar_upgrade) { | ||
139 | printk(KERN_WARNING | ||
140 | "GFS2: code version (%u, %u) is incompatible " | ||
141 | "with ondisk format (%u, %u)\n", | ||
142 | GFS2_FORMAT_FS, GFS2_FORMAT_MULTI, | ||
143 | sb->sb_fs_format, sb->sb_multihost_format); | ||
144 | printk(KERN_INFO | ||
145 | "GFS2: Use the \"upgrade\" mount option to upgrade " | ||
146 | "the FS\n"); | ||
147 | printk(KERN_INFO "GFS2: See the manual for more details\n"); | ||
148 | return -EINVAL; | ||
149 | } | ||
150 | |||
151 | return 0; | ||
152 | } | ||
153 | |||
154 | |||
155 | static void end_bio_io_page(struct bio *bio, int error) | ||
156 | { | ||
157 | struct page *page = bio->bi_private; | ||
158 | |||
159 | if (!error) | ||
160 | SetPageUptodate(page); | ||
161 | else | ||
162 | printk(KERN_WARNING "gfs2: error %d reading superblock\n", error); | ||
163 | unlock_page(page); | ||
164 | } | ||
165 | |||
166 | static void gfs2_sb_in(struct gfs2_sb_host *sb, const void *buf) | ||
167 | { | ||
168 | const struct gfs2_sb *str = buf; | ||
169 | |||
170 | sb->sb_magic = be32_to_cpu(str->sb_header.mh_magic); | ||
171 | sb->sb_type = be32_to_cpu(str->sb_header.mh_type); | ||
172 | sb->sb_format = be32_to_cpu(str->sb_header.mh_format); | ||
173 | sb->sb_fs_format = be32_to_cpu(str->sb_fs_format); | ||
174 | sb->sb_multihost_format = be32_to_cpu(str->sb_multihost_format); | ||
175 | sb->sb_bsize = be32_to_cpu(str->sb_bsize); | ||
176 | sb->sb_bsize_shift = be32_to_cpu(str->sb_bsize_shift); | ||
177 | sb->sb_master_dir.no_addr = be64_to_cpu(str->sb_master_dir.no_addr); | ||
178 | sb->sb_master_dir.no_formal_ino = be64_to_cpu(str->sb_master_dir.no_formal_ino); | ||
179 | sb->sb_root_dir.no_addr = be64_to_cpu(str->sb_root_dir.no_addr); | ||
180 | sb->sb_root_dir.no_formal_ino = be64_to_cpu(str->sb_root_dir.no_formal_ino); | ||
181 | |||
182 | memcpy(sb->sb_lockproto, str->sb_lockproto, GFS2_LOCKNAME_LEN); | ||
183 | memcpy(sb->sb_locktable, str->sb_locktable, GFS2_LOCKNAME_LEN); | ||
184 | } | ||
185 | |||
186 | /** | ||
187 | * gfs2_read_super - Read the gfs2 super block from disk | ||
188 | * @sdp: The GFS2 super block | ||
189 | * @sector: The location of the super block | ||
190 | * @error: The error code to return | ||
191 | * | ||
192 | * This uses the bio functions to read the super block from disk | ||
193 | * because we want to be 100% sure that we never read cached data. | ||
194 | * A super block is read twice only during each GFS2 mount and is | ||
195 | * never written to by the filesystem. The first time its read no | ||
196 | * locks are held, and the only details which are looked at are those | ||
197 | * relating to the locking protocol. Once locking is up and working, | ||
198 | * the sb is read again under the lock to establish the location of | ||
199 | * the master directory (contains pointers to journals etc) and the | ||
200 | * root directory. | ||
201 | * | ||
202 | * Returns: 0 on success or error | ||
203 | */ | ||
204 | |||
205 | int gfs2_read_super(struct gfs2_sbd *sdp, sector_t sector) | ||
206 | { | ||
207 | struct super_block *sb = sdp->sd_vfs; | ||
208 | struct gfs2_sb *p; | ||
209 | struct page *page; | ||
210 | struct bio *bio; | ||
211 | |||
212 | page = alloc_page(GFP_NOFS); | ||
213 | if (unlikely(!page)) | ||
214 | return -ENOBUFS; | ||
215 | |||
216 | ClearPageUptodate(page); | ||
217 | ClearPageDirty(page); | ||
218 | lock_page(page); | ||
219 | |||
220 | bio = bio_alloc(GFP_NOFS, 1); | ||
221 | if (unlikely(!bio)) { | ||
222 | __free_page(page); | ||
223 | return -ENOBUFS; | ||
224 | } | ||
225 | |||
226 | bio->bi_sector = sector * (sb->s_blocksize >> 9); | ||
227 | bio->bi_bdev = sb->s_bdev; | ||
228 | bio_add_page(bio, page, PAGE_SIZE, 0); | ||
229 | |||
230 | bio->bi_end_io = end_bio_io_page; | ||
231 | bio->bi_private = page; | ||
232 | submit_bio(READ_SYNC | (1 << BIO_RW_META), bio); | ||
233 | wait_on_page_locked(page); | ||
234 | bio_put(bio); | ||
235 | if (!PageUptodate(page)) { | ||
236 | __free_page(page); | ||
237 | return -EIO; | ||
238 | } | ||
239 | p = kmap(page); | ||
240 | gfs2_sb_in(&sdp->sd_sb, p); | ||
241 | kunmap(page); | ||
242 | __free_page(page); | ||
243 | return 0; | ||
244 | } | ||
245 | |||
246 | /** | ||
247 | * gfs2_read_sb - Read super block | ||
248 | * @sdp: The GFS2 superblock | ||
249 | * @gl: the glock for the superblock (assumed to be held) | ||
250 | * @silent: Don't print message if mount fails | ||
251 | * | ||
252 | */ | ||
253 | |||
254 | int gfs2_read_sb(struct gfs2_sbd *sdp, struct gfs2_glock *gl, int silent) | ||
255 | { | ||
256 | u32 hash_blocks, ind_blocks, leaf_blocks; | ||
257 | u32 tmp_blocks; | ||
258 | unsigned int x; | ||
259 | int error; | ||
260 | |||
261 | error = gfs2_read_super(sdp, GFS2_SB_ADDR >> sdp->sd_fsb2bb_shift); | ||
262 | if (error) { | ||
263 | if (!silent) | ||
264 | fs_err(sdp, "can't read superblock\n"); | ||
265 | return error; | ||
266 | } | ||
267 | |||
268 | error = gfs2_check_sb(sdp, &sdp->sd_sb, silent); | ||
269 | if (error) | ||
270 | return error; | ||
271 | |||
272 | sdp->sd_fsb2bb_shift = sdp->sd_sb.sb_bsize_shift - | ||
273 | GFS2_BASIC_BLOCK_SHIFT; | ||
274 | sdp->sd_fsb2bb = 1 << sdp->sd_fsb2bb_shift; | ||
275 | sdp->sd_diptrs = (sdp->sd_sb.sb_bsize - | ||
276 | sizeof(struct gfs2_dinode)) / sizeof(u64); | ||
277 | sdp->sd_inptrs = (sdp->sd_sb.sb_bsize - | ||
278 | sizeof(struct gfs2_meta_header)) / sizeof(u64); | ||
279 | sdp->sd_jbsize = sdp->sd_sb.sb_bsize - sizeof(struct gfs2_meta_header); | ||
280 | sdp->sd_hash_bsize = sdp->sd_sb.sb_bsize / 2; | ||
281 | sdp->sd_hash_bsize_shift = sdp->sd_sb.sb_bsize_shift - 1; | ||
282 | sdp->sd_hash_ptrs = sdp->sd_hash_bsize / sizeof(u64); | ||
283 | sdp->sd_qc_per_block = (sdp->sd_sb.sb_bsize - | ||
284 | sizeof(struct gfs2_meta_header)) / | ||
285 | sizeof(struct gfs2_quota_change); | ||
286 | |||
287 | /* Compute maximum reservation required to add a entry to a directory */ | ||
288 | |||
289 | hash_blocks = DIV_ROUND_UP(sizeof(u64) * (1 << GFS2_DIR_MAX_DEPTH), | ||
290 | sdp->sd_jbsize); | ||
291 | |||
292 | ind_blocks = 0; | ||
293 | for (tmp_blocks = hash_blocks; tmp_blocks > sdp->sd_diptrs;) { | ||
294 | tmp_blocks = DIV_ROUND_UP(tmp_blocks, sdp->sd_inptrs); | ||
295 | ind_blocks += tmp_blocks; | ||
296 | } | ||
297 | |||
298 | leaf_blocks = 2 + GFS2_DIR_MAX_DEPTH; | ||
299 | |||
300 | sdp->sd_max_dirres = hash_blocks + ind_blocks + leaf_blocks; | ||
301 | |||
302 | sdp->sd_heightsize[0] = sdp->sd_sb.sb_bsize - | ||
303 | sizeof(struct gfs2_dinode); | ||
304 | sdp->sd_heightsize[1] = sdp->sd_sb.sb_bsize * sdp->sd_diptrs; | ||
305 | for (x = 2;; x++) { | ||
306 | u64 space, d; | ||
307 | u32 m; | ||
308 | |||
309 | space = sdp->sd_heightsize[x - 1] * sdp->sd_inptrs; | ||
310 | d = space; | ||
311 | m = do_div(d, sdp->sd_inptrs); | ||
312 | |||
313 | if (d != sdp->sd_heightsize[x - 1] || m) | ||
314 | break; | ||
315 | sdp->sd_heightsize[x] = space; | ||
316 | } | ||
317 | sdp->sd_max_height = x; | ||
318 | sdp->sd_heightsize[x] = ~0; | ||
319 | gfs2_assert(sdp, sdp->sd_max_height <= GFS2_MAX_META_HEIGHT); | ||
320 | |||
321 | sdp->sd_jheightsize[0] = sdp->sd_sb.sb_bsize - | ||
322 | sizeof(struct gfs2_dinode); | ||
323 | sdp->sd_jheightsize[1] = sdp->sd_jbsize * sdp->sd_diptrs; | ||
324 | for (x = 2;; x++) { | ||
325 | u64 space, d; | ||
326 | u32 m; | ||
327 | |||
328 | space = sdp->sd_jheightsize[x - 1] * sdp->sd_inptrs; | ||
329 | d = space; | ||
330 | m = do_div(d, sdp->sd_inptrs); | ||
331 | |||
332 | if (d != sdp->sd_jheightsize[x - 1] || m) | ||
333 | break; | ||
334 | sdp->sd_jheightsize[x] = space; | ||
335 | } | ||
336 | sdp->sd_max_jheight = x; | ||
337 | sdp->sd_jheightsize[x] = ~0; | ||
338 | gfs2_assert(sdp, sdp->sd_max_jheight <= GFS2_MAX_META_HEIGHT); | ||
339 | |||
340 | return 0; | ||
341 | } | ||
342 | |||
343 | /** | 36 | /** |
344 | * gfs2_jindex_hold - Grab a lock on the jindex | 37 | * gfs2_jindex_hold - Grab a lock on the jindex |
345 | * @sdp: The GFS2 superblock | 38 | * @sdp: The GFS2 superblock |
@@ -581,39 +274,6 @@ fail: | |||
581 | return error; | 274 | return error; |
582 | } | 275 | } |
583 | 276 | ||
584 | /** | ||
585 | * gfs2_make_fs_ro - Turn a Read-Write FS into a Read-Only one | ||
586 | * @sdp: the filesystem | ||
587 | * | ||
588 | * Returns: errno | ||
589 | */ | ||
590 | |||
591 | int gfs2_make_fs_ro(struct gfs2_sbd *sdp) | ||
592 | { | ||
593 | struct gfs2_holder t_gh; | ||
594 | int error; | ||
595 | |||
596 | gfs2_quota_sync(sdp); | ||
597 | gfs2_statfs_sync(sdp); | ||
598 | |||
599 | error = gfs2_glock_nq_init(sdp->sd_trans_gl, LM_ST_SHARED, GL_NOCACHE, | ||
600 | &t_gh); | ||
601 | if (error && !test_bit(SDF_SHUTDOWN, &sdp->sd_flags)) | ||
602 | return error; | ||
603 | |||
604 | gfs2_meta_syncfs(sdp); | ||
605 | gfs2_log_shutdown(sdp); | ||
606 | |||
607 | clear_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags); | ||
608 | |||
609 | if (t_gh.gh_gl) | ||
610 | gfs2_glock_dq_uninit(&t_gh); | ||
611 | |||
612 | gfs2_quota_cleanup(sdp); | ||
613 | |||
614 | return error; | ||
615 | } | ||
616 | |||
617 | static void gfs2_statfs_change_in(struct gfs2_statfs_change_host *sc, const void *buf) | 277 | static void gfs2_statfs_change_in(struct gfs2_statfs_change_host *sc, const void *buf) |
618 | { | 278 | { |
619 | const struct gfs2_statfs_change *str = buf; | 279 | const struct gfs2_statfs_change *str = buf; |
diff --git a/fs/gfs2/super.h b/fs/gfs2/super.h index 44361ecc44f7..50a4c9b1215e 100644 --- a/fs/gfs2/super.h +++ b/fs/gfs2/super.h | |||
@@ -12,11 +12,6 @@ | |||
12 | 12 | ||
13 | #include "incore.h" | 13 | #include "incore.h" |
14 | 14 | ||
15 | void gfs2_tune_init(struct gfs2_tune *gt); | ||
16 | |||
17 | int gfs2_check_sb(struct gfs2_sbd *sdp, struct gfs2_sb_host *sb, int silent); | ||
18 | int gfs2_read_sb(struct gfs2_sbd *sdp, struct gfs2_glock *gl, int silent); | ||
19 | int gfs2_read_super(struct gfs2_sbd *sdp, sector_t sector); | ||
20 | void gfs2_lm_unmount(struct gfs2_sbd *sdp); | 15 | void gfs2_lm_unmount(struct gfs2_sbd *sdp); |
21 | 16 | ||
22 | static inline unsigned int gfs2_jindex_size(struct gfs2_sbd *sdp) | 17 | static inline unsigned int gfs2_jindex_size(struct gfs2_sbd *sdp) |
@@ -40,7 +35,6 @@ int gfs2_lookup_in_master_dir(struct gfs2_sbd *sdp, char *filename, | |||
40 | struct gfs2_inode **ipp); | 35 | struct gfs2_inode **ipp); |
41 | 36 | ||
42 | int gfs2_make_fs_rw(struct gfs2_sbd *sdp); | 37 | int gfs2_make_fs_rw(struct gfs2_sbd *sdp); |
43 | int gfs2_make_fs_ro(struct gfs2_sbd *sdp); | ||
44 | 38 | ||
45 | int gfs2_statfs_init(struct gfs2_sbd *sdp); | 39 | int gfs2_statfs_init(struct gfs2_sbd *sdp); |
46 | void gfs2_statfs_change(struct gfs2_sbd *sdp, | 40 | void gfs2_statfs_change(struct gfs2_sbd *sdp, |
diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c index 74846559fc3f..7e1879f1a02c 100644 --- a/fs/gfs2/sys.c +++ b/fs/gfs2/sys.c | |||
@@ -269,14 +269,6 @@ ARGS_ATTR(quota, "%u\n"); | |||
269 | ARGS_ATTR(suiddir, "%d\n"); | 269 | ARGS_ATTR(suiddir, "%d\n"); |
270 | ARGS_ATTR(data, "%d\n"); | 270 | ARGS_ATTR(data, "%d\n"); |
271 | 271 | ||
272 | /* one oddball doesn't fit the macro mold */ | ||
273 | static ssize_t noatime_show(struct gfs2_sbd *sdp, char *buf) | ||
274 | { | ||
275 | return snprintf(buf, PAGE_SIZE, "%d\n", | ||
276 | !!test_bit(SDF_NOATIME, &sdp->sd_flags)); | ||
277 | } | ||
278 | static struct args_attr args_attr_noatime = __ATTR_RO(noatime); | ||
279 | |||
280 | static struct attribute *args_attrs[] = { | 272 | static struct attribute *args_attrs[] = { |
281 | &args_attr_lockproto.attr, | 273 | &args_attr_lockproto.attr, |
282 | &args_attr_locktable.attr, | 274 | &args_attr_locktable.attr, |
@@ -292,7 +284,6 @@ static struct attribute *args_attrs[] = { | |||
292 | &args_attr_quota.attr, | 284 | &args_attr_quota.attr, |
293 | &args_attr_suiddir.attr, | 285 | &args_attr_suiddir.attr, |
294 | &args_attr_data.attr, | 286 | &args_attr_data.attr, |
295 | &args_attr_noatime.attr, | ||
296 | NULL, | 287 | NULL, |
297 | }; | 288 | }; |
298 | 289 | ||
@@ -407,7 +398,6 @@ TUNE_ATTR(incore_log_blocks, 0); | |||
407 | TUNE_ATTR(log_flush_secs, 0); | 398 | TUNE_ATTR(log_flush_secs, 0); |
408 | TUNE_ATTR(quota_warn_period, 0); | 399 | TUNE_ATTR(quota_warn_period, 0); |
409 | TUNE_ATTR(quota_quantum, 0); | 400 | TUNE_ATTR(quota_quantum, 0); |
410 | TUNE_ATTR(atime_quantum, 0); | ||
411 | TUNE_ATTR(max_readahead, 0); | 401 | TUNE_ATTR(max_readahead, 0); |
412 | TUNE_ATTR(complain_secs, 0); | 402 | TUNE_ATTR(complain_secs, 0); |
413 | TUNE_ATTR(statfs_slow, 0); | 403 | TUNE_ATTR(statfs_slow, 0); |
@@ -427,7 +417,6 @@ static struct attribute *tune_attrs[] = { | |||
427 | &tune_attr_log_flush_secs.attr, | 417 | &tune_attr_log_flush_secs.attr, |
428 | &tune_attr_quota_warn_period.attr, | 418 | &tune_attr_quota_warn_period.attr, |
429 | &tune_attr_quota_quantum.attr, | 419 | &tune_attr_quota_quantum.attr, |
430 | &tune_attr_atime_quantum.attr, | ||
431 | &tune_attr_max_readahead.attr, | 420 | &tune_attr_max_readahead.attr, |
432 | &tune_attr_complain_secs.attr, | 421 | &tune_attr_complain_secs.attr, |
433 | &tune_attr_statfs_slow.attr, | 422 | &tune_attr_statfs_slow.attr, |
diff --git a/fs/inotify_user.c b/fs/inotify_user.c index 60249429a253..d85c7d931cdf 100644 --- a/fs/inotify_user.c +++ b/fs/inotify_user.c | |||
@@ -323,7 +323,7 @@ out: | |||
323 | } | 323 | } |
324 | 324 | ||
325 | /* | 325 | /* |
326 | * remove_kevent - cleans up and ultimately frees the given kevent | 326 | * remove_kevent - cleans up the given kevent |
327 | * | 327 | * |
328 | * Caller must hold dev->ev_mutex. | 328 | * Caller must hold dev->ev_mutex. |
329 | */ | 329 | */ |
@@ -334,7 +334,13 @@ static void remove_kevent(struct inotify_device *dev, | |||
334 | 334 | ||
335 | dev->event_count--; | 335 | dev->event_count--; |
336 | dev->queue_size -= sizeof(struct inotify_event) + kevent->event.len; | 336 | dev->queue_size -= sizeof(struct inotify_event) + kevent->event.len; |
337 | } | ||
337 | 338 | ||
339 | /* | ||
340 | * free_kevent - frees the given kevent. | ||
341 | */ | ||
342 | static void free_kevent(struct inotify_kernel_event *kevent) | ||
343 | { | ||
338 | kfree(kevent->name); | 344 | kfree(kevent->name); |
339 | kmem_cache_free(event_cachep, kevent); | 345 | kmem_cache_free(event_cachep, kevent); |
340 | } | 346 | } |
@@ -350,6 +356,7 @@ static void inotify_dev_event_dequeue(struct inotify_device *dev) | |||
350 | struct inotify_kernel_event *kevent; | 356 | struct inotify_kernel_event *kevent; |
351 | kevent = inotify_dev_get_event(dev); | 357 | kevent = inotify_dev_get_event(dev); |
352 | remove_kevent(dev, kevent); | 358 | remove_kevent(dev, kevent); |
359 | free_kevent(kevent); | ||
353 | } | 360 | } |
354 | } | 361 | } |
355 | 362 | ||
@@ -433,17 +440,15 @@ static ssize_t inotify_read(struct file *file, char __user *buf, | |||
433 | dev = file->private_data; | 440 | dev = file->private_data; |
434 | 441 | ||
435 | while (1) { | 442 | while (1) { |
436 | int events; | ||
437 | 443 | ||
438 | prepare_to_wait(&dev->wq, &wait, TASK_INTERRUPTIBLE); | 444 | prepare_to_wait(&dev->wq, &wait, TASK_INTERRUPTIBLE); |
439 | 445 | ||
440 | mutex_lock(&dev->ev_mutex); | 446 | mutex_lock(&dev->ev_mutex); |
441 | events = !list_empty(&dev->events); | 447 | if (!list_empty(&dev->events)) { |
442 | mutex_unlock(&dev->ev_mutex); | ||
443 | if (events) { | ||
444 | ret = 0; | 448 | ret = 0; |
445 | break; | 449 | break; |
446 | } | 450 | } |
451 | mutex_unlock(&dev->ev_mutex); | ||
447 | 452 | ||
448 | if (file->f_flags & O_NONBLOCK) { | 453 | if (file->f_flags & O_NONBLOCK) { |
449 | ret = -EAGAIN; | 454 | ret = -EAGAIN; |
@@ -462,7 +467,6 @@ static ssize_t inotify_read(struct file *file, char __user *buf, | |||
462 | if (ret) | 467 | if (ret) |
463 | return ret; | 468 | return ret; |
464 | 469 | ||
465 | mutex_lock(&dev->ev_mutex); | ||
466 | while (1) { | 470 | while (1) { |
467 | struct inotify_kernel_event *kevent; | 471 | struct inotify_kernel_event *kevent; |
468 | 472 | ||
@@ -481,6 +485,13 @@ static ssize_t inotify_read(struct file *file, char __user *buf, | |||
481 | } | 485 | } |
482 | break; | 486 | break; |
483 | } | 487 | } |
488 | remove_kevent(dev, kevent); | ||
489 | |||
490 | /* | ||
491 | * Must perform the copy_to_user outside the mutex in order | ||
492 | * to avoid a lock order reversal with mmap_sem. | ||
493 | */ | ||
494 | mutex_unlock(&dev->ev_mutex); | ||
484 | 495 | ||
485 | if (copy_to_user(buf, &kevent->event, event_size)) { | 496 | if (copy_to_user(buf, &kevent->event, event_size)) { |
486 | ret = -EFAULT; | 497 | ret = -EFAULT; |
@@ -498,7 +509,9 @@ static ssize_t inotify_read(struct file *file, char __user *buf, | |||
498 | count -= kevent->event.len; | 509 | count -= kevent->event.len; |
499 | } | 510 | } |
500 | 511 | ||
501 | remove_kevent(dev, kevent); | 512 | free_kevent(kevent); |
513 | |||
514 | mutex_lock(&dev->ev_mutex); | ||
502 | } | 515 | } |
503 | mutex_unlock(&dev->ev_mutex); | 516 | mutex_unlock(&dev->ev_mutex); |
504 | 517 | ||
diff --git a/fs/ioctl.c b/fs/ioctl.c index 7db32b3382d3..d152856c371b 100644 --- a/fs/ioctl.c +++ b/fs/ioctl.c | |||
@@ -13,9 +13,14 @@ | |||
13 | #include <linux/security.h> | 13 | #include <linux/security.h> |
14 | #include <linux/module.h> | 14 | #include <linux/module.h> |
15 | #include <linux/uaccess.h> | 15 | #include <linux/uaccess.h> |
16 | #include <linux/writeback.h> | ||
17 | #include <linux/buffer_head.h> | ||
16 | 18 | ||
17 | #include <asm/ioctls.h> | 19 | #include <asm/ioctls.h> |
18 | 20 | ||
21 | /* So that the fiemap access checks can't overflow on 32 bit machines. */ | ||
22 | #define FIEMAP_MAX_EXTENTS (UINT_MAX / sizeof(struct fiemap_extent)) | ||
23 | |||
19 | /** | 24 | /** |
20 | * vfs_ioctl - call filesystem specific ioctl methods | 25 | * vfs_ioctl - call filesystem specific ioctl methods |
21 | * @filp: open file to invoke ioctl method on | 26 | * @filp: open file to invoke ioctl method on |
@@ -71,6 +76,276 @@ static int ioctl_fibmap(struct file *filp, int __user *p) | |||
71 | return put_user(res, p); | 76 | return put_user(res, p); |
72 | } | 77 | } |
73 | 78 | ||
79 | /** | ||
80 | * fiemap_fill_next_extent - Fiemap helper function | ||
81 | * @fieinfo: Fiemap context passed into ->fiemap | ||
82 | * @logical: Extent logical start offset, in bytes | ||
83 | * @phys: Extent physical start offset, in bytes | ||
84 | * @len: Extent length, in bytes | ||
85 | * @flags: FIEMAP_EXTENT flags that describe this extent | ||
86 | * | ||
87 | * Called from file system ->fiemap callback. Will populate extent | ||
88 | * info as passed in via arguments and copy to user memory. On | ||
89 | * success, extent count on fieinfo is incremented. | ||
90 | * | ||
91 | * Returns 0 on success, -errno on error, 1 if this was the last | ||
92 | * extent that will fit in user array. | ||
93 | */ | ||
94 | #define SET_UNKNOWN_FLAGS (FIEMAP_EXTENT_DELALLOC) | ||
95 | #define SET_NO_UNMOUNTED_IO_FLAGS (FIEMAP_EXTENT_DATA_ENCRYPTED) | ||
96 | #define SET_NOT_ALIGNED_FLAGS (FIEMAP_EXTENT_DATA_TAIL|FIEMAP_EXTENT_DATA_INLINE) | ||
97 | int fiemap_fill_next_extent(struct fiemap_extent_info *fieinfo, u64 logical, | ||
98 | u64 phys, u64 len, u32 flags) | ||
99 | { | ||
100 | struct fiemap_extent extent; | ||
101 | struct fiemap_extent *dest = fieinfo->fi_extents_start; | ||
102 | |||
103 | /* only count the extents */ | ||
104 | if (fieinfo->fi_extents_max == 0) { | ||
105 | fieinfo->fi_extents_mapped++; | ||
106 | return (flags & FIEMAP_EXTENT_LAST) ? 1 : 0; | ||
107 | } | ||
108 | |||
109 | if (fieinfo->fi_extents_mapped >= fieinfo->fi_extents_max) | ||
110 | return 1; | ||
111 | |||
112 | if (flags & SET_UNKNOWN_FLAGS) | ||
113 | flags |= FIEMAP_EXTENT_UNKNOWN; | ||
114 | if (flags & SET_NO_UNMOUNTED_IO_FLAGS) | ||
115 | flags |= FIEMAP_EXTENT_ENCODED; | ||
116 | if (flags & SET_NOT_ALIGNED_FLAGS) | ||
117 | flags |= FIEMAP_EXTENT_NOT_ALIGNED; | ||
118 | |||
119 | memset(&extent, 0, sizeof(extent)); | ||
120 | extent.fe_logical = logical; | ||
121 | extent.fe_physical = phys; | ||
122 | extent.fe_length = len; | ||
123 | extent.fe_flags = flags; | ||
124 | |||
125 | dest += fieinfo->fi_extents_mapped; | ||
126 | if (copy_to_user(dest, &extent, sizeof(extent))) | ||
127 | return -EFAULT; | ||
128 | |||
129 | fieinfo->fi_extents_mapped++; | ||
130 | if (fieinfo->fi_extents_mapped == fieinfo->fi_extents_max) | ||
131 | return 1; | ||
132 | return (flags & FIEMAP_EXTENT_LAST) ? 1 : 0; | ||
133 | } | ||
134 | EXPORT_SYMBOL(fiemap_fill_next_extent); | ||
135 | |||
136 | /** | ||
137 | * fiemap_check_flags - check validity of requested flags for fiemap | ||
138 | * @fieinfo: Fiemap context passed into ->fiemap | ||
139 | * @fs_flags: Set of fiemap flags that the file system understands | ||
140 | * | ||
141 | * Called from file system ->fiemap callback. This will compute the | ||
142 | * intersection of valid fiemap flags and those that the fs supports. That | ||
143 | * value is then compared against the user supplied flags. In case of bad user | ||
144 | * flags, the invalid values will be written into the fieinfo structure, and | ||
145 | * -EBADR is returned, which tells ioctl_fiemap() to return those values to | ||
146 | * userspace. For this reason, a return code of -EBADR should be preserved. | ||
147 | * | ||
148 | * Returns 0 on success, -EBADR on bad flags. | ||
149 | */ | ||
150 | int fiemap_check_flags(struct fiemap_extent_info *fieinfo, u32 fs_flags) | ||
151 | { | ||
152 | u32 incompat_flags; | ||
153 | |||
154 | incompat_flags = fieinfo->fi_flags & ~(FIEMAP_FLAGS_COMPAT & fs_flags); | ||
155 | if (incompat_flags) { | ||
156 | fieinfo->fi_flags = incompat_flags; | ||
157 | return -EBADR; | ||
158 | } | ||
159 | return 0; | ||
160 | } | ||
161 | EXPORT_SYMBOL(fiemap_check_flags); | ||
162 | |||
163 | static int fiemap_check_ranges(struct super_block *sb, | ||
164 | u64 start, u64 len, u64 *new_len) | ||
165 | { | ||
166 | *new_len = len; | ||
167 | |||
168 | if (len == 0) | ||
169 | return -EINVAL; | ||
170 | |||
171 | if (start > sb->s_maxbytes) | ||
172 | return -EFBIG; | ||
173 | |||
174 | /* | ||
175 | * Shrink request scope to what the fs can actually handle. | ||
176 | */ | ||
177 | if ((len > sb->s_maxbytes) || | ||
178 | (sb->s_maxbytes - len) < start) | ||
179 | *new_len = sb->s_maxbytes - start; | ||
180 | |||
181 | return 0; | ||
182 | } | ||
183 | |||
184 | static int ioctl_fiemap(struct file *filp, unsigned long arg) | ||
185 | { | ||
186 | struct fiemap fiemap; | ||
187 | struct fiemap_extent_info fieinfo = { 0, }; | ||
188 | struct inode *inode = filp->f_path.dentry->d_inode; | ||
189 | struct super_block *sb = inode->i_sb; | ||
190 | u64 len; | ||
191 | int error; | ||
192 | |||
193 | if (!inode->i_op->fiemap) | ||
194 | return -EOPNOTSUPP; | ||
195 | |||
196 | if (copy_from_user(&fiemap, (struct fiemap __user *)arg, | ||
197 | sizeof(struct fiemap))) | ||
198 | return -EFAULT; | ||
199 | |||
200 | if (fiemap.fm_extent_count > FIEMAP_MAX_EXTENTS) | ||
201 | return -EINVAL; | ||
202 | |||
203 | error = fiemap_check_ranges(sb, fiemap.fm_start, fiemap.fm_length, | ||
204 | &len); | ||
205 | if (error) | ||
206 | return error; | ||
207 | |||
208 | fieinfo.fi_flags = fiemap.fm_flags; | ||
209 | fieinfo.fi_extents_max = fiemap.fm_extent_count; | ||
210 | fieinfo.fi_extents_start = (struct fiemap_extent *)(arg + sizeof(fiemap)); | ||
211 | |||
212 | if (fiemap.fm_extent_count != 0 && | ||
213 | !access_ok(VERIFY_WRITE, fieinfo.fi_extents_start, | ||
214 | fieinfo.fi_extents_max * sizeof(struct fiemap_extent))) | ||
215 | return -EFAULT; | ||
216 | |||
217 | if (fieinfo.fi_flags & FIEMAP_FLAG_SYNC) | ||
218 | filemap_write_and_wait(inode->i_mapping); | ||
219 | |||
220 | error = inode->i_op->fiemap(inode, &fieinfo, fiemap.fm_start, len); | ||
221 | fiemap.fm_flags = fieinfo.fi_flags; | ||
222 | fiemap.fm_mapped_extents = fieinfo.fi_extents_mapped; | ||
223 | if (copy_to_user((char *)arg, &fiemap, sizeof(fiemap))) | ||
224 | error = -EFAULT; | ||
225 | |||
226 | return error; | ||
227 | } | ||
228 | |||
229 | #ifdef CONFIG_BLOCK | ||
230 | |||
231 | #define blk_to_logical(inode, blk) (blk << (inode)->i_blkbits) | ||
232 | #define logical_to_blk(inode, offset) (offset >> (inode)->i_blkbits); | ||
233 | |||
234 | /* | ||
235 | * @inode - the inode to map | ||
236 | * @arg - the pointer to userspace where we copy everything to | ||
237 | * @get_block - the fs's get_block function | ||
238 | * | ||
239 | * This does FIEMAP for block based inodes. Basically it will just loop | ||
240 | * through get_block until we hit the number of extents we want to map, or we | ||
241 | * go past the end of the file and hit a hole. | ||
242 | * | ||
243 | * If it is possible to have data blocks beyond a hole past @inode->i_size, then | ||
244 | * please do not use this function, it will stop at the first unmapped block | ||
245 | * beyond i_size | ||
246 | */ | ||
247 | int generic_block_fiemap(struct inode *inode, | ||
248 | struct fiemap_extent_info *fieinfo, u64 start, | ||
249 | u64 len, get_block_t *get_block) | ||
250 | { | ||
251 | struct buffer_head tmp; | ||
252 | unsigned int start_blk; | ||
253 | long long length = 0, map_len = 0; | ||
254 | u64 logical = 0, phys = 0, size = 0; | ||
255 | u32 flags = FIEMAP_EXTENT_MERGED; | ||
256 | int ret = 0; | ||
257 | |||
258 | if ((ret = fiemap_check_flags(fieinfo, FIEMAP_FLAG_SYNC))) | ||
259 | return ret; | ||
260 | |||
261 | start_blk = logical_to_blk(inode, start); | ||
262 | |||
263 | /* guard against change */ | ||
264 | mutex_lock(&inode->i_mutex); | ||
265 | |||
266 | length = (long long)min_t(u64, len, i_size_read(inode)); | ||
267 | map_len = length; | ||
268 | |||
269 | do { | ||
270 | /* | ||
271 | * we set b_size to the total size we want so it will map as | ||
272 | * many contiguous blocks as possible at once | ||
273 | */ | ||
274 | memset(&tmp, 0, sizeof(struct buffer_head)); | ||
275 | tmp.b_size = map_len; | ||
276 | |||
277 | ret = get_block(inode, start_blk, &tmp, 0); | ||
278 | if (ret) | ||
279 | break; | ||
280 | |||
281 | /* HOLE */ | ||
282 | if (!buffer_mapped(&tmp)) { | ||
283 | /* | ||
284 | * first hole after going past the EOF, this is our | ||
285 | * last extent | ||
286 | */ | ||
287 | if (length <= 0) { | ||
288 | flags = FIEMAP_EXTENT_MERGED|FIEMAP_EXTENT_LAST; | ||
289 | ret = fiemap_fill_next_extent(fieinfo, logical, | ||
290 | phys, size, | ||
291 | flags); | ||
292 | break; | ||
293 | } | ||
294 | |||
295 | length -= blk_to_logical(inode, 1); | ||
296 | |||
297 | /* if we have holes up to/past EOF then we're done */ | ||
298 | if (length <= 0) | ||
299 | break; | ||
300 | |||
301 | start_blk++; | ||
302 | } else { | ||
303 | if (length <= 0 && size) { | ||
304 | ret = fiemap_fill_next_extent(fieinfo, logical, | ||
305 | phys, size, | ||
306 | flags); | ||
307 | if (ret) | ||
308 | break; | ||
309 | } | ||
310 | |||
311 | logical = blk_to_logical(inode, start_blk); | ||
312 | phys = blk_to_logical(inode, tmp.b_blocknr); | ||
313 | size = tmp.b_size; | ||
314 | flags = FIEMAP_EXTENT_MERGED; | ||
315 | |||
316 | length -= tmp.b_size; | ||
317 | start_blk += logical_to_blk(inode, size); | ||
318 | |||
319 | /* | ||
320 | * if we are past the EOF we need to loop again to see | ||
321 | * if there is a hole so we can mark this extent as the | ||
322 | * last one, and if not keep mapping things until we | ||
323 | * find a hole, or we run out of slots in the extent | ||
324 | * array | ||
325 | */ | ||
326 | if (length <= 0) | ||
327 | continue; | ||
328 | |||
329 | ret = fiemap_fill_next_extent(fieinfo, logical, phys, | ||
330 | size, flags); | ||
331 | if (ret) | ||
332 | break; | ||
333 | } | ||
334 | cond_resched(); | ||
335 | } while (1); | ||
336 | |||
337 | mutex_unlock(&inode->i_mutex); | ||
338 | |||
339 | /* if ret is 1 then we just hit the end of the extent array */ | ||
340 | if (ret == 1) | ||
341 | ret = 0; | ||
342 | |||
343 | return ret; | ||
344 | } | ||
345 | EXPORT_SYMBOL(generic_block_fiemap); | ||
346 | |||
347 | #endif /* CONFIG_BLOCK */ | ||
348 | |||
74 | static int file_ioctl(struct file *filp, unsigned int cmd, | 349 | static int file_ioctl(struct file *filp, unsigned int cmd, |
75 | unsigned long arg) | 350 | unsigned long arg) |
76 | { | 351 | { |
@@ -80,6 +355,8 @@ static int file_ioctl(struct file *filp, unsigned int cmd, | |||
80 | switch (cmd) { | 355 | switch (cmd) { |
81 | case FIBMAP: | 356 | case FIBMAP: |
82 | return ioctl_fibmap(filp, p); | 357 | return ioctl_fibmap(filp, p); |
358 | case FS_IOC_FIEMAP: | ||
359 | return ioctl_fiemap(filp, arg); | ||
83 | case FIGETBSZ: | 360 | case FIGETBSZ: |
84 | return put_user(inode->i_sb->s_blocksize, p); | 361 | return put_user(inode->i_sb->s_blocksize, p); |
85 | case FIONREAD: | 362 | case FIONREAD: |
diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c index 91389c8aee8a..9203c3332f17 100644 --- a/fs/jbd2/checkpoint.c +++ b/fs/jbd2/checkpoint.c | |||
@@ -20,6 +20,7 @@ | |||
20 | #include <linux/time.h> | 20 | #include <linux/time.h> |
21 | #include <linux/fs.h> | 21 | #include <linux/fs.h> |
22 | #include <linux/jbd2.h> | 22 | #include <linux/jbd2.h> |
23 | #include <linux/marker.h> | ||
23 | #include <linux/errno.h> | 24 | #include <linux/errno.h> |
24 | #include <linux/slab.h> | 25 | #include <linux/slab.h> |
25 | 26 | ||
@@ -93,7 +94,8 @@ static int __try_to_free_cp_buf(struct journal_head *jh) | |||
93 | int ret = 0; | 94 | int ret = 0; |
94 | struct buffer_head *bh = jh2bh(jh); | 95 | struct buffer_head *bh = jh2bh(jh); |
95 | 96 | ||
96 | if (jh->b_jlist == BJ_None && !buffer_locked(bh) && !buffer_dirty(bh)) { | 97 | if (jh->b_jlist == BJ_None && !buffer_locked(bh) && |
98 | !buffer_dirty(bh) && !buffer_write_io_error(bh)) { | ||
97 | JBUFFER_TRACE(jh, "remove from checkpoint list"); | 99 | JBUFFER_TRACE(jh, "remove from checkpoint list"); |
98 | ret = __jbd2_journal_remove_checkpoint(jh) + 1; | 100 | ret = __jbd2_journal_remove_checkpoint(jh) + 1; |
99 | jbd_unlock_bh_state(bh); | 101 | jbd_unlock_bh_state(bh); |
@@ -126,14 +128,29 @@ void __jbd2_log_wait_for_space(journal_t *journal) | |||
126 | 128 | ||
127 | /* | 129 | /* |
128 | * Test again, another process may have checkpointed while we | 130 | * Test again, another process may have checkpointed while we |
129 | * were waiting for the checkpoint lock | 131 | * were waiting for the checkpoint lock. If there are no |
132 | * outstanding transactions there is nothing to checkpoint and | ||
133 | * we can't make progress. Abort the journal in this case. | ||
130 | */ | 134 | */ |
131 | spin_lock(&journal->j_state_lock); | 135 | spin_lock(&journal->j_state_lock); |
136 | spin_lock(&journal->j_list_lock); | ||
132 | nblocks = jbd_space_needed(journal); | 137 | nblocks = jbd_space_needed(journal); |
133 | if (__jbd2_log_space_left(journal) < nblocks) { | 138 | if (__jbd2_log_space_left(journal) < nblocks) { |
139 | int chkpt = journal->j_checkpoint_transactions != NULL; | ||
140 | |||
141 | spin_unlock(&journal->j_list_lock); | ||
134 | spin_unlock(&journal->j_state_lock); | 142 | spin_unlock(&journal->j_state_lock); |
135 | jbd2_log_do_checkpoint(journal); | 143 | if (chkpt) { |
144 | jbd2_log_do_checkpoint(journal); | ||
145 | } else { | ||
146 | printk(KERN_ERR "%s: no transactions\n", | ||
147 | __func__); | ||
148 | jbd2_journal_abort(journal, 0); | ||
149 | } | ||
150 | |||
136 | spin_lock(&journal->j_state_lock); | 151 | spin_lock(&journal->j_state_lock); |
152 | } else { | ||
153 | spin_unlock(&journal->j_list_lock); | ||
137 | } | 154 | } |
138 | mutex_unlock(&journal->j_checkpoint_mutex); | 155 | mutex_unlock(&journal->j_checkpoint_mutex); |
139 | } | 156 | } |
@@ -160,21 +177,25 @@ static void jbd_sync_bh(journal_t *journal, struct buffer_head *bh) | |||
160 | * buffers. Note that we take the buffers in the opposite ordering | 177 | * buffers. Note that we take the buffers in the opposite ordering |
161 | * from the one in which they were submitted for IO. | 178 | * from the one in which they were submitted for IO. |
162 | * | 179 | * |
180 | * Return 0 on success, and return <0 if some buffers have failed | ||
181 | * to be written out. | ||
182 | * | ||
163 | * Called with j_list_lock held. | 183 | * Called with j_list_lock held. |
164 | */ | 184 | */ |
165 | static void __wait_cp_io(journal_t *journal, transaction_t *transaction) | 185 | static int __wait_cp_io(journal_t *journal, transaction_t *transaction) |
166 | { | 186 | { |
167 | struct journal_head *jh; | 187 | struct journal_head *jh; |
168 | struct buffer_head *bh; | 188 | struct buffer_head *bh; |
169 | tid_t this_tid; | 189 | tid_t this_tid; |
170 | int released = 0; | 190 | int released = 0; |
191 | int ret = 0; | ||
171 | 192 | ||
172 | this_tid = transaction->t_tid; | 193 | this_tid = transaction->t_tid; |
173 | restart: | 194 | restart: |
174 | /* Did somebody clean up the transaction in the meanwhile? */ | 195 | /* Did somebody clean up the transaction in the meanwhile? */ |
175 | if (journal->j_checkpoint_transactions != transaction || | 196 | if (journal->j_checkpoint_transactions != transaction || |
176 | transaction->t_tid != this_tid) | 197 | transaction->t_tid != this_tid) |
177 | return; | 198 | return ret; |
178 | while (!released && transaction->t_checkpoint_io_list) { | 199 | while (!released && transaction->t_checkpoint_io_list) { |
179 | jh = transaction->t_checkpoint_io_list; | 200 | jh = transaction->t_checkpoint_io_list; |
180 | bh = jh2bh(jh); | 201 | bh = jh2bh(jh); |
@@ -194,6 +215,9 @@ restart: | |||
194 | spin_lock(&journal->j_list_lock); | 215 | spin_lock(&journal->j_list_lock); |
195 | goto restart; | 216 | goto restart; |
196 | } | 217 | } |
218 | if (unlikely(buffer_write_io_error(bh))) | ||
219 | ret = -EIO; | ||
220 | |||
197 | /* | 221 | /* |
198 | * Now in whatever state the buffer currently is, we know that | 222 | * Now in whatever state the buffer currently is, we know that |
199 | * it has been written out and so we can drop it from the list | 223 | * it has been written out and so we can drop it from the list |
@@ -203,6 +227,8 @@ restart: | |||
203 | jbd2_journal_remove_journal_head(bh); | 227 | jbd2_journal_remove_journal_head(bh); |
204 | __brelse(bh); | 228 | __brelse(bh); |
205 | } | 229 | } |
230 | |||
231 | return ret; | ||
206 | } | 232 | } |
207 | 233 | ||
208 | #define NR_BATCH 64 | 234 | #define NR_BATCH 64 |
@@ -226,7 +252,8 @@ __flush_batch(journal_t *journal, struct buffer_head **bhs, int *batch_count) | |||
226 | * Try to flush one buffer from the checkpoint list to disk. | 252 | * Try to flush one buffer from the checkpoint list to disk. |
227 | * | 253 | * |
228 | * Return 1 if something happened which requires us to abort the current | 254 | * Return 1 if something happened which requires us to abort the current |
229 | * scan of the checkpoint list. | 255 | * scan of the checkpoint list. Return <0 if the buffer has failed to |
256 | * be written out. | ||
230 | * | 257 | * |
231 | * Called with j_list_lock held and drops it if 1 is returned | 258 | * Called with j_list_lock held and drops it if 1 is returned |
232 | * Called under jbd_lock_bh_state(jh2bh(jh)), and drops it | 259 | * Called under jbd_lock_bh_state(jh2bh(jh)), and drops it |
@@ -258,6 +285,9 @@ static int __process_buffer(journal_t *journal, struct journal_head *jh, | |||
258 | jbd2_log_wait_commit(journal, tid); | 285 | jbd2_log_wait_commit(journal, tid); |
259 | ret = 1; | 286 | ret = 1; |
260 | } else if (!buffer_dirty(bh)) { | 287 | } else if (!buffer_dirty(bh)) { |
288 | ret = 1; | ||
289 | if (unlikely(buffer_write_io_error(bh))) | ||
290 | ret = -EIO; | ||
261 | J_ASSERT_JH(jh, !buffer_jbddirty(bh)); | 291 | J_ASSERT_JH(jh, !buffer_jbddirty(bh)); |
262 | BUFFER_TRACE(bh, "remove from checkpoint"); | 292 | BUFFER_TRACE(bh, "remove from checkpoint"); |
263 | __jbd2_journal_remove_checkpoint(jh); | 293 | __jbd2_journal_remove_checkpoint(jh); |
@@ -265,7 +295,6 @@ static int __process_buffer(journal_t *journal, struct journal_head *jh, | |||
265 | jbd_unlock_bh_state(bh); | 295 | jbd_unlock_bh_state(bh); |
266 | jbd2_journal_remove_journal_head(bh); | 296 | jbd2_journal_remove_journal_head(bh); |
267 | __brelse(bh); | 297 | __brelse(bh); |
268 | ret = 1; | ||
269 | } else { | 298 | } else { |
270 | /* | 299 | /* |
271 | * Important: we are about to write the buffer, and | 300 | * Important: we are about to write the buffer, and |
@@ -298,6 +327,7 @@ static int __process_buffer(journal_t *journal, struct journal_head *jh, | |||
298 | * to disk. We submit larger chunks of data at once. | 327 | * to disk. We submit larger chunks of data at once. |
299 | * | 328 | * |
300 | * The journal should be locked before calling this function. | 329 | * The journal should be locked before calling this function. |
330 | * Called with j_checkpoint_mutex held. | ||
301 | */ | 331 | */ |
302 | int jbd2_log_do_checkpoint(journal_t *journal) | 332 | int jbd2_log_do_checkpoint(journal_t *journal) |
303 | { | 333 | { |
@@ -313,6 +343,8 @@ int jbd2_log_do_checkpoint(journal_t *journal) | |||
313 | * journal straight away. | 343 | * journal straight away. |
314 | */ | 344 | */ |
315 | result = jbd2_cleanup_journal_tail(journal); | 345 | result = jbd2_cleanup_journal_tail(journal); |
346 | trace_mark(jbd2_checkpoint, "dev %s need_checkpoint %d", | ||
347 | journal->j_devname, result); | ||
316 | jbd_debug(1, "cleanup_journal_tail returned %d\n", result); | 348 | jbd_debug(1, "cleanup_journal_tail returned %d\n", result); |
317 | if (result <= 0) | 349 | if (result <= 0) |
318 | return result; | 350 | return result; |
@@ -321,6 +353,7 @@ int jbd2_log_do_checkpoint(journal_t *journal) | |||
321 | * OK, we need to start writing disk blocks. Take one transaction | 353 | * OK, we need to start writing disk blocks. Take one transaction |
322 | * and write it. | 354 | * and write it. |
323 | */ | 355 | */ |
356 | result = 0; | ||
324 | spin_lock(&journal->j_list_lock); | 357 | spin_lock(&journal->j_list_lock); |
325 | if (!journal->j_checkpoint_transactions) | 358 | if (!journal->j_checkpoint_transactions) |
326 | goto out; | 359 | goto out; |
@@ -339,7 +372,7 @@ restart: | |||
339 | int batch_count = 0; | 372 | int batch_count = 0; |
340 | struct buffer_head *bhs[NR_BATCH]; | 373 | struct buffer_head *bhs[NR_BATCH]; |
341 | struct journal_head *jh; | 374 | struct journal_head *jh; |
342 | int retry = 0; | 375 | int retry = 0, err; |
343 | 376 | ||
344 | while (!retry && transaction->t_checkpoint_list) { | 377 | while (!retry && transaction->t_checkpoint_list) { |
345 | struct buffer_head *bh; | 378 | struct buffer_head *bh; |
@@ -353,6 +386,8 @@ restart: | |||
353 | } | 386 | } |
354 | retry = __process_buffer(journal, jh, bhs, &batch_count, | 387 | retry = __process_buffer(journal, jh, bhs, &batch_count, |
355 | transaction); | 388 | transaction); |
389 | if (retry < 0 && !result) | ||
390 | result = retry; | ||
356 | if (!retry && (need_resched() || | 391 | if (!retry && (need_resched() || |
357 | spin_needbreak(&journal->j_list_lock))) { | 392 | spin_needbreak(&journal->j_list_lock))) { |
358 | spin_unlock(&journal->j_list_lock); | 393 | spin_unlock(&journal->j_list_lock); |
@@ -377,14 +412,18 @@ restart: | |||
377 | * Now we have cleaned up the first transaction's checkpoint | 412 | * Now we have cleaned up the first transaction's checkpoint |
378 | * list. Let's clean up the second one | 413 | * list. Let's clean up the second one |
379 | */ | 414 | */ |
380 | __wait_cp_io(journal, transaction); | 415 | err = __wait_cp_io(journal, transaction); |
416 | if (!result) | ||
417 | result = err; | ||
381 | } | 418 | } |
382 | out: | 419 | out: |
383 | spin_unlock(&journal->j_list_lock); | 420 | spin_unlock(&journal->j_list_lock); |
384 | result = jbd2_cleanup_journal_tail(journal); | ||
385 | if (result < 0) | 421 | if (result < 0) |
386 | return result; | 422 | jbd2_journal_abort(journal, result); |
387 | return 0; | 423 | else |
424 | result = jbd2_cleanup_journal_tail(journal); | ||
425 | |||
426 | return (result < 0) ? result : 0; | ||
388 | } | 427 | } |
389 | 428 | ||
390 | /* | 429 | /* |
@@ -400,8 +439,9 @@ out: | |||
400 | * This is the only part of the journaling code which really needs to be | 439 | * This is the only part of the journaling code which really needs to be |
401 | * aware of transaction aborts. Checkpointing involves writing to the | 440 | * aware of transaction aborts. Checkpointing involves writing to the |
402 | * main filesystem area rather than to the journal, so it can proceed | 441 | * main filesystem area rather than to the journal, so it can proceed |
403 | * even in abort state, but we must not update the journal superblock if | 442 | * even in abort state, but we must not update the super block if |
404 | * we have an abort error outstanding. | 443 | * checkpointing may have failed. Otherwise, we would lose some metadata |
444 | * buffers which should be written-back to the filesystem. | ||
405 | */ | 445 | */ |
406 | 446 | ||
407 | int jbd2_cleanup_journal_tail(journal_t *journal) | 447 | int jbd2_cleanup_journal_tail(journal_t *journal) |
@@ -410,6 +450,9 @@ int jbd2_cleanup_journal_tail(journal_t *journal) | |||
410 | tid_t first_tid; | 450 | tid_t first_tid; |
411 | unsigned long blocknr, freed; | 451 | unsigned long blocknr, freed; |
412 | 452 | ||
453 | if (is_journal_aborted(journal)) | ||
454 | return 1; | ||
455 | |||
413 | /* OK, work out the oldest transaction remaining in the log, and | 456 | /* OK, work out the oldest transaction remaining in the log, and |
414 | * the log block it starts at. | 457 | * the log block it starts at. |
415 | * | 458 | * |
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index f2ad061e95ec..0abe02c4242a 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c | |||
@@ -16,6 +16,7 @@ | |||
16 | #include <linux/time.h> | 16 | #include <linux/time.h> |
17 | #include <linux/fs.h> | 17 | #include <linux/fs.h> |
18 | #include <linux/jbd2.h> | 18 | #include <linux/jbd2.h> |
19 | #include <linux/marker.h> | ||
19 | #include <linux/errno.h> | 20 | #include <linux/errno.h> |
20 | #include <linux/slab.h> | 21 | #include <linux/slab.h> |
21 | #include <linux/mm.h> | 22 | #include <linux/mm.h> |
@@ -126,8 +127,7 @@ static int journal_submit_commit_record(journal_t *journal, | |||
126 | 127 | ||
127 | JBUFFER_TRACE(descriptor, "submit commit block"); | 128 | JBUFFER_TRACE(descriptor, "submit commit block"); |
128 | lock_buffer(bh); | 129 | lock_buffer(bh); |
129 | get_bh(bh); | 130 | clear_buffer_dirty(bh); |
130 | set_buffer_dirty(bh); | ||
131 | set_buffer_uptodate(bh); | 131 | set_buffer_uptodate(bh); |
132 | bh->b_end_io = journal_end_buffer_io_sync; | 132 | bh->b_end_io = journal_end_buffer_io_sync; |
133 | 133 | ||
@@ -147,12 +147,9 @@ static int journal_submit_commit_record(journal_t *journal, | |||
147 | * to remember if we sent a barrier request | 147 | * to remember if we sent a barrier request |
148 | */ | 148 | */ |
149 | if (ret == -EOPNOTSUPP && barrier_done) { | 149 | if (ret == -EOPNOTSUPP && barrier_done) { |
150 | char b[BDEVNAME_SIZE]; | ||
151 | |||
152 | printk(KERN_WARNING | 150 | printk(KERN_WARNING |
153 | "JBD: barrier-based sync failed on %s - " | 151 | "JBD: barrier-based sync failed on %s - " |
154 | "disabling barriers\n", | 152 | "disabling barriers\n", journal->j_devname); |
155 | bdevname(journal->j_dev, b)); | ||
156 | spin_lock(&journal->j_state_lock); | 153 | spin_lock(&journal->j_state_lock); |
157 | journal->j_flags &= ~JBD2_BARRIER; | 154 | journal->j_flags &= ~JBD2_BARRIER; |
158 | spin_unlock(&journal->j_state_lock); | 155 | spin_unlock(&journal->j_state_lock); |
@@ -160,7 +157,7 @@ static int journal_submit_commit_record(journal_t *journal, | |||
160 | /* And try again, without the barrier */ | 157 | /* And try again, without the barrier */ |
161 | lock_buffer(bh); | 158 | lock_buffer(bh); |
162 | set_buffer_uptodate(bh); | 159 | set_buffer_uptodate(bh); |
163 | set_buffer_dirty(bh); | 160 | clear_buffer_dirty(bh); |
164 | ret = submit_bh(WRITE, bh); | 161 | ret = submit_bh(WRITE, bh); |
165 | } | 162 | } |
166 | *cbh = bh; | 163 | *cbh = bh; |
@@ -371,6 +368,8 @@ void jbd2_journal_commit_transaction(journal_t *journal) | |||
371 | commit_transaction = journal->j_running_transaction; | 368 | commit_transaction = journal->j_running_transaction; |
372 | J_ASSERT(commit_transaction->t_state == T_RUNNING); | 369 | J_ASSERT(commit_transaction->t_state == T_RUNNING); |
373 | 370 | ||
371 | trace_mark(jbd2_start_commit, "dev %s transaction %d", | ||
372 | journal->j_devname, commit_transaction->t_tid); | ||
374 | jbd_debug(1, "JBD: starting commit of transaction %d\n", | 373 | jbd_debug(1, "JBD: starting commit of transaction %d\n", |
375 | commit_transaction->t_tid); | 374 | commit_transaction->t_tid); |
376 | 375 | ||
@@ -505,9 +504,10 @@ void jbd2_journal_commit_transaction(journal_t *journal) | |||
505 | jh = commit_transaction->t_buffers; | 504 | jh = commit_transaction->t_buffers; |
506 | 505 | ||
507 | /* If we're in abort mode, we just un-journal the buffer and | 506 | /* If we're in abort mode, we just un-journal the buffer and |
508 | release it for background writing. */ | 507 | release it. */ |
509 | 508 | ||
510 | if (is_journal_aborted(journal)) { | 509 | if (is_journal_aborted(journal)) { |
510 | clear_buffer_jbddirty(jh2bh(jh)); | ||
511 | JBUFFER_TRACE(jh, "journal is aborting: refile"); | 511 | JBUFFER_TRACE(jh, "journal is aborting: refile"); |
512 | jbd2_journal_refile_buffer(journal, jh); | 512 | jbd2_journal_refile_buffer(journal, jh); |
513 | /* If that was the last one, we need to clean up | 513 | /* If that was the last one, we need to clean up |
@@ -681,11 +681,11 @@ start_journal_io: | |||
681 | */ | 681 | */ |
682 | err = journal_finish_inode_data_buffers(journal, commit_transaction); | 682 | err = journal_finish_inode_data_buffers(journal, commit_transaction); |
683 | if (err) { | 683 | if (err) { |
684 | char b[BDEVNAME_SIZE]; | ||
685 | |||
686 | printk(KERN_WARNING | 684 | printk(KERN_WARNING |
687 | "JBD2: Detected IO errors while flushing file data " | 685 | "JBD2: Detected IO errors while flushing file data " |
688 | "on %s\n", bdevname(journal->j_fs_dev, b)); | 686 | "on %s\n", journal->j_devname); |
687 | if (journal->j_flags & JBD2_ABORT_ON_SYNCDATA_ERR) | ||
688 | jbd2_journal_abort(journal, err); | ||
689 | err = 0; | 689 | err = 0; |
690 | } | 690 | } |
691 | 691 | ||
@@ -786,6 +786,9 @@ wait_for_iobuf: | |||
786 | /* AKPM: bforget here */ | 786 | /* AKPM: bforget here */ |
787 | } | 787 | } |
788 | 788 | ||
789 | if (err) | ||
790 | jbd2_journal_abort(journal, err); | ||
791 | |||
789 | jbd_debug(3, "JBD: commit phase 5\n"); | 792 | jbd_debug(3, "JBD: commit phase 5\n"); |
790 | 793 | ||
791 | if (!JBD2_HAS_INCOMPAT_FEATURE(journal, | 794 | if (!JBD2_HAS_INCOMPAT_FEATURE(journal, |
@@ -884,6 +887,8 @@ restart_loop: | |||
884 | if (buffer_jbddirty(bh)) { | 887 | if (buffer_jbddirty(bh)) { |
885 | JBUFFER_TRACE(jh, "add to new checkpointing trans"); | 888 | JBUFFER_TRACE(jh, "add to new checkpointing trans"); |
886 | __jbd2_journal_insert_checkpoint(jh, commit_transaction); | 889 | __jbd2_journal_insert_checkpoint(jh, commit_transaction); |
890 | if (is_journal_aborted(journal)) | ||
891 | clear_buffer_jbddirty(bh); | ||
887 | JBUFFER_TRACE(jh, "refile for checkpoint writeback"); | 892 | JBUFFER_TRACE(jh, "refile for checkpoint writeback"); |
888 | __jbd2_journal_refile_buffer(jh); | 893 | __jbd2_journal_refile_buffer(jh); |
889 | jbd_unlock_bh_state(bh); | 894 | jbd_unlock_bh_state(bh); |
@@ -990,6 +995,9 @@ restart_loop: | |||
990 | } | 995 | } |
991 | spin_unlock(&journal->j_list_lock); | 996 | spin_unlock(&journal->j_list_lock); |
992 | 997 | ||
998 | trace_mark(jbd2_end_commit, "dev %s transaction %d head %d", | ||
999 | journal->j_devname, commit_transaction->t_tid, | ||
1000 | journal->j_tail_sequence); | ||
993 | jbd_debug(1, "JBD: commit %d complete, head %d\n", | 1001 | jbd_debug(1, "JBD: commit %d complete, head %d\n", |
994 | journal->j_commit_sequence, journal->j_tail_sequence); | 1002 | journal->j_commit_sequence, journal->j_tail_sequence); |
995 | 1003 | ||
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index 8207a01c4edb..783de118de92 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c | |||
@@ -597,13 +597,9 @@ int jbd2_journal_bmap(journal_t *journal, unsigned long blocknr, | |||
597 | if (ret) | 597 | if (ret) |
598 | *retp = ret; | 598 | *retp = ret; |
599 | else { | 599 | else { |
600 | char b[BDEVNAME_SIZE]; | ||
601 | |||
602 | printk(KERN_ALERT "%s: journal block not found " | 600 | printk(KERN_ALERT "%s: journal block not found " |
603 | "at offset %lu on %s\n", | 601 | "at offset %lu on %s\n", |
604 | __func__, | 602 | __func__, blocknr, journal->j_devname); |
605 | blocknr, | ||
606 | bdevname(journal->j_dev, b)); | ||
607 | err = -EIO; | 603 | err = -EIO; |
608 | __journal_abort_soft(journal, err); | 604 | __journal_abort_soft(journal, err); |
609 | } | 605 | } |
@@ -901,10 +897,7 @@ static struct proc_dir_entry *proc_jbd2_stats; | |||
901 | 897 | ||
902 | static void jbd2_stats_proc_init(journal_t *journal) | 898 | static void jbd2_stats_proc_init(journal_t *journal) |
903 | { | 899 | { |
904 | char name[BDEVNAME_SIZE]; | 900 | journal->j_proc_entry = proc_mkdir(journal->j_devname, proc_jbd2_stats); |
905 | |||
906 | bdevname(journal->j_dev, name); | ||
907 | journal->j_proc_entry = proc_mkdir(name, proc_jbd2_stats); | ||
908 | if (journal->j_proc_entry) { | 901 | if (journal->j_proc_entry) { |
909 | proc_create_data("history", S_IRUGO, journal->j_proc_entry, | 902 | proc_create_data("history", S_IRUGO, journal->j_proc_entry, |
910 | &jbd2_seq_history_fops, journal); | 903 | &jbd2_seq_history_fops, journal); |
@@ -915,12 +908,9 @@ static void jbd2_stats_proc_init(journal_t *journal) | |||
915 | 908 | ||
916 | static void jbd2_stats_proc_exit(journal_t *journal) | 909 | static void jbd2_stats_proc_exit(journal_t *journal) |
917 | { | 910 | { |
918 | char name[BDEVNAME_SIZE]; | ||
919 | |||
920 | bdevname(journal->j_dev, name); | ||
921 | remove_proc_entry("info", journal->j_proc_entry); | 911 | remove_proc_entry("info", journal->j_proc_entry); |
922 | remove_proc_entry("history", journal->j_proc_entry); | 912 | remove_proc_entry("history", journal->j_proc_entry); |
923 | remove_proc_entry(name, proc_jbd2_stats); | 913 | remove_proc_entry(journal->j_devname, proc_jbd2_stats); |
924 | } | 914 | } |
925 | 915 | ||
926 | static void journal_init_stats(journal_t *journal) | 916 | static void journal_init_stats(journal_t *journal) |
@@ -1018,6 +1008,7 @@ journal_t * jbd2_journal_init_dev(struct block_device *bdev, | |||
1018 | { | 1008 | { |
1019 | journal_t *journal = journal_init_common(); | 1009 | journal_t *journal = journal_init_common(); |
1020 | struct buffer_head *bh; | 1010 | struct buffer_head *bh; |
1011 | char *p; | ||
1021 | int n; | 1012 | int n; |
1022 | 1013 | ||
1023 | if (!journal) | 1014 | if (!journal) |
@@ -1039,6 +1030,10 @@ journal_t * jbd2_journal_init_dev(struct block_device *bdev, | |||
1039 | journal->j_fs_dev = fs_dev; | 1030 | journal->j_fs_dev = fs_dev; |
1040 | journal->j_blk_offset = start; | 1031 | journal->j_blk_offset = start; |
1041 | journal->j_maxlen = len; | 1032 | journal->j_maxlen = len; |
1033 | bdevname(journal->j_dev, journal->j_devname); | ||
1034 | p = journal->j_devname; | ||
1035 | while ((p = strchr(p, '/'))) | ||
1036 | *p = '!'; | ||
1042 | jbd2_stats_proc_init(journal); | 1037 | jbd2_stats_proc_init(journal); |
1043 | 1038 | ||
1044 | bh = __getblk(journal->j_dev, start, journal->j_blocksize); | 1039 | bh = __getblk(journal->j_dev, start, journal->j_blocksize); |
@@ -1061,6 +1056,7 @@ journal_t * jbd2_journal_init_inode (struct inode *inode) | |||
1061 | { | 1056 | { |
1062 | struct buffer_head *bh; | 1057 | struct buffer_head *bh; |
1063 | journal_t *journal = journal_init_common(); | 1058 | journal_t *journal = journal_init_common(); |
1059 | char *p; | ||
1064 | int err; | 1060 | int err; |
1065 | int n; | 1061 | int n; |
1066 | unsigned long long blocknr; | 1062 | unsigned long long blocknr; |
@@ -1070,6 +1066,12 @@ journal_t * jbd2_journal_init_inode (struct inode *inode) | |||
1070 | 1066 | ||
1071 | journal->j_dev = journal->j_fs_dev = inode->i_sb->s_bdev; | 1067 | journal->j_dev = journal->j_fs_dev = inode->i_sb->s_bdev; |
1072 | journal->j_inode = inode; | 1068 | journal->j_inode = inode; |
1069 | bdevname(journal->j_dev, journal->j_devname); | ||
1070 | p = journal->j_devname; | ||
1071 | while ((p = strchr(p, '/'))) | ||
1072 | *p = '!'; | ||
1073 | p = journal->j_devname + strlen(journal->j_devname); | ||
1074 | sprintf(p, ":%lu", journal->j_inode->i_ino); | ||
1073 | jbd_debug(1, | 1075 | jbd_debug(1, |
1074 | "journal %p: inode %s/%ld, size %Ld, bits %d, blksize %ld\n", | 1076 | "journal %p: inode %s/%ld, size %Ld, bits %d, blksize %ld\n", |
1075 | journal, inode->i_sb->s_id, inode->i_ino, | 1077 | journal, inode->i_sb->s_id, inode->i_ino, |
@@ -1253,6 +1255,22 @@ void jbd2_journal_update_superblock(journal_t *journal, int wait) | |||
1253 | goto out; | 1255 | goto out; |
1254 | } | 1256 | } |
1255 | 1257 | ||
1258 | if (buffer_write_io_error(bh)) { | ||
1259 | /* | ||
1260 | * Oh, dear. A previous attempt to write the journal | ||
1261 | * superblock failed. This could happen because the | ||
1262 | * USB device was yanked out. Or it could happen to | ||
1263 | * be a transient write error and maybe the block will | ||
1264 | * be remapped. Nothing we can do but to retry the | ||
1265 | * write and hope for the best. | ||
1266 | */ | ||
1267 | printk(KERN_ERR "JBD2: previous I/O error detected " | ||
1268 | "for journal superblock update for %s.\n", | ||
1269 | journal->j_devname); | ||
1270 | clear_buffer_write_io_error(bh); | ||
1271 | set_buffer_uptodate(bh); | ||
1272 | } | ||
1273 | |||
1256 | spin_lock(&journal->j_state_lock); | 1274 | spin_lock(&journal->j_state_lock); |
1257 | jbd_debug(1,"JBD: updating superblock (start %ld, seq %d, errno %d)\n", | 1275 | jbd_debug(1,"JBD: updating superblock (start %ld, seq %d, errno %d)\n", |
1258 | journal->j_tail, journal->j_tail_sequence, journal->j_errno); | 1276 | journal->j_tail, journal->j_tail_sequence, journal->j_errno); |
@@ -1264,9 +1282,16 @@ void jbd2_journal_update_superblock(journal_t *journal, int wait) | |||
1264 | 1282 | ||
1265 | BUFFER_TRACE(bh, "marking dirty"); | 1283 | BUFFER_TRACE(bh, "marking dirty"); |
1266 | mark_buffer_dirty(bh); | 1284 | mark_buffer_dirty(bh); |
1267 | if (wait) | 1285 | if (wait) { |
1268 | sync_dirty_buffer(bh); | 1286 | sync_dirty_buffer(bh); |
1269 | else | 1287 | if (buffer_write_io_error(bh)) { |
1288 | printk(KERN_ERR "JBD2: I/O error detected " | ||
1289 | "when updating journal superblock for %s.\n", | ||
1290 | journal->j_devname); | ||
1291 | clear_buffer_write_io_error(bh); | ||
1292 | set_buffer_uptodate(bh); | ||
1293 | } | ||
1294 | } else | ||
1270 | ll_rw_block(SWRITE, 1, &bh); | 1295 | ll_rw_block(SWRITE, 1, &bh); |
1271 | 1296 | ||
1272 | out: | 1297 | out: |
@@ -1426,9 +1451,12 @@ recovery_error: | |||
1426 | * | 1451 | * |
1427 | * Release a journal_t structure once it is no longer in use by the | 1452 | * Release a journal_t structure once it is no longer in use by the |
1428 | * journaled object. | 1453 | * journaled object. |
1454 | * Return <0 if we couldn't clean up the journal. | ||
1429 | */ | 1455 | */ |
1430 | void jbd2_journal_destroy(journal_t *journal) | 1456 | int jbd2_journal_destroy(journal_t *journal) |
1431 | { | 1457 | { |
1458 | int err = 0; | ||
1459 | |||
1432 | /* Wait for the commit thread to wake up and die. */ | 1460 | /* Wait for the commit thread to wake up and die. */ |
1433 | journal_kill_thread(journal); | 1461 | journal_kill_thread(journal); |
1434 | 1462 | ||
@@ -1451,11 +1479,16 @@ void jbd2_journal_destroy(journal_t *journal) | |||
1451 | J_ASSERT(journal->j_checkpoint_transactions == NULL); | 1479 | J_ASSERT(journal->j_checkpoint_transactions == NULL); |
1452 | spin_unlock(&journal->j_list_lock); | 1480 | spin_unlock(&journal->j_list_lock); |
1453 | 1481 | ||
1454 | /* We can now mark the journal as empty. */ | ||
1455 | journal->j_tail = 0; | ||
1456 | journal->j_tail_sequence = ++journal->j_transaction_sequence; | ||
1457 | if (journal->j_sb_buffer) { | 1482 | if (journal->j_sb_buffer) { |
1458 | jbd2_journal_update_superblock(journal, 1); | 1483 | if (!is_journal_aborted(journal)) { |
1484 | /* We can now mark the journal as empty. */ | ||
1485 | journal->j_tail = 0; | ||
1486 | journal->j_tail_sequence = | ||
1487 | ++journal->j_transaction_sequence; | ||
1488 | jbd2_journal_update_superblock(journal, 1); | ||
1489 | } else { | ||
1490 | err = -EIO; | ||
1491 | } | ||
1459 | brelse(journal->j_sb_buffer); | 1492 | brelse(journal->j_sb_buffer); |
1460 | } | 1493 | } |
1461 | 1494 | ||
@@ -1467,6 +1500,8 @@ void jbd2_journal_destroy(journal_t *journal) | |||
1467 | jbd2_journal_destroy_revoke(journal); | 1500 | jbd2_journal_destroy_revoke(journal); |
1468 | kfree(journal->j_wbuf); | 1501 | kfree(journal->j_wbuf); |
1469 | kfree(journal); | 1502 | kfree(journal); |
1503 | |||
1504 | return err; | ||
1470 | } | 1505 | } |
1471 | 1506 | ||
1472 | 1507 | ||
@@ -1692,10 +1727,16 @@ int jbd2_journal_flush(journal_t *journal) | |||
1692 | spin_lock(&journal->j_list_lock); | 1727 | spin_lock(&journal->j_list_lock); |
1693 | while (!err && journal->j_checkpoint_transactions != NULL) { | 1728 | while (!err && journal->j_checkpoint_transactions != NULL) { |
1694 | spin_unlock(&journal->j_list_lock); | 1729 | spin_unlock(&journal->j_list_lock); |
1730 | mutex_lock(&journal->j_checkpoint_mutex); | ||
1695 | err = jbd2_log_do_checkpoint(journal); | 1731 | err = jbd2_log_do_checkpoint(journal); |
1732 | mutex_unlock(&journal->j_checkpoint_mutex); | ||
1696 | spin_lock(&journal->j_list_lock); | 1733 | spin_lock(&journal->j_list_lock); |
1697 | } | 1734 | } |
1698 | spin_unlock(&journal->j_list_lock); | 1735 | spin_unlock(&journal->j_list_lock); |
1736 | |||
1737 | if (is_journal_aborted(journal)) | ||
1738 | return -EIO; | ||
1739 | |||
1699 | jbd2_cleanup_journal_tail(journal); | 1740 | jbd2_cleanup_journal_tail(journal); |
1700 | 1741 | ||
1701 | /* Finally, mark the journal as really needing no recovery. | 1742 | /* Finally, mark the journal as really needing no recovery. |
@@ -1717,7 +1758,7 @@ int jbd2_journal_flush(journal_t *journal) | |||
1717 | J_ASSERT(journal->j_head == journal->j_tail); | 1758 | J_ASSERT(journal->j_head == journal->j_tail); |
1718 | J_ASSERT(journal->j_tail_sequence == journal->j_transaction_sequence); | 1759 | J_ASSERT(journal->j_tail_sequence == journal->j_transaction_sequence); |
1719 | spin_unlock(&journal->j_state_lock); | 1760 | spin_unlock(&journal->j_state_lock); |
1720 | return err; | 1761 | return 0; |
1721 | } | 1762 | } |
1722 | 1763 | ||
1723 | /** | 1764 | /** |
@@ -1761,23 +1802,6 @@ int jbd2_journal_wipe(journal_t *journal, int write) | |||
1761 | } | 1802 | } |
1762 | 1803 | ||
1763 | /* | 1804 | /* |
1764 | * journal_dev_name: format a character string to describe on what | ||
1765 | * device this journal is present. | ||
1766 | */ | ||
1767 | |||
1768 | static const char *journal_dev_name(journal_t *journal, char *buffer) | ||
1769 | { | ||
1770 | struct block_device *bdev; | ||
1771 | |||
1772 | if (journal->j_inode) | ||
1773 | bdev = journal->j_inode->i_sb->s_bdev; | ||
1774 | else | ||
1775 | bdev = journal->j_dev; | ||
1776 | |||
1777 | return bdevname(bdev, buffer); | ||
1778 | } | ||
1779 | |||
1780 | /* | ||
1781 | * Journal abort has very specific semantics, which we describe | 1805 | * Journal abort has very specific semantics, which we describe |
1782 | * for journal abort. | 1806 | * for journal abort. |
1783 | * | 1807 | * |
@@ -1793,13 +1817,12 @@ static const char *journal_dev_name(journal_t *journal, char *buffer) | |||
1793 | void __jbd2_journal_abort_hard(journal_t *journal) | 1817 | void __jbd2_journal_abort_hard(journal_t *journal) |
1794 | { | 1818 | { |
1795 | transaction_t *transaction; | 1819 | transaction_t *transaction; |
1796 | char b[BDEVNAME_SIZE]; | ||
1797 | 1820 | ||
1798 | if (journal->j_flags & JBD2_ABORT) | 1821 | if (journal->j_flags & JBD2_ABORT) |
1799 | return; | 1822 | return; |
1800 | 1823 | ||
1801 | printk(KERN_ERR "Aborting journal on device %s.\n", | 1824 | printk(KERN_ERR "Aborting journal on device %s.\n", |
1802 | journal_dev_name(journal, b)); | 1825 | journal->j_devname); |
1803 | 1826 | ||
1804 | spin_lock(&journal->j_state_lock); | 1827 | spin_lock(&journal->j_state_lock); |
1805 | journal->j_flags |= JBD2_ABORT; | 1828 | journal->j_flags |= JBD2_ABORT; |
diff --git a/fs/jbd2/recovery.c b/fs/jbd2/recovery.c index 058f50f65b76..73063285b13f 100644 --- a/fs/jbd2/recovery.c +++ b/fs/jbd2/recovery.c | |||
@@ -225,7 +225,7 @@ do { \ | |||
225 | */ | 225 | */ |
226 | int jbd2_journal_recover(journal_t *journal) | 226 | int jbd2_journal_recover(journal_t *journal) |
227 | { | 227 | { |
228 | int err; | 228 | int err, err2; |
229 | journal_superblock_t * sb; | 229 | journal_superblock_t * sb; |
230 | 230 | ||
231 | struct recovery_info info; | 231 | struct recovery_info info; |
@@ -263,7 +263,10 @@ int jbd2_journal_recover(journal_t *journal) | |||
263 | journal->j_transaction_sequence = ++info.end_transaction; | 263 | journal->j_transaction_sequence = ++info.end_transaction; |
264 | 264 | ||
265 | jbd2_journal_clear_revoke(journal); | 265 | jbd2_journal_clear_revoke(journal); |
266 | sync_blockdev(journal->j_fs_dev); | 266 | err2 = sync_blockdev(journal->j_fs_dev); |
267 | if (!err) | ||
268 | err = err2; | ||
269 | |||
267 | return err; | 270 | return err; |
268 | } | 271 | } |
269 | 272 | ||
diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 9abcd2b329f7..e9b20173fef3 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c | |||
@@ -1279,6 +1279,12 @@ static int nfs_parse_mount_options(char *raw, | |||
1279 | } | 1279 | } |
1280 | } | 1280 | } |
1281 | 1281 | ||
1282 | if (errors > 0) { | ||
1283 | dfprintk(MOUNT, "NFS: parsing encountered %d error%s\n", | ||
1284 | errors, (errors == 1 ? "" : "s")); | ||
1285 | if (!sloppy) | ||
1286 | return 0; | ||
1287 | } | ||
1282 | return 1; | 1288 | return 1; |
1283 | 1289 | ||
1284 | out_nomem: | 1290 | out_nomem: |
diff --git a/fs/ntfs/usnjrnl.h b/fs/ntfs/usnjrnl.h index 3a8af75351e8..4087fbdac327 100644 --- a/fs/ntfs/usnjrnl.h +++ b/fs/ntfs/usnjrnl.h | |||
@@ -113,7 +113,7 @@ typedef struct { | |||
113 | * Reason flags (32-bit). Cumulative flags describing the change(s) to the | 113 | * Reason flags (32-bit). Cumulative flags describing the change(s) to the |
114 | * file since it was last opened. I think the names speak for themselves but | 114 | * file since it was last opened. I think the names speak for themselves but |
115 | * if you disagree check out the descriptions in the Linux NTFS project NTFS | 115 | * if you disagree check out the descriptions in the Linux NTFS project NTFS |
116 | * documentation: http://linux-ntfs.sourceforge.net/ntfs/files/usnjrnl.html | 116 | * documentation: http://www.linux-ntfs.org/ |
117 | */ | 117 | */ |
118 | enum { | 118 | enum { |
119 | USN_REASON_DATA_OVERWRITE = const_cpu_to_le32(0x00000001), | 119 | USN_REASON_DATA_OVERWRITE = const_cpu_to_le32(0x00000001), |
@@ -145,7 +145,7 @@ typedef le32 USN_REASON_FLAGS; | |||
145 | * Source info flags (32-bit). Information about the source of the change(s) | 145 | * Source info flags (32-bit). Information about the source of the change(s) |
146 | * to the file. For detailed descriptions of what these mean, see the Linux | 146 | * to the file. For detailed descriptions of what these mean, see the Linux |
147 | * NTFS project NTFS documentation: | 147 | * NTFS project NTFS documentation: |
148 | * http://linux-ntfs.sourceforge.net/ntfs/files/usnjrnl.html | 148 | * http://www.linux-ntfs.org/ |
149 | */ | 149 | */ |
150 | enum { | 150 | enum { |
151 | USN_SOURCE_DATA_MANAGEMENT = const_cpu_to_le32(0x00000001), | 151 | USN_SOURCE_DATA_MANAGEMENT = const_cpu_to_le32(0x00000001), |
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c index 10bfb466e068..29ff57ec5d1f 100644 --- a/fs/ocfs2/alloc.c +++ b/fs/ocfs2/alloc.c | |||
@@ -990,15 +990,6 @@ out: | |||
990 | } | 990 | } |
991 | 991 | ||
992 | /* | 992 | /* |
993 | * This is only valid for leaf nodes, which are the only ones that can | ||
994 | * have empty extents anyway. | ||
995 | */ | ||
996 | static inline int ocfs2_is_empty_extent(struct ocfs2_extent_rec *rec) | ||
997 | { | ||
998 | return !rec->e_leaf_clusters; | ||
999 | } | ||
1000 | |||
1001 | /* | ||
1002 | * This function will discard the rightmost extent record. | 993 | * This function will discard the rightmost extent record. |
1003 | */ | 994 | */ |
1004 | static void ocfs2_shift_records_right(struct ocfs2_extent_list *el) | 995 | static void ocfs2_shift_records_right(struct ocfs2_extent_list *el) |
diff --git a/fs/ocfs2/alloc.h b/fs/ocfs2/alloc.h index 42ff94bd8011..60cd3d59230c 100644 --- a/fs/ocfs2/alloc.h +++ b/fs/ocfs2/alloc.h | |||
@@ -146,4 +146,13 @@ static inline unsigned int ocfs2_rec_clusters(struct ocfs2_extent_list *el, | |||
146 | return le16_to_cpu(rec->e_leaf_clusters); | 146 | return le16_to_cpu(rec->e_leaf_clusters); |
147 | } | 147 | } |
148 | 148 | ||
149 | /* | ||
150 | * This is only valid for leaf nodes, which are the only ones that can | ||
151 | * have empty extents anyway. | ||
152 | */ | ||
153 | static inline int ocfs2_is_empty_extent(struct ocfs2_extent_rec *rec) | ||
154 | { | ||
155 | return !rec->e_leaf_clusters; | ||
156 | } | ||
157 | |||
149 | #endif /* OCFS2_ALLOC_H */ | 158 | #endif /* OCFS2_ALLOC_H */ |
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c index 506c24fb5078..a53da1466277 100644 --- a/fs/ocfs2/aops.c +++ b/fs/ocfs2/aops.c | |||
@@ -594,7 +594,7 @@ static int ocfs2_direct_IO_get_blocks(struct inode *inode, sector_t iblock, | |||
594 | goto bail; | 594 | goto bail; |
595 | } | 595 | } |
596 | 596 | ||
597 | if (!ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb)) && !p_blkno) { | 597 | if (!ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb)) && !p_blkno && create) { |
598 | ocfs2_error(inode->i_sb, | 598 | ocfs2_error(inode->i_sb, |
599 | "Inode %llu has a hole at block %llu\n", | 599 | "Inode %llu has a hole at block %llu\n", |
600 | (unsigned long long)OCFS2_I(inode)->ip_blkno, | 600 | (unsigned long long)OCFS2_I(inode)->ip_blkno, |
diff --git a/fs/ocfs2/extent_map.c b/fs/ocfs2/extent_map.c index c58668a326fe..aed268e80b49 100644 --- a/fs/ocfs2/extent_map.c +++ b/fs/ocfs2/extent_map.c | |||
@@ -25,6 +25,7 @@ | |||
25 | #include <linux/fs.h> | 25 | #include <linux/fs.h> |
26 | #include <linux/init.h> | 26 | #include <linux/init.h> |
27 | #include <linux/types.h> | 27 | #include <linux/types.h> |
28 | #include <linux/fiemap.h> | ||
28 | 29 | ||
29 | #define MLOG_MASK_PREFIX ML_EXTENT_MAP | 30 | #define MLOG_MASK_PREFIX ML_EXTENT_MAP |
30 | #include <cluster/masklog.h> | 31 | #include <cluster/masklog.h> |
@@ -32,6 +33,7 @@ | |||
32 | #include "ocfs2.h" | 33 | #include "ocfs2.h" |
33 | 34 | ||
34 | #include "alloc.h" | 35 | #include "alloc.h" |
36 | #include "dlmglue.h" | ||
35 | #include "extent_map.h" | 37 | #include "extent_map.h" |
36 | #include "inode.h" | 38 | #include "inode.h" |
37 | #include "super.h" | 39 | #include "super.h" |
@@ -282,6 +284,51 @@ out: | |||
282 | kfree(new_emi); | 284 | kfree(new_emi); |
283 | } | 285 | } |
284 | 286 | ||
287 | static int ocfs2_last_eb_is_empty(struct inode *inode, | ||
288 | struct ocfs2_dinode *di) | ||
289 | { | ||
290 | int ret, next_free; | ||
291 | u64 last_eb_blk = le64_to_cpu(di->i_last_eb_blk); | ||
292 | struct buffer_head *eb_bh = NULL; | ||
293 | struct ocfs2_extent_block *eb; | ||
294 | struct ocfs2_extent_list *el; | ||
295 | |||
296 | ret = ocfs2_read_block(OCFS2_SB(inode->i_sb), last_eb_blk, | ||
297 | &eb_bh, OCFS2_BH_CACHED, inode); | ||
298 | if (ret) { | ||
299 | mlog_errno(ret); | ||
300 | goto out; | ||
301 | } | ||
302 | |||
303 | eb = (struct ocfs2_extent_block *) eb_bh->b_data; | ||
304 | el = &eb->h_list; | ||
305 | |||
306 | if (!OCFS2_IS_VALID_EXTENT_BLOCK(eb)) { | ||
307 | ret = -EROFS; | ||
308 | OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb, eb); | ||
309 | goto out; | ||
310 | } | ||
311 | |||
312 | if (el->l_tree_depth) { | ||
313 | ocfs2_error(inode->i_sb, | ||
314 | "Inode %lu has non zero tree depth in " | ||
315 | "leaf block %llu\n", inode->i_ino, | ||
316 | (unsigned long long)eb_bh->b_blocknr); | ||
317 | ret = -EROFS; | ||
318 | goto out; | ||
319 | } | ||
320 | |||
321 | next_free = le16_to_cpu(el->l_next_free_rec); | ||
322 | |||
323 | if (next_free == 0 || | ||
324 | (next_free == 1 && ocfs2_is_empty_extent(&el->l_recs[0]))) | ||
325 | ret = 1; | ||
326 | |||
327 | out: | ||
328 | brelse(eb_bh); | ||
329 | return ret; | ||
330 | } | ||
331 | |||
285 | /* | 332 | /* |
286 | * Return the 1st index within el which contains an extent start | 333 | * Return the 1st index within el which contains an extent start |
287 | * larger than v_cluster. | 334 | * larger than v_cluster. |
@@ -373,42 +420,28 @@ out: | |||
373 | return ret; | 420 | return ret; |
374 | } | 421 | } |
375 | 422 | ||
376 | int ocfs2_get_clusters(struct inode *inode, u32 v_cluster, | 423 | static int ocfs2_get_clusters_nocache(struct inode *inode, |
377 | u32 *p_cluster, u32 *num_clusters, | 424 | struct buffer_head *di_bh, |
378 | unsigned int *extent_flags) | 425 | u32 v_cluster, unsigned int *hole_len, |
426 | struct ocfs2_extent_rec *ret_rec, | ||
427 | unsigned int *is_last) | ||
379 | { | 428 | { |
380 | int ret, i; | 429 | int i, ret, tree_height, len; |
381 | unsigned int flags = 0; | ||
382 | struct buffer_head *di_bh = NULL; | ||
383 | struct buffer_head *eb_bh = NULL; | ||
384 | struct ocfs2_dinode *di; | 430 | struct ocfs2_dinode *di; |
385 | struct ocfs2_extent_block *eb; | 431 | struct ocfs2_extent_block *uninitialized_var(eb); |
386 | struct ocfs2_extent_list *el; | 432 | struct ocfs2_extent_list *el; |
387 | struct ocfs2_extent_rec *rec; | 433 | struct ocfs2_extent_rec *rec; |
388 | u32 coff; | 434 | struct buffer_head *eb_bh = NULL; |
389 | |||
390 | if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) { | ||
391 | ret = -ERANGE; | ||
392 | mlog_errno(ret); | ||
393 | goto out; | ||
394 | } | ||
395 | |||
396 | ret = ocfs2_extent_map_lookup(inode, v_cluster, p_cluster, | ||
397 | num_clusters, extent_flags); | ||
398 | if (ret == 0) | ||
399 | goto out; | ||
400 | 435 | ||
401 | ret = ocfs2_read_block(OCFS2_SB(inode->i_sb), OCFS2_I(inode)->ip_blkno, | 436 | memset(ret_rec, 0, sizeof(*ret_rec)); |
402 | &di_bh, OCFS2_BH_CACHED, inode); | 437 | if (is_last) |
403 | if (ret) { | 438 | *is_last = 0; |
404 | mlog_errno(ret); | ||
405 | goto out; | ||
406 | } | ||
407 | 439 | ||
408 | di = (struct ocfs2_dinode *) di_bh->b_data; | 440 | di = (struct ocfs2_dinode *) di_bh->b_data; |
409 | el = &di->id2.i_list; | 441 | el = &di->id2.i_list; |
442 | tree_height = le16_to_cpu(el->l_tree_depth); | ||
410 | 443 | ||
411 | if (el->l_tree_depth) { | 444 | if (tree_height > 0) { |
412 | ret = ocfs2_find_leaf(inode, el, v_cluster, &eb_bh); | 445 | ret = ocfs2_find_leaf(inode, el, v_cluster, &eb_bh); |
413 | if (ret) { | 446 | if (ret) { |
414 | mlog_errno(ret); | 447 | mlog_errno(ret); |
@@ -431,46 +464,143 @@ int ocfs2_get_clusters(struct inode *inode, u32 v_cluster, | |||
431 | i = ocfs2_search_extent_list(el, v_cluster); | 464 | i = ocfs2_search_extent_list(el, v_cluster); |
432 | if (i == -1) { | 465 | if (i == -1) { |
433 | /* | 466 | /* |
434 | * A hole was found. Return some canned values that | 467 | * Holes can be larger than the maximum size of an |
435 | * callers can key on. If asked for, num_clusters will | 468 | * extent, so we return their lengths in a seperate |
436 | * be populated with the size of the hole. | 469 | * field. |
437 | */ | 470 | */ |
438 | *p_cluster = 0; | 471 | if (hole_len) { |
439 | if (num_clusters) { | ||
440 | ret = ocfs2_figure_hole_clusters(inode, el, eb_bh, | 472 | ret = ocfs2_figure_hole_clusters(inode, el, eb_bh, |
441 | v_cluster, | 473 | v_cluster, &len); |
442 | num_clusters); | ||
443 | if (ret) { | 474 | if (ret) { |
444 | mlog_errno(ret); | 475 | mlog_errno(ret); |
445 | goto out; | 476 | goto out; |
446 | } | 477 | } |
478 | |||
479 | *hole_len = len; | ||
447 | } | 480 | } |
448 | } else { | 481 | goto out_hole; |
449 | rec = &el->l_recs[i]; | 482 | } |
450 | 483 | ||
451 | BUG_ON(v_cluster < le32_to_cpu(rec->e_cpos)); | 484 | rec = &el->l_recs[i]; |
452 | 485 | ||
453 | if (!rec->e_blkno) { | 486 | BUG_ON(v_cluster < le32_to_cpu(rec->e_cpos)); |
454 | ocfs2_error(inode->i_sb, "Inode %lu has bad extent " | 487 | |
455 | "record (%u, %u, 0)", inode->i_ino, | 488 | if (!rec->e_blkno) { |
456 | le32_to_cpu(rec->e_cpos), | 489 | ocfs2_error(inode->i_sb, "Inode %lu has bad extent " |
457 | ocfs2_rec_clusters(el, rec)); | 490 | "record (%u, %u, 0)", inode->i_ino, |
458 | ret = -EROFS; | 491 | le32_to_cpu(rec->e_cpos), |
459 | goto out; | 492 | ocfs2_rec_clusters(el, rec)); |
493 | ret = -EROFS; | ||
494 | goto out; | ||
495 | } | ||
496 | |||
497 | *ret_rec = *rec; | ||
498 | |||
499 | /* | ||
500 | * Checking for last extent is potentially expensive - we | ||
501 | * might have to look at the next leaf over to see if it's | ||
502 | * empty. | ||
503 | * | ||
504 | * The first two checks are to see whether the caller even | ||
505 | * cares for this information, and if the extent is at least | ||
506 | * the last in it's list. | ||
507 | * | ||
508 | * If those hold true, then the extent is last if any of the | ||
509 | * additional conditions hold true: | ||
510 | * - Extent list is in-inode | ||
511 | * - Extent list is right-most | ||
512 | * - Extent list is 2nd to rightmost, with empty right-most | ||
513 | */ | ||
514 | if (is_last) { | ||
515 | if (i == (le16_to_cpu(el->l_next_free_rec) - 1)) { | ||
516 | if (tree_height == 0) | ||
517 | *is_last = 1; | ||
518 | else if (eb->h_blkno == di->i_last_eb_blk) | ||
519 | *is_last = 1; | ||
520 | else if (eb->h_next_leaf_blk == di->i_last_eb_blk) { | ||
521 | ret = ocfs2_last_eb_is_empty(inode, di); | ||
522 | if (ret < 0) { | ||
523 | mlog_errno(ret); | ||
524 | goto out; | ||
525 | } | ||
526 | if (ret == 1) | ||
527 | *is_last = 1; | ||
528 | } | ||
460 | } | 529 | } |
530 | } | ||
531 | |||
532 | out_hole: | ||
533 | ret = 0; | ||
534 | out: | ||
535 | brelse(eb_bh); | ||
536 | return ret; | ||
537 | } | ||
538 | |||
539 | static void ocfs2_relative_extent_offsets(struct super_block *sb, | ||
540 | u32 v_cluster, | ||
541 | struct ocfs2_extent_rec *rec, | ||
542 | u32 *p_cluster, u32 *num_clusters) | ||
543 | |||
544 | { | ||
545 | u32 coff = v_cluster - le32_to_cpu(rec->e_cpos); | ||
546 | |||
547 | *p_cluster = ocfs2_blocks_to_clusters(sb, le64_to_cpu(rec->e_blkno)); | ||
548 | *p_cluster = *p_cluster + coff; | ||
549 | |||
550 | if (num_clusters) | ||
551 | *num_clusters = le16_to_cpu(rec->e_leaf_clusters) - coff; | ||
552 | } | ||
553 | |||
554 | int ocfs2_get_clusters(struct inode *inode, u32 v_cluster, | ||
555 | u32 *p_cluster, u32 *num_clusters, | ||
556 | unsigned int *extent_flags) | ||
557 | { | ||
558 | int ret; | ||
559 | unsigned int uninitialized_var(hole_len), flags = 0; | ||
560 | struct buffer_head *di_bh = NULL; | ||
561 | struct ocfs2_extent_rec rec; | ||
461 | 562 | ||
462 | coff = v_cluster - le32_to_cpu(rec->e_cpos); | 563 | if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) { |
564 | ret = -ERANGE; | ||
565 | mlog_errno(ret); | ||
566 | goto out; | ||
567 | } | ||
463 | 568 | ||
464 | *p_cluster = ocfs2_blocks_to_clusters(inode->i_sb, | 569 | ret = ocfs2_extent_map_lookup(inode, v_cluster, p_cluster, |
465 | le64_to_cpu(rec->e_blkno)); | 570 | num_clusters, extent_flags); |
466 | *p_cluster = *p_cluster + coff; | 571 | if (ret == 0) |
572 | goto out; | ||
467 | 573 | ||
468 | if (num_clusters) | 574 | ret = ocfs2_read_block(OCFS2_SB(inode->i_sb), OCFS2_I(inode)->ip_blkno, |
469 | *num_clusters = ocfs2_rec_clusters(el, rec) - coff; | 575 | &di_bh, OCFS2_BH_CACHED, inode); |
576 | if (ret) { | ||
577 | mlog_errno(ret); | ||
578 | goto out; | ||
579 | } | ||
470 | 580 | ||
471 | flags = rec->e_flags; | 581 | ret = ocfs2_get_clusters_nocache(inode, di_bh, v_cluster, &hole_len, |
582 | &rec, NULL); | ||
583 | if (ret) { | ||
584 | mlog_errno(ret); | ||
585 | goto out; | ||
586 | } | ||
472 | 587 | ||
473 | ocfs2_extent_map_insert_rec(inode, rec); | 588 | if (rec.e_blkno == 0ULL) { |
589 | /* | ||
590 | * A hole was found. Return some canned values that | ||
591 | * callers can key on. If asked for, num_clusters will | ||
592 | * be populated with the size of the hole. | ||
593 | */ | ||
594 | *p_cluster = 0; | ||
595 | if (num_clusters) { | ||
596 | *num_clusters = hole_len; | ||
597 | } | ||
598 | } else { | ||
599 | ocfs2_relative_extent_offsets(inode->i_sb, v_cluster, &rec, | ||
600 | p_cluster, num_clusters); | ||
601 | flags = rec.e_flags; | ||
602 | |||
603 | ocfs2_extent_map_insert_rec(inode, &rec); | ||
474 | } | 604 | } |
475 | 605 | ||
476 | if (extent_flags) | 606 | if (extent_flags) |
@@ -478,7 +608,6 @@ int ocfs2_get_clusters(struct inode *inode, u32 v_cluster, | |||
478 | 608 | ||
479 | out: | 609 | out: |
480 | brelse(di_bh); | 610 | brelse(di_bh); |
481 | brelse(eb_bh); | ||
482 | return ret; | 611 | return ret; |
483 | } | 612 | } |
484 | 613 | ||
@@ -521,3 +650,114 @@ int ocfs2_extent_map_get_blocks(struct inode *inode, u64 v_blkno, u64 *p_blkno, | |||
521 | out: | 650 | out: |
522 | return ret; | 651 | return ret; |
523 | } | 652 | } |
653 | |||
654 | static int ocfs2_fiemap_inline(struct inode *inode, struct buffer_head *di_bh, | ||
655 | struct fiemap_extent_info *fieinfo, | ||
656 | u64 map_start) | ||
657 | { | ||
658 | int ret; | ||
659 | unsigned int id_count; | ||
660 | struct ocfs2_dinode *di; | ||
661 | u64 phys; | ||
662 | u32 flags = FIEMAP_EXTENT_DATA_INLINE|FIEMAP_EXTENT_LAST; | ||
663 | struct ocfs2_inode_info *oi = OCFS2_I(inode); | ||
664 | |||
665 | di = (struct ocfs2_dinode *)di_bh->b_data; | ||
666 | id_count = le16_to_cpu(di->id2.i_data.id_count); | ||
667 | |||
668 | if (map_start < id_count) { | ||
669 | phys = oi->ip_blkno << inode->i_sb->s_blocksize_bits; | ||
670 | phys += offsetof(struct ocfs2_dinode, id2.i_data.id_data); | ||
671 | |||
672 | ret = fiemap_fill_next_extent(fieinfo, 0, phys, id_count, | ||
673 | flags); | ||
674 | if (ret < 0) | ||
675 | return ret; | ||
676 | } | ||
677 | |||
678 | return 0; | ||
679 | } | ||
680 | |||
681 | #define OCFS2_FIEMAP_FLAGS (FIEMAP_FLAG_SYNC) | ||
682 | |||
683 | int ocfs2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, | ||
684 | u64 map_start, u64 map_len) | ||
685 | { | ||
686 | int ret, is_last; | ||
687 | u32 mapping_end, cpos; | ||
688 | unsigned int hole_size; | ||
689 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
690 | u64 len_bytes, phys_bytes, virt_bytes; | ||
691 | struct buffer_head *di_bh = NULL; | ||
692 | struct ocfs2_extent_rec rec; | ||
693 | |||
694 | ret = fiemap_check_flags(fieinfo, OCFS2_FIEMAP_FLAGS); | ||
695 | if (ret) | ||
696 | return ret; | ||
697 | |||
698 | ret = ocfs2_inode_lock(inode, &di_bh, 0); | ||
699 | if (ret) { | ||
700 | mlog_errno(ret); | ||
701 | goto out; | ||
702 | } | ||
703 | |||
704 | down_read(&OCFS2_I(inode)->ip_alloc_sem); | ||
705 | |||
706 | /* | ||
707 | * Handle inline-data separately. | ||
708 | */ | ||
709 | if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) { | ||
710 | ret = ocfs2_fiemap_inline(inode, di_bh, fieinfo, map_start); | ||
711 | goto out_unlock; | ||
712 | } | ||
713 | |||
714 | cpos = map_start >> osb->s_clustersize_bits; | ||
715 | mapping_end = ocfs2_clusters_for_bytes(inode->i_sb, | ||
716 | map_start + map_len); | ||
717 | mapping_end -= cpos; | ||
718 | is_last = 0; | ||
719 | while (cpos < mapping_end && !is_last) { | ||
720 | u32 fe_flags; | ||
721 | |||
722 | ret = ocfs2_get_clusters_nocache(inode, di_bh, cpos, | ||
723 | &hole_size, &rec, &is_last); | ||
724 | if (ret) { | ||
725 | mlog_errno(ret); | ||
726 | goto out; | ||
727 | } | ||
728 | |||
729 | if (rec.e_blkno == 0ULL) { | ||
730 | cpos += hole_size; | ||
731 | continue; | ||
732 | } | ||
733 | |||
734 | fe_flags = 0; | ||
735 | if (rec.e_flags & OCFS2_EXT_UNWRITTEN) | ||
736 | fe_flags |= FIEMAP_EXTENT_UNWRITTEN; | ||
737 | if (is_last) | ||
738 | fe_flags |= FIEMAP_EXTENT_LAST; | ||
739 | len_bytes = (u64)le16_to_cpu(rec.e_leaf_clusters) << osb->s_clustersize_bits; | ||
740 | phys_bytes = le64_to_cpu(rec.e_blkno) << osb->sb->s_blocksize_bits; | ||
741 | virt_bytes = (u64)le32_to_cpu(rec.e_cpos) << osb->s_clustersize_bits; | ||
742 | |||
743 | ret = fiemap_fill_next_extent(fieinfo, virt_bytes, phys_bytes, | ||
744 | len_bytes, fe_flags); | ||
745 | if (ret) | ||
746 | break; | ||
747 | |||
748 | cpos = le32_to_cpu(rec.e_cpos)+ le16_to_cpu(rec.e_leaf_clusters); | ||
749 | } | ||
750 | |||
751 | if (ret > 0) | ||
752 | ret = 0; | ||
753 | |||
754 | out_unlock: | ||
755 | brelse(di_bh); | ||
756 | |||
757 | up_read(&OCFS2_I(inode)->ip_alloc_sem); | ||
758 | |||
759 | ocfs2_inode_unlock(inode, 0); | ||
760 | out: | ||
761 | |||
762 | return ret; | ||
763 | } | ||
diff --git a/fs/ocfs2/extent_map.h b/fs/ocfs2/extent_map.h index de91e3e41a22..1b97490e1ea8 100644 --- a/fs/ocfs2/extent_map.h +++ b/fs/ocfs2/extent_map.h | |||
@@ -50,4 +50,7 @@ int ocfs2_get_clusters(struct inode *inode, u32 v_cluster, u32 *p_cluster, | |||
50 | int ocfs2_extent_map_get_blocks(struct inode *inode, u64 v_blkno, u64 *p_blkno, | 50 | int ocfs2_extent_map_get_blocks(struct inode *inode, u64 v_blkno, u64 *p_blkno, |
51 | u64 *ret_count, unsigned int *extent_flags); | 51 | u64 *ret_count, unsigned int *extent_flags); |
52 | 52 | ||
53 | int ocfs2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, | ||
54 | u64 map_start, u64 map_len); | ||
55 | |||
53 | #endif /* _EXTENT_MAP_H */ | 56 | #endif /* _EXTENT_MAP_H */ |
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index ec2ed15c3daa..ed38796052d2 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c | |||
@@ -2228,6 +2228,7 @@ const struct inode_operations ocfs2_file_iops = { | |||
2228 | .getattr = ocfs2_getattr, | 2228 | .getattr = ocfs2_getattr, |
2229 | .permission = ocfs2_permission, | 2229 | .permission = ocfs2_permission, |
2230 | .fallocate = ocfs2_fallocate, | 2230 | .fallocate = ocfs2_fallocate, |
2231 | .fiemap = ocfs2_fiemap, | ||
2231 | }; | 2232 | }; |
2232 | 2233 | ||
2233 | const struct inode_operations ocfs2_special_file_iops = { | 2234 | const struct inode_operations ocfs2_special_file_iops = { |
diff --git a/fs/partitions/check.c b/fs/partitions/check.c index 7d6b34e201db..7408227c49c9 100644 --- a/fs/partitions/check.c +++ b/fs/partitions/check.c | |||
@@ -120,22 +120,21 @@ static int (*check_part[])(struct parsed_partitions *, struct block_device *) = | |||
120 | * a pointer to that same buffer (for convenience). | 120 | * a pointer to that same buffer (for convenience). |
121 | */ | 121 | */ |
122 | 122 | ||
123 | char *disk_name(struct gendisk *hd, int part, char *buf) | 123 | char *disk_name(struct gendisk *hd, int partno, char *buf) |
124 | { | 124 | { |
125 | if (!part) | 125 | if (!partno) |
126 | snprintf(buf, BDEVNAME_SIZE, "%s", hd->disk_name); | 126 | snprintf(buf, BDEVNAME_SIZE, "%s", hd->disk_name); |
127 | else if (isdigit(hd->disk_name[strlen(hd->disk_name)-1])) | 127 | else if (isdigit(hd->disk_name[strlen(hd->disk_name)-1])) |
128 | snprintf(buf, BDEVNAME_SIZE, "%sp%d", hd->disk_name, part); | 128 | snprintf(buf, BDEVNAME_SIZE, "%sp%d", hd->disk_name, partno); |
129 | else | 129 | else |
130 | snprintf(buf, BDEVNAME_SIZE, "%s%d", hd->disk_name, part); | 130 | snprintf(buf, BDEVNAME_SIZE, "%s%d", hd->disk_name, partno); |
131 | 131 | ||
132 | return buf; | 132 | return buf; |
133 | } | 133 | } |
134 | 134 | ||
135 | const char *bdevname(struct block_device *bdev, char *buf) | 135 | const char *bdevname(struct block_device *bdev, char *buf) |
136 | { | 136 | { |
137 | int part = MINOR(bdev->bd_dev) - bdev->bd_disk->first_minor; | 137 | return disk_name(bdev->bd_disk, bdev->bd_part->partno, buf); |
138 | return disk_name(bdev->bd_disk, part, buf); | ||
139 | } | 138 | } |
140 | 139 | ||
141 | EXPORT_SYMBOL(bdevname); | 140 | EXPORT_SYMBOL(bdevname); |
@@ -169,7 +168,7 @@ check_partition(struct gendisk *hd, struct block_device *bdev) | |||
169 | if (isdigit(state->name[strlen(state->name)-1])) | 168 | if (isdigit(state->name[strlen(state->name)-1])) |
170 | sprintf(state->name, "p"); | 169 | sprintf(state->name, "p"); |
171 | 170 | ||
172 | state->limit = hd->minors; | 171 | state->limit = disk_max_parts(hd); |
173 | i = res = err = 0; | 172 | i = res = err = 0; |
174 | while (!res && check_part[i]) { | 173 | while (!res && check_part[i]) { |
175 | memset(&state->parts, 0, sizeof(state->parts)); | 174 | memset(&state->parts, 0, sizeof(state->parts)); |
@@ -204,21 +203,22 @@ static ssize_t part_start_show(struct device *dev, | |||
204 | return sprintf(buf, "%llu\n",(unsigned long long)p->start_sect); | 203 | return sprintf(buf, "%llu\n",(unsigned long long)p->start_sect); |
205 | } | 204 | } |
206 | 205 | ||
207 | static ssize_t part_size_show(struct device *dev, | 206 | ssize_t part_size_show(struct device *dev, |
208 | struct device_attribute *attr, char *buf) | 207 | struct device_attribute *attr, char *buf) |
209 | { | 208 | { |
210 | struct hd_struct *p = dev_to_part(dev); | 209 | struct hd_struct *p = dev_to_part(dev); |
211 | return sprintf(buf, "%llu\n",(unsigned long long)p->nr_sects); | 210 | return sprintf(buf, "%llu\n",(unsigned long long)p->nr_sects); |
212 | } | 211 | } |
213 | 212 | ||
214 | static ssize_t part_stat_show(struct device *dev, | 213 | ssize_t part_stat_show(struct device *dev, |
215 | struct device_attribute *attr, char *buf) | 214 | struct device_attribute *attr, char *buf) |
216 | { | 215 | { |
217 | struct hd_struct *p = dev_to_part(dev); | 216 | struct hd_struct *p = dev_to_part(dev); |
217 | int cpu; | ||
218 | 218 | ||
219 | preempt_disable(); | 219 | cpu = part_stat_lock(); |
220 | part_round_stats(p); | 220 | part_round_stats(cpu, p); |
221 | preempt_enable(); | 221 | part_stat_unlock(); |
222 | return sprintf(buf, | 222 | return sprintf(buf, |
223 | "%8lu %8lu %8llu %8u " | 223 | "%8lu %8lu %8llu %8u " |
224 | "%8lu %8lu %8llu %8u " | 224 | "%8lu %8lu %8llu %8u " |
@@ -238,17 +238,17 @@ static ssize_t part_stat_show(struct device *dev, | |||
238 | } | 238 | } |
239 | 239 | ||
240 | #ifdef CONFIG_FAIL_MAKE_REQUEST | 240 | #ifdef CONFIG_FAIL_MAKE_REQUEST |
241 | static ssize_t part_fail_show(struct device *dev, | 241 | ssize_t part_fail_show(struct device *dev, |
242 | struct device_attribute *attr, char *buf) | 242 | struct device_attribute *attr, char *buf) |
243 | { | 243 | { |
244 | struct hd_struct *p = dev_to_part(dev); | 244 | struct hd_struct *p = dev_to_part(dev); |
245 | 245 | ||
246 | return sprintf(buf, "%d\n", p->make_it_fail); | 246 | return sprintf(buf, "%d\n", p->make_it_fail); |
247 | } | 247 | } |
248 | 248 | ||
249 | static ssize_t part_fail_store(struct device *dev, | 249 | ssize_t part_fail_store(struct device *dev, |
250 | struct device_attribute *attr, | 250 | struct device_attribute *attr, |
251 | const char *buf, size_t count) | 251 | const char *buf, size_t count) |
252 | { | 252 | { |
253 | struct hd_struct *p = dev_to_part(dev); | 253 | struct hd_struct *p = dev_to_part(dev); |
254 | int i; | 254 | int i; |
@@ -300,40 +300,34 @@ struct device_type part_type = { | |||
300 | .release = part_release, | 300 | .release = part_release, |
301 | }; | 301 | }; |
302 | 302 | ||
303 | static inline void partition_sysfs_add_subdir(struct hd_struct *p) | 303 | static void delete_partition_rcu_cb(struct rcu_head *head) |
304 | { | ||
305 | struct kobject *k; | ||
306 | |||
307 | k = kobject_get(&p->dev.kobj); | ||
308 | p->holder_dir = kobject_create_and_add("holders", k); | ||
309 | kobject_put(k); | ||
310 | } | ||
311 | |||
312 | static inline void disk_sysfs_add_subdirs(struct gendisk *disk) | ||
313 | { | 304 | { |
314 | struct kobject *k; | 305 | struct hd_struct *part = container_of(head, struct hd_struct, rcu_head); |
315 | 306 | ||
316 | k = kobject_get(&disk->dev.kobj); | 307 | part->start_sect = 0; |
317 | disk->holder_dir = kobject_create_and_add("holders", k); | 308 | part->nr_sects = 0; |
318 | disk->slave_dir = kobject_create_and_add("slaves", k); | 309 | part_stat_set_all(part, 0); |
319 | kobject_put(k); | 310 | put_device(part_to_dev(part)); |
320 | } | 311 | } |
321 | 312 | ||
322 | void delete_partition(struct gendisk *disk, int part) | 313 | void delete_partition(struct gendisk *disk, int partno) |
323 | { | 314 | { |
324 | struct hd_struct *p = disk->part[part-1]; | 315 | struct disk_part_tbl *ptbl = disk->part_tbl; |
316 | struct hd_struct *part; | ||
325 | 317 | ||
326 | if (!p) | 318 | if (partno >= ptbl->len) |
327 | return; | 319 | return; |
328 | if (!p->nr_sects) | 320 | |
321 | part = ptbl->part[partno]; | ||
322 | if (!part) | ||
329 | return; | 323 | return; |
330 | disk->part[part-1] = NULL; | 324 | |
331 | p->start_sect = 0; | 325 | blk_free_devt(part_devt(part)); |
332 | p->nr_sects = 0; | 326 | rcu_assign_pointer(ptbl->part[partno], NULL); |
333 | part_stat_set_all(p, 0); | 327 | kobject_put(part->holder_dir); |
334 | kobject_put(p->holder_dir); | 328 | device_del(part_to_dev(part)); |
335 | device_del(&p->dev); | 329 | |
336 | put_device(&p->dev); | 330 | call_rcu(&part->rcu_head, delete_partition_rcu_cb); |
337 | } | 331 | } |
338 | 332 | ||
339 | static ssize_t whole_disk_show(struct device *dev, | 333 | static ssize_t whole_disk_show(struct device *dev, |
@@ -344,102 +338,132 @@ static ssize_t whole_disk_show(struct device *dev, | |||
344 | static DEVICE_ATTR(whole_disk, S_IRUSR | S_IRGRP | S_IROTH, | 338 | static DEVICE_ATTR(whole_disk, S_IRUSR | S_IRGRP | S_IROTH, |
345 | whole_disk_show, NULL); | 339 | whole_disk_show, NULL); |
346 | 340 | ||
347 | int add_partition(struct gendisk *disk, int part, sector_t start, sector_t len, int flags) | 341 | int add_partition(struct gendisk *disk, int partno, |
342 | sector_t start, sector_t len, int flags) | ||
348 | { | 343 | { |
349 | struct hd_struct *p; | 344 | struct hd_struct *p; |
345 | dev_t devt = MKDEV(0, 0); | ||
346 | struct device *ddev = disk_to_dev(disk); | ||
347 | struct device *pdev; | ||
348 | struct disk_part_tbl *ptbl; | ||
349 | const char *dname; | ||
350 | int err; | 350 | int err; |
351 | 351 | ||
352 | err = disk_expand_part_tbl(disk, partno); | ||
353 | if (err) | ||
354 | return err; | ||
355 | ptbl = disk->part_tbl; | ||
356 | |||
357 | if (ptbl->part[partno]) | ||
358 | return -EBUSY; | ||
359 | |||
352 | p = kzalloc(sizeof(*p), GFP_KERNEL); | 360 | p = kzalloc(sizeof(*p), GFP_KERNEL); |
353 | if (!p) | 361 | if (!p) |
354 | return -ENOMEM; | 362 | return -ENOMEM; |
355 | 363 | ||
356 | if (!init_part_stats(p)) { | 364 | if (!init_part_stats(p)) { |
357 | err = -ENOMEM; | 365 | err = -ENOMEM; |
358 | goto out0; | 366 | goto out_free; |
359 | } | 367 | } |
368 | pdev = part_to_dev(p); | ||
369 | |||
360 | p->start_sect = start; | 370 | p->start_sect = start; |
361 | p->nr_sects = len; | 371 | p->nr_sects = len; |
362 | p->partno = part; | 372 | p->partno = partno; |
363 | p->policy = disk->policy; | 373 | p->policy = get_disk_ro(disk); |
364 | 374 | ||
365 | if (isdigit(disk->dev.bus_id[strlen(disk->dev.bus_id)-1])) | 375 | dname = dev_name(ddev); |
366 | snprintf(p->dev.bus_id, BUS_ID_SIZE, | 376 | if (isdigit(dname[strlen(dname) - 1])) |
367 | "%sp%d", disk->dev.bus_id, part); | 377 | snprintf(pdev->bus_id, BUS_ID_SIZE, "%sp%d", dname, partno); |
368 | else | 378 | else |
369 | snprintf(p->dev.bus_id, BUS_ID_SIZE, | 379 | snprintf(pdev->bus_id, BUS_ID_SIZE, "%s%d", dname, partno); |
370 | "%s%d", disk->dev.bus_id, part); | 380 | |
381 | device_initialize(pdev); | ||
382 | pdev->class = &block_class; | ||
383 | pdev->type = &part_type; | ||
384 | pdev->parent = ddev; | ||
371 | 385 | ||
372 | device_initialize(&p->dev); | 386 | err = blk_alloc_devt(p, &devt); |
373 | p->dev.devt = MKDEV(disk->major, disk->first_minor + part); | 387 | if (err) |
374 | p->dev.class = &block_class; | 388 | goto out_free; |
375 | p->dev.type = &part_type; | 389 | pdev->devt = devt; |
376 | p->dev.parent = &disk->dev; | ||
377 | disk->part[part-1] = p; | ||
378 | 390 | ||
379 | /* delay uevent until 'holders' subdir is created */ | 391 | /* delay uevent until 'holders' subdir is created */ |
380 | p->dev.uevent_suppress = 1; | 392 | pdev->uevent_suppress = 1; |
381 | err = device_add(&p->dev); | 393 | err = device_add(pdev); |
382 | if (err) | 394 | if (err) |
383 | goto out1; | 395 | goto out_put; |
384 | partition_sysfs_add_subdir(p); | 396 | |
385 | p->dev.uevent_suppress = 0; | 397 | err = -ENOMEM; |
398 | p->holder_dir = kobject_create_and_add("holders", &pdev->kobj); | ||
399 | if (!p->holder_dir) | ||
400 | goto out_del; | ||
401 | |||
402 | pdev->uevent_suppress = 0; | ||
386 | if (flags & ADDPART_FLAG_WHOLEDISK) { | 403 | if (flags & ADDPART_FLAG_WHOLEDISK) { |
387 | err = device_create_file(&p->dev, &dev_attr_whole_disk); | 404 | err = device_create_file(pdev, &dev_attr_whole_disk); |
388 | if (err) | 405 | if (err) |
389 | goto out2; | 406 | goto out_del; |
390 | } | 407 | } |
391 | 408 | ||
409 | /* everything is up and running, commence */ | ||
410 | INIT_RCU_HEAD(&p->rcu_head); | ||
411 | rcu_assign_pointer(ptbl->part[partno], p); | ||
412 | |||
392 | /* suppress uevent if the disk supresses it */ | 413 | /* suppress uevent if the disk supresses it */ |
393 | if (!disk->dev.uevent_suppress) | 414 | if (!ddev->uevent_suppress) |
394 | kobject_uevent(&p->dev.kobj, KOBJ_ADD); | 415 | kobject_uevent(&pdev->kobj, KOBJ_ADD); |
395 | 416 | ||
396 | return 0; | 417 | return 0; |
397 | 418 | ||
398 | out2: | 419 | out_free: |
399 | device_del(&p->dev); | ||
400 | out1: | ||
401 | put_device(&p->dev); | ||
402 | free_part_stats(p); | ||
403 | out0: | ||
404 | kfree(p); | 420 | kfree(p); |
405 | return err; | 421 | return err; |
422 | out_del: | ||
423 | kobject_put(p->holder_dir); | ||
424 | device_del(pdev); | ||
425 | out_put: | ||
426 | put_device(pdev); | ||
427 | blk_free_devt(devt); | ||
428 | return err; | ||
406 | } | 429 | } |
407 | 430 | ||
408 | /* Not exported, helper to add_disk(). */ | 431 | /* Not exported, helper to add_disk(). */ |
409 | void register_disk(struct gendisk *disk) | 432 | void register_disk(struct gendisk *disk) |
410 | { | 433 | { |
434 | struct device *ddev = disk_to_dev(disk); | ||
411 | struct block_device *bdev; | 435 | struct block_device *bdev; |
436 | struct disk_part_iter piter; | ||
437 | struct hd_struct *part; | ||
412 | char *s; | 438 | char *s; |
413 | int i; | ||
414 | struct hd_struct *p; | ||
415 | int err; | 439 | int err; |
416 | 440 | ||
417 | disk->dev.parent = disk->driverfs_dev; | 441 | ddev->parent = disk->driverfs_dev; |
418 | disk->dev.devt = MKDEV(disk->major, disk->first_minor); | ||
419 | 442 | ||
420 | strlcpy(disk->dev.bus_id, disk->disk_name, BUS_ID_SIZE); | 443 | strlcpy(ddev->bus_id, disk->disk_name, BUS_ID_SIZE); |
421 | /* ewww... some of these buggers have / in the name... */ | 444 | /* ewww... some of these buggers have / in the name... */ |
422 | s = strchr(disk->dev.bus_id, '/'); | 445 | s = strchr(ddev->bus_id, '/'); |
423 | if (s) | 446 | if (s) |
424 | *s = '!'; | 447 | *s = '!'; |
425 | 448 | ||
426 | /* delay uevents, until we scanned partition table */ | 449 | /* delay uevents, until we scanned partition table */ |
427 | disk->dev.uevent_suppress = 1; | 450 | ddev->uevent_suppress = 1; |
428 | 451 | ||
429 | if (device_add(&disk->dev)) | 452 | if (device_add(ddev)) |
430 | return; | 453 | return; |
431 | #ifndef CONFIG_SYSFS_DEPRECATED | 454 | #ifndef CONFIG_SYSFS_DEPRECATED |
432 | err = sysfs_create_link(block_depr, &disk->dev.kobj, | 455 | err = sysfs_create_link(block_depr, &ddev->kobj, |
433 | kobject_name(&disk->dev.kobj)); | 456 | kobject_name(&ddev->kobj)); |
434 | if (err) { | 457 | if (err) { |
435 | device_del(&disk->dev); | 458 | device_del(ddev); |
436 | return; | 459 | return; |
437 | } | 460 | } |
438 | #endif | 461 | #endif |
439 | disk_sysfs_add_subdirs(disk); | 462 | disk->part0.holder_dir = kobject_create_and_add("holders", &ddev->kobj); |
463 | disk->slave_dir = kobject_create_and_add("slaves", &ddev->kobj); | ||
440 | 464 | ||
441 | /* No minors to use for partitions */ | 465 | /* No minors to use for partitions */ |
442 | if (disk->minors == 1) | 466 | if (!disk_partitionable(disk)) |
443 | goto exit; | 467 | goto exit; |
444 | 468 | ||
445 | /* No such device (e.g., media were just removed) */ | 469 | /* No such device (e.g., media were just removed) */ |
@@ -458,50 +482,66 @@ void register_disk(struct gendisk *disk) | |||
458 | 482 | ||
459 | exit: | 483 | exit: |
460 | /* announce disk after possible partitions are created */ | 484 | /* announce disk after possible partitions are created */ |
461 | disk->dev.uevent_suppress = 0; | 485 | ddev->uevent_suppress = 0; |
462 | kobject_uevent(&disk->dev.kobj, KOBJ_ADD); | 486 | kobject_uevent(&ddev->kobj, KOBJ_ADD); |
463 | 487 | ||
464 | /* announce possible partitions */ | 488 | /* announce possible partitions */ |
465 | for (i = 1; i < disk->minors; i++) { | 489 | disk_part_iter_init(&piter, disk, 0); |
466 | p = disk->part[i-1]; | 490 | while ((part = disk_part_iter_next(&piter))) |
467 | if (!p || !p->nr_sects) | 491 | kobject_uevent(&part_to_dev(part)->kobj, KOBJ_ADD); |
468 | continue; | 492 | disk_part_iter_exit(&piter); |
469 | kobject_uevent(&p->dev.kobj, KOBJ_ADD); | ||
470 | } | ||
471 | } | 493 | } |
472 | 494 | ||
473 | int rescan_partitions(struct gendisk *disk, struct block_device *bdev) | 495 | int rescan_partitions(struct gendisk *disk, struct block_device *bdev) |
474 | { | 496 | { |
497 | struct disk_part_iter piter; | ||
498 | struct hd_struct *part; | ||
475 | struct parsed_partitions *state; | 499 | struct parsed_partitions *state; |
476 | int p, res; | 500 | int p, highest, res; |
477 | 501 | ||
478 | if (bdev->bd_part_count) | 502 | if (bdev->bd_part_count) |
479 | return -EBUSY; | 503 | return -EBUSY; |
480 | res = invalidate_partition(disk, 0); | 504 | res = invalidate_partition(disk, 0); |
481 | if (res) | 505 | if (res) |
482 | return res; | 506 | return res; |
483 | bdev->bd_invalidated = 0; | 507 | |
484 | for (p = 1; p < disk->minors; p++) | 508 | disk_part_iter_init(&piter, disk, DISK_PITER_INCL_EMPTY); |
485 | delete_partition(disk, p); | 509 | while ((part = disk_part_iter_next(&piter))) |
510 | delete_partition(disk, part->partno); | ||
511 | disk_part_iter_exit(&piter); | ||
512 | |||
486 | if (disk->fops->revalidate_disk) | 513 | if (disk->fops->revalidate_disk) |
487 | disk->fops->revalidate_disk(disk); | 514 | disk->fops->revalidate_disk(disk); |
515 | check_disk_size_change(disk, bdev); | ||
516 | bdev->bd_invalidated = 0; | ||
488 | if (!get_capacity(disk) || !(state = check_partition(disk, bdev))) | 517 | if (!get_capacity(disk) || !(state = check_partition(disk, bdev))) |
489 | return 0; | 518 | return 0; |
490 | if (IS_ERR(state)) /* I/O error reading the partition table */ | 519 | if (IS_ERR(state)) /* I/O error reading the partition table */ |
491 | return -EIO; | 520 | return -EIO; |
492 | 521 | ||
493 | /* tell userspace that the media / partition table may have changed */ | 522 | /* tell userspace that the media / partition table may have changed */ |
494 | kobject_uevent(&disk->dev.kobj, KOBJ_CHANGE); | 523 | kobject_uevent(&disk_to_dev(disk)->kobj, KOBJ_CHANGE); |
495 | 524 | ||
525 | /* Detect the highest partition number and preallocate | ||
526 | * disk->part_tbl. This is an optimization and not strictly | ||
527 | * necessary. | ||
528 | */ | ||
529 | for (p = 1, highest = 0; p < state->limit; p++) | ||
530 | if (state->parts[p].size) | ||
531 | highest = p; | ||
532 | |||
533 | disk_expand_part_tbl(disk, highest); | ||
534 | |||
535 | /* add partitions */ | ||
496 | for (p = 1; p < state->limit; p++) { | 536 | for (p = 1; p < state->limit; p++) { |
497 | sector_t size = state->parts[p].size; | 537 | sector_t size = state->parts[p].size; |
498 | sector_t from = state->parts[p].from; | 538 | sector_t from = state->parts[p].from; |
499 | if (!size) | 539 | if (!size) |
500 | continue; | 540 | continue; |
501 | if (from + size > get_capacity(disk)) { | 541 | if (from + size > get_capacity(disk)) { |
502 | printk(KERN_ERR " %s: p%d exceeds device capacity\n", | 542 | printk(KERN_WARNING |
543 | "%s: p%d exceeds device capacity\n", | ||
503 | disk->disk_name, p); | 544 | disk->disk_name, p); |
504 | continue; | ||
505 | } | 545 | } |
506 | res = add_partition(disk, p, from, size, state->parts[p].flags); | 546 | res = add_partition(disk, p, from, size, state->parts[p].flags); |
507 | if (res) { | 547 | if (res) { |
@@ -541,25 +581,31 @@ EXPORT_SYMBOL(read_dev_sector); | |||
541 | 581 | ||
542 | void del_gendisk(struct gendisk *disk) | 582 | void del_gendisk(struct gendisk *disk) |
543 | { | 583 | { |
544 | int p; | 584 | struct disk_part_iter piter; |
585 | struct hd_struct *part; | ||
545 | 586 | ||
546 | /* invalidate stuff */ | 587 | /* invalidate stuff */ |
547 | for (p = disk->minors - 1; p > 0; p--) { | 588 | disk_part_iter_init(&piter, disk, |
548 | invalidate_partition(disk, p); | 589 | DISK_PITER_INCL_EMPTY | DISK_PITER_REVERSE); |
549 | delete_partition(disk, p); | 590 | while ((part = disk_part_iter_next(&piter))) { |
591 | invalidate_partition(disk, part->partno); | ||
592 | delete_partition(disk, part->partno); | ||
550 | } | 593 | } |
594 | disk_part_iter_exit(&piter); | ||
595 | |||
551 | invalidate_partition(disk, 0); | 596 | invalidate_partition(disk, 0); |
552 | disk->capacity = 0; | 597 | blk_free_devt(disk_to_dev(disk)->devt); |
598 | set_capacity(disk, 0); | ||
553 | disk->flags &= ~GENHD_FL_UP; | 599 | disk->flags &= ~GENHD_FL_UP; |
554 | unlink_gendisk(disk); | 600 | unlink_gendisk(disk); |
555 | disk_stat_set_all(disk, 0); | 601 | part_stat_set_all(&disk->part0, 0); |
556 | disk->stamp = 0; | 602 | disk->part0.stamp = 0; |
557 | 603 | ||
558 | kobject_put(disk->holder_dir); | 604 | kobject_put(disk->part0.holder_dir); |
559 | kobject_put(disk->slave_dir); | 605 | kobject_put(disk->slave_dir); |
560 | disk->driverfs_dev = NULL; | 606 | disk->driverfs_dev = NULL; |
561 | #ifndef CONFIG_SYSFS_DEPRECATED | 607 | #ifndef CONFIG_SYSFS_DEPRECATED |
562 | sysfs_remove_link(block_depr, disk->dev.bus_id); | 608 | sysfs_remove_link(block_depr, dev_name(disk_to_dev(disk))); |
563 | #endif | 609 | #endif |
564 | device_del(&disk->dev); | 610 | device_del(disk_to_dev(disk)); |
565 | } | 611 | } |
diff --git a/fs/partitions/check.h b/fs/partitions/check.h index 17ae8ecd9e8b..98dbe1a84528 100644 --- a/fs/partitions/check.h +++ b/fs/partitions/check.h | |||
@@ -5,15 +5,13 @@ | |||
5 | * add_gd_partition adds a partitions details to the devices partition | 5 | * add_gd_partition adds a partitions details to the devices partition |
6 | * description. | 6 | * description. |
7 | */ | 7 | */ |
8 | enum { MAX_PART = 256 }; | ||
9 | |||
10 | struct parsed_partitions { | 8 | struct parsed_partitions { |
11 | char name[BDEVNAME_SIZE]; | 9 | char name[BDEVNAME_SIZE]; |
12 | struct { | 10 | struct { |
13 | sector_t from; | 11 | sector_t from; |
14 | sector_t size; | 12 | sector_t size; |
15 | int flags; | 13 | int flags; |
16 | } parts[MAX_PART]; | 14 | } parts[DISK_MAX_PARTS]; |
17 | int next; | 15 | int next; |
18 | int limit; | 16 | int limit; |
19 | }; | 17 | }; |
diff --git a/fs/proc/array.c b/fs/proc/array.c index 0d6eb33597c6..71c9be59c9c2 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c | |||
@@ -337,65 +337,6 @@ int proc_pid_status(struct seq_file *m, struct pid_namespace *ns, | |||
337 | return 0; | 337 | return 0; |
338 | } | 338 | } |
339 | 339 | ||
340 | /* | ||
341 | * Use precise platform statistics if available: | ||
342 | */ | ||
343 | #ifdef CONFIG_VIRT_CPU_ACCOUNTING | ||
344 | static cputime_t task_utime(struct task_struct *p) | ||
345 | { | ||
346 | return p->utime; | ||
347 | } | ||
348 | |||
349 | static cputime_t task_stime(struct task_struct *p) | ||
350 | { | ||
351 | return p->stime; | ||
352 | } | ||
353 | #else | ||
354 | static cputime_t task_utime(struct task_struct *p) | ||
355 | { | ||
356 | clock_t utime = cputime_to_clock_t(p->utime), | ||
357 | total = utime + cputime_to_clock_t(p->stime); | ||
358 | u64 temp; | ||
359 | |||
360 | /* | ||
361 | * Use CFS's precise accounting: | ||
362 | */ | ||
363 | temp = (u64)nsec_to_clock_t(p->se.sum_exec_runtime); | ||
364 | |||
365 | if (total) { | ||
366 | temp *= utime; | ||
367 | do_div(temp, total); | ||
368 | } | ||
369 | utime = (clock_t)temp; | ||
370 | |||
371 | p->prev_utime = max(p->prev_utime, clock_t_to_cputime(utime)); | ||
372 | return p->prev_utime; | ||
373 | } | ||
374 | |||
375 | static cputime_t task_stime(struct task_struct *p) | ||
376 | { | ||
377 | clock_t stime; | ||
378 | |||
379 | /* | ||
380 | * Use CFS's precise accounting. (we subtract utime from | ||
381 | * the total, to make sure the total observed by userspace | ||
382 | * grows monotonically - apps rely on that): | ||
383 | */ | ||
384 | stime = nsec_to_clock_t(p->se.sum_exec_runtime) - | ||
385 | cputime_to_clock_t(task_utime(p)); | ||
386 | |||
387 | if (stime >= 0) | ||
388 | p->prev_stime = max(p->prev_stime, clock_t_to_cputime(stime)); | ||
389 | |||
390 | return p->prev_stime; | ||
391 | } | ||
392 | #endif | ||
393 | |||
394 | static cputime_t task_gtime(struct task_struct *p) | ||
395 | { | ||
396 | return p->gtime; | ||
397 | } | ||
398 | |||
399 | static int do_task_stat(struct seq_file *m, struct pid_namespace *ns, | 340 | static int do_task_stat(struct seq_file *m, struct pid_namespace *ns, |
400 | struct pid *pid, struct task_struct *task, int whole) | 341 | struct pid *pid, struct task_struct *task, int whole) |
401 | { | 342 | { |
diff --git a/fs/proc/generic.c b/fs/proc/generic.c index bca0f81eb687..7821589a17d5 100644 --- a/fs/proc/generic.c +++ b/fs/proc/generic.c | |||
@@ -547,8 +547,8 @@ static int proc_register(struct proc_dir_entry * dir, struct proc_dir_entry * dp | |||
547 | 547 | ||
548 | for (tmp = dir->subdir; tmp; tmp = tmp->next) | 548 | for (tmp = dir->subdir; tmp; tmp = tmp->next) |
549 | if (strcmp(tmp->name, dp->name) == 0) { | 549 | if (strcmp(tmp->name, dp->name) == 0) { |
550 | printk(KERN_WARNING "proc_dir_entry '%s' already " | 550 | printk(KERN_WARNING "proc_dir_entry '%s/%s' already registered\n", |
551 | "registered\n", dp->name); | 551 | dir->name, dp->name); |
552 | dump_stack(); | 552 | dump_stack(); |
553 | break; | 553 | break; |
554 | } | 554 | } |
diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c index ded969862960..29e20c6b1f7f 100644 --- a/fs/proc/proc_misc.c +++ b/fs/proc/proc_misc.c | |||
@@ -24,6 +24,7 @@ | |||
24 | #include <linux/tty.h> | 24 | #include <linux/tty.h> |
25 | #include <linux/string.h> | 25 | #include <linux/string.h> |
26 | #include <linux/mman.h> | 26 | #include <linux/mman.h> |
27 | #include <linux/quicklist.h> | ||
27 | #include <linux/proc_fs.h> | 28 | #include <linux/proc_fs.h> |
28 | #include <linux/ioport.h> | 29 | #include <linux/ioport.h> |
29 | #include <linux/mm.h> | 30 | #include <linux/mm.h> |
@@ -182,6 +183,9 @@ static int meminfo_read_proc(char *page, char **start, off_t off, | |||
182 | "SReclaimable: %8lu kB\n" | 183 | "SReclaimable: %8lu kB\n" |
183 | "SUnreclaim: %8lu kB\n" | 184 | "SUnreclaim: %8lu kB\n" |
184 | "PageTables: %8lu kB\n" | 185 | "PageTables: %8lu kB\n" |
186 | #ifdef CONFIG_QUICKLIST | ||
187 | "Quicklists: %8lu kB\n" | ||
188 | #endif | ||
185 | "NFS_Unstable: %8lu kB\n" | 189 | "NFS_Unstable: %8lu kB\n" |
186 | "Bounce: %8lu kB\n" | 190 | "Bounce: %8lu kB\n" |
187 | "WritebackTmp: %8lu kB\n" | 191 | "WritebackTmp: %8lu kB\n" |
@@ -214,6 +218,9 @@ static int meminfo_read_proc(char *page, char **start, off_t off, | |||
214 | K(global_page_state(NR_SLAB_RECLAIMABLE)), | 218 | K(global_page_state(NR_SLAB_RECLAIMABLE)), |
215 | K(global_page_state(NR_SLAB_UNRECLAIMABLE)), | 219 | K(global_page_state(NR_SLAB_UNRECLAIMABLE)), |
216 | K(global_page_state(NR_PAGETABLE)), | 220 | K(global_page_state(NR_PAGETABLE)), |
221 | #ifdef CONFIG_QUICKLIST | ||
222 | K(quicklist_total_size()), | ||
223 | #endif | ||
217 | K(global_page_state(NR_UNSTABLE_NFS)), | 224 | K(global_page_state(NR_UNSTABLE_NFS)), |
218 | K(global_page_state(NR_BOUNCE)), | 225 | K(global_page_state(NR_BOUNCE)), |
219 | K(global_page_state(NR_WRITEBACK_TEMP)), | 226 | K(global_page_state(NR_WRITEBACK_TEMP)), |
diff --git a/fs/ramfs/file-nommu.c b/fs/ramfs/file-nommu.c index 52312ec93ff4..5145cb9125af 100644 --- a/fs/ramfs/file-nommu.c +++ b/fs/ramfs/file-nommu.c | |||
@@ -58,7 +58,7 @@ const struct inode_operations ramfs_file_inode_operations = { | |||
58 | * size 0 on the assumption that it's going to be used for an mmap of shared | 58 | * size 0 on the assumption that it's going to be used for an mmap of shared |
59 | * memory | 59 | * memory |
60 | */ | 60 | */ |
61 | static int ramfs_nommu_expand_for_mapping(struct inode *inode, size_t newsize) | 61 | int ramfs_nommu_expand_for_mapping(struct inode *inode, size_t newsize) |
62 | { | 62 | { |
63 | struct pagevec lru_pvec; | 63 | struct pagevec lru_pvec; |
64 | unsigned long npages, xpages, loop, limit; | 64 | unsigned long npages, xpages, loop, limit; |
diff --git a/fs/splice.c b/fs/splice.c index 1bbc6f4bb09c..a1e701c27156 100644 --- a/fs/splice.c +++ b/fs/splice.c | |||
@@ -898,6 +898,9 @@ static long do_splice_from(struct pipe_inode_info *pipe, struct file *out, | |||
898 | if (unlikely(!(out->f_mode & FMODE_WRITE))) | 898 | if (unlikely(!(out->f_mode & FMODE_WRITE))) |
899 | return -EBADF; | 899 | return -EBADF; |
900 | 900 | ||
901 | if (unlikely(out->f_flags & O_APPEND)) | ||
902 | return -EINVAL; | ||
903 | |||
901 | ret = rw_verify_area(WRITE, out, ppos, len); | 904 | ret = rw_verify_area(WRITE, out, ppos, len); |
902 | if (unlikely(ret < 0)) | 905 | if (unlikely(ret < 0)) |
903 | return ret; | 906 | return ret; |
diff --git a/fs/ubifs/budget.c b/fs/ubifs/budget.c index 154098157473..73db464cd08b 100644 --- a/fs/ubifs/budget.c +++ b/fs/ubifs/budget.c | |||
@@ -302,18 +302,6 @@ long long ubifs_calc_available(const struct ubifs_info *c, int min_idx_lebs) | |||
302 | int subtract_lebs; | 302 | int subtract_lebs; |
303 | long long available; | 303 | long long available; |
304 | 304 | ||
305 | /* | ||
306 | * Force the amount available to the total size reported if the used | ||
307 | * space is zero. | ||
308 | */ | ||
309 | if (c->lst.total_used <= UBIFS_INO_NODE_SZ && | ||
310 | c->budg_data_growth + c->budg_dd_growth == 0) { | ||
311 | /* Do the same calculation as for c->block_cnt */ | ||
312 | available = c->main_lebs - 2; | ||
313 | available *= c->leb_size - c->dark_wm; | ||
314 | return available; | ||
315 | } | ||
316 | |||
317 | available = c->main_bytes - c->lst.total_used; | 305 | available = c->main_bytes - c->lst.total_used; |
318 | 306 | ||
319 | /* | 307 | /* |
@@ -714,34 +702,106 @@ void ubifs_release_dirty_inode_budget(struct ubifs_info *c, | |||
714 | } | 702 | } |
715 | 703 | ||
716 | /** | 704 | /** |
717 | * ubifs_budg_get_free_space - return amount of free space. | 705 | * ubifs_reported_space - calculate reported free space. |
706 | * @c: the UBIFS file-system description object | ||
707 | * @free: amount of free space | ||
708 | * | ||
709 | * This function calculates amount of free space which will be reported to | ||
710 | * user-space. User-space application tend to expect that if the file-system | ||
711 | * (e.g., via the 'statfs()' call) reports that it has N bytes available, they | ||
712 | * are able to write a file of size N. UBIFS attaches node headers to each data | ||
713 | * node and it has to write indexind nodes as well. This introduces additional | ||
714 | * overhead, and UBIFS it has to report sligtly less free space to meet the | ||
715 | * above expectetion. | ||
716 | * | ||
717 | * This function assumes free space is made up of uncompressed data nodes and | ||
718 | * full index nodes (one per data node, tripled because we always allow enough | ||
719 | * space to write the index thrice). | ||
720 | * | ||
721 | * Note, the calculation is pessimistic, which means that most of the time | ||
722 | * UBIFS reports less space than it actually has. | ||
723 | */ | ||
724 | long long ubifs_reported_space(const struct ubifs_info *c, uint64_t free) | ||
725 | { | ||
726 | int divisor, factor, f; | ||
727 | |||
728 | /* | ||
729 | * Reported space size is @free * X, where X is UBIFS block size | ||
730 | * divided by UBIFS block size + all overhead one data block | ||
731 | * introduces. The overhead is the node header + indexing overhead. | ||
732 | * | ||
733 | * Indexing overhead calculations are based on the following formula: | ||
734 | * I = N/(f - 1) + 1, where I - number of indexing nodes, N - number | ||
735 | * of data nodes, f - fanout. Because effective UBIFS fanout is twice | ||
736 | * as less than maximum fanout, we assume that each data node | ||
737 | * introduces 3 * @c->max_idx_node_sz / (@c->fanout/2 - 1) bytes. | ||
738 | * Note, the multiplier 3 is because UBIFS reseves thrice as more space | ||
739 | * for the index. | ||
740 | */ | ||
741 | f = c->fanout > 3 ? c->fanout >> 1 : 2; | ||
742 | factor = UBIFS_BLOCK_SIZE; | ||
743 | divisor = UBIFS_MAX_DATA_NODE_SZ; | ||
744 | divisor += (c->max_idx_node_sz * 3) / (f - 1); | ||
745 | free *= factor; | ||
746 | do_div(free, divisor); | ||
747 | return free; | ||
748 | } | ||
749 | |||
750 | /** | ||
751 | * ubifs_get_free_space - return amount of free space. | ||
718 | * @c: UBIFS file-system description object | 752 | * @c: UBIFS file-system description object |
719 | * | 753 | * |
720 | * This function returns amount of free space on the file-system. | 754 | * This function calculates amount of free space to report to user-space. |
755 | * | ||
756 | * Because UBIFS may introduce substantial overhead (the index, node headers, | ||
757 | * alighment, wastage at the end of eraseblocks, etc), it cannot report real | ||
758 | * amount of free flash space it has (well, because not all dirty space is | ||
759 | * reclamable, UBIFS does not actually know the real amount). If UBIFS did so, | ||
760 | * it would bread user expectetion about what free space is. Users seem to | ||
761 | * accustomed to assume that if the file-system reports N bytes of free space, | ||
762 | * they would be able to fit a file of N bytes to the FS. This almost works for | ||
763 | * traditional file-systems, because they have way less overhead than UBIFS. | ||
764 | * So, to keep users happy, UBIFS tries to take the overhead into account. | ||
721 | */ | 765 | */ |
722 | long long ubifs_budg_get_free_space(struct ubifs_info *c) | 766 | long long ubifs_get_free_space(struct ubifs_info *c) |
723 | { | 767 | { |
724 | int min_idx_lebs, rsvd_idx_lebs; | 768 | int min_idx_lebs, rsvd_idx_lebs, lebs; |
725 | long long available, outstanding, free; | 769 | long long available, outstanding, free; |
726 | 770 | ||
727 | /* Do exactly the same calculations as in 'do_budget_space()' */ | ||
728 | spin_lock(&c->space_lock); | 771 | spin_lock(&c->space_lock); |
729 | min_idx_lebs = ubifs_calc_min_idx_lebs(c); | 772 | min_idx_lebs = ubifs_calc_min_idx_lebs(c); |
773 | outstanding = c->budg_data_growth + c->budg_dd_growth; | ||
730 | 774 | ||
731 | if (min_idx_lebs > c->lst.idx_lebs) | 775 | /* |
732 | rsvd_idx_lebs = min_idx_lebs - c->lst.idx_lebs; | 776 | * Force the amount available to the total size reported if the used |
733 | else | 777 | * space is zero. |
734 | rsvd_idx_lebs = 0; | 778 | */ |
735 | 779 | if (c->lst.total_used <= UBIFS_INO_NODE_SZ && !outstanding) { | |
736 | if (rsvd_idx_lebs > c->lst.empty_lebs + c->freeable_cnt + c->idx_gc_cnt | ||
737 | - c->lst.taken_empty_lebs) { | ||
738 | spin_unlock(&c->space_lock); | 780 | spin_unlock(&c->space_lock); |
739 | return 0; | 781 | return (long long)c->block_cnt << UBIFS_BLOCK_SHIFT; |
740 | } | 782 | } |
741 | 783 | ||
742 | available = ubifs_calc_available(c, min_idx_lebs); | 784 | available = ubifs_calc_available(c, min_idx_lebs); |
743 | outstanding = c->budg_data_growth + c->budg_dd_growth; | 785 | |
744 | c->min_idx_lebs = min_idx_lebs; | 786 | /* |
787 | * When reporting free space to user-space, UBIFS guarantees that it is | ||
788 | * possible to write a file of free space size. This means that for | ||
789 | * empty LEBs we may use more precise calculations than | ||
790 | * 'ubifs_calc_available()' is using. Namely, we know that in empty | ||
791 | * LEBs we would waste only @c->leb_overhead bytes, not @c->dark_wm. | ||
792 | * Thus, amend the available space. | ||
793 | * | ||
794 | * Note, the calculations below are similar to what we have in | ||
795 | * 'do_budget_space()', so refer there for comments. | ||
796 | */ | ||
797 | if (min_idx_lebs > c->lst.idx_lebs) | ||
798 | rsvd_idx_lebs = min_idx_lebs - c->lst.idx_lebs; | ||
799 | else | ||
800 | rsvd_idx_lebs = 0; | ||
801 | lebs = c->lst.empty_lebs + c->freeable_cnt + c->idx_gc_cnt - | ||
802 | c->lst.taken_empty_lebs; | ||
803 | lebs -= rsvd_idx_lebs; | ||
804 | available += lebs * (c->dark_wm - c->leb_overhead); | ||
745 | spin_unlock(&c->space_lock); | 805 | spin_unlock(&c->space_lock); |
746 | 806 | ||
747 | if (available > outstanding) | 807 | if (available > outstanding) |
diff --git a/fs/ubifs/debug.c b/fs/ubifs/debug.c index b9cb77473758..d7f7645779f2 100644 --- a/fs/ubifs/debug.c +++ b/fs/ubifs/debug.c | |||
@@ -538,7 +538,7 @@ void dbg_dump_node(const struct ubifs_info *c, const void *node) | |||
538 | printk(KERN_DEBUG "\t%d orphan inode numbers:\n", n); | 538 | printk(KERN_DEBUG "\t%d orphan inode numbers:\n", n); |
539 | for (i = 0; i < n; i++) | 539 | for (i = 0; i < n; i++) |
540 | printk(KERN_DEBUG "\t ino %llu\n", | 540 | printk(KERN_DEBUG "\t ino %llu\n", |
541 | le64_to_cpu(orph->inos[i])); | 541 | (unsigned long long)le64_to_cpu(orph->inos[i])); |
542 | break; | 542 | break; |
543 | } | 543 | } |
544 | default: | 544 | default: |
diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c index 5c96f1fb7016..526c01ec8003 100644 --- a/fs/ubifs/dir.c +++ b/fs/ubifs/dir.c | |||
@@ -426,7 +426,7 @@ static int ubifs_readdir(struct file *file, void *dirent, filldir_t filldir) | |||
426 | 426 | ||
427 | while (1) { | 427 | while (1) { |
428 | dbg_gen("feed '%s', ino %llu, new f_pos %#x", | 428 | dbg_gen("feed '%s', ino %llu, new f_pos %#x", |
429 | dent->name, le64_to_cpu(dent->inum), | 429 | dent->name, (unsigned long long)le64_to_cpu(dent->inum), |
430 | key_hash_flash(c, &dent->key)); | 430 | key_hash_flash(c, &dent->key)); |
431 | ubifs_assert(dent->ch.sqnum > ubifs_inode(dir)->creat_sqnum); | 431 | ubifs_assert(dent->ch.sqnum > ubifs_inode(dir)->creat_sqnum); |
432 | 432 | ||
@@ -587,7 +587,6 @@ static int ubifs_unlink(struct inode *dir, struct dentry *dentry) | |||
587 | if (err) { | 587 | if (err) { |
588 | if (err != -ENOSPC) | 588 | if (err != -ENOSPC) |
589 | return err; | 589 | return err; |
590 | err = 0; | ||
591 | budgeted = 0; | 590 | budgeted = 0; |
592 | } | 591 | } |
593 | 592 | ||
diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c index 4071d1cae29f..3d698e2022b1 100644 --- a/fs/ubifs/file.c +++ b/fs/ubifs/file.c | |||
@@ -793,7 +793,7 @@ static int do_truncation(struct ubifs_info *c, struct inode *inode, | |||
793 | int err; | 793 | int err; |
794 | struct ubifs_budget_req req; | 794 | struct ubifs_budget_req req; |
795 | loff_t old_size = inode->i_size, new_size = attr->ia_size; | 795 | loff_t old_size = inode->i_size, new_size = attr->ia_size; |
796 | int offset = new_size & (UBIFS_BLOCK_SIZE - 1); | 796 | int offset = new_size & (UBIFS_BLOCK_SIZE - 1), budgeted = 1; |
797 | struct ubifs_inode *ui = ubifs_inode(inode); | 797 | struct ubifs_inode *ui = ubifs_inode(inode); |
798 | 798 | ||
799 | dbg_gen("ino %lu, size %lld -> %lld", inode->i_ino, old_size, new_size); | 799 | dbg_gen("ino %lu, size %lld -> %lld", inode->i_ino, old_size, new_size); |
@@ -811,8 +811,15 @@ static int do_truncation(struct ubifs_info *c, struct inode *inode, | |||
811 | /* A funny way to budget for truncation node */ | 811 | /* A funny way to budget for truncation node */ |
812 | req.dirtied_ino_d = UBIFS_TRUN_NODE_SZ; | 812 | req.dirtied_ino_d = UBIFS_TRUN_NODE_SZ; |
813 | err = ubifs_budget_space(c, &req); | 813 | err = ubifs_budget_space(c, &req); |
814 | if (err) | 814 | if (err) { |
815 | return err; | 815 | /* |
816 | * Treat truncations to zero as deletion and always allow them, | ||
817 | * just like we do for '->unlink()'. | ||
818 | */ | ||
819 | if (new_size || err != -ENOSPC) | ||
820 | return err; | ||
821 | budgeted = 0; | ||
822 | } | ||
816 | 823 | ||
817 | err = vmtruncate(inode, new_size); | 824 | err = vmtruncate(inode, new_size); |
818 | if (err) | 825 | if (err) |
@@ -869,7 +876,12 @@ static int do_truncation(struct ubifs_info *c, struct inode *inode, | |||
869 | err = ubifs_jnl_truncate(c, inode, old_size, new_size); | 876 | err = ubifs_jnl_truncate(c, inode, old_size, new_size); |
870 | mutex_unlock(&ui->ui_mutex); | 877 | mutex_unlock(&ui->ui_mutex); |
871 | out_budg: | 878 | out_budg: |
872 | ubifs_release_budget(c, &req); | 879 | if (budgeted) |
880 | ubifs_release_budget(c, &req); | ||
881 | else { | ||
882 | c->nospace = c->nospace_rp = 0; | ||
883 | smp_wmb(); | ||
884 | } | ||
873 | return err; | 885 | return err; |
874 | } | 886 | } |
875 | 887 | ||
diff --git a/fs/ubifs/find.c b/fs/ubifs/find.c index adee7b5ddeab..47814cde2407 100644 --- a/fs/ubifs/find.c +++ b/fs/ubifs/find.c | |||
@@ -211,14 +211,8 @@ static const struct ubifs_lprops *scan_for_dirty(struct ubifs_info *c, | |||
211 | * dirty index heap, and it falls-back to LPT scanning if the heaps are empty | 211 | * dirty index heap, and it falls-back to LPT scanning if the heaps are empty |
212 | * or do not have an LEB which satisfies the @min_space criteria. | 212 | * or do not have an LEB which satisfies the @min_space criteria. |
213 | * | 213 | * |
214 | * Note: | 214 | * Note, LEBs which have less than dead watermark of free + dirty space are |
215 | * o LEBs which have less than dead watermark of dirty space are never picked | 215 | * never picked by this function. |
216 | * by this function; | ||
217 | * | ||
218 | * Returns zero and the LEB properties of | ||
219 | * found dirty LEB in case of success, %-ENOSPC if no dirty LEB was found and a | ||
220 | * negative error code in case of other failures. The returned LEB is marked as | ||
221 | * "taken". | ||
222 | * | 216 | * |
223 | * The additional @pick_free argument controls if this function has to return a | 217 | * The additional @pick_free argument controls if this function has to return a |
224 | * free or freeable LEB if one is present. For example, GC must to set it to %1, | 218 | * free or freeable LEB if one is present. For example, GC must to set it to %1, |
@@ -231,6 +225,10 @@ static const struct ubifs_lprops *scan_for_dirty(struct ubifs_info *c, | |||
231 | * | 225 | * |
232 | * In addition @pick_free is set to %2 by the recovery process in order to | 226 | * In addition @pick_free is set to %2 by the recovery process in order to |
233 | * recover gc_lnum in which case an index LEB must not be returned. | 227 | * recover gc_lnum in which case an index LEB must not be returned. |
228 | * | ||
229 | * This function returns zero and the LEB properties of found dirty LEB in case | ||
230 | * of success, %-ENOSPC if no dirty LEB was found and a negative error code in | ||
231 | * case of other failures. The returned LEB is marked as "taken". | ||
234 | */ | 232 | */ |
235 | int ubifs_find_dirty_leb(struct ubifs_info *c, struct ubifs_lprops *ret_lp, | 233 | int ubifs_find_dirty_leb(struct ubifs_info *c, struct ubifs_lprops *ret_lp, |
236 | int min_space, int pick_free) | 234 | int min_space, int pick_free) |
@@ -245,7 +243,7 @@ int ubifs_find_dirty_leb(struct ubifs_info *c, struct ubifs_lprops *ret_lp, | |||
245 | int lebs, rsvd_idx_lebs = 0; | 243 | int lebs, rsvd_idx_lebs = 0; |
246 | 244 | ||
247 | spin_lock(&c->space_lock); | 245 | spin_lock(&c->space_lock); |
248 | lebs = c->lst.empty_lebs; | 246 | lebs = c->lst.empty_lebs + c->idx_gc_cnt; |
249 | lebs += c->freeable_cnt - c->lst.taken_empty_lebs; | 247 | lebs += c->freeable_cnt - c->lst.taken_empty_lebs; |
250 | 248 | ||
251 | /* | 249 | /* |
@@ -317,7 +315,7 @@ int ubifs_find_dirty_leb(struct ubifs_info *c, struct ubifs_lprops *ret_lp, | |||
317 | lp = idx_lp; | 315 | lp = idx_lp; |
318 | 316 | ||
319 | if (lp) { | 317 | if (lp) { |
320 | ubifs_assert(lp->dirty >= c->dead_wm); | 318 | ubifs_assert(lp->free + lp->dirty >= c->dead_wm); |
321 | goto found; | 319 | goto found; |
322 | } | 320 | } |
323 | 321 | ||
@@ -509,7 +507,6 @@ int ubifs_find_free_space(struct ubifs_info *c, int min_space, int *free, | |||
509 | rsvd_idx_lebs = 0; | 507 | rsvd_idx_lebs = 0; |
510 | lebs = c->lst.empty_lebs + c->freeable_cnt + c->idx_gc_cnt - | 508 | lebs = c->lst.empty_lebs + c->freeable_cnt + c->idx_gc_cnt - |
511 | c->lst.taken_empty_lebs; | 509 | c->lst.taken_empty_lebs; |
512 | ubifs_assert(lebs + c->lst.idx_lebs >= c->min_idx_lebs); | ||
513 | if (rsvd_idx_lebs < lebs) | 510 | if (rsvd_idx_lebs < lebs) |
514 | /* | 511 | /* |
515 | * OK to allocate an empty LEB, but we still don't want to go | 512 | * OK to allocate an empty LEB, but we still don't want to go |
diff --git a/fs/ubifs/gc.c b/fs/ubifs/gc.c index d0f3dac29081..02aba36fe3d4 100644 --- a/fs/ubifs/gc.c +++ b/fs/ubifs/gc.c | |||
@@ -334,15 +334,21 @@ int ubifs_garbage_collect_leb(struct ubifs_info *c, struct ubifs_lprops *lp) | |||
334 | 334 | ||
335 | err = move_nodes(c, sleb); | 335 | err = move_nodes(c, sleb); |
336 | if (err) | 336 | if (err) |
337 | goto out; | 337 | goto out_inc_seq; |
338 | 338 | ||
339 | err = gc_sync_wbufs(c); | 339 | err = gc_sync_wbufs(c); |
340 | if (err) | 340 | if (err) |
341 | goto out; | 341 | goto out_inc_seq; |
342 | 342 | ||
343 | err = ubifs_change_one_lp(c, lnum, c->leb_size, 0, 0, 0, 0); | 343 | err = ubifs_change_one_lp(c, lnum, c->leb_size, 0, 0, 0, 0); |
344 | if (err) | 344 | if (err) |
345 | goto out; | 345 | goto out_inc_seq; |
346 | |||
347 | /* Allow for races with TNC */ | ||
348 | c->gced_lnum = lnum; | ||
349 | smp_wmb(); | ||
350 | c->gc_seq += 1; | ||
351 | smp_wmb(); | ||
346 | 352 | ||
347 | if (c->gc_lnum == -1) { | 353 | if (c->gc_lnum == -1) { |
348 | c->gc_lnum = lnum; | 354 | c->gc_lnum = lnum; |
@@ -363,6 +369,14 @@ int ubifs_garbage_collect_leb(struct ubifs_info *c, struct ubifs_lprops *lp) | |||
363 | out: | 369 | out: |
364 | ubifs_scan_destroy(sleb); | 370 | ubifs_scan_destroy(sleb); |
365 | return err; | 371 | return err; |
372 | |||
373 | out_inc_seq: | ||
374 | /* We may have moved at least some nodes so allow for races with TNC */ | ||
375 | c->gced_lnum = lnum; | ||
376 | smp_wmb(); | ||
377 | c->gc_seq += 1; | ||
378 | smp_wmb(); | ||
379 | goto out; | ||
366 | } | 380 | } |
367 | 381 | ||
368 | /** | 382 | /** |
diff --git a/fs/ubifs/misc.h b/fs/ubifs/misc.h index 87dabf9fe742..4c12a9215d7f 100644 --- a/fs/ubifs/misc.h +++ b/fs/ubifs/misc.h | |||
@@ -284,38 +284,6 @@ static inline void *ubifs_idx_key(const struct ubifs_info *c, | |||
284 | } | 284 | } |
285 | 285 | ||
286 | /** | 286 | /** |
287 | * ubifs_reported_space - calculate reported free space. | ||
288 | * @c: the UBIFS file-system description object | ||
289 | * @free: amount of free space | ||
290 | * | ||
291 | * This function calculates amount of free space which will be reported to | ||
292 | * user-space. User-space application tend to expect that if the file-system | ||
293 | * (e.g., via the 'statfs()' call) reports that it has N bytes available, they | ||
294 | * are able to write a file of size N. UBIFS attaches node headers to each data | ||
295 | * node and it has to write indexind nodes as well. This introduces additional | ||
296 | * overhead, and UBIFS it has to report sligtly less free space to meet the | ||
297 | * above expectetion. | ||
298 | * | ||
299 | * This function assumes free space is made up of uncompressed data nodes and | ||
300 | * full index nodes (one per data node, doubled because we always allow enough | ||
301 | * space to write the index twice). | ||
302 | * | ||
303 | * Note, the calculation is pessimistic, which means that most of the time | ||
304 | * UBIFS reports less space than it actually has. | ||
305 | */ | ||
306 | static inline long long ubifs_reported_space(const struct ubifs_info *c, | ||
307 | uint64_t free) | ||
308 | { | ||
309 | int divisor, factor; | ||
310 | |||
311 | divisor = UBIFS_MAX_DATA_NODE_SZ + (c->max_idx_node_sz * 3); | ||
312 | factor = UBIFS_MAX_DATA_NODE_SZ - UBIFS_DATA_NODE_SZ; | ||
313 | do_div(free, divisor); | ||
314 | |||
315 | return free * factor; | ||
316 | } | ||
317 | |||
318 | /** | ||
319 | * ubifs_current_time - round current time to time granularity. | 287 | * ubifs_current_time - round current time to time granularity. |
320 | * @inode: inode | 288 | * @inode: inode |
321 | */ | 289 | */ |
@@ -325,4 +293,21 @@ static inline struct timespec ubifs_current_time(struct inode *inode) | |||
325 | current_fs_time(inode->i_sb) : CURRENT_TIME_SEC; | 293 | current_fs_time(inode->i_sb) : CURRENT_TIME_SEC; |
326 | } | 294 | } |
327 | 295 | ||
296 | /** | ||
297 | * ubifs_tnc_lookup - look up a file-system node. | ||
298 | * @c: UBIFS file-system description object | ||
299 | * @key: node key to lookup | ||
300 | * @node: the node is returned here | ||
301 | * | ||
302 | * This function look up and reads node with key @key. The caller has to make | ||
303 | * sure the @node buffer is large enough to fit the node. Returns zero in case | ||
304 | * of success, %-ENOENT if the node was not found, and a negative error code in | ||
305 | * case of failure. | ||
306 | */ | ||
307 | static inline int ubifs_tnc_lookup(struct ubifs_info *c, | ||
308 | const union ubifs_key *key, void *node) | ||
309 | { | ||
310 | return ubifs_tnc_locate(c, key, node, NULL, NULL); | ||
311 | } | ||
312 | |||
328 | #endif /* __UBIFS_MISC_H__ */ | 313 | #endif /* __UBIFS_MISC_H__ */ |
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index f71e6b8822c4..3f4902060c7a 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c | |||
@@ -370,8 +370,9 @@ static int ubifs_statfs(struct dentry *dentry, struct kstatfs *buf) | |||
370 | { | 370 | { |
371 | struct ubifs_info *c = dentry->d_sb->s_fs_info; | 371 | struct ubifs_info *c = dentry->d_sb->s_fs_info; |
372 | unsigned long long free; | 372 | unsigned long long free; |
373 | __le32 *uuid = (__le32 *)c->uuid; | ||
373 | 374 | ||
374 | free = ubifs_budg_get_free_space(c); | 375 | free = ubifs_get_free_space(c); |
375 | dbg_gen("free space %lld bytes (%lld blocks)", | 376 | dbg_gen("free space %lld bytes (%lld blocks)", |
376 | free, free >> UBIFS_BLOCK_SHIFT); | 377 | free, free >> UBIFS_BLOCK_SHIFT); |
377 | 378 | ||
@@ -386,7 +387,8 @@ static int ubifs_statfs(struct dentry *dentry, struct kstatfs *buf) | |||
386 | buf->f_files = 0; | 387 | buf->f_files = 0; |
387 | buf->f_ffree = 0; | 388 | buf->f_ffree = 0; |
388 | buf->f_namelen = UBIFS_MAX_NLEN; | 389 | buf->f_namelen = UBIFS_MAX_NLEN; |
389 | 390 | buf->f_fsid.val[0] = le32_to_cpu(uuid[0]) ^ le32_to_cpu(uuid[2]); | |
391 | buf->f_fsid.val[1] = le32_to_cpu(uuid[1]) ^ le32_to_cpu(uuid[3]); | ||
390 | return 0; | 392 | return 0; |
391 | } | 393 | } |
392 | 394 | ||
@@ -530,6 +532,12 @@ static int init_constants_early(struct ubifs_info *c) | |||
530 | c->dead_wm = ALIGN(MIN_WRITE_SZ, c->min_io_size); | 532 | c->dead_wm = ALIGN(MIN_WRITE_SZ, c->min_io_size); |
531 | c->dark_wm = ALIGN(UBIFS_MAX_NODE_SZ, c->min_io_size); | 533 | c->dark_wm = ALIGN(UBIFS_MAX_NODE_SZ, c->min_io_size); |
532 | 534 | ||
535 | /* | ||
536 | * Calculate how many bytes would be wasted at the end of LEB if it was | ||
537 | * fully filled with data nodes of maximum size. This is used in | ||
538 | * calculations when reporting free space. | ||
539 | */ | ||
540 | c->leb_overhead = c->leb_size % UBIFS_MAX_DATA_NODE_SZ; | ||
533 | return 0; | 541 | return 0; |
534 | } | 542 | } |
535 | 543 | ||
@@ -647,13 +655,11 @@ static int init_constants_late(struct ubifs_info *c) | |||
647 | * internally because it does not make much sense for UBIFS, but it is | 655 | * internally because it does not make much sense for UBIFS, but it is |
648 | * necessary to report something for the 'statfs()' call. | 656 | * necessary to report something for the 'statfs()' call. |
649 | * | 657 | * |
650 | * Subtract the LEB reserved for GC and the LEB which is reserved for | 658 | * Subtract the LEB reserved for GC, the LEB which is reserved for |
651 | * deletions. | 659 | * deletions, and assume only one journal head is available. |
652 | * | ||
653 | * Review 'ubifs_calc_available()' if changing this calculation. | ||
654 | */ | 660 | */ |
655 | tmp64 = c->main_lebs - 2; | 661 | tmp64 = c->main_lebs - 2 - c->jhead_cnt + 1; |
656 | tmp64 *= (uint64_t)c->leb_size - c->dark_wm; | 662 | tmp64 *= (uint64_t)c->leb_size - c->leb_overhead; |
657 | tmp64 = ubifs_reported_space(c, tmp64); | 663 | tmp64 = ubifs_reported_space(c, tmp64); |
658 | c->block_cnt = tmp64 >> UBIFS_BLOCK_SHIFT; | 664 | c->block_cnt = tmp64 >> UBIFS_BLOCK_SHIFT; |
659 | 665 | ||
@@ -1018,14 +1024,13 @@ static int mount_ubifs(struct ubifs_info *c) | |||
1018 | goto out_dereg; | 1024 | goto out_dereg; |
1019 | } | 1025 | } |
1020 | 1026 | ||
1027 | sprintf(c->bgt_name, BGT_NAME_PATTERN, c->vi.ubi_num, c->vi.vol_id); | ||
1021 | if (!mounted_read_only) { | 1028 | if (!mounted_read_only) { |
1022 | err = alloc_wbufs(c); | 1029 | err = alloc_wbufs(c); |
1023 | if (err) | 1030 | if (err) |
1024 | goto out_cbuf; | 1031 | goto out_cbuf; |
1025 | 1032 | ||
1026 | /* Create background thread */ | 1033 | /* Create background thread */ |
1027 | sprintf(c->bgt_name, BGT_NAME_PATTERN, c->vi.ubi_num, | ||
1028 | c->vi.vol_id); | ||
1029 | c->bgt = kthread_create(ubifs_bg_thread, c, c->bgt_name); | 1034 | c->bgt = kthread_create(ubifs_bg_thread, c, c->bgt_name); |
1030 | if (!c->bgt) | 1035 | if (!c->bgt) |
1031 | c->bgt = ERR_PTR(-EINVAL); | 1036 | c->bgt = ERR_PTR(-EINVAL); |
diff --git a/fs/ubifs/tnc.c b/fs/ubifs/tnc.c index e909f4a96443..7634c5970887 100644 --- a/fs/ubifs/tnc.c +++ b/fs/ubifs/tnc.c | |||
@@ -506,7 +506,7 @@ static int fallible_read_node(struct ubifs_info *c, const union ubifs_key *key, | |||
506 | if (keys_cmp(c, key, &node_key) != 0) | 506 | if (keys_cmp(c, key, &node_key) != 0) |
507 | ret = 0; | 507 | ret = 0; |
508 | } | 508 | } |
509 | if (ret == 0) | 509 | if (ret == 0 && c->replaying) |
510 | dbg_mnt("dangling branch LEB %d:%d len %d, key %s", | 510 | dbg_mnt("dangling branch LEB %d:%d len %d, key %s", |
511 | zbr->lnum, zbr->offs, zbr->len, DBGKEY(key)); | 511 | zbr->lnum, zbr->offs, zbr->len, DBGKEY(key)); |
512 | return ret; | 512 | return ret; |
@@ -1382,50 +1382,39 @@ static int lookup_level0_dirty(struct ubifs_info *c, const union ubifs_key *key, | |||
1382 | } | 1382 | } |
1383 | 1383 | ||
1384 | /** | 1384 | /** |
1385 | * ubifs_tnc_lookup - look up a file-system node. | 1385 | * maybe_leb_gced - determine if a LEB may have been garbage collected. |
1386 | * @c: UBIFS file-system description object | 1386 | * @c: UBIFS file-system description object |
1387 | * @key: node key to lookup | 1387 | * @lnum: LEB number |
1388 | * @node: the node is returned here | 1388 | * @gc_seq1: garbage collection sequence number |
1389 | * | 1389 | * |
1390 | * This function look up and reads node with key @key. The caller has to make | 1390 | * This function determines if @lnum may have been garbage collected since |
1391 | * sure the @node buffer is large enough to fit the node. Returns zero in case | 1391 | * sequence number @gc_seq1. If it may have been then %1 is returned, otherwise |
1392 | * of success, %-ENOENT if the node was not found, and a negative error code in | 1392 | * %0 is returned. |
1393 | * case of failure. | ||
1394 | */ | 1393 | */ |
1395 | int ubifs_tnc_lookup(struct ubifs_info *c, const union ubifs_key *key, | 1394 | static int maybe_leb_gced(struct ubifs_info *c, int lnum, int gc_seq1) |
1396 | void *node) | ||
1397 | { | 1395 | { |
1398 | int found, n, err; | 1396 | int gc_seq2, gced_lnum; |
1399 | struct ubifs_znode *znode; | ||
1400 | struct ubifs_zbranch zbr, *zt; | ||
1401 | 1397 | ||
1402 | mutex_lock(&c->tnc_mutex); | 1398 | gced_lnum = c->gced_lnum; |
1403 | found = ubifs_lookup_level0(c, key, &znode, &n); | 1399 | smp_rmb(); |
1404 | if (!found) { | 1400 | gc_seq2 = c->gc_seq; |
1405 | err = -ENOENT; | 1401 | /* Same seq means no GC */ |
1406 | goto out; | 1402 | if (gc_seq1 == gc_seq2) |
1407 | } else if (found < 0) { | 1403 | return 0; |
1408 | err = found; | 1404 | /* Different by more than 1 means we don't know */ |
1409 | goto out; | 1405 | if (gc_seq1 + 1 != gc_seq2) |
1410 | } | 1406 | return 1; |
1411 | zt = &znode->zbranch[n]; | 1407 | /* |
1412 | if (is_hash_key(c, key)) { | 1408 | * We have seen the sequence number has increased by 1. Now we need to |
1413 | /* | 1409 | * be sure we read the right LEB number, so read it again. |
1414 | * In this case the leaf node cache gets used, so we pass the | 1410 | */ |
1415 | * address of the zbranch and keep the mutex locked | 1411 | smp_rmb(); |
1416 | */ | 1412 | if (gced_lnum != c->gced_lnum) |
1417 | err = tnc_read_node_nm(c, zt, node); | 1413 | return 1; |
1418 | goto out; | 1414 | /* Finally we can check lnum */ |
1419 | } | 1415 | if (gced_lnum == lnum) |
1420 | zbr = znode->zbranch[n]; | 1416 | return 1; |
1421 | mutex_unlock(&c->tnc_mutex); | 1417 | return 0; |
1422 | |||
1423 | err = ubifs_tnc_read_node(c, &zbr, node); | ||
1424 | return err; | ||
1425 | |||
1426 | out: | ||
1427 | mutex_unlock(&c->tnc_mutex); | ||
1428 | return err; | ||
1429 | } | 1418 | } |
1430 | 1419 | ||
1431 | /** | 1420 | /** |
@@ -1436,16 +1425,19 @@ out: | |||
1436 | * @lnum: LEB number is returned here | 1425 | * @lnum: LEB number is returned here |
1437 | * @offs: offset is returned here | 1426 | * @offs: offset is returned here |
1438 | * | 1427 | * |
1439 | * This function is the same as 'ubifs_tnc_lookup()' but it returns the node | 1428 | * This function look up and reads node with key @key. The caller has to make |
1440 | * location also. See 'ubifs_tnc_lookup()'. | 1429 | * sure the @node buffer is large enough to fit the node. Returns zero in case |
1430 | * of success, %-ENOENT if the node was not found, and a negative error code in | ||
1431 | * case of failure. The node location can be returned in @lnum and @offs. | ||
1441 | */ | 1432 | */ |
1442 | int ubifs_tnc_locate(struct ubifs_info *c, const union ubifs_key *key, | 1433 | int ubifs_tnc_locate(struct ubifs_info *c, const union ubifs_key *key, |
1443 | void *node, int *lnum, int *offs) | 1434 | void *node, int *lnum, int *offs) |
1444 | { | 1435 | { |
1445 | int found, n, err; | 1436 | int found, n, err, safely = 0, gc_seq1; |
1446 | struct ubifs_znode *znode; | 1437 | struct ubifs_znode *znode; |
1447 | struct ubifs_zbranch zbr, *zt; | 1438 | struct ubifs_zbranch zbr, *zt; |
1448 | 1439 | ||
1440 | again: | ||
1449 | mutex_lock(&c->tnc_mutex); | 1441 | mutex_lock(&c->tnc_mutex); |
1450 | found = ubifs_lookup_level0(c, key, &znode, &n); | 1442 | found = ubifs_lookup_level0(c, key, &znode, &n); |
1451 | if (!found) { | 1443 | if (!found) { |
@@ -1456,24 +1448,43 @@ int ubifs_tnc_locate(struct ubifs_info *c, const union ubifs_key *key, | |||
1456 | goto out; | 1448 | goto out; |
1457 | } | 1449 | } |
1458 | zt = &znode->zbranch[n]; | 1450 | zt = &znode->zbranch[n]; |
1451 | if (lnum) { | ||
1452 | *lnum = zt->lnum; | ||
1453 | *offs = zt->offs; | ||
1454 | } | ||
1459 | if (is_hash_key(c, key)) { | 1455 | if (is_hash_key(c, key)) { |
1460 | /* | 1456 | /* |
1461 | * In this case the leaf node cache gets used, so we pass the | 1457 | * In this case the leaf node cache gets used, so we pass the |
1462 | * address of the zbranch and keep the mutex locked | 1458 | * address of the zbranch and keep the mutex locked |
1463 | */ | 1459 | */ |
1464 | *lnum = zt->lnum; | ||
1465 | *offs = zt->offs; | ||
1466 | err = tnc_read_node_nm(c, zt, node); | 1460 | err = tnc_read_node_nm(c, zt, node); |
1467 | goto out; | 1461 | goto out; |
1468 | } | 1462 | } |
1463 | if (safely) { | ||
1464 | err = ubifs_tnc_read_node(c, zt, node); | ||
1465 | goto out; | ||
1466 | } | ||
1467 | /* Drop the TNC mutex prematurely and race with garbage collection */ | ||
1469 | zbr = znode->zbranch[n]; | 1468 | zbr = znode->zbranch[n]; |
1469 | gc_seq1 = c->gc_seq; | ||
1470 | mutex_unlock(&c->tnc_mutex); | 1470 | mutex_unlock(&c->tnc_mutex); |
1471 | 1471 | ||
1472 | *lnum = zbr.lnum; | 1472 | if (ubifs_get_wbuf(c, zbr.lnum)) { |
1473 | *offs = zbr.offs; | 1473 | /* We do not GC journal heads */ |
1474 | err = ubifs_tnc_read_node(c, &zbr, node); | ||
1475 | return err; | ||
1476 | } | ||
1474 | 1477 | ||
1475 | err = ubifs_tnc_read_node(c, &zbr, node); | 1478 | err = fallible_read_node(c, key, &zbr, node); |
1476 | return err; | 1479 | if (err <= 0 || maybe_leb_gced(c, zbr.lnum, gc_seq1)) { |
1480 | /* | ||
1481 | * The node may have been GC'ed out from under us so try again | ||
1482 | * while keeping the TNC mutex locked. | ||
1483 | */ | ||
1484 | safely = 1; | ||
1485 | goto again; | ||
1486 | } | ||
1487 | return 0; | ||
1477 | 1488 | ||
1478 | out: | 1489 | out: |
1479 | mutex_unlock(&c->tnc_mutex); | 1490 | mutex_unlock(&c->tnc_mutex); |
@@ -1498,7 +1509,6 @@ static int do_lookup_nm(struct ubifs_info *c, const union ubifs_key *key, | |||
1498 | { | 1509 | { |
1499 | int found, n, err; | 1510 | int found, n, err; |
1500 | struct ubifs_znode *znode; | 1511 | struct ubifs_znode *znode; |
1501 | struct ubifs_zbranch zbr; | ||
1502 | 1512 | ||
1503 | dbg_tnc("name '%.*s' key %s", nm->len, nm->name, DBGKEY(key)); | 1513 | dbg_tnc("name '%.*s' key %s", nm->len, nm->name, DBGKEY(key)); |
1504 | mutex_lock(&c->tnc_mutex); | 1514 | mutex_lock(&c->tnc_mutex); |
@@ -1522,11 +1532,7 @@ static int do_lookup_nm(struct ubifs_info *c, const union ubifs_key *key, | |||
1522 | goto out_unlock; | 1532 | goto out_unlock; |
1523 | } | 1533 | } |
1524 | 1534 | ||
1525 | zbr = znode->zbranch[n]; | 1535 | err = tnc_read_node_nm(c, &znode->zbranch[n], node); |
1526 | mutex_unlock(&c->tnc_mutex); | ||
1527 | |||
1528 | err = tnc_read_node_nm(c, &zbr, node); | ||
1529 | return err; | ||
1530 | 1536 | ||
1531 | out_unlock: | 1537 | out_unlock: |
1532 | mutex_unlock(&c->tnc_mutex); | 1538 | mutex_unlock(&c->tnc_mutex); |
diff --git a/fs/ubifs/ubifs-media.h b/fs/ubifs/ubifs-media.h index bd2121f3426e..a9ecbd9af20d 100644 --- a/fs/ubifs/ubifs-media.h +++ b/fs/ubifs/ubifs-media.h | |||
@@ -87,7 +87,7 @@ | |||
87 | #define UBIFS_SK_LEN 8 | 87 | #define UBIFS_SK_LEN 8 |
88 | 88 | ||
89 | /* Minimum index tree fanout */ | 89 | /* Minimum index tree fanout */ |
90 | #define UBIFS_MIN_FANOUT 2 | 90 | #define UBIFS_MIN_FANOUT 3 |
91 | 91 | ||
92 | /* Maximum number of levels in UBIFS indexing B-tree */ | 92 | /* Maximum number of levels in UBIFS indexing B-tree */ |
93 | #define UBIFS_MAX_LEVELS 512 | 93 | #define UBIFS_MAX_LEVELS 512 |
diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h index d7f706f7a302..17c620b93eec 100644 --- a/fs/ubifs/ubifs.h +++ b/fs/ubifs/ubifs.h | |||
@@ -995,6 +995,9 @@ struct ubifs_mount_opts { | |||
995 | * @max_idx_node_sz: maximum indexing node aligned on 8-bytes boundary | 995 | * @max_idx_node_sz: maximum indexing node aligned on 8-bytes boundary |
996 | * @max_inode_sz: maximum possible inode size in bytes | 996 | * @max_inode_sz: maximum possible inode size in bytes |
997 | * @max_znode_sz: size of znode in bytes | 997 | * @max_znode_sz: size of znode in bytes |
998 | * | ||
999 | * @leb_overhead: how many bytes are wasted in an LEB when it is filled with | ||
1000 | * data nodes of maximum size - used in free space reporting | ||
998 | * @dead_wm: LEB dead space watermark | 1001 | * @dead_wm: LEB dead space watermark |
999 | * @dark_wm: LEB dark space watermark | 1002 | * @dark_wm: LEB dark space watermark |
1000 | * @block_cnt: count of 4KiB blocks on the FS | 1003 | * @block_cnt: count of 4KiB blocks on the FS |
@@ -1028,6 +1031,8 @@ struct ubifs_mount_opts { | |||
1028 | * @sbuf: a buffer of LEB size used by GC and replay for scanning | 1031 | * @sbuf: a buffer of LEB size used by GC and replay for scanning |
1029 | * @idx_gc: list of index LEBs that have been garbage collected | 1032 | * @idx_gc: list of index LEBs that have been garbage collected |
1030 | * @idx_gc_cnt: number of elements on the idx_gc list | 1033 | * @idx_gc_cnt: number of elements on the idx_gc list |
1034 | * @gc_seq: incremented for every non-index LEB garbage collected | ||
1035 | * @gced_lnum: last non-index LEB that was garbage collected | ||
1031 | * | 1036 | * |
1032 | * @infos_list: links all 'ubifs_info' objects | 1037 | * @infos_list: links all 'ubifs_info' objects |
1033 | * @umount_mutex: serializes shrinker and un-mount | 1038 | * @umount_mutex: serializes shrinker and un-mount |
@@ -1224,6 +1229,8 @@ struct ubifs_info { | |||
1224 | int max_idx_node_sz; | 1229 | int max_idx_node_sz; |
1225 | long long max_inode_sz; | 1230 | long long max_inode_sz; |
1226 | int max_znode_sz; | 1231 | int max_znode_sz; |
1232 | |||
1233 | int leb_overhead; | ||
1227 | int dead_wm; | 1234 | int dead_wm; |
1228 | int dark_wm; | 1235 | int dark_wm; |
1229 | int block_cnt; | 1236 | int block_cnt; |
@@ -1257,6 +1264,8 @@ struct ubifs_info { | |||
1257 | void *sbuf; | 1264 | void *sbuf; |
1258 | struct list_head idx_gc; | 1265 | struct list_head idx_gc; |
1259 | int idx_gc_cnt; | 1266 | int idx_gc_cnt; |
1267 | volatile int gc_seq; | ||
1268 | volatile int gced_lnum; | ||
1260 | 1269 | ||
1261 | struct list_head infos_list; | 1270 | struct list_head infos_list; |
1262 | struct mutex umount_mutex; | 1271 | struct mutex umount_mutex; |
@@ -1434,9 +1443,10 @@ void ubifs_release_ino_dirty(struct ubifs_info *c, struct inode *inode, | |||
1434 | struct ubifs_budget_req *req); | 1443 | struct ubifs_budget_req *req); |
1435 | void ubifs_cancel_ino_op(struct ubifs_info *c, struct inode *inode, | 1444 | void ubifs_cancel_ino_op(struct ubifs_info *c, struct inode *inode, |
1436 | struct ubifs_budget_req *req); | 1445 | struct ubifs_budget_req *req); |
1437 | long long ubifs_budg_get_free_space(struct ubifs_info *c); | 1446 | long long ubifs_get_free_space(struct ubifs_info *c); |
1438 | int ubifs_calc_min_idx_lebs(struct ubifs_info *c); | 1447 | int ubifs_calc_min_idx_lebs(struct ubifs_info *c); |
1439 | void ubifs_convert_page_budget(struct ubifs_info *c); | 1448 | void ubifs_convert_page_budget(struct ubifs_info *c); |
1449 | long long ubifs_reported_space(const struct ubifs_info *c, uint64_t free); | ||
1440 | long long ubifs_calc_available(const struct ubifs_info *c, int min_idx_lebs); | 1450 | long long ubifs_calc_available(const struct ubifs_info *c, int min_idx_lebs); |
1441 | 1451 | ||
1442 | /* find.c */ | 1452 | /* find.c */ |
@@ -1451,8 +1461,6 @@ int ubifs_save_dirty_idx_lnums(struct ubifs_info *c); | |||
1451 | /* tnc.c */ | 1461 | /* tnc.c */ |
1452 | int ubifs_lookup_level0(struct ubifs_info *c, const union ubifs_key *key, | 1462 | int ubifs_lookup_level0(struct ubifs_info *c, const union ubifs_key *key, |
1453 | struct ubifs_znode **zn, int *n); | 1463 | struct ubifs_znode **zn, int *n); |
1454 | int ubifs_tnc_lookup(struct ubifs_info *c, const union ubifs_key *key, | ||
1455 | void *node); | ||
1456 | int ubifs_tnc_lookup_nm(struct ubifs_info *c, const union ubifs_key *key, | 1464 | int ubifs_tnc_lookup_nm(struct ubifs_info *c, const union ubifs_key *key, |
1457 | void *node, const struct qstr *nm); | 1465 | void *node, const struct qstr *nm); |
1458 | int ubifs_tnc_locate(struct ubifs_info *c, const union ubifs_key *key, | 1466 | int ubifs_tnc_locate(struct ubifs_info *c, const union ubifs_key *key, |
diff --git a/fs/udf/file.c b/fs/udf/file.c index 0ed6e146a0d9..eb91f3b70320 100644 --- a/fs/udf/file.c +++ b/fs/udf/file.c | |||
@@ -211,6 +211,7 @@ const struct file_operations udf_file_operations = { | |||
211 | .release = udf_release_file, | 211 | .release = udf_release_file, |
212 | .fsync = udf_fsync_file, | 212 | .fsync = udf_fsync_file, |
213 | .splice_read = generic_file_splice_read, | 213 | .splice_read = generic_file_splice_read, |
214 | .llseek = generic_file_llseek, | ||
214 | }; | 215 | }; |
215 | 216 | ||
216 | const struct inode_operations udf_file_inode_operations = { | 217 | const struct inode_operations udf_file_inode_operations = { |
diff --git a/fs/udf/ialloc.c b/fs/udf/ialloc.c index eb9cfa23dc3d..a4f2b3ce45b0 100644 --- a/fs/udf/ialloc.c +++ b/fs/udf/ialloc.c | |||
@@ -76,11 +76,24 @@ struct inode *udf_new_inode(struct inode *dir, int mode, int *err) | |||
76 | *err = -ENOSPC; | 76 | *err = -ENOSPC; |
77 | 77 | ||
78 | iinfo = UDF_I(inode); | 78 | iinfo = UDF_I(inode); |
79 | iinfo->i_unique = 0; | 79 | if (UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_USE_EXTENDED_FE)) { |
80 | iinfo->i_lenExtents = 0; | 80 | iinfo->i_efe = 1; |
81 | iinfo->i_next_alloc_block = 0; | 81 | if (UDF_VERS_USE_EXTENDED_FE > sbi->s_udfrev) |
82 | iinfo->i_next_alloc_goal = 0; | 82 | sbi->s_udfrev = UDF_VERS_USE_EXTENDED_FE; |
83 | iinfo->i_strat4096 = 0; | 83 | iinfo->i_ext.i_data = kzalloc(inode->i_sb->s_blocksize - |
84 | sizeof(struct extendedFileEntry), | ||
85 | GFP_KERNEL); | ||
86 | } else { | ||
87 | iinfo->i_efe = 0; | ||
88 | iinfo->i_ext.i_data = kzalloc(inode->i_sb->s_blocksize - | ||
89 | sizeof(struct fileEntry), | ||
90 | GFP_KERNEL); | ||
91 | } | ||
92 | if (!iinfo->i_ext.i_data) { | ||
93 | iput(inode); | ||
94 | *err = -ENOMEM; | ||
95 | return NULL; | ||
96 | } | ||
84 | 97 | ||
85 | block = udf_new_block(dir->i_sb, NULL, | 98 | block = udf_new_block(dir->i_sb, NULL, |
86 | dinfo->i_location.partitionReferenceNum, | 99 | dinfo->i_location.partitionReferenceNum, |
@@ -111,6 +124,7 @@ struct inode *udf_new_inode(struct inode *dir, int mode, int *err) | |||
111 | lvhd->uniqueID = cpu_to_le64(uniqueID); | 124 | lvhd->uniqueID = cpu_to_le64(uniqueID); |
112 | mark_buffer_dirty(sbi->s_lvid_bh); | 125 | mark_buffer_dirty(sbi->s_lvid_bh); |
113 | } | 126 | } |
127 | mutex_unlock(&sbi->s_alloc_mutex); | ||
114 | inode->i_mode = mode; | 128 | inode->i_mode = mode; |
115 | inode->i_uid = current->fsuid; | 129 | inode->i_uid = current->fsuid; |
116 | if (dir->i_mode & S_ISGID) { | 130 | if (dir->i_mode & S_ISGID) { |
@@ -129,25 +143,6 @@ struct inode *udf_new_inode(struct inode *dir, int mode, int *err) | |||
129 | iinfo->i_lenEAttr = 0; | 143 | iinfo->i_lenEAttr = 0; |
130 | iinfo->i_lenAlloc = 0; | 144 | iinfo->i_lenAlloc = 0; |
131 | iinfo->i_use = 0; | 145 | iinfo->i_use = 0; |
132 | if (UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_USE_EXTENDED_FE)) { | ||
133 | iinfo->i_efe = 1; | ||
134 | if (UDF_VERS_USE_EXTENDED_FE > sbi->s_udfrev) | ||
135 | sbi->s_udfrev = UDF_VERS_USE_EXTENDED_FE; | ||
136 | iinfo->i_ext.i_data = kzalloc(inode->i_sb->s_blocksize - | ||
137 | sizeof(struct extendedFileEntry), | ||
138 | GFP_KERNEL); | ||
139 | } else { | ||
140 | iinfo->i_efe = 0; | ||
141 | iinfo->i_ext.i_data = kzalloc(inode->i_sb->s_blocksize - | ||
142 | sizeof(struct fileEntry), | ||
143 | GFP_KERNEL); | ||
144 | } | ||
145 | if (!iinfo->i_ext.i_data) { | ||
146 | iput(inode); | ||
147 | *err = -ENOMEM; | ||
148 | mutex_unlock(&sbi->s_alloc_mutex); | ||
149 | return NULL; | ||
150 | } | ||
151 | if (UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_USE_AD_IN_ICB)) | 146 | if (UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_USE_AD_IN_ICB)) |
152 | iinfo->i_alloc_type = ICBTAG_FLAG_AD_IN_ICB; | 147 | iinfo->i_alloc_type = ICBTAG_FLAG_AD_IN_ICB; |
153 | else if (UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_USE_SHORT_AD)) | 148 | else if (UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_USE_SHORT_AD)) |
@@ -158,7 +153,6 @@ struct inode *udf_new_inode(struct inode *dir, int mode, int *err) | |||
158 | iinfo->i_crtime = current_fs_time(inode->i_sb); | 153 | iinfo->i_crtime = current_fs_time(inode->i_sb); |
159 | insert_inode_hash(inode); | 154 | insert_inode_hash(inode); |
160 | mark_inode_dirty(inode); | 155 | mark_inode_dirty(inode); |
161 | mutex_unlock(&sbi->s_alloc_mutex); | ||
162 | 156 | ||
163 | if (DQUOT_ALLOC_INODE(inode)) { | 157 | if (DQUOT_ALLOC_INODE(inode)) { |
164 | DQUOT_DROP(inode); | 158 | DQUOT_DROP(inode); |
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c index f42f80a3b1fa..a44d68eb50b5 100644 --- a/fs/xfs/linux-2.6/xfs_aops.c +++ b/fs/xfs/linux-2.6/xfs_aops.c | |||
@@ -1338,6 +1338,10 @@ __xfs_get_blocks( | |||
1338 | offset = (xfs_off_t)iblock << inode->i_blkbits; | 1338 | offset = (xfs_off_t)iblock << inode->i_blkbits; |
1339 | ASSERT(bh_result->b_size >= (1 << inode->i_blkbits)); | 1339 | ASSERT(bh_result->b_size >= (1 << inode->i_blkbits)); |
1340 | size = bh_result->b_size; | 1340 | size = bh_result->b_size; |
1341 | |||
1342 | if (!create && direct && offset >= i_size_read(inode)) | ||
1343 | return 0; | ||
1344 | |||
1341 | error = xfs_iomap(XFS_I(inode), offset, size, | 1345 | error = xfs_iomap(XFS_I(inode), offset, size, |
1342 | create ? flags : BMAPI_READ, &iomap, &niomap); | 1346 | create ? flags : BMAPI_READ, &iomap, &niomap); |
1343 | if (error) | 1347 | if (error) |
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c index 986061ae1b9b..36d5fcd3f593 100644 --- a/fs/xfs/linux-2.6/xfs_buf.c +++ b/fs/xfs/linux-2.6/xfs_buf.c | |||
@@ -1001,12 +1001,13 @@ xfs_buf_iodone_work( | |||
1001 | * We can get an EOPNOTSUPP to ordered writes. Here we clear the | 1001 | * We can get an EOPNOTSUPP to ordered writes. Here we clear the |
1002 | * ordered flag and reissue them. Because we can't tell the higher | 1002 | * ordered flag and reissue them. Because we can't tell the higher |
1003 | * layers directly that they should not issue ordered I/O anymore, they | 1003 | * layers directly that they should not issue ordered I/O anymore, they |
1004 | * need to check if the ordered flag was cleared during I/O completion. | 1004 | * need to check if the _XFS_BARRIER_FAILED flag was set during I/O completion. |
1005 | */ | 1005 | */ |
1006 | if ((bp->b_error == EOPNOTSUPP) && | 1006 | if ((bp->b_error == EOPNOTSUPP) && |
1007 | (bp->b_flags & (XBF_ORDERED|XBF_ASYNC)) == (XBF_ORDERED|XBF_ASYNC)) { | 1007 | (bp->b_flags & (XBF_ORDERED|XBF_ASYNC)) == (XBF_ORDERED|XBF_ASYNC)) { |
1008 | XB_TRACE(bp, "ordered_retry", bp->b_iodone); | 1008 | XB_TRACE(bp, "ordered_retry", bp->b_iodone); |
1009 | bp->b_flags &= ~XBF_ORDERED; | 1009 | bp->b_flags &= ~XBF_ORDERED; |
1010 | bp->b_flags |= _XFS_BARRIER_FAILED; | ||
1010 | xfs_buf_iorequest(bp); | 1011 | xfs_buf_iorequest(bp); |
1011 | } else if (bp->b_iodone) | 1012 | } else if (bp->b_iodone) |
1012 | (*(bp->b_iodone))(bp); | 1013 | (*(bp->b_iodone))(bp); |
diff --git a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/linux-2.6/xfs_buf.h index fe0109956656..456519a088c7 100644 --- a/fs/xfs/linux-2.6/xfs_buf.h +++ b/fs/xfs/linux-2.6/xfs_buf.h | |||
@@ -85,6 +85,14 @@ typedef enum { | |||
85 | * modifications being lost. | 85 | * modifications being lost. |
86 | */ | 86 | */ |
87 | _XBF_PAGE_LOCKED = (1 << 22), | 87 | _XBF_PAGE_LOCKED = (1 << 22), |
88 | |||
89 | /* | ||
90 | * If we try a barrier write, but it fails we have to communicate | ||
91 | * this to the upper layers. Unfortunately b_error gets overwritten | ||
92 | * when the buffer is re-issued so we have to add another flag to | ||
93 | * keep this information. | ||
94 | */ | ||
95 | _XFS_BARRIER_FAILED = (1 << 23), | ||
88 | } xfs_buf_flags_t; | 96 | } xfs_buf_flags_t; |
89 | 97 | ||
90 | typedef enum { | 98 | typedef enum { |
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index 73c65f19e549..18d3c8487835 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c | |||
@@ -1302,9 +1302,29 @@ xfs_fs_remount( | |||
1302 | mp->m_flags &= ~XFS_MOUNT_BARRIER; | 1302 | mp->m_flags &= ~XFS_MOUNT_BARRIER; |
1303 | break; | 1303 | break; |
1304 | default: | 1304 | default: |
1305 | /* | ||
1306 | * Logically we would return an error here to prevent | ||
1307 | * users from believing they might have changed | ||
1308 | * mount options using remount which can't be changed. | ||
1309 | * | ||
1310 | * But unfortunately mount(8) adds all options from | ||
1311 | * mtab and fstab to the mount arguments in some cases | ||
1312 | * so we can't blindly reject options, but have to | ||
1313 | * check for each specified option if it actually | ||
1314 | * differs from the currently set option and only | ||
1315 | * reject it if that's the case. | ||
1316 | * | ||
1317 | * Until that is implemented we return success for | ||
1318 | * every remount request, and silently ignore all | ||
1319 | * options that we can't actually change. | ||
1320 | */ | ||
1321 | #if 0 | ||
1305 | printk(KERN_INFO | 1322 | printk(KERN_INFO |
1306 | "XFS: mount option \"%s\" not supported for remount\n", p); | 1323 | "XFS: mount option \"%s\" not supported for remount\n", p); |
1307 | return -EINVAL; | 1324 | return -EINVAL; |
1325 | #else | ||
1326 | return 0; | ||
1327 | #endif | ||
1308 | } | 1328 | } |
1309 | } | 1329 | } |
1310 | 1330 | ||
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c index 608c30c3f76b..002fc2617c8e 100644 --- a/fs/xfs/xfs_buf_item.c +++ b/fs/xfs/xfs_buf_item.c | |||
@@ -732,6 +732,7 @@ xfs_buf_item_init( | |||
732 | bip->bli_item.li_ops = &xfs_buf_item_ops; | 732 | bip->bli_item.li_ops = &xfs_buf_item_ops; |
733 | bip->bli_item.li_mountp = mp; | 733 | bip->bli_item.li_mountp = mp; |
734 | bip->bli_buf = bp; | 734 | bip->bli_buf = bp; |
735 | xfs_buf_hold(bp); | ||
735 | bip->bli_format.blf_type = XFS_LI_BUF; | 736 | bip->bli_format.blf_type = XFS_LI_BUF; |
736 | bip->bli_format.blf_blkno = (__int64_t)XFS_BUF_ADDR(bp); | 737 | bip->bli_format.blf_blkno = (__int64_t)XFS_BUF_ADDR(bp); |
737 | bip->bli_format.blf_len = (ushort)BTOBB(XFS_BUF_COUNT(bp)); | 738 | bip->bli_format.blf_len = (ushort)BTOBB(XFS_BUF_COUNT(bp)); |
@@ -867,6 +868,21 @@ xfs_buf_item_dirty( | |||
867 | return (bip->bli_flags & XFS_BLI_DIRTY); | 868 | return (bip->bli_flags & XFS_BLI_DIRTY); |
868 | } | 869 | } |
869 | 870 | ||
871 | STATIC void | ||
872 | xfs_buf_item_free( | ||
873 | xfs_buf_log_item_t *bip) | ||
874 | { | ||
875 | #ifdef XFS_TRANS_DEBUG | ||
876 | kmem_free(bip->bli_orig); | ||
877 | kmem_free(bip->bli_logged); | ||
878 | #endif /* XFS_TRANS_DEBUG */ | ||
879 | |||
880 | #ifdef XFS_BLI_TRACE | ||
881 | ktrace_free(bip->bli_trace); | ||
882 | #endif | ||
883 | kmem_zone_free(xfs_buf_item_zone, bip); | ||
884 | } | ||
885 | |||
870 | /* | 886 | /* |
871 | * This is called when the buf log item is no longer needed. It should | 887 | * This is called when the buf log item is no longer needed. It should |
872 | * free the buf log item associated with the given buffer and clear | 888 | * free the buf log item associated with the given buffer and clear |
@@ -887,18 +903,8 @@ xfs_buf_item_relse( | |||
887 | (XFS_BUF_IODONE_FUNC(bp) != NULL)) { | 903 | (XFS_BUF_IODONE_FUNC(bp) != NULL)) { |
888 | XFS_BUF_CLR_IODONE_FUNC(bp); | 904 | XFS_BUF_CLR_IODONE_FUNC(bp); |
889 | } | 905 | } |
890 | 906 | xfs_buf_rele(bp); | |
891 | #ifdef XFS_TRANS_DEBUG | 907 | xfs_buf_item_free(bip); |
892 | kmem_free(bip->bli_orig); | ||
893 | bip->bli_orig = NULL; | ||
894 | kmem_free(bip->bli_logged); | ||
895 | bip->bli_logged = NULL; | ||
896 | #endif /* XFS_TRANS_DEBUG */ | ||
897 | |||
898 | #ifdef XFS_BLI_TRACE | ||
899 | ktrace_free(bip->bli_trace); | ||
900 | #endif | ||
901 | kmem_zone_free(xfs_buf_item_zone, bip); | ||
902 | } | 908 | } |
903 | 909 | ||
904 | 910 | ||
@@ -1120,6 +1126,7 @@ xfs_buf_iodone( | |||
1120 | 1126 | ||
1121 | ASSERT(bip->bli_buf == bp); | 1127 | ASSERT(bip->bli_buf == bp); |
1122 | 1128 | ||
1129 | xfs_buf_rele(bp); | ||
1123 | mp = bip->bli_item.li_mountp; | 1130 | mp = bip->bli_item.li_mountp; |
1124 | 1131 | ||
1125 | /* | 1132 | /* |
@@ -1136,18 +1143,7 @@ xfs_buf_iodone( | |||
1136 | * xfs_trans_delete_ail() drops the AIL lock. | 1143 | * xfs_trans_delete_ail() drops the AIL lock. |
1137 | */ | 1144 | */ |
1138 | xfs_trans_delete_ail(mp, (xfs_log_item_t *)bip); | 1145 | xfs_trans_delete_ail(mp, (xfs_log_item_t *)bip); |
1139 | 1146 | xfs_buf_item_free(bip); | |
1140 | #ifdef XFS_TRANS_DEBUG | ||
1141 | kmem_free(bip->bli_orig); | ||
1142 | bip->bli_orig = NULL; | ||
1143 | kmem_free(bip->bli_logged); | ||
1144 | bip->bli_logged = NULL; | ||
1145 | #endif /* XFS_TRANS_DEBUG */ | ||
1146 | |||
1147 | #ifdef XFS_BLI_TRACE | ||
1148 | ktrace_free(bip->bli_trace); | ||
1149 | #endif | ||
1150 | kmem_zone_free(xfs_buf_item_zone, bip); | ||
1151 | } | 1147 | } |
1152 | 1148 | ||
1153 | #if defined(XFS_BLI_TRACE) | 1149 | #if defined(XFS_BLI_TRACE) |
diff --git a/fs/xfs/xfs_dfrag.c b/fs/xfs/xfs_dfrag.c index 760f4c5b5160..75b0cd4da0ea 100644 --- a/fs/xfs/xfs_dfrag.c +++ b/fs/xfs/xfs_dfrag.c | |||
@@ -149,7 +149,14 @@ xfs_swap_extents( | |||
149 | 149 | ||
150 | sbp = &sxp->sx_stat; | 150 | sbp = &sxp->sx_stat; |
151 | 151 | ||
152 | xfs_lock_two_inodes(ip, tip, lock_flags); | 152 | /* |
153 | * we have to do two separate lock calls here to keep lockdep | ||
154 | * happy. If we try to get all the locks in one call, lock will | ||
155 | * report false positives when we drop the ILOCK and regain them | ||
156 | * below. | ||
157 | */ | ||
158 | xfs_lock_two_inodes(ip, tip, XFS_IOLOCK_EXCL); | ||
159 | xfs_lock_two_inodes(ip, tip, XFS_ILOCK_EXCL); | ||
153 | locked = 1; | 160 | locked = 1; |
154 | 161 | ||
155 | /* Verify that both files have the same format */ | 162 | /* Verify that both files have the same format */ |
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 00e80df9dd9d..dbd9cef852ec 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c | |||
@@ -4118,7 +4118,7 @@ xfs_iext_indirect_to_direct( | |||
4118 | ASSERT(nextents <= XFS_LINEAR_EXTS); | 4118 | ASSERT(nextents <= XFS_LINEAR_EXTS); |
4119 | size = nextents * sizeof(xfs_bmbt_rec_t); | 4119 | size = nextents * sizeof(xfs_bmbt_rec_t); |
4120 | 4120 | ||
4121 | xfs_iext_irec_compact_full(ifp); | 4121 | xfs_iext_irec_compact_pages(ifp); |
4122 | ASSERT(ifp->if_real_bytes == XFS_IEXT_BUFSZ); | 4122 | ASSERT(ifp->if_real_bytes == XFS_IEXT_BUFSZ); |
4123 | 4123 | ||
4124 | ep = ifp->if_u1.if_ext_irec->er_extbuf; | 4124 | ep = ifp->if_u1.if_ext_irec->er_extbuf; |
@@ -4449,8 +4449,7 @@ xfs_iext_irec_remove( | |||
4449 | * compaction policy is as follows: | 4449 | * compaction policy is as follows: |
4450 | * | 4450 | * |
4451 | * Full Compaction: Extents fit into a single page (or inline buffer) | 4451 | * Full Compaction: Extents fit into a single page (or inline buffer) |
4452 | * Full Compaction: Extents occupy less than 10% of allocated space | 4452 | * Partial Compaction: Extents occupy less than 50% of allocated space |
4453 | * Partial Compaction: Extents occupy > 10% and < 50% of allocated space | ||
4454 | * No Compaction: Extents occupy at least 50% of allocated space | 4453 | * No Compaction: Extents occupy at least 50% of allocated space |
4455 | */ | 4454 | */ |
4456 | void | 4455 | void |
@@ -4471,8 +4470,6 @@ xfs_iext_irec_compact( | |||
4471 | xfs_iext_direct_to_inline(ifp, nextents); | 4470 | xfs_iext_direct_to_inline(ifp, nextents); |
4472 | } else if (nextents <= XFS_LINEAR_EXTS) { | 4471 | } else if (nextents <= XFS_LINEAR_EXTS) { |
4473 | xfs_iext_indirect_to_direct(ifp); | 4472 | xfs_iext_indirect_to_direct(ifp); |
4474 | } else if (nextents < (nlists * XFS_LINEAR_EXTS) >> 3) { | ||
4475 | xfs_iext_irec_compact_full(ifp); | ||
4476 | } else if (nextents < (nlists * XFS_LINEAR_EXTS) >> 1) { | 4473 | } else if (nextents < (nlists * XFS_LINEAR_EXTS) >> 1) { |
4477 | xfs_iext_irec_compact_pages(ifp); | 4474 | xfs_iext_irec_compact_pages(ifp); |
4478 | } | 4475 | } |
@@ -4496,7 +4493,7 @@ xfs_iext_irec_compact_pages( | |||
4496 | erp_next = erp + 1; | 4493 | erp_next = erp + 1; |
4497 | if (erp_next->er_extcount <= | 4494 | if (erp_next->er_extcount <= |
4498 | (XFS_LINEAR_EXTS - erp->er_extcount)) { | 4495 | (XFS_LINEAR_EXTS - erp->er_extcount)) { |
4499 | memmove(&erp->er_extbuf[erp->er_extcount], | 4496 | memcpy(&erp->er_extbuf[erp->er_extcount], |
4500 | erp_next->er_extbuf, erp_next->er_extcount * | 4497 | erp_next->er_extbuf, erp_next->er_extcount * |
4501 | sizeof(xfs_bmbt_rec_t)); | 4498 | sizeof(xfs_bmbt_rec_t)); |
4502 | erp->er_extcount += erp_next->er_extcount; | 4499 | erp->er_extcount += erp_next->er_extcount; |
@@ -4516,91 +4513,6 @@ xfs_iext_irec_compact_pages( | |||
4516 | } | 4513 | } |
4517 | 4514 | ||
4518 | /* | 4515 | /* |
4519 | * Fully compact the extent records managed by the indirection array. | ||
4520 | */ | ||
4521 | void | ||
4522 | xfs_iext_irec_compact_full( | ||
4523 | xfs_ifork_t *ifp) /* inode fork pointer */ | ||
4524 | { | ||
4525 | xfs_bmbt_rec_host_t *ep, *ep_next; /* extent record pointers */ | ||
4526 | xfs_ext_irec_t *erp, *erp_next; /* extent irec pointers */ | ||
4527 | int erp_idx = 0; /* extent irec index */ | ||
4528 | int ext_avail; /* empty entries in ex list */ | ||
4529 | int ext_diff; /* number of exts to add */ | ||
4530 | int nlists; /* number of irec's (ex lists) */ | ||
4531 | |||
4532 | ASSERT(ifp->if_flags & XFS_IFEXTIREC); | ||
4533 | |||
4534 | nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; | ||
4535 | erp = ifp->if_u1.if_ext_irec; | ||
4536 | ep = &erp->er_extbuf[erp->er_extcount]; | ||
4537 | erp_next = erp + 1; | ||
4538 | ep_next = erp_next->er_extbuf; | ||
4539 | |||
4540 | while (erp_idx < nlists - 1) { | ||
4541 | /* | ||
4542 | * Check how many extent records are available in this irec. | ||
4543 | * If there is none skip the whole exercise. | ||
4544 | */ | ||
4545 | ext_avail = XFS_LINEAR_EXTS - erp->er_extcount; | ||
4546 | if (ext_avail) { | ||
4547 | |||
4548 | /* | ||
4549 | * Copy over as many as possible extent records into | ||
4550 | * the previous page. | ||
4551 | */ | ||
4552 | ext_diff = MIN(ext_avail, erp_next->er_extcount); | ||
4553 | memcpy(ep, ep_next, ext_diff * sizeof(xfs_bmbt_rec_t)); | ||
4554 | erp->er_extcount += ext_diff; | ||
4555 | erp_next->er_extcount -= ext_diff; | ||
4556 | |||
4557 | /* | ||
4558 | * If the next irec is empty now we can simply | ||
4559 | * remove it. | ||
4560 | */ | ||
4561 | if (erp_next->er_extcount == 0) { | ||
4562 | /* | ||
4563 | * Free page before removing extent record | ||
4564 | * so er_extoffs don't get modified in | ||
4565 | * xfs_iext_irec_remove. | ||
4566 | */ | ||
4567 | kmem_free(erp_next->er_extbuf); | ||
4568 | erp_next->er_extbuf = NULL; | ||
4569 | xfs_iext_irec_remove(ifp, erp_idx + 1); | ||
4570 | erp = &ifp->if_u1.if_ext_irec[erp_idx]; | ||
4571 | nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; | ||
4572 | |||
4573 | /* | ||
4574 | * If the next irec is not empty move up the content | ||
4575 | * that has not been copied to the previous page to | ||
4576 | * the beggining of this one. | ||
4577 | */ | ||
4578 | } else { | ||
4579 | memmove(erp_next->er_extbuf, &ep_next[ext_diff], | ||
4580 | erp_next->er_extcount * | ||
4581 | sizeof(xfs_bmbt_rec_t)); | ||
4582 | ep_next = erp_next->er_extbuf; | ||
4583 | memset(&ep_next[erp_next->er_extcount], 0, | ||
4584 | (XFS_LINEAR_EXTS - | ||
4585 | erp_next->er_extcount) * | ||
4586 | sizeof(xfs_bmbt_rec_t)); | ||
4587 | } | ||
4588 | } | ||
4589 | |||
4590 | if (erp->er_extcount == XFS_LINEAR_EXTS) { | ||
4591 | erp_idx++; | ||
4592 | if (erp_idx < nlists) | ||
4593 | erp = &ifp->if_u1.if_ext_irec[erp_idx]; | ||
4594 | else | ||
4595 | break; | ||
4596 | } | ||
4597 | ep = &erp->er_extbuf[erp->er_extcount]; | ||
4598 | erp_next = erp + 1; | ||
4599 | ep_next = erp_next->er_extbuf; | ||
4600 | } | ||
4601 | } | ||
4602 | |||
4603 | /* | ||
4604 | * This is called to update the er_extoff field in the indirection | 4516 | * This is called to update the er_extoff field in the indirection |
4605 | * array when extents have been added or removed from one of the | 4517 | * array when extents have been added or removed from one of the |
4606 | * extent lists. erp_idx contains the irec index to begin updating | 4518 | * extent lists. erp_idx contains the irec index to begin updating |
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index ccba14eb9dbe..0b02c6443551 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c | |||
@@ -124,16 +124,27 @@ STATIC void xlog_verify_tail_lsn(xlog_t *log, xlog_in_core_t *iclog, | |||
124 | STATIC int xlog_iclogs_empty(xlog_t *log); | 124 | STATIC int xlog_iclogs_empty(xlog_t *log); |
125 | 125 | ||
126 | #if defined(XFS_LOG_TRACE) | 126 | #if defined(XFS_LOG_TRACE) |
127 | |||
128 | #define XLOG_TRACE_LOGGRANT_SIZE 2048 | ||
129 | #define XLOG_TRACE_ICLOG_SIZE 256 | ||
130 | |||
131 | void | ||
132 | xlog_trace_loggrant_alloc(xlog_t *log) | ||
133 | { | ||
134 | log->l_grant_trace = ktrace_alloc(XLOG_TRACE_LOGGRANT_SIZE, KM_NOFS); | ||
135 | } | ||
136 | |||
137 | void | ||
138 | xlog_trace_loggrant_dealloc(xlog_t *log) | ||
139 | { | ||
140 | ktrace_free(log->l_grant_trace); | ||
141 | } | ||
142 | |||
127 | void | 143 | void |
128 | xlog_trace_loggrant(xlog_t *log, xlog_ticket_t *tic, xfs_caddr_t string) | 144 | xlog_trace_loggrant(xlog_t *log, xlog_ticket_t *tic, xfs_caddr_t string) |
129 | { | 145 | { |
130 | unsigned long cnts; | 146 | unsigned long cnts; |
131 | 147 | ||
132 | if (!log->l_grant_trace) { | ||
133 | log->l_grant_trace = ktrace_alloc(2048, KM_NOSLEEP); | ||
134 | if (!log->l_grant_trace) | ||
135 | return; | ||
136 | } | ||
137 | /* ticket counts are 1 byte each */ | 148 | /* ticket counts are 1 byte each */ |
138 | cnts = ((unsigned long)tic->t_ocnt) | ((unsigned long)tic->t_cnt) << 8; | 149 | cnts = ((unsigned long)tic->t_ocnt) | ((unsigned long)tic->t_cnt) << 8; |
139 | 150 | ||
@@ -157,10 +168,20 @@ xlog_trace_loggrant(xlog_t *log, xlog_ticket_t *tic, xfs_caddr_t string) | |||
157 | } | 168 | } |
158 | 169 | ||
159 | void | 170 | void |
171 | xlog_trace_iclog_alloc(xlog_in_core_t *iclog) | ||
172 | { | ||
173 | iclog->ic_trace = ktrace_alloc(XLOG_TRACE_ICLOG_SIZE, KM_NOFS); | ||
174 | } | ||
175 | |||
176 | void | ||
177 | xlog_trace_iclog_dealloc(xlog_in_core_t *iclog) | ||
178 | { | ||
179 | ktrace_free(iclog->ic_trace); | ||
180 | } | ||
181 | |||
182 | void | ||
160 | xlog_trace_iclog(xlog_in_core_t *iclog, uint state) | 183 | xlog_trace_iclog(xlog_in_core_t *iclog, uint state) |
161 | { | 184 | { |
162 | if (!iclog->ic_trace) | ||
163 | iclog->ic_trace = ktrace_alloc(256, KM_NOFS); | ||
164 | ktrace_enter(iclog->ic_trace, | 185 | ktrace_enter(iclog->ic_trace, |
165 | (void *)((unsigned long)state), | 186 | (void *)((unsigned long)state), |
166 | (void *)((unsigned long)current_pid()), | 187 | (void *)((unsigned long)current_pid()), |
@@ -170,8 +191,15 @@ xlog_trace_iclog(xlog_in_core_t *iclog, uint state) | |||
170 | (void *)NULL, (void *)NULL); | 191 | (void *)NULL, (void *)NULL); |
171 | } | 192 | } |
172 | #else | 193 | #else |
194 | |||
195 | #define xlog_trace_loggrant_alloc(log) | ||
196 | #define xlog_trace_loggrant_dealloc(log) | ||
173 | #define xlog_trace_loggrant(log,tic,string) | 197 | #define xlog_trace_loggrant(log,tic,string) |
198 | |||
199 | #define xlog_trace_iclog_alloc(iclog) | ||
200 | #define xlog_trace_iclog_dealloc(iclog) | ||
174 | #define xlog_trace_iclog(iclog,state) | 201 | #define xlog_trace_iclog(iclog,state) |
202 | |||
175 | #endif /* XFS_LOG_TRACE */ | 203 | #endif /* XFS_LOG_TRACE */ |
176 | 204 | ||
177 | 205 | ||
@@ -1005,11 +1033,12 @@ xlog_iodone(xfs_buf_t *bp) | |||
1005 | l = iclog->ic_log; | 1033 | l = iclog->ic_log; |
1006 | 1034 | ||
1007 | /* | 1035 | /* |
1008 | * If the ordered flag has been removed by a lower | 1036 | * If the _XFS_BARRIER_FAILED flag was set by a lower |
1009 | * layer, it means the underlyin device no longer supports | 1037 | * layer, it means the underlying device no longer supports |
1010 | * barrier I/O. Warn loudly and turn off barriers. | 1038 | * barrier I/O. Warn loudly and turn off barriers. |
1011 | */ | 1039 | */ |
1012 | if ((l->l_mp->m_flags & XFS_MOUNT_BARRIER) && !XFS_BUF_ORDERED(bp)) { | 1040 | if (bp->b_flags & _XFS_BARRIER_FAILED) { |
1041 | bp->b_flags &= ~_XFS_BARRIER_FAILED; | ||
1013 | l->l_mp->m_flags &= ~XFS_MOUNT_BARRIER; | 1042 | l->l_mp->m_flags &= ~XFS_MOUNT_BARRIER; |
1014 | xfs_fs_cmn_err(CE_WARN, l->l_mp, | 1043 | xfs_fs_cmn_err(CE_WARN, l->l_mp, |
1015 | "xlog_iodone: Barriers are no longer supported" | 1044 | "xlog_iodone: Barriers are no longer supported" |
@@ -1231,6 +1260,7 @@ xlog_alloc_log(xfs_mount_t *mp, | |||
1231 | spin_lock_init(&log->l_grant_lock); | 1260 | spin_lock_init(&log->l_grant_lock); |
1232 | sv_init(&log->l_flush_wait, 0, "flush_wait"); | 1261 | sv_init(&log->l_flush_wait, 0, "flush_wait"); |
1233 | 1262 | ||
1263 | xlog_trace_loggrant_alloc(log); | ||
1234 | /* log record size must be multiple of BBSIZE; see xlog_rec_header_t */ | 1264 | /* log record size must be multiple of BBSIZE; see xlog_rec_header_t */ |
1235 | ASSERT((XFS_BUF_SIZE(bp) & BBMASK) == 0); | 1265 | ASSERT((XFS_BUF_SIZE(bp) & BBMASK) == 0); |
1236 | 1266 | ||
@@ -1285,6 +1315,8 @@ xlog_alloc_log(xfs_mount_t *mp, | |||
1285 | sv_init(&iclog->ic_force_wait, SV_DEFAULT, "iclog-force"); | 1315 | sv_init(&iclog->ic_force_wait, SV_DEFAULT, "iclog-force"); |
1286 | sv_init(&iclog->ic_write_wait, SV_DEFAULT, "iclog-write"); | 1316 | sv_init(&iclog->ic_write_wait, SV_DEFAULT, "iclog-write"); |
1287 | 1317 | ||
1318 | xlog_trace_iclog_alloc(iclog); | ||
1319 | |||
1288 | iclogp = &iclog->ic_next; | 1320 | iclogp = &iclog->ic_next; |
1289 | } | 1321 | } |
1290 | *iclogp = log->l_iclog; /* complete ring */ | 1322 | *iclogp = log->l_iclog; /* complete ring */ |
@@ -1565,11 +1597,7 @@ xlog_dealloc_log(xlog_t *log) | |||
1565 | sv_destroy(&iclog->ic_force_wait); | 1597 | sv_destroy(&iclog->ic_force_wait); |
1566 | sv_destroy(&iclog->ic_write_wait); | 1598 | sv_destroy(&iclog->ic_write_wait); |
1567 | xfs_buf_free(iclog->ic_bp); | 1599 | xfs_buf_free(iclog->ic_bp); |
1568 | #ifdef XFS_LOG_TRACE | 1600 | xlog_trace_iclog_dealloc(iclog); |
1569 | if (iclog->ic_trace != NULL) { | ||
1570 | ktrace_free(iclog->ic_trace); | ||
1571 | } | ||
1572 | #endif | ||
1573 | next_iclog = iclog->ic_next; | 1601 | next_iclog = iclog->ic_next; |
1574 | kmem_free(iclog); | 1602 | kmem_free(iclog); |
1575 | iclog = next_iclog; | 1603 | iclog = next_iclog; |
@@ -1578,14 +1606,7 @@ xlog_dealloc_log(xlog_t *log) | |||
1578 | spinlock_destroy(&log->l_grant_lock); | 1606 | spinlock_destroy(&log->l_grant_lock); |
1579 | 1607 | ||
1580 | xfs_buf_free(log->l_xbuf); | 1608 | xfs_buf_free(log->l_xbuf); |
1581 | #ifdef XFS_LOG_TRACE | 1609 | xlog_trace_loggrant_dealloc(log); |
1582 | if (log->l_trace != NULL) { | ||
1583 | ktrace_free(log->l_trace); | ||
1584 | } | ||
1585 | if (log->l_grant_trace != NULL) { | ||
1586 | ktrace_free(log->l_grant_trace); | ||
1587 | } | ||
1588 | #endif | ||
1589 | log->l_mp->m_log = NULL; | 1610 | log->l_mp->m_log = NULL; |
1590 | kmem_free(log); | 1611 | kmem_free(log); |
1591 | } /* xlog_dealloc_log */ | 1612 | } /* xlog_dealloc_log */ |
diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h index c8a5b22ee3e3..e7d8f84443fa 100644 --- a/fs/xfs/xfs_log_priv.h +++ b/fs/xfs/xfs_log_priv.h | |||
@@ -448,7 +448,6 @@ typedef struct log { | |||
448 | int l_grant_write_bytes; | 448 | int l_grant_write_bytes; |
449 | 449 | ||
450 | #ifdef XFS_LOG_TRACE | 450 | #ifdef XFS_LOG_TRACE |
451 | struct ktrace *l_trace; | ||
452 | struct ktrace *l_grant_trace; | 451 | struct ktrace *l_grant_trace; |
453 | #endif | 452 | #endif |
454 | 453 | ||
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c index aa238c8fbd7a..8b6812f66a15 100644 --- a/fs/xfs/xfs_vnodeops.c +++ b/fs/xfs/xfs_vnodeops.c | |||
@@ -1838,6 +1838,12 @@ again: | |||
1838 | #endif | 1838 | #endif |
1839 | } | 1839 | } |
1840 | 1840 | ||
1841 | /* | ||
1842 | * xfs_lock_two_inodes() can only be used to lock one type of lock | ||
1843 | * at a time - the iolock or the ilock, but not both at once. If | ||
1844 | * we lock both at once, lockdep will report false positives saying | ||
1845 | * we have violated locking orders. | ||
1846 | */ | ||
1841 | void | 1847 | void |
1842 | xfs_lock_two_inodes( | 1848 | xfs_lock_two_inodes( |
1843 | xfs_inode_t *ip0, | 1849 | xfs_inode_t *ip0, |
@@ -1848,6 +1854,8 @@ xfs_lock_two_inodes( | |||
1848 | int attempts = 0; | 1854 | int attempts = 0; |
1849 | xfs_log_item_t *lp; | 1855 | xfs_log_item_t *lp; |
1850 | 1856 | ||
1857 | if (lock_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL)) | ||
1858 | ASSERT((lock_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL)) == 0); | ||
1851 | ASSERT(ip0->i_ino != ip1->i_ino); | 1859 | ASSERT(ip0->i_ino != ip1->i_ino); |
1852 | 1860 | ||
1853 | if (ip0->i_ino > ip1->i_ino) { | 1861 | if (ip0->i_ino > ip1->i_ino) { |
@@ -3152,6 +3160,13 @@ error1: /* Just cancel transaction */ | |||
3152 | /* | 3160 | /* |
3153 | * Zero file bytes between startoff and endoff inclusive. | 3161 | * Zero file bytes between startoff and endoff inclusive. |
3154 | * The iolock is held exclusive and no blocks are buffered. | 3162 | * The iolock is held exclusive and no blocks are buffered. |
3163 | * | ||
3164 | * This function is used by xfs_free_file_space() to zero | ||
3165 | * partial blocks when the range to free is not block aligned. | ||
3166 | * When unreserving space with boundaries that are not block | ||
3167 | * aligned we round up the start and round down the end | ||
3168 | * boundaries and then use this function to zero the parts of | ||
3169 | * the blocks that got dropped during the rounding. | ||
3155 | */ | 3170 | */ |
3156 | STATIC int | 3171 | STATIC int |
3157 | xfs_zero_remaining_bytes( | 3172 | xfs_zero_remaining_bytes( |
@@ -3168,6 +3183,17 @@ xfs_zero_remaining_bytes( | |||
3168 | int nimap; | 3183 | int nimap; |
3169 | int error = 0; | 3184 | int error = 0; |
3170 | 3185 | ||
3186 | /* | ||
3187 | * Avoid doing I/O beyond eof - it's not necessary | ||
3188 | * since nothing can read beyond eof. The space will | ||
3189 | * be zeroed when the file is extended anyway. | ||
3190 | */ | ||
3191 | if (startoff >= ip->i_size) | ||
3192 | return 0; | ||
3193 | |||
3194 | if (endoff > ip->i_size) | ||
3195 | endoff = ip->i_size; | ||
3196 | |||
3171 | bp = xfs_buf_get_noaddr(mp->m_sb.sb_blocksize, | 3197 | bp = xfs_buf_get_noaddr(mp->m_sb.sb_blocksize, |
3172 | XFS_IS_REALTIME_INODE(ip) ? | 3198 | XFS_IS_REALTIME_INODE(ip) ? |
3173 | mp->m_rtdev_targp : mp->m_ddev_targp); | 3199 | mp->m_rtdev_targp : mp->m_ddev_targp); |