aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/aio.c113
-rw-r--r--fs/ceph/addr.c8
-rw-r--r--fs/ceph/inode.c136
-rw-r--r--fs/cifs/cifsproto.h7
-rw-r--r--fs/cifs/cifssmb.c6
-rw-r--r--fs/cifs/dir.c11
-rw-r--r--fs/cifs/inode.c6
-rw-r--r--fs/cifs/link.c26
-rw-r--r--fs/eventpoll.c4
-rw-r--r--fs/ext2/super.c1
-rw-r--r--fs/ext4/ext4.h10
-rw-r--r--fs/ext4/ext4_jbd2.c9
-rw-r--r--fs/ext4/extents.c45
-rw-r--r--fs/ext4/inode.c12
-rw-r--r--fs/ext4/mballoc.c17
-rw-r--r--fs/ext4/super.c21
-rw-r--r--fs/gfs2/aops.c30
-rw-r--r--fs/gfs2/glock.c2
-rw-r--r--fs/gfs2/glops.c10
-rw-r--r--fs/gfs2/log.c4
-rw-r--r--fs/gfs2/meta_io.c5
-rw-r--r--fs/gfs2/ops_fstype.c12
-rw-r--r--fs/jbd2/journal.c18
-rw-r--r--fs/jbd2/recovery.c2
-rw-r--r--fs/jbd2/transaction.c16
-rw-r--r--fs/pstore/platform.c7
-rw-r--r--fs/sysfs/file.c8
-rw-r--r--fs/xfs/xfs_attr_remote.c2
-rw-r--r--fs/xfs/xfs_bmap.c32
-rw-r--r--fs/xfs/xfs_bmap_util.c15
-rw-r--r--fs/xfs/xfs_buf.c37
-rw-r--r--fs/xfs/xfs_buf.h11
-rw-r--r--fs/xfs/xfs_buf_item.c21
-rw-r--r--fs/xfs/xfs_dir2_node.c26
-rw-r--r--fs/xfs/xfs_iops.c3
-rw-r--r--fs/xfs/xfs_log_recover.c13
-rw-r--r--fs/xfs/xfs_qm.c80
-rw-r--r--fs/xfs/xfs_trans_buf.c13
38 files changed, 490 insertions, 309 deletions
diff --git a/fs/aio.c b/fs/aio.c
index 6efb7f6cb22e..062a5f6a1448 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -244,9 +244,14 @@ static void aio_free_ring(struct kioctx *ctx)
244 int i; 244 int i;
245 245
246 for (i = 0; i < ctx->nr_pages; i++) { 246 for (i = 0; i < ctx->nr_pages; i++) {
247 struct page *page;
247 pr_debug("pid(%d) [%d] page->count=%d\n", current->pid, i, 248 pr_debug("pid(%d) [%d] page->count=%d\n", current->pid, i,
248 page_count(ctx->ring_pages[i])); 249 page_count(ctx->ring_pages[i]));
249 put_page(ctx->ring_pages[i]); 250 page = ctx->ring_pages[i];
251 if (!page)
252 continue;
253 ctx->ring_pages[i] = NULL;
254 put_page(page);
250 } 255 }
251 256
252 put_aio_ring_file(ctx); 257 put_aio_ring_file(ctx);
@@ -280,18 +285,38 @@ static int aio_migratepage(struct address_space *mapping, struct page *new,
280 unsigned long flags; 285 unsigned long flags;
281 int rc; 286 int rc;
282 287
288 rc = 0;
289
290 /* Make sure the old page hasn't already been changed */
291 spin_lock(&mapping->private_lock);
292 ctx = mapping->private_data;
293 if (ctx) {
294 pgoff_t idx;
295 spin_lock_irqsave(&ctx->completion_lock, flags);
296 idx = old->index;
297 if (idx < (pgoff_t)ctx->nr_pages) {
298 if (ctx->ring_pages[idx] != old)
299 rc = -EAGAIN;
300 } else
301 rc = -EINVAL;
302 spin_unlock_irqrestore(&ctx->completion_lock, flags);
303 } else
304 rc = -EINVAL;
305 spin_unlock(&mapping->private_lock);
306
307 if (rc != 0)
308 return rc;
309
283 /* Writeback must be complete */ 310 /* Writeback must be complete */
284 BUG_ON(PageWriteback(old)); 311 BUG_ON(PageWriteback(old));
285 put_page(old); 312 get_page(new);
286 313
287 rc = migrate_page_move_mapping(mapping, new, old, NULL, mode); 314 rc = migrate_page_move_mapping(mapping, new, old, NULL, mode, 1);
288 if (rc != MIGRATEPAGE_SUCCESS) { 315 if (rc != MIGRATEPAGE_SUCCESS) {
289 get_page(old); 316 put_page(new);
290 return rc; 317 return rc;
291 } 318 }
292 319
293 get_page(new);
294
295 /* We can potentially race against kioctx teardown here. Use the 320 /* We can potentially race against kioctx teardown here. Use the
296 * address_space's private data lock to protect the mapping's 321 * address_space's private data lock to protect the mapping's
297 * private_data. 322 * private_data.
@@ -303,13 +328,24 @@ static int aio_migratepage(struct address_space *mapping, struct page *new,
303 spin_lock_irqsave(&ctx->completion_lock, flags); 328 spin_lock_irqsave(&ctx->completion_lock, flags);
304 migrate_page_copy(new, old); 329 migrate_page_copy(new, old);
305 idx = old->index; 330 idx = old->index;
306 if (idx < (pgoff_t)ctx->nr_pages) 331 if (idx < (pgoff_t)ctx->nr_pages) {
307 ctx->ring_pages[idx] = new; 332 /* And only do the move if things haven't changed */
333 if (ctx->ring_pages[idx] == old)
334 ctx->ring_pages[idx] = new;
335 else
336 rc = -EAGAIN;
337 } else
338 rc = -EINVAL;
308 spin_unlock_irqrestore(&ctx->completion_lock, flags); 339 spin_unlock_irqrestore(&ctx->completion_lock, flags);
309 } else 340 } else
310 rc = -EBUSY; 341 rc = -EBUSY;
311 spin_unlock(&mapping->private_lock); 342 spin_unlock(&mapping->private_lock);
312 343
344 if (rc == MIGRATEPAGE_SUCCESS)
345 put_page(old);
346 else
347 put_page(new);
348
313 return rc; 349 return rc;
314} 350}
315#endif 351#endif
@@ -326,7 +362,7 @@ static int aio_setup_ring(struct kioctx *ctx)
326 struct aio_ring *ring; 362 struct aio_ring *ring;
327 unsigned nr_events = ctx->max_reqs; 363 unsigned nr_events = ctx->max_reqs;
328 struct mm_struct *mm = current->mm; 364 struct mm_struct *mm = current->mm;
329 unsigned long size, populate; 365 unsigned long size, unused;
330 int nr_pages; 366 int nr_pages;
331 int i; 367 int i;
332 struct file *file; 368 struct file *file;
@@ -347,6 +383,20 @@ static int aio_setup_ring(struct kioctx *ctx)
347 return -EAGAIN; 383 return -EAGAIN;
348 } 384 }
349 385
386 ctx->aio_ring_file = file;
387 nr_events = (PAGE_SIZE * nr_pages - sizeof(struct aio_ring))
388 / sizeof(struct io_event);
389
390 ctx->ring_pages = ctx->internal_pages;
391 if (nr_pages > AIO_RING_PAGES) {
392 ctx->ring_pages = kcalloc(nr_pages, sizeof(struct page *),
393 GFP_KERNEL);
394 if (!ctx->ring_pages) {
395 put_aio_ring_file(ctx);
396 return -ENOMEM;
397 }
398 }
399
350 for (i = 0; i < nr_pages; i++) { 400 for (i = 0; i < nr_pages; i++) {
351 struct page *page; 401 struct page *page;
352 page = find_or_create_page(file->f_inode->i_mapping, 402 page = find_or_create_page(file->f_inode->i_mapping,
@@ -358,19 +408,14 @@ static int aio_setup_ring(struct kioctx *ctx)
358 SetPageUptodate(page); 408 SetPageUptodate(page);
359 SetPageDirty(page); 409 SetPageDirty(page);
360 unlock_page(page); 410 unlock_page(page);
411
412 ctx->ring_pages[i] = page;
361 } 413 }
362 ctx->aio_ring_file = file; 414 ctx->nr_pages = i;
363 nr_events = (PAGE_SIZE * nr_pages - sizeof(struct aio_ring))
364 / sizeof(struct io_event);
365 415
366 ctx->ring_pages = ctx->internal_pages; 416 if (unlikely(i != nr_pages)) {
367 if (nr_pages > AIO_RING_PAGES) { 417 aio_free_ring(ctx);
368 ctx->ring_pages = kcalloc(nr_pages, sizeof(struct page *), 418 return -EAGAIN;
369 GFP_KERNEL);
370 if (!ctx->ring_pages) {
371 put_aio_ring_file(ctx);
372 return -ENOMEM;
373 }
374 } 419 }
375 420
376 ctx->mmap_size = nr_pages * PAGE_SIZE; 421 ctx->mmap_size = nr_pages * PAGE_SIZE;
@@ -379,9 +424,9 @@ static int aio_setup_ring(struct kioctx *ctx)
379 down_write(&mm->mmap_sem); 424 down_write(&mm->mmap_sem);
380 ctx->mmap_base = do_mmap_pgoff(ctx->aio_ring_file, 0, ctx->mmap_size, 425 ctx->mmap_base = do_mmap_pgoff(ctx->aio_ring_file, 0, ctx->mmap_size,
381 PROT_READ | PROT_WRITE, 426 PROT_READ | PROT_WRITE,
382 MAP_SHARED | MAP_POPULATE, 0, &populate); 427 MAP_SHARED, 0, &unused);
428 up_write(&mm->mmap_sem);
383 if (IS_ERR((void *)ctx->mmap_base)) { 429 if (IS_ERR((void *)ctx->mmap_base)) {
384 up_write(&mm->mmap_sem);
385 ctx->mmap_size = 0; 430 ctx->mmap_size = 0;
386 aio_free_ring(ctx); 431 aio_free_ring(ctx);
387 return -EAGAIN; 432 return -EAGAIN;
@@ -389,27 +434,6 @@ static int aio_setup_ring(struct kioctx *ctx)
389 434
390 pr_debug("mmap address: 0x%08lx\n", ctx->mmap_base); 435 pr_debug("mmap address: 0x%08lx\n", ctx->mmap_base);
391 436
392 /* We must do this while still holding mmap_sem for write, as we
393 * need to be protected against userspace attempting to mremap()
394 * or munmap() the ring buffer.
395 */
396 ctx->nr_pages = get_user_pages(current, mm, ctx->mmap_base, nr_pages,
397 1, 0, ctx->ring_pages, NULL);
398
399 /* Dropping the reference here is safe as the page cache will hold
400 * onto the pages for us. It is also required so that page migration
401 * can unmap the pages and get the right reference count.
402 */
403 for (i = 0; i < ctx->nr_pages; i++)
404 put_page(ctx->ring_pages[i]);
405
406 up_write(&mm->mmap_sem);
407
408 if (unlikely(ctx->nr_pages != nr_pages)) {
409 aio_free_ring(ctx);
410 return -EAGAIN;
411 }
412
413 ctx->user_id = ctx->mmap_base; 437 ctx->user_id = ctx->mmap_base;
414 ctx->nr_events = nr_events; /* trusted copy */ 438 ctx->nr_events = nr_events; /* trusted copy */
415 439
@@ -652,7 +676,8 @@ static struct kioctx *ioctx_alloc(unsigned nr_events)
652 aio_nr += ctx->max_reqs; 676 aio_nr += ctx->max_reqs;
653 spin_unlock(&aio_nr_lock); 677 spin_unlock(&aio_nr_lock);
654 678
655 percpu_ref_get(&ctx->users); /* io_setup() will drop this ref */ 679 percpu_ref_get(&ctx->users); /* io_setup() will drop this ref */
680 percpu_ref_get(&ctx->reqs); /* free_ioctx_users() will drop this */
656 681
657 err = ioctx_add_table(ctx, mm); 682 err = ioctx_add_table(ctx, mm);
658 if (err) 683 if (err)
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index 1e561c059539..ec3ba43b9faa 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -210,9 +210,13 @@ static int readpage_nounlock(struct file *filp, struct page *page)
210 if (err < 0) { 210 if (err < 0) {
211 SetPageError(page); 211 SetPageError(page);
212 goto out; 212 goto out;
213 } else if (err < PAGE_CACHE_SIZE) { 213 } else {
214 if (err < PAGE_CACHE_SIZE) {
214 /* zero fill remainder of page */ 215 /* zero fill remainder of page */
215 zero_user_segment(page, err, PAGE_CACHE_SIZE); 216 zero_user_segment(page, err, PAGE_CACHE_SIZE);
217 } else {
218 flush_dcache_page(page);
219 }
216 } 220 }
217 SetPageUptodate(page); 221 SetPageUptodate(page);
218 222
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index 9a8e396aed89..278fd2891288 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -978,7 +978,6 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req,
978 struct ceph_mds_reply_inode *ininfo; 978 struct ceph_mds_reply_inode *ininfo;
979 struct ceph_vino vino; 979 struct ceph_vino vino;
980 struct ceph_fs_client *fsc = ceph_sb_to_client(sb); 980 struct ceph_fs_client *fsc = ceph_sb_to_client(sb);
981 int i = 0;
982 int err = 0; 981 int err = 0;
983 982
984 dout("fill_trace %p is_dentry %d is_target %d\n", req, 983 dout("fill_trace %p is_dentry %d is_target %d\n", req,
@@ -1039,6 +1038,29 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req,
1039 } 1038 }
1040 } 1039 }
1041 1040
1041 if (rinfo->head->is_target) {
1042 vino.ino = le64_to_cpu(rinfo->targeti.in->ino);
1043 vino.snap = le64_to_cpu(rinfo->targeti.in->snapid);
1044
1045 in = ceph_get_inode(sb, vino);
1046 if (IS_ERR(in)) {
1047 err = PTR_ERR(in);
1048 goto done;
1049 }
1050 req->r_target_inode = in;
1051
1052 err = fill_inode(in, &rinfo->targeti, NULL,
1053 session, req->r_request_started,
1054 (le32_to_cpu(rinfo->head->result) == 0) ?
1055 req->r_fmode : -1,
1056 &req->r_caps_reservation);
1057 if (err < 0) {
1058 pr_err("fill_inode badness %p %llx.%llx\n",
1059 in, ceph_vinop(in));
1060 goto done;
1061 }
1062 }
1063
1042 /* 1064 /*
1043 * ignore null lease/binding on snapdir ENOENT, or else we 1065 * ignore null lease/binding on snapdir ENOENT, or else we
1044 * will have trouble splicing in the virtual snapdir later 1066 * will have trouble splicing in the virtual snapdir later
@@ -1108,7 +1130,6 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req,
1108 ceph_dentry(req->r_old_dentry)->offset); 1130 ceph_dentry(req->r_old_dentry)->offset);
1109 1131
1110 dn = req->r_old_dentry; /* use old_dentry */ 1132 dn = req->r_old_dentry; /* use old_dentry */
1111 in = dn->d_inode;
1112 } 1133 }
1113 1134
1114 /* null dentry? */ 1135 /* null dentry? */
@@ -1130,44 +1151,28 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req,
1130 } 1151 }
1131 1152
1132 /* attach proper inode */ 1153 /* attach proper inode */
1133 ininfo = rinfo->targeti.in; 1154 if (!dn->d_inode) {
1134 vino.ino = le64_to_cpu(ininfo->ino); 1155 ihold(in);
1135 vino.snap = le64_to_cpu(ininfo->snapid);
1136 in = dn->d_inode;
1137 if (!in) {
1138 in = ceph_get_inode(sb, vino);
1139 if (IS_ERR(in)) {
1140 pr_err("fill_trace bad get_inode "
1141 "%llx.%llx\n", vino.ino, vino.snap);
1142 err = PTR_ERR(in);
1143 d_drop(dn);
1144 goto done;
1145 }
1146 dn = splice_dentry(dn, in, &have_lease, true); 1156 dn = splice_dentry(dn, in, &have_lease, true);
1147 if (IS_ERR(dn)) { 1157 if (IS_ERR(dn)) {
1148 err = PTR_ERR(dn); 1158 err = PTR_ERR(dn);
1149 goto done; 1159 goto done;
1150 } 1160 }
1151 req->r_dentry = dn; /* may have spliced */ 1161 req->r_dentry = dn; /* may have spliced */
1152 ihold(in); 1162 } else if (dn->d_inode && dn->d_inode != in) {
1153 } else if (ceph_ino(in) == vino.ino &&
1154 ceph_snap(in) == vino.snap) {
1155 ihold(in);
1156 } else {
1157 dout(" %p links to %p %llx.%llx, not %llx.%llx\n", 1163 dout(" %p links to %p %llx.%llx, not %llx.%llx\n",
1158 dn, in, ceph_ino(in), ceph_snap(in), 1164 dn, dn->d_inode, ceph_vinop(dn->d_inode),
1159 vino.ino, vino.snap); 1165 ceph_vinop(in));
1160 have_lease = false; 1166 have_lease = false;
1161 in = NULL;
1162 } 1167 }
1163 1168
1164 if (have_lease) 1169 if (have_lease)
1165 update_dentry_lease(dn, rinfo->dlease, session, 1170 update_dentry_lease(dn, rinfo->dlease, session,
1166 req->r_request_started); 1171 req->r_request_started);
1167 dout(" final dn %p\n", dn); 1172 dout(" final dn %p\n", dn);
1168 i++; 1173 } else if (!req->r_aborted &&
1169 } else if ((req->r_op == CEPH_MDS_OP_LOOKUPSNAP || 1174 (req->r_op == CEPH_MDS_OP_LOOKUPSNAP ||
1170 req->r_op == CEPH_MDS_OP_MKSNAP) && !req->r_aborted) { 1175 req->r_op == CEPH_MDS_OP_MKSNAP)) {
1171 struct dentry *dn = req->r_dentry; 1176 struct dentry *dn = req->r_dentry;
1172 1177
1173 /* fill out a snapdir LOOKUPSNAP dentry */ 1178 /* fill out a snapdir LOOKUPSNAP dentry */
@@ -1177,52 +1182,15 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req,
1177 ininfo = rinfo->targeti.in; 1182 ininfo = rinfo->targeti.in;
1178 vino.ino = le64_to_cpu(ininfo->ino); 1183 vino.ino = le64_to_cpu(ininfo->ino);
1179 vino.snap = le64_to_cpu(ininfo->snapid); 1184 vino.snap = le64_to_cpu(ininfo->snapid);
1180 in = ceph_get_inode(sb, vino);
1181 if (IS_ERR(in)) {
1182 pr_err("fill_inode get_inode badness %llx.%llx\n",
1183 vino.ino, vino.snap);
1184 err = PTR_ERR(in);
1185 d_delete(dn);
1186 goto done;
1187 }
1188 dout(" linking snapped dir %p to dn %p\n", in, dn); 1185 dout(" linking snapped dir %p to dn %p\n", in, dn);
1186 ihold(in);
1189 dn = splice_dentry(dn, in, NULL, true); 1187 dn = splice_dentry(dn, in, NULL, true);
1190 if (IS_ERR(dn)) { 1188 if (IS_ERR(dn)) {
1191 err = PTR_ERR(dn); 1189 err = PTR_ERR(dn);
1192 goto done; 1190 goto done;
1193 } 1191 }
1194 req->r_dentry = dn; /* may have spliced */ 1192 req->r_dentry = dn; /* may have spliced */
1195 ihold(in);
1196 rinfo->head->is_dentry = 1; /* fool notrace handlers */
1197 }
1198
1199 if (rinfo->head->is_target) {
1200 vino.ino = le64_to_cpu(rinfo->targeti.in->ino);
1201 vino.snap = le64_to_cpu(rinfo->targeti.in->snapid);
1202
1203 if (in == NULL || ceph_ino(in) != vino.ino ||
1204 ceph_snap(in) != vino.snap) {
1205 in = ceph_get_inode(sb, vino);
1206 if (IS_ERR(in)) {
1207 err = PTR_ERR(in);
1208 goto done;
1209 }
1210 }
1211 req->r_target_inode = in;
1212
1213 err = fill_inode(in,
1214 &rinfo->targeti, NULL,
1215 session, req->r_request_started,
1216 (le32_to_cpu(rinfo->head->result) == 0) ?
1217 req->r_fmode : -1,
1218 &req->r_caps_reservation);
1219 if (err < 0) {
1220 pr_err("fill_inode badness %p %llx.%llx\n",
1221 in, ceph_vinop(in));
1222 goto done;
1223 }
1224 } 1193 }
1225
1226done: 1194done:
1227 dout("fill_trace done err=%d\n", err); 1195 dout("fill_trace done err=%d\n", err);
1228 return err; 1196 return err;
@@ -1272,7 +1240,7 @@ int ceph_readdir_prepopulate(struct ceph_mds_request *req,
1272 struct qstr dname; 1240 struct qstr dname;
1273 struct dentry *dn; 1241 struct dentry *dn;
1274 struct inode *in; 1242 struct inode *in;
1275 int err = 0, i; 1243 int err = 0, ret, i;
1276 struct inode *snapdir = NULL; 1244 struct inode *snapdir = NULL;
1277 struct ceph_mds_request_head *rhead = req->r_request->front.iov_base; 1245 struct ceph_mds_request_head *rhead = req->r_request->front.iov_base;
1278 struct ceph_dentry_info *di; 1246 struct ceph_dentry_info *di;
@@ -1305,6 +1273,7 @@ int ceph_readdir_prepopulate(struct ceph_mds_request *req,
1305 ceph_fill_dirfrag(parent->d_inode, rinfo->dir_dir); 1273 ceph_fill_dirfrag(parent->d_inode, rinfo->dir_dir);
1306 } 1274 }
1307 1275
1276 /* FIXME: release caps/leases if error occurs */
1308 for (i = 0; i < rinfo->dir_nr; i++) { 1277 for (i = 0; i < rinfo->dir_nr; i++) {
1309 struct ceph_vino vino; 1278 struct ceph_vino vino;
1310 1279
@@ -1329,9 +1298,10 @@ retry_lookup:
1329 err = -ENOMEM; 1298 err = -ENOMEM;
1330 goto out; 1299 goto out;
1331 } 1300 }
1332 err = ceph_init_dentry(dn); 1301 ret = ceph_init_dentry(dn);
1333 if (err < 0) { 1302 if (ret < 0) {
1334 dput(dn); 1303 dput(dn);
1304 err = ret;
1335 goto out; 1305 goto out;
1336 } 1306 }
1337 } else if (dn->d_inode && 1307 } else if (dn->d_inode &&
@@ -1351,9 +1321,6 @@ retry_lookup:
1351 spin_unlock(&parent->d_lock); 1321 spin_unlock(&parent->d_lock);
1352 } 1322 }
1353 1323
1354 di = dn->d_fsdata;
1355 di->offset = ceph_make_fpos(frag, i + r_readdir_offset);
1356
1357 /* inode */ 1324 /* inode */
1358 if (dn->d_inode) { 1325 if (dn->d_inode) {
1359 in = dn->d_inode; 1326 in = dn->d_inode;
@@ -1366,26 +1333,39 @@ retry_lookup:
1366 err = PTR_ERR(in); 1333 err = PTR_ERR(in);
1367 goto out; 1334 goto out;
1368 } 1335 }
1369 dn = splice_dentry(dn, in, NULL, false);
1370 if (IS_ERR(dn))
1371 dn = NULL;
1372 } 1336 }
1373 1337
1374 if (fill_inode(in, &rinfo->dir_in[i], NULL, session, 1338 if (fill_inode(in, &rinfo->dir_in[i], NULL, session,
1375 req->r_request_started, -1, 1339 req->r_request_started, -1,
1376 &req->r_caps_reservation) < 0) { 1340 &req->r_caps_reservation) < 0) {
1377 pr_err("fill_inode badness on %p\n", in); 1341 pr_err("fill_inode badness on %p\n", in);
1342 if (!dn->d_inode)
1343 iput(in);
1344 d_drop(dn);
1378 goto next_item; 1345 goto next_item;
1379 } 1346 }
1380 if (dn) 1347
1381 update_dentry_lease(dn, rinfo->dir_dlease[i], 1348 if (!dn->d_inode) {
1382 req->r_session, 1349 dn = splice_dentry(dn, in, NULL, false);
1383 req->r_request_started); 1350 if (IS_ERR(dn)) {
1351 err = PTR_ERR(dn);
1352 dn = NULL;
1353 goto next_item;
1354 }
1355 }
1356
1357 di = dn->d_fsdata;
1358 di->offset = ceph_make_fpos(frag, i + r_readdir_offset);
1359
1360 update_dentry_lease(dn, rinfo->dir_dlease[i],
1361 req->r_session,
1362 req->r_request_started);
1384next_item: 1363next_item:
1385 if (dn) 1364 if (dn)
1386 dput(dn); 1365 dput(dn);
1387 } 1366 }
1388 req->r_did_prepopulate = true; 1367 if (err == 0)
1368 req->r_did_prepopulate = true;
1389 1369
1390out: 1370out:
1391 if (snapdir) { 1371 if (snapdir) {
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index aa3397620342..2c29db6a247e 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -477,9 +477,10 @@ extern int CIFSGetExtAttr(const unsigned int xid, struct cifs_tcon *tcon,
477 const int netfid, __u64 *pExtAttrBits, __u64 *pMask); 477 const int netfid, __u64 *pExtAttrBits, __u64 *pMask);
478extern void cifs_autodisable_serverino(struct cifs_sb_info *cifs_sb); 478extern void cifs_autodisable_serverino(struct cifs_sb_info *cifs_sb);
479extern bool CIFSCouldBeMFSymlink(const struct cifs_fattr *fattr); 479extern bool CIFSCouldBeMFSymlink(const struct cifs_fattr *fattr);
480extern int CIFSCheckMFSymlink(struct cifs_fattr *fattr, 480extern int CIFSCheckMFSymlink(unsigned int xid, struct cifs_tcon *tcon,
481 const unsigned char *path, 481 struct cifs_sb_info *cifs_sb,
482 struct cifs_sb_info *cifs_sb, unsigned int xid); 482 struct cifs_fattr *fattr,
483 const unsigned char *path);
483extern int mdfour(unsigned char *, unsigned char *, int); 484extern int mdfour(unsigned char *, unsigned char *, int);
484extern int E_md4hash(const unsigned char *passwd, unsigned char *p16, 485extern int E_md4hash(const unsigned char *passwd, unsigned char *p16,
485 const struct nls_table *codepage); 486 const struct nls_table *codepage);
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index 124aa0230c1b..d707edb6b852 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -4010,7 +4010,7 @@ QFileInfoRetry:
4010 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 4010 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
4011 (struct smb_hdr *) pSMBr, &bytes_returned, 0); 4011 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
4012 if (rc) { 4012 if (rc) {
4013 cifs_dbg(FYI, "Send error in QPathInfo = %d\n", rc); 4013 cifs_dbg(FYI, "Send error in QFileInfo = %d", rc);
4014 } else { /* decode response */ 4014 } else { /* decode response */
4015 rc = validate_t2((struct smb_t2_rsp *)pSMBr); 4015 rc = validate_t2((struct smb_t2_rsp *)pSMBr);
4016 4016
@@ -4179,7 +4179,7 @@ UnixQFileInfoRetry:
4179 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 4179 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
4180 (struct smb_hdr *) pSMBr, &bytes_returned, 0); 4180 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
4181 if (rc) { 4181 if (rc) {
4182 cifs_dbg(FYI, "Send error in QPathInfo = %d\n", rc); 4182 cifs_dbg(FYI, "Send error in UnixQFileInfo = %d", rc);
4183 } else { /* decode response */ 4183 } else { /* decode response */
4184 rc = validate_t2((struct smb_t2_rsp *)pSMBr); 4184 rc = validate_t2((struct smb_t2_rsp *)pSMBr);
4185 4185
@@ -4263,7 +4263,7 @@ UnixQPathInfoRetry:
4263 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 4263 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
4264 (struct smb_hdr *) pSMBr, &bytes_returned, 0); 4264 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
4265 if (rc) { 4265 if (rc) {
4266 cifs_dbg(FYI, "Send error in QPathInfo = %d\n", rc); 4266 cifs_dbg(FYI, "Send error in UnixQPathInfo = %d", rc);
4267 } else { /* decode response */ 4267 } else { /* decode response */
4268 rc = validate_t2((struct smb_t2_rsp *)pSMBr); 4268 rc = validate_t2((struct smb_t2_rsp *)pSMBr);
4269 4269
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c
index 11ff5f116b20..a514e0a65f69 100644
--- a/fs/cifs/dir.c
+++ b/fs/cifs/dir.c
@@ -193,7 +193,7 @@ check_name(struct dentry *direntry)
193static int 193static int
194cifs_do_create(struct inode *inode, struct dentry *direntry, unsigned int xid, 194cifs_do_create(struct inode *inode, struct dentry *direntry, unsigned int xid,
195 struct tcon_link *tlink, unsigned oflags, umode_t mode, 195 struct tcon_link *tlink, unsigned oflags, umode_t mode,
196 __u32 *oplock, struct cifs_fid *fid, int *created) 196 __u32 *oplock, struct cifs_fid *fid)
197{ 197{
198 int rc = -ENOENT; 198 int rc = -ENOENT;
199 int create_options = CREATE_NOT_DIR; 199 int create_options = CREATE_NOT_DIR;
@@ -349,7 +349,6 @@ cifs_do_create(struct inode *inode, struct dentry *direntry, unsigned int xid,
349 .device = 0, 349 .device = 0,
350 }; 350 };
351 351
352 *created |= FILE_CREATED;
353 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID) { 352 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID) {
354 args.uid = current_fsuid(); 353 args.uid = current_fsuid();
355 if (inode->i_mode & S_ISGID) 354 if (inode->i_mode & S_ISGID)
@@ -480,13 +479,16 @@ cifs_atomic_open(struct inode *inode, struct dentry *direntry,
480 cifs_add_pending_open(&fid, tlink, &open); 479 cifs_add_pending_open(&fid, tlink, &open);
481 480
482 rc = cifs_do_create(inode, direntry, xid, tlink, oflags, mode, 481 rc = cifs_do_create(inode, direntry, xid, tlink, oflags, mode,
483 &oplock, &fid, opened); 482 &oplock, &fid);
484 483
485 if (rc) { 484 if (rc) {
486 cifs_del_pending_open(&open); 485 cifs_del_pending_open(&open);
487 goto out; 486 goto out;
488 } 487 }
489 488
489 if ((oflags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
490 *opened |= FILE_CREATED;
491
490 rc = finish_open(file, direntry, generic_file_open, opened); 492 rc = finish_open(file, direntry, generic_file_open, opened);
491 if (rc) { 493 if (rc) {
492 if (server->ops->close) 494 if (server->ops->close)
@@ -529,7 +531,6 @@ int cifs_create(struct inode *inode, struct dentry *direntry, umode_t mode,
529 struct TCP_Server_Info *server; 531 struct TCP_Server_Info *server;
530 struct cifs_fid fid; 532 struct cifs_fid fid;
531 __u32 oplock; 533 __u32 oplock;
532 int created = FILE_CREATED;
533 534
534 cifs_dbg(FYI, "cifs_create parent inode = 0x%p name is: %s and dentry = 0x%p\n", 535 cifs_dbg(FYI, "cifs_create parent inode = 0x%p name is: %s and dentry = 0x%p\n",
535 inode, direntry->d_name.name, direntry); 536 inode, direntry->d_name.name, direntry);
@@ -546,7 +547,7 @@ int cifs_create(struct inode *inode, struct dentry *direntry, umode_t mode,
546 server->ops->new_lease_key(&fid); 547 server->ops->new_lease_key(&fid);
547 548
548 rc = cifs_do_create(inode, direntry, xid, tlink, oflags, mode, 549 rc = cifs_do_create(inode, direntry, xid, tlink, oflags, mode,
549 &oplock, &fid, &created); 550 &oplock, &fid);
550 if (!rc && server->ops->close) 551 if (!rc && server->ops->close)
551 server->ops->close(xid, tcon, &fid); 552 server->ops->close(xid, tcon, &fid);
552 553
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 36f9ebb93ceb..49719b8228e5 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -383,7 +383,8 @@ int cifs_get_inode_info_unix(struct inode **pinode,
383 383
384 /* check for Minshall+French symlinks */ 384 /* check for Minshall+French symlinks */
385 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MF_SYMLINKS) { 385 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MF_SYMLINKS) {
386 int tmprc = CIFSCheckMFSymlink(&fattr, full_path, cifs_sb, xid); 386 int tmprc = CIFSCheckMFSymlink(xid, tcon, cifs_sb, &fattr,
387 full_path);
387 if (tmprc) 388 if (tmprc)
388 cifs_dbg(FYI, "CIFSCheckMFSymlink: %d\n", tmprc); 389 cifs_dbg(FYI, "CIFSCheckMFSymlink: %d\n", tmprc);
389 } 390 }
@@ -799,7 +800,8 @@ cifs_get_inode_info(struct inode **inode, const char *full_path,
799 800
800 /* check for Minshall+French symlinks */ 801 /* check for Minshall+French symlinks */
801 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MF_SYMLINKS) { 802 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MF_SYMLINKS) {
802 tmprc = CIFSCheckMFSymlink(&fattr, full_path, cifs_sb, xid); 803 tmprc = CIFSCheckMFSymlink(xid, tcon, cifs_sb, &fattr,
804 full_path);
803 if (tmprc) 805 if (tmprc)
804 cifs_dbg(FYI, "CIFSCheckMFSymlink: %d\n", tmprc); 806 cifs_dbg(FYI, "CIFSCheckMFSymlink: %d\n", tmprc);
805 } 807 }
diff --git a/fs/cifs/link.c b/fs/cifs/link.c
index cc0234710ddb..92aee08483a5 100644
--- a/fs/cifs/link.c
+++ b/fs/cifs/link.c
@@ -354,34 +354,30 @@ open_query_close_cifs_symlink(const unsigned char *path, char *pbuf,
354 354
355 355
356int 356int
357CIFSCheckMFSymlink(struct cifs_fattr *fattr, 357CIFSCheckMFSymlink(unsigned int xid, struct cifs_tcon *tcon,
358 const unsigned char *path, 358 struct cifs_sb_info *cifs_sb, struct cifs_fattr *fattr,
359 struct cifs_sb_info *cifs_sb, unsigned int xid) 359 const unsigned char *path)
360{ 360{
361 int rc = 0; 361 int rc;
362 u8 *buf = NULL; 362 u8 *buf = NULL;
363 unsigned int link_len = 0; 363 unsigned int link_len = 0;
364 unsigned int bytes_read = 0; 364 unsigned int bytes_read = 0;
365 struct cifs_tcon *ptcon;
366 365
367 if (!CIFSCouldBeMFSymlink(fattr)) 366 if (!CIFSCouldBeMFSymlink(fattr))
368 /* it's not a symlink */ 367 /* it's not a symlink */
369 return 0; 368 return 0;
370 369
371 buf = kmalloc(CIFS_MF_SYMLINK_FILE_SIZE, GFP_KERNEL); 370 buf = kmalloc(CIFS_MF_SYMLINK_FILE_SIZE, GFP_KERNEL);
372 if (!buf) { 371 if (!buf)
373 rc = -ENOMEM; 372 return -ENOMEM;
374 goto out;
375 }
376 373
377 ptcon = tlink_tcon(cifs_sb_tlink(cifs_sb)); 374 if (tcon->ses->server->ops->query_mf_symlink)
378 if ((ptcon->ses) && (ptcon->ses->server->ops->query_mf_symlink)) 375 rc = tcon->ses->server->ops->query_mf_symlink(path, buf,
379 rc = ptcon->ses->server->ops->query_mf_symlink(path, buf, 376 &bytes_read, cifs_sb, xid);
380 &bytes_read, cifs_sb, xid);
381 else 377 else
382 goto out; 378 rc = -ENOSYS;
383 379
384 if (rc != 0) 380 if (rc)
385 goto out; 381 goto out;
386 382
387 if (bytes_read == 0) /* not a symlink */ 383 if (bytes_read == 0) /* not a symlink */
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 8b5e2584c840..af903128891c 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -1907,10 +1907,6 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
1907 } 1907 }
1908 } 1908 }
1909 } 1909 }
1910 if (op == EPOLL_CTL_DEL && is_file_epoll(tf.file)) {
1911 tep = tf.file->private_data;
1912 mutex_lock_nested(&tep->mtx, 1);
1913 }
1914 1910
1915 /* 1911 /*
1916 * Try to lookup the file inside our RB tree, Since we grabbed "mtx" 1912 * Try to lookup the file inside our RB tree, Since we grabbed "mtx"
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index 288534920fe5..20d6697bd638 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -1493,6 +1493,7 @@ static ssize_t ext2_quota_write(struct super_block *sb, int type,
1493 sb->s_blocksize - offset : towrite; 1493 sb->s_blocksize - offset : towrite;
1494 1494
1495 tmp_bh.b_state = 0; 1495 tmp_bh.b_state = 0;
1496 tmp_bh.b_size = sb->s_blocksize;
1496 err = ext2_get_block(inode, blk, &tmp_bh, 1); 1497 err = ext2_get_block(inode, blk, &tmp_bh, 1);
1497 if (err < 0) 1498 if (err < 0)
1498 goto out; 1499 goto out;
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index e6185031c1cc..ece55565b9cd 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -268,6 +268,16 @@ struct ext4_io_submit {
268/* Translate # of blks to # of clusters */ 268/* Translate # of blks to # of clusters */
269#define EXT4_NUM_B2C(sbi, blks) (((blks) + (sbi)->s_cluster_ratio - 1) >> \ 269#define EXT4_NUM_B2C(sbi, blks) (((blks) + (sbi)->s_cluster_ratio - 1) >> \
270 (sbi)->s_cluster_bits) 270 (sbi)->s_cluster_bits)
271/* Mask out the low bits to get the starting block of the cluster */
272#define EXT4_PBLK_CMASK(s, pblk) ((pblk) & \
273 ~((ext4_fsblk_t) (s)->s_cluster_ratio - 1))
274#define EXT4_LBLK_CMASK(s, lblk) ((lblk) & \
275 ~((ext4_lblk_t) (s)->s_cluster_ratio - 1))
276/* Get the cluster offset */
277#define EXT4_PBLK_COFF(s, pblk) ((pblk) & \
278 ((ext4_fsblk_t) (s)->s_cluster_ratio - 1))
279#define EXT4_LBLK_COFF(s, lblk) ((lblk) & \
280 ((ext4_lblk_t) (s)->s_cluster_ratio - 1))
271 281
272/* 282/*
273 * Structure of a blocks group descriptor 283 * Structure of a blocks group descriptor
diff --git a/fs/ext4/ext4_jbd2.c b/fs/ext4/ext4_jbd2.c
index 17ac112ab101..3fe29de832c8 100644
--- a/fs/ext4/ext4_jbd2.c
+++ b/fs/ext4/ext4_jbd2.c
@@ -259,6 +259,15 @@ int __ext4_handle_dirty_metadata(const char *where, unsigned int line,
259 if (WARN_ON_ONCE(err)) { 259 if (WARN_ON_ONCE(err)) {
260 ext4_journal_abort_handle(where, line, __func__, bh, 260 ext4_journal_abort_handle(where, line, __func__, bh,
261 handle, err); 261 handle, err);
262 ext4_error_inode(inode, where, line,
263 bh->b_blocknr,
264 "journal_dirty_metadata failed: "
265 "handle type %u started at line %u, "
266 "credits %u/%u, errcode %d",
267 handle->h_type,
268 handle->h_line_no,
269 handle->h_requested_credits,
270 handle->h_buffer_credits, err);
262 } 271 }
263 } else { 272 } else {
264 if (inode) 273 if (inode)
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 35f65cf4f318..3384dc4bed40 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -360,8 +360,10 @@ static int ext4_valid_extent(struct inode *inode, struct ext4_extent *ext)
360{ 360{
361 ext4_fsblk_t block = ext4_ext_pblock(ext); 361 ext4_fsblk_t block = ext4_ext_pblock(ext);
362 int len = ext4_ext_get_actual_len(ext); 362 int len = ext4_ext_get_actual_len(ext);
363 ext4_lblk_t lblock = le32_to_cpu(ext->ee_block);
364 ext4_lblk_t last = lblock + len - 1;
363 365
364 if (len == 0) 366 if (lblock > last)
365 return 0; 367 return 0;
366 return ext4_data_block_valid(EXT4_SB(inode->i_sb), block, len); 368 return ext4_data_block_valid(EXT4_SB(inode->i_sb), block, len);
367} 369}
@@ -387,11 +389,26 @@ static int ext4_valid_extent_entries(struct inode *inode,
387 if (depth == 0) { 389 if (depth == 0) {
388 /* leaf entries */ 390 /* leaf entries */
389 struct ext4_extent *ext = EXT_FIRST_EXTENT(eh); 391 struct ext4_extent *ext = EXT_FIRST_EXTENT(eh);
392 struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es;
393 ext4_fsblk_t pblock = 0;
394 ext4_lblk_t lblock = 0;
395 ext4_lblk_t prev = 0;
396 int len = 0;
390 while (entries) { 397 while (entries) {
391 if (!ext4_valid_extent(inode, ext)) 398 if (!ext4_valid_extent(inode, ext))
392 return 0; 399 return 0;
400
401 /* Check for overlapping extents */
402 lblock = le32_to_cpu(ext->ee_block);
403 len = ext4_ext_get_actual_len(ext);
404 if ((lblock <= prev) && prev) {
405 pblock = ext4_ext_pblock(ext);
406 es->s_last_error_block = cpu_to_le64(pblock);
407 return 0;
408 }
393 ext++; 409 ext++;
394 entries--; 410 entries--;
411 prev = lblock + len - 1;
395 } 412 }
396 } else { 413 } else {
397 struct ext4_extent_idx *ext_idx = EXT_FIRST_INDEX(eh); 414 struct ext4_extent_idx *ext_idx = EXT_FIRST_INDEX(eh);
@@ -1834,8 +1851,7 @@ static unsigned int ext4_ext_check_overlap(struct ext4_sb_info *sbi,
1834 depth = ext_depth(inode); 1851 depth = ext_depth(inode);
1835 if (!path[depth].p_ext) 1852 if (!path[depth].p_ext)
1836 goto out; 1853 goto out;
1837 b2 = le32_to_cpu(path[depth].p_ext->ee_block); 1854 b2 = EXT4_LBLK_CMASK(sbi, le32_to_cpu(path[depth].p_ext->ee_block));
1838 b2 &= ~(sbi->s_cluster_ratio - 1);
1839 1855
1840 /* 1856 /*
1841 * get the next allocated block if the extent in the path 1857 * get the next allocated block if the extent in the path
@@ -1845,7 +1861,7 @@ static unsigned int ext4_ext_check_overlap(struct ext4_sb_info *sbi,
1845 b2 = ext4_ext_next_allocated_block(path); 1861 b2 = ext4_ext_next_allocated_block(path);
1846 if (b2 == EXT_MAX_BLOCKS) 1862 if (b2 == EXT_MAX_BLOCKS)
1847 goto out; 1863 goto out;
1848 b2 &= ~(sbi->s_cluster_ratio - 1); 1864 b2 = EXT4_LBLK_CMASK(sbi, b2);
1849 } 1865 }
1850 1866
1851 /* check for wrap through zero on extent logical start block*/ 1867 /* check for wrap through zero on extent logical start block*/
@@ -2504,7 +2520,7 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode,
2504 * extent, we have to mark the cluster as used (store negative 2520 * extent, we have to mark the cluster as used (store negative
2505 * cluster number in partial_cluster). 2521 * cluster number in partial_cluster).
2506 */ 2522 */
2507 unaligned = pblk & (sbi->s_cluster_ratio - 1); 2523 unaligned = EXT4_PBLK_COFF(sbi, pblk);
2508 if (unaligned && (ee_len == num) && 2524 if (unaligned && (ee_len == num) &&
2509 (*partial_cluster != -((long long)EXT4_B2C(sbi, pblk)))) 2525 (*partial_cluster != -((long long)EXT4_B2C(sbi, pblk))))
2510 *partial_cluster = EXT4_B2C(sbi, pblk); 2526 *partial_cluster = EXT4_B2C(sbi, pblk);
@@ -2598,7 +2614,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
2598 * accidentally freeing it later on 2614 * accidentally freeing it later on
2599 */ 2615 */
2600 pblk = ext4_ext_pblock(ex); 2616 pblk = ext4_ext_pblock(ex);
2601 if (pblk & (sbi->s_cluster_ratio - 1)) 2617 if (EXT4_PBLK_COFF(sbi, pblk))
2602 *partial_cluster = 2618 *partial_cluster =
2603 -((long long)EXT4_B2C(sbi, pblk)); 2619 -((long long)EXT4_B2C(sbi, pblk));
2604 ex--; 2620 ex--;
@@ -3753,7 +3769,7 @@ int ext4_find_delalloc_cluster(struct inode *inode, ext4_lblk_t lblk)
3753{ 3769{
3754 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); 3770 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
3755 ext4_lblk_t lblk_start, lblk_end; 3771 ext4_lblk_t lblk_start, lblk_end;
3756 lblk_start = lblk & (~(sbi->s_cluster_ratio - 1)); 3772 lblk_start = EXT4_LBLK_CMASK(sbi, lblk);
3757 lblk_end = lblk_start + sbi->s_cluster_ratio - 1; 3773 lblk_end = lblk_start + sbi->s_cluster_ratio - 1;
3758 3774
3759 return ext4_find_delalloc_range(inode, lblk_start, lblk_end); 3775 return ext4_find_delalloc_range(inode, lblk_start, lblk_end);
@@ -3812,9 +3828,9 @@ get_reserved_cluster_alloc(struct inode *inode, ext4_lblk_t lblk_start,
3812 trace_ext4_get_reserved_cluster_alloc(inode, lblk_start, num_blks); 3828 trace_ext4_get_reserved_cluster_alloc(inode, lblk_start, num_blks);
3813 3829
3814 /* Check towards left side */ 3830 /* Check towards left side */
3815 c_offset = lblk_start & (sbi->s_cluster_ratio - 1); 3831 c_offset = EXT4_LBLK_COFF(sbi, lblk_start);
3816 if (c_offset) { 3832 if (c_offset) {
3817 lblk_from = lblk_start & (~(sbi->s_cluster_ratio - 1)); 3833 lblk_from = EXT4_LBLK_CMASK(sbi, lblk_start);
3818 lblk_to = lblk_from + c_offset - 1; 3834 lblk_to = lblk_from + c_offset - 1;
3819 3835
3820 if (ext4_find_delalloc_range(inode, lblk_from, lblk_to)) 3836 if (ext4_find_delalloc_range(inode, lblk_from, lblk_to))
@@ -3822,7 +3838,7 @@ get_reserved_cluster_alloc(struct inode *inode, ext4_lblk_t lblk_start,
3822 } 3838 }
3823 3839
3824 /* Now check towards right. */ 3840 /* Now check towards right. */
3825 c_offset = (lblk_start + num_blks) & (sbi->s_cluster_ratio - 1); 3841 c_offset = EXT4_LBLK_COFF(sbi, lblk_start + num_blks);
3826 if (allocated_clusters && c_offset) { 3842 if (allocated_clusters && c_offset) {
3827 lblk_from = lblk_start + num_blks; 3843 lblk_from = lblk_start + num_blks;
3828 lblk_to = lblk_from + (sbi->s_cluster_ratio - c_offset) - 1; 3844 lblk_to = lblk_from + (sbi->s_cluster_ratio - c_offset) - 1;
@@ -4030,7 +4046,7 @@ static int get_implied_cluster_alloc(struct super_block *sb,
4030 struct ext4_ext_path *path) 4046 struct ext4_ext_path *path)
4031{ 4047{
4032 struct ext4_sb_info *sbi = EXT4_SB(sb); 4048 struct ext4_sb_info *sbi = EXT4_SB(sb);
4033 ext4_lblk_t c_offset = map->m_lblk & (sbi->s_cluster_ratio-1); 4049 ext4_lblk_t c_offset = EXT4_LBLK_COFF(sbi, map->m_lblk);
4034 ext4_lblk_t ex_cluster_start, ex_cluster_end; 4050 ext4_lblk_t ex_cluster_start, ex_cluster_end;
4035 ext4_lblk_t rr_cluster_start; 4051 ext4_lblk_t rr_cluster_start;
4036 ext4_lblk_t ee_block = le32_to_cpu(ex->ee_block); 4052 ext4_lblk_t ee_block = le32_to_cpu(ex->ee_block);
@@ -4048,8 +4064,7 @@ static int get_implied_cluster_alloc(struct super_block *sb,
4048 (rr_cluster_start == ex_cluster_start)) { 4064 (rr_cluster_start == ex_cluster_start)) {
4049 if (rr_cluster_start == ex_cluster_end) 4065 if (rr_cluster_start == ex_cluster_end)
4050 ee_start += ee_len - 1; 4066 ee_start += ee_len - 1;
4051 map->m_pblk = (ee_start & ~(sbi->s_cluster_ratio - 1)) + 4067 map->m_pblk = EXT4_PBLK_CMASK(sbi, ee_start) + c_offset;
4052 c_offset;
4053 map->m_len = min(map->m_len, 4068 map->m_len = min(map->m_len,
4054 (unsigned) sbi->s_cluster_ratio - c_offset); 4069 (unsigned) sbi->s_cluster_ratio - c_offset);
4055 /* 4070 /*
@@ -4203,7 +4218,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
4203 */ 4218 */
4204 map->m_flags &= ~EXT4_MAP_FROM_CLUSTER; 4219 map->m_flags &= ~EXT4_MAP_FROM_CLUSTER;
4205 newex.ee_block = cpu_to_le32(map->m_lblk); 4220 newex.ee_block = cpu_to_le32(map->m_lblk);
4206 cluster_offset = map->m_lblk & (sbi->s_cluster_ratio-1); 4221 cluster_offset = EXT4_LBLK_COFF(sbi, map->m_lblk);
4207 4222
4208 /* 4223 /*
4209 * If we are doing bigalloc, check to see if the extent returned 4224 * If we are doing bigalloc, check to see if the extent returned
@@ -4271,7 +4286,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
4271 * needed so that future calls to get_implied_cluster_alloc() 4286 * needed so that future calls to get_implied_cluster_alloc()
4272 * work correctly. 4287 * work correctly.
4273 */ 4288 */
4274 offset = map->m_lblk & (sbi->s_cluster_ratio - 1); 4289 offset = EXT4_LBLK_COFF(sbi, map->m_lblk);
4275 ar.len = EXT4_NUM_B2C(sbi, offset+allocated); 4290 ar.len = EXT4_NUM_B2C(sbi, offset+allocated);
4276 ar.goal -= offset; 4291 ar.goal -= offset;
4277 ar.logical -= offset; 4292 ar.logical -= offset;
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 075763474118..61d49ff22c81 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -1206,7 +1206,6 @@ static int ext4_journalled_write_end(struct file *file,
1206 */ 1206 */
1207static int ext4_da_reserve_metadata(struct inode *inode, ext4_lblk_t lblock) 1207static int ext4_da_reserve_metadata(struct inode *inode, ext4_lblk_t lblock)
1208{ 1208{
1209 int retries = 0;
1210 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); 1209 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
1211 struct ext4_inode_info *ei = EXT4_I(inode); 1210 struct ext4_inode_info *ei = EXT4_I(inode);
1212 unsigned int md_needed; 1211 unsigned int md_needed;
@@ -1218,7 +1217,6 @@ static int ext4_da_reserve_metadata(struct inode *inode, ext4_lblk_t lblock)
1218 * in order to allocate nrblocks 1217 * in order to allocate nrblocks
1219 * worse case is one extent per block 1218 * worse case is one extent per block
1220 */ 1219 */
1221repeat:
1222 spin_lock(&ei->i_block_reservation_lock); 1220 spin_lock(&ei->i_block_reservation_lock);
1223 /* 1221 /*
1224 * ext4_calc_metadata_amount() has side effects, which we have 1222 * ext4_calc_metadata_amount() has side effects, which we have
@@ -1238,10 +1236,6 @@ repeat:
1238 ei->i_da_metadata_calc_len = save_len; 1236 ei->i_da_metadata_calc_len = save_len;
1239 ei->i_da_metadata_calc_last_lblock = save_last_lblock; 1237 ei->i_da_metadata_calc_last_lblock = save_last_lblock;
1240 spin_unlock(&ei->i_block_reservation_lock); 1238 spin_unlock(&ei->i_block_reservation_lock);
1241 if (ext4_should_retry_alloc(inode->i_sb, &retries)) {
1242 cond_resched();
1243 goto repeat;
1244 }
1245 return -ENOSPC; 1239 return -ENOSPC;
1246 } 1240 }
1247 ei->i_reserved_meta_blocks += md_needed; 1241 ei->i_reserved_meta_blocks += md_needed;
@@ -1255,7 +1249,6 @@ repeat:
1255 */ 1249 */
1256static int ext4_da_reserve_space(struct inode *inode, ext4_lblk_t lblock) 1250static int ext4_da_reserve_space(struct inode *inode, ext4_lblk_t lblock)
1257{ 1251{
1258 int retries = 0;
1259 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); 1252 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
1260 struct ext4_inode_info *ei = EXT4_I(inode); 1253 struct ext4_inode_info *ei = EXT4_I(inode);
1261 unsigned int md_needed; 1254 unsigned int md_needed;
@@ -1277,7 +1270,6 @@ static int ext4_da_reserve_space(struct inode *inode, ext4_lblk_t lblock)
1277 * in order to allocate nrblocks 1270 * in order to allocate nrblocks
1278 * worse case is one extent per block 1271 * worse case is one extent per block
1279 */ 1272 */
1280repeat:
1281 spin_lock(&ei->i_block_reservation_lock); 1273 spin_lock(&ei->i_block_reservation_lock);
1282 /* 1274 /*
1283 * ext4_calc_metadata_amount() has side effects, which we have 1275 * ext4_calc_metadata_amount() has side effects, which we have
@@ -1297,10 +1289,6 @@ repeat:
1297 ei->i_da_metadata_calc_len = save_len; 1289 ei->i_da_metadata_calc_len = save_len;
1298 ei->i_da_metadata_calc_last_lblock = save_last_lblock; 1290 ei->i_da_metadata_calc_last_lblock = save_last_lblock;
1299 spin_unlock(&ei->i_block_reservation_lock); 1291 spin_unlock(&ei->i_block_reservation_lock);
1300 if (ext4_should_retry_alloc(inode->i_sb, &retries)) {
1301 cond_resched();
1302 goto repeat;
1303 }
1304 dquot_release_reservation_block(inode, EXT4_C2B(sbi, 1)); 1292 dquot_release_reservation_block(inode, EXT4_C2B(sbi, 1));
1305 return -ENOSPC; 1293 return -ENOSPC;
1306 } 1294 }
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 4d113efa024c..04a5c7504be9 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -3442,6 +3442,9 @@ static void ext4_mb_pa_callback(struct rcu_head *head)
3442{ 3442{
3443 struct ext4_prealloc_space *pa; 3443 struct ext4_prealloc_space *pa;
3444 pa = container_of(head, struct ext4_prealloc_space, u.pa_rcu); 3444 pa = container_of(head, struct ext4_prealloc_space, u.pa_rcu);
3445
3446 BUG_ON(atomic_read(&pa->pa_count));
3447 BUG_ON(pa->pa_deleted == 0);
3445 kmem_cache_free(ext4_pspace_cachep, pa); 3448 kmem_cache_free(ext4_pspace_cachep, pa);
3446} 3449}
3447 3450
@@ -3455,11 +3458,13 @@ static void ext4_mb_put_pa(struct ext4_allocation_context *ac,
3455 ext4_group_t grp; 3458 ext4_group_t grp;
3456 ext4_fsblk_t grp_blk; 3459 ext4_fsblk_t grp_blk;
3457 3460
3458 if (!atomic_dec_and_test(&pa->pa_count) || pa->pa_free != 0)
3459 return;
3460
3461 /* in this short window concurrent discard can set pa_deleted */ 3461 /* in this short window concurrent discard can set pa_deleted */
3462 spin_lock(&pa->pa_lock); 3462 spin_lock(&pa->pa_lock);
3463 if (!atomic_dec_and_test(&pa->pa_count) || pa->pa_free != 0) {
3464 spin_unlock(&pa->pa_lock);
3465 return;
3466 }
3467
3463 if (pa->pa_deleted == 1) { 3468 if (pa->pa_deleted == 1) {
3464 spin_unlock(&pa->pa_lock); 3469 spin_unlock(&pa->pa_lock);
3465 return; 3470 return;
@@ -4121,7 +4126,7 @@ ext4_mb_initialize_context(struct ext4_allocation_context *ac,
4121 ext4_get_group_no_and_offset(sb, goal, &group, &block); 4126 ext4_get_group_no_and_offset(sb, goal, &group, &block);
4122 4127
4123 /* set up allocation goals */ 4128 /* set up allocation goals */
4124 ac->ac_b_ex.fe_logical = ar->logical & ~(sbi->s_cluster_ratio - 1); 4129 ac->ac_b_ex.fe_logical = EXT4_LBLK_CMASK(sbi, ar->logical);
4125 ac->ac_status = AC_STATUS_CONTINUE; 4130 ac->ac_status = AC_STATUS_CONTINUE;
4126 ac->ac_sb = sb; 4131 ac->ac_sb = sb;
4127 ac->ac_inode = ar->inode; 4132 ac->ac_inode = ar->inode;
@@ -4663,7 +4668,7 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode,
4663 * blocks at the beginning or the end unless we are explicitly 4668 * blocks at the beginning or the end unless we are explicitly
4664 * requested to avoid doing so. 4669 * requested to avoid doing so.
4665 */ 4670 */
4666 overflow = block & (sbi->s_cluster_ratio - 1); 4671 overflow = EXT4_PBLK_COFF(sbi, block);
4667 if (overflow) { 4672 if (overflow) {
4668 if (flags & EXT4_FREE_BLOCKS_NOFREE_FIRST_CLUSTER) { 4673 if (flags & EXT4_FREE_BLOCKS_NOFREE_FIRST_CLUSTER) {
4669 overflow = sbi->s_cluster_ratio - overflow; 4674 overflow = sbi->s_cluster_ratio - overflow;
@@ -4677,7 +4682,7 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode,
4677 count += overflow; 4682 count += overflow;
4678 } 4683 }
4679 } 4684 }
4680 overflow = count & (sbi->s_cluster_ratio - 1); 4685 overflow = EXT4_LBLK_COFF(sbi, count);
4681 if (overflow) { 4686 if (overflow) {
4682 if (flags & EXT4_FREE_BLOCKS_NOFREE_LAST_CLUSTER) { 4687 if (flags & EXT4_FREE_BLOCKS_NOFREE_LAST_CLUSTER) {
4683 if (count > overflow) 4688 if (count > overflow)
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index c977f4e4e63b..1f7784de05b6 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -792,7 +792,7 @@ static void ext4_put_super(struct super_block *sb)
792 } 792 }
793 793
794 ext4_es_unregister_shrinker(sbi); 794 ext4_es_unregister_shrinker(sbi);
795 del_timer(&sbi->s_err_report); 795 del_timer_sync(&sbi->s_err_report);
796 ext4_release_system_zone(sb); 796 ext4_release_system_zone(sb);
797 ext4_mb_release(sb); 797 ext4_mb_release(sb);
798 ext4_ext_release(sb); 798 ext4_ext_release(sb);
@@ -3316,11 +3316,19 @@ int ext4_calculate_overhead(struct super_block *sb)
3316} 3316}
3317 3317
3318 3318
3319static ext4_fsblk_t ext4_calculate_resv_clusters(struct ext4_sb_info *sbi) 3319static ext4_fsblk_t ext4_calculate_resv_clusters(struct super_block *sb)
3320{ 3320{
3321 ext4_fsblk_t resv_clusters; 3321 ext4_fsblk_t resv_clusters;
3322 3322
3323 /* 3323 /*
3324 * There's no need to reserve anything when we aren't using extents.
3325 * The space estimates are exact, there are no unwritten extents,
3326 * hole punching doesn't need new metadata... This is needed especially
3327 * to keep ext2/3 backward compatibility.
3328 */
3329 if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_EXTENTS))
3330 return 0;
3331 /*
3324 * By default we reserve 2% or 4096 clusters, whichever is smaller. 3332 * By default we reserve 2% or 4096 clusters, whichever is smaller.
3325 * This should cover the situations where we can not afford to run 3333 * This should cover the situations where we can not afford to run
3326 * out of space like for example punch hole, or converting 3334 * out of space like for example punch hole, or converting
@@ -3328,7 +3336,8 @@ static ext4_fsblk_t ext4_calculate_resv_clusters(struct ext4_sb_info *sbi)
3328 * allocation would require 1, or 2 blocks, higher numbers are 3336 * allocation would require 1, or 2 blocks, higher numbers are
3329 * very rare. 3337 * very rare.
3330 */ 3338 */
3331 resv_clusters = ext4_blocks_count(sbi->s_es) >> sbi->s_cluster_bits; 3339 resv_clusters = ext4_blocks_count(EXT4_SB(sb)->s_es) >>
3340 EXT4_SB(sb)->s_cluster_bits;
3332 3341
3333 do_div(resv_clusters, 50); 3342 do_div(resv_clusters, 50);
3334 resv_clusters = min_t(ext4_fsblk_t, resv_clusters, 4096); 3343 resv_clusters = min_t(ext4_fsblk_t, resv_clusters, 4096);
@@ -4071,10 +4080,10 @@ no_journal:
4071 "available"); 4080 "available");
4072 } 4081 }
4073 4082
4074 err = ext4_reserve_clusters(sbi, ext4_calculate_resv_clusters(sbi)); 4083 err = ext4_reserve_clusters(sbi, ext4_calculate_resv_clusters(sb));
4075 if (err) { 4084 if (err) {
4076 ext4_msg(sb, KERN_ERR, "failed to reserve %llu clusters for " 4085 ext4_msg(sb, KERN_ERR, "failed to reserve %llu clusters for "
4077 "reserved pool", ext4_calculate_resv_clusters(sbi)); 4086 "reserved pool", ext4_calculate_resv_clusters(sb));
4078 goto failed_mount4a; 4087 goto failed_mount4a;
4079 } 4088 }
4080 4089
@@ -4184,7 +4193,7 @@ failed_mount_wq:
4184 } 4193 }
4185failed_mount3: 4194failed_mount3:
4186 ext4_es_unregister_shrinker(sbi); 4195 ext4_es_unregister_shrinker(sbi);
4187 del_timer(&sbi->s_err_report); 4196 del_timer_sync(&sbi->s_err_report);
4188 if (sbi->s_flex_groups) 4197 if (sbi->s_flex_groups)
4189 ext4_kvfree(sbi->s_flex_groups); 4198 ext4_kvfree(sbi->s_flex_groups);
4190 percpu_counter_destroy(&sbi->s_freeclusters_counter); 4199 percpu_counter_destroy(&sbi->s_freeclusters_counter);
diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c
index b7fc035a6943..73f3e4ee4037 100644
--- a/fs/gfs2/aops.c
+++ b/fs/gfs2/aops.c
@@ -986,6 +986,7 @@ static ssize_t gfs2_direct_IO(int rw, struct kiocb *iocb,
986{ 986{
987 struct file *file = iocb->ki_filp; 987 struct file *file = iocb->ki_filp;
988 struct inode *inode = file->f_mapping->host; 988 struct inode *inode = file->f_mapping->host;
989 struct address_space *mapping = inode->i_mapping;
989 struct gfs2_inode *ip = GFS2_I(inode); 990 struct gfs2_inode *ip = GFS2_I(inode);
990 struct gfs2_holder gh; 991 struct gfs2_holder gh;
991 int rv; 992 int rv;
@@ -1006,6 +1007,35 @@ static ssize_t gfs2_direct_IO(int rw, struct kiocb *iocb,
1006 if (rv != 1) 1007 if (rv != 1)
1007 goto out; /* dio not valid, fall back to buffered i/o */ 1008 goto out; /* dio not valid, fall back to buffered i/o */
1008 1009
1010 /*
1011 * Now since we are holding a deferred (CW) lock at this point, you
1012 * might be wondering why this is ever needed. There is a case however
1013 * where we've granted a deferred local lock against a cached exclusive
1014 * glock. That is ok provided all granted local locks are deferred, but
1015 * it also means that it is possible to encounter pages which are
1016 * cached and possibly also mapped. So here we check for that and sort
1017 * them out ahead of the dio. The glock state machine will take care of
1018 * everything else.
1019 *
1020 * If in fact the cached glock state (gl->gl_state) is deferred (CW) in
1021 * the first place, mapping->nr_pages will always be zero.
1022 */
1023 if (mapping->nrpages) {
1024 loff_t lstart = offset & (PAGE_CACHE_SIZE - 1);
1025 loff_t len = iov_length(iov, nr_segs);
1026 loff_t end = PAGE_ALIGN(offset + len) - 1;
1027
1028 rv = 0;
1029 if (len == 0)
1030 goto out;
1031 if (test_and_clear_bit(GIF_SW_PAGED, &ip->i_flags))
1032 unmap_shared_mapping_range(ip->i_inode.i_mapping, offset, len);
1033 rv = filemap_write_and_wait_range(mapping, lstart, end);
1034 if (rv)
1035 return rv;
1036 truncate_inode_pages_range(mapping, lstart, end);
1037 }
1038
1009 rv = __blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov, 1039 rv = __blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov,
1010 offset, nr_segs, gfs2_get_block_direct, 1040 offset, nr_segs, gfs2_get_block_direct,
1011 NULL, NULL, 0); 1041 NULL, NULL, 0);
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index c8420f7e4db6..6f7a47c05259 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -1655,6 +1655,7 @@ static int dump_holder(struct seq_file *seq, const struct gfs2_holder *gh)
1655 struct task_struct *gh_owner = NULL; 1655 struct task_struct *gh_owner = NULL;
1656 char flags_buf[32]; 1656 char flags_buf[32];
1657 1657
1658 rcu_read_lock();
1658 if (gh->gh_owner_pid) 1659 if (gh->gh_owner_pid)
1659 gh_owner = pid_task(gh->gh_owner_pid, PIDTYPE_PID); 1660 gh_owner = pid_task(gh->gh_owner_pid, PIDTYPE_PID);
1660 gfs2_print_dbg(seq, " H: s:%s f:%s e:%d p:%ld [%s] %pS\n", 1661 gfs2_print_dbg(seq, " H: s:%s f:%s e:%d p:%ld [%s] %pS\n",
@@ -1664,6 +1665,7 @@ static int dump_holder(struct seq_file *seq, const struct gfs2_holder *gh)
1664 gh->gh_owner_pid ? (long)pid_nr(gh->gh_owner_pid) : -1, 1665 gh->gh_owner_pid ? (long)pid_nr(gh->gh_owner_pid) : -1,
1665 gh_owner ? gh_owner->comm : "(ended)", 1666 gh_owner ? gh_owner->comm : "(ended)",
1666 (void *)gh->gh_ip); 1667 (void *)gh->gh_ip);
1668 rcu_read_unlock();
1667 return 0; 1669 return 0;
1668} 1670}
1669 1671
diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c
index db908f697139..f88dcd925010 100644
--- a/fs/gfs2/glops.c
+++ b/fs/gfs2/glops.c
@@ -192,8 +192,11 @@ static void inode_go_sync(struct gfs2_glock *gl)
192 192
193 if (ip && !S_ISREG(ip->i_inode.i_mode)) 193 if (ip && !S_ISREG(ip->i_inode.i_mode))
194 ip = NULL; 194 ip = NULL;
195 if (ip && test_and_clear_bit(GIF_SW_PAGED, &ip->i_flags)) 195 if (ip) {
196 unmap_shared_mapping_range(ip->i_inode.i_mapping, 0, 0); 196 if (test_and_clear_bit(GIF_SW_PAGED, &ip->i_flags))
197 unmap_shared_mapping_range(ip->i_inode.i_mapping, 0, 0);
198 inode_dio_wait(&ip->i_inode);
199 }
197 if (!test_and_clear_bit(GLF_DIRTY, &gl->gl_flags)) 200 if (!test_and_clear_bit(GLF_DIRTY, &gl->gl_flags))
198 return; 201 return;
199 202
@@ -410,6 +413,9 @@ static int inode_go_lock(struct gfs2_holder *gh)
410 return error; 413 return error;
411 } 414 }
412 415
416 if (gh->gh_state != LM_ST_DEFERRED)
417 inode_dio_wait(&ip->i_inode);
418
413 if ((ip->i_diskflags & GFS2_DIF_TRUNC_IN_PROG) && 419 if ((ip->i_diskflags & GFS2_DIF_TRUNC_IN_PROG) &&
414 (gl->gl_state == LM_ST_EXCLUSIVE) && 420 (gl->gl_state == LM_ST_EXCLUSIVE) &&
415 (gh->gh_state == LM_ST_EXCLUSIVE)) { 421 (gh->gh_state == LM_ST_EXCLUSIVE)) {
diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c
index 610613fb65b5..9dcb9777a5f8 100644
--- a/fs/gfs2/log.c
+++ b/fs/gfs2/log.c
@@ -551,10 +551,10 @@ void gfs2_add_revoke(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd)
551 struct buffer_head *bh = bd->bd_bh; 551 struct buffer_head *bh = bd->bd_bh;
552 struct gfs2_glock *gl = bd->bd_gl; 552 struct gfs2_glock *gl = bd->bd_gl;
553 553
554 gfs2_remove_from_ail(bd);
555 bd->bd_bh = NULL;
556 bh->b_private = NULL; 554 bh->b_private = NULL;
557 bd->bd_blkno = bh->b_blocknr; 555 bd->bd_blkno = bh->b_blocknr;
556 gfs2_remove_from_ail(bd); /* drops ref on bh */
557 bd->bd_bh = NULL;
558 bd->bd_ops = &gfs2_revoke_lops; 558 bd->bd_ops = &gfs2_revoke_lops;
559 sdp->sd_log_num_revoke++; 559 sdp->sd_log_num_revoke++;
560 atomic_inc(&gl->gl_revokes); 560 atomic_inc(&gl->gl_revokes);
diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c
index 932415050540..52f177be3bf8 100644
--- a/fs/gfs2/meta_io.c
+++ b/fs/gfs2/meta_io.c
@@ -258,6 +258,7 @@ void gfs2_remove_from_journal(struct buffer_head *bh, struct gfs2_trans *tr, int
258 struct address_space *mapping = bh->b_page->mapping; 258 struct address_space *mapping = bh->b_page->mapping;
259 struct gfs2_sbd *sdp = gfs2_mapping2sbd(mapping); 259 struct gfs2_sbd *sdp = gfs2_mapping2sbd(mapping);
260 struct gfs2_bufdata *bd = bh->b_private; 260 struct gfs2_bufdata *bd = bh->b_private;
261 int was_pinned = 0;
261 262
262 if (test_clear_buffer_pinned(bh)) { 263 if (test_clear_buffer_pinned(bh)) {
263 trace_gfs2_pin(bd, 0); 264 trace_gfs2_pin(bd, 0);
@@ -273,12 +274,16 @@ void gfs2_remove_from_journal(struct buffer_head *bh, struct gfs2_trans *tr, int
273 tr->tr_num_databuf_rm++; 274 tr->tr_num_databuf_rm++;
274 } 275 }
275 tr->tr_touched = 1; 276 tr->tr_touched = 1;
277 was_pinned = 1;
276 brelse(bh); 278 brelse(bh);
277 } 279 }
278 if (bd) { 280 if (bd) {
279 spin_lock(&sdp->sd_ail_lock); 281 spin_lock(&sdp->sd_ail_lock);
280 if (bd->bd_tr) { 282 if (bd->bd_tr) {
281 gfs2_trans_add_revoke(sdp, bd); 283 gfs2_trans_add_revoke(sdp, bd);
284 } else if (was_pinned) {
285 bh->b_private = NULL;
286 kmem_cache_free(gfs2_bufdata_cachep, bd);
282 } 287 }
283 spin_unlock(&sdp->sd_ail_lock); 288 spin_unlock(&sdp->sd_ail_lock);
284 } 289 }
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index 82303b474958..52fa88314f5c 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -1366,8 +1366,18 @@ static struct dentry *gfs2_mount(struct file_system_type *fs_type, int flags,
1366 if (IS_ERR(s)) 1366 if (IS_ERR(s))
1367 goto error_bdev; 1367 goto error_bdev;
1368 1368
1369 if (s->s_root) 1369 if (s->s_root) {
1370 /*
1371 * s_umount nests inside bd_mutex during
1372 * __invalidate_device(). blkdev_put() acquires
1373 * bd_mutex and can't be called under s_umount. Drop
1374 * s_umount temporarily. This is safe as we're
1375 * holding an active reference.
1376 */
1377 up_write(&s->s_umount);
1370 blkdev_put(bdev, mode); 1378 blkdev_put(bdev, mode);
1379 down_write(&s->s_umount);
1380 }
1371 1381
1372 memset(&args, 0, sizeof(args)); 1382 memset(&args, 0, sizeof(args));
1373 args.ar_quota = GFS2_QUOTA_DEFAULT; 1383 args.ar_quota = GFS2_QUOTA_DEFAULT;
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index 52032647dd4a..5fa344afb49a 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -702,7 +702,7 @@ int jbd2_log_wait_commit(journal_t *journal, tid_t tid)
702 read_lock(&journal->j_state_lock); 702 read_lock(&journal->j_state_lock);
703#ifdef CONFIG_JBD2_DEBUG 703#ifdef CONFIG_JBD2_DEBUG
704 if (!tid_geq(journal->j_commit_request, tid)) { 704 if (!tid_geq(journal->j_commit_request, tid)) {
705 printk(KERN_EMERG 705 printk(KERN_ERR
706 "%s: error: j_commit_request=%d, tid=%d\n", 706 "%s: error: j_commit_request=%d, tid=%d\n",
707 __func__, journal->j_commit_request, tid); 707 __func__, journal->j_commit_request, tid);
708 } 708 }
@@ -718,10 +718,8 @@ int jbd2_log_wait_commit(journal_t *journal, tid_t tid)
718 } 718 }
719 read_unlock(&journal->j_state_lock); 719 read_unlock(&journal->j_state_lock);
720 720
721 if (unlikely(is_journal_aborted(journal))) { 721 if (unlikely(is_journal_aborted(journal)))
722 printk(KERN_EMERG "journal commit I/O error\n");
723 err = -EIO; 722 err = -EIO;
724 }
725 return err; 723 return err;
726} 724}
727 725
@@ -1527,13 +1525,13 @@ static int journal_get_superblock(journal_t *journal)
1527 if (JBD2_HAS_COMPAT_FEATURE(journal, JBD2_FEATURE_COMPAT_CHECKSUM) && 1525 if (JBD2_HAS_COMPAT_FEATURE(journal, JBD2_FEATURE_COMPAT_CHECKSUM) &&
1528 JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2)) { 1526 JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2)) {
1529 /* Can't have checksum v1 and v2 on at the same time! */ 1527 /* Can't have checksum v1 and v2 on at the same time! */
1530 printk(KERN_ERR "JBD: Can't enable checksumming v1 and v2 " 1528 printk(KERN_ERR "JBD2: Can't enable checksumming v1 and v2 "
1531 "at the same time!\n"); 1529 "at the same time!\n");
1532 goto out; 1530 goto out;
1533 } 1531 }
1534 1532
1535 if (!jbd2_verify_csum_type(journal, sb)) { 1533 if (!jbd2_verify_csum_type(journal, sb)) {
1536 printk(KERN_ERR "JBD: Unknown checksum type\n"); 1534 printk(KERN_ERR "JBD2: Unknown checksum type\n");
1537 goto out; 1535 goto out;
1538 } 1536 }
1539 1537
@@ -1541,7 +1539,7 @@ static int journal_get_superblock(journal_t *journal)
1541 if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2)) { 1539 if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2)) {
1542 journal->j_chksum_driver = crypto_alloc_shash("crc32c", 0, 0); 1540 journal->j_chksum_driver = crypto_alloc_shash("crc32c", 0, 0);
1543 if (IS_ERR(journal->j_chksum_driver)) { 1541 if (IS_ERR(journal->j_chksum_driver)) {
1544 printk(KERN_ERR "JBD: Cannot load crc32c driver.\n"); 1542 printk(KERN_ERR "JBD2: Cannot load crc32c driver.\n");
1545 err = PTR_ERR(journal->j_chksum_driver); 1543 err = PTR_ERR(journal->j_chksum_driver);
1546 journal->j_chksum_driver = NULL; 1544 journal->j_chksum_driver = NULL;
1547 goto out; 1545 goto out;
@@ -1550,7 +1548,7 @@ static int journal_get_superblock(journal_t *journal)
1550 1548
1551 /* Check superblock checksum */ 1549 /* Check superblock checksum */
1552 if (!jbd2_superblock_csum_verify(journal, sb)) { 1550 if (!jbd2_superblock_csum_verify(journal, sb)) {
1553 printk(KERN_ERR "JBD: journal checksum error\n"); 1551 printk(KERN_ERR "JBD2: journal checksum error\n");
1554 goto out; 1552 goto out;
1555 } 1553 }
1556 1554
@@ -1836,7 +1834,7 @@ int jbd2_journal_set_features (journal_t *journal, unsigned long compat,
1836 journal->j_chksum_driver = crypto_alloc_shash("crc32c", 1834 journal->j_chksum_driver = crypto_alloc_shash("crc32c",
1837 0, 0); 1835 0, 0);
1838 if (IS_ERR(journal->j_chksum_driver)) { 1836 if (IS_ERR(journal->j_chksum_driver)) {
1839 printk(KERN_ERR "JBD: Cannot load crc32c " 1837 printk(KERN_ERR "JBD2: Cannot load crc32c "
1840 "driver.\n"); 1838 "driver.\n");
1841 journal->j_chksum_driver = NULL; 1839 journal->j_chksum_driver = NULL;
1842 return 0; 1840 return 0;
@@ -2645,7 +2643,7 @@ static void __exit journal_exit(void)
2645#ifdef CONFIG_JBD2_DEBUG 2643#ifdef CONFIG_JBD2_DEBUG
2646 int n = atomic_read(&nr_journal_heads); 2644 int n = atomic_read(&nr_journal_heads);
2647 if (n) 2645 if (n)
2648 printk(KERN_EMERG "JBD2: leaked %d journal_heads!\n", n); 2646 printk(KERN_ERR "JBD2: leaked %d journal_heads!\n", n);
2649#endif 2647#endif
2650 jbd2_remove_jbd_stats_proc_entry(); 2648 jbd2_remove_jbd_stats_proc_entry();
2651 jbd2_journal_destroy_caches(); 2649 jbd2_journal_destroy_caches();
diff --git a/fs/jbd2/recovery.c b/fs/jbd2/recovery.c
index 3929c50428b1..3b6bb19d60b1 100644
--- a/fs/jbd2/recovery.c
+++ b/fs/jbd2/recovery.c
@@ -594,7 +594,7 @@ static int do_one_pass(journal_t *journal,
594 be32_to_cpu(tmp->h_sequence))) { 594 be32_to_cpu(tmp->h_sequence))) {
595 brelse(obh); 595 brelse(obh);
596 success = -EIO; 596 success = -EIO;
597 printk(KERN_ERR "JBD: Invalid " 597 printk(KERN_ERR "JBD2: Invalid "
598 "checksum recovering " 598 "checksum recovering "
599 "block %llu in log\n", 599 "block %llu in log\n",
600 blocknr); 600 blocknr);
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index 7aa9a32573bb..8360674c85bc 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -932,7 +932,7 @@ repeat:
932 jbd2_alloc(jh2bh(jh)->b_size, 932 jbd2_alloc(jh2bh(jh)->b_size,
933 GFP_NOFS); 933 GFP_NOFS);
934 if (!frozen_buffer) { 934 if (!frozen_buffer) {
935 printk(KERN_EMERG 935 printk(KERN_ERR
936 "%s: OOM for frozen_buffer\n", 936 "%s: OOM for frozen_buffer\n",
937 __func__); 937 __func__);
938 JBUFFER_TRACE(jh, "oom!"); 938 JBUFFER_TRACE(jh, "oom!");
@@ -1166,7 +1166,7 @@ repeat:
1166 if (!jh->b_committed_data) { 1166 if (!jh->b_committed_data) {
1167 committed_data = jbd2_alloc(jh2bh(jh)->b_size, GFP_NOFS); 1167 committed_data = jbd2_alloc(jh2bh(jh)->b_size, GFP_NOFS);
1168 if (!committed_data) { 1168 if (!committed_data) {
1169 printk(KERN_EMERG "%s: No memory for committed data\n", 1169 printk(KERN_ERR "%s: No memory for committed data\n",
1170 __func__); 1170 __func__);
1171 err = -ENOMEM; 1171 err = -ENOMEM;
1172 goto out; 1172 goto out;
@@ -1290,7 +1290,10 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh)
1290 * once a transaction -bzzz 1290 * once a transaction -bzzz
1291 */ 1291 */
1292 jh->b_modified = 1; 1292 jh->b_modified = 1;
1293 J_ASSERT_JH(jh, handle->h_buffer_credits > 0); 1293 if (handle->h_buffer_credits <= 0) {
1294 ret = -ENOSPC;
1295 goto out_unlock_bh;
1296 }
1294 handle->h_buffer_credits--; 1297 handle->h_buffer_credits--;
1295 } 1298 }
1296 1299
@@ -1305,7 +1308,7 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh)
1305 JBUFFER_TRACE(jh, "fastpath"); 1308 JBUFFER_TRACE(jh, "fastpath");
1306 if (unlikely(jh->b_transaction != 1309 if (unlikely(jh->b_transaction !=
1307 journal->j_running_transaction)) { 1310 journal->j_running_transaction)) {
1308 printk(KERN_EMERG "JBD: %s: " 1311 printk(KERN_ERR "JBD2: %s: "
1309 "jh->b_transaction (%llu, %p, %u) != " 1312 "jh->b_transaction (%llu, %p, %u) != "
1310 "journal->j_running_transaction (%p, %u)", 1313 "journal->j_running_transaction (%p, %u)",
1311 journal->j_devname, 1314 journal->j_devname,
@@ -1332,7 +1335,7 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh)
1332 JBUFFER_TRACE(jh, "already on other transaction"); 1335 JBUFFER_TRACE(jh, "already on other transaction");
1333 if (unlikely(jh->b_transaction != 1336 if (unlikely(jh->b_transaction !=
1334 journal->j_committing_transaction)) { 1337 journal->j_committing_transaction)) {
1335 printk(KERN_EMERG "JBD: %s: " 1338 printk(KERN_ERR "JBD2: %s: "
1336 "jh->b_transaction (%llu, %p, %u) != " 1339 "jh->b_transaction (%llu, %p, %u) != "
1337 "journal->j_committing_transaction (%p, %u)", 1340 "journal->j_committing_transaction (%p, %u)",
1338 journal->j_devname, 1341 journal->j_devname,
@@ -1345,7 +1348,7 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh)
1345 ret = -EINVAL; 1348 ret = -EINVAL;
1346 } 1349 }
1347 if (unlikely(jh->b_next_transaction != transaction)) { 1350 if (unlikely(jh->b_next_transaction != transaction)) {
1348 printk(KERN_EMERG "JBD: %s: " 1351 printk(KERN_ERR "JBD2: %s: "
1349 "jh->b_next_transaction (%llu, %p, %u) != " 1352 "jh->b_next_transaction (%llu, %p, %u) != "
1350 "transaction (%p, %u)", 1353 "transaction (%p, %u)",
1351 journal->j_devname, 1354 journal->j_devname,
@@ -1373,7 +1376,6 @@ out_unlock_bh:
1373 jbd2_journal_put_journal_head(jh); 1376 jbd2_journal_put_journal_head(jh);
1374out: 1377out:
1375 JBUFFER_TRACE(jh, "exit"); 1378 JBUFFER_TRACE(jh, "exit");
1376 WARN_ON(ret); /* All errors are bugs, so dump the stack */
1377 return ret; 1379 return ret;
1378} 1380}
1379 1381
diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c
index b8e93a40a5d3..78c3c2097787 100644
--- a/fs/pstore/platform.c
+++ b/fs/pstore/platform.c
@@ -443,8 +443,11 @@ int pstore_register(struct pstore_info *psi)
443 pstore_get_records(0); 443 pstore_get_records(0);
444 444
445 kmsg_dump_register(&pstore_dumper); 445 kmsg_dump_register(&pstore_dumper);
446 pstore_register_console(); 446
447 pstore_register_ftrace(); 447 if ((psi->flags & PSTORE_FLAGS_FRAGILE) == 0) {
448 pstore_register_console();
449 pstore_register_ftrace();
450 }
448 451
449 if (pstore_update_ms >= 0) { 452 if (pstore_update_ms >= 0) {
450 pstore_timer.expires = jiffies + 453 pstore_timer.expires = jiffies +
diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c
index b94f93685093..35e7d08fe629 100644
--- a/fs/sysfs/file.c
+++ b/fs/sysfs/file.c
@@ -609,7 +609,7 @@ static int sysfs_open_file(struct inode *inode, struct file *file)
609 struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata; 609 struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata;
610 struct kobject *kobj = attr_sd->s_parent->s_dir.kobj; 610 struct kobject *kobj = attr_sd->s_parent->s_dir.kobj;
611 struct sysfs_open_file *of; 611 struct sysfs_open_file *of;
612 bool has_read, has_write, has_mmap; 612 bool has_read, has_write;
613 int error = -EACCES; 613 int error = -EACCES;
614 614
615 /* need attr_sd for attr and ops, its parent for kobj */ 615 /* need attr_sd for attr and ops, its parent for kobj */
@@ -621,7 +621,6 @@ static int sysfs_open_file(struct inode *inode, struct file *file)
621 621
622 has_read = battr->read || battr->mmap; 622 has_read = battr->read || battr->mmap;
623 has_write = battr->write || battr->mmap; 623 has_write = battr->write || battr->mmap;
624 has_mmap = battr->mmap;
625 } else { 624 } else {
626 const struct sysfs_ops *ops = sysfs_file_ops(attr_sd); 625 const struct sysfs_ops *ops = sysfs_file_ops(attr_sd);
627 626
@@ -633,7 +632,6 @@ static int sysfs_open_file(struct inode *inode, struct file *file)
633 632
634 has_read = ops->show; 633 has_read = ops->show;
635 has_write = ops->store; 634 has_write = ops->store;
636 has_mmap = false;
637 } 635 }
638 636
639 /* check perms and supported operations */ 637 /* check perms and supported operations */
@@ -661,9 +659,9 @@ static int sysfs_open_file(struct inode *inode, struct file *file)
661 * open file has a separate mutex, it's okay as long as those don't 659 * open file has a separate mutex, it's okay as long as those don't
662 * happen on the same file. At this point, we can't easily give 660 * happen on the same file. At this point, we can't easily give
663 * each file a separate locking class. Let's differentiate on 661 * each file a separate locking class. Let's differentiate on
664 * whether the file has mmap or not for now. 662 * whether the file is bin or not for now.
665 */ 663 */
666 if (has_mmap) 664 if (sysfs_is_bin(attr_sd))
667 mutex_init(&of->mutex); 665 mutex_init(&of->mutex);
668 else 666 else
669 mutex_init(&of->mutex); 667 mutex_init(&of->mutex);
diff --git a/fs/xfs/xfs_attr_remote.c b/fs/xfs/xfs_attr_remote.c
index 739e0a52deda..5549d69ddb45 100644
--- a/fs/xfs/xfs_attr_remote.c
+++ b/fs/xfs/xfs_attr_remote.c
@@ -110,7 +110,7 @@ xfs_attr3_rmt_verify(
110 if (be32_to_cpu(rmt->rm_bytes) > fsbsize - sizeof(*rmt)) 110 if (be32_to_cpu(rmt->rm_bytes) > fsbsize - sizeof(*rmt))
111 return false; 111 return false;
112 if (be32_to_cpu(rmt->rm_offset) + 112 if (be32_to_cpu(rmt->rm_offset) +
113 be32_to_cpu(rmt->rm_bytes) >= XATTR_SIZE_MAX) 113 be32_to_cpu(rmt->rm_bytes) > XATTR_SIZE_MAX)
114 return false; 114 return false;
115 if (rmt->rm_owner == 0) 115 if (rmt->rm_owner == 0)
116 return false; 116 return false;
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c
index 3ef11b22e750..3b2c14b6f0fb 100644
--- a/fs/xfs/xfs_bmap.c
+++ b/fs/xfs/xfs_bmap.c
@@ -1635,7 +1635,7 @@ xfs_bmap_last_extent(
1635 * blocks at the end of the file which do not start at the previous data block, 1635 * blocks at the end of the file which do not start at the previous data block,
1636 * we will try to align the new blocks at stripe unit boundaries. 1636 * we will try to align the new blocks at stripe unit boundaries.
1637 * 1637 *
1638 * Returns 0 in bma->aeof if the file (fork) is empty as any new write will be 1638 * Returns 1 in bma->aeof if the file (fork) is empty as any new write will be
1639 * at, or past the EOF. 1639 * at, or past the EOF.
1640 */ 1640 */
1641STATIC int 1641STATIC int
@@ -1650,9 +1650,14 @@ xfs_bmap_isaeof(
1650 bma->aeof = 0; 1650 bma->aeof = 0;
1651 error = xfs_bmap_last_extent(NULL, bma->ip, whichfork, &rec, 1651 error = xfs_bmap_last_extent(NULL, bma->ip, whichfork, &rec,
1652 &is_empty); 1652 &is_empty);
1653 if (error || is_empty) 1653 if (error)
1654 return error; 1654 return error;
1655 1655
1656 if (is_empty) {
1657 bma->aeof = 1;
1658 return 0;
1659 }
1660
1656 /* 1661 /*
1657 * Check if we are allocating at or past the last extent, or at least into 1662 * Check if we are allocating at or past the last extent, or at least into
1658 * the last delayed allocated extent. 1663 * the last delayed allocated extent.
@@ -3643,10 +3648,19 @@ xfs_bmap_btalloc(
3643 int isaligned; 3648 int isaligned;
3644 int tryagain; 3649 int tryagain;
3645 int error; 3650 int error;
3651 int stripe_align;
3646 3652
3647 ASSERT(ap->length); 3653 ASSERT(ap->length);
3648 3654
3649 mp = ap->ip->i_mount; 3655 mp = ap->ip->i_mount;
3656
3657 /* stripe alignment for allocation is determined by mount parameters */
3658 stripe_align = 0;
3659 if (mp->m_swidth && (mp->m_flags & XFS_MOUNT_SWALLOC))
3660 stripe_align = mp->m_swidth;
3661 else if (mp->m_dalign)
3662 stripe_align = mp->m_dalign;
3663
3650 align = ap->userdata ? xfs_get_extsz_hint(ap->ip) : 0; 3664 align = ap->userdata ? xfs_get_extsz_hint(ap->ip) : 0;
3651 if (unlikely(align)) { 3665 if (unlikely(align)) {
3652 error = xfs_bmap_extsize_align(mp, &ap->got, &ap->prev, 3666 error = xfs_bmap_extsize_align(mp, &ap->got, &ap->prev,
@@ -3655,6 +3669,8 @@ xfs_bmap_btalloc(
3655 ASSERT(!error); 3669 ASSERT(!error);
3656 ASSERT(ap->length); 3670 ASSERT(ap->length);
3657 } 3671 }
3672
3673
3658 nullfb = *ap->firstblock == NULLFSBLOCK; 3674 nullfb = *ap->firstblock == NULLFSBLOCK;
3659 fb_agno = nullfb ? NULLAGNUMBER : XFS_FSB_TO_AGNO(mp, *ap->firstblock); 3675 fb_agno = nullfb ? NULLAGNUMBER : XFS_FSB_TO_AGNO(mp, *ap->firstblock);
3660 if (nullfb) { 3676 if (nullfb) {
@@ -3730,7 +3746,7 @@ xfs_bmap_btalloc(
3730 */ 3746 */
3731 if (!ap->flist->xbf_low && ap->aeof) { 3747 if (!ap->flist->xbf_low && ap->aeof) {
3732 if (!ap->offset) { 3748 if (!ap->offset) {
3733 args.alignment = mp->m_dalign; 3749 args.alignment = stripe_align;
3734 atype = args.type; 3750 atype = args.type;
3735 isaligned = 1; 3751 isaligned = 1;
3736 /* 3752 /*
@@ -3755,13 +3771,13 @@ xfs_bmap_btalloc(
3755 * of minlen+alignment+slop doesn't go up 3771 * of minlen+alignment+slop doesn't go up
3756 * between the calls. 3772 * between the calls.
3757 */ 3773 */
3758 if (blen > mp->m_dalign && blen <= args.maxlen) 3774 if (blen > stripe_align && blen <= args.maxlen)
3759 nextminlen = blen - mp->m_dalign; 3775 nextminlen = blen - stripe_align;
3760 else 3776 else
3761 nextminlen = args.minlen; 3777 nextminlen = args.minlen;
3762 if (nextminlen + mp->m_dalign > args.minlen + 1) 3778 if (nextminlen + stripe_align > args.minlen + 1)
3763 args.minalignslop = 3779 args.minalignslop =
3764 nextminlen + mp->m_dalign - 3780 nextminlen + stripe_align -
3765 args.minlen - 1; 3781 args.minlen - 1;
3766 else 3782 else
3767 args.minalignslop = 0; 3783 args.minalignslop = 0;
@@ -3783,7 +3799,7 @@ xfs_bmap_btalloc(
3783 */ 3799 */
3784 args.type = atype; 3800 args.type = atype;
3785 args.fsbno = ap->blkno; 3801 args.fsbno = ap->blkno;
3786 args.alignment = mp->m_dalign; 3802 args.alignment = stripe_align;
3787 args.minlen = nextminlen; 3803 args.minlen = nextminlen;
3788 args.minalignslop = 0; 3804 args.minalignslop = 0;
3789 isaligned = 1; 3805 isaligned = 1;
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index 5887e41c0323..82e0dab46ee5 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -287,6 +287,7 @@ xfs_bmapi_allocate(
287 INIT_WORK_ONSTACK(&args->work, xfs_bmapi_allocate_worker); 287 INIT_WORK_ONSTACK(&args->work, xfs_bmapi_allocate_worker);
288 queue_work(xfs_alloc_wq, &args->work); 288 queue_work(xfs_alloc_wq, &args->work);
289 wait_for_completion(&done); 289 wait_for_completion(&done);
290 destroy_work_on_stack(&args->work);
290 return args->result; 291 return args->result;
291} 292}
292 293
@@ -1187,7 +1188,12 @@ xfs_zero_remaining_bytes(
1187 XFS_BUF_UNWRITE(bp); 1188 XFS_BUF_UNWRITE(bp);
1188 XFS_BUF_READ(bp); 1189 XFS_BUF_READ(bp);
1189 XFS_BUF_SET_ADDR(bp, xfs_fsb_to_db(ip, imap.br_startblock)); 1190 XFS_BUF_SET_ADDR(bp, xfs_fsb_to_db(ip, imap.br_startblock));
1190 xfsbdstrat(mp, bp); 1191
1192 if (XFS_FORCED_SHUTDOWN(mp)) {
1193 error = XFS_ERROR(EIO);
1194 break;
1195 }
1196 xfs_buf_iorequest(bp);
1191 error = xfs_buf_iowait(bp); 1197 error = xfs_buf_iowait(bp);
1192 if (error) { 1198 if (error) {
1193 xfs_buf_ioerror_alert(bp, 1199 xfs_buf_ioerror_alert(bp,
@@ -1200,7 +1206,12 @@ xfs_zero_remaining_bytes(
1200 XFS_BUF_UNDONE(bp); 1206 XFS_BUF_UNDONE(bp);
1201 XFS_BUF_UNREAD(bp); 1207 XFS_BUF_UNREAD(bp);
1202 XFS_BUF_WRITE(bp); 1208 XFS_BUF_WRITE(bp);
1203 xfsbdstrat(mp, bp); 1209
1210 if (XFS_FORCED_SHUTDOWN(mp)) {
1211 error = XFS_ERROR(EIO);
1212 break;
1213 }
1214 xfs_buf_iorequest(bp);
1204 error = xfs_buf_iowait(bp); 1215 error = xfs_buf_iowait(bp);
1205 if (error) { 1216 if (error) {
1206 xfs_buf_ioerror_alert(bp, 1217 xfs_buf_ioerror_alert(bp,
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index c7f0b77dcb00..afe7645e4b2b 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -698,7 +698,11 @@ xfs_buf_read_uncached(
698 bp->b_flags |= XBF_READ; 698 bp->b_flags |= XBF_READ;
699 bp->b_ops = ops; 699 bp->b_ops = ops;
700 700
701 xfsbdstrat(target->bt_mount, bp); 701 if (XFS_FORCED_SHUTDOWN(target->bt_mount)) {
702 xfs_buf_relse(bp);
703 return NULL;
704 }
705 xfs_buf_iorequest(bp);
702 xfs_buf_iowait(bp); 706 xfs_buf_iowait(bp);
703 return bp; 707 return bp;
704} 708}
@@ -1089,7 +1093,7 @@ xfs_bioerror(
1089 * This is meant for userdata errors; metadata bufs come with 1093 * This is meant for userdata errors; metadata bufs come with
1090 * iodone functions attached, so that we can track down errors. 1094 * iodone functions attached, so that we can track down errors.
1091 */ 1095 */
1092STATIC int 1096int
1093xfs_bioerror_relse( 1097xfs_bioerror_relse(
1094 struct xfs_buf *bp) 1098 struct xfs_buf *bp)
1095{ 1099{
@@ -1152,7 +1156,7 @@ xfs_bwrite(
1152 ASSERT(xfs_buf_islocked(bp)); 1156 ASSERT(xfs_buf_islocked(bp));
1153 1157
1154 bp->b_flags |= XBF_WRITE; 1158 bp->b_flags |= XBF_WRITE;
1155 bp->b_flags &= ~(XBF_ASYNC | XBF_READ | _XBF_DELWRI_Q); 1159 bp->b_flags &= ~(XBF_ASYNC | XBF_READ | _XBF_DELWRI_Q | XBF_WRITE_FAIL);
1156 1160
1157 xfs_bdstrat_cb(bp); 1161 xfs_bdstrat_cb(bp);
1158 1162
@@ -1164,25 +1168,6 @@ xfs_bwrite(
1164 return error; 1168 return error;
1165} 1169}
1166 1170
1167/*
1168 * Wrapper around bdstrat so that we can stop data from going to disk in case
1169 * we are shutting down the filesystem. Typically user data goes thru this
1170 * path; one of the exceptions is the superblock.
1171 */
1172void
1173xfsbdstrat(
1174 struct xfs_mount *mp,
1175 struct xfs_buf *bp)
1176{
1177 if (XFS_FORCED_SHUTDOWN(mp)) {
1178 trace_xfs_bdstrat_shut(bp, _RET_IP_);
1179 xfs_bioerror_relse(bp);
1180 return;
1181 }
1182
1183 xfs_buf_iorequest(bp);
1184}
1185
1186STATIC void 1171STATIC void
1187_xfs_buf_ioend( 1172_xfs_buf_ioend(
1188 xfs_buf_t *bp, 1173 xfs_buf_t *bp,
@@ -1516,6 +1501,12 @@ xfs_wait_buftarg(
1516 struct xfs_buf *bp; 1501 struct xfs_buf *bp;
1517 bp = list_first_entry(&dispose, struct xfs_buf, b_lru); 1502 bp = list_first_entry(&dispose, struct xfs_buf, b_lru);
1518 list_del_init(&bp->b_lru); 1503 list_del_init(&bp->b_lru);
1504 if (bp->b_flags & XBF_WRITE_FAIL) {
1505 xfs_alert(btp->bt_mount,
1506"Corruption Alert: Buffer at block 0x%llx had permanent write failures!\n"
1507"Please run xfs_repair to determine the extent of the problem.",
1508 (long long)bp->b_bn);
1509 }
1519 xfs_buf_rele(bp); 1510 xfs_buf_rele(bp);
1520 } 1511 }
1521 if (loop++ != 0) 1512 if (loop++ != 0)
@@ -1799,7 +1790,7 @@ __xfs_buf_delwri_submit(
1799 1790
1800 blk_start_plug(&plug); 1791 blk_start_plug(&plug);
1801 list_for_each_entry_safe(bp, n, io_list, b_list) { 1792 list_for_each_entry_safe(bp, n, io_list, b_list) {
1802 bp->b_flags &= ~(_XBF_DELWRI_Q | XBF_ASYNC); 1793 bp->b_flags &= ~(_XBF_DELWRI_Q | XBF_ASYNC | XBF_WRITE_FAIL);
1803 bp->b_flags |= XBF_WRITE; 1794 bp->b_flags |= XBF_WRITE;
1804 1795
1805 if (!wait) { 1796 if (!wait) {
diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h
index e65683361017..1cf21a4a9f22 100644
--- a/fs/xfs/xfs_buf.h
+++ b/fs/xfs/xfs_buf.h
@@ -45,6 +45,7 @@ typedef enum {
45#define XBF_ASYNC (1 << 4) /* initiator will not wait for completion */ 45#define XBF_ASYNC (1 << 4) /* initiator will not wait for completion */
46#define XBF_DONE (1 << 5) /* all pages in the buffer uptodate */ 46#define XBF_DONE (1 << 5) /* all pages in the buffer uptodate */
47#define XBF_STALE (1 << 6) /* buffer has been staled, do not find it */ 47#define XBF_STALE (1 << 6) /* buffer has been staled, do not find it */
48#define XBF_WRITE_FAIL (1 << 24)/* async writes have failed on this buffer */
48 49
49/* I/O hints for the BIO layer */ 50/* I/O hints for the BIO layer */
50#define XBF_SYNCIO (1 << 10)/* treat this buffer as synchronous I/O */ 51#define XBF_SYNCIO (1 << 10)/* treat this buffer as synchronous I/O */
@@ -70,6 +71,7 @@ typedef unsigned int xfs_buf_flags_t;
70 { XBF_ASYNC, "ASYNC" }, \ 71 { XBF_ASYNC, "ASYNC" }, \
71 { XBF_DONE, "DONE" }, \ 72 { XBF_DONE, "DONE" }, \
72 { XBF_STALE, "STALE" }, \ 73 { XBF_STALE, "STALE" }, \
74 { XBF_WRITE_FAIL, "WRITE_FAIL" }, \
73 { XBF_SYNCIO, "SYNCIO" }, \ 75 { XBF_SYNCIO, "SYNCIO" }, \
74 { XBF_FUA, "FUA" }, \ 76 { XBF_FUA, "FUA" }, \
75 { XBF_FLUSH, "FLUSH" }, \ 77 { XBF_FLUSH, "FLUSH" }, \
@@ -80,6 +82,7 @@ typedef unsigned int xfs_buf_flags_t;
80 { _XBF_DELWRI_Q, "DELWRI_Q" }, \ 82 { _XBF_DELWRI_Q, "DELWRI_Q" }, \
81 { _XBF_COMPOUND, "COMPOUND" } 83 { _XBF_COMPOUND, "COMPOUND" }
82 84
85
83/* 86/*
84 * Internal state flags. 87 * Internal state flags.
85 */ 88 */
@@ -269,9 +272,6 @@ extern void xfs_buf_unlock(xfs_buf_t *);
269 272
270/* Buffer Read and Write Routines */ 273/* Buffer Read and Write Routines */
271extern int xfs_bwrite(struct xfs_buf *bp); 274extern int xfs_bwrite(struct xfs_buf *bp);
272
273extern void xfsbdstrat(struct xfs_mount *, struct xfs_buf *);
274
275extern void xfs_buf_ioend(xfs_buf_t *, int); 275extern void xfs_buf_ioend(xfs_buf_t *, int);
276extern void xfs_buf_ioerror(xfs_buf_t *, int); 276extern void xfs_buf_ioerror(xfs_buf_t *, int);
277extern void xfs_buf_ioerror_alert(struct xfs_buf *, const char *func); 277extern void xfs_buf_ioerror_alert(struct xfs_buf *, const char *func);
@@ -282,6 +282,8 @@ extern void xfs_buf_iomove(xfs_buf_t *, size_t, size_t, void *,
282#define xfs_buf_zero(bp, off, len) \ 282#define xfs_buf_zero(bp, off, len) \
283 xfs_buf_iomove((bp), (off), (len), NULL, XBRW_ZERO) 283 xfs_buf_iomove((bp), (off), (len), NULL, XBRW_ZERO)
284 284
285extern int xfs_bioerror_relse(struct xfs_buf *);
286
285static inline int xfs_buf_geterror(xfs_buf_t *bp) 287static inline int xfs_buf_geterror(xfs_buf_t *bp)
286{ 288{
287 return bp ? bp->b_error : ENOMEM; 289 return bp ? bp->b_error : ENOMEM;
@@ -301,7 +303,8 @@ extern void xfs_buf_terminate(void);
301 303
302#define XFS_BUF_ZEROFLAGS(bp) \ 304#define XFS_BUF_ZEROFLAGS(bp) \
303 ((bp)->b_flags &= ~(XBF_READ|XBF_WRITE|XBF_ASYNC| \ 305 ((bp)->b_flags &= ~(XBF_READ|XBF_WRITE|XBF_ASYNC| \
304 XBF_SYNCIO|XBF_FUA|XBF_FLUSH)) 306 XBF_SYNCIO|XBF_FUA|XBF_FLUSH| \
307 XBF_WRITE_FAIL))
305 308
306void xfs_buf_stale(struct xfs_buf *bp); 309void xfs_buf_stale(struct xfs_buf *bp);
307#define XFS_BUF_UNSTALE(bp) ((bp)->b_flags &= ~XBF_STALE) 310#define XFS_BUF_UNSTALE(bp) ((bp)->b_flags &= ~XBF_STALE)
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c
index a64f67ba25d3..2227b9b050bb 100644
--- a/fs/xfs/xfs_buf_item.c
+++ b/fs/xfs/xfs_buf_item.c
@@ -496,6 +496,14 @@ xfs_buf_item_unpin(
496 } 496 }
497} 497}
498 498
499/*
500 * Buffer IO error rate limiting. Limit it to no more than 10 messages per 30
501 * seconds so as to not spam logs too much on repeated detection of the same
502 * buffer being bad.
503 */
504
505DEFINE_RATELIMIT_STATE(xfs_buf_write_fail_rl_state, 30 * HZ, 10);
506
499STATIC uint 507STATIC uint
500xfs_buf_item_push( 508xfs_buf_item_push(
501 struct xfs_log_item *lip, 509 struct xfs_log_item *lip,
@@ -524,6 +532,14 @@ xfs_buf_item_push(
524 532
525 trace_xfs_buf_item_push(bip); 533 trace_xfs_buf_item_push(bip);
526 534
535 /* has a previous flush failed due to IO errors? */
536 if ((bp->b_flags & XBF_WRITE_FAIL) &&
537 ___ratelimit(&xfs_buf_write_fail_rl_state, "XFS:")) {
538 xfs_warn(bp->b_target->bt_mount,
539"Detected failing async write on buffer block 0x%llx. Retrying async write.\n",
540 (long long)bp->b_bn);
541 }
542
527 if (!xfs_buf_delwri_queue(bp, buffer_list)) 543 if (!xfs_buf_delwri_queue(bp, buffer_list))
528 rval = XFS_ITEM_FLUSHING; 544 rval = XFS_ITEM_FLUSHING;
529 xfs_buf_unlock(bp); 545 xfs_buf_unlock(bp);
@@ -1096,8 +1112,9 @@ xfs_buf_iodone_callbacks(
1096 1112
1097 xfs_buf_ioerror(bp, 0); /* errno of 0 unsets the flag */ 1113 xfs_buf_ioerror(bp, 0); /* errno of 0 unsets the flag */
1098 1114
1099 if (!XFS_BUF_ISSTALE(bp)) { 1115 if (!(bp->b_flags & (XBF_STALE|XBF_WRITE_FAIL))) {
1100 bp->b_flags |= XBF_WRITE | XBF_ASYNC | XBF_DONE; 1116 bp->b_flags |= XBF_WRITE | XBF_ASYNC |
1117 XBF_DONE | XBF_WRITE_FAIL;
1101 xfs_buf_iorequest(bp); 1118 xfs_buf_iorequest(bp);
1102 } else { 1119 } else {
1103 xfs_buf_relse(bp); 1120 xfs_buf_relse(bp);
diff --git a/fs/xfs/xfs_dir2_node.c b/fs/xfs/xfs_dir2_node.c
index 56369d4509d5..48c7d18f68c3 100644
--- a/fs/xfs/xfs_dir2_node.c
+++ b/fs/xfs/xfs_dir2_node.c
@@ -2067,12 +2067,12 @@ xfs_dir2_node_lookup(
2067 */ 2067 */
2068int /* error */ 2068int /* error */
2069xfs_dir2_node_removename( 2069xfs_dir2_node_removename(
2070 xfs_da_args_t *args) /* operation arguments */ 2070 struct xfs_da_args *args) /* operation arguments */
2071{ 2071{
2072 xfs_da_state_blk_t *blk; /* leaf block */ 2072 struct xfs_da_state_blk *blk; /* leaf block */
2073 int error; /* error return value */ 2073 int error; /* error return value */
2074 int rval; /* operation return value */ 2074 int rval; /* operation return value */
2075 xfs_da_state_t *state; /* btree cursor */ 2075 struct xfs_da_state *state; /* btree cursor */
2076 2076
2077 trace_xfs_dir2_node_removename(args); 2077 trace_xfs_dir2_node_removename(args);
2078 2078
@@ -2084,19 +2084,18 @@ xfs_dir2_node_removename(
2084 state->mp = args->dp->i_mount; 2084 state->mp = args->dp->i_mount;
2085 state->blocksize = state->mp->m_dirblksize; 2085 state->blocksize = state->mp->m_dirblksize;
2086 state->node_ents = state->mp->m_dir_node_ents; 2086 state->node_ents = state->mp->m_dir_node_ents;
2087 /* 2087
2088 * Look up the entry we're deleting, set up the cursor. 2088 /* Look up the entry we're deleting, set up the cursor. */
2089 */
2090 error = xfs_da3_node_lookup_int(state, &rval); 2089 error = xfs_da3_node_lookup_int(state, &rval);
2091 if (error) 2090 if (error)
2092 rval = error; 2091 goto out_free;
2093 /* 2092
2094 * Didn't find it, upper layer screwed up. 2093 /* Didn't find it, upper layer screwed up. */
2095 */
2096 if (rval != EEXIST) { 2094 if (rval != EEXIST) {
2097 xfs_da_state_free(state); 2095 error = rval;
2098 return rval; 2096 goto out_free;
2099 } 2097 }
2098
2100 blk = &state->path.blk[state->path.active - 1]; 2099 blk = &state->path.blk[state->path.active - 1];
2101 ASSERT(blk->magic == XFS_DIR2_LEAFN_MAGIC); 2100 ASSERT(blk->magic == XFS_DIR2_LEAFN_MAGIC);
2102 ASSERT(state->extravalid); 2101 ASSERT(state->extravalid);
@@ -2107,7 +2106,7 @@ xfs_dir2_node_removename(
2107 error = xfs_dir2_leafn_remove(args, blk->bp, blk->index, 2106 error = xfs_dir2_leafn_remove(args, blk->bp, blk->index,
2108 &state->extrablk, &rval); 2107 &state->extrablk, &rval);
2109 if (error) 2108 if (error)
2110 return error; 2109 goto out_free;
2111 /* 2110 /*
2112 * Fix the hash values up the btree. 2111 * Fix the hash values up the btree.
2113 */ 2112 */
@@ -2122,6 +2121,7 @@ xfs_dir2_node_removename(
2122 */ 2121 */
2123 if (!error) 2122 if (!error)
2124 error = xfs_dir2_node_to_leaf(state); 2123 error = xfs_dir2_node_to_leaf(state);
2124out_free:
2125 xfs_da_state_free(state); 2125 xfs_da_state_free(state);
2126 return error; 2126 return error;
2127} 2127}
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
index 27e0e544e963..104455b8046c 100644
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -618,7 +618,8 @@ xfs_setattr_nonsize(
618 } 618 }
619 if (!gid_eq(igid, gid)) { 619 if (!gid_eq(igid, gid)) {
620 if (XFS_IS_QUOTA_RUNNING(mp) && XFS_IS_GQUOTA_ON(mp)) { 620 if (XFS_IS_QUOTA_RUNNING(mp) && XFS_IS_GQUOTA_ON(mp)) {
621 ASSERT(!XFS_IS_PQUOTA_ON(mp)); 621 ASSERT(xfs_sb_version_has_pquotino(&mp->m_sb) ||
622 !XFS_IS_PQUOTA_ON(mp));
622 ASSERT(mask & ATTR_GID); 623 ASSERT(mask & ATTR_GID);
623 ASSERT(gdqp); 624 ASSERT(gdqp);
624 olddquot2 = xfs_qm_vop_chown(tp, ip, 625 olddquot2 = xfs_qm_vop_chown(tp, ip,
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index b6b669df40f3..eae16920655b 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -193,7 +193,10 @@ xlog_bread_noalign(
193 bp->b_io_length = nbblks; 193 bp->b_io_length = nbblks;
194 bp->b_error = 0; 194 bp->b_error = 0;
195 195
196 xfsbdstrat(log->l_mp, bp); 196 if (XFS_FORCED_SHUTDOWN(log->l_mp))
197 return XFS_ERROR(EIO);
198
199 xfs_buf_iorequest(bp);
197 error = xfs_buf_iowait(bp); 200 error = xfs_buf_iowait(bp);
198 if (error) 201 if (error)
199 xfs_buf_ioerror_alert(bp, __func__); 202 xfs_buf_ioerror_alert(bp, __func__);
@@ -4397,7 +4400,13 @@ xlog_do_recover(
4397 XFS_BUF_READ(bp); 4400 XFS_BUF_READ(bp);
4398 XFS_BUF_UNASYNC(bp); 4401 XFS_BUF_UNASYNC(bp);
4399 bp->b_ops = &xfs_sb_buf_ops; 4402 bp->b_ops = &xfs_sb_buf_ops;
4400 xfsbdstrat(log->l_mp, bp); 4403
4404 if (XFS_FORCED_SHUTDOWN(log->l_mp)) {
4405 xfs_buf_relse(bp);
4406 return XFS_ERROR(EIO);
4407 }
4408
4409 xfs_buf_iorequest(bp);
4401 error = xfs_buf_iowait(bp); 4410 error = xfs_buf_iowait(bp);
4402 if (error) { 4411 if (error) {
4403 xfs_buf_ioerror_alert(bp, __func__); 4412 xfs_buf_ioerror_alert(bp, __func__);
diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c
index 14a4996cfec6..dd88f0e27bd8 100644
--- a/fs/xfs/xfs_qm.c
+++ b/fs/xfs/xfs_qm.c
@@ -134,8 +134,6 @@ xfs_qm_dqpurge(
134{ 134{
135 struct xfs_mount *mp = dqp->q_mount; 135 struct xfs_mount *mp = dqp->q_mount;
136 struct xfs_quotainfo *qi = mp->m_quotainfo; 136 struct xfs_quotainfo *qi = mp->m_quotainfo;
137 struct xfs_dquot *gdqp = NULL;
138 struct xfs_dquot *pdqp = NULL;
139 137
140 xfs_dqlock(dqp); 138 xfs_dqlock(dqp);
141 if ((dqp->dq_flags & XFS_DQ_FREEING) || dqp->q_nrefs != 0) { 139 if ((dqp->dq_flags & XFS_DQ_FREEING) || dqp->q_nrefs != 0) {
@@ -143,21 +141,6 @@ xfs_qm_dqpurge(
143 return EAGAIN; 141 return EAGAIN;
144 } 142 }
145 143
146 /*
147 * If this quota has a hint attached, prepare for releasing it now.
148 */
149 gdqp = dqp->q_gdquot;
150 if (gdqp) {
151 xfs_dqlock(gdqp);
152 dqp->q_gdquot = NULL;
153 }
154
155 pdqp = dqp->q_pdquot;
156 if (pdqp) {
157 xfs_dqlock(pdqp);
158 dqp->q_pdquot = NULL;
159 }
160
161 dqp->dq_flags |= XFS_DQ_FREEING; 144 dqp->dq_flags |= XFS_DQ_FREEING;
162 145
163 xfs_dqflock(dqp); 146 xfs_dqflock(dqp);
@@ -206,11 +189,47 @@ xfs_qm_dqpurge(
206 XFS_STATS_DEC(xs_qm_dquot_unused); 189 XFS_STATS_DEC(xs_qm_dquot_unused);
207 190
208 xfs_qm_dqdestroy(dqp); 191 xfs_qm_dqdestroy(dqp);
192 return 0;
193}
194
195/*
196 * Release the group or project dquot pointers the user dquots maybe carrying
197 * around as a hint, and proceed to purge the user dquot cache if requested.
198*/
199STATIC int
200xfs_qm_dqpurge_hints(
201 struct xfs_dquot *dqp,
202 void *data)
203{
204 struct xfs_dquot *gdqp = NULL;
205 struct xfs_dquot *pdqp = NULL;
206 uint flags = *((uint *)data);
207
208 xfs_dqlock(dqp);
209 if (dqp->dq_flags & XFS_DQ_FREEING) {
210 xfs_dqunlock(dqp);
211 return EAGAIN;
212 }
213
214 /* If this quota has a hint attached, prepare for releasing it now */
215 gdqp = dqp->q_gdquot;
216 if (gdqp)
217 dqp->q_gdquot = NULL;
218
219 pdqp = dqp->q_pdquot;
220 if (pdqp)
221 dqp->q_pdquot = NULL;
222
223 xfs_dqunlock(dqp);
209 224
210 if (gdqp) 225 if (gdqp)
211 xfs_qm_dqput(gdqp); 226 xfs_qm_dqrele(gdqp);
212 if (pdqp) 227 if (pdqp)
213 xfs_qm_dqput(pdqp); 228 xfs_qm_dqrele(pdqp);
229
230 if (flags & XFS_QMOPT_UQUOTA)
231 return xfs_qm_dqpurge(dqp, NULL);
232
214 return 0; 233 return 0;
215} 234}
216 235
@@ -222,8 +241,18 @@ xfs_qm_dqpurge_all(
222 struct xfs_mount *mp, 241 struct xfs_mount *mp,
223 uint flags) 242 uint flags)
224{ 243{
225 if (flags & XFS_QMOPT_UQUOTA) 244 /*
226 xfs_qm_dquot_walk(mp, XFS_DQ_USER, xfs_qm_dqpurge, NULL); 245 * We have to release group/project dquot hint(s) from the user dquot
246 * at first if they are there, otherwise we would run into an infinite
247 * loop while walking through radix tree to purge other type of dquots
248 * since their refcount is not zero if the user dquot refers to them
249 * as hint.
250 *
251 * Calling the special xfs_qm_dqpurge_hints() will end up going through the
252 * general xfs_qm_dqpurge() against the user dquot cache if requested.
253 */
254 xfs_qm_dquot_walk(mp, XFS_DQ_USER, xfs_qm_dqpurge_hints, &flags);
255
227 if (flags & XFS_QMOPT_GQUOTA) 256 if (flags & XFS_QMOPT_GQUOTA)
228 xfs_qm_dquot_walk(mp, XFS_DQ_GROUP, xfs_qm_dqpurge, NULL); 257 xfs_qm_dquot_walk(mp, XFS_DQ_GROUP, xfs_qm_dqpurge, NULL);
229 if (flags & XFS_QMOPT_PQUOTA) 258 if (flags & XFS_QMOPT_PQUOTA)
@@ -2082,24 +2111,21 @@ xfs_qm_vop_create_dqattach(
2082 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); 2111 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
2083 ASSERT(XFS_IS_QUOTA_RUNNING(mp)); 2112 ASSERT(XFS_IS_QUOTA_RUNNING(mp));
2084 2113
2085 if (udqp) { 2114 if (udqp && XFS_IS_UQUOTA_ON(mp)) {
2086 ASSERT(ip->i_udquot == NULL); 2115 ASSERT(ip->i_udquot == NULL);
2087 ASSERT(XFS_IS_UQUOTA_ON(mp));
2088 ASSERT(ip->i_d.di_uid == be32_to_cpu(udqp->q_core.d_id)); 2116 ASSERT(ip->i_d.di_uid == be32_to_cpu(udqp->q_core.d_id));
2089 2117
2090 ip->i_udquot = xfs_qm_dqhold(udqp); 2118 ip->i_udquot = xfs_qm_dqhold(udqp);
2091 xfs_trans_mod_dquot(tp, udqp, XFS_TRANS_DQ_ICOUNT, 1); 2119 xfs_trans_mod_dquot(tp, udqp, XFS_TRANS_DQ_ICOUNT, 1);
2092 } 2120 }
2093 if (gdqp) { 2121 if (gdqp && XFS_IS_GQUOTA_ON(mp)) {
2094 ASSERT(ip->i_gdquot == NULL); 2122 ASSERT(ip->i_gdquot == NULL);
2095 ASSERT(XFS_IS_GQUOTA_ON(mp));
2096 ASSERT(ip->i_d.di_gid == be32_to_cpu(gdqp->q_core.d_id)); 2123 ASSERT(ip->i_d.di_gid == be32_to_cpu(gdqp->q_core.d_id));
2097 ip->i_gdquot = xfs_qm_dqhold(gdqp); 2124 ip->i_gdquot = xfs_qm_dqhold(gdqp);
2098 xfs_trans_mod_dquot(tp, gdqp, XFS_TRANS_DQ_ICOUNT, 1); 2125 xfs_trans_mod_dquot(tp, gdqp, XFS_TRANS_DQ_ICOUNT, 1);
2099 } 2126 }
2100 if (pdqp) { 2127 if (pdqp && XFS_IS_PQUOTA_ON(mp)) {
2101 ASSERT(ip->i_pdquot == NULL); 2128 ASSERT(ip->i_pdquot == NULL);
2102 ASSERT(XFS_IS_PQUOTA_ON(mp));
2103 ASSERT(xfs_get_projid(ip) == be32_to_cpu(pdqp->q_core.d_id)); 2129 ASSERT(xfs_get_projid(ip) == be32_to_cpu(pdqp->q_core.d_id));
2104 2130
2105 ip->i_pdquot = xfs_qm_dqhold(pdqp); 2131 ip->i_pdquot = xfs_qm_dqhold(pdqp);
diff --git a/fs/xfs/xfs_trans_buf.c b/fs/xfs/xfs_trans_buf.c
index c035d11b7734..647b6f1d8923 100644
--- a/fs/xfs/xfs_trans_buf.c
+++ b/fs/xfs/xfs_trans_buf.c
@@ -314,7 +314,18 @@ xfs_trans_read_buf_map(
314 ASSERT(bp->b_iodone == NULL); 314 ASSERT(bp->b_iodone == NULL);
315 XFS_BUF_READ(bp); 315 XFS_BUF_READ(bp);
316 bp->b_ops = ops; 316 bp->b_ops = ops;
317 xfsbdstrat(tp->t_mountp, bp); 317
318 /*
319 * XXX(hch): clean up the error handling here to be less
320 * of a mess..
321 */
322 if (XFS_FORCED_SHUTDOWN(mp)) {
323 trace_xfs_bdstrat_shut(bp, _RET_IP_);
324 xfs_bioerror_relse(bp);
325 } else {
326 xfs_buf_iorequest(bp);
327 }
328
318 error = xfs_buf_iowait(bp); 329 error = xfs_buf_iowait(bp);
319 if (error) { 330 if (error) {
320 xfs_buf_ioerror_alert(bp, __func__); 331 xfs_buf_ioerror_alert(bp, __func__);