diff options
author | Yan, Zheng <zyan@redhat.com> | 2014-11-14 09:38:29 -0500 |
---|---|---|
committer | Ilya Dryomov <idryomov@redhat.com> | 2014-12-17 12:09:52 -0500 |
commit | 28127bdd2f843e996f24b51a70a0592c7ec5c763 (patch) | |
tree | f6053dbabcc606d7a5270f6069bc70e305cafe9d | |
parent | 83701246aee8f83b4b42483051b439fbe96ed47d (diff) |
ceph: convert inline data to normal data before data write
Before any data write, convert inline data to normal data and set
i_inline_version to CEPH_INLINE_NONE. The OSD request that saves
inline data to an object contains three operations (CMPXATTR, WRITE
and SETXATTR). It compares an xattr named 'inline_version' to prevent
old data from overwriting newer data.
Signed-off-by: Yan, Zheng <zyan@redhat.com>
-rw-r--r-- | fs/ceph/addr.c | 148 | ||||
-rw-r--r-- | fs/ceph/file.c | 14 | ||||
-rw-r--r-- | fs/ceph/super.h | 2 |
3 files changed, 161 insertions, 3 deletions
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index 13413d7440d6..70a3b441261b 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c | |||
@@ -1313,6 +1313,19 @@ static int ceph_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) | |||
1313 | size_t len; | 1313 | size_t len; |
1314 | int want, got, ret; | 1314 | int want, got, ret; |
1315 | 1315 | ||
1316 | if (ci->i_inline_version != CEPH_INLINE_NONE) { | ||
1317 | struct page *locked_page = NULL; | ||
1318 | if (off == 0) { | ||
1319 | lock_page(page); | ||
1320 | locked_page = page; | ||
1321 | } | ||
1322 | ret = ceph_uninline_data(vma->vm_file, locked_page); | ||
1323 | if (locked_page) | ||
1324 | unlock_page(locked_page); | ||
1325 | if (ret < 0) | ||
1326 | return VM_FAULT_SIGBUS; | ||
1327 | } | ||
1328 | |||
1316 | if (off + PAGE_CACHE_SIZE <= size) | 1329 | if (off + PAGE_CACHE_SIZE <= size) |
1317 | len = PAGE_CACHE_SIZE; | 1330 | len = PAGE_CACHE_SIZE; |
1318 | else | 1331 | else |
@@ -1361,11 +1374,13 @@ static int ceph_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) | |||
1361 | ret = VM_FAULT_SIGBUS; | 1374 | ret = VM_FAULT_SIGBUS; |
1362 | } | 1375 | } |
1363 | out: | 1376 | out: |
1364 | if (ret != VM_FAULT_LOCKED) { | 1377 | if (ret != VM_FAULT_LOCKED) |
1365 | unlock_page(page); | 1378 | unlock_page(page); |
1366 | } else { | 1379 | if (ret == VM_FAULT_LOCKED || |
1380 | ci->i_inline_version != CEPH_INLINE_NONE) { | ||
1367 | int dirty; | 1381 | int dirty; |
1368 | spin_lock(&ci->i_ceph_lock); | 1382 | spin_lock(&ci->i_ceph_lock); |
1383 | ci->i_inline_version = CEPH_INLINE_NONE; | ||
1369 | dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR); | 1384 | dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR); |
1370 | spin_unlock(&ci->i_ceph_lock); | 1385 | spin_unlock(&ci->i_ceph_lock); |
1371 | if (dirty) | 1386 | if (dirty) |
@@ -1422,6 +1437,135 @@ void ceph_fill_inline_data(struct inode *inode, struct page *locked_page, | |||
1422 | } | 1437 | } |
1423 | } | 1438 | } |
1424 | 1439 | ||
1440 | int ceph_uninline_data(struct file *filp, struct page *locked_page) | ||
1441 | { | ||
1442 | struct inode *inode = file_inode(filp); | ||
1443 | struct ceph_inode_info *ci = ceph_inode(inode); | ||
1444 | struct ceph_fs_client *fsc = ceph_inode_to_client(inode); | ||
1445 | struct ceph_osd_request *req; | ||
1446 | struct page *page = NULL; | ||
1447 | u64 len, inline_version; | ||
1448 | int err = 0; | ||
1449 | bool from_pagecache = false; | ||
1450 | |||
1451 | spin_lock(&ci->i_ceph_lock); | ||
1452 | inline_version = ci->i_inline_version; | ||
1453 | spin_unlock(&ci->i_ceph_lock); | ||
1454 | |||
1455 | dout("uninline_data %p %llx.%llx inline_version %llu\n", | ||
1456 | inode, ceph_vinop(inode), inline_version); | ||
1457 | |||
1458 | if (inline_version == 1 || /* initial version, no data */ | ||
1459 | inline_version == CEPH_INLINE_NONE) | ||
1460 | goto out; | ||
1461 | |||
1462 | if (locked_page) { | ||
1463 | page = locked_page; | ||
1464 | WARN_ON(!PageUptodate(page)); | ||
1465 | } else if (ceph_caps_issued(ci) & | ||
1466 | (CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO)) { | ||
1467 | page = find_get_page(inode->i_mapping, 0); | ||
1468 | if (page) { | ||
1469 | if (PageUptodate(page)) { | ||
1470 | from_pagecache = true; | ||
1471 | lock_page(page); | ||
1472 | } else { | ||
1473 | page_cache_release(page); | ||
1474 | page = NULL; | ||
1475 | } | ||
1476 | } | ||
1477 | } | ||
1478 | |||
1479 | if (page) { | ||
1480 | len = i_size_read(inode); | ||
1481 | if (len > PAGE_CACHE_SIZE) | ||
1482 | len = PAGE_CACHE_SIZE; | ||
1483 | } else { | ||
1484 | page = __page_cache_alloc(GFP_NOFS); | ||
1485 | if (!page) { | ||
1486 | err = -ENOMEM; | ||
1487 | goto out; | ||
1488 | } | ||
1489 | err = __ceph_do_getattr(inode, page, | ||
1490 | CEPH_STAT_CAP_INLINE_DATA, true); | ||
1491 | if (err < 0) { | ||
1492 | /* no inline data */ | ||
1493 | if (err == -ENODATA) | ||
1494 | err = 0; | ||
1495 | goto out; | ||
1496 | } | ||
1497 | len = err; | ||
1498 | } | ||
1499 | |||
1500 | req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout, | ||
1501 | ceph_vino(inode), 0, &len, 0, 1, | ||
1502 | CEPH_OSD_OP_CREATE, | ||
1503 | CEPH_OSD_FLAG_ONDISK | CEPH_OSD_FLAG_WRITE, | ||
1504 | ci->i_snap_realm->cached_context, | ||
1505 | 0, 0, false); | ||
1506 | if (IS_ERR(req)) { | ||
1507 | err = PTR_ERR(req); | ||
1508 | goto out; | ||
1509 | } | ||
1510 | |||
1511 | ceph_osdc_build_request(req, 0, NULL, CEPH_NOSNAP, &inode->i_mtime); | ||
1512 | err = ceph_osdc_start_request(&fsc->client->osdc, req, false); | ||
1513 | if (!err) | ||
1514 | err = ceph_osdc_wait_request(&fsc->client->osdc, req); | ||
1515 | ceph_osdc_put_request(req); | ||
1516 | if (err < 0) | ||
1517 | goto out; | ||
1518 | |||
1519 | req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout, | ||
1520 | ceph_vino(inode), 0, &len, 1, 3, | ||
1521 | CEPH_OSD_OP_WRITE, | ||
1522 | CEPH_OSD_FLAG_ONDISK | CEPH_OSD_FLAG_WRITE, | ||
1523 | ci->i_snap_realm->cached_context, | ||
1524 | ci->i_truncate_seq, ci->i_truncate_size, | ||
1525 | false); | ||
1526 | if (IS_ERR(req)) { | ||
1527 | err = PTR_ERR(req); | ||
1528 | goto out; | ||
1529 | } | ||
1530 | |||
1531 | osd_req_op_extent_osd_data_pages(req, 1, &page, len, 0, false, false); | ||
1532 | |||
1533 | err = osd_req_op_xattr_init(req, 0, CEPH_OSD_OP_CMPXATTR, | ||
1534 | "inline_version", &inline_version, | ||
1535 | sizeof(inline_version), | ||
1536 | CEPH_OSD_CMPXATTR_OP_GT, | ||
1537 | CEPH_OSD_CMPXATTR_MODE_U64); | ||
1538 | if (err) | ||
1539 | goto out_put; | ||
1540 | |||
1541 | err = osd_req_op_xattr_init(req, 2, CEPH_OSD_OP_SETXATTR, | ||
1542 | "inline_version", &inline_version, | ||
1543 | sizeof(inline_version), 0, 0); | ||
1544 | if (err) | ||
1545 | goto out_put; | ||
1546 | |||
1547 | ceph_osdc_build_request(req, 0, NULL, CEPH_NOSNAP, &inode->i_mtime); | ||
1548 | err = ceph_osdc_start_request(&fsc->client->osdc, req, false); | ||
1549 | if (!err) | ||
1550 | err = ceph_osdc_wait_request(&fsc->client->osdc, req); | ||
1551 | out_put: | ||
1552 | ceph_osdc_put_request(req); | ||
1553 | if (err == -ECANCELED) | ||
1554 | err = 0; | ||
1555 | out: | ||
1556 | if (page && page != locked_page) { | ||
1557 | if (from_pagecache) { | ||
1558 | unlock_page(page); | ||
1559 | page_cache_release(page); | ||
1560 | } else | ||
1561 | __free_pages(page, 0); | ||
1562 | } | ||
1563 | |||
1564 | dout("uninline_data %p %llx.%llx inline_version %llu = %d\n", | ||
1565 | inode, ceph_vinop(inode), inline_version, err); | ||
1566 | return err; | ||
1567 | } | ||
1568 | |||
1425 | static struct vm_operations_struct ceph_vmops = { | 1569 | static struct vm_operations_struct ceph_vmops = { |
1426 | .fault = ceph_filemap_fault, | 1570 | .fault = ceph_filemap_fault, |
1427 | .page_mkwrite = ceph_page_mkwrite, | 1571 | .page_mkwrite = ceph_page_mkwrite, |
diff --git a/fs/ceph/file.c b/fs/ceph/file.c index 5b092bda9284..9b5901fefbf8 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c | |||
@@ -963,6 +963,12 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from) | |||
963 | if (err) | 963 | if (err) |
964 | goto out; | 964 | goto out; |
965 | 965 | ||
966 | if (ci->i_inline_version != CEPH_INLINE_NONE) { | ||
967 | err = ceph_uninline_data(file, NULL); | ||
968 | if (err < 0) | ||
969 | goto out; | ||
970 | } | ||
971 | |||
966 | retry_snap: | 972 | retry_snap: |
967 | if (ceph_osdmap_flag(osdc->osdmap, CEPH_OSDMAP_FULL)) { | 973 | if (ceph_osdmap_flag(osdc->osdmap, CEPH_OSDMAP_FULL)) { |
968 | err = -ENOSPC; | 974 | err = -ENOSPC; |
@@ -1024,6 +1030,7 @@ retry_snap: | |||
1024 | if (written >= 0) { | 1030 | if (written >= 0) { |
1025 | int dirty; | 1031 | int dirty; |
1026 | spin_lock(&ci->i_ceph_lock); | 1032 | spin_lock(&ci->i_ceph_lock); |
1033 | ci->i_inline_version = CEPH_INLINE_NONE; | ||
1027 | dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR); | 1034 | dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR); |
1028 | spin_unlock(&ci->i_ceph_lock); | 1035 | spin_unlock(&ci->i_ceph_lock); |
1029 | if (dirty) | 1036 | if (dirty) |
@@ -1269,6 +1276,12 @@ static long ceph_fallocate(struct file *file, int mode, | |||
1269 | goto unlock; | 1276 | goto unlock; |
1270 | } | 1277 | } |
1271 | 1278 | ||
1279 | if (ci->i_inline_version != CEPH_INLINE_NONE) { | ||
1280 | ret = ceph_uninline_data(file, NULL); | ||
1281 | if (ret < 0) | ||
1282 | goto unlock; | ||
1283 | } | ||
1284 | |||
1272 | size = i_size_read(inode); | 1285 | size = i_size_read(inode); |
1273 | if (!(mode & FALLOC_FL_KEEP_SIZE)) | 1286 | if (!(mode & FALLOC_FL_KEEP_SIZE)) |
1274 | endoff = offset + length; | 1287 | endoff = offset + length; |
@@ -1295,6 +1308,7 @@ static long ceph_fallocate(struct file *file, int mode, | |||
1295 | 1308 | ||
1296 | if (!ret) { | 1309 | if (!ret) { |
1297 | spin_lock(&ci->i_ceph_lock); | 1310 | spin_lock(&ci->i_ceph_lock); |
1311 | ci->i_inline_version = CEPH_INLINE_NONE; | ||
1298 | dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR); | 1312 | dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR); |
1299 | spin_unlock(&ci->i_ceph_lock); | 1313 | spin_unlock(&ci->i_ceph_lock); |
1300 | if (dirty) | 1314 | if (dirty) |
diff --git a/fs/ceph/super.h b/fs/ceph/super.h index 6d56fae863ca..8197a3cf750b 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h | |||
@@ -888,7 +888,7 @@ extern int ceph_atomic_open(struct inode *dir, struct dentry *dentry, | |||
888 | extern int ceph_release(struct inode *inode, struct file *filp); | 888 | extern int ceph_release(struct inode *inode, struct file *filp); |
889 | extern void ceph_fill_inline_data(struct inode *inode, struct page *locked_page, | 889 | extern void ceph_fill_inline_data(struct inode *inode, struct page *locked_page, |
890 | char *data, size_t len); | 890 | char *data, size_t len); |
891 | 891 | int ceph_uninline_data(struct file *filp, struct page *locked_page); | |
892 | /* dir.c */ | 892 | /* dir.c */ |
893 | extern const struct file_operations ceph_dir_fops; | 893 | extern const struct file_operations ceph_dir_fops; |
894 | extern const struct inode_operations ceph_dir_iops; | 894 | extern const struct inode_operations ceph_dir_iops; |