diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2014-10-15 00:46:01 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2014-10-15 00:46:01 -0400 |
commit | 6b0490816671b2f4126a99998c9bf3c8c0472de2 (patch) | |
tree | 016543455c2bdbe47b422fed6a3b4ffb991c97d6 /drivers/block | |
parent | ce9d7f7b45930ed16c512aabcfe651d44f1c8619 (diff) | |
parent | 0bc62284ee3f2a228c64902ed818b6ba8e04159b (diff) |
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client
Pull Ceph updates from Sage Weil:
"There is the long-awaited discard support for RBD (Guangliang Zhao,
Josh Durgin), a pile of RBD bug fixes that didn't belong in late -rc's
(Ilya Dryomov, Li RongQing), a pile of fs/ceph bug fixes and
performance and debugging improvements (Yan, Zheng, John Spray), and a
smattering of cleanups (Chao Yu, Fabian Frederick, Joe Perches)"
* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client: (40 commits)
ceph: fix divide-by-zero in __validate_layout()
rbd: rbd workqueues need a resque worker
libceph: ceph-msgr workqueue needs a resque worker
ceph: fix bool assignments
libceph: separate multiple ops with commas in debugfs output
libceph: sync osd op definitions in rados.h
libceph: remove redundant declaration
ceph: additional debugfs output
ceph: export ceph_session_state_name function
ceph: include the initial ACL in create/mkdir/mknod MDS requests
ceph: use pagelist to present MDS request data
libceph: reference counting pagelist
ceph: fix llistxattr on symlink
ceph: send client metadata to MDS
ceph: remove redundant code for max file size verification
ceph: remove redundant io_iter_advance()
ceph: move ceph_find_inode() outside the s_mutex
ceph: request xattrs if xattr_version is zero
rbd: set the remaining discard properties to enable support
rbd: use helpers to handle discard for layered images correctly
...
Diffstat (limited to 'drivers/block')
-rw-r--r-- | drivers/block/rbd.c | 396 |
1 files changed, 276 insertions, 120 deletions
diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 4b97baf8afa3..0a54c588e433 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c | |||
@@ -210,6 +210,12 @@ enum obj_request_type { | |||
210 | OBJ_REQUEST_NODATA, OBJ_REQUEST_BIO, OBJ_REQUEST_PAGES | 210 | OBJ_REQUEST_NODATA, OBJ_REQUEST_BIO, OBJ_REQUEST_PAGES |
211 | }; | 211 | }; |
212 | 212 | ||
213 | enum obj_operation_type { | ||
214 | OBJ_OP_WRITE, | ||
215 | OBJ_OP_READ, | ||
216 | OBJ_OP_DISCARD, | ||
217 | }; | ||
218 | |||
213 | enum obj_req_flags { | 219 | enum obj_req_flags { |
214 | OBJ_REQ_DONE, /* completion flag: not done = 0, done = 1 */ | 220 | OBJ_REQ_DONE, /* completion flag: not done = 0, done = 1 */ |
215 | OBJ_REQ_IMG_DATA, /* object usage: standalone = 0, image = 1 */ | 221 | OBJ_REQ_IMG_DATA, /* object usage: standalone = 0, image = 1 */ |
@@ -276,6 +282,7 @@ enum img_req_flags { | |||
276 | IMG_REQ_WRITE, /* I/O direction: read = 0, write = 1 */ | 282 | IMG_REQ_WRITE, /* I/O direction: read = 0, write = 1 */ |
277 | IMG_REQ_CHILD, /* initiator: block = 0, child image = 1 */ | 283 | IMG_REQ_CHILD, /* initiator: block = 0, child image = 1 */ |
278 | IMG_REQ_LAYERED, /* ENOENT handling: normal = 0, layered = 1 */ | 284 | IMG_REQ_LAYERED, /* ENOENT handling: normal = 0, layered = 1 */ |
285 | IMG_REQ_DISCARD, /* discard: normal = 0, discard request = 1 */ | ||
279 | }; | 286 | }; |
280 | 287 | ||
281 | struct rbd_img_request { | 288 | struct rbd_img_request { |
@@ -785,6 +792,20 @@ static int parse_rbd_opts_token(char *c, void *private) | |||
785 | return 0; | 792 | return 0; |
786 | } | 793 | } |
787 | 794 | ||
795 | static char* obj_op_name(enum obj_operation_type op_type) | ||
796 | { | ||
797 | switch (op_type) { | ||
798 | case OBJ_OP_READ: | ||
799 | return "read"; | ||
800 | case OBJ_OP_WRITE: | ||
801 | return "write"; | ||
802 | case OBJ_OP_DISCARD: | ||
803 | return "discard"; | ||
804 | default: | ||
805 | return "???"; | ||
806 | } | ||
807 | } | ||
808 | |||
788 | /* | 809 | /* |
789 | * Get a ceph client with specific addr and configuration, if one does | 810 | * Get a ceph client with specific addr and configuration, if one does |
790 | * not exist create it. Either way, ceph_opts is consumed by this | 811 | * not exist create it. Either way, ceph_opts is consumed by this |
@@ -1600,6 +1621,21 @@ static bool img_request_write_test(struct rbd_img_request *img_request) | |||
1600 | return test_bit(IMG_REQ_WRITE, &img_request->flags) != 0; | 1621 | return test_bit(IMG_REQ_WRITE, &img_request->flags) != 0; |
1601 | } | 1622 | } |
1602 | 1623 | ||
1624 | /* | ||
1625 | * Set the discard flag when the img_request is a discard request | ||
1626 | */ | ||
1627 | static void img_request_discard_set(struct rbd_img_request *img_request) | ||
1628 | { | ||
1629 | set_bit(IMG_REQ_DISCARD, &img_request->flags); | ||
1630 | smp_mb(); | ||
1631 | } | ||
1632 | |||
1633 | static bool img_request_discard_test(struct rbd_img_request *img_request) | ||
1634 | { | ||
1635 | smp_mb(); | ||
1636 | return test_bit(IMG_REQ_DISCARD, &img_request->flags) != 0; | ||
1637 | } | ||
1638 | |||
1603 | static void img_request_child_set(struct rbd_img_request *img_request) | 1639 | static void img_request_child_set(struct rbd_img_request *img_request) |
1604 | { | 1640 | { |
1605 | set_bit(IMG_REQ_CHILD, &img_request->flags); | 1641 | set_bit(IMG_REQ_CHILD, &img_request->flags); |
@@ -1636,6 +1672,17 @@ static bool img_request_layered_test(struct rbd_img_request *img_request) | |||
1636 | return test_bit(IMG_REQ_LAYERED, &img_request->flags) != 0; | 1672 | return test_bit(IMG_REQ_LAYERED, &img_request->flags) != 0; |
1637 | } | 1673 | } |
1638 | 1674 | ||
1675 | static enum obj_operation_type | ||
1676 | rbd_img_request_op_type(struct rbd_img_request *img_request) | ||
1677 | { | ||
1678 | if (img_request_write_test(img_request)) | ||
1679 | return OBJ_OP_WRITE; | ||
1680 | else if (img_request_discard_test(img_request)) | ||
1681 | return OBJ_OP_DISCARD; | ||
1682 | else | ||
1683 | return OBJ_OP_READ; | ||
1684 | } | ||
1685 | |||
1639 | static void | 1686 | static void |
1640 | rbd_img_obj_request_read_callback(struct rbd_obj_request *obj_request) | 1687 | rbd_img_obj_request_read_callback(struct rbd_obj_request *obj_request) |
1641 | { | 1688 | { |
@@ -1722,6 +1769,21 @@ static void rbd_osd_write_callback(struct rbd_obj_request *obj_request) | |||
1722 | obj_request_done_set(obj_request); | 1769 | obj_request_done_set(obj_request); |
1723 | } | 1770 | } |
1724 | 1771 | ||
1772 | static void rbd_osd_discard_callback(struct rbd_obj_request *obj_request) | ||
1773 | { | ||
1774 | dout("%s: obj %p result %d %llu\n", __func__, obj_request, | ||
1775 | obj_request->result, obj_request->length); | ||
1776 | /* | ||
1777 | * There is no such thing as a successful short discard. Set | ||
1778 | * it to our originally-requested length. | ||
1779 | */ | ||
1780 | obj_request->xferred = obj_request->length; | ||
1781 | /* discarding a non-existent object is not a problem */ | ||
1782 | if (obj_request->result == -ENOENT) | ||
1783 | obj_request->result = 0; | ||
1784 | obj_request_done_set(obj_request); | ||
1785 | } | ||
1786 | |||
1725 | /* | 1787 | /* |
1726 | * For a simple stat call there's nothing to do. We'll do more if | 1788 | * For a simple stat call there's nothing to do. We'll do more if |
1727 | * this is part of a write sequence for a layered image. | 1789 | * this is part of a write sequence for a layered image. |
@@ -1773,6 +1835,11 @@ static void rbd_osd_req_callback(struct ceph_osd_request *osd_req, | |||
1773 | case CEPH_OSD_OP_STAT: | 1835 | case CEPH_OSD_OP_STAT: |
1774 | rbd_osd_stat_callback(obj_request); | 1836 | rbd_osd_stat_callback(obj_request); |
1775 | break; | 1837 | break; |
1838 | case CEPH_OSD_OP_DELETE: | ||
1839 | case CEPH_OSD_OP_TRUNCATE: | ||
1840 | case CEPH_OSD_OP_ZERO: | ||
1841 | rbd_osd_discard_callback(obj_request); | ||
1842 | break; | ||
1776 | case CEPH_OSD_OP_CALL: | 1843 | case CEPH_OSD_OP_CALL: |
1777 | case CEPH_OSD_OP_NOTIFY_ACK: | 1844 | case CEPH_OSD_OP_NOTIFY_ACK: |
1778 | case CEPH_OSD_OP_WATCH: | 1845 | case CEPH_OSD_OP_WATCH: |
@@ -1823,7 +1890,7 @@ static void rbd_osd_req_format_write(struct rbd_obj_request *obj_request) | |||
1823 | */ | 1890 | */ |
1824 | static struct ceph_osd_request *rbd_osd_req_create( | 1891 | static struct ceph_osd_request *rbd_osd_req_create( |
1825 | struct rbd_device *rbd_dev, | 1892 | struct rbd_device *rbd_dev, |
1826 | bool write_request, | 1893 | enum obj_operation_type op_type, |
1827 | unsigned int num_ops, | 1894 | unsigned int num_ops, |
1828 | struct rbd_obj_request *obj_request) | 1895 | struct rbd_obj_request *obj_request) |
1829 | { | 1896 | { |
@@ -1831,16 +1898,18 @@ static struct ceph_osd_request *rbd_osd_req_create( | |||
1831 | struct ceph_osd_client *osdc; | 1898 | struct ceph_osd_client *osdc; |
1832 | struct ceph_osd_request *osd_req; | 1899 | struct ceph_osd_request *osd_req; |
1833 | 1900 | ||
1834 | if (obj_request_img_data_test(obj_request)) { | 1901 | if (obj_request_img_data_test(obj_request) && |
1902 | (op_type == OBJ_OP_DISCARD || op_type == OBJ_OP_WRITE)) { | ||
1835 | struct rbd_img_request *img_request = obj_request->img_request; | 1903 | struct rbd_img_request *img_request = obj_request->img_request; |
1836 | 1904 | if (op_type == OBJ_OP_WRITE) { | |
1837 | rbd_assert(write_request == | 1905 | rbd_assert(img_request_write_test(img_request)); |
1838 | img_request_write_test(img_request)); | 1906 | } else { |
1839 | if (write_request) | 1907 | rbd_assert(img_request_discard_test(img_request)); |
1840 | snapc = img_request->snapc; | 1908 | } |
1909 | snapc = img_request->snapc; | ||
1841 | } | 1910 | } |
1842 | 1911 | ||
1843 | rbd_assert(num_ops == 1 || (write_request && num_ops == 2)); | 1912 | rbd_assert(num_ops == 1 || ((op_type == OBJ_OP_WRITE) && num_ops == 2)); |
1844 | 1913 | ||
1845 | /* Allocate and initialize the request, for the num_ops ops */ | 1914 | /* Allocate and initialize the request, for the num_ops ops */ |
1846 | 1915 | ||
@@ -1850,7 +1919,7 @@ static struct ceph_osd_request *rbd_osd_req_create( | |||
1850 | if (!osd_req) | 1919 | if (!osd_req) |
1851 | return NULL; /* ENOMEM */ | 1920 | return NULL; /* ENOMEM */ |
1852 | 1921 | ||
1853 | if (write_request) | 1922 | if (op_type == OBJ_OP_WRITE || op_type == OBJ_OP_DISCARD) |
1854 | osd_req->r_flags = CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK; | 1923 | osd_req->r_flags = CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK; |
1855 | else | 1924 | else |
1856 | osd_req->r_flags = CEPH_OSD_FLAG_READ; | 1925 | osd_req->r_flags = CEPH_OSD_FLAG_READ; |
@@ -1865,9 +1934,10 @@ static struct ceph_osd_request *rbd_osd_req_create( | |||
1865 | } | 1934 | } |
1866 | 1935 | ||
1867 | /* | 1936 | /* |
1868 | * Create a copyup osd request based on the information in the | 1937 | * Create a copyup osd request based on the information in the object |
1869 | * object request supplied. A copyup request has three osd ops, | 1938 | * request supplied. A copyup request has two or three osd ops, a |
1870 | * a copyup method call, a hint op, and a write op. | 1939 | * copyup method call, potentially a hint op, and a write or truncate |
1940 | * or zero op. | ||
1871 | */ | 1941 | */ |
1872 | static struct ceph_osd_request * | 1942 | static struct ceph_osd_request * |
1873 | rbd_osd_req_create_copyup(struct rbd_obj_request *obj_request) | 1943 | rbd_osd_req_create_copyup(struct rbd_obj_request *obj_request) |
@@ -1877,18 +1947,24 @@ rbd_osd_req_create_copyup(struct rbd_obj_request *obj_request) | |||
1877 | struct rbd_device *rbd_dev; | 1947 | struct rbd_device *rbd_dev; |
1878 | struct ceph_osd_client *osdc; | 1948 | struct ceph_osd_client *osdc; |
1879 | struct ceph_osd_request *osd_req; | 1949 | struct ceph_osd_request *osd_req; |
1950 | int num_osd_ops = 3; | ||
1880 | 1951 | ||
1881 | rbd_assert(obj_request_img_data_test(obj_request)); | 1952 | rbd_assert(obj_request_img_data_test(obj_request)); |
1882 | img_request = obj_request->img_request; | 1953 | img_request = obj_request->img_request; |
1883 | rbd_assert(img_request); | 1954 | rbd_assert(img_request); |
1884 | rbd_assert(img_request_write_test(img_request)); | 1955 | rbd_assert(img_request_write_test(img_request) || |
1956 | img_request_discard_test(img_request)); | ||
1957 | |||
1958 | if (img_request_discard_test(img_request)) | ||
1959 | num_osd_ops = 2; | ||
1885 | 1960 | ||
1886 | /* Allocate and initialize the request, for the three ops */ | 1961 | /* Allocate and initialize the request, for all the ops */ |
1887 | 1962 | ||
1888 | snapc = img_request->snapc; | 1963 | snapc = img_request->snapc; |
1889 | rbd_dev = img_request->rbd_dev; | 1964 | rbd_dev = img_request->rbd_dev; |
1890 | osdc = &rbd_dev->rbd_client->client->osdc; | 1965 | osdc = &rbd_dev->rbd_client->client->osdc; |
1891 | osd_req = ceph_osdc_alloc_request(osdc, snapc, 3, false, GFP_ATOMIC); | 1966 | osd_req = ceph_osdc_alloc_request(osdc, snapc, num_osd_ops, |
1967 | false, GFP_ATOMIC); | ||
1892 | if (!osd_req) | 1968 | if (!osd_req) |
1893 | return NULL; /* ENOMEM */ | 1969 | return NULL; /* ENOMEM */ |
1894 | 1970 | ||
@@ -2057,7 +2133,8 @@ static bool rbd_dev_parent_get(struct rbd_device *rbd_dev) | |||
2057 | static struct rbd_img_request *rbd_img_request_create( | 2133 | static struct rbd_img_request *rbd_img_request_create( |
2058 | struct rbd_device *rbd_dev, | 2134 | struct rbd_device *rbd_dev, |
2059 | u64 offset, u64 length, | 2135 | u64 offset, u64 length, |
2060 | bool write_request) | 2136 | enum obj_operation_type op_type, |
2137 | struct ceph_snap_context *snapc) | ||
2061 | { | 2138 | { |
2062 | struct rbd_img_request *img_request; | 2139 | struct rbd_img_request *img_request; |
2063 | 2140 | ||
@@ -2065,20 +2142,17 @@ static struct rbd_img_request *rbd_img_request_create( | |||
2065 | if (!img_request) | 2142 | if (!img_request) |
2066 | return NULL; | 2143 | return NULL; |
2067 | 2144 | ||
2068 | if (write_request) { | ||
2069 | down_read(&rbd_dev->header_rwsem); | ||
2070 | ceph_get_snap_context(rbd_dev->header.snapc); | ||
2071 | up_read(&rbd_dev->header_rwsem); | ||
2072 | } | ||
2073 | |||
2074 | img_request->rq = NULL; | 2145 | img_request->rq = NULL; |
2075 | img_request->rbd_dev = rbd_dev; | 2146 | img_request->rbd_dev = rbd_dev; |
2076 | img_request->offset = offset; | 2147 | img_request->offset = offset; |
2077 | img_request->length = length; | 2148 | img_request->length = length; |
2078 | img_request->flags = 0; | 2149 | img_request->flags = 0; |
2079 | if (write_request) { | 2150 | if (op_type == OBJ_OP_DISCARD) { |
2151 | img_request_discard_set(img_request); | ||
2152 | img_request->snapc = snapc; | ||
2153 | } else if (op_type == OBJ_OP_WRITE) { | ||
2080 | img_request_write_set(img_request); | 2154 | img_request_write_set(img_request); |
2081 | img_request->snapc = rbd_dev->header.snapc; | 2155 | img_request->snapc = snapc; |
2082 | } else { | 2156 | } else { |
2083 | img_request->snap_id = rbd_dev->spec->snap_id; | 2157 | img_request->snap_id = rbd_dev->spec->snap_id; |
2084 | } | 2158 | } |
@@ -2093,8 +2167,7 @@ static struct rbd_img_request *rbd_img_request_create( | |||
2093 | kref_init(&img_request->kref); | 2167 | kref_init(&img_request->kref); |
2094 | 2168 | ||
2095 | dout("%s: rbd_dev %p %s %llu/%llu -> img %p\n", __func__, rbd_dev, | 2169 | dout("%s: rbd_dev %p %s %llu/%llu -> img %p\n", __func__, rbd_dev, |
2096 | write_request ? "write" : "read", offset, length, | 2170 | obj_op_name(op_type), offset, length, img_request); |
2097 | img_request); | ||
2098 | 2171 | ||
2099 | return img_request; | 2172 | return img_request; |
2100 | } | 2173 | } |
@@ -2118,7 +2191,8 @@ static void rbd_img_request_destroy(struct kref *kref) | |||
2118 | rbd_dev_parent_put(img_request->rbd_dev); | 2191 | rbd_dev_parent_put(img_request->rbd_dev); |
2119 | } | 2192 | } |
2120 | 2193 | ||
2121 | if (img_request_write_test(img_request)) | 2194 | if (img_request_write_test(img_request) || |
2195 | img_request_discard_test(img_request)) | ||
2122 | ceph_put_snap_context(img_request->snapc); | 2196 | ceph_put_snap_context(img_request->snapc); |
2123 | 2197 | ||
2124 | kmem_cache_free(rbd_img_request_cache, img_request); | 2198 | kmem_cache_free(rbd_img_request_cache, img_request); |
@@ -2134,8 +2208,8 @@ static struct rbd_img_request *rbd_parent_request_create( | |||
2134 | rbd_assert(obj_request->img_request); | 2208 | rbd_assert(obj_request->img_request); |
2135 | rbd_dev = obj_request->img_request->rbd_dev; | 2209 | rbd_dev = obj_request->img_request->rbd_dev; |
2136 | 2210 | ||
2137 | parent_request = rbd_img_request_create(rbd_dev->parent, | 2211 | parent_request = rbd_img_request_create(rbd_dev->parent, img_offset, |
2138 | img_offset, length, false); | 2212 | length, OBJ_OP_READ, NULL); |
2139 | if (!parent_request) | 2213 | if (!parent_request) |
2140 | return NULL; | 2214 | return NULL; |
2141 | 2215 | ||
@@ -2176,11 +2250,18 @@ static bool rbd_img_obj_end_request(struct rbd_obj_request *obj_request) | |||
2176 | result = obj_request->result; | 2250 | result = obj_request->result; |
2177 | if (result) { | 2251 | if (result) { |
2178 | struct rbd_device *rbd_dev = img_request->rbd_dev; | 2252 | struct rbd_device *rbd_dev = img_request->rbd_dev; |
2253 | enum obj_operation_type op_type; | ||
2254 | |||
2255 | if (img_request_discard_test(img_request)) | ||
2256 | op_type = OBJ_OP_DISCARD; | ||
2257 | else if (img_request_write_test(img_request)) | ||
2258 | op_type = OBJ_OP_WRITE; | ||
2259 | else | ||
2260 | op_type = OBJ_OP_READ; | ||
2179 | 2261 | ||
2180 | rbd_warn(rbd_dev, "%s %llx at %llx (%llx)", | 2262 | rbd_warn(rbd_dev, "%s %llx at %llx (%llx)", |
2181 | img_request_write_test(img_request) ? "write" : "read", | 2263 | obj_op_name(op_type), obj_request->length, |
2182 | obj_request->length, obj_request->img_offset, | 2264 | obj_request->img_offset, obj_request->offset); |
2183 | obj_request->offset); | ||
2184 | rbd_warn(rbd_dev, " result %d xferred %x", | 2265 | rbd_warn(rbd_dev, " result %d xferred %x", |
2185 | result, xferred); | 2266 | result, xferred); |
2186 | if (!img_request->result) | 2267 | if (!img_request->result) |
@@ -2245,6 +2326,67 @@ out: | |||
2245 | } | 2326 | } |
2246 | 2327 | ||
2247 | /* | 2328 | /* |
2329 | * Add individual osd ops to the given ceph_osd_request and prepare | ||
2330 | * them for submission. num_ops is the current number of | ||
2331 | * osd operations already added to the object request. | ||
2332 | */ | ||
2333 | static void rbd_img_obj_request_fill(struct rbd_obj_request *obj_request, | ||
2334 | struct ceph_osd_request *osd_request, | ||
2335 | enum obj_operation_type op_type, | ||
2336 | unsigned int num_ops) | ||
2337 | { | ||
2338 | struct rbd_img_request *img_request = obj_request->img_request; | ||
2339 | struct rbd_device *rbd_dev = img_request->rbd_dev; | ||
2340 | u64 object_size = rbd_obj_bytes(&rbd_dev->header); | ||
2341 | u64 offset = obj_request->offset; | ||
2342 | u64 length = obj_request->length; | ||
2343 | u64 img_end; | ||
2344 | u16 opcode; | ||
2345 | |||
2346 | if (op_type == OBJ_OP_DISCARD) { | ||
2347 | if (!offset && length == object_size && | ||
2348 | (!img_request_layered_test(img_request) || | ||
2349 | !obj_request_overlaps_parent(obj_request))) { | ||
2350 | opcode = CEPH_OSD_OP_DELETE; | ||
2351 | } else if ((offset + length == object_size)) { | ||
2352 | opcode = CEPH_OSD_OP_TRUNCATE; | ||
2353 | } else { | ||
2354 | down_read(&rbd_dev->header_rwsem); | ||
2355 | img_end = rbd_dev->header.image_size; | ||
2356 | up_read(&rbd_dev->header_rwsem); | ||
2357 | |||
2358 | if (obj_request->img_offset + length == img_end) | ||
2359 | opcode = CEPH_OSD_OP_TRUNCATE; | ||
2360 | else | ||
2361 | opcode = CEPH_OSD_OP_ZERO; | ||
2362 | } | ||
2363 | } else if (op_type == OBJ_OP_WRITE) { | ||
2364 | opcode = CEPH_OSD_OP_WRITE; | ||
2365 | osd_req_op_alloc_hint_init(osd_request, num_ops, | ||
2366 | object_size, object_size); | ||
2367 | num_ops++; | ||
2368 | } else { | ||
2369 | opcode = CEPH_OSD_OP_READ; | ||
2370 | } | ||
2371 | |||
2372 | osd_req_op_extent_init(osd_request, num_ops, opcode, offset, length, | ||
2373 | 0, 0); | ||
2374 | if (obj_request->type == OBJ_REQUEST_BIO) | ||
2375 | osd_req_op_extent_osd_data_bio(osd_request, num_ops, | ||
2376 | obj_request->bio_list, length); | ||
2377 | else if (obj_request->type == OBJ_REQUEST_PAGES) | ||
2378 | osd_req_op_extent_osd_data_pages(osd_request, num_ops, | ||
2379 | obj_request->pages, length, | ||
2380 | offset & ~PAGE_MASK, false, false); | ||
2381 | |||
2382 | /* Discards are also writes */ | ||
2383 | if (op_type == OBJ_OP_WRITE || op_type == OBJ_OP_DISCARD) | ||
2384 | rbd_osd_req_format_write(obj_request); | ||
2385 | else | ||
2386 | rbd_osd_req_format_read(obj_request); | ||
2387 | } | ||
2388 | |||
2389 | /* | ||
2248 | * Split up an image request into one or more object requests, each | 2390 | * Split up an image request into one or more object requests, each |
2249 | * to a different object. The "type" parameter indicates whether | 2391 | * to a different object. The "type" parameter indicates whether |
2250 | * "data_desc" is the pointer to the head of a list of bio | 2392 | * "data_desc" is the pointer to the head of a list of bio |
@@ -2259,28 +2401,26 @@ static int rbd_img_request_fill(struct rbd_img_request *img_request, | |||
2259 | struct rbd_device *rbd_dev = img_request->rbd_dev; | 2401 | struct rbd_device *rbd_dev = img_request->rbd_dev; |
2260 | struct rbd_obj_request *obj_request = NULL; | 2402 | struct rbd_obj_request *obj_request = NULL; |
2261 | struct rbd_obj_request *next_obj_request; | 2403 | struct rbd_obj_request *next_obj_request; |
2262 | bool write_request = img_request_write_test(img_request); | ||
2263 | struct bio *bio_list = NULL; | 2404 | struct bio *bio_list = NULL; |
2264 | unsigned int bio_offset = 0; | 2405 | unsigned int bio_offset = 0; |
2265 | struct page **pages = NULL; | 2406 | struct page **pages = NULL; |
2407 | enum obj_operation_type op_type; | ||
2266 | u64 img_offset; | 2408 | u64 img_offset; |
2267 | u64 resid; | 2409 | u64 resid; |
2268 | u16 opcode; | ||
2269 | 2410 | ||
2270 | dout("%s: img %p type %d data_desc %p\n", __func__, img_request, | 2411 | dout("%s: img %p type %d data_desc %p\n", __func__, img_request, |
2271 | (int)type, data_desc); | 2412 | (int)type, data_desc); |
2272 | 2413 | ||
2273 | opcode = write_request ? CEPH_OSD_OP_WRITE : CEPH_OSD_OP_READ; | ||
2274 | img_offset = img_request->offset; | 2414 | img_offset = img_request->offset; |
2275 | resid = img_request->length; | 2415 | resid = img_request->length; |
2276 | rbd_assert(resid > 0); | 2416 | rbd_assert(resid > 0); |
2417 | op_type = rbd_img_request_op_type(img_request); | ||
2277 | 2418 | ||
2278 | if (type == OBJ_REQUEST_BIO) { | 2419 | if (type == OBJ_REQUEST_BIO) { |
2279 | bio_list = data_desc; | 2420 | bio_list = data_desc; |
2280 | rbd_assert(img_offset == | 2421 | rbd_assert(img_offset == |
2281 | bio_list->bi_iter.bi_sector << SECTOR_SHIFT); | 2422 | bio_list->bi_iter.bi_sector << SECTOR_SHIFT); |
2282 | } else { | 2423 | } else if (type == OBJ_REQUEST_PAGES) { |
2283 | rbd_assert(type == OBJ_REQUEST_PAGES); | ||
2284 | pages = data_desc; | 2424 | pages = data_desc; |
2285 | } | 2425 | } |
2286 | 2426 | ||
@@ -2289,7 +2429,6 @@ static int rbd_img_request_fill(struct rbd_img_request *img_request, | |||
2289 | const char *object_name; | 2429 | const char *object_name; |
2290 | u64 offset; | 2430 | u64 offset; |
2291 | u64 length; | 2431 | u64 length; |
2292 | unsigned int which = 0; | ||
2293 | 2432 | ||
2294 | object_name = rbd_segment_name(rbd_dev, img_offset); | 2433 | object_name = rbd_segment_name(rbd_dev, img_offset); |
2295 | if (!object_name) | 2434 | if (!object_name) |
@@ -2321,7 +2460,7 @@ static int rbd_img_request_fill(struct rbd_img_request *img_request, | |||
2321 | GFP_ATOMIC); | 2460 | GFP_ATOMIC); |
2322 | if (!obj_request->bio_list) | 2461 | if (!obj_request->bio_list) |
2323 | goto out_unwind; | 2462 | goto out_unwind; |
2324 | } else { | 2463 | } else if (type == OBJ_REQUEST_PAGES) { |
2325 | unsigned int page_count; | 2464 | unsigned int page_count; |
2326 | 2465 | ||
2327 | obj_request->pages = pages; | 2466 | obj_request->pages = pages; |
@@ -2332,38 +2471,19 @@ static int rbd_img_request_fill(struct rbd_img_request *img_request, | |||
2332 | pages += page_count; | 2471 | pages += page_count; |
2333 | } | 2472 | } |
2334 | 2473 | ||
2335 | osd_req = rbd_osd_req_create(rbd_dev, write_request, | 2474 | osd_req = rbd_osd_req_create(rbd_dev, op_type, |
2336 | (write_request ? 2 : 1), | 2475 | (op_type == OBJ_OP_WRITE) ? 2 : 1, |
2337 | obj_request); | 2476 | obj_request); |
2338 | if (!osd_req) | 2477 | if (!osd_req) |
2339 | goto out_unwind; | 2478 | goto out_unwind; |
2479 | |||
2340 | obj_request->osd_req = osd_req; | 2480 | obj_request->osd_req = osd_req; |
2341 | obj_request->callback = rbd_img_obj_callback; | 2481 | obj_request->callback = rbd_img_obj_callback; |
2342 | rbd_img_request_get(img_request); | 2482 | obj_request->img_offset = img_offset; |
2343 | |||
2344 | if (write_request) { | ||
2345 | osd_req_op_alloc_hint_init(osd_req, which, | ||
2346 | rbd_obj_bytes(&rbd_dev->header), | ||
2347 | rbd_obj_bytes(&rbd_dev->header)); | ||
2348 | which++; | ||
2349 | } | ||
2350 | |||
2351 | osd_req_op_extent_init(osd_req, which, opcode, offset, length, | ||
2352 | 0, 0); | ||
2353 | if (type == OBJ_REQUEST_BIO) | ||
2354 | osd_req_op_extent_osd_data_bio(osd_req, which, | ||
2355 | obj_request->bio_list, length); | ||
2356 | else | ||
2357 | osd_req_op_extent_osd_data_pages(osd_req, which, | ||
2358 | obj_request->pages, length, | ||
2359 | offset & ~PAGE_MASK, false, false); | ||
2360 | 2483 | ||
2361 | if (write_request) | 2484 | rbd_img_obj_request_fill(obj_request, osd_req, op_type, 0); |
2362 | rbd_osd_req_format_write(obj_request); | ||
2363 | else | ||
2364 | rbd_osd_req_format_read(obj_request); | ||
2365 | 2485 | ||
2366 | obj_request->img_offset = img_offset; | 2486 | rbd_img_request_get(img_request); |
2367 | 2487 | ||
2368 | img_offset += length; | 2488 | img_offset += length; |
2369 | resid -= length; | 2489 | resid -= length; |
@@ -2386,7 +2506,8 @@ rbd_img_obj_copyup_callback(struct rbd_obj_request *obj_request) | |||
2386 | struct page **pages; | 2506 | struct page **pages; |
2387 | u32 page_count; | 2507 | u32 page_count; |
2388 | 2508 | ||
2389 | rbd_assert(obj_request->type == OBJ_REQUEST_BIO); | 2509 | rbd_assert(obj_request->type == OBJ_REQUEST_BIO || |
2510 | obj_request->type == OBJ_REQUEST_NODATA); | ||
2390 | rbd_assert(obj_request_img_data_test(obj_request)); | 2511 | rbd_assert(obj_request_img_data_test(obj_request)); |
2391 | img_request = obj_request->img_request; | 2512 | img_request = obj_request->img_request; |
2392 | rbd_assert(img_request); | 2513 | rbd_assert(img_request); |
@@ -2424,11 +2545,10 @@ rbd_img_obj_parent_read_full_callback(struct rbd_img_request *img_request) | |||
2424 | struct ceph_osd_client *osdc; | 2545 | struct ceph_osd_client *osdc; |
2425 | struct rbd_device *rbd_dev; | 2546 | struct rbd_device *rbd_dev; |
2426 | struct page **pages; | 2547 | struct page **pages; |
2548 | enum obj_operation_type op_type; | ||
2427 | u32 page_count; | 2549 | u32 page_count; |
2428 | int img_result; | 2550 | int img_result; |
2429 | u64 parent_length; | 2551 | u64 parent_length; |
2430 | u64 offset; | ||
2431 | u64 length; | ||
2432 | 2552 | ||
2433 | rbd_assert(img_request_child_test(img_request)); | 2553 | rbd_assert(img_request_child_test(img_request)); |
2434 | 2554 | ||
@@ -2492,26 +2612,10 @@ rbd_img_obj_parent_read_full_callback(struct rbd_img_request *img_request) | |||
2492 | osd_req_op_cls_request_data_pages(osd_req, 0, pages, parent_length, 0, | 2612 | osd_req_op_cls_request_data_pages(osd_req, 0, pages, parent_length, 0, |
2493 | false, false); | 2613 | false, false); |
2494 | 2614 | ||
2495 | /* Then the hint op */ | 2615 | /* Add the other op(s) */ |
2496 | 2616 | ||
2497 | osd_req_op_alloc_hint_init(osd_req, 1, rbd_obj_bytes(&rbd_dev->header), | 2617 | op_type = rbd_img_request_op_type(orig_request->img_request); |
2498 | rbd_obj_bytes(&rbd_dev->header)); | 2618 | rbd_img_obj_request_fill(orig_request, osd_req, op_type, 1); |
2499 | |||
2500 | /* And the original write request op */ | ||
2501 | |||
2502 | offset = orig_request->offset; | ||
2503 | length = orig_request->length; | ||
2504 | osd_req_op_extent_init(osd_req, 2, CEPH_OSD_OP_WRITE, | ||
2505 | offset, length, 0, 0); | ||
2506 | if (orig_request->type == OBJ_REQUEST_BIO) | ||
2507 | osd_req_op_extent_osd_data_bio(osd_req, 2, | ||
2508 | orig_request->bio_list, length); | ||
2509 | else | ||
2510 | osd_req_op_extent_osd_data_pages(osd_req, 2, | ||
2511 | orig_request->pages, length, | ||
2512 | offset & ~PAGE_MASK, false, false); | ||
2513 | |||
2514 | rbd_osd_req_format_write(orig_request); | ||
2515 | 2619 | ||
2516 | /* All set, send it off. */ | 2620 | /* All set, send it off. */ |
2517 | 2621 | ||
@@ -2728,7 +2832,7 @@ static int rbd_img_obj_exists_submit(struct rbd_obj_request *obj_request) | |||
2728 | 2832 | ||
2729 | rbd_assert(obj_request->img_request); | 2833 | rbd_assert(obj_request->img_request); |
2730 | rbd_dev = obj_request->img_request->rbd_dev; | 2834 | rbd_dev = obj_request->img_request->rbd_dev; |
2731 | stat_request->osd_req = rbd_osd_req_create(rbd_dev, false, 1, | 2835 | stat_request->osd_req = rbd_osd_req_create(rbd_dev, OBJ_OP_READ, 1, |
2732 | stat_request); | 2836 | stat_request); |
2733 | if (!stat_request->osd_req) | 2837 | if (!stat_request->osd_req) |
2734 | goto out; | 2838 | goto out; |
@@ -2748,11 +2852,10 @@ out: | |||
2748 | return ret; | 2852 | return ret; |
2749 | } | 2853 | } |
2750 | 2854 | ||
2751 | static int rbd_img_obj_request_submit(struct rbd_obj_request *obj_request) | 2855 | static bool img_obj_request_simple(struct rbd_obj_request *obj_request) |
2752 | { | 2856 | { |
2753 | struct rbd_img_request *img_request; | 2857 | struct rbd_img_request *img_request; |
2754 | struct rbd_device *rbd_dev; | 2858 | struct rbd_device *rbd_dev; |
2755 | bool known; | ||
2756 | 2859 | ||
2757 | rbd_assert(obj_request_img_data_test(obj_request)); | 2860 | rbd_assert(obj_request_img_data_test(obj_request)); |
2758 | 2861 | ||
@@ -2760,22 +2863,44 @@ static int rbd_img_obj_request_submit(struct rbd_obj_request *obj_request) | |||
2760 | rbd_assert(img_request); | 2863 | rbd_assert(img_request); |
2761 | rbd_dev = img_request->rbd_dev; | 2864 | rbd_dev = img_request->rbd_dev; |
2762 | 2865 | ||
2866 | /* Reads */ | ||
2867 | if (!img_request_write_test(img_request) && | ||
2868 | !img_request_discard_test(img_request)) | ||
2869 | return true; | ||
2870 | |||
2871 | /* Non-layered writes */ | ||
2872 | if (!img_request_layered_test(img_request)) | ||
2873 | return true; | ||
2874 | |||
2875 | /* | ||
2876 | * Layered writes outside of the parent overlap range don't | ||
2877 | * share any data with the parent. | ||
2878 | */ | ||
2879 | if (!obj_request_overlaps_parent(obj_request)) | ||
2880 | return true; | ||
2881 | |||
2763 | /* | 2882 | /* |
2764 | * Only writes to layered images need special handling. | 2883 | * Entire-object layered writes - we will overwrite whatever |
2765 | * Reads and non-layered writes are simple object requests. | 2884 | * parent data there is anyway. |
2766 | * Layered writes that start beyond the end of the overlap | ||
2767 | * with the parent have no parent data, so they too are | ||
2768 | * simple object requests. Finally, if the target object is | ||
2769 | * known to already exist, its parent data has already been | ||
2770 | * copied, so a write to the object can also be handled as a | ||
2771 | * simple object request. | ||
2772 | */ | 2885 | */ |
2773 | if (!img_request_write_test(img_request) || | 2886 | if (!obj_request->offset && |
2774 | !img_request_layered_test(img_request) || | 2887 | obj_request->length == rbd_obj_bytes(&rbd_dev->header)) |
2775 | !obj_request_overlaps_parent(obj_request) || | 2888 | return true; |
2776 | ((known = obj_request_known_test(obj_request)) && | 2889 | |
2777 | obj_request_exists_test(obj_request))) { | 2890 | /* |
2891 | * If the object is known to already exist, its parent data has | ||
2892 | * already been copied. | ||
2893 | */ | ||
2894 | if (obj_request_known_test(obj_request) && | ||
2895 | obj_request_exists_test(obj_request)) | ||
2896 | return true; | ||
2897 | |||
2898 | return false; | ||
2899 | } | ||
2778 | 2900 | ||
2901 | static int rbd_img_obj_request_submit(struct rbd_obj_request *obj_request) | ||
2902 | { | ||
2903 | if (img_obj_request_simple(obj_request)) { | ||
2779 | struct rbd_device *rbd_dev; | 2904 | struct rbd_device *rbd_dev; |
2780 | struct ceph_osd_client *osdc; | 2905 | struct ceph_osd_client *osdc; |
2781 | 2906 | ||
@@ -2791,7 +2916,7 @@ static int rbd_img_obj_request_submit(struct rbd_obj_request *obj_request) | |||
2791 | * start by reading the data for the full target object from | 2916 | * start by reading the data for the full target object from |
2792 | * the parent so we can use it for a copyup to the target. | 2917 | * the parent so we can use it for a copyup to the target. |
2793 | */ | 2918 | */ |
2794 | if (known) | 2919 | if (obj_request_known_test(obj_request)) |
2795 | return rbd_img_obj_parent_read_full(obj_request); | 2920 | return rbd_img_obj_parent_read_full(obj_request); |
2796 | 2921 | ||
2797 | /* We don't know whether the target exists. Go find out. */ | 2922 | /* We don't know whether the target exists. Go find out. */ |
@@ -2932,7 +3057,7 @@ static int rbd_obj_notify_ack_sync(struct rbd_device *rbd_dev, u64 notify_id) | |||
2932 | return -ENOMEM; | 3057 | return -ENOMEM; |
2933 | 3058 | ||
2934 | ret = -ENOMEM; | 3059 | ret = -ENOMEM; |
2935 | obj_request->osd_req = rbd_osd_req_create(rbd_dev, false, 1, | 3060 | obj_request->osd_req = rbd_osd_req_create(rbd_dev, OBJ_OP_READ, 1, |
2936 | obj_request); | 3061 | obj_request); |
2937 | if (!obj_request->osd_req) | 3062 | if (!obj_request->osd_req) |
2938 | goto out; | 3063 | goto out; |
@@ -2995,7 +3120,7 @@ static struct rbd_obj_request *rbd_obj_watch_request_helper( | |||
2995 | if (!obj_request) | 3120 | if (!obj_request) |
2996 | return ERR_PTR(-ENOMEM); | 3121 | return ERR_PTR(-ENOMEM); |
2997 | 3122 | ||
2998 | obj_request->osd_req = rbd_osd_req_create(rbd_dev, true, 1, | 3123 | obj_request->osd_req = rbd_osd_req_create(rbd_dev, OBJ_OP_WRITE, 1, |
2999 | obj_request); | 3124 | obj_request); |
3000 | if (!obj_request->osd_req) { | 3125 | if (!obj_request->osd_req) { |
3001 | ret = -ENOMEM; | 3126 | ret = -ENOMEM; |
@@ -3133,7 +3258,7 @@ static int rbd_obj_method_sync(struct rbd_device *rbd_dev, | |||
3133 | obj_request->pages = pages; | 3258 | obj_request->pages = pages; |
3134 | obj_request->page_count = page_count; | 3259 | obj_request->page_count = page_count; |
3135 | 3260 | ||
3136 | obj_request->osd_req = rbd_osd_req_create(rbd_dev, false, 1, | 3261 | obj_request->osd_req = rbd_osd_req_create(rbd_dev, OBJ_OP_READ, 1, |
3137 | obj_request); | 3262 | obj_request); |
3138 | if (!obj_request->osd_req) | 3263 | if (!obj_request->osd_req) |
3139 | goto out; | 3264 | goto out; |
@@ -3183,11 +3308,20 @@ out: | |||
3183 | static void rbd_handle_request(struct rbd_device *rbd_dev, struct request *rq) | 3308 | static void rbd_handle_request(struct rbd_device *rbd_dev, struct request *rq) |
3184 | { | 3309 | { |
3185 | struct rbd_img_request *img_request; | 3310 | struct rbd_img_request *img_request; |
3311 | struct ceph_snap_context *snapc = NULL; | ||
3186 | u64 offset = (u64)blk_rq_pos(rq) << SECTOR_SHIFT; | 3312 | u64 offset = (u64)blk_rq_pos(rq) << SECTOR_SHIFT; |
3187 | u64 length = blk_rq_bytes(rq); | 3313 | u64 length = blk_rq_bytes(rq); |
3188 | bool wr = rq_data_dir(rq) == WRITE; | 3314 | enum obj_operation_type op_type; |
3315 | u64 mapping_size; | ||
3189 | int result; | 3316 | int result; |
3190 | 3317 | ||
3318 | if (rq->cmd_flags & REQ_DISCARD) | ||
3319 | op_type = OBJ_OP_DISCARD; | ||
3320 | else if (rq->cmd_flags & REQ_WRITE) | ||
3321 | op_type = OBJ_OP_WRITE; | ||
3322 | else | ||
3323 | op_type = OBJ_OP_READ; | ||
3324 | |||
3191 | /* Ignore/skip any zero-length requests */ | 3325 | /* Ignore/skip any zero-length requests */ |
3192 | 3326 | ||
3193 | if (!length) { | 3327 | if (!length) { |
@@ -3196,9 +3330,9 @@ static void rbd_handle_request(struct rbd_device *rbd_dev, struct request *rq) | |||
3196 | goto err_rq; | 3330 | goto err_rq; |
3197 | } | 3331 | } |
3198 | 3332 | ||
3199 | /* Disallow writes to a read-only device */ | 3333 | /* Only reads are allowed to a read-only device */ |
3200 | 3334 | ||
3201 | if (wr) { | 3335 | if (op_type != OBJ_OP_READ) { |
3202 | if (rbd_dev->mapping.read_only) { | 3336 | if (rbd_dev->mapping.read_only) { |
3203 | result = -EROFS; | 3337 | result = -EROFS; |
3204 | goto err_rq; | 3338 | goto err_rq; |
@@ -3226,21 +3360,35 @@ static void rbd_handle_request(struct rbd_device *rbd_dev, struct request *rq) | |||
3226 | goto err_rq; /* Shouldn't happen */ | 3360 | goto err_rq; /* Shouldn't happen */ |
3227 | } | 3361 | } |
3228 | 3362 | ||
3229 | if (offset + length > rbd_dev->mapping.size) { | 3363 | down_read(&rbd_dev->header_rwsem); |
3364 | mapping_size = rbd_dev->mapping.size; | ||
3365 | if (op_type != OBJ_OP_READ) { | ||
3366 | snapc = rbd_dev->header.snapc; | ||
3367 | ceph_get_snap_context(snapc); | ||
3368 | } | ||
3369 | up_read(&rbd_dev->header_rwsem); | ||
3370 | |||
3371 | if (offset + length > mapping_size) { | ||
3230 | rbd_warn(rbd_dev, "beyond EOD (%llu~%llu > %llu)", offset, | 3372 | rbd_warn(rbd_dev, "beyond EOD (%llu~%llu > %llu)", offset, |
3231 | length, rbd_dev->mapping.size); | 3373 | length, mapping_size); |
3232 | result = -EIO; | 3374 | result = -EIO; |
3233 | goto err_rq; | 3375 | goto err_rq; |
3234 | } | 3376 | } |
3235 | 3377 | ||
3236 | img_request = rbd_img_request_create(rbd_dev, offset, length, wr); | 3378 | img_request = rbd_img_request_create(rbd_dev, offset, length, op_type, |
3379 | snapc); | ||
3237 | if (!img_request) { | 3380 | if (!img_request) { |
3238 | result = -ENOMEM; | 3381 | result = -ENOMEM; |
3239 | goto err_rq; | 3382 | goto err_rq; |
3240 | } | 3383 | } |
3241 | img_request->rq = rq; | 3384 | img_request->rq = rq; |
3242 | 3385 | ||
3243 | result = rbd_img_request_fill(img_request, OBJ_REQUEST_BIO, rq->bio); | 3386 | if (op_type == OBJ_OP_DISCARD) |
3387 | result = rbd_img_request_fill(img_request, OBJ_REQUEST_NODATA, | ||
3388 | NULL); | ||
3389 | else | ||
3390 | result = rbd_img_request_fill(img_request, OBJ_REQUEST_BIO, | ||
3391 | rq->bio); | ||
3244 | if (result) | 3392 | if (result) |
3245 | goto err_img_request; | 3393 | goto err_img_request; |
3246 | 3394 | ||
@@ -3255,7 +3403,9 @@ err_img_request: | |||
3255 | err_rq: | 3403 | err_rq: |
3256 | if (result) | 3404 | if (result) |
3257 | rbd_warn(rbd_dev, "%s %llx at %llx result %d", | 3405 | rbd_warn(rbd_dev, "%s %llx at %llx result %d", |
3258 | wr ? "write" : "read", length, offset, result); | 3406 | obj_op_name(op_type), length, offset, result); |
3407 | if (snapc) | ||
3408 | ceph_put_snap_context(snapc); | ||
3259 | blk_end_request_all(rq, result); | 3409 | blk_end_request_all(rq, result); |
3260 | } | 3410 | } |
3261 | 3411 | ||
@@ -3393,7 +3543,7 @@ static int rbd_obj_read_sync(struct rbd_device *rbd_dev, | |||
3393 | obj_request->pages = pages; | 3543 | obj_request->pages = pages; |
3394 | obj_request->page_count = page_count; | 3544 | obj_request->page_count = page_count; |
3395 | 3545 | ||
3396 | obj_request->osd_req = rbd_osd_req_create(rbd_dev, false, 1, | 3546 | obj_request->osd_req = rbd_osd_req_create(rbd_dev, OBJ_OP_READ, 1, |
3397 | obj_request); | 3547 | obj_request); |
3398 | if (!obj_request->osd_req) | 3548 | if (!obj_request->osd_req) |
3399 | goto out; | 3549 | goto out; |
@@ -3610,6 +3760,13 @@ static int rbd_init_disk(struct rbd_device *rbd_dev) | |||
3610 | blk_queue_io_min(q, segment_size); | 3760 | blk_queue_io_min(q, segment_size); |
3611 | blk_queue_io_opt(q, segment_size); | 3761 | blk_queue_io_opt(q, segment_size); |
3612 | 3762 | ||
3763 | /* enable the discard support */ | ||
3764 | queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, q); | ||
3765 | q->limits.discard_granularity = segment_size; | ||
3766 | q->limits.discard_alignment = segment_size; | ||
3767 | q->limits.max_discard_sectors = segment_size / SECTOR_SIZE; | ||
3768 | q->limits.discard_zeroes_data = 1; | ||
3769 | |||
3613 | blk_queue_merge_bvec(q, rbd_merge_bvec); | 3770 | blk_queue_merge_bvec(q, rbd_merge_bvec); |
3614 | disk->queue = q; | 3771 | disk->queue = q; |
3615 | 3772 | ||
@@ -4924,7 +5081,7 @@ static int rbd_dev_image_id(struct rbd_device *rbd_dev) | |||
4924 | ret = image_id ? 0 : -ENOMEM; | 5081 | ret = image_id ? 0 : -ENOMEM; |
4925 | if (!ret) | 5082 | if (!ret) |
4926 | rbd_dev->image_format = 1; | 5083 | rbd_dev->image_format = 1; |
4927 | } else if (ret > sizeof (__le32)) { | 5084 | } else if (ret >= 0) { |
4928 | void *p = response; | 5085 | void *p = response; |
4929 | 5086 | ||
4930 | image_id = ceph_extract_encoded_string(&p, p + ret, | 5087 | image_id = ceph_extract_encoded_string(&p, p + ret, |
@@ -4932,8 +5089,6 @@ static int rbd_dev_image_id(struct rbd_device *rbd_dev) | |||
4932 | ret = PTR_ERR_OR_ZERO(image_id); | 5089 | ret = PTR_ERR_OR_ZERO(image_id); |
4933 | if (!ret) | 5090 | if (!ret) |
4934 | rbd_dev->image_format = 2; | 5091 | rbd_dev->image_format = 2; |
4935 | } else { | ||
4936 | ret = -EINVAL; | ||
4937 | } | 5092 | } |
4938 | 5093 | ||
4939 | if (!ret) { | 5094 | if (!ret) { |
@@ -5087,7 +5242,8 @@ static int rbd_dev_device_setup(struct rbd_device *rbd_dev) | |||
5087 | set_capacity(rbd_dev->disk, rbd_dev->mapping.size / SECTOR_SIZE); | 5242 | set_capacity(rbd_dev->disk, rbd_dev->mapping.size / SECTOR_SIZE); |
5088 | set_disk_ro(rbd_dev->disk, rbd_dev->mapping.read_only); | 5243 | set_disk_ro(rbd_dev->disk, rbd_dev->mapping.read_only); |
5089 | 5244 | ||
5090 | rbd_dev->rq_wq = alloc_workqueue("%s", 0, 0, rbd_dev->disk->disk_name); | 5245 | rbd_dev->rq_wq = alloc_workqueue("%s", WQ_MEM_RECLAIM, 0, |
5246 | rbd_dev->disk->disk_name); | ||
5091 | if (!rbd_dev->rq_wq) { | 5247 | if (!rbd_dev->rq_wq) { |
5092 | ret = -ENOMEM; | 5248 | ret = -ENOMEM; |
5093 | goto err_out_mapping; | 5249 | goto err_out_mapping; |