diff options
| -rw-r--r-- | MAINTAINERS | 11 | ||||
| -rw-r--r-- | drivers/block/Kconfig | 17 | ||||
| -rw-r--r-- | drivers/block/Makefile | 1 | ||||
| -rw-r--r-- | drivers/block/rbd.c | 1841 | ||||
| -rw-r--r-- | drivers/block/rbd_types.h | 73 | ||||
| -rw-r--r-- | drivers/block/virtio_blk.c | 17 | ||||
| -rw-r--r-- | drivers/char/virtio_console.c | 240 | ||||
| -rw-r--r-- | fs/ceph/Kconfig | 14 | ||||
| -rw-r--r-- | fs/ceph/Makefile | 11 | ||||
| -rw-r--r-- | fs/ceph/README | 20 | ||||
| -rw-r--r-- | fs/ceph/addr.c | 65 | ||||
| -rw-r--r-- | fs/ceph/caps.c | 50 | ||||
| -rw-r--r-- | fs/ceph/ceph_frag.c | 3 | ||||
| -rw-r--r-- | fs/ceph/debugfs.c | 406 | ||||
| -rw-r--r-- | fs/ceph/dir.c | 97 | ||||
| -rw-r--r-- | fs/ceph/export.c | 5 | ||||
| -rw-r--r-- | fs/ceph/file.c | 207 | ||||
| -rw-r--r-- | fs/ceph/inode.c | 19 | ||||
| -rw-r--r-- | fs/ceph/ioctl.c | 77 | ||||
| -rw-r--r-- | fs/ceph/ioctl.h | 4 | ||||
| -rw-r--r-- | fs/ceph/locks.c | 23 | ||||
| -rw-r--r-- | fs/ceph/mds_client.c | 129 | ||||
| -rw-r--r-- | fs/ceph/mds_client.h | 20 | ||||
| -rw-r--r-- | fs/ceph/mdsmap.c | 11 | ||||
| -rw-r--r-- | fs/ceph/pagelist.c | 63 | ||||
| -rw-r--r-- | fs/ceph/snap.c | 10 | ||||
| -rw-r--r-- | fs/ceph/strings.c (renamed from fs/ceph/ceph_strings.c) | 82 | ||||
| -rw-r--r-- | fs/ceph/super.c | 1154 | ||||
| -rw-r--r-- | fs/ceph/super.h | 400 | ||||
| -rw-r--r-- | fs/ceph/xattr.c | 18 | ||||
| -rw-r--r-- | fs/gfs2/Kconfig | 2 | ||||
| -rw-r--r-- | fs/gfs2/aops.c | 24 | ||||
| -rw-r--r-- | fs/gfs2/bmap.c | 255 | ||||
| -rw-r--r-- | fs/gfs2/bmap.h | 20 | ||||
| -rw-r--r-- | fs/gfs2/dentry.c | 2 | ||||
| -rw-r--r-- | fs/gfs2/dir.c | 31 | ||||
| -rw-r--r-- | fs/gfs2/dir.h | 34 | ||||
| -rw-r--r-- | fs/gfs2/export.c | 9 | ||||
| -rw-r--r-- | fs/gfs2/file.c | 6 | ||||
| -rw-r--r-- | fs/gfs2/glock.c | 23 | ||||
| -rw-r--r-- | fs/gfs2/glock.h | 2 | ||||
| -rw-r--r-- | fs/gfs2/glops.c | 6 | ||||
| -rw-r--r-- | fs/gfs2/incore.h | 8 | ||||
| -rw-r--r-- | fs/gfs2/inode.c | 9 | ||||
| -rw-r--r-- | fs/gfs2/inode.h | 15 | ||||
| -rw-r--r-- | fs/gfs2/lock_dlm.c | 4 | ||||
| -rw-r--r-- | fs/gfs2/main.c | 6 | ||||
| -rw-r--r-- | fs/gfs2/ops_fstype.c | 79 | ||||
| -rw-r--r-- | fs/gfs2/ops_inode.c | 326 | ||||
| -rw-r--r-- | fs/gfs2/quota.c | 16 | ||||
| -rw-r--r-- | fs/gfs2/recovery.c | 15 | ||||
| -rw-r--r-- | fs/gfs2/rgrp.c | 50 | ||||
| -rw-r--r-- | fs/gfs2/rgrp.h | 8 | ||||
| -rw-r--r-- | fs/gfs2/super.c | 26 | ||||
| -rw-r--r-- | fs/gfs2/sys.c | 22 | ||||
| -rw-r--r-- | fs/gfs2/trace_gfs2.h | 3 | ||||
| -rw-r--r-- | fs/gfs2/trans.h | 9 | ||||
| -rw-r--r-- | fs/gfs2/xattr.c | 2 | ||||
| -rw-r--r-- | fs/hfsplus/bfind.c | 17 | ||||
| -rw-r--r-- | fs/hfsplus/bitmap.c | 20 | ||||
| -rw-r--r-- | fs/hfsplus/brec.c | 29 | ||||
| -rw-r--r-- | fs/hfsplus/btree.c | 67 | ||||
| -rw-r--r-- | fs/hfsplus/catalog.c | 50 | ||||
| -rw-r--r-- | fs/hfsplus/dir.c | 201 | ||||
| -rw-r--r-- | fs/hfsplus/extents.c | 223 | ||||
| -rw-r--r-- | fs/hfsplus/hfsplus_fs.h | 85 | ||||
| -rw-r--r-- | fs/hfsplus/hfsplus_raw.h | 3 | ||||
| -rw-r--r-- | fs/hfsplus/inode.c | 185 | ||||
| -rw-r--r-- | fs/hfsplus/ioctl.c | 153 | ||||
| -rw-r--r-- | fs/hfsplus/options.c | 10 | ||||
| -rw-r--r-- | fs/hfsplus/part_tbl.c | 5 | ||||
| -rw-r--r-- | fs/hfsplus/super.c | 310 | ||||
| -rw-r--r-- | fs/hfsplus/unicode.c | 16 | ||||
| -rw-r--r-- | fs/hfsplus/wrapper.c | 40 | ||||
| -rw-r--r-- | include/linux/ceph/auth.h (renamed from fs/ceph/auth.h) | 4 | ||||
| -rw-r--r-- | include/linux/ceph/buffer.h (renamed from fs/ceph/buffer.h) | 0 | ||||
| -rw-r--r-- | include/linux/ceph/ceph_debug.h (renamed from fs/ceph/ceph_debug.h) | 5 | ||||
| -rw-r--r-- | include/linux/ceph/ceph_frag.h (renamed from fs/ceph/ceph_frag.h) | 0 | ||||
| -rw-r--r-- | include/linux/ceph/ceph_fs.h (renamed from fs/ceph/ceph_fs.h) | 1 | ||||
| -rw-r--r-- | include/linux/ceph/ceph_hash.h (renamed from fs/ceph/ceph_hash.h) | 0 | ||||
| -rw-r--r-- | include/linux/ceph/debugfs.h | 33 | ||||
| -rw-r--r-- | include/linux/ceph/decode.h (renamed from fs/ceph/decode.h) | 5 | ||||
| -rw-r--r-- | include/linux/ceph/libceph.h | 249 | ||||
| -rw-r--r-- | include/linux/ceph/mdsmap.h (renamed from fs/ceph/mdsmap.h) | 0 | ||||
| -rw-r--r-- | include/linux/ceph/messenger.h (renamed from fs/ceph/messenger.h) | 12 | ||||
| -rw-r--r-- | include/linux/ceph/mon_client.h (renamed from fs/ceph/mon_client.h) | 1 | ||||
| -rw-r--r-- | include/linux/ceph/msgpool.h (renamed from fs/ceph/msgpool.h) | 0 | ||||
| -rw-r--r-- | include/linux/ceph/msgr.h (renamed from fs/ceph/msgr.h) | 0 | ||||
| -rw-r--r-- | include/linux/ceph/osd_client.h (renamed from fs/ceph/osd_client.h) | 67 | ||||
| -rw-r--r-- | include/linux/ceph/osdmap.h (renamed from fs/ceph/osdmap.h) | 4 | ||||
| -rw-r--r-- | include/linux/ceph/pagelist.h (renamed from fs/ceph/pagelist.h) | 23 | ||||
| -rw-r--r-- | include/linux/ceph/rados.h (renamed from fs/ceph/rados.h) | 0 | ||||
| -rw-r--r-- | include/linux/ceph/types.h (renamed from fs/ceph/types.h) | 0 | ||||
| -rw-r--r-- | include/linux/crush/crush.h (renamed from fs/ceph/crush/crush.h) | 0 | ||||
| -rw-r--r-- | include/linux/crush/hash.h (renamed from fs/ceph/crush/hash.h) | 0 | ||||
| -rw-r--r-- | include/linux/crush/mapper.h (renamed from fs/ceph/crush/mapper.h) | 0 | ||||
| -rw-r--r-- | net/Kconfig | 1 | ||||
| -rw-r--r-- | net/Makefile | 1 | ||||
| -rw-r--r-- | net/ceph/Kconfig | 28 | ||||
| -rw-r--r-- | net/ceph/Makefile | 37 | ||||
| -rw-r--r-- | net/ceph/armor.c (renamed from fs/ceph/armor.c) | 0 | ||||
| -rw-r--r-- | net/ceph/auth.c (renamed from fs/ceph/auth.c) | 10 | ||||
| -rw-r--r-- | net/ceph/auth_none.c (renamed from fs/ceph/auth_none.c) | 7 | ||||
| -rw-r--r-- | net/ceph/auth_none.h (renamed from fs/ceph/auth_none.h) | 3 | ||||
| -rw-r--r-- | net/ceph/auth_x.c (renamed from fs/ceph/auth_x.c) | 9 | ||||
| -rw-r--r-- | net/ceph/auth_x.h (renamed from fs/ceph/auth_x.h) | 3 | ||||
| -rw-r--r-- | net/ceph/auth_x_protocol.h (renamed from fs/ceph/auth_x_protocol.h) | 0 | ||||
| -rw-r--r-- | net/ceph/buffer.c (renamed from fs/ceph/buffer.c) | 9 | ||||
| -rw-r--r-- | net/ceph/ceph_common.c | 529 | ||||
| -rw-r--r-- | net/ceph/ceph_fs.c (renamed from fs/ceph/ceph_fs.c) | 5 | ||||
| -rw-r--r-- | net/ceph/ceph_hash.c (renamed from fs/ceph/ceph_hash.c) | 2 | ||||
| -rw-r--r-- | net/ceph/ceph_strings.c | 84 | ||||
| -rw-r--r-- | net/ceph/crush/crush.c (renamed from fs/ceph/crush/crush.c) | 2 | ||||
| -rw-r--r-- | net/ceph/crush/hash.c (renamed from fs/ceph/crush/hash.c) | 2 | ||||
| -rw-r--r-- | net/ceph/crush/mapper.c (renamed from fs/ceph/crush/mapper.c) | 4 | ||||
| -rw-r--r-- | net/ceph/crypto.c (renamed from fs/ceph/crypto.c) | 4 | ||||
| -rw-r--r-- | net/ceph/crypto.h (renamed from fs/ceph/crypto.h) | 4 | ||||
| -rw-r--r-- | net/ceph/debugfs.c | 267 | ||||
| -rw-r--r-- | net/ceph/messenger.c (renamed from fs/ceph/messenger.c) | 296 | ||||
| -rw-r--r-- | net/ceph/mon_client.c (renamed from fs/ceph/mon_client.c) | 73 | ||||
| -rw-r--r-- | net/ceph/msgpool.c (renamed from fs/ceph/msgpool.c) | 4 | ||||
| -rw-r--r-- | net/ceph/osd_client.c (renamed from fs/ceph/osd_client.c) | 400 | ||||
| -rw-r--r-- | net/ceph/osdmap.c (renamed from fs/ceph/osdmap.c) | 30 | ||||
| -rw-r--r-- | net/ceph/pagelist.c | 154 | ||||
| -rw-r--r-- | net/ceph/pagevec.c | 223 |
125 files changed, 6971 insertions, 3138 deletions
diff --git a/MAINTAINERS b/MAINTAINERS index f2a2b8e647c5..3d4179fbc526 100644 --- a/MAINTAINERS +++ b/MAINTAINERS | |||
| @@ -1527,6 +1527,8 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client.git | |||
| 1527 | S: Supported | 1527 | S: Supported |
| 1528 | F: Documentation/filesystems/ceph.txt | 1528 | F: Documentation/filesystems/ceph.txt |
| 1529 | F: fs/ceph | 1529 | F: fs/ceph |
| 1530 | F: net/ceph | ||
| 1531 | F: include/linux/ceph | ||
| 1530 | 1532 | ||
| 1531 | CERTIFIED WIRELESS USB (WUSB) SUBSYSTEM: | 1533 | CERTIFIED WIRELESS USB (WUSB) SUBSYSTEM: |
| 1532 | M: David Vrabel <david.vrabel@csr.com> | 1534 | M: David Vrabel <david.vrabel@csr.com> |
| @@ -4805,6 +4807,15 @@ F: fs/qnx4/ | |||
| 4805 | F: include/linux/qnx4_fs.h | 4807 | F: include/linux/qnx4_fs.h |
| 4806 | F: include/linux/qnxtypes.h | 4808 | F: include/linux/qnxtypes.h |
| 4807 | 4809 | ||
| 4810 | RADOS BLOCK DEVICE (RBD) | ||
| 4811 | F: include/linux/qnxtypes.h | ||
| 4812 | M: Yehuda Sadeh <yehuda@hq.newdream.net> | ||
| 4813 | M: Sage Weil <sage@newdream.net> | ||
| 4814 | L: ceph-devel@vger.kernel.org | ||
| 4815 | S: Supported | ||
| 4816 | F: drivers/block/rbd.c | ||
| 4817 | F: drivers/block/rbd_types.h | ||
| 4818 | |||
| 4808 | RADEON FRAMEBUFFER DISPLAY DRIVER | 4819 | RADEON FRAMEBUFFER DISPLAY DRIVER |
| 4809 | M: Benjamin Herrenschmidt <benh@kernel.crashing.org> | 4820 | M: Benjamin Herrenschmidt <benh@kernel.crashing.org> |
| 4810 | L: linux-fbdev@vger.kernel.org | 4821 | L: linux-fbdev@vger.kernel.org |
diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig index de277689da61..4b9359a6f6ca 100644 --- a/drivers/block/Kconfig +++ b/drivers/block/Kconfig | |||
| @@ -488,4 +488,21 @@ config BLK_DEV_HD | |||
| 488 | 488 | ||
| 489 | If unsure, say N. | 489 | If unsure, say N. |
| 490 | 490 | ||
| 491 | config BLK_DEV_RBD | ||
| 492 | tristate "Rados block device (RBD)" | ||
| 493 | depends on INET && EXPERIMENTAL && BLOCK | ||
| 494 | select CEPH_LIB | ||
| 495 | select LIBCRC32C | ||
| 496 | select CRYPTO_AES | ||
| 497 | select CRYPTO | ||
| 498 | default n | ||
| 499 | help | ||
| 500 | Say Y here if you want to include the Rados block device, which stripes | ||
| 501 | a block device over objects stored in the Ceph distributed object | ||
| 502 | store. | ||
| 503 | |||
| 504 | More information at http://ceph.newdream.net/. | ||
| 505 | |||
| 506 | If unsure, say N. | ||
| 507 | |||
| 491 | endif # BLK_DEV | 508 | endif # BLK_DEV |
diff --git a/drivers/block/Makefile b/drivers/block/Makefile index aff5ac925c34..d7f463d6312d 100644 --- a/drivers/block/Makefile +++ b/drivers/block/Makefile | |||
| @@ -37,5 +37,6 @@ obj-$(CONFIG_BLK_DEV_HD) += hd.o | |||
| 37 | 37 | ||
| 38 | obj-$(CONFIG_XEN_BLKDEV_FRONTEND) += xen-blkfront.o | 38 | obj-$(CONFIG_XEN_BLKDEV_FRONTEND) += xen-blkfront.o |
| 39 | obj-$(CONFIG_BLK_DEV_DRBD) += drbd/ | 39 | obj-$(CONFIG_BLK_DEV_DRBD) += drbd/ |
| 40 | obj-$(CONFIG_BLK_DEV_RBD) += rbd.o | ||
| 40 | 41 | ||
| 41 | swim_mod-objs := swim.o swim_asm.o | 42 | swim_mod-objs := swim.o swim_asm.o |
diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c new file mode 100644 index 000000000000..6ec9d53806c5 --- /dev/null +++ b/drivers/block/rbd.c | |||
| @@ -0,0 +1,1841 @@ | |||
| 1 | /* | ||
| 2 | rbd.c -- Export ceph rados objects as a Linux block device | ||
| 3 | |||
| 4 | |||
| 5 | based on drivers/block/osdblk.c: | ||
| 6 | |||
| 7 | Copyright 2009 Red Hat, Inc. | ||
| 8 | |||
| 9 | This program is free software; you can redistribute it and/or modify | ||
| 10 | it under the terms of the GNU General Public License as published by | ||
| 11 | the Free Software Foundation. | ||
| 12 | |||
| 13 | This program is distributed in the hope that it will be useful, | ||
| 14 | but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 15 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
| 16 | GNU General Public License for more details. | ||
| 17 | |||
| 18 | You should have received a copy of the GNU General Public License | ||
| 19 | along with this program; see the file COPYING. If not, write to | ||
| 20 | the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. | ||
| 21 | |||
| 22 | |||
| 23 | |||
| 24 | Instructions for use | ||
| 25 | -------------------- | ||
| 26 | |||
| 27 | 1) Map a Linux block device to an existing rbd image. | ||
| 28 | |||
| 29 | Usage: <mon ip addr> <options> <pool name> <rbd image name> [snap name] | ||
| 30 | |||
| 31 | $ echo "192.168.0.1 name=admin rbd foo" > /sys/class/rbd/add | ||
| 32 | |||
| 33 | The snapshot name can be "-" or omitted to map the image read/write. | ||
| 34 | |||
| 35 | 2) List all active blkdev<->object mappings. | ||
| 36 | |||
| 37 | In this example, we have performed step #1 twice, creating two blkdevs, | ||
| 38 | mapped to two separate rados objects in the rados rbd pool | ||
| 39 | |||
| 40 | $ cat /sys/class/rbd/list | ||
| 41 | #id major client_name pool name snap KB | ||
| 42 | 0 254 client4143 rbd foo - 1024000 | ||
| 43 | |||
| 44 | The columns, in order, are: | ||
| 45 | - blkdev unique id | ||
| 46 | - blkdev assigned major | ||
| 47 | - rados client id | ||
| 48 | - rados pool name | ||
| 49 | - rados block device name | ||
| 50 | - mapped snapshot ("-" if none) | ||
| 51 | - device size in KB | ||
| 52 | |||
| 53 | |||
| 54 | 3) Create a snapshot. | ||
| 55 | |||
| 56 | Usage: <blkdev id> <snapname> | ||
| 57 | |||
| 58 | $ echo "0 mysnap" > /sys/class/rbd/snap_create | ||
| 59 | |||
| 60 | |||
| 61 | 4) Listing a snapshot. | ||
| 62 | |||
| 63 | $ cat /sys/class/rbd/snaps_list | ||
| 64 | #id snap KB | ||
| 65 | 0 - 1024000 (*) | ||
| 66 | 0 foo 1024000 | ||
| 67 | |||
| 68 | The columns, in order, are: | ||
| 69 | - blkdev unique id | ||
| 70 | - snapshot name, '-' means none (active read/write version) | ||
| 71 | - size of device at time of snapshot | ||
| 72 | - the (*) indicates this is the active version | ||
| 73 | |||
| 74 | 5) Rollback to snapshot. | ||
| 75 | |||
| 76 | Usage: <blkdev id> <snapname> | ||
| 77 | |||
| 78 | $ echo "0 mysnap" > /sys/class/rbd/snap_rollback | ||
| 79 | |||
| 80 | |||
| 81 | 6) Mapping an image using snapshot. | ||
| 82 | |||
| 83 | A snapshot mapping is read-only. This is being done by passing | ||
| 84 | snap=<snapname> to the options when adding a device. | ||
| 85 | |||
| 86 | $ echo "192.168.0.1 name=admin,snap=mysnap rbd foo" > /sys/class/rbd/add | ||
| 87 | |||
| 88 | |||
| 89 | 7) Remove an active blkdev<->rbd image mapping. | ||
| 90 | |||
| 91 | In this example, we remove the mapping with blkdev unique id 1. | ||
| 92 | |||
| 93 | $ echo 1 > /sys/class/rbd/remove | ||
| 94 | |||
| 95 | |||
| 96 | NOTE: The actual creation and deletion of rados objects is outside the scope | ||
| 97 | of this driver. | ||
| 98 | |||
| 99 | */ | ||
| 100 | |||
| 101 | #include <linux/ceph/libceph.h> | ||
| 102 | #include <linux/ceph/osd_client.h> | ||
| 103 | #include <linux/ceph/mon_client.h> | ||
| 104 | #include <linux/ceph/decode.h> | ||
| 105 | |||
| 106 | #include <linux/kernel.h> | ||
| 107 | #include <linux/device.h> | ||
| 108 | #include <linux/module.h> | ||
| 109 | #include <linux/fs.h> | ||
| 110 | #include <linux/blkdev.h> | ||
| 111 | |||
| 112 | #include "rbd_types.h" | ||
| 113 | |||
/* Driver names, short and descriptive. */
#define DRV_NAME		"rbd"
#define DRV_NAME_LONG		"rbd (rados block device)"

#define RBD_MINORS_PER_MAJOR	256	/* max minors per blkdev */

/* Buffer sizes for the name fields of struct rbd_device. */
#define RBD_MAX_MD_NAME_LEN	(96 + sizeof(RBD_SUFFIX))	/* obj_md_name */
#define RBD_MAX_POOL_NAME_LEN	64				/* pool_name */
#define RBD_MAX_SNAP_NAME_LEN	32				/* snap_name */
#define RBD_MAX_OPT_LEN		1024	/* presumably the options string limit - confirm at use site */

/* Snapshot name that selects the head (read/write) version of an image. */
#define RBD_SNAP_HEAD_NAME	"-"

#define DEV_NAME_LEN		32	/* size of rbd_device.name */
| 127 | |||
| 128 | /* | ||
| 129 | * block device image metadata (in-memory version) | ||
| 130 | */ | ||
| 131 | struct rbd_image_header { | ||
| 132 | u64 image_size; | ||
| 133 | char block_name[32]; | ||
| 134 | __u8 obj_order; | ||
| 135 | __u8 crypt_type; | ||
| 136 | __u8 comp_type; | ||
| 137 | struct rw_semaphore snap_rwsem; | ||
| 138 | struct ceph_snap_context *snapc; | ||
| 139 | size_t snap_names_len; | ||
| 140 | u64 snap_seq; | ||
| 141 | u32 total_snaps; | ||
| 142 | |||
| 143 | char *snap_names; | ||
| 144 | u64 *snap_sizes; | ||
| 145 | }; | ||
| 146 | |||
| 147 | /* | ||
| 148 | * an instance of the client. multiple devices may share a client. | ||
| 149 | */ | ||
| 150 | struct rbd_client { | ||
| 151 | struct ceph_client *client; | ||
| 152 | struct kref kref; | ||
| 153 | struct list_head node; | ||
| 154 | }; | ||
| 155 | |||
| 156 | /* | ||
| 157 | * a single io request | ||
| 158 | */ | ||
| 159 | struct rbd_request { | ||
| 160 | struct request *rq; /* blk layer request */ | ||
| 161 | struct bio *bio; /* cloned bio */ | ||
| 162 | struct page **pages; /* list of used pages */ | ||
| 163 | u64 len; | ||
| 164 | }; | ||
| 165 | |||
| 166 | /* | ||
| 167 | * a single device | ||
| 168 | */ | ||
| 169 | struct rbd_device { | ||
| 170 | int id; /* blkdev unique id */ | ||
| 171 | |||
| 172 | int major; /* blkdev assigned major */ | ||
| 173 | struct gendisk *disk; /* blkdev's gendisk and rq */ | ||
| 174 | struct request_queue *q; | ||
| 175 | |||
| 176 | struct ceph_client *client; | ||
| 177 | struct rbd_client *rbd_client; | ||
| 178 | |||
| 179 | char name[DEV_NAME_LEN]; /* blkdev name, e.g. rbd3 */ | ||
| 180 | |||
| 181 | spinlock_t lock; /* queue lock */ | ||
| 182 | |||
| 183 | struct rbd_image_header header; | ||
| 184 | char obj[RBD_MAX_OBJ_NAME_LEN]; /* rbd image name */ | ||
| 185 | int obj_len; | ||
| 186 | char obj_md_name[RBD_MAX_MD_NAME_LEN]; /* hdr nm. */ | ||
| 187 | char pool_name[RBD_MAX_POOL_NAME_LEN]; | ||
| 188 | int poolid; | ||
| 189 | |||
| 190 | char snap_name[RBD_MAX_SNAP_NAME_LEN]; | ||
| 191 | u32 cur_snap; /* index+1 of current snapshot within snap context | ||
| 192 | 0 - for the head */ | ||
| 193 | int read_only; | ||
| 194 | |||
| 195 | struct list_head node; | ||
| 196 | }; | ||
| 197 | |||
/*
 * Protects rbd_client_list and client get/put.  Statically initialised,
 * like the mutex and list heads below, so the lock is valid from module
 * load onwards without relying on a runtime spin_lock_init().
 */
static DEFINE_SPINLOCK(node_lock);

static struct class *class_rbd;	  /* /sys/class/rbd */
static DEFINE_MUTEX(ctl_mutex);	  /* Serialize open/close/setup/teardown */
static LIST_HEAD(rbd_dev_list);    /* devices */
static LIST_HEAD(rbd_client_list); /* clients */
| 204 | |||
| 205 | |||
| 206 | static int rbd_open(struct block_device *bdev, fmode_t mode) | ||
| 207 | { | ||
| 208 | struct gendisk *disk = bdev->bd_disk; | ||
| 209 | struct rbd_device *rbd_dev = disk->private_data; | ||
| 210 | |||
| 211 | set_device_ro(bdev, rbd_dev->read_only); | ||
| 212 | |||
| 213 | if ((mode & FMODE_WRITE) && rbd_dev->read_only) | ||
| 214 | return -EROFS; | ||
| 215 | |||
| 216 | return 0; | ||
| 217 | } | ||
| 218 | |||
| 219 | static const struct block_device_operations rbd_bd_ops = { | ||
| 220 | .owner = THIS_MODULE, | ||
| 221 | .open = rbd_open, | ||
| 222 | }; | ||
| 223 | |||
| 224 | /* | ||
| 225 | * Initialize an rbd client instance. | ||
| 226 | * We own *opt. | ||
| 227 | */ | ||
| 228 | static struct rbd_client *rbd_client_create(struct ceph_options *opt) | ||
| 229 | { | ||
| 230 | struct rbd_client *rbdc; | ||
| 231 | int ret = -ENOMEM; | ||
| 232 | |||
| 233 | dout("rbd_client_create\n"); | ||
| 234 | rbdc = kmalloc(sizeof(struct rbd_client), GFP_KERNEL); | ||
| 235 | if (!rbdc) | ||
| 236 | goto out_opt; | ||
| 237 | |||
| 238 | kref_init(&rbdc->kref); | ||
| 239 | INIT_LIST_HEAD(&rbdc->node); | ||
| 240 | |||
| 241 | rbdc->client = ceph_create_client(opt, rbdc); | ||
| 242 | if (IS_ERR(rbdc->client)) | ||
| 243 | goto out_rbdc; | ||
| 244 | opt = NULL; /* Now rbdc->client is responsible for opt */ | ||
| 245 | |||
| 246 | ret = ceph_open_session(rbdc->client); | ||
| 247 | if (ret < 0) | ||
| 248 | goto out_err; | ||
| 249 | |||
| 250 | spin_lock(&node_lock); | ||
| 251 | list_add_tail(&rbdc->node, &rbd_client_list); | ||
| 252 | spin_unlock(&node_lock); | ||
| 253 | |||
| 254 | dout("rbd_client_create created %p\n", rbdc); | ||
| 255 | return rbdc; | ||
| 256 | |||
| 257 | out_err: | ||
| 258 | ceph_destroy_client(rbdc->client); | ||
| 259 | out_rbdc: | ||
| 260 | kfree(rbdc); | ||
| 261 | out_opt: | ||
| 262 | if (opt) | ||
| 263 | ceph_destroy_options(opt); | ||
| 264 | return ERR_PTR(ret); | ||
| 265 | } | ||
| 266 | |||
| 267 | /* | ||
| 268 | * Find a ceph client with specific addr and configuration. | ||
| 269 | */ | ||
| 270 | static struct rbd_client *__rbd_client_find(struct ceph_options *opt) | ||
| 271 | { | ||
| 272 | struct rbd_client *client_node; | ||
| 273 | |||
| 274 | if (opt->flags & CEPH_OPT_NOSHARE) | ||
| 275 | return NULL; | ||
| 276 | |||
| 277 | list_for_each_entry(client_node, &rbd_client_list, node) | ||
| 278 | if (ceph_compare_options(opt, client_node->client) == 0) | ||
| 279 | return client_node; | ||
| 280 | return NULL; | ||
| 281 | } | ||
| 282 | |||
| 283 | /* | ||
| 284 | * Get a ceph client with specific addr and configuration, if one does | ||
| 285 | * not exist create it. | ||
| 286 | */ | ||
| 287 | static int rbd_get_client(struct rbd_device *rbd_dev, const char *mon_addr, | ||
| 288 | char *options) | ||
| 289 | { | ||
| 290 | struct rbd_client *rbdc; | ||
| 291 | struct ceph_options *opt; | ||
| 292 | int ret; | ||
| 293 | |||
| 294 | ret = ceph_parse_options(&opt, options, mon_addr, | ||
| 295 | mon_addr + strlen(mon_addr), NULL, NULL); | ||
| 296 | if (ret < 0) | ||
| 297 | return ret; | ||
| 298 | |||
| 299 | spin_lock(&node_lock); | ||
| 300 | rbdc = __rbd_client_find(opt); | ||
| 301 | if (rbdc) { | ||
| 302 | ceph_destroy_options(opt); | ||
| 303 | |||
| 304 | /* using an existing client */ | ||
| 305 | kref_get(&rbdc->kref); | ||
| 306 | rbd_dev->rbd_client = rbdc; | ||
| 307 | rbd_dev->client = rbdc->client; | ||
| 308 | spin_unlock(&node_lock); | ||
| 309 | return 0; | ||
| 310 | } | ||
| 311 | spin_unlock(&node_lock); | ||
| 312 | |||
| 313 | rbdc = rbd_client_create(opt); | ||
| 314 | if (IS_ERR(rbdc)) | ||
| 315 | return PTR_ERR(rbdc); | ||
| 316 | |||
| 317 | rbd_dev->rbd_client = rbdc; | ||
| 318 | rbd_dev->client = rbdc->client; | ||
| 319 | return 0; | ||
| 320 | } | ||
| 321 | |||
| 322 | /* | ||
| 323 | * Destroy ceph client | ||
| 324 | */ | ||
| 325 | static void rbd_client_release(struct kref *kref) | ||
| 326 | { | ||
| 327 | struct rbd_client *rbdc = container_of(kref, struct rbd_client, kref); | ||
| 328 | |||
| 329 | dout("rbd_release_client %p\n", rbdc); | ||
| 330 | spin_lock(&node_lock); | ||
| 331 | list_del(&rbdc->node); | ||
| 332 | spin_unlock(&node_lock); | ||
| 333 | |||
| 334 | ceph_destroy_client(rbdc->client); | ||
| 335 | kfree(rbdc); | ||
| 336 | } | ||
| 337 | |||
| 338 | /* | ||
| 339 | * Drop reference to ceph client node. If it's not referenced anymore, release | ||
| 340 | * it. | ||
| 341 | */ | ||
| 342 | static void rbd_put_client(struct rbd_device *rbd_dev) | ||
| 343 | { | ||
| 344 | kref_put(&rbd_dev->rbd_client->kref, rbd_client_release); | ||
| 345 | rbd_dev->rbd_client = NULL; | ||
| 346 | rbd_dev->client = NULL; | ||
| 347 | } | ||
| 348 | |||
| 349 | |||
| 350 | /* | ||
| 351 | * Create a new header structure, translate header format from the on-disk | ||
| 352 | * header. | ||
| 353 | */ | ||
| 354 | static int rbd_header_from_disk(struct rbd_image_header *header, | ||
| 355 | struct rbd_image_header_ondisk *ondisk, | ||
| 356 | int allocated_snaps, | ||
| 357 | gfp_t gfp_flags) | ||
| 358 | { | ||
| 359 | int i; | ||
| 360 | u32 snap_count = le32_to_cpu(ondisk->snap_count); | ||
| 361 | int ret = -ENOMEM; | ||
| 362 | |||
| 363 | init_rwsem(&header->snap_rwsem); | ||
| 364 | |||
| 365 | header->snap_names_len = le64_to_cpu(ondisk->snap_names_len); | ||
| 366 | header->snapc = kmalloc(sizeof(struct ceph_snap_context) + | ||
| 367 | snap_count * | ||
| 368 | sizeof(struct rbd_image_snap_ondisk), | ||
| 369 | gfp_flags); | ||
| 370 | if (!header->snapc) | ||
| 371 | return -ENOMEM; | ||
| 372 | if (snap_count) { | ||
| 373 | header->snap_names = kmalloc(header->snap_names_len, | ||
| 374 | GFP_KERNEL); | ||
| 375 | if (!header->snap_names) | ||
| 376 | goto err_snapc; | ||
| 377 | header->snap_sizes = kmalloc(snap_count * sizeof(u64), | ||
| 378 | GFP_KERNEL); | ||
| 379 | if (!header->snap_sizes) | ||
| 380 | goto err_names; | ||
| 381 | } else { | ||
| 382 | header->snap_names = NULL; | ||
| 383 | header->snap_sizes = NULL; | ||
| 384 | } | ||
| 385 | memcpy(header->block_name, ondisk->block_name, | ||
| 386 | sizeof(ondisk->block_name)); | ||
| 387 | |||
| 388 | header->image_size = le64_to_cpu(ondisk->image_size); | ||
| 389 | header->obj_order = ondisk->options.order; | ||
| 390 | header->crypt_type = ondisk->options.crypt_type; | ||
| 391 | header->comp_type = ondisk->options.comp_type; | ||
| 392 | |||
| 393 | atomic_set(&header->snapc->nref, 1); | ||
| 394 | header->snap_seq = le64_to_cpu(ondisk->snap_seq); | ||
| 395 | header->snapc->num_snaps = snap_count; | ||
| 396 | header->total_snaps = snap_count; | ||
| 397 | |||
| 398 | if (snap_count && | ||
| 399 | allocated_snaps == snap_count) { | ||
| 400 | for (i = 0; i < snap_count; i++) { | ||
| 401 | header->snapc->snaps[i] = | ||
| 402 | le64_to_cpu(ondisk->snaps[i].id); | ||
| 403 | header->snap_sizes[i] = | ||
| 404 | le64_to_cpu(ondisk->snaps[i].image_size); | ||
| 405 | } | ||
| 406 | |||
| 407 | /* copy snapshot names */ | ||
| 408 | memcpy(header->snap_names, &ondisk->snaps[i], | ||
| 409 | header->snap_names_len); | ||
| 410 | } | ||
| 411 | |||
| 412 | return 0; | ||
| 413 | |||
| 414 | err_names: | ||
| 415 | kfree(header->snap_names); | ||
| 416 | err_snapc: | ||
| 417 | kfree(header->snapc); | ||
| 418 | return ret; | ||
| 419 | } | ||
| 420 | |||
| 421 | static int snap_index(struct rbd_image_header *header, int snap_num) | ||
| 422 | { | ||
| 423 | return header->total_snaps - snap_num; | ||
| 424 | } | ||
| 425 | |||
| 426 | static u64 cur_snap_id(struct rbd_device *rbd_dev) | ||
| 427 | { | ||
| 428 | struct rbd_image_header *header = &rbd_dev->header; | ||
| 429 | |||
| 430 | if (!rbd_dev->cur_snap) | ||
| 431 | return 0; | ||
| 432 | |||
| 433 | return header->snapc->snaps[snap_index(header, rbd_dev->cur_snap)]; | ||
| 434 | } | ||
| 435 | |||
| 436 | static int snap_by_name(struct rbd_image_header *header, const char *snap_name, | ||
| 437 | u64 *seq, u64 *size) | ||
| 438 | { | ||
| 439 | int i; | ||
| 440 | char *p = header->snap_names; | ||
| 441 | |||
| 442 | for (i = 0; i < header->total_snaps; i++, p += strlen(p) + 1) { | ||
| 443 | if (strcmp(snap_name, p) == 0) | ||
| 444 | break; | ||
| 445 | } | ||
| 446 | if (i == header->total_snaps) | ||
| 447 | return -ENOENT; | ||
| 448 | if (seq) | ||
| 449 | *seq = header->snapc->snaps[i]; | ||
| 450 | |||
| 451 | if (size) | ||
| 452 | *size = header->snap_sizes[i]; | ||
| 453 | |||
| 454 | return i; | ||
| 455 | } | ||
| 456 | |||
| 457 | static int rbd_header_set_snap(struct rbd_device *dev, | ||
| 458 | const char *snap_name, | ||
| 459 | u64 *size) | ||
| 460 | { | ||
| 461 | struct rbd_image_header *header = &dev->header; | ||
| 462 | struct ceph_snap_context *snapc = header->snapc; | ||
| 463 | int ret = -ENOENT; | ||
| 464 | |||
| 465 | down_write(&header->snap_rwsem); | ||
| 466 | |||
| 467 | if (!snap_name || | ||
| 468 | !*snap_name || | ||
| 469 | strcmp(snap_name, "-") == 0 || | ||
| 470 | strcmp(snap_name, RBD_SNAP_HEAD_NAME) == 0) { | ||
| 471 | if (header->total_snaps) | ||
| 472 | snapc->seq = header->snap_seq; | ||
| 473 | else | ||
| 474 | snapc->seq = 0; | ||
| 475 | dev->cur_snap = 0; | ||
| 476 | dev->read_only = 0; | ||
| 477 | if (size) | ||
| 478 | *size = header->image_size; | ||
| 479 | } else { | ||
| 480 | ret = snap_by_name(header, snap_name, &snapc->seq, size); | ||
| 481 | if (ret < 0) | ||
| 482 | goto done; | ||
| 483 | |||
| 484 | dev->cur_snap = header->total_snaps - ret; | ||
| 485 | dev->read_only = 1; | ||
| 486 | } | ||
| 487 | |||
| 488 | ret = 0; | ||
| 489 | done: | ||
| 490 | up_write(&header->snap_rwsem); | ||
| 491 | return ret; | ||
| 492 | } | ||
| 493 | |||
| 494 | static void rbd_header_free(struct rbd_image_header *header) | ||
| 495 | { | ||
| 496 | kfree(header->snapc); | ||
| 497 | kfree(header->snap_names); | ||
| 498 | kfree(header->snap_sizes); | ||
| 499 | } | ||
| 500 | |||
| 501 | /* | ||
| 502 | * get the actual striped segment name, offset and length | ||
| 503 | */ | ||
| 504 | static u64 rbd_get_segment(struct rbd_image_header *header, | ||
| 505 | const char *block_name, | ||
| 506 | u64 ofs, u64 len, | ||
| 507 | char *seg_name, u64 *segofs) | ||
| 508 | { | ||
| 509 | u64 seg = ofs >> header->obj_order; | ||
| 510 | |||
| 511 | if (seg_name) | ||
| 512 | snprintf(seg_name, RBD_MAX_SEG_NAME_LEN, | ||
| 513 | "%s.%012llx", block_name, seg); | ||
| 514 | |||
| 515 | ofs = ofs & ((1 << header->obj_order) - 1); | ||
| 516 | len = min_t(u64, len, (1 << header->obj_order) - ofs); | ||
| 517 | |||
| 518 | if (segofs) | ||
| 519 | *segofs = ofs; | ||
| 520 | |||
| 521 | return len; | ||
| 522 | } | ||
| 523 | |||
| 524 | /* | ||
| 525 | * bio helpers | ||
| 526 | */ | ||
| 527 | |||
| 528 | static void bio_chain_put(struct bio *chain) | ||
| 529 | { | ||
| 530 | struct bio *tmp; | ||
| 531 | |||
| 532 | while (chain) { | ||
| 533 | tmp = chain; | ||
| 534 | chain = chain->bi_next; | ||
| 535 | bio_put(tmp); | ||
| 536 | } | ||
| 537 | } | ||
| 538 | |||
| 539 | /* | ||
| 540 | * zeros a bio chain, starting at specific offset | ||
| 541 | */ | ||
| 542 | static void zero_bio_chain(struct bio *chain, int start_ofs) | ||
| 543 | { | ||
| 544 | struct bio_vec *bv; | ||
| 545 | unsigned long flags; | ||
| 546 | void *buf; | ||
| 547 | int i; | ||
| 548 | int pos = 0; | ||
| 549 | |||
| 550 | while (chain) { | ||
| 551 | bio_for_each_segment(bv, chain, i) { | ||
| 552 | if (pos + bv->bv_len > start_ofs) { | ||
| 553 | int remainder = max(start_ofs - pos, 0); | ||
| 554 | buf = bvec_kmap_irq(bv, &flags); | ||
| 555 | memset(buf + remainder, 0, | ||
| 556 | bv->bv_len - remainder); | ||
| 557 | bvec_kunmap_irq(buf, &flags); | ||
| 558 | } | ||
| 559 | pos += bv->bv_len; | ||
| 560 | } | ||
| 561 | |||
| 562 | chain = chain->bi_next; | ||
| 563 | } | ||
| 564 | } | ||
| 565 | |||
| 566 | /* | ||
| 567 | * bio_chain_clone - clone a chain of bios up to a certain length. | ||
| 568 | * might return a bio_pair that will need to be released. | ||
| 569 | */ | ||
| 570 | static struct bio *bio_chain_clone(struct bio **old, struct bio **next, | ||
| 571 | struct bio_pair **bp, | ||
| 572 | int len, gfp_t gfpmask) | ||
| 573 | { | ||
| 574 | struct bio *tmp, *old_chain = *old, *new_chain = NULL, *tail = NULL; | ||
| 575 | int total = 0; | ||
| 576 | |||
| 577 | if (*bp) { | ||
| 578 | bio_pair_release(*bp); | ||
| 579 | *bp = NULL; | ||
| 580 | } | ||
| 581 | |||
| 582 | while (old_chain && (total < len)) { | ||
| 583 | tmp = bio_kmalloc(gfpmask, old_chain->bi_max_vecs); | ||
| 584 | if (!tmp) | ||
| 585 | goto err_out; | ||
| 586 | |||
| 587 | if (total + old_chain->bi_size > len) { | ||
| 588 | struct bio_pair *bp; | ||
| 589 | |||
| 590 | /* | ||
| 591 | * this split can only happen with a single paged bio, | ||
| 592 | * split_bio will BUG_ON if this is not the case | ||
| 593 | */ | ||
| 594 | dout("bio_chain_clone split! total=%d remaining=%d" | ||
| 595 | "bi_size=%d\n", | ||
| 596 | (int)total, (int)len-total, | ||
| 597 | (int)old_chain->bi_size); | ||
| 598 | |||
| 599 | /* split the bio. We'll release it either in the next | ||
| 600 | call, or it will have to be released outside */ | ||
| 601 | bp = bio_split(old_chain, (len - total) / 512ULL); | ||
| 602 | if (!bp) | ||
| 603 | goto err_out; | ||
| 604 | |||
| 605 | __bio_clone(tmp, &bp->bio1); | ||
| 606 | |||
| 607 | *next = &bp->bio2; | ||
| 608 | } else { | ||
| 609 | __bio_clone(tmp, old_chain); | ||
| 610 | *next = old_chain->bi_next; | ||
| 611 | } | ||
| 612 | |||
| 613 | tmp->bi_bdev = NULL; | ||
| 614 | gfpmask &= ~__GFP_WAIT; | ||
| 615 | tmp->bi_next = NULL; | ||
| 616 | |||
| 617 | if (!new_chain) { | ||
| 618 | new_chain = tail = tmp; | ||
| 619 | } else { | ||
| 620 | tail->bi_next = tmp; | ||
| 621 | tail = tmp; | ||
| 622 | } | ||
| 623 | old_chain = old_chain->bi_next; | ||
| 624 | |||
| 625 | total += tmp->bi_size; | ||
| 626 | } | ||
| 627 | |||
| 628 | BUG_ON(total < len); | ||
| 629 | |||
| 630 | if (tail) | ||
| 631 | tail->bi_next = NULL; | ||
| 632 | |||
| 633 | *old = old_chain; | ||
| 634 | |||
| 635 | return new_chain; | ||
| 636 | |||
| 637 | err_out: | ||
| 638 | dout("bio_chain_clone with err\n"); | ||
| 639 | bio_chain_put(new_chain); | ||
| 640 | return NULL; | ||
| 641 | } | ||
| 642 | |||
| 643 | /* | ||
| 644 | * helpers for osd request op vectors. | ||
| 645 | */ | ||
| 646 | static int rbd_create_rw_ops(struct ceph_osd_req_op **ops, | ||
| 647 | int num_ops, | ||
| 648 | int opcode, | ||
| 649 | u32 payload_len) | ||
| 650 | { | ||
| 651 | *ops = kzalloc(sizeof(struct ceph_osd_req_op) * (num_ops + 1), | ||
| 652 | GFP_NOIO); | ||
| 653 | if (!*ops) | ||
| 654 | return -ENOMEM; | ||
| 655 | (*ops)[0].op = opcode; | ||
| 656 | /* | ||
| 657 | * op extent offset and length will be set later on | ||
| 658 | * in calc_raw_layout() | ||
| 659 | */ | ||
| 660 | (*ops)[0].payload_len = payload_len; | ||
| 661 | return 0; | ||
| 662 | } | ||
| 663 | |||
/*
 * Release an op vector.  The vector is handed straight to kfree(), so a
 * NULL @ops is accepted.
 */
static void rbd_destroy_ops(struct ceph_osd_req_op *ops)
{
	kfree(ops);
}
| 668 | |||
| 669 | /* | ||
| 670 | * Send ceph osd request | ||
| 671 | */ | ||
| 672 | static int rbd_do_request(struct request *rq, | ||
| 673 | struct rbd_device *dev, | ||
| 674 | struct ceph_snap_context *snapc, | ||
| 675 | u64 snapid, | ||
| 676 | const char *obj, u64 ofs, u64 len, | ||
| 677 | struct bio *bio, | ||
| 678 | struct page **pages, | ||
| 679 | int num_pages, | ||
| 680 | int flags, | ||
| 681 | struct ceph_osd_req_op *ops, | ||
| 682 | int num_reply, | ||
| 683 | void (*rbd_cb)(struct ceph_osd_request *req, | ||
| 684 | struct ceph_msg *msg)) | ||
| 685 | { | ||
| 686 | struct ceph_osd_request *req; | ||
| 687 | struct ceph_file_layout *layout; | ||
| 688 | int ret; | ||
| 689 | u64 bno; | ||
| 690 | struct timespec mtime = CURRENT_TIME; | ||
| 691 | struct rbd_request *req_data; | ||
| 692 | struct ceph_osd_request_head *reqhead; | ||
| 693 | struct rbd_image_header *header = &dev->header; | ||
| 694 | |||
| 695 | ret = -ENOMEM; | ||
| 696 | req_data = kzalloc(sizeof(*req_data), GFP_NOIO); | ||
| 697 | if (!req_data) | ||
| 698 | goto done; | ||
| 699 | |||
| 700 | dout("rbd_do_request len=%lld ofs=%lld\n", len, ofs); | ||
| 701 | |||
| 702 | down_read(&header->snap_rwsem); | ||
| 703 | |||
| 704 | req = ceph_osdc_alloc_request(&dev->client->osdc, flags, | ||
| 705 | snapc, | ||
| 706 | ops, | ||
| 707 | false, | ||
| 708 | GFP_NOIO, pages, bio); | ||
| 709 | if (IS_ERR(req)) { | ||
| 710 | up_read(&header->snap_rwsem); | ||
| 711 | ret = PTR_ERR(req); | ||
| 712 | goto done_pages; | ||
| 713 | } | ||
| 714 | |||
| 715 | req->r_callback = rbd_cb; | ||
| 716 | |||
| 717 | req_data->rq = rq; | ||
| 718 | req_data->bio = bio; | ||
| 719 | req_data->pages = pages; | ||
| 720 | req_data->len = len; | ||
| 721 | |||
| 722 | req->r_priv = req_data; | ||
| 723 | |||
| 724 | reqhead = req->r_request->front.iov_base; | ||
| 725 | reqhead->snapid = cpu_to_le64(CEPH_NOSNAP); | ||
| 726 | |||
| 727 | strncpy(req->r_oid, obj, sizeof(req->r_oid)); | ||
| 728 | req->r_oid_len = strlen(req->r_oid); | ||
| 729 | |||
| 730 | layout = &req->r_file_layout; | ||
| 731 | memset(layout, 0, sizeof(*layout)); | ||
| 732 | layout->fl_stripe_unit = cpu_to_le32(1 << RBD_MAX_OBJ_ORDER); | ||
| 733 | layout->fl_stripe_count = cpu_to_le32(1); | ||
| 734 | layout->fl_object_size = cpu_to_le32(1 << RBD_MAX_OBJ_ORDER); | ||
| 735 | layout->fl_pg_preferred = cpu_to_le32(-1); | ||
| 736 | layout->fl_pg_pool = cpu_to_le32(dev->poolid); | ||
| 737 | ceph_calc_raw_layout(&dev->client->osdc, layout, snapid, | ||
| 738 | ofs, &len, &bno, req, ops); | ||
| 739 | |||
| 740 | ceph_osdc_build_request(req, ofs, &len, | ||
| 741 | ops, | ||
| 742 | snapc, | ||
| 743 | &mtime, | ||
| 744 | req->r_oid, req->r_oid_len); | ||
| 745 | up_read(&header->snap_rwsem); | ||
| 746 | |||
| 747 | ret = ceph_osdc_start_request(&dev->client->osdc, req, false); | ||
| 748 | if (ret < 0) | ||
| 749 | goto done_err; | ||
| 750 | |||
| 751 | if (!rbd_cb) { | ||
| 752 | ret = ceph_osdc_wait_request(&dev->client->osdc, req); | ||
| 753 | ceph_osdc_put_request(req); | ||
| 754 | } | ||
| 755 | return ret; | ||
| 756 | |||
| 757 | done_err: | ||
| 758 | bio_chain_put(req_data->bio); | ||
| 759 | ceph_osdc_put_request(req); | ||
| 760 | done_pages: | ||
| 761 | kfree(req_data); | ||
| 762 | done: | ||
| 763 | if (rq) | ||
| 764 | blk_end_request(rq, ret, len); | ||
| 765 | return ret; | ||
| 766 | } | ||
| 767 | |||
| 768 | /* | ||
| 769 | * Ceph osd op callback | ||
| 770 | */ | ||
| 771 | static void rbd_req_cb(struct ceph_osd_request *req, struct ceph_msg *msg) | ||
| 772 | { | ||
| 773 | struct rbd_request *req_data = req->r_priv; | ||
| 774 | struct ceph_osd_reply_head *replyhead; | ||
| 775 | struct ceph_osd_op *op; | ||
| 776 | __s32 rc; | ||
| 777 | u64 bytes; | ||
| 778 | int read_op; | ||
| 779 | |||
| 780 | /* parse reply */ | ||
| 781 | replyhead = msg->front.iov_base; | ||
| 782 | WARN_ON(le32_to_cpu(replyhead->num_ops) == 0); | ||
| 783 | op = (void *)(replyhead + 1); | ||
| 784 | rc = le32_to_cpu(replyhead->result); | ||
| 785 | bytes = le64_to_cpu(op->extent.length); | ||
| 786 | read_op = (le32_to_cpu(op->op) == CEPH_OSD_OP_READ); | ||
| 787 | |||
| 788 | dout("rbd_req_cb bytes=%lld readop=%d rc=%d\n", bytes, read_op, rc); | ||
| 789 | |||
| 790 | if (rc == -ENOENT && read_op) { | ||
| 791 | zero_bio_chain(req_data->bio, 0); | ||
| 792 | rc = 0; | ||
| 793 | } else if (rc == 0 && read_op && bytes < req_data->len) { | ||
| 794 | zero_bio_chain(req_data->bio, bytes); | ||
| 795 | bytes = req_data->len; | ||
| 796 | } | ||
| 797 | |||
| 798 | blk_end_request(req_data->rq, rc, bytes); | ||
| 799 | |||
| 800 | if (req_data->bio) | ||
| 801 | bio_chain_put(req_data->bio); | ||
| 802 | |||
| 803 | ceph_osdc_put_request(req); | ||
| 804 | kfree(req_data); | ||
| 805 | } | ||
| 806 | |||
| 807 | /* | ||
| 808 | * Do a synchronous ceph osd operation | ||
| 809 | */ | ||
| 810 | static int rbd_req_sync_op(struct rbd_device *dev, | ||
| 811 | struct ceph_snap_context *snapc, | ||
| 812 | u64 snapid, | ||
| 813 | int opcode, | ||
| 814 | int flags, | ||
| 815 | struct ceph_osd_req_op *orig_ops, | ||
| 816 | int num_reply, | ||
| 817 | const char *obj, | ||
| 818 | u64 ofs, u64 len, | ||
| 819 | char *buf) | ||
| 820 | { | ||
| 821 | int ret; | ||
| 822 | struct page **pages; | ||
| 823 | int num_pages; | ||
| 824 | struct ceph_osd_req_op *ops = orig_ops; | ||
| 825 | u32 payload_len; | ||
| 826 | |||
| 827 | num_pages = calc_pages_for(ofs , len); | ||
| 828 | pages = ceph_alloc_page_vector(num_pages, GFP_KERNEL); | ||
| 829 | if (IS_ERR(pages)) | ||
| 830 | return PTR_ERR(pages); | ||
| 831 | |||
| 832 | if (!orig_ops) { | ||
| 833 | payload_len = (flags & CEPH_OSD_FLAG_WRITE ? len : 0); | ||
| 834 | ret = rbd_create_rw_ops(&ops, 1, opcode, payload_len); | ||
| 835 | if (ret < 0) | ||
| 836 | goto done; | ||
| 837 | |||
| 838 | if ((flags & CEPH_OSD_FLAG_WRITE) && buf) { | ||
| 839 | ret = ceph_copy_to_page_vector(pages, buf, ofs, len); | ||
| 840 | if (ret < 0) | ||
| 841 | goto done_ops; | ||
| 842 | } | ||
| 843 | } | ||
| 844 | |||
| 845 | ret = rbd_do_request(NULL, dev, snapc, snapid, | ||
| 846 | obj, ofs, len, NULL, | ||
| 847 | pages, num_pages, | ||
| 848 | flags, | ||
| 849 | ops, | ||
| 850 | 2, | ||
| 851 | NULL); | ||
| 852 | if (ret < 0) | ||
| 853 | goto done_ops; | ||
| 854 | |||
| 855 | if ((flags & CEPH_OSD_FLAG_READ) && buf) | ||
| 856 | ret = ceph_copy_from_page_vector(pages, buf, ofs, ret); | ||
| 857 | |||
| 858 | done_ops: | ||
| 859 | if (!orig_ops) | ||
| 860 | rbd_destroy_ops(ops); | ||
| 861 | done: | ||
| 862 | ceph_release_page_vector(pages, num_pages); | ||
| 863 | return ret; | ||
| 864 | } | ||
| 865 | |||
| 866 | /* | ||
| 867 | * Do an asynchronous ceph osd operation | ||
| 868 | */ | ||
| 869 | static int rbd_do_op(struct request *rq, | ||
| 870 | struct rbd_device *rbd_dev , | ||
| 871 | struct ceph_snap_context *snapc, | ||
| 872 | u64 snapid, | ||
| 873 | int opcode, int flags, int num_reply, | ||
| 874 | u64 ofs, u64 len, | ||
| 875 | struct bio *bio) | ||
| 876 | { | ||
| 877 | char *seg_name; | ||
| 878 | u64 seg_ofs; | ||
| 879 | u64 seg_len; | ||
| 880 | int ret; | ||
| 881 | struct ceph_osd_req_op *ops; | ||
| 882 | u32 payload_len; | ||
| 883 | |||
| 884 | seg_name = kmalloc(RBD_MAX_SEG_NAME_LEN + 1, GFP_NOIO); | ||
| 885 | if (!seg_name) | ||
| 886 | return -ENOMEM; | ||
| 887 | |||
| 888 | seg_len = rbd_get_segment(&rbd_dev->header, | ||
| 889 | rbd_dev->header.block_name, | ||
| 890 | ofs, len, | ||
| 891 | seg_name, &seg_ofs); | ||
| 892 | |||
| 893 | payload_len = (flags & CEPH_OSD_FLAG_WRITE ? seg_len : 0); | ||
| 894 | |||
| 895 | ret = rbd_create_rw_ops(&ops, 1, opcode, payload_len); | ||
| 896 | if (ret < 0) | ||
| 897 | goto done; | ||
| 898 | |||
| 899 | /* we've taken care of segment sizes earlier when we | ||
| 900 | cloned the bios. We should never have a segment | ||
| 901 | truncated at this point */ | ||
| 902 | BUG_ON(seg_len < len); | ||
| 903 | |||
| 904 | ret = rbd_do_request(rq, rbd_dev, snapc, snapid, | ||
| 905 | seg_name, seg_ofs, seg_len, | ||
| 906 | bio, | ||
| 907 | NULL, 0, | ||
| 908 | flags, | ||
| 909 | ops, | ||
| 910 | num_reply, | ||
| 911 | rbd_req_cb); | ||
| 912 | done: | ||
| 913 | kfree(seg_name); | ||
| 914 | return ret; | ||
| 915 | } | ||
| 916 | |||
| 917 | /* | ||
| 918 | * Request async osd write | ||
| 919 | */ | ||
| 920 | static int rbd_req_write(struct request *rq, | ||
| 921 | struct rbd_device *rbd_dev, | ||
| 922 | struct ceph_snap_context *snapc, | ||
| 923 | u64 ofs, u64 len, | ||
| 924 | struct bio *bio) | ||
| 925 | { | ||
| 926 | return rbd_do_op(rq, rbd_dev, snapc, CEPH_NOSNAP, | ||
| 927 | CEPH_OSD_OP_WRITE, | ||
| 928 | CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK, | ||
| 929 | 2, | ||
| 930 | ofs, len, bio); | ||
| 931 | } | ||
| 932 | |||
| 933 | /* | ||
| 934 | * Request async osd read | ||
| 935 | */ | ||
| 936 | static int rbd_req_read(struct request *rq, | ||
| 937 | struct rbd_device *rbd_dev, | ||
| 938 | u64 snapid, | ||
| 939 | u64 ofs, u64 len, | ||
| 940 | struct bio *bio) | ||
| 941 | { | ||
| 942 | return rbd_do_op(rq, rbd_dev, NULL, | ||
| 943 | (snapid ? snapid : CEPH_NOSNAP), | ||
| 944 | CEPH_OSD_OP_READ, | ||
| 945 | CEPH_OSD_FLAG_READ, | ||
| 946 | 2, | ||
| 947 | ofs, len, bio); | ||
| 948 | } | ||
| 949 | |||
| 950 | /* | ||
| 951 | * Request sync osd read | ||
| 952 | */ | ||
| 953 | static int rbd_req_sync_read(struct rbd_device *dev, | ||
| 954 | struct ceph_snap_context *snapc, | ||
| 955 | u64 snapid, | ||
| 956 | const char *obj, | ||
| 957 | u64 ofs, u64 len, | ||
| 958 | char *buf) | ||
| 959 | { | ||
| 960 | return rbd_req_sync_op(dev, NULL, | ||
| 961 | (snapid ? snapid : CEPH_NOSNAP), | ||
| 962 | CEPH_OSD_OP_READ, | ||
| 963 | CEPH_OSD_FLAG_READ, | ||
| 964 | NULL, | ||
| 965 | 1, obj, ofs, len, buf); | ||
| 966 | } | ||
| 967 | |||
| 968 | /* | ||
| 969 | * Request sync osd read | ||
| 970 | */ | ||
| 971 | static int rbd_req_sync_rollback_obj(struct rbd_device *dev, | ||
| 972 | u64 snapid, | ||
| 973 | const char *obj) | ||
| 974 | { | ||
| 975 | struct ceph_osd_req_op *ops; | ||
| 976 | int ret = rbd_create_rw_ops(&ops, 1, CEPH_OSD_OP_ROLLBACK, 0); | ||
| 977 | if (ret < 0) | ||
| 978 | return ret; | ||
| 979 | |||
| 980 | ops[0].snap.snapid = snapid; | ||
| 981 | |||
| 982 | ret = rbd_req_sync_op(dev, NULL, | ||
| 983 | CEPH_NOSNAP, | ||
| 984 | 0, | ||
| 985 | CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK, | ||
| 986 | ops, | ||
| 987 | 1, obj, 0, 0, NULL); | ||
| 988 | |||
| 989 | rbd_destroy_ops(ops); | ||
| 990 | |||
| 991 | if (ret < 0) | ||
| 992 | return ret; | ||
| 993 | |||
| 994 | return ret; | ||
| 995 | } | ||
| 996 | |||
| 997 | /* | ||
| 998 | * Request sync osd read | ||
| 999 | */ | ||
| 1000 | static int rbd_req_sync_exec(struct rbd_device *dev, | ||
| 1001 | const char *obj, | ||
| 1002 | const char *cls, | ||
| 1003 | const char *method, | ||
| 1004 | const char *data, | ||
| 1005 | int len) | ||
| 1006 | { | ||
| 1007 | struct ceph_osd_req_op *ops; | ||
| 1008 | int cls_len = strlen(cls); | ||
| 1009 | int method_len = strlen(method); | ||
| 1010 | int ret = rbd_create_rw_ops(&ops, 1, CEPH_OSD_OP_CALL, | ||
| 1011 | cls_len + method_len + len); | ||
| 1012 | if (ret < 0) | ||
| 1013 | return ret; | ||
| 1014 | |||
| 1015 | ops[0].cls.class_name = cls; | ||
| 1016 | ops[0].cls.class_len = (__u8)cls_len; | ||
| 1017 | ops[0].cls.method_name = method; | ||
| 1018 | ops[0].cls.method_len = (__u8)method_len; | ||
| 1019 | ops[0].cls.argc = 0; | ||
| 1020 | ops[0].cls.indata = data; | ||
| 1021 | ops[0].cls.indata_len = len; | ||
| 1022 | |||
| 1023 | ret = rbd_req_sync_op(dev, NULL, | ||
| 1024 | CEPH_NOSNAP, | ||
| 1025 | 0, | ||
| 1026 | CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK, | ||
| 1027 | ops, | ||
| 1028 | 1, obj, 0, 0, NULL); | ||
| 1029 | |||
| 1030 | rbd_destroy_ops(ops); | ||
| 1031 | |||
| 1032 | dout("cls_exec returned %d\n", ret); | ||
| 1033 | return ret; | ||
| 1034 | } | ||
| 1035 | |||
| 1036 | /* | ||
| 1037 | * block device queue callback | ||
| 1038 | */ | ||
| 1039 | static void rbd_rq_fn(struct request_queue *q) | ||
| 1040 | { | ||
| 1041 | struct rbd_device *rbd_dev = q->queuedata; | ||
| 1042 | struct request *rq; | ||
| 1043 | struct bio_pair *bp = NULL; | ||
| 1044 | |||
| 1045 | rq = blk_fetch_request(q); | ||
| 1046 | |||
| 1047 | while (1) { | ||
| 1048 | struct bio *bio; | ||
| 1049 | struct bio *rq_bio, *next_bio = NULL; | ||
| 1050 | bool do_write; | ||
| 1051 | int size, op_size = 0; | ||
| 1052 | u64 ofs; | ||
| 1053 | |||
| 1054 | /* peek at request from block layer */ | ||
| 1055 | if (!rq) | ||
| 1056 | break; | ||
| 1057 | |||
| 1058 | dout("fetched request\n"); | ||
| 1059 | |||
| 1060 | /* filter out block requests we don't understand */ | ||
| 1061 | if ((rq->cmd_type != REQ_TYPE_FS)) { | ||
| 1062 | __blk_end_request_all(rq, 0); | ||
| 1063 | goto next; | ||
| 1064 | } | ||
| 1065 | |||
| 1066 | /* deduce our operation (read, write) */ | ||
| 1067 | do_write = (rq_data_dir(rq) == WRITE); | ||
| 1068 | |||
| 1069 | size = blk_rq_bytes(rq); | ||
| 1070 | ofs = blk_rq_pos(rq) * 512ULL; | ||
| 1071 | rq_bio = rq->bio; | ||
| 1072 | if (do_write && rbd_dev->read_only) { | ||
| 1073 | __blk_end_request_all(rq, -EROFS); | ||
| 1074 | goto next; | ||
| 1075 | } | ||
| 1076 | |||
| 1077 | spin_unlock_irq(q->queue_lock); | ||
| 1078 | |||
| 1079 | dout("%s 0x%x bytes at 0x%llx\n", | ||
| 1080 | do_write ? "write" : "read", | ||
| 1081 | size, blk_rq_pos(rq) * 512ULL); | ||
| 1082 | |||
| 1083 | do { | ||
| 1084 | /* a bio clone to be passed down to OSD req */ | ||
| 1085 | dout("rq->bio->bi_vcnt=%d\n", rq->bio->bi_vcnt); | ||
| 1086 | op_size = rbd_get_segment(&rbd_dev->header, | ||
| 1087 | rbd_dev->header.block_name, | ||
| 1088 | ofs, size, | ||
| 1089 | NULL, NULL); | ||
| 1090 | bio = bio_chain_clone(&rq_bio, &next_bio, &bp, | ||
| 1091 | op_size, GFP_ATOMIC); | ||
| 1092 | if (!bio) { | ||
| 1093 | spin_lock_irq(q->queue_lock); | ||
| 1094 | __blk_end_request_all(rq, -ENOMEM); | ||
| 1095 | goto next; | ||
| 1096 | } | ||
| 1097 | |||
| 1098 | /* init OSD command: write or read */ | ||
| 1099 | if (do_write) | ||
| 1100 | rbd_req_write(rq, rbd_dev, | ||
| 1101 | rbd_dev->header.snapc, | ||
| 1102 | ofs, | ||
| 1103 | op_size, bio); | ||
| 1104 | else | ||
| 1105 | rbd_req_read(rq, rbd_dev, | ||
| 1106 | cur_snap_id(rbd_dev), | ||
| 1107 | ofs, | ||
| 1108 | op_size, bio); | ||
| 1109 | |||
| 1110 | size -= op_size; | ||
| 1111 | ofs += op_size; | ||
| 1112 | |||
| 1113 | rq_bio = next_bio; | ||
| 1114 | } while (size > 0); | ||
| 1115 | |||
| 1116 | if (bp) | ||
| 1117 | bio_pair_release(bp); | ||
| 1118 | |||
| 1119 | spin_lock_irq(q->queue_lock); | ||
| 1120 | next: | ||
| 1121 | rq = blk_fetch_request(q); | ||
| 1122 | } | ||
| 1123 | } | ||
| 1124 | |||
| 1125 | /* | ||
| 1126 | * a queue callback. Makes sure that we don't create a bio that spans across | ||
| 1127 | * multiple osd objects. One exception would be with a single page bios, | ||
| 1128 | * which we handle later at bio_chain_clone | ||
| 1129 | */ | ||
| 1130 | static int rbd_merge_bvec(struct request_queue *q, struct bvec_merge_data *bmd, | ||
| 1131 | struct bio_vec *bvec) | ||
| 1132 | { | ||
| 1133 | struct rbd_device *rbd_dev = q->queuedata; | ||
| 1134 | unsigned int chunk_sectors = 1 << (rbd_dev->header.obj_order - 9); | ||
| 1135 | sector_t sector = bmd->bi_sector + get_start_sect(bmd->bi_bdev); | ||
| 1136 | unsigned int bio_sectors = bmd->bi_size >> 9; | ||
| 1137 | int max; | ||
| 1138 | |||
| 1139 | max = (chunk_sectors - ((sector & (chunk_sectors - 1)) | ||
| 1140 | + bio_sectors)) << 9; | ||
| 1141 | if (max < 0) | ||
| 1142 | max = 0; /* bio_add cannot handle a negative return */ | ||
| 1143 | if (max <= bvec->bv_len && bio_sectors == 0) | ||
| 1144 | return bvec->bv_len; | ||
| 1145 | return max; | ||
| 1146 | } | ||
| 1147 | |||
| 1148 | static void rbd_free_disk(struct rbd_device *rbd_dev) | ||
| 1149 | { | ||
| 1150 | struct gendisk *disk = rbd_dev->disk; | ||
| 1151 | |||
| 1152 | if (!disk) | ||
| 1153 | return; | ||
| 1154 | |||
| 1155 | rbd_header_free(&rbd_dev->header); | ||
| 1156 | |||
| 1157 | if (disk->flags & GENHD_FL_UP) | ||
| 1158 | del_gendisk(disk); | ||
| 1159 | if (disk->queue) | ||
| 1160 | blk_cleanup_queue(disk->queue); | ||
| 1161 | put_disk(disk); | ||
| 1162 | } | ||
| 1163 | |||
| 1164 | /* | ||
| 1165 | * reload the ondisk the header | ||
| 1166 | */ | ||
| 1167 | static int rbd_read_header(struct rbd_device *rbd_dev, | ||
| 1168 | struct rbd_image_header *header) | ||
| 1169 | { | ||
| 1170 | ssize_t rc; | ||
| 1171 | struct rbd_image_header_ondisk *dh; | ||
| 1172 | int snap_count = 0; | ||
| 1173 | u64 snap_names_len = 0; | ||
| 1174 | |||
| 1175 | while (1) { | ||
| 1176 | int len = sizeof(*dh) + | ||
| 1177 | snap_count * sizeof(struct rbd_image_snap_ondisk) + | ||
| 1178 | snap_names_len; | ||
| 1179 | |||
| 1180 | rc = -ENOMEM; | ||
| 1181 | dh = kmalloc(len, GFP_KERNEL); | ||
| 1182 | if (!dh) | ||
| 1183 | return -ENOMEM; | ||
| 1184 | |||
| 1185 | rc = rbd_req_sync_read(rbd_dev, | ||
| 1186 | NULL, CEPH_NOSNAP, | ||
| 1187 | rbd_dev->obj_md_name, | ||
| 1188 | 0, len, | ||
| 1189 | (char *)dh); | ||
| 1190 | if (rc < 0) | ||
| 1191 | goto out_dh; | ||
| 1192 | |||
| 1193 | rc = rbd_header_from_disk(header, dh, snap_count, GFP_KERNEL); | ||
| 1194 | if (rc < 0) | ||
| 1195 | goto out_dh; | ||
| 1196 | |||
| 1197 | if (snap_count != header->total_snaps) { | ||
| 1198 | snap_count = header->total_snaps; | ||
| 1199 | snap_names_len = header->snap_names_len; | ||
| 1200 | rbd_header_free(header); | ||
| 1201 | kfree(dh); | ||
| 1202 | continue; | ||
| 1203 | } | ||
| 1204 | break; | ||
| 1205 | } | ||
| 1206 | |||
| 1207 | out_dh: | ||
| 1208 | kfree(dh); | ||
| 1209 | return rc; | ||
| 1210 | } | ||
| 1211 | |||
| 1212 | /* | ||
| 1213 | * create a snapshot | ||
| 1214 | */ | ||
| 1215 | static int rbd_header_add_snap(struct rbd_device *dev, | ||
| 1216 | const char *snap_name, | ||
| 1217 | gfp_t gfp_flags) | ||
| 1218 | { | ||
| 1219 | int name_len = strlen(snap_name); | ||
| 1220 | u64 new_snapid; | ||
| 1221 | int ret; | ||
| 1222 | void *data, *data_start, *data_end; | ||
| 1223 | |||
| 1224 | /* we should create a snapshot only if we're pointing at the head */ | ||
| 1225 | if (dev->cur_snap) | ||
| 1226 | return -EINVAL; | ||
| 1227 | |||
| 1228 | ret = ceph_monc_create_snapid(&dev->client->monc, dev->poolid, | ||
| 1229 | &new_snapid); | ||
| 1230 | dout("created snapid=%lld\n", new_snapid); | ||
| 1231 | if (ret < 0) | ||
| 1232 | return ret; | ||
| 1233 | |||
| 1234 | data = kmalloc(name_len + 16, gfp_flags); | ||
| 1235 | if (!data) | ||
| 1236 | return -ENOMEM; | ||
| 1237 | |||
| 1238 | data_start = data; | ||
| 1239 | data_end = data + name_len + 16; | ||
| 1240 | |||
| 1241 | ceph_encode_string_safe(&data, data_end, snap_name, name_len, bad); | ||
| 1242 | ceph_encode_64_safe(&data, data_end, new_snapid, bad); | ||
| 1243 | |||
| 1244 | ret = rbd_req_sync_exec(dev, dev->obj_md_name, "rbd", "snap_add", | ||
| 1245 | data_start, data - data_start); | ||
| 1246 | |||
| 1247 | kfree(data_start); | ||
| 1248 | |||
| 1249 | if (ret < 0) | ||
| 1250 | return ret; | ||
| 1251 | |||
| 1252 | dev->header.snapc->seq = new_snapid; | ||
| 1253 | |||
| 1254 | return 0; | ||
| 1255 | bad: | ||
| 1256 | return -ERANGE; | ||
| 1257 | } | ||
| 1258 | |||
| 1259 | /* | ||
| 1260 | * only read the first part of the ondisk header, without the snaps info | ||
| 1261 | */ | ||
| 1262 | static int rbd_update_snaps(struct rbd_device *rbd_dev) | ||
| 1263 | { | ||
| 1264 | int ret; | ||
| 1265 | struct rbd_image_header h; | ||
| 1266 | u64 snap_seq; | ||
| 1267 | |||
| 1268 | ret = rbd_read_header(rbd_dev, &h); | ||
| 1269 | if (ret < 0) | ||
| 1270 | return ret; | ||
| 1271 | |||
| 1272 | down_write(&rbd_dev->header.snap_rwsem); | ||
| 1273 | |||
| 1274 | snap_seq = rbd_dev->header.snapc->seq; | ||
| 1275 | |||
| 1276 | kfree(rbd_dev->header.snapc); | ||
| 1277 | kfree(rbd_dev->header.snap_names); | ||
| 1278 | kfree(rbd_dev->header.snap_sizes); | ||
| 1279 | |||
| 1280 | rbd_dev->header.total_snaps = h.total_snaps; | ||
| 1281 | rbd_dev->header.snapc = h.snapc; | ||
| 1282 | rbd_dev->header.snap_names = h.snap_names; | ||
| 1283 | rbd_dev->header.snap_sizes = h.snap_sizes; | ||
| 1284 | rbd_dev->header.snapc->seq = snap_seq; | ||
| 1285 | |||
| 1286 | up_write(&rbd_dev->header.snap_rwsem); | ||
| 1287 | |||
| 1288 | return 0; | ||
| 1289 | } | ||
| 1290 | |||
| 1291 | static int rbd_init_disk(struct rbd_device *rbd_dev) | ||
| 1292 | { | ||
| 1293 | struct gendisk *disk; | ||
| 1294 | struct request_queue *q; | ||
| 1295 | int rc; | ||
| 1296 | u64 total_size = 0; | ||
| 1297 | |||
| 1298 | /* contact OSD, request size info about the object being mapped */ | ||
| 1299 | rc = rbd_read_header(rbd_dev, &rbd_dev->header); | ||
| 1300 | if (rc) | ||
| 1301 | return rc; | ||
| 1302 | |||
| 1303 | rc = rbd_header_set_snap(rbd_dev, rbd_dev->snap_name, &total_size); | ||
| 1304 | if (rc) | ||
| 1305 | return rc; | ||
| 1306 | |||
| 1307 | /* create gendisk info */ | ||
| 1308 | rc = -ENOMEM; | ||
| 1309 | disk = alloc_disk(RBD_MINORS_PER_MAJOR); | ||
| 1310 | if (!disk) | ||
| 1311 | goto out; | ||
| 1312 | |||
| 1313 | sprintf(disk->disk_name, DRV_NAME "%d", rbd_dev->id); | ||
| 1314 | disk->major = rbd_dev->major; | ||
| 1315 | disk->first_minor = 0; | ||
| 1316 | disk->fops = &rbd_bd_ops; | ||
| 1317 | disk->private_data = rbd_dev; | ||
| 1318 | |||
| 1319 | /* init rq */ | ||
| 1320 | rc = -ENOMEM; | ||
| 1321 | q = blk_init_queue(rbd_rq_fn, &rbd_dev->lock); | ||
| 1322 | if (!q) | ||
| 1323 | goto out_disk; | ||
| 1324 | blk_queue_merge_bvec(q, rbd_merge_bvec); | ||
| 1325 | disk->queue = q; | ||
| 1326 | |||
| 1327 | q->queuedata = rbd_dev; | ||
| 1328 | |||
| 1329 | rbd_dev->disk = disk; | ||
| 1330 | rbd_dev->q = q; | ||
| 1331 | |||
| 1332 | /* finally, announce the disk to the world */ | ||
| 1333 | set_capacity(disk, total_size / 512ULL); | ||
| 1334 | add_disk(disk); | ||
| 1335 | |||
| 1336 | pr_info("%s: added with size 0x%llx\n", | ||
| 1337 | disk->disk_name, (unsigned long long)total_size); | ||
| 1338 | return 0; | ||
| 1339 | |||
| 1340 | out_disk: | ||
| 1341 | put_disk(disk); | ||
| 1342 | out: | ||
| 1343 | return rc; | ||
| 1344 | } | ||
| 1345 | |||
| 1346 | /******************************************************************** | ||
| 1347 | * /sys/class/rbd/ | ||
| 1348 | * add map rados objects to blkdev | ||
| 1349 | * remove unmap rados objects | ||
| 1350 | * list show mappings | ||
| 1351 | *******************************************************************/ | ||
| 1352 | |||
/*
 * Class release callback: frees the class structure itself.
 */
static void class_rbd_release(struct class *cls)
{
	kfree(cls);
}
| 1357 | |||
| 1358 | static ssize_t class_rbd_list(struct class *c, | ||
| 1359 | struct class_attribute *attr, | ||
| 1360 | char *data) | ||
| 1361 | { | ||
| 1362 | int n = 0; | ||
| 1363 | struct list_head *tmp; | ||
| 1364 | int max = PAGE_SIZE; | ||
| 1365 | |||
| 1366 | mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); | ||
| 1367 | |||
| 1368 | n += snprintf(data, max, | ||
| 1369 | "#id\tmajor\tclient_name\tpool\tname\tsnap\tKB\n"); | ||
| 1370 | |||
| 1371 | list_for_each(tmp, &rbd_dev_list) { | ||
| 1372 | struct rbd_device *rbd_dev; | ||
| 1373 | |||
| 1374 | rbd_dev = list_entry(tmp, struct rbd_device, node); | ||
| 1375 | n += snprintf(data+n, max-n, | ||
| 1376 | "%d\t%d\tclient%lld\t%s\t%s\t%s\t%lld\n", | ||
| 1377 | rbd_dev->id, | ||
| 1378 | rbd_dev->major, | ||
| 1379 | ceph_client_id(rbd_dev->client), | ||
| 1380 | rbd_dev->pool_name, | ||
| 1381 | rbd_dev->obj, rbd_dev->snap_name, | ||
| 1382 | rbd_dev->header.image_size >> 10); | ||
| 1383 | if (n == max) | ||
| 1384 | break; | ||
| 1385 | } | ||
| 1386 | |||
| 1387 | mutex_unlock(&ctl_mutex); | ||
| 1388 | return n; | ||
| 1389 | } | ||
| 1390 | |||
| 1391 | static ssize_t class_rbd_add(struct class *c, | ||
| 1392 | struct class_attribute *attr, | ||
| 1393 | const char *buf, size_t count) | ||
| 1394 | { | ||
| 1395 | struct ceph_osd_client *osdc; | ||
| 1396 | struct rbd_device *rbd_dev; | ||
| 1397 | ssize_t rc = -ENOMEM; | ||
| 1398 | int irc, new_id = 0; | ||
| 1399 | struct list_head *tmp; | ||
| 1400 | char *mon_dev_name; | ||
| 1401 | char *options; | ||
| 1402 | |||
| 1403 | if (!try_module_get(THIS_MODULE)) | ||
| 1404 | return -ENODEV; | ||
| 1405 | |||
| 1406 | mon_dev_name = kmalloc(RBD_MAX_OPT_LEN, GFP_KERNEL); | ||
| 1407 | if (!mon_dev_name) | ||
| 1408 | goto err_out_mod; | ||
| 1409 | |||
| 1410 | options = kmalloc(RBD_MAX_OPT_LEN, GFP_KERNEL); | ||
| 1411 | if (!options) | ||
| 1412 | goto err_mon_dev; | ||
| 1413 | |||
| 1414 | /* new rbd_device object */ | ||
| 1415 | rbd_dev = kzalloc(sizeof(*rbd_dev), GFP_KERNEL); | ||
| 1416 | if (!rbd_dev) | ||
| 1417 | goto err_out_opt; | ||
| 1418 | |||
| 1419 | /* static rbd_device initialization */ | ||
| 1420 | spin_lock_init(&rbd_dev->lock); | ||
| 1421 | INIT_LIST_HEAD(&rbd_dev->node); | ||
| 1422 | |||
| 1423 | /* generate unique id: find highest unique id, add one */ | ||
| 1424 | mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); | ||
| 1425 | |||
| 1426 | list_for_each(tmp, &rbd_dev_list) { | ||
| 1427 | struct rbd_device *rbd_dev; | ||
| 1428 | |||
| 1429 | rbd_dev = list_entry(tmp, struct rbd_device, node); | ||
| 1430 | if (rbd_dev->id >= new_id) | ||
| 1431 | new_id = rbd_dev->id + 1; | ||
| 1432 | } | ||
| 1433 | |||
| 1434 | rbd_dev->id = new_id; | ||
| 1435 | |||
| 1436 | /* add to global list */ | ||
| 1437 | list_add_tail(&rbd_dev->node, &rbd_dev_list); | ||
| 1438 | |||
| 1439 | /* parse add command */ | ||
| 1440 | if (sscanf(buf, "%" __stringify(RBD_MAX_OPT_LEN) "s " | ||
| 1441 | "%" __stringify(RBD_MAX_OPT_LEN) "s " | ||
| 1442 | "%" __stringify(RBD_MAX_POOL_NAME_LEN) "s " | ||
| 1443 | "%" __stringify(RBD_MAX_OBJ_NAME_LEN) "s" | ||
| 1444 | "%" __stringify(RBD_MAX_SNAP_NAME_LEN) "s", | ||
| 1445 | mon_dev_name, options, rbd_dev->pool_name, | ||
| 1446 | rbd_dev->obj, rbd_dev->snap_name) < 4) { | ||
| 1447 | rc = -EINVAL; | ||
| 1448 | goto err_out_slot; | ||
| 1449 | } | ||
| 1450 | |||
| 1451 | if (rbd_dev->snap_name[0] == 0) | ||
| 1452 | rbd_dev->snap_name[0] = '-'; | ||
| 1453 | |||
| 1454 | rbd_dev->obj_len = strlen(rbd_dev->obj); | ||
| 1455 | snprintf(rbd_dev->obj_md_name, sizeof(rbd_dev->obj_md_name), "%s%s", | ||
| 1456 | rbd_dev->obj, RBD_SUFFIX); | ||
| 1457 | |||
| 1458 | /* initialize rest of new object */ | ||
| 1459 | snprintf(rbd_dev->name, DEV_NAME_LEN, DRV_NAME "%d", rbd_dev->id); | ||
| 1460 | rc = rbd_get_client(rbd_dev, mon_dev_name, options); | ||
| 1461 | if (rc < 0) | ||
| 1462 | goto err_out_slot; | ||
| 1463 | |||
| 1464 | mutex_unlock(&ctl_mutex); | ||
| 1465 | |||
| 1466 | /* pick the pool */ | ||
| 1467 | osdc = &rbd_dev->client->osdc; | ||
| 1468 | rc = ceph_pg_poolid_by_name(osdc->osdmap, rbd_dev->pool_name); | ||
| 1469 | if (rc < 0) | ||
| 1470 | goto err_out_client; | ||
| 1471 | rbd_dev->poolid = rc; | ||
| 1472 | |||
| 1473 | /* register our block device */ | ||
| 1474 | irc = register_blkdev(0, rbd_dev->name); | ||
| 1475 | if (irc < 0) { | ||
| 1476 | rc = irc; | ||
| 1477 | goto err_out_client; | ||
| 1478 | } | ||
| 1479 | rbd_dev->major = irc; | ||
| 1480 | |||
| 1481 | /* set up and announce blkdev mapping */ | ||
| 1482 | rc = rbd_init_disk(rbd_dev); | ||
| 1483 | if (rc) | ||
| 1484 | goto err_out_blkdev; | ||
| 1485 | |||
| 1486 | return count; | ||
| 1487 | |||
| 1488 | err_out_blkdev: | ||
| 1489 | unregister_blkdev(rbd_dev->major, rbd_dev->name); | ||
| 1490 | err_out_client: | ||
| 1491 | rbd_put_client(rbd_dev); | ||
| 1492 | mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); | ||
| 1493 | err_out_slot: | ||
| 1494 | list_del_init(&rbd_dev->node); | ||
| 1495 | mutex_unlock(&ctl_mutex); | ||
| 1496 | |||
| 1497 | kfree(rbd_dev); | ||
| 1498 | err_out_opt: | ||
| 1499 | kfree(options); | ||
| 1500 | err_mon_dev: | ||
| 1501 | kfree(mon_dev_name); | ||
| 1502 | err_out_mod: | ||
| 1503 | dout("Error adding device %s\n", buf); | ||
| 1504 | module_put(THIS_MODULE); | ||
| 1505 | return rc; | ||
| 1506 | } | ||
| 1507 | |||
| 1508 | static struct rbd_device *__rbd_get_dev(unsigned long id) | ||
| 1509 | { | ||
| 1510 | struct list_head *tmp; | ||
| 1511 | struct rbd_device *rbd_dev; | ||
| 1512 | |||
| 1513 | list_for_each(tmp, &rbd_dev_list) { | ||
| 1514 | rbd_dev = list_entry(tmp, struct rbd_device, node); | ||
| 1515 | if (rbd_dev->id == id) | ||
| 1516 | return rbd_dev; | ||
| 1517 | } | ||
| 1518 | return NULL; | ||
| 1519 | } | ||
| 1520 | |||
| 1521 | static ssize_t class_rbd_remove(struct class *c, | ||
| 1522 | struct class_attribute *attr, | ||
| 1523 | const char *buf, | ||
| 1524 | size_t count) | ||
| 1525 | { | ||
| 1526 | struct rbd_device *rbd_dev = NULL; | ||
| 1527 | int target_id, rc; | ||
| 1528 | unsigned long ul; | ||
| 1529 | |||
| 1530 | rc = strict_strtoul(buf, 10, &ul); | ||
| 1531 | if (rc) | ||
| 1532 | return rc; | ||
| 1533 | |||
| 1534 | /* convert to int; abort if we lost anything in the conversion */ | ||
| 1535 | target_id = (int) ul; | ||
| 1536 | if (target_id != ul) | ||
| 1537 | return -EINVAL; | ||
| 1538 | |||
| 1539 | /* remove object from list immediately */ | ||
| 1540 | mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); | ||
| 1541 | |||
| 1542 | rbd_dev = __rbd_get_dev(target_id); | ||
| 1543 | if (rbd_dev) | ||
| 1544 | list_del_init(&rbd_dev->node); | ||
| 1545 | |||
| 1546 | mutex_unlock(&ctl_mutex); | ||
| 1547 | |||
| 1548 | if (!rbd_dev) | ||
| 1549 | return -ENOENT; | ||
| 1550 | |||
| 1551 | rbd_put_client(rbd_dev); | ||
| 1552 | |||
| 1553 | /* clean up and free blkdev */ | ||
| 1554 | rbd_free_disk(rbd_dev); | ||
| 1555 | unregister_blkdev(rbd_dev->major, rbd_dev->name); | ||
| 1556 | kfree(rbd_dev); | ||
| 1557 | |||
| 1558 | /* release module ref */ | ||
| 1559 | module_put(THIS_MODULE); | ||
| 1560 | |||
| 1561 | return count; | ||
| 1562 | } | ||
| 1563 | |||
| 1564 | static ssize_t class_rbd_snaps_list(struct class *c, | ||
| 1565 | struct class_attribute *attr, | ||
| 1566 | char *data) | ||
| 1567 | { | ||
| 1568 | struct rbd_device *rbd_dev = NULL; | ||
| 1569 | struct list_head *tmp; | ||
| 1570 | struct rbd_image_header *header; | ||
| 1571 | int i, n = 0, max = PAGE_SIZE; | ||
| 1572 | int ret; | ||
| 1573 | |||
| 1574 | mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); | ||
| 1575 | |||
| 1576 | n += snprintf(data, max, "#id\tsnap\tKB\n"); | ||
| 1577 | |||
| 1578 | list_for_each(tmp, &rbd_dev_list) { | ||
| 1579 | char *names, *p; | ||
| 1580 | struct ceph_snap_context *snapc; | ||
| 1581 | |||
| 1582 | rbd_dev = list_entry(tmp, struct rbd_device, node); | ||
| 1583 | header = &rbd_dev->header; | ||
| 1584 | |||
| 1585 | down_read(&header->snap_rwsem); | ||
| 1586 | |||
| 1587 | names = header->snap_names; | ||
| 1588 | snapc = header->snapc; | ||
| 1589 | |||
| 1590 | n += snprintf(data + n, max - n, "%d\t%s\t%lld%s\n", | ||
| 1591 | rbd_dev->id, RBD_SNAP_HEAD_NAME, | ||
| 1592 | header->image_size >> 10, | ||
| 1593 | (!rbd_dev->cur_snap ? " (*)" : "")); | ||
| 1594 | if (n == max) | ||
| 1595 | break; | ||
| 1596 | |||
| 1597 | p = names; | ||
| 1598 | for (i = 0; i < header->total_snaps; i++, p += strlen(p) + 1) { | ||
| 1599 | n += snprintf(data + n, max - n, "%d\t%s\t%lld%s\n", | ||
| 1600 | rbd_dev->id, p, header->snap_sizes[i] >> 10, | ||
| 1601 | (rbd_dev->cur_snap && | ||
| 1602 | (snap_index(header, i) == rbd_dev->cur_snap) ? | ||
| 1603 | " (*)" : "")); | ||
| 1604 | if (n == max) | ||
| 1605 | break; | ||
| 1606 | } | ||
| 1607 | |||
| 1608 | up_read(&header->snap_rwsem); | ||
| 1609 | } | ||
| 1610 | |||
| 1611 | |||
| 1612 | ret = n; | ||
| 1613 | mutex_unlock(&ctl_mutex); | ||
| 1614 | return ret; | ||
| 1615 | } | ||
| 1616 | |||
| 1617 | static ssize_t class_rbd_snaps_refresh(struct class *c, | ||
| 1618 | struct class_attribute *attr, | ||
| 1619 | const char *buf, | ||
| 1620 | size_t count) | ||
| 1621 | { | ||
| 1622 | struct rbd_device *rbd_dev = NULL; | ||
| 1623 | int target_id, rc; | ||
| 1624 | unsigned long ul; | ||
| 1625 | int ret = count; | ||
| 1626 | |||
| 1627 | rc = strict_strtoul(buf, 10, &ul); | ||
| 1628 | if (rc) | ||
| 1629 | return rc; | ||
| 1630 | |||
| 1631 | /* convert to int; abort if we lost anything in the conversion */ | ||
| 1632 | target_id = (int) ul; | ||
| 1633 | if (target_id != ul) | ||
| 1634 | return -EINVAL; | ||
| 1635 | |||
| 1636 | mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); | ||
| 1637 | |||
| 1638 | rbd_dev = __rbd_get_dev(target_id); | ||
| 1639 | if (!rbd_dev) { | ||
| 1640 | ret = -ENOENT; | ||
| 1641 | goto done; | ||
| 1642 | } | ||
| 1643 | |||
| 1644 | rc = rbd_update_snaps(rbd_dev); | ||
| 1645 | if (rc < 0) | ||
| 1646 | ret = rc; | ||
| 1647 | |||
| 1648 | done: | ||
| 1649 | mutex_unlock(&ctl_mutex); | ||
| 1650 | return ret; | ||
| 1651 | } | ||
| 1652 | |||
| 1653 | static ssize_t class_rbd_snap_create(struct class *c, | ||
| 1654 | struct class_attribute *attr, | ||
| 1655 | const char *buf, | ||
| 1656 | size_t count) | ||
| 1657 | { | ||
| 1658 | struct rbd_device *rbd_dev = NULL; | ||
| 1659 | int target_id, ret; | ||
| 1660 | char *name; | ||
| 1661 | |||
| 1662 | name = kmalloc(RBD_MAX_SNAP_NAME_LEN + 1, GFP_KERNEL); | ||
| 1663 | if (!name) | ||
| 1664 | return -ENOMEM; | ||
| 1665 | |||
| 1666 | /* parse snaps add command */ | ||
| 1667 | if (sscanf(buf, "%d " | ||
| 1668 | "%" __stringify(RBD_MAX_SNAP_NAME_LEN) "s", | ||
| 1669 | &target_id, | ||
| 1670 | name) != 2) { | ||
| 1671 | ret = -EINVAL; | ||
| 1672 | goto done; | ||
| 1673 | } | ||
| 1674 | |||
| 1675 | mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); | ||
| 1676 | |||
| 1677 | rbd_dev = __rbd_get_dev(target_id); | ||
| 1678 | if (!rbd_dev) { | ||
| 1679 | ret = -ENOENT; | ||
| 1680 | goto done_unlock; | ||
| 1681 | } | ||
| 1682 | |||
| 1683 | ret = rbd_header_add_snap(rbd_dev, | ||
| 1684 | name, GFP_KERNEL); | ||
| 1685 | if (ret < 0) | ||
| 1686 | goto done_unlock; | ||
| 1687 | |||
| 1688 | ret = rbd_update_snaps(rbd_dev); | ||
| 1689 | if (ret < 0) | ||
| 1690 | goto done_unlock; | ||
| 1691 | |||
| 1692 | ret = count; | ||
| 1693 | done_unlock: | ||
| 1694 | mutex_unlock(&ctl_mutex); | ||
| 1695 | done: | ||
| 1696 | kfree(name); | ||
| 1697 | return ret; | ||
| 1698 | } | ||
| 1699 | |||
| 1700 | static ssize_t class_rbd_rollback(struct class *c, | ||
| 1701 | struct class_attribute *attr, | ||
| 1702 | const char *buf, | ||
| 1703 | size_t count) | ||
| 1704 | { | ||
| 1705 | struct rbd_device *rbd_dev = NULL; | ||
| 1706 | int target_id, ret; | ||
| 1707 | u64 snapid; | ||
| 1708 | char snap_name[RBD_MAX_SNAP_NAME_LEN]; | ||
| 1709 | u64 cur_ofs; | ||
| 1710 | char *seg_name; | ||
| 1711 | |||
| 1712 | /* parse snaps add command */ | ||
| 1713 | if (sscanf(buf, "%d " | ||
| 1714 | "%" __stringify(RBD_MAX_SNAP_NAME_LEN) "s", | ||
| 1715 | &target_id, | ||
| 1716 | snap_name) != 2) { | ||
| 1717 | return -EINVAL; | ||
| 1718 | } | ||
| 1719 | |||
| 1720 | ret = -ENOMEM; | ||
| 1721 | seg_name = kmalloc(RBD_MAX_SEG_NAME_LEN + 1, GFP_NOIO); | ||
| 1722 | if (!seg_name) | ||
| 1723 | return ret; | ||
| 1724 | |||
| 1725 | mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); | ||
| 1726 | |||
| 1727 | rbd_dev = __rbd_get_dev(target_id); | ||
| 1728 | if (!rbd_dev) { | ||
| 1729 | ret = -ENOENT; | ||
| 1730 | goto done_unlock; | ||
| 1731 | } | ||
| 1732 | |||
| 1733 | ret = snap_by_name(&rbd_dev->header, snap_name, &snapid, NULL); | ||
| 1734 | if (ret < 0) | ||
| 1735 | goto done_unlock; | ||
| 1736 | |||
| 1737 | dout("snapid=%lld\n", snapid); | ||
| 1738 | |||
| 1739 | cur_ofs = 0; | ||
| 1740 | while (cur_ofs < rbd_dev->header.image_size) { | ||
| 1741 | cur_ofs += rbd_get_segment(&rbd_dev->header, | ||
| 1742 | rbd_dev->obj, | ||
| 1743 | cur_ofs, (u64)-1, | ||
| 1744 | seg_name, NULL); | ||
| 1745 | dout("seg_name=%s\n", seg_name); | ||
| 1746 | |||
| 1747 | ret = rbd_req_sync_rollback_obj(rbd_dev, snapid, seg_name); | ||
| 1748 | if (ret < 0) | ||
| 1749 | pr_warning("could not roll back obj %s err=%d\n", | ||
| 1750 | seg_name, ret); | ||
| 1751 | } | ||
| 1752 | |||
| 1753 | ret = rbd_update_snaps(rbd_dev); | ||
| 1754 | if (ret < 0) | ||
| 1755 | goto done_unlock; | ||
| 1756 | |||
| 1757 | ret = count; | ||
| 1758 | |||
| 1759 | done_unlock: | ||
| 1760 | mutex_unlock(&ctl_mutex); | ||
| 1761 | kfree(seg_name); | ||
| 1762 | |||
| 1763 | return ret; | ||
| 1764 | } | ||
| 1765 | |||
/*
 * Control files of the rbd class.  Each __ATTR() entry gives the file
 * name, its mode, the show handler and the store handler.  The 0200
 * entries have only a store handler (write-only commands); the 0444
 * entries have only a show handler (read-only listings).  The table is
 * terminated by __ATTR_NULL.
 */
| 1766 | static struct class_attribute class_rbd_attrs[] = { | ||
| 1767 | __ATTR(add, 0200, NULL, class_rbd_add), | ||
| 1768 | __ATTR(remove, 0200, NULL, class_rbd_remove), | ||
| 1769 | __ATTR(list, 0444, class_rbd_list, NULL), | ||
| 1770 | __ATTR(snaps_refresh, 0200, NULL, class_rbd_snaps_refresh), | ||
| 1771 | __ATTR(snap_create, 0200, NULL, class_rbd_snap_create), | ||
| 1772 | __ATTR(snaps_list, 0444, class_rbd_snaps_list, NULL), | ||
| 1773 | __ATTR(snap_rollback, 0200, NULL, class_rbd_rollback), | ||
| 1774 | __ATTR_NULL | ||
| 1775 | }; | ||
| 1776 | |||
| 1777 | /* | ||
| 1778 | * create control files in sysfs | ||
| 1779 | * /sys/class/rbd/... | ||
| 1780 | */ | ||
| 1781 | static int rbd_sysfs_init(void) | ||
| 1782 | { | ||
| 1783 | int ret = -ENOMEM; | ||
| 1784 | |||
| 1785 | class_rbd = kzalloc(sizeof(*class_rbd), GFP_KERNEL); | ||
| 1786 | if (!class_rbd) | ||
| 1787 | goto out; | ||
| 1788 | |||
| 1789 | class_rbd->name = DRV_NAME; | ||
| 1790 | class_rbd->owner = THIS_MODULE; | ||
| 1791 | class_rbd->class_release = class_rbd_release; | ||
| 1792 | class_rbd->class_attrs = class_rbd_attrs; | ||
| 1793 | |||
| 1794 | ret = class_register(class_rbd); | ||
| 1795 | if (ret) | ||
| 1796 | goto out_class; | ||
| 1797 | return 0; | ||
| 1798 | |||
| 1799 | out_class: | ||
| 1800 | kfree(class_rbd); | ||
| 1801 | class_rbd = NULL; | ||
| 1802 | pr_err(DRV_NAME ": failed to create class rbd\n"); | ||
| 1803 | out: | ||
| 1804 | return ret; | ||
| 1805 | } | ||
| 1806 | |||
| 1807 | static void rbd_sysfs_cleanup(void) | ||
| 1808 | { | ||
| 1809 | if (class_rbd) | ||
| 1810 | class_destroy(class_rbd); | ||
| 1811 | class_rbd = NULL; | ||
| 1812 | } | ||
| 1813 | |||
| 1814 | int __init rbd_init(void) | ||
| 1815 | { | ||
| 1816 | int rc; | ||
| 1817 | |||
| 1818 | rc = rbd_sysfs_init(); | ||
| 1819 | if (rc) | ||
| 1820 | return rc; | ||
| 1821 | spin_lock_init(&node_lock); | ||
| 1822 | pr_info("loaded " DRV_NAME_LONG "\n"); | ||
| 1823 | return 0; | ||
| 1824 | } | ||
| 1825 | |||
/* Module exit point: tears down the sysfs class via rbd_sysfs_cleanup(). */
| 1826 | void __exit rbd_exit(void) | ||
| 1827 | { | ||
| 1828 | rbd_sysfs_cleanup(); | ||
| 1829 | } | ||
| 1830 | |||
| 1831 | module_init(rbd_init); | ||
| 1832 | module_exit(rbd_exit); | ||
| 1833 | |||
| 1834 | MODULE_AUTHOR("Sage Weil <sage@newdream.net>"); | ||
| 1835 | MODULE_AUTHOR("Yehuda Sadeh <yehuda@hq.newdream.net>"); | ||
| 1836 | MODULE_DESCRIPTION("rados block device"); | ||
| 1837 | |||
| 1838 | /* following authorship retained from original osdblk.c */ | ||
| 1839 | MODULE_AUTHOR("Jeff Garzik <jeff@garzik.org>"); | ||
| 1840 | |||
| 1841 | MODULE_LICENSE("GPL"); | ||
diff --git a/drivers/block/rbd_types.h b/drivers/block/rbd_types.h new file mode 100644 index 000000000000..fc6c678aa2cb --- /dev/null +++ b/drivers/block/rbd_types.h | |||
| @@ -0,0 +1,73 @@ | |||
| 1 | /* | ||
| 2 | * Ceph - scalable distributed file system | ||
| 3 | * | ||
| 4 | * Copyright (C) 2004-2010 Sage Weil <sage@newdream.net> | ||
| 5 | * | ||
| 6 | * This is free software; you can redistribute it and/or | ||
| 7 | * modify it under the terms of the GNU Lesser General Public | ||
| 8 | * License version 2.1, as published by the Free Software | ||
| 9 | * Foundation. See file COPYING. | ||
| 10 | * | ||
| 11 | */ | ||
| 12 | |||
| 13 | #ifndef CEPH_RBD_TYPES_H | ||
| 14 | #define CEPH_RBD_TYPES_H | ||
| 15 | |||
| 16 | #include <linux/types.h> | ||
| 17 | |||
| 18 | /* | ||
| 19 | * rbd image 'foo' consists of objects | ||
| 20 | * foo.rbd - image metadata | ||
| 21 | * foo.00000000 | ||
| 22 | * foo.00000001 | ||
| 23 | * ... - data | ||
| 24 | */ | ||
| 25 | |||
| 26 | #define RBD_SUFFIX ".rbd" | ||
| 27 | #define RBD_DIRECTORY "rbd_directory" | ||
| 28 | #define RBD_INFO "rbd_info" | ||
| 29 | |||
| 30 | #define RBD_DEFAULT_OBJ_ORDER 22 /* 4MB */ | ||
| 31 | #define RBD_MIN_OBJ_ORDER 16 | ||
| 32 | #define RBD_MAX_OBJ_ORDER 30 | ||
| 33 | |||
| 34 | #define RBD_MAX_OBJ_NAME_LEN 96 | ||
| 35 | #define RBD_MAX_SEG_NAME_LEN 128 | ||
| 36 | |||
| 37 | #define RBD_COMP_NONE 0 | ||
| 38 | #define RBD_CRYPT_NONE 0 | ||
| 39 | |||
| 40 | #define RBD_HEADER_TEXT "<<< Rados Block Device Image >>>\n" | ||
| 41 | #define RBD_HEADER_SIGNATURE "RBD" | ||
| 42 | #define RBD_HEADER_VERSION "001.005" | ||
| 43 | |||
/*
 * Packed record holding a single little-endian 64-bit max_id.
 * NOTE(review): presumably the payload of the RBD_INFO ("rbd_info")
 * object -- confirm against the code that reads/writes it.
 */
| 44 | struct rbd_info { | ||
| 45 | __le64 max_id; | ||
| 46 | } __attribute__ ((packed)); | ||
| 47 | |||
/*
 * Packed per-snapshot record: a snapshot id and an image size, both
 * little-endian 64-bit.  Used as the element type of the trailing
 * snaps array in struct rbd_image_header_ondisk.
 */
| 48 | struct rbd_image_snap_ondisk { | ||
| 49 | __le64 id; | ||
| 50 | __le64 image_size; | ||
| 51 | } __attribute__((packed)); | ||
| 52 | |||
| 53 | struct rbd_image_header_ondisk { | ||
| 54 | char text[40]; | ||
| 55 | char block_name[24]; | ||
| 56 | char signature[4]; | ||
| 57 | char version[8]; | ||
| 58 | struct { | ||
| 59 | __u8 order; | ||
| 60 | __u8 crypt_type; | ||
| 61 | __u8 comp_type; | ||
| 62 | __u8 unused; | ||
| 63 | } __attribute__((packed)) options; | ||
| 64 | __le64 image_size; | ||
| 65 | __le64 snap_seq; | ||
| 66 | __le32 snap_count; | ||
| 67 | __le32 reserved; | ||
| 68 | __le64 snap_names_len; | ||
| 69 | struct rbd_image_snap_ondisk snaps[0]; | ||
| 70 | } __attribute__((packed)); | ||
| 71 | |||
| 72 | |||
| 73 | #endif | ||
diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index 1101e251a629..8320490226b7 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c | |||
| @@ -2,7 +2,6 @@ | |||
| 2 | #include <linux/spinlock.h> | 2 | #include <linux/spinlock.h> |
| 3 | #include <linux/slab.h> | 3 | #include <linux/slab.h> |
| 4 | #include <linux/blkdev.h> | 4 | #include <linux/blkdev.h> |
| 5 | #include <linux/smp_lock.h> | ||
| 6 | #include <linux/hdreg.h> | 5 | #include <linux/hdreg.h> |
| 7 | #include <linux/virtio.h> | 6 | #include <linux/virtio.h> |
| 8 | #include <linux/virtio_blk.h> | 7 | #include <linux/virtio_blk.h> |
| @@ -222,8 +221,8 @@ static int virtblk_get_id(struct gendisk *disk, char *id_str) | |||
| 222 | return err; | 221 | return err; |
| 223 | } | 222 | } |
| 224 | 223 | ||
| 225 | static int virtblk_locked_ioctl(struct block_device *bdev, fmode_t mode, | 224 | static int virtblk_ioctl(struct block_device *bdev, fmode_t mode, |
| 226 | unsigned cmd, unsigned long data) | 225 | unsigned int cmd, unsigned long data) |
| 227 | { | 226 | { |
| 228 | struct gendisk *disk = bdev->bd_disk; | 227 | struct gendisk *disk = bdev->bd_disk; |
| 229 | struct virtio_blk *vblk = disk->private_data; | 228 | struct virtio_blk *vblk = disk->private_data; |
| @@ -238,18 +237,6 @@ static int virtblk_locked_ioctl(struct block_device *bdev, fmode_t mode, | |||
| 238 | (void __user *)data); | 237 | (void __user *)data); |
| 239 | } | 238 | } |
| 240 | 239 | ||
| 241 | static int virtblk_ioctl(struct block_device *bdev, fmode_t mode, | ||
| 242 | unsigned int cmd, unsigned long param) | ||
| 243 | { | ||
| 244 | int ret; | ||
| 245 | |||
| 246 | lock_kernel(); | ||
| 247 | ret = virtblk_locked_ioctl(bdev, mode, cmd, param); | ||
| 248 | unlock_kernel(); | ||
| 249 | |||
| 250 | return ret; | ||
| 251 | } | ||
| 252 | |||
| 253 | /* We provide getgeo only to please some old bootloader/partitioning tools */ | 240 | /* We provide getgeo only to please some old bootloader/partitioning tools */ |
| 254 | static int virtblk_getgeo(struct block_device *bd, struct hd_geometry *geo) | 241 | static int virtblk_getgeo(struct block_device *bd, struct hd_geometry *geo) |
| 255 | { | 242 | { |
diff --git a/drivers/char/virtio_console.c b/drivers/char/virtio_console.c index 0f69c5ec0ecd..6c1b676643a9 100644 --- a/drivers/char/virtio_console.c +++ b/drivers/char/virtio_console.c | |||
| @@ -48,6 +48,9 @@ struct ports_driver_data { | |||
| 48 | /* Used for exporting per-port information to debugfs */ | 48 | /* Used for exporting per-port information to debugfs */ |
| 49 | struct dentry *debugfs_dir; | 49 | struct dentry *debugfs_dir; |
| 50 | 50 | ||
| 51 | /* List of all the devices we're handling */ | ||
| 52 | struct list_head portdevs; | ||
| 53 | |||
| 51 | /* Number of devices this driver is handling */ | 54 | /* Number of devices this driver is handling */ |
| 52 | unsigned int index; | 55 | unsigned int index; |
| 53 | 56 | ||
| @@ -108,6 +111,9 @@ struct port_buffer { | |||
| 108 | * ports for that device (vdev->priv). | 111 | * ports for that device (vdev->priv). |
| 109 | */ | 112 | */ |
| 110 | struct ports_device { | 113 | struct ports_device { |
| 114 | /* Next portdev in the list, head is in the pdrvdata struct */ | ||
| 115 | struct list_head list; | ||
| 116 | |||
| 111 | /* | 117 | /* |
| 112 | * Workqueue handlers where we process deferred work after | 118 | * Workqueue handlers where we process deferred work after |
| 113 | * notification | 119 | * notification |
| @@ -178,15 +184,21 @@ struct port { | |||
| 178 | struct console cons; | 184 | struct console cons; |
| 179 | 185 | ||
| 180 | /* Each port associates with a separate char device */ | 186 | /* Each port associates with a separate char device */ |
| 181 | struct cdev cdev; | 187 | struct cdev *cdev; |
| 182 | struct device *dev; | 188 | struct device *dev; |
| 183 | 189 | ||
| 190 | /* Reference-counting to handle port hot-unplugs and file operations */ | ||
| 191 | struct kref kref; | ||
| 192 | |||
| 184 | /* A waitqueue for poll() or blocking read operations */ | 193 | /* A waitqueue for poll() or blocking read operations */ |
| 185 | wait_queue_head_t waitqueue; | 194 | wait_queue_head_t waitqueue; |
| 186 | 195 | ||
| 187 | /* The 'name' of the port that we expose via sysfs properties */ | 196 | /* The 'name' of the port that we expose via sysfs properties */ |
| 188 | char *name; | 197 | char *name; |
| 189 | 198 | ||
| 199 | /* We can notify apps of host connect / disconnect events via SIGIO */ | ||
| 200 | struct fasync_struct *async_queue; | ||
| 201 | |||
| 190 | /* The 'id' to identify the port with the Host */ | 202 | /* The 'id' to identify the port with the Host */ |
| 191 | u32 id; | 203 | u32 id; |
| 192 | 204 | ||
| @@ -221,6 +233,41 @@ out: | |||
| 221 | return port; | 233 | return port; |
| 222 | } | 234 | } |
| 223 | 235 | ||
| 236 | static struct port *find_port_by_devt_in_portdev(struct ports_device *portdev, | ||
| 237 | dev_t dev) | ||
| 238 | { | ||
| 239 | struct port *port; | ||
| 240 | unsigned long flags; | ||
| 241 | |||
| 242 | spin_lock_irqsave(&portdev->ports_lock, flags); | ||
| 243 | list_for_each_entry(port, &portdev->ports, list) | ||
| 244 | if (port->cdev->dev == dev) | ||
| 245 | goto out; | ||
| 246 | port = NULL; | ||
| 247 | out: | ||
| 248 | spin_unlock_irqrestore(&portdev->ports_lock, flags); | ||
| 249 | |||
| 250 | return port; | ||
| 251 | } | ||
| 252 | |||
| 253 | static struct port *find_port_by_devt(dev_t dev) | ||
| 254 | { | ||
| 255 | struct ports_device *portdev; | ||
| 256 | struct port *port; | ||
| 257 | unsigned long flags; | ||
| 258 | |||
| 259 | spin_lock_irqsave(&pdrvdata_lock, flags); | ||
| 260 | list_for_each_entry(portdev, &pdrvdata.portdevs, list) { | ||
| 261 | port = find_port_by_devt_in_portdev(portdev, dev); | ||
| 262 | if (port) | ||
| 263 | goto out; | ||
| 264 | } | ||
| 265 | port = NULL; | ||
| 266 | out: | ||
| 267 | spin_unlock_irqrestore(&pdrvdata_lock, flags); | ||
| 268 | return port; | ||
| 269 | } | ||
| 270 | |||
| 224 | static struct port *find_port_by_id(struct ports_device *portdev, u32 id) | 271 | static struct port *find_port_by_id(struct ports_device *portdev, u32 id) |
| 225 | { | 272 | { |
| 226 | struct port *port; | 273 | struct port *port; |
| @@ -410,7 +457,10 @@ static ssize_t __send_control_msg(struct ports_device *portdev, u32 port_id, | |||
| 410 | static ssize_t send_control_msg(struct port *port, unsigned int event, | 457 | static ssize_t send_control_msg(struct port *port, unsigned int event, |
| 411 | unsigned int value) | 458 | unsigned int value) |
| 412 | { | 459 | { |
| 413 | return __send_control_msg(port->portdev, port->id, event, value); | 460 | /* Did the port get unplugged before userspace closed it? */ |
| 461 | if (port->portdev) | ||
| 462 | return __send_control_msg(port->portdev, port->id, event, value); | ||
| 463 | return 0; | ||
| 414 | } | 464 | } |
| 415 | 465 | ||
| 416 | /* Callers must take the port->outvq_lock */ | 466 | /* Callers must take the port->outvq_lock */ |
| @@ -525,6 +575,10 @@ static ssize_t fill_readbuf(struct port *port, char *out_buf, size_t out_count, | |||
| 525 | /* The condition that must be true for polling to end */ | 575 | /* The condition that must be true for polling to end */ |
| 526 | static bool will_read_block(struct port *port) | 576 | static bool will_read_block(struct port *port) |
| 527 | { | 577 | { |
| 578 | if (!port->guest_connected) { | ||
| 579 | /* Port got hot-unplugged. Let's exit. */ | ||
| 580 | return false; | ||
| 581 | } | ||
| 528 | return !port_has_data(port) && port->host_connected; | 582 | return !port_has_data(port) && port->host_connected; |
| 529 | } | 583 | } |
| 530 | 584 | ||
| @@ -575,6 +629,9 @@ static ssize_t port_fops_read(struct file *filp, char __user *ubuf, | |||
| 575 | if (ret < 0) | 629 | if (ret < 0) |
| 576 | return ret; | 630 | return ret; |
| 577 | } | 631 | } |
| 632 | /* Port got hot-unplugged. */ | ||
| 633 | if (!port->guest_connected) | ||
| 634 | return -ENODEV; | ||
| 578 | /* | 635 | /* |
| 579 | * We could've received a disconnection message while we were | 636 | * We could've received a disconnection message while we were |
| 580 | * waiting for more data. | 637 | * waiting for more data. |
| @@ -616,6 +673,9 @@ static ssize_t port_fops_write(struct file *filp, const char __user *ubuf, | |||
| 616 | if (ret < 0) | 673 | if (ret < 0) |
| 617 | return ret; | 674 | return ret; |
| 618 | } | 675 | } |
| 676 | /* Port got hot-unplugged. */ | ||
| 677 | if (!port->guest_connected) | ||
| 678 | return -ENODEV; | ||
| 619 | 679 | ||
| 620 | count = min((size_t)(32 * 1024), count); | 680 | count = min((size_t)(32 * 1024), count); |
| 621 | 681 | ||
| @@ -656,6 +716,10 @@ static unsigned int port_fops_poll(struct file *filp, poll_table *wait) | |||
| 656 | port = filp->private_data; | 716 | port = filp->private_data; |
| 657 | poll_wait(filp, &port->waitqueue, wait); | 717 | poll_wait(filp, &port->waitqueue, wait); |
| 658 | 718 | ||
| 719 | if (!port->guest_connected) { | ||
| 720 | /* Port got unplugged */ | ||
| 721 | return POLLHUP; | ||
| 722 | } | ||
| 659 | ret = 0; | 723 | ret = 0; |
| 660 | if (!will_read_block(port)) | 724 | if (!will_read_block(port)) |
| 661 | ret |= POLLIN | POLLRDNORM; | 725 | ret |= POLLIN | POLLRDNORM; |
| @@ -667,6 +731,8 @@ static unsigned int port_fops_poll(struct file *filp, poll_table *wait) | |||
| 667 | return ret; | 731 | return ret; |
| 668 | } | 732 | } |
| 669 | 733 | ||
| 734 | static void remove_port(struct kref *kref); | ||
| 735 | |||
| 670 | static int port_fops_release(struct inode *inode, struct file *filp) | 736 | static int port_fops_release(struct inode *inode, struct file *filp) |
| 671 | { | 737 | { |
| 672 | struct port *port; | 738 | struct port *port; |
| @@ -687,6 +753,16 @@ static int port_fops_release(struct inode *inode, struct file *filp) | |||
| 687 | reclaim_consumed_buffers(port); | 753 | reclaim_consumed_buffers(port); |
| 688 | spin_unlock_irq(&port->outvq_lock); | 754 | spin_unlock_irq(&port->outvq_lock); |
| 689 | 755 | ||
| 756 | /* | ||
| 757 | * Locks aren't necessary here as a port can't be opened after | ||
| 758 | * unplug, and if a port isn't unplugged, a kref would already | ||
| 759 | * exist for the port. Plus, taking ports_lock here would | ||
| 760 | * create a dependency on other locks taken by functions | ||
| 761 | * inside remove_port if we're the last holder of the port, | ||
| 762 | * creating many problems. | ||
| 763 | */ | ||
| 764 | kref_put(&port->kref, remove_port); | ||
| 765 | |||
| 690 | return 0; | 766 | return 0; |
| 691 | } | 767 | } |
| 692 | 768 | ||
| @@ -694,22 +770,31 @@ static int port_fops_open(struct inode *inode, struct file *filp) | |||
| 694 | { | 770 | { |
| 695 | struct cdev *cdev = inode->i_cdev; | 771 | struct cdev *cdev = inode->i_cdev; |
| 696 | struct port *port; | 772 | struct port *port; |
| 773 | int ret; | ||
| 697 | 774 | ||
| 698 | port = container_of(cdev, struct port, cdev); | 775 | port = find_port_by_devt(cdev->dev); |
| 699 | filp->private_data = port; | 776 | filp->private_data = port; |
| 700 | 777 | ||
| 778 | /* Prevent against a port getting hot-unplugged at the same time */ | ||
| 779 | spin_lock_irq(&port->portdev->ports_lock); | ||
| 780 | kref_get(&port->kref); | ||
| 781 | spin_unlock_irq(&port->portdev->ports_lock); | ||
| 782 | |||
| 701 | /* | 783 | /* |
| 702 | * Don't allow opening of console port devices -- that's done | 784 | * Don't allow opening of console port devices -- that's done |
| 703 | * via /dev/hvc | 785 | * via /dev/hvc |
| 704 | */ | 786 | */ |
| 705 | if (is_console_port(port)) | 787 | if (is_console_port(port)) { |
| 706 | return -ENXIO; | 788 | ret = -ENXIO; |
| 789 | goto out; | ||
| 790 | } | ||
| 707 | 791 | ||
| 708 | /* Allow only one process to open a particular port at a time */ | 792 | /* Allow only one process to open a particular port at a time */ |
| 709 | spin_lock_irq(&port->inbuf_lock); | 793 | spin_lock_irq(&port->inbuf_lock); |
| 710 | if (port->guest_connected) { | 794 | if (port->guest_connected) { |
| 711 | spin_unlock_irq(&port->inbuf_lock); | 795 | spin_unlock_irq(&port->inbuf_lock); |
| 712 | return -EMFILE; | 796 | ret = -EMFILE; |
| 797 | goto out; | ||
| 713 | } | 798 | } |
| 714 | 799 | ||
| 715 | port->guest_connected = true; | 800 | port->guest_connected = true; |
| @@ -724,10 +809,23 @@ static int port_fops_open(struct inode *inode, struct file *filp) | |||
| 724 | reclaim_consumed_buffers(port); | 809 | reclaim_consumed_buffers(port); |
| 725 | spin_unlock_irq(&port->outvq_lock); | 810 | spin_unlock_irq(&port->outvq_lock); |
| 726 | 811 | ||
| 812 | nonseekable_open(inode, filp); | ||
| 813 | |||
| 727 | /* Notify host of port being opened */ | 814 | /* Notify host of port being opened */ |
| 728 | send_control_msg(filp->private_data, VIRTIO_CONSOLE_PORT_OPEN, 1); | 815 | send_control_msg(filp->private_data, VIRTIO_CONSOLE_PORT_OPEN, 1); |
| 729 | 816 | ||
| 730 | return 0; | 817 | return 0; |
| 818 | out: | ||
| 819 | kref_put(&port->kref, remove_port); | ||
| 820 | return ret; | ||
| 821 | } | ||
| 822 | |||
| 823 | static int port_fops_fasync(int fd, struct file *filp, int mode) | ||
| 824 | { | ||
| 825 | struct port *port; | ||
| 826 | |||
| 827 | port = filp->private_data; | ||
| 828 | return fasync_helper(fd, filp, mode, &port->async_queue); | ||
| 731 | } | 829 | } |
| 732 | 830 | ||
| 733 | /* | 831 | /* |
| @@ -743,6 +841,8 @@ static const struct file_operations port_fops = { | |||
| 743 | .write = port_fops_write, | 841 | .write = port_fops_write, |
| 744 | .poll = port_fops_poll, | 842 | .poll = port_fops_poll, |
| 745 | .release = port_fops_release, | 843 | .release = port_fops_release, |
| 844 | .fasync = port_fops_fasync, | ||
| 845 | .llseek = no_llseek, | ||
| 746 | }; | 846 | }; |
| 747 | 847 | ||
| 748 | /* | 848 | /* |
| @@ -1001,6 +1101,12 @@ static unsigned int fill_queue(struct virtqueue *vq, spinlock_t *lock) | |||
| 1001 | return nr_added_bufs; | 1101 | return nr_added_bufs; |
| 1002 | } | 1102 | } |
| 1003 | 1103 | ||
| 1104 | static void send_sigio_to_port(struct port *port) | ||
| 1105 | { | ||
| 1106 | if (port->async_queue && port->guest_connected) | ||
| 1107 | kill_fasync(&port->async_queue, SIGIO, POLL_OUT); | ||
| 1108 | } | ||
| 1109 | |||
| 1004 | static int add_port(struct ports_device *portdev, u32 id) | 1110 | static int add_port(struct ports_device *portdev, u32 id) |
| 1005 | { | 1111 | { |
| 1006 | char debugfs_name[16]; | 1112 | char debugfs_name[16]; |
| @@ -1015,6 +1121,7 @@ static int add_port(struct ports_device *portdev, u32 id) | |||
| 1015 | err = -ENOMEM; | 1121 | err = -ENOMEM; |
| 1016 | goto fail; | 1122 | goto fail; |
| 1017 | } | 1123 | } |
| 1124 | kref_init(&port->kref); | ||
| 1018 | 1125 | ||
| 1019 | port->portdev = portdev; | 1126 | port->portdev = portdev; |
| 1020 | port->id = id; | 1127 | port->id = id; |
| @@ -1022,6 +1129,7 @@ static int add_port(struct ports_device *portdev, u32 id) | |||
| 1022 | port->name = NULL; | 1129 | port->name = NULL; |
| 1023 | port->inbuf = NULL; | 1130 | port->inbuf = NULL; |
| 1024 | port->cons.hvc = NULL; | 1131 | port->cons.hvc = NULL; |
| 1132 | port->async_queue = NULL; | ||
| 1025 | 1133 | ||
| 1026 | port->cons.ws.ws_row = port->cons.ws.ws_col = 0; | 1134 | port->cons.ws.ws_row = port->cons.ws.ws_col = 0; |
| 1027 | 1135 | ||
| @@ -1032,14 +1140,20 @@ static int add_port(struct ports_device *portdev, u32 id) | |||
| 1032 | port->in_vq = portdev->in_vqs[port->id]; | 1140 | port->in_vq = portdev->in_vqs[port->id]; |
| 1033 | port->out_vq = portdev->out_vqs[port->id]; | 1141 | port->out_vq = portdev->out_vqs[port->id]; |
| 1034 | 1142 | ||
| 1035 | cdev_init(&port->cdev, &port_fops); | 1143 | port->cdev = cdev_alloc(); |
| 1144 | if (!port->cdev) { | ||
| 1145 | dev_err(&port->portdev->vdev->dev, "Error allocating cdev\n"); | ||
| 1146 | err = -ENOMEM; | ||
| 1147 | goto free_port; | ||
| 1148 | } | ||
| 1149 | port->cdev->ops = &port_fops; | ||
| 1036 | 1150 | ||
| 1037 | devt = MKDEV(portdev->chr_major, id); | 1151 | devt = MKDEV(portdev->chr_major, id); |
| 1038 | err = cdev_add(&port->cdev, devt, 1); | 1152 | err = cdev_add(port->cdev, devt, 1); |
| 1039 | if (err < 0) { | 1153 | if (err < 0) { |
| 1040 | dev_err(&port->portdev->vdev->dev, | 1154 | dev_err(&port->portdev->vdev->dev, |
| 1041 | "Error %d adding cdev for port %u\n", err, id); | 1155 | "Error %d adding cdev for port %u\n", err, id); |
| 1042 | goto free_port; | 1156 | goto free_cdev; |
| 1043 | } | 1157 | } |
| 1044 | port->dev = device_create(pdrvdata.class, &port->portdev->vdev->dev, | 1158 | port->dev = device_create(pdrvdata.class, &port->portdev->vdev->dev, |
| 1045 | devt, port, "vport%up%u", | 1159 | devt, port, "vport%up%u", |
| @@ -1104,7 +1218,7 @@ free_inbufs: | |||
| 1104 | free_device: | 1218 | free_device: |
| 1105 | device_destroy(pdrvdata.class, port->dev->devt); | 1219 | device_destroy(pdrvdata.class, port->dev->devt); |
| 1106 | free_cdev: | 1220 | free_cdev: |
| 1107 | cdev_del(&port->cdev); | 1221 | cdev_del(port->cdev); |
| 1108 | free_port: | 1222 | free_port: |
| 1109 | kfree(port); | 1223 | kfree(port); |
| 1110 | fail: | 1224 | fail: |
| @@ -1113,21 +1227,45 @@ fail: | |||
| 1113 | return err; | 1227 | return err; |
| 1114 | } | 1228 | } |
| 1115 | 1229 | ||
| 1116 | /* Remove all port-specific data. */ | 1230 | /* No users remain, remove all port-specific data. */ |
| 1117 | static int remove_port(struct port *port) | 1231 | static void remove_port(struct kref *kref) |
| 1232 | { | ||
| 1233 | struct port *port; | ||
| 1234 | |||
| 1235 | port = container_of(kref, struct port, kref); | ||
| 1236 | |||
| 1237 | sysfs_remove_group(&port->dev->kobj, &port_attribute_group); | ||
| 1238 | device_destroy(pdrvdata.class, port->dev->devt); | ||
| 1239 | cdev_del(port->cdev); | ||
| 1240 | |||
| 1241 | kfree(port->name); | ||
| 1242 | |||
| 1243 | debugfs_remove(port->debugfs_file); | ||
| 1244 | |||
| 1245 | kfree(port); | ||
| 1246 | } | ||
| 1247 | |||
| 1248 | /* | ||
| 1249 | * Port got unplugged. Remove port from portdev's list and drop the | ||
| 1250 | * kref reference. If no userspace has this port opened, it will | ||
| 1251 | * result in immediate removal of the port. | ||
| 1252 | */ | ||
| 1253 | static void unplug_port(struct port *port) | ||
| 1118 | { | 1254 | { |
| 1119 | struct port_buffer *buf; | 1255 | struct port_buffer *buf; |
| 1120 | 1256 | ||
| 1257 | spin_lock_irq(&port->portdev->ports_lock); | ||
| 1258 | list_del(&port->list); | ||
| 1259 | spin_unlock_irq(&port->portdev->ports_lock); | ||
| 1260 | |||
| 1121 | if (port->guest_connected) { | 1261 | if (port->guest_connected) { |
| 1122 | port->guest_connected = false; | 1262 | port->guest_connected = false; |
| 1123 | port->host_connected = false; | 1263 | port->host_connected = false; |
| 1124 | wake_up_interruptible(&port->waitqueue); | 1264 | wake_up_interruptible(&port->waitqueue); |
| 1125 | send_control_msg(port, VIRTIO_CONSOLE_PORT_OPEN, 0); | ||
| 1126 | } | ||
| 1127 | 1265 | ||
| 1128 | spin_lock_irq(&port->portdev->ports_lock); | 1266 | /* Let the app know the port is going down. */ |
| 1129 | list_del(&port->list); | 1267 | send_sigio_to_port(port); |
| 1130 | spin_unlock_irq(&port->portdev->ports_lock); | 1268 | } |
| 1131 | 1269 | ||
| 1132 | if (is_console_port(port)) { | 1270 | if (is_console_port(port)) { |
| 1133 | spin_lock_irq(&pdrvdata_lock); | 1271 | spin_lock_irq(&pdrvdata_lock); |
| @@ -1146,9 +1284,6 @@ static int remove_port(struct port *port) | |||
| 1146 | hvc_remove(port->cons.hvc); | 1284 | hvc_remove(port->cons.hvc); |
| 1147 | #endif | 1285 | #endif |
| 1148 | } | 1286 | } |
| 1149 | sysfs_remove_group(&port->dev->kobj, &port_attribute_group); | ||
| 1150 | device_destroy(pdrvdata.class, port->dev->devt); | ||
| 1151 | cdev_del(&port->cdev); | ||
| 1152 | 1287 | ||
| 1153 | /* Remove unused data this port might have received. */ | 1288 | /* Remove unused data this port might have received. */ |
| 1154 | discard_port_data(port); | 1289 | discard_port_data(port); |
| @@ -1159,12 +1294,19 @@ static int remove_port(struct port *port) | |||
| 1159 | while ((buf = virtqueue_detach_unused_buf(port->in_vq))) | 1294 | while ((buf = virtqueue_detach_unused_buf(port->in_vq))) |
| 1160 | free_buf(buf); | 1295 | free_buf(buf); |
| 1161 | 1296 | ||
| 1162 | kfree(port->name); | 1297 | /* |
| 1163 | 1298 | * We should just assume the device itself has gone off -- | |
| 1164 | debugfs_remove(port->debugfs_file); | 1299 | * else a close on an open port later will try to send out a |
| 1300 | * control message. | ||
| 1301 | */ | ||
| 1302 | port->portdev = NULL; | ||
| 1165 | 1303 | ||
| 1166 | kfree(port); | 1304 | /* |
| 1167 | return 0; | 1305 | * Locks around here are not necessary - a port can't be |
| 1306 | * opened after we removed the port struct from ports_list | ||
| 1307 | * above. | ||
| 1308 | */ | ||
| 1309 | kref_put(&port->kref, remove_port); | ||
| 1168 | } | 1310 | } |
| 1169 | 1311 | ||
| 1170 | /* Any private messages that the Host and Guest want to share */ | 1312 | /* Any private messages that the Host and Guest want to share */ |
| @@ -1203,7 +1345,7 @@ static void handle_control_message(struct ports_device *portdev, | |||
| 1203 | add_port(portdev, cpkt->id); | 1345 | add_port(portdev, cpkt->id); |
| 1204 | break; | 1346 | break; |
| 1205 | case VIRTIO_CONSOLE_PORT_REMOVE: | 1347 | case VIRTIO_CONSOLE_PORT_REMOVE: |
| 1206 | remove_port(port); | 1348 | unplug_port(port); |
| 1207 | break; | 1349 | break; |
| 1208 | case VIRTIO_CONSOLE_CONSOLE_PORT: | 1350 | case VIRTIO_CONSOLE_CONSOLE_PORT: |
| 1209 | if (!cpkt->value) | 1351 | if (!cpkt->value) |
| @@ -1245,6 +1387,12 @@ static void handle_control_message(struct ports_device *portdev, | |||
| 1245 | spin_lock_irq(&port->outvq_lock); | 1387 | spin_lock_irq(&port->outvq_lock); |
| 1246 | reclaim_consumed_buffers(port); | 1388 | reclaim_consumed_buffers(port); |
| 1247 | spin_unlock_irq(&port->outvq_lock); | 1389 | spin_unlock_irq(&port->outvq_lock); |
| 1390 | |||
| 1391 | /* | ||
| 1392 | * If the guest is connected, it'll be interested in | ||
| 1393 | * knowing the host connection state changed. | ||
| 1394 | */ | ||
| 1395 | send_sigio_to_port(port); | ||
| 1248 | break; | 1396 | break; |
| 1249 | case VIRTIO_CONSOLE_PORT_NAME: | 1397 | case VIRTIO_CONSOLE_PORT_NAME: |
| 1250 | /* | 1398 | /* |
| @@ -1341,6 +1489,9 @@ static void in_intr(struct virtqueue *vq) | |||
| 1341 | 1489 | ||
| 1342 | wake_up_interruptible(&port->waitqueue); | 1490 | wake_up_interruptible(&port->waitqueue); |
| 1343 | 1491 | ||
| 1492 | /* Send a SIGIO indicating new data in case the process asked for it */ | ||
| 1493 | send_sigio_to_port(port); | ||
| 1494 | |||
| 1344 | if (is_console_port(port) && hvc_poll(port->cons.hvc)) | 1495 | if (is_console_port(port) && hvc_poll(port->cons.hvc)) |
| 1345 | hvc_kick(); | 1496 | hvc_kick(); |
| 1346 | } | 1497 | } |
| @@ -1577,6 +1728,10 @@ static int __devinit virtcons_probe(struct virtio_device *vdev) | |||
| 1577 | add_port(portdev, 0); | 1728 | add_port(portdev, 0); |
| 1578 | } | 1729 | } |
| 1579 | 1730 | ||
| 1731 | spin_lock_irq(&pdrvdata_lock); | ||
| 1732 | list_add_tail(&portdev->list, &pdrvdata.portdevs); | ||
| 1733 | spin_unlock_irq(&pdrvdata_lock); | ||
| 1734 | |||
| 1580 | __send_control_msg(portdev, VIRTIO_CONSOLE_BAD_ID, | 1735 | __send_control_msg(portdev, VIRTIO_CONSOLE_BAD_ID, |
| 1581 | VIRTIO_CONSOLE_DEVICE_READY, 1); | 1736 | VIRTIO_CONSOLE_DEVICE_READY, 1); |
| 1582 | return 0; | 1737 | return 0; |
| @@ -1600,23 +1755,41 @@ static void virtcons_remove(struct virtio_device *vdev) | |||
| 1600 | { | 1755 | { |
| 1601 | struct ports_device *portdev; | 1756 | struct ports_device *portdev; |
| 1602 | struct port *port, *port2; | 1757 | struct port *port, *port2; |
| 1603 | struct port_buffer *buf; | ||
| 1604 | unsigned int len; | ||
| 1605 | 1758 | ||
| 1606 | portdev = vdev->priv; | 1759 | portdev = vdev->priv; |
| 1607 | 1760 | ||
| 1761 | spin_lock_irq(&pdrvdata_lock); | ||
| 1762 | list_del(&portdev->list); | ||
| 1763 | spin_unlock_irq(&pdrvdata_lock); | ||
| 1764 | |||
| 1765 | /* Disable interrupts for vqs */ | ||
| 1766 | vdev->config->reset(vdev); | ||
| 1767 | /* Finish up work that's lined up */ | ||
| 1608 | cancel_work_sync(&portdev->control_work); | 1768 | cancel_work_sync(&portdev->control_work); |
| 1609 | 1769 | ||
| 1610 | list_for_each_entry_safe(port, port2, &portdev->ports, list) | 1770 | list_for_each_entry_safe(port, port2, &portdev->ports, list) |
| 1611 | remove_port(port); | 1771 | unplug_port(port); |
| 1612 | 1772 | ||
| 1613 | unregister_chrdev(portdev->chr_major, "virtio-portsdev"); | 1773 | unregister_chrdev(portdev->chr_major, "virtio-portsdev"); |
| 1614 | 1774 | ||
| 1615 | while ((buf = virtqueue_get_buf(portdev->c_ivq, &len))) | 1775 | /* |
| 1616 | free_buf(buf); | 1776 | * When yanking out a device, we immediately lose the |
| 1777 | * (device-side) queues. So there's no point in keeping the | ||
| 1778 | * guest side around till we drop our final reference. This | ||
| 1779 | * also means that any ports which are in an open state will | ||
| 1780 | * have to just stop using the port, as the vqs are going | ||
| 1781 | * away. | ||
| 1782 | */ | ||
| 1783 | if (use_multiport(portdev)) { | ||
| 1784 | struct port_buffer *buf; | ||
| 1785 | unsigned int len; | ||
| 1617 | 1786 | ||
| 1618 | while ((buf = virtqueue_detach_unused_buf(portdev->c_ivq))) | 1787 | while ((buf = virtqueue_get_buf(portdev->c_ivq, &len))) |
| 1619 | free_buf(buf); | 1788 | free_buf(buf); |
| 1789 | |||
| 1790 | while ((buf = virtqueue_detach_unused_buf(portdev->c_ivq))) | ||
| 1791 | free_buf(buf); | ||
| 1792 | } | ||
| 1620 | 1793 | ||
| 1621 | vdev->config->del_vqs(vdev); | 1794 | vdev->config->del_vqs(vdev); |
| 1622 | kfree(portdev->in_vqs); | 1795 | kfree(portdev->in_vqs); |
| @@ -1663,6 +1836,7 @@ static int __init init(void) | |||
| 1663 | PTR_ERR(pdrvdata.debugfs_dir)); | 1836 | PTR_ERR(pdrvdata.debugfs_dir)); |
| 1664 | } | 1837 | } |
| 1665 | INIT_LIST_HEAD(&pdrvdata.consoles); | 1838 | INIT_LIST_HEAD(&pdrvdata.consoles); |
| 1839 | INIT_LIST_HEAD(&pdrvdata.portdevs); | ||
| 1666 | 1840 | ||
| 1667 | return register_virtio_driver(&virtio_console); | 1841 | return register_virtio_driver(&virtio_console); |
| 1668 | } | 1842 | } |
diff --git a/fs/ceph/Kconfig b/fs/ceph/Kconfig index 0fcd2640c23f..9eb134ea6eb2 100644 --- a/fs/ceph/Kconfig +++ b/fs/ceph/Kconfig | |||
| @@ -1,9 +1,11 @@ | |||
| 1 | config CEPH_FS | 1 | config CEPH_FS |
| 2 | tristate "Ceph distributed file system (EXPERIMENTAL)" | 2 | tristate "Ceph distributed file system (EXPERIMENTAL)" |
| 3 | depends on INET && EXPERIMENTAL | 3 | depends on INET && EXPERIMENTAL |
| 4 | select CEPH_LIB | ||
| 4 | select LIBCRC32C | 5 | select LIBCRC32C |
| 5 | select CRYPTO_AES | 6 | select CRYPTO_AES |
| 6 | select CRYPTO | 7 | select CRYPTO |
| 8 | default n | ||
| 7 | help | 9 | help |
| 8 | Choose Y or M here to include support for mounting the | 10 | Choose Y or M here to include support for mounting the |
| 9 | experimental Ceph distributed file system. Ceph is an extremely | 11 | experimental Ceph distributed file system. Ceph is an extremely |
| @@ -14,15 +16,3 @@ config CEPH_FS | |||
| 14 | 16 | ||
| 15 | If unsure, say N. | 17 | If unsure, say N. |
| 16 | 18 | ||
| 17 | config CEPH_FS_PRETTYDEBUG | ||
| 18 | bool "Include file:line in ceph debug output" | ||
| 19 | depends on CEPH_FS | ||
| 20 | default n | ||
| 21 | help | ||
| 22 | If you say Y here, debug output will include a filename and | ||
| 23 | line to aid debugging. This increases kernel size and slows | ||
| 24 | execution slightly when debug call sites are enabled (e.g., | ||
| 25 | via CONFIG_DYNAMIC_DEBUG). | ||
| 26 | |||
| 27 | If unsure, say N. | ||
| 28 | |||
diff --git a/fs/ceph/Makefile b/fs/ceph/Makefile index 278e1172600d..9e6c4f2e8ff1 100644 --- a/fs/ceph/Makefile +++ b/fs/ceph/Makefile | |||
| @@ -8,15 +8,8 @@ obj-$(CONFIG_CEPH_FS) += ceph.o | |||
| 8 | 8 | ||
| 9 | ceph-objs := super.o inode.o dir.o file.o locks.o addr.o ioctl.o \ | 9 | ceph-objs := super.o inode.o dir.o file.o locks.o addr.o ioctl.o \ |
| 10 | export.o caps.o snap.o xattr.o \ | 10 | export.o caps.o snap.o xattr.o \ |
| 11 | messenger.o msgpool.o buffer.o pagelist.o \ | 11 | mds_client.o mdsmap.o strings.o ceph_frag.o \ |
| 12 | mds_client.o mdsmap.o \ | 12 | debugfs.o |
| 13 | mon_client.o \ | ||
| 14 | osd_client.o osdmap.o crush/crush.o crush/mapper.o crush/hash.o \ | ||
| 15 | debugfs.o \ | ||
| 16 | auth.o auth_none.o \ | ||
| 17 | crypto.o armor.o \ | ||
| 18 | auth_x.o \ | ||
| 19 | ceph_fs.o ceph_strings.o ceph_hash.o ceph_frag.o | ||
| 20 | 13 | ||
| 21 | else | 14 | else |
| 22 | #Otherwise we were called directly from the command | 15 | #Otherwise we were called directly from the command |
diff --git a/fs/ceph/README b/fs/ceph/README deleted file mode 100644 index 18352fab37c0..000000000000 --- a/fs/ceph/README +++ /dev/null | |||
| @@ -1,20 +0,0 @@ | |||
| 1 | # | ||
| 2 | # The following files are shared by (and manually synchronized | ||
| 3 | # between) the Ceph userland and kernel client. | ||
| 4 | # | ||
| 5 | # userland kernel | ||
| 6 | src/include/ceph_fs.h fs/ceph/ceph_fs.h | ||
| 7 | src/include/ceph_fs.cc fs/ceph/ceph_fs.c | ||
| 8 | src/include/msgr.h fs/ceph/msgr.h | ||
| 9 | src/include/rados.h fs/ceph/rados.h | ||
| 10 | src/include/ceph_strings.cc fs/ceph/ceph_strings.c | ||
| 11 | src/include/ceph_frag.h fs/ceph/ceph_frag.h | ||
| 12 | src/include/ceph_frag.cc fs/ceph/ceph_frag.c | ||
| 13 | src/include/ceph_hash.h fs/ceph/ceph_hash.h | ||
| 14 | src/include/ceph_hash.cc fs/ceph/ceph_hash.c | ||
| 15 | src/crush/crush.c fs/ceph/crush/crush.c | ||
| 16 | src/crush/crush.h fs/ceph/crush/crush.h | ||
| 17 | src/crush/mapper.c fs/ceph/crush/mapper.c | ||
| 18 | src/crush/mapper.h fs/ceph/crush/mapper.h | ||
| 19 | src/crush/hash.h fs/ceph/crush/hash.h | ||
| 20 | src/crush/hash.c fs/ceph/crush/hash.c | ||
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index efbc604001c8..51bcc5ce3230 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c | |||
| @@ -1,4 +1,4 @@ | |||
| 1 | #include "ceph_debug.h" | 1 | #include <linux/ceph/ceph_debug.h> |
| 2 | 2 | ||
| 3 | #include <linux/backing-dev.h> | 3 | #include <linux/backing-dev.h> |
| 4 | #include <linux/fs.h> | 4 | #include <linux/fs.h> |
| @@ -10,7 +10,8 @@ | |||
| 10 | #include <linux/task_io_accounting_ops.h> | 10 | #include <linux/task_io_accounting_ops.h> |
| 11 | 11 | ||
| 12 | #include "super.h" | 12 | #include "super.h" |
| 13 | #include "osd_client.h" | 13 | #include "mds_client.h" |
| 14 | #include <linux/ceph/osd_client.h> | ||
| 14 | 15 | ||
| 15 | /* | 16 | /* |
| 16 | * Ceph address space ops. | 17 | * Ceph address space ops. |
| @@ -193,7 +194,8 @@ static int readpage_nounlock(struct file *filp, struct page *page) | |||
| 193 | { | 194 | { |
| 194 | struct inode *inode = filp->f_dentry->d_inode; | 195 | struct inode *inode = filp->f_dentry->d_inode; |
| 195 | struct ceph_inode_info *ci = ceph_inode(inode); | 196 | struct ceph_inode_info *ci = ceph_inode(inode); |
| 196 | struct ceph_osd_client *osdc = &ceph_inode_to_client(inode)->osdc; | 197 | struct ceph_osd_client *osdc = |
| 198 | &ceph_inode_to_client(inode)->client->osdc; | ||
| 197 | int err = 0; | 199 | int err = 0; |
| 198 | u64 len = PAGE_CACHE_SIZE; | 200 | u64 len = PAGE_CACHE_SIZE; |
| 199 | 201 | ||
| @@ -265,7 +267,8 @@ static int ceph_readpages(struct file *file, struct address_space *mapping, | |||
| 265 | { | 267 | { |
| 266 | struct inode *inode = file->f_dentry->d_inode; | 268 | struct inode *inode = file->f_dentry->d_inode; |
| 267 | struct ceph_inode_info *ci = ceph_inode(inode); | 269 | struct ceph_inode_info *ci = ceph_inode(inode); |
| 268 | struct ceph_osd_client *osdc = &ceph_inode_to_client(inode)->osdc; | 270 | struct ceph_osd_client *osdc = |
| 271 | &ceph_inode_to_client(inode)->client->osdc; | ||
| 269 | int rc = 0; | 272 | int rc = 0; |
| 270 | struct page **pages; | 273 | struct page **pages; |
| 271 | loff_t offset; | 274 | loff_t offset; |
| @@ -365,7 +368,7 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc) | |||
| 365 | { | 368 | { |
| 366 | struct inode *inode; | 369 | struct inode *inode; |
| 367 | struct ceph_inode_info *ci; | 370 | struct ceph_inode_info *ci; |
| 368 | struct ceph_client *client; | 371 | struct ceph_fs_client *fsc; |
| 369 | struct ceph_osd_client *osdc; | 372 | struct ceph_osd_client *osdc; |
| 370 | loff_t page_off = page->index << PAGE_CACHE_SHIFT; | 373 | loff_t page_off = page->index << PAGE_CACHE_SHIFT; |
| 371 | int len = PAGE_CACHE_SIZE; | 374 | int len = PAGE_CACHE_SIZE; |
| @@ -383,8 +386,8 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc) | |||
| 383 | } | 386 | } |
| 384 | inode = page->mapping->host; | 387 | inode = page->mapping->host; |
| 385 | ci = ceph_inode(inode); | 388 | ci = ceph_inode(inode); |
| 386 | client = ceph_inode_to_client(inode); | 389 | fsc = ceph_inode_to_client(inode); |
| 387 | osdc = &client->osdc; | 390 | osdc = &fsc->client->osdc; |
| 388 | 391 | ||
| 389 | /* verify this is a writeable snap context */ | 392 | /* verify this is a writeable snap context */ |
| 390 | snapc = (void *)page->private; | 393 | snapc = (void *)page->private; |
| @@ -414,10 +417,10 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc) | |||
| 414 | dout("writepage %p page %p index %lu on %llu~%u snapc %p\n", | 417 | dout("writepage %p page %p index %lu on %llu~%u snapc %p\n", |
| 415 | inode, page, page->index, page_off, len, snapc); | 418 | inode, page, page->index, page_off, len, snapc); |
| 416 | 419 | ||
| 417 | writeback_stat = atomic_long_inc_return(&client->writeback_count); | 420 | writeback_stat = atomic_long_inc_return(&fsc->writeback_count); |
| 418 | if (writeback_stat > | 421 | if (writeback_stat > |
| 419 | CONGESTION_ON_THRESH(client->mount_args->congestion_kb)) | 422 | CONGESTION_ON_THRESH(fsc->mount_options->congestion_kb)) |
| 420 | set_bdi_congested(&client->backing_dev_info, BLK_RW_ASYNC); | 423 | set_bdi_congested(&fsc->backing_dev_info, BLK_RW_ASYNC); |
| 421 | 424 | ||
| 422 | set_page_writeback(page); | 425 | set_page_writeback(page); |
| 423 | err = ceph_osdc_writepages(osdc, ceph_vino(inode), | 426 | err = ceph_osdc_writepages(osdc, ceph_vino(inode), |
| @@ -496,7 +499,7 @@ static void writepages_finish(struct ceph_osd_request *req, | |||
| 496 | struct address_space *mapping = inode->i_mapping; | 499 | struct address_space *mapping = inode->i_mapping; |
| 497 | __s32 rc = -EIO; | 500 | __s32 rc = -EIO; |
| 498 | u64 bytes = 0; | 501 | u64 bytes = 0; |
| 499 | struct ceph_client *client = ceph_inode_to_client(inode); | 502 | struct ceph_fs_client *fsc = ceph_inode_to_client(inode); |
| 500 | long writeback_stat; | 503 | long writeback_stat; |
| 501 | unsigned issued = ceph_caps_issued(ci); | 504 | unsigned issued = ceph_caps_issued(ci); |
| 502 | 505 | ||
| @@ -529,10 +532,10 @@ static void writepages_finish(struct ceph_osd_request *req, | |||
| 529 | WARN_ON(!PageUptodate(page)); | 532 | WARN_ON(!PageUptodate(page)); |
| 530 | 533 | ||
| 531 | writeback_stat = | 534 | writeback_stat = |
| 532 | atomic_long_dec_return(&client->writeback_count); | 535 | atomic_long_dec_return(&fsc->writeback_count); |
| 533 | if (writeback_stat < | 536 | if (writeback_stat < |
| 534 | CONGESTION_OFF_THRESH(client->mount_args->congestion_kb)) | 537 | CONGESTION_OFF_THRESH(fsc->mount_options->congestion_kb)) |
| 535 | clear_bdi_congested(&client->backing_dev_info, | 538 | clear_bdi_congested(&fsc->backing_dev_info, |
| 536 | BLK_RW_ASYNC); | 539 | BLK_RW_ASYNC); |
| 537 | 540 | ||
| 538 | ceph_put_snap_context((void *)page->private); | 541 | ceph_put_snap_context((void *)page->private); |
| @@ -569,13 +572,13 @@ static void writepages_finish(struct ceph_osd_request *req, | |||
| 569 | * mempool. we avoid the mempool if we can because req->r_num_pages | 572 | * mempool. we avoid the mempool if we can because req->r_num_pages |
| 570 | * may be less than the maximum write size. | 573 | * may be less than the maximum write size. |
| 571 | */ | 574 | */ |
| 572 | static void alloc_page_vec(struct ceph_client *client, | 575 | static void alloc_page_vec(struct ceph_fs_client *fsc, |
| 573 | struct ceph_osd_request *req) | 576 | struct ceph_osd_request *req) |
| 574 | { | 577 | { |
| 575 | req->r_pages = kmalloc(sizeof(struct page *) * req->r_num_pages, | 578 | req->r_pages = kmalloc(sizeof(struct page *) * req->r_num_pages, |
| 576 | GFP_NOFS); | 579 | GFP_NOFS); |
| 577 | if (!req->r_pages) { | 580 | if (!req->r_pages) { |
| 578 | req->r_pages = mempool_alloc(client->wb_pagevec_pool, GFP_NOFS); | 581 | req->r_pages = mempool_alloc(fsc->wb_pagevec_pool, GFP_NOFS); |
| 579 | req->r_pages_from_pool = 1; | 582 | req->r_pages_from_pool = 1; |
| 580 | WARN_ON(!req->r_pages); | 583 | WARN_ON(!req->r_pages); |
| 581 | } | 584 | } |
| @@ -590,7 +593,7 @@ static int ceph_writepages_start(struct address_space *mapping, | |||
| 590 | struct inode *inode = mapping->host; | 593 | struct inode *inode = mapping->host; |
| 591 | struct backing_dev_info *bdi = mapping->backing_dev_info; | 594 | struct backing_dev_info *bdi = mapping->backing_dev_info; |
| 592 | struct ceph_inode_info *ci = ceph_inode(inode); | 595 | struct ceph_inode_info *ci = ceph_inode(inode); |
| 593 | struct ceph_client *client; | 596 | struct ceph_fs_client *fsc; |
| 594 | pgoff_t index, start, end; | 597 | pgoff_t index, start, end; |
| 595 | int range_whole = 0; | 598 | int range_whole = 0; |
| 596 | int should_loop = 1; | 599 | int should_loop = 1; |
| @@ -617,13 +620,13 @@ static int ceph_writepages_start(struct address_space *mapping, | |||
| 617 | wbc->sync_mode == WB_SYNC_NONE ? "NONE" : | 620 | wbc->sync_mode == WB_SYNC_NONE ? "NONE" : |
| 618 | (wbc->sync_mode == WB_SYNC_ALL ? "ALL" : "HOLD")); | 621 | (wbc->sync_mode == WB_SYNC_ALL ? "ALL" : "HOLD")); |
| 619 | 622 | ||
| 620 | client = ceph_inode_to_client(inode); | 623 | fsc = ceph_inode_to_client(inode); |
| 621 | if (client->mount_state == CEPH_MOUNT_SHUTDOWN) { | 624 | if (fsc->mount_state == CEPH_MOUNT_SHUTDOWN) { |
| 622 | pr_warning("writepage_start %p on forced umount\n", inode); | 625 | pr_warning("writepage_start %p on forced umount\n", inode); |
| 623 | return -EIO; /* we're in a forced umount, don't write! */ | 626 | return -EIO; /* we're in a forced umount, don't write! */ |
| 624 | } | 627 | } |
| 625 | if (client->mount_args->wsize && client->mount_args->wsize < wsize) | 628 | if (fsc->mount_options->wsize && fsc->mount_options->wsize < wsize) |
| 626 | wsize = client->mount_args->wsize; | 629 | wsize = fsc->mount_options->wsize; |
| 627 | if (wsize < PAGE_CACHE_SIZE) | 630 | if (wsize < PAGE_CACHE_SIZE) |
| 628 | wsize = PAGE_CACHE_SIZE; | 631 | wsize = PAGE_CACHE_SIZE; |
| 629 | max_pages_ever = wsize >> PAGE_CACHE_SHIFT; | 632 | max_pages_ever = wsize >> PAGE_CACHE_SHIFT; |
| @@ -769,7 +772,7 @@ get_more_pages: | |||
| 769 | offset = (unsigned long long)page->index | 772 | offset = (unsigned long long)page->index |
| 770 | << PAGE_CACHE_SHIFT; | 773 | << PAGE_CACHE_SHIFT; |
| 771 | len = wsize; | 774 | len = wsize; |
| 772 | req = ceph_osdc_new_request(&client->osdc, | 775 | req = ceph_osdc_new_request(&fsc->client->osdc, |
| 773 | &ci->i_layout, | 776 | &ci->i_layout, |
| 774 | ceph_vino(inode), | 777 | ceph_vino(inode), |
| 775 | offset, &len, | 778 | offset, &len, |
| @@ -782,7 +785,7 @@ get_more_pages: | |||
| 782 | &inode->i_mtime, true, 1); | 785 | &inode->i_mtime, true, 1); |
| 783 | max_pages = req->r_num_pages; | 786 | max_pages = req->r_num_pages; |
| 784 | 787 | ||
| 785 | alloc_page_vec(client, req); | 788 | alloc_page_vec(fsc, req); |
| 786 | req->r_callback = writepages_finish; | 789 | req->r_callback = writepages_finish; |
| 787 | req->r_inode = inode; | 790 | req->r_inode = inode; |
| 788 | } | 791 | } |
| @@ -794,10 +797,10 @@ get_more_pages: | |||
| 794 | inode, page, page->index); | 797 | inode, page, page->index); |
| 795 | 798 | ||
| 796 | writeback_stat = | 799 | writeback_stat = |
| 797 | atomic_long_inc_return(&client->writeback_count); | 800 | atomic_long_inc_return(&fsc->writeback_count); |
| 798 | if (writeback_stat > CONGESTION_ON_THRESH( | 801 | if (writeback_stat > CONGESTION_ON_THRESH( |
| 799 | client->mount_args->congestion_kb)) { | 802 | fsc->mount_options->congestion_kb)) { |
| 800 | set_bdi_congested(&client->backing_dev_info, | 803 | set_bdi_congested(&fsc->backing_dev_info, |
| 801 | BLK_RW_ASYNC); | 804 | BLK_RW_ASYNC); |
| 802 | } | 805 | } |
| 803 | 806 | ||
| @@ -846,7 +849,7 @@ get_more_pages: | |||
| 846 | op->payload_len = cpu_to_le32(len); | 849 | op->payload_len = cpu_to_le32(len); |
| 847 | req->r_request->hdr.data_len = cpu_to_le32(len); | 850 | req->r_request->hdr.data_len = cpu_to_le32(len); |
| 848 | 851 | ||
| 849 | ceph_osdc_start_request(&client->osdc, req, true); | 852 | ceph_osdc_start_request(&fsc->client->osdc, req, true); |
| 850 | req = NULL; | 853 | req = NULL; |
| 851 | 854 | ||
| 852 | /* continue? */ | 855 | /* continue? */ |
| @@ -915,7 +918,7 @@ static int ceph_update_writeable_page(struct file *file, | |||
| 915 | { | 918 | { |
| 916 | struct inode *inode = file->f_dentry->d_inode; | 919 | struct inode *inode = file->f_dentry->d_inode; |
| 917 | struct ceph_inode_info *ci = ceph_inode(inode); | 920 | struct ceph_inode_info *ci = ceph_inode(inode); |
| 918 | struct ceph_mds_client *mdsc = &ceph_inode_to_client(inode)->mdsc; | 921 | struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc; |
| 919 | loff_t page_off = pos & PAGE_CACHE_MASK; | 922 | loff_t page_off = pos & PAGE_CACHE_MASK; |
| 920 | int pos_in_page = pos & ~PAGE_CACHE_MASK; | 923 | int pos_in_page = pos & ~PAGE_CACHE_MASK; |
| 921 | int end_in_page = pos_in_page + len; | 924 | int end_in_page = pos_in_page + len; |
| @@ -1053,8 +1056,8 @@ static int ceph_write_end(struct file *file, struct address_space *mapping, | |||
| 1053 | struct page *page, void *fsdata) | 1056 | struct page *page, void *fsdata) |
| 1054 | { | 1057 | { |
| 1055 | struct inode *inode = file->f_dentry->d_inode; | 1058 | struct inode *inode = file->f_dentry->d_inode; |
| 1056 | struct ceph_client *client = ceph_inode_to_client(inode); | 1059 | struct ceph_fs_client *fsc = ceph_inode_to_client(inode); |
| 1057 | struct ceph_mds_client *mdsc = &client->mdsc; | 1060 | struct ceph_mds_client *mdsc = fsc->mdsc; |
| 1058 | unsigned from = pos & (PAGE_CACHE_SIZE - 1); | 1061 | unsigned from = pos & (PAGE_CACHE_SIZE - 1); |
| 1059 | int check_cap = 0; | 1062 | int check_cap = 0; |
| 1060 | 1063 | ||
| @@ -1123,7 +1126,7 @@ static int ceph_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) | |||
| 1123 | { | 1126 | { |
| 1124 | struct inode *inode = vma->vm_file->f_dentry->d_inode; | 1127 | struct inode *inode = vma->vm_file->f_dentry->d_inode; |
| 1125 | struct page *page = vmf->page; | 1128 | struct page *page = vmf->page; |
| 1126 | struct ceph_mds_client *mdsc = &ceph_inode_to_client(inode)->mdsc; | 1129 | struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc; |
| 1127 | loff_t off = page->index << PAGE_CACHE_SHIFT; | 1130 | loff_t off = page->index << PAGE_CACHE_SHIFT; |
| 1128 | loff_t size, len; | 1131 | loff_t size, len; |
| 1129 | int ret; | 1132 | int ret; |
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index 5e9da996a151..98ab13e2b71d 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c | |||
| @@ -1,4 +1,4 @@ | |||
| 1 | #include "ceph_debug.h" | 1 | #include <linux/ceph/ceph_debug.h> |
| 2 | 2 | ||
| 3 | #include <linux/fs.h> | 3 | #include <linux/fs.h> |
| 4 | #include <linux/kernel.h> | 4 | #include <linux/kernel.h> |
| @@ -9,8 +9,9 @@ | |||
| 9 | #include <linux/writeback.h> | 9 | #include <linux/writeback.h> |
| 10 | 10 | ||
| 11 | #include "super.h" | 11 | #include "super.h" |
| 12 | #include "decode.h" | 12 | #include "mds_client.h" |
| 13 | #include "messenger.h" | 13 | #include <linux/ceph/decode.h> |
| 14 | #include <linux/ceph/messenger.h> | ||
| 14 | 15 | ||
| 15 | /* | 16 | /* |
| 16 | * Capability management | 17 | * Capability management |
| @@ -287,11 +288,11 @@ void ceph_put_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap) | |||
| 287 | spin_unlock(&mdsc->caps_list_lock); | 288 | spin_unlock(&mdsc->caps_list_lock); |
| 288 | } | 289 | } |
| 289 | 290 | ||
| 290 | void ceph_reservation_status(struct ceph_client *client, | 291 | void ceph_reservation_status(struct ceph_fs_client *fsc, |
| 291 | int *total, int *avail, int *used, int *reserved, | 292 | int *total, int *avail, int *used, int *reserved, |
| 292 | int *min) | 293 | int *min) |
| 293 | { | 294 | { |
| 294 | struct ceph_mds_client *mdsc = &client->mdsc; | 295 | struct ceph_mds_client *mdsc = fsc->mdsc; |
| 295 | 296 | ||
| 296 | if (total) | 297 | if (total) |
| 297 | *total = mdsc->caps_total_count; | 298 | *total = mdsc->caps_total_count; |
| @@ -399,7 +400,7 @@ static void __insert_cap_node(struct ceph_inode_info *ci, | |||
| 399 | static void __cap_set_timeouts(struct ceph_mds_client *mdsc, | 400 | static void __cap_set_timeouts(struct ceph_mds_client *mdsc, |
| 400 | struct ceph_inode_info *ci) | 401 | struct ceph_inode_info *ci) |
| 401 | { | 402 | { |
| 402 | struct ceph_mount_args *ma = mdsc->client->mount_args; | 403 | struct ceph_mount_options *ma = mdsc->fsc->mount_options; |
| 403 | 404 | ||
| 404 | ci->i_hold_caps_min = round_jiffies(jiffies + | 405 | ci->i_hold_caps_min = round_jiffies(jiffies + |
| 405 | ma->caps_wanted_delay_min * HZ); | 406 | ma->caps_wanted_delay_min * HZ); |
| @@ -515,7 +516,7 @@ int ceph_add_cap(struct inode *inode, | |||
| 515 | unsigned seq, unsigned mseq, u64 realmino, int flags, | 516 | unsigned seq, unsigned mseq, u64 realmino, int flags, |
| 516 | struct ceph_cap_reservation *caps_reservation) | 517 | struct ceph_cap_reservation *caps_reservation) |
| 517 | { | 518 | { |
| 518 | struct ceph_mds_client *mdsc = &ceph_inode_to_client(inode)->mdsc; | 519 | struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc; |
| 519 | struct ceph_inode_info *ci = ceph_inode(inode); | 520 | struct ceph_inode_info *ci = ceph_inode(inode); |
| 520 | struct ceph_cap *new_cap = NULL; | 521 | struct ceph_cap *new_cap = NULL; |
| 521 | struct ceph_cap *cap; | 522 | struct ceph_cap *cap; |
| @@ -873,7 +874,7 @@ void __ceph_remove_cap(struct ceph_cap *cap) | |||
| 873 | struct ceph_mds_session *session = cap->session; | 874 | struct ceph_mds_session *session = cap->session; |
| 874 | struct ceph_inode_info *ci = cap->ci; | 875 | struct ceph_inode_info *ci = cap->ci; |
| 875 | struct ceph_mds_client *mdsc = | 876 | struct ceph_mds_client *mdsc = |
| 876 | &ceph_sb_to_client(ci->vfs_inode.i_sb)->mdsc; | 877 | ceph_sb_to_client(ci->vfs_inode.i_sb)->mdsc; |
| 877 | int removed = 0; | 878 | int removed = 0; |
| 878 | 879 | ||
| 879 | dout("__ceph_remove_cap %p from %p\n", cap, &ci->vfs_inode); | 880 | dout("__ceph_remove_cap %p from %p\n", cap, &ci->vfs_inode); |
| @@ -1210,7 +1211,7 @@ void __ceph_flush_snaps(struct ceph_inode_info *ci, | |||
| 1210 | int mds; | 1211 | int mds; |
| 1211 | struct ceph_cap_snap *capsnap; | 1212 | struct ceph_cap_snap *capsnap; |
| 1212 | u32 mseq; | 1213 | u32 mseq; |
| 1213 | struct ceph_mds_client *mdsc = &ceph_inode_to_client(inode)->mdsc; | 1214 | struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc; |
| 1214 | struct ceph_mds_session *session = NULL; /* if session != NULL, we hold | 1215 | struct ceph_mds_session *session = NULL; /* if session != NULL, we hold |
| 1215 | session->s_mutex */ | 1216 | session->s_mutex */ |
| 1216 | u64 next_follows = 0; /* keep track of how far we've gotten through the | 1217 | u64 next_follows = 0; /* keep track of how far we've gotten through the |
| @@ -1336,7 +1337,7 @@ static void ceph_flush_snaps(struct ceph_inode_info *ci) | |||
| 1336 | void __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask) | 1337 | void __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask) |
| 1337 | { | 1338 | { |
| 1338 | struct ceph_mds_client *mdsc = | 1339 | struct ceph_mds_client *mdsc = |
| 1339 | &ceph_sb_to_client(ci->vfs_inode.i_sb)->mdsc; | 1340 | ceph_sb_to_client(ci->vfs_inode.i_sb)->mdsc; |
| 1340 | struct inode *inode = &ci->vfs_inode; | 1341 | struct inode *inode = &ci->vfs_inode; |
| 1341 | int was = ci->i_dirty_caps; | 1342 | int was = ci->i_dirty_caps; |
| 1342 | int dirty = 0; | 1343 | int dirty = 0; |
| @@ -1378,7 +1379,7 @@ void __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask) | |||
| 1378 | static int __mark_caps_flushing(struct inode *inode, | 1379 | static int __mark_caps_flushing(struct inode *inode, |
| 1379 | struct ceph_mds_session *session) | 1380 | struct ceph_mds_session *session) |
| 1380 | { | 1381 | { |
| 1381 | struct ceph_mds_client *mdsc = &ceph_sb_to_client(inode->i_sb)->mdsc; | 1382 | struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc; |
| 1382 | struct ceph_inode_info *ci = ceph_inode(inode); | 1383 | struct ceph_inode_info *ci = ceph_inode(inode); |
| 1383 | int flushing; | 1384 | int flushing; |
| 1384 | 1385 | ||
| @@ -1416,17 +1417,6 @@ static int __mark_caps_flushing(struct inode *inode, | |||
| 1416 | /* | 1417 | /* |
| 1417 | * try to invalidate mapping pages without blocking. | 1418 | * try to invalidate mapping pages without blocking. |
| 1418 | */ | 1419 | */ |
| 1419 | static int mapping_is_empty(struct address_space *mapping) | ||
| 1420 | { | ||
| 1421 | struct page *page = find_get_page(mapping, 0); | ||
| 1422 | |||
| 1423 | if (!page) | ||
| 1424 | return 1; | ||
| 1425 | |||
| 1426 | put_page(page); | ||
| 1427 | return 0; | ||
| 1428 | } | ||
| 1429 | |||
| 1430 | static int try_nonblocking_invalidate(struct inode *inode) | 1420 | static int try_nonblocking_invalidate(struct inode *inode) |
| 1431 | { | 1421 | { |
| 1432 | struct ceph_inode_info *ci = ceph_inode(inode); | 1422 | struct ceph_inode_info *ci = ceph_inode(inode); |
| @@ -1436,7 +1426,7 @@ static int try_nonblocking_invalidate(struct inode *inode) | |||
| 1436 | invalidate_mapping_pages(&inode->i_data, 0, -1); | 1426 | invalidate_mapping_pages(&inode->i_data, 0, -1); |
| 1437 | spin_lock(&inode->i_lock); | 1427 | spin_lock(&inode->i_lock); |
| 1438 | 1428 | ||
| 1439 | if (mapping_is_empty(&inode->i_data) && | 1429 | if (inode->i_data.nrpages == 0 && |
| 1440 | invalidating_gen == ci->i_rdcache_gen) { | 1430 | invalidating_gen == ci->i_rdcache_gen) { |
| 1441 | /* success. */ | 1431 | /* success. */ |
| 1442 | dout("try_nonblocking_invalidate %p success\n", inode); | 1432 | dout("try_nonblocking_invalidate %p success\n", inode); |
| @@ -1462,8 +1452,8 @@ static int try_nonblocking_invalidate(struct inode *inode) | |||
| 1462 | void ceph_check_caps(struct ceph_inode_info *ci, int flags, | 1452 | void ceph_check_caps(struct ceph_inode_info *ci, int flags, |
| 1463 | struct ceph_mds_session *session) | 1453 | struct ceph_mds_session *session) |
| 1464 | { | 1454 | { |
| 1465 | struct ceph_client *client = ceph_inode_to_client(&ci->vfs_inode); | 1455 | struct ceph_fs_client *fsc = ceph_inode_to_client(&ci->vfs_inode); |
| 1466 | struct ceph_mds_client *mdsc = &client->mdsc; | 1456 | struct ceph_mds_client *mdsc = fsc->mdsc; |
| 1467 | struct inode *inode = &ci->vfs_inode; | 1457 | struct inode *inode = &ci->vfs_inode; |
| 1468 | struct ceph_cap *cap; | 1458 | struct ceph_cap *cap; |
| 1469 | int file_wanted, used; | 1459 | int file_wanted, used; |
| @@ -1533,7 +1523,7 @@ retry_locked: | |||
| 1533 | */ | 1523 | */ |
| 1534 | if ((!is_delayed || mdsc->stopping) && | 1524 | if ((!is_delayed || mdsc->stopping) && |
| 1535 | ci->i_wrbuffer_ref == 0 && /* no dirty pages... */ | 1525 | ci->i_wrbuffer_ref == 0 && /* no dirty pages... */ |
| 1536 | ci->i_rdcache_gen && /* may have cached pages */ | 1526 | inode->i_data.nrpages && /* have cached pages */ |
| 1537 | (file_wanted == 0 || /* no open files */ | 1527 | (file_wanted == 0 || /* no open files */ |
| 1538 | (revoking & (CEPH_CAP_FILE_CACHE| | 1528 | (revoking & (CEPH_CAP_FILE_CACHE| |
| 1539 | CEPH_CAP_FILE_LAZYIO))) && /* or revoking cache */ | 1529 | CEPH_CAP_FILE_LAZYIO))) && /* or revoking cache */ |
| @@ -1706,7 +1696,7 @@ ack: | |||
| 1706 | static int try_flush_caps(struct inode *inode, struct ceph_mds_session *session, | 1696 | static int try_flush_caps(struct inode *inode, struct ceph_mds_session *session, |
| 1707 | unsigned *flush_tid) | 1697 | unsigned *flush_tid) |
| 1708 | { | 1698 | { |
| 1709 | struct ceph_mds_client *mdsc = &ceph_sb_to_client(inode->i_sb)->mdsc; | 1699 | struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc; |
| 1710 | struct ceph_inode_info *ci = ceph_inode(inode); | 1700 | struct ceph_inode_info *ci = ceph_inode(inode); |
| 1711 | int unlock_session = session ? 0 : 1; | 1701 | int unlock_session = session ? 0 : 1; |
| 1712 | int flushing = 0; | 1702 | int flushing = 0; |
| @@ -1872,7 +1862,7 @@ int ceph_write_inode(struct inode *inode, struct writeback_control *wbc) | |||
| 1872 | caps_are_flushed(inode, flush_tid)); | 1862 | caps_are_flushed(inode, flush_tid)); |
| 1873 | } else { | 1863 | } else { |
| 1874 | struct ceph_mds_client *mdsc = | 1864 | struct ceph_mds_client *mdsc = |
| 1875 | &ceph_sb_to_client(inode->i_sb)->mdsc; | 1865 | ceph_sb_to_client(inode->i_sb)->mdsc; |
| 1876 | 1866 | ||
| 1877 | spin_lock(&inode->i_lock); | 1867 | spin_lock(&inode->i_lock); |
| 1878 | if (__ceph_caps_dirty(ci)) | 1868 | if (__ceph_caps_dirty(ci)) |
| @@ -2465,7 +2455,7 @@ static void handle_cap_flush_ack(struct inode *inode, u64 flush_tid, | |||
| 2465 | __releases(inode->i_lock) | 2455 | __releases(inode->i_lock) |
| 2466 | { | 2456 | { |
| 2467 | struct ceph_inode_info *ci = ceph_inode(inode); | 2457 | struct ceph_inode_info *ci = ceph_inode(inode); |
| 2468 | struct ceph_mds_client *mdsc = &ceph_sb_to_client(inode->i_sb)->mdsc; | 2458 | struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc; |
| 2469 | unsigned seq = le32_to_cpu(m->seq); | 2459 | unsigned seq = le32_to_cpu(m->seq); |
| 2470 | int dirty = le32_to_cpu(m->dirty); | 2460 | int dirty = le32_to_cpu(m->dirty); |
| 2471 | int cleaned = 0; | 2461 | int cleaned = 0; |
| @@ -2713,7 +2703,7 @@ void ceph_handle_caps(struct ceph_mds_session *session, | |||
| 2713 | struct ceph_msg *msg) | 2703 | struct ceph_msg *msg) |
| 2714 | { | 2704 | { |
| 2715 | struct ceph_mds_client *mdsc = session->s_mdsc; | 2705 | struct ceph_mds_client *mdsc = session->s_mdsc; |
| 2716 | struct super_block *sb = mdsc->client->sb; | 2706 | struct super_block *sb = mdsc->fsc->sb; |
| 2717 | struct inode *inode; | 2707 | struct inode *inode; |
| 2718 | struct ceph_cap *cap; | 2708 | struct ceph_cap *cap; |
| 2719 | struct ceph_mds_caps *h; | 2709 | struct ceph_mds_caps *h; |
diff --git a/fs/ceph/ceph_frag.c b/fs/ceph/ceph_frag.c index ab6cf35c4091..bdce8b1fbd06 100644 --- a/fs/ceph/ceph_frag.c +++ b/fs/ceph/ceph_frag.c | |||
| @@ -1,7 +1,8 @@ | |||
| 1 | /* | 1 | /* |
| 2 | * Ceph 'frag' type | 2 | * Ceph 'frag' type |
| 3 | */ | 3 | */ |
| 4 | #include "types.h" | 4 | #include <linux/module.h> |
| 5 | #include <linux/ceph/types.h> | ||
| 5 | 6 | ||
| 6 | int ceph_frag_compare(__u32 a, __u32 b) | 7 | int ceph_frag_compare(__u32 a, __u32 b) |
| 7 | { | 8 | { |
diff --git a/fs/ceph/debugfs.c b/fs/ceph/debugfs.c index 6fd8b20a8611..7ae1b3d55b58 100644 --- a/fs/ceph/debugfs.c +++ b/fs/ceph/debugfs.c | |||
| @@ -1,4 +1,4 @@ | |||
| 1 | #include "ceph_debug.h" | 1 | #include <linux/ceph/ceph_debug.h> |
| 2 | 2 | ||
| 3 | #include <linux/device.h> | 3 | #include <linux/device.h> |
| 4 | #include <linux/slab.h> | 4 | #include <linux/slab.h> |
| @@ -7,143 +7,49 @@ | |||
| 7 | #include <linux/debugfs.h> | 7 | #include <linux/debugfs.h> |
| 8 | #include <linux/seq_file.h> | 8 | #include <linux/seq_file.h> |
| 9 | 9 | ||
| 10 | #include <linux/ceph/libceph.h> | ||
| 11 | #include <linux/ceph/mon_client.h> | ||
| 12 | #include <linux/ceph/auth.h> | ||
| 13 | #include <linux/ceph/debugfs.h> | ||
| 14 | |||
| 10 | #include "super.h" | 15 | #include "super.h" |
| 11 | #include "mds_client.h" | ||
| 12 | #include "mon_client.h" | ||
| 13 | #include "auth.h" | ||
| 14 | 16 | ||
| 15 | #ifdef CONFIG_DEBUG_FS | 17 | #ifdef CONFIG_DEBUG_FS |
| 16 | 18 | ||
| 17 | /* | 19 | #include "mds_client.h" |
| 18 | * Implement /sys/kernel/debug/ceph fun | ||
| 19 | * | ||
| 20 | * /sys/kernel/debug/ceph/client* - an instance of the ceph client | ||
| 21 | * .../osdmap - current osdmap | ||
| 22 | * .../mdsmap - current mdsmap | ||
| 23 | * .../monmap - current monmap | ||
| 24 | * .../osdc - active osd requests | ||
| 25 | * .../mdsc - active mds requests | ||
| 26 | * .../monc - mon client state | ||
| 27 | * .../dentry_lru - dump contents of dentry lru | ||
| 28 | * .../caps - expose cap (reservation) stats | ||
| 29 | * .../bdi - symlink to ../../bdi/something | ||
| 30 | */ | ||
| 31 | |||
| 32 | static struct dentry *ceph_debugfs_dir; | ||
| 33 | |||
| 34 | static int monmap_show(struct seq_file *s, void *p) | ||
| 35 | { | ||
| 36 | int i; | ||
| 37 | struct ceph_client *client = s->private; | ||
| 38 | |||
| 39 | if (client->monc.monmap == NULL) | ||
| 40 | return 0; | ||
| 41 | |||
| 42 | seq_printf(s, "epoch %d\n", client->monc.monmap->epoch); | ||
| 43 | for (i = 0; i < client->monc.monmap->num_mon; i++) { | ||
| 44 | struct ceph_entity_inst *inst = | ||
| 45 | &client->monc.monmap->mon_inst[i]; | ||
| 46 | |||
| 47 | seq_printf(s, "\t%s%lld\t%s\n", | ||
| 48 | ENTITY_NAME(inst->name), | ||
| 49 | pr_addr(&inst->addr.in_addr)); | ||
| 50 | } | ||
| 51 | return 0; | ||
| 52 | } | ||
| 53 | 20 | ||
| 54 | static int mdsmap_show(struct seq_file *s, void *p) | 21 | static int mdsmap_show(struct seq_file *s, void *p) |
| 55 | { | 22 | { |
| 56 | int i; | 23 | int i; |
| 57 | struct ceph_client *client = s->private; | 24 | struct ceph_fs_client *fsc = s->private; |
| 58 | 25 | ||
| 59 | if (client->mdsc.mdsmap == NULL) | 26 | if (fsc->mdsc == NULL || fsc->mdsc->mdsmap == NULL) |
| 60 | return 0; | 27 | return 0; |
| 61 | seq_printf(s, "epoch %d\n", client->mdsc.mdsmap->m_epoch); | 28 | seq_printf(s, "epoch %d\n", fsc->mdsc->mdsmap->m_epoch); |
| 62 | seq_printf(s, "root %d\n", client->mdsc.mdsmap->m_root); | 29 | seq_printf(s, "root %d\n", fsc->mdsc->mdsmap->m_root); |
| 63 | seq_printf(s, "session_timeout %d\n", | 30 | seq_printf(s, "session_timeout %d\n", |
| 64 | client->mdsc.mdsmap->m_session_timeout); | 31 | fsc->mdsc->mdsmap->m_session_timeout); |
| 65 | seq_printf(s, "session_autoclose %d\n", | 32 | seq_printf(s, "session_autoclose %d\n", |
| 66 | client->mdsc.mdsmap->m_session_autoclose); | 33 | fsc->mdsc->mdsmap->m_session_autoclose); |
| 67 | for (i = 0; i < client->mdsc.mdsmap->m_max_mds; i++) { | 34 | for (i = 0; i < fsc->mdsc->mdsmap->m_max_mds; i++) { |
| 68 | struct ceph_entity_addr *addr = | 35 | struct ceph_entity_addr *addr = |
| 69 | &client->mdsc.mdsmap->m_info[i].addr; | 36 | &fsc->mdsc->mdsmap->m_info[i].addr; |
| 70 | int state = client->mdsc.mdsmap->m_info[i].state; | 37 | int state = fsc->mdsc->mdsmap->m_info[i].state; |
| 71 | 38 | ||
| 72 | seq_printf(s, "\tmds%d\t%s\t(%s)\n", i, pr_addr(&addr->in_addr), | 39 | seq_printf(s, "\tmds%d\t%s\t(%s)\n", i, |
| 40 | ceph_pr_addr(&addr->in_addr), | ||
| 73 | ceph_mds_state_name(state)); | 41 | ceph_mds_state_name(state)); |
| 74 | } | 42 | } |
| 75 | return 0; | 43 | return 0; |
| 76 | } | 44 | } |
| 77 | 45 | ||
| 78 | static int osdmap_show(struct seq_file *s, void *p) | 46 | /* |
| 79 | { | 47 | * mdsc debugfs |
| 80 | int i; | 48 | */ |
| 81 | struct ceph_client *client = s->private; | ||
| 82 | struct rb_node *n; | ||
| 83 | |||
| 84 | if (client->osdc.osdmap == NULL) | ||
| 85 | return 0; | ||
| 86 | seq_printf(s, "epoch %d\n", client->osdc.osdmap->epoch); | ||
| 87 | seq_printf(s, "flags%s%s\n", | ||
| 88 | (client->osdc.osdmap->flags & CEPH_OSDMAP_NEARFULL) ? | ||
| 89 | " NEARFULL" : "", | ||
| 90 | (client->osdc.osdmap->flags & CEPH_OSDMAP_FULL) ? | ||
| 91 | " FULL" : ""); | ||
| 92 | for (n = rb_first(&client->osdc.osdmap->pg_pools); n; n = rb_next(n)) { | ||
| 93 | struct ceph_pg_pool_info *pool = | ||
| 94 | rb_entry(n, struct ceph_pg_pool_info, node); | ||
| 95 | seq_printf(s, "pg_pool %d pg_num %d / %d, lpg_num %d / %d\n", | ||
| 96 | pool->id, pool->v.pg_num, pool->pg_num_mask, | ||
| 97 | pool->v.lpg_num, pool->lpg_num_mask); | ||
| 98 | } | ||
| 99 | for (i = 0; i < client->osdc.osdmap->max_osd; i++) { | ||
| 100 | struct ceph_entity_addr *addr = | ||
| 101 | &client->osdc.osdmap->osd_addr[i]; | ||
| 102 | int state = client->osdc.osdmap->osd_state[i]; | ||
| 103 | char sb[64]; | ||
| 104 | |||
| 105 | seq_printf(s, "\tosd%d\t%s\t%3d%%\t(%s)\n", | ||
| 106 | i, pr_addr(&addr->in_addr), | ||
| 107 | ((client->osdc.osdmap->osd_weight[i]*100) >> 16), | ||
| 108 | ceph_osdmap_state_str(sb, sizeof(sb), state)); | ||
| 109 | } | ||
| 110 | return 0; | ||
| 111 | } | ||
| 112 | |||
| 113 | static int monc_show(struct seq_file *s, void *p) | ||
| 114 | { | ||
| 115 | struct ceph_client *client = s->private; | ||
| 116 | struct ceph_mon_generic_request *req; | ||
| 117 | struct ceph_mon_client *monc = &client->monc; | ||
| 118 | struct rb_node *rp; | ||
| 119 | |||
| 120 | mutex_lock(&monc->mutex); | ||
| 121 | |||
| 122 | if (monc->have_mdsmap) | ||
| 123 | seq_printf(s, "have mdsmap %u\n", (unsigned)monc->have_mdsmap); | ||
| 124 | if (monc->have_osdmap) | ||
| 125 | seq_printf(s, "have osdmap %u\n", (unsigned)monc->have_osdmap); | ||
| 126 | if (monc->want_next_osdmap) | ||
| 127 | seq_printf(s, "want next osdmap\n"); | ||
| 128 | |||
| 129 | for (rp = rb_first(&monc->generic_request_tree); rp; rp = rb_next(rp)) { | ||
| 130 | __u16 op; | ||
| 131 | req = rb_entry(rp, struct ceph_mon_generic_request, node); | ||
| 132 | op = le16_to_cpu(req->request->hdr.type); | ||
| 133 | if (op == CEPH_MSG_STATFS) | ||
| 134 | seq_printf(s, "%lld statfs\n", req->tid); | ||
| 135 | else | ||
| 136 | seq_printf(s, "%lld unknown\n", req->tid); | ||
| 137 | } | ||
| 138 | |||
| 139 | mutex_unlock(&monc->mutex); | ||
| 140 | return 0; | ||
| 141 | } | ||
| 142 | |||
| 143 | static int mdsc_show(struct seq_file *s, void *p) | 49 | static int mdsc_show(struct seq_file *s, void *p) |
| 144 | { | 50 | { |
| 145 | struct ceph_client *client = s->private; | 51 | struct ceph_fs_client *fsc = s->private; |
| 146 | struct ceph_mds_client *mdsc = &client->mdsc; | 52 | struct ceph_mds_client *mdsc = fsc->mdsc; |
| 147 | struct ceph_mds_request *req; | 53 | struct ceph_mds_request *req; |
| 148 | struct rb_node *rp; | 54 | struct rb_node *rp; |
| 149 | int pathlen; | 55 | int pathlen; |
| @@ -214,61 +120,12 @@ static int mdsc_show(struct seq_file *s, void *p) | |||
| 214 | return 0; | 120 | return 0; |
| 215 | } | 121 | } |
| 216 | 122 | ||
| 217 | static int osdc_show(struct seq_file *s, void *pp) | ||
| 218 | { | ||
| 219 | struct ceph_client *client = s->private; | ||
| 220 | struct ceph_osd_client *osdc = &client->osdc; | ||
| 221 | struct rb_node *p; | ||
| 222 | |||
| 223 | mutex_lock(&osdc->request_mutex); | ||
| 224 | for (p = rb_first(&osdc->requests); p; p = rb_next(p)) { | ||
| 225 | struct ceph_osd_request *req; | ||
| 226 | struct ceph_osd_request_head *head; | ||
| 227 | struct ceph_osd_op *op; | ||
| 228 | int num_ops; | ||
| 229 | int opcode, olen; | ||
| 230 | int i; | ||
| 231 | |||
| 232 | req = rb_entry(p, struct ceph_osd_request, r_node); | ||
| 233 | |||
| 234 | seq_printf(s, "%lld\tosd%d\t%d.%x\t", req->r_tid, | ||
| 235 | req->r_osd ? req->r_osd->o_osd : -1, | ||
| 236 | le32_to_cpu(req->r_pgid.pool), | ||
| 237 | le16_to_cpu(req->r_pgid.ps)); | ||
| 238 | |||
| 239 | head = req->r_request->front.iov_base; | ||
| 240 | op = (void *)(head + 1); | ||
| 241 | |||
| 242 | num_ops = le16_to_cpu(head->num_ops); | ||
| 243 | olen = le32_to_cpu(head->object_len); | ||
| 244 | seq_printf(s, "%.*s", olen, | ||
| 245 | (const char *)(head->ops + num_ops)); | ||
| 246 | |||
| 247 | if (req->r_reassert_version.epoch) | ||
| 248 | seq_printf(s, "\t%u'%llu", | ||
| 249 | (unsigned)le32_to_cpu(req->r_reassert_version.epoch), | ||
| 250 | le64_to_cpu(req->r_reassert_version.version)); | ||
| 251 | else | ||
| 252 | seq_printf(s, "\t"); | ||
| 253 | |||
| 254 | for (i = 0; i < num_ops; i++) { | ||
| 255 | opcode = le16_to_cpu(op->op); | ||
| 256 | seq_printf(s, "\t%s", ceph_osd_op_name(opcode)); | ||
| 257 | op++; | ||
| 258 | } | ||
| 259 | |||
| 260 | seq_printf(s, "\n"); | ||
| 261 | } | ||
| 262 | mutex_unlock(&osdc->request_mutex); | ||
| 263 | return 0; | ||
| 264 | } | ||
| 265 | |||
| 266 | static int caps_show(struct seq_file *s, void *p) | 123 | static int caps_show(struct seq_file *s, void *p) |
| 267 | { | 124 | { |
| 268 | struct ceph_client *client = s->private; | 125 | struct ceph_fs_client *fsc = s->private; |
| 269 | int total, avail, used, reserved, min; | 126 | int total, avail, used, reserved, min; |
| 270 | 127 | ||
| 271 | ceph_reservation_status(client, &total, &avail, &used, &reserved, &min); | 128 | ceph_reservation_status(fsc, &total, &avail, &used, &reserved, &min); |
| 272 | seq_printf(s, "total\t\t%d\n" | 129 | seq_printf(s, "total\t\t%d\n" |
| 273 | "avail\t\t%d\n" | 130 | "avail\t\t%d\n" |
| 274 | "used\t\t%d\n" | 131 | "used\t\t%d\n" |
| @@ -280,8 +137,8 @@ static int caps_show(struct seq_file *s, void *p) | |||
| 280 | 137 | ||
| 281 | static int dentry_lru_show(struct seq_file *s, void *ptr) | 138 | static int dentry_lru_show(struct seq_file *s, void *ptr) |
| 282 | { | 139 | { |
| 283 | struct ceph_client *client = s->private; | 140 | struct ceph_fs_client *fsc = s->private; |
| 284 | struct ceph_mds_client *mdsc = &client->mdsc; | 141 | struct ceph_mds_client *mdsc = fsc->mdsc; |
| 285 | struct ceph_dentry_info *di; | 142 | struct ceph_dentry_info *di; |
| 286 | 143 | ||
| 287 | spin_lock(&mdsc->dentry_lru_lock); | 144 | spin_lock(&mdsc->dentry_lru_lock); |
| @@ -295,199 +152,124 @@ static int dentry_lru_show(struct seq_file *s, void *ptr) | |||
| 295 | return 0; | 152 | return 0; |
| 296 | } | 153 | } |
| 297 | 154 | ||
| 298 | #define DEFINE_SHOW_FUNC(name) \ | 155 | CEPH_DEFINE_SHOW_FUNC(mdsmap_show) |
| 299 | static int name##_open(struct inode *inode, struct file *file) \ | 156 | CEPH_DEFINE_SHOW_FUNC(mdsc_show) |
| 300 | { \ | 157 | CEPH_DEFINE_SHOW_FUNC(caps_show) |
| 301 | struct seq_file *sf; \ | 158 | CEPH_DEFINE_SHOW_FUNC(dentry_lru_show) |
| 302 | int ret; \ | 159 | |
| 303 | \ | ||
| 304 | ret = single_open(file, name, NULL); \ | ||
| 305 | sf = file->private_data; \ | ||
| 306 | sf->private = inode->i_private; \ | ||
| 307 | return ret; \ | ||
| 308 | } \ | ||
| 309 | \ | ||
| 310 | static const struct file_operations name##_fops = { \ | ||
| 311 | .open = name##_open, \ | ||
| 312 | .read = seq_read, \ | ||
| 313 | .llseek = seq_lseek, \ | ||
| 314 | .release = single_release, \ | ||
| 315 | }; | ||
| 316 | |||
| 317 | DEFINE_SHOW_FUNC(monmap_show) | ||
| 318 | DEFINE_SHOW_FUNC(mdsmap_show) | ||
| 319 | DEFINE_SHOW_FUNC(osdmap_show) | ||
| 320 | DEFINE_SHOW_FUNC(monc_show) | ||
| 321 | DEFINE_SHOW_FUNC(mdsc_show) | ||
| 322 | DEFINE_SHOW_FUNC(osdc_show) | ||
| 323 | DEFINE_SHOW_FUNC(dentry_lru_show) | ||
| 324 | DEFINE_SHOW_FUNC(caps_show) | ||
| 325 | 160 | ||
| 161 | /* | ||
| 162 | * debugfs | ||
| 163 | */ | ||
| 326 | static int congestion_kb_set(void *data, u64 val) | 164 | static int congestion_kb_set(void *data, u64 val) |
| 327 | { | 165 | { |
| 328 | struct ceph_client *client = (struct ceph_client *)data; | 166 | struct ceph_fs_client *fsc = (struct ceph_fs_client *)data; |
| 329 | |||
| 330 | if (client) | ||
| 331 | client->mount_args->congestion_kb = (int)val; | ||
| 332 | 167 | ||
| 168 | fsc->mount_options->congestion_kb = (int)val; | ||
| 333 | return 0; | 169 | return 0; |
| 334 | } | 170 | } |
| 335 | 171 | ||
| 336 | static int congestion_kb_get(void *data, u64 *val) | 172 | static int congestion_kb_get(void *data, u64 *val) |
| 337 | { | 173 | { |
| 338 | struct ceph_client *client = (struct ceph_client *)data; | 174 | struct ceph_fs_client *fsc = (struct ceph_fs_client *)data; |
| 339 | |||
| 340 | if (client) | ||
| 341 | *val = (u64)client->mount_args->congestion_kb; | ||
| 342 | 175 | ||
| 176 | *val = (u64)fsc->mount_options->congestion_kb; | ||
| 343 | return 0; | 177 | return 0; |
| 344 | } | 178 | } |
| 345 | 179 | ||
| 346 | |||
| 347 | DEFINE_SIMPLE_ATTRIBUTE(congestion_kb_fops, congestion_kb_get, | 180 | DEFINE_SIMPLE_ATTRIBUTE(congestion_kb_fops, congestion_kb_get, |
| 348 | congestion_kb_set, "%llu\n"); | 181 | congestion_kb_set, "%llu\n"); |
| 349 | 182 | ||
| 350 | int __init ceph_debugfs_init(void) | ||
| 351 | { | ||
| 352 | ceph_debugfs_dir = debugfs_create_dir("ceph", NULL); | ||
| 353 | if (!ceph_debugfs_dir) | ||
| 354 | return -ENOMEM; | ||
| 355 | return 0; | ||
| 356 | } | ||
| 357 | 183 | ||
| 358 | void ceph_debugfs_cleanup(void) | 184 | void ceph_fs_debugfs_cleanup(struct ceph_fs_client *fsc) |
| 359 | { | 185 | { |
| 360 | debugfs_remove(ceph_debugfs_dir); | 186 | dout("ceph_fs_debugfs_cleanup\n"); |
| 187 | debugfs_remove(fsc->debugfs_bdi); | ||
| 188 | debugfs_remove(fsc->debugfs_congestion_kb); | ||
| 189 | debugfs_remove(fsc->debugfs_mdsmap); | ||
| 190 | debugfs_remove(fsc->debugfs_caps); | ||
| 191 | debugfs_remove(fsc->debugfs_mdsc); | ||
| 192 | debugfs_remove(fsc->debugfs_dentry_lru); | ||
| 361 | } | 193 | } |
| 362 | 194 | ||
| 363 | int ceph_debugfs_client_init(struct ceph_client *client) | 195 | int ceph_fs_debugfs_init(struct ceph_fs_client *fsc) |
| 364 | { | 196 | { |
| 365 | int ret = 0; | 197 | char name[100]; |
| 366 | char name[80]; | 198 | int err = -ENOMEM; |
| 367 | |||
| 368 | snprintf(name, sizeof(name), "%pU.client%lld", &client->fsid, | ||
| 369 | client->monc.auth->global_id); | ||
| 370 | 199 | ||
| 371 | client->debugfs_dir = debugfs_create_dir(name, ceph_debugfs_dir); | 200 | dout("ceph_fs_debugfs_init\n"); |
| 372 | if (!client->debugfs_dir) | 201 | fsc->debugfs_congestion_kb = |
| 373 | goto out; | 202 | debugfs_create_file("writeback_congestion_kb", |
| 374 | 203 | 0600, | |
| 375 | client->monc.debugfs_file = debugfs_create_file("monc", | 204 | fsc->client->debugfs_dir, |
| 376 | 0600, | 205 | fsc, |
| 377 | client->debugfs_dir, | 206 | &congestion_kb_fops); |
| 378 | client, | 207 | if (!fsc->debugfs_congestion_kb) |
| 379 | &monc_show_fops); | ||
| 380 | if (!client->monc.debugfs_file) | ||
| 381 | goto out; | 208 | goto out; |
| 382 | 209 | ||
| 383 | client->mdsc.debugfs_file = debugfs_create_file("mdsc", | 210 | dout("a\n"); |
| 384 | 0600, | ||
| 385 | client->debugfs_dir, | ||
| 386 | client, | ||
| 387 | &mdsc_show_fops); | ||
| 388 | if (!client->mdsc.debugfs_file) | ||
| 389 | goto out; | ||
| 390 | 211 | ||
| 391 | client->osdc.debugfs_file = debugfs_create_file("osdc", | 212 | snprintf(name, sizeof(name), "../../bdi/%s", |
| 392 | 0600, | 213 | dev_name(fsc->backing_dev_info.dev)); |
| 393 | client->debugfs_dir, | 214 | fsc->debugfs_bdi = |
| 394 | client, | 215 | debugfs_create_symlink("bdi", |
| 395 | &osdc_show_fops); | 216 | fsc->client->debugfs_dir, |
| 396 | if (!client->osdc.debugfs_file) | 217 | name); |
| 218 | if (!fsc->debugfs_bdi) | ||
| 397 | goto out; | 219 | goto out; |
| 398 | 220 | ||
| 399 | client->debugfs_monmap = debugfs_create_file("monmap", | 221 | dout("b\n"); |
| 222 | fsc->debugfs_mdsmap = debugfs_create_file("mdsmap", | ||
| 400 | 0600, | 223 | 0600, |
| 401 | client->debugfs_dir, | 224 | fsc->client->debugfs_dir, |
| 402 | client, | 225 | fsc, |
| 403 | &monmap_show_fops); | ||
| 404 | if (!client->debugfs_monmap) | ||
| 405 | goto out; | ||
| 406 | |||
| 407 | client->debugfs_mdsmap = debugfs_create_file("mdsmap", | ||
| 408 | 0600, | ||
| 409 | client->debugfs_dir, | ||
| 410 | client, | ||
| 411 | &mdsmap_show_fops); | 226 | &mdsmap_show_fops); |
| 412 | if (!client->debugfs_mdsmap) | 227 | if (!fsc->debugfs_mdsmap) |
| 413 | goto out; | ||
| 414 | |||
| 415 | client->debugfs_osdmap = debugfs_create_file("osdmap", | ||
| 416 | 0600, | ||
| 417 | client->debugfs_dir, | ||
| 418 | client, | ||
| 419 | &osdmap_show_fops); | ||
| 420 | if (!client->debugfs_osdmap) | ||
| 421 | goto out; | 228 | goto out; |
| 422 | 229 | ||
| 423 | client->debugfs_dentry_lru = debugfs_create_file("dentry_lru", | 230 | dout("ca\n"); |
| 424 | 0600, | 231 | fsc->debugfs_mdsc = debugfs_create_file("mdsc", |
| 425 | client->debugfs_dir, | 232 | 0600, |
| 426 | client, | 233 | fsc->client->debugfs_dir, |
| 427 | &dentry_lru_show_fops); | 234 | fsc, |
| 428 | if (!client->debugfs_dentry_lru) | 235 | &mdsc_show_fops); |
| 236 | if (!fsc->debugfs_mdsc) | ||
| 429 | goto out; | 237 | goto out; |
| 430 | 238 | ||
| 431 | client->debugfs_caps = debugfs_create_file("caps", | 239 | dout("da\n"); |
| 240 | fsc->debugfs_caps = debugfs_create_file("caps", | ||
| 432 | 0400, | 241 | 0400, |
| 433 | client->debugfs_dir, | 242 | fsc->client->debugfs_dir, |
| 434 | client, | 243 | fsc, |
| 435 | &caps_show_fops); | 244 | &caps_show_fops); |
| 436 | if (!client->debugfs_caps) | 245 | if (!fsc->debugfs_caps) |
| 437 | goto out; | 246 | goto out; |
| 438 | 247 | ||
| 439 | client->debugfs_congestion_kb = | 248 | dout("ea\n"); |
| 440 | debugfs_create_file("writeback_congestion_kb", | 249 | fsc->debugfs_dentry_lru = debugfs_create_file("dentry_lru", |
| 441 | 0600, | 250 | 0600, |
| 442 | client->debugfs_dir, | 251 | fsc->client->debugfs_dir, |
| 443 | client, | 252 | fsc, |
| 444 | &congestion_kb_fops); | 253 | &dentry_lru_show_fops); |
| 445 | if (!client->debugfs_congestion_kb) | 254 | if (!fsc->debugfs_dentry_lru) |
| 446 | goto out; | 255 | goto out; |
| 447 | 256 | ||
| 448 | sprintf(name, "../../bdi/%s", dev_name(client->sb->s_bdi->dev)); | ||
| 449 | client->debugfs_bdi = debugfs_create_symlink("bdi", client->debugfs_dir, | ||
| 450 | name); | ||
| 451 | |||
| 452 | return 0; | 257 | return 0; |
| 453 | 258 | ||
| 454 | out: | 259 | out: |
| 455 | ceph_debugfs_client_cleanup(client); | 260 | ceph_fs_debugfs_cleanup(fsc); |
| 456 | return ret; | 261 | return err; |
| 457 | } | 262 | } |
| 458 | 263 | ||
| 459 | void ceph_debugfs_client_cleanup(struct ceph_client *client) | ||
| 460 | { | ||
| 461 | debugfs_remove(client->debugfs_bdi); | ||
| 462 | debugfs_remove(client->debugfs_caps); | ||
| 463 | debugfs_remove(client->debugfs_dentry_lru); | ||
| 464 | debugfs_remove(client->debugfs_osdmap); | ||
| 465 | debugfs_remove(client->debugfs_mdsmap); | ||
| 466 | debugfs_remove(client->debugfs_monmap); | ||
| 467 | debugfs_remove(client->osdc.debugfs_file); | ||
| 468 | debugfs_remove(client->mdsc.debugfs_file); | ||
| 469 | debugfs_remove(client->monc.debugfs_file); | ||
| 470 | debugfs_remove(client->debugfs_congestion_kb); | ||
| 471 | debugfs_remove(client->debugfs_dir); | ||
| 472 | } | ||
| 473 | 264 | ||
| 474 | #else /* CONFIG_DEBUG_FS */ | 265 | #else /* CONFIG_DEBUG_FS */ |
| 475 | 266 | ||
| 476 | int __init ceph_debugfs_init(void) | 267 | int ceph_fs_debugfs_init(struct ceph_fs_client *fsc) |
| 477 | { | ||
| 478 | return 0; | ||
| 479 | } | ||
| 480 | |||
| 481 | void ceph_debugfs_cleanup(void) | ||
| 482 | { | ||
| 483 | } | ||
| 484 | |||
| 485 | int ceph_debugfs_client_init(struct ceph_client *client) | ||
| 486 | { | 268 | { |
| 487 | return 0; | 269 | return 0; |
| 488 | } | 270 | } |
| 489 | 271 | ||
| 490 | void ceph_debugfs_client_cleanup(struct ceph_client *client) | 272 | void ceph_fs_debugfs_cleanup(struct ceph_fs_client *fsc) |
| 491 | { | 273 | { |
| 492 | } | 274 | } |
| 493 | 275 | ||
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index a1986eb52045..e0a2dc6fcafc 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c | |||
| @@ -1,4 +1,4 @@ | |||
| 1 | #include "ceph_debug.h" | 1 | #include <linux/ceph/ceph_debug.h> |
| 2 | 2 | ||
| 3 | #include <linux/spinlock.h> | 3 | #include <linux/spinlock.h> |
| 4 | #include <linux/fs_struct.h> | 4 | #include <linux/fs_struct.h> |
| @@ -7,6 +7,7 @@ | |||
| 7 | #include <linux/sched.h> | 7 | #include <linux/sched.h> |
| 8 | 8 | ||
| 9 | #include "super.h" | 9 | #include "super.h" |
| 10 | #include "mds_client.h" | ||
| 10 | 11 | ||
| 11 | /* | 12 | /* |
| 12 | * Directory operations: readdir, lookup, create, link, unlink, | 13 | * Directory operations: readdir, lookup, create, link, unlink, |
| @@ -94,10 +95,7 @@ static unsigned fpos_off(loff_t p) | |||
| 94 | */ | 95 | */ |
| 95 | static int __dcache_readdir(struct file *filp, | 96 | static int __dcache_readdir(struct file *filp, |
| 96 | void *dirent, filldir_t filldir) | 97 | void *dirent, filldir_t filldir) |
| 97 | __releases(inode->i_lock) | ||
| 98 | __acquires(inode->i_lock) | ||
| 99 | { | 98 | { |
| 100 | struct inode *inode = filp->f_dentry->d_inode; | ||
| 101 | struct ceph_file_info *fi = filp->private_data; | 99 | struct ceph_file_info *fi = filp->private_data; |
| 102 | struct dentry *parent = filp->f_dentry; | 100 | struct dentry *parent = filp->f_dentry; |
| 103 | struct inode *dir = parent->d_inode; | 101 | struct inode *dir = parent->d_inode; |
| @@ -153,7 +151,6 @@ more: | |||
| 153 | 151 | ||
| 154 | atomic_inc(&dentry->d_count); | 152 | atomic_inc(&dentry->d_count); |
| 155 | spin_unlock(&dcache_lock); | 153 | spin_unlock(&dcache_lock); |
| 156 | spin_unlock(&inode->i_lock); | ||
| 157 | 154 | ||
| 158 | dout(" %llu (%llu) dentry %p %.*s %p\n", di->offset, filp->f_pos, | 155 | dout(" %llu (%llu) dentry %p %.*s %p\n", di->offset, filp->f_pos, |
| 159 | dentry, dentry->d_name.len, dentry->d_name.name, dentry->d_inode); | 156 | dentry, dentry->d_name.len, dentry->d_name.name, dentry->d_inode); |
| @@ -171,35 +168,30 @@ more: | |||
| 171 | } else { | 168 | } else { |
| 172 | dput(last); | 169 | dput(last); |
| 173 | } | 170 | } |
| 174 | last = NULL; | ||
| 175 | } | 171 | } |
| 176 | |||
| 177 | spin_lock(&inode->i_lock); | ||
| 178 | spin_lock(&dcache_lock); | ||
| 179 | |||
| 180 | last = dentry; | 172 | last = dentry; |
| 181 | 173 | ||
| 182 | if (err < 0) | 174 | if (err < 0) |
| 183 | goto out_unlock; | 175 | goto out; |
| 184 | 176 | ||
| 185 | p = p->prev; | ||
| 186 | filp->f_pos++; | 177 | filp->f_pos++; |
| 187 | 178 | ||
| 188 | /* make sure a dentry wasn't dropped while we didn't have dcache_lock */ | 179 | /* make sure a dentry wasn't dropped while we didn't have dcache_lock */ |
| 189 | if ((ceph_inode(dir)->i_ceph_flags & CEPH_I_COMPLETE)) | 180 | if (!ceph_i_test(dir, CEPH_I_COMPLETE)) { |
| 190 | goto more; | 181 | dout(" lost I_COMPLETE on %p; falling back to mds\n", dir); |
| 191 | dout(" lost I_COMPLETE on %p; falling back to mds\n", dir); | 182 | err = -EAGAIN; |
| 192 | err = -EAGAIN; | 183 | goto out; |
| 184 | } | ||
| 185 | |||
| 186 | spin_lock(&dcache_lock); | ||
| 187 | p = p->prev; /* advance to next dentry */ | ||
| 188 | goto more; | ||
| 193 | 189 | ||
| 194 | out_unlock: | 190 | out_unlock: |
| 195 | spin_unlock(&dcache_lock); | 191 | spin_unlock(&dcache_lock); |
| 196 | 192 | out: | |
| 197 | if (last) { | 193 | if (last) |
| 198 | spin_unlock(&inode->i_lock); | ||
| 199 | dput(last); | 194 | dput(last); |
| 200 | spin_lock(&inode->i_lock); | ||
| 201 | } | ||
| 202 | |||
| 203 | return err; | 195 | return err; |
| 204 | } | 196 | } |
| 205 | 197 | ||
| @@ -227,15 +219,15 @@ static int ceph_readdir(struct file *filp, void *dirent, filldir_t filldir) | |||
| 227 | struct ceph_file_info *fi = filp->private_data; | 219 | struct ceph_file_info *fi = filp->private_data; |
| 228 | struct inode *inode = filp->f_dentry->d_inode; | 220 | struct inode *inode = filp->f_dentry->d_inode; |
| 229 | struct ceph_inode_info *ci = ceph_inode(inode); | 221 | struct ceph_inode_info *ci = ceph_inode(inode); |
| 230 | struct ceph_client *client = ceph_inode_to_client(inode); | 222 | struct ceph_fs_client *fsc = ceph_inode_to_client(inode); |
| 231 | struct ceph_mds_client *mdsc = &client->mdsc; | 223 | struct ceph_mds_client *mdsc = fsc->mdsc; |
| 232 | unsigned frag = fpos_frag(filp->f_pos); | 224 | unsigned frag = fpos_frag(filp->f_pos); |
| 233 | int off = fpos_off(filp->f_pos); | 225 | int off = fpos_off(filp->f_pos); |
| 234 | int err; | 226 | int err; |
| 235 | u32 ftype; | 227 | u32 ftype; |
| 236 | struct ceph_mds_reply_info_parsed *rinfo; | 228 | struct ceph_mds_reply_info_parsed *rinfo; |
| 237 | const int max_entries = client->mount_args->max_readdir; | 229 | const int max_entries = fsc->mount_options->max_readdir; |
| 238 | const int max_bytes = client->mount_args->max_readdir_bytes; | 230 | const int max_bytes = fsc->mount_options->max_readdir_bytes; |
| 239 | 231 | ||
| 240 | dout("readdir %p filp %p frag %u off %u\n", inode, filp, frag, off); | 232 | dout("readdir %p filp %p frag %u off %u\n", inode, filp, frag, off); |
| 241 | if (fi->at_end) | 233 | if (fi->at_end) |
| @@ -267,17 +259,17 @@ static int ceph_readdir(struct file *filp, void *dirent, filldir_t filldir) | |||
| 267 | /* can we use the dcache? */ | 259 | /* can we use the dcache? */ |
| 268 | spin_lock(&inode->i_lock); | 260 | spin_lock(&inode->i_lock); |
| 269 | if ((filp->f_pos == 2 || fi->dentry) && | 261 | if ((filp->f_pos == 2 || fi->dentry) && |
| 270 | !ceph_test_opt(client, NOASYNCREADDIR) && | 262 | !ceph_test_mount_opt(fsc, NOASYNCREADDIR) && |
| 271 | ceph_snap(inode) != CEPH_SNAPDIR && | 263 | ceph_snap(inode) != CEPH_SNAPDIR && |
| 272 | (ci->i_ceph_flags & CEPH_I_COMPLETE) && | 264 | (ci->i_ceph_flags & CEPH_I_COMPLETE) && |
| 273 | __ceph_caps_issued_mask(ci, CEPH_CAP_FILE_SHARED, 1)) { | 265 | __ceph_caps_issued_mask(ci, CEPH_CAP_FILE_SHARED, 1)) { |
| 266 | spin_unlock(&inode->i_lock); | ||
| 274 | err = __dcache_readdir(filp, dirent, filldir); | 267 | err = __dcache_readdir(filp, dirent, filldir); |
| 275 | if (err != -EAGAIN) { | 268 | if (err != -EAGAIN) |
| 276 | spin_unlock(&inode->i_lock); | ||
| 277 | return err; | 269 | return err; |
| 278 | } | 270 | } else { |
| 271 | spin_unlock(&inode->i_lock); | ||
| 279 | } | 272 | } |
| 280 | spin_unlock(&inode->i_lock); | ||
| 281 | if (fi->dentry) { | 273 | if (fi->dentry) { |
| 282 | err = note_last_dentry(fi, fi->dentry->d_name.name, | 274 | err = note_last_dentry(fi, fi->dentry->d_name.name, |
| 283 | fi->dentry->d_name.len); | 275 | fi->dentry->d_name.len); |
| @@ -487,14 +479,13 @@ static loff_t ceph_dir_llseek(struct file *file, loff_t offset, int origin) | |||
| 487 | struct dentry *ceph_finish_lookup(struct ceph_mds_request *req, | 479 | struct dentry *ceph_finish_lookup(struct ceph_mds_request *req, |
| 488 | struct dentry *dentry, int err) | 480 | struct dentry *dentry, int err) |
| 489 | { | 481 | { |
| 490 | struct ceph_client *client = ceph_sb_to_client(dentry->d_sb); | 482 | struct ceph_fs_client *fsc = ceph_sb_to_client(dentry->d_sb); |
| 491 | struct inode *parent = dentry->d_parent->d_inode; | 483 | struct inode *parent = dentry->d_parent->d_inode; |
| 492 | 484 | ||
| 493 | /* .snap dir? */ | 485 | /* .snap dir? */ |
| 494 | if (err == -ENOENT && | 486 | if (err == -ENOENT && |
| 495 | ceph_vino(parent).ino != CEPH_INO_ROOT && /* no .snap in root dir */ | ||
| 496 | strcmp(dentry->d_name.name, | 487 | strcmp(dentry->d_name.name, |
| 497 | client->mount_args->snapdir_name) == 0) { | 488 | fsc->mount_options->snapdir_name) == 0) { |
| 498 | struct inode *inode = ceph_get_snapdir(parent); | 489 | struct inode *inode = ceph_get_snapdir(parent); |
| 499 | dout("ENOENT on snapdir %p '%.*s', linking to snapdir %p\n", | 490 | dout("ENOENT on snapdir %p '%.*s', linking to snapdir %p\n", |
| 500 | dentry, dentry->d_name.len, dentry->d_name.name, inode); | 491 | dentry, dentry->d_name.len, dentry->d_name.name, inode); |
| @@ -539,8 +530,8 @@ static int is_root_ceph_dentry(struct inode *inode, struct dentry *dentry) | |||
| 539 | static struct dentry *ceph_lookup(struct inode *dir, struct dentry *dentry, | 530 | static struct dentry *ceph_lookup(struct inode *dir, struct dentry *dentry, |
| 540 | struct nameidata *nd) | 531 | struct nameidata *nd) |
| 541 | { | 532 | { |
| 542 | struct ceph_client *client = ceph_sb_to_client(dir->i_sb); | 533 | struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb); |
| 543 | struct ceph_mds_client *mdsc = &client->mdsc; | 534 | struct ceph_mds_client *mdsc = fsc->mdsc; |
| 544 | struct ceph_mds_request *req; | 535 | struct ceph_mds_request *req; |
| 545 | int op; | 536 | int op; |
| 546 | int err; | 537 | int err; |
| @@ -572,7 +563,7 @@ static struct dentry *ceph_lookup(struct inode *dir, struct dentry *dentry, | |||
| 572 | spin_lock(&dir->i_lock); | 563 | spin_lock(&dir->i_lock); |
| 573 | dout(" dir %p flags are %d\n", dir, ci->i_ceph_flags); | 564 | dout(" dir %p flags are %d\n", dir, ci->i_ceph_flags); |
| 574 | if (strncmp(dentry->d_name.name, | 565 | if (strncmp(dentry->d_name.name, |
| 575 | client->mount_args->snapdir_name, | 566 | fsc->mount_options->snapdir_name, |
| 576 | dentry->d_name.len) && | 567 | dentry->d_name.len) && |
| 577 | !is_root_ceph_dentry(dir, dentry) && | 568 | !is_root_ceph_dentry(dir, dentry) && |
| 578 | (ci->i_ceph_flags & CEPH_I_COMPLETE) && | 569 | (ci->i_ceph_flags & CEPH_I_COMPLETE) && |
| @@ -629,8 +620,8 @@ int ceph_handle_notrace_create(struct inode *dir, struct dentry *dentry) | |||
| 629 | static int ceph_mknod(struct inode *dir, struct dentry *dentry, | 620 | static int ceph_mknod(struct inode *dir, struct dentry *dentry, |
| 630 | int mode, dev_t rdev) | 621 | int mode, dev_t rdev) |
| 631 | { | 622 | { |
| 632 | struct ceph_client *client = ceph_sb_to_client(dir->i_sb); | 623 | struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb); |
| 633 | struct ceph_mds_client *mdsc = &client->mdsc; | 624 | struct ceph_mds_client *mdsc = fsc->mdsc; |
| 634 | struct ceph_mds_request *req; | 625 | struct ceph_mds_request *req; |
| 635 | int err; | 626 | int err; |
| 636 | 627 | ||
| @@ -685,8 +676,8 @@ static int ceph_create(struct inode *dir, struct dentry *dentry, int mode, | |||
| 685 | static int ceph_symlink(struct inode *dir, struct dentry *dentry, | 676 | static int ceph_symlink(struct inode *dir, struct dentry *dentry, |
| 686 | const char *dest) | 677 | const char *dest) |
| 687 | { | 678 | { |
| 688 | struct ceph_client *client = ceph_sb_to_client(dir->i_sb); | 679 | struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb); |
| 689 | struct ceph_mds_client *mdsc = &client->mdsc; | 680 | struct ceph_mds_client *mdsc = fsc->mdsc; |
| 690 | struct ceph_mds_request *req; | 681 | struct ceph_mds_request *req; |
| 691 | int err; | 682 | int err; |
| 692 | 683 | ||
| @@ -716,8 +707,8 @@ static int ceph_symlink(struct inode *dir, struct dentry *dentry, | |||
| 716 | 707 | ||
| 717 | static int ceph_mkdir(struct inode *dir, struct dentry *dentry, int mode) | 708 | static int ceph_mkdir(struct inode *dir, struct dentry *dentry, int mode) |
| 718 | { | 709 | { |
| 719 | struct ceph_client *client = ceph_sb_to_client(dir->i_sb); | 710 | struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb); |
| 720 | struct ceph_mds_client *mdsc = &client->mdsc; | 711 | struct ceph_mds_client *mdsc = fsc->mdsc; |
| 721 | struct ceph_mds_request *req; | 712 | struct ceph_mds_request *req; |
| 722 | int err = -EROFS; | 713 | int err = -EROFS; |
| 723 | int op; | 714 | int op; |
| @@ -758,8 +749,8 @@ out: | |||
| 758 | static int ceph_link(struct dentry *old_dentry, struct inode *dir, | 749 | static int ceph_link(struct dentry *old_dentry, struct inode *dir, |
| 759 | struct dentry *dentry) | 750 | struct dentry *dentry) |
| 760 | { | 751 | { |
| 761 | struct ceph_client *client = ceph_sb_to_client(dir->i_sb); | 752 | struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb); |
| 762 | struct ceph_mds_client *mdsc = &client->mdsc; | 753 | struct ceph_mds_client *mdsc = fsc->mdsc; |
| 763 | struct ceph_mds_request *req; | 754 | struct ceph_mds_request *req; |
| 764 | int err; | 755 | int err; |
| 765 | 756 | ||
| @@ -813,8 +804,8 @@ static int drop_caps_for_unlink(struct inode *inode) | |||
| 813 | */ | 804 | */ |
| 814 | static int ceph_unlink(struct inode *dir, struct dentry *dentry) | 805 | static int ceph_unlink(struct inode *dir, struct dentry *dentry) |
| 815 | { | 806 | { |
| 816 | struct ceph_client *client = ceph_sb_to_client(dir->i_sb); | 807 | struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb); |
| 817 | struct ceph_mds_client *mdsc = &client->mdsc; | 808 | struct ceph_mds_client *mdsc = fsc->mdsc; |
| 818 | struct inode *inode = dentry->d_inode; | 809 | struct inode *inode = dentry->d_inode; |
| 819 | struct ceph_mds_request *req; | 810 | struct ceph_mds_request *req; |
| 820 | int err = -EROFS; | 811 | int err = -EROFS; |
| @@ -854,8 +845,8 @@ out: | |||
| 854 | static int ceph_rename(struct inode *old_dir, struct dentry *old_dentry, | 845 | static int ceph_rename(struct inode *old_dir, struct dentry *old_dentry, |
| 855 | struct inode *new_dir, struct dentry *new_dentry) | 846 | struct inode *new_dir, struct dentry *new_dentry) |
| 856 | { | 847 | { |
| 857 | struct ceph_client *client = ceph_sb_to_client(old_dir->i_sb); | 848 | struct ceph_fs_client *fsc = ceph_sb_to_client(old_dir->i_sb); |
| 858 | struct ceph_mds_client *mdsc = &client->mdsc; | 849 | struct ceph_mds_client *mdsc = fsc->mdsc; |
| 859 | struct ceph_mds_request *req; | 850 | struct ceph_mds_request *req; |
| 860 | int err; | 851 | int err; |
| 861 | 852 | ||
| @@ -1076,7 +1067,7 @@ static ssize_t ceph_read_dir(struct file *file, char __user *buf, size_t size, | |||
| 1076 | struct ceph_inode_info *ci = ceph_inode(inode); | 1067 | struct ceph_inode_info *ci = ceph_inode(inode); |
| 1077 | int left; | 1068 | int left; |
| 1078 | 1069 | ||
| 1079 | if (!ceph_test_opt(ceph_sb_to_client(inode->i_sb), DIRSTAT)) | 1070 | if (!ceph_test_mount_opt(ceph_sb_to_client(inode->i_sb), DIRSTAT)) |
| 1080 | return -EISDIR; | 1071 | return -EISDIR; |
| 1081 | 1072 | ||
| 1082 | if (!cf->dir_info) { | 1073 | if (!cf->dir_info) { |
| @@ -1177,7 +1168,7 @@ void ceph_dentry_lru_add(struct dentry *dn) | |||
| 1177 | dout("dentry_lru_add %p %p '%.*s'\n", di, dn, | 1168 | dout("dentry_lru_add %p %p '%.*s'\n", di, dn, |
| 1178 | dn->d_name.len, dn->d_name.name); | 1169 | dn->d_name.len, dn->d_name.name); |
| 1179 | if (di) { | 1170 | if (di) { |
| 1180 | mdsc = &ceph_sb_to_client(dn->d_sb)->mdsc; | 1171 | mdsc = ceph_sb_to_client(dn->d_sb)->mdsc; |
| 1181 | spin_lock(&mdsc->dentry_lru_lock); | 1172 | spin_lock(&mdsc->dentry_lru_lock); |
| 1182 | list_add_tail(&di->lru, &mdsc->dentry_lru); | 1173 | list_add_tail(&di->lru, &mdsc->dentry_lru); |
| 1183 | mdsc->num_dentry++; | 1174 | mdsc->num_dentry++; |
| @@ -1193,7 +1184,7 @@ void ceph_dentry_lru_touch(struct dentry *dn) | |||
| 1193 | dout("dentry_lru_touch %p %p '%.*s' (offset %lld)\n", di, dn, | 1184 | dout("dentry_lru_touch %p %p '%.*s' (offset %lld)\n", di, dn, |
| 1194 | dn->d_name.len, dn->d_name.name, di->offset); | 1185 | dn->d_name.len, dn->d_name.name, di->offset); |
| 1195 | if (di) { | 1186 | if (di) { |
| 1196 | mdsc = &ceph_sb_to_client(dn->d_sb)->mdsc; | 1187 | mdsc = ceph_sb_to_client(dn->d_sb)->mdsc; |
| 1197 | spin_lock(&mdsc->dentry_lru_lock); | 1188 | spin_lock(&mdsc->dentry_lru_lock); |
| 1198 | list_move_tail(&di->lru, &mdsc->dentry_lru); | 1189 | list_move_tail(&di->lru, &mdsc->dentry_lru); |
| 1199 | spin_unlock(&mdsc->dentry_lru_lock); | 1190 | spin_unlock(&mdsc->dentry_lru_lock); |
| @@ -1208,7 +1199,7 @@ void ceph_dentry_lru_del(struct dentry *dn) | |||
| 1208 | dout("dentry_lru_del %p %p '%.*s'\n", di, dn, | 1199 | dout("dentry_lru_del %p %p '%.*s'\n", di, dn, |
| 1209 | dn->d_name.len, dn->d_name.name); | 1200 | dn->d_name.len, dn->d_name.name); |
| 1210 | if (di) { | 1201 | if (di) { |
| 1211 | mdsc = &ceph_sb_to_client(dn->d_sb)->mdsc; | 1202 | mdsc = ceph_sb_to_client(dn->d_sb)->mdsc; |
| 1212 | spin_lock(&mdsc->dentry_lru_lock); | 1203 | spin_lock(&mdsc->dentry_lru_lock); |
| 1213 | list_del_init(&di->lru); | 1204 | list_del_init(&di->lru); |
| 1214 | mdsc->num_dentry--; | 1205 | mdsc->num_dentry--; |
diff --git a/fs/ceph/export.c b/fs/ceph/export.c index e38423e82f2e..2297d9426992 100644 --- a/fs/ceph/export.c +++ b/fs/ceph/export.c | |||
| @@ -1,10 +1,11 @@ | |||
| 1 | #include "ceph_debug.h" | 1 | #include <linux/ceph/ceph_debug.h> |
| 2 | 2 | ||
| 3 | #include <linux/exportfs.h> | 3 | #include <linux/exportfs.h> |
| 4 | #include <linux/slab.h> | 4 | #include <linux/slab.h> |
| 5 | #include <asm/unaligned.h> | 5 | #include <asm/unaligned.h> |
| 6 | 6 | ||
| 7 | #include "super.h" | 7 | #include "super.h" |
| 8 | #include "mds_client.h" | ||
| 8 | 9 | ||
| 9 | /* | 10 | /* |
| 10 | * NFS export support | 11 | * NFS export support |
| @@ -120,7 +121,7 @@ static struct dentry *__fh_to_dentry(struct super_block *sb, | |||
| 120 | static struct dentry *__cfh_to_dentry(struct super_block *sb, | 121 | static struct dentry *__cfh_to_dentry(struct super_block *sb, |
| 121 | struct ceph_nfs_confh *cfh) | 122 | struct ceph_nfs_confh *cfh) |
| 122 | { | 123 | { |
| 123 | struct ceph_mds_client *mdsc = &ceph_sb_to_client(sb)->mdsc; | 124 | struct ceph_mds_client *mdsc = ceph_sb_to_client(sb)->mdsc; |
| 124 | struct inode *inode; | 125 | struct inode *inode; |
| 125 | struct dentry *dentry; | 126 | struct dentry *dentry; |
| 126 | struct ceph_vino vino; | 127 | struct ceph_vino vino; |
diff --git a/fs/ceph/file.c b/fs/ceph/file.c index 66e4da6dba22..e77c28cf3690 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c | |||
| @@ -1,5 +1,6 @@ | |||
| 1 | #include "ceph_debug.h" | 1 | #include <linux/ceph/ceph_debug.h> |
| 2 | 2 | ||
| 3 | #include <linux/module.h> | ||
| 3 | #include <linux/sched.h> | 4 | #include <linux/sched.h> |
| 4 | #include <linux/slab.h> | 5 | #include <linux/slab.h> |
| 5 | #include <linux/file.h> | 6 | #include <linux/file.h> |
| @@ -38,8 +39,8 @@ | |||
| 38 | static struct ceph_mds_request * | 39 | static struct ceph_mds_request * |
| 39 | prepare_open_request(struct super_block *sb, int flags, int create_mode) | 40 | prepare_open_request(struct super_block *sb, int flags, int create_mode) |
| 40 | { | 41 | { |
| 41 | struct ceph_client *client = ceph_sb_to_client(sb); | 42 | struct ceph_fs_client *fsc = ceph_sb_to_client(sb); |
| 42 | struct ceph_mds_client *mdsc = &client->mdsc; | 43 | struct ceph_mds_client *mdsc = fsc->mdsc; |
| 43 | struct ceph_mds_request *req; | 44 | struct ceph_mds_request *req; |
| 44 | int want_auth = USE_ANY_MDS; | 45 | int want_auth = USE_ANY_MDS; |
| 45 | int op = (flags & O_CREAT) ? CEPH_MDS_OP_CREATE : CEPH_MDS_OP_OPEN; | 46 | int op = (flags & O_CREAT) ? CEPH_MDS_OP_CREATE : CEPH_MDS_OP_OPEN; |
| @@ -117,8 +118,8 @@ static int ceph_init_file(struct inode *inode, struct file *file, int fmode) | |||
| 117 | int ceph_open(struct inode *inode, struct file *file) | 118 | int ceph_open(struct inode *inode, struct file *file) |
| 118 | { | 119 | { |
| 119 | struct ceph_inode_info *ci = ceph_inode(inode); | 120 | struct ceph_inode_info *ci = ceph_inode(inode); |
| 120 | struct ceph_client *client = ceph_sb_to_client(inode->i_sb); | 121 | struct ceph_fs_client *fsc = ceph_sb_to_client(inode->i_sb); |
| 121 | struct ceph_mds_client *mdsc = &client->mdsc; | 122 | struct ceph_mds_client *mdsc = fsc->mdsc; |
| 122 | struct ceph_mds_request *req; | 123 | struct ceph_mds_request *req; |
| 123 | struct ceph_file_info *cf = file->private_data; | 124 | struct ceph_file_info *cf = file->private_data; |
| 124 | struct inode *parent_inode = file->f_dentry->d_parent->d_inode; | 125 | struct inode *parent_inode = file->f_dentry->d_parent->d_inode; |
| @@ -216,8 +217,8 @@ struct dentry *ceph_lookup_open(struct inode *dir, struct dentry *dentry, | |||
| 216 | struct nameidata *nd, int mode, | 217 | struct nameidata *nd, int mode, |
| 217 | int locked_dir) | 218 | int locked_dir) |
| 218 | { | 219 | { |
| 219 | struct ceph_client *client = ceph_sb_to_client(dir->i_sb); | 220 | struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb); |
| 220 | struct ceph_mds_client *mdsc = &client->mdsc; | 221 | struct ceph_mds_client *mdsc = fsc->mdsc; |
| 221 | struct file *file = nd->intent.open.file; | 222 | struct file *file = nd->intent.open.file; |
| 222 | struct inode *parent_inode = get_dentry_parent_inode(file->f_dentry); | 223 | struct inode *parent_inode = get_dentry_parent_inode(file->f_dentry); |
| 223 | struct ceph_mds_request *req; | 224 | struct ceph_mds_request *req; |
| @@ -270,163 +271,6 @@ int ceph_release(struct inode *inode, struct file *file) | |||
| 270 | } | 271 | } |
| 271 | 272 | ||
| 272 | /* | 273 | /* |
| 273 | * build a vector of user pages | ||
| 274 | */ | ||
| 275 | static struct page **get_direct_page_vector(const char __user *data, | ||
| 276 | int num_pages, | ||
| 277 | loff_t off, size_t len) | ||
| 278 | { | ||
| 279 | struct page **pages; | ||
| 280 | int rc; | ||
| 281 | |||
| 282 | pages = kmalloc(sizeof(*pages) * num_pages, GFP_NOFS); | ||
| 283 | if (!pages) | ||
| 284 | return ERR_PTR(-ENOMEM); | ||
| 285 | |||
| 286 | down_read(¤t->mm->mmap_sem); | ||
| 287 | rc = get_user_pages(current, current->mm, (unsigned long)data, | ||
| 288 | num_pages, 0, 0, pages, NULL); | ||
| 289 | up_read(¤t->mm->mmap_sem); | ||
| 290 | if (rc < 0) | ||
| 291 | goto fail; | ||
| 292 | return pages; | ||
| 293 | |||
| 294 | fail: | ||
| 295 | kfree(pages); | ||
| 296 | return ERR_PTR(rc); | ||
| 297 | } | ||
| 298 | |||
| 299 | static void put_page_vector(struct page **pages, int num_pages) | ||
| 300 | { | ||
| 301 | int i; | ||
| 302 | |||
| 303 | for (i = 0; i < num_pages; i++) | ||
| 304 | put_page(pages[i]); | ||
| 305 | kfree(pages); | ||
| 306 | } | ||
| 307 | |||
| 308 | void ceph_release_page_vector(struct page **pages, int num_pages) | ||
| 309 | { | ||
| 310 | int i; | ||
| 311 | |||
| 312 | for (i = 0; i < num_pages; i++) | ||
| 313 | __free_pages(pages[i], 0); | ||
| 314 | kfree(pages); | ||
| 315 | } | ||
| 316 | |||
| 317 | /* | ||
| 318 | * allocate a vector new pages | ||
| 319 | */ | ||
| 320 | static struct page **ceph_alloc_page_vector(int num_pages, gfp_t flags) | ||
| 321 | { | ||
| 322 | struct page **pages; | ||
| 323 | int i; | ||
| 324 | |||
| 325 | pages = kmalloc(sizeof(*pages) * num_pages, flags); | ||
| 326 | if (!pages) | ||
| 327 | return ERR_PTR(-ENOMEM); | ||
| 328 | for (i = 0; i < num_pages; i++) { | ||
| 329 | pages[i] = __page_cache_alloc(flags); | ||
| 330 | if (pages[i] == NULL) { | ||
| 331 | ceph_release_page_vector(pages, i); | ||
| 332 | return ERR_PTR(-ENOMEM); | ||
| 333 | } | ||
| 334 | } | ||
| 335 | return pages; | ||
| 336 | } | ||
| 337 | |||
| 338 | /* | ||
| 339 | * copy user data into a page vector | ||
| 340 | */ | ||
| 341 | static int copy_user_to_page_vector(struct page **pages, | ||
| 342 | const char __user *data, | ||
| 343 | loff_t off, size_t len) | ||
| 344 | { | ||
| 345 | int i = 0; | ||
| 346 | int po = off & ~PAGE_CACHE_MASK; | ||
| 347 | int left = len; | ||
| 348 | int l, bad; | ||
| 349 | |||
| 350 | while (left > 0) { | ||
| 351 | l = min_t(int, PAGE_CACHE_SIZE-po, left); | ||
| 352 | bad = copy_from_user(page_address(pages[i]) + po, data, l); | ||
| 353 | if (bad == l) | ||
| 354 | return -EFAULT; | ||
| 355 | data += l - bad; | ||
| 356 | left -= l - bad; | ||
| 357 | po += l - bad; | ||
| 358 | if (po == PAGE_CACHE_SIZE) { | ||
| 359 | po = 0; | ||
| 360 | i++; | ||
| 361 | } | ||
| 362 | } | ||
| 363 | return len; | ||
| 364 | } | ||
| 365 | |||
| 366 | /* | ||
| 367 | * copy user data from a page vector into a user pointer | ||
| 368 | */ | ||
| 369 | static int copy_page_vector_to_user(struct page **pages, char __user *data, | ||
| 370 | loff_t off, size_t len) | ||
| 371 | { | ||
| 372 | int i = 0; | ||
| 373 | int po = off & ~PAGE_CACHE_MASK; | ||
| 374 | int left = len; | ||
| 375 | int l, bad; | ||
| 376 | |||
| 377 | while (left > 0) { | ||
| 378 | l = min_t(int, left, PAGE_CACHE_SIZE-po); | ||
| 379 | bad = copy_to_user(data, page_address(pages[i]) + po, l); | ||
| 380 | if (bad == l) | ||
| 381 | return -EFAULT; | ||
| 382 | data += l - bad; | ||
| 383 | left -= l - bad; | ||
| 384 | if (po) { | ||
| 385 | po += l - bad; | ||
| 386 | if (po == PAGE_CACHE_SIZE) | ||
| 387 | po = 0; | ||
| 388 | } | ||
| 389 | i++; | ||
| 390 | } | ||
| 391 | return len; | ||
| 392 | } | ||
| 393 | |||
| 394 | /* | ||
| 395 | * Zero an extent within a page vector. Offset is relative to the | ||
| 396 | * start of the first page. | ||
| 397 | */ | ||
| 398 | static void zero_page_vector_range(int off, int len, struct page **pages) | ||
| 399 | { | ||
| 400 | int i = off >> PAGE_CACHE_SHIFT; | ||
| 401 | |||
| 402 | off &= ~PAGE_CACHE_MASK; | ||
| 403 | |||
| 404 | dout("zero_page_vector_page %u~%u\n", off, len); | ||
| 405 | |||
| 406 | /* leading partial page? */ | ||
| 407 | if (off) { | ||
| 408 | int end = min((int)PAGE_CACHE_SIZE, off + len); | ||
| 409 | dout("zeroing %d %p head from %d\n", i, pages[i], | ||
| 410 | (int)off); | ||
| 411 | zero_user_segment(pages[i], off, end); | ||
| 412 | len -= (end - off); | ||
| 413 | i++; | ||
| 414 | } | ||
| 415 | while (len >= PAGE_CACHE_SIZE) { | ||
| 416 | dout("zeroing %d %p len=%d\n", i, pages[i], len); | ||
| 417 | zero_user_segment(pages[i], 0, PAGE_CACHE_SIZE); | ||
| 418 | len -= PAGE_CACHE_SIZE; | ||
| 419 | i++; | ||
| 420 | } | ||
| 421 | /* trailing partial page? */ | ||
| 422 | if (len) { | ||
| 423 | dout("zeroing %d %p tail to %d\n", i, pages[i], (int)len); | ||
| 424 | zero_user_segment(pages[i], 0, len); | ||
| 425 | } | ||
| 426 | } | ||
| 427 | |||
| 428 | |||
| 429 | /* | ||
| 430 | * Read a range of bytes striped over one or more objects. Iterate over | 274 | * Read a range of bytes striped over one or more objects. Iterate over |
| 431 | * objects we stripe over. (That's not atomic, but good enough for now.) | 275 | * objects we stripe over. (That's not atomic, but good enough for now.) |
| 432 | * | 276 | * |
| @@ -438,7 +282,7 @@ static int striped_read(struct inode *inode, | |||
| 438 | struct page **pages, int num_pages, | 282 | struct page **pages, int num_pages, |
| 439 | int *checkeof) | 283 | int *checkeof) |
| 440 | { | 284 | { |
| 441 | struct ceph_client *client = ceph_inode_to_client(inode); | 285 | struct ceph_fs_client *fsc = ceph_inode_to_client(inode); |
| 442 | struct ceph_inode_info *ci = ceph_inode(inode); | 286 | struct ceph_inode_info *ci = ceph_inode(inode); |
| 443 | u64 pos, this_len; | 287 | u64 pos, this_len; |
| 444 | int page_off = off & ~PAGE_CACHE_MASK; /* first byte's offset in page */ | 288 | int page_off = off & ~PAGE_CACHE_MASK; /* first byte's offset in page */ |
| @@ -459,7 +303,7 @@ static int striped_read(struct inode *inode, | |||
| 459 | 303 | ||
| 460 | more: | 304 | more: |
| 461 | this_len = left; | 305 | this_len = left; |
| 462 | ret = ceph_osdc_readpages(&client->osdc, ceph_vino(inode), | 306 | ret = ceph_osdc_readpages(&fsc->client->osdc, ceph_vino(inode), |
| 463 | &ci->i_layout, pos, &this_len, | 307 | &ci->i_layout, pos, &this_len, |
| 464 | ci->i_truncate_seq, | 308 | ci->i_truncate_seq, |
| 465 | ci->i_truncate_size, | 309 | ci->i_truncate_size, |
| @@ -477,8 +321,8 @@ more: | |||
| 477 | 321 | ||
| 478 | if (read < pos - off) { | 322 | if (read < pos - off) { |
| 479 | dout(" zero gap %llu to %llu\n", off + read, pos); | 323 | dout(" zero gap %llu to %llu\n", off + read, pos); |
| 480 | zero_page_vector_range(page_off + read, | 324 | ceph_zero_page_vector_range(page_off + read, |
| 481 | pos - off - read, pages); | 325 | pos - off - read, pages); |
| 482 | } | 326 | } |
| 483 | pos += ret; | 327 | pos += ret; |
| 484 | read = pos - off; | 328 | read = pos - off; |
| @@ -495,8 +339,8 @@ more: | |||
| 495 | /* was original extent fully inside i_size? */ | 339 | /* was original extent fully inside i_size? */ |
| 496 | if (pos + left <= inode->i_size) { | 340 | if (pos + left <= inode->i_size) { |
| 497 | dout("zero tail\n"); | 341 | dout("zero tail\n"); |
| 498 | zero_page_vector_range(page_off + read, len - read, | 342 | ceph_zero_page_vector_range(page_off + read, len - read, |
| 499 | pages); | 343 | pages); |
| 500 | read = len; | 344 | read = len; |
| 501 | goto out; | 345 | goto out; |
| 502 | } | 346 | } |
| @@ -531,7 +375,7 @@ static ssize_t ceph_sync_read(struct file *file, char __user *data, | |||
| 531 | (file->f_flags & O_DIRECT) ? "O_DIRECT" : ""); | 375 | (file->f_flags & O_DIRECT) ? "O_DIRECT" : ""); |
| 532 | 376 | ||
| 533 | if (file->f_flags & O_DIRECT) { | 377 | if (file->f_flags & O_DIRECT) { |
| 534 | pages = get_direct_page_vector(data, num_pages, off, len); | 378 | pages = ceph_get_direct_page_vector(data, num_pages, off, len); |
| 535 | 379 | ||
| 536 | /* | 380 | /* |
| 537 | * flush any page cache pages in this range. this | 381 | * flush any page cache pages in this range. this |
| @@ -552,13 +396,13 @@ static ssize_t ceph_sync_read(struct file *file, char __user *data, | |||
| 552 | ret = striped_read(inode, off, len, pages, num_pages, checkeof); | 396 | ret = striped_read(inode, off, len, pages, num_pages, checkeof); |
| 553 | 397 | ||
| 554 | if (ret >= 0 && (file->f_flags & O_DIRECT) == 0) | 398 | if (ret >= 0 && (file->f_flags & O_DIRECT) == 0) |
| 555 | ret = copy_page_vector_to_user(pages, data, off, ret); | 399 | ret = ceph_copy_page_vector_to_user(pages, data, off, ret); |
| 556 | if (ret >= 0) | 400 | if (ret >= 0) |
| 557 | *poff = off + ret; | 401 | *poff = off + ret; |
| 558 | 402 | ||
| 559 | done: | 403 | done: |
| 560 | if (file->f_flags & O_DIRECT) | 404 | if (file->f_flags & O_DIRECT) |
| 561 | put_page_vector(pages, num_pages); | 405 | ceph_put_page_vector(pages, num_pages); |
| 562 | else | 406 | else |
| 563 | ceph_release_page_vector(pages, num_pages); | 407 | ceph_release_page_vector(pages, num_pages); |
| 564 | dout("sync_read result %d\n", ret); | 408 | dout("sync_read result %d\n", ret); |
| @@ -594,7 +438,7 @@ static ssize_t ceph_sync_write(struct file *file, const char __user *data, | |||
| 594 | { | 438 | { |
| 595 | struct inode *inode = file->f_dentry->d_inode; | 439 | struct inode *inode = file->f_dentry->d_inode; |
| 596 | struct ceph_inode_info *ci = ceph_inode(inode); | 440 | struct ceph_inode_info *ci = ceph_inode(inode); |
| 597 | struct ceph_client *client = ceph_inode_to_client(inode); | 441 | struct ceph_fs_client *fsc = ceph_inode_to_client(inode); |
| 598 | struct ceph_osd_request *req; | 442 | struct ceph_osd_request *req; |
| 599 | struct page **pages; | 443 | struct page **pages; |
| 600 | int num_pages; | 444 | int num_pages; |
| @@ -642,7 +486,7 @@ static ssize_t ceph_sync_write(struct file *file, const char __user *data, | |||
| 642 | */ | 486 | */ |
| 643 | more: | 487 | more: |
| 644 | len = left; | 488 | len = left; |
| 645 | req = ceph_osdc_new_request(&client->osdc, &ci->i_layout, | 489 | req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout, |
| 646 | ceph_vino(inode), pos, &len, | 490 | ceph_vino(inode), pos, &len, |
| 647 | CEPH_OSD_OP_WRITE, flags, | 491 | CEPH_OSD_OP_WRITE, flags, |
| 648 | ci->i_snap_realm->cached_context, | 492 | ci->i_snap_realm->cached_context, |
| @@ -655,7 +499,7 @@ more: | |||
| 655 | num_pages = calc_pages_for(pos, len); | 499 | num_pages = calc_pages_for(pos, len); |
| 656 | 500 | ||
| 657 | if (file->f_flags & O_DIRECT) { | 501 | if (file->f_flags & O_DIRECT) { |
| 658 | pages = get_direct_page_vector(data, num_pages, pos, len); | 502 | pages = ceph_get_direct_page_vector(data, num_pages, pos, len); |
| 659 | if (IS_ERR(pages)) { | 503 | if (IS_ERR(pages)) { |
| 660 | ret = PTR_ERR(pages); | 504 | ret = PTR_ERR(pages); |
| 661 | goto out; | 505 | goto out; |
| @@ -673,7 +517,7 @@ more: | |||
| 673 | ret = PTR_ERR(pages); | 517 | ret = PTR_ERR(pages); |
| 674 | goto out; | 518 | goto out; |
| 675 | } | 519 | } |
| 676 | ret = copy_user_to_page_vector(pages, data, pos, len); | 520 | ret = ceph_copy_user_to_page_vector(pages, data, pos, len); |
| 677 | if (ret < 0) { | 521 | if (ret < 0) { |
| 678 | ceph_release_page_vector(pages, num_pages); | 522 | ceph_release_page_vector(pages, num_pages); |
| 679 | goto out; | 523 | goto out; |
| @@ -689,7 +533,7 @@ more: | |||
| 689 | req->r_num_pages = num_pages; | 533 | req->r_num_pages = num_pages; |
| 690 | req->r_inode = inode; | 534 | req->r_inode = inode; |
| 691 | 535 | ||
| 692 | ret = ceph_osdc_start_request(&client->osdc, req, false); | 536 | ret = ceph_osdc_start_request(&fsc->client->osdc, req, false); |
| 693 | if (!ret) { | 537 | if (!ret) { |
| 694 | if (req->r_safe_callback) { | 538 | if (req->r_safe_callback) { |
| 695 | /* | 539 | /* |
| @@ -701,11 +545,11 @@ more: | |||
| 701 | spin_unlock(&ci->i_unsafe_lock); | 545 | spin_unlock(&ci->i_unsafe_lock); |
| 702 | ceph_get_cap_refs(ci, CEPH_CAP_FILE_WR); | 546 | ceph_get_cap_refs(ci, CEPH_CAP_FILE_WR); |
| 703 | } | 547 | } |
| 704 | ret = ceph_osdc_wait_request(&client->osdc, req); | 548 | ret = ceph_osdc_wait_request(&fsc->client->osdc, req); |
| 705 | } | 549 | } |
| 706 | 550 | ||
| 707 | if (file->f_flags & O_DIRECT) | 551 | if (file->f_flags & O_DIRECT) |
| 708 | put_page_vector(pages, num_pages); | 552 | ceph_put_page_vector(pages, num_pages); |
| 709 | else if (file->f_flags & O_SYNC) | 553 | else if (file->f_flags & O_SYNC) |
| 710 | ceph_release_page_vector(pages, num_pages); | 554 | ceph_release_page_vector(pages, num_pages); |
| 711 | 555 | ||
| @@ -814,7 +658,8 @@ static ssize_t ceph_aio_write(struct kiocb *iocb, const struct iovec *iov, | |||
| 814 | struct ceph_file_info *fi = file->private_data; | 658 | struct ceph_file_info *fi = file->private_data; |
| 815 | struct inode *inode = file->f_dentry->d_inode; | 659 | struct inode *inode = file->f_dentry->d_inode; |
| 816 | struct ceph_inode_info *ci = ceph_inode(inode); | 660 | struct ceph_inode_info *ci = ceph_inode(inode); |
| 817 | struct ceph_osd_client *osdc = &ceph_sb_to_client(inode->i_sb)->osdc; | 661 | struct ceph_osd_client *osdc = |
| 662 | &ceph_sb_to_client(inode->i_sb)->client->osdc; | ||
| 818 | loff_t endoff = pos + iov->iov_len; | 663 | loff_t endoff = pos + iov->iov_len; |
| 819 | int want, got = 0; | 664 | int want, got = 0; |
| 820 | int ret, err; | 665 | int ret, err; |
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index 62377ec37edf..1d6a45b5a04c 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c | |||
| @@ -1,4 +1,4 @@ | |||
| 1 | #include "ceph_debug.h" | 1 | #include <linux/ceph/ceph_debug.h> |
| 2 | 2 | ||
| 3 | #include <linux/module.h> | 3 | #include <linux/module.h> |
| 4 | #include <linux/fs.h> | 4 | #include <linux/fs.h> |
| @@ -13,7 +13,8 @@ | |||
| 13 | #include <linux/pagevec.h> | 13 | #include <linux/pagevec.h> |
| 14 | 14 | ||
| 15 | #include "super.h" | 15 | #include "super.h" |
| 16 | #include "decode.h" | 16 | #include "mds_client.h" |
| 17 | #include <linux/ceph/decode.h> | ||
| 17 | 18 | ||
| 18 | /* | 19 | /* |
| 19 | * Ceph inode operations | 20 | * Ceph inode operations |
| @@ -384,7 +385,7 @@ void ceph_destroy_inode(struct inode *inode) | |||
| 384 | */ | 385 | */ |
| 385 | if (ci->i_snap_realm) { | 386 | if (ci->i_snap_realm) { |
| 386 | struct ceph_mds_client *mdsc = | 387 | struct ceph_mds_client *mdsc = |
| 387 | &ceph_sb_to_client(ci->vfs_inode.i_sb)->mdsc; | 388 | ceph_sb_to_client(ci->vfs_inode.i_sb)->mdsc; |
| 388 | struct ceph_snap_realm *realm = ci->i_snap_realm; | 389 | struct ceph_snap_realm *realm = ci->i_snap_realm; |
| 389 | 390 | ||
| 390 | dout(" dropping residual ref to snap realm %p\n", realm); | 391 | dout(" dropping residual ref to snap realm %p\n", realm); |
| @@ -685,7 +686,7 @@ static int fill_inode(struct inode *inode, | |||
| 685 | } | 686 | } |
| 686 | 687 | ||
| 687 | /* it may be better to set st_size in getattr instead? */ | 688 | /* it may be better to set st_size in getattr instead? */ |
| 688 | if (ceph_test_opt(ceph_sb_to_client(inode->i_sb), RBYTES)) | 689 | if (ceph_test_mount_opt(ceph_sb_to_client(inode->i_sb), RBYTES)) |
| 689 | inode->i_size = ci->i_rbytes; | 690 | inode->i_size = ci->i_rbytes; |
| 690 | break; | 691 | break; |
| 691 | default: | 692 | default: |
| @@ -901,7 +902,7 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req, | |||
| 901 | struct inode *in = NULL; | 902 | struct inode *in = NULL; |
| 902 | struct ceph_mds_reply_inode *ininfo; | 903 | struct ceph_mds_reply_inode *ininfo; |
| 903 | struct ceph_vino vino; | 904 | struct ceph_vino vino; |
| 904 | struct ceph_client *client = ceph_sb_to_client(sb); | 905 | struct ceph_fs_client *fsc = ceph_sb_to_client(sb); |
| 905 | int i = 0; | 906 | int i = 0; |
| 906 | int err = 0; | 907 | int err = 0; |
| 907 | 908 | ||
| @@ -965,7 +966,7 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req, | |||
| 965 | */ | 966 | */ |
| 966 | if (rinfo->head->is_dentry && !req->r_aborted && | 967 | if (rinfo->head->is_dentry && !req->r_aborted && |
| 967 | (rinfo->head->is_target || strncmp(req->r_dentry->d_name.name, | 968 | (rinfo->head->is_target || strncmp(req->r_dentry->d_name.name, |
| 968 | client->mount_args->snapdir_name, | 969 | fsc->mount_options->snapdir_name, |
| 969 | req->r_dentry->d_name.len))) { | 970 | req->r_dentry->d_name.len))) { |
| 970 | /* | 971 | /* |
| 971 | * lookup link rename : null -> possibly existing inode | 972 | * lookup link rename : null -> possibly existing inode |
| @@ -1533,7 +1534,7 @@ int ceph_setattr(struct dentry *dentry, struct iattr *attr) | |||
| 1533 | struct inode *parent_inode = dentry->d_parent->d_inode; | 1534 | struct inode *parent_inode = dentry->d_parent->d_inode; |
| 1534 | const unsigned int ia_valid = attr->ia_valid; | 1535 | const unsigned int ia_valid = attr->ia_valid; |
| 1535 | struct ceph_mds_request *req; | 1536 | struct ceph_mds_request *req; |
| 1536 | struct ceph_mds_client *mdsc = &ceph_sb_to_client(dentry->d_sb)->mdsc; | 1537 | struct ceph_mds_client *mdsc = ceph_sb_to_client(dentry->d_sb)->mdsc; |
| 1537 | int issued; | 1538 | int issued; |
| 1538 | int release = 0, dirtied = 0; | 1539 | int release = 0, dirtied = 0; |
| 1539 | int mask = 0; | 1540 | int mask = 0; |
| @@ -1728,8 +1729,8 @@ out: | |||
| 1728 | */ | 1729 | */ |
| 1729 | int ceph_do_getattr(struct inode *inode, int mask) | 1730 | int ceph_do_getattr(struct inode *inode, int mask) |
| 1730 | { | 1731 | { |
| 1731 | struct ceph_client *client = ceph_sb_to_client(inode->i_sb); | 1732 | struct ceph_fs_client *fsc = ceph_sb_to_client(inode->i_sb); |
| 1732 | struct ceph_mds_client *mdsc = &client->mdsc; | 1733 | struct ceph_mds_client *mdsc = fsc->mdsc; |
| 1733 | struct ceph_mds_request *req; | 1734 | struct ceph_mds_request *req; |
| 1734 | int err; | 1735 | int err; |
| 1735 | 1736 | ||
diff --git a/fs/ceph/ioctl.c b/fs/ceph/ioctl.c index 76e307d2aba1..8888c9ba68db 100644 --- a/fs/ceph/ioctl.c +++ b/fs/ceph/ioctl.c | |||
| @@ -1,8 +1,10 @@ | |||
| 1 | #include <linux/in.h> | 1 | #include <linux/in.h> |
| 2 | 2 | ||
| 3 | #include "ioctl.h" | ||
| 4 | #include "super.h" | 3 | #include "super.h" |
| 5 | #include "ceph_debug.h" | 4 | #include "mds_client.h" |
| 5 | #include <linux/ceph/ceph_debug.h> | ||
| 6 | |||
| 7 | #include "ioctl.h" | ||
| 6 | 8 | ||
| 7 | 9 | ||
| 8 | /* | 10 | /* |
| @@ -37,7 +39,7 @@ static long ceph_ioctl_set_layout(struct file *file, void __user *arg) | |||
| 37 | { | 39 | { |
| 38 | struct inode *inode = file->f_dentry->d_inode; | 40 | struct inode *inode = file->f_dentry->d_inode; |
| 39 | struct inode *parent_inode = file->f_dentry->d_parent->d_inode; | 41 | struct inode *parent_inode = file->f_dentry->d_parent->d_inode; |
| 40 | struct ceph_mds_client *mdsc = &ceph_sb_to_client(inode->i_sb)->mdsc; | 42 | struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc; |
| 41 | struct ceph_mds_request *req; | 43 | struct ceph_mds_request *req; |
| 42 | struct ceph_ioctl_layout l; | 44 | struct ceph_ioctl_layout l; |
| 43 | int err, i; | 45 | int err, i; |
| @@ -90,6 +92,68 @@ static long ceph_ioctl_set_layout(struct file *file, void __user *arg) | |||
| 90 | } | 92 | } |
| 91 | 93 | ||
| 92 | /* | 94 | /* |
| 95 | * Set a layout policy on a directory inode. All items in the tree | ||
| 96 | * rooted at this inode will inherit this layout on creation, | ||
| 97 | * (It doesn't apply retroactively ) | ||
| 98 | * unless a subdirectory has its own layout policy. | ||
| 99 | */ | ||
| 100 | static long ceph_ioctl_set_layout_policy (struct file *file, void __user *arg) | ||
| 101 | { | ||
| 102 | struct inode *inode = file->f_dentry->d_inode; | ||
| 103 | struct ceph_mds_request *req; | ||
| 104 | struct ceph_ioctl_layout l; | ||
| 105 | int err, i; | ||
| 106 | struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc; | ||
| 107 | |||
| 108 | /* copy and validate */ | ||
| 109 | if (copy_from_user(&l, arg, sizeof(l))) | ||
| 110 | return -EFAULT; | ||
| 111 | |||
| 112 | if ((l.object_size & ~PAGE_MASK) || | ||
| 113 | (l.stripe_unit & ~PAGE_MASK) || | ||
| 114 | !l.stripe_unit || | ||
| 115 | (l.object_size && | ||
| 116 | (unsigned)l.object_size % (unsigned)l.stripe_unit)) | ||
| 117 | return -EINVAL; | ||
| 118 | |||
| 119 | /* make sure it's a valid data pool */ | ||
| 120 | if (l.data_pool > 0) { | ||
| 121 | mutex_lock(&mdsc->mutex); | ||
| 122 | err = -EINVAL; | ||
| 123 | for (i = 0; i < mdsc->mdsmap->m_num_data_pg_pools; i++) | ||
| 124 | if (mdsc->mdsmap->m_data_pg_pools[i] == l.data_pool) { | ||
| 125 | err = 0; | ||
| 126 | break; | ||
| 127 | } | ||
| 128 | mutex_unlock(&mdsc->mutex); | ||
| 129 | if (err) | ||
| 130 | return err; | ||
| 131 | } | ||
| 132 | |||
| 133 | req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_SETDIRLAYOUT, | ||
| 134 | USE_AUTH_MDS); | ||
| 135 | |||
| 136 | if (IS_ERR(req)) | ||
| 137 | return PTR_ERR(req); | ||
| 138 | req->r_inode = igrab(inode); | ||
| 139 | |||
| 140 | req->r_args.setlayout.layout.fl_stripe_unit = | ||
| 141 | cpu_to_le32(l.stripe_unit); | ||
| 142 | req->r_args.setlayout.layout.fl_stripe_count = | ||
| 143 | cpu_to_le32(l.stripe_count); | ||
| 144 | req->r_args.setlayout.layout.fl_object_size = | ||
| 145 | cpu_to_le32(l.object_size); | ||
| 146 | req->r_args.setlayout.layout.fl_pg_pool = | ||
| 147 | cpu_to_le32(l.data_pool); | ||
| 148 | req->r_args.setlayout.layout.fl_pg_preferred = | ||
| 149 | cpu_to_le32(l.preferred_osd); | ||
| 150 | |||
| 151 | err = ceph_mdsc_do_request(mdsc, inode, req); | ||
| 152 | ceph_mdsc_put_request(req); | ||
| 153 | return err; | ||
| 154 | } | ||
| 155 | |||
| 156 | /* | ||
| 93 | * Return object name, size/offset information, and location (OSD | 157 | * Return object name, size/offset information, and location (OSD |
| 94 | * number, network address) for a given file offset. | 158 | * number, network address) for a given file offset. |
| 95 | */ | 159 | */ |
| @@ -98,7 +162,8 @@ static long ceph_ioctl_get_dataloc(struct file *file, void __user *arg) | |||
| 98 | struct ceph_ioctl_dataloc dl; | 162 | struct ceph_ioctl_dataloc dl; |
| 99 | struct inode *inode = file->f_dentry->d_inode; | 163 | struct inode *inode = file->f_dentry->d_inode; |
| 100 | struct ceph_inode_info *ci = ceph_inode(inode); | 164 | struct ceph_inode_info *ci = ceph_inode(inode); |
| 101 | struct ceph_osd_client *osdc = &ceph_sb_to_client(inode->i_sb)->osdc; | 165 | struct ceph_osd_client *osdc = |
| 166 | &ceph_sb_to_client(inode->i_sb)->client->osdc; | ||
| 102 | u64 len = 1, olen; | 167 | u64 len = 1, olen; |
| 103 | u64 tmp; | 168 | u64 tmp; |
| 104 | struct ceph_object_layout ol; | 169 | struct ceph_object_layout ol; |
| @@ -174,11 +239,15 @@ long ceph_ioctl(struct file *file, unsigned int cmd, unsigned long arg) | |||
| 174 | case CEPH_IOC_SET_LAYOUT: | 239 | case CEPH_IOC_SET_LAYOUT: |
| 175 | return ceph_ioctl_set_layout(file, (void __user *)arg); | 240 | return ceph_ioctl_set_layout(file, (void __user *)arg); |
| 176 | 241 | ||
| 242 | case CEPH_IOC_SET_LAYOUT_POLICY: | ||
| 243 | return ceph_ioctl_set_layout_policy(file, (void __user *)arg); | ||
| 244 | |||
| 177 | case CEPH_IOC_GET_DATALOC: | 245 | case CEPH_IOC_GET_DATALOC: |
| 178 | return ceph_ioctl_get_dataloc(file, (void __user *)arg); | 246 | return ceph_ioctl_get_dataloc(file, (void __user *)arg); |
| 179 | 247 | ||
| 180 | case CEPH_IOC_LAZYIO: | 248 | case CEPH_IOC_LAZYIO: |
| 181 | return ceph_ioctl_lazyio(file); | 249 | return ceph_ioctl_lazyio(file); |
| 182 | } | 250 | } |
| 251 | |||
| 183 | return -ENOTTY; | 252 | return -ENOTTY; |
| 184 | } | 253 | } |
diff --git a/fs/ceph/ioctl.h b/fs/ceph/ioctl.h index 88451a3b6857..a6ce54e94eb5 100644 --- a/fs/ceph/ioctl.h +++ b/fs/ceph/ioctl.h | |||
| @@ -4,7 +4,7 @@ | |||
| 4 | #include <linux/ioctl.h> | 4 | #include <linux/ioctl.h> |
| 5 | #include <linux/types.h> | 5 | #include <linux/types.h> |
| 6 | 6 | ||
| 7 | #define CEPH_IOCTL_MAGIC 0x97 | 7 | #define CEPH_IOCTL_MAGIC 0x98 |
| 8 | 8 | ||
| 9 | /* just use u64 to align sanely on all archs */ | 9 | /* just use u64 to align sanely on all archs */ |
| 10 | struct ceph_ioctl_layout { | 10 | struct ceph_ioctl_layout { |
| @@ -17,6 +17,8 @@ struct ceph_ioctl_layout { | |||
| 17 | struct ceph_ioctl_layout) | 17 | struct ceph_ioctl_layout) |
| 18 | #define CEPH_IOC_SET_LAYOUT _IOW(CEPH_IOCTL_MAGIC, 2, \ | 18 | #define CEPH_IOC_SET_LAYOUT _IOW(CEPH_IOCTL_MAGIC, 2, \ |
| 19 | struct ceph_ioctl_layout) | 19 | struct ceph_ioctl_layout) |
| 20 | #define CEPH_IOC_SET_LAYOUT_POLICY _IOW(CEPH_IOCTL_MAGIC, 5, \ | ||
| 21 | struct ceph_ioctl_layout) | ||
| 20 | 22 | ||
| 21 | /* | 23 | /* |
| 22 | * Extract identity, address of the OSD and object storing a given | 24 | * Extract identity, address of the OSD and object storing a given |
diff --git a/fs/ceph/locks.c b/fs/ceph/locks.c index ff4e753aae92..40abde93c345 100644 --- a/fs/ceph/locks.c +++ b/fs/ceph/locks.c | |||
| @@ -1,11 +1,11 @@ | |||
| 1 | #include "ceph_debug.h" | 1 | #include <linux/ceph/ceph_debug.h> |
| 2 | 2 | ||
| 3 | #include <linux/file.h> | 3 | #include <linux/file.h> |
| 4 | #include <linux/namei.h> | 4 | #include <linux/namei.h> |
| 5 | 5 | ||
| 6 | #include "super.h" | 6 | #include "super.h" |
| 7 | #include "mds_client.h" | 7 | #include "mds_client.h" |
| 8 | #include "pagelist.h" | 8 | #include <linux/ceph/pagelist.h> |
| 9 | 9 | ||
| 10 | /** | 10 | /** |
| 11 | * Implement fcntl and flock locking functions. | 11 | * Implement fcntl and flock locking functions. |
| @@ -16,7 +16,7 @@ static int ceph_lock_message(u8 lock_type, u16 operation, struct file *file, | |||
| 16 | { | 16 | { |
| 17 | struct inode *inode = file->f_dentry->d_inode; | 17 | struct inode *inode = file->f_dentry->d_inode; |
| 18 | struct ceph_mds_client *mdsc = | 18 | struct ceph_mds_client *mdsc = |
| 19 | &ceph_sb_to_client(inode->i_sb)->mdsc; | 19 | ceph_sb_to_client(inode->i_sb)->mdsc; |
| 20 | struct ceph_mds_request *req; | 20 | struct ceph_mds_request *req; |
| 21 | int err; | 21 | int err; |
| 22 | 22 | ||
| @@ -181,8 +181,9 @@ void ceph_count_locks(struct inode *inode, int *fcntl_count, int *flock_count) | |||
| 181 | * Encode the flock and fcntl locks for the given inode into the pagelist. | 181 | * Encode the flock and fcntl locks for the given inode into the pagelist. |
| 182 | * Format is: #fcntl locks, sequential fcntl locks, #flock locks, | 182 | * Format is: #fcntl locks, sequential fcntl locks, #flock locks, |
| 183 | * sequential flock locks. | 183 | * sequential flock locks. |
| 184 | * Must be called with BLK already held, and the lock numbers should have | 184 | * Must be called with lock_flocks() already held. |
| 185 | * been gathered under the same lock holding window. | 185 | * If we encounter more of a specific lock type than expected, |
| 186 | * we return -ENOSPC so the caller can recount and retry. | ||
| 186 | */ | 187 | */ |
| 187 | int ceph_encode_locks(struct inode *inode, struct ceph_pagelist *pagelist, | 188 | int ceph_encode_locks(struct inode *inode, struct ceph_pagelist *pagelist, |
| 188 | int num_fcntl_locks, int num_flock_locks) | 189 | int num_fcntl_locks, int num_flock_locks) |
| @@ -190,6 +191,8 @@ int ceph_encode_locks(struct inode *inode, struct ceph_pagelist *pagelist, | |||
| 190 | struct file_lock *lock; | 191 | struct file_lock *lock; |
| 191 | struct ceph_filelock cephlock; | 192 | struct ceph_filelock cephlock; |
| 192 | int err = 0; | 193 | int err = 0; |
| 194 | int seen_fcntl = 0; | ||
| 195 | int seen_flock = 0; | ||
| 193 | 196 | ||
| 194 | dout("encoding %d flock and %d fcntl locks", num_flock_locks, | 197 | dout("encoding %d flock and %d fcntl locks", num_flock_locks, |
| 195 | num_fcntl_locks); | 198 | num_fcntl_locks); |
| @@ -198,6 +201,11 @@ int ceph_encode_locks(struct inode *inode, struct ceph_pagelist *pagelist, | |||
| 198 | goto fail; | 201 | goto fail; |
| 199 | for (lock = inode->i_flock; lock != NULL; lock = lock->fl_next) { | 202 | for (lock = inode->i_flock; lock != NULL; lock = lock->fl_next) { |
| 200 | if (lock->fl_flags & FL_POSIX) { | 203 | if (lock->fl_flags & FL_POSIX) { |
| 204 | ++seen_fcntl; | ||
| 205 | if (seen_fcntl > num_fcntl_locks) { | ||
| 206 | err = -ENOSPC; | ||
| 207 | goto fail; | ||
| 208 | } | ||
| 201 | err = lock_to_ceph_filelock(lock, &cephlock); | 209 | err = lock_to_ceph_filelock(lock, &cephlock); |
| 202 | if (err) | 210 | if (err) |
| 203 | goto fail; | 211 | goto fail; |
| @@ -213,6 +221,11 @@ int ceph_encode_locks(struct inode *inode, struct ceph_pagelist *pagelist, | |||
| 213 | goto fail; | 221 | goto fail; |
| 214 | for (lock = inode->i_flock; lock != NULL; lock = lock->fl_next) { | 222 | for (lock = inode->i_flock; lock != NULL; lock = lock->fl_next) { |
| 215 | if (lock->fl_flags & FL_FLOCK) { | 223 | if (lock->fl_flags & FL_FLOCK) { |
| 224 | ++seen_flock; | ||
| 225 | if (seen_flock > num_flock_locks) { | ||
| 226 | err = -ENOSPC; | ||
| 227 | goto fail; | ||
| 228 | } | ||
| 216 | err = lock_to_ceph_filelock(lock, &cephlock); | 229 | err = lock_to_ceph_filelock(lock, &cephlock); |
| 217 | if (err) | 230 | if (err) |
| 218 | goto fail; | 231 | goto fail; |
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index fad95f8f2608..3142b15940c2 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c | |||
| @@ -1,17 +1,21 @@ | |||
| 1 | #include "ceph_debug.h" | 1 | #include <linux/ceph/ceph_debug.h> |
| 2 | 2 | ||
| 3 | #include <linux/fs.h> | ||
| 3 | #include <linux/wait.h> | 4 | #include <linux/wait.h> |
| 4 | #include <linux/slab.h> | 5 | #include <linux/slab.h> |
| 5 | #include <linux/sched.h> | 6 | #include <linux/sched.h> |
| 7 | #include <linux/debugfs.h> | ||
| 8 | #include <linux/seq_file.h> | ||
| 6 | #include <linux/smp_lock.h> | 9 | #include <linux/smp_lock.h> |
| 7 | 10 | ||
| 8 | #include "mds_client.h" | ||
| 9 | #include "mon_client.h" | ||
| 10 | #include "super.h" | 11 | #include "super.h" |
| 11 | #include "messenger.h" | 12 | #include "mds_client.h" |
| 12 | #include "decode.h" | 13 | |
| 13 | #include "auth.h" | 14 | #include <linux/ceph/messenger.h> |
| 14 | #include "pagelist.h" | 15 | #include <linux/ceph/decode.h> |
| 16 | #include <linux/ceph/pagelist.h> | ||
| 17 | #include <linux/ceph/auth.h> | ||
| 18 | #include <linux/ceph/debugfs.h> | ||
| 15 | 19 | ||
| 16 | /* | 20 | /* |
| 17 | * A cluster of MDS (metadata server) daemons is responsible for | 21 | * A cluster of MDS (metadata server) daemons is responsible for |
| @@ -286,8 +290,9 @@ void ceph_put_mds_session(struct ceph_mds_session *s) | |||
| 286 | atomic_read(&s->s_ref), atomic_read(&s->s_ref)-1); | 290 | atomic_read(&s->s_ref), atomic_read(&s->s_ref)-1); |
| 287 | if (atomic_dec_and_test(&s->s_ref)) { | 291 | if (atomic_dec_and_test(&s->s_ref)) { |
| 288 | if (s->s_authorizer) | 292 | if (s->s_authorizer) |
| 289 | s->s_mdsc->client->monc.auth->ops->destroy_authorizer( | 293 | s->s_mdsc->fsc->client->monc.auth->ops->destroy_authorizer( |
| 290 | s->s_mdsc->client->monc.auth, s->s_authorizer); | 294 | s->s_mdsc->fsc->client->monc.auth, |
| 295 | s->s_authorizer); | ||
| 291 | kfree(s); | 296 | kfree(s); |
| 292 | } | 297 | } |
| 293 | } | 298 | } |
| @@ -344,7 +349,7 @@ static struct ceph_mds_session *register_session(struct ceph_mds_client *mdsc, | |||
| 344 | s->s_seq = 0; | 349 | s->s_seq = 0; |
| 345 | mutex_init(&s->s_mutex); | 350 | mutex_init(&s->s_mutex); |
| 346 | 351 | ||
| 347 | ceph_con_init(mdsc->client->msgr, &s->s_con); | 352 | ceph_con_init(mdsc->fsc->client->msgr, &s->s_con); |
| 348 | s->s_con.private = s; | 353 | s->s_con.private = s; |
| 349 | s->s_con.ops = &mds_con_ops; | 354 | s->s_con.ops = &mds_con_ops; |
| 350 | s->s_con.peer_name.type = CEPH_ENTITY_TYPE_MDS; | 355 | s->s_con.peer_name.type = CEPH_ENTITY_TYPE_MDS; |
| @@ -599,7 +604,7 @@ static int __choose_mds(struct ceph_mds_client *mdsc, | |||
| 599 | } else if (req->r_dentry) { | 604 | } else if (req->r_dentry) { |
| 600 | struct inode *dir = req->r_dentry->d_parent->d_inode; | 605 | struct inode *dir = req->r_dentry->d_parent->d_inode; |
| 601 | 606 | ||
| 602 | if (dir->i_sb != mdsc->client->sb) { | 607 | if (dir->i_sb != mdsc->fsc->sb) { |
| 603 | /* not this fs! */ | 608 | /* not this fs! */ |
| 604 | inode = req->r_dentry->d_inode; | 609 | inode = req->r_dentry->d_inode; |
| 605 | } else if (ceph_snap(dir) != CEPH_NOSNAP) { | 610 | } else if (ceph_snap(dir) != CEPH_NOSNAP) { |
| @@ -884,7 +889,7 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap, | |||
| 884 | __ceph_remove_cap(cap); | 889 | __ceph_remove_cap(cap); |
| 885 | if (!__ceph_is_any_real_caps(ci)) { | 890 | if (!__ceph_is_any_real_caps(ci)) { |
| 886 | struct ceph_mds_client *mdsc = | 891 | struct ceph_mds_client *mdsc = |
| 887 | &ceph_sb_to_client(inode->i_sb)->mdsc; | 892 | ceph_sb_to_client(inode->i_sb)->mdsc; |
| 888 | 893 | ||
| 889 | spin_lock(&mdsc->cap_dirty_lock); | 894 | spin_lock(&mdsc->cap_dirty_lock); |
| 890 | if (!list_empty(&ci->i_dirty_item)) { | 895 | if (!list_empty(&ci->i_dirty_item)) { |
| @@ -1146,7 +1151,7 @@ int ceph_add_cap_releases(struct ceph_mds_client *mdsc, | |||
| 1146 | struct ceph_msg *msg, *partial = NULL; | 1151 | struct ceph_msg *msg, *partial = NULL; |
| 1147 | struct ceph_mds_cap_release *head; | 1152 | struct ceph_mds_cap_release *head; |
| 1148 | int err = -ENOMEM; | 1153 | int err = -ENOMEM; |
| 1149 | int extra = mdsc->client->mount_args->cap_release_safety; | 1154 | int extra = mdsc->fsc->mount_options->cap_release_safety; |
| 1150 | int num; | 1155 | int num; |
| 1151 | 1156 | ||
| 1152 | dout("add_cap_releases %p mds%d extra %d\n", session, session->s_mds, | 1157 | dout("add_cap_releases %p mds%d extra %d\n", session, session->s_mds, |
| @@ -2085,7 +2090,7 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg) | |||
| 2085 | 2090 | ||
| 2086 | /* insert trace into our cache */ | 2091 | /* insert trace into our cache */ |
| 2087 | mutex_lock(&req->r_fill_mutex); | 2092 | mutex_lock(&req->r_fill_mutex); |
| 2088 | err = ceph_fill_trace(mdsc->client->sb, req, req->r_session); | 2093 | err = ceph_fill_trace(mdsc->fsc->sb, req, req->r_session); |
| 2089 | if (err == 0) { | 2094 | if (err == 0) { |
| 2090 | if (result == 0 && rinfo->dir_nr) | 2095 | if (result == 0 && rinfo->dir_nr) |
| 2091 | ceph_readdir_prepopulate(req, req->r_session); | 2096 | ceph_readdir_prepopulate(req, req->r_session); |
| @@ -2361,19 +2366,35 @@ static int encode_caps_cb(struct inode *inode, struct ceph_cap *cap, | |||
| 2361 | 2366 | ||
| 2362 | if (recon_state->flock) { | 2367 | if (recon_state->flock) { |
| 2363 | int num_fcntl_locks, num_flock_locks; | 2368 | int num_fcntl_locks, num_flock_locks; |
| 2364 | 2369 | struct ceph_pagelist_cursor trunc_point; | |
| 2365 | lock_kernel(); | 2370 | |
| 2366 | ceph_count_locks(inode, &num_fcntl_locks, &num_flock_locks); | 2371 | ceph_pagelist_set_cursor(pagelist, &trunc_point); |
| 2367 | rec.v2.flock_len = (2*sizeof(u32) + | 2372 | do { |
| 2368 | (num_fcntl_locks+num_flock_locks) * | 2373 | lock_flocks(); |
| 2369 | sizeof(struct ceph_filelock)); | 2374 | ceph_count_locks(inode, &num_fcntl_locks, |
| 2370 | 2375 | &num_flock_locks); | |
| 2371 | err = ceph_pagelist_append(pagelist, &rec, reclen); | 2376 | rec.v2.flock_len = (2*sizeof(u32) + |
| 2372 | if (!err) | 2377 | (num_fcntl_locks+num_flock_locks) * |
| 2373 | err = ceph_encode_locks(inode, pagelist, | 2378 | sizeof(struct ceph_filelock)); |
| 2374 | num_fcntl_locks, | 2379 | unlock_flocks(); |
| 2375 | num_flock_locks); | 2380 | |
| 2376 | unlock_kernel(); | 2381 | /* pre-alloc pagelist */ |
| 2382 | ceph_pagelist_truncate(pagelist, &trunc_point); | ||
| 2383 | err = ceph_pagelist_append(pagelist, &rec, reclen); | ||
| 2384 | if (!err) | ||
| 2385 | err = ceph_pagelist_reserve(pagelist, | ||
| 2386 | rec.v2.flock_len); | ||
| 2387 | |||
| 2388 | /* encode locks */ | ||
| 2389 | if (!err) { | ||
| 2390 | lock_flocks(); | ||
| 2391 | err = ceph_encode_locks(inode, | ||
| 2392 | pagelist, | ||
| 2393 | num_fcntl_locks, | ||
| 2394 | num_flock_locks); | ||
| 2395 | unlock_flocks(); | ||
| 2396 | } | ||
| 2397 | } while (err == -ENOSPC); | ||
| 2377 | } else { | 2398 | } else { |
| 2378 | err = ceph_pagelist_append(pagelist, &rec, reclen); | 2399 | err = ceph_pagelist_append(pagelist, &rec, reclen); |
| 2379 | } | 2400 | } |
| @@ -2613,7 +2634,7 @@ static void handle_lease(struct ceph_mds_client *mdsc, | |||
| 2613 | struct ceph_mds_session *session, | 2634 | struct ceph_mds_session *session, |
| 2614 | struct ceph_msg *msg) | 2635 | struct ceph_msg *msg) |
| 2615 | { | 2636 | { |
| 2616 | struct super_block *sb = mdsc->client->sb; | 2637 | struct super_block *sb = mdsc->fsc->sb; |
| 2617 | struct inode *inode; | 2638 | struct inode *inode; |
| 2618 | struct ceph_inode_info *ci; | 2639 | struct ceph_inode_info *ci; |
| 2619 | struct dentry *parent, *dentry; | 2640 | struct dentry *parent, *dentry; |
| @@ -2891,10 +2912,16 @@ static void delayed_work(struct work_struct *work) | |||
| 2891 | schedule_delayed(mdsc); | 2912 | schedule_delayed(mdsc); |
| 2892 | } | 2913 | } |
| 2893 | 2914 | ||
| 2915 | int ceph_mdsc_init(struct ceph_fs_client *fsc) | ||
| 2894 | 2916 | ||
| 2895 | int ceph_mdsc_init(struct ceph_mds_client *mdsc, struct ceph_client *client) | ||
| 2896 | { | 2917 | { |
| 2897 | mdsc->client = client; | 2918 | struct ceph_mds_client *mdsc; |
| 2919 | |||
| 2920 | mdsc = kzalloc(sizeof(struct ceph_mds_client), GFP_NOFS); | ||
| 2921 | if (!mdsc) | ||
| 2922 | return -ENOMEM; | ||
| 2923 | mdsc->fsc = fsc; | ||
| 2924 | fsc->mdsc = mdsc; | ||
| 2898 | mutex_init(&mdsc->mutex); | 2925 | mutex_init(&mdsc->mutex); |
| 2899 | mdsc->mdsmap = kzalloc(sizeof(*mdsc->mdsmap), GFP_NOFS); | 2926 | mdsc->mdsmap = kzalloc(sizeof(*mdsc->mdsmap), GFP_NOFS); |
| 2900 | if (mdsc->mdsmap == NULL) | 2927 | if (mdsc->mdsmap == NULL) |
| @@ -2927,7 +2954,7 @@ int ceph_mdsc_init(struct ceph_mds_client *mdsc, struct ceph_client *client) | |||
| 2927 | INIT_LIST_HEAD(&mdsc->dentry_lru); | 2954 | INIT_LIST_HEAD(&mdsc->dentry_lru); |
| 2928 | 2955 | ||
| 2929 | ceph_caps_init(mdsc); | 2956 | ceph_caps_init(mdsc); |
| 2930 | ceph_adjust_min_caps(mdsc, client->min_caps); | 2957 | ceph_adjust_min_caps(mdsc, fsc->min_caps); |
| 2931 | 2958 | ||
| 2932 | return 0; | 2959 | return 0; |
| 2933 | } | 2960 | } |
| @@ -2939,7 +2966,7 @@ int ceph_mdsc_init(struct ceph_mds_client *mdsc, struct ceph_client *client) | |||
| 2939 | static void wait_requests(struct ceph_mds_client *mdsc) | 2966 | static void wait_requests(struct ceph_mds_client *mdsc) |
| 2940 | { | 2967 | { |
| 2941 | struct ceph_mds_request *req; | 2968 | struct ceph_mds_request *req; |
| 2942 | struct ceph_client *client = mdsc->client; | 2969 | struct ceph_fs_client *fsc = mdsc->fsc; |
| 2943 | 2970 | ||
| 2944 | mutex_lock(&mdsc->mutex); | 2971 | mutex_lock(&mdsc->mutex); |
| 2945 | if (__get_oldest_req(mdsc)) { | 2972 | if (__get_oldest_req(mdsc)) { |
| @@ -2947,7 +2974,7 @@ static void wait_requests(struct ceph_mds_client *mdsc) | |||
| 2947 | 2974 | ||
| 2948 | dout("wait_requests waiting for requests\n"); | 2975 | dout("wait_requests waiting for requests\n"); |
| 2949 | wait_for_completion_timeout(&mdsc->safe_umount_waiters, | 2976 | wait_for_completion_timeout(&mdsc->safe_umount_waiters, |
| 2950 | client->mount_args->mount_timeout * HZ); | 2977 | fsc->client->options->mount_timeout * HZ); |
| 2951 | 2978 | ||
| 2952 | /* tear down remaining requests */ | 2979 | /* tear down remaining requests */ |
| 2953 | mutex_lock(&mdsc->mutex); | 2980 | mutex_lock(&mdsc->mutex); |
| @@ -3030,7 +3057,7 @@ void ceph_mdsc_sync(struct ceph_mds_client *mdsc) | |||
| 3030 | { | 3057 | { |
| 3031 | u64 want_tid, want_flush; | 3058 | u64 want_tid, want_flush; |
| 3032 | 3059 | ||
| 3033 | if (mdsc->client->mount_state == CEPH_MOUNT_SHUTDOWN) | 3060 | if (mdsc->fsc->mount_state == CEPH_MOUNT_SHUTDOWN) |
| 3034 | return; | 3061 | return; |
| 3035 | 3062 | ||
| 3036 | dout("sync\n"); | 3063 | dout("sync\n"); |
| @@ -3053,7 +3080,7 @@ bool done_closing_sessions(struct ceph_mds_client *mdsc) | |||
| 3053 | { | 3080 | { |
| 3054 | int i, n = 0; | 3081 | int i, n = 0; |
| 3055 | 3082 | ||
| 3056 | if (mdsc->client->mount_state == CEPH_MOUNT_SHUTDOWN) | 3083 | if (mdsc->fsc->mount_state == CEPH_MOUNT_SHUTDOWN) |
| 3057 | return true; | 3084 | return true; |
| 3058 | 3085 | ||
| 3059 | mutex_lock(&mdsc->mutex); | 3086 | mutex_lock(&mdsc->mutex); |
| @@ -3071,8 +3098,8 @@ void ceph_mdsc_close_sessions(struct ceph_mds_client *mdsc) | |||
| 3071 | { | 3098 | { |
| 3072 | struct ceph_mds_session *session; | 3099 | struct ceph_mds_session *session; |
| 3073 | int i; | 3100 | int i; |
| 3074 | struct ceph_client *client = mdsc->client; | 3101 | struct ceph_fs_client *fsc = mdsc->fsc; |
| 3075 | unsigned long timeout = client->mount_args->mount_timeout * HZ; | 3102 | unsigned long timeout = fsc->client->options->mount_timeout * HZ; |
| 3076 | 3103 | ||
| 3077 | dout("close_sessions\n"); | 3104 | dout("close_sessions\n"); |
| 3078 | 3105 | ||
| @@ -3119,7 +3146,7 @@ void ceph_mdsc_close_sessions(struct ceph_mds_client *mdsc) | |||
| 3119 | dout("stopped\n"); | 3146 | dout("stopped\n"); |
| 3120 | } | 3147 | } |
| 3121 | 3148 | ||
| 3122 | void ceph_mdsc_stop(struct ceph_mds_client *mdsc) | 3149 | static void ceph_mdsc_stop(struct ceph_mds_client *mdsc) |
| 3123 | { | 3150 | { |
| 3124 | dout("stop\n"); | 3151 | dout("stop\n"); |
| 3125 | cancel_delayed_work_sync(&mdsc->delayed_work); /* cancel timer */ | 3152 | cancel_delayed_work_sync(&mdsc->delayed_work); /* cancel timer */ |
| @@ -3129,6 +3156,15 @@ void ceph_mdsc_stop(struct ceph_mds_client *mdsc) | |||
| 3129 | ceph_caps_finalize(mdsc); | 3156 | ceph_caps_finalize(mdsc); |
| 3130 | } | 3157 | } |
| 3131 | 3158 | ||
| 3159 | void ceph_mdsc_destroy(struct ceph_fs_client *fsc) | ||
| 3160 | { | ||
| 3161 | struct ceph_mds_client *mdsc = fsc->mdsc; | ||
| 3162 | |||
| 3163 | ceph_mdsc_stop(mdsc); | ||
| 3164 | fsc->mdsc = NULL; | ||
| 3165 | kfree(mdsc); | ||
| 3166 | } | ||
| 3167 | |||
| 3132 | 3168 | ||
| 3133 | /* | 3169 | /* |
| 3134 | * handle mds map update. | 3170 | * handle mds map update. |
| @@ -3145,14 +3181,14 @@ void ceph_mdsc_handle_map(struct ceph_mds_client *mdsc, struct ceph_msg *msg) | |||
| 3145 | 3181 | ||
| 3146 | ceph_decode_need(&p, end, sizeof(fsid)+2*sizeof(u32), bad); | 3182 | ceph_decode_need(&p, end, sizeof(fsid)+2*sizeof(u32), bad); |
| 3147 | ceph_decode_copy(&p, &fsid, sizeof(fsid)); | 3183 | ceph_decode_copy(&p, &fsid, sizeof(fsid)); |
| 3148 | if (ceph_check_fsid(mdsc->client, &fsid) < 0) | 3184 | if (ceph_check_fsid(mdsc->fsc->client, &fsid) < 0) |
| 3149 | return; | 3185 | return; |
| 3150 | epoch = ceph_decode_32(&p); | 3186 | epoch = ceph_decode_32(&p); |
| 3151 | maplen = ceph_decode_32(&p); | 3187 | maplen = ceph_decode_32(&p); |
| 3152 | dout("handle_map epoch %u len %d\n", epoch, (int)maplen); | 3188 | dout("handle_map epoch %u len %d\n", epoch, (int)maplen); |
| 3153 | 3189 | ||
| 3154 | /* do we need it? */ | 3190 | /* do we need it? */ |
| 3155 | ceph_monc_got_mdsmap(&mdsc->client->monc, epoch); | 3191 | ceph_monc_got_mdsmap(&mdsc->fsc->client->monc, epoch); |
| 3156 | mutex_lock(&mdsc->mutex); | 3192 | mutex_lock(&mdsc->mutex); |
| 3157 | if (mdsc->mdsmap && epoch <= mdsc->mdsmap->m_epoch) { | 3193 | if (mdsc->mdsmap && epoch <= mdsc->mdsmap->m_epoch) { |
| 3158 | dout("handle_map epoch %u <= our %u\n", | 3194 | dout("handle_map epoch %u <= our %u\n", |
| @@ -3176,7 +3212,7 @@ void ceph_mdsc_handle_map(struct ceph_mds_client *mdsc, struct ceph_msg *msg) | |||
| 3176 | } else { | 3212 | } else { |
| 3177 | mdsc->mdsmap = newmap; /* first mds map */ | 3213 | mdsc->mdsmap = newmap; /* first mds map */ |
| 3178 | } | 3214 | } |
| 3179 | mdsc->client->sb->s_maxbytes = mdsc->mdsmap->m_max_file_size; | 3215 | mdsc->fsc->sb->s_maxbytes = mdsc->mdsmap->m_max_file_size; |
| 3180 | 3216 | ||
| 3181 | __wake_requests(mdsc, &mdsc->waiting_for_map); | 3217 | __wake_requests(mdsc, &mdsc->waiting_for_map); |
| 3182 | 3218 | ||
| @@ -3277,7 +3313,7 @@ static int get_authorizer(struct ceph_connection *con, | |||
| 3277 | { | 3313 | { |
| 3278 | struct ceph_mds_session *s = con->private; | 3314 | struct ceph_mds_session *s = con->private; |
| 3279 | struct ceph_mds_client *mdsc = s->s_mdsc; | 3315 | struct ceph_mds_client *mdsc = s->s_mdsc; |
| 3280 | struct ceph_auth_client *ac = mdsc->client->monc.auth; | 3316 | struct ceph_auth_client *ac = mdsc->fsc->client->monc.auth; |
| 3281 | int ret = 0; | 3317 | int ret = 0; |
| 3282 | 3318 | ||
| 3283 | if (force_new && s->s_authorizer) { | 3319 | if (force_new && s->s_authorizer) { |
| @@ -3311,7 +3347,7 @@ static int verify_authorizer_reply(struct ceph_connection *con, int len) | |||
| 3311 | { | 3347 | { |
| 3312 | struct ceph_mds_session *s = con->private; | 3348 | struct ceph_mds_session *s = con->private; |
| 3313 | struct ceph_mds_client *mdsc = s->s_mdsc; | 3349 | struct ceph_mds_client *mdsc = s->s_mdsc; |
| 3314 | struct ceph_auth_client *ac = mdsc->client->monc.auth; | 3350 | struct ceph_auth_client *ac = mdsc->fsc->client->monc.auth; |
| 3315 | 3351 | ||
| 3316 | return ac->ops->verify_authorizer_reply(ac, s->s_authorizer, len); | 3352 | return ac->ops->verify_authorizer_reply(ac, s->s_authorizer, len); |
| 3317 | } | 3353 | } |
| @@ -3320,12 +3356,12 @@ static int invalidate_authorizer(struct ceph_connection *con) | |||
| 3320 | { | 3356 | { |
| 3321 | struct ceph_mds_session *s = con->private; | 3357 | struct ceph_mds_session *s = con->private; |
| 3322 | struct ceph_mds_client *mdsc = s->s_mdsc; | 3358 | struct ceph_mds_client *mdsc = s->s_mdsc; |
| 3323 | struct ceph_auth_client *ac = mdsc->client->monc.auth; | 3359 | struct ceph_auth_client *ac = mdsc->fsc->client->monc.auth; |
| 3324 | 3360 | ||
| 3325 | if (ac->ops->invalidate_authorizer) | 3361 | if (ac->ops->invalidate_authorizer) |
| 3326 | ac->ops->invalidate_authorizer(ac, CEPH_ENTITY_TYPE_MDS); | 3362 | ac->ops->invalidate_authorizer(ac, CEPH_ENTITY_TYPE_MDS); |
| 3327 | 3363 | ||
| 3328 | return ceph_monc_validate_auth(&mdsc->client->monc); | 3364 | return ceph_monc_validate_auth(&mdsc->fsc->client->monc); |
| 3329 | } | 3365 | } |
| 3330 | 3366 | ||
| 3331 | static const struct ceph_connection_operations mds_con_ops = { | 3367 | static const struct ceph_connection_operations mds_con_ops = { |
| @@ -3338,7 +3374,4 @@ static const struct ceph_connection_operations mds_con_ops = { | |||
| 3338 | .peer_reset = peer_reset, | 3374 | .peer_reset = peer_reset, |
| 3339 | }; | 3375 | }; |
| 3340 | 3376 | ||
| 3341 | |||
| 3342 | |||
| 3343 | |||
| 3344 | /* eof */ | 3377 | /* eof */ |
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h index c98267ce6d2a..d66d63c72355 100644 --- a/fs/ceph/mds_client.h +++ b/fs/ceph/mds_client.h | |||
| @@ -8,9 +8,9 @@ | |||
| 8 | #include <linux/rbtree.h> | 8 | #include <linux/rbtree.h> |
| 9 | #include <linux/spinlock.h> | 9 | #include <linux/spinlock.h> |
| 10 | 10 | ||
| 11 | #include "types.h" | 11 | #include <linux/ceph/types.h> |
| 12 | #include "messenger.h" | 12 | #include <linux/ceph/messenger.h> |
| 13 | #include "mdsmap.h" | 13 | #include <linux/ceph/mdsmap.h> |
| 14 | 14 | ||
| 15 | /* | 15 | /* |
| 16 | * Some lock dependencies: | 16 | * Some lock dependencies: |
| @@ -26,7 +26,7 @@ | |||
| 26 | * | 26 | * |
| 27 | */ | 27 | */ |
| 28 | 28 | ||
| 29 | struct ceph_client; | 29 | struct ceph_fs_client; |
| 30 | struct ceph_cap; | 30 | struct ceph_cap; |
| 31 | 31 | ||
| 32 | /* | 32 | /* |
| @@ -230,7 +230,7 @@ struct ceph_mds_request { | |||
| 230 | * mds client state | 230 | * mds client state |
| 231 | */ | 231 | */ |
| 232 | struct ceph_mds_client { | 232 | struct ceph_mds_client { |
| 233 | struct ceph_client *client; | 233 | struct ceph_fs_client *fsc; |
| 234 | struct mutex mutex; /* all nested structures */ | 234 | struct mutex mutex; /* all nested structures */ |
| 235 | 235 | ||
| 236 | struct ceph_mdsmap *mdsmap; | 236 | struct ceph_mdsmap *mdsmap; |
| @@ -289,11 +289,6 @@ struct ceph_mds_client { | |||
| 289 | int caps_avail_count; /* unused, unreserved */ | 289 | int caps_avail_count; /* unused, unreserved */ |
| 290 | int caps_min_count; /* keep at least this many | 290 | int caps_min_count; /* keep at least this many |
| 291 | (unreserved) */ | 291 | (unreserved) */ |
| 292 | |||
| 293 | #ifdef CONFIG_DEBUG_FS | ||
| 294 | struct dentry *debugfs_file; | ||
| 295 | #endif | ||
| 296 | |||
| 297 | spinlock_t dentry_lru_lock; | 292 | spinlock_t dentry_lru_lock; |
| 298 | struct list_head dentry_lru; | 293 | struct list_head dentry_lru; |
| 299 | int num_dentry; | 294 | int num_dentry; |
| @@ -316,10 +311,9 @@ extern void ceph_put_mds_session(struct ceph_mds_session *s); | |||
| 316 | extern int ceph_send_msg_mds(struct ceph_mds_client *mdsc, | 311 | extern int ceph_send_msg_mds(struct ceph_mds_client *mdsc, |
| 317 | struct ceph_msg *msg, int mds); | 312 | struct ceph_msg *msg, int mds); |
| 318 | 313 | ||
| 319 | extern int ceph_mdsc_init(struct ceph_mds_client *mdsc, | 314 | extern int ceph_mdsc_init(struct ceph_fs_client *fsc); |
| 320 | struct ceph_client *client); | ||
| 321 | extern void ceph_mdsc_close_sessions(struct ceph_mds_client *mdsc); | 315 | extern void ceph_mdsc_close_sessions(struct ceph_mds_client *mdsc); |
| 322 | extern void ceph_mdsc_stop(struct ceph_mds_client *mdsc); | 316 | extern void ceph_mdsc_destroy(struct ceph_fs_client *fsc); |
| 323 | 317 | ||
| 324 | extern void ceph_mdsc_sync(struct ceph_mds_client *mdsc); | 318 | extern void ceph_mdsc_sync(struct ceph_mds_client *mdsc); |
| 325 | 319 | ||
diff --git a/fs/ceph/mdsmap.c b/fs/ceph/mdsmap.c index 040be6d1150b..73b7d44e8a35 100644 --- a/fs/ceph/mdsmap.c +++ b/fs/ceph/mdsmap.c | |||
| @@ -1,4 +1,4 @@ | |||
| 1 | #include "ceph_debug.h" | 1 | #include <linux/ceph/ceph_debug.h> |
| 2 | 2 | ||
| 3 | #include <linux/bug.h> | 3 | #include <linux/bug.h> |
| 4 | #include <linux/err.h> | 4 | #include <linux/err.h> |
| @@ -6,9 +6,9 @@ | |||
| 6 | #include <linux/slab.h> | 6 | #include <linux/slab.h> |
| 7 | #include <linux/types.h> | 7 | #include <linux/types.h> |
| 8 | 8 | ||
| 9 | #include "mdsmap.h" | 9 | #include <linux/ceph/mdsmap.h> |
| 10 | #include "messenger.h" | 10 | #include <linux/ceph/messenger.h> |
| 11 | #include "decode.h" | 11 | #include <linux/ceph/decode.h> |
| 12 | 12 | ||
| 13 | #include "super.h" | 13 | #include "super.h" |
| 14 | 14 | ||
| @@ -117,7 +117,8 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end) | |||
| 117 | } | 117 | } |
| 118 | 118 | ||
| 119 | dout("mdsmap_decode %d/%d %lld mds%d.%d %s %s\n", | 119 | dout("mdsmap_decode %d/%d %lld mds%d.%d %s %s\n", |
| 120 | i+1, n, global_id, mds, inc, pr_addr(&addr.in_addr), | 120 | i+1, n, global_id, mds, inc, |
| 121 | ceph_pr_addr(&addr.in_addr), | ||
| 121 | ceph_mds_state_name(state)); | 122 | ceph_mds_state_name(state)); |
| 122 | if (mds >= 0 && mds < m->m_max_mds && state > 0) { | 123 | if (mds >= 0 && mds < m->m_max_mds && state > 0) { |
| 123 | m->m_info[mds].global_id = global_id; | 124 | m->m_info[mds].global_id = global_id; |
diff --git a/fs/ceph/pagelist.c b/fs/ceph/pagelist.c deleted file mode 100644 index 46a368b6dce5..000000000000 --- a/fs/ceph/pagelist.c +++ /dev/null | |||
| @@ -1,63 +0,0 @@ | |||
| 1 | |||
| 2 | #include <linux/gfp.h> | ||
| 3 | #include <linux/pagemap.h> | ||
| 4 | #include <linux/highmem.h> | ||
| 5 | |||
| 6 | #include "pagelist.h" | ||
| 7 | |||
| 8 | static void ceph_pagelist_unmap_tail(struct ceph_pagelist *pl) | ||
| 9 | { | ||
| 10 | struct page *page = list_entry(pl->head.prev, struct page, | ||
| 11 | lru); | ||
| 12 | kunmap(page); | ||
| 13 | } | ||
| 14 | |||
| 15 | int ceph_pagelist_release(struct ceph_pagelist *pl) | ||
| 16 | { | ||
| 17 | if (pl->mapped_tail) | ||
| 18 | ceph_pagelist_unmap_tail(pl); | ||
| 19 | |||
| 20 | while (!list_empty(&pl->head)) { | ||
| 21 | struct page *page = list_first_entry(&pl->head, struct page, | ||
| 22 | lru); | ||
| 23 | list_del(&page->lru); | ||
| 24 | __free_page(page); | ||
| 25 | } | ||
| 26 | return 0; | ||
| 27 | } | ||
| 28 | |||
| 29 | static int ceph_pagelist_addpage(struct ceph_pagelist *pl) | ||
| 30 | { | ||
| 31 | struct page *page = __page_cache_alloc(GFP_NOFS); | ||
| 32 | if (!page) | ||
| 33 | return -ENOMEM; | ||
| 34 | pl->room += PAGE_SIZE; | ||
| 35 | list_add_tail(&page->lru, &pl->head); | ||
| 36 | if (pl->mapped_tail) | ||
| 37 | ceph_pagelist_unmap_tail(pl); | ||
| 38 | pl->mapped_tail = kmap(page); | ||
| 39 | return 0; | ||
| 40 | } | ||
| 41 | |||
| 42 | int ceph_pagelist_append(struct ceph_pagelist *pl, void *buf, size_t len) | ||
| 43 | { | ||
| 44 | while (pl->room < len) { | ||
| 45 | size_t bit = pl->room; | ||
| 46 | int ret; | ||
| 47 | |||
| 48 | memcpy(pl->mapped_tail + (pl->length & ~PAGE_CACHE_MASK), | ||
| 49 | buf, bit); | ||
| 50 | pl->length += bit; | ||
| 51 | pl->room -= bit; | ||
| 52 | buf += bit; | ||
| 53 | len -= bit; | ||
| 54 | ret = ceph_pagelist_addpage(pl); | ||
| 55 | if (ret) | ||
| 56 | return ret; | ||
| 57 | } | ||
| 58 | |||
| 59 | memcpy(pl->mapped_tail + (pl->length & ~PAGE_CACHE_MASK), buf, len); | ||
| 60 | pl->length += len; | ||
| 61 | pl->room -= len; | ||
| 62 | return 0; | ||
| 63 | } | ||
diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c index 190b6c4a6f2b..39c243acd062 100644 --- a/fs/ceph/snap.c +++ b/fs/ceph/snap.c | |||
| @@ -1,10 +1,12 @@ | |||
| 1 | #include "ceph_debug.h" | 1 | #include <linux/ceph/ceph_debug.h> |
| 2 | 2 | ||
| 3 | #include <linux/sort.h> | 3 | #include <linux/sort.h> |
| 4 | #include <linux/slab.h> | 4 | #include <linux/slab.h> |
| 5 | 5 | ||
| 6 | #include "super.h" | 6 | #include "super.h" |
| 7 | #include "decode.h" | 7 | #include "mds_client.h" |
| 8 | |||
| 9 | #include <linux/ceph/decode.h> | ||
| 8 | 10 | ||
| 9 | /* | 11 | /* |
| 10 | * Snapshots in ceph are driven in large part by cooperation from the | 12 | * Snapshots in ceph are driven in large part by cooperation from the |
| @@ -526,7 +528,7 @@ int __ceph_finish_cap_snap(struct ceph_inode_info *ci, | |||
| 526 | struct ceph_cap_snap *capsnap) | 528 | struct ceph_cap_snap *capsnap) |
| 527 | { | 529 | { |
| 528 | struct inode *inode = &ci->vfs_inode; | 530 | struct inode *inode = &ci->vfs_inode; |
| 529 | struct ceph_mds_client *mdsc = &ceph_sb_to_client(inode->i_sb)->mdsc; | 531 | struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc; |
| 530 | 532 | ||
| 531 | BUG_ON(capsnap->writing); | 533 | BUG_ON(capsnap->writing); |
| 532 | capsnap->size = inode->i_size; | 534 | capsnap->size = inode->i_size; |
| @@ -747,7 +749,7 @@ void ceph_handle_snap(struct ceph_mds_client *mdsc, | |||
| 747 | struct ceph_mds_session *session, | 749 | struct ceph_mds_session *session, |
| 748 | struct ceph_msg *msg) | 750 | struct ceph_msg *msg) |
| 749 | { | 751 | { |
| 750 | struct super_block *sb = mdsc->client->sb; | 752 | struct super_block *sb = mdsc->fsc->sb; |
| 751 | int mds = session->s_mds; | 753 | int mds = session->s_mds; |
| 752 | u64 split; | 754 | u64 split; |
| 753 | int op; | 755 | int op; |
diff --git a/fs/ceph/ceph_strings.c b/fs/ceph/strings.c index c6179d3a26a2..cd5097d7c804 100644 --- a/fs/ceph/ceph_strings.c +++ b/fs/ceph/strings.c | |||
| @@ -1,71 +1,9 @@ | |||
| 1 | /* | 1 | /* |
| 2 | * Ceph string constants | 2 | * Ceph fs string constants |
| 3 | */ | 3 | */ |
| 4 | #include "types.h" | 4 | #include <linux/module.h> |
| 5 | #include <linux/ceph/types.h> | ||
| 5 | 6 | ||
| 6 | const char *ceph_entity_type_name(int type) | ||
| 7 | { | ||
| 8 | switch (type) { | ||
| 9 | case CEPH_ENTITY_TYPE_MDS: return "mds"; | ||
| 10 | case CEPH_ENTITY_TYPE_OSD: return "osd"; | ||
| 11 | case CEPH_ENTITY_TYPE_MON: return "mon"; | ||
| 12 | case CEPH_ENTITY_TYPE_CLIENT: return "client"; | ||
| 13 | case CEPH_ENTITY_TYPE_AUTH: return "auth"; | ||
| 14 | default: return "unknown"; | ||
| 15 | } | ||
| 16 | } | ||
| 17 | |||
| 18 | const char *ceph_osd_op_name(int op) | ||
| 19 | { | ||
| 20 | switch (op) { | ||
| 21 | case CEPH_OSD_OP_READ: return "read"; | ||
| 22 | case CEPH_OSD_OP_STAT: return "stat"; | ||
| 23 | |||
| 24 | case CEPH_OSD_OP_MASKTRUNC: return "masktrunc"; | ||
| 25 | |||
| 26 | case CEPH_OSD_OP_WRITE: return "write"; | ||
| 27 | case CEPH_OSD_OP_DELETE: return "delete"; | ||
| 28 | case CEPH_OSD_OP_TRUNCATE: return "truncate"; | ||
| 29 | case CEPH_OSD_OP_ZERO: return "zero"; | ||
| 30 | case CEPH_OSD_OP_WRITEFULL: return "writefull"; | ||
| 31 | case CEPH_OSD_OP_ROLLBACK: return "rollback"; | ||
| 32 | |||
| 33 | case CEPH_OSD_OP_APPEND: return "append"; | ||
| 34 | case CEPH_OSD_OP_STARTSYNC: return "startsync"; | ||
| 35 | case CEPH_OSD_OP_SETTRUNC: return "settrunc"; | ||
| 36 | case CEPH_OSD_OP_TRIMTRUNC: return "trimtrunc"; | ||
| 37 | |||
| 38 | case CEPH_OSD_OP_TMAPUP: return "tmapup"; | ||
| 39 | case CEPH_OSD_OP_TMAPGET: return "tmapget"; | ||
| 40 | case CEPH_OSD_OP_TMAPPUT: return "tmapput"; | ||
| 41 | |||
| 42 | case CEPH_OSD_OP_GETXATTR: return "getxattr"; | ||
| 43 | case CEPH_OSD_OP_GETXATTRS: return "getxattrs"; | ||
| 44 | case CEPH_OSD_OP_SETXATTR: return "setxattr"; | ||
| 45 | case CEPH_OSD_OP_SETXATTRS: return "setxattrs"; | ||
| 46 | case CEPH_OSD_OP_RESETXATTRS: return "resetxattrs"; | ||
| 47 | case CEPH_OSD_OP_RMXATTR: return "rmxattr"; | ||
| 48 | case CEPH_OSD_OP_CMPXATTR: return "cmpxattr"; | ||
| 49 | |||
| 50 | case CEPH_OSD_OP_PULL: return "pull"; | ||
| 51 | case CEPH_OSD_OP_PUSH: return "push"; | ||
| 52 | case CEPH_OSD_OP_BALANCEREADS: return "balance-reads"; | ||
| 53 | case CEPH_OSD_OP_UNBALANCEREADS: return "unbalance-reads"; | ||
| 54 | case CEPH_OSD_OP_SCRUB: return "scrub"; | ||
| 55 | |||
| 56 | case CEPH_OSD_OP_WRLOCK: return "wrlock"; | ||
| 57 | case CEPH_OSD_OP_WRUNLOCK: return "wrunlock"; | ||
| 58 | case CEPH_OSD_OP_RDLOCK: return "rdlock"; | ||
| 59 | case CEPH_OSD_OP_RDUNLOCK: return "rdunlock"; | ||
| 60 | case CEPH_OSD_OP_UPLOCK: return "uplock"; | ||
| 61 | case CEPH_OSD_OP_DNLOCK: return "dnlock"; | ||
| 62 | |||
| 63 | case CEPH_OSD_OP_CALL: return "call"; | ||
| 64 | |||
| 65 | case CEPH_OSD_OP_PGLS: return "pgls"; | ||
| 66 | } | ||
| 67 | return "???"; | ||
| 68 | } | ||
| 69 | 7 | ||
| 70 | const char *ceph_mds_state_name(int s) | 8 | const char *ceph_mds_state_name(int s) |
| 71 | { | 9 | { |
| @@ -177,17 +115,3 @@ const char *ceph_snap_op_name(int o) | |||
| 177 | } | 115 | } |
| 178 | return "???"; | 116 | return "???"; |
| 179 | } | 117 | } |
| 180 | |||
| 181 | const char *ceph_pool_op_name(int op) | ||
| 182 | { | ||
| 183 | switch (op) { | ||
| 184 | case POOL_OP_CREATE: return "create"; | ||
| 185 | case POOL_OP_DELETE: return "delete"; | ||
| 186 | case POOL_OP_AUID_CHANGE: return "auid change"; | ||
| 187 | case POOL_OP_CREATE_SNAP: return "create snap"; | ||
| 188 | case POOL_OP_DELETE_SNAP: return "delete snap"; | ||
| 189 | case POOL_OP_CREATE_UNMANAGED_SNAP: return "create unmanaged snap"; | ||
| 190 | case POOL_OP_DELETE_UNMANAGED_SNAP: return "delete unmanaged snap"; | ||
| 191 | } | ||
| 192 | return "???"; | ||
| 193 | } | ||
diff --git a/fs/ceph/super.c b/fs/ceph/super.c index 9922628532b2..d6e0e0421891 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c | |||
| @@ -1,5 +1,5 @@ | |||
| 1 | 1 | ||
| 2 | #include "ceph_debug.h" | 2 | #include <linux/ceph/ceph_debug.h> |
| 3 | 3 | ||
| 4 | #include <linux/backing-dev.h> | 4 | #include <linux/backing-dev.h> |
| 5 | #include <linux/ctype.h> | 5 | #include <linux/ctype.h> |
| @@ -15,10 +15,13 @@ | |||
| 15 | #include <linux/statfs.h> | 15 | #include <linux/statfs.h> |
| 16 | #include <linux/string.h> | 16 | #include <linux/string.h> |
| 17 | 17 | ||
| 18 | #include "decode.h" | ||
| 19 | #include "super.h" | 18 | #include "super.h" |
| 20 | #include "mon_client.h" | 19 | #include "mds_client.h" |
| 21 | #include "auth.h" | 20 | |
| 21 | #include <linux/ceph/decode.h> | ||
| 22 | #include <linux/ceph/mon_client.h> | ||
| 23 | #include <linux/ceph/auth.h> | ||
| 24 | #include <linux/ceph/debugfs.h> | ||
| 22 | 25 | ||
| 23 | /* | 26 | /* |
| 24 | * Ceph superblock operations | 27 | * Ceph superblock operations |
| @@ -26,36 +29,22 @@ | |||
| 26 | * Handle the basics of mounting, unmounting. | 29 | * Handle the basics of mounting, unmounting. |
| 27 | */ | 30 | */ |
| 28 | 31 | ||
| 29 | |||
| 30 | /* | ||
| 31 | * find filename portion of a path (/foo/bar/baz -> baz) | ||
| 32 | */ | ||
| 33 | const char *ceph_file_part(const char *s, int len) | ||
| 34 | { | ||
| 35 | const char *e = s + len; | ||
| 36 | |||
| 37 | while (e != s && *(e-1) != '/') | ||
| 38 | e--; | ||
| 39 | return e; | ||
| 40 | } | ||
| 41 | |||
| 42 | |||
| 43 | /* | 32 | /* |
| 44 | * super ops | 33 | * super ops |
| 45 | */ | 34 | */ |
| 46 | static void ceph_put_super(struct super_block *s) | 35 | static void ceph_put_super(struct super_block *s) |
| 47 | { | 36 | { |
| 48 | struct ceph_client *client = ceph_sb_to_client(s); | 37 | struct ceph_fs_client *fsc = ceph_sb_to_client(s); |
| 49 | 38 | ||
| 50 | dout("put_super\n"); | 39 | dout("put_super\n"); |
| 51 | ceph_mdsc_close_sessions(&client->mdsc); | 40 | ceph_mdsc_close_sessions(fsc->mdsc); |
| 52 | 41 | ||
| 53 | /* | 42 | /* |
| 54 | * ensure we release the bdi before put_anon_super releases | 43 | * ensure we release the bdi before put_anon_super releases |
| 55 | * the device name. | 44 | * the device name. |
| 56 | */ | 45 | */ |
| 57 | if (s->s_bdi == &client->backing_dev_info) { | 46 | if (s->s_bdi == &fsc->backing_dev_info) { |
| 58 | bdi_unregister(&client->backing_dev_info); | 47 | bdi_unregister(&fsc->backing_dev_info); |
| 59 | s->s_bdi = NULL; | 48 | s->s_bdi = NULL; |
| 60 | } | 49 | } |
| 61 | 50 | ||
| @@ -64,14 +53,14 @@ static void ceph_put_super(struct super_block *s) | |||
| 64 | 53 | ||
| 65 | static int ceph_statfs(struct dentry *dentry, struct kstatfs *buf) | 54 | static int ceph_statfs(struct dentry *dentry, struct kstatfs *buf) |
| 66 | { | 55 | { |
| 67 | struct ceph_client *client = ceph_inode_to_client(dentry->d_inode); | 56 | struct ceph_fs_client *fsc = ceph_inode_to_client(dentry->d_inode); |
| 68 | struct ceph_monmap *monmap = client->monc.monmap; | 57 | struct ceph_monmap *monmap = fsc->client->monc.monmap; |
| 69 | struct ceph_statfs st; | 58 | struct ceph_statfs st; |
| 70 | u64 fsid; | 59 | u64 fsid; |
| 71 | int err; | 60 | int err; |
| 72 | 61 | ||
| 73 | dout("statfs\n"); | 62 | dout("statfs\n"); |
| 74 | err = ceph_monc_do_statfs(&client->monc, &st); | 63 | err = ceph_monc_do_statfs(&fsc->client->monc, &st); |
| 75 | if (err < 0) | 64 | if (err < 0) |
| 76 | return err; | 65 | return err; |
| 77 | 66 | ||
| @@ -104,238 +93,28 @@ static int ceph_statfs(struct dentry *dentry, struct kstatfs *buf) | |||
| 104 | 93 | ||
| 105 | static int ceph_sync_fs(struct super_block *sb, int wait) | 94 | static int ceph_sync_fs(struct super_block *sb, int wait) |
| 106 | { | 95 | { |
| 107 | struct ceph_client *client = ceph_sb_to_client(sb); | 96 | struct ceph_fs_client *fsc = ceph_sb_to_client(sb); |
| 108 | 97 | ||
| 109 | if (!wait) { | 98 | if (!wait) { |
| 110 | dout("sync_fs (non-blocking)\n"); | 99 | dout("sync_fs (non-blocking)\n"); |
| 111 | ceph_flush_dirty_caps(&client->mdsc); | 100 | ceph_flush_dirty_caps(fsc->mdsc); |
| 112 | dout("sync_fs (non-blocking) done\n"); | 101 | dout("sync_fs (non-blocking) done\n"); |
| 113 | return 0; | 102 | return 0; |
| 114 | } | 103 | } |
| 115 | 104 | ||
| 116 | dout("sync_fs (blocking)\n"); | 105 | dout("sync_fs (blocking)\n"); |
| 117 | ceph_osdc_sync(&ceph_sb_to_client(sb)->osdc); | 106 | ceph_osdc_sync(&fsc->client->osdc); |
| 118 | ceph_mdsc_sync(&ceph_sb_to_client(sb)->mdsc); | 107 | ceph_mdsc_sync(fsc->mdsc); |
| 119 | dout("sync_fs (blocking) done\n"); | 108 | dout("sync_fs (blocking) done\n"); |
| 120 | return 0; | 109 | return 0; |
| 121 | } | 110 | } |
| 122 | 111 | ||
| 123 | static int default_congestion_kb(void) | ||
| 124 | { | ||
| 125 | int congestion_kb; | ||
| 126 | |||
| 127 | /* | ||
| 128 | * Copied from NFS | ||
| 129 | * | ||
| 130 | * congestion size, scale with available memory. | ||
| 131 | * | ||
| 132 | * 64MB: 8192k | ||
| 133 | * 128MB: 11585k | ||
| 134 | * 256MB: 16384k | ||
| 135 | * 512MB: 23170k | ||
| 136 | * 1GB: 32768k | ||
| 137 | * 2GB: 46340k | ||
| 138 | * 4GB: 65536k | ||
| 139 | * 8GB: 92681k | ||
| 140 | * 16GB: 131072k | ||
| 141 | * | ||
| 142 | * This allows larger machines to have larger/more transfers. | ||
| 143 | * Limit the default to 256M | ||
| 144 | */ | ||
| 145 | congestion_kb = (16*int_sqrt(totalram_pages)) << (PAGE_SHIFT-10); | ||
| 146 | if (congestion_kb > 256*1024) | ||
| 147 | congestion_kb = 256*1024; | ||
| 148 | |||
| 149 | return congestion_kb; | ||
| 150 | } | ||
| 151 | |||
| 152 | /** | ||
| 153 | * ceph_show_options - Show mount options in /proc/mounts | ||
| 154 | * @m: seq_file to write to | ||
| 155 | * @mnt: mount descriptor | ||
| 156 | */ | ||
| 157 | static int ceph_show_options(struct seq_file *m, struct vfsmount *mnt) | ||
| 158 | { | ||
| 159 | struct ceph_client *client = ceph_sb_to_client(mnt->mnt_sb); | ||
| 160 | struct ceph_mount_args *args = client->mount_args; | ||
| 161 | |||
| 162 | if (args->flags & CEPH_OPT_FSID) | ||
| 163 | seq_printf(m, ",fsid=%pU", &args->fsid); | ||
| 164 | if (args->flags & CEPH_OPT_NOSHARE) | ||
| 165 | seq_puts(m, ",noshare"); | ||
| 166 | if (args->flags & CEPH_OPT_DIRSTAT) | ||
| 167 | seq_puts(m, ",dirstat"); | ||
| 168 | if ((args->flags & CEPH_OPT_RBYTES) == 0) | ||
| 169 | seq_puts(m, ",norbytes"); | ||
| 170 | if (args->flags & CEPH_OPT_NOCRC) | ||
| 171 | seq_puts(m, ",nocrc"); | ||
| 172 | if (args->flags & CEPH_OPT_NOASYNCREADDIR) | ||
| 173 | seq_puts(m, ",noasyncreaddir"); | ||
| 174 | |||
| 175 | if (args->mount_timeout != CEPH_MOUNT_TIMEOUT_DEFAULT) | ||
| 176 | seq_printf(m, ",mount_timeout=%d", args->mount_timeout); | ||
| 177 | if (args->osd_idle_ttl != CEPH_OSD_IDLE_TTL_DEFAULT) | ||
| 178 | seq_printf(m, ",osd_idle_ttl=%d", args->osd_idle_ttl); | ||
| 179 | if (args->osd_timeout != CEPH_OSD_TIMEOUT_DEFAULT) | ||
| 180 | seq_printf(m, ",osdtimeout=%d", args->osd_timeout); | ||
| 181 | if (args->osd_keepalive_timeout != CEPH_OSD_KEEPALIVE_DEFAULT) | ||
| 182 | seq_printf(m, ",osdkeepalivetimeout=%d", | ||
| 183 | args->osd_keepalive_timeout); | ||
| 184 | if (args->wsize) | ||
| 185 | seq_printf(m, ",wsize=%d", args->wsize); | ||
| 186 | if (args->rsize != CEPH_MOUNT_RSIZE_DEFAULT) | ||
| 187 | seq_printf(m, ",rsize=%d", args->rsize); | ||
| 188 | if (args->congestion_kb != default_congestion_kb()) | ||
| 189 | seq_printf(m, ",write_congestion_kb=%d", args->congestion_kb); | ||
| 190 | if (args->caps_wanted_delay_min != CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT) | ||
| 191 | seq_printf(m, ",caps_wanted_delay_min=%d", | ||
| 192 | args->caps_wanted_delay_min); | ||
| 193 | if (args->caps_wanted_delay_max != CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT) | ||
| 194 | seq_printf(m, ",caps_wanted_delay_max=%d", | ||
| 195 | args->caps_wanted_delay_max); | ||
| 196 | if (args->cap_release_safety != CEPH_CAP_RELEASE_SAFETY_DEFAULT) | ||
| 197 | seq_printf(m, ",cap_release_safety=%d", | ||
| 198 | args->cap_release_safety); | ||
| 199 | if (args->max_readdir != CEPH_MAX_READDIR_DEFAULT) | ||
| 200 | seq_printf(m, ",readdir_max_entries=%d", args->max_readdir); | ||
| 201 | if (args->max_readdir_bytes != CEPH_MAX_READDIR_BYTES_DEFAULT) | ||
| 202 | seq_printf(m, ",readdir_max_bytes=%d", args->max_readdir_bytes); | ||
| 203 | if (strcmp(args->snapdir_name, CEPH_SNAPDIRNAME_DEFAULT)) | ||
| 204 | seq_printf(m, ",snapdirname=%s", args->snapdir_name); | ||
| 205 | if (args->name) | ||
| 206 | seq_printf(m, ",name=%s", args->name); | ||
| 207 | if (args->secret) | ||
| 208 | seq_puts(m, ",secret=<hidden>"); | ||
| 209 | return 0; | ||
| 210 | } | ||
| 211 | |||
| 212 | /* | ||
| 213 | * caches | ||
| 214 | */ | ||
| 215 | struct kmem_cache *ceph_inode_cachep; | ||
| 216 | struct kmem_cache *ceph_cap_cachep; | ||
| 217 | struct kmem_cache *ceph_dentry_cachep; | ||
| 218 | struct kmem_cache *ceph_file_cachep; | ||
| 219 | |||
| 220 | static void ceph_inode_init_once(void *foo) | ||
| 221 | { | ||
| 222 | struct ceph_inode_info *ci = foo; | ||
| 223 | inode_init_once(&ci->vfs_inode); | ||
| 224 | } | ||
| 225 | |||
| 226 | static int __init init_caches(void) | ||
| 227 | { | ||
| 228 | ceph_inode_cachep = kmem_cache_create("ceph_inode_info", | ||
| 229 | sizeof(struct ceph_inode_info), | ||
| 230 | __alignof__(struct ceph_inode_info), | ||
| 231 | (SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD), | ||
| 232 | ceph_inode_init_once); | ||
| 233 | if (ceph_inode_cachep == NULL) | ||
| 234 | return -ENOMEM; | ||
| 235 | |||
| 236 | ceph_cap_cachep = KMEM_CACHE(ceph_cap, | ||
| 237 | SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD); | ||
| 238 | if (ceph_cap_cachep == NULL) | ||
| 239 | goto bad_cap; | ||
| 240 | |||
| 241 | ceph_dentry_cachep = KMEM_CACHE(ceph_dentry_info, | ||
| 242 | SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD); | ||
| 243 | if (ceph_dentry_cachep == NULL) | ||
| 244 | goto bad_dentry; | ||
| 245 | |||
| 246 | ceph_file_cachep = KMEM_CACHE(ceph_file_info, | ||
| 247 | SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD); | ||
| 248 | if (ceph_file_cachep == NULL) | ||
| 249 | goto bad_file; | ||
| 250 | |||
| 251 | return 0; | ||
| 252 | |||
| 253 | bad_file: | ||
| 254 | kmem_cache_destroy(ceph_dentry_cachep); | ||
| 255 | bad_dentry: | ||
| 256 | kmem_cache_destroy(ceph_cap_cachep); | ||
| 257 | bad_cap: | ||
| 258 | kmem_cache_destroy(ceph_inode_cachep); | ||
| 259 | return -ENOMEM; | ||
| 260 | } | ||
| 261 | |||
| 262 | static void destroy_caches(void) | ||
| 263 | { | ||
| 264 | kmem_cache_destroy(ceph_inode_cachep); | ||
| 265 | kmem_cache_destroy(ceph_cap_cachep); | ||
| 266 | kmem_cache_destroy(ceph_dentry_cachep); | ||
| 267 | kmem_cache_destroy(ceph_file_cachep); | ||
| 268 | } | ||
| 269 | |||
| 270 | |||
| 271 | /* | ||
| 272 | * ceph_umount_begin - initiate forced umount. Tear down down the | ||
| 273 | * mount, skipping steps that may hang while waiting for server(s). | ||
| 274 | */ | ||
| 275 | static void ceph_umount_begin(struct super_block *sb) | ||
| 276 | { | ||
| 277 | struct ceph_client *client = ceph_sb_to_client(sb); | ||
| 278 | |||
| 279 | dout("ceph_umount_begin - starting forced umount\n"); | ||
| 280 | if (!client) | ||
| 281 | return; | ||
| 282 | client->mount_state = CEPH_MOUNT_SHUTDOWN; | ||
| 283 | return; | ||
| 284 | } | ||
| 285 | |||
| 286 | static const struct super_operations ceph_super_ops = { | ||
| 287 | .alloc_inode = ceph_alloc_inode, | ||
| 288 | .destroy_inode = ceph_destroy_inode, | ||
| 289 | .write_inode = ceph_write_inode, | ||
| 290 | .sync_fs = ceph_sync_fs, | ||
| 291 | .put_super = ceph_put_super, | ||
| 292 | .show_options = ceph_show_options, | ||
| 293 | .statfs = ceph_statfs, | ||
| 294 | .umount_begin = ceph_umount_begin, | ||
| 295 | }; | ||
| 296 | |||
| 297 | |||
| 298 | const char *ceph_msg_type_name(int type) | ||
| 299 | { | ||
| 300 | switch (type) { | ||
| 301 | case CEPH_MSG_SHUTDOWN: return "shutdown"; | ||
| 302 | case CEPH_MSG_PING: return "ping"; | ||
| 303 | case CEPH_MSG_AUTH: return "auth"; | ||
| 304 | case CEPH_MSG_AUTH_REPLY: return "auth_reply"; | ||
| 305 | case CEPH_MSG_MON_MAP: return "mon_map"; | ||
| 306 | case CEPH_MSG_MON_GET_MAP: return "mon_get_map"; | ||
| 307 | case CEPH_MSG_MON_SUBSCRIBE: return "mon_subscribe"; | ||
| 308 | case CEPH_MSG_MON_SUBSCRIBE_ACK: return "mon_subscribe_ack"; | ||
| 309 | case CEPH_MSG_STATFS: return "statfs"; | ||
| 310 | case CEPH_MSG_STATFS_REPLY: return "statfs_reply"; | ||
| 311 | case CEPH_MSG_MDS_MAP: return "mds_map"; | ||
| 312 | case CEPH_MSG_CLIENT_SESSION: return "client_session"; | ||
| 313 | case CEPH_MSG_CLIENT_RECONNECT: return "client_reconnect"; | ||
| 314 | case CEPH_MSG_CLIENT_REQUEST: return "client_request"; | ||
| 315 | case CEPH_MSG_CLIENT_REQUEST_FORWARD: return "client_request_forward"; | ||
| 316 | case CEPH_MSG_CLIENT_REPLY: return "client_reply"; | ||
| 317 | case CEPH_MSG_CLIENT_CAPS: return "client_caps"; | ||
| 318 | case CEPH_MSG_CLIENT_CAPRELEASE: return "client_cap_release"; | ||
| 319 | case CEPH_MSG_CLIENT_SNAP: return "client_snap"; | ||
| 320 | case CEPH_MSG_CLIENT_LEASE: return "client_lease"; | ||
| 321 | case CEPH_MSG_OSD_MAP: return "osd_map"; | ||
| 322 | case CEPH_MSG_OSD_OP: return "osd_op"; | ||
| 323 | case CEPH_MSG_OSD_OPREPLY: return "osd_opreply"; | ||
| 324 | default: return "unknown"; | ||
| 325 | } | ||
| 326 | } | ||
| 327 | |||
| 328 | |||
| 329 | /* | 112 | /* |
| 330 | * mount options | 113 | * mount options |
| 331 | */ | 114 | */ |
| 332 | enum { | 115 | enum { |
| 333 | Opt_wsize, | 116 | Opt_wsize, |
| 334 | Opt_rsize, | 117 | Opt_rsize, |
| 335 | Opt_osdtimeout, | ||
| 336 | Opt_osdkeepalivetimeout, | ||
| 337 | Opt_mount_timeout, | ||
| 338 | Opt_osd_idle_ttl, | ||
| 339 | Opt_caps_wanted_delay_min, | 118 | Opt_caps_wanted_delay_min, |
| 340 | Opt_caps_wanted_delay_max, | 119 | Opt_caps_wanted_delay_max, |
| 341 | Opt_cap_release_safety, | 120 | Opt_cap_release_safety, |
| @@ -344,29 +123,19 @@ enum { | |||
| 344 | Opt_congestion_kb, | 123 | Opt_congestion_kb, |
| 345 | Opt_last_int, | 124 | Opt_last_int, |
| 346 | /* int args above */ | 125 | /* int args above */ |
| 347 | Opt_fsid, | ||
| 348 | Opt_snapdirname, | 126 | Opt_snapdirname, |
| 349 | Opt_name, | ||
| 350 | Opt_secret, | ||
| 351 | Opt_last_string, | 127 | Opt_last_string, |
| 352 | /* string args above */ | 128 | /* string args above */ |
| 353 | Opt_ip, | ||
| 354 | Opt_noshare, | ||
| 355 | Opt_dirstat, | 129 | Opt_dirstat, |
| 356 | Opt_nodirstat, | 130 | Opt_nodirstat, |
| 357 | Opt_rbytes, | 131 | Opt_rbytes, |
| 358 | Opt_norbytes, | 132 | Opt_norbytes, |
| 359 | Opt_nocrc, | ||
| 360 | Opt_noasyncreaddir, | 133 | Opt_noasyncreaddir, |
| 361 | }; | 134 | }; |
| 362 | 135 | ||
| 363 | static match_table_t arg_tokens = { | 136 | static match_table_t fsopt_tokens = { |
| 364 | {Opt_wsize, "wsize=%d"}, | 137 | {Opt_wsize, "wsize=%d"}, |
| 365 | {Opt_rsize, "rsize=%d"}, | 138 | {Opt_rsize, "rsize=%d"}, |
| 366 | {Opt_osdtimeout, "osdtimeout=%d"}, | ||
| 367 | {Opt_osdkeepalivetimeout, "osdkeepalive=%d"}, | ||
| 368 | {Opt_mount_timeout, "mount_timeout=%d"}, | ||
| 369 | {Opt_osd_idle_ttl, "osd_idle_ttl=%d"}, | ||
| 370 | {Opt_caps_wanted_delay_min, "caps_wanted_delay_min=%d"}, | 139 | {Opt_caps_wanted_delay_min, "caps_wanted_delay_min=%d"}, |
| 371 | {Opt_caps_wanted_delay_max, "caps_wanted_delay_max=%d"}, | 140 | {Opt_caps_wanted_delay_max, "caps_wanted_delay_max=%d"}, |
| 372 | {Opt_cap_release_safety, "cap_release_safety=%d"}, | 141 | {Opt_cap_release_safety, "cap_release_safety=%d"}, |
| @@ -374,403 +143,459 @@ static match_table_t arg_tokens = { | |||
| 374 | {Opt_readdir_max_bytes, "readdir_max_bytes=%d"}, | 143 | {Opt_readdir_max_bytes, "readdir_max_bytes=%d"}, |
| 375 | {Opt_congestion_kb, "write_congestion_kb=%d"}, | 144 | {Opt_congestion_kb, "write_congestion_kb=%d"}, |
| 376 | /* int args above */ | 145 | /* int args above */ |
| 377 | {Opt_fsid, "fsid=%s"}, | ||
| 378 | {Opt_snapdirname, "snapdirname=%s"}, | 146 | {Opt_snapdirname, "snapdirname=%s"}, |
| 379 | {Opt_name, "name=%s"}, | ||
| 380 | {Opt_secret, "secret=%s"}, | ||
| 381 | /* string args above */ | 147 | /* string args above */ |
| 382 | {Opt_ip, "ip=%s"}, | ||
| 383 | {Opt_noshare, "noshare"}, | ||
| 384 | {Opt_dirstat, "dirstat"}, | 148 | {Opt_dirstat, "dirstat"}, |
| 385 | {Opt_nodirstat, "nodirstat"}, | 149 | {Opt_nodirstat, "nodirstat"}, |
| 386 | {Opt_rbytes, "rbytes"}, | 150 | {Opt_rbytes, "rbytes"}, |
| 387 | {Opt_norbytes, "norbytes"}, | 151 | {Opt_norbytes, "norbytes"}, |
| 388 | {Opt_nocrc, "nocrc"}, | ||
| 389 | {Opt_noasyncreaddir, "noasyncreaddir"}, | 152 | {Opt_noasyncreaddir, "noasyncreaddir"}, |
| 390 | {-1, NULL} | 153 | {-1, NULL} |
| 391 | }; | 154 | }; |
| 392 | 155 | ||
| 393 | static int parse_fsid(const char *str, struct ceph_fsid *fsid) | 156 | static int parse_fsopt_token(char *c, void *private) |
| 394 | { | 157 | { |
| 395 | int i = 0; | 158 | struct ceph_mount_options *fsopt = private; |
| 396 | char tmp[3]; | 159 | substring_t argstr[MAX_OPT_ARGS]; |
| 397 | int err = -EINVAL; | 160 | int token, intval, ret; |
| 398 | int d; | 161 | |
| 399 | 162 | token = match_token((char *)c, fsopt_tokens, argstr); | |
| 400 | dout("parse_fsid '%s'\n", str); | 163 | if (token < 0) |
| 401 | tmp[2] = 0; | 164 | return -EINVAL; |
| 402 | while (*str && i < 16) { | 165 | |
| 403 | if (ispunct(*str)) { | 166 | if (token < Opt_last_int) { |
| 404 | str++; | 167 | ret = match_int(&argstr[0], &intval); |
| 405 | continue; | 168 | if (ret < 0) { |
| 169 | pr_err("bad mount option arg (not int) " | ||
| 170 | "at '%s'\n", c); | ||
| 171 | return ret; | ||
| 406 | } | 172 | } |
| 407 | if (!isxdigit(str[0]) || !isxdigit(str[1])) | 173 | dout("got int token %d val %d\n", token, intval); |
| 408 | break; | 174 | } else if (token > Opt_last_int && token < Opt_last_string) { |
| 409 | tmp[0] = str[0]; | 175 | dout("got string token %d val %s\n", token, |
| 410 | tmp[1] = str[1]; | 176 | argstr[0].from); |
| 411 | if (sscanf(tmp, "%x", &d) < 1) | 177 | } else { |
| 412 | break; | 178 | dout("got token %d\n", token); |
| 413 | fsid->fsid[i] = d & 0xff; | ||
| 414 | i++; | ||
| 415 | str += 2; | ||
| 416 | } | 179 | } |
| 417 | 180 | ||
| 418 | if (i == 16) | 181 | switch (token) { |
| 419 | err = 0; | 182 | case Opt_snapdirname: |
| 420 | dout("parse_fsid ret %d got fsid %pU", err, fsid); | 183 | kfree(fsopt->snapdir_name); |
| 421 | return err; | 184 | fsopt->snapdir_name = kstrndup(argstr[0].from, |
| 185 | argstr[0].to-argstr[0].from, | ||
| 186 | GFP_KERNEL); | ||
| 187 | if (!fsopt->snapdir_name) | ||
| 188 | return -ENOMEM; | ||
| 189 | break; | ||
| 190 | |||
| 191 | /* misc */ | ||
| 192 | case Opt_wsize: | ||
| 193 | fsopt->wsize = intval; | ||
| 194 | break; | ||
| 195 | case Opt_rsize: | ||
| 196 | fsopt->rsize = intval; | ||
| 197 | break; | ||
| 198 | case Opt_caps_wanted_delay_min: | ||
| 199 | fsopt->caps_wanted_delay_min = intval; | ||
| 200 | break; | ||
| 201 | case Opt_caps_wanted_delay_max: | ||
| 202 | fsopt->caps_wanted_delay_max = intval; | ||
| 203 | break; | ||
| 204 | case Opt_readdir_max_entries: | ||
| 205 | fsopt->max_readdir = intval; | ||
| 206 | break; | ||
| 207 | case Opt_readdir_max_bytes: | ||
| 208 | fsopt->max_readdir_bytes = intval; | ||
| 209 | break; | ||
| 210 | case Opt_congestion_kb: | ||
| 211 | fsopt->congestion_kb = intval; | ||
| 212 | break; | ||
| 213 | case Opt_dirstat: | ||
| 214 | fsopt->flags |= CEPH_MOUNT_OPT_DIRSTAT; | ||
| 215 | break; | ||
| 216 | case Opt_nodirstat: | ||
| 217 | fsopt->flags &= ~CEPH_MOUNT_OPT_DIRSTAT; | ||
| 218 | break; | ||
| 219 | case Opt_rbytes: | ||
| 220 | fsopt->flags |= CEPH_MOUNT_OPT_RBYTES; | ||
| 221 | break; | ||
| 222 | case Opt_norbytes: | ||
| 223 | fsopt->flags &= ~CEPH_MOUNT_OPT_RBYTES; | ||
| 224 | break; | ||
| 225 | case Opt_noasyncreaddir: | ||
| 226 | fsopt->flags |= CEPH_MOUNT_OPT_NOASYNCREADDIR; | ||
| 227 | break; | ||
| 228 | default: | ||
| 229 | BUG_ON(token); | ||
| 230 | } | ||
| 231 | return 0; | ||
| 422 | } | 232 | } |
| 423 | 233 | ||
| 424 | static struct ceph_mount_args *parse_mount_args(int flags, char *options, | 234 | static void destroy_mount_options(struct ceph_mount_options *args) |
| 425 | const char *dev_name, | ||
| 426 | const char **path) | ||
| 427 | { | 235 | { |
| 428 | struct ceph_mount_args *args; | 236 | dout("destroy_mount_options %p\n", args); |
| 429 | const char *c; | 237 | kfree(args->snapdir_name); |
| 430 | int err = -ENOMEM; | 238 | kfree(args); |
| 431 | substring_t argstr[MAX_OPT_ARGS]; | 239 | } |
| 432 | 240 | ||
| 433 | args = kzalloc(sizeof(*args), GFP_KERNEL); | 241 | static int strcmp_null(const char *s1, const char *s2) |
| 434 | if (!args) | 242 | { |
| 435 | return ERR_PTR(-ENOMEM); | 243 | if (!s1 && !s2) |
| 436 | args->mon_addr = kcalloc(CEPH_MAX_MON, sizeof(*args->mon_addr), | 244 | return 0; |
| 437 | GFP_KERNEL); | 245 | if (s1 && !s2) |
| 438 | if (!args->mon_addr) | 246 | return -1; |
| 439 | goto out; | 247 | if (!s1 && s2) |
| 248 | return 1; | ||
| 249 | return strcmp(s1, s2); | ||
| 250 | } | ||
| 440 | 251 | ||
| 441 | dout("parse_mount_args %p, dev_name '%s'\n", args, dev_name); | 252 | static int compare_mount_options(struct ceph_mount_options *new_fsopt, |
| 442 | 253 | struct ceph_options *new_opt, | |
| 443 | /* start with defaults */ | 254 | struct ceph_fs_client *fsc) |
| 444 | args->sb_flags = flags; | 255 | { |
| 445 | args->flags = CEPH_OPT_DEFAULT; | 256 | struct ceph_mount_options *fsopt1 = new_fsopt; |
| 446 | args->osd_timeout = CEPH_OSD_TIMEOUT_DEFAULT; | 257 | struct ceph_mount_options *fsopt2 = fsc->mount_options; |
| 447 | args->osd_keepalive_timeout = CEPH_OSD_KEEPALIVE_DEFAULT; | 258 | int ofs = offsetof(struct ceph_mount_options, snapdir_name); |
| 448 | args->mount_timeout = CEPH_MOUNT_TIMEOUT_DEFAULT; /* seconds */ | 259 | int ret; |
| 449 | args->osd_idle_ttl = CEPH_OSD_IDLE_TTL_DEFAULT; /* seconds */ | ||
| 450 | args->caps_wanted_delay_min = CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT; | ||
| 451 | args->caps_wanted_delay_max = CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT; | ||
| 452 | args->rsize = CEPH_MOUNT_RSIZE_DEFAULT; | ||
| 453 | args->snapdir_name = kstrdup(CEPH_SNAPDIRNAME_DEFAULT, GFP_KERNEL); | ||
| 454 | args->cap_release_safety = CEPH_CAP_RELEASE_SAFETY_DEFAULT; | ||
| 455 | args->max_readdir = CEPH_MAX_READDIR_DEFAULT; | ||
| 456 | args->max_readdir_bytes = CEPH_MAX_READDIR_BYTES_DEFAULT; | ||
| 457 | args->congestion_kb = default_congestion_kb(); | ||
| 458 | |||
| 459 | /* ip1[:port1][,ip2[:port2]...]:/subdir/in/fs */ | ||
| 460 | err = -EINVAL; | ||
| 461 | if (!dev_name) | ||
| 462 | goto out; | ||
| 463 | *path = strstr(dev_name, ":/"); | ||
| 464 | if (*path == NULL) { | ||
| 465 | pr_err("device name is missing path (no :/ in %s)\n", | ||
| 466 | dev_name); | ||
| 467 | goto out; | ||
| 468 | } | ||
| 469 | 260 | ||
| 470 | /* get mon ip(s) */ | 261 | ret = memcmp(fsopt1, fsopt2, ofs); |
| 471 | err = ceph_parse_ips(dev_name, *path, args->mon_addr, | 262 | if (ret) |
| 472 | CEPH_MAX_MON, &args->num_mon); | 263 | return ret; |
| 473 | if (err < 0) | 264 | |
| 474 | goto out; | 265 | ret = strcmp_null(fsopt1->snapdir_name, fsopt2->snapdir_name); |
| 266 | if (ret) | ||
| 267 | return ret; | ||
| 268 | |||
| 269 | return ceph_compare_options(new_opt, fsc->client); | ||
| 270 | } | ||
| 271 | |||
| 272 | static int parse_mount_options(struct ceph_mount_options **pfsopt, | ||
| 273 | struct ceph_options **popt, | ||
| 274 | int flags, char *options, | ||
| 275 | const char *dev_name, | ||
| 276 | const char **path) | ||
| 277 | { | ||
| 278 | struct ceph_mount_options *fsopt; | ||
| 279 | const char *dev_name_end; | ||
| 280 | int err = -ENOMEM; | ||
| 281 | |||
| 282 | fsopt = kzalloc(sizeof(*fsopt), GFP_KERNEL); | ||
| 283 | if (!fsopt) | ||
| 284 | return -ENOMEM; | ||
| 285 | |||
| 286 | dout("parse_mount_options %p, dev_name '%s'\n", fsopt, dev_name); | ||
| 287 | |||
| 288 | fsopt->sb_flags = flags; | ||
| 289 | fsopt->flags = CEPH_MOUNT_OPT_DEFAULT; | ||
| 290 | |||
| 291 | fsopt->rsize = CEPH_MOUNT_RSIZE_DEFAULT; | ||
| 292 | fsopt->snapdir_name = kstrdup(CEPH_SNAPDIRNAME_DEFAULT, GFP_KERNEL); | ||
| 293 | fsopt->cap_release_safety = CEPH_CAP_RELEASE_SAFETY_DEFAULT; | ||
| 294 | fsopt->max_readdir = CEPH_MAX_READDIR_DEFAULT; | ||
| 295 | fsopt->max_readdir_bytes = CEPH_MAX_READDIR_BYTES_DEFAULT; | ||
| 296 | fsopt->congestion_kb = default_congestion_kb(); | ||
| 297 | |||
| 298 | /* ip1[:port1][,ip2[:port2]...]:/subdir/in/fs */ | ||
| 299 | err = -EINVAL; | ||
| 300 | if (!dev_name) | ||
| 301 | goto out; | ||
| 302 | *path = strstr(dev_name, ":/"); | ||
| 303 | if (*path == NULL) { | ||
| 304 | pr_err("device name is missing path (no :/ in %s)\n", | ||
| 305 | dev_name); | ||
| 306 | goto out; | ||
| 307 | } | ||
| 308 | dev_name_end = *path; | ||
| 309 | dout("device name '%.*s'\n", (int)(dev_name_end - dev_name), dev_name); | ||
| 475 | 310 | ||
| 476 | /* path on server */ | 311 | /* path on server */ |
| 477 | *path += 2; | 312 | *path += 2; |
| 478 | dout("server path '%s'\n", *path); | 313 | dout("server path '%s'\n", *path); |
| 479 | 314 | ||
| 480 | /* parse mount options */ | 315 | err = ceph_parse_options(popt, options, dev_name, dev_name_end, |
| 481 | while ((c = strsep(&options, ",")) != NULL) { | 316 | parse_fsopt_token, (void *)fsopt); |
| 482 | int token, intval, ret; | 317 | if (err) |
| 483 | if (!*c) | 318 | goto out; |
| 484 | continue; | 319 | |
| 485 | err = -EINVAL; | 320 | /* success */ |
| 486 | token = match_token((char *)c, arg_tokens, argstr); | 321 | *pfsopt = fsopt; |
| 487 | if (token < 0) { | 322 | return 0; |
| 488 | pr_err("bad mount option at '%s'\n", c); | ||
| 489 | goto out; | ||
| 490 | } | ||
| 491 | if (token < Opt_last_int) { | ||
| 492 | ret = match_int(&argstr[0], &intval); | ||
| 493 | if (ret < 0) { | ||
| 494 | pr_err("bad mount option arg (not int) " | ||
| 495 | "at '%s'\n", c); | ||
| 496 | continue; | ||
| 497 | } | ||
| 498 | dout("got int token %d val %d\n", token, intval); | ||
| 499 | } else if (token > Opt_last_int && token < Opt_last_string) { | ||
| 500 | dout("got string token %d val %s\n", token, | ||
| 501 | argstr[0].from); | ||
| 502 | } else { | ||
| 503 | dout("got token %d\n", token); | ||
| 504 | } | ||
| 505 | switch (token) { | ||
| 506 | case Opt_ip: | ||
| 507 | err = ceph_parse_ips(argstr[0].from, | ||
| 508 | argstr[0].to, | ||
| 509 | &args->my_addr, | ||
| 510 | 1, NULL); | ||
| 511 | if (err < 0) | ||
| 512 | goto out; | ||
| 513 | args->flags |= CEPH_OPT_MYIP; | ||
| 514 | break; | ||
| 515 | |||
| 516 | case Opt_fsid: | ||
| 517 | err = parse_fsid(argstr[0].from, &args->fsid); | ||
| 518 | if (err == 0) | ||
| 519 | args->flags |= CEPH_OPT_FSID; | ||
| 520 | break; | ||
| 521 | case Opt_snapdirname: | ||
| 522 | kfree(args->snapdir_name); | ||
| 523 | args->snapdir_name = kstrndup(argstr[0].from, | ||
| 524 | argstr[0].to-argstr[0].from, | ||
| 525 | GFP_KERNEL); | ||
| 526 | break; | ||
| 527 | case Opt_name: | ||
| 528 | args->name = kstrndup(argstr[0].from, | ||
| 529 | argstr[0].to-argstr[0].from, | ||
| 530 | GFP_KERNEL); | ||
| 531 | break; | ||
| 532 | case Opt_secret: | ||
| 533 | args->secret = kstrndup(argstr[0].from, | ||
| 534 | argstr[0].to-argstr[0].from, | ||
| 535 | GFP_KERNEL); | ||
| 536 | break; | ||
| 537 | |||
| 538 | /* misc */ | ||
| 539 | case Opt_wsize: | ||
| 540 | args->wsize = intval; | ||
| 541 | break; | ||
| 542 | case Opt_rsize: | ||
| 543 | args->rsize = intval; | ||
| 544 | break; | ||
| 545 | case Opt_osdtimeout: | ||
| 546 | args->osd_timeout = intval; | ||
| 547 | break; | ||
| 548 | case Opt_osdkeepalivetimeout: | ||
| 549 | args->osd_keepalive_timeout = intval; | ||
| 550 | break; | ||
| 551 | case Opt_osd_idle_ttl: | ||
| 552 | args->osd_idle_ttl = intval; | ||
| 553 | break; | ||
| 554 | case Opt_mount_timeout: | ||
| 555 | args->mount_timeout = intval; | ||
| 556 | break; | ||
| 557 | case Opt_caps_wanted_delay_min: | ||
| 558 | args->caps_wanted_delay_min = intval; | ||
| 559 | break; | ||
| 560 | case Opt_caps_wanted_delay_max: | ||
| 561 | args->caps_wanted_delay_max = intval; | ||
| 562 | break; | ||
| 563 | case Opt_readdir_max_entries: | ||
| 564 | args->max_readdir = intval; | ||
| 565 | break; | ||
| 566 | case Opt_readdir_max_bytes: | ||
| 567 | args->max_readdir_bytes = intval; | ||
| 568 | break; | ||
| 569 | case Opt_congestion_kb: | ||
| 570 | args->congestion_kb = intval; | ||
| 571 | break; | ||
| 572 | |||
| 573 | case Opt_noshare: | ||
| 574 | args->flags |= CEPH_OPT_NOSHARE; | ||
| 575 | break; | ||
| 576 | |||
| 577 | case Opt_dirstat: | ||
| 578 | args->flags |= CEPH_OPT_DIRSTAT; | ||
| 579 | break; | ||
| 580 | case Opt_nodirstat: | ||
| 581 | args->flags &= ~CEPH_OPT_DIRSTAT; | ||
| 582 | break; | ||
| 583 | case Opt_rbytes: | ||
| 584 | args->flags |= CEPH_OPT_RBYTES; | ||
| 585 | break; | ||
| 586 | case Opt_norbytes: | ||
| 587 | args->flags &= ~CEPH_OPT_RBYTES; | ||
| 588 | break; | ||
| 589 | case Opt_nocrc: | ||
| 590 | args->flags |= CEPH_OPT_NOCRC; | ||
| 591 | break; | ||
| 592 | case Opt_noasyncreaddir: | ||
| 593 | args->flags |= CEPH_OPT_NOASYNCREADDIR; | ||
| 594 | break; | ||
| 595 | |||
| 596 | default: | ||
| 597 | BUG_ON(token); | ||
| 598 | } | ||
| 599 | } | ||
| 600 | return args; | ||
| 601 | 323 | ||
| 602 | out: | 324 | out: |
| 603 | kfree(args->mon_addr); | 325 | destroy_mount_options(fsopt); |
| 604 | kfree(args); | 326 | return err; |
| 605 | return ERR_PTR(err); | ||
| 606 | } | 327 | } |
| 607 | 328 | ||
| 608 | static void destroy_mount_args(struct ceph_mount_args *args) | 329 | /** |
| 330 | * ceph_show_options - Show mount options in /proc/mounts | ||
| 331 | * @m: seq_file to write to | ||
| 332 | * @mnt: mount descriptor | ||
| 333 | */ | ||
| 334 | static int ceph_show_options(struct seq_file *m, struct vfsmount *mnt) | ||
| 609 | { | 335 | { |
| 610 | dout("destroy_mount_args %p\n", args); | 336 | struct ceph_fs_client *fsc = ceph_sb_to_client(mnt->mnt_sb); |
| 611 | kfree(args->snapdir_name); | 337 | struct ceph_mount_options *fsopt = fsc->mount_options; |
| 612 | args->snapdir_name = NULL; | 338 | struct ceph_options *opt = fsc->client->options; |
| 613 | kfree(args->name); | 339 | |
| 614 | args->name = NULL; | 340 | if (opt->flags & CEPH_OPT_FSID) |
| 615 | kfree(args->secret); | 341 | seq_printf(m, ",fsid=%pU", &opt->fsid); |
| 616 | args->secret = NULL; | 342 | if (opt->flags & CEPH_OPT_NOSHARE) |
| 617 | kfree(args); | 343 | seq_puts(m, ",noshare"); |
| 344 | if (opt->flags & CEPH_OPT_NOCRC) | ||
| 345 | seq_puts(m, ",nocrc"); | ||
| 346 | |||
| 347 | if (opt->name) | ||
| 348 | seq_printf(m, ",name=%s", opt->name); | ||
| 349 | if (opt->secret) | ||
| 350 | seq_puts(m, ",secret=<hidden>"); | ||
| 351 | |||
| 352 | if (opt->mount_timeout != CEPH_MOUNT_TIMEOUT_DEFAULT) | ||
| 353 | seq_printf(m, ",mount_timeout=%d", opt->mount_timeout); | ||
| 354 | if (opt->osd_idle_ttl != CEPH_OSD_IDLE_TTL_DEFAULT) | ||
| 355 | seq_printf(m, ",osd_idle_ttl=%d", opt->osd_idle_ttl); | ||
| 356 | if (opt->osd_timeout != CEPH_OSD_TIMEOUT_DEFAULT) | ||
| 357 | seq_printf(m, ",osdtimeout=%d", opt->osd_timeout); | ||
| 358 | if (opt->osd_keepalive_timeout != CEPH_OSD_KEEPALIVE_DEFAULT) | ||
| 359 | seq_printf(m, ",osdkeepalivetimeout=%d", | ||
| 360 | opt->osd_keepalive_timeout); | ||
| 361 | |||
| 362 | if (fsopt->flags & CEPH_MOUNT_OPT_DIRSTAT) | ||
| 363 | seq_puts(m, ",dirstat"); | ||
| 364 | if ((fsopt->flags & CEPH_MOUNT_OPT_RBYTES) == 0) | ||
| 365 | seq_puts(m, ",norbytes"); | ||
| 366 | if (fsopt->flags & CEPH_MOUNT_OPT_NOASYNCREADDIR) | ||
| 367 | seq_puts(m, ",noasyncreaddir"); | ||
| 368 | |||
| 369 | if (fsopt->wsize) | ||
| 370 | seq_printf(m, ",wsize=%d", fsopt->wsize); | ||
| 371 | if (fsopt->rsize != CEPH_MOUNT_RSIZE_DEFAULT) | ||
| 372 | seq_printf(m, ",rsize=%d", fsopt->rsize); | ||
| 373 | if (fsopt->congestion_kb != default_congestion_kb()) | ||
| 374 | seq_printf(m, ",write_congestion_kb=%d", fsopt->congestion_kb); | ||
| 375 | if (fsopt->caps_wanted_delay_min != CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT) | ||
| 376 | seq_printf(m, ",caps_wanted_delay_min=%d", | ||
| 377 | fsopt->caps_wanted_delay_min); | ||
| 378 | if (fsopt->caps_wanted_delay_max != CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT) | ||
| 379 | seq_printf(m, ",caps_wanted_delay_max=%d", | ||
| 380 | fsopt->caps_wanted_delay_max); | ||
| 381 | if (fsopt->cap_release_safety != CEPH_CAP_RELEASE_SAFETY_DEFAULT) | ||
| 382 | seq_printf(m, ",cap_release_safety=%d", | ||
| 383 | fsopt->cap_release_safety); | ||
| 384 | if (fsopt->max_readdir != CEPH_MAX_READDIR_DEFAULT) | ||
| 385 | seq_printf(m, ",readdir_max_entries=%d", fsopt->max_readdir); | ||
| 386 | if (fsopt->max_readdir_bytes != CEPH_MAX_READDIR_BYTES_DEFAULT) | ||
| 387 | seq_printf(m, ",readdir_max_bytes=%d", fsopt->max_readdir_bytes); | ||
| 388 | if (strcmp(fsopt->snapdir_name, CEPH_SNAPDIRNAME_DEFAULT)) | ||
| 389 | seq_printf(m, ",snapdirname=%s", fsopt->snapdir_name); | ||
| 390 | return 0; | ||
| 618 | } | 391 | } |
| 619 | 392 | ||
| 620 | /* | 393 | /* |
| 621 | * create a fresh client instance | 394 | * handle any mon messages the standard library doesn't understand. |
| 395 | * return error if we don't either. | ||
| 622 | */ | 396 | */ |
| 623 | static struct ceph_client *ceph_create_client(struct ceph_mount_args *args) | 397 | static int extra_mon_dispatch(struct ceph_client *client, struct ceph_msg *msg) |
| 624 | { | 398 | { |
| 625 | struct ceph_client *client; | 399 | struct ceph_fs_client *fsc = client->private; |
| 400 | int type = le16_to_cpu(msg->hdr.type); | ||
| 401 | |||
| 402 | switch (type) { | ||
| 403 | case CEPH_MSG_MDS_MAP: | ||
| 404 | ceph_mdsc_handle_map(fsc->mdsc, msg); | ||
| 405 | return 0; | ||
| 406 | |||
| 407 | default: | ||
| 408 | return -1; | ||
| 409 | } | ||
| 410 | } | ||
| 411 | |||
| 412 | /* | ||
| 413 | * create a new fs client | ||
| 414 | */ | ||
| 415 | struct ceph_fs_client *create_fs_client(struct ceph_mount_options *fsopt, | ||
| 416 | struct ceph_options *opt) | ||
| 417 | { | ||
| 418 | struct ceph_fs_client *fsc; | ||
| 626 | int err = -ENOMEM; | 419 | int err = -ENOMEM; |
| 627 | 420 | ||
| 628 | client = kzalloc(sizeof(*client), GFP_KERNEL); | 421 | fsc = kzalloc(sizeof(*fsc), GFP_KERNEL); |
| 629 | if (client == NULL) | 422 | if (!fsc) |
| 630 | return ERR_PTR(-ENOMEM); | 423 | return ERR_PTR(-ENOMEM); |
| 631 | 424 | ||
| 632 | mutex_init(&client->mount_mutex); | 425 | fsc->client = ceph_create_client(opt, fsc); |
| 633 | 426 | if (IS_ERR(fsc->client)) { | |
| 634 | init_waitqueue_head(&client->auth_wq); | 427 | err = PTR_ERR(fsc->client); |
| 428 | goto fail; | ||
| 429 | } | ||
| 430 | fsc->client->extra_mon_dispatch = extra_mon_dispatch; | ||
| 431 | fsc->client->supported_features |= CEPH_FEATURE_FLOCK; | ||
| 432 | fsc->client->monc.want_mdsmap = 1; | ||
| 635 | 433 | ||
| 636 | client->sb = NULL; | 434 | fsc->mount_options = fsopt; |
| 637 | client->mount_state = CEPH_MOUNT_MOUNTING; | ||
| 638 | client->mount_args = args; | ||
| 639 | 435 | ||
| 640 | client->msgr = NULL; | 436 | fsc->sb = NULL; |
| 437 | fsc->mount_state = CEPH_MOUNT_MOUNTING; | ||
| 641 | 438 | ||
| 642 | client->auth_err = 0; | 439 | atomic_long_set(&fsc->writeback_count, 0); |
| 643 | atomic_long_set(&client->writeback_count, 0); | ||
| 644 | 440 | ||
| 645 | err = bdi_init(&client->backing_dev_info); | 441 | err = bdi_init(&fsc->backing_dev_info); |
| 646 | if (err < 0) | 442 | if (err < 0) |
| 647 | goto fail; | 443 | goto fail_client; |
| 648 | 444 | ||
| 649 | err = -ENOMEM; | 445 | err = -ENOMEM; |
| 650 | client->wb_wq = create_workqueue("ceph-writeback"); | 446 | fsc->wb_wq = create_workqueue("ceph-writeback"); |
| 651 | if (client->wb_wq == NULL) | 447 | if (fsc->wb_wq == NULL) |
| 652 | goto fail_bdi; | 448 | goto fail_bdi; |
| 653 | client->pg_inv_wq = create_singlethread_workqueue("ceph-pg-invalid"); | 449 | fsc->pg_inv_wq = create_singlethread_workqueue("ceph-pg-invalid"); |
| 654 | if (client->pg_inv_wq == NULL) | 450 | if (fsc->pg_inv_wq == NULL) |
| 655 | goto fail_wb_wq; | 451 | goto fail_wb_wq; |
| 656 | client->trunc_wq = create_singlethread_workqueue("ceph-trunc"); | 452 | fsc->trunc_wq = create_singlethread_workqueue("ceph-trunc"); |
| 657 | if (client->trunc_wq == NULL) | 453 | if (fsc->trunc_wq == NULL) |
| 658 | goto fail_pg_inv_wq; | 454 | goto fail_pg_inv_wq; |
| 659 | 455 | ||
| 660 | /* set up mempools */ | 456 | /* set up mempools */ |
| 661 | err = -ENOMEM; | 457 | err = -ENOMEM; |
| 662 | client->wb_pagevec_pool = mempool_create_kmalloc_pool(10, | 458 | fsc->wb_pagevec_pool = mempool_create_kmalloc_pool(10, |
| 663 | client->mount_args->wsize >> PAGE_CACHE_SHIFT); | 459 | fsc->mount_options->wsize >> PAGE_CACHE_SHIFT); |
| 664 | if (!client->wb_pagevec_pool) | 460 | if (!fsc->wb_pagevec_pool) |
| 665 | goto fail_trunc_wq; | 461 | goto fail_trunc_wq; |
| 666 | 462 | ||
| 667 | /* caps */ | 463 | /* caps */ |
| 668 | client->min_caps = args->max_readdir; | 464 | fsc->min_caps = fsopt->max_readdir; |
| 465 | |||
| 466 | return fsc; | ||
| 669 | 467 | ||
| 670 | /* subsystems */ | ||
| 671 | err = ceph_monc_init(&client->monc, client); | ||
| 672 | if (err < 0) | ||
| 673 | goto fail_mempool; | ||
| 674 | err = ceph_osdc_init(&client->osdc, client); | ||
| 675 | if (err < 0) | ||
| 676 | goto fail_monc; | ||
| 677 | err = ceph_mdsc_init(&client->mdsc, client); | ||
| 678 | if (err < 0) | ||
| 679 | goto fail_osdc; | ||
| 680 | return client; | ||
| 681 | |||
| 682 | fail_osdc: | ||
| 683 | ceph_osdc_stop(&client->osdc); | ||
| 684 | fail_monc: | ||
| 685 | ceph_monc_stop(&client->monc); | ||
| 686 | fail_mempool: | ||
| 687 | mempool_destroy(client->wb_pagevec_pool); | ||
| 688 | fail_trunc_wq: | 468 | fail_trunc_wq: |
| 689 | destroy_workqueue(client->trunc_wq); | 469 | destroy_workqueue(fsc->trunc_wq); |
| 690 | fail_pg_inv_wq: | 470 | fail_pg_inv_wq: |
| 691 | destroy_workqueue(client->pg_inv_wq); | 471 | destroy_workqueue(fsc->pg_inv_wq); |
| 692 | fail_wb_wq: | 472 | fail_wb_wq: |
| 693 | destroy_workqueue(client->wb_wq); | 473 | destroy_workqueue(fsc->wb_wq); |
| 694 | fail_bdi: | 474 | fail_bdi: |
| 695 | bdi_destroy(&client->backing_dev_info); | 475 | bdi_destroy(&fsc->backing_dev_info); |
| 476 | fail_client: | ||
| 477 | ceph_destroy_client(fsc->client); | ||
| 696 | fail: | 478 | fail: |
| 697 | kfree(client); | 479 | kfree(fsc); |
| 698 | return ERR_PTR(err); | 480 | return ERR_PTR(err); |
| 699 | } | 481 | } |
| 700 | 482 | ||
| 701 | static void ceph_destroy_client(struct ceph_client *client) | 483 | void destroy_fs_client(struct ceph_fs_client *fsc) |
| 702 | { | 484 | { |
| 703 | dout("destroy_client %p\n", client); | 485 | dout("destroy_fs_client %p\n", fsc); |
| 704 | 486 | ||
| 705 | /* unmount */ | 487 | destroy_workqueue(fsc->wb_wq); |
| 706 | ceph_mdsc_stop(&client->mdsc); | 488 | destroy_workqueue(fsc->pg_inv_wq); |
| 707 | ceph_osdc_stop(&client->osdc); | 489 | destroy_workqueue(fsc->trunc_wq); |
| 708 | 490 | ||
| 709 | /* | 491 | bdi_destroy(&fsc->backing_dev_info); |
| 710 | * make sure mds and osd connections close out before destroying | ||
| 711 | * the auth module, which is needed to free those connections' | ||
| 712 | * ceph_authorizers. | ||
| 713 | */ | ||
| 714 | ceph_msgr_flush(); | ||
| 715 | |||
| 716 | ceph_monc_stop(&client->monc); | ||
| 717 | 492 | ||
| 718 | ceph_debugfs_client_cleanup(client); | 493 | mempool_destroy(fsc->wb_pagevec_pool); |
| 719 | destroy_workqueue(client->wb_wq); | ||
| 720 | destroy_workqueue(client->pg_inv_wq); | ||
| 721 | destroy_workqueue(client->trunc_wq); | ||
| 722 | 494 | ||
| 723 | bdi_destroy(&client->backing_dev_info); | 495 | destroy_mount_options(fsc->mount_options); |
| 724 | 496 | ||
| 725 | if (client->msgr) | 497 | ceph_fs_debugfs_cleanup(fsc); |
| 726 | ceph_messenger_destroy(client->msgr); | ||
| 727 | mempool_destroy(client->wb_pagevec_pool); | ||
| 728 | 498 | ||
| 729 | destroy_mount_args(client->mount_args); | 499 | ceph_destroy_client(fsc->client); |
| 730 | 500 | ||
| 731 | kfree(client); | 501 | kfree(fsc); |
| 732 | dout("destroy_client %p done\n", client); | 502 | dout("destroy_fs_client %p done\n", fsc); |
| 733 | } | 503 | } |
| 734 | 504 | ||
| 735 | /* | 505 | /* |
| 736 | * Initially learn our fsid, or verify an fsid matches. | 506 | * caches |
| 737 | */ | 507 | */ |
| 738 | int ceph_check_fsid(struct ceph_client *client, struct ceph_fsid *fsid) | 508 | struct kmem_cache *ceph_inode_cachep; |
| 509 | struct kmem_cache *ceph_cap_cachep; | ||
| 510 | struct kmem_cache *ceph_dentry_cachep; | ||
| 511 | struct kmem_cache *ceph_file_cachep; | ||
| 512 | |||
| 513 | static void ceph_inode_init_once(void *foo) | ||
| 739 | { | 514 | { |
| 740 | if (client->have_fsid) { | 515 | struct ceph_inode_info *ci = foo; |
| 741 | if (ceph_fsid_compare(&client->fsid, fsid)) { | 516 | inode_init_once(&ci->vfs_inode); |
| 742 | pr_err("bad fsid, had %pU got %pU", | 517 | } |
| 743 | &client->fsid, fsid); | 518 | |
| 744 | return -1; | 519 | static int __init init_caches(void) |
| 745 | } | 520 | { |
| 746 | } else { | 521 | ceph_inode_cachep = kmem_cache_create("ceph_inode_info", |
| 747 | pr_info("client%lld fsid %pU\n", client->monc.auth->global_id, | 522 | sizeof(struct ceph_inode_info), |
| 748 | fsid); | 523 | __alignof__(struct ceph_inode_info), |
| 749 | memcpy(&client->fsid, fsid, sizeof(*fsid)); | 524 | (SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD), |
| 750 | ceph_debugfs_client_init(client); | 525 | ceph_inode_init_once); |
| 751 | client->have_fsid = true; | 526 | if (ceph_inode_cachep == NULL) |
| 752 | } | 527 | return -ENOMEM; |
| 528 | |||
| 529 | ceph_cap_cachep = KMEM_CACHE(ceph_cap, | ||
| 530 | SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD); | ||
| 531 | if (ceph_cap_cachep == NULL) | ||
| 532 | goto bad_cap; | ||
| 533 | |||
| 534 | ceph_dentry_cachep = KMEM_CACHE(ceph_dentry_info, | ||
| 535 | SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD); | ||
| 536 | if (ceph_dentry_cachep == NULL) | ||
| 537 | goto bad_dentry; | ||
| 538 | |||
| 539 | ceph_file_cachep = KMEM_CACHE(ceph_file_info, | ||
| 540 | SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD); | ||
| 541 | if (ceph_file_cachep == NULL) | ||
| 542 | goto bad_file; | ||
| 543 | |||
| 753 | return 0; | 544 | return 0; |
| 545 | |||
| 546 | bad_file: | ||
| 547 | kmem_cache_destroy(ceph_dentry_cachep); | ||
| 548 | bad_dentry: | ||
| 549 | kmem_cache_destroy(ceph_cap_cachep); | ||
| 550 | bad_cap: | ||
| 551 | kmem_cache_destroy(ceph_inode_cachep); | ||
| 552 | return -ENOMEM; | ||
| 754 | } | 553 | } |
| 755 | 554 | ||
| 555 | static void destroy_caches(void) | ||
| 556 | { | ||
| 557 | kmem_cache_destroy(ceph_inode_cachep); | ||
| 558 | kmem_cache_destroy(ceph_cap_cachep); | ||
| 559 | kmem_cache_destroy(ceph_dentry_cachep); | ||
| 560 | kmem_cache_destroy(ceph_file_cachep); | ||
| 561 | } | ||
| 562 | |||
| 563 | |||
| 756 | /* | 564 | /* |
| 757 | * true if we have the mon map (and have thus joined the cluster) | 565 | * ceph_umount_begin - initiate forced umount. Tear down the |
| 566 | * mount, skipping steps that may hang while waiting for server(s). | ||
| 758 | */ | 567 | */ |
| 759 | static int have_mon_and_osd_map(struct ceph_client *client) | 568 | static void ceph_umount_begin(struct super_block *sb) |
| 760 | { | 569 | { |
| 761 | return client->monc.monmap && client->monc.monmap->epoch && | 570 | struct ceph_fs_client *fsc = ceph_sb_to_client(sb); |
| 762 | client->osdc.osdmap && client->osdc.osdmap->epoch; | 571 | |
| 572 | dout("ceph_umount_begin - starting forced umount\n"); | ||
| 573 | if (!fsc) | ||
| 574 | return; | ||
| 575 | fsc->mount_state = CEPH_MOUNT_SHUTDOWN; | ||
| 576 | return; | ||
| 763 | } | 577 | } |
| 764 | 578 | ||
| 579 | static const struct super_operations ceph_super_ops = { | ||
| 580 | .alloc_inode = ceph_alloc_inode, | ||
| 581 | .destroy_inode = ceph_destroy_inode, | ||
| 582 | .write_inode = ceph_write_inode, | ||
| 583 | .sync_fs = ceph_sync_fs, | ||
| 584 | .put_super = ceph_put_super, | ||
| 585 | .show_options = ceph_show_options, | ||
| 586 | .statfs = ceph_statfs, | ||
| 587 | .umount_begin = ceph_umount_begin, | ||
| 588 | }; | ||
| 589 | |||
| 765 | /* | 590 | /* |
| 766 | * Bootstrap mount by opening the root directory. Note the mount | 591 | * Bootstrap mount by opening the root directory. Note the mount |
| 767 | * @started time from caller, and time out if this takes too long. | 592 | * @started time from caller, and time out if this takes too long. |
| 768 | */ | 593 | */ |
| 769 | static struct dentry *open_root_dentry(struct ceph_client *client, | 594 | static struct dentry *open_root_dentry(struct ceph_fs_client *fsc, |
| 770 | const char *path, | 595 | const char *path, |
| 771 | unsigned long started) | 596 | unsigned long started) |
| 772 | { | 597 | { |
| 773 | struct ceph_mds_client *mdsc = &client->mdsc; | 598 | struct ceph_mds_client *mdsc = fsc->mdsc; |
| 774 | struct ceph_mds_request *req = NULL; | 599 | struct ceph_mds_request *req = NULL; |
| 775 | int err; | 600 | int err; |
| 776 | struct dentry *root; | 601 | struct dentry *root; |
| @@ -784,14 +609,14 @@ static struct dentry *open_root_dentry(struct ceph_client *client, | |||
| 784 | req->r_ino1.ino = CEPH_INO_ROOT; | 609 | req->r_ino1.ino = CEPH_INO_ROOT; |
| 785 | req->r_ino1.snap = CEPH_NOSNAP; | 610 | req->r_ino1.snap = CEPH_NOSNAP; |
| 786 | req->r_started = started; | 611 | req->r_started = started; |
| 787 | req->r_timeout = client->mount_args->mount_timeout * HZ; | 612 | req->r_timeout = fsc->client->options->mount_timeout * HZ; |
| 788 | req->r_args.getattr.mask = cpu_to_le32(CEPH_STAT_CAP_INODE); | 613 | req->r_args.getattr.mask = cpu_to_le32(CEPH_STAT_CAP_INODE); |
| 789 | req->r_num_caps = 2; | 614 | req->r_num_caps = 2; |
| 790 | err = ceph_mdsc_do_request(mdsc, NULL, req); | 615 | err = ceph_mdsc_do_request(mdsc, NULL, req); |
| 791 | if (err == 0) { | 616 | if (err == 0) { |
| 792 | dout("open_root_inode success\n"); | 617 | dout("open_root_inode success\n"); |
| 793 | if (ceph_ino(req->r_target_inode) == CEPH_INO_ROOT && | 618 | if (ceph_ino(req->r_target_inode) == CEPH_INO_ROOT && |
| 794 | client->sb->s_root == NULL) | 619 | fsc->sb->s_root == NULL) |
| 795 | root = d_alloc_root(req->r_target_inode); | 620 | root = d_alloc_root(req->r_target_inode); |
| 796 | else | 621 | else |
| 797 | root = d_obtain_alias(req->r_target_inode); | 622 | root = d_obtain_alias(req->r_target_inode); |
| @@ -804,105 +629,86 @@ static struct dentry *open_root_dentry(struct ceph_client *client, | |||
| 804 | return root; | 629 | return root; |
| 805 | } | 630 | } |
| 806 | 631 | ||
| 632 | |||
| 633 | |||
| 634 | |||
| 807 | /* | 635 | /* |
| 808 | * mount: join the ceph cluster, and open root directory. | 636 | * mount: join the ceph cluster, and open root directory. |
| 809 | */ | 637 | */ |
| 810 | static int ceph_mount(struct ceph_client *client, struct vfsmount *mnt, | 638 | static int ceph_mount(struct ceph_fs_client *fsc, struct vfsmount *mnt, |
| 811 | const char *path) | 639 | const char *path) |
| 812 | { | 640 | { |
| 813 | struct ceph_entity_addr *myaddr = NULL; | ||
| 814 | int err; | 641 | int err; |
| 815 | unsigned long timeout = client->mount_args->mount_timeout * HZ; | ||
| 816 | unsigned long started = jiffies; /* note the start time */ | 642 | unsigned long started = jiffies; /* note the start time */ |
| 817 | struct dentry *root; | 643 | struct dentry *root; |
| 644 | int first = 0; /* first vfsmount for this super_block */ | ||
| 818 | 645 | ||
| 819 | dout("mount start\n"); | 646 | dout("mount start\n"); |
| 820 | mutex_lock(&client->mount_mutex); | 647 | mutex_lock(&fsc->client->mount_mutex); |
| 821 | |||
| 822 | /* initialize the messenger */ | ||
| 823 | if (client->msgr == NULL) { | ||
| 824 | if (ceph_test_opt(client, MYIP)) | ||
| 825 | myaddr = &client->mount_args->my_addr; | ||
| 826 | client->msgr = ceph_messenger_create(myaddr); | ||
| 827 | if (IS_ERR(client->msgr)) { | ||
| 828 | err = PTR_ERR(client->msgr); | ||
| 829 | client->msgr = NULL; | ||
| 830 | goto out; | ||
| 831 | } | ||
| 832 | client->msgr->nocrc = ceph_test_opt(client, NOCRC); | ||
| 833 | } | ||
| 834 | 648 | ||
| 835 | /* open session, and wait for mon, mds, and osd maps */ | 649 | err = __ceph_open_session(fsc->client, started); |
| 836 | err = ceph_monc_open_session(&client->monc); | ||
| 837 | if (err < 0) | 650 | if (err < 0) |
| 838 | goto out; | 651 | goto out; |
| 839 | 652 | ||
| 840 | while (!have_mon_and_osd_map(client)) { | ||
| 841 | err = -EIO; | ||
| 842 | if (timeout && time_after_eq(jiffies, started + timeout)) | ||
| 843 | goto out; | ||
| 844 | |||
| 845 | /* wait */ | ||
| 846 | dout("mount waiting for mon_map\n"); | ||
| 847 | err = wait_event_interruptible_timeout(client->auth_wq, | ||
| 848 | have_mon_and_osd_map(client) || (client->auth_err < 0), | ||
| 849 | timeout); | ||
| 850 | if (err == -EINTR || err == -ERESTARTSYS) | ||
| 851 | goto out; | ||
| 852 | if (client->auth_err < 0) { | ||
| 853 | err = client->auth_err; | ||
| 854 | goto out; | ||
| 855 | } | ||
| 856 | } | ||
| 857 | |||
| 858 | dout("mount opening root\n"); | 653 | dout("mount opening root\n"); |
| 859 | root = open_root_dentry(client, "", started); | 654 | root = open_root_dentry(fsc, "", started); |
| 860 | if (IS_ERR(root)) { | 655 | if (IS_ERR(root)) { |
| 861 | err = PTR_ERR(root); | 656 | err = PTR_ERR(root); |
| 862 | goto out; | 657 | goto out; |
| 863 | } | 658 | } |
| 864 | if (client->sb->s_root) | 659 | if (fsc->sb->s_root) { |
| 865 | dput(root); | 660 | dput(root); |
| 866 | else | 661 | } else { |
| 867 | client->sb->s_root = root; | 662 | fsc->sb->s_root = root; |
| 663 | first = 1; | ||
| 664 | |||
| 665 | err = ceph_fs_debugfs_init(fsc); | ||
| 666 | if (err < 0) | ||
| 667 | goto fail; | ||
| 668 | } | ||
| 868 | 669 | ||
| 869 | if (path[0] == 0) { | 670 | if (path[0] == 0) { |
| 870 | dget(root); | 671 | dget(root); |
| 871 | } else { | 672 | } else { |
| 872 | dout("mount opening base mountpoint\n"); | 673 | dout("mount opening base mountpoint\n"); |
| 873 | root = open_root_dentry(client, path, started); | 674 | root = open_root_dentry(fsc, path, started); |
| 874 | if (IS_ERR(root)) { | 675 | if (IS_ERR(root)) { |
| 875 | err = PTR_ERR(root); | 676 | err = PTR_ERR(root); |
| 876 | dput(client->sb->s_root); | 677 | goto fail; |
| 877 | client->sb->s_root = NULL; | ||
| 878 | goto out; | ||
| 879 | } | 678 | } |
| 880 | } | 679 | } |
| 881 | 680 | ||
| 882 | mnt->mnt_root = root; | 681 | mnt->mnt_root = root; |
| 883 | mnt->mnt_sb = client->sb; | 682 | mnt->mnt_sb = fsc->sb; |
| 884 | 683 | ||
| 885 | client->mount_state = CEPH_MOUNT_MOUNTED; | 684 | fsc->mount_state = CEPH_MOUNT_MOUNTED; |
| 886 | dout("mount success\n"); | 685 | dout("mount success\n"); |
| 887 | err = 0; | 686 | err = 0; |
| 888 | 687 | ||
| 889 | out: | 688 | out: |
| 890 | mutex_unlock(&client->mount_mutex); | 689 | mutex_unlock(&fsc->client->mount_mutex); |
| 891 | return err; | 690 | return err; |
| 691 | |||
| 692 | fail: | ||
| 693 | if (first) { | ||
| 694 | dput(fsc->sb->s_root); | ||
| 695 | fsc->sb->s_root = NULL; | ||
| 696 | } | ||
| 697 | goto out; | ||
| 892 | } | 698 | } |
| 893 | 699 | ||
| 894 | static int ceph_set_super(struct super_block *s, void *data) | 700 | static int ceph_set_super(struct super_block *s, void *data) |
| 895 | { | 701 | { |
| 896 | struct ceph_client *client = data; | 702 | struct ceph_fs_client *fsc = data; |
| 897 | int ret; | 703 | int ret; |
| 898 | 704 | ||
| 899 | dout("set_super %p data %p\n", s, data); | 705 | dout("set_super %p data %p\n", s, data); |
| 900 | 706 | ||
| 901 | s->s_flags = client->mount_args->sb_flags; | 707 | s->s_flags = fsc->mount_options->sb_flags; |
| 902 | s->s_maxbytes = 1ULL << 40; /* temp value until we get mdsmap */ | 708 | s->s_maxbytes = 1ULL << 40; /* temp value until we get mdsmap */ |
| 903 | 709 | ||
| 904 | s->s_fs_info = client; | 710 | s->s_fs_info = fsc; |
| 905 | client->sb = s; | 711 | fsc->sb = s; |
| 906 | 712 | ||
| 907 | s->s_op = &ceph_super_ops; | 713 | s->s_op = &ceph_super_ops; |
| 908 | s->s_export_op = &ceph_export_ops; | 714 | s->s_export_op = &ceph_export_ops; |
| @@ -917,7 +723,7 @@ static int ceph_set_super(struct super_block *s, void *data) | |||
| 917 | 723 | ||
| 918 | fail: | 724 | fail: |
| 919 | s->s_fs_info = NULL; | 725 | s->s_fs_info = NULL; |
| 920 | client->sb = NULL; | 726 | fsc->sb = NULL; |
| 921 | return ret; | 727 | return ret; |
| 922 | } | 728 | } |
| 923 | 729 | ||
| @@ -926,30 +732,23 @@ fail: | |||
| 926 | */ | 732 | */ |
| 927 | static int ceph_compare_super(struct super_block *sb, void *data) | 733 | static int ceph_compare_super(struct super_block *sb, void *data) |
| 928 | { | 734 | { |
| 929 | struct ceph_client *new = data; | 735 | struct ceph_fs_client *new = data; |
| 930 | struct ceph_mount_args *args = new->mount_args; | 736 | struct ceph_mount_options *fsopt = new->mount_options; |
| 931 | struct ceph_client *other = ceph_sb_to_client(sb); | 737 | struct ceph_options *opt = new->client->options; |
| 932 | int i; | 738 | struct ceph_fs_client *other = ceph_sb_to_client(sb); |
| 933 | 739 | ||
| 934 | dout("ceph_compare_super %p\n", sb); | 740 | dout("ceph_compare_super %p\n", sb); |
| 935 | if (args->flags & CEPH_OPT_FSID) { | 741 | |
| 936 | if (ceph_fsid_compare(&args->fsid, &other->fsid)) { | 742 | if (compare_mount_options(fsopt, opt, other)) { |
| 937 | dout("fsid doesn't match\n"); | 743 | dout("monitor(s)/mount options don't match\n"); |
| 938 | return 0; | 744 | return 0; |
| 939 | } | ||
| 940 | } else { | ||
| 941 | /* do we share (a) monitor? */ | ||
| 942 | for (i = 0; i < new->monc.monmap->num_mon; i++) | ||
| 943 | if (ceph_monmap_contains(other->monc.monmap, | ||
| 944 | &new->monc.monmap->mon_inst[i].addr)) | ||
| 945 | break; | ||
| 946 | if (i == new->monc.monmap->num_mon) { | ||
| 947 | dout("mon ip not part of monmap\n"); | ||
| 948 | return 0; | ||
| 949 | } | ||
| 950 | dout("mon ip matches existing sb %p\n", sb); | ||
| 951 | } | 745 | } |
| 952 | if (args->sb_flags != other->mount_args->sb_flags) { | 746 | if ((opt->flags & CEPH_OPT_FSID) && |
| 747 | ceph_fsid_compare(&opt->fsid, &other->client->fsid)) { | ||
| 748 | dout("fsid doesn't match\n"); | ||
| 749 | return 0; | ||
| 750 | } | ||
| 751 | if (fsopt->sb_flags != other->mount_options->sb_flags) { | ||
| 953 | dout("flags differ\n"); | 752 | dout("flags differ\n"); |
| 954 | return 0; | 753 | return 0; |
| 955 | } | 754 | } |
| @@ -961,19 +760,20 @@ static int ceph_compare_super(struct super_block *sb, void *data) | |||
| 961 | */ | 760 | */ |
| 962 | static atomic_long_t bdi_seq = ATOMIC_LONG_INIT(0); | 761 | static atomic_long_t bdi_seq = ATOMIC_LONG_INIT(0); |
| 963 | 762 | ||
| 964 | static int ceph_register_bdi(struct super_block *sb, struct ceph_client *client) | 763 | static int ceph_register_bdi(struct super_block *sb, |
| 764 | struct ceph_fs_client *fsc) | ||
| 965 | { | 765 | { |
| 966 | int err; | 766 | int err; |
| 967 | 767 | ||
| 968 | /* set ra_pages based on rsize mount option? */ | 768 | /* set ra_pages based on rsize mount option? */ |
| 969 | if (client->mount_args->rsize >= PAGE_CACHE_SIZE) | 769 | if (fsc->mount_options->rsize >= PAGE_CACHE_SIZE) |
| 970 | client->backing_dev_info.ra_pages = | 770 | fsc->backing_dev_info.ra_pages = |
| 971 | (client->mount_args->rsize + PAGE_CACHE_SIZE - 1) | 771 | (fsc->mount_options->rsize + PAGE_CACHE_SIZE - 1) |
| 972 | >> PAGE_SHIFT; | 772 | >> PAGE_SHIFT; |
| 973 | err = bdi_register(&client->backing_dev_info, NULL, "ceph-%d", | 773 | err = bdi_register(&fsc->backing_dev_info, NULL, "ceph-%d", |
| 974 | atomic_long_inc_return(&bdi_seq)); | 774 | atomic_long_inc_return(&bdi_seq)); |
| 975 | if (!err) | 775 | if (!err) |
| 976 | sb->s_bdi = &client->backing_dev_info; | 776 | sb->s_bdi = &fsc->backing_dev_info; |
| 977 | return err; | 777 | return err; |
| 978 | } | 778 | } |
| 979 | 779 | ||
| @@ -982,46 +782,52 @@ static int ceph_get_sb(struct file_system_type *fs_type, | |||
| 982 | struct vfsmount *mnt) | 782 | struct vfsmount *mnt) |
| 983 | { | 783 | { |
| 984 | struct super_block *sb; | 784 | struct super_block *sb; |
| 985 | struct ceph_client *client; | 785 | struct ceph_fs_client *fsc; |
| 986 | int err; | 786 | int err; |
| 987 | int (*compare_super)(struct super_block *, void *) = ceph_compare_super; | 787 | int (*compare_super)(struct super_block *, void *) = ceph_compare_super; |
| 988 | const char *path = NULL; | 788 | const char *path = NULL; |
| 989 | struct ceph_mount_args *args; | 789 | struct ceph_mount_options *fsopt = NULL; |
| 790 | struct ceph_options *opt = NULL; | ||
| 990 | 791 | ||
| 991 | dout("ceph_get_sb\n"); | 792 | dout("ceph_get_sb\n"); |
| 992 | args = parse_mount_args(flags, data, dev_name, &path); | 793 | err = parse_mount_options(&fsopt, &opt, flags, data, dev_name, &path); |
| 993 | if (IS_ERR(args)) { | 794 | if (err < 0) |
| 994 | err = PTR_ERR(args); | ||
| 995 | goto out_final; | 795 | goto out_final; |
| 996 | } | ||
| 997 | 796 | ||
| 998 | /* create client (which we may/may not use) */ | 797 | /* create client (which we may/may not use) */ |
| 999 | client = ceph_create_client(args); | 798 | fsc = create_fs_client(fsopt, opt); |
| 1000 | if (IS_ERR(client)) { | 799 | if (IS_ERR(fsc)) { |
| 1001 | err = PTR_ERR(client); | 800 | err = PTR_ERR(fsc); |
| 801 | kfree(fsopt); | ||
| 802 | kfree(opt); | ||
| 1002 | goto out_final; | 803 | goto out_final; |
| 1003 | } | 804 | } |
| 1004 | 805 | ||
| 1005 | if (client->mount_args->flags & CEPH_OPT_NOSHARE) | 806 | err = ceph_mdsc_init(fsc); |
| 807 | if (err < 0) | ||
| 808 | goto out; | ||
| 809 | |||
| 810 | if (ceph_test_opt(fsc->client, NOSHARE)) | ||
| 1006 | compare_super = NULL; | 811 | compare_super = NULL; |
| 1007 | sb = sget(fs_type, compare_super, ceph_set_super, client); | 812 | sb = sget(fs_type, compare_super, ceph_set_super, fsc); |
| 1008 | if (IS_ERR(sb)) { | 813 | if (IS_ERR(sb)) { |
| 1009 | err = PTR_ERR(sb); | 814 | err = PTR_ERR(sb); |
| 1010 | goto out; | 815 | goto out; |
| 1011 | } | 816 | } |
| 1012 | 817 | ||
| 1013 | if (ceph_sb_to_client(sb) != client) { | 818 | if (ceph_sb_to_client(sb) != fsc) { |
| 1014 | ceph_destroy_client(client); | 819 | ceph_mdsc_destroy(fsc); |
| 1015 | client = ceph_sb_to_client(sb); | 820 | destroy_fs_client(fsc); |
| 1016 | dout("get_sb got existing client %p\n", client); | 821 | fsc = ceph_sb_to_client(sb); |
| 822 | dout("get_sb got existing client %p\n", fsc); | ||
| 1017 | } else { | 823 | } else { |
| 1018 | dout("get_sb using new client %p\n", client); | 824 | dout("get_sb using new client %p\n", fsc); |
| 1019 | err = ceph_register_bdi(sb, client); | 825 | err = ceph_register_bdi(sb, fsc); |
| 1020 | if (err < 0) | 826 | if (err < 0) |
| 1021 | goto out_splat; | 827 | goto out_splat; |
| 1022 | } | 828 | } |
| 1023 | 829 | ||
| 1024 | err = ceph_mount(client, mnt, path); | 830 | err = ceph_mount(fsc, mnt, path); |
| 1025 | if (err < 0) | 831 | if (err < 0) |
| 1026 | goto out_splat; | 832 | goto out_splat; |
| 1027 | dout("root %p inode %p ino %llx.%llx\n", mnt->mnt_root, | 833 | dout("root %p inode %p ino %llx.%llx\n", mnt->mnt_root, |
| @@ -1029,12 +835,13 @@ static int ceph_get_sb(struct file_system_type *fs_type, | |||
| 1029 | return 0; | 835 | return 0; |
| 1030 | 836 | ||
| 1031 | out_splat: | 837 | out_splat: |
| 1032 | ceph_mdsc_close_sessions(&client->mdsc); | 838 | ceph_mdsc_close_sessions(fsc->mdsc); |
| 1033 | deactivate_locked_super(sb); | 839 | deactivate_locked_super(sb); |
| 1034 | goto out_final; | 840 | goto out_final; |
| 1035 | 841 | ||
| 1036 | out: | 842 | out: |
| 1037 | ceph_destroy_client(client); | 843 | ceph_mdsc_destroy(fsc); |
| 844 | destroy_fs_client(fsc); | ||
| 1038 | out_final: | 845 | out_final: |
| 1039 | dout("ceph_get_sb fail %d\n", err); | 846 | dout("ceph_get_sb fail %d\n", err); |
| 1040 | return err; | 847 | return err; |
| @@ -1042,11 +849,12 @@ out_final: | |||
| 1042 | 849 | ||
| 1043 | static void ceph_kill_sb(struct super_block *s) | 850 | static void ceph_kill_sb(struct super_block *s) |
| 1044 | { | 851 | { |
| 1045 | struct ceph_client *client = ceph_sb_to_client(s); | 852 | struct ceph_fs_client *fsc = ceph_sb_to_client(s); |
| 1046 | dout("kill_sb %p\n", s); | 853 | dout("kill_sb %p\n", s); |
| 1047 | ceph_mdsc_pre_umount(&client->mdsc); | 854 | ceph_mdsc_pre_umount(fsc->mdsc); |
| 1048 | kill_anon_super(s); /* will call put_super after sb is r/o */ | 855 | kill_anon_super(s); /* will call put_super after sb is r/o */ |
| 1049 | ceph_destroy_client(client); | 856 | ceph_mdsc_destroy(fsc); |
| 857 | destroy_fs_client(fsc); | ||
| 1050 | } | 858 | } |
| 1051 | 859 | ||
| 1052 | static struct file_system_type ceph_fs_type = { | 860 | static struct file_system_type ceph_fs_type = { |
| @@ -1062,36 +870,20 @@ static struct file_system_type ceph_fs_type = { | |||
| 1062 | 870 | ||
| 1063 | static int __init init_ceph(void) | 871 | static int __init init_ceph(void) |
| 1064 | { | 872 | { |
| 1065 | int ret = 0; | 873 | int ret = init_caches(); |
| 1066 | |||
| 1067 | ret = ceph_debugfs_init(); | ||
| 1068 | if (ret < 0) | ||
| 1069 | goto out; | ||
| 1070 | |||
| 1071 | ret = ceph_msgr_init(); | ||
| 1072 | if (ret < 0) | ||
| 1073 | goto out_debugfs; | ||
| 1074 | |||
| 1075 | ret = init_caches(); | ||
| 1076 | if (ret) | 874 | if (ret) |
| 1077 | goto out_msgr; | 875 | goto out; |
| 1078 | 876 | ||
| 1079 | ret = register_filesystem(&ceph_fs_type); | 877 | ret = register_filesystem(&ceph_fs_type); |
| 1080 | if (ret) | 878 | if (ret) |
| 1081 | goto out_icache; | 879 | goto out_icache; |
| 1082 | 880 | ||
| 1083 | pr_info("loaded (mon/mds/osd proto %d/%d/%d, osdmap %d/%d %d/%d)\n", | 881 | pr_info("loaded (mds proto %d)\n", CEPH_MDSC_PROTOCOL); |
| 1084 | CEPH_MONC_PROTOCOL, CEPH_MDSC_PROTOCOL, CEPH_OSDC_PROTOCOL, | 882 | |
| 1085 | CEPH_OSDMAP_VERSION, CEPH_OSDMAP_VERSION_EXT, | ||
| 1086 | CEPH_OSDMAP_INC_VERSION, CEPH_OSDMAP_INC_VERSION_EXT); | ||
| 1087 | return 0; | 883 | return 0; |
| 1088 | 884 | ||
| 1089 | out_icache: | 885 | out_icache: |
| 1090 | destroy_caches(); | 886 | destroy_caches(); |
| 1091 | out_msgr: | ||
| 1092 | ceph_msgr_exit(); | ||
| 1093 | out_debugfs: | ||
| 1094 | ceph_debugfs_cleanup(); | ||
| 1095 | out: | 887 | out: |
| 1096 | return ret; | 888 | return ret; |
| 1097 | } | 889 | } |
| @@ -1101,8 +893,6 @@ static void __exit exit_ceph(void) | |||
| 1101 | dout("exit_ceph\n"); | 893 | dout("exit_ceph\n"); |
| 1102 | unregister_filesystem(&ceph_fs_type); | 894 | unregister_filesystem(&ceph_fs_type); |
| 1103 | destroy_caches(); | 895 | destroy_caches(); |
| 1104 | ceph_msgr_exit(); | ||
| 1105 | ceph_debugfs_cleanup(); | ||
| 1106 | } | 896 | } |
| 1107 | 897 | ||
| 1108 | module_init(init_ceph); | 898 | module_init(init_ceph); |
diff --git a/fs/ceph/super.h b/fs/ceph/super.h index b87638e84c4b..1886294e12f7 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h | |||
| @@ -1,7 +1,7 @@ | |||
| 1 | #ifndef _FS_CEPH_SUPER_H | 1 | #ifndef _FS_CEPH_SUPER_H |
| 2 | #define _FS_CEPH_SUPER_H | 2 | #define _FS_CEPH_SUPER_H |
| 3 | 3 | ||
| 4 | #include "ceph_debug.h" | 4 | #include <linux/ceph/ceph_debug.h> |
| 5 | 5 | ||
| 6 | #include <asm/unaligned.h> | 6 | #include <asm/unaligned.h> |
| 7 | #include <linux/backing-dev.h> | 7 | #include <linux/backing-dev.h> |
| @@ -14,13 +14,7 @@ | |||
| 14 | #include <linux/writeback.h> | 14 | #include <linux/writeback.h> |
| 15 | #include <linux/slab.h> | 15 | #include <linux/slab.h> |
| 16 | 16 | ||
| 17 | #include "types.h" | 17 | #include <linux/ceph/libceph.h> |
| 18 | #include "messenger.h" | ||
| 19 | #include "msgpool.h" | ||
| 20 | #include "mon_client.h" | ||
| 21 | #include "mds_client.h" | ||
| 22 | #include "osd_client.h" | ||
| 23 | #include "ceph_fs.h" | ||
| 24 | 18 | ||
| 25 | /* f_type in struct statfs */ | 19 | /* f_type in struct statfs */ |
| 26 | #define CEPH_SUPER_MAGIC 0x00c36400 | 20 | #define CEPH_SUPER_MAGIC 0x00c36400 |
| @@ -30,42 +24,25 @@ | |||
| 30 | #define CEPH_BLOCK_SHIFT 20 /* 1 MB */ | 24 | #define CEPH_BLOCK_SHIFT 20 /* 1 MB */ |
| 31 | #define CEPH_BLOCK (1 << CEPH_BLOCK_SHIFT) | 25 | #define CEPH_BLOCK (1 << CEPH_BLOCK_SHIFT) |
| 32 | 26 | ||
| 33 | /* | 27 | #define CEPH_MOUNT_OPT_DIRSTAT (1<<4) /* `cat dirname` for stats */ |
| 34 | * Supported features | 28 | #define CEPH_MOUNT_OPT_RBYTES (1<<5) /* dir st_bytes = rbytes */ |
| 35 | */ | 29 | #define CEPH_MOUNT_OPT_NOASYNCREADDIR (1<<7) /* no dcache readdir */ |
| 36 | #define CEPH_FEATURE_SUPPORTED CEPH_FEATURE_NOSRCADDR | CEPH_FEATURE_FLOCK | ||
| 37 | #define CEPH_FEATURE_REQUIRED CEPH_FEATURE_NOSRCADDR | ||
| 38 | 30 | ||
| 39 | /* | 31 | #define CEPH_MOUNT_OPT_DEFAULT (CEPH_MOUNT_OPT_RBYTES) |
| 40 | * mount options | ||
| 41 | */ | ||
| 42 | #define CEPH_OPT_FSID (1<<0) | ||
| 43 | #define CEPH_OPT_NOSHARE (1<<1) /* don't share client with other sbs */ | ||
| 44 | #define CEPH_OPT_MYIP (1<<2) /* specified my ip */ | ||
| 45 | #define CEPH_OPT_DIRSTAT (1<<4) /* funky `cat dirname` for stats */ | ||
| 46 | #define CEPH_OPT_RBYTES (1<<5) /* dir st_bytes = rbytes */ | ||
| 47 | #define CEPH_OPT_NOCRC (1<<6) /* no data crc on writes */ | ||
| 48 | #define CEPH_OPT_NOASYNCREADDIR (1<<7) /* no dcache readdir */ | ||
| 49 | 32 | ||
| 50 | #define CEPH_OPT_DEFAULT (CEPH_OPT_RBYTES) | 33 | #define ceph_set_mount_opt(fsc, opt) \ |
| 34 | (fsc)->mount_options->flags |= CEPH_MOUNT_OPT_##opt; | ||
| 35 | #define ceph_test_mount_opt(fsc, opt) \ | ||
| 36 | (!!((fsc)->mount_options->flags & CEPH_MOUNT_OPT_##opt)) | ||
| 51 | 37 | ||
| 52 | #define ceph_set_opt(client, opt) \ | 38 | #define CEPH_MAX_READDIR_DEFAULT 1024 |
| 53 | (client)->mount_args->flags |= CEPH_OPT_##opt; | 39 | #define CEPH_MAX_READDIR_BYTES_DEFAULT (512*1024) |
| 54 | #define ceph_test_opt(client, opt) \ | 40 | #define CEPH_SNAPDIRNAME_DEFAULT ".snap" |
| 55 | (!!((client)->mount_args->flags & CEPH_OPT_##opt)) | ||
| 56 | 41 | ||
| 57 | 42 | struct ceph_mount_options { | |
| 58 | struct ceph_mount_args { | ||
| 59 | int sb_flags; | ||
| 60 | int flags; | 43 | int flags; |
| 61 | struct ceph_fsid fsid; | 44 | int sb_flags; |
| 62 | struct ceph_entity_addr my_addr; | 45 | |
| 63 | int num_mon; | ||
| 64 | struct ceph_entity_addr *mon_addr; | ||
| 65 | int mount_timeout; | ||
| 66 | int osd_idle_ttl; | ||
| 67 | int osd_timeout; | ||
| 68 | int osd_keepalive_timeout; | ||
| 69 | int wsize; | 46 | int wsize; |
| 70 | int rsize; /* max readahead */ | 47 | int rsize; /* max readahead */ |
| 71 | int congestion_kb; /* max writeback in flight */ | 48 | int congestion_kb; /* max writeback in flight */ |
| @@ -73,82 +50,25 @@ struct ceph_mount_args { | |||
| 73 | int cap_release_safety; | 50 | int cap_release_safety; |
| 74 | int max_readdir; /* max readdir result (entries) */ | 51 | int max_readdir; /* max readdir result (entries) */ |
| 75 | int max_readdir_bytes; /* max readdir result (bytes) */ | 52 | int max_readdir_bytes; /* max readdir result (bytes) */ |
| 76 | char *snapdir_name; /* default ".snap" */ | ||
| 77 | char *name; | ||
| 78 | char *secret; | ||
| 79 | }; | ||
| 80 | 53 | ||
| 81 | /* | 54 | /* |
| 82 | * defaults | 55 | * everything above this point can be memcmp'd; everything below |
| 83 | */ | 56 | * is handled in compare_mount_options() |
| 84 | #define CEPH_MOUNT_TIMEOUT_DEFAULT 60 | 57 | */ |
| 85 | #define CEPH_OSD_TIMEOUT_DEFAULT 60 /* seconds */ | ||
| 86 | #define CEPH_OSD_KEEPALIVE_DEFAULT 5 | ||
| 87 | #define CEPH_OSD_IDLE_TTL_DEFAULT 60 | ||
| 88 | #define CEPH_MOUNT_RSIZE_DEFAULT (512*1024) /* readahead */ | ||
| 89 | #define CEPH_MAX_READDIR_DEFAULT 1024 | ||
| 90 | #define CEPH_MAX_READDIR_BYTES_DEFAULT (512*1024) | ||
| 91 | |||
| 92 | #define CEPH_MSG_MAX_FRONT_LEN (16*1024*1024) | ||
| 93 | #define CEPH_MSG_MAX_DATA_LEN (16*1024*1024) | ||
| 94 | |||
| 95 | #define CEPH_SNAPDIRNAME_DEFAULT ".snap" | ||
| 96 | #define CEPH_AUTH_NAME_DEFAULT "guest" | ||
| 97 | /* | ||
| 98 | * Delay telling the MDS we no longer want caps, in case we reopen | ||
| 99 | * the file. Delay a minimum amount of time, even if we send a cap | ||
| 101 | * message for some other reason.  Otherwise, take the opportunity to | ||
| 101 | * update the mds to avoid sending another message later. | ||
| 102 | */ | ||
| 103 | #define CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT 5 /* cap release delay */ | ||
| 104 | #define CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT 60 /* cap release delay */ | ||
| 105 | |||
| 106 | #define CEPH_CAP_RELEASE_SAFETY_DEFAULT (CEPH_CAPS_PER_RELEASE * 4) | ||
| 107 | |||
| 108 | /* mount state */ | ||
| 109 | enum { | ||
| 110 | CEPH_MOUNT_MOUNTING, | ||
| 111 | CEPH_MOUNT_MOUNTED, | ||
| 112 | CEPH_MOUNT_UNMOUNTING, | ||
| 113 | CEPH_MOUNT_UNMOUNTED, | ||
| 114 | CEPH_MOUNT_SHUTDOWN, | ||
| 115 | }; | ||
| 116 | |||
| 117 | /* | ||
| 118 | * subtract jiffies | ||
| 119 | */ | ||
| 120 | static inline unsigned long time_sub(unsigned long a, unsigned long b) | ||
| 121 | { | ||
| 122 | BUG_ON(time_after(b, a)); | ||
| 123 | return (long)a - (long)b; | ||
| 124 | } | ||
| 125 | |||
| 126 | /* | ||
| 127 | * per-filesystem client state | ||
| 128 | * | ||
| 129 | * possibly shared by multiple mount points, if they are | ||
| 130 | * mounting the same ceph filesystem/cluster. | ||
| 131 | */ | ||
| 132 | struct ceph_client { | ||
| 133 | struct ceph_fsid fsid; | ||
| 134 | bool have_fsid; | ||
| 135 | 58 | ||
| 136 | struct mutex mount_mutex; /* serialize mount attempts */ | 59 | char *snapdir_name; /* default ".snap" */ |
| 137 | struct ceph_mount_args *mount_args; | 60 | }; |
| 138 | 61 | ||
| 62 | struct ceph_fs_client { | ||
| 139 | struct super_block *sb; | 63 | struct super_block *sb; |
| 140 | 64 | ||
| 141 | unsigned long mount_state; | 65 | struct ceph_mount_options *mount_options; |
| 142 | wait_queue_head_t auth_wq; | 66 | struct ceph_client *client; |
| 143 | |||
| 144 | int auth_err; | ||
| 145 | 67 | ||
| 68 | unsigned long mount_state; | ||
| 146 | int min_caps; /* min caps i added */ | 69 | int min_caps; /* min caps i added */ |
| 147 | 70 | ||
| 148 | struct ceph_messenger *msgr; /* messenger instance */ | 71 | struct ceph_mds_client *mdsc; |
| 149 | struct ceph_mon_client monc; | ||
| 150 | struct ceph_mds_client mdsc; | ||
| 151 | struct ceph_osd_client osdc; | ||
| 152 | 72 | ||
| 153 | /* writeback */ | 73 | /* writeback */ |
| 154 | mempool_t *wb_pagevec_pool; | 74 | mempool_t *wb_pagevec_pool; |
| @@ -160,14 +80,14 @@ struct ceph_client { | |||
| 160 | struct backing_dev_info backing_dev_info; | 80 | struct backing_dev_info backing_dev_info; |
| 161 | 81 | ||
| 162 | #ifdef CONFIG_DEBUG_FS | 82 | #ifdef CONFIG_DEBUG_FS |
| 163 | struct dentry *debugfs_monmap; | 83 | struct dentry *debugfs_dentry_lru, *debugfs_caps; |
| 164 | struct dentry *debugfs_mdsmap, *debugfs_osdmap; | ||
| 165 | struct dentry *debugfs_dir, *debugfs_dentry_lru, *debugfs_caps; | ||
| 166 | struct dentry *debugfs_congestion_kb; | 84 | struct dentry *debugfs_congestion_kb; |
| 167 | struct dentry *debugfs_bdi; | 85 | struct dentry *debugfs_bdi; |
| 86 | struct dentry *debugfs_mdsc, *debugfs_mdsmap; | ||
| 168 | #endif | 87 | #endif |
| 169 | }; | 88 | }; |
| 170 | 89 | ||
| 90 | |||
| 171 | /* | 91 | /* |
| 172 | * File i/o capability. This tracks shared state with the metadata | 92 | * File i/o capability. This tracks shared state with the metadata |
| 173 | * server that allows us to cache or writeback attributes or to read | 93 | * server that allows us to cache or writeback attributes or to read |
| @@ -275,6 +195,20 @@ struct ceph_inode_xattr { | |||
| 275 | int should_free_val; | 195 | int should_free_val; |
| 276 | }; | 196 | }; |
| 277 | 197 | ||
| 198 | /* | ||
| 199 | * Ceph dentry state | ||
| 200 | */ | ||
| 201 | struct ceph_dentry_info { | ||
| 202 | struct ceph_mds_session *lease_session; | ||
| 203 | u32 lease_gen, lease_shared_gen; | ||
| 204 | u32 lease_seq; | ||
| 205 | unsigned long lease_renew_after, lease_renew_from; | ||
| 206 | struct list_head lru; | ||
| 207 | struct dentry *dentry; | ||
| 208 | u64 time; | ||
| 209 | u64 offset; | ||
| 210 | }; | ||
| 211 | |||
| 278 | struct ceph_inode_xattrs_info { | 212 | struct ceph_inode_xattrs_info { |
| 279 | /* | 213 | /* |
| 280 | * (still encoded) xattr blob. we avoid the overhead of parsing | 214 | * (still encoded) xattr blob. we avoid the overhead of parsing |
| @@ -296,11 +230,6 @@ struct ceph_inode_xattrs_info { | |||
| 296 | /* | 230 | /* |
| 297 | * Ceph inode. | 231 | * Ceph inode. |
| 298 | */ | 232 | */ |
| 299 | #define CEPH_I_COMPLETE 1 /* we have complete directory cached */ | ||
| 300 | #define CEPH_I_NODELAY 4 /* do not delay cap release */ | ||
| 301 | #define CEPH_I_FLUSH 8 /* do not delay flush of dirty metadata */ | ||
| 302 | #define CEPH_I_NOFLUSH 16 /* do not flush dirty caps */ | ||
| 303 | |||
| 304 | struct ceph_inode_info { | 233 | struct ceph_inode_info { |
| 305 | struct ceph_vino i_vino; /* ceph ino + snap */ | 234 | struct ceph_vino i_vino; /* ceph ino + snap */ |
| 306 | 235 | ||
| @@ -391,6 +320,63 @@ static inline struct ceph_inode_info *ceph_inode(struct inode *inode) | |||
| 391 | return container_of(inode, struct ceph_inode_info, vfs_inode); | 320 | return container_of(inode, struct ceph_inode_info, vfs_inode); |
| 392 | } | 321 | } |
| 393 | 322 | ||
| 323 | static inline struct ceph_vino ceph_vino(struct inode *inode) | ||
| 324 | { | ||
| 325 | return ceph_inode(inode)->i_vino; | ||
| 326 | } | ||
| 327 | |||
| 328 | /* | ||
| 329 | * ino_t is <64 bits on many architectures, blech. | ||
| 330 | * | ||
| 331 | * don't include snap in ino hash, at least for now. | ||
| 332 | */ | ||
| 333 | static inline ino_t ceph_vino_to_ino(struct ceph_vino vino) | ||
| 334 | { | ||
| 335 | ino_t ino = (ino_t)vino.ino; /* ^ (vino.snap << 20); */ | ||
| 336 | #if BITS_PER_LONG == 32 | ||
| 337 | ino ^= vino.ino >> (sizeof(u64)-sizeof(ino_t)) * 8; | ||
| 338 | if (!ino) | ||
| 339 | ino = 1; | ||
| 340 | #endif | ||
| 341 | return ino; | ||
| 342 | } | ||
| 343 | |||
| 344 | /* for printf-style formatting */ | ||
| 345 | #define ceph_vinop(i) ceph_inode(i)->i_vino.ino, ceph_inode(i)->i_vino.snap | ||
| 346 | |||
| 347 | static inline u64 ceph_ino(struct inode *inode) | ||
| 348 | { | ||
| 349 | return ceph_inode(inode)->i_vino.ino; | ||
| 350 | } | ||
| 351 | static inline u64 ceph_snap(struct inode *inode) | ||
| 352 | { | ||
| 353 | return ceph_inode(inode)->i_vino.snap; | ||
| 354 | } | ||
| 355 | |||
| 356 | static inline int ceph_ino_compare(struct inode *inode, void *data) | ||
| 357 | { | ||
| 358 | struct ceph_vino *pvino = (struct ceph_vino *)data; | ||
| 359 | struct ceph_inode_info *ci = ceph_inode(inode); | ||
| 360 | return ci->i_vino.ino == pvino->ino && | ||
| 361 | ci->i_vino.snap == pvino->snap; | ||
| 362 | } | ||
| 363 | |||
| 364 | static inline struct inode *ceph_find_inode(struct super_block *sb, | ||
| 365 | struct ceph_vino vino) | ||
| 366 | { | ||
| 367 | ino_t t = ceph_vino_to_ino(vino); | ||
| 368 | return ilookup5(sb, t, ceph_ino_compare, &vino); | ||
| 369 | } | ||
| 370 | |||
| 371 | |||
| 372 | /* | ||
| 373 | * Ceph inode. | ||
| 374 | */ | ||
| 375 | #define CEPH_I_COMPLETE 1 /* we have complete directory cached */ | ||
| 376 | #define CEPH_I_NODELAY 4 /* do not delay cap release */ | ||
| 377 | #define CEPH_I_FLUSH 8 /* do not delay flush of dirty metadata */ | ||
| 378 | #define CEPH_I_NOFLUSH 16 /* do not flush dirty caps */ | ||
| 379 | |||
| 394 | static inline void ceph_i_clear(struct inode *inode, unsigned mask) | 380 | static inline void ceph_i_clear(struct inode *inode, unsigned mask) |
| 395 | { | 381 | { |
| 396 | struct ceph_inode_info *ci = ceph_inode(inode); | 382 | struct ceph_inode_info *ci = ceph_inode(inode); |
| @@ -414,8 +400,9 @@ static inline bool ceph_i_test(struct inode *inode, unsigned mask) | |||
| 414 | struct ceph_inode_info *ci = ceph_inode(inode); | 400 | struct ceph_inode_info *ci = ceph_inode(inode); |
| 415 | bool r; | 401 | bool r; |
| 416 | 402 | ||
| 417 | smp_mb(); | 403 | spin_lock(&inode->i_lock); |
| 418 | r = (ci->i_ceph_flags & mask) == mask; | 404 | r = (ci->i_ceph_flags & mask) == mask; |
| 405 | spin_unlock(&inode->i_lock); | ||
| 419 | return r; | 406 | return r; |
| 420 | } | 407 | } |
| 421 | 408 | ||
| @@ -432,20 +419,6 @@ extern u32 ceph_choose_frag(struct ceph_inode_info *ci, u32 v, | |||
| 432 | struct ceph_inode_frag *pfrag, | 419 | struct ceph_inode_frag *pfrag, |
| 433 | int *found); | 420 | int *found); |
| 434 | 421 | ||
| 435 | /* | ||
| 436 | * Ceph dentry state | ||
| 437 | */ | ||
| 438 | struct ceph_dentry_info { | ||
| 439 | struct ceph_mds_session *lease_session; | ||
| 440 | u32 lease_gen, lease_shared_gen; | ||
| 441 | u32 lease_seq; | ||
| 442 | unsigned long lease_renew_after, lease_renew_from; | ||
| 443 | struct list_head lru; | ||
| 444 | struct dentry *dentry; | ||
| 445 | u64 time; | ||
| 446 | u64 offset; | ||
| 447 | }; | ||
| 448 | |||
| 449 | static inline struct ceph_dentry_info *ceph_dentry(struct dentry *dentry) | 422 | static inline struct ceph_dentry_info *ceph_dentry(struct dentry *dentry) |
| 450 | { | 423 | { |
| 451 | return (struct ceph_dentry_info *)dentry->d_fsdata; | 424 | return (struct ceph_dentry_info *)dentry->d_fsdata; |
| @@ -456,22 +429,6 @@ static inline loff_t ceph_make_fpos(unsigned frag, unsigned off) | |||
| 456 | return ((loff_t)frag << 32) | (loff_t)off; | 429 | return ((loff_t)frag << 32) | (loff_t)off; |
| 457 | } | 430 | } |
| 458 | 431 | ||
| 459 | /* | ||
| 460 | * ino_t is <64 bits on many architectures, blech. | ||
| 461 | * | ||
| 462 | * don't include snap in ino hash, at least for now. | ||
| 463 | */ | ||
| 464 | static inline ino_t ceph_vino_to_ino(struct ceph_vino vino) | ||
| 465 | { | ||
| 466 | ino_t ino = (ino_t)vino.ino; /* ^ (vino.snap << 20); */ | ||
| 467 | #if BITS_PER_LONG == 32 | ||
| 468 | ino ^= vino.ino >> (sizeof(u64)-sizeof(ino_t)) * 8; | ||
| 469 | if (!ino) | ||
| 470 | ino = 1; | ||
| 471 | #endif | ||
| 472 | return ino; | ||
| 473 | } | ||
| 474 | |||
| 475 | static inline int ceph_set_ino_cb(struct inode *inode, void *data) | 432 | static inline int ceph_set_ino_cb(struct inode *inode, void *data) |
| 476 | { | 433 | { |
| 477 | ceph_inode(inode)->i_vino = *(struct ceph_vino *)data; | 434 | ceph_inode(inode)->i_vino = *(struct ceph_vino *)data; |
| @@ -479,39 +436,6 @@ static inline int ceph_set_ino_cb(struct inode *inode, void *data) | |||
| 479 | return 0; | 436 | return 0; |
| 480 | } | 437 | } |
| 481 | 438 | ||
| 482 | static inline struct ceph_vino ceph_vino(struct inode *inode) | ||
| 483 | { | ||
| 484 | return ceph_inode(inode)->i_vino; | ||
| 485 | } | ||
| 486 | |||
| 487 | /* for printf-style formatting */ | ||
| 488 | #define ceph_vinop(i) ceph_inode(i)->i_vino.ino, ceph_inode(i)->i_vino.snap | ||
| 489 | |||
| 490 | static inline u64 ceph_ino(struct inode *inode) | ||
| 491 | { | ||
| 492 | return ceph_inode(inode)->i_vino.ino; | ||
| 493 | } | ||
| 494 | static inline u64 ceph_snap(struct inode *inode) | ||
| 495 | { | ||
| 496 | return ceph_inode(inode)->i_vino.snap; | ||
| 497 | } | ||
| 498 | |||
| 499 | static inline int ceph_ino_compare(struct inode *inode, void *data) | ||
| 500 | { | ||
| 501 | struct ceph_vino *pvino = (struct ceph_vino *)data; | ||
| 502 | struct ceph_inode_info *ci = ceph_inode(inode); | ||
| 503 | return ci->i_vino.ino == pvino->ino && | ||
| 504 | ci->i_vino.snap == pvino->snap; | ||
| 505 | } | ||
| 506 | |||
| 507 | static inline struct inode *ceph_find_inode(struct super_block *sb, | ||
| 508 | struct ceph_vino vino) | ||
| 509 | { | ||
| 510 | ino_t t = ceph_vino_to_ino(vino); | ||
| 511 | return ilookup5(sb, t, ceph_ino_compare, &vino); | ||
| 512 | } | ||
| 513 | |||
| 514 | |||
| 515 | /* | 439 | /* |
| 516 | * caps helpers | 440 | * caps helpers |
| 517 | */ | 441 | */ |
| @@ -576,18 +500,18 @@ extern int ceph_reserve_caps(struct ceph_mds_client *mdsc, | |||
| 576 | struct ceph_cap_reservation *ctx, int need); | 500 | struct ceph_cap_reservation *ctx, int need); |
| 577 | extern int ceph_unreserve_caps(struct ceph_mds_client *mdsc, | 501 | extern int ceph_unreserve_caps(struct ceph_mds_client *mdsc, |
| 578 | struct ceph_cap_reservation *ctx); | 502 | struct ceph_cap_reservation *ctx); |
| 579 | extern void ceph_reservation_status(struct ceph_client *client, | 503 | extern void ceph_reservation_status(struct ceph_fs_client *client, |
| 580 | int *total, int *avail, int *used, | 504 | int *total, int *avail, int *used, |
| 581 | int *reserved, int *min); | 505 | int *reserved, int *min); |
| 582 | 506 | ||
| 583 | static inline struct ceph_client *ceph_inode_to_client(struct inode *inode) | 507 | static inline struct ceph_fs_client *ceph_inode_to_client(struct inode *inode) |
| 584 | { | 508 | { |
| 585 | return (struct ceph_client *)inode->i_sb->s_fs_info; | 509 | return (struct ceph_fs_client *)inode->i_sb->s_fs_info; |
| 586 | } | 510 | } |
| 587 | 511 | ||
| 588 | static inline struct ceph_client *ceph_sb_to_client(struct super_block *sb) | 512 | static inline struct ceph_fs_client *ceph_sb_to_client(struct super_block *sb) |
| 589 | { | 513 | { |
| 590 | return (struct ceph_client *)sb->s_fs_info; | 514 | return (struct ceph_fs_client *)sb->s_fs_info; |
| 591 | } | 515 | } |
| 592 | 516 | ||
| 593 | 517 | ||
| @@ -617,51 +541,6 @@ struct ceph_file_info { | |||
| 617 | 541 | ||
| 618 | 542 | ||
| 619 | /* | 543 | /* |
| 620 | * snapshots | ||
| 621 | */ | ||
| 622 | |||
| 623 | /* | ||
| 624 | * A "snap context" is the set of existing snapshots when we | ||
| 625 | * write data. It is used by the OSD to guide its COW behavior. | ||
| 626 | * | ||
| 627 | * The ceph_snap_context is refcounted, and attached to each dirty | ||
| 628 | * page, indicating which context the dirty data belonged when it was | ||
| 629 | * dirtied. | ||
| 630 | */ | ||
| 631 | struct ceph_snap_context { | ||
| 632 | atomic_t nref; | ||
| 633 | u64 seq; | ||
| 634 | int num_snaps; | ||
| 635 | u64 snaps[]; | ||
| 636 | }; | ||
| 637 | |||
| 638 | static inline struct ceph_snap_context * | ||
| 639 | ceph_get_snap_context(struct ceph_snap_context *sc) | ||
| 640 | { | ||
| 641 | /* | ||
| 642 | printk("get_snap_context %p %d -> %d\n", sc, atomic_read(&sc->nref), | ||
| 643 | atomic_read(&sc->nref)+1); | ||
| 644 | */ | ||
| 645 | if (sc) | ||
| 646 | atomic_inc(&sc->nref); | ||
| 647 | return sc; | ||
| 648 | } | ||
| 649 | |||
| 650 | static inline void ceph_put_snap_context(struct ceph_snap_context *sc) | ||
| 651 | { | ||
| 652 | if (!sc) | ||
| 653 | return; | ||
| 654 | /* | ||
| 655 | printk("put_snap_context %p %d -> %d\n", sc, atomic_read(&sc->nref), | ||
| 656 | atomic_read(&sc->nref)-1); | ||
| 657 | */ | ||
| 658 | if (atomic_dec_and_test(&sc->nref)) { | ||
| 659 | /*printk(" deleting snap_context %p\n", sc);*/ | ||
| 660 | kfree(sc); | ||
| 661 | } | ||
| 662 | } | ||
| 663 | |||
| 664 | /* | ||
| 665 | * A "snap realm" describes a subset of the file hierarchy sharing | 544 | * A "snap realm" describes a subset of the file hierarchy sharing |
| 666 | * the same set of snapshots that apply to it. The realms themselves | 545 | * the same set of snapshots that apply to it. The realms themselves |
| 667 | * are organized into a hierarchy, such that children inherit (some of) | 546 | * are organized into a hierarchy, such that children inherit (some of) |
| @@ -699,16 +578,33 @@ struct ceph_snap_realm { | |||
| 699 | spinlock_t inodes_with_caps_lock; | 578 | spinlock_t inodes_with_caps_lock; |
| 700 | }; | 579 | }; |
| 701 | 580 | ||
| 702 | 581 | static inline int default_congestion_kb(void) | |
| 703 | |||
| 704 | /* | ||
| 705 | * calculate the number of pages a given length and offset map onto, | ||
| 706 | * if we align the data. | ||
| 707 | */ | ||
| 708 | static inline int calc_pages_for(u64 off, u64 len) | ||
| 709 | { | 582 | { |
| 710 | return ((off+len+PAGE_CACHE_SIZE-1) >> PAGE_CACHE_SHIFT) - | 583 | int congestion_kb; |
| 711 | (off >> PAGE_CACHE_SHIFT); | 584 | |
| 585 | /* | ||
| 586 | * Copied from NFS | ||
| 587 | * | ||
| 588 | * congestion size, scale with available memory. | ||
| 589 | * | ||
| 590 | * 64MB: 8192k | ||
| 591 | * 128MB: 11585k | ||
| 592 | * 256MB: 16384k | ||
| 593 | * 512MB: 23170k | ||
| 594 | * 1GB: 32768k | ||
| 595 | * 2GB: 46340k | ||
| 596 | * 4GB: 65536k | ||
| 597 | * 8GB: 92681k | ||
| 598 | * 16GB: 131072k | ||
| 599 | * | ||
| 600 | * This allows larger machines to have larger/more transfers. | ||
| 601 | * Limit the default to 256M | ||
| 602 | */ | ||
| 603 | congestion_kb = (16*int_sqrt(totalram_pages)) << (PAGE_SHIFT-10); | ||
| 604 | if (congestion_kb > 256*1024) | ||
| 605 | congestion_kb = 256*1024; | ||
| 606 | |||
| 607 | return congestion_kb; | ||
| 712 | } | 608 | } |
| 713 | 609 | ||
| 714 | 610 | ||
| @@ -741,16 +637,6 @@ static inline bool __ceph_have_pending_cap_snap(struct ceph_inode_info *ci) | |||
| 741 | ci_item)->writing; | 637 | ci_item)->writing; |
| 742 | } | 638 | } |
| 743 | 639 | ||
| 744 | |||
| 745 | /* super.c */ | ||
| 746 | extern struct kmem_cache *ceph_inode_cachep; | ||
| 747 | extern struct kmem_cache *ceph_cap_cachep; | ||
| 748 | extern struct kmem_cache *ceph_dentry_cachep; | ||
| 749 | extern struct kmem_cache *ceph_file_cachep; | ||
| 750 | |||
| 751 | extern const char *ceph_msg_type_name(int type); | ||
| 752 | extern int ceph_check_fsid(struct ceph_client *client, struct ceph_fsid *fsid); | ||
| 753 | |||
| 754 | /* inode.c */ | 640 | /* inode.c */ |
| 755 | extern const struct inode_operations ceph_file_iops; | 641 | extern const struct inode_operations ceph_file_iops; |
| 756 | 642 | ||
| @@ -857,12 +743,18 @@ extern int ceph_mmap(struct file *file, struct vm_area_struct *vma); | |||
| 857 | /* file.c */ | 743 | /* file.c */ |
| 858 | extern const struct file_operations ceph_file_fops; | 744 | extern const struct file_operations ceph_file_fops; |
| 859 | extern const struct address_space_operations ceph_aops; | 745 | extern const struct address_space_operations ceph_aops; |
| 746 | extern int ceph_copy_to_page_vector(struct page **pages, | ||
| 747 | const char *data, | ||
| 748 | loff_t off, size_t len); | ||
| 749 | extern int ceph_copy_from_page_vector(struct page **pages, | ||
| 750 | char *data, | ||
| 751 | loff_t off, size_t len); | ||
| 752 | extern struct page **ceph_alloc_page_vector(int num_pages, gfp_t flags); | ||
| 860 | extern int ceph_open(struct inode *inode, struct file *file); | 753 | extern int ceph_open(struct inode *inode, struct file *file); |
| 861 | extern struct dentry *ceph_lookup_open(struct inode *dir, struct dentry *dentry, | 754 | extern struct dentry *ceph_lookup_open(struct inode *dir, struct dentry *dentry, |
| 862 | struct nameidata *nd, int mode, | 755 | struct nameidata *nd, int mode, |
| 863 | int locked_dir); | 756 | int locked_dir); |
| 864 | extern int ceph_release(struct inode *inode, struct file *filp); | 757 | extern int ceph_release(struct inode *inode, struct file *filp); |
| 865 | extern void ceph_release_page_vector(struct page **pages, int num_pages); | ||
| 866 | 758 | ||
| 867 | /* dir.c */ | 759 | /* dir.c */ |
| 868 | extern const struct file_operations ceph_dir_fops; | 760 | extern const struct file_operations ceph_dir_fops; |
| @@ -892,12 +784,6 @@ extern long ceph_ioctl(struct file *file, unsigned int cmd, unsigned long arg); | |||
| 892 | /* export.c */ | 784 | /* export.c */ |
| 893 | extern const struct export_operations ceph_export_ops; | 785 | extern const struct export_operations ceph_export_ops; |
| 894 | 786 | ||
| 895 | /* debugfs.c */ | ||
| 896 | extern int ceph_debugfs_init(void); | ||
| 897 | extern void ceph_debugfs_cleanup(void); | ||
| 898 | extern int ceph_debugfs_client_init(struct ceph_client *client); | ||
| 899 | extern void ceph_debugfs_client_cleanup(struct ceph_client *client); | ||
| 900 | |||
| 901 | /* locks.c */ | 787 | /* locks.c */ |
| 902 | extern int ceph_lock(struct file *file, int cmd, struct file_lock *fl); | 788 | extern int ceph_lock(struct file *file, int cmd, struct file_lock *fl); |
| 903 | extern int ceph_flock(struct file *file, int cmd, struct file_lock *fl); | 789 | extern int ceph_flock(struct file *file, int cmd, struct file_lock *fl); |
| @@ -914,4 +800,8 @@ static inline struct inode *get_dentry_parent_inode(struct dentry *dentry) | |||
| 914 | return NULL; | 800 | return NULL; |
| 915 | } | 801 | } |
| 916 | 802 | ||
| 803 | /* debugfs.c */ | ||
| 804 | extern int ceph_fs_debugfs_init(struct ceph_fs_client *client); | ||
| 805 | extern void ceph_fs_debugfs_cleanup(struct ceph_fs_client *client); | ||
| 806 | |||
| 917 | #endif /* _FS_CEPH_SUPER_H */ | 807 | #endif /* _FS_CEPH_SUPER_H */ |
diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c index 9578af610b73..6e12a6ba5f79 100644 --- a/fs/ceph/xattr.c +++ b/fs/ceph/xattr.c | |||
| @@ -1,6 +1,9 @@ | |||
| 1 | #include "ceph_debug.h" | 1 | #include <linux/ceph/ceph_debug.h> |
| 2 | |||
| 2 | #include "super.h" | 3 | #include "super.h" |
| 3 | #include "decode.h" | 4 | #include "mds_client.h" |
| 5 | |||
| 6 | #include <linux/ceph/decode.h> | ||
| 4 | 7 | ||
| 5 | #include <linux/xattr.h> | 8 | #include <linux/xattr.h> |
| 6 | #include <linux/slab.h> | 9 | #include <linux/slab.h> |
| @@ -620,12 +623,12 @@ out: | |||
| 620 | static int ceph_sync_setxattr(struct dentry *dentry, const char *name, | 623 | static int ceph_sync_setxattr(struct dentry *dentry, const char *name, |
| 621 | const char *value, size_t size, int flags) | 624 | const char *value, size_t size, int flags) |
| 622 | { | 625 | { |
| 623 | struct ceph_client *client = ceph_sb_to_client(dentry->d_sb); | 626 | struct ceph_fs_client *fsc = ceph_sb_to_client(dentry->d_sb); |
| 624 | struct inode *inode = dentry->d_inode; | 627 | struct inode *inode = dentry->d_inode; |
| 625 | struct ceph_inode_info *ci = ceph_inode(inode); | 628 | struct ceph_inode_info *ci = ceph_inode(inode); |
| 626 | struct inode *parent_inode = dentry->d_parent->d_inode; | 629 | struct inode *parent_inode = dentry->d_parent->d_inode; |
| 627 | struct ceph_mds_request *req; | 630 | struct ceph_mds_request *req; |
| 628 | struct ceph_mds_client *mdsc = &client->mdsc; | 631 | struct ceph_mds_client *mdsc = fsc->mdsc; |
| 629 | int err; | 632 | int err; |
| 630 | int i, nr_pages; | 633 | int i, nr_pages; |
| 631 | struct page **pages = NULL; | 634 | struct page **pages = NULL; |
| @@ -713,10 +716,9 @@ int ceph_setxattr(struct dentry *dentry, const char *name, | |||
| 713 | 716 | ||
| 714 | /* preallocate memory for xattr name, value, index node */ | 717 | /* preallocate memory for xattr name, value, index node */ |
| 715 | err = -ENOMEM; | 718 | err = -ENOMEM; |
| 716 | newname = kmalloc(name_len + 1, GFP_NOFS); | 719 | newname = kmemdup(name, name_len + 1, GFP_NOFS); |
| 717 | if (!newname) | 720 | if (!newname) |
| 718 | goto out; | 721 | goto out; |
| 719 | memcpy(newname, name, name_len + 1); | ||
| 720 | 722 | ||
| 721 | if (val_len) { | 723 | if (val_len) { |
| 722 | newval = kmalloc(val_len + 1, GFP_NOFS); | 724 | newval = kmalloc(val_len + 1, GFP_NOFS); |
| @@ -777,8 +779,8 @@ out: | |||
| 777 | 779 | ||
| 778 | static int ceph_send_removexattr(struct dentry *dentry, const char *name) | 780 | static int ceph_send_removexattr(struct dentry *dentry, const char *name) |
| 779 | { | 781 | { |
| 780 | struct ceph_client *client = ceph_sb_to_client(dentry->d_sb); | 782 | struct ceph_fs_client *fsc = ceph_sb_to_client(dentry->d_sb); |
| 781 | struct ceph_mds_client *mdsc = &client->mdsc; | 783 | struct ceph_mds_client *mdsc = fsc->mdsc; |
| 782 | struct inode *inode = dentry->d_inode; | 784 | struct inode *inode = dentry->d_inode; |
| 783 | struct inode *parent_inode = dentry->d_parent->d_inode; | 785 | struct inode *parent_inode = dentry->d_parent->d_inode; |
| 784 | struct ceph_mds_request *req; | 786 | struct ceph_mds_request *req; |
diff --git a/fs/gfs2/Kconfig b/fs/gfs2/Kconfig index cc9665522148..c465ae066c62 100644 --- a/fs/gfs2/Kconfig +++ b/fs/gfs2/Kconfig | |||
| @@ -1,6 +1,6 @@ | |||
| 1 | config GFS2_FS | 1 | config GFS2_FS |
| 2 | tristate "GFS2 file system support" | 2 | tristate "GFS2 file system support" |
| 3 | depends on EXPERIMENTAL && (64BIT || LBDAF) | 3 | depends on (64BIT || LBDAF) |
| 4 | select DLM if GFS2_FS_LOCKING_DLM | 4 | select DLM if GFS2_FS_LOCKING_DLM |
| 5 | select CONFIGFS_FS if GFS2_FS_LOCKING_DLM | 5 | select CONFIGFS_FS if GFS2_FS_LOCKING_DLM |
| 6 | select SYSFS if GFS2_FS_LOCKING_DLM | 6 | select SYSFS if GFS2_FS_LOCKING_DLM |
diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c index 194fe16d8418..6b24afb96aae 100644 --- a/fs/gfs2/aops.c +++ b/fs/gfs2/aops.c | |||
| @@ -36,8 +36,8 @@ | |||
| 36 | #include "glops.h" | 36 | #include "glops.h" |
| 37 | 37 | ||
| 38 | 38 | ||
| 39 | static void gfs2_page_add_databufs(struct gfs2_inode *ip, struct page *page, | 39 | void gfs2_page_add_databufs(struct gfs2_inode *ip, struct page *page, |
| 40 | unsigned int from, unsigned int to) | 40 | unsigned int from, unsigned int to) |
| 41 | { | 41 | { |
| 42 | struct buffer_head *head = page_buffers(page); | 42 | struct buffer_head *head = page_buffers(page); |
| 43 | unsigned int bsize = head->b_size; | 43 | unsigned int bsize = head->b_size; |
| @@ -615,7 +615,7 @@ static int gfs2_write_begin(struct file *file, struct address_space *mapping, | |||
| 615 | unsigned int data_blocks = 0, ind_blocks = 0, rblocks; | 615 | unsigned int data_blocks = 0, ind_blocks = 0, rblocks; |
| 616 | int alloc_required; | 616 | int alloc_required; |
| 617 | int error = 0; | 617 | int error = 0; |
| 618 | struct gfs2_alloc *al; | 618 | struct gfs2_alloc *al = NULL; |
| 619 | pgoff_t index = pos >> PAGE_CACHE_SHIFT; | 619 | pgoff_t index = pos >> PAGE_CACHE_SHIFT; |
| 620 | unsigned from = pos & (PAGE_CACHE_SIZE - 1); | 620 | unsigned from = pos & (PAGE_CACHE_SIZE - 1); |
| 621 | unsigned to = from + len; | 621 | unsigned to = from + len; |
| @@ -663,6 +663,8 @@ static int gfs2_write_begin(struct file *file, struct address_space *mapping, | |||
| 663 | rblocks += RES_STATFS + RES_QUOTA; | 663 | rblocks += RES_STATFS + RES_QUOTA; |
| 664 | if (&ip->i_inode == sdp->sd_rindex) | 664 | if (&ip->i_inode == sdp->sd_rindex) |
| 665 | rblocks += 2 * RES_STATFS; | 665 | rblocks += 2 * RES_STATFS; |
| 666 | if (alloc_required) | ||
| 667 | rblocks += gfs2_rg_blocks(al); | ||
| 666 | 668 | ||
| 667 | error = gfs2_trans_begin(sdp, rblocks, | 669 | error = gfs2_trans_begin(sdp, rblocks, |
| 668 | PAGE_CACHE_SIZE/sdp->sd_sb.sb_bsize); | 670 | PAGE_CACHE_SIZE/sdp->sd_sb.sb_bsize); |
| @@ -696,13 +698,11 @@ out: | |||
| 696 | 698 | ||
| 697 | page_cache_release(page); | 699 | page_cache_release(page); |
| 698 | 700 | ||
| 699 | /* | 701 | gfs2_trans_end(sdp); |
| 700 | * XXX(truncate): the call below should probably be replaced with | ||
| 701 | * a call to the gfs2-specific truncate blocks helper to actually | ||
| 702 | * release disk blocks.. | ||
| 703 | */ | ||
| 704 | if (pos + len > ip->i_inode.i_size) | 702 | if (pos + len > ip->i_inode.i_size) |
| 705 | truncate_setsize(&ip->i_inode, ip->i_inode.i_size); | 703 | gfs2_trim_blocks(&ip->i_inode); |
| 704 | goto out_trans_fail; | ||
| 705 | |||
| 706 | out_endtrans: | 706 | out_endtrans: |
| 707 | gfs2_trans_end(sdp); | 707 | gfs2_trans_end(sdp); |
| 708 | out_trans_fail: | 708 | out_trans_fail: |
| @@ -802,10 +802,8 @@ static int gfs2_stuffed_write_end(struct inode *inode, struct buffer_head *dibh, | |||
| 802 | page_cache_release(page); | 802 | page_cache_release(page); |
| 803 | 803 | ||
| 804 | if (copied) { | 804 | if (copied) { |
| 805 | if (inode->i_size < to) { | 805 | if (inode->i_size < to) |
| 806 | i_size_write(inode, to); | 806 | i_size_write(inode, to); |
| 807 | ip->i_disksize = inode->i_size; | ||
| 808 | } | ||
| 809 | gfs2_dinode_out(ip, di); | 807 | gfs2_dinode_out(ip, di); |
| 810 | mark_inode_dirty(inode); | 808 | mark_inode_dirty(inode); |
| 811 | } | 809 | } |
| @@ -876,8 +874,6 @@ static int gfs2_write_end(struct file *file, struct address_space *mapping, | |||
| 876 | 874 | ||
| 877 | ret = generic_write_end(file, mapping, pos, len, copied, page, fsdata); | 875 | ret = generic_write_end(file, mapping, pos, len, copied, page, fsdata); |
| 878 | if (ret > 0) { | 876 | if (ret > 0) { |
| 879 | if (inode->i_size > ip->i_disksize) | ||
| 880 | ip->i_disksize = inode->i_size; | ||
| 881 | gfs2_dinode_out(ip, dibh->b_data); | 877 | gfs2_dinode_out(ip, dibh->b_data); |
| 882 | mark_inode_dirty(inode); | 878 | mark_inode_dirty(inode); |
| 883 | } | 879 | } |
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c index 6f482809d1a3..5476c066d4ee 100644 --- a/fs/gfs2/bmap.c +++ b/fs/gfs2/bmap.c | |||
| @@ -50,7 +50,7 @@ struct strip_mine { | |||
| 50 | * @ip: the inode | 50 | * @ip: the inode |
| 51 | * @dibh: the dinode buffer | 51 | * @dibh: the dinode buffer |
| 52 | * @block: the block number that was allocated | 52 | * @block: the block number that was allocated |
| 53 | * @private: any locked page held by the caller process | 53 | * @page: The (optional) page. This is looked up if @page is NULL |
| 54 | * | 54 | * |
| 55 | * Returns: errno | 55 | * Returns: errno |
| 56 | */ | 56 | */ |
| @@ -109,8 +109,7 @@ static int gfs2_unstuffer_page(struct gfs2_inode *ip, struct buffer_head *dibh, | |||
| 109 | /** | 109 | /** |
| 110 | * gfs2_unstuff_dinode - Unstuff a dinode when the data has grown too big | 110 | * gfs2_unstuff_dinode - Unstuff a dinode when the data has grown too big |
| 111 | * @ip: The GFS2 inode to unstuff | 111 | * @ip: The GFS2 inode to unstuff |
| 112 | * @unstuffer: the routine that handles unstuffing a non-zero length file | 112 | * @page: The (optional) page. This is looked up if the @page is NULL |
| 113 | * @private: private data for the unstuffer | ||
| 114 | * | 113 | * |
| 115 | * This routine unstuffs a dinode and returns it to a "normal" state such | 114 | * This routine unstuffs a dinode and returns it to a "normal" state such |
| 116 | * that the height can be grown in the traditional way. | 115 | * that the height can be grown in the traditional way. |
| @@ -132,7 +131,7 @@ int gfs2_unstuff_dinode(struct gfs2_inode *ip, struct page *page) | |||
| 132 | if (error) | 131 | if (error) |
| 133 | goto out; | 132 | goto out; |
| 134 | 133 | ||
| 135 | if (ip->i_disksize) { | 134 | if (i_size_read(&ip->i_inode)) { |
| 136 | /* Get a free block, fill it with the stuffed data, | 135 | /* Get a free block, fill it with the stuffed data, |
| 137 | and write it out to disk */ | 136 | and write it out to disk */ |
| 138 | 137 | ||
| @@ -161,7 +160,7 @@ int gfs2_unstuff_dinode(struct gfs2_inode *ip, struct page *page) | |||
| 161 | di = (struct gfs2_dinode *)dibh->b_data; | 160 | di = (struct gfs2_dinode *)dibh->b_data; |
| 162 | gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode)); | 161 | gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode)); |
| 163 | 162 | ||
| 164 | if (ip->i_disksize) { | 163 | if (i_size_read(&ip->i_inode)) { |
| 165 | *(__be64 *)(di + 1) = cpu_to_be64(block); | 164 | *(__be64 *)(di + 1) = cpu_to_be64(block); |
| 166 | gfs2_add_inode_blocks(&ip->i_inode, 1); | 165 | gfs2_add_inode_blocks(&ip->i_inode, 1); |
| 167 | di->di_blocks = cpu_to_be64(gfs2_get_inode_blocks(&ip->i_inode)); | 166 | di->di_blocks = cpu_to_be64(gfs2_get_inode_blocks(&ip->i_inode)); |
| @@ -885,83 +884,14 @@ out: | |||
| 885 | } | 884 | } |
| 886 | 885 | ||
| 887 | /** | 886 | /** |
| 888 | * do_grow - Make a file look bigger than it is | ||
| 889 | * @ip: the inode | ||
| 890 | * @size: the size to set the file to | ||
| 891 | * | ||
| 892 | * Called with an exclusive lock on @ip. | ||
| 893 | * | ||
| 894 | * Returns: errno | ||
| 895 | */ | ||
| 896 | |||
| 897 | static int do_grow(struct gfs2_inode *ip, u64 size) | ||
| 898 | { | ||
| 899 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); | ||
| 900 | struct gfs2_alloc *al; | ||
| 901 | struct buffer_head *dibh; | ||
| 902 | int error; | ||
| 903 | |||
| 904 | al = gfs2_alloc_get(ip); | ||
| 905 | if (!al) | ||
| 906 | return -ENOMEM; | ||
| 907 | |||
| 908 | error = gfs2_quota_lock_check(ip); | ||
| 909 | if (error) | ||
| 910 | goto out; | ||
| 911 | |||
| 912 | al->al_requested = sdp->sd_max_height + RES_DATA; | ||
| 913 | |||
| 914 | error = gfs2_inplace_reserve(ip); | ||
| 915 | if (error) | ||
| 916 | goto out_gunlock_q; | ||
| 917 | |||
| 918 | error = gfs2_trans_begin(sdp, | ||
| 919 | sdp->sd_max_height + al->al_rgd->rd_length + | ||
| 920 | RES_JDATA + RES_DINODE + RES_STATFS + RES_QUOTA, 0); | ||
| 921 | if (error) | ||
| 922 | goto out_ipres; | ||
| 923 | |||
| 924 | error = gfs2_meta_inode_buffer(ip, &dibh); | ||
| 925 | if (error) | ||
| 926 | goto out_end_trans; | ||
| 927 | |||
| 928 | if (size > sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode)) { | ||
| 929 | if (gfs2_is_stuffed(ip)) { | ||
| 930 | error = gfs2_unstuff_dinode(ip, NULL); | ||
| 931 | if (error) | ||
| 932 | goto out_brelse; | ||
| 933 | } | ||
| 934 | } | ||
| 935 | |||
| 936 | ip->i_disksize = size; | ||
| 937 | ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME; | ||
| 938 | gfs2_trans_add_bh(ip->i_gl, dibh, 1); | ||
| 939 | gfs2_dinode_out(ip, dibh->b_data); | ||
| 940 | |||
| 941 | out_brelse: | ||
| 942 | brelse(dibh); | ||
| 943 | out_end_trans: | ||
| 944 | gfs2_trans_end(sdp); | ||
| 945 | out_ipres: | ||
| 946 | gfs2_inplace_release(ip); | ||
| 947 | out_gunlock_q: | ||
| 948 | gfs2_quota_unlock(ip); | ||
| 949 | out: | ||
| 950 | gfs2_alloc_put(ip); | ||
| 951 | return error; | ||
| 952 | } | ||
| 953 | |||
| 954 | |||
| 955 | /** | ||
| 956 | * gfs2_block_truncate_page - Deal with zeroing out data for truncate | 887 | * gfs2_block_truncate_page - Deal with zeroing out data for truncate |
| 957 | * | 888 | * |
| 958 | * This is partly borrowed from ext3. | 889 | * This is partly borrowed from ext3. |
| 959 | */ | 890 | */ |
| 960 | static int gfs2_block_truncate_page(struct address_space *mapping) | 891 | static int gfs2_block_truncate_page(struct address_space *mapping, loff_t from) |
| 961 | { | 892 | { |
| 962 | struct inode *inode = mapping->host; | 893 | struct inode *inode = mapping->host; |
| 963 | struct gfs2_inode *ip = GFS2_I(inode); | 894 | struct gfs2_inode *ip = GFS2_I(inode); |
| 964 | loff_t from = inode->i_size; | ||
| 965 | unsigned long index = from >> PAGE_CACHE_SHIFT; | 895 | unsigned long index = from >> PAGE_CACHE_SHIFT; |
| 966 | unsigned offset = from & (PAGE_CACHE_SIZE-1); | 896 | unsigned offset = from & (PAGE_CACHE_SIZE-1); |
| 967 | unsigned blocksize, iblock, length, pos; | 897 | unsigned blocksize, iblock, length, pos; |
| @@ -1023,9 +953,11 @@ unlock: | |||
| 1023 | return err; | 953 | return err; |
| 1024 | } | 954 | } |
| 1025 | 955 | ||
| 1026 | static int trunc_start(struct gfs2_inode *ip, u64 size) | 956 | static int trunc_start(struct inode *inode, u64 oldsize, u64 newsize) |
| 1027 | { | 957 | { |
| 1028 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); | 958 | struct gfs2_inode *ip = GFS2_I(inode); |
| 959 | struct gfs2_sbd *sdp = GFS2_SB(inode); | ||
| 960 | struct address_space *mapping = inode->i_mapping; | ||
| 1029 | struct buffer_head *dibh; | 961 | struct buffer_head *dibh; |
| 1030 | int journaled = gfs2_is_jdata(ip); | 962 | int journaled = gfs2_is_jdata(ip); |
| 1031 | int error; | 963 | int error; |
| @@ -1039,31 +971,26 @@ static int trunc_start(struct gfs2_inode *ip, u64 size) | |||
| 1039 | if (error) | 971 | if (error) |
| 1040 | goto out; | 972 | goto out; |
| 1041 | 973 | ||
| 974 | gfs2_trans_add_bh(ip->i_gl, dibh, 1); | ||
| 975 | |||
| 1042 | if (gfs2_is_stuffed(ip)) { | 976 | if (gfs2_is_stuffed(ip)) { |
| 1043 | u64 dsize = size + sizeof(struct gfs2_dinode); | 977 | gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode) + newsize); |
| 1044 | ip->i_disksize = size; | ||
| 1045 | ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME; | ||
| 1046 | gfs2_trans_add_bh(ip->i_gl, dibh, 1); | ||
| 1047 | gfs2_dinode_out(ip, dibh->b_data); | ||
| 1048 | if (dsize > dibh->b_size) | ||
| 1049 | dsize = dibh->b_size; | ||
| 1050 | gfs2_buffer_clear_tail(dibh, dsize); | ||
| 1051 | error = 1; | ||
| 1052 | } else { | 978 | } else { |
| 1053 | if (size & (u64)(sdp->sd_sb.sb_bsize - 1)) | 979 | if (newsize & (u64)(sdp->sd_sb.sb_bsize - 1)) { |
| 1054 | error = gfs2_block_truncate_page(ip->i_inode.i_mapping); | 980 | error = gfs2_block_truncate_page(mapping, newsize); |
| 1055 | 981 | if (error) | |
| 1056 | if (!error) { | 982 | goto out_brelse; |
| 1057 | ip->i_disksize = size; | ||
| 1058 | ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME; | ||
| 1059 | ip->i_diskflags |= GFS2_DIF_TRUNC_IN_PROG; | ||
| 1060 | gfs2_trans_add_bh(ip->i_gl, dibh, 1); | ||
| 1061 | gfs2_dinode_out(ip, dibh->b_data); | ||
| 1062 | } | 983 | } |
| 984 | ip->i_diskflags |= GFS2_DIF_TRUNC_IN_PROG; | ||
| 1063 | } | 985 | } |
| 1064 | 986 | ||
| 1065 | brelse(dibh); | 987 | i_size_write(inode, newsize); |
| 988 | ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME; | ||
| 989 | gfs2_dinode_out(ip, dibh->b_data); | ||
| 1066 | 990 | ||
| 991 | truncate_pagecache(inode, oldsize, newsize); | ||
| 992 | out_brelse: | ||
| 993 | brelse(dibh); | ||
| 1067 | out: | 994 | out: |
| 1068 | gfs2_trans_end(sdp); | 995 | gfs2_trans_end(sdp); |
| 1069 | return error; | 996 | return error; |
| @@ -1123,7 +1050,7 @@ static int trunc_end(struct gfs2_inode *ip) | |||
| 1123 | if (error) | 1050 | if (error) |
| 1124 | goto out; | 1051 | goto out; |
| 1125 | 1052 | ||
| 1126 | if (!ip->i_disksize) { | 1053 | if (!i_size_read(&ip->i_inode)) { |
| 1127 | ip->i_height = 0; | 1054 | ip->i_height = 0; |
| 1128 | ip->i_goal = ip->i_no_addr; | 1055 | ip->i_goal = ip->i_no_addr; |
| 1129 | gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode)); | 1056 | gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode)); |
| @@ -1143,92 +1070,154 @@ out: | |||
| 1143 | 1070 | ||
| 1144 | /** | 1071 | /** |
| 1145 | * do_shrink - make a file smaller | 1072 | * do_shrink - make a file smaller |
| 1146 | * @ip: the inode | 1073 | * @inode: the inode |
| 1147 | * @size: the size to make the file | 1074 | * @oldsize: the current inode size |
| 1148 | * @truncator: function to truncate the last partial block | 1075 | * @newsize: the size to make the file |
| 1149 | * | 1076 | * |
| 1150 | * Called with an exclusive lock on @ip. | 1077 | * Called with an exclusive lock on @inode. The @newsize must |
| 1078 | * be equal to or smaller than the current inode size. | ||
| 1151 | * | 1079 | * |
| 1152 | * Returns: errno | 1080 | * Returns: errno |
| 1153 | */ | 1081 | */ |
| 1154 | 1082 | ||
| 1155 | static int do_shrink(struct gfs2_inode *ip, u64 size) | 1083 | static int do_shrink(struct inode *inode, u64 oldsize, u64 newsize) |
| 1156 | { | 1084 | { |
| 1085 | struct gfs2_inode *ip = GFS2_I(inode); | ||
| 1157 | int error; | 1086 | int error; |
| 1158 | 1087 | ||
| 1159 | error = trunc_start(ip, size); | 1088 | error = trunc_start(inode, oldsize, newsize); |
| 1160 | if (error < 0) | 1089 | if (error < 0) |
| 1161 | return error; | 1090 | return error; |
| 1162 | if (error > 0) | 1091 | if (gfs2_is_stuffed(ip)) |
| 1163 | return 0; | 1092 | return 0; |
| 1164 | 1093 | ||
| 1165 | error = trunc_dealloc(ip, size); | 1094 | error = trunc_dealloc(ip, newsize); |
| 1166 | if (!error) | 1095 | if (error == 0) |
| 1167 | error = trunc_end(ip); | 1096 | error = trunc_end(ip); |
| 1168 | 1097 | ||
| 1169 | return error; | 1098 | return error; |
| 1170 | } | 1099 | } |
| 1171 | 1100 | ||
| 1172 | static int do_touch(struct gfs2_inode *ip, u64 size) | 1101 | void gfs2_trim_blocks(struct inode *inode) |
| 1173 | { | 1102 | { |
| 1174 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); | 1103 | u64 size = inode->i_size; |
| 1104 | int ret; | ||
| 1105 | |||
| 1106 | ret = do_shrink(inode, size, size); | ||
| 1107 | WARN_ON(ret != 0); | ||
| 1108 | } | ||
| 1109 | |||
| 1110 | /** | ||
| 1111 | * do_grow - Touch and update inode size | ||
| 1112 | * @inode: The inode | ||
| 1113 | * @size: The new size | ||
| 1114 | * | ||
| 1115 | * This function updates the timestamps on the inode and | ||
| 1116 | * may also increase the size of the inode. This function | ||
| 1117 | * must not be called with @size any smaller than the current | ||
| 1118 | * inode size. | ||
| 1119 | * | ||
| 1120 | * Although it is not strictly required to unstuff files here, | ||
| 1121 | * earlier versions of GFS2 have a bug in the stuffed file reading | ||
| 1122 | * code which will result in a buffer overrun if the size is larger | ||
| 1123 | * than the max stuffed file size. In order to prevent this from | ||
| 1125 | * occurring, such files are unstuffed, but in other cases we can | ||
| 1125 | * just update the inode size directly. | ||
| 1126 | * | ||
| 1127 | * Returns: 0 on success, or -ve on error | ||
| 1128 | */ | ||
| 1129 | |||
| 1130 | static int do_grow(struct inode *inode, u64 size) | ||
| 1131 | { | ||
| 1132 | struct gfs2_inode *ip = GFS2_I(inode); | ||
| 1133 | struct gfs2_sbd *sdp = GFS2_SB(inode); | ||
| 1175 | struct buffer_head *dibh; | 1134 | struct buffer_head *dibh; |
| 1135 | struct gfs2_alloc *al = NULL; | ||
| 1176 | int error; | 1136 | int error; |
| 1177 | 1137 | ||
| 1178 | error = gfs2_trans_begin(sdp, RES_DINODE, 0); | 1138 | if (gfs2_is_stuffed(ip) && |
| 1139 | (size > (sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode)))) { | ||
| 1140 | al = gfs2_alloc_get(ip); | ||
| 1141 | if (al == NULL) | ||
| 1142 | return -ENOMEM; | ||
| 1143 | |||
| 1144 | error = gfs2_quota_lock_check(ip); | ||
| 1145 | if (error) | ||
| 1146 | goto do_grow_alloc_put; | ||
| 1147 | |||
| 1148 | al->al_requested = 1; | ||
| 1149 | error = gfs2_inplace_reserve(ip); | ||
| 1150 | if (error) | ||
| 1151 | goto do_grow_qunlock; | ||
| 1152 | } | ||
| 1153 | |||
| 1154 | error = gfs2_trans_begin(sdp, RES_DINODE + RES_STATFS + RES_RG_BIT, 0); | ||
| 1179 | if (error) | 1155 | if (error) |
| 1180 | return error; | 1156 | goto do_grow_release; |
| 1181 | 1157 | ||
| 1182 | down_write(&ip->i_rw_mutex); | 1158 | if (al) { |
| 1159 | error = gfs2_unstuff_dinode(ip, NULL); | ||
| 1160 | if (error) | ||
| 1161 | goto do_end_trans; | ||
| 1162 | } | ||
| 1183 | 1163 | ||
| 1184 | error = gfs2_meta_inode_buffer(ip, &dibh); | 1164 | error = gfs2_meta_inode_buffer(ip, &dibh); |
| 1185 | if (error) | 1165 | if (error) |
| 1186 | goto do_touch_out; | 1166 | goto do_end_trans; |
| 1187 | 1167 | ||
| 1168 | i_size_write(inode, size); | ||
| 1188 | ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME; | 1169 | ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME; |
| 1189 | gfs2_trans_add_bh(ip->i_gl, dibh, 1); | 1170 | gfs2_trans_add_bh(ip->i_gl, dibh, 1); |
| 1190 | gfs2_dinode_out(ip, dibh->b_data); | 1171 | gfs2_dinode_out(ip, dibh->b_data); |
| 1191 | brelse(dibh); | 1172 | brelse(dibh); |
| 1192 | 1173 | ||
| 1193 | do_touch_out: | 1174 | do_end_trans: |
| 1194 | up_write(&ip->i_rw_mutex); | ||
| 1195 | gfs2_trans_end(sdp); | 1175 | gfs2_trans_end(sdp); |
| 1176 | do_grow_release: | ||
| 1177 | if (al) { | ||
| 1178 | gfs2_inplace_release(ip); | ||
| 1179 | do_grow_qunlock: | ||
| 1180 | gfs2_quota_unlock(ip); | ||
| 1181 | do_grow_alloc_put: | ||
| 1182 | gfs2_alloc_put(ip); | ||
| 1183 | } | ||
| 1196 | return error; | 1184 | return error; |
| 1197 | } | 1185 | } |
| 1198 | 1186 | ||
| 1199 | /** | 1187 | /** |
| 1200 | * gfs2_truncatei - make a file a given size | 1188 | * gfs2_setattr_size - make a file a given size |
| 1201 | * @ip: the inode | 1189 | * @inode: the inode |
| 1202 | * @size: the size to make the file | 1190 | * @newsize: the size to make the file |
| 1203 | * @truncator: function to truncate the last partial block | ||
| 1204 | * | 1191 | * |
| 1205 | * The file size can grow, shrink, or stay the same size. | 1192 | * The file size can grow, shrink, or stay the same size. This |
| 1193 | * is called holding i_mutex and an exclusive glock on the inode | ||
| 1194 | * in question. | ||
| 1206 | * | 1195 | * |
| 1207 | * Returns: errno | 1196 | * Returns: errno |
| 1208 | */ | 1197 | */ |
| 1209 | 1198 | ||
| 1210 | int gfs2_truncatei(struct gfs2_inode *ip, u64 size) | 1199 | int gfs2_setattr_size(struct inode *inode, u64 newsize) |
| 1211 | { | 1200 | { |
| 1212 | int error; | 1201 | int ret; |
| 1202 | u64 oldsize; | ||
| 1213 | 1203 | ||
| 1214 | if (gfs2_assert_warn(GFS2_SB(&ip->i_inode), S_ISREG(ip->i_inode.i_mode))) | 1204 | BUG_ON(!S_ISREG(inode->i_mode)); |
| 1215 | return -EINVAL; | ||
| 1216 | 1205 | ||
| 1217 | if (size > ip->i_disksize) | 1206 | ret = inode_newsize_ok(inode, newsize); |
| 1218 | error = do_grow(ip, size); | 1207 | if (ret) |
| 1219 | else if (size < ip->i_disksize) | 1208 | return ret; |
| 1220 | error = do_shrink(ip, size); | ||
| 1221 | else | ||
| 1222 | /* update time stamps */ | ||
| 1223 | error = do_touch(ip, size); | ||
| 1224 | 1209 | ||
| 1225 | return error; | 1210 | oldsize = inode->i_size; |
| 1211 | if (newsize >= oldsize) | ||
| 1212 | return do_grow(inode, newsize); | ||
| 1213 | |||
| 1214 | return do_shrink(inode, oldsize, newsize); | ||
| 1226 | } | 1215 | } |
| 1227 | 1216 | ||
| 1228 | int gfs2_truncatei_resume(struct gfs2_inode *ip) | 1217 | int gfs2_truncatei_resume(struct gfs2_inode *ip) |
| 1229 | { | 1218 | { |
| 1230 | int error; | 1219 | int error; |
| 1231 | error = trunc_dealloc(ip, ip->i_disksize); | 1220 | error = trunc_dealloc(ip, i_size_read(&ip->i_inode)); |
| 1232 | if (!error) | 1221 | if (!error) |
| 1233 | error = trunc_end(ip); | 1222 | error = trunc_end(ip); |
| 1234 | return error; | 1223 | return error; |
| @@ -1269,7 +1258,7 @@ int gfs2_write_alloc_required(struct gfs2_inode *ip, u64 offset, | |||
| 1269 | 1258 | ||
| 1270 | shift = sdp->sd_sb.sb_bsize_shift; | 1259 | shift = sdp->sd_sb.sb_bsize_shift; |
| 1271 | BUG_ON(gfs2_is_dir(ip)); | 1260 | BUG_ON(gfs2_is_dir(ip)); |
| 1272 | end_of_file = (ip->i_disksize + sdp->sd_sb.sb_bsize - 1) >> shift; | 1261 | end_of_file = (i_size_read(&ip->i_inode) + sdp->sd_sb.sb_bsize - 1) >> shift; |
| 1273 | lblock = offset >> shift; | 1262 | lblock = offset >> shift; |
| 1274 | lblock_stop = (offset + len + sdp->sd_sb.sb_bsize - 1) >> shift; | 1263 | lblock_stop = (offset + len + sdp->sd_sb.sb_bsize - 1) >> shift; |
| 1275 | if (lblock_stop > end_of_file) | 1264 | if (lblock_stop > end_of_file) |
diff --git a/fs/gfs2/bmap.h b/fs/gfs2/bmap.h index a20a5213135a..42fea03e2bd9 100644 --- a/fs/gfs2/bmap.h +++ b/fs/gfs2/bmap.h | |||
| @@ -44,14 +44,16 @@ static inline void gfs2_write_calc_reserv(const struct gfs2_inode *ip, | |||
| 44 | } | 44 | } |
| 45 | } | 45 | } |
| 46 | 46 | ||
| 47 | int gfs2_unstuff_dinode(struct gfs2_inode *ip, struct page *page); | 47 | extern int gfs2_unstuff_dinode(struct gfs2_inode *ip, struct page *page); |
| 48 | int gfs2_block_map(struct inode *inode, sector_t lblock, struct buffer_head *bh, int create); | 48 | extern int gfs2_block_map(struct inode *inode, sector_t lblock, |
| 49 | int gfs2_extent_map(struct inode *inode, u64 lblock, int *new, u64 *dblock, unsigned *extlen); | 49 | struct buffer_head *bh, int create); |
| 50 | 50 | extern int gfs2_extent_map(struct inode *inode, u64 lblock, int *new, | |
| 51 | int gfs2_truncatei(struct gfs2_inode *ip, u64 size); | 51 | u64 *dblock, unsigned *extlen); |
| 52 | int gfs2_truncatei_resume(struct gfs2_inode *ip); | 52 | extern int gfs2_setattr_size(struct inode *inode, u64 size); |
| 53 | int gfs2_file_dealloc(struct gfs2_inode *ip); | 53 | extern void gfs2_trim_blocks(struct inode *inode); |
| 54 | int gfs2_write_alloc_required(struct gfs2_inode *ip, u64 offset, | 54 | extern int gfs2_truncatei_resume(struct gfs2_inode *ip); |
| 55 | unsigned int len); | 55 | extern int gfs2_file_dealloc(struct gfs2_inode *ip); |
| 56 | extern int gfs2_write_alloc_required(struct gfs2_inode *ip, u64 offset, | ||
| 57 | unsigned int len); | ||
| 56 | 58 | ||
| 57 | #endif /* __BMAP_DOT_H__ */ | 59 | #endif /* __BMAP_DOT_H__ */ |
diff --git a/fs/gfs2/dentry.c b/fs/gfs2/dentry.c index bb7907bde3d8..6798755b3858 100644 --- a/fs/gfs2/dentry.c +++ b/fs/gfs2/dentry.c | |||
| @@ -49,7 +49,7 @@ static int gfs2_drevalidate(struct dentry *dentry, struct nameidata *nd) | |||
| 49 | ip = GFS2_I(inode); | 49 | ip = GFS2_I(inode); |
| 50 | } | 50 | } |
| 51 | 51 | ||
| 52 | if (sdp->sd_args.ar_localcaching) | 52 | if (sdp->sd_lockstruct.ls_ops->lm_mount == NULL) |
| 53 | goto valid; | 53 | goto valid; |
| 54 | 54 | ||
| 55 | had_lock = (gfs2_glock_is_locked_by_me(dip->i_gl) != NULL); | 55 | had_lock = (gfs2_glock_is_locked_by_me(dip->i_gl) != NULL); |
diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c index b9dd88a78dd4..5c356d09c321 100644 --- a/fs/gfs2/dir.c +++ b/fs/gfs2/dir.c | |||
| @@ -79,6 +79,9 @@ | |||
| 79 | #define gfs2_disk_hash2offset(h) (((u64)(h)) >> 1) | 79 | #define gfs2_disk_hash2offset(h) (((u64)(h)) >> 1) |
| 80 | #define gfs2_dir_offset2hash(p) ((u32)(((u64)(p)) << 1)) | 80 | #define gfs2_dir_offset2hash(p) ((u32)(((u64)(p)) << 1)) |
| 81 | 81 | ||
| 82 | struct qstr gfs2_qdot __read_mostly; | ||
| 83 | struct qstr gfs2_qdotdot __read_mostly; | ||
| 84 | |||
| 82 | typedef int (*leaf_call_t) (struct gfs2_inode *dip, u32 index, u32 len, | 85 | typedef int (*leaf_call_t) (struct gfs2_inode *dip, u32 index, u32 len, |
| 83 | u64 leaf_no, void *data); | 86 | u64 leaf_no, void *data); |
| 84 | typedef int (*gfs2_dscan_t)(const struct gfs2_dirent *dent, | 87 | typedef int (*gfs2_dscan_t)(const struct gfs2_dirent *dent, |
| @@ -127,8 +130,8 @@ static int gfs2_dir_write_stuffed(struct gfs2_inode *ip, const char *buf, | |||
| 127 | 130 | ||
| 128 | gfs2_trans_add_bh(ip->i_gl, dibh, 1); | 131 | gfs2_trans_add_bh(ip->i_gl, dibh, 1); |
| 129 | memcpy(dibh->b_data + offset + sizeof(struct gfs2_dinode), buf, size); | 132 | memcpy(dibh->b_data + offset + sizeof(struct gfs2_dinode), buf, size); |
| 130 | if (ip->i_disksize < offset + size) | 133 | if (ip->i_inode.i_size < offset + size) |
| 131 | ip->i_disksize = offset + size; | 134 | i_size_write(&ip->i_inode, offset + size); |
| 132 | ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME; | 135 | ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME; |
| 133 | gfs2_dinode_out(ip, dibh->b_data); | 136 | gfs2_dinode_out(ip, dibh->b_data); |
| 134 | 137 | ||
| @@ -225,8 +228,8 @@ out: | |||
| 225 | if (error) | 228 | if (error) |
| 226 | return error; | 229 | return error; |
| 227 | 230 | ||
| 228 | if (ip->i_disksize < offset + copied) | 231 | if (ip->i_inode.i_size < offset + copied) |
| 229 | ip->i_disksize = offset + copied; | 232 | i_size_write(&ip->i_inode, offset + copied); |
| 230 | ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME; | 233 | ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME; |
| 231 | 234 | ||
| 232 | gfs2_trans_add_bh(ip->i_gl, dibh, 1); | 235 | gfs2_trans_add_bh(ip->i_gl, dibh, 1); |
| @@ -275,12 +278,13 @@ static int gfs2_dir_read_data(struct gfs2_inode *ip, char *buf, u64 offset, | |||
| 275 | unsigned int o; | 278 | unsigned int o; |
| 276 | int copied = 0; | 279 | int copied = 0; |
| 277 | int error = 0; | 280 | int error = 0; |
| 281 | u64 disksize = i_size_read(&ip->i_inode); | ||
| 278 | 282 | ||
| 279 | if (offset >= ip->i_disksize) | 283 | if (offset >= disksize) |
| 280 | return 0; | 284 | return 0; |
| 281 | 285 | ||
| 282 | if (offset + size > ip->i_disksize) | 286 | if (offset + size > disksize) |
| 283 | size = ip->i_disksize - offset; | 287 | size = disksize - offset; |
| 284 | 288 | ||
| 285 | if (!size) | 289 | if (!size) |
| 286 | return 0; | 290 | return 0; |
| @@ -727,7 +731,7 @@ static struct gfs2_dirent *gfs2_dirent_search(struct inode *inode, | |||
| 727 | unsigned hsize = 1 << ip->i_depth; | 731 | unsigned hsize = 1 << ip->i_depth; |
| 728 | unsigned index; | 732 | unsigned index; |
| 729 | u64 ln; | 733 | u64 ln; |
| 730 | if (hsize * sizeof(u64) != ip->i_disksize) { | 734 | if (hsize * sizeof(u64) != i_size_read(inode)) { |
| 731 | gfs2_consist_inode(ip); | 735 | gfs2_consist_inode(ip); |
| 732 | return ERR_PTR(-EIO); | 736 | return ERR_PTR(-EIO); |
| 733 | } | 737 | } |
| @@ -879,7 +883,7 @@ static int dir_make_exhash(struct inode *inode) | |||
| 879 | for (x = sdp->sd_hash_ptrs; x--; lp++) | 883 | for (x = sdp->sd_hash_ptrs; x--; lp++) |
| 880 | *lp = cpu_to_be64(bn); | 884 | *lp = cpu_to_be64(bn); |
| 881 | 885 | ||
| 882 | dip->i_disksize = sdp->sd_sb.sb_bsize / 2; | 886 | i_size_write(inode, sdp->sd_sb.sb_bsize / 2); |
| 883 | gfs2_add_inode_blocks(&dip->i_inode, 1); | 887 | gfs2_add_inode_blocks(&dip->i_inode, 1); |
| 884 | dip->i_diskflags |= GFS2_DIF_EXHASH; | 888 | dip->i_diskflags |= GFS2_DIF_EXHASH; |
| 885 | 889 | ||
| @@ -1057,11 +1061,12 @@ static int dir_double_exhash(struct gfs2_inode *dip) | |||
| 1057 | u64 *buf; | 1061 | u64 *buf; |
| 1058 | u64 *from, *to; | 1062 | u64 *from, *to; |
| 1059 | u64 block; | 1063 | u64 block; |
| 1064 | u64 disksize = i_size_read(&dip->i_inode); | ||
| 1060 | int x; | 1065 | int x; |
| 1061 | int error = 0; | 1066 | int error = 0; |
| 1062 | 1067 | ||
| 1063 | hsize = 1 << dip->i_depth; | 1068 | hsize = 1 << dip->i_depth; |
| 1064 | if (hsize * sizeof(u64) != dip->i_disksize) { | 1069 | if (hsize * sizeof(u64) != disksize) { |
| 1065 | gfs2_consist_inode(dip); | 1070 | gfs2_consist_inode(dip); |
| 1066 | return -EIO; | 1071 | return -EIO; |
| 1067 | } | 1072 | } |
| @@ -1072,7 +1077,7 @@ static int dir_double_exhash(struct gfs2_inode *dip) | |||
| 1072 | if (!buf) | 1077 | if (!buf) |
| 1073 | return -ENOMEM; | 1078 | return -ENOMEM; |
| 1074 | 1079 | ||
| 1075 | for (block = dip->i_disksize >> sdp->sd_hash_bsize_shift; block--;) { | 1080 | for (block = disksize >> sdp->sd_hash_bsize_shift; block--;) { |
| 1076 | error = gfs2_dir_read_data(dip, (char *)buf, | 1081 | error = gfs2_dir_read_data(dip, (char *)buf, |
| 1077 | block * sdp->sd_hash_bsize, | 1082 | block * sdp->sd_hash_bsize, |
| 1078 | sdp->sd_hash_bsize, 1); | 1083 | sdp->sd_hash_bsize, 1); |
| @@ -1370,7 +1375,7 @@ static int dir_e_read(struct inode *inode, u64 *offset, void *opaque, | |||
| 1370 | unsigned depth = 0; | 1375 | unsigned depth = 0; |
| 1371 | 1376 | ||
| 1372 | hsize = 1 << dip->i_depth; | 1377 | hsize = 1 << dip->i_depth; |
| 1373 | if (hsize * sizeof(u64) != dip->i_disksize) { | 1378 | if (hsize * sizeof(u64) != i_size_read(inode)) { |
| 1374 | gfs2_consist_inode(dip); | 1379 | gfs2_consist_inode(dip); |
| 1375 | return -EIO; | 1380 | return -EIO; |
| 1376 | } | 1381 | } |
| @@ -1784,7 +1789,7 @@ static int foreach_leaf(struct gfs2_inode *dip, leaf_call_t lc, void *data) | |||
| 1784 | int error = 0; | 1789 | int error = 0; |
| 1785 | 1790 | ||
| 1786 | hsize = 1 << dip->i_depth; | 1791 | hsize = 1 << dip->i_depth; |
| 1787 | if (hsize * sizeof(u64) != dip->i_disksize) { | 1792 | if (hsize * sizeof(u64) != i_size_read(&dip->i_inode)) { |
| 1788 | gfs2_consist_inode(dip); | 1793 | gfs2_consist_inode(dip); |
| 1789 | return -EIO; | 1794 | return -EIO; |
| 1790 | } | 1795 | } |
diff --git a/fs/gfs2/dir.h b/fs/gfs2/dir.h index 4f919440c3be..a98f644bd3df 100644 --- a/fs/gfs2/dir.h +++ b/fs/gfs2/dir.h | |||
| @@ -17,23 +17,24 @@ struct inode; | |||
| 17 | struct gfs2_inode; | 17 | struct gfs2_inode; |
| 18 | struct gfs2_inum; | 18 | struct gfs2_inum; |
| 19 | 19 | ||
| 20 | struct inode *gfs2_dir_search(struct inode *dir, const struct qstr *filename); | 20 | extern struct inode *gfs2_dir_search(struct inode *dir, |
| 21 | int gfs2_dir_check(struct inode *dir, const struct qstr *filename, | 21 | const struct qstr *filename); |
| 22 | const struct gfs2_inode *ip); | 22 | extern int gfs2_dir_check(struct inode *dir, const struct qstr *filename, |
| 23 | int gfs2_dir_add(struct inode *inode, const struct qstr *filename, | 23 | const struct gfs2_inode *ip); |
| 24 | const struct gfs2_inode *ip, unsigned int type); | 24 | extern int gfs2_dir_add(struct inode *inode, const struct qstr *filename, |
| 25 | int gfs2_dir_del(struct gfs2_inode *dip, const struct qstr *filename); | 25 | const struct gfs2_inode *ip, unsigned int type); |
| 26 | int gfs2_dir_read(struct inode *inode, u64 *offset, void *opaque, | 26 | extern int gfs2_dir_del(struct gfs2_inode *dip, const struct qstr *filename); |
| 27 | filldir_t filldir); | 27 | extern int gfs2_dir_read(struct inode *inode, u64 *offset, void *opaque, |
| 28 | int gfs2_dir_mvino(struct gfs2_inode *dip, const struct qstr *filename, | 28 | filldir_t filldir); |
| 29 | const struct gfs2_inode *nip, unsigned int new_type); | 29 | extern int gfs2_dir_mvino(struct gfs2_inode *dip, const struct qstr *filename, |
| 30 | const struct gfs2_inode *nip, unsigned int new_type); | ||
| 30 | 31 | ||
| 31 | int gfs2_dir_exhash_dealloc(struct gfs2_inode *dip); | 32 | extern int gfs2_dir_exhash_dealloc(struct gfs2_inode *dip); |
| 32 | 33 | ||
| 33 | int gfs2_diradd_alloc_required(struct inode *dir, | 34 | extern int gfs2_diradd_alloc_required(struct inode *dir, |
| 34 | const struct qstr *filename); | 35 | const struct qstr *filename); |
| 35 | int gfs2_dir_get_new_buffer(struct gfs2_inode *ip, u64 block, | 36 | extern int gfs2_dir_get_new_buffer(struct gfs2_inode *ip, u64 block, |
| 36 | struct buffer_head **bhp); | 37 | struct buffer_head **bhp); |
| 37 | 38 | ||
| 38 | static inline u32 gfs2_disk_hash(const char *data, int len) | 39 | static inline u32 gfs2_disk_hash(const char *data, int len) |
| 39 | { | 40 | { |
| @@ -61,4 +62,7 @@ static inline void gfs2_qstr2dirent(const struct qstr *name, u16 reclen, struct | |||
| 61 | memcpy(dent + 1, name->name, name->len); | 62 | memcpy(dent + 1, name->name, name->len); |
| 62 | } | 63 | } |
| 63 | 64 | ||
| 65 | extern struct qstr gfs2_qdot; | ||
| 66 | extern struct qstr gfs2_qdotdot; | ||
| 67 | |||
| 64 | #endif /* __DIR_DOT_H__ */ | 68 | #endif /* __DIR_DOT_H__ */ |
diff --git a/fs/gfs2/export.c b/fs/gfs2/export.c index dfe237a3f8ad..06d582732d34 100644 --- a/fs/gfs2/export.c +++ b/fs/gfs2/export.c | |||
| @@ -126,16 +126,9 @@ static int gfs2_get_name(struct dentry *parent, char *name, | |||
| 126 | 126 | ||
| 127 | static struct dentry *gfs2_get_parent(struct dentry *child) | 127 | static struct dentry *gfs2_get_parent(struct dentry *child) |
| 128 | { | 128 | { |
| 129 | struct qstr dotdot; | ||
| 130 | struct dentry *dentry; | 129 | struct dentry *dentry; |
| 131 | 130 | ||
| 132 | /* | 131 | dentry = d_obtain_alias(gfs2_lookupi(child->d_inode, &gfs2_qdotdot, 1)); |
| 133 | * XXX(hch): it would be a good idea to keep this around as a | ||
| 134 | * static variable. | ||
| 135 | */ | ||
| 136 | gfs2_str2qstr(&dotdot, ".."); | ||
| 137 | |||
| 138 | dentry = d_obtain_alias(gfs2_lookupi(child->d_inode, &dotdot, 1)); | ||
| 139 | if (!IS_ERR(dentry)) | 132 | if (!IS_ERR(dentry)) |
| 140 | dentry->d_op = &gfs2_dops; | 133 | dentry->d_op = &gfs2_dops; |
| 141 | return dentry; | 134 | return dentry; |
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index 4edd662c8232..237ee6a940df 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c | |||
| @@ -382,8 +382,10 @@ static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) | |||
| 382 | rblocks = RES_DINODE + ind_blocks; | 382 | rblocks = RES_DINODE + ind_blocks; |
| 383 | if (gfs2_is_jdata(ip)) | 383 | if (gfs2_is_jdata(ip)) |
| 384 | rblocks += data_blocks ? data_blocks : 1; | 384 | rblocks += data_blocks ? data_blocks : 1; |
| 385 | if (ind_blocks || data_blocks) | 385 | if (ind_blocks || data_blocks) { |
| 386 | rblocks += RES_STATFS + RES_QUOTA; | 386 | rblocks += RES_STATFS + RES_QUOTA; |
| 387 | rblocks += gfs2_rg_blocks(al); | ||
| 388 | } | ||
| 387 | ret = gfs2_trans_begin(sdp, rblocks, 0); | 389 | ret = gfs2_trans_begin(sdp, rblocks, 0); |
| 388 | if (ret) | 390 | if (ret) |
| 389 | goto out_trans_fail; | 391 | goto out_trans_fail; |
| @@ -491,7 +493,7 @@ static int gfs2_open(struct inode *inode, struct file *file) | |||
| 491 | goto fail; | 493 | goto fail; |
| 492 | 494 | ||
| 493 | if (!(file->f_flags & O_LARGEFILE) && | 495 | if (!(file->f_flags & O_LARGEFILE) && |
| 494 | ip->i_disksize > MAX_NON_LFS) { | 496 | i_size_read(inode) > MAX_NON_LFS) { |
| 495 | error = -EOVERFLOW; | 497 | error = -EOVERFLOW; |
| 496 | goto fail_gunlock; | 498 | goto fail_gunlock; |
| 497 | } | 499 | } |
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index 9adf8f924e08..87778857f099 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c | |||
| @@ -441,6 +441,8 @@ static void state_change(struct gfs2_glock *gl, unsigned int new_state) | |||
| 441 | else | 441 | else |
| 442 | gfs2_glock_put_nolock(gl); | 442 | gfs2_glock_put_nolock(gl); |
| 443 | } | 443 | } |
| 444 | if (held1 && held2 && list_empty(&gl->gl_holders)) | ||
| 445 | clear_bit(GLF_QUEUED, &gl->gl_flags); | ||
| 444 | 446 | ||
| 445 | gl->gl_state = new_state; | 447 | gl->gl_state = new_state; |
| 446 | gl->gl_tchange = jiffies; | 448 | gl->gl_tchange = jiffies; |
| @@ -1012,6 +1014,7 @@ fail: | |||
| 1012 | if (unlikely((gh->gh_flags & LM_FLAG_PRIORITY) && !insert_pt)) | 1014 | if (unlikely((gh->gh_flags & LM_FLAG_PRIORITY) && !insert_pt)) |
| 1013 | insert_pt = &gh2->gh_list; | 1015 | insert_pt = &gh2->gh_list; |
| 1014 | } | 1016 | } |
| 1017 | set_bit(GLF_QUEUED, &gl->gl_flags); | ||
| 1015 | if (likely(insert_pt == NULL)) { | 1018 | if (likely(insert_pt == NULL)) { |
| 1016 | list_add_tail(&gh->gh_list, &gl->gl_holders); | 1019 | list_add_tail(&gh->gh_list, &gl->gl_holders); |
| 1017 | if (unlikely(gh->gh_flags & LM_FLAG_PRIORITY)) | 1020 | if (unlikely(gh->gh_flags & LM_FLAG_PRIORITY)) |
| @@ -1310,10 +1313,12 @@ void gfs2_glock_cb(struct gfs2_glock *gl, unsigned int state) | |||
| 1310 | 1313 | ||
| 1311 | gfs2_glock_hold(gl); | 1314 | gfs2_glock_hold(gl); |
| 1312 | holdtime = gl->gl_tchange + gl->gl_ops->go_min_hold_time; | 1315 | holdtime = gl->gl_tchange + gl->gl_ops->go_min_hold_time; |
| 1313 | if (time_before(now, holdtime)) | 1316 | if (test_bit(GLF_QUEUED, &gl->gl_flags)) { |
| 1314 | delay = holdtime - now; | 1317 | if (time_before(now, holdtime)) |
| 1315 | if (test_bit(GLF_REPLY_PENDING, &gl->gl_flags)) | 1318 | delay = holdtime - now; |
| 1316 | delay = gl->gl_ops->go_min_hold_time; | 1319 | if (test_bit(GLF_REPLY_PENDING, &gl->gl_flags)) |
| 1320 | delay = gl->gl_ops->go_min_hold_time; | ||
| 1321 | } | ||
| 1317 | 1322 | ||
| 1318 | spin_lock(&gl->gl_spin); | 1323 | spin_lock(&gl->gl_spin); |
| 1319 | handle_callback(gl, state, delay); | 1324 | handle_callback(gl, state, delay); |
| @@ -1512,7 +1517,7 @@ static void clear_glock(struct gfs2_glock *gl) | |||
| 1512 | spin_unlock(&lru_lock); | 1517 | spin_unlock(&lru_lock); |
| 1513 | 1518 | ||
| 1514 | spin_lock(&gl->gl_spin); | 1519 | spin_lock(&gl->gl_spin); |
| 1515 | if (find_first_holder(gl) == NULL && gl->gl_state != LM_ST_UNLOCKED) | 1520 | if (gl->gl_state != LM_ST_UNLOCKED) |
| 1516 | handle_callback(gl, LM_ST_UNLOCKED, 0); | 1521 | handle_callback(gl, LM_ST_UNLOCKED, 0); |
| 1517 | spin_unlock(&gl->gl_spin); | 1522 | spin_unlock(&gl->gl_spin); |
| 1518 | gfs2_glock_hold(gl); | 1523 | gfs2_glock_hold(gl); |
| @@ -1660,6 +1665,8 @@ static const char *gflags2str(char *buf, const unsigned long *gflags) | |||
| 1660 | *p++ = 'I'; | 1665 | *p++ = 'I'; |
| 1661 | if (test_bit(GLF_FROZEN, gflags)) | 1666 | if (test_bit(GLF_FROZEN, gflags)) |
| 1662 | *p++ = 'F'; | 1667 | *p++ = 'F'; |
| 1668 | if (test_bit(GLF_QUEUED, gflags)) | ||
| 1669 | *p++ = 'q'; | ||
| 1663 | *p = 0; | 1670 | *p = 0; |
| 1664 | return buf; | 1671 | return buf; |
| 1665 | } | 1672 | } |
| @@ -1776,10 +1783,12 @@ int __init gfs2_glock_init(void) | |||
| 1776 | } | 1783 | } |
| 1777 | #endif | 1784 | #endif |
| 1778 | 1785 | ||
| 1779 | glock_workqueue = create_workqueue("glock_workqueue"); | 1786 | glock_workqueue = alloc_workqueue("glock_workqueue", WQ_RESCUER | |
| 1787 | WQ_HIGHPRI | WQ_FREEZEABLE, 0); | ||
| 1780 | if (IS_ERR(glock_workqueue)) | 1788 | if (IS_ERR(glock_workqueue)) |
| 1781 | return PTR_ERR(glock_workqueue); | 1789 | return PTR_ERR(glock_workqueue); |
| 1782 | gfs2_delete_workqueue = create_workqueue("delete_workqueue"); | 1790 | gfs2_delete_workqueue = alloc_workqueue("delete_workqueue", WQ_RESCUER | |
| 1791 | WQ_FREEZEABLE, 0); | ||
| 1783 | if (IS_ERR(gfs2_delete_workqueue)) { | 1792 | if (IS_ERR(gfs2_delete_workqueue)) { |
| 1784 | destroy_workqueue(glock_workqueue); | 1793 | destroy_workqueue(glock_workqueue); |
| 1785 | return PTR_ERR(gfs2_delete_workqueue); | 1794 | return PTR_ERR(gfs2_delete_workqueue); |
diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h index 2bda1911b156..db1c26d6d220 100644 --- a/fs/gfs2/glock.h +++ b/fs/gfs2/glock.h | |||
| @@ -215,7 +215,7 @@ void gfs2_glock_dq_uninit_m(unsigned int num_gh, struct gfs2_holder *ghs); | |||
| 215 | void gfs2_print_dbg(struct seq_file *seq, const char *fmt, ...); | 215 | void gfs2_print_dbg(struct seq_file *seq, const char *fmt, ...); |
| 216 | 216 | ||
| 217 | /** | 217 | /** |
| 218 | * gfs2_glock_nq_init - intialize a holder and enqueue it on a glock | 218 | * gfs2_glock_nq_init - initialize a holder and enqueue it on a glock |
| 219 | * @gl: the glock | 219 | * @gl: the glock |
| 220 | * @state: the state we're requesting | 220 | * @state: the state we're requesting |
| 221 | * @flags: the modifier flags | 221 | * @flags: the modifier flags |
diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c index 49f97d3bb690..0d149dcc04e5 100644 --- a/fs/gfs2/glops.c +++ b/fs/gfs2/glops.c | |||
| @@ -262,13 +262,12 @@ static int inode_go_dump(struct seq_file *seq, const struct gfs2_glock *gl) | |||
| 262 | const struct gfs2_inode *ip = gl->gl_object; | 262 | const struct gfs2_inode *ip = gl->gl_object; |
| 263 | if (ip == NULL) | 263 | if (ip == NULL) |
| 264 | return 0; | 264 | return 0; |
| 265 | gfs2_print_dbg(seq, " I: n:%llu/%llu t:%u f:0x%02lx d:0x%08x s:%llu/%llu\n", | 265 | gfs2_print_dbg(seq, " I: n:%llu/%llu t:%u f:0x%02lx d:0x%08x s:%llu\n", |
| 266 | (unsigned long long)ip->i_no_formal_ino, | 266 | (unsigned long long)ip->i_no_formal_ino, |
| 267 | (unsigned long long)ip->i_no_addr, | 267 | (unsigned long long)ip->i_no_addr, |
| 268 | IF2DT(ip->i_inode.i_mode), ip->i_flags, | 268 | IF2DT(ip->i_inode.i_mode), ip->i_flags, |
| 269 | (unsigned int)ip->i_diskflags, | 269 | (unsigned int)ip->i_diskflags, |
| 270 | (unsigned long long)ip->i_inode.i_size, | 270 | (unsigned long long)i_size_read(&ip->i_inode)); |
| 271 | (unsigned long long)ip->i_disksize); | ||
| 272 | return 0; | 271 | return 0; |
| 273 | } | 272 | } |
| 274 | 273 | ||
| @@ -453,7 +452,6 @@ const struct gfs2_glock_operations *gfs2_glops_list[] = { | |||
| 453 | [LM_TYPE_META] = &gfs2_meta_glops, | 452 | [LM_TYPE_META] = &gfs2_meta_glops, |
| 454 | [LM_TYPE_INODE] = &gfs2_inode_glops, | 453 | [LM_TYPE_INODE] = &gfs2_inode_glops, |
| 455 | [LM_TYPE_RGRP] = &gfs2_rgrp_glops, | 454 | [LM_TYPE_RGRP] = &gfs2_rgrp_glops, |
| 456 | [LM_TYPE_NONDISK] = &gfs2_trans_glops, | ||
| 457 | [LM_TYPE_IOPEN] = &gfs2_iopen_glops, | 455 | [LM_TYPE_IOPEN] = &gfs2_iopen_glops, |
| 458 | [LM_TYPE_FLOCK] = &gfs2_flock_glops, | 456 | [LM_TYPE_FLOCK] = &gfs2_flock_glops, |
| 459 | [LM_TYPE_NONDISK] = &gfs2_nondisk_glops, | 457 | [LM_TYPE_NONDISK] = &gfs2_nondisk_glops, |
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index fdbf4b366fa5..764fbb49efc8 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h | |||
| @@ -196,6 +196,7 @@ enum { | |||
| 196 | GLF_REPLY_PENDING = 9, | 196 | GLF_REPLY_PENDING = 9, |
| 197 | GLF_INITIAL = 10, | 197 | GLF_INITIAL = 10, |
| 198 | GLF_FROZEN = 11, | 198 | GLF_FROZEN = 11, |
| 199 | GLF_QUEUED = 12, | ||
| 199 | }; | 200 | }; |
| 200 | 201 | ||
| 201 | struct gfs2_glock { | 202 | struct gfs2_glock { |
| @@ -267,7 +268,6 @@ struct gfs2_inode { | |||
| 267 | u64 i_no_formal_ino; | 268 | u64 i_no_formal_ino; |
| 268 | u64 i_generation; | 269 | u64 i_generation; |
| 269 | u64 i_eattr; | 270 | u64 i_eattr; |
| 270 | loff_t i_disksize; | ||
| 271 | unsigned long i_flags; /* GIF_... */ | 271 | unsigned long i_flags; /* GIF_... */ |
| 272 | struct gfs2_glock *i_gl; /* Move into i_gh? */ | 272 | struct gfs2_glock *i_gl; /* Move into i_gh? */ |
| 273 | struct gfs2_holder i_iopen_gh; | 273 | struct gfs2_holder i_iopen_gh; |
| @@ -416,11 +416,8 @@ struct gfs2_args { | |||
| 416 | char ar_locktable[GFS2_LOCKNAME_LEN]; /* Name of the Lock Table */ | 416 | char ar_locktable[GFS2_LOCKNAME_LEN]; /* Name of the Lock Table */ |
| 417 | char ar_hostdata[GFS2_LOCKNAME_LEN]; /* Host specific data */ | 417 | char ar_hostdata[GFS2_LOCKNAME_LEN]; /* Host specific data */ |
| 418 | unsigned int ar_spectator:1; /* Don't get a journal */ | 418 | unsigned int ar_spectator:1; /* Don't get a journal */ |
| 419 | unsigned int ar_ignore_local_fs:1; /* Ignore optimisations */ | ||
| 420 | unsigned int ar_localflocks:1; /* Let the VFS do flock|fcntl */ | 419 | unsigned int ar_localflocks:1; /* Let the VFS do flock|fcntl */ |
| 421 | unsigned int ar_localcaching:1; /* Local caching */ | ||
| 422 | unsigned int ar_debug:1; /* Oops on errors */ | 420 | unsigned int ar_debug:1; /* Oops on errors */ |
| 423 | unsigned int ar_upgrade:1; /* Upgrade ondisk format */ | ||
| 424 | unsigned int ar_posix_acl:1; /* Enable posix acls */ | 421 | unsigned int ar_posix_acl:1; /* Enable posix acls */ |
| 425 | unsigned int ar_quota:2; /* off/account/on */ | 422 | unsigned int ar_quota:2; /* off/account/on */ |
| 426 | unsigned int ar_suiddir:1; /* suiddir support */ | 423 | unsigned int ar_suiddir:1; /* suiddir support */ |
| @@ -497,7 +494,7 @@ struct gfs2_sb_host { | |||
| 497 | */ | 494 | */ |
| 498 | 495 | ||
| 499 | struct lm_lockstruct { | 496 | struct lm_lockstruct { |
| 500 | unsigned int ls_jid; | 497 | int ls_jid; |
| 501 | unsigned int ls_first; | 498 | unsigned int ls_first; |
| 502 | unsigned int ls_first_done; | 499 | unsigned int ls_first_done; |
| 503 | unsigned int ls_nodir; | 500 | unsigned int ls_nodir; |
| @@ -572,6 +569,7 @@ struct gfs2_sbd { | |||
| 572 | struct list_head sd_rindex_mru_list; | 569 | struct list_head sd_rindex_mru_list; |
| 573 | struct gfs2_rgrpd *sd_rindex_forward; | 570 | struct gfs2_rgrpd *sd_rindex_forward; |
| 574 | unsigned int sd_rgrps; | 571 | unsigned int sd_rgrps; |
| 572 | unsigned int sd_max_rg_data; | ||
| 575 | 573 | ||
| 576 | /* Journal index stuff */ | 574 | /* Journal index stuff */ |
| 577 | 575 | ||
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index 08140f185a37..06370f8bd8cf 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c | |||
| @@ -359,8 +359,7 @@ static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf) | |||
| 359 | * to do that. | 359 | * to do that. |
| 360 | */ | 360 | */ |
| 361 | ip->i_inode.i_nlink = be32_to_cpu(str->di_nlink); | 361 | ip->i_inode.i_nlink = be32_to_cpu(str->di_nlink); |
| 362 | ip->i_disksize = be64_to_cpu(str->di_size); | 362 | i_size_write(&ip->i_inode, be64_to_cpu(str->di_size)); |
| 363 | i_size_write(&ip->i_inode, ip->i_disksize); | ||
| 364 | gfs2_set_inode_blocks(&ip->i_inode, be64_to_cpu(str->di_blocks)); | 363 | gfs2_set_inode_blocks(&ip->i_inode, be64_to_cpu(str->di_blocks)); |
| 365 | atime.tv_sec = be64_to_cpu(str->di_atime); | 364 | atime.tv_sec = be64_to_cpu(str->di_atime); |
| 366 | atime.tv_nsec = be32_to_cpu(str->di_atime_nsec); | 365 | atime.tv_nsec = be32_to_cpu(str->di_atime_nsec); |
| @@ -1055,7 +1054,7 @@ void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf) | |||
| 1055 | str->di_uid = cpu_to_be32(ip->i_inode.i_uid); | 1054 | str->di_uid = cpu_to_be32(ip->i_inode.i_uid); |
| 1056 | str->di_gid = cpu_to_be32(ip->i_inode.i_gid); | 1055 | str->di_gid = cpu_to_be32(ip->i_inode.i_gid); |
| 1057 | str->di_nlink = cpu_to_be32(ip->i_inode.i_nlink); | 1056 | str->di_nlink = cpu_to_be32(ip->i_inode.i_nlink); |
| 1058 | str->di_size = cpu_to_be64(ip->i_disksize); | 1057 | str->di_size = cpu_to_be64(i_size_read(&ip->i_inode)); |
| 1059 | str->di_blocks = cpu_to_be64(gfs2_get_inode_blocks(&ip->i_inode)); | 1058 | str->di_blocks = cpu_to_be64(gfs2_get_inode_blocks(&ip->i_inode)); |
| 1060 | str->di_atime = cpu_to_be64(ip->i_inode.i_atime.tv_sec); | 1059 | str->di_atime = cpu_to_be64(ip->i_inode.i_atime.tv_sec); |
| 1061 | str->di_mtime = cpu_to_be64(ip->i_inode.i_mtime.tv_sec); | 1060 | str->di_mtime = cpu_to_be64(ip->i_inode.i_mtime.tv_sec); |
| @@ -1085,8 +1084,8 @@ void gfs2_dinode_print(const struct gfs2_inode *ip) | |||
| 1085 | (unsigned long long)ip->i_no_formal_ino); | 1084 | (unsigned long long)ip->i_no_formal_ino); |
| 1086 | printk(KERN_INFO " no_addr = %llu\n", | 1085 | printk(KERN_INFO " no_addr = %llu\n", |
| 1087 | (unsigned long long)ip->i_no_addr); | 1086 | (unsigned long long)ip->i_no_addr); |
| 1088 | printk(KERN_INFO " i_disksize = %llu\n", | 1087 | printk(KERN_INFO " i_size = %llu\n", |
| 1089 | (unsigned long long)ip->i_disksize); | 1088 | (unsigned long long)i_size_read(&ip->i_inode)); |
| 1090 | printk(KERN_INFO " blocks = %llu\n", | 1089 | printk(KERN_INFO " blocks = %llu\n", |
| 1091 | (unsigned long long)gfs2_get_inode_blocks(&ip->i_inode)); | 1090 | (unsigned long long)gfs2_get_inode_blocks(&ip->i_inode)); |
| 1092 | printk(KERN_INFO " i_goal = %llu\n", | 1091 | printk(KERN_INFO " i_goal = %llu\n", |
diff --git a/fs/gfs2/inode.h b/fs/gfs2/inode.h index 300ada3f21de..6720d7d5fbc6 100644 --- a/fs/gfs2/inode.h +++ b/fs/gfs2/inode.h | |||
| @@ -19,6 +19,8 @@ extern int gfs2_releasepage(struct page *page, gfp_t gfp_mask); | |||
| 19 | extern int gfs2_internal_read(struct gfs2_inode *ip, | 19 | extern int gfs2_internal_read(struct gfs2_inode *ip, |
| 20 | struct file_ra_state *ra_state, | 20 | struct file_ra_state *ra_state, |
| 21 | char *buf, loff_t *pos, unsigned size); | 21 | char *buf, loff_t *pos, unsigned size); |
| 22 | extern void gfs2_page_add_databufs(struct gfs2_inode *ip, struct page *page, | ||
| 23 | unsigned int from, unsigned int to); | ||
| 22 | extern void gfs2_set_aops(struct inode *inode); | 24 | extern void gfs2_set_aops(struct inode *inode); |
| 23 | 25 | ||
| 24 | static inline int gfs2_is_stuffed(const struct gfs2_inode *ip) | 26 | static inline int gfs2_is_stuffed(const struct gfs2_inode *ip) |
| @@ -80,6 +82,19 @@ static inline void gfs2_inum_out(const struct gfs2_inode *ip, | |||
| 80 | dent->de_inum.no_addr = cpu_to_be64(ip->i_no_addr); | 82 | dent->de_inum.no_addr = cpu_to_be64(ip->i_no_addr); |
| 81 | } | 83 | } |
| 82 | 84 | ||
| 85 | static inline int gfs2_check_internal_file_size(struct inode *inode, | ||
| 86 | u64 minsize, u64 maxsize) | ||
| 87 | { | ||
| 88 | u64 size = i_size_read(inode); | ||
| 89 | if (size < minsize || size > maxsize) | ||
| 90 | goto err; | ||
| 91 | if (size & ((1 << inode->i_blkbits) - 1)) | ||
| 92 | goto err; | ||
| 93 | return 0; | ||
| 94 | err: | ||
| 95 | gfs2_consist_inode(GFS2_I(inode)); | ||
| 96 | return -EIO; | ||
| 97 | } | ||
| 83 | 98 | ||
| 84 | extern void gfs2_set_iop(struct inode *inode); | 99 | extern void gfs2_set_iop(struct inode *inode); |
| 85 | extern struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned type, | 100 | extern struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned type, |
diff --git a/fs/gfs2/lock_dlm.c b/fs/gfs2/lock_dlm.c index 0e0470ed34c2..1c09425b45fd 100644 --- a/fs/gfs2/lock_dlm.c +++ b/fs/gfs2/lock_dlm.c | |||
| @@ -42,9 +42,9 @@ static void gdlm_ast(void *arg) | |||
| 42 | ret |= LM_OUT_CANCELED; | 42 | ret |= LM_OUT_CANCELED; |
| 43 | goto out; | 43 | goto out; |
| 44 | case -EAGAIN: /* Try lock fails */ | 44 | case -EAGAIN: /* Try lock fails */ |
| 45 | case -EDEADLK: /* Deadlock detected */ | ||
| 45 | goto out; | 46 | goto out; |
| 46 | case -EINVAL: /* Invalid */ | 47 | case -ETIMEDOUT: /* Canceled due to timeout */ |
| 47 | case -ENOMEM: /* Out of memory */ | ||
| 48 | ret |= LM_OUT_ERROR; | 48 | ret |= LM_OUT_ERROR; |
| 49 | goto out; | 49 | goto out; |
| 50 | case 0: /* Success */ | 50 | case 0: /* Success */ |
diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c index b1e9630eb46a..d7eb1e209aa8 100644 --- a/fs/gfs2/main.c +++ b/fs/gfs2/main.c | |||
| @@ -24,6 +24,7 @@ | |||
| 24 | #include "glock.h" | 24 | #include "glock.h" |
| 25 | #include "quota.h" | 25 | #include "quota.h" |
| 26 | #include "recovery.h" | 26 | #include "recovery.h" |
| 27 | #include "dir.h" | ||
| 27 | 28 | ||
| 28 | static struct shrinker qd_shrinker = { | 29 | static struct shrinker qd_shrinker = { |
| 29 | .shrink = gfs2_shrink_qd_memory, | 30 | .shrink = gfs2_shrink_qd_memory, |
| @@ -78,6 +79,9 @@ static int __init init_gfs2_fs(void) | |||
| 78 | { | 79 | { |
| 79 | int error; | 80 | int error; |
| 80 | 81 | ||
| 82 | gfs2_str2qstr(&gfs2_qdot, "."); | ||
| 83 | gfs2_str2qstr(&gfs2_qdotdot, ".."); | ||
| 84 | |||
| 81 | error = gfs2_sys_init(); | 85 | error = gfs2_sys_init(); |
| 82 | if (error) | 86 | if (error) |
| 83 | return error; | 87 | return error; |
| @@ -140,7 +144,7 @@ static int __init init_gfs2_fs(void) | |||
| 140 | 144 | ||
| 141 | error = -ENOMEM; | 145 | error = -ENOMEM; |
| 142 | gfs_recovery_wq = alloc_workqueue("gfs_recovery", | 146 | gfs_recovery_wq = alloc_workqueue("gfs_recovery", |
| 143 | WQ_NON_REENTRANT | WQ_RESCUER, 0); | 147 | WQ_RESCUER | WQ_FREEZEABLE, 0); |
| 144 | if (!gfs_recovery_wq) | 148 | if (!gfs_recovery_wq) |
| 145 | goto fail_wq; | 149 | goto fail_wq; |
| 146 | 150 | ||
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index 4d4b1e8ac64c..aeafc233dc89 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c | |||
| @@ -38,14 +38,6 @@ | |||
| 38 | #define DO 0 | 38 | #define DO 0 |
| 39 | #define UNDO 1 | 39 | #define UNDO 1 |
| 40 | 40 | ||
| 41 | static const u32 gfs2_old_fs_formats[] = { | ||
| 42 | 0 | ||
| 43 | }; | ||
| 44 | |||
| 45 | static const u32 gfs2_old_multihost_formats[] = { | ||
| 46 | 0 | ||
| 47 | }; | ||
| 48 | |||
| 49 | /** | 41 | /** |
| 50 | * gfs2_tune_init - Fill a gfs2_tune structure with default values | 42 | * gfs2_tune_init - Fill a gfs2_tune structure with default values |
| 51 | * @gt: tune | 43 | * @gt: tune |
| @@ -135,8 +127,6 @@ static struct gfs2_sbd *init_sbd(struct super_block *sb) | |||
| 135 | 127 | ||
| 136 | static int gfs2_check_sb(struct gfs2_sbd *sdp, struct gfs2_sb_host *sb, int silent) | 128 | static int gfs2_check_sb(struct gfs2_sbd *sdp, struct gfs2_sb_host *sb, int silent) |
| 137 | { | 129 | { |
| 138 | unsigned int x; | ||
| 139 | |||
| 140 | if (sb->sb_magic != GFS2_MAGIC || | 130 | if (sb->sb_magic != GFS2_MAGIC || |
| 141 | sb->sb_type != GFS2_METATYPE_SB) { | 131 | sb->sb_type != GFS2_METATYPE_SB) { |
| 142 | if (!silent) | 132 | if (!silent) |
| @@ -150,55 +140,9 @@ static int gfs2_check_sb(struct gfs2_sbd *sdp, struct gfs2_sb_host *sb, int sile | |||
| 150 | sb->sb_multihost_format == GFS2_FORMAT_MULTI) | 140 | sb->sb_multihost_format == GFS2_FORMAT_MULTI) |
| 151 | return 0; | 141 | return 0; |
| 152 | 142 | ||
| 153 | if (sb->sb_fs_format != GFS2_FORMAT_FS) { | 143 | fs_warn(sdp, "Unknown on-disk format, unable to mount\n"); |
| 154 | for (x = 0; gfs2_old_fs_formats[x]; x++) | ||
| 155 | if (gfs2_old_fs_formats[x] == sb->sb_fs_format) | ||
| 156 | break; | ||
| 157 | 144 | ||
| 158 | if (!gfs2_old_fs_formats[x]) { | 145 | return -EINVAL; |
| 159 | printk(KERN_WARNING | ||
| 160 | "GFS2: code version (%u, %u) is incompatible " | ||
| 161 | "with ondisk format (%u, %u)\n", | ||
| 162 | GFS2_FORMAT_FS, GFS2_FORMAT_MULTI, | ||
| 163 | sb->sb_fs_format, sb->sb_multihost_format); | ||
| 164 | printk(KERN_WARNING | ||
| 165 | "GFS2: I don't know how to upgrade this FS\n"); | ||
| 166 | return -EINVAL; | ||
| 167 | } | ||
| 168 | } | ||
| 169 | |||
| 170 | if (sb->sb_multihost_format != GFS2_FORMAT_MULTI) { | ||
| 171 | for (x = 0; gfs2_old_multihost_formats[x]; x++) | ||
| 172 | if (gfs2_old_multihost_formats[x] == | ||
| 173 | sb->sb_multihost_format) | ||
| 174 | break; | ||
| 175 | |||
| 176 | if (!gfs2_old_multihost_formats[x]) { | ||
| 177 | printk(KERN_WARNING | ||
| 178 | "GFS2: code version (%u, %u) is incompatible " | ||
| 179 | "with ondisk format (%u, %u)\n", | ||
| 180 | GFS2_FORMAT_FS, GFS2_FORMAT_MULTI, | ||
| 181 | sb->sb_fs_format, sb->sb_multihost_format); | ||
| 182 | printk(KERN_WARNING | ||
| 183 | "GFS2: I don't know how to upgrade this FS\n"); | ||
| 184 | return -EINVAL; | ||
| 185 | } | ||
| 186 | } | ||
| 187 | |||
| 188 | if (!sdp->sd_args.ar_upgrade) { | ||
| 189 | printk(KERN_WARNING | ||
| 190 | "GFS2: code version (%u, %u) is incompatible " | ||
| 191 | "with ondisk format (%u, %u)\n", | ||
| 192 | GFS2_FORMAT_FS, GFS2_FORMAT_MULTI, | ||
| 193 | sb->sb_fs_format, sb->sb_multihost_format); | ||
| 194 | printk(KERN_INFO | ||
| 195 | "GFS2: Use the \"upgrade\" mount option to upgrade " | ||
| 196 | "the FS\n"); | ||
| 197 | printk(KERN_INFO "GFS2: See the manual for more details\n"); | ||
| 198 | return -EINVAL; | ||
| 199 | } | ||
| 200 | |||
| 201 | return 0; | ||
| 202 | } | 146 | } |
| 203 | 147 | ||
| 204 | static void end_bio_io_page(struct bio *bio, int error) | 148 | static void end_bio_io_page(struct bio *bio, int error) |
| @@ -586,7 +530,7 @@ static int map_journal_extents(struct gfs2_sbd *sdp) | |||
| 586 | 530 | ||
| 587 | prev_db = 0; | 531 | prev_db = 0; |
| 588 | 532 | ||
| 589 | for (lb = 0; lb < ip->i_disksize >> sdp->sd_sb.sb_bsize_shift; lb++) { | 533 | for (lb = 0; lb < i_size_read(jd->jd_inode) >> sdp->sd_sb.sb_bsize_shift; lb++) { |
| 590 | bh.b_state = 0; | 534 | bh.b_state = 0; |
| 591 | bh.b_blocknr = 0; | 535 | bh.b_blocknr = 0; |
| 592 | bh.b_size = 1 << ip->i_inode.i_blkbits; | 536 | bh.b_size = 1 << ip->i_inode.i_blkbits; |
| @@ -1022,7 +966,6 @@ static int gfs2_lm_mount(struct gfs2_sbd *sdp, int silent) | |||
| 1022 | if (!strcmp("lock_nolock", proto)) { | 966 | if (!strcmp("lock_nolock", proto)) { |
| 1023 | lm = &nolock_ops; | 967 | lm = &nolock_ops; |
| 1024 | sdp->sd_args.ar_localflocks = 1; | 968 | sdp->sd_args.ar_localflocks = 1; |
| 1025 | sdp->sd_args.ar_localcaching = 1; | ||
| 1026 | #ifdef CONFIG_GFS2_FS_LOCKING_DLM | 969 | #ifdef CONFIG_GFS2_FS_LOCKING_DLM |
| 1027 | } else if (!strcmp("lock_dlm", proto)) { | 970 | } else if (!strcmp("lock_dlm", proto)) { |
| 1028 | lm = &gfs2_dlm_ops; | 971 | lm = &gfs2_dlm_ops; |
| @@ -1113,8 +1056,6 @@ static int gfs2_journalid_wait(void *word) | |||
| 1113 | 1056 | ||
| 1114 | static int wait_on_journal(struct gfs2_sbd *sdp) | 1057 | static int wait_on_journal(struct gfs2_sbd *sdp) |
| 1115 | { | 1058 | { |
| 1116 | if (sdp->sd_args.ar_spectator) | ||
| 1117 | return 0; | ||
| 1118 | if (sdp->sd_lockstruct.ls_ops->lm_mount == NULL) | 1059 | if (sdp->sd_lockstruct.ls_ops->lm_mount == NULL) |
| 1119 | return 0; | 1060 | return 0; |
| 1120 | 1061 | ||
| @@ -1217,6 +1158,20 @@ static int fill_super(struct super_block *sb, struct gfs2_args *args, int silent | |||
| 1217 | if (error) | 1158 | if (error) |
| 1218 | goto fail_sb; | 1159 | goto fail_sb; |
| 1219 | 1160 | ||
| 1161 | /* | ||
| 1162 | * If user space has failed to join the cluster or some similar | ||
| 1163 | * failure has occurred, then the journal id will contain a | ||
| 1164 | * negative (error) number. This will then be returned to the | ||
| 1165 | * caller (of the mount syscall). We do this even for spectator | ||
| 1166 | * mounts (which just write a jid of 0 to indicate "ok" even though | ||
| 1167 | * the jid is unused in the spectator case) | ||
| 1168 | */ | ||
| 1169 | if (sdp->sd_lockstruct.ls_jid < 0) { | ||
| 1170 | error = sdp->sd_lockstruct.ls_jid; | ||
| 1171 | sdp->sd_lockstruct.ls_jid = 0; | ||
| 1172 | goto fail_sb; | ||
| 1173 | } | ||
| 1174 | |||
| 1220 | error = init_inodes(sdp, DO); | 1175 | error = init_inodes(sdp, DO); |
| 1221 | if (error) | 1176 | if (error) |
| 1222 | goto fail_sb; | 1177 | goto fail_sb; |
diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c index 1009be2c9737..0534510200d5 100644 --- a/fs/gfs2/ops_inode.c +++ b/fs/gfs2/ops_inode.c | |||
| @@ -18,6 +18,8 @@ | |||
| 18 | #include <linux/gfs2_ondisk.h> | 18 | #include <linux/gfs2_ondisk.h> |
| 19 | #include <linux/crc32.h> | 19 | #include <linux/crc32.h> |
| 20 | #include <linux/fiemap.h> | 20 | #include <linux/fiemap.h> |
| 21 | #include <linux/swap.h> | ||
| 22 | #include <linux/falloc.h> | ||
| 21 | #include <asm/uaccess.h> | 23 | #include <asm/uaccess.h> |
| 22 | 24 | ||
| 23 | #include "gfs2.h" | 25 | #include "gfs2.h" |
| @@ -217,7 +219,7 @@ static int gfs2_link(struct dentry *old_dentry, struct inode *dir, | |||
| 217 | goto out_gunlock_q; | 219 | goto out_gunlock_q; |
| 218 | 220 | ||
| 219 | error = gfs2_trans_begin(sdp, sdp->sd_max_dirres + | 221 | error = gfs2_trans_begin(sdp, sdp->sd_max_dirres + |
| 220 | al->al_rgd->rd_length + | 222 | gfs2_rg_blocks(al) + |
| 221 | 2 * RES_DINODE + RES_STATFS + | 223 | 2 * RES_DINODE + RES_STATFS + |
| 222 | RES_QUOTA, 0); | 224 | RES_QUOTA, 0); |
| 223 | if (error) | 225 | if (error) |
| @@ -406,7 +408,6 @@ static int gfs2_symlink(struct inode *dir, struct dentry *dentry, | |||
| 406 | 408 | ||
| 407 | ip = ghs[1].gh_gl->gl_object; | 409 | ip = ghs[1].gh_gl->gl_object; |
| 408 | 410 | ||
| 409 | ip->i_disksize = size; | ||
| 410 | i_size_write(inode, size); | 411 | i_size_write(inode, size); |
| 411 | 412 | ||
| 412 | error = gfs2_meta_inode_buffer(ip, &dibh); | 413 | error = gfs2_meta_inode_buffer(ip, &dibh); |
| @@ -461,7 +462,7 @@ static int gfs2_mkdir(struct inode *dir, struct dentry *dentry, int mode) | |||
| 461 | ip = ghs[1].gh_gl->gl_object; | 462 | ip = ghs[1].gh_gl->gl_object; |
| 462 | 463 | ||
| 463 | ip->i_inode.i_nlink = 2; | 464 | ip->i_inode.i_nlink = 2; |
| 464 | ip->i_disksize = sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode); | 465 | i_size_write(inode, sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode)); |
| 465 | ip->i_diskflags |= GFS2_DIF_JDATA; | 466 | ip->i_diskflags |= GFS2_DIF_JDATA; |
| 466 | ip->i_entries = 2; | 467 | ip->i_entries = 2; |
| 467 | 468 | ||
| @@ -470,18 +471,15 @@ static int gfs2_mkdir(struct inode *dir, struct dentry *dentry, int mode) | |||
| 470 | if (!gfs2_assert_withdraw(sdp, !error)) { | 471 | if (!gfs2_assert_withdraw(sdp, !error)) { |
| 471 | struct gfs2_dinode *di = (struct gfs2_dinode *)dibh->b_data; | 472 | struct gfs2_dinode *di = (struct gfs2_dinode *)dibh->b_data; |
| 472 | struct gfs2_dirent *dent = (struct gfs2_dirent *)(di+1); | 473 | struct gfs2_dirent *dent = (struct gfs2_dirent *)(di+1); |
| 473 | struct qstr str; | ||
| 474 | 474 | ||
| 475 | gfs2_str2qstr(&str, "."); | ||
| 476 | gfs2_trans_add_bh(ip->i_gl, dibh, 1); | 475 | gfs2_trans_add_bh(ip->i_gl, dibh, 1); |
| 477 | gfs2_qstr2dirent(&str, GFS2_DIRENT_SIZE(str.len), dent); | 476 | gfs2_qstr2dirent(&gfs2_qdot, GFS2_DIRENT_SIZE(gfs2_qdot.len), dent); |
| 478 | dent->de_inum = di->di_num; /* already GFS2 endian */ | 477 | dent->de_inum = di->di_num; /* already GFS2 endian */ |
| 479 | dent->de_type = cpu_to_be16(DT_DIR); | 478 | dent->de_type = cpu_to_be16(DT_DIR); |
| 480 | di->di_entries = cpu_to_be32(1); | 479 | di->di_entries = cpu_to_be32(1); |
| 481 | 480 | ||
| 482 | gfs2_str2qstr(&str, ".."); | ||
| 483 | dent = (struct gfs2_dirent *)((char*)dent + GFS2_DIRENT_SIZE(1)); | 481 | dent = (struct gfs2_dirent *)((char*)dent + GFS2_DIRENT_SIZE(1)); |
| 484 | gfs2_qstr2dirent(&str, dibh->b_size - GFS2_DIRENT_SIZE(1) - sizeof(struct gfs2_dinode), dent); | 482 | gfs2_qstr2dirent(&gfs2_qdotdot, dibh->b_size - GFS2_DIRENT_SIZE(1) - sizeof(struct gfs2_dinode), dent); |
| 485 | 483 | ||
| 486 | gfs2_inum_out(dip, dent); | 484 | gfs2_inum_out(dip, dent); |
| 487 | dent->de_type = cpu_to_be16(DT_DIR); | 485 | dent->de_type = cpu_to_be16(DT_DIR); |
| @@ -522,7 +520,6 @@ static int gfs2_mkdir(struct inode *dir, struct dentry *dentry, int mode) | |||
| 522 | static int gfs2_rmdiri(struct gfs2_inode *dip, const struct qstr *name, | 520 | static int gfs2_rmdiri(struct gfs2_inode *dip, const struct qstr *name, |
| 523 | struct gfs2_inode *ip) | 521 | struct gfs2_inode *ip) |
| 524 | { | 522 | { |
| 525 | struct qstr dotname; | ||
| 526 | int error; | 523 | int error; |
| 527 | 524 | ||
| 528 | if (ip->i_entries != 2) { | 525 | if (ip->i_entries != 2) { |
| @@ -539,13 +536,11 @@ static int gfs2_rmdiri(struct gfs2_inode *dip, const struct qstr *name, | |||
| 539 | if (error) | 536 | if (error) |
| 540 | return error; | 537 | return error; |
| 541 | 538 | ||
| 542 | gfs2_str2qstr(&dotname, "."); | 539 | error = gfs2_dir_del(ip, &gfs2_qdot); |
| 543 | error = gfs2_dir_del(ip, &dotname); | ||
| 544 | if (error) | 540 | if (error) |
| 545 | return error; | 541 | return error; |
| 546 | 542 | ||
| 547 | gfs2_str2qstr(&dotname, ".."); | 543 | error = gfs2_dir_del(ip, &gfs2_qdotdot); |
| 548 | error = gfs2_dir_del(ip, &dotname); | ||
| 549 | if (error) | 544 | if (error) |
| 550 | return error; | 545 | return error; |
| 551 | 546 | ||
| @@ -694,11 +689,8 @@ static int gfs2_ok_to_move(struct gfs2_inode *this, struct gfs2_inode *to) | |||
| 694 | struct inode *dir = &to->i_inode; | 689 | struct inode *dir = &to->i_inode; |
| 695 | struct super_block *sb = dir->i_sb; | 690 | struct super_block *sb = dir->i_sb; |
| 696 | struct inode *tmp; | 691 | struct inode *tmp; |
| 697 | struct qstr dotdot; | ||
| 698 | int error = 0; | 692 | int error = 0; |
| 699 | 693 | ||
| 700 | gfs2_str2qstr(&dotdot, ".."); | ||
| 701 | |||
| 702 | igrab(dir); | 694 | igrab(dir); |
| 703 | 695 | ||
| 704 | for (;;) { | 696 | for (;;) { |
| @@ -711,7 +703,7 @@ static int gfs2_ok_to_move(struct gfs2_inode *this, struct gfs2_inode *to) | |||
| 711 | break; | 703 | break; |
| 712 | } | 704 | } |
| 713 | 705 | ||
| 714 | tmp = gfs2_lookupi(dir, &dotdot, 1); | 706 | tmp = gfs2_lookupi(dir, &gfs2_qdotdot, 1); |
| 715 | if (IS_ERR(tmp)) { | 707 | if (IS_ERR(tmp)) { |
| 716 | error = PTR_ERR(tmp); | 708 | error = PTR_ERR(tmp); |
| 717 | break; | 709 | break; |
| @@ -744,7 +736,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry, | |||
| 744 | struct gfs2_inode *ip = GFS2_I(odentry->d_inode); | 736 | struct gfs2_inode *ip = GFS2_I(odentry->d_inode); |
| 745 | struct gfs2_inode *nip = NULL; | 737 | struct gfs2_inode *nip = NULL; |
| 746 | struct gfs2_sbd *sdp = GFS2_SB(odir); | 738 | struct gfs2_sbd *sdp = GFS2_SB(odir); |
| 747 | struct gfs2_holder ghs[5], r_gh = { .gh_gl = NULL, }; | 739 | struct gfs2_holder ghs[5], r_gh = { .gh_gl = NULL, }, ri_gh; |
| 748 | struct gfs2_rgrpd *nrgd; | 740 | struct gfs2_rgrpd *nrgd; |
| 749 | unsigned int num_gh; | 741 | unsigned int num_gh; |
| 750 | int dir_rename = 0; | 742 | int dir_rename = 0; |
| @@ -758,6 +750,9 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry, | |||
| 758 | return 0; | 750 | return 0; |
| 759 | } | 751 | } |
| 760 | 752 | ||
| 753 | error = gfs2_rindex_hold(sdp, &ri_gh); | ||
| 754 | if (error) | ||
| 755 | return error; | ||
| 761 | 756 | ||
| 762 | if (odip != ndip) { | 757 | if (odip != ndip) { |
| 763 | error = gfs2_glock_nq_init(sdp->sd_rename_gl, LM_ST_EXCLUSIVE, | 758 | error = gfs2_glock_nq_init(sdp->sd_rename_gl, LM_ST_EXCLUSIVE, |
| @@ -887,12 +882,12 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry, | |||
| 887 | 882 | ||
| 888 | al->al_requested = sdp->sd_max_dirres; | 883 | al->al_requested = sdp->sd_max_dirres; |
| 889 | 884 | ||
| 890 | error = gfs2_inplace_reserve(ndip); | 885 | error = gfs2_inplace_reserve_ri(ndip); |
| 891 | if (error) | 886 | if (error) |
| 892 | goto out_gunlock_q; | 887 | goto out_gunlock_q; |
| 893 | 888 | ||
| 894 | error = gfs2_trans_begin(sdp, sdp->sd_max_dirres + | 889 | error = gfs2_trans_begin(sdp, sdp->sd_max_dirres + |
| 895 | al->al_rgd->rd_length + | 890 | gfs2_rg_blocks(al) + |
| 896 | 4 * RES_DINODE + 4 * RES_LEAF + | 891 | 4 * RES_DINODE + 4 * RES_LEAF + |
| 897 | RES_STATFS + RES_QUOTA + 4, 0); | 892 | RES_STATFS + RES_QUOTA + 4, 0); |
| 898 | if (error) | 893 | if (error) |
| @@ -920,9 +915,6 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry, | |||
| 920 | } | 915 | } |
| 921 | 916 | ||
| 922 | if (dir_rename) { | 917 | if (dir_rename) { |
| 923 | struct qstr name; | ||
| 924 | gfs2_str2qstr(&name, ".."); | ||
| 925 | |||
| 926 | error = gfs2_change_nlink(ndip, +1); | 918 | error = gfs2_change_nlink(ndip, +1); |
| 927 | if (error) | 919 | if (error) |
| 928 | goto out_end_trans; | 920 | goto out_end_trans; |
| @@ -930,7 +922,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry, | |||
| 930 | if (error) | 922 | if (error) |
| 931 | goto out_end_trans; | 923 | goto out_end_trans; |
| 932 | 924 | ||
| 933 | error = gfs2_dir_mvino(ip, &name, ndip, DT_DIR); | 925 | error = gfs2_dir_mvino(ip, &gfs2_qdotdot, ndip, DT_DIR); |
| 934 | if (error) | 926 | if (error) |
| 935 | goto out_end_trans; | 927 | goto out_end_trans; |
| 936 | } else { | 928 | } else { |
| @@ -972,6 +964,7 @@ out_gunlock_r: | |||
| 972 | if (r_gh.gh_gl) | 964 | if (r_gh.gh_gl) |
| 973 | gfs2_glock_dq_uninit(&r_gh); | 965 | gfs2_glock_dq_uninit(&r_gh); |
| 974 | out: | 966 | out: |
| 967 | gfs2_glock_dq_uninit(&ri_gh); | ||
| 975 | return error; | 968 | return error; |
| 976 | } | 969 | } |
| 977 | 970 | ||
| @@ -990,7 +983,7 @@ static void *gfs2_follow_link(struct dentry *dentry, struct nameidata *nd) | |||
| 990 | struct gfs2_inode *ip = GFS2_I(dentry->d_inode); | 983 | struct gfs2_inode *ip = GFS2_I(dentry->d_inode); |
| 991 | struct gfs2_holder i_gh; | 984 | struct gfs2_holder i_gh; |
| 992 | struct buffer_head *dibh; | 985 | struct buffer_head *dibh; |
| 993 | unsigned int x; | 986 | unsigned int x, size; |
| 994 | char *buf; | 987 | char *buf; |
| 995 | int error; | 988 | int error; |
| 996 | 989 | ||
| @@ -1002,7 +995,8 @@ static void *gfs2_follow_link(struct dentry *dentry, struct nameidata *nd) | |||
| 1002 | return NULL; | 995 | return NULL; |
| 1003 | } | 996 | } |
| 1004 | 997 | ||
| 1005 | if (!ip->i_disksize) { | 998 | size = (unsigned int)i_size_read(&ip->i_inode); |
| 999 | if (size == 0) { | ||
| 1006 | gfs2_consist_inode(ip); | 1000 | gfs2_consist_inode(ip); |
| 1007 | buf = ERR_PTR(-EIO); | 1001 | buf = ERR_PTR(-EIO); |
| 1008 | goto out; | 1002 | goto out; |
| @@ -1014,7 +1008,7 @@ static void *gfs2_follow_link(struct dentry *dentry, struct nameidata *nd) | |||
| 1014 | goto out; | 1008 | goto out; |
| 1015 | } | 1009 | } |
| 1016 | 1010 | ||
| 1017 | x = ip->i_disksize + 1; | 1011 | x = size + 1; |
| 1018 | buf = kmalloc(x, GFP_NOFS); | 1012 | buf = kmalloc(x, GFP_NOFS); |
| 1019 | if (!buf) | 1013 | if (!buf) |
| 1020 | buf = ERR_PTR(-ENOMEM); | 1014 | buf = ERR_PTR(-ENOMEM); |
| @@ -1071,30 +1065,6 @@ int gfs2_permission(struct inode *inode, int mask) | |||
| 1071 | return error; | 1065 | return error; |
| 1072 | } | 1066 | } |
| 1073 | 1067 | ||
| 1074 | /* | ||
| 1075 | * XXX(truncate): the truncate_setsize calls should be moved to the end. | ||
| 1076 | */ | ||
| 1077 | static int setattr_size(struct inode *inode, struct iattr *attr) | ||
| 1078 | { | ||
| 1079 | struct gfs2_inode *ip = GFS2_I(inode); | ||
| 1080 | struct gfs2_sbd *sdp = GFS2_SB(inode); | ||
| 1081 | int error; | ||
| 1082 | |||
| 1083 | if (attr->ia_size != ip->i_disksize) { | ||
| 1084 | error = gfs2_trans_begin(sdp, 0, sdp->sd_jdesc->jd_blocks); | ||
| 1085 | if (error) | ||
| 1086 | return error; | ||
| 1087 | truncate_setsize(inode, attr->ia_size); | ||
| 1088 | gfs2_trans_end(sdp); | ||
| 1089 | } | ||
| 1090 | |||
| 1091 | error = gfs2_truncatei(ip, attr->ia_size); | ||
| 1092 | if (error && (inode->i_size != ip->i_disksize)) | ||
| 1093 | i_size_write(inode, ip->i_disksize); | ||
| 1094 | |||
| 1095 | return error; | ||
| 1096 | } | ||
| 1097 | |||
| 1098 | static int setattr_chown(struct inode *inode, struct iattr *attr) | 1068 | static int setattr_chown(struct inode *inode, struct iattr *attr) |
| 1099 | { | 1069 | { |
| 1100 | struct gfs2_inode *ip = GFS2_I(inode); | 1070 | struct gfs2_inode *ip = GFS2_I(inode); |
| @@ -1195,7 +1165,7 @@ static int gfs2_setattr(struct dentry *dentry, struct iattr *attr) | |||
| 1195 | goto out; | 1165 | goto out; |
| 1196 | 1166 | ||
| 1197 | if (attr->ia_valid & ATTR_SIZE) | 1167 | if (attr->ia_valid & ATTR_SIZE) |
| 1198 | error = setattr_size(inode, attr); | 1168 | error = gfs2_setattr_size(inode, attr->ia_size); |
| 1199 | else if (attr->ia_valid & (ATTR_UID | ATTR_GID)) | 1169 | else if (attr->ia_valid & (ATTR_UID | ATTR_GID)) |
| 1200 | error = setattr_chown(inode, attr); | 1170 | error = setattr_chown(inode, attr); |
| 1201 | else if ((attr->ia_valid & ATTR_MODE) && IS_POSIXACL(inode)) | 1171 | else if ((attr->ia_valid & ATTR_MODE) && IS_POSIXACL(inode)) |
| @@ -1301,6 +1271,257 @@ static int gfs2_removexattr(struct dentry *dentry, const char *name) | |||
| 1301 | return ret; | 1271 | return ret; |
| 1302 | } | 1272 | } |
| 1303 | 1273 | ||
| 1274 | static void empty_write_end(struct page *page, unsigned from, | ||
| 1275 | unsigned to) | ||
| 1276 | { | ||
| 1277 | struct gfs2_inode *ip = GFS2_I(page->mapping->host); | ||
| 1278 | |||
| 1279 | page_zero_new_buffers(page, from, to); | ||
| 1280 | flush_dcache_page(page); | ||
| 1281 | mark_page_accessed(page); | ||
| 1282 | |||
| 1283 | if (!gfs2_is_writeback(ip)) | ||
| 1284 | gfs2_page_add_databufs(ip, page, from, to); | ||
| 1285 | |||
| 1286 | block_commit_write(page, from, to); | ||
| 1287 | } | ||
| 1288 | |||
| 1289 | |||
| 1290 | static int write_empty_blocks(struct page *page, unsigned from, unsigned to) | ||
| 1291 | { | ||
| 1292 | unsigned start, end, next; | ||
| 1293 | struct buffer_head *bh, *head; | ||
| 1294 | int error; | ||
| 1295 | |||
| 1296 | if (!page_has_buffers(page)) { | ||
| 1297 | error = block_prepare_write(page, from, to, gfs2_block_map); | ||
| 1298 | if (unlikely(error)) | ||
| 1299 | return error; | ||
| 1300 | |||
| 1301 | empty_write_end(page, from, to); | ||
| 1302 | return 0; | ||
| 1303 | } | ||
| 1304 | |||
| 1305 | bh = head = page_buffers(page); | ||
| 1306 | next = end = 0; | ||
| 1307 | while (next < from) { | ||
| 1308 | next += bh->b_size; | ||
| 1309 | bh = bh->b_this_page; | ||
| 1310 | } | ||
| 1311 | start = next; | ||
| 1312 | do { | ||
| 1313 | next += bh->b_size; | ||
| 1314 | if (buffer_mapped(bh)) { | ||
| 1315 | if (end) { | ||
| 1316 | error = block_prepare_write(page, start, end, | ||
| 1317 | gfs2_block_map); | ||
| 1318 | if (unlikely(error)) | ||
| 1319 | return error; | ||
| 1320 | empty_write_end(page, start, end); | ||
| 1321 | end = 0; | ||
| 1322 | } | ||
| 1323 | start = next; | ||
| 1324 | } | ||
| 1325 | else | ||
| 1326 | end = next; | ||
| 1327 | bh = bh->b_this_page; | ||
| 1328 | } while (next < to); | ||
| 1329 | |||
| 1330 | if (end) { | ||
| 1331 | error = block_prepare_write(page, start, end, gfs2_block_map); | ||
| 1332 | if (unlikely(error)) | ||
| 1333 | return error; | ||
| 1334 | empty_write_end(page, start, end); | ||
| 1335 | } | ||
| 1336 | |||
| 1337 | return 0; | ||
| 1338 | } | ||
| 1339 | |||
| 1340 | static int fallocate_chunk(struct inode *inode, loff_t offset, loff_t len, | ||
| 1341 | int mode) | ||
| 1342 | { | ||
| 1343 | struct gfs2_inode *ip = GFS2_I(inode); | ||
| 1344 | struct buffer_head *dibh; | ||
| 1345 | int error; | ||
| 1346 | u64 start = offset >> PAGE_CACHE_SHIFT; | ||
| 1347 | unsigned int start_offset = offset & ~PAGE_CACHE_MASK; | ||
| 1348 | u64 end = (offset + len - 1) >> PAGE_CACHE_SHIFT; | ||
| 1349 | pgoff_t curr; | ||
| 1350 | struct page *page; | ||
| 1351 | unsigned int end_offset = (offset + len) & ~PAGE_CACHE_MASK; | ||
| 1352 | unsigned int from, to; | ||
| 1353 | |||
| 1354 | if (!end_offset) | ||
| 1355 | end_offset = PAGE_CACHE_SIZE; | ||
| 1356 | |||
| 1357 | error = gfs2_meta_inode_buffer(ip, &dibh); | ||
| 1358 | if (unlikely(error)) | ||
| 1359 | goto out; | ||
| 1360 | |||
| 1361 | gfs2_trans_add_bh(ip->i_gl, dibh, 1); | ||
| 1362 | |||
| 1363 | if (gfs2_is_stuffed(ip)) { | ||
| 1364 | error = gfs2_unstuff_dinode(ip, NULL); | ||
| 1365 | if (unlikely(error)) | ||
| 1366 | goto out; | ||
| 1367 | } | ||
| 1368 | |||
| 1369 | curr = start; | ||
| 1370 | offset = start << PAGE_CACHE_SHIFT; | ||
| 1371 | from = start_offset; | ||
| 1372 | to = PAGE_CACHE_SIZE; | ||
| 1373 | while (curr <= end) { | ||
| 1374 | page = grab_cache_page_write_begin(inode->i_mapping, curr, | ||
| 1375 | AOP_FLAG_NOFS); | ||
| 1376 | if (unlikely(!page)) { | ||
| 1377 | error = -ENOMEM; | ||
| 1378 | goto out; | ||
| 1379 | } | ||
| 1380 | |||
| 1381 | if (curr == end) | ||
| 1382 | to = end_offset; | ||
| 1383 | error = write_empty_blocks(page, from, to); | ||
| 1384 | if (!error && offset + to > inode->i_size && | ||
| 1385 | !(mode & FALLOC_FL_KEEP_SIZE)) { | ||
| 1386 | i_size_write(inode, offset + to); | ||
| 1387 | } | ||
| 1388 | unlock_page(page); | ||
| 1389 | page_cache_release(page); | ||
| 1390 | if (error) | ||
| 1391 | goto out; | ||
| 1392 | curr++; | ||
| 1393 | offset += PAGE_CACHE_SIZE; | ||
| 1394 | from = 0; | ||
| 1395 | } | ||
| 1396 | |||
| 1397 | gfs2_dinode_out(ip, dibh->b_data); | ||
| 1398 | mark_inode_dirty(inode); | ||
| 1399 | |||
| 1400 | brelse(dibh); | ||
| 1401 | |||
| 1402 | out: | ||
| 1403 | return error; | ||
| 1404 | } | ||
| 1405 | |||
| 1406 | static void calc_max_reserv(struct gfs2_inode *ip, loff_t max, loff_t *len, | ||
| 1407 | unsigned int *data_blocks, unsigned int *ind_blocks) | ||
| 1408 | { | ||
| 1409 | const struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); | ||
| 1410 | unsigned int max_blocks = ip->i_alloc->al_rgd->rd_free_clone; | ||
| 1411 | unsigned int tmp, max_data = max_blocks - 3 * (sdp->sd_max_height - 1); | ||
| 1412 | |||
| 1413 | for (tmp = max_data; tmp > sdp->sd_diptrs;) { | ||
| 1414 | tmp = DIV_ROUND_UP(tmp, sdp->sd_inptrs); | ||
| 1415 | max_data -= tmp; | ||
| 1416 | } | ||
| 1417 | /* This calculation isn't the exact reverse of gfs2_write_calc_reserve, | ||
| 1418 | so it might end up with fewer data blocks */ | ||
| 1419 | if (max_data <= *data_blocks) | ||
| 1420 | return; | ||
| 1421 | *data_blocks = max_data; | ||
| 1422 | *ind_blocks = max_blocks - max_data; | ||
| 1423 | *len = ((loff_t)max_data - 3) << sdp->sd_sb.sb_bsize_shift; | ||
| 1424 | if (*len > max) { | ||
| 1425 | *len = max; | ||
| 1426 | gfs2_write_calc_reserv(ip, max, data_blocks, ind_blocks); | ||
| 1427 | } | ||
| 1428 | } | ||
| 1429 | |||
| 1430 | static long gfs2_fallocate(struct inode *inode, int mode, loff_t offset, | ||
| 1431 | loff_t len) | ||
| 1432 | { | ||
| 1433 | struct gfs2_sbd *sdp = GFS2_SB(inode); | ||
| 1434 | struct gfs2_inode *ip = GFS2_I(inode); | ||
| 1435 | unsigned int data_blocks = 0, ind_blocks = 0, rblocks; | ||
| 1436 | loff_t bytes, max_bytes; | ||
| 1437 | struct gfs2_alloc *al; | ||
| 1438 | int error; | ||
| 1439 | loff_t next = (offset + len - 1) >> sdp->sd_sb.sb_bsize_shift; | ||
| 1440 | next = (next + 1) << sdp->sd_sb.sb_bsize_shift; | ||
| 1441 | |||
| 1442 | offset = (offset >> sdp->sd_sb.sb_bsize_shift) << | ||
| 1443 | sdp->sd_sb.sb_bsize_shift; | ||
| 1444 | |||
| 1445 | len = next - offset; | ||
| 1446 | bytes = sdp->sd_max_rg_data * sdp->sd_sb.sb_bsize / 2; | ||
| 1447 | if (!bytes) | ||
| 1448 | bytes = UINT_MAX; | ||
| 1449 | |||
| 1450 | gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &ip->i_gh); | ||
| 1451 | error = gfs2_glock_nq(&ip->i_gh); | ||
| 1452 | if (unlikely(error)) | ||
| 1453 | goto out_uninit; | ||
| 1454 | |||
| 1455 | if (!gfs2_write_alloc_required(ip, offset, len)) | ||
| 1456 | goto out_unlock; | ||
| 1457 | |||
| 1458 | while (len > 0) { | ||
| 1459 | if (len < bytes) | ||
| 1460 | bytes = len; | ||
| 1461 | al = gfs2_alloc_get(ip); | ||
| 1462 | if (!al) { | ||
| 1463 | error = -ENOMEM; | ||
| 1464 | goto out_unlock; | ||
| 1465 | } | ||
| 1466 | |||
| 1467 | error = gfs2_quota_lock_check(ip); | ||
| 1468 | if (error) | ||
| 1469 | goto out_alloc_put; | ||
| 1470 | |||
| 1471 | retry: | ||
| 1472 | gfs2_write_calc_reserv(ip, bytes, &data_blocks, &ind_blocks); | ||
| 1473 | |||
| 1474 | al->al_requested = data_blocks + ind_blocks; | ||
| 1475 | error = gfs2_inplace_reserve(ip); | ||
| 1476 | if (error) { | ||
| 1477 | if (error == -ENOSPC && bytes > sdp->sd_sb.sb_bsize) { | ||
| 1478 | bytes >>= 1; | ||
| 1479 | goto retry; | ||
| 1480 | } | ||
| 1481 | goto out_qunlock; | ||
| 1482 | } | ||
| 1483 | max_bytes = bytes; | ||
| 1484 | calc_max_reserv(ip, len, &max_bytes, &data_blocks, &ind_blocks); | ||
| 1485 | al->al_requested = data_blocks + ind_blocks; | ||
| 1486 | |||
| 1487 | rblocks = RES_DINODE + ind_blocks + RES_STATFS + RES_QUOTA + | ||
| 1488 | RES_RG_HDR + gfs2_rg_blocks(al); | ||
| 1489 | if (gfs2_is_jdata(ip)) | ||
| 1490 | rblocks += data_blocks ? data_blocks : 1; | ||
| 1491 | |||
| 1492 | error = gfs2_trans_begin(sdp, rblocks, | ||
| 1493 | PAGE_CACHE_SIZE/sdp->sd_sb.sb_bsize); | ||
| 1494 | if (error) | ||
| 1495 | goto out_trans_fail; | ||
| 1496 | |||
| 1497 | error = fallocate_chunk(inode, offset, max_bytes, mode); | ||
| 1498 | gfs2_trans_end(sdp); | ||
| 1499 | |||
| 1500 | if (error) | ||
| 1501 | goto out_trans_fail; | ||
| 1502 | |||
| 1503 | len -= max_bytes; | ||
| 1504 | offset += max_bytes; | ||
| 1505 | gfs2_inplace_release(ip); | ||
| 1506 | gfs2_quota_unlock(ip); | ||
| 1507 | gfs2_alloc_put(ip); | ||
| 1508 | } | ||
| 1509 | goto out_unlock; | ||
| 1510 | |||
| 1511 | out_trans_fail: | ||
| 1512 | gfs2_inplace_release(ip); | ||
| 1513 | out_qunlock: | ||
| 1514 | gfs2_quota_unlock(ip); | ||
| 1515 | out_alloc_put: | ||
| 1516 | gfs2_alloc_put(ip); | ||
| 1517 | out_unlock: | ||
| 1518 | gfs2_glock_dq(&ip->i_gh); | ||
| 1519 | out_uninit: | ||
| 1520 | gfs2_holder_uninit(&ip->i_gh); | ||
| 1521 | return error; | ||
| 1522 | } | ||
| 1523 | |||
| 1524 | |||
| 1304 | static int gfs2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, | 1525 | static int gfs2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, |
| 1305 | u64 start, u64 len) | 1526 | u64 start, u64 len) |
| 1306 | { | 1527 | { |
| @@ -1351,6 +1572,7 @@ const struct inode_operations gfs2_file_iops = { | |||
| 1351 | .getxattr = gfs2_getxattr, | 1572 | .getxattr = gfs2_getxattr, |
| 1352 | .listxattr = gfs2_listxattr, | 1573 | .listxattr = gfs2_listxattr, |
| 1353 | .removexattr = gfs2_removexattr, | 1574 | .removexattr = gfs2_removexattr, |
| 1575 | .fallocate = gfs2_fallocate, | ||
| 1354 | .fiemap = gfs2_fiemap, | 1576 | .fiemap = gfs2_fiemap, |
| 1355 | }; | 1577 | }; |
| 1356 | 1578 | ||
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c index 1bc6b5695e6d..58a9b9998b42 100644 --- a/fs/gfs2/quota.c +++ b/fs/gfs2/quota.c | |||
| @@ -735,10 +735,8 @@ get_a_page: | |||
| 735 | goto out; | 735 | goto out; |
| 736 | 736 | ||
| 737 | size = loc + sizeof(struct gfs2_quota); | 737 | size = loc + sizeof(struct gfs2_quota); |
| 738 | if (size > inode->i_size) { | 738 | if (size > inode->i_size) |
| 739 | ip->i_disksize = size; | ||
| 740 | i_size_write(inode, size); | 739 | i_size_write(inode, size); |
| 741 | } | ||
| 742 | inode->i_mtime = inode->i_atime = CURRENT_TIME; | 740 | inode->i_mtime = inode->i_atime = CURRENT_TIME; |
| 743 | gfs2_trans_add_bh(ip->i_gl, dibh, 1); | 741 | gfs2_trans_add_bh(ip->i_gl, dibh, 1); |
| 744 | gfs2_dinode_out(ip, dibh->b_data); | 742 | gfs2_dinode_out(ip, dibh->b_data); |
| @@ -817,7 +815,7 @@ static int do_sync(unsigned int num_qd, struct gfs2_quota_data **qda) | |||
| 817 | goto out_alloc; | 815 | goto out_alloc; |
| 818 | 816 | ||
| 819 | if (nalloc) | 817 | if (nalloc) |
| 820 | blocks += al->al_rgd->rd_length + nalloc * ind_blocks + RES_STATFS; | 818 | blocks += gfs2_rg_blocks(al) + nalloc * ind_blocks + RES_STATFS; |
| 821 | 819 | ||
| 822 | error = gfs2_trans_begin(sdp, blocks, 0); | 820 | error = gfs2_trans_begin(sdp, blocks, 0); |
| 823 | if (error) | 821 | if (error) |
| @@ -1190,18 +1188,17 @@ static void gfs2_quota_change_in(struct gfs2_quota_change_host *qc, const void * | |||
| 1190 | int gfs2_quota_init(struct gfs2_sbd *sdp) | 1188 | int gfs2_quota_init(struct gfs2_sbd *sdp) |
| 1191 | { | 1189 | { |
| 1192 | struct gfs2_inode *ip = GFS2_I(sdp->sd_qc_inode); | 1190 | struct gfs2_inode *ip = GFS2_I(sdp->sd_qc_inode); |
| 1193 | unsigned int blocks = ip->i_disksize >> sdp->sd_sb.sb_bsize_shift; | 1191 | u64 size = i_size_read(sdp->sd_qc_inode); |
| 1192 | unsigned int blocks = size >> sdp->sd_sb.sb_bsize_shift; | ||
| 1194 | unsigned int x, slot = 0; | 1193 | unsigned int x, slot = 0; |
| 1195 | unsigned int found = 0; | 1194 | unsigned int found = 0; |
| 1196 | u64 dblock; | 1195 | u64 dblock; |
| 1197 | u32 extlen = 0; | 1196 | u32 extlen = 0; |
| 1198 | int error; | 1197 | int error; |
| 1199 | 1198 | ||
| 1200 | if (!ip->i_disksize || ip->i_disksize > (64 << 20) || | 1199 | if (gfs2_check_internal_file_size(sdp->sd_qc_inode, 1, 64 << 20)) |
| 1201 | ip->i_disksize & (sdp->sd_sb.sb_bsize - 1)) { | ||
| 1202 | gfs2_consist_inode(ip); | ||
| 1203 | return -EIO; | 1200 | return -EIO; |
| 1204 | } | 1201 | |
| 1205 | sdp->sd_quota_slots = blocks * sdp->sd_qc_per_block; | 1202 | sdp->sd_quota_slots = blocks * sdp->sd_qc_per_block; |
| 1206 | sdp->sd_quota_chunks = DIV_ROUND_UP(sdp->sd_quota_slots, 8 * PAGE_SIZE); | 1203 | sdp->sd_quota_chunks = DIV_ROUND_UP(sdp->sd_quota_slots, 8 * PAGE_SIZE); |
| 1207 | 1204 | ||
| @@ -1589,6 +1586,7 @@ static int gfs2_set_dqblk(struct super_block *sb, int type, qid_t id, | |||
| 1589 | error = gfs2_inplace_reserve(ip); | 1586 | error = gfs2_inplace_reserve(ip); |
| 1590 | if (error) | 1587 | if (error) |
| 1591 | goto out_alloc; | 1588 | goto out_alloc; |
| 1589 | blocks += gfs2_rg_blocks(al); | ||
| 1592 | } | 1590 | } |
| 1593 | 1591 | ||
| 1594 | error = gfs2_trans_begin(sdp, blocks + RES_DINODE + 1, 0); | 1592 | error = gfs2_trans_begin(sdp, blocks + RES_DINODE + 1, 0); |
diff --git a/fs/gfs2/recovery.c b/fs/gfs2/recovery.c index f7f89a94a5a4..f2a02edcac8f 100644 --- a/fs/gfs2/recovery.c +++ b/fs/gfs2/recovery.c | |||
| @@ -455,11 +455,13 @@ void gfs2_recover_func(struct work_struct *work) | |||
| 455 | int ro = 0; | 455 | int ro = 0; |
| 456 | unsigned int pass; | 456 | unsigned int pass; |
| 457 | int error; | 457 | int error; |
| 458 | int jlocked = 0; | ||
| 458 | 459 | ||
| 459 | if (jd->jd_jid != sdp->sd_lockstruct.ls_jid) { | 460 | if (sdp->sd_args.ar_spectator || |
| 461 | (jd->jd_jid != sdp->sd_lockstruct.ls_jid)) { | ||
| 460 | fs_info(sdp, "jid=%u: Trying to acquire journal lock...\n", | 462 | fs_info(sdp, "jid=%u: Trying to acquire journal lock...\n", |
| 461 | jd->jd_jid); | 463 | jd->jd_jid); |
| 462 | 464 | jlocked = 1; | |
| 463 | /* Acquire the journal lock so we can do recovery */ | 465 | /* Acquire the journal lock so we can do recovery */ |
| 464 | 466 | ||
| 465 | error = gfs2_glock_nq_num(sdp, jd->jd_jid, &gfs2_journal_glops, | 467 | error = gfs2_glock_nq_num(sdp, jd->jd_jid, &gfs2_journal_glops, |
| @@ -554,13 +556,12 @@ void gfs2_recover_func(struct work_struct *work) | |||
| 554 | jd->jd_jid, t); | 556 | jd->jd_jid, t); |
| 555 | } | 557 | } |
| 556 | 558 | ||
| 557 | if (jd->jd_jid != sdp->sd_lockstruct.ls_jid) | ||
| 558 | gfs2_glock_dq_uninit(&ji_gh); | ||
| 559 | |||
| 560 | gfs2_recovery_done(sdp, jd->jd_jid, LM_RD_SUCCESS); | 559 | gfs2_recovery_done(sdp, jd->jd_jid, LM_RD_SUCCESS); |
| 561 | 560 | ||
| 562 | if (jd->jd_jid != sdp->sd_lockstruct.ls_jid) | 561 | if (jlocked) { |
| 562 | gfs2_glock_dq_uninit(&ji_gh); | ||
| 563 | gfs2_glock_dq_uninit(&j_gh); | 563 | gfs2_glock_dq_uninit(&j_gh); |
| 564 | } | ||
| 564 | 565 | ||
| 565 | fs_info(sdp, "jid=%u: Done\n", jd->jd_jid); | 566 | fs_info(sdp, "jid=%u: Done\n", jd->jd_jid); |
| 566 | goto done; | 567 | goto done; |
| @@ -568,7 +569,7 @@ void gfs2_recover_func(struct work_struct *work) | |||
| 568 | fail_gunlock_tr: | 569 | fail_gunlock_tr: |
| 569 | gfs2_glock_dq_uninit(&t_gh); | 570 | gfs2_glock_dq_uninit(&t_gh); |
| 570 | fail_gunlock_ji: | 571 | fail_gunlock_ji: |
| 571 | if (jd->jd_jid != sdp->sd_lockstruct.ls_jid) { | 572 | if (jlocked) { |
| 572 | gfs2_glock_dq_uninit(&ji_gh); | 573 | gfs2_glock_dq_uninit(&ji_gh); |
| 573 | fail_gunlock_j: | 574 | fail_gunlock_j: |
| 574 | gfs2_glock_dq_uninit(&j_gh); | 575 | gfs2_glock_dq_uninit(&j_gh); |
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index 171a744f8e45..fb67f593f408 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c | |||
| @@ -500,7 +500,7 @@ u64 gfs2_ri_total(struct gfs2_sbd *sdp) | |||
| 500 | for (rgrps = 0;; rgrps++) { | 500 | for (rgrps = 0;; rgrps++) { |
| 501 | loff_t pos = rgrps * sizeof(struct gfs2_rindex); | 501 | loff_t pos = rgrps * sizeof(struct gfs2_rindex); |
| 502 | 502 | ||
| 503 | if (pos + sizeof(struct gfs2_rindex) >= ip->i_disksize) | 503 | if (pos + sizeof(struct gfs2_rindex) >= i_size_read(inode)) |
| 504 | break; | 504 | break; |
| 505 | error = gfs2_internal_read(ip, &ra_state, buf, &pos, | 505 | error = gfs2_internal_read(ip, &ra_state, buf, &pos, |
| 506 | sizeof(struct gfs2_rindex)); | 506 | sizeof(struct gfs2_rindex)); |
| @@ -588,7 +588,9 @@ static int gfs2_ri_update(struct gfs2_inode *ip) | |||
| 588 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); | 588 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); |
| 589 | struct inode *inode = &ip->i_inode; | 589 | struct inode *inode = &ip->i_inode; |
| 590 | struct file_ra_state ra_state; | 590 | struct file_ra_state ra_state; |
| 591 | u64 rgrp_count = ip->i_disksize; | 591 | u64 rgrp_count = i_size_read(inode); |
| 592 | struct gfs2_rgrpd *rgd; | ||
| 593 | unsigned int max_data = 0; | ||
| 592 | int error; | 594 | int error; |
| 593 | 595 | ||
| 594 | do_div(rgrp_count, sizeof(struct gfs2_rindex)); | 596 | do_div(rgrp_count, sizeof(struct gfs2_rindex)); |
| @@ -603,6 +605,10 @@ static int gfs2_ri_update(struct gfs2_inode *ip) | |||
| 603 | } | 605 | } |
| 604 | } | 606 | } |
| 605 | 607 | ||
| 608 | list_for_each_entry(rgd, &sdp->sd_rindex_list, rd_list) | ||
| 609 | if (rgd->rd_data > max_data) | ||
| 610 | max_data = rgd->rd_data; | ||
| 611 | sdp->sd_max_rg_data = max_data; | ||
| 606 | sdp->sd_rindex_uptodate = 1; | 612 | sdp->sd_rindex_uptodate = 1; |
| 607 | return 0; | 613 | return 0; |
| 608 | } | 614 | } |
| @@ -622,13 +628,15 @@ static int gfs2_ri_update_special(struct gfs2_inode *ip) | |||
| 622 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); | 628 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); |
| 623 | struct inode *inode = &ip->i_inode; | 629 | struct inode *inode = &ip->i_inode; |
| 624 | struct file_ra_state ra_state; | 630 | struct file_ra_state ra_state; |
| 631 | struct gfs2_rgrpd *rgd; | ||
| 632 | unsigned int max_data = 0; | ||
| 625 | int error; | 633 | int error; |
| 626 | 634 | ||
| 627 | file_ra_state_init(&ra_state, inode->i_mapping); | 635 | file_ra_state_init(&ra_state, inode->i_mapping); |
| 628 | for (sdp->sd_rgrps = 0;; sdp->sd_rgrps++) { | 636 | for (sdp->sd_rgrps = 0;; sdp->sd_rgrps++) { |
| 629 | /* Ignore partials */ | 637 | /* Ignore partials */ |
| 630 | if ((sdp->sd_rgrps + 1) * sizeof(struct gfs2_rindex) > | 638 | if ((sdp->sd_rgrps + 1) * sizeof(struct gfs2_rindex) > |
| 631 | ip->i_disksize) | 639 | i_size_read(inode)) |
| 632 | break; | 640 | break; |
| 633 | error = read_rindex_entry(ip, &ra_state); | 641 | error = read_rindex_entry(ip, &ra_state); |
| 634 | if (error) { | 642 | if (error) { |
| @@ -636,6 +644,10 @@ static int gfs2_ri_update_special(struct gfs2_inode *ip) | |||
| 636 | return error; | 644 | return error; |
| 637 | } | 645 | } |
| 638 | } | 646 | } |
| 647 | list_for_each_entry(rgd, &sdp->sd_rindex_list, rd_list) | ||
| 648 | if (rgd->rd_data > max_data) | ||
| 649 | max_data = rgd->rd_data; | ||
| 650 | sdp->sd_max_rg_data = max_data; | ||
| 639 | 651 | ||
| 640 | sdp->sd_rindex_uptodate = 1; | 652 | sdp->sd_rindex_uptodate = 1; |
| 641 | return 0; | 653 | return 0; |
| @@ -1188,7 +1200,8 @@ out: | |||
| 1188 | * Returns: errno | 1200 | * Returns: errno |
| 1189 | */ | 1201 | */ |
| 1190 | 1202 | ||
| 1191 | int gfs2_inplace_reserve_i(struct gfs2_inode *ip, char *file, unsigned int line) | 1203 | int gfs2_inplace_reserve_i(struct gfs2_inode *ip, int hold_rindex, |
| 1204 | char *file, unsigned int line) | ||
| 1192 | { | 1205 | { |
| 1193 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); | 1206 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); |
| 1194 | struct gfs2_alloc *al = ip->i_alloc; | 1207 | struct gfs2_alloc *al = ip->i_alloc; |
| @@ -1199,12 +1212,15 @@ int gfs2_inplace_reserve_i(struct gfs2_inode *ip, char *file, unsigned int line) | |||
| 1199 | return -EINVAL; | 1212 | return -EINVAL; |
| 1200 | 1213 | ||
| 1201 | try_again: | 1214 | try_again: |
| 1202 | /* We need to hold the rindex unless the inode we're using is | 1215 | if (hold_rindex) { |
| 1203 | the rindex itself, in which case it's already held. */ | 1216 | /* We need to hold the rindex unless the inode we're using is |
| 1204 | if (ip != GFS2_I(sdp->sd_rindex)) | 1217 | the rindex itself, in which case it's already held. */ |
| 1205 | error = gfs2_rindex_hold(sdp, &al->al_ri_gh); | 1218 | if (ip != GFS2_I(sdp->sd_rindex)) |
| 1206 | else if (!sdp->sd_rgrps) /* We may not have the rindex read in, so: */ | 1219 | error = gfs2_rindex_hold(sdp, &al->al_ri_gh); |
| 1207 | error = gfs2_ri_update_special(ip); | 1220 | else if (!sdp->sd_rgrps) /* We may not have the rindex read |
| 1221 | in, so: */ | ||
| 1222 | error = gfs2_ri_update_special(ip); | ||
| 1223 | } | ||
| 1208 | 1224 | ||
| 1209 | if (error) | 1225 | if (error) |
| 1210 | return error; | 1226 | return error; |
| @@ -1215,7 +1231,7 @@ try_again: | |||
| 1215 | try to free it, and try the allocation again. */ | 1231 | try to free it, and try the allocation again. */ |
| 1216 | error = get_local_rgrp(ip, &unlinked, &last_unlinked); | 1232 | error = get_local_rgrp(ip, &unlinked, &last_unlinked); |
| 1217 | if (error) { | 1233 | if (error) { |
| 1218 | if (ip != GFS2_I(sdp->sd_rindex)) | 1234 | if (hold_rindex && ip != GFS2_I(sdp->sd_rindex)) |
| 1219 | gfs2_glock_dq_uninit(&al->al_ri_gh); | 1235 | gfs2_glock_dq_uninit(&al->al_ri_gh); |
| 1220 | if (error != -EAGAIN) | 1236 | if (error != -EAGAIN) |
| 1221 | return error; | 1237 | return error; |
| @@ -1257,7 +1273,7 @@ void gfs2_inplace_release(struct gfs2_inode *ip) | |||
| 1257 | al->al_rgd = NULL; | 1273 | al->al_rgd = NULL; |
| 1258 | if (al->al_rgd_gh.gh_gl) | 1274 | if (al->al_rgd_gh.gh_gl) |
| 1259 | gfs2_glock_dq_uninit(&al->al_rgd_gh); | 1275 | gfs2_glock_dq_uninit(&al->al_rgd_gh); |
| 1260 | if (ip != GFS2_I(sdp->sd_rindex)) | 1276 | if (ip != GFS2_I(sdp->sd_rindex) && al->al_ri_gh.gh_gl) |
| 1261 | gfs2_glock_dq_uninit(&al->al_ri_gh); | 1277 | gfs2_glock_dq_uninit(&al->al_ri_gh); |
| 1262 | } | 1278 | } |
| 1263 | 1279 | ||
| @@ -1496,11 +1512,19 @@ int gfs2_alloc_block(struct gfs2_inode *ip, u64 *bn, unsigned int *n) | |||
| 1496 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); | 1512 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); |
| 1497 | struct buffer_head *dibh; | 1513 | struct buffer_head *dibh; |
| 1498 | struct gfs2_alloc *al = ip->i_alloc; | 1514 | struct gfs2_alloc *al = ip->i_alloc; |
| 1499 | struct gfs2_rgrpd *rgd = al->al_rgd; | 1515 | struct gfs2_rgrpd *rgd; |
| 1500 | u32 goal, blk; | 1516 | u32 goal, blk; |
| 1501 | u64 block; | 1517 | u64 block; |
| 1502 | int error; | 1518 | int error; |
| 1503 | 1519 | ||
| 1520 | /* Only happens if there is a bug in gfs2, return something distinctive | ||
| 1521 | * to ensure that it is noticed. | ||
| 1522 | */ | ||
| 1523 | if (al == NULL) | ||
| 1524 | return -ECANCELED; | ||
| 1525 | |||
| 1526 | rgd = al->al_rgd; | ||
| 1527 | |||
| 1504 | if (rgrp_contains_block(rgd, ip->i_goal)) | 1528 | if (rgrp_contains_block(rgd, ip->i_goal)) |
| 1505 | goal = ip->i_goal - rgd->rd_data0; | 1529 | goal = ip->i_goal - rgd->rd_data0; |
| 1506 | else | 1530 | else |
diff --git a/fs/gfs2/rgrp.h b/fs/gfs2/rgrp.h index f07119d89557..0e35c0466f9a 100644 --- a/fs/gfs2/rgrp.h +++ b/fs/gfs2/rgrp.h | |||
| @@ -39,10 +39,12 @@ static inline void gfs2_alloc_put(struct gfs2_inode *ip) | |||
| 39 | ip->i_alloc = NULL; | 39 | ip->i_alloc = NULL; |
| 40 | } | 40 | } |
| 41 | 41 | ||
| 42 | extern int gfs2_inplace_reserve_i(struct gfs2_inode *ip, char *file, | 42 | extern int gfs2_inplace_reserve_i(struct gfs2_inode *ip, int hold_rindex, |
| 43 | unsigned int line); | 43 | char *file, unsigned int line); |
| 44 | #define gfs2_inplace_reserve(ip) \ | 44 | #define gfs2_inplace_reserve(ip) \ |
| 45 | gfs2_inplace_reserve_i((ip), __FILE__, __LINE__) | 45 | gfs2_inplace_reserve_i((ip), 1, __FILE__, __LINE__) |
| 46 | #define gfs2_inplace_reserve_ri(ip) \ | ||
| 47 | gfs2_inplace_reserve_i((ip), 0, __FILE__, __LINE__) | ||
| 46 | 48 | ||
| 47 | extern void gfs2_inplace_release(struct gfs2_inode *ip); | 49 | extern void gfs2_inplace_release(struct gfs2_inode *ip); |
| 48 | 50 | ||
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index 77cb9f830ee4..047d1176096c 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c | |||
| @@ -85,6 +85,7 @@ static const match_table_t tokens = { | |||
| 85 | {Opt_locktable, "locktable=%s"}, | 85 | {Opt_locktable, "locktable=%s"}, |
| 86 | {Opt_hostdata, "hostdata=%s"}, | 86 | {Opt_hostdata, "hostdata=%s"}, |
| 87 | {Opt_spectator, "spectator"}, | 87 | {Opt_spectator, "spectator"}, |
| 88 | {Opt_spectator, "norecovery"}, | ||
| 88 | {Opt_ignore_local_fs, "ignore_local_fs"}, | 89 | {Opt_ignore_local_fs, "ignore_local_fs"}, |
| 89 | {Opt_localflocks, "localflocks"}, | 90 | {Opt_localflocks, "localflocks"}, |
| 90 | {Opt_localcaching, "localcaching"}, | 91 | {Opt_localcaching, "localcaching"}, |
| @@ -159,13 +160,13 @@ int gfs2_mount_args(struct gfs2_args *args, char *options) | |||
| 159 | args->ar_spectator = 1; | 160 | args->ar_spectator = 1; |
| 160 | break; | 161 | break; |
| 161 | case Opt_ignore_local_fs: | 162 | case Opt_ignore_local_fs: |
| 162 | args->ar_ignore_local_fs = 1; | 163 | /* Retained for backwards compat only */ |
| 163 | break; | 164 | break; |
| 164 | case Opt_localflocks: | 165 | case Opt_localflocks: |
| 165 | args->ar_localflocks = 1; | 166 | args->ar_localflocks = 1; |
| 166 | break; | 167 | break; |
| 167 | case Opt_localcaching: | 168 | case Opt_localcaching: |
| 168 | args->ar_localcaching = 1; | 169 | /* Retained for backwards compat only */ |
| 169 | break; | 170 | break; |
| 170 | case Opt_debug: | 171 | case Opt_debug: |
| 171 | if (args->ar_errors == GFS2_ERRORS_PANIC) { | 172 | if (args->ar_errors == GFS2_ERRORS_PANIC) { |
| @@ -179,7 +180,7 @@ int gfs2_mount_args(struct gfs2_args *args, char *options) | |||
| 179 | args->ar_debug = 0; | 180 | args->ar_debug = 0; |
| 180 | break; | 181 | break; |
| 181 | case Opt_upgrade: | 182 | case Opt_upgrade: |
| 182 | args->ar_upgrade = 1; | 183 | /* Retained for backwards compat only */ |
| 183 | break; | 184 | break; |
| 184 | case Opt_acl: | 185 | case Opt_acl: |
| 185 | args->ar_posix_acl = 1; | 186 | args->ar_posix_acl = 1; |
| @@ -342,15 +343,14 @@ int gfs2_jdesc_check(struct gfs2_jdesc *jd) | |||
| 342 | { | 343 | { |
| 343 | struct gfs2_inode *ip = GFS2_I(jd->jd_inode); | 344 | struct gfs2_inode *ip = GFS2_I(jd->jd_inode); |
| 344 | struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode); | 345 | struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode); |
| 346 | u64 size = i_size_read(jd->jd_inode); | ||
| 345 | 347 | ||
| 346 | if (ip->i_disksize < (8 << 20) || ip->i_disksize > (1 << 30) || | 348 | if (gfs2_check_internal_file_size(jd->jd_inode, 8 << 20, 1 << 30)) |
| 347 | (ip->i_disksize & (sdp->sd_sb.sb_bsize - 1))) { | ||
| 348 | gfs2_consist_inode(ip); | ||
| 349 | return -EIO; | 349 | return -EIO; |
| 350 | } | ||
| 351 | jd->jd_blocks = ip->i_disksize >> sdp->sd_sb.sb_bsize_shift; | ||
| 352 | 350 | ||
| 353 | if (gfs2_write_alloc_required(ip, 0, ip->i_disksize)) { | 351 | jd->jd_blocks = size >> sdp->sd_sb.sb_bsize_shift; |
| 352 | |||
| 353 | if (gfs2_write_alloc_required(ip, 0, size)) { | ||
| 354 | gfs2_consist_inode(ip); | 354 | gfs2_consist_inode(ip); |
| 355 | return -EIO; | 355 | return -EIO; |
| 356 | } | 356 | } |
| @@ -1129,9 +1129,7 @@ static int gfs2_remount_fs(struct super_block *sb, int *flags, char *data) | |||
| 1129 | 1129 | ||
| 1130 | /* Some flags must not be changed */ | 1130 | /* Some flags must not be changed */ |
| 1131 | if (args_neq(&args, &sdp->sd_args, spectator) || | 1131 | if (args_neq(&args, &sdp->sd_args, spectator) || |
| 1132 | args_neq(&args, &sdp->sd_args, ignore_local_fs) || | ||
| 1133 | args_neq(&args, &sdp->sd_args, localflocks) || | 1132 | args_neq(&args, &sdp->sd_args, localflocks) || |
| 1134 | args_neq(&args, &sdp->sd_args, localcaching) || | ||
| 1135 | args_neq(&args, &sdp->sd_args, meta)) | 1133 | args_neq(&args, &sdp->sd_args, meta)) |
| 1136 | return -EINVAL; | 1134 | return -EINVAL; |
| 1137 | 1135 | ||
| @@ -1234,16 +1232,10 @@ static int gfs2_show_options(struct seq_file *s, struct vfsmount *mnt) | |||
| 1234 | seq_printf(s, ",hostdata=%s", args->ar_hostdata); | 1232 | seq_printf(s, ",hostdata=%s", args->ar_hostdata); |
| 1235 | if (args->ar_spectator) | 1233 | if (args->ar_spectator) |
| 1236 | seq_printf(s, ",spectator"); | 1234 | seq_printf(s, ",spectator"); |
| 1237 | if (args->ar_ignore_local_fs) | ||
| 1238 | seq_printf(s, ",ignore_local_fs"); | ||
| 1239 | if (args->ar_localflocks) | 1235 | if (args->ar_localflocks) |
| 1240 | seq_printf(s, ",localflocks"); | 1236 | seq_printf(s, ",localflocks"); |
| 1241 | if (args->ar_localcaching) | ||
| 1242 | seq_printf(s, ",localcaching"); | ||
| 1243 | if (args->ar_debug) | 1237 | if (args->ar_debug) |
| 1244 | seq_printf(s, ",debug"); | 1238 | seq_printf(s, ",debug"); |
| 1245 | if (args->ar_upgrade) | ||
| 1246 | seq_printf(s, ",upgrade"); | ||
| 1247 | if (args->ar_posix_acl) | 1239 | if (args->ar_posix_acl) |
| 1248 | seq_printf(s, ",acl"); | 1240 | seq_printf(s, ",acl"); |
| 1249 | if (args->ar_quota != GFS2_QUOTA_DEFAULT) { | 1241 | if (args->ar_quota != GFS2_QUOTA_DEFAULT) { |
diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c index ccacffd2faaa..748ccb557c18 100644 --- a/fs/gfs2/sys.c +++ b/fs/gfs2/sys.c | |||
| @@ -230,7 +230,10 @@ static ssize_t demote_rq_store(struct gfs2_sbd *sdp, const char *buf, size_t len | |||
| 230 | 230 | ||
| 231 | if (gltype > LM_TYPE_JOURNAL) | 231 | if (gltype > LM_TYPE_JOURNAL) |
| 232 | return -EINVAL; | 232 | return -EINVAL; |
| 233 | glops = gfs2_glops_list[gltype]; | 233 | if (gltype == LM_TYPE_NONDISK && glnum == GFS2_TRANS_LOCK) |
| 234 | glops = &gfs2_trans_glops; | ||
| 235 | else | ||
| 236 | glops = gfs2_glops_list[gltype]; | ||
| 234 | if (glops == NULL) | 237 | if (glops == NULL) |
| 235 | return -EINVAL; | 238 | return -EINVAL; |
| 236 | if (!test_and_set_bit(SDF_DEMOTE, &sdp->sd_flags)) | 239 | if (!test_and_set_bit(SDF_DEMOTE, &sdp->sd_flags)) |
| @@ -399,31 +402,32 @@ static ssize_t recover_status_show(struct gfs2_sbd *sdp, char *buf) | |||
| 399 | 402 | ||
| 400 | static ssize_t jid_show(struct gfs2_sbd *sdp, char *buf) | 403 | static ssize_t jid_show(struct gfs2_sbd *sdp, char *buf) |
| 401 | { | 404 | { |
| 402 | return sprintf(buf, "%u\n", sdp->sd_lockstruct.ls_jid); | 405 | return sprintf(buf, "%d\n", sdp->sd_lockstruct.ls_jid); |
| 403 | } | 406 | } |
| 404 | 407 | ||
| 405 | static ssize_t jid_store(struct gfs2_sbd *sdp, const char *buf, size_t len) | 408 | static ssize_t jid_store(struct gfs2_sbd *sdp, const char *buf, size_t len) |
| 406 | { | 409 | { |
| 407 | unsigned jid; | 410 | int jid; |
| 408 | int rv; | 411 | int rv; |
| 409 | 412 | ||
| 410 | rv = sscanf(buf, "%u", &jid); | 413 | rv = sscanf(buf, "%d", &jid); |
| 411 | if (rv != 1) | 414 | if (rv != 1) |
| 412 | return -EINVAL; | 415 | return -EINVAL; |
| 413 | 416 | ||
| 414 | spin_lock(&sdp->sd_jindex_spin); | 417 | spin_lock(&sdp->sd_jindex_spin); |
| 415 | rv = -EINVAL; | 418 | rv = -EINVAL; |
| 416 | if (sdp->sd_args.ar_spectator) | ||
| 417 | goto out; | ||
| 418 | if (sdp->sd_lockstruct.ls_ops->lm_mount == NULL) | 419 | if (sdp->sd_lockstruct.ls_ops->lm_mount == NULL) |
| 419 | goto out; | 420 | goto out; |
| 420 | rv = -EBUSY; | 421 | rv = -EBUSY; |
| 421 | if (test_and_clear_bit(SDF_NOJOURNALID, &sdp->sd_flags) == 0) | 422 | if (test_bit(SDF_NOJOURNALID, &sdp->sd_flags) == 0) |
| 422 | goto out; | 423 | goto out; |
| 424 | rv = 0; | ||
| 425 | if (sdp->sd_args.ar_spectator && jid > 0) | ||
| 426 | rv = jid = -EINVAL; | ||
| 423 | sdp->sd_lockstruct.ls_jid = jid; | 427 | sdp->sd_lockstruct.ls_jid = jid; |
| 428 | clear_bit(SDF_NOJOURNALID, &sdp->sd_flags); | ||
| 424 | smp_mb__after_clear_bit(); | 429 | smp_mb__after_clear_bit(); |
| 425 | wake_up_bit(&sdp->sd_flags, SDF_NOJOURNALID); | 430 | wake_up_bit(&sdp->sd_flags, SDF_NOJOURNALID); |
| 426 | rv = 0; | ||
| 427 | out: | 431 | out: |
| 428 | spin_unlock(&sdp->sd_jindex_spin); | 432 | spin_unlock(&sdp->sd_jindex_spin); |
| 429 | return rv ? rv : len; | 433 | return rv ? rv : len; |
| @@ -617,7 +621,7 @@ static int gfs2_uevent(struct kset *kset, struct kobject *kobj, | |||
| 617 | add_uevent_var(env, "LOCKTABLE=%s", sdp->sd_table_name); | 621 | add_uevent_var(env, "LOCKTABLE=%s", sdp->sd_table_name); |
| 618 | add_uevent_var(env, "LOCKPROTO=%s", sdp->sd_proto_name); | 622 | add_uevent_var(env, "LOCKPROTO=%s", sdp->sd_proto_name); |
| 619 | if (!test_bit(SDF_NOJOURNALID, &sdp->sd_flags)) | 623 | if (!test_bit(SDF_NOJOURNALID, &sdp->sd_flags)) |
| 620 | add_uevent_var(env, "JOURNALID=%u", sdp->sd_lockstruct.ls_jid); | 624 | add_uevent_var(env, "JOURNALID=%d", sdp->sd_lockstruct.ls_jid); |
| 621 | if (gfs2_uuid_valid(uuid)) | 625 | if (gfs2_uuid_valid(uuid)) |
| 622 | add_uevent_var(env, "UUID=%pUB", uuid); | 626 | add_uevent_var(env, "UUID=%pUB", uuid); |
| 623 | return 0; | 627 | return 0; |
diff --git a/fs/gfs2/trace_gfs2.h b/fs/gfs2/trace_gfs2.h index 148d55c14171..cedb0bb96d96 100644 --- a/fs/gfs2/trace_gfs2.h +++ b/fs/gfs2/trace_gfs2.h | |||
| @@ -39,7 +39,8 @@ | |||
| 39 | {(1UL << GLF_INVALIDATE_IN_PROGRESS), "i" }, \ | 39 | {(1UL << GLF_INVALIDATE_IN_PROGRESS), "i" }, \ |
| 40 | {(1UL << GLF_REPLY_PENDING), "r" }, \ | 40 | {(1UL << GLF_REPLY_PENDING), "r" }, \ |
| 41 | {(1UL << GLF_INITIAL), "I" }, \ | 41 | {(1UL << GLF_INITIAL), "I" }, \ |
| 42 | {(1UL << GLF_FROZEN), "F" }) | 42 | {(1UL << GLF_FROZEN), "F" }, \ |
| 43 | {(1UL << GLF_QUEUED), "q" }) | ||
| 43 | 44 | ||
| 44 | #ifndef NUMPTY | 45 | #ifndef NUMPTY |
| 45 | #define NUMPTY | 46 | #define NUMPTY |
diff --git a/fs/gfs2/trans.h b/fs/gfs2/trans.h index edf9d4bd908e..fb56b783e028 100644 --- a/fs/gfs2/trans.h +++ b/fs/gfs2/trans.h | |||
| @@ -20,11 +20,20 @@ struct gfs2_glock; | |||
| 20 | #define RES_JDATA 1 | 20 | #define RES_JDATA 1 |
| 21 | #define RES_DATA 1 | 21 | #define RES_DATA 1 |
| 22 | #define RES_LEAF 1 | 22 | #define RES_LEAF 1 |
| 23 | #define RES_RG_HDR 1 | ||
| 23 | #define RES_RG_BIT 2 | 24 | #define RES_RG_BIT 2 |
| 24 | #define RES_EATTR 1 | 25 | #define RES_EATTR 1 |
| 25 | #define RES_STATFS 1 | 26 | #define RES_STATFS 1 |
| 26 | #define RES_QUOTA 2 | 27 | #define RES_QUOTA 2 |
| 27 | 28 | ||
| 29 | /* reserve either the number of blocks to be allocated plus the rg header | ||
| 30 | * block, or all of the blocks in the rg, whichever is smaller */ | ||
| 31 | static inline unsigned int gfs2_rg_blocks(const struct gfs2_alloc *al) | ||
| 32 | { | ||
| 33 | return (al->al_requested < al->al_rgd->rd_length)? | ||
| 34 | al->al_requested + 1 : al->al_rgd->rd_length; | ||
| 35 | } | ||
| 36 | |||
| 28 | int gfs2_trans_begin(struct gfs2_sbd *sdp, unsigned int blocks, | 37 | int gfs2_trans_begin(struct gfs2_sbd *sdp, unsigned int blocks, |
| 29 | unsigned int revokes); | 38 | unsigned int revokes); |
| 30 | 39 | ||
diff --git a/fs/gfs2/xattr.c b/fs/gfs2/xattr.c index 776af6eb4bcb..30b58f07c8a6 100644 --- a/fs/gfs2/xattr.c +++ b/fs/gfs2/xattr.c | |||
| @@ -734,7 +734,7 @@ static int ea_alloc_skeleton(struct gfs2_inode *ip, struct gfs2_ea_request *er, | |||
| 734 | goto out_gunlock_q; | 734 | goto out_gunlock_q; |
| 735 | 735 | ||
| 736 | error = gfs2_trans_begin(GFS2_SB(&ip->i_inode), | 736 | error = gfs2_trans_begin(GFS2_SB(&ip->i_inode), |
| 737 | blks + al->al_rgd->rd_length + | 737 | blks + gfs2_rg_blocks(al) + |
| 738 | RES_DINODE + RES_STATFS + RES_QUOTA, 0); | 738 | RES_DINODE + RES_STATFS + RES_QUOTA, 0); |
| 739 | if (error) | 739 | if (error) |
| 740 | goto out_ipres; | 740 | goto out_ipres; |
diff --git a/fs/hfsplus/bfind.c b/fs/hfsplus/bfind.c index 5007a41f1be9..d182438c7ae4 100644 --- a/fs/hfsplus/bfind.c +++ b/fs/hfsplus/bfind.c | |||
| @@ -23,7 +23,7 @@ int hfs_find_init(struct hfs_btree *tree, struct hfs_find_data *fd) | |||
| 23 | fd->search_key = ptr; | 23 | fd->search_key = ptr; |
| 24 | fd->key = ptr + tree->max_key_len + 2; | 24 | fd->key = ptr + tree->max_key_len + 2; |
| 25 | dprint(DBG_BNODE_REFS, "find_init: %d (%p)\n", tree->cnid, __builtin_return_address(0)); | 25 | dprint(DBG_BNODE_REFS, "find_init: %d (%p)\n", tree->cnid, __builtin_return_address(0)); |
| 26 | down(&tree->tree_lock); | 26 | mutex_lock(&tree->tree_lock); |
| 27 | return 0; | 27 | return 0; |
| 28 | } | 28 | } |
| 29 | 29 | ||
| @@ -32,7 +32,7 @@ void hfs_find_exit(struct hfs_find_data *fd) | |||
| 32 | hfs_bnode_put(fd->bnode); | 32 | hfs_bnode_put(fd->bnode); |
| 33 | kfree(fd->search_key); | 33 | kfree(fd->search_key); |
| 34 | dprint(DBG_BNODE_REFS, "find_exit: %d (%p)\n", fd->tree->cnid, __builtin_return_address(0)); | 34 | dprint(DBG_BNODE_REFS, "find_exit: %d (%p)\n", fd->tree->cnid, __builtin_return_address(0)); |
| 35 | up(&fd->tree->tree_lock); | 35 | mutex_unlock(&fd->tree->tree_lock); |
| 36 | fd->tree = NULL; | 36 | fd->tree = NULL; |
| 37 | } | 37 | } |
| 38 | 38 | ||
| @@ -52,6 +52,10 @@ int __hfs_brec_find(struct hfs_bnode *bnode, struct hfs_find_data *fd) | |||
| 52 | rec = (e + b) / 2; | 52 | rec = (e + b) / 2; |
| 53 | len = hfs_brec_lenoff(bnode, rec, &off); | 53 | len = hfs_brec_lenoff(bnode, rec, &off); |
| 54 | keylen = hfs_brec_keylen(bnode, rec); | 54 | keylen = hfs_brec_keylen(bnode, rec); |
| 55 | if (keylen == 0) { | ||
| 56 | res = -EINVAL; | ||
| 57 | goto fail; | ||
| 58 | } | ||
| 55 | hfs_bnode_read(bnode, fd->key, off, keylen); | 59 | hfs_bnode_read(bnode, fd->key, off, keylen); |
| 56 | cmpval = bnode->tree->keycmp(fd->key, fd->search_key); | 60 | cmpval = bnode->tree->keycmp(fd->key, fd->search_key); |
| 57 | if (!cmpval) { | 61 | if (!cmpval) { |
| @@ -67,6 +71,10 @@ int __hfs_brec_find(struct hfs_bnode *bnode, struct hfs_find_data *fd) | |||
| 67 | if (rec != e && e >= 0) { | 71 | if (rec != e && e >= 0) { |
| 68 | len = hfs_brec_lenoff(bnode, e, &off); | 72 | len = hfs_brec_lenoff(bnode, e, &off); |
| 69 | keylen = hfs_brec_keylen(bnode, e); | 73 | keylen = hfs_brec_keylen(bnode, e); |
| 74 | if (keylen == 0) { | ||
| 75 | res = -EINVAL; | ||
| 76 | goto fail; | ||
| 77 | } | ||
| 70 | hfs_bnode_read(bnode, fd->key, off, keylen); | 78 | hfs_bnode_read(bnode, fd->key, off, keylen); |
| 71 | } | 79 | } |
| 72 | done: | 80 | done: |
| @@ -75,6 +83,7 @@ done: | |||
| 75 | fd->keylength = keylen; | 83 | fd->keylength = keylen; |
| 76 | fd->entryoffset = off + keylen; | 84 | fd->entryoffset = off + keylen; |
| 77 | fd->entrylength = len - keylen; | 85 | fd->entrylength = len - keylen; |
| 86 | fail: | ||
| 78 | return res; | 87 | return res; |
| 79 | } | 88 | } |
| 80 | 89 | ||
| @@ -198,6 +207,10 @@ int hfs_brec_goto(struct hfs_find_data *fd, int cnt) | |||
| 198 | 207 | ||
| 199 | len = hfs_brec_lenoff(bnode, fd->record, &off); | 208 | len = hfs_brec_lenoff(bnode, fd->record, &off); |
| 200 | keylen = hfs_brec_keylen(bnode, fd->record); | 209 | keylen = hfs_brec_keylen(bnode, fd->record); |
| 210 | if (keylen == 0) { | ||
| 211 | res = -EINVAL; | ||
| 212 | goto out; | ||
| 213 | } | ||
| 201 | fd->keyoffset = off; | 214 | fd->keyoffset = off; |
| 202 | fd->keylength = keylen; | 215 | fd->keylength = keylen; |
| 203 | fd->entryoffset = off + keylen; | 216 | fd->entryoffset = off + keylen; |
diff --git a/fs/hfsplus/bitmap.c b/fs/hfsplus/bitmap.c index ea30afc2a03c..ad57f5991eb1 100644 --- a/fs/hfsplus/bitmap.c +++ b/fs/hfsplus/bitmap.c | |||
| @@ -17,6 +17,7 @@ | |||
| 17 | 17 | ||
| 18 | int hfsplus_block_allocate(struct super_block *sb, u32 size, u32 offset, u32 *max) | 18 | int hfsplus_block_allocate(struct super_block *sb, u32 size, u32 offset, u32 *max) |
| 19 | { | 19 | { |
| 20 | struct hfsplus_sb_info *sbi = HFSPLUS_SB(sb); | ||
| 20 | struct page *page; | 21 | struct page *page; |
| 21 | struct address_space *mapping; | 22 | struct address_space *mapping; |
| 22 | __be32 *pptr, *curr, *end; | 23 | __be32 *pptr, *curr, *end; |
| @@ -29,8 +30,8 @@ int hfsplus_block_allocate(struct super_block *sb, u32 size, u32 offset, u32 *ma | |||
| 29 | return size; | 30 | return size; |
| 30 | 31 | ||
| 31 | dprint(DBG_BITMAP, "block_allocate: %u,%u,%u\n", size, offset, len); | 32 | dprint(DBG_BITMAP, "block_allocate: %u,%u,%u\n", size, offset, len); |
| 32 | mutex_lock(&HFSPLUS_SB(sb).alloc_file->i_mutex); | 33 | mutex_lock(&sbi->alloc_mutex); |
| 33 | mapping = HFSPLUS_SB(sb).alloc_file->i_mapping; | 34 | mapping = sbi->alloc_file->i_mapping; |
| 34 | page = read_mapping_page(mapping, offset / PAGE_CACHE_BITS, NULL); | 35 | page = read_mapping_page(mapping, offset / PAGE_CACHE_BITS, NULL); |
| 35 | if (IS_ERR(page)) { | 36 | if (IS_ERR(page)) { |
| 36 | start = size; | 37 | start = size; |
| @@ -150,16 +151,17 @@ done: | |||
| 150 | set_page_dirty(page); | 151 | set_page_dirty(page); |
| 151 | kunmap(page); | 152 | kunmap(page); |
| 152 | *max = offset + (curr - pptr) * 32 + i - start; | 153 | *max = offset + (curr - pptr) * 32 + i - start; |
| 153 | HFSPLUS_SB(sb).free_blocks -= *max; | 154 | sbi->free_blocks -= *max; |
| 154 | sb->s_dirt = 1; | 155 | sb->s_dirt = 1; |
| 155 | dprint(DBG_BITMAP, "-> %u,%u\n", start, *max); | 156 | dprint(DBG_BITMAP, "-> %u,%u\n", start, *max); |
| 156 | out: | 157 | out: |
| 157 | mutex_unlock(&HFSPLUS_SB(sb).alloc_file->i_mutex); | 158 | mutex_unlock(&sbi->alloc_mutex); |
| 158 | return start; | 159 | return start; |
| 159 | } | 160 | } |
| 160 | 161 | ||
| 161 | int hfsplus_block_free(struct super_block *sb, u32 offset, u32 count) | 162 | int hfsplus_block_free(struct super_block *sb, u32 offset, u32 count) |
| 162 | { | 163 | { |
| 164 | struct hfsplus_sb_info *sbi = HFSPLUS_SB(sb); | ||
| 163 | struct page *page; | 165 | struct page *page; |
| 164 | struct address_space *mapping; | 166 | struct address_space *mapping; |
| 165 | __be32 *pptr, *curr, *end; | 167 | __be32 *pptr, *curr, *end; |
| @@ -172,11 +174,11 @@ int hfsplus_block_free(struct super_block *sb, u32 offset, u32 count) | |||
| 172 | 174 | ||
| 173 | dprint(DBG_BITMAP, "block_free: %u,%u\n", offset, count); | 175 | dprint(DBG_BITMAP, "block_free: %u,%u\n", offset, count); |
| 174 | /* are all of the bits in range? */ | 176 | /* are all of the bits in range? */ |
| 175 | if ((offset + count) > HFSPLUS_SB(sb).total_blocks) | 177 | if ((offset + count) > sbi->total_blocks) |
| 176 | return -2; | 178 | return -2; |
| 177 | 179 | ||
| 178 | mutex_lock(&HFSPLUS_SB(sb).alloc_file->i_mutex); | 180 | mutex_lock(&sbi->alloc_mutex); |
| 179 | mapping = HFSPLUS_SB(sb).alloc_file->i_mapping; | 181 | mapping = sbi->alloc_file->i_mapping; |
| 180 | pnr = offset / PAGE_CACHE_BITS; | 182 | pnr = offset / PAGE_CACHE_BITS; |
| 181 | page = read_mapping_page(mapping, pnr, NULL); | 183 | page = read_mapping_page(mapping, pnr, NULL); |
| 182 | pptr = kmap(page); | 184 | pptr = kmap(page); |
| @@ -224,9 +226,9 @@ done: | |||
| 224 | out: | 226 | out: |
| 225 | set_page_dirty(page); | 227 | set_page_dirty(page); |
| 226 | kunmap(page); | 228 | kunmap(page); |
| 227 | HFSPLUS_SB(sb).free_blocks += len; | 229 | sbi->free_blocks += len; |
| 228 | sb->s_dirt = 1; | 230 | sb->s_dirt = 1; |
| 229 | mutex_unlock(&HFSPLUS_SB(sb).alloc_file->i_mutex); | 231 | mutex_unlock(&sbi->alloc_mutex); |
| 230 | 232 | ||
| 231 | return 0; | 233 | return 0; |
| 232 | } | 234 | } |
diff --git a/fs/hfsplus/brec.c b/fs/hfsplus/brec.c index c88e5d72a402..2f39d05443e1 100644 --- a/fs/hfsplus/brec.c +++ b/fs/hfsplus/brec.c | |||
| @@ -42,10 +42,13 @@ u16 hfs_brec_keylen(struct hfs_bnode *node, u16 rec) | |||
| 42 | recoff = hfs_bnode_read_u16(node, node->tree->node_size - (rec + 1) * 2); | 42 | recoff = hfs_bnode_read_u16(node, node->tree->node_size - (rec + 1) * 2); |
| 43 | if (!recoff) | 43 | if (!recoff) |
| 44 | return 0; | 44 | return 0; |
| 45 | if (node->tree->attributes & HFS_TREE_BIGKEYS) | 45 | |
| 46 | retval = hfs_bnode_read_u16(node, recoff) + 2; | 46 | retval = hfs_bnode_read_u16(node, recoff) + 2; |
| 47 | else | 47 | if (retval > node->tree->max_key_len + 2) { |
| 48 | retval = (hfs_bnode_read_u8(node, recoff) | 1) + 1; | 48 | printk(KERN_ERR "hfs: keylen %d too large\n", |
| 49 | retval); | ||
| 50 | retval = 0; | ||
| 51 | } | ||
| 49 | } | 52 | } |
| 50 | return retval; | 53 | return retval; |
| 51 | } | 54 | } |
| @@ -216,7 +219,7 @@ skip: | |||
| 216 | static struct hfs_bnode *hfs_bnode_split(struct hfs_find_data *fd) | 219 | static struct hfs_bnode *hfs_bnode_split(struct hfs_find_data *fd) |
| 217 | { | 220 | { |
| 218 | struct hfs_btree *tree; | 221 | struct hfs_btree *tree; |
| 219 | struct hfs_bnode *node, *new_node; | 222 | struct hfs_bnode *node, *new_node, *next_node; |
| 220 | struct hfs_bnode_desc node_desc; | 223 | struct hfs_bnode_desc node_desc; |
| 221 | int num_recs, new_rec_off, new_off, old_rec_off; | 224 | int num_recs, new_rec_off, new_off, old_rec_off; |
| 222 | int data_start, data_end, size; | 225 | int data_start, data_end, size; |
| @@ -235,6 +238,17 @@ static struct hfs_bnode *hfs_bnode_split(struct hfs_find_data *fd) | |||
| 235 | new_node->type = node->type; | 238 | new_node->type = node->type; |
| 236 | new_node->height = node->height; | 239 | new_node->height = node->height; |
| 237 | 240 | ||
| 241 | if (node->next) | ||
| 242 | next_node = hfs_bnode_find(tree, node->next); | ||
| 243 | else | ||
| 244 | next_node = NULL; | ||
| 245 | |||
| 246 | if (IS_ERR(next_node)) { | ||
| 247 | hfs_bnode_put(node); | ||
| 248 | hfs_bnode_put(new_node); | ||
| 249 | return next_node; | ||
| 250 | } | ||
| 251 | |||
| 238 | size = tree->node_size / 2 - node->num_recs * 2 - 14; | 252 | size = tree->node_size / 2 - node->num_recs * 2 - 14; |
| 239 | old_rec_off = tree->node_size - 4; | 253 | old_rec_off = tree->node_size - 4; |
| 240 | num_recs = 1; | 254 | num_recs = 1; |
| @@ -248,6 +262,8 @@ static struct hfs_bnode *hfs_bnode_split(struct hfs_find_data *fd) | |||
| 248 | /* panic? */ | 262 | /* panic? */ |
| 249 | hfs_bnode_put(node); | 263 | hfs_bnode_put(node); |
| 250 | hfs_bnode_put(new_node); | 264 | hfs_bnode_put(new_node); |
| 265 | if (next_node) | ||
| 266 | hfs_bnode_put(next_node); | ||
| 251 | return ERR_PTR(-ENOSPC); | 267 | return ERR_PTR(-ENOSPC); |
| 252 | } | 268 | } |
| 253 | 269 | ||
| @@ -302,8 +318,7 @@ static struct hfs_bnode *hfs_bnode_split(struct hfs_find_data *fd) | |||
| 302 | hfs_bnode_write(node, &node_desc, 0, sizeof(node_desc)); | 318 | hfs_bnode_write(node, &node_desc, 0, sizeof(node_desc)); |
| 303 | 319 | ||
| 304 | /* update next bnode header */ | 320 | /* update next bnode header */ |
| 305 | if (new_node->next) { | 321 | if (next_node) { |
| 306 | struct hfs_bnode *next_node = hfs_bnode_find(tree, new_node->next); | ||
| 307 | next_node->prev = new_node->this; | 322 | next_node->prev = new_node->this; |
| 308 | hfs_bnode_read(next_node, &node_desc, 0, sizeof(node_desc)); | 323 | hfs_bnode_read(next_node, &node_desc, 0, sizeof(node_desc)); |
| 309 | node_desc.prev = cpu_to_be32(next_node->prev); | 324 | node_desc.prev = cpu_to_be32(next_node->prev); |
diff --git a/fs/hfsplus/btree.c b/fs/hfsplus/btree.c index e49fcee1e293..22e4d4e32999 100644 --- a/fs/hfsplus/btree.c +++ b/fs/hfsplus/btree.c | |||
| @@ -30,7 +30,7 @@ struct hfs_btree *hfs_btree_open(struct super_block *sb, u32 id) | |||
| 30 | if (!tree) | 30 | if (!tree) |
| 31 | return NULL; | 31 | return NULL; |
| 32 | 32 | ||
| 33 | init_MUTEX(&tree->tree_lock); | 33 | mutex_init(&tree->tree_lock); |
| 34 | spin_lock_init(&tree->hash_lock); | 34 | spin_lock_init(&tree->hash_lock); |
| 35 | tree->sb = sb; | 35 | tree->sb = sb; |
| 36 | tree->cnid = id; | 36 | tree->cnid = id; |
| @@ -39,10 +39,16 @@ struct hfs_btree *hfs_btree_open(struct super_block *sb, u32 id) | |||
| 39 | goto free_tree; | 39 | goto free_tree; |
| 40 | tree->inode = inode; | 40 | tree->inode = inode; |
| 41 | 41 | ||
| 42 | if (!HFSPLUS_I(tree->inode)->first_blocks) { | ||
| 43 | printk(KERN_ERR | ||
| 44 | "hfs: invalid btree extent records (0 size).\n"); | ||
| 45 | goto free_inode; | ||
| 46 | } | ||
| 47 | |||
| 42 | mapping = tree->inode->i_mapping; | 48 | mapping = tree->inode->i_mapping; |
| 43 | page = read_mapping_page(mapping, 0, NULL); | 49 | page = read_mapping_page(mapping, 0, NULL); |
| 44 | if (IS_ERR(page)) | 50 | if (IS_ERR(page)) |
| 45 | goto free_tree; | 51 | goto free_inode; |
| 46 | 52 | ||
| 47 | /* Load the header */ | 53 | /* Load the header */ |
| 48 | head = (struct hfs_btree_header_rec *)(kmap(page) + sizeof(struct hfs_bnode_desc)); | 54 | head = (struct hfs_btree_header_rec *)(kmap(page) + sizeof(struct hfs_bnode_desc)); |
| @@ -57,27 +63,56 @@ struct hfs_btree *hfs_btree_open(struct super_block *sb, u32 id) | |||
| 57 | tree->max_key_len = be16_to_cpu(head->max_key_len); | 63 | tree->max_key_len = be16_to_cpu(head->max_key_len); |
| 58 | tree->depth = be16_to_cpu(head->depth); | 64 | tree->depth = be16_to_cpu(head->depth); |
| 59 | 65 | ||
| 60 | /* Set the correct compare function */ | 66 | /* Verify the tree and set the correct compare function */ |
| 61 | if (id == HFSPLUS_EXT_CNID) { | 67 | switch (id) { |
| 68 | case HFSPLUS_EXT_CNID: | ||
| 69 | if (tree->max_key_len != HFSPLUS_EXT_KEYLEN - sizeof(u16)) { | ||
| 70 | printk(KERN_ERR "hfs: invalid extent max_key_len %d\n", | ||
| 71 | tree->max_key_len); | ||
| 72 | goto fail_page; | ||
| 73 | } | ||
| 74 | if (tree->attributes & HFS_TREE_VARIDXKEYS) { | ||
| 75 | printk(KERN_ERR "hfs: invalid extent btree flag\n"); | ||
| 76 | goto fail_page; | ||
| 77 | } | ||
| 78 | |||
| 62 | tree->keycmp = hfsplus_ext_cmp_key; | 79 | tree->keycmp = hfsplus_ext_cmp_key; |
| 63 | } else if (id == HFSPLUS_CAT_CNID) { | 80 | break; |
| 64 | if ((HFSPLUS_SB(sb).flags & HFSPLUS_SB_HFSX) && | 81 | case HFSPLUS_CAT_CNID: |
| 82 | if (tree->max_key_len != HFSPLUS_CAT_KEYLEN - sizeof(u16)) { | ||
| 83 | printk(KERN_ERR "hfs: invalid catalog max_key_len %d\n", | ||
| 84 | tree->max_key_len); | ||
| 85 | goto fail_page; | ||
| 86 | } | ||
| 87 | if (!(tree->attributes & HFS_TREE_VARIDXKEYS)) { | ||
| 88 | printk(KERN_ERR "hfs: invalid catalog btree flag\n"); | ||
| 89 | goto fail_page; | ||
| 90 | } | ||
| 91 | |||
| 92 | if (test_bit(HFSPLUS_SB_HFSX, &HFSPLUS_SB(sb)->flags) && | ||
| 65 | (head->key_type == HFSPLUS_KEY_BINARY)) | 93 | (head->key_type == HFSPLUS_KEY_BINARY)) |
| 66 | tree->keycmp = hfsplus_cat_bin_cmp_key; | 94 | tree->keycmp = hfsplus_cat_bin_cmp_key; |
| 67 | else { | 95 | else { |
| 68 | tree->keycmp = hfsplus_cat_case_cmp_key; | 96 | tree->keycmp = hfsplus_cat_case_cmp_key; |
| 69 | HFSPLUS_SB(sb).flags |= HFSPLUS_SB_CASEFOLD; | 97 | set_bit(HFSPLUS_SB_CASEFOLD, &HFSPLUS_SB(sb)->flags); |
| 70 | } | 98 | } |
| 71 | } else { | 99 | break; |
| 100 | default: | ||
| 72 | printk(KERN_ERR "hfs: unknown B*Tree requested\n"); | 101 | printk(KERN_ERR "hfs: unknown B*Tree requested\n"); |
| 73 | goto fail_page; | 102 | goto fail_page; |
| 74 | } | 103 | } |
| 75 | 104 | ||
| 105 | if (!(tree->attributes & HFS_TREE_BIGKEYS)) { | ||
| 106 | printk(KERN_ERR "hfs: invalid btree flag\n"); | ||
| 107 | goto fail_page; | ||
| 108 | } | ||
| 109 | |||
| 76 | size = tree->node_size; | 110 | size = tree->node_size; |
| 77 | if (!is_power_of_2(size)) | 111 | if (!is_power_of_2(size)) |
| 78 | goto fail_page; | 112 | goto fail_page; |
| 79 | if (!tree->node_count) | 113 | if (!tree->node_count) |
| 80 | goto fail_page; | 114 | goto fail_page; |
| 115 | |||
| 81 | tree->node_size_shift = ffs(size) - 1; | 116 | tree->node_size_shift = ffs(size) - 1; |
| 82 | 117 | ||
| 83 | tree->pages_per_bnode = (tree->node_size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; | 118 | tree->pages_per_bnode = (tree->node_size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; |
| @@ -87,10 +122,11 @@ struct hfs_btree *hfs_btree_open(struct super_block *sb, u32 id) | |||
| 87 | return tree; | 122 | return tree; |
| 88 | 123 | ||
| 89 | fail_page: | 124 | fail_page: |
| 90 | tree->inode->i_mapping->a_ops = &hfsplus_aops; | ||
| 91 | page_cache_release(page); | 125 | page_cache_release(page); |
| 92 | free_tree: | 126 | free_inode: |
| 127 | tree->inode->i_mapping->a_ops = &hfsplus_aops; | ||
| 93 | iput(tree->inode); | 128 | iput(tree->inode); |
| 129 | free_tree: | ||
| 94 | kfree(tree); | 130 | kfree(tree); |
| 95 | return NULL; | 131 | return NULL; |
| 96 | } | 132 | } |
| @@ -192,17 +228,18 @@ struct hfs_bnode *hfs_bmap_alloc(struct hfs_btree *tree) | |||
| 192 | 228 | ||
| 193 | while (!tree->free_nodes) { | 229 | while (!tree->free_nodes) { |
| 194 | struct inode *inode = tree->inode; | 230 | struct inode *inode = tree->inode; |
| 231 | struct hfsplus_inode_info *hip = HFSPLUS_I(inode); | ||
| 195 | u32 count; | 232 | u32 count; |
| 196 | int res; | 233 | int res; |
| 197 | 234 | ||
| 198 | res = hfsplus_file_extend(inode); | 235 | res = hfsplus_file_extend(inode); |
| 199 | if (res) | 236 | if (res) |
| 200 | return ERR_PTR(res); | 237 | return ERR_PTR(res); |
| 201 | HFSPLUS_I(inode).phys_size = inode->i_size = | 238 | hip->phys_size = inode->i_size = |
| 202 | (loff_t)HFSPLUS_I(inode).alloc_blocks << | 239 | (loff_t)hip->alloc_blocks << |
| 203 | HFSPLUS_SB(tree->sb).alloc_blksz_shift; | 240 | HFSPLUS_SB(tree->sb)->alloc_blksz_shift; |
| 204 | HFSPLUS_I(inode).fs_blocks = HFSPLUS_I(inode).alloc_blocks << | 241 | hip->fs_blocks = |
| 205 | HFSPLUS_SB(tree->sb).fs_shift; | 242 | hip->alloc_blocks << HFSPLUS_SB(tree->sb)->fs_shift; |
| 206 | inode_set_bytes(inode, inode->i_size); | 243 | inode_set_bytes(inode, inode->i_size); |
| 207 | count = inode->i_size >> tree->node_size_shift; | 244 | count = inode->i_size >> tree->node_size_shift; |
| 208 | tree->free_nodes = count - tree->node_count; | 245 | tree->free_nodes = count - tree->node_count; |
diff --git a/fs/hfsplus/catalog.c b/fs/hfsplus/catalog.c index f6874acb2cf2..8af45fc5b051 100644 --- a/fs/hfsplus/catalog.c +++ b/fs/hfsplus/catalog.c | |||
| @@ -67,7 +67,7 @@ static void hfsplus_cat_build_key_uni(hfsplus_btree_key *key, u32 parent, | |||
| 67 | key->key_len = cpu_to_be16(6 + ustrlen); | 67 | key->key_len = cpu_to_be16(6 + ustrlen); |
| 68 | } | 68 | } |
| 69 | 69 | ||
| 70 | static void hfsplus_set_perms(struct inode *inode, struct hfsplus_perm *perms) | 70 | void hfsplus_cat_set_perms(struct inode *inode, struct hfsplus_perm *perms) |
| 71 | { | 71 | { |
| 72 | if (inode->i_flags & S_IMMUTABLE) | 72 | if (inode->i_flags & S_IMMUTABLE) |
| 73 | perms->rootflags |= HFSPLUS_FLG_IMMUTABLE; | 73 | perms->rootflags |= HFSPLUS_FLG_IMMUTABLE; |
| @@ -77,15 +77,24 @@ static void hfsplus_set_perms(struct inode *inode, struct hfsplus_perm *perms) | |||
| 77 | perms->rootflags |= HFSPLUS_FLG_APPEND; | 77 | perms->rootflags |= HFSPLUS_FLG_APPEND; |
| 78 | else | 78 | else |
| 79 | perms->rootflags &= ~HFSPLUS_FLG_APPEND; | 79 | perms->rootflags &= ~HFSPLUS_FLG_APPEND; |
| 80 | HFSPLUS_I(inode).rootflags = perms->rootflags; | 80 | |
| 81 | HFSPLUS_I(inode).userflags = perms->userflags; | 81 | perms->userflags = HFSPLUS_I(inode)->userflags; |
| 82 | perms->mode = cpu_to_be16(inode->i_mode); | 82 | perms->mode = cpu_to_be16(inode->i_mode); |
| 83 | perms->owner = cpu_to_be32(inode->i_uid); | 83 | perms->owner = cpu_to_be32(inode->i_uid); |
| 84 | perms->group = cpu_to_be32(inode->i_gid); | 84 | perms->group = cpu_to_be32(inode->i_gid); |
| 85 | |||
| 86 | if (S_ISREG(inode->i_mode)) | ||
| 87 | perms->dev = cpu_to_be32(inode->i_nlink); | ||
| 88 | else if (S_ISBLK(inode->i_mode) || S_ISCHR(inode->i_mode)) | ||
| 89 | perms->dev = cpu_to_be32(inode->i_rdev); | ||
| 90 | else | ||
| 91 | perms->dev = 0; | ||
| 85 | } | 92 | } |
| 86 | 93 | ||
| 87 | static int hfsplus_cat_build_record(hfsplus_cat_entry *entry, u32 cnid, struct inode *inode) | 94 | static int hfsplus_cat_build_record(hfsplus_cat_entry *entry, u32 cnid, struct inode *inode) |
| 88 | { | 95 | { |
| 96 | struct hfsplus_sb_info *sbi = HFSPLUS_SB(inode->i_sb); | ||
| 97 | |||
| 89 | if (S_ISDIR(inode->i_mode)) { | 98 | if (S_ISDIR(inode->i_mode)) { |
| 90 | struct hfsplus_cat_folder *folder; | 99 | struct hfsplus_cat_folder *folder; |
| 91 | 100 | ||
| @@ -93,13 +102,13 @@ static int hfsplus_cat_build_record(hfsplus_cat_entry *entry, u32 cnid, struct i | |||
| 93 | memset(folder, 0, sizeof(*folder)); | 102 | memset(folder, 0, sizeof(*folder)); |
| 94 | folder->type = cpu_to_be16(HFSPLUS_FOLDER); | 103 | folder->type = cpu_to_be16(HFSPLUS_FOLDER); |
| 95 | folder->id = cpu_to_be32(inode->i_ino); | 104 | folder->id = cpu_to_be32(inode->i_ino); |
| 96 | HFSPLUS_I(inode).create_date = | 105 | HFSPLUS_I(inode)->create_date = |
| 97 | folder->create_date = | 106 | folder->create_date = |
| 98 | folder->content_mod_date = | 107 | folder->content_mod_date = |
| 99 | folder->attribute_mod_date = | 108 | folder->attribute_mod_date = |
| 100 | folder->access_date = hfsp_now2mt(); | 109 | folder->access_date = hfsp_now2mt(); |
| 101 | hfsplus_set_perms(inode, &folder->permissions); | 110 | hfsplus_cat_set_perms(inode, &folder->permissions); |
| 102 | if (inode == HFSPLUS_SB(inode->i_sb).hidden_dir) | 111 | if (inode == sbi->hidden_dir) |
| 103 | /* invisible and namelocked */ | 112 | /* invisible and namelocked */ |
| 104 | folder->user_info.frFlags = cpu_to_be16(0x5000); | 113 | folder->user_info.frFlags = cpu_to_be16(0x5000); |
| 105 | return sizeof(*folder); | 114 | return sizeof(*folder); |
| @@ -111,19 +120,19 @@ static int hfsplus_cat_build_record(hfsplus_cat_entry *entry, u32 cnid, struct i | |||
| 111 | file->type = cpu_to_be16(HFSPLUS_FILE); | 120 | file->type = cpu_to_be16(HFSPLUS_FILE); |
| 112 | file->flags = cpu_to_be16(HFSPLUS_FILE_THREAD_EXISTS); | 121 | file->flags = cpu_to_be16(HFSPLUS_FILE_THREAD_EXISTS); |
| 113 | file->id = cpu_to_be32(cnid); | 122 | file->id = cpu_to_be32(cnid); |
| 114 | HFSPLUS_I(inode).create_date = | 123 | HFSPLUS_I(inode)->create_date = |
| 115 | file->create_date = | 124 | file->create_date = |
| 116 | file->content_mod_date = | 125 | file->content_mod_date = |
| 117 | file->attribute_mod_date = | 126 | file->attribute_mod_date = |
| 118 | file->access_date = hfsp_now2mt(); | 127 | file->access_date = hfsp_now2mt(); |
| 119 | if (cnid == inode->i_ino) { | 128 | if (cnid == inode->i_ino) { |
| 120 | hfsplus_set_perms(inode, &file->permissions); | 129 | hfsplus_cat_set_perms(inode, &file->permissions); |
| 121 | if (S_ISLNK(inode->i_mode)) { | 130 | if (S_ISLNK(inode->i_mode)) { |
| 122 | file->user_info.fdType = cpu_to_be32(HFSP_SYMLINK_TYPE); | 131 | file->user_info.fdType = cpu_to_be32(HFSP_SYMLINK_TYPE); |
| 123 | file->user_info.fdCreator = cpu_to_be32(HFSP_SYMLINK_CREATOR); | 132 | file->user_info.fdCreator = cpu_to_be32(HFSP_SYMLINK_CREATOR); |
| 124 | } else { | 133 | } else { |
| 125 | file->user_info.fdType = cpu_to_be32(HFSPLUS_SB(inode->i_sb).type); | 134 | file->user_info.fdType = cpu_to_be32(sbi->type); |
| 126 | file->user_info.fdCreator = cpu_to_be32(HFSPLUS_SB(inode->i_sb).creator); | 135 | file->user_info.fdCreator = cpu_to_be32(sbi->creator); |
| 127 | } | 136 | } |
| 128 | if ((file->permissions.rootflags | file->permissions.userflags) & HFSPLUS_FLG_IMMUTABLE) | 137 | if ((file->permissions.rootflags | file->permissions.userflags) & HFSPLUS_FLG_IMMUTABLE) |
| 129 | file->flags |= cpu_to_be16(HFSPLUS_FILE_LOCKED); | 138 | file->flags |= cpu_to_be16(HFSPLUS_FILE_LOCKED); |
| @@ -131,8 +140,8 @@ static int hfsplus_cat_build_record(hfsplus_cat_entry *entry, u32 cnid, struct i | |||
| 131 | file->user_info.fdType = cpu_to_be32(HFSP_HARDLINK_TYPE); | 140 | file->user_info.fdType = cpu_to_be32(HFSP_HARDLINK_TYPE); |
| 132 | file->user_info.fdCreator = cpu_to_be32(HFSP_HFSPLUS_CREATOR); | 141 | file->user_info.fdCreator = cpu_to_be32(HFSP_HFSPLUS_CREATOR); |
| 133 | file->user_info.fdFlags = cpu_to_be16(0x100); | 142 | file->user_info.fdFlags = cpu_to_be16(0x100); |
| 134 | file->create_date = HFSPLUS_I(HFSPLUS_SB(inode->i_sb).hidden_dir).create_date; | 143 | file->create_date = HFSPLUS_I(sbi->hidden_dir)->create_date; |
| 135 | file->permissions.dev = cpu_to_be32(HFSPLUS_I(inode).dev); | 144 | file->permissions.dev = cpu_to_be32(HFSPLUS_I(inode)->linkid); |
| 136 | } | 145 | } |
| 137 | return sizeof(*file); | 146 | return sizeof(*file); |
| 138 | } | 147 | } |
| @@ -180,15 +189,14 @@ int hfsplus_find_cat(struct super_block *sb, u32 cnid, | |||
| 180 | 189 | ||
| 181 | int hfsplus_create_cat(u32 cnid, struct inode *dir, struct qstr *str, struct inode *inode) | 190 | int hfsplus_create_cat(u32 cnid, struct inode *dir, struct qstr *str, struct inode *inode) |
| 182 | { | 191 | { |
| 192 | struct super_block *sb = dir->i_sb; | ||
| 183 | struct hfs_find_data fd; | 193 | struct hfs_find_data fd; |
| 184 | struct super_block *sb; | ||
| 185 | hfsplus_cat_entry entry; | 194 | hfsplus_cat_entry entry; |
| 186 | int entry_size; | 195 | int entry_size; |
| 187 | int err; | 196 | int err; |
| 188 | 197 | ||
| 189 | dprint(DBG_CAT_MOD, "create_cat: %s,%u(%d)\n", str->name, cnid, inode->i_nlink); | 198 | dprint(DBG_CAT_MOD, "create_cat: %s,%u(%d)\n", str->name, cnid, inode->i_nlink); |
| 190 | sb = dir->i_sb; | 199 | hfs_find_init(HFSPLUS_SB(sb)->cat_tree, &fd); |
| 191 | hfs_find_init(HFSPLUS_SB(sb).cat_tree, &fd); | ||
| 192 | 200 | ||
| 193 | hfsplus_cat_build_key(sb, fd.search_key, cnid, NULL); | 201 | hfsplus_cat_build_key(sb, fd.search_key, cnid, NULL); |
| 194 | entry_size = hfsplus_fill_cat_thread(sb, &entry, S_ISDIR(inode->i_mode) ? | 202 | entry_size = hfsplus_fill_cat_thread(sb, &entry, S_ISDIR(inode->i_mode) ? |
| @@ -234,7 +242,7 @@ err2: | |||
| 234 | 242 | ||
| 235 | int hfsplus_delete_cat(u32 cnid, struct inode *dir, struct qstr *str) | 243 | int hfsplus_delete_cat(u32 cnid, struct inode *dir, struct qstr *str) |
| 236 | { | 244 | { |
| 237 | struct super_block *sb; | 245 | struct super_block *sb = dir->i_sb; |
| 238 | struct hfs_find_data fd; | 246 | struct hfs_find_data fd; |
| 239 | struct hfsplus_fork_raw fork; | 247 | struct hfsplus_fork_raw fork; |
| 240 | struct list_head *pos; | 248 | struct list_head *pos; |
| @@ -242,8 +250,7 @@ int hfsplus_delete_cat(u32 cnid, struct inode *dir, struct qstr *str) | |||
| 242 | u16 type; | 250 | u16 type; |
| 243 | 251 | ||
| 244 | dprint(DBG_CAT_MOD, "delete_cat: %s,%u\n", str ? str->name : NULL, cnid); | 252 | dprint(DBG_CAT_MOD, "delete_cat: %s,%u\n", str ? str->name : NULL, cnid); |
| 245 | sb = dir->i_sb; | 253 | hfs_find_init(HFSPLUS_SB(sb)->cat_tree, &fd); |
| 246 | hfs_find_init(HFSPLUS_SB(sb).cat_tree, &fd); | ||
| 247 | 254 | ||
| 248 | if (!str) { | 255 | if (!str) { |
| 249 | int len; | 256 | int len; |
| @@ -279,7 +286,7 @@ int hfsplus_delete_cat(u32 cnid, struct inode *dir, struct qstr *str) | |||
| 279 | hfsplus_free_fork(sb, cnid, &fork, HFSPLUS_TYPE_RSRC); | 286 | hfsplus_free_fork(sb, cnid, &fork, HFSPLUS_TYPE_RSRC); |
| 280 | } | 287 | } |
| 281 | 288 | ||
| 282 | list_for_each(pos, &HFSPLUS_I(dir).open_dir_list) { | 289 | list_for_each(pos, &HFSPLUS_I(dir)->open_dir_list) { |
| 283 | struct hfsplus_readdir_data *rd = | 290 | struct hfsplus_readdir_data *rd = |
| 284 | list_entry(pos, struct hfsplus_readdir_data, list); | 291 | list_entry(pos, struct hfsplus_readdir_data, list); |
| 285 | if (fd.tree->keycmp(fd.search_key, (void *)&rd->key) < 0) | 292 | if (fd.tree->keycmp(fd.search_key, (void *)&rd->key) < 0) |
| @@ -312,7 +319,7 @@ int hfsplus_rename_cat(u32 cnid, | |||
| 312 | struct inode *src_dir, struct qstr *src_name, | 319 | struct inode *src_dir, struct qstr *src_name, |
| 313 | struct inode *dst_dir, struct qstr *dst_name) | 320 | struct inode *dst_dir, struct qstr *dst_name) |
| 314 | { | 321 | { |
| 315 | struct super_block *sb; | 322 | struct super_block *sb = src_dir->i_sb; |
| 316 | struct hfs_find_data src_fd, dst_fd; | 323 | struct hfs_find_data src_fd, dst_fd; |
| 317 | hfsplus_cat_entry entry; | 324 | hfsplus_cat_entry entry; |
| 318 | int entry_size, type; | 325 | int entry_size, type; |
| @@ -320,8 +327,7 @@ int hfsplus_rename_cat(u32 cnid, | |||
| 320 | 327 | ||
| 321 | dprint(DBG_CAT_MOD, "rename_cat: %u - %lu,%s - %lu,%s\n", cnid, src_dir->i_ino, src_name->name, | 328 | dprint(DBG_CAT_MOD, "rename_cat: %u - %lu,%s - %lu,%s\n", cnid, src_dir->i_ino, src_name->name, |
| 322 | dst_dir->i_ino, dst_name->name); | 329 | dst_dir->i_ino, dst_name->name); |
| 323 | sb = src_dir->i_sb; | 330 | hfs_find_init(HFSPLUS_SB(sb)->cat_tree, &src_fd); |
| 324 | hfs_find_init(HFSPLUS_SB(sb).cat_tree, &src_fd); | ||
| 325 | dst_fd = src_fd; | 331 | dst_fd = src_fd; |
| 326 | 332 | ||
| 327 | /* find the old dir entry and read the data */ | 333 | /* find the old dir entry and read the data */ |
diff --git a/fs/hfsplus/dir.c b/fs/hfsplus/dir.c index 764fd1bdca88..d236d85ec9d7 100644 --- a/fs/hfsplus/dir.c +++ b/fs/hfsplus/dir.c | |||
| @@ -39,7 +39,7 @@ static struct dentry *hfsplus_lookup(struct inode *dir, struct dentry *dentry, | |||
| 39 | 39 | ||
| 40 | dentry->d_op = &hfsplus_dentry_operations; | 40 | dentry->d_op = &hfsplus_dentry_operations; |
| 41 | dentry->d_fsdata = NULL; | 41 | dentry->d_fsdata = NULL; |
| 42 | hfs_find_init(HFSPLUS_SB(sb).cat_tree, &fd); | 42 | hfs_find_init(HFSPLUS_SB(sb)->cat_tree, &fd); |
| 43 | hfsplus_cat_build_key(sb, fd.search_key, dir->i_ino, &dentry->d_name); | 43 | hfsplus_cat_build_key(sb, fd.search_key, dir->i_ino, &dentry->d_name); |
| 44 | again: | 44 | again: |
| 45 | err = hfs_brec_read(&fd, &entry, sizeof(entry)); | 45 | err = hfs_brec_read(&fd, &entry, sizeof(entry)); |
| @@ -68,9 +68,9 @@ again: | |||
| 68 | cnid = be32_to_cpu(entry.file.id); | 68 | cnid = be32_to_cpu(entry.file.id); |
| 69 | if (entry.file.user_info.fdType == cpu_to_be32(HFSP_HARDLINK_TYPE) && | 69 | if (entry.file.user_info.fdType == cpu_to_be32(HFSP_HARDLINK_TYPE) && |
| 70 | entry.file.user_info.fdCreator == cpu_to_be32(HFSP_HFSPLUS_CREATOR) && | 70 | entry.file.user_info.fdCreator == cpu_to_be32(HFSP_HFSPLUS_CREATOR) && |
| 71 | (entry.file.create_date == HFSPLUS_I(HFSPLUS_SB(sb).hidden_dir).create_date || | 71 | (entry.file.create_date == HFSPLUS_I(HFSPLUS_SB(sb)->hidden_dir)->create_date || |
| 72 | entry.file.create_date == HFSPLUS_I(sb->s_root->d_inode).create_date) && | 72 | entry.file.create_date == HFSPLUS_I(sb->s_root->d_inode)->create_date) && |
| 73 | HFSPLUS_SB(sb).hidden_dir) { | 73 | HFSPLUS_SB(sb)->hidden_dir) { |
| 74 | struct qstr str; | 74 | struct qstr str; |
| 75 | char name[32]; | 75 | char name[32]; |
| 76 | 76 | ||
| @@ -86,7 +86,8 @@ again: | |||
| 86 | linkid = be32_to_cpu(entry.file.permissions.dev); | 86 | linkid = be32_to_cpu(entry.file.permissions.dev); |
| 87 | str.len = sprintf(name, "iNode%d", linkid); | 87 | str.len = sprintf(name, "iNode%d", linkid); |
| 88 | str.name = name; | 88 | str.name = name; |
| 89 | hfsplus_cat_build_key(sb, fd.search_key, HFSPLUS_SB(sb).hidden_dir->i_ino, &str); | 89 | hfsplus_cat_build_key(sb, fd.search_key, |
| 90 | HFSPLUS_SB(sb)->hidden_dir->i_ino, &str); | ||
| 90 | goto again; | 91 | goto again; |
| 91 | } | 92 | } |
| 92 | } else if (!dentry->d_fsdata) | 93 | } else if (!dentry->d_fsdata) |
| @@ -101,7 +102,7 @@ again: | |||
| 101 | if (IS_ERR(inode)) | 102 | if (IS_ERR(inode)) |
| 102 | return ERR_CAST(inode); | 103 | return ERR_CAST(inode); |
| 103 | if (S_ISREG(inode->i_mode)) | 104 | if (S_ISREG(inode->i_mode)) |
| 104 | HFSPLUS_I(inode).dev = linkid; | 105 | HFSPLUS_I(inode)->linkid = linkid; |
| 105 | out: | 106 | out: |
| 106 | d_add(dentry, inode); | 107 | d_add(dentry, inode); |
| 107 | return NULL; | 108 | return NULL; |
| @@ -124,7 +125,7 @@ static int hfsplus_readdir(struct file *filp, void *dirent, filldir_t filldir) | |||
| 124 | if (filp->f_pos >= inode->i_size) | 125 | if (filp->f_pos >= inode->i_size) |
| 125 | return 0; | 126 | return 0; |
| 126 | 127 | ||
| 127 | hfs_find_init(HFSPLUS_SB(sb).cat_tree, &fd); | 128 | hfs_find_init(HFSPLUS_SB(sb)->cat_tree, &fd); |
| 128 | hfsplus_cat_build_key(sb, fd.search_key, inode->i_ino, NULL); | 129 | hfsplus_cat_build_key(sb, fd.search_key, inode->i_ino, NULL); |
| 129 | err = hfs_brec_find(&fd); | 130 | err = hfs_brec_find(&fd); |
| 130 | if (err) | 131 | if (err) |
| @@ -180,8 +181,9 @@ static int hfsplus_readdir(struct file *filp, void *dirent, filldir_t filldir) | |||
| 180 | err = -EIO; | 181 | err = -EIO; |
| 181 | goto out; | 182 | goto out; |
| 182 | } | 183 | } |
| 183 | if (HFSPLUS_SB(sb).hidden_dir && | 184 | if (HFSPLUS_SB(sb)->hidden_dir && |
| 184 | HFSPLUS_SB(sb).hidden_dir->i_ino == be32_to_cpu(entry.folder.id)) | 185 | HFSPLUS_SB(sb)->hidden_dir->i_ino == |
| 186 | be32_to_cpu(entry.folder.id)) | ||
| 185 | goto next; | 187 | goto next; |
| 186 | if (filldir(dirent, strbuf, len, filp->f_pos, | 188 | if (filldir(dirent, strbuf, len, filp->f_pos, |
| 187 | be32_to_cpu(entry.folder.id), DT_DIR)) | 189 | be32_to_cpu(entry.folder.id), DT_DIR)) |
| @@ -217,7 +219,7 @@ static int hfsplus_readdir(struct file *filp, void *dirent, filldir_t filldir) | |||
| 217 | } | 219 | } |
| 218 | filp->private_data = rd; | 220 | filp->private_data = rd; |
| 219 | rd->file = filp; | 221 | rd->file = filp; |
| 220 | list_add(&rd->list, &HFSPLUS_I(inode).open_dir_list); | 222 | list_add(&rd->list, &HFSPLUS_I(inode)->open_dir_list); |
| 221 | } | 223 | } |
| 222 | memcpy(&rd->key, fd.key, sizeof(struct hfsplus_cat_key)); | 224 | memcpy(&rd->key, fd.key, sizeof(struct hfsplus_cat_key)); |
| 223 | out: | 225 | out: |
| @@ -229,38 +231,18 @@ static int hfsplus_dir_release(struct inode *inode, struct file *file) | |||
| 229 | { | 231 | { |
| 230 | struct hfsplus_readdir_data *rd = file->private_data; | 232 | struct hfsplus_readdir_data *rd = file->private_data; |
| 231 | if (rd) { | 233 | if (rd) { |
| 234 | mutex_lock(&inode->i_mutex); | ||
| 232 | list_del(&rd->list); | 235 | list_del(&rd->list); |
| 236 | mutex_unlock(&inode->i_mutex); | ||
| 233 | kfree(rd); | 237 | kfree(rd); |
| 234 | } | 238 | } |
| 235 | return 0; | 239 | return 0; |
| 236 | } | 240 | } |
| 237 | 241 | ||
| 238 | static int hfsplus_create(struct inode *dir, struct dentry *dentry, int mode, | ||
| 239 | struct nameidata *nd) | ||
| 240 | { | ||
| 241 | struct inode *inode; | ||
| 242 | int res; | ||
| 243 | |||
| 244 | inode = hfsplus_new_inode(dir->i_sb, mode); | ||
| 245 | if (!inode) | ||
| 246 | return -ENOSPC; | ||
| 247 | |||
| 248 | res = hfsplus_create_cat(inode->i_ino, dir, &dentry->d_name, inode); | ||
| 249 | if (res) { | ||
| 250 | inode->i_nlink = 0; | ||
| 251 | hfsplus_delete_inode(inode); | ||
| 252 | iput(inode); | ||
| 253 | return res; | ||
| 254 | } | ||
| 255 | hfsplus_instantiate(dentry, inode, inode->i_ino); | ||
| 256 | mark_inode_dirty(inode); | ||
| 257 | return 0; | ||
| 258 | } | ||
| 259 | |||
| 260 | static int hfsplus_link(struct dentry *src_dentry, struct inode *dst_dir, | 242 | static int hfsplus_link(struct dentry *src_dentry, struct inode *dst_dir, |
| 261 | struct dentry *dst_dentry) | 243 | struct dentry *dst_dentry) |
| 262 | { | 244 | { |
| 263 | struct super_block *sb = dst_dir->i_sb; | 245 | struct hfsplus_sb_info *sbi = HFSPLUS_SB(dst_dir->i_sb); |
| 264 | struct inode *inode = src_dentry->d_inode; | 246 | struct inode *inode = src_dentry->d_inode; |
| 265 | struct inode *src_dir = src_dentry->d_parent->d_inode; | 247 | struct inode *src_dir = src_dentry->d_parent->d_inode; |
| 266 | struct qstr str; | 248 | struct qstr str; |
| @@ -270,7 +252,10 @@ static int hfsplus_link(struct dentry *src_dentry, struct inode *dst_dir, | |||
| 270 | 252 | ||
| 271 | if (HFSPLUS_IS_RSRC(inode)) | 253 | if (HFSPLUS_IS_RSRC(inode)) |
| 272 | return -EPERM; | 254 | return -EPERM; |
| 255 | if (!S_ISREG(inode->i_mode)) | ||
| 256 | return -EPERM; | ||
| 273 | 257 | ||
| 258 | mutex_lock(&sbi->vh_mutex); | ||
| 274 | if (inode->i_ino == (u32)(unsigned long)src_dentry->d_fsdata) { | 259 | if (inode->i_ino == (u32)(unsigned long)src_dentry->d_fsdata) { |
| 275 | for (;;) { | 260 | for (;;) { |
| 276 | get_random_bytes(&id, sizeof(cnid)); | 261 | get_random_bytes(&id, sizeof(cnid)); |
| @@ -279,40 +264,41 @@ static int hfsplus_link(struct dentry *src_dentry, struct inode *dst_dir, | |||
| 279 | str.len = sprintf(name, "iNode%d", id); | 264 | str.len = sprintf(name, "iNode%d", id); |
| 280 | res = hfsplus_rename_cat(inode->i_ino, | 265 | res = hfsplus_rename_cat(inode->i_ino, |
| 281 | src_dir, &src_dentry->d_name, | 266 | src_dir, &src_dentry->d_name, |
| 282 | HFSPLUS_SB(sb).hidden_dir, &str); | 267 | sbi->hidden_dir, &str); |
| 283 | if (!res) | 268 | if (!res) |
| 284 | break; | 269 | break; |
| 285 | if (res != -EEXIST) | 270 | if (res != -EEXIST) |
| 286 | return res; | 271 | goto out; |
| 287 | } | 272 | } |
| 288 | HFSPLUS_I(inode).dev = id; | 273 | HFSPLUS_I(inode)->linkid = id; |
| 289 | cnid = HFSPLUS_SB(sb).next_cnid++; | 274 | cnid = sbi->next_cnid++; |
| 290 | src_dentry->d_fsdata = (void *)(unsigned long)cnid; | 275 | src_dentry->d_fsdata = (void *)(unsigned long)cnid; |
| 291 | res = hfsplus_create_cat(cnid, src_dir, &src_dentry->d_name, inode); | 276 | res = hfsplus_create_cat(cnid, src_dir, &src_dentry->d_name, inode); |
| 292 | if (res) | 277 | if (res) |
| 293 | /* panic? */ | 278 | /* panic? */ |
| 294 | return res; | 279 | goto out; |
| 295 | HFSPLUS_SB(sb).file_count++; | 280 | sbi->file_count++; |
| 296 | } | 281 | } |
| 297 | cnid = HFSPLUS_SB(sb).next_cnid++; | 282 | cnid = sbi->next_cnid++; |
| 298 | res = hfsplus_create_cat(cnid, dst_dir, &dst_dentry->d_name, inode); | 283 | res = hfsplus_create_cat(cnid, dst_dir, &dst_dentry->d_name, inode); |
| 299 | if (res) | 284 | if (res) |
| 300 | return res; | 285 | goto out; |
| 301 | 286 | ||
| 302 | inc_nlink(inode); | 287 | inc_nlink(inode); |
| 303 | hfsplus_instantiate(dst_dentry, inode, cnid); | 288 | hfsplus_instantiate(dst_dentry, inode, cnid); |
| 304 | atomic_inc(&inode->i_count); | 289 | atomic_inc(&inode->i_count); |
| 305 | inode->i_ctime = CURRENT_TIME_SEC; | 290 | inode->i_ctime = CURRENT_TIME_SEC; |
| 306 | mark_inode_dirty(inode); | 291 | mark_inode_dirty(inode); |
| 307 | HFSPLUS_SB(sb).file_count++; | 292 | sbi->file_count++; |
| 308 | sb->s_dirt = 1; | 293 | dst_dir->i_sb->s_dirt = 1; |
| 309 | 294 | out: | |
| 310 | return 0; | 295 | mutex_unlock(&sbi->vh_mutex); |
| 296 | return res; | ||
| 311 | } | 297 | } |
| 312 | 298 | ||
| 313 | static int hfsplus_unlink(struct inode *dir, struct dentry *dentry) | 299 | static int hfsplus_unlink(struct inode *dir, struct dentry *dentry) |
| 314 | { | 300 | { |
| 315 | struct super_block *sb = dir->i_sb; | 301 | struct hfsplus_sb_info *sbi = HFSPLUS_SB(dir->i_sb); |
| 316 | struct inode *inode = dentry->d_inode; | 302 | struct inode *inode = dentry->d_inode; |
| 317 | struct qstr str; | 303 | struct qstr str; |
| 318 | char name[32]; | 304 | char name[32]; |
| @@ -322,21 +308,22 @@ static int hfsplus_unlink(struct inode *dir, struct dentry *dentry) | |||
| 322 | if (HFSPLUS_IS_RSRC(inode)) | 308 | if (HFSPLUS_IS_RSRC(inode)) |
| 323 | return -EPERM; | 309 | return -EPERM; |
| 324 | 310 | ||
| 311 | mutex_lock(&sbi->vh_mutex); | ||
| 325 | cnid = (u32)(unsigned long)dentry->d_fsdata; | 312 | cnid = (u32)(unsigned long)dentry->d_fsdata; |
| 326 | if (inode->i_ino == cnid && | 313 | if (inode->i_ino == cnid && |
| 327 | atomic_read(&HFSPLUS_I(inode).opencnt)) { | 314 | atomic_read(&HFSPLUS_I(inode)->opencnt)) { |
| 328 | str.name = name; | 315 | str.name = name; |
| 329 | str.len = sprintf(name, "temp%lu", inode->i_ino); | 316 | str.len = sprintf(name, "temp%lu", inode->i_ino); |
| 330 | res = hfsplus_rename_cat(inode->i_ino, | 317 | res = hfsplus_rename_cat(inode->i_ino, |
| 331 | dir, &dentry->d_name, | 318 | dir, &dentry->d_name, |
| 332 | HFSPLUS_SB(sb).hidden_dir, &str); | 319 | sbi->hidden_dir, &str); |
| 333 | if (!res) | 320 | if (!res) |
| 334 | inode->i_flags |= S_DEAD; | 321 | inode->i_flags |= S_DEAD; |
| 335 | return res; | 322 | goto out; |
| 336 | } | 323 | } |
| 337 | res = hfsplus_delete_cat(cnid, dir, &dentry->d_name); | 324 | res = hfsplus_delete_cat(cnid, dir, &dentry->d_name); |
| 338 | if (res) | 325 | if (res) |
| 339 | return res; | 326 | goto out; |
| 340 | 327 | ||
| 341 | if (inode->i_nlink > 0) | 328 | if (inode->i_nlink > 0) |
| 342 | drop_nlink(inode); | 329 | drop_nlink(inode); |
| @@ -344,10 +331,10 @@ static int hfsplus_unlink(struct inode *dir, struct dentry *dentry) | |||
| 344 | clear_nlink(inode); | 331 | clear_nlink(inode); |
| 345 | if (!inode->i_nlink) { | 332 | if (!inode->i_nlink) { |
| 346 | if (inode->i_ino != cnid) { | 333 | if (inode->i_ino != cnid) { |
| 347 | HFSPLUS_SB(sb).file_count--; | 334 | sbi->file_count--; |
| 348 | if (!atomic_read(&HFSPLUS_I(inode).opencnt)) { | 335 | if (!atomic_read(&HFSPLUS_I(inode)->opencnt)) { |
| 349 | res = hfsplus_delete_cat(inode->i_ino, | 336 | res = hfsplus_delete_cat(inode->i_ino, |
| 350 | HFSPLUS_SB(sb).hidden_dir, | 337 | sbi->hidden_dir, |
| 351 | NULL); | 338 | NULL); |
| 352 | if (!res) | 339 | if (!res) |
| 353 | hfsplus_delete_inode(inode); | 340 | hfsplus_delete_inode(inode); |
| @@ -356,107 +343,108 @@ static int hfsplus_unlink(struct inode *dir, struct dentry *dentry) | |||
| 356 | } else | 343 | } else |
| 357 | hfsplus_delete_inode(inode); | 344 | hfsplus_delete_inode(inode); |
| 358 | } else | 345 | } else |
| 359 | HFSPLUS_SB(sb).file_count--; | 346 | sbi->file_count--; |
| 360 | inode->i_ctime = CURRENT_TIME_SEC; | 347 | inode->i_ctime = CURRENT_TIME_SEC; |
| 361 | mark_inode_dirty(inode); | 348 | mark_inode_dirty(inode); |
| 362 | 349 | out: | |
| 350 | mutex_unlock(&sbi->vh_mutex); | ||
| 363 | return res; | 351 | return res; |
| 364 | } | 352 | } |
| 365 | 353 | ||
| 366 | static int hfsplus_mkdir(struct inode *dir, struct dentry *dentry, int mode) | ||
| 367 | { | ||
| 368 | struct inode *inode; | ||
| 369 | int res; | ||
| 370 | |||
| 371 | inode = hfsplus_new_inode(dir->i_sb, S_IFDIR | mode); | ||
| 372 | if (!inode) | ||
| 373 | return -ENOSPC; | ||
| 374 | |||
| 375 | res = hfsplus_create_cat(inode->i_ino, dir, &dentry->d_name, inode); | ||
| 376 | if (res) { | ||
| 377 | inode->i_nlink = 0; | ||
| 378 | hfsplus_delete_inode(inode); | ||
| 379 | iput(inode); | ||
| 380 | return res; | ||
| 381 | } | ||
| 382 | hfsplus_instantiate(dentry, inode, inode->i_ino); | ||
| 383 | mark_inode_dirty(inode); | ||
| 384 | return 0; | ||
| 385 | } | ||
| 386 | |||
| 387 | static int hfsplus_rmdir(struct inode *dir, struct dentry *dentry) | 354 | static int hfsplus_rmdir(struct inode *dir, struct dentry *dentry) |
| 388 | { | 355 | { |
| 389 | struct inode *inode; | 356 | struct hfsplus_sb_info *sbi = HFSPLUS_SB(dir->i_sb); |
| 357 | struct inode *inode = dentry->d_inode; | ||
| 390 | int res; | 358 | int res; |
| 391 | 359 | ||
| 392 | inode = dentry->d_inode; | ||
| 393 | if (inode->i_size != 2) | 360 | if (inode->i_size != 2) |
| 394 | return -ENOTEMPTY; | 361 | return -ENOTEMPTY; |
| 362 | |||
| 363 | mutex_lock(&sbi->vh_mutex); | ||
| 395 | res = hfsplus_delete_cat(inode->i_ino, dir, &dentry->d_name); | 364 | res = hfsplus_delete_cat(inode->i_ino, dir, &dentry->d_name); |
| 396 | if (res) | 365 | if (res) |
| 397 | return res; | 366 | goto out; |
| 398 | clear_nlink(inode); | 367 | clear_nlink(inode); |
| 399 | inode->i_ctime = CURRENT_TIME_SEC; | 368 | inode->i_ctime = CURRENT_TIME_SEC; |
| 400 | hfsplus_delete_inode(inode); | 369 | hfsplus_delete_inode(inode); |
| 401 | mark_inode_dirty(inode); | 370 | mark_inode_dirty(inode); |
| 402 | return 0; | 371 | out: |
| 372 | mutex_unlock(&sbi->vh_mutex); | ||
| 373 | return res; | ||
| 403 | } | 374 | } |
| 404 | 375 | ||
| 405 | static int hfsplus_symlink(struct inode *dir, struct dentry *dentry, | 376 | static int hfsplus_symlink(struct inode *dir, struct dentry *dentry, |
| 406 | const char *symname) | 377 | const char *symname) |
| 407 | { | 378 | { |
| 408 | struct super_block *sb; | 379 | struct hfsplus_sb_info *sbi = HFSPLUS_SB(dir->i_sb); |
| 409 | struct inode *inode; | 380 | struct inode *inode; |
| 410 | int res; | 381 | int res = -ENOSPC; |
| 411 | 382 | ||
| 412 | sb = dir->i_sb; | 383 | mutex_lock(&sbi->vh_mutex); |
| 413 | inode = hfsplus_new_inode(sb, S_IFLNK | S_IRWXUGO); | 384 | inode = hfsplus_new_inode(dir->i_sb, S_IFLNK | S_IRWXUGO); |
| 414 | if (!inode) | 385 | if (!inode) |
| 415 | return -ENOSPC; | 386 | goto out; |
| 416 | 387 | ||
| 417 | res = page_symlink(inode, symname, strlen(symname) + 1); | 388 | res = page_symlink(inode, symname, strlen(symname) + 1); |
| 418 | if (res) { | 389 | if (res) |
| 419 | inode->i_nlink = 0; | 390 | goto out_err; |
| 420 | hfsplus_delete_inode(inode); | ||
| 421 | iput(inode); | ||
| 422 | return res; | ||
| 423 | } | ||
| 424 | 391 | ||
| 425 | mark_inode_dirty(inode); | ||
| 426 | res = hfsplus_create_cat(inode->i_ino, dir, &dentry->d_name, inode); | 392 | res = hfsplus_create_cat(inode->i_ino, dir, &dentry->d_name, inode); |
| 393 | if (res) | ||
| 394 | goto out_err; | ||
| 427 | 395 | ||
| 428 | if (!res) { | 396 | hfsplus_instantiate(dentry, inode, inode->i_ino); |
| 429 | hfsplus_instantiate(dentry, inode, inode->i_ino); | 397 | mark_inode_dirty(inode); |
| 430 | mark_inode_dirty(inode); | 398 | goto out; |
| 431 | } | ||
| 432 | 399 | ||
| 400 | out_err: | ||
| 401 | inode->i_nlink = 0; | ||
| 402 | hfsplus_delete_inode(inode); | ||
| 403 | iput(inode); | ||
| 404 | out: | ||
| 405 | mutex_unlock(&sbi->vh_mutex); | ||
| 433 | return res; | 406 | return res; |
| 434 | } | 407 | } |
| 435 | 408 | ||
| 436 | static int hfsplus_mknod(struct inode *dir, struct dentry *dentry, | 409 | static int hfsplus_mknod(struct inode *dir, struct dentry *dentry, |
| 437 | int mode, dev_t rdev) | 410 | int mode, dev_t rdev) |
| 438 | { | 411 | { |
| 439 | struct super_block *sb; | 412 | struct hfsplus_sb_info *sbi = HFSPLUS_SB(dir->i_sb); |
| 440 | struct inode *inode; | 413 | struct inode *inode; |
| 441 | int res; | 414 | int res = -ENOSPC; |
| 442 | 415 | ||
| 443 | sb = dir->i_sb; | 416 | mutex_lock(&sbi->vh_mutex); |
| 444 | inode = hfsplus_new_inode(sb, mode); | 417 | inode = hfsplus_new_inode(dir->i_sb, mode); |
| 445 | if (!inode) | 418 | if (!inode) |
| 446 | return -ENOSPC; | 419 | goto out; |
| 420 | |||
| 421 | if (S_ISBLK(mode) || S_ISCHR(mode) || S_ISFIFO(mode) || S_ISSOCK(mode)) | ||
| 422 | init_special_inode(inode, mode, rdev); | ||
| 447 | 423 | ||
| 448 | res = hfsplus_create_cat(inode->i_ino, dir, &dentry->d_name, inode); | 424 | res = hfsplus_create_cat(inode->i_ino, dir, &dentry->d_name, inode); |
| 449 | if (res) { | 425 | if (res) { |
| 450 | inode->i_nlink = 0; | 426 | inode->i_nlink = 0; |
| 451 | hfsplus_delete_inode(inode); | 427 | hfsplus_delete_inode(inode); |
| 452 | iput(inode); | 428 | iput(inode); |
| 453 | return res; | 429 | goto out; |
| 454 | } | 430 | } |
| 455 | init_special_inode(inode, mode, rdev); | 431 | |
| 456 | hfsplus_instantiate(dentry, inode, inode->i_ino); | 432 | hfsplus_instantiate(dentry, inode, inode->i_ino); |
| 457 | mark_inode_dirty(inode); | 433 | mark_inode_dirty(inode); |
| 434 | out: | ||
| 435 | mutex_unlock(&sbi->vh_mutex); | ||
| 436 | return res; | ||
| 437 | } | ||
| 458 | 438 | ||
| 459 | return 0; | 439 | static int hfsplus_create(struct inode *dir, struct dentry *dentry, int mode, |
| 440 | struct nameidata *nd) | ||
| 441 | { | ||
| 442 | return hfsplus_mknod(dir, dentry, mode, 0); | ||
| 443 | } | ||
| 444 | |||
| 445 | static int hfsplus_mkdir(struct inode *dir, struct dentry *dentry, int mode) | ||
| 446 | { | ||
| 447 | return hfsplus_mknod(dir, dentry, mode | S_IFDIR, 0); | ||
| 460 | } | 448 | } |
| 461 | 449 | ||
| 462 | static int hfsplus_rename(struct inode *old_dir, struct dentry *old_dentry, | 450 | static int hfsplus_rename(struct inode *old_dir, struct dentry *old_dentry, |
| @@ -466,7 +454,10 @@ static int hfsplus_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
| 466 | 454 | ||
| 467 | /* Unlink destination if it already exists */ | 455 | /* Unlink destination if it already exists */ |
| 468 | if (new_dentry->d_inode) { | 456 | if (new_dentry->d_inode) { |
| 469 | res = hfsplus_unlink(new_dir, new_dentry); | 457 | if (S_ISDIR(new_dentry->d_inode->i_mode)) |
| 458 | res = hfsplus_rmdir(new_dir, new_dentry); | ||
| 459 | else | ||
| 460 | res = hfsplus_unlink(new_dir, new_dentry); | ||
| 470 | if (res) | 461 | if (res) |
| 471 | return res; | 462 | return res; |
| 472 | } | 463 | } |
diff --git a/fs/hfsplus/extents.c b/fs/hfsplus/extents.c index 0022eec63cda..0c9cb1820a52 100644 --- a/fs/hfsplus/extents.c +++ b/fs/hfsplus/extents.c | |||
| @@ -85,35 +85,49 @@ static u32 hfsplus_ext_lastblock(struct hfsplus_extent *ext) | |||
| 85 | 85 | ||
| 86 | static void __hfsplus_ext_write_extent(struct inode *inode, struct hfs_find_data *fd) | 86 | static void __hfsplus_ext_write_extent(struct inode *inode, struct hfs_find_data *fd) |
| 87 | { | 87 | { |
| 88 | struct hfsplus_inode_info *hip = HFSPLUS_I(inode); | ||
| 88 | int res; | 89 | int res; |
| 89 | 90 | ||
| 90 | hfsplus_ext_build_key(fd->search_key, inode->i_ino, HFSPLUS_I(inode).cached_start, | 91 | WARN_ON(!mutex_is_locked(&hip->extents_lock)); |
| 91 | HFSPLUS_IS_RSRC(inode) ? HFSPLUS_TYPE_RSRC : HFSPLUS_TYPE_DATA); | 92 | |
| 93 | hfsplus_ext_build_key(fd->search_key, inode->i_ino, hip->cached_start, | ||
| 94 | HFSPLUS_IS_RSRC(inode) ? | ||
| 95 | HFSPLUS_TYPE_RSRC : HFSPLUS_TYPE_DATA); | ||
| 96 | |||
| 92 | res = hfs_brec_find(fd); | 97 | res = hfs_brec_find(fd); |
| 93 | if (HFSPLUS_I(inode).flags & HFSPLUS_FLG_EXT_NEW) { | 98 | if (hip->flags & HFSPLUS_FLG_EXT_NEW) { |
| 94 | if (res != -ENOENT) | 99 | if (res != -ENOENT) |
| 95 | return; | 100 | return; |
| 96 | hfs_brec_insert(fd, HFSPLUS_I(inode).cached_extents, sizeof(hfsplus_extent_rec)); | 101 | hfs_brec_insert(fd, hip->cached_extents, |
| 97 | HFSPLUS_I(inode).flags &= ~(HFSPLUS_FLG_EXT_DIRTY | HFSPLUS_FLG_EXT_NEW); | 102 | sizeof(hfsplus_extent_rec)); |
| 103 | hip->flags &= ~(HFSPLUS_FLG_EXT_DIRTY | HFSPLUS_FLG_EXT_NEW); | ||
| 98 | } else { | 104 | } else { |
| 99 | if (res) | 105 | if (res) |
| 100 | return; | 106 | return; |
| 101 | hfs_bnode_write(fd->bnode, HFSPLUS_I(inode).cached_extents, fd->entryoffset, fd->entrylength); | 107 | hfs_bnode_write(fd->bnode, hip->cached_extents, |
| 102 | HFSPLUS_I(inode).flags &= ~HFSPLUS_FLG_EXT_DIRTY; | 108 | fd->entryoffset, fd->entrylength); |
| 109 | hip->flags &= ~HFSPLUS_FLG_EXT_DIRTY; | ||
| 103 | } | 110 | } |
| 104 | } | 111 | } |
| 105 | 112 | ||
| 106 | void hfsplus_ext_write_extent(struct inode *inode) | 113 | static void hfsplus_ext_write_extent_locked(struct inode *inode) |
| 107 | { | 114 | { |
| 108 | if (HFSPLUS_I(inode).flags & HFSPLUS_FLG_EXT_DIRTY) { | 115 | if (HFSPLUS_I(inode)->flags & HFSPLUS_FLG_EXT_DIRTY) { |
| 109 | struct hfs_find_data fd; | 116 | struct hfs_find_data fd; |
| 110 | 117 | ||
| 111 | hfs_find_init(HFSPLUS_SB(inode->i_sb).ext_tree, &fd); | 118 | hfs_find_init(HFSPLUS_SB(inode->i_sb)->ext_tree, &fd); |
| 112 | __hfsplus_ext_write_extent(inode, &fd); | 119 | __hfsplus_ext_write_extent(inode, &fd); |
| 113 | hfs_find_exit(&fd); | 120 | hfs_find_exit(&fd); |
| 114 | } | 121 | } |
| 115 | } | 122 | } |
| 116 | 123 | ||
| 124 | void hfsplus_ext_write_extent(struct inode *inode) | ||
| 125 | { | ||
| 126 | mutex_lock(&HFSPLUS_I(inode)->extents_lock); | ||
| 127 | hfsplus_ext_write_extent_locked(inode); | ||
| 128 | mutex_unlock(&HFSPLUS_I(inode)->extents_lock); | ||
| 129 | } | ||
| 130 | |||
| 117 | static inline int __hfsplus_ext_read_extent(struct hfs_find_data *fd, | 131 | static inline int __hfsplus_ext_read_extent(struct hfs_find_data *fd, |
| 118 | struct hfsplus_extent *extent, | 132 | struct hfsplus_extent *extent, |
| 119 | u32 cnid, u32 block, u8 type) | 133 | u32 cnid, u32 block, u8 type) |
| @@ -136,33 +150,39 @@ static inline int __hfsplus_ext_read_extent(struct hfs_find_data *fd, | |||
| 136 | 150 | ||
| 137 | static inline int __hfsplus_ext_cache_extent(struct hfs_find_data *fd, struct inode *inode, u32 block) | 151 | static inline int __hfsplus_ext_cache_extent(struct hfs_find_data *fd, struct inode *inode, u32 block) |
| 138 | { | 152 | { |
| 153 | struct hfsplus_inode_info *hip = HFSPLUS_I(inode); | ||
| 139 | int res; | 154 | int res; |
| 140 | 155 | ||
| 141 | if (HFSPLUS_I(inode).flags & HFSPLUS_FLG_EXT_DIRTY) | 156 | WARN_ON(!mutex_is_locked(&hip->extents_lock)); |
| 157 | |||
| 158 | if (hip->flags & HFSPLUS_FLG_EXT_DIRTY) | ||
| 142 | __hfsplus_ext_write_extent(inode, fd); | 159 | __hfsplus_ext_write_extent(inode, fd); |
| 143 | 160 | ||
| 144 | res = __hfsplus_ext_read_extent(fd, HFSPLUS_I(inode).cached_extents, inode->i_ino, | 161 | res = __hfsplus_ext_read_extent(fd, hip->cached_extents, inode->i_ino, |
| 145 | block, HFSPLUS_IS_RSRC(inode) ? HFSPLUS_TYPE_RSRC : HFSPLUS_TYPE_DATA); | 162 | block, HFSPLUS_IS_RSRC(inode) ? |
| 163 | HFSPLUS_TYPE_RSRC : | ||
| 164 | HFSPLUS_TYPE_DATA); | ||
| 146 | if (!res) { | 165 | if (!res) { |
| 147 | HFSPLUS_I(inode).cached_start = be32_to_cpu(fd->key->ext.start_block); | 166 | hip->cached_start = be32_to_cpu(fd->key->ext.start_block); |
| 148 | HFSPLUS_I(inode).cached_blocks = hfsplus_ext_block_count(HFSPLUS_I(inode).cached_extents); | 167 | hip->cached_blocks = hfsplus_ext_block_count(hip->cached_extents); |
| 149 | } else { | 168 | } else { |
| 150 | HFSPLUS_I(inode).cached_start = HFSPLUS_I(inode).cached_blocks = 0; | 169 | hip->cached_start = hip->cached_blocks = 0; |
| 151 | HFSPLUS_I(inode).flags &= ~(HFSPLUS_FLG_EXT_DIRTY | HFSPLUS_FLG_EXT_NEW); | 170 | hip->flags &= ~(HFSPLUS_FLG_EXT_DIRTY | HFSPLUS_FLG_EXT_NEW); |
| 152 | } | 171 | } |
| 153 | return res; | 172 | return res; |
| 154 | } | 173 | } |
| 155 | 174 | ||
| 156 | static int hfsplus_ext_read_extent(struct inode *inode, u32 block) | 175 | static int hfsplus_ext_read_extent(struct inode *inode, u32 block) |
| 157 | { | 176 | { |
| 177 | struct hfsplus_inode_info *hip = HFSPLUS_I(inode); | ||
| 158 | struct hfs_find_data fd; | 178 | struct hfs_find_data fd; |
| 159 | int res; | 179 | int res; |
| 160 | 180 | ||
| 161 | if (block >= HFSPLUS_I(inode).cached_start && | 181 | if (block >= hip->cached_start && |
| 162 | block < HFSPLUS_I(inode).cached_start + HFSPLUS_I(inode).cached_blocks) | 182 | block < hip->cached_start + hip->cached_blocks) |
| 163 | return 0; | 183 | return 0; |
| 164 | 184 | ||
| 165 | hfs_find_init(HFSPLUS_SB(inode->i_sb).ext_tree, &fd); | 185 | hfs_find_init(HFSPLUS_SB(inode->i_sb)->ext_tree, &fd); |
| 166 | res = __hfsplus_ext_cache_extent(&fd, inode, block); | 186 | res = __hfsplus_ext_cache_extent(&fd, inode, block); |
| 167 | hfs_find_exit(&fd); | 187 | hfs_find_exit(&fd); |
| 168 | return res; | 188 | return res; |
| @@ -172,21 +192,21 @@ static int hfsplus_ext_read_extent(struct inode *inode, u32 block) | |||
| 172 | int hfsplus_get_block(struct inode *inode, sector_t iblock, | 192 | int hfsplus_get_block(struct inode *inode, sector_t iblock, |
| 173 | struct buffer_head *bh_result, int create) | 193 | struct buffer_head *bh_result, int create) |
| 174 | { | 194 | { |
| 175 | struct super_block *sb; | 195 | struct super_block *sb = inode->i_sb; |
| 196 | struct hfsplus_sb_info *sbi = HFSPLUS_SB(sb); | ||
| 197 | struct hfsplus_inode_info *hip = HFSPLUS_I(inode); | ||
| 176 | int res = -EIO; | 198 | int res = -EIO; |
| 177 | u32 ablock, dblock, mask; | 199 | u32 ablock, dblock, mask; |
| 178 | int shift; | 200 | int shift; |
| 179 | 201 | ||
| 180 | sb = inode->i_sb; | ||
| 181 | |||
| 182 | /* Convert inode block to disk allocation block */ | 202 | /* Convert inode block to disk allocation block */ |
| 183 | shift = HFSPLUS_SB(sb).alloc_blksz_shift - sb->s_blocksize_bits; | 203 | shift = sbi->alloc_blksz_shift - sb->s_blocksize_bits; |
| 184 | ablock = iblock >> HFSPLUS_SB(sb).fs_shift; | 204 | ablock = iblock >> sbi->fs_shift; |
| 185 | 205 | ||
| 186 | if (iblock >= HFSPLUS_I(inode).fs_blocks) { | 206 | if (iblock >= hip->fs_blocks) { |
| 187 | if (iblock > HFSPLUS_I(inode).fs_blocks || !create) | 207 | if (iblock > hip->fs_blocks || !create) |
| 188 | return -EIO; | 208 | return -EIO; |
| 189 | if (ablock >= HFSPLUS_I(inode).alloc_blocks) { | 209 | if (ablock >= hip->alloc_blocks) { |
| 190 | res = hfsplus_file_extend(inode); | 210 | res = hfsplus_file_extend(inode); |
| 191 | if (res) | 211 | if (res) |
| 192 | return res; | 212 | return res; |
| @@ -194,33 +214,33 @@ int hfsplus_get_block(struct inode *inode, sector_t iblock, | |||
| 194 | } else | 214 | } else |
| 195 | create = 0; | 215 | create = 0; |
| 196 | 216 | ||
| 197 | if (ablock < HFSPLUS_I(inode).first_blocks) { | 217 | if (ablock < hip->first_blocks) { |
| 198 | dblock = hfsplus_ext_find_block(HFSPLUS_I(inode).first_extents, ablock); | 218 | dblock = hfsplus_ext_find_block(hip->first_extents, ablock); |
| 199 | goto done; | 219 | goto done; |
| 200 | } | 220 | } |
| 201 | 221 | ||
| 202 | if (inode->i_ino == HFSPLUS_EXT_CNID) | 222 | if (inode->i_ino == HFSPLUS_EXT_CNID) |
| 203 | return -EIO; | 223 | return -EIO; |
| 204 | 224 | ||
| 205 | mutex_lock(&HFSPLUS_I(inode).extents_lock); | 225 | mutex_lock(&hip->extents_lock); |
| 206 | res = hfsplus_ext_read_extent(inode, ablock); | 226 | res = hfsplus_ext_read_extent(inode, ablock); |
| 207 | if (!res) { | 227 | if (!res) { |
| 208 | dblock = hfsplus_ext_find_block(HFSPLUS_I(inode).cached_extents, ablock - | 228 | dblock = hfsplus_ext_find_block(hip->cached_extents, |
| 209 | HFSPLUS_I(inode).cached_start); | 229 | ablock - hip->cached_start); |
| 210 | } else { | 230 | } else { |
| 211 | mutex_unlock(&HFSPLUS_I(inode).extents_lock); | 231 | mutex_unlock(&hip->extents_lock); |
| 212 | return -EIO; | 232 | return -EIO; |
| 213 | } | 233 | } |
| 214 | mutex_unlock(&HFSPLUS_I(inode).extents_lock); | 234 | mutex_unlock(&hip->extents_lock); |
| 215 | 235 | ||
| 216 | done: | 236 | done: |
| 217 | dprint(DBG_EXTENT, "get_block(%lu): %llu - %u\n", inode->i_ino, (long long)iblock, dblock); | 237 | dprint(DBG_EXTENT, "get_block(%lu): %llu - %u\n", inode->i_ino, (long long)iblock, dblock); |
| 218 | mask = (1 << HFSPLUS_SB(sb).fs_shift) - 1; | 238 | mask = (1 << sbi->fs_shift) - 1; |
| 219 | map_bh(bh_result, sb, (dblock << HFSPLUS_SB(sb).fs_shift) + HFSPLUS_SB(sb).blockoffset + (iblock & mask)); | 239 | map_bh(bh_result, sb, (dblock << sbi->fs_shift) + sbi->blockoffset + (iblock & mask)); |
| 220 | if (create) { | 240 | if (create) { |
| 221 | set_buffer_new(bh_result); | 241 | set_buffer_new(bh_result); |
| 222 | HFSPLUS_I(inode).phys_size += sb->s_blocksize; | 242 | hip->phys_size += sb->s_blocksize; |
| 223 | HFSPLUS_I(inode).fs_blocks++; | 243 | hip->fs_blocks++; |
| 224 | inode_add_bytes(inode, sb->s_blocksize); | 244 | inode_add_bytes(inode, sb->s_blocksize); |
| 225 | mark_inode_dirty(inode); | 245 | mark_inode_dirty(inode); |
| 226 | } | 246 | } |
| @@ -327,7 +347,7 @@ int hfsplus_free_fork(struct super_block *sb, u32 cnid, struct hfsplus_fork_raw | |||
| 327 | if (total_blocks == blocks) | 347 | if (total_blocks == blocks) |
| 328 | return 0; | 348 | return 0; |
| 329 | 349 | ||
| 330 | hfs_find_init(HFSPLUS_SB(sb).ext_tree, &fd); | 350 | hfs_find_init(HFSPLUS_SB(sb)->ext_tree, &fd); |
| 331 | do { | 351 | do { |
| 332 | res = __hfsplus_ext_read_extent(&fd, ext_entry, cnid, | 352 | res = __hfsplus_ext_read_extent(&fd, ext_entry, cnid, |
| 333 | total_blocks, type); | 353 | total_blocks, type); |
| @@ -348,29 +368,33 @@ int hfsplus_free_fork(struct super_block *sb, u32 cnid, struct hfsplus_fork_raw | |||
| 348 | int hfsplus_file_extend(struct inode *inode) | 368 | int hfsplus_file_extend(struct inode *inode) |
| 349 | { | 369 | { |
| 350 | struct super_block *sb = inode->i_sb; | 370 | struct super_block *sb = inode->i_sb; |
| 371 | struct hfsplus_sb_info *sbi = HFSPLUS_SB(sb); | ||
| 372 | struct hfsplus_inode_info *hip = HFSPLUS_I(inode); | ||
| 351 | u32 start, len, goal; | 373 | u32 start, len, goal; |
| 352 | int res; | 374 | int res; |
| 353 | 375 | ||
| 354 | if (HFSPLUS_SB(sb).alloc_file->i_size * 8 < HFSPLUS_SB(sb).total_blocks - HFSPLUS_SB(sb).free_blocks + 8) { | 376 | if (sbi->alloc_file->i_size * 8 < |
| 377 | sbi->total_blocks - sbi->free_blocks + 8) { | ||
| 355 | // extend alloc file | 378 | // extend alloc file |
| 356 | printk(KERN_ERR "hfs: extend alloc file! (%Lu,%u,%u)\n", HFSPLUS_SB(sb).alloc_file->i_size * 8, | 379 | printk(KERN_ERR "hfs: extend alloc file! (%Lu,%u,%u)\n", |
| 357 | HFSPLUS_SB(sb).total_blocks, HFSPLUS_SB(sb).free_blocks); | 380 | sbi->alloc_file->i_size * 8, |
| 381 | sbi->total_blocks, sbi->free_blocks); | ||
| 358 | return -ENOSPC; | 382 | return -ENOSPC; |
| 359 | } | 383 | } |
| 360 | 384 | ||
| 361 | mutex_lock(&HFSPLUS_I(inode).extents_lock); | 385 | mutex_lock(&hip->extents_lock); |
| 362 | if (HFSPLUS_I(inode).alloc_blocks == HFSPLUS_I(inode).first_blocks) | 386 | if (hip->alloc_blocks == hip->first_blocks) |
| 363 | goal = hfsplus_ext_lastblock(HFSPLUS_I(inode).first_extents); | 387 | goal = hfsplus_ext_lastblock(hip->first_extents); |
| 364 | else { | 388 | else { |
| 365 | res = hfsplus_ext_read_extent(inode, HFSPLUS_I(inode).alloc_blocks); | 389 | res = hfsplus_ext_read_extent(inode, hip->alloc_blocks); |
| 366 | if (res) | 390 | if (res) |
| 367 | goto out; | 391 | goto out; |
| 368 | goal = hfsplus_ext_lastblock(HFSPLUS_I(inode).cached_extents); | 392 | goal = hfsplus_ext_lastblock(hip->cached_extents); |
| 369 | } | 393 | } |
| 370 | 394 | ||
| 371 | len = HFSPLUS_I(inode).clump_blocks; | 395 | len = hip->clump_blocks; |
| 372 | start = hfsplus_block_allocate(sb, HFSPLUS_SB(sb).total_blocks, goal, &len); | 396 | start = hfsplus_block_allocate(sb, sbi->total_blocks, goal, &len); |
| 373 | if (start >= HFSPLUS_SB(sb).total_blocks) { | 397 | if (start >= sbi->total_blocks) { |
| 374 | start = hfsplus_block_allocate(sb, goal, 0, &len); | 398 | start = hfsplus_block_allocate(sb, goal, 0, &len); |
| 375 | if (start >= goal) { | 399 | if (start >= goal) { |
| 376 | res = -ENOSPC; | 400 | res = -ENOSPC; |
| @@ -379,56 +403,56 @@ int hfsplus_file_extend(struct inode *inode) | |||
| 379 | } | 403 | } |
| 380 | 404 | ||
| 381 | dprint(DBG_EXTENT, "extend %lu: %u,%u\n", inode->i_ino, start, len); | 405 | dprint(DBG_EXTENT, "extend %lu: %u,%u\n", inode->i_ino, start, len); |
| 382 | if (HFSPLUS_I(inode).alloc_blocks <= HFSPLUS_I(inode).first_blocks) { | 406 | |
| 383 | if (!HFSPLUS_I(inode).first_blocks) { | 407 | if (hip->alloc_blocks <= hip->first_blocks) { |
| 408 | if (!hip->first_blocks) { | ||
| 384 | dprint(DBG_EXTENT, "first extents\n"); | 409 | dprint(DBG_EXTENT, "first extents\n"); |
| 385 | /* no extents yet */ | 410 | /* no extents yet */ |
| 386 | HFSPLUS_I(inode).first_extents[0].start_block = cpu_to_be32(start); | 411 | hip->first_extents[0].start_block = cpu_to_be32(start); |
| 387 | HFSPLUS_I(inode).first_extents[0].block_count = cpu_to_be32(len); | 412 | hip->first_extents[0].block_count = cpu_to_be32(len); |
| 388 | res = 0; | 413 | res = 0; |
| 389 | } else { | 414 | } else { |
| 390 | /* try to append to extents in inode */ | 415 | /* try to append to extents in inode */ |
| 391 | res = hfsplus_add_extent(HFSPLUS_I(inode).first_extents, | 416 | res = hfsplus_add_extent(hip->first_extents, |
| 392 | HFSPLUS_I(inode).alloc_blocks, | 417 | hip->alloc_blocks, |
| 393 | start, len); | 418 | start, len); |
| 394 | if (res == -ENOSPC) | 419 | if (res == -ENOSPC) |
| 395 | goto insert_extent; | 420 | goto insert_extent; |
| 396 | } | 421 | } |
| 397 | if (!res) { | 422 | if (!res) { |
| 398 | hfsplus_dump_extent(HFSPLUS_I(inode).first_extents); | 423 | hfsplus_dump_extent(hip->first_extents); |
| 399 | HFSPLUS_I(inode).first_blocks += len; | 424 | hip->first_blocks += len; |
| 400 | } | 425 | } |
| 401 | } else { | 426 | } else { |
| 402 | res = hfsplus_add_extent(HFSPLUS_I(inode).cached_extents, | 427 | res = hfsplus_add_extent(hip->cached_extents, |
| 403 | HFSPLUS_I(inode).alloc_blocks - | 428 | hip->alloc_blocks - hip->cached_start, |
| 404 | HFSPLUS_I(inode).cached_start, | ||
| 405 | start, len); | 429 | start, len); |
| 406 | if (!res) { | 430 | if (!res) { |
| 407 | hfsplus_dump_extent(HFSPLUS_I(inode).cached_extents); | 431 | hfsplus_dump_extent(hip->cached_extents); |
| 408 | HFSPLUS_I(inode).flags |= HFSPLUS_FLG_EXT_DIRTY; | 432 | hip->flags |= HFSPLUS_FLG_EXT_DIRTY; |
| 409 | HFSPLUS_I(inode).cached_blocks += len; | 433 | hip->cached_blocks += len; |
| 410 | } else if (res == -ENOSPC) | 434 | } else if (res == -ENOSPC) |
| 411 | goto insert_extent; | 435 | goto insert_extent; |
| 412 | } | 436 | } |
| 413 | out: | 437 | out: |
| 414 | mutex_unlock(&HFSPLUS_I(inode).extents_lock); | 438 | mutex_unlock(&hip->extents_lock); |
| 415 | if (!res) { | 439 | if (!res) { |
| 416 | HFSPLUS_I(inode).alloc_blocks += len; | 440 | hip->alloc_blocks += len; |
| 417 | mark_inode_dirty(inode); | 441 | mark_inode_dirty(inode); |
| 418 | } | 442 | } |
| 419 | return res; | 443 | return res; |
| 420 | 444 | ||
| 421 | insert_extent: | 445 | insert_extent: |
| 422 | dprint(DBG_EXTENT, "insert new extent\n"); | 446 | dprint(DBG_EXTENT, "insert new extent\n"); |
| 423 | hfsplus_ext_write_extent(inode); | 447 | hfsplus_ext_write_extent_locked(inode); |
| 424 | 448 | ||
| 425 | memset(HFSPLUS_I(inode).cached_extents, 0, sizeof(hfsplus_extent_rec)); | 449 | memset(hip->cached_extents, 0, sizeof(hfsplus_extent_rec)); |
| 426 | HFSPLUS_I(inode).cached_extents[0].start_block = cpu_to_be32(start); | 450 | hip->cached_extents[0].start_block = cpu_to_be32(start); |
| 427 | HFSPLUS_I(inode).cached_extents[0].block_count = cpu_to_be32(len); | 451 | hip->cached_extents[0].block_count = cpu_to_be32(len); |
| 428 | hfsplus_dump_extent(HFSPLUS_I(inode).cached_extents); | 452 | hfsplus_dump_extent(hip->cached_extents); |
| 429 | HFSPLUS_I(inode).flags |= HFSPLUS_FLG_EXT_DIRTY | HFSPLUS_FLG_EXT_NEW; | 453 | hip->flags |= HFSPLUS_FLG_EXT_DIRTY | HFSPLUS_FLG_EXT_NEW; |
| 430 | HFSPLUS_I(inode).cached_start = HFSPLUS_I(inode).alloc_blocks; | 454 | hip->cached_start = hip->alloc_blocks; |
| 431 | HFSPLUS_I(inode).cached_blocks = len; | 455 | hip->cached_blocks = len; |
| 432 | 456 | ||
| 433 | res = 0; | 457 | res = 0; |
| 434 | goto out; | 458 | goto out; |
| @@ -437,13 +461,15 @@ insert_extent: | |||
| 437 | void hfsplus_file_truncate(struct inode *inode) | 461 | void hfsplus_file_truncate(struct inode *inode) |
| 438 | { | 462 | { |
| 439 | struct super_block *sb = inode->i_sb; | 463 | struct super_block *sb = inode->i_sb; |
| 464 | struct hfsplus_inode_info *hip = HFSPLUS_I(inode); | ||
| 440 | struct hfs_find_data fd; | 465 | struct hfs_find_data fd; |
| 441 | u32 alloc_cnt, blk_cnt, start; | 466 | u32 alloc_cnt, blk_cnt, start; |
| 442 | int res; | 467 | int res; |
| 443 | 468 | ||
| 444 | dprint(DBG_INODE, "truncate: %lu, %Lu -> %Lu\n", inode->i_ino, | 469 | dprint(DBG_INODE, "truncate: %lu, %Lu -> %Lu\n", |
| 445 | (long long)HFSPLUS_I(inode).phys_size, inode->i_size); | 470 | inode->i_ino, (long long)hip->phys_size, inode->i_size); |
| 446 | if (inode->i_size > HFSPLUS_I(inode).phys_size) { | 471 | |
| 472 | if (inode->i_size > hip->phys_size) { | ||
| 447 | struct address_space *mapping = inode->i_mapping; | 473 | struct address_space *mapping = inode->i_mapping; |
| 448 | struct page *page; | 474 | struct page *page; |
| 449 | void *fsdata; | 475 | void *fsdata; |
| @@ -460,47 +486,48 @@ void hfsplus_file_truncate(struct inode *inode) | |||
| 460 | return; | 486 | return; |
| 461 | mark_inode_dirty(inode); | 487 | mark_inode_dirty(inode); |
| 462 | return; | 488 | return; |
| 463 | } else if (inode->i_size == HFSPLUS_I(inode).phys_size) | 489 | } else if (inode->i_size == hip->phys_size) |
| 464 | return; | 490 | return; |
| 465 | 491 | ||
| 466 | blk_cnt = (inode->i_size + HFSPLUS_SB(sb).alloc_blksz - 1) >> HFSPLUS_SB(sb).alloc_blksz_shift; | 492 | blk_cnt = (inode->i_size + HFSPLUS_SB(sb)->alloc_blksz - 1) >> |
| 467 | alloc_cnt = HFSPLUS_I(inode).alloc_blocks; | 493 | HFSPLUS_SB(sb)->alloc_blksz_shift; |
| 494 | alloc_cnt = hip->alloc_blocks; | ||
| 468 | if (blk_cnt == alloc_cnt) | 495 | if (blk_cnt == alloc_cnt) |
| 469 | goto out; | 496 | goto out; |
| 470 | 497 | ||
| 471 | mutex_lock(&HFSPLUS_I(inode).extents_lock); | 498 | mutex_lock(&hip->extents_lock); |
| 472 | hfs_find_init(HFSPLUS_SB(sb).ext_tree, &fd); | 499 | hfs_find_init(HFSPLUS_SB(sb)->ext_tree, &fd); |
| 473 | while (1) { | 500 | while (1) { |
| 474 | if (alloc_cnt == HFSPLUS_I(inode).first_blocks) { | 501 | if (alloc_cnt == hip->first_blocks) { |
| 475 | hfsplus_free_extents(sb, HFSPLUS_I(inode).first_extents, | 502 | hfsplus_free_extents(sb, hip->first_extents, |
| 476 | alloc_cnt, alloc_cnt - blk_cnt); | 503 | alloc_cnt, alloc_cnt - blk_cnt); |
| 477 | hfsplus_dump_extent(HFSPLUS_I(inode).first_extents); | 504 | hfsplus_dump_extent(hip->first_extents); |
| 478 | HFSPLUS_I(inode).first_blocks = blk_cnt; | 505 | hip->first_blocks = blk_cnt; |
| 479 | break; | 506 | break; |
| 480 | } | 507 | } |
| 481 | res = __hfsplus_ext_cache_extent(&fd, inode, alloc_cnt); | 508 | res = __hfsplus_ext_cache_extent(&fd, inode, alloc_cnt); |
| 482 | if (res) | 509 | if (res) |
| 483 | break; | 510 | break; |
| 484 | start = HFSPLUS_I(inode).cached_start; | 511 | start = hip->cached_start; |
| 485 | hfsplus_free_extents(sb, HFSPLUS_I(inode).cached_extents, | 512 | hfsplus_free_extents(sb, hip->cached_extents, |
| 486 | alloc_cnt - start, alloc_cnt - blk_cnt); | 513 | alloc_cnt - start, alloc_cnt - blk_cnt); |
| 487 | hfsplus_dump_extent(HFSPLUS_I(inode).cached_extents); | 514 | hfsplus_dump_extent(hip->cached_extents); |
| 488 | if (blk_cnt > start) { | 515 | if (blk_cnt > start) { |
| 489 | HFSPLUS_I(inode).flags |= HFSPLUS_FLG_EXT_DIRTY; | 516 | hip->flags |= HFSPLUS_FLG_EXT_DIRTY; |
| 490 | break; | 517 | break; |
| 491 | } | 518 | } |
| 492 | alloc_cnt = start; | 519 | alloc_cnt = start; |
| 493 | HFSPLUS_I(inode).cached_start = HFSPLUS_I(inode).cached_blocks = 0; | 520 | hip->cached_start = hip->cached_blocks = 0; |
| 494 | HFSPLUS_I(inode).flags &= ~(HFSPLUS_FLG_EXT_DIRTY | HFSPLUS_FLG_EXT_NEW); | 521 | hip->flags &= ~(HFSPLUS_FLG_EXT_DIRTY | HFSPLUS_FLG_EXT_NEW); |
| 495 | hfs_brec_remove(&fd); | 522 | hfs_brec_remove(&fd); |
| 496 | } | 523 | } |
| 497 | hfs_find_exit(&fd); | 524 | hfs_find_exit(&fd); |
| 498 | mutex_unlock(&HFSPLUS_I(inode).extents_lock); | 525 | mutex_unlock(&hip->extents_lock); |
| 499 | 526 | ||
| 500 | HFSPLUS_I(inode).alloc_blocks = blk_cnt; | 527 | hip->alloc_blocks = blk_cnt; |
| 501 | out: | 528 | out: |
| 502 | HFSPLUS_I(inode).phys_size = inode->i_size; | 529 | hip->phys_size = inode->i_size; |
| 503 | HFSPLUS_I(inode).fs_blocks = (inode->i_size + sb->s_blocksize - 1) >> sb->s_blocksize_bits; | 530 | hip->fs_blocks = (inode->i_size + sb->s_blocksize - 1) >> sb->s_blocksize_bits; |
| 504 | inode_set_bytes(inode, HFSPLUS_I(inode).fs_blocks << sb->s_blocksize_bits); | 531 | inode_set_bytes(inode, hip->fs_blocks << sb->s_blocksize_bits); |
| 505 | mark_inode_dirty(inode); | 532 | mark_inode_dirty(inode); |
| 506 | } | 533 | } |
diff --git a/fs/hfsplus/hfsplus_fs.h b/fs/hfsplus/hfsplus_fs.h index dc856be3c2b0..cb3653efb57a 100644 --- a/fs/hfsplus/hfsplus_fs.h +++ b/fs/hfsplus/hfsplus_fs.h | |||
| @@ -62,7 +62,7 @@ struct hfs_btree { | |||
| 62 | unsigned int depth; | 62 | unsigned int depth; |
| 63 | 63 | ||
| 64 | //unsigned int map1_size, map_size; | 64 | //unsigned int map1_size, map_size; |
| 65 | struct semaphore tree_lock; | 65 | struct mutex tree_lock; |
| 66 | 66 | ||
| 67 | unsigned int pages_per_bnode; | 67 | unsigned int pages_per_bnode; |
| 68 | spinlock_t hash_lock; | 68 | spinlock_t hash_lock; |
| @@ -121,16 +121,21 @@ struct hfsplus_sb_info { | |||
| 121 | u32 sect_count; | 121 | u32 sect_count; |
| 122 | int fs_shift; | 122 | int fs_shift; |
| 123 | 123 | ||
| 124 | /* Stuff in host order from Vol Header */ | 124 | /* immutable data from the volume header */ |
| 125 | u32 alloc_blksz; | 125 | u32 alloc_blksz; |
| 126 | int alloc_blksz_shift; | 126 | int alloc_blksz_shift; |
| 127 | u32 total_blocks; | 127 | u32 total_blocks; |
| 128 | u32 data_clump_blocks, rsrc_clump_blocks; | ||
| 129 | |||
| 130 | /* mutable data from the volume header, protected by alloc_mutex */ | ||
| 128 | u32 free_blocks; | 131 | u32 free_blocks; |
| 129 | u32 next_alloc; | 132 | struct mutex alloc_mutex; |
| 133 | |||
| 134 | /* mutable data from the volume header, protected by vh_mutex */ | ||
| 130 | u32 next_cnid; | 135 | u32 next_cnid; |
| 131 | u32 file_count; | 136 | u32 file_count; |
| 132 | u32 folder_count; | 137 | u32 folder_count; |
| 133 | u32 data_clump_blocks, rsrc_clump_blocks; | 138 | struct mutex vh_mutex; |
| 134 | 139 | ||
| 135 | /* Config options */ | 140 | /* Config options */ |
| 136 | u32 creator; | 141 | u32 creator; |
| @@ -143,40 +148,50 @@ struct hfsplus_sb_info { | |||
| 143 | int part, session; | 148 | int part, session; |
| 144 | 149 | ||
| 145 | unsigned long flags; | 150 | unsigned long flags; |
| 146 | |||
| 147 | struct hlist_head rsrc_inodes; | ||
| 148 | }; | 151 | }; |
| 149 | 152 | ||
| 150 | #define HFSPLUS_SB_WRITEBACKUP 0x0001 | 153 | #define HFSPLUS_SB_WRITEBACKUP 0 |
| 151 | #define HFSPLUS_SB_NODECOMPOSE 0x0002 | 154 | #define HFSPLUS_SB_NODECOMPOSE 1 |
| 152 | #define HFSPLUS_SB_FORCE 0x0004 | 155 | #define HFSPLUS_SB_FORCE 2 |
| 153 | #define HFSPLUS_SB_HFSX 0x0008 | 156 | #define HFSPLUS_SB_HFSX 3 |
| 154 | #define HFSPLUS_SB_CASEFOLD 0x0010 | 157 | #define HFSPLUS_SB_CASEFOLD 4 |
| 155 | 158 | ||
| 156 | 159 | ||
| 157 | struct hfsplus_inode_info { | 160 | struct hfsplus_inode_info { |
| 158 | struct mutex extents_lock; | ||
| 159 | u32 clump_blocks, alloc_blocks; | ||
| 160 | sector_t fs_blocks; | ||
| 161 | /* Allocation extents from catalog record or volume header */ | ||
| 162 | hfsplus_extent_rec first_extents; | ||
| 163 | u32 first_blocks; | ||
| 164 | hfsplus_extent_rec cached_extents; | ||
| 165 | u32 cached_start, cached_blocks; | ||
| 166 | atomic_t opencnt; | 161 | atomic_t opencnt; |
| 167 | 162 | ||
| 168 | struct inode *rsrc_inode; | 163 | /* |
| 164 | * Extent allocation information, protected by extents_lock. | ||
| 165 | */ | ||
| 166 | u32 first_blocks; | ||
| 167 | u32 clump_blocks; | ||
| 168 | u32 alloc_blocks; | ||
| 169 | u32 cached_start; | ||
| 170 | u32 cached_blocks; | ||
| 171 | hfsplus_extent_rec first_extents; | ||
| 172 | hfsplus_extent_rec cached_extents; | ||
| 169 | unsigned long flags; | 173 | unsigned long flags; |
| 174 | struct mutex extents_lock; | ||
| 170 | 175 | ||
| 176 | /* | ||
| 177 | * Immutable data. | ||
| 178 | */ | ||
| 179 | struct inode *rsrc_inode; | ||
| 171 | __be32 create_date; | 180 | __be32 create_date; |
| 172 | /* Device number in hfsplus_permissions in catalog */ | ||
| 173 | u32 dev; | ||
| 174 | /* BSD system and user file flags */ | ||
| 175 | u8 rootflags; | ||
| 176 | u8 userflags; | ||
| 177 | 181 | ||
| 182 | /* | ||
| 183 | * Protected by sbi->vh_mutex. | ||
| 184 | */ | ||
| 185 | u32 linkid; | ||
| 186 | |||
| 187 | /* | ||
| 188 | * Protected by i_mutex. | ||
| 189 | */ | ||
| 190 | sector_t fs_blocks; | ||
| 191 | u8 userflags; /* BSD user file flags */ | ||
| 178 | struct list_head open_dir_list; | 192 | struct list_head open_dir_list; |
| 179 | loff_t phys_size; | 193 | loff_t phys_size; |
| 194 | |||
| 180 | struct inode vfs_inode; | 195 | struct inode vfs_inode; |
| 181 | }; | 196 | }; |
| 182 | 197 | ||
| @@ -184,8 +199,8 @@ struct hfsplus_inode_info { | |||
| 184 | #define HFSPLUS_FLG_EXT_DIRTY 0x0002 | 199 | #define HFSPLUS_FLG_EXT_DIRTY 0x0002 |
| 185 | #define HFSPLUS_FLG_EXT_NEW 0x0004 | 200 | #define HFSPLUS_FLG_EXT_NEW 0x0004 |
| 186 | 201 | ||
| 187 | #define HFSPLUS_IS_DATA(inode) (!(HFSPLUS_I(inode).flags & HFSPLUS_FLG_RSRC)) | 202 | #define HFSPLUS_IS_DATA(inode) (!(HFSPLUS_I(inode)->flags & HFSPLUS_FLG_RSRC)) |
| 188 | #define HFSPLUS_IS_RSRC(inode) (HFSPLUS_I(inode).flags & HFSPLUS_FLG_RSRC) | 203 | #define HFSPLUS_IS_RSRC(inode) (HFSPLUS_I(inode)->flags & HFSPLUS_FLG_RSRC) |
| 189 | 204 | ||
| 190 | struct hfs_find_data { | 205 | struct hfs_find_data { |
| 191 | /* filled by caller */ | 206 | /* filled by caller */ |
| @@ -311,6 +326,7 @@ int hfsplus_create_cat(u32, struct inode *, struct qstr *, struct inode *); | |||
| 311 | int hfsplus_delete_cat(u32, struct inode *, struct qstr *); | 326 | int hfsplus_delete_cat(u32, struct inode *, struct qstr *); |
| 312 | int hfsplus_rename_cat(u32, struct inode *, struct qstr *, | 327 | int hfsplus_rename_cat(u32, struct inode *, struct qstr *, |
| 313 | struct inode *, struct qstr *); | 328 | struct inode *, struct qstr *); |
| 329 | void hfsplus_cat_set_perms(struct inode *inode, struct hfsplus_perm *perms); | ||
| 314 | 330 | ||
| 315 | /* dir.c */ | 331 | /* dir.c */ |
| 316 | extern const struct inode_operations hfsplus_dir_inode_operations; | 332 | extern const struct inode_operations hfsplus_dir_inode_operations; |
| @@ -372,26 +388,15 @@ int hfsplus_read_wrapper(struct super_block *); | |||
| 372 | int hfs_part_find(struct super_block *, sector_t *, sector_t *); | 388 | int hfs_part_find(struct super_block *, sector_t *, sector_t *); |
| 373 | 389 | ||
| 374 | /* access macros */ | 390 | /* access macros */ |
| 375 | /* | ||
| 376 | static inline struct hfsplus_sb_info *HFSPLUS_SB(struct super_block *sb) | 391 | static inline struct hfsplus_sb_info *HFSPLUS_SB(struct super_block *sb) |
| 377 | { | 392 | { |
| 378 | return sb->s_fs_info; | 393 | return sb->s_fs_info; |
| 379 | } | 394 | } |
| 395 | |||
| 380 | static inline struct hfsplus_inode_info *HFSPLUS_I(struct inode *inode) | 396 | static inline struct hfsplus_inode_info *HFSPLUS_I(struct inode *inode) |
| 381 | { | 397 | { |
| 382 | return list_entry(inode, struct hfsplus_inode_info, vfs_inode); | 398 | return list_entry(inode, struct hfsplus_inode_info, vfs_inode); |
| 383 | } | 399 | } |
| 384 | */ | ||
| 385 | #define HFSPLUS_SB(super) (*(struct hfsplus_sb_info *)(super)->s_fs_info) | ||
| 386 | #define HFSPLUS_I(inode) (*list_entry(inode, struct hfsplus_inode_info, vfs_inode)) | ||
| 387 | |||
| 388 | #if 1 | ||
| 389 | #define hfsplus_kmap(p) ({ struct page *__p = (p); kmap(__p); }) | ||
| 390 | #define hfsplus_kunmap(p) ({ struct page *__p = (p); kunmap(__p); __p; }) | ||
| 391 | #else | ||
| 392 | #define hfsplus_kmap(p) kmap(p) | ||
| 393 | #define hfsplus_kunmap(p) kunmap(p) | ||
| 394 | #endif | ||
| 395 | 400 | ||
| 396 | #define sb_bread512(sb, sec, data) ({ \ | 401 | #define sb_bread512(sb, sec, data) ({ \ |
| 397 | struct buffer_head *__bh; \ | 402 | struct buffer_head *__bh; \ |
| @@ -419,6 +424,4 @@ static inline struct hfsplus_inode_info *HFSPLUS_I(struct inode *inode) | |||
| 419 | #define hfsp_ut2mt(t) __hfsp_ut2mt((t).tv_sec) | 424 | #define hfsp_ut2mt(t) __hfsp_ut2mt((t).tv_sec) |
| 420 | #define hfsp_now2mt() __hfsp_ut2mt(get_seconds()) | 425 | #define hfsp_now2mt() __hfsp_ut2mt(get_seconds()) |
| 421 | 426 | ||
| 422 | #define kdev_t_to_nr(x) (x) | ||
| 423 | |||
| 424 | #endif | 427 | #endif |
diff --git a/fs/hfsplus/hfsplus_raw.h b/fs/hfsplus/hfsplus_raw.h index fe99fe8db61a..6892899fd6fb 100644 --- a/fs/hfsplus/hfsplus_raw.h +++ b/fs/hfsplus/hfsplus_raw.h | |||
| @@ -200,6 +200,7 @@ struct hfsplus_cat_key { | |||
| 200 | struct hfsplus_unistr name; | 200 | struct hfsplus_unistr name; |
| 201 | } __packed; | 201 | } __packed; |
| 202 | 202 | ||
| 203 | #define HFSPLUS_CAT_KEYLEN (sizeof(struct hfsplus_cat_key)) | ||
| 203 | 204 | ||
| 204 | /* Structs from hfs.h */ | 205 | /* Structs from hfs.h */ |
| 205 | struct hfsp_point { | 206 | struct hfsp_point { |
| @@ -323,7 +324,7 @@ struct hfsplus_ext_key { | |||
| 323 | __be32 start_block; | 324 | __be32 start_block; |
| 324 | } __packed; | 325 | } __packed; |
| 325 | 326 | ||
| 326 | #define HFSPLUS_EXT_KEYLEN 12 | 327 | #define HFSPLUS_EXT_KEYLEN sizeof(struct hfsplus_ext_key) |
| 327 | 328 | ||
| 328 | /* HFS+ generic BTree key */ | 329 | /* HFS+ generic BTree key */ |
| 329 | typedef union { | 330 | typedef union { |
diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c index c5a979d62c65..78449280dae0 100644 --- a/fs/hfsplus/inode.c +++ b/fs/hfsplus/inode.c | |||
| @@ -36,7 +36,7 @@ static int hfsplus_write_begin(struct file *file, struct address_space *mapping, | |||
| 36 | *pagep = NULL; | 36 | *pagep = NULL; |
| 37 | ret = cont_write_begin(file, mapping, pos, len, flags, pagep, fsdata, | 37 | ret = cont_write_begin(file, mapping, pos, len, flags, pagep, fsdata, |
| 38 | hfsplus_get_block, | 38 | hfsplus_get_block, |
| 39 | &HFSPLUS_I(mapping->host).phys_size); | 39 | &HFSPLUS_I(mapping->host)->phys_size); |
| 40 | if (unlikely(ret)) { | 40 | if (unlikely(ret)) { |
| 41 | loff_t isize = mapping->host->i_size; | 41 | loff_t isize = mapping->host->i_size; |
| 42 | if (pos + len > isize) | 42 | if (pos + len > isize) |
| @@ -62,13 +62,13 @@ static int hfsplus_releasepage(struct page *page, gfp_t mask) | |||
| 62 | 62 | ||
| 63 | switch (inode->i_ino) { | 63 | switch (inode->i_ino) { |
| 64 | case HFSPLUS_EXT_CNID: | 64 | case HFSPLUS_EXT_CNID: |
| 65 | tree = HFSPLUS_SB(sb).ext_tree; | 65 | tree = HFSPLUS_SB(sb)->ext_tree; |
| 66 | break; | 66 | break; |
| 67 | case HFSPLUS_CAT_CNID: | 67 | case HFSPLUS_CAT_CNID: |
| 68 | tree = HFSPLUS_SB(sb).cat_tree; | 68 | tree = HFSPLUS_SB(sb)->cat_tree; |
| 69 | break; | 69 | break; |
| 70 | case HFSPLUS_ATTR_CNID: | 70 | case HFSPLUS_ATTR_CNID: |
| 71 | tree = HFSPLUS_SB(sb).attr_tree; | 71 | tree = HFSPLUS_SB(sb)->attr_tree; |
| 72 | break; | 72 | break; |
| 73 | default: | 73 | default: |
| 74 | BUG(); | 74 | BUG(); |
| @@ -172,12 +172,13 @@ static struct dentry *hfsplus_file_lookup(struct inode *dir, struct dentry *dent | |||
| 172 | struct hfs_find_data fd; | 172 | struct hfs_find_data fd; |
| 173 | struct super_block *sb = dir->i_sb; | 173 | struct super_block *sb = dir->i_sb; |
| 174 | struct inode *inode = NULL; | 174 | struct inode *inode = NULL; |
| 175 | struct hfsplus_inode_info *hip; | ||
| 175 | int err; | 176 | int err; |
| 176 | 177 | ||
| 177 | if (HFSPLUS_IS_RSRC(dir) || strcmp(dentry->d_name.name, "rsrc")) | 178 | if (HFSPLUS_IS_RSRC(dir) || strcmp(dentry->d_name.name, "rsrc")) |
| 178 | goto out; | 179 | goto out; |
| 179 | 180 | ||
| 180 | inode = HFSPLUS_I(dir).rsrc_inode; | 181 | inode = HFSPLUS_I(dir)->rsrc_inode; |
| 181 | if (inode) | 182 | if (inode) |
| 182 | goto out; | 183 | goto out; |
| 183 | 184 | ||
| @@ -185,12 +186,13 @@ static struct dentry *hfsplus_file_lookup(struct inode *dir, struct dentry *dent | |||
| 185 | if (!inode) | 186 | if (!inode) |
| 186 | return ERR_PTR(-ENOMEM); | 187 | return ERR_PTR(-ENOMEM); |
| 187 | 188 | ||
| 189 | hip = HFSPLUS_I(inode); | ||
| 188 | inode->i_ino = dir->i_ino; | 190 | inode->i_ino = dir->i_ino; |
| 189 | INIT_LIST_HEAD(&HFSPLUS_I(inode).open_dir_list); | 191 | INIT_LIST_HEAD(&hip->open_dir_list); |
| 190 | mutex_init(&HFSPLUS_I(inode).extents_lock); | 192 | mutex_init(&hip->extents_lock); |
| 191 | HFSPLUS_I(inode).flags = HFSPLUS_FLG_RSRC; | 193 | hip->flags = HFSPLUS_FLG_RSRC; |
| 192 | 194 | ||
| 193 | hfs_find_init(HFSPLUS_SB(sb).cat_tree, &fd); | 195 | hfs_find_init(HFSPLUS_SB(sb)->cat_tree, &fd); |
| 194 | err = hfsplus_find_cat(sb, dir->i_ino, &fd); | 196 | err = hfsplus_find_cat(sb, dir->i_ino, &fd); |
| 195 | if (!err) | 197 | if (!err) |
| 196 | err = hfsplus_cat_read_inode(inode, &fd); | 198 | err = hfsplus_cat_read_inode(inode, &fd); |
| @@ -199,10 +201,18 @@ static struct dentry *hfsplus_file_lookup(struct inode *dir, struct dentry *dent | |||
| 199 | iput(inode); | 201 | iput(inode); |
| 200 | return ERR_PTR(err); | 202 | return ERR_PTR(err); |
| 201 | } | 203 | } |
| 202 | HFSPLUS_I(inode).rsrc_inode = dir; | 204 | hip->rsrc_inode = dir; |
| 203 | HFSPLUS_I(dir).rsrc_inode = inode; | 205 | HFSPLUS_I(dir)->rsrc_inode = inode; |
| 204 | igrab(dir); | 206 | igrab(dir); |
| 205 | hlist_add_head(&inode->i_hash, &HFSPLUS_SB(sb).rsrc_inodes); | 207 | |
| 208 | /* | ||
| 209 | * __mark_inode_dirty expects inodes to be hashed. Since we don't | ||
| 210 | * want resource fork inodes in the regular inode space, we make them | ||
| 211 | * appear hashed, but do not put on any lists. hlist_del() | ||
| 212 | * will work fine and require no locking. | ||
| 213 | */ | ||
| 214 | inode->i_hash.pprev = &inode->i_hash.next; | ||
| 215 | |||
| 206 | mark_inode_dirty(inode); | 216 | mark_inode_dirty(inode); |
| 207 | out: | 217 | out: |
| 208 | d_add(dentry, inode); | 218 | d_add(dentry, inode); |
| @@ -211,30 +221,27 @@ out: | |||
| 211 | 221 | ||
| 212 | static void hfsplus_get_perms(struct inode *inode, struct hfsplus_perm *perms, int dir) | 222 | static void hfsplus_get_perms(struct inode *inode, struct hfsplus_perm *perms, int dir) |
| 213 | { | 223 | { |
| 214 | struct super_block *sb = inode->i_sb; | 224 | struct hfsplus_sb_info *sbi = HFSPLUS_SB(inode->i_sb); |
| 215 | u16 mode; | 225 | u16 mode; |
| 216 | 226 | ||
| 217 | mode = be16_to_cpu(perms->mode); | 227 | mode = be16_to_cpu(perms->mode); |
| 218 | 228 | ||
| 219 | inode->i_uid = be32_to_cpu(perms->owner); | 229 | inode->i_uid = be32_to_cpu(perms->owner); |
| 220 | if (!inode->i_uid && !mode) | 230 | if (!inode->i_uid && !mode) |
| 221 | inode->i_uid = HFSPLUS_SB(sb).uid; | 231 | inode->i_uid = sbi->uid; |
| 222 | 232 | ||
| 223 | inode->i_gid = be32_to_cpu(perms->group); | 233 | inode->i_gid = be32_to_cpu(perms->group); |
| 224 | if (!inode->i_gid && !mode) | 234 | if (!inode->i_gid && !mode) |
| 225 | inode->i_gid = HFSPLUS_SB(sb).gid; | 235 | inode->i_gid = sbi->gid; |
| 226 | 236 | ||
| 227 | if (dir) { | 237 | if (dir) { |
| 228 | mode = mode ? (mode & S_IALLUGO) : | 238 | mode = mode ? (mode & S_IALLUGO) : (S_IRWXUGO & ~(sbi->umask)); |
| 229 | (S_IRWXUGO & ~(HFSPLUS_SB(sb).umask)); | ||
| 230 | mode |= S_IFDIR; | 239 | mode |= S_IFDIR; |
| 231 | } else if (!mode) | 240 | } else if (!mode) |
| 232 | mode = S_IFREG | ((S_IRUGO|S_IWUGO) & | 241 | mode = S_IFREG | ((S_IRUGO|S_IWUGO) & ~(sbi->umask)); |
| 233 | ~(HFSPLUS_SB(sb).umask)); | ||
| 234 | inode->i_mode = mode; | 242 | inode->i_mode = mode; |
| 235 | 243 | ||
| 236 | HFSPLUS_I(inode).rootflags = perms->rootflags; | 244 | HFSPLUS_I(inode)->userflags = perms->userflags; |
| 237 | HFSPLUS_I(inode).userflags = perms->userflags; | ||
| 238 | if (perms->rootflags & HFSPLUS_FLG_IMMUTABLE) | 245 | if (perms->rootflags & HFSPLUS_FLG_IMMUTABLE) |
| 239 | inode->i_flags |= S_IMMUTABLE; | 246 | inode->i_flags |= S_IMMUTABLE; |
| 240 | else | 247 | else |
| @@ -245,30 +252,13 @@ static void hfsplus_get_perms(struct inode *inode, struct hfsplus_perm *perms, i | |||
| 245 | inode->i_flags &= ~S_APPEND; | 252 | inode->i_flags &= ~S_APPEND; |
| 246 | } | 253 | } |
| 247 | 254 | ||
| 248 | static void hfsplus_set_perms(struct inode *inode, struct hfsplus_perm *perms) | ||
| 249 | { | ||
| 250 | if (inode->i_flags & S_IMMUTABLE) | ||
| 251 | perms->rootflags |= HFSPLUS_FLG_IMMUTABLE; | ||
| 252 | else | ||
| 253 | perms->rootflags &= ~HFSPLUS_FLG_IMMUTABLE; | ||
| 254 | if (inode->i_flags & S_APPEND) | ||
| 255 | perms->rootflags |= HFSPLUS_FLG_APPEND; | ||
| 256 | else | ||
| 257 | perms->rootflags &= ~HFSPLUS_FLG_APPEND; | ||
| 258 | perms->userflags = HFSPLUS_I(inode).userflags; | ||
| 259 | perms->mode = cpu_to_be16(inode->i_mode); | ||
| 260 | perms->owner = cpu_to_be32(inode->i_uid); | ||
| 261 | perms->group = cpu_to_be32(inode->i_gid); | ||
| 262 | perms->dev = cpu_to_be32(HFSPLUS_I(inode).dev); | ||
| 263 | } | ||
| 264 | |||
| 265 | static int hfsplus_file_open(struct inode *inode, struct file *file) | 255 | static int hfsplus_file_open(struct inode *inode, struct file *file) |
| 266 | { | 256 | { |
| 267 | if (HFSPLUS_IS_RSRC(inode)) | 257 | if (HFSPLUS_IS_RSRC(inode)) |
| 268 | inode = HFSPLUS_I(inode).rsrc_inode; | 258 | inode = HFSPLUS_I(inode)->rsrc_inode; |
| 269 | if (!(file->f_flags & O_LARGEFILE) && i_size_read(inode) > MAX_NON_LFS) | 259 | if (!(file->f_flags & O_LARGEFILE) && i_size_read(inode) > MAX_NON_LFS) |
| 270 | return -EOVERFLOW; | 260 | return -EOVERFLOW; |
| 271 | atomic_inc(&HFSPLUS_I(inode).opencnt); | 261 | atomic_inc(&HFSPLUS_I(inode)->opencnt); |
| 272 | return 0; | 262 | return 0; |
| 273 | } | 263 | } |
| 274 | 264 | ||
| @@ -277,12 +267,13 @@ static int hfsplus_file_release(struct inode *inode, struct file *file) | |||
| 277 | struct super_block *sb = inode->i_sb; | 267 | struct super_block *sb = inode->i_sb; |
| 278 | 268 | ||
| 279 | if (HFSPLUS_IS_RSRC(inode)) | 269 | if (HFSPLUS_IS_RSRC(inode)) |
| 280 | inode = HFSPLUS_I(inode).rsrc_inode; | 270 | inode = HFSPLUS_I(inode)->rsrc_inode; |
| 281 | if (atomic_dec_and_test(&HFSPLUS_I(inode).opencnt)) { | 271 | if (atomic_dec_and_test(&HFSPLUS_I(inode)->opencnt)) { |
| 282 | mutex_lock(&inode->i_mutex); | 272 | mutex_lock(&inode->i_mutex); |
| 283 | hfsplus_file_truncate(inode); | 273 | hfsplus_file_truncate(inode); |
| 284 | if (inode->i_flags & S_DEAD) { | 274 | if (inode->i_flags & S_DEAD) { |
| 285 | hfsplus_delete_cat(inode->i_ino, HFSPLUS_SB(sb).hidden_dir, NULL); | 275 | hfsplus_delete_cat(inode->i_ino, |
| 276 | HFSPLUS_SB(sb)->hidden_dir, NULL); | ||
| 286 | hfsplus_delete_inode(inode); | 277 | hfsplus_delete_inode(inode); |
| 287 | } | 278 | } |
| 288 | mutex_unlock(&inode->i_mutex); | 279 | mutex_unlock(&inode->i_mutex); |
| @@ -361,47 +352,52 @@ static const struct file_operations hfsplus_file_operations = { | |||
| 361 | 352 | ||
| 362 | struct inode *hfsplus_new_inode(struct super_block *sb, int mode) | 353 | struct inode *hfsplus_new_inode(struct super_block *sb, int mode) |
| 363 | { | 354 | { |
| 355 | struct hfsplus_sb_info *sbi = HFSPLUS_SB(sb); | ||
| 364 | struct inode *inode = new_inode(sb); | 356 | struct inode *inode = new_inode(sb); |
| 357 | struct hfsplus_inode_info *hip; | ||
| 358 | |||
| 365 | if (!inode) | 359 | if (!inode) |
| 366 | return NULL; | 360 | return NULL; |
| 367 | 361 | ||
| 368 | inode->i_ino = HFSPLUS_SB(sb).next_cnid++; | 362 | inode->i_ino = sbi->next_cnid++; |
| 369 | inode->i_mode = mode; | 363 | inode->i_mode = mode; |
| 370 | inode->i_uid = current_fsuid(); | 364 | inode->i_uid = current_fsuid(); |
| 371 | inode->i_gid = current_fsgid(); | 365 | inode->i_gid = current_fsgid(); |
| 372 | inode->i_nlink = 1; | 366 | inode->i_nlink = 1; |
| 373 | inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC; | 367 | inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC; |
| 374 | INIT_LIST_HEAD(&HFSPLUS_I(inode).open_dir_list); | 368 | |
| 375 | mutex_init(&HFSPLUS_I(inode).extents_lock); | 369 | hip = HFSPLUS_I(inode); |
| 376 | atomic_set(&HFSPLUS_I(inode).opencnt, 0); | 370 | INIT_LIST_HEAD(&hip->open_dir_list); |
| 377 | HFSPLUS_I(inode).flags = 0; | 371 | mutex_init(&hip->extents_lock); |
| 378 | memset(HFSPLUS_I(inode).first_extents, 0, sizeof(hfsplus_extent_rec)); | 372 | atomic_set(&hip->opencnt, 0); |
| 379 | memset(HFSPLUS_I(inode).cached_extents, 0, sizeof(hfsplus_extent_rec)); | 373 | hip->flags = 0; |
| 380 | HFSPLUS_I(inode).alloc_blocks = 0; | 374 | memset(hip->first_extents, 0, sizeof(hfsplus_extent_rec)); |
| 381 | HFSPLUS_I(inode).first_blocks = 0; | 375 | memset(hip->cached_extents, 0, sizeof(hfsplus_extent_rec)); |
| 382 | HFSPLUS_I(inode).cached_start = 0; | 376 | hip->alloc_blocks = 0; |
| 383 | HFSPLUS_I(inode).cached_blocks = 0; | 377 | hip->first_blocks = 0; |
| 384 | HFSPLUS_I(inode).phys_size = 0; | 378 | hip->cached_start = 0; |
| 385 | HFSPLUS_I(inode).fs_blocks = 0; | 379 | hip->cached_blocks = 0; |
| 386 | HFSPLUS_I(inode).rsrc_inode = NULL; | 380 | hip->phys_size = 0; |
| 381 | hip->fs_blocks = 0; | ||
| 382 | hip->rsrc_inode = NULL; | ||
| 387 | if (S_ISDIR(inode->i_mode)) { | 383 | if (S_ISDIR(inode->i_mode)) { |
| 388 | inode->i_size = 2; | 384 | inode->i_size = 2; |
| 389 | HFSPLUS_SB(sb).folder_count++; | 385 | sbi->folder_count++; |
| 390 | inode->i_op = &hfsplus_dir_inode_operations; | 386 | inode->i_op = &hfsplus_dir_inode_operations; |
| 391 | inode->i_fop = &hfsplus_dir_operations; | 387 | inode->i_fop = &hfsplus_dir_operations; |
| 392 | } else if (S_ISREG(inode->i_mode)) { | 388 | } else if (S_ISREG(inode->i_mode)) { |
| 393 | HFSPLUS_SB(sb).file_count++; | 389 | sbi->file_count++; |
| 394 | inode->i_op = &hfsplus_file_inode_operations; | 390 | inode->i_op = &hfsplus_file_inode_operations; |
| 395 | inode->i_fop = &hfsplus_file_operations; | 391 | inode->i_fop = &hfsplus_file_operations; |
| 396 | inode->i_mapping->a_ops = &hfsplus_aops; | 392 | inode->i_mapping->a_ops = &hfsplus_aops; |
| 397 | HFSPLUS_I(inode).clump_blocks = HFSPLUS_SB(sb).data_clump_blocks; | 393 | hip->clump_blocks = sbi->data_clump_blocks; |
| 398 | } else if (S_ISLNK(inode->i_mode)) { | 394 | } else if (S_ISLNK(inode->i_mode)) { |
| 399 | HFSPLUS_SB(sb).file_count++; | 395 | sbi->file_count++; |
| 400 | inode->i_op = &page_symlink_inode_operations; | 396 | inode->i_op = &page_symlink_inode_operations; |
| 401 | inode->i_mapping->a_ops = &hfsplus_aops; | 397 | inode->i_mapping->a_ops = &hfsplus_aops; |
| 402 | HFSPLUS_I(inode).clump_blocks = 1; | 398 | hip->clump_blocks = 1; |
| 403 | } else | 399 | } else |
| 404 | HFSPLUS_SB(sb).file_count++; | 400 | sbi->file_count++; |
| 405 | insert_inode_hash(inode); | 401 | insert_inode_hash(inode); |
| 406 | mark_inode_dirty(inode); | 402 | mark_inode_dirty(inode); |
| 407 | sb->s_dirt = 1; | 403 | sb->s_dirt = 1; |
| @@ -414,11 +410,11 @@ void hfsplus_delete_inode(struct inode *inode) | |||
| 414 | struct super_block *sb = inode->i_sb; | 410 | struct super_block *sb = inode->i_sb; |
| 415 | 411 | ||
| 416 | if (S_ISDIR(inode->i_mode)) { | 412 | if (S_ISDIR(inode->i_mode)) { |
| 417 | HFSPLUS_SB(sb).folder_count--; | 413 | HFSPLUS_SB(sb)->folder_count--; |
| 418 | sb->s_dirt = 1; | 414 | sb->s_dirt = 1; |
| 419 | return; | 415 | return; |
| 420 | } | 416 | } |
| 421 | HFSPLUS_SB(sb).file_count--; | 417 | HFSPLUS_SB(sb)->file_count--; |
| 422 | if (S_ISREG(inode->i_mode)) { | 418 | if (S_ISREG(inode->i_mode)) { |
| 423 | if (!inode->i_nlink) { | 419 | if (!inode->i_nlink) { |
| 424 | inode->i_size = 0; | 420 | inode->i_size = 0; |
| @@ -434,34 +430,39 @@ void hfsplus_delete_inode(struct inode *inode) | |||
| 434 | void hfsplus_inode_read_fork(struct inode *inode, struct hfsplus_fork_raw *fork) | 430 | void hfsplus_inode_read_fork(struct inode *inode, struct hfsplus_fork_raw *fork) |
| 435 | { | 431 | { |
| 436 | struct super_block *sb = inode->i_sb; | 432 | struct super_block *sb = inode->i_sb; |
| 433 | struct hfsplus_sb_info *sbi = HFSPLUS_SB(sb); | ||
| 434 | struct hfsplus_inode_info *hip = HFSPLUS_I(inode); | ||
| 437 | u32 count; | 435 | u32 count; |
| 438 | int i; | 436 | int i; |
| 439 | 437 | ||
| 440 | memcpy(&HFSPLUS_I(inode).first_extents, &fork->extents, | 438 | memcpy(&hip->first_extents, &fork->extents, sizeof(hfsplus_extent_rec)); |
| 441 | sizeof(hfsplus_extent_rec)); | ||
| 442 | for (count = 0, i = 0; i < 8; i++) | 439 | for (count = 0, i = 0; i < 8; i++) |
| 443 | count += be32_to_cpu(fork->extents[i].block_count); | 440 | count += be32_to_cpu(fork->extents[i].block_count); |
| 444 | HFSPLUS_I(inode).first_blocks = count; | 441 | hip->first_blocks = count; |
| 445 | memset(HFSPLUS_I(inode).cached_extents, 0, sizeof(hfsplus_extent_rec)); | 442 | memset(hip->cached_extents, 0, sizeof(hfsplus_extent_rec)); |
| 446 | HFSPLUS_I(inode).cached_start = 0; | 443 | hip->cached_start = 0; |
| 447 | HFSPLUS_I(inode).cached_blocks = 0; | 444 | hip->cached_blocks = 0; |
| 448 | 445 | ||
| 449 | HFSPLUS_I(inode).alloc_blocks = be32_to_cpu(fork->total_blocks); | 446 | hip->alloc_blocks = be32_to_cpu(fork->total_blocks); |
| 450 | inode->i_size = HFSPLUS_I(inode).phys_size = be64_to_cpu(fork->total_size); | 447 | hip->phys_size = inode->i_size = be64_to_cpu(fork->total_size); |
| 451 | HFSPLUS_I(inode).fs_blocks = (inode->i_size + sb->s_blocksize - 1) >> sb->s_blocksize_bits; | 448 | hip->fs_blocks = |
| 452 | inode_set_bytes(inode, HFSPLUS_I(inode).fs_blocks << sb->s_blocksize_bits); | 449 | (inode->i_size + sb->s_blocksize - 1) >> sb->s_blocksize_bits; |
| 453 | HFSPLUS_I(inode).clump_blocks = be32_to_cpu(fork->clump_size) >> HFSPLUS_SB(sb).alloc_blksz_shift; | 450 | inode_set_bytes(inode, hip->fs_blocks << sb->s_blocksize_bits); |
| 454 | if (!HFSPLUS_I(inode).clump_blocks) | 451 | hip->clump_blocks = |
| 455 | HFSPLUS_I(inode).clump_blocks = HFSPLUS_IS_RSRC(inode) ? HFSPLUS_SB(sb).rsrc_clump_blocks : | 452 | be32_to_cpu(fork->clump_size) >> sbi->alloc_blksz_shift; |
| 456 | HFSPLUS_SB(sb).data_clump_blocks; | 453 | if (!hip->clump_blocks) { |
| 454 | hip->clump_blocks = HFSPLUS_IS_RSRC(inode) ? | ||
| 455 | sbi->rsrc_clump_blocks : | ||
| 456 | sbi->data_clump_blocks; | ||
| 457 | } | ||
| 457 | } | 458 | } |
| 458 | 459 | ||
| 459 | void hfsplus_inode_write_fork(struct inode *inode, struct hfsplus_fork_raw *fork) | 460 | void hfsplus_inode_write_fork(struct inode *inode, struct hfsplus_fork_raw *fork) |
| 460 | { | 461 | { |
| 461 | memcpy(&fork->extents, &HFSPLUS_I(inode).first_extents, | 462 | memcpy(&fork->extents, &HFSPLUS_I(inode)->first_extents, |
| 462 | sizeof(hfsplus_extent_rec)); | 463 | sizeof(hfsplus_extent_rec)); |
| 463 | fork->total_size = cpu_to_be64(inode->i_size); | 464 | fork->total_size = cpu_to_be64(inode->i_size); |
| 464 | fork->total_blocks = cpu_to_be32(HFSPLUS_I(inode).alloc_blocks); | 465 | fork->total_blocks = cpu_to_be32(HFSPLUS_I(inode)->alloc_blocks); |
| 465 | } | 466 | } |
| 466 | 467 | ||
| 467 | int hfsplus_cat_read_inode(struct inode *inode, struct hfs_find_data *fd) | 468 | int hfsplus_cat_read_inode(struct inode *inode, struct hfs_find_data *fd) |
| @@ -472,7 +473,7 @@ int hfsplus_cat_read_inode(struct inode *inode, struct hfs_find_data *fd) | |||
| 472 | 473 | ||
| 473 | type = hfs_bnode_read_u16(fd->bnode, fd->entryoffset); | 474 | type = hfs_bnode_read_u16(fd->bnode, fd->entryoffset); |
| 474 | 475 | ||
| 475 | HFSPLUS_I(inode).dev = 0; | 476 | HFSPLUS_I(inode)->linkid = 0; |
| 476 | if (type == HFSPLUS_FOLDER) { | 477 | if (type == HFSPLUS_FOLDER) { |
| 477 | struct hfsplus_cat_folder *folder = &entry.folder; | 478 | struct hfsplus_cat_folder *folder = &entry.folder; |
| 478 | 479 | ||
| @@ -486,8 +487,8 @@ int hfsplus_cat_read_inode(struct inode *inode, struct hfs_find_data *fd) | |||
| 486 | inode->i_atime = hfsp_mt2ut(folder->access_date); | 487 | inode->i_atime = hfsp_mt2ut(folder->access_date); |
| 487 | inode->i_mtime = hfsp_mt2ut(folder->content_mod_date); | 488 | inode->i_mtime = hfsp_mt2ut(folder->content_mod_date); |
| 488 | inode->i_ctime = hfsp_mt2ut(folder->attribute_mod_date); | 489 | inode->i_ctime = hfsp_mt2ut(folder->attribute_mod_date); |
| 489 | HFSPLUS_I(inode).create_date = folder->create_date; | 490 | HFSPLUS_I(inode)->create_date = folder->create_date; |
| 490 | HFSPLUS_I(inode).fs_blocks = 0; | 491 | HFSPLUS_I(inode)->fs_blocks = 0; |
| 491 | inode->i_op = &hfsplus_dir_inode_operations; | 492 | inode->i_op = &hfsplus_dir_inode_operations; |
| 492 | inode->i_fop = &hfsplus_dir_operations; | 493 | inode->i_fop = &hfsplus_dir_operations; |
| 493 | } else if (type == HFSPLUS_FILE) { | 494 | } else if (type == HFSPLUS_FILE) { |
| @@ -518,7 +519,7 @@ int hfsplus_cat_read_inode(struct inode *inode, struct hfs_find_data *fd) | |||
| 518 | inode->i_atime = hfsp_mt2ut(file->access_date); | 519 | inode->i_atime = hfsp_mt2ut(file->access_date); |
| 519 | inode->i_mtime = hfsp_mt2ut(file->content_mod_date); | 520 | inode->i_mtime = hfsp_mt2ut(file->content_mod_date); |
| 520 | inode->i_ctime = hfsp_mt2ut(file->attribute_mod_date); | 521 | inode->i_ctime = hfsp_mt2ut(file->attribute_mod_date); |
| 521 | HFSPLUS_I(inode).create_date = file->create_date; | 522 | HFSPLUS_I(inode)->create_date = file->create_date; |
| 522 | } else { | 523 | } else { |
| 523 | printk(KERN_ERR "hfs: bad catalog entry used to create inode\n"); | 524 | printk(KERN_ERR "hfs: bad catalog entry used to create inode\n"); |
| 524 | res = -EIO; | 525 | res = -EIO; |
| @@ -533,12 +534,12 @@ int hfsplus_cat_write_inode(struct inode *inode) | |||
| 533 | hfsplus_cat_entry entry; | 534 | hfsplus_cat_entry entry; |
| 534 | 535 | ||
| 535 | if (HFSPLUS_IS_RSRC(inode)) | 536 | if (HFSPLUS_IS_RSRC(inode)) |
| 536 | main_inode = HFSPLUS_I(inode).rsrc_inode; | 537 | main_inode = HFSPLUS_I(inode)->rsrc_inode; |
| 537 | 538 | ||
| 538 | if (!main_inode->i_nlink) | 539 | if (!main_inode->i_nlink) |
| 539 | return 0; | 540 | return 0; |
| 540 | 541 | ||
| 541 | if (hfs_find_init(HFSPLUS_SB(main_inode->i_sb).cat_tree, &fd)) | 542 | if (hfs_find_init(HFSPLUS_SB(main_inode->i_sb)->cat_tree, &fd)) |
| 542 | /* panic? */ | 543 | /* panic? */ |
| 543 | return -EIO; | 544 | return -EIO; |
| 544 | 545 | ||
| @@ -554,7 +555,7 @@ int hfsplus_cat_write_inode(struct inode *inode) | |||
| 554 | hfs_bnode_read(fd.bnode, &entry, fd.entryoffset, | 555 | hfs_bnode_read(fd.bnode, &entry, fd.entryoffset, |
| 555 | sizeof(struct hfsplus_cat_folder)); | 556 | sizeof(struct hfsplus_cat_folder)); |
| 556 | /* simple node checks? */ | 557 | /* simple node checks? */ |
| 557 | hfsplus_set_perms(inode, &folder->permissions); | 558 | hfsplus_cat_set_perms(inode, &folder->permissions); |
| 558 | folder->access_date = hfsp_ut2mt(inode->i_atime); | 559 | folder->access_date = hfsp_ut2mt(inode->i_atime); |
| 559 | folder->content_mod_date = hfsp_ut2mt(inode->i_mtime); | 560 | folder->content_mod_date = hfsp_ut2mt(inode->i_mtime); |
| 560 | folder->attribute_mod_date = hfsp_ut2mt(inode->i_ctime); | 561 | folder->attribute_mod_date = hfsp_ut2mt(inode->i_ctime); |
| @@ -576,11 +577,7 @@ int hfsplus_cat_write_inode(struct inode *inode) | |||
| 576 | hfs_bnode_read(fd.bnode, &entry, fd.entryoffset, | 577 | hfs_bnode_read(fd.bnode, &entry, fd.entryoffset, |
| 577 | sizeof(struct hfsplus_cat_file)); | 578 | sizeof(struct hfsplus_cat_file)); |
| 578 | hfsplus_inode_write_fork(inode, &file->data_fork); | 579 | hfsplus_inode_write_fork(inode, &file->data_fork); |
| 579 | if (S_ISREG(inode->i_mode)) | 580 | hfsplus_cat_set_perms(inode, &file->permissions); |
| 580 | HFSPLUS_I(inode).dev = inode->i_nlink; | ||
| 581 | if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) | ||
| 582 | HFSPLUS_I(inode).dev = kdev_t_to_nr(inode->i_rdev); | ||
| 583 | hfsplus_set_perms(inode, &file->permissions); | ||
| 584 | if ((file->permissions.rootflags | file->permissions.userflags) & HFSPLUS_FLG_IMMUTABLE) | 581 | if ((file->permissions.rootflags | file->permissions.userflags) & HFSPLUS_FLG_IMMUTABLE) |
| 585 | file->flags |= cpu_to_be16(HFSPLUS_FILE_LOCKED); | 582 | file->flags |= cpu_to_be16(HFSPLUS_FILE_LOCKED); |
| 586 | else | 583 | else |
diff --git a/fs/hfsplus/ioctl.c b/fs/hfsplus/ioctl.c index ac405f099026..5b4667e08ef7 100644 --- a/fs/hfsplus/ioctl.c +++ b/fs/hfsplus/ioctl.c | |||
| @@ -17,83 +17,98 @@ | |||
| 17 | #include <linux/mount.h> | 17 | #include <linux/mount.h> |
| 18 | #include <linux/sched.h> | 18 | #include <linux/sched.h> |
| 19 | #include <linux/xattr.h> | 19 | #include <linux/xattr.h> |
| 20 | #include <linux/smp_lock.h> | ||
| 21 | #include <asm/uaccess.h> | 20 | #include <asm/uaccess.h> |
| 22 | #include "hfsplus_fs.h" | 21 | #include "hfsplus_fs.h" |
| 23 | 22 | ||
| 24 | long hfsplus_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) | 23 | static int hfsplus_ioctl_getflags(struct file *file, int __user *user_flags) |
| 25 | { | 24 | { |
| 26 | struct inode *inode = filp->f_path.dentry->d_inode; | 25 | struct inode *inode = file->f_path.dentry->d_inode; |
| 26 | struct hfsplus_inode_info *hip = HFSPLUS_I(inode); | ||
| 27 | unsigned int flags = 0; | ||
| 28 | |||
| 29 | if (inode->i_flags & S_IMMUTABLE) | ||
| 30 | flags |= FS_IMMUTABLE_FL; | ||
| 31 | if (inode->i_flags |= S_APPEND) | ||
| 32 | flags |= FS_APPEND_FL; | ||
| 33 | if (hip->userflags & HFSPLUS_FLG_NODUMP) | ||
| 34 | flags |= FS_NODUMP_FL; | ||
| 35 | |||
| 36 | return put_user(flags, user_flags); | ||
| 37 | } | ||
| 38 | |||
| 39 | static int hfsplus_ioctl_setflags(struct file *file, int __user *user_flags) | ||
| 40 | { | ||
| 41 | struct inode *inode = file->f_path.dentry->d_inode; | ||
| 42 | struct hfsplus_inode_info *hip = HFSPLUS_I(inode); | ||
| 27 | unsigned int flags; | 43 | unsigned int flags; |
| 44 | int err = 0; | ||
| 28 | 45 | ||
| 29 | lock_kernel(); | 46 | err = mnt_want_write(file->f_path.mnt); |
| 30 | switch (cmd) { | 47 | if (err) |
| 31 | case HFSPLUS_IOC_EXT2_GETFLAGS: | 48 | goto out; |
| 32 | flags = 0; | ||
| 33 | if (HFSPLUS_I(inode).rootflags & HFSPLUS_FLG_IMMUTABLE) | ||
| 34 | flags |= FS_IMMUTABLE_FL; /* EXT2_IMMUTABLE_FL */ | ||
| 35 | if (HFSPLUS_I(inode).rootflags & HFSPLUS_FLG_APPEND) | ||
| 36 | flags |= FS_APPEND_FL; /* EXT2_APPEND_FL */ | ||
| 37 | if (HFSPLUS_I(inode).userflags & HFSPLUS_FLG_NODUMP) | ||
| 38 | flags |= FS_NODUMP_FL; /* EXT2_NODUMP_FL */ | ||
| 39 | return put_user(flags, (int __user *)arg); | ||
| 40 | case HFSPLUS_IOC_EXT2_SETFLAGS: { | ||
| 41 | int err = 0; | ||
| 42 | err = mnt_want_write(filp->f_path.mnt); | ||
| 43 | if (err) { | ||
| 44 | unlock_kernel(); | ||
| 45 | return err; | ||
| 46 | } | ||
| 47 | 49 | ||
| 48 | if (!is_owner_or_cap(inode)) { | 50 | if (!is_owner_or_cap(inode)) { |
| 49 | err = -EACCES; | 51 | err = -EACCES; |
| 50 | goto setflags_out; | 52 | goto out_drop_write; |
| 51 | } | 53 | } |
| 52 | if (get_user(flags, (int __user *)arg)) { | ||
| 53 | err = -EFAULT; | ||
| 54 | goto setflags_out; | ||
| 55 | } | ||
| 56 | if (flags & (FS_IMMUTABLE_FL|FS_APPEND_FL) || | ||
| 57 | HFSPLUS_I(inode).rootflags & (HFSPLUS_FLG_IMMUTABLE|HFSPLUS_FLG_APPEND)) { | ||
| 58 | if (!capable(CAP_LINUX_IMMUTABLE)) { | ||
| 59 | err = -EPERM; | ||
| 60 | goto setflags_out; | ||
| 61 | } | ||
| 62 | } | ||
| 63 | 54 | ||
| 64 | /* don't silently ignore unsupported ext2 flags */ | 55 | if (get_user(flags, user_flags)) { |
| 65 | if (flags & ~(FS_IMMUTABLE_FL|FS_APPEND_FL|FS_NODUMP_FL)) { | 56 | err = -EFAULT; |
| 66 | err = -EOPNOTSUPP; | 57 | goto out_drop_write; |
| 67 | goto setflags_out; | 58 | } |
| 68 | } | 59 | |
| 69 | if (flags & FS_IMMUTABLE_FL) { /* EXT2_IMMUTABLE_FL */ | 60 | mutex_lock(&inode->i_mutex); |
| 70 | inode->i_flags |= S_IMMUTABLE; | 61 | |
| 71 | HFSPLUS_I(inode).rootflags |= HFSPLUS_FLG_IMMUTABLE; | 62 | if ((flags & (FS_IMMUTABLE_FL|FS_APPEND_FL)) || |
| 72 | } else { | 63 | inode->i_flags & (S_IMMUTABLE|S_APPEND)) { |
| 73 | inode->i_flags &= ~S_IMMUTABLE; | 64 | if (!capable(CAP_LINUX_IMMUTABLE)) { |
| 74 | HFSPLUS_I(inode).rootflags &= ~HFSPLUS_FLG_IMMUTABLE; | 65 | err = -EPERM; |
| 75 | } | 66 | goto out_unlock_inode; |
| 76 | if (flags & FS_APPEND_FL) { /* EXT2_APPEND_FL */ | ||
| 77 | inode->i_flags |= S_APPEND; | ||
| 78 | HFSPLUS_I(inode).rootflags |= HFSPLUS_FLG_APPEND; | ||
| 79 | } else { | ||
| 80 | inode->i_flags &= ~S_APPEND; | ||
| 81 | HFSPLUS_I(inode).rootflags &= ~HFSPLUS_FLG_APPEND; | ||
| 82 | } | 67 | } |
| 83 | if (flags & FS_NODUMP_FL) /* EXT2_NODUMP_FL */ | ||
| 84 | HFSPLUS_I(inode).userflags |= HFSPLUS_FLG_NODUMP; | ||
| 85 | else | ||
| 86 | HFSPLUS_I(inode).userflags &= ~HFSPLUS_FLG_NODUMP; | ||
| 87 | |||
| 88 | inode->i_ctime = CURRENT_TIME_SEC; | ||
| 89 | mark_inode_dirty(inode); | ||
| 90 | setflags_out: | ||
| 91 | mnt_drop_write(filp->f_path.mnt); | ||
| 92 | unlock_kernel(); | ||
| 93 | return err; | ||
| 94 | } | 68 | } |
| 69 | |||
| 70 | /* don't silently ignore unsupported ext2 flags */ | ||
| 71 | if (flags & ~(FS_IMMUTABLE_FL|FS_APPEND_FL|FS_NODUMP_FL)) { | ||
| 72 | err = -EOPNOTSUPP; | ||
| 73 | goto out_unlock_inode; | ||
| 74 | } | ||
| 75 | |||
| 76 | if (flags & FS_IMMUTABLE_FL) | ||
| 77 | inode->i_flags |= S_IMMUTABLE; | ||
| 78 | else | ||
| 79 | inode->i_flags &= ~S_IMMUTABLE; | ||
| 80 | |||
| 81 | if (flags & FS_APPEND_FL) | ||
| 82 | inode->i_flags |= S_APPEND; | ||
| 83 | else | ||
| 84 | inode->i_flags &= ~S_APPEND; | ||
| 85 | |||
| 86 | if (flags & FS_NODUMP_FL) | ||
| 87 | hip->userflags |= HFSPLUS_FLG_NODUMP; | ||
| 88 | else | ||
| 89 | hip->userflags &= ~HFSPLUS_FLG_NODUMP; | ||
| 90 | |||
| 91 | inode->i_ctime = CURRENT_TIME_SEC; | ||
| 92 | mark_inode_dirty(inode); | ||
| 93 | |||
| 94 | out_unlock_inode: | ||
| 95 | mutex_lock(&inode->i_mutex); | ||
| 96 | out_drop_write: | ||
| 97 | mnt_drop_write(file->f_path.mnt); | ||
| 98 | out: | ||
| 99 | return err; | ||
| 100 | } | ||
| 101 | |||
| 102 | long hfsplus_ioctl(struct file *file, unsigned int cmd, unsigned long arg) | ||
| 103 | { | ||
| 104 | void __user *argp = (void __user *)arg; | ||
| 105 | |||
| 106 | switch (cmd) { | ||
| 107 | case HFSPLUS_IOC_EXT2_GETFLAGS: | ||
| 108 | return hfsplus_ioctl_getflags(file, argp); | ||
| 109 | case HFSPLUS_IOC_EXT2_SETFLAGS: | ||
| 110 | return hfsplus_ioctl_setflags(file, argp); | ||
| 95 | default: | 111 | default: |
| 96 | unlock_kernel(); | ||
| 97 | return -ENOTTY; | 112 | return -ENOTTY; |
| 98 | } | 113 | } |
| 99 | } | 114 | } |
| @@ -110,7 +125,7 @@ int hfsplus_setxattr(struct dentry *dentry, const char *name, | |||
| 110 | if (!S_ISREG(inode->i_mode) || HFSPLUS_IS_RSRC(inode)) | 125 | if (!S_ISREG(inode->i_mode) || HFSPLUS_IS_RSRC(inode)) |
| 111 | return -EOPNOTSUPP; | 126 | return -EOPNOTSUPP; |
| 112 | 127 | ||
| 113 | res = hfs_find_init(HFSPLUS_SB(inode->i_sb).cat_tree, &fd); | 128 | res = hfs_find_init(HFSPLUS_SB(inode->i_sb)->cat_tree, &fd); |
| 114 | if (res) | 129 | if (res) |
| 115 | return res; | 130 | return res; |
| 116 | res = hfsplus_find_cat(inode->i_sb, inode->i_ino, &fd); | 131 | res = hfsplus_find_cat(inode->i_sb, inode->i_ino, &fd); |
| @@ -153,7 +168,7 @@ ssize_t hfsplus_getxattr(struct dentry *dentry, const char *name, | |||
| 153 | return -EOPNOTSUPP; | 168 | return -EOPNOTSUPP; |
| 154 | 169 | ||
| 155 | if (size) { | 170 | if (size) { |
| 156 | res = hfs_find_init(HFSPLUS_SB(inode->i_sb).cat_tree, &fd); | 171 | res = hfs_find_init(HFSPLUS_SB(inode->i_sb)->cat_tree, &fd); |
| 157 | if (res) | 172 | if (res) |
| 158 | return res; | 173 | return res; |
| 159 | res = hfsplus_find_cat(inode->i_sb, inode->i_ino, &fd); | 174 | res = hfsplus_find_cat(inode->i_sb, inode->i_ino, &fd); |
| @@ -177,7 +192,7 @@ ssize_t hfsplus_getxattr(struct dentry *dentry, const char *name, | |||
| 177 | } else | 192 | } else |
| 178 | res = size ? -ERANGE : 4; | 193 | res = size ? -ERANGE : 4; |
| 179 | } else | 194 | } else |
| 180 | res = -ENODATA; | 195 | res = -EOPNOTSUPP; |
| 181 | out: | 196 | out: |
| 182 | if (size) | 197 | if (size) |
| 183 | hfs_find_exit(&fd); | 198 | hfs_find_exit(&fd); |
diff --git a/fs/hfsplus/options.c b/fs/hfsplus/options.c index 572628b4b07d..f9ab276a4d8d 100644 --- a/fs/hfsplus/options.c +++ b/fs/hfsplus/options.c | |||
| @@ -143,13 +143,13 @@ int hfsplus_parse_options(char *input, struct hfsplus_sb_info *sbi) | |||
| 143 | kfree(p); | 143 | kfree(p); |
| 144 | break; | 144 | break; |
| 145 | case opt_decompose: | 145 | case opt_decompose: |
| 146 | sbi->flags &= ~HFSPLUS_SB_NODECOMPOSE; | 146 | clear_bit(HFSPLUS_SB_NODECOMPOSE, &sbi->flags); |
| 147 | break; | 147 | break; |
| 148 | case opt_nodecompose: | 148 | case opt_nodecompose: |
| 149 | sbi->flags |= HFSPLUS_SB_NODECOMPOSE; | 149 | set_bit(HFSPLUS_SB_NODECOMPOSE, &sbi->flags); |
| 150 | break; | 150 | break; |
| 151 | case opt_force: | 151 | case opt_force: |
| 152 | sbi->flags |= HFSPLUS_SB_FORCE; | 152 | set_bit(HFSPLUS_SB_FORCE, &sbi->flags); |
| 153 | break; | 153 | break; |
| 154 | default: | 154 | default: |
| 155 | return 0; | 155 | return 0; |
| @@ -171,7 +171,7 @@ done: | |||
| 171 | 171 | ||
| 172 | int hfsplus_show_options(struct seq_file *seq, struct vfsmount *mnt) | 172 | int hfsplus_show_options(struct seq_file *seq, struct vfsmount *mnt) |
| 173 | { | 173 | { |
| 174 | struct hfsplus_sb_info *sbi = &HFSPLUS_SB(mnt->mnt_sb); | 174 | struct hfsplus_sb_info *sbi = HFSPLUS_SB(mnt->mnt_sb); |
| 175 | 175 | ||
| 176 | if (sbi->creator != HFSPLUS_DEF_CR_TYPE) | 176 | if (sbi->creator != HFSPLUS_DEF_CR_TYPE) |
| 177 | seq_printf(seq, ",creator=%.4s", (char *)&sbi->creator); | 177 | seq_printf(seq, ",creator=%.4s", (char *)&sbi->creator); |
| @@ -184,7 +184,7 @@ int hfsplus_show_options(struct seq_file *seq, struct vfsmount *mnt) | |||
| 184 | seq_printf(seq, ",session=%u", sbi->session); | 184 | seq_printf(seq, ",session=%u", sbi->session); |
| 185 | if (sbi->nls) | 185 | if (sbi->nls) |
| 186 | seq_printf(seq, ",nls=%s", sbi->nls->charset); | 186 | seq_printf(seq, ",nls=%s", sbi->nls->charset); |
| 187 | if (sbi->flags & HFSPLUS_SB_NODECOMPOSE) | 187 | if (test_bit(HFSPLUS_SB_NODECOMPOSE, &sbi->flags)) |
| 188 | seq_printf(seq, ",nodecompose"); | 188 | seq_printf(seq, ",nodecompose"); |
| 189 | return 0; | 189 | return 0; |
| 190 | } | 190 | } |
diff --git a/fs/hfsplus/part_tbl.c b/fs/hfsplus/part_tbl.c index 1528a6fd0299..208b16c645cc 100644 --- a/fs/hfsplus/part_tbl.c +++ b/fs/hfsplus/part_tbl.c | |||
| @@ -74,6 +74,7 @@ struct old_pmap { | |||
| 74 | int hfs_part_find(struct super_block *sb, | 74 | int hfs_part_find(struct super_block *sb, |
| 75 | sector_t *part_start, sector_t *part_size) | 75 | sector_t *part_start, sector_t *part_size) |
| 76 | { | 76 | { |
| 77 | struct hfsplus_sb_info *sbi = HFSPLUS_SB(sb); | ||
| 77 | struct buffer_head *bh; | 78 | struct buffer_head *bh; |
| 78 | __be16 *data; | 79 | __be16 *data; |
| 79 | int i, size, res; | 80 | int i, size, res; |
| @@ -95,7 +96,7 @@ int hfs_part_find(struct super_block *sb, | |||
| 95 | for (i = 0; i < size; p++, i++) { | 96 | for (i = 0; i < size; p++, i++) { |
| 96 | if (p->pdStart && p->pdSize && | 97 | if (p->pdStart && p->pdSize && |
| 97 | p->pdFSID == cpu_to_be32(0x54465331)/*"TFS1"*/ && | 98 | p->pdFSID == cpu_to_be32(0x54465331)/*"TFS1"*/ && |
| 98 | (HFSPLUS_SB(sb).part < 0 || HFSPLUS_SB(sb).part == i)) { | 99 | (sbi->part < 0 || sbi->part == i)) { |
| 99 | *part_start += be32_to_cpu(p->pdStart); | 100 | *part_start += be32_to_cpu(p->pdStart); |
| 100 | *part_size = be32_to_cpu(p->pdSize); | 101 | *part_size = be32_to_cpu(p->pdSize); |
| 101 | res = 0; | 102 | res = 0; |
| @@ -111,7 +112,7 @@ int hfs_part_find(struct super_block *sb, | |||
| 111 | size = be32_to_cpu(pm->pmMapBlkCnt); | 112 | size = be32_to_cpu(pm->pmMapBlkCnt); |
| 112 | for (i = 0; i < size;) { | 113 | for (i = 0; i < size;) { |
| 113 | if (!memcmp(pm->pmPartType,"Apple_HFS", 9) && | 114 | if (!memcmp(pm->pmPartType,"Apple_HFS", 9) && |
| 114 | (HFSPLUS_SB(sb).part < 0 || HFSPLUS_SB(sb).part == i)) { | 115 | (sbi->part < 0 || sbi->part == i)) { |
| 115 | *part_start += be32_to_cpu(pm->pmPyPartStart); | 116 | *part_start += be32_to_cpu(pm->pmPyPartStart); |
| 116 | *part_size = be32_to_cpu(pm->pmPartBlkCnt); | 117 | *part_size = be32_to_cpu(pm->pmPartBlkCnt); |
| 117 | res = 0; | 118 | res = 0; |
diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c index 3b55c050c742..9a88d7536103 100644 --- a/fs/hfsplus/super.c +++ b/fs/hfsplus/super.c | |||
| @@ -12,7 +12,6 @@ | |||
| 12 | #include <linux/pagemap.h> | 12 | #include <linux/pagemap.h> |
| 13 | #include <linux/fs.h> | 13 | #include <linux/fs.h> |
| 14 | #include <linux/slab.h> | 14 | #include <linux/slab.h> |
| 15 | #include <linux/smp_lock.h> | ||
| 16 | #include <linux/vfs.h> | 15 | #include <linux/vfs.h> |
| 17 | #include <linux/nls.h> | 16 | #include <linux/nls.h> |
| 18 | 17 | ||
| @@ -21,40 +20,11 @@ static void hfsplus_destroy_inode(struct inode *inode); | |||
| 21 | 20 | ||
| 22 | #include "hfsplus_fs.h" | 21 | #include "hfsplus_fs.h" |
| 23 | 22 | ||
| 24 | struct inode *hfsplus_iget(struct super_block *sb, unsigned long ino) | 23 | static int hfsplus_system_read_inode(struct inode *inode) |
| 25 | { | 24 | { |
| 26 | struct hfs_find_data fd; | 25 | struct hfsplus_vh *vhdr = HFSPLUS_SB(inode->i_sb)->s_vhdr; |
| 27 | struct hfsplus_vh *vhdr; | ||
| 28 | struct inode *inode; | ||
| 29 | long err = -EIO; | ||
| 30 | |||
| 31 | inode = iget_locked(sb, ino); | ||
| 32 | if (!inode) | ||
| 33 | return ERR_PTR(-ENOMEM); | ||
| 34 | if (!(inode->i_state & I_NEW)) | ||
| 35 | return inode; | ||
| 36 | 26 | ||
| 37 | INIT_LIST_HEAD(&HFSPLUS_I(inode).open_dir_list); | 27 | switch (inode->i_ino) { |
| 38 | mutex_init(&HFSPLUS_I(inode).extents_lock); | ||
| 39 | HFSPLUS_I(inode).flags = 0; | ||
| 40 | HFSPLUS_I(inode).rsrc_inode = NULL; | ||
| 41 | atomic_set(&HFSPLUS_I(inode).opencnt, 0); | ||
| 42 | |||
| 43 | if (inode->i_ino >= HFSPLUS_FIRSTUSER_CNID) { | ||
| 44 | read_inode: | ||
| 45 | hfs_find_init(HFSPLUS_SB(inode->i_sb).cat_tree, &fd); | ||
| 46 | err = hfsplus_find_cat(inode->i_sb, inode->i_ino, &fd); | ||
| 47 | if (!err) | ||
| 48 | err = hfsplus_cat_read_inode(inode, &fd); | ||
| 49 | hfs_find_exit(&fd); | ||
| 50 | if (err) | ||
| 51 | goto bad_inode; | ||
| 52 | goto done; | ||
| 53 | } | ||
| 54 | vhdr = HFSPLUS_SB(inode->i_sb).s_vhdr; | ||
| 55 | switch(inode->i_ino) { | ||
| 56 | case HFSPLUS_ROOT_CNID: | ||
| 57 | goto read_inode; | ||
| 58 | case HFSPLUS_EXT_CNID: | 28 | case HFSPLUS_EXT_CNID: |
| 59 | hfsplus_inode_read_fork(inode, &vhdr->ext_file); | 29 | hfsplus_inode_read_fork(inode, &vhdr->ext_file); |
| 60 | inode->i_mapping->a_ops = &hfsplus_btree_aops; | 30 | inode->i_mapping->a_ops = &hfsplus_btree_aops; |
| @@ -75,74 +45,101 @@ struct inode *hfsplus_iget(struct super_block *sb, unsigned long ino) | |||
| 75 | inode->i_mapping->a_ops = &hfsplus_btree_aops; | 45 | inode->i_mapping->a_ops = &hfsplus_btree_aops; |
| 76 | break; | 46 | break; |
| 77 | default: | 47 | default: |
| 78 | goto bad_inode; | 48 | return -EIO; |
| 49 | } | ||
| 50 | |||
| 51 | return 0; | ||
| 52 | } | ||
| 53 | |||
| 54 | struct inode *hfsplus_iget(struct super_block *sb, unsigned long ino) | ||
| 55 | { | ||
| 56 | struct hfs_find_data fd; | ||
| 57 | struct inode *inode; | ||
| 58 | int err; | ||
| 59 | |||
| 60 | inode = iget_locked(sb, ino); | ||
| 61 | if (!inode) | ||
| 62 | return ERR_PTR(-ENOMEM); | ||
| 63 | if (!(inode->i_state & I_NEW)) | ||
| 64 | return inode; | ||
| 65 | |||
| 66 | INIT_LIST_HEAD(&HFSPLUS_I(inode)->open_dir_list); | ||
| 67 | mutex_init(&HFSPLUS_I(inode)->extents_lock); | ||
| 68 | HFSPLUS_I(inode)->flags = 0; | ||
| 69 | HFSPLUS_I(inode)->rsrc_inode = NULL; | ||
| 70 | atomic_set(&HFSPLUS_I(inode)->opencnt, 0); | ||
| 71 | |||
| 72 | if (inode->i_ino >= HFSPLUS_FIRSTUSER_CNID || | ||
| 73 | inode->i_ino == HFSPLUS_ROOT_CNID) { | ||
| 74 | hfs_find_init(HFSPLUS_SB(inode->i_sb)->cat_tree, &fd); | ||
| 75 | err = hfsplus_find_cat(inode->i_sb, inode->i_ino, &fd); | ||
| 76 | if (!err) | ||
| 77 | err = hfsplus_cat_read_inode(inode, &fd); | ||
| 78 | hfs_find_exit(&fd); | ||
| 79 | } else { | ||
| 80 | err = hfsplus_system_read_inode(inode); | ||
| 81 | } | ||
| 82 | |||
| 83 | if (err) { | ||
| 84 | iget_failed(inode); | ||
| 85 | return ERR_PTR(err); | ||
| 79 | } | 86 | } |
| 80 | 87 | ||
| 81 | done: | ||
| 82 | unlock_new_inode(inode); | 88 | unlock_new_inode(inode); |
| 83 | return inode; | 89 | return inode; |
| 84 | |||
| 85 | bad_inode: | ||
| 86 | iget_failed(inode); | ||
| 87 | return ERR_PTR(err); | ||
| 88 | } | 90 | } |
| 89 | 91 | ||
| 90 | static int hfsplus_write_inode(struct inode *inode, | 92 | static int hfsplus_system_write_inode(struct inode *inode) |
| 91 | struct writeback_control *wbc) | ||
| 92 | { | 93 | { |
| 93 | struct hfsplus_vh *vhdr; | 94 | struct hfsplus_sb_info *sbi = HFSPLUS_SB(inode->i_sb); |
| 94 | int ret = 0; | 95 | struct hfsplus_vh *vhdr = sbi->s_vhdr; |
| 96 | struct hfsplus_fork_raw *fork; | ||
| 97 | struct hfs_btree *tree = NULL; | ||
| 95 | 98 | ||
| 96 | dprint(DBG_INODE, "hfsplus_write_inode: %lu\n", inode->i_ino); | ||
| 97 | hfsplus_ext_write_extent(inode); | ||
| 98 | if (inode->i_ino >= HFSPLUS_FIRSTUSER_CNID) { | ||
| 99 | return hfsplus_cat_write_inode(inode); | ||
| 100 | } | ||
| 101 | vhdr = HFSPLUS_SB(inode->i_sb).s_vhdr; | ||
| 102 | switch (inode->i_ino) { | 99 | switch (inode->i_ino) { |
| 103 | case HFSPLUS_ROOT_CNID: | ||
| 104 | ret = hfsplus_cat_write_inode(inode); | ||
| 105 | break; | ||
| 106 | case HFSPLUS_EXT_CNID: | 100 | case HFSPLUS_EXT_CNID: |
| 107 | if (vhdr->ext_file.total_size != cpu_to_be64(inode->i_size)) { | 101 | fork = &vhdr->ext_file; |
| 108 | HFSPLUS_SB(inode->i_sb).flags |= HFSPLUS_SB_WRITEBACKUP; | 102 | tree = sbi->ext_tree; |
| 109 | inode->i_sb->s_dirt = 1; | ||
| 110 | } | ||
| 111 | hfsplus_inode_write_fork(inode, &vhdr->ext_file); | ||
| 112 | hfs_btree_write(HFSPLUS_SB(inode->i_sb).ext_tree); | ||
| 113 | break; | 103 | break; |
| 114 | case HFSPLUS_CAT_CNID: | 104 | case HFSPLUS_CAT_CNID: |
| 115 | if (vhdr->cat_file.total_size != cpu_to_be64(inode->i_size)) { | 105 | fork = &vhdr->cat_file; |
| 116 | HFSPLUS_SB(inode->i_sb).flags |= HFSPLUS_SB_WRITEBACKUP; | 106 | tree = sbi->cat_tree; |
| 117 | inode->i_sb->s_dirt = 1; | ||
| 118 | } | ||
| 119 | hfsplus_inode_write_fork(inode, &vhdr->cat_file); | ||
| 120 | hfs_btree_write(HFSPLUS_SB(inode->i_sb).cat_tree); | ||
| 121 | break; | 107 | break; |
| 122 | case HFSPLUS_ALLOC_CNID: | 108 | case HFSPLUS_ALLOC_CNID: |
| 123 | if (vhdr->alloc_file.total_size != cpu_to_be64(inode->i_size)) { | 109 | fork = &vhdr->alloc_file; |
| 124 | HFSPLUS_SB(inode->i_sb).flags |= HFSPLUS_SB_WRITEBACKUP; | ||
| 125 | inode->i_sb->s_dirt = 1; | ||
| 126 | } | ||
| 127 | hfsplus_inode_write_fork(inode, &vhdr->alloc_file); | ||
| 128 | break; | 110 | break; |
| 129 | case HFSPLUS_START_CNID: | 111 | case HFSPLUS_START_CNID: |
| 130 | if (vhdr->start_file.total_size != cpu_to_be64(inode->i_size)) { | 112 | fork = &vhdr->start_file; |
| 131 | HFSPLUS_SB(inode->i_sb).flags |= HFSPLUS_SB_WRITEBACKUP; | ||
| 132 | inode->i_sb->s_dirt = 1; | ||
| 133 | } | ||
| 134 | hfsplus_inode_write_fork(inode, &vhdr->start_file); | ||
| 135 | break; | 113 | break; |
| 136 | case HFSPLUS_ATTR_CNID: | 114 | case HFSPLUS_ATTR_CNID: |
| 137 | if (vhdr->attr_file.total_size != cpu_to_be64(inode->i_size)) { | 115 | fork = &vhdr->attr_file; |
| 138 | HFSPLUS_SB(inode->i_sb).flags |= HFSPLUS_SB_WRITEBACKUP; | 116 | tree = sbi->attr_tree; |
| 139 | inode->i_sb->s_dirt = 1; | 117 | default: |
| 140 | } | 118 | return -EIO; |
| 141 | hfsplus_inode_write_fork(inode, &vhdr->attr_file); | 119 | } |
| 142 | hfs_btree_write(HFSPLUS_SB(inode->i_sb).attr_tree); | 120 | |
| 143 | break; | 121 | if (fork->total_size != cpu_to_be64(inode->i_size)) { |
| 122 | set_bit(HFSPLUS_SB_WRITEBACKUP, &sbi->flags); | ||
| 123 | inode->i_sb->s_dirt = 1; | ||
| 144 | } | 124 | } |
| 145 | return ret; | 125 | hfsplus_inode_write_fork(inode, fork); |
| 126 | if (tree) | ||
| 127 | hfs_btree_write(tree); | ||
| 128 | return 0; | ||
| 129 | } | ||
| 130 | |||
| 131 | static int hfsplus_write_inode(struct inode *inode, | ||
| 132 | struct writeback_control *wbc) | ||
| 133 | { | ||
| 134 | dprint(DBG_INODE, "hfsplus_write_inode: %lu\n", inode->i_ino); | ||
| 135 | |||
| 136 | hfsplus_ext_write_extent(inode); | ||
| 137 | |||
| 138 | if (inode->i_ino >= HFSPLUS_FIRSTUSER_CNID || | ||
| 139 | inode->i_ino == HFSPLUS_ROOT_CNID) | ||
| 140 | return hfsplus_cat_write_inode(inode); | ||
| 141 | else | ||
| 142 | return hfsplus_system_write_inode(inode); | ||
| 146 | } | 143 | } |
| 147 | 144 | ||
| 148 | static void hfsplus_evict_inode(struct inode *inode) | 145 | static void hfsplus_evict_inode(struct inode *inode) |
| @@ -151,51 +148,53 @@ static void hfsplus_evict_inode(struct inode *inode) | |||
| 151 | truncate_inode_pages(&inode->i_data, 0); | 148 | truncate_inode_pages(&inode->i_data, 0); |
| 152 | end_writeback(inode); | 149 | end_writeback(inode); |
| 153 | if (HFSPLUS_IS_RSRC(inode)) { | 150 | if (HFSPLUS_IS_RSRC(inode)) { |
| 154 | HFSPLUS_I(HFSPLUS_I(inode).rsrc_inode).rsrc_inode = NULL; | 151 | HFSPLUS_I(HFSPLUS_I(inode)->rsrc_inode)->rsrc_inode = NULL; |
| 155 | iput(HFSPLUS_I(inode).rsrc_inode); | 152 | iput(HFSPLUS_I(inode)->rsrc_inode); |
| 156 | } | 153 | } |
| 157 | } | 154 | } |
| 158 | 155 | ||
| 159 | int hfsplus_sync_fs(struct super_block *sb, int wait) | 156 | int hfsplus_sync_fs(struct super_block *sb, int wait) |
| 160 | { | 157 | { |
| 161 | struct hfsplus_vh *vhdr = HFSPLUS_SB(sb).s_vhdr; | 158 | struct hfsplus_sb_info *sbi = HFSPLUS_SB(sb); |
| 159 | struct hfsplus_vh *vhdr = sbi->s_vhdr; | ||
| 162 | 160 | ||
| 163 | dprint(DBG_SUPER, "hfsplus_write_super\n"); | 161 | dprint(DBG_SUPER, "hfsplus_write_super\n"); |
| 164 | 162 | ||
| 165 | lock_super(sb); | 163 | mutex_lock(&sbi->vh_mutex); |
| 164 | mutex_lock(&sbi->alloc_mutex); | ||
| 166 | sb->s_dirt = 0; | 165 | sb->s_dirt = 0; |
| 167 | 166 | ||
| 168 | vhdr->free_blocks = cpu_to_be32(HFSPLUS_SB(sb).free_blocks); | 167 | vhdr->free_blocks = cpu_to_be32(sbi->free_blocks); |
| 169 | vhdr->next_alloc = cpu_to_be32(HFSPLUS_SB(sb).next_alloc); | 168 | vhdr->next_cnid = cpu_to_be32(sbi->next_cnid); |
| 170 | vhdr->next_cnid = cpu_to_be32(HFSPLUS_SB(sb).next_cnid); | 169 | vhdr->folder_count = cpu_to_be32(sbi->folder_count); |
| 171 | vhdr->folder_count = cpu_to_be32(HFSPLUS_SB(sb).folder_count); | 170 | vhdr->file_count = cpu_to_be32(sbi->file_count); |
| 172 | vhdr->file_count = cpu_to_be32(HFSPLUS_SB(sb).file_count); | ||
| 173 | 171 | ||
| 174 | mark_buffer_dirty(HFSPLUS_SB(sb).s_vhbh); | 172 | mark_buffer_dirty(sbi->s_vhbh); |
| 175 | if (HFSPLUS_SB(sb).flags & HFSPLUS_SB_WRITEBACKUP) { | 173 | if (test_and_clear_bit(HFSPLUS_SB_WRITEBACKUP, &sbi->flags)) { |
| 176 | if (HFSPLUS_SB(sb).sect_count) { | 174 | if (sbi->sect_count) { |
| 177 | struct buffer_head *bh; | 175 | struct buffer_head *bh; |
| 178 | u32 block, offset; | 176 | u32 block, offset; |
| 179 | 177 | ||
| 180 | block = HFSPLUS_SB(sb).blockoffset; | 178 | block = sbi->blockoffset; |
| 181 | block += (HFSPLUS_SB(sb).sect_count - 2) >> (sb->s_blocksize_bits - 9); | 179 | block += (sbi->sect_count - 2) >> (sb->s_blocksize_bits - 9); |
| 182 | offset = ((HFSPLUS_SB(sb).sect_count - 2) << 9) & (sb->s_blocksize - 1); | 180 | offset = ((sbi->sect_count - 2) << 9) & (sb->s_blocksize - 1); |
| 183 | printk(KERN_DEBUG "hfs: backup: %u,%u,%u,%u\n", HFSPLUS_SB(sb).blockoffset, | 181 | printk(KERN_DEBUG "hfs: backup: %u,%u,%u,%u\n", |
| 184 | HFSPLUS_SB(sb).sect_count, block, offset); | 182 | sbi->blockoffset, sbi->sect_count, |
| 183 | block, offset); | ||
| 185 | bh = sb_bread(sb, block); | 184 | bh = sb_bread(sb, block); |
| 186 | if (bh) { | 185 | if (bh) { |
| 187 | vhdr = (struct hfsplus_vh *)(bh->b_data + offset); | 186 | vhdr = (struct hfsplus_vh *)(bh->b_data + offset); |
| 188 | if (be16_to_cpu(vhdr->signature) == HFSPLUS_VOLHEAD_SIG) { | 187 | if (be16_to_cpu(vhdr->signature) == HFSPLUS_VOLHEAD_SIG) { |
| 189 | memcpy(vhdr, HFSPLUS_SB(sb).s_vhdr, sizeof(*vhdr)); | 188 | memcpy(vhdr, sbi->s_vhdr, sizeof(*vhdr)); |
| 190 | mark_buffer_dirty(bh); | 189 | mark_buffer_dirty(bh); |
| 191 | brelse(bh); | 190 | brelse(bh); |
| 192 | } else | 191 | } else |
| 193 | printk(KERN_WARNING "hfs: backup not found!\n"); | 192 | printk(KERN_WARNING "hfs: backup not found!\n"); |
| 194 | } | 193 | } |
| 195 | } | 194 | } |
| 196 | HFSPLUS_SB(sb).flags &= ~HFSPLUS_SB_WRITEBACKUP; | ||
| 197 | } | 195 | } |
| 198 | unlock_super(sb); | 196 | mutex_unlock(&sbi->alloc_mutex); |
| 197 | mutex_unlock(&sbi->vh_mutex); | ||
| 199 | return 0; | 198 | return 0; |
| 200 | } | 199 | } |
| 201 | 200 | ||
| @@ -209,48 +208,48 @@ static void hfsplus_write_super(struct super_block *sb) | |||
| 209 | 208 | ||
| 210 | static void hfsplus_put_super(struct super_block *sb) | 209 | static void hfsplus_put_super(struct super_block *sb) |
| 211 | { | 210 | { |
| 211 | struct hfsplus_sb_info *sbi = HFSPLUS_SB(sb); | ||
| 212 | |||
| 212 | dprint(DBG_SUPER, "hfsplus_put_super\n"); | 213 | dprint(DBG_SUPER, "hfsplus_put_super\n"); |
| 214 | |||
| 213 | if (!sb->s_fs_info) | 215 | if (!sb->s_fs_info) |
| 214 | return; | 216 | return; |
| 215 | 217 | ||
| 216 | lock_kernel(); | ||
| 217 | |||
| 218 | if (sb->s_dirt) | 218 | if (sb->s_dirt) |
| 219 | hfsplus_write_super(sb); | 219 | hfsplus_write_super(sb); |
| 220 | if (!(sb->s_flags & MS_RDONLY) && HFSPLUS_SB(sb).s_vhdr) { | 220 | if (!(sb->s_flags & MS_RDONLY) && sbi->s_vhdr) { |
| 221 | struct hfsplus_vh *vhdr = HFSPLUS_SB(sb).s_vhdr; | 221 | struct hfsplus_vh *vhdr = sbi->s_vhdr; |
| 222 | 222 | ||
| 223 | vhdr->modify_date = hfsp_now2mt(); | 223 | vhdr->modify_date = hfsp_now2mt(); |
| 224 | vhdr->attributes |= cpu_to_be32(HFSPLUS_VOL_UNMNT); | 224 | vhdr->attributes |= cpu_to_be32(HFSPLUS_VOL_UNMNT); |
| 225 | vhdr->attributes &= cpu_to_be32(~HFSPLUS_VOL_INCNSTNT); | 225 | vhdr->attributes &= cpu_to_be32(~HFSPLUS_VOL_INCNSTNT); |
| 226 | mark_buffer_dirty(HFSPLUS_SB(sb).s_vhbh); | 226 | mark_buffer_dirty(sbi->s_vhbh); |
| 227 | sync_dirty_buffer(HFSPLUS_SB(sb).s_vhbh); | 227 | sync_dirty_buffer(sbi->s_vhbh); |
| 228 | } | 228 | } |
| 229 | 229 | ||
| 230 | hfs_btree_close(HFSPLUS_SB(sb).cat_tree); | 230 | hfs_btree_close(sbi->cat_tree); |
| 231 | hfs_btree_close(HFSPLUS_SB(sb).ext_tree); | 231 | hfs_btree_close(sbi->ext_tree); |
| 232 | iput(HFSPLUS_SB(sb).alloc_file); | 232 | iput(sbi->alloc_file); |
| 233 | iput(HFSPLUS_SB(sb).hidden_dir); | 233 | iput(sbi->hidden_dir); |
| 234 | brelse(HFSPLUS_SB(sb).s_vhbh); | 234 | brelse(sbi->s_vhbh); |
| 235 | unload_nls(HFSPLUS_SB(sb).nls); | 235 | unload_nls(sbi->nls); |
| 236 | kfree(sb->s_fs_info); | 236 | kfree(sb->s_fs_info); |
| 237 | sb->s_fs_info = NULL; | 237 | sb->s_fs_info = NULL; |
| 238 | |||
| 239 | unlock_kernel(); | ||
| 240 | } | 238 | } |
| 241 | 239 | ||
| 242 | static int hfsplus_statfs(struct dentry *dentry, struct kstatfs *buf) | 240 | static int hfsplus_statfs(struct dentry *dentry, struct kstatfs *buf) |
| 243 | { | 241 | { |
| 244 | struct super_block *sb = dentry->d_sb; | 242 | struct super_block *sb = dentry->d_sb; |
| 243 | struct hfsplus_sb_info *sbi = HFSPLUS_SB(sb); | ||
| 245 | u64 id = huge_encode_dev(sb->s_bdev->bd_dev); | 244 | u64 id = huge_encode_dev(sb->s_bdev->bd_dev); |
| 246 | 245 | ||
| 247 | buf->f_type = HFSPLUS_SUPER_MAGIC; | 246 | buf->f_type = HFSPLUS_SUPER_MAGIC; |
| 248 | buf->f_bsize = sb->s_blocksize; | 247 | buf->f_bsize = sb->s_blocksize; |
| 249 | buf->f_blocks = HFSPLUS_SB(sb).total_blocks << HFSPLUS_SB(sb).fs_shift; | 248 | buf->f_blocks = sbi->total_blocks << sbi->fs_shift; |
| 250 | buf->f_bfree = HFSPLUS_SB(sb).free_blocks << HFSPLUS_SB(sb).fs_shift; | 249 | buf->f_bfree = sbi->free_blocks << sbi->fs_shift; |
| 251 | buf->f_bavail = buf->f_bfree; | 250 | buf->f_bavail = buf->f_bfree; |
| 252 | buf->f_files = 0xFFFFFFFF; | 251 | buf->f_files = 0xFFFFFFFF; |
| 253 | buf->f_ffree = 0xFFFFFFFF - HFSPLUS_SB(sb).next_cnid; | 252 | buf->f_ffree = 0xFFFFFFFF - sbi->next_cnid; |
| 254 | buf->f_fsid.val[0] = (u32)id; | 253 | buf->f_fsid.val[0] = (u32)id; |
| 255 | buf->f_fsid.val[1] = (u32)(id >> 32); | 254 | buf->f_fsid.val[1] = (u32)(id >> 32); |
| 256 | buf->f_namelen = HFSPLUS_MAX_STRLEN; | 255 | buf->f_namelen = HFSPLUS_MAX_STRLEN; |
| @@ -263,11 +262,11 @@ static int hfsplus_remount(struct super_block *sb, int *flags, char *data) | |||
| 263 | if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY)) | 262 | if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY)) |
| 264 | return 0; | 263 | return 0; |
| 265 | if (!(*flags & MS_RDONLY)) { | 264 | if (!(*flags & MS_RDONLY)) { |
| 266 | struct hfsplus_vh *vhdr = HFSPLUS_SB(sb).s_vhdr; | 265 | struct hfsplus_vh *vhdr = HFSPLUS_SB(sb)->s_vhdr; |
| 267 | struct hfsplus_sb_info sbi; | 266 | struct hfsplus_sb_info sbi; |
| 268 | 267 | ||
| 269 | memset(&sbi, 0, sizeof(struct hfsplus_sb_info)); | 268 | memset(&sbi, 0, sizeof(struct hfsplus_sb_info)); |
| 270 | sbi.nls = HFSPLUS_SB(sb).nls; | 269 | sbi.nls = HFSPLUS_SB(sb)->nls; |
| 271 | if (!hfsplus_parse_options(data, &sbi)) | 270 | if (!hfsplus_parse_options(data, &sbi)) |
| 272 | return -EINVAL; | 271 | return -EINVAL; |
| 273 | 272 | ||
| @@ -276,7 +275,7 @@ static int hfsplus_remount(struct super_block *sb, int *flags, char *data) | |||
| 276 | "running fsck.hfsplus is recommended. leaving read-only.\n"); | 275 | "running fsck.hfsplus is recommended. leaving read-only.\n"); |
| 277 | sb->s_flags |= MS_RDONLY; | 276 | sb->s_flags |= MS_RDONLY; |
| 278 | *flags |= MS_RDONLY; | 277 | *flags |= MS_RDONLY; |
| 279 | } else if (sbi.flags & HFSPLUS_SB_FORCE) { | 278 | } else if (test_bit(HFSPLUS_SB_FORCE, &sbi.flags)) { |
| 280 | /* nothing */ | 279 | /* nothing */ |
| 281 | } else if (vhdr->attributes & cpu_to_be32(HFSPLUS_VOL_SOFTLOCK)) { | 280 | } else if (vhdr->attributes & cpu_to_be32(HFSPLUS_VOL_SOFTLOCK)) { |
| 282 | printk(KERN_WARNING "hfs: filesystem is marked locked, leaving read-only.\n"); | 281 | printk(KERN_WARNING "hfs: filesystem is marked locked, leaving read-only.\n"); |
| @@ -320,7 +319,8 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent) | |||
| 320 | return -ENOMEM; | 319 | return -ENOMEM; |
| 321 | 320 | ||
| 322 | sb->s_fs_info = sbi; | 321 | sb->s_fs_info = sbi; |
| 323 | INIT_HLIST_HEAD(&sbi->rsrc_inodes); | 322 | mutex_init(&sbi->alloc_mutex); |
| 323 | mutex_init(&sbi->vh_mutex); | ||
| 324 | hfsplus_fill_defaults(sbi); | 324 | hfsplus_fill_defaults(sbi); |
| 325 | if (!hfsplus_parse_options(data, sbi)) { | 325 | if (!hfsplus_parse_options(data, sbi)) { |
| 326 | printk(KERN_ERR "hfs: unable to parse mount options\n"); | 326 | printk(KERN_ERR "hfs: unable to parse mount options\n"); |
| @@ -344,7 +344,7 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent) | |||
| 344 | err = -EINVAL; | 344 | err = -EINVAL; |
| 345 | goto cleanup; | 345 | goto cleanup; |
| 346 | } | 346 | } |
| 347 | vhdr = HFSPLUS_SB(sb).s_vhdr; | 347 | vhdr = sbi->s_vhdr; |
| 348 | 348 | ||
| 349 | /* Copy parts of the volume header into the superblock */ | 349 | /* Copy parts of the volume header into the superblock */ |
| 350 | sb->s_magic = HFSPLUS_VOLHEAD_SIG; | 350 | sb->s_magic = HFSPLUS_VOLHEAD_SIG; |
| @@ -353,18 +353,19 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent) | |||
| 353 | printk(KERN_ERR "hfs: wrong filesystem version\n"); | 353 | printk(KERN_ERR "hfs: wrong filesystem version\n"); |
| 354 | goto cleanup; | 354 | goto cleanup; |
| 355 | } | 355 | } |
| 356 | HFSPLUS_SB(sb).total_blocks = be32_to_cpu(vhdr->total_blocks); | 356 | sbi->total_blocks = be32_to_cpu(vhdr->total_blocks); |
| 357 | HFSPLUS_SB(sb).free_blocks = be32_to_cpu(vhdr->free_blocks); | 357 | sbi->free_blocks = be32_to_cpu(vhdr->free_blocks); |
| 358 | HFSPLUS_SB(sb).next_alloc = be32_to_cpu(vhdr->next_alloc); | 358 | sbi->next_cnid = be32_to_cpu(vhdr->next_cnid); |
| 359 | HFSPLUS_SB(sb).next_cnid = be32_to_cpu(vhdr->next_cnid); | 359 | sbi->file_count = be32_to_cpu(vhdr->file_count); |
| 360 | HFSPLUS_SB(sb).file_count = be32_to_cpu(vhdr->file_count); | 360 | sbi->folder_count = be32_to_cpu(vhdr->folder_count); |
| 361 | HFSPLUS_SB(sb).folder_count = be32_to_cpu(vhdr->folder_count); | 361 | sbi->data_clump_blocks = |
| 362 | HFSPLUS_SB(sb).data_clump_blocks = be32_to_cpu(vhdr->data_clump_sz) >> HFSPLUS_SB(sb).alloc_blksz_shift; | 362 | be32_to_cpu(vhdr->data_clump_sz) >> sbi->alloc_blksz_shift; |
| 363 | if (!HFSPLUS_SB(sb).data_clump_blocks) | 363 | if (!sbi->data_clump_blocks) |
| 364 | HFSPLUS_SB(sb).data_clump_blocks = 1; | 364 | sbi->data_clump_blocks = 1; |
| 365 | HFSPLUS_SB(sb).rsrc_clump_blocks = be32_to_cpu(vhdr->rsrc_clump_sz) >> HFSPLUS_SB(sb).alloc_blksz_shift; | 365 | sbi->rsrc_clump_blocks = |
| 366 | if (!HFSPLUS_SB(sb).rsrc_clump_blocks) | 366 | be32_to_cpu(vhdr->rsrc_clump_sz) >> sbi->alloc_blksz_shift; |
| 367 | HFSPLUS_SB(sb).rsrc_clump_blocks = 1; | 367 | if (!sbi->rsrc_clump_blocks) |
| 368 | sbi->rsrc_clump_blocks = 1; | ||
| 368 | 369 | ||
| 369 | /* Set up operations so we can load metadata */ | 370 | /* Set up operations so we can load metadata */ |
| 370 | sb->s_op = &hfsplus_sops; | 371 | sb->s_op = &hfsplus_sops; |
| @@ -374,7 +375,7 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent) | |||
| 374 | printk(KERN_WARNING "hfs: Filesystem was not cleanly unmounted, " | 375 | printk(KERN_WARNING "hfs: Filesystem was not cleanly unmounted, " |
| 375 | "running fsck.hfsplus is recommended. mounting read-only.\n"); | 376 | "running fsck.hfsplus is recommended. mounting read-only.\n"); |
| 376 | sb->s_flags |= MS_RDONLY; | 377 | sb->s_flags |= MS_RDONLY; |
| 377 | } else if (sbi->flags & HFSPLUS_SB_FORCE) { | 378 | } else if (test_and_clear_bit(HFSPLUS_SB_FORCE, &sbi->flags)) { |
| 378 | /* nothing */ | 379 | /* nothing */ |
| 379 | } else if (vhdr->attributes & cpu_to_be32(HFSPLUS_VOL_SOFTLOCK)) { | 380 | } else if (vhdr->attributes & cpu_to_be32(HFSPLUS_VOL_SOFTLOCK)) { |
| 380 | printk(KERN_WARNING "hfs: Filesystem is marked locked, mounting read-only.\n"); | 381 | printk(KERN_WARNING "hfs: Filesystem is marked locked, mounting read-only.\n"); |
| @@ -384,16 +385,15 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent) | |||
| 384 | "use the force option at your own risk, mounting read-only.\n"); | 385 | "use the force option at your own risk, mounting read-only.\n"); |
| 385 | sb->s_flags |= MS_RDONLY; | 386 | sb->s_flags |= MS_RDONLY; |
| 386 | } | 387 | } |
| 387 | sbi->flags &= ~HFSPLUS_SB_FORCE; | ||
| 388 | 388 | ||
| 389 | /* Load metadata objects (B*Trees) */ | 389 | /* Load metadata objects (B*Trees) */ |
| 390 | HFSPLUS_SB(sb).ext_tree = hfs_btree_open(sb, HFSPLUS_EXT_CNID); | 390 | sbi->ext_tree = hfs_btree_open(sb, HFSPLUS_EXT_CNID); |
| 391 | if (!HFSPLUS_SB(sb).ext_tree) { | 391 | if (!sbi->ext_tree) { |
| 392 | printk(KERN_ERR "hfs: failed to load extents file\n"); | 392 | printk(KERN_ERR "hfs: failed to load extents file\n"); |
| 393 | goto cleanup; | 393 | goto cleanup; |
| 394 | } | 394 | } |
| 395 | HFSPLUS_SB(sb).cat_tree = hfs_btree_open(sb, HFSPLUS_CAT_CNID); | 395 | sbi->cat_tree = hfs_btree_open(sb, HFSPLUS_CAT_CNID); |
| 396 | if (!HFSPLUS_SB(sb).cat_tree) { | 396 | if (!sbi->cat_tree) { |
| 397 | printk(KERN_ERR "hfs: failed to load catalog file\n"); | 397 | printk(KERN_ERR "hfs: failed to load catalog file\n"); |
| 398 | goto cleanup; | 398 | goto cleanup; |
| 399 | } | 399 | } |
| @@ -404,7 +404,7 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent) | |||
| 404 | err = PTR_ERR(inode); | 404 | err = PTR_ERR(inode); |
| 405 | goto cleanup; | 405 | goto cleanup; |
| 406 | } | 406 | } |
| 407 | HFSPLUS_SB(sb).alloc_file = inode; | 407 | sbi->alloc_file = inode; |
| 408 | 408 | ||
| 409 | /* Load the root directory */ | 409 | /* Load the root directory */ |
| 410 | root = hfsplus_iget(sb, HFSPLUS_ROOT_CNID); | 410 | root = hfsplus_iget(sb, HFSPLUS_ROOT_CNID); |
| @@ -423,7 +423,7 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent) | |||
| 423 | 423 | ||
| 424 | str.len = sizeof(HFSP_HIDDENDIR_NAME) - 1; | 424 | str.len = sizeof(HFSP_HIDDENDIR_NAME) - 1; |
| 425 | str.name = HFSP_HIDDENDIR_NAME; | 425 | str.name = HFSP_HIDDENDIR_NAME; |
| 426 | hfs_find_init(HFSPLUS_SB(sb).cat_tree, &fd); | 426 | hfs_find_init(sbi->cat_tree, &fd); |
| 427 | hfsplus_cat_build_key(sb, fd.search_key, HFSPLUS_ROOT_CNID, &str); | 427 | hfsplus_cat_build_key(sb, fd.search_key, HFSPLUS_ROOT_CNID, &str); |
| 428 | if (!hfs_brec_read(&fd, &entry, sizeof(entry))) { | 428 | if (!hfs_brec_read(&fd, &entry, sizeof(entry))) { |
| 429 | hfs_find_exit(&fd); | 429 | hfs_find_exit(&fd); |
| @@ -434,7 +434,7 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent) | |||
| 434 | err = PTR_ERR(inode); | 434 | err = PTR_ERR(inode); |
| 435 | goto cleanup; | 435 | goto cleanup; |
| 436 | } | 436 | } |
| 437 | HFSPLUS_SB(sb).hidden_dir = inode; | 437 | sbi->hidden_dir = inode; |
| 438 | } else | 438 | } else |
| 439 | hfs_find_exit(&fd); | 439 | hfs_find_exit(&fd); |
| 440 | 440 | ||
| @@ -449,15 +449,19 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent) | |||
| 449 | be32_add_cpu(&vhdr->write_count, 1); | 449 | be32_add_cpu(&vhdr->write_count, 1); |
| 450 | vhdr->attributes &= cpu_to_be32(~HFSPLUS_VOL_UNMNT); | 450 | vhdr->attributes &= cpu_to_be32(~HFSPLUS_VOL_UNMNT); |
| 451 | vhdr->attributes |= cpu_to_be32(HFSPLUS_VOL_INCNSTNT); | 451 | vhdr->attributes |= cpu_to_be32(HFSPLUS_VOL_INCNSTNT); |
| 452 | mark_buffer_dirty(HFSPLUS_SB(sb).s_vhbh); | 452 | mark_buffer_dirty(sbi->s_vhbh); |
| 453 | sync_dirty_buffer(HFSPLUS_SB(sb).s_vhbh); | 453 | sync_dirty_buffer(sbi->s_vhbh); |
| 454 | 454 | ||
| 455 | if (!HFSPLUS_SB(sb).hidden_dir) { | 455 | if (!sbi->hidden_dir) { |
| 456 | printk(KERN_DEBUG "hfs: create hidden dir...\n"); | 456 | printk(KERN_DEBUG "hfs: create hidden dir...\n"); |
| 457 | HFSPLUS_SB(sb).hidden_dir = hfsplus_new_inode(sb, S_IFDIR); | 457 | |
| 458 | hfsplus_create_cat(HFSPLUS_SB(sb).hidden_dir->i_ino, sb->s_root->d_inode, | 458 | mutex_lock(&sbi->vh_mutex); |
| 459 | &str, HFSPLUS_SB(sb).hidden_dir); | 459 | sbi->hidden_dir = hfsplus_new_inode(sb, S_IFDIR); |
| 460 | mark_inode_dirty(HFSPLUS_SB(sb).hidden_dir); | 460 | hfsplus_create_cat(sbi->hidden_dir->i_ino, sb->s_root->d_inode, |
| 461 | &str, sbi->hidden_dir); | ||
| 462 | mutex_unlock(&sbi->vh_mutex); | ||
| 463 | |||
| 464 | mark_inode_dirty(sbi->hidden_dir); | ||
| 461 | } | 465 | } |
| 462 | out: | 466 | out: |
| 463 | unload_nls(sbi->nls); | 467 | unload_nls(sbi->nls); |
| @@ -486,7 +490,7 @@ static struct inode *hfsplus_alloc_inode(struct super_block *sb) | |||
| 486 | 490 | ||
| 487 | static void hfsplus_destroy_inode(struct inode *inode) | 491 | static void hfsplus_destroy_inode(struct inode *inode) |
| 488 | { | 492 | { |
| 489 | kmem_cache_free(hfsplus_inode_cachep, &HFSPLUS_I(inode)); | 493 | kmem_cache_free(hfsplus_inode_cachep, HFSPLUS_I(inode)); |
| 490 | } | 494 | } |
| 491 | 495 | ||
| 492 | #define HFSPLUS_INODE_SIZE sizeof(struct hfsplus_inode_info) | 496 | #define HFSPLUS_INODE_SIZE sizeof(struct hfsplus_inode_info) |
diff --git a/fs/hfsplus/unicode.c b/fs/hfsplus/unicode.c index 628ccf6fa402..b66d67de882c 100644 --- a/fs/hfsplus/unicode.c +++ b/fs/hfsplus/unicode.c | |||
| @@ -121,7 +121,7 @@ static u16 *hfsplus_compose_lookup(u16 *p, u16 cc) | |||
| 121 | int hfsplus_uni2asc(struct super_block *sb, const struct hfsplus_unistr *ustr, char *astr, int *len_p) | 121 | int hfsplus_uni2asc(struct super_block *sb, const struct hfsplus_unistr *ustr, char *astr, int *len_p) |
| 122 | { | 122 | { |
| 123 | const hfsplus_unichr *ip; | 123 | const hfsplus_unichr *ip; |
| 124 | struct nls_table *nls = HFSPLUS_SB(sb).nls; | 124 | struct nls_table *nls = HFSPLUS_SB(sb)->nls; |
| 125 | u8 *op; | 125 | u8 *op; |
| 126 | u16 cc, c0, c1; | 126 | u16 cc, c0, c1; |
| 127 | u16 *ce1, *ce2; | 127 | u16 *ce1, *ce2; |
| @@ -132,7 +132,7 @@ int hfsplus_uni2asc(struct super_block *sb, const struct hfsplus_unistr *ustr, c | |||
| 132 | ustrlen = be16_to_cpu(ustr->length); | 132 | ustrlen = be16_to_cpu(ustr->length); |
| 133 | len = *len_p; | 133 | len = *len_p; |
| 134 | ce1 = NULL; | 134 | ce1 = NULL; |
| 135 | compose = !(HFSPLUS_SB(sb).flags & HFSPLUS_SB_NODECOMPOSE); | 135 | compose = !test_bit(HFSPLUS_SB_NODECOMPOSE, &HFSPLUS_SB(sb)->flags); |
| 136 | 136 | ||
| 137 | while (ustrlen > 0) { | 137 | while (ustrlen > 0) { |
| 138 | c0 = be16_to_cpu(*ip++); | 138 | c0 = be16_to_cpu(*ip++); |
| @@ -246,7 +246,7 @@ out: | |||
| 246 | static inline int asc2unichar(struct super_block *sb, const char *astr, int len, | 246 | static inline int asc2unichar(struct super_block *sb, const char *astr, int len, |
| 247 | wchar_t *uc) | 247 | wchar_t *uc) |
| 248 | { | 248 | { |
| 249 | int size = HFSPLUS_SB(sb).nls->char2uni(astr, len, uc); | 249 | int size = HFSPLUS_SB(sb)->nls->char2uni(astr, len, uc); |
| 250 | if (size <= 0) { | 250 | if (size <= 0) { |
| 251 | *uc = '?'; | 251 | *uc = '?'; |
| 252 | size = 1; | 252 | size = 1; |
| @@ -293,7 +293,7 @@ int hfsplus_asc2uni(struct super_block *sb, struct hfsplus_unistr *ustr, | |||
| 293 | u16 *dstr, outlen = 0; | 293 | u16 *dstr, outlen = 0; |
| 294 | wchar_t c; | 294 | wchar_t c; |
| 295 | 295 | ||
| 296 | decompose = !(HFSPLUS_SB(sb).flags & HFSPLUS_SB_NODECOMPOSE); | 296 | decompose = !test_bit(HFSPLUS_SB_NODECOMPOSE, &HFSPLUS_SB(sb)->flags); |
| 297 | while (outlen < HFSPLUS_MAX_STRLEN && len > 0) { | 297 | while (outlen < HFSPLUS_MAX_STRLEN && len > 0) { |
| 298 | size = asc2unichar(sb, astr, len, &c); | 298 | size = asc2unichar(sb, astr, len, &c); |
| 299 | 299 | ||
| @@ -330,8 +330,8 @@ int hfsplus_hash_dentry(struct dentry *dentry, struct qstr *str) | |||
| 330 | wchar_t c; | 330 | wchar_t c; |
| 331 | u16 c2; | 331 | u16 c2; |
| 332 | 332 | ||
| 333 | casefold = (HFSPLUS_SB(sb).flags & HFSPLUS_SB_CASEFOLD); | 333 | casefold = test_bit(HFSPLUS_SB_CASEFOLD, &HFSPLUS_SB(sb)->flags); |
| 334 | decompose = !(HFSPLUS_SB(sb).flags & HFSPLUS_SB_NODECOMPOSE); | 334 | decompose = !test_bit(HFSPLUS_SB_NODECOMPOSE, &HFSPLUS_SB(sb)->flags); |
| 335 | hash = init_name_hash(); | 335 | hash = init_name_hash(); |
| 336 | astr = str->name; | 336 | astr = str->name; |
| 337 | len = str->len; | 337 | len = str->len; |
| @@ -373,8 +373,8 @@ int hfsplus_compare_dentry(struct dentry *dentry, struct qstr *s1, struct qstr * | |||
| 373 | u16 c1, c2; | 373 | u16 c1, c2; |
| 374 | wchar_t c; | 374 | wchar_t c; |
| 375 | 375 | ||
| 376 | casefold = (HFSPLUS_SB(sb).flags & HFSPLUS_SB_CASEFOLD); | 376 | casefold = test_bit(HFSPLUS_SB_CASEFOLD, &HFSPLUS_SB(sb)->flags); |
| 377 | decompose = !(HFSPLUS_SB(sb).flags & HFSPLUS_SB_NODECOMPOSE); | 377 | decompose = !test_bit(HFSPLUS_SB_NODECOMPOSE, &HFSPLUS_SB(sb)->flags); |
| 378 | astr1 = s1->name; | 378 | astr1 = s1->name; |
| 379 | len1 = s1->len; | 379 | len1 = s1->len; |
| 380 | astr2 = s2->name; | 380 | astr2 = s2->name; |
diff --git a/fs/hfsplus/wrapper.c b/fs/hfsplus/wrapper.c index bed78ac8f6d1..8972c20b3216 100644 --- a/fs/hfsplus/wrapper.c +++ b/fs/hfsplus/wrapper.c | |||
| @@ -65,8 +65,8 @@ static int hfsplus_get_last_session(struct super_block *sb, | |||
| 65 | *start = 0; | 65 | *start = 0; |
| 66 | *size = sb->s_bdev->bd_inode->i_size >> 9; | 66 | *size = sb->s_bdev->bd_inode->i_size >> 9; |
| 67 | 67 | ||
| 68 | if (HFSPLUS_SB(sb).session >= 0) { | 68 | if (HFSPLUS_SB(sb)->session >= 0) { |
| 69 | te.cdte_track = HFSPLUS_SB(sb).session; | 69 | te.cdte_track = HFSPLUS_SB(sb)->session; |
| 70 | te.cdte_format = CDROM_LBA; | 70 | te.cdte_format = CDROM_LBA; |
| 71 | res = ioctl_by_bdev(sb->s_bdev, CDROMREADTOCENTRY, (unsigned long)&te); | 71 | res = ioctl_by_bdev(sb->s_bdev, CDROMREADTOCENTRY, (unsigned long)&te); |
| 72 | if (!res && (te.cdte_ctrl & CDROM_DATA_TRACK) == 4) { | 72 | if (!res && (te.cdte_ctrl & CDROM_DATA_TRACK) == 4) { |
| @@ -87,6 +87,7 @@ static int hfsplus_get_last_session(struct super_block *sb, | |||
| 87 | /* Takes in super block, returns true if good data read */ | 87 | /* Takes in super block, returns true if good data read */ |
| 88 | int hfsplus_read_wrapper(struct super_block *sb) | 88 | int hfsplus_read_wrapper(struct super_block *sb) |
| 89 | { | 89 | { |
| 90 | struct hfsplus_sb_info *sbi = HFSPLUS_SB(sb); | ||
| 90 | struct buffer_head *bh; | 91 | struct buffer_head *bh; |
| 91 | struct hfsplus_vh *vhdr; | 92 | struct hfsplus_vh *vhdr; |
| 92 | struct hfsplus_wd wd; | 93 | struct hfsplus_wd wd; |
| @@ -122,7 +123,7 @@ int hfsplus_read_wrapper(struct super_block *sb) | |||
| 122 | if (vhdr->signature == cpu_to_be16(HFSPLUS_VOLHEAD_SIG)) | 123 | if (vhdr->signature == cpu_to_be16(HFSPLUS_VOLHEAD_SIG)) |
| 123 | break; | 124 | break; |
| 124 | if (vhdr->signature == cpu_to_be16(HFSPLUS_VOLHEAD_SIGX)) { | 125 | if (vhdr->signature == cpu_to_be16(HFSPLUS_VOLHEAD_SIGX)) { |
| 125 | HFSPLUS_SB(sb).flags |= HFSPLUS_SB_HFSX; | 126 | set_bit(HFSPLUS_SB_HFSX, &sbi->flags); |
| 126 | break; | 127 | break; |
| 127 | } | 128 | } |
| 128 | brelse(bh); | 129 | brelse(bh); |
| @@ -143,11 +144,11 @@ int hfsplus_read_wrapper(struct super_block *sb) | |||
| 143 | if (blocksize < HFSPLUS_SECTOR_SIZE || | 144 | if (blocksize < HFSPLUS_SECTOR_SIZE || |
| 144 | ((blocksize - 1) & blocksize)) | 145 | ((blocksize - 1) & blocksize)) |
| 145 | return -EINVAL; | 146 | return -EINVAL; |
| 146 | HFSPLUS_SB(sb).alloc_blksz = blocksize; | 147 | sbi->alloc_blksz = blocksize; |
| 147 | HFSPLUS_SB(sb).alloc_blksz_shift = 0; | 148 | sbi->alloc_blksz_shift = 0; |
| 148 | while ((blocksize >>= 1) != 0) | 149 | while ((blocksize >>= 1) != 0) |
| 149 | HFSPLUS_SB(sb).alloc_blksz_shift++; | 150 | sbi->alloc_blksz_shift++; |
| 150 | blocksize = min(HFSPLUS_SB(sb).alloc_blksz, (u32)PAGE_SIZE); | 151 | blocksize = min(sbi->alloc_blksz, (u32)PAGE_SIZE); |
| 151 | 152 | ||
| 152 | /* align block size to block offset */ | 153 | /* align block size to block offset */ |
| 153 | while (part_start & ((blocksize >> HFSPLUS_SECTOR_SHIFT) - 1)) | 154 | while (part_start & ((blocksize >> HFSPLUS_SECTOR_SHIFT) - 1)) |
| @@ -158,23 +159,26 @@ int hfsplus_read_wrapper(struct super_block *sb) | |||
| 158 | return -EINVAL; | 159 | return -EINVAL; |
| 159 | } | 160 | } |
| 160 | 161 | ||
| 161 | HFSPLUS_SB(sb).blockoffset = part_start >> | 162 | sbi->blockoffset = |
| 162 | (sb->s_blocksize_bits - HFSPLUS_SECTOR_SHIFT); | 163 | part_start >> (sb->s_blocksize_bits - HFSPLUS_SECTOR_SHIFT); |
| 163 | HFSPLUS_SB(sb).sect_count = part_size; | 164 | sbi->sect_count = part_size; |
| 164 | HFSPLUS_SB(sb).fs_shift = HFSPLUS_SB(sb).alloc_blksz_shift - | 165 | sbi->fs_shift = sbi->alloc_blksz_shift - sb->s_blocksize_bits; |
| 165 | sb->s_blocksize_bits; | ||
| 166 | 166 | ||
| 167 | bh = sb_bread512(sb, part_start + HFSPLUS_VOLHEAD_SECTOR, vhdr); | 167 | bh = sb_bread512(sb, part_start + HFSPLUS_VOLHEAD_SECTOR, vhdr); |
| 168 | if (!bh) | 168 | if (!bh) |
| 169 | return -EIO; | 169 | return -EIO; |
| 170 | 170 | ||
| 171 | /* should still be the same... */ | 171 | /* should still be the same... */ |
| 172 | if (vhdr->signature != (HFSPLUS_SB(sb).flags & HFSPLUS_SB_HFSX ? | 172 | if (test_bit(HFSPLUS_SB_HFSX, &sbi->flags)) { |
| 173 | cpu_to_be16(HFSPLUS_VOLHEAD_SIGX) : | 173 | if (vhdr->signature != cpu_to_be16(HFSPLUS_VOLHEAD_SIGX)) |
| 174 | cpu_to_be16(HFSPLUS_VOLHEAD_SIG))) | 174 | goto error; |
| 175 | goto error; | 175 | } else { |
| 176 | HFSPLUS_SB(sb).s_vhbh = bh; | 176 | if (vhdr->signature != cpu_to_be16(HFSPLUS_VOLHEAD_SIG)) |
| 177 | HFSPLUS_SB(sb).s_vhdr = vhdr; | 177 | goto error; |
| 178 | } | ||
| 179 | |||
| 180 | sbi->s_vhbh = bh; | ||
| 181 | sbi->s_vhdr = vhdr; | ||
| 178 | 182 | ||
| 179 | return 0; | 183 | return 0; |
| 180 | error: | 184 | error: |
diff --git a/fs/ceph/auth.h b/include/linux/ceph/auth.h index d38a2fb4a137..7fff521d7eb5 100644 --- a/fs/ceph/auth.h +++ b/include/linux/ceph/auth.h | |||
| @@ -1,8 +1,8 @@ | |||
| 1 | #ifndef _FS_CEPH_AUTH_H | 1 | #ifndef _FS_CEPH_AUTH_H |
| 2 | #define _FS_CEPH_AUTH_H | 2 | #define _FS_CEPH_AUTH_H |
| 3 | 3 | ||
| 4 | #include "types.h" | 4 | #include <linux/ceph/types.h> |
| 5 | #include "buffer.h" | 5 | #include <linux/ceph/buffer.h> |
| 6 | 6 | ||
| 7 | /* | 7 | /* |
| 8 | * Abstract interface for communicating with the authenticate module. | 8 | * Abstract interface for communicating with the authenticate module. |
diff --git a/fs/ceph/buffer.h b/include/linux/ceph/buffer.h index 58d19014068f..58d19014068f 100644 --- a/fs/ceph/buffer.h +++ b/include/linux/ceph/buffer.h | |||
diff --git a/fs/ceph/ceph_debug.h b/include/linux/ceph/ceph_debug.h index 1818c2305610..aa2e19182d99 100644 --- a/fs/ceph/ceph_debug.h +++ b/include/linux/ceph/ceph_debug.h | |||
| @@ -3,7 +3,7 @@ | |||
| 3 | 3 | ||
| 4 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt | 4 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt |
| 5 | 5 | ||
| 6 | #ifdef CONFIG_CEPH_FS_PRETTYDEBUG | 6 | #ifdef CONFIG_CEPH_LIB_PRETTYDEBUG |
| 7 | 7 | ||
| 8 | /* | 8 | /* |
| 9 | * wrap pr_debug to include a filename:lineno prefix on each line. | 9 | * wrap pr_debug to include a filename:lineno prefix on each line. |
| @@ -14,7 +14,8 @@ | |||
| 14 | # if defined(DEBUG) || defined(CONFIG_DYNAMIC_DEBUG) | 14 | # if defined(DEBUG) || defined(CONFIG_DYNAMIC_DEBUG) |
| 15 | extern const char *ceph_file_part(const char *s, int len); | 15 | extern const char *ceph_file_part(const char *s, int len); |
| 16 | # define dout(fmt, ...) \ | 16 | # define dout(fmt, ...) \ |
| 17 | pr_debug(" %12.12s:%-4d : " fmt, \ | 17 | pr_debug("%.*s %12.12s:%-4d : " fmt, \ |
| 18 | 8 - (int)sizeof(KBUILD_MODNAME), " ", \ | ||
| 18 | ceph_file_part(__FILE__, sizeof(__FILE__)), \ | 19 | ceph_file_part(__FILE__, sizeof(__FILE__)), \ |
| 19 | __LINE__, ##__VA_ARGS__) | 20 | __LINE__, ##__VA_ARGS__) |
| 20 | # else | 21 | # else |
diff --git a/fs/ceph/ceph_frag.h b/include/linux/ceph/ceph_frag.h index 5babb8e95352..5babb8e95352 100644 --- a/fs/ceph/ceph_frag.h +++ b/include/linux/ceph/ceph_frag.h | |||
diff --git a/fs/ceph/ceph_fs.h b/include/linux/ceph/ceph_fs.h index d5619ac86711..c3c74aef289d 100644 --- a/fs/ceph/ceph_fs.h +++ b/include/linux/ceph/ceph_fs.h | |||
| @@ -299,6 +299,7 @@ enum { | |||
| 299 | CEPH_MDS_OP_SETATTR = 0x01108, | 299 | CEPH_MDS_OP_SETATTR = 0x01108, |
| 300 | CEPH_MDS_OP_SETFILELOCK= 0x01109, | 300 | CEPH_MDS_OP_SETFILELOCK= 0x01109, |
| 301 | CEPH_MDS_OP_GETFILELOCK= 0x00110, | 301 | CEPH_MDS_OP_GETFILELOCK= 0x00110, |
| 302 | CEPH_MDS_OP_SETDIRLAYOUT=0x0110a, | ||
| 302 | 303 | ||
| 303 | CEPH_MDS_OP_MKNOD = 0x01201, | 304 | CEPH_MDS_OP_MKNOD = 0x01201, |
| 304 | CEPH_MDS_OP_LINK = 0x01202, | 305 | CEPH_MDS_OP_LINK = 0x01202, |
diff --git a/fs/ceph/ceph_hash.h b/include/linux/ceph/ceph_hash.h index d099c3f90236..d099c3f90236 100644 --- a/fs/ceph/ceph_hash.h +++ b/include/linux/ceph/ceph_hash.h | |||
diff --git a/include/linux/ceph/debugfs.h b/include/linux/ceph/debugfs.h new file mode 100644 index 000000000000..2a79702e092b --- /dev/null +++ b/include/linux/ceph/debugfs.h | |||
| @@ -0,0 +1,33 @@ | |||
| 1 | #ifndef _FS_CEPH_DEBUGFS_H | ||
| 2 | #define _FS_CEPH_DEBUGFS_H | ||
| 3 | |||
| 4 | #include "ceph_debug.h" | ||
| 5 | #include "types.h" | ||
| 6 | |||
| 7 | #define CEPH_DEFINE_SHOW_FUNC(name) \ | ||
| 8 | static int name##_open(struct inode *inode, struct file *file) \ | ||
| 9 | { \ | ||
| 10 | struct seq_file *sf; \ | ||
| 11 | int ret; \ | ||
| 12 | \ | ||
| 13 | ret = single_open(file, name, NULL); \ | ||
| 14 | sf = file->private_data; \ | ||
| 15 | sf->private = inode->i_private; \ | ||
| 16 | return ret; \ | ||
| 17 | } \ | ||
| 18 | \ | ||
| 19 | static const struct file_operations name##_fops = { \ | ||
| 20 | .open = name##_open, \ | ||
| 21 | .read = seq_read, \ | ||
| 22 | .llseek = seq_lseek, \ | ||
| 23 | .release = single_release, \ | ||
| 24 | }; | ||
| 25 | |||
| 26 | /* debugfs.c */ | ||
| 27 | extern int ceph_debugfs_init(void); | ||
| 28 | extern void ceph_debugfs_cleanup(void); | ||
| 29 | extern int ceph_debugfs_client_init(struct ceph_client *client); | ||
| 30 | extern void ceph_debugfs_client_cleanup(struct ceph_client *client); | ||
| 31 | |||
| 32 | #endif | ||
| 33 | |||
diff --git a/fs/ceph/decode.h b/include/linux/ceph/decode.h index 3d25415afe63..c5b6939fb32a 100644 --- a/fs/ceph/decode.h +++ b/include/linux/ceph/decode.h | |||
| @@ -191,6 +191,11 @@ static inline void ceph_encode_string(void **p, void *end, | |||
| 191 | ceph_encode_need(p, end, n, bad); \ | 191 | ceph_encode_need(p, end, n, bad); \ |
| 192 | ceph_encode_copy(p, pv, n); \ | 192 | ceph_encode_copy(p, pv, n); \ |
| 193 | } while (0) | 193 | } while (0) |
| 194 | #define ceph_encode_string_safe(p, end, s, n, bad) \ | ||
| 195 | do { \ | ||
| 196 | ceph_encode_need(p, end, n, bad); \ | ||
| 197 | ceph_encode_string(p, end, s, n); \ | ||
| 198 | } while (0) | ||
| 194 | 199 | ||
| 195 | 200 | ||
| 196 | #endif | 201 | #endif |
diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h new file mode 100644 index 000000000000..f22b2e941686 --- /dev/null +++ b/include/linux/ceph/libceph.h | |||
| @@ -0,0 +1,249 @@ | |||
| 1 | #ifndef _FS_CEPH_LIBCEPH_H | ||
| 2 | #define _FS_CEPH_LIBCEPH_H | ||
| 3 | |||
| 4 | #include "ceph_debug.h" | ||
| 5 | |||
| 6 | #include <asm/unaligned.h> | ||
| 7 | #include <linux/backing-dev.h> | ||
| 8 | #include <linux/completion.h> | ||
| 9 | #include <linux/exportfs.h> | ||
| 10 | #include <linux/fs.h> | ||
| 11 | #include <linux/mempool.h> | ||
| 12 | #include <linux/pagemap.h> | ||
| 13 | #include <linux/wait.h> | ||
| 14 | #include <linux/writeback.h> | ||
| 15 | #include <linux/slab.h> | ||
| 16 | |||
| 17 | #include "types.h" | ||
| 18 | #include "messenger.h" | ||
| 19 | #include "msgpool.h" | ||
| 20 | #include "mon_client.h" | ||
| 21 | #include "osd_client.h" | ||
| 22 | #include "ceph_fs.h" | ||
| 23 | |||
| 24 | /* | ||
| 25 | * Supported features | ||
| 26 | */ | ||
| 27 | #define CEPH_FEATURE_SUPPORTED_DEFAULT CEPH_FEATURE_NOSRCADDR | ||
| 28 | #define CEPH_FEATURE_REQUIRED_DEFAULT CEPH_FEATURE_NOSRCADDR | ||
| 29 | |||
| 30 | /* | ||
| 31 | * mount options | ||
| 32 | */ | ||
| 33 | #define CEPH_OPT_FSID (1<<0) | ||
| 34 | #define CEPH_OPT_NOSHARE (1<<1) /* don't share client with other sbs */ | ||
| 35 | #define CEPH_OPT_MYIP (1<<2) /* specified my ip */ | ||
| 36 | #define CEPH_OPT_NOCRC (1<<3) /* no data crc on writes */ | ||
| 37 | |||
| 38 | #define CEPH_OPT_DEFAULT (0); | ||
| 39 | |||
| 40 | #define ceph_set_opt(client, opt) \ | ||
| 41 | (client)->options->flags |= CEPH_OPT_##opt; | ||
| 42 | #define ceph_test_opt(client, opt) \ | ||
| 43 | (!!((client)->options->flags & CEPH_OPT_##opt)) | ||
| 44 | |||
| 45 | struct ceph_options { | ||
| 46 | int flags; | ||
| 47 | struct ceph_fsid fsid; | ||
| 48 | struct ceph_entity_addr my_addr; | ||
| 49 | int mount_timeout; | ||
| 50 | int osd_idle_ttl; | ||
| 51 | int osd_timeout; | ||
| 52 | int osd_keepalive_timeout; | ||
| 53 | |||
| 54 | /* | ||
| 55 | * any type that can't be simply compared or doesn't need | ||
| 56 | * to be compared should go beyond this point, | ||
| 57 | * ceph_compare_options() should be updated accordingly | ||
| 58 | */ | ||
| 59 | |||
| 60 | struct ceph_entity_addr *mon_addr; /* should be the first | ||
| 61 | pointer type of args */ | ||
| 62 | int num_mon; | ||
| 63 | char *name; | ||
| 64 | char *secret; | ||
| 65 | }; | ||
| 66 | |||
| 67 | /* | ||
| 68 | * defaults | ||
| 69 | */ | ||
| 70 | #define CEPH_MOUNT_TIMEOUT_DEFAULT 60 | ||
| 71 | #define CEPH_OSD_TIMEOUT_DEFAULT 60 /* seconds */ | ||
| 72 | #define CEPH_OSD_KEEPALIVE_DEFAULT 5 | ||
| 73 | #define CEPH_OSD_IDLE_TTL_DEFAULT 60 | ||
| 74 | #define CEPH_MOUNT_RSIZE_DEFAULT (512*1024) /* readahead */ | ||
| 75 | |||
| 76 | #define CEPH_MSG_MAX_FRONT_LEN (16*1024*1024) | ||
| 77 | #define CEPH_MSG_MAX_DATA_LEN (16*1024*1024) | ||
| 78 | |||
| 79 | #define CEPH_AUTH_NAME_DEFAULT "guest" | ||
| 80 | |||
| 81 | /* | ||
| 82 | * Delay telling the MDS we no longer want caps, in case we reopen | ||
| 83 | * the file. Delay a minimum amount of time, even if we send a cap | ||
| 84 | * message for some other reason. Otherwise, take the oppotunity to | ||
| 85 | * update the mds to avoid sending another message later. | ||
| 86 | */ | ||
| 87 | #define CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT 5 /* cap release delay */ | ||
| 88 | #define CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT 60 /* cap release delay */ | ||
| 89 | |||
| 90 | #define CEPH_CAP_RELEASE_SAFETY_DEFAULT (CEPH_CAPS_PER_RELEASE * 4) | ||
| 91 | |||
| 92 | /* mount state */ | ||
| 93 | enum { | ||
| 94 | CEPH_MOUNT_MOUNTING, | ||
| 95 | CEPH_MOUNT_MOUNTED, | ||
| 96 | CEPH_MOUNT_UNMOUNTING, | ||
| 97 | CEPH_MOUNT_UNMOUNTED, | ||
| 98 | CEPH_MOUNT_SHUTDOWN, | ||
| 99 | }; | ||
| 100 | |||
| 101 | /* | ||
| 102 | * subtract jiffies | ||
| 103 | */ | ||
| 104 | static inline unsigned long time_sub(unsigned long a, unsigned long b) | ||
| 105 | { | ||
| 106 | BUG_ON(time_after(b, a)); | ||
| 107 | return (long)a - (long)b; | ||
| 108 | } | ||
| 109 | |||
| 110 | struct ceph_mds_client; | ||
| 111 | |||
| 112 | /* | ||
| 113 | * per client state | ||
| 114 | * | ||
| 115 | * possibly shared by multiple mount points, if they are | ||
| 116 | * mounting the same ceph filesystem/cluster. | ||
| 117 | */ | ||
| 118 | struct ceph_client { | ||
| 119 | struct ceph_fsid fsid; | ||
| 120 | bool have_fsid; | ||
| 121 | |||
| 122 | void *private; | ||
| 123 | |||
| 124 | struct ceph_options *options; | ||
| 125 | |||
| 126 | struct mutex mount_mutex; /* serialize mount attempts */ | ||
| 127 | wait_queue_head_t auth_wq; | ||
| 128 | int auth_err; | ||
| 129 | |||
| 130 | int (*extra_mon_dispatch)(struct ceph_client *, struct ceph_msg *); | ||
| 131 | |||
| 132 | u32 supported_features; | ||
| 133 | u32 required_features; | ||
| 134 | |||
| 135 | struct ceph_messenger *msgr; /* messenger instance */ | ||
| 136 | struct ceph_mon_client monc; | ||
| 137 | struct ceph_osd_client osdc; | ||
| 138 | |||
| 139 | #ifdef CONFIG_DEBUG_FS | ||
| 140 | struct dentry *debugfs_dir; | ||
| 141 | struct dentry *debugfs_monmap; | ||
| 142 | struct dentry *debugfs_osdmap; | ||
| 143 | #endif | ||
| 144 | }; | ||
| 145 | |||
| 146 | |||
| 147 | |||
| 148 | /* | ||
| 149 | * snapshots | ||
| 150 | */ | ||
| 151 | |||
| 152 | /* | ||
| 153 | * A "snap context" is the set of existing snapshots when we | ||
| 154 | * write data. It is used by the OSD to guide its COW behavior. | ||
| 155 | * | ||
| 156 | * The ceph_snap_context is refcounted, and attached to each dirty | ||
| 157 | * page, indicating which context the dirty data belonged when it was | ||
| 158 | * dirtied. | ||
| 159 | */ | ||
| 160 | struct ceph_snap_context { | ||
| 161 | atomic_t nref; | ||
| 162 | u64 seq; | ||
| 163 | int num_snaps; | ||
| 164 | u64 snaps[]; | ||
| 165 | }; | ||
| 166 | |||
| 167 | static inline struct ceph_snap_context * | ||
| 168 | ceph_get_snap_context(struct ceph_snap_context *sc) | ||
| 169 | { | ||
| 170 | /* | ||
| 171 | printk("get_snap_context %p %d -> %d\n", sc, atomic_read(&sc->nref), | ||
| 172 | atomic_read(&sc->nref)+1); | ||
| 173 | */ | ||
| 174 | if (sc) | ||
| 175 | atomic_inc(&sc->nref); | ||
| 176 | return sc; | ||
| 177 | } | ||
| 178 | |||
| 179 | static inline void ceph_put_snap_context(struct ceph_snap_context *sc) | ||
| 180 | { | ||
| 181 | if (!sc) | ||
| 182 | return; | ||
| 183 | /* | ||
| 184 | printk("put_snap_context %p %d -> %d\n", sc, atomic_read(&sc->nref), | ||
| 185 | atomic_read(&sc->nref)-1); | ||
| 186 | */ | ||
| 187 | if (atomic_dec_and_test(&sc->nref)) { | ||
| 188 | /*printk(" deleting snap_context %p\n", sc);*/ | ||
| 189 | kfree(sc); | ||
| 190 | } | ||
| 191 | } | ||
| 192 | |||
| 193 | /* | ||
| 194 | * calculate the number of pages a given length and offset map onto, | ||
| 195 | * if we align the data. | ||
| 196 | */ | ||
| 197 | static inline int calc_pages_for(u64 off, u64 len) | ||
| 198 | { | ||
| 199 | return ((off+len+PAGE_CACHE_SIZE-1) >> PAGE_CACHE_SHIFT) - | ||
| 200 | (off >> PAGE_CACHE_SHIFT); | ||
| 201 | } | ||
| 202 | |||
| 203 | /* ceph_common.c */ | ||
| 204 | extern const char *ceph_msg_type_name(int type); | ||
| 205 | extern int ceph_check_fsid(struct ceph_client *client, struct ceph_fsid *fsid); | ||
| 206 | extern struct kmem_cache *ceph_inode_cachep; | ||
| 207 | extern struct kmem_cache *ceph_cap_cachep; | ||
| 208 | extern struct kmem_cache *ceph_dentry_cachep; | ||
| 209 | extern struct kmem_cache *ceph_file_cachep; | ||
| 210 | |||
| 211 | extern int ceph_parse_options(struct ceph_options **popt, char *options, | ||
| 212 | const char *dev_name, const char *dev_name_end, | ||
| 213 | int (*parse_extra_token)(char *c, void *private), | ||
| 214 | void *private); | ||
| 215 | extern void ceph_destroy_options(struct ceph_options *opt); | ||
| 216 | extern int ceph_compare_options(struct ceph_options *new_opt, | ||
| 217 | struct ceph_client *client); | ||
| 218 | extern struct ceph_client *ceph_create_client(struct ceph_options *opt, | ||
| 219 | void *private); | ||
| 220 | extern u64 ceph_client_id(struct ceph_client *client); | ||
| 221 | extern void ceph_destroy_client(struct ceph_client *client); | ||
| 222 | extern int __ceph_open_session(struct ceph_client *client, | ||
| 223 | unsigned long started); | ||
| 224 | extern int ceph_open_session(struct ceph_client *client); | ||
| 225 | |||
| 226 | /* pagevec.c */ | ||
| 227 | extern void ceph_release_page_vector(struct page **pages, int num_pages); | ||
| 228 | |||
| 229 | extern struct page **ceph_get_direct_page_vector(const char __user *data, | ||
| 230 | int num_pages, | ||
| 231 | loff_t off, size_t len); | ||
| 232 | extern void ceph_put_page_vector(struct page **pages, int num_pages); | ||
| 233 | extern void ceph_release_page_vector(struct page **pages, int num_pages); | ||
| 234 | extern struct page **ceph_alloc_page_vector(int num_pages, gfp_t flags); | ||
| 235 | extern int ceph_copy_user_to_page_vector(struct page **pages, | ||
| 236 | const char __user *data, | ||
| 237 | loff_t off, size_t len); | ||
| 238 | extern int ceph_copy_to_page_vector(struct page **pages, | ||
| 239 | const char *data, | ||
| 240 | loff_t off, size_t len); | ||
| 241 | extern int ceph_copy_from_page_vector(struct page **pages, | ||
| 242 | char *data, | ||
| 243 | loff_t off, size_t len); | ||
| 244 | extern int ceph_copy_page_vector_to_user(struct page **pages, char __user *data, | ||
| 245 | loff_t off, size_t len); | ||
| 246 | extern void ceph_zero_page_vector_range(int off, int len, struct page **pages); | ||
| 247 | |||
| 248 | |||
| 249 | #endif /* _FS_CEPH_LIBCEPH_H */ | ||
diff --git a/fs/ceph/mdsmap.h b/include/linux/ceph/mdsmap.h index 4c5cb0880bba..4c5cb0880bba 100644 --- a/fs/ceph/mdsmap.h +++ b/include/linux/ceph/mdsmap.h | |||
diff --git a/fs/ceph/messenger.h b/include/linux/ceph/messenger.h index 76fbc957bc13..5956d62c3057 100644 --- a/fs/ceph/messenger.h +++ b/include/linux/ceph/messenger.h | |||
| @@ -65,6 +65,9 @@ struct ceph_messenger { | |||
| 65 | */ | 65 | */ |
| 66 | u32 global_seq; | 66 | u32 global_seq; |
| 67 | spinlock_t global_seq_lock; | 67 | spinlock_t global_seq_lock; |
| 68 | |||
| 69 | u32 supported_features; | ||
| 70 | u32 required_features; | ||
| 68 | }; | 71 | }; |
| 69 | 72 | ||
| 70 | /* | 73 | /* |
| @@ -82,6 +85,10 @@ struct ceph_msg { | |||
| 82 | struct ceph_pagelist *pagelist; /* instead of pages */ | 85 | struct ceph_pagelist *pagelist; /* instead of pages */ |
| 83 | struct list_head list_head; | 86 | struct list_head list_head; |
| 84 | struct kref kref; | 87 | struct kref kref; |
| 88 | struct bio *bio; /* instead of pages/pagelist */ | ||
| 89 | struct bio *bio_iter; /* bio iterator */ | ||
| 90 | int bio_seg; /* current bio segment */ | ||
| 91 | struct ceph_pagelist *trail; /* the trailing part of the data */ | ||
| 85 | bool front_is_vmalloc; | 92 | bool front_is_vmalloc; |
| 86 | bool more_to_follow; | 93 | bool more_to_follow; |
| 87 | bool needs_out_seq; | 94 | bool needs_out_seq; |
| @@ -205,7 +212,7 @@ struct ceph_connection { | |||
| 205 | }; | 212 | }; |
| 206 | 213 | ||
| 207 | 214 | ||
| 208 | extern const char *pr_addr(const struct sockaddr_storage *ss); | 215 | extern const char *ceph_pr_addr(const struct sockaddr_storage *ss); |
| 209 | extern int ceph_parse_ips(const char *c, const char *end, | 216 | extern int ceph_parse_ips(const char *c, const char *end, |
| 210 | struct ceph_entity_addr *addr, | 217 | struct ceph_entity_addr *addr, |
| 211 | int max_count, int *count); | 218 | int max_count, int *count); |
| @@ -216,7 +223,8 @@ extern void ceph_msgr_exit(void); | |||
| 216 | extern void ceph_msgr_flush(void); | 223 | extern void ceph_msgr_flush(void); |
| 217 | 224 | ||
| 218 | extern struct ceph_messenger *ceph_messenger_create( | 225 | extern struct ceph_messenger *ceph_messenger_create( |
| 219 | struct ceph_entity_addr *myaddr); | 226 | struct ceph_entity_addr *myaddr, |
| 227 | u32 features, u32 required); | ||
| 220 | extern void ceph_messenger_destroy(struct ceph_messenger *); | 228 | extern void ceph_messenger_destroy(struct ceph_messenger *); |
| 221 | 229 | ||
| 222 | extern void ceph_con_init(struct ceph_messenger *msgr, | 230 | extern void ceph_con_init(struct ceph_messenger *msgr, |
diff --git a/fs/ceph/mon_client.h b/include/linux/ceph/mon_client.h index 8e396f2c0963..545f85917780 100644 --- a/fs/ceph/mon_client.h +++ b/include/linux/ceph/mon_client.h | |||
| @@ -79,6 +79,7 @@ struct ceph_mon_client { | |||
| 79 | u64 last_tid; | 79 | u64 last_tid; |
| 80 | 80 | ||
| 81 | /* mds/osd map */ | 81 | /* mds/osd map */ |
| 82 | int want_mdsmap; | ||
| 82 | int want_next_osdmap; /* 1 = want, 2 = want+asked */ | 83 | int want_next_osdmap; /* 1 = want, 2 = want+asked */ |
| 83 | u32 have_osdmap, have_mdsmap; | 84 | u32 have_osdmap, have_mdsmap; |
| 84 | 85 | ||
diff --git a/fs/ceph/msgpool.h b/include/linux/ceph/msgpool.h index a362605f9368..a362605f9368 100644 --- a/fs/ceph/msgpool.h +++ b/include/linux/ceph/msgpool.h | |||
diff --git a/fs/ceph/msgr.h b/include/linux/ceph/msgr.h index 680d3d648cac..680d3d648cac 100644 --- a/fs/ceph/msgr.h +++ b/include/linux/ceph/msgr.h | |||
diff --git a/fs/ceph/osd_client.h b/include/linux/ceph/osd_client.h index ce776989ef6a..6c91fb032c39 100644 --- a/fs/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h | |||
| @@ -15,6 +15,7 @@ struct ceph_snap_context; | |||
| 15 | struct ceph_osd_request; | 15 | struct ceph_osd_request; |
| 16 | struct ceph_osd_client; | 16 | struct ceph_osd_client; |
| 17 | struct ceph_authorizer; | 17 | struct ceph_authorizer; |
| 18 | struct ceph_pagelist; | ||
| 18 | 19 | ||
| 19 | /* | 20 | /* |
| 20 | * completion callback for async writepages | 21 | * completion callback for async writepages |
| @@ -68,6 +69,7 @@ struct ceph_osd_request { | |||
| 68 | struct list_head r_unsafe_item; | 69 | struct list_head r_unsafe_item; |
| 69 | 70 | ||
| 70 | struct inode *r_inode; /* for use by callbacks */ | 71 | struct inode *r_inode; /* for use by callbacks */ |
| 72 | void *r_priv; /* ditto */ | ||
| 71 | 73 | ||
| 72 | char r_oid[40]; /* object name */ | 74 | char r_oid[40]; /* object name */ |
| 73 | int r_oid_len; | 75 | int r_oid_len; |
| @@ -80,6 +82,11 @@ struct ceph_osd_request { | |||
| 80 | struct page **r_pages; /* pages for data payload */ | 82 | struct page **r_pages; /* pages for data payload */ |
| 81 | int r_pages_from_pool; | 83 | int r_pages_from_pool; |
| 82 | int r_own_pages; /* if true, i own page list */ | 84 | int r_own_pages; /* if true, i own page list */ |
| 85 | #ifdef CONFIG_BLOCK | ||
| 86 | struct bio *r_bio; /* instead of pages */ | ||
| 87 | #endif | ||
| 88 | |||
| 89 | struct ceph_pagelist *r_trail; /* trailing part of the data */ | ||
| 83 | }; | 90 | }; |
| 84 | 91 | ||
| 85 | struct ceph_osd_client { | 92 | struct ceph_osd_client { |
| @@ -110,6 +117,42 @@ struct ceph_osd_client { | |||
| 110 | struct ceph_msgpool msgpool_op_reply; | 117 | struct ceph_msgpool msgpool_op_reply; |
| 111 | }; | 118 | }; |
| 112 | 119 | ||
| 120 | struct ceph_osd_req_op { | ||
| 121 | u16 op; /* CEPH_OSD_OP_* */ | ||
| 122 | u32 flags; /* CEPH_OSD_FLAG_* */ | ||
| 123 | union { | ||
| 124 | struct { | ||
| 125 | u64 offset, length; | ||
| 126 | u64 truncate_size; | ||
| 127 | u32 truncate_seq; | ||
| 128 | } extent; | ||
| 129 | struct { | ||
| 130 | const char *name; | ||
| 131 | u32 name_len; | ||
| 132 | const char *val; | ||
| 133 | u32 value_len; | ||
| 134 | __u8 cmp_op; /* CEPH_OSD_CMPXATTR_OP_* */ | ||
| 135 | __u8 cmp_mode; /* CEPH_OSD_CMPXATTR_MODE_* */ | ||
| 136 | } xattr; | ||
| 137 | struct { | ||
| 138 | const char *class_name; | ||
| 139 | __u8 class_len; | ||
| 140 | const char *method_name; | ||
| 141 | __u8 method_len; | ||
| 142 | __u8 argc; | ||
| 143 | const char *indata; | ||
| 144 | u32 indata_len; | ||
| 145 | } cls; | ||
| 146 | struct { | ||
| 147 | u64 cookie, count; | ||
| 148 | } pgls; | ||
| 149 | struct { | ||
| 150 | u64 snapid; | ||
| 151 | } snap; | ||
| 152 | }; | ||
| 153 | u32 payload_len; | ||
| 154 | }; | ||
| 155 | |||
| 113 | extern int ceph_osdc_init(struct ceph_osd_client *osdc, | 156 | extern int ceph_osdc_init(struct ceph_osd_client *osdc, |
| 114 | struct ceph_client *client); | 157 | struct ceph_client *client); |
| 115 | extern void ceph_osdc_stop(struct ceph_osd_client *osdc); | 158 | extern void ceph_osdc_stop(struct ceph_osd_client *osdc); |
| @@ -119,6 +162,30 @@ extern void ceph_osdc_handle_reply(struct ceph_osd_client *osdc, | |||
| 119 | extern void ceph_osdc_handle_map(struct ceph_osd_client *osdc, | 162 | extern void ceph_osdc_handle_map(struct ceph_osd_client *osdc, |
| 120 | struct ceph_msg *msg); | 163 | struct ceph_msg *msg); |
| 121 | 164 | ||
| 165 | extern void ceph_calc_raw_layout(struct ceph_osd_client *osdc, | ||
| 166 | struct ceph_file_layout *layout, | ||
| 167 | u64 snapid, | ||
| 168 | u64 off, u64 *plen, u64 *bno, | ||
| 169 | struct ceph_osd_request *req, | ||
| 170 | struct ceph_osd_req_op *op); | ||
| 171 | |||
| 172 | extern struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc, | ||
| 173 | int flags, | ||
| 174 | struct ceph_snap_context *snapc, | ||
| 175 | struct ceph_osd_req_op *ops, | ||
| 176 | bool use_mempool, | ||
| 177 | gfp_t gfp_flags, | ||
| 178 | struct page **pages, | ||
| 179 | struct bio *bio); | ||
| 180 | |||
| 181 | extern void ceph_osdc_build_request(struct ceph_osd_request *req, | ||
| 182 | u64 off, u64 *plen, | ||
| 183 | struct ceph_osd_req_op *src_ops, | ||
| 184 | struct ceph_snap_context *snapc, | ||
| 185 | struct timespec *mtime, | ||
| 186 | const char *oid, | ||
| 187 | int oid_len); | ||
| 188 | |||
| 122 | extern struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *, | 189 | extern struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *, |
| 123 | struct ceph_file_layout *layout, | 190 | struct ceph_file_layout *layout, |
| 124 | struct ceph_vino vino, | 191 | struct ceph_vino vino, |
diff --git a/fs/ceph/osdmap.h b/include/linux/ceph/osdmap.h index 970b547e510d..ba4c205cbb01 100644 --- a/fs/ceph/osdmap.h +++ b/include/linux/ceph/osdmap.h | |||
| @@ -4,7 +4,7 @@ | |||
| 4 | #include <linux/rbtree.h> | 4 | #include <linux/rbtree.h> |
| 5 | #include "types.h" | 5 | #include "types.h" |
| 6 | #include "ceph_fs.h" | 6 | #include "ceph_fs.h" |
| 7 | #include "crush/crush.h" | 7 | #include <linux/crush/crush.h> |
| 8 | 8 | ||
| 9 | /* | 9 | /* |
| 10 | * The osd map describes the current membership of the osd cluster and | 10 | * The osd map describes the current membership of the osd cluster and |
| @@ -125,4 +125,6 @@ extern int ceph_calc_pg_acting(struct ceph_osdmap *osdmap, struct ceph_pg pgid, | |||
| 125 | extern int ceph_calc_pg_primary(struct ceph_osdmap *osdmap, | 125 | extern int ceph_calc_pg_primary(struct ceph_osdmap *osdmap, |
| 126 | struct ceph_pg pgid); | 126 | struct ceph_pg pgid); |
| 127 | 127 | ||
| 128 | extern int ceph_pg_poolid_by_name(struct ceph_osdmap *map, const char *name); | ||
| 129 | |||
| 128 | #endif | 130 | #endif |
diff --git a/fs/ceph/pagelist.h b/include/linux/ceph/pagelist.h index e8a4187e1087..9660d6b0a35d 100644 --- a/fs/ceph/pagelist.h +++ b/include/linux/ceph/pagelist.h | |||
| @@ -8,6 +8,14 @@ struct ceph_pagelist { | |||
| 8 | void *mapped_tail; | 8 | void *mapped_tail; |
| 9 | size_t length; | 9 | size_t length; |
| 10 | size_t room; | 10 | size_t room; |
| 11 | struct list_head free_list; | ||
| 12 | size_t num_pages_free; | ||
| 13 | }; | ||
| 14 | |||
| 15 | struct ceph_pagelist_cursor { | ||
| 16 | struct ceph_pagelist *pl; /* pagelist, for error checking */ | ||
| 17 | struct list_head *page_lru; /* page in list */ | ||
| 18 | size_t room; /* room remaining to reset to */ | ||
| 11 | }; | 19 | }; |
| 12 | 20 | ||
| 13 | static inline void ceph_pagelist_init(struct ceph_pagelist *pl) | 21 | static inline void ceph_pagelist_init(struct ceph_pagelist *pl) |
| @@ -16,10 +24,23 @@ static inline void ceph_pagelist_init(struct ceph_pagelist *pl) | |||
| 16 | pl->mapped_tail = NULL; | 24 | pl->mapped_tail = NULL; |
| 17 | pl->length = 0; | 25 | pl->length = 0; |
| 18 | pl->room = 0; | 26 | pl->room = 0; |
| 27 | INIT_LIST_HEAD(&pl->free_list); | ||
| 28 | pl->num_pages_free = 0; | ||
| 19 | } | 29 | } |
| 30 | |||
| 20 | extern int ceph_pagelist_release(struct ceph_pagelist *pl); | 31 | extern int ceph_pagelist_release(struct ceph_pagelist *pl); |
| 21 | 32 | ||
| 22 | extern int ceph_pagelist_append(struct ceph_pagelist *pl, void *d, size_t l); | 33 | extern int ceph_pagelist_append(struct ceph_pagelist *pl, const void *d, size_t l); |
| 34 | |||
| 35 | extern int ceph_pagelist_reserve(struct ceph_pagelist *pl, size_t space); | ||
| 36 | |||
| 37 | extern int ceph_pagelist_free_reserve(struct ceph_pagelist *pl); | ||
| 38 | |||
| 39 | extern void ceph_pagelist_set_cursor(struct ceph_pagelist *pl, | ||
| 40 | struct ceph_pagelist_cursor *c); | ||
| 41 | |||
| 42 | extern int ceph_pagelist_truncate(struct ceph_pagelist *pl, | ||
| 43 | struct ceph_pagelist_cursor *c); | ||
| 23 | 44 | ||
| 24 | static inline int ceph_pagelist_encode_64(struct ceph_pagelist *pl, u64 v) | 45 | static inline int ceph_pagelist_encode_64(struct ceph_pagelist *pl, u64 v) |
| 25 | { | 46 | { |
diff --git a/fs/ceph/rados.h b/include/linux/ceph/rados.h index 6d5247f2e81b..6d5247f2e81b 100644 --- a/fs/ceph/rados.h +++ b/include/linux/ceph/rados.h | |||
diff --git a/fs/ceph/types.h b/include/linux/ceph/types.h index 28b35a005ec2..28b35a005ec2 100644 --- a/fs/ceph/types.h +++ b/include/linux/ceph/types.h | |||
diff --git a/fs/ceph/crush/crush.h b/include/linux/crush/crush.h index 97e435b191f4..97e435b191f4 100644 --- a/fs/ceph/crush/crush.h +++ b/include/linux/crush/crush.h | |||
diff --git a/fs/ceph/crush/hash.h b/include/linux/crush/hash.h index 91e884230d5d..91e884230d5d 100644 --- a/fs/ceph/crush/hash.h +++ b/include/linux/crush/hash.h | |||
diff --git a/fs/ceph/crush/mapper.h b/include/linux/crush/mapper.h index c46b99c18bb0..c46b99c18bb0 100644 --- a/fs/ceph/crush/mapper.h +++ b/include/linux/crush/mapper.h | |||
diff --git a/net/Kconfig b/net/Kconfig index e926884c1675..55fd82e9ffd9 100644 --- a/net/Kconfig +++ b/net/Kconfig | |||
| @@ -293,6 +293,7 @@ source "net/wimax/Kconfig" | |||
| 293 | source "net/rfkill/Kconfig" | 293 | source "net/rfkill/Kconfig" |
| 294 | source "net/9p/Kconfig" | 294 | source "net/9p/Kconfig" |
| 295 | source "net/caif/Kconfig" | 295 | source "net/caif/Kconfig" |
| 296 | source "net/ceph/Kconfig" | ||
| 296 | 297 | ||
| 297 | 298 | ||
| 298 | endif # if NET | 299 | endif # if NET |
diff --git a/net/Makefile b/net/Makefile index ea60fbce9b1b..6b7bfd7f1416 100644 --- a/net/Makefile +++ b/net/Makefile | |||
| @@ -68,3 +68,4 @@ obj-$(CONFIG_SYSCTL) += sysctl_net.o | |||
| 68 | endif | 68 | endif |
| 69 | obj-$(CONFIG_WIMAX) += wimax/ | 69 | obj-$(CONFIG_WIMAX) += wimax/ |
| 70 | obj-$(CONFIG_DNS_RESOLVER) += dns_resolver/ | 70 | obj-$(CONFIG_DNS_RESOLVER) += dns_resolver/ |
| 71 | obj-$(CONFIG_CEPH_LIB) += ceph/ | ||
diff --git a/net/ceph/Kconfig b/net/ceph/Kconfig new file mode 100644 index 000000000000..ad424049b0cf --- /dev/null +++ b/net/ceph/Kconfig | |||
| @@ -0,0 +1,28 @@ | |||
| 1 | config CEPH_LIB | ||
| 2 | tristate "Ceph core library (EXPERIMENTAL)" | ||
| 3 | depends on INET && EXPERIMENTAL | ||
| 4 | select LIBCRC32C | ||
| 5 | select CRYPTO_AES | ||
| 6 | select CRYPTO | ||
| 7 | default n | ||
| 8 | help | ||
| 9 | Choose Y or M here to include libceph, which provides the | ||
| 10 | functionality common to both the Ceph filesystem and | ||
| 11 | the rados block device (rbd). | ||
| 12 | |||
| 13 | More information at http://ceph.newdream.net/. | ||
| 14 | |||
| 15 | If unsure, say N. | ||
| 16 | |||
| 17 | config CEPH_LIB_PRETTYDEBUG | ||
| 18 | bool "Include file:line in ceph debug output" | ||
| 19 | depends on CEPH_LIB | ||
| 20 | default n | ||
| 21 | help | ||
| 22 | If you say Y here, debug output will include a filename and | ||
| 23 | line to aid debugging. This increases kernel size and slows | ||
| 24 | execution slightly when debug call sites are enabled (e.g., | ||
| 25 | via CONFIG_DYNAMIC_DEBUG). | ||
| 26 | |||
| 27 | If unsure, say N. | ||
| 28 | |||
diff --git a/net/ceph/Makefile b/net/ceph/Makefile new file mode 100644 index 000000000000..aab1cabb8035 --- /dev/null +++ b/net/ceph/Makefile | |||
| @@ -0,0 +1,37 @@ | |||
| 1 | # | ||
| 2 | # Makefile for the Ceph core library (libceph). | ||
| 3 | # | ||
| 4 | |||
| 5 | ifneq ($(KERNELRELEASE),) | ||
| 6 | |||
| 7 | obj-$(CONFIG_CEPH_LIB) += libceph.o | ||
| 8 | |||
| 9 | libceph-objs := ceph_common.o messenger.o msgpool.o buffer.o pagelist.o \ | ||
| 10 | mon_client.o \ | ||
| 11 | osd_client.o osdmap.o crush/crush.o crush/mapper.o crush/hash.o \ | ||
| 12 | debugfs.o \ | ||
| 13 | auth.o auth_none.o \ | ||
| 14 | crypto.o armor.o \ | ||
| 15 | auth_x.o \ | ||
| 16 | ceph_fs.o ceph_strings.o ceph_hash.o \ | ||
| 17 | pagevec.o | ||
| 18 | |||
| 19 | else | ||
| 20 | #Otherwise we were called directly from the command | ||
| 21 | # line; invoke the kernel build system. | ||
| 22 | |||
| 23 | KERNELDIR ?= /lib/modules/$(shell uname -r)/build | ||
| 24 | PWD := $(shell pwd) | ||
| 25 | |||
| 26 | default: all | ||
| 27 | |||
| 28 | all: | ||
| 29 | $(MAKE) -C $(KERNELDIR) M=$(PWD) CONFIG_CEPH_LIB=m modules | ||
| 30 | |||
| 31 | modules_install: | ||
| 32 | $(MAKE) -C $(KERNELDIR) M=$(PWD) CONFIG_CEPH_LIB=m modules_install | ||
| 33 | |||
| 34 | clean: | ||
| 35 | $(MAKE) -C $(KERNELDIR) M=$(PWD) clean | ||
| 36 | |||
| 37 | endif | ||
diff --git a/fs/ceph/armor.c b/net/ceph/armor.c index eb2a666b0be7..eb2a666b0be7 100644 --- a/fs/ceph/armor.c +++ b/net/ceph/armor.c | |||
diff --git a/fs/ceph/auth.c b/net/ceph/auth.c index 6d2e30600627..549c1f43e1d5 100644 --- a/fs/ceph/auth.c +++ b/net/ceph/auth.c | |||
| @@ -1,16 +1,16 @@ | |||
| 1 | #include "ceph_debug.h" | 1 | #include <linux/ceph/ceph_debug.h> |
| 2 | 2 | ||
| 3 | #include <linux/module.h> | 3 | #include <linux/module.h> |
| 4 | #include <linux/err.h> | 4 | #include <linux/err.h> |
| 5 | #include <linux/slab.h> | 5 | #include <linux/slab.h> |
| 6 | 6 | ||
| 7 | #include "types.h" | 7 | #include <linux/ceph/types.h> |
| 8 | #include <linux/ceph/decode.h> | ||
| 9 | #include <linux/ceph/libceph.h> | ||
| 10 | #include <linux/ceph/messenger.h> | ||
| 8 | #include "auth_none.h" | 11 | #include "auth_none.h" |
| 9 | #include "auth_x.h" | 12 | #include "auth_x.h" |
| 10 | #include "decode.h" | ||
| 11 | #include "super.h" | ||
| 12 | 13 | ||
| 13 | #include "messenger.h" | ||
| 14 | 14 | ||
| 15 | /* | 15 | /* |
| 16 | * get protocol handler | 16 | * get protocol handler |
diff --git a/fs/ceph/auth_none.c b/net/ceph/auth_none.c index ad1dc21286c7..214c2bb43d62 100644 --- a/fs/ceph/auth_none.c +++ b/net/ceph/auth_none.c | |||
| @@ -1,14 +1,15 @@ | |||
| 1 | 1 | ||
| 2 | #include "ceph_debug.h" | 2 | #include <linux/ceph/ceph_debug.h> |
| 3 | 3 | ||
| 4 | #include <linux/err.h> | 4 | #include <linux/err.h> |
| 5 | #include <linux/module.h> | 5 | #include <linux/module.h> |
| 6 | #include <linux/random.h> | 6 | #include <linux/random.h> |
| 7 | #include <linux/slab.h> | 7 | #include <linux/slab.h> |
| 8 | 8 | ||
| 9 | #include <linux/ceph/decode.h> | ||
| 10 | #include <linux/ceph/auth.h> | ||
| 11 | |||
| 9 | #include "auth_none.h" | 12 | #include "auth_none.h" |
| 10 | #include "auth.h" | ||
| 11 | #include "decode.h" | ||
| 12 | 13 | ||
| 13 | static void reset(struct ceph_auth_client *ac) | 14 | static void reset(struct ceph_auth_client *ac) |
| 14 | { | 15 | { |
diff --git a/fs/ceph/auth_none.h b/net/ceph/auth_none.h index 8164df1a08be..ed7d088b1bc9 100644 --- a/fs/ceph/auth_none.h +++ b/net/ceph/auth_none.h | |||
| @@ -2,8 +2,7 @@ | |||
| 2 | #define _FS_CEPH_AUTH_NONE_H | 2 | #define _FS_CEPH_AUTH_NONE_H |
| 3 | 3 | ||
| 4 | #include <linux/slab.h> | 4 | #include <linux/slab.h> |
| 5 | 5 | #include <linux/ceph/auth.h> | |
| 6 | #include "auth.h" | ||
| 7 | 6 | ||
| 8 | /* | 7 | /* |
| 9 | * null security mode. | 8 | * null security mode. |
diff --git a/fs/ceph/auth_x.c b/net/ceph/auth_x.c index a2d002cbdec2..7fd5dfcf6e18 100644 --- a/fs/ceph/auth_x.c +++ b/net/ceph/auth_x.c | |||
| @@ -1,16 +1,17 @@ | |||
| 1 | 1 | ||
| 2 | #include "ceph_debug.h" | 2 | #include <linux/ceph/ceph_debug.h> |
| 3 | 3 | ||
| 4 | #include <linux/err.h> | 4 | #include <linux/err.h> |
| 5 | #include <linux/module.h> | 5 | #include <linux/module.h> |
| 6 | #include <linux/random.h> | 6 | #include <linux/random.h> |
| 7 | #include <linux/slab.h> | 7 | #include <linux/slab.h> |
| 8 | 8 | ||
| 9 | #include <linux/ceph/decode.h> | ||
| 10 | #include <linux/ceph/auth.h> | ||
| 11 | |||
| 12 | #include "crypto.h" | ||
| 9 | #include "auth_x.h" | 13 | #include "auth_x.h" |
| 10 | #include "auth_x_protocol.h" | 14 | #include "auth_x_protocol.h" |
| 11 | #include "crypto.h" | ||
| 12 | #include "auth.h" | ||
| 13 | #include "decode.h" | ||
| 14 | 15 | ||
| 15 | #define TEMP_TICKET_BUF_LEN 256 | 16 | #define TEMP_TICKET_BUF_LEN 256 |
| 16 | 17 | ||
diff --git a/fs/ceph/auth_x.h b/net/ceph/auth_x.h index ff6f8180e681..e02da7a5c5a1 100644 --- a/fs/ceph/auth_x.h +++ b/net/ceph/auth_x.h | |||
| @@ -3,8 +3,9 @@ | |||
| 3 | 3 | ||
| 4 | #include <linux/rbtree.h> | 4 | #include <linux/rbtree.h> |
| 5 | 5 | ||
| 6 | #include <linux/ceph/auth.h> | ||
| 7 | |||
| 6 | #include "crypto.h" | 8 | #include "crypto.h" |
| 7 | #include "auth.h" | ||
| 8 | #include "auth_x_protocol.h" | 9 | #include "auth_x_protocol.h" |
| 9 | 10 | ||
| 10 | /* | 11 | /* |
diff --git a/fs/ceph/auth_x_protocol.h b/net/ceph/auth_x_protocol.h index 671d30576c4f..671d30576c4f 100644 --- a/fs/ceph/auth_x_protocol.h +++ b/net/ceph/auth_x_protocol.h | |||
diff --git a/fs/ceph/buffer.c b/net/ceph/buffer.c index cd39f17021de..53d8abfa25d5 100644 --- a/fs/ceph/buffer.c +++ b/net/ceph/buffer.c | |||
| @@ -1,10 +1,11 @@ | |||
| 1 | 1 | ||
| 2 | #include "ceph_debug.h" | 2 | #include <linux/ceph/ceph_debug.h> |
| 3 | 3 | ||
| 4 | #include <linux/module.h> | ||
| 4 | #include <linux/slab.h> | 5 | #include <linux/slab.h> |
| 5 | 6 | ||
| 6 | #include "buffer.h" | 7 | #include <linux/ceph/buffer.h> |
| 7 | #include "decode.h" | 8 | #include <linux/ceph/decode.h> |
| 8 | 9 | ||
| 9 | struct ceph_buffer *ceph_buffer_new(size_t len, gfp_t gfp) | 10 | struct ceph_buffer *ceph_buffer_new(size_t len, gfp_t gfp) |
| 10 | { | 11 | { |
| @@ -32,6 +33,7 @@ struct ceph_buffer *ceph_buffer_new(size_t len, gfp_t gfp) | |||
| 32 | dout("buffer_new %p\n", b); | 33 | dout("buffer_new %p\n", b); |
| 33 | return b; | 34 | return b; |
| 34 | } | 35 | } |
| 36 | EXPORT_SYMBOL(ceph_buffer_new); | ||
| 35 | 37 | ||
| 36 | void ceph_buffer_release(struct kref *kref) | 38 | void ceph_buffer_release(struct kref *kref) |
| 37 | { | 39 | { |
| @@ -46,6 +48,7 @@ void ceph_buffer_release(struct kref *kref) | |||
| 46 | } | 48 | } |
| 47 | kfree(b); | 49 | kfree(b); |
| 48 | } | 50 | } |
| 51 | EXPORT_SYMBOL(ceph_buffer_release); | ||
| 49 | 52 | ||
| 50 | int ceph_decode_buffer(struct ceph_buffer **b, void **p, void *end) | 53 | int ceph_decode_buffer(struct ceph_buffer **b, void **p, void *end) |
| 51 | { | 54 | { |
diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c new file mode 100644 index 000000000000..f3e4a13fea0c --- /dev/null +++ b/net/ceph/ceph_common.c | |||
| @@ -0,0 +1,529 @@ | |||
| 1 | |||
| 2 | #include <linux/ceph/ceph_debug.h> | ||
| 3 | #include <linux/backing-dev.h> | ||
| 4 | #include <linux/ctype.h> | ||
| 5 | #include <linux/fs.h> | ||
| 6 | #include <linux/inet.h> | ||
| 7 | #include <linux/in6.h> | ||
| 8 | #include <linux/module.h> | ||
| 9 | #include <linux/mount.h> | ||
| 10 | #include <linux/parser.h> | ||
| 11 | #include <linux/sched.h> | ||
| 12 | #include <linux/seq_file.h> | ||
| 13 | #include <linux/slab.h> | ||
| 14 | #include <linux/statfs.h> | ||
| 15 | #include <linux/string.h> | ||
| 16 | |||
| 17 | |||
| 18 | #include <linux/ceph/libceph.h> | ||
| 19 | #include <linux/ceph/debugfs.h> | ||
| 20 | #include <linux/ceph/decode.h> | ||
| 21 | #include <linux/ceph/mon_client.h> | ||
| 22 | #include <linux/ceph/auth.h> | ||
| 23 | |||
| 24 | |||
| 25 | |||
/*
 * Return a pointer to the last component of the path s[0..len)
 * (/foo/bar/baz -> baz): the text that follows the final '/', or
 * s itself when no '/' is present.
 */
const char *ceph_file_part(const char *s, int len)
{
	const char *tail;

	/* walk back from the end until just past a separator */
	for (tail = s + len; tail != s; tail--) {
		if (tail[-1] == '/')
			break;
	}
	return tail;
}
| 37 | EXPORT_SYMBOL(ceph_file_part); | ||
| 38 | |||
| 39 | const char *ceph_msg_type_name(int type) | ||
| 40 | { | ||
| 41 | switch (type) { | ||
| 42 | case CEPH_MSG_SHUTDOWN: return "shutdown"; | ||
| 43 | case CEPH_MSG_PING: return "ping"; | ||
| 44 | case CEPH_MSG_AUTH: return "auth"; | ||
| 45 | case CEPH_MSG_AUTH_REPLY: return "auth_reply"; | ||
| 46 | case CEPH_MSG_MON_MAP: return "mon_map"; | ||
| 47 | case CEPH_MSG_MON_GET_MAP: return "mon_get_map"; | ||
| 48 | case CEPH_MSG_MON_SUBSCRIBE: return "mon_subscribe"; | ||
| 49 | case CEPH_MSG_MON_SUBSCRIBE_ACK: return "mon_subscribe_ack"; | ||
| 50 | case CEPH_MSG_STATFS: return "statfs"; | ||
| 51 | case CEPH_MSG_STATFS_REPLY: return "statfs_reply"; | ||
| 52 | case CEPH_MSG_MDS_MAP: return "mds_map"; | ||
| 53 | case CEPH_MSG_CLIENT_SESSION: return "client_session"; | ||
| 54 | case CEPH_MSG_CLIENT_RECONNECT: return "client_reconnect"; | ||
| 55 | case CEPH_MSG_CLIENT_REQUEST: return "client_request"; | ||
| 56 | case CEPH_MSG_CLIENT_REQUEST_FORWARD: return "client_request_forward"; | ||
| 57 | case CEPH_MSG_CLIENT_REPLY: return "client_reply"; | ||
| 58 | case CEPH_MSG_CLIENT_CAPS: return "client_caps"; | ||
| 59 | case CEPH_MSG_CLIENT_CAPRELEASE: return "client_cap_release"; | ||
| 60 | case CEPH_MSG_CLIENT_SNAP: return "client_snap"; | ||
| 61 | case CEPH_MSG_CLIENT_LEASE: return "client_lease"; | ||
| 62 | case CEPH_MSG_OSD_MAP: return "osd_map"; | ||
| 63 | case CEPH_MSG_OSD_OP: return "osd_op"; | ||
| 64 | case CEPH_MSG_OSD_OPREPLY: return "osd_opreply"; | ||
| 65 | default: return "unknown"; | ||
| 66 | } | ||
| 67 | } | ||
| 68 | EXPORT_SYMBOL(ceph_msg_type_name); | ||
| 69 | |||
| 70 | /* | ||
| 71 | * Initially learn our fsid, or verify an fsid matches. | ||
| 72 | */ | ||
| 73 | int ceph_check_fsid(struct ceph_client *client, struct ceph_fsid *fsid) | ||
| 74 | { | ||
| 75 | if (client->have_fsid) { | ||
| 76 | if (ceph_fsid_compare(&client->fsid, fsid)) { | ||
| 77 | pr_err("bad fsid, had %pU got %pU", | ||
| 78 | &client->fsid, fsid); | ||
| 79 | return -1; | ||
| 80 | } | ||
| 81 | } else { | ||
| 82 | pr_info("client%lld fsid %pU\n", ceph_client_id(client), fsid); | ||
| 83 | memcpy(&client->fsid, fsid, sizeof(*fsid)); | ||
| 84 | ceph_debugfs_client_init(client); | ||
| 85 | client->have_fsid = true; | ||
| 86 | } | ||
| 87 | return 0; | ||
| 88 | } | ||
| 89 | EXPORT_SYMBOL(ceph_check_fsid); | ||
| 90 | |||
/*
 * strcmp() that tolerates NULL arguments.
 *
 * Two NULLs compare equal.  When only one side is NULL, a non-NULL
 * @s1 yields -1 and a non-NULL @s2 yields 1.  Otherwise the result
 * is that of strcmp().
 */
static int strcmp_null(const char *s1, const char *s2)
{
	if (s1 && s2)
		return strcmp(s1, s2);
	if (s1)
		return -1;
	return s2 ? 1 : 0;
}
| 101 | |||
| 102 | int ceph_compare_options(struct ceph_options *new_opt, | ||
| 103 | struct ceph_client *client) | ||
| 104 | { | ||
| 105 | struct ceph_options *opt1 = new_opt; | ||
| 106 | struct ceph_options *opt2 = client->options; | ||
| 107 | int ofs = offsetof(struct ceph_options, mon_addr); | ||
| 108 | int i; | ||
| 109 | int ret; | ||
| 110 | |||
| 111 | ret = memcmp(opt1, opt2, ofs); | ||
| 112 | if (ret) | ||
| 113 | return ret; | ||
| 114 | |||
| 115 | ret = strcmp_null(opt1->name, opt2->name); | ||
| 116 | if (ret) | ||
| 117 | return ret; | ||
| 118 | |||
| 119 | ret = strcmp_null(opt1->secret, opt2->secret); | ||
| 120 | if (ret) | ||
| 121 | return ret; | ||
| 122 | |||
| 123 | /* any matching mon ip implies a match */ | ||
| 124 | for (i = 0; i < opt1->num_mon; i++) { | ||
| 125 | if (ceph_monmap_contains(client->monc.monmap, | ||
| 126 | &opt1->mon_addr[i])) | ||
| 127 | return 0; | ||
| 128 | } | ||
| 129 | return -1; | ||
| 130 | } | ||
| 131 | EXPORT_SYMBOL(ceph_compare_options); | ||
| 132 | |||
| 133 | |||
| 134 | static int parse_fsid(const char *str, struct ceph_fsid *fsid) | ||
| 135 | { | ||
| 136 | int i = 0; | ||
| 137 | char tmp[3]; | ||
| 138 | int err = -EINVAL; | ||
| 139 | int d; | ||
| 140 | |||
| 141 | dout("parse_fsid '%s'\n", str); | ||
| 142 | tmp[2] = 0; | ||
| 143 | while (*str && i < 16) { | ||
| 144 | if (ispunct(*str)) { | ||
| 145 | str++; | ||
| 146 | continue; | ||
| 147 | } | ||
| 148 | if (!isxdigit(str[0]) || !isxdigit(str[1])) | ||
| 149 | break; | ||
| 150 | tmp[0] = str[0]; | ||
| 151 | tmp[1] = str[1]; | ||
| 152 | if (sscanf(tmp, "%x", &d) < 1) | ||
| 153 | break; | ||
| 154 | fsid->fsid[i] = d & 0xff; | ||
| 155 | i++; | ||
| 156 | str += 2; | ||
| 157 | } | ||
| 158 | |||
| 159 | if (i == 16) | ||
| 160 | err = 0; | ||
| 161 | dout("parse_fsid ret %d got fsid %pU", err, fsid); | ||
| 162 | return err; | ||
| 163 | } | ||
| 164 | |||
/*
 * ceph options
 *
 * The enumerator order is significant: ceph_parse_options() treats
 * every token below Opt_last_int as taking an integer argument and
 * every token between Opt_last_int and Opt_last_string as taking a
 * string argument.  Tokens after Opt_last_string take no argument.
 */
enum {
	Opt_osdtimeout,
	Opt_osdkeepalivetimeout,
	Opt_mount_timeout,
	Opt_osd_idle_ttl,
	Opt_last_int,
	/* int args above */
	Opt_fsid,
	Opt_name,
	Opt_secret,
	Opt_ip,
	Opt_last_string,
	/* string args above */
	Opt_noshare,
	Opt_nocrc,
};

/*
 * Patterns handed to match_token(), one per option above; the
 * {-1, NULL} entry terminates the table.
 */
static match_table_t opt_tokens = {
	{Opt_osdtimeout, "osdtimeout=%d"},
	{Opt_osdkeepalivetimeout, "osdkeepalive=%d"},
	{Opt_mount_timeout, "mount_timeout=%d"},
	{Opt_osd_idle_ttl, "osd_idle_ttl=%d"},
	/* int args above */
	{Opt_fsid, "fsid=%s"},
	{Opt_name, "name=%s"},
	{Opt_secret, "secret=%s"},
	{Opt_ip, "ip=%s"},
	/* string args above */
	{Opt_noshare, "noshare"},
	{Opt_nocrc, "nocrc"},
	{-1, NULL}
};
| 200 | |||
| 201 | void ceph_destroy_options(struct ceph_options *opt) | ||
| 202 | { | ||
| 203 | dout("destroy_options %p\n", opt); | ||
| 204 | kfree(opt->name); | ||
| 205 | kfree(opt->secret); | ||
| 206 | kfree(opt); | ||
| 207 | } | ||
| 208 | EXPORT_SYMBOL(ceph_destroy_options); | ||
| 209 | |||
| 210 | int ceph_parse_options(struct ceph_options **popt, char *options, | ||
| 211 | const char *dev_name, const char *dev_name_end, | ||
| 212 | int (*parse_extra_token)(char *c, void *private), | ||
| 213 | void *private) | ||
| 214 | { | ||
| 215 | struct ceph_options *opt; | ||
| 216 | const char *c; | ||
| 217 | int err = -ENOMEM; | ||
| 218 | substring_t argstr[MAX_OPT_ARGS]; | ||
| 219 | |||
| 220 | opt = kzalloc(sizeof(*opt), GFP_KERNEL); | ||
| 221 | if (!opt) | ||
| 222 | return err; | ||
| 223 | opt->mon_addr = kcalloc(CEPH_MAX_MON, sizeof(*opt->mon_addr), | ||
| 224 | GFP_KERNEL); | ||
| 225 | if (!opt->mon_addr) | ||
| 226 | goto out; | ||
| 227 | |||
| 228 | dout("parse_options %p options '%s' dev_name '%s'\n", opt, options, | ||
| 229 | dev_name); | ||
| 230 | |||
| 231 | /* start with defaults */ | ||
| 232 | opt->flags = CEPH_OPT_DEFAULT; | ||
| 233 | opt->osd_timeout = CEPH_OSD_TIMEOUT_DEFAULT; | ||
| 234 | opt->osd_keepalive_timeout = CEPH_OSD_KEEPALIVE_DEFAULT; | ||
| 235 | opt->mount_timeout = CEPH_MOUNT_TIMEOUT_DEFAULT; /* seconds */ | ||
| 236 | opt->osd_idle_ttl = CEPH_OSD_IDLE_TTL_DEFAULT; /* seconds */ | ||
| 237 | |||
| 238 | /* get mon ip(s) */ | ||
| 239 | /* ip1[:port1][,ip2[:port2]...] */ | ||
| 240 | err = ceph_parse_ips(dev_name, dev_name_end, opt->mon_addr, | ||
| 241 | CEPH_MAX_MON, &opt->num_mon); | ||
| 242 | if (err < 0) | ||
| 243 | goto out; | ||
| 244 | |||
| 245 | /* parse mount options */ | ||
| 246 | while ((c = strsep(&options, ",")) != NULL) { | ||
| 247 | int token, intval, ret; | ||
| 248 | if (!*c) | ||
| 249 | continue; | ||
| 250 | err = -EINVAL; | ||
| 251 | token = match_token((char *)c, opt_tokens, argstr); | ||
| 252 | if (token < 0 && parse_extra_token) { | ||
| 253 | /* extra? */ | ||
| 254 | err = parse_extra_token((char *)c, private); | ||
| 255 | if (err < 0) { | ||
| 256 | pr_err("bad option at '%s'\n", c); | ||
| 257 | goto out; | ||
| 258 | } | ||
| 259 | continue; | ||
| 260 | } | ||
| 261 | if (token < Opt_last_int) { | ||
| 262 | ret = match_int(&argstr[0], &intval); | ||
| 263 | if (ret < 0) { | ||
| 264 | pr_err("bad mount option arg (not int) " | ||
| 265 | "at '%s'\n", c); | ||
| 266 | continue; | ||
| 267 | } | ||
| 268 | dout("got int token %d val %d\n", token, intval); | ||
| 269 | } else if (token > Opt_last_int && token < Opt_last_string) { | ||
| 270 | dout("got string token %d val %s\n", token, | ||
| 271 | argstr[0].from); | ||
| 272 | } else { | ||
| 273 | dout("got token %d\n", token); | ||
| 274 | } | ||
| 275 | switch (token) { | ||
| 276 | case Opt_ip: | ||
| 277 | err = ceph_parse_ips(argstr[0].from, | ||
| 278 | argstr[0].to, | ||
| 279 | &opt->my_addr, | ||
| 280 | 1, NULL); | ||
| 281 | if (err < 0) | ||
| 282 | goto out; | ||
| 283 | opt->flags |= CEPH_OPT_MYIP; | ||
| 284 | break; | ||
| 285 | |||
| 286 | case Opt_fsid: | ||
| 287 | err = parse_fsid(argstr[0].from, &opt->fsid); | ||
| 288 | if (err == 0) | ||
| 289 | opt->flags |= CEPH_OPT_FSID; | ||
| 290 | break; | ||
| 291 | case Opt_name: | ||
| 292 | opt->name = kstrndup(argstr[0].from, | ||
| 293 | argstr[0].to-argstr[0].from, | ||
| 294 | GFP_KERNEL); | ||
| 295 | break; | ||
| 296 | case Opt_secret: | ||
| 297 | opt->secret = kstrndup(argstr[0].from, | ||
| 298 | argstr[0].to-argstr[0].from, | ||
| 299 | GFP_KERNEL); | ||
| 300 | break; | ||
| 301 | |||
| 302 | /* misc */ | ||
| 303 | case Opt_osdtimeout: | ||
| 304 | opt->osd_timeout = intval; | ||
| 305 | break; | ||
| 306 | case Opt_osdkeepalivetimeout: | ||
| 307 | opt->osd_keepalive_timeout = intval; | ||
| 308 | break; | ||
| 309 | case Opt_osd_idle_ttl: | ||
| 310 | opt->osd_idle_ttl = intval; | ||
| 311 | break; | ||
| 312 | case Opt_mount_timeout: | ||
| 313 | opt->mount_timeout = intval; | ||
| 314 | break; | ||
| 315 | |||
| 316 | case Opt_noshare: | ||
| 317 | opt->flags |= CEPH_OPT_NOSHARE; | ||
| 318 | break; | ||
| 319 | |||
| 320 | case Opt_nocrc: | ||
| 321 | opt->flags |= CEPH_OPT_NOCRC; | ||
| 322 | break; | ||
| 323 | |||
| 324 | default: | ||
| 325 | BUG_ON(token); | ||
| 326 | } | ||
| 327 | } | ||
| 328 | |||
| 329 | /* success */ | ||
| 330 | *popt = opt; | ||
| 331 | return 0; | ||
| 332 | |||
| 333 | out: | ||
| 334 | ceph_destroy_options(opt); | ||
| 335 | return err; | ||
| 336 | } | ||
| 337 | EXPORT_SYMBOL(ceph_parse_options); | ||
| 338 | |||
| 339 | u64 ceph_client_id(struct ceph_client *client) | ||
| 340 | { | ||
| 341 | return client->monc.auth->global_id; | ||
| 342 | } | ||
| 343 | EXPORT_SYMBOL(ceph_client_id); | ||
| 344 | |||
| 345 | /* | ||
| 346 | * create a fresh client instance | ||
| 347 | */ | ||
| 348 | struct ceph_client *ceph_create_client(struct ceph_options *opt, void *private) | ||
| 349 | { | ||
| 350 | struct ceph_client *client; | ||
| 351 | int err = -ENOMEM; | ||
| 352 | |||
| 353 | client = kzalloc(sizeof(*client), GFP_KERNEL); | ||
| 354 | if (client == NULL) | ||
| 355 | return ERR_PTR(-ENOMEM); | ||
| 356 | |||
| 357 | client->private = private; | ||
| 358 | client->options = opt; | ||
| 359 | |||
| 360 | mutex_init(&client->mount_mutex); | ||
| 361 | init_waitqueue_head(&client->auth_wq); | ||
| 362 | client->auth_err = 0; | ||
| 363 | |||
| 364 | client->extra_mon_dispatch = NULL; | ||
| 365 | client->supported_features = CEPH_FEATURE_SUPPORTED_DEFAULT; | ||
| 366 | client->required_features = CEPH_FEATURE_REQUIRED_DEFAULT; | ||
| 367 | |||
| 368 | client->msgr = NULL; | ||
| 369 | |||
| 370 | /* subsystems */ | ||
| 371 | err = ceph_monc_init(&client->monc, client); | ||
| 372 | if (err < 0) | ||
| 373 | goto fail; | ||
| 374 | err = ceph_osdc_init(&client->osdc, client); | ||
| 375 | if (err < 0) | ||
| 376 | goto fail_monc; | ||
| 377 | |||
| 378 | return client; | ||
| 379 | |||
| 380 | fail_monc: | ||
| 381 | ceph_monc_stop(&client->monc); | ||
| 382 | fail: | ||
| 383 | kfree(client); | ||
| 384 | return ERR_PTR(err); | ||
| 385 | } | ||
| 386 | EXPORT_SYMBOL(ceph_create_client); | ||
| 387 | |||
/*
 * Tear down a client: stop the osd client, flush the messenger,
 * stop the monitor client, remove the debugfs entries, destroy the
 * messenger if one was created, then free the options and the
 * client itself.
 */
void ceph_destroy_client(struct ceph_client *client)
{
	dout("destroy_client %p\n", client);

	/* unmount */
	ceph_osdc_stop(&client->osdc);

	/*
	 * make sure mds and osd connections close out before destroying
	 * the auth module, which is needed to free those connections'
	 * ceph_authorizers.
	 */
	ceph_msgr_flush();

	ceph_monc_stop(&client->monc);

	ceph_debugfs_client_cleanup(client);

	/* msgr stays NULL until __ceph_open_session() creates it */
	if (client->msgr)
		ceph_messenger_destroy(client->msgr);

	ceph_destroy_options(client->options);

	kfree(client);
	/* only the pointer value is printed; client is not dereferenced */
	dout("destroy_client %p done\n", client);
}
| 414 | EXPORT_SYMBOL(ceph_destroy_client); | ||
| 415 | |||
| 416 | /* | ||
| 417 | * true if we have the mon map (and have thus joined the cluster) | ||
| 418 | */ | ||
| 419 | static int have_mon_and_osd_map(struct ceph_client *client) | ||
| 420 | { | ||
| 421 | return client->monc.monmap && client->monc.monmap->epoch && | ||
| 422 | client->osdc.osdmap && client->osdc.osdmap->epoch; | ||
| 423 | } | ||
| 424 | |||
| 425 | /* | ||
| 426 | * mount: join the ceph cluster, and open root directory. | ||
| 427 | */ | ||
| 428 | int __ceph_open_session(struct ceph_client *client, unsigned long started) | ||
| 429 | { | ||
| 430 | struct ceph_entity_addr *myaddr = NULL; | ||
| 431 | int err; | ||
| 432 | unsigned long timeout = client->options->mount_timeout * HZ; | ||
| 433 | |||
| 434 | /* initialize the messenger */ | ||
| 435 | if (client->msgr == NULL) { | ||
| 436 | if (ceph_test_opt(client, MYIP)) | ||
| 437 | myaddr = &client->options->my_addr; | ||
| 438 | client->msgr = ceph_messenger_create(myaddr, | ||
| 439 | client->supported_features, | ||
| 440 | client->required_features); | ||
| 441 | if (IS_ERR(client->msgr)) { | ||
| 442 | client->msgr = NULL; | ||
| 443 | return PTR_ERR(client->msgr); | ||
| 444 | } | ||
| 445 | client->msgr->nocrc = ceph_test_opt(client, NOCRC); | ||
| 446 | } | ||
| 447 | |||
| 448 | /* open session, and wait for mon and osd maps */ | ||
| 449 | err = ceph_monc_open_session(&client->monc); | ||
| 450 | if (err < 0) | ||
| 451 | return err; | ||
| 452 | |||
| 453 | while (!have_mon_and_osd_map(client)) { | ||
| 454 | err = -EIO; | ||
| 455 | if (timeout && time_after_eq(jiffies, started + timeout)) | ||
| 456 | return err; | ||
| 457 | |||
| 458 | /* wait */ | ||
| 459 | dout("mount waiting for mon_map\n"); | ||
| 460 | err = wait_event_interruptible_timeout(client->auth_wq, | ||
| 461 | have_mon_and_osd_map(client) || (client->auth_err < 0), | ||
| 462 | timeout); | ||
| 463 | if (err == -EINTR || err == -ERESTARTSYS) | ||
| 464 | return err; | ||
| 465 | if (client->auth_err < 0) | ||
| 466 | return client->auth_err; | ||
| 467 | } | ||
| 468 | |||
| 469 | return 0; | ||
| 470 | } | ||
| 471 | EXPORT_SYMBOL(__ceph_open_session); | ||
| 472 | |||
| 473 | |||
| 474 | int ceph_open_session(struct ceph_client *client) | ||
| 475 | { | ||
| 476 | int ret; | ||
| 477 | unsigned long started = jiffies; /* note the start time */ | ||
| 478 | |||
| 479 | dout("open_session start\n"); | ||
| 480 | mutex_lock(&client->mount_mutex); | ||
| 481 | |||
| 482 | ret = __ceph_open_session(client, started); | ||
| 483 | |||
| 484 | mutex_unlock(&client->mount_mutex); | ||
| 485 | return ret; | ||
| 486 | } | ||
| 487 | EXPORT_SYMBOL(ceph_open_session); | ||
| 488 | |||
| 489 | |||
| 490 | static int __init init_ceph_lib(void) | ||
| 491 | { | ||
| 492 | int ret = 0; | ||
| 493 | |||
| 494 | ret = ceph_debugfs_init(); | ||
| 495 | if (ret < 0) | ||
| 496 | goto out; | ||
| 497 | |||
| 498 | ret = ceph_msgr_init(); | ||
| 499 | if (ret < 0) | ||
| 500 | goto out_debugfs; | ||
| 501 | |||
| 502 | pr_info("loaded (mon/osd proto %d/%d, osdmap %d/%d %d/%d)\n", | ||
| 503 | CEPH_MONC_PROTOCOL, CEPH_OSDC_PROTOCOL, | ||
| 504 | CEPH_OSDMAP_VERSION, CEPH_OSDMAP_VERSION_EXT, | ||
| 505 | CEPH_OSDMAP_INC_VERSION, CEPH_OSDMAP_INC_VERSION_EXT); | ||
| 506 | |||
| 507 | return 0; | ||
| 508 | |||
| 509 | out_debugfs: | ||
| 510 | ceph_debugfs_cleanup(); | ||
| 511 | out: | ||
| 512 | return ret; | ||
| 513 | } | ||
| 514 | |||
/*
 * Module exit: undo init_ceph_lib() in reverse order, messenger
 * first and debugfs second.
 */
static void __exit exit_ceph_lib(void)
{
	dout("exit_ceph_lib\n");
	ceph_msgr_exit();
	ceph_debugfs_cleanup();
}
| 521 | |||
| 522 | module_init(init_ceph_lib); | ||
| 523 | module_exit(exit_ceph_lib); | ||
| 524 | |||
| 525 | MODULE_AUTHOR("Sage Weil <sage@newdream.net>"); | ||
| 526 | MODULE_AUTHOR("Yehuda Sadeh <yehuda@hq.newdream.net>"); | ||
| 527 | MODULE_AUTHOR("Patience Warnick <patience@newdream.net>"); | ||
| 528 | MODULE_DESCRIPTION("Ceph filesystem for Linux"); | ||
| 529 | MODULE_LICENSE("GPL"); | ||
diff --git a/fs/ceph/ceph_fs.c b/net/ceph/ceph_fs.c index 3ac6cc7c1156..a3a3a31d3c37 100644 --- a/fs/ceph/ceph_fs.c +++ b/net/ceph/ceph_fs.c | |||
| @@ -1,7 +1,8 @@ | |||
| 1 | /* | 1 | /* |
| 2 | * Some non-inline ceph helpers | 2 | * Some non-inline ceph helpers |
| 3 | */ | 3 | */ |
| 4 | #include "types.h" | 4 | #include <linux/module.h> |
| 5 | #include <linux/ceph/types.h> | ||
| 5 | 6 | ||
| 6 | /* | 7 | /* |
| 7 | * return true if @layout appears to be valid | 8 | * return true if @layout appears to be valid |
| @@ -52,6 +53,7 @@ int ceph_flags_to_mode(int flags) | |||
| 52 | 53 | ||
| 53 | return mode; | 54 | return mode; |
| 54 | } | 55 | } |
| 56 | EXPORT_SYMBOL(ceph_flags_to_mode); | ||
| 55 | 57 | ||
| 56 | int ceph_caps_for_mode(int mode) | 58 | int ceph_caps_for_mode(int mode) |
| 57 | { | 59 | { |
| @@ -70,3 +72,4 @@ int ceph_caps_for_mode(int mode) | |||
| 70 | 72 | ||
| 71 | return caps; | 73 | return caps; |
| 72 | } | 74 | } |
| 75 | EXPORT_SYMBOL(ceph_caps_for_mode); | ||
diff --git a/fs/ceph/ceph_hash.c b/net/ceph/ceph_hash.c index bd570015d147..815ef8826796 100644 --- a/fs/ceph/ceph_hash.c +++ b/net/ceph/ceph_hash.c | |||
| @@ -1,5 +1,5 @@ | |||
| 1 | 1 | ||
| 2 | #include "types.h" | 2 | #include <linux/ceph/types.h> |
| 3 | 3 | ||
| 4 | /* | 4 | /* |
| 5 | * Robert Jenkin's hash function. | 5 | * Robert Jenkin's hash function. |
diff --git a/net/ceph/ceph_strings.c b/net/ceph/ceph_strings.c new file mode 100644 index 000000000000..3fbda04de29c --- /dev/null +++ b/net/ceph/ceph_strings.c | |||
| @@ -0,0 +1,84 @@ | |||
| 1 | /* | ||
| 2 | * Ceph string constants | ||
| 3 | */ | ||
| 4 | #include <linux/module.h> | ||
| 5 | #include <linux/ceph/types.h> | ||
| 6 | |||
| 7 | const char *ceph_entity_type_name(int type) | ||
| 8 | { | ||
| 9 | switch (type) { | ||
| 10 | case CEPH_ENTITY_TYPE_MDS: return "mds"; | ||
| 11 | case CEPH_ENTITY_TYPE_OSD: return "osd"; | ||
| 12 | case CEPH_ENTITY_TYPE_MON: return "mon"; | ||
| 13 | case CEPH_ENTITY_TYPE_CLIENT: return "client"; | ||
| 14 | case CEPH_ENTITY_TYPE_AUTH: return "auth"; | ||
| 15 | default: return "unknown"; | ||
| 16 | } | ||
| 17 | } | ||
| 18 | |||
| 19 | const char *ceph_osd_op_name(int op) | ||
| 20 | { | ||
| 21 | switch (op) { | ||
| 22 | case CEPH_OSD_OP_READ: return "read"; | ||
| 23 | case CEPH_OSD_OP_STAT: return "stat"; | ||
| 24 | |||
| 25 | case CEPH_OSD_OP_MASKTRUNC: return "masktrunc"; | ||
| 26 | |||
| 27 | case CEPH_OSD_OP_WRITE: return "write"; | ||
| 28 | case CEPH_OSD_OP_DELETE: return "delete"; | ||
| 29 | case CEPH_OSD_OP_TRUNCATE: return "truncate"; | ||
| 30 | case CEPH_OSD_OP_ZERO: return "zero"; | ||
| 31 | case CEPH_OSD_OP_WRITEFULL: return "writefull"; | ||
| 32 | case CEPH_OSD_OP_ROLLBACK: return "rollback"; | ||
| 33 | |||
| 34 | case CEPH_OSD_OP_APPEND: return "append"; | ||
| 35 | case CEPH_OSD_OP_STARTSYNC: return "startsync"; | ||
| 36 | case CEPH_OSD_OP_SETTRUNC: return "settrunc"; | ||
| 37 | case CEPH_OSD_OP_TRIMTRUNC: return "trimtrunc"; | ||
| 38 | |||
| 39 | case CEPH_OSD_OP_TMAPUP: return "tmapup"; | ||
| 40 | case CEPH_OSD_OP_TMAPGET: return "tmapget"; | ||
| 41 | case CEPH_OSD_OP_TMAPPUT: return "tmapput"; | ||
| 42 | |||
| 43 | case CEPH_OSD_OP_GETXATTR: return "getxattr"; | ||
| 44 | case CEPH_OSD_OP_GETXATTRS: return "getxattrs"; | ||
| 45 | case CEPH_OSD_OP_SETXATTR: return "setxattr"; | ||
| 46 | case CEPH_OSD_OP_SETXATTRS: return "setxattrs"; | ||
| 47 | case CEPH_OSD_OP_RESETXATTRS: return "resetxattrs"; | ||
| 48 | case CEPH_OSD_OP_RMXATTR: return "rmxattr"; | ||
| 49 | case CEPH_OSD_OP_CMPXATTR: return "cmpxattr"; | ||
| 50 | |||
| 51 | case CEPH_OSD_OP_PULL: return "pull"; | ||
| 52 | case CEPH_OSD_OP_PUSH: return "push"; | ||
| 53 | case CEPH_OSD_OP_BALANCEREADS: return "balance-reads"; | ||
| 54 | case CEPH_OSD_OP_UNBALANCEREADS: return "unbalance-reads"; | ||
| 55 | case CEPH_OSD_OP_SCRUB: return "scrub"; | ||
| 56 | |||
| 57 | case CEPH_OSD_OP_WRLOCK: return "wrlock"; | ||
| 58 | case CEPH_OSD_OP_WRUNLOCK: return "wrunlock"; | ||
| 59 | case CEPH_OSD_OP_RDLOCK: return "rdlock"; | ||
| 60 | case CEPH_OSD_OP_RDUNLOCK: return "rdunlock"; | ||
| 61 | case CEPH_OSD_OP_UPLOCK: return "uplock"; | ||
| 62 | case CEPH_OSD_OP_DNLOCK: return "dnlock"; | ||
| 63 | |||
| 64 | case CEPH_OSD_OP_CALL: return "call"; | ||
| 65 | |||
| 66 | case CEPH_OSD_OP_PGLS: return "pgls"; | ||
| 67 | } | ||
| 68 | return "???"; | ||
| 69 | } | ||
| 70 | |||
| 71 | |||
| 72 | const char *ceph_pool_op_name(int op) | ||
| 73 | { | ||
| 74 | switch (op) { | ||
| 75 | case POOL_OP_CREATE: return "create"; | ||
| 76 | case POOL_OP_DELETE: return "delete"; | ||
| 77 | case POOL_OP_AUID_CHANGE: return "auid change"; | ||
| 78 | case POOL_OP_CREATE_SNAP: return "create snap"; | ||
| 79 | case POOL_OP_DELETE_SNAP: return "delete snap"; | ||
| 80 | case POOL_OP_CREATE_UNMANAGED_SNAP: return "create unmanaged snap"; | ||
| 81 | case POOL_OP_DELETE_UNMANAGED_SNAP: return "delete unmanaged snap"; | ||
| 82 | } | ||
| 83 | return "???"; | ||
| 84 | } | ||
diff --git a/fs/ceph/crush/crush.c b/net/ceph/crush/crush.c index fabd302e5779..d6ebb13a18a4 100644 --- a/fs/ceph/crush/crush.c +++ b/net/ceph/crush/crush.c | |||
| @@ -8,7 +8,7 @@ | |||
| 8 | # define BUG_ON(x) assert(!(x)) | 8 | # define BUG_ON(x) assert(!(x)) |
| 9 | #endif | 9 | #endif |
| 10 | 10 | ||
| 11 | #include "crush.h" | 11 | #include <linux/crush/crush.h> |
| 12 | 12 | ||
| 13 | const char *crush_bucket_alg_name(int alg) | 13 | const char *crush_bucket_alg_name(int alg) |
| 14 | { | 14 | { |
diff --git a/fs/ceph/crush/hash.c b/net/ceph/crush/hash.c index 5873aed694bf..5bb63e37a8a1 100644 --- a/fs/ceph/crush/hash.c +++ b/net/ceph/crush/hash.c | |||
| @@ -1,6 +1,6 @@ | |||
| 1 | 1 | ||
| 2 | #include <linux/types.h> | 2 | #include <linux/types.h> |
| 3 | #include "hash.h" | 3 | #include <linux/crush/hash.h> |
| 4 | 4 | ||
| 5 | /* | 5 | /* |
| 6 | * Robert Jenkins' function for mixing 32-bit values | 6 | * Robert Jenkins' function for mixing 32-bit values |
diff --git a/fs/ceph/crush/mapper.c b/net/ceph/crush/mapper.c index a4eec133258e..42599e31dcad 100644 --- a/fs/ceph/crush/mapper.c +++ b/net/ceph/crush/mapper.c | |||
| @@ -18,8 +18,8 @@ | |||
| 18 | # define kfree(x) free(x) | 18 | # define kfree(x) free(x) |
| 19 | #endif | 19 | #endif |
| 20 | 20 | ||
| 21 | #include "crush.h" | 21 | #include <linux/crush/crush.h> |
| 22 | #include "hash.h" | 22 | #include <linux/crush/hash.h> |
| 23 | 23 | ||
| 24 | /* | 24 | /* |
| 25 | * Implement the core CRUSH mapping algorithm. | 25 | * Implement the core CRUSH mapping algorithm. |
diff --git a/fs/ceph/crypto.c b/net/ceph/crypto.c index a3e627f63293..7b505b0c983f 100644 --- a/fs/ceph/crypto.c +++ b/net/ceph/crypto.c | |||
| @@ -1,13 +1,13 @@ | |||
| 1 | 1 | ||
| 2 | #include "ceph_debug.h" | 2 | #include <linux/ceph/ceph_debug.h> |
| 3 | 3 | ||
| 4 | #include <linux/err.h> | 4 | #include <linux/err.h> |
| 5 | #include <linux/scatterlist.h> | 5 | #include <linux/scatterlist.h> |
| 6 | #include <linux/slab.h> | 6 | #include <linux/slab.h> |
| 7 | #include <crypto/hash.h> | 7 | #include <crypto/hash.h> |
| 8 | 8 | ||
| 9 | #include <linux/ceph/decode.h> | ||
| 9 | #include "crypto.h" | 10 | #include "crypto.h" |
| 10 | #include "decode.h" | ||
| 11 | 11 | ||
| 12 | int ceph_crypto_key_encode(struct ceph_crypto_key *key, void **p, void *end) | 12 | int ceph_crypto_key_encode(struct ceph_crypto_key *key, void **p, void *end) |
| 13 | { | 13 | { |
diff --git a/fs/ceph/crypto.h b/net/ceph/crypto.h index bdf38607323c..f9eccace592b 100644 --- a/fs/ceph/crypto.h +++ b/net/ceph/crypto.h | |||
| @@ -1,8 +1,8 @@ | |||
| 1 | #ifndef _FS_CEPH_CRYPTO_H | 1 | #ifndef _FS_CEPH_CRYPTO_H |
| 2 | #define _FS_CEPH_CRYPTO_H | 2 | #define _FS_CEPH_CRYPTO_H |
| 3 | 3 | ||
| 4 | #include "types.h" | 4 | #include <linux/ceph/types.h> |
| 5 | #include "buffer.h" | 5 | #include <linux/ceph/buffer.h> |
| 6 | 6 | ||
| 7 | /* | 7 | /* |
| 8 | * cryptographic secret | 8 | * cryptographic secret |
diff --git a/net/ceph/debugfs.c b/net/ceph/debugfs.c new file mode 100644 index 000000000000..27d4ea315d12 --- /dev/null +++ b/net/ceph/debugfs.c | |||
| @@ -0,0 +1,267 @@ | |||
| 1 | #include <linux/ceph/ceph_debug.h> | ||
| 2 | |||
| 3 | #include <linux/device.h> | ||
| 4 | #include <linux/slab.h> | ||
| 5 | #include <linux/module.h> | ||
| 6 | #include <linux/ctype.h> | ||
| 7 | #include <linux/debugfs.h> | ||
| 8 | #include <linux/seq_file.h> | ||
| 9 | |||
| 10 | #include <linux/ceph/libceph.h> | ||
| 11 | #include <linux/ceph/mon_client.h> | ||
| 12 | #include <linux/ceph/auth.h> | ||
| 13 | #include <linux/ceph/debugfs.h> | ||
| 14 | |||
| 15 | #ifdef CONFIG_DEBUG_FS | ||
| 16 | |||
| 17 | /* | ||
| 18 | * Implement /sys/kernel/debug/ceph fun | ||
| 19 | * | ||
| 20 | * /sys/kernel/debug/ceph/client* - an instance of the ceph client | ||
| 21 | * .../osdmap - current osdmap | ||
| 22 | * .../monmap - current monmap | ||
| 23 | * .../osdc - active osd requests | ||
| 24 | * .../monc - mon client state | ||
| 25 | * .../dentry_lru - dump contents of dentry lru | ||
| 26 | * .../caps - expose cap (reservation) stats | ||
| 27 | * .../bdi - symlink to ../../bdi/something | ||
| 28 | */ | ||
| 29 | |||
| 30 | static struct dentry *ceph_debugfs_dir; | ||
| 31 | |||
| 32 | static int monmap_show(struct seq_file *s, void *p) | ||
| 33 | { | ||
| 34 | int i; | ||
| 35 | struct ceph_client *client = s->private; | ||
| 36 | |||
| 37 | if (client->monc.monmap == NULL) | ||
| 38 | return 0; | ||
| 39 | |||
| 40 | seq_printf(s, "epoch %d\n", client->monc.monmap->epoch); | ||
| 41 | for (i = 0; i < client->monc.monmap->num_mon; i++) { | ||
| 42 | struct ceph_entity_inst *inst = | ||
| 43 | &client->monc.monmap->mon_inst[i]; | ||
| 44 | |||
| 45 | seq_printf(s, "\t%s%lld\t%s\n", | ||
| 46 | ENTITY_NAME(inst->name), | ||
| 47 | ceph_pr_addr(&inst->addr.in_addr)); | ||
| 48 | } | ||
| 49 | return 0; | ||
| 50 | } | ||
| 51 | |||
| 52 | static int osdmap_show(struct seq_file *s, void *p) | ||
| 53 | { | ||
| 54 | int i; | ||
| 55 | struct ceph_client *client = s->private; | ||
| 56 | struct rb_node *n; | ||
| 57 | |||
| 58 | if (client->osdc.osdmap == NULL) | ||
| 59 | return 0; | ||
| 60 | seq_printf(s, "epoch %d\n", client->osdc.osdmap->epoch); | ||
| 61 | seq_printf(s, "flags%s%s\n", | ||
| 62 | (client->osdc.osdmap->flags & CEPH_OSDMAP_NEARFULL) ? | ||
| 63 | " NEARFULL" : "", | ||
| 64 | (client->osdc.osdmap->flags & CEPH_OSDMAP_FULL) ? | ||
| 65 | " FULL" : ""); | ||
| 66 | for (n = rb_first(&client->osdc.osdmap->pg_pools); n; n = rb_next(n)) { | ||
| 67 | struct ceph_pg_pool_info *pool = | ||
| 68 | rb_entry(n, struct ceph_pg_pool_info, node); | ||
| 69 | seq_printf(s, "pg_pool %d pg_num %d / %d, lpg_num %d / %d\n", | ||
| 70 | pool->id, pool->v.pg_num, pool->pg_num_mask, | ||
| 71 | pool->v.lpg_num, pool->lpg_num_mask); | ||
| 72 | } | ||
| 73 | for (i = 0; i < client->osdc.osdmap->max_osd; i++) { | ||
| 74 | struct ceph_entity_addr *addr = | ||
| 75 | &client->osdc.osdmap->osd_addr[i]; | ||
| 76 | int state = client->osdc.osdmap->osd_state[i]; | ||
| 77 | char sb[64]; | ||
| 78 | |||
| 79 | seq_printf(s, "\tosd%d\t%s\t%3d%%\t(%s)\n", | ||
| 80 | i, ceph_pr_addr(&addr->in_addr), | ||
| 81 | ((client->osdc.osdmap->osd_weight[i]*100) >> 16), | ||
| 82 | ceph_osdmap_state_str(sb, sizeof(sb), state)); | ||
| 83 | } | ||
| 84 | return 0; | ||
| 85 | } | ||
| 86 | |||
| 87 | static int monc_show(struct seq_file *s, void *p) | ||
| 88 | { | ||
| 89 | struct ceph_client *client = s->private; | ||
| 90 | struct ceph_mon_generic_request *req; | ||
| 91 | struct ceph_mon_client *monc = &client->monc; | ||
| 92 | struct rb_node *rp; | ||
| 93 | |||
| 94 | mutex_lock(&monc->mutex); | ||
| 95 | |||
| 96 | if (monc->have_mdsmap) | ||
| 97 | seq_printf(s, "have mdsmap %u\n", (unsigned)monc->have_mdsmap); | ||
| 98 | if (monc->have_osdmap) | ||
| 99 | seq_printf(s, "have osdmap %u\n", (unsigned)monc->have_osdmap); | ||
| 100 | if (monc->want_next_osdmap) | ||
| 101 | seq_printf(s, "want next osdmap\n"); | ||
| 102 | |||
| 103 | for (rp = rb_first(&monc->generic_request_tree); rp; rp = rb_next(rp)) { | ||
| 104 | __u16 op; | ||
| 105 | req = rb_entry(rp, struct ceph_mon_generic_request, node); | ||
| 106 | op = le16_to_cpu(req->request->hdr.type); | ||
| 107 | if (op == CEPH_MSG_STATFS) | ||
| 108 | seq_printf(s, "%lld statfs\n", req->tid); | ||
| 109 | else | ||
| 110 | seq_printf(s, "%lld unknown\n", req->tid); | ||
| 111 | } | ||
| 112 | |||
| 113 | mutex_unlock(&monc->mutex); | ||
| 114 | return 0; | ||
| 115 | } | ||
| 116 | |||
| 117 | static int osdc_show(struct seq_file *s, void *pp) | ||
| 118 | { | ||
| 119 | struct ceph_client *client = s->private; | ||
| 120 | struct ceph_osd_client *osdc = &client->osdc; | ||
| 121 | struct rb_node *p; | ||
| 122 | |||
| 123 | mutex_lock(&osdc->request_mutex); | ||
| 124 | for (p = rb_first(&osdc->requests); p; p = rb_next(p)) { | ||
| 125 | struct ceph_osd_request *req; | ||
| 126 | struct ceph_osd_request_head *head; | ||
| 127 | struct ceph_osd_op *op; | ||
| 128 | int num_ops; | ||
| 129 | int opcode, olen; | ||
| 130 | int i; | ||
| 131 | |||
| 132 | req = rb_entry(p, struct ceph_osd_request, r_node); | ||
| 133 | |||
| 134 | seq_printf(s, "%lld\tosd%d\t%d.%x\t", req->r_tid, | ||
| 135 | req->r_osd ? req->r_osd->o_osd : -1, | ||
| 136 | le32_to_cpu(req->r_pgid.pool), | ||
| 137 | le16_to_cpu(req->r_pgid.ps)); | ||
| 138 | |||
| 139 | head = req->r_request->front.iov_base; | ||
| 140 | op = (void *)(head + 1); | ||
| 141 | |||
| 142 | num_ops = le16_to_cpu(head->num_ops); | ||
| 143 | olen = le32_to_cpu(head->object_len); | ||
| 144 | seq_printf(s, "%.*s", olen, | ||
| 145 | (const char *)(head->ops + num_ops)); | ||
| 146 | |||
| 147 | if (req->r_reassert_version.epoch) | ||
| 148 | seq_printf(s, "\t%u'%llu", | ||
| 149 | (unsigned)le32_to_cpu(req->r_reassert_version.epoch), | ||
| 150 | le64_to_cpu(req->r_reassert_version.version)); | ||
| 151 | else | ||
| 152 | seq_printf(s, "\t"); | ||
| 153 | |||
| 154 | for (i = 0; i < num_ops; i++) { | ||
| 155 | opcode = le16_to_cpu(op->op); | ||
| 156 | seq_printf(s, "\t%s", ceph_osd_op_name(opcode)); | ||
| 157 | op++; | ||
| 158 | } | ||
| 159 | |||
| 160 | seq_printf(s, "\n"); | ||
| 161 | } | ||
| 162 | mutex_unlock(&osdc->request_mutex); | ||
| 163 | return 0; | ||
| 164 | } | ||
| 165 | |||
| 166 | CEPH_DEFINE_SHOW_FUNC(monmap_show) | ||
| 167 | CEPH_DEFINE_SHOW_FUNC(osdmap_show) | ||
| 168 | CEPH_DEFINE_SHOW_FUNC(monc_show) | ||
| 169 | CEPH_DEFINE_SHOW_FUNC(osdc_show) | ||
| 170 | |||
| 171 | int ceph_debugfs_init(void) | ||
| 172 | { | ||
| 173 | ceph_debugfs_dir = debugfs_create_dir("ceph", NULL); | ||
| 174 | if (!ceph_debugfs_dir) | ||
| 175 | return -ENOMEM; | ||
| 176 | return 0; | ||
| 177 | } | ||
| 178 | |||
| 179 | void ceph_debugfs_cleanup(void) | ||
| 180 | { | ||
| 181 | debugfs_remove(ceph_debugfs_dir); | ||
| 182 | } | ||
| 183 | |||
| 184 | int ceph_debugfs_client_init(struct ceph_client *client) | ||
| 185 | { | ||
| 186 | int ret = -ENOMEM; | ||
| 187 | char name[80]; | ||
| 188 | |||
| 189 | snprintf(name, sizeof(name), "%pU.client%lld", &client->fsid, | ||
| 190 | client->monc.auth->global_id); | ||
| 191 | |||
| 192 | client->debugfs_dir = debugfs_create_dir(name, ceph_debugfs_dir); | ||
| 193 | if (!client->debugfs_dir) | ||
| 194 | goto out; | ||
| 195 | |||
| 196 | client->monc.debugfs_file = debugfs_create_file("monc", | ||
| 197 | 0600, | ||
| 198 | client->debugfs_dir, | ||
| 199 | client, | ||
| 200 | &monc_show_fops); | ||
| 201 | if (!client->monc.debugfs_file) | ||
| 202 | goto out; | ||
| 203 | |||
| 204 | client->osdc.debugfs_file = debugfs_create_file("osdc", | ||
| 205 | 0600, | ||
| 206 | client->debugfs_dir, | ||
| 207 | client, | ||
| 208 | &osdc_show_fops); | ||
| 209 | if (!client->osdc.debugfs_file) | ||
| 210 | goto out; | ||
| 211 | |||
| 212 | client->debugfs_monmap = debugfs_create_file("monmap", | ||
| 213 | 0600, | ||
| 214 | client->debugfs_dir, | ||
| 215 | client, | ||
| 216 | &monmap_show_fops); | ||
| 217 | if (!client->debugfs_monmap) | ||
| 218 | goto out; | ||
| 219 | |||
| 220 | client->debugfs_osdmap = debugfs_create_file("osdmap", | ||
| 221 | 0600, | ||
| 222 | client->debugfs_dir, | ||
| 223 | client, | ||
| 224 | &osdmap_show_fops); | ||
| 225 | if (!client->debugfs_osdmap) | ||
| 226 | goto out; | ||
| 227 | |||
| 228 | return 0; | ||
| 229 | |||
| 230 | out: | ||
| 231 | ceph_debugfs_client_cleanup(client); | ||
| 232 | return ret; | ||
| 233 | } | ||
| 234 | |||
| 235 | void ceph_debugfs_client_cleanup(struct ceph_client *client) | ||
| 236 | { | ||
| 237 | debugfs_remove(client->debugfs_osdmap); | ||
| 238 | debugfs_remove(client->debugfs_monmap); | ||
| 239 | debugfs_remove(client->osdc.debugfs_file); | ||
| 240 | debugfs_remove(client->monc.debugfs_file); | ||
| 241 | debugfs_remove(client->debugfs_dir); | ||
| 242 | } | ||
| 243 | |||
| 244 | #else /* CONFIG_DEBUG_FS */ | ||
| 245 | |||
/*
 * Stubs used when CONFIG_DEBUG_FS is not set: the init functions report
 * success and the cleanup functions do nothing.
 */

/* Nothing to create; always returns 0. */
int ceph_debugfs_init(void)
{
	return 0;
}

/* Nothing to remove. */
void ceph_debugfs_cleanup(void)
{
}

/* No per-client entries are created; always returns 0. */
int ceph_debugfs_client_init(struct ceph_client *client)
{
	return 0;
}

/* No per-client entries to remove. */
void ceph_debugfs_client_cleanup(struct ceph_client *client)
{
}
| 263 | |||
| 264 | #endif /* CONFIG_DEBUG_FS */ | ||
| 265 | |||
| 266 | EXPORT_SYMBOL(ceph_debugfs_init); | ||
| 267 | EXPORT_SYMBOL(ceph_debugfs_cleanup); | ||
diff --git a/fs/ceph/messenger.c b/net/ceph/messenger.c index 2502d76fcec1..0e8157ee5d43 100644 --- a/fs/ceph/messenger.c +++ b/net/ceph/messenger.c | |||
| @@ -1,4 +1,4 @@ | |||
| 1 | #include "ceph_debug.h" | 1 | #include <linux/ceph/ceph_debug.h> |
| 2 | 2 | ||
| 3 | #include <linux/crc32c.h> | 3 | #include <linux/crc32c.h> |
| 4 | #include <linux/ctype.h> | 4 | #include <linux/ctype.h> |
| @@ -9,12 +9,14 @@ | |||
| 9 | #include <linux/slab.h> | 9 | #include <linux/slab.h> |
| 10 | #include <linux/socket.h> | 10 | #include <linux/socket.h> |
| 11 | #include <linux/string.h> | 11 | #include <linux/string.h> |
| 12 | #include <linux/bio.h> | ||
| 13 | #include <linux/blkdev.h> | ||
| 12 | #include <net/tcp.h> | 14 | #include <net/tcp.h> |
| 13 | 15 | ||
| 14 | #include "super.h" | 16 | #include <linux/ceph/libceph.h> |
| 15 | #include "messenger.h" | 17 | #include <linux/ceph/messenger.h> |
| 16 | #include "decode.h" | 18 | #include <linux/ceph/decode.h> |
| 17 | #include "pagelist.h" | 19 | #include <linux/ceph/pagelist.h> |
| 18 | 20 | ||
| 19 | /* | 21 | /* |
| 20 | * Ceph uses the messenger to exchange ceph_msg messages with other | 22 | * Ceph uses the messenger to exchange ceph_msg messages with other |
| @@ -48,7 +50,7 @@ static char addr_str[MAX_ADDR_STR][MAX_ADDR_STR_LEN]; | |||
| 48 | static DEFINE_SPINLOCK(addr_str_lock); | 50 | static DEFINE_SPINLOCK(addr_str_lock); |
| 49 | static int last_addr_str; | 51 | static int last_addr_str; |
| 50 | 52 | ||
| 51 | const char *pr_addr(const struct sockaddr_storage *ss) | 53 | const char *ceph_pr_addr(const struct sockaddr_storage *ss) |
| 52 | { | 54 | { |
| 53 | int i; | 55 | int i; |
| 54 | char *s; | 56 | char *s; |
| @@ -79,6 +81,7 @@ const char *pr_addr(const struct sockaddr_storage *ss) | |||
| 79 | 81 | ||
| 80 | return s; | 82 | return s; |
| 81 | } | 83 | } |
| 84 | EXPORT_SYMBOL(ceph_pr_addr); | ||
| 82 | 85 | ||
| 83 | static void encode_my_addr(struct ceph_messenger *msgr) | 86 | static void encode_my_addr(struct ceph_messenger *msgr) |
| 84 | { | 87 | { |
| @@ -91,7 +94,7 @@ static void encode_my_addr(struct ceph_messenger *msgr) | |||
| 91 | */ | 94 | */ |
| 92 | struct workqueue_struct *ceph_msgr_wq; | 95 | struct workqueue_struct *ceph_msgr_wq; |
| 93 | 96 | ||
| 94 | int __init ceph_msgr_init(void) | 97 | int ceph_msgr_init(void) |
| 95 | { | 98 | { |
| 96 | ceph_msgr_wq = create_workqueue("ceph-msgr"); | 99 | ceph_msgr_wq = create_workqueue("ceph-msgr"); |
| 97 | if (IS_ERR(ceph_msgr_wq)) { | 100 | if (IS_ERR(ceph_msgr_wq)) { |
| @@ -102,16 +105,19 @@ int __init ceph_msgr_init(void) | |||
| 102 | } | 105 | } |
| 103 | return 0; | 106 | return 0; |
| 104 | } | 107 | } |
| 108 | EXPORT_SYMBOL(ceph_msgr_init); | ||
| 105 | 109 | ||
| 106 | void ceph_msgr_exit(void) | 110 | void ceph_msgr_exit(void) |
| 107 | { | 111 | { |
| 108 | destroy_workqueue(ceph_msgr_wq); | 112 | destroy_workqueue(ceph_msgr_wq); |
| 109 | } | 113 | } |
| 114 | EXPORT_SYMBOL(ceph_msgr_exit); | ||
| 110 | 115 | ||
| 111 | void ceph_msgr_flush(void) | 116 | void ceph_msgr_flush(void) |
| 112 | { | 117 | { |
| 113 | flush_workqueue(ceph_msgr_wq); | 118 | flush_workqueue(ceph_msgr_wq); |
| 114 | } | 119 | } |
| 120 | EXPORT_SYMBOL(ceph_msgr_flush); | ||
| 115 | 121 | ||
| 116 | 122 | ||
| 117 | /* | 123 | /* |
| @@ -221,19 +227,19 @@ static struct socket *ceph_tcp_connect(struct ceph_connection *con) | |||
| 221 | 227 | ||
| 222 | set_sock_callbacks(sock, con); | 228 | set_sock_callbacks(sock, con); |
| 223 | 229 | ||
| 224 | dout("connect %s\n", pr_addr(&con->peer_addr.in_addr)); | 230 | dout("connect %s\n", ceph_pr_addr(&con->peer_addr.in_addr)); |
| 225 | 231 | ||
| 226 | ret = sock->ops->connect(sock, (struct sockaddr *)paddr, sizeof(*paddr), | 232 | ret = sock->ops->connect(sock, (struct sockaddr *)paddr, sizeof(*paddr), |
| 227 | O_NONBLOCK); | 233 | O_NONBLOCK); |
| 228 | if (ret == -EINPROGRESS) { | 234 | if (ret == -EINPROGRESS) { |
| 229 | dout("connect %s EINPROGRESS sk_state = %u\n", | 235 | dout("connect %s EINPROGRESS sk_state = %u\n", |
| 230 | pr_addr(&con->peer_addr.in_addr), | 236 | ceph_pr_addr(&con->peer_addr.in_addr), |
| 231 | sock->sk->sk_state); | 237 | sock->sk->sk_state); |
| 232 | ret = 0; | 238 | ret = 0; |
| 233 | } | 239 | } |
| 234 | if (ret < 0) { | 240 | if (ret < 0) { |
| 235 | pr_err("connect %s error %d\n", | 241 | pr_err("connect %s error %d\n", |
| 236 | pr_addr(&con->peer_addr.in_addr), ret); | 242 | ceph_pr_addr(&con->peer_addr.in_addr), ret); |
| 237 | sock_release(sock); | 243 | sock_release(sock); |
| 238 | con->sock = NULL; | 244 | con->sock = NULL; |
| 239 | con->error_msg = "connect error"; | 245 | con->error_msg = "connect error"; |
| @@ -334,7 +340,8 @@ static void reset_connection(struct ceph_connection *con) | |||
| 334 | */ | 340 | */ |
| 335 | void ceph_con_close(struct ceph_connection *con) | 341 | void ceph_con_close(struct ceph_connection *con) |
| 336 | { | 342 | { |
| 337 | dout("con_close %p peer %s\n", con, pr_addr(&con->peer_addr.in_addr)); | 343 | dout("con_close %p peer %s\n", con, |
| 344 | ceph_pr_addr(&con->peer_addr.in_addr)); | ||
| 338 | set_bit(CLOSED, &con->state); /* in case there's queued work */ | 345 | set_bit(CLOSED, &con->state); /* in case there's queued work */ |
| 339 | clear_bit(STANDBY, &con->state); /* avoid connect_seq bump */ | 346 | clear_bit(STANDBY, &con->state); /* avoid connect_seq bump */ |
| 340 | clear_bit(LOSSYTX, &con->state); /* so we retry next connect */ | 347 | clear_bit(LOSSYTX, &con->state); /* so we retry next connect */ |
| @@ -347,19 +354,21 @@ void ceph_con_close(struct ceph_connection *con) | |||
| 347 | mutex_unlock(&con->mutex); | 354 | mutex_unlock(&con->mutex); |
| 348 | queue_con(con); | 355 | queue_con(con); |
| 349 | } | 356 | } |
| 357 | EXPORT_SYMBOL(ceph_con_close); | ||
| 350 | 358 | ||
| 351 | /* | 359 | /* |
| 352 | * Reopen a closed connection, with a new peer address. | 360 | * Reopen a closed connection, with a new peer address. |
| 353 | */ | 361 | */ |
| 354 | void ceph_con_open(struct ceph_connection *con, struct ceph_entity_addr *addr) | 362 | void ceph_con_open(struct ceph_connection *con, struct ceph_entity_addr *addr) |
| 355 | { | 363 | { |
| 356 | dout("con_open %p %s\n", con, pr_addr(&addr->in_addr)); | 364 | dout("con_open %p %s\n", con, ceph_pr_addr(&addr->in_addr)); |
| 357 | set_bit(OPENING, &con->state); | 365 | set_bit(OPENING, &con->state); |
| 358 | clear_bit(CLOSED, &con->state); | 366 | clear_bit(CLOSED, &con->state); |
| 359 | memcpy(&con->peer_addr, addr, sizeof(*addr)); | 367 | memcpy(&con->peer_addr, addr, sizeof(*addr)); |
| 360 | con->delay = 0; /* reset backoff memory */ | 368 | con->delay = 0; /* reset backoff memory */ |
| 361 | queue_con(con); | 369 | queue_con(con); |
| 362 | } | 370 | } |
| 371 | EXPORT_SYMBOL(ceph_con_open); | ||
| 363 | 372 | ||
| 364 | /* | 373 | /* |
| 365 | * return true if this connection ever successfully opened | 374 | * return true if this connection ever successfully opened |
| @@ -406,6 +415,7 @@ void ceph_con_init(struct ceph_messenger *msgr, struct ceph_connection *con) | |||
| 406 | INIT_LIST_HEAD(&con->out_sent); | 415 | INIT_LIST_HEAD(&con->out_sent); |
| 407 | INIT_DELAYED_WORK(&con->work, con_work); | 416 | INIT_DELAYED_WORK(&con->work, con_work); |
| 408 | } | 417 | } |
| 418 | EXPORT_SYMBOL(ceph_con_init); | ||
| 409 | 419 | ||
| 410 | 420 | ||
| 411 | /* | 421 | /* |
| @@ -529,8 +539,11 @@ static void prepare_write_message(struct ceph_connection *con) | |||
| 529 | if (le32_to_cpu(m->hdr.data_len) > 0) { | 539 | if (le32_to_cpu(m->hdr.data_len) > 0) { |
| 530 | /* initialize page iterator */ | 540 | /* initialize page iterator */ |
| 531 | con->out_msg_pos.page = 0; | 541 | con->out_msg_pos.page = 0; |
| 532 | con->out_msg_pos.page_pos = | 542 | if (m->pages) |
| 533 | le16_to_cpu(m->hdr.data_off) & ~PAGE_MASK; | 543 | con->out_msg_pos.page_pos = |
| 544 | le16_to_cpu(m->hdr.data_off) & ~PAGE_MASK; | ||
| 545 | else | ||
| 546 | con->out_msg_pos.page_pos = 0; | ||
| 534 | con->out_msg_pos.data_pos = 0; | 547 | con->out_msg_pos.data_pos = 0; |
| 535 | con->out_msg_pos.did_page_crc = 0; | 548 | con->out_msg_pos.did_page_crc = 0; |
| 536 | con->out_more = 1; /* data + footer will follow */ | 549 | con->out_more = 1; /* data + footer will follow */ |
| @@ -647,7 +660,7 @@ static void prepare_write_connect(struct ceph_messenger *msgr, | |||
| 647 | dout("prepare_write_connect %p cseq=%d gseq=%d proto=%d\n", con, | 660 | dout("prepare_write_connect %p cseq=%d gseq=%d proto=%d\n", con, |
| 648 | con->connect_seq, global_seq, proto); | 661 | con->connect_seq, global_seq, proto); |
| 649 | 662 | ||
| 650 | con->out_connect.features = cpu_to_le64(CEPH_FEATURE_SUPPORTED); | 663 | con->out_connect.features = cpu_to_le64(msgr->supported_features); |
| 651 | con->out_connect.host_type = cpu_to_le32(CEPH_ENTITY_TYPE_CLIENT); | 664 | con->out_connect.host_type = cpu_to_le32(CEPH_ENTITY_TYPE_CLIENT); |
| 652 | con->out_connect.connect_seq = cpu_to_le32(con->connect_seq); | 665 | con->out_connect.connect_seq = cpu_to_le32(con->connect_seq); |
| 653 | con->out_connect.global_seq = cpu_to_le32(global_seq); | 666 | con->out_connect.global_seq = cpu_to_le32(global_seq); |
| @@ -712,6 +725,31 @@ out: | |||
| 712 | return ret; /* done! */ | 725 | return ret; /* done! */ |
| 713 | } | 726 | } |
| 714 | 727 | ||
| 728 | #ifdef CONFIG_BLOCK | ||
| 729 | static void init_bio_iter(struct bio *bio, struct bio **iter, int *seg) | ||
| 730 | { | ||
| 731 | if (!bio) { | ||
| 732 | *iter = NULL; | ||
| 733 | *seg = 0; | ||
| 734 | return; | ||
| 735 | } | ||
| 736 | *iter = bio; | ||
| 737 | *seg = bio->bi_idx; | ||
| 738 | } | ||
| 739 | |||
| 740 | static void iter_bio_next(struct bio **bio_iter, int *seg) | ||
| 741 | { | ||
| 742 | if (*bio_iter == NULL) | ||
| 743 | return; | ||
| 744 | |||
| 745 | BUG_ON(*seg >= (*bio_iter)->bi_vcnt); | ||
| 746 | |||
| 747 | (*seg)++; | ||
| 748 | if (*seg == (*bio_iter)->bi_vcnt) | ||
| 749 | init_bio_iter((*bio_iter)->bi_next, bio_iter, seg); | ||
| 750 | } | ||
| 751 | #endif | ||
| 752 | |||
| 715 | /* | 753 | /* |
| 716 | * Write as much message data payload as we can. If we finish, queue | 754 | * Write as much message data payload as we can. If we finish, queue |
| 717 | * up the footer. | 755 | * up the footer. |
| @@ -726,21 +764,46 @@ static int write_partial_msg_pages(struct ceph_connection *con) | |||
| 726 | size_t len; | 764 | size_t len; |
| 727 | int crc = con->msgr->nocrc; | 765 | int crc = con->msgr->nocrc; |
| 728 | int ret; | 766 | int ret; |
| 767 | int total_max_write; | ||
| 768 | int in_trail = 0; | ||
| 769 | size_t trail_len = (msg->trail ? msg->trail->length : 0); | ||
| 729 | 770 | ||
| 730 | dout("write_partial_msg_pages %p msg %p page %d/%d offset %d\n", | 771 | dout("write_partial_msg_pages %p msg %p page %d/%d offset %d\n", |
| 731 | con, con->out_msg, con->out_msg_pos.page, con->out_msg->nr_pages, | 772 | con, con->out_msg, con->out_msg_pos.page, con->out_msg->nr_pages, |
| 732 | con->out_msg_pos.page_pos); | 773 | con->out_msg_pos.page_pos); |
| 733 | 774 | ||
| 734 | while (con->out_msg_pos.page < con->out_msg->nr_pages) { | 775 | #ifdef CONFIG_BLOCK |
| 776 | if (msg->bio && !msg->bio_iter) | ||
| 777 | init_bio_iter(msg->bio, &msg->bio_iter, &msg->bio_seg); | ||
| 778 | #endif | ||
| 779 | |||
| 780 | while (data_len > con->out_msg_pos.data_pos) { | ||
| 735 | struct page *page = NULL; | 781 | struct page *page = NULL; |
| 736 | void *kaddr = NULL; | 782 | void *kaddr = NULL; |
| 783 | int max_write = PAGE_SIZE; | ||
| 784 | int page_shift = 0; | ||
| 785 | |||
| 786 | total_max_write = data_len - trail_len - | ||
| 787 | con->out_msg_pos.data_pos; | ||
| 737 | 788 | ||
| 738 | /* | 789 | /* |
| 739 | * if we are calculating the data crc (the default), we need | 790 | * if we are calculating the data crc (the default), we need |
| 740 | * to map the page. if our pages[] has been revoked, use the | 791 | * to map the page. if our pages[] has been revoked, use the |
| 741 | * zero page. | 792 | * zero page. |
| 742 | */ | 793 | */ |
| 743 | if (msg->pages) { | 794 | |
| 795 | /* have we reached the trail part of the data? */ | ||
| 796 | if (con->out_msg_pos.data_pos >= data_len - trail_len) { | ||
| 797 | in_trail = 1; | ||
| 798 | |||
| 799 | total_max_write = data_len - con->out_msg_pos.data_pos; | ||
| 800 | |||
| 801 | page = list_first_entry(&msg->trail->head, | ||
| 802 | struct page, lru); | ||
| 803 | if (crc) | ||
| 804 | kaddr = kmap(page); | ||
| 805 | max_write = PAGE_SIZE; | ||
| 806 | } else if (msg->pages) { | ||
| 744 | page = msg->pages[con->out_msg_pos.page]; | 807 | page = msg->pages[con->out_msg_pos.page]; |
| 745 | if (crc) | 808 | if (crc) |
| 746 | kaddr = kmap(page); | 809 | kaddr = kmap(page); |
| @@ -749,13 +812,25 @@ static int write_partial_msg_pages(struct ceph_connection *con) | |||
| 749 | struct page, lru); | 812 | struct page, lru); |
| 750 | if (crc) | 813 | if (crc) |
| 751 | kaddr = kmap(page); | 814 | kaddr = kmap(page); |
| 815 | #ifdef CONFIG_BLOCK | ||
| 816 | } else if (msg->bio) { | ||
| 817 | struct bio_vec *bv; | ||
| 818 | |||
| 819 | bv = bio_iovec_idx(msg->bio_iter, msg->bio_seg); | ||
| 820 | page = bv->bv_page; | ||
| 821 | page_shift = bv->bv_offset; | ||
| 822 | if (crc) | ||
| 823 | kaddr = kmap(page) + page_shift; | ||
| 824 | max_write = bv->bv_len; | ||
| 825 | #endif | ||
| 752 | } else { | 826 | } else { |
| 753 | page = con->msgr->zero_page; | 827 | page = con->msgr->zero_page; |
| 754 | if (crc) | 828 | if (crc) |
| 755 | kaddr = page_address(con->msgr->zero_page); | 829 | kaddr = page_address(con->msgr->zero_page); |
| 756 | } | 830 | } |
| 757 | len = min((int)(PAGE_SIZE - con->out_msg_pos.page_pos), | 831 | len = min_t(int, max_write - con->out_msg_pos.page_pos, |
| 758 | (int)(data_len - con->out_msg_pos.data_pos)); | 832 | total_max_write); |
| 833 | |||
| 759 | if (crc && !con->out_msg_pos.did_page_crc) { | 834 | if (crc && !con->out_msg_pos.did_page_crc) { |
| 760 | void *base = kaddr + con->out_msg_pos.page_pos; | 835 | void *base = kaddr + con->out_msg_pos.page_pos; |
| 761 | u32 tmpcrc = le32_to_cpu(con->out_msg->footer.data_crc); | 836 | u32 tmpcrc = le32_to_cpu(con->out_msg->footer.data_crc); |
| @@ -765,13 +840,14 @@ static int write_partial_msg_pages(struct ceph_connection *con) | |||
| 765 | cpu_to_le32(crc32c(tmpcrc, base, len)); | 840 | cpu_to_le32(crc32c(tmpcrc, base, len)); |
| 766 | con->out_msg_pos.did_page_crc = 1; | 841 | con->out_msg_pos.did_page_crc = 1; |
| 767 | } | 842 | } |
| 768 | |||
| 769 | ret = kernel_sendpage(con->sock, page, | 843 | ret = kernel_sendpage(con->sock, page, |
| 770 | con->out_msg_pos.page_pos, len, | 844 | con->out_msg_pos.page_pos + page_shift, |
| 845 | len, | ||
| 771 | MSG_DONTWAIT | MSG_NOSIGNAL | | 846 | MSG_DONTWAIT | MSG_NOSIGNAL | |
| 772 | MSG_MORE); | 847 | MSG_MORE); |
| 773 | 848 | ||
| 774 | if (crc && (msg->pages || msg->pagelist)) | 849 | if (crc && |
| 850 | (msg->pages || msg->pagelist || msg->bio || in_trail)) | ||
| 775 | kunmap(page); | 851 | kunmap(page); |
| 776 | 852 | ||
| 777 | if (ret <= 0) | 853 | if (ret <= 0) |
| @@ -783,9 +859,16 @@ static int write_partial_msg_pages(struct ceph_connection *con) | |||
| 783 | con->out_msg_pos.page_pos = 0; | 859 | con->out_msg_pos.page_pos = 0; |
| 784 | con->out_msg_pos.page++; | 860 | con->out_msg_pos.page++; |
| 785 | con->out_msg_pos.did_page_crc = 0; | 861 | con->out_msg_pos.did_page_crc = 0; |
| 786 | if (msg->pagelist) | 862 | if (in_trail) |
| 863 | list_move_tail(&page->lru, | ||
| 864 | &msg->trail->head); | ||
| 865 | else if (msg->pagelist) | ||
| 787 | list_move_tail(&page->lru, | 866 | list_move_tail(&page->lru, |
| 788 | &msg->pagelist->head); | 867 | &msg->pagelist->head); |
| 868 | #ifdef CONFIG_BLOCK | ||
| 869 | else if (msg->bio) | ||
| 870 | iter_bio_next(&msg->bio_iter, &msg->bio_seg); | ||
| 871 | #endif | ||
| 789 | } | 872 | } |
| 790 | } | 873 | } |
| 791 | 874 | ||
| @@ -938,7 +1021,7 @@ static int verify_hello(struct ceph_connection *con) | |||
| 938 | { | 1021 | { |
| 939 | if (memcmp(con->in_banner, CEPH_BANNER, strlen(CEPH_BANNER))) { | 1022 | if (memcmp(con->in_banner, CEPH_BANNER, strlen(CEPH_BANNER))) { |
| 940 | pr_err("connect to %s got bad banner\n", | 1023 | pr_err("connect to %s got bad banner\n", |
| 941 | pr_addr(&con->peer_addr.in_addr)); | 1024 | ceph_pr_addr(&con->peer_addr.in_addr)); |
| 942 | con->error_msg = "protocol error, bad banner"; | 1025 | con->error_msg = "protocol error, bad banner"; |
| 943 | return -1; | 1026 | return -1; |
| 944 | } | 1027 | } |
| @@ -1041,7 +1124,7 @@ int ceph_parse_ips(const char *c, const char *end, | |||
| 1041 | 1124 | ||
| 1042 | addr_set_port(ss, port); | 1125 | addr_set_port(ss, port); |
| 1043 | 1126 | ||
| 1044 | dout("parse_ips got %s\n", pr_addr(ss)); | 1127 | dout("parse_ips got %s\n", ceph_pr_addr(ss)); |
| 1045 | 1128 | ||
| 1046 | if (p == end) | 1129 | if (p == end) |
| 1047 | break; | 1130 | break; |
| @@ -1061,6 +1144,7 @@ bad: | |||
| 1061 | pr_err("parse_ips bad ip '%.*s'\n", (int)(end - c), c); | 1144 | pr_err("parse_ips bad ip '%.*s'\n", (int)(end - c), c); |
| 1062 | return -EINVAL; | 1145 | return -EINVAL; |
| 1063 | } | 1146 | } |
| 1147 | EXPORT_SYMBOL(ceph_parse_ips); | ||
| 1064 | 1148 | ||
| 1065 | static int process_banner(struct ceph_connection *con) | 1149 | static int process_banner(struct ceph_connection *con) |
| 1066 | { | 1150 | { |
| @@ -1082,9 +1166,9 @@ static int process_banner(struct ceph_connection *con) | |||
| 1082 | !(addr_is_blank(&con->actual_peer_addr.in_addr) && | 1166 | !(addr_is_blank(&con->actual_peer_addr.in_addr) && |
| 1083 | con->actual_peer_addr.nonce == con->peer_addr.nonce)) { | 1167 | con->actual_peer_addr.nonce == con->peer_addr.nonce)) { |
| 1084 | pr_warning("wrong peer, want %s/%d, got %s/%d\n", | 1168 | pr_warning("wrong peer, want %s/%d, got %s/%d\n", |
| 1085 | pr_addr(&con->peer_addr.in_addr), | 1169 | ceph_pr_addr(&con->peer_addr.in_addr), |
| 1086 | (int)le32_to_cpu(con->peer_addr.nonce), | 1170 | (int)le32_to_cpu(con->peer_addr.nonce), |
| 1087 | pr_addr(&con->actual_peer_addr.in_addr), | 1171 | ceph_pr_addr(&con->actual_peer_addr.in_addr), |
| 1088 | (int)le32_to_cpu(con->actual_peer_addr.nonce)); | 1172 | (int)le32_to_cpu(con->actual_peer_addr.nonce)); |
| 1089 | con->error_msg = "wrong peer at address"; | 1173 | con->error_msg = "wrong peer at address"; |
| 1090 | return -1; | 1174 | return -1; |
| @@ -1102,7 +1186,7 @@ static int process_banner(struct ceph_connection *con) | |||
| 1102 | addr_set_port(&con->msgr->inst.addr.in_addr, port); | 1186 | addr_set_port(&con->msgr->inst.addr.in_addr, port); |
| 1103 | encode_my_addr(con->msgr); | 1187 | encode_my_addr(con->msgr); |
| 1104 | dout("process_banner learned my addr is %s\n", | 1188 | dout("process_banner learned my addr is %s\n", |
| 1105 | pr_addr(&con->msgr->inst.addr.in_addr)); | 1189 | ceph_pr_addr(&con->msgr->inst.addr.in_addr)); |
| 1106 | } | 1190 | } |
| 1107 | 1191 | ||
| 1108 | set_bit(NEGOTIATING, &con->state); | 1192 | set_bit(NEGOTIATING, &con->state); |
| @@ -1123,8 +1207,8 @@ static void fail_protocol(struct ceph_connection *con) | |||
| 1123 | 1207 | ||
| 1124 | static int process_connect(struct ceph_connection *con) | 1208 | static int process_connect(struct ceph_connection *con) |
| 1125 | { | 1209 | { |
| 1126 | u64 sup_feat = CEPH_FEATURE_SUPPORTED; | 1210 | u64 sup_feat = con->msgr->supported_features; |
| 1127 | u64 req_feat = CEPH_FEATURE_REQUIRED; | 1211 | u64 req_feat = con->msgr->required_features; |
| 1128 | u64 server_feat = le64_to_cpu(con->in_reply.features); | 1212 | u64 server_feat = le64_to_cpu(con->in_reply.features); |
| 1129 | 1213 | ||
| 1130 | dout("process_connect on %p tag %d\n", con, (int)con->in_tag); | 1214 | dout("process_connect on %p tag %d\n", con, (int)con->in_tag); |
| @@ -1134,7 +1218,7 @@ static int process_connect(struct ceph_connection *con) | |||
| 1134 | pr_err("%s%lld %s feature set mismatch," | 1218 | pr_err("%s%lld %s feature set mismatch," |
| 1135 | " my %llx < server's %llx, missing %llx\n", | 1219 | " my %llx < server's %llx, missing %llx\n", |
| 1136 | ENTITY_NAME(con->peer_name), | 1220 | ENTITY_NAME(con->peer_name), |
| 1137 | pr_addr(&con->peer_addr.in_addr), | 1221 | ceph_pr_addr(&con->peer_addr.in_addr), |
| 1138 | sup_feat, server_feat, server_feat & ~sup_feat); | 1222 | sup_feat, server_feat, server_feat & ~sup_feat); |
| 1139 | con->error_msg = "missing required protocol features"; | 1223 | con->error_msg = "missing required protocol features"; |
| 1140 | fail_protocol(con); | 1224 | fail_protocol(con); |
| @@ -1144,7 +1228,7 @@ static int process_connect(struct ceph_connection *con) | |||
| 1144 | pr_err("%s%lld %s protocol version mismatch," | 1228 | pr_err("%s%lld %s protocol version mismatch," |
| 1145 | " my %d != server's %d\n", | 1229 | " my %d != server's %d\n", |
| 1146 | ENTITY_NAME(con->peer_name), | 1230 | ENTITY_NAME(con->peer_name), |
| 1147 | pr_addr(&con->peer_addr.in_addr), | 1231 | ceph_pr_addr(&con->peer_addr.in_addr), |
| 1148 | le32_to_cpu(con->out_connect.protocol_version), | 1232 | le32_to_cpu(con->out_connect.protocol_version), |
| 1149 | le32_to_cpu(con->in_reply.protocol_version)); | 1233 | le32_to_cpu(con->in_reply.protocol_version)); |
| 1150 | con->error_msg = "protocol version mismatch"; | 1234 | con->error_msg = "protocol version mismatch"; |
| @@ -1178,7 +1262,7 @@ static int process_connect(struct ceph_connection *con) | |||
| 1178 | le32_to_cpu(con->in_connect.connect_seq)); | 1262 | le32_to_cpu(con->in_connect.connect_seq)); |
| 1179 | pr_err("%s%lld %s connection reset\n", | 1263 | pr_err("%s%lld %s connection reset\n", |
| 1180 | ENTITY_NAME(con->peer_name), | 1264 | ENTITY_NAME(con->peer_name), |
| 1181 | pr_addr(&con->peer_addr.in_addr)); | 1265 | ceph_pr_addr(&con->peer_addr.in_addr)); |
| 1182 | reset_connection(con); | 1266 | reset_connection(con); |
| 1183 | prepare_write_connect(con->msgr, con, 0); | 1267 | prepare_write_connect(con->msgr, con, 0); |
| 1184 | prepare_read_connect(con); | 1268 | prepare_read_connect(con); |
| @@ -1223,7 +1307,7 @@ static int process_connect(struct ceph_connection *con) | |||
| 1223 | pr_err("%s%lld %s protocol feature mismatch," | 1307 | pr_err("%s%lld %s protocol feature mismatch," |
| 1224 | " my required %llx > server's %llx, need %llx\n", | 1308 | " my required %llx > server's %llx, need %llx\n", |
| 1225 | ENTITY_NAME(con->peer_name), | 1309 | ENTITY_NAME(con->peer_name), |
| 1226 | pr_addr(&con->peer_addr.in_addr), | 1310 | ceph_pr_addr(&con->peer_addr.in_addr), |
| 1227 | req_feat, server_feat, req_feat & ~server_feat); | 1311 | req_feat, server_feat, req_feat & ~server_feat); |
| 1228 | con->error_msg = "missing required protocol features"; | 1312 | con->error_msg = "missing required protocol features"; |
| 1229 | fail_protocol(con); | 1313 | fail_protocol(con); |
| @@ -1305,8 +1389,7 @@ static int read_partial_message_section(struct ceph_connection *con, | |||
| 1305 | struct kvec *section, | 1389 | struct kvec *section, |
| 1306 | unsigned int sec_len, u32 *crc) | 1390 | unsigned int sec_len, u32 *crc) |
| 1307 | { | 1391 | { |
| 1308 | int left; | 1392 | int ret, left; |
| 1309 | int ret; | ||
| 1310 | 1393 | ||
| 1311 | BUG_ON(!section); | 1394 | BUG_ON(!section); |
| 1312 | 1395 | ||
| @@ -1329,13 +1412,83 @@ static int read_partial_message_section(struct ceph_connection *con, | |||
| 1329 | static struct ceph_msg *ceph_alloc_msg(struct ceph_connection *con, | 1412 | static struct ceph_msg *ceph_alloc_msg(struct ceph_connection *con, |
| 1330 | struct ceph_msg_header *hdr, | 1413 | struct ceph_msg_header *hdr, |
| 1331 | int *skip); | 1414 | int *skip); |
| 1415 | |||
| 1416 | |||
| 1417 | static int read_partial_message_pages(struct ceph_connection *con, | ||
| 1418 | struct page **pages, | ||
| 1419 | unsigned data_len, int datacrc) | ||
| 1420 | { | ||
| 1421 | void *p; | ||
| 1422 | int ret; | ||
| 1423 | int left; | ||
| 1424 | |||
| 1425 | left = min((int)(data_len - con->in_msg_pos.data_pos), | ||
| 1426 | (int)(PAGE_SIZE - con->in_msg_pos.page_pos)); | ||
| 1427 | /* (page) data */ | ||
| 1428 | BUG_ON(pages == NULL); | ||
| 1429 | p = kmap(pages[con->in_msg_pos.page]); | ||
| 1430 | ret = ceph_tcp_recvmsg(con->sock, p + con->in_msg_pos.page_pos, | ||
| 1431 | left); | ||
| 1432 | if (ret > 0 && datacrc) | ||
| 1433 | con->in_data_crc = | ||
| 1434 | crc32c(con->in_data_crc, | ||
| 1435 | p + con->in_msg_pos.page_pos, ret); | ||
| 1436 | kunmap(pages[con->in_msg_pos.page]); | ||
| 1437 | if (ret <= 0) | ||
| 1438 | return ret; | ||
| 1439 | con->in_msg_pos.data_pos += ret; | ||
| 1440 | con->in_msg_pos.page_pos += ret; | ||
| 1441 | if (con->in_msg_pos.page_pos == PAGE_SIZE) { | ||
| 1442 | con->in_msg_pos.page_pos = 0; | ||
| 1443 | con->in_msg_pos.page++; | ||
| 1444 | } | ||
| 1445 | |||
| 1446 | return ret; | ||
| 1447 | } | ||
| 1448 | |||
| 1449 | #ifdef CONFIG_BLOCK | ||
| 1450 | static int read_partial_message_bio(struct ceph_connection *con, | ||
| 1451 | struct bio **bio_iter, int *bio_seg, | ||
| 1452 | unsigned data_len, int datacrc) | ||
| 1453 | { | ||
| 1454 | struct bio_vec *bv = bio_iovec_idx(*bio_iter, *bio_seg); | ||
| 1455 | void *p; | ||
| 1456 | int ret, left; | ||
| 1457 | |||
| 1458 | if (IS_ERR(bv)) | ||
| 1459 | return PTR_ERR(bv); | ||
| 1460 | |||
| 1461 | left = min((int)(data_len - con->in_msg_pos.data_pos), | ||
| 1462 | (int)(bv->bv_len - con->in_msg_pos.page_pos)); | ||
| 1463 | |||
| 1464 | p = kmap(bv->bv_page) + bv->bv_offset; | ||
| 1465 | |||
| 1466 | ret = ceph_tcp_recvmsg(con->sock, p + con->in_msg_pos.page_pos, | ||
| 1467 | left); | ||
| 1468 | if (ret > 0 && datacrc) | ||
| 1469 | con->in_data_crc = | ||
| 1470 | crc32c(con->in_data_crc, | ||
| 1471 | p + con->in_msg_pos.page_pos, ret); | ||
| 1472 | kunmap(bv->bv_page); | ||
| 1473 | if (ret <= 0) | ||
| 1474 | return ret; | ||
| 1475 | con->in_msg_pos.data_pos += ret; | ||
| 1476 | con->in_msg_pos.page_pos += ret; | ||
| 1477 | if (con->in_msg_pos.page_pos == bv->bv_len) { | ||
| 1478 | con->in_msg_pos.page_pos = 0; | ||
| 1479 | iter_bio_next(bio_iter, bio_seg); | ||
| 1480 | } | ||
| 1481 | |||
| 1482 | return ret; | ||
| 1483 | } | ||
| 1484 | #endif | ||
| 1485 | |||
| 1332 | /* | 1486 | /* |
| 1333 | * read (part of) a message. | 1487 | * read (part of) a message. |
| 1334 | */ | 1488 | */ |
| 1335 | static int read_partial_message(struct ceph_connection *con) | 1489 | static int read_partial_message(struct ceph_connection *con) |
| 1336 | { | 1490 | { |
| 1337 | struct ceph_msg *m = con->in_msg; | 1491 | struct ceph_msg *m = con->in_msg; |
| 1338 | void *p; | ||
| 1339 | int ret; | 1492 | int ret; |
| 1340 | int to, left; | 1493 | int to, left; |
| 1341 | unsigned front_len, middle_len, data_len, data_off; | 1494 | unsigned front_len, middle_len, data_len, data_off; |
| @@ -1381,7 +1534,7 @@ static int read_partial_message(struct ceph_connection *con) | |||
| 1381 | if ((s64)seq - (s64)con->in_seq < 1) { | 1534 | if ((s64)seq - (s64)con->in_seq < 1) { |
| 1382 | pr_info("skipping %s%lld %s seq %lld, expected %lld\n", | 1535 | pr_info("skipping %s%lld %s seq %lld, expected %lld\n", |
| 1383 | ENTITY_NAME(con->peer_name), | 1536 | ENTITY_NAME(con->peer_name), |
| 1384 | pr_addr(&con->peer_addr.in_addr), | 1537 | ceph_pr_addr(&con->peer_addr.in_addr), |
| 1385 | seq, con->in_seq + 1); | 1538 | seq, con->in_seq + 1); |
| 1386 | con->in_base_pos = -front_len - middle_len - data_len - | 1539 | con->in_base_pos = -front_len - middle_len - data_len - |
| 1387 | sizeof(m->footer); | 1540 | sizeof(m->footer); |
| @@ -1422,7 +1575,10 @@ static int read_partial_message(struct ceph_connection *con) | |||
| 1422 | m->middle->vec.iov_len = 0; | 1575 | m->middle->vec.iov_len = 0; |
| 1423 | 1576 | ||
| 1424 | con->in_msg_pos.page = 0; | 1577 | con->in_msg_pos.page = 0; |
| 1425 | con->in_msg_pos.page_pos = data_off & ~PAGE_MASK; | 1578 | if (m->pages) |
| 1579 | con->in_msg_pos.page_pos = data_off & ~PAGE_MASK; | ||
| 1580 | else | ||
| 1581 | con->in_msg_pos.page_pos = 0; | ||
| 1426 | con->in_msg_pos.data_pos = 0; | 1582 | con->in_msg_pos.data_pos = 0; |
| 1427 | } | 1583 | } |
| 1428 | 1584 | ||
| @@ -1440,27 +1596,29 @@ static int read_partial_message(struct ceph_connection *con) | |||
| 1440 | if (ret <= 0) | 1596 | if (ret <= 0) |
| 1441 | return ret; | 1597 | return ret; |
| 1442 | } | 1598 | } |
| 1599 | #ifdef CONFIG_BLOCK | ||
| 1600 | if (m->bio && !m->bio_iter) | ||
| 1601 | init_bio_iter(m->bio, &m->bio_iter, &m->bio_seg); | ||
| 1602 | #endif | ||
| 1443 | 1603 | ||
| 1444 | /* (page) data */ | 1604 | /* (page) data */ |
| 1445 | while (con->in_msg_pos.data_pos < data_len) { | 1605 | while (con->in_msg_pos.data_pos < data_len) { |
| 1446 | left = min((int)(data_len - con->in_msg_pos.data_pos), | 1606 | if (m->pages) { |
| 1447 | (int)(PAGE_SIZE - con->in_msg_pos.page_pos)); | 1607 | ret = read_partial_message_pages(con, m->pages, |
| 1448 | BUG_ON(m->pages == NULL); | 1608 | data_len, datacrc); |
| 1449 | p = kmap(m->pages[con->in_msg_pos.page]); | 1609 | if (ret <= 0) |
| 1450 | ret = ceph_tcp_recvmsg(con->sock, p + con->in_msg_pos.page_pos, | 1610 | return ret; |
| 1451 | left); | 1611 | #ifdef CONFIG_BLOCK |
| 1452 | if (ret > 0 && datacrc) | 1612 | } else if (m->bio) { |
| 1453 | con->in_data_crc = | 1613 | |
| 1454 | crc32c(con->in_data_crc, | 1614 | ret = read_partial_message_bio(con, |
| 1455 | p + con->in_msg_pos.page_pos, ret); | 1615 | &m->bio_iter, &m->bio_seg, |
| 1456 | kunmap(m->pages[con->in_msg_pos.page]); | 1616 | data_len, datacrc); |
| 1457 | if (ret <= 0) | 1617 | if (ret <= 0) |
| 1458 | return ret; | 1618 | return ret; |
| 1459 | con->in_msg_pos.data_pos += ret; | 1619 | #endif |
| 1460 | con->in_msg_pos.page_pos += ret; | 1620 | } else { |
| 1461 | if (con->in_msg_pos.page_pos == PAGE_SIZE) { | 1621 | BUG_ON(1); |
| 1462 | con->in_msg_pos.page_pos = 0; | ||
| 1463 | con->in_msg_pos.page++; | ||
| 1464 | } | 1622 | } |
| 1465 | } | 1623 | } |
| 1466 | 1624 | ||
| @@ -1874,9 +2032,9 @@ out: | |||
| 1874 | static void ceph_fault(struct ceph_connection *con) | 2032 | static void ceph_fault(struct ceph_connection *con) |
| 1875 | { | 2033 | { |
| 1876 | pr_err("%s%lld %s %s\n", ENTITY_NAME(con->peer_name), | 2034 | pr_err("%s%lld %s %s\n", ENTITY_NAME(con->peer_name), |
| 1877 | pr_addr(&con->peer_addr.in_addr), con->error_msg); | 2035 | ceph_pr_addr(&con->peer_addr.in_addr), con->error_msg); |
| 1878 | dout("fault %p state %lu to peer %s\n", | 2036 | dout("fault %p state %lu to peer %s\n", |
| 1879 | con, con->state, pr_addr(&con->peer_addr.in_addr)); | 2037 | con, con->state, ceph_pr_addr(&con->peer_addr.in_addr)); |
| 1880 | 2038 | ||
| 1881 | if (test_bit(LOSSYTX, &con->state)) { | 2039 | if (test_bit(LOSSYTX, &con->state)) { |
| 1882 | dout("fault on LOSSYTX channel\n"); | 2040 | dout("fault on LOSSYTX channel\n"); |
| @@ -1936,7 +2094,9 @@ out: | |||
| 1936 | /* | 2094 | /* |
| 1937 | * create a new messenger instance | 2095 | * create a new messenger instance |
| 1938 | */ | 2096 | */ |
| 1939 | struct ceph_messenger *ceph_messenger_create(struct ceph_entity_addr *myaddr) | 2097 | struct ceph_messenger *ceph_messenger_create(struct ceph_entity_addr *myaddr, |
| 2098 | u32 supported_features, | ||
| 2099 | u32 required_features) | ||
| 1940 | { | 2100 | { |
| 1941 | struct ceph_messenger *msgr; | 2101 | struct ceph_messenger *msgr; |
| 1942 | 2102 | ||
| @@ -1944,6 +2104,9 @@ struct ceph_messenger *ceph_messenger_create(struct ceph_entity_addr *myaddr) | |||
| 1944 | if (msgr == NULL) | 2104 | if (msgr == NULL) |
| 1945 | return ERR_PTR(-ENOMEM); | 2105 | return ERR_PTR(-ENOMEM); |
| 1946 | 2106 | ||
| 2107 | msgr->supported_features = supported_features; | ||
| 2108 | msgr->required_features = required_features; | ||
| 2109 | |||
| 1947 | spin_lock_init(&msgr->global_seq_lock); | 2110 | spin_lock_init(&msgr->global_seq_lock); |
| 1948 | 2111 | ||
| 1949 | /* the zero page is needed if a request is "canceled" while the message | 2112 | /* the zero page is needed if a request is "canceled" while the message |
| @@ -1966,6 +2129,7 @@ struct ceph_messenger *ceph_messenger_create(struct ceph_entity_addr *myaddr) | |||
| 1966 | dout("messenger_create %p\n", msgr); | 2129 | dout("messenger_create %p\n", msgr); |
| 1967 | return msgr; | 2130 | return msgr; |
| 1968 | } | 2131 | } |
| 2132 | EXPORT_SYMBOL(ceph_messenger_create); | ||
| 1969 | 2133 | ||
| 1970 | void ceph_messenger_destroy(struct ceph_messenger *msgr) | 2134 | void ceph_messenger_destroy(struct ceph_messenger *msgr) |
| 1971 | { | 2135 | { |
| @@ -1975,6 +2139,7 @@ void ceph_messenger_destroy(struct ceph_messenger *msgr) | |||
| 1975 | kfree(msgr); | 2139 | kfree(msgr); |
| 1976 | dout("destroyed messenger %p\n", msgr); | 2140 | dout("destroyed messenger %p\n", msgr); |
| 1977 | } | 2141 | } |
| 2142 | EXPORT_SYMBOL(ceph_messenger_destroy); | ||
| 1978 | 2143 | ||
| 1979 | /* | 2144 | /* |
| 1980 | * Queue up an outgoing message on the given connection. | 2145 | * Queue up an outgoing message on the given connection. |
| @@ -2011,6 +2176,7 @@ void ceph_con_send(struct ceph_connection *con, struct ceph_msg *msg) | |||
| 2011 | if (test_and_set_bit(WRITE_PENDING, &con->state) == 0) | 2176 | if (test_and_set_bit(WRITE_PENDING, &con->state) == 0) |
| 2012 | queue_con(con); | 2177 | queue_con(con); |
| 2013 | } | 2178 | } |
| 2179 | EXPORT_SYMBOL(ceph_con_send); | ||
| 2014 | 2180 | ||
| 2015 | /* | 2181 | /* |
| 2016 | * Revoke a message that was previously queued for send | 2182 | * Revoke a message that was previously queued for send |
| @@ -2076,6 +2242,7 @@ void ceph_con_keepalive(struct ceph_connection *con) | |||
| 2076 | test_and_set_bit(WRITE_PENDING, &con->state) == 0) | 2242 | test_and_set_bit(WRITE_PENDING, &con->state) == 0) |
| 2077 | queue_con(con); | 2243 | queue_con(con); |
| 2078 | } | 2244 | } |
| 2245 | EXPORT_SYMBOL(ceph_con_keepalive); | ||
| 2079 | 2246 | ||
| 2080 | 2247 | ||
| 2081 | /* | 2248 | /* |
| @@ -2136,6 +2303,10 @@ struct ceph_msg *ceph_msg_new(int type, int front_len, gfp_t flags) | |||
| 2136 | m->nr_pages = 0; | 2303 | m->nr_pages = 0; |
| 2137 | m->pages = NULL; | 2304 | m->pages = NULL; |
| 2138 | m->pagelist = NULL; | 2305 | m->pagelist = NULL; |
| 2306 | m->bio = NULL; | ||
| 2307 | m->bio_iter = NULL; | ||
| 2308 | m->bio_seg = 0; | ||
| 2309 | m->trail = NULL; | ||
| 2139 | 2310 | ||
| 2140 | dout("ceph_msg_new %p front %d\n", m, front_len); | 2311 | dout("ceph_msg_new %p front %d\n", m, front_len); |
| 2141 | return m; | 2312 | return m; |
| @@ -2146,6 +2317,7 @@ out: | |||
| 2146 | pr_err("msg_new can't create type %d front %d\n", type, front_len); | 2317 | pr_err("msg_new can't create type %d front %d\n", type, front_len); |
| 2147 | return NULL; | 2318 | return NULL; |
| 2148 | } | 2319 | } |
| 2320 | EXPORT_SYMBOL(ceph_msg_new); | ||
| 2149 | 2321 | ||
| 2150 | /* | 2322 | /* |
| 2151 | * Allocate "middle" portion of a message, if it is needed and wasn't | 2323 | * Allocate "middle" portion of a message, if it is needed and wasn't |
| @@ -2250,11 +2422,14 @@ void ceph_msg_last_put(struct kref *kref) | |||
| 2250 | m->pagelist = NULL; | 2422 | m->pagelist = NULL; |
| 2251 | } | 2423 | } |
| 2252 | 2424 | ||
| 2425 | m->trail = NULL; | ||
| 2426 | |||
| 2253 | if (m->pool) | 2427 | if (m->pool) |
| 2254 | ceph_msgpool_put(m->pool, m); | 2428 | ceph_msgpool_put(m->pool, m); |
| 2255 | else | 2429 | else |
| 2256 | ceph_msg_kfree(m); | 2430 | ceph_msg_kfree(m); |
| 2257 | } | 2431 | } |
| 2432 | EXPORT_SYMBOL(ceph_msg_last_put); | ||
| 2258 | 2433 | ||
| 2259 | void ceph_msg_dump(struct ceph_msg *msg) | 2434 | void ceph_msg_dump(struct ceph_msg *msg) |
| 2260 | { | 2435 | { |
| @@ -2275,3 +2450,4 @@ void ceph_msg_dump(struct ceph_msg *msg) | |||
| 2275 | DUMP_PREFIX_OFFSET, 16, 1, | 2450 | DUMP_PREFIX_OFFSET, 16, 1, |
| 2276 | &msg->footer, sizeof(msg->footer), true); | 2451 | &msg->footer, sizeof(msg->footer), true); |
| 2277 | } | 2452 | } |
| 2453 | EXPORT_SYMBOL(ceph_msg_dump); | ||
diff --git a/fs/ceph/mon_client.c b/net/ceph/mon_client.c index b2a5a3e4a671..8a079399174a 100644 --- a/fs/ceph/mon_client.c +++ b/net/ceph/mon_client.c | |||
| @@ -1,14 +1,16 @@ | |||
| 1 | #include "ceph_debug.h" | 1 | #include <linux/ceph/ceph_debug.h> |
| 2 | 2 | ||
| 3 | #include <linux/module.h> | ||
| 3 | #include <linux/types.h> | 4 | #include <linux/types.h> |
| 4 | #include <linux/slab.h> | 5 | #include <linux/slab.h> |
| 5 | #include <linux/random.h> | 6 | #include <linux/random.h> |
| 6 | #include <linux/sched.h> | 7 | #include <linux/sched.h> |
| 7 | 8 | ||
| 8 | #include "mon_client.h" | 9 | #include <linux/ceph/mon_client.h> |
| 9 | #include "super.h" | 10 | #include <linux/ceph/libceph.h> |
| 10 | #include "auth.h" | 11 | #include <linux/ceph/decode.h> |
| 11 | #include "decode.h" | 12 | |
| 13 | #include <linux/ceph/auth.h> | ||
| 12 | 14 | ||
| 13 | /* | 15 | /* |
| 14 | * Interact with Ceph monitor cluster. Handle requests for new map | 16 | * Interact with Ceph monitor cluster. Handle requests for new map |
| @@ -74,7 +76,7 @@ struct ceph_monmap *ceph_monmap_decode(void *p, void *end) | |||
| 74 | m->num_mon); | 76 | m->num_mon); |
| 75 | for (i = 0; i < m->num_mon; i++) | 77 | for (i = 0; i < m->num_mon; i++) |
| 76 | dout("monmap_decode mon%d is %s\n", i, | 78 | dout("monmap_decode mon%d is %s\n", i, |
| 77 | pr_addr(&m->mon_inst[i].addr.in_addr)); | 79 | ceph_pr_addr(&m->mon_inst[i].addr.in_addr)); |
| 78 | return m; | 80 | return m; |
| 79 | 81 | ||
| 80 | bad: | 82 | bad: |
| @@ -191,30 +193,33 @@ static void __send_subscribe(struct ceph_mon_client *monc) | |||
| 191 | struct ceph_msg *msg = monc->m_subscribe; | 193 | struct ceph_msg *msg = monc->m_subscribe; |
| 192 | struct ceph_mon_subscribe_item *i; | 194 | struct ceph_mon_subscribe_item *i; |
| 193 | void *p, *end; | 195 | void *p, *end; |
| 196 | int num; | ||
| 194 | 197 | ||
| 195 | p = msg->front.iov_base; | 198 | p = msg->front.iov_base; |
| 196 | end = p + msg->front_max; | 199 | end = p + msg->front_max; |
| 197 | 200 | ||
| 198 | dout("__send_subscribe to 'mdsmap' %u+\n", | 201 | num = 1 + !!monc->want_next_osdmap + !!monc->want_mdsmap; |
| 199 | (unsigned)monc->have_mdsmap); | 202 | ceph_encode_32(&p, num); |
| 203 | |||
| 200 | if (monc->want_next_osdmap) { | 204 | if (monc->want_next_osdmap) { |
| 201 | dout("__send_subscribe to 'osdmap' %u\n", | 205 | dout("__send_subscribe to 'osdmap' %u\n", |
| 202 | (unsigned)monc->have_osdmap); | 206 | (unsigned)monc->have_osdmap); |
| 203 | ceph_encode_32(&p, 3); | ||
| 204 | ceph_encode_string(&p, end, "osdmap", 6); | 207 | ceph_encode_string(&p, end, "osdmap", 6); |
| 205 | i = p; | 208 | i = p; |
| 206 | i->have = cpu_to_le64(monc->have_osdmap); | 209 | i->have = cpu_to_le64(monc->have_osdmap); |
| 207 | i->onetime = 1; | 210 | i->onetime = 1; |
| 208 | p += sizeof(*i); | 211 | p += sizeof(*i); |
| 209 | monc->want_next_osdmap = 2; /* requested */ | 212 | monc->want_next_osdmap = 2; /* requested */ |
| 210 | } else { | ||
| 211 | ceph_encode_32(&p, 2); | ||
| 212 | } | 213 | } |
| 213 | ceph_encode_string(&p, end, "mdsmap", 6); | 214 | if (monc->want_mdsmap) { |
| 214 | i = p; | 215 | dout("__send_subscribe to 'mdsmap' %u+\n", |
| 215 | i->have = cpu_to_le64(monc->have_mdsmap); | 216 | (unsigned)monc->have_mdsmap); |
| 216 | i->onetime = 0; | 217 | ceph_encode_string(&p, end, "mdsmap", 6); |
| 217 | p += sizeof(*i); | 218 | i = p; |
| 219 | i->have = cpu_to_le64(monc->have_mdsmap); | ||
| 220 | i->onetime = 0; | ||
| 221 | p += sizeof(*i); | ||
| 222 | } | ||
| 218 | ceph_encode_string(&p, end, "monmap", 6); | 223 | ceph_encode_string(&p, end, "monmap", 6); |
| 219 | i = p; | 224 | i = p; |
| 220 | i->have = 0; | 225 | i->have = 0; |
| @@ -243,7 +248,8 @@ static void handle_subscribe_ack(struct ceph_mon_client *monc, | |||
| 243 | mutex_lock(&monc->mutex); | 248 | mutex_lock(&monc->mutex); |
| 244 | if (monc->hunting) { | 249 | if (monc->hunting) { |
| 245 | pr_info("mon%d %s session established\n", | 250 | pr_info("mon%d %s session established\n", |
| 246 | monc->cur_mon, pr_addr(&monc->con->peer_addr.in_addr)); | 251 | monc->cur_mon, |
| 252 | ceph_pr_addr(&monc->con->peer_addr.in_addr)); | ||
| 247 | monc->hunting = false; | 253 | monc->hunting = false; |
| 248 | } | 254 | } |
| 249 | dout("handle_subscribe_ack after %d seconds\n", seconds); | 255 | dout("handle_subscribe_ack after %d seconds\n", seconds); |
| @@ -266,6 +272,7 @@ int ceph_monc_got_mdsmap(struct ceph_mon_client *monc, u32 got) | |||
| 266 | mutex_unlock(&monc->mutex); | 272 | mutex_unlock(&monc->mutex); |
| 267 | return 0; | 273 | return 0; |
| 268 | } | 274 | } |
| 275 | EXPORT_SYMBOL(ceph_monc_got_mdsmap); | ||
| 269 | 276 | ||
| 270 | int ceph_monc_got_osdmap(struct ceph_mon_client *monc, u32 got) | 277 | int ceph_monc_got_osdmap(struct ceph_mon_client *monc, u32 got) |
| 271 | { | 278 | { |
| @@ -310,6 +317,7 @@ int ceph_monc_open_session(struct ceph_mon_client *monc) | |||
| 310 | mutex_unlock(&monc->mutex); | 317 | mutex_unlock(&monc->mutex); |
| 311 | return 0; | 318 | return 0; |
| 312 | } | 319 | } |
| 320 | EXPORT_SYMBOL(ceph_monc_open_session); | ||
| 313 | 321 | ||
| 314 | /* | 322 | /* |
| 315 | * The monitor responds with mount ack indicate mount success. The | 323 | * The monitor responds with mount ack indicate mount success. The |
| @@ -540,6 +548,7 @@ out: | |||
| 540 | kref_put(&req->kref, release_generic_request); | 548 | kref_put(&req->kref, release_generic_request); |
| 541 | return err; | 549 | return err; |
| 542 | } | 550 | } |
| 551 | EXPORT_SYMBOL(ceph_monc_do_statfs); | ||
| 543 | 552 | ||
| 544 | /* | 553 | /* |
| 545 | * pool ops | 554 | * pool ops |
| @@ -651,6 +660,7 @@ int ceph_monc_create_snapid(struct ceph_mon_client *monc, | |||
| 651 | pool, 0, (char *)snapid, sizeof(*snapid)); | 660 | pool, 0, (char *)snapid, sizeof(*snapid)); |
| 652 | 661 | ||
| 653 | } | 662 | } |
| 663 | EXPORT_SYMBOL(ceph_monc_create_snapid); | ||
| 654 | 664 | ||
| 655 | int ceph_monc_delete_snapid(struct ceph_mon_client *monc, | 665 | int ceph_monc_delete_snapid(struct ceph_mon_client *monc, |
| 656 | u32 pool, u64 snapid) | 666 | u32 pool, u64 snapid) |
| @@ -708,9 +718,9 @@ static void delayed_work(struct work_struct *work) | |||
| 708 | */ | 718 | */ |
| 709 | static int build_initial_monmap(struct ceph_mon_client *monc) | 719 | static int build_initial_monmap(struct ceph_mon_client *monc) |
| 710 | { | 720 | { |
| 711 | struct ceph_mount_args *args = monc->client->mount_args; | 721 | struct ceph_options *opt = monc->client->options; |
| 712 | struct ceph_entity_addr *mon_addr = args->mon_addr; | 722 | struct ceph_entity_addr *mon_addr = opt->mon_addr; |
| 713 | int num_mon = args->num_mon; | 723 | int num_mon = opt->num_mon; |
| 714 | int i; | 724 | int i; |
| 715 | 725 | ||
| 716 | /* build initial monmap */ | 726 | /* build initial monmap */ |
| @@ -728,11 +738,6 @@ static int build_initial_monmap(struct ceph_mon_client *monc) | |||
| 728 | } | 738 | } |
| 729 | monc->monmap->num_mon = num_mon; | 739 | monc->monmap->num_mon = num_mon; |
| 730 | monc->have_fsid = false; | 740 | monc->have_fsid = false; |
| 731 | |||
| 732 | /* release addr memory */ | ||
| 733 | kfree(args->mon_addr); | ||
| 734 | args->mon_addr = NULL; | ||
| 735 | args->num_mon = 0; | ||
| 736 | return 0; | 741 | return 0; |
| 737 | } | 742 | } |
| 738 | 743 | ||
| @@ -753,8 +758,8 @@ int ceph_monc_init(struct ceph_mon_client *monc, struct ceph_client *cl) | |||
| 753 | monc->con = NULL; | 758 | monc->con = NULL; |
| 754 | 759 | ||
| 755 | /* authentication */ | 760 | /* authentication */ |
| 756 | monc->auth = ceph_auth_init(cl->mount_args->name, | 761 | monc->auth = ceph_auth_init(cl->options->name, |
| 757 | cl->mount_args->secret); | 762 | cl->options->secret); |
| 758 | if (IS_ERR(monc->auth)) | 763 | if (IS_ERR(monc->auth)) |
| 759 | return PTR_ERR(monc->auth); | 764 | return PTR_ERR(monc->auth); |
| 760 | monc->auth->want_keys = | 765 | monc->auth->want_keys = |
| @@ -808,6 +813,7 @@ out_monmap: | |||
| 808 | out: | 813 | out: |
| 809 | return err; | 814 | return err; |
| 810 | } | 815 | } |
| 816 | EXPORT_SYMBOL(ceph_monc_init); | ||
| 811 | 817 | ||
| 812 | void ceph_monc_stop(struct ceph_mon_client *monc) | 818 | void ceph_monc_stop(struct ceph_mon_client *monc) |
| 813 | { | 819 | { |
| @@ -832,6 +838,7 @@ void ceph_monc_stop(struct ceph_mon_client *monc) | |||
| 832 | 838 | ||
| 833 | kfree(monc->monmap); | 839 | kfree(monc->monmap); |
| 834 | } | 840 | } |
| 841 | EXPORT_SYMBOL(ceph_monc_stop); | ||
| 835 | 842 | ||
| 836 | static void handle_auth_reply(struct ceph_mon_client *monc, | 843 | static void handle_auth_reply(struct ceph_mon_client *monc, |
| 837 | struct ceph_msg *msg) | 844 | struct ceph_msg *msg) |
| @@ -889,6 +896,7 @@ int ceph_monc_validate_auth(struct ceph_mon_client *monc) | |||
| 889 | mutex_unlock(&monc->mutex); | 896 | mutex_unlock(&monc->mutex); |
| 890 | return ret; | 897 | return ret; |
| 891 | } | 898 | } |
| 899 | EXPORT_SYMBOL(ceph_monc_validate_auth); | ||
| 892 | 900 | ||
| 893 | /* | 901 | /* |
| 894 | * handle incoming message | 902 | * handle incoming message |
| @@ -922,15 +930,16 @@ static void dispatch(struct ceph_connection *con, struct ceph_msg *msg) | |||
| 922 | ceph_monc_handle_map(monc, msg); | 930 | ceph_monc_handle_map(monc, msg); |
| 923 | break; | 931 | break; |
| 924 | 932 | ||
| 925 | case CEPH_MSG_MDS_MAP: | ||
| 926 | ceph_mdsc_handle_map(&monc->client->mdsc, msg); | ||
| 927 | break; | ||
| 928 | |||
| 929 | case CEPH_MSG_OSD_MAP: | 933 | case CEPH_MSG_OSD_MAP: |
| 930 | ceph_osdc_handle_map(&monc->client->osdc, msg); | 934 | ceph_osdc_handle_map(&monc->client->osdc, msg); |
| 931 | break; | 935 | break; |
| 932 | 936 | ||
| 933 | default: | 937 | default: |
| 938 | /* can the chained handler handle it? */ | ||
| 939 | if (monc->client->extra_mon_dispatch && | ||
| 940 | monc->client->extra_mon_dispatch(monc->client, msg) == 0) | ||
| 941 | break; | ||
| 942 | |||
| 934 | pr_err("received unknown message type %d %s\n", type, | 943 | pr_err("received unknown message type %d %s\n", type, |
| 935 | ceph_msg_type_name(type)); | 944 | ceph_msg_type_name(type)); |
| 936 | } | 945 | } |
| @@ -994,7 +1003,7 @@ static void mon_fault(struct ceph_connection *con) | |||
| 994 | if (monc->con && !monc->hunting) | 1003 | if (monc->con && !monc->hunting) |
| 995 | pr_info("mon%d %s session lost, " | 1004 | pr_info("mon%d %s session lost, " |
| 996 | "hunting for new mon\n", monc->cur_mon, | 1005 | "hunting for new mon\n", monc->cur_mon, |
| 997 | pr_addr(&monc->con->peer_addr.in_addr)); | 1006 | ceph_pr_addr(&monc->con->peer_addr.in_addr)); |
| 998 | 1007 | ||
| 999 | __close_session(monc); | 1008 | __close_session(monc); |
| 1000 | if (!monc->hunting) { | 1009 | if (!monc->hunting) { |
diff --git a/fs/ceph/msgpool.c b/net/ceph/msgpool.c index dd65a6438131..d5f2d97ac05c 100644 --- a/fs/ceph/msgpool.c +++ b/net/ceph/msgpool.c | |||
| @@ -1,11 +1,11 @@ | |||
| 1 | #include "ceph_debug.h" | 1 | #include <linux/ceph/ceph_debug.h> |
| 2 | 2 | ||
| 3 | #include <linux/err.h> | 3 | #include <linux/err.h> |
| 4 | #include <linux/sched.h> | 4 | #include <linux/sched.h> |
| 5 | #include <linux/types.h> | 5 | #include <linux/types.h> |
| 6 | #include <linux/vmalloc.h> | 6 | #include <linux/vmalloc.h> |
| 7 | 7 | ||
| 8 | #include "msgpool.h" | 8 | #include <linux/ceph/msgpool.h> |
| 9 | 9 | ||
| 10 | static void *alloc_fn(gfp_t gfp_mask, void *arg) | 10 | static void *alloc_fn(gfp_t gfp_mask, void *arg) |
| 11 | { | 11 | { |
diff --git a/fs/ceph/osd_client.c b/net/ceph/osd_client.c index 3b5571b8ce22..79391994b3ed 100644 --- a/fs/ceph/osd_client.c +++ b/net/ceph/osd_client.c | |||
| @@ -1,17 +1,22 @@ | |||
| 1 | #include "ceph_debug.h" | 1 | #include <linux/ceph/ceph_debug.h> |
| 2 | 2 | ||
| 3 | #include <linux/module.h> | ||
| 3 | #include <linux/err.h> | 4 | #include <linux/err.h> |
| 4 | #include <linux/highmem.h> | 5 | #include <linux/highmem.h> |
| 5 | #include <linux/mm.h> | 6 | #include <linux/mm.h> |
| 6 | #include <linux/pagemap.h> | 7 | #include <linux/pagemap.h> |
| 7 | #include <linux/slab.h> | 8 | #include <linux/slab.h> |
| 8 | #include <linux/uaccess.h> | 9 | #include <linux/uaccess.h> |
| 10 | #ifdef CONFIG_BLOCK | ||
| 11 | #include <linux/bio.h> | ||
| 12 | #endif | ||
| 9 | 13 | ||
| 10 | #include "super.h" | 14 | #include <linux/ceph/libceph.h> |
| 11 | #include "osd_client.h" | 15 | #include <linux/ceph/osd_client.h> |
| 12 | #include "messenger.h" | 16 | #include <linux/ceph/messenger.h> |
| 13 | #include "decode.h" | 17 | #include <linux/ceph/decode.h> |
| 14 | #include "auth.h" | 18 | #include <linux/ceph/auth.h> |
| 19 | #include <linux/ceph/pagelist.h> | ||
| 15 | 20 | ||
| 16 | #define OSD_OP_FRONT_LEN 4096 | 21 | #define OSD_OP_FRONT_LEN 4096 |
| 17 | #define OSD_OPREPLY_FRONT_LEN 512 | 22 | #define OSD_OPREPLY_FRONT_LEN 512 |
| @@ -22,6 +27,59 @@ static int __kick_requests(struct ceph_osd_client *osdc, | |||
| 22 | 27 | ||
| 23 | static void kick_requests(struct ceph_osd_client *osdc, struct ceph_osd *osd); | 28 | static void kick_requests(struct ceph_osd_client *osdc, struct ceph_osd *osd); |
| 24 | 29 | ||
| 30 | static int op_needs_trail(int op) | ||
| 31 | { | ||
| 32 | switch (op) { | ||
| 33 | case CEPH_OSD_OP_GETXATTR: | ||
| 34 | case CEPH_OSD_OP_SETXATTR: | ||
| 35 | case CEPH_OSD_OP_CMPXATTR: | ||
| 36 | case CEPH_OSD_OP_CALL: | ||
| 37 | return 1; | ||
| 38 | default: | ||
| 39 | return 0; | ||
| 40 | } | ||
| 41 | } | ||
| 42 | |||
| 43 | static int op_has_extent(int op) | ||
| 44 | { | ||
| 45 | return (op == CEPH_OSD_OP_READ || | ||
| 46 | op == CEPH_OSD_OP_WRITE); | ||
| 47 | } | ||
| 48 | |||
| 49 | void ceph_calc_raw_layout(struct ceph_osd_client *osdc, | ||
| 50 | struct ceph_file_layout *layout, | ||
| 51 | u64 snapid, | ||
| 52 | u64 off, u64 *plen, u64 *bno, | ||
| 53 | struct ceph_osd_request *req, | ||
| 54 | struct ceph_osd_req_op *op) | ||
| 55 | { | ||
| 56 | struct ceph_osd_request_head *reqhead = req->r_request->front.iov_base; | ||
| 57 | u64 orig_len = *plen; | ||
| 58 | u64 objoff, objlen; /* extent in object */ | ||
| 59 | |||
| 60 | reqhead->snapid = cpu_to_le64(snapid); | ||
| 61 | |||
| 62 | /* object extent? */ | ||
| 63 | ceph_calc_file_object_mapping(layout, off, plen, bno, | ||
| 64 | &objoff, &objlen); | ||
| 65 | if (*plen < orig_len) | ||
| 66 | dout(" skipping last %llu, final file extent %llu~%llu\n", | ||
| 67 | orig_len - *plen, off, *plen); | ||
| 68 | |||
| 69 | if (op_has_extent(op->op)) { | ||
| 70 | op->extent.offset = objoff; | ||
| 71 | op->extent.length = objlen; | ||
| 72 | } | ||
| 73 | req->r_num_pages = calc_pages_for(off, *plen); | ||
| 74 | if (op->op == CEPH_OSD_OP_WRITE) | ||
| 75 | op->payload_len = *plen; | ||
| 76 | |||
| 77 | dout("calc_layout bno=%llx %llu~%llu (%d pages)\n", | ||
| 78 | *bno, objoff, objlen, req->r_num_pages); | ||
| 79 | |||
| 80 | } | ||
| 81 | EXPORT_SYMBOL(ceph_calc_raw_layout); | ||
| 82 | |||
| 25 | /* | 83 | /* |
| 26 | * Implement client access to distributed object storage cluster. | 84 | * Implement client access to distributed object storage cluster. |
| 27 | * | 85 | * |
| @@ -48,34 +106,19 @@ static void kick_requests(struct ceph_osd_client *osdc, struct ceph_osd *osd); | |||
| 48 | * fill osd op in request message. | 106 | * fill osd op in request message. |
| 49 | */ | 107 | */ |
| 50 | static void calc_layout(struct ceph_osd_client *osdc, | 108 | static void calc_layout(struct ceph_osd_client *osdc, |
| 51 | struct ceph_vino vino, struct ceph_file_layout *layout, | 109 | struct ceph_vino vino, |
| 110 | struct ceph_file_layout *layout, | ||
| 52 | u64 off, u64 *plen, | 111 | u64 off, u64 *plen, |
| 53 | struct ceph_osd_request *req) | 112 | struct ceph_osd_request *req, |
| 113 | struct ceph_osd_req_op *op) | ||
| 54 | { | 114 | { |
| 55 | struct ceph_osd_request_head *reqhead = req->r_request->front.iov_base; | ||
| 56 | struct ceph_osd_op *op = (void *)(reqhead + 1); | ||
| 57 | u64 orig_len = *plen; | ||
| 58 | u64 objoff, objlen; /* extent in object */ | ||
| 59 | u64 bno; | 115 | u64 bno; |
| 60 | 116 | ||
| 61 | reqhead->snapid = cpu_to_le64(vino.snap); | 117 | ceph_calc_raw_layout(osdc, layout, vino.snap, off, |
| 62 | 118 | plen, &bno, req, op); | |
| 63 | /* object extent? */ | ||
| 64 | ceph_calc_file_object_mapping(layout, off, plen, &bno, | ||
| 65 | &objoff, &objlen); | ||
| 66 | if (*plen < orig_len) | ||
| 67 | dout(" skipping last %llu, final file extent %llu~%llu\n", | ||
| 68 | orig_len - *plen, off, *plen); | ||
| 69 | 119 | ||
| 70 | sprintf(req->r_oid, "%llx.%08llx", vino.ino, bno); | 120 | sprintf(req->r_oid, "%llx.%08llx", vino.ino, bno); |
| 71 | req->r_oid_len = strlen(req->r_oid); | 121 | req->r_oid_len = strlen(req->r_oid); |
| 72 | |||
| 73 | op->extent.offset = cpu_to_le64(objoff); | ||
| 74 | op->extent.length = cpu_to_le64(objlen); | ||
| 75 | req->r_num_pages = calc_pages_for(off, *plen); | ||
| 76 | |||
| 77 | dout("calc_layout %s (%d) %llu~%llu (%d pages)\n", | ||
| 78 | req->r_oid, req->r_oid_len, objoff, objlen, req->r_num_pages); | ||
| 79 | } | 122 | } |
| 80 | 123 | ||
| 81 | /* | 124 | /* |
| @@ -101,56 +144,66 @@ void ceph_osdc_release_request(struct kref *kref) | |||
| 101 | if (req->r_own_pages) | 144 | if (req->r_own_pages) |
| 102 | ceph_release_page_vector(req->r_pages, | 145 | ceph_release_page_vector(req->r_pages, |
| 103 | req->r_num_pages); | 146 | req->r_num_pages); |
| 147 | #ifdef CONFIG_BLOCK | ||
| 148 | if (req->r_bio) | ||
| 149 | bio_put(req->r_bio); | ||
| 150 | #endif | ||
| 104 | ceph_put_snap_context(req->r_snapc); | 151 | ceph_put_snap_context(req->r_snapc); |
| 152 | if (req->r_trail) { | ||
| 153 | ceph_pagelist_release(req->r_trail); | ||
| 154 | kfree(req->r_trail); | ||
| 155 | } | ||
| 105 | if (req->r_mempool) | 156 | if (req->r_mempool) |
| 106 | mempool_free(req, req->r_osdc->req_mempool); | 157 | mempool_free(req, req->r_osdc->req_mempool); |
| 107 | else | 158 | else |
| 108 | kfree(req); | 159 | kfree(req); |
| 109 | } | 160 | } |
| 161 | EXPORT_SYMBOL(ceph_osdc_release_request); | ||
| 110 | 162 | ||
| 111 | /* | 163 | static int get_num_ops(struct ceph_osd_req_op *ops, int *needs_trail) |
| 112 | * build new request AND message, calculate layout, and adjust file | 164 | { |
| 113 | * extent as needed. | 165 | int i = 0; |
| 114 | * | 166 | |
| 115 | * if the file was recently truncated, we include information about its | 167 | if (needs_trail) |
| 116 | * old and new size so that the object can be updated appropriately. (we | 168 | *needs_trail = 0; |
| 117 | * avoid synchronously deleting truncated objects because it's slow.) | 169 | while (ops[i].op) { |
| 118 | * | 170 | if (needs_trail && op_needs_trail(ops[i].op)) |
| 119 | * if @do_sync, include a 'startsync' command so that the osd will flush | 171 | *needs_trail = 1; |
| 120 | * data quickly. | 172 | i++; |
| 121 | */ | 173 | } |
| 122 | struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc, | 174 | |
| 123 | struct ceph_file_layout *layout, | 175 | return i; |
| 124 | struct ceph_vino vino, | 176 | } |
| 125 | u64 off, u64 *plen, | 177 | |
| 126 | int opcode, int flags, | 178 | struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc, |
| 179 | int flags, | ||
| 127 | struct ceph_snap_context *snapc, | 180 | struct ceph_snap_context *snapc, |
| 128 | int do_sync, | 181 | struct ceph_osd_req_op *ops, |
| 129 | u32 truncate_seq, | 182 | bool use_mempool, |
| 130 | u64 truncate_size, | 183 | gfp_t gfp_flags, |
| 131 | struct timespec *mtime, | 184 | struct page **pages, |
| 132 | bool use_mempool, int num_reply) | 185 | struct bio *bio) |
| 133 | { | 186 | { |
| 134 | struct ceph_osd_request *req; | 187 | struct ceph_osd_request *req; |
| 135 | struct ceph_msg *msg; | 188 | struct ceph_msg *msg; |
| 136 | struct ceph_osd_request_head *head; | 189 | int needs_trail; |
| 137 | struct ceph_osd_op *op; | 190 | int num_op = get_num_ops(ops, &needs_trail); |
| 138 | void *p; | 191 | size_t msg_size = sizeof(struct ceph_osd_request_head); |
| 139 | int num_op = 1 + do_sync; | 192 | |
| 140 | size_t msg_size = sizeof(*head) + num_op*sizeof(*op); | 193 | msg_size += num_op*sizeof(struct ceph_osd_op); |
| 141 | int i; | ||
| 142 | 194 | ||
| 143 | if (use_mempool) { | 195 | if (use_mempool) { |
| 144 | req = mempool_alloc(osdc->req_mempool, GFP_NOFS); | 196 | req = mempool_alloc(osdc->req_mempool, gfp_flags); |
| 145 | memset(req, 0, sizeof(*req)); | 197 | memset(req, 0, sizeof(*req)); |
| 146 | } else { | 198 | } else { |
| 147 | req = kzalloc(sizeof(*req), GFP_NOFS); | 199 | req = kzalloc(sizeof(*req), gfp_flags); |
| 148 | } | 200 | } |
| 149 | if (req == NULL) | 201 | if (req == NULL) |
| 150 | return NULL; | 202 | return NULL; |
| 151 | 203 | ||
| 152 | req->r_osdc = osdc; | 204 | req->r_osdc = osdc; |
| 153 | req->r_mempool = use_mempool; | 205 | req->r_mempool = use_mempool; |
| 206 | |||
| 154 | kref_init(&req->r_kref); | 207 | kref_init(&req->r_kref); |
| 155 | init_completion(&req->r_completion); | 208 | init_completion(&req->r_completion); |
| 156 | init_completion(&req->r_safe_completion); | 209 | init_completion(&req->r_safe_completion); |
| @@ -164,13 +217,22 @@ struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc, | |||
| 164 | msg = ceph_msgpool_get(&osdc->msgpool_op_reply, 0); | 217 | msg = ceph_msgpool_get(&osdc->msgpool_op_reply, 0); |
| 165 | else | 218 | else |
| 166 | msg = ceph_msg_new(CEPH_MSG_OSD_OPREPLY, | 219 | msg = ceph_msg_new(CEPH_MSG_OSD_OPREPLY, |
| 167 | OSD_OPREPLY_FRONT_LEN, GFP_NOFS); | 220 | OSD_OPREPLY_FRONT_LEN, gfp_flags); |
| 168 | if (!msg) { | 221 | if (!msg) { |
| 169 | ceph_osdc_put_request(req); | 222 | ceph_osdc_put_request(req); |
| 170 | return NULL; | 223 | return NULL; |
| 171 | } | 224 | } |
| 172 | req->r_reply = msg; | 225 | req->r_reply = msg; |
| 173 | 226 | ||
| 227 | /* allocate space for the trailing data */ | ||
| 228 | if (needs_trail) { | ||
| 229 | req->r_trail = kmalloc(sizeof(struct ceph_pagelist), gfp_flags); | ||
| 230 | if (!req->r_trail) { | ||
| 231 | ceph_osdc_put_request(req); | ||
| 232 | return NULL; | ||
| 233 | } | ||
| 234 | ceph_pagelist_init(req->r_trail); | ||
| 235 | } | ||
| 174 | /* create request message; allow space for oid */ | 236 | /* create request message; allow space for oid */ |
| 175 | msg_size += 40; | 237 | msg_size += 40; |
| 176 | if (snapc) | 238 | if (snapc) |
| @@ -178,18 +240,115 @@ struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc, | |||
| 178 | if (use_mempool) | 240 | if (use_mempool) |
| 179 | msg = ceph_msgpool_get(&osdc->msgpool_op, 0); | 241 | msg = ceph_msgpool_get(&osdc->msgpool_op, 0); |
| 180 | else | 242 | else |
| 181 | msg = ceph_msg_new(CEPH_MSG_OSD_OP, msg_size, GFP_NOFS); | 243 | msg = ceph_msg_new(CEPH_MSG_OSD_OP, msg_size, gfp_flags); |
| 182 | if (!msg) { | 244 | if (!msg) { |
| 183 | ceph_osdc_put_request(req); | 245 | ceph_osdc_put_request(req); |
| 184 | return NULL; | 246 | return NULL; |
| 185 | } | 247 | } |
| 248 | |||
| 186 | msg->hdr.type = cpu_to_le16(CEPH_MSG_OSD_OP); | 249 | msg->hdr.type = cpu_to_le16(CEPH_MSG_OSD_OP); |
| 187 | memset(msg->front.iov_base, 0, msg->front.iov_len); | 250 | memset(msg->front.iov_base, 0, msg->front.iov_len); |
| 251 | |||
| 252 | req->r_request = msg; | ||
| 253 | req->r_pages = pages; | ||
| 254 | #ifdef CONFIG_BLOCK | ||
| 255 | if (bio) { | ||
| 256 | req->r_bio = bio; | ||
| 257 | bio_get(req->r_bio); | ||
| 258 | } | ||
| 259 | #endif | ||
| 260 | |||
| 261 | return req; | ||
| 262 | } | ||
| 263 | EXPORT_SYMBOL(ceph_osdc_alloc_request); | ||
| 264 | |||
| 265 | static void osd_req_encode_op(struct ceph_osd_request *req, | ||
| 266 | struct ceph_osd_op *dst, | ||
| 267 | struct ceph_osd_req_op *src) | ||
| 268 | { | ||
| 269 | dst->op = cpu_to_le16(src->op); | ||
| 270 | |||
| 271 | switch (dst->op) { | ||
| 272 | case CEPH_OSD_OP_READ: | ||
| 273 | case CEPH_OSD_OP_WRITE: | ||
| 274 | dst->extent.offset = | ||
| 275 | cpu_to_le64(src->extent.offset); | ||
| 276 | dst->extent.length = | ||
| 277 | cpu_to_le64(src->extent.length); | ||
| 278 | dst->extent.truncate_size = | ||
| 279 | cpu_to_le64(src->extent.truncate_size); | ||
| 280 | dst->extent.truncate_seq = | ||
| 281 | cpu_to_le32(src->extent.truncate_seq); | ||
| 282 | break; | ||
| 283 | |||
| 284 | case CEPH_OSD_OP_GETXATTR: | ||
| 285 | case CEPH_OSD_OP_SETXATTR: | ||
| 286 | case CEPH_OSD_OP_CMPXATTR: | ||
| 287 | BUG_ON(!req->r_trail); | ||
| 288 | |||
| 289 | dst->xattr.name_len = cpu_to_le32(src->xattr.name_len); | ||
| 290 | dst->xattr.value_len = cpu_to_le32(src->xattr.value_len); | ||
| 291 | dst->xattr.cmp_op = src->xattr.cmp_op; | ||
| 292 | dst->xattr.cmp_mode = src->xattr.cmp_mode; | ||
| 293 | ceph_pagelist_append(req->r_trail, src->xattr.name, | ||
| 294 | src->xattr.name_len); | ||
| 295 | ceph_pagelist_append(req->r_trail, src->xattr.val, | ||
| 296 | src->xattr.value_len); | ||
| 297 | break; | ||
| 298 | case CEPH_OSD_OP_CALL: | ||
| 299 | BUG_ON(!req->r_trail); | ||
| 300 | |||
| 301 | dst->cls.class_len = src->cls.class_len; | ||
| 302 | dst->cls.method_len = src->cls.method_len; | ||
| 303 | dst->cls.indata_len = cpu_to_le32(src->cls.indata_len); | ||
| 304 | |||
| 305 | ceph_pagelist_append(req->r_trail, src->cls.class_name, | ||
| 306 | src->cls.class_len); | ||
| 307 | ceph_pagelist_append(req->r_trail, src->cls.method_name, | ||
| 308 | src->cls.method_len); | ||
| 309 | ceph_pagelist_append(req->r_trail, src->cls.indata, | ||
| 310 | src->cls.indata_len); | ||
| 311 | break; | ||
| 312 | case CEPH_OSD_OP_ROLLBACK: | ||
| 313 | dst->snap.snapid = cpu_to_le64(src->snap.snapid); | ||
| 314 | break; | ||
| 315 | case CEPH_OSD_OP_STARTSYNC: | ||
| 316 | break; | ||
| 317 | default: | ||
| 318 | pr_err("unrecognized osd opcode %d\n", dst->op); | ||
| 319 | WARN_ON(1); | ||
| 320 | break; | ||
| 321 | } | ||
| 322 | dst->payload_len = cpu_to_le32(src->payload_len); | ||
| 323 | } | ||
| 324 | |||
| 325 | /* | ||
| 326 | * build new request AND message | ||
| 327 | * | ||
| 328 | */ | ||
| 329 | void ceph_osdc_build_request(struct ceph_osd_request *req, | ||
| 330 | u64 off, u64 *plen, | ||
| 331 | struct ceph_osd_req_op *src_ops, | ||
| 332 | struct ceph_snap_context *snapc, | ||
| 333 | struct timespec *mtime, | ||
| 334 | const char *oid, | ||
| 335 | int oid_len) | ||
| 336 | { | ||
| 337 | struct ceph_msg *msg = req->r_request; | ||
| 338 | struct ceph_osd_request_head *head; | ||
| 339 | struct ceph_osd_req_op *src_op; | ||
| 340 | struct ceph_osd_op *op; | ||
| 341 | void *p; | ||
| 342 | int num_op = get_num_ops(src_ops, NULL); | ||
| 343 | size_t msg_size = sizeof(*head) + num_op*sizeof(*op); | ||
| 344 | int flags = req->r_flags; | ||
| 345 | u64 data_len = 0; | ||
| 346 | int i; | ||
| 347 | |||
| 188 | head = msg->front.iov_base; | 348 | head = msg->front.iov_base; |
| 189 | op = (void *)(head + 1); | 349 | op = (void *)(head + 1); |
| 190 | p = (void *)(op + num_op); | 350 | p = (void *)(op + num_op); |
| 191 | 351 | ||
| 192 | req->r_request = msg; | ||
| 193 | req->r_snapc = ceph_get_snap_context(snapc); | 352 | req->r_snapc = ceph_get_snap_context(snapc); |
| 194 | 353 | ||
| 195 | head->client_inc = cpu_to_le32(1); /* always, for now. */ | 354 | head->client_inc = cpu_to_le32(1); /* always, for now. */ |
| @@ -197,29 +356,23 @@ struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc, | |||
| 197 | if (flags & CEPH_OSD_FLAG_WRITE) | 356 | if (flags & CEPH_OSD_FLAG_WRITE) |
| 198 | ceph_encode_timespec(&head->mtime, mtime); | 357 | ceph_encode_timespec(&head->mtime, mtime); |
| 199 | head->num_ops = cpu_to_le16(num_op); | 358 | head->num_ops = cpu_to_le16(num_op); |
| 200 | op->op = cpu_to_le16(opcode); | ||
| 201 | 359 | ||
| 202 | /* calculate max write size */ | ||
| 203 | calc_layout(osdc, vino, layout, off, plen, req); | ||
| 204 | req->r_file_layout = *layout; /* keep a copy */ | ||
| 205 | |||
| 206 | if (flags & CEPH_OSD_FLAG_WRITE) { | ||
| 207 | req->r_request->hdr.data_off = cpu_to_le16(off); | ||
| 208 | req->r_request->hdr.data_len = cpu_to_le32(*plen); | ||
| 209 | op->payload_len = cpu_to_le32(*plen); | ||
| 210 | } | ||
| 211 | op->extent.truncate_size = cpu_to_le64(truncate_size); | ||
| 212 | op->extent.truncate_seq = cpu_to_le32(truncate_seq); | ||
| 213 | 360 | ||
| 214 | /* fill in oid */ | 361 | /* fill in oid */ |
| 215 | head->object_len = cpu_to_le32(req->r_oid_len); | 362 | head->object_len = cpu_to_le32(oid_len); |
| 216 | memcpy(p, req->r_oid, req->r_oid_len); | 363 | memcpy(p, oid, oid_len); |
| 217 | p += req->r_oid_len; | 364 | p += oid_len; |
| 218 | 365 | ||
| 219 | if (do_sync) { | 366 | src_op = src_ops; |
| 367 | while (src_op->op) { | ||
| 368 | osd_req_encode_op(req, op, src_op); | ||
| 369 | src_op++; | ||
| 220 | op++; | 370 | op++; |
| 221 | op->op = cpu_to_le16(CEPH_OSD_OP_STARTSYNC); | ||
| 222 | } | 371 | } |
| 372 | |||
| 373 | if (req->r_trail) | ||
| 374 | data_len += req->r_trail->length; | ||
| 375 | |||
| 223 | if (snapc) { | 376 | if (snapc) { |
| 224 | head->snap_seq = cpu_to_le64(snapc->seq); | 377 | head->snap_seq = cpu_to_le64(snapc->seq); |
| 225 | head->num_snaps = cpu_to_le32(snapc->num_snaps); | 378 | head->num_snaps = cpu_to_le32(snapc->num_snaps); |
| @@ -229,12 +382,79 @@ struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc, | |||
| 229 | } | 382 | } |
| 230 | } | 383 | } |
| 231 | 384 | ||
| 385 | if (flags & CEPH_OSD_FLAG_WRITE) { | ||
| 386 | req->r_request->hdr.data_off = cpu_to_le16(off); | ||
| 387 | req->r_request->hdr.data_len = cpu_to_le32(*plen + data_len); | ||
| 388 | } else if (data_len) { | ||
| 389 | req->r_request->hdr.data_off = 0; | ||
| 390 | req->r_request->hdr.data_len = cpu_to_le32(data_len); | ||
| 391 | } | ||
| 392 | |||
| 232 | BUG_ON(p > msg->front.iov_base + msg->front.iov_len); | 393 | BUG_ON(p > msg->front.iov_base + msg->front.iov_len); |
| 233 | msg_size = p - msg->front.iov_base; | 394 | msg_size = p - msg->front.iov_base; |
| 234 | msg->front.iov_len = msg_size; | 395 | msg->front.iov_len = msg_size; |
| 235 | msg->hdr.front_len = cpu_to_le32(msg_size); | 396 | msg->hdr.front_len = cpu_to_le32(msg_size); |
| 397 | return; | ||
| 398 | } | ||
| 399 | EXPORT_SYMBOL(ceph_osdc_build_request); | ||
| 400 | |||
| 401 | /* | ||
| 402 | * build new request AND message, calculate layout, and adjust file | ||
| 403 | * extent as needed. | ||
| 404 | * | ||
| 405 | * if the file was recently truncated, we include information about its | ||
| 406 | * old and new size so that the object can be updated appropriately. (we | ||
| 407 | * avoid synchronously deleting truncated objects because it's slow.) | ||
| 408 | * | ||
| 409 | * if @do_sync, include a 'startsync' command so that the osd will flush | ||
| 410 | * data quickly. | ||
| 411 | */ | ||
| 412 | struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc, | ||
| 413 | struct ceph_file_layout *layout, | ||
| 414 | struct ceph_vino vino, | ||
| 415 | u64 off, u64 *plen, | ||
| 416 | int opcode, int flags, | ||
| 417 | struct ceph_snap_context *snapc, | ||
| 418 | int do_sync, | ||
| 419 | u32 truncate_seq, | ||
| 420 | u64 truncate_size, | ||
| 421 | struct timespec *mtime, | ||
| 422 | bool use_mempool, int num_reply) | ||
| 423 | { | ||
| 424 | struct ceph_osd_req_op ops[3]; | ||
| 425 | struct ceph_osd_request *req; | ||
| 426 | |||
| 427 | ops[0].op = opcode; | ||
| 428 | ops[0].extent.truncate_seq = truncate_seq; | ||
| 429 | ops[0].extent.truncate_size = truncate_size; | ||
| 430 | ops[0].payload_len = 0; | ||
| 431 | |||
| 432 | if (do_sync) { | ||
| 433 | ops[1].op = CEPH_OSD_OP_STARTSYNC; | ||
| 434 | ops[1].payload_len = 0; | ||
| 435 | ops[2].op = 0; | ||
| 436 | } else | ||
| 437 | ops[1].op = 0; | ||
| 438 | |||
| 439 | req = ceph_osdc_alloc_request(osdc, flags, | ||
| 440 | snapc, ops, | ||
| 441 | use_mempool, | ||
| 442 | GFP_NOFS, NULL, NULL); | ||
| 443 | if (IS_ERR(req)) | ||
| 444 | return req; | ||
| 445 | |||
| 446 | /* calculate max write size */ | ||
| 447 | calc_layout(osdc, vino, layout, off, plen, req, ops); | ||
| 448 | req->r_file_layout = *layout; /* keep a copy */ | ||
| 449 | |||
| 450 | ceph_osdc_build_request(req, off, plen, ops, | ||
| 451 | snapc, | ||
| 452 | mtime, | ||
| 453 | req->r_oid, req->r_oid_len); | ||
| 454 | |||
| 236 | return req; | 455 | return req; |
| 237 | } | 456 | } |
| 457 | EXPORT_SYMBOL(ceph_osdc_new_request); | ||
| 238 | 458 | ||
| 239 | /* | 459 | /* |
| 240 | * We keep osd requests in an rbtree, sorted by ->r_tid. | 460 | * We keep osd requests in an rbtree, sorted by ->r_tid. |
| @@ -389,7 +609,7 @@ static void __move_osd_to_lru(struct ceph_osd_client *osdc, | |||
| 389 | dout("__move_osd_to_lru %p\n", osd); | 609 | dout("__move_osd_to_lru %p\n", osd); |
| 390 | BUG_ON(!list_empty(&osd->o_osd_lru)); | 610 | BUG_ON(!list_empty(&osd->o_osd_lru)); |
| 391 | list_add_tail(&osd->o_osd_lru, &osdc->osd_lru); | 611 | list_add_tail(&osd->o_osd_lru, &osdc->osd_lru); |
| 392 | osd->lru_ttl = jiffies + osdc->client->mount_args->osd_idle_ttl * HZ; | 612 | osd->lru_ttl = jiffies + osdc->client->options->osd_idle_ttl * HZ; |
| 393 | } | 613 | } |
| 394 | 614 | ||
| 395 | static void __remove_osd_from_lru(struct ceph_osd *osd) | 615 | static void __remove_osd_from_lru(struct ceph_osd *osd) |
| @@ -483,7 +703,7 @@ static struct ceph_osd *__lookup_osd(struct ceph_osd_client *osdc, int o) | |||
| 483 | static void __schedule_osd_timeout(struct ceph_osd_client *osdc) | 703 | static void __schedule_osd_timeout(struct ceph_osd_client *osdc) |
| 484 | { | 704 | { |
| 485 | schedule_delayed_work(&osdc->timeout_work, | 705 | schedule_delayed_work(&osdc->timeout_work, |
| 486 | osdc->client->mount_args->osd_keepalive_timeout * HZ); | 706 | osdc->client->options->osd_keepalive_timeout * HZ); |
| 487 | } | 707 | } |
| 488 | 708 | ||
| 489 | static void __cancel_osd_timeout(struct ceph_osd_client *osdc) | 709 | static void __cancel_osd_timeout(struct ceph_osd_client *osdc) |
| @@ -684,9 +904,9 @@ static void handle_timeout(struct work_struct *work) | |||
| 684 | container_of(work, struct ceph_osd_client, timeout_work.work); | 904 | container_of(work, struct ceph_osd_client, timeout_work.work); |
| 685 | struct ceph_osd_request *req, *last_req = NULL; | 905 | struct ceph_osd_request *req, *last_req = NULL; |
| 686 | struct ceph_osd *osd; | 906 | struct ceph_osd *osd; |
| 687 | unsigned long timeout = osdc->client->mount_args->osd_timeout * HZ; | 907 | unsigned long timeout = osdc->client->options->osd_timeout * HZ; |
| 688 | unsigned long keepalive = | 908 | unsigned long keepalive = |
| 689 | osdc->client->mount_args->osd_keepalive_timeout * HZ; | 909 | osdc->client->options->osd_keepalive_timeout * HZ; |
| 690 | unsigned long last_stamp = 0; | 910 | unsigned long last_stamp = 0; |
| 691 | struct rb_node *p; | 911 | struct rb_node *p; |
| 692 | struct list_head slow_osds; | 912 | struct list_head slow_osds; |
| @@ -773,7 +993,7 @@ static void handle_osds_timeout(struct work_struct *work) | |||
| 773 | container_of(work, struct ceph_osd_client, | 993 | container_of(work, struct ceph_osd_client, |
| 774 | osds_timeout_work.work); | 994 | osds_timeout_work.work); |
| 775 | unsigned long delay = | 995 | unsigned long delay = |
| 776 | osdc->client->mount_args->osd_idle_ttl * HZ >> 2; | 996 | osdc->client->options->osd_idle_ttl * HZ >> 2; |
| 777 | 997 | ||
| 778 | dout("osds timeout\n"); | 998 | dout("osds timeout\n"); |
| 779 | down_read(&osdc->map_sem); | 999 | down_read(&osdc->map_sem); |
| @@ -1104,6 +1324,10 @@ int ceph_osdc_start_request(struct ceph_osd_client *osdc, | |||
| 1104 | 1324 | ||
| 1105 | req->r_request->pages = req->r_pages; | 1325 | req->r_request->pages = req->r_pages; |
| 1106 | req->r_request->nr_pages = req->r_num_pages; | 1326 | req->r_request->nr_pages = req->r_num_pages; |
| 1327 | #ifdef CONFIG_BLOCK | ||
| 1328 | req->r_request->bio = req->r_bio; | ||
| 1329 | #endif | ||
| 1330 | req->r_request->trail = req->r_trail; | ||
| 1107 | 1331 | ||
| 1108 | register_request(osdc, req); | 1332 | register_request(osdc, req); |
| 1109 | 1333 | ||
| @@ -1131,6 +1355,7 @@ int ceph_osdc_start_request(struct ceph_osd_client *osdc, | |||
| 1131 | up_read(&osdc->map_sem); | 1355 | up_read(&osdc->map_sem); |
| 1132 | return rc; | 1356 | return rc; |
| 1133 | } | 1357 | } |
| 1358 | EXPORT_SYMBOL(ceph_osdc_start_request); | ||
| 1134 | 1359 | ||
| 1135 | /* | 1360 | /* |
| 1136 | * wait for a request to complete | 1361 | * wait for a request to complete |
| @@ -1153,6 +1378,7 @@ int ceph_osdc_wait_request(struct ceph_osd_client *osdc, | |||
| 1153 | dout("wait_request tid %llu result %d\n", req->r_tid, req->r_result); | 1378 | dout("wait_request tid %llu result %d\n", req->r_tid, req->r_result); |
| 1154 | return req->r_result; | 1379 | return req->r_result; |
| 1155 | } | 1380 | } |
| 1381 | EXPORT_SYMBOL(ceph_osdc_wait_request); | ||
| 1156 | 1382 | ||
| 1157 | /* | 1383 | /* |
| 1158 | * sync - wait for all in-flight requests to flush. avoid starvation. | 1384 | * sync - wait for all in-flight requests to flush. avoid starvation. |
| @@ -1186,6 +1412,7 @@ void ceph_osdc_sync(struct ceph_osd_client *osdc) | |||
| 1186 | mutex_unlock(&osdc->request_mutex); | 1412 | mutex_unlock(&osdc->request_mutex); |
| 1187 | dout("sync done (thru tid %llu)\n", last_tid); | 1413 | dout("sync done (thru tid %llu)\n", last_tid); |
| 1188 | } | 1414 | } |
| 1415 | EXPORT_SYMBOL(ceph_osdc_sync); | ||
| 1189 | 1416 | ||
| 1190 | /* | 1417 | /* |
| 1191 | * init, shutdown | 1418 | * init, shutdown |
| @@ -1211,7 +1438,7 @@ int ceph_osdc_init(struct ceph_osd_client *osdc, struct ceph_client *client) | |||
| 1211 | INIT_DELAYED_WORK(&osdc->osds_timeout_work, handle_osds_timeout); | 1438 | INIT_DELAYED_WORK(&osdc->osds_timeout_work, handle_osds_timeout); |
| 1212 | 1439 | ||
| 1213 | schedule_delayed_work(&osdc->osds_timeout_work, | 1440 | schedule_delayed_work(&osdc->osds_timeout_work, |
| 1214 | round_jiffies_relative(osdc->client->mount_args->osd_idle_ttl * HZ)); | 1441 | round_jiffies_relative(osdc->client->options->osd_idle_ttl * HZ)); |
| 1215 | 1442 | ||
| 1216 | err = -ENOMEM; | 1443 | err = -ENOMEM; |
| 1217 | osdc->req_mempool = mempool_create_kmalloc_pool(10, | 1444 | osdc->req_mempool = mempool_create_kmalloc_pool(10, |
| @@ -1237,6 +1464,7 @@ out_mempool: | |||
| 1237 | out: | 1464 | out: |
| 1238 | return err; | 1465 | return err; |
| 1239 | } | 1466 | } |
| 1467 | EXPORT_SYMBOL(ceph_osdc_init); | ||
| 1240 | 1468 | ||
| 1241 | void ceph_osdc_stop(struct ceph_osd_client *osdc) | 1469 | void ceph_osdc_stop(struct ceph_osd_client *osdc) |
| 1242 | { | 1470 | { |
| @@ -1251,6 +1479,7 @@ void ceph_osdc_stop(struct ceph_osd_client *osdc) | |||
| 1251 | ceph_msgpool_destroy(&osdc->msgpool_op); | 1479 | ceph_msgpool_destroy(&osdc->msgpool_op); |
| 1252 | ceph_msgpool_destroy(&osdc->msgpool_op_reply); | 1480 | ceph_msgpool_destroy(&osdc->msgpool_op_reply); |
| 1253 | } | 1481 | } |
| 1482 | EXPORT_SYMBOL(ceph_osdc_stop); | ||
| 1254 | 1483 | ||
| 1255 | /* | 1484 | /* |
| 1256 | * Read some contiguous pages. If we cross a stripe boundary, shorten | 1485 | * Read some contiguous pages. If we cross a stripe boundary, shorten |
| @@ -1288,6 +1517,7 @@ int ceph_osdc_readpages(struct ceph_osd_client *osdc, | |||
| 1288 | dout("readpages result %d\n", rc); | 1517 | dout("readpages result %d\n", rc); |
| 1289 | return rc; | 1518 | return rc; |
| 1290 | } | 1519 | } |
| 1520 | EXPORT_SYMBOL(ceph_osdc_readpages); | ||
| 1291 | 1521 | ||
| 1292 | /* | 1522 | /* |
| 1293 | * do a synchronous write on N pages | 1523 | * do a synchronous write on N pages |
| @@ -1330,6 +1560,7 @@ int ceph_osdc_writepages(struct ceph_osd_client *osdc, struct ceph_vino vino, | |||
| 1330 | dout("writepages result %d\n", rc); | 1560 | dout("writepages result %d\n", rc); |
| 1331 | return rc; | 1561 | return rc; |
| 1332 | } | 1562 | } |
| 1563 | EXPORT_SYMBOL(ceph_osdc_writepages); | ||
| 1333 | 1564 | ||
| 1334 | /* | 1565 | /* |
| 1335 | * handle incoming message | 1566 | * handle incoming message |
| @@ -1420,6 +1651,9 @@ static struct ceph_msg *get_reply(struct ceph_connection *con, | |||
| 1420 | } | 1651 | } |
| 1421 | m->pages = req->r_pages; | 1652 | m->pages = req->r_pages; |
| 1422 | m->nr_pages = req->r_num_pages; | 1653 | m->nr_pages = req->r_num_pages; |
| 1654 | #ifdef CONFIG_BLOCK | ||
| 1655 | m->bio = req->r_bio; | ||
| 1656 | #endif | ||
| 1423 | } | 1657 | } |
| 1424 | *skip = 0; | 1658 | *skip = 0; |
| 1425 | req->r_con_filling_msg = ceph_con_get(con); | 1659 | req->r_con_filling_msg = ceph_con_get(con); |
diff --git a/fs/ceph/osdmap.c b/net/ceph/osdmap.c index e31f118f1392..d73f3f6efa36 100644 --- a/fs/ceph/osdmap.c +++ b/net/ceph/osdmap.c | |||
| @@ -1,14 +1,15 @@ | |||
| 1 | 1 | ||
| 2 | #include "ceph_debug.h" | 2 | #include <linux/ceph/ceph_debug.h> |
| 3 | 3 | ||
| 4 | #include <linux/module.h> | ||
| 4 | #include <linux/slab.h> | 5 | #include <linux/slab.h> |
| 5 | #include <asm/div64.h> | 6 | #include <asm/div64.h> |
| 6 | 7 | ||
| 7 | #include "super.h" | 8 | #include <linux/ceph/libceph.h> |
| 8 | #include "osdmap.h" | 9 | #include <linux/ceph/osdmap.h> |
| 9 | #include "crush/hash.h" | 10 | #include <linux/ceph/decode.h> |
| 10 | #include "crush/mapper.h" | 11 | #include <linux/crush/hash.h> |
| 11 | #include "decode.h" | 12 | #include <linux/crush/mapper.h> |
| 12 | 13 | ||
| 13 | char *ceph_osdmap_state_str(char *str, int len, int state) | 14 | char *ceph_osdmap_state_str(char *str, int len, int state) |
| 14 | { | 15 | { |
| @@ -417,6 +418,20 @@ static struct ceph_pg_pool_info *__lookup_pg_pool(struct rb_root *root, int id) | |||
| 417 | return NULL; | 418 | return NULL; |
| 418 | } | 419 | } |
| 419 | 420 | ||
| 421 | int ceph_pg_poolid_by_name(struct ceph_osdmap *map, const char *name) | ||
| 422 | { | ||
| 423 | struct rb_node *rbp; | ||
| 424 | |||
| 425 | for (rbp = rb_first(&map->pg_pools); rbp; rbp = rb_next(rbp)) { | ||
| 426 | struct ceph_pg_pool_info *pi = | ||
| 427 | rb_entry(rbp, struct ceph_pg_pool_info, node); | ||
| 428 | if (pi->name && strcmp(pi->name, name) == 0) | ||
| 429 | return pi->id; | ||
| 430 | } | ||
| 431 | return -ENOENT; | ||
| 432 | } | ||
| 433 | EXPORT_SYMBOL(ceph_pg_poolid_by_name); | ||
| 434 | |||
| 420 | static void __remove_pg_pool(struct rb_root *root, struct ceph_pg_pool_info *pi) | 435 | static void __remove_pg_pool(struct rb_root *root, struct ceph_pg_pool_info *pi) |
| 421 | { | 436 | { |
| 422 | rb_erase(&pi->node, root); | 437 | rb_erase(&pi->node, root); |
| @@ -966,6 +981,7 @@ void ceph_calc_file_object_mapping(struct ceph_file_layout *layout, | |||
| 966 | 981 | ||
| 967 | dout(" obj extent %llu~%llu\n", *oxoff, *oxlen); | 982 | dout(" obj extent %llu~%llu\n", *oxoff, *oxlen); |
| 968 | } | 983 | } |
| 984 | EXPORT_SYMBOL(ceph_calc_file_object_mapping); | ||
| 969 | 985 | ||
| 970 | /* | 986 | /* |
| 971 | * calculate an object layout (i.e. pgid) from an oid, | 987 | * calculate an object layout (i.e. pgid) from an oid, |
| @@ -1011,6 +1027,7 @@ int ceph_calc_object_layout(struct ceph_object_layout *ol, | |||
| 1011 | ol->ol_stripe_unit = fl->fl_object_stripe_unit; | 1027 | ol->ol_stripe_unit = fl->fl_object_stripe_unit; |
| 1012 | return 0; | 1028 | return 0; |
| 1013 | } | 1029 | } |
| 1030 | EXPORT_SYMBOL(ceph_calc_object_layout); | ||
| 1014 | 1031 | ||
| 1015 | /* | 1032 | /* |
| 1016 | * Calculate raw osd vector for the given pgid. Return pointer to osd | 1033 | * Calculate raw osd vector for the given pgid. Return pointer to osd |
| @@ -1108,3 +1125,4 @@ int ceph_calc_pg_primary(struct ceph_osdmap *osdmap, struct ceph_pg pgid) | |||
| 1108 | return osds[i]; | 1125 | return osds[i]; |
| 1109 | return -1; | 1126 | return -1; |
| 1110 | } | 1127 | } |
| 1128 | EXPORT_SYMBOL(ceph_calc_pg_primary); | ||
diff --git a/net/ceph/pagelist.c b/net/ceph/pagelist.c new file mode 100644 index 000000000000..13cb409a7bba --- /dev/null +++ b/net/ceph/pagelist.c | |||
| @@ -0,0 +1,154 @@ | |||
| 1 | |||
| 2 | #include <linux/module.h> | ||
| 3 | #include <linux/gfp.h> | ||
| 4 | #include <linux/pagemap.h> | ||
| 5 | #include <linux/highmem.h> | ||
| 6 | #include <linux/ceph/pagelist.h> | ||
| 7 | |||
| 8 | static void ceph_pagelist_unmap_tail(struct ceph_pagelist *pl) | ||
| 9 | { | ||
| 10 | if (pl->mapped_tail) { | ||
| 11 | struct page *page = list_entry(pl->head.prev, struct page, lru); | ||
| 12 | kunmap(page); | ||
| 13 | pl->mapped_tail = NULL; | ||
| 14 | } | ||
| 15 | } | ||
| 16 | |||
| 17 | int ceph_pagelist_release(struct ceph_pagelist *pl) | ||
| 18 | { | ||
| 19 | ceph_pagelist_unmap_tail(pl); | ||
| 20 | while (!list_empty(&pl->head)) { | ||
| 21 | struct page *page = list_first_entry(&pl->head, struct page, | ||
| 22 | lru); | ||
| 23 | list_del(&page->lru); | ||
| 24 | __free_page(page); | ||
| 25 | } | ||
| 26 | ceph_pagelist_free_reserve(pl); | ||
| 27 | return 0; | ||
| 28 | } | ||
| 29 | EXPORT_SYMBOL(ceph_pagelist_release); | ||
| 30 | |||
| 31 | static int ceph_pagelist_addpage(struct ceph_pagelist *pl) | ||
| 32 | { | ||
| 33 | struct page *page; | ||
| 34 | |||
| 35 | if (!pl->num_pages_free) { | ||
| 36 | page = __page_cache_alloc(GFP_NOFS); | ||
| 37 | } else { | ||
| 38 | page = list_first_entry(&pl->free_list, struct page, lru); | ||
| 39 | list_del(&page->lru); | ||
| 40 | --pl->num_pages_free; | ||
| 41 | } | ||
| 42 | if (!page) | ||
| 43 | return -ENOMEM; | ||
| 44 | pl->room += PAGE_SIZE; | ||
| 45 | ceph_pagelist_unmap_tail(pl); | ||
| 46 | list_add_tail(&page->lru, &pl->head); | ||
| 47 | pl->mapped_tail = kmap(page); | ||
| 48 | return 0; | ||
| 49 | } | ||
| 50 | |||
| 51 | int ceph_pagelist_append(struct ceph_pagelist *pl, const void *buf, size_t len) | ||
| 52 | { | ||
| 53 | while (pl->room < len) { | ||
| 54 | size_t bit = pl->room; | ||
| 55 | int ret; | ||
| 56 | |||
| 57 | memcpy(pl->mapped_tail + (pl->length & ~PAGE_CACHE_MASK), | ||
| 58 | buf, bit); | ||
| 59 | pl->length += bit; | ||
| 60 | pl->room -= bit; | ||
| 61 | buf += bit; | ||
| 62 | len -= bit; | ||
| 63 | ret = ceph_pagelist_addpage(pl); | ||
| 64 | if (ret) | ||
| 65 | return ret; | ||
| 66 | } | ||
| 67 | |||
| 68 | memcpy(pl->mapped_tail + (pl->length & ~PAGE_CACHE_MASK), buf, len); | ||
| 69 | pl->length += len; | ||
| 70 | pl->room -= len; | ||
| 71 | return 0; | ||
| 72 | } | ||
| 73 | EXPORT_SYMBOL(ceph_pagelist_append); | ||
| 74 | |||
| 75 | /** | ||
| 76 | * Allocate enough pages for a pagelist to append the given amount | ||
| 77 | * of data without without allocating. | ||
| 78 | * Returns: 0 on success, -ENOMEM on error. | ||
| 79 | */ | ||
| 80 | int ceph_pagelist_reserve(struct ceph_pagelist *pl, size_t space) | ||
| 81 | { | ||
| 82 | if (space <= pl->room) | ||
| 83 | return 0; | ||
| 84 | space -= pl->room; | ||
| 85 | space = (space + PAGE_SIZE - 1) >> PAGE_SHIFT; /* conv to num pages */ | ||
| 86 | |||
| 87 | while (space > pl->num_pages_free) { | ||
| 88 | struct page *page = __page_cache_alloc(GFP_NOFS); | ||
| 89 | if (!page) | ||
| 90 | return -ENOMEM; | ||
| 91 | list_add_tail(&page->lru, &pl->free_list); | ||
| 92 | ++pl->num_pages_free; | ||
| 93 | } | ||
| 94 | return 0; | ||
| 95 | } | ||
| 96 | EXPORT_SYMBOL(ceph_pagelist_reserve); | ||
| 97 | |||
| 98 | /** | ||
| 99 | * Free any pages that have been preallocated. | ||
| 100 | */ | ||
| 101 | int ceph_pagelist_free_reserve(struct ceph_pagelist *pl) | ||
| 102 | { | ||
| 103 | while (!list_empty(&pl->free_list)) { | ||
| 104 | struct page *page = list_first_entry(&pl->free_list, | ||
| 105 | struct page, lru); | ||
| 106 | list_del(&page->lru); | ||
| 107 | __free_page(page); | ||
| 108 | --pl->num_pages_free; | ||
| 109 | } | ||
| 110 | BUG_ON(pl->num_pages_free); | ||
| 111 | return 0; | ||
| 112 | } | ||
| 113 | EXPORT_SYMBOL(ceph_pagelist_free_reserve); | ||
| 114 | |||
| 115 | /** | ||
| 116 | * Create a truncation point. | ||
| 117 | */ | ||
| 118 | void ceph_pagelist_set_cursor(struct ceph_pagelist *pl, | ||
| 119 | struct ceph_pagelist_cursor *c) | ||
| 120 | { | ||
| 121 | c->pl = pl; | ||
| 122 | c->page_lru = pl->head.prev; | ||
| 123 | c->room = pl->room; | ||
| 124 | } | ||
| 125 | EXPORT_SYMBOL(ceph_pagelist_set_cursor); | ||
| 126 | |||
| 127 | /** | ||
| 128 | * Truncate a pagelist to the given point. Move extra pages to reserve. | ||
| 129 | * This won't sleep. | ||
| 130 | * Returns: 0 on success, | ||
| 131 | * -EINVAL if the pagelist doesn't match the trunc point pagelist | ||
| 132 | */ | ||
| 133 | int ceph_pagelist_truncate(struct ceph_pagelist *pl, | ||
| 134 | struct ceph_pagelist_cursor *c) | ||
| 135 | { | ||
| 136 | struct page *page; | ||
| 137 | |||
| 138 | if (pl != c->pl) | ||
| 139 | return -EINVAL; | ||
| 140 | ceph_pagelist_unmap_tail(pl); | ||
| 141 | while (pl->head.prev != c->page_lru) { | ||
| 142 | page = list_entry(pl->head.prev, struct page, lru); | ||
| 143 | list_del(&page->lru); /* remove from pagelist */ | ||
| 144 | list_add_tail(&page->lru, &pl->free_list); /* add to reserve */ | ||
| 145 | ++pl->num_pages_free; | ||
| 146 | } | ||
| 147 | pl->room = c->room; | ||
| 148 | if (!list_empty(&pl->head)) { | ||
| 149 | page = list_entry(pl->head.prev, struct page, lru); | ||
| 150 | pl->mapped_tail = kmap(page); | ||
| 151 | } | ||
| 152 | return 0; | ||
| 153 | } | ||
| 154 | EXPORT_SYMBOL(ceph_pagelist_truncate); | ||
diff --git a/net/ceph/pagevec.c b/net/ceph/pagevec.c new file mode 100644 index 000000000000..54caf0687155 --- /dev/null +++ b/net/ceph/pagevec.c | |||
| @@ -0,0 +1,223 @@ | |||
| 1 | #include <linux/ceph/ceph_debug.h> | ||
| 2 | |||
| 3 | #include <linux/module.h> | ||
| 4 | #include <linux/sched.h> | ||
| 5 | #include <linux/slab.h> | ||
| 6 | #include <linux/file.h> | ||
| 7 | #include <linux/namei.h> | ||
| 8 | #include <linux/writeback.h> | ||
| 9 | |||
| 10 | #include <linux/ceph/libceph.h> | ||
| 11 | |||
| 12 | /* | ||
| 13 | * build a vector of user pages | ||
| 14 | */ | ||
| 15 | struct page **ceph_get_direct_page_vector(const char __user *data, | ||
| 16 | int num_pages, | ||
| 17 | loff_t off, size_t len) | ||
| 18 | { | ||
| 19 | struct page **pages; | ||
| 20 | int rc; | ||
| 21 | |||
| 22 | pages = kmalloc(sizeof(*pages) * num_pages, GFP_NOFS); | ||
| 23 | if (!pages) | ||
| 24 | return ERR_PTR(-ENOMEM); | ||
| 25 | |||
| 26 | down_read(¤t->mm->mmap_sem); | ||
| 27 | rc = get_user_pages(current, current->mm, (unsigned long)data, | ||
| 28 | num_pages, 0, 0, pages, NULL); | ||
| 29 | up_read(¤t->mm->mmap_sem); | ||
| 30 | if (rc < 0) | ||
| 31 | goto fail; | ||
| 32 | return pages; | ||
| 33 | |||
| 34 | fail: | ||
| 35 | kfree(pages); | ||
| 36 | return ERR_PTR(rc); | ||
| 37 | } | ||
| 38 | EXPORT_SYMBOL(ceph_get_direct_page_vector); | ||
| 39 | |||
/*
 * Drop one reference on each of the num_pages pages in the vector, then
 * free the vector itself.
 */
void ceph_put_page_vector(struct page **pages, int num_pages)
{
	int idx = 0;

	while (idx < num_pages)
		put_page(pages[idx++]);

	kfree(pages);
}
EXPORT_SYMBOL(ceph_put_page_vector);
| 49 | |||
/*
 * Free each of the num_pages (order-0) pages in the vector, then free
 * the vector itself.
 */
void ceph_release_page_vector(struct page **pages, int num_pages)
{
	int idx = 0;

	while (idx < num_pages) {
		__free_pages(pages[idx], 0);
		idx++;
	}

	kfree(pages);
}
EXPORT_SYMBOL(ceph_release_page_vector);
| 59 | |||
| 60 | /* | ||
| 61 | * allocate a vector new pages | ||
| 62 | */ | ||
| 63 | struct page **ceph_alloc_page_vector(int num_pages, gfp_t flags) | ||
| 64 | { | ||
| 65 | struct page **pages; | ||
| 66 | int i; | ||
| 67 | |||
| 68 | pages = kmalloc(sizeof(*pages) * num_pages, flags); | ||
| 69 | if (!pages) | ||
| 70 | return ERR_PTR(-ENOMEM); | ||
| 71 | for (i = 0; i < num_pages; i++) { | ||
| 72 | pages[i] = __page_cache_alloc(flags); | ||
| 73 | if (pages[i] == NULL) { | ||
| 74 | ceph_release_page_vector(pages, i); | ||
| 75 | return ERR_PTR(-ENOMEM); | ||
| 76 | } | ||
| 77 | } | ||
| 78 | return pages; | ||
| 79 | } | ||
| 80 | EXPORT_SYMBOL(ceph_alloc_page_vector); | ||
| 81 | |||
| 82 | /* | ||
| 83 | * copy user data into a page vector | ||
| 84 | */ | ||
| 85 | int ceph_copy_user_to_page_vector(struct page **pages, | ||
| 86 | const char __user *data, | ||
| 87 | loff_t off, size_t len) | ||
| 88 | { | ||
| 89 | int i = 0; | ||
| 90 | int po = off & ~PAGE_CACHE_MASK; | ||
| 91 | int left = len; | ||
| 92 | int l, bad; | ||
| 93 | |||
| 94 | while (left > 0) { | ||
| 95 | l = min_t(int, PAGE_CACHE_SIZE-po, left); | ||
| 96 | bad = copy_from_user(page_address(pages[i]) + po, data, l); | ||
| 97 | if (bad == l) | ||
| 98 | return -EFAULT; | ||
| 99 | data += l - bad; | ||
| 100 | left -= l - bad; | ||
| 101 | po += l - bad; | ||
| 102 | if (po == PAGE_CACHE_SIZE) { | ||
| 103 | po = 0; | ||
| 104 | i++; | ||
| 105 | } | ||
| 106 | } | ||
| 107 | return len; | ||
| 108 | } | ||
| 109 | EXPORT_SYMBOL(ceph_copy_user_to_page_vector); | ||
| 110 | |||
| 111 | int ceph_copy_to_page_vector(struct page **pages, | ||
| 112 | const char *data, | ||
| 113 | loff_t off, size_t len) | ||
| 114 | { | ||
| 115 | int i = 0; | ||
| 116 | size_t po = off & ~PAGE_CACHE_MASK; | ||
| 117 | size_t left = len; | ||
| 118 | size_t l; | ||
| 119 | |||
| 120 | while (left > 0) { | ||
| 121 | l = min_t(size_t, PAGE_CACHE_SIZE-po, left); | ||
| 122 | memcpy(page_address(pages[i]) + po, data, l); | ||
| 123 | data += l; | ||
| 124 | left -= l; | ||
| 125 | po += l; | ||
| 126 | if (po == PAGE_CACHE_SIZE) { | ||
| 127 | po = 0; | ||
| 128 | i++; | ||
| 129 | } | ||
| 130 | } | ||
| 131 | return len; | ||
| 132 | } | ||
| 133 | EXPORT_SYMBOL(ceph_copy_to_page_vector); | ||
| 134 | |||
| 135 | int ceph_copy_from_page_vector(struct page **pages, | ||
| 136 | char *data, | ||
| 137 | loff_t off, size_t len) | ||
| 138 | { | ||
| 139 | int i = 0; | ||
| 140 | size_t po = off & ~PAGE_CACHE_MASK; | ||
| 141 | size_t left = len; | ||
| 142 | size_t l; | ||
| 143 | |||
| 144 | while (left > 0) { | ||
| 145 | l = min_t(size_t, PAGE_CACHE_SIZE-po, left); | ||
| 146 | memcpy(data, page_address(pages[i]) + po, l); | ||
| 147 | data += l; | ||
| 148 | left -= l; | ||
| 149 | po += l; | ||
| 150 | if (po == PAGE_CACHE_SIZE) { | ||
| 151 | po = 0; | ||
| 152 | i++; | ||
| 153 | } | ||
| 154 | } | ||
| 155 | return len; | ||
| 156 | } | ||
| 157 | EXPORT_SYMBOL(ceph_copy_from_page_vector); | ||
| 158 | |||
| 159 | /* | ||
| 160 | * copy user data from a page vector into a user pointer | ||
| 161 | */ | ||
| 162 | int ceph_copy_page_vector_to_user(struct page **pages, | ||
| 163 | char __user *data, | ||
| 164 | loff_t off, size_t len) | ||
| 165 | { | ||
| 166 | int i = 0; | ||
| 167 | int po = off & ~PAGE_CACHE_MASK; | ||
| 168 | int left = len; | ||
| 169 | int l, bad; | ||
| 170 | |||
| 171 | while (left > 0) { | ||
| 172 | l = min_t(int, left, PAGE_CACHE_SIZE-po); | ||
| 173 | bad = copy_to_user(data, page_address(pages[i]) + po, l); | ||
| 174 | if (bad == l) | ||
| 175 | return -EFAULT; | ||
| 176 | data += l - bad; | ||
| 177 | left -= l - bad; | ||
| 178 | if (po) { | ||
| 179 | po += l - bad; | ||
| 180 | if (po == PAGE_CACHE_SIZE) | ||
| 181 | po = 0; | ||
| 182 | } | ||
| 183 | i++; | ||
| 184 | } | ||
| 185 | return len; | ||
| 186 | } | ||
| 187 | EXPORT_SYMBOL(ceph_copy_page_vector_to_user); | ||
| 188 | |||
| 189 | /* | ||
| 190 | * Zero an extent within a page vector. Offset is relative to the | ||
| 191 | * start of the first page. | ||
| 192 | */ | ||
| 193 | void ceph_zero_page_vector_range(int off, int len, struct page **pages) | ||
| 194 | { | ||
| 195 | int i = off >> PAGE_CACHE_SHIFT; | ||
| 196 | |||
| 197 | off &= ~PAGE_CACHE_MASK; | ||
| 198 | |||
| 199 | dout("zero_page_vector_page %u~%u\n", off, len); | ||
| 200 | |||
| 201 | /* leading partial page? */ | ||
| 202 | if (off) { | ||
| 203 | int end = min((int)PAGE_CACHE_SIZE, off + len); | ||
| 204 | dout("zeroing %d %p head from %d\n", i, pages[i], | ||
| 205 | (int)off); | ||
| 206 | zero_user_segment(pages[i], off, end); | ||
| 207 | len -= (end - off); | ||
| 208 | i++; | ||
| 209 | } | ||
| 210 | while (len >= PAGE_CACHE_SIZE) { | ||
| 211 | dout("zeroing %d %p len=%d\n", i, pages[i], len); | ||
| 212 | zero_user_segment(pages[i], 0, PAGE_CACHE_SIZE); | ||
| 213 | len -= PAGE_CACHE_SIZE; | ||
| 214 | i++; | ||
| 215 | } | ||
| 216 | /* trailing partial page? */ | ||
| 217 | if (len) { | ||
| 218 | dout("zeroing %d %p tail to %d\n", i, pages[i], (int)len); | ||
| 219 | zero_user_segment(pages[i], 0, len); | ||
| 220 | } | ||
| 221 | } | ||
| 222 | EXPORT_SYMBOL(ceph_zero_page_vector_range); | ||
| 223 | |||
