/*
 * linux/include/linux/ext3_jbd.h
 *
 * Written by Stephen C. Tweedie <sct@redhat.com>, 1999
 *
 * Copyright 1998--1999 Red Hat corp --- All Rights Reserved
 *
 * This file is part of the Linux kernel and is made available under
 * the terms of the GNU General Public License, version 2, or at your
 * option, any later version, incorporated herein by reference.
 *
 * Ext3-specific journaling extensions.
 */

#ifndef _LINUX_EXT3_JBD_H
#define _LINUX_EXT3_JBD_H

#include <linux/fs.h>
#include <linux/jbd.h>
#include <linux/ext3_fs.h>

#define EXT3_JOURNAL(inode)	(EXT3_SB((inode)->i_sb)->s_journal)

/* Define the number of blocks we need to account to a transaction to
 * modify one block of data.
 *
 * We may have to touch one inode, one bitmap buffer, up to three
 * indirection blocks, the group and superblock summaries, and the data
 * block to complete the transaction.  */

#define EXT3_SINGLEDATA_TRANS_BLOCKS	8U
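
/* Worked example of the accounting above: 1 inode + 1 bitmap buffer +
 * 3 indirection blocks + 2 group/superblock summaries + 1 data block
 * = 8 blocks, hence the value 8U. */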

/* Extended attribute operations touch at most two data buffers,
 * two bitmap buffers, and two group summaries, in addition to the inode
 * and the superblock, which are already accounted for. */

#define EXT3_XATTR_TRANS_BLOCKS		6U

/* Define the minimum size for a transaction which modifies data.  This
 * needs to take into account the fact that we may end up modifying two
 * quota files too (one for the group, one for the user quota).  The
 * superblock only gets updated once, of course, so don't bother
 * counting that again for the quota updates. */

#define EXT3_DATA_TRANS_BLOCKS(sb)	(EXT3_SINGLEDATA_TRANS_BLOCKS + \
					 EXT3_XATTR_TRANS_BLOCKS - 2 + \
					 2*EXT3_QUOTA_TRANS_BLOCKS(sb))
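
/* For example (illustrative): with quota enabled,
 * EXT3_QUOTA_TRANS_BLOCKS(sb) is 2, giving 8 + 6 - 2 + 2*2 = 16
 * credits; without quota it is 8 + 6 - 2 = 12. */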

/* Delete operations potentially hit one directory's namespace plus an
 * entire inode, plus arbitrary amounts of bitmap/indirection data.  Be
 * generous.  We can grow the delete transaction later if necessary. */

#define EXT3_DELETE_TRANS_BLOCKS(sb)	(2 * EXT3_DATA_TRANS_BLOCKS(sb) + 64)

/* Define an arbitrary limit for the amount of data we will anticipate
 * writing to any given transaction.  For unbounded transactions such as
 * write(2) and truncate(2) we can write more than this, but we always
 * start off at the maximum transaction size and grow the transaction
 * optimistically as we go. */

#define EXT3_MAX_TRANS_DATA		64U

/* We break up a large truncate or write transaction once the handle's
 * buffer credits get this low; we then need either to extend the
 * transaction or to start a new one.  Reserve enough space here for
 * inode, bitmap, superblock, group and indirection updates for at least
 * one block, plus two quota updates.  Quota allocations are not
 * needed.  (See the illustrative sketch below.) */

#define EXT3_RESERVE_TRANS_BLOCKS	12U

#define EXT3_INDEX_EXTRA_TRANS_BLOCKS	8
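
/*
 * Illustrative sketch (hypothetical helper, not part of this header):
 * how a truncate/write path typically uses the reserve threshold above,
 * modelled on the helpers in fs/ext3/inode.c.
 */
#if 0	/* example only */
static int example_ensure_credits(handle_t *handle, struct inode *inode)
{
	if (handle->h_buffer_credits > EXT3_RESERVE_TRANS_BLOCKS)
		return 0;	/* still enough credits in hand */
	/* journal_extend() returns 0 if the running transaction grew. */
	if (!ext3_journal_extend(handle, EXT3_DATA_TRANS_BLOCKS(inode->i_sb)))
		return 0;
	/* Otherwise commit what we have and start a fresh transaction. */
	return ext3_journal_restart(handle, EXT3_DATA_TRANS_BLOCKS(inode->i_sb));
}
#endif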

#ifdef CONFIG_QUOTA
/* Number of blocks needed for a quota update - we know that the structure
 * was allocated so we need to update only inode+data */
#define EXT3_QUOTA_TRANS_BLOCKS(sb) (test_opt(sb, QUOTA) ? 2 : 0)
/* Number of blocks needed for a quota insert/delete - we do some block
 * writes but inode, sb and group updates are done only once */
#define EXT3_QUOTA_INIT_BLOCKS(sb) (test_opt(sb, QUOTA) ? (DQUOT_INIT_ALLOC*\
		(EXT3_SINGLEDATA_TRANS_BLOCKS-3)+3+DQUOT_INIT_REWRITE) : 0)
#define EXT3_QUOTA_DEL_BLOCKS(sb) (test_opt(sb, QUOTA) ? (DQUOT_DEL_ALLOC*\
		(EXT3_SINGLEDATA_TRANS_BLOCKS-3)+3+DQUOT_DEL_REWRITE) : 0)
#else
#define EXT3_QUOTA_TRANS_BLOCKS(sb) 0
#define EXT3_QUOTA_INIT_BLOCKS(sb) 0
#define EXT3_QUOTA_DEL_BLOCKS(sb) 0
#endif
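
/*
 * Illustrative reading of the INIT/DEL formulas above: each block the
 * quota code allocates or frees costs EXT3_SINGLEDATA_TRANS_BLOCKS-3
 * credits (the data block, bitmap and indirection blocks, but not the
 * inode, superblock and group descriptor), while the trailing +3 counts
 * those once-only inode/sb/group updates, and DQUOT_*_REWRITE covers
 * rewriting existing quota structures.
 */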

int
ext3_mark_iloc_dirty(handle_t *handle,
		     struct inode *inode,
		     struct ext3_iloc *iloc);

/*
 * On success, we end up with an outstanding reference count against
 * iloc->bh.  This _must_ be cleaned up later (see the illustrative
 * sketch below ext3_mark_inode_dirty()).
 */

int ext3_reserve_inode_write(handle_t *handle, struct inode *inode,
			struct ext3_iloc *iloc);

int ext3_mark_inode_dirty(handle_t *handle, struct inode *inode);
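
/*
 * Illustrative sketch (hypothetical helper, not part of this header):
 * the usual pairing that balances the iloc->bh reference taken by
 * ext3_reserve_inode_write().
 */
#if 0	/* example only */
static int example_update_inode(handle_t *handle, struct inode *inode)
{
	struct ext3_iloc iloc;
	int err;

	err = ext3_reserve_inode_write(handle, inode, &iloc);
	if (err)
		return err;
	/* ... modify the in-core inode here ... */
	/* Writes the inode out and drops the iloc.bh reference. */
	return ext3_mark_iloc_dirty(handle, inode, &iloc);
}
#endif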

/*
 * Wrapper functions with which ext3 calls into JBD.  The intent here is
 * to allow these to be turned into appropriate stubs so that ext3 can
 * control ext2 filesystems, letting ext2+ext3 systems need only one
 * filesystem driver.  This work hasn't been done yet.
 */

static inline void ext3_journal_release_buffer(handle_t *handle,
						struct buffer_head *bh)
{
	journal_release_buffer(handle, bh);
}

void ext3_journal_abort_handle(const char *caller, const char *err_fn,
		struct buffer_head *bh, handle_t *handle, int err);

int __ext3_journal_get_undo_access(const char *where, handle_t *handle,
				struct buffer_head *bh);

int __ext3_journal_get_write_access(const char *where, handle_t *handle,
				struct buffer_head *bh);

int __ext3_journal_forget(const char *where, handle_t *handle,
				struct buffer_head *bh);

int __ext3_journal_revoke(const char *where, handle_t *handle,
				unsigned long blocknr, struct buffer_head *bh);

int __ext3_journal_get_create_access(const char *where,
				handle_t *handle, struct buffer_head *bh);

int __ext3_journal_dirty_metadata(const char *where,
				handle_t *handle, struct buffer_head *bh);

#define ext3_journal_get_undo_access(handle, bh) \
	__ext3_journal_get_undo_access(__FUNCTION__, (handle), (bh))
#define ext3_journal_get_write_access(handle, bh) \
	__ext3_journal_get_write_access(__FUNCTION__, (handle), (bh))
#define ext3_journal_revoke(handle, blocknr, bh) \
	__ext3_journal_revoke(__FUNCTION__, (handle), (blocknr), (bh))
#define ext3_journal_get_create_access(handle, bh) \
	__ext3_journal_get_create_access(__FUNCTION__, (handle), (bh))
#define ext3_journal_dirty_metadata(handle, bh) \
	__ext3_journal_dirty_metadata(__FUNCTION__, (handle), (bh))
#define ext3_journal_forget(handle, bh) \
	__ext3_journal_forget(__FUNCTION__, (handle), (bh))
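
/*
 * The __FUNCTION__ wrappers above pass the calling function's name down
 * to the __ext3_journal_*() helpers, so a failing JBD call can be
 * reported via ext3_journal_abort_handle() with the name of the ext3
 * function that issued it.
 */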

int ext3_journal_dirty_data(handle_t *handle, struct buffer_head *bh);

handle_t *ext3_journal_start_sb(struct super_block *sb, int nblocks);
int __ext3_journal_stop(const char *where, handle_t *handle);

static inline handle_t *ext3_journal_start(struct inode *inode, int nblocks)
{
	return ext3_journal_start_sb(inode->i_sb, nblocks);
}

#define ext3_journal_stop(handle) \
	__ext3_journal_stop(__FUNCTION__, (handle))

static inline handle_t *ext3_journal_current_handle(void)
{
	return journal_current_handle();
}

static inline int ext3_journal_extend(handle_t *handle, int nblocks)
{
	return journal_extend(handle, nblocks);
}

static inline int ext3_journal_restart(handle_t *handle, int nblocks)
{
	return journal_restart(handle, nblocks);
}

static inline int ext3_journal_blocks_per_page(struct inode *inode)
{
	return journal_blocks_per_page(inode);
}

static inline int ext3_journal_force_commit(journal_t *journal)
{
	return journal_force_commit(journal);
}
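
/*
 * Illustrative sketch (hypothetical caller, not part of this header):
 * the canonical handle lifecycle built from the wrappers above.
 */
#if 0	/* example only */
static int example_modify_block(struct inode *inode, struct buffer_head *bh)
{
	handle_t *handle;
	int err, err2;

	handle = ext3_journal_start(inode, EXT3_DATA_TRANS_BLOCKS(inode->i_sb));
	if (IS_ERR(handle))
		return PTR_ERR(handle);

	err = ext3_journal_get_write_access(handle, bh);
	if (!err) {
		/* ... modify bh->b_data under journal protection ... */
		err = ext3_journal_dirty_metadata(handle, bh);
	}

	err2 = ext3_journal_stop(handle);
	if (!err)
		err = err2;
	return err;
}
#endif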

/* super.c */
int ext3_force_commit(struct super_block *sb);

static inline int ext3_should_journal_data(struct inode *inode)
{
	if (!S_ISREG(inode->i_mode))
		return 1;
	if (test_opt(inode->i_sb, DATA_FLAGS) == EXT3_MOUNT_JOURNAL_DATA)
		return 1;
	if (EXT3_I(inode)->i_flags & EXT3_JOURNAL_DATA_FL)
		return 1;
	return 0;
}

static inline int ext3_should_order_data(struct inode *inode)
{
	if (!S_ISREG(inode->i_mode))
		return 0;
	if (EXT3_I(inode)->i_flags & EXT3_JOURNAL_DATA_FL)
		return 0;
	if (test_opt(inode->i_sb, DATA_FLAGS) == EXT3_MOUNT_ORDERED_DATA)
		return 1;
	return 0;
}

static inline int ext3_should_writeback_data(struct inode *inode)
{
	if (!S_ISREG(inode->i_mode))
		return 0;
	if (EXT3_I(inode)->i_flags & EXT3_JOURNAL_DATA_FL)
		return 0;
	if (test_opt(inode->i_sb, DATA_FLAGS) == EXT3_MOUNT_WRITEBACK_DATA)
		return 1;
	return 0;
}
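
/*
 * Note: for a regular file exactly one of the three predicates above
 * returns 1, matching ext3's mutually exclusive data journaling modes
 * (data=journal, data=ordered, data=writeback).
 */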

#endif	/* _LINUX_EXT3_JBD_H */
"hl kwd">MINOR(dev)); return buffer; } EXPORT_SYMBOL(__bdevname); static struct parsed_partitions * check_partition(struct gendisk *hd, struct block_device *bdev) { struct parsed_partitions *state; int i, res, err; state = kzalloc(sizeof(struct parsed_partitions), GFP_KERNEL); if (!state) return NULL; state->pp_buf = (char *)__get_free_page(GFP_KERNEL); if (!state->pp_buf) { kfree(state); return NULL; } state->pp_buf[0] = '\0'; state->bdev = bdev; disk_name(hd, 0, state->name); snprintf(state->pp_buf, PAGE_SIZE, " %s:", state->name); if (isdigit(state->name[strlen(state->name)-1])) sprintf(state->name, "p"); state->limit = disk_max_parts(hd); i = res = err = 0; while (!res && check_part[i]) { memset(&state->parts, 0, sizeof(state->parts)); res = check_part[i++](state); if (res < 0) { /* We have hit an I/O error which we don't report now. * But record it, and let the others do their job. */ err = res; res = 0; } } if (res > 0) { printk(KERN_INFO "%s", state->pp_buf); free_page((unsigned long)state->pp_buf); return state; } if (state->access_beyond_eod) err = -ENOSPC; if (err) /* The partition is unrecognized. So report I/O errors if there were any */ res = err; if (!res) strlcat(state->pp_buf, " unknown partition table\n", PAGE_SIZE); else if (warn_no_part) strlcat(state->pp_buf, " unable to read partition table\n", PAGE_SIZE); printk(KERN_INFO "%s", state->pp_buf); free_page((unsigned long)state->pp_buf); kfree(state); return ERR_PTR(res); } static ssize_t part_partition_show(struct device *dev, struct device_attribute *attr, char *buf) { struct hd_struct *p = dev_to_part(dev); return sprintf(buf, "%d\n", p->partno); } static ssize_t part_start_show(struct device *dev, struct device_attribute *attr, char *buf) { struct hd_struct *p = dev_to_part(dev); return sprintf(buf, "%llu\n",(unsigned long long)p->start_sect); } ssize_t part_size_show(struct device *dev, struct device_attribute *attr, char *buf) { struct hd_struct *p = dev_to_part(dev); return sprintf(buf, "%llu\n",(unsigned long long)p->nr_sects); } ssize_t part_ro_show(struct device *dev, struct device_attribute *attr, char *buf) { struct hd_struct *p = dev_to_part(dev); return sprintf(buf, "%d\n", p->policy ? 
1 : 0); } ssize_t part_alignment_offset_show(struct device *dev, struct device_attribute *attr, char *buf) { struct hd_struct *p = dev_to_part(dev); return sprintf(buf, "%llu\n", (unsigned long long)p->alignment_offset); } ssize_t part_discard_alignment_show(struct device *dev, struct device_attribute *attr, char *buf) { struct hd_struct *p = dev_to_part(dev); return sprintf(buf, "%u\n", p->discard_alignment); } ssize_t part_stat_show(struct device *dev, struct device_attribute *attr, char *buf) { struct hd_struct *p = dev_to_part(dev); int cpu; cpu = part_stat_lock(); part_round_stats(cpu, p); part_stat_unlock(); return sprintf(buf, "%8lu %8lu %8llu %8u " "%8lu %8lu %8llu %8u " "%8u %8u %8u" "\n", part_stat_read(p, ios[READ]), part_stat_read(p, merges[READ]), (unsigned long long)part_stat_read(p, sectors[READ]), jiffies_to_msecs(part_stat_read(p, ticks[READ])), part_stat_read(p, ios[WRITE]), part_stat_read(p, merges[WRITE]), (unsigned long long)part_stat_read(p, sectors[WRITE]), jiffies_to_msecs(part_stat_read(p, ticks[WRITE])), part_in_flight(p), jiffies_to_msecs(part_stat_read(p, io_ticks)), jiffies_to_msecs(part_stat_read(p, time_in_queue))); } ssize_t part_inflight_show(struct device *dev, struct device_attribute *attr, char *buf) { struct hd_struct *p = dev_to_part(dev); return sprintf(buf, "%8u %8u\n", p->in_flight[0], p->in_flight[1]); } #ifdef CONFIG_FAIL_MAKE_REQUEST ssize_t part_fail_show(struct device *dev, struct device_attribute *attr, char *buf) { struct hd_struct *p = dev_to_part(dev); return sprintf(buf, "%d\n", p->make_it_fail); } ssize_t part_fail_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct hd_struct *p = dev_to_part(dev); int i; if (count > 0 && sscanf(buf, "%d", &i) > 0) p->make_it_fail = (i == 0) ? 
0 : 1; return count; } #endif static DEVICE_ATTR(partition, S_IRUGO, part_partition_show, NULL); static DEVICE_ATTR(start, S_IRUGO, part_start_show, NULL); static DEVICE_ATTR(size, S_IRUGO, part_size_show, NULL); static DEVICE_ATTR(ro, S_IRUGO, part_ro_show, NULL); static DEVICE_ATTR(alignment_offset, S_IRUGO, part_alignment_offset_show, NULL); static DEVICE_ATTR(discard_alignment, S_IRUGO, part_discard_alignment_show, NULL); static DEVICE_ATTR(stat, S_IRUGO, part_stat_show, NULL); static DEVICE_ATTR(inflight, S_IRUGO, part_inflight_show, NULL); #ifdef CONFIG_FAIL_MAKE_REQUEST static struct device_attribute dev_attr_fail = __ATTR(make-it-fail, S_IRUGO|S_IWUSR, part_fail_show, part_fail_store); #endif static struct attribute *part_attrs[] = { &dev_attr_partition.attr, &dev_attr_start.attr, &dev_attr_size.attr, &dev_attr_ro.attr, &dev_attr_alignment_offset.attr, &dev_attr_discard_alignment.attr, &dev_attr_stat.attr, &dev_attr_inflight.attr, #ifdef CONFIG_FAIL_MAKE_REQUEST &dev_attr_fail.attr, #endif NULL }; static struct attribute_group part_attr_group = { .attrs = part_attrs, }; static const struct attribute_group *part_attr_groups[] = { &part_attr_group, #ifdef CONFIG_BLK_DEV_IO_TRACE &blk_trace_attr_group, #endif NULL }; static void part_release(struct device *dev) { struct hd_struct *p = dev_to_part(dev); free_part_stats(p); free_part_info(p); kfree(p); } struct device_type part_type = { .name = "partition", .groups = part_attr_groups, .release = part_release, }; static void delete_partition_rcu_cb(struct rcu_head *head) { struct hd_struct *part = container_of(head, struct hd_struct, rcu_head); part->start_sect = 0; part->nr_sects = 0; part_stat_set_all(part, 0); put_device(part_to_dev(part)); } void __delete_partition(struct hd_struct *part) { call_rcu(&part->rcu_head, delete_partition_rcu_cb); } void delete_partition(struct gendisk *disk, int partno) { struct disk_part_tbl *ptbl = disk->part_tbl; struct hd_struct *part; if (partno >= ptbl->len) return; part = ptbl->part[partno]; if (!part) return; blk_free_devt(part_devt(part)); rcu_assign_pointer(ptbl->part[partno], NULL); rcu_assign_pointer(ptbl->last_lookup, NULL); kobject_put(part->holder_dir); device_del(part_to_dev(part)); hd_struct_put(part); } static ssize_t whole_disk_show(struct device *dev, struct device_attribute *attr, char *buf) { return 0; } static DEVICE_ATTR(whole_disk, S_IRUSR | S_IRGRP | S_IROTH, whole_disk_show, NULL); struct hd_struct *add_partition(struct gendisk *disk, int partno, sector_t start, sector_t len, int flags, struct partition_meta_info *info) { struct hd_struct *p; dev_t devt = MKDEV(0, 0); struct device *ddev = disk_to_dev(disk); struct device *pdev; struct disk_part_tbl *ptbl; const char *dname; int err; err = disk_expand_part_tbl(disk, partno); if (err) return ERR_PTR(err); ptbl = disk->part_tbl; if (ptbl->part[partno]) return ERR_PTR(-EBUSY); p = kzalloc(sizeof(*p), GFP_KERNEL); if (!p) return ERR_PTR(-EBUSY); if (!init_part_stats(p)) { err = -ENOMEM; goto out_free; } pdev = part_to_dev(p); p->start_sect = start; p->alignment_offset = queue_limit_alignment_offset(&disk->queue->limits, start); p->discard_alignment = queue_limit_discard_alignment(&disk->queue->limits, start); p->nr_sects = len; p->partno = partno; p->policy = get_disk_ro(disk); if (info) { struct partition_meta_info *pinfo = alloc_part_info(disk); if (!pinfo) goto out_free_stats; memcpy(pinfo, info, sizeof(*info)); p->info = pinfo; } dname = dev_name(ddev); if (isdigit(dname[strlen(dname) - 1])) dev_set_name(pdev, "%sp%d", dname, 
partno); else dev_set_name(pdev, "%s%d", dname, partno); device_initialize(pdev); pdev->class = &block_class; pdev->type = &part_type; pdev->parent = ddev; err = blk_alloc_devt(p, &devt); if (err) goto out_free_info; pdev->devt = devt; /* delay uevent until 'holders' subdir is created */ dev_set_uevent_suppress(pdev, 1); err = device_add(pdev); if (err) goto out_put; err = -ENOMEM; p->holder_dir = kobject_create_and_add("holders", &pdev->kobj); if (!p->holder_dir) goto out_del; dev_set_uevent_suppress(pdev, 0); if (flags & ADDPART_FLAG_WHOLEDISK) { err = device_create_file(pdev, &dev_attr_whole_disk); if (err) goto out_del; } /* everything is up and running, commence */ rcu_assign_pointer(ptbl->part[partno], p); /* suppress uevent if the disk supresses it */ if (!dev_get_uevent_suppress(ddev)) kobject_uevent(&pdev->kobj, KOBJ_ADD); hd_ref_init(p); return p; out_free_info: free_part_info(p); out_free_stats: free_part_stats(p); out_free: kfree(p); return ERR_PTR(err); out_del: kobject_put(p->holder_dir); device_del(pdev); out_put: put_device(pdev); blk_free_devt(devt); return ERR_PTR(err); } static bool disk_unlock_native_capacity(struct gendisk *disk) { const struct block_device_operations *bdops = disk->fops; if (bdops->unlock_native_capacity && !(disk->flags & GENHD_FL_NATIVE_CAPACITY)) { printk(KERN_CONT "enabling native capacity\n"); bdops->unlock_native_capacity(disk); disk->flags |= GENHD_FL_NATIVE_CAPACITY; return true; } else { printk(KERN_CONT "truncated\n"); return false; } } int rescan_partitions(struct gendisk *disk, struct block_device *bdev) { struct parsed_partitions *state = NULL; struct disk_part_iter piter; struct hd_struct *part; int p, highest, res; rescan: if (state && !IS_ERR(state)) { kfree(state); state = NULL; } if (bdev->bd_part_count) return -EBUSY; res = invalidate_partition(disk, 0); if (res) return res; disk_part_iter_init(&piter, disk, DISK_PITER_INCL_EMPTY); while ((part = disk_part_iter_next(&piter))) delete_partition(disk, part->partno); disk_part_iter_exit(&piter); if (disk->fops->revalidate_disk) disk->fops->revalidate_disk(disk); check_disk_size_change(disk, bdev); bdev->bd_invalidated = 0; if (!get_capacity(disk) || !(state = check_partition(disk, bdev))) return 0; if (IS_ERR(state)) { /* * I/O error reading the partition table. If any * partition code tried to read beyond EOD, retry * after unlocking native capacity. */ if (PTR_ERR(state) == -ENOSPC) { printk(KERN_WARNING "%s: partition table beyond EOD, ", disk->disk_name); if (disk_unlock_native_capacity(disk)) goto rescan; } return -EIO; } /* * If any partition code tried to read beyond EOD, try * unlocking native capacity even if partition table is * sucessfully read as we could be missing some partitions. */ if (state->access_beyond_eod) { printk(KERN_WARNING "%s: partition table partially beyond EOD, ", disk->disk_name); if (disk_unlock_native_capacity(disk)) goto rescan; } /* tell userspace that the media / partition table may have changed */ kobject_uevent(&disk_to_dev(disk)->kobj, KOBJ_CHANGE); /* Detect the highest partition number and preallocate * disk->part_tbl. This is an optimization and not strictly * necessary. 
*/ for (p = 1, highest = 0; p < state->limit; p++) if (state->parts[p].size) highest = p; disk_expand_part_tbl(disk, highest); /* add partitions */ for (p = 1; p < state->limit; p++) { sector_t size, from; struct partition_meta_info *info = NULL; size = state->parts[p].size; if (!size) continue; from = state->parts[p].from; if (from >= get_capacity(disk)) { printk(KERN_WARNING "%s: p%d start %llu is beyond EOD, ", disk->disk_name, p, (unsigned long long) from); if (disk_unlock_native_capacity(disk))