aboutsummaryrefslogtreecommitdiffstats
path: root/fs/filesystems.c
blob: e3fa77c6ed56dce64e3fe68892022edaa85d0e56 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
/*
 *  linux/fs/filesystems.c
 *
 *  Copyright (C) 1991, 1992  Linus Torvalds
 *
 *  table of configured filesystems
 */

#include <linux/syscalls.h>
#include <linux/fs.h>
#include <linux/slab.h>
#include <linux/kmod.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/sched.h>	/* for 'current' */
#include <asm/uaccess.h>

/*
 * Handling of filesystem drivers list.
 * Rules:
 *	Inclusion to/removals from/scanning of list are protected by spinlock.
 *	During the unload module must call unregister_filesystem().
 *	We can access the fields of list element if:
 *		1) spinlock is held or
 *		2) we hold the reference to the module.
 *	The latter can be guaranteed by call of try_module_get(); if it
 *	returned 0 we must skip the element, otherwise we got the reference.
 *	Once the reference is obtained we can drop the spinlock.
 */

static struct file_system_type *file_systems;
static DEFINE_RWLOCK(file_systems_lock);

/* WARNING: This can be used only if we _already_ own a reference */
void get_filesystem(struct file_system_type *fs)
{
	__module_get(fs->owner);
}

void put_filesystem(struct file_system_type *fs)
{
	module_put(fs->owner);
}

static struct file_system_type **find_filesystem(const char *name)
{
	struct file_system_type **p;
	for (p=&file_systems; *p; p=&(*p)->next)
		if (strcmp((*p)->name,name) == 0)
			break;
	return p;
}

/**
 *	register_filesystem - register a new filesystem
 *	@fs: the file system structure
 *
 *	Adds the file system passed to the list of file systems the kernel
 *	is aware of for mount and other syscalls. Returns 0 on success,
 *	or a negative errno code on an error.
 *
 *	The &struct file_system_type that is passed is linked into the kernel 
 *	structures and must not be freed until the file system has been
 *	unregistered.
 */
 
int register_filesystem(struct file_system_type * fs)
{
	int res = 0;
	struct file_system_type ** p;

	if (fs->next)
		return -EBUSY;
	INIT_LIST_HEAD(&fs->fs_supers);
	write_lock(&file_systems_lock);
	p = find_filesystem(fs->name);
	if (*p)
		res = -EBUSY;
	else
		*p = fs;
	write_unlock(&file_systems_lock);
	return res;
}

EXPORT_SYMBOL(register_filesystem);

/**
 *	unregister_filesystem - unregister a file system
 *	@fs: filesystem to unregister
 *
 *	Remove a file system that was previously successfully registered
 *	with the kernel. An error is returned if the file system is not found.
 *	Zero is returned on a success.
 *	
 *	Once this function has returned the &struct file_system_type structure
 *	may be freed or reused.
 */
 
int unregister_filesystem(struct file_system_type * fs)
{
	struct file_system_type ** tmp;

	write_lock(&file_systems_lock);
	tmp = &file_systems;
	while (*tmp) {
		if (fs == *tmp) {
			*tmp = fs->next;
			fs->next = NULL;
			write_unlock(&file_systems_lock);
			return 0;
		}
		tmp = &(*tmp)->next;
	}
	write_unlock(&file_systems_lock);
	return -EINVAL;
}

EXPORT_SYMBOL(unregister_filesystem);

static int fs_index(const char __user * __name)
{
	struct file_system_type * tmp;
	char * name;
	int err, index;

	name = getname(__name);
	err = PTR_ERR(name);
	if (IS_ERR(name))
		return err;

	err = -EINVAL;
	read_lock(&file_systems_lock);
	for (tmp=file_systems, index=0 ; tmp ; tmp=tmp->next, index++) {
		if (strcmp(tmp->name,name) == 0) {
			err = index;
			break;
		}
	}
	read_unlock(&file_systems_lock);
	putname(name);
	return err;
}

static int fs_name(unsigned int index, char __user * buf)
{
	struct file_system_type * tmp;
	int len, res;

	read_lock(&file_systems_lock);
	for (tmp = file_systems; tmp; tmp = tmp->next, index--)
		if (index <= 0 && try_module_get(tmp->owner))
			break;
	read_unlock(&file_systems_lock);
	if (!tmp)
		return -EINVAL;

	/* OK, we got the reference, so we can safely block */
	len = strlen(tmp->name) + 1;
	res = copy_to_user(buf, tmp->name, len) ? -EFAULT : 0;
	put_filesystem(tmp);
	return res;
}

static int fs_maxindex(void)
{
	struct file_system_type * tmp;
	int index;

	read_lock(&file_systems_lock);
	for (tmp = file_systems, index = 0 ; tmp ; tmp = tmp->next, index++)
		;
	read_unlock(&file_systems_lock);
	return index;
}

/*
 * Whee.. Weird sysv syscall. 
 */
asmlinkage long sys_sysfs(int option, unsigned long arg1, unsigned long arg2)
{
	int retval = -EINVAL;

	switch (option) {
		case 1:
			retval = fs_index((const char __user *) arg1);
			break;

		case 2:
			retval = fs_name(arg1, (char __user *) arg2);
			break;

		case 3:
			retval = fs_maxindex();
			break;
	}
	return retval;
}

int get_filesystem_list(char * buf)
{
	int len = 0;
	struct file_system_type * tmp;

	read_lock(&file_systems_lock);
	tmp = file_systems;
	while (tmp && len < PAGE_SIZE - 80) {
		len += sprintf(buf+len, "%s\t%s\n",
			(tmp->fs_flags & FS_REQUIRES_DEV) ? "" : "nodev",
			tmp->name);
		tmp = tmp->next;
	}
	read_unlock(&file_systems_lock);
	return len;
}

struct file_system_type *get_fs_type(const char *name)
{
	struct file_system_type *fs;

	read_lock(&file_systems_lock);
	fs = *(find_filesystem(name));
	if (fs && !try_module_get(fs->owner))
		fs = NULL;
	read_unlock(&file_systems_lock);
	if (!fs && (request_module("%s", name) == 0)) {
		read_lock(&file_systems_lock);
		fs = *(find_filesystem(name));
		if (fs && !try_module_get(fs->owner))
			fs = NULL;
		read_unlock(&file_systems_lock);
	}
	return fs;
}

EXPORT_SYMBOL(get_fs_type);
> unsigned int gfs2_struct2blk(struct gfs2_sbd *sdp, unsigned int nstruct, unsigned int ssize) { unsigned int blks; unsigned int first, second; blks = 1; first = (sdp->sd_sb.sb_bsize - sizeof(struct gfs2_log_descriptor)) / ssize; if (nstruct > first) { second = (sdp->sd_sb.sb_bsize - sizeof(struct gfs2_meta_header)) / ssize; blks += DIV_ROUND_UP(nstruct - first, second); } return blks; } /** * gfs2_remove_from_ail - Remove an entry from the ail lists, updating counters * @mapping: The associated mapping (maybe NULL) * @bd: The gfs2_bufdata to remove * * The ail lock _must_ be held when calling this function * */ void gfs2_remove_from_ail(struct gfs2_bufdata *bd) { bd->bd_tr = NULL; list_del_init(&bd->bd_ail_st_list); list_del_init(&bd->bd_ail_gl_list); atomic_dec(&bd->bd_gl->gl_ail_count); brelse(bd->bd_bh); } /** * gfs2_ail1_start_one - Start I/O on a part of the AIL * @sdp: the filesystem * @wbc: The writeback control structure * @ai: The ail structure * */ static int gfs2_ail1_start_one(struct gfs2_sbd *sdp, struct writeback_control *wbc, struct gfs2_trans *tr) __releases(&sdp->sd_ail_lock) __acquires(&sdp->sd_ail_lock) { struct gfs2_glock *gl = NULL; struct address_space *mapping; struct gfs2_bufdata *bd, *s; struct buffer_head *bh; list_for_each_entry_safe_reverse(bd, s, &tr->tr_ail1_list, bd_ail_st_list) { bh = bd->bd_bh; gfs2_assert(sdp, bd->bd_tr == tr); if (!buffer_busy(bh)) { if (!buffer_uptodate(bh)) gfs2_io_error_bh(sdp, bh); list_move(&bd->bd_ail_st_list, &tr->tr_ail2_list); continue; } if (!buffer_dirty(bh)) continue; if (gl == bd->bd_gl) continue; gl = bd->bd_gl; list_move(&bd->bd_ail_st_list, &tr->tr_ail1_list); mapping = bh->b_page->mapping; if (!mapping) continue; spin_unlock(&sdp->sd_ail_lock); generic_writepages(mapping, wbc); spin_lock(&sdp->sd_ail_lock); if (wbc->nr_to_write <= 0) break; return 1; } return 0; } /** * gfs2_ail1_flush - start writeback of some ail1 entries * @sdp: The super block * @wbc: The writeback control structure * * Writes back some ail1 entries, according to the limits in the * writeback control structure */ void gfs2_ail1_flush(struct gfs2_sbd *sdp, struct writeback_control *wbc) { struct list_head *head = &sdp->sd_ail1_list; struct gfs2_trans *tr; struct blk_plug plug; trace_gfs2_ail_flush(sdp, wbc, 1); blk_start_plug(&plug); spin_lock(&sdp->sd_ail_lock); restart: list_for_each_entry_reverse(tr, head, tr_list) { if (wbc->nr_to_write <= 0) break; if (gfs2_ail1_start_one(sdp, wbc, tr)) goto restart; } spin_unlock(&sdp->sd_ail_lock); blk_finish_plug(&plug); trace_gfs2_ail_flush(sdp, wbc, 0); } /** * gfs2_ail1_start - start writeback of all ail1 entries * @sdp: The superblock */ static void gfs2_ail1_start(struct gfs2_sbd *sdp) { struct writeback_control wbc = { .sync_mode = WB_SYNC_NONE, .nr_to_write = LONG_MAX, .range_start = 0, .range_end = LLONG_MAX, }; return gfs2_ail1_flush(sdp, &wbc); } /** * gfs2_ail1_empty_one - Check whether or not a trans in the AIL has been synced * @sdp: the filesystem * @ai: the AIL entry * */ static void gfs2_ail1_empty_one(struct gfs2_sbd *sdp, struct gfs2_trans *tr) { struct gfs2_bufdata *bd, *s; struct buffer_head *bh; list_for_each_entry_safe_reverse(bd, s, &tr->tr_ail1_list, bd_ail_st_list) { bh = bd->bd_bh; gfs2_assert(sdp, bd->bd_tr == tr); if (buffer_busy(bh)) continue; if (!buffer_uptodate(bh)) gfs2_io_error_bh(sdp, bh); list_move(&bd->bd_ail_st_list, &tr->tr_ail2_list); } } /** * gfs2_ail1_empty - Try to empty the ail1 lists * @sdp: The superblock * * Tries to empty the ail1 lists, starting with the oldest first */ static int gfs2_ail1_empty(struct gfs2_sbd *sdp) { struct gfs2_trans *tr, *s; int oldest_tr = 1; int ret; spin_lock(&sdp->sd_ail_lock); list_for_each_entry_safe_reverse(tr, s, &sdp->sd_ail1_list, tr_list) { gfs2_ail1_empty_one(sdp, tr); if (list_empty(&tr->tr_ail1_list) && oldest_tr) list_move(&tr->tr_list, &sdp->sd_ail2_list); else oldest_tr = 0; } ret = list_empty(&sdp->sd_ail1_list); spin_unlock(&sdp->sd_ail_lock); return ret; } static void gfs2_ail1_wait(struct gfs2_sbd *sdp) { struct gfs2_trans *tr; struct gfs2_bufdata *bd; struct buffer_head *bh; spin_lock(&sdp->sd_ail_lock); list_for_each_entry_reverse(tr, &sdp->sd_ail1_list, tr_list) { list_for_each_entry(bd, &tr->tr_ail1_list, bd_ail_st_list) { bh = bd->bd_bh; if (!buffer_locked(bh)) continue; get_bh(bh); spin_unlock(&sdp->sd_ail_lock); wait_on_buffer(bh); brelse(bh); return; } } spin_unlock(&sdp->sd_ail_lock); } /** * gfs2_ail2_empty_one - Check whether or not a trans in the AIL has been synced * @sdp: the filesystem * @ai: the AIL entry * */ static void gfs2_ail2_empty_one(struct gfs2_sbd *sdp, struct gfs2_trans *tr) { struct list_head *head = &tr->tr_ail2_list; struct gfs2_bufdata *bd; while (!list_empty(head)) { bd = list_entry(head->prev, struct gfs2_bufdata, bd_ail_st_list); gfs2_assert(sdp, bd->bd_tr == tr); gfs2_remove_from_ail(bd); } } static void ail2_empty(struct gfs2_sbd *sdp, unsigned int new_tail) { struct gfs2_trans *tr, *safe; unsigned int old_tail = sdp->sd_log_tail; int wrap = (new_tail < old_tail); int a, b, rm; spin_lock(&sdp->sd_ail_lock); list_for_each_entry_safe(tr, safe, &sdp->sd_ail2_list, tr_list) { a = (old_tail <= tr->tr_first); b = (tr->tr_first < new_tail); rm = (wrap) ? (a || b) : (a && b); if (!rm) continue; gfs2_ail2_empty_one(sdp, tr); list_del(&tr->tr_list); gfs2_assert_warn(sdp, list_empty(&tr->tr_ail1_list)); gfs2_assert_warn(sdp, list_empty(&tr->tr_ail2_list)); kfree(tr); } spin_unlock(&sdp->sd_ail_lock); } /** * gfs2_log_release - Release a given number of log blocks * @sdp: The GFS2 superblock * @blks: The number of blocks * */ void gfs2_log_release(struct gfs2_sbd *sdp, unsigned int blks) { atomic_add(blks, &sdp->sd_log_blks_free); trace_gfs2_log_blocks(sdp, blks); gfs2_assert_withdraw(sdp, atomic_read(&sdp->sd_log_blks_free) <= sdp->sd_jdesc->jd_blocks); up_read(&sdp->sd_log_flush_lock); } /** * gfs2_log_reserve - Make a log reservation * @sdp: The GFS2 superblock * @blks: The number of blocks to reserve * * Note that we never give out the last few blocks of the journal. Thats * due to the fact that there is a small number of header blocks * associated with each log flush. The exact number can't be known until * flush time, so we ensure that we have just enough free blocks at all * times to avoid running out during a log flush. * * We no longer flush the log here, instead we wake up logd to do that * for us. To avoid the thundering herd and to ensure that we deal fairly * with queued waiters, we use an exclusive wait. This means that when we * get woken with enough journal space to get our reservation, we need to * wake the next waiter on the list. * * Returns: errno */ int gfs2_log_reserve(struct gfs2_sbd *sdp, unsigned int blks) { int ret = 0; unsigned reserved_blks = 7 * (4096 / sdp->sd_vfs->s_blocksize); unsigned wanted = blks + reserved_blks; DEFINE_WAIT(wait); int did_wait = 0; unsigned int free_blocks; if (gfs2_assert_warn(sdp, blks) || gfs2_assert_warn(sdp, blks <= sdp->sd_jdesc->jd_blocks)) return -EINVAL; retry: free_blocks = atomic_read(&sdp->sd_log_blks_free); if (unlikely(free_blocks <= wanted)) { do { prepare_to_wait_exclusive(&sdp->sd_log_waitq, &wait, TASK_UNINTERRUPTIBLE); wake_up(&sdp->sd_logd_waitq); did_wait = 1; if (atomic_read(&sdp->sd_log_blks_free) <= wanted) io_schedule(); free_blocks = atomic_read(&sdp->sd_log_blks_free); } while(free_blocks <= wanted); finish_wait(&sdp->sd_log_waitq, &wait); } atomic_inc(&sdp->sd_reserving_log); if (atomic_cmpxchg(&sdp->sd_log_blks_free, free_blocks, free_blocks - blks) != free_blocks) { if (atomic_dec_and_test(&sdp->sd_reserving_log)) wake_up(&sdp->sd_reserving_log_wait); goto retry; } trace_gfs2_log_blocks(sdp, -blks); /* * If we waited, then so might others, wake them up _after_ we get * our share of the log. */ if (unlikely(did_wait)) wake_up(&sdp->sd_log_waitq); down_read(&sdp->sd_log_flush_lock); if (unlikely(!test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags))) { gfs2_log_release(sdp, blks); ret = -EROFS; } if (atomic_dec_and_test(&sdp->sd_reserving_log)) wake_up(&sdp->sd_reserving_log_wait); return ret; } /** * log_distance - Compute distance between two journal blocks * @sdp: The GFS2 superblock * @newer: The most recent journal block of the pair * @older: The older journal block of the pair * * Compute the distance (in the journal direction) between two * blocks in the journal * * Returns: the distance in blocks */ static inline unsigned int log_distance(struct gfs2_sbd *sdp, unsigned int newer, unsigned int older) { int dist; dist = newer - older; if (dist < 0) dist += sdp->sd_jdesc->jd_blocks; return dist; } /** * calc_reserved - Calculate the number of blocks to reserve when * refunding a transaction's unused buffers. * @sdp: The GFS2 superblock * * This is complex. We need to reserve room for all our currently used * metadata buffers (e.g. normal file I/O rewriting file time stamps) and * all our journaled data buffers for journaled files (e.g. files in the * meta_fs like rindex, or files for which chattr +j was done.) * If we don't reserve enough space, gfs2_log_refund and gfs2_log_flush * will count it as free space (sd_log_blks_free) and corruption will follow. * * We can have metadata bufs and jdata bufs in the same journal. So each * type gets its own log header, for which we need to reserve a block. * In fact, each type has the potential for needing more than one header * in cases where we have more buffers than will fit on a journal page. * Metadata journal entries take up half the space of journaled buffer entries. * Thus, metadata entries have buf_limit (502) and journaled buffers have * databuf_limit (251) before they cause a wrap around. * * Also, we need to reserve blocks for revoke journal entries and one for an * overall header for the lot. * * Returns: the number of blocks reserved */ static unsigned int calc_reserved(struct gfs2_sbd *sdp) { unsigned int reserved = 0; unsigned int mbuf; unsigned int dbuf; struct gfs2_trans *tr = sdp->sd_log_tr; if (tr) { mbuf = tr->tr_num_buf_new - tr->tr_num_buf_rm; dbuf = tr->tr_num_databuf_new - tr->tr_num_databuf_rm; reserved = mbuf + dbuf; /* Account for header blocks */ reserved += DIV_ROUND_UP(mbuf, buf_limit(sdp)); reserved += DIV_ROUND_UP(dbuf, databuf_limit(sdp)); } if (sdp->sd_log_commited_revoke > 0) reserved += gfs2_struct2blk(sdp, sdp->sd_log_commited_revoke, sizeof(u64)); /* One for the overall header */ if (reserved) reserved++; return reserved; } static unsigned int current_tail(struct gfs2_sbd *sdp) { struct gfs2_trans *tr;