aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Documentation/filesystems/gfs2-glocks.txt119
-rw-r--r--Documentation/filesystems/gfs2.txt9
-rw-r--r--fs/gfs2/acl.c12
-rw-r--r--fs/gfs2/aops.c18
-rw-r--r--fs/gfs2/bmap.c10
-rw-r--r--fs/gfs2/file.c12
-rw-r--r--fs/gfs2/glops.c6
-rw-r--r--fs/gfs2/incore.h26
-rw-r--r--fs/gfs2/inode.h3
-rw-r--r--fs/gfs2/log.c103
-rw-r--r--fs/gfs2/log.h2
-rw-r--r--fs/gfs2/lops.c520
-rw-r--r--fs/gfs2/lops.h14
-rw-r--r--fs/gfs2/main.c26
-rw-r--r--fs/gfs2/meta_io.c28
-rw-r--r--fs/gfs2/meta_io.h4
-rw-r--r--fs/gfs2/ops_fstype.c1
-rw-r--r--fs/gfs2/quota.c6
-rw-r--r--fs/gfs2/rgrp.c102
-rw-r--r--fs/gfs2/trace_gfs2.h16
-rw-r--r--fs/gfs2/trans.c44
-rw-r--r--fs/gfs2/util.c3
-rw-r--r--fs/gfs2/util.h3
23 files changed, 588 insertions, 499 deletions
diff --git a/Documentation/filesystems/gfs2-glocks.txt b/Documentation/filesystems/gfs2-glocks.txt
index 0494f78d87e4..fcc79957be63 100644
--- a/Documentation/filesystems/gfs2-glocks.txt
+++ b/Documentation/filesystems/gfs2-glocks.txt
@@ -61,7 +61,9 @@ go_unlock | Called on the final local unlock of a lock
61go_dump | Called to print content of object for debugfs file, or on 61go_dump | Called to print content of object for debugfs file, or on
62 | error to dump glock to the log. 62 | error to dump glock to the log.
63go_type | The type of the glock, LM_TYPE_..... 63go_type | The type of the glock, LM_TYPE_.....
64go_min_hold_time | The minimum hold time 64go_callback | Called if the DLM sends a callback to drop this lock
65go_flags | GLOF_ASPACE is set, if the glock has an address space
66 | associated with it
65 67
66The minimum hold time for each lock is the time after a remote lock 68The minimum hold time for each lock is the time after a remote lock
67grant for which we ignore remote demote requests. This is in order to 69grant for which we ignore remote demote requests. This is in order to
@@ -89,6 +91,7 @@ go_demote_ok | Sometimes | Yes
89go_lock | Yes | No 91go_lock | Yes | No
90go_unlock | Yes | No 92go_unlock | Yes | No
91go_dump | Sometimes | Yes 93go_dump | Sometimes | Yes
94go_callback | Sometimes (N/A) | Yes
92 95
93N.B. Operations must not drop either the bit lock or the spinlock 96N.B. Operations must not drop either the bit lock or the spinlock
94if it is held on entry. go_dump and go_demote_ok must never block. 97if it is held on entry. go_dump and go_demote_ok must never block.
@@ -111,4 +114,118 @@ itself (locking order as above), and the other, known as the iopen
111glock is used in conjunction with the i_nlink field in the inode to 114glock is used in conjunction with the i_nlink field in the inode to
112determine the lifetime of the inode in question. Locking of inodes 115determine the lifetime of the inode in question. Locking of inodes
113is on a per-inode basis. Locking of rgrps is on a per rgrp basis. 116is on a per-inode basis. Locking of rgrps is on a per rgrp basis.
117In general we prefer to lock local locks prior to cluster locks.
118
119 Glock Statistics
120 ------------------
121
122The stats are divided into two sets: those relating to the
123super block and those relating to an individual glock. The
124super block stats are done on a per cpu basis in order to
125try and reduce the overhead of gathering them. They are also
126further divided by glock type. All timings are in nanoseconds.
127
128In the case of both the super block and glock statistics,
129the same information is gathered in each case. The super
130block timing statistics are used to provide default values for
131the glock timing statistics, so that newly created glocks
132should have, as far as possible, a sensible starting point.
133The per-glock counters are initialised to zero when the
134glock is created. The per-glock statistics are lost when
135the glock is ejected from memory.
136
137The statistics are divided into three pairs of mean and
138variance, plus two counters. The mean/variance pairs are
139smoothed exponential estimates and the algorithm used is
140one which will be very familiar to those used to the calculation
141of round trip times in network code. See "TCP/IP Illustrated,
142Volume 1", W. Richard Stevens, sect 21.3, "Round-Trip Time Measurement",
143p. 299 and onwards. Also, Volume 2, Sect. 25.10, p. 838 and onwards.
144Unlike the TCP/IP Illustrated case, the mean and variance are
145not scaled, but are in units of integer nanoseconds.
146
147The three pairs of mean/variance measure the following
148things:
149
150 1. DLM lock time (non-blocking requests)
151 2. DLM lock time (blocking requests)
152 3. Inter-request time (again to the DLM)
153
154A non-blocking request is one which will complete right
155away, whatever the state of the DLM lock in question. That
156currently means any requests when (a) the current state of
157the lock is exclusive, i.e. a lock demotion (b) the requested
158state is either null or unlocked (again, a demotion) or (c) the
159"try lock" flag is set. A blocking request covers all the other
160lock requests.
161
162There are two counters. The first is there primarily to show
163how many lock requests have been made, and thus how much data
164has gone into the mean/variance calculations. The other counter
165is counting queuing of holders at the top layer of the glock
166code. Hopefully that number will be a lot larger than the number
167of dlm lock requests issued.
168
169So why gather these statistics? There are several reasons
170we'd like to get a better idea of these timings:
171
1721. To be able to better set the glock "min hold time"
1732. To spot performance issues more easily
1743. To improve the algorithm for selecting resource groups for
175allocation (to base it on lock wait time, rather than blindly
176using a "try lock")
177
178Due to the smoothing action of the updates, a step change in
179some input quantity being sampled will only fully be taken
180into account after 8 samples (or 4 for the variance) and this
181needs to be carefully considered when interpreting the
182results.
183
184Knowing both the time it takes a lock request to complete and
185the average time between lock requests for a glock means we
186can compute the total percentage of the time for which the
187node is able to use a glock vs. time that the rest of the
188cluster has its share. That will be very useful when setting
189the lock min hold time.
190
191Great care has been taken to ensure that we
192measure exactly the quantities that we want, as accurately
193as possible. There are always inaccuracies in any
194measuring system, but I hope this is as accurate as we
195can reasonably make it.
196
197Per sb stats can be found here:
198/sys/kernel/debug/gfs2/<fsname>/sbstats
199Per glock stats can be found here:
200/sys/kernel/debug/gfs2/<fsname>/glstats
201
202This assumes that debugfs is mounted on /sys/kernel/debug and also
203that <fsname> is replaced with the name of the gfs2 filesystem
204in question.
205
206The abbreviations used in the output are as follows:
207
208srtt - Smoothed round trip time for non-blocking dlm requests
209srttvar - Variance estimate for srtt
210srttb - Smoothed round trip time for (potentially) blocking dlm requests
211srttvarb - Variance estimate for srttb
212sirt - Smoothed inter-request time (for dlm requests)
213sirtvar - Variance estimate for sirt
214dlm - Number of dlm requests made (dcnt in glstats file)
215queue - Number of glock requests queued (qcnt in glstats file)
216
217The sbstats file contains a set of these stats for each glock type (so 8 lines
218for each type) and for each cpu (one column per cpu). The glstats file contains
219a set of these stats for each glock in a similar format to the glocks file, but
220using the format mean/variance for each of the timing stats.
221
222The gfs2_glock_lock_time tracepoint prints out the current values of the stats
223for the glock in question, along with some additional information on each dlm
224reply that is received:
225
226status - The status of the dlm request
227flags - The dlm request flags
228tdiff - The time taken by this specific request
229(remaining fields as per above list)
230
114 231
diff --git a/Documentation/filesystems/gfs2.txt b/Documentation/filesystems/gfs2.txt
index 4cda926628aa..cc4f2306609e 100644
--- a/Documentation/filesystems/gfs2.txt
+++ b/Documentation/filesystems/gfs2.txt
@@ -1,7 +1,7 @@
1Global File System 1Global File System
2------------------ 2------------------
3 3
4http://sources.redhat.com/cluster/wiki/ 4https://fedorahosted.org/cluster/wiki/HomePage
5 5
6GFS is a cluster file system. It allows a cluster of computers to 6GFS is a cluster file system. It allows a cluster of computers to
7simultaneously use a block device that is shared between them (with FC, 7simultaneously use a block device that is shared between them (with FC,
@@ -30,7 +30,8 @@ needed, simply:
30 30
31If you are using Fedora, you need to install the gfs2-utils package 31If you are using Fedora, you need to install the gfs2-utils package
32and, for lock_dlm, you will also need to install the cman package 32and, for lock_dlm, you will also need to install the cman package
33and write a cluster.conf as per the documentation. 33and write a cluster.conf as per the documentation. For F17 and above
34cman has been replaced by the dlm package.
34 35
35GFS2 is not on-disk compatible with previous versions of GFS, but it 36GFS2 is not on-disk compatible with previous versions of GFS, but it
36is pretty close. 37is pretty close.
@@ -39,8 +40,6 @@ The following man pages can be found at the URL above:
39 fsck.gfs2 to repair a filesystem 40 fsck.gfs2 to repair a filesystem
40 gfs2_grow to expand a filesystem online 41 gfs2_grow to expand a filesystem online
41 gfs2_jadd to add journals to a filesystem online 42 gfs2_jadd to add journals to a filesystem online
42 gfs2_tool to manipulate, examine and tune a filesystem 43 tunegfs2 to manipulate, examine and tune a filesystem
43 gfs2_quota to examine and change quota values in a filesystem
44 gfs2_convert to convert a gfs filesystem to gfs2 in-place 44 gfs2_convert to convert a gfs filesystem to gfs2 in-place
45 mount.gfs2 to help mount(8) mount a filesystem
46 mkfs.gfs2 to make a filesystem 45 mkfs.gfs2 to make a filesystem
diff --git a/fs/gfs2/acl.c b/fs/gfs2/acl.c
index 230eb0f005b6..bd4a5892c93c 100644
--- a/fs/gfs2/acl.c
+++ b/fs/gfs2/acl.c
@@ -73,12 +73,8 @@ static int gfs2_set_mode(struct inode *inode, umode_t mode)
73 int error = 0; 73 int error = 0;
74 74
75 if (mode != inode->i_mode) { 75 if (mode != inode->i_mode) {
76 struct iattr iattr; 76 inode->i_mode = mode;
77 77 mark_inode_dirty(inode);
78 iattr.ia_valid = ATTR_MODE;
79 iattr.ia_mode = mode;
80
81 error = gfs2_setattr_simple(inode, &iattr);
82 } 78 }
83 79
84 return error; 80 return error;
@@ -126,9 +122,7 @@ int gfs2_acl_create(struct gfs2_inode *dip, struct inode *inode)
126 return PTR_ERR(acl); 122 return PTR_ERR(acl);
127 if (!acl) { 123 if (!acl) {
128 mode &= ~current_umask(); 124 mode &= ~current_umask();
129 if (mode != inode->i_mode) 125 return gfs2_set_mode(inode, mode);
130 error = gfs2_set_mode(inode, mode);
131 return error;
132 } 126 }
133 127
134 if (S_ISDIR(inode->i_mode)) { 128 if (S_ISDIR(inode->i_mode)) {
diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c
index 9b2ff0e851b1..e80a464850c8 100644
--- a/fs/gfs2/aops.c
+++ b/fs/gfs2/aops.c
@@ -36,8 +36,8 @@
36#include "glops.h" 36#include "glops.h"
37 37
38 38
39void gfs2_page_add_databufs(struct gfs2_inode *ip, struct page *page, 39static void gfs2_page_add_databufs(struct gfs2_inode *ip, struct page *page,
40 unsigned int from, unsigned int to) 40 unsigned int from, unsigned int to)
41{ 41{
42 struct buffer_head *head = page_buffers(page); 42 struct buffer_head *head = page_buffers(page);
43 unsigned int bsize = head->b_size; 43 unsigned int bsize = head->b_size;
@@ -517,15 +517,14 @@ out:
517/** 517/**
518 * gfs2_internal_read - read an internal file 518 * gfs2_internal_read - read an internal file
519 * @ip: The gfs2 inode 519 * @ip: The gfs2 inode
520 * @ra_state: The readahead state (or NULL for no readahead)
521 * @buf: The buffer to fill 520 * @buf: The buffer to fill
522 * @pos: The file position 521 * @pos: The file position
523 * @size: The amount to read 522 * @size: The amount to read
524 * 523 *
525 */ 524 */
526 525
527int gfs2_internal_read(struct gfs2_inode *ip, struct file_ra_state *ra_state, 526int gfs2_internal_read(struct gfs2_inode *ip, char *buf, loff_t *pos,
528 char *buf, loff_t *pos, unsigned size) 527 unsigned size)
529{ 528{
530 struct address_space *mapping = ip->i_inode.i_mapping; 529 struct address_space *mapping = ip->i_inode.i_mapping;
531 unsigned long index = *pos / PAGE_CACHE_SIZE; 530 unsigned long index = *pos / PAGE_CACHE_SIZE;
@@ -943,8 +942,8 @@ static void gfs2_discard(struct gfs2_sbd *sdp, struct buffer_head *bh)
943 clear_buffer_dirty(bh); 942 clear_buffer_dirty(bh);
944 bd = bh->b_private; 943 bd = bh->b_private;
945 if (bd) { 944 if (bd) {
946 if (!list_empty(&bd->bd_le.le_list) && !buffer_pinned(bh)) 945 if (!list_empty(&bd->bd_list) && !buffer_pinned(bh))
947 list_del_init(&bd->bd_le.le_list); 946 list_del_init(&bd->bd_list);
948 else 947 else
949 gfs2_remove_from_journal(bh, current->journal_info, 0); 948 gfs2_remove_from_journal(bh, current->journal_info, 0);
950 } 949 }
@@ -1084,10 +1083,9 @@ int gfs2_releasepage(struct page *page, gfp_t gfp_mask)
1084 bd = bh->b_private; 1083 bd = bh->b_private;
1085 if (bd) { 1084 if (bd) {
1086 gfs2_assert_warn(sdp, bd->bd_bh == bh); 1085 gfs2_assert_warn(sdp, bd->bd_bh == bh);
1087 gfs2_assert_warn(sdp, list_empty(&bd->bd_list_tr)); 1086 if (!list_empty(&bd->bd_list)) {
1088 if (!list_empty(&bd->bd_le.le_list)) {
1089 if (!buffer_pinned(bh)) 1087 if (!buffer_pinned(bh))
1090 list_del_init(&bd->bd_le.le_list); 1088 list_del_init(&bd->bd_list);
1091 else 1089 else
1092 bd = NULL; 1090 bd = NULL;
1093 } 1091 }
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index 03c04febe26f..dab54099dd98 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -324,7 +324,7 @@ static int lookup_metapath(struct gfs2_inode *ip, struct metapath *mp)
324 if (!dblock) 324 if (!dblock)
325 return x + 1; 325 return x + 1;
326 326
327 ret = gfs2_meta_indirect_buffer(ip, x+1, dblock, 0, &mp->mp_bh[x+1]); 327 ret = gfs2_meta_indirect_buffer(ip, x+1, dblock, &mp->mp_bh[x+1]);
328 if (ret) 328 if (ret)
329 return ret; 329 return ret;
330 } 330 }
@@ -882,7 +882,7 @@ static int recursive_scan(struct gfs2_inode *ip, struct buffer_head *dibh,
882 top = (__be64 *)(bh->b_data + sizeof(struct gfs2_dinode)) + mp->mp_list[0]; 882 top = (__be64 *)(bh->b_data + sizeof(struct gfs2_dinode)) + mp->mp_list[0];
883 bottom = (__be64 *)(bh->b_data + sizeof(struct gfs2_dinode)) + sdp->sd_diptrs; 883 bottom = (__be64 *)(bh->b_data + sizeof(struct gfs2_dinode)) + sdp->sd_diptrs;
884 } else { 884 } else {
885 error = gfs2_meta_indirect_buffer(ip, height, block, 0, &bh); 885 error = gfs2_meta_indirect_buffer(ip, height, block, &bh);
886 if (error) 886 if (error)
887 return error; 887 return error;
888 888
@@ -1169,6 +1169,7 @@ static int do_grow(struct inode *inode, u64 size)
1169 struct buffer_head *dibh; 1169 struct buffer_head *dibh;
1170 struct gfs2_qadata *qa = NULL; 1170 struct gfs2_qadata *qa = NULL;
1171 int error; 1171 int error;
1172 int unstuff = 0;
1172 1173
1173 if (gfs2_is_stuffed(ip) && 1174 if (gfs2_is_stuffed(ip) &&
1174 (size > (sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode)))) { 1175 (size > (sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode)))) {
@@ -1183,13 +1184,14 @@ static int do_grow(struct inode *inode, u64 size)
1183 error = gfs2_inplace_reserve(ip, 1); 1184 error = gfs2_inplace_reserve(ip, 1);
1184 if (error) 1185 if (error)
1185 goto do_grow_qunlock; 1186 goto do_grow_qunlock;
1187 unstuff = 1;
1186 } 1188 }
1187 1189
1188 error = gfs2_trans_begin(sdp, RES_DINODE + RES_STATFS + RES_RG_BIT, 0); 1190 error = gfs2_trans_begin(sdp, RES_DINODE + RES_STATFS + RES_RG_BIT, 0);
1189 if (error) 1191 if (error)
1190 goto do_grow_release; 1192 goto do_grow_release;
1191 1193
1192 if (qa) { 1194 if (unstuff) {
1193 error = gfs2_unstuff_dinode(ip, NULL); 1195 error = gfs2_unstuff_dinode(ip, NULL);
1194 if (error) 1196 if (error)
1195 goto do_end_trans; 1197 goto do_end_trans;
@@ -1208,7 +1210,7 @@ static int do_grow(struct inode *inode, u64 size)
1208do_end_trans: 1210do_end_trans:
1209 gfs2_trans_end(sdp); 1211 gfs2_trans_end(sdp);
1210do_grow_release: 1212do_grow_release:
1211 if (qa) { 1213 if (unstuff) {
1212 gfs2_inplace_release(ip); 1214 gfs2_inplace_release(ip);
1213do_grow_qunlock: 1215do_grow_qunlock:
1214 gfs2_quota_unlock(ip); 1216 gfs2_quota_unlock(ip);
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index a3d2c9ee8d66..31b199f6efc1 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -558,14 +558,14 @@ fail:
558} 558}
559 559
560/** 560/**
561 * gfs2_close - called to close a struct file 561 * gfs2_release - called to close a struct file
562 * @inode: the inode the struct file belongs to 562 * @inode: the inode the struct file belongs to
563 * @file: the struct file being closed 563 * @file: the struct file being closed
564 * 564 *
565 * Returns: errno 565 * Returns: errno
566 */ 566 */
567 567
568static int gfs2_close(struct inode *inode, struct file *file) 568static int gfs2_release(struct inode *inode, struct file *file)
569{ 569{
570 struct gfs2_sbd *sdp = inode->i_sb->s_fs_info; 570 struct gfs2_sbd *sdp = inode->i_sb->s_fs_info;
571 struct gfs2_file *fp; 571 struct gfs2_file *fp;
@@ -1005,7 +1005,7 @@ const struct file_operations gfs2_file_fops = {
1005 .unlocked_ioctl = gfs2_ioctl, 1005 .unlocked_ioctl = gfs2_ioctl,
1006 .mmap = gfs2_mmap, 1006 .mmap = gfs2_mmap,
1007 .open = gfs2_open, 1007 .open = gfs2_open,
1008 .release = gfs2_close, 1008 .release = gfs2_release,
1009 .fsync = gfs2_fsync, 1009 .fsync = gfs2_fsync,
1010 .lock = gfs2_lock, 1010 .lock = gfs2_lock,
1011 .flock = gfs2_flock, 1011 .flock = gfs2_flock,
@@ -1019,7 +1019,7 @@ const struct file_operations gfs2_dir_fops = {
1019 .readdir = gfs2_readdir, 1019 .readdir = gfs2_readdir,
1020 .unlocked_ioctl = gfs2_ioctl, 1020 .unlocked_ioctl = gfs2_ioctl,
1021 .open = gfs2_open, 1021 .open = gfs2_open,
1022 .release = gfs2_close, 1022 .release = gfs2_release,
1023 .fsync = gfs2_fsync, 1023 .fsync = gfs2_fsync,
1024 .lock = gfs2_lock, 1024 .lock = gfs2_lock,
1025 .flock = gfs2_flock, 1025 .flock = gfs2_flock,
@@ -1037,7 +1037,7 @@ const struct file_operations gfs2_file_fops_nolock = {
1037 .unlocked_ioctl = gfs2_ioctl, 1037 .unlocked_ioctl = gfs2_ioctl,
1038 .mmap = gfs2_mmap, 1038 .mmap = gfs2_mmap,
1039 .open = gfs2_open, 1039 .open = gfs2_open,
1040 .release = gfs2_close, 1040 .release = gfs2_release,
1041 .fsync = gfs2_fsync, 1041 .fsync = gfs2_fsync,
1042 .splice_read = generic_file_splice_read, 1042 .splice_read = generic_file_splice_read,
1043 .splice_write = generic_file_splice_write, 1043 .splice_write = generic_file_splice_write,
@@ -1049,7 +1049,7 @@ const struct file_operations gfs2_dir_fops_nolock = {
1049 .readdir = gfs2_readdir, 1049 .readdir = gfs2_readdir,
1050 .unlocked_ioctl = gfs2_ioctl, 1050 .unlocked_ioctl = gfs2_ioctl,
1051 .open = gfs2_open, 1051 .open = gfs2_open,
1052 .release = gfs2_close, 1052 .release = gfs2_release,
1053 .fsync = gfs2_fsync, 1053 .fsync = gfs2_fsync,
1054 .llseek = default_llseek, 1054 .llseek = default_llseek,
1055}; 1055};
diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c
index 1656df7aacd2..4bdcf3784187 100644
--- a/fs/gfs2/glops.c
+++ b/fs/gfs2/glops.c
@@ -94,7 +94,6 @@ static void gfs2_ail_empty_gl(struct gfs2_glock *gl)
94 /* A shortened, inline version of gfs2_trans_begin() */ 94 /* A shortened, inline version of gfs2_trans_begin() */
95 tr.tr_reserved = 1 + gfs2_struct2blk(sdp, tr.tr_revokes, sizeof(u64)); 95 tr.tr_reserved = 1 + gfs2_struct2blk(sdp, tr.tr_revokes, sizeof(u64));
96 tr.tr_ip = (unsigned long)__builtin_return_address(0); 96 tr.tr_ip = (unsigned long)__builtin_return_address(0);
97 INIT_LIST_HEAD(&tr.tr_list_buf);
98 gfs2_log_reserve(sdp, tr.tr_reserved); 97 gfs2_log_reserve(sdp, tr.tr_reserved);
99 BUG_ON(current->journal_info); 98 BUG_ON(current->journal_info);
100 current->journal_info = &tr; 99 current->journal_info = &tr;
@@ -379,11 +378,6 @@ int gfs2_inode_refresh(struct gfs2_inode *ip)
379 if (error) 378 if (error)
380 return error; 379 return error;
381 380
382 if (gfs2_metatype_check(GFS2_SB(&ip->i_inode), dibh, GFS2_METATYPE_DI)) {
383 brelse(dibh);
384 return -EIO;
385 }
386
387 error = gfs2_dinode_in(ip, dibh->b_data); 381 error = gfs2_dinode_in(ip, dibh->b_data);
388 brelse(dibh); 382 brelse(dibh);
389 clear_bit(GIF_INVALID, &ip->i_flags); 383 clear_bit(GIF_INVALID, &ip->i_flags);
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index 47d0bda5ac2b..aa9949e5de26 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -26,7 +26,7 @@
26#define DIO_METADATA 0x00000020 26#define DIO_METADATA 0x00000020
27 27
28struct gfs2_log_operations; 28struct gfs2_log_operations;
29struct gfs2_log_element; 29struct gfs2_bufdata;
30struct gfs2_holder; 30struct gfs2_holder;
31struct gfs2_glock; 31struct gfs2_glock;
32struct gfs2_quota_data; 32struct gfs2_quota_data;
@@ -52,7 +52,7 @@ struct gfs2_log_header_host {
52 */ 52 */
53 53
54struct gfs2_log_operations { 54struct gfs2_log_operations {
55 void (*lo_add) (struct gfs2_sbd *sdp, struct gfs2_log_element *le); 55 void (*lo_add) (struct gfs2_sbd *sdp, struct gfs2_bufdata *bd);
56 void (*lo_before_commit) (struct gfs2_sbd *sdp); 56 void (*lo_before_commit) (struct gfs2_sbd *sdp);
57 void (*lo_after_commit) (struct gfs2_sbd *sdp, struct gfs2_ail *ai); 57 void (*lo_after_commit) (struct gfs2_sbd *sdp, struct gfs2_ail *ai);
58 void (*lo_before_scan) (struct gfs2_jdesc *jd, 58 void (*lo_before_scan) (struct gfs2_jdesc *jd,
@@ -64,11 +64,6 @@ struct gfs2_log_operations {
64 const char *lo_name; 64 const char *lo_name;
65}; 65};
66 66
67struct gfs2_log_element {
68 struct list_head le_list;
69 const struct gfs2_log_operations *le_ops;
70};
71
72#define GBF_FULL 1 67#define GBF_FULL 1
73 68
74struct gfs2_bitmap { 69struct gfs2_bitmap {
@@ -118,15 +113,10 @@ TAS_BUFFER_FNS(Zeronew, zeronew)
118struct gfs2_bufdata { 113struct gfs2_bufdata {
119 struct buffer_head *bd_bh; 114 struct buffer_head *bd_bh;
120 struct gfs2_glock *bd_gl; 115 struct gfs2_glock *bd_gl;
116 u64 bd_blkno;
121 117
122 union { 118 struct list_head bd_list;
123 struct list_head list_tr; 119 const struct gfs2_log_operations *bd_ops;
124 u64 blkno;
125 } u;
126#define bd_list_tr u.list_tr
127#define bd_blkno u.blkno
128
129 struct gfs2_log_element bd_le;
130 120
131 struct gfs2_ail *bd_ail; 121 struct gfs2_ail *bd_ail;
132 struct list_head bd_ail_st_list; 122 struct list_head bd_ail_st_list;
@@ -411,13 +401,10 @@ struct gfs2_trans {
411 401
412 int tr_touched; 402 int tr_touched;
413 403
414 unsigned int tr_num_buf;
415 unsigned int tr_num_buf_new; 404 unsigned int tr_num_buf_new;
416 unsigned int tr_num_databuf_new; 405 unsigned int tr_num_databuf_new;
417 unsigned int tr_num_buf_rm; 406 unsigned int tr_num_buf_rm;
418 unsigned int tr_num_databuf_rm; 407 unsigned int tr_num_databuf_rm;
419 struct list_head tr_list_buf;
420
421 unsigned int tr_num_revoke; 408 unsigned int tr_num_revoke;
422 unsigned int tr_num_revoke_rm; 409 unsigned int tr_num_revoke_rm;
423}; 410};
@@ -699,7 +686,6 @@ struct gfs2_sbd {
699 686
700 struct list_head sd_log_le_buf; 687 struct list_head sd_log_le_buf;
701 struct list_head sd_log_le_revoke; 688 struct list_head sd_log_le_revoke;
702 struct list_head sd_log_le_rg;
703 struct list_head sd_log_le_databuf; 689 struct list_head sd_log_le_databuf;
704 struct list_head sd_log_le_ordered; 690 struct list_head sd_log_le_ordered;
705 691
@@ -716,7 +702,9 @@ struct gfs2_sbd {
716 702
717 struct rw_semaphore sd_log_flush_lock; 703 struct rw_semaphore sd_log_flush_lock;
718 atomic_t sd_log_in_flight; 704 atomic_t sd_log_in_flight;
705 struct bio *sd_log_bio;
719 wait_queue_head_t sd_log_flush_wait; 706 wait_queue_head_t sd_log_flush_wait;
707 int sd_log_error;
720 708
721 unsigned int sd_log_flush_head; 709 unsigned int sd_log_flush_head;
722 u64 sd_log_flush_wrapped; 710 u64 sd_log_flush_wrapped;
diff --git a/fs/gfs2/inode.h b/fs/gfs2/inode.h
index 276e7b52b658..c53c7477f6da 100644
--- a/fs/gfs2/inode.h
+++ b/fs/gfs2/inode.h
@@ -17,10 +17,7 @@
17 17
18extern int gfs2_releasepage(struct page *page, gfp_t gfp_mask); 18extern int gfs2_releasepage(struct page *page, gfp_t gfp_mask);
19extern int gfs2_internal_read(struct gfs2_inode *ip, 19extern int gfs2_internal_read(struct gfs2_inode *ip,
20 struct file_ra_state *ra_state,
21 char *buf, loff_t *pos, unsigned size); 20 char *buf, loff_t *pos, unsigned size);
22extern void gfs2_page_add_databufs(struct gfs2_inode *ip, struct page *page,
23 unsigned int from, unsigned int to);
24extern void gfs2_set_aops(struct inode *inode); 21extern void gfs2_set_aops(struct inode *inode);
25 22
26static inline int gfs2_is_stuffed(const struct gfs2_inode *ip) 23static inline int gfs2_is_stuffed(const struct gfs2_inode *ip)
diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c
index 4752eadc7f6e..f4beeb9c81c1 100644
--- a/fs/gfs2/log.c
+++ b/fs/gfs2/log.c
@@ -32,8 +32,6 @@
32#include "dir.h" 32#include "dir.h"
33#include "trace_gfs2.h" 33#include "trace_gfs2.h"
34 34
35#define PULL 1
36
37/** 35/**
38 * gfs2_struct2blk - compute stuff 36 * gfs2_struct2blk - compute stuff
39 * @sdp: the filesystem 37 * @sdp: the filesystem
@@ -359,18 +357,6 @@ retry:
359 return 0; 357 return 0;
360} 358}
361 359
362u64 gfs2_log_bmap(struct gfs2_sbd *sdp, unsigned int lbn)
363{
364 struct gfs2_journal_extent *je;
365
366 list_for_each_entry(je, &sdp->sd_jdesc->extent_list, extent_list) {
367 if (lbn >= je->lblock && lbn < je->lblock + je->blocks)
368 return je->dblock + lbn - je->lblock;
369 }
370
371 return -1;
372}
373
374/** 360/**
375 * log_distance - Compute distance between two journal blocks 361 * log_distance - Compute distance between two journal blocks
376 * @sdp: The GFS2 superblock 362 * @sdp: The GFS2 superblock
@@ -466,17 +452,6 @@ static unsigned int current_tail(struct gfs2_sbd *sdp)
466 return tail; 452 return tail;
467} 453}
468 454
469void gfs2_log_incr_head(struct gfs2_sbd *sdp)
470{
471 BUG_ON((sdp->sd_log_flush_head == sdp->sd_log_tail) &&
472 (sdp->sd_log_flush_head != sdp->sd_log_head));
473
474 if (++sdp->sd_log_flush_head == sdp->sd_jdesc->jd_blocks) {
475 sdp->sd_log_flush_head = 0;
476 sdp->sd_log_flush_wrapped = 1;
477 }
478}
479
480static void log_pull_tail(struct gfs2_sbd *sdp, unsigned int new_tail) 455static void log_pull_tail(struct gfs2_sbd *sdp, unsigned int new_tail)
481{ 456{
482 unsigned int dist = log_distance(sdp, new_tail, sdp->sd_log_tail); 457 unsigned int dist = log_distance(sdp, new_tail, sdp->sd_log_tail);
@@ -511,8 +486,8 @@ static int bd_cmp(void *priv, struct list_head *a, struct list_head *b)
511{ 486{
512 struct gfs2_bufdata *bda, *bdb; 487 struct gfs2_bufdata *bda, *bdb;
513 488
514 bda = list_entry(a, struct gfs2_bufdata, bd_le.le_list); 489 bda = list_entry(a, struct gfs2_bufdata, bd_list);
515 bdb = list_entry(b, struct gfs2_bufdata, bd_le.le_list); 490 bdb = list_entry(b, struct gfs2_bufdata, bd_list);
516 491
517 if (bda->bd_bh->b_blocknr < bdb->bd_bh->b_blocknr) 492 if (bda->bd_bh->b_blocknr < bdb->bd_bh->b_blocknr)
518 return -1; 493 return -1;
@@ -530,8 +505,8 @@ static void gfs2_ordered_write(struct gfs2_sbd *sdp)
530 gfs2_log_lock(sdp); 505 gfs2_log_lock(sdp);
531 list_sort(NULL, &sdp->sd_log_le_ordered, &bd_cmp); 506 list_sort(NULL, &sdp->sd_log_le_ordered, &bd_cmp);
532 while (!list_empty(&sdp->sd_log_le_ordered)) { 507 while (!list_empty(&sdp->sd_log_le_ordered)) {
533 bd = list_entry(sdp->sd_log_le_ordered.next, struct gfs2_bufdata, bd_le.le_list); 508 bd = list_entry(sdp->sd_log_le_ordered.next, struct gfs2_bufdata, bd_list);
534 list_move(&bd->bd_le.le_list, &written); 509 list_move(&bd->bd_list, &written);
535 bh = bd->bd_bh; 510 bh = bd->bd_bh;
536 if (!buffer_dirty(bh)) 511 if (!buffer_dirty(bh))
537 continue; 512 continue;
@@ -558,7 +533,7 @@ static void gfs2_ordered_wait(struct gfs2_sbd *sdp)
558 533
559 gfs2_log_lock(sdp); 534 gfs2_log_lock(sdp);
560 while (!list_empty(&sdp->sd_log_le_ordered)) { 535 while (!list_empty(&sdp->sd_log_le_ordered)) {
561 bd = list_entry(sdp->sd_log_le_ordered.prev, struct gfs2_bufdata, bd_le.le_list); 536 bd = list_entry(sdp->sd_log_le_ordered.prev, struct gfs2_bufdata, bd_list);
562 bh = bd->bd_bh; 537 bh = bd->bd_bh;
563 if (buffer_locked(bh)) { 538 if (buffer_locked(bh)) {
564 get_bh(bh); 539 get_bh(bh);
@@ -568,7 +543,7 @@ static void gfs2_ordered_wait(struct gfs2_sbd *sdp)
568 gfs2_log_lock(sdp); 543 gfs2_log_lock(sdp);
569 continue; 544 continue;
570 } 545 }
571 list_del_init(&bd->bd_le.le_list); 546 list_del_init(&bd->bd_list);
572 } 547 }
573 gfs2_log_unlock(sdp); 548 gfs2_log_unlock(sdp);
574} 549}
@@ -580,25 +555,19 @@ static void gfs2_ordered_wait(struct gfs2_sbd *sdp)
580 * Returns: the initialized log buffer descriptor 555 * Returns: the initialized log buffer descriptor
581 */ 556 */
582 557
583static void log_write_header(struct gfs2_sbd *sdp, u32 flags, int pull) 558static void log_write_header(struct gfs2_sbd *sdp, u32 flags)
584{ 559{
585 u64 blkno = gfs2_log_bmap(sdp, sdp->sd_log_flush_head);
586 struct buffer_head *bh;
587 struct gfs2_log_header *lh; 560 struct gfs2_log_header *lh;
588 unsigned int tail; 561 unsigned int tail;
589 u32 hash; 562 u32 hash;
590 563 int rw = WRITE_FLUSH_FUA | REQ_META;
591 bh = sb_getblk(sdp->sd_vfs, blkno); 564 struct page *page = mempool_alloc(gfs2_page_pool, GFP_NOIO);
592 lock_buffer(bh); 565 lh = page_address(page);
593 memset(bh->b_data, 0, bh->b_size); 566 clear_page(lh);
594 set_buffer_uptodate(bh);
595 clear_buffer_dirty(bh);
596 567
597 gfs2_ail1_empty(sdp); 568 gfs2_ail1_empty(sdp);
598 tail = current_tail(sdp); 569 tail = current_tail(sdp);
599 570
600 lh = (struct gfs2_log_header *)bh->b_data;
601 memset(lh, 0, sizeof(struct gfs2_log_header));
602 lh->lh_header.mh_magic = cpu_to_be32(GFS2_MAGIC); 571 lh->lh_header.mh_magic = cpu_to_be32(GFS2_MAGIC);
603 lh->lh_header.mh_type = cpu_to_be32(GFS2_METATYPE_LH); 572 lh->lh_header.mh_type = cpu_to_be32(GFS2_METATYPE_LH);
604 lh->lh_header.__pad0 = cpu_to_be64(0); 573 lh->lh_header.__pad0 = cpu_to_be64(0);
@@ -608,31 +577,22 @@ static void log_write_header(struct gfs2_sbd *sdp, u32 flags, int pull)
608 lh->lh_flags = cpu_to_be32(flags); 577 lh->lh_flags = cpu_to_be32(flags);
609 lh->lh_tail = cpu_to_be32(tail); 578 lh->lh_tail = cpu_to_be32(tail);
610 lh->lh_blkno = cpu_to_be32(sdp->sd_log_flush_head); 579 lh->lh_blkno = cpu_to_be32(sdp->sd_log_flush_head);
611 hash = gfs2_disk_hash(bh->b_data, sizeof(struct gfs2_log_header)); 580 hash = gfs2_disk_hash(page_address(page), sizeof(struct gfs2_log_header));
612 lh->lh_hash = cpu_to_be32(hash); 581 lh->lh_hash = cpu_to_be32(hash);
613 582
614 bh->b_end_io = end_buffer_write_sync;
615 get_bh(bh);
616 if (test_bit(SDF_NOBARRIERS, &sdp->sd_flags)) { 583 if (test_bit(SDF_NOBARRIERS, &sdp->sd_flags)) {
617 gfs2_ordered_wait(sdp); 584 gfs2_ordered_wait(sdp);
618 log_flush_wait(sdp); 585 log_flush_wait(sdp);
619 submit_bh(WRITE_SYNC | REQ_META | REQ_PRIO, bh); 586 rw = WRITE_SYNC | REQ_META | REQ_PRIO;
620 } else {
621 submit_bh(WRITE_FLUSH_FUA | REQ_META, bh);
622 } 587 }
623 wait_on_buffer(bh);
624 588
625 if (!buffer_uptodate(bh)) 589 sdp->sd_log_idle = (tail == sdp->sd_log_flush_head);
626 gfs2_io_error_bh(sdp, bh); 590 gfs2_log_write_page(sdp, page);
627 brelse(bh); 591 gfs2_log_flush_bio(sdp, rw);
592 log_flush_wait(sdp);
628 593
629 if (sdp->sd_log_tail != tail) 594 if (sdp->sd_log_tail != tail)
630 log_pull_tail(sdp, tail); 595 log_pull_tail(sdp, tail);
631 else
632 gfs2_assert_withdraw(sdp, !pull);
633
634 sdp->sd_log_idle = (tail == sdp->sd_log_flush_head);
635 gfs2_log_incr_head(sdp);
636} 596}
637 597
638/** 598/**
@@ -678,15 +638,14 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl)
678 638
679 gfs2_ordered_write(sdp); 639 gfs2_ordered_write(sdp);
680 lops_before_commit(sdp); 640 lops_before_commit(sdp);
641 gfs2_log_flush_bio(sdp, WRITE);
681 642
682 if (sdp->sd_log_head != sdp->sd_log_flush_head) { 643 if (sdp->sd_log_head != sdp->sd_log_flush_head) {
683 log_write_header(sdp, 0, 0); 644 log_write_header(sdp, 0);
684 } else if (sdp->sd_log_tail != current_tail(sdp) && !sdp->sd_log_idle){ 645 } else if (sdp->sd_log_tail != current_tail(sdp) && !sdp->sd_log_idle){
685 gfs2_log_lock(sdp);
686 atomic_dec(&sdp->sd_log_blks_free); /* Adjust for unreserved buffer */ 646 atomic_dec(&sdp->sd_log_blks_free); /* Adjust for unreserved buffer */
687 trace_gfs2_log_blocks(sdp, -1); 647 trace_gfs2_log_blocks(sdp, -1);
688 gfs2_log_unlock(sdp); 648 log_write_header(sdp, 0);
689 log_write_header(sdp, 0, PULL);
690 } 649 }
691 lops_after_commit(sdp, ai); 650 lops_after_commit(sdp, ai);
692 651
@@ -735,21 +694,6 @@ static void log_refund(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
735 gfs2_log_unlock(sdp); 694 gfs2_log_unlock(sdp);
736} 695}
737 696
738static void buf_lo_incore_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
739{
740 struct list_head *head = &tr->tr_list_buf;
741 struct gfs2_bufdata *bd;
742
743 gfs2_log_lock(sdp);
744 while (!list_empty(head)) {
745 bd = list_entry(head->next, struct gfs2_bufdata, bd_list_tr);
746 list_del_init(&bd->bd_list_tr);
747 tr->tr_num_buf--;
748 }
749 gfs2_log_unlock(sdp);
750 gfs2_assert_warn(sdp, !tr->tr_num_buf);
751}
752
753/** 697/**
754 * gfs2_log_commit - Commit a transaction to the log 698 * gfs2_log_commit - Commit a transaction to the log
755 * @sdp: the filesystem 699 * @sdp: the filesystem
@@ -768,8 +712,6 @@ static void buf_lo_incore_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
768void gfs2_log_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr) 712void gfs2_log_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
769{ 713{
770 log_refund(sdp, tr); 714 log_refund(sdp, tr);
771 buf_lo_incore_commit(sdp, tr);
772
773 up_read(&sdp->sd_log_flush_lock); 715 up_read(&sdp->sd_log_flush_lock);
774 716
775 if (atomic_read(&sdp->sd_log_pinned) > atomic_read(&sdp->sd_log_thresh1) || 717 if (atomic_read(&sdp->sd_log_pinned) > atomic_read(&sdp->sd_log_thresh1) ||
@@ -798,8 +740,7 @@ void gfs2_log_shutdown(struct gfs2_sbd *sdp)
798 sdp->sd_log_flush_head = sdp->sd_log_head; 740 sdp->sd_log_flush_head = sdp->sd_log_head;
799 sdp->sd_log_flush_wrapped = 0; 741 sdp->sd_log_flush_wrapped = 0;
800 742
801 log_write_header(sdp, GFS2_LOG_HEAD_UNMOUNT, 743 log_write_header(sdp, GFS2_LOG_HEAD_UNMOUNT);
802 (sdp->sd_log_tail == current_tail(sdp)) ? 0 : PULL);
803 744
804 gfs2_assert_warn(sdp, atomic_read(&sdp->sd_log_blks_free) == sdp->sd_jdesc->jd_blocks); 745 gfs2_assert_warn(sdp, atomic_read(&sdp->sd_log_blks_free) == sdp->sd_jdesc->jd_blocks);
805 gfs2_assert_warn(sdp, sdp->sd_log_head == sdp->sd_log_tail); 746 gfs2_assert_warn(sdp, sdp->sd_log_head == sdp->sd_log_tail);
@@ -854,11 +795,9 @@ int gfs2_logd(void *data)
854 struct gfs2_sbd *sdp = data; 795 struct gfs2_sbd *sdp = data;
855 unsigned long t = 1; 796 unsigned long t = 1;
856 DEFINE_WAIT(wait); 797 DEFINE_WAIT(wait);
857 unsigned preflush;
858 798
859 while (!kthread_should_stop()) { 799 while (!kthread_should_stop()) {
860 800
861 preflush = atomic_read(&sdp->sd_log_pinned);
862 if (gfs2_jrnl_flush_reqd(sdp) || t == 0) { 801 if (gfs2_jrnl_flush_reqd(sdp) || t == 0) {
863 gfs2_ail1_empty(sdp); 802 gfs2_ail1_empty(sdp);
864 gfs2_log_flush(sdp, NULL); 803 gfs2_log_flush(sdp, NULL);
diff --git a/fs/gfs2/log.h b/fs/gfs2/log.h
index ff07454b582c..3fd5215ea25f 100644
--- a/fs/gfs2/log.h
+++ b/fs/gfs2/log.h
@@ -52,8 +52,6 @@ extern unsigned int gfs2_struct2blk(struct gfs2_sbd *sdp, unsigned int nstruct,
52 unsigned int ssize); 52 unsigned int ssize);
53 53
54extern int gfs2_log_reserve(struct gfs2_sbd *sdp, unsigned int blks); 54extern int gfs2_log_reserve(struct gfs2_sbd *sdp, unsigned int blks);
55extern void gfs2_log_incr_head(struct gfs2_sbd *sdp);
56extern u64 gfs2_log_bmap(struct gfs2_sbd *sdp, unsigned int lbn);
57extern void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl); 55extern void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl);
58extern void gfs2_log_commit(struct gfs2_sbd *sdp, struct gfs2_trans *trans); 56extern void gfs2_log_commit(struct gfs2_sbd *sdp, struct gfs2_trans *trans);
59extern void gfs2_remove_from_ail(struct gfs2_bufdata *bd); 57extern void gfs2_remove_from_ail(struct gfs2_bufdata *bd);
diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c
index 6b1efb594d90..852c1be1dd3b 100644
--- a/fs/gfs2/lops.c
+++ b/fs/gfs2/lops.c
@@ -127,146 +127,277 @@ static void gfs2_unpin(struct gfs2_sbd *sdp, struct buffer_head *bh,
127 atomic_dec(&sdp->sd_log_pinned); 127 atomic_dec(&sdp->sd_log_pinned);
128} 128}
129 129
130 130static void gfs2_log_incr_head(struct gfs2_sbd *sdp)
131static inline struct gfs2_log_descriptor *bh_log_desc(struct buffer_head *bh)
132{ 131{
133 return (struct gfs2_log_descriptor *)bh->b_data; 132 BUG_ON((sdp->sd_log_flush_head == sdp->sd_log_tail) &&
133 (sdp->sd_log_flush_head != sdp->sd_log_head));
134
135 if (++sdp->sd_log_flush_head == sdp->sd_jdesc->jd_blocks) {
136 sdp->sd_log_flush_head = 0;
137 sdp->sd_log_flush_wrapped = 1;
138 }
134} 139}
135 140
136static inline __be64 *bh_log_ptr(struct buffer_head *bh) 141static u64 gfs2_log_bmap(struct gfs2_sbd *sdp)
137{ 142{
138 struct gfs2_log_descriptor *ld = bh_log_desc(bh); 143 unsigned int lbn = sdp->sd_log_flush_head;
139 return (__force __be64 *)(ld + 1); 144 struct gfs2_journal_extent *je;
145 u64 block;
146
147 list_for_each_entry(je, &sdp->sd_jdesc->extent_list, extent_list) {
148 if (lbn >= je->lblock && lbn < je->lblock + je->blocks) {
149 block = je->dblock + lbn - je->lblock;
150 gfs2_log_incr_head(sdp);
151 return block;
152 }
153 }
154
155 return -1;
140} 156}
141 157
142static inline __be64 *bh_ptr_end(struct buffer_head *bh) 158/**
159 * gfs2_end_log_write_bh - end log write of pagecache data with buffers
160 * @sdp: The superblock
161 * @bvec: The bio_vec
162 * @error: The i/o status
163 *
164 * This finds the relevant buffers and unlocks them and sets the
165 * error flag according to the status of the i/o request. This is
166 * used when the log is writing data which has an in-place version
167 * that is pinned in the pagecache.
168 */
169
170static void gfs2_end_log_write_bh(struct gfs2_sbd *sdp, struct bio_vec *bvec,
171 int error)
143{ 172{
144 return (__force __be64 *)(bh->b_data + bh->b_size); 173 struct buffer_head *bh, *next;
174 struct page *page = bvec->bv_page;
175 unsigned size;
176
177 bh = page_buffers(page);
178 size = bvec->bv_len;
179 while (bh_offset(bh) < bvec->bv_offset)
180 bh = bh->b_this_page;
181 do {
182 if (error)
183 set_buffer_write_io_error(bh);
184 unlock_buffer(bh);
185 next = bh->b_this_page;
186 size -= bh->b_size;
187 brelse(bh);
188 bh = next;
189 } while(bh && size);
145} 190}
146 191
147/** 192/**
148 * gfs2_log_write_endio - End of I/O for a log buffer 193 * gfs2_end_log_write - end of i/o to the log
149 * @bh: The buffer head 194 * @bio: The bio
150 * @uptodate: I/O Status 195 * @error: Status of i/o request
196 *
197 * Each bio_vec contains either data from the pagecache or data
198 * relating to the log itself. Here we iterate over the bio_vec
199 * array, processing both kinds of data.
151 * 200 *
152 */ 201 */
153 202
154static void gfs2_log_write_endio(struct buffer_head *bh, int uptodate) 203static void gfs2_end_log_write(struct bio *bio, int error)
155{ 204{
156 struct gfs2_sbd *sdp = bh->b_private; 205 struct gfs2_sbd *sdp = bio->bi_private;
157 bh->b_private = NULL; 206 struct bio_vec *bvec;
207 struct page *page;
208 int i;
158 209
159 end_buffer_write_sync(bh, uptodate); 210 if (error) {
211 sdp->sd_log_error = error;
212 fs_err(sdp, "Error %d writing to log\n", error);
213 }
214
215 bio_for_each_segment(bvec, bio, i) {
216 page = bvec->bv_page;
217 if (page_has_buffers(page))
218 gfs2_end_log_write_bh(sdp, bvec, error);
219 else
220 mempool_free(page, gfs2_page_pool);
221 }
222
223 bio_put(bio);
160 if (atomic_dec_and_test(&sdp->sd_log_in_flight)) 224 if (atomic_dec_and_test(&sdp->sd_log_in_flight))
161 wake_up(&sdp->sd_log_flush_wait); 225 wake_up(&sdp->sd_log_flush_wait);
162} 226}
163 227
164/** 228/**
165 * gfs2_log_get_buf - Get and initialize a buffer to use for log control data 229 * gfs2_log_flush_bio - Submit any pending log bio
166 * @sdp: The GFS2 superblock 230 * @sdp: The superblock
231 * @rw: The rw flags
167 * 232 *
168 * Returns: the buffer_head 233 * Submit any pending part-built or full bio to the block device. If
234 * there is no pending bio, then this is a no-op.
169 */ 235 */
170 236
171static struct buffer_head *gfs2_log_get_buf(struct gfs2_sbd *sdp) 237void gfs2_log_flush_bio(struct gfs2_sbd *sdp, int rw)
172{ 238{
173 u64 blkno = gfs2_log_bmap(sdp, sdp->sd_log_flush_head); 239 if (sdp->sd_log_bio) {
174 struct buffer_head *bh; 240 atomic_inc(&sdp->sd_log_in_flight);
241 submit_bio(rw, sdp->sd_log_bio);
242 sdp->sd_log_bio = NULL;
243 }
244}
175 245
176 bh = sb_getblk(sdp->sd_vfs, blkno); 246/**
177 lock_buffer(bh); 247 * gfs2_log_alloc_bio - Allocate a new bio for log writing
178 memset(bh->b_data, 0, bh->b_size); 248 * @sdp: The superblock
179 set_buffer_uptodate(bh); 249 * @blkno: The next device block number we want to write to
180 clear_buffer_dirty(bh); 250 *
181 gfs2_log_incr_head(sdp); 251 * This should never be called when there is a cached bio in the
182 atomic_inc(&sdp->sd_log_in_flight); 252 * super block. When it returns, there will be a cached bio in the
183 bh->b_private = sdp; 253 * super block which will have as many bio_vecs as the device is
184 bh->b_end_io = gfs2_log_write_endio; 254 * happy to handle.
255 *
256 * Returns: Newly allocated bio
257 */
185 258
186 return bh; 259static struct bio *gfs2_log_alloc_bio(struct gfs2_sbd *sdp, u64 blkno)
260{
261 struct super_block *sb = sdp->sd_vfs;
262 unsigned nrvecs = bio_get_nr_vecs(sb->s_bdev);
263 struct bio *bio;
264
265 BUG_ON(sdp->sd_log_bio);
266
267 while (1) {
268 bio = bio_alloc(GFP_NOIO, nrvecs);
269 if (likely(bio))
270 break;
271 nrvecs = max(nrvecs/2, 1U);
272 }
273
274 bio->bi_sector = blkno * (sb->s_blocksize >> 9);
275 bio->bi_bdev = sb->s_bdev;
276 bio->bi_end_io = gfs2_end_log_write;
277 bio->bi_private = sdp;
278
279 sdp->sd_log_bio = bio;
280
281 return bio;
187} 282}
188 283
189/** 284/**
190 * gfs2_fake_write_endio - 285 * gfs2_log_get_bio - Get cached log bio, or allocate a new one
191 * @bh: The buffer head 286 * @sdp: The superblock
192 * @uptodate: The I/O Status 287 * @blkno: The device block number we want to write to
288 *
289 * If there is a cached bio, then if the next block number is sequential
290 * with the previous one, return it, otherwise flush the bio to the
291 * device. If there is not a cached bio, or we just flushed it, then
292 * allocate a new one.
193 * 293 *
294 * Returns: The bio to use for log writes
194 */ 295 */
195 296
196static void gfs2_fake_write_endio(struct buffer_head *bh, int uptodate) 297static struct bio *gfs2_log_get_bio(struct gfs2_sbd *sdp, u64 blkno)
197{ 298{
198 struct buffer_head *real_bh = bh->b_private; 299 struct bio *bio = sdp->sd_log_bio;
199 struct gfs2_bufdata *bd = real_bh->b_private; 300 u64 nblk;
200 struct gfs2_sbd *sdp = bd->bd_gl->gl_sbd; 301
302 if (bio) {
303 nblk = bio->bi_sector + bio_sectors(bio);
304 nblk >>= sdp->sd_fsb2bb_shift;
305 if (blkno == nblk)
306 return bio;
307 gfs2_log_flush_bio(sdp, WRITE);
308 }
201 309
202 end_buffer_write_sync(bh, uptodate); 310 return gfs2_log_alloc_bio(sdp, blkno);
203 mempool_free(bh, gfs2_bh_pool);
204 unlock_buffer(real_bh);
205 brelse(real_bh);
206 if (atomic_dec_and_test(&sdp->sd_log_in_flight))
207 wake_up(&sdp->sd_log_flush_wait);
208} 311}
209 312
313
210/** 314/**
211 * gfs2_log_fake_buf - Build a fake buffer head to write metadata buffer to log 315 * gfs2_log_write - write to log
212 * @sdp: the filesystem 316 * @sdp: the filesystem
213 * @data: the data the buffer_head should point to 317 * @page: the page to write
318 * @size: the size of the data to write
319 * @offset: the offset within the page
214 * 320 *
215 * Returns: the log buffer descriptor 321 * Try and add the page segment to the current bio. If that fails,
322 * submit the current bio to the device and create a new one, and
323 * then add the page segment to that.
216 */ 324 */
217 325
218static struct buffer_head *gfs2_log_fake_buf(struct gfs2_sbd *sdp, 326static void gfs2_log_write(struct gfs2_sbd *sdp, struct page *page,
219 struct buffer_head *real) 327 unsigned size, unsigned offset)
220{ 328{
221 u64 blkno = gfs2_log_bmap(sdp, sdp->sd_log_flush_head); 329 u64 blkno = gfs2_log_bmap(sdp);
222 struct buffer_head *bh; 330 struct bio *bio;
331 int ret;
332
333 bio = gfs2_log_get_bio(sdp, blkno);
334 ret = bio_add_page(bio, page, size, offset);
335 if (ret == 0) {
336 gfs2_log_flush_bio(sdp, WRITE);
337 bio = gfs2_log_alloc_bio(sdp, blkno);
338 ret = bio_add_page(bio, page, size, offset);
339 WARN_ON(ret == 0);
340 }
341}
342
343/**
344 * gfs2_log_write_bh - write a buffer's content to the log
345 * @sdp: The super block
346 * @bh: The buffer pointing to the in-place location
347 *
348 * This writes the content of the buffer to the next available location
349 * in the log. The buffer will be unlocked once the i/o to the log has
350 * completed.
351 */
223 352
224 bh = mempool_alloc(gfs2_bh_pool, GFP_NOFS); 353static void gfs2_log_write_bh(struct gfs2_sbd *sdp, struct buffer_head *bh)
225 atomic_set(&bh->b_count, 1); 354{
226 bh->b_state = (1 << BH_Mapped) | (1 << BH_Uptodate) | (1 << BH_Lock); 355 gfs2_log_write(sdp, bh->b_page, bh->b_size, bh_offset(bh));
227 set_bh_page(bh, real->b_page, bh_offset(real)); 356}
228 bh->b_blocknr = blkno;
229 bh->b_size = sdp->sd_sb.sb_bsize;
230 bh->b_bdev = sdp->sd_vfs->s_bdev;
231 bh->b_private = real;
232 bh->b_end_io = gfs2_fake_write_endio;
233 357
234 gfs2_log_incr_head(sdp); 358/**
235 atomic_inc(&sdp->sd_log_in_flight); 359 * gfs2_log_write_page - write one block stored in a page, into the log
360 * @sdp: The superblock
361 * @page: The struct page
362 *
363 * This writes the first block-sized part of the page into the log. Note
364 * that the page must have been allocated from the gfs2_page_pool mempool
365 * and that after this has been called, ownership has been transferred and
366 * the page may be freed at any time.
367 */
236 368
237 return bh; 369void gfs2_log_write_page(struct gfs2_sbd *sdp, struct page *page)
370{
371 struct super_block *sb = sdp->sd_vfs;
372 gfs2_log_write(sdp, page, sb->s_blocksize, 0);
238} 373}
239 374
240static struct buffer_head *gfs2_get_log_desc(struct gfs2_sbd *sdp, u32 ld_type) 375static struct page *gfs2_get_log_desc(struct gfs2_sbd *sdp, u32 ld_type,
376 u32 ld_length, u32 ld_data1)
241{ 377{
242 struct buffer_head *bh = gfs2_log_get_buf(sdp); 378 struct page *page = mempool_alloc(gfs2_page_pool, GFP_NOIO);
243 struct gfs2_log_descriptor *ld = bh_log_desc(bh); 379 struct gfs2_log_descriptor *ld = page_address(page);
380 clear_page(ld);
244 ld->ld_header.mh_magic = cpu_to_be32(GFS2_MAGIC); 381 ld->ld_header.mh_magic = cpu_to_be32(GFS2_MAGIC);
245 ld->ld_header.mh_type = cpu_to_be32(GFS2_METATYPE_LD); 382 ld->ld_header.mh_type = cpu_to_be32(GFS2_METATYPE_LD);
246 ld->ld_header.mh_format = cpu_to_be32(GFS2_FORMAT_LD); 383 ld->ld_header.mh_format = cpu_to_be32(GFS2_FORMAT_LD);
247 ld->ld_type = cpu_to_be32(ld_type); 384 ld->ld_type = cpu_to_be32(ld_type);
248 ld->ld_length = 0; 385 ld->ld_length = cpu_to_be32(ld_length);
249 ld->ld_data1 = 0; 386 ld->ld_data1 = cpu_to_be32(ld_data1);
250 ld->ld_data2 = 0; 387 ld->ld_data2 = 0;
251 memset(ld->ld_reserved, 0, sizeof(ld->ld_reserved)); 388 return page;
252 return bh;
253} 389}
254 390
255static void buf_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le) 391static void buf_lo_add(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd)
256{ 392{
257 struct gfs2_bufdata *bd = container_of(le, struct gfs2_bufdata, bd_le);
258 struct gfs2_meta_header *mh; 393 struct gfs2_meta_header *mh;
259 struct gfs2_trans *tr; 394 struct gfs2_trans *tr;
260 395
261 lock_buffer(bd->bd_bh); 396 lock_buffer(bd->bd_bh);
262 gfs2_log_lock(sdp); 397 gfs2_log_lock(sdp);
263 if (!list_empty(&bd->bd_list_tr))
264 goto out;
265 tr = current->journal_info; 398 tr = current->journal_info;
266 tr->tr_touched = 1; 399 tr->tr_touched = 1;
267 tr->tr_num_buf++; 400 if (!list_empty(&bd->bd_list))
268 list_add(&bd->bd_list_tr, &tr->tr_list_buf);
269 if (!list_empty(&le->le_list))
270 goto out; 401 goto out;
271 set_bit(GLF_LFLUSH, &bd->bd_gl->gl_flags); 402 set_bit(GLF_LFLUSH, &bd->bd_gl->gl_flags);
272 set_bit(GLF_DIRTY, &bd->bd_gl->gl_flags); 403 set_bit(GLF_DIRTY, &bd->bd_gl->gl_flags);
@@ -276,62 +407,86 @@ static void buf_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le)
276 mh->__pad0 = cpu_to_be64(0); 407 mh->__pad0 = cpu_to_be64(0);
277 mh->mh_jid = cpu_to_be32(sdp->sd_jdesc->jd_jid); 408 mh->mh_jid = cpu_to_be32(sdp->sd_jdesc->jd_jid);
278 sdp->sd_log_num_buf++; 409 sdp->sd_log_num_buf++;
279 list_add(&le->le_list, &sdp->sd_log_le_buf); 410 list_add(&bd->bd_list, &sdp->sd_log_le_buf);
280 tr->tr_num_buf_new++; 411 tr->tr_num_buf_new++;
281out: 412out:
282 gfs2_log_unlock(sdp); 413 gfs2_log_unlock(sdp);
283 unlock_buffer(bd->bd_bh); 414 unlock_buffer(bd->bd_bh);
284} 415}
285 416
286static void buf_lo_before_commit(struct gfs2_sbd *sdp) 417static void gfs2_check_magic(struct buffer_head *bh)
418{
419 void *kaddr;
420 __be32 *ptr;
421
422 clear_buffer_escaped(bh);
423 kaddr = kmap_atomic(bh->b_page);
424 ptr = kaddr + bh_offset(bh);
425 if (*ptr == cpu_to_be32(GFS2_MAGIC))
426 set_buffer_escaped(bh);
427 kunmap_atomic(kaddr);
428}
429
430static void gfs2_before_commit(struct gfs2_sbd *sdp, unsigned int limit,
431 unsigned int total, struct list_head *blist,
432 bool is_databuf)
287{ 433{
288 struct buffer_head *bh;
289 struct gfs2_log_descriptor *ld; 434 struct gfs2_log_descriptor *ld;
290 struct gfs2_bufdata *bd1 = NULL, *bd2; 435 struct gfs2_bufdata *bd1 = NULL, *bd2;
291 unsigned int total; 436 struct page *page;
292 unsigned int limit;
293 unsigned int num; 437 unsigned int num;
294 unsigned n; 438 unsigned n;
295 __be64 *ptr; 439 __be64 *ptr;
296 440
297 limit = buf_limit(sdp);
298 /* for 4k blocks, limit = 503 */
299
300 gfs2_log_lock(sdp); 441 gfs2_log_lock(sdp);
301 total = sdp->sd_log_num_buf; 442 bd1 = bd2 = list_prepare_entry(bd1, blist, bd_list);
302 bd1 = bd2 = list_prepare_entry(bd1, &sdp->sd_log_le_buf, bd_le.le_list);
303 while(total) { 443 while(total) {
304 num = total; 444 num = total;
305 if (total > limit) 445 if (total > limit)
306 num = limit; 446 num = limit;
307 gfs2_log_unlock(sdp); 447 gfs2_log_unlock(sdp);
308 bh = gfs2_get_log_desc(sdp, GFS2_LOG_DESC_METADATA); 448 page = gfs2_get_log_desc(sdp, GFS2_LOG_DESC_METADATA, num + 1, num);
449 ld = page_address(page);
309 gfs2_log_lock(sdp); 450 gfs2_log_lock(sdp);
310 ld = bh_log_desc(bh); 451 ptr = (__be64 *)(ld + 1);
311 ptr = bh_log_ptr(bh);
312 ld->ld_length = cpu_to_be32(num + 1);
313 ld->ld_data1 = cpu_to_be32(num);
314 452
315 n = 0; 453 n = 0;
316 list_for_each_entry_continue(bd1, &sdp->sd_log_le_buf, 454 list_for_each_entry_continue(bd1, blist, bd_list) {
317 bd_le.le_list) {
318 *ptr++ = cpu_to_be64(bd1->bd_bh->b_blocknr); 455 *ptr++ = cpu_to_be64(bd1->bd_bh->b_blocknr);
456 if (is_databuf) {
457 gfs2_check_magic(bd1->bd_bh);
458 *ptr++ = cpu_to_be64(buffer_escaped(bd1->bd_bh) ? 1 : 0);
459 }
319 if (++n >= num) 460 if (++n >= num)
320 break; 461 break;
321 } 462 }
322 463
323 gfs2_log_unlock(sdp); 464 gfs2_log_unlock(sdp);
324 submit_bh(WRITE_SYNC, bh); 465 gfs2_log_write_page(sdp, page);
325 gfs2_log_lock(sdp); 466 gfs2_log_lock(sdp);
326 467
327 n = 0; 468 n = 0;
328 list_for_each_entry_continue(bd2, &sdp->sd_log_le_buf, 469 list_for_each_entry_continue(bd2, blist, bd_list) {
329 bd_le.le_list) {
330 get_bh(bd2->bd_bh); 470 get_bh(bd2->bd_bh);
331 gfs2_log_unlock(sdp); 471 gfs2_log_unlock(sdp);
332 lock_buffer(bd2->bd_bh); 472 lock_buffer(bd2->bd_bh);
333 bh = gfs2_log_fake_buf(sdp, bd2->bd_bh); 473
334 submit_bh(WRITE_SYNC, bh); 474 if (buffer_escaped(bd2->bd_bh)) {
475 void *kaddr;
476 page = mempool_alloc(gfs2_page_pool, GFP_NOIO);
477 ptr = page_address(page);
478 kaddr = kmap_atomic(bd2->bd_bh->b_page);
479 memcpy(ptr, kaddr + bh_offset(bd2->bd_bh),
480 bd2->bd_bh->b_size);
481 kunmap_atomic(kaddr);
482 *(__be32 *)ptr = 0;
483 clear_buffer_escaped(bd2->bd_bh);
484 unlock_buffer(bd2->bd_bh);
485 brelse(bd2->bd_bh);
486 gfs2_log_write_page(sdp, page);
487 } else {
488 gfs2_log_write_bh(sdp, bd2->bd_bh);
489 }
335 gfs2_log_lock(sdp); 490 gfs2_log_lock(sdp);
336 if (++n >= num) 491 if (++n >= num)
337 break; 492 break;
@@ -343,14 +498,22 @@ static void buf_lo_before_commit(struct gfs2_sbd *sdp)
343 gfs2_log_unlock(sdp); 498 gfs2_log_unlock(sdp);
344} 499}
345 500
501static void buf_lo_before_commit(struct gfs2_sbd *sdp)
502{
503 unsigned int limit = buf_limit(sdp); /* 503 for 4k blocks */
504
505 gfs2_before_commit(sdp, limit, sdp->sd_log_num_buf,
506 &sdp->sd_log_le_buf, 0);
507}
508
346static void buf_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_ail *ai) 509static void buf_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_ail *ai)
347{ 510{
348 struct list_head *head = &sdp->sd_log_le_buf; 511 struct list_head *head = &sdp->sd_log_le_buf;
349 struct gfs2_bufdata *bd; 512 struct gfs2_bufdata *bd;
350 513
351 while (!list_empty(head)) { 514 while (!list_empty(head)) {
352 bd = list_entry(head->next, struct gfs2_bufdata, bd_le.le_list); 515 bd = list_entry(head->next, struct gfs2_bufdata, bd_list);
353 list_del_init(&bd->bd_le.le_list); 516 list_del_init(&bd->bd_list);
354 sdp->sd_log_num_buf--; 517 sdp->sd_log_num_buf--;
355 518
356 gfs2_unpin(sdp, bd->bd_bh, ai); 519 gfs2_unpin(sdp, bd->bd_bh, ai);
@@ -437,9 +600,8 @@ static void buf_lo_after_scan(struct gfs2_jdesc *jd, int error, int pass)
437 jd->jd_jid, sdp->sd_replayed_blocks, sdp->sd_found_blocks); 600 jd->jd_jid, sdp->sd_replayed_blocks, sdp->sd_found_blocks);
438} 601}
439 602
440static void revoke_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le) 603static void revoke_lo_add(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd)
441{ 604{
442 struct gfs2_bufdata *bd = container_of(le, struct gfs2_bufdata, bd_le);
443 struct gfs2_glock *gl = bd->bd_gl; 605 struct gfs2_glock *gl = bd->bd_gl;
444 struct gfs2_trans *tr; 606 struct gfs2_trans *tr;
445 607
@@ -449,48 +611,48 @@ static void revoke_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le)
449 sdp->sd_log_num_revoke++; 611 sdp->sd_log_num_revoke++;
450 atomic_inc(&gl->gl_revokes); 612 atomic_inc(&gl->gl_revokes);
451 set_bit(GLF_LFLUSH, &gl->gl_flags); 613 set_bit(GLF_LFLUSH, &gl->gl_flags);
452 list_add(&le->le_list, &sdp->sd_log_le_revoke); 614 list_add(&bd->bd_list, &sdp->sd_log_le_revoke);
453} 615}
454 616
455static void revoke_lo_before_commit(struct gfs2_sbd *sdp) 617static void revoke_lo_before_commit(struct gfs2_sbd *sdp)
456{ 618{
457 struct gfs2_log_descriptor *ld; 619 struct gfs2_log_descriptor *ld;
458 struct gfs2_meta_header *mh; 620 struct gfs2_meta_header *mh;
459 struct buffer_head *bh;
460 unsigned int offset; 621 unsigned int offset;
461 struct list_head *head = &sdp->sd_log_le_revoke; 622 struct list_head *head = &sdp->sd_log_le_revoke;
462 struct gfs2_bufdata *bd; 623 struct gfs2_bufdata *bd;
624 struct page *page;
625 unsigned int length;
463 626
464 if (!sdp->sd_log_num_revoke) 627 if (!sdp->sd_log_num_revoke)
465 return; 628 return;
466 629
467 bh = gfs2_get_log_desc(sdp, GFS2_LOG_DESC_REVOKE); 630 length = gfs2_struct2blk(sdp, sdp->sd_log_num_revoke, sizeof(u64));
468 ld = bh_log_desc(bh); 631 page = gfs2_get_log_desc(sdp, GFS2_LOG_DESC_REVOKE, length, sdp->sd_log_num_revoke);
469 ld->ld_length = cpu_to_be32(gfs2_struct2blk(sdp, sdp->sd_log_num_revoke, 632 ld = page_address(page);
470 sizeof(u64)));
471 ld->ld_data1 = cpu_to_be32(sdp->sd_log_num_revoke);
472 offset = sizeof(struct gfs2_log_descriptor); 633 offset = sizeof(struct gfs2_log_descriptor);
473 634
474 list_for_each_entry(bd, head, bd_le.le_list) { 635 list_for_each_entry(bd, head, bd_list) {
475 sdp->sd_log_num_revoke--; 636 sdp->sd_log_num_revoke--;
476 637
477 if (offset + sizeof(u64) > sdp->sd_sb.sb_bsize) { 638 if (offset + sizeof(u64) > sdp->sd_sb.sb_bsize) {
478 submit_bh(WRITE_SYNC, bh);
479 639
480 bh = gfs2_log_get_buf(sdp); 640 gfs2_log_write_page(sdp, page);
481 mh = (struct gfs2_meta_header *)bh->b_data; 641 page = mempool_alloc(gfs2_page_pool, GFP_NOIO);
642 mh = page_address(page);
643 clear_page(mh);
482 mh->mh_magic = cpu_to_be32(GFS2_MAGIC); 644 mh->mh_magic = cpu_to_be32(GFS2_MAGIC);
483 mh->mh_type = cpu_to_be32(GFS2_METATYPE_LB); 645 mh->mh_type = cpu_to_be32(GFS2_METATYPE_LB);
484 mh->mh_format = cpu_to_be32(GFS2_FORMAT_LB); 646 mh->mh_format = cpu_to_be32(GFS2_FORMAT_LB);
485 offset = sizeof(struct gfs2_meta_header); 647 offset = sizeof(struct gfs2_meta_header);
486 } 648 }
487 649
488 *(__be64 *)(bh->b_data + offset) = cpu_to_be64(bd->bd_blkno); 650 *(__be64 *)(page_address(page) + offset) = cpu_to_be64(bd->bd_blkno);
489 offset += sizeof(u64); 651 offset += sizeof(u64);
490 } 652 }
491 gfs2_assert_withdraw(sdp, !sdp->sd_log_num_revoke); 653 gfs2_assert_withdraw(sdp, !sdp->sd_log_num_revoke);
492 654
493 submit_bh(WRITE_SYNC, bh); 655 gfs2_log_write_page(sdp, page);
494} 656}
495 657
496static void revoke_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_ail *ai) 658static void revoke_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_ail *ai)
@@ -500,8 +662,8 @@ static void revoke_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_ail *ai)
500 struct gfs2_glock *gl; 662 struct gfs2_glock *gl;
501 663
502 while (!list_empty(head)) { 664 while (!list_empty(head)) {
503 bd = list_entry(head->next, struct gfs2_bufdata, bd_le.le_list); 665 bd = list_entry(head->next, struct gfs2_bufdata, bd_list);
504 list_del_init(&bd->bd_le.le_list); 666 list_del_init(&bd->bd_list);
505 gl = bd->bd_gl; 667 gl = bd->bd_gl;
506 atomic_dec(&gl->gl_revokes); 668 atomic_dec(&gl->gl_revokes);
507 clear_bit(GLF_LFLUSH, &gl->gl_flags); 669 clear_bit(GLF_LFLUSH, &gl->gl_flags);
@@ -604,108 +766,33 @@ static void revoke_lo_after_scan(struct gfs2_jdesc *jd, int error, int pass)
604 * blocks, which isn't an enormous overhead but twice as much as 766 * blocks, which isn't an enormous overhead but twice as much as
605 * for normal metadata blocks. 767 * for normal metadata blocks.
606 */ 768 */
607static void databuf_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le) 769static void databuf_lo_add(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd)
608{ 770{
609 struct gfs2_bufdata *bd = container_of(le, struct gfs2_bufdata, bd_le);
610 struct gfs2_trans *tr = current->journal_info; 771 struct gfs2_trans *tr = current->journal_info;
611 struct address_space *mapping = bd->bd_bh->b_page->mapping; 772 struct address_space *mapping = bd->bd_bh->b_page->mapping;
612 struct gfs2_inode *ip = GFS2_I(mapping->host); 773 struct gfs2_inode *ip = GFS2_I(mapping->host);
613 774
614 lock_buffer(bd->bd_bh); 775 lock_buffer(bd->bd_bh);
615 gfs2_log_lock(sdp); 776 gfs2_log_lock(sdp);
616 if (tr) { 777 if (tr)
617 if (!list_empty(&bd->bd_list_tr))
618 goto out;
619 tr->tr_touched = 1; 778 tr->tr_touched = 1;
620 if (gfs2_is_jdata(ip)) { 779 if (!list_empty(&bd->bd_list))
621 tr->tr_num_buf++;
622 list_add(&bd->bd_list_tr, &tr->tr_list_buf);
623 }
624 }
625 if (!list_empty(&le->le_list))
626 goto out; 780 goto out;
627
628 set_bit(GLF_LFLUSH, &bd->bd_gl->gl_flags); 781 set_bit(GLF_LFLUSH, &bd->bd_gl->gl_flags);
629 set_bit(GLF_DIRTY, &bd->bd_gl->gl_flags); 782 set_bit(GLF_DIRTY, &bd->bd_gl->gl_flags);
630 if (gfs2_is_jdata(ip)) { 783 if (gfs2_is_jdata(ip)) {
631 gfs2_pin(sdp, bd->bd_bh); 784 gfs2_pin(sdp, bd->bd_bh);
632 tr->tr_num_databuf_new++; 785 tr->tr_num_databuf_new++;
633 sdp->sd_log_num_databuf++; 786 sdp->sd_log_num_databuf++;
634 list_add_tail(&le->le_list, &sdp->sd_log_le_databuf); 787 list_add_tail(&bd->bd_list, &sdp->sd_log_le_databuf);
635 } else { 788 } else {
636 list_add_tail(&le->le_list, &sdp->sd_log_le_ordered); 789 list_add_tail(&bd->bd_list, &sdp->sd_log_le_ordered);
637 } 790 }
638out: 791out:
639 gfs2_log_unlock(sdp); 792 gfs2_log_unlock(sdp);
640 unlock_buffer(bd->bd_bh); 793 unlock_buffer(bd->bd_bh);
641} 794}
642 795
643static void gfs2_check_magic(struct buffer_head *bh)
644{
645 void *kaddr;
646 __be32 *ptr;
647
648 clear_buffer_escaped(bh);
649 kaddr = kmap_atomic(bh->b_page);
650 ptr = kaddr + bh_offset(bh);
651 if (*ptr == cpu_to_be32(GFS2_MAGIC))
652 set_buffer_escaped(bh);
653 kunmap_atomic(kaddr);
654}
655
656static void gfs2_write_blocks(struct gfs2_sbd *sdp, struct buffer_head *bh,
657 struct list_head *list, struct list_head *done,
658 unsigned int n)
659{
660 struct buffer_head *bh1;
661 struct gfs2_log_descriptor *ld;
662 struct gfs2_bufdata *bd;
663 __be64 *ptr;
664
665 if (!bh)
666 return;
667
668 ld = bh_log_desc(bh);
669 ld->ld_length = cpu_to_be32(n + 1);
670 ld->ld_data1 = cpu_to_be32(n);
671
672 ptr = bh_log_ptr(bh);
673
674 get_bh(bh);
675 submit_bh(WRITE_SYNC, bh);
676 gfs2_log_lock(sdp);
677 while(!list_empty(list)) {
678 bd = list_entry(list->next, struct gfs2_bufdata, bd_le.le_list);
679 list_move_tail(&bd->bd_le.le_list, done);
680 get_bh(bd->bd_bh);
681 while (be64_to_cpu(*ptr) != bd->bd_bh->b_blocknr) {
682 gfs2_log_incr_head(sdp);
683 ptr += 2;
684 }
685 gfs2_log_unlock(sdp);
686 lock_buffer(bd->bd_bh);
687 if (buffer_escaped(bd->bd_bh)) {
688 void *kaddr;
689 bh1 = gfs2_log_get_buf(sdp);
690 kaddr = kmap_atomic(bd->bd_bh->b_page);
691 memcpy(bh1->b_data, kaddr + bh_offset(bd->bd_bh),
692 bh1->b_size);
693 kunmap_atomic(kaddr);
694 *(__be32 *)bh1->b_data = 0;
695 clear_buffer_escaped(bd->bd_bh);
696 unlock_buffer(bd->bd_bh);
697 brelse(bd->bd_bh);
698 } else {
699 bh1 = gfs2_log_fake_buf(sdp, bd->bd_bh);
700 }
701 submit_bh(WRITE_SYNC, bh1);
702 gfs2_log_lock(sdp);
703 ptr += 2;
704 }
705 gfs2_log_unlock(sdp);
706 brelse(bh);
707}
708
709/** 796/**
710 * databuf_lo_before_commit - Scan the data buffers, writing as we go 797 * databuf_lo_before_commit - Scan the data buffers, writing as we go
711 * 798 *
@@ -713,37 +800,10 @@ static void gfs2_write_blocks(struct gfs2_sbd *sdp, struct buffer_head *bh,
713 800
714static void databuf_lo_before_commit(struct gfs2_sbd *sdp) 801static void databuf_lo_before_commit(struct gfs2_sbd *sdp)
715{ 802{
716 struct gfs2_bufdata *bd = NULL; 803 unsigned int limit = buf_limit(sdp) / 2;
717 struct buffer_head *bh = NULL;
718 unsigned int n = 0;
719 __be64 *ptr = NULL, *end = NULL;
720 LIST_HEAD(processed);
721 LIST_HEAD(in_progress);
722 804
723 gfs2_log_lock(sdp); 805 gfs2_before_commit(sdp, limit, sdp->sd_log_num_databuf,
724 while (!list_empty(&sdp->sd_log_le_databuf)) { 806 &sdp->sd_log_le_databuf, 1);
725 if (ptr == end) {
726 gfs2_log_unlock(sdp);
727 gfs2_write_blocks(sdp, bh, &in_progress, &processed, n);
728 n = 0;
729 bh = gfs2_get_log_desc(sdp, GFS2_LOG_DESC_JDATA);
730 ptr = bh_log_ptr(bh);
731 end = bh_ptr_end(bh) - 1;
732 gfs2_log_lock(sdp);
733 continue;
734 }
735 bd = list_entry(sdp->sd_log_le_databuf.next, struct gfs2_bufdata, bd_le.le_list);
736 list_move_tail(&bd->bd_le.le_list, &in_progress);
737 gfs2_check_magic(bd->bd_bh);
738 *ptr++ = cpu_to_be64(bd->bd_bh->b_blocknr);
739 *ptr++ = cpu_to_be64(buffer_escaped(bh) ? 1 : 0);
740 n++;
741 }
742 gfs2_log_unlock(sdp);
743 gfs2_write_blocks(sdp, bh, &in_progress, &processed, n);
744 gfs2_log_lock(sdp);
745 list_splice(&processed, &sdp->sd_log_le_databuf);
746 gfs2_log_unlock(sdp);
747} 807}
748 808
749static int databuf_lo_scan_elements(struct gfs2_jdesc *jd, unsigned int start, 809static int databuf_lo_scan_elements(struct gfs2_jdesc *jd, unsigned int start,
@@ -822,8 +882,8 @@ static void databuf_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_ail *ai)
822 struct gfs2_bufdata *bd; 882 struct gfs2_bufdata *bd;
823 883
824 while (!list_empty(head)) { 884 while (!list_empty(head)) {
825 bd = list_entry(head->next, struct gfs2_bufdata, bd_le.le_list); 885 bd = list_entry(head->next, struct gfs2_bufdata, bd_list);
826 list_del_init(&bd->bd_le.le_list); 886 list_del_init(&bd->bd_list);
827 sdp->sd_log_num_databuf--; 887 sdp->sd_log_num_databuf--;
828 gfs2_unpin(sdp, bd->bd_bh, ai); 888 gfs2_unpin(sdp, bd->bd_bh, ai);
829 } 889 }
diff --git a/fs/gfs2/lops.h b/fs/gfs2/lops.h
index 3c0b2737658a..954a330585f4 100644
--- a/fs/gfs2/lops.h
+++ b/fs/gfs2/lops.h
@@ -27,6 +27,8 @@ extern const struct gfs2_log_operations gfs2_rg_lops;
27extern const struct gfs2_log_operations gfs2_databuf_lops; 27extern const struct gfs2_log_operations gfs2_databuf_lops;
28 28
29extern const struct gfs2_log_operations *gfs2_log_ops[]; 29extern const struct gfs2_log_operations *gfs2_log_ops[];
30extern void gfs2_log_write_page(struct gfs2_sbd *sdp, struct page *page);
31extern void gfs2_log_flush_bio(struct gfs2_sbd *sdp, int rw);
30 32
31static inline unsigned int buf_limit(struct gfs2_sbd *sdp) 33static inline unsigned int buf_limit(struct gfs2_sbd *sdp)
32{ 34{
@@ -44,17 +46,17 @@ static inline unsigned int databuf_limit(struct gfs2_sbd *sdp)
44 return limit; 46 return limit;
45} 47}
46 48
47static inline void lops_init_le(struct gfs2_log_element *le, 49static inline void lops_init_le(struct gfs2_bufdata *bd,
48 const struct gfs2_log_operations *lops) 50 const struct gfs2_log_operations *lops)
49{ 51{
50 INIT_LIST_HEAD(&le->le_list); 52 INIT_LIST_HEAD(&bd->bd_list);
51 le->le_ops = lops; 53 bd->bd_ops = lops;
52} 54}
53 55
54static inline void lops_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le) 56static inline void lops_add(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd)
55{ 57{
56 if (le->le_ops->lo_add) 58 if (bd->bd_ops->lo_add)
57 le->le_ops->lo_add(sdp, le); 59 bd->bd_ops->lo_add(sdp, bd);
58} 60}
59 61
60static inline void lops_before_commit(struct gfs2_sbd *sdp) 62static inline void lops_before_commit(struct gfs2_sbd *sdp)
diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c
index 754426b1e52c..6cdb0f2a1b09 100644
--- a/fs/gfs2/main.c
+++ b/fs/gfs2/main.c
@@ -70,16 +70,6 @@ static void gfs2_init_gl_aspace_once(void *foo)
70 address_space_init_once(mapping); 70 address_space_init_once(mapping);
71} 71}
72 72
73static void *gfs2_bh_alloc(gfp_t mask, void *data)
74{
75 return alloc_buffer_head(mask);
76}
77
78static void gfs2_bh_free(void *ptr, void *data)
79{
80 return free_buffer_head(ptr);
81}
82
83/** 73/**
84 * init_gfs2_fs - Register GFS2 as a filesystem 74 * init_gfs2_fs - Register GFS2 as a filesystem
85 * 75 *
@@ -143,6 +133,12 @@ static int __init init_gfs2_fs(void)
143 if (!gfs2_quotad_cachep) 133 if (!gfs2_quotad_cachep)
144 goto fail; 134 goto fail;
145 135
136 gfs2_rsrv_cachep = kmem_cache_create("gfs2_mblk",
137 sizeof(struct gfs2_blkreserv),
138 0, 0, NULL);
139 if (!gfs2_rsrv_cachep)
140 goto fail;
141
146 register_shrinker(&qd_shrinker); 142 register_shrinker(&qd_shrinker);
147 143
148 error = register_filesystem(&gfs2_fs_type); 144 error = register_filesystem(&gfs2_fs_type);
@@ -164,8 +160,8 @@ static int __init init_gfs2_fs(void)
164 if (!gfs2_control_wq) 160 if (!gfs2_control_wq)
165 goto fail_recovery; 161 goto fail_recovery;
166 162
167 gfs2_bh_pool = mempool_create(1024, gfs2_bh_alloc, gfs2_bh_free, NULL); 163 gfs2_page_pool = mempool_create_page_pool(64, 0);
168 if (!gfs2_bh_pool) 164 if (!gfs2_page_pool)
169 goto fail_control; 165 goto fail_control;
170 166
171 gfs2_register_debugfs(); 167 gfs2_register_debugfs();
@@ -186,6 +182,9 @@ fail:
186 unregister_shrinker(&qd_shrinker); 182 unregister_shrinker(&qd_shrinker);
187 gfs2_glock_exit(); 183 gfs2_glock_exit();
188 184
185 if (gfs2_rsrv_cachep)
186 kmem_cache_destroy(gfs2_rsrv_cachep);
187
189 if (gfs2_quotad_cachep) 188 if (gfs2_quotad_cachep)
190 kmem_cache_destroy(gfs2_quotad_cachep); 189 kmem_cache_destroy(gfs2_quotad_cachep);
191 190
@@ -225,7 +224,8 @@ static void __exit exit_gfs2_fs(void)
225 224
226 rcu_barrier(); 225 rcu_barrier();
227 226
228 mempool_destroy(gfs2_bh_pool); 227 mempool_destroy(gfs2_page_pool);
228 kmem_cache_destroy(gfs2_rsrv_cachep);
229 kmem_cache_destroy(gfs2_quotad_cachep); 229 kmem_cache_destroy(gfs2_quotad_cachep);
230 kmem_cache_destroy(gfs2_rgrpd_cachep); 230 kmem_cache_destroy(gfs2_rgrpd_cachep);
231 kmem_cache_destroy(gfs2_bufdata_cachep); 231 kmem_cache_destroy(gfs2_bufdata_cachep);
diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c
index 181586e673f9..6c1e5d1c404a 100644
--- a/fs/gfs2/meta_io.c
+++ b/fs/gfs2/meta_io.c
@@ -293,11 +293,10 @@ void gfs2_attach_bufdata(struct gfs2_glock *gl, struct buffer_head *bh,
293 bd->bd_bh = bh; 293 bd->bd_bh = bh;
294 bd->bd_gl = gl; 294 bd->bd_gl = gl;
295 295
296 INIT_LIST_HEAD(&bd->bd_list_tr);
297 if (meta) 296 if (meta)
298 lops_init_le(&bd->bd_le, &gfs2_buf_lops); 297 lops_init_le(bd, &gfs2_buf_lops);
299 else 298 else
300 lops_init_le(&bd->bd_le, &gfs2_databuf_lops); 299 lops_init_le(bd, &gfs2_databuf_lops);
301 bh->b_private = bd; 300 bh->b_private = bd;
302 301
303 if (meta) 302 if (meta)
@@ -313,7 +312,7 @@ void gfs2_remove_from_journal(struct buffer_head *bh, struct gfs2_trans *tr, int
313 if (test_clear_buffer_pinned(bh)) { 312 if (test_clear_buffer_pinned(bh)) {
314 trace_gfs2_pin(bd, 0); 313 trace_gfs2_pin(bd, 0);
315 atomic_dec(&sdp->sd_log_pinned); 314 atomic_dec(&sdp->sd_log_pinned);
316 list_del_init(&bd->bd_le.le_list); 315 list_del_init(&bd->bd_list);
317 if (meta) { 316 if (meta) {
318 gfs2_assert_warn(sdp, sdp->sd_log_num_buf); 317 gfs2_assert_warn(sdp, sdp->sd_log_num_buf);
319 sdp->sd_log_num_buf--; 318 sdp->sd_log_num_buf--;
@@ -375,33 +374,24 @@ void gfs2_meta_wipe(struct gfs2_inode *ip, u64 bstart, u32 blen)
375 * @ip: The GFS2 inode 374 * @ip: The GFS2 inode
376 * @height: The level of this buf in the metadata (indir addr) tree (if any) 375 * @height: The level of this buf in the metadata (indir addr) tree (if any)
377 * @num: The block number (device relative) of the buffer 376 * @num: The block number (device relative) of the buffer
378 * @new: Non-zero if we may create a new buffer
379 * @bhp: the buffer is returned here 377 * @bhp: the buffer is returned here
380 * 378 *
381 * Returns: errno 379 * Returns: errno
382 */ 380 */
383 381
384int gfs2_meta_indirect_buffer(struct gfs2_inode *ip, int height, u64 num, 382int gfs2_meta_indirect_buffer(struct gfs2_inode *ip, int height, u64 num,
385 int new, struct buffer_head **bhp) 383 struct buffer_head **bhp)
386{ 384{
387 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); 385 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
388 struct gfs2_glock *gl = ip->i_gl; 386 struct gfs2_glock *gl = ip->i_gl;
389 struct buffer_head *bh; 387 struct buffer_head *bh;
390 int ret = 0; 388 int ret = 0;
389 u32 mtype = height ? GFS2_METATYPE_IN : GFS2_METATYPE_DI;
391 390
392 if (new) { 391 ret = gfs2_meta_read(gl, num, DIO_WAIT, &bh);
393 BUG_ON(height == 0); 392 if (ret == 0 && gfs2_metatype_check(sdp, bh, mtype)) {
394 bh = gfs2_meta_new(gl, num); 393 brelse(bh);
395 gfs2_trans_add_bh(ip->i_gl, bh, 1); 394 ret = -EIO;
396 gfs2_metatype_set(bh, GFS2_METATYPE_IN, GFS2_FORMAT_IN);
397 gfs2_buffer_clear_tail(bh, sizeof(struct gfs2_meta_header));
398 } else {
399 u32 mtype = height ? GFS2_METATYPE_IN : GFS2_METATYPE_DI;
400 ret = gfs2_meta_read(gl, num, DIO_WAIT, &bh);
401 if (ret == 0 && gfs2_metatype_check(sdp, bh, mtype)) {
402 brelse(bh);
403 ret = -EIO;
404 }
405 } 395 }
406 *bhp = bh; 396 *bhp = bh;
407 return ret; 397 return ret;
diff --git a/fs/gfs2/meta_io.h b/fs/gfs2/meta_io.h
index 22c526593131..c30973b07a7c 100644
--- a/fs/gfs2/meta_io.h
+++ b/fs/gfs2/meta_io.h
@@ -65,12 +65,12 @@ void gfs2_remove_from_journal(struct buffer_head *bh, struct gfs2_trans *tr,
65void gfs2_meta_wipe(struct gfs2_inode *ip, u64 bstart, u32 blen); 65void gfs2_meta_wipe(struct gfs2_inode *ip, u64 bstart, u32 blen);
66 66
67int gfs2_meta_indirect_buffer(struct gfs2_inode *ip, int height, u64 num, 67int gfs2_meta_indirect_buffer(struct gfs2_inode *ip, int height, u64 num,
68 int new, struct buffer_head **bhp); 68 struct buffer_head **bhp);
69 69
70static inline int gfs2_meta_inode_buffer(struct gfs2_inode *ip, 70static inline int gfs2_meta_inode_buffer(struct gfs2_inode *ip,
71 struct buffer_head **bhp) 71 struct buffer_head **bhp)
72{ 72{
73 return gfs2_meta_indirect_buffer(ip, 0, ip->i_no_addr, 0, bhp); 73 return gfs2_meta_indirect_buffer(ip, 0, ip->i_no_addr, bhp);
74} 74}
75 75
76struct buffer_head *gfs2_meta_ra(struct gfs2_glock *gl, u64 dblock, u32 extlen); 76struct buffer_head *gfs2_meta_ra(struct gfs2_glock *gl, u64 dblock, u32 extlen);
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index 6f3a18f9e176..c5871ae40561 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -99,7 +99,6 @@ static struct gfs2_sbd *init_sbd(struct super_block *sb)
99 atomic_set(&sdp->sd_log_pinned, 0); 99 atomic_set(&sdp->sd_log_pinned, 0);
100 INIT_LIST_HEAD(&sdp->sd_log_le_buf); 100 INIT_LIST_HEAD(&sdp->sd_log_le_buf);
101 INIT_LIST_HEAD(&sdp->sd_log_le_revoke); 101 INIT_LIST_HEAD(&sdp->sd_log_le_revoke);
102 INIT_LIST_HEAD(&sdp->sd_log_le_rg);
103 INIT_LIST_HEAD(&sdp->sd_log_le_databuf); 102 INIT_LIST_HEAD(&sdp->sd_log_le_databuf);
104 INIT_LIST_HEAD(&sdp->sd_log_le_ordered); 103 INIT_LIST_HEAD(&sdp->sd_log_le_ordered);
105 104
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c
index 6019da3dcaed..b97178e7d397 100644
--- a/fs/gfs2/quota.c
+++ b/fs/gfs2/quota.c
@@ -652,7 +652,7 @@ static int gfs2_adjust_quota(struct gfs2_inode *ip, loff_t loc,
652 } 652 }
653 653
654 memset(&q, 0, sizeof(struct gfs2_quota)); 654 memset(&q, 0, sizeof(struct gfs2_quota));
655 err = gfs2_internal_read(ip, NULL, (char *)&q, &loc, sizeof(q)); 655 err = gfs2_internal_read(ip, (char *)&q, &loc, sizeof(q));
656 if (err < 0) 656 if (err < 0)
657 return err; 657 return err;
658 658
@@ -744,7 +744,7 @@ get_a_page:
744 i_size_write(inode, size); 744 i_size_write(inode, size);
745 inode->i_mtime = inode->i_atime = CURRENT_TIME; 745 inode->i_mtime = inode->i_atime = CURRENT_TIME;
746 mark_inode_dirty(inode); 746 mark_inode_dirty(inode);
747 return err; 747 return 0;
748 748
749unlock_out: 749unlock_out:
750 unlock_page(page); 750 unlock_page(page);
@@ -852,7 +852,7 @@ static int update_qd(struct gfs2_sbd *sdp, struct gfs2_quota_data *qd)
852 852
853 memset(&q, 0, sizeof(struct gfs2_quota)); 853 memset(&q, 0, sizeof(struct gfs2_quota));
854 pos = qd2offset(qd); 854 pos = qd2offset(qd);
855 error = gfs2_internal_read(ip, NULL, (char *)&q, &pos, sizeof(q)); 855 error = gfs2_internal_read(ip, (char *)&q, &pos, sizeof(q));
856 if (error < 0) 856 if (error < 0)
857 return error; 857 return error;
858 858
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index 3df65c9ab73b..f74fb9bd1973 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -70,15 +70,15 @@ static u32 rgblk_search(struct gfs2_rgrpd *rgd, u32 goal,
70 70
71/** 71/**
72 * gfs2_setbit - Set a bit in the bitmaps 72 * gfs2_setbit - Set a bit in the bitmaps
73 * @buffer: the buffer that holds the bitmaps 73 * @rgd: the resource group descriptor
74 * @buflen: the length (in bytes) of the buffer 74 * @buf2: the clone buffer that holds the bitmaps
75 * @bi: the bitmap structure
75 * @block: the block to set 76 * @block: the block to set
76 * @new_state: the new state of the block 77 * @new_state: the new state of the block
77 * 78 *
78 */ 79 */
79 80
80static inline void gfs2_setbit(struct gfs2_rgrpd *rgd, unsigned char *buf1, 81static inline void gfs2_setbit(struct gfs2_rgrpd *rgd, unsigned char *buf2,
81 unsigned char *buf2, unsigned int offset,
82 struct gfs2_bitmap *bi, u32 block, 82 struct gfs2_bitmap *bi, u32 block,
83 unsigned char new_state) 83 unsigned char new_state)
84{ 84{
@@ -86,8 +86,8 @@ static inline void gfs2_setbit(struct gfs2_rgrpd *rgd, unsigned char *buf1,
86 unsigned int buflen = bi->bi_len; 86 unsigned int buflen = bi->bi_len;
87 const unsigned int bit = (block % GFS2_NBBY) * GFS2_BIT_SIZE; 87 const unsigned int bit = (block % GFS2_NBBY) * GFS2_BIT_SIZE;
88 88
89 byte1 = buf1 + offset + (block / GFS2_NBBY); 89 byte1 = bi->bi_bh->b_data + bi->bi_offset + (block / GFS2_NBBY);
90 end = buf1 + offset + buflen; 90 end = bi->bi_bh->b_data + bi->bi_offset + buflen;
91 91
92 BUG_ON(byte1 >= end); 92 BUG_ON(byte1 >= end);
93 93
@@ -110,7 +110,7 @@ static inline void gfs2_setbit(struct gfs2_rgrpd *rgd, unsigned char *buf1,
110 *byte1 ^= (cur_state ^ new_state) << bit; 110 *byte1 ^= (cur_state ^ new_state) << bit;
111 111
112 if (buf2) { 112 if (buf2) {
113 byte2 = buf2 + offset + (block / GFS2_NBBY); 113 byte2 = buf2 + bi->bi_offset + (block / GFS2_NBBY);
114 cur_state = (*byte2 >> bit) & GFS2_BIT_MASK; 114 cur_state = (*byte2 >> bit) & GFS2_BIT_MASK;
115 *byte2 ^= (cur_state ^ new_state) << bit; 115 *byte2 ^= (cur_state ^ new_state) << bit;
116 } 116 }
@@ -118,6 +118,7 @@ static inline void gfs2_setbit(struct gfs2_rgrpd *rgd, unsigned char *buf1,
118 118
119/** 119/**
120 * gfs2_testbit - test a bit in the bitmaps 120 * gfs2_testbit - test a bit in the bitmaps
121 * @rgd: the resource group descriptor
121 * @buffer: the buffer that holds the bitmaps 122 * @buffer: the buffer that holds the bitmaps
122 * @buflen: the length (in bytes) of the buffer 123 * @buflen: the length (in bytes) of the buffer
123 * @block: the block to read 124 * @block: the block to read
@@ -179,7 +180,7 @@ static inline u64 gfs2_bit_search(const __le64 *ptr, u64 mask, u8 state)
179/** 180/**
180 * gfs2_bitfit - Search an rgrp's bitmap buffer to find a bit-pair representing 181 * gfs2_bitfit - Search an rgrp's bitmap buffer to find a bit-pair representing
181 * a block in a given allocation state. 182 * a block in a given allocation state.
182 * @buffer: the buffer that holds the bitmaps 183 * @buf: the buffer that holds the bitmaps
183 * @len: the length (in bytes) of the buffer 184 * @len: the length (in bytes) of the buffer
184 * @goal: start search at this block's bit-pair (within @buffer) 185 * @goal: start search at this block's bit-pair (within @buffer)
185 * @state: GFS2_BLKST_XXX the state of the block we're looking for. 186 * @state: GFS2_BLKST_XXX the state of the block we're looking for.
@@ -231,6 +232,7 @@ static u32 gfs2_bitfit(const u8 *buf, const unsigned int len,
231 232
232/** 233/**
233 * gfs2_bitcount - count the number of bits in a certain state 234 * gfs2_bitcount - count the number of bits in a certain state
235 * @rgd: the resource group descriptor
234 * @buffer: the buffer that holds the bitmaps 236 * @buffer: the buffer that holds the bitmaps
235 * @buflen: the length (in bytes) of the buffer 237 * @buflen: the length (in bytes) of the buffer
236 * @state: the state of the block we're looking for 238 * @state: the state of the block we're looking for
@@ -264,7 +266,6 @@ static u32 gfs2_bitcount(struct gfs2_rgrpd *rgd, const u8 *buffer,
264 266
265/** 267/**
266 * gfs2_rgrp_verify - Verify that a resource group is consistent 268 * gfs2_rgrp_verify - Verify that a resource group is consistent
267 * @sdp: the filesystem
268 * @rgd: the rgrp 269 * @rgd: the rgrp
269 * 270 *
270 */ 271 */
@@ -322,7 +323,8 @@ static inline int rgrp_contains_block(struct gfs2_rgrpd *rgd, u64 block)
322/** 323/**
323 * gfs2_blk2rgrpd - Find resource group for a given data/meta block number 324 * gfs2_blk2rgrpd - Find resource group for a given data/meta block number
324 * @sdp: The GFS2 superblock 325 * @sdp: The GFS2 superblock
325 * @n: The data block number 326 * @blk: The data block number
327 * @exact: True if this needs to be an exact match
326 * 328 *
327 * Returns: The resource group, or NULL if not found 329 * Returns: The resource group, or NULL if not found
328 */ 330 */
@@ -380,7 +382,7 @@ struct gfs2_rgrpd *gfs2_rgrpd_get_first(struct gfs2_sbd *sdp)
380 382
381/** 383/**
382 * gfs2_rgrpd_get_next - get the next RG 384 * gfs2_rgrpd_get_next - get the next RG
383 * @rgd: A RG 385 * @rgd: the resource group descriptor
384 * 386 *
385 * Returns: The next rgrp 387 * Returns: The next rgrp
386 */ 388 */
@@ -529,6 +531,7 @@ static int compute_bitstructs(struct gfs2_rgrpd *rgd)
529 531
530/** 532/**
531 * gfs2_ri_total - Total up the file system space, according to the rindex. 533 * gfs2_ri_total - Total up the file system space, according to the rindex.
534 * @sdp: the filesystem
532 * 535 *
533 */ 536 */
534u64 gfs2_ri_total(struct gfs2_sbd *sdp) 537u64 gfs2_ri_total(struct gfs2_sbd *sdp)
@@ -537,16 +540,14 @@ u64 gfs2_ri_total(struct gfs2_sbd *sdp)
537 struct inode *inode = sdp->sd_rindex; 540 struct inode *inode = sdp->sd_rindex;
538 struct gfs2_inode *ip = GFS2_I(inode); 541 struct gfs2_inode *ip = GFS2_I(inode);
539 char buf[sizeof(struct gfs2_rindex)]; 542 char buf[sizeof(struct gfs2_rindex)];
540 struct file_ra_state ra_state;
541 int error, rgrps; 543 int error, rgrps;
542 544
543 file_ra_state_init(&ra_state, inode->i_mapping);
544 for (rgrps = 0;; rgrps++) { 545 for (rgrps = 0;; rgrps++) {
545 loff_t pos = rgrps * sizeof(struct gfs2_rindex); 546 loff_t pos = rgrps * sizeof(struct gfs2_rindex);
546 547
547 if (pos + sizeof(struct gfs2_rindex) > i_size_read(inode)) 548 if (pos + sizeof(struct gfs2_rindex) > i_size_read(inode))
548 break; 549 break;
549 error = gfs2_internal_read(ip, &ra_state, buf, &pos, 550 error = gfs2_internal_read(ip, buf, &pos,
550 sizeof(struct gfs2_rindex)); 551 sizeof(struct gfs2_rindex));
551 if (error != sizeof(struct gfs2_rindex)) 552 if (error != sizeof(struct gfs2_rindex))
552 break; 553 break;
@@ -582,13 +583,12 @@ static int rgd_insert(struct gfs2_rgrpd *rgd)
582 583
583/** 584/**
584 * read_rindex_entry - Pull in a new resource index entry from the disk 585 * read_rindex_entry - Pull in a new resource index entry from the disk
585 * @gl: The glock covering the rindex inode 586 * @ip: Pointer to the rindex inode
586 * 587 *
587 * Returns: 0 on success, > 0 on EOF, error code otherwise 588 * Returns: 0 on success, > 0 on EOF, error code otherwise
588 */ 589 */
589 590
590static int read_rindex_entry(struct gfs2_inode *ip, 591static int read_rindex_entry(struct gfs2_inode *ip)
591 struct file_ra_state *ra_state)
592{ 592{
593 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); 593 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
594 loff_t pos = sdp->sd_rgrps * sizeof(struct gfs2_rindex); 594 loff_t pos = sdp->sd_rgrps * sizeof(struct gfs2_rindex);
@@ -599,7 +599,7 @@ static int read_rindex_entry(struct gfs2_inode *ip,
599 if (pos >= i_size_read(&ip->i_inode)) 599 if (pos >= i_size_read(&ip->i_inode))
600 return 1; 600 return 1;
601 601
602 error = gfs2_internal_read(ip, ra_state, (char *)&buf, &pos, 602 error = gfs2_internal_read(ip, (char *)&buf, &pos,
603 sizeof(struct gfs2_rindex)); 603 sizeof(struct gfs2_rindex));
604 604
605 if (error != sizeof(struct gfs2_rindex)) 605 if (error != sizeof(struct gfs2_rindex))
@@ -655,13 +655,10 @@ fail:
655static int gfs2_ri_update(struct gfs2_inode *ip) 655static int gfs2_ri_update(struct gfs2_inode *ip)
656{ 656{
657 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); 657 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
658 struct inode *inode = &ip->i_inode;
659 struct file_ra_state ra_state;
660 int error; 658 int error;
661 659
662 file_ra_state_init(&ra_state, inode->i_mapping);
663 do { 660 do {
664 error = read_rindex_entry(ip, &ra_state); 661 error = read_rindex_entry(ip);
665 } while (error == 0); 662 } while (error == 0);
666 663
667 if (error < 0) 664 if (error < 0)
@@ -741,7 +738,7 @@ static void gfs2_rgrp_out(struct gfs2_rgrpd *rgd, void *buf)
741 738
742/** 739/**
743 * gfs2_rgrp_go_lock - Read in a RG's header and bitmaps 740 * gfs2_rgrp_go_lock - Read in a RG's header and bitmaps
744 * @rgd: the struct gfs2_rgrpd describing the RG to read in 741 * @gh: The glock holder for the resource group
745 * 742 *
746 * Read in all of a Resource Group's header and bitmap blocks. 743 * Read in all of a Resource Group's header and bitmap blocks.
747 * Caller must eventually call gfs2_rgrp_relse() to free the bitmaps. 744 * Caller must eventually call gfs2_rgrp_relse() to free the bitmaps.
@@ -801,7 +798,7 @@ fail:
801 798
802/** 799/**
803 * gfs2_rgrp_go_unlock - Release RG bitmaps read in with gfs2_rgrp_bh_get() 800 * gfs2_rgrp_go_unlock - Release RG bitmaps read in with gfs2_rgrp_bh_get()
804 * @rgd: the struct gfs2_rgrpd describing the RG to read in 801 * @gh: The glock holder for the resource group
805 * 802 *
806 */ 803 */
807 804
@@ -1002,11 +999,13 @@ struct gfs2_qadata *gfs2_qadata_get(struct gfs2_inode *ip)
1002 * Returns: the struct gfs2_qadata 999 * Returns: the struct gfs2_qadata
1003 */ 1000 */
1004 1001
1005static struct gfs2_blkreserv *gfs2_blkrsv_get(struct gfs2_inode *ip) 1002static int gfs2_blkrsv_get(struct gfs2_inode *ip)
1006{ 1003{
1007 BUG_ON(ip->i_res != NULL); 1004 BUG_ON(ip->i_res != NULL);
1008 ip->i_res = kzalloc(sizeof(struct gfs2_blkreserv), GFP_NOFS); 1005 ip->i_res = kmem_cache_zalloc(gfs2_rsrv_cachep, GFP_NOFS);
1009 return ip->i_res; 1006 if (!ip->i_res)
1007 return -ENOMEM;
1008 return 0;
1010} 1009}
1011 1010
1012/** 1011/**
@@ -1038,6 +1037,8 @@ static inline u32 gfs2_bi2rgd_blk(struct gfs2_bitmap *bi, u32 blk)
1038/** 1037/**
1039 * try_rgrp_unlink - Look for any unlinked, allocated, but unused inodes 1038 * try_rgrp_unlink - Look for any unlinked, allocated, but unused inodes
1040 * @rgd: The rgrp 1039 * @rgd: The rgrp
1040 * @last_unlinked: block address of the last dinode we unlinked
1041 * @skip: block address we should explicitly not unlink
1041 * 1042 *
1042 * Returns: 0 if no error 1043 * Returns: 0 if no error
1043 * The inode, if one has been found, in inode. 1044 * The inode, if one has been found, in inode.
@@ -1102,7 +1103,7 @@ static void try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked, u64 skip
1102/** 1103/**
1103 * get_local_rgrp - Choose and lock a rgrp for allocation 1104 * get_local_rgrp - Choose and lock a rgrp for allocation
1104 * @ip: the inode to reserve space for 1105 * @ip: the inode to reserve space for
1105 * @rgp: the chosen and locked rgrp 1106 * @last_unlinked: the last unlinked block
1106 * 1107 *
1107 * Try to acquire rgrp in way which avoids contending with others. 1108 * Try to acquire rgrp in way which avoids contending with others.
1108 * 1109 *
@@ -1164,13 +1165,14 @@ static int get_local_rgrp(struct gfs2_inode *ip, u64 *last_unlinked)
1164static void gfs2_blkrsv_put(struct gfs2_inode *ip) 1165static void gfs2_blkrsv_put(struct gfs2_inode *ip)
1165{ 1166{
1166 BUG_ON(ip->i_res == NULL); 1167 BUG_ON(ip->i_res == NULL);
1167 kfree(ip->i_res); 1168 kmem_cache_free(gfs2_rsrv_cachep, ip->i_res);
1168 ip->i_res = NULL; 1169 ip->i_res = NULL;
1169} 1170}
1170 1171
1171/** 1172/**
1172 * gfs2_inplace_reserve - Reserve space in the filesystem 1173 * gfs2_inplace_reserve - Reserve space in the filesystem
1173 * @ip: the inode to reserve space for 1174 * @ip: the inode to reserve space for
1175 * @requested: the number of blocks to be reserved
1174 * 1176 *
1175 * Returns: errno 1177 * Returns: errno
1176 */ 1178 */
@@ -1179,14 +1181,15 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested)
1179{ 1181{
1180 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); 1182 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
1181 struct gfs2_blkreserv *rs; 1183 struct gfs2_blkreserv *rs;
1182 int error = 0; 1184 int error;
1183 u64 last_unlinked = NO_BLOCK; 1185 u64 last_unlinked = NO_BLOCK;
1184 int tries = 0; 1186 int tries = 0;
1185 1187
1186 rs = gfs2_blkrsv_get(ip); 1188 error = gfs2_blkrsv_get(ip);
1187 if (!rs) 1189 if (error)
1188 return -ENOMEM; 1190 return error;
1189 1191
1192 rs = ip->i_res;
1190 rs->rs_requested = requested; 1193 rs->rs_requested = requested;
1191 if (gfs2_assert_warn(sdp, requested)) { 1194 if (gfs2_assert_warn(sdp, requested)) {
1192 error = -EINVAL; 1195 error = -EINVAL;
@@ -1268,7 +1271,6 @@ static unsigned char gfs2_get_block_type(struct gfs2_rgrpd *rgd, u64 block)
1268 * @rgd: the resource group descriptor 1271 * @rgd: the resource group descriptor
1269 * @goal: the goal block within the RG (start here to search for avail block) 1272 * @goal: the goal block within the RG (start here to search for avail block)
1270 * @state: GFS2_BLKST_XXX the before-allocation state to find 1273 * @state: GFS2_BLKST_XXX the before-allocation state to find
1271 * @dinode: TRUE if the first block we allocate is for a dinode
1272 * @rbi: address of the pointer to the bitmap containing the block found 1274 * @rbi: address of the pointer to the bitmap containing the block found
1273 * 1275 *
1274 * Walk rgrp's bitmap to find bits that represent a block in @state. 1276 * Walk rgrp's bitmap to find bits that represent a block in @state.
@@ -1282,13 +1284,12 @@ static unsigned char gfs2_get_block_type(struct gfs2_rgrpd *rgd, u64 block)
1282 * Returns: the block number found relative to the bitmap rbi 1284 * Returns: the block number found relative to the bitmap rbi
1283 */ 1285 */
1284 1286
1285static u32 rgblk_search(struct gfs2_rgrpd *rgd, u32 goal, 1287static u32 rgblk_search(struct gfs2_rgrpd *rgd, u32 goal, unsigned char state,
1286 unsigned char state,
1287 struct gfs2_bitmap **rbi) 1288 struct gfs2_bitmap **rbi)
1288{ 1289{
1289 struct gfs2_bitmap *bi = NULL; 1290 struct gfs2_bitmap *bi = NULL;
1290 const u32 length = rgd->rd_length; 1291 const u32 length = rgd->rd_length;
1291 u32 blk = BFITNOENT; 1292 u32 biblk = BFITNOENT;
1292 unsigned int buf, x; 1293 unsigned int buf, x;
1293 const u8 *buffer = NULL; 1294 const u8 *buffer = NULL;
1294 1295
@@ -1325,8 +1326,8 @@ do_search:
1325 if (state != GFS2_BLKST_UNLINKED && bi->bi_clone) 1326 if (state != GFS2_BLKST_UNLINKED && bi->bi_clone)
1326 buffer = bi->bi_clone + bi->bi_offset; 1327 buffer = bi->bi_clone + bi->bi_offset;
1327 1328
1328 blk = gfs2_bitfit(buffer, bi->bi_len, goal, state); 1329 biblk = gfs2_bitfit(buffer, bi->bi_len, goal, state);
1329 if (blk != BFITNOENT) 1330 if (biblk != BFITNOENT)
1330 break; 1331 break;
1331 1332
1332 if ((goal == 0) && (state == GFS2_BLKST_FREE)) 1333 if ((goal == 0) && (state == GFS2_BLKST_FREE))
@@ -1339,10 +1340,10 @@ skip:
1339 goal = 0; 1340 goal = 0;
1340 } 1341 }
1341 1342
1342 if (blk != BFITNOENT) 1343 if (biblk != BFITNOENT)
1343 *rbi = bi; 1344 *rbi = bi;
1344 1345
1345 return blk; 1346 return biblk;
1346} 1347}
1347 1348
1348/** 1349/**
@@ -1367,8 +1368,8 @@ static u64 gfs2_alloc_extent(struct gfs2_rgrpd *rgd, struct gfs2_bitmap *bi,
1367 *n = 0; 1368 *n = 0;
1368 buffer = bi->bi_bh->b_data + bi->bi_offset; 1369 buffer = bi->bi_bh->b_data + bi->bi_offset;
1369 gfs2_trans_add_bh(rgd->rd_gl, bi->bi_bh, 1); 1370 gfs2_trans_add_bh(rgd->rd_gl, bi->bi_bh, 1);
1370 gfs2_setbit(rgd, bi->bi_bh->b_data, bi->bi_clone, bi->bi_offset, 1371 gfs2_setbit(rgd, bi->bi_clone, bi, blk,
1371 bi, blk, dinode ? GFS2_BLKST_DINODE : GFS2_BLKST_USED); 1372 dinode ? GFS2_BLKST_DINODE : GFS2_BLKST_USED);
1372 (*n)++; 1373 (*n)++;
1373 goal = blk; 1374 goal = blk;
1374 while (*n < elen) { 1375 while (*n < elen) {
@@ -1378,8 +1379,7 @@ static u64 gfs2_alloc_extent(struct gfs2_rgrpd *rgd, struct gfs2_bitmap *bi,
1378 if (gfs2_testbit(rgd, buffer, bi->bi_len, goal) != 1379 if (gfs2_testbit(rgd, buffer, bi->bi_len, goal) !=
1379 GFS2_BLKST_FREE) 1380 GFS2_BLKST_FREE)
1380 break; 1381 break;
1381 gfs2_setbit(rgd, bi->bi_bh->b_data, bi->bi_clone, bi->bi_offset, 1382 gfs2_setbit(rgd, bi->bi_clone, bi, goal, GFS2_BLKST_USED);
1382 bi, goal, GFS2_BLKST_USED);
1383 (*n)++; 1383 (*n)++;
1384 } 1384 }
1385 blk = gfs2_bi2rgd_blk(bi, blk); 1385 blk = gfs2_bi2rgd_blk(bi, blk);
@@ -1436,8 +1436,7 @@ static struct gfs2_rgrpd *rgblk_free(struct gfs2_sbd *sdp, u64 bstart,
1436 bi->bi_len); 1436 bi->bi_len);
1437 } 1437 }
1438 gfs2_trans_add_bh(rgd->rd_gl, bi->bi_bh, 1); 1438 gfs2_trans_add_bh(rgd->rd_gl, bi->bi_bh, 1);
1439 gfs2_setbit(rgd, bi->bi_bh->b_data, NULL, bi->bi_offset, 1439 gfs2_setbit(rgd, NULL, bi, buf_blk, new_state);
1440 bi, buf_blk, new_state);
1441 } 1440 }
1442 1441
1443 return rgd; 1442 return rgd;
@@ -1557,7 +1556,7 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks,
1557 ip->i_inode.i_gid); 1556 ip->i_inode.i_gid);
1558 1557
1559 rgd->rd_free_clone -= *nblocks; 1558 rgd->rd_free_clone -= *nblocks;
1560 trace_gfs2_block_alloc(ip, block, *nblocks, 1559 trace_gfs2_block_alloc(ip, rgd, block, *nblocks,
1561 dinode ? GFS2_BLKST_DINODE : GFS2_BLKST_USED); 1560 dinode ? GFS2_BLKST_DINODE : GFS2_BLKST_USED);
1562 *bn = block; 1561 *bn = block;
1563 return 0; 1562 return 0;
@@ -1584,7 +1583,7 @@ void __gfs2_free_blocks(struct gfs2_inode *ip, u64 bstart, u32 blen, int meta)
1584 rgd = rgblk_free(sdp, bstart, blen, GFS2_BLKST_FREE); 1583 rgd = rgblk_free(sdp, bstart, blen, GFS2_BLKST_FREE);
1585 if (!rgd) 1584 if (!rgd)
1586 return; 1585 return;
1587 trace_gfs2_block_alloc(ip, bstart, blen, GFS2_BLKST_FREE); 1586 trace_gfs2_block_alloc(ip, rgd, bstart, blen, GFS2_BLKST_FREE);
1588 rgd->rd_free += blen; 1587 rgd->rd_free += blen;
1589 rgd->rd_flags &= ~GFS2_RGF_TRIMMED; 1588 rgd->rd_flags &= ~GFS2_RGF_TRIMMED;
1590 gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1); 1589 gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1);
@@ -1622,7 +1621,7 @@ void gfs2_unlink_di(struct inode *inode)
1622 rgd = rgblk_free(sdp, blkno, 1, GFS2_BLKST_UNLINKED); 1621 rgd = rgblk_free(sdp, blkno, 1, GFS2_BLKST_UNLINKED);
1623 if (!rgd) 1622 if (!rgd)
1624 return; 1623 return;
1625 trace_gfs2_block_alloc(ip, blkno, 1, GFS2_BLKST_UNLINKED); 1624 trace_gfs2_block_alloc(ip, rgd, blkno, 1, GFS2_BLKST_UNLINKED);
1626 gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1); 1625 gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1);
1627 gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data); 1626 gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);
1628} 1627}
@@ -1652,7 +1651,7 @@ static void gfs2_free_uninit_di(struct gfs2_rgrpd *rgd, u64 blkno)
1652void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip) 1651void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip)
1653{ 1652{
1654 gfs2_free_uninit_di(rgd, ip->i_no_addr); 1653 gfs2_free_uninit_di(rgd, ip->i_no_addr);
1655 trace_gfs2_block_alloc(ip, ip->i_no_addr, 1, GFS2_BLKST_FREE); 1654 trace_gfs2_block_alloc(ip, rgd, ip->i_no_addr, 1, GFS2_BLKST_FREE);
1656 gfs2_quota_change(ip, -1, ip->i_inode.i_uid, ip->i_inode.i_gid); 1655 gfs2_quota_change(ip, -1, ip->i_inode.i_uid, ip->i_inode.i_gid);
1657 gfs2_meta_wipe(ip, ip->i_no_addr, 1); 1656 gfs2_meta_wipe(ip, ip->i_no_addr, 1);
1658} 1657}
@@ -1752,7 +1751,6 @@ void gfs2_rlist_add(struct gfs2_inode *ip, struct gfs2_rgrp_list *rlist,
1752 * and initialize an array of glock holders for them 1751 * and initialize an array of glock holders for them
1753 * @rlist: the list of resource groups 1752 * @rlist: the list of resource groups
1754 * @state: the lock state to acquire the RG lock in 1753 * @state: the lock state to acquire the RG lock in
1755 * @flags: the modifier flags for the holder structures
1756 * 1754 *
1757 * FIXME: Don't use NOFAIL 1755 * FIXME: Don't use NOFAIL
1758 * 1756 *
diff --git a/fs/gfs2/trace_gfs2.h b/fs/gfs2/trace_gfs2.h
index dfa89cd75534..1b8b81588199 100644
--- a/fs/gfs2/trace_gfs2.h
+++ b/fs/gfs2/trace_gfs2.h
@@ -457,10 +457,10 @@ TRACE_EVENT(gfs2_bmap,
457/* Keep track of blocks as they are allocated/freed */ 457/* Keep track of blocks as they are allocated/freed */
458TRACE_EVENT(gfs2_block_alloc, 458TRACE_EVENT(gfs2_block_alloc,
459 459
460 TP_PROTO(const struct gfs2_inode *ip, u64 block, unsigned len, 460 TP_PROTO(const struct gfs2_inode *ip, struct gfs2_rgrpd *rgd,
461 u8 block_state), 461 u64 block, unsigned len, u8 block_state),
462 462
463 TP_ARGS(ip, block, len, block_state), 463 TP_ARGS(ip, rgd, block, len, block_state),
464 464
465 TP_STRUCT__entry( 465 TP_STRUCT__entry(
466 __field( dev_t, dev ) 466 __field( dev_t, dev )
@@ -468,6 +468,8 @@ TRACE_EVENT(gfs2_block_alloc,
468 __field( u64, inum ) 468 __field( u64, inum )
469 __field( u32, len ) 469 __field( u32, len )
470 __field( u8, block_state ) 470 __field( u8, block_state )
471 __field( u64, rd_addr )
472 __field( u32, rd_free_clone )
471 ), 473 ),
472 474
473 TP_fast_assign( 475 TP_fast_assign(
@@ -476,14 +478,18 @@ TRACE_EVENT(gfs2_block_alloc,
476 __entry->inum = ip->i_no_addr; 478 __entry->inum = ip->i_no_addr;
477 __entry->len = len; 479 __entry->len = len;
478 __entry->block_state = block_state; 480 __entry->block_state = block_state;
481 __entry->rd_addr = rgd->rd_addr;
482 __entry->rd_free_clone = rgd->rd_free_clone;
479 ), 483 ),
480 484
481 TP_printk("%u,%u bmap %llu alloc %llu/%lu %s", 485 TP_printk("%u,%u bmap %llu alloc %llu/%lu %s rg:%llu rf:%u",
482 MAJOR(__entry->dev), MINOR(__entry->dev), 486 MAJOR(__entry->dev), MINOR(__entry->dev),
483 (unsigned long long)__entry->inum, 487 (unsigned long long)__entry->inum,
484 (unsigned long long)__entry->start, 488 (unsigned long long)__entry->start,
485 (unsigned long)__entry->len, 489 (unsigned long)__entry->len,
486 block_state_name(__entry->block_state)) 490 block_state_name(__entry->block_state),
491 (unsigned long long)__entry->rd_addr,
492 __entry->rd_free_clone)
487); 493);
488 494
489#endif /* _TRACE_GFS2_H */ 495#endif /* _TRACE_GFS2_H */
diff --git a/fs/gfs2/trans.c b/fs/gfs2/trans.c
index 86ac75d99d31..ad3e2fb763d7 100644
--- a/fs/gfs2/trans.c
+++ b/fs/gfs2/trans.c
@@ -50,8 +50,6 @@ int gfs2_trans_begin(struct gfs2_sbd *sdp, unsigned int blocks,
50 if (revokes) 50 if (revokes)
51 tr->tr_reserved += gfs2_struct2blk(sdp, revokes, 51 tr->tr_reserved += gfs2_struct2blk(sdp, revokes,
52 sizeof(u64)); 52 sizeof(u64));
53 INIT_LIST_HEAD(&tr->tr_list_buf);
54
55 gfs2_holder_init(sdp->sd_trans_gl, LM_ST_SHARED, 0, &tr->tr_t_gh); 53 gfs2_holder_init(sdp->sd_trans_gl, LM_ST_SHARED, 0, &tr->tr_t_gh);
56 54
57 error = gfs2_glock_nq(&tr->tr_t_gh); 55 error = gfs2_glock_nq(&tr->tr_t_gh);
@@ -93,10 +91,21 @@ static void gfs2_log_release(struct gfs2_sbd *sdp, unsigned int blks)
93 up_read(&sdp->sd_log_flush_lock); 91 up_read(&sdp->sd_log_flush_lock);
94} 92}
95 93
94static void gfs2_print_trans(const struct gfs2_trans *tr)
95{
96 print_symbol(KERN_WARNING "GFS2: Transaction created at: %s\n", tr->tr_ip);
97 printk(KERN_WARNING "GFS2: blocks=%u revokes=%u reserved=%u touched=%d\n",
98 tr->tr_blocks, tr->tr_revokes, tr->tr_reserved, tr->tr_touched);
99 printk(KERN_WARNING "GFS2: Buf %u/%u Databuf %u/%u Revoke %u/%u\n",
100 tr->tr_num_buf_new, tr->tr_num_buf_rm,
101 tr->tr_num_databuf_new, tr->tr_num_databuf_rm,
102 tr->tr_num_revoke, tr->tr_num_revoke_rm);
103}
104
96void gfs2_trans_end(struct gfs2_sbd *sdp) 105void gfs2_trans_end(struct gfs2_sbd *sdp)
97{ 106{
98 struct gfs2_trans *tr = current->journal_info; 107 struct gfs2_trans *tr = current->journal_info;
99 108 s64 nbuf;
100 BUG_ON(!tr); 109 BUG_ON(!tr);
101 current->journal_info = NULL; 110 current->journal_info = NULL;
102 111
@@ -110,16 +119,13 @@ void gfs2_trans_end(struct gfs2_sbd *sdp)
110 return; 119 return;
111 } 120 }
112 121
113 if (gfs2_assert_withdraw(sdp, tr->tr_num_buf <= tr->tr_blocks)) { 122 nbuf = tr->tr_num_buf_new + tr->tr_num_databuf_new;
114 fs_err(sdp, "tr_num_buf = %u, tr_blocks = %u ", 123 nbuf -= tr->tr_num_buf_rm;
115 tr->tr_num_buf, tr->tr_blocks); 124 nbuf -= tr->tr_num_databuf_rm;
116 print_symbol(KERN_WARNING "GFS2: Transaction created at: %s\n", tr->tr_ip); 125
117 } 126 if (gfs2_assert_withdraw(sdp, (nbuf <= tr->tr_blocks) &&
118 if (gfs2_assert_withdraw(sdp, tr->tr_num_revoke <= tr->tr_revokes)) { 127 (tr->tr_num_revoke <= tr->tr_revokes)))
119 fs_err(sdp, "tr_num_revoke = %u, tr_revokes = %u ", 128 gfs2_print_trans(tr);
120 tr->tr_num_revoke, tr->tr_revokes);
121 print_symbol(KERN_WARNING "GFS2: Transaction created at: %s\n", tr->tr_ip);
122 }
123 129
124 gfs2_log_commit(sdp, tr); 130 gfs2_log_commit(sdp, tr);
125 if (tr->tr_t_gh.gh_gl) { 131 if (tr->tr_t_gh.gh_gl) {
@@ -152,16 +158,16 @@ void gfs2_trans_add_bh(struct gfs2_glock *gl, struct buffer_head *bh, int meta)
152 gfs2_attach_bufdata(gl, bh, meta); 158 gfs2_attach_bufdata(gl, bh, meta);
153 bd = bh->b_private; 159 bd = bh->b_private;
154 } 160 }
155 lops_add(sdp, &bd->bd_le); 161 lops_add(sdp, bd);
156} 162}
157 163
158void gfs2_trans_add_revoke(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd) 164void gfs2_trans_add_revoke(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd)
159{ 165{
160 BUG_ON(!list_empty(&bd->bd_le.le_list)); 166 BUG_ON(!list_empty(&bd->bd_list));
161 BUG_ON(!list_empty(&bd->bd_ail_st_list)); 167 BUG_ON(!list_empty(&bd->bd_ail_st_list));
162 BUG_ON(!list_empty(&bd->bd_ail_gl_list)); 168 BUG_ON(!list_empty(&bd->bd_ail_gl_list));
163 lops_init_le(&bd->bd_le, &gfs2_revoke_lops); 169 lops_init_le(bd, &gfs2_revoke_lops);
164 lops_add(sdp, &bd->bd_le); 170 lops_add(sdp, bd);
165} 171}
166 172
167void gfs2_trans_add_unrevoke(struct gfs2_sbd *sdp, u64 blkno, unsigned int len) 173void gfs2_trans_add_unrevoke(struct gfs2_sbd *sdp, u64 blkno, unsigned int len)
@@ -171,9 +177,9 @@ void gfs2_trans_add_unrevoke(struct gfs2_sbd *sdp, u64 blkno, unsigned int len)
171 unsigned int n = len; 177 unsigned int n = len;
172 178
173 gfs2_log_lock(sdp); 179 gfs2_log_lock(sdp);
174 list_for_each_entry_safe(bd, tmp, &sdp->sd_log_le_revoke, bd_le.le_list) { 180 list_for_each_entry_safe(bd, tmp, &sdp->sd_log_le_revoke, bd_list) {
175 if ((bd->bd_blkno >= blkno) && (bd->bd_blkno < (blkno + len))) { 181 if ((bd->bd_blkno >= blkno) && (bd->bd_blkno < (blkno + len))) {
176 list_del_init(&bd->bd_le.le_list); 182 list_del_init(&bd->bd_list);
177 gfs2_assert_withdraw(sdp, sdp->sd_log_num_revoke); 183 gfs2_assert_withdraw(sdp, sdp->sd_log_num_revoke);
178 sdp->sd_log_num_revoke--; 184 sdp->sd_log_num_revoke--;
179 kmem_cache_free(gfs2_bufdata_cachep, bd); 185 kmem_cache_free(gfs2_bufdata_cachep, bd);
diff --git a/fs/gfs2/util.c b/fs/gfs2/util.c
index 9e7765e8e7b0..f00d7c5744f6 100644
--- a/fs/gfs2/util.c
+++ b/fs/gfs2/util.c
@@ -25,7 +25,8 @@ struct kmem_cache *gfs2_inode_cachep __read_mostly;
25struct kmem_cache *gfs2_bufdata_cachep __read_mostly; 25struct kmem_cache *gfs2_bufdata_cachep __read_mostly;
26struct kmem_cache *gfs2_rgrpd_cachep __read_mostly; 26struct kmem_cache *gfs2_rgrpd_cachep __read_mostly;
27struct kmem_cache *gfs2_quotad_cachep __read_mostly; 27struct kmem_cache *gfs2_quotad_cachep __read_mostly;
28mempool_t *gfs2_bh_pool __read_mostly; 28struct kmem_cache *gfs2_rsrv_cachep __read_mostly;
29mempool_t *gfs2_page_pool __read_mostly;
29 30
30void gfs2_assert_i(struct gfs2_sbd *sdp) 31void gfs2_assert_i(struct gfs2_sbd *sdp)
31{ 32{
diff --git a/fs/gfs2/util.h b/fs/gfs2/util.h
index a4ce76c67dbb..3586b0dd6aa7 100644
--- a/fs/gfs2/util.h
+++ b/fs/gfs2/util.h
@@ -152,7 +152,8 @@ extern struct kmem_cache *gfs2_inode_cachep;
152extern struct kmem_cache *gfs2_bufdata_cachep; 152extern struct kmem_cache *gfs2_bufdata_cachep;
153extern struct kmem_cache *gfs2_rgrpd_cachep; 153extern struct kmem_cache *gfs2_rgrpd_cachep;
154extern struct kmem_cache *gfs2_quotad_cachep; 154extern struct kmem_cache *gfs2_quotad_cachep;
155extern mempool_t *gfs2_bh_pool; 155extern struct kmem_cache *gfs2_rsrv_cachep;
156extern mempool_t *gfs2_page_pool;
156 157
157static inline unsigned int gfs2_tune_get_i(struct gfs2_tune *gt, 158static inline unsigned int gfs2_tune_get_i(struct gfs2_tune *gt,
158 unsigned int *p) 159 unsigned int *p)