author    Benjamin Marzinski <bmarzins@redhat.com>    2010-05-04 15:29:16 -0400
committer Steven Whitehouse <swhiteho@redhat.com>     2010-05-05 04:39:18 -0400
commit    5e687eac1bd31baed110d239ef827d3ba666f311 (patch)
tree      71f596b87f60a1d15372fdcfabf3305faa4a57d7 /fs/gfs2/log.c
parent    1a0eae8848cde6e0734360f6456496c995ee1e23 (diff)
GFS2: Various gfs2_logd improvements
This patch contains various tweaks to how log flushes and active item writeback work. gfs2_logd is now managed by a waitqueue, and gfs2_log_reserve() now waits for gfs2_logd to do the log flushing. Multiple functions were rewritten to remove the need to call gfs2_log_lock(). Instead of using one test to see if gfs2_logd had work to do, there are now separate tests to check if there are too many buffers in the incore log or too many items on the active items list.

This patch is a port of a patch Steve Whitehouse wrote about a year ago, with some minor changes. Since gfs2_ail1_start() always submits all the active items, it no longer needs to keep track of the first ai submitted, so this has been removed.

In gfs2_log_reserve(), the order of the calls to prepare_to_wait_exclusive() and wake_up() when firing off the logd thread has been switched. If it called wake_up() first, there was a small window for a race where logd could run and return before gfs2_log_reserve() was ready to be woken up. If gfs2_logd ran but did not free up enough blocks, gfs2_log_reserve() would be left waiting until gfs2_logd eventually ran again on a timeout.

Finally, gt_logd_secs, which controls how long to wait before gfs2_logd times out and flushes the log, can now be set on mount with ar_commit.

Signed-off-by: Benjamin Marzinski <bmarzins@redhat.com>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
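A minimal sketch of the reserve-side ordering described above, not taken from the patch itself: the helper name wait_for_log_space() is hypothetical, while sd_log_waitq, sd_logd_waitq and sd_log_blks_free are the struct gfs2_sbd fields this patch uses. The point is that the waiter registers on the queue before it kicks logd, so the wake-up cannot be lost.

	/* Illustrative sketch only; wait_for_log_space() is a hypothetical helper. */
	static void wait_for_log_space(struct gfs2_sbd *sdp, unsigned int wanted)
	{
		DEFINE_WAIT(wait);

		do {
			/* Queue ourselves exclusively first, so a wake-up cannot be missed. */
			prepare_to_wait_exclusive(&sdp->sd_log_waitq, &wait,
						  TASK_UNINTERRUPTIBLE);
			/* Only then kick gfs2_logd to flush and free journal blocks. */
			wake_up(&sdp->sd_logd_waitq);
			if (atomic_read(&sdp->sd_log_blks_free) <= wanted)
				io_schedule();
		} while (atomic_read(&sdp->sd_log_blks_free) <= wanted);
		finish_wait(&sdp->sd_log_waitq, &wait);
	}

Because the wait is exclusive, each wake_up() rouses only one reserver; once a reserver has taken its share of the log it wakes the next waiter itself, as the comment added to gfs2_log_reserve() below explains.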
Diffstat (limited to 'fs/gfs2/log.c')
-rw-r--r--	fs/gfs2/log.c	157
1 file changed, 87 insertions(+), 70 deletions(-)
diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c
index e5bf4b59d46e..d5959df6deb2 100644
--- a/fs/gfs2/log.c
+++ b/fs/gfs2/log.c
@@ -168,12 +168,11 @@ static int gfs2_ail1_empty_one(struct gfs2_sbd *sdp, struct gfs2_ail *ai, int fl
 	return list_empty(&ai->ai_ail1_list);
 }
 
-static void gfs2_ail1_start(struct gfs2_sbd *sdp, int flags)
+static void gfs2_ail1_start(struct gfs2_sbd *sdp)
 {
 	struct list_head *head;
 	u64 sync_gen;
-	struct list_head *first;
-	struct gfs2_ail *first_ai, *ai, *tmp;
+	struct gfs2_ail *ai;
 	int done = 0;
 
 	gfs2_log_lock(sdp);
@@ -184,21 +183,9 @@ static void gfs2_ail1_start(struct gfs2_sbd *sdp, int flags)
 	}
 	sync_gen = sdp->sd_ail_sync_gen++;
 
-	first = head->prev;
-	first_ai = list_entry(first, struct gfs2_ail, ai_list);
-	first_ai->ai_sync_gen = sync_gen;
-	gfs2_ail1_start_one(sdp, first_ai); /* This may drop log lock */
-
-	if (flags & DIO_ALL)
-		first = NULL;
-
 	while(!done) {
-		if (first && (head->prev != first ||
-		    gfs2_ail1_empty_one(sdp, first_ai, 0)))
-			break;
-
 		done = 1;
-		list_for_each_entry_safe_reverse(ai, tmp, head, ai_list) {
+		list_for_each_entry_reverse(ai, head, ai_list) {
 			if (ai->ai_sync_gen >= sync_gen)
 				continue;
 			ai->ai_sync_gen = sync_gen;
@@ -290,58 +277,57 @@ static void ail2_empty(struct gfs2_sbd *sdp, unsigned int new_tail)
  * flush time, so we ensure that we have just enough free blocks at all
  * times to avoid running out during a log flush.
  *
+ * We no longer flush the log here, instead we wake up logd to do that
+ * for us. To avoid the thundering herd and to ensure that we deal fairly
+ * with queued waiters, we use an exclusive wait. This means that when we
+ * get woken with enough journal space to get our reservation, we need to
+ * wake the next waiter on the list.
+ *
  * Returns: errno
  */
 
 int gfs2_log_reserve(struct gfs2_sbd *sdp, unsigned int blks)
 {
-	unsigned int try = 0;
 	unsigned reserved_blks = 6 * (4096 / sdp->sd_vfs->s_blocksize);
+	unsigned wanted = blks + reserved_blks;
+	DEFINE_WAIT(wait);
+	int did_wait = 0;
+	unsigned int free_blocks;
 
 	if (gfs2_assert_warn(sdp, blks) ||
 	    gfs2_assert_warn(sdp, blks <= sdp->sd_jdesc->jd_blocks))
 		return -EINVAL;
-
-	mutex_lock(&sdp->sd_log_reserve_mutex);
-	gfs2_log_lock(sdp);
-	while(atomic_read(&sdp->sd_log_blks_free) <= (blks + reserved_blks)) {
-		gfs2_log_unlock(sdp);
-		gfs2_ail1_empty(sdp, 0);
-		gfs2_log_flush(sdp, NULL);
-
-		if (try++)
-			gfs2_ail1_start(sdp, 0);
-		gfs2_log_lock(sdp);
+retry:
+	free_blocks = atomic_read(&sdp->sd_log_blks_free);
+	if (unlikely(free_blocks <= wanted)) {
+		do {
+			prepare_to_wait_exclusive(&sdp->sd_log_waitq, &wait,
+						  TASK_UNINTERRUPTIBLE);
+			wake_up(&sdp->sd_logd_waitq);
+			did_wait = 1;
+			if (atomic_read(&sdp->sd_log_blks_free) <= wanted)
+				io_schedule();
+			free_blocks = atomic_read(&sdp->sd_log_blks_free);
+		} while(free_blocks <= wanted);
+		finish_wait(&sdp->sd_log_waitq, &wait);
 	}
-	atomic_sub(blks, &sdp->sd_log_blks_free);
+	if (atomic_cmpxchg(&sdp->sd_log_blks_free, free_blocks,
+			   free_blocks - blks) != free_blocks)
+		goto retry;
 	trace_gfs2_log_blocks(sdp, -blks);
-	gfs2_log_unlock(sdp);
-	mutex_unlock(&sdp->sd_log_reserve_mutex);
+
+	/*
+	 * If we waited, then so might others, wake them up _after_ we get
+	 * our share of the log.
+	 */
+	if (unlikely(did_wait))
+		wake_up(&sdp->sd_log_waitq);
 
 	down_read(&sdp->sd_log_flush_lock);
 
 	return 0;
 }
 
-/**
- * gfs2_log_release - Release a given number of log blocks
- * @sdp: The GFS2 superblock
- * @blks: The number of blocks
- *
- */
-
-void gfs2_log_release(struct gfs2_sbd *sdp, unsigned int blks)
-{
-
-	gfs2_log_lock(sdp);
-	atomic_add(blks, &sdp->sd_log_blks_free);
-	trace_gfs2_log_blocks(sdp, blks);
-	gfs2_assert_withdraw(sdp,
-			     atomic_read(&sdp->sd_log_blks_free) <= sdp->sd_jdesc->jd_blocks);
-	gfs2_log_unlock(sdp);
-	up_read(&sdp->sd_log_flush_lock);
-}
-
 static u64 log_bmap(struct gfs2_sbd *sdp, unsigned int lbn)
 {
 	struct gfs2_journal_extent *je;
@@ -559,11 +545,10 @@ static void log_pull_tail(struct gfs2_sbd *sdp, unsigned int new_tail)
 
 	ail2_empty(sdp, new_tail);
 
-	gfs2_log_lock(sdp);
 	atomic_add(dist, &sdp->sd_log_blks_free);
 	trace_gfs2_log_blocks(sdp, dist);
-	gfs2_assert_withdraw(sdp, atomic_read(&sdp->sd_log_blks_free) <= sdp->sd_jdesc->jd_blocks);
-	gfs2_log_unlock(sdp);
+	gfs2_assert_withdraw(sdp, atomic_read(&sdp->sd_log_blks_free) <=
+			     sdp->sd_jdesc->jd_blocks);
 
 	sdp->sd_log_tail = new_tail;
 }
@@ -822,6 +807,13 @@ static void buf_lo_incore_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
  * @sdp: the filesystem
  * @tr: the transaction
  *
+ * We wake up gfs2_logd if the number of pinned blocks exceed thresh1
+ * or the total number of used blocks (pinned blocks plus AIL blocks)
+ * is greater than thresh2.
+ *
+ * At mount time thresh1 is 1/3rd of journal size, thresh2 is 2/3rd of
+ * journal size.
+ *
  * Returns: errno
  */
 
@@ -832,10 +824,10 @@ void gfs2_log_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
 
 	up_read(&sdp->sd_log_flush_lock);
 
-	gfs2_log_lock(sdp);
-	if (sdp->sd_log_num_buf > gfs2_tune_get(sdp, gt_incore_log_blocks))
-		wake_up_process(sdp->sd_logd_process);
-	gfs2_log_unlock(sdp);
+	if (atomic_read(&sdp->sd_log_pinned) > atomic_read(&sdp->sd_log_thresh1) ||
+	    ((sdp->sd_jdesc->jd_blocks - atomic_read(&sdp->sd_log_blks_free)) >
+	    atomic_read(&sdp->sd_log_thresh2)))
+		wake_up(&sdp->sd_logd_waitq);
 }
 
 /**
@@ -882,13 +874,23 @@ void gfs2_meta_syncfs(struct gfs2_sbd *sdp)
 {
 	gfs2_log_flush(sdp, NULL);
 	for (;;) {
-		gfs2_ail1_start(sdp, DIO_ALL);
+		gfs2_ail1_start(sdp);
 		if (gfs2_ail1_empty(sdp, DIO_ALL))
 			break;
 		msleep(10);
 	}
 }
 
+static inline int gfs2_jrnl_flush_reqd(struct gfs2_sbd *sdp)
+{
+	return (atomic_read(&sdp->sd_log_pinned) >= atomic_read(&sdp->sd_log_thresh1));
+}
+
+static inline int gfs2_ail_flush_reqd(struct gfs2_sbd *sdp)
+{
+	unsigned int used_blocks = sdp->sd_jdesc->jd_blocks - atomic_read(&sdp->sd_log_blks_free);
+	return used_blocks >= atomic_read(&sdp->sd_log_thresh2);
+}
 
 /**
  * gfs2_logd - Update log tail as Active Items get flushed to in-place blocks
@@ -901,28 +903,43 @@ void gfs2_meta_syncfs(struct gfs2_sbd *sdp)
 int gfs2_logd(void *data)
 {
 	struct gfs2_sbd *sdp = data;
-	unsigned long t;
-	int need_flush;
+	unsigned long t = 1;
+	DEFINE_WAIT(wait);
+	unsigned preflush;
 
 	while (!kthread_should_stop()) {
-		/* Advance the log tail */
 
-		t = sdp->sd_log_flush_time +
-		    gfs2_tune_get(sdp, gt_log_flush_secs) * HZ;
+		preflush = atomic_read(&sdp->sd_log_pinned);
+		if (gfs2_jrnl_flush_reqd(sdp) || t == 0) {
+			gfs2_ail1_empty(sdp, DIO_ALL);
+			gfs2_log_flush(sdp, NULL);
+			gfs2_ail1_empty(sdp, DIO_ALL);
+		}
 
-		gfs2_ail1_empty(sdp, DIO_ALL);
-		gfs2_log_lock(sdp);
-		need_flush = sdp->sd_log_num_buf > gfs2_tune_get(sdp, gt_incore_log_blocks);
-		gfs2_log_unlock(sdp);
-		if (need_flush || time_after_eq(jiffies, t)) {
+		if (gfs2_ail_flush_reqd(sdp)) {
+			gfs2_ail1_start(sdp);
+			io_schedule();
+			gfs2_ail1_empty(sdp, 0);
 			gfs2_log_flush(sdp, NULL);
-			sdp->sd_log_flush_time = jiffies;
+			gfs2_ail1_empty(sdp, DIO_ALL);
 		}
 
+		wake_up(&sdp->sd_log_waitq);
 		t = gfs2_tune_get(sdp, gt_logd_secs) * HZ;
 		if (freezing(current))
 			refrigerator();
-		schedule_timeout_interruptible(t);
+
+		do {
+			prepare_to_wait(&sdp->sd_logd_waitq, &wait,
+					TASK_UNINTERRUPTIBLE);
+			if (!gfs2_ail_flush_reqd(sdp) &&
+			    !gfs2_jrnl_flush_reqd(sdp) &&
+			    !kthread_should_stop())
+				t = schedule_timeout(t);
+		} while(t && !gfs2_ail_flush_reqd(sdp) &&
+			!gfs2_jrnl_flush_reqd(sdp) &&
+			!kthread_should_stop());
+		finish_wait(&sdp->sd_logd_waitq, &wait);
 	}
 
 	return 0;
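The sd_log_thresh1/sd_log_thresh2 atomics are only consumed in this file; their mount-time initialization is outside this diff. Going by the comment added above (thresh1 is 1/3rd and thresh2 is 2/3rd of the journal size), a plausible sketch of that setup looks like the following; the function name and placement are assumptions, not shown in this patch.

	/* Hypothetical sketch of the mount-time defaults implied by the comment
	 * added above; the real initialization lives outside fs/gfs2/log.c. */
	static void gfs2_init_logd_thresholds(struct gfs2_sbd *sdp)
	{
		unsigned int jblocks = sdp->sd_jdesc->jd_blocks;

		atomic_set(&sdp->sd_log_thresh1, jblocks / 3);		/* pinned-block trigger */
		atomic_set(&sdp->sd_log_thresh2, (2 * jblocks) / 3);	/* total-usage trigger */
	}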