Diffstat (limited to 'fs/gfs2/log.c')
 fs/gfs2/log.c | 158
 1 file changed, 88 insertions(+), 70 deletions(-)
diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c
index e5bf4b59d46e..b593f0e28f25 100644
--- a/fs/gfs2/log.c
+++ b/fs/gfs2/log.c
@@ -168,12 +168,11 @@ static int gfs2_ail1_empty_one(struct gfs2_sbd *sdp, struct gfs2_ail *ai, int fl
 	return list_empty(&ai->ai_ail1_list);
 }
 
-static void gfs2_ail1_start(struct gfs2_sbd *sdp, int flags)
+static void gfs2_ail1_start(struct gfs2_sbd *sdp)
 {
 	struct list_head *head;
 	u64 sync_gen;
-	struct list_head *first;
-	struct gfs2_ail *first_ai, *ai, *tmp;
+	struct gfs2_ail *ai;
 	int done = 0;
 
 	gfs2_log_lock(sdp);
@@ -184,21 +183,9 @@ static void gfs2_ail1_start(struct gfs2_sbd *sdp, int flags)
 	}
 	sync_gen = sdp->sd_ail_sync_gen++;
 
-	first = head->prev;
-	first_ai = list_entry(first, struct gfs2_ail, ai_list);
-	first_ai->ai_sync_gen = sync_gen;
-	gfs2_ail1_start_one(sdp, first_ai); /* This may drop log lock */
-
-	if (flags & DIO_ALL)
-		first = NULL;
-
 	while(!done) {
-		if (first && (head->prev != first ||
-		    gfs2_ail1_empty_one(sdp, first_ai, 0)))
-			break;
-
 		done = 1;
-		list_for_each_entry_safe_reverse(ai, tmp, head, ai_list) {
+		list_for_each_entry_reverse(ai, head, ai_list) {
 			if (ai->ai_sync_gen >= sync_gen)
 				continue;
 			ai->ai_sync_gen = sync_gen;
@@ -290,58 +277,57 @@ static void ail2_empty(struct gfs2_sbd *sdp, unsigned int new_tail)
  * flush time, so we ensure that we have just enough free blocks at all
  * times to avoid running out during a log flush.
  *
+ * We no longer flush the log here, instead we wake up logd to do that
+ * for us. To avoid the thundering herd and to ensure that we deal fairly
+ * with queued waiters, we use an exclusive wait. This means that when we
+ * get woken with enough journal space to get our reservation, we need to
+ * wake the next waiter on the list.
+ *
  * Returns: errno
  */
 
 int gfs2_log_reserve(struct gfs2_sbd *sdp, unsigned int blks)
 {
-	unsigned int try = 0;
 	unsigned reserved_blks = 6 * (4096 / sdp->sd_vfs->s_blocksize);
+	unsigned wanted = blks + reserved_blks;
+	DEFINE_WAIT(wait);
+	int did_wait = 0;
+	unsigned int free_blocks;
 
 	if (gfs2_assert_warn(sdp, blks) ||
 	    gfs2_assert_warn(sdp, blks <= sdp->sd_jdesc->jd_blocks))
 		return -EINVAL;
-
-	mutex_lock(&sdp->sd_log_reserve_mutex);
-	gfs2_log_lock(sdp);
-	while(atomic_read(&sdp->sd_log_blks_free) <= (blks + reserved_blks)) {
-		gfs2_log_unlock(sdp);
-		gfs2_ail1_empty(sdp, 0);
-		gfs2_log_flush(sdp, NULL);
-
-		if (try++)
-			gfs2_ail1_start(sdp, 0);
-		gfs2_log_lock(sdp);
+retry:
+	free_blocks = atomic_read(&sdp->sd_log_blks_free);
+	if (unlikely(free_blocks <= wanted)) {
+		do {
+			prepare_to_wait_exclusive(&sdp->sd_log_waitq, &wait,
+					TASK_UNINTERRUPTIBLE);
+			wake_up(&sdp->sd_logd_waitq);
+			did_wait = 1;
+			if (atomic_read(&sdp->sd_log_blks_free) <= wanted)
+				io_schedule();
+			free_blocks = atomic_read(&sdp->sd_log_blks_free);
+		} while(free_blocks <= wanted);
+		finish_wait(&sdp->sd_log_waitq, &wait);
 	}
-	atomic_sub(blks, &sdp->sd_log_blks_free);
+	if (atomic_cmpxchg(&sdp->sd_log_blks_free, free_blocks,
+				free_blocks - blks) != free_blocks)
+		goto retry;
 	trace_gfs2_log_blocks(sdp, -blks);
-	gfs2_log_unlock(sdp);
-	mutex_unlock(&sdp->sd_log_reserve_mutex);
+
+	/*
+	 * If we waited, then so might others, wake them up _after_ we get
+	 * our share of the log.
+	 */
+	if (unlikely(did_wait))
+		wake_up(&sdp->sd_log_waitq);
 
 	down_read(&sdp->sd_log_flush_lock);
 
 	return 0;
 }
 
-/**
- * gfs2_log_release - Release a given number of log blocks
- * @sdp: The GFS2 superblock
- * @blks: The number of blocks
- *
- */
-
-void gfs2_log_release(struct gfs2_sbd *sdp, unsigned int blks)
-{
-
-	gfs2_log_lock(sdp);
-	atomic_add(blks, &sdp->sd_log_blks_free);
-	trace_gfs2_log_blocks(sdp, blks);
-	gfs2_assert_withdraw(sdp,
-		atomic_read(&sdp->sd_log_blks_free) <= sdp->sd_jdesc->jd_blocks);
-	gfs2_log_unlock(sdp);
-	up_read(&sdp->sd_log_flush_lock);
-}
-
 static u64 log_bmap(struct gfs2_sbd *sdp, unsigned int lbn)
 {
 	struct gfs2_journal_extent *je;
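The rewritten gfs2_log_reserve() drops sd_log_reserve_mutex entirely: it samples sd_log_blks_free, claims its blocks with a single atomic_cmpxchg(), retries if another reserver raced in between, and only falls into the exclusive wait on sd_log_waitq when the journal is genuinely short of space. A minimal userspace model of that lock-free fast path, using C11 atomics; the names log_blks_free and log_reserve_fast are illustrative, not the kernel's:

#include <stdatomic.h>
#include <stdbool.h>

static atomic_uint log_blks_free = 1024;	/* stands in for sd_log_blks_free */

/* Claim blks from the free counter without taking any lock. */
static bool log_reserve_fast(unsigned int blks)
{
	unsigned int free_blocks = atomic_load(&log_blks_free);

	do {
		if (free_blocks < blks)
			return false;	/* journal full: the real code sleeps */
		/* On CAS failure, free_blocks is refreshed with the current value. */
	} while (!atomic_compare_exchange_weak(&log_blks_free, &free_blocks,
					       free_blocks - blks));
	return true;
}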
@@ -559,11 +545,10 @@ static void log_pull_tail(struct gfs2_sbd *sdp, unsigned int new_tail)
 
 	ail2_empty(sdp, new_tail);
 
-	gfs2_log_lock(sdp);
 	atomic_add(dist, &sdp->sd_log_blks_free);
 	trace_gfs2_log_blocks(sdp, dist);
-	gfs2_assert_withdraw(sdp, atomic_read(&sdp->sd_log_blks_free) <= sdp->sd_jdesc->jd_blocks);
-	gfs2_log_unlock(sdp);
+	gfs2_assert_withdraw(sdp, atomic_read(&sdp->sd_log_blks_free) <=
+			     sdp->sd_jdesc->jd_blocks);
 
 	sdp->sd_log_tail = new_tail;
 }
@@ -615,6 +600,7 @@ static void log_write_header(struct gfs2_sbd *sdp, u32 flags, int pull)
 	if (buffer_eopnotsupp(bh)) {
 		clear_buffer_eopnotsupp(bh);
 		set_buffer_uptodate(bh);
+		fs_info(sdp, "barrier sync failed - disabling barriers\n");
 		set_bit(SDF_NOBARRIERS, &sdp->sd_flags);
 		lock_buffer(bh);
 skip_barrier:
@@ -822,6 +808,13 @@ static void buf_lo_incore_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
  * @sdp: the filesystem
  * @tr: the transaction
  *
+ * We wake up gfs2_logd if the number of pinned blocks exceeds thresh1
+ * or the total number of used blocks (pinned blocks plus AIL blocks)
+ * is greater than thresh2.
+ *
+ * At mount time thresh1 is 1/3rd of journal size, thresh2 is 2/3rd of
+ * journal size.
+ *
  * Returns: errno
  */
 
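To make that comment concrete, here is the threshold arithmetic worked through for a hypothetical 32768-block journal (the journal size is invented; the fractions come from the comment above):

#include <stdio.h>

int main(void)
{
	unsigned int jd_blocks = 32768;			/* example journal size */
	unsigned int thresh1 = jd_blocks / 3;		/* 10922 pinned blocks -> wake logd */
	unsigned int thresh2 = 2 * jd_blocks / 3;	/* 21845 used blocks -> wake logd */

	printf("thresh1=%u thresh2=%u\n", thresh1, thresh2);
	return 0;
}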
@@ -832,10 +825,10 @@ void gfs2_log_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
 
 	up_read(&sdp->sd_log_flush_lock);
 
-	gfs2_log_lock(sdp);
-	if (sdp->sd_log_num_buf > gfs2_tune_get(sdp, gt_incore_log_blocks))
-		wake_up_process(sdp->sd_logd_process);
-	gfs2_log_unlock(sdp);
+	if (atomic_read(&sdp->sd_log_pinned) > atomic_read(&sdp->sd_log_thresh1) ||
+	    ((sdp->sd_jdesc->jd_blocks - atomic_read(&sdp->sd_log_blks_free)) >
+	    atomic_read(&sdp->sd_log_thresh2)))
+		wake_up(&sdp->sd_logd_waitq);
 }
 
 /**
@@ -882,13 +875,23 @@ void gfs2_meta_syncfs(struct gfs2_sbd *sdp)
 {
 	gfs2_log_flush(sdp, NULL);
 	for (;;) {
-		gfs2_ail1_start(sdp, DIO_ALL);
+		gfs2_ail1_start(sdp);
 		if (gfs2_ail1_empty(sdp, DIO_ALL))
 			break;
 		msleep(10);
 	}
 }
 
+static inline int gfs2_jrnl_flush_reqd(struct gfs2_sbd *sdp)
+{
+	return (atomic_read(&sdp->sd_log_pinned) >= atomic_read(&sdp->sd_log_thresh1));
+}
+
+static inline int gfs2_ail_flush_reqd(struct gfs2_sbd *sdp)
+{
+	unsigned int used_blocks = sdp->sd_jdesc->jd_blocks - atomic_read(&sdp->sd_log_blks_free);
+	return used_blocks >= atomic_read(&sdp->sd_log_thresh2);
+}
 
 /**
  * gfs2_logd - Update log tail as Active Items get flushed to in-place blocks
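These two helpers are the whole of logd's trigger logic: a journal flush is due once pinned blocks reach thresh1, and an AIL push is due once used blocks (journal size minus free blocks) reach thresh2. A self-contained sketch of the same predicates, with a plain struct standing in for the gfs2_sbd fields (all names here are illustrative):

#include <stdatomic.h>

struct log_state {
	unsigned int jd_blocks;		/* total journal blocks */
	unsigned int thresh1;		/* journal-flush trigger */
	unsigned int thresh2;		/* AIL-push trigger */
	atomic_uint log_pinned;		/* pinned, not yet flushed */
	atomic_uint log_blks_free;	/* unreserved journal blocks */
};

static int jrnl_flush_reqd(struct log_state *s)
{
	return atomic_load(&s->log_pinned) >= s->thresh1;
}

static int ail_flush_reqd(struct log_state *s)
{
	unsigned int used_blocks = s->jd_blocks - atomic_load(&s->log_blks_free);

	return used_blocks >= s->thresh2;
}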
@@ -901,28 +904,43 @@ void gfs2_meta_syncfs(struct gfs2_sbd *sdp)
 int gfs2_logd(void *data)
 {
 	struct gfs2_sbd *sdp = data;
-	unsigned long t;
-	int need_flush;
+	unsigned long t = 1;
+	DEFINE_WAIT(wait);
+	unsigned preflush;
 
 	while (!kthread_should_stop()) {
-		/* Advance the log tail */
 
-		t = sdp->sd_log_flush_time +
-		    gfs2_tune_get(sdp, gt_log_flush_secs) * HZ;
+		preflush = atomic_read(&sdp->sd_log_pinned);
+		if (gfs2_jrnl_flush_reqd(sdp) || t == 0) {
+			gfs2_ail1_empty(sdp, DIO_ALL);
+			gfs2_log_flush(sdp, NULL);
+			gfs2_ail1_empty(sdp, DIO_ALL);
+		}
 
-		gfs2_ail1_empty(sdp, DIO_ALL);
-		gfs2_log_lock(sdp);
-		need_flush = sdp->sd_log_num_buf > gfs2_tune_get(sdp, gt_incore_log_blocks);
-		gfs2_log_unlock(sdp);
-		if (need_flush || time_after_eq(jiffies, t)) {
+		if (gfs2_ail_flush_reqd(sdp)) {
+			gfs2_ail1_start(sdp);
+			io_schedule();
+			gfs2_ail1_empty(sdp, 0);
 			gfs2_log_flush(sdp, NULL);
-			sdp->sd_log_flush_time = jiffies;
+			gfs2_ail1_empty(sdp, DIO_ALL);
 		}
 
+		wake_up(&sdp->sd_log_waitq);
 		t = gfs2_tune_get(sdp, gt_logd_secs) * HZ;
 		if (freezing(current))
 			refrigerator();
-		schedule_timeout_interruptible(t);
+
+		do {
+			prepare_to_wait(&sdp->sd_logd_waitq, &wait,
+					TASK_UNINTERRUPTIBLE);
+			if (!gfs2_ail_flush_reqd(sdp) &&
+			    !gfs2_jrnl_flush_reqd(sdp) &&
+			    !kthread_should_stop())
+				t = schedule_timeout(t);
+		} while(t && !gfs2_ail_flush_reqd(sdp) &&
+			!gfs2_jrnl_flush_reqd(sdp) &&
+			!kthread_should_stop());
+		finish_wait(&sdp->sd_logd_waitq, &wait);
 	}
 
 	return 0;
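Taken together, the new gfs2_logd() loop and the exclusive wait in gfs2_log_reserve() form a chained producer/consumer handshake: logd wakes one reserver as journal space is freed, and each satisfied reserver passes the wakeup on to the next waiter. The same shape modeled with POSIX condition variables (a sketch only, all names invented; signal-one plays the role of the kernel's exclusive wait):

#include <pthread.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t space = PTHREAD_COND_INITIALIZER;
static unsigned int blks_free;

/* Waiter: the analogue of gfs2_log_reserve()'s slow path. */
static void reserve(unsigned int blks)
{
	pthread_mutex_lock(&lock);
	while (blks_free < blks)
		pthread_cond_wait(&space, &lock);
	blks_free -= blks;
	if (blks_free)
		pthread_cond_signal(&space);	/* wake the next waiter in line */
	pthread_mutex_unlock(&lock);
}

/* Waker: the analogue of logd making space by advancing the log tail. */
static void release(unsigned int blks)
{
	pthread_mutex_lock(&lock);
	blks_free += blks;
	pthread_cond_signal(&space);
	pthread_mutex_unlock(&lock);
}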