diff options
| author | Linus Torvalds <torvalds@linux-foundation.org> | 2008-10-14 19:34:11 -0400 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2008-10-14 19:34:11 -0400 |
| commit | acd15a836053ff6b48e78dc6de388b225ba9e40d (patch) | |
| tree | 9dec0be18d746c5fb0d105233f50ebe7df455708 /fs/ocfs2/localalloc.c | |
| parent | 72f22b1eb6ca5e4676a632a04d40d46cb61d4562 (diff) | |
| parent | d4a8c93c8248534bdedb07f83c9aebd6f7d1d579 (diff) | |
Merge branch 'upstream-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mfasheh/ocfs2
* 'upstream-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mfasheh/ocfs2: (56 commits)
ocfs2: Make cached block reads the common case.
ocfs2: Kill the last naked wait_on_buffer() for cached reads.
ocfs2: Move ocfs2_bread() into dir.c
ocfs2: Simplify ocfs2_read_block()
ocfs2: Require an inode for ocfs2_read_block(s)().
ocfs2: Separate out sync reads from ocfs2_read_blocks()
ocfs2: Refactor xattr list and remove ocfs2_xattr_handler().
ocfs2: Calculate EA hash only by its suffix.
ocfs2: Move trusted and user attribute support into xattr.c
ocfs2: Uninline ocfs2_xattr_name_hash()
ocfs2: Don't check for NULL before brelse()
ocfs2: use smaller counters in ocfs2_remove_xattr_clusters_from_cache
ocfs2: Documentation update for user_xattr / nouser_xattr mount options
ocfs2: make la_debug_mutex static
ocfs2: Remove pointless !!
ocfs2: Add empty bucket support in xattr.
ocfs2/xattr.c: Fix a bug when inserting xattr.
ocfs2: Add xattr mount option in ocfs2_show_options()
ocfs2: Switch over to JBD2.
ocfs2: Add the 'inode64' mount option.
...
Diffstat (limited to 'fs/ocfs2/localalloc.c')
| -rw-r--r-- | fs/ocfs2/localalloc.c | 384 |
1 files changed, 342 insertions, 42 deletions
diff --git a/fs/ocfs2/localalloc.c b/fs/ocfs2/localalloc.c index 28e492e4ec88..687b28713c32 100644 --- a/fs/ocfs2/localalloc.c +++ b/fs/ocfs2/localalloc.c | |||
| @@ -28,6 +28,7 @@ | |||
| 28 | #include <linux/slab.h> | 28 | #include <linux/slab.h> |
| 29 | #include <linux/highmem.h> | 29 | #include <linux/highmem.h> |
| 30 | #include <linux/bitops.h> | 30 | #include <linux/bitops.h> |
| 31 | #include <linux/debugfs.h> | ||
| 31 | 32 | ||
| 32 | #define MLOG_MASK_PREFIX ML_DISK_ALLOC | 33 | #define MLOG_MASK_PREFIX ML_DISK_ALLOC |
| 33 | #include <cluster/masklog.h> | 34 | #include <cluster/masklog.h> |
| @@ -47,8 +48,6 @@ | |||
| 47 | 48 | ||
| 48 | #define OCFS2_LOCAL_ALLOC(dinode) (&((dinode)->id2.i_lab)) | 49 | #define OCFS2_LOCAL_ALLOC(dinode) (&((dinode)->id2.i_lab)) |
| 49 | 50 | ||
| 50 | static inline int ocfs2_local_alloc_window_bits(struct ocfs2_super *osb); | ||
| 51 | |||
| 52 | static u32 ocfs2_local_alloc_count_bits(struct ocfs2_dinode *alloc); | 51 | static u32 ocfs2_local_alloc_count_bits(struct ocfs2_dinode *alloc); |
| 53 | 52 | ||
| 54 | static int ocfs2_local_alloc_find_clear_bits(struct ocfs2_super *osb, | 53 | static int ocfs2_local_alloc_find_clear_bits(struct ocfs2_super *osb, |
| @@ -75,24 +74,129 @@ static int ocfs2_local_alloc_new_window(struct ocfs2_super *osb, | |||
| 75 | static int ocfs2_local_alloc_slide_window(struct ocfs2_super *osb, | 74 | static int ocfs2_local_alloc_slide_window(struct ocfs2_super *osb, |
| 76 | struct inode *local_alloc_inode); | 75 | struct inode *local_alloc_inode); |
| 77 | 76 | ||
| 78 | static inline int ocfs2_local_alloc_window_bits(struct ocfs2_super *osb) | 77 | #ifdef CONFIG_OCFS2_FS_STATS |
| 78 | |||
| 79 | static int ocfs2_la_debug_open(struct inode *inode, struct file *file) | ||
| 80 | { | ||
| 81 | file->private_data = inode->i_private; | ||
| 82 | return 0; | ||
| 83 | } | ||
| 84 | |||
| 85 | #define LA_DEBUG_BUF_SZ PAGE_CACHE_SIZE | ||
| 86 | #define LA_DEBUG_VER 1 | ||
| 87 | static ssize_t ocfs2_la_debug_read(struct file *file, char __user *userbuf, | ||
| 88 | size_t count, loff_t *ppos) | ||
| 89 | { | ||
| 90 | static DEFINE_MUTEX(la_debug_mutex); | ||
| 91 | struct ocfs2_super *osb = file->private_data; | ||
| 92 | int written, ret; | ||
| 93 | char *buf = osb->local_alloc_debug_buf; | ||
| 94 | |||
| 95 | mutex_lock(&la_debug_mutex); | ||
| 96 | memset(buf, 0, LA_DEBUG_BUF_SZ); | ||
| 97 | |||
| 98 | written = snprintf(buf, LA_DEBUG_BUF_SZ, | ||
| 99 | "0x%x\t0x%llx\t%u\t%u\t0x%x\n", | ||
| 100 | LA_DEBUG_VER, | ||
| 101 | (unsigned long long)osb->la_last_gd, | ||
| 102 | osb->local_alloc_default_bits, | ||
| 103 | osb->local_alloc_bits, osb->local_alloc_state); | ||
| 104 | |||
| 105 | ret = simple_read_from_buffer(userbuf, count, ppos, buf, written); | ||
| 106 | |||
| 107 | mutex_unlock(&la_debug_mutex); | ||
| 108 | return ret; | ||
| 109 | } | ||
| 110 | |||
| 111 | static const struct file_operations ocfs2_la_debug_fops = { | ||
| 112 | .open = ocfs2_la_debug_open, | ||
| 113 | .read = ocfs2_la_debug_read, | ||
| 114 | }; | ||
| 115 | |||
| 116 | static void ocfs2_init_la_debug(struct ocfs2_super *osb) | ||
| 117 | { | ||
| 118 | osb->local_alloc_debug_buf = kmalloc(LA_DEBUG_BUF_SZ, GFP_NOFS); | ||
| 119 | if (!osb->local_alloc_debug_buf) | ||
| 120 | return; | ||
| 121 | |||
| 122 | osb->local_alloc_debug = debugfs_create_file("local_alloc_stats", | ||
| 123 | S_IFREG|S_IRUSR, | ||
| 124 | osb->osb_debug_root, | ||
| 125 | osb, | ||
| 126 | &ocfs2_la_debug_fops); | ||
| 127 | if (!osb->local_alloc_debug) { | ||
| 128 | kfree(osb->local_alloc_debug_buf); | ||
| 129 | osb->local_alloc_debug_buf = NULL; | ||
| 130 | } | ||
| 131 | } | ||
| 132 | |||
| 133 | static void ocfs2_shutdown_la_debug(struct ocfs2_super *osb) | ||
| 134 | { | ||
| 135 | if (osb->local_alloc_debug) | ||
| 136 | debugfs_remove(osb->local_alloc_debug); | ||
| 137 | |||
| 138 | if (osb->local_alloc_debug_buf) | ||
| 139 | kfree(osb->local_alloc_debug_buf); | ||
| 140 | |||
| 141 | osb->local_alloc_debug_buf = NULL; | ||
| 142 | osb->local_alloc_debug = NULL; | ||
| 143 | } | ||
| 144 | #else /* CONFIG_OCFS2_FS_STATS */ | ||
| 145 | static void ocfs2_init_la_debug(struct ocfs2_super *osb) | ||
| 146 | { | ||
| 147 | return; | ||
| 148 | } | ||
| 149 | static void ocfs2_shutdown_la_debug(struct ocfs2_super *osb) | ||
| 150 | { | ||
| 151 | return; | ||
| 152 | } | ||
| 153 | #endif | ||
| 154 | |||
| 155 | static inline int ocfs2_la_state_enabled(struct ocfs2_super *osb) | ||
| 79 | { | 156 | { |
| 80 | BUG_ON(osb->s_clustersize_bits > 20); | 157 | return (osb->local_alloc_state == OCFS2_LA_THROTTLED || |
| 158 | osb->local_alloc_state == OCFS2_LA_ENABLED); | ||
| 159 | } | ||
| 81 | 160 | ||
| 82 | /* Size local alloc windows by the megabyte */ | 161 | void ocfs2_local_alloc_seen_free_bits(struct ocfs2_super *osb, |
| 83 | return osb->local_alloc_size << (20 - osb->s_clustersize_bits); | 162 | unsigned int num_clusters) |
| 163 | { | ||
| 164 | spin_lock(&osb->osb_lock); | ||
| 165 | if (osb->local_alloc_state == OCFS2_LA_DISABLED || | ||
| 166 | osb->local_alloc_state == OCFS2_LA_THROTTLED) | ||
| 167 | if (num_clusters >= osb->local_alloc_default_bits) { | ||
| 168 | cancel_delayed_work(&osb->la_enable_wq); | ||
| 169 | osb->local_alloc_state = OCFS2_LA_ENABLED; | ||
| 170 | } | ||
| 171 | spin_unlock(&osb->osb_lock); | ||
| 172 | } | ||
| 173 | |||
| 174 | void ocfs2_la_enable_worker(struct work_struct *work) | ||
| 175 | { | ||
| 176 | struct ocfs2_super *osb = | ||
| 177 | container_of(work, struct ocfs2_super, | ||
| 178 | la_enable_wq.work); | ||
| 179 | spin_lock(&osb->osb_lock); | ||
| 180 | osb->local_alloc_state = OCFS2_LA_ENABLED; | ||
| 181 | spin_unlock(&osb->osb_lock); | ||
| 84 | } | 182 | } |
| 85 | 183 | ||
| 86 | /* | 184 | /* |
| 87 | * Tell us whether a given allocation should use the local alloc | 185 | * Tell us whether a given allocation should use the local alloc |
| 88 | * file. Otherwise, it has to go to the main bitmap. | 186 | * file. Otherwise, it has to go to the main bitmap. |
| 187 | * | ||
| 188 | * This function does semi-dirty reads of local alloc size and state! | ||
| 189 | * This is ok however, as the values are re-checked once under mutex. | ||
| 89 | */ | 190 | */ |
| 90 | int ocfs2_alloc_should_use_local(struct ocfs2_super *osb, u64 bits) | 191 | int ocfs2_alloc_should_use_local(struct ocfs2_super *osb, u64 bits) |
| 91 | { | 192 | { |
| 92 | int la_bits = ocfs2_local_alloc_window_bits(osb); | ||
| 93 | int ret = 0; | 193 | int ret = 0; |
| 194 | int la_bits; | ||
| 195 | |||
| 196 | spin_lock(&osb->osb_lock); | ||
| 197 | la_bits = osb->local_alloc_bits; | ||
| 94 | 198 | ||
| 95 | if (osb->local_alloc_state != OCFS2_LA_ENABLED) | 199 | if (!ocfs2_la_state_enabled(osb)) |
| 96 | goto bail; | 200 | goto bail; |
| 97 | 201 | ||
| 98 | /* la_bits should be at least twice the size (in clusters) of | 202 | /* la_bits should be at least twice the size (in clusters) of |
| @@ -106,6 +210,7 @@ int ocfs2_alloc_should_use_local(struct ocfs2_super *osb, u64 bits) | |||
| 106 | bail: | 210 | bail: |
| 107 | mlog(0, "state=%d, bits=%llu, la_bits=%d, ret=%d\n", | 211 | mlog(0, "state=%d, bits=%llu, la_bits=%d, ret=%d\n", |
| 108 | osb->local_alloc_state, (unsigned long long)bits, la_bits, ret); | 212 | osb->local_alloc_state, (unsigned long long)bits, la_bits, ret); |
| 213 | spin_unlock(&osb->osb_lock); | ||
| 109 | return ret; | 214 | return ret; |
| 110 | } | 215 | } |
| 111 | 216 | ||
| @@ -120,14 +225,18 @@ int ocfs2_load_local_alloc(struct ocfs2_super *osb) | |||
| 120 | 225 | ||
| 121 | mlog_entry_void(); | 226 | mlog_entry_void(); |
| 122 | 227 | ||
| 123 | if (osb->local_alloc_size == 0) | 228 | ocfs2_init_la_debug(osb); |
| 229 | |||
| 230 | if (osb->local_alloc_bits == 0) | ||
| 124 | goto bail; | 231 | goto bail; |
| 125 | 232 | ||
| 126 | if (ocfs2_local_alloc_window_bits(osb) >= osb->bitmap_cpg) { | 233 | if (osb->local_alloc_bits >= osb->bitmap_cpg) { |
| 127 | mlog(ML_NOTICE, "Requested local alloc window %d is larger " | 234 | mlog(ML_NOTICE, "Requested local alloc window %d is larger " |
| 128 | "than max possible %u. Using defaults.\n", | 235 | "than max possible %u. Using defaults.\n", |
| 129 | ocfs2_local_alloc_window_bits(osb), (osb->bitmap_cpg - 1)); | 236 | osb->local_alloc_bits, (osb->bitmap_cpg - 1)); |
| 130 | osb->local_alloc_size = OCFS2_DEFAULT_LOCAL_ALLOC_SIZE; | 237 | osb->local_alloc_bits = |
| 238 | ocfs2_megabytes_to_clusters(osb->sb, | ||
| 239 | OCFS2_DEFAULT_LOCAL_ALLOC_SIZE); | ||
| 131 | } | 240 | } |
| 132 | 241 | ||
| 133 | /* read the alloc off disk */ | 242 | /* read the alloc off disk */ |
| @@ -139,8 +248,8 @@ int ocfs2_load_local_alloc(struct ocfs2_super *osb) | |||
| 139 | goto bail; | 248 | goto bail; |
| 140 | } | 249 | } |
| 141 | 250 | ||
| 142 | status = ocfs2_read_block(osb, OCFS2_I(inode)->ip_blkno, | 251 | status = ocfs2_read_blocks(inode, OCFS2_I(inode)->ip_blkno, 1, |
| 143 | &alloc_bh, 0, inode); | 252 | &alloc_bh, OCFS2_BH_IGNORE_CACHE); |
| 144 | if (status < 0) { | 253 | if (status < 0) { |
| 145 | mlog_errno(status); | 254 | mlog_errno(status); |
| 146 | goto bail; | 255 | goto bail; |
| @@ -185,13 +294,14 @@ int ocfs2_load_local_alloc(struct ocfs2_super *osb) | |||
| 185 | 294 | ||
| 186 | bail: | 295 | bail: |
| 187 | if (status < 0) | 296 | if (status < 0) |
| 188 | if (alloc_bh) | 297 | brelse(alloc_bh); |
| 189 | brelse(alloc_bh); | ||
| 190 | if (inode) | 298 | if (inode) |
| 191 | iput(inode); | 299 | iput(inode); |
| 192 | 300 | ||
| 193 | mlog(0, "Local alloc window bits = %d\n", | 301 | if (status < 0) |
| 194 | ocfs2_local_alloc_window_bits(osb)); | 302 | ocfs2_shutdown_la_debug(osb); |
| 303 | |||
| 304 | mlog(0, "Local alloc window bits = %d\n", osb->local_alloc_bits); | ||
| 195 | 305 | ||
| 196 | mlog_exit(status); | 306 | mlog_exit(status); |
| 197 | return status; | 307 | return status; |
| @@ -217,6 +327,11 @@ void ocfs2_shutdown_local_alloc(struct ocfs2_super *osb) | |||
| 217 | 327 | ||
| 218 | mlog_entry_void(); | 328 | mlog_entry_void(); |
| 219 | 329 | ||
| 330 | cancel_delayed_work(&osb->la_enable_wq); | ||
| 331 | flush_workqueue(ocfs2_wq); | ||
| 332 | |||
| 333 | ocfs2_shutdown_la_debug(osb); | ||
| 334 | |||
| 220 | if (osb->local_alloc_state == OCFS2_LA_UNUSED) | 335 | if (osb->local_alloc_state == OCFS2_LA_UNUSED) |
| 221 | goto out; | 336 | goto out; |
| 222 | 337 | ||
| @@ -295,8 +410,7 @@ out_commit: | |||
| 295 | ocfs2_commit_trans(osb, handle); | 410 | ocfs2_commit_trans(osb, handle); |
| 296 | 411 | ||
| 297 | out_unlock: | 412 | out_unlock: |
| 298 | if (main_bm_bh) | 413 | brelse(main_bm_bh); |
| 299 | brelse(main_bm_bh); | ||
| 300 | 414 | ||
| 301 | ocfs2_inode_unlock(main_bm_inode, 1); | 415 | ocfs2_inode_unlock(main_bm_inode, 1); |
| 302 | 416 | ||
| @@ -345,8 +459,8 @@ int ocfs2_begin_local_alloc_recovery(struct ocfs2_super *osb, | |||
| 345 | 459 | ||
| 346 | mutex_lock(&inode->i_mutex); | 460 | mutex_lock(&inode->i_mutex); |
| 347 | 461 | ||
| 348 | status = ocfs2_read_block(osb, OCFS2_I(inode)->ip_blkno, | 462 | status = ocfs2_read_blocks(inode, OCFS2_I(inode)->ip_blkno, 1, |
| 349 | &alloc_bh, 0, inode); | 463 | &alloc_bh, OCFS2_BH_IGNORE_CACHE); |
| 350 | if (status < 0) { | 464 | if (status < 0) { |
| 351 | mlog_errno(status); | 465 | mlog_errno(status); |
| 352 | goto bail; | 466 | goto bail; |
| @@ -372,8 +486,7 @@ bail: | |||
| 372 | *alloc_copy = NULL; | 486 | *alloc_copy = NULL; |
| 373 | } | 487 | } |
| 374 | 488 | ||
| 375 | if (alloc_bh) | 489 | brelse(alloc_bh); |
| 376 | brelse(alloc_bh); | ||
| 377 | 490 | ||
| 378 | if (inode) { | 491 | if (inode) { |
| 379 | mutex_unlock(&inode->i_mutex); | 492 | mutex_unlock(&inode->i_mutex); |
| @@ -441,8 +554,7 @@ out_unlock: | |||
| 441 | out_mutex: | 554 | out_mutex: |
| 442 | mutex_unlock(&main_bm_inode->i_mutex); | 555 | mutex_unlock(&main_bm_inode->i_mutex); |
| 443 | 556 | ||
| 444 | if (main_bm_bh) | 557 | brelse(main_bm_bh); |
| 445 | brelse(main_bm_bh); | ||
| 446 | 558 | ||
| 447 | iput(main_bm_inode); | 559 | iput(main_bm_inode); |
| 448 | 560 | ||
| @@ -453,8 +565,48 @@ out: | |||
| 453 | return status; | 565 | return status; |
| 454 | } | 566 | } |
| 455 | 567 | ||
| 568 | /* Check to see if the local alloc window is within ac->ac_max_block */ | ||
| 569 | static int ocfs2_local_alloc_in_range(struct inode *inode, | ||
| 570 | struct ocfs2_alloc_context *ac, | ||
| 571 | u32 bits_wanted) | ||
| 572 | { | ||
| 573 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
| 574 | struct ocfs2_dinode *alloc; | ||
| 575 | struct ocfs2_local_alloc *la; | ||
| 576 | int start; | ||
| 577 | u64 block_off; | ||
| 578 | |||
| 579 | if (!ac->ac_max_block) | ||
| 580 | return 1; | ||
| 581 | |||
| 582 | alloc = (struct ocfs2_dinode *) osb->local_alloc_bh->b_data; | ||
| 583 | la = OCFS2_LOCAL_ALLOC(alloc); | ||
| 584 | |||
| 585 | start = ocfs2_local_alloc_find_clear_bits(osb, alloc, bits_wanted); | ||
| 586 | if (start == -1) { | ||
| 587 | mlog_errno(-ENOSPC); | ||
| 588 | return 0; | ||
| 589 | } | ||
| 590 | |||
| 591 | /* | ||
| 592 | * Converting (bm_off + start + bits_wanted) to blocks gives us | ||
| 593 | * the blkno just past our actual allocation. This is perfect | ||
| 594 | * to compare with ac_max_block. | ||
| 595 | */ | ||
| 596 | block_off = ocfs2_clusters_to_blocks(inode->i_sb, | ||
| 597 | le32_to_cpu(la->la_bm_off) + | ||
| 598 | start + bits_wanted); | ||
| 599 | mlog(0, "Checking %llu against %llu\n", | ||
| 600 | (unsigned long long)block_off, | ||
| 601 | (unsigned long long)ac->ac_max_block); | ||
| 602 | if (block_off > ac->ac_max_block) | ||
| 603 | return 0; | ||
| 604 | |||
| 605 | return 1; | ||
| 606 | } | ||
| 607 | |||
| 456 | /* | 608 | /* |
| 457 | * make sure we've got at least bitswanted contiguous bits in the | 609 | * make sure we've got at least bits_wanted contiguous bits in the |
| 458 | * local alloc. You lose them when you drop i_mutex. | 610 | * local alloc. You lose them when you drop i_mutex. |
| 459 | * | 611 | * |
| 460 | * We will add ourselves to the transaction passed in, but may start | 612 | * We will add ourselves to the transaction passed in, but may start |
| @@ -485,16 +637,18 @@ int ocfs2_reserve_local_alloc_bits(struct ocfs2_super *osb, | |||
| 485 | 637 | ||
| 486 | mutex_lock(&local_alloc_inode->i_mutex); | 638 | mutex_lock(&local_alloc_inode->i_mutex); |
| 487 | 639 | ||
| 488 | if (osb->local_alloc_state != OCFS2_LA_ENABLED) { | 640 | /* |
| 489 | status = -ENOSPC; | 641 | * We must double check state and allocator bits because |
| 490 | goto bail; | 642 | * another process may have changed them while holding i_mutex. |
| 491 | } | 643 | */ |
| 492 | 644 | spin_lock(&osb->osb_lock); | |
| 493 | if (bits_wanted > ocfs2_local_alloc_window_bits(osb)) { | 645 | if (!ocfs2_la_state_enabled(osb) || |
| 494 | mlog(0, "Asking for more than my max window size!\n"); | 646 | (bits_wanted > osb->local_alloc_bits)) { |
| 647 | spin_unlock(&osb->osb_lock); | ||
| 495 | status = -ENOSPC; | 648 | status = -ENOSPC; |
| 496 | goto bail; | 649 | goto bail; |
| 497 | } | 650 | } |
| 651 | spin_unlock(&osb->osb_lock); | ||
| 498 | 652 | ||
| 499 | alloc = (struct ocfs2_dinode *) osb->local_alloc_bh->b_data; | 653 | alloc = (struct ocfs2_dinode *) osb->local_alloc_bh->b_data; |
| 500 | 654 | ||
| @@ -522,6 +676,36 @@ int ocfs2_reserve_local_alloc_bits(struct ocfs2_super *osb, | |||
| 522 | mlog_errno(status); | 676 | mlog_errno(status); |
| 523 | goto bail; | 677 | goto bail; |
| 524 | } | 678 | } |
| 679 | |||
| 680 | /* | ||
| 681 | * Under certain conditions, the window slide code | ||
| 682 | * might have reduced the number of bits available or | ||
| 683 | * disabled the local alloc entirely. Re-check | ||
| 684 | * here and return -ENOSPC if necessary. | ||
| 685 | */ | ||
| 686 | status = -ENOSPC; | ||
| 687 | if (!ocfs2_la_state_enabled(osb)) | ||
| 688 | goto bail; | ||
| 689 | |||
| 690 | free_bits = le32_to_cpu(alloc->id1.bitmap1.i_total) - | ||
| 691 | le32_to_cpu(alloc->id1.bitmap1.i_used); | ||
| 692 | if (bits_wanted > free_bits) | ||
| 693 | goto bail; | ||
| 694 | } | ||
| 695 | |||
| 696 | if (ac->ac_max_block) | ||
| 697 | mlog(0, "Calling in_range for max block %llu\n", | ||
| 698 | (unsigned long long)ac->ac_max_block); | ||
| 699 | |||
| 700 | if (!ocfs2_local_alloc_in_range(local_alloc_inode, ac, | ||
| 701 | bits_wanted)) { | ||
| 702 | /* | ||
| 703 | * The window is outside ac->ac_max_block. | ||
| 704 | * This errno tells the caller to keep localalloc enabled | ||
| 705 | * but to get the allocation from the main bitmap. | ||
| 706 | */ | ||
| 707 | status = -EFBIG; | ||
| 708 | goto bail; | ||
| 525 | } | 709 | } |
| 526 | 710 | ||
| 527 | ac->ac_inode = local_alloc_inode; | 711 | ac->ac_inode = local_alloc_inode; |
| @@ -789,6 +973,85 @@ bail: | |||
| 789 | return status; | 973 | return status; |
| 790 | } | 974 | } |
| 791 | 975 | ||
| 976 | enum ocfs2_la_event { | ||
| 977 | OCFS2_LA_EVENT_SLIDE, /* Normal window slide. */ | ||
| 978 | OCFS2_LA_EVENT_FRAGMENTED, /* The global bitmap has | ||
| 979 | * enough bits theoretically | ||
| 980 | * free, but a contiguous | ||
| 981 | * allocation could not be | ||
| 982 | * found. */ | ||
| 983 | OCFS2_LA_EVENT_ENOSPC, /* Global bitmap doesn't have | ||
| 984 | * enough bits free to satisfy | ||
| 985 | * our request. */ | ||
| 986 | }; | ||
| 987 | #define OCFS2_LA_ENABLE_INTERVAL (30 * HZ) | ||
| 988 | /* | ||
| 989 | * Given an event, calculate the size of our next local alloc window. | ||
| 990 | * | ||
| 991 | * This should always be called under i_mutex of the local alloc inode | ||
| 992 | * so that local alloc disabling doesn't race with processes trying to | ||
| 993 | * use the allocator. | ||
| 994 | * | ||
| 995 | * Returns the state which the local alloc was left in. This value can | ||
| 996 | * be ignored by some paths. | ||
| 997 | */ | ||
| 998 | static int ocfs2_recalc_la_window(struct ocfs2_super *osb, | ||
| 999 | enum ocfs2_la_event event) | ||
| 1000 | { | ||
| 1001 | unsigned int bits; | ||
| 1002 | int state; | ||
| 1003 | |||
| 1004 | spin_lock(&osb->osb_lock); | ||
| 1005 | if (osb->local_alloc_state == OCFS2_LA_DISABLED) { | ||
| 1006 | WARN_ON_ONCE(osb->local_alloc_state == OCFS2_LA_DISABLED); | ||
| 1007 | goto out_unlock; | ||
| 1008 | } | ||
| 1009 | |||
| 1010 | /* | ||
| 1011 | * ENOSPC and fragmentation are treated similarly for now. | ||
| 1012 | */ | ||
| 1013 | if (event == OCFS2_LA_EVENT_ENOSPC || | ||
| 1014 | event == OCFS2_LA_EVENT_FRAGMENTED) { | ||
| 1015 | /* | ||
| 1016 | * We ran out of contiguous space in the primary | ||
| 1017 | * bitmap. Drastically reduce the number of bits used | ||
| 1018 | * by local alloc until we have to disable it. | ||
| 1019 | */ | ||
| 1020 | bits = osb->local_alloc_bits >> 1; | ||
| 1021 | if (bits > ocfs2_megabytes_to_clusters(osb->sb, 1)) { | ||
| 1022 | /* | ||
| 1023 | * By setting state to THROTTLED, we'll keep | ||
| 1024 | * the number of local alloc bits used down | ||
| 1025 | * until an event occurs which would give us | ||
| 1026 | * reason to assume the bitmap situation might | ||
| 1027 | * have changed. | ||
| 1028 | */ | ||
| 1029 | osb->local_alloc_state = OCFS2_LA_THROTTLED; | ||
| 1030 | osb->local_alloc_bits = bits; | ||
| 1031 | } else { | ||
| 1032 | osb->local_alloc_state = OCFS2_LA_DISABLED; | ||
| 1033 | } | ||
| 1034 | queue_delayed_work(ocfs2_wq, &osb->la_enable_wq, | ||
| 1035 | OCFS2_LA_ENABLE_INTERVAL); | ||
| 1036 | goto out_unlock; | ||
| 1037 | } | ||
| 1038 | |||
| 1039 | /* | ||
| 1040 | * Don't increase the size of the local alloc window until we | ||
| 1041 | * know we might be able to fulfill the request. Otherwise, we | ||
| 1042 | * risk bouncing around the global bitmap during periods of | ||
| 1043 | * low space. | ||
| 1044 | */ | ||
| 1045 | if (osb->local_alloc_state != OCFS2_LA_THROTTLED) | ||
| 1046 | osb->local_alloc_bits = osb->local_alloc_default_bits; | ||
| 1047 | |||
| 1048 | out_unlock: | ||
| 1049 | state = osb->local_alloc_state; | ||
| 1050 | spin_unlock(&osb->osb_lock); | ||
| 1051 | |||
| 1052 | return state; | ||
| 1053 | } | ||
| 1054 | |||
| 792 | static int ocfs2_local_alloc_reserve_for_window(struct ocfs2_super *osb, | 1055 | static int ocfs2_local_alloc_reserve_for_window(struct ocfs2_super *osb, |
| 793 | struct ocfs2_alloc_context **ac, | 1056 | struct ocfs2_alloc_context **ac, |
| 794 | struct inode **bitmap_inode, | 1057 | struct inode **bitmap_inode, |
| @@ -803,12 +1066,21 @@ static int ocfs2_local_alloc_reserve_for_window(struct ocfs2_super *osb, | |||
| 803 | goto bail; | 1066 | goto bail; |
| 804 | } | 1067 | } |
| 805 | 1068 | ||
| 806 | (*ac)->ac_bits_wanted = ocfs2_local_alloc_window_bits(osb); | 1069 | retry_enospc: |
| 1070 | (*ac)->ac_bits_wanted = osb->local_alloc_bits; | ||
| 807 | 1071 | ||
| 808 | status = ocfs2_reserve_cluster_bitmap_bits(osb, *ac); | 1072 | status = ocfs2_reserve_cluster_bitmap_bits(osb, *ac); |
| 1073 | if (status == -ENOSPC) { | ||
| 1074 | if (ocfs2_recalc_la_window(osb, OCFS2_LA_EVENT_ENOSPC) == | ||
| 1075 | OCFS2_LA_DISABLED) | ||
| 1076 | goto bail; | ||
| 1077 | |||
| 1078 | ocfs2_free_ac_resource(*ac); | ||
| 1079 | memset(*ac, 0, sizeof(struct ocfs2_alloc_context)); | ||
| 1080 | goto retry_enospc; | ||
| 1081 | } | ||
| 809 | if (status < 0) { | 1082 | if (status < 0) { |
| 810 | if (status != -ENOSPC) | 1083 | mlog_errno(status); |
| 811 | mlog_errno(status); | ||
| 812 | goto bail; | 1084 | goto bail; |
| 813 | } | 1085 | } |
| 814 | 1086 | ||
| @@ -849,7 +1121,7 @@ static int ocfs2_local_alloc_new_window(struct ocfs2_super *osb, | |||
| 849 | "one\n"); | 1121 | "one\n"); |
| 850 | 1122 | ||
| 851 | mlog(0, "Allocating %u clusters for a new window.\n", | 1123 | mlog(0, "Allocating %u clusters for a new window.\n", |
| 852 | ocfs2_local_alloc_window_bits(osb)); | 1124 | osb->local_alloc_bits); |
| 853 | 1125 | ||
| 854 | /* Instruct the allocation code to try the most recently used | 1126 | /* Instruct the allocation code to try the most recently used |
| 855 | * cluster group. We'll re-record the group used this pass | 1127 | * cluster group. We'll re-record the group used this pass |
| @@ -859,9 +1131,36 @@ static int ocfs2_local_alloc_new_window(struct ocfs2_super *osb, | |||
| 859 | /* we used the generic suballoc reserve function, but we set | 1131 | /* we used the generic suballoc reserve function, but we set |
| 860 | * everything up nicely, so there's no reason why we can't use | 1132 | * everything up nicely, so there's no reason why we can't use |
| 861 | * the more specific cluster api to claim bits. */ | 1133 | * the more specific cluster api to claim bits. */ |
| 862 | status = ocfs2_claim_clusters(osb, handle, ac, | 1134 | status = ocfs2_claim_clusters(osb, handle, ac, osb->local_alloc_bits, |
| 863 | ocfs2_local_alloc_window_bits(osb), | ||
| 864 | &cluster_off, &cluster_count); | 1135 | &cluster_off, &cluster_count); |
| 1136 | if (status == -ENOSPC) { | ||
| 1137 | retry_enospc: | ||
| 1138 | /* | ||
| 1139 | * Note: We could also try syncing the journal here to | ||
| 1140 | * allow use of any free bits which the current | ||
| 1141 | * transaction can't give us access to. --Mark | ||
| 1142 | */ | ||
| 1143 | if (ocfs2_recalc_la_window(osb, OCFS2_LA_EVENT_FRAGMENTED) == | ||
| 1144 | OCFS2_LA_DISABLED) | ||
| 1145 | goto bail; | ||
| 1146 | |||
| 1147 | status = ocfs2_claim_clusters(osb, handle, ac, | ||
| 1148 | osb->local_alloc_bits, | ||
| 1149 | &cluster_off, | ||
| 1150 | &cluster_count); | ||
| 1151 | if (status == -ENOSPC) | ||
| 1152 | goto retry_enospc; | ||
| 1153 | /* | ||
| 1154 | * We only shrunk the *minimum* number of bits in our | ||
| 1155 | * request - it's entirely possible that the allocator | ||
| 1156 | * might give us more than we asked for. | ||
| 1157 | */ | ||
| 1158 | if (status == 0) { | ||
| 1159 | spin_lock(&osb->osb_lock); | ||
| 1160 | osb->local_alloc_bits = cluster_count; | ||
| 1161 | spin_unlock(&osb->osb_lock); | ||
| 1162 | } | ||
| 1163 | } | ||
| 865 | if (status < 0) { | 1164 | if (status < 0) { |
| 866 | if (status != -ENOSPC) | 1165 | if (status != -ENOSPC) |
| 867 | mlog_errno(status); | 1166 | mlog_errno(status); |
| @@ -905,6 +1204,8 @@ static int ocfs2_local_alloc_slide_window(struct ocfs2_super *osb, | |||
| 905 | 1204 | ||
| 906 | mlog_entry_void(); | 1205 | mlog_entry_void(); |
| 907 | 1206 | ||
| 1207 | ocfs2_recalc_la_window(osb, OCFS2_LA_EVENT_SLIDE); | ||
| 1208 | |||
| 908 | /* This will lock the main bitmap for us. */ | 1209 | /* This will lock the main bitmap for us. */ |
| 909 | status = ocfs2_local_alloc_reserve_for_window(osb, | 1210 | status = ocfs2_local_alloc_reserve_for_window(osb, |
| 910 | &ac, | 1211 | &ac, |
| @@ -976,8 +1277,7 @@ bail: | |||
| 976 | if (handle) | 1277 | if (handle) |
| 977 | ocfs2_commit_trans(osb, handle); | 1278 | ocfs2_commit_trans(osb, handle); |
| 978 | 1279 | ||
| 979 | if (main_bm_bh) | 1280 | brelse(main_bm_bh); |
| 980 | brelse(main_bm_bh); | ||
| 981 | 1281 | ||
| 982 | if (main_bm_inode) | 1282 | if (main_bm_inode) |
| 983 | iput(main_bm_inode); | 1283 | iput(main_bm_inode); |
