diff options
| author | Jens Axboe <jens.axboe@oracle.com> | 2009-06-30 03:34:12 -0400 |
|---|---|---|
| committer | Jens Axboe <jens.axboe@oracle.com> | 2009-07-01 04:56:25 -0400 |
| commit | 6118b70b3a0b4c583439bb77600194c82f220ce3 (patch) | |
| tree | 6966f1b059f1bdadc0d679067796d9b4bf9a22c3 /block | |
| parent | d5036d770f871bd34c9cfd955e6dee692e1e8e81 (diff) | |
cfq-iosched: get rid of the need for __GFP_NOFAIL in cfq_find_alloc_queue()
Set up an emergency fallback cfqq that we allocate at IO scheduler init
time. If the slab allocation fails in cfq_find_alloc_queue(), we'll just
punt IO to that cfqq instead. This ensures that cfq_find_alloc_queue()
never fails without having to ensure free memory.
On cfqq lookup, always try to allocate a new cfqq if the given cfq io
context has the oom_cfqq assigned. This ensures that we only temporarily
punt to this shared queue.
Reviewed-by: Jeff Moyer <jmoyer@redhat.com>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
Diffstat (limited to 'block')
| -rw-r--r-- | block/cfq-iosched.c | 137 |
1 files changed, 73 insertions, 64 deletions
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index c760ae7019dd..1d9160ffa26d 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c | |||
| @@ -71,6 +71,51 @@ struct cfq_rb_root { | |||
| 71 | #define CFQ_RB_ROOT (struct cfq_rb_root) { RB_ROOT, NULL, } | 71 | #define CFQ_RB_ROOT (struct cfq_rb_root) { RB_ROOT, NULL, } |
| 72 | 72 | ||
| 73 | /* | 73 | /* |
| 74 | * Per process-grouping structure | ||
| 75 | */ | ||
| 76 | struct cfq_queue { | ||
| 77 | /* reference count */ | ||
| 78 | atomic_t ref; | ||
| 79 | /* various state flags, see below */ | ||
| 80 | unsigned int flags; | ||
| 81 | /* parent cfq_data */ | ||
| 82 | struct cfq_data *cfqd; | ||
| 83 | /* service_tree member */ | ||
| 84 | struct rb_node rb_node; | ||
| 85 | /* service_tree key */ | ||
| 86 | unsigned long rb_key; | ||
| 87 | /* prio tree member */ | ||
| 88 | struct rb_node p_node; | ||
| 89 | /* prio tree root we belong to, if any */ | ||
| 90 | struct rb_root *p_root; | ||
| 91 | /* sorted list of pending requests */ | ||
| 92 | struct rb_root sort_list; | ||
| 93 | /* if fifo isn't expired, next request to serve */ | ||
| 94 | struct request *next_rq; | ||
| 95 | /* requests queued in sort_list */ | ||
| 96 | int queued[2]; | ||
| 97 | /* currently allocated requests */ | ||
| 98 | int allocated[2]; | ||
| 99 | /* fifo list of requests in sort_list */ | ||
| 100 | struct list_head fifo; | ||
| 101 | |||
| 102 | unsigned long slice_end; | ||
| 103 | long slice_resid; | ||
| 104 | unsigned int slice_dispatch; | ||
| 105 | |||
| 106 | /* pending metadata requests */ | ||
| 107 | int meta_pending; | ||
| 108 | /* number of requests that are on the dispatch list or inside driver */ | ||
| 109 | int dispatched; | ||
| 110 | |||
| 111 | /* io prio of this group */ | ||
| 112 | unsigned short ioprio, org_ioprio; | ||
| 113 | unsigned short ioprio_class, org_ioprio_class; | ||
| 114 | |||
| 115 | pid_t pid; | ||
| 116 | }; | ||
| 117 | |||
| 118 | /* | ||
| 74 | * Per block device queue structure | 119 | * Per block device queue structure |
| 75 | */ | 120 | */ |
| 76 | struct cfq_data { | 121 | struct cfq_data { |
| @@ -135,51 +180,11 @@ struct cfq_data { | |||
| 135 | unsigned int cfq_slice_idle; | 180 | unsigned int cfq_slice_idle; |
| 136 | 181 | ||
| 137 | struct list_head cic_list; | 182 | struct list_head cic_list; |
| 138 | }; | ||
| 139 | 183 | ||
| 140 | /* | 184 | /* |
| 141 | * Per process-grouping structure | 185 | * Fallback dummy cfqq for extreme OOM conditions |
| 142 | */ | 186 | */ |
| 143 | struct cfq_queue { | 187 | struct cfq_queue oom_cfqq; |
| 144 | /* reference count */ | ||
| 145 | atomic_t ref; | ||
| 146 | /* various state flags, see below */ | ||
| 147 | unsigned int flags; | ||
| 148 | /* parent cfq_data */ | ||
| 149 | struct cfq_data *cfqd; | ||
| 150 | /* service_tree member */ | ||
| 151 | struct rb_node rb_node; | ||
| 152 | /* service_tree key */ | ||
| 153 | unsigned long rb_key; | ||
| 154 | /* prio tree member */ | ||
| 155 | struct rb_node p_node; | ||
| 156 | /* prio tree root we belong to, if any */ | ||
| 157 | struct rb_root *p_root; | ||
| 158 | /* sorted list of pending requests */ | ||
| 159 | struct rb_root sort_list; | ||
| 160 | /* if fifo isn't expired, next request to serve */ | ||
| 161 | struct request *next_rq; | ||
| 162 | /* requests queued in sort_list */ | ||
| 163 | int queued[2]; | ||
| 164 | /* currently allocated requests */ | ||
| 165 | int allocated[2]; | ||
| 166 | /* fifo list of requests in sort_list */ | ||
| 167 | struct list_head fifo; | ||
| 168 | |||
| 169 | unsigned long slice_end; | ||
| 170 | long slice_resid; | ||
| 171 | unsigned int slice_dispatch; | ||
| 172 | |||
| 173 | /* pending metadata requests */ | ||
| 174 | int meta_pending; | ||
| 175 | /* number of requests that are on the dispatch list or inside driver */ | ||
| 176 | int dispatched; | ||
| 177 | |||
| 178 | /* io prio of this group */ | ||
| 179 | unsigned short ioprio, org_ioprio; | ||
| 180 | unsigned short ioprio_class, org_ioprio_class; | ||
| 181 | |||
| 182 | pid_t pid; | ||
| 183 | }; | 188 | }; |
| 184 | 189 | ||
| 185 | enum cfqq_state_flags { | 190 | enum cfqq_state_flags { |
| @@ -1673,41 +1678,40 @@ retry: | |||
| 1673 | /* cic always exists here */ | 1678 | /* cic always exists here */ |
| 1674 | cfqq = cic_to_cfqq(cic, is_sync); | 1679 | cfqq = cic_to_cfqq(cic, is_sync); |
| 1675 | 1680 | ||
| 1676 | if (!cfqq) { | 1681 | /* |
| 1682 | * Always try a new alloc if we fell back to the OOM cfqq | ||
| 1683 | * originally, since it should just be a temporary situation. | ||
| 1684 | */ | ||
| 1685 | if (!cfqq || cfqq == &cfqd->oom_cfqq) { | ||
| 1686 | cfqq = NULL; | ||
| 1677 | if (new_cfqq) { | 1687 | if (new_cfqq) { |
| 1678 | cfqq = new_cfqq; | 1688 | cfqq = new_cfqq; |
| 1679 | new_cfqq = NULL; | 1689 | new_cfqq = NULL; |
| 1680 | } else if (gfp_mask & __GFP_WAIT) { | 1690 | } else if (gfp_mask & __GFP_WAIT) { |
| 1681 | /* | ||
| 1682 | * Inform the allocator of the fact that we will | ||
| 1683 | * just repeat this allocation if it fails, to allow | ||
| 1684 | * the allocator to do whatever it needs to attempt to | ||
| 1685 | * free memory. | ||
| 1686 | */ | ||
| 1687 | spin_unlock_irq(cfqd->queue->queue_lock); | 1691 | spin_unlock_irq(cfqd->queue->queue_lock); |
| 1688 | new_cfqq = kmem_cache_alloc_node(cfq_pool, | 1692 | new_cfqq = kmem_cache_alloc_node(cfq_pool, |
| 1689 | gfp_mask | __GFP_NOFAIL | __GFP_ZERO, | 1693 | gfp_mask | __GFP_ZERO, |
| 1690 | cfqd->queue->node); | 1694 | cfqd->queue->node); |
| 1691 | spin_lock_irq(cfqd->queue->queue_lock); | 1695 | spin_lock_irq(cfqd->queue->queue_lock); |
| 1692 | goto retry; | 1696 | if (new_cfqq) |
| 1697 | goto retry; | ||
| 1693 | } else { | 1698 | } else { |
| 1694 | cfqq = kmem_cache_alloc_node(cfq_pool, | 1699 | cfqq = kmem_cache_alloc_node(cfq_pool, |
| 1695 | gfp_mask | __GFP_ZERO, | 1700 | gfp_mask | __GFP_ZERO, |
| 1696 | cfqd->queue->node); | 1701 | cfqd->queue->node); |
| 1697 | if (!cfqq) | ||
| 1698 | goto out; | ||
| 1699 | } | 1702 | } |
| 1700 | 1703 | ||
| 1701 | cfq_init_cfqq(cfqd, cfqq, current->pid, is_sync); | 1704 | if (cfqq) { |
| 1702 | cfq_init_prio_data(cfqq, ioc); | 1705 | cfq_init_cfqq(cfqd, cfqq, current->pid, is_sync); |
| 1703 | cfq_log_cfqq(cfqd, cfqq, "alloced"); | 1706 | cfq_init_prio_data(cfqq, ioc); |
| 1707 | cfq_log_cfqq(cfqd, cfqq, "alloced"); | ||
| 1708 | } else | ||
| 1709 | cfqq = &cfqd->oom_cfqq; | ||
| 1704 | } | 1710 | } |
| 1705 | 1711 | ||
| 1706 | if (new_cfqq) | 1712 | if (new_cfqq) |
| 1707 | kmem_cache_free(cfq_pool, new_cfqq); | 1713 | kmem_cache_free(cfq_pool, new_cfqq); |
| 1708 | 1714 | ||
| 1709 | out: | ||
| 1710 | WARN_ON((gfp_mask & __GFP_WAIT) && !cfqq); | ||
| 1711 | return cfqq; | 1715 | return cfqq; |
| 1712 | } | 1716 | } |
| 1713 | 1717 | ||
| @@ -1740,11 +1744,8 @@ cfq_get_queue(struct cfq_data *cfqd, int is_sync, struct io_context *ioc, | |||
| 1740 | cfqq = *async_cfqq; | 1744 | cfqq = *async_cfqq; |
| 1741 | } | 1745 | } |
| 1742 | 1746 | ||
| 1743 | if (!cfqq) { | 1747 | if (!cfqq) |
| 1744 | cfqq = cfq_find_alloc_queue(cfqd, is_sync, ioc, gfp_mask); | 1748 | cfqq = cfq_find_alloc_queue(cfqd, is_sync, ioc, gfp_mask); |
| 1745 | if (!cfqq) | ||
| 1746 | return NULL; | ||
| 1747 | } | ||
| 1748 | 1749 | ||
| 1749 | /* | 1750 | /* |
| 1750 | * pin the queue now that it's allocated, scheduler exit will prune it | 1751 | * pin the queue now that it's allocated, scheduler exit will prune it |
| @@ -2470,6 +2471,14 @@ static void *cfq_init_queue(struct request_queue *q) | |||
| 2470 | for (i = 0; i < CFQ_PRIO_LISTS; i++) | 2471 | for (i = 0; i < CFQ_PRIO_LISTS; i++) |
| 2471 | cfqd->prio_trees[i] = RB_ROOT; | 2472 | cfqd->prio_trees[i] = RB_ROOT; |
| 2472 | 2473 | ||
| 2474 | /* | ||
| 2475 | * Our fallback cfqq if cfq_find_alloc_queue() runs into OOM issues. | ||
| 2476 | * Grab a permanent reference to it, so that the normal code flow | ||
| 2477 | * will not attempt to free it. | ||
| 2478 | */ | ||
| 2479 | cfq_init_cfqq(cfqd, &cfqd->oom_cfqq, 1, 0); | ||
| 2480 | atomic_inc(&cfqd->oom_cfqq.ref); | ||
| 2481 | |||
| 2473 | INIT_LIST_HEAD(&cfqd->cic_list); | 2482 | INIT_LIST_HEAD(&cfqd->cic_list); |
| 2474 | 2483 | ||
| 2475 | cfqd->queue = q; | 2484 | cfqd->queue = q; |
