diff options
author | Jens Axboe <jens.axboe@oracle.com> | 2009-06-30 03:34:12 -0400 |
---|---|---|
committer | Jens Axboe <jens.axboe@oracle.com> | 2009-07-01 04:56:25 -0400 |
commit | 6118b70b3a0b4c583439bb77600194c82f220ce3 (patch) | |
tree | 6966f1b059f1bdadc0d679067796d9b4bf9a22c3 /block/cfq-iosched.c | |
parent | d5036d770f871bd34c9cfd955e6dee692e1e8e81 (diff) |
cfq-iosched: get rid of the need for __GFP_NOFAIL in cfq_find_alloc_queue()
Setup an emergency fallback cfqq that we allocate at IO scheduler init
time. If the slab allocation fails in cfq_find_alloc_queue(), we'll just
punt IO to that cfqq instead. This ensures that cfq_find_alloc_queue()
never fails, without requiring the allocator to guarantee free memory.
On cfqq lookup, always try to allocate a new cfqq if the given cfq io
context has the oom_cfqq assigned. This ensures that we only temporarily
punt to this shared queue.
Reviewed-by: Jeff Moyer <jmoyer@redhat.com>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
Diffstat (limited to 'block/cfq-iosched.c')
-rw-r--r-- | block/cfq-iosched.c | 137 |
1 file changed, 73 insertions, 64 deletions
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index c760ae7019dd..1d9160ffa26d 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c | |||
@@ -71,6 +71,51 @@ struct cfq_rb_root { | |||
71 | #define CFQ_RB_ROOT (struct cfq_rb_root) { RB_ROOT, NULL, } | 71 | #define CFQ_RB_ROOT (struct cfq_rb_root) { RB_ROOT, NULL, } |
72 | 72 | ||
73 | /* | 73 | /* |
74 | * Per process-grouping structure | ||
75 | */ | ||
76 | struct cfq_queue { | ||
77 | /* reference count */ | ||
78 | atomic_t ref; | ||
79 | /* various state flags, see below */ | ||
80 | unsigned int flags; | ||
81 | /* parent cfq_data */ | ||
82 | struct cfq_data *cfqd; | ||
83 | /* service_tree member */ | ||
84 | struct rb_node rb_node; | ||
85 | /* service_tree key */ | ||
86 | unsigned long rb_key; | ||
87 | /* prio tree member */ | ||
88 | struct rb_node p_node; | ||
89 | /* prio tree root we belong to, if any */ | ||
90 | struct rb_root *p_root; | ||
91 | /* sorted list of pending requests */ | ||
92 | struct rb_root sort_list; | ||
93 | /* if fifo isn't expired, next request to serve */ | ||
94 | struct request *next_rq; | ||
95 | /* requests queued in sort_list */ | ||
96 | int queued[2]; | ||
97 | /* currently allocated requests */ | ||
98 | int allocated[2]; | ||
99 | /* fifo list of requests in sort_list */ | ||
100 | struct list_head fifo; | ||
101 | |||
102 | unsigned long slice_end; | ||
103 | long slice_resid; | ||
104 | unsigned int slice_dispatch; | ||
105 | |||
106 | /* pending metadata requests */ | ||
107 | int meta_pending; | ||
108 | /* number of requests that are on the dispatch list or inside driver */ | ||
109 | int dispatched; | ||
110 | |||
111 | /* io prio of this group */ | ||
112 | unsigned short ioprio, org_ioprio; | ||
113 | unsigned short ioprio_class, org_ioprio_class; | ||
114 | |||
115 | pid_t pid; | ||
116 | }; | ||
117 | |||
118 | /* | ||
74 | * Per block device queue structure | 119 | * Per block device queue structure |
75 | */ | 120 | */ |
76 | struct cfq_data { | 121 | struct cfq_data { |
@@ -135,51 +180,11 @@ struct cfq_data { | |||
135 | unsigned int cfq_slice_idle; | 180 | unsigned int cfq_slice_idle; |
136 | 181 | ||
137 | struct list_head cic_list; | 182 | struct list_head cic_list; |
138 | }; | ||
139 | 183 | ||
140 | /* | 184 | /* |
141 | * Per process-grouping structure | 185 | * Fallback dummy cfqq for extreme OOM conditions |
142 | */ | 186 | */ |
143 | struct cfq_queue { | 187 | struct cfq_queue oom_cfqq; |
144 | /* reference count */ | ||
145 | atomic_t ref; | ||
146 | /* various state flags, see below */ | ||
147 | unsigned int flags; | ||
148 | /* parent cfq_data */ | ||
149 | struct cfq_data *cfqd; | ||
150 | /* service_tree member */ | ||
151 | struct rb_node rb_node; | ||
152 | /* service_tree key */ | ||
153 | unsigned long rb_key; | ||
154 | /* prio tree member */ | ||
155 | struct rb_node p_node; | ||
156 | /* prio tree root we belong to, if any */ | ||
157 | struct rb_root *p_root; | ||
158 | /* sorted list of pending requests */ | ||
159 | struct rb_root sort_list; | ||
160 | /* if fifo isn't expired, next request to serve */ | ||
161 | struct request *next_rq; | ||
162 | /* requests queued in sort_list */ | ||
163 | int queued[2]; | ||
164 | /* currently allocated requests */ | ||
165 | int allocated[2]; | ||
166 | /* fifo list of requests in sort_list */ | ||
167 | struct list_head fifo; | ||
168 | |||
169 | unsigned long slice_end; | ||
170 | long slice_resid; | ||
171 | unsigned int slice_dispatch; | ||
172 | |||
173 | /* pending metadata requests */ | ||
174 | int meta_pending; | ||
175 | /* number of requests that are on the dispatch list or inside driver */ | ||
176 | int dispatched; | ||
177 | |||
178 | /* io prio of this group */ | ||
179 | unsigned short ioprio, org_ioprio; | ||
180 | unsigned short ioprio_class, org_ioprio_class; | ||
181 | |||
182 | pid_t pid; | ||
183 | }; | 188 | }; |
184 | 189 | ||
185 | enum cfqq_state_flags { | 190 | enum cfqq_state_flags { |
@@ -1673,41 +1678,40 @@ retry: | |||
1673 | /* cic always exists here */ | 1678 | /* cic always exists here */ |
1674 | cfqq = cic_to_cfqq(cic, is_sync); | 1679 | cfqq = cic_to_cfqq(cic, is_sync); |
1675 | 1680 | ||
1676 | if (!cfqq) { | 1681 | /* |
1682 | * Always try a new alloc if we fell back to the OOM cfqq | ||
1683 | * originally, since it should just be a temporary situation. | ||
1684 | */ | ||
1685 | if (!cfqq || cfqq == &cfqd->oom_cfqq) { | ||
1686 | cfqq = NULL; | ||
1677 | if (new_cfqq) { | 1687 | if (new_cfqq) { |
1678 | cfqq = new_cfqq; | 1688 | cfqq = new_cfqq; |
1679 | new_cfqq = NULL; | 1689 | new_cfqq = NULL; |
1680 | } else if (gfp_mask & __GFP_WAIT) { | 1690 | } else if (gfp_mask & __GFP_WAIT) { |
1681 | /* | ||
1682 | * Inform the allocator of the fact that we will | ||
1683 | * just repeat this allocation if it fails, to allow | ||
1684 | * the allocator to do whatever it needs to attempt to | ||
1685 | * free memory. | ||
1686 | */ | ||
1687 | spin_unlock_irq(cfqd->queue->queue_lock); | 1691 | spin_unlock_irq(cfqd->queue->queue_lock); |
1688 | new_cfqq = kmem_cache_alloc_node(cfq_pool, | 1692 | new_cfqq = kmem_cache_alloc_node(cfq_pool, |
1689 | gfp_mask | __GFP_NOFAIL | __GFP_ZERO, | 1693 | gfp_mask | __GFP_ZERO, |
1690 | cfqd->queue->node); | 1694 | cfqd->queue->node); |
1691 | spin_lock_irq(cfqd->queue->queue_lock); | 1695 | spin_lock_irq(cfqd->queue->queue_lock); |
1692 | goto retry; | 1696 | if (new_cfqq) |
1697 | goto retry; | ||
1693 | } else { | 1698 | } else { |
1694 | cfqq = kmem_cache_alloc_node(cfq_pool, | 1699 | cfqq = kmem_cache_alloc_node(cfq_pool, |
1695 | gfp_mask | __GFP_ZERO, | 1700 | gfp_mask | __GFP_ZERO, |
1696 | cfqd->queue->node); | 1701 | cfqd->queue->node); |
1697 | if (!cfqq) | ||
1698 | goto out; | ||
1699 | } | 1702 | } |
1700 | 1703 | ||
1701 | cfq_init_cfqq(cfqd, cfqq, current->pid, is_sync); | 1704 | if (cfqq) { |
1702 | cfq_init_prio_data(cfqq, ioc); | 1705 | cfq_init_cfqq(cfqd, cfqq, current->pid, is_sync); |
1703 | cfq_log_cfqq(cfqd, cfqq, "alloced"); | 1706 | cfq_init_prio_data(cfqq, ioc); |
1707 | cfq_log_cfqq(cfqd, cfqq, "alloced"); | ||
1708 | } else | ||
1709 | cfqq = &cfqd->oom_cfqq; | ||
1704 | } | 1710 | } |
1705 | 1711 | ||
1706 | if (new_cfqq) | 1712 | if (new_cfqq) |
1707 | kmem_cache_free(cfq_pool, new_cfqq); | 1713 | kmem_cache_free(cfq_pool, new_cfqq); |
1708 | 1714 | ||
1709 | out: | ||
1710 | WARN_ON((gfp_mask & __GFP_WAIT) && !cfqq); | ||
1711 | return cfqq; | 1715 | return cfqq; |
1712 | } | 1716 | } |
1713 | 1717 | ||
@@ -1740,11 +1744,8 @@ cfq_get_queue(struct cfq_data *cfqd, int is_sync, struct io_context *ioc, | |||
1740 | cfqq = *async_cfqq; | 1744 | cfqq = *async_cfqq; |
1741 | } | 1745 | } |
1742 | 1746 | ||
1743 | if (!cfqq) { | 1747 | if (!cfqq) |
1744 | cfqq = cfq_find_alloc_queue(cfqd, is_sync, ioc, gfp_mask); | 1748 | cfqq = cfq_find_alloc_queue(cfqd, is_sync, ioc, gfp_mask); |
1745 | if (!cfqq) | ||
1746 | return NULL; | ||
1747 | } | ||
1748 | 1749 | ||
1749 | /* | 1750 | /* |
1750 | * pin the queue now that it's allocated, scheduler exit will prune it | 1751 | * pin the queue now that it's allocated, scheduler exit will prune it |
@@ -2470,6 +2471,14 @@ static void *cfq_init_queue(struct request_queue *q) | |||
2470 | for (i = 0; i < CFQ_PRIO_LISTS; i++) | 2471 | for (i = 0; i < CFQ_PRIO_LISTS; i++) |
2471 | cfqd->prio_trees[i] = RB_ROOT; | 2472 | cfqd->prio_trees[i] = RB_ROOT; |
2472 | 2473 | ||
2474 | /* | ||
2475 | * Our fallback cfqq if cfq_find_alloc_queue() runs into OOM issues. | ||
2476 | * Grab a permanent reference to it, so that the normal code flow | ||
2477 | * will not attempt to free it. | ||
2478 | */ | ||
2479 | cfq_init_cfqq(cfqd, &cfqd->oom_cfqq, 1, 0); | ||
2480 | atomic_inc(&cfqd->oom_cfqq.ref); | ||
2481 | |||
2473 | INIT_LIST_HEAD(&cfqd->cic_list); | 2482 | INIT_LIST_HEAD(&cfqd->cic_list); |
2474 | 2483 | ||
2475 | cfqd->queue = q; | 2484 | cfqd->queue = q; |