author    Jens Axboe <jens.axboe@oracle.com>  2009-06-30 03:34:12 -0400
committer Jens Axboe <jens.axboe@oracle.com>  2009-07-01 04:56:25 -0400
commit    6118b70b3a0b4c583439bb77600194c82f220ce3
tree      6966f1b059f1bdadc0d679067796d9b4bf9a22c3
parent    d5036d770f871bd34c9cfd955e6dee692e1e8e81
cfq-iosched: get rid of the need for __GFP_NOFAIL in cfq_find_alloc_queue()
Set up an emergency fallback cfqq that we allocate at IO scheduler init
time. If the slab allocation fails in cfq_find_alloc_queue(), we'll just
punt IO to that cfqq instead. This ensures that cfq_find_alloc_queue()
never fails without having to ensure free memory.

On cfqq lookup, always try to allocate a new cfqq if the given cfq io
context has the oom_cfqq assigned. This ensures that we only temporarily
punt to this shared queue.

Reviewed-by: Jeff Moyer <jmoyer@redhat.com>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
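The pattern this patch uses generalizes beyond CFQ: embed one fallback object
in the parent structure at init time so the lookup path always has something
to return, give it a permanent reference so normal teardown never frees it,
and treat "caller currently holds the fallback" as a signal to retry a real
allocation. Below is a minimal userspace C sketch of that idea; the
struct qpool / qpool_init / qpool_get names are illustrative inventions for
this example, not kernel API.

/*
 * Minimal userspace sketch of the preallocated-fallback pattern.
 * All names here are hypothetical stand-ins, not the kernel's types.
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct queue {
	int ref;
	int is_oom;		/* set only on the embedded fallback queue */
};

struct qpool {
	struct queue oom_q;	/* preallocated at init; handing it out can't fail */
	struct queue *cur;	/* queue currently assigned to the caller, if any */
};

static void qpool_init(struct qpool *p)
{
	memset(p, 0, sizeof(*p));
	p->oom_q.is_oom = 1;
	p->oom_q.ref = 1;	/* permanent ref: callers must never free it */
}

/*
 * Lookup that cannot fail: if the caller was previously punted to the
 * shared fallback, always retry a real allocation first, so the
 * fallback stays a temporary refuge under memory pressure.
 */
static struct queue *qpool_get(struct qpool *p)
{
	if (p->cur && !p->cur->is_oom)
		return p->cur;	/* already have a real, private queue */

	struct queue *q = calloc(1, sizeof(*q));
	if (q)
		q->ref = 1;
	else
		q = &p->oom_q;	/* OOM: punt to the shared fallback */

	p->cur = q;
	return q;		/* never NULL */
}

int main(void)
{
	struct qpool p;

	qpool_init(&p);
	struct queue *q = qpool_get(&p);
	printf("got %s queue\n", q->is_oom ? "fallback" : "real");
	if (!q->is_oom)
		free(q);
	return 0;
}

The permanent reference taken in qpool_init() mirrors the
atomic_inc(&cfqd->oom_cfqq.ref) added below: because the fallback is embedded
in its parent structure, a refcount that could drop to zero would let generic
release code try to free memory that was never separately allocated.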
-rw-r--r--  block/cfq-iosched.c  |  137
1 file changed, 73 insertions(+), 64 deletions(-)
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index c760ae7019dd..1d9160ffa26d 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -71,6 +71,51 @@ struct cfq_rb_root {
 #define CFQ_RB_ROOT	(struct cfq_rb_root) { RB_ROOT, NULL, }
 
 /*
+ * Per process-grouping structure
+ */
+struct cfq_queue {
+	/* reference count */
+	atomic_t ref;
+	/* various state flags, see below */
+	unsigned int flags;
+	/* parent cfq_data */
+	struct cfq_data *cfqd;
+	/* service_tree member */
+	struct rb_node rb_node;
+	/* service_tree key */
+	unsigned long rb_key;
+	/* prio tree member */
+	struct rb_node p_node;
+	/* prio tree root we belong to, if any */
+	struct rb_root *p_root;
+	/* sorted list of pending requests */
+	struct rb_root sort_list;
+	/* if fifo isn't expired, next request to serve */
+	struct request *next_rq;
+	/* requests queued in sort_list */
+	int queued[2];
+	/* currently allocated requests */
+	int allocated[2];
+	/* fifo list of requests in sort_list */
+	struct list_head fifo;
+
+	unsigned long slice_end;
+	long slice_resid;
+	unsigned int slice_dispatch;
+
+	/* pending metadata requests */
+	int meta_pending;
+	/* number of requests that are on the dispatch list or inside driver */
+	int dispatched;
+
+	/* io prio of this group */
+	unsigned short ioprio, org_ioprio;
+	unsigned short ioprio_class, org_ioprio_class;
+
+	pid_t pid;
+};
+
+/*
  * Per block device queue structure
  */
 struct cfq_data {
@@ -135,51 +180,11 @@ struct cfq_data {
 	unsigned int cfq_slice_idle;
 
 	struct list_head cic_list;
-};
 
-/*
- * Per process-grouping structure
- */
-struct cfq_queue {
-	/* reference count */
-	atomic_t ref;
-	/* various state flags, see below */
-	unsigned int flags;
-	/* parent cfq_data */
-	struct cfq_data *cfqd;
-	/* service_tree member */
-	struct rb_node rb_node;
-	/* service_tree key */
-	unsigned long rb_key;
-	/* prio tree member */
-	struct rb_node p_node;
-	/* prio tree root we belong to, if any */
-	struct rb_root *p_root;
-	/* sorted list of pending requests */
-	struct rb_root sort_list;
-	/* if fifo isn't expired, next request to serve */
-	struct request *next_rq;
-	/* requests queued in sort_list */
-	int queued[2];
-	/* currently allocated requests */
-	int allocated[2];
-	/* fifo list of requests in sort_list */
-	struct list_head fifo;
-
-	unsigned long slice_end;
-	long slice_resid;
-	unsigned int slice_dispatch;
-
-	/* pending metadata requests */
-	int meta_pending;
-	/* number of requests that are on the dispatch list or inside driver */
-	int dispatched;
-
-	/* io prio of this group */
-	unsigned short ioprio, org_ioprio;
-	unsigned short ioprio_class, org_ioprio_class;
-
-	pid_t pid;
-};
+	/*
+	 * Fallback dummy cfqq for extreme OOM conditions
+	 */
+	struct cfq_queue oom_cfqq;
+};
 
 enum cfqq_state_flags {
@@ -1673,41 +1678,40 @@ retry:
 	/* cic always exists here */
 	cfqq = cic_to_cfqq(cic, is_sync);
 
-	if (!cfqq) {
+	/*
+	 * Always try a new alloc if we fell back to the OOM cfqq
+	 * originally, since it should just be a temporary situation.
+	 */
+	if (!cfqq || cfqq == &cfqd->oom_cfqq) {
+		cfqq = NULL;
 		if (new_cfqq) {
 			cfqq = new_cfqq;
 			new_cfqq = NULL;
 		} else if (gfp_mask & __GFP_WAIT) {
-			/*
-			 * Inform the allocator of the fact that we will
-			 * just repeat this allocation if it fails, to allow
-			 * the allocator to do whatever it needs to attempt to
-			 * free memory.
-			 */
 			spin_unlock_irq(cfqd->queue->queue_lock);
 			new_cfqq = kmem_cache_alloc_node(cfq_pool,
-					gfp_mask | __GFP_NOFAIL | __GFP_ZERO,
+					gfp_mask | __GFP_ZERO,
 					cfqd->queue->node);
 			spin_lock_irq(cfqd->queue->queue_lock);
-			goto retry;
+			if (new_cfqq)
+				goto retry;
 		} else {
 			cfqq = kmem_cache_alloc_node(cfq_pool,
 					gfp_mask | __GFP_ZERO,
 					cfqd->queue->node);
-			if (!cfqq)
-				goto out;
 		}
 
-		cfq_init_cfqq(cfqd, cfqq, current->pid, is_sync);
-		cfq_init_prio_data(cfqq, ioc);
-		cfq_log_cfqq(cfqd, cfqq, "alloced");
+		if (cfqq) {
+			cfq_init_cfqq(cfqd, cfqq, current->pid, is_sync);
+			cfq_init_prio_data(cfqq, ioc);
+			cfq_log_cfqq(cfqd, cfqq, "alloced");
+		} else
+			cfqq = &cfqd->oom_cfqq;
 	}
 
 	if (new_cfqq)
 		kmem_cache_free(cfq_pool, new_cfqq);
 
-out:
-	WARN_ON((gfp_mask & __GFP_WAIT) && !cfqq);
 	return cfqq;
 }
 
@@ -1740,11 +1744,8 @@ cfq_get_queue(struct cfq_data *cfqd, int is_sync, struct io_context *ioc,
 		cfqq = *async_cfqq;
 	}
 
-	if (!cfqq) {
+	if (!cfqq)
 		cfqq = cfq_find_alloc_queue(cfqd, is_sync, ioc, gfp_mask);
-		if (!cfqq)
-			return NULL;
-	}
 
 	/*
 	 * pin the queue now that it's allocated, scheduler exit will prune it
@@ -2470,6 +2471,14 @@ static void *cfq_init_queue(struct request_queue *q)
 	for (i = 0; i < CFQ_PRIO_LISTS; i++)
 		cfqd->prio_trees[i] = RB_ROOT;
 
+	/*
+	 * Our fallback cfqq if cfq_find_alloc_queue() runs into OOM issues.
+	 * Grab a permanent reference to it, so that the normal code flow
+	 * will not attempt to free it.
+	 */
+	cfq_init_cfqq(cfqd, &cfqd->oom_cfqq, 1, 0);
+	atomic_inc(&cfqd->oom_cfqq.ref);
+
 	INIT_LIST_HEAD(&cfqd->cic_list);
 
 	cfqd->queue = q;