aboutsummaryrefslogtreecommitdiffstats
path: root/fs/direct-io.c
Commit message (Expand)AuthorAge
* fs/direct-io.c: fix truncation error in dio_complete() returnEdward Shishkin2010-10-26
* O_DIRECT: fix the splitting up of contiguous I/OJeff Moyer2010-09-09
* sort out blockdev_direct_IO variantsChristoph Hellwig2010-08-09
* direct-io: move aio_complete into ->end_ioChristoph Hellwig2010-07-26
* fs: introduce new truncate sequencenpiggin@suse.de2010-05-27
* direct-io: do not merge logically non-contiguous requestsJosef Bacik2010-05-25
* direct-io: add a hook for the fs to provide its own submit_bio functionJosef Bacik2010-05-25
* dio: fix use-after-freeAl Viro2009-12-17
* direct-io: cleanup blockdev_direct_IO lockingChristoph Hellwig2009-12-16
* dio: don't zero out the pages array inside struct dioJeff Moyer2009-12-16
* Fix regression in direct writes performance due to WRITE_ODIRECT flag removalVivek Goyal2009-11-26
* aio: implement request batchingJeff Moyer2009-10-28
* block: get rid of the WRITE_ODIRECT flagJeff Moyer2009-10-28
* block: Do away with the notion of hardsect_sizeMartin K. Petersen2009-05-22
* dio: Remove code handling bio_alloc failure with __GFP_WAITNikanth Karthikesan2009-04-15
* block: Add flag for telling the IO schedulers NOT to anticipate more IOJens Axboe2009-04-06
* fs: truncate blocks outside i_size after O_DIRECT write errorDmitri Monakhov2009-01-06
* Remove Andrew Morton's old email accountsFrancois Cami2008-10-16
* dio: use get_user_pages_fastNick Piggin2008-07-26
* Pagecache zeroing: zero_user_segment, zero_user_segments and zero_userChristoph Lameter2008-02-05
* remove ZERO_PAGENick Piggin2007-10-16
* Drop 'size' argument from bio_endio and bi_end_ioNeilBrown2007-10-10
* dio: zero struct dio with kzalloc instead of manuallyZach Brown2007-08-21
* direct-io: fix error-path crashesBadari Pulavarty2007-08-11
* dio: remove bogus refcounting BUG_ONZach Brown2007-07-03
* Merge git://git.kernel.org/pub/scm/linux/kernel/git/bunk/trivialLinus Torvalds2007-05-09
|\
| * Fix misspellings collected by members of KJ list.Robert P. J. Day2007-05-09
* | fs: convert core functions to zero_user_pageNate Diller2007-05-09
|/
* [PATCH] dio: lock refcount operationsZach Brown2006-12-10
* [PATCH] dio: only call aio_complete() after returning -EIOCBQUEUEDZach Brown2006-12-10
* [PATCH] dio: remove duplicate bio wait codeZach Brown2006-12-10
* [PATCH] dio: formalize bio counters as a dio reference countZach Brown2006-12-10
* [PATCH] dio: call blk_run_address_space() once per opZach Brown2006-12-10
* [PATCH] dio: centralize completion in dio_complete()Zach Brown2006-12-10
* [PATCH] io-accounting: direct-ioAndrew Morton2006-12-10
* [PATCH] lockdep: annotate direct ioIngo Molnar2006-07-03
* [PATCH] Kill PF_SYNCWRITE flagJens Axboe2006-06-23
* BUG_ON() Conversion in fs/direct-io.cEric Sesterhenn2006-03-31
* Fixes a regression from the recent "remove ->get_blocks() support"Nathan Scott2006-03-28
* [PATCH] remove ->get_blocks() supportBadari Pulavarty2006-03-26
* [PATCH] direct-io: bug fix in dio handling write errorChen, Kenneth W2006-03-25
* Fix a direct I/O locking issue revealed by the new mutex code.Nathan Scott2006-03-14
* [PATCH] fix O_DIRECT read of last block in a sparse fileJeff Moyer2006-02-03
* [PATCH] mutex subsystem, semaphore to mutex: VFS, ->i_semJes Sorensen2006-01-09
* [PATCH] core remove PageReservedNick Piggin2005-10-30
* [PATCH] pass iocb to dio_iodone_tChristoph Hellwig2005-06-24
* [PATCH] Direct IO async short read fixDaniel McNeil2005-04-16
* Linux-2.6.12-rc2Linus Torvalds2005-04-16
8:20:36 -0400 committer Linus Torvalds <torvalds@ppc970.osdl.org> 2005-04-16 18:20:36 -0400 Linux-2.6.12-rc2' href='/cgit/cgit.cgi/litmus-rt.git/commit/mm/pdflush.c?h=wip-shared-lib2&id=1da177e4c3f41524e886b7f1b8a0c1fc7321cac2'>1da177e4c3f4
174596a0b9f2









1da177e4c3f4





28a42b9ea7e4







174596a0b9f2


28a42b9ea7e4
1da177e4c3f4












125e18745f16
1da177e4c3f4


1da177e4c3f4










1da177e4c3f4
5aecd559871d

1da177e4c3f4

















1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251




                                                                 
                             












                                                                       

                                                                    
                          
                         
                          




































































                                                                         
                                                    
                        












                                                         
                           
                                

                                                  




                                                                               



                                           
                                                          









                                                                     
                                                                    



















                                                                               
                                                                              



















                                                                            









                                                                              





                                                                             







                                                                             


                                                    
 












                                                                             
                                                                       


                                                










                                                                               
         

                                                     

















                                                 
/*
 * mm/pdflush.c - worker threads for writing back filesystem data
 *
 * Copyright (C) 2002, Linus Torvalds.
 *
 * 09Apr2002	Andrew Morton
 *		Initial version
 * 29Feb2004	kaos@sgi.com
 *		Move worker thread creation to kthread to avoid chewing
 *		up stack space with nested calls to kernel_thread.
 */

#include <linux/sched.h>
#include <linux/list.h>
#include <linux/signal.h>
#include <linux/spinlock.h>
#include <linux/gfp.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/fs.h>		/* Needed by writeback.h	  */
#include <linux/writeback.h>	/* Prototypes pdflush_operation() */
#include <linux/kthread.h>
#include <linux/cpuset.h>
#include <linux/freezer.h>
#include <linux/err.h>		/* IS_ERR(), PTR_ERR() */


/*
 * Minimum and maximum number of pdflush instances
 */
#define MIN_PDFLUSH_THREADS	2
#define MAX_PDFLUSH_THREADS	8

static void start_one_pdflush_thread(void);


/*
 * The pdflush threads are worker threads for writing back dirty data.
 * Ideally, we'd like one thread per active disk spindle.  But the disk
 * topology is very hard to divine at this level.   Instead, we take
 * care in various places to prevent more than one pdflush thread from
 * performing writeback against a single filesystem.  pdflush threads
 * have the PF_FLUSHER flag set in current->flags to aid in this.
 */

/*
 * All the pdflush threads.  Protected by pdflush_lock
 */
static LIST_HEAD(pdflush_list);
static DEFINE_SPINLOCK(pdflush_lock);

/*
 * The count of currently-running pdflush threads.  Each worker increments
 * it on entry to __pdflush() and decrements it on exit, both with
 * pdflush_lock held.  __pdflush() additionally reads it once without the
 * lock when deciding whether to start another worker.
 *
 * Readable by sysctl, but not writable.  Published to userspace at
 * /proc/sys/vm/nr_pdflush_threads.
 */
int nr_pdflush_threads = 0;

/*
 * The time (in jiffies) at which the pdflush thread pool last went empty.
 * Written by pdflush_operation(), under pdflush_lock, when it hands out the
 * last idle worker; read without the lock by __pdflush().
 */
static unsigned long last_empty_jifs;

/*
 * The pdflush thread.
 *
 * Thread pool management algorithm:
 * 
 * - The minimum and maximum number of pdflush instances are bound
 *   by MIN_PDFLUSH_THREADS and MAX_PDFLUSH_THREADS.
 * 
 * - If there have been no idle pdflush instances for 1 second, create
 *   a new one.
 * 
 * - If the least-recently-went-to-sleep pdflush thread has been asleep
 *   for more than one second, terminate a thread.
 */

/*
 * A structure for passing work to a pdflush thread.  Also for passing
 * state information between pdflush threads.  Protected by pdflush_lock.
 */
struct pdflush_work {
	struct task_struct *who;	/* The worker thread; target of wake_up_process() */
	void (*fn)(unsigned long);	/* Pending callback; NULL while no work is assigned */
	unsigned long arg0;		/* Argument passed to fn when it is invoked */
	struct list_head list;		/* On pdflush_list when idle; unlinked when work is handed over */
	unsigned long when_i_went_to_sleep;	/* jiffies stamp, set each time the worker parks itself */
};

/*
 * Main loop of a single pdflush worker.
 *
 * @my_work: this worker's control structure, supplied by pdflush().
 *
 * The worker parks itself at the head of pdflush_list and sleeps until
 * pdflush_operation() unlinks it, fills in ->fn/->arg0 and wakes it.  After
 * running the callback it handles pool sizing:
 *
 *  - creation: if more than a second has passed since the pool last went
 *    empty, the pool is empty right now, and fewer than MAX_PDFLUSH_THREADS
 *    exist, start another worker;
 *  - destruction: if more than MIN_PDFLUSH_THREADS exist and the entry at
 *    the tail of pdflush_list has been asleep for over a second, this
 *    worker leaves the loop and exits.
 *
 * pdflush_lock is held at the top of every loop iteration; both "continue"
 * paths and the "break" path are taken with the lock held.
 *
 * Returns 0 once the worker has decided to exit.
 */
static int __pdflush(struct pdflush_work *my_work)
{
	/* PF_FLUSHER identifies this task as a pdflush thread to other code. */
	current->flags |= PF_FLUSHER | PF_SWAPWRITE;
	set_freezable();
	my_work->fn = NULL;
	my_work->who = current;
	/* Start with an empty (self-linked) node so list_move() below is valid. */
	INIT_LIST_HEAD(&my_work->list);

	spin_lock_irq(&pdflush_lock);
	nr_pdflush_threads++;
	for ( ; ; ) {
		struct pdflush_work *pdf;

		/*
		 * Mark ourselves sleeping before becoming visible on the
		 * list and dropping the lock.
		 * NOTE(review): presumably this ordering is what keeps a
		 * wakeup issued right after the unlock from being lost -
		 * confirm against the scheduler documentation.
		 */
		set_current_state(TASK_INTERRUPTIBLE);
		/* Head insertion: the list tail is the longest-idle worker. */
		list_move(&my_work->list, &pdflush_list);
		my_work->when_i_went_to_sleep = jiffies;
		spin_unlock_irq(&pdflush_lock);
		schedule();
		try_to_freeze();
		spin_lock_irq(&pdflush_lock);
		if (!list_empty(&my_work->list)) {
			/*
			 * Someone woke us up, but without removing our control
			 * structure from the global list.  swsusp will do this
			 * in try_to_freeze()->refrigerator().  Handle it.
			 */
			my_work->fn = NULL;
			continue;
		}
		/* Unlinked from the list but no callback supplied: go back to sleep. */
		if (my_work->fn == NULL) {
			printk("pdflush: bogus wakeup\n");
			continue;
		}
		spin_unlock_irq(&pdflush_lock);

		/* Run the handed-over work with the lock dropped. */
		(*my_work->fn)(my_work->arg0);

		/*
		 * Thread creation: For how long have there been zero
		 * available threads?
		 */
		if (time_after(jiffies, last_empty_jifs + 1 * HZ)) {
			/* unlocked list_empty() test is OK here */
			if (list_empty(&pdflush_list)) {
				/*
				 * unlocked test is OK here
				 *
				 * NOTE(review): the counter is only bumped by
				 * the new thread itself once it reaches
				 * __pdflush(), so several workers could pass
				 * this test at the same time and overshoot
				 * MAX_PDFLUSH_THREADS - confirm whether that
				 * is acceptable.
				 */
				if (nr_pdflush_threads < MAX_PDFLUSH_THREADS)
					start_one_pdflush_thread();
			}
		}

		spin_lock_irq(&pdflush_lock);
		/* Work is done; clear the callback before parking again. */
		my_work->fn = NULL;

		/*
		 * Thread destruction: For how long has the sleepiest
		 * thread slept?
		 */
		if (list_empty(&pdflush_list))
			continue;
		if (nr_pdflush_threads <= MIN_PDFLUSH_THREADS)
			continue;
		/* Tail of the list: the worker that parked itself longest ago. */
		pdf = list_entry(pdflush_list.prev, struct pdflush_work, list);
		if (time_after(jiffies, pdf->when_i_went_to_sleep + 1 * HZ)) {
			/*
			 * Limit exit rate: refresh the tail entry's timestamp
			 * so the next worker to run this check does not also
			 * exit straight away.
			 */
			pdf->when_i_went_to_sleep = jiffies;
			break;					/* exeunt */
		}
	}
	/* Reached only via "break", i.e. with pdflush_lock still held. */
	nr_pdflush_threads--;
	spin_unlock_irq(&pdflush_lock);
	return 0;
}

/*
 * Of course, my_work wants to be just a local in __pdflush().  It is
 * separated out in this manner to hopefully prevent the compiler from
 * performing unfortunate optimisations against the auto variables.  Because
 * these are visible to other tasks and CPUs.  (No problem has actually
 * been observed.  This is just paranoia).
 */
static int pdflush(void *dummy)
{
	struct pdflush_work my_work;
	cpumask_var_t cpus_allowed;

	/*
	 * Since the caller doesn't even check kthread_run() worked, let's not
	 * freak out too much if this fails.
	 */
	if (!alloc_cpumask_var(&cpus_allowed, GFP_KERNEL)) {
		printk(KERN_WARNING "pdflush failed to allocate cpumask\n");
		return 0;
	}

	/*
	 * pdflush can spend a lot of time doing encryption via dm-crypt.  We
	 * don't want to do that at keventd's priority.
	 */
	set_user_nice(current, 0);

	/*
	 * Some configs put our parent kthread in a limited cpuset,
	 * which kthread() overrides, forcing cpus_allowed == CPU_MASK_ALL.
	 * Our needs are more modest - cut back to our cpusets cpus_allowed.
	 * This is needed as pdflush's are dynamically created and destroyed.
	 * The boottime pdflush's are easily placed w/o these 2 lines.
	 */
	cpuset_cpus_allowed(current, cpus_allowed);
	set_cpus_allowed_ptr(current, cpus_allowed);
	free_cpumask_var(cpus_allowed);

	return __pdflush(&my_work);
}

/*
 * Attempt to wake up a pdflush thread, and get it to do some work for you.
 * Returns zero if it indeed managed to find a worker thread, and passed your
 * payload to it.
 */
int pdflush_operation(void (*fn)(unsigned long), unsigned long arg0)
{
	unsigned long flags;
	int ret = 0;

	BUG_ON(fn == NULL);	/* Hard to diagnose if it's deferred */

	spin_lock_irqsave(&pdflush_lock, flags);
	if (list_empty(&pdflush_list)) {
		ret = -1;
	} else {
		struct pdflush_work *pdf;

		pdf = list_entry(pdflush_list.next, struct pdflush_work, list);
		list_del_init(&pdf->list);
		if (list_empty(&pdflush_list))
			last_empty_jifs = jiffies;
		pdf->fn = fn;
		pdf->arg0 = arg0;
		wake_up_process(pdf->who);
	}
	spin_unlock_irqrestore(&pdflush_lock, flags);

	return ret;
}

static void start_one_pdflush_thread(void)
{
	kthread_run(pdflush, NULL, "pdflush");
}

/*
 * Boot-time setup: request MIN_PDFLUSH_THREADS workers so the pool starts
 * at its minimum size.  Always returns 0; thread creation is best-effort.
 */
static int __init pdflush_init(void)
{
	int remaining = MIN_PDFLUSH_THREADS;

	while (remaining-- > 0)
		start_one_pdflush_thread();

	return 0;
}

module_init(pdflush_init);