litmus-rt.git - The LITMUS^RT kernel.

Branch	Commit message	Author	Age
archive/unc-master-3.0	P-FP: fix BUG_ON releated to priority inheritance	Bjoern Brandenburg	13 years
archived-2013.1	uncachedev: mmap memory that is not cached by CPUs	Glenn Elliott	12 years
archived-private-master	Merge branch 'wip-2.6.34' into old-private-master	Andrea Bastoni	15 years
archived-semi-part	Merge branch 'wip-semi-part' of ssh://cvs/cvs/proj/litmus/repo/litmus2010 int...	Andrea Bastoni	15 years
demo	Further refinements	Jonathan Herman	14 years
ecrts-pgm-final	Merge branch 'wip-ecrts14-pgm' of ssh://rtsrv.cs.unc.edu/home/litmus/litmus-r...	Glenn Elliott	12 years
ecrts14-pgm-final	Merge branch 'wip-ecrts14-pgm' of ssh://rtsrv.cs.unc.edu/home/litmus/litmus-r...	Glenn Elliott	12 years
gpusync-rtss12	Final GPUSync implementation.	Glenn Elliott	12 years
gpusync/staging	Rename IKGLP R2DGLP.	Glenn Elliott	12 years
linux-tip	Merge branch 'slab/urgent' of git://git.kernel.org/pub/scm/linux/kernel/git/p...	Linus Torvalds	15 years
litmus2008-patch-series	add i386 feather-trace implementation	Bjoern B. Brandenburg	16 years
master	PSN-EDF: use inferred_sporadic_job_release_at	Bjoern Brandenburg	9 years
pgm	make it compile	Glenn Elliott	12 years
prop/litmus-signals	Infrastructure for Litmus signals.	Glenn Elliott	13 years
prop/robust-tie-break	Fixed bug in edf_higher_prio().	Glenn Elliott	13 years
staging	Fix tracepoint compilation error	Felipe Cerqueira	13 years
test	9/23/2016	Namhoon Kim	9 years
tracing-devel	Test kernel tracing events capabilities	Andrea Bastoni	16 years
v2.6.34-with-arm-patches	smsc911x: Add spinlocks around registers access	Catalin Marinas	15 years
v2015.1	Add ARM syscall def for get_current_budget	Bjoern Brandenburg	10 years
wip-2011.2-bbb	Litmus core: simplify np-section protocol	Bjoern B. Brandenburg	14 years
wip-2011.2-bbb-trace	Refactor sched_trace_log_message() -> debug_trace_log_message()	Andrea Bastoni	14 years
wip-2012.3-gpu	SOBLIV draining support for C-EDF.	Glenn Elliott	12 years
wip-2012.3-gpu-preport	pick up last C-RM file	Glenn Elliott	12 years
wip-2012.3-gpu-rtss13	Fix critical bug in GPU tracker.	Glenn Elliott	12 years
wip-2012.3-gpu-sobliv-budget-w-kshark	Proper sobliv draining and many bug fixes.	Glenn Elliott	12 years
wip-aedzl-final	Make it easier to compile AEDZL interfaces in liblitmus.	Glenn Elliott	15 years
wip-aedzl-revised	Add sched_trace data for Apative EDZL	Glenn Elliott	15 years
wip-arbit-deadline	Fix compilation bug.	Glenn Elliott	13 years
wip-aux-tasks	Description of refined aux task inheritance.	Glenn Elliott	13 years
wip-bbb	GSN-EDF & Core: improve debug TRACE'ing for NP sections	Bjoern B. Brandenburg	14 years
wip-bbb-prio-don	use correct timestamp	Bjoern B. Brandenburg	14 years
wip-better-break	Implement hash-based EDF tie-breaking.	Glenn Elliott	13 years
wip-binary-heap	Make C-EDF work with simplified binheap_delete	Glenn Elliott	13 years
wip-budget	Added support for choices in budget policy enforcement.	Glenn Elliott	15 years
wip-color	Summarize schedulability with final record	Jonathan Herman	13 years
wip-color-jlh	sched_color: Fixed two bugs causing crashing on experiment restart and a rare...	Jonathan Herman	13 years
wip-d10-hz1000	Enable HZ=1000 on District 10	Bjoern B. Brandenburg	15 years
wip-default-clustering	Feature: Make default C-EDF clustering compile-time configurable.	Glenn Elliott	15 years
wip-dissipation-jerickso	Update from 2.6.36 to 2.6.36.4	Jeremy Erickson	11 years
wip-dissipation2-jerickso	Update 2.6.36 to 2.6.36.4	Jeremy Erickson	11 years
wip-ecrts14-pgm	Merge branch 'wip-ecrts14-pgm' of ssh://rtsrv.cs.unc.edu/home/litmus/litmus-r...	Glenn Elliott	12 years
wip-edf-hsb	last tested version	Jonathan Herman	14 years
wip-edf-os	Lookup table EDF-os	Jeremy Erickson	12 years
wip-edf-tie-break	Merge branch 'wip-edf-tie-break' of ssh://rtsrv.cs.unc.edu/home/litmus/litmus...	Glenn Elliott	13 years
wip-edzl-critique	Use hr_timer's active checks instead of having own flag.	Glenn Elliott	15 years
wip-edzl-final	Implementation of the EDZL scheduler.	Glenn Elliott	15 years
wip-edzl-revised	Clean up comments.	Glenn Elliott	15 years
wip-events	Added support for tracing arbitrary actions.	Jonathan Herman	15 years
wip-extra-debug	DBG: add additional tracing	Bjoern B. Brandenburg	15 years
wip-fix-switch-jerickso	Attempt to fix race condition with plugin switching	Jeremy Erickson	15 years
wip-fix3	sched: show length of runqueue clock deactivation in /proc/sched_debug	Bjoern B. Brandenburg	15 years
wip-fmlp-dequeue	Improve FMLP queue management.	Glenn Elliott	14 years
wip-ft-irq-flag	Feather-Trace: keep track of interrupt-related interference.	Bjoern B. Brandenburg	14 years
wip-gpu-cleanup	Enable sched_trace log injection from userspace	Glenn Elliott	13 years
wip-gpu-interrupts	Remove option for threading of all softirqs.	Glenn Elliott	14 years
wip-gpu-rtas12	Generalized GPU cost predictors + EWMA. (untested)	Glenn Elliott	13 years
wip-gpu-rtss12	Final GPUSync implementation.	Glenn Elliott	13 years
wip-gpu-rtss12-srp	experimental changes to support GPUs under SRP	Glenn Elliott	13 years
wip-gpusync-merge	Cleanup priority tracking for budget enforcement.	Glenn Elliott	12 years
wip-ikglp	Move RSM and IKGLP imp. to own .c files	Glenn Elliott	13 years
wip-k-fmlp	Merge branch 'mpi-master' into wip-k-fmlp	Glenn Elliott	14 years
wip-kernel-coloring	Added recolor syscall	Namhoon Kim	7 years
wip-kernthreads	Kludge work-queue processing into klitirqd.	Glenn Elliott	15 years
wip-klmirqd-to-aux	Allow klmirqd threads to be given names.	Glenn Elliott	13 years
wip-kshark	Merge branch 'mpi-staging' into wip-kshark	Jonathan Herman	13 years
wip-litmus-3.2	Merge commit 'v3.2' into litmus-staging	Andrea Bastoni	13 years
wip-litmus2011.2	Cleanup: Coding conformance for affinity stuff.	Glenn Elliott	14 years
wip-litmus3.0-2011.2	Feather-Trace: keep track of interrupt-related interference.	Bjoern B. Brandenburg	14 years
wip-master-2.6.33-rt	Avoid deadlock when switching task policy to BACKGROUND (ugly)	Andrea Bastoni	15 years
wip-mc	Removed ARM-specific hacks which disabled less common mixed-criticality featu...	Jonathan Herman	12 years
wip-mc-bipasa	MC-EDF added	bipasa chattopadhyay	13 years
wip-mc-jerickso	Split C/D queues	Jeremy Erickson	15 years
wip-mc2-cache-slack	Manually patched mc^2 related code	Ming Yang	10 years
wip-mcrit-mac	cosmetic	Mac Mollison	15 years
wip-merge-3.0	Prevent Linux to send IPI and queue tasks on remote CPUs.	Andrea Bastoni	14 years
wip-merge-v3.0	Prevent Linux to send IPI and queue tasks on remote CPUs.	Andrea Bastoni	14 years
wip-migration-affinity	NULL affinity dereference in C-EDF.	Glenn Elliott	14 years
wip-mmap-uncache	share branch with others	Glenn Elliott	13 years
wip-modechange	RTSS 2017 submission	Namhoon Kim	8 years
wip-nested-locking	Appears to be working.	Bryan Ward	12 years
wip-omlp-gedf	First implementation of G-OMLP.	Glenn Elliott	15 years
wip-pai	Some cleanup of PAI	Glenn Elliott	14 years
wip-percore-lib	9/21/2016	Namhoon Kim	9 years
wip-performance	CONFIG_DONT_PREEMPT_ON_TIE: Don't preeempt a scheduled task on priority tie.	Glenn Elliott	14 years
wip-pgm	Add PGM support to C-FL	Glenn Elliott	12 years
wip-pgm-split	First draft of C-FL-split	Namhoon Kim	12 years
wip-pm-ovd	Add preemption-and-migration overhead tracing support	Andrea Bastoni	15 years
wip-prio-inh	P-EDF updated to use the generic pi framework.	Glenn Elliott	15 years
wip-prioq-dgl	BUG FIX: Support DGLs with PRIOQ_MUTEX	Glenn Elliott	13 years
wip-refactored-gedf	Generalizd architecture for GEDF-style scheduelrs to reduce code redundancy.	Glenn Elliott	15 years
wip-release-master-fix	bugfix: release master CPU must signal task was picked	Bjoern B. Brandenburg	14 years
wip-robust-tie-break	EDF priority tie-breaks.	Glenn Elliott	13 years
wip-rt-kshark	Move task time accounting into the complete_job method.	Jonathan Herman	13 years
wip-rtas12-pgm	Scheduling of PGM jobs.	Glenn Elliott	13 years
wip-semi-part	Fix compile error with newer GCC	Jeremy Erickson	12 years
wip-semi-part-edfos-jerickso	Use initial CPU set by client	Jeremy Erickson	12 years
wip-shared-lib	TODO: Fix condition checks in replicate_page_move_mapping()	Namhoon Kim	9 years
wip-shared-lib2	RTAS 2017 Submission ver.	Namhoon Kim	9 years
wip-shared-mem	Initial commit for shared library	Namhoon Kim	9 years
wip-splitting-jerickso	Fix release behavior	Jeremy Erickson	13 years
wip-splitting-omlp-jerickso	Bjoern's Dissertation Code with Priority Donation	Jeremy Erickson	13 years
wip-stage-binheap	An efficient binary heap implementation.	Glenn Elliott	13 years
wip-sun-port	Dynamic memory allocation and clean exit for FeatherTrace	Christopher Kenna	15 years
wip-timer-trace	bugfix: C-EDF, clear scheduled field of the correct CPU upon task_exit	Andrea Bastoni	15 years
wip-tracepoints	Add kernel-style events for sched_trace_XXX() functions	Andrea Bastoni	14 years

Tag	Download	Author	Age
2015.1	commit 8e51b37822...	Bjoern Brandenburg	10 years
2013.1	commit bcaacec1ca...	Glenn Elliott	12 years
2012.3	commit c158b5fbe4...	Jonathan Herman	13 years
2012.2	commit b53c479a0f...	Glenn Elliott	13 years
2012.1	commit 83b11ea1c6...	Bjoern B. Brandenburg	14 years
rtas12-mc-beta-exp	commit 8e236ee20f...	Christopher Kenna	14 years
2011.1	commit d11808b5c6...	Christopher Kenna	15 years
v2.6.37-rc4	commit e8a7e48bb2...	Linus Torvalds	15 years
v2.6.37-rc3	commit 3561d43fd2...	Linus Torvalds	15 years
v2.6.37-rc2	commit e53beacd23...	Linus Torvalds	15 years
v2.6.37-rc1	commit c8ddb2713c...	Linus Torvalds	15 years
v2.6.36	commit f6f94e2ab1...	Linus Torvalds	15 years
2010.2	commit 5c5456402d...	Bjoern B. Brandenburg	15 years
v2.6.36-rc8	commit cd07202cc8...	Linus Torvalds	15 years
v2.6.36-rc7	commit cb655d0f3d...	Linus Torvalds	15 years
v2.6.36-rc6	commit 899611ee7d...	Linus Torvalds	15 years
v2.6.36-rc5	commit b30a3f6257...	Linus Torvalds	15 years
v2.6.36-rc4	commit 49553c2ef8...	Linus Torvalds	15 years
v2.6.36-rc3	commit 2bfc96a127...	Linus Torvalds	15 years
v2.6.36-rc2	commit 76be97c1fc...	Linus Torvalds	15 years
v2.6.36-rc1	commit da5cabf80e...	Linus Torvalds	15 years
v2.6.35	commit 9fe6206f40...	Linus Torvalds	15 years
v2.6.35-rc6	commit b37fa16e78...	Linus Torvalds	15 years
v2.6.35-rc5	commit 1c5474a65b...	Linus Torvalds	15 years
v2.6.35-rc4	commit 815c4163b6...	Linus Torvalds	15 years
v2.6.35-rc3	commit 7e27d6e778...	Linus Torvalds	15 years
v2.6.35-rc2	commit e44a21b726...	Linus Torvalds	15 years
v2.6.35-rc1	commit 67a3e12b05...	Linus Torvalds	15 years
2010.1	commit 7c1ff4c544...	Andrea Bastoni	15 years
v2.6.34	commit e40152ee1e...	Linus Torvalds	15 years
v2.6.33.4	commit 4640b4e7d9...	Greg Kroah-Hartman	15 years
v2.6.34-rc7	commit b57f95a382...	Linus Torvalds	15 years
v2.6.34-rc6	commit 66f41d4c5c...	Linus Torvalds	15 years
v2.6.33.3	commit 3e7ad8ed97...	Greg Kroah-Hartman	15 years
v2.6.34-rc5	commit 01bf0b6457...	Linus Torvalds	15 years
v2.6.34-rc4	commit 0d0fb0f9c5...	Linus Torvalds	15 years
v2.6.33.2	commit 19f00f070c...	Greg Kroah-Hartman	15 years
v2.6.34-rc3	commit 2eaa9cfdf3...	Linus Torvalds	15 years
v2.6.34-rc2	commit 220bf991b0...	Linus Torvalds	16 years
v2.6.33.1	commit dbdafe5ccf...	Greg Kroah-Hartman	16 years
v2.6.34-rc1	commit 57d54889cd...	Linus Torvalds	16 years
v2.6.33	commit 60b341b778...	Linus Torvalds	16 years
v2.6.33-rc8	commit 724e6d3fe8...	Linus Torvalds	16 years
v2.6.33-rc7	commit 29275254ca...	Linus Torvalds	16 years
v2.6.33-rc6	commit abe94c756c...	Linus Torvalds	16 years
v2.6.33-rc5	commit 92dcffb916...	Linus Torvalds	16 years
v2.6.33-rc4	commit 7284ce6c9f...	Linus Torvalds	16 years
v2.6.33-rc3	commit 74d2e4f8d7...	Linus Torvalds	16 years
v2.6.33-rc2	commit 6b7b284958...	Linus Torvalds	16 years
v2.6.33-rc1	commit 55639353a0...	Linus Torvalds	16 years
v2.6.32	commit 22763c5cf3...	Linus Torvalds	16 years
v2.6.32-rc8	commit 648f4e3e50...	Linus Torvalds	16 years
v2.6.32-rc7	commit 156171c71a...	Linus Torvalds	16 years
v2.6.32-rc6	commit b419148e56...	Linus Torvalds	16 years
v2.6.32-rc5	commit 012abeea66...	Linus Torvalds	16 years
v2.6.32-rc4	commit 161291396e...	Linus Torvalds	16 years
v2.6.32-rc3	commit 374576a8b6...	Linus Torvalds	16 years
v2.6.32-rc1	commit 17d857be64...	Linus Torvalds	16 years
v2.6.32-rc2	commit 17d857be64...	Linus Torvalds	16 years
v2.6.31	commit 74fca6a428...	Linus Torvalds	16 years
v2.6.31-rc9	commit e07cccf404...	Linus Torvalds	16 years
v2.6.31-rc8	commit 326ba5010a...	Linus Torvalds	16 years
v2.6.31-rc7	commit 422bef879e...	Linus Torvalds	16 years
v2.6.31-rc6	commit 64f1607ffb...	Linus Torvalds	16 years
v2.6.31-rc5	commit ed680c4ad4...	Linus Torvalds	16 years
v2.6.31-rc4	commit 4be3bd7849...	Linus Torvalds	16 years
v2.6.31-rc3	commit 6847e154e3...	Linus Torvalds	16 years
v2.6.31-rc2	commit 8e4a718ff3...	Linus Torvalds	16 years
v2.6.31-rc1	commit 28d0325ce6...	Linus Torvalds	16 years
v2.6.30	commit 07a2039b8e...	Linus Torvalds	16 years
v2.6.30-rc8	commit 9fa7eb283c...	Linus Torvalds	16 years
v2.6.30-rc7	commit 59a3759d0f...	Linus Torvalds	16 years
v2.6.30-rc6	commit 1406de8e11...	Linus Torvalds	16 years
v2.6.30-rc5	commit 091bf7624d...	Linus Torvalds	16 years
v2.6.30-rc4	commit 091438dd56...	Linus Torvalds	16 years
v2.6.30-rc3	commit 0910697403...	Linus Torvalds	16 years
v2.6.30-rc2	commit 0882e8dd3a...	Linus Torvalds	16 years
v2.6.30-rc1	commit 577c9c456f...	Linus Torvalds	16 years
v2.6.29	commit 8e0ee43bc2...	Linus Torvalds	16 years
v2.6.29-rc8	commit 041b62374c...	Linus Torvalds	17 years
v2.6.29-rc7	commit fec6c6fec3...	Linus Torvalds	17 years
v2.6.29-rc6	commit 20f4d6c3a2...	Linus Torvalds	17 years
v2.6.29-rc5	commit d2f8d7ee1a...	Linus Torvalds	17 years
v2.6.29-rc4	commit 8e4921515c...	Linus Torvalds	17 years
v2.6.29-rc3	commit 18e352e4a7...	Linus Torvalds	17 years
v2.6.29-rc2	commit 1de9e8e70f...	Linus Torvalds	17 years
v2.6.29-rc1	commit c59765042f...	Linus Torvalds	17 years
v2.6.28	commit 4a6908a3a0...	Linus Torvalds	17 years
v2.6.28-rc9	commit 929096fe9f...	Linus Torvalds	17 years
v2.6.28-rc8	commit 8b1fae4e42...	Linus Torvalds	17 years
v2.6.28-rc7	commit 061e41fdb5...	Linus Torvalds	17 years
v2.6.28-rc6	commit 13d428afc0...	Linus Torvalds	17 years
v2.6.28-rc5	commit 9bf1a2445f...	Linus Torvalds	17 years
v2.6.28-rc4	commit f7160c7573...	Linus Torvalds	17 years
v2.6.28-rc3	commit 45beca08dd...	Linus Torvalds	17 years
v2.6.28-rc2	commit 0173a3265b...	Linus Torvalds	17 years
v2.6.28-rc1	commit 57f8f7b60d...	Linus Torvalds	17 years
v2.6.27	commit 3fa8749e58...	Linus Torvalds	17 years
v2.6.27-rc9	commit 4330ed8ed4...	Linus Torvalds	17 years
v2.6.27-rc8	commit 94aca1dac6...	Linus Torvalds	17 years
v2.6.27-rc7	commit 72d31053f6...	Linus Torvalds	17 years
v2.6.27-rc6	commit adee14b2e1...	Linus Torvalds	17 years
v2.6.27-rc5	commit 24342c34a0...	Linus Torvalds	17 years
v2.6.27-rc4	commit 6a55617ed5...	Linus Torvalds	17 years
v2.6.27-rc3	commit 30a2f3c60a...	Linus Torvalds	17 years
v2.6.27-rc2	commit 0967d61ea0...	Linus Torvalds	17 years
v2.6.27-rc1	commit 6e86841d05...	Linus Torvalds	17 years
v2.6.26	commit bce7f793da...	Linus Torvalds	17 years
v2.6.26-rc9	commit b7279469d6...	Linus Torvalds	17 years
v2.6.26-rc8	commit 543cf4cb3f...	Linus Torvalds	17 years
v2.6.26-rc7	commit d70ac829b7...	Linus Torvalds	17 years
v2.6.26-rc6	commit 5dd34572ad...	Linus Torvalds	17 years
v2.6.26-rc5	commit 53c8ba9540...	Linus Torvalds	17 years
v2.6.26-rc4	commit e490517a03...	Linus Torvalds	17 years
v2.6.26-rc3	commit b8291ad07a...	Linus Torvalds	17 years
v2.6.26-rc2	commit 492c2e476e...	Linus Torvalds	17 years
v2.6.26-rc1	commit 2ddcca36c8...	Linus Torvalds	17 years
v2.6.25	commit 4b119e21d0...	Linus Torvalds	17 years
v2.6.25-rc9	commit 120dd64cac...	Linus Torvalds	17 years
v2.6.25-rc8	commit 0e81a8ae37...	Linus Torvalds	17 years
v2.6.25-rc7	commit 05dda977f2...	Linus Torvalds	17 years
v2.6.25-rc6	commit a978b30af3...	Linus Torvalds	18 years
v2.6.25-rc5	commit cdeeeae056...	Linus Torvalds	18 years
v2.6.25-rc4	commit 29e8c3c304...	Linus Torvalds	18 years
v2.6.25-rc3	commit bfa274e243...	Linus Torvalds	18 years
v2.6.25-rc2	commit 101142c37b...	Linus Torvalds	18 years
v2.6.25-rc1	commit 19af35546d...	Linus Torvalds	18 years
v2.6.24	commit 49914084e7...	Linus Torvalds	18 years
v2.6.24-rc8	commit cbd9c88369...	Linus Torvalds	18 years
v2.6.24-rc7	commit 3ce5445046...	Linus Torvalds	18 years
v2.6.24-rc6	commit ea67db4cdb...	Linus Torvalds	18 years
v2.6.24-rc5	commit 82d29bf6dc...	Linus Torvalds	18 years
v2.6.24-rc4	commit 09b56adc98...	Linus Torvalds	18 years
v2.6.24-rc3	commit d9f8bcbf67...	Linus Torvalds	18 years
v2.6.24-rc2	commit dbeeb816e8...	Linus Torvalds	18 years
v2.6.24-rc1	commit c9927c2bf4...	Linus Torvalds	18 years
v2.6.23	commit bbf25010f1...	Linus Torvalds	18 years
v2.6.23-rc9	commit 3146b39c18...	Linus Torvalds	18 years
v2.6.23-rc8	commit 4942de4a0e...	Linus Torvalds	18 years
v2.6.23-rc7	commit 81cfe79b9c...	Linus Torvalds	18 years
v2.6.23-rc6	commit 0d4cbb5e7f...	Linus Torvalds	18 years
v2.6.23-rc5	commit 40ffbfad6b...	Linus Torvalds	18 years
v2.6.23-rc4	commit b07d68b5ca...	Linus Torvalds	18 years
v2.6.23-rc3	commit 39d3520c92...	Linus Torvalds	18 years
v2.6.23-rc2	commit d4ac2477fa...	Linus Torvalds	18 years
v2.6.23-rc1	commit f695baf2df...	Linus Torvalds	18 years
v2.6.22	commit 7dcca30a32...	Linus Torvalds	18 years
v2.6.22-rc7	commit a38d6181ff...	Linus Torvalds	18 years
v2.6.22-rc6	commit 189548642c...	Linus Torvalds	18 years
v2.6.22-rc5	commit 188e1f81ba...	Linus Torvalds	18 years
v2.6.22-rc4	commit 5ecd3100e6...	Linus Torvalds	18 years
v2.6.22-rc3	commit c420bc9f09...	Linus Torvalds	18 years
v2.6.22-rc2	commit 55b637c6a0...	Linus Torvalds	18 years
v2.6.22-rc1	commit 39403865d2...	Linus Torvalds	18 years
v2.6.21	commit de46c33745...	Linus Torvalds	18 years
v2.6.21-rc7	commit 94a05509a9...	Linus Torvalds	18 years
v2.6.21-rc6	commit a21bd69e15...	Linus Torvalds	18 years
v2.6.21-rc5	commit e0f2e3a06b...	Linus Torvalds	18 years
v2.6.21-rc4	commit db98e0b434...	Linus Torvalds	19 years
v2.6.21-rc3	commit 08e15e81a4...	Linus Torvalds	19 years
v2.6.21-rc2	commit 606135a308...	Linus Torvalds	19 years
v2.6.21-rc1	commit c8f71b01a5...	Linus Torvalds	19 years
v2.6.20	commit 62d0cfcb27...	Linus Torvalds	19 years
v2.6.20-rc7	commit f56df2f4db...	Linus Torvalds	19 years
v2.6.20-rc6	commit 99abfeafb5...	Linus Torvalds	19 years
v2.6.20-rc5	commit a8b3485287...	Linus Torvalds	19 years
v2.6.20-rc4	commit bf81b46482...	Linus Torvalds	19 years
v2.6.20-rc3	commit 669df1b478...	Linus Torvalds	19 years
v2.6.20-rc2	commit 3bf8ba38f3...	Linus Torvalds	19 years
v2.6.20-rc1	commit cc016448b0...	Linus Torvalds	19 years
v2.6.19	commit 0215ffb08c...	Linus Torvalds	19 years
v2.6.19-rc6	commit 44597f65f6...	Linus Torvalds	19 years
v2.6.19-rc5	commit 80c2188127...	Linus Torvalds	19 years
v2.6.19-rc4	commit ae99a78af3...	Linus Torvalds	19 years
v2.6.19-rc3	commit 7059abedd2...	Linus Torvalds	19 years
v2.6.19-rc2	commit b4bd8c6643...	Linus Torvalds	19 years
v2.6.19-rc1	commit d223a60106...	Linus Torvalds	19 years
v2.6.18	commit e478bec0ba...	Linus Torvalds	19 years
v2.6.18-rc7	commit 95064a75eb...	Linus Torvalds	19 years
v2.6.18-rc6	commit c336923b66...	Linus Torvalds	19 years
v2.6.18-rc5	commit 60d4684068...	Linus Torvalds	19 years
v2.6.18-rc4	commit 9f737633e6...	Linus Torvalds	19 years
v2.6.18-rc3	commit b6ff50833a...	Linus Torvalds	19 years
v2.6.18-rc2	commit 82d6897fef...	Linus Torvalds	19 years
v2.6.18-rc1	commit 120bda20c6...	Linus Torvalds	19 years
v2.6.17	commit 427abfa28a...	Linus Torvalds	19 years
v2.6.17-rc6	commit 1def630a6a...	Linus Torvalds	19 years
v2.6.17-rc5	commit a8bd60705a...	Linus Torvalds	19 years
v2.6.17-rc4	commit d8c3291c73...	Linus Torvalds	19 years
v2.6.17-rc3	commit 2be4d50295...	Linus Torvalds	19 years
v2.6.17-rc2	commit 8bbde0e6d5...	Linus Torvalds	19 years
v2.6.17-rc1	commit 6246b6128b...	Linus Torvalds	19 years
v2.6.16	commit 7705a8792b...	Linus Torvalds	20 years
v2.6.16-rc6	commit 535744878e...	Linus Torvalds	20 years
v2.6.16-rc5	commit b9a33cebac...	Linus Torvalds	20 years
v2.6.16-rc4	commit bd71c2b174...	Linus Torvalds	20 years
v2.6.16-rc3	commit e9bb4c9929...	Linus Torvalds	20 years
v2.6.16-rc2	commit 826eeb53a6...	Linus Torvalds	20 years
v2.6.16-rc1	commit 2664b25051...	Linus Torvalds	20 years
v2.6.15	commit 88026842b0...	Linus Torvalds	20 years
v2.6.15-rc7	commit f89f5948fc...	Linus Torvalds	20 years
v2.6.15-rc6	commit df7addbb45...	Linus Torvalds	20 years
v2.6.15-rc5	commit 436b0f76f2...	Linus Torvalds	20 years
v2.6.15-rc4	commit 5666c0947e...	Linus Torvalds	20 years
v2.6.15-rc3	commit 624f54be20...	Linus Torvalds	20 years
v2.6.15-rc2	commit 3bedff1d73...	Linus Torvalds	20 years
v2.6.15-rc1	commit cd52d1ee9a...	Linus Torvalds	20 years
v2.6.14	commit 741b2252a5...	Linus Torvalds	20 years
v2.6.14-rc5	commit 93918e9afc...	Linus Torvalds	20 years
v2.6.14-rc4	commit 907a426179...	Linus Torvalds	20 years
v2.6.14-rc3	commit 1c9426e8a5...	Linus Torvalds	20 years
v2.6.14-rc2	commit 676d55ae30...	Linus Torvalds	20 years
v2.6.14-rc1	commit 2f4ba45a75...	Linus Torvalds	20 years
v2.6.13	commit 02b3e4e2d7...	Linus Torvalds	20 years
v2.6.13-rc7	commit 0572e3da3f...	Linus Torvalds	20 years
v2.6.13-rc6	commit 6fc32179de...	Linus Torvalds	20 years
v2.6.13-rc5	commit 9a351e30d7...	Linus Torvalds	20 years
v2.6.13-rc4	commit 6395352334...	Linus Torvalds	20 years
v2.6.11	tree c39ae07f39...
v2.6.11-tree	tree c39ae07f39...
v2.6.12	commit 9ee1c939d1...
v2.6.12-rc2	commit 1da177e4c3...
v2.6.12-rc3	commit a2755a80f4...
v2.6.12-rc4	commit 88d7bd8cb9...
v2.6.12-rc5	commit 2a24ab628a...
v2.6.12-rc6	commit 7cef5677ef...
v2.6.13-rc1	commit 4c91aedb75...
v2.6.13-rc2	commit a18bcb7450...
v2.6.13-rc3	commit c32511e271...

/*
 * Copyright (C) 1991, 1992 Linus Torvalds
 * Copyright (C) 1994, Karl Keyte: Added support for disk statistics
 * Elevator latency, (C) 2000 Andrea Arcangeli <andrea@suse.de> SuSE
 * Queue request tables / lock, selectable elevator, Jens Axboe <axboe@suse.de>
 * kernel-doc documentation started by NeilBrown <neilb@cse.unsw.edu.au> - July2000
 * bio rewrite, highmem i/o, etc, Jens Axboe <axboe@suse.de> - may 2001
 */

/*
 * This handles all read/write requests to block devices
 */
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/backing-dev.h>
#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/highmem.h>
#include <linux/mm.h>
#include <linux/kernel_stat.h>
#include <linux/string.h>
#include <linux/init.h>
#include <linux/bootmem.h>	/* for max_pfn/max_low_pfn */
#include <linux/completion.h>
#include <linux/slab.h>
#include <linux/swap.h>
#include <linux/writeback.h>
#include <linux/task_io_accounting_ops.h>
#include <linux/interrupt.h>
#include <linux/cpu.h>
#include <linux/blktrace_api.h>
#include <linux/fault-inject.h>

/*
 * for max sense size
 */
#include <scsi/scsi_cmnd.h>

/* Forward declarations of file-local helpers referenced before their definitions. */
static void blk_unplug_work(struct work_struct *work);
static void blk_unplug_timeout(unsigned long data);
static void drive_stat_acct(struct request *rq, int nr_sectors, int new_io);
static void init_request_from_bio(struct request *req, struct bio *bio);
static int __make_request(request_queue_t *q, struct bio *bio);
static struct io_context *current_io_context(gfp_t gfp_flags, int node);

/*
 * For the allocated request tables
 */
static struct kmem_cache *request_cachep;

/*
 * For queue allocation
 */
static struct kmem_cache *requestq_cachep;

/*
 * For io context allocations
 */
static struct kmem_cache *iocontext_cachep;

/*
 * Controlling structure to kblockd
 */
static struct workqueue_struct *kblockd_workqueue;

/* Exported pfn limits; NOTE(review): they are assigned outside this excerpt. */
unsigned long blk_max_low_pfn, blk_max_pfn;

EXPORT_SYMBOL(blk_max_low_pfn);
EXPORT_SYMBOL(blk_max_pfn);

/* Per-CPU list head; NOTE(review): presumably holds completed requests - confirm at its users. */
static DEFINE_PER_CPU(struct list_head, blk_cpu_done);

/*
Amount of time in which a process may batch requests */ #define BLK_BATCH_TIME (HZ/50UL) /* Number of requests a "batching" process may submit */ #define BLK_BATCH_REQ 32 /* * Return the threshold (number of used requests) at which the queue is * considered to be congested. It include a little hysteresis to keep the * context switch rate down. */ static inline int queue_congestion_on_threshold(struct request_queue *q) { return q->nr_congestion_on; } /* * The threshold at which a queue is considered to be uncongested */ static inline int queue_congestion_off_threshold(struct request_queue *q) { return q->nr_congestion_off; } static void blk_queue_congestion_threshold(struct request_queue *q) { int nr; nr = q->nr_requests - (q->nr_requests / 8) + 1; if (nr > q->nr_requests) nr = q->nr_requests; q->nr_congestion_on = nr; nr = q->nr_requests - (q->nr_requests / 8) - (q->nr_requests / 16) - 1; if (nr < 1) nr = 1; q->nr_congestion_off = nr; } /** * blk_get_backing_dev_info - get the address of a queue's backing_dev_info * @bdev: device * * Locates the passed device's request queue and returns the address of its * backing_dev_info * * Will return NULL if the request queue cannot be located. */ struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev) { struct backing_dev_info *ret = NULL; request_queue_t *q = bdev_get_queue(bdev); if (q) ret = &q->backing_dev_info; return ret; } EXPORT_SYMBOL(blk_get_backing_dev_info); /** * blk_queue_prep_rq - set a prepare_request function for queue * @q: queue * @pfn: prepare_request function * * It's possible for a queue to register a prepare_request callback which * is invoked before the request is handed to the request_fn. The goal of * the function is to prepare a request for I/O, it can be used to build a * cdb from the request data for instance. 
*
 */
void blk_queue_prep_rq(request_queue_t *q, prep_rq_fn *pfn)
{
	q->prep_rq_fn = pfn;
}

EXPORT_SYMBOL(blk_queue_prep_rq);

/**
 * blk_queue_merge_bvec - set a merge_bvec function for queue
 * @q:		queue
 * @mbfn:	merge_bvec_fn
 *
 * Usually queues have static limitations on the max sectors or segments that
 * we can put in a request. Stacking drivers may have some settings that
 * are dynamic, and thus we have to query the queue whether it is ok to
 * add a new bio_vec to a bio at a given offset or not. If the block device
 * has such limitations, it needs to register a merge_bvec_fn to control
 * the size of bio's sent to it. Note that a block device *must* allow a
 * single page to be added to an empty bio. The block device driver may want
 * to use the bio_split() function to deal with these bio's. By default
 * no merge_bvec_fn is defined for a queue, and only the fixed limits are
 * honored.
 */
void blk_queue_merge_bvec(request_queue_t *q, merge_bvec_fn *mbfn)
{
	q->merge_bvec_fn = mbfn;
}

EXPORT_SYMBOL(blk_queue_merge_bvec);

/*
 * blk_queue_softirq_done - set the softirq_done function for a queue
 *
 * Stores @fn in q->softirq_done_fn.  Nothing else on the queue is touched.
 * NOTE(review): the point at which the callback is invoked is not visible
 * in this part of the file - confirm against the completion path.
 */
void blk_queue_softirq_done(request_queue_t *q, softirq_done_fn *fn)
{
	q->softirq_done_fn = fn;
}

EXPORT_SYMBOL(blk_queue_softirq_done);

/**
 * blk_queue_make_request - define an alternate make_request function for a device
 * @q:  the request queue for the device to be affected
 * @mfn: the alternate make_request function
 *
 * Description:
 *    The normal way for &struct bios to be passed to a device
 *    driver is for them to be collected into requests on a request
 *    queue, and then to allow the device driver to select requests
 *    off that queue when it is ready.  This works well for many block
 *    devices. However some block devices (typically virtual devices
 *    such as md or lvm) do not benefit from the processing on the
 *    request queue, and are served best by having the requests passed
 *    directly to them.  This can be achieved by providing a function
 *    to blk_queue_make_request().
*
 * Caveat:
 *    The driver that does this *must* be able to deal appropriately
 *    with buffers in "highmemory". This can be accomplished by either calling
 *    __bio_kmap_atomic() to get a temporary kernel mapping, or by calling
 *    blk_queue_bounce() to create a buffer in normal memory.
 *
 *    Besides installing @mfn, this function resets the queue to its
 *    default limits, readahead, congestion thresholds, batching and
 *    unplug settings, as listed in the body below.
 **/
void blk_queue_make_request(request_queue_t * q, make_request_fn * mfn)
{
	/*
	 * set defaults
	 */
	q->nr_requests = BLKDEV_MAX_RQ;
	blk_queue_max_phys_segments(q, MAX_PHYS_SEGMENTS);
	blk_queue_max_hw_segments(q, MAX_HW_SEGMENTS);
	q->make_request_fn = mfn;
	q->backing_dev_info.ra_pages = (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE;
	q->backing_dev_info.state = 0;
	q->backing_dev_info.capabilities = BDI_CAP_MAP_COPY;
	blk_queue_max_sectors(q, SAFE_MAX_SECTORS);
	blk_queue_hardsect_size(q, 512);
	blk_queue_dma_alignment(q, 511);
	/* must run after nr_requests is set: the thresholds are derived from it */
	blk_queue_congestion_threshold(q);
	q->nr_batching = BLK_BATCH_REQ;

	q->unplug_thresh = 4;		/* hmm */
	q->unplug_delay = (3 * HZ) / 1000;	/* 3 milliseconds */
	/* the integer division yields 0 when 3 * HZ < 1000; use at least 1 */
	if (q->unplug_delay == 0)
		q->unplug_delay = 1;

	INIT_WORK(&q->unplug_work, blk_unplug_work);

	/* the timer callback gets the queue pointer back through .data */
	q->unplug_timer.function = blk_unplug_timeout;
	q->unplug_timer.data = (unsigned long)q;

	/*
	 * by default assume old behaviour and bounce for any highmem page
	 */
	blk_queue_bounce_limit(q, BLK_BOUNCE_HIGH);
}

EXPORT_SYMBOL(blk_queue_make_request);

/*
 * Reset the bookkeeping fields of @rq and bind it to queue @q.
 *
 * List heads, the hash node and the rb node are (re)initialised, pointer
 * fields are cleared, counters are zeroed and ref_count starts at 1.
 * Fields not listed here (e.g. cmd_flags, elevator_private) are left
 * alone, so callers set those themselves.
 */
static void rq_init(request_queue_t *q, struct request *rq)
{
	INIT_LIST_HEAD(&rq->queuelist);
	INIT_LIST_HEAD(&rq->donelist);

	rq->errors = 0;
	rq->bio = rq->biotail = NULL;
	INIT_HLIST_NODE(&rq->hash);
	RB_CLEAR_NODE(&rq->rb_node);
	rq->ioprio = 0;
	rq->buffer = NULL;
	rq->ref_count = 1;
	rq->q = q;
	rq->special = NULL;
	rq->data_len = 0;
	rq->data = NULL;
	rq->nr_phys_segments = 0;
	rq->sense = NULL;
	rq->end_io = NULL;
	rq->end_io_data = NULL;
	rq->completion_data = NULL;
}

/**
 * blk_queue_ordered - does this queue support ordered writes
 * @q:        the request queue
 * @ordered:  one of QUEUE_ORDERED_*
 * @prepare_flush_fn: rq setup helper for cache flush ordered writes
 *
 * Description:
 *   For journalled file
systems, doing ordered writes on a commit * block instead of explicitly doing wait_on_buffer (which is bad * for performance) can be a big win. Block drivers supporting this * feature should call this function and indicate so. * **/ int blk_queue_ordered(request_queue_t *q, unsigned ordered, prepare_flush_fn *prepare_flush_fn) { if (ordered & (QUEUE_ORDERED_PREFLUSH | QUEUE_ORDERED_POSTFLUSH) && prepare_flush_fn == NULL) { printk(KERN_ERR "blk_queue_ordered: prepare_flush_fn required\n"); return -EINVAL; } if (ordered != QUEUE_ORDERED_NONE && ordered != QUEUE_ORDERED_DRAIN && ordered != QUEUE_ORDERED_DRAIN_FLUSH && ordered != QUEUE_ORDERED_DRAIN_FUA && ordered != QUEUE_ORDERED_TAG && ordered != QUEUE_ORDERED_TAG_FLUSH && ordered != QUEUE_ORDERED_TAG_FUA) { printk(KERN_ERR "blk_queue_ordered: bad value %d\n", ordered); return -EINVAL; } q->ordered = ordered; q->next_ordered = ordered; q->prepare_flush_fn = prepare_flush_fn; return 0; } EXPORT_SYMBOL(blk_queue_ordered); /** * blk_queue_issue_flush_fn - set function for issuing a flush * @q: the request queue * @iff: the function to be called issuing the flush * * Description: * If a driver supports issuing a flush command, the support is notified * to the block layer by defining it through this call. 
*
 **/
void blk_queue_issue_flush_fn(request_queue_t *q, issue_flush_fn *iff)
{
	q->issue_flush_fn = iff;
}

EXPORT_SYMBOL(blk_queue_issue_flush_fn);

/*
 * Cache flushing for ordered writes handling
 */

/*
 * Return the stage of the ordered sequence that is currently due on @q.
 *
 * q->ordseq is a mask of stages already completed.  Returns 0 when the
 * mask is empty (no sequence in progress), otherwise the single bit at
 * the position reported by ffz(q->ordseq).
 */
inline unsigned blk_ordered_cur_seq(request_queue_t *q)
{
	if (!q->ordseq)
		return 0;
	return 1 << ffz(q->ordseq);
}

/*
 * Return the stage of the ordered sequence that @rq belongs to.
 *
 * The queue's own pre-flush, barrier and post-flush requests map to their
 * respective stages.  Any other request is classified by comparing its
 * REQ_ORDERED_COLOR flag with that of the original barrier request: the
 * same colour means DRAIN, a different colour means DONE.
 *
 * Must only be called while a sequence is in progress (BUG otherwise).
 */
unsigned blk_ordered_req_seq(struct request *rq)
{
	request_queue_t *q = rq->q;

	BUG_ON(q->ordseq == 0);

	if (rq == &q->pre_flush_rq)
		return QUEUE_ORDSEQ_PREFLUSH;
	if (rq == &q->bar_rq)
		return QUEUE_ORDSEQ_BAR;
	if (rq == &q->post_flush_rq)
		return QUEUE_ORDSEQ_POSTFLUSH;

	if ((rq->cmd_flags & REQ_ORDERED_COLOR) ==
	    (q->orig_bar_rq->cmd_flags & REQ_ORDERED_COLOR))
		return QUEUE_ORDSEQ_DRAIN;
	else
		return QUEUE_ORDSEQ_DONE;
}

/*
 * Mark stage @seq of the ordered sequence on @q as complete.
 *
 * Only the first non-zero @error of a sequence is remembered in q->orderr.
 * Completing a stage twice is a BUG.  Once the current stage becomes
 * QUEUE_ORDSEQ_DONE the original barrier request is finished with the
 * remembered error (or 1 when there was none) and the sequence state is
 * cleared.
 */
void blk_ordered_complete_seq(request_queue_t *q, unsigned seq, int error)
{
	struct request *rq;
	int uptodate;

	if (error && !q->orderr)
		q->orderr = error;

	BUG_ON(q->ordseq & seq);
	q->ordseq |= seq;

	if (blk_ordered_cur_seq(q) != QUEUE_ORDSEQ_DONE)
		return;

	/*
	 * Okay, sequence complete.
	 */
	rq = q->orig_bar_rq;
	/* a recorded error becomes the completion status, otherwise 1 */
	uptodate = q->orderr ? q->orderr : 1;

	/* clear the sequence state before completing the original request */
	q->ordseq = 0;

	end_that_request_first(rq, uptodate, rq->hard_nr_sectors);
	end_that_request_last(rq, uptodate);
}

/* end_io callback of the pre-flush request: completes the PREFLUSH stage. */
static void pre_flush_end_io(struct request *rq, int error)
{
	elv_completed_request(rq->q, rq);
	blk_ordered_complete_seq(rq->q, QUEUE_ORDSEQ_PREFLUSH, error);
}

/* end_io callback of the proxy barrier request: completes the BAR stage. */
static void bar_end_io(struct request *rq, int error)
{
	elv_completed_request(rq->q, rq);
	blk_ordered_complete_seq(rq->q, QUEUE_ORDSEQ_BAR, error);
}

/* end_io callback of the post-flush request: completes the POSTFLUSH stage. */
static void post_flush_end_io(struct request *rq, int error)
{
	elv_completed_request(rq->q, rq);
	blk_ordered_complete_seq(rq->q, QUEUE_ORDSEQ_POSTFLUSH, error);
}

/*
 * Set up and queue one of the queue's embedded flush requests.
 *
 * @which selects the pre-flush request when it equals
 * QUEUE_ORDERED_PREFLUSH; any other value selects the post-flush request.
 * The request is reinitialised, given the barrier request's disk and the
 * matching end_io callback, filled in by the driver's prepare_flush_fn and
 * inserted at the front of the queue.
 */
static void queue_flush(request_queue_t *q, unsigned which)
{
	struct request *rq;
	rq_end_io_fn *end_io;

	if (which == QUEUE_ORDERED_PREFLUSH) {
		rq = &q->pre_flush_rq;
		end_io = pre_flush_end_io;
	} else {
		rq = &q->post_flush_rq;
		end_io = post_flush_end_io;
	}

	rq->cmd_flags = REQ_HARDBARRIER;
	rq_init(q, rq);
	/* rq_init() does not touch the elevator fields; clear them here */
	rq->elevator_private = NULL;
	rq->elevator_private2 = NULL;
	rq->rq_disk = q->bar_rq.rq_disk;
	rq->end_io = end_io;
	q->prepare_flush_fn(q, rq);

	elv_insert(q, rq, ELEVATOR_INSERT_FRONT);
}

/*
 * Begin an ordered sequence for barrier request @rq on @q.
 *
 * @rq is dequeued and remembered as q->orig_bar_rq; the queue's embedded
 * bar_rq is set up from its bio as a proxy and queued together with the
 * flush requests the current ordered mode asks for.  Stages that the mode
 * does not need are marked complete straight away.
 *
 * Returns the request to issue first (the pre-flush request if one was
 * queued, otherwise the proxy barrier), or NULL when the mode is not
 * tag-ordered and requests are still in flight, i.e. nothing of the
 * sequence may be issued yet.
 */
static inline struct request *start_ordered(request_queue_t *q,
					    struct request *rq)
{
	q->bi_size = 0;
	q->orderr = 0;
	q->ordered = q->next_ordered;
	q->ordseq |= QUEUE_ORDSEQ_STARTED;

	/*
	 * Prep proxy barrier request.
	 */
	blkdev_dequeue_request(rq);
	q->orig_bar_rq = rq;
	rq = &q->bar_rq;
	rq->cmd_flags = 0;
	rq_init(q, rq);
	if (bio_data_dir(q->orig_bar_rq->bio) == WRITE)
		rq->cmd_flags |= REQ_RW;
	rq->cmd_flags |= q->ordered & QUEUE_ORDERED_FUA ? REQ_FUA : 0;
	rq->elevator_private = NULL;
	rq->elevator_private2 = NULL;
	init_request_from_bio(rq, q->orig_bar_rq->bio);
	rq->end_io = bar_end_io;

	/*
	 * Queue ordered sequence.  As we stack them at the head, we
	 * need to queue in reverse order.  Note that we rely on that
	 * no fs request uses ELEVATOR_INSERT_FRONT and thus no fs
	 * request gets in between ordered sequence.
	 */
	if (q->ordered & QUEUE_ORDERED_POSTFLUSH)
		queue_flush(q, QUEUE_ORDERED_POSTFLUSH);
	else
		q->ordseq |= QUEUE_ORDSEQ_POSTFLUSH;

	elv_insert(q, rq, ELEVATOR_INSERT_FRONT);

	if (q->ordered & QUEUE_ORDERED_PREFLUSH) {
		queue_flush(q, QUEUE_ORDERED_PREFLUSH);
		rq = &q->pre_flush_rq;
	} else
		q->ordseq |= QUEUE_ORDSEQ_PREFLUSH;

	/* draining is already satisfied for tag ordering or an idle queue */
	if ((q->ordered & QUEUE_ORDERED_TAG) || q->in_flight == 0)
		q->ordseq |= QUEUE_ORDSEQ_DRAIN;
	else
		rq = NULL;

	return rq;
}

/*
 * Apply barrier ordering rules to the request in *@rqp.
 *
 * On return *@rqp is the request the caller may issue now; it is set to
 * NULL when the request has to wait.  If the request starts a new ordered
 * sequence, *@rqp is replaced by whatever start_ordered() returns.
 *
 * Returns 1 in all cases except one: a barrier request arriving while the
 * queue's next_ordered mode is QUEUE_ORDERED_NONE is dequeued and failed
 * with -EOPNOTSUPP, *@rqp is set to NULL and 0 is returned.
 */
int blk_do_ordered(request_queue_t *q, struct request **rqp)
{
	struct request *rq = *rqp;
	int is_barrier = blk_fs_request(rq) && blk_barrier_rq(rq);

	if (!q->ordseq) {
		/* no sequence in progress: only a barrier needs special care */
		if (!is_barrier)
			return 1;

		if (q->next_ordered != QUEUE_ORDERED_NONE) {
			*rqp = start_ordered(q, rq);
			return 1;
		} else {
			/*
			 * This can happen when the queue switches to
			 * ORDERED_NONE while this request is on it.
			 */
			blkdev_dequeue_request(rq);
			end_that_request_first(rq, -EOPNOTSUPP,
					       rq->hard_nr_sectors);
			end_that_request_last(rq, -EOPNOTSUPP);
			*rqp = NULL;
			return 0;
		}
	}

	/*
	 * Ordered sequence in progress
	 */

	/* Special requests are not subject to ordering rules. */
	if (!blk_fs_request(rq) &&
	    rq != &q->pre_flush_rq && rq != &q->post_flush_rq)
		return 1;

	if (q->ordered & QUEUE_ORDERED_TAG) {
		/* Ordered by tag.  Blocking the next barrier is enough. */
		if (is_barrier && rq != &q->bar_rq)
			*rqp = NULL;
	} else {
		/* Ordered by draining.  Wait for turn. */
		WARN_ON(blk_ordered_req_seq(rq) < blk_ordered_cur_seq(q));
		if (blk_ordered_req_seq(rq) > blk_ordered_cur_seq(q))
			*rqp = NULL;
	}

	return 1;
}

/*
 * Temporary bi_end_io installed by ordered_bio_endio() for the barrier bio.
 *
 * bio->bi_private holds the queue here.  @bytes is accumulated in
 * q->bi_size.  Returns 1 while bio->bi_size is still non-zero; once it
 * reaches zero the bio is rewound to its original sector, size and bvec
 * layout, marked up to date again, the accumulator is cleared and 0 is
 * returned.  @error is not used in this function.
 */
static int flush_dry_bio_endio(struct bio *bio, unsigned int bytes, int error)
{
	request_queue_t *q = bio->bi_private;
	struct bio_vec *bvec;
	int i;

	/*
	 * This is dry run, restore bio_sector and size.  We'll finish
	 * this request again with the original bi_end_io after an
	 * error occurs or post flush is complete.
	 */
	q->bi_size += bytes;

	if (bio->bi_size)
		return 1;

	/* Rewind bvec's */
	bio->bi_idx = 0;
	bio_for_each_segment(bvec, bio, i) {
		bvec->bv_len += bvec->bv_offset;
		bvec->bv_offset = 0;
	}

	/* Reset bio */
	set_bit(BIO_UPTODATE, &bio->bi_flags);
	bio->bi_size = q->bi_size;
	/* q->bi_size is in bytes; >> 9 converts it to 512-byte sectors */
	bio->bi_sector -= (q->bi_size >> 9);
	q->bi_size = 0;

	return 0;
}

/*
 * Intercept bio completion for the proxy barrier request.
 *
 * Returns 0 without touching @bio when @rq is not the queue's bar_rq.
 * Otherwise the first error is recorded in q->orderr, the bio is ended
 * through flush_dry_bio_endio() with its own bi_end_io and bi_private
 * saved and restored around the call, and 1 is returned.
 */
static int ordered_bio_endio(struct request *rq, struct bio *bio,
			     unsigned int nbytes, int error)
{
	request_queue_t *q = rq->q;
	bio_end_io_t *endio;
	void *private;

	if (&q->bar_rq != rq)
		return 0;

	/*
	 * Okay, this is the barrier request in progress, dry finish it.
	 */
	if (error && !q->orderr)
		q->orderr = error;

	endio = bio->bi_end_io;
	private = bio->bi_private;
	bio->bi_end_io = flush_dry_bio_endio;
	bio->bi_private = q;

	bio_endio(bio, nbytes, error);

	bio->bi_end_io = endio;
	bio->bi_private = private;

	return 1;
}

/**
 * blk_queue_bounce_limit - set bounce buffer limit for queue
 * @q:  the request queue for the device
 * @dma_addr:   bus address limit
 *
 * Description:
 *    Different hardware can have different requirements as to what pages
 *    it can do I/O directly to.  A low level driver can call
 *    blk_queue_bounce_limit to have lower memory pages allocated as bounce
 *    buffers for doing I/O to pages residing above @page.
 **/
void blk_queue_bounce_limit(request_queue_t *q, u64 dma_addr)
{
	unsigned long bounce_pfn = dma_addr >> PAGE_SHIFT;
	int dma = 0;

	q->bounce_gfp = GFP_NOIO;
#if BITS_PER_LONG == 64
	/* Assume anything <= 4GB can be handled by IOMMU.
	   Actually some IOMMUs can handle everything, but I don't
	   know of a way to test this here.
*/ if (bounce_pfn < (min_t(u64,0xffffffff,BLK_BOUNCE_HIGH) >> PAGE_SHIFT)) dma = 1; q->bounce_pfn = max_low_pfn; #else if (bounce_pfn < blk_max_low_pfn) dma = 1; q->bounce_pfn = bounce_pfn; #endif if (dma) { init_emergency_isa_pool(); q->bounce_gfp = GFP_NOIO | GFP_DMA; q->bounce_pfn = bounce_pfn; } } EXPORT_SYMBOL(blk_queue_bounce_limit); /** * blk_queue_max_sectors - set max sectors for a request for this queue * @q: the request queue for the device * @max_sectors: max sectors in the usual 512b unit * * Description: * Enables a low level driver to set an upper limit on the size of * received requests. **/ void blk_queue_max_sectors(request_queue_t *q, unsigned int max_sectors) { if ((max_sectors << 9) < PAGE_CACHE_SIZE) { max_sectors = 1 << (PAGE_CACHE_SHIFT - 9); printk("%s: set to minimum %d\n", __FUNCTION__, max_sectors); } if (BLK_DEF_MAX_SECTORS > max_sectors) q->max_hw_sectors = q->max_sectors = max_sectors; else { q->max_sectors = BLK_DEF_MAX_SECTORS; q->max_hw_sectors = max_sectors; } } EXPORT_SYMBOL(blk_queue_max_sectors); /** * blk_queue_max_phys_segments - set max phys segments for a request for this queue * @q: the request queue for the device * @max_segments: max number of segments * * Description: * Enables a low level driver to set an upper limit on the number of * physical data segments in a request. This would be the largest sized * scatter list the driver could handle. **/ void blk_queue_max_phys_segments(request_queue_t *q, unsigned short max_segments) { if (!max_segments) { max_segments = 1; printk("%s: set to minimum %d\n", __FUNCTION__, max_segments); } q->max_phys_segments = max_segments; } EXPORT_SYMBOL(blk_queue_max_phys_segments); /** * blk_queue_max_hw_segments - set max hw segments for a request for this queue * @q: the request queue for the device * @max_segments: max number of segments * * Description: * Enables a low level driver to set an upper limit on the number of * hw data segments in a request. 
This would be the largest number of * address/length pairs the host adapter can actually give as once * to the device. **/ void blk_queue_max_hw_segments(request_queue_t *q, unsigned short max_segments) { if (!max_segments) { max_segments = 1; printk("%s: set to minimum %d\n", __FUNCTION__, max_segments); } q->max_hw_segments = max_segments; } EXPORT_SYMBOL(blk_queue_max_hw_segments); /** * blk_queue_max_segment_size - set max segment size for blk_rq_map_sg * @q: the request queue for the device * @max_size: max size of segment in bytes * * Description: * Enables a low level driver to set an upper limit on the size of a * coalesced segment **/ void blk_queue_max_segment_size(request_queue_t *q, unsigned int max_size) { if (max_size < PAGE_CACHE_SIZE) { max_size = PAGE_CACHE_SIZE; printk("%s: set to minimum %d\n", __FUNCTION__, max_size); } q->max_segment_size = max_size; } EXPORT_SYMBOL(blk_queue_max_segment_size); /** * blk_queue_hardsect_size - set hardware sector size for the queue * @q: the request queue for the device * @size: the hardware sector size, in bytes * * Description: * This should typically be set to the lowest possible sector size * that the hardware can operate on (possible without reverting to * even internal read-modify-write operations). Usually the default * of 512 covers most hardware. **/ void blk_queue_hardsect_size(request_queue_t *q, unsigned short size) { q->hardsect_size = size; } EXPORT_SYMBOL(blk_queue_hardsect_size); /* * Returns the minimum that is _not_ zero, unless both are zero. */ #define min_not_zero(l, r) (l == 0) ? r : ((r == 0) ? 
l : min(l, r)) /** * blk_queue_stack_limits - inherit underlying queue limits for stacked drivers * @t: the stacking driver (top) * @b: the underlying device (bottom) **/ void blk_queue_stack_limits(request_queue_t *t, request_queue_t *b) { /* zero is "infinity" */ t->max_sectors = min_not_zero(t->max_sectors,b->max_sectors); t->max_hw_sectors = min_not_zero(t->max_hw_sectors,b->max_hw_sectors); t->max_phys_segments = min(t->max_phys_segments,b->max_phys_segments); t->max_hw_segments = min(t->max_hw_segments,b->max_hw_segments); t->max_segment_size = min(t->max_segment_size,b->max_segment_size); t->hardsect_size = max(t->hardsect_size,b->hardsect_size); if (!test_bit(QUEUE_FLAG_CLUSTER, &b->queue_flags)) clear_bit(QUEUE_FLAG_CLUSTER, &t->queue_flags); } EXPORT_SYMBOL(blk_queue_stack_limits); /** * blk_queue_segment_boundary - set boundary rules for segment merging * @q: the request queue for the device * @mask: the memory boundary mask **/ void blk_queue_segment_boundary(request_queue_t *q, unsigned long mask) { if (mask < PAGE_CACHE_SIZE - 1) { mask = PAGE_CACHE_SIZE - 1; printk("%s: set to minimum %lx\n", __FUNCTION__, mask); } q->seg_boundary_mask = mask; } EXPORT_SYMBOL(blk_queue_segment_boundary); /** * blk_queue_dma_alignment - set dma length and memory alignment * @q: the request queue for the device * @mask: alignment mask * * description: * set required memory and length aligment for direct dma transactions. * this is used when buiding direct io requests for the queue. * **/ void blk_queue_dma_alignment(request_queue_t *q, int mask) { q->dma_alignment = mask; } EXPORT_SYMBOL(blk_queue_dma_alignment); /** * blk_queue_find_tag - find a request by its tag and queue * @q: The request queue for the device * @tag: The tag of the request * * Notes: * Should be used when a device returns a tag and you want to match * it with a request. * * no locks need be held. 
**/ struct request *blk_queue_find_tag(request_queue_t *q, int tag) { return blk_map_queue_find_tag(q->queue_tags, tag); } EXPORT_SYMBOL(blk_queue_find_tag); /** * __blk_free_tags - release a given set of tag maintenance info * @bqt: the tag map to free * * Tries to free the specified @bqt@. Returns true if it was * actually freed and false if there are still references using it */ static int __blk_free_tags(struct blk_queue_tag *bqt) { int retval; retval = atomic_dec_and_test(&bqt->refcnt); if (retval) { BUG_ON(bqt->busy); BUG_ON(!list_empty(&bqt->busy_list)); kfree(bqt->tag_index); bqt->tag_index = NULL; kfree(bqt->tag_map); bqt->tag_map = NULL; kfree(bqt); } return retval; } /** * __blk_queue_free_tags - release tag maintenance info * @q: the request queue for the device * * Notes: * blk_cleanup_queue() will take care of calling this function, if tagging * has been used. So there's no need to call this directly. **/ static void __blk_queue_free_tags(request_queue_t *q) { struct blk_queue_tag *bqt = q->queue_tags; if (!bqt) return; __blk_free_tags(bqt); q->queue_tags = NULL; q->queue_flags &= ~(1 << QUEUE_FLAG_QUEUED); } /** * blk_free_tags - release a given set of tag maintenance info * @bqt: the tag map to free * * For externally managed @bqt@ frees the map. Callers of this * function must guarantee to have released all the queues that * might have been using this tag map. */ void blk_free_tags(struct blk_queue_tag *bqt) { if (unlikely(!__blk_free_tags(bqt))) BUG(); } EXPORT_SYMBOL(blk_free_tags); /** * blk_queue_free_tags - release tag maintenance info * @q: the request queue for the device * * Notes: * This is used to disabled tagged queuing to a device, yet leave * queue in function. 
**/ void blk_queue_free_tags(request_queue_t *q) { clear_bit(QUEUE_FLAG_QUEUED, &q->queue_flags); } EXPORT_SYMBOL(blk_queue_free_tags); static int init_tag_map(request_queue_t *q, struct blk_queue_tag *tags, int depth) { struct request **tag_index; unsigned long *tag_map; int nr_ulongs; if (q && depth > q->nr_requests * 2) { depth = q->nr_requests * 2; printk(KERN_ERR "%s: adjusted depth to %d\n", __FUNCTION__, depth); } tag_index = kzalloc(depth * sizeof(struct request *), GFP_ATOMIC); if (!tag_index) goto fail; nr_ulongs = ALIGN(depth, BITS_PER_LONG) / BITS_PER_LONG; tag_map = kzalloc(nr_ulongs * sizeof(unsigned long), GFP_ATOMIC); if (!tag_map) goto fail; tags->real_max_depth = depth; tags->max_depth = depth; tags->tag_index = tag_index; tags->tag_map = tag_map; return 0; fail: kfree(tag_index); return -ENOMEM; } static struct blk_queue_tag *__blk_queue_init_tags(struct request_queue *q, int depth) { struct blk_queue_tag *tags; tags = kmalloc(sizeof(struct blk_queue_tag), GFP_ATOMIC); if (!tags) goto fail; if (init_tag_map(q, tags, depth)) goto fail; INIT_LIST_HEAD(&tags->busy_list); tags->busy = 0; atomic_set(&tags->refcnt, 1); return tags; fail: kfree(tags); return NULL; } /** * blk_init_tags - initialize the tag info for an external tag map * @depth: the maximum queue depth supported * @tags: the tag to use **/ struct blk_queue_tag *blk_init_tags(int depth) { return __blk_queue_init_tags(NULL, depth); } EXPORT_SYMBOL(blk_init_tags); /** * blk_queue_init_tags - initialize the queue tag info * @q: the request queue for the device * @depth: the maximum queue depth supported * @tags: the tag to use **/ int blk_queue_init_tags(request_queue_t *q, int depth, struct blk_queue_tag *tags) { int rc; BUG_ON(tags && q->queue_tags && tags != q->queue_tags); if (!tags && !q->queue_tags) { tags = __blk_queue_init_tags(q, depth); if (!tags) goto fail; } else if (q->queue_tags) { if ((rc = blk_queue_resize_tags(q, depth))) return rc; set_bit(QUEUE_FLAG_QUEUED, 
&q->queue_flags); return 0; } else atomic_inc(&tags->refcnt); /* * assign it, all done */ q->queue_tags = tags; q->queue_flags |= (1 << QUEUE_FLAG_QUEUED); return 0; fail: kfree(tags); return -ENOMEM; } EXPORT_SYMBOL(blk_queue_init_tags); /** * blk_queue_resize_tags - change the queueing depth * @q: the request queue for the device * @new_depth: the new max command queueing depth * * Notes: * Must be called with the queue lock held. **/ int blk_queue_resize_tags(request_queue_t *q, int new_depth) { struct blk_queue_tag *bqt = q->queue_tags; struct request **tag_index; unsigned long *tag_map; int max_depth, nr_ulongs; if (!bqt) return -ENXIO; /* * if we already have large enough real_max_depth. just * adjust max_depth. *NOTE* as requests with tag value * between new_depth and real_max_depth can be in-flight, tag * map can not be shrunk blindly here. */ if (new_depth <= bqt->real_max_depth) { bqt->max_depth = new_depth; return 0; } /* * Currently cannot replace a shared tag map with a new * one, so error out if this is the case */ if (atomic_read(&bqt->refcnt) != 1) return -EBUSY; /* * save the old state info, so we can copy it back */ tag_index = bqt->tag_index; tag_map = bqt->tag_map; max_depth = bqt->real_max_depth; if (init_tag_map(q, bqt, new_depth)) return -ENOMEM; memcpy(bqt->tag_index, tag_index, max_depth * sizeof(struct request *)); nr_ulongs = ALIGN(max_depth, BITS_PER_LONG) / BITS_PER_LONG; memcpy(bqt->tag_map, tag_map, nr_ulongs * sizeof(unsigned long)); kfree(tag_index); kfree(tag_map); return 0; } EXPORT_SYMBOL(blk_queue_resize_tags); /** * blk_queue_end_tag - end tag operations for a request * @q: the request queue for the device * @rq: the request that has completed * * Description: * Typically called when end_that_request_first() returns 0, meaning * all transfers have been done for a request. 
It's important to call * this function before end_that_request_last(), as that will put the * request back on the free list thus corrupting the internal tag list. * * Notes: * queue lock must be held. **/ void blk_queue_end_tag(request_queue_t *q, struct request *rq) { struct blk_queue_tag *bqt = q->queue_tags; int tag = rq->tag; BUG_ON(tag == -1); if (unlikely(tag >= bqt->real_max_depth)) /* * This can happen after tag depth has been reduced. * FIXME: how about a warning or info message here? */ return; if (unlikely(!__test_and_clear_bit(tag, bqt->tag_map))) { printk(KERN_ERR "%s: attempt to clear non-busy tag (%d)\n", __FUNCTION__, tag); return; } list_del_init(&rq->queuelist); rq->cmd_flags &= ~REQ_QUEUED; rq->tag = -1; if (unlikely(bqt->tag_index[tag] == NULL)) printk(KERN_ERR "%s: tag %d is missing\n", __FUNCTION__, tag); bqt->tag_index[tag] = NULL; bqt->busy--; } EXPORT_SYMBOL(blk_queue_end_tag); /** * blk_queue_start_tag - find a free tag and assign it * @q: the request queue for the device * @rq: the block request that needs tagging * * Description: * This can either be used as a stand-alone helper, or possibly be * assigned as the queue &prep_rq_fn (in which case &struct request * automagically gets a tag assigned). Note that this function * assumes that any type of request can be queued! if this is not * true for your device, you must check the request type before * calling this function. The request will also be removed from * the request queue, so it's the drivers responsibility to readd * it if it should need to be restarted for some reason. * * Notes: * queue lock must be held. **/ int blk_queue_start_tag(request_queue_t *q, struct request *rq) { struct blk_queue_tag *bqt = q->queue_tags; int tag; if (unlikely((rq->cmd_flags & REQ_QUEUED))) { printk(KERN_ERR "%s: request %p for device [%s] already tagged %d", __FUNCTION__, rq, rq->rq_disk ? 
rq->rq_disk->disk_name : "?", rq->tag); BUG(); } /* * Protect against shared tag maps, as we may not have exclusive * access to the tag map. */ do { tag = find_first_zero_bit(bqt->tag_map, bqt->max_depth); if (tag >= bqt->max_depth) return 1; } while (test_and_set_bit(tag, bqt->tag_map)); rq->cmd_flags |= REQ_QUEUED; rq->tag = tag; bqt->tag_index[tag] = rq; blkdev_dequeue_request(rq); list_add(&rq->queuelist, &bqt->busy_list); bqt->busy++; return 0; } EXPORT_SYMBOL(blk_queue_start_tag); /** * blk_queue_invalidate_tags - invalidate all pending tags * @q: the request queue for the device * * Description: * Hardware conditions may dictate a need to stop all pending requests. * In this case, we will safely clear the block side of the tag queue and * readd all requests to the request queue in the right order. * * Notes: * queue lock must be held. **/ void blk_queue_invalidate_tags(request_queue_t *q) { struct blk_queue_tag *bqt = q->queue_tags; struct list_head *tmp, *n; struct request *rq; list_for_each_safe(tmp, n, &bqt->busy_list) { rq = list_entry_rq(tmp); if (rq->tag == -1) { printk(KERN_ERR "%s: bad tag found on list\n", __FUNCTION__); list_del_init(&rq->queuelist); rq->cmd_flags &= ~REQ_QUEUED; } else blk_queue_end_tag(q, rq); rq->cmd_flags &= ~REQ_STARTED; __elv_add_request(q, rq, ELEVATOR_INSERT_BACK, 0); } } EXPORT_SYMBOL(blk_queue_invalidate_tags); void blk_dump_rq_flags(struct request *rq, char *msg) { int bit; printk("%s: dev %s: type=%x, flags=%x\n", msg, rq->rq_disk ? 
rq->rq_disk->disk_name : "?", rq->cmd_type, rq->cmd_flags); printk("\nsector %llu, nr/cnr %lu/%u\n", (unsigned long long)rq->sector, rq->nr_sectors, rq->current_nr_sectors); printk("bio %p, biotail %p, buffer %p, data %p, len %u\n", rq->bio, rq->biotail, rq->buffer, rq->data, rq->data_len); if (blk_pc_request(rq)) { printk("cdb: "); for (bit = 0; bit < sizeof(rq->cmd); bit++) printk("%02x ", rq->cmd[bit]); printk("\n"); } } EXPORT_SYMBOL(blk_dump_rq_flags); void blk_recount_segments(request_queue_t *q, struct bio *bio) { struct bio_vec *bv, *bvprv = NULL; int i, nr_phys_segs, nr_hw_segs, seg_size, hw_seg_size, cluster; int high, highprv = 1; if (unlikely(!bio->bi_io_vec)) return; cluster = q->queue_flags & (1 << QUEUE_FLAG_CLUSTER); hw_seg_size = seg_size = nr_phys_segs = nr_hw_segs = 0; bio_for_each_segment(bv, bio, i) { /* * the trick here is making sure that a high page is never * considered part of another segment, since that might * change with the bounce page. */ high = page_to_pfn(bv->bv_page) > q->bounce_pfn; if (high || highprv) goto new_hw_segment; if (cluster) { if (seg_size + bv->bv_len > q->max_segment_size) goto new_segment; if (!BIOVEC_PHYS_MERGEABLE(bvprv, bv)) goto new_segment; if (!BIOVEC_SEG_BOUNDARY(q, bvprv, bv)) goto new_segment; if (BIOVEC_VIRT_OVERSIZE(hw_seg_size + bv->bv_len)) goto new_hw_segment; seg_size += bv->bv_len; hw_seg_size += bv->bv_len; bvprv = bv; continue; } new_segment: if (BIOVEC_VIRT_MERGEABLE(bvprv, bv) && !BIOVEC_VIRT_OVERSIZE(hw_seg_size + bv->bv_len)) { hw_seg_size += bv->bv_len; } else { new_hw_segment: if (hw_seg_size > bio->bi_hw_front_size) bio->bi_hw_front_size = hw_seg_size; hw_seg_size = BIOVEC_VIRT_START_SIZE(bv) + bv->bv_len; nr_hw_segs++; } nr_phys_segs++; bvprv = bv; seg_size = bv->bv_len; highprv = high; } if (hw_seg_size > bio->bi_hw_back_size) bio->bi_hw_back_size = hw_seg_size; if (nr_hw_segs == 1 && hw_seg_size > bio->bi_hw_front_size) bio->bi_hw_front_size = hw_seg_size; bio->bi_phys_segments = 
nr_phys_segs; bio->bi_hw_segments = nr_hw_segs; bio->bi_flags |= (1 << BIO_SEG_VALID); } EXPORT_SYMBOL(blk_recount_segments); static int blk_phys_contig_segment(request_queue_t *q, struct bio *bio, struct bio *nxt) { if (!(q->queue_flags & (1 << QUEUE_FLAG_CLUSTER))) return 0; if (!BIOVEC_PHYS_MERGEABLE(__BVEC_END(bio), __BVEC_START(nxt))) return 0; if (bio->bi_size + nxt->bi_size > q->max_segment_size) return 0; /* * bio and nxt are contigous in memory, check if the queue allows * these two to be merged into one */ if (BIO_SEG_BOUNDARY(q, bio, nxt)) return 1; return 0; } static int blk_hw_contig_segment(request_queue_t *q, struct bio *bio, struct bio *nxt) { if (unlikely(!bio_flagged(bio, BIO_SEG_VALID))) blk_recount_segments(q, bio); if (unlikely(!bio_flagged(nxt, BIO_SEG_VALID))) blk_recount_segments(q, nxt); if (!BIOVEC_VIRT_MERGEABLE(__BVEC_END(bio), __BVEC_START(nxt)) || BIOVEC_VIRT_OVERSIZE(bio->bi_hw_front_size + bio->bi_hw_back_size)) return 0; if (bio->bi_size + nxt->bi_size > q->max_segment_size) return 0; return 1; } /* * map a request to scatterlist, return number of sg entries setup. 
Caller * must make sure sg can hold rq->nr_phys_segments entries */ int blk_rq_map_sg(request_queue_t *q, struct request *rq, struct scatterlist *sg) { struct bio_vec *bvec, *bvprv; struct bio *bio; int nsegs, i, cluster; nsegs = 0; cluster = q->queue_flags & (1 << QUEUE_FLAG_CLUSTER); /* * for each bio in rq */ bvprv = NULL; rq_for_each_bio(bio, rq) { /* * for each segment in bio */ bio_for_each_segment(bvec, bio, i) { int nbytes = bvec->bv_len; if (bvprv && cluster) { if (sg[nsegs - 1].length + nbytes > q->max_segment_size) goto new_segment; if (!BIOVEC_PHYS_MERGEABLE(bvprv, bvec)) goto new_segment; if (!BIOVEC_SEG_BOUNDARY(q, bvprv, bvec)) goto new_segment; sg[nsegs - 1].length += nbytes; } else { new_segment: memset(&sg[nsegs],0,sizeof(struct scatterlist)); sg[nsegs].page = bvec->bv_page; sg[nsegs].length = nbytes; sg[nsegs].offset = bvec->bv_offset; nsegs++; } bvprv = bvec; } /* segments in bio */ } /* bios in rq */ return nsegs; } EXPORT_SYMBOL(blk_rq_map_sg); /* * the standard queue merge functions, can be overridden with device * specific ones if so desired */ static inline int ll_new_mergeable(request_queue_t *q, struct request *req, struct bio *bio) { int nr_phys_segs = bio_phys_segments(q, bio); if (req->nr_phys_segments + nr_phys_segs > q->max_phys_segments) { req->cmd_flags |= REQ_NOMERGE; if (req == q->last_merge) q->last_merge = NULL; return 0; } /* * A hw segment is just getting larger, bump just the phys * counter. */ req->nr_phys_segments += nr_phys_segs; return 1; } static inline int ll_new_hw_segment(request_queue_t *q, struct request *req, struct bio *bio) { int nr_hw_segs = bio_hw_segments(q, bio); int nr_phys_segs = bio_phys_segments(q, bio); if (req->nr_hw_segments + nr_hw_segs > q->max_hw_segments || req->nr_phys_segments + nr_phys_segs > q->max_phys_segments) { req->cmd_flags |= REQ_NOMERGE; if (req == q->last_merge) q->last_merge = NULL; return 0; } /* * This will form the start of a new hw segment. Bump both * counters. 
*/ req->nr_hw_segments += nr_hw_segs; req->nr_phys_segments += nr_phys_segs; return 1; } int ll_back_merge_fn(request_queue_t *q, struct request *req, struct bio *bio) { unsigned short max_sectors; int len; if (unlikely(blk_pc_request(req))) max_sectors = q->max_hw_sectors; else max_sectors = q->max_sectors; if (req->nr_sectors + bio_sectors(bio) > max_sectors) { req->cmd_flags |= REQ_NOMERGE; if (req == q->last_merge) q->last_merge = NULL; return 0; } if (unlikely(!bio_flagged(req->biotail, BIO_SEG_VALID))) blk_recount_segments(q, req->biotail); if (unlikely(!bio_flagged(bio, BIO_SEG_VALID))) blk_recount_segments(q, bio); len = req->biotail->bi_hw_back_size + bio->bi_hw_front_size; if (BIOVEC_VIRT_MERGEABLE(__BVEC_END(req->biotail), __BVEC_START(bio)) && !BIOVEC_VIRT_OVERSIZE(len)) { int mergeable = ll_new_mergeable(q, req, bio); if (mergeable) { if (req->nr_hw_segments == 1) req->bio->bi_hw_front_size = len; if (bio->bi_hw_segments == 1) bio->bi_hw_back_size = len; } return mergeable; } return ll_new_hw_segment(q, req, bio); } EXPORT_SYMBOL(ll_back_merge_fn); static int ll_front_merge_fn(request_queue_t *q, struct request *req, struct bio *bio) { unsigned short max_sectors; int len; if (unlikely(blk_pc_request(req))) max_sectors = q->max_hw_sectors; else max_sectors = q->max_sectors; if (req->nr_sectors + bio_sectors(bio) > max_sectors) { req->cmd_flags |= REQ_NOMERGE; if (req == q->last_merge) q->last_merge = NULL; return 0; } len = bio->bi_hw_back_size + req->bio->bi_hw_front_size; if (unlikely(!bio_flagged(bio, BIO_SEG_VALID))) blk_recount_segments(q, bio); if (unlikely(!bio_flagged(req->bio, BIO_SEG_VALID))) blk_recount_segments(q, req->bio); if (BIOVEC_VIRT_MERGEABLE(__BVEC_END(bio), __BVEC_START(req->bio)) && !BIOVEC_VIRT_OVERSIZE(len)) { int mergeable = ll_new_mergeable(q, req, bio); if (mergeable) { if (bio->bi_hw_segments == 1) bio->bi_hw_front_size = len; if (req->nr_hw_segments == 1) req->biotail->bi_hw_back_size = len; } return mergeable; } return 
ll_new_hw_segment(q, req, bio); } static int ll_merge_requests_fn(request_queue_t *q, struct request *req, struct request *next) { int total_phys_segments; int total_hw_segments; /* * First check if the either of the requests are re-queued * requests. Can't merge them if they are. */ if (req->special || next->special) return 0; /* * Will it become too large? */ if ((req->nr_sectors + next->nr_sectors) > q->max_sectors) return 0; total_phys_segments = req->nr_phys_segments + next->nr_phys_segments; if (blk_phys_contig_segment(q, req->biotail, next->bio)) total_phys_segments--; if (total_phys_segments > q->max_phys_segments) return 0; total_hw_segments = req->nr_hw_segments + next->nr_hw_segments; if (blk_hw_contig_segment(q, req->biotail, next->bio)) { int len = req->biotail->bi_hw_back_size + next->bio->bi_hw_front_size; /* * propagate the combined length to the end of the requests */ if (req->nr_hw_segments == 1) req->bio->bi_hw_front_size = len; if (next->nr_hw_segments == 1) next->biotail->bi_hw_back_size = len; total_hw_segments--; } if (total_hw_segments > q->max_hw_segments) return 0; /* Merge is OK... */ req->nr_phys_segments = total_phys_segments; req->nr_hw_segments = total_hw_segments; return 1; } /* * "plug" the device if there are no outstanding requests: this will * force the transfer to start only after we have put all the requests * on the list. * * This is called with interrupts off and no requests on the queue and * with the queue lock held. */ void blk_plug_device(request_queue_t *q) { WARN_ON(!irqs_disabled()); /* * don't plug a stopped queue, it must be paired with blk_start_queue() * which will restart the queueing */ if (blk_queue_stopped(q)) return; if (!test_and_set_bit(QUEUE_FLAG_PLUGGED, &q->queue_flags)) { mod_timer(&q->unplug_timer, jiffies + q->unplug_delay); blk_add_trace_generic(q, NULL, 0, BLK_TA_PLUG); } } EXPORT_SYMBOL(blk_plug_device); /* * remove the queue from the plugged list, if present. 
called with * queue lock held and interrupts disabled. */ int blk_remove_plug(request_queue_t *q) { WARN_ON(!irqs_disabled()); if (!test_and_clear_bit(QUEUE_FLAG_PLUGGED, &q->queue_flags)) return 0; del_timer(&q->unplug_timer); return 1; } EXPORT_SYMBOL(blk_remove_plug); /* * remove the plug and let it rip.. */ void __generic_unplug_device(request_queue_t *q) { if (unlikely(blk_queue_stopped(q))) return; if (!blk_remove_plug(q)) return; q->request_fn(q); } EXPORT_SYMBOL(__generic_unplug_device); /** * generic_unplug_device - fire a request queue * @q: The &request_queue_t in question * * Description: * Linux uses plugging to build bigger requests queues before letting * the device have at them. If a queue is plugged, the I/O scheduler * is still adding and merging requests on the queue. Once the queue * gets unplugged, the request_fn defined for the queue is invoked and * transfers started. **/ void generic_unplug_device(request_queue_t *q) { spin_lock_irq(q->queue_lock); __generic_unplug_device(q); spin_unlock_irq(q->queue_lock); } EXPORT_SYMBOL(generic_unplug_device); static void blk_backing_dev_unplug(struct backing_dev_info *bdi, struct page *page) { request_queue_t *q = bdi->unplug_io_data; /* * devices don't necessarily have an ->unplug_fn defined */ if (q->unplug_fn) { blk_add_trace_pdu_int(q, BLK_TA_UNPLUG_IO, NULL, q->rq.count[READ] + q->rq.count[WRITE]); q->unplug_fn(q); } } static void blk_unplug_work(struct work_struct *work) { request_queue_t *q = container_of(work, request_queue_t, unplug_work); blk_add_trace_pdu_int(q, BLK_TA_UNPLUG_IO, NULL, q->rq.count[READ] + q->rq.count[WRITE]); q->unplug_fn(q); } static void blk_unplug_timeout(unsigned long data) { request_queue_t *q = (request_queue_t *)data; blk_add_trace_pdu_int(q, BLK_TA_UNPLUG_TIMER, NULL, q->rq.count[READ] + q->rq.count[WRITE]); kblockd_schedule_work(&q->unplug_work); } /** * blk_start_queue - restart a previously stopped queue * @q: The &request_queue_t in question * * Description: * 
blk_start_queue() will clear the stop flag on the queue, and call * the request_fn for the queue if it was in a stopped state when * entered. Also see blk_stop_queue(). Queue lock must be held. **/ void blk_start_queue(request_queue_t *q) { WARN_ON(!irqs_disabled()); clear_bit(QUEUE_FLAG_STOPPED, &q->queue_flags); /* * one level of recursion is ok and is much faster than kicking * the unplug handling */ if (!test_and_set_bit(QUEUE_FLAG_REENTER, &q->queue_flags)) { q->request_fn(q); clear_bit(QUEUE_FLAG_REENTER, &q->queue_flags); } else { blk_plug_device(q); kblockd_schedule_work(&q->unplug_work); } } EXPORT_SYMBOL(blk_start_queue); /** * blk_stop_queue - stop a queue * @q: The &request_queue_t in question * * Description: * The Linux block layer assumes that a block driver will consume all * entries on the request queue when the request_fn strategy is called. * Often this will not happen, because of hardware limitations (queue * depth settings). If a device driver gets a 'queue full' response, * or if it simply chooses not to queue more I/O at one point, it can * call this function to prevent the request_fn from being called until * the driver has signalled it's ready to go again. This happens by calling * blk_start_queue() to restart queue operations. Queue lock must be held. **/ void blk_stop_queue(request_queue_t *q) { blk_remove_plug(q); set_bit(QUEUE_FLAG_STOPPED, &q->queue_flags); } EXPORT_SYMBOL(blk_stop_queue); /** * blk_sync_queue - cancel any pending callbacks on a queue * @q: the queue * * Description: * The block layer may perform asynchronous callback activity * on a queue, such as calling the unplug function after a timeout. * A block device may call blk_sync_queue to ensure that any * such activity is cancelled, thus allowing it to release resources * that the callbacks might use. The caller must already have made sure * that its ->make_request_fn will not re-add plugging prior to calling * this function. 
* */ void blk_sync_queue(struct request_queue *q) { del_timer_sync(&q->unplug_timer); } EXPORT_SYMBOL(blk_sync_queue); /** * blk_run_queue - run a single device queue * @q: The queue to run */ void blk_run_queue(struct request_queue *q) { unsigned long flags; spin_lock_irqsave(q->queue_lock, flags); blk_remove_plug(q); /* * Only recurse once to avoid overrunning the stack, let the unplug * handling reinvoke the handler shortly if we already got there. */ if (!elv_queue_empty(q)) { if (!test_and_set_bit(QUEUE_FLAG_REENTER, &q->queue_flags)) { q->request_fn(q); clear_bit(QUEUE_FLAG_REENTER, &q->queue_flags); } else { blk_plug_device(q); kblockd_schedule_work(&q->unplug_work); } } spin_unlock_irqrestore(q->queue_lock, flags); } EXPORT_SYMBOL(blk_run_queue); /** * blk_cleanup_queue: - release a &request_queue_t when it is no longer needed * @kobj: the kobj belonging of the request queue to be released * * Description: * blk_cleanup_queue is the pair to blk_init_queue() or * blk_queue_make_request(). It should be called when a request queue is * being released; typically when a block device is being de-registered. * Currently, its primary task it to free all the &struct request * structures that were allocated to the queue and the queue itself. * * Caveat: * Hopefully the low level driver will have finished any * outstanding requests first... 
**/ static void blk_release_queue(struct kobject *kobj) { request_queue_t *q = container_of(kobj, struct request_queue, kobj); struct request_list *rl = &q->rq; blk_sync_queue(q); if (rl->rq_pool) mempool_destroy(rl->rq_pool); if (q->queue_tags) __blk_queue_free_tags(q); blk_trace_shutdown(q); kmem_cache_free(requestq_cachep, q); } void blk_put_queue(request_queue_t *q) { kobject_put(&q->kobj); } EXPORT_SYMBOL(blk_put_queue); void blk_cleanup_queue(request_queue_t * q) { mutex_lock(&q->sysfs_lock); set_bit(QUEUE_FLAG_DEAD, &q->queue_flags); mutex_unlock(&q->sysfs_lock); if (q->elevator) elevator_exit(q->elevator); blk_put_queue(q); } EXPORT_SYMBOL(blk_cleanup_queue); static int blk_init_free_list(request_queue_t *q) { struct request_list *rl = &q->rq; rl->count[READ] = rl->count[WRITE] = 0; rl->starved[READ] = rl->starved[WRITE] = 0; rl->elvpriv = 0; init_waitqueue_head(&rl->wait[READ]); init_waitqueue_head(&rl->wait[WRITE]); rl->rq_pool = mempool_create_node(BLKDEV_MIN_RQ, mempool_alloc_slab, mempool_free_slab, request_cachep, q->node); if (!rl->rq_pool) return -ENOMEM; return 0; } request_queue_t *blk_alloc_queue(gfp_t gfp_mask) { return blk_alloc_queue_node(gfp_mask, -1); } EXPORT_SYMBOL(blk_alloc_queue); static struct kobj_type queue_ktype; request_queue_t *blk_alloc_queue_node(gfp_t gfp_mask, int node_id) { request_queue_t *q; q = kmem_cache_alloc_node(requestq_cachep, gfp_mask, node_id); if (!q) return NULL; memset(q, 0, sizeof(*q)); init_timer(&q->unplug_timer); snprintf(q->kobj.name, KOBJ_NAME_LEN, "%s", "queue"); q->kobj.ktype = &queue_ktype; kobject_init(&q->kobj); q->backing_dev_info.unplug_io_fn = blk_backing_dev_unplug; q->backing_dev_info.unplug_io_data = q; mutex_init(&q->sysfs_lock); return q; } EXPORT_SYMBOL(blk_alloc_queue_node); /** * blk_init_queue - prepare a request queue for use with a block device * @rfn: The function to be called to process requests that have been * placed on the queue. 
* @lock: Request queue spin lock * * Description: * If a block device wishes to use the standard request handling procedures, * which sorts requests and coalesces adjacent requests, then it must * call blk_init_queue(). The function @rfn will be called when there * are requests on the queue that need to be processed. If the device * supports plugging, then @rfn may not be called immediately when requests * are available on the queue, but may be called at some time later instead. * Plugged queues are generally unplugged when a buffer belonging to one * of the requests on the queue is needed, or due to memory pressure. * * @rfn is not required, or even expected, to remove all requests off the * queue, but only as many as it can handle at a time. If it does leave * requests on the queue, it is responsible for arranging that the requests * get dealt with eventually. * * The queue spin lock must be held while manipulating the requests on the * request queue; this lock will be taken also from interrupt context, so irq * disabling is needed for it. * * Function returns a pointer to the initialized request queue, or NULL if * it didn't succeed. * * Note: * blk_init_queue() must be paired with a blk_cleanup_queue() call * when the block device is deactivated (such as at module unload). 
**/ request_queue_t *blk_init_queue(request_fn_proc *rfn, spinlock_t *lock) { return blk_init_queue_node(rfn, lock, -1); } EXPORT_SYMBOL(blk_init_queue); request_queue_t * blk_init_queue_node(request_fn_proc *rfn, spinlock_t *lock, int node_id) { request_queue_t *q = blk_alloc_queue_node(GFP_KERNEL, node_id); if (!q) return NULL; q->node = node_id; if (blk_init_free_list(q)) { kmem_cache_free(requestq_cachep, q); return NULL; } /* * if caller didn't supply a lock, they get per-queue locking with * our embedded lock */ if (!lock) { spin_lock_init(&q->__queue_lock); lock = &q->__queue_lock; } q->request_fn = rfn; q->prep_rq_fn = NULL; q->unplug_fn = generic_unplug_device; q->queue_flags = (1 << QUEUE_FLAG_CLUSTER); q->queue_lock = lock; blk_queue_segment_boundary(q, 0xffffffff); blk_queue_make_request(q, __make_request); blk_queue_max_segment_size(q, MAX_SEGMENT_SIZE); blk_queue_max_hw_segments(q, MAX_HW_SEGMENTS); blk_queue_max_phys_segments(q, MAX_PHYS_SEGMENTS); q->sg_reserved_size = INT_MAX; /* * all done */ if (!elevator_init(q, NULL)) { blk_queue_congestion_threshold(q); return q; } blk_put_queue(q); return NULL; } EXPORT_SYMBOL(blk_init_queue_node); int blk_get_queue(request_queue_t *q) { if (likely(!test_bit(QUEUE_FLAG_DEAD, &q->queue_flags))) { kobject_get(&q->kobj); return 0; } return 1; } EXPORT_SYMBOL(blk_get_queue); static inline void blk_free_request(request_queue_t *q, struct request *rq) { if (rq->cmd_flags & REQ_ELVPRIV) elv_put_request(q, rq); mempool_free(rq, q->rq.rq_pool); } static struct request * blk_alloc_request(request_queue_t *q, int rw, int priv, gfp_t gfp_mask) { struct request *rq = mempool_alloc(q->rq.rq_pool, gfp_mask); if (!rq) return NULL; /* * first three bits are identical in rq->cmd_flags and bio->bi_rw, * see bio.h and blkdev.h */ rq->cmd_flags = rw | REQ_ALLOCED; if (priv) { if (unlikely(elv_set_request(q, rq, gfp_mask))) { mempool_free(rq, q->rq.rq_pool); return NULL; } rq->cmd_flags |= REQ_ELVPRIV; } return rq; } /* * 
ioc_batching returns true if the ioc is a valid batching request and * should be given priority access to a request. */ static inline int ioc_batching(request_queue_t *q, struct io_context *ioc) { if (!ioc) return 0; /* * Make sure the process is able to allocate at least 1 request * even if the batch times out, otherwise we could theoretically * lose wakeups. */ return ioc->nr_batch_requests == q->nr_batching || (ioc->nr_batch_requests > 0 && time_before(jiffies, ioc->last_waited + BLK_BATCH_TIME)); } /* * ioc_set_batching sets ioc to be a new "batcher" if it is not one. This * will cause the process to be a "batcher" on all queues in the system. This * is the behaviour we want though - once it gets a wakeup it should be given * a nice run. */ static void ioc_set_batching(request_queue_t *q, struct io_context *ioc) { if (!ioc || ioc_batching(q, ioc)) return; ioc->nr_batch_requests = q->nr_batching; ioc->last_waited = jiffies; } static void __freed_request(request_queue_t *q, int rw) { struct request_list *rl = &q->rq; if (rl->count[rw] < queue_congestion_off_threshold(q)) blk_clear_queue_congested(q, rw); if (rl->count[rw] + 1 <= q->nr_requests) { if (waitqueue_active(&rl->wait[rw])) wake_up(&rl->wait[rw]); blk_clear_queue_full(q, rw); } } /* * A request has just been released. Account for it, update the full and * congestion status, wake up any waiters. Called under q->queue_lock. */ static void freed_request(request_queue_t *q, int rw, int priv) { struct request_list *rl = &q->rq; rl->count[rw]--; if (priv) rl->elvpriv--; __freed_request(q, rw); if (unlikely(rl->starved[rw ^ 1])) __freed_request(q, rw ^ 1); } #define blkdev_free_rq(list) list_entry((list)->next, struct request, queuelist) /* * Get a free request, queue_lock must be held. * Returns NULL on failure, with queue_lock held. * Returns !NULL on success, with queue_lock *not held*. 
*/
static struct request *get_request(request_queue_t *q, int rw_flags,
				   struct bio *bio, gfp_t gfp_mask)
{
	struct request *rq = NULL;
	struct request_list *rl = &q->rq;
	struct io_context *ioc = NULL;
	/* low bit of rw_flags; indexes the per-direction arrays in rl */
	const int rw = rw_flags & 0x01;
	int may_queue, priv;

	/* An elevator veto is treated like a failed (starved) allocation. */
	may_queue = elv_may_queue(q, rw_flags);
	if (may_queue == ELV_MQUEUE_NO)
		goto rq_starved;

	if (rl->count[rw]+1 >= queue_congestion_on_threshold(q)) {
		if (rl->count[rw]+1 >= q->nr_requests) {
			ioc = current_io_context(GFP_ATOMIC, q->node);
			/*
			 * The queue will fill after this allocation, so set
			 * it as full, and mark this process as "batching".
			 * This process will be allowed to complete a batch of
			 * requests, others will be blocked.
			 */
			if (!blk_queue_full(q, rw)) {
				ioc_set_batching(q, ioc);
				blk_set_queue_full(q, rw);
			} else {
				if (may_queue != ELV_MQUEUE_MUST
						&& !ioc_batching(q, ioc)) {
					/*
					 * The queue is full and the allocating
					 * process is not a "batcher", and not
					 * exempted by the IO scheduler
					 */
					goto out;
				}
			}
		}
		blk_set_queue_congested(q, rw);
	}

	/*
	 * Only allow batching queuers to allocate up to 50% over the defined
	 * limit of requests, otherwise we could have thousands of requests
	 * allocated with any setting of ->nr_requests
	 */
	if (rl->count[rw] >= (3 * q->nr_requests / 2))
		goto out;

	/* Account for the request before dropping the lock to allocate it. */
	rl->count[rw]++;
	rl->starved[rw] = 0;

	/* No elevator private data is requested while ELVSWITCH is set. */
	priv = !test_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags);
	if (priv)
		rl->elvpriv++;

	spin_unlock_irq(q->queue_lock);

	rq = blk_alloc_request(q, rw_flags, priv, gfp_mask);
	if (unlikely(!rq)) {
		/*
		 * Allocation failed presumably due to memory. Undo anything
		 * we might have messed up.
		 *
		 * Allocating task should really be put onto the front of the
		 * wait queue, but this is pretty rare.
		 */
		spin_lock_irq(q->queue_lock);
		freed_request(q, rw, priv);

		/*
		 * in the very unlikely event that allocation failed and no
		 * requests for this direction was pending, mark us starved
		 * so that freeing of a request in the other direction will
		 * notice us. another possible fix would be to split the
		 * rq mempool into READ and WRITE
		 */
rq_starved:
		/* Also reached directly, lock still held, on an elevator veto. */
		if (unlikely(rl->count[rw] == 0))
			rl->starved[rw] = 1;

		goto out;
	}

	/*
	 * ioc may be NULL here, and ioc_batching will be false. That's
	 * OK, if the queue is under the request limit then requests need
	 * not count toward the nr_batch_requests limit. There will always
	 * be some limit enforced by BLK_BATCH_TIME.
	 */
	if (ioc_batching(q, ioc))
		ioc->nr_batch_requests--;

	rq_init(q, rq);

	blk_add_trace_generic(q, bio, rw, BLK_TA_GETRQ);
out:
	return rq;
}

/*
 * No available requests for this queue, unplug the device and wait for some
 * requests to become available.
 *
 * Called with q->queue_lock held, and returns with it unlocked.
 */
static struct request *get_request_wait(request_queue_t *q, int rw_flags,
					struct bio *bio)
{
	/* low bit of rw_flags; selects which rl->wait[] queue to sleep on */
	const int rw = rw_flags & 0x01;
	struct request *rq;

	rq = get_request(q, rw_flags, bio, GFP_NOIO);
	while (!rq) {
		DEFINE_WAIT(wait);
		struct request_list *rl = &q->rq;

		/*
		 * Register as an exclusive waiter first, then retry the
		 * allocation before actually going to sleep.
		 */
		prepare_to_wait_exclusive(&rl->wait[rw], &wait,
				TASK_UNINTERRUPTIBLE);

		rq = get_request(q, rw_flags, bio, GFP_NOIO);

		if (!rq) {
			struct io_context *ioc;

			blk_add_trace_generic(q, bio, rw, BLK_TA_SLEEPRQ);

			/* Unplug, then release the lock for the sleep. */
			__generic_unplug_device(q);
			spin_unlock_irq(q->queue_lock);
			io_schedule();

			/*
			 * After sleeping, we become a "batching" process and
			 * will be able to allocate at least one request, and
			 * up to a big batch of them for a small period time.
			 * See ioc_batching, ioc_set_batching
			 */
			ioc = current_io_context(GFP_NOIO, q->node);
			ioc_set_batching(q, ioc);

			/* get_request() must be called with the lock held. */
			spin_lock_irq(q->queue_lock);
		}
		finish_wait(&rl->wait[rw], &wait);
	}

	return rq;
}

/*
 * blk_get_request - allocate a request for @q
 * @q:        queue to allocate the request from
 * @rw:       READ or WRITE; any other value trips the BUG_ON()
 * @gfp_mask: allocation flags; with __GFP_WAIT set the call goes through
 *            get_request_wait(), otherwise a single get_request() attempt
 *            is made
 *
 * Takes q->queue_lock itself. Returns the request, or NULL when the
 * single get_request() attempt failed.
 */
struct request *blk_get_request(request_queue_t *q, int rw, gfp_t gfp_mask)
{
	struct request *rq;

	BUG_ON(rw != READ && rw != WRITE);

	spin_lock_irq(q->queue_lock);
	if (gfp_mask & __GFP_WAIT) {
		rq = get_request_wait(q, rw, NULL);
	} else {
		rq = get_request(q, rw, NULL, gfp_mask);
		/* get_request() keeps the lock held on failure; drop it here. */
		if (!rq)
			spin_unlock_irq(q->queue_lock);
	}
	/* q->queue_lock is unlocked at this point */

	return rq;
}
EXPORT_SYMBOL(blk_get_request);

/**
 * blk_start_queueing - initiate dispatch of requests to device
 * @q:		request queue to kick into gear
 *
 * This is basically a helper to remove the need to know whether a queue
 * is plugged or not if someone just wants to initiate dispatch of requests
 * for this queue.
 *
 * The queue lock must be held with interrupts disabled.
 */
void blk_start_queueing(request_queue_t *q)
{
	/* Unplugged: call the driver directly; plugged: unplug it instead. */
	if (!blk_queue_plugged(q))
		q->request_fn(q);
	else
		__generic_unplug_device(q);
}
EXPORT_SYMBOL(blk_start_queueing);

/**
 * blk_requeue_request - put a request back on queue
 * @q:		request queue where request should be inserted
 * @rq:		request to be inserted
 *
 * Description:
 *    Drivers often keep queueing requests until the hardware cannot accept
 *    more, when that condition happens we need to put the request back
 *    on the queue. Must be called with queue lock held.
*/
void blk_requeue_request(request_queue_t *q, struct request *rq)
{
	blk_add_trace_rq(q, rq, BLK_TA_REQUEUE);

	/* A tagged request gives its tag back before being requeued. */
	if (blk_rq_tagged(rq))
		blk_queue_end_tag(q, rq);

	elv_requeue_request(q, rq);
}
EXPORT_SYMBOL(blk_requeue_request);

/**
 * blk_insert_request - insert a special request in to a request queue
 * @q:		request queue where request should be inserted
 * @rq:		request to be inserted
 * @at_head:	insert request at head or tail of queue
 * @data:	private data
 *
 * Description:
 *    Many block devices need to execute commands asynchronously, so they don't
 *    block the whole kernel from preemption during request execution.  This is
 *    accomplished normally by inserting artificial requests tagged as
 *    REQ_SPECIAL in to the corresponding request queue, and letting them be
 *    scheduled for actual execution by the request queue.
 *
 *    We have the option of inserting the head or the tail of the queue.
 *    Typically we use the tail for new ioctls and so forth.  We use the head
 *    of the queue for things like a QUEUE_FULL message from a device, or a
 *    host that is unable to accept a particular command.
 */
void blk_insert_request(request_queue_t *q, struct request *rq,
			int at_head, void *data)
{
	int where = at_head ? ELEVATOR_INSERT_FRONT : ELEVATOR_INSERT_BACK;
	unsigned long flags;

	/*
	 * tell I/O scheduler that this isn't a regular read/write (ie it
	 * must not attempt merges on this) and that it acts as a soft
	 * barrier
	 */
	rq->cmd_type = REQ_TYPE_SPECIAL;
	rq->cmd_flags |= REQ_SOFTBARRIER;

	/* @data is stored in the request for whoever processes it later. */
	rq->special = data;

	spin_lock_irqsave(q->queue_lock, flags);

	/*
	 * If command is tagged, release the tag
	 */
	if (blk_rq_tagged(rq))
		blk_queue_end_tag(q, rq);

	drive_stat_acct(rq, rq->nr_sectors, 1);
	__elv_add_request(q, rq, where, 0);
	blk_start_queueing(q);
	spin_unlock_irqrestore(q->queue_lock, flags);
}
EXPORT_SYMBOL(blk_insert_request);

/*
 * Undo the mapping of a single bio: bios flagged BIO_USER_MAPPED are
 * unmapped with bio_unmap_user(), all others go through bio_uncopy_user().
 *
 * Returns 0 for a NULL or user-mapped bio, otherwise the result of
 * bio_uncopy_user().
 */
static int __blk_rq_unmap_user(struct bio *bio)
{
	int ret = 0;

	if (bio) {
		if (bio_flagged(bio, BIO_USER_MAPPED))
			bio_unmap_user(bio);
		else
			ret = bio_uncopy_user(bio);
	}

	return ret;
}

/*
 * Map (or bounce-copy) one chunk of user memory into a bio and attach it
 * to @rq, either as the first bio or appended at the tail.
 *
 * Returns the number of bytes in the attached bio (bio->bi_size) on
 * success, or a negative errno.
 */
static int __blk_rq_map_user(request_queue_t *q, struct request *rq,
			     void __user *ubuf, unsigned int len)
{
	unsigned long uaddr;
	struct bio *bio, *orig_bio;
	int reading, ret;

	reading = rq_data_dir(rq) == READ;

	/*
	 * if alignment requirement is satisfied, map in user pages for
	 * direct dma. else, set up kernel bounce buffers
	 */
	uaddr = (unsigned long) ubuf;
	if (!(uaddr & queue_dma_alignment(q)) && !(len & queue_dma_alignment(q)))
		bio = bio_map_user(q, NULL, uaddr, len, reading);
	else
		bio = bio_copy_user(q, uaddr, len, reading);

	if (IS_ERR(bio))
		return PTR_ERR(bio);

	/* Keep the pre-bounce bio: that is the one that must be unmapped. */
	orig_bio = bio;
	blk_queue_bounce(q, &bio);

	/*
	 * We link the bounce buffer in and could have to traverse it
	 * later so we have to get a ref to prevent it from being freed
	 */
	bio_get(bio);

	if (!rq->bio)
		blk_rq_bio_prep(q, rq, bio);
	else if (!ll_back_merge_fn(q, rq, bio)) {
		ret = -EINVAL;
		goto unmap_bio;
	} else {
		rq->biotail->bi_next = bio;
		rq->biotail = bio;

		rq->data_len += bio->bi_size;
	}

	return bio->bi_size;

unmap_bio:
	/* if it was bounced we must call the end io function */
	bio_endio(bio, bio->bi_size, 0);
	__blk_rq_unmap_user(orig_bio);
	/* drop the reference taken by bio_get() above */
	bio_put(bio);
	return ret;
}

/**
 * blk_rq_map_user - map user data to a request, for REQ_BLOCK_PC usage
 * @q:		request queue where request should be inserted
 * @rq:		request structure to fill
 * @ubuf:	the user buffer
 * @len:	length of user data
 *
 * Description:
 *    Data will be mapped directly for zero copy io, if possible. Otherwise
 *    a kernel bounce buffer is used.
 *
 *    A matching blk_rq_unmap_user() must be issued at the end of io, while
 *    still in process context.
 *
 *    Note: The mapped bio may need to be bounced through blk_queue_bounce()
 *    before being submitted to the device, as pages mapped may be out of
 *    reach. It's the callers responsibility to make sure this happens. The
 *    original bio must be passed back in to blk_rq_unmap_user() for proper
 *    unmapping.
*/ int blk_rq_map_user(request_queue_t *q, struct request *rq, void __user *ubuf, unsigned long len) { unsigned long bytes_read = 0; struct bio *bio = NULL; int ret; if (len > (q->max_hw_sectors << 9)) return -EINVAL; if (!len || !ubuf) return -EINVAL; while (bytes_read != len) { unsigned long map_len, end, start; map_len = min_t(unsigned long, len - bytes_read, BIO_MAX_SIZE); end = ((unsigned long)ubuf + map_len + PAGE_SIZE - 1) >> PAGE_SHIFT; start = (unsigned long)ubuf >> PAGE_SHIFT; /* * A bad offset could cause us to require BIO_MAX_PAGES + 1 * pages. If this happens we just lower the requested * mapping len by a page so that we can fit */ if (end - start > BIO_MAX_PAGES) map_len -= PAGE_SIZE; ret = __blk_rq_map_user(q, rq, ubuf, map_len); if (ret < 0) goto unmap_rq; if (!bio) bio = rq->bio; bytes_read += ret; ubuf += ret; } rq->buffer = rq->data = NULL; return 0; unmap_rq: blk_rq_unmap_user(bio); return ret; } EXPORT_SYMBOL(blk_rq_map_user); /** * blk_rq_map_user_iov - map user data to a request, for REQ_BLOCK_PC usage * @q: request queue where request should be inserted * @rq: request to map data to * @iov: pointer to the iovec * @iov_count: number of elements in the iovec * @len: I/O byte count * * Description: * Data will be mapped directly for zero copy io, if possible. Otherwise * a kernel bounce buffer is used. * * A matching blk_rq_unmap_user() must be issued at the end of io, while * still in process context. * * Note: The mapped bio may need to be bounced through blk_queue_bounce() * before being submitted to the device, as pages mapped may be out of * reach. It's the callers responsibility to make sure this happens. The * original bio must be passed back in to blk_rq_unmap_user() for proper * unmapping. */ int blk_rq_map_user_iov(request_queue_t *q, struct request *rq, struct sg_iovec *iov, int iov_count, unsigned int len) { struct bio *bio; if (!iov || iov_count <= 0) return -EINVAL; /* we don't allow misaligned data like bio_map_user() does. 
If the * user is using sg, they're expected to know the alignment constraints * and respect them accordingly */ bio = bio_map_user_iov(q, NULL, iov, iov_count, rq_data_dir(rq)== READ); if (IS_ERR(bio)) return PTR_ERR(bio); if (bio->bi_size != len) { bio_endio(bio, bio->bi_size, 0); bio_unmap_user(bio); return -EINVAL; } bio_get(bio); blk_rq_bio_prep(q, rq, bio); rq->buffer = rq->data = NULL; return 0; } EXPORT_SYMBOL(blk_rq_map_user_iov); /** * blk_rq_unmap_user - unmap a request with user data * @bio: start of bio list * * Description: * Unmap a rq previously mapped by blk_rq_map_user(). The caller must * supply the original rq->bio from the blk_rq_map_user() return, since * the io completion may have changed rq->bio. */ int blk_rq_unmap_user(struct bio *bio) { struct bio *mapped_bio; int ret = 0, ret2; while (bio) { mapped_bio = bio; if (unlikely(bio_flagged(bio, BIO_BOUNCED))) mapped_bio = bio->bi_private; ret2 = __blk_rq_unmap_user(mapped_bio); if (ret2 && !ret) ret = ret2; mapped_bio = bio; bio = bio->bi_next; bio_put(mapped_bio); } return ret; } EXPORT_SYMBOL(blk_rq_unmap_user); /** * blk_rq_map_kern - map kernel data to a request, for REQ_BLOCK_PC usage * @q: request queue where request should be inserted * @rq: request to fill * @kbuf: the kernel buffer * @len: length of user data * @gfp_mask: memory allocation flags */ int blk_rq_map_kern(request_queue_t *q, struct request *rq, void *kbuf, unsigned int len, gfp_t gfp_mask) { struct bio *bio; if (len > (q->max_hw_sectors << 9)) return -EINVAL; if (!len || !kbuf) return -EINVAL; bio = bio_map_kern(q, kbuf, len, gfp_mask); if (IS_ERR(bio)) return PTR_ERR(bio); if (rq_data_dir(rq) == WRITE) bio->bi_rw |= (1 << BIO_RW); blk_rq_bio_prep(q, rq, bio); blk_queue_bounce(q, &rq->bio); rq->buffer = rq->data = NULL; return 0; } EXPORT_SYMBOL(blk_rq_map_kern); /** * blk_execute_rq_nowait - insert a request into queue for execution * @q: queue to insert the request in * @bd_disk: matching gendisk * @rq: request to insert 
* @at_head: insert request at head or tail of queue * @done: I/O completion handler * * Description: * Insert a fully prepared request at the back of the io scheduler queue * for execution. Don't wait for completion. */ void blk_execute_rq_nowait(request_queue_t *q, struct gendisk *bd_disk, struct request *rq, int at_head, rq_end_io_fn *done) { int where = at_head ? ELEVATOR_INSERT_FRONT : ELEVATOR_INSERT_BACK; rq->rq_disk = bd_disk; rq->cmd_flags |= REQ_NOMERGE; rq->end_io = done; WARN_ON(irqs_disabled()); spin_lock_irq(q->queue_lock); __elv_add_request(q, rq, where, 1); __generic_unplug_device(q); spin_unlock_irq(q->queue_lock); } EXPORT_SYMBOL_GPL(blk_execute_rq_nowait); /** * blk_execute_rq - insert a request into queue for execution * @q: queue to insert the request in * @bd_disk: matching gendisk * @rq: request to insert * @at_head: insert request at head or tail of queue * * Description: * Insert a fully prepared request at the back of the io scheduler queue * for execution and wait for completion. */ int blk_execute_rq(request_queue_t *q, struct gendisk *bd_disk, struct request *rq, int at_head) { DECLARE_COMPLETION_ONSTACK(wait); char sense[SCSI_SENSE_BUFFERSIZE]; int err = 0; /* * we need an extra reference to the request, so we can look at * it after io completion */ rq->ref_count++; if (!rq->sense) { memset(sense, 0, sizeof(sense)); rq->sense = sense; rq->sense_len = 0; } rq->end_io_data = &wait; blk_execute_rq_nowait(q, bd_disk, rq, at_head, blk_end_sync_rq); wait_for_completion(&wait); if (rq->errors) err = -EIO; return err; } EXPORT_SYMBOL(blk_execute_rq); /** * blkdev_issue_flush - queue a flush * @bdev: blockdev to issue flush for * @error_sector: error sector * * Description: * Issue a flush for the block device in question. Caller can supply * room for storing the error offset in case of a flush error, if they * wish to. Caller must run wait_for_completion() on its own. 
*/ int blkdev_issue_flush(struct block_device *bdev, sector_t *error_sector) { request_queue_t *q; if (bdev->bd_disk == NULL) return -ENXIO; q = bdev_get_queue(bdev); if (!q) return -ENXIO; if (!q->issue_flush_fn) return -EOPNOTSUPP; return q->issue_flush_fn(q, bdev->bd_disk, error_sector); } EXPORT_SYMBOL(blkdev_issue_flush); static void drive_stat_acct(struct request *rq, int nr_sectors, int new_io) { int rw = rq_data_dir(rq); if (!blk_fs_request(rq) || !rq->rq_disk) return; if (!new_io) { __disk_stat_inc(rq->rq_disk, merges[rw]); } else { disk_round_stats(rq->rq_disk); rq->rq_disk->in_flight++; } } /* * add-request adds a request to the linked list. * queue lock is held and interrupts disabled, as we muck with the * request queue list. */ static inline void add_request(request_queue_t * q, struct request * req) { drive_stat_acct(req, req->nr_sectors, 1); /* * elevator indicated where it wants this request to be * inserted at elevator_merge time */ __elv_add_request(q, req, ELEVATOR_INSERT_SORT, 0); } /* * disk_round_stats() - Round off the performance stats on a struct * disk_stats. * * The average IO queue length and utilisation statistics are maintained * by observing the current state of the queue length and the amount of * time it has been in this state for. * * Normally, that accounting is done on IO completion, but that can result * in more than a second's worth of IO being accounted for within any one * second, leading to >100% utilisation. To deal with that, we call this * function to do a round-off before returning the results when reading * /proc/diskstats. This accounts immediately for all queue usage up to * the current jiffies and restarts the counters again. 
*/
void disk_round_stats(struct gendisk *disk)
{
	unsigned long now = jiffies;

	/* Already rounded off during this jiffy. */
	if (now == disk->stamp)
		return;

	/* Time only accumulates while requests are in flight. */
	if (disk->in_flight) {
		__disk_stat_add(disk, time_in_queue,
				disk->in_flight * (now - disk->stamp));
		__disk_stat_add(disk, io_ticks, (now - disk->stamp));
	}
	disk->stamp = now;
}

EXPORT_SYMBOL_GPL(disk_round_stats);

/*
 * Drop one reference on @req; when it was the last one, notify the
 * elevator and, for requests carrying REQ_ALLOCED, free the request and
 * update the request-list accounting. A NULL @q is tolerated.
 *
 * queue lock must be held
 */
void __blk_put_request(request_queue_t *q, struct request *req)
{
	if (unlikely(!q))
		return;
	/* Other references remain: nothing to release yet. */
	if (unlikely(--req->ref_count))
		return;

	elv_completed_request(q, req);

	/*
	 * Request may not have originated from ll_rw_blk. if not,
	 * it didn't come out of our reserved rq pools
	 */
	if (req->cmd_flags & REQ_ALLOCED) {
		/* Read these before blk_free_request() releases req. */
		int rw = rq_data_dir(req);
		int priv = req->cmd_flags & REQ_ELVPRIV;

		BUG_ON(!list_empty(&req->queuelist));
		BUG_ON(!hlist_unhashed(&req->hash));

		blk_free_request(q, req);
		freed_request(q, rw, priv);
	}
}

EXPORT_SYMBOL_GPL(__blk_put_request);

/*
 * Locked wrapper around __blk_put_request(): takes req->q's queue lock with
 * the interrupt state saved. Does nothing when req->q is NULL.
 */
void blk_put_request(struct request *req)
{
	unsigned long flags;
	request_queue_t *q = req->q;

	/*
	 * Gee, IDE calls in w/ NULL q.  Fix IDE and remove the
	 * following if (q) test.
	 */
	if (q) {
		spin_lock_irqsave(q->queue_lock, flags);
		__blk_put_request(q, req);
		spin_unlock_irqrestore(q->queue_lock, flags);
	}
}

EXPORT_SYMBOL(blk_put_request);

/**
 * blk_end_sync_rq - executes a completion event on a request
 * @rq: request to complete
 * @error: end io status of the request
 */
void blk_end_sync_rq(struct request *rq, int error)
{
	/* Fetch the completion before the request reference is dropped. */
	struct completion *waiting = rq->end_io_data;

	rq->end_io_data = NULL;
	__blk_put_request(rq->q, rq);

	/*
	 * complete last, if this is a stack request the process (and thus
	 * the rq pointer) could be invalid right after this complete()
	 */
	complete(waiting);
}
EXPORT_SYMBOL(blk_end_sync_rq);

/*
 * Try to merge @next into @req. Returns 1 if the two requests were merged
 * (and @next was released), 0 if they were left untouched.
 *
 * Has to be called with the request spinlock acquired
 */
static int attempt_merge(request_queue_t *q, struct request *req,
			  struct request *next)
{
	if (!rq_mergeable(req) || !rq_mergeable(next))
		return 0;

	/*
	 * not contiguous
	 */
	if (req->sector + req->nr_sectors != next->sector)
		return 0;

	/* Direction and disk must match, and @next must not be special. */
	if (rq_data_dir(req) != rq_data_dir(next)
	    || req->rq_disk != next->rq_disk
	    || next->special)
		return 0;

	/*
	 * If we are allowed to merge, then append bio list
	 * from next to rq and release next. merge_requests_fn
	 * will have updated segment counts, update sector
	 * counts here.
	 */
	if (!ll_merge_requests_fn(q, req, next))
		return 0;

	/*
	 * At this point we have either done a back merge
	 * or front merge. We need the smaller start_time of
	 * the merged requests to be the current request
	 * for accounting purposes.
	 */
	if (time_after(req->start_time, next->start_time))
		req->start_time = next->start_time;

	/* Splice next's bio chain onto the tail of req's. */
	req->biotail->bi_next = next->bio;
	req->biotail = next->biotail;

	req->nr_sectors = req->hard_nr_sectors += next->hard_nr_sectors;

	elv_merge_requests(q, req, next);

	/* Two requests became one: one fewer in flight on the disk. */
	if (req->rq_disk) {
		disk_round_stats(req->rq_disk);
		req->rq_disk->in_flight--;
	}

	req->ioprio = ioprio_best(req->ioprio, next->ioprio);

	__blk_put_request(q, next);
	return 1;
}

/*
 * Try to merge @rq with the request the elevator reports as following it.
 * Returns the attempt_merge() result, or 0 if there is no such request.
 */
static inline int attempt_back_merge(request_queue_t *q, struct request *rq)
{
	struct request *next = elv_latter_request(q, rq);

	if (next)
		return attempt_merge(q, rq, next);

	return 0;
}

/*
 * Try to merge the request the elevator reports as preceding @rq with @rq.
 * Returns the attempt_merge() result, or 0 if there is no such request.
 */
static inline int attempt_front_merge(request_queue_t *q, struct request *rq)
{
	struct request *prev = elv_former_request(q, rq);

	if (prev)
		return attempt_merge(q, prev, rq);

	return 0;
}

/*
 * Fill in a freshly allocated request from the bio it is going to carry:
 * type, flags, sector range, segment counts, buffer, priority, disk and
 * start time.
 */
static void init_request_from_bio(struct request *req, struct bio *bio)
{
	req->cmd_type = REQ_TYPE_FS;

	/*
	 * inherit FAILFAST from bio (for read-ahead, and explicit FAILFAST)
	 */
	if (bio_rw_ahead(bio) || bio_failfast(bio))
		req->cmd_flags |= REQ_FAILFAST;

	/*
	 * REQ_BARRIER implies no merging, but lets make it explicit
	 */
	if (unlikely(bio_barrier(bio)))
		req->cmd_flags |= (REQ_HARDBARRIER | REQ_NOMERGE);

	if (bio_sync(bio))
		req->cmd_flags |= REQ_RW_SYNC;
	if (bio_rw_meta(bio))
		req->cmd_flags |= REQ_RW_META;

	req->errors = 0;
	req->hard_sector = req->sector = bio->bi_sector;
	req->hard_nr_sectors = req->nr_sectors = bio_sectors(bio);
	req->current_nr_sectors = req->hard_cur_sectors = bio_cur_sectors(bio);
	req->nr_phys_segments = bio_phys_segments(req->q, bio);
	req->nr_hw_segments = bio_hw_segments(req->q, bio);
	req->buffer = bio_data(bio);	/* see ->buffer comment above */
	req->bio = req->biotail = bio;
	req->ioprio = bio_prio(bio);
	req->rq_disk = bio->bi_bdev->bd_disk;
	req->start_time = jiffies;
}

/*
 * make_request function installed by blk_init_queue_node(): merge @bio into
 * an existing request when the elevator allows it, otherwise allocate a new
 * request for it and add that to the queue.
 *
 * Always returns 0. An unsupported barrier is reported by ending the bio
 * with -EOPNOTSUPP.
 */
static int __make_request(request_queue_t *q, struct bio *bio)
{
	struct request *req;
	int el_ret, nr_sectors, barrier, err;
	const unsigned short prio = bio_prio(bio);
	const int sync = bio_sync(bio);
	int rw_flags;

	nr_sectors = bio_sectors(bio);

	/*
	 * low level driver can indicate that it wants pages above a
	 * certain limit bounced to low memory (ie for highmem, or even
	 * ISA dma in theory)
	 */
	blk_queue_bounce(q, &bio);

	/* A barrier bio on a queue with no ordering mode is rejected. */
	barrier = bio_barrier(bio);
	if (unlikely(barrier) && (q->next_ordered == QUEUE_ORDERED_NONE)) {
		err = -EOPNOTSUPP;
		goto end_io;
	}

	spin_lock_irq(q->queue_lock);

	/* Barriers never merge; an empty queue has nothing to merge with. */
	if (unlikely(barrier) || elv_queue_empty(q))
		goto get_rq;

	el_ret = elv_merge(q, &req, bio);
	switch (el_ret) {
		case ELEVATOR_BACK_MERGE:
			BUG_ON(!rq_mergeable(req));

			if (!ll_back_merge_fn(q, req, bio))
				break;

			blk_add_trace_bio(q, bio, BLK_TA_BACKMERGE);

			/* Append the bio at the tail of the request. */
			req->biotail->bi_next = bio;
			req->biotail = bio;
			req->nr_sectors = req->hard_nr_sectors += nr_sectors;
			req->ioprio = ioprio_best(req->ioprio, prio);
			drive_stat_acct(req, nr_sectors, 0);
			/* Notify the elevator unless req merged with its successor. */
			if (!attempt_back_merge(q, req))
				elv_merged_request(q, req, el_ret);
			goto out;

		case ELEVATOR_FRONT_MERGE:
			BUG_ON(!rq_mergeable(req));

			if (!ll_front_merge_fn(q, req, bio))
				break;

			blk_add_trace_bio(q, bio, BLK_TA_FRONTMERGE);

			/* Prepend the bio at the head of the request. */
			bio->bi_next = req->bio;
			req->bio = bio;

			/*
			 * may not be valid. if the low level driver said
			 * it didn't need a bounce buffer then it better
			 * not touch req->buffer either...
			 */
			req->buffer = bio_data(bio);
			req->current_nr_sectors = bio_cur_sectors(bio);
			req->hard_cur_sectors = req->current_nr_sectors;
			req->sector = req->hard_sector = bio->bi_sector;
			req->nr_sectors = req->hard_nr_sectors += nr_sectors;
			req->ioprio = ioprio_best(req->ioprio, prio);
			drive_stat_acct(req, nr_sectors, 0);
			/* Notify the elevator unless req merged with its predecessor. */
			if (!attempt_front_merge(q, req))
				elv_merged_request(q, req, el_ret);
			goto out;

		/* ELV_NO_MERGE: elevator says don't/can't merge. */
		default:
			;
	}

get_rq:
	/*
	 * This sync check and mask will be re-done in init_request_from_bio(),
	 * but we need to set it earlier to expose the sync flag to the
	 * rq allocator and io schedulers.
	 */
	rw_flags = bio_data_dir(bio);
	if (sync)
		rw_flags |= REQ_RW_SYNC;

	/*
	 * Grab a free request. This might sleep but can not fail.
	 * Returns with the queue unlocked.
	 */
	req = get_request_wait(q, rw_flags, bio);

	/*
	 * After dropping the lock and possibly sleeping here, our request
	 * may now be mergeable after it had proven unmergeable (above).
	 * We don't worry about that case for efficiency. It won't happen
	 * often, and the elevators are able to handle it.
	 */
	init_request_from_bio(req, bio);

	spin_lock_irq(q->queue_lock);
	/* First request on an empty queue: plug it. */
	if (elv_queue_empty(q))
		blk_plug_device(q);
	add_request(q, req);
out:
	/* Sync bios unplug the queue immediately. */
	if (sync)
		__generic_unplug_device(q);

	spin_unlock_irq(q->queue_lock);
	return 0;

end_io:
	bio_endio(bio, nr_sectors << 9, err);
	return 0;
}

/*
 * If bio->bi_dev is a partition, remap the location
 */
static inline void blk_partition_remap(struct bio *bio)
{
	struct block_device *bdev = bio->bi_bdev;

	/* bd_contains differs from bdev only for a partition. */
	if (bdev != bdev->bd_contains) {
		struct hd_struct *p = bdev->bd_part;
		const int rw = bio_data_dir(bio);

		/* Per-partition accounting for this direction. */
		p->sectors[rw] += bio_sectors(bio);
		p->ios[rw]++;

		/* Redirect the bio to the containing device. */
		bio->bi_sector += p->start_sect;
		bio->bi_bdev = bdev->bd_contains;
	}
}

/*
 * Log an access beyond the end of the device (name, rw flags, requested end
 * sector and the device size in 512-byte sectors) and flag the bio BIO_EOF.
 */
static void handle_bad_sector(struct bio *bio)
{
	char b[BDEVNAME_SIZE];

	printk(KERN_INFO "attempt to access beyond end of device\n");
	printk(KERN_INFO "%s: rw=%ld, want=%Lu, limit=%Lu\n",
			bdevname(bio->bi_bdev, b),
			bio->bi_rw,
			(unsigned long long)bio->bi_sector + bio_sectors(bio),
			(long long)(bio->bi_bdev->bd_inode->i_size >> 9));

	set_bit(BIO_EOF, &bio->bi_flags);
}

#ifdef CONFIG_FAIL_MAKE_REQUEST

static DECLARE_FAULT_ATTR(fail_make_request);

/* Handler for the "fail_make_request=" boot parameter. */
static int __init setup_fail_make_request(char *str)
{
	return setup_fault_attr(&fail_make_request, str);
}
__setup("fail_make_request=", setup_fail_make_request);

/*
 * Consult the fault-injection attribute, but only for bios whose disk has
 * GENHD_FL_FAIL set or whose partition has make_it_fail set. Returns 0 for
 * every other bio.
 */
static int should_fail_request(struct bio *bio)
{
	if ((bio->bi_bdev->bd_disk->flags & GENHD_FL_FAIL) ||
	    (bio->bi_bdev->bd_part && bio->bi_bdev->bd_part->make_it_fail))
		return should_fail(&fail_make_request, bio->bi_size);

	return 0;
}

/* Create the "fail_make_request" debugfs entries for the fault attribute. */
static int __init fail_make_request_debugfs(void)
{
	return init_fault_attr_dentries(&fail_make_request,
					"fail_make_request");
}

late_initcall(fail_make_request_debugfs);

#else /* CONFIG_FAIL_MAKE_REQUEST */

/* Fault injection compiled out: no request is ever failed on purpose. */
static inline int should_fail_request(struct bio *bio)
{
	return 0;
}

#endif /* CONFIG_FAIL_MAKE_REQUEST */

/**
 * generic_make_request: hand a buffer to its device driver for I/O
 * @bio:  The bio describing the location in memory and on the device.
 *
 * generic_make_request() is used to make I/O requests of block
 * devices. It is passed a &struct bio, which describes the I/O that needs
 * to be done.
 *
 * generic_make_request() does not return any status.  The
 * success/failure status of the request, along with notification of
 * completion, is delivered asynchronously through the bio->bi_end_io
 * function described (one day) else where.
 *
 * The caller of generic_make_request must make sure that bi_io_vec
 * are set to describe the memory buffer, and that bi_dev and bi_sector are
 * set to describe the device address, and the
 * bi_end_io and optionally bi_private are set to describe how
 * completion notification should be signaled.
 *
 * generic_make_request and the drivers it calls may use bi_next if this