summaryrefslogtreecommitdiffstats
path: root/include/linux/pwm.h
blob: 56518adc31dd702bc849078005a564b8ae6fa0df (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __LINUX_PWM_H
#define __LINUX_PWM_H

#include <linux/err.h>
#include <linux/mutex.h>
#include <linux/of.h>

struct pwm_capture;
struct seq_file;

struct pwm_chip;

/**
 * enum pwm_polarity - polarity of a PWM signal
 * @PWM_POLARITY_NORMAL: a high signal for the duration of the duty-
 * cycle, followed by a low signal for the remainder of the pulse
 * period
 * @PWM_POLARITY_INVERSED: a low signal for the duration of the duty-
 * cycle, followed by a high signal for the remainder of the pulse
 * period
 */
enum pwm_polarity {
	PWM_POLARITY_NORMAL,
	PWM_POLARITY_INVERSED,
};

/**
 * struct pwm_args - board-dependent PWM arguments
 * @period: reference period
 * @polarity: reference polarity
 *
 * This structure describes board-dependent arguments attached to a PWM
 * device. These arguments are usually retrieved from the PWM lookup table or
 * device tree.
 *
 * Do not confuse this with the PWM state: PWM arguments represent the initial
 * configuration that users want to use on this PWM device rather than the
 * current PWM hardware state.
 */
struct pwm_args {
	unsigned int period;
	enum pwm_polarity polarity;
};

enum {
	PWMF_REQUESTED = 1 << 0,
	PWMF_EXPORTED = 1 << 1,
};

/*
 * struct pwm_state - state of a PWM channel
 * @period: PWM period (in nanoseconds)
 * @duty_cycle: PWM duty cycle (in nanoseconds)
 * @polarity: PWM polarity
 * @enabled: PWM enabled status
 */
struct pwm_state {
	unsigned int period;
	unsigned int duty_cycle;
	enum pwm_polarity polarity;
	bool enabled;
};

/**
 * struct pwm_device - PWM channel object
 * @label: name of the PWM device
 * @flags: flags associated with the PWM device
 * @hwpwm: per-chip relative index of the PWM device
 * @pwm: global index of the PWM device
 * @chip: PWM chip providing this PWM device
 * @chip_data: chip-private data associated with the PWM device
 * @args: PWM arguments
 * @state: curent PWM channel state
 */
struct pwm_device {
	const char *label;
	unsigned long flags;
	unsigned int hwpwm;
	unsigned int pwm;
	struct pwm_chip *chip;
	void *chip_data;

	struct pwm_args args;
	struct pwm_state state;
};

/**
 * pwm_get_state() - retrieve the current PWM state
 * @pwm: PWM device
 * @state: state to fill with the current PWM state
 */
static inline void pwm_get_state(const struct pwm_device *pwm,
				 struct pwm_state *state)
{
	*state = pwm->state;
}

static inline bool pwm_is_enabled(const struct pwm_device *pwm)
{
	struct pwm_state state;

	pwm_get_state(pwm, &state);

	return state.enabled;
}

static inline void pwm_set_period(struct pwm_device *pwm, unsigned int period)
{
	if (pwm)
		pwm->state.period = period;
}

static inline unsigned int pwm_get_period(const struct pwm_device *pwm)
{
	struct pwm_state state;

	pwm_get_state(pwm, &state);

	return state.period;
}

static inline void pwm_set_duty_cycle(struct pwm_device *pwm, unsigned int duty)
{
	if (pwm)
		pwm->state.duty_cycle = duty;
}

static inline unsigned int pwm_get_duty_cycle(const struct pwm_device *pwm)
{
	struct pwm_state state;

	pwm_get_state(pwm, &state);

	return state.duty_cycle;
}

static inline enum pwm_polarity pwm_get_polarity(const struct pwm_device *pwm)
{
	struct pwm_state state;

	pwm_get_state(pwm, &state);

	return state.polarity;
}

static inline void pwm_get_args(const struct pwm_device *pwm,
				struct pwm_args *args)
{
	*args = pwm->args;
}

/**
 * pwm_init_state() - prepare a new state to be applied with pwm_apply_state()
 * @pwm: PWM device
 * @state: state to fill with the prepared PWM state
 *
 * This functions prepares a state that can later be tweaked and applied
 * to the PWM device with pwm_apply_state(). This is a convenient function
 * that first retrieves the current PWM state and the replaces the period
 * and polarity fields with the reference values defined in pwm->args.
 * Once the function returns, you can adjust the ->enabled and ->duty_cycle
 * fields according to your needs before calling pwm_apply_state().
 *
 * ->duty_cycle is initially set to zero to avoid cases where the current
 * ->duty_cycle value exceed the pwm_args->period one, which would trigger
 * an error if the user calls pwm_apply_state() without adjusting ->duty_cycle
 * first.
 */
static inline void pwm_init_state(const struct pwm_device *pwm,
				  struct pwm_state *state)
{
	struct pwm_args args;

	/* First get the current state. */
	pwm_get_state(pwm, state);

	/* Then fill it with the reference config */
	pwm_get_args(pwm, &args);

	state->period = args.period;
	state->polarity = args.polarity;
	state->duty_cycle = 0;
}

/**
 * pwm_get_relative_duty_cycle() - Get a relative duty cycle value
 * @state: PWM state to extract the duty cycle from
 * @scale: target scale of the relative duty cycle
 *
 * This functions converts the absolute duty cycle stored in @state (expressed
 * in nanosecond) into a value relative to the period.
 *
 * For example if you want to get the duty_cycle expressed in percent, call:
 *
 * pwm_get_state(pwm, &state);
 * duty = pwm_get_relative_duty_cycle(&state, 100);
 */
static inline unsigned int
pwm_get_relative_duty_cycle(const struct pwm_state *state, unsigned int scale)
{
	if (!state->period)
		return 0;

	return DIV_ROUND_CLOSEST_ULL((u64)state->duty_cycle * scale,
				     state->period);
}

/**
 * pwm_set_relative_duty_cycle() - Set a relative duty cycle value
 * @state: PWM state to fill
 * @duty_cycle: relative duty cycle value
 * @scale: scale in which @duty_cycle is expressed
 *
 * This functions converts a relative into an absolute duty cycle (expressed
 * in nanoseconds), and puts the result in state->duty_cycle.
 *
 * For example if you want to configure a 50% duty cycle, call:
 *
 * pwm_init_state(pwm, &state);
 * pwm_set_relative_duty_cycle(&state, 50, 100);
 * pwm_apply_state(pwm, &state);
 *
 * This functions returns -EINVAL if @duty_cycle and/or @scale are
 * inconsistent (@scale == 0 or @duty_cycle > @scale).
 */
static inline int
pwm_set_relative_duty_cycle(struct pwm_state *state, unsigned int duty_cycle,
			    unsigned int scale)
{
	if (!scale || duty_cycle > scale)
		return -EINVAL;

	state->duty_cycle = DIV_ROUND_CLOSEST_ULL((u64)duty_cycle *
						  state->period,
						  scale);

	return 0;
}

/**
 * struct pwm_ops - PWM controller operations
 * @request: optional hook for requesting a PWM
 * @free: optional hook for freeing a PWM
 * @config: configure duty cycles and period length for this PWM
 * @set_polarity: configure the polarity of this PWM
 * @capture: capture and report PWM signal
 * @enable: enable PWM output toggling
 * @disable: disable PWM output toggling
 * @apply: atomically apply a new PWM config. The state argument
 *	   should be adjusted with the real hardware config (if the
 *	   approximate the period or duty_cycle value, state should
 *	   reflect it)
 * @get_state: get the current PWM state. This function is only
 *	       called once per PWM device when the PWM chip is
 *	       registered.
 * @dbg_show: optional routine to show contents in debugfs
 * @owner: helps prevent removal of modules exporting active PWMs
 */
struct pwm_ops {
	int (*request)(struct pwm_chip *chip, struct pwm_device *pwm);
	void (*free)(struct pwm_chip *chip, struct pwm_device *pwm);
	int (*config)(struct pwm_chip *chip, struct pwm_device *pwm,
		      int duty_ns, int period_ns);
	int (*set_polarity)(struct pwm_chip *chip, struct pwm_device *pwm,
			    enum pwm_polarity polarity);
	int (*capture)(struct pwm_chip *chip, struct pwm_device *pwm,
		       struct pwm_capture *result, unsigned long timeout);
	int (*enable)(struct pwm_chip *chip, struct pwm_device *pwm);
	void (*disable)(struct pwm_chip *chip, struct pwm_device *pwm);
	int (*apply)(struct pwm_chip *chip, struct pwm_device *pwm,
		     struct pwm_state *state);
	void (*get_state)(struct pwm_chip *chip, struct pwm_device *pwm,
			  struct pwm_state *state);
#ifdef CONFIG_DEBUG_FS
	void (*dbg_show)(struct pwm_chip *chip, struct seq_file *s);
#endif
	struct module *owner;
};

/**
 * struct pwm_chip - abstract a PWM controller
 * @dev: device providing the PWMs
 * @list: list node for internal use
 * @ops: callbacks for this PWM controller
 * @base: number of first PWM controlled by this chip
 * @npwm: number of PWMs controlled by this chip
 * @pwms: array of PWM devices allocated by the framework
 * @of_xlate: request a PWM device given a device tree PWM specifier
 * @of_pwm_n_cells: number of cells expected in the device tree PWM specifier
 */
struct pwm_chip {
	struct device *dev;
	struct list_head list;
	const struct pwm_ops *ops;
	int base;
	unsigned int npwm;

	struct pwm_device *pwms;

	struct pwm_device * (*of_xlate)(struct pwm_chip *pc,
					const struct of_phandle_args *args);
	unsigned int of_pwm_n_cells;
};

/**
 * struct pwm_capture - PWM capture data
 * @period: period of the PWM signal (in nanoseconds)
 * @duty_cycle: duty cycle of the PWM signal (in nanoseconds)
 */
struct pwm_capture {
	unsigned int period;
	unsigned int duty_cycle;
};

#if IS_ENABLED(CONFIG_PWM)
/* PWM user APIs */
struct pwm_device *pwm_request(int pwm_id, const char *label);
void pwm_free(struct pwm_device *pwm);
int pwm_apply_state(struct pwm_device *pwm, struct pwm_state *state);
int pwm_adjust_config(struct pwm_device *pwm);

/**
 * pwm_config() - change a PWM device configuration
 * @pwm: PWM device
 * @duty_ns: "on" time (in nanoseconds)
 * @period_ns: duration (in nanoseconds) of one cycle
 *
 * Returns: 0 on success or a negative error code on failure.
 */
static inline int pwm_config(struct pwm_device *pwm, int duty_ns,
			     int period_ns)
{
	struct pwm_state state;

	if (!pwm)
		return -EINVAL;

	if (duty_ns < 0 || period_ns < 0)
		return -EINVAL;

	pwm_get_state(pwm, &state);
	if (state.duty_cycle == duty_ns && state.period == period_ns)
		return 0;

	state.duty_cycle = duty_ns;
	state.period = period_ns;
	return pwm_apply_state(pwm, &state);
}

/**
 * pwm_set_polarity() - configure the polarity of a PWM signal
 * @pwm: PWM device
 * @polarity: new polarity of the PWM signal
 *
 * Note that the polarity cannot be configured while the PWM device is
 * enabled.
 *
 * Returns: 0 on success or a negative error code on failure.
 */
static inline int pwm_set_polarity(struct pwm_device *pwm,
				   enum pwm_polarity polarity)
{
	struct pwm_state state;

	if (!pwm)
		return -EINVAL;

	pwm_get_state(pwm, &state);
	if (state.polarity == polarity)
		return 0;

	/*
	 * Changing the polarity of a running PWM without adjusting the
	 * dutycycle/period value is a bit risky (can introduce glitches).
	 * Return -EBUSY in this case.
	 * Note that this is allowed when using pwm_apply_state() because
	 * the user specifies all the parameters.
	 */
	if (state.enabled)
		return -EBUSY;

	state.polarity = polarity;
	return pwm_apply_state(pwm, &state);
}

/**
 * pwm_enable() - start a PWM output toggling
 * @pwm: PWM device
 *
 * Returns: 0 on success or a negative error code on failure.
 */
static inline int pwm_enable(struct pwm_device *pwm)
{
	struct pwm_state state;

	if (!pwm)
		return -EINVAL;

	pwm_get_state(pwm, &state);
	if (state.enabled)
		return 0;

	state.enabled = true;
	return pwm_apply_state(pwm, &state);
}

/**
 * pwm_disable() - stop a PWM output toggling
 * @pwm: PWM device
 */
static inline void pwm_disable(struct pwm_device *pwm)
{
	struct pwm_state state;

	if (!pwm)
		return;

	pwm_get_state(pwm, &state);
	if (!state.enabled)
		return;

	state.enabled = false;
	pwm_apply_state(pwm, &state);
}

/* PWM provider APIs */
int pwm_capture(struct pwm_device *pwm, struct pwm_capture *result,
		unsigned long timeout);
int pwm_set_chip_data(struct pwm_device *pwm, void *data);
void *pwm_get_chip_data(struct pwm_device *pwm);

int pwmchip_add_with_polarity(struct pwm_chip *chip,
			      enum pwm_polarity polarity);
int pwmchip_add(struct pwm_chip *chip);
int pwmchip_remove(struct pwm_chip *chip);
struct pwm_device *pwm_request_from_chip(struct pwm_chip *chip,
					 unsigned int index,
					 const char *label);

struct pwm_device *of_pwm_xlate_with_flags(struct pwm_chip *pc,
		const struct of_phandle_args *args);

struct pwm_device *pwm_get(struct device *dev, const char *con_id);
struct pwm_device *of_pwm_get(struct device_node *np, const char *con_id);
void pwm_put(struct pwm_device *pwm);

struct pwm_device *devm_pwm_get(struct device *dev, const char *con_id);
struct pwm_device *devm_of_pwm_get(struct device *dev, struct device_node *np,
				   const char *con_id);
void devm_pwm_put(struct device *dev, struct pwm_device *pwm);
#else
static inline struct pwm_device *pwm_request(int pwm_id, const char *label)
{
	return ERR_PTR(-ENODEV);
}

static inline void pwm_free(struct pwm_device *pwm)
{
}

static inline int pwm_apply_state(struct pwm_device *pwm,
				  const struct pwm_state *state)
{
	return -ENOTSUPP;
}

static inline int pwm_adjust_config(struct pwm_device *pwm)
{
	return -ENOTSUPP;
}

static inline int pwm_config(struct pwm_device *pwm, int duty_ns,
			     int period_ns)
{
	return -EINVAL;
}

static inline int pwm_capture(struct pwm_device *pwm,
			      struct pwm_capture *result,
			      unsigned long timeout)
{
	return -EINVAL;
}

static inline int pwm_set_polarity(struct pwm_device *pwm,
				   enum pwm_polarity polarity)
{
	return -ENOTSUPP;
}

static inline int pwm_enable(struct pwm_device *pwm)
{
	return -EINVAL;
}

static inline void pwm_disable(struct pwm_device *pwm)
{
}

static inline int pwm_set_chip_data(struct pwm_device *pwm, void *data)
{
	return -EINVAL;
}

static inline void *pwm_get_chip_data(struct pwm_device *pwm)
{
	return NULL;
}

static inline int pwmchip_add(struct pwm_chip *chip)
{
	return -EINVAL;
}

static inline int pwmchip_add_inversed(struct pwm_chip *chip)
{
	return -EINVAL;
}

static inline int pwmchip_remove(struct pwm_chip *chip)
{
	return -EINVAL;
}

static inline struct pwm_device *pwm_request_from_chip(struct pwm_chip *chip,
						       unsigned int index,
						       const char *label)
{
	return ERR_PTR(-ENODEV);
}

static inline struct pwm_device *pwm_get(struct device *dev,
					 const char *consumer)
{
	return ERR_PTR(-ENODEV);
}

static inline struct pwm_device *of_pwm_get(struct device_node *np,
					    const char *con_id)
{
	return ERR_PTR(-ENODEV);
}

static inline void pwm_put(struct pwm_device *pwm)
{
}

static inline struct pwm_device *devm_pwm_get(struct device *dev,
					      const char *consumer)
{
	return ERR_PTR(-ENODEV);
}

static inline struct pwm_device *devm_of_pwm_get(struct device *dev,
						 struct device_node *np,
						 const char *con_id)
{
	return ERR_PTR(-ENODEV);
}

static inline void devm_pwm_put(struct device *dev, struct pwm_device *pwm)
{
}
#endif

static inline void pwm_apply_args(struct pwm_device *pwm)
{
	struct pwm_state state = { };

	/*
	 * PWM users calling pwm_apply_args() expect to have a fresh config
	 * where the polarity and period are set according to pwm_args info.
	 * The problem is, polarity can only be changed when the PWM is
	 * disabled.
	 *
	 * PWM drivers supporting hardware readout may declare the PWM device
	 * as enabled, and prevent polarity setting, which changes from the
	 * existing behavior, where all PWM devices are declared as disabled
	 * at startup (even if they are actually enabled), thus authorizing
	 * polarity setting.
	 *
	 * To fulfill this requirement, we apply a new state which disables
	 * the PWM device and set the reference period and polarity config.
	 *
	 * Note that PWM users requiring a smooth handover between the
	 * bootloader and the kernel (like critical regulators controlled by
	 * PWM devices) will have to switch to the atomic API and avoid calling
	 * pwm_apply_args().
	 */

	state.enabled = false;
	state.polarity = pwm->args.polarity;
	state.period = pwm->args.period;

	pwm_apply_state(pwm, &state);
}

struct pwm_lookup {
	struct list_head list;
	const char *provider;
	unsigned int index;
	const char *dev_id;
	const char *con_id;
	unsigned int period;
	enum pwm_polarity polarity;
	const char *module; /* optional, may be NULL */
};

#define PWM_LOOKUP_WITH_MODULE(_provider, _index, _dev_id, _con_id,	\
			       _period, _polarity, _module)		\
	{								\
		.provider = _provider,					\
		.index = _index,					\
		.dev_id = _dev_id,					\
		.con_id = _con_id,					\
		.period = _period,					\
		.polarity = _polarity,					\
		.module = _module,					\
	}

#define PWM_LOOKUP(_provider, _index, _dev_id, _con_id, _period, _polarity) \
	PWM_LOOKUP_WITH_MODULE(_provider, _index, _dev_id, _con_id, _period, \
			       _polarity, NULL)

#if IS_ENABLED(CONFIG_PWM)
void pwm_add_table(struct pwm_lookup *table, size_t num);
void pwm_remove_table(struct pwm_lookup *table, size_t num);
#else
static inline void pwm_add_table(struct pwm_lookup *table, size_t num)
{
}

static inline void pwm_remove_table(struct pwm_lookup *table, size_t num)
{
}
#endif

#ifdef CONFIG_PWM_SYSFS
void pwmchip_sysfs_export(struct pwm_chip *chip);
void pwmchip_sysfs_unexport(struct pwm_chip *chip);
void pwmchip_sysfs_unexport_children(struct pwm_chip *chip);
#else
static inline void pwmchip_sysfs_export(struct pwm_chip *chip)
{
}

static inline void pwmchip_sysfs_unexport(struct pwm_chip *chip)
{
}

static inline void pwmchip_sysfs_unexport_children(struct pwm_chip *chip)
{
}
#endif /* CONFIG_PWM_SYSFS */

#endif /* __LINUX_PWM_H */
s="hl opt">(bdev); } EXPORT_SYMBOL(fsync_bdev); /** * freeze_bdev -- lock a filesystem and force it into a consistent state * @bdev: blockdevice to lock * * If a superblock is found on this device, we take the s_umount semaphore * on it to make sure nobody unmounts until the snapshot creation is done. * The reference counter (bd_fsfreeze_count) guarantees that only the last * unfreeze process can unfreeze the frozen filesystem actually when multiple * freeze requests arrive simultaneously. It counts up in freeze_bdev() and * count down in thaw_bdev(). When it becomes 0, thaw_bdev() will unfreeze * actually. */ struct super_block *freeze_bdev(struct block_device *bdev) { struct super_block *sb; int error = 0; mutex_lock(&bdev->bd_fsfreeze_mutex); if (++bdev->bd_fsfreeze_count > 1) { /* * We don't even need to grab a reference - the first call * to freeze_bdev grab an active reference and only the last * thaw_bdev drops it. */ sb = get_super(bdev); drop_super(sb); mutex_unlock(&bdev->bd_fsfreeze_mutex); return sb; } sb = get_active_super(bdev); if (!sb) goto out; error = freeze_super(sb); if (error) { deactivate_super(sb); bdev->bd_fsfreeze_count--; mutex_unlock(&bdev->bd_fsfreeze_mutex); return ERR_PTR(error); } deactivate_super(sb); out: sync_blockdev(bdev); mutex_unlock(&bdev->bd_fsfreeze_mutex); return sb; /* thaw_bdev releases s->s_umount */ } EXPORT_SYMBOL(freeze_bdev); /** * thaw_bdev -- unlock filesystem * @bdev: blockdevice to unlock * @sb: associated superblock * * Unlocks the filesystem and marks it writeable again after freeze_bdev(). */ int thaw_bdev(struct block_device *bdev, struct super_block *sb) { int error = -EINVAL; mutex_lock(&bdev->bd_fsfreeze_mutex); if (!bdev->bd_fsfreeze_count) goto out; error = 0; if (--bdev->bd_fsfreeze_count > 0) goto out; if (!sb) goto out; error = thaw_super(sb); if (error) { bdev->bd_fsfreeze_count++; mutex_unlock(&bdev->bd_fsfreeze_mutex); return error; } out: mutex_unlock(&bdev->bd_fsfreeze_mutex); return 0; } EXPORT_SYMBOL(thaw_bdev); static int blkdev_writepage(struct page *page, struct writeback_control *wbc) { return block_write_full_page(page, blkdev_get_block, wbc); } static int blkdev_readpage(struct file * file, struct page * page) { return block_read_full_page(page, blkdev_get_block); } static int blkdev_write_begin(struct file *file, struct address_space *mapping, loff_t pos, unsigned len, unsigned flags, struct page **pagep, void **fsdata) { return block_write_begin(mapping, pos, len, flags, pagep, blkdev_get_block); } static int blkdev_write_end(struct file *file, struct address_space *mapping, loff_t pos, unsigned len, unsigned copied, struct page *page, void *fsdata) { int ret; ret = block_write_end(file, mapping, pos, len, copied, page, fsdata); unlock_page(page); page_cache_release(page); return ret; } /* * private llseek: * for a block special file file->f_path.dentry->d_inode->i_size is zero * so we compute the size by hand (just as in block_read/write above) */ static loff_t block_llseek(struct file *file, loff_t offset, int origin) { struct inode *bd_inode = file->f_mapping->host; loff_t size; loff_t retval; mutex_lock(&bd_inode->i_mutex); size = i_size_read(bd_inode); retval = -EINVAL; switch (origin) { case SEEK_END: offset += size; break; case SEEK_CUR: offset += file->f_pos; case SEEK_SET: break; default: goto out; } if (offset >= 0 && offset <= size) { if (offset != file->f_pos) { file->f_pos = offset; } retval = offset; } out: mutex_unlock(&bd_inode->i_mutex); return retval; } int blkdev_fsync(struct file *filp, loff_t start, loff_t end, int datasync) { struct inode *bd_inode = filp->f_mapping->host; struct block_device *bdev = I_BDEV(bd_inode); int error; error = filemap_write_and_wait_range(filp->f_mapping, start, end); if (error) return error; /* * There is no need to serialise calls to blkdev_issue_flush with * i_mutex and doing so causes performance issues with concurrent * O_SYNC writers to a block device. */ error = blkdev_issue_flush(bdev, GFP_KERNEL, NULL); if (error == -EOPNOTSUPP) error = 0; return error; } EXPORT_SYMBOL(blkdev_fsync); /* * pseudo-fs */ static __cacheline_aligned_in_smp DEFINE_SPINLOCK(bdev_lock); static struct kmem_cache * bdev_cachep __read_mostly; static struct inode *bdev_alloc_inode(struct super_block *sb) { struct bdev_inode *ei = kmem_cache_alloc(bdev_cachep, GFP_KERNEL); if (!ei) return NULL; return &ei->vfs_inode; } static void bdev_i_callback(struct rcu_head *head) { struct inode *inode = container_of(head, struct inode, i_rcu); struct bdev_inode *bdi = BDEV_I(inode); INIT_LIST_HEAD(&inode->i_dentry); kmem_cache_free(bdev_cachep, bdi); } static void bdev_destroy_inode(struct inode *inode) { call_rcu(&inode->i_rcu, bdev_i_callback); } static void init_once(void *foo) { struct bdev_inode *ei = (struct bdev_inode *) foo; struct block_device *bdev = &ei->bdev; memset(bdev, 0, sizeof(*bdev)); mutex_init(&bdev->bd_mutex); INIT_LIST_HEAD(&bdev->bd_inodes); INIT_LIST_HEAD(&bdev->bd_list); #ifdef CONFIG_SYSFS INIT_LIST_HEAD(&bdev->bd_holder_disks); #endif inode_init_once(&ei->vfs_inode); /* Initialize mutex for freeze. */ mutex_init(&bdev->bd_fsfreeze_mutex); } static inline void __bd_forget(struct inode *inode) { list_del_init(&inode->i_devices); inode->i_bdev = NULL; inode->i_mapping = &inode->i_data; } static void bdev_evict_inode(struct inode *inode) { struct block_device *bdev = &BDEV_I(inode)->bdev; struct list_head *p; truncate_inode_pages(&inode->i_data, 0); invalidate_inode_buffers(inode); /* is it needed here? */ end_writeback(inode); spin_lock(&bdev_lock); while ( (p = bdev->bd_inodes.next) != &bdev->bd_inodes ) { __bd_forget(list_entry(p, struct inode, i_devices)); } list_del_init(&bdev->bd_list); spin_unlock(&bdev_lock); } static const struct super_operations bdev_sops = { .statfs = simple_statfs, .alloc_inode = bdev_alloc_inode, .destroy_inode = bdev_destroy_inode, .drop_inode = generic_delete_inode, .evict_inode = bdev_evict_inode, }; static struct dentry *bd_mount(struct file_system_type *fs_type, int flags, const char *dev_name, void *data) { return mount_pseudo(fs_type, "bdev:", &bdev_sops, NULL, 0x62646576); } static struct file_system_type bd_type = { .name = "bdev", .mount = bd_mount, .kill_sb = kill_anon_super, }; struct super_block *blockdev_superblock __read_mostly; void __init bdev_cache_init(void) { int err; struct vfsmount *bd_mnt; bdev_cachep = kmem_cache_create("bdev_cache", sizeof(struct bdev_inode), 0, (SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT| SLAB_MEM_SPREAD|SLAB_PANIC), init_once); err = register_filesystem(&bd_type); if (err) panic("Cannot register bdev pseudo-fs"); bd_mnt = kern_mount(&bd_type); if (IS_ERR(bd_mnt)) panic("Cannot create bdev pseudo-fs"); /* * This vfsmount structure is only used to obtain the * blockdev_superblock, so tell kmemleak not to report it. */ kmemleak_not_leak(bd_mnt); blockdev_superblock = bd_mnt->mnt_sb; /* For writeback */ } /* * Most likely _very_ bad one - but then it's hardly critical for small * /dev and can be fixed when somebody will need really large one. * Keep in mind that it will be fed through icache hash function too. */ static inline unsigned long hash(dev_t dev) { return MAJOR(dev)+MINOR(dev); } static int bdev_test(struct inode *inode, void *data) { return BDEV_I(inode)->bdev.bd_dev == *(dev_t *)data; } static int bdev_set(struct inode *inode, void *data) { BDEV_I(inode)->bdev.bd_dev = *(dev_t *)data; return 0; } static LIST_HEAD(all_bdevs); struct block_device *bdget(dev_t dev) { struct block_device *bdev; struct inode *inode; inode = iget5_locked(blockdev_superblock, hash(dev), bdev_test, bdev_set, &dev); if (!inode) return NULL; bdev = &BDEV_I(inode)->bdev; if (inode->i_state & I_NEW) { bdev->bd_contains = NULL; bdev->bd_super = NULL; bdev->bd_inode = inode; bdev->bd_block_size = (1 << inode->i_blkbits); bdev->bd_part_count = 0; bdev->bd_invalidated = 0; inode->i_mode = S_IFBLK; inode->i_rdev = dev; inode->i_bdev = bdev; inode->i_data.a_ops = &def_blk_aops; mapping_set_gfp_mask(&inode->i_data, GFP_USER); inode->i_data.backing_dev_info = &default_backing_dev_info; spin_lock(&bdev_lock); list_add(&bdev->bd_list, &all_bdevs); spin_unlock(&bdev_lock); unlock_new_inode(inode); } return bdev; } EXPORT_SYMBOL(bdget); /** * bdgrab -- Grab a reference to an already referenced block device * @bdev: Block device to grab a reference to. */ struct block_device *bdgrab(struct block_device *bdev) { ihold(bdev->bd_inode); return bdev; } long nr_blockdev_pages(void) { struct block_device *bdev; long ret = 0; spin_lock(&bdev_lock); list_for_each_entry(bdev, &all_bdevs, bd_list) { ret += bdev->bd_inode->i_mapping->nrpages; } spin_unlock(&bdev_lock); return ret; } void bdput(struct block_device *bdev) { iput(bdev->bd_inode); } EXPORT_SYMBOL(bdput); static struct block_device *bd_acquire(struct inode *inode) { struct block_device *bdev; spin_lock(&bdev_lock); bdev = inode->i_bdev; if (bdev) { ihold(bdev->bd_inode); spin_unlock(&bdev_lock); return bdev; } spin_unlock(&bdev_lock); bdev = bdget(inode->i_rdev); if (bdev) { spin_lock(&bdev_lock); if (!inode->i_bdev) { /* * We take an additional reference to bd_inode, * and it's released in clear_inode() of inode. * So, we can access it via ->i_mapping always * without igrab(). */ ihold(bdev->bd_inode); inode->i_bdev = bdev; inode->i_mapping = bdev->bd_inode->i_mapping; list_add(&inode->i_devices, &bdev->bd_inodes); } spin_unlock(&bdev_lock); } return bdev; } /* Call when you free inode */ void bd_forget(struct inode *inode) { struct block_device *bdev = NULL; spin_lock(&bdev_lock); if (inode->i_bdev) { if (!sb_is_blkdev_sb(inode->i_sb)) bdev = inode->i_bdev; __bd_forget(inode); } spin_unlock(&bdev_lock); if (bdev) iput(bdev->bd_inode); } /** * bd_may_claim - test whether a block device can be claimed * @bdev: block device of interest * @whole: whole block device containing @bdev, may equal @bdev * @holder: holder trying to claim @bdev * * Test whether @bdev can be claimed by @holder. * * CONTEXT: * spin_lock(&bdev_lock). * * RETURNS: * %true if @bdev can be claimed, %false otherwise. */ static bool bd_may_claim(struct block_device *bdev, struct block_device *whole, void *holder) { if (bdev->bd_holder == holder) return true; /* already a holder */ else if (bdev->bd_holder != NULL) return false; /* held by someone else */ else if (bdev->bd_contains == bdev) return true; /* is a whole device which isn't held */ else if (whole->bd_holder == bd_may_claim) return true; /* is a partition of a device that is being partitioned */ else if (whole->bd_holder != NULL) return false; /* is a partition of a held device */ else return true; /* is a partition of an un-held device */ } /** * bd_prepare_to_claim - prepare to claim a block device * @bdev: block device of interest * @whole: the whole device containing @bdev, may equal @bdev * @holder: holder trying to claim @bdev * * Prepare to claim @bdev. This function fails if @bdev is already * claimed by another holder and waits if another claiming is in * progress. This function doesn't actually claim. On successful * return, the caller has ownership of bd_claiming and bd_holder[s]. * * CONTEXT: * spin_lock(&bdev_lock). Might release bdev_lock, sleep and regrab * it multiple times. * * RETURNS: * 0 if @bdev can be claimed, -EBUSY otherwise. */ static int bd_prepare_to_claim(struct block_device *bdev, struct block_device *whole, void *holder) { retry: /* if someone else claimed, fail */ if (!bd_may_claim(bdev, whole, holder)) return -EBUSY; /* if claiming is already in progress, wait for it to finish */ if (whole->bd_claiming) { wait_queue_head_t *wq = bit_waitqueue(&whole->bd_claiming, 0); DEFINE_WAIT(wait); prepare_to_wait(wq, &wait, TASK_UNINTERRUPTIBLE); spin_unlock(&bdev_lock); schedule(); finish_wait(wq, &wait); spin_lock(&bdev_lock); goto retry; } /* yay, all mine */ return 0; } /** * bd_start_claiming - start claiming a block device * @bdev: block device of interest * @holder: holder trying to claim @bdev * * @bdev is about to be opened exclusively. Check @bdev can be opened * exclusively and mark that an exclusive open is in progress. Each * successful call to this function must be matched with a call to * either bd_finish_claiming() or bd_abort_claiming() (which do not * fail). * * This function is used to gain exclusive access to the block device * without actually causing other exclusive open attempts to fail. It * should be used when the open sequence itself requires exclusive * access but may subsequently fail. * * CONTEXT: * Might sleep. * * RETURNS: * Pointer to the block device containing @bdev on success, ERR_PTR() * value on failure. */ static struct block_device *bd_start_claiming(struct block_device *bdev, void *holder) { struct gendisk *disk; struct block_device *whole; int partno, err; might_sleep(); /* * @bdev might not have been initialized properly yet, look up * and grab the outer block device the hard way. */ disk = get_gendisk(bdev->bd_dev, &partno); if (!disk) return ERR_PTR(-ENXIO); /* * Normally, @bdev should equal what's returned from bdget_disk() * if partno is 0; however, some drivers (floppy) use multiple * bdev's for the same physical device and @bdev may be one of the * aliases. Keep @bdev if partno is 0. This means claimer * tracking is broken for those devices but it has always been that * way. */ if (partno) whole = bdget_disk(disk, 0); else whole = bdgrab(bdev); module_put(disk->fops->owner); put_disk(disk); if (!whole) return ERR_PTR(-ENOMEM); /* prepare to claim, if successful, mark claiming in progress */ spin_lock(&bdev_lock); err = bd_prepare_to_claim(bdev, whole, holder); if (err == 0) { whole->bd_claiming = holder; spin_unlock(&bdev_lock); return whole; } else { spin_unlock(&bdev_lock); bdput(whole); return ERR_PTR(err); } } #ifdef CONFIG_SYSFS struct bd_holder_disk { struct list_head list; struct gendisk *disk; int refcnt; }; static struct bd_holder_disk *bd_find_holder_disk(struct block_device *bdev, struct gendisk *disk) { struct bd_holder_disk *holder; list_for_each_entry(holder, &bdev->bd_holder_disks, list) if (holder->disk == disk) return holder; return NULL; } static int add_symlink(struct kobject *from, struct kobject *to) { return sysfs_create_link(from, to, kobject_name(to)); } static void del_symlink(struct kobject *from, struct kobject *to) { sysfs_remove_link(from, kobject_name(to)); } /** * bd_link_disk_holder - create symlinks between holding disk and slave bdev * @bdev: the claimed slave bdev * @disk: the holding disk * * DON'T USE THIS UNLESS YOU'RE ALREADY USING IT. * * This functions creates the following sysfs symlinks. * * - from "slaves" directory of the holder @disk to the claimed @bdev * - from "holders" directory of the @bdev to the holder @disk * * For example, if /dev/dm-0 maps to /dev/sda and disk for dm-0 is * passed to bd_link_disk_holder(), then: * * /sys/block/dm-0/slaves/sda --> /sys/block/sda * /sys/block/sda/holders/dm-0 --> /sys/block/dm-0 * * The caller must have claimed @bdev before calling this function and * ensure that both @bdev and @disk are valid during the creation and * lifetime of these symlinks. * * CONTEXT: * Might sleep. * * RETURNS: * 0 on success, -errno on failure. */ int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk) { struct bd_holder_disk *holder; int ret = 0; mutex_lock(&bdev->bd_mutex); WARN_ON_ONCE(!bdev->bd_holder); /* FIXME: remove the following once add_disk() handles errors */ if (WARN_ON(!disk->slave_dir || !bdev->bd_part->holder_dir)) goto out_unlock; holder = bd_find_holder_disk(bdev, disk); if (holder) { holder->refcnt++; goto out_unlock; } holder = kzalloc(sizeof(*holder), GFP_KERNEL); if (!holder) { ret = -ENOMEM; goto out_unlock; } INIT_LIST_HEAD(&holder->list); holder->disk = disk; holder->refcnt = 1; ret = add_symlink(disk->slave_dir, &part_to_dev(bdev->bd_part)->kobj); if (ret) goto out_free; ret = add_symlink(bdev->bd_part->holder_dir, &disk_to_dev(disk)->kobj); if (ret) goto out_del; /* * bdev could be deleted beneath us which would implicitly destroy * the holder directory. Hold on to it. */ kobject_get(bdev->bd_part->holder_dir); list_add(&holder->list, &bdev->bd_holder_disks); goto out_unlock; out_del: del_symlink(disk->slave_dir, &part_to_dev(bdev->bd_part)->kobj); out_free: kfree(holder); out_unlock: mutex_unlock(&bdev->bd_mutex); return ret; } EXPORT_SYMBOL_GPL(bd_link_disk_holder); /** * bd_unlink_disk_holder - destroy symlinks created by bd_link_disk_holder() * @bdev: the calimed slave bdev * @disk: the holding disk * * DON'T USE THIS UNLESS YOU'RE ALREADY USING IT. * * CONTEXT: * Might sleep. */ void bd_unlink_disk_holder(struct block_device *bdev, struct gendisk *disk) { struct bd_holder_disk *holder; mutex_lock(&bdev->bd_mutex); holder = bd_find_holder_disk(bdev, disk); if (!WARN_ON_ONCE(holder == NULL) && !--holder->refcnt) { del_symlink(disk->slave_dir, &part_to_dev(bdev->bd_part)->kobj); del_symlink(bdev->bd_part->holder_dir, &disk_to_dev(disk)->kobj); kobject_put(bdev->bd_part->holder_dir); list_del_init(&holder->list); kfree(holder); } mutex_unlock(&bdev->bd_mutex); } EXPORT_SYMBOL_GPL(bd_unlink_disk_holder); #endif /** * flush_disk - invalidates all buffer-cache entries on a disk * * @bdev: struct block device to be flushed * @kill_dirty: flag to guide handling of dirty inodes * * Invalidates all buffer-cache entries on a disk. It should be called * when a disk has been changed -- either by a media change or online * resize. */ static void flush_disk(struct block_device *bdev, bool kill_dirty) { if (__invalidate_device(bdev, kill_dirty)) { char name[BDEVNAME_SIZE] = ""; if (bdev->bd_disk) disk_name(bdev->bd_disk, 0, name); printk(KERN_WARNING "VFS: busy inodes on changed media or " "resized disk %s\n", name); } if (!bdev->bd_disk) return; if (disk_partitionable(bdev->bd_disk)) bdev->bd_invalidated = 1; } /** * check_disk_size_change - checks for disk size change and adjusts bdev size. * @disk: struct gendisk to check * @bdev: struct bdev to adjust. * * This routine checks to see if the bdev size does not match the disk size * and adjusts it if it differs. */ void check_disk_size_change(struct gendisk *disk, struct block_device *bdev) { loff_t disk_size, bdev_size; disk_size = (loff_t)get_capacity(disk) << 9; bdev_size = i_size_read(bdev->bd_inode); if (disk_size != bdev_size) { char name[BDEVNAME_SIZE]; disk_name(disk, 0, name); printk(KERN_INFO "%s: detected capacity change from %lld to %lld\n", name, bdev_size, disk_size); i_size_write(bdev->bd_inode, disk_size); flush_disk(bdev, false); } } EXPORT_SYMBOL(check_disk_size_change); /** * revalidate_disk - wrapper for lower-level driver's revalidate_disk call-back * @disk: struct gendisk to be revalidated * * This routine is a wrapper for lower-level driver's revalidate_disk * call-backs. It is used to do common pre and post operations needed * for all revalidate_disk operations. */ int revalidate_disk(struct gendisk *disk) { struct block_device *bdev; int ret = 0; if (disk->fops->revalidate_disk) ret = disk->fops->revalidate_disk(disk); bdev = bdget_disk(disk, 0); if (!bdev) return ret; mutex_lock(&bdev->bd_mutex); check_disk_size_change(disk, bdev); mutex_unlock(&bdev->bd_mutex); bdput(bdev); return ret; } EXPORT_SYMBOL(revalidate_disk); /* * This routine checks whether a removable media has been changed, * and invalidates all buffer-cache-entries in that case. This * is a relatively slow routine, so we have to try to minimize using * it. Thus it is called only upon a 'mount' or 'open'. This * is the best way of combining speed and utility, I think. * People changing diskettes in the middle of an operation deserve * to lose :-) */ int check_disk_change(struct block_device *bdev) { struct gendisk *disk = bdev->bd_disk; const struct block_device_operations *bdops = disk->fops; unsigned int events; events = disk_clear_events(disk, DISK_EVENT_MEDIA_CHANGE | DISK_EVENT_EJECT_REQUEST); if (!(events & DISK_EVENT_MEDIA_CHANGE)) return 0; flush_disk(bdev, true); if (bdops->revalidate_disk) bdops->revalidate_disk(bdev->bd_disk); return 1; } EXPORT_SYMBOL(check_disk_change); void bd_set_size(struct block_device *bdev, loff_t size) { unsigned bsize = bdev_logical_block_size(bdev); bdev->bd_inode->i_size = size; while (bsize < PAGE_CACHE_SIZE) { if (size & bsize) break; bsize <<= 1; } bdev->bd_block_size = bsize; bdev->bd_inode->i_blkbits = blksize_bits(bsize); } EXPORT_SYMBOL(bd_set_size); static int __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part); /* * bd_mutex locking: * * mutex_lock(part->bd_mutex) * mutex_lock_nested(whole->bd_mutex, 1) */ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part) { struct gendisk *disk; int ret; int partno; int perm = 0; if (mode & FMODE_READ) perm |= MAY_READ; if (mode & FMODE_WRITE) perm |= MAY_WRITE; /* * hooks: /n/, see "layering violations". */ if (!for_part) { ret = devcgroup_inode_permission(bdev->bd_inode, perm); if (ret != 0) { bdput(bdev); return ret; } } restart: ret = -ENXIO; disk = get_gendisk(bdev->bd_dev, &partno); if (!disk) goto out; disk_block_events(disk); mutex_lock_nested(&bdev->bd_mutex, for_part); if (!bdev->bd_openers) { bdev->bd_disk = disk; bdev->bd_contains = bdev; if (!partno) { struct backing_dev_info *bdi; ret = -ENXIO; bdev->bd_part = disk_get_part(disk, partno); if (!bdev->bd_part) goto out_clear; ret = 0; if (disk->fops->open) { ret = disk->fops->open(bdev, mode); if (ret == -ERESTARTSYS) { /* Lost a race with 'disk' being * deleted, try again. * See md.c */ disk_put_part(bdev->bd_part); bdev->bd_part = NULL; bdev->bd_disk = NULL; mutex_unlock(&bdev->bd_mutex); disk_unblock_events(disk); module_put(disk->fops->owner); put_disk(disk); goto restart; } } if (!ret && !bdev->bd_openers) { bd_set_size(bdev,(loff_t)get_capacity(disk)<<9); bdi = blk_get_backing_dev_info(bdev); if (bdi == NULL) bdi = &default_backing_dev_info; bdev_inode_switch_bdi(bdev->bd_inode, bdi); } /* * If the device is invalidated, rescan partition * if open succeeded or failed with -ENOMEDIUM. * The latter is necessary to prevent ghost * partitions on a removed medium. */ if (bdev->bd_invalidated && (!ret || ret == -ENOMEDIUM)) rescan_partitions(disk, bdev); if (ret) goto out_clear; } else { struct block_device *whole; whole = bdget_disk(disk, 0); ret = -ENOMEM; if (!whole) goto out_clear; BUG_ON(for_part); ret = __blkdev_get(whole, mode, 1); if (ret) goto out_clear; bdev->bd_contains = whole; bdev_inode_switch_bdi(bdev->bd_inode, whole->bd_inode->i_data.backing_dev_info); bdev->bd_part = disk_get_part(disk, partno); if (!(disk->flags & GENHD_FL_UP) || !bdev->bd_part || !bdev->bd_part->nr_sects) { ret = -ENXIO; goto out_clear; } bd_set_size(bdev, (loff_t)bdev->bd_part->nr_sects << 9); } } else { if (bdev->bd_contains == bdev) { ret = 0; if (bdev->bd_disk->fops->open) ret = bdev->bd_disk->fops->open(bdev, mode); /* the same as first opener case, read comment there */ if (bdev->bd_invalidated && (!ret || ret == -ENOMEDIUM)) rescan_partitions(bdev->bd_disk, bdev); if (ret) goto out_unlock_bdev; } /* only one opener holds refs to the module and disk */ module_put(disk->fops->owner); put_disk(disk); } bdev->bd_openers++; if (for_part) bdev->bd_part_count++; mutex_unlock(&bdev->bd_mutex); disk_unblock_events(disk); return 0; out_clear: disk_put_part(bdev->bd_part); bdev->bd_disk = NULL; bdev->bd_part = NULL; bdev_inode_switch_bdi(bdev->bd_inode, &default_backing_dev_info); if (bdev != bdev->bd_contains) __blkdev_put(bdev->bd_contains, mode, 1); bdev->bd_contains = NULL; out_unlock_bdev: mutex_unlock(&bdev->bd_mutex); disk_unblock_events(disk); module_put(disk->fops->owner); put_disk(disk); out: bdput(bdev); return ret; } /** * blkdev_get - open a block device * @bdev: block_device to open * @mode: FMODE_* mask * @holder: exclusive holder identifier * * Open @bdev with @mode. If @mode includes %FMODE_EXCL, @bdev is * open with exclusive access. Specifying %FMODE_EXCL with %NULL * @holder is invalid. Exclusive opens may nest for the same @holder. * * On success, the reference count of @bdev is unchanged. On failure, * @bdev is put. * * CONTEXT: * Might sleep. * * RETURNS: * 0 on success, -errno on failure. */ int blkdev_get(struct block_device *bdev, fmode_t mode, void *holder) { struct block_device *whole = NULL; int res; WARN_ON_ONCE((mode & FMODE_EXCL) && !holder); if ((mode & FMODE_EXCL) && holder) { whole = bd_start_claiming(bdev, holder); if (IS_ERR(whole)) { bdput(bdev); return PTR_ERR(whole); } } res = __blkdev_get(bdev, mode, 0); if (whole) { struct gendisk *disk = whole->bd_disk; /* finish claiming */ mutex_lock(&bdev->bd_mutex); spin_lock(&bdev_lock); if (!res) { BUG_ON(!bd_may_claim(bdev, whole, holder)); /* * Note that for a whole device bd_holders * will be incremented twice, and bd_holder * will be set to bd_may_claim before being * set to holder */ whole->bd_holders++; whole->bd_holder = bd_may_claim; bdev->bd_holders++; bdev->bd_holder = holder; } /* tell others that we're done */ BUG_ON(whole->bd_claiming != holder); whole->bd_claiming = NULL; wake_up_bit(&whole->bd_claiming, 0); spin_unlock(&bdev_lock); /* * Block event polling for write claims if requested. Any * write holder makes the write_holder state stick until * all are released. This is good enough and tracking * individual writeable reference is too fragile given the * way @mode is used in blkdev_get/put(). */ if (!res && (mode & FMODE_WRITE) && !bdev->bd_write_holder && (disk->flags & GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE)) { bdev->bd_write_holder = true; disk_block_events(disk); } mutex_unlock(&bdev->bd_mutex); bdput(whole); } return res; } EXPORT_SYMBOL(blkdev_get); /** * blkdev_get_by_path - open a block device by name * @path: path to the block device to open * @mode: FMODE_* mask * @holder: exclusive holder identifier * * Open the blockdevice described by the device file at @path. @mode * and @holder are identical to blkdev_get(). * * On success, the returned block_device has reference count of one. * * CONTEXT: * Might sleep. * * RETURNS: * Pointer to block_device on success, ERR_PTR(-errno) on failure. */ struct block_device *blkdev_get_by_path(const char *path, fmode_t mode, void *holder) { struct block_device *bdev; int err; bdev = lookup_bdev(path); if (IS_ERR(bdev)) return bdev; err = blkdev_get(bdev, mode, holder); if (err) return ERR_PTR(err); if ((mode & FMODE_WRITE) && bdev_read_only(bdev)) { blkdev_put(bdev, mode); return ERR_PTR(-EACCES); } return bdev; } EXPORT_SYMBOL(blkdev_get_by_path); /** * blkdev_get_by_dev - open a block device by device number * @dev: device number of block device to open * @mode: FMODE_* mask * @holder: exclusive holder identifier * * Open the blockdevice described by device number @dev. @mode and * @holder are identical to blkdev_get(). * * Use it ONLY if you really do not have anything better - i.e. when * you are behind a truly sucky interface and all you are given is a * device number. _Never_ to be used for internal purposes. If you * ever need it - reconsider your API. * * On success, the returned block_device has reference count of one. * * CONTEXT: * Might sleep. * * RETURNS: * Pointer to block_device on success, ERR_PTR(-errno) on failure. */ struct block_device *blkdev_get_by_dev(dev_t dev, fmode_t mode, void *holder) { struct block_device *bdev; int err; bdev = bdget(dev); if (!bdev) return ERR_PTR(-ENOMEM); err = blkdev_get(bdev, mode, holder); if (err) return ERR_PTR(err); return bdev; } EXPORT_SYMBOL(blkdev_get_by_dev); static int blkdev_open(struct inode * inode, struct file * filp) { struct block_device *bdev; /* * Preserve backwards compatibility and allow large file access * even if userspace doesn't ask for it explicitly. Some mkfs * binary needs it. We might want to drop this workaround * during an unstable branch. */ filp->f_flags |= O_LARGEFILE; if (filp->f_flags & O_NDELAY) filp->f_mode |= FMODE_NDELAY; if (filp->f_flags & O_EXCL) filp->f_mode |= FMODE_EXCL; if ((filp->f_flags & O_ACCMODE) == 3) filp->f_mode |= FMODE_WRITE_IOCTL; bdev = bd_acquire(inode); if (bdev == NULL) return -ENOMEM; filp->f_mapping = bdev->bd_inode->i_mapping; return blkdev_get(bdev, filp->f_mode, filp); } static int __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part) { int ret = 0; struct gendisk *disk = bdev->bd_disk; struct block_device *victim = NULL; mutex_lock_nested(&bdev->bd_mutex, for_part); if (for_part) bdev->bd_part_count--; if (!--bdev->bd_openers) { WARN_ON_ONCE(bdev->bd_holders); sync_blockdev(bdev); kill_bdev(bdev); /* ->release can cause the old bdi to disappear, * so must switch it out first */ bdev_inode_switch_bdi(bdev->bd_inode, &default_backing_dev_info); } if (bdev->bd_contains == bdev) { if (disk->fops->release) ret = disk->fops->release(disk, mode); } if (!bdev->bd_openers) { struct module *owner = disk->fops->owner; put_disk(disk); module_put(owner); disk_put_part(bdev->bd_part); bdev->bd_part = NULL; bdev->bd_disk = NULL; if (bdev != bdev->bd_contains) victim = bdev->bd_contains; bdev->bd_contains = NULL; } mutex_unlock(&bdev->bd_mutex); bdput(bdev); if (victim) __blkdev_put(victim, mode, 1); return ret; } int blkdev_put(struct block_device *bdev, fmode_t mode) { mutex_lock(&bdev->bd_mutex); if (mode & FMODE_EXCL) { bool bdev_free; /* * Release a claim on the device. The holder fields * are protected with bdev_lock. bd_mutex is to * synchronize disk_holder unlinking. */ spin_lock(&bdev_lock); WARN_ON_ONCE(--bdev->bd_holders < 0); WARN_ON_ONCE(--bdev->bd_contains->bd_holders < 0); /* bd_contains might point to self, check in a separate step */ if ((bdev_free = !bdev->bd_holders)) bdev->bd_holder = NULL; if (!bdev->bd_contains->bd_holders) bdev->bd_contains->bd_holder = NULL; spin_unlock(&bdev_lock); /* * If this was the last claim, remove holder link and * unblock evpoll if it was a write holder. */ if (bdev_free && bdev->bd_write_holder) { disk_unblock_events(bdev->bd_disk); bdev->bd_write_holder = false; } } /* * Trigger event checking and tell drivers to flush MEDIA_CHANGE * event. This is to ensure detection of media removal commanded * from userland - e.g. eject(1). */ disk_flush_events(bdev->bd_disk, DISK_EVENT_MEDIA_CHANGE); mutex_unlock(&bdev->bd_mutex); return __blkdev_put(bdev, mode, 0); } EXPORT_SYMBOL(blkdev_put); static int blkdev_close(struct inode * inode, struct file * filp) { struct block_device *bdev = I_BDEV(filp->f_mapping->host); return blkdev_put(bdev, filp->f_mode); } static long block_ioctl(struct file *file, unsigned cmd, unsigned long arg) { struct block_device *bdev = I_BDEV(file->f_mapping->host); fmode_t mode = file->f_mode; /* * O_NDELAY can be altered using fcntl(.., F_SETFL, ..), so we have * to updated it before every ioctl. */ if (file->f_flags & O_NDELAY) mode |= FMODE_NDELAY; else mode &= ~FMODE_NDELAY; return blkdev_ioctl(bdev, mode, cmd, arg); } /* * Write data to the block device. Only intended for the block device itself * and the raw driver which basically is a fake block device. * * Does not take i_mutex for the write and thus is not for general purpose * use. */ ssize_t blkdev_aio_write(struct kiocb *iocb, const struct iovec *iov, unsigned long nr_segs, loff_t pos) { struct file *file = iocb->ki_filp; ssize_t ret; BUG_ON(iocb->ki_pos != pos); ret = __generic_file_aio_write(iocb, iov, nr_segs, &iocb->ki_pos); if (ret > 0 || ret == -EIOCBQUEUED) { ssize_t err; err = generic_write_sync(file, pos, ret); if (err < 0 && ret > 0) ret = err; } return ret; } EXPORT_SYMBOL_GPL(blkdev_aio_write); /* * Try to release a page associated with block device when the system * is under memory pressure. */ static int blkdev_releasepage(struct page *page, gfp_t wait) { struct super_block *super = BDEV_I(page->mapping->host)->bdev.bd_super; if (super && super->s_op->bdev_try_to_free_page) return super->s_op->bdev_try_to_free_page(super, page, wait); return try_to_free_buffers(page); } static const struct address_space_operations def_blk_aops = { .readpage = blkdev_readpage, .writepage = blkdev_writepage, .write_begin = blkdev_write_begin, .write_end = blkdev_write_end, .writepages = generic_writepages, .releasepage = blkdev_releasepage, .direct_IO = blkdev_direct_IO, }; const struct file_operations def_blk_fops = { .open = blkdev_open, .release = blkdev_close, .llseek = block_llseek, .read = do_sync_read, .write = do_sync_write, .aio_read = generic_file_aio_read, .aio_write = blkdev_aio_write, .mmap = generic_file_mmap, .fsync = blkdev_fsync, .unlocked_ioctl = block_ioctl, #ifdef CONFIG_COMPAT .compat_ioctl = compat_blkdev_ioctl, #endif .splice_read = generic_file_splice_read, .splice_write = generic_file_splice_write, }; int ioctl_by_bdev(struct block_device *bdev, unsigned cmd, unsigned long arg) { int res; mm_segment_t old_fs = get_fs(); set_fs(KERNEL_DS); res = blkdev_ioctl(bdev, 0, cmd, arg); set_fs(old_fs); return res; } EXPORT_SYMBOL(ioctl_by_bdev); /** * lookup_bdev - lookup a struct block_device by name * @pathname: special file representing the block device * * Get a reference to the blockdevice at @pathname in the current * namespace if possible and return it. Return ERR_PTR(error) * otherwise. */ struct block_device *lookup_bdev(const char *pathname) { struct block_device *bdev; struct inode *inode; struct path path; int error; if (!pathname || !*pathname) return ERR_PTR(-EINVAL); error = kern_path(pathname, LOOKUP_FOLLOW, &path); if (error) return ERR_PTR(error); inode = path.dentry->d_inode; error = -ENOTBLK; if (!S_ISBLK(inode->i_mode)) goto fail; error = -EACCES; if (path.mnt->mnt_flags & MNT_NODEV) goto fail; error = -ENOMEM; bdev = bd_acquire(inode); if (!bdev) goto fail; out: path_put(&path); return bdev; fail: bdev = ERR_PTR(error); goto out; } EXPORT_SYMBOL(lookup_bdev); int __invalidate_device(struct block_device *bdev, bool kill_dirty) { struct super_block *sb = get_super(bdev); int res = 0; if (sb) { /* * no need to lock the super, get_super holds the * read mutex so the filesystem cannot go away * under us (->put_super runs with the write lock * hold). */ shrink_dcache_sb(sb); res = invalidate_inodes(sb, kill_dirty); drop_super(sb); } invalidate_bdev(bdev); return res; } EXPORT_SYMBOL(__invalidate_device);