about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author: Linus Torvalds <torvalds@linux-foundation.org> 2015-02-17 11:38:30 -0500
committer: Linus Torvalds <torvalds@linux-foundation.org> 2015-02-17 11:38:30 -0500
commit: c397f8fa4379040bada53256c848e62c8b060392 (patch)
tree: 8101efb5c0c3b0a73e5e65f3474843c0914cc4d0
parent: 796e1c55717e9a6ff5c81b12289ffa1ffd919b6f (diff)
parent: aaaf5fbf56f16c81a653713cc333b18ad6e25ea9 (diff)
Merge branch 'akpm' (patches from Andrew)
Merge fifth set of updates from Andrew Morton:

 - A few things which were awaiting merges from linux-next:
     - rtc
     - ocfs2
     - misc others

 - Willy's "dax" feature: direct fs access to memory (mainly NV-DIMMs)
   which isn't backed by pageframes.

* emailed patches from Andrew Morton <akpm@linux-foundation.org>: (37 commits)
  rtc: add driver for DS1685 family of real time clocks
  MAINTAINERS: add entry for Maxim PMICs on Samsung boards
  lib/Kconfig: use bool instead of boolean
  powerpc: drop _PAGE_FILE and pte_file()-related helpers
  ocfs2: set append dio as a ro compat feature
  ocfs2: wait for orphan recovery first once append O_DIRECT write crash
  ocfs2: complete the rest request through buffer io
  ocfs2: do not fallback to buffer I/O write if appending
  ocfs2: allocate blocks in ocfs2_direct_IO_get_blocks
  ocfs2: implement ocfs2_direct_IO_write
  ocfs2: add orphan recovery types in ocfs2_recover_orphans
  ocfs2: add functions to add and remove inode in orphan dir
  ocfs2: prepare some interfaces used in append direct io
  MAINTAINERS: fix spelling mistake & remove trailing WS
  dax: does not work correctly with virtual aliasing caches
  brd: rename XIP to DAX
  ext4: add DAX functionality
  dax: add dax_zero_page_range
  ext2: get rid of most mentions of XIP in ext2
  ext2: remove ext2_aops_xip
  ...
-rw-r--r-- Documentation/filesystems/00-INDEX 5
-rw-r--r-- Documentation/filesystems/Locking 3
-rw-r--r-- Documentation/filesystems/dax.txt 94
-rw-r--r-- Documentation/filesystems/ext2.txt 5
-rw-r--r-- Documentation/filesystems/ext4.txt 4
-rw-r--r-- Documentation/filesystems/vfs.txt 7
-rw-r--r-- Documentation/filesystems/xip.txt 71
-rw-r--r-- MAINTAINERS 34
-rw-r--r-- arch/arm/boot/dts/zynq-parallella.dts 2
-rw-r--r-- arch/powerpc/include/asm/pgtable-ppc32.h 9
-rw-r--r-- arch/powerpc/include/asm/pgtable-ppc64.h 5
-rw-r--r-- arch/powerpc/include/asm/pgtable.h 1
-rw-r--r-- arch/powerpc/include/asm/pte-40x.h 1
-rw-r--r-- arch/powerpc/include/asm/pte-44x.h 5
-rw-r--r-- arch/powerpc/include/asm/pte-8xx.h 1
-rw-r--r-- arch/powerpc/include/asm/pte-book3e.h 1
-rw-r--r-- arch/powerpc/include/asm/pte-fsl-booke.h 3
-rw-r--r-- arch/powerpc/include/asm/pte-hash32.h 1
-rw-r--r-- arch/powerpc/include/asm/pte-hash64.h 1
-rw-r--r-- arch/powerpc/mm/pgtable_64.c 2
-rw-r--r-- drivers/block/Kconfig 13
-rw-r--r-- drivers/block/brd.c 14
-rw-r--r-- drivers/rtc/Kconfig 90
-rw-r--r-- drivers/rtc/Makefile 1
-rw-r--r-- drivers/rtc/rtc-ds1685.c 2252
-rw-r--r-- drivers/rtc/rtc-isl12022.c 3
-rw-r--r-- drivers/rtc/rtc-isl12057.c 3
-rw-r--r-- drivers/staging/iio/light/isl29028.c 4
-rw-r--r-- fs/Kconfig 22
-rw-r--r-- fs/Makefile 1
-rw-r--r-- fs/dax.c 534
-rw-r--r-- fs/exofs/inode.c 1
-rw-r--r-- fs/ext2/Kconfig 11
-rw-r--r-- fs/ext2/Makefile 1
-rw-r--r-- fs/ext2/ext2.h 10
-rw-r--r-- fs/ext2/file.c 44
-rw-r--r-- fs/ext2/inode.c 38
-rw-r--r-- fs/ext2/namei.c 13
-rw-r--r-- fs/ext2/super.c 53
-rw-r--r-- fs/ext2/xip.c 86
-rw-r--r-- fs/ext2/xip.h 26
-rw-r--r-- fs/ext4/ext4.h 6
-rw-r--r-- fs/ext4/file.c 49
-rw-r--r-- fs/ext4/indirect.c 18
-rw-r--r-- fs/ext4/inode.c 89
-rw-r--r-- fs/ext4/namei.c 10
-rw-r--r-- fs/ext4/super.c 39
-rw-r--r-- fs/ocfs2/aops.c 242
-rw-r--r-- fs/ocfs2/file.c 76
-rw-r--r-- fs/ocfs2/file.h 9
-rw-r--r-- fs/ocfs2/inode.c 2
-rw-r--r-- fs/ocfs2/inode.h 2
-rw-r--r-- fs/ocfs2/journal.c 110
-rw-r--r-- fs/ocfs2/journal.h 5
-rw-r--r-- fs/ocfs2/namei.c 284
-rw-r--r-- fs/ocfs2/namei.h 8
-rw-r--r-- fs/ocfs2/ocfs2.h 23
-rw-r--r-- fs/ocfs2/ocfs2_fs.h 14
-rw-r--r-- fs/ocfs2/super.c 2
-rw-r--r-- fs/open.c 5
-rw-r--r-- include/linux/fs.h 34
-rw-r--r-- include/linux/mm.h 1
-rw-r--r-- include/linux/rmap.h 2
-rw-r--r-- include/linux/rtc/ds1685.h 375
-rw-r--r-- lib/Kconfig 2
-rw-r--r-- mm/Makefile 1
-rw-r--r-- mm/fadvise.c 6
-rw-r--r-- mm/filemap.c 25
-rw-r--r-- mm/filemap_xip.c 478
-rw-r--r-- mm/madvise.c 2
-rw-r--r-- mm/memory.c 42
-rwxr-xr-x scripts/diffconfig 1
72 files changed, 4491 insertions, 946 deletions
diff --git a/Documentation/filesystems/00-INDEX b/Documentation/filesystems/00-INDEX
index ac28149aede4..9922939e7d99 100644
--- a/Documentation/filesystems/00-INDEX
+++ b/Documentation/filesystems/00-INDEX
@@ -34,6 +34,9 @@ configfs/
34 - directory containing configfs documentation and example code. 34 - directory containing configfs documentation and example code.
35cramfs.txt 35cramfs.txt
36 - info on the cram filesystem for small storage (ROMs etc). 36 - info on the cram filesystem for small storage (ROMs etc).
37dax.txt
38 - info on avoiding the page cache for files stored on CPU-addressable
39 storage devices.
37debugfs.txt 40debugfs.txt
38 - info on the debugfs filesystem. 41 - info on the debugfs filesystem.
39devpts.txt 42devpts.txt
@@ -154,5 +157,3 @@ xfs-self-describing-metadata.txt
154 - info on XFS Self Describing Metadata. 157 - info on XFS Self Describing Metadata.
155xfs.txt 158xfs.txt
156 - info and mount options for the XFS filesystem. 159 - info and mount options for the XFS filesystem.
157xip.txt
158 - info on execute-in-place for file mappings.
diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking
index b30753cbf431..2ca3d17eee56 100644
--- a/Documentation/filesystems/Locking
+++ b/Documentation/filesystems/Locking
@@ -199,8 +199,6 @@ prototypes:
199 int (*releasepage) (struct page *, int); 199 int (*releasepage) (struct page *, int);
200 void (*freepage)(struct page *); 200 void (*freepage)(struct page *);
201 int (*direct_IO)(int, struct kiocb *, struct iov_iter *iter, loff_t offset); 201 int (*direct_IO)(int, struct kiocb *, struct iov_iter *iter, loff_t offset);
202 int (*get_xip_mem)(struct address_space *, pgoff_t, int, void **,
203 unsigned long *);
204 int (*migratepage)(struct address_space *, struct page *, struct page *); 202 int (*migratepage)(struct address_space *, struct page *, struct page *);
205 int (*launder_page)(struct page *); 203 int (*launder_page)(struct page *);
206 int (*is_partially_uptodate)(struct page *, unsigned long, unsigned long); 204 int (*is_partially_uptodate)(struct page *, unsigned long, unsigned long);
@@ -225,7 +223,6 @@ invalidatepage: yes
225releasepage: yes 223releasepage: yes
226freepage: yes 224freepage: yes
227direct_IO: 225direct_IO:
228get_xip_mem: maybe
229migratepage: yes (both) 226migratepage: yes (both)
230launder_page: yes 227launder_page: yes
231is_partially_uptodate: yes 228is_partially_uptodate: yes
diff --git a/Documentation/filesystems/dax.txt b/Documentation/filesystems/dax.txt
new file mode 100644
index 000000000000..baf41118660d
--- /dev/null
+++ b/Documentation/filesystems/dax.txt
@@ -0,0 +1,94 @@
1Direct Access for files
2-----------------------
3
4Motivation
5----------
6
7The page cache is usually used to buffer reads and writes to files.
8It is also used to provide the pages which are mapped into userspace
9by a call to mmap.
10
11For block devices that are memory-like, the page cache pages would be
12unnecessary copies of the original storage. The DAX code removes the
13extra copy by performing reads and writes directly to the storage device.
14For file mappings, the storage device is mapped directly into userspace.
15
16
17Usage
18-----
19
20If you have a block device which supports DAX, you can make a filesystem
21on it as usual. When mounting it, use the -o dax option manually
22or add 'dax' to the options in /etc/fstab.
23
24
25Implementation Tips for Block Driver Writers
26--------------------------------------------
27
28To support DAX in your block driver, implement the 'direct_access'
29block device operation. It is used to translate the sector number
30(expressed in units of 512-byte sectors) to a page frame number (pfn)
31that identifies the physical page for the memory. It also returns a
32kernel virtual address that can be used to access the memory.
33
34The direct_access method takes a 'size' parameter that indicates the
35number of bytes being requested. The function should return the number
36of bytes that can be contiguously accessed at that offset. It may also
37return a negative errno if an error occurs.
38
39In order to support this method, the storage must be byte-accessible by
40the CPU at all times. If your device uses paging techniques to expose
41a large amount of memory through a smaller window, then you cannot
42implement direct_access. Equally, if your device can occasionally
43stall the CPU for an extended period, you should also not attempt to
44implement direct_access.
45
46These block devices may be used for inspiration:
47- axonram: Axon DDR2 device driver
48- brd: RAM backed block device driver
49- dcssblk: s390 dcss block device driver
50
51
52Implementation Tips for Filesystem Writers
53------------------------------------------
54
55Filesystem support consists of
56- adding support to mark inodes as being DAX by setting the S_DAX flag in
57 i_flags
58- implementing the direct_IO address space operation, and calling
59 dax_do_io() instead of blockdev_direct_IO() if S_DAX is set
60- implementing an mmap file operation for DAX files which sets the
61 VM_MIXEDMAP flag on the VMA, and setting the vm_ops to include handlers
62 for fault and page_mkwrite (which should probably call dax_fault() and
63 dax_mkwrite(), passing the appropriate get_block() callback)
64- calling dax_truncate_page() instead of block_truncate_page() for DAX files
65- calling dax_zero_page_range() instead of zero_user() for DAX files
66- ensuring that there is sufficient locking between reads, writes,
67 truncates and page faults
68
69The get_block() callback passed to the DAX functions may return
70uninitialised extents. If it does, it must ensure that simultaneous
71calls to get_block() (for example by a page-fault racing with a read()
72or a write()) work correctly.
73
74These filesystems may be used for inspiration:
75- ext2: the second extended filesystem, see Documentation/filesystems/ext2.txt
76- ext4: the fourth extended filesystem, see Documentation/filesystems/ext4.txt
77
78
79Shortcomings
80------------
81
82Even if the kernel or its modules are stored on a filesystem that supports
83DAX on a block device that supports DAX, they will still be copied into RAM.
84
85The DAX code does not work correctly on architectures which have virtually
86mapped caches such as ARM, MIPS and SPARC.
87
88Calling get_user_pages() on a range of user memory that has been mmaped
89from a DAX file will fail as there are no 'struct page' to describe
90those pages. This problem is being worked on. That means that O_DIRECT
91reads/writes to those memory ranges from a non-DAX file will fail (note
92that O_DIRECT reads/writes _of a DAX file_ do work, it is the memory
93that is being accessed that is key here). Other things that will not
94work include RDMA, sendfile() and splice().
diff --git a/Documentation/filesystems/ext2.txt b/Documentation/filesystems/ext2.txt
index 67639f905f10..b9714569e472 100644
--- a/Documentation/filesystems/ext2.txt
+++ b/Documentation/filesystems/ext2.txt
@@ -20,6 +20,9 @@ minixdf Makes `df' act like Minix.
20check=none, nocheck (*) Don't do extra checking of bitmaps on mount 20check=none, nocheck (*) Don't do extra checking of bitmaps on mount
21 (check=normal and check=strict options removed) 21 (check=normal and check=strict options removed)
22 22
23dax Use direct access (no page cache). See
24 Documentation/filesystems/dax.txt.
25
23debug Extra debugging information is sent to the 26debug Extra debugging information is sent to the
24 kernel syslog. Useful for developers. 27 kernel syslog. Useful for developers.
25 28
@@ -56,8 +59,6 @@ noacl Don't support POSIX ACLs.
56 59
57nobh Do not attach buffer_heads to file pagecache. 60nobh Do not attach buffer_heads to file pagecache.
58 61
59xip Use execute in place (no caching) if possible
60
61grpquota,noquota,quota,usrquota Quota options are silently ignored by ext2. 62grpquota,noquota,quota,usrquota Quota options are silently ignored by ext2.
62 63
63 64
diff --git a/Documentation/filesystems/ext4.txt b/Documentation/filesystems/ext4.txt
index 919a3293aaa4..6c0108eb0137 100644
--- a/Documentation/filesystems/ext4.txt
+++ b/Documentation/filesystems/ext4.txt
@@ -386,6 +386,10 @@ max_dir_size_kb=n This limits the size of directories so that any
386i_version Enable 64-bit inode version support. This option is 386i_version Enable 64-bit inode version support. This option is
387 off by default. 387 off by default.
388 388
389dax Use direct access (no page cache). See
390 Documentation/filesystems/dax.txt. Note that
391 this option is incompatible with data=journal.
392
389Data Mode 393Data Mode
390========= 394=========
391There are 3 different data modes: 395There are 3 different data modes:
diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt
index 43ce0507ee25..966b22829f3b 100644
--- a/Documentation/filesystems/vfs.txt
+++ b/Documentation/filesystems/vfs.txt
@@ -591,8 +591,6 @@ struct address_space_operations {
591 int (*releasepage) (struct page *, int); 591 int (*releasepage) (struct page *, int);
592 void (*freepage)(struct page *); 592 void (*freepage)(struct page *);
593 ssize_t (*direct_IO)(int, struct kiocb *, struct iov_iter *iter, loff_t offset); 593 ssize_t (*direct_IO)(int, struct kiocb *, struct iov_iter *iter, loff_t offset);
594 struct page* (*get_xip_page)(struct address_space *, sector_t,
595 int);
596 /* migrate the contents of a page to the specified target */ 594 /* migrate the contents of a page to the specified target */
597 int (*migratepage) (struct page *, struct page *); 595 int (*migratepage) (struct page *, struct page *);
598 int (*launder_page) (struct page *); 596 int (*launder_page) (struct page *);
@@ -748,11 +746,6 @@ struct address_space_operations {
748 and transfer data directly between the storage and the 746 and transfer data directly between the storage and the
749 application's address space. 747 application's address space.
750 748
751 get_xip_page: called by the VM to translate a block number to a page.
752 The page is valid until the corresponding filesystem is unmounted.
753 Filesystems that want to use execute-in-place (XIP) need to implement
754 it. An example implementation can be found in fs/ext2/xip.c.
755
756 migrate_page: This is used to compact the physical memory usage. 749 migrate_page: This is used to compact the physical memory usage.
757 If the VM wants to relocate a page (maybe off a memory card 750 If the VM wants to relocate a page (maybe off a memory card
758 that is signalling imminent failure) it will pass a new page 751 that is signalling imminent failure) it will pass a new page
diff --git a/Documentation/filesystems/xip.txt b/Documentation/filesystems/xip.txt
deleted file mode 100644
index b77472949ede..000000000000
--- a/Documentation/filesystems/xip.txt
+++ /dev/null
@@ -1,71 +0,0 @@
1Execute-in-place for file mappings
2----------------------------------
3
4Motivation
5----------
6File mappings are performed by mapping page cache pages to userspace. In
7addition, read&write type file operations also transfer data from/to the page
8cache.
9
10For memory backed storage devices that use the block device interface, the page
11cache pages are in fact copies of the original storage. Various approaches
12exist to work around the need for an extra copy. The ramdisk driver for example
13does read the data into the page cache, keeps a reference, and discards the
14original data behind later on.
15
16Execute-in-place solves this issue the other way around: instead of keeping
17data in the page cache, the need to have a page cache copy is eliminated
18completely. With execute-in-place, read&write type operations are performed
19directly from/to the memory backed storage device. For file mappings, the
20storage device itself is mapped directly into userspace.
21
22This implementation was initially written for shared memory segments between
23different virtual machines on s390 hardware to allow multiple machines to
24share the same binaries and libraries.
25
26Implementation
27--------------
28Execute-in-place is implemented in three steps: block device operation,
29address space operation, and file operations.
30
31A block device operation named direct_access is used to translate the
32block device sector number to a page frame number (pfn) that identifies
33the physical page for the memory. It also returns a kernel virtual
34address that can be used to access the memory.
35
36The direct_access method takes a 'size' parameter that indicates the
37number of bytes being requested. The function should return the number
38of bytes that can be contiguously accessed at that offset. It may also
39return a negative errno if an error occurs.
40
41The block device operation is optional, these block devices support it as of
42today:
43- dcssblk: s390 dcss block device driver
44
45An address space operation named get_xip_mem is used to retrieve references
46to a page frame number and a kernel address. To obtain these values a reference
47to an address_space is provided. This function assigns values to the kmem and
48pfn parameters. The third argument indicates whether the function should allocate
49blocks if needed.
50
51This address space operation is mutually exclusive with readpage&writepage that
52do page cache read/write operations.
53The following filesystems support it as of today:
54- ext2: the second extended filesystem, see Documentation/filesystems/ext2.txt
55
56A set of file operations that do utilize get_xip_page can be found in
57mm/filemap_xip.c . The following file operation implementations are provided:
58- aio_read/aio_write
59- readv/writev
60- sendfile
61
62The generic file operations do_sync_read/do_sync_write can be used to implement
63classic synchronous IO calls.
64
65Shortcomings
66------------
67This implementation is limited to storage devices that are cpu addressable at
68all times (no highmem or such). It works well on rom/ram, but enhancements are
69needed to make it work with flash in read+write mode.
70Putting the Linux kernel and/or its modules on a xip filesystem does not mean
71they are not copied.
diff --git a/MAINTAINERS b/MAINTAINERS
index 0beaaac20a83..e75c21840815 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -34,7 +34,7 @@ trivial patch so apply some common sense.
34 generalized kernel feature ready for next time. 34 generalized kernel feature ready for next time.
35 35
36 PLEASE check your patch with the automated style checker 36 PLEASE check your patch with the automated style checker
37 (scripts/checkpatch.pl) to catch trival style violations. 37 (scripts/checkpatch.pl) to catch trivial style violations.
38 See Documentation/CodingStyle for guidance here. 38 See Documentation/CodingStyle for guidance here.
39 39
40 PLEASE CC: the maintainers and mailing lists that are generated 40 PLEASE CC: the maintainers and mailing lists that are generated
@@ -2965,6 +2965,12 @@ S: Supported
2965F: drivers/input/touchscreen/cyttsp* 2965F: drivers/input/touchscreen/cyttsp*
2966F: include/linux/input/cyttsp.h 2966F: include/linux/input/cyttsp.h
2967 2967
2968DALLAS/MAXIM DS1685-FAMILY REAL TIME CLOCK
2969M: Joshua Kinard <kumba@gentoo.org>
2970S: Maintained
2971F: drivers/rtc/rtc-ds1685.c
2972F: include/linux/rtc/ds1685.h
2973
2968DAMA SLAVE for AX.25 2974DAMA SLAVE for AX.25
2969M: Joerg Reuter <jreuter@yaina.de> 2975M: Joerg Reuter <jreuter@yaina.de>
2970W: http://yaina.de/jreuter/ 2976W: http://yaina.de/jreuter/
@@ -3153,6 +3159,12 @@ L: linux-i2c@vger.kernel.org
3153S: Maintained 3159S: Maintained
3154F: drivers/i2c/busses/i2c-diolan-u2c.c 3160F: drivers/i2c/busses/i2c-diolan-u2c.c
3155 3161
3162DIRECT ACCESS (DAX)
3163M: Matthew Wilcox <willy@linux.intel.com>
3164L: linux-fsdevel@vger.kernel.org
3165S: Supported
3166F: fs/dax.c
3167
3156DIRECTORY NOTIFICATION (DNOTIFY) 3168DIRECTORY NOTIFICATION (DNOTIFY)
3157M: Eric Paris <eparis@parisplace.org> 3169M: Eric Paris <eparis@parisplace.org>
3158S: Maintained 3170S: Maintained
@@ -6212,6 +6224,26 @@ S: Supported
6212F: drivers/power/max14577_charger.c 6224F: drivers/power/max14577_charger.c
6213F: drivers/power/max77693_charger.c 6225F: drivers/power/max77693_charger.c
6214 6226
6227MAXIM PMIC AND MUIC DRIVERS FOR EXYNOS BASED BOARDS
6228M: Chanwoo Choi <cw00.choi@samsung.com>
6229M: Krzysztof Kozlowski <k.kozlowski@samsung.com>
6230L: linux-kernel@vger.kernel.org
6231S: Supported
6232F: drivers/*/max14577.c
6233F: drivers/*/max77686.c
6234F: drivers/*/max77693.c
6235F: drivers/extcon/extcon-max14577.c
6236F: drivers/extcon/extcon-max77693.c
6237F: drivers/rtc/rtc-max77686.c
6238F: drivers/clk/clk-max77686.c
6239F: Documentation/devicetree/bindings/mfd/max14577.txt
6240F: Documentation/devicetree/bindings/mfd/max77686.txt
6241F: Documentation/devicetree/bindings/mfd/max77693.txt
6242F: Documentation/devicetree/bindings/clock/maxim,max77686.txt
6243F: include/linux/mfd/max14577*.h
6244F: include/linux/mfd/max77686*.h
6245F: include/linux/mfd/max77693*.h
6246
6215MAXIRADIO FM RADIO RECEIVER DRIVER 6247MAXIRADIO FM RADIO RECEIVER DRIVER
6216M: Hans Verkuil <hverkuil@xs4all.nl> 6248M: Hans Verkuil <hverkuil@xs4all.nl>
6217L: linux-media@vger.kernel.org 6249L: linux-media@vger.kernel.org
diff --git a/arch/arm/boot/dts/zynq-parallella.dts b/arch/arm/boot/dts/zynq-parallella.dts
index ab1dc0a56cdd..174571232ea5 100644
--- a/arch/arm/boot/dts/zynq-parallella.dts
+++ b/arch/arm/boot/dts/zynq-parallella.dts
@@ -58,7 +58,7 @@
58 status = "okay"; 58 status = "okay";
59 59
60 isl9305: isl9305@68 { 60 isl9305: isl9305@68 {
61 compatible = "isl,isl9305"; 61 compatible = "isil,isl9305";
62 reg = <0x68>; 62 reg = <0x68>;
63 63
64 regulators { 64 regulators {
diff --git a/arch/powerpc/include/asm/pgtable-ppc32.h b/arch/powerpc/include/asm/pgtable-ppc32.h
index 14bdcbd31670..64b52b1cf542 100644
--- a/arch/powerpc/include/asm/pgtable-ppc32.h
+++ b/arch/powerpc/include/asm/pgtable-ppc32.h
@@ -333,8 +333,8 @@ static inline void __ptep_set_access_flags(pte_t *ptep, pte_t entry)
333/* 333/*
334 * Encode and decode a swap entry. 334 * Encode and decode a swap entry.
335 * Note that the bits we use in a PTE for representing a swap entry 335 * Note that the bits we use in a PTE for representing a swap entry
336 * must not include the _PAGE_PRESENT bit, the _PAGE_FILE bit, or the 336 * must not include the _PAGE_PRESENT bit or the _PAGE_HASHPTE bit (if used).
337 *_PAGE_HASHPTE bit (if used). -- paulus 337 * -- paulus
338 */ 338 */
339#define __swp_type(entry) ((entry).val & 0x1f) 339#define __swp_type(entry) ((entry).val & 0x1f)
340#define __swp_offset(entry) ((entry).val >> 5) 340#define __swp_offset(entry) ((entry).val >> 5)
@@ -342,11 +342,6 @@ static inline void __ptep_set_access_flags(pte_t *ptep, pte_t entry)
342#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) >> 3 }) 342#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) >> 3 })
343#define __swp_entry_to_pte(x) ((pte_t) { (x).val << 3 }) 343#define __swp_entry_to_pte(x) ((pte_t) { (x).val << 3 })
344 344
345/* Encode and decode a nonlinear file mapping entry */
346#define PTE_FILE_MAX_BITS 29
347#define pte_to_pgoff(pte) (pte_val(pte) >> 3)
348#define pgoff_to_pte(off) ((pte_t) { ((off) << 3) | _PAGE_FILE })
349
350#ifndef CONFIG_PPC_4K_PAGES 345#ifndef CONFIG_PPC_4K_PAGES
351void pgtable_cache_init(void); 346void pgtable_cache_init(void);
352#else 347#else
diff --git a/arch/powerpc/include/asm/pgtable-ppc64.h b/arch/powerpc/include/asm/pgtable-ppc64.h
index d46532ccc386..43e6ad424c7f 100644
--- a/arch/powerpc/include/asm/pgtable-ppc64.h
+++ b/arch/powerpc/include/asm/pgtable-ppc64.h
@@ -352,9 +352,6 @@ static inline void __ptep_set_access_flags(pte_t *ptep, pte_t entry)
352#define __swp_entry(type, offset) ((swp_entry_t){((type)<< 1)|((offset)<<8)}) 352#define __swp_entry(type, offset) ((swp_entry_t){((type)<< 1)|((offset)<<8)})
353#define __pte_to_swp_entry(pte) ((swp_entry_t){pte_val(pte) >> PTE_RPN_SHIFT}) 353#define __pte_to_swp_entry(pte) ((swp_entry_t){pte_val(pte) >> PTE_RPN_SHIFT})
354#define __swp_entry_to_pte(x) ((pte_t) { (x).val << PTE_RPN_SHIFT }) 354#define __swp_entry_to_pte(x) ((pte_t) { (x).val << PTE_RPN_SHIFT })
355#define pte_to_pgoff(pte) (pte_val(pte) >> PTE_RPN_SHIFT)
356#define pgoff_to_pte(off) ((pte_t) {((off) << PTE_RPN_SHIFT)|_PAGE_FILE})
357#define PTE_FILE_MAX_BITS (BITS_PER_LONG - PTE_RPN_SHIFT)
358 355
359void pgtable_cache_add(unsigned shift, void (*ctor)(void *)); 356void pgtable_cache_add(unsigned shift, void (*ctor)(void *));
360void pgtable_cache_init(void); 357void pgtable_cache_init(void);
@@ -389,7 +386,7 @@ void pgtable_cache_init(void);
389 * The last three bits are intentionally left to zero. This memory location 386 * The last three bits are intentionally left to zero. This memory location
390 * are also used as normal page PTE pointers. So if we have any pointers 387 * are also used as normal page PTE pointers. So if we have any pointers
391 * left around while we collapse a hugepage, we need to make sure 388 * left around while we collapse a hugepage, we need to make sure
392 * _PAGE_PRESENT and _PAGE_FILE bits of that are zero when we look at them 389 * _PAGE_PRESENT bit of that is zero when we look at them
393 */ 390 */
394static inline unsigned int hpte_valid(unsigned char *hpte_slot_array, int index) 391static inline unsigned int hpte_valid(unsigned char *hpte_slot_array, int index)
395{ 392{
diff --git a/arch/powerpc/include/asm/pgtable.h b/arch/powerpc/include/asm/pgtable.h
index 79fee2eb8d56..9835ac4173b7 100644
--- a/arch/powerpc/include/asm/pgtable.h
+++ b/arch/powerpc/include/asm/pgtable.h
@@ -34,7 +34,6 @@ static inline int pte_write(pte_t pte)
34{ return (pte_val(pte) & (_PAGE_RW | _PAGE_RO)) != _PAGE_RO; } 34{ return (pte_val(pte) & (_PAGE_RW | _PAGE_RO)) != _PAGE_RO; }
35static inline int pte_dirty(pte_t pte) { return pte_val(pte) & _PAGE_DIRTY; } 35static inline int pte_dirty(pte_t pte) { return pte_val(pte) & _PAGE_DIRTY; }
36static inline int pte_young(pte_t pte) { return pte_val(pte) & _PAGE_ACCESSED; } 36static inline int pte_young(pte_t pte) { return pte_val(pte) & _PAGE_ACCESSED; }
37static inline int pte_file(pte_t pte) { return pte_val(pte) & _PAGE_FILE; }
38static inline int pte_special(pte_t pte) { return pte_val(pte) & _PAGE_SPECIAL; } 37static inline int pte_special(pte_t pte) { return pte_val(pte) & _PAGE_SPECIAL; }
39static inline int pte_none(pte_t pte) { return (pte_val(pte) & ~_PTE_NONE_MASK) == 0; } 38static inline int pte_none(pte_t pte) { return (pte_val(pte) & ~_PTE_NONE_MASK) == 0; }
40static inline pgprot_t pte_pgprot(pte_t pte) { return __pgprot(pte_val(pte) & PAGE_PROT_BITS); } 39static inline pgprot_t pte_pgprot(pte_t pte) { return __pgprot(pte_val(pte) & PAGE_PROT_BITS); }
diff --git a/arch/powerpc/include/asm/pte-40x.h b/arch/powerpc/include/asm/pte-40x.h
index ec0b0b0d1df9..486b1ef81338 100644
--- a/arch/powerpc/include/asm/pte-40x.h
+++ b/arch/powerpc/include/asm/pte-40x.h
@@ -38,7 +38,6 @@
38 */ 38 */
39 39
40#define _PAGE_GUARDED 0x001 /* G: page is guarded from prefetch */ 40#define _PAGE_GUARDED 0x001 /* G: page is guarded from prefetch */
41#define _PAGE_FILE 0x001 /* when !present: nonlinear file mapping */
42#define _PAGE_PRESENT 0x002 /* software: PTE contains a translation */ 41#define _PAGE_PRESENT 0x002 /* software: PTE contains a translation */
43#define _PAGE_NO_CACHE 0x004 /* I: caching is inhibited */ 42#define _PAGE_NO_CACHE 0x004 /* I: caching is inhibited */
44#define _PAGE_WRITETHRU 0x008 /* W: caching is write-through */ 43#define _PAGE_WRITETHRU 0x008 /* W: caching is write-through */
diff --git a/arch/powerpc/include/asm/pte-44x.h b/arch/powerpc/include/asm/pte-44x.h
index 4192b9bad901..36f75fab23f5 100644
--- a/arch/powerpc/include/asm/pte-44x.h
+++ b/arch/powerpc/include/asm/pte-44x.h
@@ -44,9 +44,6 @@
44 * - PRESENT *must* be in the bottom three bits because swap cache 44 * - PRESENT *must* be in the bottom three bits because swap cache
45 * entries use the top 29 bits for TLB2. 45 * entries use the top 29 bits for TLB2.
46 * 46 *
47 * - FILE *must* be in the bottom three bits because swap cache
48 * entries use the top 29 bits for TLB2.
49 *
50 * - CACHE COHERENT bit (M) has no effect on original PPC440 cores, 47 * - CACHE COHERENT bit (M) has no effect on original PPC440 cores,
51 * because it doesn't support SMP. However, some later 460 variants 48 * because it doesn't support SMP. However, some later 460 variants
52 * have -some- form of SMP support and so I keep the bit there for 49 * have -some- form of SMP support and so I keep the bit there for
@@ -68,7 +65,6 @@
68 * 65 *
69 * There are three protection bits available for SWAP entry: 66 * There are three protection bits available for SWAP entry:
70 * _PAGE_PRESENT 67 * _PAGE_PRESENT
71 * _PAGE_FILE
72 * _PAGE_HASHPTE (if HW has) 68 * _PAGE_HASHPTE (if HW has)
73 * 69 *
74 * So those three bits have to be inside of 0-2nd LSB of PTE. 70 * So those three bits have to be inside of 0-2nd LSB of PTE.
@@ -77,7 +73,6 @@
77 73
78#define _PAGE_PRESENT 0x00000001 /* S: PTE valid */ 74#define _PAGE_PRESENT 0x00000001 /* S: PTE valid */
79#define _PAGE_RW 0x00000002 /* S: Write permission */ 75#define _PAGE_RW 0x00000002 /* S: Write permission */
80#define _PAGE_FILE 0x00000004 /* S: nonlinear file mapping */
81#define _PAGE_EXEC 0x00000004 /* H: Execute permission */ 76#define _PAGE_EXEC 0x00000004 /* H: Execute permission */
82#define _PAGE_ACCESSED 0x00000008 /* S: Page referenced */ 77#define _PAGE_ACCESSED 0x00000008 /* S: Page referenced */
83#define _PAGE_DIRTY 0x00000010 /* S: Page dirty */ 78#define _PAGE_DIRTY 0x00000010 /* S: Page dirty */
diff --git a/arch/powerpc/include/asm/pte-8xx.h b/arch/powerpc/include/asm/pte-8xx.h
index eb6edb44f140..97bae64afdaa 100644
--- a/arch/powerpc/include/asm/pte-8xx.h
+++ b/arch/powerpc/include/asm/pte-8xx.h
@@ -29,7 +29,6 @@
29 29
30/* Definitions for 8xx embedded chips. */ 30/* Definitions for 8xx embedded chips. */
31#define _PAGE_PRESENT 0x0001 /* Page is valid */ 31#define _PAGE_PRESENT 0x0001 /* Page is valid */
32#define _PAGE_FILE 0x0002 /* when !present: nonlinear file mapping */
33#define _PAGE_NO_CACHE 0x0002 /* I: cache inhibit */ 32#define _PAGE_NO_CACHE 0x0002 /* I: cache inhibit */
34#define _PAGE_SHARED 0x0004 /* No ASID (context) compare */ 33#define _PAGE_SHARED 0x0004 /* No ASID (context) compare */
35#define _PAGE_SPECIAL 0x0008 /* SW entry, forced to 0 by the TLB miss */ 34#define _PAGE_SPECIAL 0x0008 /* SW entry, forced to 0 by the TLB miss */
diff --git a/arch/powerpc/include/asm/pte-book3e.h b/arch/powerpc/include/asm/pte-book3e.h
index 576ad88104cb..91a704952ca1 100644
--- a/arch/powerpc/include/asm/pte-book3e.h
+++ b/arch/powerpc/include/asm/pte-book3e.h
@@ -10,7 +10,6 @@
10 10
11/* Architected bits */ 11/* Architected bits */
12#define _PAGE_PRESENT 0x000001 /* software: pte contains a translation */ 12#define _PAGE_PRESENT 0x000001 /* software: pte contains a translation */
13#define _PAGE_FILE 0x000002 /* (!present only) software: pte holds file offset */
14#define _PAGE_SW1 0x000002 13#define _PAGE_SW1 0x000002
15#define _PAGE_BAP_SR 0x000004 14#define _PAGE_BAP_SR 0x000004
16#define _PAGE_BAP_UR 0x000008 15#define _PAGE_BAP_UR 0x000008
diff --git a/arch/powerpc/include/asm/pte-fsl-booke.h b/arch/powerpc/include/asm/pte-fsl-booke.h
index e84dd7ed505e..9f5c3d04a1a3 100644
--- a/arch/powerpc/include/asm/pte-fsl-booke.h
+++ b/arch/powerpc/include/asm/pte-fsl-booke.h
@@ -13,14 +13,11 @@
13 - PRESENT *must* be in the bottom three bits because swap cache 13 - PRESENT *must* be in the bottom three bits because swap cache
14 entries use the top 29 bits. 14 entries use the top 29 bits.
15 15
16 - FILE *must* be in the bottom three bits because swap cache
17 entries use the top 29 bits.
18*/ 16*/
19 17
20/* Definitions for FSL Book-E Cores */ 18/* Definitions for FSL Book-E Cores */
21#define _PAGE_PRESENT 0x00001 /* S: PTE contains a translation */ 19#define _PAGE_PRESENT 0x00001 /* S: PTE contains a translation */
22#define _PAGE_USER 0x00002 /* S: User page (maps to UR) */ 20#define _PAGE_USER 0x00002 /* S: User page (maps to UR) */
23#define _PAGE_FILE 0x00002 /* S: when !present: nonlinear file mapping */
24#define _PAGE_RW 0x00004 /* S: Write permission (SW) */ 21#define _PAGE_RW 0x00004 /* S: Write permission (SW) */
25#define _PAGE_DIRTY 0x00008 /* S: Page dirty */ 22#define _PAGE_DIRTY 0x00008 /* S: Page dirty */
26#define _PAGE_EXEC 0x00010 /* H: SX permission */ 23#define _PAGE_EXEC 0x00010 /* H: SX permission */
diff --git a/arch/powerpc/include/asm/pte-hash32.h b/arch/powerpc/include/asm/pte-hash32.h
index 4aad4132d0a8..62cfb0c663bb 100644
--- a/arch/powerpc/include/asm/pte-hash32.h
+++ b/arch/powerpc/include/asm/pte-hash32.h
@@ -18,7 +18,6 @@
18 18
19#define _PAGE_PRESENT 0x001 /* software: pte contains a translation */ 19#define _PAGE_PRESENT 0x001 /* software: pte contains a translation */
20#define _PAGE_HASHPTE 0x002 /* hash_page has made an HPTE for this pte */ 20#define _PAGE_HASHPTE 0x002 /* hash_page has made an HPTE for this pte */
21#define _PAGE_FILE 0x004 /* when !present: nonlinear file mapping */
22#define _PAGE_USER 0x004 /* usermode access allowed */ 21#define _PAGE_USER 0x004 /* usermode access allowed */
23#define _PAGE_GUARDED 0x008 /* G: prohibit speculative access */ 22#define _PAGE_GUARDED 0x008 /* G: prohibit speculative access */
24#define _PAGE_COHERENT 0x010 /* M: enforce memory coherence (SMP systems) */ 23#define _PAGE_COHERENT 0x010 /* M: enforce memory coherence (SMP systems) */
diff --git a/arch/powerpc/include/asm/pte-hash64.h b/arch/powerpc/include/asm/pte-hash64.h
index 55aea0caf95e..fc852f7e7b3a 100644
--- a/arch/powerpc/include/asm/pte-hash64.h
+++ b/arch/powerpc/include/asm/pte-hash64.h
@@ -16,7 +16,6 @@
16 */ 16 */
17#define _PAGE_PRESENT 0x0001 /* software: pte contains a translation */ 17#define _PAGE_PRESENT 0x0001 /* software: pte contains a translation */
18#define _PAGE_USER 0x0002 /* matches one of the PP bits */ 18#define _PAGE_USER 0x0002 /* matches one of the PP bits */
19#define _PAGE_FILE 0x0002 /* (!present only) software: pte holds file offset */
20#define _PAGE_EXEC 0x0004 /* No execute on POWER4 and newer (we invert) */ 19#define _PAGE_EXEC 0x0004 /* No execute on POWER4 and newer (we invert) */
21#define _PAGE_GUARDED 0x0008 20#define _PAGE_GUARDED 0x0008
22/* We can derive Memory coherence from _PAGE_NO_CACHE */ 21/* We can derive Memory coherence from _PAGE_NO_CACHE */
diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c
index 91bb8836825a..6957cc1ca0a7 100644
--- a/arch/powerpc/mm/pgtable_64.c
+++ b/arch/powerpc/mm/pgtable_64.c
@@ -782,7 +782,7 @@ pmd_t pfn_pmd(unsigned long pfn, pgprot_t pgprot)
782{ 782{
783 pmd_t pmd; 783 pmd_t pmd;
784 /* 784 /*
785 * For a valid pte, we would have _PAGE_PRESENT or _PAGE_FILE always 785 * For a valid pte, we would have _PAGE_PRESENT always
786 * set. We use this to check THP page at pmd level. 786 * set. We use this to check THP page at pmd level.
787 * leaf pte for huge page, bottom two bits != 00 787 * leaf pte for huge page, bottom two bits != 00
788 */ 788 */
diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig
index 014a1cfc41c5..1b8094d4d7af 100644
--- a/drivers/block/Kconfig
+++ b/drivers/block/Kconfig
@@ -393,14 +393,15 @@ config BLK_DEV_RAM_SIZE
393 The default value is 4096 kilobytes. Only change this if you know 393 The default value is 4096 kilobytes. Only change this if you know
394 what you are doing. 394 what you are doing.
395 395
396config BLK_DEV_XIP 396config BLK_DEV_RAM_DAX
397 bool "Support XIP filesystems on RAM block device" 397 bool "Support Direct Access (DAX) to RAM block devices"
398 depends on BLK_DEV_RAM 398 depends on BLK_DEV_RAM && FS_DAX
399 default n 399 default n
400 help 400 help
401 Support XIP filesystems (such as ext2 with XIP support on) on 401 Support filesystems using DAX to access RAM block devices. This
402 top of block ram device. This will slightly enlarge the kernel, and 402 avoids double-buffering data in the page cache before copying it
403 will prevent RAM block device backing store memory from being 403 to the block device. Answering Y will slightly enlarge the kernel,
404 and will prevent RAM block device backing store memory from being
404 allocated from highmem (only a problem for highmem systems). 405 allocated from highmem (only a problem for highmem systems).
405 406
406config CDROM_PKTCDVD 407config CDROM_PKTCDVD
diff --git a/drivers/block/brd.c b/drivers/block/brd.c
index c01b921b1b4a..64ab4951e9d6 100644
--- a/drivers/block/brd.c
+++ b/drivers/block/brd.c
@@ -97,13 +97,13 @@ static struct page *brd_insert_page(struct brd_device *brd, sector_t sector)
97 * Must use NOIO because we don't want to recurse back into the 97 * Must use NOIO because we don't want to recurse back into the
98 * block or filesystem layers from page reclaim. 98 * block or filesystem layers from page reclaim.
99 * 99 *
100 * Cannot support XIP and highmem, because our ->direct_access 100 * Cannot support DAX and highmem, because our ->direct_access
101 * routine for XIP must return memory that is always addressable. 101 * routine for DAX must return memory that is always addressable.
102 * If XIP was reworked to use pfns and kmap throughout, this 102 * If DAX was reworked to use pfns and kmap throughout, this
103 * restriction might be able to be lifted. 103 * restriction might be able to be lifted.
104 */ 104 */
105 gfp_flags = GFP_NOIO | __GFP_ZERO; 105 gfp_flags = GFP_NOIO | __GFP_ZERO;
106#ifndef CONFIG_BLK_DEV_XIP 106#ifndef CONFIG_BLK_DEV_RAM_DAX
107 gfp_flags |= __GFP_HIGHMEM; 107 gfp_flags |= __GFP_HIGHMEM;
108#endif 108#endif
109 page = alloc_page(gfp_flags); 109 page = alloc_page(gfp_flags);
@@ -369,7 +369,7 @@ static int brd_rw_page(struct block_device *bdev, sector_t sector,
369 return err; 369 return err;
370} 370}
371 371
372#ifdef CONFIG_BLK_DEV_XIP 372#ifdef CONFIG_BLK_DEV_RAM_DAX
373static long brd_direct_access(struct block_device *bdev, sector_t sector, 373static long brd_direct_access(struct block_device *bdev, sector_t sector,
374 void **kaddr, unsigned long *pfn, long size) 374 void **kaddr, unsigned long *pfn, long size)
375{ 375{
@@ -390,6 +390,8 @@ static long brd_direct_access(struct block_device *bdev, sector_t sector,
390 */ 390 */
391 return PAGE_SIZE; 391 return PAGE_SIZE;
392} 392}
393#else
394#define brd_direct_access NULL
393#endif 395#endif
394 396
395static int brd_ioctl(struct block_device *bdev, fmode_t mode, 397static int brd_ioctl(struct block_device *bdev, fmode_t mode,
@@ -430,9 +432,7 @@ static const struct block_device_operations brd_fops = {
430 .owner = THIS_MODULE, 432 .owner = THIS_MODULE,
431 .rw_page = brd_rw_page, 433 .rw_page = brd_rw_page,
432 .ioctl = brd_ioctl, 434 .ioctl = brd_ioctl,
433#ifdef CONFIG_BLK_DEV_XIP
434 .direct_access = brd_direct_access, 435 .direct_access = brd_direct_access,
435#endif
436}; 436};
437 437
438/* 438/*
diff --git a/drivers/rtc/Kconfig b/drivers/rtc/Kconfig
index 3bc9ddbe5cf7..0cf2e1d9cb17 100644
--- a/drivers/rtc/Kconfig
+++ b/drivers/rtc/Kconfig
@@ -801,6 +801,96 @@ config RTC_DRV_DS1553
801 This driver can also be built as a module. If so, the module 801 This driver can also be built as a module. If so, the module
802 will be called rtc-ds1553. 802 will be called rtc-ds1553.
803 803
804config RTC_DRV_DS1685_FAMILY
805 tristate "Dallas/Maxim DS1685 Family"
806 help
807 If you say yes here you get support for the Dallas/Maxim DS1685
808 family of real time chips. This family includes the DS1685/DS1687,
809 DS1689/DS1693, DS17285/DS17287, DS17485/DS17487, and
810 DS17885/DS17887 chips.
811
812 This driver can also be built as a module. If so, the module
813 will be called rtc-ds1685.
814
815choice
816 prompt "Subtype"
817 depends on RTC_DRV_DS1685_FAMILY
818 default RTC_DRV_DS1685
819
820config RTC_DRV_DS1685
821 bool "DS1685/DS1687"
822 help
823 This enables support for the Dallas/Maxim DS1685/DS1687 real time
824 clock chip.
825
826 This chip is commonly found in SGI O2 (IP32) and SGI Octane (IP30)
827 systems, as well as EPPC-405-UC modules by electronic system design
828 GmbH.
829
830config RTC_DRV_DS1689
831 bool "DS1689/DS1693"
832 help
833 This enables support for the Dallas/Maxim DS1689/DS1693 real time
834 clock chip.
835
836 This is an older RTC chip, supplanted by the DS1685/DS1687 above,
837 which supports a few minor features such as Vcc, Vbat, and Power
838 Cycle counters, plus a customer-specific, 8-byte ROM/Serial number.
839
840 It also works for the even older DS1688/DS1691 RTC chips, which are
841 virtually the same and carry the same model number. Both chips
842 have 114 bytes of user NVRAM.
843
844config RTC_DRV_DS17285
845 bool "DS17285/DS17287"
846 help
847 This enables support for the Dallas/Maxim DS17285/DS17287 real time
848 clock chip.
849
850 This chip features 2kb of extended NV-SRAM. It may possibly be
851 found in some SGI O2 systems (rare).
852
853config RTC_DRV_DS17485
854 bool "DS17485/DS17487"
855 help
856 This enables support for the Dallas/Maxim DS17485/DS17487 real time
857 clock chip.
858
859 This chip features 4kb of extended NV-SRAM.
860
861config RTC_DRV_DS17885
862 bool "DS17885/DS17887"
863 help
864 This enables support for the Dallas/Maxim DS17885/DS17887 real time
865 clock chip.
866
867 This chip features 8kb of extended NV-SRAM.
868
869endchoice
870
871config RTC_DS1685_PROC_REGS
872 bool "Display register values in /proc"
873 depends on RTC_DRV_DS1685_FAMILY && PROC_FS
874 help
875 Enable this to display a readout of all of the RTC registers in
876 /proc/drivers/rtc. Keep in mind that this can potentially lead
877 to lost interrupts, as reading Control Register C will clear
878 all pending IRQ flags.
879
880 Unless you are debugging this driver, choose N.
881
882config RTC_DS1685_SYSFS_REGS
883 bool "SysFS access to RTC register bits"
884 depends on RTC_DRV_DS1685_FAMILY && SYSFS
885 help
886 Enable this to provide access to the RTC control register bits
887 in /sys. Some of the bits are read-write, others are read-only.
888
889 Keep in mind that reading Control C's bits automatically clears
890 all pending IRQ flags - this can cause lost interrupts.
891
892 If you know that you need access to these bits, choose Y, Else N.
893
804config RTC_DRV_DS1742 894config RTC_DRV_DS1742
805 tristate "Maxim/Dallas DS1742/1743" 895 tristate "Maxim/Dallas DS1742/1743"
806 depends on HAS_IOMEM 896 depends on HAS_IOMEM
diff --git a/drivers/rtc/Makefile b/drivers/rtc/Makefile
index 99ded8b75e95..69c87062b098 100644
--- a/drivers/rtc/Makefile
+++ b/drivers/rtc/Makefile
@@ -54,6 +54,7 @@ obj-$(CONFIG_RTC_DRV_DS1390) += rtc-ds1390.o
54obj-$(CONFIG_RTC_DRV_DS1511) += rtc-ds1511.o 54obj-$(CONFIG_RTC_DRV_DS1511) += rtc-ds1511.o
55obj-$(CONFIG_RTC_DRV_DS1553) += rtc-ds1553.o 55obj-$(CONFIG_RTC_DRV_DS1553) += rtc-ds1553.o
56obj-$(CONFIG_RTC_DRV_DS1672) += rtc-ds1672.o 56obj-$(CONFIG_RTC_DRV_DS1672) += rtc-ds1672.o
57obj-$(CONFIG_RTC_DRV_DS1685_FAMILY) += rtc-ds1685.o
57obj-$(CONFIG_RTC_DRV_DS1742) += rtc-ds1742.o 58obj-$(CONFIG_RTC_DRV_DS1742) += rtc-ds1742.o
58obj-$(CONFIG_RTC_DRV_DS2404) += rtc-ds2404.o 59obj-$(CONFIG_RTC_DRV_DS2404) += rtc-ds2404.o
59obj-$(CONFIG_RTC_DRV_DS3232) += rtc-ds3232.o 60obj-$(CONFIG_RTC_DRV_DS3232) += rtc-ds3232.o
diff --git a/drivers/rtc/rtc-ds1685.c b/drivers/rtc/rtc-ds1685.c
new file mode 100644
index 000000000000..8c3bfcb115b7
--- /dev/null
+++ b/drivers/rtc/rtc-ds1685.c
@@ -0,0 +1,2252 @@
1/*
2 * An rtc driver for the Dallas/Maxim DS1685/DS1687 and related real-time
3 * chips.
4 *
5 * Copyright (C) 2011-2014 Joshua Kinard <kumba@gentoo.org>.
6 * Copyright (C) 2009 Matthias Fuchs <matthias.fuchs@esd-electronics.com>.
7 *
8 * References:
9 * DS1685/DS1687 3V/5V Real-Time Clocks, 19-5215, Rev 4/10.
10 * DS17x85/DS17x87 3V/5V Real-Time Clocks, 19-5222, Rev 4/10.
11 * DS1689/DS1693 3V/5V Serialized Real-Time Clocks, Rev 112105.
12 * Application Note 90, Using the Multiplex Bus RTC Extended Features.
13 *
14 * This program is free software; you can redistribute it and/or modify
15 * it under the terms of the GNU General Public License version 2 as
16 * published by the Free Software Foundation.
17 */
18
19#include <linux/bcd.h>
20#include <linux/delay.h>
21#include <linux/io.h>
22#include <linux/module.h>
23#include <linux/platform_device.h>
24#include <linux/rtc.h>
25#include <linux/workqueue.h>
26
27#include <linux/rtc/ds1685.h>
28
29#ifdef CONFIG_PROC_FS
30#include <linux/proc_fs.h>
31#endif
32
33#define DRV_VERSION "0.42.0"
34
35
36/* ----------------------------------------------------------------------- */
37/* Standard read/write functions if platform does not provide overrides */
38
39/**
40 * ds1685_read - read a value from an rtc register.
41 * @rtc: pointer to the ds1685 rtc structure.
42 * @reg: the register address to read.
43 */
44static u8
45ds1685_read(struct ds1685_priv *rtc, int reg)
46{
47 return readb((u8 __iomem *)rtc->regs +
48 (reg * rtc->regstep));
49}
50
51/**
52 * ds1685_write - write a value to an rtc register.
53 * @rtc: pointer to the ds1685 rtc structure.
54 * @reg: the register address to write.
55 * @value: value to write to the register.
56 */
57static void
58ds1685_write(struct ds1685_priv *rtc, int reg, u8 value)
59{
60 writeb(value, ((u8 __iomem *)rtc->regs +
61 (reg * rtc->regstep)));
62}
63/* ----------------------------------------------------------------------- */
64
65
66/* ----------------------------------------------------------------------- */
67/* Inlined functions */
68
69/**
70 * ds1685_rtc_bcd2bin - bcd2bin wrapper in case platform doesn't support BCD.
71 * @rtc: pointer to the ds1685 rtc structure.
72 * @val: u8 time value to consider converting.
73 * @bcd_mask: u8 mask value if BCD mode is used.
74 * @bin_mask: u8 mask value if BIN mode is used.
75 *
76 * Returns the value, converted to BIN if originally in BCD and bcd_mode TRUE.
77 */
78static inline u8
79ds1685_rtc_bcd2bin(struct ds1685_priv *rtc, u8 val, u8 bcd_mask, u8 bin_mask)
80{
81 if (rtc->bcd_mode)
82 return (bcd2bin(val) & bcd_mask);
83
84 return (val & bin_mask);
85}
86
87/**
88 * ds1685_rtc_bin2bcd - bin2bcd wrapper in case platform doesn't support BCD.
89 * @rtc: pointer to the ds1685 rtc structure.
90 * @val: u8 time value to consider converting.
91 * @bin_mask: u8 mask value if BIN mode is used.
92 * @bcd_mask: u8 mask value if BCD mode is used.
93 *
94 * Returns the value, converted to BCD if originally in BIN and bcd_mode TRUE.
95 */
96static inline u8
97ds1685_rtc_bin2bcd(struct ds1685_priv *rtc, u8 val, u8 bin_mask, u8 bcd_mask)
98{
99 if (rtc->bcd_mode)
100 return (bin2bcd(val) & bcd_mask);
101
102 return (val & bin_mask);
103}
104
105/**
106 * ds1685_rtc_switch_to_bank0 - switch the rtc to bank 0.
107 * @rtc: pointer to the ds1685 rtc structure.
108 */
109static inline void
110ds1685_rtc_switch_to_bank0(struct ds1685_priv *rtc)
111{
112 rtc->write(rtc, RTC_CTRL_A,
113 (rtc->read(rtc, RTC_CTRL_A) & ~(RTC_CTRL_A_DV0)));
114}
115
116/**
117 * ds1685_rtc_switch_to_bank1 - switch the rtc to bank 1.
118 * @rtc: pointer to the ds1685 rtc structure.
119 */
120static inline void
121ds1685_rtc_switch_to_bank1(struct ds1685_priv *rtc)
122{
123 rtc->write(rtc, RTC_CTRL_A,
124 (rtc->read(rtc, RTC_CTRL_A) | RTC_CTRL_A_DV0));
125}
126
127/**
128 * ds1685_rtc_begin_data_access - prepare the rtc for data access.
129 * @rtc: pointer to the ds1685 rtc structure.
130 *
131 * This takes several steps to prepare the rtc for access to get/set time
132 * and alarm values from the rtc registers:
133 * - Sets the SET bit in Control Register B.
134 * - Reads Ext Control Register 4A and checks the INCR bit.
135 * - If INCR is active, a short delay is added before Ext Control Register 4A
136 * is read again in a loop until INCR is inactive.
137 * - Switches the rtc to bank 1. This allows access to all relevant
138 * data for normal rtc operation, as bank 0 contains only the nvram.
139 */
140static inline void
141ds1685_rtc_begin_data_access(struct ds1685_priv *rtc)
142{
143 /* Set the SET bit in Ctrl B */
144 rtc->write(rtc, RTC_CTRL_B,
145 (rtc->read(rtc, RTC_CTRL_B) | RTC_CTRL_B_SET));
146
147 /* Read Ext Ctrl 4A and check the INCR bit to avoid a lockout. */
148 while (rtc->read(rtc, RTC_EXT_CTRL_4A) & RTC_CTRL_4A_INCR)
149 cpu_relax();
150
151 /* Switch to Bank 1 */
152 ds1685_rtc_switch_to_bank1(rtc);
153}
154
155/**
156 * ds1685_rtc_end_data_access - end data access on the rtc.
157 * @rtc: pointer to the ds1685 rtc structure.
158 *
159 * This ends what was started by ds1685_rtc_begin_data_access:
160 * - Switches the rtc back to bank 0.
161 * - Clears the SET bit in Control Register B.
162 */
163static inline void
164ds1685_rtc_end_data_access(struct ds1685_priv *rtc)
165{
166 /* Switch back to Bank 0 */
167 ds1685_rtc_switch_to_bank1(rtc);
168
169 /* Clear the SET bit in Ctrl B */
170 rtc->write(rtc, RTC_CTRL_B,
171 (rtc->read(rtc, RTC_CTRL_B) & ~(RTC_CTRL_B_SET)));
172}
173
174/**
175 * ds1685_rtc_begin_ctrl_access - prepare the rtc for ctrl access.
176 * @rtc: pointer to the ds1685 rtc structure.
177 * @flags: irq flags variable for spin_lock_irqsave.
178 *
179 * This takes several steps to prepare the rtc for access to read just the
180 * control registers:
181 * - Sets a spinlock on the rtc IRQ.
182 * - Switches the rtc to bank 1. This allows access to the two extended
183 * control registers.
184 *
185 * Only use this where you are certain another lock will not be held.
186 */
187static inline void
188ds1685_rtc_begin_ctrl_access(struct ds1685_priv *rtc, unsigned long flags)
189{
190 spin_lock_irqsave(&rtc->lock, flags);
191 ds1685_rtc_switch_to_bank1(rtc);
192}
193
194/**
195 * ds1685_rtc_end_ctrl_access - end ctrl access on the rtc.
196 * @rtc: pointer to the ds1685 rtc structure.
197 * @flags: irq flags variable for spin_unlock_irqrestore.
198 *
199 * This ends what was started by ds1685_rtc_begin_ctrl_access:
200 * - Switches the rtc back to bank 0.
201 * - Unsets the spinlock on the rtc IRQ.
202 */
203static inline void
204ds1685_rtc_end_ctrl_access(struct ds1685_priv *rtc, unsigned long flags)
205{
206 ds1685_rtc_switch_to_bank0(rtc);
207 spin_unlock_irqrestore(&rtc->lock, flags);
208}
209
210/**
211 * ds1685_rtc_get_ssn - retrieve the silicon serial number.
212 * @rtc: pointer to the ds1685 rtc structure.
213 * @ssn: u8 array to hold the bits of the silicon serial number.
214 *
215 * This number starts at 0x40, and is 8-bytes long, ending at 0x47. The
216 * first byte is the model number, the next six bytes are the serial number
217 * digits, and the final byte is a CRC check byte. Together, they form the
218 * silicon serial number.
219 *
220 * These values are stored in bank1, so ds1685_rtc_switch_to_bank1 must be
221 * called first before calling this function, else data will be read out of
222 * the bank0 NVRAM. Be sure to call ds1685_rtc_switch_to_bank0 when done.
223 */
224static inline void
225ds1685_rtc_get_ssn(struct ds1685_priv *rtc, u8 *ssn)
226{
227 ssn[0] = rtc->read(rtc, RTC_BANK1_SSN_MODEL);
228 ssn[1] = rtc->read(rtc, RTC_BANK1_SSN_BYTE_1);
229 ssn[2] = rtc->read(rtc, RTC_BANK1_SSN_BYTE_2);
230 ssn[3] = rtc->read(rtc, RTC_BANK1_SSN_BYTE_3);
231 ssn[4] = rtc->read(rtc, RTC_BANK1_SSN_BYTE_4);
232 ssn[5] = rtc->read(rtc, RTC_BANK1_SSN_BYTE_5);
233 ssn[6] = rtc->read(rtc, RTC_BANK1_SSN_BYTE_6);
234 ssn[7] = rtc->read(rtc, RTC_BANK1_SSN_CRC);
235}
236/* ----------------------------------------------------------------------- */
237
238
239/* ----------------------------------------------------------------------- */
240/* Read/Set Time & Alarm functions */
241
242/**
243 * ds1685_rtc_read_time - reads the time registers.
244 * @dev: pointer to device structure.
245 * @tm: pointer to rtc_time structure.
246 */
247static int
248ds1685_rtc_read_time(struct device *dev, struct rtc_time *tm)
249{
250 struct platform_device *pdev = to_platform_device(dev);
251 struct ds1685_priv *rtc = platform_get_drvdata(pdev);
252 u8 ctrlb, century;
253 u8 seconds, minutes, hours, wday, mday, month, years;
254
255 /* Fetch the time info from the RTC registers. */
256 ds1685_rtc_begin_data_access(rtc);
257 seconds = rtc->read(rtc, RTC_SECS);
258 minutes = rtc->read(rtc, RTC_MINS);
259 hours = rtc->read(rtc, RTC_HRS);
260 wday = rtc->read(rtc, RTC_WDAY);
261 mday = rtc->read(rtc, RTC_MDAY);
262 month = rtc->read(rtc, RTC_MONTH);
263 years = rtc->read(rtc, RTC_YEAR);
264 century = rtc->read(rtc, RTC_CENTURY);
265 ctrlb = rtc->read(rtc, RTC_CTRL_B);
266 ds1685_rtc_end_data_access(rtc);
267
268 /* bcd2bin if needed, perform fixups, and store to rtc_time. */
269 years = ds1685_rtc_bcd2bin(rtc, years, RTC_YEAR_BCD_MASK,
270 RTC_YEAR_BIN_MASK);
271 century = ds1685_rtc_bcd2bin(rtc, century, RTC_CENTURY_MASK,
272 RTC_CENTURY_MASK);
273 tm->tm_sec = ds1685_rtc_bcd2bin(rtc, seconds, RTC_SECS_BCD_MASK,
274 RTC_SECS_BIN_MASK);
275 tm->tm_min = ds1685_rtc_bcd2bin(rtc, minutes, RTC_MINS_BCD_MASK,
276 RTC_MINS_BIN_MASK);
277 tm->tm_hour = ds1685_rtc_bcd2bin(rtc, hours, RTC_HRS_24_BCD_MASK,
278 RTC_HRS_24_BIN_MASK);
279 tm->tm_wday = (ds1685_rtc_bcd2bin(rtc, wday, RTC_WDAY_MASK,
280 RTC_WDAY_MASK) - 1);
281 tm->tm_mday = ds1685_rtc_bcd2bin(rtc, mday, RTC_MDAY_BCD_MASK,
282 RTC_MDAY_BIN_MASK);
283 tm->tm_mon = (ds1685_rtc_bcd2bin(rtc, month, RTC_MONTH_BCD_MASK,
284 RTC_MONTH_BIN_MASK) - 1);
285 tm->tm_year = ((years + (century * 100)) - 1900);
286 tm->tm_yday = rtc_year_days(tm->tm_mday, tm->tm_mon, tm->tm_year);
287 tm->tm_isdst = 0; /* RTC has hardcoded timezone, so don't use. */
288
289 return rtc_valid_tm(tm);
290}
291
292/**
293 * ds1685_rtc_set_time - sets the time registers.
294 * @dev: pointer to device structure.
295 * @tm: pointer to rtc_time structure.
296 */
297static int
298ds1685_rtc_set_time(struct device *dev, struct rtc_time *tm)
299{
300 struct platform_device *pdev = to_platform_device(dev);
301 struct ds1685_priv *rtc = platform_get_drvdata(pdev);
302 u8 ctrlb, seconds, minutes, hours, wday, mday, month, years, century;
303
304 /* Fetch the time info from rtc_time. */
305 seconds = ds1685_rtc_bin2bcd(rtc, tm->tm_sec, RTC_SECS_BIN_MASK,
306 RTC_SECS_BCD_MASK);
307 minutes = ds1685_rtc_bin2bcd(rtc, tm->tm_min, RTC_MINS_BIN_MASK,
308 RTC_MINS_BCD_MASK);
309 hours = ds1685_rtc_bin2bcd(rtc, tm->tm_hour, RTC_HRS_24_BIN_MASK,
310 RTC_HRS_24_BCD_MASK);
311 wday = ds1685_rtc_bin2bcd(rtc, (tm->tm_wday + 1), RTC_WDAY_MASK,
312 RTC_WDAY_MASK);
313 mday = ds1685_rtc_bin2bcd(rtc, tm->tm_mday, RTC_MDAY_BIN_MASK,
314 RTC_MDAY_BCD_MASK);
315 month = ds1685_rtc_bin2bcd(rtc, (tm->tm_mon + 1), RTC_MONTH_BIN_MASK,
316 RTC_MONTH_BCD_MASK);
317 years = ds1685_rtc_bin2bcd(rtc, (tm->tm_year % 100),
318 RTC_YEAR_BIN_MASK, RTC_YEAR_BCD_MASK);
319 century = ds1685_rtc_bin2bcd(rtc, ((tm->tm_year + 1900) / 100),
320 RTC_CENTURY_MASK, RTC_CENTURY_MASK);
321
322 /*
323 * Perform Sanity Checks:
324 * - Months: !> 12, Month Day != 0.
325 * - Month Day !> Max days in current month.
326 * - Hours !>= 24, Mins !>= 60, Secs !>= 60, & Weekday !> 7.
327 */
328 if ((tm->tm_mon > 11) || (mday == 0))
329 return -EDOM;
330
331 if (tm->tm_mday > rtc_month_days(tm->tm_mon, tm->tm_year))
332 return -EDOM;
333
334 if ((tm->tm_hour >= 24) || (tm->tm_min >= 60) ||
335 (tm->tm_sec >= 60) || (wday > 7))
336 return -EDOM;
337
338 /*
339 * Set the data mode to use and store the time values in the
340 * RTC registers.
341 */
342 ds1685_rtc_begin_data_access(rtc);
343 ctrlb = rtc->read(rtc, RTC_CTRL_B);
344 if (rtc->bcd_mode)
345 ctrlb &= ~(RTC_CTRL_B_DM);
346 else
347 ctrlb |= RTC_CTRL_B_DM;
348 rtc->write(rtc, RTC_CTRL_B, ctrlb);
349 rtc->write(rtc, RTC_SECS, seconds);
350 rtc->write(rtc, RTC_MINS, minutes);
351 rtc->write(rtc, RTC_HRS, hours);
352 rtc->write(rtc, RTC_WDAY, wday);
353 rtc->write(rtc, RTC_MDAY, mday);
354 rtc->write(rtc, RTC_MONTH, month);
355 rtc->write(rtc, RTC_YEAR, years);
356 rtc->write(rtc, RTC_CENTURY, century);
357 ds1685_rtc_end_data_access(rtc);
358
359 return 0;
360}
361
362/**
363 * ds1685_rtc_read_alarm - reads the alarm registers.
364 * @dev: pointer to device structure.
365 * @alrm: pointer to rtc_wkalrm structure.
366 *
367 * There are three primary alarm registers: seconds, minutes, and hours.
368 * A fourth alarm register for the month date is also available in bank1 for
369 * kickstart/wakeup features. The DS1685/DS1687 manual states that a
370 * "don't care" value ranging from 0xc0 to 0xff may be written into one or
371 * more of the three alarm bytes to act as a wildcard value. The fourth
372 * byte doesn't support a "don't care" value.
373 */
374static int
375ds1685_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alrm)
376{
377 struct platform_device *pdev = to_platform_device(dev);
378 struct ds1685_priv *rtc = platform_get_drvdata(pdev);
379 u8 seconds, minutes, hours, mday, ctrlb, ctrlc;
380
381 /* Fetch the alarm info from the RTC alarm registers. */
382 ds1685_rtc_begin_data_access(rtc);
383 seconds = rtc->read(rtc, RTC_SECS_ALARM);
384 minutes = rtc->read(rtc, RTC_MINS_ALARM);
385 hours = rtc->read(rtc, RTC_HRS_ALARM);
386 mday = rtc->read(rtc, RTC_MDAY_ALARM);
387 ctrlb = rtc->read(rtc, RTC_CTRL_B);
388 ctrlc = rtc->read(rtc, RTC_CTRL_C);
389 ds1685_rtc_end_data_access(rtc);
390
391 /* Check month date. */
392 if (!(mday >= 1) && (mday <= 31))
393 return -EDOM;
394
395 /*
396 * Check the three alarm bytes.
397 *
398 * The Linux RTC system doesn't support the "don't care" capability
399 * of this RTC chip. We check for it anyways in case support is
400 * added in the future.
401 */
402 if (unlikely((seconds >= 0xc0) && (seconds <= 0xff)))
403 alrm->time.tm_sec = -1;
404 else
405 alrm->time.tm_sec = ds1685_rtc_bcd2bin(rtc, seconds,
406 RTC_SECS_BCD_MASK,
407 RTC_SECS_BIN_MASK);
408
409 if (unlikely((minutes >= 0xc0) && (minutes <= 0xff)))
410 alrm->time.tm_min = -1;
411 else
412 alrm->time.tm_min = ds1685_rtc_bcd2bin(rtc, minutes,
413 RTC_MINS_BCD_MASK,
414 RTC_MINS_BIN_MASK);
415
416 if (unlikely((hours >= 0xc0) && (hours <= 0xff)))
417 alrm->time.tm_hour = -1;
418 else
419 alrm->time.tm_hour = ds1685_rtc_bcd2bin(rtc, hours,
420 RTC_HRS_24_BCD_MASK,
421 RTC_HRS_24_BIN_MASK);
422
423 /* Write the data to rtc_wkalrm. */
424 alrm->time.tm_mday = ds1685_rtc_bcd2bin(rtc, mday, RTC_MDAY_BCD_MASK,
425 RTC_MDAY_BIN_MASK);
426 alrm->time.tm_mon = -1;
427 alrm->time.tm_year = -1;
428 alrm->time.tm_wday = -1;
429 alrm->time.tm_yday = -1;
430 alrm->time.tm_isdst = -1;
431 alrm->enabled = !!(ctrlb & RTC_CTRL_B_AIE);
432 alrm->pending = !!(ctrlc & RTC_CTRL_C_AF);
433
434 return 0;
435}
436
437/**
438 * ds1685_rtc_set_alarm - sets the alarm in registers.
439 * @dev: pointer to device structure.
440 * @alrm: pointer to rtc_wkalrm structure.
441 */
442static int
443ds1685_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alrm)
444{
445 struct platform_device *pdev = to_platform_device(dev);
446 struct ds1685_priv *rtc = platform_get_drvdata(pdev);
447 u8 ctrlb, seconds, minutes, hours, mday;
448
449 /* Fetch the alarm info and convert to BCD. */
450 seconds = ds1685_rtc_bin2bcd(rtc, alrm->time.tm_sec,
451 RTC_SECS_BIN_MASK,
452 RTC_SECS_BCD_MASK);
453 minutes = ds1685_rtc_bin2bcd(rtc, alrm->time.tm_min,
454 RTC_MINS_BIN_MASK,
455 RTC_MINS_BCD_MASK);
456 hours = ds1685_rtc_bin2bcd(rtc, alrm->time.tm_hour,
457 RTC_HRS_24_BIN_MASK,
458 RTC_HRS_24_BCD_MASK);
459 mday = ds1685_rtc_bin2bcd(rtc, alrm->time.tm_mday,
460 RTC_MDAY_BIN_MASK,
461 RTC_MDAY_BCD_MASK);
462
463 /* Check the month date for validity. */
464 if (!(mday >= 1) && (mday <= 31))
465 return -EDOM;
466
467 /*
468 * Check the three alarm bytes.
469 *
470 * The Linux RTC system doesn't support the "don't care" capability
471 * of this RTC chip because rtc_valid_tm tries to validate every
472 * field, and we only support four fields. We put the support
473 * here anyways for the future.
474 */
475 if (unlikely((seconds >= 0xc0) && (seconds <= 0xff)))
476 seconds = 0xff;
477
478 if (unlikely((minutes >= 0xc0) && (minutes <= 0xff)))
479 minutes = 0xff;
480
481 if (unlikely((hours >= 0xc0) && (hours <= 0xff)))
482 hours = 0xff;
483
484 alrm->time.tm_mon = -1;
485 alrm->time.tm_year = -1;
486 alrm->time.tm_wday = -1;
487 alrm->time.tm_yday = -1;
488 alrm->time.tm_isdst = -1;
489
490 /* Disable the alarm interrupt first. */
491 ds1685_rtc_begin_data_access(rtc);
492 ctrlb = rtc->read(rtc, RTC_CTRL_B);
493 rtc->write(rtc, RTC_CTRL_B, (ctrlb & ~(RTC_CTRL_B_AIE)));
494
495 /* Read ctrlc to clear RTC_CTRL_C_AF. */
496 rtc->read(rtc, RTC_CTRL_C);
497
498 /*
499 * Set the data mode to use and store the time values in the
500 * RTC registers.
501 */
502 ctrlb = rtc->read(rtc, RTC_CTRL_B);
503 if (rtc->bcd_mode)
504 ctrlb &= ~(RTC_CTRL_B_DM);
505 else
506 ctrlb |= RTC_CTRL_B_DM;
507 rtc->write(rtc, RTC_CTRL_B, ctrlb);
508 rtc->write(rtc, RTC_SECS_ALARM, seconds);
509 rtc->write(rtc, RTC_MINS_ALARM, minutes);
510 rtc->write(rtc, RTC_HRS_ALARM, hours);
511 rtc->write(rtc, RTC_MDAY_ALARM, mday);
512
513 /* Re-enable the alarm if needed. */
514 if (alrm->enabled) {
515 ctrlb = rtc->read(rtc, RTC_CTRL_B);
516 ctrlb |= RTC_CTRL_B_AIE;
517 rtc->write(rtc, RTC_CTRL_B, ctrlb);
518 }
519
520 /* Done! */
521 ds1685_rtc_end_data_access(rtc);
522
523 return 0;
524}
525/* ----------------------------------------------------------------------- */
526
527
528/* ----------------------------------------------------------------------- */
529/* /dev/rtcX Interface functions */
530
531#ifdef CONFIG_RTC_INTF_DEV
532/**
533 * ds1685_rtc_alarm_irq_enable - replaces ioctl() RTC_AIE on/off.
534 * @dev: pointer to device structure.
535 * @enabled: flag indicating whether to enable or disable.
536 */
537static int
538ds1685_rtc_alarm_irq_enable(struct device *dev, unsigned int enabled)
539{
540 struct ds1685_priv *rtc = dev_get_drvdata(dev);
541 unsigned long flags = 0;
542
543 /* Enable/disable the Alarm IRQ-Enable flag. */
544 spin_lock_irqsave(&rtc->lock, flags);
545
546 /* Flip the requisite interrupt-enable bit. */
547 if (enabled)
548 rtc->write(rtc, RTC_CTRL_B, (rtc->read(rtc, RTC_CTRL_B) |
549 RTC_CTRL_B_AIE));
550 else
551 rtc->write(rtc, RTC_CTRL_B, (rtc->read(rtc, RTC_CTRL_B) &
552 ~(RTC_CTRL_B_AIE)));
553
554 /* Read Control C to clear all the flag bits. */
555 rtc->read(rtc, RTC_CTRL_C);
556 spin_unlock_irqrestore(&rtc->lock, flags);
557
558 return 0;
559}
560#endif
561/* ----------------------------------------------------------------------- */
562
563
564/* ----------------------------------------------------------------------- */
565/* IRQ handler & workqueue. */
566
567/**
568 * ds1685_rtc_irq_handler - IRQ handler.
569 * @irq: IRQ number.
570 * @dev_id: platform device pointer.
571 */
572static irqreturn_t
573ds1685_rtc_irq_handler(int irq, void *dev_id)
574{
575 struct platform_device *pdev = dev_id;
576 struct ds1685_priv *rtc = platform_get_drvdata(pdev);
577 u8 ctrlb, ctrlc;
578 unsigned long events = 0;
579 u8 num_irqs = 0;
580
581 /* Abort early if the device isn't ready yet (i.e., DEBUG_SHIRQ). */
582 if (unlikely(!rtc))
583 return IRQ_HANDLED;
584
585 /* Ctrlb holds the interrupt-enable bits and ctrlc the flag bits. */
586 spin_lock(&rtc->lock);
587 ctrlb = rtc->read(rtc, RTC_CTRL_B);
588 ctrlc = rtc->read(rtc, RTC_CTRL_C);
589
590 /* Is the IRQF bit set? */
591 if (likely(ctrlc & RTC_CTRL_C_IRQF)) {
592 /*
593 * We need to determine if it was one of the standard
594 * events: PF, AF, or UF. If so, we handle them and
595 * update the RTC core.
596 */
597 if (likely(ctrlc & RTC_CTRL_B_PAU_MASK)) {
598 events = RTC_IRQF;
599
600 /* Check for a periodic interrupt. */
601 if ((ctrlb & RTC_CTRL_B_PIE) &&
602 (ctrlc & RTC_CTRL_C_PF)) {
603 events |= RTC_PF;
604 num_irqs++;
605 }
606
607 /* Check for an alarm interrupt. */
608 if ((ctrlb & RTC_CTRL_B_AIE) &&
609 (ctrlc & RTC_CTRL_C_AF)) {
610 events |= RTC_AF;
611 num_irqs++;
612 }
613
614 /* Check for an update interrupt. */
615 if ((ctrlb & RTC_CTRL_B_UIE) &&
616 (ctrlc & RTC_CTRL_C_UF)) {
617 events |= RTC_UF;
618 num_irqs++;
619 }
620
621 rtc_update_irq(rtc->dev, num_irqs, events);
622 } else {
623 /*
624 * One of the "extended" interrupts was received that
625 * is not recognized by the RTC core. These need to
626 * be handled in task context as they can call other
627 * functions and the time spent in irq context needs
628 * to be minimized. Schedule them into a workqueue
629 * and inform the RTC core that the IRQs were handled.
630 */
631 spin_unlock(&rtc->lock);
632 schedule_work(&rtc->work);
633 rtc_update_irq(rtc->dev, 0, 0);
634 return IRQ_HANDLED;
635 }
636 }
637 spin_unlock(&rtc->lock);
638
639 return events ? IRQ_HANDLED : IRQ_NONE;
640}
641
/**
 * ds1685_rtc_work_queue - work queue handler.
 * @work: work_struct containing data to work on in task context.
 *
 * Runs the handling for the "extended" interrupt sources (kickstart,
 * wake-up and ram-clear) that the IRQ handler defers via schedule_work().
 * The whole function runs with the RTC device's ops_lock mutex held and
 * with the chip switched to bank1; bank0 is restored before returning.
 *
 * Control 4A (flags) and Control 4B (enables) are sampled once on entry;
 * each source is handled only when both its enable and its flag bit are
 * set in that snapshot.
 */
static void
ds1685_rtc_work_queue(struct work_struct *work)
{
	struct ds1685_priv *rtc = container_of(work,
					       struct ds1685_priv, work);
	struct platform_device *pdev = to_platform_device(&rtc->dev->dev);
	struct mutex *rtc_mutex = &rtc->dev->ops_lock;
	u8 ctrl4a, ctrl4b;

	mutex_lock(rtc_mutex);

	/* Snapshot the extended flag (4A) and enable (4B) registers. */
	ds1685_rtc_switch_to_bank1(rtc);
	ctrl4a = rtc->read(rtc, RTC_EXT_CTRL_4A);
	ctrl4b = rtc->read(rtc, RTC_EXT_CTRL_4B);

	/*
	 * Check for a kickstart interrupt.  With Vcc applied, this
	 * typically means that the power button was pressed, so we
	 * begin the shutdown sequence.
	 */
	if ((ctrl4b & RTC_CTRL_4B_KSE) && (ctrl4a & RTC_CTRL_4A_KF)) {
		/* Briefly disable kickstarts to debounce button presses. */
		rtc->write(rtc, RTC_EXT_CTRL_4B,
			   (rtc->read(rtc, RTC_EXT_CTRL_4B) &
			    ~(RTC_CTRL_4B_KSE)));

		/*
		 * Clear the kickstart flag.
		 * NOTE(review): the value written is derived from the
		 * ctrl4a snapshot taken on entry, not a fresh read.
		 */
		rtc->write(rtc, RTC_EXT_CTRL_4A,
			   (ctrl4a & ~(RTC_CTRL_4A_KF)));


		/*
		 * Sleep 500ms before re-enabling kickstarts.  This allows
		 * adequate time to avoid reading signal jitter as additional
		 * button presses.  The ops_lock mutex stays held for the
		 * duration of the sleep.
		 */
		msleep(500);
		rtc->write(rtc, RTC_EXT_CTRL_4B,
			   (rtc->read(rtc, RTC_EXT_CTRL_4B) |
			    RTC_CTRL_4B_KSE));

		/* Call the platform pre-poweroff function.  Else, shutdown. */
		if (rtc->prepare_poweroff != NULL)
			rtc->prepare_poweroff();
		else
			ds1685_rtc_poweroff(pdev);
	}

	/*
	 * Check for a wake-up interrupt.  With Vcc applied, this is
	 * essentially a second alarm interrupt, except it takes into
	 * account the 'date' register in bank1 in addition to the
	 * standard three alarm registers.
	 */
	if ((ctrl4b & RTC_CTRL_4B_WIE) && (ctrl4a & RTC_CTRL_4A_WF)) {
		/* Clear the wake-up flag (again based on the snapshot). */
		rtc->write(rtc, RTC_EXT_CTRL_4A,
			   (ctrl4a & ~(RTC_CTRL_4A_WF)));

		/* Call the platform wake_alarm function if defined. */
		if (rtc->wake_alarm != NULL)
			rtc->wake_alarm();
		else
			dev_warn(&pdev->dev,
				 "Wake Alarm IRQ just occurred!\n");
	}

	/*
	 * Check for a ram-clear interrupt.  This happens if RIE=1 and RF=0
	 * when RCE=1 in 4B.  This clears all NVRAM bytes in bank0 by setting
	 * each byte to a logic 1.  This has no effect on any extended
	 * NV-SRAM that might be present, nor on the time/calendar/alarm
	 * registers.  After a ram-clear is completed, there is a minimum
	 * recovery time of ~150ms in which all reads/writes are locked out.
	 * NOTE: A ram-clear can still occur if RCE=1 and RIE=0.  We cannot
	 * catch this scenario.
	 */
	if ((ctrl4b & RTC_CTRL_4B_RIE) && (ctrl4a & RTC_CTRL_4A_RF)) {
		/* Clear the ram-clear flag, then wait out the recovery time. */
		rtc->write(rtc, RTC_EXT_CTRL_4A,
			   (ctrl4a & ~(RTC_CTRL_4A_RF)));
		msleep(150);

		/* Call the platform post_ram_clear function if defined. */
		if (rtc->post_ram_clear != NULL)
			rtc->post_ram_clear();
		else
			dev_warn(&pdev->dev,
				 "RAM-Clear IRQ just occurred!\n");
	}
	ds1685_rtc_switch_to_bank0(rtc);

	mutex_unlock(rtc_mutex);
}
738/* ----------------------------------------------------------------------- */
739
740
741/* ----------------------------------------------------------------------- */
742/* ProcFS interface */
743
744#ifdef CONFIG_PROC_FS
745#define NUM_REGS 6 /* Num of control registers. */
746#define NUM_BITS 8 /* Num bits per register. */
747#define NUM_SPACES 4 /* Num spaces between each bit. */
748
/*
 * Periodic Interrupt Rates.
 *
 * Human-readable period for each of the 16 rate-select values; indexed by
 * the RS bits of Control Register A when printing /proc output.  Both the
 * strings and the pointer array are const so the table cannot be modified
 * at run time.
 */
static const char * const ds1685_rtc_pirq_rate[16] = {
	"none", "3.90625ms", "7.8125ms", "0.122070ms", "0.244141ms",
	"0.488281ms", "0.9765625ms", "1.953125ms", "3.90625ms", "7.8125ms",
	"15.625ms", "31.25ms", "62.5ms", "125ms", "250ms", "500ms"
};
757
/*
 * Square-Wave Output Frequencies.
 *
 * Human-readable frequency for each of the 16 rate-select values; indexed
 * by the RS bits of Control Register A when printing /proc output.  Both
 * the strings and the pointer array are const so the table cannot be
 * modified at run time.
 */
static const char * const ds1685_rtc_sqw_freq[16] = {
	"none", "256Hz", "128Hz", "8192Hz", "4096Hz", "2048Hz", "1024Hz",
	"512Hz", "256Hz", "128Hz", "64Hz", "32Hz", "16Hz", "8Hz", "4Hz", "2Hz"
};
765
#ifdef CONFIG_RTC_DS1685_PROC_REGS
/**
 * ds1685_rtc_print_regs - render one register byte as spaced binary digits.
 * @hex: byte value to render, most-significant bit first.
 * @dest: output buffer; must hold NUM_BITS * (NUM_SPACES + 1) + 1 chars.
 *
 * Emits NUM_BITS characters ('0' or '1'), each followed by NUM_SPACES
 * blanks, then a terminating NUL.  Returns @dest so the call can be used
 * directly as a printf-style argument.
 */
static char*
ds1685_rtc_print_regs(u8 hex, char *dest)
{
	char *out = dest;
	u32 bit, pad;

	for (bit = 0; bit < NUM_BITS; bit++, hex <<= 1) {
		/* The current bit is always presented in the top position. */
		if (hex & 0x80)
			*out++ = '1';
		else
			*out++ = '0';

		for (pad = NUM_SPACES; pad > 0; pad--)
			*out++ = ' ';
	}
	*out = '\0';

	return dest;
}
#endif
792
793/**
794 * ds1685_rtc_proc - procfs access function.
795 * @dev: pointer to device structure.
796 * @seq: pointer to seq_file structure.
797 */
798static int
799ds1685_rtc_proc(struct device *dev, struct seq_file *seq)
800{
801 struct platform_device *pdev = to_platform_device(dev);
802 struct ds1685_priv *rtc = platform_get_drvdata(pdev);
803 u8 ctrla, ctrlb, ctrlc, ctrld, ctrl4a, ctrl4b, ssn[8];
804 char *model = '\0';
805#ifdef CONFIG_RTC_DS1685_PROC_REGS
806 char bits[NUM_REGS][(NUM_BITS * NUM_SPACES) + NUM_BITS + 1];
807#endif
808
809 /* Read all the relevant data from the control registers. */
810 ds1685_rtc_switch_to_bank1(rtc);
811 ds1685_rtc_get_ssn(rtc, ssn);
812 ctrla = rtc->read(rtc, RTC_CTRL_A);
813 ctrlb = rtc->read(rtc, RTC_CTRL_B);
814 ctrlc = rtc->read(rtc, RTC_CTRL_C);
815 ctrld = rtc->read(rtc, RTC_CTRL_D);
816 ctrl4a = rtc->read(rtc, RTC_EXT_CTRL_4A);
817 ctrl4b = rtc->read(rtc, RTC_EXT_CTRL_4B);
818 ds1685_rtc_switch_to_bank0(rtc);
819
820 /* Determine the RTC model. */
821 switch (ssn[0]) {
822 case RTC_MODEL_DS1685:
823 model = "DS1685/DS1687\0";
824 break;
825 case RTC_MODEL_DS1689:
826 model = "DS1689/DS1693\0";
827 break;
828 case RTC_MODEL_DS17285:
829 model = "DS17285/DS17287\0";
830 break;
831 case RTC_MODEL_DS17485:
832 model = "DS17485/DS17487\0";
833 break;
834 case RTC_MODEL_DS17885:
835 model = "DS17885/DS17887\0";
836 break;
837 default:
838 model = "Unknown\0";
839 break;
840 }
841
842 /* Print out the information. */
843 seq_printf(seq,
844 "Model\t\t: %s\n"
845 "Oscillator\t: %s\n"
846 "12/24hr\t\t: %s\n"
847 "DST\t\t: %s\n"
848 "Data mode\t: %s\n"
849 "Battery\t\t: %s\n"
850 "Aux batt\t: %s\n"
851 "Update IRQ\t: %s\n"
852 "Periodic IRQ\t: %s\n"
853 "Periodic Rate\t: %s\n"
854 "SQW Freq\t: %s\n"
855#ifdef CONFIG_RTC_DS1685_PROC_REGS
856 "Serial #\t: %02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x\n"
857 "Register Status\t:\n"
858 " Ctrl A\t: UIP DV2 DV1 DV0 RS3 RS2 RS1 RS0\n"
859 "\t\t: %s\n"
860 " Ctrl B\t: SET PIE AIE UIE SQWE DM 2412 DSE\n"
861 "\t\t: %s\n"
862 " Ctrl C\t: IRQF PF AF UF --- --- --- ---\n"
863 "\t\t: %s\n"
864 " Ctrl D\t: VRT --- --- --- --- --- --- ---\n"
865 "\t\t: %s\n"
866#if !defined(CONFIG_RTC_DRV_DS1685) && !defined(CONFIG_RTC_DRV_DS1689)
867 " Ctrl 4A\t: VRT2 INCR BME --- PAB RF WF KF\n"
868#else
869 " Ctrl 4A\t: VRT2 INCR --- --- PAB RF WF KF\n"
870#endif
871 "\t\t: %s\n"
872 " Ctrl 4B\t: ABE E32k CS RCE PRS RIE WIE KSE\n"
873 "\t\t: %s\n",
874#else
875 "Serial #\t: %02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x\n",
876#endif
877 model,
878 ((ctrla & RTC_CTRL_A_DV1) ? "enabled" : "disabled"),
879 ((ctrlb & RTC_CTRL_B_2412) ? "24-hour" : "12-hour"),
880 ((ctrlb & RTC_CTRL_B_DSE) ? "enabled" : "disabled"),
881 ((ctrlb & RTC_CTRL_B_DM) ? "binary" : "BCD"),
882 ((ctrld & RTC_CTRL_D_VRT) ? "ok" : "exhausted or n/a"),
883 ((ctrl4a & RTC_CTRL_4A_VRT2) ? "ok" : "exhausted or n/a"),
884 ((ctrlb & RTC_CTRL_B_UIE) ? "yes" : "no"),
885 ((ctrlb & RTC_CTRL_B_PIE) ? "yes" : "no"),
886 (!(ctrl4b & RTC_CTRL_4B_E32K) ?
887 ds1685_rtc_pirq_rate[(ctrla & RTC_CTRL_A_RS_MASK)] : "none"),
888 (!((ctrl4b & RTC_CTRL_4B_E32K)) ?
889 ds1685_rtc_sqw_freq[(ctrla & RTC_CTRL_A_RS_MASK)] : "32768Hz"),
890#ifdef CONFIG_RTC_DS1685_PROC_REGS
891 ssn[0], ssn[1], ssn[2], ssn[3], ssn[4], ssn[5], ssn[6], ssn[7],
892 ds1685_rtc_print_regs(ctrla, bits[0]),
893 ds1685_rtc_print_regs(ctrlb, bits[1]),
894 ds1685_rtc_print_regs(ctrlc, bits[2]),
895 ds1685_rtc_print_regs(ctrld, bits[3]),
896 ds1685_rtc_print_regs(ctrl4a, bits[4]),
897 ds1685_rtc_print_regs(ctrl4b, bits[5]));
898#else
899 ssn[0], ssn[1], ssn[2], ssn[3], ssn[4], ssn[5], ssn[6], ssn[7]);
900#endif
901 return 0;
902}
903#else
904#define ds1685_rtc_proc NULL
905#endif /* CONFIG_PROC_FS */
906/* ----------------------------------------------------------------------- */
907
908
909/* ----------------------------------------------------------------------- */
910/* RTC Class operations */
911
/*
 * RTC class callbacks implemented by this driver.  .proc resolves to NULL
 * when CONFIG_PROC_FS is disabled (see the ds1685_rtc_proc fallback
 * define); the remaining entries are always function pointers.
 */
static const struct rtc_class_ops
ds1685_rtc_ops = {
	.proc = ds1685_rtc_proc,
	.read_time = ds1685_rtc_read_time,
	.set_time = ds1685_rtc_set_time,
	.read_alarm = ds1685_rtc_read_alarm,
	.set_alarm = ds1685_rtc_set_alarm,
	.alarm_irq_enable = ds1685_rtc_alarm_irq_enable,
};
921/* ----------------------------------------------------------------------- */
922
923
924/* ----------------------------------------------------------------------- */
925/* SysFS interface */
926
927#ifdef CONFIG_SYSFS
928/**
929 * ds1685_rtc_sysfs_nvram_read - reads rtc nvram via sysfs.
930 * @file: pointer to file structure.
931 * @kobj: pointer to kobject structure.
932 * @bin_attr: pointer to bin_attribute structure.
933 * @buf: pointer to char array to hold the output.
934 * @pos: current file position pointer.
935 * @size: size of the data to read.
936 */
937static ssize_t
938ds1685_rtc_sysfs_nvram_read(struct file *filp, struct kobject *kobj,
939 struct bin_attribute *bin_attr, char *buf,
940 loff_t pos, size_t size)
941{
942 struct platform_device *pdev =
943 to_platform_device(container_of(kobj, struct device, kobj));
944 struct ds1685_priv *rtc = platform_get_drvdata(pdev);
945 ssize_t count;
946 unsigned long flags = 0;
947
948 spin_lock_irqsave(&rtc->lock, flags);
949 ds1685_rtc_switch_to_bank0(rtc);
950
951 /* Read NVRAM in time and bank0 registers. */
952 for (count = 0; size > 0 && pos < NVRAM_TOTAL_SZ_BANK0;
953 count++, size--) {
954 if (count < NVRAM_SZ_TIME)
955 *buf++ = rtc->read(rtc, (NVRAM_TIME_BASE + pos++));
956 else
957 *buf++ = rtc->read(rtc, (NVRAM_BANK0_BASE + pos++));
958 }
959
960#ifndef CONFIG_RTC_DRV_DS1689
961 if (size > 0) {
962 ds1685_rtc_switch_to_bank1(rtc);
963
964#ifndef CONFIG_RTC_DRV_DS1685
965 /* Enable burst-mode on DS17x85/DS17x87 */
966 rtc->write(rtc, RTC_EXT_CTRL_4A,
967 (rtc->read(rtc, RTC_EXT_CTRL_4A) |
968 RTC_CTRL_4A_BME));
969
970 /* We need one write to RTC_BANK1_RAM_ADDR_LSB to start
971 * reading with burst-mode */
972 rtc->write(rtc, RTC_BANK1_RAM_ADDR_LSB,
973 (pos - NVRAM_TOTAL_SZ_BANK0));
974#endif
975
976 /* Read NVRAM in bank1 registers. */
977 for (count = 0; size > 0 && pos < NVRAM_TOTAL_SZ;
978 count++, size--) {
979#ifdef CONFIG_RTC_DRV_DS1685
980 /* DS1685/DS1687 has to write to RTC_BANK1_RAM_ADDR
981 * before each read. */
982 rtc->write(rtc, RTC_BANK1_RAM_ADDR,
983 (pos - NVRAM_TOTAL_SZ_BANK0));
984#endif
985 *buf++ = rtc->read(rtc, RTC_BANK1_RAM_DATA_PORT);
986 pos++;
987 }
988
989#ifndef CONFIG_RTC_DRV_DS1685
990 /* Disable burst-mode on DS17x85/DS17x87 */
991 rtc->write(rtc, RTC_EXT_CTRL_4A,
992 (rtc->read(rtc, RTC_EXT_CTRL_4A) &
993 ~(RTC_CTRL_4A_BME)));
994#endif
995 ds1685_rtc_switch_to_bank0(rtc);
996 }
997#endif /* !CONFIG_RTC_DRV_DS1689 */
998 spin_unlock_irqrestore(&rtc->lock, flags);
999
1000 /*
1001 * XXX: Bug? this appears to cause the function to get executed
1002 * several times in succession. But it's the only way to actually get
1003 * data written out to a file.
1004 */
1005 return count;
1006}
1007
1008/**
1009 * ds1685_rtc_sysfs_nvram_write - writes rtc nvram via sysfs.
1010 * @file: pointer to file structure.
1011 * @kobj: pointer to kobject structure.
1012 * @bin_attr: pointer to bin_attribute structure.
1013 * @buf: pointer to char array to hold the input.
1014 * @pos: current file position pointer.
1015 * @size: size of the data to write.
1016 */
1017static ssize_t
1018ds1685_rtc_sysfs_nvram_write(struct file *filp, struct kobject *kobj,
1019 struct bin_attribute *bin_attr, char *buf,
1020 loff_t pos, size_t size)
1021{
1022 struct platform_device *pdev =
1023 to_platform_device(container_of(kobj, struct device, kobj));
1024 struct ds1685_priv *rtc = platform_get_drvdata(pdev);
1025 ssize_t count;
1026 unsigned long flags = 0;
1027
1028 spin_lock_irqsave(&rtc->lock, flags);
1029 ds1685_rtc_switch_to_bank0(rtc);
1030
1031 /* Write NVRAM in time and bank0 registers. */
1032 for (count = 0; size > 0 && pos < NVRAM_TOTAL_SZ_BANK0;
1033 count++, size--)
1034 if (count < NVRAM_SZ_TIME)
1035 rtc->write(rtc, (NVRAM_TIME_BASE + pos++),
1036 *buf++);
1037 else
1038 rtc->write(rtc, (NVRAM_BANK0_BASE), *buf++);
1039
1040#ifndef CONFIG_RTC_DRV_DS1689
1041 if (size > 0) {
1042 ds1685_rtc_switch_to_bank1(rtc);
1043
1044#ifndef CONFIG_RTC_DRV_DS1685
1045 /* Enable burst-mode on DS17x85/DS17x87 */
1046 rtc->write(rtc, RTC_EXT_CTRL_4A,
1047 (rtc->read(rtc, RTC_EXT_CTRL_4A) |
1048 RTC_CTRL_4A_BME));
1049
1050 /* We need one write to RTC_BANK1_RAM_ADDR_LSB to start
1051 * writing with burst-mode */
1052 rtc->write(rtc, RTC_BANK1_RAM_ADDR_LSB,
1053 (pos - NVRAM_TOTAL_SZ_BANK0));
1054#endif
1055
1056 /* Write NVRAM in bank1 registers. */
1057 for (count = 0; size > 0 && pos < NVRAM_TOTAL_SZ;
1058 count++, size--) {
1059#ifdef CONFIG_RTC_DRV_DS1685
1060 /* DS1685/DS1687 has to write to RTC_BANK1_RAM_ADDR
1061 * before each read. */
1062 rtc->write(rtc, RTC_BANK1_RAM_ADDR,
1063 (pos - NVRAM_TOTAL_SZ_BANK0));
1064#endif
1065 rtc->write(rtc, RTC_BANK1_RAM_DATA_PORT, *buf++);
1066 pos++;
1067 }
1068
1069#ifndef CONFIG_RTC_DRV_DS1685
1070 /* Disable burst-mode on DS17x85/DS17x87 */
1071 rtc->write(rtc, RTC_EXT_CTRL_4A,
1072 (rtc->read(rtc, RTC_EXT_CTRL_4A) &
1073 ~(RTC_CTRL_4A_BME)));
1074#endif
1075 ds1685_rtc_switch_to_bank0(rtc);
1076 }
1077#endif /* !CONFIG_RTC_DRV_DS1689 */
1078 spin_unlock_irqrestore(&rtc->lock, flags);
1079
1080 return count;
1081}
1082
/**
 * struct ds1685_rtc_sysfs_nvram_attr - sysfs attributes for rtc nvram.
 * @attr: nvram attributes.
 * @read: nvram read function.
 * @write: nvram write function.
 * @size: nvram total size (bank0 + extended).
 *
 * Binary sysfs file named "nvram": readable by everyone, writable by the
 * owner only (S_IRUGO | S_IWUSR).
 */
static struct bin_attribute
ds1685_rtc_sysfs_nvram_attr = {
	.attr = {
		.name = "nvram",
		.mode = S_IRUGO | S_IWUSR,
	},
	.read = ds1685_rtc_sysfs_nvram_read,
	.write = ds1685_rtc_sysfs_nvram_write,
	.size = NVRAM_TOTAL_SZ
};
1100
1101/**
1102 * ds1685_rtc_sysfs_battery_show - sysfs file for main battery status.
1103 * @dev: pointer to device structure.
1104 * @attr: pointer to device_attribute structure.
1105 * @buf: pointer to char array to hold the output.
1106 */
1107static ssize_t
1108ds1685_rtc_sysfs_battery_show(struct device *dev,
1109 struct device_attribute *attr, char *buf)
1110{
1111 struct platform_device *pdev = to_platform_device(dev);
1112 struct ds1685_priv *rtc = platform_get_drvdata(pdev);
1113 u8 ctrld;
1114
1115 ctrld = rtc->read(rtc, RTC_CTRL_D);
1116
1117 return snprintf(buf, 13, "%s\n",
1118 (ctrld & RTC_CTRL_D_VRT) ? "ok" : "not ok or N/A");
1119}
1120static DEVICE_ATTR(battery, S_IRUGO, ds1685_rtc_sysfs_battery_show, NULL);
1121
1122/**
1123 * ds1685_rtc_sysfs_auxbatt_show - sysfs file for aux battery status.
1124 * @dev: pointer to device structure.
1125 * @attr: pointer to device_attribute structure.
1126 * @buf: pointer to char array to hold the output.
1127 */
1128static ssize_t
1129ds1685_rtc_sysfs_auxbatt_show(struct device *dev,
1130 struct device_attribute *attr, char *buf)
1131{
1132 struct platform_device *pdev = to_platform_device(dev);
1133 struct ds1685_priv *rtc = platform_get_drvdata(pdev);
1134 u8 ctrl4a;
1135
1136 ds1685_rtc_switch_to_bank1(rtc);
1137 ctrl4a = rtc->read(rtc, RTC_EXT_CTRL_4A);
1138 ds1685_rtc_switch_to_bank0(rtc);
1139
1140 return snprintf(buf, 13, "%s\n",
1141 (ctrl4a & RTC_CTRL_4A_VRT2) ? "ok" : "not ok or N/A");
1142}
1143static DEVICE_ATTR(auxbatt, S_IRUGO, ds1685_rtc_sysfs_auxbatt_show, NULL);
1144
1145/**
1146 * ds1685_rtc_sysfs_serial_show - sysfs file for silicon serial number.
1147 * @dev: pointer to device structure.
1148 * @attr: pointer to device_attribute structure.
1149 * @buf: pointer to char array to hold the output.
1150 */
1151static ssize_t
1152ds1685_rtc_sysfs_serial_show(struct device *dev,
1153 struct device_attribute *attr, char *buf)
1154{
1155 struct platform_device *pdev = to_platform_device(dev);
1156 struct ds1685_priv *rtc = platform_get_drvdata(pdev);
1157 u8 ssn[8];
1158
1159 ds1685_rtc_switch_to_bank1(rtc);
1160 ds1685_rtc_get_ssn(rtc, ssn);
1161 ds1685_rtc_switch_to_bank0(rtc);
1162
1163 return snprintf(buf, 24, "%02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x\n",
1164 ssn[0], ssn[1], ssn[2], ssn[3], ssn[4], ssn[5],
1165 ssn[6], ssn[7]);
1166
1167 return 0;
1168}
1169static DEVICE_ATTR(serial, S_IRUGO, ds1685_rtc_sysfs_serial_show, NULL);
1170
/**
 * struct ds1685_rtc_sysfs_misc_attrs - list for misc RTC features.
 *
 * NULL-terminated list of the read-only battery, auxbatt and serial
 * attributes declared with DEVICE_ATTR above.
 */
static struct attribute*
ds1685_rtc_sysfs_misc_attrs[] = {
	&dev_attr_battery.attr,
	&dev_attr_auxbatt.attr,
	&dev_attr_serial.attr,
	NULL,
};
1181
/**
 * struct ds1685_rtc_sysfs_misc_grp - attr group for misc RTC features.
 *
 * Groups the misc attributes under a sysfs sub-directory named "misc".
 */
static const struct attribute_group
ds1685_rtc_sysfs_misc_grp = {
	.name = "misc",
	.attrs = ds1685_rtc_sysfs_misc_attrs,
};
1190
1191#ifdef CONFIG_RTC_DS1685_SYSFS_REGS
/**
 * struct ds1685_rtc_ctrl_regs - describes one control-register bit.
 * @name: char pointer for the bit name; matched against the sysfs
 *	attribute name by the lookup function.
 * @reg: control register the bit is in.
 * @bit: bit mask selecting the bit within @reg (the show/store handlers
 *	AND/OR it directly with the register value).
 */
struct ds1685_rtc_ctrl_regs {
	const char *name;
	const u8 reg;
	const u8 bit;
};
1203
/*
 * Ctrl register bit lookup table.
 *
 * One entry per control-register bit exposed through sysfs, keyed by the
 * attribute name.  The "bme" entry only exists on parts other than the
 * DS1685 and DS1689.  The list is terminated by an entry whose name is
 * NULL.
 */
static const struct ds1685_rtc_ctrl_regs
ds1685_ctrl_regs_table[] = {
	/* Control Register A. */
	{ "uip", RTC_CTRL_A, RTC_CTRL_A_UIP },
	{ "dv2", RTC_CTRL_A, RTC_CTRL_A_DV2 },
	{ "dv1", RTC_CTRL_A, RTC_CTRL_A_DV1 },
	{ "dv0", RTC_CTRL_A, RTC_CTRL_A_DV0 },
	{ "rs3", RTC_CTRL_A, RTC_CTRL_A_RS3 },
	{ "rs2", RTC_CTRL_A, RTC_CTRL_A_RS2 },
	{ "rs1", RTC_CTRL_A, RTC_CTRL_A_RS1 },
	{ "rs0", RTC_CTRL_A, RTC_CTRL_A_RS0 },
	/* Control Register B. */
	{ "set", RTC_CTRL_B, RTC_CTRL_B_SET },
	{ "pie", RTC_CTRL_B, RTC_CTRL_B_PIE },
	{ "aie", RTC_CTRL_B, RTC_CTRL_B_AIE },
	{ "uie", RTC_CTRL_B, RTC_CTRL_B_UIE },
	{ "sqwe", RTC_CTRL_B, RTC_CTRL_B_SQWE },
	{ "dm", RTC_CTRL_B, RTC_CTRL_B_DM },
	{ "2412", RTC_CTRL_B, RTC_CTRL_B_2412 },
	{ "dse", RTC_CTRL_B, RTC_CTRL_B_DSE },
	/* Control Register C. */
	{ "irqf", RTC_CTRL_C, RTC_CTRL_C_IRQF },
	{ "pf", RTC_CTRL_C, RTC_CTRL_C_PF },
	{ "af", RTC_CTRL_C, RTC_CTRL_C_AF },
	{ "uf", RTC_CTRL_C, RTC_CTRL_C_UF },
	/* Control Register D. */
	{ "vrt", RTC_CTRL_D, RTC_CTRL_D_VRT },
	/* Extended Control Register 4A. */
	{ "vrt2", RTC_EXT_CTRL_4A, RTC_CTRL_4A_VRT2 },
	{ "incr", RTC_EXT_CTRL_4A, RTC_CTRL_4A_INCR },
	{ "pab", RTC_EXT_CTRL_4A, RTC_CTRL_4A_PAB },
	{ "rf", RTC_EXT_CTRL_4A, RTC_CTRL_4A_RF },
	{ "wf", RTC_EXT_CTRL_4A, RTC_CTRL_4A_WF },
	{ "kf", RTC_EXT_CTRL_4A, RTC_CTRL_4A_KF },
#if !defined(CONFIG_RTC_DRV_DS1685) && !defined(CONFIG_RTC_DRV_DS1689)
	{ "bme", RTC_EXT_CTRL_4A, RTC_CTRL_4A_BME },
#endif
	/* Extended Control Register 4B. */
	{ "abe", RTC_EXT_CTRL_4B, RTC_CTRL_4B_ABE },
	{ "e32k", RTC_EXT_CTRL_4B, RTC_CTRL_4B_E32K },
	{ "cs", RTC_EXT_CTRL_4B, RTC_CTRL_4B_CS },
	{ "rce", RTC_EXT_CTRL_4B, RTC_CTRL_4B_RCE },
	{ "prs", RTC_EXT_CTRL_4B, RTC_CTRL_4B_PRS },
	{ "rie", RTC_EXT_CTRL_4B, RTC_CTRL_4B_RIE },
	{ "wie", RTC_EXT_CTRL_4B, RTC_CTRL_4B_WIE },
	{ "kse", RTC_EXT_CTRL_4B, RTC_CTRL_4B_KSE },
	/* Sentinel. */
	{ NULL, 0, 0 },
};
1249
1250/**
1251 * ds1685_rtc_sysfs_ctrl_regs_lookup - ctrl register bit lookup function.
1252 * @name: ctrl register bit to look up in ds1685_ctrl_regs_table.
1253 */
1254static const struct ds1685_rtc_ctrl_regs*
1255ds1685_rtc_sysfs_ctrl_regs_lookup(const char *name)
1256{
1257 const struct ds1685_rtc_ctrl_regs *p = ds1685_ctrl_regs_table;
1258
1259 for (; p->name != NULL; ++p)
1260 if (strcmp(p->name, name) == 0)
1261 return p;
1262
1263 return NULL;
1264}
1265
1266/**
1267 * ds1685_rtc_sysfs_ctrl_regs_show - reads a ctrl register bit via sysfs.
1268 * @dev: pointer to device structure.
1269 * @attr: pointer to device_attribute structure.
1270 * @buf: pointer to char array to hold the output.
1271 */
1272static ssize_t
1273ds1685_rtc_sysfs_ctrl_regs_show(struct device *dev,
1274 struct device_attribute *attr, char *buf)
1275{
1276 u8 tmp;
1277 struct ds1685_priv *rtc = dev_get_drvdata(dev);
1278 const struct ds1685_rtc_ctrl_regs *reg_info =
1279 ds1685_rtc_sysfs_ctrl_regs_lookup(attr->attr.name);
1280
1281 /* Make sure we actually matched something. */
1282 if (!reg_info)
1283 return -EINVAL;
1284
1285 /* No spinlock during a read -- mutex is already held. */
1286 ds1685_rtc_switch_to_bank1(rtc);
1287 tmp = rtc->read(rtc, reg_info->reg) & reg_info->bit;
1288 ds1685_rtc_switch_to_bank0(rtc);
1289
1290 return snprintf(buf, 2, "%d\n", (tmp ? 1 : 0));
1291}
1292
1293/**
1294 * ds1685_rtc_sysfs_ctrl_regs_store - writes a ctrl register bit via sysfs.
1295 * @dev: pointer to device structure.
1296 * @attr: pointer to device_attribute structure.
1297 * @buf: pointer to char array to hold the output.
1298 * @count: number of bytes written.
1299 */
1300static ssize_t
1301ds1685_rtc_sysfs_ctrl_regs_store(struct device *dev,
1302 struct device_attribute *attr,
1303 const char *buf, size_t count)
1304{
1305 struct ds1685_priv *rtc = dev_get_drvdata(dev);
1306 u8 reg = 0, bit = 0, tmp;
1307 unsigned long flags = 0;
1308 long int val = 0;
1309 const struct ds1685_rtc_ctrl_regs *reg_info =
1310 ds1685_rtc_sysfs_ctrl_regs_lookup(attr->attr.name);
1311
1312 /* We only accept numbers. */
1313 if (kstrtol(buf, 10, &val) < 0)
1314 return -EINVAL;
1315
1316 /* bits are binary, 0 or 1 only. */
1317 if ((val != 0) && (val != 1))
1318 return -ERANGE;
1319
1320 /* Make sure we actually matched something. */
1321 if (!reg_info)
1322 return -EINVAL;
1323
1324 reg = reg_info->reg;
1325 bit = reg_info->bit;
1326
1327 /* Safe to spinlock during a write. */
1328 ds1685_rtc_begin_ctrl_access(rtc, flags);
1329 tmp = rtc->read(rtc, reg);
1330 rtc->write(rtc, reg, (val ? (tmp | bit) : (tmp & ~(bit))));
1331 ds1685_rtc_end_ctrl_access(rtc, flags);
1332
1333 return count;
1334}
1335
/**
 * DS1685_RTC_SYSFS_CTRL_REG_RO - device_attribute for read-only register bit.
 * @bit: bit to read.
 *
 * Declares dev_attr_<bit> with mode S_IRUGO, the shared ctrl-regs show
 * handler and no store handler.  @bit must match a name in
 * ds1685_ctrl_regs_table, since the handler looks the bit up by attribute
 * name.
 */
#define DS1685_RTC_SYSFS_CTRL_REG_RO(bit)				\
	static DEVICE_ATTR(bit, S_IRUGO,				\
	ds1685_rtc_sysfs_ctrl_regs_show, NULL)
1343
/**
 * DS1685_RTC_SYSFS_CTRL_REG_RW - device_attribute for read-write register bit.
 * @bit: bit to read or write.
 *
 * Declares dev_attr_<bit> with mode S_IRUGO | S_IWUSR and the shared
 * ctrl-regs show and store handlers.  @bit must match a name in
 * ds1685_ctrl_regs_table, since the handlers look the bit up by attribute
 * name.
 */
#define DS1685_RTC_SYSFS_CTRL_REG_RW(bit)				\
	static DEVICE_ATTR(bit, S_IRUGO | S_IWUSR,			\
	ds1685_rtc_sysfs_ctrl_regs_show,				\
	ds1685_rtc_sysfs_ctrl_regs_store)
1352
/*
 * Control Register A bits.
 *
 * uip and dv0 are exposed read-only; dv2, dv1 and rs3..rs0 are read-write.
 */
DS1685_RTC_SYSFS_CTRL_REG_RO(uip);
DS1685_RTC_SYSFS_CTRL_REG_RW(dv2);
DS1685_RTC_SYSFS_CTRL_REG_RW(dv1);
DS1685_RTC_SYSFS_CTRL_REG_RO(dv0);
DS1685_RTC_SYSFS_CTRL_REG_RW(rs3);
DS1685_RTC_SYSFS_CTRL_REG_RW(rs2);
DS1685_RTC_SYSFS_CTRL_REG_RW(rs1);
DS1685_RTC_SYSFS_CTRL_REG_RW(rs0);

/* NULL-terminated attribute list for the "ctrla" group. */
static struct attribute*
ds1685_rtc_sysfs_ctrla_attrs[] = {
	&dev_attr_uip.attr,
	&dev_attr_dv2.attr,
	&dev_attr_dv1.attr,
	&dev_attr_dv0.attr,
	&dev_attr_rs3.attr,
	&dev_attr_rs2.attr,
	&dev_attr_rs1.attr,
	&dev_attr_rs0.attr,
	NULL,
};

/* sysfs sub-directory "ctrla" holding the Control Register A bits. */
static const struct attribute_group
ds1685_rtc_sysfs_ctrla_grp = {
	.name = "ctrla",
	.attrs = ds1685_rtc_sysfs_ctrla_attrs,
};
1383
1384
/*
 * Control Register B bits.
 *
 * set, dm, 2412 and dse are exposed read-only; pie, aie, uie and sqwe are
 * read-write.
 */
DS1685_RTC_SYSFS_CTRL_REG_RO(set);
DS1685_RTC_SYSFS_CTRL_REG_RW(pie);
DS1685_RTC_SYSFS_CTRL_REG_RW(aie);
DS1685_RTC_SYSFS_CTRL_REG_RW(uie);
DS1685_RTC_SYSFS_CTRL_REG_RW(sqwe);
DS1685_RTC_SYSFS_CTRL_REG_RO(dm);
DS1685_RTC_SYSFS_CTRL_REG_RO(2412);
DS1685_RTC_SYSFS_CTRL_REG_RO(dse);

/* NULL-terminated attribute list for the "ctrlb" group. */
static struct attribute*
ds1685_rtc_sysfs_ctrlb_attrs[] = {
	&dev_attr_set.attr,
	&dev_attr_pie.attr,
	&dev_attr_aie.attr,
	&dev_attr_uie.attr,
	&dev_attr_sqwe.attr,
	&dev_attr_dm.attr,
	&dev_attr_2412.attr,
	&dev_attr_dse.attr,
	NULL,
};

/* sysfs sub-directory "ctrlb" holding the Control Register B bits. */
static const struct attribute_group
ds1685_rtc_sysfs_ctrlb_grp = {
	.name = "ctrlb",
	.attrs = ds1685_rtc_sysfs_ctrlb_attrs,
};
1415
/*
 * Control Register C bits.
 *
 * Reading Control C clears these bits!  Reading them individually can
 * possibly cause an interrupt to be missed.  Use the /proc interface
 * to see all the bits in this register simultaneously.
 *
 * All four bits are exposed read-only.
 */
DS1685_RTC_SYSFS_CTRL_REG_RO(irqf);
DS1685_RTC_SYSFS_CTRL_REG_RO(pf);
DS1685_RTC_SYSFS_CTRL_REG_RO(af);
DS1685_RTC_SYSFS_CTRL_REG_RO(uf);

/* NULL-terminated attribute list for the "ctrlc" group. */
static struct attribute*
ds1685_rtc_sysfs_ctrlc_attrs[] = {
	&dev_attr_irqf.attr,
	&dev_attr_pf.attr,
	&dev_attr_af.attr,
	&dev_attr_uf.attr,
	NULL,
};

/* sysfs sub-directory "ctrlc" holding the Control Register C bits. */
static const struct attribute_group
ds1685_rtc_sysfs_ctrlc_grp = {
	.name = "ctrlc",
	.attrs = ds1685_rtc_sysfs_ctrlc_attrs,
};
1442
/*
 * Control Register D bits.
 *
 * Only vrt is exposed, read-only.
 */
DS1685_RTC_SYSFS_CTRL_REG_RO(vrt);

/* NULL-terminated attribute list for the "ctrld" group. */
static struct attribute*
ds1685_rtc_sysfs_ctrld_attrs[] = {
	&dev_attr_vrt.attr,
	NULL,
};

/* sysfs sub-directory "ctrld" holding the Control Register D bits. */
static const struct attribute_group
ds1685_rtc_sysfs_ctrld_grp = {
	.name = "ctrld",
	.attrs = ds1685_rtc_sysfs_ctrld_attrs,
};
1459
/*
 * Control Register 4A bits.
 *
 * vrt2 and incr are exposed read-only; pab, rf, wf and kf are read-write.
 * bme is exposed (read-only) only on parts other than the DS1685 and
 * DS1689.
 */
DS1685_RTC_SYSFS_CTRL_REG_RO(vrt2);
DS1685_RTC_SYSFS_CTRL_REG_RO(incr);
DS1685_RTC_SYSFS_CTRL_REG_RW(pab);
DS1685_RTC_SYSFS_CTRL_REG_RW(rf);
DS1685_RTC_SYSFS_CTRL_REG_RW(wf);
DS1685_RTC_SYSFS_CTRL_REG_RW(kf);
#if !defined(CONFIG_RTC_DRV_DS1685) && !defined(CONFIG_RTC_DRV_DS1689)
DS1685_RTC_SYSFS_CTRL_REG_RO(bme);
#endif

/* NULL-terminated attribute list for the "ctrl4a" group. */
static struct attribute*
ds1685_rtc_sysfs_ctrl4a_attrs[] = {
	&dev_attr_vrt2.attr,
	&dev_attr_incr.attr,
	&dev_attr_pab.attr,
	&dev_attr_rf.attr,
	&dev_attr_wf.attr,
	&dev_attr_kf.attr,
#if !defined(CONFIG_RTC_DRV_DS1685) && !defined(CONFIG_RTC_DRV_DS1689)
	&dev_attr_bme.attr,
#endif
	NULL,
};

/* sysfs sub-directory "ctrl4a" holding the Control Register 4A bits. */
static const struct attribute_group
ds1685_rtc_sysfs_ctrl4a_grp = {
	.name = "ctrl4a",
	.attrs = ds1685_rtc_sysfs_ctrl4a_attrs,
};
1492
/*
 * Control Register 4B bits.
 *
 * cs is exposed read-only; abe, e32k, rce, prs, rie, wie and kse are
 * read-write.
 */
DS1685_RTC_SYSFS_CTRL_REG_RW(abe);
DS1685_RTC_SYSFS_CTRL_REG_RW(e32k);
DS1685_RTC_SYSFS_CTRL_REG_RO(cs);
DS1685_RTC_SYSFS_CTRL_REG_RW(rce);
DS1685_RTC_SYSFS_CTRL_REG_RW(prs);
DS1685_RTC_SYSFS_CTRL_REG_RW(rie);
DS1685_RTC_SYSFS_CTRL_REG_RW(wie);
DS1685_RTC_SYSFS_CTRL_REG_RW(kse);

/* NULL-terminated attribute list for the "ctrl4b" group. */
static struct attribute*
ds1685_rtc_sysfs_ctrl4b_attrs[] = {
	&dev_attr_abe.attr,
	&dev_attr_e32k.attr,
	&dev_attr_cs.attr,
	&dev_attr_rce.attr,
	&dev_attr_prs.attr,
	&dev_attr_rie.attr,
	&dev_attr_wie.attr,
	&dev_attr_kse.attr,
	NULL,
};

/* sysfs sub-directory "ctrl4b" holding the Control Register 4B bits. */
static const struct attribute_group
ds1685_rtc_sysfs_ctrl4b_grp = {
	.name = "ctrl4b",
	.attrs = ds1685_rtc_sysfs_ctrl4b_attrs,
};
1523
1524
/**
 * struct ds1685_rtc_time_regs - describes one time/date register.
 * @name: char pointer for the register name; matched against the sysfs
 *	attribute name by the lookup function.
 * @reg: address of the time/date/alarm register.
 * @mask: mask handed to the BCD/binary conversion helpers for this
 *	register.
 * @min: smallest value accepted when the register is written via sysfs.
 * @max: largest value accepted when the register is written via sysfs.
 */
struct ds1685_rtc_time_regs {
	const char *name;
	const u8 reg;
	const u8 mask;
	const u8 min;
	const u8 max;
};
1538
/*
 * Time/Date register lookup tables.
 *
 * This is the BCD-mode variant: same names, registers and limits as the
 * binary table, but with the BCD masks.  min/max are written in decimal.
 * The list is terminated by an entry whose name is NULL.
 */
static const struct ds1685_rtc_time_regs
ds1685_time_regs_bcd_table[] = {
	{ "seconds", RTC_SECS, RTC_SECS_BCD_MASK, 0, 59 },
	{ "minutes", RTC_MINS, RTC_MINS_BCD_MASK, 0, 59 },
	{ "hours", RTC_HRS, RTC_HRS_24_BCD_MASK, 0, 23 },
	{ "wday", RTC_WDAY, RTC_WDAY_MASK, 1, 7 },
	{ "mday", RTC_MDAY, RTC_MDAY_BCD_MASK, 1, 31 },
	{ "month", RTC_MONTH, RTC_MONTH_BCD_MASK, 1, 12 },
	{ "year", RTC_YEAR, RTC_YEAR_BCD_MASK, 0, 99 },
	{ "century", RTC_CENTURY, RTC_CENTURY_MASK, 0, 99 },
	{ "alarm_seconds", RTC_SECS_ALARM, RTC_SECS_BCD_MASK, 0, 59 },
	{ "alarm_minutes", RTC_MINS_ALARM, RTC_MINS_BCD_MASK, 0, 59 },
	{ "alarm_hours", RTC_HRS_ALARM, RTC_HRS_24_BCD_MASK, 0, 23 },
	{ "alarm_mday", RTC_MDAY_ALARM, RTC_MDAY_ALARM_MASK, 1, 31 },
	/* Sentinel. */
	{ NULL, 0, 0, 0, 0 },
};
1558
/*
 * Binary-mode variant of the time/date register lookup table: same names,
 * registers and limits as the BCD table, but with the binary masks.
 * min/max are written in hex (0x3b == 59, 0x17 == 23, 0x1f == 31,
 * 0x0c == 12, 0x63 == 99).  The list is terminated by an entry whose name
 * is NULL.
 */
static const struct ds1685_rtc_time_regs
ds1685_time_regs_bin_table[] = {
	{ "seconds", RTC_SECS, RTC_SECS_BIN_MASK, 0x00, 0x3b },
	{ "minutes", RTC_MINS, RTC_MINS_BIN_MASK, 0x00, 0x3b },
	{ "hours", RTC_HRS, RTC_HRS_24_BIN_MASK, 0x00, 0x17 },
	{ "wday", RTC_WDAY, RTC_WDAY_MASK, 0x01, 0x07 },
	{ "mday", RTC_MDAY, RTC_MDAY_BIN_MASK, 0x01, 0x1f },
	{ "month", RTC_MONTH, RTC_MONTH_BIN_MASK, 0x01, 0x0c },
	{ "year", RTC_YEAR, RTC_YEAR_BIN_MASK, 0x00, 0x63 },
	{ "century", RTC_CENTURY, RTC_CENTURY_MASK, 0x00, 0x63 },
	{ "alarm_seconds", RTC_SECS_ALARM, RTC_SECS_BIN_MASK, 0x00, 0x3b },
	{ "alarm_minutes", RTC_MINS_ALARM, RTC_MINS_BIN_MASK, 0x00, 0x3b },
	{ "alarm_hours", RTC_HRS_ALARM, RTC_HRS_24_BIN_MASK, 0x00, 0x17 },
	{ "alarm_mday", RTC_MDAY_ALARM, RTC_MDAY_ALARM_MASK, 0x01, 0x1f },
	/* Sentinel. */
	{ NULL, 0, 0, 0x00, 0x00 },
};
1575
1576/**
1577 * ds1685_rtc_sysfs_time_regs_bcd_lookup - time/date reg bit lookup function.
1578 * @name: register bit to look up in ds1685_time_regs_bcd_table.
1579 */
1580static const struct ds1685_rtc_time_regs*
1581ds1685_rtc_sysfs_time_regs_lookup(const char *name, bool bcd_mode)
1582{
1583 const struct ds1685_rtc_time_regs *p;
1584
1585 if (bcd_mode)
1586 p = ds1685_time_regs_bcd_table;
1587 else
1588 p = ds1685_time_regs_bin_table;
1589
1590 for (; p->name != NULL; ++p)
1591 if (strcmp(p->name, name) == 0)
1592 return p;
1593
1594 return NULL;
1595}
1596
1597/**
1598 * ds1685_rtc_sysfs_time_regs_show - reads a time/date register via sysfs.
1599 * @dev: pointer to device structure.
1600 * @attr: pointer to device_attribute structure.
1601 * @buf: pointer to char array to hold the output.
1602 */
1603static ssize_t
1604ds1685_rtc_sysfs_time_regs_show(struct device *dev,
1605 struct device_attribute *attr, char *buf)
1606{
1607 u8 tmp;
1608 struct ds1685_priv *rtc = dev_get_drvdata(dev);
1609 const struct ds1685_rtc_time_regs *bcd_reg_info =
1610 ds1685_rtc_sysfs_time_regs_lookup(attr->attr.name, true);
1611 const struct ds1685_rtc_time_regs *bin_reg_info =
1612 ds1685_rtc_sysfs_time_regs_lookup(attr->attr.name, false);
1613
1614 /* Make sure we actually matched something. */
1615 if (!bcd_reg_info && !bin_reg_info)
1616 return -EINVAL;
1617
1618 /* bcd_reg_info->reg == bin_reg_info->reg. */
1619 ds1685_rtc_begin_data_access(rtc);
1620 tmp = rtc->read(rtc, bcd_reg_info->reg);
1621 ds1685_rtc_end_data_access(rtc);
1622
1623 tmp = ds1685_rtc_bcd2bin(rtc, tmp, bcd_reg_info->mask,
1624 bin_reg_info->mask);
1625
1626 return snprintf(buf, 4, "%d\n", tmp);
1627}
1628
1629/**
1630 * ds1685_rtc_sysfs_time_regs_store - writes a time/date register via sysfs.
1631 * @dev: pointer to device structure.
1632 * @attr: pointer to device_attribute structure.
1633 * @buf: pointer to char array to hold the output.
1634 * @count: number of bytes written.
1635 */
1636static ssize_t
1637ds1685_rtc_sysfs_time_regs_store(struct device *dev,
1638 struct device_attribute *attr,
1639 const char *buf, size_t count)
1640{
1641 long int val = 0;
1642 struct ds1685_priv *rtc = dev_get_drvdata(dev);
1643 const struct ds1685_rtc_time_regs *bcd_reg_info =
1644 ds1685_rtc_sysfs_time_regs_lookup(attr->attr.name, true);
1645 const struct ds1685_rtc_time_regs *bin_reg_info =
1646 ds1685_rtc_sysfs_time_regs_lookup(attr->attr.name, false);
1647
1648 /* We only accept numbers. */
1649 if (kstrtol(buf, 10, &val) < 0)
1650 return -EINVAL;
1651
1652 /* Make sure we actually matched something. */
1653 if (!bcd_reg_info && !bin_reg_info)
1654 return -EINVAL;
1655
1656 /* Check for a valid range. */
1657 if (rtc->bcd_mode) {
1658 if ((val < bcd_reg_info->min) || (val > bcd_reg_info->max))
1659 return -ERANGE;
1660 } else {
1661 if ((val < bin_reg_info->min) || (val > bin_reg_info->max))
1662 return -ERANGE;
1663 }
1664
1665 val = ds1685_rtc_bin2bcd(rtc, val, bin_reg_info->mask,
1666 bcd_reg_info->mask);
1667
1668 /* bcd_reg_info->reg == bin_reg_info->reg. */
1669 ds1685_rtc_begin_data_access(rtc);
1670 rtc->write(rtc, bcd_reg_info->reg, val);
1671 ds1685_rtc_end_data_access(rtc);
1672
1673 return count;
1674}
1675
1676/**
1677 * DS1685_RTC_SYSFS_REG_RW - device_attribute for a read-write time register.
1678 * @reg: time/date register to read or write.
1679 */
1680#define DS1685_RTC_SYSFS_TIME_REG_RW(reg) \
1681 static DEVICE_ATTR(reg, S_IRUGO | S_IWUSR, \
1682 ds1685_rtc_sysfs_time_regs_show, \
1683 ds1685_rtc_sysfs_time_regs_store)
1684
1685/*
1686 * Time/Date Register bits.
1687 */
1688DS1685_RTC_SYSFS_TIME_REG_RW(seconds);
1689DS1685_RTC_SYSFS_TIME_REG_RW(minutes);
1690DS1685_RTC_SYSFS_TIME_REG_RW(hours);
1691DS1685_RTC_SYSFS_TIME_REG_RW(wday);
1692DS1685_RTC_SYSFS_TIME_REG_RW(mday);
1693DS1685_RTC_SYSFS_TIME_REG_RW(month);
1694DS1685_RTC_SYSFS_TIME_REG_RW(year);
1695DS1685_RTC_SYSFS_TIME_REG_RW(century);
1696DS1685_RTC_SYSFS_TIME_REG_RW(alarm_seconds);
1697DS1685_RTC_SYSFS_TIME_REG_RW(alarm_minutes);
1698DS1685_RTC_SYSFS_TIME_REG_RW(alarm_hours);
1699DS1685_RTC_SYSFS_TIME_REG_RW(alarm_mday);
1700
1701static struct attribute*
1702ds1685_rtc_sysfs_time_attrs[] = {
1703 &dev_attr_seconds.attr,
1704 &dev_attr_minutes.attr,
1705 &dev_attr_hours.attr,
1706 &dev_attr_wday.attr,
1707 &dev_attr_mday.attr,
1708 &dev_attr_month.attr,
1709 &dev_attr_year.attr,
1710 &dev_attr_century.attr,
1711 NULL,
1712};
1713
1714static const struct attribute_group
1715ds1685_rtc_sysfs_time_grp = {
1716 .name = "datetime",
1717 .attrs = ds1685_rtc_sysfs_time_attrs,
1718};
1719
1720static struct attribute*
1721ds1685_rtc_sysfs_alarm_attrs[] = {
1722 &dev_attr_alarm_seconds.attr,
1723 &dev_attr_alarm_minutes.attr,
1724 &dev_attr_alarm_hours.attr,
1725 &dev_attr_alarm_mday.attr,
1726 NULL,
1727};
1728
1729static const struct attribute_group
1730ds1685_rtc_sysfs_alarm_grp = {
1731 .name = "alarm",
1732 .attrs = ds1685_rtc_sysfs_alarm_attrs,
1733};
1734#endif /* CONFIG_RTC_DS1685_SYSFS_REGS */
1735
1736
1737/**
1738 * ds1685_rtc_sysfs_register - register sysfs files.
1739 * @dev: pointer to device structure.
1740 */
1741static int
1742ds1685_rtc_sysfs_register(struct device *dev)
1743{
1744 int ret = 0;
1745
1746 sysfs_bin_attr_init(&ds1685_rtc_sysfs_nvram_attr);
1747 ret = sysfs_create_bin_file(&dev->kobj, &ds1685_rtc_sysfs_nvram_attr);
1748 if (ret)
1749 return ret;
1750
1751 ret = sysfs_create_group(&dev->kobj, &ds1685_rtc_sysfs_misc_grp);
1752 if (ret)
1753 return ret;
1754
1755#ifdef CONFIG_RTC_DS1685_SYSFS_REGS
1756 ret = sysfs_create_group(&dev->kobj, &ds1685_rtc_sysfs_ctrla_grp);
1757 if (ret)
1758 return ret;
1759
1760 ret = sysfs_create_group(&dev->kobj, &ds1685_rtc_sysfs_ctrlb_grp);
1761 if (ret)
1762 return ret;
1763
1764 ret = sysfs_create_group(&dev->kobj, &ds1685_rtc_sysfs_ctrlc_grp);
1765 if (ret)
1766 return ret;
1767
1768 ret = sysfs_create_group(&dev->kobj, &ds1685_rtc_sysfs_ctrld_grp);
1769 if (ret)
1770 return ret;
1771
1772 ret = sysfs_create_group(&dev->kobj, &ds1685_rtc_sysfs_ctrl4a_grp);
1773 if (ret)
1774 return ret;
1775
1776 ret = sysfs_create_group(&dev->kobj, &ds1685_rtc_sysfs_ctrl4b_grp);
1777 if (ret)
1778 return ret;
1779
1780 ret = sysfs_create_group(&dev->kobj, &ds1685_rtc_sysfs_time_grp);
1781 if (ret)
1782 return ret;
1783
1784 ret = sysfs_create_group(&dev->kobj, &ds1685_rtc_sysfs_alarm_grp);
1785 if (ret)
1786 return ret;
1787#endif
1788 return 0;
1789}
1790
1791/**
1792 * ds1685_rtc_sysfs_unregister - unregister sysfs files.
1793 * @dev: pointer to device structure.
1794 */
1795static int
1796ds1685_rtc_sysfs_unregister(struct device *dev)
1797{
1798 sysfs_remove_bin_file(&dev->kobj, &ds1685_rtc_sysfs_nvram_attr);
1799 sysfs_remove_group(&dev->kobj, &ds1685_rtc_sysfs_misc_grp);
1800
1801#ifdef CONFIG_RTC_DS1685_SYSFS_REGS
1802 sysfs_remove_group(&dev->kobj, &ds1685_rtc_sysfs_ctrla_grp);
1803 sysfs_remove_group(&dev->kobj, &ds1685_rtc_sysfs_ctrlb_grp);
1804 sysfs_remove_group(&dev->kobj, &ds1685_rtc_sysfs_ctrlc_grp);
1805 sysfs_remove_group(&dev->kobj, &ds1685_rtc_sysfs_ctrld_grp);
1806 sysfs_remove_group(&dev->kobj, &ds1685_rtc_sysfs_ctrl4a_grp);
1807 sysfs_remove_group(&dev->kobj, &ds1685_rtc_sysfs_ctrl4b_grp);
1808 sysfs_remove_group(&dev->kobj, &ds1685_rtc_sysfs_time_grp);
1809 sysfs_remove_group(&dev->kobj, &ds1685_rtc_sysfs_alarm_grp);
1810#endif
1811
1812 return 0;
1813}
1814#endif /* CONFIG_SYSFS */
1815
1816
1817
1818/* ----------------------------------------------------------------------- */
1819/* Driver Probe/Removal */
1820
1821/**
1822 * ds1685_rtc_probe - initializes rtc driver.
1823 * @pdev: pointer to platform_device structure.
1824 */
1825static int
1826ds1685_rtc_probe(struct platform_device *pdev)
1827{
1828 struct rtc_device *rtc_dev;
1829 struct resource *res;
1830 struct ds1685_priv *rtc;
1831 struct ds1685_rtc_platform_data *pdata;
1832 u8 ctrla, ctrlb, hours;
1833 unsigned char am_pm;
1834 int ret = 0;
1835
1836 /* Get the platform data. */
1837 pdata = (struct ds1685_rtc_platform_data *) pdev->dev.platform_data;
1838 if (!pdata)
1839 return -ENODEV;
1840
1841 /* Allocate memory for the rtc device. */
1842 rtc = devm_kzalloc(&pdev->dev, sizeof(*rtc), GFP_KERNEL);
1843 if (!rtc)
1844 return -ENOMEM;
1845
1846 /*
1847 * Allocate/setup any IORESOURCE_MEM resources, if required. Not all
1848 * platforms put the RTC in an easy-access place. Like the SGI Octane,
1849 * which attaches the RTC to a "ByteBus", hooked to a SuperIO chip
1850 * that sits behind the IOC3 PCI metadevice.
1851 */
1852 if (pdata->alloc_io_resources) {
1853 /* Get the platform resources. */
1854 res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
1855 if (!res)
1856 return -ENXIO;
1857 rtc->size = resource_size(res);
1858
1859 /* Request a memory region. */
1860 /* XXX: mmio-only for now. */
1861 if (!devm_request_mem_region(&pdev->dev, res->start, rtc->size,
1862 pdev->name))
1863 return -EBUSY;
1864
1865 /*
1866 * Set the base address for the rtc, and ioremap its
1867 * registers.
1868 */
1869 rtc->baseaddr = res->start;
1870 rtc->regs = devm_ioremap(&pdev->dev, res->start, rtc->size);
1871 if (!rtc->regs)
1872 return -ENOMEM;
1873 }
1874 rtc->alloc_io_resources = pdata->alloc_io_resources;
1875
1876 /* Get the register step size. */
1877 if (pdata->regstep > 0)
1878 rtc->regstep = pdata->regstep;
1879 else
1880 rtc->regstep = 1;
1881
1882 /* Platform read function, else default if mmio setup */
1883 if (pdata->plat_read)
1884 rtc->read = pdata->plat_read;
1885 else
1886 if (pdata->alloc_io_resources)
1887 rtc->read = ds1685_read;
1888 else
1889 return -ENXIO;
1890
1891 /* Platform write function, else default if mmio setup */
1892 if (pdata->plat_write)
1893 rtc->write = pdata->plat_write;
1894 else
1895 if (pdata->alloc_io_resources)
1896 rtc->write = ds1685_write;
1897 else
1898 return -ENXIO;
1899
1900 /* Platform pre-shutdown function, if defined. */
1901 if (pdata->plat_prepare_poweroff)
1902 rtc->prepare_poweroff = pdata->plat_prepare_poweroff;
1903
1904 /* Platform wake_alarm function, if defined. */
1905 if (pdata->plat_wake_alarm)
1906 rtc->wake_alarm = pdata->plat_wake_alarm;
1907
1908 /* Platform post_ram_clear function, if defined. */
1909 if (pdata->plat_post_ram_clear)
1910 rtc->post_ram_clear = pdata->plat_post_ram_clear;
1911
1912 /* Init the spinlock, workqueue, & set the driver data. */
1913 spin_lock_init(&rtc->lock);
1914 INIT_WORK(&rtc->work, ds1685_rtc_work_queue);
1915 platform_set_drvdata(pdev, rtc);
1916
1917 /* Turn the oscillator on if is not already on (DV1 = 1). */
1918 ctrla = rtc->read(rtc, RTC_CTRL_A);
1919 if (!(ctrla & RTC_CTRL_A_DV1))
1920 ctrla |= RTC_CTRL_A_DV1;
1921
1922 /* Enable the countdown chain (DV2 = 0) */
1923 ctrla &= ~(RTC_CTRL_A_DV2);
1924
1925 /* Clear RS3-RS0 in Control A. */
1926 ctrla &= ~(RTC_CTRL_A_RS_MASK);
1927
1928 /*
1929 * All done with Control A. Switch to Bank 1 for the remainder of
1930 * the RTC setup so we have access to the extended functions.
1931 */
1932 ctrla |= RTC_CTRL_A_DV0;
1933 rtc->write(rtc, RTC_CTRL_A, ctrla);
1934
1935 /* Default to 32768kHz output. */
1936 rtc->write(rtc, RTC_EXT_CTRL_4B,
1937 (rtc->read(rtc, RTC_EXT_CTRL_4B) | RTC_CTRL_4B_E32K));
1938
1939 /* Set the SET bit in Control B so we can do some housekeeping. */
1940 rtc->write(rtc, RTC_CTRL_B,
1941 (rtc->read(rtc, RTC_CTRL_B) | RTC_CTRL_B_SET));
1942
1943 /* Read Ext Ctrl 4A and check the INCR bit to avoid a lockout. */
1944 while (rtc->read(rtc, RTC_EXT_CTRL_4A) & RTC_CTRL_4A_INCR)
1945 cpu_relax();
1946
1947 /*
1948 * If the platform supports BCD mode, then set DM=0 in Control B.
1949 * Otherwise, set DM=1 for BIN mode.
1950 */
1951 ctrlb = rtc->read(rtc, RTC_CTRL_B);
1952 if (pdata->bcd_mode)
1953 ctrlb &= ~(RTC_CTRL_B_DM);
1954 else
1955 ctrlb |= RTC_CTRL_B_DM;
1956 rtc->bcd_mode = pdata->bcd_mode;
1957
1958 /*
1959 * Disable Daylight Savings Time (DSE = 0).
1960 * The RTC has hardcoded timezone information that is rendered
1961 * obselete. We'll let the OS deal with DST settings instead.
1962 */
1963 if (ctrlb & RTC_CTRL_B_DSE)
1964 ctrlb &= ~(RTC_CTRL_B_DSE);
1965
1966 /* Force 24-hour mode (2412 = 1). */
1967 if (!(ctrlb & RTC_CTRL_B_2412)) {
1968 /* Reinitialize the time hours. */
1969 hours = rtc->read(rtc, RTC_HRS);
1970 am_pm = hours & RTC_HRS_AMPM_MASK;
1971 hours = ds1685_rtc_bcd2bin(rtc, hours, RTC_HRS_12_BCD_MASK,
1972 RTC_HRS_12_BIN_MASK);
1973 hours = ((hours == 12) ? 0 : ((am_pm) ? hours + 12 : hours));
1974
1975 /* Enable 24-hour mode. */
1976 ctrlb |= RTC_CTRL_B_2412;
1977
1978 /* Write back to Control B, including DM & DSE bits. */
1979 rtc->write(rtc, RTC_CTRL_B, ctrlb);
1980
1981 /* Write the time hours back. */
1982 rtc->write(rtc, RTC_HRS,
1983 ds1685_rtc_bin2bcd(rtc, hours,
1984 RTC_HRS_24_BIN_MASK,
1985 RTC_HRS_24_BCD_MASK));
1986
1987 /* Reinitialize the alarm hours. */
1988 hours = rtc->read(rtc, RTC_HRS_ALARM);
1989 am_pm = hours & RTC_HRS_AMPM_MASK;
1990 hours = ds1685_rtc_bcd2bin(rtc, hours, RTC_HRS_12_BCD_MASK,
1991 RTC_HRS_12_BIN_MASK);
1992 hours = ((hours == 12) ? 0 : ((am_pm) ? hours + 12 : hours));
1993
1994 /* Write the alarm hours back. */
1995 rtc->write(rtc, RTC_HRS_ALARM,
1996 ds1685_rtc_bin2bcd(rtc, hours,
1997 RTC_HRS_24_BIN_MASK,
1998 RTC_HRS_24_BCD_MASK));
1999 } else {
2000 /* 24-hour mode is already set, so write Control B back. */
2001 rtc->write(rtc, RTC_CTRL_B, ctrlb);
2002 }
2003
2004 /* Unset the SET bit in Control B so the RTC can update. */
2005 rtc->write(rtc, RTC_CTRL_B,
2006 (rtc->read(rtc, RTC_CTRL_B) & ~(RTC_CTRL_B_SET)));
2007
2008 /* Check the main battery. */
2009 if (!(rtc->read(rtc, RTC_CTRL_D) & RTC_CTRL_D_VRT))
2010 dev_warn(&pdev->dev,
2011 "Main battery is exhausted! RTC may be invalid!\n");
2012
2013 /* Check the auxillary battery. It is optional. */
2014 if (!(rtc->read(rtc, RTC_EXT_CTRL_4A) & RTC_CTRL_4A_VRT2))
2015 dev_warn(&pdev->dev,
2016 "Aux battery is exhausted or not available.\n");
2017
2018 /* Read Ctrl B and clear PIE/AIE/UIE. */
2019 rtc->write(rtc, RTC_CTRL_B,
2020 (rtc->read(rtc, RTC_CTRL_B) & ~(RTC_CTRL_B_PAU_MASK)));
2021
2022 /* Reading Ctrl C auto-clears PF/AF/UF. */
2023 rtc->read(rtc, RTC_CTRL_C);
2024
2025 /* Read Ctrl 4B and clear RIE/WIE/KSE. */
2026 rtc->write(rtc, RTC_EXT_CTRL_4B,
2027 (rtc->read(rtc, RTC_EXT_CTRL_4B) & ~(RTC_CTRL_4B_RWK_MASK)));
2028
2029 /* Clear RF/WF/KF in Ctrl 4A. */
2030 rtc->write(rtc, RTC_EXT_CTRL_4A,
2031 (rtc->read(rtc, RTC_EXT_CTRL_4A) & ~(RTC_CTRL_4A_RWK_MASK)));
2032
2033 /*
2034 * Re-enable KSE to handle power button events. We do not enable
2035 * WIE or RIE by default.
2036 */
2037 rtc->write(rtc, RTC_EXT_CTRL_4B,
2038 (rtc->read(rtc, RTC_EXT_CTRL_4B) | RTC_CTRL_4B_KSE));
2039
2040 /*
2041 * Fetch the IRQ and setup the interrupt handler.
2042 *
2043 * Not all platforms have the IRQF pin tied to something. If not, the
2044 * RTC will still set the *IE / *F flags and raise IRQF in ctrlc, but
2045 * there won't be an automatic way of notifying the kernel about it,
2046 * unless ctrlc is explicitly polled.
2047 */
2048 if (!pdata->no_irq) {
2049 ret = platform_get_irq(pdev, 0);
2050 if (ret > 0) {
2051 rtc->irq_num = ret;
2052
2053 /* Request an IRQ. */
2054 ret = devm_request_irq(&pdev->dev, rtc->irq_num,
2055 ds1685_rtc_irq_handler,
2056 IRQF_SHARED, pdev->name, pdev);
2057
2058 /* Check to see if something came back. */
2059 if (unlikely(ret)) {
2060 dev_warn(&pdev->dev,
2061 "RTC interrupt not available\n");
2062 rtc->irq_num = 0;
2063 }
2064 } else
2065 return ret;
2066 }
2067 rtc->no_irq = pdata->no_irq;
2068
2069 /* Setup complete. */
2070 ds1685_rtc_switch_to_bank0(rtc);
2071
2072 /* Register the device as an RTC. */
2073 rtc_dev = rtc_device_register(pdev->name, &pdev->dev,
2074 &ds1685_rtc_ops, THIS_MODULE);
2075
2076 /* Success? */
2077 if (IS_ERR(rtc_dev))
2078 return PTR_ERR(rtc_dev);
2079
2080 /* Maximum periodic rate is 8192Hz (0.122070ms). */
2081 rtc_dev->max_user_freq = RTC_MAX_USER_FREQ;
2082
2083 /* See if the platform doesn't support UIE. */
2084 if (pdata->uie_unsupported)
2085 rtc_dev->uie_unsupported = 1;
2086 rtc->uie_unsupported = pdata->uie_unsupported;
2087
2088 rtc->dev = rtc_dev;
2089
2090#ifdef CONFIG_SYSFS
2091 ret = ds1685_rtc_sysfs_register(&pdev->dev);
2092 if (ret)
2093 rtc_device_unregister(rtc->dev);
2094#endif
2095
2096 /* Done! */
2097 return ret;
2098}
2099
2100/**
2101 * ds1685_rtc_remove - removes rtc driver.
2102 * @pdev: pointer to platform_device structure.
2103 */
2104static int
2105ds1685_rtc_remove(struct platform_device *pdev)
2106{
2107 struct ds1685_priv *rtc = platform_get_drvdata(pdev);
2108
2109#ifdef CONFIG_SYSFS
2110 ds1685_rtc_sysfs_unregister(&pdev->dev);
2111#endif
2112
2113 rtc_device_unregister(rtc->dev);
2114
2115 /* Read Ctrl B and clear PIE/AIE/UIE. */
2116 rtc->write(rtc, RTC_CTRL_B,
2117 (rtc->read(rtc, RTC_CTRL_B) &
2118 ~(RTC_CTRL_B_PAU_MASK)));
2119
2120 /* Reading Ctrl C auto-clears PF/AF/UF. */
2121 rtc->read(rtc, RTC_CTRL_C);
2122
2123 /* Read Ctrl 4B and clear RIE/WIE/KSE. */
2124 rtc->write(rtc, RTC_EXT_CTRL_4B,
2125 (rtc->read(rtc, RTC_EXT_CTRL_4B) &
2126 ~(RTC_CTRL_4B_RWK_MASK)));
2127
2128 /* Manually clear RF/WF/KF in Ctrl 4A. */
2129 rtc->write(rtc, RTC_EXT_CTRL_4A,
2130 (rtc->read(rtc, RTC_EXT_CTRL_4A) &
2131 ~(RTC_CTRL_4A_RWK_MASK)));
2132
2133 cancel_work_sync(&rtc->work);
2134
2135 return 0;
2136}
2137
2138/**
2139 * ds1685_rtc_driver - rtc driver properties.
2140 */
2141static struct platform_driver ds1685_rtc_driver = {
2142 .driver = {
2143 .name = "rtc-ds1685",
2144 .owner = THIS_MODULE,
2145 },
2146 .probe = ds1685_rtc_probe,
2147 .remove = ds1685_rtc_remove,
2148};
2149
2150/**
2151 * ds1685_rtc_init - rtc module init.
2152 */
2153static int __init
2154ds1685_rtc_init(void)
2155{
2156 return platform_driver_register(&ds1685_rtc_driver);
2157}
2158
2159/**
2160 * ds1685_rtc_exit - rtc module exit.
2161 */
2162static void __exit
2163ds1685_rtc_exit(void)
2164{
2165 platform_driver_unregister(&ds1685_rtc_driver);
2166}
2167
2168module_init(ds1685_rtc_init);
2169module_exit(ds1685_rtc_exit);
2170/* ----------------------------------------------------------------------- */
2171
2172
2173/* ----------------------------------------------------------------------- */
2174/* Poweroff function */
2175
2176/**
2177 * ds1685_rtc_poweroff - uses the RTC chip to power the system off.
2178 * @pdev: pointer to platform_device structure.
2179 */
2180extern void __noreturn
2181ds1685_rtc_poweroff(struct platform_device *pdev)
2182{
2183 u8 ctrla, ctrl4a, ctrl4b;
2184 struct ds1685_priv *rtc;
2185
2186 /* Check for valid RTC data, else, spin forever. */
2187 if (unlikely(!pdev)) {
2188 pr_emerg("rtc-ds1685: platform device data not available, spinning forever ...\n");
2189 unreachable();
2190 } else {
2191 /* Get the rtc data. */
2192 rtc = platform_get_drvdata(pdev);
2193
2194 /*
2195 * Disable our IRQ. We're powering down, so we're not
2196 * going to worry about cleaning up. Most of that should
2197 * have been taken care of by the shutdown scripts and this
2198 * is the final function call.
2199 */
2200 if (!rtc->no_irq)
2201 disable_irq_nosync(rtc->irq_num);
2202
2203 /* Oscillator must be on and the countdown chain enabled. */
2204 ctrla = rtc->read(rtc, RTC_CTRL_A);
2205 ctrla |= RTC_CTRL_A_DV1;
2206 ctrla &= ~(RTC_CTRL_A_DV2);
2207 rtc->write(rtc, RTC_CTRL_A, ctrla);
2208
2209 /*
2210 * Read Control 4A and check the status of the auxillary
2211 * battery. This must be present and working (VRT2 = 1)
2212 * for wakeup and kickstart functionality to be useful.
2213 */
2214 ds1685_rtc_switch_to_bank1(rtc);
2215 ctrl4a = rtc->read(rtc, RTC_EXT_CTRL_4A);
2216 if (ctrl4a & RTC_CTRL_4A_VRT2) {
2217 /* Clear all of the interrupt flags on Control 4A. */
2218 ctrl4a &= ~(RTC_CTRL_4A_RWK_MASK);
2219 rtc->write(rtc, RTC_EXT_CTRL_4A, ctrl4a);
2220
2221 /*
2222 * The auxillary battery is present and working.
2223 * Enable extended functions (ABE=1), enable
2224 * wake-up (WIE=1), and enable kickstart (KSE=1)
2225 * in Control 4B.
2226 */
2227 ctrl4b = rtc->read(rtc, RTC_EXT_CTRL_4B);
2228 ctrl4b |= (RTC_CTRL_4B_ABE | RTC_CTRL_4B_WIE |
2229 RTC_CTRL_4B_KSE);
2230 rtc->write(rtc, RTC_EXT_CTRL_4B, ctrl4b);
2231 }
2232
2233 /* Set PAB to 1 in Control 4A to power the system down. */
2234 dev_warn(&pdev->dev, "Powerdown.\n");
2235 msleep(20);
2236 rtc->write(rtc, RTC_EXT_CTRL_4A,
2237 (ctrl4a | RTC_CTRL_4A_PAB));
2238
2239 /* Spin ... we do not switch back to bank0. */
2240 unreachable();
2241 }
2242}
2243EXPORT_SYMBOL(ds1685_rtc_poweroff);
2244/* ----------------------------------------------------------------------- */
2245
2246
2247MODULE_AUTHOR("Joshua Kinard <kumba@gentoo.org>");
2248MODULE_AUTHOR("Matthias Fuchs <matthias.fuchs@esd-electronics.com>");
2249MODULE_DESCRIPTION("Dallas/Maxim DS1685/DS1687-series RTC driver");
2250MODULE_LICENSE("GPL");
2251MODULE_VERSION(DRV_VERSION);
2252MODULE_ALIAS("platform:rtc-ds1685");
diff --git a/drivers/rtc/rtc-isl12022.c b/drivers/rtc/rtc-isl12022.c
index ee3ba7e6b45e..f9b082784b90 100644
--- a/drivers/rtc/rtc-isl12022.c
+++ b/drivers/rtc/rtc-isl12022.c
@@ -275,7 +275,8 @@ static int isl12022_probe(struct i2c_client *client,
275 275
276#ifdef CONFIG_OF 276#ifdef CONFIG_OF
277static const struct of_device_id isl12022_dt_match[] = { 277static const struct of_device_id isl12022_dt_match[] = {
278 { .compatible = "isl,isl12022" }, 278 { .compatible = "isl,isl12022" }, /* for backward compat., don't use */
279 { .compatible = "isil,isl12022" },
279 { }, 280 { },
280}; 281};
281#endif 282#endif
diff --git a/drivers/rtc/rtc-isl12057.c b/drivers/rtc/rtc-isl12057.c
index b8f862953f7f..da818d3337ce 100644
--- a/drivers/rtc/rtc-isl12057.c
+++ b/drivers/rtc/rtc-isl12057.c
@@ -644,7 +644,8 @@ static SIMPLE_DEV_PM_OPS(isl12057_rtc_pm_ops, isl12057_rtc_suspend,
644 644
645#ifdef CONFIG_OF 645#ifdef CONFIG_OF
646static const struct of_device_id isl12057_dt_match[] = { 646static const struct of_device_id isl12057_dt_match[] = {
647 { .compatible = "isl,isl12057" }, 647 { .compatible = "isl,isl12057" }, /* for backward compat., don't use */
648 { .compatible = "isil,isl12057" },
648 { }, 649 { },
649}; 650};
650#endif 651#endif
diff --git a/drivers/staging/iio/light/isl29028.c b/drivers/staging/iio/light/isl29028.c
index e969107ddb47..6440e3b293ca 100644
--- a/drivers/staging/iio/light/isl29028.c
+++ b/drivers/staging/iio/light/isl29028.c
@@ -537,8 +537,8 @@ static const struct i2c_device_id isl29028_id[] = {
537MODULE_DEVICE_TABLE(i2c, isl29028_id); 537MODULE_DEVICE_TABLE(i2c, isl29028_id);
538 538
539static const struct of_device_id isl29028_of_match[] = { 539static const struct of_device_id isl29028_of_match[] = {
540 { .compatible = "isl,isl29028", }, 540 { .compatible = "isl,isl29028", }, /* for backward compat., don't use */
541 { .compatible = "isil,isl29028", },/* deprecated, don't use */ 541 { .compatible = "isil,isl29028", },
542 { }, 542 { },
543}; 543};
544MODULE_DEVICE_TABLE(of, isl29028_of_match); 544MODULE_DEVICE_TABLE(of, isl29028_of_match);
diff --git a/fs/Kconfig b/fs/Kconfig
index a6bb530b1ec5..ec35851e5b71 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -13,13 +13,6 @@ if BLOCK
13source "fs/ext2/Kconfig" 13source "fs/ext2/Kconfig"
14source "fs/ext3/Kconfig" 14source "fs/ext3/Kconfig"
15source "fs/ext4/Kconfig" 15source "fs/ext4/Kconfig"
16
17config FS_XIP
18# execute in place
19 bool
20 depends on EXT2_FS_XIP
21 default y
22
23source "fs/jbd/Kconfig" 16source "fs/jbd/Kconfig"
24source "fs/jbd2/Kconfig" 17source "fs/jbd2/Kconfig"
25 18
@@ -40,6 +33,21 @@ source "fs/ocfs2/Kconfig"
40source "fs/btrfs/Kconfig" 33source "fs/btrfs/Kconfig"
41source "fs/nilfs2/Kconfig" 34source "fs/nilfs2/Kconfig"
42 35
36config FS_DAX
37 bool "Direct Access (DAX) support"
38 depends on MMU
39 depends on !(ARM || MIPS || SPARC)
40 help
41 Direct Access (DAX) can be used on memory-backed block devices.
42 If the block device supports DAX and the filesystem supports DAX,
43 then you can avoid using the pagecache to buffer I/Os. Turning
44 on this option will compile in support for DAX; you will need to
45 mount the filesystem using the -o dax option.
46
47 If you do not have a block device that is capable of using this,
48 or if unsure, say N. Saying Y will increase the size of the kernel
49 by about 5kB.
50
43endif # BLOCK 51endif # BLOCK
44 52
45# Posix ACL utility routines 53# Posix ACL utility routines
diff --git a/fs/Makefile b/fs/Makefile
index bedff48e8fdc..0f4635f7c49c 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -28,6 +28,7 @@ obj-$(CONFIG_SIGNALFD) += signalfd.o
28obj-$(CONFIG_TIMERFD) += timerfd.o 28obj-$(CONFIG_TIMERFD) += timerfd.o
29obj-$(CONFIG_EVENTFD) += eventfd.o 29obj-$(CONFIG_EVENTFD) += eventfd.o
30obj-$(CONFIG_AIO) += aio.o 30obj-$(CONFIG_AIO) += aio.o
31obj-$(CONFIG_FS_DAX) += dax.o
31obj-$(CONFIG_FILE_LOCKING) += locks.o 32obj-$(CONFIG_FILE_LOCKING) += locks.o
32obj-$(CONFIG_COMPAT) += compat.o compat_ioctl.o 33obj-$(CONFIG_COMPAT) += compat.o compat_ioctl.o
33obj-$(CONFIG_BINFMT_AOUT) += binfmt_aout.o 34obj-$(CONFIG_BINFMT_AOUT) += binfmt_aout.o
diff --git a/fs/dax.c b/fs/dax.c
new file mode 100644
index 000000000000..ed1619ec6537
--- /dev/null
+++ b/fs/dax.c
@@ -0,0 +1,534 @@
1/*
2 * fs/dax.c - Direct Access filesystem code
3 * Copyright (c) 2013-2014 Intel Corporation
4 * Author: Matthew Wilcox <matthew.r.wilcox@intel.com>
5 * Author: Ross Zwisler <ross.zwisler@linux.intel.com>
6 *
7 * This program is free software; you can redistribute it and/or modify it
8 * under the terms and conditions of the GNU General Public License,
9 * version 2, as published by the Free Software Foundation.
10 *
11 * This program is distributed in the hope it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
14 * more details.
15 */
16
17#include <linux/atomic.h>
18#include <linux/blkdev.h>
19#include <linux/buffer_head.h>
20#include <linux/fs.h>
21#include <linux/genhd.h>
22#include <linux/highmem.h>
23#include <linux/memcontrol.h>
24#include <linux/mm.h>
25#include <linux/mutex.h>
26#include <linux/sched.h>
27#include <linux/uio.h>
28#include <linux/vmstat.h>
29
30int dax_clear_blocks(struct inode *inode, sector_t block, long size)
31{
32 struct block_device *bdev = inode->i_sb->s_bdev;
33 sector_t sector = block << (inode->i_blkbits - 9);
34
35 might_sleep();
36 do {
37 void *addr;
38 unsigned long pfn;
39 long count;
40
41 count = bdev_direct_access(bdev, sector, &addr, &pfn, size);
42 if (count < 0)
43 return count;
44 BUG_ON(size < count);
45 while (count > 0) {
46 unsigned pgsz = PAGE_SIZE - offset_in_page(addr);
47 if (pgsz > count)
48 pgsz = count;
49 if (pgsz < PAGE_SIZE)
50 memset(addr, 0, pgsz);
51 else
52 clear_page(addr);
53 addr += pgsz;
54 size -= pgsz;
55 count -= pgsz;
56 BUG_ON(pgsz & 511);
57 sector += pgsz / 512;
58 cond_resched();
59 }
60 } while (size);
61
62 return 0;
63}
64EXPORT_SYMBOL_GPL(dax_clear_blocks);
65
66static long dax_get_addr(struct buffer_head *bh, void **addr, unsigned blkbits)
67{
68 unsigned long pfn;
69 sector_t sector = bh->b_blocknr << (blkbits - 9);
70 return bdev_direct_access(bh->b_bdev, sector, addr, &pfn, bh->b_size);
71}
72
73static void dax_new_buf(void *addr, unsigned size, unsigned first, loff_t pos,
74 loff_t end)
75{
76 loff_t final = end - pos + first; /* The final byte of the buffer */
77
78 if (first > 0)
79 memset(addr, 0, first);
80 if (final < size)
81 memset(addr + final, 0, size - final);
82}
83
84static bool buffer_written(struct buffer_head *bh)
85{
86 return buffer_mapped(bh) && !buffer_unwritten(bh);
87}
88
89/*
90 * When ext4 encounters a hole, it returns without modifying the buffer_head
91 * which means that we can't trust b_size. To cope with this, we set b_state
92 * to 0 before calling get_block and, if any bit is set, we know we can trust
93 * b_size. Unfortunate, really, since ext4 knows precisely how long a hole is
94 * and would save us time calling get_block repeatedly.
95 */
96static bool buffer_size_valid(struct buffer_head *bh)
97{
98 return bh->b_state != 0;
99}
100
101static ssize_t dax_io(int rw, struct inode *inode, struct iov_iter *iter,
102 loff_t start, loff_t end, get_block_t get_block,
103 struct buffer_head *bh)
104{
105 ssize_t retval = 0;
106 loff_t pos = start;
107 loff_t max = start;
108 loff_t bh_max = start;
109 void *addr;
110 bool hole = false;
111
112 if (rw != WRITE)
113 end = min(end, i_size_read(inode));
114
115 while (pos < end) {
116 unsigned len;
117 if (pos == max) {
118 unsigned blkbits = inode->i_blkbits;
119 sector_t block = pos >> blkbits;
120 unsigned first = pos - (block << blkbits);
121 long size;
122
123 if (pos == bh_max) {
124 bh->b_size = PAGE_ALIGN(end - pos);
125 bh->b_state = 0;
126 retval = get_block(inode, block, bh,
127 rw == WRITE);
128 if (retval)
129 break;
130 if (!buffer_size_valid(bh))
131 bh->b_size = 1 << blkbits;
132 bh_max = pos - first + bh->b_size;
133 } else {
134 unsigned done = bh->b_size -
135 (bh_max - (pos - first));
136 bh->b_blocknr += done >> blkbits;
137 bh->b_size -= done;
138 }
139
140 hole = (rw != WRITE) && !buffer_written(bh);
141 if (hole) {
142 addr = NULL;
143 size = bh->b_size - first;
144 } else {
145 retval = dax_get_addr(bh, &addr, blkbits);
146 if (retval < 0)
147 break;
148 if (buffer_unwritten(bh) || buffer_new(bh))
149 dax_new_buf(addr, retval, first, pos,
150 end);
151 addr += first;
152 size = retval - first;
153 }
154 max = min(pos + size, end);
155 }
156
157 if (rw == WRITE)
158 len = copy_from_iter(addr, max - pos, iter);
159 else if (!hole)
160 len = copy_to_iter(addr, max - pos, iter);
161 else
162 len = iov_iter_zero(max - pos, iter);
163
164 if (!len)
165 break;
166
167 pos += len;
168 addr += len;
169 }
170
171 return (pos == start) ? retval : pos - start;
172}
173
174/**
175 * dax_do_io - Perform I/O to a DAX file
176 * @rw: READ to read or WRITE to write
177 * @iocb: The control block for this I/O
178 * @inode: The file which the I/O is directed at
179 * @iter: The addresses to do I/O from or to
180 * @pos: The file offset where the I/O starts
181 * @get_block: The filesystem method used to translate file offsets to blocks
182 * @end_io: A filesystem callback for I/O completion
183 * @flags: See below
184 *
185 * This function uses the same locking scheme as do_blockdev_direct_IO:
186 * If @flags has DIO_LOCKING set, we assume that the i_mutex is held by the
187 * caller for writes. For reads, we take and release the i_mutex ourselves.
188 * If DIO_LOCKING is not set, the filesystem takes care of its own locking.
189 * As with do_blockdev_direct_IO(), we increment i_dio_count while the I/O
190 * is in progress.
191 */
192ssize_t dax_do_io(int rw, struct kiocb *iocb, struct inode *inode,
193 struct iov_iter *iter, loff_t pos,
194 get_block_t get_block, dio_iodone_t end_io, int flags)
195{
196 struct buffer_head bh;
197 ssize_t retval = -EINVAL;
198 loff_t end = pos + iov_iter_count(iter);
199
200 memset(&bh, 0, sizeof(bh));
201
202 if ((flags & DIO_LOCKING) && (rw == READ)) {
203 struct address_space *mapping = inode->i_mapping;
204 mutex_lock(&inode->i_mutex);
205 retval = filemap_write_and_wait_range(mapping, pos, end - 1);
206 if (retval) {
207 mutex_unlock(&inode->i_mutex);
208 goto out;
209 }
210 }
211
212 /* Protects against truncate */
213 atomic_inc(&inode->i_dio_count);
214
215 retval = dax_io(rw, inode, iter, pos, end, get_block, &bh);
216
217 if ((flags & DIO_LOCKING) && (rw == READ))
218 mutex_unlock(&inode->i_mutex);
219
220 if ((retval > 0) && end_io)
221 end_io(iocb, pos, retval, bh.b_private);
222
223 inode_dio_done(inode);
224 out:
225 return retval;
226}
227EXPORT_SYMBOL_GPL(dax_do_io);
228
229/*
230 * The user has performed a load from a hole in the file. Allocating
231 * a new page in the file would cause excessive storage usage for
232 * workloads with sparse files. We allocate a page cache page instead.
233 * We'll kick it out of the page cache if it's ever written to,
234 * otherwise it will simply fall out of the page cache under memory
235 * pressure without ever having been dirtied.
236 */
237static int dax_load_hole(struct address_space *mapping, struct page *page,
238 struct vm_fault *vmf)
239{
240 unsigned long size;
241 struct inode *inode = mapping->host;
242 if (!page)
243 page = find_or_create_page(mapping, vmf->pgoff,
244 GFP_KERNEL | __GFP_ZERO);
245 if (!page)
246 return VM_FAULT_OOM;
247 /* Recheck i_size under page lock to avoid truncate race */
248 size = (i_size_read(inode) + PAGE_SIZE - 1) >> PAGE_SHIFT;
249 if (vmf->pgoff >= size) {
250 unlock_page(page);
251 page_cache_release(page);
252 return VM_FAULT_SIGBUS;
253 }
254
255 vmf->page = page;
256 return VM_FAULT_LOCKED;
257}
258
259static int copy_user_bh(struct page *to, struct buffer_head *bh,
260 unsigned blkbits, unsigned long vaddr)
261{
262 void *vfrom, *vto;
263 if (dax_get_addr(bh, &vfrom, blkbits) < 0)
264 return -EIO;
265 vto = kmap_atomic(to);
266 copy_user_page(vto, vfrom, vaddr, to);
267 kunmap_atomic(vto);
268 return 0;
269}
270
271static int dax_insert_mapping(struct inode *inode, struct buffer_head *bh,
272 struct vm_area_struct *vma, struct vm_fault *vmf)
273{
274 struct address_space *mapping = inode->i_mapping;
275 sector_t sector = bh->b_blocknr << (inode->i_blkbits - 9);
276 unsigned long vaddr = (unsigned long)vmf->virtual_address;
277 void *addr;
278 unsigned long pfn;
279 pgoff_t size;
280 int error;
281
282 i_mmap_lock_read(mapping);
283
284 /*
285 * Check truncate didn't happen while we were allocating a block.
286 * If it did, this block may or may not be still allocated to the
287 * file. We can't tell the filesystem to free it because we can't
288 * take i_mutex here. In the worst case, the file still has blocks
289 * allocated past the end of the file.
290 */
291 size = (i_size_read(inode) + PAGE_SIZE - 1) >> PAGE_SHIFT;
292 if (unlikely(vmf->pgoff >= size)) {
293 error = -EIO;
294 goto out;
295 }
296
297 error = bdev_direct_access(bh->b_bdev, sector, &addr, &pfn, bh->b_size);
298 if (error < 0)
299 goto out;
300 if (error < PAGE_SIZE) {
301 error = -EIO;
302 goto out;
303 }
304
305 if (buffer_unwritten(bh) || buffer_new(bh))
306 clear_page(addr);
307
308 error = vm_insert_mixed(vma, vaddr, pfn);
309
310 out:
311 i_mmap_unlock_read(mapping);
312
313 if (bh->b_end_io)
314 bh->b_end_io(bh, 1);
315
316 return error;
317}
318
319static int do_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
320 get_block_t get_block)
321{
322 struct file *file = vma->vm_file;
323 struct address_space *mapping = file->f_mapping;
324 struct inode *inode = mapping->host;
325 struct page *page;
326 struct buffer_head bh;
327 unsigned long vaddr = (unsigned long)vmf->virtual_address;
328 unsigned blkbits = inode->i_blkbits;
329 sector_t block;
330 pgoff_t size;
331 int error;
332 int major = 0;
333
334 size = (i_size_read(inode) + PAGE_SIZE - 1) >> PAGE_SHIFT;
335 if (vmf->pgoff >= size)
336 return VM_FAULT_SIGBUS;
337
338 memset(&bh, 0, sizeof(bh));
339 block = (sector_t)vmf->pgoff << (PAGE_SHIFT - blkbits);
340 bh.b_size = PAGE_SIZE;
341
342 repeat:
343 page = find_get_page(mapping, vmf->pgoff);
344 if (page) {
345 if (!lock_page_or_retry(page, vma->vm_mm, vmf->flags)) {
346 page_cache_release(page);
347 return VM_FAULT_RETRY;
348 }
349 if (unlikely(page->mapping != mapping)) {
350 unlock_page(page);
351 page_cache_release(page);
352 goto repeat;
353 }
354 size = (i_size_read(inode) + PAGE_SIZE - 1) >> PAGE_SHIFT;
355 if (unlikely(vmf->pgoff >= size)) {
356 /*
357 * We have a struct page covering a hole in the file
358 * from a read fault and we've raced with a truncate
359 */
360 error = -EIO;
361 goto unlock_page;
362 }
363 }
364
365 error = get_block(inode, block, &bh, 0);
366 if (!error && (bh.b_size < PAGE_SIZE))
367 error = -EIO; /* fs corruption? */
368 if (error)
369 goto unlock_page;
370
371 if (!buffer_mapped(&bh) && !buffer_unwritten(&bh) && !vmf->cow_page) {
372 if (vmf->flags & FAULT_FLAG_WRITE) {
373 error = get_block(inode, block, &bh, 1);
374 count_vm_event(PGMAJFAULT);
375 mem_cgroup_count_vm_event(vma->vm_mm, PGMAJFAULT);
376 major = VM_FAULT_MAJOR;
377 if (!error && (bh.b_size < PAGE_SIZE))
378 error = -EIO;
379 if (error)
380 goto unlock_page;
381 } else {
382 return dax_load_hole(mapping, page, vmf);
383 }
384 }
385
386 if (vmf->cow_page) {
387 struct page *new_page = vmf->cow_page;
388 if (buffer_written(&bh))
389 error = copy_user_bh(new_page, &bh, blkbits, vaddr);
390 else
391 clear_user_highpage(new_page, vaddr);
392 if (error)
393 goto unlock_page;
394 vmf->page = page;
395 if (!page) {
396 i_mmap_lock_read(mapping);
397 /* Check we didn't race with truncate */
398 size = (i_size_read(inode) + PAGE_SIZE - 1) >>
399 PAGE_SHIFT;
400 if (vmf->pgoff >= size) {
401 i_mmap_unlock_read(mapping);
402 error = -EIO;
403 goto out;
404 }
405 }
406 return VM_FAULT_LOCKED;
407 }
408
409 /* Check we didn't race with a read fault installing a new page */
410 if (!page && major)
411 page = find_lock_page(mapping, vmf->pgoff);
412
413 if (page) {
414 unmap_mapping_range(mapping, vmf->pgoff << PAGE_SHIFT,
415 PAGE_CACHE_SIZE, 0);
416 delete_from_page_cache(page);
417 unlock_page(page);
418 page_cache_release(page);
419 }
420
421 error = dax_insert_mapping(inode, &bh, vma, vmf);
422
423 out:
424 if (error == -ENOMEM)
425 return VM_FAULT_OOM | major;
426 /* -EBUSY is fine, somebody else faulted on the same PTE */
427 if ((error < 0) && (error != -EBUSY))
428 return VM_FAULT_SIGBUS | major;
429 return VM_FAULT_NOPAGE | major;
430
431 unlock_page:
432 if (page) {
433 unlock_page(page);
434 page_cache_release(page);
435 }
436 goto out;
437}
438
439/**
440 * dax_fault - handle a page fault on a DAX file
441 * @vma: The virtual memory area where the fault occurred
442 * @vmf: The description of the fault
443 * @get_block: The filesystem method used to translate file offsets to blocks
444 *
445 * When a page fault occurs, filesystems may call this helper in their
446 * fault handler for DAX files.
447 */
448int dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
449 get_block_t get_block)
450{
451 int result;
452 struct super_block *sb = file_inode(vma->vm_file)->i_sb;
453
454 if (vmf->flags & FAULT_FLAG_WRITE) {
455 sb_start_pagefault(sb);
456 file_update_time(vma->vm_file);
457 }
458 result = do_dax_fault(vma, vmf, get_block);
459 if (vmf->flags & FAULT_FLAG_WRITE)
460 sb_end_pagefault(sb);
461
462 return result;
463}
464EXPORT_SYMBOL_GPL(dax_fault);
465
466/**
467 * dax_zero_page_range - zero a range within a page of a DAX file
468 * @inode: The file being truncated
469 * @from: The file offset that is being truncated to
470 * @length: The number of bytes to zero
471 * @get_block: The filesystem method used to translate file offsets to blocks
472 *
473 * This function can be called by a filesystem when it is zeroing part of a
474 * page in a DAX file. This is intended for hole-punch operations. If
475 * you are truncating a file, the helper function dax_truncate_page() may be
476 * more convenient.
477 *
478 * We work in terms of PAGE_CACHE_SIZE here for commonality with
479 * block_truncate_page(), but we could go down to PAGE_SIZE if the filesystem
480 * took care of disposing of the unnecessary blocks. Even if the filesystem
481 * block size is smaller than PAGE_SIZE, we have to zero the rest of the page
482 * since the file might be mmapped.
483 */
484int dax_zero_page_range(struct inode *inode, loff_t from, unsigned length,
485 get_block_t get_block)
486{
487 struct buffer_head bh;
488 pgoff_t index = from >> PAGE_CACHE_SHIFT;
489 unsigned offset = from & (PAGE_CACHE_SIZE-1);
490 int err;
491
492 /* Block boundary? Nothing to do */
493 if (!length)
494 return 0;
495 BUG_ON((offset + length) > PAGE_CACHE_SIZE);
496
497 memset(&bh, 0, sizeof(bh));
498 bh.b_size = PAGE_CACHE_SIZE;
499 err = get_block(inode, index, &bh, 0);
500 if (err < 0)
501 return err;
502 if (buffer_written(&bh)) {
503 void *addr;
504 err = dax_get_addr(&bh, &addr, inode->i_blkbits);
505 if (err < 0)
506 return err;
507 memset(addr + offset, 0, length);
508 }
509
510 return 0;
511}
512EXPORT_SYMBOL_GPL(dax_zero_page_range);
513
514/**
515 * dax_truncate_page - handle a partial page being truncated in a DAX file
516 * @inode: The file being truncated
517 * @from: The file offset that is being truncated to
518 * @get_block: The filesystem method used to translate file offsets to blocks
519 *
520 * Similar to block_truncate_page(), this function can be called by a
521 * filesystem when it is truncating a DAX file to handle the partial page.
522 *
523 * We work in terms of PAGE_CACHE_SIZE here for commonality with
524 * block_truncate_page(), but we could go down to PAGE_SIZE if the filesystem
525 * took care of disposing of the unnecessary blocks. Even if the filesystem
526 * block size is smaller than PAGE_SIZE, we have to zero the rest of the page
527 * since the file might be mmapped.
528 */
529int dax_truncate_page(struct inode *inode, loff_t from, get_block_t get_block)
530{
531 unsigned length = PAGE_CACHE_ALIGN(from) - from;
532 return dax_zero_page_range(inode, from, length, get_block);
533}
534EXPORT_SYMBOL_GPL(dax_truncate_page);
diff --git a/fs/exofs/inode.c b/fs/exofs/inode.c
index 6fc91df99ff8..a198e94813fe 100644
--- a/fs/exofs/inode.c
+++ b/fs/exofs/inode.c
@@ -985,7 +985,6 @@ const struct address_space_operations exofs_aops = {
985 .direct_IO = exofs_direct_IO, 985 .direct_IO = exofs_direct_IO,
986 986
987 /* With these NULL has special meaning or default is not exported */ 987 /* With these NULL has special meaning or default is not exported */
988 .get_xip_mem = NULL,
989 .migratepage = NULL, 988 .migratepage = NULL,
990 .launder_page = NULL, 989 .launder_page = NULL,
991 .is_partially_uptodate = NULL, 990 .is_partially_uptodate = NULL,
diff --git a/fs/ext2/Kconfig b/fs/ext2/Kconfig
index 14a6780fd034..c634874e12d9 100644
--- a/fs/ext2/Kconfig
+++ b/fs/ext2/Kconfig
@@ -42,14 +42,3 @@ config EXT2_FS_SECURITY
42 42
43 If you are not using a security module that requires using 43 If you are not using a security module that requires using
44 extended attributes for file security labels, say N. 44 extended attributes for file security labels, say N.
45
46config EXT2_FS_XIP
47 bool "Ext2 execute in place support"
48 depends on EXT2_FS && MMU
49 help
50 Execute in place can be used on memory-backed block devices. If you
51 enable this option, you can select to mount block devices which are
52 capable of this feature without using the page cache.
53
54 If you do not use a block device that is capable of using this,
55 or if unsure, say N.
diff --git a/fs/ext2/Makefile b/fs/ext2/Makefile
index f42af45cfd88..445b0e996a12 100644
--- a/fs/ext2/Makefile
+++ b/fs/ext2/Makefile
@@ -10,4 +10,3 @@ ext2-y := balloc.o dir.o file.o ialloc.o inode.o \
10ext2-$(CONFIG_EXT2_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o 10ext2-$(CONFIG_EXT2_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o
11ext2-$(CONFIG_EXT2_FS_POSIX_ACL) += acl.o 11ext2-$(CONFIG_EXT2_FS_POSIX_ACL) += acl.o
12ext2-$(CONFIG_EXT2_FS_SECURITY) += xattr_security.o 12ext2-$(CONFIG_EXT2_FS_SECURITY) += xattr_security.o
13ext2-$(CONFIG_EXT2_FS_XIP) += xip.o
diff --git a/fs/ext2/ext2.h b/fs/ext2/ext2.h
index e4279ead4a05..678f9ab08c48 100644
--- a/fs/ext2/ext2.h
+++ b/fs/ext2/ext2.h
@@ -380,10 +380,15 @@ struct ext2_inode {
380#define EXT2_MOUNT_NO_UID32 0x000200 /* Disable 32-bit UIDs */ 380#define EXT2_MOUNT_NO_UID32 0x000200 /* Disable 32-bit UIDs */
381#define EXT2_MOUNT_XATTR_USER 0x004000 /* Extended user attributes */ 381#define EXT2_MOUNT_XATTR_USER 0x004000 /* Extended user attributes */
382#define EXT2_MOUNT_POSIX_ACL 0x008000 /* POSIX Access Control Lists */ 382#define EXT2_MOUNT_POSIX_ACL 0x008000 /* POSIX Access Control Lists */
383#define EXT2_MOUNT_XIP 0x010000 /* Execute in place */ 383#define EXT2_MOUNT_XIP 0x010000 /* Obsolete, use DAX */
384#define EXT2_MOUNT_USRQUOTA 0x020000 /* user quota */ 384#define EXT2_MOUNT_USRQUOTA 0x020000 /* user quota */
385#define EXT2_MOUNT_GRPQUOTA 0x040000 /* group quota */ 385#define EXT2_MOUNT_GRPQUOTA 0x040000 /* group quota */
386#define EXT2_MOUNT_RESERVATION 0x080000 /* Preallocation */ 386#define EXT2_MOUNT_RESERVATION 0x080000 /* Preallocation */
387#ifdef CONFIG_FS_DAX
388#define EXT2_MOUNT_DAX 0x100000 /* Direct Access */
389#else
390#define EXT2_MOUNT_DAX 0
391#endif
387 392
388 393
389#define clear_opt(o, opt) o &= ~EXT2_MOUNT_##opt 394#define clear_opt(o, opt) o &= ~EXT2_MOUNT_##opt
@@ -788,11 +793,10 @@ extern int ext2_fsync(struct file *file, loff_t start, loff_t end,
788 int datasync); 793 int datasync);
789extern const struct inode_operations ext2_file_inode_operations; 794extern const struct inode_operations ext2_file_inode_operations;
790extern const struct file_operations ext2_file_operations; 795extern const struct file_operations ext2_file_operations;
791extern const struct file_operations ext2_xip_file_operations; 796extern const struct file_operations ext2_dax_file_operations;
792 797
793/* inode.c */ 798/* inode.c */
794extern const struct address_space_operations ext2_aops; 799extern const struct address_space_operations ext2_aops;
795extern const struct address_space_operations ext2_aops_xip;
796extern const struct address_space_operations ext2_nobh_aops; 800extern const struct address_space_operations ext2_nobh_aops;
797 801
798/* namei.c */ 802/* namei.c */
diff --git a/fs/ext2/file.c b/fs/ext2/file.c
index 7c87b22a7228..e31701713516 100644
--- a/fs/ext2/file.c
+++ b/fs/ext2/file.c
@@ -25,6 +25,36 @@
25#include "xattr.h" 25#include "xattr.h"
26#include "acl.h" 26#include "acl.h"
27 27
28#ifdef CONFIG_FS_DAX
29static int ext2_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
30{
31 return dax_fault(vma, vmf, ext2_get_block);
32}
33
34static int ext2_dax_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
35{
36 return dax_mkwrite(vma, vmf, ext2_get_block);
37}
38
39static const struct vm_operations_struct ext2_dax_vm_ops = {
40 .fault = ext2_dax_fault,
41 .page_mkwrite = ext2_dax_mkwrite,
42};
43
44static int ext2_file_mmap(struct file *file, struct vm_area_struct *vma)
45{
46 if (!IS_DAX(file_inode(file)))
47 return generic_file_mmap(file, vma);
48
49 file_accessed(file);
50 vma->vm_ops = &ext2_dax_vm_ops;
51 vma->vm_flags |= VM_MIXEDMAP;
52 return 0;
53}
54#else
55#define ext2_file_mmap generic_file_mmap
56#endif
57
28/* 58/*
29 * Called when filp is released. This happens when all file descriptors 59 * Called when filp is released. This happens when all file descriptors
30 * for a single struct file are closed. Note that different open() calls 60 * for a single struct file are closed. Note that different open() calls
@@ -70,7 +100,7 @@ const struct file_operations ext2_file_operations = {
70#ifdef CONFIG_COMPAT 100#ifdef CONFIG_COMPAT
71 .compat_ioctl = ext2_compat_ioctl, 101 .compat_ioctl = ext2_compat_ioctl,
72#endif 102#endif
73 .mmap = generic_file_mmap, 103 .mmap = ext2_file_mmap,
74 .open = dquot_file_open, 104 .open = dquot_file_open,
75 .release = ext2_release_file, 105 .release = ext2_release_file,
76 .fsync = ext2_fsync, 106 .fsync = ext2_fsync,
@@ -78,16 +108,18 @@ const struct file_operations ext2_file_operations = {
78 .splice_write = iter_file_splice_write, 108 .splice_write = iter_file_splice_write,
79}; 109};
80 110
81#ifdef CONFIG_EXT2_FS_XIP 111#ifdef CONFIG_FS_DAX
82const struct file_operations ext2_xip_file_operations = { 112const struct file_operations ext2_dax_file_operations = {
83 .llseek = generic_file_llseek, 113 .llseek = generic_file_llseek,
84 .read = xip_file_read, 114 .read = new_sync_read,
85 .write = xip_file_write, 115 .write = new_sync_write,
116 .read_iter = generic_file_read_iter,
117 .write_iter = generic_file_write_iter,
86 .unlocked_ioctl = ext2_ioctl, 118 .unlocked_ioctl = ext2_ioctl,
87#ifdef CONFIG_COMPAT 119#ifdef CONFIG_COMPAT
88 .compat_ioctl = ext2_compat_ioctl, 120 .compat_ioctl = ext2_compat_ioctl,
89#endif 121#endif
90 .mmap = xip_file_mmap, 122 .mmap = ext2_file_mmap,
91 .open = dquot_file_open, 123 .open = dquot_file_open,
92 .release = ext2_release_file, 124 .release = ext2_release_file,
93 .fsync = ext2_fsync, 125 .fsync = ext2_fsync,
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index 36d35c36311d..6434bc000125 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -34,7 +34,6 @@
34#include <linux/aio.h> 34#include <linux/aio.h>
35#include "ext2.h" 35#include "ext2.h"
36#include "acl.h" 36#include "acl.h"
37#include "xip.h"
38#include "xattr.h" 37#include "xattr.h"
39 38
40static int __ext2_write_inode(struct inode *inode, int do_sync); 39static int __ext2_write_inode(struct inode *inode, int do_sync);
@@ -731,12 +730,14 @@ static int ext2_get_blocks(struct inode *inode,
731 goto cleanup; 730 goto cleanup;
732 } 731 }
733 732
734 if (ext2_use_xip(inode->i_sb)) { 733 if (IS_DAX(inode)) {
735 /* 734 /*
736 * we need to clear the block 735 * block must be initialised before we put it in the tree
736 * so that it's not found by another thread before it's
737 * initialised
737 */ 738 */
738 err = ext2_clear_xip_target (inode, 739 err = dax_clear_blocks(inode, le32_to_cpu(chain[depth-1].key),
739 le32_to_cpu(chain[depth-1].key)); 740 1 << inode->i_blkbits);
740 if (err) { 741 if (err) {
741 mutex_unlock(&ei->truncate_mutex); 742 mutex_unlock(&ei->truncate_mutex);
742 goto cleanup; 743 goto cleanup;
@@ -859,7 +860,12 @@ ext2_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter,
859 size_t count = iov_iter_count(iter); 860 size_t count = iov_iter_count(iter);
860 ssize_t ret; 861 ssize_t ret;
861 862
862 ret = blockdev_direct_IO(rw, iocb, inode, iter, offset, ext2_get_block); 863 if (IS_DAX(inode))
864 ret = dax_do_io(rw, iocb, inode, iter, offset, ext2_get_block,
865 NULL, DIO_LOCKING);
866 else
867 ret = blockdev_direct_IO(rw, iocb, inode, iter, offset,
868 ext2_get_block);
863 if (ret < 0 && (rw & WRITE)) 869 if (ret < 0 && (rw & WRITE))
864 ext2_write_failed(mapping, offset + count); 870 ext2_write_failed(mapping, offset + count);
865 return ret; 871 return ret;
@@ -885,11 +891,6 @@ const struct address_space_operations ext2_aops = {
885 .error_remove_page = generic_error_remove_page, 891 .error_remove_page = generic_error_remove_page,
886}; 892};
887 893
888const struct address_space_operations ext2_aops_xip = {
889 .bmap = ext2_bmap,
890 .get_xip_mem = ext2_get_xip_mem,
891};
892
893const struct address_space_operations ext2_nobh_aops = { 894const struct address_space_operations ext2_nobh_aops = {
894 .readpage = ext2_readpage, 895 .readpage = ext2_readpage,
895 .readpages = ext2_readpages, 896 .readpages = ext2_readpages,
@@ -1201,8 +1202,8 @@ static int ext2_setsize(struct inode *inode, loff_t newsize)
1201 1202
1202 inode_dio_wait(inode); 1203 inode_dio_wait(inode);
1203 1204
1204 if (mapping_is_xip(inode->i_mapping)) 1205 if (IS_DAX(inode))
1205 error = xip_truncate_page(inode->i_mapping, newsize); 1206 error = dax_truncate_page(inode, newsize, ext2_get_block);
1206 else if (test_opt(inode->i_sb, NOBH)) 1207 else if (test_opt(inode->i_sb, NOBH))
1207 error = nobh_truncate_page(inode->i_mapping, 1208 error = nobh_truncate_page(inode->i_mapping,
1208 newsize, ext2_get_block); 1209 newsize, ext2_get_block);
@@ -1273,7 +1274,8 @@ void ext2_set_inode_flags(struct inode *inode)
1273{ 1274{
1274 unsigned int flags = EXT2_I(inode)->i_flags; 1275 unsigned int flags = EXT2_I(inode)->i_flags;
1275 1276
1276 inode->i_flags &= ~(S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC); 1277 inode->i_flags &= ~(S_SYNC | S_APPEND | S_IMMUTABLE | S_NOATIME |
1278 S_DIRSYNC | S_DAX);
1277 if (flags & EXT2_SYNC_FL) 1279 if (flags & EXT2_SYNC_FL)
1278 inode->i_flags |= S_SYNC; 1280 inode->i_flags |= S_SYNC;
1279 if (flags & EXT2_APPEND_FL) 1281 if (flags & EXT2_APPEND_FL)
@@ -1284,6 +1286,8 @@ void ext2_set_inode_flags(struct inode *inode)
1284 inode->i_flags |= S_NOATIME; 1286 inode->i_flags |= S_NOATIME;
1285 if (flags & EXT2_DIRSYNC_FL) 1287 if (flags & EXT2_DIRSYNC_FL)
1286 inode->i_flags |= S_DIRSYNC; 1288 inode->i_flags |= S_DIRSYNC;
1289 if (test_opt(inode->i_sb, DAX))
1290 inode->i_flags |= S_DAX;
1287} 1291}
1288 1292
1289/* Propagate flags from i_flags to EXT2_I(inode)->i_flags */ 1293/* Propagate flags from i_flags to EXT2_I(inode)->i_flags */
@@ -1384,9 +1388,9 @@ struct inode *ext2_iget (struct super_block *sb, unsigned long ino)
1384 1388
1385 if (S_ISREG(inode->i_mode)) { 1389 if (S_ISREG(inode->i_mode)) {
1386 inode->i_op = &ext2_file_inode_operations; 1390 inode->i_op = &ext2_file_inode_operations;
1387 if (ext2_use_xip(inode->i_sb)) { 1391 if (test_opt(inode->i_sb, DAX)) {
1388 inode->i_mapping->a_ops = &ext2_aops_xip; 1392 inode->i_mapping->a_ops = &ext2_aops;
1389 inode->i_fop = &ext2_xip_file_operations; 1393 inode->i_fop = &ext2_dax_file_operations;
1390 } else if (test_opt(inode->i_sb, NOBH)) { 1394 } else if (test_opt(inode->i_sb, NOBH)) {
1391 inode->i_mapping->a_ops = &ext2_nobh_aops; 1395 inode->i_mapping->a_ops = &ext2_nobh_aops;
1392 inode->i_fop = &ext2_file_operations; 1396 inode->i_fop = &ext2_file_operations;
diff --git a/fs/ext2/namei.c b/fs/ext2/namei.c
index c268d0af1db9..148f6e3789ea 100644
--- a/fs/ext2/namei.c
+++ b/fs/ext2/namei.c
@@ -35,7 +35,6 @@
35#include "ext2.h" 35#include "ext2.h"
36#include "xattr.h" 36#include "xattr.h"
37#include "acl.h" 37#include "acl.h"
38#include "xip.h"
39 38
40static inline int ext2_add_nondir(struct dentry *dentry, struct inode *inode) 39static inline int ext2_add_nondir(struct dentry *dentry, struct inode *inode)
41{ 40{
@@ -105,9 +104,9 @@ static int ext2_create (struct inode * dir, struct dentry * dentry, umode_t mode
105 return PTR_ERR(inode); 104 return PTR_ERR(inode);
106 105
107 inode->i_op = &ext2_file_inode_operations; 106 inode->i_op = &ext2_file_inode_operations;
108 if (ext2_use_xip(inode->i_sb)) { 107 if (test_opt(inode->i_sb, DAX)) {
109 inode->i_mapping->a_ops = &ext2_aops_xip; 108 inode->i_mapping->a_ops = &ext2_aops;
110 inode->i_fop = &ext2_xip_file_operations; 109 inode->i_fop = &ext2_dax_file_operations;
111 } else if (test_opt(inode->i_sb, NOBH)) { 110 } else if (test_opt(inode->i_sb, NOBH)) {
112 inode->i_mapping->a_ops = &ext2_nobh_aops; 111 inode->i_mapping->a_ops = &ext2_nobh_aops;
113 inode->i_fop = &ext2_file_operations; 112 inode->i_fop = &ext2_file_operations;
@@ -126,9 +125,9 @@ static int ext2_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode)
126 return PTR_ERR(inode); 125 return PTR_ERR(inode);
127 126
128 inode->i_op = &ext2_file_inode_operations; 127 inode->i_op = &ext2_file_inode_operations;
129 if (ext2_use_xip(inode->i_sb)) { 128 if (test_opt(inode->i_sb, DAX)) {
130 inode->i_mapping->a_ops = &ext2_aops_xip; 129 inode->i_mapping->a_ops = &ext2_aops;
131 inode->i_fop = &ext2_xip_file_operations; 130 inode->i_fop = &ext2_dax_file_operations;
132 } else if (test_opt(inode->i_sb, NOBH)) { 131 } else if (test_opt(inode->i_sb, NOBH)) {
133 inode->i_mapping->a_ops = &ext2_nobh_aops; 132 inode->i_mapping->a_ops = &ext2_nobh_aops;
134 inode->i_fop = &ext2_file_operations; 133 inode->i_fop = &ext2_file_operations;
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index ae55fddc26a9..d0e746e96511 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -35,7 +35,6 @@
35#include "ext2.h" 35#include "ext2.h"
36#include "xattr.h" 36#include "xattr.h"
37#include "acl.h" 37#include "acl.h"
38#include "xip.h"
39 38
40static void ext2_sync_super(struct super_block *sb, 39static void ext2_sync_super(struct super_block *sb,
41 struct ext2_super_block *es, int wait); 40 struct ext2_super_block *es, int wait);
@@ -292,9 +291,11 @@ static int ext2_show_options(struct seq_file *seq, struct dentry *root)
292 seq_puts(seq, ",grpquota"); 291 seq_puts(seq, ",grpquota");
293#endif 292#endif
294 293
295#if defined(CONFIG_EXT2_FS_XIP) 294#ifdef CONFIG_FS_DAX
296 if (sbi->s_mount_opt & EXT2_MOUNT_XIP) 295 if (sbi->s_mount_opt & EXT2_MOUNT_XIP)
297 seq_puts(seq, ",xip"); 296 seq_puts(seq, ",xip");
297 if (sbi->s_mount_opt & EXT2_MOUNT_DAX)
298 seq_puts(seq, ",dax");
298#endif 299#endif
299 300
300 if (!test_opt(sb, RESERVATION)) 301 if (!test_opt(sb, RESERVATION))
@@ -403,7 +404,7 @@ enum {
403 Opt_resgid, Opt_resuid, Opt_sb, Opt_err_cont, Opt_err_panic, 404 Opt_resgid, Opt_resuid, Opt_sb, Opt_err_cont, Opt_err_panic,
404 Opt_err_ro, Opt_nouid32, Opt_nocheck, Opt_debug, 405 Opt_err_ro, Opt_nouid32, Opt_nocheck, Opt_debug,
405 Opt_oldalloc, Opt_orlov, Opt_nobh, Opt_user_xattr, Opt_nouser_xattr, 406 Opt_oldalloc, Opt_orlov, Opt_nobh, Opt_user_xattr, Opt_nouser_xattr,
406 Opt_acl, Opt_noacl, Opt_xip, Opt_ignore, Opt_err, Opt_quota, 407 Opt_acl, Opt_noacl, Opt_xip, Opt_dax, Opt_ignore, Opt_err, Opt_quota,
407 Opt_usrquota, Opt_grpquota, Opt_reservation, Opt_noreservation 408 Opt_usrquota, Opt_grpquota, Opt_reservation, Opt_noreservation
408}; 409};
409 410
@@ -432,6 +433,7 @@ static const match_table_t tokens = {
432 {Opt_acl, "acl"}, 433 {Opt_acl, "acl"},
433 {Opt_noacl, "noacl"}, 434 {Opt_noacl, "noacl"},
434 {Opt_xip, "xip"}, 435 {Opt_xip, "xip"},
436 {Opt_dax, "dax"},
435 {Opt_grpquota, "grpquota"}, 437 {Opt_grpquota, "grpquota"},
436 {Opt_ignore, "noquota"}, 438 {Opt_ignore, "noquota"},
437 {Opt_quota, "quota"}, 439 {Opt_quota, "quota"},
@@ -559,10 +561,14 @@ static int parse_options(char *options, struct super_block *sb)
559 break; 561 break;
560#endif 562#endif
561 case Opt_xip: 563 case Opt_xip:
562#ifdef CONFIG_EXT2_FS_XIP 564 ext2_msg(sb, KERN_INFO, "use dax instead of xip");
563 set_opt (sbi->s_mount_opt, XIP); 565 set_opt(sbi->s_mount_opt, XIP);
566 /* Fall through */
567 case Opt_dax:
568#ifdef CONFIG_FS_DAX
569 set_opt(sbi->s_mount_opt, DAX);
564#else 570#else
565 ext2_msg(sb, KERN_INFO, "xip option not supported"); 571 ext2_msg(sb, KERN_INFO, "dax option not supported");
566#endif 572#endif
567 break; 573 break;
568 574
@@ -877,9 +883,6 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent)
877 ((EXT2_SB(sb)->s_mount_opt & EXT2_MOUNT_POSIX_ACL) ? 883 ((EXT2_SB(sb)->s_mount_opt & EXT2_MOUNT_POSIX_ACL) ?
878 MS_POSIXACL : 0); 884 MS_POSIXACL : 0);
879 885
880 ext2_xip_verify_sb(sb); /* see if bdev supports xip, unset
881 EXT2_MOUNT_XIP if not */
882
883 if (le32_to_cpu(es->s_rev_level) == EXT2_GOOD_OLD_REV && 886 if (le32_to_cpu(es->s_rev_level) == EXT2_GOOD_OLD_REV &&
884 (EXT2_HAS_COMPAT_FEATURE(sb, ~0U) || 887 (EXT2_HAS_COMPAT_FEATURE(sb, ~0U) ||
885 EXT2_HAS_RO_COMPAT_FEATURE(sb, ~0U) || 888 EXT2_HAS_RO_COMPAT_FEATURE(sb, ~0U) ||
@@ -909,11 +912,17 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent)
909 912
910 blocksize = BLOCK_SIZE << le32_to_cpu(sbi->s_es->s_log_block_size); 913 blocksize = BLOCK_SIZE << le32_to_cpu(sbi->s_es->s_log_block_size);
911 914
912 if (ext2_use_xip(sb) && blocksize != PAGE_SIZE) { 915 if (sbi->s_mount_opt & EXT2_MOUNT_DAX) {
913 if (!silent) 916 if (blocksize != PAGE_SIZE) {
914 ext2_msg(sb, KERN_ERR, 917 ext2_msg(sb, KERN_ERR,
915 "error: unsupported blocksize for xip"); 918 "error: unsupported blocksize for dax");
916 goto failed_mount; 919 goto failed_mount;
920 }
921 if (!sb->s_bdev->bd_disk->fops->direct_access) {
922 ext2_msg(sb, KERN_ERR,
923 "error: device does not support dax");
924 goto failed_mount;
925 }
917 } 926 }
918 927
919 /* If the blocksize doesn't match, re-read the thing.. */ 928 /* If the blocksize doesn't match, re-read the thing.. */
@@ -1259,7 +1268,6 @@ static int ext2_remount (struct super_block * sb, int * flags, char * data)
1259{ 1268{
1260 struct ext2_sb_info * sbi = EXT2_SB(sb); 1269 struct ext2_sb_info * sbi = EXT2_SB(sb);
1261 struct ext2_super_block * es; 1270 struct ext2_super_block * es;
1262 unsigned long old_mount_opt = sbi->s_mount_opt;
1263 struct ext2_mount_options old_opts; 1271 struct ext2_mount_options old_opts;
1264 unsigned long old_sb_flags; 1272 unsigned long old_sb_flags;
1265 int err; 1273 int err;
@@ -1284,22 +1292,11 @@ static int ext2_remount (struct super_block * sb, int * flags, char * data)
1284 sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | 1292 sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
1285 ((sbi->s_mount_opt & EXT2_MOUNT_POSIX_ACL) ? MS_POSIXACL : 0); 1293 ((sbi->s_mount_opt & EXT2_MOUNT_POSIX_ACL) ? MS_POSIXACL : 0);
1286 1294
1287 ext2_xip_verify_sb(sb); /* see if bdev supports xip, unset
1288 EXT2_MOUNT_XIP if not */
1289
1290 if ((ext2_use_xip(sb)) && (sb->s_blocksize != PAGE_SIZE)) {
1291 ext2_msg(sb, KERN_WARNING,
1292 "warning: unsupported blocksize for xip");
1293 err = -EINVAL;
1294 goto restore_opts;
1295 }
1296
1297 es = sbi->s_es; 1295 es = sbi->s_es;
1298 if ((sbi->s_mount_opt ^ old_mount_opt) & EXT2_MOUNT_XIP) { 1296 if ((sbi->s_mount_opt ^ old_opts.s_mount_opt) & EXT2_MOUNT_DAX) {
1299 ext2_msg(sb, KERN_WARNING, "warning: refusing change of " 1297 ext2_msg(sb, KERN_WARNING, "warning: refusing change of "
1300 "xip flag with busy inodes while remounting"); 1298 "dax flag with busy inodes while remounting");
1301 sbi->s_mount_opt &= ~EXT2_MOUNT_XIP; 1299 sbi->s_mount_opt ^= EXT2_MOUNT_DAX;
1302 sbi->s_mount_opt |= old_mount_opt & EXT2_MOUNT_XIP;
1303 } 1300 }
1304 if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY)) { 1301 if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY)) {
1305 spin_unlock(&sbi->s_lock); 1302 spin_unlock(&sbi->s_lock);
diff --git a/fs/ext2/xip.c b/fs/ext2/xip.c
deleted file mode 100644
index bbc5fec6ff7f..000000000000
--- a/fs/ext2/xip.c
+++ /dev/null
@@ -1,86 +0,0 @@
1/*
2 * linux/fs/ext2/xip.c
3 *
4 * Copyright (C) 2005 IBM Corporation
5 * Author: Carsten Otte (cotte@de.ibm.com)
6 */
7
8#include <linux/mm.h>
9#include <linux/fs.h>
10#include <linux/genhd.h>
11#include <linux/buffer_head.h>
12#include <linux/blkdev.h>
13#include "ext2.h"
14#include "xip.h"
15
16static inline long __inode_direct_access(struct inode *inode, sector_t block,
17 void **kaddr, unsigned long *pfn, long size)
18{
19 struct block_device *bdev = inode->i_sb->s_bdev;
20 sector_t sector = block * (PAGE_SIZE / 512);
21 return bdev_direct_access(bdev, sector, kaddr, pfn, size);
22}
23
24static inline int
25__ext2_get_block(struct inode *inode, pgoff_t pgoff, int create,
26 sector_t *result)
27{
28 struct buffer_head tmp;
29 int rc;
30
31 memset(&tmp, 0, sizeof(struct buffer_head));
32 tmp.b_size = 1 << inode->i_blkbits;
33 rc = ext2_get_block(inode, pgoff, &tmp, create);
34 *result = tmp.b_blocknr;
35
36 /* did we get a sparse block (hole in the file)? */
37 if (!tmp.b_blocknr && !rc) {
38 BUG_ON(create);
39 rc = -ENODATA;
40 }
41
42 return rc;
43}
44
45int
46ext2_clear_xip_target(struct inode *inode, sector_t block)
47{
48 void *kaddr;
49 unsigned long pfn;
50 long size;
51
52 size = __inode_direct_access(inode, block, &kaddr, &pfn, PAGE_SIZE);
53 if (size < 0)
54 return size;
55 clear_page(kaddr);
56 return 0;
57}
58
59void ext2_xip_verify_sb(struct super_block *sb)
60{
61 struct ext2_sb_info *sbi = EXT2_SB(sb);
62
63 if ((sbi->s_mount_opt & EXT2_MOUNT_XIP) &&
64 !sb->s_bdev->bd_disk->fops->direct_access) {
65 sbi->s_mount_opt &= (~EXT2_MOUNT_XIP);
66 ext2_msg(sb, KERN_WARNING,
67 "warning: ignoring xip option - "
68 "not supported by bdev");
69 }
70}
71
72int ext2_get_xip_mem(struct address_space *mapping, pgoff_t pgoff, int create,
73 void **kmem, unsigned long *pfn)
74{
75 long rc;
76 sector_t block;
77
78 /* first, retrieve the sector number */
79 rc = __ext2_get_block(mapping->host, pgoff, create, &block);
80 if (rc)
81 return rc;
82
83 /* retrieve address of the target data */
84 rc = __inode_direct_access(mapping->host, block, kmem, pfn, PAGE_SIZE);
85 return (rc < 0) ? rc : 0;
86}
diff --git a/fs/ext2/xip.h b/fs/ext2/xip.h
deleted file mode 100644
index 18b34d2f31b3..000000000000
--- a/fs/ext2/xip.h
+++ /dev/null
@@ -1,26 +0,0 @@
1/*
2 * linux/fs/ext2/xip.h
3 *
4 * Copyright (C) 2005 IBM Corporation
5 * Author: Carsten Otte (cotte@de.ibm.com)
6 */
7
8#ifdef CONFIG_EXT2_FS_XIP
9extern void ext2_xip_verify_sb (struct super_block *);
10extern int ext2_clear_xip_target (struct inode *, sector_t);
11
12static inline int ext2_use_xip (struct super_block *sb)
13{
14 struct ext2_sb_info *sbi = EXT2_SB(sb);
15 return (sbi->s_mount_opt & EXT2_MOUNT_XIP);
16}
17int ext2_get_xip_mem(struct address_space *, pgoff_t, int,
18 void **, unsigned long *);
19#define mapping_is_xip(map) unlikely(map->a_ops->get_xip_mem)
20#else
21#define mapping_is_xip(map) 0
22#define ext2_xip_verify_sb(sb) do { } while (0)
23#define ext2_use_xip(sb) 0
24#define ext2_clear_xip_target(inode, chain) 0
25#define ext2_get_xip_mem NULL
26#endif
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index a75fba67bb1f..982d934fd9ac 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -965,6 +965,11 @@ struct ext4_inode_info {
965#define EXT4_MOUNT_ERRORS_MASK 0x00070 965#define EXT4_MOUNT_ERRORS_MASK 0x00070
966#define EXT4_MOUNT_MINIX_DF 0x00080 /* Mimics the Minix statfs */ 966#define EXT4_MOUNT_MINIX_DF 0x00080 /* Mimics the Minix statfs */
967#define EXT4_MOUNT_NOLOAD 0x00100 /* Don't use existing journal*/ 967#define EXT4_MOUNT_NOLOAD 0x00100 /* Don't use existing journal*/
968#ifdef CONFIG_FS_DAX
969#define EXT4_MOUNT_DAX 0x00200 /* Direct Access */
970#else
971#define EXT4_MOUNT_DAX 0
972#endif
968#define EXT4_MOUNT_DATA_FLAGS 0x00C00 /* Mode for data writes: */ 973#define EXT4_MOUNT_DATA_FLAGS 0x00C00 /* Mode for data writes: */
969#define EXT4_MOUNT_JOURNAL_DATA 0x00400 /* Write data to journal */ 974#define EXT4_MOUNT_JOURNAL_DATA 0x00400 /* Write data to journal */
970#define EXT4_MOUNT_ORDERED_DATA 0x00800 /* Flush data before commit */ 975#define EXT4_MOUNT_ORDERED_DATA 0x00800 /* Flush data before commit */
@@ -2578,6 +2583,7 @@ extern const struct file_operations ext4_dir_operations;
2578/* file.c */ 2583/* file.c */
2579extern const struct inode_operations ext4_file_inode_operations; 2584extern const struct inode_operations ext4_file_inode_operations;
2580extern const struct file_operations ext4_file_operations; 2585extern const struct file_operations ext4_file_operations;
2586extern const struct file_operations ext4_dax_file_operations;
2581extern loff_t ext4_llseek(struct file *file, loff_t offset, int origin); 2587extern loff_t ext4_llseek(struct file *file, loff_t offset, int origin);
2582 2588
2583/* inline.c */ 2589/* inline.c */
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index 7cb592386121..33a09da16c9c 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -95,7 +95,7 @@ ext4_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
95 struct inode *inode = file_inode(iocb->ki_filp); 95 struct inode *inode = file_inode(iocb->ki_filp);
96 struct mutex *aio_mutex = NULL; 96 struct mutex *aio_mutex = NULL;
97 struct blk_plug plug; 97 struct blk_plug plug;
98 int o_direct = file->f_flags & O_DIRECT; 98 int o_direct = io_is_direct(file);
99 int overwrite = 0; 99 int overwrite = 0;
100 size_t length = iov_iter_count(from); 100 size_t length = iov_iter_count(from);
101 ssize_t ret; 101 ssize_t ret;
@@ -191,6 +191,26 @@ errout:
191 return ret; 191 return ret;
192} 192}
193 193
#ifdef CONFIG_FS_DAX
/*
 * Fault handler for DAX mappings: forwards the fault to dax_fault() with
 * ext4_get_block as the block-mapping callback.
 */
static int ext4_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
{
	/* Is this the right get_block? */
	int rc = dax_fault(vma, vmf, ext4_get_block);

	return rc;
}

/*
 * page_mkwrite handler for DAX mappings: forwards to dax_mkwrite() with
 * the same block-mapping callback as the fault handler.
 */
static int ext4_dax_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
{
	int rc = dax_mkwrite(vma, vmf, ext4_get_block);

	return rc;
}

/* VM operations installed by ext4_file_mmap() on DAX inodes. */
static const struct vm_operations_struct ext4_dax_vm_ops = {
	.page_mkwrite	= ext4_dax_mkwrite,
	.fault		= ext4_dax_fault,
};
#else
/* Without CONFIG_FS_DAX the DAX name is an alias for the regular vm_ops. */
#define ext4_dax_vm_ops	ext4_file_vm_ops
#endif
213
194static const struct vm_operations_struct ext4_file_vm_ops = { 214static const struct vm_operations_struct ext4_file_vm_ops = {
195 .fault = filemap_fault, 215 .fault = filemap_fault,
196 .map_pages = filemap_map_pages, 216 .map_pages = filemap_map_pages,
@@ -200,7 +220,12 @@ static const struct vm_operations_struct ext4_file_vm_ops = {
200static int ext4_file_mmap(struct file *file, struct vm_area_struct *vma) 220static int ext4_file_mmap(struct file *file, struct vm_area_struct *vma)
201{ 221{
202 file_accessed(file); 222 file_accessed(file);
203 vma->vm_ops = &ext4_file_vm_ops; 223 if (IS_DAX(file_inode(file))) {
224 vma->vm_ops = &ext4_dax_vm_ops;
225 vma->vm_flags |= VM_MIXEDMAP;
226 } else {
227 vma->vm_ops = &ext4_file_vm_ops;
228 }
204 return 0; 229 return 0;
205} 230}
206 231
@@ -599,6 +624,26 @@ const struct file_operations ext4_file_operations = {
599 .fallocate = ext4_fallocate, 624 .fallocate = ext4_fallocate,
600}; 625};
601 626
#ifdef CONFIG_FS_DAX
/*
 * File operations used for regular files on a filesystem mounted with the
 * dax option.  No splice handlers are provided.
 */
const struct file_operations ext4_dax_file_operations = {
	/* positioning and data transfer */
	.llseek		= ext4_llseek,
	.read		= new_sync_read,
	.read_iter	= generic_file_read_iter,
	.write		= new_sync_write,
	.write_iter	= ext4_file_write_iter,
	/* Splice not yet supported with DAX */

	/* open/close, mapping and persistence */
	.open		= ext4_file_open,
	.release	= ext4_release_file,
	.mmap		= ext4_file_mmap,
	.fsync		= ext4_sync_file,
	.fallocate	= ext4_fallocate,

	/* ioctl entry points */
	.unlocked_ioctl = ext4_ioctl,
#ifdef CONFIG_COMPAT
	.compat_ioctl	= ext4_compat_ioctl,
#endif
};
#endif
646
602const struct inode_operations ext4_file_inode_operations = { 647const struct inode_operations ext4_file_inode_operations = {
603 .setattr = ext4_setattr, 648 .setattr = ext4_setattr,
604 .getattr = ext4_getattr, 649 .getattr = ext4_getattr,
diff --git a/fs/ext4/indirect.c b/fs/ext4/indirect.c
index 36b369697a13..6b9878a24182 100644
--- a/fs/ext4/indirect.c
+++ b/fs/ext4/indirect.c
@@ -689,14 +689,22 @@ retry:
689 inode_dio_done(inode); 689 inode_dio_done(inode);
690 goto locked; 690 goto locked;
691 } 691 }
692 ret = __blockdev_direct_IO(rw, iocb, inode, 692 if (IS_DAX(inode))
693 inode->i_sb->s_bdev, iter, offset, 693 ret = dax_do_io(rw, iocb, inode, iter, offset,
694 ext4_get_block, NULL, NULL, 0); 694 ext4_get_block, NULL, 0);
695 else
696 ret = __blockdev_direct_IO(rw, iocb, inode,
697 inode->i_sb->s_bdev, iter, offset,
698 ext4_get_block, NULL, NULL, 0);
695 inode_dio_done(inode); 699 inode_dio_done(inode);
696 } else { 700 } else {
697locked: 701locked:
698 ret = blockdev_direct_IO(rw, iocb, inode, iter, 702 if (IS_DAX(inode))
699 offset, ext4_get_block); 703 ret = dax_do_io(rw, iocb, inode, iter, offset,
704 ext4_get_block, NULL, DIO_LOCKING);
705 else
706 ret = blockdev_direct_IO(rw, iocb, inode, iter,
707 offset, ext4_get_block);
700 708
701 if (unlikely((rw & WRITE) && ret < 0)) { 709 if (unlikely((rw & WRITE) && ret < 0)) {
702 loff_t isize = i_size_read(inode); 710 loff_t isize = i_size_read(inode);
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 5653fa42930b..28555f191b62 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -657,6 +657,18 @@ has_zeroout:
657 return retval; 657 return retval;
658} 658}
659 659
660static void ext4_end_io_unwritten(struct buffer_head *bh, int uptodate)
661{
662 struct inode *inode = bh->b_assoc_map->host;
663 /* XXX: breaks on 32-bit > 16GB. Is that even supported? */
664 loff_t offset = (loff_t)(uintptr_t)bh->b_private << inode->i_blkbits;
665 int err;
666 if (!uptodate)
667 return;
668 WARN_ON(!buffer_unwritten(bh));
669 err = ext4_convert_unwritten_extents(NULL, inode, offset, bh->b_size);
670}
671
660/* Maximum number of blocks we map for direct IO at once. */ 672/* Maximum number of blocks we map for direct IO at once. */
661#define DIO_MAX_BLOCKS 4096 673#define DIO_MAX_BLOCKS 4096
662 674
@@ -694,6 +706,11 @@ static int _ext4_get_block(struct inode *inode, sector_t iblock,
694 706
695 map_bh(bh, inode->i_sb, map.m_pblk); 707 map_bh(bh, inode->i_sb, map.m_pblk);
696 bh->b_state = (bh->b_state & ~EXT4_MAP_FLAGS) | map.m_flags; 708 bh->b_state = (bh->b_state & ~EXT4_MAP_FLAGS) | map.m_flags;
709 if (IS_DAX(inode) && buffer_unwritten(bh) && !io_end) {
710 bh->b_assoc_map = inode->i_mapping;
711 bh->b_private = (void *)(unsigned long)iblock;
712 bh->b_end_io = ext4_end_io_unwritten;
713 }
697 if (io_end && io_end->flag & EXT4_IO_END_UNWRITTEN) 714 if (io_end && io_end->flag & EXT4_IO_END_UNWRITTEN)
698 set_buffer_defer_completion(bh); 715 set_buffer_defer_completion(bh);
699 bh->b_size = inode->i_sb->s_blocksize * map.m_len; 716 bh->b_size = inode->i_sb->s_blocksize * map.m_len;
@@ -3010,13 +3027,14 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb,
3010 get_block_func = ext4_get_block_write; 3027 get_block_func = ext4_get_block_write;
3011 dio_flags = DIO_LOCKING; 3028 dio_flags = DIO_LOCKING;
3012 } 3029 }
3013 ret = __blockdev_direct_IO(rw, iocb, inode, 3030 if (IS_DAX(inode))
3014 inode->i_sb->s_bdev, iter, 3031 ret = dax_do_io(rw, iocb, inode, iter, offset, get_block_func,
3015 offset, 3032 ext4_end_io_dio, dio_flags);
3016 get_block_func, 3033 else
3017 ext4_end_io_dio, 3034 ret = __blockdev_direct_IO(rw, iocb, inode,
3018 NULL, 3035 inode->i_sb->s_bdev, iter, offset,
3019 dio_flags); 3036 get_block_func,
3037 ext4_end_io_dio, NULL, dio_flags);
3020 3038
3021 /* 3039 /*
3022 * Put our reference to io_end. This can free the io_end structure e.g. 3040 * Put our reference to io_end. This can free the io_end structure e.g.
@@ -3180,19 +3198,12 @@ void ext4_set_aops(struct inode *inode)
3180 inode->i_mapping->a_ops = &ext4_aops; 3198 inode->i_mapping->a_ops = &ext4_aops;
3181} 3199}
3182 3200
3183/* 3201static int __ext4_block_zero_page_range(handle_t *handle,
3184 * ext4_block_zero_page_range() zeros out a mapping of length 'length'
3185 * starting from file offset 'from'. The range to be zero'd must
3186 * be contained with in one block. If the specified range exceeds
3187 * the end of the block it will be shortened to end of the block
3188 * that cooresponds to 'from'
3189 */
3190static int ext4_block_zero_page_range(handle_t *handle,
3191 struct address_space *mapping, loff_t from, loff_t length) 3202 struct address_space *mapping, loff_t from, loff_t length)
3192{ 3203{
3193 ext4_fsblk_t index = from >> PAGE_CACHE_SHIFT; 3204 ext4_fsblk_t index = from >> PAGE_CACHE_SHIFT;
3194 unsigned offset = from & (PAGE_CACHE_SIZE-1); 3205 unsigned offset = from & (PAGE_CACHE_SIZE-1);
3195 unsigned blocksize, max, pos; 3206 unsigned blocksize, pos;
3196 ext4_lblk_t iblock; 3207 ext4_lblk_t iblock;
3197 struct inode *inode = mapping->host; 3208 struct inode *inode = mapping->host;
3198 struct buffer_head *bh; 3209 struct buffer_head *bh;
@@ -3205,14 +3216,6 @@ static int ext4_block_zero_page_range(handle_t *handle,
3205 return -ENOMEM; 3216 return -ENOMEM;
3206 3217
3207 blocksize = inode->i_sb->s_blocksize; 3218 blocksize = inode->i_sb->s_blocksize;
3208 max = blocksize - (offset & (blocksize - 1));
3209
3210 /*
3211 * correct length if it does not fall between
3212 * 'from' and the end of the block
3213 */
3214 if (length > max || length < 0)
3215 length = max;
3216 3219
3217 iblock = index << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits); 3220 iblock = index << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits);
3218 3221
@@ -3278,6 +3281,33 @@ unlock:
3278} 3281}
3279 3282
3280/* 3283/*
3284 * ext4_block_zero_page_range() zeros out a mapping of length 'length'
3285 * starting from file offset 'from'. The range to be zero'd must
3286 * be contained with in one block. If the specified range exceeds
3287 * the end of the block it will be shortened to end of the block
3288 * that cooresponds to 'from'
3289 */
3290static int ext4_block_zero_page_range(handle_t *handle,
3291 struct address_space *mapping, loff_t from, loff_t length)
3292{
3293 struct inode *inode = mapping->host;
3294 unsigned offset = from & (PAGE_CACHE_SIZE-1);
3295 unsigned blocksize = inode->i_sb->s_blocksize;
3296 unsigned max = blocksize - (offset & (blocksize - 1));
3297
3298 /*
3299 * correct length if it does not fall between
3300 * 'from' and the end of the block
3301 */
3302 if (length > max || length < 0)
3303 length = max;
3304
3305 if (IS_DAX(inode))
3306 return dax_zero_page_range(inode, from, length, ext4_get_block);
3307 return __ext4_block_zero_page_range(handle, mapping, from, length);
3308}
3309
3310/*
3281 * ext4_block_truncate_page() zeroes out a mapping from file offset `from' 3311 * ext4_block_truncate_page() zeroes out a mapping from file offset `from'
3282 * up to the end of the block which corresponds to `from'. 3312 * up to the end of the block which corresponds to `from'.
3283 * This required during truncate. We need to physically zero the tail end 3313 * This required during truncate. We need to physically zero the tail end
@@ -3798,8 +3828,10 @@ void ext4_set_inode_flags(struct inode *inode)
3798 new_fl |= S_NOATIME; 3828 new_fl |= S_NOATIME;
3799 if (flags & EXT4_DIRSYNC_FL) 3829 if (flags & EXT4_DIRSYNC_FL)
3800 new_fl |= S_DIRSYNC; 3830 new_fl |= S_DIRSYNC;
3831 if (test_opt(inode->i_sb, DAX))
3832 new_fl |= S_DAX;
3801 inode_set_flags(inode, new_fl, 3833 inode_set_flags(inode, new_fl,
3802 S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC); 3834 S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC|S_DAX);
3803} 3835}
3804 3836
3805/* Propagate flags from i_flags to EXT4_I(inode)->i_flags */ 3837/* Propagate flags from i_flags to EXT4_I(inode)->i_flags */
@@ -4052,7 +4084,10 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
4052 4084
4053 if (S_ISREG(inode->i_mode)) { 4085 if (S_ISREG(inode->i_mode)) {
4054 inode->i_op = &ext4_file_inode_operations; 4086 inode->i_op = &ext4_file_inode_operations;
4055 inode->i_fop = &ext4_file_operations; 4087 if (test_opt(inode->i_sb, DAX))
4088 inode->i_fop = &ext4_dax_file_operations;
4089 else
4090 inode->i_fop = &ext4_file_operations;
4056 ext4_set_aops(inode); 4091 ext4_set_aops(inode);
4057 } else if (S_ISDIR(inode->i_mode)) { 4092 } else if (S_ISDIR(inode->i_mode)) {
4058 inode->i_op = &ext4_dir_inode_operations; 4093 inode->i_op = &ext4_dir_inode_operations;
@@ -4534,7 +4569,7 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
4534 * Truncate pagecache after we've waited for commit 4569 * Truncate pagecache after we've waited for commit
4535 * in data=journal mode to make pages freeable. 4570 * in data=journal mode to make pages freeable.
4536 */ 4571 */
4537 truncate_pagecache(inode, inode->i_size); 4572 truncate_pagecache(inode, inode->i_size);
4538 } 4573 }
4539 /* 4574 /*
4540 * We want to call ext4_truncate() even if attr->ia_size == 4575 * We want to call ext4_truncate() even if attr->ia_size ==
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 2291923dae4e..28fe71a2904c 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -2235,7 +2235,10 @@ retry:
2235 err = PTR_ERR(inode); 2235 err = PTR_ERR(inode);
2236 if (!IS_ERR(inode)) { 2236 if (!IS_ERR(inode)) {
2237 inode->i_op = &ext4_file_inode_operations; 2237 inode->i_op = &ext4_file_inode_operations;
2238 inode->i_fop = &ext4_file_operations; 2238 if (test_opt(inode->i_sb, DAX))
2239 inode->i_fop = &ext4_dax_file_operations;
2240 else
2241 inode->i_fop = &ext4_file_operations;
2239 ext4_set_aops(inode); 2242 ext4_set_aops(inode);
2240 err = ext4_add_nondir(handle, dentry, inode); 2243 err = ext4_add_nondir(handle, dentry, inode);
2241 if (!err && IS_DIRSYNC(dir)) 2244 if (!err && IS_DIRSYNC(dir))
@@ -2299,7 +2302,10 @@ retry:
2299 err = PTR_ERR(inode); 2302 err = PTR_ERR(inode);
2300 if (!IS_ERR(inode)) { 2303 if (!IS_ERR(inode)) {
2301 inode->i_op = &ext4_file_inode_operations; 2304 inode->i_op = &ext4_file_inode_operations;
2302 inode->i_fop = &ext4_file_operations; 2305 if (test_opt(inode->i_sb, DAX))
2306 inode->i_fop = &ext4_dax_file_operations;
2307 else
2308 inode->i_fop = &ext4_file_operations;
2303 ext4_set_aops(inode); 2309 ext4_set_aops(inode);
2304 d_tmpfile(dentry, inode); 2310 d_tmpfile(dentry, inode);
2305 err = ext4_orphan_add(handle, inode); 2311 err = ext4_orphan_add(handle, inode);
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 64c39c7c594f..10e8c6b7ca08 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -1124,7 +1124,7 @@ enum {
1124 Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota, 1124 Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota,
1125 Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_jqfmt_vfsv1, Opt_quota, 1125 Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_jqfmt_vfsv1, Opt_quota,
1126 Opt_noquota, Opt_barrier, Opt_nobarrier, Opt_err, 1126 Opt_noquota, Opt_barrier, Opt_nobarrier, Opt_err,
1127 Opt_usrquota, Opt_grpquota, Opt_i_version, 1127 Opt_usrquota, Opt_grpquota, Opt_i_version, Opt_dax,
1128 Opt_stripe, Opt_delalloc, Opt_nodelalloc, Opt_mblk_io_submit, 1128 Opt_stripe, Opt_delalloc, Opt_nodelalloc, Opt_mblk_io_submit,
1129 Opt_nomblk_io_submit, Opt_block_validity, Opt_noblock_validity, 1129 Opt_nomblk_io_submit, Opt_block_validity, Opt_noblock_validity,
1130 Opt_inode_readahead_blks, Opt_journal_ioprio, 1130 Opt_inode_readahead_blks, Opt_journal_ioprio,
@@ -1187,6 +1187,7 @@ static const match_table_t tokens = {
1187 {Opt_barrier, "barrier"}, 1187 {Opt_barrier, "barrier"},
1188 {Opt_nobarrier, "nobarrier"}, 1188 {Opt_nobarrier, "nobarrier"},
1189 {Opt_i_version, "i_version"}, 1189 {Opt_i_version, "i_version"},
1190 {Opt_dax, "dax"},
1190 {Opt_stripe, "stripe=%u"}, 1191 {Opt_stripe, "stripe=%u"},
1191 {Opt_delalloc, "delalloc"}, 1192 {Opt_delalloc, "delalloc"},
1192 {Opt_nodelalloc, "nodelalloc"}, 1193 {Opt_nodelalloc, "nodelalloc"},
@@ -1371,6 +1372,7 @@ static const struct mount_opts {
1371 {Opt_min_batch_time, 0, MOPT_GTE0}, 1372 {Opt_min_batch_time, 0, MOPT_GTE0},
1372 {Opt_inode_readahead_blks, 0, MOPT_GTE0}, 1373 {Opt_inode_readahead_blks, 0, MOPT_GTE0},
1373 {Opt_init_itable, 0, MOPT_GTE0}, 1374 {Opt_init_itable, 0, MOPT_GTE0},
1375 {Opt_dax, EXT4_MOUNT_DAX, MOPT_SET},
1374 {Opt_stripe, 0, MOPT_GTE0}, 1376 {Opt_stripe, 0, MOPT_GTE0},
1375 {Opt_resuid, 0, MOPT_GTE0}, 1377 {Opt_resuid, 0, MOPT_GTE0},
1376 {Opt_resgid, 0, MOPT_GTE0}, 1378 {Opt_resgid, 0, MOPT_GTE0},
@@ -1607,6 +1609,11 @@ static int handle_mount_opt(struct super_block *sb, char *opt, int token,
1607 } 1609 }
1608 sbi->s_jquota_fmt = m->mount_opt; 1610 sbi->s_jquota_fmt = m->mount_opt;
1609#endif 1611#endif
1612#ifndef CONFIG_FS_DAX
1613 } else if (token == Opt_dax) {
1614 ext4_msg(sb, KERN_INFO, "dax option not supported");
1615 return -1;
1616#endif
1610 } else { 1617 } else {
1611 if (!args->from) 1618 if (!args->from)
1612 arg = 1; 1619 arg = 1;
@@ -3589,6 +3596,11 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
3589 "both data=journal and dioread_nolock"); 3596 "both data=journal and dioread_nolock");
3590 goto failed_mount; 3597 goto failed_mount;
3591 } 3598 }
3599 if (test_opt(sb, DAX)) {
3600 ext4_msg(sb, KERN_ERR, "can't mount with "
3601 "both data=journal and dax");
3602 goto failed_mount;
3603 }
3592 if (test_opt(sb, DELALLOC)) 3604 if (test_opt(sb, DELALLOC))
3593 clear_opt(sb, DELALLOC); 3605 clear_opt(sb, DELALLOC);
3594 } 3606 }
@@ -3652,6 +3664,19 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
3652 goto failed_mount; 3664 goto failed_mount;
3653 } 3665 }
3654 3666
3667 if (sbi->s_mount_opt & EXT4_MOUNT_DAX) {
3668 if (blocksize != PAGE_SIZE) {
3669 ext4_msg(sb, KERN_ERR,
3670 "error: unsupported blocksize for dax");
3671 goto failed_mount;
3672 }
3673 if (!sb->s_bdev->bd_disk->fops->direct_access) {
3674 ext4_msg(sb, KERN_ERR,
3675 "error: device does not support dax");
3676 goto failed_mount;
3677 }
3678 }
3679
3655 if (sb->s_blocksize != blocksize) { 3680 if (sb->s_blocksize != blocksize) {
3656 /* Validate the filesystem blocksize */ 3681 /* Validate the filesystem blocksize */
3657 if (!sb_set_blocksize(sb, blocksize)) { 3682 if (!sb_set_blocksize(sb, blocksize)) {
@@ -4869,6 +4894,18 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
4869 err = -EINVAL; 4894 err = -EINVAL;
4870 goto restore_opts; 4895 goto restore_opts;
4871 } 4896 }
4897 if (test_opt(sb, DAX)) {
4898 ext4_msg(sb, KERN_ERR, "can't mount with "
4899 "both data=journal and dax");
4900 err = -EINVAL;
4901 goto restore_opts;
4902 }
4903 }
4904
4905 if ((sbi->s_mount_opt ^ old_opts.s_mount_opt) & EXT4_MOUNT_DAX) {
4906 ext4_msg(sb, KERN_WARNING, "warning: refusing change of "
4907 "dax flag with busy inodes while remounting");
4908 sbi->s_mount_opt ^= EXT4_MOUNT_DAX;
4872 } 4909 }
4873 4910
4874 if (sbi->s_mount_flags & EXT4_MF_FS_ABORTED) 4911 if (sbi->s_mount_flags & EXT4_MF_FS_ABORTED)
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index 46d93e941f3d..44db1808cdb5 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -28,6 +28,7 @@
28#include <linux/pipe_fs_i.h> 28#include <linux/pipe_fs_i.h>
29#include <linux/mpage.h> 29#include <linux/mpage.h>
30#include <linux/quotaops.h> 30#include <linux/quotaops.h>
31#include <linux/blkdev.h>
31 32
32#include <cluster/masklog.h> 33#include <cluster/masklog.h>
33 34
@@ -47,6 +48,9 @@
47#include "ocfs2_trace.h" 48#include "ocfs2_trace.h"
48 49
49#include "buffer_head_io.h" 50#include "buffer_head_io.h"
51#include "dir.h"
52#include "namei.h"
53#include "sysfile.h"
50 54
51static int ocfs2_symlink_get_block(struct inode *inode, sector_t iblock, 55static int ocfs2_symlink_get_block(struct inode *inode, sector_t iblock,
52 struct buffer_head *bh_result, int create) 56 struct buffer_head *bh_result, int create)
@@ -506,18 +510,21 @@ bail:
506 * 510 *
507 * called like this: dio->get_blocks(dio->inode, fs_startblk, 511 * called like this: dio->get_blocks(dio->inode, fs_startblk,
508 * fs_count, map_bh, dio->rw == WRITE); 512 * fs_count, map_bh, dio->rw == WRITE);
509 *
510 * Note that we never bother to allocate blocks here, and thus ignore the
511 * create argument.
512 */ 513 */
513static int ocfs2_direct_IO_get_blocks(struct inode *inode, sector_t iblock, 514static int ocfs2_direct_IO_get_blocks(struct inode *inode, sector_t iblock,
514 struct buffer_head *bh_result, int create) 515 struct buffer_head *bh_result, int create)
515{ 516{
516 int ret; 517 int ret;
518 u32 cpos = 0;
519 int alloc_locked = 0;
517 u64 p_blkno, inode_blocks, contig_blocks; 520 u64 p_blkno, inode_blocks, contig_blocks;
518 unsigned int ext_flags; 521 unsigned int ext_flags;
519 unsigned char blocksize_bits = inode->i_sb->s_blocksize_bits; 522 unsigned char blocksize_bits = inode->i_sb->s_blocksize_bits;
520 unsigned long max_blocks = bh_result->b_size >> inode->i_blkbits; 523 unsigned long max_blocks = bh_result->b_size >> inode->i_blkbits;
524 unsigned long len = bh_result->b_size;
525 unsigned int clusters_to_alloc = 0;
526
527 cpos = ocfs2_blocks_to_clusters(inode->i_sb, iblock);
521 528
522 /* This function won't even be called if the request isn't all 529 /* This function won't even be called if the request isn't all
523 * nicely aligned and of the right size, so there's no need 530 * nicely aligned and of the right size, so there's no need
@@ -539,6 +546,40 @@ static int ocfs2_direct_IO_get_blocks(struct inode *inode, sector_t iblock,
539 /* We should already CoW the refcounted extent in case of create. */ 546 /* We should already CoW the refcounted extent in case of create. */
540 BUG_ON(create && (ext_flags & OCFS2_EXT_REFCOUNTED)); 547 BUG_ON(create && (ext_flags & OCFS2_EXT_REFCOUNTED));
541 548
549 /* allocate blocks if no p_blkno is found, and create == 1 */
550 if (!p_blkno && create) {
551 ret = ocfs2_inode_lock(inode, NULL, 1);
552 if (ret < 0) {
553 mlog_errno(ret);
554 goto bail;
555 }
556
557 alloc_locked = 1;
558
559 /* fill hole, allocate blocks can't be larger than the size
560 * of the hole */
561 clusters_to_alloc = ocfs2_clusters_for_bytes(inode->i_sb, len);
562 if (clusters_to_alloc > contig_blocks)
563 clusters_to_alloc = contig_blocks;
564
565 /* allocate extent and insert them into the extent tree */
566 ret = ocfs2_extend_allocation(inode, cpos,
567 clusters_to_alloc, 0);
568 if (ret < 0) {
569 mlog_errno(ret);
570 goto bail;
571 }
572
573 ret = ocfs2_extent_map_get_blocks(inode, iblock, &p_blkno,
574 &contig_blocks, &ext_flags);
575 if (ret < 0) {
576 mlog(ML_ERROR, "get_blocks() failed iblock=%llu\n",
577 (unsigned long long)iblock);
578 ret = -EIO;
579 goto bail;
580 }
581 }
582
542 /* 583 /*
543 * get_more_blocks() expects us to describe a hole by clearing 584 * get_more_blocks() expects us to describe a hole by clearing
544 * the mapped bit on bh_result(). 585 * the mapped bit on bh_result().
@@ -556,6 +597,8 @@ static int ocfs2_direct_IO_get_blocks(struct inode *inode, sector_t iblock,
556 contig_blocks = max_blocks; 597 contig_blocks = max_blocks;
557 bh_result->b_size = contig_blocks << blocksize_bits; 598 bh_result->b_size = contig_blocks << blocksize_bits;
558bail: 599bail:
600 if (alloc_locked)
601 ocfs2_inode_unlock(inode, 1);
559 return ret; 602 return ret;
560} 603}
561 604
@@ -597,6 +640,184 @@ static int ocfs2_releasepage(struct page *page, gfp_t wait)
597 return try_to_free_buffers(page); 640 return try_to_free_buffers(page);
598} 641}
599 642
643static int ocfs2_is_overwrite(struct ocfs2_super *osb,
644 struct inode *inode, loff_t offset)
645{
646 int ret = 0;
647 u32 v_cpos = 0;
648 u32 p_cpos = 0;
649 unsigned int num_clusters = 0;
650 unsigned int ext_flags = 0;
651
652 v_cpos = ocfs2_bytes_to_clusters(osb->sb, offset);
653 ret = ocfs2_get_clusters(inode, v_cpos, &p_cpos,
654 &num_clusters, &ext_flags);
655 if (ret < 0) {
656 mlog_errno(ret);
657 return ret;
658 }
659
660 if (p_cpos && !(ext_flags & OCFS2_EXT_UNWRITTEN))
661 return 1;
662
663 return 0;
664}
665
666static ssize_t ocfs2_direct_IO_write(struct kiocb *iocb,
667 struct iov_iter *iter,
668 loff_t offset)
669{
670 ssize_t ret = 0;
671 ssize_t written = 0;
672 bool orphaned = false;
673 int is_overwrite = 0;
674 struct file *file = iocb->ki_filp;
675 struct inode *inode = file_inode(file)->i_mapping->host;
676 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
677 struct buffer_head *di_bh = NULL;
678 size_t count = iter->count;
679 journal_t *journal = osb->journal->j_journal;
680 u32 zero_len;
681 int cluster_align;
682 loff_t final_size = offset + count;
683 int append_write = offset >= i_size_read(inode) ? 1 : 0;
684 unsigned int num_clusters = 0;
685 unsigned int ext_flags = 0;
686
687 {
688 u64 o = offset;
689
690 zero_len = do_div(o, 1 << osb->s_clustersize_bits);
691 cluster_align = !zero_len;
692 }
693
694 /*
695 * when final_size > inode->i_size, inode->i_size will be
696 * updated after direct write, so add the inode to orphan
697 * dir first.
698 */
699 if (final_size > i_size_read(inode)) {
700 ret = ocfs2_add_inode_to_orphan(osb, inode);
701 if (ret < 0) {
702 mlog_errno(ret);
703 goto out;
704 }
705 orphaned = true;
706 }
707
708 if (append_write) {
709 ret = ocfs2_inode_lock(inode, &di_bh, 1);
710 if (ret < 0) {
711 mlog_errno(ret);
712 goto clean_orphan;
713 }
714
715 if (ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb)))
716 ret = ocfs2_zero_extend(inode, di_bh, offset);
717 else
718 ret = ocfs2_extend_no_holes(inode, di_bh, offset,
719 offset);
720 if (ret < 0) {
721 mlog_errno(ret);
722 ocfs2_inode_unlock(inode, 1);
723 brelse(di_bh);
724 goto clean_orphan;
725 }
726
727 is_overwrite = ocfs2_is_overwrite(osb, inode, offset);
728 if (is_overwrite < 0) {
729 mlog_errno(is_overwrite);
730 ocfs2_inode_unlock(inode, 1);
731 brelse(di_bh);
732 goto clean_orphan;
733 }
734
735 ocfs2_inode_unlock(inode, 1);
736 brelse(di_bh);
737 di_bh = NULL;
738 }
739
740 written = __blockdev_direct_IO(WRITE, iocb, inode, inode->i_sb->s_bdev,
741 iter, offset,
742 ocfs2_direct_IO_get_blocks,
743 ocfs2_dio_end_io, NULL, 0);
744 if (unlikely(written < 0)) {
745 loff_t i_size = i_size_read(inode);
746
747 if (offset + count > i_size) {
748 ret = ocfs2_inode_lock(inode, &di_bh, 1);
749 if (ret < 0) {
750 mlog_errno(ret);
751 goto clean_orphan;
752 }
753
754 if (i_size == i_size_read(inode)) {
755 ret = ocfs2_truncate_file(inode, di_bh,
756 i_size);
757 if (ret < 0) {
758 if (ret != -ENOSPC)
759 mlog_errno(ret);
760
761 ocfs2_inode_unlock(inode, 1);
762 brelse(di_bh);
763 goto clean_orphan;
764 }
765 }
766
767 ocfs2_inode_unlock(inode, 1);
768 brelse(di_bh);
769
770 ret = jbd2_journal_force_commit(journal);
771 if (ret < 0)
772 mlog_errno(ret);
773 }
774 } else if (written < 0 && append_write && !is_overwrite &&
775 !cluster_align) {
776 u32 p_cpos = 0;
777 u32 v_cpos = ocfs2_bytes_to_clusters(osb->sb, offset);
778
779 ret = ocfs2_get_clusters(inode, v_cpos, &p_cpos,
780 &num_clusters, &ext_flags);
781 if (ret < 0) {
782 mlog_errno(ret);
783 goto clean_orphan;
784 }
785
786 BUG_ON(!p_cpos || (ext_flags & OCFS2_EXT_UNWRITTEN));
787
788 ret = blkdev_issue_zeroout(osb->sb->s_bdev,
789 p_cpos << (osb->s_clustersize_bits - 9),
790 zero_len >> 9, GFP_KERNEL, false);
791 if (ret < 0)
792 mlog_errno(ret);
793 }
794
795clean_orphan:
796 if (orphaned) {
797 int tmp_ret;
798 int update_isize = written > 0 ? 1 : 0;
799 loff_t end = update_isize ? offset + written : 0;
800
801 tmp_ret = ocfs2_del_inode_from_orphan(osb, inode,
802 update_isize, end);
803 if (tmp_ret < 0) {
804 ret = tmp_ret;
805 goto out;
806 }
807
808 tmp_ret = jbd2_journal_force_commit(journal);
809 if (tmp_ret < 0) {
810 ret = tmp_ret;
811 mlog_errno(tmp_ret);
812 }
813 }
814
815out:
816 if (ret >= 0)
817 ret = written;
818 return ret;
819}
820
600static ssize_t ocfs2_direct_IO(int rw, 821static ssize_t ocfs2_direct_IO(int rw,
601 struct kiocb *iocb, 822 struct kiocb *iocb,
602 struct iov_iter *iter, 823 struct iov_iter *iter,
@@ -604,6 +825,9 @@ static ssize_t ocfs2_direct_IO(int rw,
604{ 825{
605 struct file *file = iocb->ki_filp; 826 struct file *file = iocb->ki_filp;
606 struct inode *inode = file_inode(file)->i_mapping->host; 827 struct inode *inode = file_inode(file)->i_mapping->host;
828 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
829 int full_coherency = !(osb->s_mount_opt &
830 OCFS2_MOUNT_COHERENCY_BUFFERED);
607 831
608 /* 832 /*
609 * Fallback to buffered I/O if we see an inode without 833 * Fallback to buffered I/O if we see an inode without
@@ -612,14 +836,20 @@ static ssize_t ocfs2_direct_IO(int rw,
612 if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) 836 if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL)
613 return 0; 837 return 0;
614 838
615 /* Fallback to buffered I/O if we are appending. */ 839 /* Fallback to buffered I/O if we are appending and
616 if (i_size_read(inode) <= offset) 840 * concurrent O_DIRECT writes are allowed.
841 */
842 if (i_size_read(inode) <= offset && !full_coherency)
617 return 0; 843 return 0;
618 844
619 return __blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, 845 if (rw == READ)
846 return __blockdev_direct_IO(rw, iocb, inode,
847 inode->i_sb->s_bdev,
620 iter, offset, 848 iter, offset,
621 ocfs2_direct_IO_get_blocks, 849 ocfs2_direct_IO_get_blocks,
622 ocfs2_dio_end_io, NULL, 0); 850 ocfs2_dio_end_io, NULL, 0);
851 else
852 return ocfs2_direct_IO_write(iocb, iter, offset);
623} 853}
624 854
625static void ocfs2_figure_cluster_boundaries(struct ocfs2_super *osb, 855static void ocfs2_figure_cluster_boundaries(struct ocfs2_super *osb,
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index e0f04d55fd05..46e0d4e857c7 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -295,7 +295,7 @@ out:
295 return ret; 295 return ret;
296} 296}
297 297
298static int ocfs2_set_inode_size(handle_t *handle, 298int ocfs2_set_inode_size(handle_t *handle,
299 struct inode *inode, 299 struct inode *inode,
300 struct buffer_head *fe_bh, 300 struct buffer_head *fe_bh,
301 u64 new_i_size) 301 u64 new_i_size)
@@ -441,7 +441,7 @@ out:
441 return status; 441 return status;
442} 442}
443 443
444static int ocfs2_truncate_file(struct inode *inode, 444int ocfs2_truncate_file(struct inode *inode,
445 struct buffer_head *di_bh, 445 struct buffer_head *di_bh,
446 u64 new_i_size) 446 u64 new_i_size)
447{ 447{
@@ -709,6 +709,13 @@ leave:
709 return status; 709 return status;
710} 710}
711 711
712int ocfs2_extend_allocation(struct inode *inode, u32 logical_start,
713 u32 clusters_to_add, int mark_unwritten)
714{
715 return __ocfs2_extend_allocation(inode, logical_start,
716 clusters_to_add, mark_unwritten);
717}
718
712/* 719/*
713 * While a write will already be ordering the data, a truncate will not. 720 * While a write will already be ordering the data, a truncate will not.
714 * Thus, we need to explicitly order the zeroed pages. 721 * Thus, we need to explicitly order the zeroed pages.
@@ -2109,6 +2116,9 @@ static int ocfs2_prepare_inode_for_write(struct file *file,
2109 struct dentry *dentry = file->f_path.dentry; 2116 struct dentry *dentry = file->f_path.dentry;
2110 struct inode *inode = dentry->d_inode; 2117 struct inode *inode = dentry->d_inode;
2111 loff_t saved_pos = 0, end; 2118 loff_t saved_pos = 0, end;
2119 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
2120 int full_coherency = !(osb->s_mount_opt &
2121 OCFS2_MOUNT_COHERENCY_BUFFERED);
2112 2122
2113 /* 2123 /*
2114 * We start with a read level meta lock and only jump to an ex 2124 * We start with a read level meta lock and only jump to an ex
@@ -2197,7 +2207,16 @@ static int ocfs2_prepare_inode_for_write(struct file *file,
2197 * one node could wind up truncating another 2207 * one node could wind up truncating another
2198 * node's writes. 2208 * node's writes.
2199 */ 2209 */
2200 if (end > i_size_read(inode)) { 2210 if (end > i_size_read(inode) && !full_coherency) {
2211 *direct_io = 0;
2212 break;
2213 }
2214
2215 /*
2216 * Fallback to old way if the feature bit is not set.
2217 */
2218 if (end > i_size_read(inode) &&
2219 !ocfs2_supports_append_dio(osb)) {
2201 *direct_io = 0; 2220 *direct_io = 0;
2202 break; 2221 break;
2203 } 2222 }
@@ -2210,7 +2229,13 @@ static int ocfs2_prepare_inode_for_write(struct file *file,
2210 */ 2229 */
2211 ret = ocfs2_check_range_for_holes(inode, saved_pos, count); 2230 ret = ocfs2_check_range_for_holes(inode, saved_pos, count);
2212 if (ret == 1) { 2231 if (ret == 1) {
2213 *direct_io = 0; 2232 /*
2233 * Fallback to old way if the feature bit is not set.
2234 * Otherwise try dio first and then complete the rest of the
2235 * request through buffered io.
2236 */
2237 if (!ocfs2_supports_append_dio(osb))
2238 *direct_io = 0;
2214 ret = 0; 2239 ret = 0;
2215 } else if (ret < 0) 2240 } else if (ret < 0)
2216 mlog_errno(ret); 2241 mlog_errno(ret);
@@ -2243,6 +2268,7 @@ static ssize_t ocfs2_file_write_iter(struct kiocb *iocb,
2243 u32 old_clusters; 2268 u32 old_clusters;
2244 struct file *file = iocb->ki_filp; 2269 struct file *file = iocb->ki_filp;
2245 struct inode *inode = file_inode(file); 2270 struct inode *inode = file_inode(file);
2271 struct address_space *mapping = file->f_mapping;
2246 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 2272 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
2247 int full_coherency = !(osb->s_mount_opt & 2273 int full_coherency = !(osb->s_mount_opt &
2248 OCFS2_MOUNT_COHERENCY_BUFFERED); 2274 OCFS2_MOUNT_COHERENCY_BUFFERED);
@@ -2357,11 +2383,51 @@ relock:
2357 2383
2358 iov_iter_truncate(from, count); 2384 iov_iter_truncate(from, count);
2359 if (direct_io) { 2385 if (direct_io) {
2386 loff_t endbyte;
2387 ssize_t written_buffered;
2360 written = generic_file_direct_write(iocb, from, *ppos); 2388 written = generic_file_direct_write(iocb, from, *ppos);
2361 if (written < 0) { 2389 if (written < 0 || written == count) {
2362 ret = written; 2390 ret = written;
2363 goto out_dio; 2391 goto out_dio;
2364 } 2392 }
2393
2394 /*
2395 * Fall back to buffered I/O for completing the rest of the request.
2396 */
2397 *ppos += written;
2398 count -= written;
2399 written_buffered = generic_perform_write(file, from, *ppos);
2400 /*
2401 * If generic_perform_write() returned a synchronous error
2402 * then we want to return the number of bytes which were
2403 * direct-written, or the error code if that was zero. Note
2404 * that this differs from normal direct-io semantics, which
2405 * will return -EFOO even if some bytes were written.
2406 */
2407 if (written_buffered < 0) {
2408 ret = written_buffered;
2409 goto out_dio;
2410 }
2411
2412 iocb->ki_pos = *ppos + written_buffered;
2413 /* We need to ensure that the page cache pages are written to
2414 * disk and invalidated to preserve the expected O_DIRECT
2415 * semantics.
2416 */
2417 endbyte = *ppos + written_buffered - 1;
2418 ret = filemap_write_and_wait_range(file->f_mapping, *ppos,
2419 endbyte);
2420 if (ret == 0) {
2421 written += written_buffered;
2422 invalidate_mapping_pages(mapping,
2423 *ppos >> PAGE_CACHE_SHIFT,
2424 endbyte >> PAGE_CACHE_SHIFT);
2425 } else {
2426 /*
2427 * We don't know how much we wrote, so just return
2428 * the number of bytes which were direct-written
2429 */
2430 }
2365 } else { 2431 } else {
2366 current->backing_dev_info = inode_to_bdi(inode); 2432 current->backing_dev_info = inode_to_bdi(inode);
2367 written = generic_perform_write(file, from, *ppos); 2433 written = generic_perform_write(file, from, *ppos);
diff --git a/fs/ocfs2/file.h b/fs/ocfs2/file.h
index 97bf761c9e7c..e8c62f22215c 100644
--- a/fs/ocfs2/file.h
+++ b/fs/ocfs2/file.h
@@ -51,13 +51,22 @@ int ocfs2_add_inode_data(struct ocfs2_super *osb,
51 struct ocfs2_alloc_context *data_ac, 51 struct ocfs2_alloc_context *data_ac,
52 struct ocfs2_alloc_context *meta_ac, 52 struct ocfs2_alloc_context *meta_ac,
53 enum ocfs2_alloc_restarted *reason_ret); 53 enum ocfs2_alloc_restarted *reason_ret);
54int ocfs2_set_inode_size(handle_t *handle,
55 struct inode *inode,
56 struct buffer_head *fe_bh,
57 u64 new_i_size);
54int ocfs2_simple_size_update(struct inode *inode, 58int ocfs2_simple_size_update(struct inode *inode,
55 struct buffer_head *di_bh, 59 struct buffer_head *di_bh,
56 u64 new_i_size); 60 u64 new_i_size);
61int ocfs2_truncate_file(struct inode *inode,
62 struct buffer_head *di_bh,
63 u64 new_i_size);
57int ocfs2_extend_no_holes(struct inode *inode, struct buffer_head *di_bh, 64int ocfs2_extend_no_holes(struct inode *inode, struct buffer_head *di_bh,
58 u64 new_i_size, u64 zero_to); 65 u64 new_i_size, u64 zero_to);
59int ocfs2_zero_extend(struct inode *inode, struct buffer_head *di_bh, 66int ocfs2_zero_extend(struct inode *inode, struct buffer_head *di_bh,
60 loff_t zero_to); 67 loff_t zero_to);
68int ocfs2_extend_allocation(struct inode *inode, u32 logical_start,
69 u32 clusters_to_add, int mark_unwritten);
61int ocfs2_setattr(struct dentry *dentry, struct iattr *attr); 70int ocfs2_setattr(struct dentry *dentry, struct iattr *attr);
62int ocfs2_getattr(struct vfsmount *mnt, struct dentry *dentry, 71int ocfs2_getattr(struct vfsmount *mnt, struct dentry *dentry,
63 struct kstat *stat); 72 struct kstat *stat);
diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c
index c8b25de9efbb..3025c0da6b8a 100644
--- a/fs/ocfs2/inode.c
+++ b/fs/ocfs2/inode.c
@@ -648,7 +648,7 @@ static int ocfs2_remove_inode(struct inode *inode,
648 648
649 if (!(OCFS2_I(inode)->ip_flags & OCFS2_INODE_SKIP_ORPHAN_DIR)) { 649 if (!(OCFS2_I(inode)->ip_flags & OCFS2_INODE_SKIP_ORPHAN_DIR)) {
650 status = ocfs2_orphan_del(osb, handle, orphan_dir_inode, inode, 650 status = ocfs2_orphan_del(osb, handle, orphan_dir_inode, inode,
651 orphan_dir_bh); 651 orphan_dir_bh, false);
652 if (status < 0) { 652 if (status < 0) {
653 mlog_errno(status); 653 mlog_errno(status);
654 goto bail_commit; 654 goto bail_commit;
diff --git a/fs/ocfs2/inode.h b/fs/ocfs2/inode.h
index ca3431ee7f24..5e86b247c821 100644
--- a/fs/ocfs2/inode.h
+++ b/fs/ocfs2/inode.h
@@ -81,6 +81,8 @@ struct ocfs2_inode_info
81 tid_t i_sync_tid; 81 tid_t i_sync_tid;
82 tid_t i_datasync_tid; 82 tid_t i_datasync_tid;
83 83
84 wait_queue_head_t append_dio_wq;
85
84 struct dquot *i_dquot[MAXQUOTAS]; 86 struct dquot *i_dquot[MAXQUOTAS];
85}; 87};
86 88
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
index d10860fde165..ff531928269e 100644
--- a/fs/ocfs2/journal.c
+++ b/fs/ocfs2/journal.c
@@ -50,6 +50,8 @@
50#include "sysfile.h" 50#include "sysfile.h"
51#include "uptodate.h" 51#include "uptodate.h"
52#include "quota.h" 52#include "quota.h"
53#include "file.h"
54#include "namei.h"
53 55
54#include "buffer_head_io.h" 56#include "buffer_head_io.h"
55#include "ocfs2_trace.h" 57#include "ocfs2_trace.h"
@@ -69,13 +71,15 @@ static int ocfs2_journal_toggle_dirty(struct ocfs2_super *osb,
69static int ocfs2_trylock_journal(struct ocfs2_super *osb, 71static int ocfs2_trylock_journal(struct ocfs2_super *osb,
70 int slot_num); 72 int slot_num);
71static int ocfs2_recover_orphans(struct ocfs2_super *osb, 73static int ocfs2_recover_orphans(struct ocfs2_super *osb,
72 int slot); 74 int slot,
75 enum ocfs2_orphan_reco_type orphan_reco_type);
73static int ocfs2_commit_thread(void *arg); 76static int ocfs2_commit_thread(void *arg);
74static void ocfs2_queue_recovery_completion(struct ocfs2_journal *journal, 77static void ocfs2_queue_recovery_completion(struct ocfs2_journal *journal,
75 int slot_num, 78 int slot_num,
76 struct ocfs2_dinode *la_dinode, 79 struct ocfs2_dinode *la_dinode,
77 struct ocfs2_dinode *tl_dinode, 80 struct ocfs2_dinode *tl_dinode,
78 struct ocfs2_quota_recovery *qrec); 81 struct ocfs2_quota_recovery *qrec,
82 enum ocfs2_orphan_reco_type orphan_reco_type);
79 83
80static inline int ocfs2_wait_on_mount(struct ocfs2_super *osb) 84static inline int ocfs2_wait_on_mount(struct ocfs2_super *osb)
81{ 85{
@@ -149,7 +153,8 @@ int ocfs2_compute_replay_slots(struct ocfs2_super *osb)
149 return 0; 153 return 0;
150} 154}
151 155
152void ocfs2_queue_replay_slots(struct ocfs2_super *osb) 156void ocfs2_queue_replay_slots(struct ocfs2_super *osb,
157 enum ocfs2_orphan_reco_type orphan_reco_type)
153{ 158{
154 struct ocfs2_replay_map *replay_map = osb->replay_map; 159 struct ocfs2_replay_map *replay_map = osb->replay_map;
155 int i; 160 int i;
@@ -163,7 +168,8 @@ void ocfs2_queue_replay_slots(struct ocfs2_super *osb)
163 for (i = 0; i < replay_map->rm_slots; i++) 168 for (i = 0; i < replay_map->rm_slots; i++)
164 if (replay_map->rm_replay_slots[i]) 169 if (replay_map->rm_replay_slots[i])
165 ocfs2_queue_recovery_completion(osb->journal, i, NULL, 170 ocfs2_queue_recovery_completion(osb->journal, i, NULL,
166 NULL, NULL); 171 NULL, NULL,
172 orphan_reco_type);
167 replay_map->rm_state = REPLAY_DONE; 173 replay_map->rm_state = REPLAY_DONE;
168} 174}
169 175
@@ -1174,6 +1180,7 @@ struct ocfs2_la_recovery_item {
1174 struct ocfs2_dinode *lri_la_dinode; 1180 struct ocfs2_dinode *lri_la_dinode;
1175 struct ocfs2_dinode *lri_tl_dinode; 1181 struct ocfs2_dinode *lri_tl_dinode;
1176 struct ocfs2_quota_recovery *lri_qrec; 1182 struct ocfs2_quota_recovery *lri_qrec;
1183 enum ocfs2_orphan_reco_type lri_orphan_reco_type;
1177}; 1184};
1178 1185
1179/* Does the second half of the recovery process. By this point, the 1186/* Does the second half of the recovery process. By this point, the
@@ -1195,6 +1202,7 @@ void ocfs2_complete_recovery(struct work_struct *work)
1195 struct ocfs2_dinode *la_dinode, *tl_dinode; 1202 struct ocfs2_dinode *la_dinode, *tl_dinode;
1196 struct ocfs2_la_recovery_item *item, *n; 1203 struct ocfs2_la_recovery_item *item, *n;
1197 struct ocfs2_quota_recovery *qrec; 1204 struct ocfs2_quota_recovery *qrec;
1205 enum ocfs2_orphan_reco_type orphan_reco_type;
1198 LIST_HEAD(tmp_la_list); 1206 LIST_HEAD(tmp_la_list);
1199 1207
1200 trace_ocfs2_complete_recovery( 1208 trace_ocfs2_complete_recovery(
@@ -1212,6 +1220,7 @@ void ocfs2_complete_recovery(struct work_struct *work)
1212 la_dinode = item->lri_la_dinode; 1220 la_dinode = item->lri_la_dinode;
1213 tl_dinode = item->lri_tl_dinode; 1221 tl_dinode = item->lri_tl_dinode;
1214 qrec = item->lri_qrec; 1222 qrec = item->lri_qrec;
1223 orphan_reco_type = item->lri_orphan_reco_type;
1215 1224
1216 trace_ocfs2_complete_recovery_slot(item->lri_slot, 1225 trace_ocfs2_complete_recovery_slot(item->lri_slot,
1217 la_dinode ? le64_to_cpu(la_dinode->i_blkno) : 0, 1226 la_dinode ? le64_to_cpu(la_dinode->i_blkno) : 0,
@@ -1236,7 +1245,8 @@ void ocfs2_complete_recovery(struct work_struct *work)
1236 kfree(tl_dinode); 1245 kfree(tl_dinode);
1237 } 1246 }
1238 1247
1239 ret = ocfs2_recover_orphans(osb, item->lri_slot); 1248 ret = ocfs2_recover_orphans(osb, item->lri_slot,
1249 orphan_reco_type);
1240 if (ret < 0) 1250 if (ret < 0)
1241 mlog_errno(ret); 1251 mlog_errno(ret);
1242 1252
@@ -1261,7 +1271,8 @@ static void ocfs2_queue_recovery_completion(struct ocfs2_journal *journal,
1261 int slot_num, 1271 int slot_num,
1262 struct ocfs2_dinode *la_dinode, 1272 struct ocfs2_dinode *la_dinode,
1263 struct ocfs2_dinode *tl_dinode, 1273 struct ocfs2_dinode *tl_dinode,
1264 struct ocfs2_quota_recovery *qrec) 1274 struct ocfs2_quota_recovery *qrec,
1275 enum ocfs2_orphan_reco_type orphan_reco_type)
1265{ 1276{
1266 struct ocfs2_la_recovery_item *item; 1277 struct ocfs2_la_recovery_item *item;
1267 1278
@@ -1285,6 +1296,7 @@ static void ocfs2_queue_recovery_completion(struct ocfs2_journal *journal,
1285 item->lri_slot = slot_num; 1296 item->lri_slot = slot_num;
1286 item->lri_tl_dinode = tl_dinode; 1297 item->lri_tl_dinode = tl_dinode;
1287 item->lri_qrec = qrec; 1298 item->lri_qrec = qrec;
1299 item->lri_orphan_reco_type = orphan_reco_type;
1288 1300
1289 spin_lock(&journal->j_lock); 1301 spin_lock(&journal->j_lock);
1290 list_add_tail(&item->lri_list, &journal->j_la_cleanups); 1302 list_add_tail(&item->lri_list, &journal->j_la_cleanups);
@@ -1304,7 +1316,8 @@ void ocfs2_complete_mount_recovery(struct ocfs2_super *osb)
1304 /* No need to queue up our truncate_log as regular cleanup will catch 1316 /* No need to queue up our truncate_log as regular cleanup will catch
1305 * that */ 1317 * that */
1306 ocfs2_queue_recovery_completion(journal, osb->slot_num, 1318 ocfs2_queue_recovery_completion(journal, osb->slot_num,
1307 osb->local_alloc_copy, NULL, NULL); 1319 osb->local_alloc_copy, NULL, NULL,
1320 ORPHAN_NEED_TRUNCATE);
1308 ocfs2_schedule_truncate_log_flush(osb, 0); 1321 ocfs2_schedule_truncate_log_flush(osb, 0);
1309 1322
1310 osb->local_alloc_copy = NULL; 1323 osb->local_alloc_copy = NULL;
@@ -1312,7 +1325,7 @@ void ocfs2_complete_mount_recovery(struct ocfs2_super *osb)
1312 1325
1313 /* queue to recover orphan slots for all offline slots */ 1326 /* queue to recover orphan slots for all offline slots */
1314 ocfs2_replay_map_set_state(osb, REPLAY_NEEDED); 1327 ocfs2_replay_map_set_state(osb, REPLAY_NEEDED);
1315 ocfs2_queue_replay_slots(osb); 1328 ocfs2_queue_replay_slots(osb, ORPHAN_NEED_TRUNCATE);
1316 ocfs2_free_replay_slots(osb); 1329 ocfs2_free_replay_slots(osb);
1317} 1330}
1318 1331
@@ -1323,7 +1336,8 @@ void ocfs2_complete_quota_recovery(struct ocfs2_super *osb)
1323 osb->slot_num, 1336 osb->slot_num,
1324 NULL, 1337 NULL,
1325 NULL, 1338 NULL,
1326 osb->quota_rec); 1339 osb->quota_rec,
1340 ORPHAN_NEED_TRUNCATE);
1327 osb->quota_rec = NULL; 1341 osb->quota_rec = NULL;
1328 } 1342 }
1329} 1343}
@@ -1360,7 +1374,7 @@ restart:
1360 1374
1361 /* queue recovery for our own slot */ 1375 /* queue recovery for our own slot */
1362 ocfs2_queue_recovery_completion(osb->journal, osb->slot_num, NULL, 1376 ocfs2_queue_recovery_completion(osb->journal, osb->slot_num, NULL,
1363 NULL, NULL); 1377 NULL, NULL, ORPHAN_NO_NEED_TRUNCATE);
1364 1378
1365 spin_lock(&osb->osb_lock); 1379 spin_lock(&osb->osb_lock);
1366 while (rm->rm_used) { 1380 while (rm->rm_used) {
@@ -1419,13 +1433,14 @@ skip_recovery:
1419 continue; 1433 continue;
1420 } 1434 }
1421 ocfs2_queue_recovery_completion(osb->journal, rm_quota[i], 1435 ocfs2_queue_recovery_completion(osb->journal, rm_quota[i],
1422 NULL, NULL, qrec); 1436 NULL, NULL, qrec,
1437 ORPHAN_NEED_TRUNCATE);
1423 } 1438 }
1424 1439
1425 ocfs2_super_unlock(osb, 1); 1440 ocfs2_super_unlock(osb, 1);
1426 1441
1427 /* queue recovery for offline slots */ 1442 /* queue recovery for offline slots */
1428 ocfs2_queue_replay_slots(osb); 1443 ocfs2_queue_replay_slots(osb, ORPHAN_NEED_TRUNCATE);
1429 1444
1430bail: 1445bail:
1431 mutex_lock(&osb->recovery_lock); 1446 mutex_lock(&osb->recovery_lock);
@@ -1711,7 +1726,7 @@ static int ocfs2_recover_node(struct ocfs2_super *osb,
1711 1726
1712 /* This will kfree the memory pointed to by la_copy and tl_copy */ 1727 /* This will kfree the memory pointed to by la_copy and tl_copy */
1713 ocfs2_queue_recovery_completion(osb->journal, slot_num, la_copy, 1728 ocfs2_queue_recovery_completion(osb->journal, slot_num, la_copy,
1714 tl_copy, NULL); 1729 tl_copy, NULL, ORPHAN_NEED_TRUNCATE);
1715 1730
1716 status = 0; 1731 status = 0;
1717done: 1732done:
@@ -1901,7 +1916,7 @@ void ocfs2_queue_orphan_scan(struct ocfs2_super *osb)
1901 1916
1902 for (i = 0; i < osb->max_slots; i++) 1917 for (i = 0; i < osb->max_slots; i++)
1903 ocfs2_queue_recovery_completion(osb->journal, i, NULL, NULL, 1918 ocfs2_queue_recovery_completion(osb->journal, i, NULL, NULL,
1904 NULL); 1919 NULL, ORPHAN_NO_NEED_TRUNCATE);
1905 /* 1920 /*
1906 * We queued a recovery on orphan slots, increment the sequence 1921 * We queued a recovery on orphan slots, increment the sequence
1907 * number and update LVB so other node will skip the scan for a while 1922 * number and update LVB so other node will skip the scan for a while
@@ -2000,6 +2015,13 @@ static int ocfs2_orphan_filldir(struct dir_context *ctx, const char *name,
2000 if (IS_ERR(iter)) 2015 if (IS_ERR(iter))
2001 return 0; 2016 return 0;
2002 2017
2018 /* Skip inodes which are already added to the recovery list, since dio may
2019 * happen concurrently with unlink/rename */
2020 if (OCFS2_I(iter)->ip_next_orphan) {
2021 iput(iter);
2022 return 0;
2023 }
2024
2003 trace_ocfs2_orphan_filldir((unsigned long long)OCFS2_I(iter)->ip_blkno); 2025 trace_ocfs2_orphan_filldir((unsigned long long)OCFS2_I(iter)->ip_blkno);
2004 /* No locking is required for the next_orphan queue as there 2026 /* No locking is required for the next_orphan queue as there
2005 * is only ever a single process doing orphan recovery. */ 2027 * is only ever a single process doing orphan recovery. */
@@ -2108,7 +2130,8 @@ static void ocfs2_clear_recovering_orphan_dir(struct ocfs2_super *osb,
2108 * advertising our state to ocfs2_delete_inode(). 2130 * advertising our state to ocfs2_delete_inode().
2109 */ 2131 */
2110static int ocfs2_recover_orphans(struct ocfs2_super *osb, 2132static int ocfs2_recover_orphans(struct ocfs2_super *osb,
2111 int slot) 2133 int slot,
2134 enum ocfs2_orphan_reco_type orphan_reco_type)
2112{ 2135{
2113 int ret = 0; 2136 int ret = 0;
2114 struct inode *inode = NULL; 2137 struct inode *inode = NULL;
@@ -2132,13 +2155,60 @@ static int ocfs2_recover_orphans(struct ocfs2_super *osb,
2132 (unsigned long long)oi->ip_blkno); 2155 (unsigned long long)oi->ip_blkno);
2133 2156
2134 iter = oi->ip_next_orphan; 2157 iter = oi->ip_next_orphan;
2158 oi->ip_next_orphan = NULL;
2159
2160 /*
2161 * We need to take and drop the inode lock to
2162 * force the inode to be read from disk.
2163 */
2164 ret = ocfs2_inode_lock(inode, NULL, 0);
2165 if (ret) {
2166 mlog_errno(ret);
2167 goto next;
2168 }
2169 ocfs2_inode_unlock(inode, 0);
2170
2171 if (inode->i_nlink == 0) {
2172 spin_lock(&oi->ip_lock);
2173 /* Set the proper information to get us going into
2174 * ocfs2_delete_inode. */
2175 oi->ip_flags |= OCFS2_INODE_MAYBE_ORPHANED;
2176 spin_unlock(&oi->ip_lock);
2177 } else if (orphan_reco_type == ORPHAN_NEED_TRUNCATE) {
2178 struct buffer_head *di_bh = NULL;
2179
2180 ret = ocfs2_rw_lock(inode, 1);
2181 if (ret) {
2182 mlog_errno(ret);
2183 goto next;
2184 }
2185
2186 ret = ocfs2_inode_lock(inode, &di_bh, 1);
2187 if (ret < 0) {
2188 ocfs2_rw_unlock(inode, 1);
2189 mlog_errno(ret);
2190 goto next;
2191 }
2192
2193 ret = ocfs2_truncate_file(inode, di_bh,
2194 i_size_read(inode));
2195 ocfs2_inode_unlock(inode, 1);
2196 ocfs2_rw_unlock(inode, 1);
2197 brelse(di_bh);
2198 if (ret < 0) {
2199 if (ret != -ENOSPC)
2200 mlog_errno(ret);
2201 goto next;
2202 }
2203
2204 ret = ocfs2_del_inode_from_orphan(osb, inode, 0, 0);
2205 if (ret)
2206 mlog_errno(ret);
2135 2207
2136 spin_lock(&oi->ip_lock); 2208 wake_up(&OCFS2_I(inode)->append_dio_wq);
2137 /* Set the proper information to get us going into 2209 } /* else if ORPHAN_NO_NEED_TRUNCATE, do nothing */
2138 * ocfs2_delete_inode. */
2139 oi->ip_flags |= OCFS2_INODE_MAYBE_ORPHANED;
2140 spin_unlock(&oi->ip_lock);
2141 2210
2211next:
2142 iput(inode); 2212 iput(inode);
2143 2213
2144 inode = iter; 2214 inode = iter;
diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h
index 7f8cde94abfe..f4cd3c3e9fb7 100644
--- a/fs/ocfs2/journal.h
+++ b/fs/ocfs2/journal.h
@@ -472,6 +472,11 @@ static inline int ocfs2_unlink_credits(struct super_block *sb)
472 * orphan dir index leaf */ 472 * orphan dir index leaf */
473#define OCFS2_DELETE_INODE_CREDITS (3 * OCFS2_INODE_UPDATE_CREDITS + 4) 473#define OCFS2_DELETE_INODE_CREDITS (3 * OCFS2_INODE_UPDATE_CREDITS + 4)
474 474
475/* dinode + orphan dir dinode + extent tree leaf block + orphan dir entry +
476 * orphan dir index root + orphan dir index leaf */
477#define OCFS2_INODE_ADD_TO_ORPHAN_CREDITS (2 * OCFS2_INODE_UPDATE_CREDITS + 4)
478#define OCFS2_INODE_DEL_FROM_ORPHAN_CREDITS OCFS2_INODE_ADD_TO_ORPHAN_CREDITS
479
475/* dinode update, old dir dinode update, new dir dinode update, old 480/* dinode update, old dir dinode update, new dir dinode update, old
476 * dir dir entry, new dir dir entry, dir entry update for renaming 481 * dir dir entry, new dir dir entry, dir entry update for renaming
477 * directory + target unlink + 3 x dir index leaves */ 482 * directory + target unlink + 3 x dir index leaves */
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index 914c121ec890..b5c3a5ea3ee6 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -79,7 +79,8 @@ static int ocfs2_prepare_orphan_dir(struct ocfs2_super *osb,
79 struct inode **ret_orphan_dir, 79 struct inode **ret_orphan_dir,
80 u64 blkno, 80 u64 blkno,
81 char *name, 81 char *name,
82 struct ocfs2_dir_lookup_result *lookup); 82 struct ocfs2_dir_lookup_result *lookup,
83 bool dio);
83 84
84static int ocfs2_orphan_add(struct ocfs2_super *osb, 85static int ocfs2_orphan_add(struct ocfs2_super *osb,
85 handle_t *handle, 86 handle_t *handle,
@@ -87,7 +88,8 @@ static int ocfs2_orphan_add(struct ocfs2_super *osb,
87 struct buffer_head *fe_bh, 88 struct buffer_head *fe_bh,
88 char *name, 89 char *name,
89 struct ocfs2_dir_lookup_result *lookup, 90 struct ocfs2_dir_lookup_result *lookup,
90 struct inode *orphan_dir_inode); 91 struct inode *orphan_dir_inode,
92 bool dio);
91 93
92static int ocfs2_create_symlink_data(struct ocfs2_super *osb, 94static int ocfs2_create_symlink_data(struct ocfs2_super *osb,
93 handle_t *handle, 95 handle_t *handle,
@@ -104,6 +106,8 @@ static int ocfs2_double_lock(struct ocfs2_super *osb,
104static void ocfs2_double_unlock(struct inode *inode1, struct inode *inode2); 106static void ocfs2_double_unlock(struct inode *inode1, struct inode *inode2);
105/* An orphan dir name is an 8 byte value, printed as a hex string */ 107/* An orphan dir name is an 8 byte value, printed as a hex string */
106#define OCFS2_ORPHAN_NAMELEN ((int)(2 * sizeof(u64))) 108#define OCFS2_ORPHAN_NAMELEN ((int)(2 * sizeof(u64)))
109#define OCFS2_DIO_ORPHAN_PREFIX "dio-"
110#define OCFS2_DIO_ORPHAN_PREFIX_LEN 4
107 111
108static struct dentry *ocfs2_lookup(struct inode *dir, struct dentry *dentry, 112static struct dentry *ocfs2_lookup(struct inode *dir, struct dentry *dentry,
109 unsigned int flags) 113 unsigned int flags)
@@ -952,7 +956,8 @@ static int ocfs2_unlink(struct inode *dir,
952 if (ocfs2_inode_is_unlinkable(inode)) { 956 if (ocfs2_inode_is_unlinkable(inode)) {
953 status = ocfs2_prepare_orphan_dir(osb, &orphan_dir, 957 status = ocfs2_prepare_orphan_dir(osb, &orphan_dir,
954 OCFS2_I(inode)->ip_blkno, 958 OCFS2_I(inode)->ip_blkno,
955 orphan_name, &orphan_insert); 959 orphan_name, &orphan_insert,
960 false);
956 if (status < 0) { 961 if (status < 0) {
957 mlog_errno(status); 962 mlog_errno(status);
958 goto leave; 963 goto leave;
@@ -1004,7 +1009,7 @@ static int ocfs2_unlink(struct inode *dir,
1004 1009
1005 if (is_unlinkable) { 1010 if (is_unlinkable) {
1006 status = ocfs2_orphan_add(osb, handle, inode, fe_bh, 1011 status = ocfs2_orphan_add(osb, handle, inode, fe_bh,
1007 orphan_name, &orphan_insert, orphan_dir); 1012 orphan_name, &orphan_insert, orphan_dir, false);
1008 if (status < 0) 1013 if (status < 0)
1009 mlog_errno(status); 1014 mlog_errno(status);
1010 } 1015 }
@@ -1440,7 +1445,8 @@ static int ocfs2_rename(struct inode *old_dir,
1440 if (S_ISDIR(new_inode->i_mode) || (new_inode->i_nlink == 1)) { 1445 if (S_ISDIR(new_inode->i_mode) || (new_inode->i_nlink == 1)) {
1441 status = ocfs2_prepare_orphan_dir(osb, &orphan_dir, 1446 status = ocfs2_prepare_orphan_dir(osb, &orphan_dir,
1442 OCFS2_I(new_inode)->ip_blkno, 1447 OCFS2_I(new_inode)->ip_blkno,
1443 orphan_name, &orphan_insert); 1448 orphan_name, &orphan_insert,
1449 false);
1444 if (status < 0) { 1450 if (status < 0) {
1445 mlog_errno(status); 1451 mlog_errno(status);
1446 goto bail; 1452 goto bail;
@@ -1507,7 +1513,7 @@ static int ocfs2_rename(struct inode *old_dir,
1507 if (should_add_orphan) { 1513 if (should_add_orphan) {
1508 status = ocfs2_orphan_add(osb, handle, new_inode, 1514 status = ocfs2_orphan_add(osb, handle, new_inode,
1509 newfe_bh, orphan_name, 1515 newfe_bh, orphan_name,
1510 &orphan_insert, orphan_dir); 1516 &orphan_insert, orphan_dir, false);
1511 if (status < 0) { 1517 if (status < 0) {
1512 mlog_errno(status); 1518 mlog_errno(status);
1513 goto bail; 1519 goto bail;
@@ -2088,12 +2094,28 @@ static int __ocfs2_prepare_orphan_dir(struct inode *orphan_dir_inode,
2088 struct buffer_head *orphan_dir_bh, 2094 struct buffer_head *orphan_dir_bh,
2089 u64 blkno, 2095 u64 blkno,
2090 char *name, 2096 char *name,
2091 struct ocfs2_dir_lookup_result *lookup) 2097 struct ocfs2_dir_lookup_result *lookup,
2098 bool dio)
2092{ 2099{
2093 int ret; 2100 int ret;
2094 struct ocfs2_super *osb = OCFS2_SB(orphan_dir_inode->i_sb); 2101 struct ocfs2_super *osb = OCFS2_SB(orphan_dir_inode->i_sb);
2102 int namelen = dio ?
2103 (OCFS2_DIO_ORPHAN_PREFIX_LEN + OCFS2_ORPHAN_NAMELEN) :
2104 OCFS2_ORPHAN_NAMELEN;
2105
2106 if (dio) {
2107 ret = snprintf(name, OCFS2_DIO_ORPHAN_PREFIX_LEN + 1, "%s",
2108 OCFS2_DIO_ORPHAN_PREFIX);
2109 if (ret != OCFS2_DIO_ORPHAN_PREFIX_LEN) {
2110 ret = -EINVAL;
2111 mlog_errno(ret);
2112 return ret;
2113 }
2095 2114
2096 ret = ocfs2_blkno_stringify(blkno, name); 2115 ret = ocfs2_blkno_stringify(blkno,
2116 name + OCFS2_DIO_ORPHAN_PREFIX_LEN);
2117 } else
2118 ret = ocfs2_blkno_stringify(blkno, name);
2097 if (ret < 0) { 2119 if (ret < 0) {
2098 mlog_errno(ret); 2120 mlog_errno(ret);
2099 return ret; 2121 return ret;
@@ -2101,7 +2123,7 @@ static int __ocfs2_prepare_orphan_dir(struct inode *orphan_dir_inode,
2101 2123
2102 ret = ocfs2_prepare_dir_for_insert(osb, orphan_dir_inode, 2124 ret = ocfs2_prepare_dir_for_insert(osb, orphan_dir_inode,
2103 orphan_dir_bh, name, 2125 orphan_dir_bh, name,
2104 OCFS2_ORPHAN_NAMELEN, lookup); 2126 namelen, lookup);
2105 if (ret < 0) { 2127 if (ret < 0) {
2106 mlog_errno(ret); 2128 mlog_errno(ret);
2107 return ret; 2129 return ret;
@@ -2128,7 +2150,8 @@ static int ocfs2_prepare_orphan_dir(struct ocfs2_super *osb,
2128 struct inode **ret_orphan_dir, 2150 struct inode **ret_orphan_dir,
2129 u64 blkno, 2151 u64 blkno,
2130 char *name, 2152 char *name,
2131 struct ocfs2_dir_lookup_result *lookup) 2153 struct ocfs2_dir_lookup_result *lookup,
2154 bool dio)
2132{ 2155{
2133 struct inode *orphan_dir_inode = NULL; 2156 struct inode *orphan_dir_inode = NULL;
2134 struct buffer_head *orphan_dir_bh = NULL; 2157 struct buffer_head *orphan_dir_bh = NULL;
@@ -2142,7 +2165,7 @@ static int ocfs2_prepare_orphan_dir(struct ocfs2_super *osb,
2142 } 2165 }
2143 2166
2144 ret = __ocfs2_prepare_orphan_dir(orphan_dir_inode, orphan_dir_bh, 2167 ret = __ocfs2_prepare_orphan_dir(orphan_dir_inode, orphan_dir_bh,
2145 blkno, name, lookup); 2168 blkno, name, lookup, dio);
2146 if (ret < 0) { 2169 if (ret < 0) {
2147 mlog_errno(ret); 2170 mlog_errno(ret);
2148 goto out; 2171 goto out;
@@ -2170,12 +2193,16 @@ static int ocfs2_orphan_add(struct ocfs2_super *osb,
2170 struct buffer_head *fe_bh, 2193 struct buffer_head *fe_bh,
2171 char *name, 2194 char *name,
2172 struct ocfs2_dir_lookup_result *lookup, 2195 struct ocfs2_dir_lookup_result *lookup,
2173 struct inode *orphan_dir_inode) 2196 struct inode *orphan_dir_inode,
2197 bool dio)
2174{ 2198{
2175 struct buffer_head *orphan_dir_bh = NULL; 2199 struct buffer_head *orphan_dir_bh = NULL;
2176 int status = 0; 2200 int status = 0;
2177 struct ocfs2_dinode *orphan_fe; 2201 struct ocfs2_dinode *orphan_fe;
2178 struct ocfs2_dinode *fe = (struct ocfs2_dinode *) fe_bh->b_data; 2202 struct ocfs2_dinode *fe = (struct ocfs2_dinode *) fe_bh->b_data;
2203 int namelen = dio ?
2204 (OCFS2_DIO_ORPHAN_PREFIX_LEN + OCFS2_ORPHAN_NAMELEN) :
2205 OCFS2_ORPHAN_NAMELEN;
2179 2206
2180 trace_ocfs2_orphan_add_begin( 2207 trace_ocfs2_orphan_add_begin(
2181 (unsigned long long)OCFS2_I(inode)->ip_blkno); 2208 (unsigned long long)OCFS2_I(inode)->ip_blkno);
@@ -2219,7 +2246,7 @@ static int ocfs2_orphan_add(struct ocfs2_super *osb,
2219 ocfs2_journal_dirty(handle, orphan_dir_bh); 2246 ocfs2_journal_dirty(handle, orphan_dir_bh);
2220 2247
2221 status = __ocfs2_add_entry(handle, orphan_dir_inode, name, 2248 status = __ocfs2_add_entry(handle, orphan_dir_inode, name,
2222 OCFS2_ORPHAN_NAMELEN, inode, 2249 namelen, inode,
2223 OCFS2_I(inode)->ip_blkno, 2250 OCFS2_I(inode)->ip_blkno,
2224 orphan_dir_bh, lookup); 2251 orphan_dir_bh, lookup);
2225 if (status < 0) { 2252 if (status < 0) {
@@ -2227,13 +2254,21 @@ static int ocfs2_orphan_add(struct ocfs2_super *osb,
2227 goto rollback; 2254 goto rollback;
2228 } 2255 }
2229 2256
2230 fe->i_flags |= cpu_to_le32(OCFS2_ORPHANED_FL); 2257 if (dio) {
2231 OCFS2_I(inode)->ip_flags &= ~OCFS2_INODE_SKIP_ORPHAN_DIR; 2258 /* Update flag OCFS2_DIO_ORPHANED_FL and record the orphan
2259 * slot.
2260 */
2261 fe->i_flags |= cpu_to_le32(OCFS2_DIO_ORPHANED_FL);
2262 fe->i_dio_orphaned_slot = cpu_to_le16(osb->slot_num);
2263 } else {
2264 fe->i_flags |= cpu_to_le32(OCFS2_ORPHANED_FL);
2265 OCFS2_I(inode)->ip_flags &= ~OCFS2_INODE_SKIP_ORPHAN_DIR;
2232 2266
2233 /* Record which orphan dir our inode now resides 2267 /* Record which orphan dir our inode now resides
2234 * in. delete_inode will use this to determine which orphan 2268 * in. delete_inode will use this to determine which orphan
2235 * dir to lock. */ 2269 * dir to lock. */
2236 fe->i_orphaned_slot = cpu_to_le16(osb->slot_num); 2270 fe->i_orphaned_slot = cpu_to_le16(osb->slot_num);
2271 }
2237 2272
2238 ocfs2_journal_dirty(handle, fe_bh); 2273 ocfs2_journal_dirty(handle, fe_bh);
2239 2274
@@ -2258,14 +2293,28 @@ int ocfs2_orphan_del(struct ocfs2_super *osb,
2258 handle_t *handle, 2293 handle_t *handle,
2259 struct inode *orphan_dir_inode, 2294 struct inode *orphan_dir_inode,
2260 struct inode *inode, 2295 struct inode *inode,
2261 struct buffer_head *orphan_dir_bh) 2296 struct buffer_head *orphan_dir_bh,
2297 bool dio)
2262{ 2298{
2263 char name[OCFS2_ORPHAN_NAMELEN + 1]; 2299 const int namelen = OCFS2_DIO_ORPHAN_PREFIX_LEN + OCFS2_ORPHAN_NAMELEN;
2300 char name[namelen + 1];
2264 struct ocfs2_dinode *orphan_fe; 2301 struct ocfs2_dinode *orphan_fe;
2265 int status = 0; 2302 int status = 0;
2266 struct ocfs2_dir_lookup_result lookup = { NULL, }; 2303 struct ocfs2_dir_lookup_result lookup = { NULL, };
2267 2304
2268 status = ocfs2_blkno_stringify(OCFS2_I(inode)->ip_blkno, name); 2305 if (dio) {
2306 status = snprintf(name, OCFS2_DIO_ORPHAN_PREFIX_LEN + 1, "%s",
2307 OCFS2_DIO_ORPHAN_PREFIX);
2308 if (status != OCFS2_DIO_ORPHAN_PREFIX_LEN) {
2309 status = -EINVAL;
2310 mlog_errno(status);
2311 return status;
2312 }
2313
2314 status = ocfs2_blkno_stringify(OCFS2_I(inode)->ip_blkno,
2315 name + OCFS2_DIO_ORPHAN_PREFIX_LEN);
2316 } else
2317 status = ocfs2_blkno_stringify(OCFS2_I(inode)->ip_blkno, name);
2269 if (status < 0) { 2318 if (status < 0) {
2270 mlog_errno(status); 2319 mlog_errno(status);
2271 goto leave; 2320 goto leave;
@@ -2273,10 +2322,10 @@ int ocfs2_orphan_del(struct ocfs2_super *osb,
2273 2322
2274 trace_ocfs2_orphan_del( 2323 trace_ocfs2_orphan_del(
2275 (unsigned long long)OCFS2_I(orphan_dir_inode)->ip_blkno, 2324 (unsigned long long)OCFS2_I(orphan_dir_inode)->ip_blkno,
2276 name, OCFS2_ORPHAN_NAMELEN); 2325 name, namelen);
2277 2326
2278 /* find its spot in the orphan directory */ 2327 /* find its spot in the orphan directory */
2279 status = ocfs2_find_entry(name, OCFS2_ORPHAN_NAMELEN, orphan_dir_inode, 2328 status = ocfs2_find_entry(name, namelen, orphan_dir_inode,
2280 &lookup); 2329 &lookup);
2281 if (status) { 2330 if (status) {
2282 mlog_errno(status); 2331 mlog_errno(status);
@@ -2376,7 +2425,8 @@ static int ocfs2_prep_new_orphaned_file(struct inode *dir,
2376 } 2425 }
2377 2426
2378 ret = __ocfs2_prepare_orphan_dir(orphan_dir, orphan_dir_bh, 2427 ret = __ocfs2_prepare_orphan_dir(orphan_dir, orphan_dir_bh,
2379 di_blkno, orphan_name, orphan_insert); 2428 di_blkno, orphan_name, orphan_insert,
2429 false);
2380 if (ret < 0) { 2430 if (ret < 0) {
2381 mlog_errno(ret); 2431 mlog_errno(ret);
2382 goto out; 2432 goto out;
@@ -2482,7 +2532,7 @@ int ocfs2_create_inode_in_orphan(struct inode *dir,
2482 2532
2483 di = (struct ocfs2_dinode *)new_di_bh->b_data; 2533 di = (struct ocfs2_dinode *)new_di_bh->b_data;
2484 status = ocfs2_orphan_add(osb, handle, inode, new_di_bh, orphan_name, 2534 status = ocfs2_orphan_add(osb, handle, inode, new_di_bh, orphan_name,
2485 &orphan_insert, orphan_dir); 2535 &orphan_insert, orphan_dir, false);
2486 if (status < 0) { 2536 if (status < 0) {
2487 mlog_errno(status); 2537 mlog_errno(status);
2488 goto leave; 2538 goto leave;
@@ -2527,6 +2577,186 @@ leave:
2527 return status; 2577 return status;
2528} 2578}
2529 2579
2580static int ocfs2_dio_orphan_recovered(struct inode *inode)
2581{
2582 int ret;
2583 struct buffer_head *di_bh = NULL;
2584 struct ocfs2_dinode *di = NULL;
2585
2586 ret = ocfs2_inode_lock(inode, &di_bh, 1);
2587 if (ret < 0) {
2588 mlog_errno(ret);
2589 return 0;
2590 }
2591
2592 di = (struct ocfs2_dinode *) di_bh->b_data;
2593 ret = !(di->i_flags & cpu_to_le32(OCFS2_DIO_ORPHANED_FL));
2594 ocfs2_inode_unlock(inode, 1);
2595 brelse(di_bh);
2596
2597 return ret;
2598}
2599
2600#define OCFS2_DIO_ORPHANED_FL_CHECK_INTERVAL 10000
2601int ocfs2_add_inode_to_orphan(struct ocfs2_super *osb,
2602 struct inode *inode)
2603{
2604 char orphan_name[OCFS2_DIO_ORPHAN_PREFIX_LEN + OCFS2_ORPHAN_NAMELEN + 1];
2605 struct inode *orphan_dir_inode = NULL;
2606 struct ocfs2_dir_lookup_result orphan_insert = { NULL, };
2607 struct buffer_head *di_bh = NULL;
2608 int status = 0;
2609 handle_t *handle = NULL;
2610 struct ocfs2_dinode *di = NULL;
2611
2612restart:
2613 status = ocfs2_inode_lock(inode, &di_bh, 1);
2614 if (status < 0) {
2615 mlog_errno(status);
2616 goto bail;
2617 }
2618
2619 di = (struct ocfs2_dinode *) di_bh->b_data;
2620 /*
2621 * Another append dio crashed?
2622 * If so, wait for recovery first.
2623 */
2624 if (unlikely(di->i_flags & cpu_to_le32(OCFS2_DIO_ORPHANED_FL))) {
2625 ocfs2_inode_unlock(inode, 1);
2626 brelse(di_bh);
2627 wait_event_interruptible_timeout(OCFS2_I(inode)->append_dio_wq,
2628 ocfs2_dio_orphan_recovered(inode),
2629 msecs_to_jiffies(OCFS2_DIO_ORPHANED_FL_CHECK_INTERVAL));
2630 goto restart;
2631 }
2632
2633 status = ocfs2_prepare_orphan_dir(osb, &orphan_dir_inode,
2634 OCFS2_I(inode)->ip_blkno,
2635 orphan_name,
2636 &orphan_insert,
2637 true);
2638 if (status < 0) {
2639 mlog_errno(status);
2640 goto bail_unlock_inode;
2641 }
2642
2643 handle = ocfs2_start_trans(osb,
2644 OCFS2_INODE_ADD_TO_ORPHAN_CREDITS);
2645 if (IS_ERR(handle)) {
2646 status = PTR_ERR(handle);
2647 goto bail_unlock_orphan;
2648 }
2649
2650 status = ocfs2_orphan_add(osb, handle, inode, di_bh, orphan_name,
2651 &orphan_insert, orphan_dir_inode, true);
2652 if (status)
2653 mlog_errno(status);
2654
2655 ocfs2_commit_trans(osb, handle);
2656
2657bail_unlock_orphan:
2658 ocfs2_inode_unlock(orphan_dir_inode, 1);
2659 mutex_unlock(&orphan_dir_inode->i_mutex);
2660 iput(orphan_dir_inode);
2661
2662 ocfs2_free_dir_lookup_result(&orphan_insert);
2663
2664bail_unlock_inode:
2665 ocfs2_inode_unlock(inode, 1);
2666 brelse(di_bh);
2667
2668bail:
2669 return status;
2670}
2671
2672int ocfs2_del_inode_from_orphan(struct ocfs2_super *osb,
2673 struct inode *inode, int update_isize,
2674 loff_t end)
2675{
2676 struct inode *orphan_dir_inode = NULL;
2677 struct buffer_head *orphan_dir_bh = NULL;
2678 struct buffer_head *di_bh = NULL;
2679 struct ocfs2_dinode *di = NULL;
2680 handle_t *handle = NULL;
2681 int status = 0;
2682
2683 status = ocfs2_inode_lock(inode, &di_bh, 1);
2684 if (status < 0) {
2685 mlog_errno(status);
2686 goto bail;
2687 }
2688 di = (struct ocfs2_dinode *) di_bh->b_data;
2689
2690 orphan_dir_inode = ocfs2_get_system_file_inode(osb,
2691 ORPHAN_DIR_SYSTEM_INODE,
2692 le16_to_cpu(di->i_dio_orphaned_slot));
2693 if (!orphan_dir_inode) {
2694 status = -ENOENT;
2695 mlog_errno(status);
2696 goto bail_unlock_inode;
2697 }
2698
2699 mutex_lock(&orphan_dir_inode->i_mutex);
2700 status = ocfs2_inode_lock(orphan_dir_inode, &orphan_dir_bh, 1);
2701 if (status < 0) {
2702 mutex_unlock(&orphan_dir_inode->i_mutex);
2703 iput(orphan_dir_inode);
2704 mlog_errno(status);
2705 goto bail_unlock_inode;
2706 }
2707
2708 handle = ocfs2_start_trans(osb,
2709 OCFS2_INODE_DEL_FROM_ORPHAN_CREDITS);
2710 if (IS_ERR(handle)) {
2711 status = PTR_ERR(handle);
2712 goto bail_unlock_orphan;
2713 }
2714
2715 BUG_ON(!(di->i_flags & cpu_to_le32(OCFS2_DIO_ORPHANED_FL)));
2716
2717 status = ocfs2_orphan_del(osb, handle, orphan_dir_inode,
2718 inode, orphan_dir_bh, true);
2719 if (status < 0) {
2720 mlog_errno(status);
2721 goto bail_commit;
2722 }
2723
2724 status = ocfs2_journal_access_di(handle,
2725 INODE_CACHE(inode),
2726 di_bh,
2727 OCFS2_JOURNAL_ACCESS_WRITE);
2728 if (status < 0) {
2729 mlog_errno(status);
2730 goto bail_commit;
2731 }
2732
2733 di->i_flags &= ~cpu_to_le32(OCFS2_DIO_ORPHANED_FL);
2734 di->i_dio_orphaned_slot = 0;
2735
2736 if (update_isize) {
2737 status = ocfs2_set_inode_size(handle, inode, di_bh, end);
2738 if (status)
2739 mlog_errno(status);
2740 } else
2741 ocfs2_journal_dirty(handle, di_bh);
2742
2743bail_commit:
2744 ocfs2_commit_trans(osb, handle);
2745
2746bail_unlock_orphan:
2747 ocfs2_inode_unlock(orphan_dir_inode, 1);
2748 mutex_unlock(&orphan_dir_inode->i_mutex);
2749 brelse(orphan_dir_bh);
2750 iput(orphan_dir_inode);
2751
2752bail_unlock_inode:
2753 ocfs2_inode_unlock(inode, 1);
2754 brelse(di_bh);
2755
2756bail:
2757 return status;
2758}
2759
2530int ocfs2_mv_orphaned_inode_to_new(struct inode *dir, 2760int ocfs2_mv_orphaned_inode_to_new(struct inode *dir,
2531 struct inode *inode, 2761 struct inode *inode,
2532 struct dentry *dentry) 2762 struct dentry *dentry)
@@ -2615,7 +2845,7 @@ int ocfs2_mv_orphaned_inode_to_new(struct inode *dir,
2615 } 2845 }
2616 2846
2617 status = ocfs2_orphan_del(osb, handle, orphan_dir_inode, inode, 2847 status = ocfs2_orphan_del(osb, handle, orphan_dir_inode, inode,
2618 orphan_dir_bh); 2848 orphan_dir_bh, false);
2619 if (status < 0) { 2849 if (status < 0) {
2620 mlog_errno(status); 2850 mlog_errno(status);
2621 goto out_commit; 2851 goto out_commit;
diff --git a/fs/ocfs2/namei.h b/fs/ocfs2/namei.h
index e5d059d4f115..5ddecce172fa 100644
--- a/fs/ocfs2/namei.h
+++ b/fs/ocfs2/namei.h
@@ -34,10 +34,16 @@ int ocfs2_orphan_del(struct ocfs2_super *osb,
34 handle_t *handle, 34 handle_t *handle,
35 struct inode *orphan_dir_inode, 35 struct inode *orphan_dir_inode,
36 struct inode *inode, 36 struct inode *inode,
37 struct buffer_head *orphan_dir_bh); 37 struct buffer_head *orphan_dir_bh,
38 bool dio);
38int ocfs2_create_inode_in_orphan(struct inode *dir, 39int ocfs2_create_inode_in_orphan(struct inode *dir,
39 int mode, 40 int mode,
40 struct inode **new_inode); 41 struct inode **new_inode);
42int ocfs2_add_inode_to_orphan(struct ocfs2_super *osb,
43 struct inode *inode);
44int ocfs2_del_inode_from_orphan(struct ocfs2_super *osb,
45 struct inode *inode, int update_isize,
46 loff_t end);
41int ocfs2_mv_orphaned_inode_to_new(struct inode *dir, 47int ocfs2_mv_orphaned_inode_to_new(struct inode *dir,
42 struct inode *new_inode, 48 struct inode *new_inode,
43 struct dentry *new_dentry); 49 struct dentry *new_dentry);
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h
index fdbcbfed529e..8490c64d34fe 100644
--- a/fs/ocfs2/ocfs2.h
+++ b/fs/ocfs2/ocfs2.h
@@ -209,6 +209,11 @@ struct ocfs2_lock_res {
209#endif 209#endif
210}; 210};
211 211
212enum ocfs2_orphan_reco_type {
213 ORPHAN_NO_NEED_TRUNCATE = 0,
214 ORPHAN_NEED_TRUNCATE,
215};
216
212enum ocfs2_orphan_scan_state { 217enum ocfs2_orphan_scan_state {
213 ORPHAN_SCAN_ACTIVE, 218 ORPHAN_SCAN_ACTIVE,
214 ORPHAN_SCAN_INACTIVE 219 ORPHAN_SCAN_INACTIVE
@@ -495,6 +500,14 @@ static inline int ocfs2_writes_unwritten_extents(struct ocfs2_super *osb)
495 return 0; 500 return 0;
496} 501}
497 502
503static inline int ocfs2_supports_append_dio(struct ocfs2_super *osb)
504{
505 if (osb->s_feature_ro_compat & OCFS2_FEATURE_RO_COMPAT_APPEND_DIO)
506 return 1;
507 return 0;
508}
509
510
498static inline int ocfs2_supports_inline_data(struct ocfs2_super *osb) 511static inline int ocfs2_supports_inline_data(struct ocfs2_super *osb)
499{ 512{
500 if (osb->s_feature_incompat & OCFS2_FEATURE_INCOMPAT_INLINE_DATA) 513 if (osb->s_feature_incompat & OCFS2_FEATURE_INCOMPAT_INLINE_DATA)
@@ -726,6 +739,16 @@ static inline unsigned int ocfs2_clusters_for_bytes(struct super_block *sb,
726 return clusters; 739 return clusters;
727} 740}
728 741
742static inline unsigned int ocfs2_bytes_to_clusters(struct super_block *sb,
743 u64 bytes)
744{
745 int cl_bits = OCFS2_SB(sb)->s_clustersize_bits;
746 unsigned int clusters;
747
748 clusters = (unsigned int)(bytes >> cl_bits);
749 return clusters;
750}
751
729static inline u64 ocfs2_blocks_for_bytes(struct super_block *sb, 752static inline u64 ocfs2_blocks_for_bytes(struct super_block *sb,
730 u64 bytes) 753 u64 bytes)
731{ 754{
diff --git a/fs/ocfs2/ocfs2_fs.h b/fs/ocfs2/ocfs2_fs.h
index 938387a10d5d..20e37a3ed26f 100644
--- a/fs/ocfs2/ocfs2_fs.h
+++ b/fs/ocfs2/ocfs2_fs.h
@@ -105,7 +105,8 @@
105 | OCFS2_FEATURE_INCOMPAT_CLUSTERINFO) 105 | OCFS2_FEATURE_INCOMPAT_CLUSTERINFO)
106#define OCFS2_FEATURE_RO_COMPAT_SUPP (OCFS2_FEATURE_RO_COMPAT_UNWRITTEN \ 106#define OCFS2_FEATURE_RO_COMPAT_SUPP (OCFS2_FEATURE_RO_COMPAT_UNWRITTEN \
107 | OCFS2_FEATURE_RO_COMPAT_USRQUOTA \ 107 | OCFS2_FEATURE_RO_COMPAT_USRQUOTA \
108 | OCFS2_FEATURE_RO_COMPAT_GRPQUOTA) 108 | OCFS2_FEATURE_RO_COMPAT_GRPQUOTA \
109 | OCFS2_FEATURE_RO_COMPAT_APPEND_DIO)
109 110
110/* 111/*
111 * Heartbeat-only devices are missing journals and other files. The 112 * Heartbeat-only devices are missing journals and other files. The
@@ -199,6 +200,11 @@
199#define OCFS2_FEATURE_RO_COMPAT_USRQUOTA 0x0002 200#define OCFS2_FEATURE_RO_COMPAT_USRQUOTA 0x0002
200#define OCFS2_FEATURE_RO_COMPAT_GRPQUOTA 0x0004 201#define OCFS2_FEATURE_RO_COMPAT_GRPQUOTA 0x0004
201 202
203/*
204 * Append Direct IO support
205 */
206#define OCFS2_FEATURE_RO_COMPAT_APPEND_DIO 0x0008
207
202/* The byte offset of the first backup block will be 1G. 208/* The byte offset of the first backup block will be 1G.
203 * The following will be 4G, 16G, 64G, 256G and 1T. 209 * The following will be 4G, 16G, 64G, 256G and 1T.
204 */ 210 */
@@ -229,6 +235,8 @@
229#define OCFS2_CHAIN_FL (0x00000400) /* Chain allocator */ 235#define OCFS2_CHAIN_FL (0x00000400) /* Chain allocator */
230#define OCFS2_DEALLOC_FL (0x00000800) /* Truncate log */ 236#define OCFS2_DEALLOC_FL (0x00000800) /* Truncate log */
231#define OCFS2_QUOTA_FL (0x00001000) /* Quota file */ 237#define OCFS2_QUOTA_FL (0x00001000) /* Quota file */
238#define OCFS2_DIO_ORPHANED_FL (0X00002000) /* On the orphan list especially
239 * for dio */
232 240
233/* 241/*
234 * Flags on ocfs2_dinode.i_dyn_features 242 * Flags on ocfs2_dinode.i_dyn_features
@@ -729,7 +737,9 @@ struct ocfs2_dinode {
729 inode belongs to. Only valid 737 inode belongs to. Only valid
730 if allocated from a 738 if allocated from a
731 discontiguous block group */ 739 discontiguous block group */
732/*A0*/ __le64 i_reserved2[3]; 740/*A0*/ __le16 i_dio_orphaned_slot; /* only used for append dio write */
741 __le16 i_reserved1[3];
742 __le64 i_reserved2[2];
733/*B8*/ union { 743/*B8*/ union {
734 __le64 i_pad1; /* Generic way to refer to this 744 __le64 i_pad1; /* Generic way to refer to this
735 64bit union */ 745 64bit union */
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 87a1f7679d9b..26675185b886 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -1746,6 +1746,8 @@ static void ocfs2_inode_init_once(void *data)
1746 ocfs2_lock_res_init_once(&oi->ip_inode_lockres); 1746 ocfs2_lock_res_init_once(&oi->ip_inode_lockres);
1747 ocfs2_lock_res_init_once(&oi->ip_open_lockres); 1747 ocfs2_lock_res_init_once(&oi->ip_open_lockres);
1748 1748
1749 init_waitqueue_head(&oi->append_dio_wq);
1750
1749 ocfs2_metadata_cache_init(INODE_CACHE(&oi->vfs_inode), 1751 ocfs2_metadata_cache_init(INODE_CACHE(&oi->vfs_inode),
1750 &ocfs2_inode_caching_ops); 1752 &ocfs2_inode_caching_ops);
1751 1753
diff --git a/fs/open.c b/fs/open.c
index 813be037b412..a293c2020676 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -667,11 +667,8 @@ int open_check_o_direct(struct file *f)
667{ 667{
668 /* NB: we're sure to have correct a_ops only after f_op->open */ 668 /* NB: we're sure to have correct a_ops only after f_op->open */
669 if (f->f_flags & O_DIRECT) { 669 if (f->f_flags & O_DIRECT) {
670 if (!f->f_mapping->a_ops || 670 if (!f->f_mapping->a_ops || !f->f_mapping->a_ops->direct_IO)
671 ((!f->f_mapping->a_ops->direct_IO) &&
672 (!f->f_mapping->a_ops->get_xip_mem))) {
673 return -EINVAL; 671 return -EINVAL;
674 }
675 } 672 }
676 return 0; 673 return 0;
677} 674}
diff --git a/include/linux/fs.h b/include/linux/fs.h
index e49f10cc8a73..ed5a0900b94d 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -51,6 +51,7 @@ struct swap_info_struct;
51struct seq_file; 51struct seq_file;
52struct workqueue_struct; 52struct workqueue_struct;
53struct iov_iter; 53struct iov_iter;
54struct vm_fault;
54 55
55extern void __init inode_init(void); 56extern void __init inode_init(void);
56extern void __init inode_init_early(void); 57extern void __init inode_init_early(void);
@@ -361,8 +362,6 @@ struct address_space_operations {
361 int (*releasepage) (struct page *, gfp_t); 362 int (*releasepage) (struct page *, gfp_t);
362 void (*freepage)(struct page *); 363 void (*freepage)(struct page *);
363 ssize_t (*direct_IO)(int, struct kiocb *, struct iov_iter *iter, loff_t offset); 364 ssize_t (*direct_IO)(int, struct kiocb *, struct iov_iter *iter, loff_t offset);
364 int (*get_xip_mem)(struct address_space *, pgoff_t, int,
365 void **, unsigned long *);
366 /* 365 /*
367 * migrate the contents of a page to the specified target. If 366 * migrate the contents of a page to the specified target. If
368 * migrate_mode is MIGRATE_ASYNC, it must not block. 367 * migrate_mode is MIGRATE_ASYNC, it must not block.
@@ -1677,6 +1676,11 @@ struct super_operations {
1677#define S_IMA 1024 /* Inode has an associated IMA struct */ 1676#define S_IMA 1024 /* Inode has an associated IMA struct */
1678#define S_AUTOMOUNT 2048 /* Automount/referral quasi-directory */ 1677#define S_AUTOMOUNT 2048 /* Automount/referral quasi-directory */
1679#define S_NOSEC 4096 /* no suid or xattr security attributes */ 1678#define S_NOSEC 4096 /* no suid or xattr security attributes */
1679#ifdef CONFIG_FS_DAX
1680#define S_DAX 8192 /* Direct Access, avoiding the page cache */
1681#else
1682#define S_DAX 0 /* Make all the DAX code disappear */
1683#endif
1680 1684
1681/* 1685/*
1682 * Note that nosuid etc flags are inode-specific: setting some file-system 1686 * Note that nosuid etc flags are inode-specific: setting some file-system
@@ -1714,6 +1718,7 @@ struct super_operations {
1714#define IS_IMA(inode) ((inode)->i_flags & S_IMA) 1718#define IS_IMA(inode) ((inode)->i_flags & S_IMA)
1715#define IS_AUTOMOUNT(inode) ((inode)->i_flags & S_AUTOMOUNT) 1719#define IS_AUTOMOUNT(inode) ((inode)->i_flags & S_AUTOMOUNT)
1716#define IS_NOSEC(inode) ((inode)->i_flags & S_NOSEC) 1720#define IS_NOSEC(inode) ((inode)->i_flags & S_NOSEC)
1721#define IS_DAX(inode) ((inode)->i_flags & S_DAX)
1717 1722
1718#define IS_WHITEOUT(inode) (S_ISCHR(inode->i_mode) && \ 1723#define IS_WHITEOUT(inode) (S_ISCHR(inode->i_mode) && \
1719 (inode)->i_rdev == WHITEOUT_DEV) 1724 (inode)->i_rdev == WHITEOUT_DEV)
@@ -2581,19 +2586,13 @@ extern loff_t fixed_size_llseek(struct file *file, loff_t offset,
2581extern int generic_file_open(struct inode * inode, struct file * filp); 2586extern int generic_file_open(struct inode * inode, struct file * filp);
2582extern int nonseekable_open(struct inode * inode, struct file * filp); 2587extern int nonseekable_open(struct inode * inode, struct file * filp);
2583 2588
2584#ifdef CONFIG_FS_XIP 2589ssize_t dax_do_io(int rw, struct kiocb *, struct inode *, struct iov_iter *,
2585extern ssize_t xip_file_read(struct file *filp, char __user *buf, size_t len, 2590 loff_t, get_block_t, dio_iodone_t, int flags);
2586 loff_t *ppos); 2591int dax_clear_blocks(struct inode *, sector_t block, long size);
2587extern int xip_file_mmap(struct file * file, struct vm_area_struct * vma); 2592int dax_zero_page_range(struct inode *, loff_t from, unsigned len, get_block_t);
2588extern ssize_t xip_file_write(struct file *filp, const char __user *buf, 2593int dax_truncate_page(struct inode *, loff_t from, get_block_t);
2589 size_t len, loff_t *ppos); 2594int dax_fault(struct vm_area_struct *, struct vm_fault *, get_block_t);
2590extern int xip_truncate_page(struct address_space *mapping, loff_t from); 2595#define dax_mkwrite(vma, vmf, gb) dax_fault(vma, vmf, gb)
2591#else
2592static inline int xip_truncate_page(struct address_space *mapping, loff_t from)
2593{
2594 return 0;
2595}
2596#endif
2597 2596
2598#ifdef CONFIG_BLOCK 2597#ifdef CONFIG_BLOCK
2599typedef void (dio_submit_t)(int rw, struct bio *bio, struct inode *inode, 2598typedef void (dio_submit_t)(int rw, struct bio *bio, struct inode *inode,
@@ -2750,6 +2749,11 @@ extern int generic_show_options(struct seq_file *m, struct dentry *root);
2750extern void save_mount_options(struct super_block *sb, char *options); 2749extern void save_mount_options(struct super_block *sb, char *options);
2751extern void replace_mount_options(struct super_block *sb, char *options); 2750extern void replace_mount_options(struct super_block *sb, char *options);
2752 2751
2752static inline bool io_is_direct(struct file *filp)
2753{
2754 return (filp->f_flags & O_DIRECT) || IS_DAX(file_inode(filp));
2755}
2756
2753static inline ino_t parent_ino(struct dentry *dentry) 2757static inline ino_t parent_ino(struct dentry *dentry)
2754{ 2758{
2755 ino_t res; 2759 ino_t res;
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 9bee7ec0c31f..47a93928b90f 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -224,6 +224,7 @@ struct vm_fault {
224 pgoff_t pgoff; /* Logical page offset based on vma */ 224 pgoff_t pgoff; /* Logical page offset based on vma */
225 void __user *virtual_address; /* Faulting virtual address */ 225 void __user *virtual_address; /* Faulting virtual address */
226 226
227 struct page *cow_page; /* Handler may choose to COW */
227 struct page *page; /* ->fault handlers should return a 228 struct page *page; /* ->fault handlers should return a
228 * page here, unless VM_FAULT_NOPAGE 229 * page here, unless VM_FAULT_NOPAGE
229 * is set (which is also implied by 230 * is set (which is also implied by
diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index b38f559130d5..c4c559a45dc8 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -198,7 +198,7 @@ int page_referenced(struct page *, int is_locked,
198int try_to_unmap(struct page *, enum ttu_flags flags); 198int try_to_unmap(struct page *, enum ttu_flags flags);
199 199
200/* 200/*
201 * Called from mm/filemap_xip.c to unmap empty zero page 201 * Used by uprobes to replace a userspace page safely
202 */ 202 */
203pte_t *__page_check_address(struct page *, struct mm_struct *, 203pte_t *__page_check_address(struct page *, struct mm_struct *,
204 unsigned long, spinlock_t **, int); 204 unsigned long, spinlock_t **, int);
diff --git a/include/linux/rtc/ds1685.h b/include/linux/rtc/ds1685.h
new file mode 100644
index 000000000000..e6337a56d741
--- /dev/null
+++ b/include/linux/rtc/ds1685.h
@@ -0,0 +1,375 @@
1/*
2 * Definitions for the registers, addresses, and platform data of the
3 * DS1685/DS1687-series RTC chips.
4 *
5 * This Driver also works for the DS17X85/DS17X87 RTC chips. Functionally
6 * similar to the DS1685/DS1687, they support a few extra features which
7 * include larger, battery-backed NV-SRAM, burst-mode access, and an RTC
8 * write counter.
9 *
10 * Copyright (C) 2011-2014 Joshua Kinard <kumba@gentoo.org>.
11 * Copyright (C) 2009 Matthias Fuchs <matthias.fuchs@esd-electronics.com>.
12 *
13 * References:
14 * DS1685/DS1687 3V/5V Real-Time Clocks, 19-5215, Rev 4/10.
15 * DS17x85/DS17x87 3V/5V Real-Time Clocks, 19-5222, Rev 4/10.
16 * DS1689/DS1693 3V/5V Serialized Real-Time Clocks, Rev 112105.
17 * Application Note 90, Using the Multiplex Bus RTC Extended Features.
18 *
19 * This program is free software; you can redistribute it and/or modify
20 * it under the terms of the GNU General Public License version 2 as
21 * published by the Free Software Foundation.
22 */
23
24#ifndef _LINUX_RTC_DS1685_H_
25#define _LINUX_RTC_DS1685_H_
26
27#include <linux/rtc.h>
28#include <linux/platform_device.h>
29#include <linux/workqueue.h>
30
31/**
32 * struct ds1685_priv - DS1685 private data structure.
33 * @dev: pointer to the rtc_device structure.
34 * @regs: iomapped base address pointer of the RTC registers.
35 * @regstep: padding/step size between registers (optional).
36 * @baseaddr: base address of the RTC device.
37 * @size: resource size.
38 * @lock: private lock variable for spin locking/unlocking.
39 * @work: private workqueue.
40 * @irq_num: IRQ number assigned to the RTC device.
41 * @prepare_poweroff: pointer to platform pre-poweroff function.
42 * @wake_alarm: pointer to platform wake alarm function.
43 * @post_ram_clear: pointer to platform post ram-clear function.
44 */
45struct ds1685_priv {
46 struct rtc_device *dev;
47 void __iomem *regs;
48 u32 regstep;
49 resource_size_t baseaddr;
50 size_t size;
51 spinlock_t lock;
52 struct work_struct work;
53 int irq_num;
54 bool bcd_mode;
55 bool no_irq;
56 bool uie_unsupported;
57 bool alloc_io_resources;
58 u8 (*read)(struct ds1685_priv *, int);
59 void (*write)(struct ds1685_priv *, int, u8);
60 void (*prepare_poweroff)(void);
61 void (*wake_alarm)(void);
62 void (*post_ram_clear)(void);
63};
64
65
66/**
67 * struct ds1685_rtc_platform_data - platform data structure.
68 * @plat_prepare_poweroff: platform-specific pre-poweroff function.
69 * @plat_wake_alarm: platform-specific wake alarm function.
70 * @plat_post_ram_clear: platform-specific post ram-clear function.
71 *
72 * If your platform needs to use a custom padding/step size between
73 * registers, or uses one or more of the extended interrupts and needs special
74 * handling, then include this header file in your platform definition and
75 * set regstep and the plat_* pointers as appropriate.
76 */
77struct ds1685_rtc_platform_data {
78 const u32 regstep;
79 const bool bcd_mode;
80 const bool no_irq;
81 const bool uie_unsupported;
82 const bool alloc_io_resources;
83 u8 (*plat_read)(struct ds1685_priv *, int);
84 void (*plat_write)(struct ds1685_priv *, int, u8);
85 void (*plat_prepare_poweroff)(void);
86 void (*plat_wake_alarm)(void);
87 void (*plat_post_ram_clear)(void);
88};
89
90
91/*
92 * Time Registers.
93 */
94#define RTC_SECS 0x00 /* Seconds 00-59 */
95#define RTC_SECS_ALARM 0x01 /* Alarm Seconds 00-59 */
96#define RTC_MINS 0x02 /* Minutes 00-59 */
97#define RTC_MINS_ALARM 0x03 /* Alarm Minutes 00-59 */
98#define RTC_HRS 0x04 /* Hours 01-12 AM/PM || 00-23 */
99#define RTC_HRS_ALARM 0x05 /* Alarm Hours 01-12 AM/PM || 00-23 */
100#define RTC_WDAY 0x06 /* Day of Week 01-07 */
101#define RTC_MDAY 0x07 /* Day of Month 01-31 */
102#define RTC_MONTH 0x08 /* Month 01-12 */
103#define RTC_YEAR 0x09 /* Year 00-99 */
104#define RTC_CENTURY 0x48 /* Century 00-99 */
105#define RTC_MDAY_ALARM 0x49 /* Alarm Day of Month 01-31 */
106
107
108/*
109 * Bit masks for the Time registers in BCD Mode (DM = 0).
110 */
111#define RTC_SECS_BCD_MASK 0x7f /* - x x x x x x x */
112#define RTC_MINS_BCD_MASK 0x7f /* - x x x x x x x */
113#define RTC_HRS_12_BCD_MASK 0x1f /* - - - x x x x x */
114#define RTC_HRS_24_BCD_MASK 0x3f /* - - x x x x x x */
115#define RTC_MDAY_BCD_MASK 0x3f /* - - x x x x x x */
116#define RTC_MONTH_BCD_MASK 0x1f /* - - - x x x x x */
117#define RTC_YEAR_BCD_MASK 0xff /* x x x x x x x x */
118
119/*
120 * Bit masks for the Time registers in BIN Mode (DM = 1).
121 */
122#define RTC_SECS_BIN_MASK 0x3f /* - - x x x x x x */
123#define RTC_MINS_BIN_MASK 0x3f /* - - x x x x x x */
124#define RTC_HRS_12_BIN_MASK 0x0f /* - - - - x x x x */
125#define RTC_HRS_24_BIN_MASK 0x1f /* - - - x x x x x */
126#define RTC_MDAY_BIN_MASK 0x1f /* - - - x x x x x */
127#define RTC_MONTH_BIN_MASK 0x0f /* - - - - x x x x */
128#define RTC_YEAR_BIN_MASK 0x7f /* - x x x x x x x */
129
130/*
131 * Bit masks common for the Time registers in BCD or BIN Mode.
132 */
133#define RTC_WDAY_MASK 0x07 /* - - - - - x x x */
134#define RTC_CENTURY_MASK 0xff /* x x x x x x x x */
135#define RTC_MDAY_ALARM_MASK 0xff /* x x x x x x x x */
136#define RTC_HRS_AMPM_MASK BIT(7) /* Mask for the AM/PM bit */
137
138
139
140/*
141 * Control Registers.
142 */
143#define RTC_CTRL_A 0x0a /* Control Register A */
144#define RTC_CTRL_B 0x0b /* Control Register B */
145#define RTC_CTRL_C 0x0c /* Control Register C */
146#define RTC_CTRL_D 0x0d /* Control Register D */
147#define RTC_EXT_CTRL_4A 0x4a /* Extended Control Register 4A */
148#define RTC_EXT_CTRL_4B 0x4b /* Extended Control Register 4B */
149
150
151/*
152 * Bit names in Control Register A.
153 */
154#define RTC_CTRL_A_UIP BIT(7) /* Update In Progress */
155#define RTC_CTRL_A_DV2 BIT(6) /* Countdown Chain */
156#define RTC_CTRL_A_DV1 BIT(5) /* Oscillator Enable */
157#define RTC_CTRL_A_DV0 BIT(4) /* Bank Select */
158#define RTC_CTRL_A_RS2 BIT(2) /* Rate-Selection Bit 2 */
159#define RTC_CTRL_A_RS3 BIT(3) /* Rate-Selection Bit 3 */
160#define RTC_CTRL_A_RS1 BIT(1) /* Rate-Selection Bit 1 */
161#define RTC_CTRL_A_RS0 BIT(0) /* Rate-Selection Bit 0 */
162#define RTC_CTRL_A_RS_MASK 0x0f /* RS3 + RS2 + RS1 + RS0 */
163
164/*
165 * Bit names in Control Register B.
166 */
167#define RTC_CTRL_B_SET BIT(7) /* SET Bit */
168#define RTC_CTRL_B_PIE BIT(6) /* Periodic-Interrupt Enable */
169#define RTC_CTRL_B_AIE BIT(5) /* Alarm-Interrupt Enable */
170#define RTC_CTRL_B_UIE BIT(4) /* Update-Ended Interrupt-Enable */
171#define RTC_CTRL_B_SQWE BIT(3) /* Square-Wave Enable */
172#define RTC_CTRL_B_DM BIT(2) /* Data Mode */
173#define RTC_CTRL_B_2412 BIT(1) /* 12-Hr/24-Hr Mode */
174#define RTC_CTRL_B_DSE BIT(0) /* Daylight Savings Enable */
175#define RTC_CTRL_B_PAU_MASK 0x70 /* PIE + AIE + UIE */
176
177
178/*
179 * Bit names in Control Register C.
180 *
181 * BIT(0), BIT(1), BIT(2), & BIT(3) are unused, always return 0, and cannot
182 * be written to.
183 */
184#define RTC_CTRL_C_IRQF BIT(7) /* Interrupt-Request Flag */
185#define RTC_CTRL_C_PF BIT(6) /* Periodic-Interrupt Flag */
186#define RTC_CTRL_C_AF BIT(5) /* Alarm-Interrupt Flag */
187#define RTC_CTRL_C_UF BIT(4) /* Update-Ended Interrupt Flag */
188#define RTC_CTRL_C_PAU_MASK 0x70 /* PF + AF + UF */
189
190
191/*
192 * Bit names in Control Register D.
193 *
194 * BIT(0) through BIT(6) are unused, always return 0, and cannot
195 * be written to.
196 */
197#define RTC_CTRL_D_VRT BIT(7) /* Valid RAM and Time */
198
199
200/*
201 * Bit names in Extended Control Register 4A.
202 *
203 * On the DS1685/DS1687/DS1689/DS1693, BIT(4) and BIT(5) are reserved for
204 * future use. They can be read from and written to, but have no effect
205 * on the RTC's operation.
206 *
207 * On the DS17x85/DS17x87, BIT(5) is Burst-Mode Enable (BME), and allows
208 * access to the extended NV-SRAM by automatically incrementing the address
209 * register when they are read from or written to.
210 */
211#define RTC_CTRL_4A_VRT2 BIT(7) /* Auxiliary Battery Status */
212#define RTC_CTRL_4A_INCR BIT(6) /* Increment-in-Progress Status */
213#define RTC_CTRL_4A_PAB BIT(3) /* Power-Active Bar Control */
214#define RTC_CTRL_4A_RF BIT(2) /* RAM-Clear Flag */
215#define RTC_CTRL_4A_WF BIT(1) /* Wake-Up Alarm Flag */
216#define RTC_CTRL_4A_KF BIT(0) /* Kickstart Flag */
217#if !defined(CONFIG_RTC_DRV_DS1685) && !defined(CONFIG_RTC_DRV_DS1689)
218#define RTC_CTRL_4A_BME BIT(5) /* Burst-Mode Enable */
219#endif
220#define RTC_CTRL_4A_RWK_MASK 0x07 /* RF + WF + KF */
221
222
223/*
224 * Bit names in Extended Control Register 4B.
225 */
226#define RTC_CTRL_4B_ABE BIT(7) /* Auxiliary Battery Enable */
227#define RTC_CTRL_4B_E32K BIT(6) /* Enable 32.768kHz on SQW Pin */
228#define RTC_CTRL_4B_CS BIT(5) /* Crystal Select */
229#define RTC_CTRL_4B_RCE BIT(4) /* RAM Clear-Enable */
230#define RTC_CTRL_4B_PRS BIT(3) /* PAB Reset-Select */
231#define RTC_CTRL_4B_RIE BIT(2) /* RAM Clear-Interrupt Enable */
232#define RTC_CTRL_4B_WIE BIT(1) /* Wake-Up Alarm-Interrupt Enable */
233#define RTC_CTRL_4B_KSE BIT(0) /* Kickstart Interrupt-Enable */
234#define RTC_CTRL_4B_RWK_MASK 0x07 /* RIE + WIE + KSE */
235
236
237/*
238 * Misc register names in Bank 1.
239 *
240 * The DV0 bit in Control Register A must be set to 1 for these registers
241 * to become available, including Extended Control Registers 4A & 4B.
242 */
243#define RTC_BANK1_SSN_MODEL 0x40 /* Model Number */
244#define RTC_BANK1_SSN_BYTE_1 0x41 /* 1st Byte of Serial Number */
245#define RTC_BANK1_SSN_BYTE_2 0x42 /* 2nd Byte of Serial Number */
246#define RTC_BANK1_SSN_BYTE_3 0x43 /* 3rd Byte of Serial Number */
247#define RTC_BANK1_SSN_BYTE_4 0x44 /* 4th Byte of Serial Number */
248#define RTC_BANK1_SSN_BYTE_5 0x45 /* 5th Byte of Serial Number */
249#define RTC_BANK1_SSN_BYTE_6 0x46 /* 6th Byte of Serial Number */
250#define RTC_BANK1_SSN_CRC 0x47 /* Serial CRC Byte */
251#define RTC_BANK1_RAM_DATA_PORT 0x53 /* Extended RAM Data Port */
252
253
254/*
255 * Model-specific registers in Bank 1.
256 *
257 * The addresses below differ depending on the model of the RTC chip
258 * selected in the kernel configuration. Not all of these features are
259 * supported in the main driver at present.
260 *
261 * DS1685/DS1687 - Extended NV-SRAM address (LSB only).
262 * DS1689/DS1693 - Vcc, Vbat, Pwr Cycle Counters & Customer-specific S/N.
263 * DS17x85/DS17x87 - Extended NV-SRAM addresses (MSB & LSB) & Write counter.
264 */
265#if defined(CONFIG_RTC_DRV_DS1685)
266#define RTC_BANK1_RAM_ADDR 0x50 /* NV-SRAM Addr */
267#elif defined(CONFIG_RTC_DRV_DS1689)
268#define RTC_BANK1_VCC_CTR_LSB 0x54 /* Vcc Counter Addr (LSB) */
269#define RTC_BANK1_VCC_CTR_MSB 0x57 /* Vcc Counter Addr (MSB) */
270#define RTC_BANK1_VBAT_CTR_LSB 0x58 /* Vbat Counter Addr (LSB) */
271#define RTC_BANK1_VBAT_CTR_MSB 0x5b /* Vbat Counter Addr (MSB) */
272#define RTC_BANK1_PWR_CTR_LSB 0x5c /* Pwr Cycle Counter Addr (LSB) */
273#define RTC_BANK1_PWR_CTR_MSB 0x5d /* Pwr Cycle Counter Addr (MSB) */
274#define RTC_BANK1_UNIQ_SN 0x60 /* Customer-specific S/N */
275#else /* DS17x85/DS17x87 */
276#define RTC_BANK1_RAM_ADDR_LSB 0x50 /* NV-SRAM Addr (LSB) */
277#define RTC_BANK1_RAM_ADDR_MSB 0x51 /* NV-SRAM Addr (MSB) */
278#define RTC_BANK1_WRITE_CTR 0x5e /* RTC Write Counter */
279#endif
280
281
282/*
283 * Model numbers.
284 *
285 * The DS1688/DS1691 and DS1689/DS1693 chips share the same model number
286 * and the manual doesn't indicate any major differences. As such, they
287 * are regarded as the same chip in this driver.
288 */
289#define RTC_MODEL_DS1685 0x71 /* DS1685/DS1687 */
290#define RTC_MODEL_DS17285 0x72 /* DS17285/DS17287 */
291#define RTC_MODEL_DS1689 0x73 /* DS1688/DS1691/DS1689/DS1693 */
292#define RTC_MODEL_DS17485 0x74 /* DS17485/DS17487 */
293#define RTC_MODEL_DS17885 0x78 /* DS17885/DS17887 */
294
295
296/*
297 * Periodic Interrupt Rates / Square-Wave Output Frequency
298 *
299 * Periodic rates are selected by setting the RS3-RS0 bits in Control
300 * Register A and enabled via either the E32K bit in Extended Control
301 * Register 4B or the SQWE bit in Control Register B.
302 *
303 * E32K overrides the settings of RS3-RS0 and outputs a frequency of 32768Hz
304 * on the SQW pin of the RTC chip. While there are 16 possible selections,
305 * the 1-of-16 decoder is only able to divide the base 32768Hz signal into 13
306 * smaller frequencies. The values 0x01 and 0x02 are not used and are
307 * synonymous with 0x08 and 0x09, respectively.
308 *
309 * When E32K is set to a logic 1, periodic interrupts are disabled and reading
310 * /dev/rtc will return -EINVAL. This also applies if the periodic interrupt
311 * frequency is set to 0Hz.
312 *
313 * Not currently used by the rtc-ds1685 driver because the RTC core removed
314 * support for hardware-generated periodic-interrupts in favour of
315 * hrtimer-generated interrupts. But these defines are kept around for use
316 * in userland, as documentation to the hardware, and possible future use if
317 * hardware-generated periodic interrupts are ever added back.
318 */
319 /* E32K RS3 RS2 RS1 RS0 */
320#define RTC_SQW_8192HZ 0x03 /* 0 0 0 1 1 */
321#define RTC_SQW_4096HZ 0x04 /* 0 0 1 0 0 */
322#define RTC_SQW_2048HZ 0x05 /* 0 0 1 0 1 */
323#define RTC_SQW_1024HZ 0x06 /* 0 0 1 1 0 */
324#define RTC_SQW_512HZ 0x07 /* 0 0 1 1 1 */
325#define RTC_SQW_256HZ 0x08 /* 0 1 0 0 0 */
326#define RTC_SQW_128HZ 0x09 /* 0 1 0 0 1 */
327#define RTC_SQW_64HZ 0x0a /* 0 1 0 1 0 */
328#define RTC_SQW_32HZ 0x0b /* 0 1 0 1 1 */
329#define RTC_SQW_16HZ 0x0c /* 0 1 1 0 0 */
330#define RTC_SQW_8HZ 0x0d /* 0 1 1 0 1 */
331#define RTC_SQW_4HZ 0x0e /* 0 1 1 1 0 */
332#define RTC_SQW_2HZ 0x0f /* 0 1 1 1 1 */
333#define RTC_SQW_0HZ 0x00 /* 0 0 0 0 0 */
334#define RTC_SQW_32768HZ 32768 /* 1 - - - - */
335#define RTC_MAX_USER_FREQ 8192
336
337
338/*
339 * NVRAM data & addresses:
340 * - 50 bytes of NVRAM are available just past the clock registers.
341 * - 64 additional bytes are available in Bank0.
342 *
343 * Extended, battery-backed NV-SRAM:
344 * - DS1685/DS1687 - 128 bytes.
345 * - DS1689/DS1693 - 0 bytes.
346 * - DS17285/DS17287 - 2048 bytes.
347 * - DS17485/DS17487 - 4096 bytes.
348 * - DS17885/DS17887 - 8192 bytes.
349 */
350#define NVRAM_TIME_BASE 0x0e /* NVRAM Addr in Time regs */
351#define NVRAM_BANK0_BASE 0x40 /* NVRAM Addr in Bank0 regs */
352#define NVRAM_SZ_TIME 50
353#define NVRAM_SZ_BANK0 64
354#if defined(CONFIG_RTC_DRV_DS1685)
355# define NVRAM_SZ_EXTND 128
356#elif defined(CONFIG_RTC_DRV_DS1689)
357# define NVRAM_SZ_EXTND 0
358#elif defined(CONFIG_RTC_DRV_DS17285)
359# define NVRAM_SZ_EXTND 2048
360#elif defined(CONFIG_RTC_DRV_DS17485)
361# define NVRAM_SZ_EXTND 4096
362#elif defined(CONFIG_RTC_DRV_DS17885)
363# define NVRAM_SZ_EXTND 8192
364#endif
365#define NVRAM_TOTAL_SZ_BANK0 (NVRAM_SZ_TIME + NVRAM_SZ_BANK0)
366#define NVRAM_TOTAL_SZ (NVRAM_TOTAL_SZ_BANK0 + NVRAM_SZ_EXTND)
367
368
369/*
370 * Function Prototypes.
371 */
372extern void __noreturn
373ds1685_rtc_poweroff(struct platform_device *pdev);
374
375#endif /* _LINUX_RTC_DS1685_H_ */
diff --git a/lib/Kconfig b/lib/Kconfig
index cd177caf3876..cb9758e0ba0c 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -14,7 +14,7 @@ config BITREVERSE
14 tristate 14 tristate
15 15
16config HAVE_ARCH_BITREVERSE 16config HAVE_ARCH_BITREVERSE
17 boolean 17 bool
18 default n 18 default n
19 depends on BITREVERSE 19 depends on BITREVERSE
20 help 20 help
diff --git a/mm/Makefile b/mm/Makefile
index 088c68e9ec35..3c1caa2693bd 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -55,7 +55,6 @@ obj-$(CONFIG_KMEMCHECK) += kmemcheck.o
55obj-$(CONFIG_KASAN) += kasan/ 55obj-$(CONFIG_KASAN) += kasan/
56obj-$(CONFIG_FAILSLAB) += failslab.o 56obj-$(CONFIG_FAILSLAB) += failslab.o
57obj-$(CONFIG_MEMORY_HOTPLUG) += memory_hotplug.o 57obj-$(CONFIG_MEMORY_HOTPLUG) += memory_hotplug.o
58obj-$(CONFIG_FS_XIP) += filemap_xip.o
59obj-$(CONFIG_MIGRATION) += migrate.o 58obj-$(CONFIG_MIGRATION) += migrate.o
60obj-$(CONFIG_QUICKLIST) += quicklist.o 59obj-$(CONFIG_QUICKLIST) += quicklist.o
61obj-$(CONFIG_TRANSPARENT_HUGEPAGE) += huge_memory.o 60obj-$(CONFIG_TRANSPARENT_HUGEPAGE) += huge_memory.o
diff --git a/mm/fadvise.c b/mm/fadvise.c
index fac23ecf8d72..4a3907cf79f8 100644
--- a/mm/fadvise.c
+++ b/mm/fadvise.c
@@ -28,6 +28,7 @@
28SYSCALL_DEFINE4(fadvise64_64, int, fd, loff_t, offset, loff_t, len, int, advice) 28SYSCALL_DEFINE4(fadvise64_64, int, fd, loff_t, offset, loff_t, len, int, advice)
29{ 29{
30 struct fd f = fdget(fd); 30 struct fd f = fdget(fd);
31 struct inode *inode;
31 struct address_space *mapping; 32 struct address_space *mapping;
32 struct backing_dev_info *bdi; 33 struct backing_dev_info *bdi;
33 loff_t endbyte; /* inclusive */ 34 loff_t endbyte; /* inclusive */
@@ -39,7 +40,8 @@ SYSCALL_DEFINE4(fadvise64_64, int, fd, loff_t, offset, loff_t, len, int, advice)
39 if (!f.file) 40 if (!f.file)
40 return -EBADF; 41 return -EBADF;
41 42
42 if (S_ISFIFO(file_inode(f.file)->i_mode)) { 43 inode = file_inode(f.file);
44 if (S_ISFIFO(inode->i_mode)) {
43 ret = -ESPIPE; 45 ret = -ESPIPE;
44 goto out; 46 goto out;
45 } 47 }
@@ -50,7 +52,7 @@ SYSCALL_DEFINE4(fadvise64_64, int, fd, loff_t, offset, loff_t, len, int, advice)
50 goto out; 52 goto out;
51 } 53 }
52 54
53 if (mapping->a_ops->get_xip_mem) { 55 if (IS_DAX(inode)) {
54 switch (advice) { 56 switch (advice) {
55 case POSIX_FADV_NORMAL: 57 case POSIX_FADV_NORMAL:
56 case POSIX_FADV_RANDOM: 58 case POSIX_FADV_RANDOM:
diff --git a/mm/filemap.c b/mm/filemap.c
index d9f5336552d7..ad7242043bdb 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -1695,8 +1695,7 @@ generic_file_read_iter(struct kiocb *iocb, struct iov_iter *iter)
1695 loff_t *ppos = &iocb->ki_pos; 1695 loff_t *ppos = &iocb->ki_pos;
1696 loff_t pos = *ppos; 1696 loff_t pos = *ppos;
1697 1697
1698 /* coalesce the iovecs and go direct-to-BIO for O_DIRECT */ 1698 if (io_is_direct(file)) {
1699 if (file->f_flags & O_DIRECT) {
1700 struct address_space *mapping = file->f_mapping; 1699 struct address_space *mapping = file->f_mapping;
1701 struct inode *inode = mapping->host; 1700 struct inode *inode = mapping->host;
1702 size_t count = iov_iter_count(iter); 1701 size_t count = iov_iter_count(iter);
@@ -1723,9 +1722,11 @@ generic_file_read_iter(struct kiocb *iocb, struct iov_iter *iter)
1723 * we've already read everything we wanted to, or if 1722 * we've already read everything we wanted to, or if
1724 * there was a short read because we hit EOF, go ahead 1723 * there was a short read because we hit EOF, go ahead
1725 * and return. Otherwise fallthrough to buffered io for 1724 * and return. Otherwise fallthrough to buffered io for
1726 * the rest of the read. 1725 * the rest of the read. Buffered reads will not work for
1726 * DAX files, so don't bother trying.
1727 */ 1727 */
1728 if (retval < 0 || !iov_iter_count(iter) || *ppos >= size) { 1728 if (retval < 0 || !iov_iter_count(iter) || *ppos >= size ||
1729 IS_DAX(inode)) {
1729 file_accessed(file); 1730 file_accessed(file);
1730 goto out; 1731 goto out;
1731 } 1732 }
@@ -2582,18 +2583,20 @@ ssize_t __generic_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
2582 if (err) 2583 if (err)
2583 goto out; 2584 goto out;
2584 2585
2585 /* coalesce the iovecs and go direct-to-BIO for O_DIRECT */ 2586 if (io_is_direct(file)) {
2586 if (unlikely(file->f_flags & O_DIRECT)) {
2587 loff_t endbyte; 2587 loff_t endbyte;
2588 2588
2589 written = generic_file_direct_write(iocb, from, pos); 2589 written = generic_file_direct_write(iocb, from, pos);
2590 if (written < 0 || written == count)
2591 goto out;
2592
2593 /* 2590 /*
2594 * direct-io write to a hole: fall through to buffered I/O 2591 * If the write stopped short of completing, fall back to
2595 * for completing the rest of the request. 2592 * buffered writes. Some filesystems do this for writes to
2593 * holes, for example. For DAX files, a buffered write will
2594 * not succeed (even if it did, DAX does not handle dirty
2595 * page-cache pages correctly).
2596 */ 2596 */
2597 if (written < 0 || written == count || IS_DAX(inode))
2598 goto out;
2599
2597 pos += written; 2600 pos += written;
2598 count -= written; 2601 count -= written;
2599 2602
diff --git a/mm/filemap_xip.c b/mm/filemap_xip.c
deleted file mode 100644
index c175f9f25210..000000000000
--- a/mm/filemap_xip.c
+++ /dev/null
@@ -1,478 +0,0 @@
1/*
2 * linux/mm/filemap_xip.c
3 *
4 * Copyright (C) 2005 IBM Corporation
5 * Author: Carsten Otte <cotte@de.ibm.com>
6 *
7 * derived from linux/mm/filemap.c - Copyright (C) Linus Torvalds
8 *
9 */
10
11#include <linux/fs.h>
12#include <linux/backing-dev.h>
13#include <linux/pagemap.h>
14#include <linux/export.h>
15#include <linux/uio.h>
16#include <linux/rmap.h>
17#include <linux/mmu_notifier.h>
18#include <linux/sched.h>
19#include <linux/seqlock.h>
20#include <linux/mutex.h>
21#include <linux/gfp.h>
22#include <asm/tlbflush.h>
23#include <asm/io.h>
24
25/*
26 * We do use our own empty page to avoid interference with other users
27 * of ZERO_PAGE(), such as /dev/zero
 *
 * xip_sparse_mutex is taken around the get_xip_mem() calls made with a
 * third argument of 1 (presumably "create" - confirm against the
 * address_space_operations definition) and around the sparse-page
 * insertion in xip_file_fault().
 * xip_sparse_seq is written by xip_file_fault() while it maps the sparse
 * page and sampled by __xip_unmap() so that it can detect a concurrent
 * mapping and retry.
 * __xip_sparse_page caches the single page once it has been allocated.
28 */
29static DEFINE_MUTEX(xip_sparse_mutex);
30static seqcount_t xip_sparse_seq = SEQCNT_ZERO(xip_sparse_seq);
31static struct page *__xip_sparse_page;
32
33/*
 * Return the shared sparse page, allocating it zero-filled
 * (GFP_HIGHUSER | __GFP_ZERO) on first use and caching it in
 * __xip_sparse_page. Returns NULL if the allocation failed; nothing is
 * cached in that case, so a later call attempts the allocation again.
 *
 * called under xip_sparse_mutex
 */
34static struct page *xip_sparse_page(void)
35{
36 if (!__xip_sparse_page) {
37 struct page *page = alloc_page(GFP_HIGHUSER | __GFP_ZERO);
38
39 if (page)
40 __xip_sparse_page = page;
41 }
42 return __xip_sparse_page;
43}
44
45/*
46 * This is a file read routine for execute in place files, and uses
47 * the mapping->a_ops->get_xip_mem() function for the actual low-level
48 * stuff.
49 *
50 * Note the struct file* is only passed to file_accessed(). It may be NULL.
 * The _ra argument is not referenced by the body.
 *
 * Copies up to len bytes starting at *ppos into the user buffer buf, one
 * page-cache-sized piece at a time, never reading past i_size. Pieces for
 * which get_xip_mem() reports -ENODATA (a hole) are returned as zeroes.
 * *ppos is advanced by the number of bytes copied.
 *
 * Returns the number of bytes copied, or, if nothing was copied, the
 * error value (0 at EOF or for an empty file).
 *
 * NOTE(review): error is declared size_t although it carries negative
 * errno values; the comparison with -ENODATA and the final return rely
 * on integer conversion to get the intended result.
51 */
52static ssize_t
53do_xip_mapping_read(struct address_space *mapping,
54 struct file_ra_state *_ra,
55 struct file *filp,
56 char __user *buf,
57 size_t len,
58 loff_t *ppos)
59{
60 struct inode *inode = mapping->host;
61 pgoff_t index, end_index;
62 unsigned long offset;
63 loff_t isize, pos;
64 size_t copied = 0, error = 0;
65
66 BUG_ON(!mapping->a_ops->get_xip_mem);
67
68 pos = *ppos;
 /* split the position into a page index and a byte offset in that page */
69 index = pos >> PAGE_CACHE_SHIFT;
70 offset = pos & ~PAGE_CACHE_MASK;
71
72 isize = i_size_read(inode);
73 if (!isize)
74 goto out;
75
 /* index of the page holding the last byte of the file */
76 end_index = (isize - 1) >> PAGE_CACHE_SHIFT;
77 do {
78 unsigned long nr, left;
79 void *xip_mem;
80 unsigned long xip_pfn;
81 int zero = 0;
82
83 /* nr is the maximum number of bytes to copy from this page */
84 nr = PAGE_CACHE_SIZE;
85 if (index >= end_index) {
86 if (index > end_index)
87 goto out;
 /* last page: only the bytes up to i_size are valid */
88 nr = ((isize - 1) & ~PAGE_CACHE_MASK) + 1;
89 if (nr <= offset) {
90 goto out;
91 }
92 }
93 nr = nr - offset;
 /* never copy more than the caller still wants */
94 if (nr > len - copied)
95 nr = len - copied;
96
97 error = mapping->a_ops->get_xip_mem(mapping, index, 0,
98 &xip_mem, &xip_pfn);
99 if (unlikely(error)) {
100 if (error == -ENODATA) {
101 /* sparse */
102 zero = 1;
103 } else
104 goto out;
105 }
106
107 /* If users can be writing to this page using arbitrary
108 * virtual addresses, take care about potential aliasing
109 * before reading the page on the kernel side.
 *
 * NOTE(review): the statement below is empty - no flush is
 * actually issued here.
110 */
111 if (mapping_writably_mapped(mapping))
112 /* address based flush */ ;
113
114 /*
115 * Ok, we have the mem, so now we can copy it to user space...
116 *
117 * The actor routine returns how many bytes were actually used..
118 * NOTE! This may not be the same as how much of a user buffer
119 * we filled up (we may be padding etc), so we can only update
120 * "pos" here (the actor routine has to update the user buffer
121 * pointers and the remaining count).
 *
 * NOTE(review): there is no actor routine in this function; the
 * wording above looks inherited from the code this file was
 * derived from. The copy (or the zero-fill for a hole) is done
 * inline below.
122 */
123 if (!zero)
124 left = __copy_to_user(buf+copied, xip_mem+offset, nr);
125 else
126 left = __clear_user(buf + copied, nr);
127
 /* a short copy aborts the read; this piece is not counted at all */
128 if (left) {
129 error = -EFAULT;
130 goto out;
131 }
132
 /* left is 0 here, so this advances by the full nr */
133 copied += (nr - left);
134 offset += (nr - left);
135 index += offset >> PAGE_CACHE_SHIFT;
136 offset &= ~PAGE_CACHE_MASK;
137 } while (copied < len);
138
139out:
140 *ppos = pos + copied;
141 if (filp)
142 file_accessed(filp);
143
144 return (copied ? copied : error);
145}
146
/*
 * Exported read entry point for execute-in-place files.
 *
 * Verifies that the user buffer [buf, buf + len) may be written to and
 * then hands the request to do_xip_mapping_read() using the file's
 * mapping and read-ahead state.
 *
 * Returns -EFAULT if the access_ok() check fails, otherwise whatever
 * do_xip_mapping_read() returns.
 */
147ssize_t
148xip_file_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos)
149{
150 if (!access_ok(VERIFY_WRITE, buf, len))
151 return -EFAULT;
152
153 return do_xip_mapping_read(filp->f_mapping, &filp->f_ra, filp,
154 buf, len, ppos);
155}
156EXPORT_SYMBOL_GPL(xip_file_read);
157
158/*
159 * __xip_unmap is invoked from xip_file_fault and __xip_file_write
160 *
161 * This function walks all vmas of the address_space and unmaps the
162 * __xip_sparse_page when found at pgoff.
 *
 * The walk first runs without xip_sparse_mutex, using xip_sparse_seq to
 * detect whether xip_file_fault() was mapping the sparse page meanwhile.
 * If the sequence count changed, the walk is repeated once with
 * xip_sparse_mutex held. If the sparse page has never been allocated
 * there is nothing to unmap and the function returns immediately.
163 */
164static void __xip_unmap(struct address_space * mapping, unsigned long pgoff)
165{
166 struct vm_area_struct *vma;
167 struct page *page;
168 unsigned count;
169 int locked = 0;
170
171 count = read_seqcount_begin(&xip_sparse_seq);
172
173 page = __xip_sparse_page;
174 if (!page)
175 return;
176
177retry:
178 i_mmap_lock_read(mapping);
179 vma_interval_tree_foreach(vma, &mapping->i_mmap, pgoff, pgoff) {
180 pte_t *pte, pteval;
181 spinlock_t *ptl;
182 struct mm_struct *mm = vma->vm_mm;
 /* user virtual address at which this vma maps file page pgoff */
183 unsigned long address = vma->vm_start +
184 ((pgoff - vma->vm_pgoff) << PAGE_SHIFT);
185
186 BUG_ON(address < vma->vm_start || address >= vma->vm_end);
 /* non-NULL only if the sparse page is what is mapped at address */
187 pte = page_check_address(page, mm, address, &ptl, 1);
188 if (pte) {
189 /* Nuke the page table entry. */
190 flush_cache_page(vma, address, pte_pfn(*pte));
191 pteval = ptep_clear_flush(vma, address, pte);
192 page_remove_rmap(page);
193 dec_mm_counter(mm, MM_FILEPAGES);
194 BUG_ON(pte_dirty(pteval));
195 pte_unmap_unlock(pte, ptl);
196 /* must invalidate_page _before_ freeing the page */
197 mmu_notifier_invalidate_page(mm, address);
198 page_cache_release(page);
199 }
200 }
201 i_mmap_unlock_read(mapping);
202
 /*
  * Second pass (mutex held): done, drop the mutex.
  * First pass: if the sequence count moved, redo the walk with the
  * mutex held.
  */
203 if (locked) {
204 mutex_unlock(&xip_sparse_mutex);
205 } else if (read_seqcount_retry(&xip_sparse_seq, count)) {
206 mutex_lock(&xip_sparse_mutex);
207 locked = 1;
208 goto retry;
209 }
210}
211
212/*
213 * xip_file_fault() is invoked via the vma operations vector for a
214 * mapped memory region to read in file data during a page fault.
215 *
216 * This function is derived from filemap_fault, but used for execute in place
 *
 * Outcomes, as coded below:
 * - fault beyond i_size: VM_FAULT_SIGBUS;
 * - backing memory exists: its pfn is inserted with vm_insert_mixed()
 *   and VM_FAULT_NOPAGE is returned;
 * - hole in a possibly shared-writable mapping on a filesystem not
 *   mounted read-only: a block is requested via get_xip_mem(..., 1, ...),
 *   stale sparse-page mappings are removed with __xip_unmap(), and the
 *   new pfn is inserted;
 * - hole otherwise: the shared sparse page is inserted with
 *   vm_insert_page();
 * - get_xip_mem() errors other than -ENODATA and -ENOMEM from the insert
 *   helpers are reported as VM_FAULT_OOM.
217 */
218static int xip_file_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
219{
220 struct file *file = vma->vm_file;
221 struct address_space *mapping = file->f_mapping;
222 struct inode *inode = mapping->host;
223 pgoff_t size;
224 void *xip_mem;
225 unsigned long xip_pfn;
226 struct page *page;
227 int error;
228
229 /* XXX: are VM_FAULT_ codes OK? */
230again:
 /* file size in pages, rounded up */
231 size = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
232 if (vmf->pgoff >= size)
233 return VM_FAULT_SIGBUS;
234
235 error = mapping->a_ops->get_xip_mem(mapping, vmf->pgoff, 0,
236 &xip_mem, &xip_pfn);
 /* note: the found label lives inside the if-block further down */
237 if (likely(!error))
238 goto found;
239 if (error != -ENODATA)
240 return VM_FAULT_OOM;
241
242 /* sparse block */
243 if ((vma->vm_flags & (VM_WRITE | VM_MAYWRITE)) &&
244 (vma->vm_flags & (VM_SHARED | VM_MAYSHARE)) &&
245 (!(mapping->host->i_sb->s_flags & MS_RDONLY))) {
246 int err;
247
248 /* maybe shared writable, allocate new block */
249 mutex_lock(&xip_sparse_mutex);
250 error = mapping->a_ops->get_xip_mem(mapping, vmf->pgoff, 1,
251 &xip_mem, &xip_pfn);
252 mutex_unlock(&xip_sparse_mutex);
253 if (error)
254 return VM_FAULT_SIGBUS;
255 /* unmap sparse mappings at pgoff from all other vmas */
256 __xip_unmap(mapping, vmf->pgoff);
257
 /* also reached by goto from the first get_xip_mem() call above */
258found:
259 err = vm_insert_mixed(vma, (unsigned long)vmf->virtual_address,
260 xip_pfn);
261 if (err == -ENOMEM)
262 return VM_FAULT_OOM;
263 /*
264 * err == -EBUSY is fine, we've raced against another thread
265 * that faulted-in the same page
266 */
267 if (err != -EBUSY)
268 BUG_ON(err);
269 return VM_FAULT_NOPAGE;
270 } else {
 /* ret stays VM_FAULT_OOM on every goto out below */
271 int err, ret = VM_FAULT_OOM;
272
273 mutex_lock(&xip_sparse_mutex);
 /* lets __xip_unmap() notice that a sparse mapping is being set up */
274 write_seqcount_begin(&xip_sparse_seq);
 /* re-check under the mutex: the hole may have been filled meanwhile */
275 error = mapping->a_ops->get_xip_mem(mapping, vmf->pgoff, 0,
276 &xip_mem, &xip_pfn);
277 if (unlikely(!error)) {
278 write_seqcount_end(&xip_sparse_seq);
279 mutex_unlock(&xip_sparse_mutex);
280 goto again;
281 }
282 if (error != -ENODATA)
283 goto out;
284 /* not shared and writable, use xip_sparse_page() */
285 page = xip_sparse_page();
286 if (!page)
287 goto out;
288 err = vm_insert_page(vma, (unsigned long)vmf->virtual_address,
289 page);
290 if (err == -ENOMEM)
291 goto out;
292
 /* any other vm_insert_page() result is treated as success here */
293 ret = VM_FAULT_NOPAGE;
294out:
295 write_seqcount_end(&xip_sparse_seq);
296 mutex_unlock(&xip_sparse_mutex);
297
298 return ret;
299 }
300}
301
/*
 * vm operations installed on execute-in-place mappings by xip_file_mmap():
 * faults go through xip_file_fault(), page_mkwrite uses the generic
 * filemap_page_mkwrite().
 */
302static const struct vm_operations_struct xip_file_vm_ops = {
303 .fault = xip_file_fault,
304 .page_mkwrite = filemap_page_mkwrite,
305};
306
/*
 * Exported mmap entry point for execute-in-place files.
 *
 * Requires the mapping to provide get_xip_mem() (BUG otherwise), marks
 * the file accessed, installs xip_file_vm_ops on the vma and sets
 * VM_MIXEDMAP in its flags. Always returns 0.
 */
307int xip_file_mmap(struct file * file, struct vm_area_struct * vma)
308{
309 BUG_ON(!file->f_mapping->a_ops->get_xip_mem);
310
311 file_accessed(file);
312 vma->vm_ops = &xip_file_vm_ops;
313 vma->vm_flags |= VM_MIXEDMAP;
314 return 0;
315}
316EXPORT_SYMBOL_GPL(xip_file_mmap);
317
/*
 * Copy count bytes from the user buffer buf into the file's
 * execute-in-place memory, starting at file position pos.
 *
 * The data is written one page-cache-sized piece at a time. When
 * get_xip_mem() reports a hole (-ENODATA), a block is requested with a
 * third argument of 1 under xip_sparse_mutex and any sparse-page
 * mappings of that index are removed with __xip_unmap().
 *
 * On return *ppos holds the position after the last byte written and
 * i_size is extended (and the inode marked dirty) if the write went past
 * it. Per the comment near the end, the caller holds i_mutex.
 *
 * Returns the number of bytes written, or, if none were written, the
 * last status (a negative error, e.g. -EFAULT for a failed user copy).
 */
318static ssize_t
319__xip_file_write(struct file *filp, const char __user *buf,
320 size_t count, loff_t pos, loff_t *ppos)
321{
322 struct address_space * mapping = filp->f_mapping;
323 const struct address_space_operations *a_ops = mapping->a_ops;
324 struct inode *inode = mapping->host;
325 long status = 0;
326 size_t bytes;
327 ssize_t written = 0;
328
329 BUG_ON(!mapping->a_ops->get_xip_mem);
330
331 do {
332 unsigned long index;
333 unsigned long offset;
334 size_t copied;
335 void *xip_mem;
336 unsigned long xip_pfn;
337
338 offset = (pos & (PAGE_CACHE_SIZE -1)); /* Within page */
339 index = pos >> PAGE_CACHE_SHIFT;
 /* write at most up to the end of this page, or what is left */
340 bytes = PAGE_CACHE_SIZE - offset;
341 if (bytes > count)
342 bytes = count;
343
344 status = a_ops->get_xip_mem(mapping, index, 0,
345 &xip_mem, &xip_pfn);
346 if (status == -ENODATA) {
347 /* hole: request a new block, then unmap the sparse page */
348 mutex_lock(&xip_sparse_mutex);
349 status = a_ops->get_xip_mem(mapping, index, 1,
350 &xip_mem, &xip_pfn);
351 mutex_unlock(&xip_sparse_mutex);
352 if (!status)
353 /* unmap page at pgoff from all other vmas */
354 __xip_unmap(mapping, index);
355 }
356
357 if (status)
358 break;
359
 /* __copy_from_user_nocache() returns the number of bytes NOT copied */
360 copied = bytes -
361 __copy_from_user_nocache(xip_mem + offset, buf, bytes);
362
363 if (likely(copied > 0)) {
364 status = copied;
365
 /* NOTE(review): status was just set to copied (> 0), so this looks always true */
366 if (status >= 0) {
367 written += status;
368 count -= status;
369 pos += status;
370 buf += status;
371 }
372 }
 /* a short copy ends the loop with -EFAULT after counting the partial bytes */
373 if (unlikely(copied != bytes))
374 if (status >= 0)
375 status = -EFAULT;
376 if (status < 0)
377 break;
378 } while (count);
379 *ppos = pos;
380 /*
381 * No need to use i_size_read() here, the i_size
382 * cannot change under us because we hold i_mutex.
383 */
384 if (pos > inode->i_size) {
385 i_size_write(inode, pos);
386 mark_inode_dirty(inode);
387 }
388
389 return written ? written : status;
390}
391
/*
 * Exported write entry point for execute-in-place files.
 *
 * With inode->i_mutex held for the whole operation, this validates the
 * user buffer, runs generic_write_checks() (which may adjust pos and
 * count), removes suid bits, updates the file times and finally hands
 * the data to __xip_file_write(). current->backing_dev_info is set for
 * the duration of those steps and cleared again before unlocking.
 *
 * Returns -EFAULT if the access_ok() check fails, the error of whichever
 * preparatory step failed, 0 if generic_write_checks() leaves nothing to
 * write, or otherwise the result of __xip_file_write().
 */
392ssize_t
393xip_file_write(struct file *filp, const char __user *buf, size_t len,
394 loff_t *ppos)
395{
396 struct address_space *mapping = filp->f_mapping;
397 struct inode *inode = mapping->host;
398 size_t count;
399 loff_t pos;
400 ssize_t ret;
401
402 mutex_lock(&inode->i_mutex);
403
404 if (!access_ok(VERIFY_READ, buf, len)) {
405 ret=-EFAULT;
406 goto out_up;
407 }
408
409 pos = *ppos;
410 count = len;
411
412 /* We can write back this queue in page reclaim */
413 current->backing_dev_info = inode_to_bdi(inode);
414
415 ret = generic_write_checks(filp, &pos, &count, S_ISBLK(inode->i_mode));
416 if (ret)
417 goto out_backing;
 /* nothing left to write after the checks; ret is 0 here */
418 if (count == 0)
419 goto out_backing;
420
421 ret = file_remove_suid(filp);
422 if (ret)
423 goto out_backing;
424
425 ret = file_update_time(filp);
426 if (ret)
427 goto out_backing;
428
429 ret = __xip_file_write (filp, buf, count, pos, ppos);
430
431 out_backing:
432 current->backing_dev_info = NULL;
433 out_up:
434 mutex_unlock(&inode->i_mutex);
435 return ret;
436}
437EXPORT_SYMBOL_GPL(xip_file_write);
438
439/*
440 * truncate a page used for execute in place
441 * functionality is analogous to block_truncate_page but uses get_xip_mem
442 * to get the page instead of the page cache
 *
 * Zeroes the bytes from file offset "from" up to the end of the
 * filesystem block containing it (block size is 1 << i_blkbits).
 *
 * Returns 0 if "from" is block aligned, if the page is a hole
 * (-ENODATA from get_xip_mem()), or after zeroing; any other
 * get_xip_mem() error is returned unchanged.
443 */
444int
445xip_truncate_page(struct address_space *mapping, loff_t from)
446{
447 pgoff_t index = from >> PAGE_CACHE_SHIFT;
448 unsigned offset = from & (PAGE_CACHE_SIZE-1);
449 unsigned blocksize;
450 unsigned length;
451 void *xip_mem;
452 unsigned long xip_pfn;
453 int err;
454
455 BUG_ON(!mapping->a_ops->get_xip_mem);
456
457 blocksize = 1 << mapping->host->i_blkbits;
 /* bytes already used in the block that contains "from" */
458 length = offset & (blocksize - 1);
459
460 /* Block boundary? Nothing to do */
461 if (!length)
462 return 0;
463
 /* from here on, length is the number of bytes left to the block end */
464 length = blocksize - length;
465
466 err = mapping->a_ops->get_xip_mem(mapping, index, 0,
467 &xip_mem, &xip_pfn);
468 if (unlikely(err)) {
469 if (err == -ENODATA)
470 /* Hole? No need to truncate */
471 return 0;
472 else
473 return err;
474 }
475 memset(xip_mem + offset, 0, length);
476 return 0;
477}
478EXPORT_SYMBOL_GPL(xip_truncate_page);
diff --git a/mm/madvise.c b/mm/madvise.c
index 1077cbdc8b52..d551475517bf 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -239,7 +239,7 @@ static long madvise_willneed(struct vm_area_struct *vma,
239 return -EBADF; 239 return -EBADF;
240#endif 240#endif
241 241
242 if (file->f_mapping->a_ops->get_xip_mem) { 242 if (IS_DAX(file_inode(file))) {
243 /* no bad return value, but ignore advice */ 243 /* no bad return value, but ignore advice */
244 return 0; 244 return 0;
245 } 245 }
diff --git a/mm/memory.c b/mm/memory.c
index 99275325f303..8068893697bb 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1965,6 +1965,7 @@ static int do_page_mkwrite(struct vm_area_struct *vma, struct page *page,
1965 vmf.pgoff = page->index; 1965 vmf.pgoff = page->index;
1966 vmf.flags = FAULT_FLAG_WRITE|FAULT_FLAG_MKWRITE; 1966 vmf.flags = FAULT_FLAG_WRITE|FAULT_FLAG_MKWRITE;
1967 vmf.page = page; 1967 vmf.page = page;
1968 vmf.cow_page = NULL;
1968 1969
1969 ret = vma->vm_ops->page_mkwrite(vma, &vmf); 1970 ret = vma->vm_ops->page_mkwrite(vma, &vmf);
1970 if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE))) 1971 if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE)))
@@ -2329,6 +2330,7 @@ void unmap_mapping_range(struct address_space *mapping,
2329 details.last_index = ULONG_MAX; 2330 details.last_index = ULONG_MAX;
2330 2331
2331 2332
2333 /* DAX uses i_mmap_lock to serialise file truncate vs page fault */
2332 i_mmap_lock_write(mapping); 2334 i_mmap_lock_write(mapping);
2333 if (unlikely(!RB_EMPTY_ROOT(&mapping->i_mmap))) 2335 if (unlikely(!RB_EMPTY_ROOT(&mapping->i_mmap)))
2334 unmap_mapping_range_tree(&mapping->i_mmap, &details); 2336 unmap_mapping_range_tree(&mapping->i_mmap, &details);
@@ -2638,7 +2640,8 @@ oom:
2638 * See filemap_fault() and __lock_page_retry(). 2640 * See filemap_fault() and __lock_page_retry().
2639 */ 2641 */
2640static int __do_fault(struct vm_area_struct *vma, unsigned long address, 2642static int __do_fault(struct vm_area_struct *vma, unsigned long address,
2641 pgoff_t pgoff, unsigned int flags, struct page **page) 2643 pgoff_t pgoff, unsigned int flags,
2644 struct page *cow_page, struct page **page)
2642{ 2645{
2643 struct vm_fault vmf; 2646 struct vm_fault vmf;
2644 int ret; 2647 int ret;
@@ -2647,10 +2650,13 @@ static int __do_fault(struct vm_area_struct *vma, unsigned long address,
2647 vmf.pgoff = pgoff; 2650 vmf.pgoff = pgoff;
2648 vmf.flags = flags; 2651 vmf.flags = flags;
2649 vmf.page = NULL; 2652 vmf.page = NULL;
2653 vmf.cow_page = cow_page;
2650 2654
2651 ret = vma->vm_ops->fault(vma, &vmf); 2655 ret = vma->vm_ops->fault(vma, &vmf);
2652 if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY))) 2656 if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY)))
2653 return ret; 2657 return ret;
2658 if (!vmf.page)
2659 goto out;
2654 2660
2655 if (unlikely(PageHWPoison(vmf.page))) { 2661 if (unlikely(PageHWPoison(vmf.page))) {
2656 if (ret & VM_FAULT_LOCKED) 2662 if (ret & VM_FAULT_LOCKED)
@@ -2664,6 +2670,7 @@ static int __do_fault(struct vm_area_struct *vma, unsigned long address,
2664 else 2670 else
2665 VM_BUG_ON_PAGE(!PageLocked(vmf.page), vmf.page); 2671 VM_BUG_ON_PAGE(!PageLocked(vmf.page), vmf.page);
2666 2672
2673 out:
2667 *page = vmf.page; 2674 *page = vmf.page;
2668 return ret; 2675 return ret;
2669} 2676}
@@ -2834,7 +2841,7 @@ static int do_read_fault(struct mm_struct *mm, struct vm_area_struct *vma,
2834 pte_unmap_unlock(pte, ptl); 2841 pte_unmap_unlock(pte, ptl);
2835 } 2842 }
2836 2843
2837 ret = __do_fault(vma, address, pgoff, flags, &fault_page); 2844 ret = __do_fault(vma, address, pgoff, flags, NULL, &fault_page);
2838 if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY))) 2845 if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY)))
2839 return ret; 2846 return ret;
2840 2847
@@ -2874,26 +2881,43 @@ static int do_cow_fault(struct mm_struct *mm, struct vm_area_struct *vma,
2874 return VM_FAULT_OOM; 2881 return VM_FAULT_OOM;
2875 } 2882 }
2876 2883
2877 ret = __do_fault(vma, address, pgoff, flags, &fault_page); 2884 ret = __do_fault(vma, address, pgoff, flags, new_page, &fault_page);
2878 if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY))) 2885 if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY)))
2879 goto uncharge_out; 2886 goto uncharge_out;
2880 2887
2881 copy_user_highpage(new_page, fault_page, address, vma); 2888 if (fault_page)
2889 copy_user_highpage(new_page, fault_page, address, vma);
2882 __SetPageUptodate(new_page); 2890 __SetPageUptodate(new_page);
2883 2891
2884 pte = pte_offset_map_lock(mm, pmd, address, &ptl); 2892 pte = pte_offset_map_lock(mm, pmd, address, &ptl);
2885 if (unlikely(!pte_same(*pte, orig_pte))) { 2893 if (unlikely(!pte_same(*pte, orig_pte))) {
2886 pte_unmap_unlock(pte, ptl); 2894 pte_unmap_unlock(pte, ptl);
2887 unlock_page(fault_page); 2895 if (fault_page) {
2888 page_cache_release(fault_page); 2896 unlock_page(fault_page);
2897 page_cache_release(fault_page);
2898 } else {
2899 /*
2900 * The fault handler has no page to lock, so it holds
2901 * i_mmap_lock for read to protect against truncate.
2902 */
2903 i_mmap_unlock_read(vma->vm_file->f_mapping);
2904 }
2889 goto uncharge_out; 2905 goto uncharge_out;
2890 } 2906 }
2891 do_set_pte(vma, address, new_page, pte, true, true); 2907 do_set_pte(vma, address, new_page, pte, true, true);
2892 mem_cgroup_commit_charge(new_page, memcg, false); 2908 mem_cgroup_commit_charge(new_page, memcg, false);
2893 lru_cache_add_active_or_unevictable(new_page, vma); 2909 lru_cache_add_active_or_unevictable(new_page, vma);
2894 pte_unmap_unlock(pte, ptl); 2910 pte_unmap_unlock(pte, ptl);
2895 unlock_page(fault_page); 2911 if (fault_page) {
2896 page_cache_release(fault_page); 2912 unlock_page(fault_page);
2913 page_cache_release(fault_page);
2914 } else {
2915 /*
2916 * The fault handler has no page to lock, so it holds
2917 * i_mmap_lock for read to protect against truncate.
2918 */
2919 i_mmap_unlock_read(vma->vm_file->f_mapping);
2920 }
2897 return ret; 2921 return ret;
2898uncharge_out: 2922uncharge_out:
2899 mem_cgroup_cancel_charge(new_page, memcg); 2923 mem_cgroup_cancel_charge(new_page, memcg);
@@ -2912,7 +2936,7 @@ static int do_shared_fault(struct mm_struct *mm, struct vm_area_struct *vma,
2912 int dirtied = 0; 2936 int dirtied = 0;
2913 int ret, tmp; 2937 int ret, tmp;
2914 2938
2915 ret = __do_fault(vma, address, pgoff, flags, &fault_page); 2939 ret = __do_fault(vma, address, pgoff, flags, NULL, &fault_page);
2916 if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY))) 2940 if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY)))
2917 return ret; 2941 return ret;
2918 2942
diff --git a/scripts/diffconfig b/scripts/diffconfig
index 6d672836e187..0db267d0adc9 100755
--- a/scripts/diffconfig
+++ b/scripts/diffconfig
@@ -28,7 +28,6 @@ If no config files are specified, .config and .config.old are used.
28Example usage: 28Example usage:
29 $ diffconfig .config config-with-some-changes 29 $ diffconfig .config config-with-some-changes
30-EXT2_FS_XATTR n 30-EXT2_FS_XATTR n
31-EXT2_FS_XIP n
32 CRAMFS n -> y 31 CRAMFS n -> y
33 EXT2_FS y -> n 32 EXT2_FS y -> n
34 LOG_BUF_SHIFT 14 -> 16 33 LOG_BUF_SHIFT 14 -> 16