aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Documentation/feature-removal-schedule.txt10
-rw-r--r--Documentation/filesystems/nfs/00-INDEX2
-rw-r--r--Documentation/filesystems/nfs/pnfs.txt48
-rw-r--r--arch/arm/plat-omap/fb.c14
-rw-r--r--arch/arm/plat-omap/include/plat/display.h31
-rw-r--r--arch/arm/plat-omap/include/plat/vrfb.h16
-rw-r--r--arch/x86/kernel/cpu/cpufreq/cpufreq-nforce2.c2
-rw-r--r--arch/x86/kernel/cpu/cpufreq/longrun.c4
-rw-r--r--drivers/cpufreq/cpufreq.c4
-rw-r--r--drivers/cpufreq/cpufreq_ondemand.c42
-rw-r--r--drivers/video/omap/lcd_omap3beagle.c2
-rw-r--r--drivers/video/omap2/displays/Kconfig2
-rw-r--r--drivers/video/omap2/displays/panel-acx565akm.c6
-rw-r--r--drivers/video/omap2/displays/panel-generic.c6
-rw-r--r--drivers/video/omap2/displays/panel-sharp-lq043t1dg01.c6
-rw-r--r--drivers/video/omap2/displays/panel-sharp-ls037v7dw01.c6
-rw-r--r--drivers/video/omap2/displays/panel-toppoly-tdo35s.c6
-rw-r--r--drivers/video/omap2/displays/panel-tpo-td043mtea1.c6
-rw-r--r--drivers/video/omap2/dss/Makefile2
-rw-r--r--drivers/video/omap2/dss/core.c3
-rw-r--r--drivers/video/omap2/dss/dispc.c270
-rw-r--r--drivers/video/omap2/dss/dsi.c1
-rw-r--r--drivers/video/omap2/dss/dss_features.c191
-rw-r--r--drivers/video/omap2/dss/dss_features.h50
-rw-r--r--drivers/video/omap2/dss/manager.c33
-rw-r--r--drivers/video/omap2/dss/overlay.c24
-rw-r--r--drivers/video/omap2/omapfb/Kconfig2
-rw-r--r--drivers/video/omap2/omapfb/omapfb-main.c26
-rw-r--r--fs/Makefile5
-rw-r--r--fs/compat.c2
-rw-r--r--fs/lockd/host.c1
-rw-r--r--fs/lockd/mon.c1
-rw-r--r--fs/lockd/svc.c2
-rw-r--r--fs/lockd/svc4proc.c2
-rw-r--r--fs/lockd/svclock.c31
-rw-r--r--fs/lockd/svcproc.c2
-rw-r--r--fs/nfs/Kconfig8
-rw-r--r--fs/nfs/Makefile4
-rw-r--r--fs/nfs/callback.c4
-rw-r--r--fs/nfs/callback_proc.c8
-rw-r--r--fs/nfs/client.c11
-rw-r--r--fs/nfs/dns_resolve.c6
-rw-r--r--fs/nfs/file.c5
-rw-r--r--fs/nfs/inode.c3
-rw-r--r--fs/nfs/mount_clnt.c2
-rw-r--r--fs/nfs/nfs4filelayout.c280
-rw-r--r--fs/nfs/nfs4filelayout.h94
-rw-r--r--fs/nfs/nfs4filelayoutdev.c448
-rw-r--r--fs/nfs/nfs4proc.c218
-rw-r--r--fs/nfs/nfs4state.c2
-rw-r--r--fs/nfs/nfs4xdr.c360
-rw-r--r--fs/nfs/pnfs.c783
-rw-r--r--fs/nfs/pnfs.h189
-rw-r--r--fs/nfs/read.c3
-rw-r--r--fs/nfsd/Kconfig12
-rw-r--r--fs/nfsd/export.c73
-rw-r--r--fs/nfsd/nfs4callback.c245
-rw-r--r--fs/nfsd/nfs4idmap.c105
-rw-r--r--fs/nfsd/nfs4proc.c7
-rw-r--r--fs/nfsd/nfs4state.c493
-rw-r--r--fs/nfsd/nfs4xdr.c18
-rw-r--r--fs/nfsd/nfsctl.c26
-rw-r--r--fs/nfsd/nfsd.h2
-rw-r--r--fs/nfsd/nfssvc.c5
-rw-r--r--fs/nfsd/state.h52
-rw-r--r--include/linux/net.h2
-rw-r--r--include/linux/nfs4.h65
-rw-r--r--include/linux/nfs_fs.h5
-rw-r--r--include/linux/nfs_fs_sb.h3
-rw-r--r--include/linux/nfs_xdr.h50
-rw-r--r--include/linux/sunrpc/auth.h4
-rw-r--r--include/linux/sunrpc/cache.h37
-rw-r--r--include/linux/sunrpc/clnt.h1
-rw-r--r--include/linux/sunrpc/gss_spkm3.h55
-rw-r--r--include/linux/sunrpc/stats.h23
-rw-r--r--include/linux/sunrpc/svc_xprt.h32
-rw-r--r--include/linux/sunrpc/svcauth.h17
-rw-r--r--include/linux/sunrpc/xdr.h7
-rw-r--r--include/linux/sunrpc/xprt.h4
-rw-r--r--net/socket.c3
-rw-r--r--net/sunrpc/Kconfig19
-rw-r--r--net/sunrpc/auth.c2
-rw-r--r--net/sunrpc/auth_generic.c2
-rw-r--r--net/sunrpc/auth_gss/Makefile5
-rw-r--r--net/sunrpc/auth_gss/gss_krb5_mech.c2
-rw-r--r--net/sunrpc/auth_gss/gss_spkm3_mech.c247
-rw-r--r--net/sunrpc/auth_gss/gss_spkm3_seal.c186
-rw-r--r--net/sunrpc/auth_gss/gss_spkm3_token.c267
-rw-r--r--net/sunrpc/auth_gss/gss_spkm3_unseal.c127
-rw-r--r--net/sunrpc/auth_gss/svcauth_gss.c51
-rw-r--r--net/sunrpc/cache.c288
-rw-r--r--net/sunrpc/clnt.c1
-rw-r--r--net/sunrpc/netns.h19
-rw-r--r--net/sunrpc/rpcb_clnt.c4
-rw-r--r--net/sunrpc/stats.c43
-rw-r--r--net/sunrpc/sunrpc_syms.c58
-rw-r--r--net/sunrpc/svc.c3
-rw-r--r--net/sunrpc/svc_xprt.c59
-rw-r--r--net/sunrpc/svcauth_unix.c194
-rw-r--r--net/sunrpc/svcsock.c27
-rw-r--r--net/sunrpc/xprt.c39
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma.c11
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_recvfrom.c19
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_sendto.c82
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_transport.c49
-rw-r--r--net/sunrpc/xprtrdma/transport.c25
-rw-r--r--net/sunrpc/xprtsock.c358
107 files changed, 4606 insertions, 2216 deletions
diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt
index e833c8c81e69..d2af87ba96e1 100644
--- a/Documentation/feature-removal-schedule.txt
+++ b/Documentation/feature-removal-schedule.txt
@@ -535,3 +535,13 @@ Why: Hareware scan is the prefer method for iwlwifi devices for
535Who: Wey-Yi Guy <wey-yi.w.guy@intel.com> 535Who: Wey-Yi Guy <wey-yi.w.guy@intel.com>
536 536
537---------------------------- 537----------------------------
538
539What: access to nfsd auth cache through sys_nfsservctl or '.' files
540 in the 'nfsd' filesystem.
541When: 2.6.40
542Why: This is a legacy interface which have been replaced by a more
543 dynamic cache. Continuing to maintain this interface is an
544 unnecessary burden.
545Who: NeilBrown <neilb@suse.de>
546
547----------------------------
diff --git a/Documentation/filesystems/nfs/00-INDEX b/Documentation/filesystems/nfs/00-INDEX
index 3225a5662114..a57e12411d2a 100644
--- a/Documentation/filesystems/nfs/00-INDEX
+++ b/Documentation/filesystems/nfs/00-INDEX
@@ -12,6 +12,8 @@ nfs-rdma.txt
12 - how to install and setup the Linux NFS/RDMA client and server software 12 - how to install and setup the Linux NFS/RDMA client and server software
13nfsroot.txt 13nfsroot.txt
14 - short guide on setting up a diskless box with NFS root filesystem. 14 - short guide on setting up a diskless box with NFS root filesystem.
15pnfs.txt
16 - short explanation of some of the internals of the pnfs client code
15rpc-cache.txt 17rpc-cache.txt
16 - introduction to the caching mechanisms in the sunrpc layer. 18 - introduction to the caching mechanisms in the sunrpc layer.
17idmapper.txt 19idmapper.txt
diff --git a/Documentation/filesystems/nfs/pnfs.txt b/Documentation/filesystems/nfs/pnfs.txt
new file mode 100644
index 000000000000..bc0b9cfe095b
--- /dev/null
+++ b/Documentation/filesystems/nfs/pnfs.txt
@@ -0,0 +1,48 @@
1Reference counting in pnfs:
2==========================
3
4The are several inter-related caches. We have layouts which can
5reference multiple devices, each of which can reference multiple data servers.
6Each data server can be referenced by multiple devices. Each device
7can be referenced by multiple layouts. To keep all of this straight,
8we need to reference count.
9
10
11struct pnfs_layout_hdr
12----------------------
13The on-the-wire command LAYOUTGET corresponds to struct
14pnfs_layout_segment, usually referred to by the variable name lseg.
15Each nfs_inode may hold a pointer to a cache of of these layout
16segments in nfsi->layout, of type struct pnfs_layout_hdr.
17
18We reference the header for the inode pointing to it, across each
19outstanding RPC call that references it (LAYOUTGET, LAYOUTRETURN,
20LAYOUTCOMMIT), and for each lseg held within.
21
22Each header is also (when non-empty) put on a list associated with
23struct nfs_client (cl_layouts). Being put on this list does not bump
24the reference count, as the layout is kept around by the lseg that
25keeps it in the list.
26
27deviceid_cache
28--------------
29lsegs reference device ids, which are resolved per nfs_client and
30layout driver type. The device ids are held in a RCU cache (struct
31nfs4_deviceid_cache). The cache itself is referenced across each
32mount. The entries (struct nfs4_deviceid) themselves are held across
33the lifetime of each lseg referencing them.
34
35RCU is used because the deviceid is basically a write once, read many
36data structure. The hlist size of 32 buckets needs better
37justification, but seems reasonable given that we can have multiple
38deviceid's per filesystem, and multiple filesystems per nfs_client.
39
40The hash code is copied from the nfsd code base. A discussion of
41hashing and variations of this algorithm can be found at:
42http://groups.google.com/group/comp.lang.c/browse_thread/thread/9522965e2b8d3809
43
44data server cache
45-----------------
46file driver devices refer to data servers, which are kept in a module
47level cache. Its reference is held over the lifetime of the deviceid
48pointing to it.
diff --git a/arch/arm/plat-omap/fb.c b/arch/arm/plat-omap/fb.c
index bb78c1532fae..c9e5d7298c40 100644
--- a/arch/arm/plat-omap/fb.c
+++ b/arch/arm/plat-omap/fb.c
@@ -96,7 +96,7 @@ static int fbmem_region_reserved(unsigned long start, size_t size)
96 * Get the region_idx`th region from board config/ATAG and convert it to 96 * Get the region_idx`th region from board config/ATAG and convert it to
97 * our internal format. 97 * our internal format.
98 */ 98 */
99static int get_fbmem_region(int region_idx, struct omapfb_mem_region *rg) 99static int __init get_fbmem_region(int region_idx, struct omapfb_mem_region *rg)
100{ 100{
101 const struct omap_fbmem_config *conf; 101 const struct omap_fbmem_config *conf;
102 u32 paddr; 102 u32 paddr;
@@ -128,7 +128,7 @@ static int set_fbmem_region_type(struct omapfb_mem_region *rg, int mem_type,
128 * type = 0 && paddr = 0, a default don't care case maps to 128 * type = 0 && paddr = 0, a default don't care case maps to
129 * the SDRAM type. 129 * the SDRAM type.
130 */ 130 */
131 if (rg->type || (!rg->type && !rg->paddr)) 131 if (rg->type || !rg->paddr)
132 return 0; 132 return 0;
133 if (ranges_overlap(rg->paddr, rg->size, mem_start, mem_size)) { 133 if (ranges_overlap(rg->paddr, rg->size, mem_start, mem_size)) {
134 rg->type = mem_type; 134 rg->type = mem_type;
@@ -260,7 +260,7 @@ void __init omapfb_reserve_sdram_memblock(void)
260 * this point, since the driver built as a module would have problem with 260 * this point, since the driver built as a module would have problem with
261 * freeing / reallocating the regions. 261 * freeing / reallocating the regions.
262 */ 262 */
263unsigned long omapfb_reserve_sram(unsigned long sram_pstart, 263unsigned long __init omapfb_reserve_sram(unsigned long sram_pstart,
264 unsigned long sram_vstart, 264 unsigned long sram_vstart,
265 unsigned long sram_size, 265 unsigned long sram_size,
266 unsigned long pstart_avail, 266 unsigned long pstart_avail,
@@ -334,7 +334,7 @@ void omapfb_set_ctrl_platform_data(void *data)
334 omapfb_config.ctrl_platform_data = data; 334 omapfb_config.ctrl_platform_data = data;
335} 335}
336 336
337static inline int omap_init_fb(void) 337static int __init omap_init_fb(void)
338{ 338{
339 const struct omap_lcd_config *conf; 339 const struct omap_lcd_config *conf;
340 340
@@ -379,7 +379,7 @@ void omapfb_set_platform_data(struct omapfb_platform_data *data)
379 omapfb_config = *data; 379 omapfb_config = *data;
380} 380}
381 381
382static inline int omap_init_fb(void) 382static int __init omap_init_fb(void)
383{ 383{
384 return platform_device_register(&omap_fb_device); 384 return platform_device_register(&omap_fb_device);
385} 385}
@@ -390,7 +390,7 @@ void omapfb_reserve_sdram_memblock(void)
390{ 390{
391} 391}
392 392
393unsigned long omapfb_reserve_sram(unsigned long sram_pstart, 393unsigned long __init omapfb_reserve_sram(unsigned long sram_pstart,
394 unsigned long sram_vstart, 394 unsigned long sram_vstart,
395 unsigned long sram_size, 395 unsigned long sram_size,
396 unsigned long start_avail, 396 unsigned long start_avail,
@@ -409,7 +409,7 @@ void omapfb_reserve_sdram_memblock(void)
409{ 409{
410} 410}
411 411
412unsigned long omapfb_reserve_sram(unsigned long sram_pstart, 412unsigned long __init omapfb_reserve_sram(unsigned long sram_pstart,
413 unsigned long sram_vstart, 413 unsigned long sram_vstart,
414 unsigned long sram_size, 414 unsigned long sram_size,
415 unsigned long start_avail, 415 unsigned long start_avail,
diff --git a/arch/arm/plat-omap/include/plat/display.h b/arch/arm/plat-omap/include/plat/display.h
index 8bd15bdb4132..c915a661f1f5 100644
--- a/arch/arm/plat-omap/include/plat/display.h
+++ b/arch/arm/plat-omap/include/plat/display.h
@@ -81,37 +81,6 @@ enum omap_color_mode {
81 OMAP_DSS_COLOR_ARGB32 = 1 << 11, /* ARGB32 */ 81 OMAP_DSS_COLOR_ARGB32 = 1 << 11, /* ARGB32 */
82 OMAP_DSS_COLOR_RGBA32 = 1 << 12, /* RGBA32 */ 82 OMAP_DSS_COLOR_RGBA32 = 1 << 12, /* RGBA32 */
83 OMAP_DSS_COLOR_RGBX32 = 1 << 13, /* RGBx32 */ 83 OMAP_DSS_COLOR_RGBX32 = 1 << 13, /* RGBx32 */
84
85 OMAP_DSS_COLOR_GFX_OMAP2 =
86 OMAP_DSS_COLOR_CLUT1 | OMAP_DSS_COLOR_CLUT2 |
87 OMAP_DSS_COLOR_CLUT4 | OMAP_DSS_COLOR_CLUT8 |
88 OMAP_DSS_COLOR_RGB12U | OMAP_DSS_COLOR_RGB16 |
89 OMAP_DSS_COLOR_RGB24U | OMAP_DSS_COLOR_RGB24P,
90
91 OMAP_DSS_COLOR_VID_OMAP2 =
92 OMAP_DSS_COLOR_RGB16 | OMAP_DSS_COLOR_RGB24U |
93 OMAP_DSS_COLOR_RGB24P | OMAP_DSS_COLOR_YUV2 |
94 OMAP_DSS_COLOR_UYVY,
95
96 OMAP_DSS_COLOR_GFX_OMAP3 =
97 OMAP_DSS_COLOR_CLUT1 | OMAP_DSS_COLOR_CLUT2 |
98 OMAP_DSS_COLOR_CLUT4 | OMAP_DSS_COLOR_CLUT8 |
99 OMAP_DSS_COLOR_RGB12U | OMAP_DSS_COLOR_ARGB16 |
100 OMAP_DSS_COLOR_RGB16 | OMAP_DSS_COLOR_RGB24U |
101 OMAP_DSS_COLOR_RGB24P | OMAP_DSS_COLOR_ARGB32 |
102 OMAP_DSS_COLOR_RGBA32 | OMAP_DSS_COLOR_RGBX32,
103
104 OMAP_DSS_COLOR_VID1_OMAP3 =
105 OMAP_DSS_COLOR_RGB12U | OMAP_DSS_COLOR_RGB16 |
106 OMAP_DSS_COLOR_RGB24U | OMAP_DSS_COLOR_RGB24P |
107 OMAP_DSS_COLOR_YUV2 | OMAP_DSS_COLOR_UYVY,
108
109 OMAP_DSS_COLOR_VID2_OMAP3 =
110 OMAP_DSS_COLOR_RGB12U | OMAP_DSS_COLOR_ARGB16 |
111 OMAP_DSS_COLOR_RGB16 | OMAP_DSS_COLOR_RGB24U |
112 OMAP_DSS_COLOR_RGB24P | OMAP_DSS_COLOR_YUV2 |
113 OMAP_DSS_COLOR_UYVY | OMAP_DSS_COLOR_ARGB32 |
114 OMAP_DSS_COLOR_RGBA32 | OMAP_DSS_COLOR_RGBX32,
115}; 84};
116 85
117enum omap_lcd_display_type { 86enum omap_lcd_display_type {
diff --git a/arch/arm/plat-omap/include/plat/vrfb.h b/arch/arm/plat-omap/include/plat/vrfb.h
index d8a03ced3b10..3792bdea2f6d 100644
--- a/arch/arm/plat-omap/include/plat/vrfb.h
+++ b/arch/arm/plat-omap/include/plat/vrfb.h
@@ -35,6 +35,7 @@ struct vrfb {
35 bool yuv_mode; 35 bool yuv_mode;
36}; 36};
37 37
38#ifdef CONFIG_OMAP2_VRFB
38extern int omap_vrfb_request_ctx(struct vrfb *vrfb); 39extern int omap_vrfb_request_ctx(struct vrfb *vrfb);
39extern void omap_vrfb_release_ctx(struct vrfb *vrfb); 40extern void omap_vrfb_release_ctx(struct vrfb *vrfb);
40extern void omap_vrfb_adjust_size(u16 *width, u16 *height, 41extern void omap_vrfb_adjust_size(u16 *width, u16 *height,
@@ -47,4 +48,19 @@ extern void omap_vrfb_setup(struct vrfb *vrfb, unsigned long paddr,
47extern int omap_vrfb_map_angle(struct vrfb *vrfb, u16 height, u8 rot); 48extern int omap_vrfb_map_angle(struct vrfb *vrfb, u16 height, u8 rot);
48extern void omap_vrfb_restore_context(void); 49extern void omap_vrfb_restore_context(void);
49 50
51#else
52static inline int omap_vrfb_request_ctx(struct vrfb *vrfb) { return 0; }
53static inline void omap_vrfb_release_ctx(struct vrfb *vrfb) {}
54static inline void omap_vrfb_adjust_size(u16 *width, u16 *height,
55 u8 bytespp) {}
56static inline u32 omap_vrfb_min_phys_size(u16 width, u16 height, u8 bytespp)
57 { return 0; }
58static inline u16 omap_vrfb_max_height(u32 phys_size, u16 width, u8 bytespp)
59 { return 0; }
60static inline void omap_vrfb_setup(struct vrfb *vrfb, unsigned long paddr,
61 u16 width, u16 height, unsigned bytespp, bool yuv_mode) {}
62static inline int omap_vrfb_map_angle(struct vrfb *vrfb, u16 height, u8 rot)
63 { return 0; }
64static inline void omap_vrfb_restore_context(void) {}
65#endif
50#endif /* __VRFB_H */ 66#endif /* __VRFB_H */
diff --git a/arch/x86/kernel/cpu/cpufreq/cpufreq-nforce2.c b/arch/x86/kernel/cpu/cpufreq/cpufreq-nforce2.c
index 733093d60436..141abebc4516 100644
--- a/arch/x86/kernel/cpu/cpufreq/cpufreq-nforce2.c
+++ b/arch/x86/kernel/cpu/cpufreq/cpufreq-nforce2.c
@@ -393,7 +393,7 @@ static struct cpufreq_driver nforce2_driver = {
393 * Detects nForce2 A2 and C1 stepping 393 * Detects nForce2 A2 and C1 stepping
394 * 394 *
395 */ 395 */
396static unsigned int nforce2_detect_chipset(void) 396static int nforce2_detect_chipset(void)
397{ 397{
398 nforce2_dev = pci_get_subsys(PCI_VENDOR_ID_NVIDIA, 398 nforce2_dev = pci_get_subsys(PCI_VENDOR_ID_NVIDIA,
399 PCI_DEVICE_ID_NVIDIA_NFORCE2, 399 PCI_DEVICE_ID_NVIDIA_NFORCE2,
diff --git a/arch/x86/kernel/cpu/cpufreq/longrun.c b/arch/x86/kernel/cpu/cpufreq/longrun.c
index fc09f142d94d..d9f51367666b 100644
--- a/arch/x86/kernel/cpu/cpufreq/longrun.c
+++ b/arch/x86/kernel/cpu/cpufreq/longrun.c
@@ -35,7 +35,7 @@ static unsigned int longrun_low_freq, longrun_high_freq;
35 * Reads the current LongRun policy by access to MSR_TMTA_LONGRUN_FLAGS 35 * Reads the current LongRun policy by access to MSR_TMTA_LONGRUN_FLAGS
36 * and MSR_TMTA_LONGRUN_CTRL 36 * and MSR_TMTA_LONGRUN_CTRL
37 */ 37 */
38static void __init longrun_get_policy(struct cpufreq_policy *policy) 38static void __cpuinit longrun_get_policy(struct cpufreq_policy *policy)
39{ 39{
40 u32 msr_lo, msr_hi; 40 u32 msr_lo, msr_hi;
41 41
@@ -165,7 +165,7 @@ static unsigned int longrun_get(unsigned int cpu)
165 * TMTA rules: 165 * TMTA rules:
166 * performance_pctg = (target_freq - low_freq)/(high_freq - low_freq) 166 * performance_pctg = (target_freq - low_freq)/(high_freq - low_freq)
167 */ 167 */
168static unsigned int __cpuinit longrun_determine_freqs(unsigned int *low_freq, 168static int __cpuinit longrun_determine_freqs(unsigned int *low_freq,
169 unsigned int *high_freq) 169 unsigned int *high_freq)
170{ 170{
171 u32 msr_lo, msr_hi; 171 u32 msr_lo, msr_hi;
diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index 199dcb9f0b83..c63a43823744 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -918,8 +918,8 @@ static int cpufreq_add_dev_interface(unsigned int cpu,
918 918
919 spin_lock_irqsave(&cpufreq_driver_lock, flags); 919 spin_lock_irqsave(&cpufreq_driver_lock, flags);
920 for_each_cpu(j, policy->cpus) { 920 for_each_cpu(j, policy->cpus) {
921 if (!cpu_online(j)) 921 if (!cpu_online(j))
922 continue; 922 continue;
923 per_cpu(cpufreq_cpu_data, j) = policy; 923 per_cpu(cpufreq_cpu_data, j) = policy;
924 per_cpu(cpufreq_policy_cpu, j) = policy->cpu; 924 per_cpu(cpufreq_policy_cpu, j) = policy->cpu;
925 } 925 }
diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c
index 7b5093664e49..c631f27a3dcc 100644
--- a/drivers/cpufreq/cpufreq_ondemand.c
+++ b/drivers/cpufreq/cpufreq_ondemand.c
@@ -30,6 +30,8 @@
30 30
31#define DEF_FREQUENCY_DOWN_DIFFERENTIAL (10) 31#define DEF_FREQUENCY_DOWN_DIFFERENTIAL (10)
32#define DEF_FREQUENCY_UP_THRESHOLD (80) 32#define DEF_FREQUENCY_UP_THRESHOLD (80)
33#define DEF_SAMPLING_DOWN_FACTOR (1)
34#define MAX_SAMPLING_DOWN_FACTOR (100000)
33#define MICRO_FREQUENCY_DOWN_DIFFERENTIAL (3) 35#define MICRO_FREQUENCY_DOWN_DIFFERENTIAL (3)
34#define MICRO_FREQUENCY_UP_THRESHOLD (95) 36#define MICRO_FREQUENCY_UP_THRESHOLD (95)
35#define MICRO_FREQUENCY_MIN_SAMPLE_RATE (10000) 37#define MICRO_FREQUENCY_MIN_SAMPLE_RATE (10000)
@@ -82,6 +84,7 @@ struct cpu_dbs_info_s {
82 unsigned int freq_lo; 84 unsigned int freq_lo;
83 unsigned int freq_lo_jiffies; 85 unsigned int freq_lo_jiffies;
84 unsigned int freq_hi_jiffies; 86 unsigned int freq_hi_jiffies;
87 unsigned int rate_mult;
85 int cpu; 88 int cpu;
86 unsigned int sample_type:1; 89 unsigned int sample_type:1;
87 /* 90 /*
@@ -108,10 +111,12 @@ static struct dbs_tuners {
108 unsigned int up_threshold; 111 unsigned int up_threshold;
109 unsigned int down_differential; 112 unsigned int down_differential;
110 unsigned int ignore_nice; 113 unsigned int ignore_nice;
114 unsigned int sampling_down_factor;
111 unsigned int powersave_bias; 115 unsigned int powersave_bias;
112 unsigned int io_is_busy; 116 unsigned int io_is_busy;
113} dbs_tuners_ins = { 117} dbs_tuners_ins = {
114 .up_threshold = DEF_FREQUENCY_UP_THRESHOLD, 118 .up_threshold = DEF_FREQUENCY_UP_THRESHOLD,
119 .sampling_down_factor = DEF_SAMPLING_DOWN_FACTOR,
115 .down_differential = DEF_FREQUENCY_DOWN_DIFFERENTIAL, 120 .down_differential = DEF_FREQUENCY_DOWN_DIFFERENTIAL,
116 .ignore_nice = 0, 121 .ignore_nice = 0,
117 .powersave_bias = 0, 122 .powersave_bias = 0,
@@ -259,6 +264,7 @@ static ssize_t show_##file_name \
259show_one(sampling_rate, sampling_rate); 264show_one(sampling_rate, sampling_rate);
260show_one(io_is_busy, io_is_busy); 265show_one(io_is_busy, io_is_busy);
261show_one(up_threshold, up_threshold); 266show_one(up_threshold, up_threshold);
267show_one(sampling_down_factor, sampling_down_factor);
262show_one(ignore_nice_load, ignore_nice); 268show_one(ignore_nice_load, ignore_nice);
263show_one(powersave_bias, powersave_bias); 269show_one(powersave_bias, powersave_bias);
264 270
@@ -340,6 +346,29 @@ static ssize_t store_up_threshold(struct kobject *a, struct attribute *b,
340 return count; 346 return count;
341} 347}
342 348
349static ssize_t store_sampling_down_factor(struct kobject *a,
350 struct attribute *b, const char *buf, size_t count)
351{
352 unsigned int input, j;
353 int ret;
354 ret = sscanf(buf, "%u", &input);
355
356 if (ret != 1 || input > MAX_SAMPLING_DOWN_FACTOR || input < 1)
357 return -EINVAL;
358 mutex_lock(&dbs_mutex);
359 dbs_tuners_ins.sampling_down_factor = input;
360
361 /* Reset down sampling multiplier in case it was active */
362 for_each_online_cpu(j) {
363 struct cpu_dbs_info_s *dbs_info;
364 dbs_info = &per_cpu(od_cpu_dbs_info, j);
365 dbs_info->rate_mult = 1;
366 }
367 mutex_unlock(&dbs_mutex);
368
369 return count;
370}
371
343static ssize_t store_ignore_nice_load(struct kobject *a, struct attribute *b, 372static ssize_t store_ignore_nice_load(struct kobject *a, struct attribute *b,
344 const char *buf, size_t count) 373 const char *buf, size_t count)
345{ 374{
@@ -401,6 +430,7 @@ static ssize_t store_powersave_bias(struct kobject *a, struct attribute *b,
401define_one_global_rw(sampling_rate); 430define_one_global_rw(sampling_rate);
402define_one_global_rw(io_is_busy); 431define_one_global_rw(io_is_busy);
403define_one_global_rw(up_threshold); 432define_one_global_rw(up_threshold);
433define_one_global_rw(sampling_down_factor);
404define_one_global_rw(ignore_nice_load); 434define_one_global_rw(ignore_nice_load);
405define_one_global_rw(powersave_bias); 435define_one_global_rw(powersave_bias);
406 436
@@ -409,6 +439,7 @@ static struct attribute *dbs_attributes[] = {
409 &sampling_rate_min.attr, 439 &sampling_rate_min.attr,
410 &sampling_rate.attr, 440 &sampling_rate.attr,
411 &up_threshold.attr, 441 &up_threshold.attr,
442 &sampling_down_factor.attr,
412 &ignore_nice_load.attr, 443 &ignore_nice_load.attr,
413 &powersave_bias.attr, 444 &powersave_bias.attr,
414 &io_is_busy.attr, 445 &io_is_busy.attr,
@@ -562,6 +593,10 @@ static void dbs_check_cpu(struct cpu_dbs_info_s *this_dbs_info)
562 593
563 /* Check for frequency increase */ 594 /* Check for frequency increase */
564 if (max_load_freq > dbs_tuners_ins.up_threshold * policy->cur) { 595 if (max_load_freq > dbs_tuners_ins.up_threshold * policy->cur) {
596 /* If switching to max speed, apply sampling_down_factor */
597 if (policy->cur < policy->max)
598 this_dbs_info->rate_mult =
599 dbs_tuners_ins.sampling_down_factor;
565 dbs_freq_increase(policy, policy->max); 600 dbs_freq_increase(policy, policy->max);
566 return; 601 return;
567 } 602 }
@@ -584,6 +619,9 @@ static void dbs_check_cpu(struct cpu_dbs_info_s *this_dbs_info)
584 (dbs_tuners_ins.up_threshold - 619 (dbs_tuners_ins.up_threshold -
585 dbs_tuners_ins.down_differential); 620 dbs_tuners_ins.down_differential);
586 621
622 /* No longer fully busy, reset rate_mult */
623 this_dbs_info->rate_mult = 1;
624
587 if (freq_next < policy->min) 625 if (freq_next < policy->min)
588 freq_next = policy->min; 626 freq_next = policy->min;
589 627
@@ -607,7 +645,8 @@ static void do_dbs_timer(struct work_struct *work)
607 int sample_type = dbs_info->sample_type; 645 int sample_type = dbs_info->sample_type;
608 646
609 /* We want all CPUs to do sampling nearly on same jiffy */ 647 /* We want all CPUs to do sampling nearly on same jiffy */
610 int delay = usecs_to_jiffies(dbs_tuners_ins.sampling_rate); 648 int delay = usecs_to_jiffies(dbs_tuners_ins.sampling_rate
649 * dbs_info->rate_mult);
611 650
612 if (num_online_cpus() > 1) 651 if (num_online_cpus() > 1)
613 delay -= jiffies % delay; 652 delay -= jiffies % delay;
@@ -711,6 +750,7 @@ static int cpufreq_governor_dbs(struct cpufreq_policy *policy,
711 } 750 }
712 } 751 }
713 this_dbs_info->cpu = cpu; 752 this_dbs_info->cpu = cpu;
753 this_dbs_info->rate_mult = 1;
714 ondemand_powersave_bias_init_cpu(cpu); 754 ondemand_powersave_bias_init_cpu(cpu);
715 /* 755 /*
716 * Start the timerschedule work, when this governor 756 * Start the timerschedule work, when this governor
diff --git a/drivers/video/omap/lcd_omap3beagle.c b/drivers/video/omap/lcd_omap3beagle.c
index ca75cc2a87a5..d7c6c3e0afc6 100644
--- a/drivers/video/omap/lcd_omap3beagle.c
+++ b/drivers/video/omap/lcd_omap3beagle.c
@@ -25,8 +25,6 @@
25#include <linux/gpio.h> 25#include <linux/gpio.h>
26#include <linux/i2c/twl.h> 26#include <linux/i2c/twl.h>
27 27
28#include <plat/mux.h>
29#include <plat/mux.h>
30#include <asm/mach-types.h> 28#include <asm/mach-types.h>
31 29
32#include "omapfb.h" 30#include "omapfb.h"
diff --git a/drivers/video/omap2/displays/Kconfig b/drivers/video/omap2/displays/Kconfig
index 881c9f77c75a..12327bbfdbbb 100644
--- a/drivers/video/omap2/displays/Kconfig
+++ b/drivers/video/omap2/displays/Kconfig
@@ -40,7 +40,7 @@ config PANEL_TPO_TD043MTEA1
40 40
41config PANEL_ACX565AKM 41config PANEL_ACX565AKM
42 tristate "ACX565AKM Panel" 42 tristate "ACX565AKM Panel"
43 depends on OMAP2_DSS_SDI 43 depends on OMAP2_DSS_SDI && SPI
44 select BACKLIGHT_CLASS_DEVICE 44 select BACKLIGHT_CLASS_DEVICE
45 help 45 help
46 This is the LCD panel used on Nokia N900 46 This is the LCD panel used on Nokia N900
diff --git a/drivers/video/omap2/displays/panel-acx565akm.c b/drivers/video/omap2/displays/panel-acx565akm.c
index 07fbb8a733bb..e77310653207 100644
--- a/drivers/video/omap2/displays/panel-acx565akm.c
+++ b/drivers/video/omap2/displays/panel-acx565akm.c
@@ -587,6 +587,9 @@ static int acx_panel_power_on(struct omap_dss_device *dssdev)
587 587
588 dev_dbg(&dssdev->dev, "%s\n", __func__); 588 dev_dbg(&dssdev->dev, "%s\n", __func__);
589 589
590 if (dssdev->state == OMAP_DSS_DISPLAY_ACTIVE)
591 return 0;
592
590 mutex_lock(&md->mutex); 593 mutex_lock(&md->mutex);
591 594
592 r = omapdss_sdi_display_enable(dssdev); 595 r = omapdss_sdi_display_enable(dssdev);
@@ -644,6 +647,9 @@ static void acx_panel_power_off(struct omap_dss_device *dssdev)
644 647
645 dev_dbg(&dssdev->dev, "%s\n", __func__); 648 dev_dbg(&dssdev->dev, "%s\n", __func__);
646 649
650 if (dssdev->state != OMAP_DSS_DISPLAY_ACTIVE)
651 return;
652
647 mutex_lock(&md->mutex); 653 mutex_lock(&md->mutex);
648 654
649 if (!md->enabled) { 655 if (!md->enabled) {
diff --git a/drivers/video/omap2/displays/panel-generic.c b/drivers/video/omap2/displays/panel-generic.c
index 300eff5de1b4..395a68de3990 100644
--- a/drivers/video/omap2/displays/panel-generic.c
+++ b/drivers/video/omap2/displays/panel-generic.c
@@ -39,6 +39,9 @@ static int generic_panel_power_on(struct omap_dss_device *dssdev)
39{ 39{
40 int r; 40 int r;
41 41
42 if (dssdev->state == OMAP_DSS_DISPLAY_ACTIVE)
43 return 0;
44
42 r = omapdss_dpi_display_enable(dssdev); 45 r = omapdss_dpi_display_enable(dssdev);
43 if (r) 46 if (r)
44 goto err0; 47 goto err0;
@@ -58,6 +61,9 @@ err0:
58 61
59static void generic_panel_power_off(struct omap_dss_device *dssdev) 62static void generic_panel_power_off(struct omap_dss_device *dssdev)
60{ 63{
64 if (dssdev->state != OMAP_DSS_DISPLAY_ACTIVE)
65 return;
66
61 if (dssdev->platform_disable) 67 if (dssdev->platform_disable)
62 dssdev->platform_disable(dssdev); 68 dssdev->platform_disable(dssdev);
63 69
diff --git a/drivers/video/omap2/displays/panel-sharp-lq043t1dg01.c b/drivers/video/omap2/displays/panel-sharp-lq043t1dg01.c
index 10267461991c..0c6896cea2d0 100644
--- a/drivers/video/omap2/displays/panel-sharp-lq043t1dg01.c
+++ b/drivers/video/omap2/displays/panel-sharp-lq043t1dg01.c
@@ -43,6 +43,9 @@ static int sharp_lq_panel_power_on(struct omap_dss_device *dssdev)
43{ 43{
44 int r; 44 int r;
45 45
46 if (dssdev->state == OMAP_DSS_DISPLAY_ACTIVE)
47 return 0;
48
46 r = omapdss_dpi_display_enable(dssdev); 49 r = omapdss_dpi_display_enable(dssdev);
47 if (r) 50 if (r)
48 goto err0; 51 goto err0;
@@ -65,6 +68,9 @@ err0:
65 68
66static void sharp_lq_panel_power_off(struct omap_dss_device *dssdev) 69static void sharp_lq_panel_power_off(struct omap_dss_device *dssdev)
67{ 70{
71 if (dssdev->state != OMAP_DSS_DISPLAY_ACTIVE)
72 return;
73
68 if (dssdev->platform_disable) 74 if (dssdev->platform_disable)
69 dssdev->platform_disable(dssdev); 75 dssdev->platform_disable(dssdev);
70 76
diff --git a/drivers/video/omap2/displays/panel-sharp-ls037v7dw01.c b/drivers/video/omap2/displays/panel-sharp-ls037v7dw01.c
index 7d9eb2b1f5af..9a138f650e05 100644
--- a/drivers/video/omap2/displays/panel-sharp-ls037v7dw01.c
+++ b/drivers/video/omap2/displays/panel-sharp-ls037v7dw01.c
@@ -135,6 +135,9 @@ static int sharp_ls_power_on(struct omap_dss_device *dssdev)
135{ 135{
136 int r = 0; 136 int r = 0;
137 137
138 if (dssdev->state == OMAP_DSS_DISPLAY_ACTIVE)
139 return 0;
140
138 r = omapdss_dpi_display_enable(dssdev); 141 r = omapdss_dpi_display_enable(dssdev);
139 if (r) 142 if (r)
140 goto err0; 143 goto err0;
@@ -157,6 +160,9 @@ err0:
157 160
158static void sharp_ls_power_off(struct omap_dss_device *dssdev) 161static void sharp_ls_power_off(struct omap_dss_device *dssdev)
159{ 162{
163 if (dssdev->state != OMAP_DSS_DISPLAY_ACTIVE)
164 return;
165
160 if (dssdev->platform_disable) 166 if (dssdev->platform_disable)
161 dssdev->platform_disable(dssdev); 167 dssdev->platform_disable(dssdev);
162 168
diff --git a/drivers/video/omap2/displays/panel-toppoly-tdo35s.c b/drivers/video/omap2/displays/panel-toppoly-tdo35s.c
index e320e67d06f3..526e906c8a6c 100644
--- a/drivers/video/omap2/displays/panel-toppoly-tdo35s.c
+++ b/drivers/video/omap2/displays/panel-toppoly-tdo35s.c
@@ -46,6 +46,9 @@ static int toppoly_tdo_panel_power_on(struct omap_dss_device *dssdev)
46{ 46{
47 int r; 47 int r;
48 48
49 if (dssdev->state == OMAP_DSS_DISPLAY_ACTIVE)
50 return 0;
51
49 r = omapdss_dpi_display_enable(dssdev); 52 r = omapdss_dpi_display_enable(dssdev);
50 if (r) 53 if (r)
51 goto err0; 54 goto err0;
@@ -65,6 +68,9 @@ err0:
65 68
66static void toppoly_tdo_panel_power_off(struct omap_dss_device *dssdev) 69static void toppoly_tdo_panel_power_off(struct omap_dss_device *dssdev)
67{ 70{
71 if (dssdev->state != OMAP_DSS_DISPLAY_ACTIVE)
72 return;
73
68 if (dssdev->platform_disable) 74 if (dssdev->platform_disable)
69 dssdev->platform_disable(dssdev); 75 dssdev->platform_disable(dssdev);
70 76
diff --git a/drivers/video/omap2/displays/panel-tpo-td043mtea1.c b/drivers/video/omap2/displays/panel-tpo-td043mtea1.c
index e866e76b13d0..dbe9d43b4850 100644
--- a/drivers/video/omap2/displays/panel-tpo-td043mtea1.c
+++ b/drivers/video/omap2/displays/panel-tpo-td043mtea1.c
@@ -269,6 +269,9 @@ static int tpo_td043_power_on(struct omap_dss_device *dssdev)
269 int nreset_gpio = dssdev->reset_gpio; 269 int nreset_gpio = dssdev->reset_gpio;
270 int r; 270 int r;
271 271
272 if (dssdev->state == OMAP_DSS_DISPLAY_ACTIVE)
273 return 0;
274
272 r = omapdss_dpi_display_enable(dssdev); 275 r = omapdss_dpi_display_enable(dssdev);
273 if (r) 276 if (r)
274 goto err0; 277 goto err0;
@@ -308,6 +311,9 @@ static void tpo_td043_power_off(struct omap_dss_device *dssdev)
308 struct tpo_td043_device *tpo_td043 = dev_get_drvdata(&dssdev->dev); 311 struct tpo_td043_device *tpo_td043 = dev_get_drvdata(&dssdev->dev);
309 int nreset_gpio = dssdev->reset_gpio; 312 int nreset_gpio = dssdev->reset_gpio;
310 313
314 if (dssdev->state != OMAP_DSS_DISPLAY_ACTIVE)
315 return;
316
311 tpo_td043_write(tpo_td043->spi, 3, 317 tpo_td043_write(tpo_td043->spi, 3,
312 TPO_R03_VAL_STANDBY | TPO_R03_EN_PWM); 318 TPO_R03_VAL_STANDBY | TPO_R03_EN_PWM);
313 319
diff --git a/drivers/video/omap2/dss/Makefile b/drivers/video/omap2/dss/Makefile
index d71b5d9d71b1..7db17b5e570c 100644
--- a/drivers/video/omap2/dss/Makefile
+++ b/drivers/video/omap2/dss/Makefile
@@ -1,5 +1,5 @@
1obj-$(CONFIG_OMAP2_DSS) += omapdss.o 1obj-$(CONFIG_OMAP2_DSS) += omapdss.o
2omapdss-y := core.o dss.o dispc.o display.o manager.o overlay.o 2omapdss-y := core.o dss.o dss_features.o dispc.o display.o manager.o overlay.o
3omapdss-$(CONFIG_OMAP2_DSS_DPI) += dpi.o 3omapdss-$(CONFIG_OMAP2_DSS_DPI) += dpi.o
4omapdss-$(CONFIG_OMAP2_DSS_RFBI) += rfbi.o 4omapdss-$(CONFIG_OMAP2_DSS_RFBI) += rfbi.o
5omapdss-$(CONFIG_OMAP2_DSS_VENC) += venc.o 5omapdss-$(CONFIG_OMAP2_DSS_VENC) += venc.o
diff --git a/drivers/video/omap2/dss/core.c b/drivers/video/omap2/dss/core.c
index b3a498f22d36..8e89f6049280 100644
--- a/drivers/video/omap2/dss/core.c
+++ b/drivers/video/omap2/dss/core.c
@@ -37,6 +37,7 @@
37#include <plat/clock.h> 37#include <plat/clock.h>
38 38
39#include "dss.h" 39#include "dss.h"
40#include "dss_features.h"
40 41
41static struct { 42static struct {
42 struct platform_device *pdev; 43 struct platform_device *pdev;
@@ -502,6 +503,8 @@ static int omap_dss_probe(struct platform_device *pdev)
502 503
503 core.pdev = pdev; 504 core.pdev = pdev;
504 505
506 dss_features_init();
507
505 dss_init_overlay_managers(pdev); 508 dss_init_overlay_managers(pdev);
506 dss_init_overlays(pdev); 509 dss_init_overlays(pdev);
507 510
diff --git a/drivers/video/omap2/dss/dispc.c b/drivers/video/omap2/dss/dispc.c
index 5ecdc0004094..fa40fa59a9ac 100644
--- a/drivers/video/omap2/dss/dispc.c
+++ b/drivers/video/omap2/dss/dispc.c
@@ -39,6 +39,7 @@
39#include <plat/display.h> 39#include <plat/display.h>
40 40
41#include "dss.h" 41#include "dss.h"
42#include "dss_features.h"
42 43
43/* DISPC */ 44/* DISPC */
44#define DISPC_BASE 0x48050400 45#define DISPC_BASE 0x48050400
@@ -139,6 +140,22 @@ struct omap_dispc_isr_data {
139 u32 mask; 140 u32 mask;
140}; 141};
141 142
143struct dispc_h_coef {
144 s8 hc4;
145 s8 hc3;
146 u8 hc2;
147 s8 hc1;
148 s8 hc0;
149};
150
151struct dispc_v_coef {
152 s8 vc22;
153 s8 vc2;
154 u8 vc1;
155 s8 vc0;
156 s8 vc00;
157};
158
142#define REG_GET(idx, start, end) \ 159#define REG_GET(idx, start, end) \
143 FLD_GET(dispc_read_reg(idx), start, end) 160 FLD_GET(dispc_read_reg(idx), start, end)
144 161
@@ -564,106 +581,77 @@ static void _dispc_set_scale_coef(enum omap_plane plane, int hscaleup,
564 int vscaleup, int five_taps) 581 int vscaleup, int five_taps)
565{ 582{
566 /* Coefficients for horizontal up-sampling */ 583 /* Coefficients for horizontal up-sampling */
567 static const u32 coef_hup[8] = { 584 static const struct dispc_h_coef coef_hup[8] = {
568 0x00800000, 585 { 0, 0, 128, 0, 0 },
569 0x0D7CF800, 586 { -1, 13, 124, -8, 0 },
570 0x1E70F5FF, 587 { -2, 30, 112, -11, -1 },
571 0x335FF5FE, 588 { -5, 51, 95, -11, -2 },
572 0xF74949F7, 589 { 0, -9, 73, 73, -9 },
573 0xF55F33FB, 590 { -2, -11, 95, 51, -5 },
574 0xF5701EFE, 591 { -1, -11, 112, 30, -2 },
575 0xF87C0DFF, 592 { 0, -8, 124, 13, -1 },
576 }; 593 };
577 594
578 /* Coefficients for horizontal down-sampling */ 595 /* Coefficients for vertical up-sampling */
579 static const u32 coef_hdown[8] = { 596 static const struct dispc_v_coef coef_vup_3tap[8] = {
580 0x24382400, 597 { 0, 0, 128, 0, 0 },
581 0x28371FFE, 598 { 0, 3, 123, 2, 0 },
582 0x2C361BFB, 599 { 0, 12, 111, 5, 0 },
583 0x303516F9, 600 { 0, 32, 89, 7, 0 },
584 0x11343311, 601 { 0, 0, 64, 64, 0 },
585 0x1635300C, 602 { 0, 7, 89, 32, 0 },
586 0x1B362C08, 603 { 0, 5, 111, 12, 0 },
587 0x1F372804, 604 { 0, 2, 123, 3, 0 },
588 }; 605 };
589 606
590 /* Coefficients for horizontal and vertical up-sampling */ 607 static const struct dispc_v_coef coef_vup_5tap[8] = {
591 static const u32 coef_hvup[2][8] = { 608 { 0, 0, 128, 0, 0 },
592 { 609 { -1, 13, 124, -8, 0 },
593 0x00800000, 610 { -2, 30, 112, -11, -1 },
594 0x037B02FF, 611 { -5, 51, 95, -11, -2 },
595 0x0C6F05FE, 612 { 0, -9, 73, 73, -9 },
596 0x205907FB, 613 { -2, -11, 95, 51, -5 },
597 0x00404000, 614 { -1, -11, 112, 30, -2 },
598 0x075920FE, 615 { 0, -8, 124, 13, -1 },
599 0x056F0CFF,
600 0x027B0300,
601 },
602 {
603 0x00800000,
604 0x0D7CF8FF,
605 0x1E70F5FE,
606 0x335FF5FB,
607 0xF7404000,
608 0xF55F33FE,
609 0xF5701EFF,
610 0xF87C0D00,
611 },
612 }; 616 };
613 617
614 /* Coefficients for horizontal and vertical down-sampling */ 618 /* Coefficients for horizontal down-sampling */
615 static const u32 coef_hvdown[2][8] = { 619 static const struct dispc_h_coef coef_hdown[8] = {
616 { 620 { 0, 36, 56, 36, 0 },
617 0x24382400, 621 { 4, 40, 55, 31, -2 },
618 0x28391F04, 622 { 8, 44, 54, 27, -5 },
619 0x2D381B08, 623 { 12, 48, 53, 22, -7 },
620 0x3237170C, 624 { -9, 17, 52, 51, 17 },
621 0x123737F7, 625 { -7, 22, 53, 48, 12 },
622 0x173732F9, 626 { -5, 27, 54, 44, 8 },
623 0x1B382DFB, 627 { -2, 31, 55, 40, 4 },
624 0x1F3928FE,
625 },
626 {
627 0x24382400,
628 0x28371F04,
629 0x2C361B08,
630 0x3035160C,
631 0x113433F7,
632 0x163530F9,
633 0x1B362CFB,
634 0x1F3728FE,
635 },
636 }; 628 };
637 629
638 /* Coefficients for vertical up-sampling */ 630 /* Coefficients for vertical down-sampling */
639 static const u32 coef_vup[8] = { 631 static const struct dispc_v_coef coef_vdown_3tap[8] = {
640 0x00000000, 632 { 0, 36, 56, 36, 0 },
641 0x0000FF00, 633 { 0, 40, 57, 31, 0 },
642 0x0000FEFF, 634 { 0, 45, 56, 27, 0 },
643 0x0000FBFE, 635 { 0, 50, 55, 23, 0 },
644 0x000000F7, 636 { 0, 18, 55, 55, 0 },
645 0x0000FEFB, 637 { 0, 23, 55, 50, 0 },
646 0x0000FFFE, 638 { 0, 27, 56, 45, 0 },
647 0x000000FF, 639 { 0, 31, 57, 40, 0 },
648 }; 640 };
649 641
650 642 static const struct dispc_v_coef coef_vdown_5tap[8] = {
651 /* Coefficients for vertical down-sampling */ 643 { 0, 36, 56, 36, 0 },
652 static const u32 coef_vdown[8] = { 644 { 4, 40, 55, 31, -2 },
653 0x00000000, 645 { 8, 44, 54, 27, -5 },
654 0x000004FE, 646 { 12, 48, 53, 22, -7 },
655 0x000008FB, 647 { -9, 17, 52, 51, 17 },
656 0x00000CF9, 648 { -7, 22, 53, 48, 12 },
657 0x0000F711, 649 { -5, 27, 54, 44, 8 },
658 0x0000F90C, 650 { -2, 31, 55, 40, 4 },
659 0x0000FB08,
660 0x0000FE04,
661 }; 651 };
662 652
663 const u32 *h_coef; 653 const struct dispc_h_coef *h_coef;
664 const u32 *hv_coef; 654 const struct dispc_v_coef *v_coef;
665 const u32 *hv_coef_mod;
666 const u32 *v_coef;
667 int i; 655 int i;
668 656
669 if (hscaleup) 657 if (hscaleup)
@@ -671,47 +659,34 @@ static void _dispc_set_scale_coef(enum omap_plane plane, int hscaleup,
671 else 659 else
672 h_coef = coef_hdown; 660 h_coef = coef_hdown;
673 661
674 if (vscaleup) { 662 if (vscaleup)
675 hv_coef = coef_hvup[five_taps]; 663 v_coef = five_taps ? coef_vup_5tap : coef_vup_3tap;
676 v_coef = coef_vup; 664 else
677 665 v_coef = five_taps ? coef_vdown_5tap : coef_vdown_3tap;
678 if (hscaleup)
679 hv_coef_mod = NULL;
680 else
681 hv_coef_mod = coef_hvdown[five_taps];
682 } else {
683 hv_coef = coef_hvdown[five_taps];
684 v_coef = coef_vdown;
685
686 if (hscaleup)
687 hv_coef_mod = coef_hvup[five_taps];
688 else
689 hv_coef_mod = NULL;
690 }
691 666
692 for (i = 0; i < 8; i++) { 667 for (i = 0; i < 8; i++) {
693 u32 h, hv; 668 u32 h, hv;
694 669
695 h = h_coef[i]; 670 h = FLD_VAL(h_coef[i].hc0, 7, 0)
696 671 | FLD_VAL(h_coef[i].hc1, 15, 8)
697 hv = hv_coef[i]; 672 | FLD_VAL(h_coef[i].hc2, 23, 16)
698 673 | FLD_VAL(h_coef[i].hc3, 31, 24);
699 if (hv_coef_mod) { 674 hv = FLD_VAL(h_coef[i].hc4, 7, 0)
700 hv &= 0xffffff00; 675 | FLD_VAL(v_coef[i].vc0, 15, 8)
701 hv |= (hv_coef_mod[i] & 0xff); 676 | FLD_VAL(v_coef[i].vc1, 23, 16)
702 } 677 | FLD_VAL(v_coef[i].vc2, 31, 24);
703 678
704 _dispc_write_firh_reg(plane, i, h); 679 _dispc_write_firh_reg(plane, i, h);
705 _dispc_write_firhv_reg(plane, i, hv); 680 _dispc_write_firhv_reg(plane, i, hv);
706 } 681 }
707 682
708 if (!five_taps) 683 if (five_taps) {
709 return; 684 for (i = 0; i < 8; i++) {
710 685 u32 v;
711 for (i = 0; i < 8; i++) { 686 v = FLD_VAL(v_coef[i].vc00, 7, 0)
712 u32 v; 687 | FLD_VAL(v_coef[i].vc22, 15, 8);
713 v = v_coef[i]; 688 _dispc_write_firv_reg(plane, i, v);
714 _dispc_write_firv_reg(plane, i, v); 689 }
715 } 690 }
716} 691}
717 692
@@ -800,12 +775,12 @@ static void _dispc_set_vid_size(enum omap_plane plane, int width, int height)
800 775
801static void _dispc_setup_global_alpha(enum omap_plane plane, u8 global_alpha) 776static void _dispc_setup_global_alpha(enum omap_plane plane, u8 global_alpha)
802{ 777{
803 778 if (!dss_has_feature(FEAT_GLOBAL_ALPHA))
804 BUG_ON(plane == OMAP_DSS_VIDEO1);
805
806 if (cpu_is_omap24xx())
807 return; 779 return;
808 780
781 BUG_ON(!dss_has_feature(FEAT_GLOBAL_ALPHA_VID1) &&
782 plane == OMAP_DSS_VIDEO1);
783
809 if (plane == OMAP_DSS_GFX) 784 if (plane == OMAP_DSS_GFX)
810 REG_FLD_MOD(DISPC_GLOBAL_ALPHA, global_alpha, 7, 0); 785 REG_FLD_MOD(DISPC_GLOBAL_ALPHA, global_alpha, 7, 0);
811 else if (plane == OMAP_DSS_VIDEO2) 786 else if (plane == OMAP_DSS_VIDEO2)
@@ -975,17 +950,14 @@ static void dispc_read_plane_fifo_sizes(void)
975 DISPC_VID_FIFO_SIZE_STATUS(1) }; 950 DISPC_VID_FIFO_SIZE_STATUS(1) };
976 u32 size; 951 u32 size;
977 int plane; 952 int plane;
953 u8 start, end;
978 954
979 enable_clocks(1); 955 enable_clocks(1);
980 956
981 for (plane = 0; plane < ARRAY_SIZE(dispc.fifo_size); ++plane) { 957 dss_feat_get_reg_field(FEAT_REG_FIFOSIZE, &start, &end);
982 if (cpu_is_omap24xx())
983 size = FLD_GET(dispc_read_reg(fsz_reg[plane]), 8, 0);
984 else if (cpu_is_omap34xx())
985 size = FLD_GET(dispc_read_reg(fsz_reg[plane]), 10, 0);
986 else
987 BUG();
988 958
959 for (plane = 0; plane < ARRAY_SIZE(dispc.fifo_size); ++plane) {
960 size = FLD_GET(dispc_read_reg(fsz_reg[plane]), start, end);
989 dispc.fifo_size[plane] = size; 961 dispc.fifo_size[plane] = size;
990 } 962 }
991 963
@@ -1002,6 +974,8 @@ void dispc_setup_plane_fifo(enum omap_plane plane, u32 low, u32 high)
1002 const struct dispc_reg ftrs_reg[] = { DISPC_GFX_FIFO_THRESHOLD, 974 const struct dispc_reg ftrs_reg[] = { DISPC_GFX_FIFO_THRESHOLD,
1003 DISPC_VID_FIFO_THRESHOLD(0), 975 DISPC_VID_FIFO_THRESHOLD(0),
1004 DISPC_VID_FIFO_THRESHOLD(1) }; 976 DISPC_VID_FIFO_THRESHOLD(1) };
977 u8 hi_start, hi_end, lo_start, lo_end;
978
1005 enable_clocks(1); 979 enable_clocks(1);
1006 980
1007 DSSDBG("fifo(%d) low/high old %u/%u, new %u/%u\n", 981 DSSDBG("fifo(%d) low/high old %u/%u, new %u/%u\n",
@@ -1010,12 +984,12 @@ void dispc_setup_plane_fifo(enum omap_plane plane, u32 low, u32 high)
1010 REG_GET(ftrs_reg[plane], 27, 16), 984 REG_GET(ftrs_reg[plane], 27, 16),
1011 low, high); 985 low, high);
1012 986
1013 if (cpu_is_omap24xx()) 987 dss_feat_get_reg_field(FEAT_REG_FIFOHIGHTHRESHOLD, &hi_start, &hi_end);
1014 dispc_write_reg(ftrs_reg[plane], 988 dss_feat_get_reg_field(FEAT_REG_FIFOLOWTHRESHOLD, &lo_start, &lo_end);
1015 FLD_VAL(high, 24, 16) | FLD_VAL(low, 8, 0)); 989
1016 else 990 dispc_write_reg(ftrs_reg[plane],
1017 dispc_write_reg(ftrs_reg[plane], 991 FLD_VAL(high, hi_start, hi_end) |
1018 FLD_VAL(high, 27, 16) | FLD_VAL(low, 11, 0)); 992 FLD_VAL(low, lo_start, lo_end));
1019 993
1020 enable_clocks(0); 994 enable_clocks(0);
1021} 995}
@@ -1035,13 +1009,16 @@ static void _dispc_set_fir(enum omap_plane plane, int hinc, int vinc)
1035 u32 val; 1009 u32 val;
1036 const struct dispc_reg fir_reg[] = { DISPC_VID_FIR(0), 1010 const struct dispc_reg fir_reg[] = { DISPC_VID_FIR(0),
1037 DISPC_VID_FIR(1) }; 1011 DISPC_VID_FIR(1) };
1012 u8 hinc_start, hinc_end, vinc_start, vinc_end;
1038 1013
1039 BUG_ON(plane == OMAP_DSS_GFX); 1014 BUG_ON(plane == OMAP_DSS_GFX);
1040 1015
1041 if (cpu_is_omap24xx()) 1016 dss_feat_get_reg_field(FEAT_REG_FIRHINC, &hinc_start, &hinc_end);
1042 val = FLD_VAL(vinc, 27, 16) | FLD_VAL(hinc, 11, 0); 1017 dss_feat_get_reg_field(FEAT_REG_FIRVINC, &vinc_start, &vinc_end);
1043 else 1018
1044 val = FLD_VAL(vinc, 28, 16) | FLD_VAL(hinc, 12, 0); 1019 val = FLD_VAL(vinc, vinc_start, vinc_end) |
1020 FLD_VAL(hinc, hinc_start, hinc_end);
1021
1045 dispc_write_reg(fir_reg[plane-1], val); 1022 dispc_write_reg(fir_reg[plane-1], val);
1046} 1023}
1047 1024
@@ -1567,6 +1544,8 @@ static int _dispc_setup_plane(enum omap_plane plane,
1567 case OMAP_DSS_COLOR_ARGB16: 1544 case OMAP_DSS_COLOR_ARGB16:
1568 case OMAP_DSS_COLOR_ARGB32: 1545 case OMAP_DSS_COLOR_ARGB32:
1569 case OMAP_DSS_COLOR_RGBA32: 1546 case OMAP_DSS_COLOR_RGBA32:
1547 if (!dss_has_feature(FEAT_GLOBAL_ALPHA))
1548 return -EINVAL;
1570 case OMAP_DSS_COLOR_RGBX32: 1549 case OMAP_DSS_COLOR_RGBX32:
1571 if (cpu_is_omap24xx()) 1550 if (cpu_is_omap24xx())
1572 return -EINVAL; 1551 return -EINVAL;
@@ -1607,9 +1586,10 @@ static int _dispc_setup_plane(enum omap_plane plane,
1607 case OMAP_DSS_COLOR_ARGB16: 1586 case OMAP_DSS_COLOR_ARGB16:
1608 case OMAP_DSS_COLOR_ARGB32: 1587 case OMAP_DSS_COLOR_ARGB32:
1609 case OMAP_DSS_COLOR_RGBA32: 1588 case OMAP_DSS_COLOR_RGBA32:
1610 if (cpu_is_omap24xx()) 1589 if (!dss_has_feature(FEAT_GLOBAL_ALPHA))
1611 return -EINVAL; 1590 return -EINVAL;
1612 if (plane == OMAP_DSS_VIDEO1) 1591 if (!dss_has_feature(FEAT_GLOBAL_ALPHA_VID1) &&
1592 plane == OMAP_DSS_VIDEO1)
1613 return -EINVAL; 1593 return -EINVAL;
1614 break; 1594 break;
1615 1595
@@ -2002,7 +1982,7 @@ void dispc_enable_trans_key(enum omap_channel ch, bool enable)
2002} 1982}
2003void dispc_enable_alpha_blending(enum omap_channel ch, bool enable) 1983void dispc_enable_alpha_blending(enum omap_channel ch, bool enable)
2004{ 1984{
2005 if (cpu_is_omap24xx()) 1985 if (!dss_has_feature(FEAT_GLOBAL_ALPHA))
2006 return; 1986 return;
2007 1987
2008 enable_clocks(1); 1988 enable_clocks(1);
@@ -2016,7 +1996,7 @@ bool dispc_alpha_blending_enabled(enum omap_channel ch)
2016{ 1996{
2017 bool enabled; 1997 bool enabled;
2018 1998
2019 if (cpu_is_omap24xx()) 1999 if (!dss_has_feature(FEAT_GLOBAL_ALPHA))
2020 return false; 2000 return false;
2021 2001
2022 enable_clocks(1); 2002 enable_clocks(1);
diff --git a/drivers/video/omap2/dss/dsi.c b/drivers/video/omap2/dss/dsi.c
index b3fa3a7db911..aa4f7a5fae29 100644
--- a/drivers/video/omap2/dss/dsi.c
+++ b/drivers/video/omap2/dss/dsi.c
@@ -3274,7 +3274,6 @@ int dsi_init(struct platform_device *pdev)
3274 3274
3275 dsi.vdds_dsi_reg = dss_get_vdds_dsi(); 3275 dsi.vdds_dsi_reg = dss_get_vdds_dsi();
3276 if (IS_ERR(dsi.vdds_dsi_reg)) { 3276 if (IS_ERR(dsi.vdds_dsi_reg)) {
3277 iounmap(dsi.base);
3278 DSSERR("can't get VDDS_DSI regulator\n"); 3277 DSSERR("can't get VDDS_DSI regulator\n");
3279 r = PTR_ERR(dsi.vdds_dsi_reg); 3278 r = PTR_ERR(dsi.vdds_dsi_reg);
3280 goto err2; 3279 goto err2;
diff --git a/drivers/video/omap2/dss/dss_features.c b/drivers/video/omap2/dss/dss_features.c
new file mode 100644
index 000000000000..867f68de125f
--- /dev/null
+++ b/drivers/video/omap2/dss/dss_features.c
@@ -0,0 +1,191 @@
1/*
2 * linux/drivers/video/omap2/dss/dss_features.c
3 *
4 * Copyright (C) 2010 Texas Instruments
5 * Author: Archit Taneja <archit@ti.com>
6 *
7 * This program is free software; you can redistribute it and/or modify it
8 * under the terms of the GNU General Public License version 2 as published by
9 * the Free Software Foundation.
10 *
11 * This program is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
14 * more details.
15 *
16 * You should have received a copy of the GNU General Public License along with
17 * this program. If not, see <http://www.gnu.org/licenses/>.
18 */
19
20#include <linux/kernel.h>
21#include <linux/types.h>
22#include <linux/err.h>
23#include <linux/slab.h>
24
25#include <plat/display.h>
26#include <plat/cpu.h>
27
28#include "dss_features.h"
29
30/* Defines a generic omap register field */
31struct dss_reg_field {
32 enum dss_feat_reg_field id;
33 u8 start, end;
34};
35
36struct omap_dss_features {
37 const struct dss_reg_field *reg_fields;
38 const int num_reg_fields;
39
40 const u32 has_feature;
41
42 const int num_mgrs;
43 const int num_ovls;
44 const enum omap_display_type *supported_displays;
45 const enum omap_color_mode *supported_color_modes;
46};
47
48/* This struct is assigned to one of the below during initialization */
49static struct omap_dss_features *omap_current_dss_features;
50
51static const struct dss_reg_field omap2_dss_reg_fields[] = {
52 { FEAT_REG_FIRHINC, 11, 0 },
53 { FEAT_REG_FIRVINC, 27, 16 },
54 { FEAT_REG_FIFOLOWTHRESHOLD, 8, 0 },
55 { FEAT_REG_FIFOHIGHTHRESHOLD, 24, 16 },
56 { FEAT_REG_FIFOSIZE, 8, 0 },
57};
58
59static const struct dss_reg_field omap3_dss_reg_fields[] = {
60 { FEAT_REG_FIRHINC, 12, 0 },
61 { FEAT_REG_FIRVINC, 28, 16 },
62 { FEAT_REG_FIFOLOWTHRESHOLD, 11, 0 },
63 { FEAT_REG_FIFOHIGHTHRESHOLD, 27, 16 },
64 { FEAT_REG_FIFOSIZE, 10, 0 },
65};
66
67static const enum omap_display_type omap2_dss_supported_displays[] = {
68 /* OMAP_DSS_CHANNEL_LCD */
69 OMAP_DISPLAY_TYPE_DPI | OMAP_DISPLAY_TYPE_DBI |
70 OMAP_DISPLAY_TYPE_SDI | OMAP_DISPLAY_TYPE_DSI,
71
72 /* OMAP_DSS_CHANNEL_DIGIT */
73 OMAP_DISPLAY_TYPE_VENC,
74};
75
76static const enum omap_display_type omap3_dss_supported_displays[] = {
77 /* OMAP_DSS_CHANNEL_LCD */
78 OMAP_DISPLAY_TYPE_DPI | OMAP_DISPLAY_TYPE_DBI |
79 OMAP_DISPLAY_TYPE_SDI | OMAP_DISPLAY_TYPE_DSI,
80
81 /* OMAP_DSS_CHANNEL_DIGIT */
82 OMAP_DISPLAY_TYPE_VENC,
83};
84
85static const enum omap_color_mode omap2_dss_supported_color_modes[] = {
86 /* OMAP_DSS_GFX */
87 OMAP_DSS_COLOR_CLUT1 | OMAP_DSS_COLOR_CLUT2 |
88 OMAP_DSS_COLOR_CLUT4 | OMAP_DSS_COLOR_CLUT8 |
89 OMAP_DSS_COLOR_RGB12U | OMAP_DSS_COLOR_RGB16 |
90 OMAP_DSS_COLOR_RGB24U | OMAP_DSS_COLOR_RGB24P,
91
92 /* OMAP_DSS_VIDEO1 */
93 OMAP_DSS_COLOR_RGB16 | OMAP_DSS_COLOR_RGB24U |
94 OMAP_DSS_COLOR_RGB24P | OMAP_DSS_COLOR_YUV2 |
95 OMAP_DSS_COLOR_UYVY,
96
97 /* OMAP_DSS_VIDEO2 */
98 OMAP_DSS_COLOR_RGB16 | OMAP_DSS_COLOR_RGB24U |
99 OMAP_DSS_COLOR_RGB24P | OMAP_DSS_COLOR_YUV2 |
100 OMAP_DSS_COLOR_UYVY,
101};
102
103static const enum omap_color_mode omap3_dss_supported_color_modes[] = {
104 /* OMAP_DSS_GFX */
105 OMAP_DSS_COLOR_CLUT1 | OMAP_DSS_COLOR_CLUT2 |
106 OMAP_DSS_COLOR_CLUT4 | OMAP_DSS_COLOR_CLUT8 |
107 OMAP_DSS_COLOR_RGB12U | OMAP_DSS_COLOR_ARGB16 |
108 OMAP_DSS_COLOR_RGB16 | OMAP_DSS_COLOR_RGB24U |
109 OMAP_DSS_COLOR_RGB24P | OMAP_DSS_COLOR_ARGB32 |
110 OMAP_DSS_COLOR_RGBA32 | OMAP_DSS_COLOR_RGBX32,
111
112 /* OMAP_DSS_VIDEO1 */
113 OMAP_DSS_COLOR_RGB24U | OMAP_DSS_COLOR_RGB24P |
114 OMAP_DSS_COLOR_RGB12U | OMAP_DSS_COLOR_RGB16 |
115 OMAP_DSS_COLOR_YUV2 | OMAP_DSS_COLOR_UYVY,
116
117 /* OMAP_DSS_VIDEO2 */
118 OMAP_DSS_COLOR_RGB12U | OMAP_DSS_COLOR_ARGB16 |
119 OMAP_DSS_COLOR_RGB16 | OMAP_DSS_COLOR_RGB24U |
120 OMAP_DSS_COLOR_RGB24P | OMAP_DSS_COLOR_YUV2 |
121 OMAP_DSS_COLOR_UYVY | OMAP_DSS_COLOR_ARGB32 |
122 OMAP_DSS_COLOR_RGBA32 | OMAP_DSS_COLOR_RGBX32,
123};
124
125/* OMAP2 DSS Features */
126static struct omap_dss_features omap2_dss_features = {
127 .reg_fields = omap2_dss_reg_fields,
128 .num_reg_fields = ARRAY_SIZE(omap2_dss_reg_fields),
129
130 .num_mgrs = 2,
131 .num_ovls = 3,
132 .supported_displays = omap2_dss_supported_displays,
133 .supported_color_modes = omap2_dss_supported_color_modes,
134};
135
136/* OMAP3 DSS Features */
137static struct omap_dss_features omap3_dss_features = {
138 .reg_fields = omap3_dss_reg_fields,
139 .num_reg_fields = ARRAY_SIZE(omap3_dss_reg_fields),
140
141 .has_feature = FEAT_GLOBAL_ALPHA,
142
143 .num_mgrs = 2,
144 .num_ovls = 3,
145 .supported_displays = omap3_dss_supported_displays,
146 .supported_color_modes = omap3_dss_supported_color_modes,
147};
148
149/* Functions returning values related to a DSS feature */
150int dss_feat_get_num_mgrs(void)
151{
152 return omap_current_dss_features->num_mgrs;
153}
154
155int dss_feat_get_num_ovls(void)
156{
157 return omap_current_dss_features->num_ovls;
158}
159
160enum omap_display_type dss_feat_get_supported_displays(enum omap_channel channel)
161{
162 return omap_current_dss_features->supported_displays[channel];
163}
164
165enum omap_color_mode dss_feat_get_supported_color_modes(enum omap_plane plane)
166{
167 return omap_current_dss_features->supported_color_modes[plane];
168}
169
170/* DSS has_feature check */
171bool dss_has_feature(enum dss_feat_id id)
172{
173 return omap_current_dss_features->has_feature & id;
174}
175
176void dss_feat_get_reg_field(enum dss_feat_reg_field id, u8 *start, u8 *end)
177{
178 if (id >= omap_current_dss_features->num_reg_fields)
179 BUG();
180
181 *start = omap_current_dss_features->reg_fields[id].start;
182 *end = omap_current_dss_features->reg_fields[id].end;
183}
184
185void dss_features_init(void)
186{
187 if (cpu_is_omap24xx())
188 omap_current_dss_features = &omap2_dss_features;
189 else
190 omap_current_dss_features = &omap3_dss_features;
191}
diff --git a/drivers/video/omap2/dss/dss_features.h b/drivers/video/omap2/dss/dss_features.h
new file mode 100644
index 000000000000..cb231eaa9b31
--- /dev/null
+++ b/drivers/video/omap2/dss/dss_features.h
@@ -0,0 +1,50 @@
1/*
2 * linux/drivers/video/omap2/dss/dss_features.h
3 *
4 * Copyright (C) 2010 Texas Instruments
5 * Author: Archit Taneja <archit@ti.com>
6 *
7 * This program is free software; you can redistribute it and/or modify it
8 * under the terms of the GNU General Public License version 2 as published by
9 * the Free Software Foundation.
10 *
11 * This program is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
14 * more details.
15 *
16 * You should have received a copy of the GNU General Public License along with
17 * this program. If not, see <http://www.gnu.org/licenses/>.
18 */
19
20#ifndef __OMAP2_DSS_FEATURES_H
21#define __OMAP2_DSS_FEATURES_H
22
23#define MAX_DSS_MANAGERS 2
24#define MAX_DSS_OVERLAYS 3
25
26/* DSS has feature id */
27enum dss_feat_id {
28 FEAT_GLOBAL_ALPHA = 1 << 0,
29 FEAT_GLOBAL_ALPHA_VID1 = 1 << 1,
30};
31
32/* DSS register field id */
33enum dss_feat_reg_field {
34 FEAT_REG_FIRHINC,
35 FEAT_REG_FIRVINC,
36 FEAT_REG_FIFOHIGHTHRESHOLD,
37 FEAT_REG_FIFOLOWTHRESHOLD,
38 FEAT_REG_FIFOSIZE,
39};
40
41/* DSS Feature Functions */
42int dss_feat_get_num_mgrs(void);
43int dss_feat_get_num_ovls(void);
44enum omap_display_type dss_feat_get_supported_displays(enum omap_channel channel);
45enum omap_color_mode dss_feat_get_supported_color_modes(enum omap_plane plane);
46
47bool dss_has_feature(enum dss_feat_id id);
48void dss_feat_get_reg_field(enum dss_feat_reg_field id, u8 *start, u8 *end);
49void dss_features_init(void);
50#endif
diff --git a/drivers/video/omap2/dss/manager.c b/drivers/video/omap2/dss/manager.c
index 6a649ab5539e..545e9b9a4d92 100644
--- a/drivers/video/omap2/dss/manager.c
+++ b/drivers/video/omap2/dss/manager.c
@@ -33,6 +33,7 @@
33#include <plat/cpu.h> 33#include <plat/cpu.h>
34 34
35#include "dss.h" 35#include "dss.h"
36#include "dss_features.h"
36 37
37static int num_managers; 38static int num_managers;
38static struct list_head manager_list; 39static struct list_head manager_list;
@@ -448,8 +449,8 @@ struct manager_cache_data {
448 449
449static struct { 450static struct {
450 spinlock_t lock; 451 spinlock_t lock;
451 struct overlay_cache_data overlay_cache[3]; 452 struct overlay_cache_data overlay_cache[MAX_DSS_OVERLAYS];
452 struct manager_cache_data manager_cache[2]; 453 struct manager_cache_data manager_cache[MAX_DSS_MANAGERS];
453 454
454 bool irq_enabled; 455 bool irq_enabled;
455} dss_cache; 456} dss_cache;
@@ -882,12 +883,12 @@ static int configure_dispc(void)
882{ 883{
883 struct overlay_cache_data *oc; 884 struct overlay_cache_data *oc;
884 struct manager_cache_data *mc; 885 struct manager_cache_data *mc;
885 const int num_ovls = ARRAY_SIZE(dss_cache.overlay_cache); 886 const int num_ovls = dss_feat_get_num_ovls();
886 const int num_mgrs = ARRAY_SIZE(dss_cache.manager_cache); 887 const int num_mgrs = dss_feat_get_num_mgrs();
887 int i; 888 int i;
888 int r; 889 int r;
889 bool mgr_busy[2]; 890 bool mgr_busy[MAX_DSS_MANAGERS];
890 bool mgr_go[2]; 891 bool mgr_go[MAX_DSS_MANAGERS];
891 bool busy; 892 bool busy;
892 893
893 r = 0; 894 r = 0;
@@ -989,7 +990,7 @@ void dss_setup_partial_planes(struct omap_dss_device *dssdev,
989{ 990{
990 struct overlay_cache_data *oc; 991 struct overlay_cache_data *oc;
991 struct manager_cache_data *mc; 992 struct manager_cache_data *mc;
992 const int num_ovls = ARRAY_SIZE(dss_cache.overlay_cache); 993 const int num_ovls = dss_feat_get_num_ovls();
993 struct omap_overlay_manager *mgr; 994 struct omap_overlay_manager *mgr;
994 int i; 995 int i;
995 u16 x, y, w, h; 996 u16 x, y, w, h;
@@ -1121,8 +1122,8 @@ void dss_start_update(struct omap_dss_device *dssdev)
1121{ 1122{
1122 struct manager_cache_data *mc; 1123 struct manager_cache_data *mc;
1123 struct overlay_cache_data *oc; 1124 struct overlay_cache_data *oc;
1124 const int num_ovls = ARRAY_SIZE(dss_cache.overlay_cache); 1125 const int num_ovls = dss_feat_get_num_ovls();
1125 const int num_mgrs = ARRAY_SIZE(dss_cache.manager_cache); 1126 const int num_mgrs = dss_feat_get_num_mgrs();
1126 struct omap_overlay_manager *mgr; 1127 struct omap_overlay_manager *mgr;
1127 int i; 1128 int i;
1128 1129
@@ -1151,10 +1152,10 @@ static void dss_apply_irq_handler(void *data, u32 mask)
1151{ 1152{
1152 struct manager_cache_data *mc; 1153 struct manager_cache_data *mc;
1153 struct overlay_cache_data *oc; 1154 struct overlay_cache_data *oc;
1154 const int num_ovls = ARRAY_SIZE(dss_cache.overlay_cache); 1155 const int num_ovls = dss_feat_get_num_ovls();
1155 const int num_mgrs = ARRAY_SIZE(dss_cache.manager_cache); 1156 const int num_mgrs = dss_feat_get_num_mgrs();
1156 int i, r; 1157 int i, r;
1157 bool mgr_busy[2]; 1158 bool mgr_busy[MAX_DSS_MANAGERS];
1158 1159
1159 mgr_busy[0] = dispc_go_busy(0); 1160 mgr_busy[0] = dispc_go_busy(0);
1160 mgr_busy[1] = dispc_go_busy(1); 1161 mgr_busy[1] = dispc_go_busy(1);
@@ -1461,7 +1462,7 @@ int dss_init_overlay_managers(struct platform_device *pdev)
1461 1462
1462 num_managers = 0; 1463 num_managers = 0;
1463 1464
1464 for (i = 0; i < 2; ++i) { 1465 for (i = 0; i < dss_feat_get_num_mgrs(); ++i) {
1465 struct omap_overlay_manager *mgr; 1466 struct omap_overlay_manager *mgr;
1466 mgr = kzalloc(sizeof(*mgr), GFP_KERNEL); 1467 mgr = kzalloc(sizeof(*mgr), GFP_KERNEL);
1467 1468
@@ -1471,14 +1472,10 @@ int dss_init_overlay_managers(struct platform_device *pdev)
1471 case 0: 1472 case 0:
1472 mgr->name = "lcd"; 1473 mgr->name = "lcd";
1473 mgr->id = OMAP_DSS_CHANNEL_LCD; 1474 mgr->id = OMAP_DSS_CHANNEL_LCD;
1474 mgr->supported_displays =
1475 OMAP_DISPLAY_TYPE_DPI | OMAP_DISPLAY_TYPE_DBI |
1476 OMAP_DISPLAY_TYPE_SDI | OMAP_DISPLAY_TYPE_DSI;
1477 break; 1475 break;
1478 case 1: 1476 case 1:
1479 mgr->name = "tv"; 1477 mgr->name = "tv";
1480 mgr->id = OMAP_DSS_CHANNEL_DIGIT; 1478 mgr->id = OMAP_DSS_CHANNEL_DIGIT;
1481 mgr->supported_displays = OMAP_DISPLAY_TYPE_VENC;
1482 break; 1479 break;
1483 } 1480 }
1484 1481
@@ -1494,6 +1491,8 @@ int dss_init_overlay_managers(struct platform_device *pdev)
1494 mgr->disable = &dss_mgr_disable; 1491 mgr->disable = &dss_mgr_disable;
1495 1492
1496 mgr->caps = OMAP_DSS_OVL_MGR_CAP_DISPC; 1493 mgr->caps = OMAP_DSS_OVL_MGR_CAP_DISPC;
1494 mgr->supported_displays =
1495 dss_feat_get_supported_displays(mgr->id);
1497 1496
1498 dss_overlay_setup_dispc_manager(mgr); 1497 dss_overlay_setup_dispc_manager(mgr);
1499 1498
diff --git a/drivers/video/omap2/dss/overlay.c b/drivers/video/omap2/dss/overlay.c
index 244dca81a399..75642c22cac7 100644
--- a/drivers/video/omap2/dss/overlay.c
+++ b/drivers/video/omap2/dss/overlay.c
@@ -35,6 +35,7 @@
35#include <plat/cpu.h> 35#include <plat/cpu.h>
36 36
37#include "dss.h" 37#include "dss.h"
38#include "dss_features.h"
38 39
39static int num_overlays; 40static int num_overlays;
40static struct list_head overlay_list; 41static struct list_head overlay_list;
@@ -237,7 +238,8 @@ static ssize_t overlay_global_alpha_store(struct omap_overlay *ovl,
237 /* Video1 plane does not support global alpha 238 /* Video1 plane does not support global alpha
238 * to always make it 255 completely opaque 239 * to always make it 255 completely opaque
239 */ 240 */
240 if (ovl->id == OMAP_DSS_VIDEO1) 241 if (!dss_has_feature(FEAT_GLOBAL_ALPHA_VID1) &&
242 ovl->id == OMAP_DSS_VIDEO1)
241 info.global_alpha = 255; 243 info.global_alpha = 255;
242 else 244 else
243 info.global_alpha = simple_strtoul(buf, NULL, 10); 245 info.global_alpha = simple_strtoul(buf, NULL, 10);
@@ -510,11 +512,11 @@ static void omap_dss_add_overlay(struct omap_overlay *overlay)
510 list_add_tail(&overlay->list, &overlay_list); 512 list_add_tail(&overlay->list, &overlay_list);
511} 513}
512 514
513static struct omap_overlay *dispc_overlays[3]; 515static struct omap_overlay *dispc_overlays[MAX_DSS_OVERLAYS];
514 516
515void dss_overlay_setup_dispc_manager(struct omap_overlay_manager *mgr) 517void dss_overlay_setup_dispc_manager(struct omap_overlay_manager *mgr)
516{ 518{
517 mgr->num_overlays = 3; 519 mgr->num_overlays = dss_feat_get_num_ovls();
518 mgr->overlays = dispc_overlays; 520 mgr->overlays = dispc_overlays;
519} 521}
520 522
@@ -535,7 +537,7 @@ void dss_init_overlays(struct platform_device *pdev)
535 537
536 num_overlays = 0; 538 num_overlays = 0;
537 539
538 for (i = 0; i < 3; ++i) { 540 for (i = 0; i < dss_feat_get_num_ovls(); ++i) {
539 struct omap_overlay *ovl; 541 struct omap_overlay *ovl;
540 ovl = kzalloc(sizeof(*ovl), GFP_KERNEL); 542 ovl = kzalloc(sizeof(*ovl), GFP_KERNEL);
541 543
@@ -545,18 +547,12 @@ void dss_init_overlays(struct platform_device *pdev)
545 case 0: 547 case 0:
546 ovl->name = "gfx"; 548 ovl->name = "gfx";
547 ovl->id = OMAP_DSS_GFX; 549 ovl->id = OMAP_DSS_GFX;
548 ovl->supported_modes = cpu_is_omap34xx() ?
549 OMAP_DSS_COLOR_GFX_OMAP3 :
550 OMAP_DSS_COLOR_GFX_OMAP2;
551 ovl->caps = OMAP_DSS_OVL_CAP_DISPC; 550 ovl->caps = OMAP_DSS_OVL_CAP_DISPC;
552 ovl->info.global_alpha = 255; 551 ovl->info.global_alpha = 255;
553 break; 552 break;
554 case 1: 553 case 1:
555 ovl->name = "vid1"; 554 ovl->name = "vid1";
556 ovl->id = OMAP_DSS_VIDEO1; 555 ovl->id = OMAP_DSS_VIDEO1;
557 ovl->supported_modes = cpu_is_omap34xx() ?
558 OMAP_DSS_COLOR_VID1_OMAP3 :
559 OMAP_DSS_COLOR_VID_OMAP2;
560 ovl->caps = OMAP_DSS_OVL_CAP_SCALE | 556 ovl->caps = OMAP_DSS_OVL_CAP_SCALE |
561 OMAP_DSS_OVL_CAP_DISPC; 557 OMAP_DSS_OVL_CAP_DISPC;
562 ovl->info.global_alpha = 255; 558 ovl->info.global_alpha = 255;
@@ -564,9 +560,6 @@ void dss_init_overlays(struct platform_device *pdev)
564 case 2: 560 case 2:
565 ovl->name = "vid2"; 561 ovl->name = "vid2";
566 ovl->id = OMAP_DSS_VIDEO2; 562 ovl->id = OMAP_DSS_VIDEO2;
567 ovl->supported_modes = cpu_is_omap34xx() ?
568 OMAP_DSS_COLOR_VID2_OMAP3 :
569 OMAP_DSS_COLOR_VID_OMAP2;
570 ovl->caps = OMAP_DSS_OVL_CAP_SCALE | 563 ovl->caps = OMAP_DSS_OVL_CAP_SCALE |
571 OMAP_DSS_OVL_CAP_DISPC; 564 OMAP_DSS_OVL_CAP_DISPC;
572 ovl->info.global_alpha = 255; 565 ovl->info.global_alpha = 255;
@@ -579,6 +572,9 @@ void dss_init_overlays(struct platform_device *pdev)
579 ovl->get_overlay_info = &dss_ovl_get_overlay_info; 572 ovl->get_overlay_info = &dss_ovl_get_overlay_info;
580 ovl->wait_for_go = &dss_ovl_wait_for_go; 573 ovl->wait_for_go = &dss_ovl_wait_for_go;
581 574
575 ovl->supported_modes =
576 dss_feat_get_supported_color_modes(ovl->id);
577
582 omap_dss_add_overlay(ovl); 578 omap_dss_add_overlay(ovl);
583 579
584 r = kobject_init_and_add(&ovl->kobj, &overlay_ktype, 580 r = kobject_init_and_add(&ovl->kobj, &overlay_ktype,
@@ -651,7 +647,7 @@ void dss_recheck_connections(struct omap_dss_device *dssdev, bool force)
651 } 647 }
652 648
653 if (mgr) { 649 if (mgr) {
654 for (i = 0; i < 3; i++) { 650 for (i = 0; i < dss_feat_get_num_ovls(); i++) {
655 struct omap_overlay *ovl; 651 struct omap_overlay *ovl;
656 ovl = omap_dss_get_overlay(i); 652 ovl = omap_dss_get_overlay(i);
657 if (!ovl->manager || force) { 653 if (!ovl->manager || force) {
diff --git a/drivers/video/omap2/omapfb/Kconfig b/drivers/video/omap2/omapfb/Kconfig
index 43496d6c377f..65149b22cf37 100644
--- a/drivers/video/omap2/omapfb/Kconfig
+++ b/drivers/video/omap2/omapfb/Kconfig
@@ -3,7 +3,7 @@ menuconfig FB_OMAP2
3 depends on FB && OMAP2_DSS 3 depends on FB && OMAP2_DSS
4 4
5 select OMAP2_VRAM 5 select OMAP2_VRAM
6 select OMAP2_VRFB 6 select OMAP2_VRFB if ARCH_OMAP2 || ARCH_OMAP3
7 select FB_CFB_FILLRECT 7 select FB_CFB_FILLRECT
8 select FB_CFB_COPYAREA 8 select FB_CFB_COPYAREA
9 select FB_CFB_IMAGEBLIT 9 select FB_CFB_IMAGEBLIT
diff --git a/drivers/video/omap2/omapfb/omapfb-main.c b/drivers/video/omap2/omapfb/omapfb-main.c
index 04034d410d6d..6a704f176c22 100644
--- a/drivers/video/omap2/omapfb/omapfb-main.c
+++ b/drivers/video/omap2/omapfb/omapfb-main.c
@@ -714,10 +714,10 @@ int check_fb_var(struct fb_info *fbi, struct fb_var_screeninfo *var)
714 var->pixclock = timings.pixel_clock != 0 ? 714 var->pixclock = timings.pixel_clock != 0 ?
715 KHZ2PICOS(timings.pixel_clock) : 715 KHZ2PICOS(timings.pixel_clock) :
716 0; 716 0;
717 var->left_margin = timings.hfp; 717 var->left_margin = timings.hbp;
718 var->right_margin = timings.hbp; 718 var->right_margin = timings.hfp;
719 var->upper_margin = timings.vfp; 719 var->upper_margin = timings.vbp;
720 var->lower_margin = timings.vbp; 720 var->lower_margin = timings.vfp;
721 var->hsync_len = timings.hsw; 721 var->hsync_len = timings.hsw;
722 var->vsync_len = timings.vsw; 722 var->vsync_len = timings.vsw;
723 } else { 723 } else {
@@ -2059,10 +2059,10 @@ static int omapfb_mode_to_timings(const char *mode_str,
2059 2059
2060 if (r != 0) { 2060 if (r != 0) {
2061 timings->pixel_clock = PICOS2KHZ(var.pixclock); 2061 timings->pixel_clock = PICOS2KHZ(var.pixclock);
2062 timings->hfp = var.left_margin; 2062 timings->hbp = var.left_margin;
2063 timings->hbp = var.right_margin; 2063 timings->hfp = var.right_margin;
2064 timings->vfp = var.upper_margin; 2064 timings->vbp = var.upper_margin;
2065 timings->vbp = var.lower_margin; 2065 timings->vfp = var.lower_margin;
2066 timings->hsw = var.hsync_len; 2066 timings->hsw = var.hsync_len;
2067 timings->vsw = var.vsync_len; 2067 timings->vsw = var.vsync_len;
2068 timings->x_res = var.xres; 2068 timings->x_res = var.xres;
@@ -2198,6 +2198,16 @@ static int omapfb_probe(struct platform_device *pdev)
2198 goto err0; 2198 goto err0;
2199 } 2199 }
2200 2200
2201 /* TODO : Replace cpu check with omap_has_vrfb once HAS_FEATURE
2202 * available for OMAP2 and OMAP3
2203 */
2204 if (def_vrfb && !cpu_is_omap24xx() && !cpu_is_omap34xx()) {
2205 def_vrfb = 0;
2206 dev_warn(&pdev->dev, "VRFB is not supported on this hardware, "
2207 "ignoring the module parameter vrfb=y\n");
2208 }
2209
2210
2201 mutex_init(&fbdev->mtx); 2211 mutex_init(&fbdev->mtx);
2202 2212
2203 fbdev->dev = &pdev->dev; 2213 fbdev->dev = &pdev->dev;
diff --git a/fs/Makefile b/fs/Makefile
index e6ec1d309b1d..26956fcec917 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -29,10 +29,7 @@ obj-$(CONFIG_EVENTFD) += eventfd.o
29obj-$(CONFIG_AIO) += aio.o 29obj-$(CONFIG_AIO) += aio.o
30obj-$(CONFIG_FILE_LOCKING) += locks.o 30obj-$(CONFIG_FILE_LOCKING) += locks.o
31obj-$(CONFIG_COMPAT) += compat.o compat_ioctl.o 31obj-$(CONFIG_COMPAT) += compat.o compat_ioctl.o
32 32obj-$(CONFIG_NFSD_DEPRECATED) += nfsctl.o
33nfsd-$(CONFIG_NFSD) := nfsctl.o
34obj-y += $(nfsd-y) $(nfsd-m)
35
36obj-$(CONFIG_BINFMT_AOUT) += binfmt_aout.o 33obj-$(CONFIG_BINFMT_AOUT) += binfmt_aout.o
37obj-$(CONFIG_BINFMT_EM86) += binfmt_em86.o 34obj-$(CONFIG_BINFMT_EM86) += binfmt_em86.o
38obj-$(CONFIG_BINFMT_MISC) += binfmt_misc.o 35obj-$(CONFIG_BINFMT_MISC) += binfmt_misc.o
diff --git a/fs/compat.c b/fs/compat.c
index 0644a154672b..f03abdadc401 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -1963,7 +1963,7 @@ asmlinkage long compat_sys_ppoll(struct pollfd __user *ufds,
1963} 1963}
1964#endif /* HAVE_SET_RESTORE_SIGMASK */ 1964#endif /* HAVE_SET_RESTORE_SIGMASK */
1965 1965
1966#if defined(CONFIG_NFSD) || defined(CONFIG_NFSD_MODULE) 1966#if (defined(CONFIG_NFSD) || defined(CONFIG_NFSD_MODULE)) && !defined(CONFIG_NFSD_DEPRECATED)
1967/* Stuff for NFS server syscalls... */ 1967/* Stuff for NFS server syscalls... */
1968struct compat_nfsctl_svc { 1968struct compat_nfsctl_svc {
1969 u16 svc32_port; 1969 u16 svc32_port;
diff --git a/fs/lockd/host.c b/fs/lockd/host.c
index bb464d12104c..25e21e4023b2 100644
--- a/fs/lockd/host.c
+++ b/fs/lockd/host.c
@@ -353,6 +353,7 @@ nlm_bind_host(struct nlm_host *host)
353 .to_retries = 5U, 353 .to_retries = 5U,
354 }; 354 };
355 struct rpc_create_args args = { 355 struct rpc_create_args args = {
356 .net = &init_net,
356 .protocol = host->h_proto, 357 .protocol = host->h_proto,
357 .address = nlm_addr(host), 358 .address = nlm_addr(host),
358 .addrsize = host->h_addrlen, 359 .addrsize = host->h_addrlen,
diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c
index e3015464fbab..e0c918949644 100644
--- a/fs/lockd/mon.c
+++ b/fs/lockd/mon.c
@@ -69,6 +69,7 @@ static struct rpc_clnt *nsm_create(void)
69 .sin_addr.s_addr = htonl(INADDR_LOOPBACK), 69 .sin_addr.s_addr = htonl(INADDR_LOOPBACK),
70 }; 70 };
71 struct rpc_create_args args = { 71 struct rpc_create_args args = {
72 .net = &init_net,
72 .protocol = XPRT_TRANSPORT_UDP, 73 .protocol = XPRT_TRANSPORT_UDP,
73 .address = (struct sockaddr *)&sin, 74 .address = (struct sockaddr *)&sin,
74 .addrsize = sizeof(sin), 75 .addrsize = sizeof(sin),
diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c
index f1bacf1a0391..b13aabc12298 100644
--- a/fs/lockd/svc.c
+++ b/fs/lockd/svc.c
@@ -206,7 +206,7 @@ static int create_lockd_listener(struct svc_serv *serv, const char *name,
206 206
207 xprt = svc_find_xprt(serv, name, family, 0); 207 xprt = svc_find_xprt(serv, name, family, 0);
208 if (xprt == NULL) 208 if (xprt == NULL)
209 return svc_create_xprt(serv, name, family, port, 209 return svc_create_xprt(serv, name, &init_net, family, port,
210 SVC_SOCK_DEFAULTS); 210 SVC_SOCK_DEFAULTS);
211 svc_xprt_put(xprt); 211 svc_xprt_put(xprt);
212 return 0; 212 return 0;
diff --git a/fs/lockd/svc4proc.c b/fs/lockd/svc4proc.c
index 031c6569a134..a336e832475d 100644
--- a/fs/lockd/svc4proc.c
+++ b/fs/lockd/svc4proc.c
@@ -230,9 +230,7 @@ static void nlm4svc_callback_exit(struct rpc_task *task, void *data)
230 230
231static void nlm4svc_callback_release(void *data) 231static void nlm4svc_callback_release(void *data)
232{ 232{
233 lock_kernel();
234 nlm_release_call(data); 233 nlm_release_call(data);
235 unlock_kernel();
236} 234}
237 235
238static const struct rpc_call_ops nlm4svc_callback_ops = { 236static const struct rpc_call_ops nlm4svc_callback_ops = {
diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c
index 84055d31bfc5..6f1ef000975a 100644
--- a/fs/lockd/svclock.c
+++ b/fs/lockd/svclock.c
@@ -52,12 +52,13 @@ static const struct rpc_call_ops nlmsvc_grant_ops;
52 * The list of blocked locks to retry 52 * The list of blocked locks to retry
53 */ 53 */
54static LIST_HEAD(nlm_blocked); 54static LIST_HEAD(nlm_blocked);
55static DEFINE_SPINLOCK(nlm_blocked_lock);
55 56
56/* 57/*
57 * Insert a blocked lock into the global list 58 * Insert a blocked lock into the global list
58 */ 59 */
59static void 60static void
60nlmsvc_insert_block(struct nlm_block *block, unsigned long when) 61nlmsvc_insert_block_locked(struct nlm_block *block, unsigned long when)
61{ 62{
62 struct nlm_block *b; 63 struct nlm_block *b;
63 struct list_head *pos; 64 struct list_head *pos;
@@ -87,6 +88,13 @@ nlmsvc_insert_block(struct nlm_block *block, unsigned long when)
87 block->b_when = when; 88 block->b_when = when;
88} 89}
89 90
91static void nlmsvc_insert_block(struct nlm_block *block, unsigned long when)
92{
93 spin_lock(&nlm_blocked_lock);
94 nlmsvc_insert_block_locked(block, when);
95 spin_unlock(&nlm_blocked_lock);
96}
97
90/* 98/*
91 * Remove a block from the global list 99 * Remove a block from the global list
92 */ 100 */
@@ -94,7 +102,9 @@ static inline void
94nlmsvc_remove_block(struct nlm_block *block) 102nlmsvc_remove_block(struct nlm_block *block)
95{ 103{
96 if (!list_empty(&block->b_list)) { 104 if (!list_empty(&block->b_list)) {
105 spin_lock(&nlm_blocked_lock);
97 list_del_init(&block->b_list); 106 list_del_init(&block->b_list);
107 spin_unlock(&nlm_blocked_lock);
98 nlmsvc_release_block(block); 108 nlmsvc_release_block(block);
99 } 109 }
100} 110}
@@ -651,7 +661,7 @@ static int nlmsvc_grant_deferred(struct file_lock *fl, struct file_lock *conf,
651 struct nlm_block *block; 661 struct nlm_block *block;
652 int rc = -ENOENT; 662 int rc = -ENOENT;
653 663
654 lock_kernel(); 664 spin_lock(&nlm_blocked_lock);
655 list_for_each_entry(block, &nlm_blocked, b_list) { 665 list_for_each_entry(block, &nlm_blocked, b_list) {
656 if (nlm_compare_locks(&block->b_call->a_args.lock.fl, fl)) { 666 if (nlm_compare_locks(&block->b_call->a_args.lock.fl, fl)) {
657 dprintk("lockd: nlmsvc_notify_blocked block %p flags %d\n", 667 dprintk("lockd: nlmsvc_notify_blocked block %p flags %d\n",
@@ -665,13 +675,13 @@ static int nlmsvc_grant_deferred(struct file_lock *fl, struct file_lock *conf,
665 } else if (result == 0) 675 } else if (result == 0)
666 block->b_granted = 1; 676 block->b_granted = 1;
667 677
668 nlmsvc_insert_block(block, 0); 678 nlmsvc_insert_block_locked(block, 0);
669 svc_wake_up(block->b_daemon); 679 svc_wake_up(block->b_daemon);
670 rc = 0; 680 rc = 0;
671 break; 681 break;
672 } 682 }
673 } 683 }
674 unlock_kernel(); 684 spin_unlock(&nlm_blocked_lock);
675 if (rc == -ENOENT) 685 if (rc == -ENOENT)
676 printk(KERN_WARNING "lockd: grant for unknown block\n"); 686 printk(KERN_WARNING "lockd: grant for unknown block\n");
677 return rc; 687 return rc;
@@ -803,7 +813,7 @@ static void nlmsvc_grant_callback(struct rpc_task *task, void *data)
803 813
804 dprintk("lockd: GRANT_MSG RPC callback\n"); 814 dprintk("lockd: GRANT_MSG RPC callback\n");
805 815
806 lock_kernel(); 816 spin_lock(&nlm_blocked_lock);
807 /* if the block is not on a list at this point then it has 817 /* if the block is not on a list at this point then it has
808 * been invalidated. Don't try to requeue it. 818 * been invalidated. Don't try to requeue it.
809 * 819 *
@@ -825,19 +835,20 @@ static void nlmsvc_grant_callback(struct rpc_task *task, void *data)
825 /* Call was successful, now wait for client callback */ 835 /* Call was successful, now wait for client callback */
826 timeout = 60 * HZ; 836 timeout = 60 * HZ;
827 } 837 }
828 nlmsvc_insert_block(block, timeout); 838 nlmsvc_insert_block_locked(block, timeout);
829 svc_wake_up(block->b_daemon); 839 svc_wake_up(block->b_daemon);
830out: 840out:
831 unlock_kernel(); 841 spin_unlock(&nlm_blocked_lock);
832} 842}
833 843
844/*
845 * FIXME: nlmsvc_release_block() grabs a mutex. This is not allowed for an
846 * .rpc_release rpc_call_op
847 */
834static void nlmsvc_grant_release(void *data) 848static void nlmsvc_grant_release(void *data)
835{ 849{
836 struct nlm_rqst *call = data; 850 struct nlm_rqst *call = data;
837
838 lock_kernel();
839 nlmsvc_release_block(call->a_block); 851 nlmsvc_release_block(call->a_block);
840 unlock_kernel();
841} 852}
842 853
843static const struct rpc_call_ops nlmsvc_grant_ops = { 854static const struct rpc_call_ops nlmsvc_grant_ops = {
diff --git a/fs/lockd/svcproc.c b/fs/lockd/svcproc.c
index 0f2ab741ae7c..c3069f38d602 100644
--- a/fs/lockd/svcproc.c
+++ b/fs/lockd/svcproc.c
@@ -260,9 +260,7 @@ static void nlmsvc_callback_exit(struct rpc_task *task, void *data)
260 260
261static void nlmsvc_callback_release(void *data) 261static void nlmsvc_callback_release(void *data)
262{ 262{
263 lock_kernel();
264 nlm_release_call(data); 263 nlm_release_call(data);
265 unlock_kernel();
266} 264}
267 265
268static const struct rpc_call_ops nlmsvc_callback_ops = { 266static const struct rpc_call_ops nlmsvc_callback_ops = {
diff --git a/fs/nfs/Kconfig b/fs/nfs/Kconfig
index 5c55c26af165..fd667652c502 100644
--- a/fs/nfs/Kconfig
+++ b/fs/nfs/Kconfig
@@ -77,13 +77,17 @@ config NFS_V4
77 77
78config NFS_V4_1 78config NFS_V4_1
79 bool "NFS client support for NFSv4.1 (EXPERIMENTAL)" 79 bool "NFS client support for NFSv4.1 (EXPERIMENTAL)"
80 depends on NFS_V4 && EXPERIMENTAL 80 depends on NFS_FS && NFS_V4 && EXPERIMENTAL
81 select PNFS_FILE_LAYOUT
81 help 82 help
82 This option enables support for minor version 1 of the NFSv4 protocol 83 This option enables support for minor version 1 of the NFSv4 protocol
83 (draft-ietf-nfsv4-minorversion1) in the kernel's NFS client. 84 (RFC 5661) in the kernel's NFS client.
84 85
85 If unsure, say N. 86 If unsure, say N.
86 87
88config PNFS_FILE_LAYOUT
89 tristate
90
87config ROOT_NFS 91config ROOT_NFS
88 bool "Root file system on NFS" 92 bool "Root file system on NFS"
89 depends on NFS_FS=y && IP_PNP 93 depends on NFS_FS=y && IP_PNP
diff --git a/fs/nfs/Makefile b/fs/nfs/Makefile
index da7fda639eac..4776ff9e3814 100644
--- a/fs/nfs/Makefile
+++ b/fs/nfs/Makefile
@@ -15,5 +15,9 @@ nfs-$(CONFIG_NFS_V4) += nfs4proc.o nfs4xdr.o nfs4state.o nfs4renewd.o \
15 delegation.o idmap.o \ 15 delegation.o idmap.o \
16 callback.o callback_xdr.o callback_proc.o \ 16 callback.o callback_xdr.o callback_proc.o \
17 nfs4namespace.o 17 nfs4namespace.o
18nfs-$(CONFIG_NFS_V4_1) += pnfs.o
18nfs-$(CONFIG_SYSCTL) += sysctl.o 19nfs-$(CONFIG_SYSCTL) += sysctl.o
19nfs-$(CONFIG_NFS_FSCACHE) += fscache.o fscache-index.o 20nfs-$(CONFIG_NFS_FSCACHE) += fscache.o fscache-index.o
21
22obj-$(CONFIG_PNFS_FILE_LAYOUT) += nfs_layout_nfsv41_files.o
23nfs_layout_nfsv41_files-y := nfs4filelayout.o nfs4filelayoutdev.o
diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c
index e17b49e2eabd..aeec017fe814 100644
--- a/fs/nfs/callback.c
+++ b/fs/nfs/callback.c
@@ -109,7 +109,7 @@ nfs4_callback_up(struct svc_serv *serv)
109{ 109{
110 int ret; 110 int ret;
111 111
112 ret = svc_create_xprt(serv, "tcp", PF_INET, 112 ret = svc_create_xprt(serv, "tcp", &init_net, PF_INET,
113 nfs_callback_set_tcpport, SVC_SOCK_ANONYMOUS); 113 nfs_callback_set_tcpport, SVC_SOCK_ANONYMOUS);
114 if (ret <= 0) 114 if (ret <= 0)
115 goto out_err; 115 goto out_err;
@@ -117,7 +117,7 @@ nfs4_callback_up(struct svc_serv *serv)
117 dprintk("NFS: Callback listener port = %u (af %u)\n", 117 dprintk("NFS: Callback listener port = %u (af %u)\n",
118 nfs_callback_tcpport, PF_INET); 118 nfs_callback_tcpport, PF_INET);
119 119
120 ret = svc_create_xprt(serv, "tcp", PF_INET6, 120 ret = svc_create_xprt(serv, "tcp", &init_net, PF_INET6,
121 nfs_callback_set_tcpport, SVC_SOCK_ANONYMOUS); 121 nfs_callback_set_tcpport, SVC_SOCK_ANONYMOUS);
122 if (ret > 0) { 122 if (ret > 0) {
123 nfs_callback_tcpport6 = ret; 123 nfs_callback_tcpport6 = ret;
diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c
index 930d10fecdaf..2950fca0c61b 100644
--- a/fs/nfs/callback_proc.c
+++ b/fs/nfs/callback_proc.c
@@ -118,11 +118,11 @@ int nfs41_validate_delegation_stateid(struct nfs_delegation *delegation, const n
118 if (delegation == NULL) 118 if (delegation == NULL)
119 return 0; 119 return 0;
120 120
121 /* seqid is 4-bytes long */ 121 if (stateid->stateid.seqid != 0)
122 if (((u32 *) &stateid->data)[0] != 0)
123 return 0; 122 return 0;
124 if (memcmp(&delegation->stateid.data[4], &stateid->data[4], 123 if (memcmp(&delegation->stateid.stateid.other,
125 sizeof(stateid->data)-4)) 124 &stateid->stateid.other,
125 NFS4_STATEID_OTHER_SIZE))
126 return 0; 126 return 0;
127 127
128 return 1; 128 return 1;
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index a882785eba41..0870d0d4efc0 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -48,6 +48,7 @@
48#include "iostat.h" 48#include "iostat.h"
49#include "internal.h" 49#include "internal.h"
50#include "fscache.h" 50#include "fscache.h"
51#include "pnfs.h"
51 52
52#define NFSDBG_FACILITY NFSDBG_CLIENT 53#define NFSDBG_FACILITY NFSDBG_CLIENT
53 54
@@ -155,7 +156,9 @@ static struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_
155 cred = rpc_lookup_machine_cred(); 156 cred = rpc_lookup_machine_cred();
156 if (!IS_ERR(cred)) 157 if (!IS_ERR(cred))
157 clp->cl_machine_cred = cred; 158 clp->cl_machine_cred = cred;
158 159#if defined(CONFIG_NFS_V4_1)
160 INIT_LIST_HEAD(&clp->cl_layouts);
161#endif
159 nfs_fscache_get_client_cookie(clp); 162 nfs_fscache_get_client_cookie(clp);
160 163
161 return clp; 164 return clp;
@@ -252,6 +255,7 @@ void nfs_put_client(struct nfs_client *clp)
252 nfs_free_client(clp); 255 nfs_free_client(clp);
253 } 256 }
254} 257}
258EXPORT_SYMBOL_GPL(nfs_put_client);
255 259
256#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) 260#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
257/* 261/*
@@ -601,6 +605,7 @@ static int nfs_create_rpc_client(struct nfs_client *clp,
601{ 605{
602 struct rpc_clnt *clnt = NULL; 606 struct rpc_clnt *clnt = NULL;
603 struct rpc_create_args args = { 607 struct rpc_create_args args = {
608 .net = &init_net,
604 .protocol = clp->cl_proto, 609 .protocol = clp->cl_proto,
605 .address = (struct sockaddr *)&clp->cl_addr, 610 .address = (struct sockaddr *)&clp->cl_addr,
606 .addrsize = clp->cl_addrlen, 611 .addrsize = clp->cl_addrlen,
@@ -900,6 +905,8 @@ static void nfs_server_set_fsinfo(struct nfs_server *server, struct nfs_fsinfo *
900 if (server->wsize > NFS_MAX_FILE_IO_SIZE) 905 if (server->wsize > NFS_MAX_FILE_IO_SIZE)
901 server->wsize = NFS_MAX_FILE_IO_SIZE; 906 server->wsize = NFS_MAX_FILE_IO_SIZE;
902 server->wpages = (server->wsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; 907 server->wpages = (server->wsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
908 set_pnfs_layoutdriver(server, fsinfo->layouttype);
909
903 server->wtmult = nfs_block_bits(fsinfo->wtmult, NULL); 910 server->wtmult = nfs_block_bits(fsinfo->wtmult, NULL);
904 911
905 server->dtsize = nfs_block_size(fsinfo->dtpref, NULL); 912 server->dtsize = nfs_block_size(fsinfo->dtpref, NULL);
@@ -939,6 +946,7 @@ static int nfs_probe_fsinfo(struct nfs_server *server, struct nfs_fh *mntfh, str
939 } 946 }
940 947
941 fsinfo.fattr = fattr; 948 fsinfo.fattr = fattr;
949 fsinfo.layouttype = 0;
942 error = clp->rpc_ops->fsinfo(server, mntfh, &fsinfo); 950 error = clp->rpc_ops->fsinfo(server, mntfh, &fsinfo);
943 if (error < 0) 951 if (error < 0)
944 goto out_error; 952 goto out_error;
@@ -1021,6 +1029,7 @@ void nfs_free_server(struct nfs_server *server)
1021{ 1029{
1022 dprintk("--> nfs_free_server()\n"); 1030 dprintk("--> nfs_free_server()\n");
1023 1031
1032 unset_pnfs_layoutdriver(server);
1024 spin_lock(&nfs_client_lock); 1033 spin_lock(&nfs_client_lock);
1025 list_del(&server->client_link); 1034 list_del(&server->client_link);
1026 list_del(&server->master_link); 1035 list_del(&server->master_link);
diff --git a/fs/nfs/dns_resolve.c b/fs/nfs/dns_resolve.c
index dba50a5625db..a6e711ad130f 100644
--- a/fs/nfs/dns_resolve.c
+++ b/fs/nfs/dns_resolve.c
@@ -167,7 +167,7 @@ static int nfs_dns_show(struct seq_file *m, struct cache_detail *cd,
167 return 0; 167 return 0;
168 } 168 }
169 item = container_of(h, struct nfs_dns_ent, h); 169 item = container_of(h, struct nfs_dns_ent, h);
170 ttl = (long)item->h.expiry_time - (long)get_seconds(); 170 ttl = item->h.expiry_time - seconds_since_boot();
171 if (ttl < 0) 171 if (ttl < 0)
172 ttl = 0; 172 ttl = 0;
173 173
@@ -239,7 +239,7 @@ static int nfs_dns_parse(struct cache_detail *cd, char *buf, int buflen)
239 ttl = get_expiry(&buf); 239 ttl = get_expiry(&buf);
240 if (ttl == 0) 240 if (ttl == 0)
241 goto out; 241 goto out;
242 key.h.expiry_time = ttl + get_seconds(); 242 key.h.expiry_time = ttl + seconds_since_boot();
243 243
244 ret = -ENOMEM; 244 ret = -ENOMEM;
245 item = nfs_dns_lookup(cd, &key); 245 item = nfs_dns_lookup(cd, &key);
@@ -301,7 +301,7 @@ static int do_cache_lookup_nowait(struct cache_detail *cd,
301 goto out_err; 301 goto out_err;
302 ret = -ETIMEDOUT; 302 ret = -ETIMEDOUT;
303 if (!test_bit(CACHE_VALID, &(*item)->h.flags) 303 if (!test_bit(CACHE_VALID, &(*item)->h.flags)
304 || (*item)->h.expiry_time < get_seconds() 304 || (*item)->h.expiry_time < seconds_since_boot()
305 || cd->flush_time > (*item)->h.last_refresh) 305 || cd->flush_time > (*item)->h.last_refresh)
306 goto out_put; 306 goto out_put;
307 ret = -ENOENT; 307 ret = -ENOENT;
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index e18c31e08a28..e756075637b0 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -36,6 +36,7 @@
36#include "internal.h" 36#include "internal.h"
37#include "iostat.h" 37#include "iostat.h"
38#include "fscache.h" 38#include "fscache.h"
39#include "pnfs.h"
39 40
40#define NFSDBG_FACILITY NFSDBG_FILE 41#define NFSDBG_FACILITY NFSDBG_FILE
41 42
@@ -386,6 +387,10 @@ static int nfs_write_begin(struct file *file, struct address_space *mapping,
386 file->f_path.dentry->d_name.name, 387 file->f_path.dentry->d_name.name,
387 mapping->host->i_ino, len, (long long) pos); 388 mapping->host->i_ino, len, (long long) pos);
388 389
390 pnfs_update_layout(mapping->host,
391 nfs_file_open_context(file),
392 IOMODE_RW);
393
389start: 394start:
390 /* 395 /*
391 * Prevent starvation issues if someone is doing a consistency 396 * Prevent starvation issues if someone is doing a consistency
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 6eec28656415..314f57164602 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -48,6 +48,7 @@
48#include "internal.h" 48#include "internal.h"
49#include "fscache.h" 49#include "fscache.h"
50#include "dns_resolve.h" 50#include "dns_resolve.h"
51#include "pnfs.h"
51 52
52#define NFSDBG_FACILITY NFSDBG_VFS 53#define NFSDBG_FACILITY NFSDBG_VFS
53 54
@@ -1410,6 +1411,7 @@ void nfs4_evict_inode(struct inode *inode)
1410{ 1411{
1411 truncate_inode_pages(&inode->i_data, 0); 1412 truncate_inode_pages(&inode->i_data, 0);
1412 end_writeback(inode); 1413 end_writeback(inode);
1414 pnfs_destroy_layout(NFS_I(inode));
1413 /* If we are holding a delegation, return it! */ 1415 /* If we are holding a delegation, return it! */
1414 nfs_inode_return_delegation_noreclaim(inode); 1416 nfs_inode_return_delegation_noreclaim(inode);
1415 /* First call standard NFS clear_inode() code */ 1417 /* First call standard NFS clear_inode() code */
@@ -1447,6 +1449,7 @@ static inline void nfs4_init_once(struct nfs_inode *nfsi)
1447 nfsi->delegation = NULL; 1449 nfsi->delegation = NULL;
1448 nfsi->delegation_state = 0; 1450 nfsi->delegation_state = 0;
1449 init_rwsem(&nfsi->rwsem); 1451 init_rwsem(&nfsi->rwsem);
1452 nfsi->layout = NULL;
1450#endif 1453#endif
1451} 1454}
1452 1455
diff --git a/fs/nfs/mount_clnt.c b/fs/nfs/mount_clnt.c
index d610203d95c6..eceafe74f473 100644
--- a/fs/nfs/mount_clnt.c
+++ b/fs/nfs/mount_clnt.c
@@ -153,6 +153,7 @@ int nfs_mount(struct nfs_mount_request *info)
153 .rpc_resp = &result, 153 .rpc_resp = &result,
154 }; 154 };
155 struct rpc_create_args args = { 155 struct rpc_create_args args = {
156 .net = &init_net,
156 .protocol = info->protocol, 157 .protocol = info->protocol,
157 .address = info->sap, 158 .address = info->sap,
158 .addrsize = info->salen, 159 .addrsize = info->salen,
@@ -224,6 +225,7 @@ void nfs_umount(const struct nfs_mount_request *info)
224 .to_retries = 2, 225 .to_retries = 2,
225 }; 226 };
226 struct rpc_create_args args = { 227 struct rpc_create_args args = {
228 .net = &init_net,
227 .protocol = IPPROTO_UDP, 229 .protocol = IPPROTO_UDP,
228 .address = info->sap, 230 .address = info->sap,
229 .addrsize = info->salen, 231 .addrsize = info->salen,
diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c
new file mode 100644
index 000000000000..2e92f0d8d654
--- /dev/null
+++ b/fs/nfs/nfs4filelayout.c
@@ -0,0 +1,280 @@
1/*
2 * Module for the pnfs nfs4 file layout driver.
3 * Defines all I/O and Policy interface operations, plus code
4 * to register itself with the pNFS client.
5 *
6 * Copyright (c) 2002
7 * The Regents of the University of Michigan
8 * All Rights Reserved
9 *
10 * Dean Hildebrand <dhildebz@umich.edu>
11 *
12 * Permission is granted to use, copy, create derivative works, and
13 * redistribute this software and such derivative works for any purpose,
14 * so long as the name of the University of Michigan is not used in
15 * any advertising or publicity pertaining to the use or distribution
16 * of this software without specific, written prior authorization. If
17 * the above copyright notice or any other identification of the
18 * University of Michigan is included in any copy of any portion of
19 * this software, then the disclaimer below must also be included.
20 *
21 * This software is provided as is, without representation or warranty
22 * of any kind either express or implied, including without limitation
23 * the implied warranties of merchantability, fitness for a particular
24 * purpose, or noninfringement. The Regents of the University of
25 * Michigan shall not be liable for any damages, including special,
26 * indirect, incidental, or consequential damages, with respect to any
27 * claim arising out of or in connection with the use of the software,
28 * even if it has been or is hereafter advised of the possibility of
29 * such damages.
30 */
31
32#include <linux/nfs_fs.h>
33
34#include "internal.h"
35#include "nfs4filelayout.h"
36
37#define NFSDBG_FACILITY NFSDBG_PNFS_LD
38
39MODULE_LICENSE("GPL");
40MODULE_AUTHOR("Dean Hildebrand <dhildebz@umich.edu>");
41MODULE_DESCRIPTION("The NFSv4 file layout driver");
42
43static int
44filelayout_set_layoutdriver(struct nfs_server *nfss)
45{
46 int status = pnfs_alloc_init_deviceid_cache(nfss->nfs_client,
47 nfs4_fl_free_deviceid_callback);
48 if (status) {
49 printk(KERN_WARNING "%s: deviceid cache could not be "
50 "initialized\n", __func__);
51 return status;
52 }
53 dprintk("%s: deviceid cache has been initialized successfully\n",
54 __func__);
55 return 0;
56}
57
58/* Clear out the layout by destroying its device list */
59static int
60filelayout_clear_layoutdriver(struct nfs_server *nfss)
61{
62 dprintk("--> %s\n", __func__);
63
64 if (nfss->nfs_client->cl_devid_cache)
65 pnfs_put_deviceid_cache(nfss->nfs_client);
66 return 0;
67}
68
69/*
70 * filelayout_check_layout()
71 *
72 * Make sure layout segment parameters are sane WRT the device.
73 * At this point no generic layer initialization of the lseg has occurred,
74 * and nothing has been added to the layout_hdr cache.
75 *
76 */
77static int
78filelayout_check_layout(struct pnfs_layout_hdr *lo,
79 struct nfs4_filelayout_segment *fl,
80 struct nfs4_layoutget_res *lgr,
81 struct nfs4_deviceid *id)
82{
83 struct nfs4_file_layout_dsaddr *dsaddr;
84 int status = -EINVAL;
85 struct nfs_server *nfss = NFS_SERVER(lo->inode);
86
87 dprintk("--> %s\n", __func__);
88
89 if (fl->pattern_offset > lgr->range.offset) {
90 dprintk("%s pattern_offset %lld to large\n",
91 __func__, fl->pattern_offset);
92 goto out;
93 }
94
95 if (fl->stripe_unit % PAGE_SIZE) {
96 dprintk("%s Stripe unit (%u) not page aligned\n",
97 __func__, fl->stripe_unit);
98 goto out;
99 }
100
101 /* find and reference the deviceid */
102 dsaddr = nfs4_fl_find_get_deviceid(nfss->nfs_client, id);
103 if (dsaddr == NULL) {
104 dsaddr = get_device_info(lo->inode, id);
105 if (dsaddr == NULL)
106 goto out;
107 }
108 fl->dsaddr = dsaddr;
109
110 if (fl->first_stripe_index < 0 ||
111 fl->first_stripe_index >= dsaddr->stripe_count) {
112 dprintk("%s Bad first_stripe_index %d\n",
113 __func__, fl->first_stripe_index);
114 goto out_put;
115 }
116
117 if ((fl->stripe_type == STRIPE_SPARSE &&
118 fl->num_fh > 1 && fl->num_fh != dsaddr->ds_num) ||
119 (fl->stripe_type == STRIPE_DENSE &&
120 fl->num_fh != dsaddr->stripe_count)) {
121 dprintk("%s num_fh %u not valid for given packing\n",
122 __func__, fl->num_fh);
123 goto out_put;
124 }
125
126 if (fl->stripe_unit % nfss->rsize || fl->stripe_unit % nfss->wsize) {
127 dprintk("%s Stripe unit (%u) not aligned with rsize %u "
128 "wsize %u\n", __func__, fl->stripe_unit, nfss->rsize,
129 nfss->wsize);
130 }
131
132 status = 0;
133out:
134 dprintk("--> %s returns %d\n", __func__, status);
135 return status;
136out_put:
137 pnfs_put_deviceid(nfss->nfs_client->cl_devid_cache, &dsaddr->deviceid);
138 goto out;
139}
140
141static void filelayout_free_fh_array(struct nfs4_filelayout_segment *fl)
142{
143 int i;
144
145 for (i = 0; i < fl->num_fh; i++) {
146 if (!fl->fh_array[i])
147 break;
148 kfree(fl->fh_array[i]);
149 }
150 kfree(fl->fh_array);
151 fl->fh_array = NULL;
152}
153
154static void
155_filelayout_free_lseg(struct nfs4_filelayout_segment *fl)
156{
157 filelayout_free_fh_array(fl);
158 kfree(fl);
159}
160
161static int
162filelayout_decode_layout(struct pnfs_layout_hdr *flo,
163 struct nfs4_filelayout_segment *fl,
164 struct nfs4_layoutget_res *lgr,
165 struct nfs4_deviceid *id)
166{
167 uint32_t *p = (uint32_t *)lgr->layout.buf;
168 uint32_t nfl_util;
169 int i;
170
171 dprintk("%s: set_layout_map Begin\n", __func__);
172
173 memcpy(id, p, sizeof(*id));
174 p += XDR_QUADLEN(NFS4_DEVICEID4_SIZE);
175 print_deviceid(id);
176
177 nfl_util = be32_to_cpup(p++);
178 if (nfl_util & NFL4_UFLG_COMMIT_THRU_MDS)
179 fl->commit_through_mds = 1;
180 if (nfl_util & NFL4_UFLG_DENSE)
181 fl->stripe_type = STRIPE_DENSE;
182 else
183 fl->stripe_type = STRIPE_SPARSE;
184 fl->stripe_unit = nfl_util & ~NFL4_UFLG_MASK;
185
186 fl->first_stripe_index = be32_to_cpup(p++);
187 p = xdr_decode_hyper(p, &fl->pattern_offset);
188 fl->num_fh = be32_to_cpup(p++);
189
190 dprintk("%s: nfl_util 0x%X num_fh %u fsi %u po %llu\n",
191 __func__, nfl_util, fl->num_fh, fl->first_stripe_index,
192 fl->pattern_offset);
193
194 fl->fh_array = kzalloc(fl->num_fh * sizeof(struct nfs_fh *),
195 GFP_KERNEL);
196 if (!fl->fh_array)
197 return -ENOMEM;
198
199 for (i = 0; i < fl->num_fh; i++) {
200 /* Do we want to use a mempool here? */
201 fl->fh_array[i] = kmalloc(sizeof(struct nfs_fh), GFP_KERNEL);
202 if (!fl->fh_array[i]) {
203 filelayout_free_fh_array(fl);
204 return -ENOMEM;
205 }
206 fl->fh_array[i]->size = be32_to_cpup(p++);
207 if (sizeof(struct nfs_fh) < fl->fh_array[i]->size) {
208 printk(KERN_ERR "Too big fh %d received %d\n",
209 i, fl->fh_array[i]->size);
210 filelayout_free_fh_array(fl);
211 return -EIO;
212 }
213 memcpy(fl->fh_array[i]->data, p, fl->fh_array[i]->size);
214 p += XDR_QUADLEN(fl->fh_array[i]->size);
215 dprintk("DEBUG: %s: fh len %d\n", __func__,
216 fl->fh_array[i]->size);
217 }
218
219 return 0;
220}
221
222static struct pnfs_layout_segment *
223filelayout_alloc_lseg(struct pnfs_layout_hdr *layoutid,
224 struct nfs4_layoutget_res *lgr)
225{
226 struct nfs4_filelayout_segment *fl;
227 int rc;
228 struct nfs4_deviceid id;
229
230 dprintk("--> %s\n", __func__);
231 fl = kzalloc(sizeof(*fl), GFP_KERNEL);
232 if (!fl)
233 return NULL;
234
235 rc = filelayout_decode_layout(layoutid, fl, lgr, &id);
236 if (rc != 0 || filelayout_check_layout(layoutid, fl, lgr, &id)) {
237 _filelayout_free_lseg(fl);
238 return NULL;
239 }
240 return &fl->generic_hdr;
241}
242
243static void
244filelayout_free_lseg(struct pnfs_layout_segment *lseg)
245{
246 struct nfs_server *nfss = NFS_SERVER(lseg->layout->inode);
247 struct nfs4_filelayout_segment *fl = FILELAYOUT_LSEG(lseg);
248
249 dprintk("--> %s\n", __func__);
250 pnfs_put_deviceid(nfss->nfs_client->cl_devid_cache,
251 &fl->dsaddr->deviceid);
252 _filelayout_free_lseg(fl);
253}
254
255static struct pnfs_layoutdriver_type filelayout_type = {
256 .id = LAYOUT_NFSV4_1_FILES,
257 .name = "LAYOUT_NFSV4_1_FILES",
258 .owner = THIS_MODULE,
259 .set_layoutdriver = filelayout_set_layoutdriver,
260 .clear_layoutdriver = filelayout_clear_layoutdriver,
261 .alloc_lseg = filelayout_alloc_lseg,
262 .free_lseg = filelayout_free_lseg,
263};
264
265static int __init nfs4filelayout_init(void)
266{
267 printk(KERN_INFO "%s: NFSv4 File Layout Driver Registering...\n",
268 __func__);
269 return pnfs_register_layoutdriver(&filelayout_type);
270}
271
272static void __exit nfs4filelayout_exit(void)
273{
274 printk(KERN_INFO "%s: NFSv4 File Layout Driver Unregistering...\n",
275 __func__);
276 pnfs_unregister_layoutdriver(&filelayout_type);
277}
278
279module_init(nfs4filelayout_init);
280module_exit(nfs4filelayout_exit);
diff --git a/fs/nfs/nfs4filelayout.h b/fs/nfs/nfs4filelayout.h
new file mode 100644
index 000000000000..bbf60dd2ab9d
--- /dev/null
+++ b/fs/nfs/nfs4filelayout.h
@@ -0,0 +1,94 @@
1/*
2 * NFSv4 file layout driver data structures.
3 *
4 * Copyright (c) 2002
5 * The Regents of the University of Michigan
6 * All Rights Reserved
7 *
8 * Dean Hildebrand <dhildebz@umich.edu>
9 *
10 * Permission is granted to use, copy, create derivative works, and
11 * redistribute this software and such derivative works for any purpose,
12 * so long as the name of the University of Michigan is not used in
13 * any advertising or publicity pertaining to the use or distribution
14 * of this software without specific, written prior authorization. If
15 * the above copyright notice or any other identification of the
16 * University of Michigan is included in any copy of any portion of
17 * this software, then the disclaimer below must also be included.
18 *
19 * This software is provided as is, without representation or warranty
20 * of any kind either express or implied, including without limitation
21 * the implied warranties of merchantability, fitness for a particular
22 * purpose, or noninfringement. The Regents of the University of
23 * Michigan shall not be liable for any damages, including special,
24 * indirect, incidental, or consequential damages, with respect to any
25 * claim arising out of or in connection with the use of the software,
26 * even if it has been or is hereafter advised of the possibility of
27 * such damages.
28 */
29
30#ifndef FS_NFS_NFS4FILELAYOUT_H
31#define FS_NFS_NFS4FILELAYOUT_H
32
33#include "pnfs.h"
34
35/*
36 * Field testing shows we need to support upto 4096 stripe indices.
37 * We store each index as a u8 (u32 on the wire) to keep the memory footprint
38 * reasonable. This in turn means we support a maximum of 256
39 * RFC 5661 multipath_list4 structures.
40 */
41#define NFS4_PNFS_MAX_STRIPE_CNT 4096
42#define NFS4_PNFS_MAX_MULTI_CNT 256 /* 256 fit into a u8 stripe_index */
43
44enum stripetype4 {
45 STRIPE_SPARSE = 1,
46 STRIPE_DENSE = 2
47};
48
49/* Individual ip address */
50struct nfs4_pnfs_ds {
51 struct list_head ds_node; /* nfs4_pnfs_dev_hlist dev_dslist */
52 u32 ds_ip_addr;
53 u32 ds_port;
54 struct nfs_client *ds_clp;
55 atomic_t ds_count;
56};
57
58struct nfs4_file_layout_dsaddr {
59 struct pnfs_deviceid_node deviceid;
60 u32 stripe_count;
61 u8 *stripe_indices;
62 u32 ds_num;
63 struct nfs4_pnfs_ds *ds_list[1];
64};
65
66struct nfs4_filelayout_segment {
67 struct pnfs_layout_segment generic_hdr;
68 u32 stripe_type;
69 u32 commit_through_mds;
70 u32 stripe_unit;
71 u32 first_stripe_index;
72 u64 pattern_offset;
73 struct nfs4_file_layout_dsaddr *dsaddr; /* Point to GETDEVINFO data */
74 unsigned int num_fh;
75 struct nfs_fh **fh_array;
76};
77
78static inline struct nfs4_filelayout_segment *
79FILELAYOUT_LSEG(struct pnfs_layout_segment *lseg)
80{
81 return container_of(lseg,
82 struct nfs4_filelayout_segment,
83 generic_hdr);
84}
85
86extern void nfs4_fl_free_deviceid_callback(struct pnfs_deviceid_node *);
87extern void print_ds(struct nfs4_pnfs_ds *ds);
88extern void print_deviceid(struct nfs4_deviceid *dev_id);
89extern struct nfs4_file_layout_dsaddr *
90nfs4_fl_find_get_deviceid(struct nfs_client *, struct nfs4_deviceid *dev_id);
91struct nfs4_file_layout_dsaddr *
92get_device_info(struct inode *inode, struct nfs4_deviceid *dev_id);
93
94#endif /* FS_NFS_NFS4FILELAYOUT_H */
diff --git a/fs/nfs/nfs4filelayoutdev.c b/fs/nfs/nfs4filelayoutdev.c
new file mode 100644
index 000000000000..51fe64ace55a
--- /dev/null
+++ b/fs/nfs/nfs4filelayoutdev.c
@@ -0,0 +1,448 @@
1/*
2 * Device operations for the pnfs nfs4 file layout driver.
3 *
4 * Copyright (c) 2002
5 * The Regents of the University of Michigan
6 * All Rights Reserved
7 *
8 * Dean Hildebrand <dhildebz@umich.edu>
9 * Garth Goodson <Garth.Goodson@netapp.com>
10 *
11 * Permission is granted to use, copy, create derivative works, and
12 * redistribute this software and such derivative works for any purpose,
13 * so long as the name of the University of Michigan is not used in
14 * any advertising or publicity pertaining to the use or distribution
15 * of this software without specific, written prior authorization. If
16 * the above copyright notice or any other identification of the
17 * University of Michigan is included in any copy of any portion of
18 * this software, then the disclaimer below must also be included.
19 *
20 * This software is provided as is, without representation or warranty
21 * of any kind either express or implied, including without limitation
22 * the implied warranties of merchantability, fitness for a particular
23 * purpose, or noninfringement. The Regents of the University of
24 * Michigan shall not be liable for any damages, including special,
25 * indirect, incidental, or consequential damages, with respect to any
26 * claim arising out of or in connection with the use of the software,
27 * even if it has been or is hereafter advised of the possibility of
28 * such damages.
29 */
30
31#include <linux/nfs_fs.h>
32#include <linux/vmalloc.h>
33
34#include "internal.h"
35#include "nfs4filelayout.h"
36
37#define NFSDBG_FACILITY NFSDBG_PNFS_LD
38
39/*
40 * Data server cache
41 *
42 * Data servers can be mapped to different device ids.
43 * nfs4_pnfs_ds reference counting
44 * - set to 1 on allocation
45 * - incremented when a device id maps a data server already in the cache.
46 * - decremented when deviceid is removed from the cache.
47 */
48DEFINE_SPINLOCK(nfs4_ds_cache_lock);
49static LIST_HEAD(nfs4_data_server_cache);
50
51/* Debug routines */
52void
53print_ds(struct nfs4_pnfs_ds *ds)
54{
55 if (ds == NULL) {
56 printk("%s NULL device\n", __func__);
57 return;
58 }
59 printk(" ip_addr %x port %hu\n"
60 " ref count %d\n"
61 " client %p\n"
62 " cl_exchange_flags %x\n",
63 ntohl(ds->ds_ip_addr), ntohs(ds->ds_port),
64 atomic_read(&ds->ds_count), ds->ds_clp,
65 ds->ds_clp ? ds->ds_clp->cl_exchange_flags : 0);
66}
67
68void
69print_ds_list(struct nfs4_file_layout_dsaddr *dsaddr)
70{
71 int i;
72
73 ifdebug(FACILITY) {
74 printk("%s dsaddr->ds_num %d\n", __func__,
75 dsaddr->ds_num);
76 for (i = 0; i < dsaddr->ds_num; i++)
77 print_ds(dsaddr->ds_list[i]);
78 }
79}
80
81void print_deviceid(struct nfs4_deviceid *id)
82{
83 u32 *p = (u32 *)id;
84
85 dprintk("%s: device id= [%x%x%x%x]\n", __func__,
86 p[0], p[1], p[2], p[3]);
87}
88
89/* nfs4_ds_cache_lock is held */
90static struct nfs4_pnfs_ds *
91_data_server_lookup_locked(u32 ip_addr, u32 port)
92{
93 struct nfs4_pnfs_ds *ds;
94
95 dprintk("_data_server_lookup: ip_addr=%x port=%hu\n",
96 ntohl(ip_addr), ntohs(port));
97
98 list_for_each_entry(ds, &nfs4_data_server_cache, ds_node) {
99 if (ds->ds_ip_addr == ip_addr &&
100 ds->ds_port == port) {
101 return ds;
102 }
103 }
104 return NULL;
105}
106
107static void
108destroy_ds(struct nfs4_pnfs_ds *ds)
109{
110 dprintk("--> %s\n", __func__);
111 ifdebug(FACILITY)
112 print_ds(ds);
113
114 if (ds->ds_clp)
115 nfs_put_client(ds->ds_clp);
116 kfree(ds);
117}
118
119static void
120nfs4_fl_free_deviceid(struct nfs4_file_layout_dsaddr *dsaddr)
121{
122 struct nfs4_pnfs_ds *ds;
123 int i;
124
125 print_deviceid(&dsaddr->deviceid.de_id);
126
127 for (i = 0; i < dsaddr->ds_num; i++) {
128 ds = dsaddr->ds_list[i];
129 if (ds != NULL) {
130 if (atomic_dec_and_lock(&ds->ds_count,
131 &nfs4_ds_cache_lock)) {
132 list_del_init(&ds->ds_node);
133 spin_unlock(&nfs4_ds_cache_lock);
134 destroy_ds(ds);
135 }
136 }
137 }
138 kfree(dsaddr->stripe_indices);
139 kfree(dsaddr);
140}
141
142void
143nfs4_fl_free_deviceid_callback(struct pnfs_deviceid_node *device)
144{
145 struct nfs4_file_layout_dsaddr *dsaddr =
146 container_of(device, struct nfs4_file_layout_dsaddr, deviceid);
147
148 nfs4_fl_free_deviceid(dsaddr);
149}
150
151static struct nfs4_pnfs_ds *
152nfs4_pnfs_ds_add(struct inode *inode, u32 ip_addr, u32 port)
153{
154 struct nfs4_pnfs_ds *tmp_ds, *ds;
155
156 ds = kzalloc(sizeof(*tmp_ds), GFP_KERNEL);
157 if (!ds)
158 goto out;
159
160 spin_lock(&nfs4_ds_cache_lock);
161 tmp_ds = _data_server_lookup_locked(ip_addr, port);
162 if (tmp_ds == NULL) {
163 ds->ds_ip_addr = ip_addr;
164 ds->ds_port = port;
165 atomic_set(&ds->ds_count, 1);
166 INIT_LIST_HEAD(&ds->ds_node);
167 ds->ds_clp = NULL;
168 list_add(&ds->ds_node, &nfs4_data_server_cache);
169 dprintk("%s add new data server ip 0x%x\n", __func__,
170 ds->ds_ip_addr);
171 } else {
172 kfree(ds);
173 atomic_inc(&tmp_ds->ds_count);
174 dprintk("%s data server found ip 0x%x, inc'ed ds_count to %d\n",
175 __func__, tmp_ds->ds_ip_addr,
176 atomic_read(&tmp_ds->ds_count));
177 ds = tmp_ds;
178 }
179 spin_unlock(&nfs4_ds_cache_lock);
180out:
181 return ds;
182}
183
184/*
185 * Currently only support ipv4, and one multi-path address.
186 */
187static struct nfs4_pnfs_ds *
188decode_and_add_ds(__be32 **pp, struct inode *inode)
189{
190 struct nfs4_pnfs_ds *ds = NULL;
191 char *buf;
192 const char *ipend, *pstr;
193 u32 ip_addr, port;
194 int nlen, rlen, i;
195 int tmp[2];
196 __be32 *r_netid, *r_addr, *p = *pp;
197
198 /* r_netid */
199 nlen = be32_to_cpup(p++);
200 r_netid = p;
201 p += XDR_QUADLEN(nlen);
202
203 /* r_addr */
204 rlen = be32_to_cpup(p++);
205 r_addr = p;
206 p += XDR_QUADLEN(rlen);
207 *pp = p;
208
209 /* Check that netid is "tcp" */
210 if (nlen != 3 || memcmp((char *)r_netid, "tcp", 3)) {
211 dprintk("%s: ERROR: non ipv4 TCP r_netid\n", __func__);
212 goto out_err;
213 }
214
215 /* ipv6 length plus port is legal */
216 if (rlen > INET6_ADDRSTRLEN + 8) {
217 dprintk("%s Invalid address, length %d\n", __func__,
218 rlen);
219 goto out_err;
220 }
221 buf = kmalloc(rlen + 1, GFP_KERNEL);
222 buf[rlen] = '\0';
223 memcpy(buf, r_addr, rlen);
224
225 /* replace the port dots with dashes for the in4_pton() delimiter*/
226 for (i = 0; i < 2; i++) {
227 char *res = strrchr(buf, '.');
228 *res = '-';
229 }
230
231 /* Currently only support ipv4 address */
232 if (in4_pton(buf, rlen, (u8 *)&ip_addr, '-', &ipend) == 0) {
233 dprintk("%s: Only ipv4 addresses supported\n", __func__);
234 goto out_free;
235 }
236
237 /* port */
238 pstr = ipend;
239 sscanf(pstr, "-%d-%d", &tmp[0], &tmp[1]);
240 port = htons((tmp[0] << 8) | (tmp[1]));
241
242 ds = nfs4_pnfs_ds_add(inode, ip_addr, port);
243 dprintk("%s Decoded address and port %s\n", __func__, buf);
244out_free:
245 kfree(buf);
246out_err:
247 return ds;
248}
249
250/* Decode opaque device data and return the result */
251static struct nfs4_file_layout_dsaddr*
252decode_device(struct inode *ino, struct pnfs_device *pdev)
253{
254 int i, dummy;
255 u32 cnt, num;
256 u8 *indexp;
257 __be32 *p = (__be32 *)pdev->area, *indicesp;
258 struct nfs4_file_layout_dsaddr *dsaddr;
259
260 /* Get the stripe count (number of stripe index) */
261 cnt = be32_to_cpup(p++);
262 dprintk("%s stripe count %d\n", __func__, cnt);
263 if (cnt > NFS4_PNFS_MAX_STRIPE_CNT) {
264 printk(KERN_WARNING "%s: stripe count %d greater than "
265 "supported maximum %d\n", __func__,
266 cnt, NFS4_PNFS_MAX_STRIPE_CNT);
267 goto out_err;
268 }
269
270 /* Check the multipath list count */
271 indicesp = p;
272 p += XDR_QUADLEN(cnt << 2);
273 num = be32_to_cpup(p++);
274 dprintk("%s ds_num %u\n", __func__, num);
275 if (num > NFS4_PNFS_MAX_MULTI_CNT) {
276 printk(KERN_WARNING "%s: multipath count %d greater than "
277 "supported maximum %d\n", __func__,
278 num, NFS4_PNFS_MAX_MULTI_CNT);
279 goto out_err;
280 }
281 dsaddr = kzalloc(sizeof(*dsaddr) +
282 (sizeof(struct nfs4_pnfs_ds *) * (num - 1)),
283 GFP_KERNEL);
284 if (!dsaddr)
285 goto out_err;
286
287 dsaddr->stripe_indices = kzalloc(sizeof(u8) * cnt, GFP_KERNEL);
288 if (!dsaddr->stripe_indices)
289 goto out_err_free;
290
291 dsaddr->stripe_count = cnt;
292 dsaddr->ds_num = num;
293
294 memcpy(&dsaddr->deviceid.de_id, &pdev->dev_id, sizeof(pdev->dev_id));
295
296 /* Go back an read stripe indices */
297 p = indicesp;
298 indexp = &dsaddr->stripe_indices[0];
299 for (i = 0; i < dsaddr->stripe_count; i++) {
300 *indexp = be32_to_cpup(p++);
301 if (*indexp >= num)
302 goto out_err_free;
303 indexp++;
304 }
305 /* Skip already read multipath list count */
306 p++;
307
308 for (i = 0; i < dsaddr->ds_num; i++) {
309 int j;
310
311 dummy = be32_to_cpup(p++); /* multipath count */
312 if (dummy > 1) {
313 printk(KERN_WARNING
314 "%s: Multipath count %d not supported, "
315 "skipping all greater than 1\n", __func__,
316 dummy);
317 }
318 for (j = 0; j < dummy; j++) {
319 if (j == 0) {
320 dsaddr->ds_list[i] = decode_and_add_ds(&p, ino);
321 if (dsaddr->ds_list[i] == NULL)
322 goto out_err_free;
323 } else {
324 u32 len;
325 /* skip extra multipath */
326 len = be32_to_cpup(p++);
327 p += XDR_QUADLEN(len);
328 len = be32_to_cpup(p++);
329 p += XDR_QUADLEN(len);
330 continue;
331 }
332 }
333 }
334 return dsaddr;
335
336out_err_free:
337 nfs4_fl_free_deviceid(dsaddr);
338out_err:
339 dprintk("%s ERROR: returning NULL\n", __func__);
340 return NULL;
341}
342
343/*
344 * Decode the opaque device specified in 'dev'
345 * and add it to the list of available devices.
346 * If the deviceid is already cached, nfs4_add_deviceid will return
347 * a pointer to the cached struct and throw away the new.
348 */
349static struct nfs4_file_layout_dsaddr*
350decode_and_add_device(struct inode *inode, struct pnfs_device *dev)
351{
352 struct nfs4_file_layout_dsaddr *dsaddr;
353 struct pnfs_deviceid_node *d;
354
355 dsaddr = decode_device(inode, dev);
356 if (!dsaddr) {
357 printk(KERN_WARNING "%s: Could not decode or add device\n",
358 __func__);
359 return NULL;
360 }
361
362 d = pnfs_add_deviceid(NFS_SERVER(inode)->nfs_client->cl_devid_cache,
363 &dsaddr->deviceid);
364
365 return container_of(d, struct nfs4_file_layout_dsaddr, deviceid);
366}
367
368/*
369 * Retrieve the information for dev_id, add it to the list
370 * of available devices, and return it.
371 */
372struct nfs4_file_layout_dsaddr *
373get_device_info(struct inode *inode, struct nfs4_deviceid *dev_id)
374{
375 struct pnfs_device *pdev = NULL;
376 u32 max_resp_sz;
377 int max_pages;
378 struct page **pages = NULL;
379 struct nfs4_file_layout_dsaddr *dsaddr = NULL;
380 int rc, i;
381 struct nfs_server *server = NFS_SERVER(inode);
382
383 /*
384 * Use the session max response size as the basis for setting
385 * GETDEVICEINFO's maxcount
386 */
387 max_resp_sz = server->nfs_client->cl_session->fc_attrs.max_resp_sz;
388 max_pages = max_resp_sz >> PAGE_SHIFT;
389 dprintk("%s inode %p max_resp_sz %u max_pages %d\n",
390 __func__, inode, max_resp_sz, max_pages);
391
392 pdev = kzalloc(sizeof(struct pnfs_device), GFP_KERNEL);
393 if (pdev == NULL)
394 return NULL;
395
396 pages = kzalloc(max_pages * sizeof(struct page *), GFP_KERNEL);
397 if (pages == NULL) {
398 kfree(pdev);
399 return NULL;
400 }
401 for (i = 0; i < max_pages; i++) {
402 pages[i] = alloc_page(GFP_KERNEL);
403 if (!pages[i])
404 goto out_free;
405 }
406
407 /* set pdev->area */
408 pdev->area = vmap(pages, max_pages, VM_MAP, PAGE_KERNEL);
409 if (!pdev->area)
410 goto out_free;
411
412 memcpy(&pdev->dev_id, dev_id, sizeof(*dev_id));
413 pdev->layout_type = LAYOUT_NFSV4_1_FILES;
414 pdev->pages = pages;
415 pdev->pgbase = 0;
416 pdev->pglen = PAGE_SIZE * max_pages;
417 pdev->mincount = 0;
418
419 rc = nfs4_proc_getdeviceinfo(server, pdev);
420 dprintk("%s getdevice info returns %d\n", __func__, rc);
421 if (rc)
422 goto out_free;
423
424 /*
425 * Found new device, need to decode it and then add it to the
426 * list of known devices for this mountpoint.
427 */
428 dsaddr = decode_and_add_device(inode, pdev);
429out_free:
430 if (pdev->area != NULL)
431 vunmap(pdev->area);
432 for (i = 0; i < max_pages; i++)
433 __free_page(pages[i]);
434 kfree(pages);
435 kfree(pdev);
436 dprintk("<-- %s dsaddr %p\n", __func__, dsaddr);
437 return dsaddr;
438}
439
440struct nfs4_file_layout_dsaddr *
441nfs4_fl_find_get_deviceid(struct nfs_client *clp, struct nfs4_deviceid *id)
442{
443 struct pnfs_deviceid_node *d;
444
445 d = pnfs_find_get_deviceid(clp->cl_devid_cache, id);
446 return (d == NULL) ? NULL :
447 container_of(d, struct nfs4_file_layout_dsaddr, deviceid);
448}
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index e87fe612ca18..32c8758c99fd 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -55,6 +55,7 @@
55#include "internal.h" 55#include "internal.h"
56#include "iostat.h" 56#include "iostat.h"
57#include "callback.h" 57#include "callback.h"
58#include "pnfs.h"
58 59
59#define NFSDBG_FACILITY NFSDBG_PROC 60#define NFSDBG_FACILITY NFSDBG_PROC
60 61
@@ -130,6 +131,7 @@ const u32 nfs4_fsinfo_bitmap[2] = { FATTR4_WORD0_MAXFILESIZE
130 | FATTR4_WORD0_MAXWRITE 131 | FATTR4_WORD0_MAXWRITE
131 | FATTR4_WORD0_LEASE_TIME, 132 | FATTR4_WORD0_LEASE_TIME,
132 FATTR4_WORD1_TIME_DELTA 133 FATTR4_WORD1_TIME_DELTA
134 | FATTR4_WORD1_FS_LAYOUT_TYPES
133}; 135};
134 136
135const u32 nfs4_fs_locations_bitmap[2] = { 137const u32 nfs4_fs_locations_bitmap[2] = {
@@ -4840,49 +4842,56 @@ static void nfs4_init_channel_attrs(struct nfs41_create_session_args *args)
4840 args->bc_attrs.max_reqs); 4842 args->bc_attrs.max_reqs);
4841} 4843}
4842 4844
4843static int _verify_channel_attr(char *chan, char *attr_name, u32 sent, u32 rcvd) 4845static int nfs4_verify_fore_channel_attrs(struct nfs41_create_session_args *args, struct nfs4_session *session)
4844{ 4846{
4845 if (rcvd <= sent) 4847 struct nfs4_channel_attrs *sent = &args->fc_attrs;
4846 return 0; 4848 struct nfs4_channel_attrs *rcvd = &session->fc_attrs;
4847 printk(KERN_WARNING "%s: Session INVALID: %s channel %s increased. " 4849
4848 "sent=%u rcvd=%u\n", __func__, chan, attr_name, sent, rcvd); 4850 if (rcvd->headerpadsz > sent->headerpadsz)
4849 return -EINVAL; 4851 return -EINVAL;
4852 if (rcvd->max_resp_sz > sent->max_resp_sz)
4853 return -EINVAL;
4854 /*
4855 * Our requested max_ops is the minimum we need; we're not
4856 * prepared to break up compounds into smaller pieces than that.
4857 * So, no point even trying to continue if the server won't
4858 * cooperate:
4859 */
4860 if (rcvd->max_ops < sent->max_ops)
4861 return -EINVAL;
4862 if (rcvd->max_reqs == 0)
4863 return -EINVAL;
4864 return 0;
4850} 4865}
4851 4866
4852#define _verify_fore_channel_attr(_name_) \ 4867static int nfs4_verify_back_channel_attrs(struct nfs41_create_session_args *args, struct nfs4_session *session)
4853 _verify_channel_attr("fore", #_name_, \ 4868{
4854 args->fc_attrs._name_, \ 4869 struct nfs4_channel_attrs *sent = &args->bc_attrs;
4855 session->fc_attrs._name_) 4870 struct nfs4_channel_attrs *rcvd = &session->bc_attrs;
4856 4871
4857#define _verify_back_channel_attr(_name_) \ 4872 if (rcvd->max_rqst_sz > sent->max_rqst_sz)
4858 _verify_channel_attr("back", #_name_, \ 4873 return -EINVAL;
4859 args->bc_attrs._name_, \ 4874 if (rcvd->max_resp_sz < sent->max_resp_sz)
4860 session->bc_attrs._name_) 4875 return -EINVAL;
4876 if (rcvd->max_resp_sz_cached > sent->max_resp_sz_cached)
4877 return -EINVAL;
4878 /* These would render the backchannel useless: */
4879 if (rcvd->max_ops == 0)
4880 return -EINVAL;
4881 if (rcvd->max_reqs == 0)
4882 return -EINVAL;
4883 return 0;
4884}
4861 4885
4862/*
4863 * The server is not allowed to increase the fore channel header pad size,
4864 * maximum response size, or maximum number of operations.
4865 *
4866 * The back channel attributes are only negotiatied down: We send what the
4867 * (back channel) server insists upon.
4868 */
4869static int nfs4_verify_channel_attrs(struct nfs41_create_session_args *args, 4886static int nfs4_verify_channel_attrs(struct nfs41_create_session_args *args,
4870 struct nfs4_session *session) 4887 struct nfs4_session *session)
4871{ 4888{
4872 int ret = 0; 4889 int ret;
4873
4874 ret |= _verify_fore_channel_attr(headerpadsz);
4875 ret |= _verify_fore_channel_attr(max_resp_sz);
4876 ret |= _verify_fore_channel_attr(max_ops);
4877
4878 ret |= _verify_back_channel_attr(headerpadsz);
4879 ret |= _verify_back_channel_attr(max_rqst_sz);
4880 ret |= _verify_back_channel_attr(max_resp_sz);
4881 ret |= _verify_back_channel_attr(max_resp_sz_cached);
4882 ret |= _verify_back_channel_attr(max_ops);
4883 ret |= _verify_back_channel_attr(max_reqs);
4884 4890
4885 return ret; 4891 ret = nfs4_verify_fore_channel_attrs(args, session);
4892 if (ret)
4893 return ret;
4894 return nfs4_verify_back_channel_attrs(args, session);
4886} 4895}
4887 4896
4888static int _nfs4_proc_create_session(struct nfs_client *clp) 4897static int _nfs4_proc_create_session(struct nfs_client *clp)
@@ -5255,6 +5264,147 @@ out:
5255 dprintk("<-- %s status=%d\n", __func__, status); 5264 dprintk("<-- %s status=%d\n", __func__, status);
5256 return status; 5265 return status;
5257} 5266}
5267
5268static void
5269nfs4_layoutget_prepare(struct rpc_task *task, void *calldata)
5270{
5271 struct nfs4_layoutget *lgp = calldata;
5272 struct inode *ino = lgp->args.inode;
5273 struct nfs_server *server = NFS_SERVER(ino);
5274
5275 dprintk("--> %s\n", __func__);
5276 if (nfs4_setup_sequence(server, &lgp->args.seq_args,
5277 &lgp->res.seq_res, 0, task))
5278 return;
5279 rpc_call_start(task);
5280}
5281
5282static void nfs4_layoutget_done(struct rpc_task *task, void *calldata)
5283{
5284 struct nfs4_layoutget *lgp = calldata;
5285 struct nfs_server *server = NFS_SERVER(lgp->args.inode);
5286
5287 dprintk("--> %s\n", __func__);
5288
5289 if (!nfs4_sequence_done(task, &lgp->res.seq_res))
5290 return;
5291
5292 switch (task->tk_status) {
5293 case 0:
5294 break;
5295 case -NFS4ERR_LAYOUTTRYLATER:
5296 case -NFS4ERR_RECALLCONFLICT:
5297 task->tk_status = -NFS4ERR_DELAY;
5298 /* Fall through */
5299 default:
5300 if (nfs4_async_handle_error(task, server, NULL) == -EAGAIN) {
5301 rpc_restart_call_prepare(task);
5302 return;
5303 }
5304 }
5305 lgp->status = task->tk_status;
5306 dprintk("<-- %s\n", __func__);
5307}
5308
5309static void nfs4_layoutget_release(void *calldata)
5310{
5311 struct nfs4_layoutget *lgp = calldata;
5312
5313 dprintk("--> %s\n", __func__);
5314 put_layout_hdr(lgp->args.inode);
5315 if (lgp->res.layout.buf != NULL)
5316 free_page((unsigned long) lgp->res.layout.buf);
5317 put_nfs_open_context(lgp->args.ctx);
5318 kfree(calldata);
5319 dprintk("<-- %s\n", __func__);
5320}
5321
5322static const struct rpc_call_ops nfs4_layoutget_call_ops = {
5323 .rpc_call_prepare = nfs4_layoutget_prepare,
5324 .rpc_call_done = nfs4_layoutget_done,
5325 .rpc_release = nfs4_layoutget_release,
5326};
5327
5328int nfs4_proc_layoutget(struct nfs4_layoutget *lgp)
5329{
5330 struct nfs_server *server = NFS_SERVER(lgp->args.inode);
5331 struct rpc_task *task;
5332 struct rpc_message msg = {
5333 .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LAYOUTGET],
5334 .rpc_argp = &lgp->args,
5335 .rpc_resp = &lgp->res,
5336 };
5337 struct rpc_task_setup task_setup_data = {
5338 .rpc_client = server->client,
5339 .rpc_message = &msg,
5340 .callback_ops = &nfs4_layoutget_call_ops,
5341 .callback_data = lgp,
5342 .flags = RPC_TASK_ASYNC,
5343 };
5344 int status = 0;
5345
5346 dprintk("--> %s\n", __func__);
5347
5348 lgp->res.layout.buf = (void *)__get_free_page(GFP_NOFS);
5349 if (lgp->res.layout.buf == NULL) {
5350 nfs4_layoutget_release(lgp);
5351 return -ENOMEM;
5352 }
5353
5354 lgp->res.seq_res.sr_slot = NULL;
5355 task = rpc_run_task(&task_setup_data);
5356 if (IS_ERR(task))
5357 return PTR_ERR(task);
5358 status = nfs4_wait_for_completion_rpc_task(task);
5359 if (status != 0)
5360 goto out;
5361 status = lgp->status;
5362 if (status != 0)
5363 goto out;
5364 status = pnfs_layout_process(lgp);
5365out:
5366 rpc_put_task(task);
5367 dprintk("<-- %s status=%d\n", __func__, status);
5368 return status;
5369}
5370
5371static int
5372_nfs4_proc_getdeviceinfo(struct nfs_server *server, struct pnfs_device *pdev)
5373{
5374 struct nfs4_getdeviceinfo_args args = {
5375 .pdev = pdev,
5376 };
5377 struct nfs4_getdeviceinfo_res res = {
5378 .pdev = pdev,
5379 };
5380 struct rpc_message msg = {
5381 .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_GETDEVICEINFO],
5382 .rpc_argp = &args,
5383 .rpc_resp = &res,
5384 };
5385 int status;
5386
5387 dprintk("--> %s\n", __func__);
5388 status = nfs4_call_sync(server, &msg, &args, &res, 0);
5389 dprintk("<-- %s status=%d\n", __func__, status);
5390
5391 return status;
5392}
5393
5394int nfs4_proc_getdeviceinfo(struct nfs_server *server, struct pnfs_device *pdev)
5395{
5396 struct nfs4_exception exception = { };
5397 int err;
5398
5399 do {
5400 err = nfs4_handle_exception(server,
5401 _nfs4_proc_getdeviceinfo(server, pdev),
5402 &exception);
5403 } while (exception.retry);
5404 return err;
5405}
5406EXPORT_SYMBOL_GPL(nfs4_proc_getdeviceinfo);
5407
5258#endif /* CONFIG_NFS_V4_1 */ 5408#endif /* CONFIG_NFS_V4_1 */
5259 5409
5260struct nfs4_state_recovery_ops nfs40_reboot_recovery_ops = { 5410struct nfs4_state_recovery_ops nfs40_reboot_recovery_ops = {
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index aa0b02a610c4..f575a3126737 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -54,6 +54,7 @@
54#include "callback.h" 54#include "callback.h"
55#include "delegation.h" 55#include "delegation.h"
56#include "internal.h" 56#include "internal.h"
57#include "pnfs.h"
57 58
58#define OPENOWNER_POOL_SIZE 8 59#define OPENOWNER_POOL_SIZE 8
59 60
@@ -1475,6 +1476,7 @@ static void nfs4_state_manager(struct nfs_client *clp)
1475 } 1476 }
1476 clear_bit(NFS4CLNT_CHECK_LEASE, &clp->cl_state); 1477 clear_bit(NFS4CLNT_CHECK_LEASE, &clp->cl_state);
1477 set_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state); 1478 set_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state);
1479 pnfs_destroy_all_layouts(clp);
1478 } 1480 }
1479 1481
1480 if (test_and_clear_bit(NFS4CLNT_CHECK_LEASE, &clp->cl_state)) { 1482 if (test_and_clear_bit(NFS4CLNT_CHECK_LEASE, &clp->cl_state)) {
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index bd2101d918c8..f313c4cce7e4 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -52,6 +52,7 @@
52#include <linux/nfs_idmap.h> 52#include <linux/nfs_idmap.h>
53#include "nfs4_fs.h" 53#include "nfs4_fs.h"
54#include "internal.h" 54#include "internal.h"
55#include "pnfs.h"
55 56
56#define NFSDBG_FACILITY NFSDBG_XDR 57#define NFSDBG_FACILITY NFSDBG_XDR
57 58
@@ -310,6 +311,19 @@ static int nfs4_stat_to_errno(int);
310 XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN) + 5) 311 XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN) + 5)
311#define encode_reclaim_complete_maxsz (op_encode_hdr_maxsz + 4) 312#define encode_reclaim_complete_maxsz (op_encode_hdr_maxsz + 4)
312#define decode_reclaim_complete_maxsz (op_decode_hdr_maxsz + 4) 313#define decode_reclaim_complete_maxsz (op_decode_hdr_maxsz + 4)
314#define encode_getdeviceinfo_maxsz (op_encode_hdr_maxsz + 4 + \
315 XDR_QUADLEN(NFS4_DEVICEID4_SIZE))
316#define decode_getdeviceinfo_maxsz (op_decode_hdr_maxsz + \
317 1 /* layout type */ + \
318 1 /* opaque devaddr4 length */ + \
319 /* devaddr4 payload is read into page */ \
320 1 /* notification bitmap length */ + \
321 1 /* notification bitmap */)
322#define encode_layoutget_maxsz (op_encode_hdr_maxsz + 10 + \
323 encode_stateid_maxsz)
324#define decode_layoutget_maxsz (op_decode_hdr_maxsz + 8 + \
325 decode_stateid_maxsz + \
326 XDR_QUADLEN(PNFS_LAYOUT_MAXSIZE))
313#else /* CONFIG_NFS_V4_1 */ 327#else /* CONFIG_NFS_V4_1 */
314#define encode_sequence_maxsz 0 328#define encode_sequence_maxsz 0
315#define decode_sequence_maxsz 0 329#define decode_sequence_maxsz 0
@@ -699,6 +713,20 @@ static int nfs4_stat_to_errno(int);
699#define NFS4_dec_reclaim_complete_sz (compound_decode_hdr_maxsz + \ 713#define NFS4_dec_reclaim_complete_sz (compound_decode_hdr_maxsz + \
700 decode_sequence_maxsz + \ 714 decode_sequence_maxsz + \
701 decode_reclaim_complete_maxsz) 715 decode_reclaim_complete_maxsz)
716#define NFS4_enc_getdeviceinfo_sz (compound_encode_hdr_maxsz + \
717 encode_sequence_maxsz +\
718 encode_getdeviceinfo_maxsz)
719#define NFS4_dec_getdeviceinfo_sz (compound_decode_hdr_maxsz + \
720 decode_sequence_maxsz + \
721 decode_getdeviceinfo_maxsz)
722#define NFS4_enc_layoutget_sz (compound_encode_hdr_maxsz + \
723 encode_sequence_maxsz + \
724 encode_putfh_maxsz + \
725 encode_layoutget_maxsz)
726#define NFS4_dec_layoutget_sz (compound_decode_hdr_maxsz + \
727 decode_sequence_maxsz + \
728 decode_putfh_maxsz + \
729 decode_layoutget_maxsz)
702 730
703const u32 nfs41_maxwrite_overhead = ((RPC_MAX_HEADER_WITH_AUTH + 731const u32 nfs41_maxwrite_overhead = ((RPC_MAX_HEADER_WITH_AUTH +
704 compound_encode_hdr_maxsz + 732 compound_encode_hdr_maxsz +
@@ -1737,6 +1765,58 @@ static void encode_sequence(struct xdr_stream *xdr,
1737#endif /* CONFIG_NFS_V4_1 */ 1765#endif /* CONFIG_NFS_V4_1 */
1738} 1766}
1739 1767
1768#ifdef CONFIG_NFS_V4_1
1769static void
1770encode_getdeviceinfo(struct xdr_stream *xdr,
1771 const struct nfs4_getdeviceinfo_args *args,
1772 struct compound_hdr *hdr)
1773{
1774 __be32 *p;
1775
1776 p = reserve_space(xdr, 16 + NFS4_DEVICEID4_SIZE);
1777 *p++ = cpu_to_be32(OP_GETDEVICEINFO);
1778 p = xdr_encode_opaque_fixed(p, args->pdev->dev_id.data,
1779 NFS4_DEVICEID4_SIZE);
1780 *p++ = cpu_to_be32(args->pdev->layout_type);
1781 *p++ = cpu_to_be32(args->pdev->pglen); /* gdia_maxcount */
1782 *p++ = cpu_to_be32(0); /* bitmap length 0 */
1783 hdr->nops++;
1784 hdr->replen += decode_getdeviceinfo_maxsz;
1785}
1786
1787static void
1788encode_layoutget(struct xdr_stream *xdr,
1789 const struct nfs4_layoutget_args *args,
1790 struct compound_hdr *hdr)
1791{
1792 nfs4_stateid stateid;
1793 __be32 *p;
1794
1795 p = reserve_space(xdr, 44 + NFS4_STATEID_SIZE);
1796 *p++ = cpu_to_be32(OP_LAYOUTGET);
1797 *p++ = cpu_to_be32(0); /* Signal layout available */
1798 *p++ = cpu_to_be32(args->type);
1799 *p++ = cpu_to_be32(args->range.iomode);
1800 p = xdr_encode_hyper(p, args->range.offset);
1801 p = xdr_encode_hyper(p, args->range.length);
1802 p = xdr_encode_hyper(p, args->minlength);
1803 pnfs_get_layout_stateid(&stateid, NFS_I(args->inode)->layout,
1804 args->ctx->state);
1805 p = xdr_encode_opaque_fixed(p, &stateid.data, NFS4_STATEID_SIZE);
1806 *p = cpu_to_be32(args->maxcount);
1807
1808 dprintk("%s: 1st type:0x%x iomode:%d off:%lu len:%lu mc:%d\n",
1809 __func__,
1810 args->type,
1811 args->range.iomode,
1812 (unsigned long)args->range.offset,
1813 (unsigned long)args->range.length,
1814 args->maxcount);
1815 hdr->nops++;
1816 hdr->replen += decode_layoutget_maxsz;
1817}
1818#endif /* CONFIG_NFS_V4_1 */
1819
1740/* 1820/*
1741 * END OF "GENERIC" ENCODE ROUTINES. 1821 * END OF "GENERIC" ENCODE ROUTINES.
1742 */ 1822 */
@@ -2554,6 +2634,51 @@ static int nfs4_xdr_enc_reclaim_complete(struct rpc_rqst *req, uint32_t *p,
2554 return 0; 2634 return 0;
2555} 2635}
2556 2636
2637/*
2638 * Encode GETDEVICEINFO request
2639 */
2640static int nfs4_xdr_enc_getdeviceinfo(struct rpc_rqst *req, uint32_t *p,
2641 struct nfs4_getdeviceinfo_args *args)
2642{
2643 struct xdr_stream xdr;
2644 struct compound_hdr hdr = {
2645 .minorversion = nfs4_xdr_minorversion(&args->seq_args),
2646 };
2647
2648 xdr_init_encode(&xdr, &req->rq_snd_buf, p);
2649 encode_compound_hdr(&xdr, req, &hdr);
2650 encode_sequence(&xdr, &args->seq_args, &hdr);
2651 encode_getdeviceinfo(&xdr, args, &hdr);
2652
2653 /* set up reply kvec. Subtract notification bitmap max size (2)
2654 * so that notification bitmap is put in xdr_buf tail */
2655 xdr_inline_pages(&req->rq_rcv_buf, (hdr.replen - 2) << 2,
2656 args->pdev->pages, args->pdev->pgbase,
2657 args->pdev->pglen);
2658
2659 encode_nops(&hdr);
2660 return 0;
2661}
2662
2663/*
2664 * Encode LAYOUTGET request
2665 */
2666static int nfs4_xdr_enc_layoutget(struct rpc_rqst *req, uint32_t *p,
2667 struct nfs4_layoutget_args *args)
2668{
2669 struct xdr_stream xdr;
2670 struct compound_hdr hdr = {
2671 .minorversion = nfs4_xdr_minorversion(&args->seq_args),
2672 };
2673
2674 xdr_init_encode(&xdr, &req->rq_snd_buf, p);
2675 encode_compound_hdr(&xdr, req, &hdr);
2676 encode_sequence(&xdr, &args->seq_args, &hdr);
2677 encode_putfh(&xdr, NFS_FH(args->inode), &hdr);
2678 encode_layoutget(&xdr, args, &hdr);
2679 encode_nops(&hdr);
2680 return 0;
2681}
2557#endif /* CONFIG_NFS_V4_1 */ 2682#endif /* CONFIG_NFS_V4_1 */
2558 2683
2559static void print_overflow_msg(const char *func, const struct xdr_stream *xdr) 2684static void print_overflow_msg(const char *func, const struct xdr_stream *xdr)
@@ -3978,6 +4103,61 @@ static int decode_getfattr(struct xdr_stream *xdr, struct nfs_fattr *fattr,
3978 return decode_getfattr_generic(xdr, fattr, NULL, server, may_sleep); 4103 return decode_getfattr_generic(xdr, fattr, NULL, server, may_sleep);
3979} 4104}
3980 4105
4106/*
4107 * Decode potentially multiple layout types. Currently we only support
4108 * one layout driver per file system.
4109 */
4110static int decode_first_pnfs_layout_type(struct xdr_stream *xdr,
4111 uint32_t *layouttype)
4112{
4113 uint32_t *p;
4114 int num;
4115
4116 p = xdr_inline_decode(xdr, 4);
4117 if (unlikely(!p))
4118 goto out_overflow;
4119 num = be32_to_cpup(p);
4120
4121 /* pNFS is not supported by the underlying file system */
4122 if (num == 0) {
4123 *layouttype = 0;
4124 return 0;
4125 }
4126 if (num > 1)
4127 printk(KERN_INFO "%s: Warning: Multiple pNFS layout drivers "
4128 "per filesystem not supported\n", __func__);
4129
4130 /* Decode and set first layout type, move xdr->p past unused types */
4131 p = xdr_inline_decode(xdr, num * 4);
4132 if (unlikely(!p))
4133 goto out_overflow;
4134 *layouttype = be32_to_cpup(p);
4135 return 0;
4136out_overflow:
4137 print_overflow_msg(__func__, xdr);
4138 return -EIO;
4139}
4140
/*
 * The type of file system exported.
 * Note we must ensure that layouttype is set in any non-error case.
 */
static int decode_attr_pnfstype(struct xdr_stream *xdr, uint32_t *bitmap,
				uint32_t *layouttype)
{
	int status = 0;

	dprintk("%s: bitmap is %x\n", __func__, bitmap[1]);
	/* All lower-numbered word-1 attributes must already be consumed */
	if (unlikely(bitmap[1] & (FATTR4_WORD1_FS_LAYOUT_TYPES - 1U)))
		return -EIO;
	if (bitmap[1] & FATTR4_WORD1_FS_LAYOUT_TYPES) {
		status = decode_first_pnfs_layout_type(xdr, layouttype);
		/* mark the attribute as consumed */
		bitmap[1] &= ~FATTR4_WORD1_FS_LAYOUT_TYPES;
	} else
		*layouttype = 0;
	return status;
}
4160
3981static int decode_fsinfo(struct xdr_stream *xdr, struct nfs_fsinfo *fsinfo) 4161static int decode_fsinfo(struct xdr_stream *xdr, struct nfs_fsinfo *fsinfo)
3982{ 4162{
3983 __be32 *savep; 4163 __be32 *savep;
@@ -4006,6 +4186,9 @@ static int decode_fsinfo(struct xdr_stream *xdr, struct nfs_fsinfo *fsinfo)
4006 status = decode_attr_time_delta(xdr, bitmap, &fsinfo->time_delta); 4186 status = decode_attr_time_delta(xdr, bitmap, &fsinfo->time_delta);
4007 if (status != 0) 4187 if (status != 0)
4008 goto xdr_error; 4188 goto xdr_error;
4189 status = decode_attr_pnfstype(xdr, bitmap, &fsinfo->layouttype);
4190 if (status != 0)
4191 goto xdr_error;
4009 4192
4010 status = verify_attr_len(xdr, savep, attrlen); 4193 status = verify_attr_len(xdr, savep, attrlen);
4011xdr_error: 4194xdr_error:
@@ -4772,6 +4955,134 @@ out_overflow:
4772#endif /* CONFIG_NFS_V4_1 */ 4955#endif /* CONFIG_NFS_V4_1 */
4773} 4956}
4774 4957
4958#if defined(CONFIG_NFS_V4_1)
4959
4960static int decode_getdeviceinfo(struct xdr_stream *xdr,
4961 struct pnfs_device *pdev)
4962{
4963 __be32 *p;
4964 uint32_t len, type;
4965 int status;
4966
4967 status = decode_op_hdr(xdr, OP_GETDEVICEINFO);
4968 if (status) {
4969 if (status == -ETOOSMALL) {
4970 p = xdr_inline_decode(xdr, 4);
4971 if (unlikely(!p))
4972 goto out_overflow;
4973 pdev->mincount = be32_to_cpup(p);
4974 dprintk("%s: Min count too small. mincnt = %u\n",
4975 __func__, pdev->mincount);
4976 }
4977 return status;
4978 }
4979
4980 p = xdr_inline_decode(xdr, 8);
4981 if (unlikely(!p))
4982 goto out_overflow;
4983 type = be32_to_cpup(p++);
4984 if (type != pdev->layout_type) {
4985 dprintk("%s: layout mismatch req: %u pdev: %u\n",
4986 __func__, pdev->layout_type, type);
4987 return -EINVAL;
4988 }
4989 /*
4990 * Get the length of the opaque device_addr4. xdr_read_pages places
4991 * the opaque device_addr4 in the xdr_buf->pages (pnfs_device->pages)
4992 * and places the remaining xdr data in xdr_buf->tail
4993 */
4994 pdev->mincount = be32_to_cpup(p);
4995 xdr_read_pages(xdr, pdev->mincount); /* include space for the length */
4996
4997 /* Parse notification bitmap, verifying that it is zero. */
4998 p = xdr_inline_decode(xdr, 4);
4999 if (unlikely(!p))
5000 goto out_overflow;
5001 len = be32_to_cpup(p);
5002 if (len) {
5003 int i;
5004
5005 p = xdr_inline_decode(xdr, 4 * len);
5006 if (unlikely(!p))
5007 goto out_overflow;
5008 for (i = 0; i < len; i++, p++) {
5009 if (be32_to_cpup(p)) {
5010 dprintk("%s: notifications not supported\n",
5011 __func__);
5012 return -EIO;
5013 }
5014 }
5015 }
5016 return 0;
5017out_overflow:
5018 print_overflow_msg(__func__, xdr);
5019 return -EIO;
5020}
5021
/*
 * Decode the LAYOUTGET4resok body into 'res'.  Only the first entry of
 * the returned layout array is used; any further entries are ignored.
 */
static int decode_layoutget(struct xdr_stream *xdr, struct rpc_rqst *req,
			    struct nfs4_layoutget_res *res)
{
	__be32 *p;
	int status;
	u32 layout_count;

	status = decode_op_hdr(xdr, OP_LAYOUTGET);
	if (status)
		return status;
	/* return_on_close (4) + stateid + layout array count (4) */
	p = xdr_inline_decode(xdr, 8 + NFS4_STATEID_SIZE);
	if (unlikely(!p))
		goto out_overflow;
	res->return_on_close = be32_to_cpup(p++);
	p = xdr_decode_opaque_fixed(p, res->stateid.data, NFS4_STATEID_SIZE);
	layout_count = be32_to_cpup(p);
	if (!layout_count) {
		dprintk("%s: server responded with empty layout array\n",
			__func__);
		return -EINVAL;
	}

	/* first entry: offset (8) + length (8) + iomode (4) + type (4) */
	p = xdr_inline_decode(xdr, 24);
	if (unlikely(!p))
		goto out_overflow;
	p = xdr_decode_hyper(p, &res->range.offset);
	p = xdr_decode_hyper(p, &res->range.length);
	res->range.iomode = be32_to_cpup(p++);
	res->type = be32_to_cpup(p++);

	status = decode_opaque_inline(xdr, &res->layout.len, (char **)&p);
	if (unlikely(status))
		return status;

	dprintk("%s roff:%lu rlen:%lu riomode:%d, lo_type:0x%x, lo.len:%d\n",
		__func__,
		(unsigned long)res->range.offset,
		(unsigned long)res->range.length,
		res->range.iomode,
		res->type,
		res->layout.len);

	/* nfs4_proc_layoutget allocated a single page */
	if (res->layout.len > PAGE_SIZE)
		return -ENOMEM;
	memcpy(res->layout.buf, p, res->layout.len);

	if (layout_count > 1) {
		/* We only handle a length one array at the moment. Any
		 * further entries are just ignored. Note that this means
		 * the client may see a response that is less than the
		 * minimum it requested.
		 */
		dprintk("%s: server responded with %d layouts, dropping tail\n",
			__func__, layout_count);
	}

	return 0;
out_overflow:
	print_overflow_msg(__func__, xdr);
	return -EIO;
}
5084#endif /* CONFIG_NFS_V4_1 */
5085
4775/* 5086/*
4776 * END OF "GENERIC" DECODE ROUTINES. 5087 * END OF "GENERIC" DECODE ROUTINES.
4777 */ 5088 */
@@ -5799,6 +6110,53 @@ static int nfs4_xdr_dec_reclaim_complete(struct rpc_rqst *rqstp, uint32_t *p,
5799 status = decode_reclaim_complete(&xdr, (void *)NULL); 6110 status = decode_reclaim_complete(&xdr, (void *)NULL);
5800 return status; 6111 return status;
5801} 6112}
6113
6114/*
6115 * Decode GETDEVINFO response
6116 */
6117static int nfs4_xdr_dec_getdeviceinfo(struct rpc_rqst *rqstp, uint32_t *p,
6118 struct nfs4_getdeviceinfo_res *res)
6119{
6120 struct xdr_stream xdr;
6121 struct compound_hdr hdr;
6122 int status;
6123
6124 xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
6125 status = decode_compound_hdr(&xdr, &hdr);
6126 if (status != 0)
6127 goto out;
6128 status = decode_sequence(&xdr, &res->seq_res, rqstp);
6129 if (status != 0)
6130 goto out;
6131 status = decode_getdeviceinfo(&xdr, res->pdev);
6132out:
6133 return status;
6134}
6135
6136/*
6137 * Decode LAYOUTGET response
6138 */
6139static int nfs4_xdr_dec_layoutget(struct rpc_rqst *rqstp, uint32_t *p,
6140 struct nfs4_layoutget_res *res)
6141{
6142 struct xdr_stream xdr;
6143 struct compound_hdr hdr;
6144 int status;
6145
6146 xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
6147 status = decode_compound_hdr(&xdr, &hdr);
6148 if (status)
6149 goto out;
6150 status = decode_sequence(&xdr, &res->seq_res, rqstp);
6151 if (status)
6152 goto out;
6153 status = decode_putfh(&xdr);
6154 if (status)
6155 goto out;
6156 status = decode_layoutget(&xdr, rqstp, res);
6157out:
6158 return status;
6159}
5802#endif /* CONFIG_NFS_V4_1 */ 6160#endif /* CONFIG_NFS_V4_1 */
5803 6161
5804__be32 *nfs4_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry, 6162__be32 *nfs4_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry,
@@ -5990,6 +6348,8 @@ struct rpc_procinfo nfs4_procedures[] = {
5990 PROC(SEQUENCE, enc_sequence, dec_sequence), 6348 PROC(SEQUENCE, enc_sequence, dec_sequence),
5991 PROC(GET_LEASE_TIME, enc_get_lease_time, dec_get_lease_time), 6349 PROC(GET_LEASE_TIME, enc_get_lease_time, dec_get_lease_time),
5992 PROC(RECLAIM_COMPLETE, enc_reclaim_complete, dec_reclaim_complete), 6350 PROC(RECLAIM_COMPLETE, enc_reclaim_complete, dec_reclaim_complete),
6351 PROC(GETDEVICEINFO, enc_getdeviceinfo, dec_getdeviceinfo),
6352 PROC(LAYOUTGET, enc_layoutget, dec_layoutget),
5993#endif /* CONFIG_NFS_V4_1 */ 6353#endif /* CONFIG_NFS_V4_1 */
5994}; 6354};
5995 6355
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
new file mode 100644
index 000000000000..db773428f95f
--- /dev/null
+++ b/fs/nfs/pnfs.c
@@ -0,0 +1,783 @@
1/*
2 * pNFS functions to call and manage layout drivers.
3 *
4 * Copyright (c) 2002 [year of first publication]
5 * The Regents of the University of Michigan
6 * All Rights Reserved
7 *
8 * Dean Hildebrand <dhildebz@umich.edu>
9 *
10 * Permission is granted to use, copy, create derivative works, and
11 * redistribute this software and such derivative works for any purpose,
12 * so long as the name of the University of Michigan is not used in
13 * any advertising or publicity pertaining to the use or distribution
14 * of this software without specific, written prior authorization. If
15 * the above copyright notice or any other identification of the
16 * University of Michigan is included in any copy of any portion of
17 * this software, then the disclaimer below must also be included.
18 *
19 * This software is provided as is, without representation or warranty
20 * of any kind either express or implied, including without limitation
21 * the implied warranties of merchantability, fitness for a particular
22 * purpose, or noninfringement. The Regents of the University of
23 * Michigan shall not be liable for any damages, including special,
24 * indirect, incidental, or consequential damages, with respect to any
25 * claim arising out of or in connection with the use of the software,
26 * even if it has been or is hereafter advised of the possibility of
27 * such damages.
28 */
29
30#include <linux/nfs_fs.h>
31#include "internal.h"
32#include "pnfs.h"
33
34#define NFSDBG_FACILITY NFSDBG_PNFS
35
36/* Locking:
37 *
38 * pnfs_spinlock:
39 * protects pnfs_modules_tbl.
40 */
41static DEFINE_SPINLOCK(pnfs_spinlock);
42
43/*
44 * pnfs_modules_tbl holds all pnfs modules
45 */
46static LIST_HEAD(pnfs_modules_tbl);
47
48/* Return the registered pnfs layout driver module matching given id */
49static struct pnfs_layoutdriver_type *
50find_pnfs_driver_locked(u32 id)
51{
52 struct pnfs_layoutdriver_type *local;
53
54 list_for_each_entry(local, &pnfs_modules_tbl, pnfs_tblid)
55 if (local->id == id)
56 goto out;
57 local = NULL;
58out:
59 dprintk("%s: Searching for id %u, found %p\n", __func__, id, local);
60 return local;
61}
62
63static struct pnfs_layoutdriver_type *
64find_pnfs_driver(u32 id)
65{
66 struct pnfs_layoutdriver_type *local;
67
68 spin_lock(&pnfs_spinlock);
69 local = find_pnfs_driver_locked(id);
70 spin_unlock(&pnfs_spinlock);
71 return local;
72}
73
74void
75unset_pnfs_layoutdriver(struct nfs_server *nfss)
76{
77 if (nfss->pnfs_curr_ld) {
78 nfss->pnfs_curr_ld->clear_layoutdriver(nfss);
79 module_put(nfss->pnfs_curr_ld->owner);
80 }
81 nfss->pnfs_curr_ld = NULL;
82}
83
/*
 * Try to set the server's pnfs module to the pnfs layout type specified by id.
 * Currently only one pNFS layout driver per filesystem is supported.
 *
 * @id layout type. Zero (illegal layout type) indicates pNFS not in use.
 */
void
set_pnfs_layoutdriver(struct nfs_server *server, u32 id)
{
	struct pnfs_layoutdriver_type *ld_type = NULL;

	if (id == 0)
		goto out_no_driver;
	/* The server must have advertised pNFS support at EXCHANGE_ID time */
	if (!(server->nfs_client->cl_exchange_flags &
		 (EXCHGID4_FLAG_USE_NON_PNFS | EXCHGID4_FLAG_USE_PNFS_MDS))) {
		printk(KERN_ERR "%s: id %u cl_exchange_flags 0x%x\n", __func__,
		       id, server->nfs_client->cl_exchange_flags);
		goto out_no_driver;
	}
	ld_type = find_pnfs_driver(id);
	if (!ld_type) {
		/* Not registered yet: try autoloading the module, then retry */
		request_module("%s-%u", LAYOUT_NFSV4_1_MODULE_PREFIX, id);
		ld_type = find_pnfs_driver(id);
		if (!ld_type) {
			dprintk("%s: No pNFS module found for %u.\n",
				__func__, id);
			goto out_no_driver;
		}
	}
	/* Pin the driver module for the lifetime of this mount;
	 * dropped in unset_pnfs_layoutdriver */
	if (!try_module_get(ld_type->owner)) {
		dprintk("%s: Could not grab reference on module\n", __func__);
		goto out_no_driver;
	}
	server->pnfs_curr_ld = ld_type;
	if (ld_type->set_layoutdriver(server)) {
		printk(KERN_ERR
		       "%s: Error initializing mount point for layout driver %u.\n",
		       __func__, id);
		module_put(ld_type->owner);
		goto out_no_driver;
	}
	dprintk("%s: pNFS module for %u set\n", __func__, id);
	return;

out_no_driver:
	/* Fall back to plain NFSv4 I/O through the MDS */
	dprintk("%s: Using NFSv4 I/O\n", __func__);
	server->pnfs_curr_ld = NULL;
}
132
/*
 * Register a layout driver in pnfs_modules_tbl.  Called by layout driver
 * modules at init time.  Returns 0 on success, -EINVAL for a reserved id,
 * missing mandatory ops, or an already-registered id.
 */
int
pnfs_register_layoutdriver(struct pnfs_layoutdriver_type *ld_type)
{
	int status = -EINVAL;
	struct pnfs_layoutdriver_type *tmp;

	/* id 0 means "pNFS not in use" (see set_pnfs_layoutdriver) */
	if (ld_type->id == 0) {
		printk(KERN_ERR "%s id 0 is reserved\n", __func__);
		return status;
	}
	if (!ld_type->alloc_lseg || !ld_type->free_lseg) {
		printk(KERN_ERR "%s Layout driver must provide "
		       "alloc_lseg and free_lseg.\n", __func__);
		return status;
	}

	spin_lock(&pnfs_spinlock);
	tmp = find_pnfs_driver_locked(ld_type->id);
	if (!tmp) {
		list_add(&ld_type->pnfs_tblid, &pnfs_modules_tbl);
		status = 0;
		dprintk("%s Registering id:%u name:%s\n", __func__, ld_type->id,
			ld_type->name);
	} else {
		printk(KERN_ERR "%s Module with id %d already loaded!\n",
			__func__, ld_type->id);
	}
	spin_unlock(&pnfs_spinlock);

	return status;
}
EXPORT_SYMBOL_GPL(pnfs_register_layoutdriver);
165
/* Remove a layout driver from pnfs_modules_tbl; called by layout driver
 * modules at exit time. */
void
pnfs_unregister_layoutdriver(struct pnfs_layoutdriver_type *ld_type)
{
	dprintk("%s Deregistering id:%u\n", __func__, ld_type->id);
	spin_lock(&pnfs_spinlock);
	list_del(&ld_type->pnfs_tblid);
	spin_unlock(&pnfs_spinlock);
}
EXPORT_SYMBOL_GPL(pnfs_unregister_layoutdriver);
175
176/*
177 * pNFS client layout cache
178 */
179
/* Take a reference on the layout header; caller holds inode->i_lock. */
static void
get_layout_hdr_locked(struct pnfs_layout_hdr *lo)
{
	assert_spin_locked(&lo->inode->i_lock);
	lo->refcount++;
}
186
/* Drop a reference on the layout header; caller holds inode->i_lock.
 * On the final put the header is detached from the nfs_inode and freed.
 */
static void
put_layout_hdr_locked(struct pnfs_layout_hdr *lo)
{
	assert_spin_locked(&lo->inode->i_lock);
	BUG_ON(lo->refcount == 0);

	lo->refcount--;
	if (!lo->refcount) {
		dprintk("%s: freeing layout cache %p\n", __func__, lo);
		/* must be off the per-client list before the header dies */
		BUG_ON(!list_empty(&lo->layouts));
		NFS_I(lo->inode)->layout = NULL;
		kfree(lo);
	}
}
201
/* Lock-taking wrapper around put_layout_hdr_locked. */
void
put_layout_hdr(struct inode *inode)
{
	spin_lock(&inode->i_lock);
	put_layout_hdr_locked(NFS_I(inode)->layout);
	spin_unlock(&inode->i_lock);
}
209
210static void
211init_lseg(struct pnfs_layout_hdr *lo, struct pnfs_layout_segment *lseg)
212{
213 INIT_LIST_HEAD(&lseg->fi_list);
214 kref_init(&lseg->kref);
215 lseg->layout = lo;
216}
217
218/* Called without i_lock held, as the free_lseg call may sleep */
219static void
220destroy_lseg(struct kref *kref)
221{
222 struct pnfs_layout_segment *lseg =
223 container_of(kref, struct pnfs_layout_segment, kref);
224 struct inode *ino = lseg->layout->inode;
225
226 dprintk("--> %s\n", __func__);
227 NFS_SERVER(ino)->pnfs_curr_ld->free_lseg(lseg);
228 /* Matched by get_layout_hdr_locked in pnfs_insert_layout */
229 put_layout_hdr(ino);
230}
231
232static void
233put_lseg(struct pnfs_layout_segment *lseg)
234{
235 if (!lseg)
236 return;
237
238 dprintk("%s: lseg %p ref %d\n", __func__, lseg,
239 atomic_read(&lseg->kref.refcount));
240 kref_put(&lseg->kref, destroy_lseg);
241}
242
/* Detach all segments from 'lo' onto tmp_list and unhook the layout from
 * the per-client list.  Caller holds inode->i_lock; the segments are
 * actually freed later by pnfs_free_lseg_list without the lock, since
 * free_lseg may sleep.  Lock nesting here: i_lock -> cl_lock -> seqlock.
 */
static void
pnfs_clear_lseg_list(struct pnfs_layout_hdr *lo, struct list_head *tmp_list)
{
	struct pnfs_layout_segment *lseg, *next;
	struct nfs_client *clp;

	dprintk("%s:Begin lo %p\n", __func__, lo);

	assert_spin_locked(&lo->inode->i_lock);
	list_for_each_entry_safe(lseg, next, &lo->segs, fi_list) {
		dprintk("%s: freeing lseg %p\n", __func__, lseg);
		list_move(&lseg->fi_list, tmp_list);
	}
	clp = NFS_SERVER(lo->inode)->nfs_client;
	spin_lock(&clp->cl_lock);
	/* List does not take a reference, so no need for put here */
	list_del_init(&lo->layouts);
	spin_unlock(&clp->cl_lock);
	/* the layout stateid is no longer valid with all segments gone */
	write_seqlock(&lo->seqlock);
	clear_bit(NFS_LAYOUT_STATEID_SET, &lo->state);
	write_sequnlock(&lo->seqlock);

	dprintk("%s:Return\n", __func__);
}
267
268static void
269pnfs_free_lseg_list(struct list_head *tmp_list)
270{
271 struct pnfs_layout_segment *lseg;
272
273 while (!list_empty(tmp_list)) {
274 lseg = list_entry(tmp_list->next, struct pnfs_layout_segment,
275 fi_list);
276 dprintk("%s calling put_lseg on %p\n", __func__, lseg);
277 list_del(&lseg->fi_list);
278 put_lseg(lseg);
279 }
280}
281
/* Tear down the inode's layout cache: detach all segments under i_lock,
 * then free them unlocked (free_lseg may sleep). */
void
pnfs_destroy_layout(struct nfs_inode *nfsi)
{
	struct pnfs_layout_hdr *lo;
	LIST_HEAD(tmp_list);

	spin_lock(&nfsi->vfs_inode.i_lock);
	lo = nfsi->layout;
	if (lo) {
		pnfs_clear_lseg_list(lo, &tmp_list);
		/* Matched by refcount set to 1 in alloc_init_layout_hdr */
		put_layout_hdr_locked(lo);
	}
	spin_unlock(&nfsi->vfs_inode.i_lock);
	pnfs_free_lseg_list(&tmp_list);
}
298
/*
 * Called by the state manager to remove all layouts established under an
 * expired lease.
 */
void
pnfs_destroy_all_layouts(struct nfs_client *clp)
{
	struct pnfs_layout_hdr *lo;
	LIST_HEAD(tmp_list);

	/* steal the whole per-client layout list under cl_lock */
	spin_lock(&clp->cl_lock);
	list_splice_init(&clp->cl_layouts, &tmp_list);
	spin_unlock(&clp->cl_lock);

	while (!list_empty(&tmp_list)) {
		lo = list_entry(tmp_list.next, struct pnfs_layout_hdr,
				layouts);
		dprintk("%s freeing layout for inode %lu\n", __func__,
			lo->inode->i_ino);
		/* unlinks lo from tmp_list via pnfs_clear_lseg_list's
		 * list_del_init(&lo->layouts) */
		pnfs_destroy_layout(NFS_I(lo->inode));
	}
}
321
/* update lo->stateid with new if is more recent
 *
 * lo->stateid could be the open stateid, in which case we just use what given.
 */
static void
pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo,
			const nfs4_stateid *new)
{
	nfs4_stateid *old = &lo->stateid;
	bool overwrite = false;

	write_seqlock(&lo->seqlock);
	/* Take the new stateid outright when none is set yet, or when the
	 * "other" field differs (a different stateid lineage entirely). */
	if (!test_bit(NFS_LAYOUT_STATEID_SET, &lo->state) ||
	    memcmp(old->stateid.other, new->stateid.other, sizeof(new->stateid.other)))
		overwrite = true;
	else {
		u32 oldseq, newseq;

		oldseq = be32_to_cpu(old->stateid.seqid);
		newseq = be32_to_cpu(new->stateid.seqid);
		/* serial-number style compare: newer wins even across wrap */
		if ((int)(newseq - oldseq) > 0)
			overwrite = true;
	}
	if (overwrite)
		memcpy(&old->stateid, &new->stateid, sizeof(new->stateid));
	/* NOTE(review): NFS_LAYOUT_STATEID_SET is not set here; within this
	 * file it is only set by pnfs_layout_from_open_stateid -- confirm
	 * that is intended. */
	write_sequnlock(&lo->seqlock);
}
349
/* Seed the layout stateid from the open stateid.  Writes under
 * lo->seqlock; the inner read loop retries if the open stateid in
 * 'state' changes while we copy it. */
static void
pnfs_layout_from_open_stateid(struct pnfs_layout_hdr *lo,
			      struct nfs4_state *state)
{
	int seq;

	dprintk("--> %s\n", __func__);
	write_seqlock(&lo->seqlock);
	do {
		seq = read_seqbegin(&state->seqlock);
		memcpy(lo->stateid.data, state->stateid.data,
		       sizeof(state->stateid.data));
	} while (read_seqretry(&state->seqlock, seq));
	set_bit(NFS_LAYOUT_STATEID_SET, &lo->state);
	write_sequnlock(&lo->seqlock);
	dprintk("<-- %s\n", __func__);
}
367
/* Copy a consistent snapshot of the layout stateid into *dst.  If no
 * layout stateid has been established yet, seed it from the open stateid
 * first (the seqlock write in that path forces this read loop to retry).
 */
void
pnfs_get_layout_stateid(nfs4_stateid *dst, struct pnfs_layout_hdr *lo,
			struct nfs4_state *open_state)
{
	int seq;

	dprintk("--> %s\n", __func__);
	do {
		seq = read_seqbegin(&lo->seqlock);
		if (!test_bit(NFS_LAYOUT_STATEID_SET, &lo->state)) {
			/* This will trigger retry of the read */
			pnfs_layout_from_open_stateid(lo, open_state);
		} else
			memcpy(dst->data, lo->stateid.data,
			       sizeof(lo->stateid.data));
	} while (read_seqretry(&lo->seqlock, seq));
	dprintk("<-- %s\n", __func__);
}
386
387/*
388* Get layout from server.
389* for now, assume that whole file layouts are requested.
390* arg->offset: 0
391* arg->length: all ones
392*/
393static struct pnfs_layout_segment *
394send_layoutget(struct pnfs_layout_hdr *lo,
395 struct nfs_open_context *ctx,
396 u32 iomode)
397{
398 struct inode *ino = lo->inode;
399 struct nfs_server *server = NFS_SERVER(ino);
400 struct nfs4_layoutget *lgp;
401 struct pnfs_layout_segment *lseg = NULL;
402
403 dprintk("--> %s\n", __func__);
404
405 BUG_ON(ctx == NULL);
406 lgp = kzalloc(sizeof(*lgp), GFP_KERNEL);
407 if (lgp == NULL) {
408 put_layout_hdr(lo->inode);
409 return NULL;
410 }
411 lgp->args.minlength = NFS4_MAX_UINT64;
412 lgp->args.maxcount = PNFS_LAYOUT_MAXSIZE;
413 lgp->args.range.iomode = iomode;
414 lgp->args.range.offset = 0;
415 lgp->args.range.length = NFS4_MAX_UINT64;
416 lgp->args.type = server->pnfs_curr_ld->id;
417 lgp->args.inode = ino;
418 lgp->args.ctx = get_nfs_open_context(ctx);
419 lgp->lsegpp = &lseg;
420
421 /* Synchronously retrieve layout information from server and
422 * store in lseg.
423 */
424 nfs4_proc_layoutget(lgp);
425 if (!lseg) {
426 /* remember that LAYOUTGET failed and suspend trying */
427 set_bit(lo_fail_bit(iomode), &lo->state);
428 }
429 return lseg;
430}
431
432/*
433 * Compare two layout segments for sorting into layout cache.
434 * We want to preferentially return RW over RO layouts, so ensure those
435 * are seen first.
436 */
437static s64
438cmp_layout(u32 iomode1, u32 iomode2)
439{
440 /* read > read/write */
441 return (int)(iomode2 == IOMODE_READ) - (int)(iomode1 == IOMODE_READ);
442}
443
/* Insert 'lseg' into the layout's segment list, keeping RW segments ahead
 * of READ segments (ordering defined by cmp_layout).  Takes a reference
 * on the layout header, matched in destroy_lseg.  Caller holds i_lock.
 */
static void
pnfs_insert_layout(struct pnfs_layout_hdr *lo,
		   struct pnfs_layout_segment *lseg)
{
	struct pnfs_layout_segment *lp;
	int found = 0;

	dprintk("%s:Begin\n", __func__);

	assert_spin_locked(&lo->inode->i_lock);
	if (list_empty(&lo->segs)) {
		struct nfs_client *clp = NFS_SERVER(lo->inode)->nfs_client;

		/* first segment: hook the layout onto the per-client list */
		spin_lock(&clp->cl_lock);
		BUG_ON(!list_empty(&lo->layouts));
		list_add_tail(&lo->layouts, &clp->cl_layouts);
		spin_unlock(&clp->cl_lock);
	}
	list_for_each_entry(lp, &lo->segs, fi_list) {
		if (cmp_layout(lp->range.iomode, lseg->range.iomode) > 0)
			continue;
		/* insert before the first entry that does not sort above us */
		list_add_tail(&lseg->fi_list, &lp->fi_list);
		dprintk("%s: inserted lseg %p "
			"iomode %d offset %llu length %llu before "
			"lp %p iomode %d offset %llu length %llu\n",
			__func__, lseg, lseg->range.iomode,
			lseg->range.offset, lseg->range.length,
			lp, lp->range.iomode, lp->range.offset,
			lp->range.length);
		found = 1;
		break;
	}
	if (!found) {
		list_add_tail(&lseg->fi_list, &lo->segs);
		dprintk("%s: inserted lseg %p "
			"iomode %d offset %llu length %llu at tail\n",
			__func__, lseg, lseg->range.iomode,
			lseg->range.offset, lseg->range.length);
	}
	get_layout_hdr_locked(lo);

	dprintk("%s:Return\n", __func__);
}
487
488static struct pnfs_layout_hdr *
489alloc_init_layout_hdr(struct inode *ino)
490{
491 struct pnfs_layout_hdr *lo;
492
493 lo = kzalloc(sizeof(struct pnfs_layout_hdr), GFP_KERNEL);
494 if (!lo)
495 return NULL;
496 lo->refcount = 1;
497 INIT_LIST_HEAD(&lo->layouts);
498 INIT_LIST_HEAD(&lo->segs);
499 seqlock_init(&lo->seqlock);
500 lo->inode = ino;
501 return lo;
502}
503
/* Return the inode's layout header, allocating one if needed.  Called
 * and returns with i_lock held, but drops it around the GFP_KERNEL
 * allocation; a racing allocator is handled by the recheck below.
 * Returns NULL only if the allocation failed. */
static struct pnfs_layout_hdr *
pnfs_find_alloc_layout(struct inode *ino)
{
	struct nfs_inode *nfsi = NFS_I(ino);
	struct pnfs_layout_hdr *new = NULL;

	dprintk("%s Begin ino=%p layout=%p\n", __func__, ino, nfsi->layout);

	assert_spin_locked(&ino->i_lock);
	if (nfsi->layout)
		return nfsi->layout;

	spin_unlock(&ino->i_lock);
	new = alloc_init_layout_hdr(ino);
	spin_lock(&ino->i_lock);

	if (likely(nfsi->layout == NULL))	/* Won the race? */
		nfsi->layout = new;
	else
		kfree(new);	/* lost the race: discard our copy */
	return nfsi->layout;
}
526
527/*
528 * iomode matching rules:
529 * iomode lseg match
530 * ----- ----- -----
531 * ANY READ true
532 * ANY RW true
533 * RW READ false
534 * RW RW true
535 * READ READ true
536 * READ RW true
537 */
538static int
539is_matching_lseg(struct pnfs_layout_segment *lseg, u32 iomode)
540{
541 return (iomode != IOMODE_RW || lseg->range.iomode == IOMODE_RW);
542}
543
544/*
545 * lookup range in layout
546 */
547static struct pnfs_layout_segment *
548pnfs_has_layout(struct pnfs_layout_hdr *lo, u32 iomode)
549{
550 struct pnfs_layout_segment *lseg, *ret = NULL;
551
552 dprintk("%s:Begin\n", __func__);
553
554 assert_spin_locked(&lo->inode->i_lock);
555 list_for_each_entry(lseg, &lo->segs, fi_list) {
556 if (is_matching_lseg(lseg, iomode)) {
557 ret = lseg;
558 break;
559 }
560 if (cmp_layout(iomode, lseg->range.iomode) > 0)
561 break;
562 }
563
564 dprintk("%s:Return lseg %p ref %d\n",
565 __func__, ret, ret ? atomic_read(&ret->kref.refcount) : 0);
566 return ret;
567}
568
/*
 * Layout segment is retrieved from the server if not cached.
 * The appropriate layout segment is referenced and returned to the caller.
 * Returns NULL when pNFS is disabled for this superblock, the header
 * cannot be allocated, a previous LAYOUTGET for this iomode failed, or
 * the LAYOUTGET itself fails.
 */
struct pnfs_layout_segment *
pnfs_update_layout(struct inode *ino,
		   struct nfs_open_context *ctx,
		   enum pnfs_iomode iomode)
{
	struct nfs_inode *nfsi = NFS_I(ino);
	struct pnfs_layout_hdr *lo;
	struct pnfs_layout_segment *lseg = NULL;

	if (!pnfs_enabled_sb(NFS_SERVER(ino)))
		return NULL;
	spin_lock(&ino->i_lock);
	lo = pnfs_find_alloc_layout(ino);
	if (lo == NULL) {
		dprintk("%s ERROR: can't get pnfs_layout_hdr\n", __func__);
		goto out_unlock;
	}

	/* Check to see if the layout for the given range already exists */
	lseg = pnfs_has_layout(lo, iomode);
	if (lseg) {
		dprintk("%s: Using cached lseg %p for iomode %d)\n",
			__func__, lseg, iomode);
		goto out_unlock;
	}

	/* if LAYOUTGET already failed once we don't try again */
	if (test_bit(lo_fail_bit(iomode), &nfsi->layout->state))
		goto out_unlock;

	/* Reference dropped by send_layoutget on allocation failure,
	 * or in nfs4_layoutget_release after the RPC completes. */
	get_layout_hdr_locked(lo); /* Matched in nfs4_layoutget_release */
	spin_unlock(&ino->i_lock);

	lseg = send_layoutget(lo, ctx, iomode);
out:
	dprintk("%s end, state 0x%lx lseg %p\n", __func__,
		nfsi->layout->state, lseg);
	return lseg;
out_unlock:
	spin_unlock(&ino->i_lock);
	goto out;
}
615
/* Turn a LAYOUTGET reply into a layout segment (via the layout driver's
 * alloc_lseg), insert it into the inode's layout cache, and update the
 * layout stateid.  Returns 0 on success or a negative errno. */
int
pnfs_layout_process(struct nfs4_layoutget *lgp)
{
	struct pnfs_layout_hdr *lo = NFS_I(lgp->args.inode)->layout;
	struct nfs4_layoutget_res *res = &lgp->res;
	struct pnfs_layout_segment *lseg;
	struct inode *ino = lo->inode;
	int status = 0;

	/* Inject layout blob into I/O device driver */
	lseg = NFS_SERVER(ino)->pnfs_curr_ld->alloc_lseg(lo, res);
	if (!lseg || IS_ERR(lseg)) {
		if (!lseg)
			status = -ENOMEM;
		else
			status = PTR_ERR(lseg);
		dprintk("%s: Could not allocate layout: error %d\n",
			__func__, status);
		goto out;
	}

	spin_lock(&ino->i_lock);
	init_lseg(lo, lseg);
	lseg->range = res->range;
	/* hand the new segment back to the waiter in send_layoutget */
	*lgp->lsegpp = lseg;
	pnfs_insert_layout(lo, lseg);

	/* Done processing layoutget. Set the layout stateid */
	pnfs_set_layout_stateid(lo, &res->stateid);
	spin_unlock(&ino->i_lock);
out:
	return status;
}
649
/*
 * Device ID cache. Currently supports one layout type per struct nfs_client.
 * Add layout type to the lookup key to expand to support multiple types.
 *
 * Allocate (or take another reference on) the client's deviceid cache.
 * Returns 0 on success, -ENOMEM on allocation failure.
 */
int
pnfs_alloc_init_deviceid_cache(struct nfs_client *clp,
			 void (*free_callback)(struct pnfs_deviceid_node *))
{
	struct pnfs_deviceid_cache *c;

	/* allocate before taking cl_lock; discarded if we lose the race */
	c = kzalloc(sizeof(struct pnfs_deviceid_cache), GFP_KERNEL);
	if (!c)
		return -ENOMEM;
	spin_lock(&clp->cl_lock);
	if (clp->cl_devid_cache != NULL) {
		/* cache already exists: just bump its reference count */
		atomic_inc(&clp->cl_devid_cache->dc_ref);
		dprintk("%s [kref [%d]]\n", __func__,
			atomic_read(&clp->cl_devid_cache->dc_ref));
		kfree(c);
	} else {
		/* kzalloc initializes hlists */
		spin_lock_init(&c->dc_lock);
		atomic_set(&c->dc_ref, 1);
		c->dc_free_callback = free_callback;
		clp->cl_devid_cache = c;
		dprintk("%s [new]\n", __func__);
	}
	spin_unlock(&clp->cl_lock);
	return 0;
}
EXPORT_SYMBOL_GPL(pnfs_alloc_init_deviceid_cache);
681
/*
 * Called from pnfs_layoutdriver_type->free_lseg
 * last layout segment reference frees deviceid
 */
void
pnfs_put_deviceid(struct pnfs_deviceid_cache *c,
		  struct pnfs_deviceid_node *devid)
{
	struct nfs4_deviceid *id = &devid->de_id;
	struct pnfs_deviceid_node *d;
	struct hlist_node *n;
	long h = nfs4_deviceid_hash(id);

	dprintk("%s [%d]\n", __func__, atomic_read(&devid->de_ref));
	/* drop the ref; only proceed (holding dc_lock) if it hit zero */
	if (!atomic_dec_and_lock(&devid->de_ref, &c->dc_lock))
		return;

	hlist_for_each_entry_rcu(d, n, &c->dc_deviceids[h], de_node)
		if (!memcmp(&d->de_id, id, sizeof(*id))) {
			hlist_del_rcu(&d->de_node);
			spin_unlock(&c->dc_lock);
			/* wait out lockless readers before freeing */
			synchronize_rcu();
			c->dc_free_callback(devid);
			return;
		}
	spin_unlock(&c->dc_lock);
	/* Why wasn't it found in the list? */
	BUG();
}
EXPORT_SYMBOL_GPL(pnfs_put_deviceid);
712
/* Find and reference a deviceid.  RCU-protected lookup; returns the node
 * with an extra reference taken, or NULL if absent or already dying. */
struct pnfs_deviceid_node *
pnfs_find_get_deviceid(struct pnfs_deviceid_cache *c, struct nfs4_deviceid *id)
{
	struct pnfs_deviceid_node *d;
	struct hlist_node *n;
	long hash = nfs4_deviceid_hash(id);

	dprintk("--> %s hash %ld\n", __func__, hash);
	rcu_read_lock();
	hlist_for_each_entry_rcu(d, n, &c->dc_deviceids[hash], de_node) {
		if (!memcmp(&d->de_id, id, sizeof(*id))) {
			/* refuse a node whose refcount already reached zero
			 * (it is being torn down in pnfs_put_deviceid) */
			if (!atomic_inc_not_zero(&d->de_ref)) {
				goto fail;
			} else {
				rcu_read_unlock();
				return d;
			}
		}
	}
fail:
	rcu_read_unlock();
	return NULL;
}
EXPORT_SYMBOL_GPL(pnfs_find_get_deviceid);
738
/*
 * Add a deviceid to the cache.
 * GETDEVICEINFOs for same deviceid can race. If deviceid is found, discard new
 *
 * Returns the node that ends up in the cache, referenced; on a lost race
 * the duplicate 'new' is handed back to the driver's free callback.
 */
struct pnfs_deviceid_node *
pnfs_add_deviceid(struct pnfs_deviceid_cache *c, struct pnfs_deviceid_node *new)
{
	struct pnfs_deviceid_node *d;
	long hash = nfs4_deviceid_hash(&new->de_id);

	dprintk("--> %s hash %ld\n", __func__, hash);
	spin_lock(&c->dc_lock);
	/* lost the race? keep the existing node (with the reference that
	 * the lookup just took) and discard our duplicate */
	d = pnfs_find_get_deviceid(c, &new->de_id);
	if (d) {
		spin_unlock(&c->dc_lock);
		dprintk("%s [discard]\n", __func__);
		c->dc_free_callback(new);
		return d;
	}
	INIT_HLIST_NODE(&new->de_node);
	atomic_set(&new->de_ref, 1);
	hlist_add_head_rcu(&new->de_node, &c->dc_deviceids[hash]);
	spin_unlock(&c->dc_lock);
	dprintk("%s [new]\n", __func__);
	return new;
}
EXPORT_SYMBOL_GPL(pnfs_add_deviceid);
766
/* Drop a reference on the client's deviceid cache (taken in
 * pnfs_alloc_init_deviceid_cache); the last put detaches and frees it.
 * The cache must be empty by then. */
void
pnfs_put_deviceid_cache(struct nfs_client *clp)
{
	struct pnfs_deviceid_cache *local = clp->cl_devid_cache;

	dprintk("--> %s cl_devid_cache %p\n", __func__, clp->cl_devid_cache);
	if (atomic_dec_and_lock(&local->dc_ref, &clp->cl_lock)) {
		int i;
		/* Verify cache is empty */
		for (i = 0; i < NFS4_DEVICE_ID_HASH_SIZE; i++)
			BUG_ON(!hlist_empty(&local->dc_deviceids[i]));
		clp->cl_devid_cache = NULL;
		spin_unlock(&clp->cl_lock);
		kfree(local);
	}
}
EXPORT_SYMBOL_GPL(pnfs_put_deviceid_cache);
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
new file mode 100644
index 000000000000..e12367d50489
--- /dev/null
+++ b/fs/nfs/pnfs.h
@@ -0,0 +1,189 @@
1/*
2 * pNFS client data structures.
3 *
4 * Copyright (c) 2002
5 * The Regents of the University of Michigan
6 * All Rights Reserved
7 *
8 * Dean Hildebrand <dhildebz@umich.edu>
9 *
10 * Permission is granted to use, copy, create derivative works, and
11 * redistribute this software and such derivative works for any purpose,
12 * so long as the name of the University of Michigan is not used in
13 * any advertising or publicity pertaining to the use or distribution
14 * of this software without specific, written prior authorization. If
15 * the above copyright notice or any other identification of the
16 * University of Michigan is included in any copy of any portion of
17 * this software, then the disclaimer below must also be included.
18 *
19 * This software is provided as is, without representation or warranty
20 * of any kind either express or implied, including without limitation
21 * the implied warranties of merchantability, fitness for a particular
22 * purpose, or noninfringement. The Regents of the University of
23 * Michigan shall not be liable for any damages, including special,
24 * indirect, incidental, or consequential damages, with respect to any
25 * claim arising out of or in connection with the use of the software,
26 * even if it has been or is hereafter advised of the possibility of
27 * such damages.
28 */
29
30#ifndef FS_NFS_PNFS_H
31#define FS_NFS_PNFS_H
32
33struct pnfs_layout_segment {
34 struct list_head fi_list;
35 struct pnfs_layout_range range;
36 struct kref kref;
37 struct pnfs_layout_hdr *layout;
38};
39
40#ifdef CONFIG_NFS_V4_1
41
42#define LAYOUT_NFSV4_1_MODULE_PREFIX "nfs-layouttype4"
43
44enum {
45 NFS_LAYOUT_RO_FAILED = 0, /* get ro layout failed stop trying */
46 NFS_LAYOUT_RW_FAILED, /* get rw layout failed stop trying */
47 NFS_LAYOUT_STATEID_SET, /* have a valid layout stateid */
48};
49
50/* Per-layout driver specific registration structure */
51struct pnfs_layoutdriver_type {
52 struct list_head pnfs_tblid;
53 const u32 id;
54 const char *name;
55 struct module *owner;
56 int (*set_layoutdriver) (struct nfs_server *);
57 int (*clear_layoutdriver) (struct nfs_server *);
58 struct pnfs_layout_segment * (*alloc_lseg) (struct pnfs_layout_hdr *layoutid, struct nfs4_layoutget_res *lgr);
59 void (*free_lseg) (struct pnfs_layout_segment *lseg);
60};
61
62struct pnfs_layout_hdr {
63 unsigned long refcount;
64 struct list_head layouts; /* other client layouts */
65 struct list_head segs; /* layout segments list */
66 seqlock_t seqlock; /* Protects the stateid */
67 nfs4_stateid stateid;
68 unsigned long state;
69 struct inode *inode;
70};
71
72struct pnfs_device {
73 struct nfs4_deviceid dev_id;
74 unsigned int layout_type;
75 unsigned int mincount;
76 struct page **pages;
77 void *area;
78 unsigned int pgbase;
79 unsigned int pglen;
80};
81
82/*
83 * Device ID RCU cache. A device ID is unique per client ID and layout type.
84 */
85#define NFS4_DEVICE_ID_HASH_BITS 5
86#define NFS4_DEVICE_ID_HASH_SIZE (1 << NFS4_DEVICE_ID_HASH_BITS)
87#define NFS4_DEVICE_ID_HASH_MASK (NFS4_DEVICE_ID_HASH_SIZE - 1)
88
89static inline u32
90nfs4_deviceid_hash(struct nfs4_deviceid *id)
91{
92 unsigned char *cptr = (unsigned char *)id->data;
93 unsigned int nbytes = NFS4_DEVICEID4_SIZE;
94 u32 x = 0;
95
96 while (nbytes--) {
97 x *= 37;
98 x += *cptr++;
99 }
100 return x & NFS4_DEVICE_ID_HASH_MASK;
101}
102
103struct pnfs_deviceid_node {
104 struct hlist_node de_node;
105 struct nfs4_deviceid de_id;
106 atomic_t de_ref;
107};
108
109struct pnfs_deviceid_cache {
110 spinlock_t dc_lock;
111 atomic_t dc_ref;
112 void (*dc_free_callback)(struct pnfs_deviceid_node *);
113 struct hlist_head dc_deviceids[NFS4_DEVICE_ID_HASH_SIZE];
114};
115
116extern int pnfs_alloc_init_deviceid_cache(struct nfs_client *,
117 void (*free_callback)(struct pnfs_deviceid_node *));
118extern void pnfs_put_deviceid_cache(struct nfs_client *);
119extern struct pnfs_deviceid_node *pnfs_find_get_deviceid(
120 struct pnfs_deviceid_cache *,
121 struct nfs4_deviceid *);
122extern struct pnfs_deviceid_node *pnfs_add_deviceid(
123 struct pnfs_deviceid_cache *,
124 struct pnfs_deviceid_node *);
125extern void pnfs_put_deviceid(struct pnfs_deviceid_cache *c,
126 struct pnfs_deviceid_node *devid);
127
128extern int pnfs_register_layoutdriver(struct pnfs_layoutdriver_type *);
129extern void pnfs_unregister_layoutdriver(struct pnfs_layoutdriver_type *);
130
131/* nfs4proc.c */
132extern int nfs4_proc_getdeviceinfo(struct nfs_server *server,
133 struct pnfs_device *dev);
134extern int nfs4_proc_layoutget(struct nfs4_layoutget *lgp);
135
136/* pnfs.c */
137struct pnfs_layout_segment *
138pnfs_update_layout(struct inode *ino, struct nfs_open_context *ctx,
139 enum pnfs_iomode access_type);
140void set_pnfs_layoutdriver(struct nfs_server *, u32 id);
141void unset_pnfs_layoutdriver(struct nfs_server *);
142int pnfs_layout_process(struct nfs4_layoutget *lgp);
143void pnfs_destroy_layout(struct nfs_inode *);
144void pnfs_destroy_all_layouts(struct nfs_client *);
145void put_layout_hdr(struct inode *inode);
146void pnfs_get_layout_stateid(nfs4_stateid *dst, struct pnfs_layout_hdr *lo,
147 struct nfs4_state *open_state);
148
149
150static inline int lo_fail_bit(u32 iomode)
151{
152 return iomode == IOMODE_RW ?
153 NFS_LAYOUT_RW_FAILED : NFS_LAYOUT_RO_FAILED;
154}
155
156/* Return true if a layout driver is being used for this mountpoint */
157static inline int pnfs_enabled_sb(struct nfs_server *nfss)
158{
159 return nfss->pnfs_curr_ld != NULL;
160}
161
162#else /* CONFIG_NFS_V4_1 */
163
164static inline void pnfs_destroy_all_layouts(struct nfs_client *clp)
165{
166}
167
168static inline void pnfs_destroy_layout(struct nfs_inode *nfsi)
169{
170}
171
172static inline struct pnfs_layout_segment *
173pnfs_update_layout(struct inode *ino, struct nfs_open_context *ctx,
174 enum pnfs_iomode access_type)
175{
176 return NULL;
177}
178
179static inline void set_pnfs_layoutdriver(struct nfs_server *s, u32 id)
180{
181}
182
183static inline void unset_pnfs_layoutdriver(struct nfs_server *s)
184{
185}
186
187#endif /* CONFIG_NFS_V4_1 */
188
189#endif /* FS_NFS_PNFS_H */
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index 79859c81a943..e4b62c6f5a6e 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -25,6 +25,7 @@
25#include "internal.h" 25#include "internal.h"
26#include "iostat.h" 26#include "iostat.h"
27#include "fscache.h" 27#include "fscache.h"
28#include "pnfs.h"
28 29
29#define NFSDBG_FACILITY NFSDBG_PAGECACHE 30#define NFSDBG_FACILITY NFSDBG_PAGECACHE
30 31
@@ -120,6 +121,7 @@ int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode,
120 len = nfs_page_length(page); 121 len = nfs_page_length(page);
121 if (len == 0) 122 if (len == 0)
122 return nfs_return_empty_page(page); 123 return nfs_return_empty_page(page);
124 pnfs_update_layout(inode, ctx, IOMODE_READ);
123 new = nfs_create_request(ctx, inode, page, 0, len); 125 new = nfs_create_request(ctx, inode, page, 0, len);
124 if (IS_ERR(new)) { 126 if (IS_ERR(new)) {
125 unlock_page(page); 127 unlock_page(page);
@@ -624,6 +626,7 @@ int nfs_readpages(struct file *filp, struct address_space *mapping,
624 if (ret == 0) 626 if (ret == 0)
625 goto read_complete; /* all pages were read */ 627 goto read_complete; /* all pages were read */
626 628
629 pnfs_update_layout(inode, desc.ctx, IOMODE_READ);
627 if (rsize < PAGE_CACHE_SIZE) 630 if (rsize < PAGE_CACHE_SIZE)
628 nfs_pageio_init(&pgio, inode, nfs_pagein_multi, rsize, 0); 631 nfs_pageio_init(&pgio, inode, nfs_pagein_multi, rsize, 0);
629 else 632 else
diff --git a/fs/nfsd/Kconfig b/fs/nfsd/Kconfig
index 7cf4ddafb4ab..31a78fce4732 100644
--- a/fs/nfsd/Kconfig
+++ b/fs/nfsd/Kconfig
@@ -29,6 +29,18 @@ config NFSD
29 29
30 If unsure, say N. 30 If unsure, say N.
31 31
32config NFSD_DEPRECATED
33 bool "Include support for deprecated syscall interface to NFSD"
34 depends on NFSD
35 default y
36 help
37 The syscall interface to nfsd was obsoleted in 2.6.0 by a new
38 filesystem based interface. The old interface is due for removal
39 in 2.6.40. If you wish to remove the interface before then
40 say N.
41
42 In unsure, say Y.
43
32config NFSD_V2_ACL 44config NFSD_V2_ACL
33 bool 45 bool
34 depends on NFSD 46 depends on NFSD
diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c
index c2a4f71d87dd..c0fcb7ab7f6d 100644
--- a/fs/nfsd/export.c
+++ b/fs/nfsd/export.c
@@ -28,9 +28,6 @@
28typedef struct auth_domain svc_client; 28typedef struct auth_domain svc_client;
29typedef struct svc_export svc_export; 29typedef struct svc_export svc_export;
30 30
31static void exp_do_unexport(svc_export *unexp);
32static int exp_verify_string(char *cp, int max);
33
34/* 31/*
35 * We have two caches. 32 * We have two caches.
36 * One maps client+vfsmnt+dentry to export options - the export map 33 * One maps client+vfsmnt+dentry to export options - the export map
@@ -802,6 +799,7 @@ exp_find_key(svc_client *clp, int fsid_type, u32 *fsidv, struct cache_req *reqp)
802 return ek; 799 return ek;
803} 800}
804 801
802#ifdef CONFIG_NFSD_DEPRECATED
805static int exp_set_key(svc_client *clp, int fsid_type, u32 *fsidv, 803static int exp_set_key(svc_client *clp, int fsid_type, u32 *fsidv,
806 struct svc_export *exp) 804 struct svc_export *exp)
807{ 805{
@@ -852,6 +850,7 @@ exp_get_fsid_key(svc_client *clp, int fsid)
852 850
853 return exp_find_key(clp, FSID_NUM, fsidv, NULL); 851 return exp_find_key(clp, FSID_NUM, fsidv, NULL);
854} 852}
853#endif
855 854
856static svc_export *exp_get_by_name(svc_client *clp, const struct path *path, 855static svc_export *exp_get_by_name(svc_client *clp, const struct path *path,
857 struct cache_req *reqp) 856 struct cache_req *reqp)
@@ -893,6 +892,7 @@ static struct svc_export *exp_parent(svc_client *clp, struct path *path)
893 return exp; 892 return exp;
894} 893}
895 894
895#ifdef CONFIG_NFSD_DEPRECATED
896/* 896/*
897 * Hashtable locking. Write locks are placed only by user processes 897 * Hashtable locking. Write locks are placed only by user processes
898 * wanting to modify export information. 898 * wanting to modify export information.
@@ -925,6 +925,19 @@ exp_writeunlock(void)
925{ 925{
926 up_write(&hash_sem); 926 up_write(&hash_sem);
927} 927}
928#else
929
930/* hash_sem not needed once deprecated interface is removed */
931void exp_readlock(void) {}
932static inline void exp_writelock(void){}
933void exp_readunlock(void) {}
934static inline void exp_writeunlock(void){}
935
936#endif
937
938#ifdef CONFIG_NFSD_DEPRECATED
939static void exp_do_unexport(svc_export *unexp);
940static int exp_verify_string(char *cp, int max);
928 941
929static void exp_fsid_unhash(struct svc_export *exp) 942static void exp_fsid_unhash(struct svc_export *exp)
930{ 943{
@@ -935,10 +948,9 @@ static void exp_fsid_unhash(struct svc_export *exp)
935 948
936 ek = exp_get_fsid_key(exp->ex_client, exp->ex_fsid); 949 ek = exp_get_fsid_key(exp->ex_client, exp->ex_fsid);
937 if (!IS_ERR(ek)) { 950 if (!IS_ERR(ek)) {
938 ek->h.expiry_time = get_seconds()-1; 951 sunrpc_invalidate(&ek->h, &svc_expkey_cache);
939 cache_put(&ek->h, &svc_expkey_cache); 952 cache_put(&ek->h, &svc_expkey_cache);
940 } 953 }
941 svc_expkey_cache.nextcheck = get_seconds();
942} 954}
943 955
944static int exp_fsid_hash(svc_client *clp, struct svc_export *exp) 956static int exp_fsid_hash(svc_client *clp, struct svc_export *exp)
@@ -973,10 +985,9 @@ static void exp_unhash(struct svc_export *exp)
973 985
974 ek = exp_get_key(exp->ex_client, inode->i_sb->s_dev, inode->i_ino); 986 ek = exp_get_key(exp->ex_client, inode->i_sb->s_dev, inode->i_ino);
975 if (!IS_ERR(ek)) { 987 if (!IS_ERR(ek)) {
976 ek->h.expiry_time = get_seconds()-1; 988 sunrpc_invalidate(&ek->h, &svc_expkey_cache);
977 cache_put(&ek->h, &svc_expkey_cache); 989 cache_put(&ek->h, &svc_expkey_cache);
978 } 990 }
979 svc_expkey_cache.nextcheck = get_seconds();
980} 991}
981 992
982/* 993/*
@@ -1097,8 +1108,7 @@ out:
1097static void 1108static void
1098exp_do_unexport(svc_export *unexp) 1109exp_do_unexport(svc_export *unexp)
1099{ 1110{
1100 unexp->h.expiry_time = get_seconds()-1; 1111 sunrpc_invalidate(&unexp->h, &svc_export_cache);
1101 svc_export_cache.nextcheck = get_seconds();
1102 exp_unhash(unexp); 1112 exp_unhash(unexp);
1103 exp_fsid_unhash(unexp); 1113 exp_fsid_unhash(unexp);
1104} 1114}
@@ -1150,6 +1160,7 @@ out_unlock:
1150 exp_writeunlock(); 1160 exp_writeunlock();
1151 return err; 1161 return err;
1152} 1162}
1163#endif /* CONFIG_NFSD_DEPRECATED */
1153 1164
1154/* 1165/*
1155 * Obtain the root fh on behalf of a client. 1166 * Obtain the root fh on behalf of a client.
@@ -1459,25 +1470,43 @@ static void show_secinfo_flags(struct seq_file *m, int flags)
1459 show_expflags(m, flags, NFSEXP_SECINFO_FLAGS); 1470 show_expflags(m, flags, NFSEXP_SECINFO_FLAGS);
1460} 1471}
1461 1472
1473static bool secinfo_flags_equal(int f, int g)
1474{
1475 f &= NFSEXP_SECINFO_FLAGS;
1476 g &= NFSEXP_SECINFO_FLAGS;
1477 return f == g;
1478}
1479
1480static int show_secinfo_run(struct seq_file *m, struct exp_flavor_info **fp, struct exp_flavor_info *end)
1481{
1482 int flags;
1483
1484 flags = (*fp)->flags;
1485 seq_printf(m, ",sec=%d", (*fp)->pseudoflavor);
1486 (*fp)++;
1487 while (*fp != end && secinfo_flags_equal(flags, (*fp)->flags)) {
1488 seq_printf(m, ":%d", (*fp)->pseudoflavor);
1489 (*fp)++;
1490 }
1491 return flags;
1492}
1493
1462static void show_secinfo(struct seq_file *m, struct svc_export *exp) 1494static void show_secinfo(struct seq_file *m, struct svc_export *exp)
1463{ 1495{
1464 struct exp_flavor_info *f; 1496 struct exp_flavor_info *f;
1465 struct exp_flavor_info *end = exp->ex_flavors + exp->ex_nflavors; 1497 struct exp_flavor_info *end = exp->ex_flavors + exp->ex_nflavors;
1466 int lastflags = 0, first = 0; 1498 int flags;
1467 1499
1468 if (exp->ex_nflavors == 0) 1500 if (exp->ex_nflavors == 0)
1469 return; 1501 return;
1470 for (f = exp->ex_flavors; f < end; f++) { 1502 f = exp->ex_flavors;
1471 if (first || f->flags != lastflags) { 1503 flags = show_secinfo_run(m, &f, end);
1472 if (!first) 1504 if (!secinfo_flags_equal(flags, exp->ex_flags))
1473 show_secinfo_flags(m, lastflags); 1505 show_secinfo_flags(m, flags);
1474 seq_printf(m, ",sec=%d", f->pseudoflavor); 1506 while (f != end) {
1475 lastflags = f->flags; 1507 flags = show_secinfo_run(m, &f, end);
1476 } else { 1508 show_secinfo_flags(m, flags);
1477 seq_printf(m, ":%d", f->pseudoflavor);
1478 }
1479 } 1509 }
1480 show_secinfo_flags(m, lastflags);
1481} 1510}
1482 1511
1483static void exp_flags(struct seq_file *m, int flag, int fsid, 1512static void exp_flags(struct seq_file *m, int flag, int fsid,
@@ -1532,6 +1561,7 @@ const struct seq_operations nfs_exports_op = {
1532 .show = e_show, 1561 .show = e_show,
1533}; 1562};
1534 1563
1564#ifdef CONFIG_NFSD_DEPRECATED
1535/* 1565/*
1536 * Add or modify a client. 1566 * Add or modify a client.
1537 * Change requests may involve the list of host addresses. The list of 1567 * Change requests may involve the list of host addresses. The list of
@@ -1563,7 +1593,7 @@ exp_addclient(struct nfsctl_client *ncp)
1563 /* Insert client into hashtable. */ 1593 /* Insert client into hashtable. */
1564 for (i = 0; i < ncp->cl_naddr; i++) { 1594 for (i = 0; i < ncp->cl_naddr; i++) {
1565 ipv6_addr_set_v4mapped(ncp->cl_addrlist[i].s_addr, &addr6); 1595 ipv6_addr_set_v4mapped(ncp->cl_addrlist[i].s_addr, &addr6);
1566 auth_unix_add_addr(&addr6, dom); 1596 auth_unix_add_addr(&init_net, &addr6, dom);
1567 } 1597 }
1568 auth_unix_forget_old(dom); 1598 auth_unix_forget_old(dom);
1569 auth_domain_put(dom); 1599 auth_domain_put(dom);
@@ -1621,6 +1651,7 @@ exp_verify_string(char *cp, int max)
1621 printk(KERN_NOTICE "nfsd: couldn't validate string %s\n", cp); 1651 printk(KERN_NOTICE "nfsd: couldn't validate string %s\n", cp);
1622 return 0; 1652 return 0;
1623} 1653}
1654#endif /* CONFIG_NFSD_DEPRECATED */
1624 1655
1625/* 1656/*
1626 * Initialize the exports module. 1657 * Initialize the exports module.
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index 988cbb3a19b6..143da2eecd7b 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -41,7 +41,6 @@
41 41
42#define NFSPROC4_CB_NULL 0 42#define NFSPROC4_CB_NULL 0
43#define NFSPROC4_CB_COMPOUND 1 43#define NFSPROC4_CB_COMPOUND 1
44#define NFS4_STATEID_SIZE 16
45 44
46/* Index of predefined Linux callback client operations */ 45/* Index of predefined Linux callback client operations */
47 46
@@ -248,10 +247,11 @@ encode_cb_recall(struct xdr_stream *xdr, struct nfs4_delegation *dp,
248} 247}
249 248
250static void 249static void
251encode_cb_sequence(struct xdr_stream *xdr, struct nfsd4_cb_sequence *args, 250encode_cb_sequence(struct xdr_stream *xdr, struct nfsd4_callback *cb,
252 struct nfs4_cb_compound_hdr *hdr) 251 struct nfs4_cb_compound_hdr *hdr)
253{ 252{
254 __be32 *p; 253 __be32 *p;
254 struct nfsd4_session *ses = cb->cb_clp->cl_cb_session;
255 255
256 if (hdr->minorversion == 0) 256 if (hdr->minorversion == 0)
257 return; 257 return;
@@ -259,8 +259,8 @@ encode_cb_sequence(struct xdr_stream *xdr, struct nfsd4_cb_sequence *args,
259 RESERVE_SPACE(1 + NFS4_MAX_SESSIONID_LEN + 20); 259 RESERVE_SPACE(1 + NFS4_MAX_SESSIONID_LEN + 20);
260 260
261 WRITE32(OP_CB_SEQUENCE); 261 WRITE32(OP_CB_SEQUENCE);
262 WRITEMEM(args->cbs_clp->cl_sessionid.data, NFS4_MAX_SESSIONID_LEN); 262 WRITEMEM(ses->se_sessionid.data, NFS4_MAX_SESSIONID_LEN);
263 WRITE32(args->cbs_clp->cl_cb_seq_nr); 263 WRITE32(ses->se_cb_seq_nr);
264 WRITE32(0); /* slotid, always 0 */ 264 WRITE32(0); /* slotid, always 0 */
265 WRITE32(0); /* highest slotid always 0 */ 265 WRITE32(0); /* highest slotid always 0 */
266 WRITE32(0); /* cachethis always 0 */ 266 WRITE32(0); /* cachethis always 0 */
@@ -280,18 +280,18 @@ nfs4_xdr_enc_cb_null(struct rpc_rqst *req, __be32 *p)
280 280
281static int 281static int
282nfs4_xdr_enc_cb_recall(struct rpc_rqst *req, __be32 *p, 282nfs4_xdr_enc_cb_recall(struct rpc_rqst *req, __be32 *p,
283 struct nfs4_rpc_args *rpc_args) 283 struct nfsd4_callback *cb)
284{ 284{
285 struct xdr_stream xdr; 285 struct xdr_stream xdr;
286 struct nfs4_delegation *args = rpc_args->args_op; 286 struct nfs4_delegation *args = cb->cb_op;
287 struct nfs4_cb_compound_hdr hdr = { 287 struct nfs4_cb_compound_hdr hdr = {
288 .ident = args->dl_ident, 288 .ident = cb->cb_clp->cl_cb_ident,
289 .minorversion = rpc_args->args_seq.cbs_minorversion, 289 .minorversion = cb->cb_minorversion,
290 }; 290 };
291 291
292 xdr_init_encode(&xdr, &req->rq_snd_buf, p); 292 xdr_init_encode(&xdr, &req->rq_snd_buf, p);
293 encode_cb_compound_hdr(&xdr, &hdr); 293 encode_cb_compound_hdr(&xdr, &hdr);
294 encode_cb_sequence(&xdr, &rpc_args->args_seq, &hdr); 294 encode_cb_sequence(&xdr, cb, &hdr);
295 encode_cb_recall(&xdr, args, &hdr); 295 encode_cb_recall(&xdr, args, &hdr);
296 encode_cb_nops(&hdr); 296 encode_cb_nops(&hdr);
297 return 0; 297 return 0;
@@ -339,15 +339,16 @@ decode_cb_op_hdr(struct xdr_stream *xdr, enum nfs_opnum4 expected)
339 * with a single slot. 339 * with a single slot.
340 */ 340 */
341static int 341static int
342decode_cb_sequence(struct xdr_stream *xdr, struct nfsd4_cb_sequence *res, 342decode_cb_sequence(struct xdr_stream *xdr, struct nfsd4_callback *cb,
343 struct rpc_rqst *rqstp) 343 struct rpc_rqst *rqstp)
344{ 344{
345 struct nfsd4_session *ses = cb->cb_clp->cl_cb_session;
345 struct nfs4_sessionid id; 346 struct nfs4_sessionid id;
346 int status; 347 int status;
347 u32 dummy; 348 u32 dummy;
348 __be32 *p; 349 __be32 *p;
349 350
350 if (res->cbs_minorversion == 0) 351 if (cb->cb_minorversion == 0)
351 return 0; 352 return 0;
352 353
353 status = decode_cb_op_hdr(xdr, OP_CB_SEQUENCE); 354 status = decode_cb_op_hdr(xdr, OP_CB_SEQUENCE);
@@ -363,13 +364,12 @@ decode_cb_sequence(struct xdr_stream *xdr, struct nfsd4_cb_sequence *res,
363 READ_BUF(NFS4_MAX_SESSIONID_LEN + 16); 364 READ_BUF(NFS4_MAX_SESSIONID_LEN + 16);
364 memcpy(id.data, p, NFS4_MAX_SESSIONID_LEN); 365 memcpy(id.data, p, NFS4_MAX_SESSIONID_LEN);
365 p += XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN); 366 p += XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN);
366 if (memcmp(id.data, res->cbs_clp->cl_sessionid.data, 367 if (memcmp(id.data, ses->se_sessionid.data, NFS4_MAX_SESSIONID_LEN)) {
367 NFS4_MAX_SESSIONID_LEN)) {
368 dprintk("%s Invalid session id\n", __func__); 368 dprintk("%s Invalid session id\n", __func__);
369 goto out; 369 goto out;
370 } 370 }
371 READ32(dummy); 371 READ32(dummy);
372 if (dummy != res->cbs_clp->cl_cb_seq_nr) { 372 if (dummy != ses->se_cb_seq_nr) {
373 dprintk("%s Invalid sequence number\n", __func__); 373 dprintk("%s Invalid sequence number\n", __func__);
374 goto out; 374 goto out;
375 } 375 }
@@ -393,7 +393,7 @@ nfs4_xdr_dec_cb_null(struct rpc_rqst *req, __be32 *p)
393 393
394static int 394static int
395nfs4_xdr_dec_cb_recall(struct rpc_rqst *rqstp, __be32 *p, 395nfs4_xdr_dec_cb_recall(struct rpc_rqst *rqstp, __be32 *p,
396 struct nfsd4_cb_sequence *seq) 396 struct nfsd4_callback *cb)
397{ 397{
398 struct xdr_stream xdr; 398 struct xdr_stream xdr;
399 struct nfs4_cb_compound_hdr hdr; 399 struct nfs4_cb_compound_hdr hdr;
@@ -403,8 +403,8 @@ nfs4_xdr_dec_cb_recall(struct rpc_rqst *rqstp, __be32 *p,
403 status = decode_cb_compound_hdr(&xdr, &hdr); 403 status = decode_cb_compound_hdr(&xdr, &hdr);
404 if (status) 404 if (status)
405 goto out; 405 goto out;
406 if (seq) { 406 if (cb) {
407 status = decode_cb_sequence(&xdr, seq, rqstp); 407 status = decode_cb_sequence(&xdr, cb, rqstp);
408 if (status) 408 if (status)
409 goto out; 409 goto out;
410 } 410 }
@@ -473,30 +473,34 @@ static int max_cb_time(void)
473/* Reference counting, callback cleanup, etc., all look racy as heck. 473/* Reference counting, callback cleanup, etc., all look racy as heck.
474 * And why is cl_cb_set an atomic? */ 474 * And why is cl_cb_set an atomic? */
475 475
476int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *cb) 476int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *conn)
477{ 477{
478 struct rpc_timeout timeparms = { 478 struct rpc_timeout timeparms = {
479 .to_initval = max_cb_time(), 479 .to_initval = max_cb_time(),
480 .to_retries = 0, 480 .to_retries = 0,
481 }; 481 };
482 struct rpc_create_args args = { 482 struct rpc_create_args args = {
483 .protocol = XPRT_TRANSPORT_TCP, 483 .net = &init_net,
484 .address = (struct sockaddr *) &cb->cb_addr, 484 .address = (struct sockaddr *) &conn->cb_addr,
485 .addrsize = cb->cb_addrlen, 485 .addrsize = conn->cb_addrlen,
486 .timeout = &timeparms, 486 .timeout = &timeparms,
487 .program = &cb_program, 487 .program = &cb_program,
488 .prognumber = cb->cb_prog,
489 .version = 0, 488 .version = 0,
490 .authflavor = clp->cl_flavor, 489 .authflavor = clp->cl_flavor,
491 .flags = (RPC_CLNT_CREATE_NOPING | RPC_CLNT_CREATE_QUIET), 490 .flags = (RPC_CLNT_CREATE_NOPING | RPC_CLNT_CREATE_QUIET),
492 .client_name = clp->cl_principal,
493 }; 491 };
494 struct rpc_clnt *client; 492 struct rpc_clnt *client;
495 493
496 if (!clp->cl_principal && (clp->cl_flavor >= RPC_AUTH_GSS_KRB5)) 494 if (clp->cl_minorversion == 0) {
497 return -EINVAL; 495 if (!clp->cl_principal && (clp->cl_flavor >= RPC_AUTH_GSS_KRB5))
498 if (cb->cb_minorversion) { 496 return -EINVAL;
499 args.bc_xprt = cb->cb_xprt; 497 args.client_name = clp->cl_principal;
498 args.prognumber = conn->cb_prog,
499 args.protocol = XPRT_TRANSPORT_TCP;
500 clp->cl_cb_ident = conn->cb_ident;
501 } else {
502 args.bc_xprt = conn->cb_xprt;
503 args.prognumber = clp->cl_cb_session->se_cb_prog;
500 args.protocol = XPRT_TRANSPORT_BC_TCP; 504 args.protocol = XPRT_TRANSPORT_BC_TCP;
501 } 505 }
502 /* Create RPC client */ 506 /* Create RPC client */
@@ -506,7 +510,7 @@ int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *cb)
506 PTR_ERR(client)); 510 PTR_ERR(client));
507 return PTR_ERR(client); 511 return PTR_ERR(client);
508 } 512 }
509 nfsd4_set_callback_client(clp, client); 513 clp->cl_cb_client = client;
510 return 0; 514 return 0;
511 515
512} 516}
@@ -519,7 +523,7 @@ static void warn_no_callback_path(struct nfs4_client *clp, int reason)
519 523
520static void nfsd4_cb_probe_done(struct rpc_task *task, void *calldata) 524static void nfsd4_cb_probe_done(struct rpc_task *task, void *calldata)
521{ 525{
522 struct nfs4_client *clp = calldata; 526 struct nfs4_client *clp = container_of(calldata, struct nfs4_client, cl_cb_null);
523 527
524 if (task->tk_status) 528 if (task->tk_status)
525 warn_no_callback_path(clp, task->tk_status); 529 warn_no_callback_path(clp, task->tk_status);
@@ -528,6 +532,8 @@ static void nfsd4_cb_probe_done(struct rpc_task *task, void *calldata)
528} 532}
529 533
530static const struct rpc_call_ops nfsd4_cb_probe_ops = { 534static const struct rpc_call_ops nfsd4_cb_probe_ops = {
535 /* XXX: release method to ensure we set the cb channel down if
536 * necessary on early failure? */
531 .rpc_call_done = nfsd4_cb_probe_done, 537 .rpc_call_done = nfsd4_cb_probe_done,
532}; 538};
533 539
@@ -543,38 +549,42 @@ int set_callback_cred(void)
543 return 0; 549 return 0;
544} 550}
545 551
552static struct workqueue_struct *callback_wq;
546 553
547void do_probe_callback(struct nfs4_client *clp) 554static void do_probe_callback(struct nfs4_client *clp)
548{ 555{
549 struct rpc_message msg = { 556 struct nfsd4_callback *cb = &clp->cl_cb_null;
550 .rpc_proc = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_NULL],
551 .rpc_argp = clp,
552 .rpc_cred = callback_cred
553 };
554 int status;
555 557
556 status = rpc_call_async(clp->cl_cb_client, &msg, 558 cb->cb_op = NULL;
557 RPC_TASK_SOFT | RPC_TASK_SOFTCONN, 559 cb->cb_clp = clp;
558 &nfsd4_cb_probe_ops, (void *)clp); 560
559 if (status) 561 cb->cb_msg.rpc_proc = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_NULL];
560 warn_no_callback_path(clp, status); 562 cb->cb_msg.rpc_argp = NULL;
563 cb->cb_msg.rpc_resp = NULL;
564 cb->cb_msg.rpc_cred = callback_cred;
565
566 cb->cb_ops = &nfsd4_cb_probe_ops;
567
568 queue_work(callback_wq, &cb->cb_work);
561} 569}
562 570
563/* 571/*
564 * Set up the callback client and put a NFSPROC4_CB_NULL on the wire... 572 * Poke the callback thread to process any updates to the callback
573 * parameters, and send a null probe.
565 */ 574 */
566void nfsd4_probe_callback(struct nfs4_client *clp, struct nfs4_cb_conn *cb) 575void nfsd4_probe_callback(struct nfs4_client *clp)
567{ 576{
568 int status; 577 set_bit(NFSD4_CLIENT_CB_UPDATE, &clp->cl_cb_flags);
578 do_probe_callback(clp);
579}
569 580
581void nfsd4_change_callback(struct nfs4_client *clp, struct nfs4_cb_conn *conn)
582{
570 BUG_ON(atomic_read(&clp->cl_cb_set)); 583 BUG_ON(atomic_read(&clp->cl_cb_set));
571 584
572 status = setup_callback_client(clp, cb); 585 spin_lock(&clp->cl_lock);
573 if (status) { 586 memcpy(&clp->cl_cb_conn, conn, sizeof(struct nfs4_cb_conn));
574 warn_no_callback_path(clp, status); 587 spin_unlock(&clp->cl_lock);
575 return;
576 }
577 do_probe_callback(clp);
578} 588}
579 589
580/* 590/*
@@ -585,8 +595,7 @@ void nfsd4_probe_callback(struct nfs4_client *clp, struct nfs4_cb_conn *cb)
585static int nfsd41_cb_setup_sequence(struct nfs4_client *clp, 595static int nfsd41_cb_setup_sequence(struct nfs4_client *clp,
586 struct rpc_task *task) 596 struct rpc_task *task)
587{ 597{
588 struct nfs4_rpc_args *args = task->tk_msg.rpc_argp; 598 u32 *ptr = (u32 *)clp->cl_cb_session->se_sessionid.data;
589 u32 *ptr = (u32 *)clp->cl_sessionid.data;
590 int status = 0; 599 int status = 0;
591 600
592 dprintk("%s: %u:%u:%u:%u\n", __func__, 601 dprintk("%s: %u:%u:%u:%u\n", __func__,
@@ -598,14 +607,6 @@ static int nfsd41_cb_setup_sequence(struct nfs4_client *clp,
598 status = -EAGAIN; 607 status = -EAGAIN;
599 goto out; 608 goto out;
600 } 609 }
601
602 /*
603 * We'll need the clp during XDR encoding and decoding,
604 * and the sequence during decoding to verify the reply
605 */
606 args->args_seq.cbs_clp = clp;
607 task->tk_msg.rpc_resp = &args->args_seq;
608
609out: 610out:
610 dprintk("%s status=%d\n", __func__, status); 611 dprintk("%s status=%d\n", __func__, status);
611 return status; 612 return status;
@@ -617,13 +618,13 @@ out:
617 */ 618 */
618static void nfsd4_cb_prepare(struct rpc_task *task, void *calldata) 619static void nfsd4_cb_prepare(struct rpc_task *task, void *calldata)
619{ 620{
620 struct nfs4_delegation *dp = calldata; 621 struct nfsd4_callback *cb = calldata;
622 struct nfs4_delegation *dp = container_of(cb, struct nfs4_delegation, dl_recall);
621 struct nfs4_client *clp = dp->dl_client; 623 struct nfs4_client *clp = dp->dl_client;
622 struct nfs4_rpc_args *args = task->tk_msg.rpc_argp; 624 u32 minorversion = clp->cl_minorversion;
623 u32 minorversion = clp->cl_cb_conn.cb_minorversion;
624 int status = 0; 625 int status = 0;
625 626
626 args->args_seq.cbs_minorversion = minorversion; 627 cb->cb_minorversion = minorversion;
627 if (minorversion) { 628 if (minorversion) {
628 status = nfsd41_cb_setup_sequence(clp, task); 629 status = nfsd41_cb_setup_sequence(clp, task);
629 if (status) { 630 if (status) {
@@ -640,19 +641,20 @@ static void nfsd4_cb_prepare(struct rpc_task *task, void *calldata)
640 641
641static void nfsd4_cb_done(struct rpc_task *task, void *calldata) 642static void nfsd4_cb_done(struct rpc_task *task, void *calldata)
642{ 643{
643 struct nfs4_delegation *dp = calldata; 644 struct nfsd4_callback *cb = calldata;
645 struct nfs4_delegation *dp = container_of(cb, struct nfs4_delegation, dl_recall);
644 struct nfs4_client *clp = dp->dl_client; 646 struct nfs4_client *clp = dp->dl_client;
645 647
646 dprintk("%s: minorversion=%d\n", __func__, 648 dprintk("%s: minorversion=%d\n", __func__,
647 clp->cl_cb_conn.cb_minorversion); 649 clp->cl_minorversion);
648 650
649 if (clp->cl_cb_conn.cb_minorversion) { 651 if (clp->cl_minorversion) {
650 /* No need for lock, access serialized in nfsd4_cb_prepare */ 652 /* No need for lock, access serialized in nfsd4_cb_prepare */
651 ++clp->cl_cb_seq_nr; 653 ++clp->cl_cb_session->se_cb_seq_nr;
652 clear_bit(0, &clp->cl_cb_slot_busy); 654 clear_bit(0, &clp->cl_cb_slot_busy);
653 rpc_wake_up_next(&clp->cl_cb_waitq); 655 rpc_wake_up_next(&clp->cl_cb_waitq);
654 dprintk("%s: freed slot, new seqid=%d\n", __func__, 656 dprintk("%s: freed slot, new seqid=%d\n", __func__,
655 clp->cl_cb_seq_nr); 657 clp->cl_cb_session->se_cb_seq_nr);
656 658
657 /* We're done looking into the sequence information */ 659 /* We're done looking into the sequence information */
658 task->tk_msg.rpc_resp = NULL; 660 task->tk_msg.rpc_resp = NULL;
@@ -662,7 +664,8 @@ static void nfsd4_cb_done(struct rpc_task *task, void *calldata)
662 664
663static void nfsd4_cb_recall_done(struct rpc_task *task, void *calldata) 665static void nfsd4_cb_recall_done(struct rpc_task *task, void *calldata)
664{ 666{
665 struct nfs4_delegation *dp = calldata; 667 struct nfsd4_callback *cb = calldata;
668 struct nfs4_delegation *dp = container_of(cb, struct nfs4_delegation, dl_recall);
666 struct nfs4_client *clp = dp->dl_client; 669 struct nfs4_client *clp = dp->dl_client;
667 struct rpc_clnt *current_rpc_client = clp->cl_cb_client; 670 struct rpc_clnt *current_rpc_client = clp->cl_cb_client;
668 671
@@ -707,7 +710,8 @@ static void nfsd4_cb_recall_done(struct rpc_task *task, void *calldata)
707 710
708static void nfsd4_cb_recall_release(void *calldata) 711static void nfsd4_cb_recall_release(void *calldata)
709{ 712{
710 struct nfs4_delegation *dp = calldata; 713 struct nfsd4_callback *cb = calldata;
714 struct nfs4_delegation *dp = container_of(cb, struct nfs4_delegation, dl_recall);
711 715
712 nfs4_put_delegation(dp); 716 nfs4_put_delegation(dp);
713} 717}
@@ -718,8 +722,6 @@ static const struct rpc_call_ops nfsd4_cb_recall_ops = {
718 .rpc_release = nfsd4_cb_recall_release, 722 .rpc_release = nfsd4_cb_recall_release,
719}; 723};
720 724
721static struct workqueue_struct *callback_wq;
722
723int nfsd4_create_callback_queue(void) 725int nfsd4_create_callback_queue(void)
724{ 726{
725 callback_wq = create_singlethread_workqueue("nfsd4_callbacks"); 727 callback_wq = create_singlethread_workqueue("nfsd4_callbacks");
@@ -734,57 +736,88 @@ void nfsd4_destroy_callback_queue(void)
734} 736}
735 737
736/* must be called under the state lock */ 738/* must be called under the state lock */
737void nfsd4_set_callback_client(struct nfs4_client *clp, struct rpc_clnt *new) 739void nfsd4_shutdown_callback(struct nfs4_client *clp)
738{ 740{
739 struct rpc_clnt *old = clp->cl_cb_client; 741 set_bit(NFSD4_CLIENT_KILL, &clp->cl_cb_flags);
740
741 clp->cl_cb_client = new;
742 /* 742 /*
743 * After this, any work that saw the old value of cl_cb_client will 743 * Note this won't actually result in a null callback;
744 * be gone: 744 * instead, nfsd4_do_callback_rpc() will detect the killed
745 * client, destroy the rpc client, and stop:
745 */ 746 */
747 do_probe_callback(clp);
746 flush_workqueue(callback_wq); 748 flush_workqueue(callback_wq);
747 /* So we can safely shut it down: */
748 if (old)
749 rpc_shutdown_client(old);
750} 749}
751 750
752/* 751void nfsd4_release_cb(struct nfsd4_callback *cb)
753 * called with dp->dl_count inc'ed.
754 */
755static void _nfsd4_cb_recall(struct nfs4_delegation *dp)
756{ 752{
757 struct nfs4_client *clp = dp->dl_client; 753 if (cb->cb_ops->rpc_release)
758 struct rpc_clnt *clnt = clp->cl_cb_client; 754 cb->cb_ops->rpc_release(cb);
759 struct nfs4_rpc_args *args = &dp->dl_recall.cb_args; 755}
760 struct rpc_message msg = {
761 .rpc_proc = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_RECALL],
762 .rpc_cred = callback_cred
763 };
764 756
765 if (clnt == NULL) { 757void nfsd4_process_cb_update(struct nfsd4_callback *cb)
766 nfs4_put_delegation(dp); 758{
767 return; /* Client is shutting down; give up. */ 759 struct nfs4_cb_conn conn;
760 struct nfs4_client *clp = cb->cb_clp;
761 int err;
762
763 /*
764 * This is either an update, or the client dying; in either case,
765 * kill the old client:
766 */
767 if (clp->cl_cb_client) {
768 rpc_shutdown_client(clp->cl_cb_client);
769 clp->cl_cb_client = NULL;
768 } 770 }
771 if (test_bit(NFSD4_CLIENT_KILL, &clp->cl_cb_flags))
772 return;
773 spin_lock(&clp->cl_lock);
774 /*
775 * Only serialized callback code is allowed to clear these
776 * flags; main nfsd code can only set them:
777 */
778 BUG_ON(!clp->cl_cb_flags);
779 clear_bit(NFSD4_CLIENT_CB_UPDATE, &clp->cl_cb_flags);
780 memcpy(&conn, &cb->cb_clp->cl_cb_conn, sizeof(struct nfs4_cb_conn));
781 spin_unlock(&clp->cl_lock);
769 782
770 args->args_op = dp; 783 err = setup_callback_client(clp, &conn);
771 msg.rpc_argp = args; 784 if (err)
772 dp->dl_retries = 1; 785 warn_no_callback_path(clp, err);
773 rpc_call_async(clnt, &msg, RPC_TASK_SOFT, &nfsd4_cb_recall_ops, dp);
774} 786}
775 787
776void nfsd4_do_callback_rpc(struct work_struct *w) 788void nfsd4_do_callback_rpc(struct work_struct *w)
777{ 789{
778 /* XXX: for now, just send off delegation recall. */ 790 struct nfsd4_callback *cb = container_of(w, struct nfsd4_callback, cb_work);
779 /* In future, generalize to handle any sort of callback. */ 791 struct nfs4_client *clp = cb->cb_clp;
780 struct nfsd4_callback *c = container_of(w, struct nfsd4_callback, cb_work); 792 struct rpc_clnt *clnt;
781 struct nfs4_delegation *dp = container_of(c, struct nfs4_delegation, dl_recall);
782 793
783 _nfsd4_cb_recall(dp); 794 if (clp->cl_cb_flags)
784} 795 nfsd4_process_cb_update(cb);
785 796
797 clnt = clp->cl_cb_client;
798 if (!clnt) {
799 /* Callback channel broken, or client killed; give up: */
800 nfsd4_release_cb(cb);
801 return;
802 }
803 rpc_call_async(clnt, &cb->cb_msg, RPC_TASK_SOFT | RPC_TASK_SOFTCONN,
804 cb->cb_ops, cb);
805}
786 806
787void nfsd4_cb_recall(struct nfs4_delegation *dp) 807void nfsd4_cb_recall(struct nfs4_delegation *dp)
788{ 808{
809 struct nfsd4_callback *cb = &dp->dl_recall;
810
811 dp->dl_retries = 1;
812 cb->cb_op = dp;
813 cb->cb_clp = dp->dl_client;
814 cb->cb_msg.rpc_proc = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_RECALL];
815 cb->cb_msg.rpc_argp = cb;
816 cb->cb_msg.rpc_resp = cb;
817 cb->cb_msg.rpc_cred = callback_cred;
818
819 cb->cb_ops = &nfsd4_cb_recall_ops;
820 dp->dl_retries = 1;
821
789 queue_work(callback_wq, &dp->dl_recall.cb_work); 822 queue_work(callback_wq, &dp->dl_recall.cb_work);
790} 823}
diff --git a/fs/nfsd/nfs4idmap.c b/fs/nfsd/nfs4idmap.c
index c78dbf493424..f0695e815f0e 100644
--- a/fs/nfsd/nfs4idmap.c
+++ b/fs/nfsd/nfs4idmap.c
@@ -482,109 +482,26 @@ nfsd_idmap_shutdown(void)
482 cache_unregister(&nametoid_cache); 482 cache_unregister(&nametoid_cache);
483} 483}
484 484
485/*
486 * Deferred request handling
487 */
488
489struct idmap_defer_req {
490 struct cache_req req;
491 struct cache_deferred_req deferred_req;
492 wait_queue_head_t waitq;
493 atomic_t count;
494};
495
496static inline void
497put_mdr(struct idmap_defer_req *mdr)
498{
499 if (atomic_dec_and_test(&mdr->count))
500 kfree(mdr);
501}
502
503static inline void
504get_mdr(struct idmap_defer_req *mdr)
505{
506 atomic_inc(&mdr->count);
507}
508
509static void
510idmap_revisit(struct cache_deferred_req *dreq, int toomany)
511{
512 struct idmap_defer_req *mdr =
513 container_of(dreq, struct idmap_defer_req, deferred_req);
514
515 wake_up(&mdr->waitq);
516 put_mdr(mdr);
517}
518
519static struct cache_deferred_req *
520idmap_defer(struct cache_req *req)
521{
522 struct idmap_defer_req *mdr =
523 container_of(req, struct idmap_defer_req, req);
524
525 mdr->deferred_req.revisit = idmap_revisit;
526 get_mdr(mdr);
527 return (&mdr->deferred_req);
528}
529
530static inline int
531do_idmap_lookup(struct ent *(*lookup_fn)(struct ent *), struct ent *key,
532 struct cache_detail *detail, struct ent **item,
533 struct idmap_defer_req *mdr)
534{
535 *item = lookup_fn(key);
536 if (!*item)
537 return -ENOMEM;
538 return cache_check(detail, &(*item)->h, &mdr->req);
539}
540
541static inline int
542do_idmap_lookup_nowait(struct ent *(*lookup_fn)(struct ent *),
543 struct ent *key, struct cache_detail *detail,
544 struct ent **item)
545{
546 int ret = -ENOMEM;
547
548 *item = lookup_fn(key);
549 if (!*item)
550 goto out_err;
551 ret = -ETIMEDOUT;
552 if (!test_bit(CACHE_VALID, &(*item)->h.flags)
553 || (*item)->h.expiry_time < get_seconds()
554 || detail->flush_time > (*item)->h.last_refresh)
555 goto out_put;
556 ret = -ENOENT;
557 if (test_bit(CACHE_NEGATIVE, &(*item)->h.flags))
558 goto out_put;
559 return 0;
560out_put:
561 cache_put(&(*item)->h, detail);
562out_err:
563 *item = NULL;
564 return ret;
565}
566
567static int 485static int
568idmap_lookup(struct svc_rqst *rqstp, 486idmap_lookup(struct svc_rqst *rqstp,
569 struct ent *(*lookup_fn)(struct ent *), struct ent *key, 487 struct ent *(*lookup_fn)(struct ent *), struct ent *key,
570 struct cache_detail *detail, struct ent **item) 488 struct cache_detail *detail, struct ent **item)
571{ 489{
572 struct idmap_defer_req *mdr;
573 int ret; 490 int ret;
574 491
575 mdr = kzalloc(sizeof(*mdr), GFP_KERNEL); 492 *item = lookup_fn(key);
576 if (!mdr) 493 if (!*item)
577 return -ENOMEM; 494 return -ENOMEM;
578 atomic_set(&mdr->count, 1); 495 retry:
579 init_waitqueue_head(&mdr->waitq); 496 ret = cache_check(detail, &(*item)->h, &rqstp->rq_chandle);
580 mdr->req.defer = idmap_defer; 497
581 ret = do_idmap_lookup(lookup_fn, key, detail, item, mdr); 498 if (ret == -ETIMEDOUT) {
582 if (ret == -EAGAIN) { 499 struct ent *prev_item = *item;
583 wait_event_interruptible_timeout(mdr->waitq, 500 *item = lookup_fn(key);
584 test_bit(CACHE_VALID, &(*item)->h.flags), 1 * HZ); 501 if (*item != prev_item)
585 ret = do_idmap_lookup_nowait(lookup_fn, key, detail, item); 502 goto retry;
503 cache_put(&(*item)->h, detail);
586 } 504 }
587 put_mdr(mdr);
588 return ret; 505 return ret;
589} 506}
590 507
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index 59ec449b0c7f..0cdfd022bb7b 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -1031,8 +1031,11 @@ nfsd4_proc_compound(struct svc_rqst *rqstp,
1031 resp->cstate.session = NULL; 1031 resp->cstate.session = NULL;
1032 fh_init(&resp->cstate.current_fh, NFS4_FHSIZE); 1032 fh_init(&resp->cstate.current_fh, NFS4_FHSIZE);
1033 fh_init(&resp->cstate.save_fh, NFS4_FHSIZE); 1033 fh_init(&resp->cstate.save_fh, NFS4_FHSIZE);
1034 /* Use the deferral mechanism only for NFSv4.0 compounds */ 1034 /*
1035 rqstp->rq_usedeferral = (args->minorversion == 0); 1035 * Don't use the deferral mechanism for NFSv4; compounds make it
1036 * too hard to avoid non-idempotency problems.
1037 */
1038 rqstp->rq_usedeferral = 0;
1036 1039
1037 /* 1040 /*
1038 * According to RFC3010, this takes precedence over all other errors. 1041 * According to RFC3010, this takes precedence over all other errors.
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index a7292fcf7718..9019e8ec9dc8 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -207,7 +207,6 @@ alloc_init_deleg(struct nfs4_client *clp, struct nfs4_stateid *stp, struct svc_f
207{ 207{
208 struct nfs4_delegation *dp; 208 struct nfs4_delegation *dp;
209 struct nfs4_file *fp = stp->st_file; 209 struct nfs4_file *fp = stp->st_file;
210 struct nfs4_cb_conn *cb = &stp->st_stateowner->so_client->cl_cb_conn;
211 210
212 dprintk("NFSD alloc_init_deleg\n"); 211 dprintk("NFSD alloc_init_deleg\n");
213 /* 212 /*
@@ -234,7 +233,6 @@ alloc_init_deleg(struct nfs4_client *clp, struct nfs4_stateid *stp, struct svc_f
234 nfs4_file_get_access(fp, O_RDONLY); 233 nfs4_file_get_access(fp, O_RDONLY);
235 dp->dl_flock = NULL; 234 dp->dl_flock = NULL;
236 dp->dl_type = type; 235 dp->dl_type = type;
237 dp->dl_ident = cb->cb_ident;
238 dp->dl_stateid.si_boot = boot_time; 236 dp->dl_stateid.si_boot = boot_time;
239 dp->dl_stateid.si_stateownerid = current_delegid++; 237 dp->dl_stateid.si_stateownerid = current_delegid++;
240 dp->dl_stateid.si_fileid = 0; 238 dp->dl_stateid.si_fileid = 0;
@@ -535,171 +533,258 @@ gen_sessionid(struct nfsd4_session *ses)
535 */ 533 */
536#define NFSD_MIN_HDR_SEQ_SZ (24 + 12 + 44) 534#define NFSD_MIN_HDR_SEQ_SZ (24 + 12 + 44)
537 535
536static void
537free_session_slots(struct nfsd4_session *ses)
538{
539 int i;
540
541 for (i = 0; i < ses->se_fchannel.maxreqs; i++)
542 kfree(ses->se_slots[i]);
543}
544
538/* 545/*
539 * Give the client the number of ca_maxresponsesize_cached slots it 546 * We don't actually need to cache the rpc and session headers, so we
540 * requests, of size bounded by NFSD_SLOT_CACHE_SIZE, 547 * can allocate a little less for each slot:
541 * NFSD_MAX_MEM_PER_SESSION, and nfsd_drc_max_mem. Do not allow more 548 */
542 * than NFSD_MAX_SLOTS_PER_SESSION. 549static inline int slot_bytes(struct nfsd4_channel_attrs *ca)
543 * 550{
544 * If we run out of reserved DRC memory we should (up to a point) 551 return ca->maxresp_cached - NFSD_MIN_HDR_SEQ_SZ;
552}
553
554static int nfsd4_sanitize_slot_size(u32 size)
555{
556 size -= NFSD_MIN_HDR_SEQ_SZ; /* We don't cache the rpc header */
557 size = min_t(u32, size, NFSD_SLOT_CACHE_SIZE);
558
559 return size;
560}
561
562/*
563 * XXX: If we run out of reserved DRC memory we could (up to a point)
545 * re-negotiate active sessions and reduce their slot usage to make 564 * re-negotiate active sessions and reduce their slot usage to make
546 * rooom for new connections. For now we just fail the create session. 565 * rooom for new connections. For now we just fail the create session.
547 */ 566 */
548static int set_forechannel_drc_size(struct nfsd4_channel_attrs *fchan) 567static int nfsd4_get_drc_mem(int slotsize, u32 num)
549{ 568{
550 int mem, size = fchan->maxresp_cached; 569 int avail;
551 570
552 if (fchan->maxreqs < 1) 571 num = min_t(u32, num, NFSD_MAX_SLOTS_PER_SESSION);
553 return nfserr_inval;
554 572
555 if (size < NFSD_MIN_HDR_SEQ_SZ) 573 spin_lock(&nfsd_drc_lock);
556 size = NFSD_MIN_HDR_SEQ_SZ; 574 avail = min_t(int, NFSD_MAX_MEM_PER_SESSION,
557 size -= NFSD_MIN_HDR_SEQ_SZ; 575 nfsd_drc_max_mem - nfsd_drc_mem_used);
558 if (size > NFSD_SLOT_CACHE_SIZE) 576 num = min_t(int, num, avail / slotsize);
559 size = NFSD_SLOT_CACHE_SIZE; 577 nfsd_drc_mem_used += num * slotsize;
560 578 spin_unlock(&nfsd_drc_lock);
561 /* bound the maxreqs by NFSD_MAX_MEM_PER_SESSION */
562 mem = fchan->maxreqs * size;
563 if (mem > NFSD_MAX_MEM_PER_SESSION) {
564 fchan->maxreqs = NFSD_MAX_MEM_PER_SESSION / size;
565 if (fchan->maxreqs > NFSD_MAX_SLOTS_PER_SESSION)
566 fchan->maxreqs = NFSD_MAX_SLOTS_PER_SESSION;
567 mem = fchan->maxreqs * size;
568 }
569 579
580 return num;
581}
582
583static void nfsd4_put_drc_mem(int slotsize, int num)
584{
570 spin_lock(&nfsd_drc_lock); 585 spin_lock(&nfsd_drc_lock);
571 /* bound the total session drc memory ussage */ 586 nfsd_drc_mem_used -= slotsize * num;
572 if (mem + nfsd_drc_mem_used > nfsd_drc_max_mem) {
573 fchan->maxreqs = (nfsd_drc_max_mem - nfsd_drc_mem_used) / size;
574 mem = fchan->maxreqs * size;
575 }
576 nfsd_drc_mem_used += mem;
577 spin_unlock(&nfsd_drc_lock); 587 spin_unlock(&nfsd_drc_lock);
588}
578 589
579 if (fchan->maxreqs == 0) 590static struct nfsd4_session *alloc_session(int slotsize, int numslots)
580 return nfserr_jukebox; 591{
592 struct nfsd4_session *new;
593 int mem, i;
581 594
582 fchan->maxresp_cached = size + NFSD_MIN_HDR_SEQ_SZ; 595 BUILD_BUG_ON(NFSD_MAX_SLOTS_PER_SESSION * sizeof(struct nfsd4_slot *)
583 return 0; 596 + sizeof(struct nfsd4_session) > PAGE_SIZE);
597 mem = numslots * sizeof(struct nfsd4_slot *);
598
599 new = kzalloc(sizeof(*new) + mem, GFP_KERNEL);
600 if (!new)
601 return NULL;
602 /* allocate each struct nfsd4_slot and data cache in one piece */
603 for (i = 0; i < numslots; i++) {
604 mem = sizeof(struct nfsd4_slot) + slotsize;
605 new->se_slots[i] = kzalloc(mem, GFP_KERNEL);
606 if (!new->se_slots[i])
607 goto out_free;
608 }
609 return new;
610out_free:
611 while (i--)
612 kfree(new->se_slots[i]);
613 kfree(new);
614 return NULL;
584} 615}
585 616
586/* 617static void init_forechannel_attrs(struct nfsd4_channel_attrs *new, struct nfsd4_channel_attrs *req, int numslots, int slotsize)
587 * fchan holds the client values on input, and the server values on output
588 * sv_max_mesg is the maximum payload plus one page for overhead.
589 */
590static int init_forechannel_attrs(struct svc_rqst *rqstp,
591 struct nfsd4_channel_attrs *session_fchan,
592 struct nfsd4_channel_attrs *fchan)
593{ 618{
594 int status = 0; 619 u32 maxrpc = nfsd_serv->sv_max_mesg;
595 __u32 maxcount = nfsd_serv->sv_max_mesg;
596 620
597 /* headerpadsz set to zero in encode routine */ 621 new->maxreqs = numslots;
622 new->maxresp_cached = slotsize + NFSD_MIN_HDR_SEQ_SZ;
623 new->maxreq_sz = min_t(u32, req->maxreq_sz, maxrpc);
624 new->maxresp_sz = min_t(u32, req->maxresp_sz, maxrpc);
625 new->maxops = min_t(u32, req->maxops, NFSD_MAX_OPS_PER_COMPOUND);
626}
598 627
599 /* Use the client's max request and max response size if possible */ 628static void free_conn(struct nfsd4_conn *c)
600 if (fchan->maxreq_sz > maxcount) 629{
601 fchan->maxreq_sz = maxcount; 630 svc_xprt_put(c->cn_xprt);
602 session_fchan->maxreq_sz = fchan->maxreq_sz; 631 kfree(c);
632}
603 633
604 if (fchan->maxresp_sz > maxcount) 634static void nfsd4_conn_lost(struct svc_xpt_user *u)
605 fchan->maxresp_sz = maxcount; 635{
606 session_fchan->maxresp_sz = fchan->maxresp_sz; 636 struct nfsd4_conn *c = container_of(u, struct nfsd4_conn, cn_xpt_user);
637 struct nfs4_client *clp = c->cn_session->se_client;
607 638
608 /* Use the client's maxops if possible */ 639 spin_lock(&clp->cl_lock);
609 if (fchan->maxops > NFSD_MAX_OPS_PER_COMPOUND) 640 if (!list_empty(&c->cn_persession)) {
610 fchan->maxops = NFSD_MAX_OPS_PER_COMPOUND; 641 list_del(&c->cn_persession);
611 session_fchan->maxops = fchan->maxops; 642 free_conn(c);
643 }
644 spin_unlock(&clp->cl_lock);
645}
612 646
613 /* FIXME: Error means no more DRC pages so the server should 647static struct nfsd4_conn *alloc_conn(struct svc_rqst *rqstp, u32 flags)
614 * recover pages from existing sessions. For now fail session 648{
615 * creation. 649 struct nfsd4_conn *conn;
616 */
617 status = set_forechannel_drc_size(fchan);
618 650
619 session_fchan->maxresp_cached = fchan->maxresp_cached; 651 conn = kmalloc(sizeof(struct nfsd4_conn), GFP_KERNEL);
620 session_fchan->maxreqs = fchan->maxreqs; 652 if (!conn)
653 return NULL;
654 svc_xprt_get(rqstp->rq_xprt);
655 conn->cn_xprt = rqstp->rq_xprt;
656 conn->cn_flags = flags;
657 INIT_LIST_HEAD(&conn->cn_xpt_user.list);
658 return conn;
659}
621 660
622 dprintk("%s status %d\n", __func__, status); 661static void __nfsd4_hash_conn(struct nfsd4_conn *conn, struct nfsd4_session *ses)
623 return status; 662{
663 conn->cn_session = ses;
664 list_add(&conn->cn_persession, &ses->se_conns);
624} 665}
625 666
626static void 667static void nfsd4_hash_conn(struct nfsd4_conn *conn, struct nfsd4_session *ses)
627free_session_slots(struct nfsd4_session *ses)
628{ 668{
629 int i; 669 struct nfs4_client *clp = ses->se_client;
630 670
631 for (i = 0; i < ses->se_fchannel.maxreqs; i++) 671 spin_lock(&clp->cl_lock);
632 kfree(ses->se_slots[i]); 672 __nfsd4_hash_conn(conn, ses);
673 spin_unlock(&clp->cl_lock);
633} 674}
634 675
635/* 676static void nfsd4_register_conn(struct nfsd4_conn *conn)
636 * We don't actually need to cache the rpc and session headers, so we
637 * can allocate a little less for each slot:
638 */
639static inline int slot_bytes(struct nfsd4_channel_attrs *ca)
640{ 677{
641 return ca->maxresp_cached - NFSD_MIN_HDR_SEQ_SZ; 678 conn->cn_xpt_user.callback = nfsd4_conn_lost;
679 register_xpt_user(conn->cn_xprt, &conn->cn_xpt_user);
642} 680}
643 681
644static int 682static __be32 nfsd4_new_conn(struct svc_rqst *rqstp, struct nfsd4_session *ses)
645alloc_init_session(struct svc_rqst *rqstp, struct nfs4_client *clp,
646 struct nfsd4_create_session *cses)
647{ 683{
648 struct nfsd4_session *new, tmp; 684 struct nfsd4_conn *conn;
649 struct nfsd4_slot *sp; 685 u32 flags = NFS4_CDFC4_FORE;
650 int idx, slotsize, cachesize, i;
651 int status;
652 686
653 memset(&tmp, 0, sizeof(tmp)); 687 if (ses->se_flags & SESSION4_BACK_CHAN)
688 flags |= NFS4_CDFC4_BACK;
689 conn = alloc_conn(rqstp, flags);
690 if (!conn)
691 return nfserr_jukebox;
692 nfsd4_hash_conn(conn, ses);
693 nfsd4_register_conn(conn);
694 return nfs_ok;
695}
654 696
655 /* FIXME: For now, we just accept the client back channel attributes. */ 697static void nfsd4_del_conns(struct nfsd4_session *s)
656 tmp.se_bchannel = cses->back_channel; 698{
657 status = init_forechannel_attrs(rqstp, &tmp.se_fchannel, 699 struct nfs4_client *clp = s->se_client;
658 &cses->fore_channel); 700 struct nfsd4_conn *c;
659 if (status)
660 goto out;
661 701
662 BUILD_BUG_ON(NFSD_MAX_SLOTS_PER_SESSION * sizeof(struct nfsd4_slot) 702 spin_lock(&clp->cl_lock);
663 + sizeof(struct nfsd4_session) > PAGE_SIZE); 703 while (!list_empty(&s->se_conns)) {
704 c = list_first_entry(&s->se_conns, struct nfsd4_conn, cn_persession);
705 list_del_init(&c->cn_persession);
706 spin_unlock(&clp->cl_lock);
664 707
665 status = nfserr_jukebox; 708 unregister_xpt_user(c->cn_xprt, &c->cn_xpt_user);
666 /* allocate struct nfsd4_session and slot table pointers in one piece */ 709 free_conn(c);
667 slotsize = tmp.se_fchannel.maxreqs * sizeof(struct nfsd4_slot *);
668 new = kzalloc(sizeof(*new) + slotsize, GFP_KERNEL);
669 if (!new)
670 goto out;
671 710
672 memcpy(new, &tmp, sizeof(*new)); 711 spin_lock(&clp->cl_lock);
712 }
713 spin_unlock(&clp->cl_lock);
714}
673 715
674 /* allocate each struct nfsd4_slot and data cache in one piece */ 716void free_session(struct kref *kref)
675 cachesize = slot_bytes(&new->se_fchannel); 717{
676 for (i = 0; i < new->se_fchannel.maxreqs; i++) { 718 struct nfsd4_session *ses;
677 sp = kzalloc(sizeof(*sp) + cachesize, GFP_KERNEL); 719 int mem;
678 if (!sp) 720
679 goto out_free; 721 ses = container_of(kref, struct nfsd4_session, se_ref);
680 new->se_slots[i] = sp; 722 nfsd4_del_conns(ses);
723 spin_lock(&nfsd_drc_lock);
724 mem = ses->se_fchannel.maxreqs * slot_bytes(&ses->se_fchannel);
725 nfsd_drc_mem_used -= mem;
726 spin_unlock(&nfsd_drc_lock);
727 free_session_slots(ses);
728 kfree(ses);
729}
730
731static struct nfsd4_session *alloc_init_session(struct svc_rqst *rqstp, struct nfs4_client *clp, struct nfsd4_create_session *cses)
732{
733 struct nfsd4_session *new;
734 struct nfsd4_channel_attrs *fchan = &cses->fore_channel;
735 int numslots, slotsize;
736 int status;
737 int idx;
738
739 /*
740 * Note decreasing slot size below client's request may
741 * make it difficult for client to function correctly, whereas
742 * decreasing the number of slots will (just?) affect
743 * performance. When short on memory we therefore prefer to
744 * decrease number of slots instead of their size.
745 */
746 slotsize = nfsd4_sanitize_slot_size(fchan->maxresp_cached);
747 numslots = nfsd4_get_drc_mem(slotsize, fchan->maxreqs);
748
749 new = alloc_session(slotsize, numslots);
750 if (!new) {
751 nfsd4_put_drc_mem(slotsize, fchan->maxreqs);
752 return NULL;
681 } 753 }
754 init_forechannel_attrs(&new->se_fchannel, fchan, numslots, slotsize);
682 755
683 new->se_client = clp; 756 new->se_client = clp;
684 gen_sessionid(new); 757 gen_sessionid(new);
685 idx = hash_sessionid(&new->se_sessionid);
686 memcpy(clp->cl_sessionid.data, new->se_sessionid.data,
687 NFS4_MAX_SESSIONID_LEN);
688 758
759 INIT_LIST_HEAD(&new->se_conns);
760
761 new->se_cb_seq_nr = 1;
689 new->se_flags = cses->flags; 762 new->se_flags = cses->flags;
763 new->se_cb_prog = cses->callback_prog;
690 kref_init(&new->se_ref); 764 kref_init(&new->se_ref);
765 idx = hash_sessionid(&new->se_sessionid);
691 spin_lock(&client_lock); 766 spin_lock(&client_lock);
692 list_add(&new->se_hash, &sessionid_hashtbl[idx]); 767 list_add(&new->se_hash, &sessionid_hashtbl[idx]);
693 list_add(&new->se_perclnt, &clp->cl_sessions); 768 list_add(&new->se_perclnt, &clp->cl_sessions);
694 spin_unlock(&client_lock); 769 spin_unlock(&client_lock);
695 770
696 status = nfs_ok; 771 status = nfsd4_new_conn(rqstp, new);
697out: 772 /* whoops: benny points out, status is ignored! (err, or bogus) */
698 return status; 773 if (status) {
699out_free: 774 free_session(&new->se_ref);
700 free_session_slots(new); 775 return NULL;
701 kfree(new); 776 }
702 goto out; 777 if (!clp->cl_cb_session && (cses->flags & SESSION4_BACK_CHAN)) {
778 struct sockaddr *sa = svc_addr(rqstp);
779
780 clp->cl_cb_session = new;
781 clp->cl_cb_conn.cb_xprt = rqstp->rq_xprt;
782 svc_xprt_get(rqstp->rq_xprt);
783 rpc_copy_addr((struct sockaddr *)&clp->cl_cb_conn.cb_addr, sa);
784 clp->cl_cb_conn.cb_addrlen = svc_addr_len(sa);
785 nfsd4_probe_callback(clp);
786 }
787 return new;
703} 788}
704 789
705/* caller must hold client_lock */ 790/* caller must hold client_lock */
@@ -731,21 +816,6 @@ unhash_session(struct nfsd4_session *ses)
731 list_del(&ses->se_perclnt); 816 list_del(&ses->se_perclnt);
732} 817}
733 818
734void
735free_session(struct kref *kref)
736{
737 struct nfsd4_session *ses;
738 int mem;
739
740 ses = container_of(kref, struct nfsd4_session, se_ref);
741 spin_lock(&nfsd_drc_lock);
742 mem = ses->se_fchannel.maxreqs * slot_bytes(&ses->se_fchannel);
743 nfsd_drc_mem_used -= mem;
744 spin_unlock(&nfsd_drc_lock);
745 free_session_slots(ses);
746 kfree(ses);
747}
748
749/* must be called under the client_lock */ 819/* must be called under the client_lock */
750static inline void 820static inline void
751renew_client_locked(struct nfs4_client *clp) 821renew_client_locked(struct nfs4_client *clp)
@@ -812,6 +882,13 @@ static struct nfs4_client *alloc_client(struct xdr_netobj name)
812static inline void 882static inline void
813free_client(struct nfs4_client *clp) 883free_client(struct nfs4_client *clp)
814{ 884{
885 while (!list_empty(&clp->cl_sessions)) {
886 struct nfsd4_session *ses;
887 ses = list_entry(clp->cl_sessions.next, struct nfsd4_session,
888 se_perclnt);
889 list_del(&ses->se_perclnt);
890 nfsd4_put_session(ses);
891 }
815 if (clp->cl_cred.cr_group_info) 892 if (clp->cl_cred.cr_group_info)
816 put_group_info(clp->cl_cred.cr_group_info); 893 put_group_info(clp->cl_cred.cr_group_info);
817 kfree(clp->cl_principal); 894 kfree(clp->cl_principal);
@@ -838,15 +915,12 @@ release_session_client(struct nfsd4_session *session)
838static inline void 915static inline void
839unhash_client_locked(struct nfs4_client *clp) 916unhash_client_locked(struct nfs4_client *clp)
840{ 917{
918 struct nfsd4_session *ses;
919
841 mark_client_expired(clp); 920 mark_client_expired(clp);
842 list_del(&clp->cl_lru); 921 list_del(&clp->cl_lru);
843 while (!list_empty(&clp->cl_sessions)) { 922 list_for_each_entry(ses, &clp->cl_sessions, se_perclnt)
844 struct nfsd4_session *ses; 923 list_del_init(&ses->se_hash);
845 ses = list_entry(clp->cl_sessions.next, struct nfsd4_session,
846 se_perclnt);
847 unhash_session(ses);
848 nfsd4_put_session(ses);
849 }
850} 924}
851 925
852static void 926static void
@@ -875,7 +949,7 @@ expire_client(struct nfs4_client *clp)
875 sop = list_entry(clp->cl_openowners.next, struct nfs4_stateowner, so_perclient); 949 sop = list_entry(clp->cl_openowners.next, struct nfs4_stateowner, so_perclient);
876 release_openowner(sop); 950 release_openowner(sop);
877 } 951 }
878 nfsd4_set_callback_client(clp, NULL); 952 nfsd4_shutdown_callback(clp);
879 if (clp->cl_cb_conn.cb_xprt) 953 if (clp->cl_cb_conn.cb_xprt)
880 svc_xprt_put(clp->cl_cb_conn.cb_xprt); 954 svc_xprt_put(clp->cl_cb_conn.cb_xprt);
881 list_del(&clp->cl_idhash); 955 list_del(&clp->cl_idhash);
@@ -960,6 +1034,8 @@ static struct nfs4_client *create_client(struct xdr_netobj name, char *recdir,
960 if (clp == NULL) 1034 if (clp == NULL)
961 return NULL; 1035 return NULL;
962 1036
1037 INIT_LIST_HEAD(&clp->cl_sessions);
1038
963 princ = svc_gss_principal(rqstp); 1039 princ = svc_gss_principal(rqstp);
964 if (princ) { 1040 if (princ) {
965 clp->cl_principal = kstrdup(princ, GFP_KERNEL); 1041 clp->cl_principal = kstrdup(princ, GFP_KERNEL);
@@ -976,8 +1052,9 @@ static struct nfs4_client *create_client(struct xdr_netobj name, char *recdir,
976 INIT_LIST_HEAD(&clp->cl_strhash); 1052 INIT_LIST_HEAD(&clp->cl_strhash);
977 INIT_LIST_HEAD(&clp->cl_openowners); 1053 INIT_LIST_HEAD(&clp->cl_openowners);
978 INIT_LIST_HEAD(&clp->cl_delegations); 1054 INIT_LIST_HEAD(&clp->cl_delegations);
979 INIT_LIST_HEAD(&clp->cl_sessions);
980 INIT_LIST_HEAD(&clp->cl_lru); 1055 INIT_LIST_HEAD(&clp->cl_lru);
1056 spin_lock_init(&clp->cl_lock);
1057 INIT_WORK(&clp->cl_cb_null.cb_work, nfsd4_do_callback_rpc);
981 clp->cl_time = get_seconds(); 1058 clp->cl_time = get_seconds();
982 clear_bit(0, &clp->cl_cb_slot_busy); 1059 clear_bit(0, &clp->cl_cb_slot_busy);
983 rpc_init_wait_queue(&clp->cl_cb_waitq, "Backchannel slot table"); 1060 rpc_init_wait_queue(&clp->cl_cb_waitq, "Backchannel slot table");
@@ -986,7 +1063,7 @@ static struct nfs4_client *create_client(struct xdr_netobj name, char *recdir,
986 clp->cl_flavor = rqstp->rq_flavor; 1063 clp->cl_flavor = rqstp->rq_flavor;
987 copy_cred(&clp->cl_cred, &rqstp->rq_cred); 1064 copy_cred(&clp->cl_cred, &rqstp->rq_cred);
988 gen_confirm(clp); 1065 gen_confirm(clp);
989 1066 clp->cl_cb_session = NULL;
990 return clp; 1067 return clp;
991} 1068}
992 1069
@@ -1098,7 +1175,7 @@ find_unconfirmed_client_by_str(const char *dname, unsigned int hashval,
1098static void 1175static void
1099gen_callback(struct nfs4_client *clp, struct nfsd4_setclientid *se, u32 scopeid) 1176gen_callback(struct nfs4_client *clp, struct nfsd4_setclientid *se, u32 scopeid)
1100{ 1177{
1101 struct nfs4_cb_conn *cb = &clp->cl_cb_conn; 1178 struct nfs4_cb_conn *conn = &clp->cl_cb_conn;
1102 unsigned short expected_family; 1179 unsigned short expected_family;
1103 1180
1104 /* Currently, we only support tcp and tcp6 for the callback channel */ 1181 /* Currently, we only support tcp and tcp6 for the callback channel */
@@ -1111,24 +1188,23 @@ gen_callback(struct nfs4_client *clp, struct nfsd4_setclientid *se, u32 scopeid)
1111 else 1188 else
1112 goto out_err; 1189 goto out_err;
1113 1190
1114 cb->cb_addrlen = rpc_uaddr2sockaddr(se->se_callback_addr_val, 1191 conn->cb_addrlen = rpc_uaddr2sockaddr(se->se_callback_addr_val,
1115 se->se_callback_addr_len, 1192 se->se_callback_addr_len,
1116 (struct sockaddr *) &cb->cb_addr, 1193 (struct sockaddr *)&conn->cb_addr,
1117 sizeof(cb->cb_addr)); 1194 sizeof(conn->cb_addr));
1118 1195
1119 if (!cb->cb_addrlen || cb->cb_addr.ss_family != expected_family) 1196 if (!conn->cb_addrlen || conn->cb_addr.ss_family != expected_family)
1120 goto out_err; 1197 goto out_err;
1121 1198
1122 if (cb->cb_addr.ss_family == AF_INET6) 1199 if (conn->cb_addr.ss_family == AF_INET6)
1123 ((struct sockaddr_in6 *) &cb->cb_addr)->sin6_scope_id = scopeid; 1200 ((struct sockaddr_in6 *)&conn->cb_addr)->sin6_scope_id = scopeid;
1124 1201
1125 cb->cb_minorversion = 0; 1202 conn->cb_prog = se->se_callback_prog;
1126 cb->cb_prog = se->se_callback_prog; 1203 conn->cb_ident = se->se_callback_ident;
1127 cb->cb_ident = se->se_callback_ident;
1128 return; 1204 return;
1129out_err: 1205out_err:
1130 cb->cb_addr.ss_family = AF_UNSPEC; 1206 conn->cb_addr.ss_family = AF_UNSPEC;
1131 cb->cb_addrlen = 0; 1207 conn->cb_addrlen = 0;
1132 dprintk(KERN_INFO "NFSD: this client (clientid %08x/%08x) " 1208 dprintk(KERN_INFO "NFSD: this client (clientid %08x/%08x) "
1133 "will not receive delegations\n", 1209 "will not receive delegations\n",
1134 clp->cl_clientid.cl_boot, clp->cl_clientid.cl_id); 1210 clp->cl_clientid.cl_boot, clp->cl_clientid.cl_id);
@@ -1415,7 +1491,9 @@ nfsd4_create_session(struct svc_rqst *rqstp,
1415{ 1491{
1416 struct sockaddr *sa = svc_addr(rqstp); 1492 struct sockaddr *sa = svc_addr(rqstp);
1417 struct nfs4_client *conf, *unconf; 1493 struct nfs4_client *conf, *unconf;
1494 struct nfsd4_session *new;
1418 struct nfsd4_clid_slot *cs_slot = NULL; 1495 struct nfsd4_clid_slot *cs_slot = NULL;
1496 bool confirm_me = false;
1419 int status = 0; 1497 int status = 0;
1420 1498
1421 nfs4_lock_state(); 1499 nfs4_lock_state();
@@ -1438,7 +1516,6 @@ nfsd4_create_session(struct svc_rqst *rqstp,
1438 cs_slot->sl_seqid, cr_ses->seqid); 1516 cs_slot->sl_seqid, cr_ses->seqid);
1439 goto out; 1517 goto out;
1440 } 1518 }
1441 cs_slot->sl_seqid++;
1442 } else if (unconf) { 1519 } else if (unconf) {
1443 if (!same_creds(&unconf->cl_cred, &rqstp->rq_cred) || 1520 if (!same_creds(&unconf->cl_cred, &rqstp->rq_cred) ||
1444 !rpc_cmp_addr(sa, (struct sockaddr *) &unconf->cl_addr)) { 1521 !rpc_cmp_addr(sa, (struct sockaddr *) &unconf->cl_addr)) {
@@ -1451,25 +1528,10 @@ nfsd4_create_session(struct svc_rqst *rqstp,
1451 if (status) { 1528 if (status) {
1452 /* an unconfirmed replay returns misordered */ 1529 /* an unconfirmed replay returns misordered */
1453 status = nfserr_seq_misordered; 1530 status = nfserr_seq_misordered;
1454 goto out_cache; 1531 goto out;
1455 } 1532 }
1456 1533
1457 cs_slot->sl_seqid++; /* from 0 to 1 */ 1534 confirm_me = true;
1458 move_to_confirmed(unconf);
1459
1460 if (cr_ses->flags & SESSION4_BACK_CHAN) {
1461 unconf->cl_cb_conn.cb_xprt = rqstp->rq_xprt;
1462 svc_xprt_get(rqstp->rq_xprt);
1463 rpc_copy_addr(
1464 (struct sockaddr *)&unconf->cl_cb_conn.cb_addr,
1465 sa);
1466 unconf->cl_cb_conn.cb_addrlen = svc_addr_len(sa);
1467 unconf->cl_cb_conn.cb_minorversion =
1468 cstate->minorversion;
1469 unconf->cl_cb_conn.cb_prog = cr_ses->callback_prog;
1470 unconf->cl_cb_seq_nr = 1;
1471 nfsd4_probe_callback(unconf, &unconf->cl_cb_conn);
1472 }
1473 conf = unconf; 1535 conf = unconf;
1474 } else { 1536 } else {
1475 status = nfserr_stale_clientid; 1537 status = nfserr_stale_clientid;
@@ -1477,22 +1539,30 @@ nfsd4_create_session(struct svc_rqst *rqstp,
1477 } 1539 }
1478 1540
1479 /* 1541 /*
1542 * XXX: we should probably set this at creation time, and check
1543 * for consistent minorversion use throughout:
1544 */
1545 conf->cl_minorversion = 1;
1546 /*
1480 * We do not support RDMA or persistent sessions 1547 * We do not support RDMA or persistent sessions
1481 */ 1548 */
1482 cr_ses->flags &= ~SESSION4_PERSIST; 1549 cr_ses->flags &= ~SESSION4_PERSIST;
1483 cr_ses->flags &= ~SESSION4_RDMA; 1550 cr_ses->flags &= ~SESSION4_RDMA;
1484 1551
1485 status = alloc_init_session(rqstp, conf, cr_ses); 1552 status = nfserr_jukebox;
1486 if (status) 1553 new = alloc_init_session(rqstp, conf, cr_ses);
1554 if (!new)
1487 goto out; 1555 goto out;
1488 1556 status = nfs_ok;
1489 memcpy(cr_ses->sessionid.data, conf->cl_sessionid.data, 1557 memcpy(cr_ses->sessionid.data, new->se_sessionid.data,
1490 NFS4_MAX_SESSIONID_LEN); 1558 NFS4_MAX_SESSIONID_LEN);
1559 cs_slot->sl_seqid++;
1491 cr_ses->seqid = cs_slot->sl_seqid; 1560 cr_ses->seqid = cs_slot->sl_seqid;
1492 1561
1493out_cache:
1494 /* cache solo and embedded create sessions under the state lock */ 1562 /* cache solo and embedded create sessions under the state lock */
1495 nfsd4_cache_create_session(cr_ses, cs_slot, status); 1563 nfsd4_cache_create_session(cr_ses, cs_slot, status);
1564 if (confirm_me)
1565 move_to_confirmed(conf);
1496out: 1566out:
1497 nfs4_unlock_state(); 1567 nfs4_unlock_state();
1498 dprintk("%s returns %d\n", __func__, ntohl(status)); 1568 dprintk("%s returns %d\n", __func__, ntohl(status));
@@ -1546,8 +1616,11 @@ nfsd4_destroy_session(struct svc_rqst *r,
1546 1616
1547 nfs4_lock_state(); 1617 nfs4_lock_state();
1548 /* wait for callbacks */ 1618 /* wait for callbacks */
1549 nfsd4_set_callback_client(ses->se_client, NULL); 1619 nfsd4_shutdown_callback(ses->se_client);
1550 nfs4_unlock_state(); 1620 nfs4_unlock_state();
1621
1622 nfsd4_del_conns(ses);
1623
1551 nfsd4_put_session(ses); 1624 nfsd4_put_session(ses);
1552 status = nfs_ok; 1625 status = nfs_ok;
1553out: 1626out:
@@ -1555,6 +1628,36 @@ out:
1555 return status; 1628 return status;
1556} 1629}
1557 1630
1631static struct nfsd4_conn *__nfsd4_find_conn(struct svc_xprt *xpt, struct nfsd4_session *s)
1632{
1633 struct nfsd4_conn *c;
1634
1635 list_for_each_entry(c, &s->se_conns, cn_persession) {
1636 if (c->cn_xprt == xpt) {
1637 return c;
1638 }
1639 }
1640 return NULL;
1641}
1642
1643static void nfsd4_sequence_check_conn(struct nfsd4_conn *new, struct nfsd4_session *ses)
1644{
1645 struct nfs4_client *clp = ses->se_client;
1646 struct nfsd4_conn *c;
1647
1648 spin_lock(&clp->cl_lock);
1649 c = __nfsd4_find_conn(new->cn_xprt, ses);
1650 if (c) {
1651 spin_unlock(&clp->cl_lock);
1652 free_conn(new);
1653 return;
1654 }
1655 __nfsd4_hash_conn(new, ses);
1656 spin_unlock(&clp->cl_lock);
1657 nfsd4_register_conn(new);
1658 return;
1659}
1660
1558__be32 1661__be32
1559nfsd4_sequence(struct svc_rqst *rqstp, 1662nfsd4_sequence(struct svc_rqst *rqstp,
1560 struct nfsd4_compound_state *cstate, 1663 struct nfsd4_compound_state *cstate,
@@ -1563,11 +1666,20 @@ nfsd4_sequence(struct svc_rqst *rqstp,
1563 struct nfsd4_compoundres *resp = rqstp->rq_resp; 1666 struct nfsd4_compoundres *resp = rqstp->rq_resp;
1564 struct nfsd4_session *session; 1667 struct nfsd4_session *session;
1565 struct nfsd4_slot *slot; 1668 struct nfsd4_slot *slot;
1669 struct nfsd4_conn *conn;
1566 int status; 1670 int status;
1567 1671
1568 if (resp->opcnt != 1) 1672 if (resp->opcnt != 1)
1569 return nfserr_sequence_pos; 1673 return nfserr_sequence_pos;
1570 1674
1675 /*
1676 * Will be either used or freed by nfsd4_sequence_check_conn
1677 * below.
1678 */
1679 conn = alloc_conn(rqstp, NFS4_CDFC4_FORE);
1680 if (!conn)
1681 return nfserr_jukebox;
1682
1571 spin_lock(&client_lock); 1683 spin_lock(&client_lock);
1572 status = nfserr_badsession; 1684 status = nfserr_badsession;
1573 session = find_in_sessionid_hashtbl(&seq->sessionid); 1685 session = find_in_sessionid_hashtbl(&seq->sessionid);
@@ -1599,6 +1711,9 @@ nfsd4_sequence(struct svc_rqst *rqstp,
1599 if (status) 1711 if (status)
1600 goto out; 1712 goto out;
1601 1713
1714 nfsd4_sequence_check_conn(conn, session);
1715 conn = NULL;
1716
1602 /* Success! bump slot seqid */ 1717 /* Success! bump slot seqid */
1603 slot->sl_inuse = true; 1718 slot->sl_inuse = true;
1604 slot->sl_seqid = seq->seqid; 1719 slot->sl_seqid = seq->seqid;
@@ -1613,6 +1728,7 @@ out:
1613 nfsd4_get_session(cstate->session); 1728 nfsd4_get_session(cstate->session);
1614 atomic_inc(&session->se_client->cl_refcount); 1729 atomic_inc(&session->se_client->cl_refcount);
1615 } 1730 }
1731 kfree(conn);
1616 spin_unlock(&client_lock); 1732 spin_unlock(&client_lock);
1617 dprintk("%s: return %d\n", __func__, ntohl(status)); 1733 dprintk("%s: return %d\n", __func__, ntohl(status));
1618 return status; 1734 return status;
@@ -1747,6 +1863,11 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
1747 goto out; 1863 goto out;
1748 gen_clid(new); 1864 gen_clid(new);
1749 } 1865 }
1866 /*
1867 * XXX: we should probably set this at creation time, and check
1868 * for consistent minorversion use throughout:
1869 */
1870 new->cl_minorversion = 0;
1750 gen_callback(new, setclid, rpc_get_scope_id(sa)); 1871 gen_callback(new, setclid, rpc_get_scope_id(sa));
1751 add_to_unconfirmed(new, strhashval); 1872 add_to_unconfirmed(new, strhashval);
1752 setclid->se_clientid.cl_boot = new->cl_clientid.cl_boot; 1873 setclid->se_clientid.cl_boot = new->cl_clientid.cl_boot;
@@ -1807,7 +1928,8 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp,
1807 status = nfserr_clid_inuse; 1928 status = nfserr_clid_inuse;
1808 else { 1929 else {
1809 atomic_set(&conf->cl_cb_set, 0); 1930 atomic_set(&conf->cl_cb_set, 0);
1810 nfsd4_probe_callback(conf, &unconf->cl_cb_conn); 1931 nfsd4_change_callback(conf, &unconf->cl_cb_conn);
1932 nfsd4_probe_callback(conf);
1811 expire_client(unconf); 1933 expire_client(unconf);
1812 status = nfs_ok; 1934 status = nfs_ok;
1813 1935
@@ -1841,7 +1963,7 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp,
1841 } 1963 }
1842 move_to_confirmed(unconf); 1964 move_to_confirmed(unconf);
1843 conf = unconf; 1965 conf = unconf;
1844 nfsd4_probe_callback(conf, &conf->cl_cb_conn); 1966 nfsd4_probe_callback(conf);
1845 status = nfs_ok; 1967 status = nfs_ok;
1846 } 1968 }
1847 } else if ((!conf || (conf && !same_verf(&conf->cl_confirm, &confirm))) 1969 } else if ((!conf || (conf && !same_verf(&conf->cl_confirm, &confirm)))
@@ -2944,7 +3066,11 @@ nfs4_preprocess_stateid_op(struct nfsd4_compound_state *cstate,
2944 if (STALE_STATEID(stateid)) 3066 if (STALE_STATEID(stateid))
2945 goto out; 3067 goto out;
2946 3068
2947 status = nfserr_bad_stateid; 3069 /*
3070 * We assume that any stateid that has the current boot time,
3071 * but that we can't find, is expired:
3072 */
3073 status = nfserr_expired;
2948 if (is_delegation_stateid(stateid)) { 3074 if (is_delegation_stateid(stateid)) {
2949 dp = find_delegation_stateid(ino, stateid); 3075 dp = find_delegation_stateid(ino, stateid);
2950 if (!dp) 3076 if (!dp)
@@ -2964,6 +3090,7 @@ nfs4_preprocess_stateid_op(struct nfsd4_compound_state *cstate,
2964 stp = find_stateid(stateid, flags); 3090 stp = find_stateid(stateid, flags);
2965 if (!stp) 3091 if (!stp)
2966 goto out; 3092 goto out;
3093 status = nfserr_bad_stateid;
2967 if (nfs4_check_fh(current_fh, stp)) 3094 if (nfs4_check_fh(current_fh, stp))
2968 goto out; 3095 goto out;
2969 if (!stp->st_stateowner->so_confirmed) 3096 if (!stp->st_stateowner->so_confirmed)
@@ -3038,8 +3165,9 @@ nfs4_preprocess_seqid_op(struct nfsd4_compound_state *cstate, u32 seqid,
3038 * a replayed close: 3165 * a replayed close:
3039 */ 3166 */
3040 sop = search_close_lru(stateid->si_stateownerid, flags); 3167 sop = search_close_lru(stateid->si_stateownerid, flags);
3168 /* It's not stale; let's assume it's expired: */
3041 if (sop == NULL) 3169 if (sop == NULL)
3042 return nfserr_bad_stateid; 3170 return nfserr_expired;
3043 *sopp = sop; 3171 *sopp = sop;
3044 goto check_replay; 3172 goto check_replay;
3045 } 3173 }
@@ -3304,6 +3432,7 @@ nfsd4_delegreturn(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
3304 status = nfserr_bad_stateid; 3432 status = nfserr_bad_stateid;
3305 if (!is_delegation_stateid(stateid)) 3433 if (!is_delegation_stateid(stateid))
3306 goto out; 3434 goto out;
3435 status = nfserr_expired;
3307 dp = find_delegation_stateid(inode, stateid); 3436 dp = find_delegation_stateid(inode, stateid);
3308 if (!dp) 3437 if (!dp)
3309 goto out; 3438 goto out;
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 1a468bbd330f..f35a94a04026 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -1805,19 +1805,23 @@ nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp,
1805 goto out_nfserr; 1805 goto out_nfserr;
1806 } 1806 }
1807 } 1807 }
1808 if ((buflen -= 16) < 0)
1809 goto out_resource;
1810 1808
1811 if (unlikely(bmval2)) { 1809 if (bmval2) {
1810 if ((buflen -= 16) < 0)
1811 goto out_resource;
1812 WRITE32(3); 1812 WRITE32(3);
1813 WRITE32(bmval0); 1813 WRITE32(bmval0);
1814 WRITE32(bmval1); 1814 WRITE32(bmval1);
1815 WRITE32(bmval2); 1815 WRITE32(bmval2);
1816 } else if (likely(bmval1)) { 1816 } else if (bmval1) {
1817 if ((buflen -= 12) < 0)
1818 goto out_resource;
1817 WRITE32(2); 1819 WRITE32(2);
1818 WRITE32(bmval0); 1820 WRITE32(bmval0);
1819 WRITE32(bmval1); 1821 WRITE32(bmval1);
1820 } else { 1822 } else {
1823 if ((buflen -= 8) < 0)
1824 goto out_resource;
1821 WRITE32(1); 1825 WRITE32(1);
1822 WRITE32(bmval0); 1826 WRITE32(bmval0);
1823 } 1827 }
@@ -1828,15 +1832,17 @@ nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp,
1828 u32 word1 = nfsd_suppattrs1(minorversion); 1832 u32 word1 = nfsd_suppattrs1(minorversion);
1829 u32 word2 = nfsd_suppattrs2(minorversion); 1833 u32 word2 = nfsd_suppattrs2(minorversion);
1830 1834
1831 if ((buflen -= 12) < 0)
1832 goto out_resource;
1833 if (!aclsupport) 1835 if (!aclsupport)
1834 word0 &= ~FATTR4_WORD0_ACL; 1836 word0 &= ~FATTR4_WORD0_ACL;
1835 if (!word2) { 1837 if (!word2) {
1838 if ((buflen -= 12) < 0)
1839 goto out_resource;
1836 WRITE32(2); 1840 WRITE32(2);
1837 WRITE32(word0); 1841 WRITE32(word0);
1838 WRITE32(word1); 1842 WRITE32(word1);
1839 } else { 1843 } else {
1844 if ((buflen -= 16) < 0)
1845 goto out_resource;
1840 WRITE32(3); 1846 WRITE32(3);
1841 WRITE32(word0); 1847 WRITE32(word0);
1842 WRITE32(word1); 1848 WRITE32(word1);
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index 06fa87e52e82..d6dc3f61f8ba 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -22,6 +22,7 @@
22 */ 22 */
23enum { 23enum {
24 NFSD_Root = 1, 24 NFSD_Root = 1,
25#ifdef CONFIG_NFSD_DEPRECATED
25 NFSD_Svc, 26 NFSD_Svc,
26 NFSD_Add, 27 NFSD_Add,
27 NFSD_Del, 28 NFSD_Del,
@@ -29,6 +30,7 @@ enum {
29 NFSD_Unexport, 30 NFSD_Unexport,
30 NFSD_Getfd, 31 NFSD_Getfd,
31 NFSD_Getfs, 32 NFSD_Getfs,
33#endif
32 NFSD_List, 34 NFSD_List,
33 NFSD_Export_features, 35 NFSD_Export_features,
34 NFSD_Fh, 36 NFSD_Fh,
@@ -54,6 +56,7 @@ enum {
54/* 56/*
55 * write() for these nodes. 57 * write() for these nodes.
56 */ 58 */
59#ifdef CONFIG_NFSD_DEPRECATED
57static ssize_t write_svc(struct file *file, char *buf, size_t size); 60static ssize_t write_svc(struct file *file, char *buf, size_t size);
58static ssize_t write_add(struct file *file, char *buf, size_t size); 61static ssize_t write_add(struct file *file, char *buf, size_t size);
59static ssize_t write_del(struct file *file, char *buf, size_t size); 62static ssize_t write_del(struct file *file, char *buf, size_t size);
@@ -61,6 +64,7 @@ static ssize_t write_export(struct file *file, char *buf, size_t size);
61static ssize_t write_unexport(struct file *file, char *buf, size_t size); 64static ssize_t write_unexport(struct file *file, char *buf, size_t size);
62static ssize_t write_getfd(struct file *file, char *buf, size_t size); 65static ssize_t write_getfd(struct file *file, char *buf, size_t size);
63static ssize_t write_getfs(struct file *file, char *buf, size_t size); 66static ssize_t write_getfs(struct file *file, char *buf, size_t size);
67#endif
64static ssize_t write_filehandle(struct file *file, char *buf, size_t size); 68static ssize_t write_filehandle(struct file *file, char *buf, size_t size);
65static ssize_t write_unlock_ip(struct file *file, char *buf, size_t size); 69static ssize_t write_unlock_ip(struct file *file, char *buf, size_t size);
66static ssize_t write_unlock_fs(struct file *file, char *buf, size_t size); 70static ssize_t write_unlock_fs(struct file *file, char *buf, size_t size);
@@ -76,6 +80,7 @@ static ssize_t write_recoverydir(struct file *file, char *buf, size_t size);
76#endif 80#endif
77 81
78static ssize_t (*write_op[])(struct file *, char *, size_t) = { 82static ssize_t (*write_op[])(struct file *, char *, size_t) = {
83#ifdef CONFIG_NFSD_DEPRECATED
79 [NFSD_Svc] = write_svc, 84 [NFSD_Svc] = write_svc,
80 [NFSD_Add] = write_add, 85 [NFSD_Add] = write_add,
81 [NFSD_Del] = write_del, 86 [NFSD_Del] = write_del,
@@ -83,6 +88,7 @@ static ssize_t (*write_op[])(struct file *, char *, size_t) = {
83 [NFSD_Unexport] = write_unexport, 88 [NFSD_Unexport] = write_unexport,
84 [NFSD_Getfd] = write_getfd, 89 [NFSD_Getfd] = write_getfd,
85 [NFSD_Getfs] = write_getfs, 90 [NFSD_Getfs] = write_getfs,
91#endif
86 [NFSD_Fh] = write_filehandle, 92 [NFSD_Fh] = write_filehandle,
87 [NFSD_FO_UnlockIP] = write_unlock_ip, 93 [NFSD_FO_UnlockIP] = write_unlock_ip,
88 [NFSD_FO_UnlockFS] = write_unlock_fs, 94 [NFSD_FO_UnlockFS] = write_unlock_fs,
@@ -121,6 +127,14 @@ static ssize_t nfsctl_transaction_write(struct file *file, const char __user *bu
121 127
122static ssize_t nfsctl_transaction_read(struct file *file, char __user *buf, size_t size, loff_t *pos) 128static ssize_t nfsctl_transaction_read(struct file *file, char __user *buf, size_t size, loff_t *pos)
123{ 129{
130 static int warned;
131 if (file->f_dentry->d_name.name[0] == '.' && !warned) {
132 printk(KERN_INFO
133 "Warning: \"%s\" uses deprecated NFSD interface: %s."
134 " This will be removed in 2.6.40\n",
135 current->comm, file->f_dentry->d_name.name);
136 warned = 1;
137 }
124 if (! file->private_data) { 138 if (! file->private_data) {
125 /* An attempt to read a transaction file without writing 139 /* An attempt to read a transaction file without writing
126 * causes a 0-byte write so that the file can return 140 * causes a 0-byte write so that the file can return
@@ -187,6 +201,7 @@ static const struct file_operations pool_stats_operations = {
187 * payload - write methods 201 * payload - write methods
188 */ 202 */
189 203
204#ifdef CONFIG_NFSD_DEPRECATED
190/** 205/**
191 * write_svc - Start kernel's NFSD server 206 * write_svc - Start kernel's NFSD server
192 * 207 *
@@ -402,7 +417,7 @@ static ssize_t write_getfs(struct file *file, char *buf, size_t size)
402 417
403 ipv6_addr_set_v4mapped(sin->sin_addr.s_addr, &in6); 418 ipv6_addr_set_v4mapped(sin->sin_addr.s_addr, &in6);
404 419
405 clp = auth_unix_lookup(&in6); 420 clp = auth_unix_lookup(&init_net, &in6);
406 if (!clp) 421 if (!clp)
407 err = -EPERM; 422 err = -EPERM;
408 else { 423 else {
@@ -465,7 +480,7 @@ static ssize_t write_getfd(struct file *file, char *buf, size_t size)
465 480
466 ipv6_addr_set_v4mapped(sin->sin_addr.s_addr, &in6); 481 ipv6_addr_set_v4mapped(sin->sin_addr.s_addr, &in6);
467 482
468 clp = auth_unix_lookup(&in6); 483 clp = auth_unix_lookup(&init_net, &in6);
469 if (!clp) 484 if (!clp)
470 err = -EPERM; 485 err = -EPERM;
471 else { 486 else {
@@ -482,6 +497,7 @@ static ssize_t write_getfd(struct file *file, char *buf, size_t size)
482 out: 497 out:
483 return err; 498 return err;
484} 499}
500#endif /* CONFIG_NFSD_DEPRECATED */
485 501
486/** 502/**
487 * write_unlock_ip - Release all locks used by a client 503 * write_unlock_ip - Release all locks used by a client
@@ -1000,12 +1016,12 @@ static ssize_t __write_ports_addxprt(char *buf)
1000 if (err != 0) 1016 if (err != 0)
1001 return err; 1017 return err;
1002 1018
1003 err = svc_create_xprt(nfsd_serv, transport, 1019 err = svc_create_xprt(nfsd_serv, transport, &init_net,
1004 PF_INET, port, SVC_SOCK_ANONYMOUS); 1020 PF_INET, port, SVC_SOCK_ANONYMOUS);
1005 if (err < 0) 1021 if (err < 0)
1006 goto out_err; 1022 goto out_err;
1007 1023
1008 err = svc_create_xprt(nfsd_serv, transport, 1024 err = svc_create_xprt(nfsd_serv, transport, &init_net,
1009 PF_INET6, port, SVC_SOCK_ANONYMOUS); 1025 PF_INET6, port, SVC_SOCK_ANONYMOUS);
1010 if (err < 0 && err != -EAFNOSUPPORT) 1026 if (err < 0 && err != -EAFNOSUPPORT)
1011 goto out_close; 1027 goto out_close;
@@ -1356,6 +1372,7 @@ static ssize_t write_recoverydir(struct file *file, char *buf, size_t size)
1356static int nfsd_fill_super(struct super_block * sb, void * data, int silent) 1372static int nfsd_fill_super(struct super_block * sb, void * data, int silent)
1357{ 1373{
1358 static struct tree_descr nfsd_files[] = { 1374 static struct tree_descr nfsd_files[] = {
1375#ifdef CONFIG_NFSD_DEPRECATED
1359 [NFSD_Svc] = {".svc", &transaction_ops, S_IWUSR}, 1376 [NFSD_Svc] = {".svc", &transaction_ops, S_IWUSR},
1360 [NFSD_Add] = {".add", &transaction_ops, S_IWUSR}, 1377 [NFSD_Add] = {".add", &transaction_ops, S_IWUSR},
1361 [NFSD_Del] = {".del", &transaction_ops, S_IWUSR}, 1378 [NFSD_Del] = {".del", &transaction_ops, S_IWUSR},
@@ -1363,6 +1380,7 @@ static int nfsd_fill_super(struct super_block * sb, void * data, int silent)
1363 [NFSD_Unexport] = {".unexport", &transaction_ops, S_IWUSR}, 1380 [NFSD_Unexport] = {".unexport", &transaction_ops, S_IWUSR},
1364 [NFSD_Getfd] = {".getfd", &transaction_ops, S_IWUSR|S_IRUSR}, 1381 [NFSD_Getfd] = {".getfd", &transaction_ops, S_IWUSR|S_IRUSR},
1365 [NFSD_Getfs] = {".getfs", &transaction_ops, S_IWUSR|S_IRUSR}, 1382 [NFSD_Getfs] = {".getfs", &transaction_ops, S_IWUSR|S_IRUSR},
1383#endif
1366 [NFSD_List] = {"exports", &exports_operations, S_IRUGO}, 1384 [NFSD_List] = {"exports", &exports_operations, S_IRUGO},
1367 [NFSD_Export_features] = {"export_features", 1385 [NFSD_Export_features] = {"export_features",
1368 &export_features_operations, S_IRUGO}, 1386 &export_features_operations, S_IRUGO},
diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
index b76ac3a82e39..6b641cf2c19a 100644
--- a/fs/nfsd/nfsd.h
+++ b/fs/nfsd/nfsd.h
@@ -249,7 +249,7 @@ extern time_t nfsd4_grace;
249#define COMPOUND_SLACK_SPACE 140 /* OP_GETFH */ 249#define COMPOUND_SLACK_SPACE 140 /* OP_GETFH */
250#define COMPOUND_ERR_SLACK_SPACE 12 /* OP_SETATTR */ 250#define COMPOUND_ERR_SLACK_SPACE 12 /* OP_SETATTR */
251 251
252#define NFSD_LAUNDROMAT_MINTIMEOUT 10 /* seconds */ 252#define NFSD_LAUNDROMAT_MINTIMEOUT 1 /* seconds */
253 253
254/* 254/*
255 * The following attributes are currently not supported by the NFSv4 server: 255 * The following attributes are currently not supported by the NFSv4 server:
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index e2c43464f237..2bae1d86f5f2 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -16,6 +16,7 @@
16#include <linux/lockd/bind.h> 16#include <linux/lockd/bind.h>
17#include <linux/nfsacl.h> 17#include <linux/nfsacl.h>
18#include <linux/seq_file.h> 18#include <linux/seq_file.h>
19#include <net/net_namespace.h>
19#include "nfsd.h" 20#include "nfsd.h"
20#include "cache.h" 21#include "cache.h"
21#include "vfs.h" 22#include "vfs.h"
@@ -186,12 +187,12 @@ static int nfsd_init_socks(int port)
186 if (!list_empty(&nfsd_serv->sv_permsocks)) 187 if (!list_empty(&nfsd_serv->sv_permsocks))
187 return 0; 188 return 0;
188 189
189 error = svc_create_xprt(nfsd_serv, "udp", PF_INET, port, 190 error = svc_create_xprt(nfsd_serv, "udp", &init_net, PF_INET, port,
190 SVC_SOCK_DEFAULTS); 191 SVC_SOCK_DEFAULTS);
191 if (error < 0) 192 if (error < 0)
192 return error; 193 return error;
193 194
194 error = svc_create_xprt(nfsd_serv, "tcp", PF_INET, port, 195 error = svc_create_xprt(nfsd_serv, "tcp", &init_net, PF_INET, port,
195 SVC_SOCK_DEFAULTS); 196 SVC_SOCK_DEFAULTS);
196 if (error < 0) 197 if (error < 0)
197 return error; 198 return error;
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index 322518c88e4b..39adc27b0685 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -35,6 +35,7 @@
35#ifndef _NFSD4_STATE_H 35#ifndef _NFSD4_STATE_H
36#define _NFSD4_STATE_H 36#define _NFSD4_STATE_H
37 37
38#include <linux/sunrpc/svc_xprt.h>
38#include <linux/nfsd/nfsfh.h> 39#include <linux/nfsd/nfsfh.h>
39#include "nfsfh.h" 40#include "nfsfh.h"
40 41
@@ -64,19 +65,12 @@ typedef struct {
64 (s)->si_fileid, \ 65 (s)->si_fileid, \
65 (s)->si_generation 66 (s)->si_generation
66 67
67struct nfsd4_cb_sequence {
68 /* args/res */
69 u32 cbs_minorversion;
70 struct nfs4_client *cbs_clp;
71};
72
73struct nfs4_rpc_args {
74 void *args_op;
75 struct nfsd4_cb_sequence args_seq;
76};
77
78struct nfsd4_callback { 68struct nfsd4_callback {
79 struct nfs4_rpc_args cb_args; 69 void *cb_op;
70 struct nfs4_client *cb_clp;
71 u32 cb_minorversion;
72 struct rpc_message cb_msg;
73 const struct rpc_call_ops *cb_ops;
80 struct work_struct cb_work; 74 struct work_struct cb_work;
81}; 75};
82 76
@@ -91,7 +85,6 @@ struct nfs4_delegation {
91 u32 dl_type; 85 u32 dl_type;
92 time_t dl_time; 86 time_t dl_time;
93/* For recall: */ 87/* For recall: */
94 u32 dl_ident;
95 stateid_t dl_stateid; 88 stateid_t dl_stateid;
96 struct knfsd_fh dl_fh; 89 struct knfsd_fh dl_fh;
97 int dl_retries; 90 int dl_retries;
@@ -103,8 +96,8 @@ struct nfs4_cb_conn {
103 /* SETCLIENTID info */ 96 /* SETCLIENTID info */
104 struct sockaddr_storage cb_addr; 97 struct sockaddr_storage cb_addr;
105 size_t cb_addrlen; 98 size_t cb_addrlen;
106 u32 cb_prog; 99 u32 cb_prog; /* used only in 4.0 case;
107 u32 cb_minorversion; 100 per-session otherwise */
108 u32 cb_ident; /* minorversion 0 only */ 101 u32 cb_ident; /* minorversion 0 only */
109 struct svc_xprt *cb_xprt; /* minorversion 1 only */ 102 struct svc_xprt *cb_xprt; /* minorversion 1 only */
110}; 103};
@@ -160,6 +153,15 @@ struct nfsd4_clid_slot {
160 struct nfsd4_create_session sl_cr_ses; 153 struct nfsd4_create_session sl_cr_ses;
161}; 154};
162 155
156struct nfsd4_conn {
157 struct list_head cn_persession;
158 struct svc_xprt *cn_xprt;
159 struct svc_xpt_user cn_xpt_user;
160 struct nfsd4_session *cn_session;
161/* CDFC4_FORE, CDFC4_BACK: */
162 unsigned char cn_flags;
163};
164
163struct nfsd4_session { 165struct nfsd4_session {
164 struct kref se_ref; 166 struct kref se_ref;
165 struct list_head se_hash; /* hash by sessionid */ 167 struct list_head se_hash; /* hash by sessionid */
@@ -169,6 +171,9 @@ struct nfsd4_session {
169 struct nfs4_sessionid se_sessionid; 171 struct nfs4_sessionid se_sessionid;
170 struct nfsd4_channel_attrs se_fchannel; 172 struct nfsd4_channel_attrs se_fchannel;
171 struct nfsd4_channel_attrs se_bchannel; 173 struct nfsd4_channel_attrs se_bchannel;
174 struct list_head se_conns;
175 u32 se_cb_prog;
176 u32 se_cb_seq_nr;
172 struct nfsd4_slot *se_slots[]; /* forward channel slots */ 177 struct nfsd4_slot *se_slots[]; /* forward channel slots */
173}; 178};
174 179
@@ -221,24 +226,32 @@ struct nfs4_client {
221 clientid_t cl_clientid; /* generated by server */ 226 clientid_t cl_clientid; /* generated by server */
222 nfs4_verifier cl_confirm; /* generated by server */ 227 nfs4_verifier cl_confirm; /* generated by server */
223 u32 cl_firststate; /* recovery dir creation */ 228 u32 cl_firststate; /* recovery dir creation */
229 u32 cl_minorversion;
224 230
225 /* for v4.0 and v4.1 callbacks: */ 231 /* for v4.0 and v4.1 callbacks: */
226 struct nfs4_cb_conn cl_cb_conn; 232 struct nfs4_cb_conn cl_cb_conn;
233#define NFSD4_CLIENT_CB_UPDATE 1
234#define NFSD4_CLIENT_KILL 2
235 unsigned long cl_cb_flags;
227 struct rpc_clnt *cl_cb_client; 236 struct rpc_clnt *cl_cb_client;
237 u32 cl_cb_ident;
228 atomic_t cl_cb_set; 238 atomic_t cl_cb_set;
239 struct nfsd4_callback cl_cb_null;
240 struct nfsd4_session *cl_cb_session;
241
242 /* for all client information that callback code might need: */
243 spinlock_t cl_lock;
229 244
230 /* for nfs41 */ 245 /* for nfs41 */
231 struct list_head cl_sessions; 246 struct list_head cl_sessions;
232 struct nfsd4_clid_slot cl_cs_slot; /* create_session slot */ 247 struct nfsd4_clid_slot cl_cs_slot; /* create_session slot */
233 u32 cl_exchange_flags; 248 u32 cl_exchange_flags;
234 struct nfs4_sessionid cl_sessionid;
235 /* number of rpc's in progress over an associated session: */ 249 /* number of rpc's in progress over an associated session: */
236 atomic_t cl_refcount; 250 atomic_t cl_refcount;
237 251
238 /* for nfs41 callbacks */ 252 /* for nfs41 callbacks */
239 /* We currently support a single back channel with a single slot */ 253 /* We currently support a single back channel with a single slot */
240 unsigned long cl_cb_slot_busy; 254 unsigned long cl_cb_slot_busy;
241 u32 cl_cb_seq_nr;
242 struct rpc_wait_queue cl_cb_waitq; /* backchannel callers may */ 255 struct rpc_wait_queue cl_cb_waitq; /* backchannel callers may */
243 /* wait here for slots */ 256 /* wait here for slots */
244}; 257};
@@ -440,12 +453,13 @@ extern int nfs4_in_grace(void);
440extern __be32 nfs4_check_open_reclaim(clientid_t *clid); 453extern __be32 nfs4_check_open_reclaim(clientid_t *clid);
441extern void nfs4_free_stateowner(struct kref *kref); 454extern void nfs4_free_stateowner(struct kref *kref);
442extern int set_callback_cred(void); 455extern int set_callback_cred(void);
443extern void nfsd4_probe_callback(struct nfs4_client *clp, struct nfs4_cb_conn *); 456extern void nfsd4_probe_callback(struct nfs4_client *clp);
457extern void nfsd4_change_callback(struct nfs4_client *clp, struct nfs4_cb_conn *);
444extern void nfsd4_do_callback_rpc(struct work_struct *); 458extern void nfsd4_do_callback_rpc(struct work_struct *);
445extern void nfsd4_cb_recall(struct nfs4_delegation *dp); 459extern void nfsd4_cb_recall(struct nfs4_delegation *dp);
446extern int nfsd4_create_callback_queue(void); 460extern int nfsd4_create_callback_queue(void);
447extern void nfsd4_destroy_callback_queue(void); 461extern void nfsd4_destroy_callback_queue(void);
448extern void nfsd4_set_callback_client(struct nfs4_client *, struct rpc_clnt *); 462extern void nfsd4_shutdown_callback(struct nfs4_client *);
449extern void nfs4_put_delegation(struct nfs4_delegation *dp); 463extern void nfs4_put_delegation(struct nfs4_delegation *dp);
450extern __be32 nfs4_make_rec_clidname(char *clidname, struct xdr_netobj *clname); 464extern __be32 nfs4_make_rec_clidname(char *clidname, struct xdr_netobj *clname);
451extern void nfsd4_init_recdir(char *recdir_name); 465extern void nfsd4_init_recdir(char *recdir_name);
diff --git a/include/linux/net.h b/include/linux/net.h
index dee0b11a8759..16faa130088c 100644
--- a/include/linux/net.h
+++ b/include/linux/net.h
@@ -229,6 +229,8 @@ enum {
229extern int sock_wake_async(struct socket *sk, int how, int band); 229extern int sock_wake_async(struct socket *sk, int how, int band);
230extern int sock_register(const struct net_proto_family *fam); 230extern int sock_register(const struct net_proto_family *fam);
231extern void sock_unregister(int family); 231extern void sock_unregister(int family);
232extern int __sock_create(struct net *net, int family, int type, int proto,
233 struct socket **res, int kern);
232extern int sock_create(int family, int type, int proto, 234extern int sock_create(int family, int type, int proto,
233 struct socket **res); 235 struct socket **res);
234extern int sock_create_kern(int family, int type, int proto, 236extern int sock_create_kern(int family, int type, int proto,
diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h
index 07e40c625972..4925b22219d2 100644
--- a/include/linux/nfs4.h
+++ b/include/linux/nfs4.h
@@ -17,7 +17,9 @@
17 17
18#define NFS4_BITMAP_SIZE 2 18#define NFS4_BITMAP_SIZE 2
19#define NFS4_VERIFIER_SIZE 8 19#define NFS4_VERIFIER_SIZE 8
20#define NFS4_STATEID_SIZE 16 20#define NFS4_STATEID_SEQID_SIZE 4
21#define NFS4_STATEID_OTHER_SIZE 12
22#define NFS4_STATEID_SIZE (NFS4_STATEID_SEQID_SIZE + NFS4_STATEID_OTHER_SIZE)
21#define NFS4_FHSIZE 128 23#define NFS4_FHSIZE 128
22#define NFS4_MAXPATHLEN PATH_MAX 24#define NFS4_MAXPATHLEN PATH_MAX
23#define NFS4_MAXNAMLEN NAME_MAX 25#define NFS4_MAXNAMLEN NAME_MAX
@@ -61,6 +63,9 @@
61#define NFS4_SHARE_SIGNAL_DELEG_WHEN_RESRC_AVAIL 0x10000 63#define NFS4_SHARE_SIGNAL_DELEG_WHEN_RESRC_AVAIL 0x10000
62#define NFS4_SHARE_PUSH_DELEG_WHEN_UNCONTENDED 0x20000 64#define NFS4_SHARE_PUSH_DELEG_WHEN_UNCONTENDED 0x20000
63 65
66#define NFS4_CDFC4_FORE 0x1
67#define NFS4_CDFC4_BACK 0x2
68
64#define NFS4_SET_TO_SERVER_TIME 0 69#define NFS4_SET_TO_SERVER_TIME 0
65#define NFS4_SET_TO_CLIENT_TIME 1 70#define NFS4_SET_TO_CLIENT_TIME 1
66 71
@@ -167,7 +172,16 @@ struct nfs4_acl {
167}; 172};
168 173
169typedef struct { char data[NFS4_VERIFIER_SIZE]; } nfs4_verifier; 174typedef struct { char data[NFS4_VERIFIER_SIZE]; } nfs4_verifier;
170typedef struct { char data[NFS4_STATEID_SIZE]; } nfs4_stateid; 175
176struct nfs41_stateid {
177 __be32 seqid;
178 char other[NFS4_STATEID_OTHER_SIZE];
179} __attribute__ ((packed));
180
181typedef union {
182 char data[NFS4_STATEID_SIZE];
183 struct nfs41_stateid stateid;
184} nfs4_stateid;
171 185
172enum nfs_opnum4 { 186enum nfs_opnum4 {
173 OP_ACCESS = 3, 187 OP_ACCESS = 3,
@@ -471,6 +485,8 @@ enum lock_type4 {
471#define FATTR4_WORD1_TIME_MODIFY (1UL << 21) 485#define FATTR4_WORD1_TIME_MODIFY (1UL << 21)
472#define FATTR4_WORD1_TIME_MODIFY_SET (1UL << 22) 486#define FATTR4_WORD1_TIME_MODIFY_SET (1UL << 22)
473#define FATTR4_WORD1_MOUNTED_ON_FILEID (1UL << 23) 487#define FATTR4_WORD1_MOUNTED_ON_FILEID (1UL << 23)
488#define FATTR4_WORD1_FS_LAYOUT_TYPES (1UL << 30)
489#define FATTR4_WORD2_LAYOUT_BLKSIZE (1UL << 1)
474 490
475#define NFSPROC4_NULL 0 491#define NFSPROC4_NULL 0
476#define NFSPROC4_COMPOUND 1 492#define NFSPROC4_COMPOUND 1
@@ -532,6 +548,8 @@ enum {
532 NFSPROC4_CLNT_SEQUENCE, 548 NFSPROC4_CLNT_SEQUENCE,
533 NFSPROC4_CLNT_GET_LEASE_TIME, 549 NFSPROC4_CLNT_GET_LEASE_TIME,
534 NFSPROC4_CLNT_RECLAIM_COMPLETE, 550 NFSPROC4_CLNT_RECLAIM_COMPLETE,
551 NFSPROC4_CLNT_LAYOUTGET,
552 NFSPROC4_CLNT_GETDEVICEINFO,
535}; 553};
536 554
537/* nfs41 types */ 555/* nfs41 types */
@@ -550,6 +568,49 @@ enum state_protect_how4 {
550 SP4_SSV = 2 568 SP4_SSV = 2
551}; 569};
552 570
571enum pnfs_layouttype {
572 LAYOUT_NFSV4_1_FILES = 1,
573 LAYOUT_OSD2_OBJECTS = 2,
574 LAYOUT_BLOCK_VOLUME = 3,
575};
576
577/* used for both layout return and recall */
578enum pnfs_layoutreturn_type {
579 RETURN_FILE = 1,
580 RETURN_FSID = 2,
581 RETURN_ALL = 3
582};
583
584enum pnfs_iomode {
585 IOMODE_READ = 1,
586 IOMODE_RW = 2,
587 IOMODE_ANY = 3,
588};
589
590enum pnfs_notify_deviceid_type4 {
591 NOTIFY_DEVICEID4_CHANGE = 1 << 1,
592 NOTIFY_DEVICEID4_DELETE = 1 << 2,
593};
594
595#define NFL4_UFLG_MASK 0x0000003F
596#define NFL4_UFLG_DENSE 0x00000001
597#define NFL4_UFLG_COMMIT_THRU_MDS 0x00000002
598#define NFL4_UFLG_STRIPE_UNIT_SIZE_MASK 0xFFFFFFC0
599
600/* Encoded in the loh_body field of type layouthint4 */
601enum filelayout_hint_care4 {
602 NFLH4_CARE_DENSE = NFL4_UFLG_DENSE,
603 NFLH4_CARE_COMMIT_THRU_MDS = NFL4_UFLG_COMMIT_THRU_MDS,
604 NFLH4_CARE_STRIPE_UNIT_SIZE = 0x00000040,
605 NFLH4_CARE_STRIPE_COUNT = 0x00000080
606};
607
608#define NFS4_DEVICEID4_SIZE 16
609
610struct nfs4_deviceid {
611 char data[NFS4_DEVICEID4_SIZE];
612};
613
553#endif 614#endif
554#endif 615#endif
555 616
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index a46e430d9622..bba26684acdc 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -188,6 +188,9 @@ struct nfs_inode {
188 struct nfs_delegation __rcu *delegation; 188 struct nfs_delegation __rcu *delegation;
189 fmode_t delegation_state; 189 fmode_t delegation_state;
190 struct rw_semaphore rwsem; 190 struct rw_semaphore rwsem;
191
192 /* pNFS layout information */
193 struct pnfs_layout_hdr *layout;
191#endif /* CONFIG_NFS_V4*/ 194#endif /* CONFIG_NFS_V4*/
192#ifdef CONFIG_NFS_FSCACHE 195#ifdef CONFIG_NFS_FSCACHE
193 struct fscache_cookie *fscache; 196 struct fscache_cookie *fscache;
@@ -615,6 +618,8 @@ nfs_fileid_to_ino_t(u64 fileid)
615#define NFSDBG_CLIENT 0x0200 618#define NFSDBG_CLIENT 0x0200
616#define NFSDBG_MOUNT 0x0400 619#define NFSDBG_MOUNT 0x0400
617#define NFSDBG_FSCACHE 0x0800 620#define NFSDBG_FSCACHE 0x0800
621#define NFSDBG_PNFS 0x1000
622#define NFSDBG_PNFS_LD 0x2000
618#define NFSDBG_ALL 0xFFFF 623#define NFSDBG_ALL 0xFFFF
619 624
620#ifdef __KERNEL__ 625#ifdef __KERNEL__
diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
index 5eef862ec187..452d96436d26 100644
--- a/include/linux/nfs_fs_sb.h
+++ b/include/linux/nfs_fs_sb.h
@@ -82,6 +82,8 @@ struct nfs_client {
82 /* The flags used for obtaining the clientid during EXCHANGE_ID */ 82 /* The flags used for obtaining the clientid during EXCHANGE_ID */
83 u32 cl_exchange_flags; 83 u32 cl_exchange_flags;
84 struct nfs4_session *cl_session; /* sharred session */ 84 struct nfs4_session *cl_session; /* sharred session */
85 struct list_head cl_layouts;
86 struct pnfs_deviceid_cache *cl_devid_cache; /* pNFS deviceid cache */
85#endif /* CONFIG_NFS_V4_1 */ 87#endif /* CONFIG_NFS_V4_1 */
86 88
87#ifdef CONFIG_NFS_FSCACHE 89#ifdef CONFIG_NFS_FSCACHE
@@ -145,6 +147,7 @@ struct nfs_server {
145 u32 acl_bitmask; /* V4 bitmask representing the ACEs 147 u32 acl_bitmask; /* V4 bitmask representing the ACEs
146 that are supported on this 148 that are supported on this
147 filesystem */ 149 filesystem */
150 struct pnfs_layoutdriver_type *pnfs_curr_ld; /* Active layout driver */
148#endif 151#endif
149 void (*destroy)(struct nfs_server *); 152 void (*destroy)(struct nfs_server *);
150 153
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index da7a1300dc60..ba6cc8f223c9 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -114,6 +114,7 @@ struct nfs_fsinfo {
114 __u64 maxfilesize; 114 __u64 maxfilesize;
115 struct timespec time_delta; /* server time granularity */ 115 struct timespec time_delta; /* server time granularity */
116 __u32 lease_time; /* in seconds */ 116 __u32 lease_time; /* in seconds */
117 __u32 layouttype; /* supported pnfs layout driver */
117}; 118};
118 119
119struct nfs_fsstat { 120struct nfs_fsstat {
@@ -186,6 +187,55 @@ struct nfs4_get_lease_time_res {
186 struct nfs4_sequence_res lr_seq_res; 187 struct nfs4_sequence_res lr_seq_res;
187}; 188};
188 189
190#define PNFS_LAYOUT_MAXSIZE 4096
191
192struct nfs4_layoutdriver_data {
193 __u32 len;
194 void *buf;
195};
196
197struct pnfs_layout_range {
198 u32 iomode;
199 u64 offset;
200 u64 length;
201};
202
203struct nfs4_layoutget_args {
204 __u32 type;
205 struct pnfs_layout_range range;
206 __u64 minlength;
207 __u32 maxcount;
208 struct inode *inode;
209 struct nfs_open_context *ctx;
210 struct nfs4_sequence_args seq_args;
211};
212
213struct nfs4_layoutget_res {
214 __u32 return_on_close;
215 struct pnfs_layout_range range;
216 __u32 type;
217 nfs4_stateid stateid;
218 struct nfs4_layoutdriver_data layout;
219 struct nfs4_sequence_res seq_res;
220};
221
222struct nfs4_layoutget {
223 struct nfs4_layoutget_args args;
224 struct nfs4_layoutget_res res;
225 struct pnfs_layout_segment **lsegpp;
226 int status;
227};
228
229struct nfs4_getdeviceinfo_args {
230 struct pnfs_device *pdev;
231 struct nfs4_sequence_args seq_args;
232};
233
234struct nfs4_getdeviceinfo_res {
235 struct pnfs_device *pdev;
236 struct nfs4_sequence_res seq_res;
237};
238
189/* 239/*
190 * Arguments to the open call. 240 * Arguments to the open call.
191 */ 241 */
diff --git a/include/linux/sunrpc/auth.h b/include/linux/sunrpc/auth.h
index 5bbc447175dc..b2024757edd5 100644
--- a/include/linux/sunrpc/auth.h
+++ b/include/linux/sunrpc/auth.h
@@ -122,8 +122,8 @@ extern const struct rpc_authops authnull_ops;
122int __init rpc_init_authunix(void); 122int __init rpc_init_authunix(void);
123int __init rpc_init_generic_auth(void); 123int __init rpc_init_generic_auth(void);
124int __init rpcauth_init_module(void); 124int __init rpcauth_init_module(void);
125void __exit rpcauth_remove_module(void); 125void rpcauth_remove_module(void);
126void __exit rpc_destroy_generic_auth(void); 126void rpc_destroy_generic_auth(void);
127void rpc_destroy_authunix(void); 127void rpc_destroy_authunix(void);
128 128
129struct rpc_cred * rpc_lookup_cred(void); 129struct rpc_cred * rpc_lookup_cred(void);
diff --git a/include/linux/sunrpc/cache.h b/include/linux/sunrpc/cache.h
index 7bf3e84b92f4..6950c981882d 100644
--- a/include/linux/sunrpc/cache.h
+++ b/include/linux/sunrpc/cache.h
@@ -125,12 +125,15 @@ struct cache_detail {
125 */ 125 */
126struct cache_req { 126struct cache_req {
127 struct cache_deferred_req *(*defer)(struct cache_req *req); 127 struct cache_deferred_req *(*defer)(struct cache_req *req);
128 int thread_wait; /* How long (jiffies) we can block the
129 * current thread to wait for updates.
130 */
128}; 131};
129/* this must be embedded in a deferred_request that is being 132/* this must be embedded in a deferred_request that is being
130 * delayed awaiting cache-fill 133 * delayed awaiting cache-fill
131 */ 134 */
132struct cache_deferred_req { 135struct cache_deferred_req {
133 struct list_head hash; /* on hash chain */ 136 struct hlist_node hash; /* on hash chain */
134 struct list_head recent; /* on fifo */ 137 struct list_head recent; /* on fifo */
135 struct cache_head *item; /* cache item we wait on */ 138 struct cache_head *item; /* cache item we wait on */
136 void *owner; /* we might need to discard all defered requests 139 void *owner; /* we might need to discard all defered requests
@@ -194,7 +197,9 @@ extern void cache_purge(struct cache_detail *detail);
194#define NEVER (0x7FFFFFFF) 197#define NEVER (0x7FFFFFFF)
195extern void __init cache_initialize(void); 198extern void __init cache_initialize(void);
196extern int cache_register(struct cache_detail *cd); 199extern int cache_register(struct cache_detail *cd);
200extern int cache_register_net(struct cache_detail *cd, struct net *net);
197extern void cache_unregister(struct cache_detail *cd); 201extern void cache_unregister(struct cache_detail *cd);
202extern void cache_unregister_net(struct cache_detail *cd, struct net *net);
198 203
199extern int sunrpc_cache_register_pipefs(struct dentry *parent, const char *, 204extern int sunrpc_cache_register_pipefs(struct dentry *parent, const char *,
200 mode_t, struct cache_detail *); 205 mode_t, struct cache_detail *);
@@ -218,14 +223,42 @@ static inline int get_int(char **bpp, int *anint)
218 return 0; 223 return 0;
219} 224}
220 225
226/*
227 * timestamps kept in the cache are expressed in seconds
228 * since boot. This is the best for measuring differences in
229 * real time.
230 */
231static inline time_t seconds_since_boot(void)
232{
233 struct timespec boot;
234 getboottime(&boot);
235 return get_seconds() - boot.tv_sec;
236}
237
238static inline time_t convert_to_wallclock(time_t sinceboot)
239{
240 struct timespec boot;
241 getboottime(&boot);
242 return boot.tv_sec + sinceboot;
243}
244
221static inline time_t get_expiry(char **bpp) 245static inline time_t get_expiry(char **bpp)
222{ 246{
223 int rv; 247 int rv;
248 struct timespec boot;
249
224 if (get_int(bpp, &rv)) 250 if (get_int(bpp, &rv))
225 return 0; 251 return 0;
226 if (rv < 0) 252 if (rv < 0)
227 return 0; 253 return 0;
228 return rv; 254 getboottime(&boot);
255 return rv - boot.tv_sec;
229} 256}
230 257
258static inline void sunrpc_invalidate(struct cache_head *h,
259 struct cache_detail *detail)
260{
261 h->expiry_time = seconds_since_boot() - 1;
262 detail->nextcheck = seconds_since_boot();
263}
231#endif /* _LINUX_SUNRPC_CACHE_H_ */ 264#endif /* _LINUX_SUNRPC_CACHE_H_ */
diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h
index c83df09a8e2b..a5a55f284b7d 100644
--- a/include/linux/sunrpc/clnt.h
+++ b/include/linux/sunrpc/clnt.h
@@ -102,6 +102,7 @@ struct rpc_procinfo {
102#ifdef __KERNEL__ 102#ifdef __KERNEL__
103 103
104struct rpc_create_args { 104struct rpc_create_args {
105 struct net *net;
105 int protocol; 106 int protocol;
106 struct sockaddr *address; 107 struct sockaddr *address;
107 size_t addrsize; 108 size_t addrsize;
diff --git a/include/linux/sunrpc/gss_spkm3.h b/include/linux/sunrpc/gss_spkm3.h
deleted file mode 100644
index e3e6a3437f8b..000000000000
--- a/include/linux/sunrpc/gss_spkm3.h
+++ /dev/null
@@ -1,55 +0,0 @@
1/*
2 * linux/include/linux/sunrpc/gss_spkm3.h
3 *
4 * Copyright (c) 2000 The Regents of the University of Michigan.
5 * All rights reserved.
6 *
7 * Andy Adamson <andros@umich.edu>
8 */
9
10#include <linux/sunrpc/auth_gss.h>
11#include <linux/sunrpc/gss_err.h>
12#include <linux/sunrpc/gss_asn1.h>
13
14struct spkm3_ctx {
15 struct xdr_netobj ctx_id; /* per message context id */
16 int endtime; /* endtime of the context */
17 struct xdr_netobj mech_used;
18 unsigned int ret_flags ;
19 struct xdr_netobj conf_alg;
20 struct xdr_netobj derived_conf_key;
21 struct xdr_netobj intg_alg;
22 struct xdr_netobj derived_integ_key;
23};
24
25/* OIDs declarations for K-ALG, I-ALG, C-ALG, and OWF-ALG */
26extern const struct xdr_netobj hmac_md5_oid;
27extern const struct xdr_netobj cast5_cbc_oid;
28
29/* SPKM InnerContext Token types */
30
31#define SPKM_ERROR_TOK 3
32#define SPKM_MIC_TOK 4
33#define SPKM_WRAP_TOK 5
34#define SPKM_DEL_TOK 6
35
36u32 spkm3_make_token(struct spkm3_ctx *ctx, struct xdr_buf * text, struct xdr_netobj * token, int toktype);
37
38u32 spkm3_read_token(struct spkm3_ctx *ctx, struct xdr_netobj *read_token, struct xdr_buf *message_buffer, int toktype);
39
40#define CKSUMTYPE_RSA_MD5 0x0007
41#define CKSUMTYPE_HMAC_MD5 0x0008
42
43s32 make_spkm3_checksum(s32 cksumtype, struct xdr_netobj *key, char *header,
44 unsigned int hdrlen, struct xdr_buf *body,
45 unsigned int body_offset, struct xdr_netobj *cksum);
46void asn1_bitstring_len(struct xdr_netobj *in, int *enclen, int *zerobits);
47int decode_asn1_bitstring(struct xdr_netobj *out, char *in, int enclen,
48 int explen);
49void spkm3_mic_header(unsigned char **hdrbuf, unsigned int *hdrlen,
50 unsigned char *ctxhdr, int elen, int zbit);
51void spkm3_make_mic_token(unsigned char **tokp, int toklen,
52 struct xdr_netobj *mic_hdr,
53 struct xdr_netobj *md5cksum, int md5elen, int md5zbit);
54u32 spkm3_verify_mic_token(unsigned char **tokp, int *mic_hdrlen,
55 unsigned char **cksum);
diff --git a/include/linux/sunrpc/stats.h b/include/linux/sunrpc/stats.h
index 5fa0f2084307..680471d1f28a 100644
--- a/include/linux/sunrpc/stats.h
+++ b/include/linux/sunrpc/stats.h
@@ -38,8 +38,21 @@ struct svc_stat {
38 rpcbadclnt; 38 rpcbadclnt;
39}; 39};
40 40
41void rpc_proc_init(void); 41struct net;
42void rpc_proc_exit(void); 42#ifdef CONFIG_PROC_FS
43int rpc_proc_init(struct net *);
44void rpc_proc_exit(struct net *);
45#else
46static inline int rpc_proc_init(struct net *net)
47{
48 return 0;
49}
50
51static inline void rpc_proc_exit(struct net *net)
52{
53}
54#endif
55
43#ifdef MODULE 56#ifdef MODULE
44void rpc_modcount(struct inode *, int); 57void rpc_modcount(struct inode *, int);
45#endif 58#endif
@@ -54,9 +67,6 @@ void svc_proc_unregister(const char *);
54 67
55void svc_seq_show(struct seq_file *, 68void svc_seq_show(struct seq_file *,
56 const struct svc_stat *); 69 const struct svc_stat *);
57
58extern struct proc_dir_entry *proc_net_rpc;
59
60#else 70#else
61 71
62static inline struct proc_dir_entry *rpc_proc_register(struct rpc_stat *s) { return NULL; } 72static inline struct proc_dir_entry *rpc_proc_register(struct rpc_stat *s) { return NULL; }
@@ -69,9 +79,6 @@ static inline void svc_proc_unregister(const char *p) {}
69 79
70static inline void svc_seq_show(struct seq_file *seq, 80static inline void svc_seq_show(struct seq_file *seq,
71 const struct svc_stat *st) {} 81 const struct svc_stat *st) {}
72
73#define proc_net_rpc NULL
74
75#endif 82#endif
76 83
77#endif /* _LINUX_SUNRPC_STATS_H */ 84#endif /* _LINUX_SUNRPC_STATS_H */
diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h
index 5f4e18b3ce73..bbdb680ffbe9 100644
--- a/include/linux/sunrpc/svc_xprt.h
+++ b/include/linux/sunrpc/svc_xprt.h
@@ -12,6 +12,7 @@
12 12
13struct svc_xprt_ops { 13struct svc_xprt_ops {
14 struct svc_xprt *(*xpo_create)(struct svc_serv *, 14 struct svc_xprt *(*xpo_create)(struct svc_serv *,
15 struct net *net,
15 struct sockaddr *, int, 16 struct sockaddr *, int,
16 int); 17 int);
17 struct svc_xprt *(*xpo_accept)(struct svc_xprt *); 18 struct svc_xprt *(*xpo_accept)(struct svc_xprt *);
@@ -32,6 +33,16 @@ struct svc_xprt_class {
32 u32 xcl_max_payload; 33 u32 xcl_max_payload;
33}; 34};
34 35
36/*
37 * This is embedded in an object that wants a callback before deleting
38 * an xprt; intended for use by NFSv4.1, which needs to know when a
39 * client's tcp connection (and hence possibly a backchannel) goes away.
40 */
41struct svc_xpt_user {
42 struct list_head list;
43 void (*callback)(struct svc_xpt_user *);
44};
45
35struct svc_xprt { 46struct svc_xprt {
36 struct svc_xprt_class *xpt_class; 47 struct svc_xprt_class *xpt_class;
37 struct svc_xprt_ops *xpt_ops; 48 struct svc_xprt_ops *xpt_ops;
@@ -66,14 +77,31 @@ struct svc_xprt {
66 struct sockaddr_storage xpt_remote; /* remote peer's address */ 77 struct sockaddr_storage xpt_remote; /* remote peer's address */
67 size_t xpt_remotelen; /* length of address */ 78 size_t xpt_remotelen; /* length of address */
68 struct rpc_wait_queue xpt_bc_pending; /* backchannel wait queue */ 79 struct rpc_wait_queue xpt_bc_pending; /* backchannel wait queue */
80 struct list_head xpt_users; /* callbacks on free */
81
82 struct net *xpt_net;
69}; 83};
70 84
85static inline void register_xpt_user(struct svc_xprt *xpt, struct svc_xpt_user *u)
86{
87 spin_lock(&xpt->xpt_lock);
88 list_add(&u->list, &xpt->xpt_users);
89 spin_unlock(&xpt->xpt_lock);
90}
91
92static inline void unregister_xpt_user(struct svc_xprt *xpt, struct svc_xpt_user *u)
93{
94 spin_lock(&xpt->xpt_lock);
95 list_del_init(&u->list);
96 spin_unlock(&xpt->xpt_lock);
97}
98
71int svc_reg_xprt_class(struct svc_xprt_class *); 99int svc_reg_xprt_class(struct svc_xprt_class *);
72void svc_unreg_xprt_class(struct svc_xprt_class *); 100void svc_unreg_xprt_class(struct svc_xprt_class *);
73void svc_xprt_init(struct svc_xprt_class *, struct svc_xprt *, 101void svc_xprt_init(struct svc_xprt_class *, struct svc_xprt *,
74 struct svc_serv *); 102 struct svc_serv *);
75int svc_create_xprt(struct svc_serv *, const char *, const int, 103int svc_create_xprt(struct svc_serv *, const char *, struct net *,
76 const unsigned short, int); 104 const int, const unsigned short, int);
77void svc_xprt_enqueue(struct svc_xprt *xprt); 105void svc_xprt_enqueue(struct svc_xprt *xprt);
78void svc_xprt_received(struct svc_xprt *); 106void svc_xprt_received(struct svc_xprt *);
79void svc_xprt_put(struct svc_xprt *xprt); 107void svc_xprt_put(struct svc_xprt *xprt);
diff --git a/include/linux/sunrpc/svcauth.h b/include/linux/sunrpc/svcauth.h
index d39dbdc7b10f..25d333c1b571 100644
--- a/include/linux/sunrpc/svcauth.h
+++ b/include/linux/sunrpc/svcauth.h
@@ -108,10 +108,15 @@ struct auth_ops {
108#define SVC_NEGATIVE 4 108#define SVC_NEGATIVE 4
109#define SVC_OK 5 109#define SVC_OK 5
110#define SVC_DROP 6 110#define SVC_DROP 6
111#define SVC_DENIED 7 111#define SVC_CLOSE 7 /* Like SVC_DROP, but request is definitely
112#define SVC_PENDING 8 112 * lost so if there is a tcp connection, it
113#define SVC_COMPLETE 9 113 * should be closed
114 */
115#define SVC_DENIED 8
116#define SVC_PENDING 9
117#define SVC_COMPLETE 10
114 118
119struct svc_xprt;
115 120
116extern int svc_authenticate(struct svc_rqst *rqstp, __be32 *authp); 121extern int svc_authenticate(struct svc_rqst *rqstp, __be32 *authp);
117extern int svc_authorise(struct svc_rqst *rqstp); 122extern int svc_authorise(struct svc_rqst *rqstp);
@@ -121,13 +126,13 @@ extern void svc_auth_unregister(rpc_authflavor_t flavor);
121 126
122extern struct auth_domain *unix_domain_find(char *name); 127extern struct auth_domain *unix_domain_find(char *name);
123extern void auth_domain_put(struct auth_domain *item); 128extern void auth_domain_put(struct auth_domain *item);
124extern int auth_unix_add_addr(struct in6_addr *addr, struct auth_domain *dom); 129extern int auth_unix_add_addr(struct net *net, struct in6_addr *addr, struct auth_domain *dom);
125extern struct auth_domain *auth_domain_lookup(char *name, struct auth_domain *new); 130extern struct auth_domain *auth_domain_lookup(char *name, struct auth_domain *new);
126extern struct auth_domain *auth_domain_find(char *name); 131extern struct auth_domain *auth_domain_find(char *name);
127extern struct auth_domain *auth_unix_lookup(struct in6_addr *addr); 132extern struct auth_domain *auth_unix_lookup(struct net *net, struct in6_addr *addr);
128extern int auth_unix_forget_old(struct auth_domain *dom); 133extern int auth_unix_forget_old(struct auth_domain *dom);
129extern void svcauth_unix_purge(void); 134extern void svcauth_unix_purge(void);
130extern void svcauth_unix_info_release(void *); 135extern void svcauth_unix_info_release(struct svc_xprt *xpt);
131extern int svcauth_unix_set_client(struct svc_rqst *rqstp); 136extern int svcauth_unix_set_client(struct svc_rqst *rqstp);
132 137
133static inline unsigned long hash_str(char *name, int bits) 138static inline unsigned long hash_str(char *name, int bits)
diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h
index ab91d86565fd..498ab93a81e4 100644
--- a/include/linux/sunrpc/xdr.h
+++ b/include/linux/sunrpc/xdr.h
@@ -132,6 +132,13 @@ xdr_decode_hyper(__be32 *p, __u64 *valp)
132 return p + 2; 132 return p + 2;
133} 133}
134 134
135static inline __be32 *
136xdr_decode_opaque_fixed(__be32 *p, void *ptr, unsigned int len)
137{
138 memcpy(ptr, p, len);
139 return p + XDR_QUADLEN(len);
140}
141
135/* 142/*
136 * Adjust kvec to reflect end of xdr'ed data (RPC client XDR) 143 * Adjust kvec to reflect end of xdr'ed data (RPC client XDR)
137 */ 144 */
diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h
index ff5a77b28c50..89d10d279a20 100644
--- a/include/linux/sunrpc/xprt.h
+++ b/include/linux/sunrpc/xprt.h
@@ -224,6 +224,7 @@ struct rpc_xprt {
224 bklog_u; /* backlog queue utilization */ 224 bklog_u; /* backlog queue utilization */
225 } stat; 225 } stat;
226 226
227 struct net *xprt_net;
227 const char *address_strings[RPC_DISPLAY_MAX]; 228 const char *address_strings[RPC_DISPLAY_MAX];
228}; 229};
229 230
@@ -249,6 +250,7 @@ static inline int bc_prealloc(struct rpc_rqst *req)
249 250
250struct xprt_create { 251struct xprt_create {
251 int ident; /* XPRT_TRANSPORT identifier */ 252 int ident; /* XPRT_TRANSPORT identifier */
253 struct net * net;
252 struct sockaddr * srcaddr; /* optional local address */ 254 struct sockaddr * srcaddr; /* optional local address */
253 struct sockaddr * dstaddr; /* remote peer address */ 255 struct sockaddr * dstaddr; /* remote peer address */
254 size_t addrlen; 256 size_t addrlen;
@@ -280,6 +282,8 @@ void xprt_release_xprt_cong(struct rpc_xprt *xprt, struct rpc_task *task);
280void xprt_release(struct rpc_task *task); 282void xprt_release(struct rpc_task *task);
281struct rpc_xprt * xprt_get(struct rpc_xprt *xprt); 283struct rpc_xprt * xprt_get(struct rpc_xprt *xprt);
282void xprt_put(struct rpc_xprt *xprt); 284void xprt_put(struct rpc_xprt *xprt);
285struct rpc_xprt * xprt_alloc(struct net *net, int size, int max_req);
286void xprt_free(struct rpc_xprt *);
283 287
284static inline __be32 *xprt_skip_transport_header(struct rpc_xprt *xprt, __be32 *p) 288static inline __be32 *xprt_skip_transport_header(struct rpc_xprt *xprt, __be32 *p)
285{ 289{
diff --git a/net/socket.c b/net/socket.c
index abf3e2561521..7f67c072d496 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -1145,7 +1145,7 @@ call_kill:
1145} 1145}
1146EXPORT_SYMBOL(sock_wake_async); 1146EXPORT_SYMBOL(sock_wake_async);
1147 1147
1148static int __sock_create(struct net *net, int family, int type, int protocol, 1148int __sock_create(struct net *net, int family, int type, int protocol,
1149 struct socket **res, int kern) 1149 struct socket **res, int kern)
1150{ 1150{
1151 int err; 1151 int err;
@@ -1257,6 +1257,7 @@ out_release:
1257 rcu_read_unlock(); 1257 rcu_read_unlock();
1258 goto out_sock_release; 1258 goto out_sock_release;
1259} 1259}
1260EXPORT_SYMBOL(__sock_create);
1260 1261
1261int sock_create(int family, int type, int protocol, struct socket **res) 1262int sock_create(int family, int type, int protocol, struct socket **res)
1262{ 1263{
diff --git a/net/sunrpc/Kconfig b/net/sunrpc/Kconfig
index 3376d7657185..8873fd8ddacd 100644
--- a/net/sunrpc/Kconfig
+++ b/net/sunrpc/Kconfig
@@ -36,22 +36,3 @@ config RPCSEC_GSS_KRB5
36 Kerberos support should be installed. 36 Kerberos support should be installed.
37 37
38 If unsure, say Y. 38 If unsure, say Y.
39
40config RPCSEC_GSS_SPKM3
41 tristate "Secure RPC: SPKM3 mechanism (EXPERIMENTAL)"
42 depends on SUNRPC && EXPERIMENTAL
43 select SUNRPC_GSS
44 select CRYPTO
45 select CRYPTO_MD5
46 select CRYPTO_DES
47 select CRYPTO_CAST5
48 select CRYPTO_CBC
49 help
50 Choose Y here to enable Secure RPC using the SPKM3 public key
51 GSS-API mechanism (RFC 2025).
52
53 Secure RPC calls with SPKM3 require an auxiliary userspace
54 daemon which may be found in the Linux nfs-utils package
55 available from http://linux-nfs.org/.
56
57 If unsure, say N.
diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c
index 68192e562749..afe67849269f 100644
--- a/net/sunrpc/auth.c
+++ b/net/sunrpc/auth.c
@@ -658,7 +658,7 @@ out1:
658 return err; 658 return err;
659} 659}
660 660
661void __exit rpcauth_remove_module(void) 661void rpcauth_remove_module(void)
662{ 662{
663 rpc_destroy_authunix(); 663 rpc_destroy_authunix();
664 rpc_destroy_generic_auth(); 664 rpc_destroy_generic_auth();
diff --git a/net/sunrpc/auth_generic.c b/net/sunrpc/auth_generic.c
index 43162bb3b78f..e010a015d996 100644
--- a/net/sunrpc/auth_generic.c
+++ b/net/sunrpc/auth_generic.c
@@ -158,7 +158,7 @@ int __init rpc_init_generic_auth(void)
158 return rpcauth_init_credcache(&generic_auth); 158 return rpcauth_init_credcache(&generic_auth);
159} 159}
160 160
161void __exit rpc_destroy_generic_auth(void) 161void rpc_destroy_generic_auth(void)
162{ 162{
163 rpcauth_destroy_credcache(&generic_auth); 163 rpcauth_destroy_credcache(&generic_auth);
164} 164}
diff --git a/net/sunrpc/auth_gss/Makefile b/net/sunrpc/auth_gss/Makefile
index 74a231735f67..7350d86a32ee 100644
--- a/net/sunrpc/auth_gss/Makefile
+++ b/net/sunrpc/auth_gss/Makefile
@@ -11,8 +11,3 @@ obj-$(CONFIG_RPCSEC_GSS_KRB5) += rpcsec_gss_krb5.o
11 11
12rpcsec_gss_krb5-objs := gss_krb5_mech.o gss_krb5_seal.o gss_krb5_unseal.o \ 12rpcsec_gss_krb5-objs := gss_krb5_mech.o gss_krb5_seal.o gss_krb5_unseal.o \
13 gss_krb5_seqnum.o gss_krb5_wrap.o gss_krb5_crypto.o gss_krb5_keys.o 13 gss_krb5_seqnum.o gss_krb5_wrap.o gss_krb5_crypto.o gss_krb5_keys.o
14
15obj-$(CONFIG_RPCSEC_GSS_SPKM3) += rpcsec_gss_spkm3.o
16
17rpcsec_gss_spkm3-objs := gss_spkm3_mech.o gss_spkm3_seal.o gss_spkm3_unseal.o \
18 gss_spkm3_token.o
diff --git a/net/sunrpc/auth_gss/gss_krb5_mech.c b/net/sunrpc/auth_gss/gss_krb5_mech.c
index 778e5dfc5144..f375decc024b 100644
--- a/net/sunrpc/auth_gss/gss_krb5_mech.c
+++ b/net/sunrpc/auth_gss/gss_krb5_mech.c
@@ -427,7 +427,7 @@ static int
427context_derive_keys_rc4(struct krb5_ctx *ctx) 427context_derive_keys_rc4(struct krb5_ctx *ctx)
428{ 428{
429 struct crypto_hash *hmac; 429 struct crypto_hash *hmac;
430 char sigkeyconstant[] = "signaturekey"; 430 static const char sigkeyconstant[] = "signaturekey";
431 int slen = strlen(sigkeyconstant) + 1; /* include null terminator */ 431 int slen = strlen(sigkeyconstant) + 1; /* include null terminator */
432 struct hash_desc desc; 432 struct hash_desc desc;
433 struct scatterlist sg[1]; 433 struct scatterlist sg[1];
diff --git a/net/sunrpc/auth_gss/gss_spkm3_mech.c b/net/sunrpc/auth_gss/gss_spkm3_mech.c
deleted file mode 100644
index adade3d313f2..000000000000
--- a/net/sunrpc/auth_gss/gss_spkm3_mech.c
+++ /dev/null
@@ -1,247 +0,0 @@
1/*
2 * linux/net/sunrpc/gss_spkm3_mech.c
3 *
4 * Copyright (c) 2003 The Regents of the University of Michigan.
5 * All rights reserved.
6 *
7 * Andy Adamson <andros@umich.edu>
8 * J. Bruce Fields <bfields@umich.edu>
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 *
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
19 * 3. Neither the name of the University nor the names of its
20 * contributors may be used to endorse or promote products derived
21 * from this software without specific prior written permission.
22 *
23 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
24 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
25 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
26 * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
28 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
29 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
30 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
31 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
32 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
33 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34 *
35 */
36
37#include <linux/err.h>
38#include <linux/module.h>
39#include <linux/init.h>
40#include <linux/types.h>
41#include <linux/slab.h>
42#include <linux/sunrpc/auth.h>
43#include <linux/in.h>
44#include <linux/sunrpc/svcauth_gss.h>
45#include <linux/sunrpc/gss_spkm3.h>
46#include <linux/sunrpc/xdr.h>
47#include <linux/crypto.h>
48
49#ifdef RPC_DEBUG
50# define RPCDBG_FACILITY RPCDBG_AUTH
51#endif
52
53static const void *
54simple_get_bytes(const void *p, const void *end, void *res, int len)
55{
56 const void *q = (const void *)((const char *)p + len);
57 if (unlikely(q > end || q < p))
58 return ERR_PTR(-EFAULT);
59 memcpy(res, p, len);
60 return q;
61}
62
63static const void *
64simple_get_netobj(const void *p, const void *end, struct xdr_netobj *res)
65{
66 const void *q;
67 unsigned int len;
68 p = simple_get_bytes(p, end, &len, sizeof(len));
69 if (IS_ERR(p))
70 return p;
71 res->len = len;
72 if (len == 0) {
73 res->data = NULL;
74 return p;
75 }
76 q = (const void *)((const char *)p + len);
77 if (unlikely(q > end || q < p))
78 return ERR_PTR(-EFAULT);
79 res->data = kmemdup(p, len, GFP_NOFS);
80 if (unlikely(res->data == NULL))
81 return ERR_PTR(-ENOMEM);
82 return q;
83}
84
85static int
86gss_import_sec_context_spkm3(const void *p, size_t len,
87 struct gss_ctx *ctx_id,
88 gfp_t gfp_mask)
89{
90 const void *end = (const void *)((const char *)p + len);
91 struct spkm3_ctx *ctx;
92 int version;
93
94 if (!(ctx = kzalloc(sizeof(*ctx), gfp_mask)))
95 goto out_err;
96
97 p = simple_get_bytes(p, end, &version, sizeof(version));
98 if (IS_ERR(p))
99 goto out_err_free_ctx;
100 if (version != 1) {
101 dprintk("RPC: unknown spkm3 token format: "
102 "obsolete nfs-utils?\n");
103 p = ERR_PTR(-EINVAL);
104 goto out_err_free_ctx;
105 }
106
107 p = simple_get_netobj(p, end, &ctx->ctx_id);
108 if (IS_ERR(p))
109 goto out_err_free_ctx;
110
111 p = simple_get_bytes(p, end, &ctx->endtime, sizeof(ctx->endtime));
112 if (IS_ERR(p))
113 goto out_err_free_ctx_id;
114
115 p = simple_get_netobj(p, end, &ctx->mech_used);
116 if (IS_ERR(p))
117 goto out_err_free_ctx_id;
118
119 p = simple_get_bytes(p, end, &ctx->ret_flags, sizeof(ctx->ret_flags));
120 if (IS_ERR(p))
121 goto out_err_free_mech;
122
123 p = simple_get_netobj(p, end, &ctx->conf_alg);
124 if (IS_ERR(p))
125 goto out_err_free_mech;
126
127 p = simple_get_netobj(p, end, &ctx->derived_conf_key);
128 if (IS_ERR(p))
129 goto out_err_free_conf_alg;
130
131 p = simple_get_netobj(p, end, &ctx->intg_alg);
132 if (IS_ERR(p))
133 goto out_err_free_conf_key;
134
135 p = simple_get_netobj(p, end, &ctx->derived_integ_key);
136 if (IS_ERR(p))
137 goto out_err_free_intg_alg;
138
139 if (p != end) {
140 p = ERR_PTR(-EFAULT);
141 goto out_err_free_intg_key;
142 }
143
144 ctx_id->internal_ctx_id = ctx;
145
146 dprintk("RPC: Successfully imported new spkm context.\n");
147 return 0;
148
149out_err_free_intg_key:
150 kfree(ctx->derived_integ_key.data);
151out_err_free_intg_alg:
152 kfree(ctx->intg_alg.data);
153out_err_free_conf_key:
154 kfree(ctx->derived_conf_key.data);
155out_err_free_conf_alg:
156 kfree(ctx->conf_alg.data);
157out_err_free_mech:
158 kfree(ctx->mech_used.data);
159out_err_free_ctx_id:
160 kfree(ctx->ctx_id.data);
161out_err_free_ctx:
162 kfree(ctx);
163out_err:
164 return PTR_ERR(p);
165}
166
167static void
168gss_delete_sec_context_spkm3(void *internal_ctx)
169{
170 struct spkm3_ctx *sctx = internal_ctx;
171
172 kfree(sctx->derived_integ_key.data);
173 kfree(sctx->intg_alg.data);
174 kfree(sctx->derived_conf_key.data);
175 kfree(sctx->conf_alg.data);
176 kfree(sctx->mech_used.data);
177 kfree(sctx->ctx_id.data);
178 kfree(sctx);
179}
180
181static u32
182gss_verify_mic_spkm3(struct gss_ctx *ctx,
183 struct xdr_buf *signbuf,
184 struct xdr_netobj *checksum)
185{
186 u32 maj_stat = 0;
187 struct spkm3_ctx *sctx = ctx->internal_ctx_id;
188
189 maj_stat = spkm3_read_token(sctx, checksum, signbuf, SPKM_MIC_TOK);
190
191 dprintk("RPC: gss_verify_mic_spkm3 returning %d\n", maj_stat);
192 return maj_stat;
193}
194
195static u32
196gss_get_mic_spkm3(struct gss_ctx *ctx,
197 struct xdr_buf *message_buffer,
198 struct xdr_netobj *message_token)
199{
200 u32 err = 0;
201 struct spkm3_ctx *sctx = ctx->internal_ctx_id;
202
203 err = spkm3_make_token(sctx, message_buffer,
204 message_token, SPKM_MIC_TOK);
205 dprintk("RPC: gss_get_mic_spkm3 returning %d\n", err);
206 return err;
207}
208
209static const struct gss_api_ops gss_spkm3_ops = {
210 .gss_import_sec_context = gss_import_sec_context_spkm3,
211 .gss_get_mic = gss_get_mic_spkm3,
212 .gss_verify_mic = gss_verify_mic_spkm3,
213 .gss_delete_sec_context = gss_delete_sec_context_spkm3,
214};
215
216static struct pf_desc gss_spkm3_pfs[] = {
217 {RPC_AUTH_GSS_SPKM, RPC_GSS_SVC_NONE, "spkm3"},
218 {RPC_AUTH_GSS_SPKMI, RPC_GSS_SVC_INTEGRITY, "spkm3i"},
219};
220
221static struct gss_api_mech gss_spkm3_mech = {
222 .gm_name = "spkm3",
223 .gm_owner = THIS_MODULE,
224 .gm_oid = {7, "\053\006\001\005\005\001\003"},
225 .gm_ops = &gss_spkm3_ops,
226 .gm_pf_num = ARRAY_SIZE(gss_spkm3_pfs),
227 .gm_pfs = gss_spkm3_pfs,
228};
229
230static int __init init_spkm3_module(void)
231{
232 int status;
233
234 status = gss_mech_register(&gss_spkm3_mech);
235 if (status)
236 printk("Failed to register spkm3 gss mechanism!\n");
237 return status;
238}
239
240static void __exit cleanup_spkm3_module(void)
241{
242 gss_mech_unregister(&gss_spkm3_mech);
243}
244
245MODULE_LICENSE("GPL");
246module_init(init_spkm3_module);
247module_exit(cleanup_spkm3_module);
diff --git a/net/sunrpc/auth_gss/gss_spkm3_seal.c b/net/sunrpc/auth_gss/gss_spkm3_seal.c
deleted file mode 100644
index 5a3a65a0e2b4..000000000000
--- a/net/sunrpc/auth_gss/gss_spkm3_seal.c
+++ /dev/null
@@ -1,186 +0,0 @@
1/*
2 * linux/net/sunrpc/gss_spkm3_seal.c
3 *
4 * Copyright (c) 2003 The Regents of the University of Michigan.
5 * All rights reserved.
6 *
7 * Andy Adamson <andros@umich.edu>
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 *
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 3. Neither the name of the University nor the names of its
19 * contributors may be used to endorse or promote products derived
20 * from this software without specific prior written permission.
21 *
22 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
23 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
24 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
25 * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
27 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
28 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
29 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
30 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
31 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
32 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33 *
34 */
35
36#include <linux/types.h>
37#include <linux/jiffies.h>
38#include <linux/sunrpc/gss_spkm3.h>
39#include <linux/random.h>
40#include <linux/crypto.h>
41#include <linux/pagemap.h>
42#include <linux/scatterlist.h>
43#include <linux/sunrpc/xdr.h>
44
45#ifdef RPC_DEBUG
46# define RPCDBG_FACILITY RPCDBG_AUTH
47#endif
48
49const struct xdr_netobj hmac_md5_oid = { 8, "\x2B\x06\x01\x05\x05\x08\x01\x01"};
50const struct xdr_netobj cast5_cbc_oid = {9, "\x2A\x86\x48\x86\xF6\x7D\x07\x42\x0A"};
51
52/*
53 * spkm3_make_token()
54 *
55 * Only SPKM_MIC_TOK with md5 intg-alg is supported
56 */
57
58u32
59spkm3_make_token(struct spkm3_ctx *ctx,
60 struct xdr_buf * text, struct xdr_netobj * token,
61 int toktype)
62{
63 s32 checksum_type;
64 char tokhdrbuf[25];
65 char cksumdata[16];
66 struct xdr_netobj md5cksum = {.len = 0, .data = cksumdata};
67 struct xdr_netobj mic_hdr = {.len = 0, .data = tokhdrbuf};
68 int tokenlen = 0;
69 unsigned char *ptr;
70 s32 now;
71 int ctxelen = 0, ctxzbit = 0;
72 int md5elen = 0, md5zbit = 0;
73
74 now = jiffies;
75
76 if (ctx->ctx_id.len != 16) {
77 dprintk("RPC: spkm3_make_token BAD ctx_id.len %d\n",
78 ctx->ctx_id.len);
79 goto out_err;
80 }
81
82 if (!g_OID_equal(&ctx->intg_alg, &hmac_md5_oid)) {
83 dprintk("RPC: gss_spkm3_seal: unsupported I-ALG "
84 "algorithm. only support hmac-md5 I-ALG.\n");
85 goto out_err;
86 } else
87 checksum_type = CKSUMTYPE_HMAC_MD5;
88
89 if (!g_OID_equal(&ctx->conf_alg, &cast5_cbc_oid)) {
90 dprintk("RPC: gss_spkm3_seal: unsupported C-ALG "
91 "algorithm\n");
92 goto out_err;
93 }
94
95 if (toktype == SPKM_MIC_TOK) {
96 /* Calculate checksum over the mic-header */
97 asn1_bitstring_len(&ctx->ctx_id, &ctxelen, &ctxzbit);
98 spkm3_mic_header(&mic_hdr.data, &mic_hdr.len, ctx->ctx_id.data,
99 ctxelen, ctxzbit);
100 if (make_spkm3_checksum(checksum_type, &ctx->derived_integ_key,
101 (char *)mic_hdr.data, mic_hdr.len,
102 text, 0, &md5cksum))
103 goto out_err;
104
105 asn1_bitstring_len(&md5cksum, &md5elen, &md5zbit);
106 tokenlen = 10 + ctxelen + 1 + md5elen + 1;
107
108 /* Create token header using generic routines */
109 token->len = g_token_size(&ctx->mech_used, tokenlen + 2);
110
111 ptr = token->data;
112 g_make_token_header(&ctx->mech_used, tokenlen + 2, &ptr);
113
114 spkm3_make_mic_token(&ptr, tokenlen, &mic_hdr, &md5cksum, md5elen, md5zbit);
115 } else if (toktype == SPKM_WRAP_TOK) { /* Not Supported */
116 dprintk("RPC: gss_spkm3_seal: SPKM_WRAP_TOK "
117 "not supported\n");
118 goto out_err;
119 }
120
121 /* XXX need to implement sequence numbers, and ctx->expired */
122
123 return GSS_S_COMPLETE;
124out_err:
125 token->data = NULL;
126 token->len = 0;
127 return GSS_S_FAILURE;
128}
129
130static int
131spkm3_checksummer(struct scatterlist *sg, void *data)
132{
133 struct hash_desc *desc = data;
134
135 return crypto_hash_update(desc, sg, sg->length);
136}
137
138/* checksum the plaintext data and hdrlen bytes of the token header */
139s32
140make_spkm3_checksum(s32 cksumtype, struct xdr_netobj *key, char *header,
141 unsigned int hdrlen, struct xdr_buf *body,
142 unsigned int body_offset, struct xdr_netobj *cksum)
143{
144 char *cksumname;
145 struct hash_desc desc; /* XXX add to ctx? */
146 struct scatterlist sg[1];
147 int err;
148
149 switch (cksumtype) {
150 case CKSUMTYPE_HMAC_MD5:
151 cksumname = "hmac(md5)";
152 break;
153 default:
154 dprintk("RPC: spkm3_make_checksum:"
155 " unsupported checksum %d", cksumtype);
156 return GSS_S_FAILURE;
157 }
158
159 if (key->data == NULL || key->len <= 0) return GSS_S_FAILURE;
160
161 desc.tfm = crypto_alloc_hash(cksumname, 0, CRYPTO_ALG_ASYNC);
162 if (IS_ERR(desc.tfm))
163 return GSS_S_FAILURE;
164 cksum->len = crypto_hash_digestsize(desc.tfm);
165 desc.flags = CRYPTO_TFM_REQ_MAY_SLEEP;
166
167 err = crypto_hash_setkey(desc.tfm, key->data, key->len);
168 if (err)
169 goto out;
170
171 err = crypto_hash_init(&desc);
172 if (err)
173 goto out;
174
175 sg_init_one(sg, header, hdrlen);
176 crypto_hash_update(&desc, sg, sg->length);
177
178 xdr_process_buf(body, body_offset, body->len - body_offset,
179 spkm3_checksummer, &desc);
180 crypto_hash_final(&desc, cksum->data);
181
182out:
183 crypto_free_hash(desc.tfm);
184
185 return err ? GSS_S_FAILURE : 0;
186}
diff --git a/net/sunrpc/auth_gss/gss_spkm3_token.c b/net/sunrpc/auth_gss/gss_spkm3_token.c
deleted file mode 100644
index a99825d7caa0..000000000000
--- a/net/sunrpc/auth_gss/gss_spkm3_token.c
+++ /dev/null
@@ -1,267 +0,0 @@
1/*
2 * linux/net/sunrpc/gss_spkm3_token.c
3 *
4 * Copyright (c) 2003 The Regents of the University of Michigan.
5 * All rights reserved.
6 *
7 * Andy Adamson <andros@umich.edu>
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 *
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 3. Neither the name of the University nor the names of its
19 * contributors may be used to endorse or promote products derived
20 * from this software without specific prior written permission.
21 *
22 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
23 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
24 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
25 * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
27 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
28 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
29 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
30 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
31 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
32 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33 *
34 */
35
36#include <linux/types.h>
37#include <linux/slab.h>
38#include <linux/jiffies.h>
39#include <linux/sunrpc/gss_spkm3.h>
40#include <linux/random.h>
41#include <linux/crypto.h>
42
43#ifdef RPC_DEBUG
44# define RPCDBG_FACILITY RPCDBG_AUTH
45#endif
46
47/*
48 * asn1_bitstring_len()
49 *
50 * calculate the asn1 bitstring length of the xdr_netobject
51 */
52void
53asn1_bitstring_len(struct xdr_netobj *in, int *enclen, int *zerobits)
54{
55 int i, zbit = 0,elen = in->len;
56 char *ptr;
57
58 ptr = &in->data[in->len -1];
59
60 /* count trailing 0's */
61 for(i = in->len; i > 0; i--) {
62 if (*ptr == 0) {
63 ptr--;
64 elen--;
65 } else
66 break;
67 }
68
69 /* count number of 0 bits in final octet */
70 ptr = &in->data[elen - 1];
71 for(i = 0; i < 8; i++) {
72 short mask = 0x01;
73
74 if (!((mask << i) & *ptr))
75 zbit++;
76 else
77 break;
78 }
79 *enclen = elen;
80 *zerobits = zbit;
81}
82
83/*
84 * decode_asn1_bitstring()
85 *
86 * decode a bitstring into a buffer of the expected length.
87 * enclen = bit string length
88 * explen = expected length (define in rfc)
89 */
90int
91decode_asn1_bitstring(struct xdr_netobj *out, char *in, int enclen, int explen)
92{
93 if (!(out->data = kzalloc(explen,GFP_NOFS)))
94 return 0;
95 out->len = explen;
96 memcpy(out->data, in, enclen);
97 return 1;
98}
99
100/*
101 * SPKMInnerContextToken choice SPKM_MIC asn1 token layout
102 *
103 * contextid is always 16 bytes plain data. max asn1 bitstring len = 17.
104 *
105 * tokenlen = pos[0] to end of token (max pos[45] with MD5 cksum)
106 *
107 * pos value
108 * ----------
109 * [0] a4 SPKM-MIC tag
110 * [1] ?? innertoken length (max 44)
111 *
112 *
113 * tok_hdr piece of checksum data starts here
114 *
115 * the maximum mic-header len = 9 + 17 = 26
116 * mic-header
117 * ----------
118 * [2] 30 SEQUENCE tag
119 * [3] ?? mic-header length: (max 23) = TokenID + ContextID
120 *
121 * TokenID - all fields constant and can be hardcoded
122 * -------
123 * [4] 02 Type 2
124 * [5] 02 Length 2
125 * [6][7] 01 01 TokenID (SPKM_MIC_TOK)
126 *
127 * ContextID - encoded length not constant, calculated
128 * ---------
129 * [8] 03 Type 3
130 * [9] ?? encoded length
131 * [10] ?? ctxzbit
132 * [11] contextid
133 *
134 * mic_header piece of checksum data ends here.
135 *
136 * int-cksum - encoded length not constant, calculated
137 * ---------
138 * [??] 03 Type 3
139 * [??] ?? encoded length
140 * [??] ?? md5zbit
141 * [??] int-cksum (NID_md5 = 16)
142 *
143 * maximum SPKM-MIC innercontext token length =
144 * 10 + encoded contextid_size(17 max) + 2 + encoded
145 * cksum_size (17 maxfor NID_md5) = 46
146 */
147
148/*
149 * spkm3_mic_header()
150 *
151 * Prepare the SPKM_MIC_TOK mic-header for check-sum calculation
152 * elen: 16 byte context id asn1 bitstring encoded length
153 */
154void
155spkm3_mic_header(unsigned char **hdrbuf, unsigned int *hdrlen, unsigned char *ctxdata, int elen, int zbit)
156{
157 char *hptr = *hdrbuf;
158 char *top = *hdrbuf;
159
160 *(u8 *)hptr++ = 0x30;
161 *(u8 *)hptr++ = elen + 7; /* on the wire header length */
162
163 /* tokenid */
164 *(u8 *)hptr++ = 0x02;
165 *(u8 *)hptr++ = 0x02;
166 *(u8 *)hptr++ = 0x01;
167 *(u8 *)hptr++ = 0x01;
168
169 /* coniextid */
170 *(u8 *)hptr++ = 0x03;
171 *(u8 *)hptr++ = elen + 1; /* add 1 to include zbit */
172 *(u8 *)hptr++ = zbit;
173 memcpy(hptr, ctxdata, elen);
174 hptr += elen;
175 *hdrlen = hptr - top;
176}
177
178/*
179 * spkm3_mic_innercontext_token()
180 *
181 * *tokp points to the beginning of the SPKM_MIC token described
182 * in rfc 2025, section 3.2.1:
183 *
184 * toklen is the inner token length
185 */
186void
187spkm3_make_mic_token(unsigned char **tokp, int toklen, struct xdr_netobj *mic_hdr, struct xdr_netobj *md5cksum, int md5elen, int md5zbit)
188{
189 unsigned char *ict = *tokp;
190
191 *(u8 *)ict++ = 0xa4;
192 *(u8 *)ict++ = toklen;
193 memcpy(ict, mic_hdr->data, mic_hdr->len);
194 ict += mic_hdr->len;
195
196 *(u8 *)ict++ = 0x03;
197 *(u8 *)ict++ = md5elen + 1; /* add 1 to include zbit */
198 *(u8 *)ict++ = md5zbit;
199 memcpy(ict, md5cksum->data, md5elen);
200}
201
202u32
203spkm3_verify_mic_token(unsigned char **tokp, int *mic_hdrlen, unsigned char **cksum)
204{
205 struct xdr_netobj spkm3_ctx_id = {.len =0, .data = NULL};
206 unsigned char *ptr = *tokp;
207 int ctxelen;
208 u32 ret = GSS_S_DEFECTIVE_TOKEN;
209
210 /* spkm3 innercontext token preamble */
211 if ((ptr[0] != 0xa4) || (ptr[2] != 0x30)) {
212 dprintk("RPC: BAD SPKM ictoken preamble\n");
213 goto out;
214 }
215
216 *mic_hdrlen = ptr[3];
217
218 /* token type */
219 if ((ptr[4] != 0x02) || (ptr[5] != 0x02)) {
220 dprintk("RPC: BAD asn1 SPKM3 token type\n");
221 goto out;
222 }
223
224 /* only support SPKM_MIC_TOK */
225 if((ptr[6] != 0x01) || (ptr[7] != 0x01)) {
226 dprintk("RPC: ERROR unsupported SPKM3 token\n");
227 goto out;
228 }
229
230 /* contextid */
231 if (ptr[8] != 0x03) {
232 dprintk("RPC: BAD SPKM3 asn1 context-id type\n");
233 goto out;
234 }
235
236 ctxelen = ptr[9];
237 if (ctxelen > 17) { /* length includes asn1 zbit octet */
238 dprintk("RPC: BAD SPKM3 contextid len %d\n", ctxelen);
239 goto out;
240 }
241
242 /* ignore ptr[10] */
243
244 if(!decode_asn1_bitstring(&spkm3_ctx_id, &ptr[11], ctxelen - 1, 16))
245 goto out;
246
247 /*
248 * in the current implementation: the optional int-alg is not present
249 * so the default int-alg (md5) is used the optional snd-seq field is
250 * also not present
251 */
252
253 if (*mic_hdrlen != 6 + ctxelen) {
254 dprintk("RPC: BAD SPKM_ MIC_TOK header len %d: we only "
255 "support default int-alg (should be absent) "
256 "and do not support snd-seq\n", *mic_hdrlen);
257 goto out;
258 }
259 /* checksum */
260 *cksum = (&ptr[10] + ctxelen); /* ctxelen includes ptr[10] */
261
262 ret = GSS_S_COMPLETE;
263out:
264 kfree(spkm3_ctx_id.data);
265 return ret;
266}
267
diff --git a/net/sunrpc/auth_gss/gss_spkm3_unseal.c b/net/sunrpc/auth_gss/gss_spkm3_unseal.c
deleted file mode 100644
index cc21ee860bb6..000000000000
--- a/net/sunrpc/auth_gss/gss_spkm3_unseal.c
+++ /dev/null
@@ -1,127 +0,0 @@
1/*
2 * linux/net/sunrpc/gss_spkm3_unseal.c
3 *
4 * Copyright (c) 2003 The Regents of the University of Michigan.
5 * All rights reserved.
6 *
7 * Andy Adamson <andros@umich.edu>
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 *
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 3. Neither the name of the University nor the names of its
19 * contributors may be used to endorse or promote products derived
20 * from this software without specific prior written permission.
21 *
22 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
23 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
24 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
25 * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
27 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
28 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
29 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
30 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
31 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
32 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33 *
34 */
35
36#include <linux/types.h>
37#include <linux/slab.h>
38#include <linux/jiffies.h>
39#include <linux/sunrpc/gss_spkm3.h>
40#include <linux/crypto.h>
41
42#ifdef RPC_DEBUG
43# define RPCDBG_FACILITY RPCDBG_AUTH
44#endif
45
46/*
47 * spkm3_read_token()
48 *
49 * only SPKM_MIC_TOK with md5 intg-alg is supported
50 */
51u32
52spkm3_read_token(struct spkm3_ctx *ctx,
53 struct xdr_netobj *read_token, /* checksum */
54 struct xdr_buf *message_buffer, /* signbuf */
55 int toktype)
56{
57 s32 checksum_type;
58 s32 code;
59 struct xdr_netobj wire_cksum = {.len =0, .data = NULL};
60 char cksumdata[16];
61 struct xdr_netobj md5cksum = {.len = 0, .data = cksumdata};
62 unsigned char *ptr = (unsigned char *)read_token->data;
63 unsigned char *cksum;
64 int bodysize, md5elen;
65 int mic_hdrlen;
66 u32 ret = GSS_S_DEFECTIVE_TOKEN;
67
68 if (g_verify_token_header((struct xdr_netobj *) &ctx->mech_used,
69 &bodysize, &ptr, read_token->len))
70 goto out;
71
72 /* decode the token */
73
74 if (toktype != SPKM_MIC_TOK) {
75 dprintk("RPC: BAD SPKM3 token type: %d\n", toktype);
76 goto out;
77 }
78
79 if ((ret = spkm3_verify_mic_token(&ptr, &mic_hdrlen, &cksum)))
80 goto out;
81
82 if (*cksum++ != 0x03) {
83 dprintk("RPC: spkm3_read_token BAD checksum type\n");
84 goto out;
85 }
86 md5elen = *cksum++;
87 cksum++; /* move past the zbit */
88
89 if (!decode_asn1_bitstring(&wire_cksum, cksum, md5elen - 1, 16))
90 goto out;
91
92 /* HARD CODED FOR MD5 */
93
94 /* compute the checksum of the message.
95 * ptr + 2 = start of header piece of checksum
96 * mic_hdrlen + 2 = length of header piece of checksum
97 */
98 ret = GSS_S_DEFECTIVE_TOKEN;
99 if (!g_OID_equal(&ctx->intg_alg, &hmac_md5_oid)) {
100 dprintk("RPC: gss_spkm3_seal: unsupported I-ALG "
101 "algorithm\n");
102 goto out;
103 }
104
105 checksum_type = CKSUMTYPE_HMAC_MD5;
106
107 code = make_spkm3_checksum(checksum_type,
108 &ctx->derived_integ_key, ptr + 2, mic_hdrlen + 2,
109 message_buffer, 0, &md5cksum);
110
111 if (code)
112 goto out;
113
114 ret = GSS_S_BAD_SIG;
115 code = memcmp(md5cksum.data, wire_cksum.data, wire_cksum.len);
116 if (code) {
117 dprintk("RPC: bad MIC checksum\n");
118 goto out;
119 }
120
121
122 /* XXX: need to add expiration and sequencing */
123 ret = GSS_S_COMPLETE;
124out:
125 kfree(wire_cksum.data);
126 return ret;
127}
diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c
index cc385b3a59c2..dec2a6fc7c12 100644
--- a/net/sunrpc/auth_gss/svcauth_gss.c
+++ b/net/sunrpc/auth_gss/svcauth_gss.c
@@ -964,7 +964,7 @@ svcauth_gss_set_client(struct svc_rqst *rqstp)
964 if (rqstp->rq_gssclient == NULL) 964 if (rqstp->rq_gssclient == NULL)
965 return SVC_DENIED; 965 return SVC_DENIED;
966 stat = svcauth_unix_set_client(rqstp); 966 stat = svcauth_unix_set_client(rqstp);
967 if (stat == SVC_DROP) 967 if (stat == SVC_DROP || stat == SVC_CLOSE)
968 return stat; 968 return stat;
969 return SVC_OK; 969 return SVC_OK;
970} 970}
@@ -1018,7 +1018,7 @@ static int svcauth_gss_handle_init(struct svc_rqst *rqstp,
1018 return SVC_DENIED; 1018 return SVC_DENIED;
1019 memset(&rsikey, 0, sizeof(rsikey)); 1019 memset(&rsikey, 0, sizeof(rsikey));
1020 if (dup_netobj(&rsikey.in_handle, &gc->gc_ctx)) 1020 if (dup_netobj(&rsikey.in_handle, &gc->gc_ctx))
1021 return SVC_DROP; 1021 return SVC_CLOSE;
1022 *authp = rpc_autherr_badverf; 1022 *authp = rpc_autherr_badverf;
1023 if (svc_safe_getnetobj(argv, &tmpobj)) { 1023 if (svc_safe_getnetobj(argv, &tmpobj)) {
1024 kfree(rsikey.in_handle.data); 1024 kfree(rsikey.in_handle.data);
@@ -1026,38 +1026,35 @@ static int svcauth_gss_handle_init(struct svc_rqst *rqstp,
1026 } 1026 }
1027 if (dup_netobj(&rsikey.in_token, &tmpobj)) { 1027 if (dup_netobj(&rsikey.in_token, &tmpobj)) {
1028 kfree(rsikey.in_handle.data); 1028 kfree(rsikey.in_handle.data);
1029 return SVC_DROP; 1029 return SVC_CLOSE;
1030 } 1030 }
1031 1031
1032 /* Perform upcall, or find upcall result: */ 1032 /* Perform upcall, or find upcall result: */
1033 rsip = rsi_lookup(&rsikey); 1033 rsip = rsi_lookup(&rsikey);
1034 rsi_free(&rsikey); 1034 rsi_free(&rsikey);
1035 if (!rsip) 1035 if (!rsip)
1036 return SVC_DROP; 1036 return SVC_CLOSE;
1037 switch (cache_check(&rsi_cache, &rsip->h, &rqstp->rq_chandle)) { 1037 if (cache_check(&rsi_cache, &rsip->h, &rqstp->rq_chandle) < 0)
1038 case -EAGAIN:
1039 case -ETIMEDOUT:
1040 case -ENOENT:
1041 /* No upcall result: */ 1038 /* No upcall result: */
1042 return SVC_DROP; 1039 return SVC_CLOSE;
1043 case 0: 1040
1044 ret = SVC_DROP; 1041 ret = SVC_CLOSE;
1045 /* Got an answer to the upcall; use it: */ 1042 /* Got an answer to the upcall; use it: */
1046 if (gss_write_init_verf(rqstp, rsip)) 1043 if (gss_write_init_verf(rqstp, rsip))
1047 goto out; 1044 goto out;
1048 if (resv->iov_len + 4 > PAGE_SIZE) 1045 if (resv->iov_len + 4 > PAGE_SIZE)
1049 goto out; 1046 goto out;
1050 svc_putnl(resv, RPC_SUCCESS); 1047 svc_putnl(resv, RPC_SUCCESS);
1051 if (svc_safe_putnetobj(resv, &rsip->out_handle)) 1048 if (svc_safe_putnetobj(resv, &rsip->out_handle))
1052 goto out; 1049 goto out;
1053 if (resv->iov_len + 3 * 4 > PAGE_SIZE) 1050 if (resv->iov_len + 3 * 4 > PAGE_SIZE)
1054 goto out; 1051 goto out;
1055 svc_putnl(resv, rsip->major_status); 1052 svc_putnl(resv, rsip->major_status);
1056 svc_putnl(resv, rsip->minor_status); 1053 svc_putnl(resv, rsip->minor_status);
1057 svc_putnl(resv, GSS_SEQ_WIN); 1054 svc_putnl(resv, GSS_SEQ_WIN);
1058 if (svc_safe_putnetobj(resv, &rsip->out_token)) 1055 if (svc_safe_putnetobj(resv, &rsip->out_token))
1059 goto out; 1056 goto out;
1060 } 1057
1061 ret = SVC_COMPLETE; 1058 ret = SVC_COMPLETE;
1062out: 1059out:
1063 cache_put(&rsip->h, &rsi_cache); 1060 cache_put(&rsip->h, &rsi_cache);
diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c
index 7dce81a926c5..e433e7580e27 100644
--- a/net/sunrpc/cache.c
+++ b/net/sunrpc/cache.c
@@ -33,15 +33,16 @@
33#include <linux/sunrpc/cache.h> 33#include <linux/sunrpc/cache.h>
34#include <linux/sunrpc/stats.h> 34#include <linux/sunrpc/stats.h>
35#include <linux/sunrpc/rpc_pipe_fs.h> 35#include <linux/sunrpc/rpc_pipe_fs.h>
36#include "netns.h"
36 37
37#define RPCDBG_FACILITY RPCDBG_CACHE 38#define RPCDBG_FACILITY RPCDBG_CACHE
38 39
39static int cache_defer_req(struct cache_req *req, struct cache_head *item); 40static void cache_defer_req(struct cache_req *req, struct cache_head *item);
40static void cache_revisit_request(struct cache_head *item); 41static void cache_revisit_request(struct cache_head *item);
41 42
42static void cache_init(struct cache_head *h) 43static void cache_init(struct cache_head *h)
43{ 44{
44 time_t now = get_seconds(); 45 time_t now = seconds_since_boot();
45 h->next = NULL; 46 h->next = NULL;
46 h->flags = 0; 47 h->flags = 0;
47 kref_init(&h->ref); 48 kref_init(&h->ref);
@@ -51,7 +52,7 @@ static void cache_init(struct cache_head *h)
51 52
52static inline int cache_is_expired(struct cache_detail *detail, struct cache_head *h) 53static inline int cache_is_expired(struct cache_detail *detail, struct cache_head *h)
53{ 54{
54 return (h->expiry_time < get_seconds()) || 55 return (h->expiry_time < seconds_since_boot()) ||
55 (detail->flush_time > h->last_refresh); 56 (detail->flush_time > h->last_refresh);
56} 57}
57 58
@@ -126,7 +127,7 @@ static void cache_dequeue(struct cache_detail *detail, struct cache_head *ch);
126static void cache_fresh_locked(struct cache_head *head, time_t expiry) 127static void cache_fresh_locked(struct cache_head *head, time_t expiry)
127{ 128{
128 head->expiry_time = expiry; 129 head->expiry_time = expiry;
129 head->last_refresh = get_seconds(); 130 head->last_refresh = seconds_since_boot();
130 set_bit(CACHE_VALID, &head->flags); 131 set_bit(CACHE_VALID, &head->flags);
131} 132}
132 133
@@ -237,7 +238,7 @@ int cache_check(struct cache_detail *detail,
237 238
238 /* now see if we want to start an upcall */ 239 /* now see if we want to start an upcall */
239 refresh_age = (h->expiry_time - h->last_refresh); 240 refresh_age = (h->expiry_time - h->last_refresh);
240 age = get_seconds() - h->last_refresh; 241 age = seconds_since_boot() - h->last_refresh;
241 242
242 if (rqstp == NULL) { 243 if (rqstp == NULL) {
243 if (rv == -EAGAIN) 244 if (rv == -EAGAIN)
@@ -252,7 +253,7 @@ int cache_check(struct cache_detail *detail,
252 cache_revisit_request(h); 253 cache_revisit_request(h);
253 if (rv == -EAGAIN) { 254 if (rv == -EAGAIN) {
254 set_bit(CACHE_NEGATIVE, &h->flags); 255 set_bit(CACHE_NEGATIVE, &h->flags);
255 cache_fresh_locked(h, get_seconds()+CACHE_NEW_EXPIRY); 256 cache_fresh_locked(h, seconds_since_boot()+CACHE_NEW_EXPIRY);
256 cache_fresh_unlocked(h, detail); 257 cache_fresh_unlocked(h, detail);
257 rv = -ENOENT; 258 rv = -ENOENT;
258 } 259 }
@@ -267,7 +268,8 @@ int cache_check(struct cache_detail *detail,
267 } 268 }
268 269
269 if (rv == -EAGAIN) { 270 if (rv == -EAGAIN) {
270 if (cache_defer_req(rqstp, h) < 0) { 271 cache_defer_req(rqstp, h);
272 if (!test_bit(CACHE_PENDING, &h->flags)) {
271 /* Request is not deferred */ 273 /* Request is not deferred */
272 rv = cache_is_valid(detail, h); 274 rv = cache_is_valid(detail, h);
273 if (rv == -EAGAIN) 275 if (rv == -EAGAIN)
@@ -387,11 +389,11 @@ static int cache_clean(void)
387 return -1; 389 return -1;
388 } 390 }
389 current_detail = list_entry(next, struct cache_detail, others); 391 current_detail = list_entry(next, struct cache_detail, others);
390 if (current_detail->nextcheck > get_seconds()) 392 if (current_detail->nextcheck > seconds_since_boot())
391 current_index = current_detail->hash_size; 393 current_index = current_detail->hash_size;
392 else { 394 else {
393 current_index = 0; 395 current_index = 0;
394 current_detail->nextcheck = get_seconds()+30*60; 396 current_detail->nextcheck = seconds_since_boot()+30*60;
395 } 397 }
396 } 398 }
397 399
@@ -476,7 +478,7 @@ EXPORT_SYMBOL_GPL(cache_flush);
476void cache_purge(struct cache_detail *detail) 478void cache_purge(struct cache_detail *detail)
477{ 479{
478 detail->flush_time = LONG_MAX; 480 detail->flush_time = LONG_MAX;
479 detail->nextcheck = get_seconds(); 481 detail->nextcheck = seconds_since_boot();
480 cache_flush(); 482 cache_flush();
481 detail->flush_time = 1; 483 detail->flush_time = 1;
482} 484}
@@ -505,81 +507,155 @@ EXPORT_SYMBOL_GPL(cache_purge);
505 507
506static DEFINE_SPINLOCK(cache_defer_lock); 508static DEFINE_SPINLOCK(cache_defer_lock);
507static LIST_HEAD(cache_defer_list); 509static LIST_HEAD(cache_defer_list);
508static struct list_head cache_defer_hash[DFR_HASHSIZE]; 510static struct hlist_head cache_defer_hash[DFR_HASHSIZE];
509static int cache_defer_cnt; 511static int cache_defer_cnt;
510 512
511static int cache_defer_req(struct cache_req *req, struct cache_head *item) 513static void __unhash_deferred_req(struct cache_deferred_req *dreq)
514{
515 hlist_del_init(&dreq->hash);
516 if (!list_empty(&dreq->recent)) {
517 list_del_init(&dreq->recent);
518 cache_defer_cnt--;
519 }
520}
521
522static void __hash_deferred_req(struct cache_deferred_req *dreq, struct cache_head *item)
512{ 523{
513 struct cache_deferred_req *dreq, *discard;
514 int hash = DFR_HASH(item); 524 int hash = DFR_HASH(item);
515 525
516 if (cache_defer_cnt >= DFR_MAX) { 526 INIT_LIST_HEAD(&dreq->recent);
517 /* too much in the cache, randomly drop this one, 527 hlist_add_head(&dreq->hash, &cache_defer_hash[hash]);
518 * or continue and drop the oldest below 528}
519 */ 529
520 if (net_random()&1) 530static void setup_deferral(struct cache_deferred_req *dreq,
521 return -ENOMEM; 531 struct cache_head *item,
522 } 532 int count_me)
523 dreq = req->defer(req); 533{
524 if (dreq == NULL)
525 return -ENOMEM;
526 534
527 dreq->item = item; 535 dreq->item = item;
528 536
529 spin_lock(&cache_defer_lock); 537 spin_lock(&cache_defer_lock);
530 538
531 list_add(&dreq->recent, &cache_defer_list); 539 __hash_deferred_req(dreq, item);
532
533 if (cache_defer_hash[hash].next == NULL)
534 INIT_LIST_HEAD(&cache_defer_hash[hash]);
535 list_add(&dreq->hash, &cache_defer_hash[hash]);
536 540
537 /* it is in, now maybe clean up */ 541 if (count_me) {
538 discard = NULL; 542 cache_defer_cnt++;
539 if (++cache_defer_cnt > DFR_MAX) { 543 list_add(&dreq->recent, &cache_defer_list);
540 discard = list_entry(cache_defer_list.prev,
541 struct cache_deferred_req, recent);
542 list_del_init(&discard->recent);
543 list_del_init(&discard->hash);
544 cache_defer_cnt--;
545 } 544 }
545
546 spin_unlock(&cache_defer_lock); 546 spin_unlock(&cache_defer_lock);
547 547
548}
549
550struct thread_deferred_req {
551 struct cache_deferred_req handle;
552 struct completion completion;
553};
554
555static void cache_restart_thread(struct cache_deferred_req *dreq, int too_many)
556{
557 struct thread_deferred_req *dr =
558 container_of(dreq, struct thread_deferred_req, handle);
559 complete(&dr->completion);
560}
561
562static void cache_wait_req(struct cache_req *req, struct cache_head *item)
563{
564 struct thread_deferred_req sleeper;
565 struct cache_deferred_req *dreq = &sleeper.handle;
566
567 sleeper.completion = COMPLETION_INITIALIZER_ONSTACK(sleeper.completion);
568 dreq->revisit = cache_restart_thread;
569
570 setup_deferral(dreq, item, 0);
571
572 if (!test_bit(CACHE_PENDING, &item->flags) ||
573 wait_for_completion_interruptible_timeout(
574 &sleeper.completion, req->thread_wait) <= 0) {
575 /* The completion wasn't completed, so we need
576 * to clean up
577 */
578 spin_lock(&cache_defer_lock);
579 if (!hlist_unhashed(&sleeper.handle.hash)) {
580 __unhash_deferred_req(&sleeper.handle);
581 spin_unlock(&cache_defer_lock);
582 } else {
583 /* cache_revisit_request already removed
584 * this from the hash table, but hasn't
585 * called ->revisit yet. It will very soon
586 * and we need to wait for it.
587 */
588 spin_unlock(&cache_defer_lock);
589 wait_for_completion(&sleeper.completion);
590 }
591 }
592}
593
594static void cache_limit_defers(void)
595{
596 /* Make sure we haven't exceed the limit of allowed deferred
597 * requests.
598 */
599 struct cache_deferred_req *discard = NULL;
600
601 if (cache_defer_cnt <= DFR_MAX)
602 return;
603
604 spin_lock(&cache_defer_lock);
605
606 /* Consider removing either the first or the last */
607 if (cache_defer_cnt > DFR_MAX) {
608 if (net_random() & 1)
609 discard = list_entry(cache_defer_list.next,
610 struct cache_deferred_req, recent);
611 else
612 discard = list_entry(cache_defer_list.prev,
613 struct cache_deferred_req, recent);
614 __unhash_deferred_req(discard);
615 }
616 spin_unlock(&cache_defer_lock);
548 if (discard) 617 if (discard)
549 /* there was one too many */
550 discard->revisit(discard, 1); 618 discard->revisit(discard, 1);
619}
551 620
552 if (!test_bit(CACHE_PENDING, &item->flags)) { 621static void cache_defer_req(struct cache_req *req, struct cache_head *item)
553 /* must have just been validated... */ 622{
554 cache_revisit_request(item); 623 struct cache_deferred_req *dreq;
555 return -EAGAIN; 624
625 if (req->thread_wait) {
626 cache_wait_req(req, item);
627 if (!test_bit(CACHE_PENDING, &item->flags))
628 return;
556 } 629 }
557 return 0; 630 dreq = req->defer(req);
631 if (dreq == NULL)
632 return;
633 setup_deferral(dreq, item, 1);
634 if (!test_bit(CACHE_PENDING, &item->flags))
635 /* Bit could have been cleared before we managed to
636 * set up the deferral, so need to revisit just in case
637 */
638 cache_revisit_request(item);
639
640 cache_limit_defers();
558} 641}
559 642
560static void cache_revisit_request(struct cache_head *item) 643static void cache_revisit_request(struct cache_head *item)
561{ 644{
562 struct cache_deferred_req *dreq; 645 struct cache_deferred_req *dreq;
563 struct list_head pending; 646 struct list_head pending;
564 647 struct hlist_node *lp, *tmp;
565 struct list_head *lp;
566 int hash = DFR_HASH(item); 648 int hash = DFR_HASH(item);
567 649
568 INIT_LIST_HEAD(&pending); 650 INIT_LIST_HEAD(&pending);
569 spin_lock(&cache_defer_lock); 651 spin_lock(&cache_defer_lock);
570 652
571 lp = cache_defer_hash[hash].next; 653 hlist_for_each_entry_safe(dreq, lp, tmp, &cache_defer_hash[hash], hash)
572 if (lp) { 654 if (dreq->item == item) {
573 while (lp != &cache_defer_hash[hash]) { 655 __unhash_deferred_req(dreq);
574 dreq = list_entry(lp, struct cache_deferred_req, hash); 656 list_add(&dreq->recent, &pending);
575 lp = lp->next;
576 if (dreq->item == item) {
577 list_del_init(&dreq->hash);
578 list_move(&dreq->recent, &pending);
579 cache_defer_cnt--;
580 }
581 } 657 }
582 } 658
583 spin_unlock(&cache_defer_lock); 659 spin_unlock(&cache_defer_lock);
584 660
585 while (!list_empty(&pending)) { 661 while (!list_empty(&pending)) {
@@ -600,9 +676,8 @@ void cache_clean_deferred(void *owner)
600 676
601 list_for_each_entry_safe(dreq, tmp, &cache_defer_list, recent) { 677 list_for_each_entry_safe(dreq, tmp, &cache_defer_list, recent) {
602 if (dreq->owner == owner) { 678 if (dreq->owner == owner) {
603 list_del_init(&dreq->hash); 679 __unhash_deferred_req(dreq);
604 list_move(&dreq->recent, &pending); 680 list_add(&dreq->recent, &pending);
605 cache_defer_cnt--;
606 } 681 }
607 } 682 }
608 spin_unlock(&cache_defer_lock); 683 spin_unlock(&cache_defer_lock);
@@ -901,7 +976,7 @@ static int cache_release(struct inode *inode, struct file *filp,
901 filp->private_data = NULL; 976 filp->private_data = NULL;
902 kfree(rp); 977 kfree(rp);
903 978
904 cd->last_close = get_seconds(); 979 cd->last_close = seconds_since_boot();
905 atomic_dec(&cd->readers); 980 atomic_dec(&cd->readers);
906 } 981 }
907 module_put(cd->owner); 982 module_put(cd->owner);
@@ -1014,6 +1089,23 @@ static void warn_no_listener(struct cache_detail *detail)
1014 } 1089 }
1015} 1090}
1016 1091
1092static bool cache_listeners_exist(struct cache_detail *detail)
1093{
1094 if (atomic_read(&detail->readers))
1095 return true;
1096 if (detail->last_close == 0)
1097 /* This cache was never opened */
1098 return false;
1099 if (detail->last_close < seconds_since_boot() - 30)
1100 /*
1101 * We allow for the possibility that someone might
1102 * restart a userspace daemon without restarting the
1103 * server; but after 30 seconds, we give up.
1104 */
1105 return false;
1106 return true;
1107}
1108
1017/* 1109/*
1018 * register an upcall request to user-space and queue it up for read() by the 1110 * register an upcall request to user-space and queue it up for read() by the
1019 * upcall daemon. 1111 * upcall daemon.
@@ -1032,10 +1124,9 @@ int sunrpc_cache_pipe_upcall(struct cache_detail *detail, struct cache_head *h,
1032 char *bp; 1124 char *bp;
1033 int len; 1125 int len;
1034 1126
1035 if (atomic_read(&detail->readers) == 0 && 1127 if (!cache_listeners_exist(detail)) {
1036 detail->last_close < get_seconds() - 30) { 1128 warn_no_listener(detail);
1037 warn_no_listener(detail); 1129 return -EINVAL;
1038 return -EINVAL;
1039 } 1130 }
1040 1131
1041 buf = kmalloc(PAGE_SIZE, GFP_KERNEL); 1132 buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
@@ -1094,13 +1185,19 @@ int qword_get(char **bpp, char *dest, int bufsize)
1094 if (bp[0] == '\\' && bp[1] == 'x') { 1185 if (bp[0] == '\\' && bp[1] == 'x') {
1095 /* HEX STRING */ 1186 /* HEX STRING */
1096 bp += 2; 1187 bp += 2;
1097 while (isxdigit(bp[0]) && isxdigit(bp[1]) && len < bufsize) { 1188 while (len < bufsize) {
1098 int byte = isdigit(*bp) ? *bp-'0' : toupper(*bp)-'A'+10; 1189 int h, l;
1099 bp++; 1190
1100 byte <<= 4; 1191 h = hex_to_bin(bp[0]);
1101 byte |= isdigit(*bp) ? *bp-'0' : toupper(*bp)-'A'+10; 1192 if (h < 0)
1102 *dest++ = byte; 1193 break;
1103 bp++; 1194
1195 l = hex_to_bin(bp[1]);
1196 if (l < 0)
1197 break;
1198
1199 *dest++ = (h << 4) | l;
1200 bp += 2;
1104 len++; 1201 len++;
1105 } 1202 }
1106 } else { 1203 } else {
@@ -1218,7 +1315,8 @@ static int c_show(struct seq_file *m, void *p)
1218 1315
1219 ifdebug(CACHE) 1316 ifdebug(CACHE)
1220 seq_printf(m, "# expiry=%ld refcnt=%d flags=%lx\n", 1317 seq_printf(m, "# expiry=%ld refcnt=%d flags=%lx\n",
1221 cp->expiry_time, atomic_read(&cp->ref.refcount), cp->flags); 1318 convert_to_wallclock(cp->expiry_time),
1319 atomic_read(&cp->ref.refcount), cp->flags);
1222 cache_get(cp); 1320 cache_get(cp);
1223 if (cache_check(cd, cp, NULL)) 1321 if (cache_check(cd, cp, NULL))
1224 /* cache_check does a cache_put on failure */ 1322 /* cache_check does a cache_put on failure */
@@ -1284,7 +1382,7 @@ static ssize_t read_flush(struct file *file, char __user *buf,
1284 unsigned long p = *ppos; 1382 unsigned long p = *ppos;
1285 size_t len; 1383 size_t len;
1286 1384
1287 sprintf(tbuf, "%lu\n", cd->flush_time); 1385 sprintf(tbuf, "%lu\n", convert_to_wallclock(cd->flush_time));
1288 len = strlen(tbuf); 1386 len = strlen(tbuf);
1289 if (p >= len) 1387 if (p >= len)
1290 return 0; 1388 return 0;
@@ -1302,19 +1400,20 @@ static ssize_t write_flush(struct file *file, const char __user *buf,
1302 struct cache_detail *cd) 1400 struct cache_detail *cd)
1303{ 1401{
1304 char tbuf[20]; 1402 char tbuf[20];
1305 char *ep; 1403 char *bp, *ep;
1306 long flushtime; 1404
1307 if (*ppos || count > sizeof(tbuf)-1) 1405 if (*ppos || count > sizeof(tbuf)-1)
1308 return -EINVAL; 1406 return -EINVAL;
1309 if (copy_from_user(tbuf, buf, count)) 1407 if (copy_from_user(tbuf, buf, count))
1310 return -EFAULT; 1408 return -EFAULT;
1311 tbuf[count] = 0; 1409 tbuf[count] = 0;
1312 flushtime = simple_strtoul(tbuf, &ep, 0); 1410 simple_strtoul(tbuf, &ep, 0);
1313 if (*ep && *ep != '\n') 1411 if (*ep && *ep != '\n')
1314 return -EINVAL; 1412 return -EINVAL;
1315 1413
1316 cd->flush_time = flushtime; 1414 bp = tbuf;
1317 cd->nextcheck = get_seconds(); 1415 cd->flush_time = get_expiry(&bp);
1416 cd->nextcheck = seconds_since_boot();
1318 cache_flush(); 1417 cache_flush();
1319 1418
1320 *ppos += count; 1419 *ppos += count;
@@ -1438,8 +1537,10 @@ static const struct file_operations cache_flush_operations_procfs = {
1438 .llseek = no_llseek, 1537 .llseek = no_llseek,
1439}; 1538};
1440 1539
1441static void remove_cache_proc_entries(struct cache_detail *cd) 1540static void remove_cache_proc_entries(struct cache_detail *cd, struct net *net)
1442{ 1541{
1542 struct sunrpc_net *sn;
1543
1443 if (cd->u.procfs.proc_ent == NULL) 1544 if (cd->u.procfs.proc_ent == NULL)
1444 return; 1545 return;
1445 if (cd->u.procfs.flush_ent) 1546 if (cd->u.procfs.flush_ent)
@@ -1449,15 +1550,18 @@ static void remove_cache_proc_entries(struct cache_detail *cd)
1449 if (cd->u.procfs.content_ent) 1550 if (cd->u.procfs.content_ent)
1450 remove_proc_entry("content", cd->u.procfs.proc_ent); 1551 remove_proc_entry("content", cd->u.procfs.proc_ent);
1451 cd->u.procfs.proc_ent = NULL; 1552 cd->u.procfs.proc_ent = NULL;
1452 remove_proc_entry(cd->name, proc_net_rpc); 1553 sn = net_generic(net, sunrpc_net_id);
1554 remove_proc_entry(cd->name, sn->proc_net_rpc);
1453} 1555}
1454 1556
1455#ifdef CONFIG_PROC_FS 1557#ifdef CONFIG_PROC_FS
1456static int create_cache_proc_entries(struct cache_detail *cd) 1558static int create_cache_proc_entries(struct cache_detail *cd, struct net *net)
1457{ 1559{
1458 struct proc_dir_entry *p; 1560 struct proc_dir_entry *p;
1561 struct sunrpc_net *sn;
1459 1562
1460 cd->u.procfs.proc_ent = proc_mkdir(cd->name, proc_net_rpc); 1563 sn = net_generic(net, sunrpc_net_id);
1564 cd->u.procfs.proc_ent = proc_mkdir(cd->name, sn->proc_net_rpc);
1461 if (cd->u.procfs.proc_ent == NULL) 1565 if (cd->u.procfs.proc_ent == NULL)
1462 goto out_nomem; 1566 goto out_nomem;
1463 cd->u.procfs.channel_ent = NULL; 1567 cd->u.procfs.channel_ent = NULL;
@@ -1488,11 +1592,11 @@ static int create_cache_proc_entries(struct cache_detail *cd)
1488 } 1592 }
1489 return 0; 1593 return 0;
1490out_nomem: 1594out_nomem:
1491 remove_cache_proc_entries(cd); 1595 remove_cache_proc_entries(cd, net);
1492 return -ENOMEM; 1596 return -ENOMEM;
1493} 1597}
1494#else /* CONFIG_PROC_FS */ 1598#else /* CONFIG_PROC_FS */
1495static int create_cache_proc_entries(struct cache_detail *cd) 1599static int create_cache_proc_entries(struct cache_detail *cd, struct net *net)
1496{ 1600{
1497 return 0; 1601 return 0;
1498} 1602}
@@ -1503,23 +1607,33 @@ void __init cache_initialize(void)
1503 INIT_DELAYED_WORK_DEFERRABLE(&cache_cleaner, do_cache_clean); 1607 INIT_DELAYED_WORK_DEFERRABLE(&cache_cleaner, do_cache_clean);
1504} 1608}
1505 1609
1506int cache_register(struct cache_detail *cd) 1610int cache_register_net(struct cache_detail *cd, struct net *net)
1507{ 1611{
1508 int ret; 1612 int ret;
1509 1613
1510 sunrpc_init_cache_detail(cd); 1614 sunrpc_init_cache_detail(cd);
1511 ret = create_cache_proc_entries(cd); 1615 ret = create_cache_proc_entries(cd, net);
1512 if (ret) 1616 if (ret)
1513 sunrpc_destroy_cache_detail(cd); 1617 sunrpc_destroy_cache_detail(cd);
1514 return ret; 1618 return ret;
1515} 1619}
1620
1621int cache_register(struct cache_detail *cd)
1622{
1623 return cache_register_net(cd, &init_net);
1624}
1516EXPORT_SYMBOL_GPL(cache_register); 1625EXPORT_SYMBOL_GPL(cache_register);
1517 1626
1518void cache_unregister(struct cache_detail *cd) 1627void cache_unregister_net(struct cache_detail *cd, struct net *net)
1519{ 1628{
1520 remove_cache_proc_entries(cd); 1629 remove_cache_proc_entries(cd, net);
1521 sunrpc_destroy_cache_detail(cd); 1630 sunrpc_destroy_cache_detail(cd);
1522} 1631}
1632
1633void cache_unregister(struct cache_detail *cd)
1634{
1635 cache_unregister_net(cd, &init_net);
1636}
1523EXPORT_SYMBOL_GPL(cache_unregister); 1637EXPORT_SYMBOL_GPL(cache_unregister);
1524 1638
1525static ssize_t cache_read_pipefs(struct file *filp, char __user *buf, 1639static ssize_t cache_read_pipefs(struct file *filp, char __user *buf,
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index cbc5b8ccc8be..9dab9573be41 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -284,6 +284,7 @@ struct rpc_clnt *rpc_create(struct rpc_create_args *args)
284 struct rpc_xprt *xprt; 284 struct rpc_xprt *xprt;
285 struct rpc_clnt *clnt; 285 struct rpc_clnt *clnt;
286 struct xprt_create xprtargs = { 286 struct xprt_create xprtargs = {
287 .net = args->net,
287 .ident = args->protocol, 288 .ident = args->protocol,
288 .srcaddr = args->saddress, 289 .srcaddr = args->saddress,
289 .dstaddr = args->address, 290 .dstaddr = args->address,
diff --git a/net/sunrpc/netns.h b/net/sunrpc/netns.h
new file mode 100644
index 000000000000..d013bf211cae
--- /dev/null
+++ b/net/sunrpc/netns.h
@@ -0,0 +1,19 @@
1#ifndef __SUNRPC_NETNS_H__
2#define __SUNRPC_NETNS_H__
3
4#include <net/net_namespace.h>
5#include <net/netns/generic.h>
6
7struct cache_detail;
8
9struct sunrpc_net {
10 struct proc_dir_entry *proc_net_rpc;
11 struct cache_detail *ip_map_cache;
12};
13
14extern int sunrpc_net_id;
15
16int ip_map_cache_create(struct net *);
17void ip_map_cache_destroy(struct net *);
18
19#endif
diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c
index 63ec116b4dd4..fa6d7ca2c851 100644
--- a/net/sunrpc/rpcb_clnt.c
+++ b/net/sunrpc/rpcb_clnt.c
@@ -177,6 +177,7 @@ static DEFINE_MUTEX(rpcb_create_local_mutex);
177static int rpcb_create_local(void) 177static int rpcb_create_local(void)
178{ 178{
179 struct rpc_create_args args = { 179 struct rpc_create_args args = {
180 .net = &init_net,
180 .protocol = XPRT_TRANSPORT_TCP, 181 .protocol = XPRT_TRANSPORT_TCP,
181 .address = (struct sockaddr *)&rpcb_inaddr_loopback, 182 .address = (struct sockaddr *)&rpcb_inaddr_loopback,
182 .addrsize = sizeof(rpcb_inaddr_loopback), 183 .addrsize = sizeof(rpcb_inaddr_loopback),
@@ -229,6 +230,7 @@ static struct rpc_clnt *rpcb_create(char *hostname, struct sockaddr *srvaddr,
229 size_t salen, int proto, u32 version) 230 size_t salen, int proto, u32 version)
230{ 231{
231 struct rpc_create_args args = { 232 struct rpc_create_args args = {
233 .net = &init_net,
232 .protocol = proto, 234 .protocol = proto,
233 .address = srvaddr, 235 .address = srvaddr,
234 .addrsize = salen, 236 .addrsize = salen,
@@ -248,7 +250,7 @@ static struct rpc_clnt *rpcb_create(char *hostname, struct sockaddr *srvaddr,
248 ((struct sockaddr_in6 *)srvaddr)->sin6_port = htons(RPCBIND_PORT); 250 ((struct sockaddr_in6 *)srvaddr)->sin6_port = htons(RPCBIND_PORT);
249 break; 251 break;
250 default: 252 default:
251 return NULL; 253 return ERR_PTR(-EAFNOSUPPORT);
252 } 254 }
253 255
254 return rpc_create(&args); 256 return rpc_create(&args);
diff --git a/net/sunrpc/stats.c b/net/sunrpc/stats.c
index ea1046f3f9a3..f71a73107ae9 100644
--- a/net/sunrpc/stats.c
+++ b/net/sunrpc/stats.c
@@ -22,11 +22,10 @@
22#include <linux/sunrpc/clnt.h> 22#include <linux/sunrpc/clnt.h>
23#include <linux/sunrpc/svcsock.h> 23#include <linux/sunrpc/svcsock.h>
24#include <linux/sunrpc/metrics.h> 24#include <linux/sunrpc/metrics.h>
25#include <net/net_namespace.h>
26 25
27#define RPCDBG_FACILITY RPCDBG_MISC 26#include "netns.h"
28 27
29struct proc_dir_entry *proc_net_rpc = NULL; 28#define RPCDBG_FACILITY RPCDBG_MISC
30 29
31/* 30/*
32 * Get RPC client stats 31 * Get RPC client stats
@@ -218,10 +217,11 @@ EXPORT_SYMBOL_GPL(rpc_print_iostats);
218static inline struct proc_dir_entry * 217static inline struct proc_dir_entry *
219do_register(const char *name, void *data, const struct file_operations *fops) 218do_register(const char *name, void *data, const struct file_operations *fops)
220{ 219{
221 rpc_proc_init(); 220 struct sunrpc_net *sn;
222 dprintk("RPC: registering /proc/net/rpc/%s\n", name);
223 221
224 return proc_create_data(name, 0, proc_net_rpc, fops, data); 222 dprintk("RPC: registering /proc/net/rpc/%s\n", name);
223 sn = net_generic(&init_net, sunrpc_net_id);
224 return proc_create_data(name, 0, sn->proc_net_rpc, fops, data);
225} 225}
226 226
227struct proc_dir_entry * 227struct proc_dir_entry *
@@ -234,7 +234,10 @@ EXPORT_SYMBOL_GPL(rpc_proc_register);
234void 234void
235rpc_proc_unregister(const char *name) 235rpc_proc_unregister(const char *name)
236{ 236{
237 remove_proc_entry(name, proc_net_rpc); 237 struct sunrpc_net *sn;
238
239 sn = net_generic(&init_net, sunrpc_net_id);
240 remove_proc_entry(name, sn->proc_net_rpc);
238} 241}
239EXPORT_SYMBOL_GPL(rpc_proc_unregister); 242EXPORT_SYMBOL_GPL(rpc_proc_unregister);
240 243
@@ -248,25 +251,29 @@ EXPORT_SYMBOL_GPL(svc_proc_register);
248void 251void
249svc_proc_unregister(const char *name) 252svc_proc_unregister(const char *name)
250{ 253{
251 remove_proc_entry(name, proc_net_rpc); 254 struct sunrpc_net *sn;
255
256 sn = net_generic(&init_net, sunrpc_net_id);
257 remove_proc_entry(name, sn->proc_net_rpc);
252} 258}
253EXPORT_SYMBOL_GPL(svc_proc_unregister); 259EXPORT_SYMBOL_GPL(svc_proc_unregister);
254 260
255void 261int rpc_proc_init(struct net *net)
256rpc_proc_init(void)
257{ 262{
263 struct sunrpc_net *sn;
264
258 dprintk("RPC: registering /proc/net/rpc\n"); 265 dprintk("RPC: registering /proc/net/rpc\n");
259 if (!proc_net_rpc) 266 sn = net_generic(net, sunrpc_net_id);
260 proc_net_rpc = proc_mkdir("rpc", init_net.proc_net); 267 sn->proc_net_rpc = proc_mkdir("rpc", net->proc_net);
268 if (sn->proc_net_rpc == NULL)
269 return -ENOMEM;
270
271 return 0;
261} 272}
262 273
263void 274void rpc_proc_exit(struct net *net)
264rpc_proc_exit(void)
265{ 275{
266 dprintk("RPC: unregistering /proc/net/rpc\n"); 276 dprintk("RPC: unregistering /proc/net/rpc\n");
267 if (proc_net_rpc) { 277 remove_proc_entry("rpc", net->proc_net);
268 proc_net_rpc = NULL;
269 remove_proc_entry("rpc", init_net.proc_net);
270 }
271} 278}
272 279
diff --git a/net/sunrpc/sunrpc_syms.c b/net/sunrpc/sunrpc_syms.c
index c0d085013a2b..9d0809160994 100644
--- a/net/sunrpc/sunrpc_syms.c
+++ b/net/sunrpc/sunrpc_syms.c
@@ -22,7 +22,44 @@
22#include <linux/sunrpc/rpc_pipe_fs.h> 22#include <linux/sunrpc/rpc_pipe_fs.h>
23#include <linux/sunrpc/xprtsock.h> 23#include <linux/sunrpc/xprtsock.h>
24 24
25extern struct cache_detail ip_map_cache, unix_gid_cache; 25#include "netns.h"
26
27int sunrpc_net_id;
28
29static __net_init int sunrpc_init_net(struct net *net)
30{
31 int err;
32
33 err = rpc_proc_init(net);
34 if (err)
35 goto err_proc;
36
37 err = ip_map_cache_create(net);
38 if (err)
39 goto err_ipmap;
40
41 return 0;
42
43err_ipmap:
44 rpc_proc_exit(net);
45err_proc:
46 return err;
47}
48
49static __net_exit void sunrpc_exit_net(struct net *net)
50{
51 ip_map_cache_destroy(net);
52 rpc_proc_exit(net);
53}
54
55static struct pernet_operations sunrpc_net_ops = {
56 .init = sunrpc_init_net,
57 .exit = sunrpc_exit_net,
58 .id = &sunrpc_net_id,
59 .size = sizeof(struct sunrpc_net),
60};
61
62extern struct cache_detail unix_gid_cache;
26 63
27extern void cleanup_rpcb_clnt(void); 64extern void cleanup_rpcb_clnt(void);
28 65
@@ -38,18 +75,22 @@ init_sunrpc(void)
38 err = rpcauth_init_module(); 75 err = rpcauth_init_module();
39 if (err) 76 if (err)
40 goto out3; 77 goto out3;
78
79 cache_initialize();
80
81 err = register_pernet_subsys(&sunrpc_net_ops);
82 if (err)
83 goto out4;
41#ifdef RPC_DEBUG 84#ifdef RPC_DEBUG
42 rpc_register_sysctl(); 85 rpc_register_sysctl();
43#endif 86#endif
44#ifdef CONFIG_PROC_FS
45 rpc_proc_init();
46#endif
47 cache_initialize();
48 cache_register(&ip_map_cache);
49 cache_register(&unix_gid_cache); 87 cache_register(&unix_gid_cache);
50 svc_init_xprt_sock(); /* svc sock transport */ 88 svc_init_xprt_sock(); /* svc sock transport */
51 init_socket_xprt(); /* clnt sock transport */ 89 init_socket_xprt(); /* clnt sock transport */
52 return 0; 90 return 0;
91
92out4:
93 rpcauth_remove_module();
53out3: 94out3:
54 rpc_destroy_mempool(); 95 rpc_destroy_mempool();
55out2: 96out2:
@@ -67,14 +108,11 @@ cleanup_sunrpc(void)
67 svc_cleanup_xprt_sock(); 108 svc_cleanup_xprt_sock();
68 unregister_rpc_pipefs(); 109 unregister_rpc_pipefs();
69 rpc_destroy_mempool(); 110 rpc_destroy_mempool();
70 cache_unregister(&ip_map_cache);
71 cache_unregister(&unix_gid_cache); 111 cache_unregister(&unix_gid_cache);
112 unregister_pernet_subsys(&sunrpc_net_ops);
72#ifdef RPC_DEBUG 113#ifdef RPC_DEBUG
73 rpc_unregister_sysctl(); 114 rpc_unregister_sysctl();
74#endif 115#endif
75#ifdef CONFIG_PROC_FS
76 rpc_proc_exit();
77#endif
78 rcu_barrier(); /* Wait for completion of call_rcu()'s */ 116 rcu_barrier(); /* Wait for completion of call_rcu()'s */
79} 117}
80MODULE_LICENSE("GPL"); 118MODULE_LICENSE("GPL");
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index d9017d64597e..6359c42c4941 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -1055,6 +1055,9 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv)
1055 goto err_bad; 1055 goto err_bad;
1056 case SVC_DENIED: 1056 case SVC_DENIED:
1057 goto err_bad_auth; 1057 goto err_bad_auth;
1058 case SVC_CLOSE:
1059 if (test_bit(XPT_TEMP, &rqstp->rq_xprt->xpt_flags))
1060 svc_close_xprt(rqstp->rq_xprt);
1058 case SVC_DROP: 1061 case SVC_DROP:
1059 goto dropit; 1062 goto dropit;
1060 case SVC_COMPLETE: 1063 case SVC_COMPLETE:
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index cbc084939dd8..c82fe739fbdc 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -100,16 +100,14 @@ EXPORT_SYMBOL_GPL(svc_unreg_xprt_class);
100 */ 100 */
101int svc_print_xprts(char *buf, int maxlen) 101int svc_print_xprts(char *buf, int maxlen)
102{ 102{
103 struct list_head *le; 103 struct svc_xprt_class *xcl;
104 char tmpstr[80]; 104 char tmpstr[80];
105 int len = 0; 105 int len = 0;
106 buf[0] = '\0'; 106 buf[0] = '\0';
107 107
108 spin_lock(&svc_xprt_class_lock); 108 spin_lock(&svc_xprt_class_lock);
109 list_for_each(le, &svc_xprt_class_list) { 109 list_for_each_entry(xcl, &svc_xprt_class_list, xcl_list) {
110 int slen; 110 int slen;
111 struct svc_xprt_class *xcl =
112 list_entry(le, struct svc_xprt_class, xcl_list);
113 111
114 sprintf(tmpstr, "%s %d\n", xcl->xcl_name, xcl->xcl_max_payload); 112 sprintf(tmpstr, "%s %d\n", xcl->xcl_name, xcl->xcl_max_payload);
115 slen = strlen(tmpstr); 113 slen = strlen(tmpstr);
@@ -128,9 +126,9 @@ static void svc_xprt_free(struct kref *kref)
128 struct svc_xprt *xprt = 126 struct svc_xprt *xprt =
129 container_of(kref, struct svc_xprt, xpt_ref); 127 container_of(kref, struct svc_xprt, xpt_ref);
130 struct module *owner = xprt->xpt_class->xcl_owner; 128 struct module *owner = xprt->xpt_class->xcl_owner;
131 if (test_bit(XPT_CACHE_AUTH, &xprt->xpt_flags) && 129 if (test_bit(XPT_CACHE_AUTH, &xprt->xpt_flags))
132 xprt->xpt_auth_cache != NULL) 130 svcauth_unix_info_release(xprt);
133 svcauth_unix_info_release(xprt->xpt_auth_cache); 131 put_net(xprt->xpt_net);
134 xprt->xpt_ops->xpo_free(xprt); 132 xprt->xpt_ops->xpo_free(xprt);
135 module_put(owner); 133 module_put(owner);
136} 134}
@@ -156,15 +154,18 @@ void svc_xprt_init(struct svc_xprt_class *xcl, struct svc_xprt *xprt,
156 INIT_LIST_HEAD(&xprt->xpt_list); 154 INIT_LIST_HEAD(&xprt->xpt_list);
157 INIT_LIST_HEAD(&xprt->xpt_ready); 155 INIT_LIST_HEAD(&xprt->xpt_ready);
158 INIT_LIST_HEAD(&xprt->xpt_deferred); 156 INIT_LIST_HEAD(&xprt->xpt_deferred);
157 INIT_LIST_HEAD(&xprt->xpt_users);
159 mutex_init(&xprt->xpt_mutex); 158 mutex_init(&xprt->xpt_mutex);
160 spin_lock_init(&xprt->xpt_lock); 159 spin_lock_init(&xprt->xpt_lock);
161 set_bit(XPT_BUSY, &xprt->xpt_flags); 160 set_bit(XPT_BUSY, &xprt->xpt_flags);
162 rpc_init_wait_queue(&xprt->xpt_bc_pending, "xpt_bc_pending"); 161 rpc_init_wait_queue(&xprt->xpt_bc_pending, "xpt_bc_pending");
162 xprt->xpt_net = get_net(&init_net);
163} 163}
164EXPORT_SYMBOL_GPL(svc_xprt_init); 164EXPORT_SYMBOL_GPL(svc_xprt_init);
165 165
166static struct svc_xprt *__svc_xpo_create(struct svc_xprt_class *xcl, 166static struct svc_xprt *__svc_xpo_create(struct svc_xprt_class *xcl,
167 struct svc_serv *serv, 167 struct svc_serv *serv,
168 struct net *net,
168 const int family, 169 const int family,
169 const unsigned short port, 170 const unsigned short port,
170 int flags) 171 int flags)
@@ -199,12 +200,12 @@ static struct svc_xprt *__svc_xpo_create(struct svc_xprt_class *xcl,
199 return ERR_PTR(-EAFNOSUPPORT); 200 return ERR_PTR(-EAFNOSUPPORT);
200 } 201 }
201 202
202 return xcl->xcl_ops->xpo_create(serv, sap, len, flags); 203 return xcl->xcl_ops->xpo_create(serv, net, sap, len, flags);
203} 204}
204 205
205int svc_create_xprt(struct svc_serv *serv, const char *xprt_name, 206int svc_create_xprt(struct svc_serv *serv, const char *xprt_name,
206 const int family, const unsigned short port, 207 struct net *net, const int family,
207 int flags) 208 const unsigned short port, int flags)
208{ 209{
209 struct svc_xprt_class *xcl; 210 struct svc_xprt_class *xcl;
210 211
@@ -220,7 +221,7 @@ int svc_create_xprt(struct svc_serv *serv, const char *xprt_name,
220 goto err; 221 goto err;
221 222
222 spin_unlock(&svc_xprt_class_lock); 223 spin_unlock(&svc_xprt_class_lock);
223 newxprt = __svc_xpo_create(xcl, serv, family, port, flags); 224 newxprt = __svc_xpo_create(xcl, serv, net, family, port, flags);
224 if (IS_ERR(newxprt)) { 225 if (IS_ERR(newxprt)) {
225 module_put(xcl->xcl_owner); 226 module_put(xcl->xcl_owner);
226 return PTR_ERR(newxprt); 227 return PTR_ERR(newxprt);
@@ -329,12 +330,6 @@ void svc_xprt_enqueue(struct svc_xprt *xprt)
329 "svc_xprt_enqueue: " 330 "svc_xprt_enqueue: "
330 "threads and transports both waiting??\n"); 331 "threads and transports both waiting??\n");
331 332
332 if (test_bit(XPT_DEAD, &xprt->xpt_flags)) {
333 /* Don't enqueue dead transports */
334 dprintk("svc: transport %p is dead, not enqueued\n", xprt);
335 goto out_unlock;
336 }
337
338 pool->sp_stats.packets++; 333 pool->sp_stats.packets++;
339 334
340 /* Mark transport as busy. It will remain in this state until 335 /* Mark transport as busy. It will remain in this state until
@@ -651,6 +646,11 @@ int svc_recv(struct svc_rqst *rqstp, long timeout)
651 if (signalled() || kthread_should_stop()) 646 if (signalled() || kthread_should_stop())
652 return -EINTR; 647 return -EINTR;
653 648
649 /* Normally we will wait up to 5 seconds for any required
650 * cache information to be provided.
651 */
652 rqstp->rq_chandle.thread_wait = 5*HZ;
653
654 spin_lock_bh(&pool->sp_lock); 654 spin_lock_bh(&pool->sp_lock);
655 xprt = svc_xprt_dequeue(pool); 655 xprt = svc_xprt_dequeue(pool);
656 if (xprt) { 656 if (xprt) {
@@ -658,6 +658,12 @@ int svc_recv(struct svc_rqst *rqstp, long timeout)
658 svc_xprt_get(xprt); 658 svc_xprt_get(xprt);
659 rqstp->rq_reserved = serv->sv_max_mesg; 659 rqstp->rq_reserved = serv->sv_max_mesg;
660 atomic_add(rqstp->rq_reserved, &xprt->xpt_reserved); 660 atomic_add(rqstp->rq_reserved, &xprt->xpt_reserved);
661
662 /* As there is a shortage of threads and this request
663 * had to be queued, don't allow the thread to wait so
664 * long for cache updates.
665 */
666 rqstp->rq_chandle.thread_wait = 1*HZ;
661 } else { 667 } else {
662 /* No data pending. Go to sleep */ 668 /* No data pending. Go to sleep */
663 svc_thread_enqueue(pool, rqstp); 669 svc_thread_enqueue(pool, rqstp);
@@ -868,6 +874,19 @@ static void svc_age_temp_xprts(unsigned long closure)
868 mod_timer(&serv->sv_temptimer, jiffies + svc_conn_age_period * HZ); 874 mod_timer(&serv->sv_temptimer, jiffies + svc_conn_age_period * HZ);
869} 875}
870 876
877static void call_xpt_users(struct svc_xprt *xprt)
878{
879 struct svc_xpt_user *u;
880
881 spin_lock(&xprt->xpt_lock);
882 while (!list_empty(&xprt->xpt_users)) {
883 u = list_first_entry(&xprt->xpt_users, struct svc_xpt_user, list);
884 list_del(&u->list);
885 u->callback(u);
886 }
887 spin_unlock(&xprt->xpt_lock);
888}
889
871/* 890/*
872 * Remove a dead transport 891 * Remove a dead transport
873 */ 892 */
@@ -878,7 +897,7 @@ void svc_delete_xprt(struct svc_xprt *xprt)
878 897
879 /* Only do this once */ 898 /* Only do this once */
880 if (test_and_set_bit(XPT_DEAD, &xprt->xpt_flags)) 899 if (test_and_set_bit(XPT_DEAD, &xprt->xpt_flags))
881 return; 900 BUG();
882 901
883 dprintk("svc: svc_delete_xprt(%p)\n", xprt); 902 dprintk("svc: svc_delete_xprt(%p)\n", xprt);
884 xprt->xpt_ops->xpo_detach(xprt); 903 xprt->xpt_ops->xpo_detach(xprt);
@@ -900,6 +919,7 @@ void svc_delete_xprt(struct svc_xprt *xprt)
900 while ((dr = svc_deferred_dequeue(xprt)) != NULL) 919 while ((dr = svc_deferred_dequeue(xprt)) != NULL)
901 kfree(dr); 920 kfree(dr);
902 921
922 call_xpt_users(xprt);
903 svc_xprt_put(xprt); 923 svc_xprt_put(xprt);
904} 924}
905 925
@@ -910,10 +930,7 @@ void svc_close_xprt(struct svc_xprt *xprt)
910 /* someone else will have to effect the close */ 930 /* someone else will have to effect the close */
911 return; 931 return;
912 932
913 svc_xprt_get(xprt);
914 svc_delete_xprt(xprt); 933 svc_delete_xprt(xprt);
915 clear_bit(XPT_BUSY, &xprt->xpt_flags);
916 svc_xprt_put(xprt);
917} 934}
918EXPORT_SYMBOL_GPL(svc_close_xprt); 935EXPORT_SYMBOL_GPL(svc_close_xprt);
919 936
diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c
index 207311610988..560677d187f1 100644
--- a/net/sunrpc/svcauth_unix.c
+++ b/net/sunrpc/svcauth_unix.c
@@ -18,6 +18,8 @@
18 18
19#include <linux/sunrpc/clnt.h> 19#include <linux/sunrpc/clnt.h>
20 20
21#include "netns.h"
22
21/* 23/*
22 * AUTHUNIX and AUTHNULL credentials are both handled here. 24 * AUTHUNIX and AUTHNULL credentials are both handled here.
23 * AUTHNULL is treated just like AUTHUNIX except that the uid/gid 25 * AUTHNULL is treated just like AUTHUNIX except that the uid/gid
@@ -92,7 +94,6 @@ struct ip_map {
92 struct unix_domain *m_client; 94 struct unix_domain *m_client;
93 int m_add_change; 95 int m_add_change;
94}; 96};
95static struct cache_head *ip_table[IP_HASHMAX];
96 97
97static void ip_map_put(struct kref *kref) 98static void ip_map_put(struct kref *kref)
98{ 99{
@@ -178,8 +179,8 @@ static int ip_map_upcall(struct cache_detail *cd, struct cache_head *h)
178 return sunrpc_cache_pipe_upcall(cd, h, ip_map_request); 179 return sunrpc_cache_pipe_upcall(cd, h, ip_map_request);
179} 180}
180 181
181static struct ip_map *ip_map_lookup(char *class, struct in6_addr *addr); 182static struct ip_map *__ip_map_lookup(struct cache_detail *cd, char *class, struct in6_addr *addr);
182static int ip_map_update(struct ip_map *ipm, struct unix_domain *udom, time_t expiry); 183static int __ip_map_update(struct cache_detail *cd, struct ip_map *ipm, struct unix_domain *udom, time_t expiry);
183 184
184static int ip_map_parse(struct cache_detail *cd, 185static int ip_map_parse(struct cache_detail *cd,
185 char *mesg, int mlen) 186 char *mesg, int mlen)
@@ -219,10 +220,9 @@ static int ip_map_parse(struct cache_detail *cd,
219 switch (address.sa.sa_family) { 220 switch (address.sa.sa_family) {
220 case AF_INET: 221 case AF_INET:
221 /* Form a mapped IPv4 address in sin6 */ 222 /* Form a mapped IPv4 address in sin6 */
222 memset(&sin6, 0, sizeof(sin6));
223 sin6.sin6_family = AF_INET6; 223 sin6.sin6_family = AF_INET6;
224 sin6.sin6_addr.s6_addr32[2] = htonl(0xffff); 224 ipv6_addr_set_v4mapped(address.s4.sin_addr.s_addr,
225 sin6.sin6_addr.s6_addr32[3] = address.s4.sin_addr.s_addr; 225 &sin6.sin6_addr);
226 break; 226 break;
227#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) 227#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
228 case AF_INET6: 228 case AF_INET6:
@@ -249,9 +249,9 @@ static int ip_map_parse(struct cache_detail *cd,
249 dom = NULL; 249 dom = NULL;
250 250
251 /* IPv6 scope IDs are ignored for now */ 251 /* IPv6 scope IDs are ignored for now */
252 ipmp = ip_map_lookup(class, &sin6.sin6_addr); 252 ipmp = __ip_map_lookup(cd, class, &sin6.sin6_addr);
253 if (ipmp) { 253 if (ipmp) {
254 err = ip_map_update(ipmp, 254 err = __ip_map_update(cd, ipmp,
255 container_of(dom, struct unix_domain, h), 255 container_of(dom, struct unix_domain, h),
256 expiry); 256 expiry);
257 } else 257 } else
@@ -294,29 +294,15 @@ static int ip_map_show(struct seq_file *m,
294} 294}
295 295
296 296
297struct cache_detail ip_map_cache = { 297static struct ip_map *__ip_map_lookup(struct cache_detail *cd, char *class,
298 .owner = THIS_MODULE, 298 struct in6_addr *addr)
299 .hash_size = IP_HASHMAX,
300 .hash_table = ip_table,
301 .name = "auth.unix.ip",
302 .cache_put = ip_map_put,
303 .cache_upcall = ip_map_upcall,
304 .cache_parse = ip_map_parse,
305 .cache_show = ip_map_show,
306 .match = ip_map_match,
307 .init = ip_map_init,
308 .update = update,
309 .alloc = ip_map_alloc,
310};
311
312static struct ip_map *ip_map_lookup(char *class, struct in6_addr *addr)
313{ 299{
314 struct ip_map ip; 300 struct ip_map ip;
315 struct cache_head *ch; 301 struct cache_head *ch;
316 302
317 strcpy(ip.m_class, class); 303 strcpy(ip.m_class, class);
318 ipv6_addr_copy(&ip.m_addr, addr); 304 ipv6_addr_copy(&ip.m_addr, addr);
319 ch = sunrpc_cache_lookup(&ip_map_cache, &ip.h, 305 ch = sunrpc_cache_lookup(cd, &ip.h,
320 hash_str(class, IP_HASHBITS) ^ 306 hash_str(class, IP_HASHBITS) ^
321 hash_ip6(*addr)); 307 hash_ip6(*addr));
322 308
@@ -326,7 +312,17 @@ static struct ip_map *ip_map_lookup(char *class, struct in6_addr *addr)
326 return NULL; 312 return NULL;
327} 313}
328 314
329static int ip_map_update(struct ip_map *ipm, struct unix_domain *udom, time_t expiry) 315static inline struct ip_map *ip_map_lookup(struct net *net, char *class,
316 struct in6_addr *addr)
317{
318 struct sunrpc_net *sn;
319
320 sn = net_generic(net, sunrpc_net_id);
321 return __ip_map_lookup(sn->ip_map_cache, class, addr);
322}
323
324static int __ip_map_update(struct cache_detail *cd, struct ip_map *ipm,
325 struct unix_domain *udom, time_t expiry)
330{ 326{
331 struct ip_map ip; 327 struct ip_map ip;
332 struct cache_head *ch; 328 struct cache_head *ch;
@@ -344,17 +340,25 @@ static int ip_map_update(struct ip_map *ipm, struct unix_domain *udom, time_t ex
344 ip.m_add_change++; 340 ip.m_add_change++;
345 } 341 }
346 ip.h.expiry_time = expiry; 342 ip.h.expiry_time = expiry;
347 ch = sunrpc_cache_update(&ip_map_cache, 343 ch = sunrpc_cache_update(cd, &ip.h, &ipm->h,
348 &ip.h, &ipm->h,
349 hash_str(ipm->m_class, IP_HASHBITS) ^ 344 hash_str(ipm->m_class, IP_HASHBITS) ^
350 hash_ip6(ipm->m_addr)); 345 hash_ip6(ipm->m_addr));
351 if (!ch) 346 if (!ch)
352 return -ENOMEM; 347 return -ENOMEM;
353 cache_put(ch, &ip_map_cache); 348 cache_put(ch, cd);
354 return 0; 349 return 0;
355} 350}
356 351
357int auth_unix_add_addr(struct in6_addr *addr, struct auth_domain *dom) 352static inline int ip_map_update(struct net *net, struct ip_map *ipm,
353 struct unix_domain *udom, time_t expiry)
354{
355 struct sunrpc_net *sn;
356
357 sn = net_generic(net, sunrpc_net_id);
358 return __ip_map_update(sn->ip_map_cache, ipm, udom, expiry);
359}
360
361int auth_unix_add_addr(struct net *net, struct in6_addr *addr, struct auth_domain *dom)
358{ 362{
359 struct unix_domain *udom; 363 struct unix_domain *udom;
360 struct ip_map *ipmp; 364 struct ip_map *ipmp;
@@ -362,10 +366,10 @@ int auth_unix_add_addr(struct in6_addr *addr, struct auth_domain *dom)
362 if (dom->flavour != &svcauth_unix) 366 if (dom->flavour != &svcauth_unix)
363 return -EINVAL; 367 return -EINVAL;
364 udom = container_of(dom, struct unix_domain, h); 368 udom = container_of(dom, struct unix_domain, h);
365 ipmp = ip_map_lookup("nfsd", addr); 369 ipmp = ip_map_lookup(net, "nfsd", addr);
366 370
367 if (ipmp) 371 if (ipmp)
368 return ip_map_update(ipmp, udom, NEVER); 372 return ip_map_update(net, ipmp, udom, NEVER);
369 else 373 else
370 return -ENOMEM; 374 return -ENOMEM;
371} 375}
@@ -383,16 +387,18 @@ int auth_unix_forget_old(struct auth_domain *dom)
383} 387}
384EXPORT_SYMBOL_GPL(auth_unix_forget_old); 388EXPORT_SYMBOL_GPL(auth_unix_forget_old);
385 389
386struct auth_domain *auth_unix_lookup(struct in6_addr *addr) 390struct auth_domain *auth_unix_lookup(struct net *net, struct in6_addr *addr)
387{ 391{
388 struct ip_map *ipm; 392 struct ip_map *ipm;
389 struct auth_domain *rv; 393 struct auth_domain *rv;
394 struct sunrpc_net *sn;
390 395
391 ipm = ip_map_lookup("nfsd", addr); 396 sn = net_generic(net, sunrpc_net_id);
397 ipm = ip_map_lookup(net, "nfsd", addr);
392 398
393 if (!ipm) 399 if (!ipm)
394 return NULL; 400 return NULL;
395 if (cache_check(&ip_map_cache, &ipm->h, NULL)) 401 if (cache_check(sn->ip_map_cache, &ipm->h, NULL))
396 return NULL; 402 return NULL;
397 403
398 if ((ipm->m_client->addr_changes - ipm->m_add_change) >0) { 404 if ((ipm->m_client->addr_changes - ipm->m_add_change) >0) {
@@ -403,22 +409,29 @@ struct auth_domain *auth_unix_lookup(struct in6_addr *addr)
403 rv = &ipm->m_client->h; 409 rv = &ipm->m_client->h;
404 kref_get(&rv->ref); 410 kref_get(&rv->ref);
405 } 411 }
406 cache_put(&ipm->h, &ip_map_cache); 412 cache_put(&ipm->h, sn->ip_map_cache);
407 return rv; 413 return rv;
408} 414}
409EXPORT_SYMBOL_GPL(auth_unix_lookup); 415EXPORT_SYMBOL_GPL(auth_unix_lookup);
410 416
411void svcauth_unix_purge(void) 417void svcauth_unix_purge(void)
412{ 418{
413 cache_purge(&ip_map_cache); 419 struct net *net;
420
421 for_each_net(net) {
422 struct sunrpc_net *sn;
423
424 sn = net_generic(net, sunrpc_net_id);
425 cache_purge(sn->ip_map_cache);
426 }
414} 427}
415EXPORT_SYMBOL_GPL(svcauth_unix_purge); 428EXPORT_SYMBOL_GPL(svcauth_unix_purge);
416 429
417static inline struct ip_map * 430static inline struct ip_map *
418ip_map_cached_get(struct svc_rqst *rqstp) 431ip_map_cached_get(struct svc_xprt *xprt)
419{ 432{
420 struct ip_map *ipm = NULL; 433 struct ip_map *ipm = NULL;
421 struct svc_xprt *xprt = rqstp->rq_xprt; 434 struct sunrpc_net *sn;
422 435
423 if (test_bit(XPT_CACHE_AUTH, &xprt->xpt_flags)) { 436 if (test_bit(XPT_CACHE_AUTH, &xprt->xpt_flags)) {
424 spin_lock(&xprt->xpt_lock); 437 spin_lock(&xprt->xpt_lock);
@@ -430,9 +443,10 @@ ip_map_cached_get(struct svc_rqst *rqstp)
430 * remembered, e.g. by a second mount from the 443 * remembered, e.g. by a second mount from the
431 * same IP address. 444 * same IP address.
432 */ 445 */
446 sn = net_generic(xprt->xpt_net, sunrpc_net_id);
433 xprt->xpt_auth_cache = NULL; 447 xprt->xpt_auth_cache = NULL;
434 spin_unlock(&xprt->xpt_lock); 448 spin_unlock(&xprt->xpt_lock);
435 cache_put(&ipm->h, &ip_map_cache); 449 cache_put(&ipm->h, sn->ip_map_cache);
436 return NULL; 450 return NULL;
437 } 451 }
438 cache_get(&ipm->h); 452 cache_get(&ipm->h);
@@ -443,10 +457,8 @@ ip_map_cached_get(struct svc_rqst *rqstp)
443} 457}
444 458
445static inline void 459static inline void
446ip_map_cached_put(struct svc_rqst *rqstp, struct ip_map *ipm) 460ip_map_cached_put(struct svc_xprt *xprt, struct ip_map *ipm)
447{ 461{
448 struct svc_xprt *xprt = rqstp->rq_xprt;
449
450 if (test_bit(XPT_CACHE_AUTH, &xprt->xpt_flags)) { 462 if (test_bit(XPT_CACHE_AUTH, &xprt->xpt_flags)) {
451 spin_lock(&xprt->xpt_lock); 463 spin_lock(&xprt->xpt_lock);
452 if (xprt->xpt_auth_cache == NULL) { 464 if (xprt->xpt_auth_cache == NULL) {
@@ -456,15 +468,26 @@ ip_map_cached_put(struct svc_rqst *rqstp, struct ip_map *ipm)
456 } 468 }
457 spin_unlock(&xprt->xpt_lock); 469 spin_unlock(&xprt->xpt_lock);
458 } 470 }
459 if (ipm) 471 if (ipm) {
460 cache_put(&ipm->h, &ip_map_cache); 472 struct sunrpc_net *sn;
473
474 sn = net_generic(xprt->xpt_net, sunrpc_net_id);
475 cache_put(&ipm->h, sn->ip_map_cache);
476 }
461} 477}
462 478
463void 479void
464svcauth_unix_info_release(void *info) 480svcauth_unix_info_release(struct svc_xprt *xpt)
465{ 481{
466 struct ip_map *ipm = info; 482 struct ip_map *ipm;
467 cache_put(&ipm->h, &ip_map_cache); 483
484 ipm = xpt->xpt_auth_cache;
485 if (ipm != NULL) {
486 struct sunrpc_net *sn;
487
488 sn = net_generic(xpt->xpt_net, sunrpc_net_id);
489 cache_put(&ipm->h, sn->ip_map_cache);
490 }
468} 491}
469 492
470/**************************************************************************** 493/****************************************************************************
@@ -674,6 +697,8 @@ static struct group_info *unix_gid_find(uid_t uid, struct svc_rqst *rqstp)
674 switch (ret) { 697 switch (ret) {
675 case -ENOENT: 698 case -ENOENT:
676 return ERR_PTR(-ENOENT); 699 return ERR_PTR(-ENOENT);
700 case -ETIMEDOUT:
701 return ERR_PTR(-ESHUTDOWN);
677 case 0: 702 case 0:
678 gi = get_group_info(ug->gi); 703 gi = get_group_info(ug->gi);
679 cache_put(&ug->h, &unix_gid_cache); 704 cache_put(&ug->h, &unix_gid_cache);
@@ -691,6 +716,9 @@ svcauth_unix_set_client(struct svc_rqst *rqstp)
691 struct ip_map *ipm; 716 struct ip_map *ipm;
692 struct group_info *gi; 717 struct group_info *gi;
693 struct svc_cred *cred = &rqstp->rq_cred; 718 struct svc_cred *cred = &rqstp->rq_cred;
719 struct svc_xprt *xprt = rqstp->rq_xprt;
720 struct net *net = xprt->xpt_net;
721 struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
694 722
695 switch (rqstp->rq_addr.ss_family) { 723 switch (rqstp->rq_addr.ss_family) {
696 case AF_INET: 724 case AF_INET:
@@ -709,26 +737,27 @@ svcauth_unix_set_client(struct svc_rqst *rqstp)
709 if (rqstp->rq_proc == 0) 737 if (rqstp->rq_proc == 0)
710 return SVC_OK; 738 return SVC_OK;
711 739
712 ipm = ip_map_cached_get(rqstp); 740 ipm = ip_map_cached_get(xprt);
713 if (ipm == NULL) 741 if (ipm == NULL)
714 ipm = ip_map_lookup(rqstp->rq_server->sv_program->pg_class, 742 ipm = __ip_map_lookup(sn->ip_map_cache, rqstp->rq_server->sv_program->pg_class,
715 &sin6->sin6_addr); 743 &sin6->sin6_addr);
716 744
717 if (ipm == NULL) 745 if (ipm == NULL)
718 return SVC_DENIED; 746 return SVC_DENIED;
719 747
720 switch (cache_check(&ip_map_cache, &ipm->h, &rqstp->rq_chandle)) { 748 switch (cache_check(sn->ip_map_cache, &ipm->h, &rqstp->rq_chandle)) {
721 default: 749 default:
722 BUG(); 750 BUG();
723 case -EAGAIN:
724 case -ETIMEDOUT: 751 case -ETIMEDOUT:
752 return SVC_CLOSE;
753 case -EAGAIN:
725 return SVC_DROP; 754 return SVC_DROP;
726 case -ENOENT: 755 case -ENOENT:
727 return SVC_DENIED; 756 return SVC_DENIED;
728 case 0: 757 case 0:
729 rqstp->rq_client = &ipm->m_client->h; 758 rqstp->rq_client = &ipm->m_client->h;
730 kref_get(&rqstp->rq_client->ref); 759 kref_get(&rqstp->rq_client->ref);
731 ip_map_cached_put(rqstp, ipm); 760 ip_map_cached_put(xprt, ipm);
732 break; 761 break;
733 } 762 }
734 763
@@ -736,6 +765,8 @@ svcauth_unix_set_client(struct svc_rqst *rqstp)
736 switch (PTR_ERR(gi)) { 765 switch (PTR_ERR(gi)) {
737 case -EAGAIN: 766 case -EAGAIN:
738 return SVC_DROP; 767 return SVC_DROP;
768 case -ESHUTDOWN:
769 return SVC_CLOSE;
739 case -ENOENT: 770 case -ENOENT:
740 break; 771 break;
741 default: 772 default:
@@ -776,7 +807,7 @@ svcauth_null_accept(struct svc_rqst *rqstp, __be32 *authp)
776 cred->cr_gid = (gid_t) -1; 807 cred->cr_gid = (gid_t) -1;
777 cred->cr_group_info = groups_alloc(0); 808 cred->cr_group_info = groups_alloc(0);
778 if (cred->cr_group_info == NULL) 809 if (cred->cr_group_info == NULL)
779 return SVC_DROP; /* kmalloc failure - client must retry */ 810 return SVC_CLOSE; /* kmalloc failure - client must retry */
780 811
781 /* Put NULL verifier */ 812 /* Put NULL verifier */
782 svc_putnl(resv, RPC_AUTH_NULL); 813 svc_putnl(resv, RPC_AUTH_NULL);
@@ -840,7 +871,7 @@ svcauth_unix_accept(struct svc_rqst *rqstp, __be32 *authp)
840 goto badcred; 871 goto badcred;
841 cred->cr_group_info = groups_alloc(slen); 872 cred->cr_group_info = groups_alloc(slen);
842 if (cred->cr_group_info == NULL) 873 if (cred->cr_group_info == NULL)
843 return SVC_DROP; 874 return SVC_CLOSE;
844 for (i = 0; i < slen; i++) 875 for (i = 0; i < slen; i++)
845 GROUP_AT(cred->cr_group_info, i) = svc_getnl(argv); 876 GROUP_AT(cred->cr_group_info, i) = svc_getnl(argv);
846 if (svc_getu32(argv) != htonl(RPC_AUTH_NULL) || svc_getu32(argv) != 0) { 877 if (svc_getu32(argv) != htonl(RPC_AUTH_NULL) || svc_getu32(argv) != 0) {
@@ -886,3 +917,56 @@ struct auth_ops svcauth_unix = {
886 .set_client = svcauth_unix_set_client, 917 .set_client = svcauth_unix_set_client,
887}; 918};
888 919
920int ip_map_cache_create(struct net *net)
921{
922 int err = -ENOMEM;
923 struct cache_detail *cd;
924 struct cache_head **tbl;
925 struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
926
927 cd = kzalloc(sizeof(struct cache_detail), GFP_KERNEL);
928 if (cd == NULL)
929 goto err_cd;
930
931 tbl = kzalloc(IP_HASHMAX * sizeof(struct cache_head *), GFP_KERNEL);
932 if (tbl == NULL)
933 goto err_tbl;
934
935 cd->owner = THIS_MODULE,
936 cd->hash_size = IP_HASHMAX,
937 cd->hash_table = tbl,
938 cd->name = "auth.unix.ip",
939 cd->cache_put = ip_map_put,
940 cd->cache_upcall = ip_map_upcall,
941 cd->cache_parse = ip_map_parse,
942 cd->cache_show = ip_map_show,
943 cd->match = ip_map_match,
944 cd->init = ip_map_init,
945 cd->update = update,
946 cd->alloc = ip_map_alloc,
947
948 err = cache_register_net(cd, net);
949 if (err)
950 goto err_reg;
951
952 sn->ip_map_cache = cd;
953 return 0;
954
955err_reg:
956 kfree(tbl);
957err_tbl:
958 kfree(cd);
959err_cd:
960 return err;
961}
962
963void ip_map_cache_destroy(struct net *net)
964{
965 struct sunrpc_net *sn;
966
967 sn = net_generic(net, sunrpc_net_id);
968 cache_purge(sn->ip_map_cache);
969 cache_unregister_net(sn->ip_map_cache, net);
970 kfree(sn->ip_map_cache->hash_table);
971 kfree(sn->ip_map_cache);
972}
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index 7e534dd09077..07919e16be3e 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -64,7 +64,8 @@ static void svc_tcp_sock_detach(struct svc_xprt *);
64static void svc_sock_free(struct svc_xprt *); 64static void svc_sock_free(struct svc_xprt *);
65 65
66static struct svc_xprt *svc_create_socket(struct svc_serv *, int, 66static struct svc_xprt *svc_create_socket(struct svc_serv *, int,
67 struct sockaddr *, int, int); 67 struct net *, struct sockaddr *,
68 int, int);
68#ifdef CONFIG_DEBUG_LOCK_ALLOC 69#ifdef CONFIG_DEBUG_LOCK_ALLOC
69static struct lock_class_key svc_key[2]; 70static struct lock_class_key svc_key[2];
70static struct lock_class_key svc_slock_key[2]; 71static struct lock_class_key svc_slock_key[2];
@@ -657,10 +658,11 @@ static struct svc_xprt *svc_udp_accept(struct svc_xprt *xprt)
657} 658}
658 659
659static struct svc_xprt *svc_udp_create(struct svc_serv *serv, 660static struct svc_xprt *svc_udp_create(struct svc_serv *serv,
661 struct net *net,
660 struct sockaddr *sa, int salen, 662 struct sockaddr *sa, int salen,
661 int flags) 663 int flags)
662{ 664{
663 return svc_create_socket(serv, IPPROTO_UDP, sa, salen, flags); 665 return svc_create_socket(serv, IPPROTO_UDP, net, sa, salen, flags);
664} 666}
665 667
666static struct svc_xprt_ops svc_udp_ops = { 668static struct svc_xprt_ops svc_udp_ops = {
@@ -1133,9 +1135,6 @@ static int svc_tcp_sendto(struct svc_rqst *rqstp)
1133 reclen = htonl(0x80000000|((xbufp->len ) - 4)); 1135 reclen = htonl(0x80000000|((xbufp->len ) - 4));
1134 memcpy(xbufp->head[0].iov_base, &reclen, 4); 1136 memcpy(xbufp->head[0].iov_base, &reclen, 4);
1135 1137
1136 if (test_bit(XPT_DEAD, &rqstp->rq_xprt->xpt_flags))
1137 return -ENOTCONN;
1138
1139 sent = svc_sendto(rqstp, &rqstp->rq_res); 1138 sent = svc_sendto(rqstp, &rqstp->rq_res);
1140 if (sent != xbufp->len) { 1139 if (sent != xbufp->len) {
1141 printk(KERN_NOTICE 1140 printk(KERN_NOTICE
@@ -1178,10 +1177,11 @@ static int svc_tcp_has_wspace(struct svc_xprt *xprt)
1178} 1177}
1179 1178
1180static struct svc_xprt *svc_tcp_create(struct svc_serv *serv, 1179static struct svc_xprt *svc_tcp_create(struct svc_serv *serv,
1180 struct net *net,
1181 struct sockaddr *sa, int salen, 1181 struct sockaddr *sa, int salen,
1182 int flags) 1182 int flags)
1183{ 1183{
1184 return svc_create_socket(serv, IPPROTO_TCP, sa, salen, flags); 1184 return svc_create_socket(serv, IPPROTO_TCP, net, sa, salen, flags);
1185} 1185}
1186 1186
1187static struct svc_xprt_ops svc_tcp_ops = { 1187static struct svc_xprt_ops svc_tcp_ops = {
@@ -1258,19 +1258,13 @@ void svc_sock_update_bufs(struct svc_serv *serv)
1258 * The number of server threads has changed. Update 1258 * The number of server threads has changed. Update
1259 * rcvbuf and sndbuf accordingly on all sockets 1259 * rcvbuf and sndbuf accordingly on all sockets
1260 */ 1260 */
1261 struct list_head *le; 1261 struct svc_sock *svsk;
1262 1262
1263 spin_lock_bh(&serv->sv_lock); 1263 spin_lock_bh(&serv->sv_lock);
1264 list_for_each(le, &serv->sv_permsocks) { 1264 list_for_each_entry(svsk, &serv->sv_permsocks, sk_xprt.xpt_list)
1265 struct svc_sock *svsk =
1266 list_entry(le, struct svc_sock, sk_xprt.xpt_list);
1267 set_bit(XPT_CHNGBUF, &svsk->sk_xprt.xpt_flags); 1265 set_bit(XPT_CHNGBUF, &svsk->sk_xprt.xpt_flags);
1268 } 1266 list_for_each_entry(svsk, &serv->sv_tempsocks, sk_xprt.xpt_list)
1269 list_for_each(le, &serv->sv_tempsocks) {
1270 struct svc_sock *svsk =
1271 list_entry(le, struct svc_sock, sk_xprt.xpt_list);
1272 set_bit(XPT_CHNGBUF, &svsk->sk_xprt.xpt_flags); 1267 set_bit(XPT_CHNGBUF, &svsk->sk_xprt.xpt_flags);
1273 }
1274 spin_unlock_bh(&serv->sv_lock); 1268 spin_unlock_bh(&serv->sv_lock);
1275} 1269}
1276EXPORT_SYMBOL_GPL(svc_sock_update_bufs); 1270EXPORT_SYMBOL_GPL(svc_sock_update_bufs);
@@ -1385,6 +1379,7 @@ EXPORT_SYMBOL_GPL(svc_addsock);
1385 */ 1379 */
1386static struct svc_xprt *svc_create_socket(struct svc_serv *serv, 1380static struct svc_xprt *svc_create_socket(struct svc_serv *serv,
1387 int protocol, 1381 int protocol,
1382 struct net *net,
1388 struct sockaddr *sin, int len, 1383 struct sockaddr *sin, int len,
1389 int flags) 1384 int flags)
1390{ 1385{
@@ -1421,7 +1416,7 @@ static struct svc_xprt *svc_create_socket(struct svc_serv *serv,
1421 return ERR_PTR(-EINVAL); 1416 return ERR_PTR(-EINVAL);
1422 } 1417 }
1423 1418
1424 error = sock_create_kern(family, type, protocol, &sock); 1419 error = __sock_create(net, family, type, protocol, &sock, 1);
1425 if (error < 0) 1420 if (error < 0)
1426 return ERR_PTR(error); 1421 return ERR_PTR(error);
1427 1422
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index 970fb00f388c..4c8f18aff7c3 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -199,8 +199,6 @@ int xprt_reserve_xprt(struct rpc_task *task)
199 if (test_and_set_bit(XPRT_LOCKED, &xprt->state)) { 199 if (test_and_set_bit(XPRT_LOCKED, &xprt->state)) {
200 if (task == xprt->snd_task) 200 if (task == xprt->snd_task)
201 return 1; 201 return 1;
202 if (task == NULL)
203 return 0;
204 goto out_sleep; 202 goto out_sleep;
205 } 203 }
206 xprt->snd_task = task; 204 xprt->snd_task = task;
@@ -757,13 +755,11 @@ static void xprt_connect_status(struct rpc_task *task)
757 */ 755 */
758struct rpc_rqst *xprt_lookup_rqst(struct rpc_xprt *xprt, __be32 xid) 756struct rpc_rqst *xprt_lookup_rqst(struct rpc_xprt *xprt, __be32 xid)
759{ 757{
760 struct list_head *pos; 758 struct rpc_rqst *entry;
761 759
762 list_for_each(pos, &xprt->recv) { 760 list_for_each_entry(entry, &xprt->recv, rq_list)
763 struct rpc_rqst *entry = list_entry(pos, struct rpc_rqst, rq_list);
764 if (entry->rq_xid == xid) 761 if (entry->rq_xid == xid)
765 return entry; 762 return entry;
766 }
767 763
768 dprintk("RPC: xprt_lookup_rqst did not find xid %08x\n", 764 dprintk("RPC: xprt_lookup_rqst did not find xid %08x\n",
769 ntohl(xid)); 765 ntohl(xid));
@@ -962,6 +958,37 @@ static void xprt_free_slot(struct rpc_xprt *xprt, struct rpc_rqst *req)
962 spin_unlock(&xprt->reserve_lock); 958 spin_unlock(&xprt->reserve_lock);
963} 959}
964 960
961struct rpc_xprt *xprt_alloc(struct net *net, int size, int max_req)
962{
963 struct rpc_xprt *xprt;
964
965 xprt = kzalloc(size, GFP_KERNEL);
966 if (xprt == NULL)
967 goto out;
968
969 xprt->max_reqs = max_req;
970 xprt->slot = kcalloc(max_req, sizeof(struct rpc_rqst), GFP_KERNEL);
971 if (xprt->slot == NULL)
972 goto out_free;
973
974 xprt->xprt_net = get_net(net);
975 return xprt;
976
977out_free:
978 kfree(xprt);
979out:
980 return NULL;
981}
982EXPORT_SYMBOL_GPL(xprt_alloc);
983
984void xprt_free(struct rpc_xprt *xprt)
985{
986 put_net(xprt->xprt_net);
987 kfree(xprt->slot);
988 kfree(xprt);
989}
990EXPORT_SYMBOL_GPL(xprt_free);
991
965/** 992/**
966 * xprt_reserve - allocate an RPC request slot 993 * xprt_reserve - allocate an RPC request slot
967 * @task: RPC task requesting a slot allocation 994 * @task: RPC task requesting a slot allocation
diff --git a/net/sunrpc/xprtrdma/svc_rdma.c b/net/sunrpc/xprtrdma/svc_rdma.c
index d718b8fa9525..09af4fab1a45 100644
--- a/net/sunrpc/xprtrdma/svc_rdma.c
+++ b/net/sunrpc/xprtrdma/svc_rdma.c
@@ -43,6 +43,7 @@
43#include <linux/slab.h> 43#include <linux/slab.h>
44#include <linux/fs.h> 44#include <linux/fs.h>
45#include <linux/sysctl.h> 45#include <linux/sysctl.h>
46#include <linux/workqueue.h>
46#include <linux/sunrpc/clnt.h> 47#include <linux/sunrpc/clnt.h>
47#include <linux/sunrpc/sched.h> 48#include <linux/sunrpc/sched.h>
48#include <linux/sunrpc/svc_rdma.h> 49#include <linux/sunrpc/svc_rdma.h>
@@ -74,6 +75,8 @@ atomic_t rdma_stat_sq_prod;
74struct kmem_cache *svc_rdma_map_cachep; 75struct kmem_cache *svc_rdma_map_cachep;
75struct kmem_cache *svc_rdma_ctxt_cachep; 76struct kmem_cache *svc_rdma_ctxt_cachep;
76 77
78struct workqueue_struct *svc_rdma_wq;
79
77/* 80/*
78 * This function implements reading and resetting an atomic_t stat 81 * This function implements reading and resetting an atomic_t stat
79 * variable through read/write to a proc file. Any write to the file 82 * variable through read/write to a proc file. Any write to the file
@@ -231,7 +234,7 @@ static ctl_table svcrdma_root_table[] = {
231void svc_rdma_cleanup(void) 234void svc_rdma_cleanup(void)
232{ 235{
233 dprintk("SVCRDMA Module Removed, deregister RPC RDMA transport\n"); 236 dprintk("SVCRDMA Module Removed, deregister RPC RDMA transport\n");
234 flush_scheduled_work(); 237 destroy_workqueue(svc_rdma_wq);
235 if (svcrdma_table_header) { 238 if (svcrdma_table_header) {
236 unregister_sysctl_table(svcrdma_table_header); 239 unregister_sysctl_table(svcrdma_table_header);
237 svcrdma_table_header = NULL; 240 svcrdma_table_header = NULL;
@@ -249,6 +252,11 @@ int svc_rdma_init(void)
249 dprintk("\tsq_depth : %d\n", 252 dprintk("\tsq_depth : %d\n",
250 svcrdma_max_requests * RPCRDMA_SQ_DEPTH_MULT); 253 svcrdma_max_requests * RPCRDMA_SQ_DEPTH_MULT);
251 dprintk("\tmax_inline : %d\n", svcrdma_max_req_size); 254 dprintk("\tmax_inline : %d\n", svcrdma_max_req_size);
255
256 svc_rdma_wq = alloc_workqueue("svc_rdma", 0, 0);
257 if (!svc_rdma_wq)
258 return -ENOMEM;
259
252 if (!svcrdma_table_header) 260 if (!svcrdma_table_header)
253 svcrdma_table_header = 261 svcrdma_table_header =
254 register_sysctl_table(svcrdma_root_table); 262 register_sysctl_table(svcrdma_root_table);
@@ -283,6 +291,7 @@ int svc_rdma_init(void)
283 kmem_cache_destroy(svc_rdma_map_cachep); 291 kmem_cache_destroy(svc_rdma_map_cachep);
284 err0: 292 err0:
285 unregister_sysctl_table(svcrdma_table_header); 293 unregister_sysctl_table(svcrdma_table_header);
294 destroy_workqueue(svc_rdma_wq);
286 return -ENOMEM; 295 return -ENOMEM;
287} 296}
288MODULE_AUTHOR("Tom Tucker <tom@opengridcomputing.com>"); 297MODULE_AUTHOR("Tom Tucker <tom@opengridcomputing.com>");
diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
index 0194de814933..df67211c4baf 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
@@ -263,9 +263,9 @@ static int fast_reg_read_chunks(struct svcxprt_rdma *xprt,
263 frmr->page_list_len = PAGE_ALIGN(byte_count) >> PAGE_SHIFT; 263 frmr->page_list_len = PAGE_ALIGN(byte_count) >> PAGE_SHIFT;
264 for (page_no = 0; page_no < frmr->page_list_len; page_no++) { 264 for (page_no = 0; page_no < frmr->page_list_len; page_no++) {
265 frmr->page_list->page_list[page_no] = 265 frmr->page_list->page_list[page_no] =
266 ib_dma_map_single(xprt->sc_cm_id->device, 266 ib_dma_map_page(xprt->sc_cm_id->device,
267 page_address(rqstp->rq_arg.pages[page_no]), 267 rqstp->rq_arg.pages[page_no], 0,
268 PAGE_SIZE, DMA_FROM_DEVICE); 268 PAGE_SIZE, DMA_FROM_DEVICE);
269 if (ib_dma_mapping_error(xprt->sc_cm_id->device, 269 if (ib_dma_mapping_error(xprt->sc_cm_id->device,
270 frmr->page_list->page_list[page_no])) 270 frmr->page_list->page_list[page_no]))
271 goto fatal_err; 271 goto fatal_err;
@@ -309,17 +309,21 @@ static int rdma_set_ctxt_sge(struct svcxprt_rdma *xprt,
309 int count) 309 int count)
310{ 310{
311 int i; 311 int i;
312 unsigned long off;
312 313
313 ctxt->count = count; 314 ctxt->count = count;
314 ctxt->direction = DMA_FROM_DEVICE; 315 ctxt->direction = DMA_FROM_DEVICE;
315 for (i = 0; i < count; i++) { 316 for (i = 0; i < count; i++) {
316 ctxt->sge[i].length = 0; /* in case map fails */ 317 ctxt->sge[i].length = 0; /* in case map fails */
317 if (!frmr) { 318 if (!frmr) {
319 BUG_ON(0 == virt_to_page(vec[i].iov_base));
320 off = (unsigned long)vec[i].iov_base & ~PAGE_MASK;
318 ctxt->sge[i].addr = 321 ctxt->sge[i].addr =
319 ib_dma_map_single(xprt->sc_cm_id->device, 322 ib_dma_map_page(xprt->sc_cm_id->device,
320 vec[i].iov_base, 323 virt_to_page(vec[i].iov_base),
321 vec[i].iov_len, 324 off,
322 DMA_FROM_DEVICE); 325 vec[i].iov_len,
326 DMA_FROM_DEVICE);
323 if (ib_dma_mapping_error(xprt->sc_cm_id->device, 327 if (ib_dma_mapping_error(xprt->sc_cm_id->device,
324 ctxt->sge[i].addr)) 328 ctxt->sge[i].addr))
325 return -EINVAL; 329 return -EINVAL;
@@ -491,6 +495,7 @@ next_sge:
491 printk(KERN_ERR "svcrdma: Error %d posting RDMA_READ\n", 495 printk(KERN_ERR "svcrdma: Error %d posting RDMA_READ\n",
492 err); 496 err);
493 set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags); 497 set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
498 svc_rdma_unmap_dma(ctxt);
494 svc_rdma_put_context(ctxt, 0); 499 svc_rdma_put_context(ctxt, 0);
495 goto out; 500 goto out;
496 } 501 }
diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
index b15e1ebb2bfa..249a835b703f 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
@@ -70,8 +70,8 @@
70 * on extra page for the RPCRMDA header. 70 * on extra page for the RPCRMDA header.
71 */ 71 */
72static int fast_reg_xdr(struct svcxprt_rdma *xprt, 72static int fast_reg_xdr(struct svcxprt_rdma *xprt,
73 struct xdr_buf *xdr, 73 struct xdr_buf *xdr,
74 struct svc_rdma_req_map *vec) 74 struct svc_rdma_req_map *vec)
75{ 75{
76 int sge_no; 76 int sge_no;
77 u32 sge_bytes; 77 u32 sge_bytes;
@@ -96,21 +96,25 @@ static int fast_reg_xdr(struct svcxprt_rdma *xprt,
96 vec->count = 2; 96 vec->count = 2;
97 sge_no++; 97 sge_no++;
98 98
99 /* Build the FRMR */ 99 /* Map the XDR head */
100 frmr->kva = frva; 100 frmr->kva = frva;
101 frmr->direction = DMA_TO_DEVICE; 101 frmr->direction = DMA_TO_DEVICE;
102 frmr->access_flags = 0; 102 frmr->access_flags = 0;
103 frmr->map_len = PAGE_SIZE; 103 frmr->map_len = PAGE_SIZE;
104 frmr->page_list_len = 1; 104 frmr->page_list_len = 1;
105 page_off = (unsigned long)xdr->head[0].iov_base & ~PAGE_MASK;
105 frmr->page_list->page_list[page_no] = 106 frmr->page_list->page_list[page_no] =
106 ib_dma_map_single(xprt->sc_cm_id->device, 107 ib_dma_map_page(xprt->sc_cm_id->device,
107 (void *)xdr->head[0].iov_base, 108 virt_to_page(xdr->head[0].iov_base),
108 PAGE_SIZE, DMA_TO_DEVICE); 109 page_off,
110 PAGE_SIZE - page_off,
111 DMA_TO_DEVICE);
109 if (ib_dma_mapping_error(xprt->sc_cm_id->device, 112 if (ib_dma_mapping_error(xprt->sc_cm_id->device,
110 frmr->page_list->page_list[page_no])) 113 frmr->page_list->page_list[page_no]))
111 goto fatal_err; 114 goto fatal_err;
112 atomic_inc(&xprt->sc_dma_used); 115 atomic_inc(&xprt->sc_dma_used);
113 116
117 /* Map the XDR page list */
114 page_off = xdr->page_base; 118 page_off = xdr->page_base;
115 page_bytes = xdr->page_len + page_off; 119 page_bytes = xdr->page_len + page_off;
116 if (!page_bytes) 120 if (!page_bytes)
@@ -128,9 +132,9 @@ static int fast_reg_xdr(struct svcxprt_rdma *xprt,
128 page_bytes -= sge_bytes; 132 page_bytes -= sge_bytes;
129 133
130 frmr->page_list->page_list[page_no] = 134 frmr->page_list->page_list[page_no] =
131 ib_dma_map_single(xprt->sc_cm_id->device, 135 ib_dma_map_page(xprt->sc_cm_id->device,
132 page_address(page), 136 page, page_off,
133 PAGE_SIZE, DMA_TO_DEVICE); 137 sge_bytes, DMA_TO_DEVICE);
134 if (ib_dma_mapping_error(xprt->sc_cm_id->device, 138 if (ib_dma_mapping_error(xprt->sc_cm_id->device,
135 frmr->page_list->page_list[page_no])) 139 frmr->page_list->page_list[page_no]))
136 goto fatal_err; 140 goto fatal_err;
@@ -166,8 +170,10 @@ static int fast_reg_xdr(struct svcxprt_rdma *xprt,
166 vec->sge[sge_no].iov_base = frva + frmr->map_len + page_off; 170 vec->sge[sge_no].iov_base = frva + frmr->map_len + page_off;
167 171
168 frmr->page_list->page_list[page_no] = 172 frmr->page_list->page_list[page_no] =
169 ib_dma_map_single(xprt->sc_cm_id->device, va, PAGE_SIZE, 173 ib_dma_map_page(xprt->sc_cm_id->device, virt_to_page(va),
170 DMA_TO_DEVICE); 174 page_off,
175 PAGE_SIZE,
176 DMA_TO_DEVICE);
171 if (ib_dma_mapping_error(xprt->sc_cm_id->device, 177 if (ib_dma_mapping_error(xprt->sc_cm_id->device,
172 frmr->page_list->page_list[page_no])) 178 frmr->page_list->page_list[page_no]))
173 goto fatal_err; 179 goto fatal_err;
@@ -245,6 +251,35 @@ static int map_xdr(struct svcxprt_rdma *xprt,
245 return 0; 251 return 0;
246} 252}
247 253
254static dma_addr_t dma_map_xdr(struct svcxprt_rdma *xprt,
255 struct xdr_buf *xdr,
256 u32 xdr_off, size_t len, int dir)
257{
258 struct page *page;
259 dma_addr_t dma_addr;
260 if (xdr_off < xdr->head[0].iov_len) {
261 /* This offset is in the head */
262 xdr_off += (unsigned long)xdr->head[0].iov_base & ~PAGE_MASK;
263 page = virt_to_page(xdr->head[0].iov_base);
264 } else {
265 xdr_off -= xdr->head[0].iov_len;
266 if (xdr_off < xdr->page_len) {
267 /* This offset is in the page list */
268 page = xdr->pages[xdr_off >> PAGE_SHIFT];
269 xdr_off &= ~PAGE_MASK;
270 } else {
271 /* This offset is in the tail */
272 xdr_off -= xdr->page_len;
273 xdr_off += (unsigned long)
274 xdr->tail[0].iov_base & ~PAGE_MASK;
275 page = virt_to_page(xdr->tail[0].iov_base);
276 }
277 }
278 dma_addr = ib_dma_map_page(xprt->sc_cm_id->device, page, xdr_off,
279 min_t(size_t, PAGE_SIZE, len), dir);
280 return dma_addr;
281}
282
248/* Assumptions: 283/* Assumptions:
249 * - We are using FRMR 284 * - We are using FRMR
250 * - or - 285 * - or -
@@ -293,10 +328,9 @@ static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp,
293 sge[sge_no].length = sge_bytes; 328 sge[sge_no].length = sge_bytes;
294 if (!vec->frmr) { 329 if (!vec->frmr) {
295 sge[sge_no].addr = 330 sge[sge_no].addr =
296 ib_dma_map_single(xprt->sc_cm_id->device, 331 dma_map_xdr(xprt, &rqstp->rq_res, xdr_off,
297 (void *) 332 sge_bytes, DMA_TO_DEVICE);
298 vec->sge[xdr_sge_no].iov_base + sge_off, 333 xdr_off += sge_bytes;
299 sge_bytes, DMA_TO_DEVICE);
300 if (ib_dma_mapping_error(xprt->sc_cm_id->device, 334 if (ib_dma_mapping_error(xprt->sc_cm_id->device,
301 sge[sge_no].addr)) 335 sge[sge_no].addr))
302 goto err; 336 goto err;
@@ -333,6 +367,8 @@ static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp,
333 goto err; 367 goto err;
334 return 0; 368 return 0;
335 err: 369 err:
370 svc_rdma_unmap_dma(ctxt);
371 svc_rdma_put_frmr(xprt, vec->frmr);
336 svc_rdma_put_context(ctxt, 0); 372 svc_rdma_put_context(ctxt, 0);
337 /* Fatal error, close transport */ 373 /* Fatal error, close transport */
338 return -EIO; 374 return -EIO;
@@ -494,7 +530,8 @@ static int send_reply_chunks(struct svcxprt_rdma *xprt,
494 * In all three cases, this function prepares the RPCRDMA header in 530 * In all three cases, this function prepares the RPCRDMA header in
495 * sge[0], the 'type' parameter indicates the type to place in the 531 * sge[0], the 'type' parameter indicates the type to place in the
496 * RPCRDMA header, and the 'byte_count' field indicates how much of 532 * RPCRDMA header, and the 'byte_count' field indicates how much of
497 * the XDR to include in this RDMA_SEND. 533 * the XDR to include in this RDMA_SEND. NB: The offset of the payload
534 * to send is zero in the XDR.
498 */ 535 */
499static int send_reply(struct svcxprt_rdma *rdma, 536static int send_reply(struct svcxprt_rdma *rdma,
500 struct svc_rqst *rqstp, 537 struct svc_rqst *rqstp,
@@ -536,23 +573,24 @@ static int send_reply(struct svcxprt_rdma *rdma,
536 ctxt->sge[0].lkey = rdma->sc_dma_lkey; 573 ctxt->sge[0].lkey = rdma->sc_dma_lkey;
537 ctxt->sge[0].length = svc_rdma_xdr_get_reply_hdr_len(rdma_resp); 574 ctxt->sge[0].length = svc_rdma_xdr_get_reply_hdr_len(rdma_resp);
538 ctxt->sge[0].addr = 575 ctxt->sge[0].addr =
539 ib_dma_map_single(rdma->sc_cm_id->device, page_address(page), 576 ib_dma_map_page(rdma->sc_cm_id->device, page, 0,
540 ctxt->sge[0].length, DMA_TO_DEVICE); 577 ctxt->sge[0].length, DMA_TO_DEVICE);
541 if (ib_dma_mapping_error(rdma->sc_cm_id->device, ctxt->sge[0].addr)) 578 if (ib_dma_mapping_error(rdma->sc_cm_id->device, ctxt->sge[0].addr))
542 goto err; 579 goto err;
543 atomic_inc(&rdma->sc_dma_used); 580 atomic_inc(&rdma->sc_dma_used);
544 581
545 ctxt->direction = DMA_TO_DEVICE; 582 ctxt->direction = DMA_TO_DEVICE;
546 583
547 /* Determine how many of our SGE are to be transmitted */ 584 /* Map the payload indicated by 'byte_count' */
548 for (sge_no = 1; byte_count && sge_no < vec->count; sge_no++) { 585 for (sge_no = 1; byte_count && sge_no < vec->count; sge_no++) {
586 int xdr_off = 0;
549 sge_bytes = min_t(size_t, vec->sge[sge_no].iov_len, byte_count); 587 sge_bytes = min_t(size_t, vec->sge[sge_no].iov_len, byte_count);
550 byte_count -= sge_bytes; 588 byte_count -= sge_bytes;
551 if (!vec->frmr) { 589 if (!vec->frmr) {
552 ctxt->sge[sge_no].addr = 590 ctxt->sge[sge_no].addr =
553 ib_dma_map_single(rdma->sc_cm_id->device, 591 dma_map_xdr(rdma, &rqstp->rq_res, xdr_off,
554 vec->sge[sge_no].iov_base, 592 sge_bytes, DMA_TO_DEVICE);
555 sge_bytes, DMA_TO_DEVICE); 593 xdr_off += sge_bytes;
556 if (ib_dma_mapping_error(rdma->sc_cm_id->device, 594 if (ib_dma_mapping_error(rdma->sc_cm_id->device,
557 ctxt->sge[sge_no].addr)) 595 ctxt->sge[sge_no].addr))
558 goto err; 596 goto err;
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index edea15a54e51..9df1eadc912a 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -45,6 +45,7 @@
45#include <linux/sched.h> 45#include <linux/sched.h>
46#include <linux/slab.h> 46#include <linux/slab.h>
47#include <linux/spinlock.h> 47#include <linux/spinlock.h>
48#include <linux/workqueue.h>
48#include <rdma/ib_verbs.h> 49#include <rdma/ib_verbs.h>
49#include <rdma/rdma_cm.h> 50#include <rdma/rdma_cm.h>
50#include <linux/sunrpc/svc_rdma.h> 51#include <linux/sunrpc/svc_rdma.h>
@@ -52,6 +53,7 @@
52#define RPCDBG_FACILITY RPCDBG_SVCXPRT 53#define RPCDBG_FACILITY RPCDBG_SVCXPRT
53 54
54static struct svc_xprt *svc_rdma_create(struct svc_serv *serv, 55static struct svc_xprt *svc_rdma_create(struct svc_serv *serv,
56 struct net *net,
55 struct sockaddr *sa, int salen, 57 struct sockaddr *sa, int salen,
56 int flags); 58 int flags);
57static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt); 59static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt);
@@ -89,6 +91,9 @@ struct svc_xprt_class svc_rdma_class = {
89/* WR context cache. Created in svc_rdma.c */ 91/* WR context cache. Created in svc_rdma.c */
90extern struct kmem_cache *svc_rdma_ctxt_cachep; 92extern struct kmem_cache *svc_rdma_ctxt_cachep;
91 93
94/* Workqueue created in svc_rdma.c */
95extern struct workqueue_struct *svc_rdma_wq;
96
92struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *xprt) 97struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *xprt)
93{ 98{
94 struct svc_rdma_op_ctxt *ctxt; 99 struct svc_rdma_op_ctxt *ctxt;
@@ -120,7 +125,7 @@ void svc_rdma_unmap_dma(struct svc_rdma_op_ctxt *ctxt)
120 */ 125 */
121 if (ctxt->sge[i].lkey == xprt->sc_dma_lkey) { 126 if (ctxt->sge[i].lkey == xprt->sc_dma_lkey) {
122 atomic_dec(&xprt->sc_dma_used); 127 atomic_dec(&xprt->sc_dma_used);
123 ib_dma_unmap_single(xprt->sc_cm_id->device, 128 ib_dma_unmap_page(xprt->sc_cm_id->device,
124 ctxt->sge[i].addr, 129 ctxt->sge[i].addr,
125 ctxt->sge[i].length, 130 ctxt->sge[i].length,
126 ctxt->direction); 131 ctxt->direction);
@@ -502,8 +507,8 @@ int svc_rdma_post_recv(struct svcxprt_rdma *xprt)
502 BUG_ON(sge_no >= xprt->sc_max_sge); 507 BUG_ON(sge_no >= xprt->sc_max_sge);
503 page = svc_rdma_get_page(); 508 page = svc_rdma_get_page();
504 ctxt->pages[sge_no] = page; 509 ctxt->pages[sge_no] = page;
505 pa = ib_dma_map_single(xprt->sc_cm_id->device, 510 pa = ib_dma_map_page(xprt->sc_cm_id->device,
506 page_address(page), PAGE_SIZE, 511 page, 0, PAGE_SIZE,
507 DMA_FROM_DEVICE); 512 DMA_FROM_DEVICE);
508 if (ib_dma_mapping_error(xprt->sc_cm_id->device, pa)) 513 if (ib_dma_mapping_error(xprt->sc_cm_id->device, pa))
509 goto err_put_ctxt; 514 goto err_put_ctxt;
@@ -511,9 +516,9 @@ int svc_rdma_post_recv(struct svcxprt_rdma *xprt)
511 ctxt->sge[sge_no].addr = pa; 516 ctxt->sge[sge_no].addr = pa;
512 ctxt->sge[sge_no].length = PAGE_SIZE; 517 ctxt->sge[sge_no].length = PAGE_SIZE;
513 ctxt->sge[sge_no].lkey = xprt->sc_dma_lkey; 518 ctxt->sge[sge_no].lkey = xprt->sc_dma_lkey;
519 ctxt->count = sge_no + 1;
514 buflen += PAGE_SIZE; 520 buflen += PAGE_SIZE;
515 } 521 }
516 ctxt->count = sge_no;
517 recv_wr.next = NULL; 522 recv_wr.next = NULL;
518 recv_wr.sg_list = &ctxt->sge[0]; 523 recv_wr.sg_list = &ctxt->sge[0];
519 recv_wr.num_sge = ctxt->count; 524 recv_wr.num_sge = ctxt->count;
@@ -529,6 +534,7 @@ int svc_rdma_post_recv(struct svcxprt_rdma *xprt)
529 return ret; 534 return ret;
530 535
531 err_put_ctxt: 536 err_put_ctxt:
537 svc_rdma_unmap_dma(ctxt);
532 svc_rdma_put_context(ctxt, 1); 538 svc_rdma_put_context(ctxt, 1);
533 return -ENOMEM; 539 return -ENOMEM;
534} 540}
@@ -670,6 +676,7 @@ static int rdma_cma_handler(struct rdma_cm_id *cma_id,
670 * Create a listening RDMA service endpoint. 676 * Create a listening RDMA service endpoint.
671 */ 677 */
672static struct svc_xprt *svc_rdma_create(struct svc_serv *serv, 678static struct svc_xprt *svc_rdma_create(struct svc_serv *serv,
679 struct net *net,
673 struct sockaddr *sa, int salen, 680 struct sockaddr *sa, int salen,
674 int flags) 681 int flags)
675{ 682{
@@ -798,8 +805,8 @@ static void frmr_unmap_dma(struct svcxprt_rdma *xprt,
798 if (ib_dma_mapping_error(frmr->mr->device, addr)) 805 if (ib_dma_mapping_error(frmr->mr->device, addr))
799 continue; 806 continue;
800 atomic_dec(&xprt->sc_dma_used); 807 atomic_dec(&xprt->sc_dma_used);
801 ib_dma_unmap_single(frmr->mr->device, addr, PAGE_SIZE, 808 ib_dma_unmap_page(frmr->mr->device, addr, PAGE_SIZE,
802 frmr->direction); 809 frmr->direction);
803 } 810 }
804} 811}
805 812
@@ -1184,7 +1191,7 @@ static void svc_rdma_free(struct svc_xprt *xprt)
1184 struct svcxprt_rdma *rdma = 1191 struct svcxprt_rdma *rdma =
1185 container_of(xprt, struct svcxprt_rdma, sc_xprt); 1192 container_of(xprt, struct svcxprt_rdma, sc_xprt);
1186 INIT_WORK(&rdma->sc_work, __svc_rdma_free); 1193 INIT_WORK(&rdma->sc_work, __svc_rdma_free);
1187 schedule_work(&rdma->sc_work); 1194 queue_work(svc_rdma_wq, &rdma->sc_work);
1188} 1195}
1189 1196
1190static int svc_rdma_has_wspace(struct svc_xprt *xprt) 1197static int svc_rdma_has_wspace(struct svc_xprt *xprt)
@@ -1274,7 +1281,7 @@ int svc_rdma_send(struct svcxprt_rdma *xprt, struct ib_send_wr *wr)
1274 atomic_read(&xprt->sc_sq_count) < 1281 atomic_read(&xprt->sc_sq_count) <
1275 xprt->sc_sq_depth); 1282 xprt->sc_sq_depth);
1276 if (test_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags)) 1283 if (test_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags))
1277 return 0; 1284 return -ENOTCONN;
1278 continue; 1285 continue;
1279 } 1286 }
1280 /* Take a transport ref for each WR posted */ 1287 /* Take a transport ref for each WR posted */
@@ -1306,7 +1313,6 @@ void svc_rdma_send_error(struct svcxprt_rdma *xprt, struct rpcrdma_msg *rmsgp,
1306 enum rpcrdma_errcode err) 1313 enum rpcrdma_errcode err)
1307{ 1314{
1308 struct ib_send_wr err_wr; 1315 struct ib_send_wr err_wr;
1309 struct ib_sge sge;
1310 struct page *p; 1316 struct page *p;
1311 struct svc_rdma_op_ctxt *ctxt; 1317 struct svc_rdma_op_ctxt *ctxt;
1312 u32 *va; 1318 u32 *va;
@@ -1319,26 +1325,27 @@ void svc_rdma_send_error(struct svcxprt_rdma *xprt, struct rpcrdma_msg *rmsgp,
1319 /* XDR encode error */ 1325 /* XDR encode error */
1320 length = svc_rdma_xdr_encode_error(xprt, rmsgp, err, va); 1326 length = svc_rdma_xdr_encode_error(xprt, rmsgp, err, va);
1321 1327
1328 ctxt = svc_rdma_get_context(xprt);
1329 ctxt->direction = DMA_FROM_DEVICE;
1330 ctxt->count = 1;
1331 ctxt->pages[0] = p;
1332
1322 /* Prepare SGE for local address */ 1333 /* Prepare SGE for local address */
1323 sge.addr = ib_dma_map_single(xprt->sc_cm_id->device, 1334 ctxt->sge[0].addr = ib_dma_map_page(xprt->sc_cm_id->device,
1324 page_address(p), PAGE_SIZE, DMA_FROM_DEVICE); 1335 p, 0, length, DMA_FROM_DEVICE);
1325 if (ib_dma_mapping_error(xprt->sc_cm_id->device, sge.addr)) { 1336 if (ib_dma_mapping_error(xprt->sc_cm_id->device, ctxt->sge[0].addr)) {
1326 put_page(p); 1337 put_page(p);
1327 return; 1338 return;
1328 } 1339 }
1329 atomic_inc(&xprt->sc_dma_used); 1340 atomic_inc(&xprt->sc_dma_used);
1330 sge.lkey = xprt->sc_dma_lkey; 1341 ctxt->sge[0].lkey = xprt->sc_dma_lkey;
1331 sge.length = length; 1342 ctxt->sge[0].length = length;
1332
1333 ctxt = svc_rdma_get_context(xprt);
1334 ctxt->count = 1;
1335 ctxt->pages[0] = p;
1336 1343
1337 /* Prepare SEND WR */ 1344 /* Prepare SEND WR */
1338 memset(&err_wr, 0, sizeof err_wr); 1345 memset(&err_wr, 0, sizeof err_wr);
1339 ctxt->wr_op = IB_WR_SEND; 1346 ctxt->wr_op = IB_WR_SEND;
1340 err_wr.wr_id = (unsigned long)ctxt; 1347 err_wr.wr_id = (unsigned long)ctxt;
1341 err_wr.sg_list = &sge; 1348 err_wr.sg_list = ctxt->sge;
1342 err_wr.num_sge = 1; 1349 err_wr.num_sge = 1;
1343 err_wr.opcode = IB_WR_SEND; 1350 err_wr.opcode = IB_WR_SEND;
1344 err_wr.send_flags = IB_SEND_SIGNALED; 1351 err_wr.send_flags = IB_SEND_SIGNALED;
@@ -1348,9 +1355,7 @@ void svc_rdma_send_error(struct svcxprt_rdma *xprt, struct rpcrdma_msg *rmsgp,
1348 if (ret) { 1355 if (ret) {
1349 dprintk("svcrdma: Error %d posting send for protocol error\n", 1356 dprintk("svcrdma: Error %d posting send for protocol error\n",
1350 ret); 1357 ret);
1351 ib_dma_unmap_single(xprt->sc_cm_id->device, 1358 svc_rdma_unmap_dma(ctxt);
1352 sge.addr, PAGE_SIZE,
1353 DMA_FROM_DEVICE);
1354 svc_rdma_put_context(ctxt, 1); 1359 svc_rdma_put_context(ctxt, 1);
1355 } 1360 }
1356} 1361}
diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c
index a85e866a77f7..0867070bb5ca 100644
--- a/net/sunrpc/xprtrdma/transport.c
+++ b/net/sunrpc/xprtrdma/transport.c
@@ -237,8 +237,7 @@ xprt_rdma_destroy(struct rpc_xprt *xprt)
237 237
238 dprintk("RPC: %s: called\n", __func__); 238 dprintk("RPC: %s: called\n", __func__);
239 239
240 cancel_delayed_work(&r_xprt->rdma_connect); 240 cancel_delayed_work_sync(&r_xprt->rdma_connect);
241 flush_scheduled_work();
242 241
243 xprt_clear_connected(xprt); 242 xprt_clear_connected(xprt);
244 243
@@ -251,9 +250,7 @@ xprt_rdma_destroy(struct rpc_xprt *xprt)
251 250
252 xprt_rdma_free_addresses(xprt); 251 xprt_rdma_free_addresses(xprt);
253 252
254 kfree(xprt->slot); 253 xprt_free(xprt);
255 xprt->slot = NULL;
256 kfree(xprt);
257 254
258 dprintk("RPC: %s: returning\n", __func__); 255 dprintk("RPC: %s: returning\n", __func__);
259 256
@@ -285,23 +282,14 @@ xprt_setup_rdma(struct xprt_create *args)
285 return ERR_PTR(-EBADF); 282 return ERR_PTR(-EBADF);
286 } 283 }
287 284
288 xprt = kzalloc(sizeof(struct rpcrdma_xprt), GFP_KERNEL); 285 xprt = xprt_alloc(args->net, sizeof(struct rpcrdma_xprt),
286 xprt_rdma_slot_table_entries);
289 if (xprt == NULL) { 287 if (xprt == NULL) {
290 dprintk("RPC: %s: couldn't allocate rpcrdma_xprt\n", 288 dprintk("RPC: %s: couldn't allocate rpcrdma_xprt\n",
291 __func__); 289 __func__);
292 return ERR_PTR(-ENOMEM); 290 return ERR_PTR(-ENOMEM);
293 } 291 }
294 292
295 xprt->max_reqs = xprt_rdma_slot_table_entries;
296 xprt->slot = kcalloc(xprt->max_reqs,
297 sizeof(struct rpc_rqst), GFP_KERNEL);
298 if (xprt->slot == NULL) {
299 dprintk("RPC: %s: couldn't allocate %d slots\n",
300 __func__, xprt->max_reqs);
301 kfree(xprt);
302 return ERR_PTR(-ENOMEM);
303 }
304
305 /* 60 second timeout, no retries */ 293 /* 60 second timeout, no retries */
306 xprt->timeout = &xprt_rdma_default_timeout; 294 xprt->timeout = &xprt_rdma_default_timeout;
307 xprt->bind_timeout = (60U * HZ); 295 xprt->bind_timeout = (60U * HZ);
@@ -410,8 +398,7 @@ out3:
410out2: 398out2:
411 rpcrdma_ia_close(&new_xprt->rx_ia); 399 rpcrdma_ia_close(&new_xprt->rx_ia);
412out1: 400out1:
413 kfree(xprt->slot); 401 xprt_free(xprt);
414 kfree(xprt);
415 return ERR_PTR(rc); 402 return ERR_PTR(rc);
416} 403}
417 404
@@ -460,7 +447,7 @@ xprt_rdma_connect(struct rpc_task *task)
460 } else { 447 } else {
461 schedule_delayed_work(&r_xprt->rdma_connect, 0); 448 schedule_delayed_work(&r_xprt->rdma_connect, 0);
462 if (!RPC_IS_ASYNC(task)) 449 if (!RPC_IS_ASYNC(task))
463 flush_scheduled_work(); 450 flush_delayed_work(&r_xprt->rdma_connect);
464 } 451 }
465} 452}
466 453
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index fe9306bf10cc..dfcab5ac65af 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -774,8 +774,7 @@ static void xs_destroy(struct rpc_xprt *xprt)
774 774
775 xs_close(xprt); 775 xs_close(xprt);
776 xs_free_peer_addresses(xprt); 776 xs_free_peer_addresses(xprt);
777 kfree(xprt->slot); 777 xprt_free(xprt);
778 kfree(xprt);
779 module_put(THIS_MODULE); 778 module_put(THIS_MODULE);
780} 779}
781 780
@@ -1516,7 +1515,7 @@ static void xs_set_port(struct rpc_xprt *xprt, unsigned short port)
1516 xs_update_peer_port(xprt); 1515 xs_update_peer_port(xprt);
1517} 1516}
1518 1517
1519static unsigned short xs_get_srcport(struct sock_xprt *transport, struct socket *sock) 1518static unsigned short xs_get_srcport(struct sock_xprt *transport)
1520{ 1519{
1521 unsigned short port = transport->srcport; 1520 unsigned short port = transport->srcport;
1522 1521
@@ -1525,7 +1524,7 @@ static unsigned short xs_get_srcport(struct sock_xprt *transport, struct socket
1525 return port; 1524 return port;
1526} 1525}
1527 1526
1528static unsigned short xs_next_srcport(struct sock_xprt *transport, struct socket *sock, unsigned short port) 1527static unsigned short xs_next_srcport(struct sock_xprt *transport, unsigned short port)
1529{ 1528{
1530 if (transport->srcport != 0) 1529 if (transport->srcport != 0)
1531 transport->srcport = 0; 1530 transport->srcport = 0;
@@ -1535,23 +1534,18 @@ static unsigned short xs_next_srcport(struct sock_xprt *transport, struct socket
1535 return xprt_max_resvport; 1534 return xprt_max_resvport;
1536 return --port; 1535 return --port;
1537} 1536}
1538 1537static int xs_bind(struct sock_xprt *transport, struct socket *sock)
1539static int xs_bind4(struct sock_xprt *transport, struct socket *sock)
1540{ 1538{
1541 struct sockaddr_in myaddr = { 1539 struct sockaddr_storage myaddr;
1542 .sin_family = AF_INET,
1543 };
1544 struct sockaddr_in *sa;
1545 int err, nloop = 0; 1540 int err, nloop = 0;
1546 unsigned short port = xs_get_srcport(transport, sock); 1541 unsigned short port = xs_get_srcport(transport);
1547 unsigned short last; 1542 unsigned short last;
1548 1543
1549 sa = (struct sockaddr_in *)&transport->srcaddr; 1544 memcpy(&myaddr, &transport->srcaddr, transport->xprt.addrlen);
1550 myaddr.sin_addr = sa->sin_addr;
1551 do { 1545 do {
1552 myaddr.sin_port = htons(port); 1546 rpc_set_port((struct sockaddr *)&myaddr, port);
1553 err = kernel_bind(sock, (struct sockaddr *) &myaddr, 1547 err = kernel_bind(sock, (struct sockaddr *)&myaddr,
1554 sizeof(myaddr)); 1548 transport->xprt.addrlen);
1555 if (port == 0) 1549 if (port == 0)
1556 break; 1550 break;
1557 if (err == 0) { 1551 if (err == 0) {
@@ -1559,48 +1553,23 @@ static int xs_bind4(struct sock_xprt *transport, struct socket *sock)
1559 break; 1553 break;
1560 } 1554 }
1561 last = port; 1555 last = port;
1562 port = xs_next_srcport(transport, sock, port); 1556 port = xs_next_srcport(transport, port);
1563 if (port > last) 1557 if (port > last)
1564 nloop++; 1558 nloop++;
1565 } while (err == -EADDRINUSE && nloop != 2); 1559 } while (err == -EADDRINUSE && nloop != 2);
1566 dprintk("RPC: %s %pI4:%u: %s (%d)\n",
1567 __func__, &myaddr.sin_addr,
1568 port, err ? "failed" : "ok", err);
1569 return err;
1570}
1571
1572static int xs_bind6(struct sock_xprt *transport, struct socket *sock)
1573{
1574 struct sockaddr_in6 myaddr = {
1575 .sin6_family = AF_INET6,
1576 };
1577 struct sockaddr_in6 *sa;
1578 int err, nloop = 0;
1579 unsigned short port = xs_get_srcport(transport, sock);
1580 unsigned short last;
1581 1560
1582 sa = (struct sockaddr_in6 *)&transport->srcaddr; 1561 if (myaddr.ss_family == AF_INET)
1583 myaddr.sin6_addr = sa->sin6_addr; 1562 dprintk("RPC: %s %pI4:%u: %s (%d)\n", __func__,
1584 do { 1563 &((struct sockaddr_in *)&myaddr)->sin_addr,
1585 myaddr.sin6_port = htons(port); 1564 port, err ? "failed" : "ok", err);
1586 err = kernel_bind(sock, (struct sockaddr *) &myaddr, 1565 else
1587 sizeof(myaddr)); 1566 dprintk("RPC: %s %pI6:%u: %s (%d)\n", __func__,
1588 if (port == 0) 1567 &((struct sockaddr_in6 *)&myaddr)->sin6_addr,
1589 break; 1568 port, err ? "failed" : "ok", err);
1590 if (err == 0) {
1591 transport->srcport = port;
1592 break;
1593 }
1594 last = port;
1595 port = xs_next_srcport(transport, sock, port);
1596 if (port > last)
1597 nloop++;
1598 } while (err == -EADDRINUSE && nloop != 2);
1599 dprintk("RPC: xs_bind6 %pI6:%u: %s (%d)\n",
1600 &myaddr.sin6_addr, port, err ? "failed" : "ok", err);
1601 return err; 1569 return err;
1602} 1570}
1603 1571
1572
1604#ifdef CONFIG_DEBUG_LOCK_ALLOC 1573#ifdef CONFIG_DEBUG_LOCK_ALLOC
1605static struct lock_class_key xs_key[2]; 1574static struct lock_class_key xs_key[2];
1606static struct lock_class_key xs_slock_key[2]; 1575static struct lock_class_key xs_slock_key[2];
@@ -1622,6 +1591,18 @@ static inline void xs_reclassify_socket6(struct socket *sock)
1622 sock_lock_init_class_and_name(sk, "slock-AF_INET6-RPC", 1591 sock_lock_init_class_and_name(sk, "slock-AF_INET6-RPC",
1623 &xs_slock_key[1], "sk_lock-AF_INET6-RPC", &xs_key[1]); 1592 &xs_slock_key[1], "sk_lock-AF_INET6-RPC", &xs_key[1]);
1624} 1593}
1594
1595static inline void xs_reclassify_socket(int family, struct socket *sock)
1596{
1597 switch (family) {
1598 case AF_INET:
1599 xs_reclassify_socket4(sock);
1600 break;
1601 case AF_INET6:
1602 xs_reclassify_socket6(sock);
1603 break;
1604 }
1605}
1625#else 1606#else
1626static inline void xs_reclassify_socket4(struct socket *sock) 1607static inline void xs_reclassify_socket4(struct socket *sock)
1627{ 1608{
@@ -1630,8 +1611,36 @@ static inline void xs_reclassify_socket4(struct socket *sock)
1630static inline void xs_reclassify_socket6(struct socket *sock) 1611static inline void xs_reclassify_socket6(struct socket *sock)
1631{ 1612{
1632} 1613}
1614
1615static inline void xs_reclassify_socket(int family, struct socket *sock)
1616{
1617}
1633#endif 1618#endif
1634 1619
1620static struct socket *xs_create_sock(struct rpc_xprt *xprt,
1621 struct sock_xprt *transport, int family, int type, int protocol)
1622{
1623 struct socket *sock;
1624 int err;
1625
1626 err = __sock_create(xprt->xprt_net, family, type, protocol, &sock, 1);
1627 if (err < 0) {
1628 dprintk("RPC: can't create %d transport socket (%d).\n",
1629 protocol, -err);
1630 goto out;
1631 }
1632 xs_reclassify_socket(family, sock);
1633
1634 if (xs_bind(transport, sock)) {
1635 sock_release(sock);
1636 goto out;
1637 }
1638
1639 return sock;
1640out:
1641 return ERR_PTR(err);
1642}
1643
1635static void xs_udp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock) 1644static void xs_udp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
1636{ 1645{
1637 struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); 1646 struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
@@ -1661,82 +1670,23 @@ static void xs_udp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
1661 xs_udp_do_set_buffer_size(xprt); 1670 xs_udp_do_set_buffer_size(xprt);
1662} 1671}
1663 1672
1664/** 1673static void xs_udp_setup_socket(struct work_struct *work)
1665 * xs_udp_connect_worker4 - set up a UDP socket
1666 * @work: RPC transport to connect
1667 *
1668 * Invoked by a work queue tasklet.
1669 */
1670static void xs_udp_connect_worker4(struct work_struct *work)
1671{ 1674{
1672 struct sock_xprt *transport = 1675 struct sock_xprt *transport =
1673 container_of(work, struct sock_xprt, connect_worker.work); 1676 container_of(work, struct sock_xprt, connect_worker.work);
1674 struct rpc_xprt *xprt = &transport->xprt; 1677 struct rpc_xprt *xprt = &transport->xprt;
1675 struct socket *sock = transport->sock; 1678 struct socket *sock = transport->sock;
1676 int err, status = -EIO; 1679 int status = -EIO;
1677 1680
1678 if (xprt->shutdown) 1681 if (xprt->shutdown)
1679 goto out; 1682 goto out;
1680 1683
1681 /* Start by resetting any existing state */ 1684 /* Start by resetting any existing state */
1682 xs_reset_transport(transport); 1685 xs_reset_transport(transport);
1683 1686 sock = xs_create_sock(xprt, transport,
1684 err = sock_create_kern(PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock); 1687 xs_addr(xprt)->sa_family, SOCK_DGRAM, IPPROTO_UDP);
1685 if (err < 0) { 1688 if (IS_ERR(sock))
1686 dprintk("RPC: can't create UDP transport socket (%d).\n", -err);
1687 goto out; 1689 goto out;
1688 }
1689 xs_reclassify_socket4(sock);
1690
1691 if (xs_bind4(transport, sock)) {
1692 sock_release(sock);
1693 goto out;
1694 }
1695
1696 dprintk("RPC: worker connecting xprt %p via %s to "
1697 "%s (port %s)\n", xprt,
1698 xprt->address_strings[RPC_DISPLAY_PROTO],
1699 xprt->address_strings[RPC_DISPLAY_ADDR],
1700 xprt->address_strings[RPC_DISPLAY_PORT]);
1701
1702 xs_udp_finish_connecting(xprt, sock);
1703 status = 0;
1704out:
1705 xprt_clear_connecting(xprt);
1706 xprt_wake_pending_tasks(xprt, status);
1707}
1708
1709/**
1710 * xs_udp_connect_worker6 - set up a UDP socket
1711 * @work: RPC transport to connect
1712 *
1713 * Invoked by a work queue tasklet.
1714 */
1715static void xs_udp_connect_worker6(struct work_struct *work)
1716{
1717 struct sock_xprt *transport =
1718 container_of(work, struct sock_xprt, connect_worker.work);
1719 struct rpc_xprt *xprt = &transport->xprt;
1720 struct socket *sock = transport->sock;
1721 int err, status = -EIO;
1722
1723 if (xprt->shutdown)
1724 goto out;
1725
1726 /* Start by resetting any existing state */
1727 xs_reset_transport(transport);
1728
1729 err = sock_create_kern(PF_INET6, SOCK_DGRAM, IPPROTO_UDP, &sock);
1730 if (err < 0) {
1731 dprintk("RPC: can't create UDP transport socket (%d).\n", -err);
1732 goto out;
1733 }
1734 xs_reclassify_socket6(sock);
1735
1736 if (xs_bind6(transport, sock) < 0) {
1737 sock_release(sock);
1738 goto out;
1739 }
1740 1690
1741 dprintk("RPC: worker connecting xprt %p via %s to " 1691 dprintk("RPC: worker connecting xprt %p via %s to "
1742 "%s (port %s)\n", xprt, 1692 "%s (port %s)\n", xprt,
@@ -1755,12 +1705,12 @@ out:
1755 * We need to preserve the port number so the reply cache on the server can 1705 * We need to preserve the port number so the reply cache on the server can
1756 * find our cached RPC replies when we get around to reconnecting. 1706 * find our cached RPC replies when we get around to reconnecting.
1757 */ 1707 */
1758static void xs_abort_connection(struct rpc_xprt *xprt, struct sock_xprt *transport) 1708static void xs_abort_connection(struct sock_xprt *transport)
1759{ 1709{
1760 int result; 1710 int result;
1761 struct sockaddr any; 1711 struct sockaddr any;
1762 1712
1763 dprintk("RPC: disconnecting xprt %p to reuse port\n", xprt); 1713 dprintk("RPC: disconnecting xprt %p to reuse port\n", transport);
1764 1714
1765 /* 1715 /*
1766 * Disconnect the transport socket by doing a connect operation 1716 * Disconnect the transport socket by doing a connect operation
@@ -1770,13 +1720,13 @@ static void xs_abort_connection(struct rpc_xprt *xprt, struct sock_xprt *transpo
1770 any.sa_family = AF_UNSPEC; 1720 any.sa_family = AF_UNSPEC;
1771 result = kernel_connect(transport->sock, &any, sizeof(any), 0); 1721 result = kernel_connect(transport->sock, &any, sizeof(any), 0);
1772 if (!result) 1722 if (!result)
1773 xs_sock_mark_closed(xprt); 1723 xs_sock_mark_closed(&transport->xprt);
1774 else 1724 else
1775 dprintk("RPC: AF_UNSPEC connect return code %d\n", 1725 dprintk("RPC: AF_UNSPEC connect return code %d\n",
1776 result); 1726 result);
1777} 1727}
1778 1728
1779static void xs_tcp_reuse_connection(struct rpc_xprt *xprt, struct sock_xprt *transport) 1729static void xs_tcp_reuse_connection(struct sock_xprt *transport)
1780{ 1730{
1781 unsigned int state = transport->inet->sk_state; 1731 unsigned int state = transport->inet->sk_state;
1782 1732
@@ -1799,7 +1749,7 @@ static void xs_tcp_reuse_connection(struct rpc_xprt *xprt, struct sock_xprt *tra
1799 "sk_shutdown set to %d\n", 1749 "sk_shutdown set to %d\n",
1800 __func__, transport->inet->sk_shutdown); 1750 __func__, transport->inet->sk_shutdown);
1801 } 1751 }
1802 xs_abort_connection(xprt, transport); 1752 xs_abort_connection(transport);
1803} 1753}
1804 1754
1805static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock) 1755static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
@@ -1852,12 +1802,12 @@ static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
1852 * 1802 *
1853 * Invoked by a work queue tasklet. 1803 * Invoked by a work queue tasklet.
1854 */ 1804 */
1855static void xs_tcp_setup_socket(struct rpc_xprt *xprt, 1805static void xs_tcp_setup_socket(struct work_struct *work)
1856 struct sock_xprt *transport,
1857 struct socket *(*create_sock)(struct rpc_xprt *,
1858 struct sock_xprt *))
1859{ 1806{
1807 struct sock_xprt *transport =
1808 container_of(work, struct sock_xprt, connect_worker.work);
1860 struct socket *sock = transport->sock; 1809 struct socket *sock = transport->sock;
1810 struct rpc_xprt *xprt = &transport->xprt;
1861 int status = -EIO; 1811 int status = -EIO;
1862 1812
1863 if (xprt->shutdown) 1813 if (xprt->shutdown)
@@ -1865,7 +1815,8 @@ static void xs_tcp_setup_socket(struct rpc_xprt *xprt,
1865 1815
1866 if (!sock) { 1816 if (!sock) {
1867 clear_bit(XPRT_CONNECTION_ABORT, &xprt->state); 1817 clear_bit(XPRT_CONNECTION_ABORT, &xprt->state);
1868 sock = create_sock(xprt, transport); 1818 sock = xs_create_sock(xprt, transport,
1819 xs_addr(xprt)->sa_family, SOCK_STREAM, IPPROTO_TCP);
1869 if (IS_ERR(sock)) { 1820 if (IS_ERR(sock)) {
1870 status = PTR_ERR(sock); 1821 status = PTR_ERR(sock);
1871 goto out; 1822 goto out;
@@ -1876,7 +1827,7 @@ static void xs_tcp_setup_socket(struct rpc_xprt *xprt,
1876 abort_and_exit = test_and_clear_bit(XPRT_CONNECTION_ABORT, 1827 abort_and_exit = test_and_clear_bit(XPRT_CONNECTION_ABORT,
1877 &xprt->state); 1828 &xprt->state);
1878 /* "close" the socket, preserving the local port */ 1829 /* "close" the socket, preserving the local port */
1879 xs_tcp_reuse_connection(xprt, transport); 1830 xs_tcp_reuse_connection(transport);
1880 1831
1881 if (abort_and_exit) 1832 if (abort_and_exit)
1882 goto out_eagain; 1833 goto out_eagain;
@@ -1925,84 +1876,6 @@ out:
1925 xprt_wake_pending_tasks(xprt, status); 1876 xprt_wake_pending_tasks(xprt, status);
1926} 1877}
1927 1878
1928static struct socket *xs_create_tcp_sock4(struct rpc_xprt *xprt,
1929 struct sock_xprt *transport)
1930{
1931 struct socket *sock;
1932 int err;
1933
1934 /* start from scratch */
1935 err = sock_create_kern(PF_INET, SOCK_STREAM, IPPROTO_TCP, &sock);
1936 if (err < 0) {
1937 dprintk("RPC: can't create TCP transport socket (%d).\n",
1938 -err);
1939 goto out_err;
1940 }
1941 xs_reclassify_socket4(sock);
1942
1943 if (xs_bind4(transport, sock) < 0) {
1944 sock_release(sock);
1945 goto out_err;
1946 }
1947 return sock;
1948out_err:
1949 return ERR_PTR(-EIO);
1950}
1951
1952/**
1953 * xs_tcp_connect_worker4 - connect a TCP socket to a remote endpoint
1954 * @work: RPC transport to connect
1955 *
1956 * Invoked by a work queue tasklet.
1957 */
1958static void xs_tcp_connect_worker4(struct work_struct *work)
1959{
1960 struct sock_xprt *transport =
1961 container_of(work, struct sock_xprt, connect_worker.work);
1962 struct rpc_xprt *xprt = &transport->xprt;
1963
1964 xs_tcp_setup_socket(xprt, transport, xs_create_tcp_sock4);
1965}
1966
1967static struct socket *xs_create_tcp_sock6(struct rpc_xprt *xprt,
1968 struct sock_xprt *transport)
1969{
1970 struct socket *sock;
1971 int err;
1972
1973 /* start from scratch */
1974 err = sock_create_kern(PF_INET6, SOCK_STREAM, IPPROTO_TCP, &sock);
1975 if (err < 0) {
1976 dprintk("RPC: can't create TCP transport socket (%d).\n",
1977 -err);
1978 goto out_err;
1979 }
1980 xs_reclassify_socket6(sock);
1981
1982 if (xs_bind6(transport, sock) < 0) {
1983 sock_release(sock);
1984 goto out_err;
1985 }
1986 return sock;
1987out_err:
1988 return ERR_PTR(-EIO);
1989}
1990
1991/**
1992 * xs_tcp_connect_worker6 - connect a TCP socket to a remote endpoint
1993 * @work: RPC transport to connect
1994 *
1995 * Invoked by a work queue tasklet.
1996 */
1997static void xs_tcp_connect_worker6(struct work_struct *work)
1998{
1999 struct sock_xprt *transport =
2000 container_of(work, struct sock_xprt, connect_worker.work);
2001 struct rpc_xprt *xprt = &transport->xprt;
2002
2003 xs_tcp_setup_socket(xprt, transport, xs_create_tcp_sock6);
2004}
2005
2006/** 1879/**
2007 * xs_connect - connect a socket to a remote endpoint 1880 * xs_connect - connect a socket to a remote endpoint
2008 * @task: address of RPC task that manages state of connect request 1881 * @task: address of RPC task that manages state of connect request
@@ -2262,6 +2135,31 @@ static struct rpc_xprt_ops bc_tcp_ops = {
2262 .print_stats = xs_tcp_print_stats, 2135 .print_stats = xs_tcp_print_stats,
2263}; 2136};
2264 2137
2138static int xs_init_anyaddr(const int family, struct sockaddr *sap)
2139{
2140 static const struct sockaddr_in sin = {
2141 .sin_family = AF_INET,
2142 .sin_addr.s_addr = htonl(INADDR_ANY),
2143 };
2144 static const struct sockaddr_in6 sin6 = {
2145 .sin6_family = AF_INET6,
2146 .sin6_addr = IN6ADDR_ANY_INIT,
2147 };
2148
2149 switch (family) {
2150 case AF_INET:
2151 memcpy(sap, &sin, sizeof(sin));
2152 break;
2153 case AF_INET6:
2154 memcpy(sap, &sin6, sizeof(sin6));
2155 break;
2156 default:
2157 dprintk("RPC: %s: Bad address family\n", __func__);
2158 return -EAFNOSUPPORT;
2159 }
2160 return 0;
2161}
2162
2265static struct rpc_xprt *xs_setup_xprt(struct xprt_create *args, 2163static struct rpc_xprt *xs_setup_xprt(struct xprt_create *args,
2266 unsigned int slot_table_size) 2164 unsigned int slot_table_size)
2267{ 2165{
@@ -2273,27 +2171,25 @@ static struct rpc_xprt *xs_setup_xprt(struct xprt_create *args,
2273 return ERR_PTR(-EBADF); 2171 return ERR_PTR(-EBADF);
2274 } 2172 }
2275 2173
2276 new = kzalloc(sizeof(*new), GFP_KERNEL); 2174 xprt = xprt_alloc(args->net, sizeof(*new), slot_table_size);
2277 if (new == NULL) { 2175 if (xprt == NULL) {
2278 dprintk("RPC: xs_setup_xprt: couldn't allocate " 2176 dprintk("RPC: xs_setup_xprt: couldn't allocate "
2279 "rpc_xprt\n"); 2177 "rpc_xprt\n");
2280 return ERR_PTR(-ENOMEM); 2178 return ERR_PTR(-ENOMEM);
2281 } 2179 }
2282 xprt = &new->xprt;
2283
2284 xprt->max_reqs = slot_table_size;
2285 xprt->slot = kcalloc(xprt->max_reqs, sizeof(struct rpc_rqst), GFP_KERNEL);
2286 if (xprt->slot == NULL) {
2287 kfree(xprt);
2288 dprintk("RPC: xs_setup_xprt: couldn't allocate slot "
2289 "table\n");
2290 return ERR_PTR(-ENOMEM);
2291 }
2292 2180
2181 new = container_of(xprt, struct sock_xprt, xprt);
2293 memcpy(&xprt->addr, args->dstaddr, args->addrlen); 2182 memcpy(&xprt->addr, args->dstaddr, args->addrlen);
2294 xprt->addrlen = args->addrlen; 2183 xprt->addrlen = args->addrlen;
2295 if (args->srcaddr) 2184 if (args->srcaddr)
2296 memcpy(&new->srcaddr, args->srcaddr, args->addrlen); 2185 memcpy(&new->srcaddr, args->srcaddr, args->addrlen);
2186 else {
2187 int err;
2188 err = xs_init_anyaddr(args->dstaddr->sa_family,
2189 (struct sockaddr *)&new->srcaddr);
2190 if (err != 0)
2191 return ERR_PTR(err);
2192 }
2297 2193
2298 return xprt; 2194 return xprt;
2299} 2195}
@@ -2341,7 +2237,7 @@ static struct rpc_xprt *xs_setup_udp(struct xprt_create *args)
2341 xprt_set_bound(xprt); 2237 xprt_set_bound(xprt);
2342 2238
2343 INIT_DELAYED_WORK(&transport->connect_worker, 2239 INIT_DELAYED_WORK(&transport->connect_worker,
2344 xs_udp_connect_worker4); 2240 xs_udp_setup_socket);
2345 xs_format_peer_addresses(xprt, "udp", RPCBIND_NETID_UDP); 2241 xs_format_peer_addresses(xprt, "udp", RPCBIND_NETID_UDP);
2346 break; 2242 break;
2347 case AF_INET6: 2243 case AF_INET6:
@@ -2349,7 +2245,7 @@ static struct rpc_xprt *xs_setup_udp(struct xprt_create *args)
2349 xprt_set_bound(xprt); 2245 xprt_set_bound(xprt);
2350 2246
2351 INIT_DELAYED_WORK(&transport->connect_worker, 2247 INIT_DELAYED_WORK(&transport->connect_worker,
2352 xs_udp_connect_worker6); 2248 xs_udp_setup_socket);
2353 xs_format_peer_addresses(xprt, "udp", RPCBIND_NETID_UDP6); 2249 xs_format_peer_addresses(xprt, "udp", RPCBIND_NETID_UDP6);
2354 break; 2250 break;
2355 default: 2251 default:
@@ -2371,8 +2267,7 @@ static struct rpc_xprt *xs_setup_udp(struct xprt_create *args)
2371 return xprt; 2267 return xprt;
2372 ret = ERR_PTR(-EINVAL); 2268 ret = ERR_PTR(-EINVAL);
2373out_err: 2269out_err:
2374 kfree(xprt->slot); 2270 xprt_free(xprt);
2375 kfree(xprt);
2376 return ret; 2271 return ret;
2377} 2272}
2378 2273
@@ -2416,7 +2311,7 @@ static struct rpc_xprt *xs_setup_tcp(struct xprt_create *args)
2416 xprt_set_bound(xprt); 2311 xprt_set_bound(xprt);
2417 2312
2418 INIT_DELAYED_WORK(&transport->connect_worker, 2313 INIT_DELAYED_WORK(&transport->connect_worker,
2419 xs_tcp_connect_worker4); 2314 xs_tcp_setup_socket);
2420 xs_format_peer_addresses(xprt, "tcp", RPCBIND_NETID_TCP); 2315 xs_format_peer_addresses(xprt, "tcp", RPCBIND_NETID_TCP);
2421 break; 2316 break;
2422 case AF_INET6: 2317 case AF_INET6:
@@ -2424,7 +2319,7 @@ static struct rpc_xprt *xs_setup_tcp(struct xprt_create *args)
2424 xprt_set_bound(xprt); 2319 xprt_set_bound(xprt);
2425 2320
2426 INIT_DELAYED_WORK(&transport->connect_worker, 2321 INIT_DELAYED_WORK(&transport->connect_worker,
2427 xs_tcp_connect_worker6); 2322 xs_tcp_setup_socket);
2428 xs_format_peer_addresses(xprt, "tcp", RPCBIND_NETID_TCP6); 2323 xs_format_peer_addresses(xprt, "tcp", RPCBIND_NETID_TCP6);
2429 break; 2324 break;
2430 default: 2325 default:
@@ -2447,8 +2342,7 @@ static struct rpc_xprt *xs_setup_tcp(struct xprt_create *args)
2447 return xprt; 2342 return xprt;
2448 ret = ERR_PTR(-EINVAL); 2343 ret = ERR_PTR(-EINVAL);
2449out_err: 2344out_err:
2450 kfree(xprt->slot); 2345 xprt_free(xprt);
2451 kfree(xprt);
2452 return ret; 2346 return ret;
2453} 2347}
2454 2348
@@ -2507,15 +2401,10 @@ static struct rpc_xprt *xs_setup_bc_tcp(struct xprt_create *args)
2507 goto out_err; 2401 goto out_err;
2508 } 2402 }
2509 2403
2510 if (xprt_bound(xprt)) 2404 dprintk("RPC: set up xprt to %s (port %s) via %s\n",
2511 dprintk("RPC: set up xprt to %s (port %s) via %s\n", 2405 xprt->address_strings[RPC_DISPLAY_ADDR],
2512 xprt->address_strings[RPC_DISPLAY_ADDR], 2406 xprt->address_strings[RPC_DISPLAY_PORT],
2513 xprt->address_strings[RPC_DISPLAY_PORT], 2407 xprt->address_strings[RPC_DISPLAY_PROTO]);
2514 xprt->address_strings[RPC_DISPLAY_PROTO]);
2515 else
2516 dprintk("RPC: set up xprt to %s (autobind) via %s\n",
2517 xprt->address_strings[RPC_DISPLAY_ADDR],
2518 xprt->address_strings[RPC_DISPLAY_PROTO]);
2519 2408
2520 /* 2409 /*
2521 * Since we don't want connections for the backchannel, we set 2410 * Since we don't want connections for the backchannel, we set
@@ -2528,8 +2417,7 @@ static struct rpc_xprt *xs_setup_bc_tcp(struct xprt_create *args)
2528 return xprt; 2417 return xprt;
2529 ret = ERR_PTR(-EINVAL); 2418 ret = ERR_PTR(-EINVAL);
2530out_err: 2419out_err:
2531 kfree(xprt->slot); 2420 xprt_free(xprt);
2532 kfree(xprt);
2533 return ret; 2421 return ret;
2534} 2422}
2535 2423