aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/9p/v9fs.c33
-rw-r--r--fs/9p/v9fs_vfs.h1
-rw-r--r--fs/9p/vfs_file.c19
-rw-r--r--fs/9p/vfs_inode.c2
-rw-r--r--fs/cifs/CHANGES4
-rw-r--r--fs/cifs/cifsfs.h2
-rw-r--r--fs/cifs/cifsglob.h1
-rw-r--r--fs/cifs/connect.c30
-rw-r--r--fs/cifs/inode.c12
-rw-r--r--fs/cifs/readdir.c8
-rw-r--r--fs/cifs/sess.c11
-rw-r--r--fs/compat_ioctl.c3
-rw-r--r--fs/exec.c21
-rw-r--r--fs/nfsd/export.c10
-rw-r--r--fs/ocfs2/aops.c4
-rw-r--r--fs/ocfs2/buffer_head_io.c2
-rw-r--r--fs/ocfs2/cluster/heartbeat.c6
-rw-r--r--fs/ocfs2/cluster/tcp.c10
-rw-r--r--fs/ocfs2/cluster/tcp_internal.h4
-rw-r--r--fs/ocfs2/dlm/dlmapi.h2
-rw-r--r--fs/ocfs2/dlm/dlmast.c2
-rw-r--r--fs/ocfs2/dlm/dlmconvert.c2
-rw-r--r--fs/ocfs2/dlm/dlmdebug.c2
-rw-r--r--fs/ocfs2/dlm/dlmdomain.c2
-rw-r--r--fs/ocfs2/dlm/dlmlock.c2
-rw-r--r--fs/ocfs2/dlm/dlmmaster.c38
-rw-r--r--fs/ocfs2/dlm/dlmrecovery.c147
-rw-r--r--fs/ocfs2/dlm/dlmunlock.c8
-rw-r--r--fs/ocfs2/dlmglue.c85
-rw-r--r--fs/ocfs2/export.c2
-rw-r--r--fs/ocfs2/extent_map.c2
-rw-r--r--fs/ocfs2/file.c18
-rw-r--r--fs/ocfs2/inode.c4
-rw-r--r--fs/ocfs2/ioctl.c14
-rw-r--r--fs/ocfs2/journal.c2
-rw-r--r--fs/ocfs2/ocfs2.h4
-rw-r--r--fs/ocfs2/ocfs2_fs.h11
-rw-r--r--fs/ocfs2/refcounttree.c12
-rw-r--r--fs/ocfs2/stack_o2cb.c12
-rw-r--r--fs/ocfs2/super.c2
-rw-r--r--fs/ocfs2/symlink.c10
-rw-r--r--fs/ocfs2/uptodate.c4
42 files changed, 403 insertions, 167 deletions
diff --git a/fs/9p/v9fs.c b/fs/9p/v9fs.c
index cf62b05e296a..7d6c2139891d 100644
--- a/fs/9p/v9fs.c
+++ b/fs/9p/v9fs.c
@@ -84,7 +84,7 @@ static const match_table_t tokens = {
84 84
85static int v9fs_parse_options(struct v9fs_session_info *v9ses, char *opts) 85static int v9fs_parse_options(struct v9fs_session_info *v9ses, char *opts)
86{ 86{
87 char *options; 87 char *options, *tmp_options;
88 substring_t args[MAX_OPT_ARGS]; 88 substring_t args[MAX_OPT_ARGS];
89 char *p; 89 char *p;
90 int option = 0; 90 int option = 0;
@@ -102,9 +102,12 @@ static int v9fs_parse_options(struct v9fs_session_info *v9ses, char *opts)
102 if (!opts) 102 if (!opts)
103 return 0; 103 return 0;
104 104
105 options = kstrdup(opts, GFP_KERNEL); 105 tmp_options = kstrdup(opts, GFP_KERNEL);
106 if (!options) 106 if (!tmp_options) {
107 ret = -ENOMEM;
107 goto fail_option_alloc; 108 goto fail_option_alloc;
109 }
110 options = tmp_options;
108 111
109 while ((p = strsep(&options, ",")) != NULL) { 112 while ((p = strsep(&options, ",")) != NULL) {
110 int token; 113 int token;
@@ -159,8 +162,12 @@ static int v9fs_parse_options(struct v9fs_session_info *v9ses, char *opts)
159 break; 162 break;
160 case Opt_cache: 163 case Opt_cache:
161 s = match_strdup(&args[0]); 164 s = match_strdup(&args[0]);
162 if (!s) 165 if (!s) {
163 goto fail_option_alloc; 166 ret = -ENOMEM;
167 P9_DPRINTK(P9_DEBUG_ERROR,
168 "problem allocating copy of cache arg\n");
169 goto free_and_return;
170 }
164 171
165 if (strcmp(s, "loose") == 0) 172 if (strcmp(s, "loose") == 0)
166 v9ses->cache = CACHE_LOOSE; 173 v9ses->cache = CACHE_LOOSE;
@@ -173,8 +180,12 @@ static int v9fs_parse_options(struct v9fs_session_info *v9ses, char *opts)
173 180
174 case Opt_access: 181 case Opt_access:
175 s = match_strdup(&args[0]); 182 s = match_strdup(&args[0]);
176 if (!s) 183 if (!s) {
177 goto fail_option_alloc; 184 ret = -ENOMEM;
185 P9_DPRINTK(P9_DEBUG_ERROR,
186 "problem allocating copy of access arg\n");
187 goto free_and_return;
188 }
178 189
179 v9ses->flags &= ~V9FS_ACCESS_MASK; 190 v9ses->flags &= ~V9FS_ACCESS_MASK;
180 if (strcmp(s, "user") == 0) 191 if (strcmp(s, "user") == 0)
@@ -194,13 +205,11 @@ static int v9fs_parse_options(struct v9fs_session_info *v9ses, char *opts)
194 continue; 205 continue;
195 } 206 }
196 } 207 }
197 kfree(options);
198 return ret;
199 208
209free_and_return:
210 kfree(tmp_options);
200fail_option_alloc: 211fail_option_alloc:
201 P9_DPRINTK(P9_DEBUG_ERROR, 212 return ret;
202 "failed to allocate copy of option argument\n");
203 return -ENOMEM;
204} 213}
205 214
206/** 215/**
diff --git a/fs/9p/v9fs_vfs.h b/fs/9p/v9fs_vfs.h
index 3a7560e35865..ed835836e0dc 100644
--- a/fs/9p/v9fs_vfs.h
+++ b/fs/9p/v9fs_vfs.h
@@ -60,3 +60,4 @@ void v9fs_dentry_release(struct dentry *);
60int v9fs_uflags2omode(int uflags, int extended); 60int v9fs_uflags2omode(int uflags, int extended);
61 61
62ssize_t v9fs_file_readn(struct file *, char *, char __user *, u32, u64); 62ssize_t v9fs_file_readn(struct file *, char *, char __user *, u32, u64);
63void v9fs_blank_wstat(struct p9_wstat *wstat);
diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c
index 3902bf43a088..74a0461a9ac0 100644
--- a/fs/9p/vfs_file.c
+++ b/fs/9p/vfs_file.c
@@ -257,6 +257,23 @@ v9fs_file_write(struct file *filp, const char __user * data,
257 return total; 257 return total;
258} 258}
259 259
260static int v9fs_file_fsync(struct file *filp, struct dentry *dentry,
261 int datasync)
262{
263 struct p9_fid *fid;
264 struct p9_wstat wstat;
265 int retval;
266
267 P9_DPRINTK(P9_DEBUG_VFS, "filp %p dentry %p datasync %x\n", filp,
268 dentry, datasync);
269
270 fid = filp->private_data;
271 v9fs_blank_wstat(&wstat);
272
273 retval = p9_client_wstat(fid, &wstat);
274 return retval;
275}
276
260static const struct file_operations v9fs_cached_file_operations = { 277static const struct file_operations v9fs_cached_file_operations = {
261 .llseek = generic_file_llseek, 278 .llseek = generic_file_llseek,
262 .read = do_sync_read, 279 .read = do_sync_read,
@@ -266,6 +283,7 @@ static const struct file_operations v9fs_cached_file_operations = {
266 .release = v9fs_dir_release, 283 .release = v9fs_dir_release,
267 .lock = v9fs_file_lock, 284 .lock = v9fs_file_lock,
268 .mmap = generic_file_readonly_mmap, 285 .mmap = generic_file_readonly_mmap,
286 .fsync = v9fs_file_fsync,
269}; 287};
270 288
271const struct file_operations v9fs_file_operations = { 289const struct file_operations v9fs_file_operations = {
@@ -276,4 +294,5 @@ const struct file_operations v9fs_file_operations = {
276 .release = v9fs_dir_release, 294 .release = v9fs_dir_release,
277 .lock = v9fs_file_lock, 295 .lock = v9fs_file_lock,
278 .mmap = generic_file_readonly_mmap, 296 .mmap = generic_file_readonly_mmap,
297 .fsync = v9fs_file_fsync,
279}; 298};
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index 9d03d1ebca6f..a407fa3388c0 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -176,7 +176,7 @@ int v9fs_uflags2omode(int uflags, int extended)
176 * 176 *
177 */ 177 */
178 178
179static void 179void
180v9fs_blank_wstat(struct p9_wstat *wstat) 180v9fs_blank_wstat(struct p9_wstat *wstat)
181{ 181{
182 wstat->type = ~0; 182 wstat->type = ~0;
diff --git a/fs/cifs/CHANGES b/fs/cifs/CHANGES
index 7b2600b380d7..49503d2edc7e 100644
--- a/fs/cifs/CHANGES
+++ b/fs/cifs/CHANGES
@@ -1,3 +1,7 @@
1Version 1.62
2------------
3Add sockopt=TCP_NODELAY mount option.
4
1Version 1.61 5Version 1.61
2------------ 6------------
3Fix append problem to Samba servers (files opened with O_APPEND could 7Fix append problem to Samba servers (files opened with O_APPEND could
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index ac2b24c192f8..78c1b86d55f6 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -113,5 +113,5 @@ extern long cifs_ioctl(struct file *filep, unsigned int cmd, unsigned long arg);
113extern const struct export_operations cifs_export_ops; 113extern const struct export_operations cifs_export_ops;
114#endif /* EXPERIMENTAL */ 114#endif /* EXPERIMENTAL */
115 115
116#define CIFS_VERSION "1.61" 116#define CIFS_VERSION "1.62"
117#endif /* _CIFSFS_H */ 117#endif /* _CIFSFS_H */
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index 4b35f7ec0583..ed751bb657db 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -149,6 +149,7 @@ struct TCP_Server_Info {
149 bool svlocal:1; /* local server or remote */ 149 bool svlocal:1; /* local server or remote */
150 bool noblocksnd; /* use blocking sendmsg */ 150 bool noblocksnd; /* use blocking sendmsg */
151 bool noautotune; /* do not autotune send buf sizes */ 151 bool noautotune; /* do not autotune send buf sizes */
152 bool tcp_nodelay;
152 atomic_t inFlight; /* number of requests on the wire to server */ 153 atomic_t inFlight; /* number of requests on the wire to server */
153#ifdef CONFIG_CIFS_STATS2 154#ifdef CONFIG_CIFS_STATS2
154 atomic_t inSend; /* requests trying to send */ 155 atomic_t inSend; /* requests trying to send */
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 3bbcaa716b3c..2e9e09ca0e30 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -98,7 +98,7 @@ struct smb_vol {
98 bool nostrictsync:1; /* do not force expensive SMBflush on every sync */ 98 bool nostrictsync:1; /* do not force expensive SMBflush on every sync */
99 unsigned int rsize; 99 unsigned int rsize;
100 unsigned int wsize; 100 unsigned int wsize;
101 unsigned int sockopt; 101 bool sockopt_tcp_nodelay:1;
102 unsigned short int port; 102 unsigned short int port;
103 char *prepath; 103 char *prepath;
104}; 104};
@@ -1142,9 +1142,11 @@ cifs_parse_mount_options(char *options, const char *devname,
1142 simple_strtoul(value, &value, 0); 1142 simple_strtoul(value, &value, 0);
1143 } 1143 }
1144 } else if (strnicmp(data, "sockopt", 5) == 0) { 1144 } else if (strnicmp(data, "sockopt", 5) == 0) {
1145 if (value && *value) { 1145 if (!value || !*value) {
1146 vol->sockopt = 1146 cERROR(1, ("no socket option specified"));
1147 simple_strtoul(value, &value, 0); 1147 continue;
1148 } else if (strnicmp(value, "TCP_NODELAY", 11) == 0) {
1149 vol->sockopt_tcp_nodelay = 1;
1148 } 1150 }
1149 } else if (strnicmp(data, "netbiosname", 4) == 0) { 1151 } else if (strnicmp(data, "netbiosname", 4) == 0) {
1150 if (!value || !*value || (*value == ' ')) { 1152 if (!value || !*value || (*value == ' ')) {
@@ -1514,6 +1516,7 @@ cifs_get_tcp_session(struct smb_vol *volume_info)
1514 1516
1515 tcp_ses->noblocksnd = volume_info->noblocksnd; 1517 tcp_ses->noblocksnd = volume_info->noblocksnd;
1516 tcp_ses->noautotune = volume_info->noautotune; 1518 tcp_ses->noautotune = volume_info->noautotune;
1519 tcp_ses->tcp_nodelay = volume_info->sockopt_tcp_nodelay;
1517 atomic_set(&tcp_ses->inFlight, 0); 1520 atomic_set(&tcp_ses->inFlight, 0);
1518 init_waitqueue_head(&tcp_ses->response_q); 1521 init_waitqueue_head(&tcp_ses->response_q);
1519 init_waitqueue_head(&tcp_ses->request_q); 1522 init_waitqueue_head(&tcp_ses->request_q);
@@ -1764,6 +1767,7 @@ static int
1764ipv4_connect(struct TCP_Server_Info *server) 1767ipv4_connect(struct TCP_Server_Info *server)
1765{ 1768{
1766 int rc = 0; 1769 int rc = 0;
1770 int val;
1767 bool connected = false; 1771 bool connected = false;
1768 __be16 orig_port = 0; 1772 __be16 orig_port = 0;
1769 struct socket *socket = server->ssocket; 1773 struct socket *socket = server->ssocket;
@@ -1845,6 +1849,14 @@ ipv4_connect(struct TCP_Server_Info *server)
1845 socket->sk->sk_rcvbuf = 140 * 1024; 1849 socket->sk->sk_rcvbuf = 140 * 1024;
1846 } 1850 }
1847 1851
1852 if (server->tcp_nodelay) {
1853 val = 1;
1854 rc = kernel_setsockopt(socket, SOL_TCP, TCP_NODELAY,
1855 (char *)&val, sizeof(val));
1856 if (rc)
1857 cFYI(1, ("set TCP_NODELAY socket option error %d", rc));
1858 }
1859
1848 cFYI(1, ("sndbuf %d rcvbuf %d rcvtimeo 0x%lx", 1860 cFYI(1, ("sndbuf %d rcvbuf %d rcvtimeo 0x%lx",
1849 socket->sk->sk_sndbuf, 1861 socket->sk->sk_sndbuf,
1850 socket->sk->sk_rcvbuf, socket->sk->sk_rcvtimeo)); 1862 socket->sk->sk_rcvbuf, socket->sk->sk_rcvtimeo));
@@ -1916,6 +1928,7 @@ static int
1916ipv6_connect(struct TCP_Server_Info *server) 1928ipv6_connect(struct TCP_Server_Info *server)
1917{ 1929{
1918 int rc = 0; 1930 int rc = 0;
1931 int val;
1919 bool connected = false; 1932 bool connected = false;
1920 __be16 orig_port = 0; 1933 __be16 orig_port = 0;
1921 struct socket *socket = server->ssocket; 1934 struct socket *socket = server->ssocket;
@@ -1987,6 +2000,15 @@ ipv6_connect(struct TCP_Server_Info *server)
1987 */ 2000 */
1988 socket->sk->sk_rcvtimeo = 7 * HZ; 2001 socket->sk->sk_rcvtimeo = 7 * HZ;
1989 socket->sk->sk_sndtimeo = 5 * HZ; 2002 socket->sk->sk_sndtimeo = 5 * HZ;
2003
2004 if (server->tcp_nodelay) {
2005 val = 1;
2006 rc = kernel_setsockopt(socket, SOL_TCP, TCP_NODELAY,
2007 (char *)&val, sizeof(val));
2008 if (rc)
2009 cFYI(1, ("set TCP_NODELAY socket option error %d", rc));
2010 }
2011
1990 server->ssocket = socket; 2012 server->ssocket = socket;
1991 2013
1992 return rc; 2014 return rc;
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index cf18ee765590..e3fda978f481 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -1762,8 +1762,18 @@ cifs_setattr_unix(struct dentry *direntry, struct iattr *attrs)
1762 CIFS_MOUNT_MAP_SPECIAL_CHR); 1762 CIFS_MOUNT_MAP_SPECIAL_CHR);
1763 } 1763 }
1764 1764
1765 if (!rc) 1765 if (!rc) {
1766 rc = inode_setattr(inode, attrs); 1766 rc = inode_setattr(inode, attrs);
1767
1768 /* force revalidate when any of these times are set since some
1769 of the fs types (eg ext3, fat) do not have fine enough
1770 time granularity to match protocol, and we do not have a
1771 a way (yet) to query the server fs's time granularity (and
1772 whether it rounds times down).
1773 */
1774 if (!rc && (attrs->ia_valid & (ATTR_MTIME | ATTR_CTIME)))
1775 cifsInode->time = 0;
1776 }
1767out: 1777out:
1768 kfree(args); 1778 kfree(args);
1769 kfree(full_path); 1779 kfree(full_path);
diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c
index f84062f9a985..c343b14ba2d3 100644
--- a/fs/cifs/readdir.c
+++ b/fs/cifs/readdir.c
@@ -77,6 +77,11 @@ cifs_readdir_lookup(struct dentry *parent, struct qstr *name,
77 77
78 cFYI(1, ("For %s", name->name)); 78 cFYI(1, ("For %s", name->name));
79 79
80 if (parent->d_op && parent->d_op->d_hash)
81 parent->d_op->d_hash(parent, name);
82 else
83 name->hash = full_name_hash(name->name, name->len);
84
80 dentry = d_lookup(parent, name); 85 dentry = d_lookup(parent, name);
81 if (dentry) { 86 if (dentry) {
82 /* FIXME: check for inode number changes? */ 87 /* FIXME: check for inode number changes? */
@@ -666,12 +671,11 @@ static int cifs_get_name_from_search_buf(struct qstr *pqst,
666 min(len, max_len), nlt, 671 min(len, max_len), nlt,
667 cifs_sb->mnt_cifs_flags & 672 cifs_sb->mnt_cifs_flags &
668 CIFS_MOUNT_MAP_SPECIAL_CHR); 673 CIFS_MOUNT_MAP_SPECIAL_CHR);
674 pqst->len -= nls_nullsize(nlt);
669 } else { 675 } else {
670 pqst->name = filename; 676 pqst->name = filename;
671 pqst->len = len; 677 pqst->len = len;
672 } 678 }
673 pqst->hash = full_name_hash(pqst->name, pqst->len);
674/* cFYI(1, ("filldir on %s",pqst->name)); */
675 return rc; 679 return rc;
676} 680}
677 681
diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c
index 7085a6275c4c..aaa9c1c5a5bd 100644
--- a/fs/cifs/sess.c
+++ b/fs/cifs/sess.c
@@ -223,9 +223,9 @@ static void unicode_ssetup_strings(char **pbcc_area, struct cifsSesInfo *ses,
223 /* null user mount */ 223 /* null user mount */
224 *bcc_ptr = 0; 224 *bcc_ptr = 0;
225 *(bcc_ptr+1) = 0; 225 *(bcc_ptr+1) = 0;
226 } else { /* 300 should be long enough for any conceivable user name */ 226 } else {
227 bytes_ret = cifs_strtoUCS((__le16 *) bcc_ptr, ses->userName, 227 bytes_ret = cifs_strtoUCS((__le16 *) bcc_ptr, ses->userName,
228 300, nls_cp); 228 MAX_USERNAME_SIZE, nls_cp);
229 } 229 }
230 bcc_ptr += 2 * bytes_ret; 230 bcc_ptr += 2 * bytes_ret;
231 bcc_ptr += 2; /* account for null termination */ 231 bcc_ptr += 2; /* account for null termination */
@@ -246,11 +246,10 @@ static void ascii_ssetup_strings(char **pbcc_area, struct cifsSesInfo *ses,
246 /* copy user */ 246 /* copy user */
247 if (ses->userName == NULL) { 247 if (ses->userName == NULL) {
248 /* BB what about null user mounts - check that we do this BB */ 248 /* BB what about null user mounts - check that we do this BB */
249 } else { /* 300 should be long enough for any conceivable user name */ 249 } else {
250 strncpy(bcc_ptr, ses->userName, 300); 250 strncpy(bcc_ptr, ses->userName, MAX_USERNAME_SIZE);
251 } 251 }
252 /* BB improve check for overflow */ 252 bcc_ptr += strnlen(ses->userName, MAX_USERNAME_SIZE);
253 bcc_ptr += strnlen(ses->userName, 300);
254 *bcc_ptr = 0; 253 *bcc_ptr = 0;
255 bcc_ptr++; /* account for null termination */ 254 bcc_ptr++; /* account for null termination */
256 255
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index 7cbbc7ab4b50..0ca9ec4a79c3 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -942,6 +942,7 @@ COMPATIBLE_IOCTL(TCSETSF)
942COMPATIBLE_IOCTL(TIOCLINUX) 942COMPATIBLE_IOCTL(TIOCLINUX)
943COMPATIBLE_IOCTL(TIOCSBRK) 943COMPATIBLE_IOCTL(TIOCSBRK)
944COMPATIBLE_IOCTL(TIOCCBRK) 944COMPATIBLE_IOCTL(TIOCCBRK)
945COMPATIBLE_IOCTL(TIOCGSID)
945COMPATIBLE_IOCTL(TIOCGICOUNT) 946COMPATIBLE_IOCTL(TIOCGICOUNT)
946/* Little t */ 947/* Little t */
947COMPATIBLE_IOCTL(TIOCGETD) 948COMPATIBLE_IOCTL(TIOCGETD)
@@ -1044,6 +1045,8 @@ COMPATIBLE_IOCTL(FIOQSIZE)
1044#ifdef CONFIG_BLOCK 1045#ifdef CONFIG_BLOCK
1045/* loop */ 1046/* loop */
1046IGNORE_IOCTL(LOOP_CLR_FD) 1047IGNORE_IOCTL(LOOP_CLR_FD)
1048/* md calls this on random blockdevs */
1049IGNORE_IOCTL(RAID_VERSION)
1047/* SG stuff */ 1050/* SG stuff */
1048COMPATIBLE_IOCTL(SG_SET_TIMEOUT) 1051COMPATIBLE_IOCTL(SG_SET_TIMEOUT)
1049COMPATIBLE_IOCTL(SG_GET_TIMEOUT) 1052COMPATIBLE_IOCTL(SG_GET_TIMEOUT)
diff --git a/fs/exec.c b/fs/exec.c
index 0790a107ff7e..e95c692ef0e4 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -571,6 +571,9 @@ int setup_arg_pages(struct linux_binprm *bprm,
571 struct vm_area_struct *prev = NULL; 571 struct vm_area_struct *prev = NULL;
572 unsigned long vm_flags; 572 unsigned long vm_flags;
573 unsigned long stack_base; 573 unsigned long stack_base;
574 unsigned long stack_size;
575 unsigned long stack_expand;
576 unsigned long rlim_stack;
574 577
575#ifdef CONFIG_STACK_GROWSUP 578#ifdef CONFIG_STACK_GROWSUP
576 /* Limit stack size to 1GB */ 579 /* Limit stack size to 1GB */
@@ -627,10 +630,24 @@ int setup_arg_pages(struct linux_binprm *bprm,
627 goto out_unlock; 630 goto out_unlock;
628 } 631 }
629 632
633 stack_expand = EXTRA_STACK_VM_PAGES * PAGE_SIZE;
634 stack_size = vma->vm_end - vma->vm_start;
635 /*
636 * Align this down to a page boundary as expand_stack
637 * will align it up.
638 */
639 rlim_stack = rlimit(RLIMIT_STACK) & PAGE_MASK;
640 rlim_stack = min(rlim_stack, stack_size);
630#ifdef CONFIG_STACK_GROWSUP 641#ifdef CONFIG_STACK_GROWSUP
631 stack_base = vma->vm_end + EXTRA_STACK_VM_PAGES * PAGE_SIZE; 642 if (stack_size + stack_expand > rlim_stack)
643 stack_base = vma->vm_start + rlim_stack;
644 else
645 stack_base = vma->vm_end + stack_expand;
632#else 646#else
633 stack_base = vma->vm_start - EXTRA_STACK_VM_PAGES * PAGE_SIZE; 647 if (stack_size + stack_expand > rlim_stack)
648 stack_base = vma->vm_end - rlim_stack;
649 else
650 stack_base = vma->vm_start - stack_expand;
634#endif 651#endif
635 ret = expand_stack(vma, stack_base); 652 ret = expand_stack(vma, stack_base);
636 if (ret) 653 if (ret)
diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c
index c487810a2366..a0c4016413f1 100644
--- a/fs/nfsd/export.c
+++ b/fs/nfsd/export.c
@@ -1316,19 +1316,11 @@ rqst_exp_parent(struct svc_rqst *rqstp, struct path *path)
1316 1316
1317static struct svc_export *find_fsidzero_export(struct svc_rqst *rqstp) 1317static struct svc_export *find_fsidzero_export(struct svc_rqst *rqstp)
1318{ 1318{
1319 struct svc_export *exp;
1320 u32 fsidv[2]; 1319 u32 fsidv[2];
1321 1320
1322 mk_fsid(FSID_NUM, fsidv, 0, 0, 0, NULL); 1321 mk_fsid(FSID_NUM, fsidv, 0, 0, 0, NULL);
1323 1322
1324 exp = rqst_exp_find(rqstp, FSID_NUM, fsidv); 1323 return rqst_exp_find(rqstp, FSID_NUM, fsidv);
1325 /*
1326 * We shouldn't have accepting an nfsv4 request at all if we
1327 * don't have a pseudoexport!:
1328 */
1329 if (IS_ERR(exp) && PTR_ERR(exp) == -ENOENT)
1330 exp = ERR_PTR(-ESERVERFAULT);
1331 return exp;
1332} 1324}
1333 1325
1334/* 1326/*
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index 3dae4a13f6e4..7e9df11260f4 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -599,7 +599,7 @@ bail:
599 return ret; 599 return ret;
600} 600}
601 601
602/* 602/*
603 * ocfs2_dio_end_io is called by the dio core when a dio is finished. We're 603 * ocfs2_dio_end_io is called by the dio core when a dio is finished. We're
604 * particularly interested in the aio/dio case. Like the core uses 604 * particularly interested in the aio/dio case. Like the core uses
605 * i_alloc_sem, we use the rw_lock DLM lock to protect io on one node from 605 * i_alloc_sem, we use the rw_lock DLM lock to protect io on one node from
@@ -670,7 +670,7 @@ static ssize_t ocfs2_direct_IO(int rw,
670 670
671 ret = blockdev_direct_IO_no_locking(rw, iocb, inode, 671 ret = blockdev_direct_IO_no_locking(rw, iocb, inode,
672 inode->i_sb->s_bdev, iov, offset, 672 inode->i_sb->s_bdev, iov, offset,
673 nr_segs, 673 nr_segs,
674 ocfs2_direct_IO_get_blocks, 674 ocfs2_direct_IO_get_blocks,
675 ocfs2_dio_end_io); 675 ocfs2_dio_end_io);
676 676
diff --git a/fs/ocfs2/buffer_head_io.c b/fs/ocfs2/buffer_head_io.c
index d43d34a1dd31..21c808f752d8 100644
--- a/fs/ocfs2/buffer_head_io.c
+++ b/fs/ocfs2/buffer_head_io.c
@@ -368,7 +368,7 @@ int ocfs2_read_blocks(struct ocfs2_caching_info *ci, u64 block, int nr,
368 } 368 }
369 ocfs2_metadata_cache_io_unlock(ci); 369 ocfs2_metadata_cache_io_unlock(ci);
370 370
371 mlog(ML_BH_IO, "block=(%llu), nr=(%d), cached=%s, flags=0x%x\n", 371 mlog(ML_BH_IO, "block=(%llu), nr=(%d), cached=%s, flags=0x%x\n",
372 (unsigned long long)block, nr, 372 (unsigned long long)block, nr,
373 ((flags & OCFS2_BH_IGNORE_CACHE) || ignore_cache) ? "no" : "yes", 373 ((flags & OCFS2_BH_IGNORE_CACHE) || ignore_cache) ? "no" : "yes",
374 flags); 374 flags);
diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c
index eda5b8bcddd5..5c9890006708 100644
--- a/fs/ocfs2/cluster/heartbeat.c
+++ b/fs/ocfs2/cluster/heartbeat.c
@@ -78,7 +78,7 @@ static struct o2hb_callback *hbcall_from_type(enum o2hb_callback_type type);
78 78
79unsigned int o2hb_dead_threshold = O2HB_DEFAULT_DEAD_THRESHOLD; 79unsigned int o2hb_dead_threshold = O2HB_DEFAULT_DEAD_THRESHOLD;
80 80
81/* Only sets a new threshold if there are no active regions. 81/* Only sets a new threshold if there are no active regions.
82 * 82 *
83 * No locking or otherwise interesting code is required for reading 83 * No locking or otherwise interesting code is required for reading
84 * o2hb_dead_threshold as it can't change once regions are active and 84 * o2hb_dead_threshold as it can't change once regions are active and
@@ -170,7 +170,7 @@ static void o2hb_write_timeout(struct work_struct *work)
170 170
171 mlog(ML_ERROR, "Heartbeat write timeout to device %s after %u " 171 mlog(ML_ERROR, "Heartbeat write timeout to device %s after %u "
172 "milliseconds\n", reg->hr_dev_name, 172 "milliseconds\n", reg->hr_dev_name,
173 jiffies_to_msecs(jiffies - reg->hr_last_timeout_start)); 173 jiffies_to_msecs(jiffies - reg->hr_last_timeout_start));
174 o2quo_disk_timeout(); 174 o2quo_disk_timeout();
175} 175}
176 176
@@ -624,7 +624,7 @@ static int o2hb_check_slot(struct o2hb_region *reg,
624 "seq %llu last %llu changed %u equal %u\n", 624 "seq %llu last %llu changed %u equal %u\n",
625 slot->ds_node_num, (long long)slot->ds_last_generation, 625 slot->ds_node_num, (long long)slot->ds_last_generation,
626 le32_to_cpu(hb_block->hb_cksum), 626 le32_to_cpu(hb_block->hb_cksum),
627 (unsigned long long)le64_to_cpu(hb_block->hb_seq), 627 (unsigned long long)le64_to_cpu(hb_block->hb_seq),
628 (unsigned long long)slot->ds_last_time, slot->ds_changed_samples, 628 (unsigned long long)slot->ds_last_time, slot->ds_changed_samples,
629 slot->ds_equal_samples); 629 slot->ds_equal_samples);
630 630
diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c
index 334f231a422c..d8d0c65ac03c 100644
--- a/fs/ocfs2/cluster/tcp.c
+++ b/fs/ocfs2/cluster/tcp.c
@@ -485,7 +485,7 @@ static void o2net_set_nn_state(struct o2net_node *nn,
485 } 485 }
486 486
487 if (was_valid && !valid) { 487 if (was_valid && !valid) {
488 printk(KERN_INFO "o2net: no longer connected to " 488 printk(KERN_NOTICE "o2net: no longer connected to "
489 SC_NODEF_FMT "\n", SC_NODEF_ARGS(old_sc)); 489 SC_NODEF_FMT "\n", SC_NODEF_ARGS(old_sc));
490 o2net_complete_nodes_nsw(nn); 490 o2net_complete_nodes_nsw(nn);
491 } 491 }
@@ -493,7 +493,7 @@ static void o2net_set_nn_state(struct o2net_node *nn,
493 if (!was_valid && valid) { 493 if (!was_valid && valid) {
494 o2quo_conn_up(o2net_num_from_nn(nn)); 494 o2quo_conn_up(o2net_num_from_nn(nn));
495 cancel_delayed_work(&nn->nn_connect_expired); 495 cancel_delayed_work(&nn->nn_connect_expired);
496 printk(KERN_INFO "o2net: %s " SC_NODEF_FMT "\n", 496 printk(KERN_NOTICE "o2net: %s " SC_NODEF_FMT "\n",
497 o2nm_this_node() > sc->sc_node->nd_num ? 497 o2nm_this_node() > sc->sc_node->nd_num ?
498 "connected to" : "accepted connection from", 498 "connected to" : "accepted connection from",
499 SC_NODEF_ARGS(sc)); 499 SC_NODEF_ARGS(sc));
@@ -930,7 +930,7 @@ static void o2net_sendpage(struct o2net_sock_container *sc,
930 cond_resched(); 930 cond_resched();
931 continue; 931 continue;
932 } 932 }
933 mlog(ML_ERROR, "sendpage of size %zu to " SC_NODEF_FMT 933 mlog(ML_ERROR, "sendpage of size %zu to " SC_NODEF_FMT
934 " failed with %zd\n", size, SC_NODEF_ARGS(sc), ret); 934 " failed with %zd\n", size, SC_NODEF_ARGS(sc), ret);
935 o2net_ensure_shutdown(nn, sc, 0); 935 o2net_ensure_shutdown(nn, sc, 0);
936 break; 936 break;
@@ -1476,14 +1476,14 @@ static void o2net_idle_timer(unsigned long data)
1476 1476
1477 do_gettimeofday(&now); 1477 do_gettimeofday(&now);
1478 1478
1479 printk(KERN_INFO "o2net: connection to " SC_NODEF_FMT " has been idle for %u.%u " 1479 printk(KERN_NOTICE "o2net: connection to " SC_NODEF_FMT " has been idle for %u.%u "
1480 "seconds, shutting it down.\n", SC_NODEF_ARGS(sc), 1480 "seconds, shutting it down.\n", SC_NODEF_ARGS(sc),
1481 o2net_idle_timeout() / 1000, 1481 o2net_idle_timeout() / 1000,
1482 o2net_idle_timeout() % 1000); 1482 o2net_idle_timeout() % 1000);
1483 mlog(ML_NOTICE, "here are some times that might help debug the " 1483 mlog(ML_NOTICE, "here are some times that might help debug the "
1484 "situation: (tmr %ld.%ld now %ld.%ld dr %ld.%ld adv " 1484 "situation: (tmr %ld.%ld now %ld.%ld dr %ld.%ld adv "
1485 "%ld.%ld:%ld.%ld func (%08x:%u) %ld.%ld:%ld.%ld)\n", 1485 "%ld.%ld:%ld.%ld func (%08x:%u) %ld.%ld:%ld.%ld)\n",
1486 sc->sc_tv_timer.tv_sec, (long) sc->sc_tv_timer.tv_usec, 1486 sc->sc_tv_timer.tv_sec, (long) sc->sc_tv_timer.tv_usec,
1487 now.tv_sec, (long) now.tv_usec, 1487 now.tv_sec, (long) now.tv_usec,
1488 sc->sc_tv_data_ready.tv_sec, (long) sc->sc_tv_data_ready.tv_usec, 1488 sc->sc_tv_data_ready.tv_sec, (long) sc->sc_tv_data_ready.tv_usec,
1489 sc->sc_tv_advance_start.tv_sec, 1489 sc->sc_tv_advance_start.tv_sec,
diff --git a/fs/ocfs2/cluster/tcp_internal.h b/fs/ocfs2/cluster/tcp_internal.h
index 8d58cfe410b1..96fa7ebc530c 100644
--- a/fs/ocfs2/cluster/tcp_internal.h
+++ b/fs/ocfs2/cluster/tcp_internal.h
@@ -32,10 +32,10 @@
32 * on their number */ 32 * on their number */
33#define O2NET_QUORUM_DELAY_MS ((o2hb_dead_threshold + 2) * O2HB_REGION_TIMEOUT_MS) 33#define O2NET_QUORUM_DELAY_MS ((o2hb_dead_threshold + 2) * O2HB_REGION_TIMEOUT_MS)
34 34
35/* 35/*
36 * This version number represents quite a lot, unfortunately. It not 36 * This version number represents quite a lot, unfortunately. It not
37 * only represents the raw network message protocol on the wire but also 37 * only represents the raw network message protocol on the wire but also
38 * locking semantics of the file system using the protocol. It should 38 * locking semantics of the file system using the protocol. It should
39 * be somewhere else, I'm sure, but right now it isn't. 39 * be somewhere else, I'm sure, but right now it isn't.
40 * 40 *
41 * With version 11, we separate out the filesystem locking portion. The 41 * With version 11, we separate out the filesystem locking portion. The
diff --git a/fs/ocfs2/dlm/dlmapi.h b/fs/ocfs2/dlm/dlmapi.h
index b5786a787fab..3cfa114aa391 100644
--- a/fs/ocfs2/dlm/dlmapi.h
+++ b/fs/ocfs2/dlm/dlmapi.h
@@ -95,7 +95,7 @@ const char *dlm_errname(enum dlm_status err);
95 mlog(ML_ERROR, "dlm status = %s\n", dlm_errname((st))); \ 95 mlog(ML_ERROR, "dlm status = %s\n", dlm_errname((st))); \
96} while (0) 96} while (0)
97 97
98#define DLM_LKSB_UNUSED1 0x01 98#define DLM_LKSB_UNUSED1 0x01
99#define DLM_LKSB_PUT_LVB 0x02 99#define DLM_LKSB_PUT_LVB 0x02
100#define DLM_LKSB_GET_LVB 0x04 100#define DLM_LKSB_GET_LVB 0x04
101#define DLM_LKSB_UNUSED2 0x08 101#define DLM_LKSB_UNUSED2 0x08
diff --git a/fs/ocfs2/dlm/dlmast.c b/fs/ocfs2/dlm/dlmast.c
index 01cf8cc3d286..dccc439fa087 100644
--- a/fs/ocfs2/dlm/dlmast.c
+++ b/fs/ocfs2/dlm/dlmast.c
@@ -123,7 +123,7 @@ static void __dlm_queue_ast(struct dlm_ctxt *dlm, struct dlm_lock *lock)
123 dlm_lock_put(lock); 123 dlm_lock_put(lock);
124 /* free up the reserved bast that we are cancelling. 124 /* free up the reserved bast that we are cancelling.
125 * guaranteed that this will not be the last reserved 125 * guaranteed that this will not be the last reserved
126 * ast because *both* an ast and a bast were reserved 126 * ast because *both* an ast and a bast were reserved
127 * to get to this point. the res->spinlock will not be 127 * to get to this point. the res->spinlock will not be
128 * taken here */ 128 * taken here */
129 dlm_lockres_release_ast(dlm, res); 129 dlm_lockres_release_ast(dlm, res);
diff --git a/fs/ocfs2/dlm/dlmconvert.c b/fs/ocfs2/dlm/dlmconvert.c
index ca96bce50e18..f283bce776b4 100644
--- a/fs/ocfs2/dlm/dlmconvert.c
+++ b/fs/ocfs2/dlm/dlmconvert.c
@@ -396,7 +396,7 @@ static enum dlm_status dlm_send_remote_convert_request(struct dlm_ctxt *dlm,
396 /* instead of logging the same network error over 396 /* instead of logging the same network error over
397 * and over, sleep here and wait for the heartbeat 397 * and over, sleep here and wait for the heartbeat
398 * to notice the node is dead. times out after 5s. */ 398 * to notice the node is dead. times out after 5s. */
399 dlm_wait_for_node_death(dlm, res->owner, 399 dlm_wait_for_node_death(dlm, res->owner,
400 DLM_NODE_DEATH_WAIT_MAX); 400 DLM_NODE_DEATH_WAIT_MAX);
401 ret = DLM_RECOVERING; 401 ret = DLM_RECOVERING;
402 mlog(0, "node %u died so returning DLM_RECOVERING " 402 mlog(0, "node %u died so returning DLM_RECOVERING "
diff --git a/fs/ocfs2/dlm/dlmdebug.c b/fs/ocfs2/dlm/dlmdebug.c
index 42b0bad7a612..0cd24cf54396 100644
--- a/fs/ocfs2/dlm/dlmdebug.c
+++ b/fs/ocfs2/dlm/dlmdebug.c
@@ -102,7 +102,7 @@ void __dlm_print_one_lock_resource(struct dlm_lock_resource *res)
102 assert_spin_locked(&res->spinlock); 102 assert_spin_locked(&res->spinlock);
103 103
104 stringify_lockname(res->lockname.name, res->lockname.len, 104 stringify_lockname(res->lockname.name, res->lockname.len,
105 buf, sizeof(buf) - 1); 105 buf, sizeof(buf));
106 printk("lockres: %s, owner=%u, state=%u\n", 106 printk("lockres: %s, owner=%u, state=%u\n",
107 buf, res->owner, res->state); 107 buf, res->owner, res->state);
108 printk(" last used: %lu, refcnt: %u, on purge list: %s\n", 108 printk(" last used: %lu, refcnt: %u, on purge list: %s\n",
diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c
index 0334000676d3..988c9055fd4e 100644
--- a/fs/ocfs2/dlm/dlmdomain.c
+++ b/fs/ocfs2/dlm/dlmdomain.c
@@ -816,7 +816,7 @@ static int dlm_query_join_handler(struct o2net_msg *msg, u32 len, void *data,
816 } 816 }
817 817
818 /* Once the dlm ctxt is marked as leaving then we don't want 818 /* Once the dlm ctxt is marked as leaving then we don't want
819 * to be put in someone's domain map. 819 * to be put in someone's domain map.
820 * Also, explicitly disallow joining at certain troublesome 820 * Also, explicitly disallow joining at certain troublesome
821 * times (ie. during recovery). */ 821 * times (ie. during recovery). */
822 if (dlm && dlm->dlm_state != DLM_CTXT_LEAVING) { 822 if (dlm && dlm->dlm_state != DLM_CTXT_LEAVING) {
diff --git a/fs/ocfs2/dlm/dlmlock.c b/fs/ocfs2/dlm/dlmlock.c
index 437698e9465f..733337772671 100644
--- a/fs/ocfs2/dlm/dlmlock.c
+++ b/fs/ocfs2/dlm/dlmlock.c
@@ -269,7 +269,7 @@ static enum dlm_status dlmlock_remote(struct dlm_ctxt *dlm,
269 } 269 }
270 dlm_revert_pending_lock(res, lock); 270 dlm_revert_pending_lock(res, lock);
271 dlm_lock_put(lock); 271 dlm_lock_put(lock);
272 } else if (dlm_is_recovery_lock(res->lockname.name, 272 } else if (dlm_is_recovery_lock(res->lockname.name,
273 res->lockname.len)) { 273 res->lockname.len)) {
274 /* special case for the $RECOVERY lock. 274 /* special case for the $RECOVERY lock.
275 * there will never be an AST delivered to put 275 * there will never be an AST delivered to put
diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c
index 03ccf9a7b1f4..a659606dcb95 100644
--- a/fs/ocfs2/dlm/dlmmaster.c
+++ b/fs/ocfs2/dlm/dlmmaster.c
@@ -366,7 +366,7 @@ void dlm_hb_event_notify_attached(struct dlm_ctxt *dlm, int idx, int node_up)
366 struct dlm_master_list_entry *mle; 366 struct dlm_master_list_entry *mle;
367 367
368 assert_spin_locked(&dlm->spinlock); 368 assert_spin_locked(&dlm->spinlock);
369 369
370 list_for_each_entry(mle, &dlm->mle_hb_events, hb_events) { 370 list_for_each_entry(mle, &dlm->mle_hb_events, hb_events) {
371 if (node_up) 371 if (node_up)
372 dlm_mle_node_up(dlm, mle, NULL, idx); 372 dlm_mle_node_up(dlm, mle, NULL, idx);
@@ -833,7 +833,7 @@ lookup:
833 __dlm_insert_mle(dlm, mle); 833 __dlm_insert_mle(dlm, mle);
834 834
835 /* still holding the dlm spinlock, check the recovery map 835 /* still holding the dlm spinlock, check the recovery map
836 * to see if there are any nodes that still need to be 836 * to see if there are any nodes that still need to be
837 * considered. these will not appear in the mle nodemap 837 * considered. these will not appear in the mle nodemap
838 * but they might own this lockres. wait on them. */ 838 * but they might own this lockres. wait on them. */
839 bit = find_next_bit(dlm->recovery_map, O2NM_MAX_NODES, 0); 839 bit = find_next_bit(dlm->recovery_map, O2NM_MAX_NODES, 0);
@@ -883,7 +883,7 @@ redo_request:
883 msleep(500); 883 msleep(500);
884 } 884 }
885 continue; 885 continue;
886 } 886 }
887 887
888 dlm_kick_recovery_thread(dlm); 888 dlm_kick_recovery_thread(dlm);
889 msleep(1000); 889 msleep(1000);
@@ -939,8 +939,8 @@ wait:
939 res->lockname.name, blocked); 939 res->lockname.name, blocked);
940 if (++tries > 20) { 940 if (++tries > 20) {
941 mlog(ML_ERROR, "%s:%.*s: spinning on " 941 mlog(ML_ERROR, "%s:%.*s: spinning on "
942 "dlm_wait_for_lock_mastery, blocked=%d\n", 942 "dlm_wait_for_lock_mastery, blocked=%d\n",
943 dlm->name, res->lockname.len, 943 dlm->name, res->lockname.len,
944 res->lockname.name, blocked); 944 res->lockname.name, blocked);
945 dlm_print_one_lock_resource(res); 945 dlm_print_one_lock_resource(res);
946 dlm_print_one_mle(mle); 946 dlm_print_one_mle(mle);
@@ -1029,7 +1029,7 @@ recheck:
1029 ret = dlm_restart_lock_mastery(dlm, res, mle, *blocked); 1029 ret = dlm_restart_lock_mastery(dlm, res, mle, *blocked);
1030 b = (mle->type == DLM_MLE_BLOCK); 1030 b = (mle->type == DLM_MLE_BLOCK);
1031 if ((*blocked && !b) || (!*blocked && b)) { 1031 if ((*blocked && !b) || (!*blocked && b)) {
1032 mlog(0, "%s:%.*s: status change: old=%d new=%d\n", 1032 mlog(0, "%s:%.*s: status change: old=%d new=%d\n",
1033 dlm->name, res->lockname.len, res->lockname.name, 1033 dlm->name, res->lockname.len, res->lockname.name,
1034 *blocked, b); 1034 *blocked, b);
1035 *blocked = b; 1035 *blocked = b;
@@ -1602,7 +1602,7 @@ send_response:
1602 } 1602 }
1603 mlog(0, "%u is the owner of %.*s, cleaning everyone else\n", 1603 mlog(0, "%u is the owner of %.*s, cleaning everyone else\n",
1604 dlm->node_num, res->lockname.len, res->lockname.name); 1604 dlm->node_num, res->lockname.len, res->lockname.name);
1605 ret = dlm_dispatch_assert_master(dlm, res, 0, request->node_idx, 1605 ret = dlm_dispatch_assert_master(dlm, res, 0, request->node_idx,
1606 DLM_ASSERT_MASTER_MLE_CLEANUP); 1606 DLM_ASSERT_MASTER_MLE_CLEANUP);
1607 if (ret < 0) { 1607 if (ret < 0) {
1608 mlog(ML_ERROR, "failed to dispatch assert master work\n"); 1608 mlog(ML_ERROR, "failed to dispatch assert master work\n");
@@ -1701,7 +1701,7 @@ again:
1701 1701
1702 if (r & DLM_ASSERT_RESPONSE_REASSERT) { 1702 if (r & DLM_ASSERT_RESPONSE_REASSERT) {
1703 mlog(0, "%.*s: node %u create mles on other " 1703 mlog(0, "%.*s: node %u create mles on other "
1704 "nodes and requests a re-assert\n", 1704 "nodes and requests a re-assert\n",
1705 namelen, lockname, to); 1705 namelen, lockname, to);
1706 reassert = 1; 1706 reassert = 1;
1707 } 1707 }
@@ -1812,7 +1812,7 @@ int dlm_assert_master_handler(struct o2net_msg *msg, u32 len, void *data,
1812 spin_unlock(&dlm->master_lock); 1812 spin_unlock(&dlm->master_lock);
1813 spin_unlock(&dlm->spinlock); 1813 spin_unlock(&dlm->spinlock);
1814 goto done; 1814 goto done;
1815 } 1815 }
1816 } 1816 }
1817 } 1817 }
1818 spin_unlock(&dlm->master_lock); 1818 spin_unlock(&dlm->master_lock);
@@ -1883,7 +1883,7 @@ ok:
1883 int extra_ref = 0; 1883 int extra_ref = 0;
1884 int nn = -1; 1884 int nn = -1;
1885 int rr, err = 0; 1885 int rr, err = 0;
1886 1886
1887 spin_lock(&mle->spinlock); 1887 spin_lock(&mle->spinlock);
1888 if (mle->type == DLM_MLE_BLOCK || mle->type == DLM_MLE_MIGRATION) 1888 if (mle->type == DLM_MLE_BLOCK || mle->type == DLM_MLE_MIGRATION)
1889 extra_ref = 1; 1889 extra_ref = 1;
@@ -1891,7 +1891,7 @@ ok:
1891 /* MASTER mle: if any bits set in the response map 1891 /* MASTER mle: if any bits set in the response map
1892 * then the calling node needs to re-assert to clear 1892 * then the calling node needs to re-assert to clear
1893 * up nodes that this node contacted */ 1893 * up nodes that this node contacted */
1894 while ((nn = find_next_bit (mle->response_map, O2NM_MAX_NODES, 1894 while ((nn = find_next_bit (mle->response_map, O2NM_MAX_NODES,
1895 nn+1)) < O2NM_MAX_NODES) { 1895 nn+1)) < O2NM_MAX_NODES) {
1896 if (nn != dlm->node_num && nn != assert->node_idx) 1896 if (nn != dlm->node_num && nn != assert->node_idx)
1897 master_request = 1; 1897 master_request = 1;
@@ -2002,7 +2002,7 @@ kill:
2002 __dlm_print_one_lock_resource(res); 2002 __dlm_print_one_lock_resource(res);
2003 spin_unlock(&res->spinlock); 2003 spin_unlock(&res->spinlock);
2004 spin_unlock(&dlm->spinlock); 2004 spin_unlock(&dlm->spinlock);
2005 *ret_data = (void *)res; 2005 *ret_data = (void *)res;
2006 dlm_put(dlm); 2006 dlm_put(dlm);
2007 return -EINVAL; 2007 return -EINVAL;
2008} 2008}
@@ -2040,10 +2040,10 @@ int dlm_dispatch_assert_master(struct dlm_ctxt *dlm,
2040 item->u.am.request_from = request_from; 2040 item->u.am.request_from = request_from;
2041 item->u.am.flags = flags; 2041 item->u.am.flags = flags;
2042 2042
2043 if (ignore_higher) 2043 if (ignore_higher)
2044 mlog(0, "IGNORE HIGHER: %.*s\n", res->lockname.len, 2044 mlog(0, "IGNORE HIGHER: %.*s\n", res->lockname.len,
2045 res->lockname.name); 2045 res->lockname.name);
2046 2046
2047 spin_lock(&dlm->work_lock); 2047 spin_lock(&dlm->work_lock);
2048 list_add_tail(&item->list, &dlm->work_list); 2048 list_add_tail(&item->list, &dlm->work_list);
2049 spin_unlock(&dlm->work_lock); 2049 spin_unlock(&dlm->work_lock);
@@ -2133,7 +2133,7 @@ put:
2133 * think that $RECOVERY is currently mastered by a dead node. If so, 2133 * think that $RECOVERY is currently mastered by a dead node. If so,
2134 * we wait a short time to allow that node to get notified by its own 2134 * we wait a short time to allow that node to get notified by its own
2135 * heartbeat stack, then check again. All $RECOVERY lock resources 2135 * heartbeat stack, then check again. All $RECOVERY lock resources
2136 * mastered by dead nodes are purged when the hearbeat callback is 2136 * mastered by dead nodes are purged when the hearbeat callback is
2137 * fired, so we can know for sure that it is safe to continue once 2137 * fired, so we can know for sure that it is safe to continue once
2138 * the node returns a live node or no node. */ 2138 * the node returns a live node or no node. */
2139static int dlm_pre_master_reco_lockres(struct dlm_ctxt *dlm, 2139static int dlm_pre_master_reco_lockres(struct dlm_ctxt *dlm,
@@ -2174,7 +2174,7 @@ static int dlm_pre_master_reco_lockres(struct dlm_ctxt *dlm,
2174 ret = -EAGAIN; 2174 ret = -EAGAIN;
2175 } 2175 }
2176 spin_unlock(&dlm->spinlock); 2176 spin_unlock(&dlm->spinlock);
2177 mlog(0, "%s: reco lock master is %u\n", dlm->name, 2177 mlog(0, "%s: reco lock master is %u\n", dlm->name,
2178 master); 2178 master);
2179 break; 2179 break;
2180 } 2180 }
@@ -2602,7 +2602,7 @@ fail:
2602 2602
2603 mlog(0, "%s:%.*s: timed out during migration\n", 2603 mlog(0, "%s:%.*s: timed out during migration\n",
2604 dlm->name, res->lockname.len, res->lockname.name); 2604 dlm->name, res->lockname.len, res->lockname.name);
2605 /* avoid hang during shutdown when migrating lockres 2605 /* avoid hang during shutdown when migrating lockres
2606 * to a node which also goes down */ 2606 * to a node which also goes down */
2607 if (dlm_is_node_dead(dlm, target)) { 2607 if (dlm_is_node_dead(dlm, target)) {
2608 mlog(0, "%s:%.*s: expected migration " 2608 mlog(0, "%s:%.*s: expected migration "
@@ -2738,7 +2738,7 @@ static int dlm_migration_can_proceed(struct dlm_ctxt *dlm,
2738 can_proceed = !!(res->state & DLM_LOCK_RES_MIGRATING); 2738 can_proceed = !!(res->state & DLM_LOCK_RES_MIGRATING);
2739 spin_unlock(&res->spinlock); 2739 spin_unlock(&res->spinlock);
2740 2740
2741 /* target has died, so make the caller break out of the 2741 /* target has died, so make the caller break out of the
2742 * wait_event, but caller must recheck the domain_map */ 2742 * wait_event, but caller must recheck the domain_map */
2743 spin_lock(&dlm->spinlock); 2743 spin_lock(&dlm->spinlock);
2744 if (!test_bit(mig_target, dlm->domain_map)) 2744 if (!test_bit(mig_target, dlm->domain_map))
diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c
index 2f9e4e19a4f2..344bcf90cbf4 100644
--- a/fs/ocfs2/dlm/dlmrecovery.c
+++ b/fs/ocfs2/dlm/dlmrecovery.c
@@ -1050,7 +1050,7 @@ static void dlm_move_reco_locks_to_list(struct dlm_ctxt *dlm,
1050 if (lock->ml.node == dead_node) { 1050 if (lock->ml.node == dead_node) {
1051 mlog(0, "AHA! there was " 1051 mlog(0, "AHA! there was "
1052 "a $RECOVERY lock for dead " 1052 "a $RECOVERY lock for dead "
1053 "node %u (%s)!\n", 1053 "node %u (%s)!\n",
1054 dead_node, dlm->name); 1054 dead_node, dlm->name);
1055 list_del_init(&lock->list); 1055 list_del_init(&lock->list);
1056 dlm_lock_put(lock); 1056 dlm_lock_put(lock);
@@ -1164,6 +1164,39 @@ static void dlm_init_migratable_lockres(struct dlm_migratable_lockres *mres,
1164 mres->master = master; 1164 mres->master = master;
1165} 1165}
1166 1166
1167static void dlm_prepare_lvb_for_migration(struct dlm_lock *lock,
1168 struct dlm_migratable_lockres *mres,
1169 int queue)
1170{
1171 if (!lock->lksb)
1172 return;
1173
1174 /* Ignore lvb in all locks in the blocked list */
1175 if (queue == DLM_BLOCKED_LIST)
1176 return;
1177
1178 /* Only consider lvbs in locks with granted EX or PR lock levels */
1179 if (lock->ml.type != LKM_EXMODE && lock->ml.type != LKM_PRMODE)
1180 return;
1181
1182 if (dlm_lvb_is_empty(mres->lvb)) {
1183 memcpy(mres->lvb, lock->lksb->lvb, DLM_LVB_LEN);
1184 return;
1185 }
1186
1187 /* Ensure the lvb copied for migration matches in other valid locks */
1188 if (!memcmp(mres->lvb, lock->lksb->lvb, DLM_LVB_LEN))
1189 return;
1190
1191 mlog(ML_ERROR, "Mismatched lvb in lock cookie=%u:%llu, name=%.*s, "
1192 "node=%u\n",
1193 dlm_get_lock_cookie_node(be64_to_cpu(lock->ml.cookie)),
1194 dlm_get_lock_cookie_seq(be64_to_cpu(lock->ml.cookie)),
1195 lock->lockres->lockname.len, lock->lockres->lockname.name,
1196 lock->ml.node);
1197 dlm_print_one_lock_resource(lock->lockres);
1198 BUG();
1199}
1167 1200
1168/* returns 1 if this lock fills the network structure, 1201/* returns 1 if this lock fills the network structure,
1169 * 0 otherwise */ 1202 * 0 otherwise */
@@ -1181,20 +1214,7 @@ static int dlm_add_lock_to_array(struct dlm_lock *lock,
1181 ml->list = queue; 1214 ml->list = queue;
1182 if (lock->lksb) { 1215 if (lock->lksb) {
1183 ml->flags = lock->lksb->flags; 1216 ml->flags = lock->lksb->flags;
1184 /* send our current lvb */ 1217 dlm_prepare_lvb_for_migration(lock, mres, queue);
1185 if (ml->type == LKM_EXMODE ||
1186 ml->type == LKM_PRMODE) {
1187 /* if it is already set, this had better be a PR
1188 * and it has to match */
1189 if (!dlm_lvb_is_empty(mres->lvb) &&
1190 (ml->type == LKM_EXMODE ||
1191 memcmp(mres->lvb, lock->lksb->lvb, DLM_LVB_LEN))) {
1192 mlog(ML_ERROR, "mismatched lvbs!\n");
1193 dlm_print_one_lock_resource(lock->lockres);
1194 BUG();
1195 }
1196 memcpy(mres->lvb, lock->lksb->lvb, DLM_LVB_LEN);
1197 }
1198 } 1218 }
1199 ml->node = lock->ml.node; 1219 ml->node = lock->ml.node;
1200 mres->num_locks++; 1220 mres->num_locks++;
@@ -1730,6 +1750,7 @@ static int dlm_process_recovery_data(struct dlm_ctxt *dlm,
1730 struct dlm_lock *lock = NULL; 1750 struct dlm_lock *lock = NULL;
1731 u8 from = O2NM_MAX_NODES; 1751 u8 from = O2NM_MAX_NODES;
1732 unsigned int added = 0; 1752 unsigned int added = 0;
1753 __be64 c;
1733 1754
1734 mlog(0, "running %d locks for this lockres\n", mres->num_locks); 1755 mlog(0, "running %d locks for this lockres\n", mres->num_locks);
1735 for (i=0; i<mres->num_locks; i++) { 1756 for (i=0; i<mres->num_locks; i++) {
@@ -1777,19 +1798,48 @@ static int dlm_process_recovery_data(struct dlm_ctxt *dlm,
1777 /* lock is always created locally first, and 1798 /* lock is always created locally first, and
1778 * destroyed locally last. it must be on the list */ 1799 * destroyed locally last. it must be on the list */
1779 if (!lock) { 1800 if (!lock) {
1780 __be64 c = ml->cookie; 1801 c = ml->cookie;
1781 mlog(ML_ERROR, "could not find local lock " 1802 mlog(ML_ERROR, "Could not find local lock "
1782 "with cookie %u:%llu!\n", 1803 "with cookie %u:%llu, node %u, "
1804 "list %u, flags 0x%x, type %d, "
1805 "conv %d, highest blocked %d\n",
1783 dlm_get_lock_cookie_node(be64_to_cpu(c)), 1806 dlm_get_lock_cookie_node(be64_to_cpu(c)),
1784 dlm_get_lock_cookie_seq(be64_to_cpu(c))); 1807 dlm_get_lock_cookie_seq(be64_to_cpu(c)),
1808 ml->node, ml->list, ml->flags, ml->type,
1809 ml->convert_type, ml->highest_blocked);
1810 __dlm_print_one_lock_resource(res);
1811 BUG();
1812 }
1813
1814 if (lock->ml.node != ml->node) {
1815 c = lock->ml.cookie;
1816 mlog(ML_ERROR, "Mismatched node# in lock "
1817 "cookie %u:%llu, name %.*s, node %u\n",
1818 dlm_get_lock_cookie_node(be64_to_cpu(c)),
1819 dlm_get_lock_cookie_seq(be64_to_cpu(c)),
1820 res->lockname.len, res->lockname.name,
1821 lock->ml.node);
1822 c = ml->cookie;
1823 mlog(ML_ERROR, "Migrate lock cookie %u:%llu, "
1824 "node %u, list %u, flags 0x%x, type %d, "
1825 "conv %d, highest blocked %d\n",
1826 dlm_get_lock_cookie_node(be64_to_cpu(c)),
1827 dlm_get_lock_cookie_seq(be64_to_cpu(c)),
1828 ml->node, ml->list, ml->flags, ml->type,
1829 ml->convert_type, ml->highest_blocked);
1785 __dlm_print_one_lock_resource(res); 1830 __dlm_print_one_lock_resource(res);
1786 BUG(); 1831 BUG();
1787 } 1832 }
1788 BUG_ON(lock->ml.node != ml->node);
1789 1833
1790 if (tmpq != queue) { 1834 if (tmpq != queue) {
1791 mlog(0, "lock was on %u instead of %u for %.*s\n", 1835 c = ml->cookie;
1792 j, ml->list, res->lockname.len, res->lockname.name); 1836 mlog(0, "Lock cookie %u:%llu was on list %u "
1837 "instead of list %u for %.*s\n",
1838 dlm_get_lock_cookie_node(be64_to_cpu(c)),
1839 dlm_get_lock_cookie_seq(be64_to_cpu(c)),
1840 j, ml->list, res->lockname.len,
1841 res->lockname.name);
1842 __dlm_print_one_lock_resource(res);
1793 spin_unlock(&res->spinlock); 1843 spin_unlock(&res->spinlock);
1794 continue; 1844 continue;
1795 } 1845 }
@@ -1839,7 +1889,7 @@ static int dlm_process_recovery_data(struct dlm_ctxt *dlm,
1839 * the lvb. */ 1889 * the lvb. */
1840 memcpy(res->lvb, mres->lvb, DLM_LVB_LEN); 1890 memcpy(res->lvb, mres->lvb, DLM_LVB_LEN);
1841 } else { 1891 } else {
1842 /* otherwise, the node is sending its 1892 /* otherwise, the node is sending its
1843 * most recent valid lvb info */ 1893 * most recent valid lvb info */
1844 BUG_ON(ml->type != LKM_EXMODE && 1894 BUG_ON(ml->type != LKM_EXMODE &&
1845 ml->type != LKM_PRMODE); 1895 ml->type != LKM_PRMODE);
@@ -1886,7 +1936,7 @@ skip_lvb:
1886 spin_lock(&res->spinlock); 1936 spin_lock(&res->spinlock);
1887 list_for_each_entry(lock, queue, list) { 1937 list_for_each_entry(lock, queue, list) {
1888 if (lock->ml.cookie == ml->cookie) { 1938 if (lock->ml.cookie == ml->cookie) {
1889 __be64 c = lock->ml.cookie; 1939 c = lock->ml.cookie;
1890 mlog(ML_ERROR, "%s:%.*s: %u:%llu: lock already " 1940 mlog(ML_ERROR, "%s:%.*s: %u:%llu: lock already "
1891 "exists on this lockres!\n", dlm->name, 1941 "exists on this lockres!\n", dlm->name,
1892 res->lockname.len, res->lockname.name, 1942 res->lockname.len, res->lockname.name,
@@ -2114,7 +2164,7 @@ static void dlm_revalidate_lvb(struct dlm_ctxt *dlm,
2114 assert_spin_locked(&res->spinlock); 2164 assert_spin_locked(&res->spinlock);
2115 2165
2116 if (res->owner == dlm->node_num) 2166 if (res->owner == dlm->node_num)
2117 /* if this node owned the lockres, and if the dead node 2167 /* if this node owned the lockres, and if the dead node
2118 * had an EX when he died, blank out the lvb */ 2168 * had an EX when he died, blank out the lvb */
2119 search_node = dead_node; 2169 search_node = dead_node;
2120 else { 2170 else {
@@ -2152,7 +2202,7 @@ static void dlm_free_dead_locks(struct dlm_ctxt *dlm,
2152 2202
2153 /* this node is the lockres master: 2203 /* this node is the lockres master:
2154 * 1) remove any stale locks for the dead node 2204 * 1) remove any stale locks for the dead node
2155 * 2) if the dead node had an EX when he died, blank out the lvb 2205 * 2) if the dead node had an EX when he died, blank out the lvb
2156 */ 2206 */
2157 assert_spin_locked(&dlm->spinlock); 2207 assert_spin_locked(&dlm->spinlock);
2158 assert_spin_locked(&res->spinlock); 2208 assert_spin_locked(&res->spinlock);
@@ -2193,7 +2243,12 @@ static void dlm_free_dead_locks(struct dlm_ctxt *dlm,
2193 mlog(0, "%s:%.*s: freed %u locks for dead node %u, " 2243 mlog(0, "%s:%.*s: freed %u locks for dead node %u, "
2194 "dropping ref from lockres\n", dlm->name, 2244 "dropping ref from lockres\n", dlm->name,
2195 res->lockname.len, res->lockname.name, freed, dead_node); 2245 res->lockname.len, res->lockname.name, freed, dead_node);
2196 BUG_ON(!test_bit(dead_node, res->refmap)); 2246 if(!test_bit(dead_node, res->refmap)) {
2247 mlog(ML_ERROR, "%s:%.*s: freed %u locks for dead node %u, "
2248 "but ref was not set\n", dlm->name,
2249 res->lockname.len, res->lockname.name, freed, dead_node);
2250 __dlm_print_one_lock_resource(res);
2251 }
2197 dlm_lockres_clear_refmap_bit(dead_node, res); 2252 dlm_lockres_clear_refmap_bit(dead_node, res);
2198 } else if (test_bit(dead_node, res->refmap)) { 2253 } else if (test_bit(dead_node, res->refmap)) {
2199 mlog(0, "%s:%.*s: dead node %u had a ref, but had " 2254 mlog(0, "%s:%.*s: dead node %u had a ref, but had "
@@ -2260,7 +2315,7 @@ static void dlm_do_local_recovery_cleanup(struct dlm_ctxt *dlm, u8 dead_node)
2260 } 2315 }
2261 spin_unlock(&res->spinlock); 2316 spin_unlock(&res->spinlock);
2262 continue; 2317 continue;
2263 } 2318 }
2264 spin_lock(&res->spinlock); 2319 spin_lock(&res->spinlock);
2265 /* zero the lvb if necessary */ 2320 /* zero the lvb if necessary */
2266 dlm_revalidate_lvb(dlm, res, dead_node); 2321 dlm_revalidate_lvb(dlm, res, dead_node);
@@ -2411,7 +2466,7 @@ static void dlm_reco_unlock_ast(void *astdata, enum dlm_status st)
2411 * this function on each node racing to become the recovery 2466 * this function on each node racing to become the recovery
2412 * master will not stop attempting this until either: 2467 * master will not stop attempting this until either:
2413 * a) this node gets the EX (and becomes the recovery master), 2468 * a) this node gets the EX (and becomes the recovery master),
2414 * or b) dlm->reco.new_master gets set to some nodenum 2469 * or b) dlm->reco.new_master gets set to some nodenum
2415 * != O2NM_INVALID_NODE_NUM (another node will do the reco). 2470 * != O2NM_INVALID_NODE_NUM (another node will do the reco).
2416 * so each time a recovery master is needed, the entire cluster 2471 * so each time a recovery master is needed, the entire cluster
2417 * will sync at this point. if the new master dies, that will 2472 * will sync at this point. if the new master dies, that will
@@ -2424,7 +2479,7 @@ static int dlm_pick_recovery_master(struct dlm_ctxt *dlm)
2424 2479
2425 mlog(0, "starting recovery of %s at %lu, dead=%u, this=%u\n", 2480 mlog(0, "starting recovery of %s at %lu, dead=%u, this=%u\n",
2426 dlm->name, jiffies, dlm->reco.dead_node, dlm->node_num); 2481 dlm->name, jiffies, dlm->reco.dead_node, dlm->node_num);
2427again: 2482again:
2428 memset(&lksb, 0, sizeof(lksb)); 2483 memset(&lksb, 0, sizeof(lksb));
2429 2484
2430 ret = dlmlock(dlm, LKM_EXMODE, &lksb, LKM_NOQUEUE|LKM_RECOVERY, 2485 ret = dlmlock(dlm, LKM_EXMODE, &lksb, LKM_NOQUEUE|LKM_RECOVERY,
@@ -2437,8 +2492,8 @@ again:
2437 if (ret == DLM_NORMAL) { 2492 if (ret == DLM_NORMAL) {
2438 mlog(0, "dlm=%s dlmlock says I got it (this=%u)\n", 2493 mlog(0, "dlm=%s dlmlock says I got it (this=%u)\n",
2439 dlm->name, dlm->node_num); 2494 dlm->name, dlm->node_num);
2440 2495
2441 /* got the EX lock. check to see if another node 2496 /* got the EX lock. check to see if another node
2442 * just became the reco master */ 2497 * just became the reco master */
2443 if (dlm_reco_master_ready(dlm)) { 2498 if (dlm_reco_master_ready(dlm)) {
2444 mlog(0, "%s: got reco EX lock, but %u will " 2499 mlog(0, "%s: got reco EX lock, but %u will "
@@ -2451,12 +2506,12 @@ again:
2451 /* see if recovery was already finished elsewhere */ 2506 /* see if recovery was already finished elsewhere */
2452 spin_lock(&dlm->spinlock); 2507 spin_lock(&dlm->spinlock);
2453 if (dlm->reco.dead_node == O2NM_INVALID_NODE_NUM) { 2508 if (dlm->reco.dead_node == O2NM_INVALID_NODE_NUM) {
2454 status = -EINVAL; 2509 status = -EINVAL;
2455 mlog(0, "%s: got reco EX lock, but " 2510 mlog(0, "%s: got reco EX lock, but "
2456 "node got recovered already\n", dlm->name); 2511 "node got recovered already\n", dlm->name);
2457 if (dlm->reco.new_master != O2NM_INVALID_NODE_NUM) { 2512 if (dlm->reco.new_master != O2NM_INVALID_NODE_NUM) {
2458 mlog(ML_ERROR, "%s: new master is %u " 2513 mlog(ML_ERROR, "%s: new master is %u "
2459 "but no dead node!\n", 2514 "but no dead node!\n",
2460 dlm->name, dlm->reco.new_master); 2515 dlm->name, dlm->reco.new_master);
2461 BUG(); 2516 BUG();
2462 } 2517 }
@@ -2468,7 +2523,7 @@ again:
2468 * set the master and send the messages to begin recovery */ 2523 * set the master and send the messages to begin recovery */
2469 if (!status) { 2524 if (!status) {
2470 mlog(0, "%s: dead=%u, this=%u, sending " 2525 mlog(0, "%s: dead=%u, this=%u, sending "
2471 "begin_reco now\n", dlm->name, 2526 "begin_reco now\n", dlm->name,
2472 dlm->reco.dead_node, dlm->node_num); 2527 dlm->reco.dead_node, dlm->node_num);
2473 status = dlm_send_begin_reco_message(dlm, 2528 status = dlm_send_begin_reco_message(dlm,
2474 dlm->reco.dead_node); 2529 dlm->reco.dead_node);
@@ -2501,7 +2556,7 @@ again:
2501 mlog(0, "dlm=%s dlmlock says another node got it (this=%u)\n", 2556 mlog(0, "dlm=%s dlmlock says another node got it (this=%u)\n",
2502 dlm->name, dlm->node_num); 2557 dlm->name, dlm->node_num);
2503 /* another node is master. wait on 2558 /* another node is master. wait on
2504 * reco.new_master != O2NM_INVALID_NODE_NUM 2559 * reco.new_master != O2NM_INVALID_NODE_NUM
2505 * for at most one second */ 2560 * for at most one second */
2506 wait_event_timeout(dlm->dlm_reco_thread_wq, 2561 wait_event_timeout(dlm->dlm_reco_thread_wq,
2507 dlm_reco_master_ready(dlm), 2562 dlm_reco_master_ready(dlm),
@@ -2589,7 +2644,13 @@ retry:
2589 "begin reco msg (%d)\n", dlm->name, nodenum, ret); 2644 "begin reco msg (%d)\n", dlm->name, nodenum, ret);
2590 ret = 0; 2645 ret = 0;
2591 } 2646 }
2592 if (ret == -EAGAIN) { 2647
2648 /*
2649 * Prior to commit aad1b15310b9bcd59fa81ab8f2b1513b59553ea8,
2650 * dlm_begin_reco_handler() returned EAGAIN and not -EAGAIN.
2651 * We are handling both for compatibility reasons.
2652 */
2653 if (ret == -EAGAIN || ret == EAGAIN) {
2593 mlog(0, "%s: trying to start recovery of node " 2654 mlog(0, "%s: trying to start recovery of node "
2594 "%u, but node %u is waiting for last recovery " 2655 "%u, but node %u is waiting for last recovery "
2595 "to complete, backoff for a bit\n", dlm->name, 2656 "to complete, backoff for a bit\n", dlm->name,
@@ -2599,7 +2660,7 @@ retry:
2599 } 2660 }
2600 if (ret < 0) { 2661 if (ret < 0) {
2601 struct dlm_lock_resource *res; 2662 struct dlm_lock_resource *res;
2602 /* this is now a serious problem, possibly ENOMEM 2663 /* this is now a serious problem, possibly ENOMEM
2603 * in the network stack. must retry */ 2664 * in the network stack. must retry */
2604 mlog_errno(ret); 2665 mlog_errno(ret);
2605 mlog(ML_ERROR, "begin reco of dlm %s to node %u " 2666 mlog(ML_ERROR, "begin reco of dlm %s to node %u "
@@ -2612,7 +2673,7 @@ retry:
2612 } else { 2673 } else {
2613 mlog(ML_ERROR, "recovery lock not found\n"); 2674 mlog(ML_ERROR, "recovery lock not found\n");
2614 } 2675 }
2615 /* sleep for a bit in hopes that we can avoid 2676 /* sleep for a bit in hopes that we can avoid
2616 * another ENOMEM */ 2677 * another ENOMEM */
2617 msleep(100); 2678 msleep(100);
2618 goto retry; 2679 goto retry;
@@ -2664,7 +2725,7 @@ int dlm_begin_reco_handler(struct o2net_msg *msg, u32 len, void *data,
2664 } 2725 }
2665 if (dlm->reco.dead_node != O2NM_INVALID_NODE_NUM) { 2726 if (dlm->reco.dead_node != O2NM_INVALID_NODE_NUM) {
2666 mlog(ML_NOTICE, "%s: dead_node previously set to %u, " 2727 mlog(ML_NOTICE, "%s: dead_node previously set to %u, "
2667 "node %u changing it to %u\n", dlm->name, 2728 "node %u changing it to %u\n", dlm->name,
2668 dlm->reco.dead_node, br->node_idx, br->dead_node); 2729 dlm->reco.dead_node, br->node_idx, br->dead_node);
2669 } 2730 }
2670 dlm_set_reco_master(dlm, br->node_idx); 2731 dlm_set_reco_master(dlm, br->node_idx);
@@ -2730,8 +2791,8 @@ stage2:
2730 if (ret < 0) { 2791 if (ret < 0) {
2731 mlog_errno(ret); 2792 mlog_errno(ret);
2732 if (dlm_is_host_down(ret)) { 2793 if (dlm_is_host_down(ret)) {
2733 /* this has no effect on this recovery 2794 /* this has no effect on this recovery
2734 * session, so set the status to zero to 2795 * session, so set the status to zero to
2735 * finish out the last recovery */ 2796 * finish out the last recovery */
2736 mlog(ML_ERROR, "node %u went down after this " 2797 mlog(ML_ERROR, "node %u went down after this "
2737 "node finished recovery.\n", nodenum); 2798 "node finished recovery.\n", nodenum);
@@ -2768,7 +2829,7 @@ int dlm_finalize_reco_handler(struct o2net_msg *msg, u32 len, void *data,
2768 mlog(0, "%s: node %u finalizing recovery stage%d of " 2829 mlog(0, "%s: node %u finalizing recovery stage%d of "
2769 "node %u (%u:%u)\n", dlm->name, fr->node_idx, stage, 2830 "node %u (%u:%u)\n", dlm->name, fr->node_idx, stage,
2770 fr->dead_node, dlm->reco.dead_node, dlm->reco.new_master); 2831 fr->dead_node, dlm->reco.dead_node, dlm->reco.new_master);
2771 2832
2772 spin_lock(&dlm->spinlock); 2833 spin_lock(&dlm->spinlock);
2773 2834
2774 if (dlm->reco.new_master != fr->node_idx) { 2835 if (dlm->reco.new_master != fr->node_idx) {
diff --git a/fs/ocfs2/dlm/dlmunlock.c b/fs/ocfs2/dlm/dlmunlock.c
index 00f53b2aea76..49e29ecd0201 100644
--- a/fs/ocfs2/dlm/dlmunlock.c
+++ b/fs/ocfs2/dlm/dlmunlock.c
@@ -190,8 +190,8 @@ static enum dlm_status dlmunlock_common(struct dlm_ctxt *dlm,
190 actions &= ~(DLM_UNLOCK_REMOVE_LOCK| 190 actions &= ~(DLM_UNLOCK_REMOVE_LOCK|
191 DLM_UNLOCK_REGRANT_LOCK| 191 DLM_UNLOCK_REGRANT_LOCK|
192 DLM_UNLOCK_CLEAR_CONVERT_TYPE); 192 DLM_UNLOCK_CLEAR_CONVERT_TYPE);
193 } else if (status == DLM_RECOVERING || 193 } else if (status == DLM_RECOVERING ||
194 status == DLM_MIGRATING || 194 status == DLM_MIGRATING ||
195 status == DLM_FORWARD) { 195 status == DLM_FORWARD) {
196 /* must clear the actions because this unlock 196 /* must clear the actions because this unlock
197 * is about to be retried. cannot free or do 197 * is about to be retried. cannot free or do
@@ -661,14 +661,14 @@ retry:
661 if (call_ast) { 661 if (call_ast) {
662 mlog(0, "calling unlockast(%p, %d)\n", data, status); 662 mlog(0, "calling unlockast(%p, %d)\n", data, status);
663 if (is_master) { 663 if (is_master) {
664 /* it is possible that there is one last bast 664 /* it is possible that there is one last bast
665 * pending. make sure it is flushed, then 665 * pending. make sure it is flushed, then
666 * call the unlockast. 666 * call the unlockast.
667 * not an issue if this is a mastered remotely, 667 * not an issue if this is a mastered remotely,
668 * since this lock has been removed from the 668 * since this lock has been removed from the
669 * lockres queues and cannot be found. */ 669 * lockres queues and cannot be found. */
670 dlm_kick_thread(dlm, NULL); 670 dlm_kick_thread(dlm, NULL);
671 wait_event(dlm->ast_wq, 671 wait_event(dlm->ast_wq,
672 dlm_lock_basts_flushed(dlm, lock)); 672 dlm_lock_basts_flushed(dlm, lock));
673 } 673 }
674 (*unlockast)(data, status); 674 (*unlockast)(data, status);
diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c
index c5e4a49e3a12..e044019cb3b1 100644
--- a/fs/ocfs2/dlmglue.c
+++ b/fs/ocfs2/dlmglue.c
@@ -875,6 +875,14 @@ static inline void ocfs2_generic_handle_convert_action(struct ocfs2_lock_res *lo
875 lockres_or_flags(lockres, OCFS2_LOCK_NEEDS_REFRESH); 875 lockres_or_flags(lockres, OCFS2_LOCK_NEEDS_REFRESH);
876 876
877 lockres->l_level = lockres->l_requested; 877 lockres->l_level = lockres->l_requested;
878
879 /*
880 * We set the OCFS2_LOCK_UPCONVERT_FINISHING flag before clearing
881 * the OCFS2_LOCK_BUSY flag to prevent the dc thread from
882 * downconverting the lock before the upconvert has fully completed.
883 */
884 lockres_or_flags(lockres, OCFS2_LOCK_UPCONVERT_FINISHING);
885
878 lockres_clear_flags(lockres, OCFS2_LOCK_BUSY); 886 lockres_clear_flags(lockres, OCFS2_LOCK_BUSY);
879 887
880 mlog_exit_void(); 888 mlog_exit_void();
@@ -907,8 +915,6 @@ static int ocfs2_generic_handle_bast(struct ocfs2_lock_res *lockres,
907 915
908 assert_spin_locked(&lockres->l_lock); 916 assert_spin_locked(&lockres->l_lock);
909 917
910 lockres_or_flags(lockres, OCFS2_LOCK_BLOCKED);
911
912 if (level > lockres->l_blocking) { 918 if (level > lockres->l_blocking) {
913 /* only schedule a downconvert if we haven't already scheduled 919 /* only schedule a downconvert if we haven't already scheduled
914 * one that goes low enough to satisfy the level we're 920 * one that goes low enough to satisfy the level we're
@@ -921,6 +927,9 @@ static int ocfs2_generic_handle_bast(struct ocfs2_lock_res *lockres,
921 lockres->l_blocking = level; 927 lockres->l_blocking = level;
922 } 928 }
923 929
930 if (needs_downconvert)
931 lockres_or_flags(lockres, OCFS2_LOCK_BLOCKED);
932
924 mlog_exit(needs_downconvert); 933 mlog_exit(needs_downconvert);
925 return needs_downconvert; 934 return needs_downconvert;
926} 935}
@@ -1133,6 +1142,7 @@ static inline void ocfs2_recover_from_dlm_error(struct ocfs2_lock_res *lockres,
1133 mlog_entry_void(); 1142 mlog_entry_void();
1134 spin_lock_irqsave(&lockres->l_lock, flags); 1143 spin_lock_irqsave(&lockres->l_lock, flags);
1135 lockres_clear_flags(lockres, OCFS2_LOCK_BUSY); 1144 lockres_clear_flags(lockres, OCFS2_LOCK_BUSY);
1145 lockres_clear_flags(lockres, OCFS2_LOCK_UPCONVERT_FINISHING);
1136 if (convert) 1146 if (convert)
1137 lockres->l_action = OCFS2_AST_INVALID; 1147 lockres->l_action = OCFS2_AST_INVALID;
1138 else 1148 else
@@ -1323,13 +1333,13 @@ static int __ocfs2_cluster_lock(struct ocfs2_super *osb,
1323again: 1333again:
1324 wait = 0; 1334 wait = 0;
1325 1335
1336 spin_lock_irqsave(&lockres->l_lock, flags);
1337
1326 if (catch_signals && signal_pending(current)) { 1338 if (catch_signals && signal_pending(current)) {
1327 ret = -ERESTARTSYS; 1339 ret = -ERESTARTSYS;
1328 goto out; 1340 goto unlock;
1329 } 1341 }
1330 1342
1331 spin_lock_irqsave(&lockres->l_lock, flags);
1332
1333 mlog_bug_on_msg(lockres->l_flags & OCFS2_LOCK_FREEING, 1343 mlog_bug_on_msg(lockres->l_flags & OCFS2_LOCK_FREEING,
1334 "Cluster lock called on freeing lockres %s! flags " 1344 "Cluster lock called on freeing lockres %s! flags "
1335 "0x%lx\n", lockres->l_name, lockres->l_flags); 1345 "0x%lx\n", lockres->l_name, lockres->l_flags);
@@ -1346,6 +1356,25 @@ again:
1346 goto unlock; 1356 goto unlock;
1347 } 1357 }
1348 1358
1359 if (lockres->l_flags & OCFS2_LOCK_UPCONVERT_FINISHING) {
1360 /*
1361 * We've upconverted. If the lock now has a level we can
1362 * work with, we take it. If, however, the lock is not at the
1363 * required level, we go thru the full cycle. One way this could
1364 * happen is if a process requesting an upconvert to PR is
1365 * closely followed by another requesting upconvert to an EX.
1366 * If the process requesting EX lands here, we want it to
1367 * continue attempting to upconvert and let the process
1368 * requesting PR take the lock.
1369 * If multiple processes request upconvert to PR, the first one
1370 * here will take the lock. The others will have to go thru the
1371 * OCFS2_LOCK_BLOCKED check to ensure that there is no pending
1372 * downconvert request.
1373 */
1374 if (level <= lockres->l_level)
1375 goto update_holders;
1376 }
1377
1349 if (lockres->l_flags & OCFS2_LOCK_BLOCKED && 1378 if (lockres->l_flags & OCFS2_LOCK_BLOCKED &&
1350 !ocfs2_may_continue_on_blocked_lock(lockres, level)) { 1379 !ocfs2_may_continue_on_blocked_lock(lockres, level)) {
1351 /* is the lock is currently blocked on behalf of 1380 /* is the lock is currently blocked on behalf of
@@ -1416,11 +1445,14 @@ again:
1416 goto again; 1445 goto again;
1417 } 1446 }
1418 1447
1448update_holders:
1419 /* Ok, if we get here then we're good to go. */ 1449 /* Ok, if we get here then we're good to go. */
1420 ocfs2_inc_holders(lockres, level); 1450 ocfs2_inc_holders(lockres, level);
1421 1451
1422 ret = 0; 1452 ret = 0;
1423unlock: 1453unlock:
1454 lockres_clear_flags(lockres, OCFS2_LOCK_UPCONVERT_FINISHING);
1455
1424 spin_unlock_irqrestore(&lockres->l_lock, flags); 1456 spin_unlock_irqrestore(&lockres->l_lock, flags);
1425out: 1457out:
1426 /* 1458 /*
@@ -3155,7 +3187,7 @@ out:
3155/* Mark the lockres as being dropped. It will no longer be 3187/* Mark the lockres as being dropped. It will no longer be
3156 * queued if blocking, but we still may have to wait on it 3188 * queued if blocking, but we still may have to wait on it
3157 * being dequeued from the downconvert thread before we can consider 3189 * being dequeued from the downconvert thread before we can consider
3158 * it safe to drop. 3190 * it safe to drop.
3159 * 3191 *
3160 * You can *not* attempt to call cluster_lock on this lockres anymore. */ 3192 * You can *not* attempt to call cluster_lock on this lockres anymore. */
3161void ocfs2_mark_lockres_freeing(struct ocfs2_lock_res *lockres) 3193void ocfs2_mark_lockres_freeing(struct ocfs2_lock_res *lockres)
@@ -3352,6 +3384,7 @@ static int ocfs2_unblock_lock(struct ocfs2_super *osb,
3352 unsigned long flags; 3384 unsigned long flags;
3353 int blocking; 3385 int blocking;
3354 int new_level; 3386 int new_level;
3387 int level;
3355 int ret = 0; 3388 int ret = 0;
3356 int set_lvb = 0; 3389 int set_lvb = 0;
3357 unsigned int gen; 3390 unsigned int gen;
@@ -3360,9 +3393,17 @@ static int ocfs2_unblock_lock(struct ocfs2_super *osb,
3360 3393
3361 spin_lock_irqsave(&lockres->l_lock, flags); 3394 spin_lock_irqsave(&lockres->l_lock, flags);
3362 3395
3363 BUG_ON(!(lockres->l_flags & OCFS2_LOCK_BLOCKED));
3364
3365recheck: 3396recheck:
3397 /*
3398 * Is it still blocking? If not, we have no more work to do.
3399 */
3400 if (!(lockres->l_flags & OCFS2_LOCK_BLOCKED)) {
3401 BUG_ON(lockres->l_blocking != DLM_LOCK_NL);
3402 spin_unlock_irqrestore(&lockres->l_lock, flags);
3403 ret = 0;
3404 goto leave;
3405 }
3406
3366 if (lockres->l_flags & OCFS2_LOCK_BUSY) { 3407 if (lockres->l_flags & OCFS2_LOCK_BUSY) {
3367 /* XXX 3408 /* XXX
3368 * This is a *big* race. The OCFS2_LOCK_PENDING flag 3409 * This is a *big* race. The OCFS2_LOCK_PENDING flag
@@ -3401,6 +3442,31 @@ recheck:
3401 goto leave; 3442 goto leave;
3402 } 3443 }
3403 3444
3445 /*
3446 * This prevents livelocks. OCFS2_LOCK_UPCONVERT_FINISHING flag is
3447 * set when the ast is received for an upconvert just before the
3448 * OCFS2_LOCK_BUSY flag is cleared. Now if the fs received a bast
3449 * on the heels of the ast, we want to delay the downconvert just
3450 * enough to allow the up requestor to do its task. Because this
3451 * lock is in the blocked queue, the lock will be downconverted
3452 * as soon as the requestor is done with the lock.
3453 */
3454 if (lockres->l_flags & OCFS2_LOCK_UPCONVERT_FINISHING)
3455 goto leave_requeue;
3456
3457 /*
3458 * How can we block and yet be at NL? We were trying to upconvert
3459 * from NL and got canceled. The code comes back here, and now
3460 * we notice and clear BLOCKING.
3461 */
3462 if (lockres->l_level == DLM_LOCK_NL) {
3463 BUG_ON(lockres->l_ex_holders || lockres->l_ro_holders);
3464 lockres->l_blocking = DLM_LOCK_NL;
3465 lockres_clear_flags(lockres, OCFS2_LOCK_BLOCKED);
3466 spin_unlock_irqrestore(&lockres->l_lock, flags);
3467 goto leave;
3468 }
3469
3404 /* if we're blocking an exclusive and we have *any* holders, 3470 /* if we're blocking an exclusive and we have *any* holders,
3405 * then requeue. */ 3471 * then requeue. */
3406 if ((lockres->l_blocking == DLM_LOCK_EX) 3472 if ((lockres->l_blocking == DLM_LOCK_EX)
@@ -3438,6 +3504,7 @@ recheck:
3438 * may sleep, so we save off a copy of what we're blocking as 3504 * may sleep, so we save off a copy of what we're blocking as
3439 * it may change while we're not holding the spin lock. */ 3505 * it may change while we're not holding the spin lock. */
3440 blocking = lockres->l_blocking; 3506 blocking = lockres->l_blocking;
3507 level = lockres->l_level;
3441 spin_unlock_irqrestore(&lockres->l_lock, flags); 3508 spin_unlock_irqrestore(&lockres->l_lock, flags);
3442 3509
3443 ctl->unblock_action = lockres->l_ops->downconvert_worker(lockres, blocking); 3510 ctl->unblock_action = lockres->l_ops->downconvert_worker(lockres, blocking);
@@ -3446,7 +3513,7 @@ recheck:
3446 goto leave; 3513 goto leave;
3447 3514
3448 spin_lock_irqsave(&lockres->l_lock, flags); 3515 spin_lock_irqsave(&lockres->l_lock, flags);
3449 if (blocking != lockres->l_blocking) { 3516 if ((blocking != lockres->l_blocking) || (level != lockres->l_level)) {
3450 /* If this changed underneath us, then we can't drop 3517 /* If this changed underneath us, then we can't drop
3451 * it just yet. */ 3518 * it just yet. */
3452 goto recheck; 3519 goto recheck;
diff --git a/fs/ocfs2/export.c b/fs/ocfs2/export.c
index 15713cbb865c..19ad145d2af3 100644
--- a/fs/ocfs2/export.c
+++ b/fs/ocfs2/export.c
@@ -239,7 +239,7 @@ static int ocfs2_encode_fh(struct dentry *dentry, u32 *fh_in, int *max_len,
239 mlog(0, "Encoding parent: blkno: %llu, generation: %u\n", 239 mlog(0, "Encoding parent: blkno: %llu, generation: %u\n",
240 (unsigned long long)blkno, generation); 240 (unsigned long long)blkno, generation);
241 } 241 }
242 242
243 *max_len = len; 243 *max_len = len;
244 244
245bail: 245bail:
diff --git a/fs/ocfs2/extent_map.c b/fs/ocfs2/extent_map.c
index d35a27f4523e..5328529e7fd2 100644
--- a/fs/ocfs2/extent_map.c
+++ b/fs/ocfs2/extent_map.c
@@ -192,7 +192,7 @@ static int ocfs2_try_to_merge_extent_map(struct ocfs2_extent_map_item *emi,
192 emi->ei_clusters += ins->ei_clusters; 192 emi->ei_clusters += ins->ei_clusters;
193 return 1; 193 return 1;
194 } else if ((ins->ei_phys + ins->ei_clusters) == emi->ei_phys && 194 } else if ((ins->ei_phys + ins->ei_clusters) == emi->ei_phys &&
195 (ins->ei_cpos + ins->ei_clusters) == emi->ei_phys && 195 (ins->ei_cpos + ins->ei_clusters) == emi->ei_cpos &&
196 ins->ei_flags == emi->ei_flags) { 196 ins->ei_flags == emi->ei_flags) {
197 emi->ei_phys = ins->ei_phys; 197 emi->ei_phys = ins->ei_phys;
198 emi->ei_cpos = ins->ei_cpos; 198 emi->ei_cpos = ins->ei_cpos;
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 06ccf6a86d35..558ce0312421 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -749,7 +749,7 @@ static int ocfs2_write_zero_page(struct inode *inode,
749 int ret; 749 int ret;
750 750
751 offset = (size & (PAGE_CACHE_SIZE-1)); /* Within page */ 751 offset = (size & (PAGE_CACHE_SIZE-1)); /* Within page */
752 /* ugh. in prepare/commit_write, if from==to==start of block, we 752 /* ugh. in prepare/commit_write, if from==to==start of block, we
753 ** skip the prepare. make sure we never send an offset for the start 753 ** skip the prepare. make sure we never send an offset for the start
754 ** of a block 754 ** of a block
755 */ 755 */
@@ -1779,7 +1779,7 @@ static int ocfs2_prepare_inode_for_write(struct dentry *dentry,
1779 struct inode *inode = dentry->d_inode; 1779 struct inode *inode = dentry->d_inode;
1780 loff_t saved_pos, end; 1780 loff_t saved_pos, end;
1781 1781
1782 /* 1782 /*
1783 * We start with a read level meta lock and only jump to an ex 1783 * We start with a read level meta lock and only jump to an ex
1784 * if we need to make modifications here. 1784 * if we need to make modifications here.
1785 */ 1785 */
@@ -2013,8 +2013,8 @@ out_dio:
2013 /* buffered aio wouldn't have proper lock coverage today */ 2013 /* buffered aio wouldn't have proper lock coverage today */
2014 BUG_ON(ret == -EIOCBQUEUED && !(file->f_flags & O_DIRECT)); 2014 BUG_ON(ret == -EIOCBQUEUED && !(file->f_flags & O_DIRECT));
2015 2015
2016 if ((file->f_flags & O_DSYNC && !direct_io) || IS_SYNC(inode) || 2016 if (((file->f_flags & O_DSYNC) && !direct_io) || IS_SYNC(inode) ||
2017 (file->f_flags & O_DIRECT && has_refcount)) { 2017 ((file->f_flags & O_DIRECT) && has_refcount)) {
2018 ret = filemap_fdatawrite_range(file->f_mapping, pos, 2018 ret = filemap_fdatawrite_range(file->f_mapping, pos,
2019 pos + count - 1); 2019 pos + count - 1);
2020 if (ret < 0) 2020 if (ret < 0)
@@ -2033,7 +2033,7 @@ out_dio:
2033 pos + count - 1); 2033 pos + count - 1);
2034 } 2034 }
2035 2035
2036 /* 2036 /*
2037 * deep in g_f_a_w_n()->ocfs2_direct_IO we pass in a ocfs2_dio_end_io 2037 * deep in g_f_a_w_n()->ocfs2_direct_IO we pass in a ocfs2_dio_end_io
2038 * function pointer which is called when o_direct io completes so that 2038 * function pointer which is called when o_direct io completes so that
2039 * it can unlock our rw lock. (it's the clustered equivalent of 2039 * it can unlock our rw lock. (it's the clustered equivalent of
@@ -2198,7 +2198,7 @@ static ssize_t ocfs2_file_aio_read(struct kiocb *iocb,
2198 goto bail; 2198 goto bail;
2199 } 2199 }
2200 2200
2201 /* 2201 /*
2202 * buffered reads protect themselves in ->readpage(). O_DIRECT reads 2202 * buffered reads protect themselves in ->readpage(). O_DIRECT reads
2203 * need locks to protect pending reads from racing with truncate. 2203 * need locks to protect pending reads from racing with truncate.
2204 */ 2204 */
@@ -2220,10 +2220,10 @@ static ssize_t ocfs2_file_aio_read(struct kiocb *iocb,
2220 * We're fine letting folks race truncates and extending 2220 * We're fine letting folks race truncates and extending
2221 * writes with read across the cluster, just like they can 2221 * writes with read across the cluster, just like they can
2222 * locally. Hence no rw_lock during read. 2222 * locally. Hence no rw_lock during read.
2223 * 2223 *
2224 * Take and drop the meta data lock to update inode fields 2224 * Take and drop the meta data lock to update inode fields
2225 * like i_size. This allows the checks down below 2225 * like i_size. This allows the checks down below
2226 * generic_file_aio_read() a chance of actually working. 2226 * generic_file_aio_read() a chance of actually working.
2227 */ 2227 */
2228 ret = ocfs2_inode_lock_atime(inode, filp->f_vfsmnt, &lock_level); 2228 ret = ocfs2_inode_lock_atime(inode, filp->f_vfsmnt, &lock_level);
2229 if (ret < 0) { 2229 if (ret < 0) {
@@ -2248,7 +2248,7 @@ static ssize_t ocfs2_file_aio_read(struct kiocb *iocb,
2248bail: 2248bail:
2249 if (have_alloc_sem) 2249 if (have_alloc_sem)
2250 up_read(&inode->i_alloc_sem); 2250 up_read(&inode->i_alloc_sem);
2251 if (rw_level != -1) 2251 if (rw_level != -1)
2252 ocfs2_rw_unlock(inode, rw_level); 2252 ocfs2_rw_unlock(inode, rw_level);
2253 mlog_exit(ret); 2253 mlog_exit(ret);
2254 2254
diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c
index 0297fb8982b8..88459bdd1ff3 100644
--- a/fs/ocfs2/inode.c
+++ b/fs/ocfs2/inode.c
@@ -475,7 +475,7 @@ static int ocfs2_read_locked_inode(struct inode *inode,
475 if (args->fi_flags & OCFS2_FI_FLAG_ORPHAN_RECOVERY) { 475 if (args->fi_flags & OCFS2_FI_FLAG_ORPHAN_RECOVERY) {
476 status = ocfs2_try_open_lock(inode, 0); 476 status = ocfs2_try_open_lock(inode, 0);
477 if (status) { 477 if (status) {
478 make_bad_inode(inode); 478 make_bad_inode(inode);
479 return status; 479 return status;
480 } 480 }
481 } 481 }
@@ -684,7 +684,7 @@ bail:
684 return status; 684 return status;
685} 685}
686 686
687/* 687/*
688 * Serialize with orphan dir recovery. If the process doing 688 * Serialize with orphan dir recovery. If the process doing
689 * recovery on this orphan dir does an iget() with the dir 689 * recovery on this orphan dir does an iget() with the dir
690 * i_mutex held, we'll deadlock here. Instead we detect this 690 * i_mutex held, we'll deadlock here. Instead we detect this
diff --git a/fs/ocfs2/ioctl.c b/fs/ocfs2/ioctl.c
index 31fbb0619510..7d9d9c132cef 100644
--- a/fs/ocfs2/ioctl.c
+++ b/fs/ocfs2/ioctl.c
@@ -7,6 +7,7 @@
7 7
8#include <linux/fs.h> 8#include <linux/fs.h>
9#include <linux/mount.h> 9#include <linux/mount.h>
10#include <linux/compat.h>
10 11
11#define MLOG_MASK_PREFIX ML_INODE 12#define MLOG_MASK_PREFIX ML_INODE
12#include <cluster/masklog.h> 13#include <cluster/masklog.h>
@@ -181,6 +182,10 @@ long ocfs2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
181#ifdef CONFIG_COMPAT 182#ifdef CONFIG_COMPAT
182long ocfs2_compat_ioctl(struct file *file, unsigned cmd, unsigned long arg) 183long ocfs2_compat_ioctl(struct file *file, unsigned cmd, unsigned long arg)
183{ 184{
185 bool preserve;
186 struct reflink_arguments args;
187 struct inode *inode = file->f_path.dentry->d_inode;
188
184 switch (cmd) { 189 switch (cmd) {
185 case OCFS2_IOC32_GETFLAGS: 190 case OCFS2_IOC32_GETFLAGS:
186 cmd = OCFS2_IOC_GETFLAGS; 191 cmd = OCFS2_IOC_GETFLAGS;
@@ -195,8 +200,15 @@ long ocfs2_compat_ioctl(struct file *file, unsigned cmd, unsigned long arg)
195 case OCFS2_IOC_GROUP_EXTEND: 200 case OCFS2_IOC_GROUP_EXTEND:
196 case OCFS2_IOC_GROUP_ADD: 201 case OCFS2_IOC_GROUP_ADD:
197 case OCFS2_IOC_GROUP_ADD64: 202 case OCFS2_IOC_GROUP_ADD64:
198 case OCFS2_IOC_REFLINK:
199 break; 203 break;
204 case OCFS2_IOC_REFLINK:
205 if (copy_from_user(&args, (struct reflink_arguments *)arg,
206 sizeof(args)))
207 return -EFAULT;
208 preserve = (args.preserve != 0);
209
210 return ocfs2_reflink_ioctl(inode, compat_ptr(args.old_path),
211 compat_ptr(args.new_path), preserve);
200 default: 212 default:
201 return -ENOIOCTLCMD; 213 return -ENOIOCTLCMD;
202 } 214 }
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
index bf34c491ae96..9336c60e3a36 100644
--- a/fs/ocfs2/journal.c
+++ b/fs/ocfs2/journal.c
@@ -2034,7 +2034,7 @@ static int ocfs2_queue_orphans(struct ocfs2_super *osb,
2034 status = -ENOENT; 2034 status = -ENOENT;
2035 mlog_errno(status); 2035 mlog_errno(status);
2036 return status; 2036 return status;
2037 } 2037 }
2038 2038
2039 mutex_lock(&orphan_dir_inode->i_mutex); 2039 mutex_lock(&orphan_dir_inode->i_mutex);
2040 status = ocfs2_inode_lock(orphan_dir_inode, NULL, 0); 2040 status = ocfs2_inode_lock(orphan_dir_inode, NULL, 0);
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h
index 9362eea7424b..740f448041e2 100644
--- a/fs/ocfs2/ocfs2.h
+++ b/fs/ocfs2/ocfs2.h
@@ -136,6 +136,10 @@ enum ocfs2_unlock_action {
136#define OCFS2_LOCK_PENDING (0x00000400) /* This lockres is pending a 136#define OCFS2_LOCK_PENDING (0x00000400) /* This lockres is pending a
137 call to dlm_lock. Only 137 call to dlm_lock. Only
138 exists with BUSY set. */ 138 exists with BUSY set. */
139#define OCFS2_LOCK_UPCONVERT_FINISHING (0x00000800) /* blocks the dc thread
140 * from downconverting
141 * before the upconvert
142 * has completed */
139 143
140struct ocfs2_lock_res_ops; 144struct ocfs2_lock_res_ops;
141 145
diff --git a/fs/ocfs2/ocfs2_fs.h b/fs/ocfs2/ocfs2_fs.h
index 1a1a679e51b5..7638a38c32bc 100644
--- a/fs/ocfs2/ocfs2_fs.h
+++ b/fs/ocfs2/ocfs2_fs.h
@@ -1417,9 +1417,16 @@ static inline int ocfs2_fast_symlink_chars(int blocksize)
1417 return blocksize - offsetof(struct ocfs2_dinode, id2.i_symlink); 1417 return blocksize - offsetof(struct ocfs2_dinode, id2.i_symlink);
1418} 1418}
1419 1419
1420static inline int ocfs2_max_inline_data(int blocksize) 1420static inline int ocfs2_max_inline_data_with_xattr(int blocksize,
1421 struct ocfs2_dinode *di)
1421{ 1422{
1422 return blocksize - offsetof(struct ocfs2_dinode, id2.i_data.id_data); 1423 if (di && (di->i_dyn_features & OCFS2_INLINE_XATTR_FL))
1424 return blocksize -
1425 offsetof(struct ocfs2_dinode, id2.i_data.id_data) -
1426 di->i_xattr_inline_size;
1427 else
1428 return blocksize -
1429 offsetof(struct ocfs2_dinode, id2.i_data.id_data);
1423} 1430}
1424 1431
1425static inline int ocfs2_extent_recs_per_inode(int blocksize) 1432static inline int ocfs2_extent_recs_per_inode(int blocksize)
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index 74db2be75dd6..8ae65c9c020c 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -2945,7 +2945,7 @@ static int ocfs2_duplicate_clusters_by_page(handle_t *handle,
2945 2945
2946 while (offset < end) { 2946 while (offset < end) {
2947 page_index = offset >> PAGE_CACHE_SHIFT; 2947 page_index = offset >> PAGE_CACHE_SHIFT;
2948 map_end = (page_index + 1) << PAGE_CACHE_SHIFT; 2948 map_end = ((loff_t)page_index + 1) << PAGE_CACHE_SHIFT;
2949 if (map_end > end) 2949 if (map_end > end)
2950 map_end = end; 2950 map_end = end;
2951 2951
@@ -2957,8 +2957,12 @@ static int ocfs2_duplicate_clusters_by_page(handle_t *handle,
2957 2957
2958 page = grab_cache_page(mapping, page_index); 2958 page = grab_cache_page(mapping, page_index);
2959 2959
2960 /* This page can't be dirtied before we CoW it out. */ 2960 /*
2961 BUG_ON(PageDirty(page)); 2961 * In case PAGE_CACHE_SIZE <= CLUSTER_SIZE, This page
2962 * can't be dirtied before we CoW it out.
2963 */
2964 if (PAGE_CACHE_SIZE <= OCFS2_SB(sb)->s_clustersize)
2965 BUG_ON(PageDirty(page));
2962 2966
2963 if (!PageUptodate(page)) { 2967 if (!PageUptodate(page)) {
2964 ret = block_read_full_page(page, ocfs2_get_block); 2968 ret = block_read_full_page(page, ocfs2_get_block);
@@ -3170,7 +3174,7 @@ static int ocfs2_cow_sync_writeback(struct super_block *sb,
3170 3174
3171 while (offset < end) { 3175 while (offset < end) {
3172 page_index = offset >> PAGE_CACHE_SHIFT; 3176 page_index = offset >> PAGE_CACHE_SHIFT;
3173 map_end = (page_index + 1) << PAGE_CACHE_SHIFT; 3177 map_end = ((loff_t)page_index + 1) << PAGE_CACHE_SHIFT;
3174 if (map_end > end) 3178 if (map_end > end)
3175 map_end = end; 3179 map_end = end;
3176 3180
diff --git a/fs/ocfs2/stack_o2cb.c b/fs/ocfs2/stack_o2cb.c
index e49c41050264..3038c92af493 100644
--- a/fs/ocfs2/stack_o2cb.c
+++ b/fs/ocfs2/stack_o2cb.c
@@ -277,7 +277,7 @@ static int o2cb_cluster_connect(struct ocfs2_cluster_connection *conn)
277 u32 dlm_key; 277 u32 dlm_key;
278 struct dlm_ctxt *dlm; 278 struct dlm_ctxt *dlm;
279 struct o2dlm_private *priv; 279 struct o2dlm_private *priv;
280 struct dlm_protocol_version dlm_version; 280 struct dlm_protocol_version fs_version;
281 281
282 BUG_ON(conn == NULL); 282 BUG_ON(conn == NULL);
283 BUG_ON(o2cb_stack.sp_proto == NULL); 283 BUG_ON(o2cb_stack.sp_proto == NULL);
@@ -304,18 +304,18 @@ static int o2cb_cluster_connect(struct ocfs2_cluster_connection *conn)
304 /* used by the dlm code to make message headers unique, each 304 /* used by the dlm code to make message headers unique, each
305 * node in this domain must agree on this. */ 305 * node in this domain must agree on this. */
306 dlm_key = crc32_le(0, conn->cc_name, conn->cc_namelen); 306 dlm_key = crc32_le(0, conn->cc_name, conn->cc_namelen);
307 dlm_version.pv_major = conn->cc_version.pv_major; 307 fs_version.pv_major = conn->cc_version.pv_major;
308 dlm_version.pv_minor = conn->cc_version.pv_minor; 308 fs_version.pv_minor = conn->cc_version.pv_minor;
309 309
310 dlm = dlm_register_domain(conn->cc_name, dlm_key, &dlm_version); 310 dlm = dlm_register_domain(conn->cc_name, dlm_key, &fs_version);
311 if (IS_ERR(dlm)) { 311 if (IS_ERR(dlm)) {
312 rc = PTR_ERR(dlm); 312 rc = PTR_ERR(dlm);
313 mlog_errno(rc); 313 mlog_errno(rc);
314 goto out_free; 314 goto out_free;
315 } 315 }
316 316
317 conn->cc_version.pv_major = dlm_version.pv_major; 317 conn->cc_version.pv_major = fs_version.pv_major;
318 conn->cc_version.pv_minor = dlm_version.pv_minor; 318 conn->cc_version.pv_minor = fs_version.pv_minor;
319 conn->cc_lockspace = dlm; 319 conn->cc_lockspace = dlm;
320 320
321 dlm_register_eviction_cb(dlm, &priv->op_eviction_cb); 321 dlm_register_eviction_cb(dlm, &priv->op_eviction_cb);
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 26069917a9f5..755cd49a5ef3 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -1062,7 +1062,7 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent)
1062 "file system, but write access is " 1062 "file system, but write access is "
1063 "unavailable.\n"); 1063 "unavailable.\n");
1064 else 1064 else
1065 mlog_errno(status); 1065 mlog_errno(status);
1066 goto read_super_error; 1066 goto read_super_error;
1067 } 1067 }
1068 1068
diff --git a/fs/ocfs2/symlink.c b/fs/ocfs2/symlink.c
index 49b133ccbf11..32499d213fc4 100644
--- a/fs/ocfs2/symlink.c
+++ b/fs/ocfs2/symlink.c
@@ -137,20 +137,20 @@ static void *ocfs2_fast_follow_link(struct dentry *dentry,
137 } 137 }
138 138
139 memcpy(link, target, len); 139 memcpy(link, target, len);
140 nd_set_link(nd, link);
141 140
142bail: 141bail:
142 nd_set_link(nd, status ? ERR_PTR(status) : link);
143 brelse(bh); 143 brelse(bh);
144 144
145 mlog_exit(status); 145 mlog_exit(status);
146 return status ? ERR_PTR(status) : link; 146 return NULL;
147} 147}
148 148
149static void ocfs2_fast_put_link(struct dentry *dentry, struct nameidata *nd, void *cookie) 149static void ocfs2_fast_put_link(struct dentry *dentry, struct nameidata *nd, void *cookie)
150{ 150{
151 char *link = cookie; 151 char *link = nd_get_link(nd);
152 152 if (!IS_ERR(link))
153 kfree(link); 153 kfree(link);
154} 154}
155 155
156const struct inode_operations ocfs2_symlink_inode_operations = { 156const struct inode_operations ocfs2_symlink_inode_operations = {
diff --git a/fs/ocfs2/uptodate.c b/fs/ocfs2/uptodate.c
index c61369342a27..a0a120e82b97 100644
--- a/fs/ocfs2/uptodate.c
+++ b/fs/ocfs2/uptodate.c
@@ -267,8 +267,8 @@ static int ocfs2_buffer_cached(struct ocfs2_caching_info *ci,
267} 267}
268 268
269/* Warning: even if it returns true, this does *not* guarantee that 269/* Warning: even if it returns true, this does *not* guarantee that
270 * the block is stored in our inode metadata cache. 270 * the block is stored in our inode metadata cache.
271 * 271 *
272 * This can be called under lock_buffer() 272 * This can be called under lock_buffer()
273 */ 273 */
274int ocfs2_buffer_uptodate(struct ocfs2_caching_info *ci, 274int ocfs2_buffer_uptodate(struct ocfs2_caching_info *ci,