aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/9p/mux.c2
-rw-r--r--fs/Kconfig54
-rw-r--r--fs/afs/cell.c3
-rw-r--r--fs/afs/kafsasyncd.c9
-rw-r--r--fs/afs/server.c6
-rw-r--r--fs/afs/vlocation.c6
-rw-r--r--fs/afs/vnode.c3
-rw-r--r--fs/aio.c2
-rw-r--r--fs/autofs4/expire.c3
-rw-r--r--fs/buffer.c3
-rw-r--r--fs/cifs/CHANGES17
-rw-r--r--fs/cifs/Makefile2
-rw-r--r--fs/cifs/README39
-rw-r--r--fs/cifs/asn1.c10
-rw-r--r--fs/cifs/cifs_debug.c134
-rw-r--r--fs/cifs/cifs_debug.h4
-rw-r--r--fs/cifs/cifs_unicode.c1
-rw-r--r--fs/cifs/cifsencrypt.c140
-rw-r--r--fs/cifs/cifsfs.c6
-rw-r--r--fs/cifs/cifsfs.h3
-rw-r--r--fs/cifs/cifsglob.h71
-rw-r--r--fs/cifs/cifspdu.h98
-rw-r--r--fs/cifs/cifsproto.h14
-rw-r--r--fs/cifs/cifssmb.c287
-rw-r--r--fs/cifs/connect.c498
-rw-r--r--fs/cifs/dir.c15
-rw-r--r--fs/cifs/fcntl.c4
-rw-r--r--fs/cifs/file.c52
-rw-r--r--fs/cifs/inode.c39
-rw-r--r--fs/cifs/link.c7
-rw-r--r--fs/cifs/misc.c10
-rw-r--r--fs/cifs/netmisc.c4
-rw-r--r--fs/cifs/ntlmssp.c143
-rw-r--r--fs/cifs/readdir.c184
-rw-r--r--fs/cifs/sess.c538
-rw-r--r--fs/cifs/smbencrypt.c1
-rw-r--r--fs/cifs/transport.c3
-rw-r--r--fs/coda/psdev.c2
-rw-r--r--fs/coda/upcall.c2
-rw-r--r--fs/compat.c16
-rw-r--r--fs/compat_ioctl.c1
-rw-r--r--fs/configfs/dir.c6
-rw-r--r--fs/dcache.c5
-rw-r--r--fs/dquot.c4
-rw-r--r--fs/exec.c147
-rw-r--r--fs/ext3/super.c6
-rw-r--r--fs/jbd/journal.c3
-rw-r--r--fs/jffs2/acl.c2
-rw-r--r--fs/jffs2/erase.c34
-rw-r--r--fs/jffs2/fs.c2
-rw-r--r--fs/jffs2/gc.c6
-rw-r--r--fs/jffs2/jffs2_fs_sb.h3
-rw-r--r--fs/jffs2/malloc.c2
-rw-r--r--fs/jffs2/nodelist.c3
-rw-r--r--fs/jffs2/nodemgmt.c24
-rw-r--r--fs/jffs2/readinode.c1
-rw-r--r--fs/jffs2/scan.c55
-rw-r--r--fs/jffs2/summary.c43
-rw-r--r--fs/jffs2/wbuf.c3
-rw-r--r--fs/jffs2/xattr.c632
-rw-r--r--fs/jffs2/xattr.h21
-rw-r--r--fs/jfs/jfs_extent.c8
-rw-r--r--fs/libfs.c10
-rw-r--r--fs/namespace.c6
-rw-r--r--fs/nfs/direct.c2
-rw-r--r--fs/nfs/inode.c2
-rw-r--r--fs/nfs/internal.h8
-rw-r--r--fs/nfs/pagelist.c2
-rw-r--r--fs/nfs/read.c2
-rw-r--r--fs/nfs/write.c2
-rw-r--r--fs/nfsd/nfs4state.c5
-rw-r--r--fs/nfsd/nfscache.c3
-rw-r--r--fs/ocfs2/cluster/heartbeat.c2
-rw-r--r--fs/ocfs2/cluster/tcp.c2
-rw-r--r--fs/ocfs2/dlm/dlmast.c15
-rw-r--r--fs/ocfs2/dlm/dlmcommon.h63
-rw-r--r--fs/ocfs2/dlm/dlmconvert.c33
-rw-r--r--fs/ocfs2/dlm/dlmdebug.c6
-rw-r--r--fs/ocfs2/dlm/dlmdebug.h30
-rw-r--r--fs/ocfs2/dlm/dlmdomain.c103
-rw-r--r--fs/ocfs2/dlm/dlmfs.c6
-rw-r--r--fs/ocfs2/dlm/dlmlock.c73
-rw-r--r--fs/ocfs2/dlm/dlmmaster.c448
-rw-r--r--fs/ocfs2/dlm/dlmrecovery.c593
-rw-r--r--fs/ocfs2/dlm/dlmthread.c74
-rw-r--r--fs/ocfs2/dlm/dlmunlock.c13
-rw-r--r--fs/ocfs2/dlm/userdlm.c2
-rw-r--r--fs/ocfs2/dlmglue.c2
-rw-r--r--fs/ocfs2/journal.c5
-rw-r--r--fs/ocfs2/vote.c8
-rw-r--r--fs/openpromfs/inode.c1158
-rw-r--r--fs/pnode.c9
-rw-r--r--fs/proc/base.c1086
-rw-r--r--fs/proc/inode.c11
-rw-r--r--fs/proc/internal.h22
-rw-r--r--fs/proc/task_mmu.c140
-rw-r--r--fs/proc/task_nommu.c21
-rw-r--r--fs/reiserfs/file.c8
-rw-r--r--fs/reiserfs/journal.c6
-rw-r--r--fs/smbfs/request.c6
-rw-r--r--fs/smbfs/smbiod.c3
-rw-r--r--fs/sysfs/dir.c10
-rw-r--r--fs/ufs/inode.c111
-rw-r--r--fs/xfs/linux-2.6/xfs_iops.c9
-rw-r--r--fs/xfs/linux-2.6/xfs_linux.h2
-rw-r--r--fs/xfs/linux-2.6/xfs_vnode.h2
-rw-r--r--fs/xfs/xfs_behavior.h3
-rw-r--r--fs/xfs/xfs_inode.c4
-rw-r--r--fs/xfs/xfs_log.c4
-rw-r--r--fs/xfs/xfs_log_recover.c2
-rw-r--r--fs/xfs/xfs_mount.c21
-rw-r--r--fs/xfs/xfs_rtalloc.c2
-rw-r--r--fs/xfs/xfs_trans.h4
-rw-r--r--fs/xfs/xfs_vnodeops.c11
114 files changed, 4311 insertions, 3310 deletions
diff --git a/fs/9p/mux.c b/fs/9p/mux.c
index f4407eb276c7..12e1baa4508d 100644
--- a/fs/9p/mux.c
+++ b/fs/9p/mux.c
@@ -712,7 +712,7 @@ static void v9fs_read_work(void *a)
712 * v9fs_send_request - send 9P request 712 * v9fs_send_request - send 9P request
713 * The function can sleep until the request is scheduled for sending. 713 * The function can sleep until the request is scheduled for sending.
714 * The function can be interrupted. Return from the function is not 714 * The function can be interrupted. Return from the function is not
715 * a guarantee that the request is sent succesfully. Can return errors 715 * a guarantee that the request is sent successfully. Can return errors
716 * that can be retrieved by PTR_ERR macros. 716 * that can be retrieved by PTR_ERR macros.
717 * 717 *
718 * @m: mux data 718 * @m: mux data
diff --git a/fs/Kconfig b/fs/Kconfig
index 1cdc043922d5..6dc8cfd6d80c 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -1116,7 +1116,7 @@ config JFFS2_SUMMARY
1116 1116
1117config JFFS2_FS_XATTR 1117config JFFS2_FS_XATTR
1118 bool "JFFS2 XATTR support (EXPERIMENTAL)" 1118 bool "JFFS2 XATTR support (EXPERIMENTAL)"
1119 depends on JFFS2_FS && EXPERIMENTAL && !JFFS2_FS_WRITEBUFFER 1119 depends on JFFS2_FS && EXPERIMENTAL
1120 default n 1120 default n
1121 help 1121 help
1122 Extended attributes are name:value pairs associated with inodes by 1122 Extended attributes are name:value pairs associated with inodes by
@@ -1490,7 +1490,12 @@ config NFSD
1490 select LOCKD 1490 select LOCKD
1491 select SUNRPC 1491 select SUNRPC
1492 select EXPORTFS 1492 select EXPORTFS
1493 select NFS_ACL_SUPPORT if NFSD_V3_ACL || NFSD_V2_ACL 1493 select NFSD_V2_ACL if NFSD_V3_ACL
1494 select NFS_ACL_SUPPORT if NFSD_V2_ACL
1495 select NFSD_TCP if NFSD_V4
1496 select CRYPTO_MD5 if NFSD_V4
1497 select CRYPTO if NFSD_V4
1498 select FS_POSIX_ACL if NFSD_V4
1494 help 1499 help
1495 If you want your Linux box to act as an NFS *server*, so that other 1500 If you want your Linux box to act as an NFS *server*, so that other
1496 computers on your local network which support NFS can access certain 1501 computers on your local network which support NFS can access certain
@@ -1528,7 +1533,6 @@ config NFSD_V3
1528config NFSD_V3_ACL 1533config NFSD_V3_ACL
1529 bool "Provide server support for the NFSv3 ACL protocol extension" 1534 bool "Provide server support for the NFSv3 ACL protocol extension"
1530 depends on NFSD_V3 1535 depends on NFSD_V3
1531 select NFSD_V2_ACL
1532 help 1536 help
1533 Implement the NFSv3 ACL protocol extension for manipulating POSIX 1537 Implement the NFSv3 ACL protocol extension for manipulating POSIX
1534 Access Control Lists on exported file systems. NFS clients should 1538 Access Control Lists on exported file systems. NFS clients should
@@ -1538,10 +1542,6 @@ config NFSD_V3_ACL
1538config NFSD_V4 1542config NFSD_V4
1539 bool "Provide NFSv4 server support (EXPERIMENTAL)" 1543 bool "Provide NFSv4 server support (EXPERIMENTAL)"
1540 depends on NFSD_V3 && EXPERIMENTAL 1544 depends on NFSD_V3 && EXPERIMENTAL
1541 select NFSD_TCP
1542 select CRYPTO_MD5
1543 select CRYPTO
1544 select FS_POSIX_ACL
1545 help 1545 help
1546 If you would like to include the NFSv4 server as well as the NFSv2 1546 If you would like to include the NFSv4 server as well as the NFSv2
1547 and NFSv3 servers, say Y here. This feature is experimental, and 1547 and NFSv3 servers, say Y here. This feature is experimental, and
@@ -1722,7 +1722,7 @@ config CIFS_STATS
1722 mounted by the cifs client to be displayed in /proc/fs/cifs/Stats 1722 mounted by the cifs client to be displayed in /proc/fs/cifs/Stats
1723 1723
1724config CIFS_STATS2 1724config CIFS_STATS2
1725 bool "CIFS extended statistics" 1725 bool "Extended statistics"
1726 depends on CIFS_STATS 1726 depends on CIFS_STATS
1727 help 1727 help
1728 Enabling this option will allow more detailed statistics on SMB 1728 Enabling this option will allow more detailed statistics on SMB
@@ -1735,6 +1735,32 @@ config CIFS_STATS2
1735 Unless you are a developer or are doing network performance analysis 1735 Unless you are a developer or are doing network performance analysis
1736 or tuning, say N. 1736 or tuning, say N.
1737 1737
1738config CIFS_WEAK_PW_HASH
1739 bool "Support legacy servers which use weaker LANMAN security"
1740 depends on CIFS
1741 help
1742 Modern CIFS servers including Samba and most Windows versions
1743 (since 1997) support stronger NTLM (and even NTLMv2 and Kerberos)
1744 security mechanisms. These hash the password more securely
1745 than the mechanisms used in the older LANMAN version of the
1746 SMB protocol needed to establish sessions with old SMB servers.
1747
1748 Enabling this option allows the cifs module to mount to older
1749 LANMAN based servers such as OS/2 and Windows 95, but such
1750 mounts may be less secure than mounts using NTLM or more recent
1751 security mechanisms if you are on a public network. Unless you
1752 have a need to access old SMB servers (and are on a private
1753 network) you probably want to say N. Even if this support
1754 is enabled in the kernel build, they will not be used
1755 automatically. At runtime LANMAN mounts are disabled but
1756 can be set to required (or optional) either in
1757 /proc/fs/cifs (see fs/cifs/README for more detail) or via an
1758 option on the mount command. This support is disabled by
1759 default in order to reduce the possibility of a downgrade
1760 attack.
1761
1762 If unsure, say N.
1763
1738config CIFS_XATTR 1764config CIFS_XATTR
1739 bool "CIFS extended attributes" 1765 bool "CIFS extended attributes"
1740 depends on CIFS 1766 depends on CIFS
@@ -1763,6 +1789,16 @@ config CIFS_POSIX
1763 (such as Samba 3.10 and later) which can negotiate 1789 (such as Samba 3.10 and later) which can negotiate
1764 CIFS POSIX ACL support. If unsure, say N. 1790 CIFS POSIX ACL support. If unsure, say N.
1765 1791
1792config CIFS_DEBUG2
1793 bool "Enable additional CIFS debugging routines"
1794 help
1795 Enabling this option adds a few more debugging routines
1796 to the cifs code which slightly increases the size of
1797 the cifs module and can cause additional logging of debug
1798 messages in some error paths, slowing performance. This
1799 option can be turned off unless you are debugging
1800 cifs problems. If unsure, say N.
1801
1766config CIFS_EXPERIMENTAL 1802config CIFS_EXPERIMENTAL
1767 bool "CIFS Experimental Features (EXPERIMENTAL)" 1803 bool "CIFS Experimental Features (EXPERIMENTAL)"
1768 depends on CIFS && EXPERIMENTAL 1804 depends on CIFS && EXPERIMENTAL
@@ -1778,7 +1814,7 @@ config CIFS_EXPERIMENTAL
1778 If unsure, say N. 1814 If unsure, say N.
1779 1815
1780config CIFS_UPCALL 1816config CIFS_UPCALL
1781 bool "CIFS Kerberos/SPNEGO advanced session setup (EXPERIMENTAL)" 1817 bool "Kerberos/SPNEGO advanced session setup (EXPERIMENTAL)"
1782 depends on CIFS_EXPERIMENTAL 1818 depends on CIFS_EXPERIMENTAL
1783 select CONNECTOR 1819 select CONNECTOR
1784 help 1820 help
diff --git a/fs/afs/cell.c b/fs/afs/cell.c
index 009a9ae88d61..bfc1fd22d5b1 100644
--- a/fs/afs/cell.c
+++ b/fs/afs/cell.c
@@ -413,8 +413,7 @@ int afs_server_find_by_peer(const struct rxrpc_peer *peer,
413 413
414 /* we found it in the graveyard - resurrect it */ 414 /* we found it in the graveyard - resurrect it */
415 found_dead_server: 415 found_dead_server:
416 list_del(&server->link); 416 list_move_tail(&server->link, &cell->sv_list);
417 list_add_tail(&server->link, &cell->sv_list);
418 afs_get_server(server); 417 afs_get_server(server);
419 afs_kafstimod_del_timer(&server->timeout); 418 afs_kafstimod_del_timer(&server->timeout);
420 spin_unlock(&cell->sv_gylock); 419 spin_unlock(&cell->sv_gylock);
diff --git a/fs/afs/kafsasyncd.c b/fs/afs/kafsasyncd.c
index 7ac07d0d47b9..f09a794f248e 100644
--- a/fs/afs/kafsasyncd.c
+++ b/fs/afs/kafsasyncd.c
@@ -136,8 +136,7 @@ static int kafsasyncd(void *arg)
136 if (!list_empty(&kafsasyncd_async_attnq)) { 136 if (!list_empty(&kafsasyncd_async_attnq)) {
137 op = list_entry(kafsasyncd_async_attnq.next, 137 op = list_entry(kafsasyncd_async_attnq.next,
138 struct afs_async_op, link); 138 struct afs_async_op, link);
139 list_del(&op->link); 139 list_move_tail(&op->link,
140 list_add_tail(&op->link,
141 &kafsasyncd_async_busyq); 140 &kafsasyncd_async_busyq);
142 } 141 }
143 142
@@ -204,8 +203,7 @@ void afs_kafsasyncd_begin_op(struct afs_async_op *op)
204 init_waitqueue_entry(&op->waiter, kafsasyncd_task); 203 init_waitqueue_entry(&op->waiter, kafsasyncd_task);
205 add_wait_queue(&op->call->waitq, &op->waiter); 204 add_wait_queue(&op->call->waitq, &op->waiter);
206 205
207 list_del(&op->link); 206 list_move_tail(&op->link, &kafsasyncd_async_busyq);
208 list_add_tail(&op->link, &kafsasyncd_async_busyq);
209 207
210 spin_unlock(&kafsasyncd_async_lock); 208 spin_unlock(&kafsasyncd_async_lock);
211 209
@@ -223,8 +221,7 @@ void afs_kafsasyncd_attend_op(struct afs_async_op *op)
223 221
224 spin_lock(&kafsasyncd_async_lock); 222 spin_lock(&kafsasyncd_async_lock);
225 223
226 list_del(&op->link); 224 list_move_tail(&op->link, &kafsasyncd_async_attnq);
227 list_add_tail(&op->link, &kafsasyncd_async_attnq);
228 225
229 spin_unlock(&kafsasyncd_async_lock); 226 spin_unlock(&kafsasyncd_async_lock);
230 227
diff --git a/fs/afs/server.c b/fs/afs/server.c
index 62b093aa41c6..22afaae1a4ce 100644
--- a/fs/afs/server.c
+++ b/fs/afs/server.c
@@ -123,8 +123,7 @@ int afs_server_lookup(struct afs_cell *cell, const struct in_addr *addr,
123 resurrect_server: 123 resurrect_server:
124 _debug("resurrecting server"); 124 _debug("resurrecting server");
125 125
126 list_del(&zombie->link); 126 list_move_tail(&zombie->link, &cell->sv_list);
127 list_add_tail(&zombie->link, &cell->sv_list);
128 afs_get_server(zombie); 127 afs_get_server(zombie);
129 afs_kafstimod_del_timer(&zombie->timeout); 128 afs_kafstimod_del_timer(&zombie->timeout);
130 spin_unlock(&cell->sv_gylock); 129 spin_unlock(&cell->sv_gylock);
@@ -168,8 +167,7 @@ void afs_put_server(struct afs_server *server)
168 } 167 }
169 168
170 spin_lock(&cell->sv_gylock); 169 spin_lock(&cell->sv_gylock);
171 list_del(&server->link); 170 list_move_tail(&server->link, &cell->sv_graveyard);
172 list_add_tail(&server->link, &cell->sv_graveyard);
173 171
174 /* time out in 10 secs */ 172 /* time out in 10 secs */
175 afs_kafstimod_add_timer(&server->timeout, 10 * HZ); 173 afs_kafstimod_add_timer(&server->timeout, 10 * HZ);
diff --git a/fs/afs/vlocation.c b/fs/afs/vlocation.c
index eced20618ecc..331f730a1fb3 100644
--- a/fs/afs/vlocation.c
+++ b/fs/afs/vlocation.c
@@ -326,8 +326,7 @@ int afs_vlocation_lookup(struct afs_cell *cell,
326 /* found in the graveyard - resurrect */ 326 /* found in the graveyard - resurrect */
327 _debug("found in graveyard"); 327 _debug("found in graveyard");
328 atomic_inc(&vlocation->usage); 328 atomic_inc(&vlocation->usage);
329 list_del(&vlocation->link); 329 list_move_tail(&vlocation->link, &cell->vl_list);
330 list_add_tail(&vlocation->link, &cell->vl_list);
331 spin_unlock(&cell->vl_gylock); 330 spin_unlock(&cell->vl_gylock);
332 331
333 afs_kafstimod_del_timer(&vlocation->timeout); 332 afs_kafstimod_del_timer(&vlocation->timeout);
@@ -478,8 +477,7 @@ static void __afs_put_vlocation(struct afs_vlocation *vlocation)
478 } 477 }
479 478
480 /* move to graveyard queue */ 479 /* move to graveyard queue */
481 list_del(&vlocation->link); 480 list_move_tail(&vlocation->link,&cell->vl_graveyard);
482 list_add_tail(&vlocation->link,&cell->vl_graveyard);
483 481
484 /* remove from pending timeout queue (refcounted if actually being 482 /* remove from pending timeout queue (refcounted if actually being
485 * updated) */ 483 * updated) */
diff --git a/fs/afs/vnode.c b/fs/afs/vnode.c
index 9867fef3261d..cf62da5d7825 100644
--- a/fs/afs/vnode.c
+++ b/fs/afs/vnode.c
@@ -104,8 +104,7 @@ static void afs_vnode_finalise_status_update(struct afs_vnode *vnode,
104 vnode->cb_expiry * HZ); 104 vnode->cb_expiry * HZ);
105 105
106 spin_lock(&afs_cb_hash_lock); 106 spin_lock(&afs_cb_hash_lock);
107 list_del(&vnode->cb_hash_link); 107 list_move_tail(&vnode->cb_hash_link,
108 list_add_tail(&vnode->cb_hash_link,
109 &afs_cb_hash(server, &vnode->fid)); 108 &afs_cb_hash(server, &vnode->fid));
110 spin_unlock(&afs_cb_hash_lock); 109 spin_unlock(&afs_cb_hash_lock);
111 110
diff --git a/fs/aio.c b/fs/aio.c
index 8c34a62df7d7..950630187acc 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -641,7 +641,7 @@ static inline int __queue_kicked_iocb(struct kiocb *iocb)
641 * invoked both for initial i/o submission and 641 * invoked both for initial i/o submission and
642 * subsequent retries via the aio_kick_handler. 642 * subsequent retries via the aio_kick_handler.
643 * Expects to be invoked with iocb->ki_ctx->lock 643 * Expects to be invoked with iocb->ki_ctx->lock
644 * already held. The lock is released and reaquired 644 * already held. The lock is released and reacquired
645 * as needed during processing. 645 * as needed during processing.
646 * 646 *
647 * Calls the iocb retry method (already setup for the 647 * Calls the iocb retry method (already setup for the
diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c
index 4456d1daa40f..8dbd44f10e9d 100644
--- a/fs/autofs4/expire.c
+++ b/fs/autofs4/expire.c
@@ -376,8 +376,7 @@ next:
376 DPRINTK("returning %p %.*s", 376 DPRINTK("returning %p %.*s",
377 expired, (int)expired->d_name.len, expired->d_name.name); 377 expired, (int)expired->d_name.len, expired->d_name.name);
378 spin_lock(&dcache_lock); 378 spin_lock(&dcache_lock);
379 list_del(&expired->d_parent->d_subdirs); 379 list_move(&expired->d_parent->d_subdirs, &expired->d_u.d_child);
380 list_add(&expired->d_parent->d_subdirs, &expired->d_u.d_child);
381 spin_unlock(&dcache_lock); 380 spin_unlock(&dcache_lock);
382 return expired; 381 return expired;
383 } 382 }
diff --git a/fs/buffer.c b/fs/buffer.c
index 373bb6292bdc..f23bb647db47 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -564,7 +564,7 @@ still_busy:
564 * Completion handler for block_write_full_page() - pages which are unlocked 564 * Completion handler for block_write_full_page() - pages which are unlocked
565 * during I/O, and which have PageWriteback cleared upon I/O completion. 565 * during I/O, and which have PageWriteback cleared upon I/O completion.
566 */ 566 */
567void end_buffer_async_write(struct buffer_head *bh, int uptodate) 567static void end_buffer_async_write(struct buffer_head *bh, int uptodate)
568{ 568{
569 char b[BDEVNAME_SIZE]; 569 char b[BDEVNAME_SIZE];
570 unsigned long flags; 570 unsigned long flags;
@@ -3166,7 +3166,6 @@ EXPORT_SYMBOL(block_sync_page);
3166EXPORT_SYMBOL(block_truncate_page); 3166EXPORT_SYMBOL(block_truncate_page);
3167EXPORT_SYMBOL(block_write_full_page); 3167EXPORT_SYMBOL(block_write_full_page);
3168EXPORT_SYMBOL(cont_prepare_write); 3168EXPORT_SYMBOL(cont_prepare_write);
3169EXPORT_SYMBOL(end_buffer_async_write);
3170EXPORT_SYMBOL(end_buffer_read_sync); 3169EXPORT_SYMBOL(end_buffer_read_sync);
3171EXPORT_SYMBOL(end_buffer_write_sync); 3170EXPORT_SYMBOL(end_buffer_write_sync);
3172EXPORT_SYMBOL(file_fsync); 3171EXPORT_SYMBOL(file_fsync);
diff --git a/fs/cifs/CHANGES b/fs/cifs/CHANGES
index 7271bb0257f6..a61d17ed1827 100644
--- a/fs/cifs/CHANGES
+++ b/fs/cifs/CHANGES
@@ -1,9 +1,24 @@
1Version 1.44
2------------
3Rewritten sessionsetup support, including support for legacy SMB
4session setup needed for OS/2 and older servers such as Windows 95 and 98.
5Fix oops on ls to OS/2 servers. Add support for level 1 FindFirst
6so we can do search (ls etc.) to OS/2. Do not send NTCreateX
7or recent levels of FindFirst unless server says it supports NT SMBs
8(instead use legacy equivalents from LANMAN dialect). Fix to allow
9NTLMv2 authentication support (now can use stronger password hashing
10on mount if corresponding /proc/fs/cifs/SecurityFlags is set (0x4004).
11Allow override of global cifs security flags on mount via "sec=" option(s).
12
1Version 1.43 13Version 1.43
2------------ 14------------
3POSIX locking to servers which support CIFS POSIX Extensions 15POSIX locking to servers which support CIFS POSIX Extensions
4(disabled by default controlled by proc/fs/cifs/Experimental). 16(disabled by default controlled by proc/fs/cifs/Experimental).
5Handle conversion of long share names (especially Asian languages) 17Handle conversion of long share names (especially Asian languages)
6to Unicode during mount. 18to Unicode during mount. Fix memory leak in sess struct on reconnect.
19Fix rare oops after acpi suspend. Fix O_TRUNC opens to overwrite on
20cifs open which helps rare case when setpathinfo fails or server does
21not support it.
7 22
8Version 1.42 23Version 1.42
9------------ 24------------
diff --git a/fs/cifs/Makefile b/fs/cifs/Makefile
index 58c77254a23b..a26f26ed5a17 100644
--- a/fs/cifs/Makefile
+++ b/fs/cifs/Makefile
@@ -3,4 +3,4 @@
3# 3#
4obj-$(CONFIG_CIFS) += cifs.o 4obj-$(CONFIG_CIFS) += cifs.o
5 5
6cifs-objs := cifsfs.o cifssmb.o cifs_debug.o connect.o dir.o file.o inode.o link.o misc.o netmisc.o smbdes.o smbencrypt.o transport.o asn1.o md4.o md5.o cifs_unicode.o nterr.o xattr.o cifsencrypt.o fcntl.o readdir.o ioctl.o ntlmssp.o 6cifs-objs := cifsfs.o cifssmb.o cifs_debug.o connect.o dir.o file.o inode.o link.o misc.o netmisc.o smbdes.o smbencrypt.o transport.o asn1.o md4.o md5.o cifs_unicode.o nterr.o xattr.o cifsencrypt.o fcntl.o readdir.o ioctl.o sess.o
diff --git a/fs/cifs/README b/fs/cifs/README
index 0355003f4f0a..7986d0d97ace 100644
--- a/fs/cifs/README
+++ b/fs/cifs/README
@@ -443,7 +443,10 @@ A partial list of the supported mount options follows:
443 SFU does). In the future the bottom 9 bits of the mode 443 SFU does). In the future the bottom 9 bits of the mode
444 mode also will be emulated using queries of the security 444 mode also will be emulated using queries of the security
445 descriptor (ACL). 445 descriptor (ACL).
446sec Security mode. Allowed values are: 446 sign Must use packet signing (helps avoid unwanted data modification
447 by intermediate systems in the route). Note that signing
448 does not work with lanman or plaintext authentication.
449 sec Security mode. Allowed values are:
447 none attempt to connection as a null user (no name) 450 none attempt to connection as a null user (no name)
448 krb5 Use Kerberos version 5 authentication 451 krb5 Use Kerberos version 5 authentication
449 krb5i Use Kerberos authentication and packet signing 452 krb5i Use Kerberos authentication and packet signing
@@ -453,6 +456,8 @@ sec Security mode. Allowed values are:
453 server requires signing also can be the default) 456 server requires signing also can be the default)
454 ntlmv2 Use NTLMv2 password hashing 457 ntlmv2 Use NTLMv2 password hashing
455 ntlmv2i Use NTLMv2 password hashing with packet signing 458 ntlmv2i Use NTLMv2 password hashing with packet signing
459 lanman (if configured in kernel config) use older
460 lanman hash
456 461
457The mount.cifs mount helper also accepts a few mount options before -o 462The mount.cifs mount helper also accepts a few mount options before -o
458including: 463including:
@@ -485,14 +490,34 @@ PacketSigningEnabled If set to one, cifs packet signing is enabled
485 it. If set to two, cifs packet signing is 490 it. If set to two, cifs packet signing is
486 required even if the server considers packet 491 required even if the server considers packet
487 signing optional. (default 1) 492 signing optional. (default 1)
493SecurityFlags Flags which control security negotiation and
494 also packet signing. Authentication (may/must)
495 flags (e.g. for NTLM and/or NTLMv2) may be combined with
496 the signing flags. Specifying two different password
497 hashing mechanisms (as "must use") on the other hand
498 does not make much sense. Default flags are
499 0x07007
500 (NTLM, NTLMv2 and packet signing allowed). Maximum
501 allowable flags if you want to allow mounts to servers
502 using weaker password hashes is 0x37037 (lanman,
503 plaintext, ntlm, ntlmv2, signing allowed):
504
505 may use packet signing 0x00001
506 must use packet signing 0x01001
507 may use NTLM (most common password hash) 0x00002
508 must use NTLM 0x02002
509 may use NTLMv2 0x00004
510 must use NTLMv2 0x04004
511 may use Kerberos security (not implemented yet) 0x00008
512 must use Kerberos (not implemented yet) 0x08008
513 may use lanman (weak) password hash 0x00010
514 must use lanman password hash 0x10010
515 may use plaintext passwords 0x00020
516 must use plaintext passwords 0x20020
517 (reserved for future packet encryption) 0x00040
518
488cifsFYI If set to one, additional debug information is 519cifsFYI If set to one, additional debug information is
489 logged to the system error log. (default 0) 520 logged to the system error log. (default 0)
490ExtendedSecurity If set to one, SPNEGO session establishment
491 is allowed which enables more advanced
492 secure CIFS session establishment (default 0)
493NTLMV2Enabled If set to one, more secure password hashes
494 are used when the server supports them and
495 when kerberos is not negotiated (default 0)
496traceSMB If set to one, debug information is logged to the 521traceSMB If set to one, debug information is logged to the
497 system error log with the start of smb requests 522 system error log with the start of smb requests
498 and responses (default 0) 523 and responses (default 0)
diff --git a/fs/cifs/asn1.c b/fs/cifs/asn1.c
index 086ae8f4a207..031cdf293256 100644
--- a/fs/cifs/asn1.c
+++ b/fs/cifs/asn1.c
@@ -467,7 +467,7 @@ decode_negTokenInit(unsigned char *security_blob, int length,
467 asn1_open(&ctx, security_blob, length); 467 asn1_open(&ctx, security_blob, length);
468 468
469 if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) { 469 if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) {
470 cFYI(1, ("Error decoding negTokenInit header ")); 470 cFYI(1, ("Error decoding negTokenInit header"));
471 return 0; 471 return 0;
472 } else if ((cls != ASN1_APL) || (con != ASN1_CON) 472 } else if ((cls != ASN1_APL) || (con != ASN1_CON)
473 || (tag != ASN1_EOC)) { 473 || (tag != ASN1_EOC)) {
@@ -495,7 +495,7 @@ decode_negTokenInit(unsigned char *security_blob, int length,
495 } 495 }
496 496
497 if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) { 497 if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) {
498 cFYI(1, ("Error decoding negTokenInit ")); 498 cFYI(1, ("Error decoding negTokenInit"));
499 return 0; 499 return 0;
500 } else if ((cls != ASN1_CTX) || (con != ASN1_CON) 500 } else if ((cls != ASN1_CTX) || (con != ASN1_CON)
501 || (tag != ASN1_EOC)) { 501 || (tag != ASN1_EOC)) {
@@ -505,7 +505,7 @@ decode_negTokenInit(unsigned char *security_blob, int length,
505 } 505 }
506 506
507 if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) { 507 if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) {
508 cFYI(1, ("Error decoding negTokenInit ")); 508 cFYI(1, ("Error decoding negTokenInit"));
509 return 0; 509 return 0;
510 } else if ((cls != ASN1_UNI) || (con != ASN1_CON) 510 } else if ((cls != ASN1_UNI) || (con != ASN1_CON)
511 || (tag != ASN1_SEQ)) { 511 || (tag != ASN1_SEQ)) {
@@ -515,7 +515,7 @@ decode_negTokenInit(unsigned char *security_blob, int length,
515 } 515 }
516 516
517 if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) { 517 if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) {
518 cFYI(1, ("Error decoding 2nd part of negTokenInit ")); 518 cFYI(1, ("Error decoding 2nd part of negTokenInit"));
519 return 0; 519 return 0;
520 } else if ((cls != ASN1_CTX) || (con != ASN1_CON) 520 } else if ((cls != ASN1_CTX) || (con != ASN1_CON)
521 || (tag != ASN1_EOC)) { 521 || (tag != ASN1_EOC)) {
@@ -527,7 +527,7 @@ decode_negTokenInit(unsigned char *security_blob, int length,
527 527
528 if (asn1_header_decode 528 if (asn1_header_decode
529 (&ctx, &sequence_end, &cls, &con, &tag) == 0) { 529 (&ctx, &sequence_end, &cls, &con, &tag) == 0) {
530 cFYI(1, ("Error decoding 2nd part of negTokenInit ")); 530 cFYI(1, ("Error decoding 2nd part of negTokenInit"));
531 return 0; 531 return 0;
532 } else if ((cls != ASN1_UNI) || (con != ASN1_CON) 532 } else if ((cls != ASN1_UNI) || (con != ASN1_CON)
533 || (tag != ASN1_SEQ)) { 533 || (tag != ASN1_SEQ)) {
diff --git a/fs/cifs/cifs_debug.c b/fs/cifs/cifs_debug.c
index f4124a32bef8..96abeb738978 100644
--- a/fs/cifs/cifs_debug.c
+++ b/fs/cifs/cifs_debug.c
@@ -39,7 +39,7 @@ cifs_dump_mem(char *label, void *data, int length)
39 char *charptr = data; 39 char *charptr = data;
40 char buf[10], line[80]; 40 char buf[10], line[80];
41 41
42 printk(KERN_DEBUG "%s: dump of %d bytes of data at 0x%p\n\n", 42 printk(KERN_DEBUG "%s: dump of %d bytes of data at 0x%p\n",
43 label, length, data); 43 label, length, data);
44 for (i = 0; i < length; i += 16) { 44 for (i = 0; i < length; i += 16) {
45 line[0] = 0; 45 line[0] = 0;
@@ -57,6 +57,57 @@ cifs_dump_mem(char *label, void *data, int length)
57 } 57 }
58} 58}
59 59
60#ifdef CONFIG_CIFS_DEBUG2
61void cifs_dump_detail(struct smb_hdr * smb)
62{
63 cERROR(1,("Cmd: %d Err: 0x%x Flags: 0x%x Flgs2: 0x%x Mid: %d Pid: %d",
64 smb->Command, smb->Status.CifsError,
65 smb->Flags, smb->Flags2, smb->Mid, smb->Pid));
66 cERROR(1,("smb buf %p len %d", smb, smbCalcSize_LE(smb)));
67}
68
69
70void cifs_dump_mids(struct TCP_Server_Info * server)
71{
72 struct list_head *tmp;
73 struct mid_q_entry * mid_entry;
74
75 if(server == NULL)
76 return;
77
78 cERROR(1,("Dump pending requests:"));
79 spin_lock(&GlobalMid_Lock);
80 list_for_each(tmp, &server->pending_mid_q) {
81 mid_entry = list_entry(tmp, struct mid_q_entry, qhead);
82 if(mid_entry) {
83 cERROR(1,("State: %d Cmd: %d Pid: %d Tsk: %p Mid %d",
84 mid_entry->midState,
85 (int)mid_entry->command,
86 mid_entry->pid,
87 mid_entry->tsk,
88 mid_entry->mid));
89#ifdef CONFIG_CIFS_STATS2
90 cERROR(1,("IsLarge: %d buf: %p time rcv: %ld now: %ld",
91 mid_entry->largeBuf,
92 mid_entry->resp_buf,
93 mid_entry->when_received,
94 jiffies));
95#endif /* STATS2 */
96 cERROR(1,("IsMult: %d IsEnd: %d", mid_entry->multiRsp,
97 mid_entry->multiEnd));
98 if(mid_entry->resp_buf) {
99 cifs_dump_detail(mid_entry->resp_buf);
100 cifs_dump_mem("existing buf: ",
101 mid_entry->resp_buf,
102 62 /* fixme */);
103 }
104
105 }
106 }
107 spin_unlock(&GlobalMid_Lock);
108}
109#endif /* CONFIG_CIFS_DEBUG2 */
110
60#ifdef CONFIG_PROC_FS 111#ifdef CONFIG_PROC_FS
61static int 112static int
62cifs_debug_data_read(char *buf, char **beginBuffer, off_t offset, 113cifs_debug_data_read(char *buf, char **beginBuffer, off_t offset,
@@ -73,7 +124,6 @@ cifs_debug_data_read(char *buf, char **beginBuffer, off_t offset,
73 124
74 *beginBuffer = buf + offset; 125 *beginBuffer = buf + offset;
75 126
76
77 length = 127 length =
78 sprintf(buf, 128 sprintf(buf,
79 "Display Internal CIFS Data Structures for Debugging\n" 129 "Display Internal CIFS Data Structures for Debugging\n"
@@ -395,12 +445,12 @@ static read_proc_t traceSMB_read;
395static write_proc_t traceSMB_write; 445static write_proc_t traceSMB_write;
396static read_proc_t multiuser_mount_read; 446static read_proc_t multiuser_mount_read;
397static write_proc_t multiuser_mount_write; 447static write_proc_t multiuser_mount_write;
398static read_proc_t extended_security_read; 448static read_proc_t security_flags_read;
399static write_proc_t extended_security_write; 449static write_proc_t security_flags_write;
400static read_proc_t ntlmv2_enabled_read; 450/* static read_proc_t ntlmv2_enabled_read;
401static write_proc_t ntlmv2_enabled_write; 451static write_proc_t ntlmv2_enabled_write;
402static read_proc_t packet_signing_enabled_read; 452static read_proc_t packet_signing_enabled_read;
403static write_proc_t packet_signing_enabled_write; 453static write_proc_t packet_signing_enabled_write;*/
404static read_proc_t experimEnabled_read; 454static read_proc_t experimEnabled_read;
405static write_proc_t experimEnabled_write; 455static write_proc_t experimEnabled_write;
406static read_proc_t linuxExtensionsEnabled_read; 456static read_proc_t linuxExtensionsEnabled_read;
@@ -458,10 +508,10 @@ cifs_proc_init(void)
458 pde->write_proc = multiuser_mount_write; 508 pde->write_proc = multiuser_mount_write;
459 509
460 pde = 510 pde =
461 create_proc_read_entry("ExtendedSecurity", 0, proc_fs_cifs, 511 create_proc_read_entry("SecurityFlags", 0, proc_fs_cifs,
462 extended_security_read, NULL); 512 security_flags_read, NULL);
463 if (pde) 513 if (pde)
464 pde->write_proc = extended_security_write; 514 pde->write_proc = security_flags_write;
465 515
466 pde = 516 pde =
467 create_proc_read_entry("LookupCacheEnabled", 0, proc_fs_cifs, 517 create_proc_read_entry("LookupCacheEnabled", 0, proc_fs_cifs,
@@ -469,7 +519,7 @@ cifs_proc_init(void)
469 if (pde) 519 if (pde)
470 pde->write_proc = lookupFlag_write; 520 pde->write_proc = lookupFlag_write;
471 521
472 pde = 522/* pde =
473 create_proc_read_entry("NTLMV2Enabled", 0, proc_fs_cifs, 523 create_proc_read_entry("NTLMV2Enabled", 0, proc_fs_cifs,
474 ntlmv2_enabled_read, NULL); 524 ntlmv2_enabled_read, NULL);
475 if (pde) 525 if (pde)
@@ -479,7 +529,7 @@ cifs_proc_init(void)
479 create_proc_read_entry("PacketSigningEnabled", 0, proc_fs_cifs, 529 create_proc_read_entry("PacketSigningEnabled", 0, proc_fs_cifs,
480 packet_signing_enabled_read, NULL); 530 packet_signing_enabled_read, NULL);
481 if (pde) 531 if (pde)
482 pde->write_proc = packet_signing_enabled_write; 532 pde->write_proc = packet_signing_enabled_write;*/
483} 533}
484 534
485void 535void
@@ -496,9 +546,9 @@ cifs_proc_clean(void)
496#endif 546#endif
497 remove_proc_entry("MultiuserMount", proc_fs_cifs); 547 remove_proc_entry("MultiuserMount", proc_fs_cifs);
498 remove_proc_entry("OplockEnabled", proc_fs_cifs); 548 remove_proc_entry("OplockEnabled", proc_fs_cifs);
499 remove_proc_entry("NTLMV2Enabled",proc_fs_cifs); 549/* remove_proc_entry("NTLMV2Enabled",proc_fs_cifs); */
500 remove_proc_entry("ExtendedSecurity",proc_fs_cifs); 550 remove_proc_entry("SecurityFlags",proc_fs_cifs);
501 remove_proc_entry("PacketSigningEnabled",proc_fs_cifs); 551/* remove_proc_entry("PacketSigningEnabled",proc_fs_cifs); */
502 remove_proc_entry("LinuxExtensionsEnabled",proc_fs_cifs); 552 remove_proc_entry("LinuxExtensionsEnabled",proc_fs_cifs);
503 remove_proc_entry("Experimental",proc_fs_cifs); 553 remove_proc_entry("Experimental",proc_fs_cifs);
504 remove_proc_entry("LookupCacheEnabled",proc_fs_cifs); 554 remove_proc_entry("LookupCacheEnabled",proc_fs_cifs);
@@ -782,12 +832,12 @@ multiuser_mount_write(struct file *file, const char __user *buffer,
782} 832}
783 833
784static int 834static int
785extended_security_read(char *page, char **start, off_t off, 835security_flags_read(char *page, char **start, off_t off,
786 int count, int *eof, void *data) 836 int count, int *eof, void *data)
787{ 837{
788 int len; 838 int len;
789 839
790 len = sprintf(page, "%d\n", extended_security); 840 len = sprintf(page, "0x%x\n", extended_security);
791 841
792 len -= off; 842 len -= off;
793 *start = page + off; 843 *start = page + off;
@@ -803,24 +853,52 @@ extended_security_read(char *page, char **start, off_t off,
803 return len; 853 return len;
804} 854}
805static int 855static int
806extended_security_write(struct file *file, const char __user *buffer, 856security_flags_write(struct file *file, const char __user *buffer,
807 unsigned long count, void *data) 857 unsigned long count, void *data)
808{ 858{
859 unsigned int flags;
860 char flags_string[12];
809 char c; 861 char c;
810 int rc;
811 862
812 rc = get_user(c, buffer); 863 if((count < 1) || (count > 11))
813 if (rc) 864 return -EINVAL;
814 return rc; 865
815 if (c == '0' || c == 'n' || c == 'N') 866 memset(flags_string, 0, 12);
816 extended_security = 0; 867
817 else if (c == '1' || c == 'y' || c == 'Y') 868 if(copy_from_user(flags_string, buffer, count))
818 extended_security = 1; 869 return -EFAULT;
870
871 if(count < 3) {
872 /* single char or single char followed by null */
873 c = flags_string[0];
874 if (c == '0' || c == 'n' || c == 'N')
875 extended_security = CIFSSEC_DEF; /* default */
876 else if (c == '1' || c == 'y' || c == 'Y')
877 extended_security = CIFSSEC_MAX;
878 return count;
879 }
880 /* else we have a number */
881
882 flags = simple_strtoul(flags_string, NULL, 0);
883
884 cFYI(1,("sec flags 0x%x", flags));
885
886 if(flags <= 0) {
887 cERROR(1,("invalid security flags %s",flags_string));
888 return -EINVAL;
889 }
819 890
891 if(flags & ~CIFSSEC_MASK) {
892 cERROR(1,("attempt to set unsupported security flags 0x%x",
893 flags & ~CIFSSEC_MASK));
894 return -EINVAL;
895 }
896 /* flags look ok - update the global security flags for cifs module */
897 extended_security = flags;
820 return count; 898 return count;
821} 899}
822 900
823static int 901/* static int
824ntlmv2_enabled_read(char *page, char **start, off_t off, 902ntlmv2_enabled_read(char *page, char **start, off_t off,
825 int count, int *eof, void *data) 903 int count, int *eof, void *data)
826{ 904{
@@ -855,6 +933,8 @@ ntlmv2_enabled_write(struct file *file, const char __user *buffer,
855 ntlmv2_support = 0; 933 ntlmv2_support = 0;
856 else if (c == '1' || c == 'y' || c == 'Y') 934 else if (c == '1' || c == 'y' || c == 'Y')
857 ntlmv2_support = 1; 935 ntlmv2_support = 1;
936 else if (c == '2')
937 ntlmv2_support = 2;
858 938
859 return count; 939 return count;
860} 940}
@@ -898,7 +978,7 @@ packet_signing_enabled_write(struct file *file, const char __user *buffer,
898 sign_CIFS_PDUs = 2; 978 sign_CIFS_PDUs = 2;
899 979
900 return count; 980 return count;
901} 981} */
902 982
903 983
904#endif 984#endif
diff --git a/fs/cifs/cifs_debug.h b/fs/cifs/cifs_debug.h
index 4304d9dcfb6c..c26cd0d2c6d5 100644
--- a/fs/cifs/cifs_debug.h
+++ b/fs/cifs/cifs_debug.h
@@ -24,6 +24,10 @@
24#define _H_CIFS_DEBUG 24#define _H_CIFS_DEBUG
25 25
26void cifs_dump_mem(char *label, void *data, int length); 26void cifs_dump_mem(char *label, void *data, int length);
27#ifdef CONFIG_CIFS_DEBUG2
28void cifs_dump_detail(struct smb_hdr *);
29void cifs_dump_mids(struct TCP_Server_Info *);
30#endif
27extern int traceSMB; /* flag which enables the function below */ 31extern int traceSMB; /* flag which enables the function below */
28void dump_smb(struct smb_hdr *, int); 32void dump_smb(struct smb_hdr *, int);
29#define CIFS_INFO 0x01 33#define CIFS_INFO 0x01
diff --git a/fs/cifs/cifs_unicode.c b/fs/cifs/cifs_unicode.c
index d2b128255944..d2a8b2941fc2 100644
--- a/fs/cifs/cifs_unicode.c
+++ b/fs/cifs/cifs_unicode.c
@@ -22,6 +22,7 @@
22#include "cifs_unicode.h" 22#include "cifs_unicode.h"
23#include "cifs_uniupr.h" 23#include "cifs_uniupr.h"
24#include "cifspdu.h" 24#include "cifspdu.h"
25#include "cifsglob.h"
25#include "cifs_debug.h" 26#include "cifs_debug.h"
26 27
27/* 28/*
diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c
index e7d63737e651..a89efaf78a26 100644
--- a/fs/cifs/cifsencrypt.c
+++ b/fs/cifs/cifsencrypt.c
@@ -26,6 +26,8 @@
26#include "md5.h" 26#include "md5.h"
27#include "cifs_unicode.h" 27#include "cifs_unicode.h"
28#include "cifsproto.h" 28#include "cifsproto.h"
29#include <linux/ctype.h>
30#include <linux/random.h>
29 31
30/* Calculate and return the CIFS signature based on the mac key and the smb pdu */ 32/* Calculate and return the CIFS signature based on the mac key and the smb pdu */
31/* the 16 byte signature must be allocated by the caller */ 33/* the 16 byte signature must be allocated by the caller */
@@ -35,6 +37,8 @@
35 37
36extern void mdfour(unsigned char *out, unsigned char *in, int n); 38extern void mdfour(unsigned char *out, unsigned char *in, int n);
37extern void E_md4hash(const unsigned char *passwd, unsigned char *p16); 39extern void E_md4hash(const unsigned char *passwd, unsigned char *p16);
40extern void SMBencrypt(unsigned char *passwd, unsigned char *c8,
41 unsigned char *p24);
38 42
39static int cifs_calculate_signature(const struct smb_hdr * cifs_pdu, 43static int cifs_calculate_signature(const struct smb_hdr * cifs_pdu,
40 const char * key, char * signature) 44 const char * key, char * signature)
@@ -45,7 +49,7 @@ static int cifs_calculate_signature(const struct smb_hdr * cifs_pdu,
45 return -EINVAL; 49 return -EINVAL;
46 50
47 MD5Init(&context); 51 MD5Init(&context);
48 MD5Update(&context,key,CIFS_SESSION_KEY_SIZE+16); 52 MD5Update(&context,key,CIFS_SESS_KEY_SIZE+16);
49 MD5Update(&context,cifs_pdu->Protocol,cifs_pdu->smb_buf_length); 53 MD5Update(&context,cifs_pdu->Protocol,cifs_pdu->smb_buf_length);
50 MD5Final(signature,&context); 54 MD5Final(signature,&context);
51 return 0; 55 return 0;
@@ -90,7 +94,7 @@ static int cifs_calc_signature2(const struct kvec * iov, int n_vec,
90 return -EINVAL; 94 return -EINVAL;
91 95
92 MD5Init(&context); 96 MD5Init(&context);
93 MD5Update(&context,key,CIFS_SESSION_KEY_SIZE+16); 97 MD5Update(&context,key,CIFS_SESS_KEY_SIZE+16);
94 for(i=0;i<n_vec;i++) { 98 for(i=0;i<n_vec;i++) {
95 if(iov[i].iov_base == NULL) { 99 if(iov[i].iov_base == NULL) {
96 cERROR(1,("null iovec entry")); 100 cERROR(1,("null iovec entry"));
@@ -204,11 +208,12 @@ int cifs_calculate_mac_key(char * key, const char * rn, const char * password)
204 208
205 E_md4hash(password, temp_key); 209 E_md4hash(password, temp_key);
206 mdfour(key,temp_key,16); 210 mdfour(key,temp_key,16);
207 memcpy(key+16,rn, CIFS_SESSION_KEY_SIZE); 211 memcpy(key+16,rn, CIFS_SESS_KEY_SIZE);
208 return 0; 212 return 0;
209} 213}
210 214
211int CalcNTLMv2_partial_mac_key(struct cifsSesInfo * ses, struct nls_table * nls_info) 215int CalcNTLMv2_partial_mac_key(struct cifsSesInfo * ses,
216 const struct nls_table * nls_info)
212{ 217{
213 char temp_hash[16]; 218 char temp_hash[16];
214 struct HMACMD5Context ctx; 219 struct HMACMD5Context ctx;
@@ -225,6 +230,8 @@ int CalcNTLMv2_partial_mac_key(struct cifsSesInfo * ses, struct nls_table * nls_
225 user_name_len = strlen(ses->userName); 230 user_name_len = strlen(ses->userName);
226 if(user_name_len > MAX_USERNAME_SIZE) 231 if(user_name_len > MAX_USERNAME_SIZE)
227 return -EINVAL; 232 return -EINVAL;
233 if(ses->domainName == NULL)
234 return -EINVAL; /* BB should we use CIFS_LINUX_DOM */
228 dom_name_len = strlen(ses->domainName); 235 dom_name_len = strlen(ses->domainName);
229 if(dom_name_len > MAX_USERNAME_SIZE) 236 if(dom_name_len > MAX_USERNAME_SIZE)
230 return -EINVAL; 237 return -EINVAL;
@@ -259,16 +266,131 @@ int CalcNTLMv2_partial_mac_key(struct cifsSesInfo * ses, struct nls_table * nls_
259 kfree(unicode_buf); 266 kfree(unicode_buf);
260 return 0; 267 return 0;
261} 268}
262void CalcNTLMv2_response(const struct cifsSesInfo * ses,char * v2_session_response) 269
270#ifdef CONFIG_CIFS_WEAK_PW_HASH
271void calc_lanman_hash(struct cifsSesInfo * ses, char * lnm_session_key)
272{
273 int i;
274 char password_with_pad[CIFS_ENCPWD_SIZE];
275
276 if(ses->server == NULL)
277 return;
278
279 memset(password_with_pad, 0, CIFS_ENCPWD_SIZE);
280 strncpy(password_with_pad, ses->password, CIFS_ENCPWD_SIZE);
281
282 if((ses->server->secMode & SECMODE_PW_ENCRYPT) == 0)
283 if(extended_security & CIFSSEC_MAY_PLNTXT) {
284 memcpy(lnm_session_key, password_with_pad, CIFS_ENCPWD_SIZE);
285 return;
286 }
287
288 /* calculate old style session key */
289 /* calling toupper is less broken than repeatedly
290 calling nls_toupper would be since that will never
291 work for UTF8, but neither handles multibyte code pages
292 but the only alternative would be converting to UCS-16 (Unicode)
293 (using a routine something like UniStrupr) then
294 uppercasing and then converting back from Unicode - which
295 would only worth doing it if we knew it were utf8. Basically
296 utf8 and other multibyte codepages each need their own strupper
297 function since a byte at a time will ont work. */
298
299 for(i = 0; i < CIFS_ENCPWD_SIZE; i++) {
300 password_with_pad[i] = toupper(password_with_pad[i]);
301 }
302
303 SMBencrypt(password_with_pad, ses->server->cryptKey, lnm_session_key);
304 /* clear password before we return/free memory */
305 memset(password_with_pad, 0, CIFS_ENCPWD_SIZE);
306}
307#endif /* CIFS_WEAK_PW_HASH */
308
309static int calc_ntlmv2_hash(struct cifsSesInfo *ses,
310 const struct nls_table * nls_cp)
311{
312 int rc = 0;
313 int len;
314 char nt_hash[16];
315 struct HMACMD5Context * pctxt;
316 wchar_t * user;
317 wchar_t * domain;
318
319 pctxt = kmalloc(sizeof(struct HMACMD5Context), GFP_KERNEL);
320
321 if(pctxt == NULL)
322 return -ENOMEM;
323
324 /* calculate md4 hash of password */
325 E_md4hash(ses->password, nt_hash);
326
327 /* convert Domainname to unicode and uppercase */
328 hmac_md5_init_limK_to_64(nt_hash, 16, pctxt);
329
330 /* convert ses->userName to unicode and uppercase */
331 len = strlen(ses->userName);
332 user = kmalloc(2 + (len * 2), GFP_KERNEL);
333 if(user == NULL)
334 goto calc_exit_2;
335 len = cifs_strtoUCS(user, ses->userName, len, nls_cp);
336 UniStrupr(user);
337 hmac_md5_update((char *)user, 2*len, pctxt);
338
339 /* convert ses->domainName to unicode and uppercase */
340 if(ses->domainName) {
341 len = strlen(ses->domainName);
342
343 domain = kmalloc(2 + (len * 2), GFP_KERNEL);
344 if(domain == NULL)
345 goto calc_exit_1;
346 len = cifs_strtoUCS(domain, ses->domainName, len, nls_cp);
347 UniStrupr(domain);
348
349 hmac_md5_update((char *)domain, 2*len, pctxt);
350
351 kfree(domain);
352 }
353calc_exit_1:
354 kfree(user);
355calc_exit_2:
356 /* BB FIXME what about bytes 24 through 40 of the signing key?
357 compare with the NTLM example */
358 hmac_md5_final(ses->server->mac_signing_key, pctxt);
359
360 return rc;
361}
362
363void setup_ntlmv2_rsp(struct cifsSesInfo * ses, char * resp_buf,
364 const struct nls_table * nls_cp)
365{
366 int rc;
367 struct ntlmv2_resp * buf = (struct ntlmv2_resp *)resp_buf;
368
369 buf->blob_signature = cpu_to_le32(0x00000101);
370 buf->reserved = 0;
371 buf->time = cpu_to_le64(cifs_UnixTimeToNT(CURRENT_TIME));
372 get_random_bytes(&buf->client_chal, sizeof(buf->client_chal));
373 buf->reserved2 = 0;
374 buf->names[0].type = 0;
375 buf->names[0].length = 0;
376
377 /* calculate buf->ntlmv2_hash */
378 rc = calc_ntlmv2_hash(ses, nls_cp);
379 if(rc)
380 cERROR(1,("could not get v2 hash rc %d",rc));
381 CalcNTLMv2_response(ses, resp_buf);
382}
383
384void CalcNTLMv2_response(const struct cifsSesInfo * ses, char * v2_session_response)
263{ 385{
264 struct HMACMD5Context context; 386 struct HMACMD5Context context;
387 /* rest of v2 struct already generated */
265 memcpy(v2_session_response + 8, ses->server->cryptKey,8); 388 memcpy(v2_session_response + 8, ses->server->cryptKey,8);
266 /* gen_blob(v2_session_response + 16); */
267 hmac_md5_init_limK_to_64(ses->server->mac_signing_key, 16, &context); 389 hmac_md5_init_limK_to_64(ses->server->mac_signing_key, 16, &context);
268 390
269 hmac_md5_update(ses->server->cryptKey,8,&context); 391 hmac_md5_update(v2_session_response+8,
270/* hmac_md5_update(v2_session_response+16)client thing,8,&context); */ /* BB fix */ 392 sizeof(struct ntlmv2_resp) - 8, &context);
271 393
272 hmac_md5_final(v2_session_response,&context); 394 hmac_md5_final(v2_session_response,&context);
273 cifs_dump_mem("v2_sess_rsp: ", v2_session_response, 32); /* BB removeme BB */ 395/* cifs_dump_mem("v2_sess_rsp: ", v2_session_response, 32); */
274} 396}
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 8b4de6eaabd0..c28ede599946 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -56,8 +56,8 @@ unsigned int experimEnabled = 0;
56unsigned int linuxExtEnabled = 1; 56unsigned int linuxExtEnabled = 1;
57unsigned int lookupCacheEnabled = 1; 57unsigned int lookupCacheEnabled = 1;
58unsigned int multiuser_mount = 0; 58unsigned int multiuser_mount = 0;
59unsigned int extended_security = 0; 59unsigned int extended_security = CIFSSEC_DEF;
60unsigned int ntlmv2_support = 0; 60/* unsigned int ntlmv2_support = 0; */
61unsigned int sign_CIFS_PDUs = 1; 61unsigned int sign_CIFS_PDUs = 1;
62extern struct task_struct * oplockThread; /* remove sparse warning */ 62extern struct task_struct * oplockThread; /* remove sparse warning */
63struct task_struct * oplockThread = NULL; 63struct task_struct * oplockThread = NULL;
@@ -908,7 +908,7 @@ static int cifs_dnotify_thread(void * dummyarg)
908 struct cifsSesInfo *ses; 908 struct cifsSesInfo *ses;
909 909
910 do { 910 do {
911 if(try_to_freeze()) 911 if (try_to_freeze())
912 continue; 912 continue;
913 set_current_state(TASK_INTERRUPTIBLE); 913 set_current_state(TASK_INTERRUPTIBLE);
914 schedule_timeout(15*HZ); 914 schedule_timeout(15*HZ);
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index d56c0577c710..a6384d83fdef 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -33,6 +33,7 @@
33#endif 33#endif
34 34
35extern struct address_space_operations cifs_addr_ops; 35extern struct address_space_operations cifs_addr_ops;
36extern struct address_space_operations cifs_addr_ops_smallbuf;
36 37
37/* Functions related to super block operations */ 38/* Functions related to super block operations */
38extern struct super_operations cifs_super_ops; 39extern struct super_operations cifs_super_ops;
@@ -99,5 +100,5 @@ extern ssize_t cifs_getxattr(struct dentry *, const char *, void *, size_t);
99extern ssize_t cifs_listxattr(struct dentry *, char *, size_t); 100extern ssize_t cifs_listxattr(struct dentry *, char *, size_t);
100extern int cifs_ioctl (struct inode * inode, struct file * filep, 101extern int cifs_ioctl (struct inode * inode, struct file * filep,
101 unsigned int command, unsigned long arg); 102 unsigned int command, unsigned long arg);
102#define CIFS_VERSION "1.43" 103#define CIFS_VERSION "1.44"
103#endif /* _CIFSFS_H */ 104#endif /* _CIFSFS_H */
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index 006eb33bff5f..6d7cf5f3bc0b 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -88,7 +88,8 @@ enum statusEnum {
88}; 88};
89 89
90enum securityEnum { 90enum securityEnum {
91 NTLM = 0, /* Legacy NTLM012 auth with NTLM hash */ 91 LANMAN = 0, /* Legacy LANMAN auth */
92 NTLM, /* Legacy NTLM012 auth with NTLM hash */
92 NTLMv2, /* Legacy NTLM auth with NTLMv2 hash */ 93 NTLMv2, /* Legacy NTLM auth with NTLMv2 hash */
93 RawNTLMSSP, /* NTLMSSP without SPNEGO */ 94 RawNTLMSSP, /* NTLMSSP without SPNEGO */
94 NTLMSSP, /* NTLMSSP via SPNEGO */ 95 NTLMSSP, /* NTLMSSP via SPNEGO */
@@ -157,7 +158,7 @@ struct TCP_Server_Info {
157 /* 16th byte of RFC1001 workstation name is always null */ 158 /* 16th byte of RFC1001 workstation name is always null */
158 char workstation_RFC1001_name[SERVER_NAME_LEN_WITH_NULL]; 159 char workstation_RFC1001_name[SERVER_NAME_LEN_WITH_NULL];
159 __u32 sequence_number; /* needed for CIFS PDU signature */ 160 __u32 sequence_number; /* needed for CIFS PDU signature */
160 char mac_signing_key[CIFS_SESSION_KEY_SIZE + 16]; 161 char mac_signing_key[CIFS_SESS_KEY_SIZE + 16];
161}; 162};
162 163
163/* 164/*
@@ -179,10 +180,13 @@ struct cifsUidInfo {
179struct cifsSesInfo { 180struct cifsSesInfo {
180 struct list_head cifsSessionList; 181 struct list_head cifsSessionList;
181 struct semaphore sesSem; 182 struct semaphore sesSem;
183#if 0
182 struct cifsUidInfo *uidInfo; /* pointer to user info */ 184 struct cifsUidInfo *uidInfo; /* pointer to user info */
185#endif
183 struct TCP_Server_Info *server; /* pointer to server info */ 186 struct TCP_Server_Info *server; /* pointer to server info */
184 atomic_t inUse; /* # of mounts (tree connections) on this ses */ 187 atomic_t inUse; /* # of mounts (tree connections) on this ses */
185 enum statusEnum status; 188 enum statusEnum status;
189 unsigned overrideSecFlg; /* if non-zero override global sec flags */
186 __u16 ipc_tid; /* special tid for connection to IPC share */ 190 __u16 ipc_tid; /* special tid for connection to IPC share */
187 __u16 flags; 191 __u16 flags;
188 char *serverOS; /* name of operating system underlying server */ 192 char *serverOS; /* name of operating system underlying server */
@@ -194,7 +198,7 @@ struct cifsSesInfo {
194 char serverName[SERVER_NAME_LEN_WITH_NULL * 2]; /* BB make bigger for 198 char serverName[SERVER_NAME_LEN_WITH_NULL * 2]; /* BB make bigger for
195 TCP names - will ipv6 and sctp addresses fit? */ 199 TCP names - will ipv6 and sctp addresses fit? */
196 char userName[MAX_USERNAME_SIZE + 1]; 200 char userName[MAX_USERNAME_SIZE + 1];
197 char domainName[MAX_USERNAME_SIZE + 1]; 201 char * domainName;
198 char * password; 202 char * password;
199}; 203};
200/* session flags */ 204/* session flags */
@@ -209,12 +213,12 @@ struct cifsTconInfo {
209 struct list_head openFileList; 213 struct list_head openFileList;
210 struct semaphore tconSem; 214 struct semaphore tconSem;
211 struct cifsSesInfo *ses; /* pointer to session associated with */ 215 struct cifsSesInfo *ses; /* pointer to session associated with */
212 char treeName[MAX_TREE_SIZE + 1]; /* UNC name of resource (in ASCII not UTF) */ 216 char treeName[MAX_TREE_SIZE + 1]; /* UNC name of resource in ASCII */
213 char *nativeFileSystem; 217 char *nativeFileSystem;
214 __u16 tid; /* The 2 byte tree id */ 218 __u16 tid; /* The 2 byte tree id */
215 __u16 Flags; /* optional support bits */ 219 __u16 Flags; /* optional support bits */
216 enum statusEnum tidStatus; 220 enum statusEnum tidStatus;
217 atomic_t useCount; /* how many mounts (explicit or implicit) to this share */ 221 atomic_t useCount; /* how many explicit/implicit mounts to share */
218#ifdef CONFIG_CIFS_STATS 222#ifdef CONFIG_CIFS_STATS
219 atomic_t num_smbs_sent; 223 atomic_t num_smbs_sent;
220 atomic_t num_writes; 224 atomic_t num_writes;
@@ -254,7 +258,7 @@ struct cifsTconInfo {
254 spinlock_t stat_lock; 258 spinlock_t stat_lock;
255#endif /* CONFIG_CIFS_STATS */ 259#endif /* CONFIG_CIFS_STATS */
256 FILE_SYSTEM_DEVICE_INFO fsDevInfo; 260 FILE_SYSTEM_DEVICE_INFO fsDevInfo;
257 FILE_SYSTEM_ATTRIBUTE_INFO fsAttrInfo; /* ok if file system name truncated */ 261 FILE_SYSTEM_ATTRIBUTE_INFO fsAttrInfo; /* ok if fs name truncated */
258 FILE_SYSTEM_UNIX_INFO fsUnixInfo; 262 FILE_SYSTEM_UNIX_INFO fsUnixInfo;
259 unsigned retry:1; 263 unsigned retry:1;
260 unsigned nocase:1; 264 unsigned nocase:1;
@@ -305,7 +309,6 @@ struct cifsFileInfo {
305 atomic_t wrtPending; /* handle in use - defer close */ 309 atomic_t wrtPending; /* handle in use - defer close */
306 struct semaphore fh_sem; /* prevents reopen race after dead ses*/ 310 struct semaphore fh_sem; /* prevents reopen race after dead ses*/
307 char * search_resume_name; /* BB removeme BB */ 311 char * search_resume_name; /* BB removeme BB */
308 unsigned int resume_name_length; /* BB removeme - field renamed and moved BB */
309 struct cifs_search_info srch_inf; 312 struct cifs_search_info srch_inf;
310}; 313};
311 314
@@ -391,9 +394,9 @@ struct mid_q_entry {
391 struct smb_hdr *resp_buf; /* response buffer */ 394 struct smb_hdr *resp_buf; /* response buffer */
392 int midState; /* wish this were enum but can not pass to wait_event */ 395 int midState; /* wish this were enum but can not pass to wait_event */
393 __u8 command; /* smb command code */ 396 __u8 command; /* smb command code */
394 unsigned multiPart:1; /* multiple responses to one SMB request */
395 unsigned largeBuf:1; /* if valid response, is pointer to large buf */ 397 unsigned largeBuf:1; /* if valid response, is pointer to large buf */
396 unsigned multiResp:1; /* multiple trans2 responses for one request */ 398 unsigned multiRsp:1; /* multiple trans2 responses for one request */
399 unsigned multiEnd:1; /* both received */
397}; 400};
398 401
399struct oplock_q_entry { 402struct oplock_q_entry {
@@ -430,15 +433,35 @@ struct dir_notify_req {
430#define CIFS_LARGE_BUFFER 2 433#define CIFS_LARGE_BUFFER 2
431#define CIFS_IOVEC 4 /* array of response buffers */ 434#define CIFS_IOVEC 4 /* array of response buffers */
432 435
433/* Type of session setup needed */ 436/* Security Flags: indicate type of session setup needed */
434#define CIFS_PLAINTEXT 0 437#define CIFSSEC_MAY_SIGN 0x00001
435#define CIFS_LANMAN 1 438#define CIFSSEC_MAY_NTLM 0x00002
436#define CIFS_NTLM 2 439#define CIFSSEC_MAY_NTLMV2 0x00004
437#define CIFS_NTLMSSP_NEG 3 440#define CIFSSEC_MAY_KRB5 0x00008
438#define CIFS_NTLMSSP_AUTH 4 441#ifdef CONFIG_CIFS_WEAK_PW_HASH
439#define CIFS_SPNEGO_INIT 5 442#define CIFSSEC_MAY_LANMAN 0x00010
440#define CIFS_SPNEGO_TARG 6 443#define CIFSSEC_MAY_PLNTXT 0x00020
441 444#endif /* weak passwords */
445#define CIFSSEC_MAY_SEAL 0x00040 /* not supported yet */
446
447#define CIFSSEC_MUST_SIGN 0x01001
448/* note that only one of the following can be set so the
449result of setting MUST flags more than once will be to
450require use of the stronger protocol */
451#define CIFSSEC_MUST_NTLM 0x02002
452#define CIFSSEC_MUST_NTLMV2 0x04004
453#define CIFSSEC_MUST_KRB5 0x08008
454#ifdef CONFIG_CIFS_WEAK_PW_HASH
455#define CIFSSEC_MUST_LANMAN 0x10010
456#define CIFSSEC_MUST_PLNTXT 0x20020
457#define CIFSSEC_MASK 0x37037 /* current flags supported if weak */
458#else
459#define CIFSSEC_MASK 0x07007 /* flags supported if no weak config */
460#endif /* WEAK_PW_HASH */
461#define CIFSSEC_MUST_SEAL 0x40040 /* not supported yet */
462
463#define CIFSSEC_DEF CIFSSEC_MAY_SIGN | CIFSSEC_MAY_NTLM | CIFSSEC_MAY_NTLMV2
464#define CIFSSEC_MAX CIFSSEC_MUST_SIGN | CIFSSEC_MUST_NTLMV2
442/* 465/*
443 ***************************************************************** 466 *****************************************************************
444 * All constants go here 467 * All constants go here
@@ -500,16 +523,16 @@ GLOBAL_EXTERN rwlock_t GlobalSMBSeslock; /* protects list inserts on 3 above */
500GLOBAL_EXTERN struct list_head GlobalOplock_Q; 523GLOBAL_EXTERN struct list_head GlobalOplock_Q;
501 524
502GLOBAL_EXTERN struct list_head GlobalDnotifyReqList; /* Outstanding dir notify requests */ 525GLOBAL_EXTERN struct list_head GlobalDnotifyReqList; /* Outstanding dir notify requests */
503GLOBAL_EXTERN struct list_head GlobalDnotifyRsp_Q; /* Dir notify response queue */ 526GLOBAL_EXTERN struct list_head GlobalDnotifyRsp_Q;/* DirNotify response queue */
504 527
505/* 528/*
506 * Global transaction id (XID) information 529 * Global transaction id (XID) information
507 */ 530 */
508GLOBAL_EXTERN unsigned int GlobalCurrentXid; /* protected by GlobalMid_Sem */ 531GLOBAL_EXTERN unsigned int GlobalCurrentXid; /* protected by GlobalMid_Sem */
509GLOBAL_EXTERN unsigned int GlobalTotalActiveXid; /* prot by GlobalMid_Sem */ 532GLOBAL_EXTERN unsigned int GlobalTotalActiveXid; /* prot by GlobalMid_Sem */
510GLOBAL_EXTERN unsigned int GlobalMaxActiveXid; /* prot by GlobalMid_Sem */ 533GLOBAL_EXTERN unsigned int GlobalMaxActiveXid; /* prot by GlobalMid_Sem */
511GLOBAL_EXTERN spinlock_t GlobalMid_Lock; /* protects above and list operations */ 534GLOBAL_EXTERN spinlock_t GlobalMid_Lock; /* protects above & list operations */
512 /* on midQ entries */ 535 /* on midQ entries */
513GLOBAL_EXTERN char Local_System_Name[15]; 536GLOBAL_EXTERN char Local_System_Name[15];
514 537
515/* 538/*
@@ -531,7 +554,7 @@ GLOBAL_EXTERN atomic_t smBufAllocCount;
531GLOBAL_EXTERN atomic_t midCount; 554GLOBAL_EXTERN atomic_t midCount;
532 555
533/* Misc globals */ 556/* Misc globals */
534GLOBAL_EXTERN unsigned int multiuser_mount; /* if enabled allows new sessions 557GLOBAL_EXTERN unsigned int multiuser_mount; /* if enabled allows new sessions
535 to be established on existing mount if we 558 to be established on existing mount if we
536 have the uid/password or Kerberos credential 559 have the uid/password or Kerberos credential
537 or equivalent for current user */ 560 or equivalent for current user */
@@ -540,8 +563,8 @@ GLOBAL_EXTERN unsigned int experimEnabled;
540GLOBAL_EXTERN unsigned int lookupCacheEnabled; 563GLOBAL_EXTERN unsigned int lookupCacheEnabled;
541GLOBAL_EXTERN unsigned int extended_security; /* if on, session setup sent 564GLOBAL_EXTERN unsigned int extended_security; /* if on, session setup sent
542 with more secure ntlmssp2 challenge/resp */ 565 with more secure ntlmssp2 challenge/resp */
543GLOBAL_EXTERN unsigned int ntlmv2_support; /* better optional password hash */
544GLOBAL_EXTERN unsigned int sign_CIFS_PDUs; /* enable smb packet signing */ 566GLOBAL_EXTERN unsigned int sign_CIFS_PDUs; /* enable smb packet signing */
567GLOBAL_EXTERN unsigned int secFlags;
545GLOBAL_EXTERN unsigned int linuxExtEnabled;/*enable Linux/Unix CIFS extensions*/ 568GLOBAL_EXTERN unsigned int linuxExtEnabled;/*enable Linux/Unix CIFS extensions*/
546GLOBAL_EXTERN unsigned int CIFSMaxBufSize; /* max size not including hdr */ 569GLOBAL_EXTERN unsigned int CIFSMaxBufSize; /* max size not including hdr */
547GLOBAL_EXTERN unsigned int cifs_min_rcv; /* min size of big ntwrk buf pool */ 570GLOBAL_EXTERN unsigned int cifs_min_rcv; /* min size of big ntwrk buf pool */
diff --git a/fs/cifs/cifspdu.h b/fs/cifs/cifspdu.h
index b2233ac05bd2..86239023545b 100644
--- a/fs/cifs/cifspdu.h
+++ b/fs/cifs/cifspdu.h
@@ -16,7 +16,7 @@
16 * 16 *
17 * You should have received a copy of the GNU Lesser General Public License 17 * You should have received a copy of the GNU Lesser General Public License
18 * along with this library; if not, write to the Free Software 18 * along with this library; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 */ 20 */
21 21
22#ifndef _CIFSPDU_H 22#ifndef _CIFSPDU_H
@@ -24,8 +24,14 @@
24 24
25#include <net/sock.h> 25#include <net/sock.h>
26 26
27#ifdef CONFIG_CIFS_WEAK_PW_HASH
28#define LANMAN_PROT 0
29#define CIFS_PROT 1
30#else
27#define CIFS_PROT 0 31#define CIFS_PROT 0
28#define BAD_PROT CIFS_PROT+1 32#endif
33#define POSIX_PROT CIFS_PROT+1
34#define BAD_PROT 0xFFFF
29 35
30/* SMB command codes */ 36/* SMB command codes */
31/* Some commands have minimal (wct=0,bcc=0), or uninteresting, responses 37/* Some commands have minimal (wct=0,bcc=0), or uninteresting, responses
@@ -110,7 +116,7 @@
110/* 116/*
111 * Size of the session key (crypto key encrypted with the password 117 * Size of the session key (crypto key encrypted with the password
112 */ 118 */
113#define CIFS_SESSION_KEY_SIZE (24) 119#define CIFS_SESS_KEY_SIZE (24)
114 120
115/* 121/*
116 * Maximum user name length 122 * Maximum user name length
@@ -400,6 +406,29 @@ typedef struct negotiate_req {
400 unsigned char DialectsArray[1]; 406 unsigned char DialectsArray[1];
401} __attribute__((packed)) NEGOTIATE_REQ; 407} __attribute__((packed)) NEGOTIATE_REQ;
402 408
409/* Dialect index is 13 for LANMAN */
410
411typedef struct lanman_neg_rsp {
412 struct smb_hdr hdr; /* wct = 13 */
413 __le16 DialectIndex;
414 __le16 SecurityMode;
415 __le16 MaxBufSize;
416 __le16 MaxMpxCount;
417 __le16 MaxNumberVcs;
418 __le16 RawMode;
419 __le32 SessionKey;
420 __le32 ServerTime;
421 __le16 ServerTimeZone;
422 __le16 EncryptionKeyLength;
423 __le16 Reserved;
424 __u16 ByteCount;
425 unsigned char EncryptionKey[1];
426} __attribute__((packed)) LANMAN_NEG_RSP;
427
428#define READ_RAW_ENABLE 1
429#define WRITE_RAW_ENABLE 2
430#define RAW_ENABLE (READ_RAW_ENABLE | WRITE_RAW_ENABLE)
431
403typedef struct negotiate_rsp { 432typedef struct negotiate_rsp {
404 struct smb_hdr hdr; /* wct = 17 */ 433 struct smb_hdr hdr; /* wct = 17 */
405 __le16 DialectIndex; 434 __le16 DialectIndex;
@@ -509,7 +538,7 @@ typedef union smb_com_session_setup_andx {
509/* unsigned char * NativeOS; */ 538/* unsigned char * NativeOS; */
510/* unsigned char * NativeLanMan; */ 539/* unsigned char * NativeLanMan; */
511/* unsigned char * PrimaryDomain; */ 540/* unsigned char * PrimaryDomain; */
512 } __attribute__((packed)) resp; /* NTLM response format (with or without extended security */ 541 } __attribute__((packed)) resp; /* NTLM response with or without extended sec*/
513 542
514 struct { /* request format */ 543 struct { /* request format */
515 struct smb_hdr hdr; /* wct = 10 */ 544 struct smb_hdr hdr; /* wct = 10 */
@@ -520,8 +549,8 @@ typedef union smb_com_session_setup_andx {
520 __le16 MaxMpxCount; 549 __le16 MaxMpxCount;
521 __le16 VcNumber; 550 __le16 VcNumber;
522 __u32 SessionKey; 551 __u32 SessionKey;
523 __le16 PassswordLength; 552 __le16 PasswordLength;
524 __u32 Reserved; 553 __u32 Reserved; /* encrypt key len and offset */
525 __le16 ByteCount; 554 __le16 ByteCount;
526 unsigned char AccountPassword[1]; /* followed by */ 555 unsigned char AccountPassword[1]; /* followed by */
527 /* STRING AccountName */ 556 /* STRING AccountName */
@@ -543,6 +572,26 @@ typedef union smb_com_session_setup_andx {
543 } __attribute__((packed)) old_resp; /* pre-NTLM (LANMAN2.1) response */ 572 } __attribute__((packed)) old_resp; /* pre-NTLM (LANMAN2.1) response */
544} __attribute__((packed)) SESSION_SETUP_ANDX; 573} __attribute__((packed)) SESSION_SETUP_ANDX;
545 574
575/* format of NLTMv2 Response ie "case sensitive password" hash when NTLMv2 */
576
577struct ntlmssp2_name {
578 __le16 type;
579 __le16 length;
580/* char name[length]; */
581} __attribute__((packed));
582
583struct ntlmv2_resp {
584 char ntlmv2_hash[CIFS_ENCPWD_SIZE];
585 __le32 blob_signature;
586 __u32 reserved;
587 __le64 time;
588 __u64 client_chal; /* random */
589 __u32 reserved2;
590 struct ntlmssp2_name names[1];
591 /* array of name entries could follow ending in minimum 4 byte struct */
592} __attribute__((packed));
593
594
546#define CIFS_NETWORK_OPSYS "CIFS VFS Client for Linux" 595#define CIFS_NETWORK_OPSYS "CIFS VFS Client for Linux"
547 596
548/* Capabilities bits (for NTLM SessSetup request) */ 597/* Capabilities bits (for NTLM SessSetup request) */
@@ -573,7 +622,9 @@ typedef struct smb_com_tconx_req {
573} __attribute__((packed)) TCONX_REQ; 622} __attribute__((packed)) TCONX_REQ;
574 623
575typedef struct smb_com_tconx_rsp { 624typedef struct smb_com_tconx_rsp {
576 struct smb_hdr hdr; /* wct = 3 *//* note that Win2000 has sent wct=7 in some cases on responses. Four unspecified words followed OptionalSupport */ 625 struct smb_hdr hdr; /* wct = 3 note that Win2000 has sent wct = 7
626 in some cases on responses. Four unspecified
627 words followed OptionalSupport */
577 __u8 AndXCommand; 628 __u8 AndXCommand;
578 __u8 AndXReserved; 629 __u8 AndXReserved;
579 __le16 AndXOffset; 630 __le16 AndXOffset;
@@ -1323,6 +1374,9 @@ struct smb_t2_rsp {
1323#define SMB_FILE_MAXIMUM_INFO 0x40d 1374#define SMB_FILE_MAXIMUM_INFO 0x40d
1324 1375
1325/* Find File infolevels */ 1376/* Find File infolevels */
1377#define SMB_FIND_FILE_INFO_STANDARD 0x001
1378#define SMB_FIND_FILE_QUERY_EA_SIZE 0x002
1379#define SMB_FIND_FILE_QUERY_EAS_FROM_LIST 0x003
1326#define SMB_FIND_FILE_DIRECTORY_INFO 0x101 1380#define SMB_FIND_FILE_DIRECTORY_INFO 0x101
1327#define SMB_FIND_FILE_FULL_DIRECTORY_INFO 0x102 1381#define SMB_FIND_FILE_FULL_DIRECTORY_INFO 0x102
1328#define SMB_FIND_FILE_NAMES_INFO 0x103 1382#define SMB_FIND_FILE_NAMES_INFO 0x103
@@ -1844,13 +1898,13 @@ typedef struct {
1844typedef struct { 1898typedef struct {
1845 __le32 DeviceType; 1899 __le32 DeviceType;
1846 __le32 DeviceCharacteristics; 1900 __le32 DeviceCharacteristics;
1847} __attribute__((packed)) FILE_SYSTEM_DEVICE_INFO; /* device info, level 0x104 */ 1901} __attribute__((packed)) FILE_SYSTEM_DEVICE_INFO; /* device info level 0x104 */
1848 1902
1849typedef struct { 1903typedef struct {
1850 __le32 Attributes; 1904 __le32 Attributes;
1851 __le32 MaxPathNameComponentLength; 1905 __le32 MaxPathNameComponentLength;
1852 __le32 FileSystemNameLen; 1906 __le32 FileSystemNameLen;
1853 char FileSystemName[52]; /* do not really need to save this - so potentially get only subset of name */ 1907 char FileSystemName[52]; /* do not have to save this - get subset? */
1854} __attribute__((packed)) FILE_SYSTEM_ATTRIBUTE_INFO; 1908} __attribute__((packed)) FILE_SYSTEM_ATTRIBUTE_INFO;
1855 1909
1856/******************************************************************************/ 1910/******************************************************************************/
@@ -1947,7 +2001,8 @@ typedef struct {
1947 2001
1948struct file_allocation_info { 2002struct file_allocation_info {
1949 __le64 AllocationSize; /* Note old Samba srvr rounds this up too much */ 2003 __le64 AllocationSize; /* Note old Samba srvr rounds this up too much */
1950} __attribute__((packed)); /* size used on disk, level 0x103 for set, 0x105 for query */ 2004} __attribute__((packed)); /* size used on disk, for level 0x103 for set,
2005 0x105 for query */
1951 2006
1952struct file_end_of_file_info { 2007struct file_end_of_file_info {
1953 __le64 FileSize; /* offset to end of file */ 2008 __le64 FileSize; /* offset to end of file */
@@ -2054,7 +2109,7 @@ typedef struct {
2054 __le32 ExtFileAttributes; 2109 __le32 ExtFileAttributes;
2055 __le32 FileNameLength; 2110 __le32 FileNameLength;
2056 char FileName[1]; 2111 char FileName[1];
2057} __attribute__((packed)) FILE_DIRECTORY_INFO; /* level 0x101 FF response data area */ 2112} __attribute__((packed)) FILE_DIRECTORY_INFO; /* level 0x101 FF resp data */
2058 2113
2059typedef struct { 2114typedef struct {
2060 __le32 NextEntryOffset; 2115 __le32 NextEntryOffset;
@@ -2069,7 +2124,7 @@ typedef struct {
2069 __le32 FileNameLength; 2124 __le32 FileNameLength;
2070 __le32 EaSize; /* length of the xattrs */ 2125 __le32 EaSize; /* length of the xattrs */
2071 char FileName[1]; 2126 char FileName[1];
2072} __attribute__((packed)) FILE_FULL_DIRECTORY_INFO; /* level 0x102 FF response data area */ 2127} __attribute__((packed)) FILE_FULL_DIRECTORY_INFO; /* level 0x102 rsp data */
2073 2128
2074typedef struct { 2129typedef struct {
2075 __le32 NextEntryOffset; 2130 __le32 NextEntryOffset;
@@ -2086,7 +2141,7 @@ typedef struct {
2086 __le32 Reserved; 2141 __le32 Reserved;
2087 __u64 UniqueId; /* inode num - le since Samba puts ino in low 32 bit*/ 2142 __u64 UniqueId; /* inode num - le since Samba puts ino in low 32 bit*/
2088 char FileName[1]; 2143 char FileName[1];
2089} __attribute__((packed)) SEARCH_ID_FULL_DIR_INFO; /* level 0x105 FF response data area */ 2144} __attribute__((packed)) SEARCH_ID_FULL_DIR_INFO; /* level 0x105 FF rsp data */
2090 2145
2091typedef struct { 2146typedef struct {
2092 __le32 NextEntryOffset; 2147 __le32 NextEntryOffset;
@@ -2104,7 +2159,22 @@ typedef struct {
2104 __u8 Reserved; 2159 __u8 Reserved;
2105 __u8 ShortName[12]; 2160 __u8 ShortName[12];
2106 char FileName[1]; 2161 char FileName[1];
2107} __attribute__((packed)) FILE_BOTH_DIRECTORY_INFO; /* level 0x104 FF response data area */ 2162} __attribute__((packed)) FILE_BOTH_DIRECTORY_INFO; /* level 0x104 FFrsp data */
2163
2164typedef struct {
2165 __u32 ResumeKey;
2166 __le16 CreationDate; /* SMB Date */
2167 __le16 CreationTime; /* SMB Time */
2168 __le16 LastAccessDate;
2169 __le16 LastAccessTime;
2170 __le16 LastWriteDate;
2171 __le16 LastWriteTime;
2172 __le32 DataSize; /* File Size (EOF) */
2173 __le32 AllocationSize;
2174 __le16 Attributes; /* verify not u32 */
2175 __u8 FileNameLength;
2176 char FileName[1];
2177} __attribute__((packed)) FIND_FILE_STANDARD_INFO; /* level 0x1 FF resp data */
2108 2178
2109 2179
2110struct win_dev { 2180struct win_dev {
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index 310ea2f0e0bf..a5ddc62d6fe6 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -64,14 +64,12 @@ extern int map_smb_to_linux_error(struct smb_hdr *smb);
64extern void header_assemble(struct smb_hdr *, char /* command */ , 64extern void header_assemble(struct smb_hdr *, char /* command */ ,
65 const struct cifsTconInfo *, int /* length of 65 const struct cifsTconInfo *, int /* length of
66 fixed section (word count) in two byte units */); 66 fixed section (word count) in two byte units */);
67#ifdef CONFIG_CIFS_EXPERIMENTAL
68extern int small_smb_init_no_tc(const int smb_cmd, const int wct, 67extern int small_smb_init_no_tc(const int smb_cmd, const int wct,
69 struct cifsSesInfo *ses, 68 struct cifsSesInfo *ses,
70 void ** request_buf); 69 void ** request_buf);
71extern int CIFS_SessSetup(unsigned int xid, struct cifsSesInfo *ses, 70extern int CIFS_SessSetup(unsigned int xid, struct cifsSesInfo *ses,
72 const int stage, int * pNTLMv2_flg, 71 const int stage,
73 const struct nls_table *nls_cp); 72 const struct nls_table *nls_cp);
74#endif
75extern __u16 GetNextMid(struct TCP_Server_Info *server); 73extern __u16 GetNextMid(struct TCP_Server_Info *server);
76extern struct oplock_q_entry * AllocOplockQEntry(struct inode *, u16, 74extern struct oplock_q_entry * AllocOplockQEntry(struct inode *, u16,
77 struct cifsTconInfo *); 75 struct cifsTconInfo *);
@@ -285,8 +283,14 @@ extern int cifs_sign_smb2(struct kvec *iov, int n_vec, struct TCP_Server_Info *,
285extern int cifs_verify_signature(struct smb_hdr *, const char * mac_key, 283extern int cifs_verify_signature(struct smb_hdr *, const char * mac_key,
286 __u32 expected_sequence_number); 284 __u32 expected_sequence_number);
287extern int cifs_calculate_mac_key(char * key,const char * rn,const char * pass); 285extern int cifs_calculate_mac_key(char * key,const char * rn,const char * pass);
288extern int CalcNTLMv2_partial_mac_key(struct cifsSesInfo *, struct nls_table *); 286extern int CalcNTLMv2_partial_mac_key(struct cifsSesInfo *,
289extern void CalcNTLMv2_response(const struct cifsSesInfo *,char * ); 287 const struct nls_table *);
288extern void CalcNTLMv2_response(const struct cifsSesInfo *, char * );
289extern void setup_ntlmv2_rsp(struct cifsSesInfo *, char *,
290 const struct nls_table *);
291#ifdef CONFIG_CIFS_WEAK_PW_HASH
292extern void calc_lanman_hash(struct cifsSesInfo * ses, char * lnm_session_key);
293#endif /* CIFS_WEAK_PW_HASH */
290extern int CIFSSMBCopy(int xid, 294extern int CIFSSMBCopy(int xid,
291 struct cifsTconInfo *source_tcon, 295 struct cifsTconInfo *source_tcon,
292 const char *fromName, 296 const char *fromName,
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index 925881e00ff2..19678c575dfc 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -44,8 +44,11 @@ static struct {
44 int index; 44 int index;
45 char *name; 45 char *name;
46} protocols[] = { 46} protocols[] = {
47#ifdef CONFIG_CIFS_WEAK_PW_HASH
48 {LANMAN_PROT, "\2LM1.2X002"},
49#endif /* weak password hashing for legacy clients */
47 {CIFS_PROT, "\2NT LM 0.12"}, 50 {CIFS_PROT, "\2NT LM 0.12"},
48 {CIFS_PROT, "\2POSIX 2"}, 51 {POSIX_PROT, "\2POSIX 2"},
49 {BAD_PROT, "\2"} 52 {BAD_PROT, "\2"}
50}; 53};
51#else 54#else
@@ -53,11 +56,29 @@ static struct {
53 int index; 56 int index;
54 char *name; 57 char *name;
55} protocols[] = { 58} protocols[] = {
59#ifdef CONFIG_CIFS_WEAK_PW_HASH
60 {LANMAN_PROT, "\2LM1.2X002"},
61#endif /* weak password hashing for legacy clients */
56 {CIFS_PROT, "\2NT LM 0.12"}, 62 {CIFS_PROT, "\2NT LM 0.12"},
57 {BAD_PROT, "\2"} 63 {BAD_PROT, "\2"}
58}; 64};
59#endif 65#endif
60 66
67/* define the number of elements in the cifs dialect array */
68#ifdef CONFIG_CIFS_POSIX
69#ifdef CONFIG_CIFS_WEAK_PW_HASH
70#define CIFS_NUM_PROT 3
71#else
72#define CIFS_NUM_PROT 2
73#endif /* CIFS_WEAK_PW_HASH */
74#else /* not posix */
75#ifdef CONFIG_CIFS_WEAK_PW_HASH
76#define CIFS_NUM_PROT 2
77#else
78#define CIFS_NUM_PROT 1
79#endif /* CONFIG_CIFS_WEAK_PW_HASH */
80#endif /* CIFS_POSIX */
81
61 82
62/* Mark as invalid, all open files on tree connections since they 83/* Mark as invalid, all open files on tree connections since they
63 were closed when session to server was lost */ 84 were closed when session to server was lost */
@@ -188,7 +209,6 @@ small_smb_init(int smb_command, int wct, struct cifsTconInfo *tcon,
188 return rc; 209 return rc;
189} 210}
190 211
191#ifdef CONFIG_CIFS_EXPERIMENTAL
192int 212int
193small_smb_init_no_tc(const int smb_command, const int wct, 213small_smb_init_no_tc(const int smb_command, const int wct,
194 struct cifsSesInfo *ses, void **request_buf) 214 struct cifsSesInfo *ses, void **request_buf)
@@ -214,7 +234,6 @@ small_smb_init_no_tc(const int smb_command, const int wct,
214 234
215 return rc; 235 return rc;
216} 236}
217#endif /* CONFIG_CIFS_EXPERIMENTAL */
218 237
219/* If the return code is zero, this function must fill in request_buf pointer */ 238/* If the return code is zero, this function must fill in request_buf pointer */
220static int 239static int
@@ -322,7 +341,8 @@ smb_init(int smb_command, int wct, struct cifsTconInfo *tcon,
322 /* potential retries of smb operations it turns out we can determine */ 341 /* potential retries of smb operations it turns out we can determine */
323 /* from the mid flags when the request buffer can be resent without */ 342 /* from the mid flags when the request buffer can be resent without */
324 /* having to use a second distinct buffer for the response */ 343 /* having to use a second distinct buffer for the response */
325 *response_buf = *request_buf; 344 if(response_buf)
345 *response_buf = *request_buf;
326 346
327 header_assemble((struct smb_hdr *) *request_buf, smb_command, tcon, 347 header_assemble((struct smb_hdr *) *request_buf, smb_command, tcon,
328 wct /*wct */ ); 348 wct /*wct */ );
@@ -373,8 +393,10 @@ CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses)
373 NEGOTIATE_RSP *pSMBr; 393 NEGOTIATE_RSP *pSMBr;
374 int rc = 0; 394 int rc = 0;
375 int bytes_returned; 395 int bytes_returned;
396 int i;
376 struct TCP_Server_Info * server; 397 struct TCP_Server_Info * server;
377 u16 count; 398 u16 count;
399 unsigned int secFlags;
378 400
379 if(ses->server) 401 if(ses->server)
380 server = ses->server; 402 server = ses->server;
@@ -386,101 +408,200 @@ CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses)
386 (void **) &pSMB, (void **) &pSMBr); 408 (void **) &pSMB, (void **) &pSMBr);
387 if (rc) 409 if (rc)
388 return rc; 410 return rc;
411
412 /* if any of auth flags (ie not sign or seal) are overriden use them */
413 if(ses->overrideSecFlg & (~(CIFSSEC_MUST_SIGN | CIFSSEC_MUST_SEAL)))
414 secFlags = ses->overrideSecFlg;
415 else /* if override flags set only sign/seal OR them with global auth */
416 secFlags = extended_security | ses->overrideSecFlg;
417
418 cFYI(1,("secFlags 0x%x",secFlags));
419
389 pSMB->hdr.Mid = GetNextMid(server); 420 pSMB->hdr.Mid = GetNextMid(server);
390 pSMB->hdr.Flags2 |= SMBFLG2_UNICODE; 421 pSMB->hdr.Flags2 |= SMBFLG2_UNICODE;
391 if (extended_security) 422 if((secFlags & CIFSSEC_MUST_KRB5) == CIFSSEC_MUST_KRB5)
392 pSMB->hdr.Flags2 |= SMBFLG2_EXT_SEC; 423 pSMB->hdr.Flags2 |= SMBFLG2_EXT_SEC;
393 424
394 count = strlen(protocols[0].name) + 1; 425 count = 0;
395 strncpy(pSMB->DialectsArray, protocols[0].name, 30); 426 for(i=0;i<CIFS_NUM_PROT;i++) {
396 /* null guaranteed to be at end of source and target buffers anyway */ 427 strncpy(pSMB->DialectsArray+count, protocols[i].name, 16);
397 428 count += strlen(protocols[i].name) + 1;
429 /* null at end of source and target buffers anyway */
430 }
398 pSMB->hdr.smb_buf_length += count; 431 pSMB->hdr.smb_buf_length += count;
399 pSMB->ByteCount = cpu_to_le16(count); 432 pSMB->ByteCount = cpu_to_le16(count);
400 433
401 rc = SendReceive(xid, ses, (struct smb_hdr *) pSMB, 434 rc = SendReceive(xid, ses, (struct smb_hdr *) pSMB,
402 (struct smb_hdr *) pSMBr, &bytes_returned, 0); 435 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
403 if (rc == 0) { 436 if (rc != 0)
404 server->secMode = pSMBr->SecurityMode; 437 goto neg_err_exit;
405 if((server->secMode & SECMODE_USER) == 0) 438
406 cFYI(1,("share mode security")); 439 cFYI(1,("Dialect: %d", pSMBr->DialectIndex));
407 server->secType = NTLM; /* BB override default for 440 /* Check wct = 1 error case */
408 NTLMv2 or kerberos v5 */ 441 if((pSMBr->hdr.WordCount < 13) || (pSMBr->DialectIndex == BAD_PROT)) {
409 /* one byte - no need to convert this or EncryptionKeyLen 442 /* core returns wct = 1, but we do not ask for core - otherwise
410 from little endian */ 443 small wct just comes when dialect index is -1 indicating we
411 server->maxReq = le16_to_cpu(pSMBr->MaxMpxCount); 444 could not negotiate a common dialect */
412 /* probably no need to store and check maxvcs */ 445 rc = -EOPNOTSUPP;
413 server->maxBuf = 446 goto neg_err_exit;
414 min(le32_to_cpu(pSMBr->MaxBufferSize), 447#ifdef CONFIG_CIFS_WEAK_PW_HASH
448 } else if((pSMBr->hdr.WordCount == 13)
449 && (pSMBr->DialectIndex == LANMAN_PROT)) {
450 struct lanman_neg_rsp * rsp = (struct lanman_neg_rsp *)pSMBr;
451
452 if((secFlags & CIFSSEC_MAY_LANMAN) ||
453 (secFlags & CIFSSEC_MAY_PLNTXT))
454 server->secType = LANMAN;
455 else {
456 cERROR(1, ("mount failed weak security disabled"
457 " in /proc/fs/cifs/SecurityFlags"));
458 rc = -EOPNOTSUPP;
459 goto neg_err_exit;
460 }
461 server->secMode = (__u8)le16_to_cpu(rsp->SecurityMode);
462 server->maxReq = le16_to_cpu(rsp->MaxMpxCount);
463 server->maxBuf = min((__u32)le16_to_cpu(rsp->MaxBufSize),
464 (__u32)CIFSMaxBufSize + MAX_CIFS_HDR_SIZE);
465 GETU32(server->sessid) = le32_to_cpu(rsp->SessionKey);
466 /* even though we do not use raw we might as well set this
467 accurately, in case we ever find a need for it */
468 if((le16_to_cpu(rsp->RawMode) & RAW_ENABLE) == RAW_ENABLE) {
469 server->maxRw = 0xFF00;
470 server->capabilities = CAP_MPX_MODE | CAP_RAW_MODE;
471 } else {
472 server->maxRw = 0;/* we do not need to use raw anyway */
473 server->capabilities = CAP_MPX_MODE;
474 }
475 server->timeZone = le16_to_cpu(rsp->ServerTimeZone);
476
477 /* BB get server time for time conversions and add
478 code to use it and timezone since this is not UTC */
479
480 if (rsp->EncryptionKeyLength == CIFS_CRYPTO_KEY_SIZE) {
481 memcpy(server->cryptKey, rsp->EncryptionKey,
482 CIFS_CRYPTO_KEY_SIZE);
483 } else if (server->secMode & SECMODE_PW_ENCRYPT) {
484 rc = -EIO; /* need cryptkey unless plain text */
485 goto neg_err_exit;
486 }
487
488 cFYI(1,("LANMAN negotiated"));
489 /* we will not end up setting signing flags - as no signing
490 was in LANMAN and server did not return the flags on */
491 goto signing_check;
492#else /* weak security disabled */
493 } else if(pSMBr->hdr.WordCount == 13) {
494 cERROR(1,("mount failed, cifs module not built "
495 "with CIFS_WEAK_PW_HASH support"));
496 rc = -EOPNOTSUPP;
497#endif /* WEAK_PW_HASH */
498 goto neg_err_exit;
499 } else if(pSMBr->hdr.WordCount != 17) {
500 /* unknown wct */
501 rc = -EOPNOTSUPP;
502 goto neg_err_exit;
503 }
504 /* else wct == 17 NTLM */
505 server->secMode = pSMBr->SecurityMode;
506 if((server->secMode & SECMODE_USER) == 0)
507 cFYI(1,("share mode security"));
508
509 if((server->secMode & SECMODE_PW_ENCRYPT) == 0)
510#ifdef CONFIG_CIFS_WEAK_PW_HASH
511 if ((secFlags & CIFSSEC_MAY_PLNTXT) == 0)
512#endif /* CIFS_WEAK_PW_HASH */
513 cERROR(1,("Server requests plain text password"
514 " but client support disabled"));
515
516 if((secFlags & CIFSSEC_MUST_NTLMV2) == CIFSSEC_MUST_NTLMV2)
517 server->secType = NTLMv2;
518 else if(secFlags & CIFSSEC_MAY_NTLM)
519 server->secType = NTLM;
520 else if(secFlags & CIFSSEC_MAY_NTLMV2)
521 server->secType = NTLMv2;
522 /* else krb5 ... any others ... */
523
524 /* one byte, so no need to convert this or EncryptionKeyLen from
525 little endian */
526 server->maxReq = le16_to_cpu(pSMBr->MaxMpxCount);
527 /* probably no need to store and check maxvcs */
528 server->maxBuf = min(le32_to_cpu(pSMBr->MaxBufferSize),
415 (__u32) CIFSMaxBufSize + MAX_CIFS_HDR_SIZE); 529 (__u32) CIFSMaxBufSize + MAX_CIFS_HDR_SIZE);
416 server->maxRw = le32_to_cpu(pSMBr->MaxRawSize); 530 server->maxRw = le32_to_cpu(pSMBr->MaxRawSize);
417 cFYI(0, ("Max buf = %d", ses->server->maxBuf)); 531 cFYI(0, ("Max buf = %d", ses->server->maxBuf));
418 GETU32(ses->server->sessid) = le32_to_cpu(pSMBr->SessionKey); 532 GETU32(ses->server->sessid) = le32_to_cpu(pSMBr->SessionKey);
419 server->capabilities = le32_to_cpu(pSMBr->Capabilities); 533 server->capabilities = le32_to_cpu(pSMBr->Capabilities);
420 server->timeZone = le16_to_cpu(pSMBr->ServerTimeZone); 534 server->timeZone = le16_to_cpu(pSMBr->ServerTimeZone);
421 /* BB with UTC do we ever need to be using srvr timezone? */ 535 if (pSMBr->EncryptionKeyLength == CIFS_CRYPTO_KEY_SIZE) {
422 if (pSMBr->EncryptionKeyLength == CIFS_CRYPTO_KEY_SIZE) { 536 memcpy(server->cryptKey, pSMBr->u.EncryptionKey,
423 memcpy(server->cryptKey, pSMBr->u.EncryptionKey, 537 CIFS_CRYPTO_KEY_SIZE);
424 CIFS_CRYPTO_KEY_SIZE); 538 } else if ((pSMBr->hdr.Flags2 & SMBFLG2_EXT_SEC)
425 } else if ((pSMBr->hdr.Flags2 & SMBFLG2_EXT_SEC) 539 && (pSMBr->EncryptionKeyLength == 0)) {
426 && (pSMBr->EncryptionKeyLength == 0)) { 540 /* decode security blob */
427 /* decode security blob */ 541 } else if (server->secMode & SECMODE_PW_ENCRYPT) {
428 } else 542 rc = -EIO; /* no crypt key only if plain text pwd */
429 rc = -EIO; 543 goto neg_err_exit;
544 }
430 545
431 /* BB might be helpful to save off the domain of server here */ 546 /* BB might be helpful to save off the domain of server here */
432 547
433 if ((pSMBr->hdr.Flags2 & SMBFLG2_EXT_SEC) && 548 if ((pSMBr->hdr.Flags2 & SMBFLG2_EXT_SEC) &&
434 (server->capabilities & CAP_EXTENDED_SECURITY)) { 549 (server->capabilities & CAP_EXTENDED_SECURITY)) {
435 count = pSMBr->ByteCount; 550 count = pSMBr->ByteCount;
436 if (count < 16) 551 if (count < 16)
437 rc = -EIO; 552 rc = -EIO;
438 else if (count == 16) { 553 else if (count == 16) {
439 server->secType = RawNTLMSSP; 554 server->secType = RawNTLMSSP;
440 if (server->socketUseCount.counter > 1) { 555 if (server->socketUseCount.counter > 1) {
441 if (memcmp 556 if (memcmp(server->server_GUID,
442 (server->server_GUID, 557 pSMBr->u.extended_response.
443 pSMBr->u.extended_response. 558 GUID, 16) != 0) {
444 GUID, 16) != 0) { 559 cFYI(1, ("server UID changed"));
445 cFYI(1, ("server UID changed"));
446 memcpy(server->
447 server_GUID,
448 pSMBr->u.
449 extended_response.
450 GUID, 16);
451 }
452 } else
453 memcpy(server->server_GUID, 560 memcpy(server->server_GUID,
454 pSMBr->u.extended_response. 561 pSMBr->u.extended_response.GUID,
455 GUID, 16); 562 16);
456 } else {
457 rc = decode_negTokenInit(pSMBr->u.
458 extended_response.
459 SecurityBlob,
460 count - 16,
461 &server->secType);
462 if(rc == 1) {
463 /* BB Need to fill struct for sessetup here */
464 rc = -EOPNOTSUPP;
465 } else {
466 rc = -EINVAL;
467 } 563 }
564 } else
565 memcpy(server->server_GUID,
566 pSMBr->u.extended_response.GUID, 16);
567 } else {
568 rc = decode_negTokenInit(pSMBr->u.extended_response.
569 SecurityBlob,
570 count - 16,
571 &server->secType);
572 if(rc == 1) {
573 /* BB Need to fill struct for sessetup here */
574 rc = -EOPNOTSUPP;
575 } else {
576 rc = -EINVAL;
468 } 577 }
469 } else
470 server->capabilities &= ~CAP_EXTENDED_SECURITY;
471 if(sign_CIFS_PDUs == FALSE) {
472 if(server->secMode & SECMODE_SIGN_REQUIRED)
473 cERROR(1,
474 ("Server requires /proc/fs/cifs/PacketSigningEnabled"));
475 server->secMode &= ~(SECMODE_SIGN_ENABLED | SECMODE_SIGN_REQUIRED);
476 } else if(sign_CIFS_PDUs == 1) {
477 if((server->secMode & SECMODE_SIGN_REQUIRED) == 0)
478 server->secMode &= ~(SECMODE_SIGN_ENABLED | SECMODE_SIGN_REQUIRED);
479 } 578 }
480 579 } else
580 server->capabilities &= ~CAP_EXTENDED_SECURITY;
581
582#ifdef CONFIG_CIFS_WEAK_PW_HASH
583signing_check:
584#endif
585 if(sign_CIFS_PDUs == FALSE) {
586 if(server->secMode & SECMODE_SIGN_REQUIRED)
587 cERROR(1,("Server requires "
588 "/proc/fs/cifs/PacketSigningEnabled to be on"));
589 server->secMode &=
590 ~(SECMODE_SIGN_ENABLED | SECMODE_SIGN_REQUIRED);
591 } else if(sign_CIFS_PDUs == 1) {
592 if((server->secMode & SECMODE_SIGN_REQUIRED) == 0)
593 server->secMode &=
594 ~(SECMODE_SIGN_ENABLED | SECMODE_SIGN_REQUIRED);
595 } else if(sign_CIFS_PDUs == 2) {
596 if((server->secMode &
597 (SECMODE_SIGN_ENABLED | SECMODE_SIGN_REQUIRED)) == 0) {
598 cERROR(1,("signing required but server lacks support"));
599 }
481 } 600 }
482 601neg_err_exit:
483 cifs_buf_release(pSMB); 602 cifs_buf_release(pSMB);
603
604 cFYI(1,("negprot rc %d",rc));
484 return rc; 605 return rc;
485} 606}
486 607
@@ -2239,7 +2360,7 @@ CIFSSMBQueryReparseLinkInfo(const int xid, struct cifsTconInfo *tcon,
2239 } 2360 }
2240 symlinkinfo[buflen] = 0; /* just in case so the caller 2361 symlinkinfo[buflen] = 0; /* just in case so the caller
2241 does not go off the end of the buffer */ 2362 does not go off the end of the buffer */
2242 cFYI(1,("readlink result - %s ",symlinkinfo)); 2363 cFYI(1,("readlink result - %s",symlinkinfo));
2243 } 2364 }
2244 } 2365 }
2245qreparse_out: 2366qreparse_out:
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index bae1479318d1..876eb9ef85fe 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -49,8 +49,6 @@
49 49
50static DECLARE_COMPLETION(cifsd_complete); 50static DECLARE_COMPLETION(cifsd_complete);
51 51
52extern void SMBencrypt(unsigned char *passwd, unsigned char *c8,
53 unsigned char *p24);
54extern void SMBNTencrypt(unsigned char *passwd, unsigned char *c8, 52extern void SMBNTencrypt(unsigned char *passwd, unsigned char *c8,
55 unsigned char *p24); 53 unsigned char *p24);
56 54
@@ -70,6 +68,7 @@ struct smb_vol {
70 gid_t linux_gid; 68 gid_t linux_gid;
71 mode_t file_mode; 69 mode_t file_mode;
72 mode_t dir_mode; 70 mode_t dir_mode;
71 unsigned secFlg;
73 unsigned rw:1; 72 unsigned rw:1;
74 unsigned retry:1; 73 unsigned retry:1;
75 unsigned intr:1; 74 unsigned intr:1;
@@ -83,12 +82,7 @@ struct smb_vol {
83 unsigned remap:1; /* set to remap seven reserved chars in filenames */ 82 unsigned remap:1; /* set to remap seven reserved chars in filenames */
84 unsigned posix_paths:1; /* unset to not ask for posix pathnames. */ 83 unsigned posix_paths:1; /* unset to not ask for posix pathnames. */
85 unsigned sfu_emul:1; 84 unsigned sfu_emul:1;
86 unsigned krb5:1;
87 unsigned ntlm:1;
88 unsigned ntlmv2:1;
89 unsigned nullauth:1; /* attempt to authenticate with null user */ 85 unsigned nullauth:1; /* attempt to authenticate with null user */
90 unsigned sign:1;
91 unsigned seal:1; /* encrypt */
92 unsigned nocase; /* request case insensitive filenames */ 86 unsigned nocase; /* request case insensitive filenames */
93 unsigned nobrl; /* disable sending byte range locks to srv */ 87 unsigned nobrl; /* disable sending byte range locks to srv */
94 unsigned int rsize; 88 unsigned int rsize;
@@ -369,21 +363,21 @@ cifs_demultiplex_thread(struct TCP_Server_Info *server)
369 continue; 363 continue;
370 if (bigbuf == NULL) { 364 if (bigbuf == NULL) {
371 bigbuf = cifs_buf_get(); 365 bigbuf = cifs_buf_get();
372 if(bigbuf == NULL) { 366 if (!bigbuf) {
373 cERROR(1,("No memory for large SMB response")); 367 cERROR(1, ("No memory for large SMB response"));
374 msleep(3000); 368 msleep(3000);
375 /* retry will check if exiting */ 369 /* retry will check if exiting */
376 continue; 370 continue;
377 } 371 }
378 } else if(isLargeBuf) { 372 } else if (isLargeBuf) {
379 /* we are reusing a dirtry large buf, clear its start */ 373 /* we are reusing a dirty large buf, clear its start */
380 memset(bigbuf, 0, sizeof (struct smb_hdr)); 374 memset(bigbuf, 0, sizeof (struct smb_hdr));
381 } 375 }
382 376
383 if (smallbuf == NULL) { 377 if (smallbuf == NULL) {
384 smallbuf = cifs_small_buf_get(); 378 smallbuf = cifs_small_buf_get();
385 if(smallbuf == NULL) { 379 if (!smallbuf) {
386 cERROR(1,("No memory for SMB response")); 380 cERROR(1, ("No memory for SMB response"));
387 msleep(1000); 381 msleep(1000);
388 /* retry will check if exiting */ 382 /* retry will check if exiting */
389 continue; 383 continue;
@@ -403,12 +397,12 @@ cifs_demultiplex_thread(struct TCP_Server_Info *server)
403 kernel_recvmsg(csocket, &smb_msg, 397 kernel_recvmsg(csocket, &smb_msg,
404 &iov, 1, 4, 0 /* BB see socket.h flags */); 398 &iov, 1, 4, 0 /* BB see socket.h flags */);
405 399
406 if(server->tcpStatus == CifsExiting) { 400 if (server->tcpStatus == CifsExiting) {
407 break; 401 break;
408 } else if (server->tcpStatus == CifsNeedReconnect) { 402 } else if (server->tcpStatus == CifsNeedReconnect) {
409 cFYI(1,("Reconnect after server stopped responding")); 403 cFYI(1, ("Reconnect after server stopped responding"));
410 cifs_reconnect(server); 404 cifs_reconnect(server);
411 cFYI(1,("call to reconnect done")); 405 cFYI(1, ("call to reconnect done"));
412 csocket = server->ssocket; 406 csocket = server->ssocket;
413 continue; 407 continue;
414 } else if ((length == -ERESTARTSYS) || (length == -EAGAIN)) { 408 } else if ((length == -ERESTARTSYS) || (length == -EAGAIN)) {
@@ -417,15 +411,15 @@ cifs_demultiplex_thread(struct TCP_Server_Info *server)
417 tcpStatus CifsNeedReconnect if server hung */ 411 tcpStatus CifsNeedReconnect if server hung */
418 continue; 412 continue;
419 } else if (length <= 0) { 413 } else if (length <= 0) {
420 if(server->tcpStatus == CifsNew) { 414 if (server->tcpStatus == CifsNew) {
421 cFYI(1,("tcp session abend after SMBnegprot")); 415 cFYI(1, ("tcp session abend after SMBnegprot"));
422 /* some servers kill the TCP session rather than 416 /* some servers kill the TCP session rather than
423 returning an SMB negprot error, in which 417 returning an SMB negprot error, in which
424 case reconnecting here is not going to help, 418 case reconnecting here is not going to help,
425 and so simply return error to mount */ 419 and so simply return error to mount */
426 break; 420 break;
427 } 421 }
428 if(length == -EINTR) { 422 if (!try_to_freeze() && (length == -EINTR)) {
429 cFYI(1,("cifsd thread killed")); 423 cFYI(1,("cifsd thread killed"));
430 break; 424 break;
431 } 425 }
@@ -585,9 +579,11 @@ cifs_demultiplex_thread(struct TCP_Server_Info *server)
585 /* merge response - fix up 1st*/ 579 /* merge response - fix up 1st*/
586 if(coalesce_t2(smb_buffer, 580 if(coalesce_t2(smb_buffer,
587 mid_entry->resp_buf)) { 581 mid_entry->resp_buf)) {
582 mid_entry->multiRsp = 1;
588 break; 583 break;
589 } else { 584 } else {
590 /* all parts received */ 585 /* all parts received */
586 mid_entry->multiEnd = 1;
591 goto multi_t2_fnd; 587 goto multi_t2_fnd;
592 } 588 }
593 } else { 589 } else {
@@ -632,9 +628,14 @@ multi_t2_fnd:
632 wake_up_process(task_to_wake); 628 wake_up_process(task_to_wake);
633 } else if ((is_valid_oplock_break(smb_buffer, server) == FALSE) 629 } else if ((is_valid_oplock_break(smb_buffer, server) == FALSE)
634 && (isMultiRsp == FALSE)) { 630 && (isMultiRsp == FALSE)) {
635 cERROR(1, ("No task to wake, unknown frame rcvd!")); 631 cERROR(1, ("No task to wake, unknown frame rcvd! NumMids %d", midCount.counter));
636 cifs_dump_mem("Received Data is: ",(char *)smb_buffer, 632 cifs_dump_mem("Received Data is: ",(char *)smb_buffer,
637 sizeof(struct smb_hdr)); 633 sizeof(struct smb_hdr));
634#ifdef CONFIG_CIFS_DEBUG2
635 cifs_dump_detail(smb_buffer);
636 cifs_dump_mids(server);
637#endif /* CIFS_DEBUG2 */
638
638 } 639 }
639 } /* end while !EXITING */ 640 } /* end while !EXITING */
640 641
@@ -784,7 +785,6 @@ cifs_parse_mount_options(char *options, const char *devname,struct smb_vol *vol)
784 785
785 /* vol->retry default is 0 (i.e. "soft" limited retry not hard retry) */ 786 /* vol->retry default is 0 (i.e. "soft" limited retry not hard retry) */
786 vol->rw = TRUE; 787 vol->rw = TRUE;
787 vol->ntlm = TRUE;
788 /* default is always to request posix paths. */ 788 /* default is always to request posix paths. */
789 vol->posix_paths = 1; 789 vol->posix_paths = 1;
790 790
@@ -915,30 +915,35 @@ cifs_parse_mount_options(char *options, const char *devname,struct smb_vol *vol)
915 cERROR(1,("no security value specified")); 915 cERROR(1,("no security value specified"));
916 continue; 916 continue;
917 } else if (strnicmp(value, "krb5i", 5) == 0) { 917 } else if (strnicmp(value, "krb5i", 5) == 0) {
918 vol->sign = 1; 918 vol->secFlg |= CIFSSEC_MAY_KRB5 |
919 vol->krb5 = 1; 919 CIFSSEC_MUST_SIGN;
920 } else if (strnicmp(value, "krb5p", 5) == 0) { 920 } else if (strnicmp(value, "krb5p", 5) == 0) {
921 /* vol->seal = 1; 921 /* vol->secFlg |= CIFSSEC_MUST_SEAL |
922 vol->krb5 = 1; */ 922 CIFSSEC_MAY_KRB5; */
923 cERROR(1,("Krb5 cifs privacy not supported")); 923 cERROR(1,("Krb5 cifs privacy not supported"));
924 return 1; 924 return 1;
925 } else if (strnicmp(value, "krb5", 4) == 0) { 925 } else if (strnicmp(value, "krb5", 4) == 0) {
926 vol->krb5 = 1; 926 vol->secFlg |= CIFSSEC_MAY_KRB5;
927 } else if (strnicmp(value, "ntlmv2i", 7) == 0) { 927 } else if (strnicmp(value, "ntlmv2i", 7) == 0) {
928 vol->ntlmv2 = 1; 928 vol->secFlg |= CIFSSEC_MAY_NTLMV2 |
929 vol->sign = 1; 929 CIFSSEC_MUST_SIGN;
930 } else if (strnicmp(value, "ntlmv2", 6) == 0) { 930 } else if (strnicmp(value, "ntlmv2", 6) == 0) {
931 vol->ntlmv2 = 1; 931 vol->secFlg |= CIFSSEC_MAY_NTLMV2;
932 } else if (strnicmp(value, "ntlmi", 5) == 0) { 932 } else if (strnicmp(value, "ntlmi", 5) == 0) {
933 vol->ntlm = 1; 933 vol->secFlg |= CIFSSEC_MAY_NTLM |
934 vol->sign = 1; 934 CIFSSEC_MUST_SIGN;
935 } else if (strnicmp(value, "ntlm", 4) == 0) { 935 } else if (strnicmp(value, "ntlm", 4) == 0) {
936 /* ntlm is default so can be turned off too */ 936 /* ntlm is default so can be turned off too */
937 vol->ntlm = 1; 937 vol->secFlg |= CIFSSEC_MAY_NTLM;
938 } else if (strnicmp(value, "nontlm", 6) == 0) { 938 } else if (strnicmp(value, "nontlm", 6) == 0) {
939 vol->ntlm = 0; 939 /* BB is there a better way to do this? */
940 vol->secFlg |= CIFSSEC_MAY_NTLMV2;
941#ifdef CONFIG_CIFS_WEAK_PW_HASH
942 } else if (strnicmp(value, "lanman", 6) == 0) {
943 vol->secFlg |= CIFSSEC_MAY_LANMAN;
944#endif
940 } else if (strnicmp(value, "none", 4) == 0) { 945 } else if (strnicmp(value, "none", 4) == 0) {
941 vol->nullauth = 1; 946 vol->nullauth = 1;
942 } else { 947 } else {
943 cERROR(1,("bad security option: %s", value)); 948 cERROR(1,("bad security option: %s", value));
944 return 1; 949 return 1;
@@ -976,7 +981,7 @@ cifs_parse_mount_options(char *options, const char *devname,struct smb_vol *vol)
976 } 981 }
977 /* BB are there cases in which a comma can be valid in 982 /* BB are there cases in which a comma can be valid in
978 a domain name and need special handling? */ 983 a domain name and need special handling? */
979 if (strnlen(value, 65) < 65) { 984 if (strnlen(value, 256) < 256) {
980 vol->domainname = value; 985 vol->domainname = value;
981 cFYI(1, ("Domain name set")); 986 cFYI(1, ("Domain name set"));
982 } else { 987 } else {
@@ -1168,6 +1173,10 @@ cifs_parse_mount_options(char *options, const char *devname,struct smb_vol *vol)
1168 vol->no_psx_acl = 0; 1173 vol->no_psx_acl = 0;
1169 } else if (strnicmp(data, "noacl",5) == 0) { 1174 } else if (strnicmp(data, "noacl",5) == 0) {
1170 vol->no_psx_acl = 1; 1175 vol->no_psx_acl = 1;
1176 } else if (strnicmp(data, "sign",4) == 0) {
1177 vol->secFlg |= CIFSSEC_MUST_SIGN;
1178/* } else if (strnicmp(data, "seal",4) == 0) {
1179 vol->secFlg |= CIFSSEC_MUST_SEAL; */
1171 } else if (strnicmp(data, "direct",6) == 0) { 1180 } else if (strnicmp(data, "direct",6) == 0) {
1172 vol->direct_io = 1; 1181 vol->direct_io = 1;
1173 } else if (strnicmp(data, "forcedirectio",13) == 0) { 1182 } else if (strnicmp(data, "forcedirectio",13) == 0) {
@@ -1762,11 +1771,18 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
1762 if (volume_info.username) 1771 if (volume_info.username)
1763 strncpy(pSesInfo->userName, 1772 strncpy(pSesInfo->userName,
1764 volume_info.username,MAX_USERNAME_SIZE); 1773 volume_info.username,MAX_USERNAME_SIZE);
1765 if (volume_info.domainname) 1774 if (volume_info.domainname) {
1766 strncpy(pSesInfo->domainName, 1775 int len = strlen(volume_info.domainname);
1767 volume_info.domainname,MAX_USERNAME_SIZE); 1776 pSesInfo->domainName =
1777 kmalloc(len + 1, GFP_KERNEL);
1778 if(pSesInfo->domainName)
1779 strcpy(pSesInfo->domainName,
1780 volume_info.domainname);
1781 }
1768 pSesInfo->linux_uid = volume_info.linux_uid; 1782 pSesInfo->linux_uid = volume_info.linux_uid;
1783 pSesInfo->overrideSecFlg = volume_info.secFlg;
1769 down(&pSesInfo->sesSem); 1784 down(&pSesInfo->sesSem);
1785 /* BB FIXME need to pass vol->secFlgs BB */
1770 rc = cifs_setup_session(xid,pSesInfo, cifs_sb->local_nls); 1786 rc = cifs_setup_session(xid,pSesInfo, cifs_sb->local_nls);
1771 up(&pSesInfo->sesSem); 1787 up(&pSesInfo->sesSem);
1772 if(!rc) 1788 if(!rc)
@@ -1980,7 +1996,7 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
1980 1996
1981static int 1997static int
1982CIFSSessSetup(unsigned int xid, struct cifsSesInfo *ses, 1998CIFSSessSetup(unsigned int xid, struct cifsSesInfo *ses,
1983 char session_key[CIFS_SESSION_KEY_SIZE], 1999 char session_key[CIFS_SESS_KEY_SIZE],
1984 const struct nls_table *nls_codepage) 2000 const struct nls_table *nls_codepage)
1985{ 2001{
1986 struct smb_hdr *smb_buffer; 2002 struct smb_hdr *smb_buffer;
@@ -2038,15 +2054,15 @@ CIFSSessSetup(unsigned int xid, struct cifsSesInfo *ses,
2038 pSMB->req_no_secext.Capabilities = cpu_to_le32(capabilities); 2054 pSMB->req_no_secext.Capabilities = cpu_to_le32(capabilities);
2039 2055
2040 pSMB->req_no_secext.CaseInsensitivePasswordLength = 2056 pSMB->req_no_secext.CaseInsensitivePasswordLength =
2041 cpu_to_le16(CIFS_SESSION_KEY_SIZE); 2057 cpu_to_le16(CIFS_SESS_KEY_SIZE);
2042 2058
2043 pSMB->req_no_secext.CaseSensitivePasswordLength = 2059 pSMB->req_no_secext.CaseSensitivePasswordLength =
2044 cpu_to_le16(CIFS_SESSION_KEY_SIZE); 2060 cpu_to_le16(CIFS_SESS_KEY_SIZE);
2045 bcc_ptr = pByteArea(smb_buffer); 2061 bcc_ptr = pByteArea(smb_buffer);
2046 memcpy(bcc_ptr, (char *) session_key, CIFS_SESSION_KEY_SIZE); 2062 memcpy(bcc_ptr, (char *) session_key, CIFS_SESS_KEY_SIZE);
2047 bcc_ptr += CIFS_SESSION_KEY_SIZE; 2063 bcc_ptr += CIFS_SESS_KEY_SIZE;
2048 memcpy(bcc_ptr, (char *) session_key, CIFS_SESSION_KEY_SIZE); 2064 memcpy(bcc_ptr, (char *) session_key, CIFS_SESS_KEY_SIZE);
2049 bcc_ptr += CIFS_SESSION_KEY_SIZE; 2065 bcc_ptr += CIFS_SESS_KEY_SIZE;
2050 2066
2051 if (ses->capabilities & CAP_UNICODE) { 2067 if (ses->capabilities & CAP_UNICODE) {
2052 if ((long) bcc_ptr % 2) { /* must be word aligned for Unicode */ 2068 if ((long) bcc_ptr % 2) { /* must be word aligned for Unicode */
@@ -2054,7 +2070,7 @@ CIFSSessSetup(unsigned int xid, struct cifsSesInfo *ses,
2054 bcc_ptr++; 2070 bcc_ptr++;
2055 } 2071 }
2056 if(user == NULL) 2072 if(user == NULL)
2057 bytes_returned = 0; /* skill null user */ 2073 bytes_returned = 0; /* skip null user */
2058 else 2074 else
2059 bytes_returned = 2075 bytes_returned =
2060 cifs_strtoUCS((__le16 *) bcc_ptr, user, 100, 2076 cifs_strtoUCS((__le16 *) bcc_ptr, user, 100,
@@ -2162,8 +2178,7 @@ CIFSSessSetup(unsigned int xid, struct cifsSesInfo *ses,
2162 if (remaining_words > 0) { 2178 if (remaining_words > 0) {
2163 len = UniStrnlen((wchar_t *)bcc_ptr, 2179 len = UniStrnlen((wchar_t *)bcc_ptr,
2164 remaining_words-1); 2180 remaining_words-1);
2165 if(ses->serverNOS) 2181 kfree(ses->serverNOS);
2166 kfree(ses->serverNOS);
2167 ses->serverNOS = kzalloc(2 * (len + 1),GFP_KERNEL); 2182 ses->serverNOS = kzalloc(2 * (len + 1),GFP_KERNEL);
2168 if(ses->serverNOS == NULL) 2183 if(ses->serverNOS == NULL)
2169 goto sesssetup_nomem; 2184 goto sesssetup_nomem;
@@ -2203,12 +2218,10 @@ CIFSSessSetup(unsigned int xid, struct cifsSesInfo *ses,
2203 /* if these kcallocs fail not much we 2218 /* if these kcallocs fail not much we
2204 can do, but better to not fail the 2219 can do, but better to not fail the
2205 sesssetup itself */ 2220 sesssetup itself */
2206 if(ses->serverDomain) 2221 kfree(ses->serverDomain);
2207 kfree(ses->serverDomain);
2208 ses->serverDomain = 2222 ses->serverDomain =
2209 kzalloc(2, GFP_KERNEL); 2223 kzalloc(2, GFP_KERNEL);
2210 if(ses->serverNOS) 2224 kfree(ses->serverNOS);
2211 kfree(ses->serverNOS);
2212 ses->serverNOS = 2225 ses->serverNOS =
2213 kzalloc(2, GFP_KERNEL); 2226 kzalloc(2, GFP_KERNEL);
2214 } 2227 }
@@ -2217,8 +2230,7 @@ CIFSSessSetup(unsigned int xid, struct cifsSesInfo *ses,
2217 if (((long) bcc_ptr + len) - (long) 2230 if (((long) bcc_ptr + len) - (long)
2218 pByteArea(smb_buffer_response) 2231 pByteArea(smb_buffer_response)
2219 <= BCC(smb_buffer_response)) { 2232 <= BCC(smb_buffer_response)) {
2220 if(ses->serverOS) 2233 kfree(ses->serverOS);
2221 kfree(ses->serverOS);
2222 ses->serverOS = kzalloc(len + 1,GFP_KERNEL); 2234 ses->serverOS = kzalloc(len + 1,GFP_KERNEL);
2223 if(ses->serverOS == NULL) 2235 if(ses->serverOS == NULL)
2224 goto sesssetup_nomem; 2236 goto sesssetup_nomem;
@@ -2229,8 +2241,7 @@ CIFSSessSetup(unsigned int xid, struct cifsSesInfo *ses,
2229 bcc_ptr++; 2241 bcc_ptr++;
2230 2242
2231 len = strnlen(bcc_ptr, 1024); 2243 len = strnlen(bcc_ptr, 1024);
2232 if(ses->serverNOS) 2244 kfree(ses->serverNOS);
2233 kfree(ses->serverNOS);
2234 ses->serverNOS = kzalloc(len + 1,GFP_KERNEL); 2245 ses->serverNOS = kzalloc(len + 1,GFP_KERNEL);
2235 if(ses->serverNOS == NULL) 2246 if(ses->serverNOS == NULL)
2236 goto sesssetup_nomem; 2247 goto sesssetup_nomem;
@@ -2274,292 +2285,6 @@ sesssetup_nomem: /* do not return an error on nomem for the info strings,
2274} 2285}
2275 2286
2276static int 2287static int
2277CIFSSpnegoSessSetup(unsigned int xid, struct cifsSesInfo *ses,
2278 char *SecurityBlob,int SecurityBlobLength,
2279 const struct nls_table *nls_codepage)
2280{
2281 struct smb_hdr *smb_buffer;
2282 struct smb_hdr *smb_buffer_response;
2283 SESSION_SETUP_ANDX *pSMB;
2284 SESSION_SETUP_ANDX *pSMBr;
2285 char *bcc_ptr;
2286 char *user;
2287 char *domain;
2288 int rc = 0;
2289 int remaining_words = 0;
2290 int bytes_returned = 0;
2291 int len;
2292 __u32 capabilities;
2293 __u16 count;
2294
2295 cFYI(1, ("In spnego sesssetup "));
2296 if(ses == NULL)
2297 return -EINVAL;
2298 user = ses->userName;
2299 domain = ses->domainName;
2300
2301 smb_buffer = cifs_buf_get();
2302 if (smb_buffer == NULL) {
2303 return -ENOMEM;
2304 }
2305 smb_buffer_response = smb_buffer;
2306 pSMBr = pSMB = (SESSION_SETUP_ANDX *) smb_buffer;
2307
2308 /* send SMBsessionSetup here */
2309 header_assemble(smb_buffer, SMB_COM_SESSION_SETUP_ANDX,
2310 NULL /* no tCon exists yet */ , 12 /* wct */ );
2311
2312 smb_buffer->Mid = GetNextMid(ses->server);
2313 pSMB->req.hdr.Flags2 |= SMBFLG2_EXT_SEC;
2314 pSMB->req.AndXCommand = 0xFF;
2315 if(ses->server->maxBuf > 64*1024)
2316 ses->server->maxBuf = (64*1023);
2317 pSMB->req.MaxBufferSize = cpu_to_le16(ses->server->maxBuf);
2318 pSMB->req.MaxMpxCount = cpu_to_le16(ses->server->maxReq);
2319
2320 if(ses->server->secMode & (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED))
2321 smb_buffer->Flags2 |= SMBFLG2_SECURITY_SIGNATURE;
2322
2323 capabilities = CAP_LARGE_FILES | CAP_NT_SMBS | CAP_LEVEL_II_OPLOCKS |
2324 CAP_EXTENDED_SECURITY;
2325 if (ses->capabilities & CAP_UNICODE) {
2326 smb_buffer->Flags2 |= SMBFLG2_UNICODE;
2327 capabilities |= CAP_UNICODE;
2328 }
2329 if (ses->capabilities & CAP_STATUS32) {
2330 smb_buffer->Flags2 |= SMBFLG2_ERR_STATUS;
2331 capabilities |= CAP_STATUS32;
2332 }
2333 if (ses->capabilities & CAP_DFS) {
2334 smb_buffer->Flags2 |= SMBFLG2_DFS;
2335 capabilities |= CAP_DFS;
2336 }
2337 pSMB->req.Capabilities = cpu_to_le32(capabilities);
2338
2339 pSMB->req.SecurityBlobLength = cpu_to_le16(SecurityBlobLength);
2340 bcc_ptr = pByteArea(smb_buffer);
2341 memcpy(bcc_ptr, SecurityBlob, SecurityBlobLength);
2342 bcc_ptr += SecurityBlobLength;
2343
2344 if (ses->capabilities & CAP_UNICODE) {
2345 if ((long) bcc_ptr % 2) { /* must be word aligned for Unicode strings */
2346 *bcc_ptr = 0;
2347 bcc_ptr++;
2348 }
2349 bytes_returned =
2350 cifs_strtoUCS((__le16 *) bcc_ptr, user, 100, nls_codepage);
2351 bcc_ptr += 2 * bytes_returned; /* convert num of 16 bit words to bytes */
2352 bcc_ptr += 2; /* trailing null */
2353 if (domain == NULL)
2354 bytes_returned =
2355 cifs_strtoUCS((__le16 *) bcc_ptr,
2356 "CIFS_LINUX_DOM", 32, nls_codepage);
2357 else
2358 bytes_returned =
2359 cifs_strtoUCS((__le16 *) bcc_ptr, domain, 64,
2360 nls_codepage);
2361 bcc_ptr += 2 * bytes_returned;
2362 bcc_ptr += 2;
2363 bytes_returned =
2364 cifs_strtoUCS((__le16 *) bcc_ptr, "Linux version ",
2365 32, nls_codepage);
2366 bcc_ptr += 2 * bytes_returned;
2367 bytes_returned =
2368 cifs_strtoUCS((__le16 *) bcc_ptr, system_utsname.release, 32,
2369 nls_codepage);
2370 bcc_ptr += 2 * bytes_returned;
2371 bcc_ptr += 2;
2372 bytes_returned =
2373 cifs_strtoUCS((__le16 *) bcc_ptr, CIFS_NETWORK_OPSYS,
2374 64, nls_codepage);
2375 bcc_ptr += 2 * bytes_returned;
2376 bcc_ptr += 2;
2377 } else {
2378 strncpy(bcc_ptr, user, 200);
2379 bcc_ptr += strnlen(user, 200);
2380 *bcc_ptr = 0;
2381 bcc_ptr++;
2382 if (domain == NULL) {
2383 strcpy(bcc_ptr, "CIFS_LINUX_DOM");
2384 bcc_ptr += strlen("CIFS_LINUX_DOM") + 1;
2385 } else {
2386 strncpy(bcc_ptr, domain, 64);
2387 bcc_ptr += strnlen(domain, 64);
2388 *bcc_ptr = 0;
2389 bcc_ptr++;
2390 }
2391 strcpy(bcc_ptr, "Linux version ");
2392 bcc_ptr += strlen("Linux version ");
2393 strcpy(bcc_ptr, system_utsname.release);
2394 bcc_ptr += strlen(system_utsname.release) + 1;
2395 strcpy(bcc_ptr, CIFS_NETWORK_OPSYS);
2396 bcc_ptr += strlen(CIFS_NETWORK_OPSYS) + 1;
2397 }
2398 count = (long) bcc_ptr - (long) pByteArea(smb_buffer);
2399 smb_buffer->smb_buf_length += count;
2400 pSMB->req.ByteCount = cpu_to_le16(count);
2401
2402 rc = SendReceive(xid, ses, smb_buffer, smb_buffer_response,
2403 &bytes_returned, 1);
2404 if (rc) {
2405/* rc = map_smb_to_linux_error(smb_buffer_response); *//* done in SendReceive now */
2406 } else if ((smb_buffer_response->WordCount == 3)
2407 || (smb_buffer_response->WordCount == 4)) {
2408 __u16 action = le16_to_cpu(pSMBr->resp.Action);
2409 __u16 blob_len =
2410 le16_to_cpu(pSMBr->resp.SecurityBlobLength);
2411 if (action & GUEST_LOGIN)
2412 cFYI(1, (" Guest login")); /* BB do we want to set anything in SesInfo struct ? */
2413 if (ses) {
2414 ses->Suid = smb_buffer_response->Uid; /* UID left in wire format (le) */
2415 cFYI(1, ("UID = %d ", ses->Suid));
2416 bcc_ptr = pByteArea(smb_buffer_response); /* response can have either 3 or 4 word count - Samba sends 3 */
2417
2418 /* BB Fix below to make endian neutral !! */
2419
2420 if ((pSMBr->resp.hdr.WordCount == 3)
2421 || ((pSMBr->resp.hdr.WordCount == 4)
2422 && (blob_len <
2423 pSMBr->resp.ByteCount))) {
2424 if (pSMBr->resp.hdr.WordCount == 4) {
2425 bcc_ptr +=
2426 blob_len;
2427 cFYI(1,
2428 ("Security Blob Length %d ",
2429 blob_len));
2430 }
2431
2432 if (smb_buffer->Flags2 & SMBFLG2_UNICODE) {
2433 if ((long) (bcc_ptr) % 2) {
2434 remaining_words =
2435 (BCC(smb_buffer_response)
2436 - 1) / 2;
2437 bcc_ptr++; /* Unicode strings must be word aligned */
2438 } else {
2439 remaining_words =
2440 BCC
2441 (smb_buffer_response) / 2;
2442 }
2443 len =
2444 UniStrnlen((wchar_t *) bcc_ptr,
2445 remaining_words - 1);
2446/* We look for obvious messed up bcc or strings in response so we do not go off
2447 the end since (at least) WIN2K and Windows XP have a major bug in not null
2448 terminating last Unicode string in response */
2449 if(ses->serverOS)
2450 kfree(ses->serverOS);
2451 ses->serverOS =
2452 kzalloc(2 * (len + 1), GFP_KERNEL);
2453 cifs_strfromUCS_le(ses->serverOS,
2454 (__le16 *)
2455 bcc_ptr, len,
2456 nls_codepage);
2457 bcc_ptr += 2 * (len + 1);
2458 remaining_words -= len + 1;
2459 ses->serverOS[2 * len] = 0;
2460 ses->serverOS[1 + (2 * len)] = 0;
2461 if (remaining_words > 0) {
2462 len = UniStrnlen((wchar_t *)bcc_ptr,
2463 remaining_words
2464 - 1);
2465 if(ses->serverNOS)
2466 kfree(ses->serverNOS);
2467 ses->serverNOS =
2468 kzalloc(2 * (len + 1),
2469 GFP_KERNEL);
2470 cifs_strfromUCS_le(ses->serverNOS,
2471 (__le16 *)bcc_ptr,
2472 len,
2473 nls_codepage);
2474 bcc_ptr += 2 * (len + 1);
2475 ses->serverNOS[2 * len] = 0;
2476 ses->serverNOS[1 + (2 * len)] = 0;
2477 remaining_words -= len + 1;
2478 if (remaining_words > 0) {
2479 len = UniStrnlen((wchar_t *) bcc_ptr, remaining_words);
2480 /* last string not null terminated (e.g.Windows XP/2000) */
2481 if(ses->serverDomain)
2482 kfree(ses->serverDomain);
2483 ses->serverDomain = kzalloc(2*(len+1),GFP_KERNEL);
2484 cifs_strfromUCS_le(ses->serverDomain,
2485 (__le16 *)bcc_ptr,
2486 len, nls_codepage);
2487 bcc_ptr += 2*(len+1);
2488 ses->serverDomain[2*len] = 0;
2489 ses->serverDomain[1+(2*len)] = 0;
2490 } /* else no more room so create dummy domain string */
2491 else {
2492 if(ses->serverDomain)
2493 kfree(ses->serverDomain);
2494 ses->serverDomain =
2495 kzalloc(2,GFP_KERNEL);
2496 }
2497 } else {/* no room use dummy domain&NOS */
2498 if(ses->serverDomain)
2499 kfree(ses->serverDomain);
2500 ses->serverDomain = kzalloc(2, GFP_KERNEL);
2501 if(ses->serverNOS)
2502 kfree(ses->serverNOS);
2503 ses->serverNOS = kzalloc(2, GFP_KERNEL);
2504 }
2505 } else { /* ASCII */
2506
2507 len = strnlen(bcc_ptr, 1024);
2508 if (((long) bcc_ptr + len) - (long)
2509 pByteArea(smb_buffer_response)
2510 <= BCC(smb_buffer_response)) {
2511 if(ses->serverOS)
2512 kfree(ses->serverOS);
2513 ses->serverOS = kzalloc(len + 1, GFP_KERNEL);
2514 strncpy(ses->serverOS, bcc_ptr, len);
2515
2516 bcc_ptr += len;
2517 bcc_ptr[0] = 0; /* null terminate the string */
2518 bcc_ptr++;
2519
2520 len = strnlen(bcc_ptr, 1024);
2521 if(ses->serverNOS)
2522 kfree(ses->serverNOS);
2523 ses->serverNOS = kzalloc(len + 1,GFP_KERNEL);
2524 strncpy(ses->serverNOS, bcc_ptr, len);
2525 bcc_ptr += len;
2526 bcc_ptr[0] = 0;
2527 bcc_ptr++;
2528
2529 len = strnlen(bcc_ptr, 1024);
2530 if(ses->serverDomain)
2531 kfree(ses->serverDomain);
2532 ses->serverDomain = kzalloc(len + 1, GFP_KERNEL);
2533 strncpy(ses->serverDomain, bcc_ptr, len);
2534 bcc_ptr += len;
2535 bcc_ptr[0] = 0;
2536 bcc_ptr++;
2537 } else
2538 cFYI(1,
2539 ("Variable field of length %d extends beyond end of smb ",
2540 len));
2541 }
2542 } else {
2543 cERROR(1,
2544 (" Security Blob Length extends beyond end of SMB"));
2545 }
2546 } else {
2547 cERROR(1, ("No session structure passed in."));
2548 }
2549 } else {
2550 cERROR(1,
2551 (" Invalid Word count %d: ",
2552 smb_buffer_response->WordCount));
2553 rc = -EIO;
2554 }
2555
2556 if (smb_buffer)
2557 cifs_buf_release(smb_buffer);
2558
2559 return rc;
2560}
2561
2562static int
2563CIFSNTLMSSPNegotiateSessSetup(unsigned int xid, 2288CIFSNTLMSSPNegotiateSessSetup(unsigned int xid,
2564 struct cifsSesInfo *ses, int * pNTLMv2_flag, 2289 struct cifsSesInfo *ses, int * pNTLMv2_flag,
2565 const struct nls_table *nls_codepage) 2290 const struct nls_table *nls_codepage)
@@ -2635,8 +2360,8 @@ CIFSNTLMSSPNegotiateSessSetup(unsigned int xid,
2635 /* NTLMSSP_NEGOTIATE_ALWAYS_SIGN | */ NTLMSSP_NEGOTIATE_128; 2360 /* NTLMSSP_NEGOTIATE_ALWAYS_SIGN | */ NTLMSSP_NEGOTIATE_128;
2636 if(sign_CIFS_PDUs) 2361 if(sign_CIFS_PDUs)
2637 negotiate_flags |= NTLMSSP_NEGOTIATE_SIGN; 2362 negotiate_flags |= NTLMSSP_NEGOTIATE_SIGN;
2638 if(ntlmv2_support) 2363/* if(ntlmv2_support)
2639 negotiate_flags |= NTLMSSP_NEGOTIATE_NTLMV2; 2364 negotiate_flags |= NTLMSSP_NEGOTIATE_NTLMV2;*/
2640 /* setup pointers to domain name and workstation name */ 2365 /* setup pointers to domain name and workstation name */
2641 bcc_ptr += SecurityBlobLength; 2366 bcc_ptr += SecurityBlobLength;
2642 2367
@@ -2783,8 +2508,7 @@ CIFSNTLMSSPNegotiateSessSetup(unsigned int xid,
2783 bcc_ptr, 2508 bcc_ptr,
2784 remaining_words 2509 remaining_words
2785 - 1); 2510 - 1);
2786 if(ses->serverNOS) 2511 kfree(ses->serverNOS);
2787 kfree(ses->serverNOS);
2788 ses->serverNOS = 2512 ses->serverNOS =
2789 kzalloc(2 * (len + 1), 2513 kzalloc(2 * (len + 1),
2790 GFP_KERNEL); 2514 GFP_KERNEL);
@@ -2802,8 +2526,7 @@ CIFSNTLMSSPNegotiateSessSetup(unsigned int xid,
2802 if (remaining_words > 0) { 2526 if (remaining_words > 0) {
2803 len = UniStrnlen((wchar_t *) bcc_ptr, remaining_words); 2527 len = UniStrnlen((wchar_t *) bcc_ptr, remaining_words);
2804 /* last string is not always null terminated (for e.g. for Windows XP & 2000) */ 2528 /* last string is not always null terminated (for e.g. for Windows XP & 2000) */
2805 if(ses->serverDomain) 2529 kfree(ses->serverDomain);
2806 kfree(ses->serverDomain);
2807 ses->serverDomain = 2530 ses->serverDomain =
2808 kzalloc(2 * 2531 kzalloc(2 *
2809 (len + 2532 (len +
@@ -2822,19 +2545,16 @@ CIFSNTLMSSPNegotiateSessSetup(unsigned int xid,
2822 = 0; 2545 = 0;
2823 } /* else no more room so create dummy domain string */ 2546 } /* else no more room so create dummy domain string */
2824 else { 2547 else {
2825 if(ses->serverDomain) 2548 kfree(ses->serverDomain);
2826 kfree(ses->serverDomain);
2827 ses->serverDomain = 2549 ses->serverDomain =
2828 kzalloc(2, 2550 kzalloc(2,
2829 GFP_KERNEL); 2551 GFP_KERNEL);
2830 } 2552 }
2831 } else { /* no room so create dummy domain and NOS string */ 2553 } else { /* no room so create dummy domain and NOS string */
2832 if(ses->serverDomain); 2554 kfree(ses->serverDomain);
2833 kfree(ses->serverDomain);
2834 ses->serverDomain = 2555 ses->serverDomain =
2835 kzalloc(2, GFP_KERNEL); 2556 kzalloc(2, GFP_KERNEL);
2836 if(ses->serverNOS) 2557 kfree(ses->serverNOS);
2837 kfree(ses->serverNOS);
2838 ses->serverNOS = 2558 ses->serverNOS =
2839 kzalloc(2, GFP_KERNEL); 2559 kzalloc(2, GFP_KERNEL);
2840 } 2560 }
@@ -2856,8 +2576,7 @@ CIFSNTLMSSPNegotiateSessSetup(unsigned int xid,
2856 bcc_ptr++; 2576 bcc_ptr++;
2857 2577
2858 len = strnlen(bcc_ptr, 1024); 2578 len = strnlen(bcc_ptr, 1024);
2859 if(ses->serverNOS) 2579 kfree(ses->serverNOS);
2860 kfree(ses->serverNOS);
2861 ses->serverNOS = 2580 ses->serverNOS =
2862 kzalloc(len + 1, 2581 kzalloc(len + 1,
2863 GFP_KERNEL); 2582 GFP_KERNEL);
@@ -2867,8 +2586,7 @@ CIFSNTLMSSPNegotiateSessSetup(unsigned int xid,
2867 bcc_ptr++; 2586 bcc_ptr++;
2868 2587
2869 len = strnlen(bcc_ptr, 1024); 2588 len = strnlen(bcc_ptr, 1024);
2870 if(ses->serverDomain) 2589 kfree(ses->serverDomain);
2871 kfree(ses->serverDomain);
2872 ses->serverDomain = 2590 ses->serverDomain =
2873 kzalloc(len + 1, 2591 kzalloc(len + 1,
2874 GFP_KERNEL); 2592 GFP_KERNEL);
@@ -2994,14 +2712,14 @@ CIFSNTLMSSPAuthSessSetup(unsigned int xid, struct cifsSesInfo *ses,
2994 SecurityBlob->LmChallengeResponse.Buffer = 0; 2712 SecurityBlob->LmChallengeResponse.Buffer = 0;
2995 2713
2996 SecurityBlob->NtChallengeResponse.Length = 2714 SecurityBlob->NtChallengeResponse.Length =
2997 cpu_to_le16(CIFS_SESSION_KEY_SIZE); 2715 cpu_to_le16(CIFS_SESS_KEY_SIZE);
2998 SecurityBlob->NtChallengeResponse.MaximumLength = 2716 SecurityBlob->NtChallengeResponse.MaximumLength =
2999 cpu_to_le16(CIFS_SESSION_KEY_SIZE); 2717 cpu_to_le16(CIFS_SESS_KEY_SIZE);
3000 memcpy(bcc_ptr, ntlm_session_key, CIFS_SESSION_KEY_SIZE); 2718 memcpy(bcc_ptr, ntlm_session_key, CIFS_SESS_KEY_SIZE);
3001 SecurityBlob->NtChallengeResponse.Buffer = 2719 SecurityBlob->NtChallengeResponse.Buffer =
3002 cpu_to_le32(SecurityBlobLength); 2720 cpu_to_le32(SecurityBlobLength);
3003 SecurityBlobLength += CIFS_SESSION_KEY_SIZE; 2721 SecurityBlobLength += CIFS_SESS_KEY_SIZE;
3004 bcc_ptr += CIFS_SESSION_KEY_SIZE; 2722 bcc_ptr += CIFS_SESS_KEY_SIZE;
3005 2723
3006 if (ses->capabilities & CAP_UNICODE) { 2724 if (ses->capabilities & CAP_UNICODE) {
3007 if (domain == NULL) { 2725 if (domain == NULL) {
@@ -3190,8 +2908,7 @@ CIFSNTLMSSPAuthSessSetup(unsigned int xid, struct cifsSesInfo *ses,
3190 bcc_ptr, 2908 bcc_ptr,
3191 remaining_words 2909 remaining_words
3192 - 1); 2910 - 1);
3193 if(ses->serverNOS) 2911 kfree(ses->serverNOS);
3194 kfree(ses->serverNOS);
3195 ses->serverNOS = 2912 ses->serverNOS =
3196 kzalloc(2 * (len + 1), 2913 kzalloc(2 * (len + 1),
3197 GFP_KERNEL); 2914 GFP_KERNEL);
@@ -3244,8 +2961,7 @@ CIFSNTLMSSPAuthSessSetup(unsigned int xid, struct cifsSesInfo *ses,
3244 if(ses->serverDomain) 2961 if(ses->serverDomain)
3245 kfree(ses->serverDomain); 2962 kfree(ses->serverDomain);
3246 ses->serverDomain = kzalloc(2, GFP_KERNEL); 2963 ses->serverDomain = kzalloc(2, GFP_KERNEL);
3247 if(ses->serverNOS) 2964 kfree(ses->serverNOS);
3248 kfree(ses->serverNOS);
3249 ses->serverNOS = kzalloc(2, GFP_KERNEL); 2965 ses->serverNOS = kzalloc(2, GFP_KERNEL);
3250 } 2966 }
3251 } else { /* ASCII */ 2967 } else { /* ASCII */
@@ -3263,8 +2979,7 @@ CIFSNTLMSSPAuthSessSetup(unsigned int xid, struct cifsSesInfo *ses,
3263 bcc_ptr++; 2979 bcc_ptr++;
3264 2980
3265 len = strnlen(bcc_ptr, 1024); 2981 len = strnlen(bcc_ptr, 1024);
3266 if(ses->serverNOS) 2982 kfree(ses->serverNOS);
3267 kfree(ses->serverNOS);
3268 ses->serverNOS = kzalloc(len+1,GFP_KERNEL); 2983 ses->serverNOS = kzalloc(len+1,GFP_KERNEL);
3269 strncpy(ses->serverNOS, bcc_ptr, len); 2984 strncpy(ses->serverNOS, bcc_ptr, len);
3270 bcc_ptr += len; 2985 bcc_ptr += len;
@@ -3340,22 +3055,33 @@ CIFSTCon(unsigned int xid, struct cifsSesInfo *ses,
3340 bcc_ptr = &pSMB->Password[0]; 3055 bcc_ptr = &pSMB->Password[0];
3341 if((ses->server->secMode) & SECMODE_USER) { 3056 if((ses->server->secMode) & SECMODE_USER) {
3342 pSMB->PasswordLength = cpu_to_le16(1); /* minimum */ 3057 pSMB->PasswordLength = cpu_to_le16(1); /* minimum */
3058 *bcc_ptr = 0; /* password is null byte */
3343 bcc_ptr++; /* skip password */ 3059 bcc_ptr++; /* skip password */
3060 /* already aligned so no need to do it below */
3344 } else { 3061 } else {
3345 pSMB->PasswordLength = cpu_to_le16(CIFS_SESSION_KEY_SIZE); 3062 pSMB->PasswordLength = cpu_to_le16(CIFS_SESS_KEY_SIZE);
3346 /* BB FIXME add code to fail this if NTLMv2 or Kerberos 3063 /* BB FIXME add code to fail this if NTLMv2 or Kerberos
3347 specified as required (when that support is added to 3064 specified as required (when that support is added to
3348 the vfs in the future) as only NTLM or the much 3065 the vfs in the future) as only NTLM or the much
3349 weaker LANMAN (which we do not send) is accepted 3066 weaker LANMAN (which we do not send by default) is accepted
3350 by Samba (not sure whether other servers allow 3067 by Samba (not sure whether other servers allow
3351 NTLMv2 password here) */ 3068 NTLMv2 password here) */
3069#ifdef CONFIG_CIFS_WEAK_PW_HASH
3070 if((extended_security & CIFSSEC_MAY_LANMAN) &&
3071 (ses->server->secType == LANMAN))
3072 calc_lanman_hash(ses, bcc_ptr);
3073 else
3074#endif /* CIFS_WEAK_PW_HASH */
3352 SMBNTencrypt(ses->password, 3075 SMBNTencrypt(ses->password,
3353 ses->server->cryptKey, 3076 ses->server->cryptKey,
3354 bcc_ptr); 3077 bcc_ptr);
3355 3078
3356 bcc_ptr += CIFS_SESSION_KEY_SIZE; 3079 bcc_ptr += CIFS_SESS_KEY_SIZE;
3357 *bcc_ptr = 0; 3080 if(ses->capabilities & CAP_UNICODE) {
3358 bcc_ptr++; /* align */ 3081 /* must align unicode strings */
3082 *bcc_ptr = 0; /* null byte password */
3083 bcc_ptr++;
3084 }
3359 } 3085 }
3360 3086
3361 if(ses->server->secMode & 3087 if(ses->server->secMode &
@@ -3429,7 +3155,10 @@ CIFSTCon(unsigned int xid, struct cifsSesInfo *ses,
3429 } 3155 }
3430 /* else do not bother copying these informational fields */ 3156 /* else do not bother copying these informational fields */
3431 } 3157 }
3432 tcon->Flags = le16_to_cpu(pSMBr->OptionalSupport); 3158 if(smb_buffer_response->WordCount == 3)
3159 tcon->Flags = le16_to_cpu(pSMBr->OptionalSupport);
3160 else
3161 tcon->Flags = 0;
3433 cFYI(1, ("Tcon flags: 0x%x ", tcon->Flags)); 3162 cFYI(1, ("Tcon flags: 0x%x ", tcon->Flags));
3434 } else if ((rc == 0) && tcon == NULL) { 3163 } else if ((rc == 0) && tcon == NULL) {
3435 /* all we need to save for IPC$ connection */ 3164 /* all we need to save for IPC$ connection */
@@ -3494,7 +3223,7 @@ int cifs_setup_session(unsigned int xid, struct cifsSesInfo *pSesInfo,
3494 struct nls_table * nls_info) 3223 struct nls_table * nls_info)
3495{ 3224{
3496 int rc = 0; 3225 int rc = 0;
3497 char ntlm_session_key[CIFS_SESSION_KEY_SIZE]; 3226 char ntlm_session_key[CIFS_SESS_KEY_SIZE];
3498 int ntlmv2_flag = FALSE; 3227 int ntlmv2_flag = FALSE;
3499 int first_time = 0; 3228 int first_time = 0;
3500 3229
@@ -3526,20 +3255,13 @@ int cifs_setup_session(unsigned int xid, struct cifsSesInfo *pSesInfo,
3526 pSesInfo->server->secMode, 3255 pSesInfo->server->secMode,
3527 pSesInfo->server->capabilities, 3256 pSesInfo->server->capabilities,
3528 pSesInfo->server->timeZone)); 3257 pSesInfo->server->timeZone));
3529#ifdef CONFIG_CIFS_EXPERIMENTAL 3258 if(experimEnabled < 2)
3530 if(experimEnabled > 1) 3259 rc = CIFS_SessSetup(xid, pSesInfo,
3531 rc = CIFS_SessSetup(xid, pSesInfo, CIFS_NTLM /* type */, 3260 first_time, nls_info);
3532 &ntlmv2_flag, nls_info); 3261 else if (extended_security
3533 else
3534#endif
3535 if (extended_security
3536 && (pSesInfo->capabilities & CAP_EXTENDED_SECURITY) 3262 && (pSesInfo->capabilities & CAP_EXTENDED_SECURITY)
3537 && (pSesInfo->server->secType == NTLMSSP)) { 3263 && (pSesInfo->server->secType == NTLMSSP)) {
3538 cFYI(1, ("New style sesssetup")); 3264 rc = -EOPNOTSUPP;
3539 rc = CIFSSpnegoSessSetup(xid, pSesInfo,
3540 NULL /* security blob */,
3541 0 /* blob length */,
3542 nls_info);
3543 } else if (extended_security 3265 } else if (extended_security
3544 && (pSesInfo->capabilities & CAP_EXTENDED_SECURITY) 3266 && (pSesInfo->capabilities & CAP_EXTENDED_SECURITY)
3545 && (pSesInfo->server->secType == RawNTLMSSP)) { 3267 && (pSesInfo->server->secType == RawNTLMSSP)) {
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c
index 82315edc77d7..ba4cbe9b0684 100644
--- a/fs/cifs/dir.c
+++ b/fs/cifs/dir.c
@@ -113,7 +113,7 @@ cifs_bp_rename_retry:
113 full_path[namelen+2] = 0; 113 full_path[namelen+2] = 0;
114BB remove above eight lines BB */ 114BB remove above eight lines BB */
115 115
116/* Inode operations in similar order to how they appear in the Linux file fs.h */ 116/* Inode operations in similar order to how they appear in Linux file fs.h */
117 117
118int 118int
119cifs_create(struct inode *inode, struct dentry *direntry, int mode, 119cifs_create(struct inode *inode, struct dentry *direntry, int mode,
@@ -178,11 +178,14 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode,
178 FreeXid(xid); 178 FreeXid(xid);
179 return -ENOMEM; 179 return -ENOMEM;
180 } 180 }
181 181 if (cifs_sb->tcon->ses->capabilities & CAP_NT_SMBS)
182 rc = CIFSSMBOpen(xid, pTcon, full_path, disposition, 182 rc = CIFSSMBOpen(xid, pTcon, full_path, disposition,
183 desiredAccess, CREATE_NOT_DIR, 183 desiredAccess, CREATE_NOT_DIR,
184 &fileHandle, &oplock, buf, cifs_sb->local_nls, 184 &fileHandle, &oplock, buf, cifs_sb->local_nls,
185 cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); 185 cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR);
186 else
187 rc = -EIO; /* no NT SMB support fall into legacy open below */
188
186 if(rc == -EIO) { 189 if(rc == -EIO) {
187 /* old server, retry the open legacy style */ 190 /* old server, retry the open legacy style */
188 rc = SMBLegacyOpen(xid, pTcon, full_path, disposition, 191 rc = SMBLegacyOpen(xid, pTcon, full_path, disposition,
@@ -191,7 +194,7 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode,
191 cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); 194 cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR);
192 } 195 }
193 if (rc) { 196 if (rc) {
194 cFYI(1, ("cifs_create returned 0x%x ", rc)); 197 cFYI(1, ("cifs_create returned 0x%x", rc));
195 } else { 198 } else {
196 /* If Open reported that we actually created a file 199 /* If Open reported that we actually created a file
197 then we now have to set the mode if possible */ 200 then we now have to set the mode if possible */
@@ -369,6 +372,10 @@ int cifs_mknod(struct inode *inode, struct dentry *direntry, int mode,
369 cifs_sb->mnt_cifs_flags & 372 cifs_sb->mnt_cifs_flags &
370 CIFS_MOUNT_MAP_SPECIAL_CHR); 373 CIFS_MOUNT_MAP_SPECIAL_CHR);
371 374
375 /* BB FIXME - add handling for backlevel servers
376 which need legacy open and check for all
377 calls to SMBOpen for fallback to
378 SMBLeagcyOpen */
372 if(!rc) { 379 if(!rc) {
373 /* BB Do not bother to decode buf since no 380 /* BB Do not bother to decode buf since no
374 local inode yet to put timestamps in, 381 local inode yet to put timestamps in,
diff --git a/fs/cifs/fcntl.c b/fs/cifs/fcntl.c
index 633a93811328..d91a3d44e9e3 100644
--- a/fs/cifs/fcntl.c
+++ b/fs/cifs/fcntl.c
@@ -91,14 +91,14 @@ int cifs_dir_notify(struct file * file, unsigned long arg)
91 if(full_path == NULL) { 91 if(full_path == NULL) {
92 rc = -ENOMEM; 92 rc = -ENOMEM;
93 } else { 93 } else {
94 cERROR(1,("cifs dir notify on file %s with arg 0x%lx",full_path,arg)); /* BB removeme BB */ 94 cFYI(1,("dir notify on file %s Arg 0x%lx",full_path,arg));
95 rc = CIFSSMBOpen(xid, pTcon, full_path, FILE_OPEN, 95 rc = CIFSSMBOpen(xid, pTcon, full_path, FILE_OPEN,
96 GENERIC_READ | SYNCHRONIZE, 0 /* create options */, 96 GENERIC_READ | SYNCHRONIZE, 0 /* create options */,
97 &netfid, &oplock,NULL, cifs_sb->local_nls, 97 &netfid, &oplock,NULL, cifs_sb->local_nls,
98 cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); 98 cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR);
99 /* BB fixme - add this handle to a notify handle list */ 99 /* BB fixme - add this handle to a notify handle list */
100 if(rc) { 100 if(rc) {
101 cERROR(1,("Could not open directory for notify")); /* BB remove BB */ 101 cFYI(1,("Could not open directory for notify"));
102 } else { 102 } else {
103 filter = convert_to_cifs_notify_flags(arg); 103 filter = convert_to_cifs_notify_flags(arg);
104 if(filter != 0) { 104 if(filter != 0) {
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index b4a18c1cab0a..e9c1573f6aa7 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -110,7 +110,6 @@ static inline int cifs_open_inode_helper(struct inode *inode, struct file *file,
110 &pCifsInode->openFileList); 110 &pCifsInode->openFileList);
111 } 111 }
112 write_unlock(&GlobalSMBSeslock); 112 write_unlock(&GlobalSMBSeslock);
113 write_unlock(&file->f_owner.lock);
114 if (pCifsInode->clientCanCacheRead) { 113 if (pCifsInode->clientCanCacheRead) {
115 /* we have the inode open somewhere else 114 /* we have the inode open somewhere else
116 no need to discard cache data */ 115 no need to discard cache data */
@@ -201,7 +200,7 @@ int cifs_open(struct inode *inode, struct file *file)
201 } else { 200 } else {
202 if (file->f_flags & O_EXCL) 201 if (file->f_flags & O_EXCL)
203 cERROR(1, ("could not find file instance for " 202 cERROR(1, ("could not find file instance for "
204 "new file %p ", file)); 203 "new file %p", file));
205 } 204 }
206 } 205 }
207 206
@@ -260,10 +259,15 @@ int cifs_open(struct inode *inode, struct file *file)
260 rc = -ENOMEM; 259 rc = -ENOMEM;
261 goto out; 260 goto out;
262 } 261 }
263 rc = CIFSSMBOpen(xid, pTcon, full_path, disposition, desiredAccess, 262
264 CREATE_NOT_DIR, &netfid, &oplock, buf, 263 if (cifs_sb->tcon->ses->capabilities & CAP_NT_SMBS)
264 rc = CIFSSMBOpen(xid, pTcon, full_path, disposition,
265 desiredAccess, CREATE_NOT_DIR, &netfid, &oplock, buf,
265 cifs_sb->local_nls, cifs_sb->mnt_cifs_flags 266 cifs_sb->local_nls, cifs_sb->mnt_cifs_flags
266 & CIFS_MOUNT_MAP_SPECIAL_CHR); 267 & CIFS_MOUNT_MAP_SPECIAL_CHR);
268 else
269 rc = -EIO; /* no NT SMB support fall into legacy open below */
270
267 if (rc == -EIO) { 271 if (rc == -EIO) {
268 /* Old server, try legacy style OpenX */ 272 /* Old server, try legacy style OpenX */
269 rc = SMBLegacyOpen(xid, pTcon, full_path, disposition, 273 rc = SMBLegacyOpen(xid, pTcon, full_path, disposition,
@@ -272,7 +276,7 @@ int cifs_open(struct inode *inode, struct file *file)
272 & CIFS_MOUNT_MAP_SPECIAL_CHR); 276 & CIFS_MOUNT_MAP_SPECIAL_CHR);
273 } 277 }
274 if (rc) { 278 if (rc) {
275 cFYI(1, ("cifs_open returned 0x%x ", rc)); 279 cFYI(1, ("cifs_open returned 0x%x", rc));
276 goto out; 280 goto out;
277 } 281 }
278 file->private_data = 282 file->private_data =
@@ -282,7 +286,6 @@ int cifs_open(struct inode *inode, struct file *file)
282 goto out; 286 goto out;
283 } 287 }
284 pCifsFile = cifs_init_private(file->private_data, inode, file, netfid); 288 pCifsFile = cifs_init_private(file->private_data, inode, file, netfid);
285 write_lock(&file->f_owner.lock);
286 write_lock(&GlobalSMBSeslock); 289 write_lock(&GlobalSMBSeslock);
287 list_add(&pCifsFile->tlist, &pTcon->openFileList); 290 list_add(&pCifsFile->tlist, &pTcon->openFileList);
288 291
@@ -293,7 +296,6 @@ int cifs_open(struct inode *inode, struct file *file)
293 &oplock, buf, full_path, xid); 296 &oplock, buf, full_path, xid);
294 } else { 297 } else {
295 write_unlock(&GlobalSMBSeslock); 298 write_unlock(&GlobalSMBSeslock);
296 write_unlock(&file->f_owner.lock);
297 } 299 }
298 300
299 if (oplock & CIFS_CREATE_ACTION) { 301 if (oplock & CIFS_CREATE_ACTION) {
@@ -409,8 +411,8 @@ static int cifs_reopen_file(struct inode *inode, struct file *file,
409 CIFS_MOUNT_MAP_SPECIAL_CHR); 411 CIFS_MOUNT_MAP_SPECIAL_CHR);
410 if (rc) { 412 if (rc) {
411 up(&pCifsFile->fh_sem); 413 up(&pCifsFile->fh_sem);
412 cFYI(1, ("cifs_open returned 0x%x ", rc)); 414 cFYI(1, ("cifs_open returned 0x%x", rc));
413 cFYI(1, ("oplock: %d ", oplock)); 415 cFYI(1, ("oplock: %d", oplock));
414 } else { 416 } else {
415 pCifsFile->netfid = netfid; 417 pCifsFile->netfid = netfid;
416 pCifsFile->invalidHandle = FALSE; 418 pCifsFile->invalidHandle = FALSE;
@@ -472,7 +474,6 @@ int cifs_close(struct inode *inode, struct file *file)
472 pTcon = cifs_sb->tcon; 474 pTcon = cifs_sb->tcon;
473 if (pSMBFile) { 475 if (pSMBFile) {
474 pSMBFile->closePend = TRUE; 476 pSMBFile->closePend = TRUE;
475 write_lock(&file->f_owner.lock);
476 if (pTcon) { 477 if (pTcon) {
477 /* no sense reconnecting to close a file that is 478 /* no sense reconnecting to close a file that is
478 already closed */ 479 already closed */
@@ -487,23 +488,18 @@ int cifs_close(struct inode *inode, struct file *file)
487 the struct would be in each open file, 488 the struct would be in each open file,
488 but this should give enough time to 489 but this should give enough time to
489 clear the socket */ 490 clear the socket */
490 write_unlock(&file->f_owner.lock);
491 cERROR(1,("close with pending writes")); 491 cERROR(1,("close with pending writes"));
492 msleep(timeout); 492 msleep(timeout);
493 write_lock(&file->f_owner.lock);
494 timeout *= 4; 493 timeout *= 4;
495 } 494 }
496 write_unlock(&file->f_owner.lock);
497 rc = CIFSSMBClose(xid, pTcon, 495 rc = CIFSSMBClose(xid, pTcon,
498 pSMBFile->netfid); 496 pSMBFile->netfid);
499 write_lock(&file->f_owner.lock);
500 } 497 }
501 } 498 }
502 write_lock(&GlobalSMBSeslock); 499 write_lock(&GlobalSMBSeslock);
503 list_del(&pSMBFile->flist); 500 list_del(&pSMBFile->flist);
504 list_del(&pSMBFile->tlist); 501 list_del(&pSMBFile->tlist);
505 write_unlock(&GlobalSMBSeslock); 502 write_unlock(&GlobalSMBSeslock);
506 write_unlock(&file->f_owner.lock);
507 kfree(pSMBFile->search_resume_name); 503 kfree(pSMBFile->search_resume_name);
508 kfree(file->private_data); 504 kfree(file->private_data);
509 file->private_data = NULL; 505 file->private_data = NULL;
@@ -531,7 +527,7 @@ int cifs_closedir(struct inode *inode, struct file *file)
531 (struct cifsFileInfo *)file->private_data; 527 (struct cifsFileInfo *)file->private_data;
532 char *ptmp; 528 char *ptmp;
533 529
534 cFYI(1, ("Closedir inode = 0x%p with ", inode)); 530 cFYI(1, ("Closedir inode = 0x%p", inode));
535 531
536 xid = GetXid(); 532 xid = GetXid();
537 533
@@ -605,7 +601,7 @@ int cifs_lock(struct file *file, int cmd, struct file_lock *pfLock)
605 } 601 }
606 if (pfLock->fl_flags & FL_ACCESS) 602 if (pfLock->fl_flags & FL_ACCESS)
607 cFYI(1, ("Process suspended by mandatory locking - " 603 cFYI(1, ("Process suspended by mandatory locking - "
608 "not implemented yet ")); 604 "not implemented yet"));
609 if (pfLock->fl_flags & FL_LEASE) 605 if (pfLock->fl_flags & FL_LEASE)
610 cFYI(1, ("Lease on file - not implemented yet")); 606 cFYI(1, ("Lease on file - not implemented yet"));
611 if (pfLock->fl_flags & 607 if (pfLock->fl_flags &
@@ -1375,7 +1371,7 @@ int cifs_fsync(struct file *file, struct dentry *dentry, int datasync)
1375 1371
1376 xid = GetXid(); 1372 xid = GetXid();
1377 1373
1378 cFYI(1, ("Sync file - name: %s datasync: 0x%x ", 1374 cFYI(1, ("Sync file - name: %s datasync: 0x%x",
1379 dentry->d_name.name, datasync)); 1375 dentry->d_name.name, datasync));
1380 1376
1381 rc = filemap_fdatawrite(inode->i_mapping); 1377 rc = filemap_fdatawrite(inode->i_mapping);
@@ -1404,7 +1400,7 @@ int cifs_fsync(struct file *file, struct dentry *dentry, int datasync)
1404/* fill in rpages then 1400/* fill in rpages then
1405 result = cifs_pagein_inode(inode, index, rpages); */ /* BB finish */ 1401 result = cifs_pagein_inode(inode, index, rpages); */ /* BB finish */
1406 1402
1407/* cFYI(1, ("rpages is %d for sync page of Index %ld ", rpages, index)); 1403/* cFYI(1, ("rpages is %d for sync page of Index %ld", rpages, index));
1408 1404
1409#if 0 1405#if 0
1410 if (rc < 0) 1406 if (rc < 0)
@@ -1836,7 +1832,7 @@ static int cifs_readpage_worker(struct file *file, struct page *page,
1836 if (rc < 0) 1832 if (rc < 0)
1837 goto io_error; 1833 goto io_error;
1838 else 1834 else
1839 cFYI(1, ("Bytes read %d ",rc)); 1835 cFYI(1, ("Bytes read %d",rc));
1840 1836
1841 file->f_dentry->d_inode->i_atime = 1837 file->f_dentry->d_inode->i_atime =
1842 current_fs_time(file->f_dentry->d_inode->i_sb); 1838 current_fs_time(file->f_dentry->d_inode->i_sb);
@@ -1957,3 +1953,19 @@ struct address_space_operations cifs_addr_ops = {
1957 /* .sync_page = cifs_sync_page, */ 1953 /* .sync_page = cifs_sync_page, */
1958 /* .direct_IO = */ 1954 /* .direct_IO = */
1959}; 1955};
1956
1957/*
1958 * cifs_readpages requires the server to support a buffer large enough to
1959 * contain the header plus one complete page of data. Otherwise, we need
1960 * to leave cifs_readpages out of the address space operations.
1961 */
1962struct address_space_operations cifs_addr_ops_smallbuf = {
1963 .readpage = cifs_readpage,
1964 .writepage = cifs_writepage,
1965 .writepages = cifs_writepages,
1966 .prepare_write = cifs_prepare_write,
1967 .commit_write = cifs_commit_write,
1968 .set_page_dirty = __set_page_dirty_nobuffers,
1969 /* .sync_page = cifs_sync_page, */
1970 /* .direct_IO = */
1971};
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 4093764ef461..b88147c1dc27 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -41,7 +41,7 @@ int cifs_get_inode_info_unix(struct inode **pinode,
41 char *tmp_path; 41 char *tmp_path;
42 42
43 pTcon = cifs_sb->tcon; 43 pTcon = cifs_sb->tcon;
44 cFYI(1, ("Getting info on %s ", search_path)); 44 cFYI(1, ("Getting info on %s", search_path));
45 /* could have done a find first instead but this returns more info */ 45 /* could have done a find first instead but this returns more info */
46 rc = CIFSSMBUnixQPathInfo(xid, pTcon, search_path, &findData, 46 rc = CIFSSMBUnixQPathInfo(xid, pTcon, search_path, &findData,
47 cifs_sb->local_nls, cifs_sb->mnt_cifs_flags & 47 cifs_sb->local_nls, cifs_sb->mnt_cifs_flags &
@@ -97,9 +97,9 @@ int cifs_get_inode_info_unix(struct inode **pinode,
97 inode = *pinode; 97 inode = *pinode;
98 cifsInfo = CIFS_I(inode); 98 cifsInfo = CIFS_I(inode);
99 99
100 cFYI(1, ("Old time %ld ", cifsInfo->time)); 100 cFYI(1, ("Old time %ld", cifsInfo->time));
101 cifsInfo->time = jiffies; 101 cifsInfo->time = jiffies;
102 cFYI(1, ("New time %ld ", cifsInfo->time)); 102 cFYI(1, ("New time %ld", cifsInfo->time));
103 /* this is ok to set on every inode revalidate */ 103 /* this is ok to set on every inode revalidate */
104 atomic_set(&cifsInfo->inUse,1); 104 atomic_set(&cifsInfo->inUse,1);
105 105
@@ -180,11 +180,12 @@ int cifs_get_inode_info_unix(struct inode **pinode,
180 else /* not direct, send byte range locks */ 180 else /* not direct, send byte range locks */
181 inode->i_fop = &cifs_file_ops; 181 inode->i_fop = &cifs_file_ops;
182 182
183 inode->i_data.a_ops = &cifs_addr_ops;
184 /* check if server can support readpages */ 183 /* check if server can support readpages */
185 if(pTcon->ses->server->maxBuf < 184 if(pTcon->ses->server->maxBuf <
186 4096 + MAX_CIFS_HDR_SIZE) 185 PAGE_CACHE_SIZE + MAX_CIFS_HDR_SIZE)
187 inode->i_data.a_ops->readpages = NULL; 186 inode->i_data.a_ops = &cifs_addr_ops_smallbuf;
187 else
188 inode->i_data.a_ops = &cifs_addr_ops;
188 } else if (S_ISDIR(inode->i_mode)) { 189 } else if (S_ISDIR(inode->i_mode)) {
189 cFYI(1, ("Directory inode")); 190 cFYI(1, ("Directory inode"));
190 inode->i_op = &cifs_dir_inode_ops; 191 inode->i_op = &cifs_dir_inode_ops;
@@ -421,23 +422,23 @@ int cifs_get_inode_info(struct inode **pinode,
421 inode = *pinode; 422 inode = *pinode;
422 cifsInfo = CIFS_I(inode); 423 cifsInfo = CIFS_I(inode);
423 cifsInfo->cifsAttrs = attr; 424 cifsInfo->cifsAttrs = attr;
424 cFYI(1, ("Old time %ld ", cifsInfo->time)); 425 cFYI(1, ("Old time %ld", cifsInfo->time));
425 cifsInfo->time = jiffies; 426 cifsInfo->time = jiffies;
426 cFYI(1, ("New time %ld ", cifsInfo->time)); 427 cFYI(1, ("New time %ld", cifsInfo->time));
427 428
428 /* blksize needs to be multiple of two. So safer to default to 429 /* blksize needs to be multiple of two. So safer to default to
429 blksize and blkbits set in superblock so 2**blkbits and blksize 430 blksize and blkbits set in superblock so 2**blkbits and blksize
430 will match rather than setting to: 431 will match rather than setting to:
431 (pTcon->ses->server->maxBuf - MAX_CIFS_HDR_SIZE) & 0xFFFFFE00;*/ 432 (pTcon->ses->server->maxBuf - MAX_CIFS_HDR_SIZE) & 0xFFFFFE00;*/
432 433
433 /* Linux can not store file creation time unfortunately so we ignore it */ 434 /* Linux can not store file creation time so ignore it */
434 inode->i_atime = 435 inode->i_atime =
435 cifs_NTtimeToUnix(le64_to_cpu(pfindData->LastAccessTime)); 436 cifs_NTtimeToUnix(le64_to_cpu(pfindData->LastAccessTime));
436 inode->i_mtime = 437 inode->i_mtime =
437 cifs_NTtimeToUnix(le64_to_cpu(pfindData->LastWriteTime)); 438 cifs_NTtimeToUnix(le64_to_cpu(pfindData->LastWriteTime));
438 inode->i_ctime = 439 inode->i_ctime =
439 cifs_NTtimeToUnix(le64_to_cpu(pfindData->ChangeTime)); 440 cifs_NTtimeToUnix(le64_to_cpu(pfindData->ChangeTime));
440 cFYI(0, ("Attributes came in as 0x%x ", attr)); 441 cFYI(0, ("Attributes came in as 0x%x", attr));
441 442
442 /* set default mode. will override for dirs below */ 443 /* set default mode. will override for dirs below */
443 if (atomic_read(&cifsInfo->inUse) == 0) 444 if (atomic_read(&cifsInfo->inUse) == 0)
@@ -519,10 +520,11 @@ int cifs_get_inode_info(struct inode **pinode,
519 else /* not direct, send byte range locks */ 520 else /* not direct, send byte range locks */
520 inode->i_fop = &cifs_file_ops; 521 inode->i_fop = &cifs_file_ops;
521 522
522 inode->i_data.a_ops = &cifs_addr_ops;
523 if(pTcon->ses->server->maxBuf < 523 if(pTcon->ses->server->maxBuf <
524 4096 + MAX_CIFS_HDR_SIZE) 524 PAGE_CACHE_SIZE + MAX_CIFS_HDR_SIZE)
525 inode->i_data.a_ops->readpages = NULL; 525 inode->i_data.a_ops = &cifs_addr_ops_smallbuf;
526 else
527 inode->i_data.a_ops = &cifs_addr_ops;
526 } else if (S_ISDIR(inode->i_mode)) { 528 } else if (S_ISDIR(inode->i_mode)) {
527 cFYI(1, ("Directory inode")); 529 cFYI(1, ("Directory inode"));
528 inode->i_op = &cifs_dir_inode_ops; 530 inode->i_op = &cifs_dir_inode_ops;
@@ -731,7 +733,7 @@ int cifs_mkdir(struct inode *inode, struct dentry *direntry, int mode)
731 rc = CIFSSMBMkDir(xid, pTcon, full_path, cifs_sb->local_nls, 733 rc = CIFSSMBMkDir(xid, pTcon, full_path, cifs_sb->local_nls,
732 cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); 734 cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR);
733 if (rc) { 735 if (rc) {
734 cFYI(1, ("cifs_mkdir returned 0x%x ", rc)); 736 cFYI(1, ("cifs_mkdir returned 0x%x", rc));
735 d_drop(direntry); 737 d_drop(direntry);
736 } else { 738 } else {
737 inode->i_nlink++; 739 inode->i_nlink++;
@@ -798,7 +800,7 @@ int cifs_rmdir(struct inode *inode, struct dentry *direntry)
798 char *full_path = NULL; 800 char *full_path = NULL;
799 struct cifsInodeInfo *cifsInode; 801 struct cifsInodeInfo *cifsInode;
800 802
801 cFYI(1, ("cifs_rmdir, inode = 0x%p with ", inode)); 803 cFYI(1, ("cifs_rmdir, inode = 0x%p", inode));
802 804
803 xid = GetXid(); 805 xid = GetXid();
804 806
@@ -1121,7 +1123,7 @@ int cifs_setattr(struct dentry *direntry, struct iattr *attrs)
1121 1123
1122 xid = GetXid(); 1124 xid = GetXid();
1123 1125
1124 cFYI(1, ("In cifs_setattr, name = %s attrs->iavalid 0x%x ", 1126 cFYI(1, ("setattr on file %s attrs->iavalid 0x%x",
1125 direntry->d_name.name, attrs->ia_valid)); 1127 direntry->d_name.name, attrs->ia_valid));
1126 1128
1127 cifs_sb = CIFS_SB(direntry->d_inode->i_sb); 1129 cifs_sb = CIFS_SB(direntry->d_inode->i_sb);
@@ -1157,6 +1159,7 @@ int cifs_setattr(struct dentry *direntry, struct iattr *attrs)
1157 when the local oplock break takes longer to flush 1159 when the local oplock break takes longer to flush
1158 writebehind data than the SMB timeout for the SetPathInfo 1160 writebehind data than the SMB timeout for the SetPathInfo
1159 request would allow */ 1161 request would allow */
1162
1160 open_file = find_writable_file(cifsInode); 1163 open_file = find_writable_file(cifsInode);
1161 if (open_file) { 1164 if (open_file) {
1162 __u16 nfid = open_file->netfid; 1165 __u16 nfid = open_file->netfid;
@@ -1289,7 +1292,7 @@ int cifs_setattr(struct dentry *direntry, struct iattr *attrs)
1289 it may be useful to Windows - but we do 1292 it may be useful to Windows - but we do
1290 not want to set ctime unless some other 1293 not want to set ctime unless some other
1291 timestamp is changing */ 1294 timestamp is changing */
1292 cFYI(1, ("CIFS - CTIME changed ")); 1295 cFYI(1, ("CIFS - CTIME changed"));
1293 time_buf.ChangeTime = 1296 time_buf.ChangeTime =
1294 cpu_to_le64(cifs_UnixTimeToNT(attrs->ia_ctime)); 1297 cpu_to_le64(cifs_UnixTimeToNT(attrs->ia_ctime));
1295 } else 1298 } else
@@ -1356,7 +1359,7 @@ cifs_setattr_exit:
1356 1359
1357void cifs_delete_inode(struct inode *inode) 1360void cifs_delete_inode(struct inode *inode)
1358{ 1361{
1359 cFYI(1, ("In cifs_delete_inode, inode = 0x%p ", inode)); 1362 cFYI(1, ("In cifs_delete_inode, inode = 0x%p", inode));
1360 /* may have to add back in if and when safe distributed caching of 1363 /* may have to add back in if and when safe distributed caching of
1361 directories added e.g. via FindNotify */ 1364 directories added e.g. via FindNotify */
1362} 1365}
diff --git a/fs/cifs/link.c b/fs/cifs/link.c
index 2ec99f833142..a57f5d6e6213 100644
--- a/fs/cifs/link.c
+++ b/fs/cifs/link.c
@@ -167,7 +167,7 @@ cifs_symlink(struct inode *inode, struct dentry *direntry, const char *symname)
167 return -ENOMEM; 167 return -ENOMEM;
168 } 168 }
169 169
170 cFYI(1, ("Full path: %s ", full_path)); 170 cFYI(1, ("Full path: %s", full_path));
171 cFYI(1, ("symname is %s", symname)); 171 cFYI(1, ("symname is %s", symname));
172 172
173 /* BB what if DFS and this volume is on different share? BB */ 173 /* BB what if DFS and this volume is on different share? BB */
@@ -186,8 +186,7 @@ cifs_symlink(struct inode *inode, struct dentry *direntry, const char *symname)
186 inode->i_sb,xid); 186 inode->i_sb,xid);
187 187
188 if (rc != 0) { 188 if (rc != 0) {
189 cFYI(1, 189 cFYI(1, ("Create symlink ok, getinodeinfo fail rc = %d",
190 ("Create symlink worked but get_inode_info failed with rc = %d ",
191 rc)); 190 rc));
192 } else { 191 } else {
193 if (pTcon->nocase) 192 if (pTcon->nocase)
@@ -289,7 +288,7 @@ cifs_readlink(struct dentry *direntry, char __user *pBuffer, int buflen)
289 else { 288 else {
290 cFYI(1,("num referral: %d",num_referrals)); 289 cFYI(1,("num referral: %d",num_referrals));
291 if(referrals) { 290 if(referrals) {
292 cFYI(1,("referral string: %s ",referrals)); 291 cFYI(1,("referral string: %s",referrals));
293 strncpy(tmpbuffer, referrals, len-1); 292 strncpy(tmpbuffer, referrals, len-1);
294 } 293 }
295 } 294 }
diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c
index fafd056426e4..22c937e5884f 100644
--- a/fs/cifs/misc.c
+++ b/fs/cifs/misc.c
@@ -101,6 +101,7 @@ sesInfoFree(struct cifsSesInfo *buf_to_free)
101 kfree(buf_to_free->serverDomain); 101 kfree(buf_to_free->serverDomain);
102 kfree(buf_to_free->serverNOS); 102 kfree(buf_to_free->serverNOS);
103 kfree(buf_to_free->password); 103 kfree(buf_to_free->password);
104 kfree(buf_to_free->domainName);
104 kfree(buf_to_free); 105 kfree(buf_to_free);
105} 106}
106 107
@@ -499,11 +500,12 @@ is_valid_oplock_break(struct smb_hdr *buf, struct TCP_Server_Info *srv)
499 if(pSMBr->ByteCount > sizeof(struct file_notify_information)) { 500 if(pSMBr->ByteCount > sizeof(struct file_notify_information)) {
500 data_offset = le32_to_cpu(pSMBr->DataOffset); 501 data_offset = le32_to_cpu(pSMBr->DataOffset);
501 502
502 pnotify = (struct file_notify_information *)((char *)&pSMBr->hdr.Protocol 503 pnotify = (struct file_notify_information *)
503 + data_offset); 504 ((char *)&pSMBr->hdr.Protocol + data_offset);
504 cFYI(1,("dnotify on %s with action: 0x%x",pnotify->FileName, 505 cFYI(1,("dnotify on %s Action: 0x%x",pnotify->FileName,
505 pnotify->Action)); /* BB removeme BB */ 506 pnotify->Action)); /* BB removeme BB */
506 /* cifs_dump_mem("Received notify Data is: ",buf,sizeof(struct smb_hdr)+60); */ 507 /* cifs_dump_mem("Rcvd notify Data: ",buf,
508 sizeof(struct smb_hdr)+60); */
507 return TRUE; 509 return TRUE;
508 } 510 }
509 if(pSMBr->hdr.Status.CifsError) { 511 if(pSMBr->hdr.Status.CifsError) {
diff --git a/fs/cifs/netmisc.c b/fs/cifs/netmisc.c
index 5de74d216fdd..b66eff5dc624 100644
--- a/fs/cifs/netmisc.c
+++ b/fs/cifs/netmisc.c
@@ -84,11 +84,11 @@ static const struct smb_to_posix_error mapping_table_ERRDOS[] = {
84 84
85static const struct smb_to_posix_error mapping_table_ERRSRV[] = { 85static const struct smb_to_posix_error mapping_table_ERRSRV[] = {
86 {ERRerror, -EIO}, 86 {ERRerror, -EIO},
87 {ERRbadpw, -EPERM}, 87 {ERRbadpw, -EACCES}, /* was EPERM */
88 {ERRbadtype, -EREMOTE}, 88 {ERRbadtype, -EREMOTE},
89 {ERRaccess, -EACCES}, 89 {ERRaccess, -EACCES},
90 {ERRinvtid, -ENXIO}, 90 {ERRinvtid, -ENXIO},
91 {ERRinvnetname, -ENODEV}, 91 {ERRinvnetname, -ENXIO},
92 {ERRinvdevice, -ENXIO}, 92 {ERRinvdevice, -ENXIO},
93 {ERRqfull, -ENOSPC}, 93 {ERRqfull, -ENOSPC},
94 {ERRqtoobig, -ENOSPC}, 94 {ERRqtoobig, -ENOSPC},
diff --git a/fs/cifs/ntlmssp.c b/fs/cifs/ntlmssp.c
deleted file mode 100644
index 115359cc7a32..000000000000
--- a/fs/cifs/ntlmssp.c
+++ /dev/null
@@ -1,143 +0,0 @@
1/*
2 * fs/cifs/ntlmssp.h
3 *
4 * Copyright (c) International Business Machines Corp., 2006
5 * Author(s): Steve French (sfrench@us.ibm.com)
6 *
7 * This library is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU Lesser General Public License as published
9 * by the Free Software Foundation; either version 2.1 of the License, or
10 * (at your option) any later version.
11 *
12 * This library is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
15 * the GNU Lesser General Public License for more details.
16 *
17 * You should have received a copy of the GNU Lesser General Public License
18 * along with this library; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 */
21
22#include "cifspdu.h"
23#include "cifsglob.h"
24#include "cifsproto.h"
25#include "cifs_unicode.h"
26#include "cifs_debug.h"
27#include "ntlmssp.h"
28#include "nterr.h"
29
30#ifdef CONFIG_CIFS_EXPERIMENTAL
31static __u32 cifs_ssetup_hdr(struct cifsSesInfo *ses, SESSION_SETUP_ANDX *pSMB)
32{
33 __u32 capabilities = 0;
34
35 /* init fields common to all four types of SessSetup */
36 /* note that header is initialized to zero in header_assemble */
37 pSMB->req.AndXCommand = 0xFF;
38 pSMB->req.MaxBufferSize = cpu_to_le16(ses->server->maxBuf);
39 pSMB->req.MaxMpxCount = cpu_to_le16(ses->server->maxReq);
40
41 /* Now no need to set SMBFLG_CASELESS or obsolete CANONICAL PATH */
42
43 /* BB verify whether signing required on neg or just on auth frame
44 (and NTLM case) */
45
46 capabilities = CAP_LARGE_FILES | CAP_NT_SMBS | CAP_LEVEL_II_OPLOCKS |
47 CAP_LARGE_WRITE_X | CAP_LARGE_READ_X;
48
49 if(ses->server->secMode & (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED))
50 pSMB->req.hdr.Flags2 |= SMBFLG2_SECURITY_SIGNATURE;
51
52 if (ses->capabilities & CAP_UNICODE) {
53 pSMB->req.hdr.Flags2 |= SMBFLG2_UNICODE;
54 capabilities |= CAP_UNICODE;
55 }
56 if (ses->capabilities & CAP_STATUS32) {
57 pSMB->req.hdr.Flags2 |= SMBFLG2_ERR_STATUS;
58 capabilities |= CAP_STATUS32;
59 }
60 if (ses->capabilities & CAP_DFS) {
61 pSMB->req.hdr.Flags2 |= SMBFLG2_DFS;
62 capabilities |= CAP_DFS;
63 }
64
65 /* BB check whether to init vcnum BB */
66 return capabilities;
67}
68int
69CIFS_SessSetup(unsigned int xid, struct cifsSesInfo *ses, const int type,
70 int * pNTLMv2_flg, const struct nls_table *nls_cp)
71{
72 int rc = 0;
73 int wct;
74 struct smb_hdr *smb_buffer;
75 char *bcc_ptr;
76 SESSION_SETUP_ANDX *pSMB;
77 __u32 capabilities;
78
79 if(ses == NULL)
80 return -EINVAL;
81
82 cFYI(1,("SStp type: %d",type));
83 if(type < CIFS_NTLM) {
84#ifndef CONFIG_CIFS_WEAK_PW_HASH
85 /* LANMAN and plaintext are less secure and off by default.
86 So we make this explicitly be turned on in kconfig (in the
87 build) and turned on at runtime (changed from the default)
88 in proc/fs/cifs or via mount parm. Unfortunately this is
89 needed for old Win (e.g. Win95), some obscure NAS and OS/2 */
90 return -EOPNOTSUPP;
91#endif
92 wct = 10; /* lanman 2 style sessionsetup */
93 } else if(type < CIFS_NTLMSSP_NEG)
94 wct = 13; /* old style NTLM sessionsetup */
95 else /* same size for negotiate or auth, NTLMSSP or extended security */
96 wct = 12;
97
98 rc = small_smb_init_no_tc(SMB_COM_SESSION_SETUP_ANDX, wct, ses,
99 (void **)&smb_buffer);
100 if(rc)
101 return rc;
102
103 pSMB = (SESSION_SETUP_ANDX *)smb_buffer;
104
105 capabilities = cifs_ssetup_hdr(ses, pSMB);
106 bcc_ptr = pByteArea(smb_buffer);
107 if(type > CIFS_NTLM) {
108 pSMB->req.hdr.Flags2 |= SMBFLG2_EXT_SEC;
109 capabilities |= CAP_EXTENDED_SECURITY;
110 pSMB->req.Capabilities = cpu_to_le32(capabilities);
111 /* BB set password lengths */
112 } else if(type < CIFS_NTLM) /* lanman */ {
113 /* no capabilities flags in old lanman negotiation */
114 /* pSMB->old_req.PasswordLength = */ /* BB fixme BB */
115 } else /* type CIFS_NTLM */ {
116 pSMB->req_no_secext.Capabilities = cpu_to_le32(capabilities);
117 pSMB->req_no_secext.CaseInsensitivePasswordLength =
118 cpu_to_le16(CIFS_SESSION_KEY_SIZE);
119 pSMB->req_no_secext.CaseSensitivePasswordLength =
120 cpu_to_le16(CIFS_SESSION_KEY_SIZE);
121 }
122
123
124 /* copy session key */
125
126 /* if Unicode, align strings to two byte boundary */
127
128 /* copy user name */ /* BB Do we need to special case null user name? */
129
130 /* copy domain name */
131
132 /* copy Linux version */
133
134 /* copy network operating system name */
135
136 /* update bcc and smb buffer length */
137
138/* rc = SendReceive2(xid, ses, iov, num_iovecs, &resp_buf_type, 0); */
139 /* SMB request buf freed in SendReceive2 */
140
141 return rc;
142}
143#endif /* CONFIG_CIFS_EXPERIMENTAL */
diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c
index b689c5035124..03bbcb377913 100644
--- a/fs/cifs/readdir.c
+++ b/fs/cifs/readdir.c
@@ -21,6 +21,7 @@
21 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 21 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
22 */ 22 */
23#include <linux/fs.h> 23#include <linux/fs.h>
24#include <linux/pagemap.h>
24#include <linux/stat.h> 25#include <linux/stat.h>
25#include <linux/smp_lock.h> 26#include <linux/smp_lock.h>
26#include "cifspdu.h" 27#include "cifspdu.h"
@@ -31,8 +32,8 @@
31#include "cifs_fs_sb.h" 32#include "cifs_fs_sb.h"
32#include "cifsfs.h" 33#include "cifsfs.h"
33 34
34/* BB fixme - add debug wrappers around this function to disable it fixme BB */ 35#ifdef CONFIG_CIFS_DEBUG2
35/* static void dump_cifs_file_struct(struct file *file, char *label) 36static void dump_cifs_file_struct(struct file *file, char *label)
36{ 37{
37 struct cifsFileInfo * cf; 38 struct cifsFileInfo * cf;
38 39
@@ -53,7 +54,8 @@
53 } 54 }
54 55
55 } 56 }
56} */ 57}
58#endif /* DEBUG2 */
57 59
58/* Returns one if new inode created (which therefore needs to be hashed) */ 60/* Returns one if new inode created (which therefore needs to be hashed) */
59/* Might check in the future if inode number changed so we can rehash inode */ 61/* Might check in the future if inode number changed so we can rehash inode */
@@ -107,32 +109,52 @@ static int construct_dentry(struct qstr *qstring, struct file *file,
107 return rc; 109 return rc;
108} 110}
109 111
110static void fill_in_inode(struct inode *tmp_inode, 112static void fill_in_inode(struct inode *tmp_inode, int new_buf_type,
111 FILE_DIRECTORY_INFO *pfindData, int *pobject_type, int isNewInode) 113 char * buf, int *pobject_type, int isNewInode)
112{ 114{
113 loff_t local_size; 115 loff_t local_size;
114 struct timespec local_mtime; 116 struct timespec local_mtime;
115 117
116 struct cifsInodeInfo *cifsInfo = CIFS_I(tmp_inode); 118 struct cifsInodeInfo *cifsInfo = CIFS_I(tmp_inode);
117 struct cifs_sb_info *cifs_sb = CIFS_SB(tmp_inode->i_sb); 119 struct cifs_sb_info *cifs_sb = CIFS_SB(tmp_inode->i_sb);
118 __u32 attr = le32_to_cpu(pfindData->ExtFileAttributes); 120 __u32 attr;
119 __u64 allocation_size = le64_to_cpu(pfindData->AllocationSize); 121 __u64 allocation_size;
120 __u64 end_of_file = le64_to_cpu(pfindData->EndOfFile); 122 __u64 end_of_file;
121
122 cifsInfo->cifsAttrs = attr;
123 cifsInfo->time = jiffies;
124 123
125 /* save mtime and size */ 124 /* save mtime and size */
126 local_mtime = tmp_inode->i_mtime; 125 local_mtime = tmp_inode->i_mtime;
127 local_size = tmp_inode->i_size; 126 local_size = tmp_inode->i_size;
128 127
128 if(new_buf_type) {
129 FILE_DIRECTORY_INFO *pfindData = (FILE_DIRECTORY_INFO *)buf;
130
131 attr = le32_to_cpu(pfindData->ExtFileAttributes);
132 allocation_size = le64_to_cpu(pfindData->AllocationSize);
133 end_of_file = le64_to_cpu(pfindData->EndOfFile);
134 tmp_inode->i_atime =
135 cifs_NTtimeToUnix(le64_to_cpu(pfindData->LastAccessTime));
136 tmp_inode->i_mtime =
137 cifs_NTtimeToUnix(le64_to_cpu(pfindData->LastWriteTime));
138 tmp_inode->i_ctime =
139 cifs_NTtimeToUnix(le64_to_cpu(pfindData->ChangeTime));
140 } else { /* legacy, OS2 and DOS style */
141 FIND_FILE_STANDARD_INFO * pfindData =
142 (FIND_FILE_STANDARD_INFO *)buf;
143
144 attr = le16_to_cpu(pfindData->Attributes);
145 allocation_size = le32_to_cpu(pfindData->AllocationSize);
146 end_of_file = le32_to_cpu(pfindData->DataSize);
147 tmp_inode->i_atime = CURRENT_TIME;
148 /* tmp_inode->i_mtime = BB FIXME - add dos time handling
149 tmp_inode->i_ctime = 0; BB FIXME */
150
151 }
152
129 /* Linux can not store file creation time unfortunately so ignore it */ 153 /* Linux can not store file creation time unfortunately so ignore it */
130 tmp_inode->i_atime = 154
131 cifs_NTtimeToUnix(le64_to_cpu(pfindData->LastAccessTime)); 155 cifsInfo->cifsAttrs = attr;
132 tmp_inode->i_mtime = 156 cifsInfo->time = jiffies;
133 cifs_NTtimeToUnix(le64_to_cpu(pfindData->LastWriteTime)); 157
134 tmp_inode->i_ctime =
135 cifs_NTtimeToUnix(le64_to_cpu(pfindData->ChangeTime));
136 /* treat dos attribute of read-only as read-only mode bit e.g. 555? */ 158 /* treat dos attribute of read-only as read-only mode bit e.g. 555? */
137 /* 2767 perms - indicate mandatory locking */ 159 /* 2767 perms - indicate mandatory locking */
138 /* BB fill in uid and gid here? with help from winbind? 160 /* BB fill in uid and gid here? with help from winbind?
@@ -215,11 +237,13 @@ static void fill_in_inode(struct inode *tmp_inode,
215 else 237 else
216 tmp_inode->i_fop = &cifs_file_ops; 238 tmp_inode->i_fop = &cifs_file_ops;
217 239
218 tmp_inode->i_data.a_ops = &cifs_addr_ops;
219 if((cifs_sb->tcon) && (cifs_sb->tcon->ses) && 240 if((cifs_sb->tcon) && (cifs_sb->tcon->ses) &&
220 (cifs_sb->tcon->ses->server->maxBuf < 241 (cifs_sb->tcon->ses->server->maxBuf <
221 4096 + MAX_CIFS_HDR_SIZE)) 242 PAGE_CACHE_SIZE + MAX_CIFS_HDR_SIZE))
222 tmp_inode->i_data.a_ops->readpages = NULL; 243 tmp_inode->i_data.a_ops = &cifs_addr_ops_smallbuf;
244 else
245 tmp_inode->i_data.a_ops = &cifs_addr_ops;
246
223 if(isNewInode) 247 if(isNewInode)
224 return; /* No sense invalidating pages for new inode 248 return; /* No sense invalidating pages for new inode
225 since have not started caching readahead file 249 since have not started caching readahead file
@@ -338,11 +362,12 @@ static void unix_fill_in_inode(struct inode *tmp_inode,
338 else 362 else
339 tmp_inode->i_fop = &cifs_file_ops; 363 tmp_inode->i_fop = &cifs_file_ops;
340 364
341 tmp_inode->i_data.a_ops = &cifs_addr_ops;
342 if((cifs_sb->tcon) && (cifs_sb->tcon->ses) && 365 if((cifs_sb->tcon) && (cifs_sb->tcon->ses) &&
343 (cifs_sb->tcon->ses->server->maxBuf < 366 (cifs_sb->tcon->ses->server->maxBuf <
344 4096 + MAX_CIFS_HDR_SIZE)) 367 PAGE_CACHE_SIZE + MAX_CIFS_HDR_SIZE))
345 tmp_inode->i_data.a_ops->readpages = NULL; 368 tmp_inode->i_data.a_ops = &cifs_addr_ops_smallbuf;
369 else
370 tmp_inode->i_data.a_ops = &cifs_addr_ops;
346 371
347 if(isNewInode) 372 if(isNewInode)
348 return; /* No sense invalidating pages for new inode since we 373 return; /* No sense invalidating pages for new inode since we
@@ -415,7 +440,10 @@ static int initiate_cifs_search(const int xid, struct file *file)
415ffirst_retry: 440ffirst_retry:
416 /* test for Unix extensions */ 441 /* test for Unix extensions */
417 if (pTcon->ses->capabilities & CAP_UNIX) { 442 if (pTcon->ses->capabilities & CAP_UNIX) {
418 cifsFile->srch_inf.info_level = SMB_FIND_FILE_UNIX; 443 cifsFile->srch_inf.info_level = SMB_FIND_FILE_UNIX;
444 } else if ((pTcon->ses->capabilities &
445 (CAP_NT_SMBS | CAP_NT_FIND)) == 0) {
446 cifsFile->srch_inf.info_level = SMB_FIND_FILE_INFO_STANDARD;
419 } else if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM) { 447 } else if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM) {
420 cifsFile->srch_inf.info_level = SMB_FIND_FILE_ID_FULL_DIR_INFO; 448 cifsFile->srch_inf.info_level = SMB_FIND_FILE_ID_FULL_DIR_INFO;
421 } else /* not srvinos - BB fixme add check for backlevel? */ { 449 } else /* not srvinos - BB fixme add check for backlevel? */ {
@@ -451,12 +479,19 @@ static int cifs_unicode_bytelen(char *str)
451 return len << 1; 479 return len << 1;
452} 480}
453 481
454static char *nxt_dir_entry(char *old_entry, char *end_of_smb) 482static char *nxt_dir_entry(char *old_entry, char *end_of_smb, int level)
455{ 483{
456 char * new_entry; 484 char * new_entry;
457 FILE_DIRECTORY_INFO * pDirInfo = (FILE_DIRECTORY_INFO *)old_entry; 485 FILE_DIRECTORY_INFO * pDirInfo = (FILE_DIRECTORY_INFO *)old_entry;
458 486
459 new_entry = old_entry + le32_to_cpu(pDirInfo->NextEntryOffset); 487 if(level == SMB_FIND_FILE_INFO_STANDARD) {
488 FIND_FILE_STANDARD_INFO * pfData;
489 pfData = (FIND_FILE_STANDARD_INFO *)pDirInfo;
490
491 new_entry = old_entry + sizeof(FIND_FILE_STANDARD_INFO) +
492 pfData->FileNameLength;
493 } else
494 new_entry = old_entry + le32_to_cpu(pDirInfo->NextEntryOffset);
460 cFYI(1,("new entry %p old entry %p",new_entry,old_entry)); 495 cFYI(1,("new entry %p old entry %p",new_entry,old_entry));
461 /* validate that new_entry is not past end of SMB */ 496 /* validate that new_entry is not past end of SMB */
462 if(new_entry >= end_of_smb) { 497 if(new_entry >= end_of_smb) {
@@ -464,7 +499,10 @@ static char *nxt_dir_entry(char *old_entry, char *end_of_smb)
464 ("search entry %p began after end of SMB %p old entry %p", 499 ("search entry %p began after end of SMB %p old entry %p",
465 new_entry, end_of_smb, old_entry)); 500 new_entry, end_of_smb, old_entry));
466 return NULL; 501 return NULL;
467 } else if (new_entry + sizeof(FILE_DIRECTORY_INFO) > end_of_smb) { 502 } else if(((level == SMB_FIND_FILE_INFO_STANDARD) &&
503 (new_entry + sizeof(FIND_FILE_STANDARD_INFO) > end_of_smb)) ||
504 ((level != SMB_FIND_FILE_INFO_STANDARD) &&
505 (new_entry + sizeof(FILE_DIRECTORY_INFO) > end_of_smb))) {
468 cERROR(1,("search entry %p extends after end of SMB %p", 506 cERROR(1,("search entry %p extends after end of SMB %p",
469 new_entry, end_of_smb)); 507 new_entry, end_of_smb));
470 return NULL; 508 return NULL;
@@ -482,7 +520,7 @@ static int cifs_entry_is_dot(char *current_entry, struct cifsFileInfo *cfile)
482 char * filename = NULL; 520 char * filename = NULL;
483 int len = 0; 521 int len = 0;
484 522
485 if(cfile->srch_inf.info_level == 0x202) { 523 if(cfile->srch_inf.info_level == SMB_FIND_FILE_UNIX) {
486 FILE_UNIX_INFO * pFindData = (FILE_UNIX_INFO *)current_entry; 524 FILE_UNIX_INFO * pFindData = (FILE_UNIX_INFO *)current_entry;
487 filename = &pFindData->FileName[0]; 525 filename = &pFindData->FileName[0];
488 if(cfile->srch_inf.unicode) { 526 if(cfile->srch_inf.unicode) {
@@ -491,26 +529,34 @@ static int cifs_entry_is_dot(char *current_entry, struct cifsFileInfo *cfile)
491 /* BB should we make this strnlen of PATH_MAX? */ 529 /* BB should we make this strnlen of PATH_MAX? */
492 len = strnlen(filename, 5); 530 len = strnlen(filename, 5);
493 } 531 }
494 } else if(cfile->srch_inf.info_level == 0x101) { 532 } else if(cfile->srch_inf.info_level == SMB_FIND_FILE_DIRECTORY_INFO) {
495 FILE_DIRECTORY_INFO * pFindData = 533 FILE_DIRECTORY_INFO * pFindData =
496 (FILE_DIRECTORY_INFO *)current_entry; 534 (FILE_DIRECTORY_INFO *)current_entry;
497 filename = &pFindData->FileName[0]; 535 filename = &pFindData->FileName[0];
498 len = le32_to_cpu(pFindData->FileNameLength); 536 len = le32_to_cpu(pFindData->FileNameLength);
499 } else if(cfile->srch_inf.info_level == 0x102) { 537 } else if(cfile->srch_inf.info_level ==
538 SMB_FIND_FILE_FULL_DIRECTORY_INFO) {
500 FILE_FULL_DIRECTORY_INFO * pFindData = 539 FILE_FULL_DIRECTORY_INFO * pFindData =
501 (FILE_FULL_DIRECTORY_INFO *)current_entry; 540 (FILE_FULL_DIRECTORY_INFO *)current_entry;
502 filename = &pFindData->FileName[0]; 541 filename = &pFindData->FileName[0];
503 len = le32_to_cpu(pFindData->FileNameLength); 542 len = le32_to_cpu(pFindData->FileNameLength);
504 } else if(cfile->srch_inf.info_level == 0x105) { 543 } else if(cfile->srch_inf.info_level ==
544 SMB_FIND_FILE_ID_FULL_DIR_INFO) {
505 SEARCH_ID_FULL_DIR_INFO * pFindData = 545 SEARCH_ID_FULL_DIR_INFO * pFindData =
506 (SEARCH_ID_FULL_DIR_INFO *)current_entry; 546 (SEARCH_ID_FULL_DIR_INFO *)current_entry;
507 filename = &pFindData->FileName[0]; 547 filename = &pFindData->FileName[0];
508 len = le32_to_cpu(pFindData->FileNameLength); 548 len = le32_to_cpu(pFindData->FileNameLength);
509 } else if(cfile->srch_inf.info_level == 0x104) { 549 } else if(cfile->srch_inf.info_level ==
550 SMB_FIND_FILE_BOTH_DIRECTORY_INFO) {
510 FILE_BOTH_DIRECTORY_INFO * pFindData = 551 FILE_BOTH_DIRECTORY_INFO * pFindData =
511 (FILE_BOTH_DIRECTORY_INFO *)current_entry; 552 (FILE_BOTH_DIRECTORY_INFO *)current_entry;
512 filename = &pFindData->FileName[0]; 553 filename = &pFindData->FileName[0];
513 len = le32_to_cpu(pFindData->FileNameLength); 554 len = le32_to_cpu(pFindData->FileNameLength);
555 } else if(cfile->srch_inf.info_level == SMB_FIND_FILE_INFO_STANDARD) {
556 FIND_FILE_STANDARD_INFO * pFindData =
557 (FIND_FILE_STANDARD_INFO *)current_entry;
558 filename = &pFindData->FileName[0];
559 len = le32_to_cpu(pFindData->FileNameLength);
514 } else { 560 } else {
515 cFYI(1,("Unknown findfirst level %d",cfile->srch_inf.info_level)); 561 cFYI(1,("Unknown findfirst level %d",cfile->srch_inf.info_level));
516 } 562 }
@@ -597,7 +643,9 @@ static int find_cifs_entry(const int xid, struct cifsTconInfo *pTcon,
597 . and .. for the root of a drive and for those we need 643 . and .. for the root of a drive and for those we need
598 to start two entries earlier */ 644 to start two entries earlier */
599 645
600/* dump_cifs_file_struct(file, "In fce ");*/ 646#ifdef CONFIG_CIFS_DEBUG2
647 dump_cifs_file_struct(file, "In fce ");
648#endif
601 if(((index_to_find < cifsFile->srch_inf.index_of_last_entry) && 649 if(((index_to_find < cifsFile->srch_inf.index_of_last_entry) &&
602 is_dir_changed(file)) || 650 is_dir_changed(file)) ||
603 (index_to_find < first_entry_in_buffer)) { 651 (index_to_find < first_entry_in_buffer)) {
@@ -644,10 +692,12 @@ static int find_cifs_entry(const int xid, struct cifsTconInfo *pTcon,
644 first_entry_in_buffer = cifsFile->srch_inf.index_of_last_entry 692 first_entry_in_buffer = cifsFile->srch_inf.index_of_last_entry
645 - cifsFile->srch_inf.entries_in_buffer; 693 - cifsFile->srch_inf.entries_in_buffer;
646 pos_in_buf = index_to_find - first_entry_in_buffer; 694 pos_in_buf = index_to_find - first_entry_in_buffer;
647 cFYI(1,("found entry - pos_in_buf %d",pos_in_buf)); 695 cFYI(1,("found entry - pos_in_buf %d",pos_in_buf));
696
648 for(i=0;(i<(pos_in_buf)) && (current_entry != NULL);i++) { 697 for(i=0;(i<(pos_in_buf)) && (current_entry != NULL);i++) {
649 /* go entry by entry figuring out which is first */ 698 /* go entry by entry figuring out which is first */
650 current_entry = nxt_dir_entry(current_entry,end_of_smb); 699 current_entry = nxt_dir_entry(current_entry,end_of_smb,
700 cifsFile->srch_inf.info_level);
651 } 701 }
652 if((current_entry == NULL) && (i < pos_in_buf)) { 702 if((current_entry == NULL) && (i < pos_in_buf)) {
653 /* BB fixme - check if we should flag this error */ 703 /* BB fixme - check if we should flag this error */
@@ -674,7 +724,7 @@ static int find_cifs_entry(const int xid, struct cifsTconInfo *pTcon,
674/* inode num, inode type and filename returned */ 724/* inode num, inode type and filename returned */
675static int cifs_get_name_from_search_buf(struct qstr *pqst, 725static int cifs_get_name_from_search_buf(struct qstr *pqst,
676 char *current_entry, __u16 level, unsigned int unicode, 726 char *current_entry, __u16 level, unsigned int unicode,
677 struct cifs_sb_info * cifs_sb, ino_t *pinum) 727 struct cifs_sb_info * cifs_sb, int max_len, ino_t *pinum)
678{ 728{
679 int rc = 0; 729 int rc = 0;
680 unsigned int len = 0; 730 unsigned int len = 0;
@@ -718,10 +768,22 @@ static int cifs_get_name_from_search_buf(struct qstr *pqst,
718 (FILE_BOTH_DIRECTORY_INFO *)current_entry; 768 (FILE_BOTH_DIRECTORY_INFO *)current_entry;
719 filename = &pFindData->FileName[0]; 769 filename = &pFindData->FileName[0];
720 len = le32_to_cpu(pFindData->FileNameLength); 770 len = le32_to_cpu(pFindData->FileNameLength);
771 } else if(level == SMB_FIND_FILE_INFO_STANDARD) {
772 FIND_FILE_STANDARD_INFO * pFindData =
773 (FIND_FILE_STANDARD_INFO *)current_entry;
774 filename = &pFindData->FileName[0];
775 /* one byte length, no name conversion */
776 len = (unsigned int)pFindData->FileNameLength;
721 } else { 777 } else {
722 cFYI(1,("Unknown findfirst level %d",level)); 778 cFYI(1,("Unknown findfirst level %d",level));
723 return -EINVAL; 779 return -EINVAL;
724 } 780 }
781
782 if(len > max_len) {
783 cERROR(1,("bad search response length %d past smb end", len));
784 return -EINVAL;
785 }
786
725 if(unicode) { 787 if(unicode) {
726 /* BB fixme - test with long names */ 788 /* BB fixme - test with long names */
727 /* Note converted filename can be longer than in unicode */ 789 /* Note converted filename can be longer than in unicode */
@@ -741,7 +803,7 @@ static int cifs_get_name_from_search_buf(struct qstr *pqst,
741} 803}
742 804
743static int cifs_filldir(char *pfindEntry, struct file *file, 805static int cifs_filldir(char *pfindEntry, struct file *file,
744 filldir_t filldir, void *direntry, char *scratch_buf) 806 filldir_t filldir, void *direntry, char *scratch_buf, int max_len)
745{ 807{
746 int rc = 0; 808 int rc = 0;
747 struct qstr qstring; 809 struct qstr qstring;
@@ -777,6 +839,7 @@ static int cifs_filldir(char *pfindEntry, struct file *file,
777 rc = cifs_get_name_from_search_buf(&qstring,pfindEntry, 839 rc = cifs_get_name_from_search_buf(&qstring,pfindEntry,
778 pCifsF->srch_inf.info_level, 840 pCifsF->srch_inf.info_level,
779 pCifsF->srch_inf.unicode,cifs_sb, 841 pCifsF->srch_inf.unicode,cifs_sb,
842 max_len,
780 &inum /* returned */); 843 &inum /* returned */);
781 844
782 if(rc) 845 if(rc)
@@ -798,13 +861,16 @@ static int cifs_filldir(char *pfindEntry, struct file *file,
798 /* we pass in rc below, indicating whether it is a new inode, 861 /* we pass in rc below, indicating whether it is a new inode,
799 so we can figure out whether to invalidate the inode cached 862 so we can figure out whether to invalidate the inode cached
800 data if the file has changed */ 863 data if the file has changed */
801 if(pCifsF->srch_inf.info_level == SMB_FIND_FILE_UNIX) { 864 if(pCifsF->srch_inf.info_level == SMB_FIND_FILE_UNIX)
802 unix_fill_in_inode(tmp_inode, 865 unix_fill_in_inode(tmp_inode,
803 (FILE_UNIX_INFO *)pfindEntry,&obj_type, rc); 866 (FILE_UNIX_INFO *)pfindEntry,
804 } else { 867 &obj_type, rc);
805 fill_in_inode(tmp_inode, 868 else if(pCifsF->srch_inf.info_level == SMB_FIND_FILE_INFO_STANDARD)
806 (FILE_DIRECTORY_INFO *)pfindEntry,&obj_type, rc); 869 fill_in_inode(tmp_inode, 0 /* old level 1 buffer type */,
807 } 870 pfindEntry, &obj_type, rc);
871 else
872 fill_in_inode(tmp_inode, 1 /* NT */, pfindEntry, &obj_type, rc);
873
808 874
809 rc = filldir(direntry,qstring.name,qstring.len,file->f_pos, 875 rc = filldir(direntry,qstring.name,qstring.len,file->f_pos,
810 tmp_inode->i_ino,obj_type); 876 tmp_inode->i_ino,obj_type);
@@ -864,6 +930,12 @@ static int cifs_save_resume_key(const char *current_entry,
864 filename = &pFindData->FileName[0]; 930 filename = &pFindData->FileName[0];
865 len = le32_to_cpu(pFindData->FileNameLength); 931 len = le32_to_cpu(pFindData->FileNameLength);
866 cifsFile->srch_inf.resume_key = pFindData->FileIndex; 932 cifsFile->srch_inf.resume_key = pFindData->FileIndex;
933 } else if(level == SMB_FIND_FILE_INFO_STANDARD) {
934 FIND_FILE_STANDARD_INFO * pFindData =
935 (FIND_FILE_STANDARD_INFO *)current_entry;
936 filename = &pFindData->FileName[0];
937 /* one byte length, no name conversion */
938 len = (unsigned int)pFindData->FileNameLength;
867 } else { 939 } else {
868 cFYI(1,("Unknown findfirst level %d",level)); 940 cFYI(1,("Unknown findfirst level %d",level));
869 return -EINVAL; 941 return -EINVAL;
@@ -884,6 +956,7 @@ int cifs_readdir(struct file *file, void *direntry, filldir_t filldir)
884 int num_to_fill = 0; 956 int num_to_fill = 0;
885 char * tmp_buf = NULL; 957 char * tmp_buf = NULL;
886 char * end_of_smb; 958 char * end_of_smb;
959 int max_len;
887 960
888 xid = GetXid(); 961 xid = GetXid();
889 962
@@ -909,7 +982,7 @@ int cifs_readdir(struct file *file, void *direntry, filldir_t filldir)
909 case 1: 982 case 1:
910 if (filldir(direntry, "..", 2, file->f_pos, 983 if (filldir(direntry, "..", 2, file->f_pos,
911 file->f_dentry->d_parent->d_inode->i_ino, DT_DIR) < 0) { 984 file->f_dentry->d_parent->d_inode->i_ino, DT_DIR) < 0) {
912 cERROR(1, ("Filldir for parent dir failed ")); 985 cERROR(1, ("Filldir for parent dir failed"));
913 rc = -ENOMEM; 986 rc = -ENOMEM;
914 break; 987 break;
915 } 988 }
@@ -959,10 +1032,11 @@ int cifs_readdir(struct file *file, void *direntry, filldir_t filldir)
959 goto rddir2_exit; 1032 goto rddir2_exit;
960 } 1033 }
961 cFYI(1,("loop through %d times filling dir for net buf %p", 1034 cFYI(1,("loop through %d times filling dir for net buf %p",
962 num_to_fill,cifsFile->srch_inf.ntwrk_buf_start)); 1035 num_to_fill,cifsFile->srch_inf.ntwrk_buf_start));
963 end_of_smb = cifsFile->srch_inf.ntwrk_buf_start + 1036 max_len = smbCalcSize((struct smb_hdr *)
964 smbCalcSize((struct smb_hdr *) 1037 cifsFile->srch_inf.ntwrk_buf_start);
965 cifsFile->srch_inf.ntwrk_buf_start); 1038 end_of_smb = cifsFile->srch_inf.ntwrk_buf_start + max_len;
1039
966 /* To be safe - for UCS to UTF-8 with strings loaded 1040 /* To be safe - for UCS to UTF-8 with strings loaded
967 with the rare long characters alloc more to account for 1041 with the rare long characters alloc more to account for
968 such multibyte target UTF-8 characters. cifs_unicode.c, 1042 such multibyte target UTF-8 characters. cifs_unicode.c,
@@ -977,17 +1051,19 @@ int cifs_readdir(struct file *file, void *direntry, filldir_t filldir)
977 } 1051 }
978 /* if buggy server returns . and .. late do 1052 /* if buggy server returns . and .. late do
979 we want to check for that here? */ 1053 we want to check for that here? */
980 rc = cifs_filldir(current_entry, file, 1054 rc = cifs_filldir(current_entry, file,
981 filldir, direntry,tmp_buf); 1055 filldir, direntry, tmp_buf, max_len);
982 file->f_pos++; 1056 file->f_pos++;
983 if(file->f_pos == cifsFile->srch_inf.index_of_last_entry) { 1057 if(file->f_pos ==
1058 cifsFile->srch_inf.index_of_last_entry) {
984 cFYI(1,("last entry in buf at pos %lld %s", 1059 cFYI(1,("last entry in buf at pos %lld %s",
985 file->f_pos,tmp_buf)); /* BB removeme BB */ 1060 file->f_pos,tmp_buf));
986 cifs_save_resume_key(current_entry,cifsFile); 1061 cifs_save_resume_key(current_entry,cifsFile);
987 break; 1062 break;
988 } else 1063 } else
989 current_entry = nxt_dir_entry(current_entry, 1064 current_entry =
990 end_of_smb); 1065 nxt_dir_entry(current_entry, end_of_smb,
1066 cifsFile->srch_inf.info_level);
991 } 1067 }
992 kfree(tmp_buf); 1068 kfree(tmp_buf);
993 break; 1069 break;
diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c
new file mode 100644
index 000000000000..7202d534ef0b
--- /dev/null
+++ b/fs/cifs/sess.c
@@ -0,0 +1,538 @@
1/*
2 * fs/cifs/sess.c
3 *
4 * SMB/CIFS session setup handling routines
5 *
6 * Copyright (c) International Business Machines Corp., 2006
7 * Author(s): Steve French (sfrench@us.ibm.com)
8 *
9 * This library is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU Lesser General Public License as published
11 * by the Free Software Foundation; either version 2.1 of the License, or
12 * (at your option) any later version.
13 *
14 * This library is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
17 * the GNU Lesser General Public License for more details.
18 *
19 * You should have received a copy of the GNU Lesser General Public License
20 * along with this library; if not, write to the Free Software
21 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
22 */
23
24#include "cifspdu.h"
25#include "cifsglob.h"
26#include "cifsproto.h"
27#include "cifs_unicode.h"
28#include "cifs_debug.h"
29#include "ntlmssp.h"
30#include "nterr.h"
31#include <linux/utsname.h>
32
33extern void SMBNTencrypt(unsigned char *passwd, unsigned char *c8,
34 unsigned char *p24);
35
36static __u32 cifs_ssetup_hdr(struct cifsSesInfo *ses, SESSION_SETUP_ANDX *pSMB)
37{
38 __u32 capabilities = 0;
39
40 /* init fields common to all four types of SessSetup */
41 /* note that header is initialized to zero in header_assemble */
42 pSMB->req.AndXCommand = 0xFF;
43 pSMB->req.MaxBufferSize = cpu_to_le16(ses->server->maxBuf);
44 pSMB->req.MaxMpxCount = cpu_to_le16(ses->server->maxReq);
45
46 /* Now no need to set SMBFLG_CASELESS or obsolete CANONICAL PATH */
47
48 /* BB verify whether signing required on neg or just on auth frame
49 (and NTLM case) */
50
51 capabilities = CAP_LARGE_FILES | CAP_NT_SMBS | CAP_LEVEL_II_OPLOCKS |
52 CAP_LARGE_WRITE_X | CAP_LARGE_READ_X;
53
54 if(ses->server->secMode & (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED))
55 pSMB->req.hdr.Flags2 |= SMBFLG2_SECURITY_SIGNATURE;
56
57 if (ses->capabilities & CAP_UNICODE) {
58 pSMB->req.hdr.Flags2 |= SMBFLG2_UNICODE;
59 capabilities |= CAP_UNICODE;
60 }
61 if (ses->capabilities & CAP_STATUS32) {
62 pSMB->req.hdr.Flags2 |= SMBFLG2_ERR_STATUS;
63 capabilities |= CAP_STATUS32;
64 }
65 if (ses->capabilities & CAP_DFS) {
66 pSMB->req.hdr.Flags2 |= SMBFLG2_DFS;
67 capabilities |= CAP_DFS;
68 }
69 if (ses->capabilities & CAP_UNIX) {
70 capabilities |= CAP_UNIX;
71 }
72
73 /* BB check whether to init vcnum BB */
74 return capabilities;
75}
76
77static void unicode_ssetup_strings(char ** pbcc_area, struct cifsSesInfo *ses,
78 const struct nls_table * nls_cp)
79{
80 char * bcc_ptr = *pbcc_area;
81 int bytes_ret = 0;
82
83 /* BB FIXME add check that strings total less
84 than 335 or will need to send them as arrays */
85
86 /* unicode strings, must be word aligned before the call */
87/* if ((long) bcc_ptr % 2) {
88 *bcc_ptr = 0;
89 bcc_ptr++;
90 } */
91 /* copy user */
92 if(ses->userName == NULL) {
93 /* BB what about null user mounts - check that we do this BB */
94 } else { /* 300 should be long enough for any conceivable user name */
95 bytes_ret = cifs_strtoUCS((__le16 *) bcc_ptr, ses->userName,
96 300, nls_cp);
97 }
98 bcc_ptr += 2 * bytes_ret;
99 bcc_ptr += 2; /* account for null termination */
100 /* copy domain */
101 if(ses->domainName == NULL)
102 bytes_ret = cifs_strtoUCS((__le16 *) bcc_ptr,
103 "CIFS_LINUX_DOM", 32, nls_cp);
104 else
105 bytes_ret = cifs_strtoUCS((__le16 *) bcc_ptr, ses->domainName,
106 256, nls_cp);
107 bcc_ptr += 2 * bytes_ret;
108 bcc_ptr += 2; /* account for null terminator */
109
110 /* Copy OS version */
111 bytes_ret = cifs_strtoUCS((__le16 *)bcc_ptr, "Linux version ", 32,
112 nls_cp);
113 bcc_ptr += 2 * bytes_ret;
114 bytes_ret = cifs_strtoUCS((__le16 *) bcc_ptr, system_utsname.release,
115 32, nls_cp);
116 bcc_ptr += 2 * bytes_ret;
117 bcc_ptr += 2; /* trailing null */
118
119 bytes_ret = cifs_strtoUCS((__le16 *) bcc_ptr, CIFS_NETWORK_OPSYS,
120 32, nls_cp);
121 bcc_ptr += 2 * bytes_ret;
122 bcc_ptr += 2; /* trailing null */
123
124 *pbcc_area = bcc_ptr;
125}
126
127static void ascii_ssetup_strings(char ** pbcc_area, struct cifsSesInfo *ses,
128 const struct nls_table * nls_cp)
129{
130 char * bcc_ptr = *pbcc_area;
131
132 /* copy user */
133 /* BB what about null user mounts - check that we do this BB */
134 /* copy user */
135 if(ses->userName == NULL) {
136 /* BB what about null user mounts - check that we do this BB */
137 } else { /* 300 should be long enough for any conceivable user name */
138 strncpy(bcc_ptr, ses->userName, 300);
139 }
140 /* BB improve check for overflow */
141 bcc_ptr += strnlen(ses->userName, 300);
142 *bcc_ptr = 0;
143 bcc_ptr++; /* account for null termination */
144
145 /* copy domain */
146
147 if(ses->domainName == NULL) {
148 strcpy(bcc_ptr, "CIFS_LINUX_DOM");
149 bcc_ptr += 14; /* strlen(CIFS_LINUX_DOM) */
150 } else {
151 strncpy(bcc_ptr, ses->domainName, 256);
152 bcc_ptr += strnlen(ses->domainName, 256);
153 }
154 *bcc_ptr = 0;
155 bcc_ptr++;
156
157 /* BB check for overflow here */
158
159 strcpy(bcc_ptr, "Linux version ");
160 bcc_ptr += strlen("Linux version ");
161 strcpy(bcc_ptr, system_utsname.release);
162 bcc_ptr += strlen(system_utsname.release) + 1;
163
164 strcpy(bcc_ptr, CIFS_NETWORK_OPSYS);
165 bcc_ptr += strlen(CIFS_NETWORK_OPSYS) + 1;
166
167 *pbcc_area = bcc_ptr;
168}
169
170static int decode_unicode_ssetup(char ** pbcc_area, int bleft, struct cifsSesInfo *ses,
171 const struct nls_table * nls_cp)
172{
173 int rc = 0;
174 int words_left, len;
175 char * data = *pbcc_area;
176
177
178
179 cFYI(1,("bleft %d",bleft));
180
181
182 /* word align, if bytes remaining is not even */
183 if(bleft % 2) {
184 bleft--;
185 data++;
186 }
187 words_left = bleft / 2;
188
189 /* save off server operating system */
190 len = UniStrnlen((wchar_t *) data, words_left);
191
192/* We look for obvious messed up bcc or strings in response so we do not go off
193 the end since (at least) WIN2K and Windows XP have a major bug in not null
194 terminating last Unicode string in response */
195 if(len >= words_left)
196 return rc;
197
198 if(ses->serverOS)
199 kfree(ses->serverOS);
200 /* UTF-8 string will not grow more than four times as big as UCS-16 */
201 ses->serverOS = kzalloc(4 * len, GFP_KERNEL);
202 if(ses->serverOS != NULL) {
203 cifs_strfromUCS_le(ses->serverOS, (__le16 *)data, len,
204 nls_cp);
205 }
206 data += 2 * (len + 1);
207 words_left -= len + 1;
208
209 /* save off server network operating system */
210 len = UniStrnlen((wchar_t *) data, words_left);
211
212 if(len >= words_left)
213 return rc;
214
215 if(ses->serverNOS)
216 kfree(ses->serverNOS);
217 ses->serverNOS = kzalloc(4 * len, GFP_KERNEL); /* BB this is wrong length FIXME BB */
218 if(ses->serverNOS != NULL) {
219 cifs_strfromUCS_le(ses->serverNOS, (__le16 *)data, len,
220 nls_cp);
221 if(strncmp(ses->serverNOS, "NT LAN Manager 4",16) == 0) {
222 cFYI(1,("NT4 server"));
223 ses->flags |= CIFS_SES_NT4;
224 }
225 }
226 data += 2 * (len + 1);
227 words_left -= len + 1;
228
229 /* save off server domain */
230 len = UniStrnlen((wchar_t *) data, words_left);
231
232 if(len > words_left)
233 return rc;
234
235 if(ses->serverDomain)
236 kfree(ses->serverDomain);
237 ses->serverDomain = kzalloc(2 * (len + 1), GFP_KERNEL); /* BB FIXME wrong length */
238 if(ses->serverDomain != NULL) {
239 cifs_strfromUCS_le(ses->serverDomain, (__le16 *)data, len,
240 nls_cp);
241 ses->serverDomain[2*len] = 0;
242 ses->serverDomain[(2*len) + 1] = 0;
243 }
244 data += 2 * (len + 1);
245 words_left -= len + 1;
246
247 cFYI(1,("words left: %d",words_left));
248
249 return rc;
250}
251
252static int decode_ascii_ssetup(char ** pbcc_area, int bleft, struct cifsSesInfo *ses,
253 const struct nls_table * nls_cp)
254{
255 int rc = 0;
256 int len;
257 char * bcc_ptr = *pbcc_area;
258
259 cFYI(1,("decode sessetup ascii. bleft %d", bleft));
260
261 len = strnlen(bcc_ptr, bleft);
262 if(len >= bleft)
263 return rc;
264
265 if(ses->serverOS)
266 kfree(ses->serverOS);
267
268 ses->serverOS = kzalloc(len + 1, GFP_KERNEL);
269 if(ses->serverOS)
270 strncpy(ses->serverOS, bcc_ptr, len);
271
272 bcc_ptr += len + 1;
273 bleft -= len + 1;
274
275 len = strnlen(bcc_ptr, bleft);
276 if(len >= bleft)
277 return rc;
278
279 if(ses->serverNOS)
280 kfree(ses->serverNOS);
281
282 ses->serverNOS = kzalloc(len + 1, GFP_KERNEL);
283 if(ses->serverNOS)
284 strncpy(ses->serverNOS, bcc_ptr, len);
285
286 bcc_ptr += len + 1;
287 bleft -= len + 1;
288
289 len = strnlen(bcc_ptr, bleft);
290 if(len > bleft)
291 return rc;
292
293 if(ses->serverDomain)
294 kfree(ses->serverDomain);
295
296 ses->serverDomain = kzalloc(len + 1, GFP_KERNEL);
297 if(ses->serverOS)
298 strncpy(ses->serverOS, bcc_ptr, len);
299
300 bcc_ptr += len + 1;
301 bleft -= len + 1;
302
303 cFYI(1,("ascii: bytes left %d",bleft));
304
305 return rc;
306}
307
308int
309CIFS_SessSetup(unsigned int xid, struct cifsSesInfo *ses, int first_time,
310 const struct nls_table *nls_cp)
311{
312 int rc = 0;
313 int wct;
314 struct smb_hdr *smb_buf;
315 char *bcc_ptr;
316 char *str_area;
317 SESSION_SETUP_ANDX *pSMB;
318 __u32 capabilities;
319 int count;
320 int resp_buf_type = 0;
321 struct kvec iov[2];
322 enum securityEnum type;
323 __u16 action;
324 int bytes_remaining;
325
326 if(ses == NULL)
327 return -EINVAL;
328
329 type = ses->server->secType;
330
331 cFYI(1,("sess setup type %d",type));
332 if(type == LANMAN) {
333#ifndef CONFIG_CIFS_WEAK_PW_HASH
334 /* LANMAN and plaintext are less secure and off by default.
335 So we make this explicitly be turned on in kconfig (in the
336 build) and turned on at runtime (changed from the default)
337 in proc/fs/cifs or via mount parm. Unfortunately this is
338 needed for old Win (e.g. Win95), some obscure NAS and OS/2 */
339 return -EOPNOTSUPP;
340#endif
341 wct = 10; /* lanman 2 style sessionsetup */
342 } else if((type == NTLM) || (type == NTLMv2)) {
343 /* For NTLMv2 failures eventually may need to retry NTLM */
344 wct = 13; /* old style NTLM sessionsetup */
345 } else /* same size for negotiate or auth, NTLMSSP or extended security */
346 wct = 12;
347
348 rc = small_smb_init_no_tc(SMB_COM_SESSION_SETUP_ANDX, wct, ses,
349 (void **)&smb_buf);
350 if(rc)
351 return rc;
352
353 pSMB = (SESSION_SETUP_ANDX *)smb_buf;
354
355 capabilities = cifs_ssetup_hdr(ses, pSMB);
356
357 /* we will send the SMB in two pieces,
358 a fixed length beginning part, and a
359 second part which will include the strings
360 and rest of bcc area, in order to avoid having
361 to do a large buffer 17K allocation */
362 iov[0].iov_base = (char *)pSMB;
363 iov[0].iov_len = smb_buf->smb_buf_length + 4;
364
365 /* 2000 big enough to fit max user, domain, NOS name etc. */
366 str_area = kmalloc(2000, GFP_KERNEL);
367 bcc_ptr = str_area;
368
369 if(type == LANMAN) {
370#ifdef CONFIG_CIFS_WEAK_PW_HASH
371 char lnm_session_key[CIFS_SESS_KEY_SIZE];
372
373 /* no capabilities flags in old lanman negotiation */
374
375 pSMB->old_req.PasswordLength = CIFS_SESS_KEY_SIZE;
376 /* BB calculate hash with password */
377 /* and copy into bcc */
378
379 calc_lanman_hash(ses, lnm_session_key);
380
381/* #ifdef CONFIG_CIFS_DEBUG2
382 cifs_dump_mem("cryptkey: ",ses->server->cryptKey,
383 CIFS_SESS_KEY_SIZE);
384#endif */
385 memcpy(bcc_ptr, (char *)lnm_session_key, CIFS_SESS_KEY_SIZE);
386 bcc_ptr += CIFS_SESS_KEY_SIZE;
387
388 /* can not sign if LANMAN negotiated so no need
389 to calculate signing key? but what if server
390 changed to do higher than lanman dialect and
391 we reconnected would we ever calc signing_key? */
392
393 cFYI(1,("Negotiating LANMAN setting up strings"));
394 /* Unicode not allowed for LANMAN dialects */
395 ascii_ssetup_strings(&bcc_ptr, ses, nls_cp);
396#endif
397 } else if (type == NTLM) {
398 char ntlm_session_key[CIFS_SESS_KEY_SIZE];
399
400 pSMB->req_no_secext.Capabilities = cpu_to_le32(capabilities);
401 pSMB->req_no_secext.CaseInsensitivePasswordLength =
402 cpu_to_le16(CIFS_SESS_KEY_SIZE);
403 pSMB->req_no_secext.CaseSensitivePasswordLength =
404 cpu_to_le16(CIFS_SESS_KEY_SIZE);
405
406 /* calculate session key */
407 SMBNTencrypt(ses->password, ses->server->cryptKey,
408 ntlm_session_key);
409
410 if(first_time) /* should this be moved into common code
411 with similar ntlmv2 path? */
412 cifs_calculate_mac_key(ses->server->mac_signing_key,
413 ntlm_session_key, ses->password);
414 /* copy session key */
415
416 memcpy(bcc_ptr, (char *)ntlm_session_key,CIFS_SESS_KEY_SIZE);
417 bcc_ptr += CIFS_SESS_KEY_SIZE;
418 memcpy(bcc_ptr, (char *)ntlm_session_key,CIFS_SESS_KEY_SIZE);
419 bcc_ptr += CIFS_SESS_KEY_SIZE;
420 if(ses->capabilities & CAP_UNICODE) {
421 /* unicode strings must be word aligned */
422 if (iov[0].iov_len % 2) {
423 *bcc_ptr = 0;
424 bcc_ptr++;
425 }
426 unicode_ssetup_strings(&bcc_ptr, ses, nls_cp);
427 } else
428 ascii_ssetup_strings(&bcc_ptr, ses, nls_cp);
429 } else if (type == NTLMv2) {
430 char * v2_sess_key =
431 kmalloc(sizeof(struct ntlmv2_resp), GFP_KERNEL);
432
433 /* BB FIXME change all users of v2_sess_key to
434 struct ntlmv2_resp */
435
436 if(v2_sess_key == NULL) {
437 cifs_small_buf_release(smb_buf);
438 return -ENOMEM;
439 }
440
441 pSMB->req_no_secext.Capabilities = cpu_to_le32(capabilities);
442
443 /* LM2 password would be here if we supported it */
444 pSMB->req_no_secext.CaseInsensitivePasswordLength = 0;
445 /* cpu_to_le16(LM2_SESS_KEY_SIZE); */
446
447 pSMB->req_no_secext.CaseSensitivePasswordLength =
448 cpu_to_le16(sizeof(struct ntlmv2_resp));
449
450 /* calculate session key */
451 setup_ntlmv2_rsp(ses, v2_sess_key, nls_cp);
452 if(first_time) /* should this be moved into common code
453 with similar ntlmv2 path? */
454 /* cifs_calculate_ntlmv2_mac_key(ses->server->mac_signing_key,
455 response BB FIXME, v2_sess_key); */
456
457 /* copy session key */
458
459 /* memcpy(bcc_ptr, (char *)ntlm_session_key,LM2_SESS_KEY_SIZE);
460 bcc_ptr += LM2_SESS_KEY_SIZE; */
461 memcpy(bcc_ptr, (char *)v2_sess_key, sizeof(struct ntlmv2_resp));
462 bcc_ptr += sizeof(struct ntlmv2_resp);
463 kfree(v2_sess_key);
464 if(ses->capabilities & CAP_UNICODE) {
465 if(iov[0].iov_len % 2) {
466 *bcc_ptr = 0;
467 } bcc_ptr++;
468 unicode_ssetup_strings(&bcc_ptr, ses, nls_cp);
469 } else
470 ascii_ssetup_strings(&bcc_ptr, ses, nls_cp);
471 } else /* NTLMSSP or SPNEGO */ {
472 pSMB->req.hdr.Flags2 |= SMBFLG2_EXT_SEC;
473 capabilities |= CAP_EXTENDED_SECURITY;
474 pSMB->req.Capabilities = cpu_to_le32(capabilities);
475 /* BB set password lengths */
476 }
477
478 count = (long) bcc_ptr - (long) str_area;
479 smb_buf->smb_buf_length += count;
480
481 BCC_LE(smb_buf) = cpu_to_le16(count);
482
483 iov[1].iov_base = str_area;
484 iov[1].iov_len = count;
485 rc = SendReceive2(xid, ses, iov, 2 /* num_iovecs */, &resp_buf_type, 0);
486 /* SMB request buf freed in SendReceive2 */
487
488 cFYI(1,("ssetup rc from sendrecv2 is %d",rc));
489 if(rc)
490 goto ssetup_exit;
491
492 pSMB = (SESSION_SETUP_ANDX *)iov[0].iov_base;
493 smb_buf = (struct smb_hdr *)iov[0].iov_base;
494
495 if((smb_buf->WordCount != 3) && (smb_buf->WordCount != 4)) {
496 rc = -EIO;
497 cERROR(1,("bad word count %d", smb_buf->WordCount));
498 goto ssetup_exit;
499 }
500 action = le16_to_cpu(pSMB->resp.Action);
501 if (action & GUEST_LOGIN)
502 cFYI(1, ("Guest login")); /* BB mark SesInfo struct? */
503 ses->Suid = smb_buf->Uid; /* UID left in wire format (le) */
504 cFYI(1, ("UID = %d ", ses->Suid));
505 /* response can have either 3 or 4 word count - Samba sends 3 */
506 /* and lanman response is 3 */
507 bytes_remaining = BCC(smb_buf);
508 bcc_ptr = pByteArea(smb_buf);
509
510 if(smb_buf->WordCount == 4) {
511 __u16 blob_len;
512 blob_len = le16_to_cpu(pSMB->resp.SecurityBlobLength);
513 bcc_ptr += blob_len;
514 if(blob_len > bytes_remaining) {
515 cERROR(1,("bad security blob length %d", blob_len));
516 rc = -EINVAL;
517 goto ssetup_exit;
518 }
519 bytes_remaining -= blob_len;
520 }
521
522 /* BB check if Unicode and decode strings */
523 if(smb_buf->Flags2 & SMBFLG2_UNICODE)
524 rc = decode_unicode_ssetup(&bcc_ptr, bytes_remaining,
525 ses, nls_cp);
526 else
527 rc = decode_ascii_ssetup(&bcc_ptr, bytes_remaining, ses,nls_cp);
528
529ssetup_exit:
530 kfree(str_area);
531 if(resp_buf_type == CIFS_SMALL_BUFFER) {
532 cFYI(1,("ssetup freeing small buf %p", iov[0].iov_base));
533 cifs_small_buf_release(iov[0].iov_base);
534 } else if(resp_buf_type == CIFS_LARGE_BUFFER)
535 cifs_buf_release(iov[0].iov_base);
536
537 return rc;
538}
diff --git a/fs/cifs/smbencrypt.c b/fs/cifs/smbencrypt.c
index 6103bcdfb16d..f518c5e45035 100644
--- a/fs/cifs/smbencrypt.c
+++ b/fs/cifs/smbencrypt.c
@@ -30,6 +30,7 @@
30#include <linux/random.h> 30#include <linux/random.h>
31#include "cifs_unicode.h" 31#include "cifs_unicode.h"
32#include "cifspdu.h" 32#include "cifspdu.h"
33#include "cifsglob.h"
33#include "md5.h" 34#include "md5.h"
34#include "cifs_debug.h" 35#include "cifs_debug.h"
35#include "cifsencrypt.h" 36#include "cifsencrypt.h"
diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c
index 3da80409466c..17ba329e2b3d 100644
--- a/fs/cifs/transport.c
+++ b/fs/cifs/transport.c
@@ -654,8 +654,7 @@ SendReceive(const unsigned int xid, struct cifsSesInfo *ses,
654 654
655 if (in_buf->smb_buf_length > CIFSMaxBufSize + MAX_CIFS_HDR_SIZE - 4) { 655 if (in_buf->smb_buf_length > CIFSMaxBufSize + MAX_CIFS_HDR_SIZE - 4) {
656 up(&ses->server->tcpSem); 656 up(&ses->server->tcpSem);
657 cERROR(1, 657 cERROR(1, ("Illegal length, greater than maximum frame, %d",
658 ("Illegal length, greater than maximum frame, %d ",
659 in_buf->smb_buf_length)); 658 in_buf->smb_buf_length));
660 DeleteMidQEntry(midQ); 659 DeleteMidQEntry(midQ);
661 /* If not lock req, update # of requests on wire to server */ 660 /* If not lock req, update # of requests on wire to server */
diff --git a/fs/coda/psdev.c b/fs/coda/psdev.c
index 6c6771db36da..7caee8d8ea3b 100644
--- a/fs/coda/psdev.c
+++ b/fs/coda/psdev.c
@@ -259,7 +259,7 @@ static ssize_t coda_psdev_read(struct file * file, char __user * buf,
259 /* If request was not a signal, enqueue and don't free */ 259 /* If request was not a signal, enqueue and don't free */
260 if (!(req->uc_flags & REQ_ASYNC)) { 260 if (!(req->uc_flags & REQ_ASYNC)) {
261 req->uc_flags |= REQ_READ; 261 req->uc_flags |= REQ_READ;
262 list_add(&(req->uc_chain), vcp->vc_processing.prev); 262 list_add_tail(&(req->uc_chain), &vcp->vc_processing);
263 goto out; 263 goto out;
264 } 264 }
265 265
diff --git a/fs/coda/upcall.c b/fs/coda/upcall.c
index b040eba13a7d..a5b5e631ba61 100644
--- a/fs/coda/upcall.c
+++ b/fs/coda/upcall.c
@@ -725,7 +725,7 @@ static int coda_upcall(struct coda_sb_info *sbi,
725 ((union inputArgs *)buffer)->ih.unique = req->uc_unique; 725 ((union inputArgs *)buffer)->ih.unique = req->uc_unique;
726 726
727 /* Append msg to pending queue and poke Venus. */ 727 /* Append msg to pending queue and poke Venus. */
728 list_add(&(req->uc_chain), vcommp->vc_pending.prev); 728 list_add_tail(&(req->uc_chain), &vcommp->vc_pending);
729 729
730 wake_up_interruptible(&vcommp->vc_waitq); 730 wake_up_interruptible(&vcommp->vc_waitq);
731 /* We can be interrupted while we wait for Venus to process 731 /* We can be interrupted while we wait for Venus to process
diff --git a/fs/compat.c b/fs/compat.c
index 7e7e5bc4f3cf..e31e9cf96647 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -55,6 +55,20 @@
55 55
56extern void sigset_from_compat(sigset_t *set, compat_sigset_t *compat); 56extern void sigset_from_compat(sigset_t *set, compat_sigset_t *compat);
57 57
58int compat_log = 1;
59
60int compat_printk(const char *fmt, ...)
61{
62 va_list ap;
63 int ret;
64 if (!compat_log)
65 return 0;
66 va_start(ap, fmt);
67 ret = vprintk(fmt, ap);
68 va_end(ap);
69 return ret;
70}
71
58/* 72/*
59 * Not all architectures have sys_utime, so implement this in terms 73 * Not all architectures have sys_utime, so implement this in terms
60 * of sys_utimes. 74 * of sys_utimes.
@@ -359,7 +373,7 @@ static void compat_ioctl_error(struct file *filp, unsigned int fd,
359 sprintf(buf,"'%c'", (cmd>>24) & 0x3f); 373 sprintf(buf,"'%c'", (cmd>>24) & 0x3f);
360 if (!isprint(buf[1])) 374 if (!isprint(buf[1]))
361 sprintf(buf, "%02x", buf[1]); 375 sprintf(buf, "%02x", buf[1]);
362 printk("ioctl32(%s:%d): Unknown cmd fd(%d) " 376 compat_printk("ioctl32(%s:%d): Unknown cmd fd(%d) "
363 "cmd(%08x){%s} arg(%08x) on %s\n", 377 "cmd(%08x){%s} arg(%08x) on %s\n",
364 current->comm, current->pid, 378 current->comm, current->pid,
365 (int)fd, (unsigned int)cmd, buf, 379 (int)fd, (unsigned int)cmd, buf,
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index 9eb9824dd332..d8ecfedef189 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -80,6 +80,7 @@
80#include <net/bluetooth/rfcomm.h> 80#include <net/bluetooth/rfcomm.h>
81 81
82#include <linux/capi.h> 82#include <linux/capi.h>
83#include <linux/gigaset_dev.h>
83 84
84#include <scsi/scsi.h> 85#include <scsi/scsi.h>
85#include <scsi/scsi_ioctl.h> 86#include <scsi/scsi_ioctl.h>
diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c
index 5f952187fc53..207f8006fd6c 100644
--- a/fs/configfs/dir.c
+++ b/fs/configfs/dir.c
@@ -1009,8 +1009,7 @@ static int configfs_readdir(struct file * filp, void * dirent, filldir_t filldir
1009 /* fallthrough */ 1009 /* fallthrough */
1010 default: 1010 default:
1011 if (filp->f_pos == 2) { 1011 if (filp->f_pos == 2) {
1012 list_del(q); 1012 list_move(q, &parent_sd->s_children);
1013 list_add(q, &parent_sd->s_children);
1014 } 1013 }
1015 for (p=q->next; p!= &parent_sd->s_children; p=p->next) { 1014 for (p=q->next; p!= &parent_sd->s_children; p=p->next) {
1016 struct configfs_dirent *next; 1015 struct configfs_dirent *next;
@@ -1033,8 +1032,7 @@ static int configfs_readdir(struct file * filp, void * dirent, filldir_t filldir
1033 dt_type(next)) < 0) 1032 dt_type(next)) < 0)
1034 return 0; 1033 return 0;
1035 1034
1036 list_del(q); 1035 list_move(q, p);
1037 list_add(q, p);
1038 p = q; 1036 p = q;
1039 filp->f_pos++; 1037 filp->f_pos++;
1040 } 1038 }
diff --git a/fs/dcache.c b/fs/dcache.c
index b85fda360533..48b44a714b35 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -522,8 +522,7 @@ void shrink_dcache_sb(struct super_block * sb)
522 dentry = list_entry(tmp, struct dentry, d_lru); 522 dentry = list_entry(tmp, struct dentry, d_lru);
523 if (dentry->d_sb != sb) 523 if (dentry->d_sb != sb)
524 continue; 524 continue;
525 list_del(tmp); 525 list_move(tmp, &dentry_unused);
526 list_add(tmp, &dentry_unused);
527 } 526 }
528 527
529 /* 528 /*
@@ -638,7 +637,7 @@ resume:
638 * of the unused list for prune_dcache 637 * of the unused list for prune_dcache
639 */ 638 */
640 if (!atomic_read(&dentry->d_count)) { 639 if (!atomic_read(&dentry->d_count)) {
641 list_add(&dentry->d_lru, dentry_unused.prev); 640 list_add_tail(&dentry->d_lru, &dentry_unused);
642 dentry_stat.nr_unused++; 641 dentry_stat.nr_unused++;
643 found++; 642 found++;
644 } 643 }
diff --git a/fs/dquot.c b/fs/dquot.c
index 81d87a413c68..0122a279106a 100644
--- a/fs/dquot.c
+++ b/fs/dquot.c
@@ -250,7 +250,7 @@ static inline struct dquot *find_dquot(unsigned int hashent, struct super_block
250/* Add a dquot to the tail of the free list */ 250/* Add a dquot to the tail of the free list */
251static inline void put_dquot_last(struct dquot *dquot) 251static inline void put_dquot_last(struct dquot *dquot)
252{ 252{
253 list_add(&dquot->dq_free, free_dquots.prev); 253 list_add_tail(&dquot->dq_free, &free_dquots);
254 dqstats.free_dquots++; 254 dqstats.free_dquots++;
255} 255}
256 256
@@ -266,7 +266,7 @@ static inline void put_inuse(struct dquot *dquot)
266{ 266{
267 /* We add to the back of inuse list so we don't have to restart 267 /* We add to the back of inuse list so we don't have to restart
268 * when traversing this list and we block */ 268 * when traversing this list and we block */
269 list_add(&dquot->dq_inuse, inuse_list.prev); 269 list_add_tail(&dquot->dq_inuse, &inuse_list);
270 dqstats.allocated_dquots++; 270 dqstats.allocated_dquots++;
271} 271}
272 272
diff --git a/fs/exec.c b/fs/exec.c
index 0b88bf646143..c8494f513eaf 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -666,8 +666,6 @@ static int de_thread(struct task_struct *tsk)
666 * and to assume its PID: 666 * and to assume its PID:
667 */ 667 */
668 if (!thread_group_leader(current)) { 668 if (!thread_group_leader(current)) {
669 struct dentry *proc_dentry1, *proc_dentry2;
670
671 /* 669 /*
672 * Wait for the thread group leader to be a zombie. 670 * Wait for the thread group leader to be a zombie.
673 * It should already be zombie at this point, most 671 * It should already be zombie at this point, most
@@ -689,10 +687,6 @@ static int de_thread(struct task_struct *tsk)
689 */ 687 */
690 current->start_time = leader->start_time; 688 current->start_time = leader->start_time;
691 689
692 spin_lock(&leader->proc_lock);
693 spin_lock(&current->proc_lock);
694 proc_dentry1 = proc_pid_unhash(current);
695 proc_dentry2 = proc_pid_unhash(leader);
696 write_lock_irq(&tasklist_lock); 690 write_lock_irq(&tasklist_lock);
697 691
698 BUG_ON(leader->tgid != current->tgid); 692 BUG_ON(leader->tgid != current->tgid);
@@ -713,7 +707,7 @@ static int de_thread(struct task_struct *tsk)
713 attach_pid(current, PIDTYPE_PID, current->pid); 707 attach_pid(current, PIDTYPE_PID, current->pid);
714 attach_pid(current, PIDTYPE_PGID, current->signal->pgrp); 708 attach_pid(current, PIDTYPE_PGID, current->signal->pgrp);
715 attach_pid(current, PIDTYPE_SID, current->signal->session); 709 attach_pid(current, PIDTYPE_SID, current->signal->session);
716 list_add_tail_rcu(&current->tasks, &init_task.tasks); 710 list_replace_rcu(&leader->tasks, &current->tasks);
717 711
718 current->group_leader = current; 712 current->group_leader = current;
719 leader->group_leader = current; 713 leader->group_leader = current;
@@ -721,7 +715,6 @@ static int de_thread(struct task_struct *tsk)
721 /* Reduce leader to a thread */ 715 /* Reduce leader to a thread */
722 detach_pid(leader, PIDTYPE_PGID); 716 detach_pid(leader, PIDTYPE_PGID);
723 detach_pid(leader, PIDTYPE_SID); 717 detach_pid(leader, PIDTYPE_SID);
724 list_del_init(&leader->tasks);
725 718
726 current->exit_signal = SIGCHLD; 719 current->exit_signal = SIGCHLD;
727 720
@@ -729,10 +722,6 @@ static int de_thread(struct task_struct *tsk)
729 leader->exit_state = EXIT_DEAD; 722 leader->exit_state = EXIT_DEAD;
730 723
731 write_unlock_irq(&tasklist_lock); 724 write_unlock_irq(&tasklist_lock);
732 spin_unlock(&leader->proc_lock);
733 spin_unlock(&current->proc_lock);
734 proc_pid_flush(proc_dentry1);
735 proc_pid_flush(proc_dentry2);
736 } 725 }
737 726
738 /* 727 /*
@@ -1379,67 +1368,102 @@ static void format_corename(char *corename, const char *pattern, long signr)
1379 *out_ptr = 0; 1368 *out_ptr = 0;
1380} 1369}
1381 1370
1382static void zap_threads (struct mm_struct *mm) 1371static void zap_process(struct task_struct *start)
1383{ 1372{
1384 struct task_struct *g, *p; 1373 struct task_struct *t;
1385 struct task_struct *tsk = current;
1386 struct completion *vfork_done = tsk->vfork_done;
1387 int traced = 0;
1388 1374
1389 /* 1375 start->signal->flags = SIGNAL_GROUP_EXIT;
1390 * Make sure nobody is waiting for us to release the VM, 1376 start->signal->group_stop_count = 0;
1391 * otherwise we can deadlock when we wait on each other
1392 */
1393 if (vfork_done) {
1394 tsk->vfork_done = NULL;
1395 complete(vfork_done);
1396 }
1397 1377
1398 read_lock(&tasklist_lock); 1378 t = start;
1399 do_each_thread(g,p) 1379 do {
1400 if (mm == p->mm && p != tsk) { 1380 if (t != current && t->mm) {
1401 force_sig_specific(SIGKILL, p); 1381 t->mm->core_waiters++;
1402 mm->core_waiters++; 1382 sigaddset(&t->pending.signal, SIGKILL);
1403 if (unlikely(p->ptrace) && 1383 signal_wake_up(t, 1);
1404 unlikely(p->parent->mm == mm))
1405 traced = 1;
1406 } 1384 }
1407 while_each_thread(g,p); 1385 } while ((t = next_thread(t)) != start);
1386}
1408 1387
1409 read_unlock(&tasklist_lock); 1388static inline int zap_threads(struct task_struct *tsk, struct mm_struct *mm,
1389 int exit_code)
1390{
1391 struct task_struct *g, *p;
1392 unsigned long flags;
1393 int err = -EAGAIN;
1394
1395 spin_lock_irq(&tsk->sighand->siglock);
1396 if (!(tsk->signal->flags & SIGNAL_GROUP_EXIT)) {
1397 tsk->signal->group_exit_code = exit_code;
1398 zap_process(tsk);
1399 err = 0;
1400 }
1401 spin_unlock_irq(&tsk->sighand->siglock);
1402 if (err)
1403 return err;
1410 1404
1411 if (unlikely(traced)) { 1405 if (atomic_read(&mm->mm_users) == mm->core_waiters + 1)
1412 /* 1406 goto done;
1413 * We are zapping a thread and the thread it ptraces. 1407
1414 * If the tracee went into a ptrace stop for exit tracing, 1408 rcu_read_lock();
1415 * we could deadlock since the tracer is waiting for this 1409 for_each_process(g) {
1416 * coredump to finish. Detach them so they can both die. 1410 if (g == tsk->group_leader)
1417 */ 1411 continue;
1418 write_lock_irq(&tasklist_lock); 1412
1419 do_each_thread(g,p) { 1413 p = g;
1420 if (mm == p->mm && p != tsk && 1414 do {
1421 p->ptrace && p->parent->mm == mm) { 1415 if (p->mm) {
1422 __ptrace_detach(p, 0); 1416 if (p->mm == mm) {
1417 /*
1418 * p->sighand can't disappear, but
1419 * may be changed by de_thread()
1420 */
1421 lock_task_sighand(p, &flags);
1422 zap_process(p);
1423 unlock_task_sighand(p, &flags);
1424 }
1425 break;
1423 } 1426 }
1424 } while_each_thread(g,p); 1427 } while ((p = next_thread(p)) != g);
1425 write_unlock_irq(&tasklist_lock);
1426 } 1428 }
1429 rcu_read_unlock();
1430done:
1431 return mm->core_waiters;
1427} 1432}
1428 1433
1429static void coredump_wait(struct mm_struct *mm) 1434static int coredump_wait(int exit_code)
1430{ 1435{
1431 DECLARE_COMPLETION(startup_done); 1436 struct task_struct *tsk = current;
1437 struct mm_struct *mm = tsk->mm;
1438 struct completion startup_done;
1439 struct completion *vfork_done;
1432 int core_waiters; 1440 int core_waiters;
1433 1441
1442 init_completion(&mm->core_done);
1443 init_completion(&startup_done);
1434 mm->core_startup_done = &startup_done; 1444 mm->core_startup_done = &startup_done;
1435 1445
1436 zap_threads(mm); 1446 core_waiters = zap_threads(tsk, mm, exit_code);
1437 core_waiters = mm->core_waiters;
1438 up_write(&mm->mmap_sem); 1447 up_write(&mm->mmap_sem);
1439 1448
1449 if (unlikely(core_waiters < 0))
1450 goto fail;
1451
1452 /*
1453 * Make sure nobody is waiting for us to release the VM,
1454 * otherwise we can deadlock when we wait on each other
1455 */
1456 vfork_done = tsk->vfork_done;
1457 if (vfork_done) {
1458 tsk->vfork_done = NULL;
1459 complete(vfork_done);
1460 }
1461
1440 if (core_waiters) 1462 if (core_waiters)
1441 wait_for_completion(&startup_done); 1463 wait_for_completion(&startup_done);
1464fail:
1442 BUG_ON(mm->core_waiters); 1465 BUG_ON(mm->core_waiters);
1466 return core_waiters;
1443} 1467}
1444 1468
1445int do_coredump(long signr, int exit_code, struct pt_regs * regs) 1469int do_coredump(long signr, int exit_code, struct pt_regs * regs)
@@ -1473,22 +1497,9 @@ int do_coredump(long signr, int exit_code, struct pt_regs * regs)
1473 } 1497 }
1474 mm->dumpable = 0; 1498 mm->dumpable = 0;
1475 1499
1476 retval = -EAGAIN; 1500 retval = coredump_wait(exit_code);
1477 spin_lock_irq(&current->sighand->siglock); 1501 if (retval < 0)
1478 if (!(current->signal->flags & SIGNAL_GROUP_EXIT)) {
1479 current->signal->flags = SIGNAL_GROUP_EXIT;
1480 current->signal->group_exit_code = exit_code;
1481 current->signal->group_stop_count = 0;
1482 retval = 0;
1483 }
1484 spin_unlock_irq(&current->sighand->siglock);
1485 if (retval) {
1486 up_write(&mm->mmap_sem);
1487 goto fail; 1502 goto fail;
1488 }
1489
1490 init_completion(&mm->core_done);
1491 coredump_wait(mm);
1492 1503
1493 /* 1504 /*
1494 * Clear any false indication of pending signals that might 1505 * Clear any false indication of pending signals that might
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index b2891cc29db1..b7483360a2db 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -630,7 +630,7 @@ enum {
630 Opt_resgid, Opt_resuid, Opt_sb, Opt_err_cont, Opt_err_panic, Opt_err_ro, 630 Opt_resgid, Opt_resuid, Opt_sb, Opt_err_cont, Opt_err_panic, Opt_err_ro,
631 Opt_nouid32, Opt_nocheck, Opt_debug, Opt_oldalloc, Opt_orlov, 631 Opt_nouid32, Opt_nocheck, Opt_debug, Opt_oldalloc, Opt_orlov,
632 Opt_user_xattr, Opt_nouser_xattr, Opt_acl, Opt_noacl, 632 Opt_user_xattr, Opt_nouser_xattr, Opt_acl, Opt_noacl,
633 Opt_reservation, Opt_noreservation, Opt_noload, Opt_nobh, 633 Opt_reservation, Opt_noreservation, Opt_noload, Opt_nobh, Opt_bh,
634 Opt_commit, Opt_journal_update, Opt_journal_inum, Opt_journal_dev, 634 Opt_commit, Opt_journal_update, Opt_journal_inum, Opt_journal_dev,
635 Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback, 635 Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback,
636 Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota, 636 Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota,
@@ -666,6 +666,7 @@ static match_table_t tokens = {
666 {Opt_noreservation, "noreservation"}, 666 {Opt_noreservation, "noreservation"},
667 {Opt_noload, "noload"}, 667 {Opt_noload, "noload"},
668 {Opt_nobh, "nobh"}, 668 {Opt_nobh, "nobh"},
669 {Opt_bh, "bh"},
669 {Opt_commit, "commit=%u"}, 670 {Opt_commit, "commit=%u"},
670 {Opt_journal_update, "journal=update"}, 671 {Opt_journal_update, "journal=update"},
671 {Opt_journal_inum, "journal=%u"}, 672 {Opt_journal_inum, "journal=%u"},
@@ -1014,6 +1015,9 @@ clear_qf_name:
1014 case Opt_nobh: 1015 case Opt_nobh:
1015 set_opt(sbi->s_mount_opt, NOBH); 1016 set_opt(sbi->s_mount_opt, NOBH);
1016 break; 1017 break;
1018 case Opt_bh:
1019 clear_opt(sbi->s_mount_opt, NOBH);
1020 break;
1017 default: 1021 default:
1018 printk (KERN_ERR 1022 printk (KERN_ERR
1019 "EXT3-fs: Unrecognized mount option \"%s\" " 1023 "EXT3-fs: Unrecognized mount option \"%s\" "
diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c
index 7f96b5cb6781..8c9b28dff119 100644
--- a/fs/jbd/journal.c
+++ b/fs/jbd/journal.c
@@ -34,6 +34,7 @@
34#include <linux/suspend.h> 34#include <linux/suspend.h>
35#include <linux/pagemap.h> 35#include <linux/pagemap.h>
36#include <linux/kthread.h> 36#include <linux/kthread.h>
37#include <linux/poison.h>
37#include <linux/proc_fs.h> 38#include <linux/proc_fs.h>
38 39
39#include <asm/uaccess.h> 40#include <asm/uaccess.h>
@@ -1675,7 +1676,7 @@ static void journal_free_journal_head(struct journal_head *jh)
1675{ 1676{
1676#ifdef CONFIG_JBD_DEBUG 1677#ifdef CONFIG_JBD_DEBUG
1677 atomic_dec(&nr_journal_heads); 1678 atomic_dec(&nr_journal_heads);
1678 memset(jh, 0x5b, sizeof(*jh)); 1679 memset(jh, JBD_POISON_FREE, sizeof(*jh));
1679#endif 1680#endif
1680 kmem_cache_free(journal_head_cache, jh); 1681 kmem_cache_free(journal_head_cache, jh);
1681} 1682}
diff --git a/fs/jffs2/acl.c b/fs/jffs2/acl.c
index 320dd48b834e..9c2077e7e081 100644
--- a/fs/jffs2/acl.c
+++ b/fs/jffs2/acl.c
@@ -267,6 +267,8 @@ static int jffs2_set_acl(struct inode *inode, int type, struct posix_acl *acl)
267 } 267 }
268 268
269 rc = do_jffs2_setxattr(inode, xprefix, "", value, size, 0); 269 rc = do_jffs2_setxattr(inode, xprefix, "", value, size, 0);
270 if (!value && rc == -ENODATA)
271 rc = 0;
270 if (value) 272 if (value)
271 kfree(value); 273 kfree(value);
272 if (!rc) { 274 if (!rc) {
diff --git a/fs/jffs2/erase.c b/fs/jffs2/erase.c
index 1862e8bc101d..ad0121088dde 100644
--- a/fs/jffs2/erase.c
+++ b/fs/jffs2/erase.c
@@ -53,8 +53,7 @@ static void jffs2_erase_block(struct jffs2_sb_info *c,
53 if (!instr) { 53 if (!instr) {
54 printk(KERN_WARNING "kmalloc for struct erase_info in jffs2_erase_block failed. Refiling block for later\n"); 54 printk(KERN_WARNING "kmalloc for struct erase_info in jffs2_erase_block failed. Refiling block for later\n");
55 spin_lock(&c->erase_completion_lock); 55 spin_lock(&c->erase_completion_lock);
56 list_del(&jeb->list); 56 list_move(&jeb->list, &c->erase_pending_list);
57 list_add(&jeb->list, &c->erase_pending_list);
58 c->erasing_size -= c->sector_size; 57 c->erasing_size -= c->sector_size;
59 c->dirty_size += c->sector_size; 58 c->dirty_size += c->sector_size;
60 jeb->dirty_size = c->sector_size; 59 jeb->dirty_size = c->sector_size;
@@ -86,8 +85,7 @@ static void jffs2_erase_block(struct jffs2_sb_info *c,
86 /* Erase failed immediately. Refile it on the list */ 85 /* Erase failed immediately. Refile it on the list */
87 D1(printk(KERN_DEBUG "Erase at 0x%08x failed: %d. Refiling on erase_pending_list\n", jeb->offset, ret)); 86 D1(printk(KERN_DEBUG "Erase at 0x%08x failed: %d. Refiling on erase_pending_list\n", jeb->offset, ret));
88 spin_lock(&c->erase_completion_lock); 87 spin_lock(&c->erase_completion_lock);
89 list_del(&jeb->list); 88 list_move(&jeb->list, &c->erase_pending_list);
90 list_add(&jeb->list, &c->erase_pending_list);
91 c->erasing_size -= c->sector_size; 89 c->erasing_size -= c->sector_size;
92 c->dirty_size += c->sector_size; 90 c->dirty_size += c->sector_size;
93 jeb->dirty_size = c->sector_size; 91 jeb->dirty_size = c->sector_size;
@@ -161,8 +159,7 @@ static void jffs2_erase_succeeded(struct jffs2_sb_info *c, struct jffs2_eraseblo
161{ 159{
162 D1(printk(KERN_DEBUG "Erase completed successfully at 0x%08x\n", jeb->offset)); 160 D1(printk(KERN_DEBUG "Erase completed successfully at 0x%08x\n", jeb->offset));
163 spin_lock(&c->erase_completion_lock); 161 spin_lock(&c->erase_completion_lock);
164 list_del(&jeb->list); 162 list_move_tail(&jeb->list, &c->erase_complete_list);
165 list_add_tail(&jeb->list, &c->erase_complete_list);
166 spin_unlock(&c->erase_completion_lock); 163 spin_unlock(&c->erase_completion_lock);
167 /* Ensure that kupdated calls us again to mark them clean */ 164 /* Ensure that kupdated calls us again to mark them clean */
168 jffs2_erase_pending_trigger(c); 165 jffs2_erase_pending_trigger(c);
@@ -178,8 +175,7 @@ static void jffs2_erase_failed(struct jffs2_sb_info *c, struct jffs2_eraseblock
178 if (!jffs2_write_nand_badblock(c, jeb, bad_offset)) { 175 if (!jffs2_write_nand_badblock(c, jeb, bad_offset)) {
179 /* We'd like to give this block another try. */ 176 /* We'd like to give this block another try. */
180 spin_lock(&c->erase_completion_lock); 177 spin_lock(&c->erase_completion_lock);
181 list_del(&jeb->list); 178 list_move(&jeb->list, &c->erase_pending_list);
182 list_add(&jeb->list, &c->erase_pending_list);
183 c->erasing_size -= c->sector_size; 179 c->erasing_size -= c->sector_size;
184 c->dirty_size += c->sector_size; 180 c->dirty_size += c->sector_size;
185 jeb->dirty_size = c->sector_size; 181 jeb->dirty_size = c->sector_size;
@@ -191,8 +187,7 @@ static void jffs2_erase_failed(struct jffs2_sb_info *c, struct jffs2_eraseblock
191 spin_lock(&c->erase_completion_lock); 187 spin_lock(&c->erase_completion_lock);
192 c->erasing_size -= c->sector_size; 188 c->erasing_size -= c->sector_size;
193 c->bad_size += c->sector_size; 189 c->bad_size += c->sector_size;
194 list_del(&jeb->list); 190 list_move(&jeb->list, &c->bad_list);
195 list_add(&jeb->list, &c->bad_list);
196 c->nr_erasing_blocks--; 191 c->nr_erasing_blocks--;
197 spin_unlock(&c->erase_completion_lock); 192 spin_unlock(&c->erase_completion_lock);
198 wake_up(&c->erase_wait); 193 wake_up(&c->erase_wait);
@@ -230,7 +225,6 @@ static inline void jffs2_remove_node_refs_from_ino_list(struct jffs2_sb_info *c,
230 at the end of the linked list. Stash it and continue 225 at the end of the linked list. Stash it and continue
231 from the beginning of the list */ 226 from the beginning of the list */
232 ic = (struct jffs2_inode_cache *)(*prev); 227 ic = (struct jffs2_inode_cache *)(*prev);
233 BUG_ON(ic->class != RAWNODE_CLASS_INODE_CACHE);
234 prev = &ic->nodes; 228 prev = &ic->nodes;
235 continue; 229 continue;
236 } 230 }
@@ -254,7 +248,8 @@ static inline void jffs2_remove_node_refs_from_ino_list(struct jffs2_sb_info *c,
254 248
255 /* PARANOIA */ 249 /* PARANOIA */
256 if (!ic) { 250 if (!ic) {
257 printk(KERN_WARNING "inode_cache not found in remove_node_refs()!!\n"); 251 JFFS2_WARNING("inode_cache/xattr_datum/xattr_ref"
252 " not found in remove_node_refs()!!\n");
258 return; 253 return;
259 } 254 }
260 255
@@ -279,8 +274,19 @@ static inline void jffs2_remove_node_refs_from_ino_list(struct jffs2_sb_info *c,
279 printk("\n"); 274 printk("\n");
280 }); 275 });
281 276
282 if (ic->nodes == (void *)ic && ic->nlink == 0) 277 switch (ic->class) {
283 jffs2_del_ino_cache(c, ic); 278#ifdef CONFIG_JFFS2_FS_XATTR
279 case RAWNODE_CLASS_XATTR_DATUM:
280 jffs2_release_xattr_datum(c, (struct jffs2_xattr_datum *)ic);
281 break;
282 case RAWNODE_CLASS_XATTR_REF:
283 jffs2_release_xattr_ref(c, (struct jffs2_xattr_ref *)ic);
284 break;
285#endif
286 default:
287 if (ic->nodes == (void *)ic && ic->nlink == 0)
288 jffs2_del_ino_cache(c, ic);
289 }
284} 290}
285 291
286void jffs2_free_jeb_node_refs(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb) 292void jffs2_free_jeb_node_refs(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb)
diff --git a/fs/jffs2/fs.c b/fs/jffs2/fs.c
index 2900ec3ec3af..97caa77d60cf 100644
--- a/fs/jffs2/fs.c
+++ b/fs/jffs2/fs.c
@@ -227,8 +227,6 @@ void jffs2_clear_inode (struct inode *inode)
227 struct jffs2_inode_info *f = JFFS2_INODE_INFO(inode); 227 struct jffs2_inode_info *f = JFFS2_INODE_INFO(inode);
228 228
229 D1(printk(KERN_DEBUG "jffs2_clear_inode(): ino #%lu mode %o\n", inode->i_ino, inode->i_mode)); 229 D1(printk(KERN_DEBUG "jffs2_clear_inode(): ino #%lu mode %o\n", inode->i_ino, inode->i_mode));
230
231 jffs2_xattr_delete_inode(c, f->inocache);
232 jffs2_do_clear_inode(c, f); 230 jffs2_do_clear_inode(c, f);
233} 231}
234 232
diff --git a/fs/jffs2/gc.c b/fs/jffs2/gc.c
index 477c526d638b..daff3341ff92 100644
--- a/fs/jffs2/gc.c
+++ b/fs/jffs2/gc.c
@@ -165,6 +165,7 @@ int jffs2_garbage_collect_pass(struct jffs2_sb_info *c)
165 D1(printk(KERN_DEBUG "Skipping check of ino #%d with nlink zero\n", 165 D1(printk(KERN_DEBUG "Skipping check of ino #%d with nlink zero\n",
166 ic->ino)); 166 ic->ino));
167 spin_unlock(&c->inocache_lock); 167 spin_unlock(&c->inocache_lock);
168 jffs2_xattr_delete_inode(c, ic);
168 continue; 169 continue;
169 } 170 }
170 switch(ic->state) { 171 switch(ic->state) {
@@ -275,13 +276,12 @@ int jffs2_garbage_collect_pass(struct jffs2_sb_info *c)
275 * We can decide whether this node is inode or xattr by ic->class. */ 276 * We can decide whether this node is inode or xattr by ic->class. */
276 if (ic->class == RAWNODE_CLASS_XATTR_DATUM 277 if (ic->class == RAWNODE_CLASS_XATTR_DATUM
277 || ic->class == RAWNODE_CLASS_XATTR_REF) { 278 || ic->class == RAWNODE_CLASS_XATTR_REF) {
278 BUG_ON(raw->next_in_ino != (void *)ic);
279 spin_unlock(&c->erase_completion_lock); 279 spin_unlock(&c->erase_completion_lock);
280 280
281 if (ic->class == RAWNODE_CLASS_XATTR_DATUM) { 281 if (ic->class == RAWNODE_CLASS_XATTR_DATUM) {
282 ret = jffs2_garbage_collect_xattr_datum(c, (struct jffs2_xattr_datum *)ic); 282 ret = jffs2_garbage_collect_xattr_datum(c, (struct jffs2_xattr_datum *)ic, raw);
283 } else { 283 } else {
284 ret = jffs2_garbage_collect_xattr_ref(c, (struct jffs2_xattr_ref *)ic); 284 ret = jffs2_garbage_collect_xattr_ref(c, (struct jffs2_xattr_ref *)ic, raw);
285 } 285 }
286 goto release_sem; 286 goto release_sem;
287 } 287 }
diff --git a/fs/jffs2/jffs2_fs_sb.h b/fs/jffs2/jffs2_fs_sb.h
index 935fec1b1201..b98594992eed 100644
--- a/fs/jffs2/jffs2_fs_sb.h
+++ b/fs/jffs2/jffs2_fs_sb.h
@@ -119,8 +119,11 @@ struct jffs2_sb_info {
119#ifdef CONFIG_JFFS2_FS_XATTR 119#ifdef CONFIG_JFFS2_FS_XATTR
120#define XATTRINDEX_HASHSIZE (57) 120#define XATTRINDEX_HASHSIZE (57)
121 uint32_t highest_xid; 121 uint32_t highest_xid;
122 uint32_t highest_xseqno;
122 struct list_head xattrindex[XATTRINDEX_HASHSIZE]; 123 struct list_head xattrindex[XATTRINDEX_HASHSIZE];
123 struct list_head xattr_unchecked; 124 struct list_head xattr_unchecked;
125 struct list_head xattr_dead_list;
126 struct jffs2_xattr_ref *xref_dead_list;
124 struct jffs2_xattr_ref *xref_temp; 127 struct jffs2_xattr_ref *xref_temp;
125 struct rw_semaphore xattr_sem; 128 struct rw_semaphore xattr_sem;
126 uint32_t xdatum_mem_usage; 129 uint32_t xdatum_mem_usage;
diff --git a/fs/jffs2/malloc.c b/fs/jffs2/malloc.c
index 4889d0700c0e..8310c95478e9 100644
--- a/fs/jffs2/malloc.c
+++ b/fs/jffs2/malloc.c
@@ -291,6 +291,7 @@ struct jffs2_xattr_datum *jffs2_alloc_xattr_datum(void)
291 291
292 memset(xd, 0, sizeof(struct jffs2_xattr_datum)); 292 memset(xd, 0, sizeof(struct jffs2_xattr_datum));
293 xd->class = RAWNODE_CLASS_XATTR_DATUM; 293 xd->class = RAWNODE_CLASS_XATTR_DATUM;
294 xd->node = (void *)xd;
294 INIT_LIST_HEAD(&xd->xindex); 295 INIT_LIST_HEAD(&xd->xindex);
295 return xd; 296 return xd;
296} 297}
@@ -309,6 +310,7 @@ struct jffs2_xattr_ref *jffs2_alloc_xattr_ref(void)
309 310
310 memset(ref, 0, sizeof(struct jffs2_xattr_ref)); 311 memset(ref, 0, sizeof(struct jffs2_xattr_ref));
311 ref->class = RAWNODE_CLASS_XATTR_REF; 312 ref->class = RAWNODE_CLASS_XATTR_REF;
313 ref->node = (void *)ref;
312 return ref; 314 return ref;
313} 315}
314 316
diff --git a/fs/jffs2/nodelist.c b/fs/jffs2/nodelist.c
index 927dfe42ba76..7675b33396c7 100644
--- a/fs/jffs2/nodelist.c
+++ b/fs/jffs2/nodelist.c
@@ -906,6 +906,9 @@ void jffs2_del_ino_cache(struct jffs2_sb_info *c, struct jffs2_inode_cache *old)
906{ 906{
907 struct jffs2_inode_cache **prev; 907 struct jffs2_inode_cache **prev;
908 908
909#ifdef CONFIG_JFFS2_FS_XATTR
910 BUG_ON(old->xref);
911#endif
909 dbg_inocache("del %p (ino #%u)\n", old, old->ino); 912 dbg_inocache("del %p (ino #%u)\n", old, old->ino);
910 spin_lock(&c->inocache_lock); 913 spin_lock(&c->inocache_lock);
911 914
diff --git a/fs/jffs2/nodemgmt.c b/fs/jffs2/nodemgmt.c
index 8bedfd2ff689..d88376992ed9 100644
--- a/fs/jffs2/nodemgmt.c
+++ b/fs/jffs2/nodemgmt.c
@@ -211,8 +211,7 @@ static int jffs2_find_nextblock(struct jffs2_sb_info *c)
211 struct jffs2_eraseblock *ejeb; 211 struct jffs2_eraseblock *ejeb;
212 212
213 ejeb = list_entry(c->erasable_list.next, struct jffs2_eraseblock, list); 213 ejeb = list_entry(c->erasable_list.next, struct jffs2_eraseblock, list);
214 list_del(&ejeb->list); 214 list_move_tail(&ejeb->list, &c->erase_pending_list);
215 list_add_tail(&ejeb->list, &c->erase_pending_list);
216 c->nr_erasing_blocks++; 215 c->nr_erasing_blocks++;
217 jffs2_erase_pending_trigger(c); 216 jffs2_erase_pending_trigger(c);
218 D1(printk(KERN_DEBUG "jffs2_find_nextblock: Triggering erase of erasable block at 0x%08x\n", 217 D1(printk(KERN_DEBUG "jffs2_find_nextblock: Triggering erase of erasable block at 0x%08x\n",
@@ -684,19 +683,26 @@ void jffs2_mark_node_obsolete(struct jffs2_sb_info *c, struct jffs2_raw_node_ref
684 spin_lock(&c->erase_completion_lock); 683 spin_lock(&c->erase_completion_lock);
685 684
686 ic = jffs2_raw_ref_to_ic(ref); 685 ic = jffs2_raw_ref_to_ic(ref);
687 /* It seems we should never call jffs2_mark_node_obsolete() for
688 XATTR nodes.... yet. Make sure we notice if/when we change
689 that :) */
690 BUG_ON(ic->class != RAWNODE_CLASS_INODE_CACHE);
691 for (p = &ic->nodes; (*p) != ref; p = &((*p)->next_in_ino)) 686 for (p = &ic->nodes; (*p) != ref; p = &((*p)->next_in_ino))
692 ; 687 ;
693 688
694 *p = ref->next_in_ino; 689 *p = ref->next_in_ino;
695 ref->next_in_ino = NULL; 690 ref->next_in_ino = NULL;
696 691
697 if (ic->nodes == (void *)ic && ic->nlink == 0) 692 switch (ic->class) {
698 jffs2_del_ino_cache(c, ic); 693#ifdef CONFIG_JFFS2_FS_XATTR
699 694 case RAWNODE_CLASS_XATTR_DATUM:
695 jffs2_release_xattr_datum(c, (struct jffs2_xattr_datum *)ic);
696 break;
697 case RAWNODE_CLASS_XATTR_REF:
698 jffs2_release_xattr_ref(c, (struct jffs2_xattr_ref *)ic);
699 break;
700#endif
701 default:
702 if (ic->nodes == (void *)ic && ic->nlink == 0)
703 jffs2_del_ino_cache(c, ic);
704 break;
705 }
700 spin_unlock(&c->erase_completion_lock); 706 spin_unlock(&c->erase_completion_lock);
701 } 707 }
702 708
diff --git a/fs/jffs2/readinode.c b/fs/jffs2/readinode.c
index 5fec012b02ed..cc1899268c43 100644
--- a/fs/jffs2/readinode.c
+++ b/fs/jffs2/readinode.c
@@ -968,6 +968,7 @@ void jffs2_do_clear_inode(struct jffs2_sb_info *c, struct jffs2_inode_info *f)
968 struct jffs2_full_dirent *fd, *fds; 968 struct jffs2_full_dirent *fd, *fds;
969 int deleted; 969 int deleted;
970 970
971 jffs2_xattr_delete_inode(c, f->inocache);
971 down(&f->sem); 972 down(&f->sem);
972 deleted = f->inocache && !f->inocache->nlink; 973 deleted = f->inocache && !f->inocache->nlink;
973 974
diff --git a/fs/jffs2/scan.c b/fs/jffs2/scan.c
index 61618080b86f..2bfdc33752d3 100644
--- a/fs/jffs2/scan.c
+++ b/fs/jffs2/scan.c
@@ -317,20 +317,23 @@ static int jffs2_scan_xattr_node(struct jffs2_sb_info *c, struct jffs2_erasebloc
317 struct jffs2_summary *s) 317 struct jffs2_summary *s)
318{ 318{
319 struct jffs2_xattr_datum *xd; 319 struct jffs2_xattr_datum *xd;
320 uint32_t totlen, crc; 320 uint32_t xid, version, totlen, crc;
321 int err; 321 int err;
322 322
323 crc = crc32(0, rx, sizeof(struct jffs2_raw_xattr) - 4); 323 crc = crc32(0, rx, sizeof(struct jffs2_raw_xattr) - 4);
324 if (crc != je32_to_cpu(rx->node_crc)) { 324 if (crc != je32_to_cpu(rx->node_crc)) {
325 if (je32_to_cpu(rx->node_crc) != 0xffffffff) 325 JFFS2_WARNING("node CRC failed at %#08x, read=%#08x, calc=%#08x\n",
326 JFFS2_WARNING("node CRC failed at %#08x, read=%#08x, calc=%#08x\n", 326 ofs, je32_to_cpu(rx->node_crc), crc);
327 ofs, je32_to_cpu(rx->node_crc), crc);
328 if ((err = jffs2_scan_dirty_space(c, jeb, je32_to_cpu(rx->totlen)))) 327 if ((err = jffs2_scan_dirty_space(c, jeb, je32_to_cpu(rx->totlen))))
329 return err; 328 return err;
330 return 0; 329 return 0;
331 } 330 }
332 331
333 totlen = PAD(sizeof(*rx) + rx->name_len + 1 + je16_to_cpu(rx->value_len)); 332 xid = je32_to_cpu(rx->xid);
333 version = je32_to_cpu(rx->version);
334
335 totlen = PAD(sizeof(struct jffs2_raw_xattr)
336 + rx->name_len + 1 + je16_to_cpu(rx->value_len));
334 if (totlen != je32_to_cpu(rx->totlen)) { 337 if (totlen != je32_to_cpu(rx->totlen)) {
335 JFFS2_WARNING("node length mismatch at %#08x, read=%u, calc=%u\n", 338 JFFS2_WARNING("node length mismatch at %#08x, read=%u, calc=%u\n",
336 ofs, je32_to_cpu(rx->totlen), totlen); 339 ofs, je32_to_cpu(rx->totlen), totlen);
@@ -339,22 +342,24 @@ static int jffs2_scan_xattr_node(struct jffs2_sb_info *c, struct jffs2_erasebloc
339 return 0; 342 return 0;
340 } 343 }
341 344
342 xd = jffs2_setup_xattr_datum(c, je32_to_cpu(rx->xid), je32_to_cpu(rx->version)); 345 xd = jffs2_setup_xattr_datum(c, xid, version);
343 if (IS_ERR(xd)) { 346 if (IS_ERR(xd))
344 if (PTR_ERR(xd) == -EEXIST) {
345 if ((err = jffs2_scan_dirty_space(c, jeb, PAD(je32_to_cpu(rx->totlen)))))
346 return err;
347 return 0;
348 }
349 return PTR_ERR(xd); 347 return PTR_ERR(xd);
350 }
351 xd->xprefix = rx->xprefix;
352 xd->name_len = rx->name_len;
353 xd->value_len = je16_to_cpu(rx->value_len);
354 xd->data_crc = je32_to_cpu(rx->data_crc);
355 348
356 xd->node = jffs2_link_node_ref(c, jeb, ofs | REF_PRISTINE, totlen, NULL); 349 if (xd->version > version) {
357 /* FIXME */ xd->node->next_in_ino = (void *)xd; 350 struct jffs2_raw_node_ref *raw
351 = jffs2_link_node_ref(c, jeb, ofs | REF_PRISTINE, totlen, NULL);
352 raw->next_in_ino = xd->node->next_in_ino;
353 xd->node->next_in_ino = raw;
354 } else {
355 xd->version = version;
356 xd->xprefix = rx->xprefix;
357 xd->name_len = rx->name_len;
358 xd->value_len = je16_to_cpu(rx->value_len);
359 xd->data_crc = je32_to_cpu(rx->data_crc);
360
361 jffs2_link_node_ref(c, jeb, ofs | REF_PRISTINE, totlen, (void *)xd);
362 }
358 363
359 if (jffs2_sum_active()) 364 if (jffs2_sum_active())
360 jffs2_sum_add_xattr_mem(s, rx, ofs - jeb->offset); 365 jffs2_sum_add_xattr_mem(s, rx, ofs - jeb->offset);
@@ -373,9 +378,8 @@ static int jffs2_scan_xref_node(struct jffs2_sb_info *c, struct jffs2_eraseblock
373 378
374 crc = crc32(0, rr, sizeof(*rr) - 4); 379 crc = crc32(0, rr, sizeof(*rr) - 4);
375 if (crc != je32_to_cpu(rr->node_crc)) { 380 if (crc != je32_to_cpu(rr->node_crc)) {
376 if (je32_to_cpu(rr->node_crc) != 0xffffffff) 381 JFFS2_WARNING("node CRC failed at %#08x, read=%#08x, calc=%#08x\n",
377 JFFS2_WARNING("node CRC failed at %#08x, read=%#08x, calc=%#08x\n", 382 ofs, je32_to_cpu(rr->node_crc), crc);
378 ofs, je32_to_cpu(rr->node_crc), crc);
379 if ((err = jffs2_scan_dirty_space(c, jeb, PAD(je32_to_cpu(rr->totlen))))) 383 if ((err = jffs2_scan_dirty_space(c, jeb, PAD(je32_to_cpu(rr->totlen)))))
380 return err; 384 return err;
381 return 0; 385 return 0;
@@ -395,6 +399,7 @@ static int jffs2_scan_xref_node(struct jffs2_sb_info *c, struct jffs2_eraseblock
395 return -ENOMEM; 399 return -ENOMEM;
396 400
397 /* BEFORE jffs2_build_xattr_subsystem() called, 401 /* BEFORE jffs2_build_xattr_subsystem() called,
402 * and AFTER xattr_ref is marked as a dead xref,
398 * ref->xid is used to store 32bit xid, xd is not used 403 * ref->xid is used to store 32bit xid, xd is not used
399 * ref->ino is used to store 32bit inode-number, ic is not used 404 * ref->ino is used to store 32bit inode-number, ic is not used
400 * Thoes variables are declared as union, thus using those 405 * Thoes variables are declared as union, thus using those
@@ -404,11 +409,13 @@ static int jffs2_scan_xref_node(struct jffs2_sb_info *c, struct jffs2_eraseblock
404 */ 409 */
405 ref->ino = je32_to_cpu(rr->ino); 410 ref->ino = je32_to_cpu(rr->ino);
406 ref->xid = je32_to_cpu(rr->xid); 411 ref->xid = je32_to_cpu(rr->xid);
412 ref->xseqno = je32_to_cpu(rr->xseqno);
413 if (ref->xseqno > c->highest_xseqno)
414 c->highest_xseqno = (ref->xseqno & ~XREF_DELETE_MARKER);
407 ref->next = c->xref_temp; 415 ref->next = c->xref_temp;
408 c->xref_temp = ref; 416 c->xref_temp = ref;
409 417
410 ref->node = jffs2_link_node_ref(c, jeb, ofs | REF_PRISTINE, PAD(je32_to_cpu(rr->totlen)), NULL); 418 jffs2_link_node_ref(c, jeb, ofs | REF_PRISTINE, PAD(je32_to_cpu(rr->totlen)), (void *)ref);
411 /* FIXME */ ref->node->next_in_ino = (void *)ref;
412 419
413 if (jffs2_sum_active()) 420 if (jffs2_sum_active())
414 jffs2_sum_add_xref_mem(s, rr, ofs - jeb->offset); 421 jffs2_sum_add_xref_mem(s, rr, ofs - jeb->offset);
diff --git a/fs/jffs2/summary.c b/fs/jffs2/summary.c
index 0b02fc79e4d1..c19bd476e8ec 100644
--- a/fs/jffs2/summary.c
+++ b/fs/jffs2/summary.c
@@ -5,7 +5,7 @@
5 * Zoltan Sogor <weth@inf.u-szeged.hu>, 5 * Zoltan Sogor <weth@inf.u-szeged.hu>,
6 * Patrik Kluba <pajko@halom.u-szeged.hu>, 6 * Patrik Kluba <pajko@halom.u-szeged.hu>,
7 * University of Szeged, Hungary 7 * University of Szeged, Hungary
8 * 2005 KaiGai Kohei <kaigai@ak.jp.nec.com> 8 * 2006 KaiGai Kohei <kaigai@ak.jp.nec.com>
9 * 9 *
10 * For licensing information, see the file 'LICENCE' in this directory. 10 * For licensing information, see the file 'LICENCE' in this directory.
11 * 11 *
@@ -43,7 +43,7 @@ int jffs2_sum_init(struct jffs2_sb_info *c)
43 return -ENOMEM; 43 return -ENOMEM;
44 } 44 }
45 45
46 dbg_summary("returned succesfully\n"); 46 dbg_summary("returned successfully\n");
47 47
48 return 0; 48 return 0;
49} 49}
@@ -310,8 +310,6 @@ int jffs2_sum_add_kvec(struct jffs2_sb_info *c, const struct kvec *invecs,
310#ifdef CONFIG_JFFS2_FS_XATTR 310#ifdef CONFIG_JFFS2_FS_XATTR
311 case JFFS2_NODETYPE_XATTR: { 311 case JFFS2_NODETYPE_XATTR: {
312 struct jffs2_sum_xattr_mem *temp; 312 struct jffs2_sum_xattr_mem *temp;
313 if (je32_to_cpu(node->x.version) == 0xffffffff)
314 return 0;
315 temp = kmalloc(sizeof(struct jffs2_sum_xattr_mem), GFP_KERNEL); 313 temp = kmalloc(sizeof(struct jffs2_sum_xattr_mem), GFP_KERNEL);
316 if (!temp) 314 if (!temp)
317 goto no_mem; 315 goto no_mem;
@@ -327,10 +325,6 @@ int jffs2_sum_add_kvec(struct jffs2_sb_info *c, const struct kvec *invecs,
327 } 325 }
328 case JFFS2_NODETYPE_XREF: { 326 case JFFS2_NODETYPE_XREF: {
329 struct jffs2_sum_xref_mem *temp; 327 struct jffs2_sum_xref_mem *temp;
330
331 if (je32_to_cpu(node->r.ino) == 0xffffffff
332 && je32_to_cpu(node->r.xid) == 0xffffffff)
333 return 0;
334 temp = kmalloc(sizeof(struct jffs2_sum_xref_mem), GFP_KERNEL); 328 temp = kmalloc(sizeof(struct jffs2_sum_xref_mem), GFP_KERNEL);
335 if (!temp) 329 if (!temp)
336 goto no_mem; 330 goto no_mem;
@@ -483,22 +477,20 @@ static int jffs2_sum_process_sum_data(struct jffs2_sb_info *c, struct jffs2_eras
483 477
484 xd = jffs2_setup_xattr_datum(c, je32_to_cpu(spx->xid), 478 xd = jffs2_setup_xattr_datum(c, je32_to_cpu(spx->xid),
485 je32_to_cpu(spx->version)); 479 je32_to_cpu(spx->version));
486 if (IS_ERR(xd)) { 480 if (IS_ERR(xd))
487 if (PTR_ERR(xd) == -EEXIST) {
488 /* a newer version of xd exists */
489 if ((err = jffs2_scan_dirty_space(c, jeb, je32_to_cpu(spx->totlen))))
490 return err;
491 sp += JFFS2_SUMMARY_XATTR_SIZE;
492 break;
493 }
494 JFFS2_NOTICE("allocation of xattr_datum failed\n");
495 return PTR_ERR(xd); 481 return PTR_ERR(xd);
482 if (xd->version > je32_to_cpu(spx->version)) {
483 /* node is not the newest one */
484 struct jffs2_raw_node_ref *raw
485 = sum_link_node_ref(c, jeb, je32_to_cpu(spx->offset) | REF_UNCHECKED,
486 PAD(je32_to_cpu(spx->totlen)), NULL);
487 raw->next_in_ino = xd->node->next_in_ino;
488 xd->node->next_in_ino = raw;
489 } else {
490 xd->version = je32_to_cpu(spx->version);
491 sum_link_node_ref(c, jeb, je32_to_cpu(spx->offset) | REF_UNCHECKED,
492 PAD(je32_to_cpu(spx->totlen)), (void *)xd);
496 } 493 }
497
498 xd->node = sum_link_node_ref(c, jeb, je32_to_cpu(spx->offset) | REF_UNCHECKED,
499 PAD(je32_to_cpu(spx->totlen)), NULL);
500 /* FIXME */ xd->node->next_in_ino = (void *)xd;
501
502 *pseudo_random += je32_to_cpu(spx->xid); 494 *pseudo_random += je32_to_cpu(spx->xid);
503 sp += JFFS2_SUMMARY_XATTR_SIZE; 495 sp += JFFS2_SUMMARY_XATTR_SIZE;
504 496
@@ -519,14 +511,11 @@ static int jffs2_sum_process_sum_data(struct jffs2_sb_info *c, struct jffs2_eras
519 JFFS2_NOTICE("allocation of xattr_datum failed\n"); 511 JFFS2_NOTICE("allocation of xattr_datum failed\n");
520 return -ENOMEM; 512 return -ENOMEM;
521 } 513 }
522 ref->ino = 0xfffffffe;
523 ref->xid = 0xfffffffd;
524 ref->next = c->xref_temp; 514 ref->next = c->xref_temp;
525 c->xref_temp = ref; 515 c->xref_temp = ref;
526 516
527 ref->node = sum_link_node_ref(c, jeb, je32_to_cpu(spr->offset) | REF_UNCHECKED, 517 sum_link_node_ref(c, jeb, je32_to_cpu(spr->offset) | REF_UNCHECKED,
528 PAD(sizeof(struct jffs2_raw_xref)), NULL); 518 PAD(sizeof(struct jffs2_raw_xref)), (void *)ref);
529 /* FIXME */ ref->node->next_in_ino = (void *)ref;
530 519
531 *pseudo_random += ref->node->flash_offset; 520 *pseudo_random += ref->node->flash_offset;
532 sp += JFFS2_SUMMARY_XREF_SIZE; 521 sp += JFFS2_SUMMARY_XREF_SIZE;
diff --git a/fs/jffs2/wbuf.c b/fs/jffs2/wbuf.c
index a7f153f79ecb..b9b700730dfe 100644
--- a/fs/jffs2/wbuf.c
+++ b/fs/jffs2/wbuf.c
@@ -495,8 +495,7 @@ static void jffs2_wbuf_recover(struct jffs2_sb_info *c)
495 /* Fix up the original jeb now it's on the bad_list */ 495 /* Fix up the original jeb now it's on the bad_list */
496 if (first_raw == jeb->first_node) { 496 if (first_raw == jeb->first_node) {
497 D1(printk(KERN_DEBUG "Failing block at %08x is now empty. Moving to erase_pending_list\n", jeb->offset)); 497 D1(printk(KERN_DEBUG "Failing block at %08x is now empty. Moving to erase_pending_list\n", jeb->offset));
498 list_del(&jeb->list); 498 list_move(&jeb->list, &c->erase_pending_list);
499 list_add(&jeb->list, &c->erase_pending_list);
500 c->nr_erasing_blocks++; 499 c->nr_erasing_blocks++;
501 jffs2_erase_pending_trigger(c); 500 jffs2_erase_pending_trigger(c);
502 } 501 }
diff --git a/fs/jffs2/xattr.c b/fs/jffs2/xattr.c
index 2d82e250be34..18e66dbf23b4 100644
--- a/fs/jffs2/xattr.c
+++ b/fs/jffs2/xattr.c
@@ -23,18 +23,15 @@
23 * xattr_datum_hashkey(xprefix, xname, xvalue, xsize) 23 * xattr_datum_hashkey(xprefix, xname, xvalue, xsize)
24 * is used to calcurate xdatum hashkey. The reminder of hashkey into XATTRINDEX_HASHSIZE is 24 * is used to calcurate xdatum hashkey. The reminder of hashkey into XATTRINDEX_HASHSIZE is
25 * the index of the xattr name/value pair cache (c->xattrindex). 25 * the index of the xattr name/value pair cache (c->xattrindex).
26 * is_xattr_datum_unchecked(c, xd)
27 * returns 1, if xdatum contains any unchecked raw nodes. if all raw nodes are not
28 * unchecked, it returns 0.
26 * unload_xattr_datum(c, xd) 29 * unload_xattr_datum(c, xd)
27 * is used to release xattr name/value pair and detach from c->xattrindex. 30 * is used to release xattr name/value pair and detach from c->xattrindex.
28 * reclaim_xattr_datum(c) 31 * reclaim_xattr_datum(c)
29 * is used to reclaim xattr name/value pairs on the xattr name/value pair cache when 32 * is used to reclaim xattr name/value pairs on the xattr name/value pair cache when
30 * memory usage by cache is over c->xdatum_mem_threshold. Currentry, this threshold 33 * memory usage by cache is over c->xdatum_mem_threshold. Currentry, this threshold
31 * is hard coded as 32KiB. 34 * is hard coded as 32KiB.
32 * delete_xattr_datum_node(c, xd)
33 * is used to delete a jffs2 node is dominated by xdatum. When EBS(Erase Block Summary) is
34 * enabled, it overwrites the obsolete node by myself.
35 * delete_xattr_datum(c, xd)
36 * is used to delete jffs2_xattr_datum object. It must be called with 0-value of reference
37 * counter. (It means how many jffs2_xattr_ref object refers this xdatum.)
38 * do_verify_xattr_datum(c, xd) 35 * do_verify_xattr_datum(c, xd)
39 * is used to load the xdatum informations without name/value pair from the medium. 36 * is used to load the xdatum informations without name/value pair from the medium.
40 * It's necessary once, because those informations are not collected during mounting 37 * It's necessary once, because those informations are not collected during mounting
@@ -53,8 +50,10 @@
53 * is used to write xdatum to medium. xd->version will be incremented. 50 * is used to write xdatum to medium. xd->version will be incremented.
54 * create_xattr_datum(c, xprefix, xname, xvalue, xsize) 51 * create_xattr_datum(c, xprefix, xname, xvalue, xsize)
55 * is used to create new xdatum and write to medium. 52 * is used to create new xdatum and write to medium.
53 * delete_xattr_datum(c, xd)
54 * is used to delete a xdatum. It marks xd JFFS2_XFLAGS_DEAD, and allows
55 * GC to reclaim those physical nodes.
56 * -------------------------------------------------- */ 56 * -------------------------------------------------- */
57
58static uint32_t xattr_datum_hashkey(int xprefix, const char *xname, const char *xvalue, int xsize) 57static uint32_t xattr_datum_hashkey(int xprefix, const char *xname, const char *xvalue, int xsize)
59{ 58{
60 int name_len = strlen(xname); 59 int name_len = strlen(xname);
@@ -62,6 +61,22 @@ static uint32_t xattr_datum_hashkey(int xprefix, const char *xname, const char *
62 return crc32(xprefix, xname, name_len) ^ crc32(xprefix, xvalue, xsize); 61 return crc32(xprefix, xname, name_len) ^ crc32(xprefix, xvalue, xsize);
63} 62}
64 63
64static int is_xattr_datum_unchecked(struct jffs2_sb_info *c, struct jffs2_xattr_datum *xd)
65{
66 struct jffs2_raw_node_ref *raw;
67 int rc = 0;
68
69 spin_lock(&c->erase_completion_lock);
70 for (raw=xd->node; raw != (void *)xd; raw=raw->next_in_ino) {
71 if (ref_flags(raw) == REF_UNCHECKED) {
72 rc = 1;
73 break;
74 }
75 }
76 spin_unlock(&c->erase_completion_lock);
77 return rc;
78}
79
65static void unload_xattr_datum(struct jffs2_sb_info *c, struct jffs2_xattr_datum *xd) 80static void unload_xattr_datum(struct jffs2_sb_info *c, struct jffs2_xattr_datum *xd)
66{ 81{
67 /* must be called under down_write(xattr_sem) */ 82 /* must be called under down_write(xattr_sem) */
@@ -107,77 +122,33 @@ static void reclaim_xattr_datum(struct jffs2_sb_info *c)
107 before, c->xdatum_mem_usage, before - c->xdatum_mem_usage); 122 before, c->xdatum_mem_usage, before - c->xdatum_mem_usage);
108} 123}
109 124
110static void delete_xattr_datum_node(struct jffs2_sb_info *c, struct jffs2_xattr_datum *xd)
111{
112 /* must be called under down_write(xattr_sem) */
113 struct jffs2_raw_xattr rx;
114 size_t length;
115 int rc;
116
117 if (!xd->node) {
118 JFFS2_WARNING("xdatum (xid=%u) is removed twice.\n", xd->xid);
119 return;
120 }
121 if (jffs2_sum_active()) {
122 memset(&rx, 0xff, sizeof(struct jffs2_raw_xattr));
123 rc = jffs2_flash_read(c, ref_offset(xd->node),
124 sizeof(struct jffs2_unknown_node),
125 &length, (char *)&rx);
126 if (rc || length != sizeof(struct jffs2_unknown_node)) {
127 JFFS2_ERROR("jffs2_flash_read()=%d, req=%zu, read=%zu at %#08x\n",
128 rc, sizeof(struct jffs2_unknown_node),
129 length, ref_offset(xd->node));
130 }
131 rc = jffs2_flash_write(c, ref_offset(xd->node), sizeof(rx),
132 &length, (char *)&rx);
133 if (rc || length != sizeof(struct jffs2_raw_xattr)) {
134 JFFS2_ERROR("jffs2_flash_write()=%d, req=%zu, wrote=%zu ar %#08x\n",
135 rc, sizeof(rx), length, ref_offset(xd->node));
136 }
137 }
138 spin_lock(&c->erase_completion_lock);
139 xd->node->next_in_ino = NULL;
140 spin_unlock(&c->erase_completion_lock);
141 jffs2_mark_node_obsolete(c, xd->node);
142 xd->node = NULL;
143}
144
145static void delete_xattr_datum(struct jffs2_sb_info *c, struct jffs2_xattr_datum *xd)
146{
147 /* must be called under down_write(xattr_sem) */
148 BUG_ON(xd->refcnt);
149
150 unload_xattr_datum(c, xd);
151 if (xd->node) {
152 delete_xattr_datum_node(c, xd);
153 xd->node = NULL;
154 }
155 jffs2_free_xattr_datum(xd);
156}
157
158static int do_verify_xattr_datum(struct jffs2_sb_info *c, struct jffs2_xattr_datum *xd) 125static int do_verify_xattr_datum(struct jffs2_sb_info *c, struct jffs2_xattr_datum *xd)
159{ 126{
160 /* must be called under down_write(xattr_sem) */ 127 /* must be called under down_write(xattr_sem) */
161 struct jffs2_eraseblock *jeb; 128 struct jffs2_eraseblock *jeb;
129 struct jffs2_raw_node_ref *raw;
162 struct jffs2_raw_xattr rx; 130 struct jffs2_raw_xattr rx;
163 size_t readlen; 131 size_t readlen;
164 uint32_t crc, totlen; 132 uint32_t crc, offset, totlen;
165 int rc; 133 int rc;
166 134
167 BUG_ON(!xd->node); 135 spin_lock(&c->erase_completion_lock);
168 BUG_ON(ref_flags(xd->node) != REF_UNCHECKED); 136 offset = ref_offset(xd->node);
137 if (ref_flags(xd->node) == REF_PRISTINE)
138 goto complete;
139 spin_unlock(&c->erase_completion_lock);
169 140
170 rc = jffs2_flash_read(c, ref_offset(xd->node), sizeof(rx), &readlen, (char *)&rx); 141 rc = jffs2_flash_read(c, offset, sizeof(rx), &readlen, (char *)&rx);
171 if (rc || readlen != sizeof(rx)) { 142 if (rc || readlen != sizeof(rx)) {
172 JFFS2_WARNING("jffs2_flash_read()=%d, req=%zu, read=%zu at %#08x\n", 143 JFFS2_WARNING("jffs2_flash_read()=%d, req=%zu, read=%zu at %#08x\n",
173 rc, sizeof(rx), readlen, ref_offset(xd->node)); 144 rc, sizeof(rx), readlen, offset);
174 return rc ? rc : -EIO; 145 return rc ? rc : -EIO;
175 } 146 }
176 crc = crc32(0, &rx, sizeof(rx) - 4); 147 crc = crc32(0, &rx, sizeof(rx) - 4);
177 if (crc != je32_to_cpu(rx.node_crc)) { 148 if (crc != je32_to_cpu(rx.node_crc)) {
178 if (je32_to_cpu(rx.node_crc) != 0xffffffff) 149 JFFS2_ERROR("node CRC failed at %#08x, read=%#08x, calc=%#08x\n",
179 JFFS2_ERROR("node CRC failed at %#08x, read=%#08x, calc=%#08x\n", 150 offset, je32_to_cpu(rx.hdr_crc), crc);
180 ref_offset(xd->node), je32_to_cpu(rx.hdr_crc), crc); 151 xd->flags |= JFFS2_XFLAGS_INVALID;
181 return EIO; 152 return EIO;
182 } 153 }
183 totlen = PAD(sizeof(rx) + rx.name_len + 1 + je16_to_cpu(rx.value_len)); 154 totlen = PAD(sizeof(rx) + rx.name_len + 1 + je16_to_cpu(rx.value_len));
@@ -188,11 +159,12 @@ static int do_verify_xattr_datum(struct jffs2_sb_info *c, struct jffs2_xattr_dat
188 || je32_to_cpu(rx.version) != xd->version) { 159 || je32_to_cpu(rx.version) != xd->version) {
189 JFFS2_ERROR("inconsistent xdatum at %#08x, magic=%#04x/%#04x, " 160 JFFS2_ERROR("inconsistent xdatum at %#08x, magic=%#04x/%#04x, "
190 "nodetype=%#04x/%#04x, totlen=%u/%u, xid=%u/%u, version=%u/%u\n", 161 "nodetype=%#04x/%#04x, totlen=%u/%u, xid=%u/%u, version=%u/%u\n",
191 ref_offset(xd->node), je16_to_cpu(rx.magic), JFFS2_MAGIC_BITMASK, 162 offset, je16_to_cpu(rx.magic), JFFS2_MAGIC_BITMASK,
192 je16_to_cpu(rx.nodetype), JFFS2_NODETYPE_XATTR, 163 je16_to_cpu(rx.nodetype), JFFS2_NODETYPE_XATTR,
193 je32_to_cpu(rx.totlen), totlen, 164 je32_to_cpu(rx.totlen), totlen,
194 je32_to_cpu(rx.xid), xd->xid, 165 je32_to_cpu(rx.xid), xd->xid,
195 je32_to_cpu(rx.version), xd->version); 166 je32_to_cpu(rx.version), xd->version);
167 xd->flags |= JFFS2_XFLAGS_INVALID;
196 return EIO; 168 return EIO;
197 } 169 }
198 xd->xprefix = rx.xprefix; 170 xd->xprefix = rx.xprefix;
@@ -200,14 +172,17 @@ static int do_verify_xattr_datum(struct jffs2_sb_info *c, struct jffs2_xattr_dat
200 xd->value_len = je16_to_cpu(rx.value_len); 172 xd->value_len = je16_to_cpu(rx.value_len);
201 xd->data_crc = je32_to_cpu(rx.data_crc); 173 xd->data_crc = je32_to_cpu(rx.data_crc);
202 174
203 /* This JFFS2_NODETYPE_XATTR node is checked */
204 jeb = &c->blocks[ref_offset(xd->node) / c->sector_size];
205 totlen = PAD(je32_to_cpu(rx.totlen));
206
207 spin_lock(&c->erase_completion_lock); 175 spin_lock(&c->erase_completion_lock);
208 c->unchecked_size -= totlen; c->used_size += totlen; 176 complete:
209 jeb->unchecked_size -= totlen; jeb->used_size += totlen; 177 for (raw=xd->node; raw != (void *)xd; raw=raw->next_in_ino) {
210 xd->node->flash_offset = ref_offset(xd->node) | REF_PRISTINE; 178 jeb = &c->blocks[ref_offset(raw) / c->sector_size];
179 totlen = PAD(ref_totlen(c, jeb, raw));
180 if (ref_flags(raw) == REF_UNCHECKED) {
181 c->unchecked_size -= totlen; c->used_size += totlen;
182 jeb->unchecked_size -= totlen; jeb->used_size += totlen;
183 }
184 raw->flash_offset = ref_offset(raw) | ((xd->node==raw) ? REF_PRISTINE : REF_NORMAL);
185 }
211 spin_unlock(&c->erase_completion_lock); 186 spin_unlock(&c->erase_completion_lock);
212 187
213 /* unchecked xdatum is chained with c->xattr_unchecked */ 188 /* unchecked xdatum is chained with c->xattr_unchecked */
@@ -227,7 +202,6 @@ static int do_load_xattr_datum(struct jffs2_sb_info *c, struct jffs2_xattr_datum
227 uint32_t crc, length; 202 uint32_t crc, length;
228 int i, ret, retry = 0; 203 int i, ret, retry = 0;
229 204
230 BUG_ON(!xd->node);
231 BUG_ON(ref_flags(xd->node) != REF_PRISTINE); 205 BUG_ON(ref_flags(xd->node) != REF_PRISTINE);
232 BUG_ON(!list_empty(&xd->xindex)); 206 BUG_ON(!list_empty(&xd->xindex));
233 retry: 207 retry:
@@ -253,6 +227,7 @@ static int do_load_xattr_datum(struct jffs2_sb_info *c, struct jffs2_xattr_datum
253 " at %#08x, read: 0x%08x calculated: 0x%08x\n", 227 " at %#08x, read: 0x%08x calculated: 0x%08x\n",
254 ref_offset(xd->node), xd->data_crc, crc); 228 ref_offset(xd->node), xd->data_crc, crc);
255 kfree(data); 229 kfree(data);
230 xd->flags |= JFFS2_XFLAGS_INVALID;
256 return EIO; 231 return EIO;
257 } 232 }
258 233
@@ -286,16 +261,14 @@ static int load_xattr_datum(struct jffs2_sb_info *c, struct jffs2_xattr_datum *x
286 * rc > 0 : Unrecoverable error, this node should be deleted. 261 * rc > 0 : Unrecoverable error, this node should be deleted.
287 */ 262 */
288 int rc = 0; 263 int rc = 0;
289 BUG_ON(xd->xname); 264
290 if (!xd->node) 265 BUG_ON(xd->flags & JFFS2_XFLAGS_DEAD);
266 if (xd->xname)
267 return 0;
268 if (xd->flags & JFFS2_XFLAGS_INVALID)
291 return EIO; 269 return EIO;
292 if (unlikely(ref_flags(xd->node) != REF_PRISTINE)) { 270 if (unlikely(is_xattr_datum_unchecked(c, xd)))
293 rc = do_verify_xattr_datum(c, xd); 271 rc = do_verify_xattr_datum(c, xd);
294 if (rc > 0) {
295 list_del_init(&xd->xindex);
296 delete_xattr_datum_node(c, xd);
297 }
298 }
299 if (!rc) 272 if (!rc)
300 rc = do_load_xattr_datum(c, xd); 273 rc = do_load_xattr_datum(c, xd);
301 return rc; 274 return rc;
@@ -304,7 +277,6 @@ static int load_xattr_datum(struct jffs2_sb_info *c, struct jffs2_xattr_datum *x
304static int save_xattr_datum(struct jffs2_sb_info *c, struct jffs2_xattr_datum *xd) 277static int save_xattr_datum(struct jffs2_sb_info *c, struct jffs2_xattr_datum *xd)
305{ 278{
306 /* must be called under down_write(xattr_sem) */ 279 /* must be called under down_write(xattr_sem) */
307 struct jffs2_raw_node_ref *raw;
308 struct jffs2_raw_xattr rx; 280 struct jffs2_raw_xattr rx;
309 struct kvec vecs[2]; 281 struct kvec vecs[2];
310 size_t length; 282 size_t length;
@@ -312,14 +284,16 @@ static int save_xattr_datum(struct jffs2_sb_info *c, struct jffs2_xattr_datum *x
312 uint32_t phys_ofs = write_ofs(c); 284 uint32_t phys_ofs = write_ofs(c);
313 285
314 BUG_ON(!xd->xname); 286 BUG_ON(!xd->xname);
287 BUG_ON(xd->flags & (JFFS2_XFLAGS_DEAD|JFFS2_XFLAGS_INVALID));
315 288
316 vecs[0].iov_base = &rx; 289 vecs[0].iov_base = &rx;
317 vecs[0].iov_len = PAD(sizeof(rx)); 290 vecs[0].iov_len = sizeof(rx);
318 vecs[1].iov_base = xd->xname; 291 vecs[1].iov_base = xd->xname;
319 vecs[1].iov_len = xd->name_len + 1 + xd->value_len; 292 vecs[1].iov_len = xd->name_len + 1 + xd->value_len;
320 totlen = vecs[0].iov_len + vecs[1].iov_len; 293 totlen = vecs[0].iov_len + vecs[1].iov_len;
321 294
322 /* Setup raw-xattr */ 295 /* Setup raw-xattr */
296 memset(&rx, 0, sizeof(rx));
323 rx.magic = cpu_to_je16(JFFS2_MAGIC_BITMASK); 297 rx.magic = cpu_to_je16(JFFS2_MAGIC_BITMASK);
324 rx.nodetype = cpu_to_je16(JFFS2_NODETYPE_XATTR); 298 rx.nodetype = cpu_to_je16(JFFS2_NODETYPE_XATTR);
325 rx.totlen = cpu_to_je32(PAD(totlen)); 299 rx.totlen = cpu_to_je32(PAD(totlen));
@@ -343,14 +317,8 @@ static int save_xattr_datum(struct jffs2_sb_info *c, struct jffs2_xattr_datum *x
343 317
344 return rc; 318 return rc;
345 } 319 }
346
347 /* success */ 320 /* success */
348 raw = jffs2_add_physical_node_ref(c, phys_ofs | REF_PRISTINE, PAD(totlen), NULL); 321 jffs2_add_physical_node_ref(c, phys_ofs | REF_PRISTINE, PAD(totlen), (void *)xd);
349 /* FIXME */ raw->next_in_ino = (void *)xd;
350
351 if (xd->node)
352 delete_xattr_datum_node(c, xd);
353 xd->node = raw;
354 322
355 dbg_xattr("success on saving xdatum (xid=%u, version=%u, xprefix=%u, xname='%s')\n", 323 dbg_xattr("success on saving xdatum (xid=%u, version=%u, xprefix=%u, xname='%s')\n",
356 xd->xid, xd->version, xd->xprefix, xd->xname); 324 xd->xid, xd->version, xd->xprefix, xd->xname);
@@ -377,7 +345,7 @@ static struct jffs2_xattr_datum *create_xattr_datum(struct jffs2_sb_info *c,
377 && xd->value_len==xsize 345 && xd->value_len==xsize
378 && !strcmp(xd->xname, xname) 346 && !strcmp(xd->xname, xname)
379 && !memcmp(xd->xvalue, xvalue, xsize)) { 347 && !memcmp(xd->xvalue, xvalue, xsize)) {
380 xd->refcnt++; 348 atomic_inc(&xd->refcnt);
381 return xd; 349 return xd;
382 } 350 }
383 } 351 }
@@ -397,7 +365,7 @@ static struct jffs2_xattr_datum *create_xattr_datum(struct jffs2_sb_info *c,
397 strcpy(data, xname); 365 strcpy(data, xname);
398 memcpy(data + name_len + 1, xvalue, xsize); 366 memcpy(data + name_len + 1, xvalue, xsize);
399 367
400 xd->refcnt = 1; 368 atomic_set(&xd->refcnt, 1);
401 xd->xid = ++c->highest_xid; 369 xd->xid = ++c->highest_xid;
402 xd->flags |= JFFS2_XFLAGS_HOT; 370 xd->flags |= JFFS2_XFLAGS_HOT;
403 xd->xprefix = xprefix; 371 xd->xprefix = xprefix;
@@ -426,20 +394,36 @@ static struct jffs2_xattr_datum *create_xattr_datum(struct jffs2_sb_info *c,
426 return xd; 394 return xd;
427} 395}
428 396
397static void delete_xattr_datum(struct jffs2_sb_info *c, struct jffs2_xattr_datum *xd)
398{
399 /* must be called under down_write(xattr_sem) */
400 BUG_ON(atomic_read(&xd->refcnt));
401
402 unload_xattr_datum(c, xd);
403 xd->flags |= JFFS2_XFLAGS_DEAD;
404 spin_lock(&c->erase_completion_lock);
405 if (xd->node == (void *)xd) {
406 BUG_ON(!(xd->flags & JFFS2_XFLAGS_INVALID));
407 jffs2_free_xattr_datum(xd);
408 } else {
409 list_add(&xd->xindex, &c->xattr_dead_list);
410 }
411 spin_unlock(&c->erase_completion_lock);
412 dbg_xattr("xdatum(xid=%u, version=%u) was removed.\n", xd->xid, xd->version);
413}
414
429/* -------- xref related functions ------------------ 415/* -------- xref related functions ------------------
430 * verify_xattr_ref(c, ref) 416 * verify_xattr_ref(c, ref)
431 * is used to load xref information from medium. Because summary data does not 417 * is used to load xref information from medium. Because summary data does not
432 * contain xid/ino, it's necessary to verify once while mounting process. 418 * contain xid/ino, it's necessary to verify once while mounting process.
433 * delete_xattr_ref_node(c, ref)
434 * is used to delete a jffs2 node is dominated by xref. When EBS is enabled,
435 * it overwrites the obsolete node by myself.
436 * delete_xattr_ref(c, ref)
437 * is used to delete jffs2_xattr_ref object. If the reference counter of xdatum
438 * is refered by this xref become 0, delete_xattr_datum() is called later.
439 * save_xattr_ref(c, ref) 419 * save_xattr_ref(c, ref)
440 * is used to write xref to medium. 420 * is used to write xref to medium. If delete marker is marked, it write
421 * a delete marker of xref into medium.
441 * create_xattr_ref(c, ic, xd) 422 * create_xattr_ref(c, ic, xd)
442 * is used to create a new xref and write to medium. 423 * is used to create a new xref and write to medium.
424 * delete_xattr_ref(c, ref)
425 * is used to delete jffs2_xattr_ref. It marks xref XREF_DELETE_MARKER,
426 * and allows GC to reclaim those physical nodes.
443 * jffs2_xattr_delete_inode(c, ic) 427 * jffs2_xattr_delete_inode(c, ic)
444 * is called to remove xrefs related to obsolete inode when inode is unlinked. 428 * is called to remove xrefs related to obsolete inode when inode is unlinked.
445 * jffs2_xattr_free_inode(c, ic) 429 * jffs2_xattr_free_inode(c, ic)
@@ -450,25 +434,29 @@ static struct jffs2_xattr_datum *create_xattr_datum(struct jffs2_sb_info *c,
450static int verify_xattr_ref(struct jffs2_sb_info *c, struct jffs2_xattr_ref *ref) 434static int verify_xattr_ref(struct jffs2_sb_info *c, struct jffs2_xattr_ref *ref)
451{ 435{
452 struct jffs2_eraseblock *jeb; 436 struct jffs2_eraseblock *jeb;
437 struct jffs2_raw_node_ref *raw;
453 struct jffs2_raw_xref rr; 438 struct jffs2_raw_xref rr;
454 size_t readlen; 439 size_t readlen;
455 uint32_t crc, totlen; 440 uint32_t crc, offset, totlen;
456 int rc; 441 int rc;
457 442
458 BUG_ON(ref_flags(ref->node) != REF_UNCHECKED); 443 spin_lock(&c->erase_completion_lock);
444 if (ref_flags(ref->node) != REF_UNCHECKED)
445 goto complete;
446 offset = ref_offset(ref->node);
447 spin_unlock(&c->erase_completion_lock);
459 448
460 rc = jffs2_flash_read(c, ref_offset(ref->node), sizeof(rr), &readlen, (char *)&rr); 449 rc = jffs2_flash_read(c, offset, sizeof(rr), &readlen, (char *)&rr);
461 if (rc || sizeof(rr) != readlen) { 450 if (rc || sizeof(rr) != readlen) {
462 JFFS2_WARNING("jffs2_flash_read()=%d, req=%zu, read=%zu, at %#08x\n", 451 JFFS2_WARNING("jffs2_flash_read()=%d, req=%zu, read=%zu, at %#08x\n",
463 rc, sizeof(rr), readlen, ref_offset(ref->node)); 452 rc, sizeof(rr), readlen, offset);
464 return rc ? rc : -EIO; 453 return rc ? rc : -EIO;
465 } 454 }
466 /* obsolete node */ 455 /* obsolete node */
467 crc = crc32(0, &rr, sizeof(rr) - 4); 456 crc = crc32(0, &rr, sizeof(rr) - 4);
468 if (crc != je32_to_cpu(rr.node_crc)) { 457 if (crc != je32_to_cpu(rr.node_crc)) {
469 if (je32_to_cpu(rr.node_crc) != 0xffffffff) 458 JFFS2_ERROR("node CRC failed at %#08x, read=%#08x, calc=%#08x\n",
470 JFFS2_ERROR("node CRC failed at %#08x, read=%#08x, calc=%#08x\n", 459 offset, je32_to_cpu(rr.node_crc), crc);
471 ref_offset(ref->node), je32_to_cpu(rr.node_crc), crc);
472 return EIO; 460 return EIO;
473 } 461 }
474 if (je16_to_cpu(rr.magic) != JFFS2_MAGIC_BITMASK 462 if (je16_to_cpu(rr.magic) != JFFS2_MAGIC_BITMASK
@@ -476,22 +464,28 @@ static int verify_xattr_ref(struct jffs2_sb_info *c, struct jffs2_xattr_ref *ref
476 || je32_to_cpu(rr.totlen) != PAD(sizeof(rr))) { 464 || je32_to_cpu(rr.totlen) != PAD(sizeof(rr))) {
477 JFFS2_ERROR("inconsistent xref at %#08x, magic=%#04x/%#04x, " 465 JFFS2_ERROR("inconsistent xref at %#08x, magic=%#04x/%#04x, "
478 "nodetype=%#04x/%#04x, totlen=%u/%zu\n", 466 "nodetype=%#04x/%#04x, totlen=%u/%zu\n",
479 ref_offset(ref->node), je16_to_cpu(rr.magic), JFFS2_MAGIC_BITMASK, 467 offset, je16_to_cpu(rr.magic), JFFS2_MAGIC_BITMASK,
480 je16_to_cpu(rr.nodetype), JFFS2_NODETYPE_XREF, 468 je16_to_cpu(rr.nodetype), JFFS2_NODETYPE_XREF,
481 je32_to_cpu(rr.totlen), PAD(sizeof(rr))); 469 je32_to_cpu(rr.totlen), PAD(sizeof(rr)));
482 return EIO; 470 return EIO;
483 } 471 }
484 ref->ino = je32_to_cpu(rr.ino); 472 ref->ino = je32_to_cpu(rr.ino);
485 ref->xid = je32_to_cpu(rr.xid); 473 ref->xid = je32_to_cpu(rr.xid);
486 474 ref->xseqno = je32_to_cpu(rr.xseqno);
487 /* fixup superblock/eraseblock info */ 475 if (ref->xseqno > c->highest_xseqno)
488 jeb = &c->blocks[ref_offset(ref->node) / c->sector_size]; 476 c->highest_xseqno = (ref->xseqno & ~XREF_DELETE_MARKER);
489 totlen = PAD(sizeof(rr));
490 477
491 spin_lock(&c->erase_completion_lock); 478 spin_lock(&c->erase_completion_lock);
492 c->unchecked_size -= totlen; c->used_size += totlen; 479 complete:
493 jeb->unchecked_size -= totlen; jeb->used_size += totlen; 480 for (raw=ref->node; raw != (void *)ref; raw=raw->next_in_ino) {
494 ref->node->flash_offset = ref_offset(ref->node) | REF_PRISTINE; 481 jeb = &c->blocks[ref_offset(raw) / c->sector_size];
482 totlen = PAD(ref_totlen(c, jeb, raw));
483 if (ref_flags(raw) == REF_UNCHECKED) {
484 c->unchecked_size -= totlen; c->used_size += totlen;
485 jeb->unchecked_size -= totlen; jeb->used_size += totlen;
486 }
487 raw->flash_offset = ref_offset(raw) | ((ref->node==raw) ? REF_PRISTINE : REF_NORMAL);
488 }
495 spin_unlock(&c->erase_completion_lock); 489 spin_unlock(&c->erase_completion_lock);
496 490
497 dbg_xattr("success on verifying xref (ino=%u, xid=%u) at %#08x\n", 491 dbg_xattr("success on verifying xref (ino=%u, xid=%u) at %#08x\n",
@@ -499,58 +493,12 @@ static int verify_xattr_ref(struct jffs2_sb_info *c, struct jffs2_xattr_ref *ref
499 return 0; 493 return 0;
500} 494}
501 495
502static void delete_xattr_ref_node(struct jffs2_sb_info *c, struct jffs2_xattr_ref *ref)
503{
504 struct jffs2_raw_xref rr;
505 size_t length;
506 int rc;
507
508 if (jffs2_sum_active()) {
509 memset(&rr, 0xff, sizeof(rr));
510 rc = jffs2_flash_read(c, ref_offset(ref->node),
511 sizeof(struct jffs2_unknown_node),
512 &length, (char *)&rr);
513 if (rc || length != sizeof(struct jffs2_unknown_node)) {
514 JFFS2_ERROR("jffs2_flash_read()=%d, req=%zu, read=%zu at %#08x\n",
515 rc, sizeof(struct jffs2_unknown_node),
516 length, ref_offset(ref->node));
517 }
518 rc = jffs2_flash_write(c, ref_offset(ref->node), sizeof(rr),
519 &length, (char *)&rr);
520 if (rc || length != sizeof(struct jffs2_raw_xref)) {
521 JFFS2_ERROR("jffs2_flash_write()=%d, req=%zu, wrote=%zu at %#08x\n",
522 rc, sizeof(rr), length, ref_offset(ref->node));
523 }
524 }
525 spin_lock(&c->erase_completion_lock);
526 ref->node->next_in_ino = NULL;
527 spin_unlock(&c->erase_completion_lock);
528 jffs2_mark_node_obsolete(c, ref->node);
529 ref->node = NULL;
530}
531
532static void delete_xattr_ref(struct jffs2_sb_info *c, struct jffs2_xattr_ref *ref)
533{
534 /* must be called under down_write(xattr_sem) */
535 struct jffs2_xattr_datum *xd;
536
537 BUG_ON(!ref->node);
538 delete_xattr_ref_node(c, ref);
539
540 xd = ref->xd;
541 xd->refcnt--;
542 if (!xd->refcnt)
543 delete_xattr_datum(c, xd);
544 jffs2_free_xattr_ref(ref);
545}
546
547static int save_xattr_ref(struct jffs2_sb_info *c, struct jffs2_xattr_ref *ref) 496static int save_xattr_ref(struct jffs2_sb_info *c, struct jffs2_xattr_ref *ref)
548{ 497{
549 /* must be called under down_write(xattr_sem) */ 498 /* must be called under down_write(xattr_sem) */
550 struct jffs2_raw_node_ref *raw;
551 struct jffs2_raw_xref rr; 499 struct jffs2_raw_xref rr;
552 size_t length; 500 size_t length;
553 uint32_t phys_ofs = write_ofs(c); 501 uint32_t xseqno, phys_ofs = write_ofs(c);
554 int ret; 502 int ret;
555 503
556 rr.magic = cpu_to_je16(JFFS2_MAGIC_BITMASK); 504 rr.magic = cpu_to_je16(JFFS2_MAGIC_BITMASK);
@@ -558,8 +506,16 @@ static int save_xattr_ref(struct jffs2_sb_info *c, struct jffs2_xattr_ref *ref)
558 rr.totlen = cpu_to_je32(PAD(sizeof(rr))); 506 rr.totlen = cpu_to_je32(PAD(sizeof(rr)));
559 rr.hdr_crc = cpu_to_je32(crc32(0, &rr, sizeof(struct jffs2_unknown_node) - 4)); 507 rr.hdr_crc = cpu_to_je32(crc32(0, &rr, sizeof(struct jffs2_unknown_node) - 4));
560 508
561 rr.ino = cpu_to_je32(ref->ic->ino); 509 xseqno = (c->highest_xseqno += 2);
562 rr.xid = cpu_to_je32(ref->xd->xid); 510 if (is_xattr_ref_dead(ref)) {
511 xseqno |= XREF_DELETE_MARKER;
512 rr.ino = cpu_to_je32(ref->ino);
513 rr.xid = cpu_to_je32(ref->xid);
514 } else {
515 rr.ino = cpu_to_je32(ref->ic->ino);
516 rr.xid = cpu_to_je32(ref->xd->xid);
517 }
518 rr.xseqno = cpu_to_je32(xseqno);
563 rr.node_crc = cpu_to_je32(crc32(0, &rr, sizeof(rr) - 4)); 519 rr.node_crc = cpu_to_je32(crc32(0, &rr, sizeof(rr) - 4));
564 520
565 ret = jffs2_flash_write(c, phys_ofs, sizeof(rr), &length, (char *)&rr); 521 ret = jffs2_flash_write(c, phys_ofs, sizeof(rr), &length, (char *)&rr);
@@ -572,12 +528,9 @@ static int save_xattr_ref(struct jffs2_sb_info *c, struct jffs2_xattr_ref *ref)
572 528
573 return ret; 529 return ret;
574 } 530 }
575 531 /* success */
576 raw = jffs2_add_physical_node_ref(c, phys_ofs | REF_PRISTINE, PAD(sizeof(rr)), NULL); 532 ref->xseqno = xseqno;
577 /* FIXME */ raw->next_in_ino = (void *)ref; 533 jffs2_add_physical_node_ref(c, phys_ofs | REF_PRISTINE, PAD(sizeof(rr)), (void *)ref);
578 if (ref->node)
579 delete_xattr_ref_node(c, ref);
580 ref->node = raw;
581 534
582 dbg_xattr("success on saving xref (ino=%u, xid=%u)\n", ref->ic->ino, ref->xd->xid); 535 dbg_xattr("success on saving xref (ino=%u, xid=%u)\n", ref->ic->ino, ref->xd->xid);
583 536
@@ -610,6 +563,27 @@ static struct jffs2_xattr_ref *create_xattr_ref(struct jffs2_sb_info *c, struct
610 return ref; /* success */ 563 return ref; /* success */
611} 564}
612 565
566static void delete_xattr_ref(struct jffs2_sb_info *c, struct jffs2_xattr_ref *ref)
567{
568 /* must be called under down_write(xattr_sem) */
569 struct jffs2_xattr_datum *xd;
570
571 xd = ref->xd;
572 ref->xseqno |= XREF_DELETE_MARKER;
573 ref->ino = ref->ic->ino;
574 ref->xid = ref->xd->xid;
575 spin_lock(&c->erase_completion_lock);
576 ref->next = c->xref_dead_list;
577 c->xref_dead_list = ref;
578 spin_unlock(&c->erase_completion_lock);
579
580 dbg_xattr("xref(ino=%u, xid=%u, xseqno=%u) was removed.\n",
581 ref->ino, ref->xid, ref->xseqno);
582
583 if (atomic_dec_and_test(&xd->refcnt))
584 delete_xattr_datum(c, xd);
585}
586
613void jffs2_xattr_delete_inode(struct jffs2_sb_info *c, struct jffs2_inode_cache *ic) 587void jffs2_xattr_delete_inode(struct jffs2_sb_info *c, struct jffs2_inode_cache *ic)
614{ 588{
615 /* It's called from jffs2_clear_inode() on inode removing. 589 /* It's called from jffs2_clear_inode() on inode removing.
@@ -638,8 +612,7 @@ void jffs2_xattr_free_inode(struct jffs2_sb_info *c, struct jffs2_inode_cache *i
638 for (ref = ic->xref; ref; ref = _ref) { 612 for (ref = ic->xref; ref; ref = _ref) {
639 _ref = ref->next; 613 _ref = ref->next;
640 xd = ref->xd; 614 xd = ref->xd;
641 xd->refcnt--; 615 if (atomic_dec_and_test(&xd->refcnt)) {
642 if (!xd->refcnt) {
643 unload_xattr_datum(c, xd); 616 unload_xattr_datum(c, xd);
644 jffs2_free_xattr_datum(xd); 617 jffs2_free_xattr_datum(xd);
645 } 618 }
@@ -655,7 +628,7 @@ static int check_xattr_ref_inode(struct jffs2_sb_info *c, struct jffs2_inode_cac
655 * duplicate name/value pairs. If duplicate name/value pair would be found, 628 * duplicate name/value pairs. If duplicate name/value pair would be found,
656 * one will be removed. 629 * one will be removed.
657 */ 630 */
658 struct jffs2_xattr_ref *ref, *cmp, **pref; 631 struct jffs2_xattr_ref *ref, *cmp, **pref, **pcmp;
659 int rc = 0; 632 int rc = 0;
660 633
661 if (likely(ic->flags & INO_FLAGS_XATTR_CHECKED)) 634 if (likely(ic->flags & INO_FLAGS_XATTR_CHECKED))
@@ -673,13 +646,13 @@ static int check_xattr_ref_inode(struct jffs2_sb_info *c, struct jffs2_inode_cac
673 } else if (unlikely(rc < 0)) 646 } else if (unlikely(rc < 0))
674 goto out; 647 goto out;
675 } 648 }
676 for (cmp=ref->next, pref=&ref->next; cmp; pref=&cmp->next, cmp=cmp->next) { 649 for (cmp=ref->next, pcmp=&ref->next; cmp; pcmp=&cmp->next, cmp=cmp->next) {
677 if (!cmp->xd->xname) { 650 if (!cmp->xd->xname) {
678 ref->xd->flags |= JFFS2_XFLAGS_BIND; 651 ref->xd->flags |= JFFS2_XFLAGS_BIND;
679 rc = load_xattr_datum(c, cmp->xd); 652 rc = load_xattr_datum(c, cmp->xd);
680 ref->xd->flags &= ~JFFS2_XFLAGS_BIND; 653 ref->xd->flags &= ~JFFS2_XFLAGS_BIND;
681 if (unlikely(rc > 0)) { 654 if (unlikely(rc > 0)) {
682 *pref = cmp->next; 655 *pcmp = cmp->next;
683 delete_xattr_ref(c, cmp); 656 delete_xattr_ref(c, cmp);
684 goto retry; 657 goto retry;
685 } else if (unlikely(rc < 0)) 658 } else if (unlikely(rc < 0))
@@ -687,8 +660,13 @@ static int check_xattr_ref_inode(struct jffs2_sb_info *c, struct jffs2_inode_cac
687 } 660 }
688 if (ref->xd->xprefix == cmp->xd->xprefix 661 if (ref->xd->xprefix == cmp->xd->xprefix
689 && !strcmp(ref->xd->xname, cmp->xd->xname)) { 662 && !strcmp(ref->xd->xname, cmp->xd->xname)) {
690 *pref = cmp->next; 663 if (ref->xseqno > cmp->xseqno) {
691 delete_xattr_ref(c, cmp); 664 *pcmp = cmp->next;
665 delete_xattr_ref(c, cmp);
666 } else {
667 *pref = ref->next;
668 delete_xattr_ref(c, ref);
669 }
692 goto retry; 670 goto retry;
693 } 671 }
694 } 672 }
@@ -719,9 +697,13 @@ void jffs2_init_xattr_subsystem(struct jffs2_sb_info *c)
719 for (i=0; i < XATTRINDEX_HASHSIZE; i++) 697 for (i=0; i < XATTRINDEX_HASHSIZE; i++)
720 INIT_LIST_HEAD(&c->xattrindex[i]); 698 INIT_LIST_HEAD(&c->xattrindex[i]);
721 INIT_LIST_HEAD(&c->xattr_unchecked); 699 INIT_LIST_HEAD(&c->xattr_unchecked);
700 INIT_LIST_HEAD(&c->xattr_dead_list);
701 c->xref_dead_list = NULL;
722 c->xref_temp = NULL; 702 c->xref_temp = NULL;
723 703
724 init_rwsem(&c->xattr_sem); 704 init_rwsem(&c->xattr_sem);
705 c->highest_xid = 0;
706 c->highest_xseqno = 0;
725 c->xdatum_mem_usage = 0; 707 c->xdatum_mem_usage = 0;
726 c->xdatum_mem_threshold = 32 * 1024; /* Default 32KB */ 708 c->xdatum_mem_threshold = 32 * 1024; /* Default 32KB */
727} 709}
@@ -751,7 +733,11 @@ void jffs2_clear_xattr_subsystem(struct jffs2_sb_info *c)
751 _ref = ref->next; 733 _ref = ref->next;
752 jffs2_free_xattr_ref(ref); 734 jffs2_free_xattr_ref(ref);
753 } 735 }
754 c->xref_temp = NULL; 736
737 for (ref=c->xref_dead_list; ref; ref = _ref) {
738 _ref = ref->next;
739 jffs2_free_xattr_ref(ref);
740 }
755 741
756 for (i=0; i < XATTRINDEX_HASHSIZE; i++) { 742 for (i=0; i < XATTRINDEX_HASHSIZE; i++) {
757 list_for_each_entry_safe(xd, _xd, &c->xattrindex[i], xindex) { 743 list_for_each_entry_safe(xd, _xd, &c->xattrindex[i], xindex) {
@@ -761,100 +747,143 @@ void jffs2_clear_xattr_subsystem(struct jffs2_sb_info *c)
761 jffs2_free_xattr_datum(xd); 747 jffs2_free_xattr_datum(xd);
762 } 748 }
763 } 749 }
750
751 list_for_each_entry_safe(xd, _xd, &c->xattr_dead_list, xindex) {
752 list_del(&xd->xindex);
753 jffs2_free_xattr_datum(xd);
754 }
764} 755}
765 756
757#define XREF_TMPHASH_SIZE (128)
766void jffs2_build_xattr_subsystem(struct jffs2_sb_info *c) 758void jffs2_build_xattr_subsystem(struct jffs2_sb_info *c)
767{ 759{
768 struct jffs2_xattr_ref *ref, *_ref; 760 struct jffs2_xattr_ref *ref, *_ref;
761 struct jffs2_xattr_ref *xref_tmphash[XREF_TMPHASH_SIZE];
769 struct jffs2_xattr_datum *xd, *_xd; 762 struct jffs2_xattr_datum *xd, *_xd;
770 struct jffs2_inode_cache *ic; 763 struct jffs2_inode_cache *ic;
771 int i, xdatum_count =0, xdatum_unchecked_count = 0, xref_count = 0; 764 struct jffs2_raw_node_ref *raw;
765 int i, xdatum_count = 0, xdatum_unchecked_count = 0, xref_count = 0;
766 int xdatum_orphan_count = 0, xref_orphan_count = 0, xref_dead_count = 0;
772 767
773 BUG_ON(!(c->flags & JFFS2_SB_FLAG_BUILDING)); 768 BUG_ON(!(c->flags & JFFS2_SB_FLAG_BUILDING));
774 769
775 /* Phase.1 */ 770 /* Phase.1 : Merge same xref */
771 for (i=0; i < XREF_TMPHASH_SIZE; i++)
772 xref_tmphash[i] = NULL;
776 for (ref=c->xref_temp; ref; ref=_ref) { 773 for (ref=c->xref_temp; ref; ref=_ref) {
774 struct jffs2_xattr_ref *tmp;
775
777 _ref = ref->next; 776 _ref = ref->next;
778 /* checking REF_UNCHECKED nodes */
779 if (ref_flags(ref->node) != REF_PRISTINE) { 777 if (ref_flags(ref->node) != REF_PRISTINE) {
780 if (verify_xattr_ref(c, ref)) { 778 if (verify_xattr_ref(c, ref)) {
781 delete_xattr_ref_node(c, ref); 779 BUG_ON(ref->node->next_in_ino != (void *)ref);
780 ref->node->next_in_ino = NULL;
781 jffs2_mark_node_obsolete(c, ref->node);
782 jffs2_free_xattr_ref(ref); 782 jffs2_free_xattr_ref(ref);
783 continue; 783 continue;
784 } 784 }
785 } 785 }
786 /* At this point, ref->xid and ref->ino contain XID and inode number. 786
787 ref->xd and ref->ic are not valid yet. */ 787 i = (ref->ino ^ ref->xid) % XREF_TMPHASH_SIZE;
788 xd = jffs2_find_xattr_datum(c, ref->xid); 788 for (tmp=xref_tmphash[i]; tmp; tmp=tmp->next) {
789 ic = jffs2_get_ino_cache(c, ref->ino); 789 if (tmp->ino == ref->ino && tmp->xid == ref->xid)
790 if (!xd || !ic) { 790 break;
791 if (ref_flags(ref->node) != REF_UNCHECKED) 791 }
792 JFFS2_WARNING("xref(ino=%u, xid=%u) is orphan. \n", 792 if (tmp) {
793 ref->ino, ref->xid); 793 raw = ref->node;
794 delete_xattr_ref_node(c, ref); 794 if (ref->xseqno > tmp->xseqno) {
795 tmp->xseqno = ref->xseqno;
796 raw->next_in_ino = tmp->node;
797 tmp->node = raw;
798 } else {
799 raw->next_in_ino = tmp->node->next_in_ino;
800 tmp->node->next_in_ino = raw;
801 }
795 jffs2_free_xattr_ref(ref); 802 jffs2_free_xattr_ref(ref);
796 continue; 803 continue;
804 } else {
805 ref->next = xref_tmphash[i];
806 xref_tmphash[i] = ref;
797 } 807 }
798 ref->xd = xd;
799 ref->ic = ic;
800 xd->refcnt++;
801 ref->next = ic->xref;
802 ic->xref = ref;
803 xref_count++;
804 } 808 }
805 c->xref_temp = NULL; 809 c->xref_temp = NULL;
806 /* After this, ref->xid/ino are NEVER used. */
807 810
808 /* Phase.2 */ 811 /* Phase.2 : Bind xref with inode_cache and xattr_datum */
812 for (i=0; i < XREF_TMPHASH_SIZE; i++) {
813 for (ref=xref_tmphash[i]; ref; ref=_ref) {
814 xref_count++;
815 _ref = ref->next;
816 if (is_xattr_ref_dead(ref)) {
817 ref->next = c->xref_dead_list;
818 c->xref_dead_list = ref;
819 xref_dead_count++;
820 continue;
821 }
822 /* At this point, ref->xid and ref->ino contain XID and inode number.
823 ref->xd and ref->ic are not valid yet. */
824 xd = jffs2_find_xattr_datum(c, ref->xid);
825 ic = jffs2_get_ino_cache(c, ref->ino);
826 if (!xd || !ic) {
827 dbg_xattr("xref(ino=%u, xid=%u, xseqno=%u) is orphan.\n",
828 ref->ino, ref->xid, ref->xseqno);
829 ref->xseqno |= XREF_DELETE_MARKER;
830 ref->next = c->xref_dead_list;
831 c->xref_dead_list = ref;
832 xref_orphan_count++;
833 continue;
834 }
835 ref->xd = xd;
836 ref->ic = ic;
837 atomic_inc(&xd->refcnt);
838 ref->next = ic->xref;
839 ic->xref = ref;
840 }
841 }
842
843 /* Phase.3 : Link unchecked xdatum to xattr_unchecked list */
809 for (i=0; i < XATTRINDEX_HASHSIZE; i++) { 844 for (i=0; i < XATTRINDEX_HASHSIZE; i++) {
810 list_for_each_entry_safe(xd, _xd, &c->xattrindex[i], xindex) { 845 list_for_each_entry_safe(xd, _xd, &c->xattrindex[i], xindex) {
846 xdatum_count++;
811 list_del_init(&xd->xindex); 847 list_del_init(&xd->xindex);
812 if (!xd->refcnt) { 848 if (!atomic_read(&xd->refcnt)) {
813 if (ref_flags(xd->node) != REF_UNCHECKED) 849 dbg_xattr("xdatum(xid=%u, version=%u) is orphan.\n",
814 JFFS2_WARNING("orphan xdatum(xid=%u, version=%u) at %#08x\n", 850 xd->xid, xd->version);
815 xd->xid, xd->version, ref_offset(xd->node)); 851 xd->flags |= JFFS2_XFLAGS_DEAD;
816 delete_xattr_datum(c, xd); 852 list_add(&xd->xindex, &c->xattr_unchecked);
853 xdatum_orphan_count++;
817 continue; 854 continue;
818 } 855 }
819 if (ref_flags(xd->node) != REF_PRISTINE) { 856 if (is_xattr_datum_unchecked(c, xd)) {
820 dbg_xattr("unchecked xdatum(xid=%u) at %#08x\n", 857 dbg_xattr("unchecked xdatum(xid=%u, version=%u)\n",
821 xd->xid, ref_offset(xd->node)); 858 xd->xid, xd->version);
822 list_add(&xd->xindex, &c->xattr_unchecked); 859 list_add(&xd->xindex, &c->xattr_unchecked);
823 xdatum_unchecked_count++; 860 xdatum_unchecked_count++;
824 } 861 }
825 xdatum_count++;
826 } 862 }
827 } 863 }
828 /* build complete */ 864 /* build complete */
829 JFFS2_NOTICE("complete building xattr subsystem, %u of xdatum (%u unchecked) and " 865 JFFS2_NOTICE("complete building xattr subsystem, %u of xdatum"
830 "%u of xref found.\n", xdatum_count, xdatum_unchecked_count, xref_count); 866 " (%u unchecked, %u orphan) and "
867 "%u of xref (%u dead, %u orphan) found.\n",
868 xdatum_count, xdatum_unchecked_count, xdatum_orphan_count,
869 xref_count, xref_dead_count, xref_orphan_count);
831} 870}
832 871
833struct jffs2_xattr_datum *jffs2_setup_xattr_datum(struct jffs2_sb_info *c, 872struct jffs2_xattr_datum *jffs2_setup_xattr_datum(struct jffs2_sb_info *c,
834 uint32_t xid, uint32_t version) 873 uint32_t xid, uint32_t version)
835{ 874{
836 struct jffs2_xattr_datum *xd, *_xd; 875 struct jffs2_xattr_datum *xd;
837 876
838 _xd = jffs2_find_xattr_datum(c, xid); 877 xd = jffs2_find_xattr_datum(c, xid);
839 if (_xd) { 878 if (!xd) {
840 dbg_xattr("duplicate xdatum (xid=%u, version=%u/%u) at %#08x\n", 879 xd = jffs2_alloc_xattr_datum();
841 xid, version, _xd->version, ref_offset(_xd->node)); 880 if (!xd)
842 if (version < _xd->version) 881 return ERR_PTR(-ENOMEM);
843 return ERR_PTR(-EEXIST); 882 xd->xid = xid;
844 } 883 xd->version = version;
845 xd = jffs2_alloc_xattr_datum(); 884 if (xd->xid > c->highest_xid)
846 if (!xd) 885 c->highest_xid = xd->xid;
847 return ERR_PTR(-ENOMEM); 886 list_add_tail(&xd->xindex, &c->xattrindex[xid % XATTRINDEX_HASHSIZE]);
848 xd->xid = xid;
849 xd->version = version;
850 if (xd->xid > c->highest_xid)
851 c->highest_xid = xd->xid;
852 list_add_tail(&xd->xindex, &c->xattrindex[xid % XATTRINDEX_HASHSIZE]);
853
854 if (_xd) {
855 list_del_init(&_xd->xindex);
856 delete_xattr_datum_node(c, _xd);
857 jffs2_free_xattr_datum(_xd);
858 } 887 }
859 return xd; 888 return xd;
860} 889}
@@ -1080,9 +1109,23 @@ int do_jffs2_setxattr(struct inode *inode, int xprefix, const char *xname,
1080 goto out; 1109 goto out;
1081 } 1110 }
1082 if (!buffer) { 1111 if (!buffer) {
1083 *pref = ref->next; 1112 ref->ino = ic->ino;
1084 delete_xattr_ref(c, ref); 1113 ref->xid = xd->xid;
1085 rc = 0; 1114 ref->xseqno |= XREF_DELETE_MARKER;
1115 rc = save_xattr_ref(c, ref);
1116 if (!rc) {
1117 *pref = ref->next;
1118 spin_lock(&c->erase_completion_lock);
1119 ref->next = c->xref_dead_list;
1120 c->xref_dead_list = ref;
1121 spin_unlock(&c->erase_completion_lock);
1122 if (atomic_dec_and_test(&xd->refcnt))
1123 delete_xattr_datum(c, xd);
1124 } else {
1125 ref->ic = ic;
1126 ref->xd = xd;
1127 ref->xseqno &= ~XREF_DELETE_MARKER;
1128 }
1086 goto out; 1129 goto out;
1087 } 1130 }
1088 goto found; 1131 goto found;
@@ -1094,7 +1137,7 @@ int do_jffs2_setxattr(struct inode *inode, int xprefix, const char *xname,
1094 goto out; 1137 goto out;
1095 } 1138 }
1096 if (!buffer) { 1139 if (!buffer) {
1097 rc = -EINVAL; 1140 rc = -ENODATA;
1098 goto out; 1141 goto out;
1099 } 1142 }
1100 found: 1143 found:
@@ -1110,16 +1153,14 @@ int do_jffs2_setxattr(struct inode *inode, int xprefix, const char *xname,
1110 request = PAD(sizeof(struct jffs2_raw_xref)); 1153 request = PAD(sizeof(struct jffs2_raw_xref));
1111 rc = jffs2_reserve_space(c, request, &length, 1154 rc = jffs2_reserve_space(c, request, &length,
1112 ALLOC_NORMAL, JFFS2_SUMMARY_XREF_SIZE); 1155 ALLOC_NORMAL, JFFS2_SUMMARY_XREF_SIZE);
1156 down_write(&c->xattr_sem);
1113 if (rc) { 1157 if (rc) {
1114 JFFS2_WARNING("jffs2_reserve_space()=%d, request=%u\n", rc, request); 1158 JFFS2_WARNING("jffs2_reserve_space()=%d, request=%u\n", rc, request);
1115 down_write(&c->xattr_sem); 1159 if (atomic_dec_and_test(&xd->refcnt))
1116 xd->refcnt--;
1117 if (!xd->refcnt)
1118 delete_xattr_datum(c, xd); 1160 delete_xattr_datum(c, xd);
1119 up_write(&c->xattr_sem); 1161 up_write(&c->xattr_sem);
1120 return rc; 1162 return rc;
1121 } 1163 }
1122 down_write(&c->xattr_sem);
1123 if (ref) 1164 if (ref)
1124 *pref = ref->next; 1165 *pref = ref->next;
1125 newref = create_xattr_ref(c, ic, xd); 1166 newref = create_xattr_ref(c, ic, xd);
@@ -1129,8 +1170,7 @@ int do_jffs2_setxattr(struct inode *inode, int xprefix, const char *xname,
1129 ic->xref = ref; 1170 ic->xref = ref;
1130 } 1171 }
1131 rc = PTR_ERR(newref); 1172 rc = PTR_ERR(newref);
1132 xd->refcnt--; 1173 if (atomic_dec_and_test(&xd->refcnt))
1133 if (!xd->refcnt)
1134 delete_xattr_datum(c, xd); 1174 delete_xattr_datum(c, xd);
1135 } else if (ref) { 1175 } else if (ref) {
1136 delete_xattr_ref(c, ref); 1176 delete_xattr_ref(c, ref);
@@ -1142,38 +1182,40 @@ int do_jffs2_setxattr(struct inode *inode, int xprefix, const char *xname,
1142} 1182}
1143 1183
1144/* -------- garbage collector functions ------------- 1184/* -------- garbage collector functions -------------
1145 * jffs2_garbage_collect_xattr_datum(c, xd) 1185 * jffs2_garbage_collect_xattr_datum(c, xd, raw)
1146 * is used to move xdatum into new node. 1186 * is used to move xdatum into new node.
1147 * jffs2_garbage_collect_xattr_ref(c, ref) 1187 * jffs2_garbage_collect_xattr_ref(c, ref, raw)
1148 * is used to move xref into new node. 1188 * is used to move xref into new node.
1149 * jffs2_verify_xattr(c) 1189 * jffs2_verify_xattr(c)
1150 * is used to call do_verify_xattr_datum() before garbage collecting. 1190 * is used to call do_verify_xattr_datum() before garbage collecting.
1191 * jffs2_release_xattr_datum(c, xd)
1192 * is used to release an in-memory object of xdatum.
1193 * jffs2_release_xattr_ref(c, ref)
1194 * is used to release an in-memory object of xref.
1151 * -------------------------------------------------- */ 1195 * -------------------------------------------------- */
1152int jffs2_garbage_collect_xattr_datum(struct jffs2_sb_info *c, struct jffs2_xattr_datum *xd) 1196int jffs2_garbage_collect_xattr_datum(struct jffs2_sb_info *c, struct jffs2_xattr_datum *xd,
1197 struct jffs2_raw_node_ref *raw)
1153{ 1198{
1154 uint32_t totlen, length, old_ofs; 1199 uint32_t totlen, length, old_ofs;
1155 int rc = -EINVAL; 1200 int rc = 0;
1156 1201
1157 down_write(&c->xattr_sem); 1202 down_write(&c->xattr_sem);
1158 BUG_ON(!xd->node); 1203 if (xd->node != raw)
1159 1204 goto out;
1160 old_ofs = ref_offset(xd->node); 1205 if (xd->flags & (JFFS2_XFLAGS_DEAD|JFFS2_XFLAGS_INVALID))
1161 totlen = ref_totlen(c, c->gcblock, xd->node);
1162 if (totlen < sizeof(struct jffs2_raw_xattr))
1163 goto out; 1206 goto out;
1164 1207
1165 if (!xd->xname) { 1208 rc = load_xattr_datum(c, xd);
1166 rc = load_xattr_datum(c, xd); 1209 if (unlikely(rc)) {
1167 if (unlikely(rc > 0)) { 1210 rc = (rc > 0) ? 0 : rc;
1168 delete_xattr_datum_node(c, xd); 1211 goto out;
1169 rc = 0;
1170 goto out;
1171 } else if (unlikely(rc < 0))
1172 goto out;
1173 } 1212 }
1213 old_ofs = ref_offset(xd->node);
1214 totlen = PAD(sizeof(struct jffs2_raw_xattr)
1215 + xd->name_len + 1 + xd->value_len);
1174 rc = jffs2_reserve_space_gc(c, totlen, &length, JFFS2_SUMMARY_XATTR_SIZE); 1216 rc = jffs2_reserve_space_gc(c, totlen, &length, JFFS2_SUMMARY_XATTR_SIZE);
1175 if (rc || length < totlen) { 1217 if (rc) {
1176 JFFS2_WARNING("jffs2_reserve_space()=%d, request=%u\n", rc, totlen); 1218 JFFS2_WARNING("jffs2_reserve_space_gc()=%d, request=%u\n", rc, totlen);
1177 rc = rc ? rc : -EBADFD; 1219 rc = rc ? rc : -EBADFD;
1178 goto out; 1220 goto out;
1179 } 1221 }
@@ -1182,27 +1224,32 @@ int jffs2_garbage_collect_xattr_datum(struct jffs2_sb_info *c, struct jffs2_xatt
1182 dbg_xattr("xdatum (xid=%u, version=%u) GC'ed from %#08x to %08x\n", 1224 dbg_xattr("xdatum (xid=%u, version=%u) GC'ed from %#08x to %08x\n",
1183 xd->xid, xd->version, old_ofs, ref_offset(xd->node)); 1225 xd->xid, xd->version, old_ofs, ref_offset(xd->node));
1184 out: 1226 out:
1227 if (!rc)
1228 jffs2_mark_node_obsolete(c, raw);
1185 up_write(&c->xattr_sem); 1229 up_write(&c->xattr_sem);
1186 return rc; 1230 return rc;
1187} 1231}
1188 1232
1189 1233int jffs2_garbage_collect_xattr_ref(struct jffs2_sb_info *c, struct jffs2_xattr_ref *ref,
1190int jffs2_garbage_collect_xattr_ref(struct jffs2_sb_info *c, struct jffs2_xattr_ref *ref) 1234 struct jffs2_raw_node_ref *raw)
1191{ 1235{
1192 uint32_t totlen, length, old_ofs; 1236 uint32_t totlen, length, old_ofs;
1193 int rc = -EINVAL; 1237 int rc = 0;
1194 1238
1195 down_write(&c->xattr_sem); 1239 down_write(&c->xattr_sem);
1196 BUG_ON(!ref->node); 1240 BUG_ON(!ref->node);
1197 1241
1242 if (ref->node != raw)
1243 goto out;
1244 if (is_xattr_ref_dead(ref) && (raw->next_in_ino == (void *)ref))
1245 goto out;
1246
1198 old_ofs = ref_offset(ref->node); 1247 old_ofs = ref_offset(ref->node);
1199 totlen = ref_totlen(c, c->gcblock, ref->node); 1248 totlen = ref_totlen(c, c->gcblock, ref->node);
1200 if (totlen != sizeof(struct jffs2_raw_xref))
1201 goto out;
1202 1249
1203 rc = jffs2_reserve_space_gc(c, totlen, &length, JFFS2_SUMMARY_XREF_SIZE); 1250 rc = jffs2_reserve_space_gc(c, totlen, &length, JFFS2_SUMMARY_XREF_SIZE);
1204 if (rc || length < totlen) { 1251 if (rc) {
1205 JFFS2_WARNING("%s: jffs2_reserve_space() = %d, request = %u\n", 1252 JFFS2_WARNING("%s: jffs2_reserve_space_gc() = %d, request = %u\n",
1206 __FUNCTION__, rc, totlen); 1253 __FUNCTION__, rc, totlen);
1207 rc = rc ? rc : -EBADFD; 1254 rc = rc ? rc : -EBADFD;
1208 goto out; 1255 goto out;
@@ -1212,6 +1259,8 @@ int jffs2_garbage_collect_xattr_ref(struct jffs2_sb_info *c, struct jffs2_xattr_
1212 dbg_xattr("xref (ino=%u, xid=%u) GC'ed from %#08x to %08x\n", 1259 dbg_xattr("xref (ino=%u, xid=%u) GC'ed from %#08x to %08x\n",
1213 ref->ic->ino, ref->xd->xid, old_ofs, ref_offset(ref->node)); 1260 ref->ic->ino, ref->xd->xid, old_ofs, ref_offset(ref->node));
1214 out: 1261 out:
1262 if (!rc)
1263 jffs2_mark_node_obsolete(c, raw);
1215 up_write(&c->xattr_sem); 1264 up_write(&c->xattr_sem);
1216 return rc; 1265 return rc;
1217} 1266}
@@ -1219,20 +1268,59 @@ int jffs2_garbage_collect_xattr_ref(struct jffs2_sb_info *c, struct jffs2_xattr_
1219int jffs2_verify_xattr(struct jffs2_sb_info *c) 1268int jffs2_verify_xattr(struct jffs2_sb_info *c)
1220{ 1269{
1221 struct jffs2_xattr_datum *xd, *_xd; 1270 struct jffs2_xattr_datum *xd, *_xd;
1271 struct jffs2_eraseblock *jeb;
1272 struct jffs2_raw_node_ref *raw;
1273 uint32_t totlen;
1222 int rc; 1274 int rc;
1223 1275
1224 down_write(&c->xattr_sem); 1276 down_write(&c->xattr_sem);
1225 list_for_each_entry_safe(xd, _xd, &c->xattr_unchecked, xindex) { 1277 list_for_each_entry_safe(xd, _xd, &c->xattr_unchecked, xindex) {
1226 rc = do_verify_xattr_datum(c, xd); 1278 rc = do_verify_xattr_datum(c, xd);
1227 if (rc == 0) { 1279 if (rc < 0)
1228 list_del_init(&xd->xindex); 1280 continue;
1229 break; 1281 list_del_init(&xd->xindex);
1230 } else if (rc > 0) { 1282 spin_lock(&c->erase_completion_lock);
1231 list_del_init(&xd->xindex); 1283 for (raw=xd->node; raw != (void *)xd; raw=raw->next_in_ino) {
1232 delete_xattr_datum_node(c, xd); 1284 if (ref_flags(raw) != REF_UNCHECKED)
1285 continue;
1286 jeb = &c->blocks[ref_offset(raw) / c->sector_size];
1287 totlen = PAD(ref_totlen(c, jeb, raw));
1288 c->unchecked_size -= totlen; c->used_size += totlen;
1289 jeb->unchecked_size -= totlen; jeb->used_size += totlen;
1290 raw->flash_offset = ref_offset(raw)
1291 | ((xd->node == (void *)raw) ? REF_PRISTINE : REF_NORMAL);
1233 } 1292 }
1293 if (xd->flags & JFFS2_XFLAGS_DEAD)
1294 list_add(&xd->xindex, &c->xattr_dead_list);
1295 spin_unlock(&c->erase_completion_lock);
1234 } 1296 }
1235 up_write(&c->xattr_sem); 1297 up_write(&c->xattr_sem);
1236
1237 return list_empty(&c->xattr_unchecked) ? 1 : 0; 1298 return list_empty(&c->xattr_unchecked) ? 1 : 0;
1238} 1299}
1300
1301void jffs2_release_xattr_datum(struct jffs2_sb_info *c, struct jffs2_xattr_datum *xd)
1302{
1303 /* must be called under spin_lock(&c->erase_completion_lock) */
1304 if (atomic_read(&xd->refcnt) || xd->node != (void *)xd)
1305 return;
1306
1307 list_del(&xd->xindex);
1308 jffs2_free_xattr_datum(xd);
1309}
1310
1311void jffs2_release_xattr_ref(struct jffs2_sb_info *c, struct jffs2_xattr_ref *ref)
1312{
1313 /* must be called under spin_lock(&c->erase_completion_lock) */
1314 struct jffs2_xattr_ref *tmp, **ptmp;
1315
1316 if (ref->node != (void *)ref)
1317 return;
1318
1319 for (tmp=c->xref_dead_list, ptmp=&c->xref_dead_list; tmp; ptmp=&tmp->next, tmp=tmp->next) {
1320 if (ref == tmp) {
1321 *ptmp = tmp->next;
1322 break;
1323 }
1324 }
1325 jffs2_free_xattr_ref(ref);
1326}
diff --git a/fs/jffs2/xattr.h b/fs/jffs2/xattr.h
index 2c199856c582..06a5c69dcf8b 100644
--- a/fs/jffs2/xattr.h
+++ b/fs/jffs2/xattr.h
@@ -16,6 +16,8 @@
16 16
17#define JFFS2_XFLAGS_HOT (0x01) /* This datum is HOT */ 17#define JFFS2_XFLAGS_HOT (0x01) /* This datum is HOT */
18#define JFFS2_XFLAGS_BIND (0x02) /* This datum is not reclaimed */ 18#define JFFS2_XFLAGS_BIND (0x02) /* This datum is not reclaimed */
19#define JFFS2_XFLAGS_DEAD (0x40) /* This datum is already dead */
20#define JFFS2_XFLAGS_INVALID (0x80) /* This datum contains crc error */
19 21
20struct jffs2_xattr_datum 22struct jffs2_xattr_datum
21{ 23{
@@ -23,10 +25,10 @@ struct jffs2_xattr_datum
23 struct jffs2_raw_node_ref *node; 25 struct jffs2_raw_node_ref *node;
24 uint8_t class; 26 uint8_t class;
25 uint8_t flags; 27 uint8_t flags;
26 uint16_t xprefix; /* see JFFS2_XATTR_PREFIX_* */ 28 uint16_t xprefix; /* see JFFS2_XATTR_PREFIX_* */
27 29
28 struct list_head xindex; /* chained from c->xattrindex[n] */ 30 struct list_head xindex; /* chained from c->xattrindex[n] */
29 uint32_t refcnt; /* # of xattr_ref refers this */ 31 atomic_t refcnt; /* # of xattr_ref refers this */
30 uint32_t xid; 32 uint32_t xid;
31 uint32_t version; 33 uint32_t version;
32 34
@@ -47,6 +49,7 @@ struct jffs2_xattr_ref
47 uint8_t flags; /* Currently unused */ 49 uint8_t flags; /* Currently unused */
48 u16 unused; 50 u16 unused;
49 51
52 uint32_t xseqno;
50 union { 53 union {
51 struct jffs2_inode_cache *ic; /* reference to jffs2_inode_cache */ 54 struct jffs2_inode_cache *ic; /* reference to jffs2_inode_cache */
52 uint32_t ino; /* only used in scanning/building */ 55 uint32_t ino; /* only used in scanning/building */
@@ -58,6 +61,12 @@ struct jffs2_xattr_ref
58 struct jffs2_xattr_ref *next; /* chained from ic->xref_list */ 61 struct jffs2_xattr_ref *next; /* chained from ic->xref_list */
59}; 62};
60 63
64#define XREF_DELETE_MARKER (0x00000001)
65static inline int is_xattr_ref_dead(struct jffs2_xattr_ref *ref)
66{
67 return ((ref->xseqno & XREF_DELETE_MARKER) != 0);
68}
69
61#ifdef CONFIG_JFFS2_FS_XATTR 70#ifdef CONFIG_JFFS2_FS_XATTR
62 71
63extern void jffs2_init_xattr_subsystem(struct jffs2_sb_info *c); 72extern void jffs2_init_xattr_subsystem(struct jffs2_sb_info *c);
@@ -70,9 +79,13 @@ extern struct jffs2_xattr_datum *jffs2_setup_xattr_datum(struct jffs2_sb_info *c
70extern void jffs2_xattr_delete_inode(struct jffs2_sb_info *c, struct jffs2_inode_cache *ic); 79extern void jffs2_xattr_delete_inode(struct jffs2_sb_info *c, struct jffs2_inode_cache *ic);
71extern void jffs2_xattr_free_inode(struct jffs2_sb_info *c, struct jffs2_inode_cache *ic); 80extern void jffs2_xattr_free_inode(struct jffs2_sb_info *c, struct jffs2_inode_cache *ic);
72 81
73extern int jffs2_garbage_collect_xattr_datum(struct jffs2_sb_info *c, struct jffs2_xattr_datum *xd); 82extern int jffs2_garbage_collect_xattr_datum(struct jffs2_sb_info *c, struct jffs2_xattr_datum *xd,
74extern int jffs2_garbage_collect_xattr_ref(struct jffs2_sb_info *c, struct jffs2_xattr_ref *ref); 83 struct jffs2_raw_node_ref *raw);
84extern int jffs2_garbage_collect_xattr_ref(struct jffs2_sb_info *c, struct jffs2_xattr_ref *ref,
85 struct jffs2_raw_node_ref *raw);
75extern int jffs2_verify_xattr(struct jffs2_sb_info *c); 86extern int jffs2_verify_xattr(struct jffs2_sb_info *c);
87extern void jffs2_release_xattr_datum(struct jffs2_sb_info *c, struct jffs2_xattr_datum *xd);
88extern void jffs2_release_xattr_ref(struct jffs2_sb_info *c, struct jffs2_xattr_ref *ref);
76 89
77extern int do_jffs2_getxattr(struct inode *inode, int xprefix, const char *xname, 90extern int do_jffs2_getxattr(struct inode *inode, int xprefix, const char *xname,
78 char *buffer, size_t size); 91 char *buffer, size_t size);
diff --git a/fs/jfs/jfs_extent.c b/fs/jfs/jfs_extent.c
index 5549378358bf..4d52593a5fc6 100644
--- a/fs/jfs/jfs_extent.c
+++ b/fs/jfs/jfs_extent.c
@@ -126,7 +126,7 @@ extAlloc(struct inode *ip, s64 xlen, s64 pno, xad_t * xp, boolean_t abnr)
126 126
127 /* allocate the disk blocks for the extent. initially, extBalloc() 127 /* allocate the disk blocks for the extent. initially, extBalloc()
128 * will try to allocate disk blocks for the requested size (xlen). 128 * will try to allocate disk blocks for the requested size (xlen).
129 * if this fails (xlen contigious free blocks not avaliable), it'll 129 * if this fails (xlen contiguous free blocks not avaliable), it'll
130 * try to allocate a smaller number of blocks (producing a smaller 130 * try to allocate a smaller number of blocks (producing a smaller
131 * extent), with this smaller number of blocks consisting of the 131 * extent), with this smaller number of blocks consisting of the
132 * requested number of blocks rounded down to the next smaller 132 * requested number of blocks rounded down to the next smaller
@@ -493,7 +493,7 @@ int extFill(struct inode *ip, xad_t * xp)
493 * 493 *
494 * initially, we will try to allocate disk blocks for the 494 * initially, we will try to allocate disk blocks for the
495 * requested size (nblocks). if this fails (nblocks 495 * requested size (nblocks). if this fails (nblocks
496 * contigious free blocks not avaliable), we'll try to allocate 496 * contiguous free blocks not avaliable), we'll try to allocate
497 * a smaller number of blocks (producing a smaller extent), with 497 * a smaller number of blocks (producing a smaller extent), with
498 * this smaller number of blocks consisting of the requested 498 * this smaller number of blocks consisting of the requested
499 * number of blocks rounded down to the next smaller power of 2 499 * number of blocks rounded down to the next smaller power of 2
@@ -529,7 +529,7 @@ extBalloc(struct inode *ip, s64 hint, s64 * nblocks, s64 * blkno)
529 529
530 /* get the number of blocks to initially attempt to allocate. 530 /* get the number of blocks to initially attempt to allocate.
531 * we'll first try the number of blocks requested unless this 531 * we'll first try the number of blocks requested unless this
532 * number is greater than the maximum number of contigious free 532 * number is greater than the maximum number of contiguous free
533 * blocks in the map. in that case, we'll start off with the 533 * blocks in the map. in that case, we'll start off with the
534 * maximum free. 534 * maximum free.
535 */ 535 */
@@ -586,7 +586,7 @@ extBalloc(struct inode *ip, s64 hint, s64 * nblocks, s64 * blkno)
586 * in place. if this fails, we'll try to move the extent 586 * in place. if this fails, we'll try to move the extent
587 * to a new set of blocks. if moving the extent, we initially 587 * to a new set of blocks. if moving the extent, we initially
588 * will try to allocate disk blocks for the requested size 588 * will try to allocate disk blocks for the requested size
589 * (nnew). if this fails (nnew contigious free blocks not 589 * (nnew). if this fails (new contiguous free blocks not
590 * avaliable), we'll try to allocate a smaller number of 590 * avaliable), we'll try to allocate a smaller number of
591 * blocks (producing a smaller extent), with this smaller 591 * blocks (producing a smaller extent), with this smaller
592 * number of blocks consisting of the requested number of 592 * number of blocks consisting of the requested number of
diff --git a/fs/libfs.c b/fs/libfs.c
index fc785d8befb9..ac02ea602c3d 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -149,10 +149,9 @@ int dcache_readdir(struct file * filp, void * dirent, filldir_t filldir)
149 /* fallthrough */ 149 /* fallthrough */
150 default: 150 default:
151 spin_lock(&dcache_lock); 151 spin_lock(&dcache_lock);
152 if (filp->f_pos == 2) { 152 if (filp->f_pos == 2)
153 list_del(q); 153 list_move(q, &dentry->d_subdirs);
154 list_add(q, &dentry->d_subdirs); 154
155 }
156 for (p=q->next; p != &dentry->d_subdirs; p=p->next) { 155 for (p=q->next; p != &dentry->d_subdirs; p=p->next) {
157 struct dentry *next; 156 struct dentry *next;
158 next = list_entry(p, struct dentry, d_u.d_child); 157 next = list_entry(p, struct dentry, d_u.d_child);
@@ -164,8 +163,7 @@ int dcache_readdir(struct file * filp, void * dirent, filldir_t filldir)
164 return 0; 163 return 0;
165 spin_lock(&dcache_lock); 164 spin_lock(&dcache_lock);
166 /* next is still alive */ 165 /* next is still alive */
167 list_del(q); 166 list_move(q, p);
168 list_add(q, p);
169 p = q; 167 p = q;
170 filp->f_pos++; 168 filp->f_pos++;
171 } 169 }
diff --git a/fs/namespace.c b/fs/namespace.c
index 866430bb024d..b3ed212ea416 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -526,10 +526,8 @@ void umount_tree(struct vfsmount *mnt, int propagate, struct list_head *kill)
526{ 526{
527 struct vfsmount *p; 527 struct vfsmount *p;
528 528
529 for (p = mnt; p; p = next_mnt(p, mnt)) { 529 for (p = mnt; p; p = next_mnt(p, mnt))
530 list_del(&p->mnt_hash); 530 list_move(&p->mnt_hash, kill);
531 list_add(&p->mnt_hash, kill);
532 }
533 531
534 if (propagate) 532 if (propagate)
535 propagate_umount(kill); 533 propagate_umount(kill);
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 402005c35ab3..8ca9707be6c9 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -909,7 +909,7 @@ int __init nfs_init_directcache(void)
909 * nfs_destroy_directcache - destroy the slab cache for nfs_direct_req structures 909 * nfs_destroy_directcache - destroy the slab cache for nfs_direct_req structures
910 * 910 *
911 */ 911 */
912void __exit nfs_destroy_directcache(void) 912void nfs_destroy_directcache(void)
913{ 913{
914 if (kmem_cache_destroy(nfs_direct_cachep)) 914 if (kmem_cache_destroy(nfs_direct_cachep))
915 printk(KERN_INFO "nfs_direct_cache: not all structures were freed\n"); 915 printk(KERN_INFO "nfs_direct_cache: not all structures were freed\n");
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 51bc88b662fe..c5b916605fb0 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -1132,7 +1132,7 @@ static int __init nfs_init_inodecache(void)
1132 return 0; 1132 return 0;
1133} 1133}
1134 1134
1135static void __exit nfs_destroy_inodecache(void) 1135static void nfs_destroy_inodecache(void)
1136{ 1136{
1137 if (kmem_cache_destroy(nfs_inode_cachep)) 1137 if (kmem_cache_destroy(nfs_inode_cachep))
1138 printk(KERN_INFO "nfs_inode_cache: not all structures were freed\n"); 1138 printk(KERN_INFO "nfs_inode_cache: not all structures were freed\n");
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index bd2815e2dec1..4fe51c1292bb 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -31,15 +31,15 @@ extern struct svc_version nfs4_callback_version1;
31 31
32/* pagelist.c */ 32/* pagelist.c */
33extern int __init nfs_init_nfspagecache(void); 33extern int __init nfs_init_nfspagecache(void);
34extern void __exit nfs_destroy_nfspagecache(void); 34extern void nfs_destroy_nfspagecache(void);
35extern int __init nfs_init_readpagecache(void); 35extern int __init nfs_init_readpagecache(void);
36extern void __exit nfs_destroy_readpagecache(void); 36extern void nfs_destroy_readpagecache(void);
37extern int __init nfs_init_writepagecache(void); 37extern int __init nfs_init_writepagecache(void);
38extern void __exit nfs_destroy_writepagecache(void); 38extern void nfs_destroy_writepagecache(void);
39 39
40#ifdef CONFIG_NFS_DIRECTIO 40#ifdef CONFIG_NFS_DIRECTIO
41extern int __init nfs_init_directcache(void); 41extern int __init nfs_init_directcache(void);
42extern void __exit nfs_destroy_directcache(void); 42extern void nfs_destroy_directcache(void);
43#else 43#else
44#define nfs_init_directcache() (0) 44#define nfs_init_directcache() (0)
45#define nfs_destroy_directcache() do {} while(0) 45#define nfs_destroy_directcache() do {} while(0)
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index ef9429643ebc..d89f6fb3b3a3 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -390,7 +390,7 @@ int __init nfs_init_nfspagecache(void)
390 return 0; 390 return 0;
391} 391}
392 392
393void __exit nfs_destroy_nfspagecache(void) 393void nfs_destroy_nfspagecache(void)
394{ 394{
395 if (kmem_cache_destroy(nfs_page_cachep)) 395 if (kmem_cache_destroy(nfs_page_cachep))
396 printk(KERN_INFO "nfs_page: not all structures were freed\n"); 396 printk(KERN_INFO "nfs_page: not all structures were freed\n");
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index 41c2ffee24f5..32cf3773af0c 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -711,7 +711,7 @@ int __init nfs_init_readpagecache(void)
711 return 0; 711 return 0;
712} 712}
713 713
714void __exit nfs_destroy_readpagecache(void) 714void nfs_destroy_readpagecache(void)
715{ 715{
716 mempool_destroy(nfs_rdata_mempool); 716 mempool_destroy(nfs_rdata_mempool);
717 if (kmem_cache_destroy(nfs_rdata_cachep)) 717 if (kmem_cache_destroy(nfs_rdata_cachep))
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index b383fdd3a15c..8fccb9cb173b 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -1551,7 +1551,7 @@ int __init nfs_init_writepagecache(void)
1551 return 0; 1551 return 0;
1552} 1552}
1553 1553
1554void __exit nfs_destroy_writepagecache(void) 1554void nfs_destroy_writepagecache(void)
1555{ 1555{
1556 mempool_destroy(nfs_commit_mempool); 1556 mempool_destroy(nfs_commit_mempool);
1557 mempool_destroy(nfs_wdata_mempool); 1557 mempool_destroy(nfs_wdata_mempool);
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 96c7578cbe1e..7c7d01672d35 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -123,7 +123,7 @@ static void release_stateid(struct nfs4_stateid *stp, int flags);
123 */ 123 */
124 124
125/* recall_lock protects the del_recall_lru */ 125/* recall_lock protects the del_recall_lru */
126static spinlock_t recall_lock = SPIN_LOCK_UNLOCKED; 126static DEFINE_SPINLOCK(recall_lock);
127static struct list_head del_recall_lru; 127static struct list_head del_recall_lru;
128 128
129static void 129static void
@@ -529,8 +529,7 @@ move_to_confirmed(struct nfs4_client *clp)
529 529
530 dprintk("NFSD: move_to_confirm nfs4_client %p\n", clp); 530 dprintk("NFSD: move_to_confirm nfs4_client %p\n", clp);
531 list_del_init(&clp->cl_strhash); 531 list_del_init(&clp->cl_strhash);
532 list_del_init(&clp->cl_idhash); 532 list_move(&clp->cl_idhash, &conf_id_hashtbl[idhashval]);
533 list_add(&clp->cl_idhash, &conf_id_hashtbl[idhashval]);
534 strhashval = clientstr_hashval(clp->cl_recdir); 533 strhashval = clientstr_hashval(clp->cl_recdir);
535 list_add(&clp->cl_strhash, &conf_str_hashtbl[strhashval]); 534 list_add(&clp->cl_strhash, &conf_str_hashtbl[strhashval]);
536 renew_client(clp); 535 renew_client(clp);
diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c
index d852ebb538e3..fdf7cf3dfadc 100644
--- a/fs/nfsd/nfscache.c
+++ b/fs/nfsd/nfscache.c
@@ -103,8 +103,7 @@ nfsd_cache_shutdown(void)
103static void 103static void
104lru_put_end(struct svc_cacherep *rp) 104lru_put_end(struct svc_cacherep *rp)
105{ 105{
106 list_del(&rp->c_lru); 106 list_move_tail(&rp->c_lru, &lru_head);
107 list_add_tail(&rp->c_lru, &lru_head);
108} 107}
109 108
110/* 109/*
diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c
index 21f38accd039..1d26cfcd9f84 100644
--- a/fs/ocfs2/cluster/heartbeat.c
+++ b/fs/ocfs2/cluster/heartbeat.c
@@ -54,7 +54,7 @@ static DECLARE_RWSEM(o2hb_callback_sem);
54 * multiple hb threads are watching multiple regions. A node is live 54 * multiple hb threads are watching multiple regions. A node is live
55 * whenever any of the threads sees activity from the node in its region. 55 * whenever any of the threads sees activity from the node in its region.
56 */ 56 */
57static spinlock_t o2hb_live_lock = SPIN_LOCK_UNLOCKED; 57static DEFINE_SPINLOCK(o2hb_live_lock);
58static struct list_head o2hb_live_slots[O2NM_MAX_NODES]; 58static struct list_head o2hb_live_slots[O2NM_MAX_NODES];
59static unsigned long o2hb_live_node_bitmap[BITS_TO_LONGS(O2NM_MAX_NODES)]; 59static unsigned long o2hb_live_node_bitmap[BITS_TO_LONGS(O2NM_MAX_NODES)];
60static LIST_HEAD(o2hb_node_events); 60static LIST_HEAD(o2hb_node_events);
diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c
index 0f60cc0d3985..1591eb37a723 100644
--- a/fs/ocfs2/cluster/tcp.c
+++ b/fs/ocfs2/cluster/tcp.c
@@ -108,7 +108,7 @@
108 ##args); \ 108 ##args); \
109} while (0) 109} while (0)
110 110
111static rwlock_t o2net_handler_lock = RW_LOCK_UNLOCKED; 111static DEFINE_RWLOCK(o2net_handler_lock);
112static struct rb_root o2net_handler_tree = RB_ROOT; 112static struct rb_root o2net_handler_tree = RB_ROOT;
113 113
114static struct o2net_node o2net_nodes[O2NM_MAX_NODES]; 114static struct o2net_node o2net_nodes[O2NM_MAX_NODES];
diff --git a/fs/ocfs2/dlm/dlmast.c b/fs/ocfs2/dlm/dlmast.c
index 355593dd8ef8..42775e2bbe2c 100644
--- a/fs/ocfs2/dlm/dlmast.c
+++ b/fs/ocfs2/dlm/dlmast.c
@@ -197,12 +197,14 @@ static void dlm_update_lvb(struct dlm_ctxt *dlm, struct dlm_lock_resource *res,
197 lock->ml.node == dlm->node_num ? "master" : 197 lock->ml.node == dlm->node_num ? "master" :
198 "remote"); 198 "remote");
199 memcpy(lksb->lvb, res->lvb, DLM_LVB_LEN); 199 memcpy(lksb->lvb, res->lvb, DLM_LVB_LEN);
200 } else if (lksb->flags & DLM_LKSB_PUT_LVB) {
201 mlog(0, "setting lvb from lockres for %s node\n",
202 lock->ml.node == dlm->node_num ? "master" :
203 "remote");
204 memcpy(res->lvb, lksb->lvb, DLM_LVB_LEN);
205 } 200 }
201 /* Do nothing for lvb put requests - they should be done in
202 * place when the lock is downconverted - otherwise we risk
203 * racing gets and puts which could result in old lvb data
204 * being propagated. We leave the put flag set and clear it
205 * here. In the future we might want to clear it at the time
206 * the put is actually done.
207 */
206 spin_unlock(&res->spinlock); 208 spin_unlock(&res->spinlock);
207 } 209 }
208 210
@@ -381,8 +383,7 @@ do_ast:
381 ret = DLM_NORMAL; 383 ret = DLM_NORMAL;
382 if (past->type == DLM_AST) { 384 if (past->type == DLM_AST) {
383 /* do not alter lock refcount. switching lists. */ 385 /* do not alter lock refcount. switching lists. */
384 list_del_init(&lock->list); 386 list_move_tail(&lock->list, &res->granted);
385 list_add_tail(&lock->list, &res->granted);
386 mlog(0, "ast: adding to granted list... type=%d, " 387 mlog(0, "ast: adding to granted list... type=%d, "
387 "convert_type=%d\n", lock->ml.type, lock->ml.convert_type); 388 "convert_type=%d\n", lock->ml.type, lock->ml.convert_type);
388 if (lock->ml.convert_type != LKM_IVMODE) { 389 if (lock->ml.convert_type != LKM_IVMODE) {
diff --git a/fs/ocfs2/dlm/dlmcommon.h b/fs/ocfs2/dlm/dlmcommon.h
index 88cc43df18f1..9bdc9cf65991 100644
--- a/fs/ocfs2/dlm/dlmcommon.h
+++ b/fs/ocfs2/dlm/dlmcommon.h
@@ -37,7 +37,17 @@
37#define DLM_THREAD_SHUFFLE_INTERVAL 5 // flush everything every 5 passes 37#define DLM_THREAD_SHUFFLE_INTERVAL 5 // flush everything every 5 passes
38#define DLM_THREAD_MS 200 // flush at least every 200 ms 38#define DLM_THREAD_MS 200 // flush at least every 200 ms
39 39
40#define DLM_HASH_BUCKETS (PAGE_SIZE / sizeof(struct hlist_head)) 40#define DLM_HASH_SIZE_DEFAULT (1 << 14)
41#if DLM_HASH_SIZE_DEFAULT < PAGE_SIZE
42# define DLM_HASH_PAGES 1
43#else
44# define DLM_HASH_PAGES (DLM_HASH_SIZE_DEFAULT / PAGE_SIZE)
45#endif
46#define DLM_BUCKETS_PER_PAGE (PAGE_SIZE / sizeof(struct hlist_head))
47#define DLM_HASH_BUCKETS (DLM_HASH_PAGES * DLM_BUCKETS_PER_PAGE)
48
49/* Intended to make it easier for us to switch out hash functions */
50#define dlm_lockid_hash(_n, _l) full_name_hash(_n, _l)
41 51
42enum dlm_ast_type { 52enum dlm_ast_type {
43 DLM_AST = 0, 53 DLM_AST = 0,
@@ -61,7 +71,8 @@ static inline int dlm_is_recovery_lock(const char *lock_name, int name_len)
61 return 0; 71 return 0;
62} 72}
63 73
64#define DLM_RECO_STATE_ACTIVE 0x0001 74#define DLM_RECO_STATE_ACTIVE 0x0001
75#define DLM_RECO_STATE_FINALIZE 0x0002
65 76
66struct dlm_recovery_ctxt 77struct dlm_recovery_ctxt
67{ 78{
@@ -85,7 +96,7 @@ enum dlm_ctxt_state {
85struct dlm_ctxt 96struct dlm_ctxt
86{ 97{
87 struct list_head list; 98 struct list_head list;
88 struct hlist_head *lockres_hash; 99 struct hlist_head **lockres_hash;
89 struct list_head dirty_list; 100 struct list_head dirty_list;
90 struct list_head purge_list; 101 struct list_head purge_list;
91 struct list_head pending_asts; 102 struct list_head pending_asts;
@@ -120,6 +131,7 @@ struct dlm_ctxt
120 struct o2hb_callback_func dlm_hb_down; 131 struct o2hb_callback_func dlm_hb_down;
121 struct task_struct *dlm_thread_task; 132 struct task_struct *dlm_thread_task;
122 struct task_struct *dlm_reco_thread_task; 133 struct task_struct *dlm_reco_thread_task;
134 struct workqueue_struct *dlm_worker;
123 wait_queue_head_t dlm_thread_wq; 135 wait_queue_head_t dlm_thread_wq;
124 wait_queue_head_t dlm_reco_thread_wq; 136 wait_queue_head_t dlm_reco_thread_wq;
125 wait_queue_head_t ast_wq; 137 wait_queue_head_t ast_wq;
@@ -132,6 +144,11 @@ struct dlm_ctxt
132 struct list_head dlm_eviction_callbacks; 144 struct list_head dlm_eviction_callbacks;
133}; 145};
134 146
147static inline struct hlist_head *dlm_lockres_hash(struct dlm_ctxt *dlm, unsigned i)
148{
149 return dlm->lockres_hash[(i / DLM_BUCKETS_PER_PAGE) % DLM_HASH_PAGES] + (i % DLM_BUCKETS_PER_PAGE);
150}
151
135/* these keventd work queue items are for less-frequently 152/* these keventd work queue items are for less-frequently
136 * called functions that cannot be directly called from the 153 * called functions that cannot be directly called from the
137 * net message handlers for some reason, usually because 154 * net message handlers for some reason, usually because
@@ -216,20 +233,29 @@ struct dlm_lock_resource
216 /* WARNING: Please see the comment in dlm_init_lockres before 233 /* WARNING: Please see the comment in dlm_init_lockres before
217 * adding fields here. */ 234 * adding fields here. */
218 struct hlist_node hash_node; 235 struct hlist_node hash_node;
236 struct qstr lockname;
219 struct kref refs; 237 struct kref refs;
220 238
221 /* please keep these next 3 in this order 239 /*
222 * some funcs want to iterate over all lists */ 240 * Please keep granted, converting, and blocked in this order,
241 * as some funcs want to iterate over all lists.
242 *
243 * All four lists are protected by the hash's reference.
244 */
223 struct list_head granted; 245 struct list_head granted;
224 struct list_head converting; 246 struct list_head converting;
225 struct list_head blocked; 247 struct list_head blocked;
248 struct list_head purge;
226 249
250 /*
251 * These two lists require you to hold an additional reference
252 * while they are on the list.
253 */
227 struct list_head dirty; 254 struct list_head dirty;
228 struct list_head recovering; // dlm_recovery_ctxt.resources list 255 struct list_head recovering; // dlm_recovery_ctxt.resources list
229 256
230 /* unused lock resources have their last_used stamped and are 257 /* unused lock resources have their last_used stamped and are
231 * put on a list for the dlm thread to run. */ 258 * put on a list for the dlm thread to run. */
232 struct list_head purge;
233 unsigned long last_used; 259 unsigned long last_used;
234 260
235 unsigned migration_pending:1; 261 unsigned migration_pending:1;
@@ -238,7 +264,6 @@ struct dlm_lock_resource
238 wait_queue_head_t wq; 264 wait_queue_head_t wq;
239 u8 owner; //node which owns the lock resource, or unknown 265 u8 owner; //node which owns the lock resource, or unknown
240 u16 state; 266 u16 state;
241 struct qstr lockname;
242 char lvb[DLM_LVB_LEN]; 267 char lvb[DLM_LVB_LEN];
243}; 268};
244 269
@@ -300,6 +325,15 @@ enum dlm_lockres_list {
300 DLM_BLOCKED_LIST 325 DLM_BLOCKED_LIST
301}; 326};
302 327
328static inline int dlm_lvb_is_empty(char *lvb)
329{
330 int i;
331 for (i=0; i<DLM_LVB_LEN; i++)
332 if (lvb[i])
333 return 0;
334 return 1;
335}
336
303static inline struct list_head * 337static inline struct list_head *
304dlm_list_idx_to_ptr(struct dlm_lock_resource *res, enum dlm_lockres_list idx) 338dlm_list_idx_to_ptr(struct dlm_lock_resource *res, enum dlm_lockres_list idx)
305{ 339{
@@ -609,7 +643,8 @@ struct dlm_finalize_reco
609{ 643{
610 u8 node_idx; 644 u8 node_idx;
611 u8 dead_node; 645 u8 dead_node;
612 __be16 pad1; 646 u8 flags;
647 u8 pad1;
613 __be32 pad2; 648 __be32 pad2;
614}; 649};
615 650
@@ -676,6 +711,7 @@ void dlm_wait_for_recovery(struct dlm_ctxt *dlm);
676void dlm_kick_recovery_thread(struct dlm_ctxt *dlm); 711void dlm_kick_recovery_thread(struct dlm_ctxt *dlm);
677int dlm_is_node_dead(struct dlm_ctxt *dlm, u8 node); 712int dlm_is_node_dead(struct dlm_ctxt *dlm, u8 node);
678int dlm_wait_for_node_death(struct dlm_ctxt *dlm, u8 node, int timeout); 713int dlm_wait_for_node_death(struct dlm_ctxt *dlm, u8 node, int timeout);
714int dlm_wait_for_node_recovery(struct dlm_ctxt *dlm, u8 node, int timeout);
679 715
680void dlm_put(struct dlm_ctxt *dlm); 716void dlm_put(struct dlm_ctxt *dlm);
681struct dlm_ctxt *dlm_grab(struct dlm_ctxt *dlm); 717struct dlm_ctxt *dlm_grab(struct dlm_ctxt *dlm);
@@ -687,14 +723,20 @@ void dlm_lockres_calc_usage(struct dlm_ctxt *dlm,
687 struct dlm_lock_resource *res); 723 struct dlm_lock_resource *res);
688void dlm_purge_lockres(struct dlm_ctxt *dlm, 724void dlm_purge_lockres(struct dlm_ctxt *dlm,
689 struct dlm_lock_resource *lockres); 725 struct dlm_lock_resource *lockres);
690void dlm_lockres_get(struct dlm_lock_resource *res); 726static inline void dlm_lockres_get(struct dlm_lock_resource *res)
727{
728 /* This is called on every lookup, so it might be worth
729 * inlining. */
730 kref_get(&res->refs);
731}
691void dlm_lockres_put(struct dlm_lock_resource *res); 732void dlm_lockres_put(struct dlm_lock_resource *res);
692void __dlm_unhash_lockres(struct dlm_lock_resource *res); 733void __dlm_unhash_lockres(struct dlm_lock_resource *res);
693void __dlm_insert_lockres(struct dlm_ctxt *dlm, 734void __dlm_insert_lockres(struct dlm_ctxt *dlm,
694 struct dlm_lock_resource *res); 735 struct dlm_lock_resource *res);
695struct dlm_lock_resource * __dlm_lookup_lockres(struct dlm_ctxt *dlm, 736struct dlm_lock_resource * __dlm_lookup_lockres(struct dlm_ctxt *dlm,
696 const char *name, 737 const char *name,
697 unsigned int len); 738 unsigned int len,
739 unsigned int hash);
698struct dlm_lock_resource * dlm_lookup_lockres(struct dlm_ctxt *dlm, 740struct dlm_lock_resource * dlm_lookup_lockres(struct dlm_ctxt *dlm,
699 const char *name, 741 const char *name,
700 unsigned int len); 742 unsigned int len);
@@ -819,6 +861,7 @@ void dlm_clean_master_list(struct dlm_ctxt *dlm,
819 u8 dead_node); 861 u8 dead_node);
820int dlm_lock_basts_flushed(struct dlm_ctxt *dlm, struct dlm_lock *lock); 862int dlm_lock_basts_flushed(struct dlm_ctxt *dlm, struct dlm_lock *lock);
821 863
864int __dlm_lockres_unused(struct dlm_lock_resource *res);
822 865
823static inline const char * dlm_lock_mode_name(int mode) 866static inline const char * dlm_lock_mode_name(int mode)
824{ 867{
diff --git a/fs/ocfs2/dlm/dlmconvert.c b/fs/ocfs2/dlm/dlmconvert.c
index 8285228d9e37..c764dc8e40a2 100644
--- a/fs/ocfs2/dlm/dlmconvert.c
+++ b/fs/ocfs2/dlm/dlmconvert.c
@@ -214,6 +214,9 @@ grant:
214 if (lock->ml.node == dlm->node_num) 214 if (lock->ml.node == dlm->node_num)
215 mlog(0, "doing in-place convert for nonlocal lock\n"); 215 mlog(0, "doing in-place convert for nonlocal lock\n");
216 lock->ml.type = type; 216 lock->ml.type = type;
217 if (lock->lksb->flags & DLM_LKSB_PUT_LVB)
218 memcpy(res->lvb, lock->lksb->lvb, DLM_LVB_LEN);
219
217 status = DLM_NORMAL; 220 status = DLM_NORMAL;
218 *call_ast = 1; 221 *call_ast = 1;
219 goto unlock_exit; 222 goto unlock_exit;
@@ -231,8 +234,7 @@ switch_queues:
231 234
232 lock->ml.convert_type = type; 235 lock->ml.convert_type = type;
233 /* do not alter lock refcount. switching lists. */ 236 /* do not alter lock refcount. switching lists. */
234 list_del_init(&lock->list); 237 list_move_tail(&lock->list, &res->converting);
235 list_add_tail(&lock->list, &res->converting);
236 238
237unlock_exit: 239unlock_exit:
238 spin_unlock(&lock->spinlock); 240 spin_unlock(&lock->spinlock);
@@ -248,8 +250,7 @@ void dlm_revert_pending_convert(struct dlm_lock_resource *res,
248 struct dlm_lock *lock) 250 struct dlm_lock *lock)
249{ 251{
250 /* do not alter lock refcount. switching lists. */ 252 /* do not alter lock refcount. switching lists. */
251 list_del_init(&lock->list); 253 list_move_tail(&lock->list, &res->granted);
252 list_add_tail(&lock->list, &res->granted);
253 lock->ml.convert_type = LKM_IVMODE; 254 lock->ml.convert_type = LKM_IVMODE;
254 lock->lksb->flags &= ~(DLM_LKSB_GET_LVB|DLM_LKSB_PUT_LVB); 255 lock->lksb->flags &= ~(DLM_LKSB_GET_LVB|DLM_LKSB_PUT_LVB);
255} 256}
@@ -294,8 +295,7 @@ enum dlm_status dlmconvert_remote(struct dlm_ctxt *dlm,
294 res->state |= DLM_LOCK_RES_IN_PROGRESS; 295 res->state |= DLM_LOCK_RES_IN_PROGRESS;
295 /* move lock to local convert queue */ 296 /* move lock to local convert queue */
296 /* do not alter lock refcount. switching lists. */ 297 /* do not alter lock refcount. switching lists. */
297 list_del_init(&lock->list); 298 list_move_tail(&lock->list, &res->converting);
298 list_add_tail(&lock->list, &res->converting);
299 lock->convert_pending = 1; 299 lock->convert_pending = 1;
300 lock->ml.convert_type = type; 300 lock->ml.convert_type = type;
301 301
@@ -464,6 +464,12 @@ int dlm_convert_lock_handler(struct o2net_msg *msg, u32 len, void *data)
464 } 464 }
465 465
466 spin_lock(&res->spinlock); 466 spin_lock(&res->spinlock);
467 status = __dlm_lockres_state_to_status(res);
468 if (status != DLM_NORMAL) {
469 spin_unlock(&res->spinlock);
470 dlm_error(status);
471 goto leave;
472 }
467 list_for_each(iter, &res->granted) { 473 list_for_each(iter, &res->granted) {
468 lock = list_entry(iter, struct dlm_lock, list); 474 lock = list_entry(iter, struct dlm_lock, list);
469 if (lock->ml.cookie == cnv->cookie && 475 if (lock->ml.cookie == cnv->cookie &&
@@ -473,6 +479,21 @@ int dlm_convert_lock_handler(struct o2net_msg *msg, u32 len, void *data)
473 } 479 }
474 lock = NULL; 480 lock = NULL;
475 } 481 }
482 if (!lock) {
483 __dlm_print_one_lock_resource(res);
484 list_for_each(iter, &res->granted) {
485 lock = list_entry(iter, struct dlm_lock, list);
486 if (lock->ml.node == cnv->node_idx) {
487 mlog(ML_ERROR, "There is something here "
488 "for node %u, lock->ml.cookie=%llu, "
489 "cnv->cookie=%llu\n", cnv->node_idx,
490 (unsigned long long)lock->ml.cookie,
491 (unsigned long long)cnv->cookie);
492 break;
493 }
494 }
495 lock = NULL;
496 }
476 spin_unlock(&res->spinlock); 497 spin_unlock(&res->spinlock);
477 if (!lock) { 498 if (!lock) {
478 status = DLM_IVLOCKID; 499 status = DLM_IVLOCKID;
diff --git a/fs/ocfs2/dlm/dlmdebug.c b/fs/ocfs2/dlm/dlmdebug.c
index c7eae5d3324e..3f6c8d88f7af 100644
--- a/fs/ocfs2/dlm/dlmdebug.c
+++ b/fs/ocfs2/dlm/dlmdebug.c
@@ -37,10 +37,8 @@
37 37
38#include "dlmapi.h" 38#include "dlmapi.h"
39#include "dlmcommon.h" 39#include "dlmcommon.h"
40#include "dlmdebug.h"
41 40
42#include "dlmdomain.h" 41#include "dlmdomain.h"
43#include "dlmdebug.h"
44 42
45#define MLOG_MASK_PREFIX ML_DLM 43#define MLOG_MASK_PREFIX ML_DLM
46#include "cluster/masklog.h" 44#include "cluster/masklog.h"
@@ -120,6 +118,7 @@ void dlm_print_one_lock(struct dlm_lock *lockid)
120} 118}
121EXPORT_SYMBOL_GPL(dlm_print_one_lock); 119EXPORT_SYMBOL_GPL(dlm_print_one_lock);
122 120
121#if 0
123void dlm_dump_lock_resources(struct dlm_ctxt *dlm) 122void dlm_dump_lock_resources(struct dlm_ctxt *dlm)
124{ 123{
125 struct dlm_lock_resource *res; 124 struct dlm_lock_resource *res;
@@ -136,12 +135,13 @@ void dlm_dump_lock_resources(struct dlm_ctxt *dlm)
136 135
137 spin_lock(&dlm->spinlock); 136 spin_lock(&dlm->spinlock);
138 for (i=0; i<DLM_HASH_BUCKETS; i++) { 137 for (i=0; i<DLM_HASH_BUCKETS; i++) {
139 bucket = &(dlm->lockres_hash[i]); 138 bucket = dlm_lockres_hash(dlm, i);
140 hlist_for_each_entry(res, iter, bucket, hash_node) 139 hlist_for_each_entry(res, iter, bucket, hash_node)
141 dlm_print_one_lock_resource(res); 140 dlm_print_one_lock_resource(res);
142 } 141 }
143 spin_unlock(&dlm->spinlock); 142 spin_unlock(&dlm->spinlock);
144} 143}
144#endif /* 0 */
145 145
146static const char *dlm_errnames[] = { 146static const char *dlm_errnames[] = {
147 [DLM_NORMAL] = "DLM_NORMAL", 147 [DLM_NORMAL] = "DLM_NORMAL",
diff --git a/fs/ocfs2/dlm/dlmdebug.h b/fs/ocfs2/dlm/dlmdebug.h
deleted file mode 100644
index 6858510c3ccd..000000000000
--- a/fs/ocfs2/dlm/dlmdebug.h
+++ /dev/null
@@ -1,30 +0,0 @@
1/* -*- mode: c; c-basic-offset: 8; -*-
2 * vim: noexpandtab sw=8 ts=8 sts=0:
3 *
4 * dlmdebug.h
5 *
6 * Copyright (C) 2004 Oracle. All rights reserved.
7 *
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public
10 * License as published by the Free Software Foundation; either
11 * version 2 of the License, or (at your option) any later version.
12 *
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public
19 * License along with this program; if not, write to the
20 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
21 * Boston, MA 021110-1307, USA.
22 *
23 */
24
25#ifndef DLMDEBUG_H
26#define DLMDEBUG_H
27
28void dlm_dump_lock_resources(struct dlm_ctxt *dlm);
29
30#endif
diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c
index 8f3a9e3106fd..b8c23f7ba67e 100644
--- a/fs/ocfs2/dlm/dlmdomain.c
+++ b/fs/ocfs2/dlm/dlmdomain.c
@@ -41,7 +41,6 @@
41#include "dlmapi.h" 41#include "dlmapi.h"
42#include "dlmcommon.h" 42#include "dlmcommon.h"
43 43
44#include "dlmdebug.h"
45#include "dlmdomain.h" 44#include "dlmdomain.h"
46 45
47#include "dlmver.h" 46#include "dlmver.h"
@@ -49,6 +48,33 @@
49#define MLOG_MASK_PREFIX (ML_DLM|ML_DLM_DOMAIN) 48#define MLOG_MASK_PREFIX (ML_DLM|ML_DLM_DOMAIN)
50#include "cluster/masklog.h" 49#include "cluster/masklog.h"
51 50
51static void dlm_free_pagevec(void **vec, int pages)
52{
53 while (pages--)
54 free_page((unsigned long)vec[pages]);
55 kfree(vec);
56}
57
58static void **dlm_alloc_pagevec(int pages)
59{
60 void **vec = kmalloc(pages * sizeof(void *), GFP_KERNEL);
61 int i;
62
63 if (!vec)
64 return NULL;
65
66 for (i = 0; i < pages; i++)
67 if (!(vec[i] = (void *)__get_free_page(GFP_KERNEL)))
68 goto out_free;
69
70 mlog(0, "Allocated DLM hash pagevec; %d pages (%lu expected), %lu buckets per page\n",
71 pages, DLM_HASH_PAGES, (unsigned long)DLM_BUCKETS_PER_PAGE);
72 return vec;
73out_free:
74 dlm_free_pagevec(vec, i);
75 return NULL;
76}
77
52/* 78/*
53 * 79 *
54 * spinlock lock ordering: if multiple locks are needed, obey this ordering: 80 * spinlock lock ordering: if multiple locks are needed, obey this ordering:
@@ -62,7 +88,7 @@
62 * 88 *
63 */ 89 */
64 90
65spinlock_t dlm_domain_lock = SPIN_LOCK_UNLOCKED; 91DEFINE_SPINLOCK(dlm_domain_lock);
66LIST_HEAD(dlm_domains); 92LIST_HEAD(dlm_domains);
67static DECLARE_WAIT_QUEUE_HEAD(dlm_domain_events); 93static DECLARE_WAIT_QUEUE_HEAD(dlm_domain_events);
68 94
@@ -90,8 +116,7 @@ void __dlm_insert_lockres(struct dlm_ctxt *dlm,
90 assert_spin_locked(&dlm->spinlock); 116 assert_spin_locked(&dlm->spinlock);
91 117
92 q = &res->lockname; 118 q = &res->lockname;
93 q->hash = full_name_hash(q->name, q->len); 119 bucket = dlm_lockres_hash(dlm, q->hash);
94 bucket = &(dlm->lockres_hash[q->hash % DLM_HASH_BUCKETS]);
95 120
96 /* get a reference for our hashtable */ 121 /* get a reference for our hashtable */
97 dlm_lockres_get(res); 122 dlm_lockres_get(res);
@@ -100,34 +125,32 @@ void __dlm_insert_lockres(struct dlm_ctxt *dlm,
100} 125}
101 126
102struct dlm_lock_resource * __dlm_lookup_lockres(struct dlm_ctxt *dlm, 127struct dlm_lock_resource * __dlm_lookup_lockres(struct dlm_ctxt *dlm,
103 const char *name, 128 const char *name,
104 unsigned int len) 129 unsigned int len,
130 unsigned int hash)
105{ 131{
106 unsigned int hash;
107 struct hlist_node *iter;
108 struct dlm_lock_resource *tmpres=NULL;
109 struct hlist_head *bucket; 132 struct hlist_head *bucket;
133 struct hlist_node *list;
110 134
111 mlog_entry("%.*s\n", len, name); 135 mlog_entry("%.*s\n", len, name);
112 136
113 assert_spin_locked(&dlm->spinlock); 137 assert_spin_locked(&dlm->spinlock);
114 138
115 hash = full_name_hash(name, len); 139 bucket = dlm_lockres_hash(dlm, hash);
116
117 bucket = &(dlm->lockres_hash[hash % DLM_HASH_BUCKETS]);
118
119 /* check for pre-existing lock */
120 hlist_for_each(iter, bucket) {
121 tmpres = hlist_entry(iter, struct dlm_lock_resource, hash_node);
122 if (tmpres->lockname.len == len &&
123 memcmp(tmpres->lockname.name, name, len) == 0) {
124 dlm_lockres_get(tmpres);
125 break;
126 }
127 140
128 tmpres = NULL; 141 hlist_for_each(list, bucket) {
142 struct dlm_lock_resource *res = hlist_entry(list,
143 struct dlm_lock_resource, hash_node);
144 if (res->lockname.name[0] != name[0])
145 continue;
146 if (unlikely(res->lockname.len != len))
147 continue;
148 if (memcmp(res->lockname.name + 1, name + 1, len - 1))
149 continue;
150 dlm_lockres_get(res);
151 return res;
129 } 152 }
130 return tmpres; 153 return NULL;
131} 154}
132 155
133struct dlm_lock_resource * dlm_lookup_lockres(struct dlm_ctxt *dlm, 156struct dlm_lock_resource * dlm_lookup_lockres(struct dlm_ctxt *dlm,
@@ -135,9 +158,10 @@ struct dlm_lock_resource * dlm_lookup_lockres(struct dlm_ctxt *dlm,
135 unsigned int len) 158 unsigned int len)
136{ 159{
137 struct dlm_lock_resource *res; 160 struct dlm_lock_resource *res;
161 unsigned int hash = dlm_lockid_hash(name, len);
138 162
139 spin_lock(&dlm->spinlock); 163 spin_lock(&dlm->spinlock);
140 res = __dlm_lookup_lockres(dlm, name, len); 164 res = __dlm_lookup_lockres(dlm, name, len, hash);
141 spin_unlock(&dlm->spinlock); 165 spin_unlock(&dlm->spinlock);
142 return res; 166 return res;
143} 167}
@@ -194,7 +218,7 @@ static int dlm_wait_on_domain_helper(const char *domain)
194static void dlm_free_ctxt_mem(struct dlm_ctxt *dlm) 218static void dlm_free_ctxt_mem(struct dlm_ctxt *dlm)
195{ 219{
196 if (dlm->lockres_hash) 220 if (dlm->lockres_hash)
197 free_page((unsigned long) dlm->lockres_hash); 221 dlm_free_pagevec((void **)dlm->lockres_hash, DLM_HASH_PAGES);
198 222
199 if (dlm->name) 223 if (dlm->name)
200 kfree(dlm->name); 224 kfree(dlm->name);
@@ -278,11 +302,21 @@ int dlm_domain_fully_joined(struct dlm_ctxt *dlm)
278 return ret; 302 return ret;
279} 303}
280 304
305static void dlm_destroy_dlm_worker(struct dlm_ctxt *dlm)
306{
307 if (dlm->dlm_worker) {
308 flush_workqueue(dlm->dlm_worker);
309 destroy_workqueue(dlm->dlm_worker);
310 dlm->dlm_worker = NULL;
311 }
312}
313
281static void dlm_complete_dlm_shutdown(struct dlm_ctxt *dlm) 314static void dlm_complete_dlm_shutdown(struct dlm_ctxt *dlm)
282{ 315{
283 dlm_unregister_domain_handlers(dlm); 316 dlm_unregister_domain_handlers(dlm);
284 dlm_complete_thread(dlm); 317 dlm_complete_thread(dlm);
285 dlm_complete_recovery_thread(dlm); 318 dlm_complete_recovery_thread(dlm);
319 dlm_destroy_dlm_worker(dlm);
286 320
287 /* We've left the domain. Now we can take ourselves out of the 321 /* We've left the domain. Now we can take ourselves out of the
288 * list and allow the kref stuff to help us free the 322 * list and allow the kref stuff to help us free the
@@ -304,8 +338,8 @@ static void dlm_migrate_all_locks(struct dlm_ctxt *dlm)
304restart: 338restart:
305 spin_lock(&dlm->spinlock); 339 spin_lock(&dlm->spinlock);
306 for (i = 0; i < DLM_HASH_BUCKETS; i++) { 340 for (i = 0; i < DLM_HASH_BUCKETS; i++) {
307 while (!hlist_empty(&dlm->lockres_hash[i])) { 341 while (!hlist_empty(dlm_lockres_hash(dlm, i))) {
308 res = hlist_entry(dlm->lockres_hash[i].first, 342 res = hlist_entry(dlm_lockres_hash(dlm, i)->first,
309 struct dlm_lock_resource, hash_node); 343 struct dlm_lock_resource, hash_node);
310 /* need reference when manually grabbing lockres */ 344 /* need reference when manually grabbing lockres */
311 dlm_lockres_get(res); 345 dlm_lockres_get(res);
@@ -1126,6 +1160,13 @@ static int dlm_join_domain(struct dlm_ctxt *dlm)
1126 goto bail; 1160 goto bail;
1127 } 1161 }
1128 1162
1163 dlm->dlm_worker = create_singlethread_workqueue("dlm_wq");
1164 if (!dlm->dlm_worker) {
1165 status = -ENOMEM;
1166 mlog_errno(status);
1167 goto bail;
1168 }
1169
1129 do { 1170 do {
1130 unsigned int backoff; 1171 unsigned int backoff;
1131 status = dlm_try_to_join_domain(dlm); 1172 status = dlm_try_to_join_domain(dlm);
@@ -1166,6 +1207,7 @@ bail:
1166 dlm_unregister_domain_handlers(dlm); 1207 dlm_unregister_domain_handlers(dlm);
1167 dlm_complete_thread(dlm); 1208 dlm_complete_thread(dlm);
1168 dlm_complete_recovery_thread(dlm); 1209 dlm_complete_recovery_thread(dlm);
1210 dlm_destroy_dlm_worker(dlm);
1169 } 1211 }
1170 1212
1171 return status; 1213 return status;
@@ -1191,7 +1233,7 @@ static struct dlm_ctxt *dlm_alloc_ctxt(const char *domain,
1191 goto leave; 1233 goto leave;
1192 } 1234 }
1193 1235
1194 dlm->lockres_hash = (struct hlist_head *) __get_free_page(GFP_KERNEL); 1236 dlm->lockres_hash = (struct hlist_head **)dlm_alloc_pagevec(DLM_HASH_PAGES);
1195 if (!dlm->lockres_hash) { 1237 if (!dlm->lockres_hash) {
1196 mlog_errno(-ENOMEM); 1238 mlog_errno(-ENOMEM);
1197 kfree(dlm->name); 1239 kfree(dlm->name);
@@ -1200,8 +1242,8 @@ static struct dlm_ctxt *dlm_alloc_ctxt(const char *domain,
1200 goto leave; 1242 goto leave;
1201 } 1243 }
1202 1244
1203 for (i=0; i<DLM_HASH_BUCKETS; i++) 1245 for (i = 0; i < DLM_HASH_BUCKETS; i++)
1204 INIT_HLIST_HEAD(&dlm->lockres_hash[i]); 1246 INIT_HLIST_HEAD(dlm_lockres_hash(dlm, i));
1205 1247
1206 strcpy(dlm->name, domain); 1248 strcpy(dlm->name, domain);
1207 dlm->key = key; 1249 dlm->key = key;
@@ -1231,6 +1273,7 @@ static struct dlm_ctxt *dlm_alloc_ctxt(const char *domain,
1231 1273
1232 dlm->dlm_thread_task = NULL; 1274 dlm->dlm_thread_task = NULL;
1233 dlm->dlm_reco_thread_task = NULL; 1275 dlm->dlm_reco_thread_task = NULL;
1276 dlm->dlm_worker = NULL;
1234 init_waitqueue_head(&dlm->dlm_thread_wq); 1277 init_waitqueue_head(&dlm->dlm_thread_wq);
1235 init_waitqueue_head(&dlm->dlm_reco_thread_wq); 1278 init_waitqueue_head(&dlm->dlm_reco_thread_wq);
1236 init_waitqueue_head(&dlm->reco.event); 1279 init_waitqueue_head(&dlm->reco.event);
diff --git a/fs/ocfs2/dlm/dlmfs.c b/fs/ocfs2/dlm/dlmfs.c
index 7273d9fa6bab..033ad1701232 100644
--- a/fs/ocfs2/dlm/dlmfs.c
+++ b/fs/ocfs2/dlm/dlmfs.c
@@ -116,7 +116,7 @@ static int dlmfs_file_open(struct inode *inode,
116 * doesn't make sense for LVB writes. */ 116 * doesn't make sense for LVB writes. */
117 file->f_flags &= ~O_APPEND; 117 file->f_flags &= ~O_APPEND;
118 118
119 fp = kmalloc(sizeof(*fp), GFP_KERNEL); 119 fp = kmalloc(sizeof(*fp), GFP_NOFS);
120 if (!fp) { 120 if (!fp) {
121 status = -ENOMEM; 121 status = -ENOMEM;
122 goto bail; 122 goto bail;
@@ -196,7 +196,7 @@ static ssize_t dlmfs_file_read(struct file *filp,
196 else 196 else
197 readlen = count - *ppos; 197 readlen = count - *ppos;
198 198
199 lvb_buf = kmalloc(readlen, GFP_KERNEL); 199 lvb_buf = kmalloc(readlen, GFP_NOFS);
200 if (!lvb_buf) 200 if (!lvb_buf)
201 return -ENOMEM; 201 return -ENOMEM;
202 202
@@ -240,7 +240,7 @@ static ssize_t dlmfs_file_write(struct file *filp,
240 else 240 else
241 writelen = count - *ppos; 241 writelen = count - *ppos;
242 242
243 lvb_buf = kmalloc(writelen, GFP_KERNEL); 243 lvb_buf = kmalloc(writelen, GFP_NOFS);
244 if (!lvb_buf) 244 if (!lvb_buf)
245 return -ENOMEM; 245 return -ENOMEM;
246 246
diff --git a/fs/ocfs2/dlm/dlmlock.c b/fs/ocfs2/dlm/dlmlock.c
index 6fea28318d6d..5ca57ec650c7 100644
--- a/fs/ocfs2/dlm/dlmlock.c
+++ b/fs/ocfs2/dlm/dlmlock.c
@@ -53,7 +53,7 @@
53#define MLOG_MASK_PREFIX ML_DLM 53#define MLOG_MASK_PREFIX ML_DLM
54#include "cluster/masklog.h" 54#include "cluster/masklog.h"
55 55
56static spinlock_t dlm_cookie_lock = SPIN_LOCK_UNLOCKED; 56static DEFINE_SPINLOCK(dlm_cookie_lock);
57static u64 dlm_next_cookie = 1; 57static u64 dlm_next_cookie = 1;
58 58
59static enum dlm_status dlm_send_remote_lock_request(struct dlm_ctxt *dlm, 59static enum dlm_status dlm_send_remote_lock_request(struct dlm_ctxt *dlm,
@@ -201,6 +201,7 @@ static enum dlm_status dlmlock_remote(struct dlm_ctxt *dlm,
201 struct dlm_lock *lock, int flags) 201 struct dlm_lock *lock, int flags)
202{ 202{
203 enum dlm_status status = DLM_DENIED; 203 enum dlm_status status = DLM_DENIED;
204 int lockres_changed = 1;
204 205
205 mlog_entry("type=%d\n", lock->ml.type); 206 mlog_entry("type=%d\n", lock->ml.type);
206 mlog(0, "lockres %.*s, flags = 0x%x\n", res->lockname.len, 207 mlog(0, "lockres %.*s, flags = 0x%x\n", res->lockname.len,
@@ -226,8 +227,25 @@ static enum dlm_status dlmlock_remote(struct dlm_ctxt *dlm,
226 res->state &= ~DLM_LOCK_RES_IN_PROGRESS; 227 res->state &= ~DLM_LOCK_RES_IN_PROGRESS;
227 lock->lock_pending = 0; 228 lock->lock_pending = 0;
228 if (status != DLM_NORMAL) { 229 if (status != DLM_NORMAL) {
229 if (status != DLM_NOTQUEUED) 230 if (status == DLM_RECOVERING &&
231 dlm_is_recovery_lock(res->lockname.name,
232 res->lockname.len)) {
233 /* recovery lock was mastered by dead node.
234 * we need to have calc_usage shoot down this
235 * lockres and completely remaster it. */
236 mlog(0, "%s: recovery lock was owned by "
237 "dead node %u, remaster it now.\n",
238 dlm->name, res->owner);
239 } else if (status != DLM_NOTQUEUED) {
240 /*
241 * DO NOT call calc_usage, as this would unhash
242 * the remote lockres before we ever get to use
243 * it. treat as if we never made any change to
244 * the lockres.
245 */
246 lockres_changed = 0;
230 dlm_error(status); 247 dlm_error(status);
248 }
231 dlm_revert_pending_lock(res, lock); 249 dlm_revert_pending_lock(res, lock);
232 dlm_lock_put(lock); 250 dlm_lock_put(lock);
233 } else if (dlm_is_recovery_lock(res->lockname.name, 251 } else if (dlm_is_recovery_lock(res->lockname.name,
@@ -239,12 +257,12 @@ static enum dlm_status dlmlock_remote(struct dlm_ctxt *dlm,
239 mlog(0, "%s: $RECOVERY lock for this node (%u) is " 257 mlog(0, "%s: $RECOVERY lock for this node (%u) is "
240 "mastered by %u; got lock, manually granting (no ast)\n", 258 "mastered by %u; got lock, manually granting (no ast)\n",
241 dlm->name, dlm->node_num, res->owner); 259 dlm->name, dlm->node_num, res->owner);
242 list_del_init(&lock->list); 260 list_move_tail(&lock->list, &res->granted);
243 list_add_tail(&lock->list, &res->granted);
244 } 261 }
245 spin_unlock(&res->spinlock); 262 spin_unlock(&res->spinlock);
246 263
247 dlm_lockres_calc_usage(dlm, res); 264 if (lockres_changed)
265 dlm_lockres_calc_usage(dlm, res);
248 266
249 wake_up(&res->wq); 267 wake_up(&res->wq);
250 return status; 268 return status;
@@ -281,6 +299,14 @@ static enum dlm_status dlm_send_remote_lock_request(struct dlm_ctxt *dlm,
281 if (tmpret >= 0) { 299 if (tmpret >= 0) {
282 // successfully sent and received 300 // successfully sent and received
283 ret = status; // this is already a dlm_status 301 ret = status; // this is already a dlm_status
302 if (ret == DLM_REJECTED) {
303 mlog(ML_ERROR, "%s:%.*s: BUG. this is a stale lockres "
304 "no longer owned by %u. that node is coming back "
305 "up currently.\n", dlm->name, create.namelen,
306 create.name, res->owner);
307 dlm_print_one_lock_resource(res);
308 BUG();
309 }
284 } else { 310 } else {
285 mlog_errno(tmpret); 311 mlog_errno(tmpret);
286 if (dlm_is_host_down(tmpret)) { 312 if (dlm_is_host_down(tmpret)) {
@@ -382,13 +408,13 @@ struct dlm_lock * dlm_new_lock(int type, u8 node, u64 cookie,
382 struct dlm_lock *lock; 408 struct dlm_lock *lock;
383 int kernel_allocated = 0; 409 int kernel_allocated = 0;
384 410
385 lock = kcalloc(1, sizeof(*lock), GFP_KERNEL); 411 lock = kcalloc(1, sizeof(*lock), GFP_NOFS);
386 if (!lock) 412 if (!lock)
387 return NULL; 413 return NULL;
388 414
389 if (!lksb) { 415 if (!lksb) {
390 /* zero memory only if kernel-allocated */ 416 /* zero memory only if kernel-allocated */
391 lksb = kcalloc(1, sizeof(*lksb), GFP_KERNEL); 417 lksb = kcalloc(1, sizeof(*lksb), GFP_NOFS);
392 if (!lksb) { 418 if (!lksb) {
393 kfree(lock); 419 kfree(lock);
394 return NULL; 420 return NULL;
@@ -429,11 +455,16 @@ int dlm_create_lock_handler(struct o2net_msg *msg, u32 len, void *data)
429 if (!dlm_grab(dlm)) 455 if (!dlm_grab(dlm))
430 return DLM_REJECTED; 456 return DLM_REJECTED;
431 457
432 mlog_bug_on_msg(!dlm_domain_fully_joined(dlm),
433 "Domain %s not fully joined!\n", dlm->name);
434
435 name = create->name; 458 name = create->name;
436 namelen = create->namelen; 459 namelen = create->namelen;
460 status = DLM_REJECTED;
461 if (!dlm_domain_fully_joined(dlm)) {
462 mlog(ML_ERROR, "Domain %s not fully joined, but node %u is "
463 "sending a create_lock message for lock %.*s!\n",
464 dlm->name, create->node_idx, namelen, name);
465 dlm_error(status);
466 goto leave;
467 }
437 468
438 status = DLM_IVBUFLEN; 469 status = DLM_IVBUFLEN;
439 if (namelen > DLM_LOCKID_NAME_MAX) { 470 if (namelen > DLM_LOCKID_NAME_MAX) {
@@ -669,18 +700,22 @@ retry_lock:
669 msleep(100); 700 msleep(100);
670 /* no waiting for dlm_reco_thread */ 701 /* no waiting for dlm_reco_thread */
671 if (recovery) { 702 if (recovery) {
672 if (status == DLM_RECOVERING) { 703 if (status != DLM_RECOVERING)
673 mlog(0, "%s: got RECOVERING " 704 goto retry_lock;
674 "for $REOCVERY lock, master " 705
675 "was %u\n", dlm->name, 706 mlog(0, "%s: got RECOVERING "
676 res->owner); 707 "for $RECOVERY lock, master "
677 dlm_wait_for_node_death(dlm, res->owner, 708 "was %u\n", dlm->name,
678 DLM_NODE_DEATH_WAIT_MAX); 709 res->owner);
679 } 710 /* wait to see the node go down, then
711 * drop down and allow the lockres to
712 * get cleaned up. need to remaster. */
713 dlm_wait_for_node_death(dlm, res->owner,
714 DLM_NODE_DEATH_WAIT_MAX);
680 } else { 715 } else {
681 dlm_wait_for_recovery(dlm); 716 dlm_wait_for_recovery(dlm);
717 goto retry_lock;
682 } 718 }
683 goto retry_lock;
684 } 719 }
685 720
686 if (status != DLM_NORMAL) { 721 if (status != DLM_NORMAL) {
diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c
index 940be4c13b1f..1b8346dd0572 100644
--- a/fs/ocfs2/dlm/dlmmaster.c
+++ b/fs/ocfs2/dlm/dlmmaster.c
@@ -47,7 +47,6 @@
47 47
48#include "dlmapi.h" 48#include "dlmapi.h"
49#include "dlmcommon.h" 49#include "dlmcommon.h"
50#include "dlmdebug.h"
51#include "dlmdomain.h" 50#include "dlmdomain.h"
52 51
53#define MLOG_MASK_PREFIX (ML_DLM|ML_DLM_MASTER) 52#define MLOG_MASK_PREFIX (ML_DLM|ML_DLM_MASTER)
@@ -74,6 +73,7 @@ struct dlm_master_list_entry
74 wait_queue_head_t wq; 73 wait_queue_head_t wq;
75 atomic_t woken; 74 atomic_t woken;
76 struct kref mle_refs; 75 struct kref mle_refs;
76 int inuse;
77 unsigned long maybe_map[BITS_TO_LONGS(O2NM_MAX_NODES)]; 77 unsigned long maybe_map[BITS_TO_LONGS(O2NM_MAX_NODES)];
78 unsigned long vote_map[BITS_TO_LONGS(O2NM_MAX_NODES)]; 78 unsigned long vote_map[BITS_TO_LONGS(O2NM_MAX_NODES)];
79 unsigned long response_map[BITS_TO_LONGS(O2NM_MAX_NODES)]; 79 unsigned long response_map[BITS_TO_LONGS(O2NM_MAX_NODES)];
@@ -127,18 +127,30 @@ static inline int dlm_mle_equal(struct dlm_ctxt *dlm,
127 return 1; 127 return 1;
128} 128}
129 129
130#if 0 130#define dlm_print_nodemap(m) _dlm_print_nodemap(m,#m)
131/* Code here is included but defined out as it aids debugging */ 131static void _dlm_print_nodemap(unsigned long *map, const char *mapname)
132{
133 int i;
134 printk("%s=[ ", mapname);
135 for (i=0; i<O2NM_MAX_NODES; i++)
136 if (test_bit(i, map))
137 printk("%d ", i);
138 printk("]");
139}
132 140
133void dlm_print_one_mle(struct dlm_master_list_entry *mle) 141static void dlm_print_one_mle(struct dlm_master_list_entry *mle)
134{ 142{
135 int i = 0, refs; 143 int refs;
136 char *type; 144 char *type;
137 char attached; 145 char attached;
138 u8 master; 146 u8 master;
139 unsigned int namelen; 147 unsigned int namelen;
140 const char *name; 148 const char *name;
141 struct kref *k; 149 struct kref *k;
150 unsigned long *maybe = mle->maybe_map,
151 *vote = mle->vote_map,
152 *resp = mle->response_map,
153 *node = mle->node_map;
142 154
143 k = &mle->mle_refs; 155 k = &mle->mle_refs;
144 if (mle->type == DLM_MLE_BLOCK) 156 if (mle->type == DLM_MLE_BLOCK)
@@ -159,18 +171,29 @@ void dlm_print_one_mle(struct dlm_master_list_entry *mle)
159 name = mle->u.res->lockname.name; 171 name = mle->u.res->lockname.name;
160 } 172 }
161 173
162 mlog(ML_NOTICE, " #%3d: %3s %3d %3u %3u %c (%d)%.*s\n", 174 mlog(ML_NOTICE, "%.*s: %3s refs=%3d mas=%3u new=%3u evt=%c inuse=%d ",
163 i, type, refs, master, mle->new_master, attached, 175 namelen, name, type, refs, master, mle->new_master, attached,
164 namelen, namelen, name); 176 mle->inuse);
177 dlm_print_nodemap(maybe);
178 printk(", ");
179 dlm_print_nodemap(vote);
180 printk(", ");
181 dlm_print_nodemap(resp);
182 printk(", ");
183 dlm_print_nodemap(node);
184 printk(", ");
185 printk("\n");
165} 186}
166 187
188#if 0
189/* Code here is included but defined out as it aids debugging */
190
167static void dlm_dump_mles(struct dlm_ctxt *dlm) 191static void dlm_dump_mles(struct dlm_ctxt *dlm)
168{ 192{
169 struct dlm_master_list_entry *mle; 193 struct dlm_master_list_entry *mle;
170 struct list_head *iter; 194 struct list_head *iter;
171 195
172 mlog(ML_NOTICE, "dumping all mles for domain %s:\n", dlm->name); 196 mlog(ML_NOTICE, "dumping all mles for domain %s:\n", dlm->name);
173 mlog(ML_NOTICE, " ####: type refs owner new events? lockname nodemap votemap respmap maybemap\n");
174 spin_lock(&dlm->master_lock); 197 spin_lock(&dlm->master_lock);
175 list_for_each(iter, &dlm->master_list) { 198 list_for_each(iter, &dlm->master_list) {
176 mle = list_entry(iter, struct dlm_master_list_entry, list); 199 mle = list_entry(iter, struct dlm_master_list_entry, list);
@@ -314,6 +337,31 @@ static inline void dlm_mle_detach_hb_events(struct dlm_ctxt *dlm,
314 spin_unlock(&dlm->spinlock); 337 spin_unlock(&dlm->spinlock);
315} 338}
316 339
340static void dlm_get_mle_inuse(struct dlm_master_list_entry *mle)
341{
342 struct dlm_ctxt *dlm;
343 dlm = mle->dlm;
344
345 assert_spin_locked(&dlm->spinlock);
346 assert_spin_locked(&dlm->master_lock);
347 mle->inuse++;
348 kref_get(&mle->mle_refs);
349}
350
351static void dlm_put_mle_inuse(struct dlm_master_list_entry *mle)
352{
353 struct dlm_ctxt *dlm;
354 dlm = mle->dlm;
355
356 spin_lock(&dlm->spinlock);
357 spin_lock(&dlm->master_lock);
358 mle->inuse--;
359 __dlm_put_mle(mle);
360 spin_unlock(&dlm->master_lock);
361 spin_unlock(&dlm->spinlock);
362
363}
364
317/* remove from list and free */ 365/* remove from list and free */
318static void __dlm_put_mle(struct dlm_master_list_entry *mle) 366static void __dlm_put_mle(struct dlm_master_list_entry *mle)
319{ 367{
@@ -322,9 +370,14 @@ static void __dlm_put_mle(struct dlm_master_list_entry *mle)
322 370
323 assert_spin_locked(&dlm->spinlock); 371 assert_spin_locked(&dlm->spinlock);
324 assert_spin_locked(&dlm->master_lock); 372 assert_spin_locked(&dlm->master_lock);
325 BUG_ON(!atomic_read(&mle->mle_refs.refcount)); 373 if (!atomic_read(&mle->mle_refs.refcount)) {
326 374 /* this may or may not crash, but who cares.
327 kref_put(&mle->mle_refs, dlm_mle_release); 375 * it's a BUG. */
376 mlog(ML_ERROR, "bad mle: %p\n", mle);
377 dlm_print_one_mle(mle);
378 BUG();
379 } else
380 kref_put(&mle->mle_refs, dlm_mle_release);
328} 381}
329 382
330 383
@@ -367,6 +420,7 @@ static void dlm_init_mle(struct dlm_master_list_entry *mle,
367 memset(mle->response_map, 0, sizeof(mle->response_map)); 420 memset(mle->response_map, 0, sizeof(mle->response_map));
368 mle->master = O2NM_MAX_NODES; 421 mle->master = O2NM_MAX_NODES;
369 mle->new_master = O2NM_MAX_NODES; 422 mle->new_master = O2NM_MAX_NODES;
423 mle->inuse = 0;
370 424
371 if (mle->type == DLM_MLE_MASTER) { 425 if (mle->type == DLM_MLE_MASTER) {
372 BUG_ON(!res); 426 BUG_ON(!res);
@@ -564,6 +618,28 @@ static void dlm_lockres_release(struct kref *kref)
564 mlog(0, "destroying lockres %.*s\n", res->lockname.len, 618 mlog(0, "destroying lockres %.*s\n", res->lockname.len,
565 res->lockname.name); 619 res->lockname.name);
566 620
621 if (!hlist_unhashed(&res->hash_node) ||
622 !list_empty(&res->granted) ||
623 !list_empty(&res->converting) ||
624 !list_empty(&res->blocked) ||
625 !list_empty(&res->dirty) ||
626 !list_empty(&res->recovering) ||
627 !list_empty(&res->purge)) {
628 mlog(ML_ERROR,
629 "Going to BUG for resource %.*s."
630 " We're on a list! [%c%c%c%c%c%c%c]\n",
631 res->lockname.len, res->lockname.name,
632 !hlist_unhashed(&res->hash_node) ? 'H' : ' ',
633 !list_empty(&res->granted) ? 'G' : ' ',
634 !list_empty(&res->converting) ? 'C' : ' ',
635 !list_empty(&res->blocked) ? 'B' : ' ',
636 !list_empty(&res->dirty) ? 'D' : ' ',
637 !list_empty(&res->recovering) ? 'R' : ' ',
638 !list_empty(&res->purge) ? 'P' : ' ');
639
640 dlm_print_one_lock_resource(res);
641 }
642
567 /* By the time we're ready to blow this guy away, we shouldn't 643 /* By the time we're ready to blow this guy away, we shouldn't
568 * be on any lists. */ 644 * be on any lists. */
569 BUG_ON(!hlist_unhashed(&res->hash_node)); 645 BUG_ON(!hlist_unhashed(&res->hash_node));
@@ -579,11 +655,6 @@ static void dlm_lockres_release(struct kref *kref)
579 kfree(res); 655 kfree(res);
580} 656}
581 657
582void dlm_lockres_get(struct dlm_lock_resource *res)
583{
584 kref_get(&res->refs);
585}
586
587void dlm_lockres_put(struct dlm_lock_resource *res) 658void dlm_lockres_put(struct dlm_lock_resource *res)
588{ 659{
589 kref_put(&res->refs, dlm_lockres_release); 660 kref_put(&res->refs, dlm_lockres_release);
@@ -603,7 +674,7 @@ static void dlm_init_lockres(struct dlm_ctxt *dlm,
603 memcpy(qname, name, namelen); 674 memcpy(qname, name, namelen);
604 675
605 res->lockname.len = namelen; 676 res->lockname.len = namelen;
606 res->lockname.hash = full_name_hash(name, namelen); 677 res->lockname.hash = dlm_lockid_hash(name, namelen);
607 678
608 init_waitqueue_head(&res->wq); 679 init_waitqueue_head(&res->wq);
609 spin_lock_init(&res->spinlock); 680 spin_lock_init(&res->spinlock);
@@ -637,11 +708,11 @@ struct dlm_lock_resource *dlm_new_lockres(struct dlm_ctxt *dlm,
637{ 708{
638 struct dlm_lock_resource *res; 709 struct dlm_lock_resource *res;
639 710
640 res = kmalloc(sizeof(struct dlm_lock_resource), GFP_KERNEL); 711 res = kmalloc(sizeof(struct dlm_lock_resource), GFP_NOFS);
641 if (!res) 712 if (!res)
642 return NULL; 713 return NULL;
643 714
644 res->lockname.name = kmalloc(namelen, GFP_KERNEL); 715 res->lockname.name = kmalloc(namelen, GFP_NOFS);
645 if (!res->lockname.name) { 716 if (!res->lockname.name) {
646 kfree(res); 717 kfree(res);
647 return NULL; 718 return NULL;
@@ -677,19 +748,20 @@ struct dlm_lock_resource * dlm_get_lock_resource(struct dlm_ctxt *dlm,
677 int blocked = 0; 748 int blocked = 0;
678 int ret, nodenum; 749 int ret, nodenum;
679 struct dlm_node_iter iter; 750 struct dlm_node_iter iter;
680 unsigned int namelen; 751 unsigned int namelen, hash;
681 int tries = 0; 752 int tries = 0;
682 int bit, wait_on_recovery = 0; 753 int bit, wait_on_recovery = 0;
683 754
684 BUG_ON(!lockid); 755 BUG_ON(!lockid);
685 756
686 namelen = strlen(lockid); 757 namelen = strlen(lockid);
758 hash = dlm_lockid_hash(lockid, namelen);
687 759
688 mlog(0, "get lockres %s (len %d)\n", lockid, namelen); 760 mlog(0, "get lockres %s (len %d)\n", lockid, namelen);
689 761
690lookup: 762lookup:
691 spin_lock(&dlm->spinlock); 763 spin_lock(&dlm->spinlock);
692 tmpres = __dlm_lookup_lockres(dlm, lockid, namelen); 764 tmpres = __dlm_lookup_lockres(dlm, lockid, namelen, hash);
693 if (tmpres) { 765 if (tmpres) {
694 spin_unlock(&dlm->spinlock); 766 spin_unlock(&dlm->spinlock);
695 mlog(0, "found in hash!\n"); 767 mlog(0, "found in hash!\n");
@@ -704,7 +776,7 @@ lookup:
704 mlog(0, "allocating a new resource\n"); 776 mlog(0, "allocating a new resource\n");
705 /* nothing found and we need to allocate one. */ 777 /* nothing found and we need to allocate one. */
706 alloc_mle = (struct dlm_master_list_entry *) 778 alloc_mle = (struct dlm_master_list_entry *)
707 kmem_cache_alloc(dlm_mle_cache, GFP_KERNEL); 779 kmem_cache_alloc(dlm_mle_cache, GFP_NOFS);
708 if (!alloc_mle) 780 if (!alloc_mle)
709 goto leave; 781 goto leave;
710 res = dlm_new_lockres(dlm, lockid, namelen); 782 res = dlm_new_lockres(dlm, lockid, namelen);
@@ -790,10 +862,11 @@ lookup:
790 * if so, the creator of the BLOCK may try to put the last 862 * if so, the creator of the BLOCK may try to put the last
791 * ref at this time in the assert master handler, so we 863 * ref at this time in the assert master handler, so we
792 * need an extra one to keep from a bad ptr deref. */ 864 * need an extra one to keep from a bad ptr deref. */
793 dlm_get_mle(mle); 865 dlm_get_mle_inuse(mle);
794 spin_unlock(&dlm->master_lock); 866 spin_unlock(&dlm->master_lock);
795 spin_unlock(&dlm->spinlock); 867 spin_unlock(&dlm->spinlock);
796 868
869redo_request:
797 while (wait_on_recovery) { 870 while (wait_on_recovery) {
798 /* any cluster changes that occurred after dropping the 871 /* any cluster changes that occurred after dropping the
799 * dlm spinlock would be detectable be a change on the mle, 872 * dlm spinlock would be detectable be a change on the mle,
@@ -812,7 +885,7 @@ lookup:
812 } 885 }
813 886
814 dlm_kick_recovery_thread(dlm); 887 dlm_kick_recovery_thread(dlm);
815 msleep(100); 888 msleep(1000);
816 dlm_wait_for_recovery(dlm); 889 dlm_wait_for_recovery(dlm);
817 890
818 spin_lock(&dlm->spinlock); 891 spin_lock(&dlm->spinlock);
@@ -825,13 +898,15 @@ lookup:
825 } else 898 } else
826 wait_on_recovery = 0; 899 wait_on_recovery = 0;
827 spin_unlock(&dlm->spinlock); 900 spin_unlock(&dlm->spinlock);
901
902 if (wait_on_recovery)
903 dlm_wait_for_node_recovery(dlm, bit, 10000);
828 } 904 }
829 905
830 /* must wait for lock to be mastered elsewhere */ 906 /* must wait for lock to be mastered elsewhere */
831 if (blocked) 907 if (blocked)
832 goto wait; 908 goto wait;
833 909
834redo_request:
835 ret = -EINVAL; 910 ret = -EINVAL;
836 dlm_node_iter_init(mle->vote_map, &iter); 911 dlm_node_iter_init(mle->vote_map, &iter);
837 while ((nodenum = dlm_node_iter_next(&iter)) >= 0) { 912 while ((nodenum = dlm_node_iter_next(&iter)) >= 0) {
@@ -856,6 +931,7 @@ wait:
856 /* keep going until the response map includes all nodes */ 931 /* keep going until the response map includes all nodes */
857 ret = dlm_wait_for_lock_mastery(dlm, res, mle, &blocked); 932 ret = dlm_wait_for_lock_mastery(dlm, res, mle, &blocked);
858 if (ret < 0) { 933 if (ret < 0) {
934 wait_on_recovery = 1;
859 mlog(0, "%s:%.*s: node map changed, redo the " 935 mlog(0, "%s:%.*s: node map changed, redo the "
860 "master request now, blocked=%d\n", 936 "master request now, blocked=%d\n",
861 dlm->name, res->lockname.len, 937 dlm->name, res->lockname.len,
@@ -866,7 +942,7 @@ wait:
866 dlm->name, res->lockname.len, 942 dlm->name, res->lockname.len,
867 res->lockname.name, blocked); 943 res->lockname.name, blocked);
868 dlm_print_one_lock_resource(res); 944 dlm_print_one_lock_resource(res);
869 /* dlm_print_one_mle(mle); */ 945 dlm_print_one_mle(mle);
870 tries = 0; 946 tries = 0;
871 } 947 }
872 goto redo_request; 948 goto redo_request;
@@ -880,7 +956,7 @@ wait:
880 dlm_mle_detach_hb_events(dlm, mle); 956 dlm_mle_detach_hb_events(dlm, mle);
881 dlm_put_mle(mle); 957 dlm_put_mle(mle);
882 /* put the extra ref */ 958 /* put the extra ref */
883 dlm_put_mle(mle); 959 dlm_put_mle_inuse(mle);
884 960
885wake_waiters: 961wake_waiters:
886 spin_lock(&res->spinlock); 962 spin_lock(&res->spinlock);
@@ -921,12 +997,14 @@ recheck:
921 spin_unlock(&res->spinlock); 997 spin_unlock(&res->spinlock);
922 /* this will cause the master to re-assert across 998 /* this will cause the master to re-assert across
923 * the whole cluster, freeing up mles */ 999 * the whole cluster, freeing up mles */
924 ret = dlm_do_master_request(mle, res->owner); 1000 if (res->owner != dlm->node_num) {
925 if (ret < 0) { 1001 ret = dlm_do_master_request(mle, res->owner);
926 /* give recovery a chance to run */ 1002 if (ret < 0) {
927 mlog(ML_ERROR, "link to %u went down?: %d\n", res->owner, ret); 1003 /* give recovery a chance to run */
928 msleep(500); 1004 mlog(ML_ERROR, "link to %u went down?: %d\n", res->owner, ret);
929 goto recheck; 1005 msleep(500);
1006 goto recheck;
1007 }
930 } 1008 }
931 ret = 0; 1009 ret = 0;
932 goto leave; 1010 goto leave;
@@ -962,6 +1040,12 @@ recheck:
962 "rechecking now\n", dlm->name, res->lockname.len, 1040 "rechecking now\n", dlm->name, res->lockname.len,
963 res->lockname.name); 1041 res->lockname.name);
964 goto recheck; 1042 goto recheck;
1043 } else {
1044 if (!voting_done) {
1045 mlog(0, "map not changed and voting not done "
1046 "for %s:%.*s\n", dlm->name, res->lockname.len,
1047 res->lockname.name);
1048 }
965 } 1049 }
966 1050
967 if (m != O2NM_MAX_NODES) { 1051 if (m != O2NM_MAX_NODES) {
@@ -1129,18 +1213,6 @@ static int dlm_restart_lock_mastery(struct dlm_ctxt *dlm,
1129 set_bit(node, mle->vote_map); 1213 set_bit(node, mle->vote_map);
1130 } else { 1214 } else {
1131 mlog(ML_ERROR, "node down! %d\n", node); 1215 mlog(ML_ERROR, "node down! %d\n", node);
1132
1133 /* if the node wasn't involved in mastery skip it,
1134 * but clear it out from the maps so that it will
1135 * not affect mastery of this lockres */
1136 clear_bit(node, mle->response_map);
1137 clear_bit(node, mle->vote_map);
1138 if (!test_bit(node, mle->maybe_map))
1139 goto next;
1140
1141 /* if we're already blocked on lock mastery, and the
1142 * dead node wasn't the expected master, or there is
1143 * another node in the maybe_map, keep waiting */
1144 if (blocked) { 1216 if (blocked) {
1145 int lowest = find_next_bit(mle->maybe_map, 1217 int lowest = find_next_bit(mle->maybe_map,
1146 O2NM_MAX_NODES, 0); 1218 O2NM_MAX_NODES, 0);
@@ -1148,54 +1220,53 @@ static int dlm_restart_lock_mastery(struct dlm_ctxt *dlm,
1148 /* act like it was never there */ 1220 /* act like it was never there */
1149 clear_bit(node, mle->maybe_map); 1221 clear_bit(node, mle->maybe_map);
1150 1222
1151 if (node != lowest) 1223 if (node == lowest) {
1152 goto next; 1224 mlog(0, "expected master %u died"
1153 1225 " while this node was blocked "
1154 mlog(ML_ERROR, "expected master %u died while " 1226 "waiting on it!\n", node);
1155 "this node was blocked waiting on it!\n", 1227 lowest = find_next_bit(mle->maybe_map,
1156 node); 1228 O2NM_MAX_NODES,
1157 lowest = find_next_bit(mle->maybe_map, 1229 lowest+1);
1158 O2NM_MAX_NODES, 1230 if (lowest < O2NM_MAX_NODES) {
1159 lowest+1); 1231 mlog(0, "%s:%.*s:still "
1160 if (lowest < O2NM_MAX_NODES) { 1232 "blocked. waiting on %u "
1161 mlog(0, "still blocked. waiting " 1233 "now\n", dlm->name,
1162 "on %u now\n", lowest); 1234 res->lockname.len,
1163 goto next; 1235 res->lockname.name,
1236 lowest);
1237 } else {
1238 /* mle is an MLE_BLOCK, but
1239 * there is now nothing left to
1240 * block on. we need to return
1241 * all the way back out and try
1242 * again with an MLE_MASTER.
1243 * dlm_do_local_recovery_cleanup
1244 * has already run, so the mle
1245 * refcount is ok */
1246 mlog(0, "%s:%.*s: no "
1247 "longer blocking. try to "
1248 "master this here\n",
1249 dlm->name,
1250 res->lockname.len,
1251 res->lockname.name);
1252 mle->type = DLM_MLE_MASTER;
1253 mle->u.res = res;
1254 }
1164 } 1255 }
1165
1166 /* mle is an MLE_BLOCK, but there is now
1167 * nothing left to block on. we need to return
1168 * all the way back out and try again with
1169 * an MLE_MASTER. dlm_do_local_recovery_cleanup
1170 * has already run, so the mle refcount is ok */
1171 mlog(0, "no longer blocking. we can "
1172 "try to master this here\n");
1173 mle->type = DLM_MLE_MASTER;
1174 memset(mle->maybe_map, 0,
1175 sizeof(mle->maybe_map));
1176 memset(mle->response_map, 0,
1177 sizeof(mle->maybe_map));
1178 memcpy(mle->vote_map, mle->node_map,
1179 sizeof(mle->node_map));
1180 mle->u.res = res;
1181 set_bit(dlm->node_num, mle->maybe_map);
1182
1183 ret = -EAGAIN;
1184 goto next;
1185 } 1256 }
1186 1257
1187 clear_bit(node, mle->maybe_map); 1258 /* now blank out everything, as if we had never
1188 if (node > dlm->node_num) 1259 * contacted anyone */
1189 goto next; 1260 memset(mle->maybe_map, 0, sizeof(mle->maybe_map));
1190 1261 memset(mle->response_map, 0, sizeof(mle->response_map));
1191 mlog(0, "dead node in map!\n"); 1262 /* reset the vote_map to the current node_map */
1192 /* yuck. go back and re-contact all nodes 1263 memcpy(mle->vote_map, mle->node_map,
1193 * in the vote_map, removing this node. */ 1264 sizeof(mle->node_map));
1194 memset(mle->response_map, 0, 1265 /* put myself into the maybe map */
1195 sizeof(mle->response_map)); 1266 if (mle->type != DLM_MLE_BLOCK)
1267 set_bit(dlm->node_num, mle->maybe_map);
1196 } 1268 }
1197 ret = -EAGAIN; 1269 ret = -EAGAIN;
1198next:
1199 node = dlm_bitmap_diff_iter_next(&bdi, &sc); 1270 node = dlm_bitmap_diff_iter_next(&bdi, &sc);
1200 } 1271 }
1201 return ret; 1272 return ret;
@@ -1316,7 +1387,7 @@ int dlm_master_request_handler(struct o2net_msg *msg, u32 len, void *data)
1316 struct dlm_master_request *request = (struct dlm_master_request *) msg->buf; 1387 struct dlm_master_request *request = (struct dlm_master_request *) msg->buf;
1317 struct dlm_master_list_entry *mle = NULL, *tmpmle = NULL; 1388 struct dlm_master_list_entry *mle = NULL, *tmpmle = NULL;
1318 char *name; 1389 char *name;
1319 unsigned int namelen; 1390 unsigned int namelen, hash;
1320 int found, ret; 1391 int found, ret;
1321 int set_maybe; 1392 int set_maybe;
1322 int dispatch_assert = 0; 1393 int dispatch_assert = 0;
@@ -1331,6 +1402,7 @@ int dlm_master_request_handler(struct o2net_msg *msg, u32 len, void *data)
1331 1402
1332 name = request->name; 1403 name = request->name;
1333 namelen = request->namelen; 1404 namelen = request->namelen;
1405 hash = dlm_lockid_hash(name, namelen);
1334 1406
1335 if (namelen > DLM_LOCKID_NAME_MAX) { 1407 if (namelen > DLM_LOCKID_NAME_MAX) {
1336 response = DLM_IVBUFLEN; 1408 response = DLM_IVBUFLEN;
@@ -1339,7 +1411,7 @@ int dlm_master_request_handler(struct o2net_msg *msg, u32 len, void *data)
1339 1411
1340way_up_top: 1412way_up_top:
1341 spin_lock(&dlm->spinlock); 1413 spin_lock(&dlm->spinlock);
1342 res = __dlm_lookup_lockres(dlm, name, namelen); 1414 res = __dlm_lookup_lockres(dlm, name, namelen, hash);
1343 if (res) { 1415 if (res) {
1344 spin_unlock(&dlm->spinlock); 1416 spin_unlock(&dlm->spinlock);
1345 1417
@@ -1459,21 +1531,18 @@ way_up_top:
1459 spin_unlock(&dlm->spinlock); 1531 spin_unlock(&dlm->spinlock);
1460 1532
1461 mle = (struct dlm_master_list_entry *) 1533 mle = (struct dlm_master_list_entry *)
1462 kmem_cache_alloc(dlm_mle_cache, GFP_KERNEL); 1534 kmem_cache_alloc(dlm_mle_cache, GFP_NOFS);
1463 if (!mle) { 1535 if (!mle) {
1464 response = DLM_MASTER_RESP_ERROR; 1536 response = DLM_MASTER_RESP_ERROR;
1465 mlog_errno(-ENOMEM); 1537 mlog_errno(-ENOMEM);
1466 goto send_response; 1538 goto send_response;
1467 } 1539 }
1468 spin_lock(&dlm->spinlock);
1469 dlm_init_mle(mle, DLM_MLE_BLOCK, dlm, NULL,
1470 name, namelen);
1471 spin_unlock(&dlm->spinlock);
1472 goto way_up_top; 1540 goto way_up_top;
1473 } 1541 }
1474 1542
1475 // mlog(0, "this is second time thru, already allocated, " 1543 // mlog(0, "this is second time thru, already allocated, "
1476 // "add the block.\n"); 1544 // "add the block.\n");
1545 dlm_init_mle(mle, DLM_MLE_BLOCK, dlm, NULL, name, namelen);
1477 set_bit(request->node_idx, mle->maybe_map); 1546 set_bit(request->node_idx, mle->maybe_map);
1478 list_add(&mle->list, &dlm->master_list); 1547 list_add(&mle->list, &dlm->master_list);
1479 response = DLM_MASTER_RESP_NO; 1548 response = DLM_MASTER_RESP_NO;
@@ -1556,6 +1625,8 @@ again:
1556 dlm_node_iter_init(nodemap, &iter); 1625 dlm_node_iter_init(nodemap, &iter);
1557 while ((to = dlm_node_iter_next(&iter)) >= 0) { 1626 while ((to = dlm_node_iter_next(&iter)) >= 0) {
1558 int r = 0; 1627 int r = 0;
1628 struct dlm_master_list_entry *mle = NULL;
1629
1559 mlog(0, "sending assert master to %d (%.*s)\n", to, 1630 mlog(0, "sending assert master to %d (%.*s)\n", to,
1560 namelen, lockname); 1631 namelen, lockname);
1561 memset(&assert, 0, sizeof(assert)); 1632 memset(&assert, 0, sizeof(assert));
@@ -1567,20 +1638,28 @@ again:
1567 tmpret = o2net_send_message(DLM_ASSERT_MASTER_MSG, dlm->key, 1638 tmpret = o2net_send_message(DLM_ASSERT_MASTER_MSG, dlm->key,
1568 &assert, sizeof(assert), to, &r); 1639 &assert, sizeof(assert), to, &r);
1569 if (tmpret < 0) { 1640 if (tmpret < 0) {
1570 mlog(ML_ERROR, "assert_master returned %d!\n", tmpret); 1641 mlog(0, "assert_master returned %d!\n", tmpret);
1571 if (!dlm_is_host_down(tmpret)) { 1642 if (!dlm_is_host_down(tmpret)) {
1572 mlog(ML_ERROR, "unhandled error!\n"); 1643 mlog(ML_ERROR, "unhandled error=%d!\n", tmpret);
1573 BUG(); 1644 BUG();
1574 } 1645 }
1575 /* a node died. finish out the rest of the nodes. */ 1646 /* a node died. finish out the rest of the nodes. */
1576 mlog(ML_ERROR, "link to %d went down!\n", to); 1647 mlog(0, "link to %d went down!\n", to);
1577 /* any nonzero status return will do */ 1648 /* any nonzero status return will do */
1578 ret = tmpret; 1649 ret = tmpret;
1579 } else if (r < 0) { 1650 } else if (r < 0) {
1580 /* ok, something horribly messed. kill thyself. */ 1651 /* ok, something horribly messed. kill thyself. */
1581 mlog(ML_ERROR,"during assert master of %.*s to %u, " 1652 mlog(ML_ERROR,"during assert master of %.*s to %u, "
1582 "got %d.\n", namelen, lockname, to, r); 1653 "got %d.\n", namelen, lockname, to, r);
1583 dlm_dump_lock_resources(dlm); 1654 spin_lock(&dlm->spinlock);
1655 spin_lock(&dlm->master_lock);
1656 if (dlm_find_mle(dlm, &mle, (char *)lockname,
1657 namelen)) {
1658 dlm_print_one_mle(mle);
1659 __dlm_put_mle(mle);
1660 }
1661 spin_unlock(&dlm->master_lock);
1662 spin_unlock(&dlm->spinlock);
1584 BUG(); 1663 BUG();
1585 } else if (r == EAGAIN) { 1664 } else if (r == EAGAIN) {
1586 mlog(0, "%.*s: node %u create mles on other " 1665 mlog(0, "%.*s: node %u create mles on other "
@@ -1612,7 +1691,7 @@ int dlm_assert_master_handler(struct o2net_msg *msg, u32 len, void *data)
1612 struct dlm_assert_master *assert = (struct dlm_assert_master *)msg->buf; 1691 struct dlm_assert_master *assert = (struct dlm_assert_master *)msg->buf;
1613 struct dlm_lock_resource *res = NULL; 1692 struct dlm_lock_resource *res = NULL;
1614 char *name; 1693 char *name;
1615 unsigned int namelen; 1694 unsigned int namelen, hash;
1616 u32 flags; 1695 u32 flags;
1617 int master_request = 0; 1696 int master_request = 0;
1618 int ret = 0; 1697 int ret = 0;
@@ -1622,6 +1701,7 @@ int dlm_assert_master_handler(struct o2net_msg *msg, u32 len, void *data)
1622 1701
1623 name = assert->name; 1702 name = assert->name;
1624 namelen = assert->namelen; 1703 namelen = assert->namelen;
1704 hash = dlm_lockid_hash(name, namelen);
1625 flags = be32_to_cpu(assert->flags); 1705 flags = be32_to_cpu(assert->flags);
1626 1706
1627 if (namelen > DLM_LOCKID_NAME_MAX) { 1707 if (namelen > DLM_LOCKID_NAME_MAX) {
@@ -1646,7 +1726,7 @@ int dlm_assert_master_handler(struct o2net_msg *msg, u32 len, void *data)
1646 if (bit >= O2NM_MAX_NODES) { 1726 if (bit >= O2NM_MAX_NODES) {
1647 /* not necessarily an error, though less likely. 1727 /* not necessarily an error, though less likely.
1648 * could be master just re-asserting. */ 1728 * could be master just re-asserting. */
1649 mlog(ML_ERROR, "no bits set in the maybe_map, but %u " 1729 mlog(0, "no bits set in the maybe_map, but %u "
1650 "is asserting! (%.*s)\n", assert->node_idx, 1730 "is asserting! (%.*s)\n", assert->node_idx,
1651 namelen, name); 1731 namelen, name);
1652 } else if (bit != assert->node_idx) { 1732 } else if (bit != assert->node_idx) {
@@ -1658,19 +1738,36 @@ int dlm_assert_master_handler(struct o2net_msg *msg, u32 len, void *data)
1658 * number winning the mastery will respond 1738 * number winning the mastery will respond
1659 * YES to mastery requests, but this node 1739 * YES to mastery requests, but this node
1660 * had no way of knowing. let it pass. */ 1740 * had no way of knowing. let it pass. */
1661 mlog(ML_ERROR, "%u is the lowest node, " 1741 mlog(0, "%u is the lowest node, "
1662 "%u is asserting. (%.*s) %u must " 1742 "%u is asserting. (%.*s) %u must "
1663 "have begun after %u won.\n", bit, 1743 "have begun after %u won.\n", bit,
1664 assert->node_idx, namelen, name, bit, 1744 assert->node_idx, namelen, name, bit,
1665 assert->node_idx); 1745 assert->node_idx);
1666 } 1746 }
1667 } 1747 }
1748 if (mle->type == DLM_MLE_MIGRATION) {
1749 if (flags & DLM_ASSERT_MASTER_MLE_CLEANUP) {
1750 mlog(0, "%s:%.*s: got cleanup assert"
1751 " from %u for migration\n",
1752 dlm->name, namelen, name,
1753 assert->node_idx);
1754 } else if (!(flags & DLM_ASSERT_MASTER_FINISH_MIGRATION)) {
1755 mlog(0, "%s:%.*s: got unrelated assert"
1756 " from %u for migration, ignoring\n",
1757 dlm->name, namelen, name,
1758 assert->node_idx);
1759 __dlm_put_mle(mle);
1760 spin_unlock(&dlm->master_lock);
1761 spin_unlock(&dlm->spinlock);
1762 goto done;
1763 }
1764 }
1668 } 1765 }
1669 spin_unlock(&dlm->master_lock); 1766 spin_unlock(&dlm->master_lock);
1670 1767
1671 /* ok everything checks out with the MLE 1768 /* ok everything checks out with the MLE
1672 * now check to see if there is a lockres */ 1769 * now check to see if there is a lockres */
1673 res = __dlm_lookup_lockres(dlm, name, namelen); 1770 res = __dlm_lookup_lockres(dlm, name, namelen, hash);
1674 if (res) { 1771 if (res) {
1675 spin_lock(&res->spinlock); 1772 spin_lock(&res->spinlock);
1676 if (res->state & DLM_LOCK_RES_RECOVERING) { 1773 if (res->state & DLM_LOCK_RES_RECOVERING) {
@@ -1679,7 +1776,8 @@ int dlm_assert_master_handler(struct o2net_msg *msg, u32 len, void *data)
1679 goto kill; 1776 goto kill;
1680 } 1777 }
1681 if (!mle) { 1778 if (!mle) {
1682 if (res->owner != assert->node_idx) { 1779 if (res->owner != DLM_LOCK_RES_OWNER_UNKNOWN &&
1780 res->owner != assert->node_idx) {
1683 mlog(ML_ERROR, "assert_master from " 1781 mlog(ML_ERROR, "assert_master from "
1684 "%u, but current owner is " 1782 "%u, but current owner is "
1685 "%u! (%.*s)\n", 1783 "%u! (%.*s)\n",
@@ -1732,6 +1830,7 @@ ok:
1732 if (mle) { 1830 if (mle) {
1733 int extra_ref = 0; 1831 int extra_ref = 0;
1734 int nn = -1; 1832 int nn = -1;
1833 int rr, err = 0;
1735 1834
1736 spin_lock(&mle->spinlock); 1835 spin_lock(&mle->spinlock);
1737 if (mle->type == DLM_MLE_BLOCK || mle->type == DLM_MLE_MIGRATION) 1836 if (mle->type == DLM_MLE_BLOCK || mle->type == DLM_MLE_MIGRATION)
@@ -1751,27 +1850,64 @@ ok:
1751 wake_up(&mle->wq); 1850 wake_up(&mle->wq);
1752 spin_unlock(&mle->spinlock); 1851 spin_unlock(&mle->spinlock);
1753 1852
1754 if (mle->type == DLM_MLE_MIGRATION && res) { 1853 if (res) {
1755 mlog(0, "finishing off migration of lockres %.*s, "
1756 "from %u to %u\n",
1757 res->lockname.len, res->lockname.name,
1758 dlm->node_num, mle->new_master);
1759 spin_lock(&res->spinlock); 1854 spin_lock(&res->spinlock);
1760 res->state &= ~DLM_LOCK_RES_MIGRATING; 1855 if (mle->type == DLM_MLE_MIGRATION) {
1761 dlm_change_lockres_owner(dlm, res, mle->new_master); 1856 mlog(0, "finishing off migration of lockres %.*s, "
1762 BUG_ON(res->state & DLM_LOCK_RES_DIRTY); 1857 "from %u to %u\n",
1858 res->lockname.len, res->lockname.name,
1859 dlm->node_num, mle->new_master);
1860 res->state &= ~DLM_LOCK_RES_MIGRATING;
1861 dlm_change_lockres_owner(dlm, res, mle->new_master);
1862 BUG_ON(res->state & DLM_LOCK_RES_DIRTY);
1863 } else {
1864 dlm_change_lockres_owner(dlm, res, mle->master);
1865 }
1763 spin_unlock(&res->spinlock); 1866 spin_unlock(&res->spinlock);
1764 } 1867 }
1765 /* master is known, detach if not already detached */ 1868
1766 dlm_mle_detach_hb_events(dlm, mle); 1869 /* master is known, detach if not already detached.
1767 dlm_put_mle(mle); 1870 * ensures that only one assert_master call will happen
1768 1871 * on this mle. */
1872 spin_lock(&dlm->spinlock);
1873 spin_lock(&dlm->master_lock);
1874
1875 rr = atomic_read(&mle->mle_refs.refcount);
1876 if (mle->inuse > 0) {
1877 if (extra_ref && rr < 3)
1878 err = 1;
1879 else if (!extra_ref && rr < 2)
1880 err = 1;
1881 } else {
1882 if (extra_ref && rr < 2)
1883 err = 1;
1884 else if (!extra_ref && rr < 1)
1885 err = 1;
1886 }
1887 if (err) {
1888 mlog(ML_ERROR, "%s:%.*s: got assert master from %u "
1889 "that will mess up this node, refs=%d, extra=%d, "
1890 "inuse=%d\n", dlm->name, namelen, name,
1891 assert->node_idx, rr, extra_ref, mle->inuse);
1892 dlm_print_one_mle(mle);
1893 }
1894 list_del_init(&mle->list);
1895 __dlm_mle_detach_hb_events(dlm, mle);
1896 __dlm_put_mle(mle);
1769 if (extra_ref) { 1897 if (extra_ref) {
1770 /* the assert master message now balances the extra 1898 /* the assert master message now balances the extra
1771 * ref given by the master / migration request message. 1899 * ref given by the master / migration request message.
1772 * if this is the last put, it will be removed 1900 * if this is the last put, it will be removed
1773 * from the list. */ 1901 * from the list. */
1774 dlm_put_mle(mle); 1902 __dlm_put_mle(mle);
1903 }
1904 spin_unlock(&dlm->master_lock);
1905 spin_unlock(&dlm->spinlock);
1906 } else if (res) {
1907 if (res->owner != assert->node_idx) {
1908 mlog(0, "assert_master from %u, but current "
1909 "owner is %u (%.*s), no mle\n", assert->node_idx,
1910 res->owner, namelen, name);
1775 } 1911 }
1776 } 1912 }
1777 1913
@@ -1788,12 +1924,12 @@ done:
1788 1924
1789kill: 1925kill:
1790 /* kill the caller! */ 1926 /* kill the caller! */
1927 mlog(ML_ERROR, "Bad message received from another node. Dumping state "
1928 "and killing the other node now! This node is OK and can continue.\n");
1929 __dlm_print_one_lock_resource(res);
1791 spin_unlock(&res->spinlock); 1930 spin_unlock(&res->spinlock);
1792 spin_unlock(&dlm->spinlock); 1931 spin_unlock(&dlm->spinlock);
1793 dlm_lockres_put(res); 1932 dlm_lockres_put(res);
1794 mlog(ML_ERROR, "Bad message received from another node. Dumping state "
1795 "and killing the other node now! This node is OK and can continue.\n");
1796 dlm_dump_lock_resources(dlm);
1797 dlm_put(dlm); 1933 dlm_put(dlm);
1798 return -EINVAL; 1934 return -EINVAL;
1799} 1935}
@@ -1803,7 +1939,7 @@ int dlm_dispatch_assert_master(struct dlm_ctxt *dlm,
1803 int ignore_higher, u8 request_from, u32 flags) 1939 int ignore_higher, u8 request_from, u32 flags)
1804{ 1940{
1805 struct dlm_work_item *item; 1941 struct dlm_work_item *item;
1806 item = kcalloc(1, sizeof(*item), GFP_KERNEL); 1942 item = kcalloc(1, sizeof(*item), GFP_NOFS);
1807 if (!item) 1943 if (!item)
1808 return -ENOMEM; 1944 return -ENOMEM;
1809 1945
@@ -1825,7 +1961,7 @@ int dlm_dispatch_assert_master(struct dlm_ctxt *dlm,
1825 list_add_tail(&item->list, &dlm->work_list); 1961 list_add_tail(&item->list, &dlm->work_list);
1826 spin_unlock(&dlm->work_lock); 1962 spin_unlock(&dlm->work_lock);
1827 1963
1828 schedule_work(&dlm->dispatched_work); 1964 queue_work(dlm->dlm_worker, &dlm->dispatched_work);
1829 return 0; 1965 return 0;
1830} 1966}
1831 1967
@@ -1866,6 +2002,23 @@ static void dlm_assert_master_worker(struct dlm_work_item *item, void *data)
1866 } 2002 }
1867 } 2003 }
1868 2004
2005 /*
2006 * If we're migrating this lock to someone else, we are no
2007 * longer allowed to assert out own mastery. OTOH, we need to
2008 * prevent migration from starting while we're still asserting
2009 * our dominance. The reserved ast delays migration.
2010 */
2011 spin_lock(&res->spinlock);
2012 if (res->state & DLM_LOCK_RES_MIGRATING) {
2013 mlog(0, "Someone asked us to assert mastery, but we're "
2014 "in the middle of migration. Skipping assert, "
2015 "the new master will handle that.\n");
2016 spin_unlock(&res->spinlock);
2017 goto put;
2018 } else
2019 __dlm_lockres_reserve_ast(res);
2020 spin_unlock(&res->spinlock);
2021
1869 /* this call now finishes out the nodemap 2022 /* this call now finishes out the nodemap
1870 * even if one or more nodes die */ 2023 * even if one or more nodes die */
1871 mlog(0, "worker about to master %.*s here, this=%u\n", 2024 mlog(0, "worker about to master %.*s here, this=%u\n",
@@ -1875,9 +2028,14 @@ static void dlm_assert_master_worker(struct dlm_work_item *item, void *data)
1875 nodemap, flags); 2028 nodemap, flags);
1876 if (ret < 0) { 2029 if (ret < 0) {
1877 /* no need to restart, we are done */ 2030 /* no need to restart, we are done */
1878 mlog_errno(ret); 2031 if (!dlm_is_host_down(ret))
2032 mlog_errno(ret);
1879 } 2033 }
1880 2034
2035 /* Ok, we've asserted ourselves. Let's let migration start. */
2036 dlm_lockres_release_ast(dlm, res);
2037
2038put:
1881 dlm_lockres_put(res); 2039 dlm_lockres_put(res);
1882 2040
1883 mlog(0, "finished with dlm_assert_master_worker\n"); 2041 mlog(0, "finished with dlm_assert_master_worker\n");
@@ -1916,6 +2074,7 @@ static int dlm_pre_master_reco_lockres(struct dlm_ctxt *dlm,
1916 BUG(); 2074 BUG();
1917 /* host is down, so answer for that node would be 2075 /* host is down, so answer for that node would be
1918 * DLM_LOCK_RES_OWNER_UNKNOWN. continue. */ 2076 * DLM_LOCK_RES_OWNER_UNKNOWN. continue. */
2077 ret = 0;
1919 } 2078 }
1920 2079
1921 if (master != DLM_LOCK_RES_OWNER_UNKNOWN) { 2080 if (master != DLM_LOCK_RES_OWNER_UNKNOWN) {
@@ -2016,14 +2175,14 @@ int dlm_migrate_lockres(struct dlm_ctxt *dlm, struct dlm_lock_resource *res,
2016 */ 2175 */
2017 2176
2018 ret = -ENOMEM; 2177 ret = -ENOMEM;
2019 mres = (struct dlm_migratable_lockres *) __get_free_page(GFP_KERNEL); 2178 mres = (struct dlm_migratable_lockres *) __get_free_page(GFP_NOFS);
2020 if (!mres) { 2179 if (!mres) {
2021 mlog_errno(ret); 2180 mlog_errno(ret);
2022 goto leave; 2181 goto leave;
2023 } 2182 }
2024 2183
2025 mle = (struct dlm_master_list_entry *) kmem_cache_alloc(dlm_mle_cache, 2184 mle = (struct dlm_master_list_entry *) kmem_cache_alloc(dlm_mle_cache,
2026 GFP_KERNEL); 2185 GFP_NOFS);
2027 if (!mle) { 2186 if (!mle) {
2028 mlog_errno(ret); 2187 mlog_errno(ret);
2029 goto leave; 2188 goto leave;
@@ -2117,7 +2276,7 @@ fail:
2117 * take both dlm->spinlock and dlm->master_lock */ 2276 * take both dlm->spinlock and dlm->master_lock */
2118 spin_lock(&dlm->spinlock); 2277 spin_lock(&dlm->spinlock);
2119 spin_lock(&dlm->master_lock); 2278 spin_lock(&dlm->master_lock);
2120 dlm_get_mle(mle); 2279 dlm_get_mle_inuse(mle);
2121 spin_unlock(&dlm->master_lock); 2280 spin_unlock(&dlm->master_lock);
2122 spin_unlock(&dlm->spinlock); 2281 spin_unlock(&dlm->spinlock);
2123 2282
@@ -2134,7 +2293,10 @@ fail:
2134 /* migration failed, detach and clean up mle */ 2293 /* migration failed, detach and clean up mle */
2135 dlm_mle_detach_hb_events(dlm, mle); 2294 dlm_mle_detach_hb_events(dlm, mle);
2136 dlm_put_mle(mle); 2295 dlm_put_mle(mle);
2137 dlm_put_mle(mle); 2296 dlm_put_mle_inuse(mle);
2297 spin_lock(&res->spinlock);
2298 res->state &= ~DLM_LOCK_RES_MIGRATING;
2299 spin_unlock(&res->spinlock);
2138 goto leave; 2300 goto leave;
2139 } 2301 }
2140 2302
@@ -2164,8 +2326,8 @@ fail:
2164 /* avoid hang during shutdown when migrating lockres 2326 /* avoid hang during shutdown when migrating lockres
2165 * to a node which also goes down */ 2327 * to a node which also goes down */
2166 if (dlm_is_node_dead(dlm, target)) { 2328 if (dlm_is_node_dead(dlm, target)) {
2167 mlog(0, "%s:%.*s: expected migration target %u " 2329 mlog(0, "%s:%.*s: expected migration "
2168 "is no longer up. restarting.\n", 2330 "target %u is no longer up, restarting\n",
2169 dlm->name, res->lockname.len, 2331 dlm->name, res->lockname.len,
2170 res->lockname.name, target); 2332 res->lockname.name, target);
2171 ret = -ERESTARTSYS; 2333 ret = -ERESTARTSYS;
@@ -2175,7 +2337,10 @@ fail:
2175 /* migration failed, detach and clean up mle */ 2337 /* migration failed, detach and clean up mle */
2176 dlm_mle_detach_hb_events(dlm, mle); 2338 dlm_mle_detach_hb_events(dlm, mle);
2177 dlm_put_mle(mle); 2339 dlm_put_mle(mle);
2178 dlm_put_mle(mle); 2340 dlm_put_mle_inuse(mle);
2341 spin_lock(&res->spinlock);
2342 res->state &= ~DLM_LOCK_RES_MIGRATING;
2343 spin_unlock(&res->spinlock);
2179 goto leave; 2344 goto leave;
2180 } 2345 }
2181 /* TODO: if node died: stop, clean up, return error */ 2346 /* TODO: if node died: stop, clean up, return error */
@@ -2191,7 +2356,7 @@ fail:
2191 2356
2192 /* master is known, detach if not already detached */ 2357 /* master is known, detach if not already detached */
2193 dlm_mle_detach_hb_events(dlm, mle); 2358 dlm_mle_detach_hb_events(dlm, mle);
2194 dlm_put_mle(mle); 2359 dlm_put_mle_inuse(mle);
2195 ret = 0; 2360 ret = 0;
2196 2361
2197 dlm_lockres_calc_usage(dlm, res); 2362 dlm_lockres_calc_usage(dlm, res);
@@ -2462,7 +2627,7 @@ int dlm_migrate_request_handler(struct o2net_msg *msg, u32 len, void *data)
2462 struct dlm_migrate_request *migrate = (struct dlm_migrate_request *) msg->buf; 2627 struct dlm_migrate_request *migrate = (struct dlm_migrate_request *) msg->buf;
2463 struct dlm_master_list_entry *mle = NULL, *oldmle = NULL; 2628 struct dlm_master_list_entry *mle = NULL, *oldmle = NULL;
2464 const char *name; 2629 const char *name;
2465 unsigned int namelen; 2630 unsigned int namelen, hash;
2466 int ret = 0; 2631 int ret = 0;
2467 2632
2468 if (!dlm_grab(dlm)) 2633 if (!dlm_grab(dlm))
@@ -2470,10 +2635,11 @@ int dlm_migrate_request_handler(struct o2net_msg *msg, u32 len, void *data)
2470 2635
2471 name = migrate->name; 2636 name = migrate->name;
2472 namelen = migrate->namelen; 2637 namelen = migrate->namelen;
2638 hash = dlm_lockid_hash(name, namelen);
2473 2639
2474 /* preallocate.. if this fails, abort */ 2640 /* preallocate.. if this fails, abort */
2475 mle = (struct dlm_master_list_entry *) kmem_cache_alloc(dlm_mle_cache, 2641 mle = (struct dlm_master_list_entry *) kmem_cache_alloc(dlm_mle_cache,
2476 GFP_KERNEL); 2642 GFP_NOFS);
2477 2643
2478 if (!mle) { 2644 if (!mle) {
2479 ret = -ENOMEM; 2645 ret = -ENOMEM;
@@ -2482,7 +2648,7 @@ int dlm_migrate_request_handler(struct o2net_msg *msg, u32 len, void *data)
2482 2648
2483 /* check for pre-existing lock */ 2649 /* check for pre-existing lock */
2484 spin_lock(&dlm->spinlock); 2650 spin_lock(&dlm->spinlock);
2485 res = __dlm_lookup_lockres(dlm, name, namelen); 2651 res = __dlm_lookup_lockres(dlm, name, namelen, hash);
2486 spin_lock(&dlm->master_lock); 2652 spin_lock(&dlm->master_lock);
2487 2653
2488 if (res) { 2654 if (res) {
@@ -2580,6 +2746,7 @@ static int dlm_add_migration_mle(struct dlm_ctxt *dlm,
2580 /* remove it from the list so that only one 2746 /* remove it from the list so that only one
2581 * mle will be found */ 2747 * mle will be found */
2582 list_del_init(&tmp->list); 2748 list_del_init(&tmp->list);
2749 __dlm_mle_detach_hb_events(dlm, mle);
2583 } 2750 }
2584 spin_unlock(&tmp->spinlock); 2751 spin_unlock(&tmp->spinlock);
2585 } 2752 }
@@ -2601,6 +2768,7 @@ void dlm_clean_master_list(struct dlm_ctxt *dlm, u8 dead_node)
2601 struct list_head *iter, *iter2; 2768 struct list_head *iter, *iter2;
2602 struct dlm_master_list_entry *mle; 2769 struct dlm_master_list_entry *mle;
2603 struct dlm_lock_resource *res; 2770 struct dlm_lock_resource *res;
2771 unsigned int hash;
2604 2772
2605 mlog_entry("dlm=%s, dead node=%u\n", dlm->name, dead_node); 2773 mlog_entry("dlm=%s, dead node=%u\n", dlm->name, dead_node);
2606top: 2774top:
@@ -2640,7 +2808,7 @@ top:
2640 * may result in the mle being unlinked and 2808 * may result in the mle being unlinked and
2641 * freed, but there may still be a process 2809 * freed, but there may still be a process
2642 * waiting in the dlmlock path which is fine. */ 2810 * waiting in the dlmlock path which is fine. */
2643 mlog(ML_ERROR, "node %u was expected master\n", 2811 mlog(0, "node %u was expected master\n",
2644 dead_node); 2812 dead_node);
2645 atomic_set(&mle->woken, 1); 2813 atomic_set(&mle->woken, 1);
2646 spin_unlock(&mle->spinlock); 2814 spin_unlock(&mle->spinlock);
@@ -2673,19 +2841,21 @@ top:
2673 2841
2674 /* remove from the list early. NOTE: unlinking 2842 /* remove from the list early. NOTE: unlinking
2675 * list_head while in list_for_each_safe */ 2843 * list_head while in list_for_each_safe */
2844 __dlm_mle_detach_hb_events(dlm, mle);
2676 spin_lock(&mle->spinlock); 2845 spin_lock(&mle->spinlock);
2677 list_del_init(&mle->list); 2846 list_del_init(&mle->list);
2678 atomic_set(&mle->woken, 1); 2847 atomic_set(&mle->woken, 1);
2679 spin_unlock(&mle->spinlock); 2848 spin_unlock(&mle->spinlock);
2680 wake_up(&mle->wq); 2849 wake_up(&mle->wq);
2681 2850
2682 mlog(0, "node %u died during migration from " 2851 mlog(0, "%s: node %u died during migration from "
2683 "%u to %u!\n", dead_node, 2852 "%u to %u!\n", dlm->name, dead_node,
2684 mle->master, mle->new_master); 2853 mle->master, mle->new_master);
2685 /* if there is a lockres associated with this 2854 /* if there is a lockres associated with this
2686 * mle, find it and set its owner to UNKNOWN */ 2855 * mle, find it and set its owner to UNKNOWN */
2856 hash = dlm_lockid_hash(mle->u.name.name, mle->u.name.len);
2687 res = __dlm_lookup_lockres(dlm, mle->u.name.name, 2857 res = __dlm_lookup_lockres(dlm, mle->u.name.name,
2688 mle->u.name.len); 2858 mle->u.name.len, hash);
2689 if (res) { 2859 if (res) {
2690 /* unfortunately if we hit this rare case, our 2860 /* unfortunately if we hit this rare case, our
2691 * lock ordering is messed. we need to drop 2861 * lock ordering is messed. we need to drop
diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c
index 805cbabac051..29b2845f370d 100644
--- a/fs/ocfs2/dlm/dlmrecovery.c
+++ b/fs/ocfs2/dlm/dlmrecovery.c
@@ -98,8 +98,8 @@ static void dlm_mig_lockres_worker(struct dlm_work_item *item, void *data);
98 98
99static u64 dlm_get_next_mig_cookie(void); 99static u64 dlm_get_next_mig_cookie(void);
100 100
101static spinlock_t dlm_reco_state_lock = SPIN_LOCK_UNLOCKED; 101static DEFINE_SPINLOCK(dlm_reco_state_lock);
102static spinlock_t dlm_mig_cookie_lock = SPIN_LOCK_UNLOCKED; 102static DEFINE_SPINLOCK(dlm_mig_cookie_lock);
103static u64 dlm_mig_cookie = 1; 103static u64 dlm_mig_cookie = 1;
104 104
105static u64 dlm_get_next_mig_cookie(void) 105static u64 dlm_get_next_mig_cookie(void)
@@ -115,12 +115,37 @@ static u64 dlm_get_next_mig_cookie(void)
115 return c; 115 return c;
116} 116}
117 117
118static inline void dlm_set_reco_dead_node(struct dlm_ctxt *dlm,
119 u8 dead_node)
120{
121 assert_spin_locked(&dlm->spinlock);
122 if (dlm->reco.dead_node != dead_node)
123 mlog(0, "%s: changing dead_node from %u to %u\n",
124 dlm->name, dlm->reco.dead_node, dead_node);
125 dlm->reco.dead_node = dead_node;
126}
127
128static inline void dlm_set_reco_master(struct dlm_ctxt *dlm,
129 u8 master)
130{
131 assert_spin_locked(&dlm->spinlock);
132 mlog(0, "%s: changing new_master from %u to %u\n",
133 dlm->name, dlm->reco.new_master, master);
134 dlm->reco.new_master = master;
135}
136
137static inline void __dlm_reset_recovery(struct dlm_ctxt *dlm)
138{
139 assert_spin_locked(&dlm->spinlock);
140 clear_bit(dlm->reco.dead_node, dlm->recovery_map);
141 dlm_set_reco_dead_node(dlm, O2NM_INVALID_NODE_NUM);
142 dlm_set_reco_master(dlm, O2NM_INVALID_NODE_NUM);
143}
144
118static inline void dlm_reset_recovery(struct dlm_ctxt *dlm) 145static inline void dlm_reset_recovery(struct dlm_ctxt *dlm)
119{ 146{
120 spin_lock(&dlm->spinlock); 147 spin_lock(&dlm->spinlock);
121 clear_bit(dlm->reco.dead_node, dlm->recovery_map); 148 __dlm_reset_recovery(dlm);
122 dlm->reco.dead_node = O2NM_INVALID_NODE_NUM;
123 dlm->reco.new_master = O2NM_INVALID_NODE_NUM;
124 spin_unlock(&dlm->spinlock); 149 spin_unlock(&dlm->spinlock);
125} 150}
126 151
@@ -132,12 +157,21 @@ void dlm_dispatch_work(void *data)
132 struct list_head *iter, *iter2; 157 struct list_head *iter, *iter2;
133 struct dlm_work_item *item; 158 struct dlm_work_item *item;
134 dlm_workfunc_t *workfunc; 159 dlm_workfunc_t *workfunc;
160 int tot=0;
161
162 if (!dlm_joined(dlm))
163 return;
135 164
136 spin_lock(&dlm->work_lock); 165 spin_lock(&dlm->work_lock);
137 list_splice_init(&dlm->work_list, &tmp_list); 166 list_splice_init(&dlm->work_list, &tmp_list);
138 spin_unlock(&dlm->work_lock); 167 spin_unlock(&dlm->work_lock);
139 168
140 list_for_each_safe(iter, iter2, &tmp_list) { 169 list_for_each_safe(iter, iter2, &tmp_list) {
170 tot++;
171 }
172 mlog(0, "%s: work thread has %d work items\n", dlm->name, tot);
173
174 list_for_each_safe(iter, iter2, &tmp_list) {
141 item = list_entry(iter, struct dlm_work_item, list); 175 item = list_entry(iter, struct dlm_work_item, list);
142 workfunc = item->func; 176 workfunc = item->func;
143 list_del_init(&item->list); 177 list_del_init(&item->list);
@@ -220,6 +254,52 @@ void dlm_complete_recovery_thread(struct dlm_ctxt *dlm)
220 * 254 *
221 */ 255 */
222 256
257static void dlm_print_reco_node_status(struct dlm_ctxt *dlm)
258{
259 struct dlm_reco_node_data *ndata;
260 struct dlm_lock_resource *res;
261
262 mlog(ML_NOTICE, "%s(%d): recovery info, state=%s, dead=%u, master=%u\n",
263 dlm->name, dlm->dlm_reco_thread_task->pid,
264 dlm->reco.state & DLM_RECO_STATE_ACTIVE ? "ACTIVE" : "inactive",
265 dlm->reco.dead_node, dlm->reco.new_master);
266
267 list_for_each_entry(ndata, &dlm->reco.node_data, list) {
268 char *st = "unknown";
269 switch (ndata->state) {
270 case DLM_RECO_NODE_DATA_INIT:
271 st = "init";
272 break;
273 case DLM_RECO_NODE_DATA_REQUESTING:
274 st = "requesting";
275 break;
276 case DLM_RECO_NODE_DATA_DEAD:
277 st = "dead";
278 break;
279 case DLM_RECO_NODE_DATA_RECEIVING:
280 st = "receiving";
281 break;
282 case DLM_RECO_NODE_DATA_REQUESTED:
283 st = "requested";
284 break;
285 case DLM_RECO_NODE_DATA_DONE:
286 st = "done";
287 break;
288 case DLM_RECO_NODE_DATA_FINALIZE_SENT:
289 st = "finalize-sent";
290 break;
291 default:
292 st = "bad";
293 break;
294 }
295 mlog(ML_NOTICE, "%s: reco state, node %u, state=%s\n",
296 dlm->name, ndata->node_num, st);
297 }
298 list_for_each_entry(res, &dlm->reco.resources, recovering) {
299 mlog(ML_NOTICE, "%s: lockres %.*s on recovering list\n",
300 dlm->name, res->lockname.len, res->lockname.name);
301 }
302}
223 303
224#define DLM_RECO_THREAD_TIMEOUT_MS (5 * 1000) 304#define DLM_RECO_THREAD_TIMEOUT_MS (5 * 1000)
225 305
@@ -267,11 +347,23 @@ int dlm_is_node_dead(struct dlm_ctxt *dlm, u8 node)
267{ 347{
268 int dead; 348 int dead;
269 spin_lock(&dlm->spinlock); 349 spin_lock(&dlm->spinlock);
270 dead = test_bit(node, dlm->domain_map); 350 dead = !test_bit(node, dlm->domain_map);
271 spin_unlock(&dlm->spinlock); 351 spin_unlock(&dlm->spinlock);
272 return dead; 352 return dead;
273} 353}
274 354
355/* returns true if node is no longer in the domain
356 * could be dead or just not joined */
357static int dlm_is_node_recovered(struct dlm_ctxt *dlm, u8 node)
358{
359 int recovered;
360 spin_lock(&dlm->spinlock);
361 recovered = !test_bit(node, dlm->recovery_map);
362 spin_unlock(&dlm->spinlock);
363 return recovered;
364}
365
366
275int dlm_wait_for_node_death(struct dlm_ctxt *dlm, u8 node, int timeout) 367int dlm_wait_for_node_death(struct dlm_ctxt *dlm, u8 node, int timeout)
276{ 368{
277 if (timeout) { 369 if (timeout) {
@@ -290,6 +382,24 @@ int dlm_wait_for_node_death(struct dlm_ctxt *dlm, u8 node, int timeout)
290 return 0; 382 return 0;
291} 383}
292 384
385int dlm_wait_for_node_recovery(struct dlm_ctxt *dlm, u8 node, int timeout)
386{
387 if (timeout) {
388 mlog(0, "%s: waiting %dms for notification of "
389 "recovery of node %u\n", dlm->name, timeout, node);
390 wait_event_timeout(dlm->dlm_reco_thread_wq,
391 dlm_is_node_recovered(dlm, node),
392 msecs_to_jiffies(timeout));
393 } else {
394 mlog(0, "%s: waiting indefinitely for notification "
395 "of recovery of node %u\n", dlm->name, node);
396 wait_event(dlm->dlm_reco_thread_wq,
397 dlm_is_node_recovered(dlm, node));
398 }
399 /* for now, return 0 */
400 return 0;
401}
402
293/* callers of the top-level api calls (dlmlock/dlmunlock) should 403/* callers of the top-level api calls (dlmlock/dlmunlock) should
294 * block on the dlm->reco.event when recovery is in progress. 404 * block on the dlm->reco.event when recovery is in progress.
295 * the dlm recovery thread will set this state when it begins 405 * the dlm recovery thread will set this state when it begins
@@ -308,6 +418,13 @@ static int dlm_in_recovery(struct dlm_ctxt *dlm)
308 418
309void dlm_wait_for_recovery(struct dlm_ctxt *dlm) 419void dlm_wait_for_recovery(struct dlm_ctxt *dlm)
310{ 420{
421 if (dlm_in_recovery(dlm)) {
422 mlog(0, "%s: reco thread %d in recovery: "
423 "state=%d, master=%u, dead=%u\n",
424 dlm->name, dlm->dlm_reco_thread_task->pid,
425 dlm->reco.state, dlm->reco.new_master,
426 dlm->reco.dead_node);
427 }
311 wait_event(dlm->reco.event, !dlm_in_recovery(dlm)); 428 wait_event(dlm->reco.event, !dlm_in_recovery(dlm));
312} 429}
313 430
@@ -341,7 +458,7 @@ static int dlm_do_recovery(struct dlm_ctxt *dlm)
341 mlog(0, "new master %u died while recovering %u!\n", 458 mlog(0, "new master %u died while recovering %u!\n",
342 dlm->reco.new_master, dlm->reco.dead_node); 459 dlm->reco.new_master, dlm->reco.dead_node);
343 /* unset the new_master, leave dead_node */ 460 /* unset the new_master, leave dead_node */
344 dlm->reco.new_master = O2NM_INVALID_NODE_NUM; 461 dlm_set_reco_master(dlm, O2NM_INVALID_NODE_NUM);
345 } 462 }
346 463
347 /* select a target to recover */ 464 /* select a target to recover */
@@ -350,14 +467,14 @@ static int dlm_do_recovery(struct dlm_ctxt *dlm)
350 467
351 bit = find_next_bit (dlm->recovery_map, O2NM_MAX_NODES+1, 0); 468 bit = find_next_bit (dlm->recovery_map, O2NM_MAX_NODES+1, 0);
352 if (bit >= O2NM_MAX_NODES || bit < 0) 469 if (bit >= O2NM_MAX_NODES || bit < 0)
353 dlm->reco.dead_node = O2NM_INVALID_NODE_NUM; 470 dlm_set_reco_dead_node(dlm, O2NM_INVALID_NODE_NUM);
354 else 471 else
355 dlm->reco.dead_node = bit; 472 dlm_set_reco_dead_node(dlm, bit);
356 } else if (!test_bit(dlm->reco.dead_node, dlm->recovery_map)) { 473 } else if (!test_bit(dlm->reco.dead_node, dlm->recovery_map)) {
357 /* BUG? */ 474 /* BUG? */
358 mlog(ML_ERROR, "dead_node %u no longer in recovery map!\n", 475 mlog(ML_ERROR, "dead_node %u no longer in recovery map!\n",
359 dlm->reco.dead_node); 476 dlm->reco.dead_node);
360 dlm->reco.dead_node = O2NM_INVALID_NODE_NUM; 477 dlm_set_reco_dead_node(dlm, O2NM_INVALID_NODE_NUM);
361 } 478 }
362 479
363 if (dlm->reco.dead_node == O2NM_INVALID_NODE_NUM) { 480 if (dlm->reco.dead_node == O2NM_INVALID_NODE_NUM) {
@@ -366,7 +483,8 @@ static int dlm_do_recovery(struct dlm_ctxt *dlm)
366 /* return to main thread loop and sleep. */ 483 /* return to main thread loop and sleep. */
367 return 0; 484 return 0;
368 } 485 }
369 mlog(0, "recovery thread found node %u in the recovery map!\n", 486 mlog(0, "%s(%d):recovery thread found node %u in the recovery map!\n",
487 dlm->name, dlm->dlm_reco_thread_task->pid,
370 dlm->reco.dead_node); 488 dlm->reco.dead_node);
371 spin_unlock(&dlm->spinlock); 489 spin_unlock(&dlm->spinlock);
372 490
@@ -389,8 +507,8 @@ static int dlm_do_recovery(struct dlm_ctxt *dlm)
389 } 507 }
390 mlog(0, "another node will master this recovery session.\n"); 508 mlog(0, "another node will master this recovery session.\n");
391 } 509 }
392 mlog(0, "dlm=%s, new_master=%u, this node=%u, dead_node=%u\n", 510 mlog(0, "dlm=%s (%d), new_master=%u, this node=%u, dead_node=%u\n",
393 dlm->name, dlm->reco.new_master, 511 dlm->name, dlm->dlm_reco_thread_task->pid, dlm->reco.new_master,
394 dlm->node_num, dlm->reco.dead_node); 512 dlm->node_num, dlm->reco.dead_node);
395 513
396 /* it is safe to start everything back up here 514 /* it is safe to start everything back up here
@@ -402,11 +520,13 @@ static int dlm_do_recovery(struct dlm_ctxt *dlm)
402 return 0; 520 return 0;
403 521
404master_here: 522master_here:
405 mlog(0, "mastering recovery of %s:%u here(this=%u)!\n", 523 mlog(0, "(%d) mastering recovery of %s:%u here(this=%u)!\n",
524 dlm->dlm_reco_thread_task->pid,
406 dlm->name, dlm->reco.dead_node, dlm->node_num); 525 dlm->name, dlm->reco.dead_node, dlm->node_num);
407 526
408 status = dlm_remaster_locks(dlm, dlm->reco.dead_node); 527 status = dlm_remaster_locks(dlm, dlm->reco.dead_node);
409 if (status < 0) { 528 if (status < 0) {
529 /* we should never hit this anymore */
410 mlog(ML_ERROR, "error %d remastering locks for node %u, " 530 mlog(ML_ERROR, "error %d remastering locks for node %u, "
411 "retrying.\n", status, dlm->reco.dead_node); 531 "retrying.\n", status, dlm->reco.dead_node);
412 /* yield a bit to allow any final network messages 532 /* yield a bit to allow any final network messages
@@ -433,9 +553,16 @@ static int dlm_remaster_locks(struct dlm_ctxt *dlm, u8 dead_node)
433 int destroy = 0; 553 int destroy = 0;
434 int pass = 0; 554 int pass = 0;
435 555
436 status = dlm_init_recovery_area(dlm, dead_node); 556 do {
437 if (status < 0) 557 /* we have become recovery master. there is no escaping
438 goto leave; 558 * this, so just keep trying until we get it. */
559 status = dlm_init_recovery_area(dlm, dead_node);
560 if (status < 0) {
561 mlog(ML_ERROR, "%s: failed to alloc recovery area, "
562 "retrying\n", dlm->name);
563 msleep(1000);
564 }
565 } while (status != 0);
439 566
440 /* safe to access the node data list without a lock, since this 567 /* safe to access the node data list without a lock, since this
441 * process is the only one to change the list */ 568 * process is the only one to change the list */
@@ -452,16 +579,36 @@ static int dlm_remaster_locks(struct dlm_ctxt *dlm, u8 dead_node)
452 continue; 579 continue;
453 } 580 }
454 581
455 status = dlm_request_all_locks(dlm, ndata->node_num, dead_node); 582 do {
456 if (status < 0) { 583 status = dlm_request_all_locks(dlm, ndata->node_num,
457 mlog_errno(status); 584 dead_node);
458 if (dlm_is_host_down(status)) 585 if (status < 0) {
459 ndata->state = DLM_RECO_NODE_DATA_DEAD; 586 mlog_errno(status);
460 else { 587 if (dlm_is_host_down(status)) {
461 destroy = 1; 588 /* node died, ignore it for recovery */
462 goto leave; 589 status = 0;
590 ndata->state = DLM_RECO_NODE_DATA_DEAD;
591 /* wait for the domain map to catch up
592 * with the network state. */
593 wait_event_timeout(dlm->dlm_reco_thread_wq,
594 dlm_is_node_dead(dlm,
595 ndata->node_num),
596 msecs_to_jiffies(1000));
597 mlog(0, "waited 1 sec for %u, "
598 "dead? %s\n", ndata->node_num,
599 dlm_is_node_dead(dlm, ndata->node_num) ?
600 "yes" : "no");
601 } else {
602 /* -ENOMEM on the other node */
603 mlog(0, "%s: node %u returned "
604 "%d during recovery, retrying "
605 "after a short wait\n",
606 dlm->name, ndata->node_num,
607 status);
608 msleep(100);
609 }
463 } 610 }
464 } 611 } while (status != 0);
465 612
466 switch (ndata->state) { 613 switch (ndata->state) {
467 case DLM_RECO_NODE_DATA_INIT: 614 case DLM_RECO_NODE_DATA_INIT:
@@ -473,10 +620,9 @@ static int dlm_remaster_locks(struct dlm_ctxt *dlm, u8 dead_node)
473 mlog(0, "node %u died after requesting " 620 mlog(0, "node %u died after requesting "
474 "recovery info for node %u\n", 621 "recovery info for node %u\n",
475 ndata->node_num, dead_node); 622 ndata->node_num, dead_node);
476 // start all over 623 /* fine. don't need this node's info.
477 destroy = 1; 624 * continue without it. */
478 status = -EAGAIN; 625 break;
479 goto leave;
480 case DLM_RECO_NODE_DATA_REQUESTING: 626 case DLM_RECO_NODE_DATA_REQUESTING:
481 ndata->state = DLM_RECO_NODE_DATA_REQUESTED; 627 ndata->state = DLM_RECO_NODE_DATA_REQUESTED;
482 mlog(0, "now receiving recovery data from " 628 mlog(0, "now receiving recovery data from "
@@ -520,35 +666,26 @@ static int dlm_remaster_locks(struct dlm_ctxt *dlm, u8 dead_node)
520 BUG(); 666 BUG();
521 break; 667 break;
522 case DLM_RECO_NODE_DATA_DEAD: 668 case DLM_RECO_NODE_DATA_DEAD:
523 mlog(ML_NOTICE, "node %u died after " 669 mlog(0, "node %u died after "
524 "requesting recovery info for " 670 "requesting recovery info for "
525 "node %u\n", ndata->node_num, 671 "node %u\n", ndata->node_num,
526 dead_node); 672 dead_node);
527 spin_unlock(&dlm_reco_state_lock); 673 break;
528 // start all over
529 destroy = 1;
530 status = -EAGAIN;
531 /* instead of spinning like crazy here,
532 * wait for the domain map to catch up
533 * with the network state. otherwise this
534 * can be hit hundreds of times before
535 * the node is really seen as dead. */
536 wait_event_timeout(dlm->dlm_reco_thread_wq,
537 dlm_is_node_dead(dlm,
538 ndata->node_num),
539 msecs_to_jiffies(1000));
540 mlog(0, "waited 1 sec for %u, "
541 "dead? %s\n", ndata->node_num,
542 dlm_is_node_dead(dlm, ndata->node_num) ?
543 "yes" : "no");
544 goto leave;
545 case DLM_RECO_NODE_DATA_RECEIVING: 674 case DLM_RECO_NODE_DATA_RECEIVING:
546 case DLM_RECO_NODE_DATA_REQUESTED: 675 case DLM_RECO_NODE_DATA_REQUESTED:
676 mlog(0, "%s: node %u still in state %s\n",
677 dlm->name, ndata->node_num,
678 ndata->state==DLM_RECO_NODE_DATA_RECEIVING ?
679 "receiving" : "requested");
547 all_nodes_done = 0; 680 all_nodes_done = 0;
548 break; 681 break;
549 case DLM_RECO_NODE_DATA_DONE: 682 case DLM_RECO_NODE_DATA_DONE:
683 mlog(0, "%s: node %u state is done\n",
684 dlm->name, ndata->node_num);
550 break; 685 break;
551 case DLM_RECO_NODE_DATA_FINALIZE_SENT: 686 case DLM_RECO_NODE_DATA_FINALIZE_SENT:
687 mlog(0, "%s: node %u state is finalize\n",
688 dlm->name, ndata->node_num);
552 break; 689 break;
553 } 690 }
554 } 691 }
@@ -578,7 +715,7 @@ static int dlm_remaster_locks(struct dlm_ctxt *dlm, u8 dead_node)
578 jiffies, dlm->reco.dead_node, 715 jiffies, dlm->reco.dead_node,
579 dlm->node_num, dlm->reco.new_master); 716 dlm->node_num, dlm->reco.new_master);
580 destroy = 1; 717 destroy = 1;
581 status = ret; 718 status = 0;
582 /* rescan everything marked dirty along the way */ 719 /* rescan everything marked dirty along the way */
583 dlm_kick_thread(dlm, NULL); 720 dlm_kick_thread(dlm, NULL);
584 break; 721 break;
@@ -591,7 +728,6 @@ static int dlm_remaster_locks(struct dlm_ctxt *dlm, u8 dead_node)
591 728
592 } 729 }
593 730
594leave:
595 if (destroy) 731 if (destroy)
596 dlm_destroy_recovery_area(dlm, dead_node); 732 dlm_destroy_recovery_area(dlm, dead_node);
597 733
@@ -617,7 +753,7 @@ static int dlm_init_recovery_area(struct dlm_ctxt *dlm, u8 dead_node)
617 } 753 }
618 BUG_ON(num == dead_node); 754 BUG_ON(num == dead_node);
619 755
620 ndata = kcalloc(1, sizeof(*ndata), GFP_KERNEL); 756 ndata = kcalloc(1, sizeof(*ndata), GFP_NOFS);
621 if (!ndata) { 757 if (!ndata) {
622 dlm_destroy_recovery_area(dlm, dead_node); 758 dlm_destroy_recovery_area(dlm, dead_node);
623 return -ENOMEM; 759 return -ENOMEM;
@@ -691,16 +827,25 @@ int dlm_request_all_locks_handler(struct o2net_msg *msg, u32 len, void *data)
691 if (!dlm_grab(dlm)) 827 if (!dlm_grab(dlm))
692 return -EINVAL; 828 return -EINVAL;
693 829
830 if (lr->dead_node != dlm->reco.dead_node) {
831 mlog(ML_ERROR, "%s: node %u sent dead_node=%u, but local "
832 "dead_node is %u\n", dlm->name, lr->node_idx,
833 lr->dead_node, dlm->reco.dead_node);
834 dlm_print_reco_node_status(dlm);
835 /* this is a hack */
836 dlm_put(dlm);
837 return -ENOMEM;
838 }
694 BUG_ON(lr->dead_node != dlm->reco.dead_node); 839 BUG_ON(lr->dead_node != dlm->reco.dead_node);
695 840
696 item = kcalloc(1, sizeof(*item), GFP_KERNEL); 841 item = kcalloc(1, sizeof(*item), GFP_NOFS);
697 if (!item) { 842 if (!item) {
698 dlm_put(dlm); 843 dlm_put(dlm);
699 return -ENOMEM; 844 return -ENOMEM;
700 } 845 }
701 846
702 /* this will get freed by dlm_request_all_locks_worker */ 847 /* this will get freed by dlm_request_all_locks_worker */
703 buf = (char *) __get_free_page(GFP_KERNEL); 848 buf = (char *) __get_free_page(GFP_NOFS);
704 if (!buf) { 849 if (!buf) {
705 kfree(item); 850 kfree(item);
706 dlm_put(dlm); 851 dlm_put(dlm);
@@ -715,7 +860,7 @@ int dlm_request_all_locks_handler(struct o2net_msg *msg, u32 len, void *data)
715 spin_lock(&dlm->work_lock); 860 spin_lock(&dlm->work_lock);
716 list_add_tail(&item->list, &dlm->work_list); 861 list_add_tail(&item->list, &dlm->work_list);
717 spin_unlock(&dlm->work_lock); 862 spin_unlock(&dlm->work_lock);
718 schedule_work(&dlm->dispatched_work); 863 queue_work(dlm->dlm_worker, &dlm->dispatched_work);
719 864
720 dlm_put(dlm); 865 dlm_put(dlm);
721 return 0; 866 return 0;
@@ -730,32 +875,34 @@ static void dlm_request_all_locks_worker(struct dlm_work_item *item, void *data)
730 struct list_head *iter; 875 struct list_head *iter;
731 int ret; 876 int ret;
732 u8 dead_node, reco_master; 877 u8 dead_node, reco_master;
878 int skip_all_done = 0;
733 879
734 dlm = item->dlm; 880 dlm = item->dlm;
735 dead_node = item->u.ral.dead_node; 881 dead_node = item->u.ral.dead_node;
736 reco_master = item->u.ral.reco_master; 882 reco_master = item->u.ral.reco_master;
737 mres = (struct dlm_migratable_lockres *)data; 883 mres = (struct dlm_migratable_lockres *)data;
738 884
885 mlog(0, "%s: recovery worker started, dead=%u, master=%u\n",
886 dlm->name, dead_node, reco_master);
887
739 if (dead_node != dlm->reco.dead_node || 888 if (dead_node != dlm->reco.dead_node ||
740 reco_master != dlm->reco.new_master) { 889 reco_master != dlm->reco.new_master) {
741 /* show extra debug info if the recovery state is messed */ 890 /* worker could have been created before the recovery master
742 mlog(ML_ERROR, "%s: bad reco state: reco(dead=%u, master=%u), " 891 * died. if so, do not continue, but do not error. */
743 "request(dead=%u, master=%u)\n", 892 if (dlm->reco.new_master == O2NM_INVALID_NODE_NUM) {
744 dlm->name, dlm->reco.dead_node, dlm->reco.new_master, 893 mlog(ML_NOTICE, "%s: will not send recovery state, "
745 dead_node, reco_master); 894 "recovery master %u died, thread=(dead=%u,mas=%u)"
746 mlog(ML_ERROR, "%s: name=%.*s master=%u locks=%u/%u flags=%u " 895 " current=(dead=%u,mas=%u)\n", dlm->name,
747 "entry[0]={c=%u:%llu,l=%u,f=%u,t=%d,ct=%d,hb=%d,n=%u}\n", 896 reco_master, dead_node, reco_master,
748 dlm->name, mres->lockname_len, mres->lockname, mres->master, 897 dlm->reco.dead_node, dlm->reco.new_master);
749 mres->num_locks, mres->total_locks, mres->flags, 898 } else {
750 dlm_get_lock_cookie_node(mres->ml[0].cookie), 899 mlog(ML_NOTICE, "%s: reco state invalid: reco(dead=%u, "
751 dlm_get_lock_cookie_seq(mres->ml[0].cookie), 900 "master=%u), request(dead=%u, master=%u)\n",
752 mres->ml[0].list, mres->ml[0].flags, 901 dlm->name, dlm->reco.dead_node,
753 mres->ml[0].type, mres->ml[0].convert_type, 902 dlm->reco.new_master, dead_node, reco_master);
754 mres->ml[0].highest_blocked, mres->ml[0].node); 903 }
755 BUG(); 904 goto leave;
756 } 905 }
757 BUG_ON(dead_node != dlm->reco.dead_node);
758 BUG_ON(reco_master != dlm->reco.new_master);
759 906
760 /* lock resources should have already been moved to the 907 /* lock resources should have already been moved to the
761 * dlm->reco.resources list. now move items from that list 908 * dlm->reco.resources list. now move items from that list
@@ -766,12 +913,20 @@ static void dlm_request_all_locks_worker(struct dlm_work_item *item, void *data)
766 dlm_move_reco_locks_to_list(dlm, &resources, dead_node); 913 dlm_move_reco_locks_to_list(dlm, &resources, dead_node);
767 914
768 /* now we can begin blasting lockreses without the dlm lock */ 915 /* now we can begin blasting lockreses without the dlm lock */
916
917 /* any errors returned will be due to the new_master dying,
918 * the dlm_reco_thread should detect this */
769 list_for_each(iter, &resources) { 919 list_for_each(iter, &resources) {
770 res = list_entry (iter, struct dlm_lock_resource, recovering); 920 res = list_entry (iter, struct dlm_lock_resource, recovering);
771 ret = dlm_send_one_lockres(dlm, res, mres, reco_master, 921 ret = dlm_send_one_lockres(dlm, res, mres, reco_master,
772 DLM_MRES_RECOVERY); 922 DLM_MRES_RECOVERY);
773 if (ret < 0) 923 if (ret < 0) {
774 mlog_errno(ret); 924 mlog(ML_ERROR, "%s: node %u went down while sending "
925 "recovery state for dead node %u, ret=%d\n", dlm->name,
926 reco_master, dead_node, ret);
927 skip_all_done = 1;
928 break;
929 }
775 } 930 }
776 931
777 /* move the resources back to the list */ 932 /* move the resources back to the list */
@@ -779,10 +934,15 @@ static void dlm_request_all_locks_worker(struct dlm_work_item *item, void *data)
779 list_splice_init(&resources, &dlm->reco.resources); 934 list_splice_init(&resources, &dlm->reco.resources);
780 spin_unlock(&dlm->spinlock); 935 spin_unlock(&dlm->spinlock);
781 936
782 ret = dlm_send_all_done_msg(dlm, dead_node, reco_master); 937 if (!skip_all_done) {
783 if (ret < 0) 938 ret = dlm_send_all_done_msg(dlm, dead_node, reco_master);
784 mlog_errno(ret); 939 if (ret < 0) {
785 940 mlog(ML_ERROR, "%s: node %u went down while sending "
941 "recovery all-done for dead node %u, ret=%d\n",
942 dlm->name, reco_master, dead_node, ret);
943 }
944 }
945leave:
786 free_page((unsigned long)data); 946 free_page((unsigned long)data);
787} 947}
788 948
@@ -801,8 +961,14 @@ static int dlm_send_all_done_msg(struct dlm_ctxt *dlm, u8 dead_node, u8 send_to)
801 961
802 ret = o2net_send_message(DLM_RECO_DATA_DONE_MSG, dlm->key, &done_msg, 962 ret = o2net_send_message(DLM_RECO_DATA_DONE_MSG, dlm->key, &done_msg,
803 sizeof(done_msg), send_to, &tmpret); 963 sizeof(done_msg), send_to, &tmpret);
804 /* negative status is ignored by the caller */ 964 if (ret < 0) {
805 if (ret >= 0) 965 if (!dlm_is_host_down(ret)) {
966 mlog_errno(ret);
967 mlog(ML_ERROR, "%s: unknown error sending data-done "
968 "to %u\n", dlm->name, send_to);
969 BUG();
970 }
971 } else
806 ret = tmpret; 972 ret = tmpret;
807 return ret; 973 return ret;
808} 974}
@@ -822,7 +988,11 @@ int dlm_reco_data_done_handler(struct o2net_msg *msg, u32 len, void *data)
822 mlog(0, "got DATA DONE: dead_node=%u, reco.dead_node=%u, " 988 mlog(0, "got DATA DONE: dead_node=%u, reco.dead_node=%u, "
823 "node_idx=%u, this node=%u\n", done->dead_node, 989 "node_idx=%u, this node=%u\n", done->dead_node,
824 dlm->reco.dead_node, done->node_idx, dlm->node_num); 990 dlm->reco.dead_node, done->node_idx, dlm->node_num);
825 BUG_ON(done->dead_node != dlm->reco.dead_node); 991
992 mlog_bug_on_msg((done->dead_node != dlm->reco.dead_node),
993 "Got DATA DONE: dead_node=%u, reco.dead_node=%u, "
994 "node_idx=%u, this node=%u\n", done->dead_node,
995 dlm->reco.dead_node, done->node_idx, dlm->node_num);
826 996
827 spin_lock(&dlm_reco_state_lock); 997 spin_lock(&dlm_reco_state_lock);
828 list_for_each(iter, &dlm->reco.node_data) { 998 list_for_each(iter, &dlm->reco.node_data) {
@@ -905,13 +1075,11 @@ static void dlm_move_reco_locks_to_list(struct dlm_ctxt *dlm,
905 mlog(0, "found lockres owned by dead node while " 1075 mlog(0, "found lockres owned by dead node while "
906 "doing recovery for node %u. sending it.\n", 1076 "doing recovery for node %u. sending it.\n",
907 dead_node); 1077 dead_node);
908 list_del_init(&res->recovering); 1078 list_move_tail(&res->recovering, list);
909 list_add_tail(&res->recovering, list);
910 } else if (res->owner == DLM_LOCK_RES_OWNER_UNKNOWN) { 1079 } else if (res->owner == DLM_LOCK_RES_OWNER_UNKNOWN) {
911 mlog(0, "found UNKNOWN owner while doing recovery " 1080 mlog(0, "found UNKNOWN owner while doing recovery "
912 "for node %u. sending it.\n", dead_node); 1081 "for node %u. sending it.\n", dead_node);
913 list_del_init(&res->recovering); 1082 list_move_tail(&res->recovering, list);
914 list_add_tail(&res->recovering, list);
915 } 1083 }
916 } 1084 }
917 spin_unlock(&dlm->spinlock); 1085 spin_unlock(&dlm->spinlock);
@@ -1023,8 +1191,9 @@ static int dlm_add_lock_to_array(struct dlm_lock *lock,
1023 ml->type == LKM_PRMODE) { 1191 ml->type == LKM_PRMODE) {
1024 /* if it is already set, this had better be a PR 1192 /* if it is already set, this had better be a PR
1025 * and it has to match */ 1193 * and it has to match */
1026 if (mres->lvb[0] && (ml->type == LKM_EXMODE || 1194 if (!dlm_lvb_is_empty(mres->lvb) &&
1027 memcmp(mres->lvb, lock->lksb->lvb, DLM_LVB_LEN))) { 1195 (ml->type == LKM_EXMODE ||
1196 memcmp(mres->lvb, lock->lksb->lvb, DLM_LVB_LEN))) {
1028 mlog(ML_ERROR, "mismatched lvbs!\n"); 1197 mlog(ML_ERROR, "mismatched lvbs!\n");
1029 __dlm_print_one_lock_resource(lock->lockres); 1198 __dlm_print_one_lock_resource(lock->lockres);
1030 BUG(); 1199 BUG();
@@ -1083,22 +1252,25 @@ int dlm_send_one_lockres(struct dlm_ctxt *dlm, struct dlm_lock_resource *res,
1083 * we must send it immediately. */ 1252 * we must send it immediately. */
1084 ret = dlm_send_mig_lockres_msg(dlm, mres, send_to, 1253 ret = dlm_send_mig_lockres_msg(dlm, mres, send_to,
1085 res, total_locks); 1254 res, total_locks);
1086 if (ret < 0) { 1255 if (ret < 0)
1087 // TODO 1256 goto error;
1088 mlog(ML_ERROR, "dlm_send_mig_lockres_msg "
1089 "returned %d, TODO\n", ret);
1090 BUG();
1091 }
1092 } 1257 }
1093 } 1258 }
1094 /* flush any remaining locks */ 1259 /* flush any remaining locks */
1095 ret = dlm_send_mig_lockres_msg(dlm, mres, send_to, res, total_locks); 1260 ret = dlm_send_mig_lockres_msg(dlm, mres, send_to, res, total_locks);
1096 if (ret < 0) { 1261 if (ret < 0)
1097 // TODO 1262 goto error;
1098 mlog(ML_ERROR, "dlm_send_mig_lockres_msg returned %d, " 1263 return ret;
1099 "TODO\n", ret); 1264
1265error:
1266 mlog(ML_ERROR, "%s: dlm_send_mig_lockres_msg returned %d\n",
1267 dlm->name, ret);
1268 if (!dlm_is_host_down(ret))
1100 BUG(); 1269 BUG();
1101 } 1270 mlog(0, "%s: node %u went down while sending %s "
1271 "lockres %.*s\n", dlm->name, send_to,
1272 flags & DLM_MRES_RECOVERY ? "recovery" : "migration",
1273 res->lockname.len, res->lockname.name);
1102 return ret; 1274 return ret;
1103} 1275}
1104 1276
@@ -1146,8 +1318,8 @@ int dlm_mig_lockres_handler(struct o2net_msg *msg, u32 len, void *data)
1146 mlog(0, "all done flag. all lockres data received!\n"); 1318 mlog(0, "all done flag. all lockres data received!\n");
1147 1319
1148 ret = -ENOMEM; 1320 ret = -ENOMEM;
1149 buf = kmalloc(be16_to_cpu(msg->data_len), GFP_KERNEL); 1321 buf = kmalloc(be16_to_cpu(msg->data_len), GFP_NOFS);
1150 item = kcalloc(1, sizeof(*item), GFP_KERNEL); 1322 item = kcalloc(1, sizeof(*item), GFP_NOFS);
1151 if (!buf || !item) 1323 if (!buf || !item)
1152 goto leave; 1324 goto leave;
1153 1325
@@ -1238,7 +1410,7 @@ int dlm_mig_lockres_handler(struct o2net_msg *msg, u32 len, void *data)
1238 spin_lock(&dlm->work_lock); 1410 spin_lock(&dlm->work_lock);
1239 list_add_tail(&item->list, &dlm->work_list); 1411 list_add_tail(&item->list, &dlm->work_list);
1240 spin_unlock(&dlm->work_lock); 1412 spin_unlock(&dlm->work_lock);
1241 schedule_work(&dlm->dispatched_work); 1413 queue_work(dlm->dlm_worker, &dlm->dispatched_work);
1242 1414
1243leave: 1415leave:
1244 dlm_put(dlm); 1416 dlm_put(dlm);
@@ -1406,6 +1578,7 @@ int dlm_master_requery_handler(struct o2net_msg *msg, u32 len, void *data)
1406 struct dlm_ctxt *dlm = data; 1578 struct dlm_ctxt *dlm = data;
1407 struct dlm_master_requery *req = (struct dlm_master_requery *)msg->buf; 1579 struct dlm_master_requery *req = (struct dlm_master_requery *)msg->buf;
1408 struct dlm_lock_resource *res = NULL; 1580 struct dlm_lock_resource *res = NULL;
1581 unsigned int hash;
1409 int master = DLM_LOCK_RES_OWNER_UNKNOWN; 1582 int master = DLM_LOCK_RES_OWNER_UNKNOWN;
1410 u32 flags = DLM_ASSERT_MASTER_REQUERY; 1583 u32 flags = DLM_ASSERT_MASTER_REQUERY;
1411 1584
@@ -1415,8 +1588,10 @@ int dlm_master_requery_handler(struct o2net_msg *msg, u32 len, void *data)
1415 return master; 1588 return master;
1416 } 1589 }
1417 1590
1591 hash = dlm_lockid_hash(req->name, req->namelen);
1592
1418 spin_lock(&dlm->spinlock); 1593 spin_lock(&dlm->spinlock);
1419 res = __dlm_lookup_lockres(dlm, req->name, req->namelen); 1594 res = __dlm_lookup_lockres(dlm, req->name, req->namelen, hash);
1420 if (res) { 1595 if (res) {
1421 spin_lock(&res->spinlock); 1596 spin_lock(&res->spinlock);
1422 master = res->owner; 1597 master = res->owner;
@@ -1483,7 +1658,7 @@ static int dlm_process_recovery_data(struct dlm_ctxt *dlm,
1483 struct dlm_lock *newlock = NULL; 1658 struct dlm_lock *newlock = NULL;
1484 struct dlm_lockstatus *lksb = NULL; 1659 struct dlm_lockstatus *lksb = NULL;
1485 int ret = 0; 1660 int ret = 0;
1486 int i; 1661 int i, bad;
1487 struct list_head *iter; 1662 struct list_head *iter;
1488 struct dlm_lock *lock = NULL; 1663 struct dlm_lock *lock = NULL;
1489 1664
@@ -1529,8 +1704,7 @@ static int dlm_process_recovery_data(struct dlm_ctxt *dlm,
1529 1704
1530 /* move the lock to its proper place */ 1705 /* move the lock to its proper place */
1531 /* do not alter lock refcount. switching lists. */ 1706 /* do not alter lock refcount. switching lists. */
1532 list_del_init(&lock->list); 1707 list_move_tail(&lock->list, queue);
1533 list_add_tail(&lock->list, queue);
1534 spin_unlock(&res->spinlock); 1708 spin_unlock(&res->spinlock);
1535 1709
1536 mlog(0, "just reordered a local lock!\n"); 1710 mlog(0, "just reordered a local lock!\n");
@@ -1553,28 +1727,48 @@ static int dlm_process_recovery_data(struct dlm_ctxt *dlm,
1553 } 1727 }
1554 lksb->flags |= (ml->flags & 1728 lksb->flags |= (ml->flags &
1555 (DLM_LKSB_PUT_LVB|DLM_LKSB_GET_LVB)); 1729 (DLM_LKSB_PUT_LVB|DLM_LKSB_GET_LVB));
1556 1730
1557 if (mres->lvb[0]) { 1731 if (ml->type == LKM_NLMODE)
1732 goto skip_lvb;
1733
1734 if (!dlm_lvb_is_empty(mres->lvb)) {
1558 if (lksb->flags & DLM_LKSB_PUT_LVB) { 1735 if (lksb->flags & DLM_LKSB_PUT_LVB) {
1559 /* other node was trying to update 1736 /* other node was trying to update
1560 * lvb when node died. recreate the 1737 * lvb when node died. recreate the
1561 * lksb with the updated lvb. */ 1738 * lksb with the updated lvb. */
1562 memcpy(lksb->lvb, mres->lvb, DLM_LVB_LEN); 1739 memcpy(lksb->lvb, mres->lvb, DLM_LVB_LEN);
1740 /* the lock resource lvb update must happen
1741 * NOW, before the spinlock is dropped.
1742 * we no longer wait for the AST to update
1743 * the lvb. */
1744 memcpy(res->lvb, mres->lvb, DLM_LVB_LEN);
1563 } else { 1745 } else {
1564 /* otherwise, the node is sending its 1746 /* otherwise, the node is sending its
1565 * most recent valid lvb info */ 1747 * most recent valid lvb info */
1566 BUG_ON(ml->type != LKM_EXMODE && 1748 BUG_ON(ml->type != LKM_EXMODE &&
1567 ml->type != LKM_PRMODE); 1749 ml->type != LKM_PRMODE);
1568 if (res->lvb[0] && (ml->type == LKM_EXMODE || 1750 if (!dlm_lvb_is_empty(res->lvb) &&
1569 memcmp(res->lvb, mres->lvb, DLM_LVB_LEN))) { 1751 (ml->type == LKM_EXMODE ||
1570 mlog(ML_ERROR, "received bad lvb!\n"); 1752 memcmp(res->lvb, mres->lvb, DLM_LVB_LEN))) {
1571 __dlm_print_one_lock_resource(res); 1753 int i;
1572 BUG(); 1754 mlog(ML_ERROR, "%s:%.*s: received bad "
1755 "lvb! type=%d\n", dlm->name,
1756 res->lockname.len,
1757 res->lockname.name, ml->type);
1758 printk("lockres lvb=[");
1759 for (i=0; i<DLM_LVB_LEN; i++)
1760 printk("%02x", res->lvb[i]);
1761 printk("]\nmigrated lvb=[");
1762 for (i=0; i<DLM_LVB_LEN; i++)
1763 printk("%02x", mres->lvb[i]);
1764 printk("]\n");
1765 dlm_print_one_lock_resource(res);
1766 BUG();
1573 } 1767 }
1574 memcpy(res->lvb, mres->lvb, DLM_LVB_LEN); 1768 memcpy(res->lvb, mres->lvb, DLM_LVB_LEN);
1575 } 1769 }
1576 } 1770 }
1577 1771skip_lvb:
1578 1772
1579 /* NOTE: 1773 /* NOTE:
1580 * wrt lock queue ordering and recovery: 1774 * wrt lock queue ordering and recovery:
@@ -1592,9 +1786,33 @@ static int dlm_process_recovery_data(struct dlm_ctxt *dlm,
1592 * relative to each other, but clearly *not* 1786 * relative to each other, but clearly *not*
1593 * preserved relative to locks from other nodes. 1787 * preserved relative to locks from other nodes.
1594 */ 1788 */
1789 bad = 0;
1595 spin_lock(&res->spinlock); 1790 spin_lock(&res->spinlock);
1596 dlm_lock_get(newlock); 1791 list_for_each_entry(lock, queue, list) {
1597 list_add_tail(&newlock->list, queue); 1792 if (lock->ml.cookie == ml->cookie) {
1793 u64 c = lock->ml.cookie;
1794 mlog(ML_ERROR, "%s:%.*s: %u:%llu: lock already "
1795 "exists on this lockres!\n", dlm->name,
1796 res->lockname.len, res->lockname.name,
1797 dlm_get_lock_cookie_node(c),
1798 dlm_get_lock_cookie_seq(c));
1799
1800 mlog(ML_NOTICE, "sent lock: type=%d, conv=%d, "
1801 "node=%u, cookie=%u:%llu, queue=%d\n",
1802 ml->type, ml->convert_type, ml->node,
1803 dlm_get_lock_cookie_node(ml->cookie),
1804 dlm_get_lock_cookie_seq(ml->cookie),
1805 ml->list);
1806
1807 __dlm_print_one_lock_resource(res);
1808 bad = 1;
1809 break;
1810 }
1811 }
1812 if (!bad) {
1813 dlm_lock_get(newlock);
1814 list_add_tail(&newlock->list, queue);
1815 }
1598 spin_unlock(&res->spinlock); 1816 spin_unlock(&res->spinlock);
1599 } 1817 }
1600 mlog(0, "done running all the locks\n"); 1818 mlog(0, "done running all the locks\n");
@@ -1618,8 +1836,14 @@ void dlm_move_lockres_to_recovery_list(struct dlm_ctxt *dlm,
1618 struct dlm_lock *lock; 1836 struct dlm_lock *lock;
1619 1837
1620 res->state |= DLM_LOCK_RES_RECOVERING; 1838 res->state |= DLM_LOCK_RES_RECOVERING;
1621 if (!list_empty(&res->recovering)) 1839 if (!list_empty(&res->recovering)) {
1840 mlog(0,
1841 "Recovering res %s:%.*s, is already on recovery list!\n",
1842 dlm->name, res->lockname.len, res->lockname.name);
1622 list_del_init(&res->recovering); 1843 list_del_init(&res->recovering);
1844 }
1845 /* We need to hold a reference while on the recovery list */
1846 dlm_lockres_get(res);
1623 list_add_tail(&res->recovering, &dlm->reco.resources); 1847 list_add_tail(&res->recovering, &dlm->reco.resources);
1624 1848
1625 /* find any pending locks and put them back on proper list */ 1849 /* find any pending locks and put them back on proper list */
@@ -1708,9 +1932,11 @@ static void dlm_finish_local_lockres_recovery(struct dlm_ctxt *dlm,
1708 spin_lock(&res->spinlock); 1932 spin_lock(&res->spinlock);
1709 dlm_change_lockres_owner(dlm, res, new_master); 1933 dlm_change_lockres_owner(dlm, res, new_master);
1710 res->state &= ~DLM_LOCK_RES_RECOVERING; 1934 res->state &= ~DLM_LOCK_RES_RECOVERING;
1711 __dlm_dirty_lockres(dlm, res); 1935 if (!__dlm_lockres_unused(res))
1936 __dlm_dirty_lockres(dlm, res);
1712 spin_unlock(&res->spinlock); 1937 spin_unlock(&res->spinlock);
1713 wake_up(&res->wq); 1938 wake_up(&res->wq);
1939 dlm_lockres_put(res);
1714 } 1940 }
1715 } 1941 }
1716 1942
@@ -1719,7 +1945,7 @@ static void dlm_finish_local_lockres_recovery(struct dlm_ctxt *dlm,
1719 * the RECOVERING state and set the owner 1945 * the RECOVERING state and set the owner
1720 * if necessary */ 1946 * if necessary */
1721 for (i = 0; i < DLM_HASH_BUCKETS; i++) { 1947 for (i = 0; i < DLM_HASH_BUCKETS; i++) {
1722 bucket = &(dlm->lockres_hash[i]); 1948 bucket = dlm_lockres_hash(dlm, i);
1723 hlist_for_each_entry(res, hash_iter, bucket, hash_node) { 1949 hlist_for_each_entry(res, hash_iter, bucket, hash_node) {
1724 if (res->state & DLM_LOCK_RES_RECOVERING) { 1950 if (res->state & DLM_LOCK_RES_RECOVERING) {
1725 if (res->owner == dead_node) { 1951 if (res->owner == dead_node) {
@@ -1743,11 +1969,13 @@ static void dlm_finish_local_lockres_recovery(struct dlm_ctxt *dlm,
1743 dlm->name, res->lockname.len, 1969 dlm->name, res->lockname.len,
1744 res->lockname.name, res->owner); 1970 res->lockname.name, res->owner);
1745 list_del_init(&res->recovering); 1971 list_del_init(&res->recovering);
1972 dlm_lockres_put(res);
1746 } 1973 }
1747 spin_lock(&res->spinlock); 1974 spin_lock(&res->spinlock);
1748 dlm_change_lockres_owner(dlm, res, new_master); 1975 dlm_change_lockres_owner(dlm, res, new_master);
1749 res->state &= ~DLM_LOCK_RES_RECOVERING; 1976 res->state &= ~DLM_LOCK_RES_RECOVERING;
1750 __dlm_dirty_lockres(dlm, res); 1977 if (!__dlm_lockres_unused(res))
1978 __dlm_dirty_lockres(dlm, res);
1751 spin_unlock(&res->spinlock); 1979 spin_unlock(&res->spinlock);
1752 wake_up(&res->wq); 1980 wake_up(&res->wq);
1753 } 1981 }
@@ -1884,7 +2112,7 @@ static void dlm_do_local_recovery_cleanup(struct dlm_ctxt *dlm, u8 dead_node)
1884 * need to be fired as a result. 2112 * need to be fired as a result.
1885 */ 2113 */
1886 for (i = 0; i < DLM_HASH_BUCKETS; i++) { 2114 for (i = 0; i < DLM_HASH_BUCKETS; i++) {
1887 bucket = &(dlm->lockres_hash[i]); 2115 bucket = dlm_lockres_hash(dlm, i);
1888 hlist_for_each_entry(res, iter, bucket, hash_node) { 2116 hlist_for_each_entry(res, iter, bucket, hash_node) {
1889 /* always prune any $RECOVERY entries for dead nodes, 2117 /* always prune any $RECOVERY entries for dead nodes,
1890 * otherwise hangs can occur during later recovery */ 2118 * otherwise hangs can occur during later recovery */
@@ -1924,6 +2152,20 @@ static void __dlm_hb_node_down(struct dlm_ctxt *dlm, int idx)
1924{ 2152{
1925 assert_spin_locked(&dlm->spinlock); 2153 assert_spin_locked(&dlm->spinlock);
1926 2154
2155 if (dlm->reco.new_master == idx) {
2156 mlog(0, "%s: recovery master %d just died\n",
2157 dlm->name, idx);
2158 if (dlm->reco.state & DLM_RECO_STATE_FINALIZE) {
2159 /* finalize1 was reached, so it is safe to clear
2160 * the new_master and dead_node. that recovery
2161 * is complete. */
2162 mlog(0, "%s: dead master %d had reached "
2163 "finalize1 state, clearing\n", dlm->name, idx);
2164 dlm->reco.state &= ~DLM_RECO_STATE_FINALIZE;
2165 __dlm_reset_recovery(dlm);
2166 }
2167 }
2168
1927 /* check to see if the node is already considered dead */ 2169 /* check to see if the node is already considered dead */
1928 if (!test_bit(idx, dlm->live_nodes_map)) { 2170 if (!test_bit(idx, dlm->live_nodes_map)) {
1929 mlog(0, "for domain %s, node %d is already dead. " 2171 mlog(0, "for domain %s, node %d is already dead. "
@@ -2087,7 +2329,7 @@ again:
2087 2329
2088 /* set the new_master to this node */ 2330 /* set the new_master to this node */
2089 spin_lock(&dlm->spinlock); 2331 spin_lock(&dlm->spinlock);
2090 dlm->reco.new_master = dlm->node_num; 2332 dlm_set_reco_master(dlm, dlm->node_num);
2091 spin_unlock(&dlm->spinlock); 2333 spin_unlock(&dlm->spinlock);
2092 } 2334 }
2093 2335
@@ -2125,6 +2367,10 @@ again:
2125 mlog(0, "%s: reco master %u is ready to recover %u\n", 2367 mlog(0, "%s: reco master %u is ready to recover %u\n",
2126 dlm->name, dlm->reco.new_master, dlm->reco.dead_node); 2368 dlm->name, dlm->reco.new_master, dlm->reco.dead_node);
2127 status = -EEXIST; 2369 status = -EEXIST;
2370 } else if (ret == DLM_RECOVERING) {
2371 mlog(0, "dlm=%s dlmlock says master node died (this=%u)\n",
2372 dlm->name, dlm->node_num);
2373 goto again;
2128 } else { 2374 } else {
2129 struct dlm_lock_resource *res; 2375 struct dlm_lock_resource *res;
2130 2376
@@ -2156,7 +2402,7 @@ static int dlm_send_begin_reco_message(struct dlm_ctxt *dlm, u8 dead_node)
2156 2402
2157 mlog_entry("%u\n", dead_node); 2403 mlog_entry("%u\n", dead_node);
2158 2404
2159 mlog(0, "dead node is %u\n", dead_node); 2405 mlog(0, "%s: dead node is %u\n", dlm->name, dead_node);
2160 2406
2161 spin_lock(&dlm->spinlock); 2407 spin_lock(&dlm->spinlock);
2162 dlm_node_iter_init(dlm->domain_map, &iter); 2408 dlm_node_iter_init(dlm->domain_map, &iter);
@@ -2214,6 +2460,14 @@ retry:
2214 * another ENOMEM */ 2460 * another ENOMEM */
2215 msleep(100); 2461 msleep(100);
2216 goto retry; 2462 goto retry;
2463 } else if (ret == EAGAIN) {
2464 mlog(0, "%s: trying to start recovery of node "
2465 "%u, but node %u is waiting for last recovery "
2466 "to complete, backoff for a bit\n", dlm->name,
2467 dead_node, nodenum);
2468 /* TODO Look into replacing msleep with cond_resched() */
2469 msleep(100);
2470 goto retry;
2217 } 2471 }
2218 } 2472 }
2219 2473
@@ -2229,8 +2483,20 @@ int dlm_begin_reco_handler(struct o2net_msg *msg, u32 len, void *data)
2229 if (!dlm_grab(dlm)) 2483 if (!dlm_grab(dlm))
2230 return 0; 2484 return 0;
2231 2485
2232 mlog(0, "node %u wants to recover node %u\n", 2486 spin_lock(&dlm->spinlock);
2233 br->node_idx, br->dead_node); 2487 if (dlm->reco.state & DLM_RECO_STATE_FINALIZE) {
2488 mlog(0, "%s: node %u wants to recover node %u (%u:%u) "
2489 "but this node is in finalize state, waiting on finalize2\n",
2490 dlm->name, br->node_idx, br->dead_node,
2491 dlm->reco.dead_node, dlm->reco.new_master);
2492 spin_unlock(&dlm->spinlock);
2493 return EAGAIN;
2494 }
2495 spin_unlock(&dlm->spinlock);
2496
2497 mlog(0, "%s: node %u wants to recover node %u (%u:%u)\n",
2498 dlm->name, br->node_idx, br->dead_node,
2499 dlm->reco.dead_node, dlm->reco.new_master);
2234 2500
2235 dlm_fire_domain_eviction_callbacks(dlm, br->dead_node); 2501 dlm_fire_domain_eviction_callbacks(dlm, br->dead_node);
2236 2502
@@ -2252,8 +2518,8 @@ int dlm_begin_reco_handler(struct o2net_msg *msg, u32 len, void *data)
2252 "node %u changing it to %u\n", dlm->name, 2518 "node %u changing it to %u\n", dlm->name,
2253 dlm->reco.dead_node, br->node_idx, br->dead_node); 2519 dlm->reco.dead_node, br->node_idx, br->dead_node);
2254 } 2520 }
2255 dlm->reco.new_master = br->node_idx; 2521 dlm_set_reco_master(dlm, br->node_idx);
2256 dlm->reco.dead_node = br->dead_node; 2522 dlm_set_reco_dead_node(dlm, br->dead_node);
2257 if (!test_bit(br->dead_node, dlm->recovery_map)) { 2523 if (!test_bit(br->dead_node, dlm->recovery_map)) {
2258 mlog(0, "recovery master %u sees %u as dead, but this " 2524 mlog(0, "recovery master %u sees %u as dead, but this "
2259 "node has not yet. marking %u as dead\n", 2525 "node has not yet. marking %u as dead\n",
@@ -2272,10 +2538,16 @@ int dlm_begin_reco_handler(struct o2net_msg *msg, u32 len, void *data)
2272 spin_unlock(&dlm->spinlock); 2538 spin_unlock(&dlm->spinlock);
2273 2539
2274 dlm_kick_recovery_thread(dlm); 2540 dlm_kick_recovery_thread(dlm);
2541
2542 mlog(0, "%s: recovery started by node %u, for %u (%u:%u)\n",
2543 dlm->name, br->node_idx, br->dead_node,
2544 dlm->reco.dead_node, dlm->reco.new_master);
2545
2275 dlm_put(dlm); 2546 dlm_put(dlm);
2276 return 0; 2547 return 0;
2277} 2548}
2278 2549
2550#define DLM_FINALIZE_STAGE2 0x01
2279static int dlm_send_finalize_reco_message(struct dlm_ctxt *dlm) 2551static int dlm_send_finalize_reco_message(struct dlm_ctxt *dlm)
2280{ 2552{
2281 int ret = 0; 2553 int ret = 0;
@@ -2283,25 +2555,31 @@ static int dlm_send_finalize_reco_message(struct dlm_ctxt *dlm)
2283 struct dlm_node_iter iter; 2555 struct dlm_node_iter iter;
2284 int nodenum; 2556 int nodenum;
2285 int status; 2557 int status;
2558 int stage = 1;
2286 2559
2287 mlog(0, "finishing recovery for node %s:%u\n", 2560 mlog(0, "finishing recovery for node %s:%u, "
2288 dlm->name, dlm->reco.dead_node); 2561 "stage %d\n", dlm->name, dlm->reco.dead_node, stage);
2289 2562
2290 spin_lock(&dlm->spinlock); 2563 spin_lock(&dlm->spinlock);
2291 dlm_node_iter_init(dlm->domain_map, &iter); 2564 dlm_node_iter_init(dlm->domain_map, &iter);
2292 spin_unlock(&dlm->spinlock); 2565 spin_unlock(&dlm->spinlock);
2293 2566
2567stage2:
2294 memset(&fr, 0, sizeof(fr)); 2568 memset(&fr, 0, sizeof(fr));
2295 fr.node_idx = dlm->node_num; 2569 fr.node_idx = dlm->node_num;
2296 fr.dead_node = dlm->reco.dead_node; 2570 fr.dead_node = dlm->reco.dead_node;
2571 if (stage == 2)
2572 fr.flags |= DLM_FINALIZE_STAGE2;
2297 2573
2298 while ((nodenum = dlm_node_iter_next(&iter)) >= 0) { 2574 while ((nodenum = dlm_node_iter_next(&iter)) >= 0) {
2299 if (nodenum == dlm->node_num) 2575 if (nodenum == dlm->node_num)
2300 continue; 2576 continue;
2301 ret = o2net_send_message(DLM_FINALIZE_RECO_MSG, dlm->key, 2577 ret = o2net_send_message(DLM_FINALIZE_RECO_MSG, dlm->key,
2302 &fr, sizeof(fr), nodenum, &status); 2578 &fr, sizeof(fr), nodenum, &status);
2303 if (ret >= 0) { 2579 if (ret >= 0)
2304 ret = status; 2580 ret = status;
2581 if (ret < 0) {
2582 mlog_errno(ret);
2305 if (dlm_is_host_down(ret)) { 2583 if (dlm_is_host_down(ret)) {
2306 /* this has no effect on this recovery 2584 /* this has no effect on this recovery
2307 * session, so set the status to zero to 2585 * session, so set the status to zero to
@@ -2309,13 +2587,17 @@ static int dlm_send_finalize_reco_message(struct dlm_ctxt *dlm)
2309 mlog(ML_ERROR, "node %u went down after this " 2587 mlog(ML_ERROR, "node %u went down after this "
2310 "node finished recovery.\n", nodenum); 2588 "node finished recovery.\n", nodenum);
2311 ret = 0; 2589 ret = 0;
2590 continue;
2312 } 2591 }
2313 }
2314 if (ret < 0) {
2315 mlog_errno(ret);
2316 break; 2592 break;
2317 } 2593 }
2318 } 2594 }
2595 if (stage == 1) {
2596 /* reset the node_iter back to the top and send finalize2 */
2597 iter.curnode = -1;
2598 stage = 2;
2599 goto stage2;
2600 }
2319 2601
2320 return ret; 2602 return ret;
2321} 2603}
@@ -2324,14 +2606,19 @@ int dlm_finalize_reco_handler(struct o2net_msg *msg, u32 len, void *data)
2324{ 2606{
2325 struct dlm_ctxt *dlm = data; 2607 struct dlm_ctxt *dlm = data;
2326 struct dlm_finalize_reco *fr = (struct dlm_finalize_reco *)msg->buf; 2608 struct dlm_finalize_reco *fr = (struct dlm_finalize_reco *)msg->buf;
2609 int stage = 1;
2327 2610
2328 /* ok to return 0, domain has gone away */ 2611 /* ok to return 0, domain has gone away */
2329 if (!dlm_grab(dlm)) 2612 if (!dlm_grab(dlm))
2330 return 0; 2613 return 0;
2331 2614
2332 mlog(0, "node %u finalizing recovery of node %u\n", 2615 if (fr->flags & DLM_FINALIZE_STAGE2)
2333 fr->node_idx, fr->dead_node); 2616 stage = 2;
2334 2617
2618 mlog(0, "%s: node %u finalizing recovery stage%d of "
2619 "node %u (%u:%u)\n", dlm->name, fr->node_idx, stage,
2620 fr->dead_node, dlm->reco.dead_node, dlm->reco.new_master);
2621
2335 spin_lock(&dlm->spinlock); 2622 spin_lock(&dlm->spinlock);
2336 2623
2337 if (dlm->reco.new_master != fr->node_idx) { 2624 if (dlm->reco.new_master != fr->node_idx) {
@@ -2347,13 +2634,41 @@ int dlm_finalize_reco_handler(struct o2net_msg *msg, u32 len, void *data)
2347 BUG(); 2634 BUG();
2348 } 2635 }
2349 2636
2350 dlm_finish_local_lockres_recovery(dlm, fr->dead_node, fr->node_idx); 2637 switch (stage) {
2351 2638 case 1:
2352 spin_unlock(&dlm->spinlock); 2639 dlm_finish_local_lockres_recovery(dlm, fr->dead_node, fr->node_idx);
2640 if (dlm->reco.state & DLM_RECO_STATE_FINALIZE) {
2641 mlog(ML_ERROR, "%s: received finalize1 from "
2642 "new master %u for dead node %u, but "
2643 "this node has already received it!\n",
2644 dlm->name, fr->node_idx, fr->dead_node);
2645 dlm_print_reco_node_status(dlm);
2646 BUG();
2647 }
2648 dlm->reco.state |= DLM_RECO_STATE_FINALIZE;
2649 spin_unlock(&dlm->spinlock);
2650 break;
2651 case 2:
2652 if (!(dlm->reco.state & DLM_RECO_STATE_FINALIZE)) {
2653 mlog(ML_ERROR, "%s: received finalize2 from "
2654 "new master %u for dead node %u, but "
2655 "this node did not have finalize1!\n",
2656 dlm->name, fr->node_idx, fr->dead_node);
2657 dlm_print_reco_node_status(dlm);
2658 BUG();
2659 }
2660 dlm->reco.state &= ~DLM_RECO_STATE_FINALIZE;
2661 spin_unlock(&dlm->spinlock);
2662 dlm_reset_recovery(dlm);
2663 dlm_kick_recovery_thread(dlm);
2664 break;
2665 default:
2666 BUG();
2667 }
2353 2668
2354 dlm_reset_recovery(dlm); 2669 mlog(0, "%s: recovery done, reco master was %u, dead now %u, master now %u\n",
2670 dlm->name, fr->node_idx, dlm->reco.dead_node, dlm->reco.new_master);
2355 2671
2356 dlm_kick_recovery_thread(dlm);
2357 dlm_put(dlm); 2672 dlm_put(dlm);
2358 return 0; 2673 return 0;
2359} 2674}
diff --git a/fs/ocfs2/dlm/dlmthread.c b/fs/ocfs2/dlm/dlmthread.c
index 5be9d14f12cb..0c822f3ffb05 100644
--- a/fs/ocfs2/dlm/dlmthread.c
+++ b/fs/ocfs2/dlm/dlmthread.c
@@ -39,6 +39,7 @@
39#include <linux/inet.h> 39#include <linux/inet.h>
40#include <linux/timer.h> 40#include <linux/timer.h>
41#include <linux/kthread.h> 41#include <linux/kthread.h>
42#include <linux/delay.h>
42 43
43 44
44#include "cluster/heartbeat.h" 45#include "cluster/heartbeat.h"
@@ -53,6 +54,8 @@
53#include "cluster/masklog.h" 54#include "cluster/masklog.h"
54 55
55static int dlm_thread(void *data); 56static int dlm_thread(void *data);
57static void dlm_purge_lockres_now(struct dlm_ctxt *dlm,
58 struct dlm_lock_resource *lockres);
56 59
57static void dlm_flush_asts(struct dlm_ctxt *dlm); 60static void dlm_flush_asts(struct dlm_ctxt *dlm);
58 61
@@ -80,7 +83,7 @@ repeat:
80} 83}
81 84
82 85
83static int __dlm_lockres_unused(struct dlm_lock_resource *res) 86int __dlm_lockres_unused(struct dlm_lock_resource *res)
84{ 87{
85 if (list_empty(&res->granted) && 88 if (list_empty(&res->granted) &&
86 list_empty(&res->converting) && 89 list_empty(&res->converting) &&
@@ -103,6 +106,20 @@ void __dlm_lockres_calc_usage(struct dlm_ctxt *dlm,
103 assert_spin_locked(&res->spinlock); 106 assert_spin_locked(&res->spinlock);
104 107
105 if (__dlm_lockres_unused(res)){ 108 if (__dlm_lockres_unused(res)){
109 /* For now, just keep any resource we master */
110 if (res->owner == dlm->node_num)
111 {
112 if (!list_empty(&res->purge)) {
113 mlog(0, "we master %s:%.*s, but it is on "
114 "the purge list. Removing\n",
115 dlm->name, res->lockname.len,
116 res->lockname.name);
117 list_del_init(&res->purge);
118 dlm->purge_count--;
119 }
120 return;
121 }
122
106 if (list_empty(&res->purge)) { 123 if (list_empty(&res->purge)) {
107 mlog(0, "putting lockres %.*s from purge list\n", 124 mlog(0, "putting lockres %.*s from purge list\n",
108 res->lockname.len, res->lockname.name); 125 res->lockname.len, res->lockname.name);
@@ -110,10 +127,23 @@ void __dlm_lockres_calc_usage(struct dlm_ctxt *dlm,
110 res->last_used = jiffies; 127 res->last_used = jiffies;
111 list_add_tail(&res->purge, &dlm->purge_list); 128 list_add_tail(&res->purge, &dlm->purge_list);
112 dlm->purge_count++; 129 dlm->purge_count++;
130
131 /* if this node is not the owner, there is
132 * no way to keep track of who the owner could be.
133 * unhash it to avoid serious problems. */
134 if (res->owner != dlm->node_num) {
135 mlog(0, "%s:%.*s: doing immediate "
136 "purge of lockres owned by %u\n",
137 dlm->name, res->lockname.len,
138 res->lockname.name, res->owner);
139
140 dlm_purge_lockres_now(dlm, res);
141 }
113 } 142 }
114 } else if (!list_empty(&res->purge)) { 143 } else if (!list_empty(&res->purge)) {
115 mlog(0, "removing lockres %.*s from purge list\n", 144 mlog(0, "removing lockres %.*s from purge list, "
116 res->lockname.len, res->lockname.name); 145 "owner=%u\n", res->lockname.len, res->lockname.name,
146 res->owner);
117 147
118 list_del_init(&res->purge); 148 list_del_init(&res->purge);
119 dlm->purge_count--; 149 dlm->purge_count--;
@@ -165,6 +195,7 @@ again:
165 } else if (ret < 0) { 195 } else if (ret < 0) {
166 mlog(ML_NOTICE, "lockres %.*s: migrate failed, retrying\n", 196 mlog(ML_NOTICE, "lockres %.*s: migrate failed, retrying\n",
167 lockres->lockname.len, lockres->lockname.name); 197 lockres->lockname.len, lockres->lockname.name);
198 msleep(100);
168 goto again; 199 goto again;
169 } 200 }
170 201
@@ -178,6 +209,24 @@ finish:
178 __dlm_unhash_lockres(lockres); 209 __dlm_unhash_lockres(lockres);
179} 210}
180 211
212/* make an unused lockres go away immediately.
213 * as soon as the dlm spinlock is dropped, this lockres
214 * will not be found. kfree still happens on last put. */
215static void dlm_purge_lockres_now(struct dlm_ctxt *dlm,
216 struct dlm_lock_resource *lockres)
217{
218 assert_spin_locked(&dlm->spinlock);
219 assert_spin_locked(&lockres->spinlock);
220
221 BUG_ON(!__dlm_lockres_unused(lockres));
222
223 if (!list_empty(&lockres->purge)) {
224 list_del_init(&lockres->purge);
225 dlm->purge_count--;
226 }
227 __dlm_unhash_lockres(lockres);
228}
229
181static void dlm_run_purge_list(struct dlm_ctxt *dlm, 230static void dlm_run_purge_list(struct dlm_ctxt *dlm,
182 int purge_now) 231 int purge_now)
183{ 232{
@@ -318,8 +367,7 @@ converting:
318 367
319 target->ml.type = target->ml.convert_type; 368 target->ml.type = target->ml.convert_type;
320 target->ml.convert_type = LKM_IVMODE; 369 target->ml.convert_type = LKM_IVMODE;
321 list_del_init(&target->list); 370 list_move_tail(&target->list, &res->granted);
322 list_add_tail(&target->list, &res->granted);
323 371
324 BUG_ON(!target->lksb); 372 BUG_ON(!target->lksb);
325 target->lksb->status = DLM_NORMAL; 373 target->lksb->status = DLM_NORMAL;
@@ -380,8 +428,7 @@ blocked:
380 target->ml.type, target->ml.node); 428 target->ml.type, target->ml.node);
381 429
382 // target->ml.type is already correct 430 // target->ml.type is already correct
383 list_del_init(&target->list); 431 list_move_tail(&target->list, &res->granted);
384 list_add_tail(&target->list, &res->granted);
385 432
386 BUG_ON(!target->lksb); 433 BUG_ON(!target->lksb);
387 target->lksb->status = DLM_NORMAL; 434 target->lksb->status = DLM_NORMAL;
@@ -422,6 +469,8 @@ void __dlm_dirty_lockres(struct dlm_ctxt *dlm, struct dlm_lock_resource *res)
422 /* don't shuffle secondary queues */ 469 /* don't shuffle secondary queues */
423 if ((res->owner == dlm->node_num) && 470 if ((res->owner == dlm->node_num) &&
424 !(res->state & DLM_LOCK_RES_DIRTY)) { 471 !(res->state & DLM_LOCK_RES_DIRTY)) {
472 /* ref for dirty_list */
473 dlm_lockres_get(res);
425 list_add_tail(&res->dirty, &dlm->dirty_list); 474 list_add_tail(&res->dirty, &dlm->dirty_list);
426 res->state |= DLM_LOCK_RES_DIRTY; 475 res->state |= DLM_LOCK_RES_DIRTY;
427 } 476 }
@@ -606,6 +655,8 @@ static int dlm_thread(void *data)
606 list_del_init(&res->dirty); 655 list_del_init(&res->dirty);
607 spin_unlock(&res->spinlock); 656 spin_unlock(&res->spinlock);
608 spin_unlock(&dlm->spinlock); 657 spin_unlock(&dlm->spinlock);
658 /* Drop dirty_list ref */
659 dlm_lockres_put(res);
609 660
610 /* lockres can be re-dirtied/re-added to the 661 /* lockres can be re-dirtied/re-added to the
611 * dirty_list in this gap, but that is ok */ 662 * dirty_list in this gap, but that is ok */
@@ -642,8 +693,9 @@ static int dlm_thread(void *data)
642 * spinlock and do NOT have the dlm lock. 693 * spinlock and do NOT have the dlm lock.
643 * safe to reserve/queue asts and run the lists. */ 694 * safe to reserve/queue asts and run the lists. */
644 695
645 mlog(0, "calling dlm_shuffle_lists with dlm=%p, " 696 mlog(0, "calling dlm_shuffle_lists with dlm=%s, "
646 "res=%p\n", dlm, res); 697 "res=%.*s\n", dlm->name,
698 res->lockname.len, res->lockname.name);
647 699
648 /* called while holding lockres lock */ 700 /* called while holding lockres lock */
649 dlm_shuffle_lists(dlm, res); 701 dlm_shuffle_lists(dlm, res);
@@ -657,6 +709,8 @@ in_progress:
657 /* if the lock was in-progress, stick 709 /* if the lock was in-progress, stick
658 * it on the back of the list */ 710 * it on the back of the list */
659 if (delay) { 711 if (delay) {
712 /* ref for dirty_list */
713 dlm_lockres_get(res);
660 spin_lock(&res->spinlock); 714 spin_lock(&res->spinlock);
661 list_add_tail(&res->dirty, &dlm->dirty_list); 715 list_add_tail(&res->dirty, &dlm->dirty_list);
662 res->state |= DLM_LOCK_RES_DIRTY; 716 res->state |= DLM_LOCK_RES_DIRTY;
@@ -677,7 +731,7 @@ in_progress:
677 731
678 /* yield and continue right away if there is more work to do */ 732 /* yield and continue right away if there is more work to do */
679 if (!n) { 733 if (!n) {
680 yield(); 734 cond_resched();
681 continue; 735 continue;
682 } 736 }
683 737
diff --git a/fs/ocfs2/dlm/dlmunlock.c b/fs/ocfs2/dlm/dlmunlock.c
index 7b1a27542674..b0c3134f4f70 100644
--- a/fs/ocfs2/dlm/dlmunlock.c
+++ b/fs/ocfs2/dlm/dlmunlock.c
@@ -271,8 +271,7 @@ void dlm_commit_pending_unlock(struct dlm_lock_resource *res,
271void dlm_commit_pending_cancel(struct dlm_lock_resource *res, 271void dlm_commit_pending_cancel(struct dlm_lock_resource *res,
272 struct dlm_lock *lock) 272 struct dlm_lock *lock)
273{ 273{
274 list_del_init(&lock->list); 274 list_move_tail(&lock->list, &res->granted);
275 list_add_tail(&lock->list, &res->granted);
276 lock->ml.convert_type = LKM_IVMODE; 275 lock->ml.convert_type = LKM_IVMODE;
277} 276}
278 277
@@ -319,6 +318,16 @@ static enum dlm_status dlm_send_remote_unlock_request(struct dlm_ctxt *dlm,
319 318
320 mlog_entry("%.*s\n", res->lockname.len, res->lockname.name); 319 mlog_entry("%.*s\n", res->lockname.len, res->lockname.name);
321 320
321 if (owner == dlm->node_num) {
322 /* ended up trying to contact ourself. this means
323 * that the lockres had been remote but became local
324 * via a migration. just retry it, now as local */
325 mlog(0, "%s:%.*s: this node became the master due to a "
326 "migration, re-evaluate now\n", dlm->name,
327 res->lockname.len, res->lockname.name);
328 return DLM_FORWARD;
329 }
330
322 memset(&unlock, 0, sizeof(unlock)); 331 memset(&unlock, 0, sizeof(unlock));
323 unlock.node_idx = dlm->node_num; 332 unlock.node_idx = dlm->node_num;
324 unlock.flags = cpu_to_be32(flags); 333 unlock.flags = cpu_to_be32(flags);
diff --git a/fs/ocfs2/dlm/userdlm.c b/fs/ocfs2/dlm/userdlm.c
index 74ca4e5f9765..e641b084b343 100644
--- a/fs/ocfs2/dlm/userdlm.c
+++ b/fs/ocfs2/dlm/userdlm.c
@@ -672,7 +672,7 @@ struct dlm_ctxt *user_dlm_register_context(struct qstr *name)
672 u32 dlm_key; 672 u32 dlm_key;
673 char *domain; 673 char *domain;
674 674
675 domain = kmalloc(name->len + 1, GFP_KERNEL); 675 domain = kmalloc(name->len + 1, GFP_NOFS);
676 if (!domain) { 676 if (!domain) {
677 mlog_errno(-ENOMEM); 677 mlog_errno(-ENOMEM);
678 return ERR_PTR(-ENOMEM); 678 return ERR_PTR(-ENOMEM);
diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c
index 64cd52860c87..4acd37286bdd 100644
--- a/fs/ocfs2/dlmglue.c
+++ b/fs/ocfs2/dlmglue.c
@@ -242,7 +242,7 @@ static void ocfs2_build_lock_name(enum ocfs2_lock_type type,
242 mlog_exit_void(); 242 mlog_exit_void();
243} 243}
244 244
245static spinlock_t ocfs2_dlm_tracking_lock = SPIN_LOCK_UNLOCKED; 245static DEFINE_SPINLOCK(ocfs2_dlm_tracking_lock);
246 246
247static void ocfs2_add_lockres_tracking(struct ocfs2_lock_res *res, 247static void ocfs2_add_lockres_tracking(struct ocfs2_lock_res *res,
248 struct ocfs2_dlm_debug *dlm_debug) 248 struct ocfs2_dlm_debug *dlm_debug)
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
index eebc3cfa6be8..910a601b2e98 100644
--- a/fs/ocfs2/journal.c
+++ b/fs/ocfs2/journal.c
@@ -49,7 +49,7 @@
49 49
50#include "buffer_head_io.h" 50#include "buffer_head_io.h"
51 51
52spinlock_t trans_inc_lock = SPIN_LOCK_UNLOCKED; 52DEFINE_SPINLOCK(trans_inc_lock);
53 53
54static int ocfs2_force_read_journal(struct inode *inode); 54static int ocfs2_force_read_journal(struct inode *inode);
55static int ocfs2_recover_node(struct ocfs2_super *osb, 55static int ocfs2_recover_node(struct ocfs2_super *osb,
@@ -222,8 +222,7 @@ void ocfs2_handle_add_inode(struct ocfs2_journal_handle *handle,
222 BUG_ON(!list_empty(&OCFS2_I(inode)->ip_handle_list)); 222 BUG_ON(!list_empty(&OCFS2_I(inode)->ip_handle_list));
223 223
224 OCFS2_I(inode)->ip_handle = handle; 224 OCFS2_I(inode)->ip_handle = handle;
225 list_del(&(OCFS2_I(inode)->ip_handle_list)); 225 list_move_tail(&(OCFS2_I(inode)->ip_handle_list), &(handle->inode_list));
226 list_add_tail(&(OCFS2_I(inode)->ip_handle_list), &(handle->inode_list));
227} 226}
228 227
229static void ocfs2_handle_unlock_inodes(struct ocfs2_journal_handle *handle) 228static void ocfs2_handle_unlock_inodes(struct ocfs2_journal_handle *handle)
diff --git a/fs/ocfs2/vote.c b/fs/ocfs2/vote.c
index ee42765a8553..cf70fe2075b8 100644
--- a/fs/ocfs2/vote.c
+++ b/fs/ocfs2/vote.c
@@ -988,9 +988,7 @@ int ocfs2_request_mount_vote(struct ocfs2_super *osb)
988 } 988 }
989 989
990bail: 990bail:
991 if (request) 991 kfree(request);
992 kfree(request);
993
994 return status; 992 return status;
995} 993}
996 994
@@ -1021,9 +1019,7 @@ int ocfs2_request_umount_vote(struct ocfs2_super *osb)
1021 } 1019 }
1022 1020
1023bail: 1021bail:
1024 if (request) 1022 kfree(request);
1025 kfree(request);
1026
1027 return status; 1023 return status;
1028} 1024}
1029 1025
diff --git a/fs/openpromfs/inode.c b/fs/openpromfs/inode.c
index efc7c91128af..93a56bd4a2b7 100644
--- a/fs/openpromfs/inode.c
+++ b/fs/openpromfs/inode.c
@@ -1,5 +1,4 @@
1/* $Id: inode.c,v 1.15 2001/11/12 09:43:39 davem Exp $ 1/* inode.c: /proc/openprom handling routines
2 * openpromfs.c: /proc/openprom handling routines
3 * 2 *
4 * Copyright (C) 1996-1999 Jakub Jelinek (jakub@redhat.com) 3 * Copyright (C) 1996-1999 Jakub Jelinek (jakub@redhat.com)
5 * Copyright (C) 1998 Eddie C. Dost (ecd@skynet.be) 4 * Copyright (C) 1998 Eddie C. Dost (ecd@skynet.be)
@@ -12,762 +11,245 @@
12#include <linux/openprom_fs.h> 11#include <linux/openprom_fs.h>
13#include <linux/init.h> 12#include <linux/init.h>
14#include <linux/slab.h> 13#include <linux/slab.h>
15#include <linux/smp_lock.h> 14#include <linux/seq_file.h>
16 15
17#include <asm/openprom.h> 16#include <asm/openprom.h>
18#include <asm/oplib.h> 17#include <asm/oplib.h>
18#include <asm/prom.h>
19#include <asm/uaccess.h> 19#include <asm/uaccess.h>
20 20
21#define ALIASES_NNODES 64 21static DEFINE_MUTEX(op_mutex);
22
23typedef struct {
24 u16 parent;
25 u16 next;
26 u16 child;
27 u16 first_prop;
28 u32 node;
29} openpromfs_node;
30
31typedef struct {
32#define OPP_STRING 0x10
33#define OPP_STRINGLIST 0x20
34#define OPP_BINARY 0x40
35#define OPP_HEXSTRING 0x80
36#define OPP_DIRTY 0x01
37#define OPP_QUOTED 0x02
38#define OPP_NOTQUOTED 0x04
39#define OPP_ASCIIZ 0x08
40 u32 flag;
41 u32 alloclen;
42 u32 len;
43 char *value;
44 char name[8];
45} openprom_property;
46
47static openpromfs_node *nodes;
48static int alloced;
49static u16 last_node;
50static u16 first_prop;
51static u16 options = 0xffff;
52static u16 aliases = 0xffff;
53static int aliases_nodes;
54static char *alias_names [ALIASES_NNODES];
55
56#define OPENPROM_ROOT_INO 16
57#define OPENPROM_FIRST_INO OPENPROM_ROOT_INO
58#define NODE(ino) nodes[ino - OPENPROM_FIRST_INO]
59#define NODE2INO(node) (node + OPENPROM_FIRST_INO)
60#define NODEP2INO(no) (no + OPENPROM_FIRST_INO + last_node)
61
62static int openpromfs_create (struct inode *, struct dentry *, int, struct nameidata *);
63static int openpromfs_readdir(struct file *, void *, filldir_t);
64static struct dentry *openpromfs_lookup(struct inode *, struct dentry *dentry, struct nameidata *nd);
65static int openpromfs_unlink (struct inode *, struct dentry *dentry);
66 22
67static inline u16 ptr_nod(void *p) 23#define OPENPROM_ROOT_INO 0
68{
69 return (long)p & 0xFFFF;
70}
71 24
72static ssize_t nodenum_read(struct file *file, char __user *buf, 25enum op_inode_type {
73 size_t count, loff_t *ppos) 26 op_inode_node,
27 op_inode_prop,
28};
29
30union op_inode_data {
31 struct device_node *node;
32 struct property *prop;
33};
34
35struct op_inode_info {
36 struct inode vfs_inode;
37 enum op_inode_type type;
38 union op_inode_data u;
39};
40
41static inline struct op_inode_info *OP_I(struct inode *inode)
74{ 42{
75 struct inode *inode = file->f_dentry->d_inode; 43 return container_of(inode, struct op_inode_info, vfs_inode);
76 char buffer[10];
77
78 if (count < 0 || !inode->u.generic_ip)
79 return -EINVAL;
80 sprintf (buffer, "%8.8lx\n", (long)inode->u.generic_ip);
81 if (file->f_pos >= 9)
82 return 0;
83 if (count > 9 - file->f_pos)
84 count = 9 - file->f_pos;
85 if (copy_to_user(buf, buffer + file->f_pos, count))
86 return -EFAULT;
87 *ppos += count;
88 return count;
89} 44}
90 45
91static ssize_t property_read(struct file *filp, char __user *buf, 46static int is_string(unsigned char *p, int len)
92 size_t count, loff_t *ppos)
93{ 47{
94 struct inode *inode = filp->f_dentry->d_inode; 48 int i;
95 int i, j, k;
96 u32 node;
97 char *p, *s;
98 u32 *q;
99 openprom_property *op;
100 char buffer[64];
101
102 if (!filp->private_data) {
103 node = nodes[ptr_nod(inode->u.generic_ip)].node;
104 i = ((u32)(long)inode->u.generic_ip) >> 16;
105 if (ptr_nod(inode->u.generic_ip) == aliases) {
106 if (i >= aliases_nodes)
107 p = NULL;
108 else
109 p = alias_names [i];
110 } else
111 for (p = prom_firstprop (node, buffer);
112 i && p && *p;
113 p = prom_nextprop (node, p, buffer), i--)
114 /* nothing */ ;
115 if (!p || !*p)
116 return -EIO;
117 i = prom_getproplen (node, p);
118 if (i < 0) {
119 if (ptr_nod(inode->u.generic_ip) == aliases)
120 i = 0;
121 else
122 return -EIO;
123 }
124 k = i;
125 if (i < 64) i = 64;
126 filp->private_data = kmalloc (sizeof (openprom_property)
127 + (j = strlen (p)) + 2 * i,
128 GFP_KERNEL);
129 if (!filp->private_data)
130 return -ENOMEM;
131 op = filp->private_data;
132 op->flag = 0;
133 op->alloclen = 2 * i;
134 strcpy (op->name, p);
135 op->value = (char *)(((unsigned long)(op->name + j + 4)) & ~3);
136 op->len = k;
137 if (k && prom_getproperty (node, p, op->value, i) < 0)
138 return -EIO;
139 op->value [k] = 0;
140 if (k) {
141 for (s = NULL, p = op->value; p < op->value + k; p++) {
142 if ((*p >= ' ' && *p <= '~') || *p == '\n') {
143 op->flag |= OPP_STRING;
144 s = p;
145 continue;
146 }
147 if (p > op->value && !*p && s == p - 1) {
148 if (p < op->value + k - 1)
149 op->flag |= OPP_STRINGLIST;
150 else
151 op->flag |= OPP_ASCIIZ;
152 continue;
153 }
154 if (k == 1 && !*p) {
155 op->flag |= (OPP_STRING|OPP_ASCIIZ);
156 break;
157 }
158 op->flag &= ~(OPP_STRING|OPP_STRINGLIST);
159 if (k & 3)
160 op->flag |= OPP_HEXSTRING;
161 else
162 op->flag |= OPP_BINARY;
163 break;
164 }
165 if (op->flag & OPP_STRINGLIST)
166 op->flag &= ~(OPP_STRING);
167 if (op->flag & OPP_ASCIIZ)
168 op->len--;
169 }
170 } else
171 op = filp->private_data;
172 if (!count || !(op->len || (op->flag & OPP_ASCIIZ)))
173 return 0;
174 if (*ppos >= 0xffffff || count >= 0xffffff)
175 return -EINVAL;
176 if (op->flag & OPP_STRINGLIST) {
177 for (k = 0, p = op->value; p < op->value + op->len; p++)
178 if (!*p)
179 k++;
180 i = op->len + 4 * k + 3;
181 } else if (op->flag & OPP_STRING) {
182 i = op->len + 3;
183 } else if (op->flag & OPP_BINARY) {
184 i = (op->len * 9) >> 2;
185 } else {
186 i = (op->len << 1) + 1;
187 }
188 k = *ppos;
189 if (k >= i) return 0;
190 if (count > i - k) count = i - k;
191 if (op->flag & OPP_STRING) {
192 if (!k) {
193 if (put_user('\'', buf))
194 return -EFAULT;
195 k++;
196 count--;
197 }
198 49
199 if (k + count >= i - 2) 50 for (i = 0; i < len; i++) {
200 j = i - 2 - k; 51 unsigned char val = p[i];
201 else
202 j = count;
203
204 if (j >= 0) {
205 if (copy_to_user(buf + k - *ppos,
206 op->value + k - 1, j))
207 return -EFAULT;
208 count -= j;
209 k += j;
210 }
211 52
212 if (count) { 53 if ((i && !val) ||
213 if (put_user('\'', &buf [k++ - *ppos])) 54 (val >= ' ' && val <= '~'))
214 return -EFAULT; 55 continue;
215 }
216 if (count > 1) {
217 if (put_user('\n', &buf [k++ - *ppos]))
218 return -EFAULT;
219 }
220 } else if (op->flag & OPP_STRINGLIST) {
221 char *tmp;
222
223 tmp = kmalloc (i, GFP_KERNEL);
224 if (!tmp)
225 return -ENOMEM;
226
227 s = tmp;
228 *s++ = '\'';
229 for (p = op->value; p < op->value + op->len; p++) {
230 if (!*p) {
231 strcpy(s, "' + '");
232 s += 5;
233 continue;
234 }
235 *s++ = *p;
236 }
237 strcpy(s, "'\n");
238
239 if (copy_to_user(buf, tmp + k, count))
240 return -EFAULT;
241
242 kfree(tmp);
243 k += count;
244
245 } else if (op->flag & OPP_BINARY) {
246 char buffer[10];
247 u32 *first, *last;
248 int first_off, last_cnt;
249
250 first = ((u32 *)op->value) + k / 9;
251 first_off = k % 9;
252 last = ((u32 *)op->value) + (k + count - 1) / 9;
253 last_cnt = (k + count) % 9;
254 if (!last_cnt) last_cnt = 9;
255
256 if (first == last) {
257 sprintf (buffer, "%08x.", *first);
258 if (copy_to_user(buf, buffer + first_off,
259 last_cnt - first_off))
260 return -EFAULT;
261 buf += last_cnt - first_off;
262 } else {
263 for (q = first; q <= last; q++) {
264 sprintf (buffer, "%08x.", *q);
265 if (q == first) {
266 if (copy_to_user(buf, buffer + first_off,
267 9 - first_off))
268 return -EFAULT;
269 buf += 9 - first_off;
270 } else if (q == last) {
271 if (copy_to_user(buf, buffer, last_cnt))
272 return -EFAULT;
273 buf += last_cnt;
274 } else {
275 if (copy_to_user(buf, buffer, 9))
276 return -EFAULT;
277 buf += 9;
278 }
279 }
280 }
281 56
282 if (last == (u32 *)(op->value + op->len - 4) && last_cnt == 9) { 57 return 0;
283 if (put_user('\n', (buf - 1))) 58 }
284 return -EFAULT;
285 }
286 59
287 k += count; 60 return 1;
61}
288 62
289 } else if (op->flag & OPP_HEXSTRING) { 63static int property_show(struct seq_file *f, void *v)
290 char buffer[3]; 64{
65 struct property *prop = f->private;
66 void *pval;
67 int len;
291 68
292 if ((k < i - 1) && (k & 1)) { 69 len = prop->length;
293 sprintf (buffer, "%02x", 70 pval = prop->value;
294 (unsigned char) *(op->value + (k >> 1)) & 0xff);
295 if (put_user(buffer[1], &buf[k++ - *ppos]))
296 return -EFAULT;
297 count--;
298 }
299 71
300 for (; (count > 1) && (k < i - 1); k += 2) { 72 if (is_string(pval, len)) {
301 sprintf (buffer, "%02x", 73 while (len > 0) {
302 (unsigned char) *(op->value + (k >> 1)) & 0xff); 74 int n = strlen(pval);
303 if (copy_to_user(buf + k - *ppos, buffer, 2))
304 return -EFAULT;
305 count -= 2;
306 }
307 75
308 if (count && (k < i - 1)) { 76 seq_printf(f, "%s", (char *) pval);
309 sprintf (buffer, "%02x",
310 (unsigned char) *(op->value + (k >> 1)) & 0xff);
311 if (put_user(buffer[0], &buf[k++ - *ppos]))
312 return -EFAULT;
313 count--;
314 }
315 77
316 if (count) { 78 /* Skip over the NULL byte too. */
317 if (put_user('\n', &buf [k++ - *ppos])) 79 pval += n + 1;
318 return -EFAULT; 80 len -= n + 1;
319 }
320 }
321 count = k - *ppos;
322 *ppos = k;
323 return count;
324}
325 81
326static ssize_t property_write(struct file *filp, const char __user *buf, 82 if (len > 0)
327 size_t count, loff_t *ppos) 83 seq_printf(f, " + ");
328{
329 int i, j, k;
330 char *p;
331 u32 *q;
332 void *b;
333 openprom_property *op;
334
335 if (*ppos >= 0xffffff || count >= 0xffffff)
336 return -EINVAL;
337 if (!filp->private_data) {
338 i = property_read (filp, NULL, 0, NULL);
339 if (i)
340 return i;
341 }
342 k = *ppos;
343 op = filp->private_data;
344 if (!(op->flag & OPP_STRING)) {
345 u32 *first, *last;
346 int first_off, last_cnt;
347 u32 mask, mask2;
348 char tmp [9];
349 int forcelen = 0;
350
351 j = k % 9;
352 for (i = 0; i < count; i++, j++) {
353 if (j == 9) j = 0;
354 if (!j) {
355 char ctmp;
356 if (get_user(ctmp, &buf[i]))
357 return -EFAULT;
358 if (ctmp != '.') {
359 if (ctmp != '\n') {
360 if (op->flag & OPP_BINARY)
361 return -EINVAL;
362 else
363 goto write_try_string;
364 } else {
365 count = i + 1;
366 forcelen = 1;
367 break;
368 }
369 }
370 } else {
371 char ctmp;
372 if (get_user(ctmp, &buf[i]))
373 return -EFAULT;
374 if (ctmp < '0' ||
375 (ctmp > '9' && ctmp < 'A') ||
376 (ctmp > 'F' && ctmp < 'a') ||
377 ctmp > 'f') {
378 if (op->flag & OPP_BINARY)
379 return -EINVAL;
380 else
381 goto write_try_string;
382 }
383 }
384 }
385 op->flag |= OPP_BINARY;
386 tmp [8] = 0;
387 i = ((count + k + 8) / 9) << 2;
388 if (op->alloclen <= i) {
389 b = kmalloc (sizeof (openprom_property) + 2 * i,
390 GFP_KERNEL);
391 if (!b)
392 return -ENOMEM;
393 memcpy (b, filp->private_data,
394 sizeof (openprom_property)
395 + strlen (op->name) + op->alloclen);
396 memset (b + sizeof (openprom_property)
397 + strlen (op->name) + op->alloclen,
398 0, 2 * i - op->alloclen);
399 op = b;
400 op->alloclen = 2*i;
401 b = filp->private_data;
402 filp->private_data = op;
403 kfree (b);
404 } 84 }
405 first = ((u32 *)op->value) + (k / 9); 85 } else {
406 first_off = k % 9; 86 if (len & 3) {
407 last = (u32 *)(op->value + i); 87 while (len) {
408 last_cnt = (k + count) % 9; 88 len--;
409 if (first + 1 == last) { 89 if (len)
410 memset (tmp, '0', 8); 90 seq_printf(f, "%02x.",
411 if (copy_from_user(tmp + first_off, buf, 91 *(unsigned char *) pval);
412 (count + first_off > 8) ? 92 else
413 8 - first_off : count)) 93 seq_printf(f, "%02x",
414 return -EFAULT; 94 *(unsigned char *) pval);
415 mask = 0xffffffff; 95 pval++;
416 mask2 = 0xffffffff;
417 for (j = 0; j < first_off; j++)
418 mask >>= 1;
419 for (j = 8 - count - first_off; j > 0; j--)
420 mask2 <<= 1;
421 mask &= mask2;
422 if (mask) {
423 *first &= ~mask;
424 *first |= simple_strtoul (tmp, NULL, 16);
425 op->flag |= OPP_DIRTY;
426 } 96 }
427 } else { 97 } else {
428 op->flag |= OPP_DIRTY; 98 while (len >= 4) {
429 for (q = first; q < last; q++) { 99 len -= 4;
430 if (q == first) { 100
431 if (first_off < 8) { 101 if (len)
432 memset (tmp, '0', 8); 102 seq_printf(f, "%08x.",
433 if (copy_from_user(tmp + first_off, 103 *(unsigned int *) pval);
434 buf, 104 else
435 8 - first_off)) 105 seq_printf(f, "%08x",
436 return -EFAULT; 106 *(unsigned int *) pval);
437 mask = 0xffffffff; 107 pval += 4;
438 for (j = 0; j < first_off; j++)
439 mask >>= 1;
440 *q &= ~mask;
441 *q |= simple_strtoul (tmp,NULL,16);
442 }
443 buf += 9;
444 } else if ((q == last - 1) && last_cnt
445 && (last_cnt < 8)) {
446 memset (tmp, '0', 8);
447 if (copy_from_user(tmp, buf, last_cnt))
448 return -EFAULT;
449 mask = 0xffffffff;
450 for (j = 0; j < 8 - last_cnt; j++)
451 mask <<= 1;
452 *q &= ~mask;
453 *q |= simple_strtoul (tmp, NULL, 16);
454 buf += last_cnt;
455 } else {
456 char tchars[2 * sizeof(long) + 1];
457
458 if (copy_from_user(tchars, buf, sizeof(tchars) - 1))
459 return -EFAULT;
460 tchars[sizeof(tchars) - 1] = '\0';
461 *q = simple_strtoul (tchars, NULL, 16);
462 buf += 9;
463 }
464 }
465 }
466 if (!forcelen) {
467 if (op->len < i)
468 op->len = i;
469 } else
470 op->len = i;
471 *ppos += count;
472 }
473write_try_string:
474 if (!(op->flag & OPP_BINARY)) {
475 if (!(op->flag & (OPP_QUOTED | OPP_NOTQUOTED))) {
476 char ctmp;
477
478 /* No way, if somebody starts writing from the middle,
479 * we don't know whether he uses quotes around or not
480 */
481 if (k > 0)
482 return -EINVAL;
483 if (get_user(ctmp, buf))
484 return -EFAULT;
485 if (ctmp == '\'') {
486 op->flag |= OPP_QUOTED;
487 buf++;
488 count--;
489 (*ppos)++;
490 if (!count) {
491 op->flag |= OPP_STRING;
492 return 1;
493 }
494 } else
495 op->flag |= OPP_NOTQUOTED;
496 }
497 op->flag |= OPP_STRING;
498 if (op->alloclen <= count + *ppos) {
499 b = kmalloc (sizeof (openprom_property)
500 + 2 * (count + *ppos), GFP_KERNEL);
501 if (!b)
502 return -ENOMEM;
503 memcpy (b, filp->private_data,
504 sizeof (openprom_property)
505 + strlen (op->name) + op->alloclen);
506 memset (b + sizeof (openprom_property)
507 + strlen (op->name) + op->alloclen,
508 0, 2*(count - *ppos) - op->alloclen);
509 op = b;
510 op->alloclen = 2*(count + *ppos);
511 b = filp->private_data;
512 filp->private_data = op;
513 kfree (b);
514 }
515 p = op->value + *ppos - ((op->flag & OPP_QUOTED) ? 1 : 0);
516 if (copy_from_user(p, buf, count))
517 return -EFAULT;
518 op->flag |= OPP_DIRTY;
519 for (i = 0; i < count; i++, p++)
520 if (*p == '\n') {
521 *p = 0;
522 break;
523 } 108 }
524 if (i < count) {
525 op->len = p - op->value;
526 *ppos += i + 1;
527 if ((p > op->value) && (op->flag & OPP_QUOTED)
528 && (*(p - 1) == '\''))
529 op->len--;
530 } else {
531 if (p - op->value > op->len)
532 op->len = p - op->value;
533 *ppos += count;
534 } 109 }
535 } 110 }
536 return *ppos - k; 111 seq_printf(f, "\n");
112
113 return 0;
537} 114}
538 115
539int property_release (struct inode *inode, struct file *filp) 116static void *property_start(struct seq_file *f, loff_t *pos)
540{ 117{
541 openprom_property *op = filp->private_data; 118 if (*pos == 0)
542 int error; 119 return pos;
543 u32 node; 120 return NULL;
544 121}
545 if (!op) 122
546 return 0; 123static void *property_next(struct seq_file *f, void *v, loff_t *pos)
547 lock_kernel(); 124{
548 node = nodes[ptr_nod(inode->u.generic_ip)].node; 125 (*pos)++;
549 if (ptr_nod(inode->u.generic_ip) == aliases) { 126 return NULL;
550 if ((op->flag & OPP_DIRTY) && (op->flag & OPP_STRING)) { 127}
551 char *p = op->name; 128
552 int i = (op->value - op->name) - strlen (op->name) - 1; 129static void property_stop(struct seq_file *f, void *v)
553 op->value [op->len] = 0; 130{
554 *(op->value - 1) = ' '; 131 /* Nothing to do */
555 if (i) { 132}
556 for (p = op->value - i - 2; p >= op->name; p--) 133
557 p[i] = *p; 134static struct seq_operations property_op = {
558 p = op->name + i; 135 .start = property_start,
559 } 136 .next = property_next,
560 memcpy (p - 8, "nvalias ", 8); 137 .stop = property_stop,
561 prom_feval (p - 8); 138 .show = property_show
562 } 139};
563 } else if (op->flag & OPP_DIRTY) { 140
564 if (op->flag & OPP_STRING) { 141static int property_open(struct inode *inode, struct file *file)
565 op->value [op->len] = 0; 142{
566 error = prom_setprop (node, op->name, 143 struct op_inode_info *oi = OP_I(inode);
567 op->value, op->len + 1); 144 int ret;
568 if (error <= 0) 145
569 printk (KERN_WARNING "openpromfs: " 146 BUG_ON(oi->type != op_inode_prop);
570 "Couldn't write property %s\n", 147
571 op->name); 148 ret = seq_open(file, &property_op);
572 } else if ((op->flag & OPP_BINARY) || !op->len) { 149 if (!ret) {
573 error = prom_setprop (node, op->name, 150 struct seq_file *m = file->private_data;
574 op->value, op->len); 151 m->private = oi->u.prop;
575 if (error <= 0)
576 printk (KERN_WARNING "openpromfs: "
577 "Couldn't write property %s\n",
578 op->name);
579 } else {
580 printk (KERN_WARNING "openpromfs: "
581 "Unknown property type of %s\n",
582 op->name);
583 }
584 } 152 }
585 unlock_kernel(); 153 return ret;
586 kfree (filp->private_data);
587 return 0;
588} 154}
589 155
590static const struct file_operations openpromfs_prop_ops = { 156static const struct file_operations openpromfs_prop_ops = {
591 .read = property_read, 157 .open = property_open,
592 .write = property_write, 158 .read = seq_read,
593 .release = property_release, 159 .llseek = seq_lseek,
160 .release = seq_release,
594}; 161};
595 162
596static const struct file_operations openpromfs_nodenum_ops = { 163static int openpromfs_readdir(struct file *, void *, filldir_t);
597 .read = nodenum_read,
598};
599 164
600static const struct file_operations openprom_operations = { 165static const struct file_operations openprom_operations = {
601 .read = generic_read_dir, 166 .read = generic_read_dir,
602 .readdir = openpromfs_readdir, 167 .readdir = openpromfs_readdir,
603}; 168};
604 169
605static struct inode_operations openprom_alias_inode_operations = { 170static struct dentry *openpromfs_lookup(struct inode *, struct dentry *, struct nameidata *);
606 .create = openpromfs_create,
607 .lookup = openpromfs_lookup,
608 .unlink = openpromfs_unlink,
609};
610 171
611static struct inode_operations openprom_inode_operations = { 172static struct inode_operations openprom_inode_operations = {
612 .lookup = openpromfs_lookup, 173 .lookup = openpromfs_lookup,
613}; 174};
614 175
615static int lookup_children(u16 n, const char * name, int len) 176static struct dentry *openpromfs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
616{ 177{
617 int ret; 178 struct op_inode_info *ent_oi, *oi = OP_I(dir);
618 u16 node; 179 struct device_node *dp, *child;
619 for (; n != 0xffff; n = nodes[n].next) { 180 struct property *prop;
620 node = nodes[n].child; 181 enum op_inode_type ent_type;
621 if (node != 0xffff) { 182 union op_inode_data ent_data;
622 char buffer[128];
623 int i;
624 char *p;
625
626 while (node != 0xffff) {
627 if (prom_getname (nodes[node].node,
628 buffer, 128) >= 0) {
629 i = strlen (buffer);
630 if ((len == i)
631 && !strncmp (buffer, name, len))
632 return NODE2INO(node);
633 p = strchr (buffer, '@');
634 if (p && (len == p - buffer)
635 && !strncmp (buffer, name, len))
636 return NODE2INO(node);
637 }
638 node = nodes[node].next;
639 }
640 } else
641 continue;
642 ret = lookup_children (nodes[n].child, name, len);
643 if (ret) return ret;
644 }
645 return 0;
646}
647
648static struct dentry *openpromfs_lookup(struct inode * dir, struct dentry *dentry, struct nameidata *nd)
649{
650 int ino = 0;
651#define OPFSL_DIR 0
652#define OPFSL_PROPERTY 1
653#define OPFSL_NODENUM 2
654 int type = 0;
655 char buffer[128];
656 char *p;
657 const char *name; 183 const char *name;
658 u32 n;
659 u16 dirnode;
660 unsigned int len;
661 int i;
662 struct inode *inode; 184 struct inode *inode;
663 char buffer2[64]; 185 unsigned int ino;
186 int len;
664 187
665 inode = NULL; 188 BUG_ON(oi->type != op_inode_node);
189
190 dp = oi->u.node;
191
666 name = dentry->d_name.name; 192 name = dentry->d_name.name;
667 len = dentry->d_name.len; 193 len = dentry->d_name.len;
668 lock_kernel(); 194
669 if (name [0] == '.' && len == 5 && !strncmp (name + 1, "node", 4)) { 195 mutex_lock(&op_mutex);
670 ino = NODEP2INO(NODE(dir->i_ino).first_prop); 196
671 type = OPFSL_NODENUM; 197 child = dp->child;
672 } 198 while (child) {
673 if (!ino) { 199 int n = strlen(child->path_component_name);
674 u16 node = NODE(dir->i_ino).child; 200
675 while (node != 0xffff) { 201 if (len == n &&
676 if (prom_getname (nodes[node].node, buffer, 128) >= 0) { 202 !strncmp(child->path_component_name, name, len)) {
677 i = strlen (buffer); 203 ent_type = op_inode_node;
678 if (len == i && !strncmp (buffer, name, len)) { 204 ent_data.node = child;
679 ino = NODE2INO(node); 205 ino = child->unique_id;
680 type = OPFSL_DIR; 206 goto found;
681 break;
682 }
683 p = strchr (buffer, '@');
684 if (p && (len == p - buffer)
685 && !strncmp (buffer, name, len)) {
686 ino = NODE2INO(node);
687 type = OPFSL_DIR;
688 break;
689 }
690 }
691 node = nodes[node].next;
692 }
693 }
694 n = NODE(dir->i_ino).node;
695 dirnode = dir->i_ino - OPENPROM_FIRST_INO;
696 if (!ino) {
697 int j = NODEP2INO(NODE(dir->i_ino).first_prop);
698 if (dirnode != aliases) {
699 for (p = prom_firstprop (n, buffer2);
700 p && *p;
701 p = prom_nextprop (n, p, buffer2)) {
702 j++;
703 if ((len == strlen (p))
704 && !strncmp (p, name, len)) {
705 ino = j;
706 type = OPFSL_PROPERTY;
707 break;
708 }
709 }
710 } else {
711 int k;
712 for (k = 0; k < aliases_nodes; k++) {
713 j++;
714 if (alias_names [k]
715 && (len == strlen (alias_names [k]))
716 && !strncmp (alias_names [k], name, len)) {
717 ino = j;
718 type = OPFSL_PROPERTY;
719 break;
720 }
721 }
722 } 207 }
208 child = child->sibling;
723 } 209 }
724 if (!ino) { 210
725 ino = lookup_children (NODE(dir->i_ino).child, name, len); 211 prop = dp->properties;
726 if (ino) 212 while (prop) {
727 type = OPFSL_DIR; 213 int n = strlen(prop->name);
728 else { 214
729 unlock_kernel(); 215 if (len == n && !strncmp(prop->name, name, len)) {
730 return ERR_PTR(-ENOENT); 216 ent_type = op_inode_prop;
217 ent_data.prop = prop;
218 ino = prop->unique_id;
219 goto found;
731 } 220 }
221
222 prop = prop->next;
732 } 223 }
733 inode = iget (dir->i_sb, ino); 224
734 unlock_kernel(); 225 mutex_unlock(&op_mutex);
226 return ERR_PTR(-ENOENT);
227
228found:
229 inode = iget(dir->i_sb, ino);
230 mutex_unlock(&op_mutex);
735 if (!inode) 231 if (!inode)
736 return ERR_PTR(-EINVAL); 232 return ERR_PTR(-EINVAL);
737 switch (type) { 233 ent_oi = OP_I(inode);
738 case OPFSL_DIR: 234 ent_oi->type = ent_type;
235 ent_oi->u = ent_data;
236
237 switch (ent_type) {
238 case op_inode_node:
739 inode->i_mode = S_IFDIR | S_IRUGO | S_IXUGO; 239 inode->i_mode = S_IFDIR | S_IRUGO | S_IXUGO;
740 if (ino == OPENPROM_FIRST_INO + aliases) { 240 inode->i_op = &openprom_inode_operations;
741 inode->i_mode |= S_IWUSR;
742 inode->i_op = &openprom_alias_inode_operations;
743 } else
744 inode->i_op = &openprom_inode_operations;
745 inode->i_fop = &openprom_operations; 241 inode->i_fop = &openprom_operations;
746 inode->i_nlink = 2; 242 inode->i_nlink = 2;
747 break; 243 break;
748 case OPFSL_NODENUM: 244 case op_inode_prop:
749 inode->i_mode = S_IFREG | S_IRUGO; 245 if (!strcmp(dp->name, "options") && (len == 17) &&
750 inode->i_fop = &openpromfs_nodenum_ops; 246 !strncmp (name, "security-password", 17))
751 inode->i_nlink = 1;
752 inode->u.generic_ip = (void *)(long)(n);
753 break;
754 case OPFSL_PROPERTY:
755 if ((dirnode == options) && (len == 17)
756 && !strncmp (name, "security-password", 17))
757 inode->i_mode = S_IFREG | S_IRUSR | S_IWUSR; 247 inode->i_mode = S_IFREG | S_IRUSR | S_IWUSR;
758 else { 248 else
759 inode->i_mode = S_IFREG | S_IRUGO; 249 inode->i_mode = S_IFREG | S_IRUGO;
760 if (dirnode == options || dirnode == aliases) {
761 if (len != 4 || strncmp (name, "name", 4))
762 inode->i_mode |= S_IWUSR;
763 }
764 }
765 inode->i_fop = &openpromfs_prop_ops; 250 inode->i_fop = &openpromfs_prop_ops;
766 inode->i_nlink = 1; 251 inode->i_nlink = 1;
767 if (inode->i_size < 0) 252 inode->i_size = ent_oi->u.prop->length;
768 inode->i_size = 0;
769 inode->u.generic_ip = (void *)(long)(((u16)dirnode) |
770 (((u16)(ino - NODEP2INO(NODE(dir->i_ino).first_prop) - 1)) << 16));
771 break; 253 break;
772 } 254 }
773 255
@@ -781,237 +263,89 @@ static struct dentry *openpromfs_lookup(struct inode * dir, struct dentry *dentr
781static int openpromfs_readdir(struct file * filp, void * dirent, filldir_t filldir) 263static int openpromfs_readdir(struct file * filp, void * dirent, filldir_t filldir)
782{ 264{
783 struct inode *inode = filp->f_dentry->d_inode; 265 struct inode *inode = filp->f_dentry->d_inode;
266 struct op_inode_info *oi = OP_I(inode);
267 struct device_node *dp = oi->u.node;
268 struct device_node *child;
269 struct property *prop;
784 unsigned int ino; 270 unsigned int ino;
785 u32 n; 271 int i;
786 int i, j; 272
787 char buffer[128]; 273 mutex_lock(&op_mutex);
788 u16 node;
789 char *p;
790 char buffer2[64];
791
792 lock_kernel();
793 274
794 ino = inode->i_ino; 275 ino = inode->i_ino;
795 i = filp->f_pos; 276 i = filp->f_pos;
796 switch (i) { 277 switch (i) {
797 case 0: 278 case 0:
798 if (filldir(dirent, ".", 1, i, ino, DT_DIR) < 0) goto out; 279 if (filldir(dirent, ".", 1, i, ino, DT_DIR) < 0)
280 goto out;
799 i++; 281 i++;
800 filp->f_pos++; 282 filp->f_pos++;
801 /* fall thru */ 283 /* fall thru */
802 case 1: 284 case 1:
803 if (filldir(dirent, "..", 2, i, 285 if (filldir(dirent, "..", 2, i,
804 (NODE(ino).parent == 0xffff) ? 286 (dp->parent == NULL ?
805 OPENPROM_ROOT_INO : NODE2INO(NODE(ino).parent), DT_DIR) < 0) 287 OPENPROM_ROOT_INO :
288 dp->parent->unique_id), DT_DIR) < 0)
806 goto out; 289 goto out;
807 i++; 290 i++;
808 filp->f_pos++; 291 filp->f_pos++;
809 /* fall thru */ 292 /* fall thru */
810 default: 293 default:
811 i -= 2; 294 i -= 2;
812 node = NODE(ino).child; 295
813 while (i && node != 0xffff) { 296 /* First, the children nodes as directories. */
814 node = nodes[node].next; 297 child = dp->child;
298 while (i && child) {
299 child = child->sibling;
815 i--; 300 i--;
816 } 301 }
817 while (node != 0xffff) { 302 while (child) {
818 if (prom_getname (nodes[node].node, buffer, 128) < 0) 303 if (filldir(dirent,
819 goto out; 304 child->path_component_name,
820 if (filldir(dirent, buffer, strlen(buffer), 305 strlen(child->path_component_name),
821 filp->f_pos, NODE2INO(node), DT_DIR) < 0) 306 filp->f_pos, child->unique_id, DT_DIR) < 0)
822 goto out; 307 goto out;
308
823 filp->f_pos++; 309 filp->f_pos++;
824 node = nodes[node].next; 310 child = child->sibling;
825 } 311 }
826 j = NODEP2INO(NODE(ino).first_prop); 312
827 if (!i) { 313 /* Next, the properties as files. */
828 if (filldir(dirent, ".node", 5, filp->f_pos, j, DT_REG) < 0) 314 prop = dp->properties;
315 while (i && prop) {
316 prop = prop->next;
317 i--;
318 }
319 while (prop) {
320 if (filldir(dirent, prop->name, strlen(prop->name),
321 filp->f_pos, prop->unique_id, DT_REG) < 0)
829 goto out; 322 goto out;
323
830 filp->f_pos++; 324 filp->f_pos++;
831 } else 325 prop = prop->next;
832 i--;
833 n = NODE(ino).node;
834 if (ino == OPENPROM_FIRST_INO + aliases) {
835 for (j++; i < aliases_nodes; i++, j++) {
836 if (alias_names [i]) {
837 if (filldir (dirent, alias_names [i],
838 strlen (alias_names [i]),
839 filp->f_pos, j, DT_REG) < 0) goto out;
840 filp->f_pos++;
841 }
842 }
843 } else {
844 for (p = prom_firstprop (n, buffer2);
845 p && *p;
846 p = prom_nextprop (n, p, buffer2)) {
847 j++;
848 if (i) i--;
849 else {
850 if (filldir(dirent, p, strlen(p),
851 filp->f_pos, j, DT_REG) < 0)
852 goto out;
853 filp->f_pos++;
854 }
855 }
856 } 326 }
857 } 327 }
858out: 328out:
859 unlock_kernel(); 329 mutex_unlock(&op_mutex);
860 return 0;
861}
862
863static int openpromfs_create (struct inode *dir, struct dentry *dentry, int mode,
864 struct nameidata *nd)
865{
866 char *p;
867 struct inode *inode;
868
869 if (!dir)
870 return -ENOENT;
871 if (dentry->d_name.len > 256)
872 return -EINVAL;
873 p = kmalloc (dentry->d_name.len + 1, GFP_KERNEL);
874 if (!p)
875 return -ENOMEM;
876 strncpy (p, dentry->d_name.name, dentry->d_name.len);
877 p [dentry->d_name.len] = 0;
878 lock_kernel();
879 if (aliases_nodes == ALIASES_NNODES) {
880 kfree(p);
881 unlock_kernel();
882 return -EIO;
883 }
884 alias_names [aliases_nodes++] = p;
885 inode = iget (dir->i_sb,
886 NODEP2INO(NODE(dir->i_ino).first_prop) + aliases_nodes);
887 if (!inode) {
888 unlock_kernel();
889 return -EINVAL;
890 }
891 inode->i_mode = S_IFREG | S_IRUGO | S_IWUSR;
892 inode->i_fop = &openpromfs_prop_ops;
893 inode->i_nlink = 1;
894 if (inode->i_size < 0) inode->i_size = 0;
895 inode->u.generic_ip = (void *)(long)(((u16)aliases) |
896 (((u16)(aliases_nodes - 1)) << 16));
897 unlock_kernel();
898 d_instantiate(dentry, inode);
899 return 0; 330 return 0;
900} 331}
901 332
902static int openpromfs_unlink (struct inode *dir, struct dentry *dentry) 333static kmem_cache_t *op_inode_cachep;
903{
904 unsigned int len;
905 char *p;
906 const char *name;
907 int i;
908
909 name = dentry->d_name.name;
910 len = dentry->d_name.len;
911 lock_kernel();
912 for (i = 0; i < aliases_nodes; i++)
913 if ((strlen (alias_names [i]) == len)
914 && !strncmp (name, alias_names[i], len)) {
915 char buffer[512];
916
917 p = alias_names [i];
918 alias_names [i] = NULL;
919 kfree (p);
920 strcpy (buffer, "nvunalias ");
921 memcpy (buffer + 10, name, len);
922 buffer [10 + len] = 0;
923 prom_feval (buffer);
924 }
925 unlock_kernel();
926 return 0;
927}
928 334
929/* {{{ init section */ 335static struct inode *openprom_alloc_inode(struct super_block *sb)
930static int __init check_space (u16 n)
931{ 336{
932 unsigned long pages; 337 struct op_inode_info *oi;
933 338
934 if ((1 << alloced) * PAGE_SIZE < (n + 2) * sizeof(openpromfs_node)) { 339 oi = kmem_cache_alloc(op_inode_cachep, SLAB_KERNEL);
935 pages = __get_free_pages (GFP_KERNEL, alloced + 1); 340 if (!oi)
936 if (!pages) 341 return NULL;
937 return -1;
938 342
939 if (nodes) { 343 return &oi->vfs_inode;
940 memcpy ((char *)pages, nodes,
941 (1 << alloced) * PAGE_SIZE);
942 free_pages ((unsigned long)nodes, alloced);
943 }
944 alloced++;
945 nodes = (openpromfs_node *)pages;
946 }
947 return 0;
948} 344}
949 345
950static u16 __init get_nodes (u16 parent, u32 node) 346static void openprom_destroy_inode(struct inode *inode)
951{ 347{
952 char *p; 348 kmem_cache_free(op_inode_cachep, OP_I(inode));
953 u16 n = last_node++, i;
954 char buffer[64];
955
956 if (check_space (n) < 0)
957 return 0xffff;
958 nodes[n].parent = parent;
959 nodes[n].node = node;
960 nodes[n].next = 0xffff;
961 nodes[n].child = 0xffff;
962 nodes[n].first_prop = first_prop++;
963 if (!parent) {
964 char buffer[8];
965 int j;
966
967 if ((j = prom_getproperty (node, "name", buffer, 8)) >= 0) {
968 buffer[j] = 0;
969 if (!strcmp (buffer, "options"))
970 options = n;
971 else if (!strcmp (buffer, "aliases"))
972 aliases = n;
973 }
974 }
975 if (n != aliases)
976 for (p = prom_firstprop (node, buffer);
977 p && p != (char *)-1 && *p;
978 p = prom_nextprop (node, p, buffer))
979 first_prop++;
980 else {
981 char *q;
982 for (p = prom_firstprop (node, buffer);
983 p && p != (char *)-1 && *p;
984 p = prom_nextprop (node, p, buffer)) {
985 if (aliases_nodes == ALIASES_NNODES)
986 break;
987 for (i = 0; i < aliases_nodes; i++)
988 if (!strcmp (p, alias_names [i]))
989 break;
990 if (i < aliases_nodes)
991 continue;
992 q = kmalloc (strlen (p) + 1, GFP_KERNEL);
993 if (!q)
994 return 0xffff;
995 strcpy (q, p);
996 alias_names [aliases_nodes++] = q;
997 }
998 first_prop += ALIASES_NNODES;
999 }
1000 node = prom_getchild (node);
1001 if (node) {
1002 parent = get_nodes (n, node);
1003 if (parent == 0xffff)
1004 return 0xffff;
1005 nodes[n].child = parent;
1006 while ((node = prom_getsibling (node)) != 0) {
1007 i = get_nodes (n, node);
1008 if (i == 0xffff)
1009 return 0xffff;
1010 nodes[parent].next = i;
1011 parent = i;
1012 }
1013 }
1014 return n;
1015} 349}
1016 350
1017static void openprom_read_inode(struct inode * inode) 351static void openprom_read_inode(struct inode * inode)
@@ -1031,6 +365,8 @@ static int openprom_remount(struct super_block *sb, int *flags, char *data)
1031} 365}
1032 366
1033static struct super_operations openprom_sops = { 367static struct super_operations openprom_sops = {
368 .alloc_inode = openprom_alloc_inode,
369 .destroy_inode = openprom_destroy_inode,
1034 .read_inode = openprom_read_inode, 370 .read_inode = openprom_read_inode,
1035 .statfs = simple_statfs, 371 .statfs = simple_statfs,
1036 .remount_fs = openprom_remount, 372 .remount_fs = openprom_remount,
@@ -1038,7 +374,8 @@ static struct super_operations openprom_sops = {
1038 374
1039static int openprom_fill_super(struct super_block *s, void *data, int silent) 375static int openprom_fill_super(struct super_block *s, void *data, int silent)
1040{ 376{
1041 struct inode * root_inode; 377 struct inode *root_inode;
378 struct op_inode_info *oi;
1042 379
1043 s->s_flags |= MS_NOATIME; 380 s->s_flags |= MS_NOATIME;
1044 s->s_blocksize = 1024; 381 s->s_blocksize = 1024;
@@ -1049,6 +386,11 @@ static int openprom_fill_super(struct super_block *s, void *data, int silent)
1049 root_inode = iget(s, OPENPROM_ROOT_INO); 386 root_inode = iget(s, OPENPROM_ROOT_INO);
1050 if (!root_inode) 387 if (!root_inode)
1051 goto out_no_root; 388 goto out_no_root;
389
390 oi = OP_I(root_inode);
391 oi->type = op_inode_node;
392 oi->u.node = of_find_node_by_path("/");
393
1052 s->s_root = d_alloc_root(root_inode); 394 s->s_root = d_alloc_root(root_inode);
1053 if (!s->s_root) 395 if (!s->s_root)
1054 goto out_no_root; 396 goto out_no_root;
@@ -1073,29 +415,39 @@ static struct file_system_type openprom_fs_type = {
1073 .kill_sb = kill_anon_super, 415 .kill_sb = kill_anon_super,
1074}; 416};
1075 417
418static void op_inode_init_once(void *data, kmem_cache_t * cachep, unsigned long flags)
419{
420 struct op_inode_info *oi = (struct op_inode_info *) data;
421
422 if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) ==
423 SLAB_CTOR_CONSTRUCTOR)
424 inode_init_once(&oi->vfs_inode);
425}
426
1076static int __init init_openprom_fs(void) 427static int __init init_openprom_fs(void)
1077{ 428{
1078 nodes = (openpromfs_node *)__get_free_pages(GFP_KERNEL, 0); 429 int err;
1079 if (!nodes) { 430
1080 printk (KERN_WARNING "openpromfs: can't get free page\n"); 431 op_inode_cachep = kmem_cache_create("op_inode_cache",
1081 return -EIO; 432 sizeof(struct op_inode_info),
1082 } 433 0,
1083 if (get_nodes (0xffff, prom_root_node) == 0xffff) { 434 (SLAB_RECLAIM_ACCOUNT |
1084 printk (KERN_WARNING "openpromfs: couldn't setup tree\n"); 435 SLAB_MEM_SPREAD),
1085 return -EIO; 436 op_inode_init_once, NULL);
1086 } 437 if (!op_inode_cachep)
1087 nodes[last_node].first_prop = first_prop; 438 return -ENOMEM;
1088 return register_filesystem(&openprom_fs_type); 439
440 err = register_filesystem(&openprom_fs_type);
441 if (err)
442 kmem_cache_destroy(op_inode_cachep);
443
444 return err;
1089} 445}
1090 446
1091static void __exit exit_openprom_fs(void) 447static void __exit exit_openprom_fs(void)
1092{ 448{
1093 int i;
1094 unregister_filesystem(&openprom_fs_type); 449 unregister_filesystem(&openprom_fs_type);
1095 free_pages ((unsigned long)nodes, alloced); 450 kmem_cache_destroy(op_inode_cachep);
1096 for (i = 0; i < aliases_nodes; i++)
1097 kfree (alias_names [i]);
1098 nodes = NULL;
1099} 451}
1100 452
1101module_init(init_openprom_fs) 453module_init(init_openprom_fs)
diff --git a/fs/pnode.c b/fs/pnode.c
index 37b568ed0e05..da42ee61c1df 100644
--- a/fs/pnode.c
+++ b/fs/pnode.c
@@ -53,8 +53,7 @@ static int do_make_slave(struct vfsmount *mnt)
53 if (master) { 53 if (master) {
54 list_for_each_entry(slave_mnt, &mnt->mnt_slave_list, mnt_slave) 54 list_for_each_entry(slave_mnt, &mnt->mnt_slave_list, mnt_slave)
55 slave_mnt->mnt_master = master; 55 slave_mnt->mnt_master = master;
56 list_del(&mnt->mnt_slave); 56 list_move(&mnt->mnt_slave, &master->mnt_slave_list);
57 list_add(&mnt->mnt_slave, &master->mnt_slave_list);
58 list_splice(&mnt->mnt_slave_list, master->mnt_slave_list.prev); 57 list_splice(&mnt->mnt_slave_list, master->mnt_slave_list.prev);
59 INIT_LIST_HEAD(&mnt->mnt_slave_list); 58 INIT_LIST_HEAD(&mnt->mnt_slave_list);
60 } else { 59 } else {
@@ -283,10 +282,8 @@ static void __propagate_umount(struct vfsmount *mnt)
283 * umount the child only if the child has no 282 * umount the child only if the child has no
284 * other children 283 * other children
285 */ 284 */
286 if (child && list_empty(&child->mnt_mounts)) { 285 if (child && list_empty(&child->mnt_mounts))
287 list_del(&child->mnt_hash); 286 list_move_tail(&child->mnt_hash, &mnt->mnt_hash);
288 list_add_tail(&child->mnt_hash, &mnt->mnt_hash);
289 }
290 } 287 }
291} 288}
292 289
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 6afff725a8c9..6ba7785319de 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -74,6 +74,16 @@
74#include <linux/poll.h> 74#include <linux/poll.h>
75#include "internal.h" 75#include "internal.h"
76 76
77/* NOTE:
78 * Implementing inode permission operations in /proc is almost
79 * certainly an error. Permission checks need to happen during
80 * each system call not at open time. The reason is that most of
81 * what we wish to check for permissions in /proc varies at runtime.
82 *
83 * The classic example of a problem is opening file descriptors
84 * in /proc for a task before it execs a suid executable.
85 */
86
77/* 87/*
78 * For hysterical raisins we keep the same inumbers as in the old procfs. 88 * For hysterical raisins we keep the same inumbers as in the old procfs.
79 * Feel free to change the macro below - just keep the range distinct from 89 * Feel free to change the macro below - just keep the range distinct from
@@ -121,6 +131,8 @@ enum pid_directory_inos {
121 PROC_TGID_ATTR_PREV, 131 PROC_TGID_ATTR_PREV,
122 PROC_TGID_ATTR_EXEC, 132 PROC_TGID_ATTR_EXEC,
123 PROC_TGID_ATTR_FSCREATE, 133 PROC_TGID_ATTR_FSCREATE,
134 PROC_TGID_ATTR_KEYCREATE,
135 PROC_TGID_ATTR_SOCKCREATE,
124#endif 136#endif
125#ifdef CONFIG_AUDITSYSCALL 137#ifdef CONFIG_AUDITSYSCALL
126 PROC_TGID_LOGINUID, 138 PROC_TGID_LOGINUID,
@@ -162,6 +174,8 @@ enum pid_directory_inos {
162 PROC_TID_ATTR_PREV, 174 PROC_TID_ATTR_PREV,
163 PROC_TID_ATTR_EXEC, 175 PROC_TID_ATTR_EXEC,
164 PROC_TID_ATTR_FSCREATE, 176 PROC_TID_ATTR_FSCREATE,
177 PROC_TID_ATTR_KEYCREATE,
178 PROC_TID_ATTR_SOCKCREATE,
165#endif 179#endif
166#ifdef CONFIG_AUDITSYSCALL 180#ifdef CONFIG_AUDITSYSCALL
167 PROC_TID_LOGINUID, 181 PROC_TID_LOGINUID,
@@ -173,6 +187,9 @@ enum pid_directory_inos {
173 PROC_TID_FD_DIR = 0x8000, /* 0x8000-0xffff */ 187 PROC_TID_FD_DIR = 0x8000, /* 0x8000-0xffff */
174}; 188};
175 189
190/* Worst case buffer size needed for holding an integer. */
191#define PROC_NUMBUF 10
192
176struct pid_entry { 193struct pid_entry {
177 int type; 194 int type;
178 int len; 195 int len;
@@ -275,6 +292,8 @@ static struct pid_entry tgid_attr_stuff[] = {
275 E(PROC_TGID_ATTR_PREV, "prev", S_IFREG|S_IRUGO), 292 E(PROC_TGID_ATTR_PREV, "prev", S_IFREG|S_IRUGO),
276 E(PROC_TGID_ATTR_EXEC, "exec", S_IFREG|S_IRUGO|S_IWUGO), 293 E(PROC_TGID_ATTR_EXEC, "exec", S_IFREG|S_IRUGO|S_IWUGO),
277 E(PROC_TGID_ATTR_FSCREATE, "fscreate", S_IFREG|S_IRUGO|S_IWUGO), 294 E(PROC_TGID_ATTR_FSCREATE, "fscreate", S_IFREG|S_IRUGO|S_IWUGO),
295 E(PROC_TGID_ATTR_KEYCREATE, "keycreate", S_IFREG|S_IRUGO|S_IWUGO),
296 E(PROC_TGID_ATTR_SOCKCREATE, "sockcreate", S_IFREG|S_IRUGO|S_IWUGO),
278 {0,0,NULL,0} 297 {0,0,NULL,0}
279}; 298};
280static struct pid_entry tid_attr_stuff[] = { 299static struct pid_entry tid_attr_stuff[] = {
@@ -282,6 +301,8 @@ static struct pid_entry tid_attr_stuff[] = {
282 E(PROC_TID_ATTR_PREV, "prev", S_IFREG|S_IRUGO), 301 E(PROC_TID_ATTR_PREV, "prev", S_IFREG|S_IRUGO),
283 E(PROC_TID_ATTR_EXEC, "exec", S_IFREG|S_IRUGO|S_IWUGO), 302 E(PROC_TID_ATTR_EXEC, "exec", S_IFREG|S_IRUGO|S_IWUGO),
284 E(PROC_TID_ATTR_FSCREATE, "fscreate", S_IFREG|S_IRUGO|S_IWUGO), 303 E(PROC_TID_ATTR_FSCREATE, "fscreate", S_IFREG|S_IRUGO|S_IWUGO),
304 E(PROC_TID_ATTR_KEYCREATE, "keycreate", S_IFREG|S_IRUGO|S_IWUGO),
305 E(PROC_TID_ATTR_SOCKCREATE, "sockcreate", S_IFREG|S_IRUGO|S_IWUGO),
285 {0,0,NULL,0} 306 {0,0,NULL,0}
286}; 307};
287#endif 308#endif
@@ -290,12 +311,15 @@ static struct pid_entry tid_attr_stuff[] = {
290 311
291static int proc_fd_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt) 312static int proc_fd_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt)
292{ 313{
293 struct task_struct *task = proc_task(inode); 314 struct task_struct *task = get_proc_task(inode);
294 struct files_struct *files; 315 struct files_struct *files = NULL;
295 struct file *file; 316 struct file *file;
296 int fd = proc_type(inode) - PROC_TID_FD_DIR; 317 int fd = proc_fd(inode);
297 318
298 files = get_files_struct(task); 319 if (task) {
320 files = get_files_struct(task);
321 put_task_struct(task);
322 }
299 if (files) { 323 if (files) {
300 /* 324 /*
301 * We are not taking a ref to the file structure, so we must 325 * We are not taking a ref to the file structure, so we must
@@ -327,29 +351,33 @@ static struct fs_struct *get_fs_struct(struct task_struct *task)
327 return fs; 351 return fs;
328} 352}
329 353
330static int proc_cwd_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt) 354static int get_nr_threads(struct task_struct *tsk)
331{ 355{
332 struct fs_struct *fs = get_fs_struct(proc_task(inode)); 356 /* Must be called with the rcu_read_lock held */
333 int result = -ENOENT; 357 unsigned long flags;
334 if (fs) { 358 int count = 0;
335 read_lock(&fs->lock); 359
336 *mnt = mntget(fs->pwdmnt); 360 if (lock_task_sighand(tsk, &flags)) {
337 *dentry = dget(fs->pwd); 361 count = atomic_read(&tsk->signal->count);
338 read_unlock(&fs->lock); 362 unlock_task_sighand(tsk, &flags);
339 result = 0;
340 put_fs_struct(fs);
341 } 363 }
342 return result; 364 return count;
343} 365}
344 366
345static int proc_root_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt) 367static int proc_cwd_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt)
346{ 368{
347 struct fs_struct *fs = get_fs_struct(proc_task(inode)); 369 struct task_struct *task = get_proc_task(inode);
370 struct fs_struct *fs = NULL;
348 int result = -ENOENT; 371 int result = -ENOENT;
372
373 if (task) {
374 fs = get_fs_struct(task);
375 put_task_struct(task);
376 }
349 if (fs) { 377 if (fs) {
350 read_lock(&fs->lock); 378 read_lock(&fs->lock);
351 *mnt = mntget(fs->rootmnt); 379 *mnt = mntget(fs->pwdmnt);
352 *dentry = dget(fs->root); 380 *dentry = dget(fs->pwd);
353 read_unlock(&fs->lock); 381 read_unlock(&fs->lock);
354 result = 0; 382 result = 0;
355 put_fs_struct(fs); 383 put_fs_struct(fs);
@@ -357,42 +385,16 @@ static int proc_root_link(struct inode *inode, struct dentry **dentry, struct vf
357 return result; 385 return result;
358} 386}
359 387
360 388static int proc_root_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt)
361/* Same as proc_root_link, but this addionally tries to get fs from other
362 * threads in the group */
363static int proc_task_root_link(struct inode *inode, struct dentry **dentry,
364 struct vfsmount **mnt)
365{ 389{
366 struct fs_struct *fs; 390 struct task_struct *task = get_proc_task(inode);
391 struct fs_struct *fs = NULL;
367 int result = -ENOENT; 392 int result = -ENOENT;
368 struct task_struct *leader = proc_task(inode);
369 393
370 task_lock(leader); 394 if (task) {
371 fs = leader->fs; 395 fs = get_fs_struct(task);
372 if (fs) { 396 put_task_struct(task);
373 atomic_inc(&fs->count);
374 task_unlock(leader);
375 } else {
376 /* Try to get fs from other threads */
377 task_unlock(leader);
378 read_lock(&tasklist_lock);
379 if (pid_alive(leader)) {
380 struct task_struct *task = leader;
381
382 while ((task = next_thread(task)) != leader) {
383 task_lock(task);
384 fs = task->fs;
385 if (fs) {
386 atomic_inc(&fs->count);
387 task_unlock(task);
388 break;
389 }
390 task_unlock(task);
391 }
392 }
393 read_unlock(&tasklist_lock);
394 } 397 }
395
396 if (fs) { 398 if (fs) {
397 read_lock(&fs->lock); 399 read_lock(&fs->lock);
398 *mnt = mntget(fs->rootmnt); 400 *mnt = mntget(fs->rootmnt);
@@ -404,7 +406,6 @@ static int proc_task_root_link(struct inode *inode, struct dentry **dentry,
404 return result; 406 return result;
405} 407}
406 408
407
408#define MAY_PTRACE(task) \ 409#define MAY_PTRACE(task) \
409 (task == current || \ 410 (task == current || \
410 (task->parent == current && \ 411 (task->parent == current && \
@@ -535,142 +536,22 @@ static int proc_oom_score(struct task_struct *task, char *buffer)
535/************************************************************************/ 536/************************************************************************/
536 537
537/* permission checks */ 538/* permission checks */
538 539static int proc_fd_access_allowed(struct inode *inode)
539/* If the process being read is separated by chroot from the reading process,
540 * don't let the reader access the threads.
541 *
542 * note: this does dput(root) and mntput(vfsmnt) on exit.
543 */
544static int proc_check_chroot(struct dentry *root, struct vfsmount *vfsmnt)
545{
546 struct dentry *de, *base;
547 struct vfsmount *our_vfsmnt, *mnt;
548 int res = 0;
549
550 read_lock(&current->fs->lock);
551 our_vfsmnt = mntget(current->fs->rootmnt);
552 base = dget(current->fs->root);
553 read_unlock(&current->fs->lock);
554
555 spin_lock(&vfsmount_lock);
556 de = root;
557 mnt = vfsmnt;
558
559 while (mnt != our_vfsmnt) {
560 if (mnt == mnt->mnt_parent)
561 goto out;
562 de = mnt->mnt_mountpoint;
563 mnt = mnt->mnt_parent;
564 }
565
566 if (!is_subdir(de, base))
567 goto out;
568 spin_unlock(&vfsmount_lock);
569
570exit:
571 dput(base);
572 mntput(our_vfsmnt);
573 dput(root);
574 mntput(vfsmnt);
575 return res;
576out:
577 spin_unlock(&vfsmount_lock);
578 res = -EACCES;
579 goto exit;
580}
581
582static int proc_check_root(struct inode *inode)
583{
584 struct dentry *root;
585 struct vfsmount *vfsmnt;
586
587 if (proc_root_link(inode, &root, &vfsmnt)) /* Ewww... */
588 return -ENOENT;
589 return proc_check_chroot(root, vfsmnt);
590}
591
592static int proc_permission(struct inode *inode, int mask, struct nameidata *nd)
593{
594 if (generic_permission(inode, mask, NULL) != 0)
595 return -EACCES;
596 return proc_check_root(inode);
597}
598
599static int proc_task_permission(struct inode *inode, int mask, struct nameidata *nd)
600{
601 struct dentry *root;
602 struct vfsmount *vfsmnt;
603
604 if (generic_permission(inode, mask, NULL) != 0)
605 return -EACCES;
606
607 if (proc_task_root_link(inode, &root, &vfsmnt))
608 return -ENOENT;
609
610 return proc_check_chroot(root, vfsmnt);
611}
612
613extern struct seq_operations proc_pid_maps_op;
614static int maps_open(struct inode *inode, struct file *file)
615{
616 struct task_struct *task = proc_task(inode);
617 int ret = seq_open(file, &proc_pid_maps_op);
618 if (!ret) {
619 struct seq_file *m = file->private_data;
620 m->private = task;
621 }
622 return ret;
623}
624
625static struct file_operations proc_maps_operations = {
626 .open = maps_open,
627 .read = seq_read,
628 .llseek = seq_lseek,
629 .release = seq_release,
630};
631
632#ifdef CONFIG_NUMA
633extern struct seq_operations proc_pid_numa_maps_op;
634static int numa_maps_open(struct inode *inode, struct file *file)
635{
636 struct task_struct *task = proc_task(inode);
637 int ret = seq_open(file, &proc_pid_numa_maps_op);
638 if (!ret) {
639 struct seq_file *m = file->private_data;
640 m->private = task;
641 }
642 return ret;
643}
644
645static struct file_operations proc_numa_maps_operations = {
646 .open = numa_maps_open,
647 .read = seq_read,
648 .llseek = seq_lseek,
649 .release = seq_release,
650};
651#endif
652
653#ifdef CONFIG_MMU
654extern struct seq_operations proc_pid_smaps_op;
655static int smaps_open(struct inode *inode, struct file *file)
656{ 540{
657 struct task_struct *task = proc_task(inode); 541 struct task_struct *task;
658 int ret = seq_open(file, &proc_pid_smaps_op); 542 int allowed = 0;
659 if (!ret) { 543 /* Allow access to a task's file descriptors if it is us or we
660 struct seq_file *m = file->private_data; 544 * may use ptrace attach to the process and find out that
661 m->private = task; 545 * information.
546 */
547 task = get_proc_task(inode);
548 if (task) {
549 allowed = ptrace_may_attach(task);
550 put_task_struct(task);
662 } 551 }
663 return ret; 552 return allowed;
664} 553}
665 554
666static struct file_operations proc_smaps_operations = {
667 .open = smaps_open,
668 .read = seq_read,
669 .llseek = seq_lseek,
670 .release = seq_release,
671};
672#endif
673
674extern struct seq_operations mounts_op; 555extern struct seq_operations mounts_op;
675struct proc_mounts { 556struct proc_mounts {
676 struct seq_file m; 557 struct seq_file m;
@@ -679,16 +560,19 @@ struct proc_mounts {
679 560
680static int mounts_open(struct inode *inode, struct file *file) 561static int mounts_open(struct inode *inode, struct file *file)
681{ 562{
682 struct task_struct *task = proc_task(inode); 563 struct task_struct *task = get_proc_task(inode);
683 struct namespace *namespace; 564 struct namespace *namespace = NULL;
684 struct proc_mounts *p; 565 struct proc_mounts *p;
685 int ret = -EINVAL; 566 int ret = -EINVAL;
686 567
687 task_lock(task); 568 if (task) {
688 namespace = task->namespace; 569 task_lock(task);
689 if (namespace) 570 namespace = task->namespace;
690 get_namespace(namespace); 571 if (namespace)
691 task_unlock(task); 572 get_namespace(namespace);
573 task_unlock(task);
574 put_task_struct(task);
575 }
692 576
693 if (namespace) { 577 if (namespace) {
694 ret = -ENOMEM; 578 ret = -ENOMEM;
@@ -745,17 +629,21 @@ static struct file_operations proc_mounts_operations = {
745extern struct seq_operations mountstats_op; 629extern struct seq_operations mountstats_op;
746static int mountstats_open(struct inode *inode, struct file *file) 630static int mountstats_open(struct inode *inode, struct file *file)
747{ 631{
748 struct task_struct *task = proc_task(inode);
749 int ret = seq_open(file, &mountstats_op); 632 int ret = seq_open(file, &mountstats_op);
750 633
751 if (!ret) { 634 if (!ret) {
752 struct seq_file *m = file->private_data; 635 struct seq_file *m = file->private_data;
753 struct namespace *namespace; 636 struct namespace *namespace = NULL;
754 task_lock(task); 637 struct task_struct *task = get_proc_task(inode);
755 namespace = task->namespace; 638
756 if (namespace) 639 if (task) {
757 get_namespace(namespace); 640 task_lock(task);
758 task_unlock(task); 641 namespace = task->namespace;
642 if (namespace)
643 get_namespace(namespace);
644 task_unlock(task);
645 put_task_struct(task);
646 }
759 647
760 if (namespace) 648 if (namespace)
761 m->private = namespace; 649 m->private = namespace;
@@ -782,18 +670,27 @@ static ssize_t proc_info_read(struct file * file, char __user * buf,
782 struct inode * inode = file->f_dentry->d_inode; 670 struct inode * inode = file->f_dentry->d_inode;
783 unsigned long page; 671 unsigned long page;
784 ssize_t length; 672 ssize_t length;
785 struct task_struct *task = proc_task(inode); 673 struct task_struct *task = get_proc_task(inode);
674
675 length = -ESRCH;
676 if (!task)
677 goto out_no_task;
786 678
787 if (count > PROC_BLOCK_SIZE) 679 if (count > PROC_BLOCK_SIZE)
788 count = PROC_BLOCK_SIZE; 680 count = PROC_BLOCK_SIZE;
681
682 length = -ENOMEM;
789 if (!(page = __get_free_page(GFP_KERNEL))) 683 if (!(page = __get_free_page(GFP_KERNEL)))
790 return -ENOMEM; 684 goto out;
791 685
792 length = PROC_I(inode)->op.proc_read(task, (char*)page); 686 length = PROC_I(inode)->op.proc_read(task, (char*)page);
793 687
794 if (length >= 0) 688 if (length >= 0)
795 length = simple_read_from_buffer(buf, count, ppos, (char *)page, length); 689 length = simple_read_from_buffer(buf, count, ppos, (char *)page, length);
796 free_page(page); 690 free_page(page);
691out:
692 put_task_struct(task);
693out_no_task:
797 return length; 694 return length;
798} 695}
799 696
@@ -810,12 +707,15 @@ static int mem_open(struct inode* inode, struct file* file)
810static ssize_t mem_read(struct file * file, char __user * buf, 707static ssize_t mem_read(struct file * file, char __user * buf,
811 size_t count, loff_t *ppos) 708 size_t count, loff_t *ppos)
812{ 709{
813 struct task_struct *task = proc_task(file->f_dentry->d_inode); 710 struct task_struct *task = get_proc_task(file->f_dentry->d_inode);
814 char *page; 711 char *page;
815 unsigned long src = *ppos; 712 unsigned long src = *ppos;
816 int ret = -ESRCH; 713 int ret = -ESRCH;
817 struct mm_struct *mm; 714 struct mm_struct *mm;
818 715
716 if (!task)
717 goto out_no_task;
718
819 if (!MAY_PTRACE(task) || !ptrace_may_attach(task)) 719 if (!MAY_PTRACE(task) || !ptrace_may_attach(task))
820 goto out; 720 goto out;
821 721
@@ -865,6 +765,8 @@ out_put:
865out_free: 765out_free:
866 free_page((unsigned long) page); 766 free_page((unsigned long) page);
867out: 767out:
768 put_task_struct(task);
769out_no_task:
868 return ret; 770 return ret;
869} 771}
870 772
@@ -877,15 +779,20 @@ static ssize_t mem_write(struct file * file, const char * buf,
877{ 779{
878 int copied = 0; 780 int copied = 0;
879 char *page; 781 char *page;
880 struct task_struct *task = proc_task(file->f_dentry->d_inode); 782 struct task_struct *task = get_proc_task(file->f_dentry->d_inode);
881 unsigned long dst = *ppos; 783 unsigned long dst = *ppos;
882 784
785 copied = -ESRCH;
786 if (!task)
787 goto out_no_task;
788
883 if (!MAY_PTRACE(task) || !ptrace_may_attach(task)) 789 if (!MAY_PTRACE(task) || !ptrace_may_attach(task))
884 return -ESRCH; 790 goto out;
885 791
792 copied = -ENOMEM;
886 page = (char *)__get_free_page(GFP_USER); 793 page = (char *)__get_free_page(GFP_USER);
887 if (!page) 794 if (!page)
888 return -ENOMEM; 795 goto out;
889 796
890 while (count > 0) { 797 while (count > 0) {
891 int this_len, retval; 798 int this_len, retval;
@@ -908,6 +815,9 @@ static ssize_t mem_write(struct file * file, const char * buf,
908 } 815 }
909 *ppos = dst; 816 *ppos = dst;
910 free_page((unsigned long) page); 817 free_page((unsigned long) page);
818out:
819 put_task_struct(task);
820out_no_task:
911 return copied; 821 return copied;
912} 822}
913#endif 823#endif
@@ -938,13 +848,18 @@ static struct file_operations proc_mem_operations = {
938static ssize_t oom_adjust_read(struct file *file, char __user *buf, 848static ssize_t oom_adjust_read(struct file *file, char __user *buf,
939 size_t count, loff_t *ppos) 849 size_t count, loff_t *ppos)
940{ 850{
941 struct task_struct *task = proc_task(file->f_dentry->d_inode); 851 struct task_struct *task = get_proc_task(file->f_dentry->d_inode);
942 char buffer[8]; 852 char buffer[PROC_NUMBUF];
943 size_t len; 853 size_t len;
944 int oom_adjust = task->oomkilladj; 854 int oom_adjust;
945 loff_t __ppos = *ppos; 855 loff_t __ppos = *ppos;
946 856
947 len = sprintf(buffer, "%i\n", oom_adjust); 857 if (!task)
858 return -ESRCH;
859 oom_adjust = task->oomkilladj;
860 put_task_struct(task);
861
862 len = snprintf(buffer, sizeof(buffer), "%i\n", oom_adjust);
948 if (__ppos >= len) 863 if (__ppos >= len)
949 return 0; 864 return 0;
950 if (count > len-__ppos) 865 if (count > len-__ppos)
@@ -958,15 +873,15 @@ static ssize_t oom_adjust_read(struct file *file, char __user *buf,
958static ssize_t oom_adjust_write(struct file *file, const char __user *buf, 873static ssize_t oom_adjust_write(struct file *file, const char __user *buf,
959 size_t count, loff_t *ppos) 874 size_t count, loff_t *ppos)
960{ 875{
961 struct task_struct *task = proc_task(file->f_dentry->d_inode); 876 struct task_struct *task;
962 char buffer[8], *end; 877 char buffer[PROC_NUMBUF], *end;
963 int oom_adjust; 878 int oom_adjust;
964 879
965 if (!capable(CAP_SYS_RESOURCE)) 880 if (!capable(CAP_SYS_RESOURCE))
966 return -EPERM; 881 return -EPERM;
967 memset(buffer, 0, 8); 882 memset(buffer, 0, sizeof(buffer));
968 if (count > 6) 883 if (count > sizeof(buffer) - 1)
969 count = 6; 884 count = sizeof(buffer) - 1;
970 if (copy_from_user(buffer, buf, count)) 885 if (copy_from_user(buffer, buf, count))
971 return -EFAULT; 886 return -EFAULT;
972 oom_adjust = simple_strtol(buffer, &end, 0); 887 oom_adjust = simple_strtol(buffer, &end, 0);
@@ -974,7 +889,11 @@ static ssize_t oom_adjust_write(struct file *file, const char __user *buf,
974 return -EINVAL; 889 return -EINVAL;
975 if (*end == '\n') 890 if (*end == '\n')
976 end++; 891 end++;
892 task = get_proc_task(file->f_dentry->d_inode);
893 if (!task)
894 return -ESRCH;
977 task->oomkilladj = oom_adjust; 895 task->oomkilladj = oom_adjust;
896 put_task_struct(task);
978 if (end - buffer == 0) 897 if (end - buffer == 0)
979 return -EIO; 898 return -EIO;
980 return end - buffer; 899 return end - buffer;
@@ -985,22 +904,21 @@ static struct file_operations proc_oom_adjust_operations = {
985 .write = oom_adjust_write, 904 .write = oom_adjust_write,
986}; 905};
987 906
988static struct inode_operations proc_mem_inode_operations = {
989 .permission = proc_permission,
990};
991
992#ifdef CONFIG_AUDITSYSCALL 907#ifdef CONFIG_AUDITSYSCALL
993#define TMPBUFLEN 21 908#define TMPBUFLEN 21
994static ssize_t proc_loginuid_read(struct file * file, char __user * buf, 909static ssize_t proc_loginuid_read(struct file * file, char __user * buf,
995 size_t count, loff_t *ppos) 910 size_t count, loff_t *ppos)
996{ 911{
997 struct inode * inode = file->f_dentry->d_inode; 912 struct inode * inode = file->f_dentry->d_inode;
998 struct task_struct *task = proc_task(inode); 913 struct task_struct *task = get_proc_task(inode);
999 ssize_t length; 914 ssize_t length;
1000 char tmpbuf[TMPBUFLEN]; 915 char tmpbuf[TMPBUFLEN];
1001 916
917 if (!task)
918 return -ESRCH;
1002 length = scnprintf(tmpbuf, TMPBUFLEN, "%u", 919 length = scnprintf(tmpbuf, TMPBUFLEN, "%u",
1003 audit_get_loginuid(task->audit_context)); 920 audit_get_loginuid(task->audit_context));
921 put_task_struct(task);
1004 return simple_read_from_buffer(buf, count, ppos, tmpbuf, length); 922 return simple_read_from_buffer(buf, count, ppos, tmpbuf, length);
1005} 923}
1006 924
@@ -1010,13 +928,12 @@ static ssize_t proc_loginuid_write(struct file * file, const char __user * buf,
1010 struct inode * inode = file->f_dentry->d_inode; 928 struct inode * inode = file->f_dentry->d_inode;
1011 char *page, *tmp; 929 char *page, *tmp;
1012 ssize_t length; 930 ssize_t length;
1013 struct task_struct *task = proc_task(inode);
1014 uid_t loginuid; 931 uid_t loginuid;
1015 932
1016 if (!capable(CAP_AUDIT_CONTROL)) 933 if (!capable(CAP_AUDIT_CONTROL))
1017 return -EPERM; 934 return -EPERM;
1018 935
1019 if (current != task) 936 if (current != pid_task(proc_pid(inode), PIDTYPE_PID))
1020 return -EPERM; 937 return -EPERM;
1021 938
1022 if (count >= PAGE_SIZE) 939 if (count >= PAGE_SIZE)
@@ -1040,7 +957,7 @@ static ssize_t proc_loginuid_write(struct file * file, const char __user * buf,
1040 goto out_free_page; 957 goto out_free_page;
1041 958
1042 } 959 }
1043 length = audit_set_loginuid(task, loginuid); 960 length = audit_set_loginuid(current, loginuid);
1044 if (likely(length == 0)) 961 if (likely(length == 0))
1045 length = count; 962 length = count;
1046 963
@@ -1059,13 +976,16 @@ static struct file_operations proc_loginuid_operations = {
1059static ssize_t seccomp_read(struct file *file, char __user *buf, 976static ssize_t seccomp_read(struct file *file, char __user *buf,
1060 size_t count, loff_t *ppos) 977 size_t count, loff_t *ppos)
1061{ 978{
1062 struct task_struct *tsk = proc_task(file->f_dentry->d_inode); 979 struct task_struct *tsk = get_proc_task(file->f_dentry->d_inode);
1063 char __buf[20]; 980 char __buf[20];
1064 loff_t __ppos = *ppos; 981 loff_t __ppos = *ppos;
1065 size_t len; 982 size_t len;
1066 983
984 if (!tsk)
985 return -ESRCH;
1067 /* no need to print the trailing zero, so use only len */ 986 /* no need to print the trailing zero, so use only len */
1068 len = sprintf(__buf, "%u\n", tsk->seccomp.mode); 987 len = sprintf(__buf, "%u\n", tsk->seccomp.mode);
988 put_task_struct(tsk);
1069 if (__ppos >= len) 989 if (__ppos >= len)
1070 return 0; 990 return 0;
1071 if (count > len - __ppos) 991 if (count > len - __ppos)
@@ -1079,29 +999,43 @@ static ssize_t seccomp_read(struct file *file, char __user *buf,
1079static ssize_t seccomp_write(struct file *file, const char __user *buf, 999static ssize_t seccomp_write(struct file *file, const char __user *buf,
1080 size_t count, loff_t *ppos) 1000 size_t count, loff_t *ppos)
1081{ 1001{
1082 struct task_struct *tsk = proc_task(file->f_dentry->d_inode); 1002 struct task_struct *tsk = get_proc_task(file->f_dentry->d_inode);
1083 char __buf[20], *end; 1003 char __buf[20], *end;
1084 unsigned int seccomp_mode; 1004 unsigned int seccomp_mode;
1005 ssize_t result;
1006
1007 result = -ESRCH;
1008 if (!tsk)
1009 goto out_no_task;
1085 1010
1086 /* can set it only once to be even more secure */ 1011 /* can set it only once to be even more secure */
1012 result = -EPERM;
1087 if (unlikely(tsk->seccomp.mode)) 1013 if (unlikely(tsk->seccomp.mode))
1088 return -EPERM; 1014 goto out;
1089 1015
1016 result = -EFAULT;
1090 memset(__buf, 0, sizeof(__buf)); 1017 memset(__buf, 0, sizeof(__buf));
1091 count = min(count, sizeof(__buf) - 1); 1018 count = min(count, sizeof(__buf) - 1);
1092 if (copy_from_user(__buf, buf, count)) 1019 if (copy_from_user(__buf, buf, count))
1093 return -EFAULT; 1020 goto out;
1021
1094 seccomp_mode = simple_strtoul(__buf, &end, 0); 1022 seccomp_mode = simple_strtoul(__buf, &end, 0);
1095 if (*end == '\n') 1023 if (*end == '\n')
1096 end++; 1024 end++;
1025 result = -EINVAL;
1097 if (seccomp_mode && seccomp_mode <= NR_SECCOMP_MODES) { 1026 if (seccomp_mode && seccomp_mode <= NR_SECCOMP_MODES) {
1098 tsk->seccomp.mode = seccomp_mode; 1027 tsk->seccomp.mode = seccomp_mode;
1099 set_tsk_thread_flag(tsk, TIF_SECCOMP); 1028 set_tsk_thread_flag(tsk, TIF_SECCOMP);
1100 } else 1029 } else
1101 return -EINVAL; 1030 goto out;
1031 result = -EIO;
1102 if (unlikely(!(end - __buf))) 1032 if (unlikely(!(end - __buf)))
1103 return -EIO; 1033 goto out;
1104 return end - __buf; 1034 result = end - __buf;
1035out:
1036 put_task_struct(tsk);
1037out_no_task:
1038 return result;
1105} 1039}
1106 1040
1107static struct file_operations proc_seccomp_operations = { 1041static struct file_operations proc_seccomp_operations = {
@@ -1118,10 +1052,8 @@ static void *proc_pid_follow_link(struct dentry *dentry, struct nameidata *nd)
1118 /* We don't need a base pointer in the /proc filesystem */ 1052 /* We don't need a base pointer in the /proc filesystem */
1119 path_release(nd); 1053 path_release(nd);
1120 1054
1121 if (current->fsuid != inode->i_uid && !capable(CAP_DAC_OVERRIDE)) 1055 /* Are we allowed to snoop on the tasks file descriptors? */
1122 goto out; 1056 if (!proc_fd_access_allowed(inode))
1123 error = proc_check_root(inode);
1124 if (error)
1125 goto out; 1057 goto out;
1126 1058
1127 error = PROC_I(inode)->op.proc_get_link(inode, &nd->dentry, &nd->mnt); 1059 error = PROC_I(inode)->op.proc_get_link(inode, &nd->dentry, &nd->mnt);
@@ -1163,12 +1095,8 @@ static int proc_pid_readlink(struct dentry * dentry, char __user * buffer, int b
1163 struct dentry *de; 1095 struct dentry *de;
1164 struct vfsmount *mnt = NULL; 1096 struct vfsmount *mnt = NULL;
1165 1097
1166 lock_kernel(); 1098 /* Are we allowed to snoop on the tasks file descriptors? */
1167 1099 if (!proc_fd_access_allowed(inode))
1168 if (current->fsuid != inode->i_uid && !capable(CAP_DAC_OVERRIDE))
1169 goto out;
1170 error = proc_check_root(inode);
1171 if (error)
1172 goto out; 1100 goto out;
1173 1101
1174 error = PROC_I(inode)->op.proc_get_link(inode, &de, &mnt); 1102 error = PROC_I(inode)->op.proc_get_link(inode, &de, &mnt);
@@ -1179,7 +1107,6 @@ static int proc_pid_readlink(struct dentry * dentry, char __user * buffer, int b
1179 dput(de); 1107 dput(de);
1180 mntput(mnt); 1108 mntput(mnt);
1181out: 1109out:
1182 unlock_kernel();
1183 return error; 1110 return error;
1184} 1111}
1185 1112
@@ -1188,21 +1115,20 @@ static struct inode_operations proc_pid_link_inode_operations = {
1188 .follow_link = proc_pid_follow_link 1115 .follow_link = proc_pid_follow_link
1189}; 1116};
1190 1117
1191#define NUMBUF 10
1192
1193static int proc_readfd(struct file * filp, void * dirent, filldir_t filldir) 1118static int proc_readfd(struct file * filp, void * dirent, filldir_t filldir)
1194{ 1119{
1195 struct inode *inode = filp->f_dentry->d_inode; 1120 struct dentry *dentry = filp->f_dentry;
1196 struct task_struct *p = proc_task(inode); 1121 struct inode *inode = dentry->d_inode;
1122 struct task_struct *p = get_proc_task(inode);
1197 unsigned int fd, tid, ino; 1123 unsigned int fd, tid, ino;
1198 int retval; 1124 int retval;
1199 char buf[NUMBUF]; 1125 char buf[PROC_NUMBUF];
1200 struct files_struct * files; 1126 struct files_struct * files;
1201 struct fdtable *fdt; 1127 struct fdtable *fdt;
1202 1128
1203 retval = -ENOENT; 1129 retval = -ENOENT;
1204 if (!pid_alive(p)) 1130 if (!p)
1205 goto out; 1131 goto out_no_task;
1206 retval = 0; 1132 retval = 0;
1207 tid = p->pid; 1133 tid = p->pid;
1208 1134
@@ -1213,7 +1139,7 @@ static int proc_readfd(struct file * filp, void * dirent, filldir_t filldir)
1213 goto out; 1139 goto out;
1214 filp->f_pos++; 1140 filp->f_pos++;
1215 case 1: 1141 case 1:
1216 ino = fake_ino(tid, PROC_TID_INO); 1142 ino = parent_ino(dentry);
1217 if (filldir(dirent, "..", 2, 1, ino, DT_DIR) < 0) 1143 if (filldir(dirent, "..", 2, 1, ino, DT_DIR) < 0)
1218 goto out; 1144 goto out;
1219 filp->f_pos++; 1145 filp->f_pos++;
@@ -1232,7 +1158,7 @@ static int proc_readfd(struct file * filp, void * dirent, filldir_t filldir)
1232 continue; 1158 continue;
1233 rcu_read_unlock(); 1159 rcu_read_unlock();
1234 1160
1235 j = NUMBUF; 1161 j = PROC_NUMBUF;
1236 i = fd; 1162 i = fd;
1237 do { 1163 do {
1238 j--; 1164 j--;
@@ -1241,7 +1167,7 @@ static int proc_readfd(struct file * filp, void * dirent, filldir_t filldir)
1241 } while (i); 1167 } while (i);
1242 1168
1243 ino = fake_ino(tid, PROC_TID_FD_DIR + fd); 1169 ino = fake_ino(tid, PROC_TID_FD_DIR + fd);
1244 if (filldir(dirent, buf+j, NUMBUF-j, fd+2, ino, DT_LNK) < 0) { 1170 if (filldir(dirent, buf+j, PROC_NUMBUF-j, fd+2, ino, DT_LNK) < 0) {
1245 rcu_read_lock(); 1171 rcu_read_lock();
1246 break; 1172 break;
1247 } 1173 }
@@ -1251,6 +1177,8 @@ static int proc_readfd(struct file * filp, void * dirent, filldir_t filldir)
1251 put_files_struct(files); 1177 put_files_struct(files);
1252 } 1178 }
1253out: 1179out:
1180 put_task_struct(p);
1181out_no_task:
1254 return retval; 1182 return retval;
1255} 1183}
1256 1184
@@ -1262,16 +1190,18 @@ static int proc_pident_readdir(struct file *filp,
1262 int pid; 1190 int pid;
1263 struct dentry *dentry = filp->f_dentry; 1191 struct dentry *dentry = filp->f_dentry;
1264 struct inode *inode = dentry->d_inode; 1192 struct inode *inode = dentry->d_inode;
1193 struct task_struct *task = get_proc_task(inode);
1265 struct pid_entry *p; 1194 struct pid_entry *p;
1266 ino_t ino; 1195 ino_t ino;
1267 int ret; 1196 int ret;
1268 1197
1269 ret = -ENOENT; 1198 ret = -ENOENT;
1270 if (!pid_alive(proc_task(inode))) 1199 if (!task)
1271 goto out; 1200 goto out;
1272 1201
1273 ret = 0; 1202 ret = 0;
1274 pid = proc_task(inode)->pid; 1203 pid = task->pid;
1204 put_task_struct(task);
1275 i = filp->f_pos; 1205 i = filp->f_pos;
1276 switch (i) { 1206 switch (i) {
1277 case 0: 1207 case 0:
@@ -1354,22 +1284,19 @@ static struct inode *proc_pid_make_inode(struct super_block * sb, struct task_st
1354 1284
1355 /* Common stuff */ 1285 /* Common stuff */
1356 ei = PROC_I(inode); 1286 ei = PROC_I(inode);
1357 ei->task = NULL;
1358 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; 1287 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
1359 inode->i_ino = fake_ino(task->pid, ino); 1288 inode->i_ino = fake_ino(task->pid, ino);
1360 1289
1361 if (!pid_alive(task))
1362 goto out_unlock;
1363
1364 /* 1290 /*
1365 * grab the reference to task. 1291 * grab the reference to task.
1366 */ 1292 */
1367 get_task_struct(task); 1293 ei->pid = get_pid(task->pids[PIDTYPE_PID].pid);
1368 ei->task = task; 1294 if (!ei->pid)
1369 ei->type = ino; 1295 goto out_unlock;
1296
1370 inode->i_uid = 0; 1297 inode->i_uid = 0;
1371 inode->i_gid = 0; 1298 inode->i_gid = 0;
1372 if (ino == PROC_TGID_INO || ino == PROC_TID_INO || task_dumpable(task)) { 1299 if (task_dumpable(task)) {
1373 inode->i_uid = task->euid; 1300 inode->i_uid = task->euid;
1374 inode->i_gid = task->egid; 1301 inode->i_gid = task->egid;
1375 } 1302 }
@@ -1379,7 +1306,6 @@ out:
1379 return inode; 1306 return inode;
1380 1307
1381out_unlock: 1308out_unlock:
1382 ei->pde = NULL;
1383 iput(inode); 1309 iput(inode);
1384 return NULL; 1310 return NULL;
1385} 1311}
@@ -1393,13 +1319,21 @@ out_unlock:
1393 * 1319 *
1394 * Rewrite the inode's ownerships here because the owning task may have 1320 * Rewrite the inode's ownerships here because the owning task may have
1395 * performed a setuid(), etc. 1321 * performed a setuid(), etc.
1322 *
1323 * Before the /proc/pid/status file was created the only way to read
1324 * the effective uid of a /process was to stat /proc/pid. Reading
1325 * /proc/pid/status is slow enough that procps and other packages
1326 * kept stating /proc/pid. To keep the rules in /proc simple I have
1327 * made this apply to all per process world readable and executable
1328 * directories.
1396 */ 1329 */
1397static int pid_revalidate(struct dentry *dentry, struct nameidata *nd) 1330static int pid_revalidate(struct dentry *dentry, struct nameidata *nd)
1398{ 1331{
1399 struct inode *inode = dentry->d_inode; 1332 struct inode *inode = dentry->d_inode;
1400 struct task_struct *task = proc_task(inode); 1333 struct task_struct *task = get_proc_task(inode);
1401 if (pid_alive(task)) { 1334 if (task) {
1402 if (proc_type(inode) == PROC_TGID_INO || proc_type(inode) == PROC_TID_INO || task_dumpable(task)) { 1335 if ((inode->i_mode == (S_IFDIR|S_IRUGO|S_IXUGO)) ||
1336 task_dumpable(task)) {
1403 inode->i_uid = task->euid; 1337 inode->i_uid = task->euid;
1404 inode->i_gid = task->egid; 1338 inode->i_gid = task->egid;
1405 } else { 1339 } else {
@@ -1407,59 +1341,75 @@ static int pid_revalidate(struct dentry *dentry, struct nameidata *nd)
1407 inode->i_gid = 0; 1341 inode->i_gid = 0;
1408 } 1342 }
1409 security_task_to_inode(task, inode); 1343 security_task_to_inode(task, inode);
1344 put_task_struct(task);
1410 return 1; 1345 return 1;
1411 } 1346 }
1412 d_drop(dentry); 1347 d_drop(dentry);
1413 return 0; 1348 return 0;
1414} 1349}
1415 1350
1351static int pid_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
1352{
1353 struct inode *inode = dentry->d_inode;
1354 struct task_struct *task;
1355 generic_fillattr(inode, stat);
1356
1357 rcu_read_lock();
1358 stat->uid = 0;
1359 stat->gid = 0;
1360 task = pid_task(proc_pid(inode), PIDTYPE_PID);
1361 if (task) {
1362 if ((inode->i_mode == (S_IFDIR|S_IRUGO|S_IXUGO)) ||
1363 task_dumpable(task)) {
1364 stat->uid = task->euid;
1365 stat->gid = task->egid;
1366 }
1367 }
1368 rcu_read_unlock();
1369 return 0;
1370}
1371
1416static int tid_fd_revalidate(struct dentry *dentry, struct nameidata *nd) 1372static int tid_fd_revalidate(struct dentry *dentry, struct nameidata *nd)
1417{ 1373{
1418 struct inode *inode = dentry->d_inode; 1374 struct inode *inode = dentry->d_inode;
1419 struct task_struct *task = proc_task(inode); 1375 struct task_struct *task = get_proc_task(inode);
1420 int fd = proc_type(inode) - PROC_TID_FD_DIR; 1376 int fd = proc_fd(inode);
1421 struct files_struct *files; 1377 struct files_struct *files;
1422 1378
1423 files = get_files_struct(task); 1379 if (task) {
1424 if (files) { 1380 files = get_files_struct(task);
1425 rcu_read_lock(); 1381 if (files) {
1426 if (fcheck_files(files, fd)) { 1382 rcu_read_lock();
1383 if (fcheck_files(files, fd)) {
1384 rcu_read_unlock();
1385 put_files_struct(files);
1386 if (task_dumpable(task)) {
1387 inode->i_uid = task->euid;
1388 inode->i_gid = task->egid;
1389 } else {
1390 inode->i_uid = 0;
1391 inode->i_gid = 0;
1392 }
1393 security_task_to_inode(task, inode);
1394 put_task_struct(task);
1395 return 1;
1396 }
1427 rcu_read_unlock(); 1397 rcu_read_unlock();
1428 put_files_struct(files); 1398 put_files_struct(files);
1429 if (task_dumpable(task)) {
1430 inode->i_uid = task->euid;
1431 inode->i_gid = task->egid;
1432 } else {
1433 inode->i_uid = 0;
1434 inode->i_gid = 0;
1435 }
1436 security_task_to_inode(task, inode);
1437 return 1;
1438 } 1399 }
1439 rcu_read_unlock(); 1400 put_task_struct(task);
1440 put_files_struct(files);
1441 } 1401 }
1442 d_drop(dentry); 1402 d_drop(dentry);
1443 return 0; 1403 return 0;
1444} 1404}
1445 1405
1446static void pid_base_iput(struct dentry *dentry, struct inode *inode)
1447{
1448 struct task_struct *task = proc_task(inode);
1449 spin_lock(&task->proc_lock);
1450 if (task->proc_dentry == dentry)
1451 task->proc_dentry = NULL;
1452 spin_unlock(&task->proc_lock);
1453 iput(inode);
1454}
1455
1456static int pid_delete_dentry(struct dentry * dentry) 1406static int pid_delete_dentry(struct dentry * dentry)
1457{ 1407{
1458 /* Is the task we represent dead? 1408 /* Is the task we represent dead?
1459 * If so, then don't put the dentry on the lru list, 1409 * If so, then don't put the dentry on the lru list,
1460 * kill it immediately. 1410 * kill it immediately.
1461 */ 1411 */
1462 return !pid_alive(proc_task(dentry->d_inode)); 1412 return !proc_pid(dentry->d_inode)->tasks[PIDTYPE_PID].first;
1463} 1413}
1464 1414
1465static struct dentry_operations tid_fd_dentry_operations = 1415static struct dentry_operations tid_fd_dentry_operations =
@@ -1474,13 +1424,6 @@ static struct dentry_operations pid_dentry_operations =
1474 .d_delete = pid_delete_dentry, 1424 .d_delete = pid_delete_dentry,
1475}; 1425};
1476 1426
1477static struct dentry_operations pid_base_dentry_operations =
1478{
1479 .d_revalidate = pid_revalidate,
1480 .d_iput = pid_base_iput,
1481 .d_delete = pid_delete_dentry,
1482};
1483
1484/* Lookups */ 1427/* Lookups */
1485 1428
1486static unsigned name_to_int(struct dentry *dentry) 1429static unsigned name_to_int(struct dentry *dentry)
@@ -1508,22 +1451,24 @@ out:
1508/* SMP-safe */ 1451/* SMP-safe */
1509static struct dentry *proc_lookupfd(struct inode * dir, struct dentry * dentry, struct nameidata *nd) 1452static struct dentry *proc_lookupfd(struct inode * dir, struct dentry * dentry, struct nameidata *nd)
1510{ 1453{
1511 struct task_struct *task = proc_task(dir); 1454 struct task_struct *task = get_proc_task(dir);
1512 unsigned fd = name_to_int(dentry); 1455 unsigned fd = name_to_int(dentry);
1456 struct dentry *result = ERR_PTR(-ENOENT);
1513 struct file * file; 1457 struct file * file;
1514 struct files_struct * files; 1458 struct files_struct * files;
1515 struct inode *inode; 1459 struct inode *inode;
1516 struct proc_inode *ei; 1460 struct proc_inode *ei;
1517 1461
1462 if (!task)
1463 goto out_no_task;
1518 if (fd == ~0U) 1464 if (fd == ~0U)
1519 goto out; 1465 goto out;
1520 if (!pid_alive(task))
1521 goto out;
1522 1466
1523 inode = proc_pid_make_inode(dir->i_sb, task, PROC_TID_FD_DIR+fd); 1467 inode = proc_pid_make_inode(dir->i_sb, task, PROC_TID_FD_DIR+fd);
1524 if (!inode) 1468 if (!inode)
1525 goto out; 1469 goto out;
1526 ei = PROC_I(inode); 1470 ei = PROC_I(inode);
1471 ei->fd = fd;
1527 files = get_files_struct(task); 1472 files = get_files_struct(task);
1528 if (!files) 1473 if (!files)
1529 goto out_unlock; 1474 goto out_unlock;
@@ -1548,19 +1493,25 @@ static struct dentry *proc_lookupfd(struct inode * dir, struct dentry * dentry,
1548 ei->op.proc_get_link = proc_fd_link; 1493 ei->op.proc_get_link = proc_fd_link;
1549 dentry->d_op = &tid_fd_dentry_operations; 1494 dentry->d_op = &tid_fd_dentry_operations;
1550 d_add(dentry, inode); 1495 d_add(dentry, inode);
1551 return NULL; 1496 /* Close the race of the process dying before we return the dentry */
1497 if (tid_fd_revalidate(dentry, NULL))
1498 result = NULL;
1499out:
1500 put_task_struct(task);
1501out_no_task:
1502 return result;
1552 1503
1553out_unlock2: 1504out_unlock2:
1554 spin_unlock(&files->file_lock); 1505 spin_unlock(&files->file_lock);
1555 put_files_struct(files); 1506 put_files_struct(files);
1556out_unlock: 1507out_unlock:
1557 iput(inode); 1508 iput(inode);
1558out: 1509 goto out;
1559 return ERR_PTR(-ENOENT);
1560} 1510}
1561 1511
1562static int proc_task_readdir(struct file * filp, void * dirent, filldir_t filldir); 1512static int proc_task_readdir(struct file * filp, void * dirent, filldir_t filldir);
1563static struct dentry *proc_task_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd); 1513static struct dentry *proc_task_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd);
1514static int proc_task_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat);
1564 1515
1565static struct file_operations proc_fd_operations = { 1516static struct file_operations proc_fd_operations = {
1566 .read = generic_read_dir, 1517 .read = generic_read_dir,
@@ -1577,12 +1528,11 @@ static struct file_operations proc_task_operations = {
1577 */ 1528 */
1578static struct inode_operations proc_fd_inode_operations = { 1529static struct inode_operations proc_fd_inode_operations = {
1579 .lookup = proc_lookupfd, 1530 .lookup = proc_lookupfd,
1580 .permission = proc_permission,
1581}; 1531};
1582 1532
1583static struct inode_operations proc_task_inode_operations = { 1533static struct inode_operations proc_task_inode_operations = {
1584 .lookup = proc_task_lookup, 1534 .lookup = proc_task_lookup,
1585 .permission = proc_task_permission, 1535 .getattr = proc_task_getattr,
1586}; 1536};
1587 1537
1588#ifdef CONFIG_SECURITY 1538#ifdef CONFIG_SECURITY
@@ -1592,12 +1542,17 @@ static ssize_t proc_pid_attr_read(struct file * file, char __user * buf,
1592 struct inode * inode = file->f_dentry->d_inode; 1542 struct inode * inode = file->f_dentry->d_inode;
1593 unsigned long page; 1543 unsigned long page;
1594 ssize_t length; 1544 ssize_t length;
1595 struct task_struct *task = proc_task(inode); 1545 struct task_struct *task = get_proc_task(inode);
1546
1547 length = -ESRCH;
1548 if (!task)
1549 goto out_no_task;
1596 1550
1597 if (count > PAGE_SIZE) 1551 if (count > PAGE_SIZE)
1598 count = PAGE_SIZE; 1552 count = PAGE_SIZE;
1553 length = -ENOMEM;
1599 if (!(page = __get_free_page(GFP_KERNEL))) 1554 if (!(page = __get_free_page(GFP_KERNEL)))
1600 return -ENOMEM; 1555 goto out;
1601 1556
1602 length = security_getprocattr(task, 1557 length = security_getprocattr(task,
1603 (char*)file->f_dentry->d_name.name, 1558 (char*)file->f_dentry->d_name.name,
@@ -1605,6 +1560,9 @@ static ssize_t proc_pid_attr_read(struct file * file, char __user * buf,
1605 if (length >= 0) 1560 if (length >= 0)
1606 length = simple_read_from_buffer(buf, count, ppos, (char *)page, length); 1561 length = simple_read_from_buffer(buf, count, ppos, (char *)page, length);
1607 free_page(page); 1562 free_page(page);
1563out:
1564 put_task_struct(task);
1565out_no_task:
1608 return length; 1566 return length;
1609} 1567}
1610 1568
@@ -1614,26 +1572,36 @@ static ssize_t proc_pid_attr_write(struct file * file, const char __user * buf,
1614 struct inode * inode = file->f_dentry->d_inode; 1572 struct inode * inode = file->f_dentry->d_inode;
1615 char *page; 1573 char *page;
1616 ssize_t length; 1574 ssize_t length;
1617 struct task_struct *task = proc_task(inode); 1575 struct task_struct *task = get_proc_task(inode);
1618 1576
1577 length = -ESRCH;
1578 if (!task)
1579 goto out_no_task;
1619 if (count > PAGE_SIZE) 1580 if (count > PAGE_SIZE)
1620 count = PAGE_SIZE; 1581 count = PAGE_SIZE;
1621 if (*ppos != 0) { 1582
1622 /* No partial writes. */ 1583 /* No partial writes. */
1623 return -EINVAL; 1584 length = -EINVAL;
1624 } 1585 if (*ppos != 0)
1586 goto out;
1587
1588 length = -ENOMEM;
1625 page = (char*)__get_free_page(GFP_USER); 1589 page = (char*)__get_free_page(GFP_USER);
1626 if (!page) 1590 if (!page)
1627 return -ENOMEM; 1591 goto out;
1592
1628 length = -EFAULT; 1593 length = -EFAULT;
1629 if (copy_from_user(page, buf, count)) 1594 if (copy_from_user(page, buf, count))
1630 goto out; 1595 goto out_free;
1631 1596
1632 length = security_setprocattr(task, 1597 length = security_setprocattr(task,
1633 (char*)file->f_dentry->d_name.name, 1598 (char*)file->f_dentry->d_name.name,
1634 (void*)page, count); 1599 (void*)page, count);
1635out: 1600out_free:
1636 free_page((unsigned long) page); 1601 free_page((unsigned long) page);
1602out:
1603 put_task_struct(task);
1604out_no_task:
1637 return length; 1605 return length;
1638} 1606}
1639 1607
@@ -1648,24 +1616,22 @@ static struct file_operations proc_tgid_attr_operations;
1648static struct inode_operations proc_tgid_attr_inode_operations; 1616static struct inode_operations proc_tgid_attr_inode_operations;
1649#endif 1617#endif
1650 1618
1651static int get_tid_list(int index, unsigned int *tids, struct inode *dir);
1652
1653/* SMP-safe */ 1619/* SMP-safe */
1654static struct dentry *proc_pident_lookup(struct inode *dir, 1620static struct dentry *proc_pident_lookup(struct inode *dir,
1655 struct dentry *dentry, 1621 struct dentry *dentry,
1656 struct pid_entry *ents) 1622 struct pid_entry *ents)
1657{ 1623{
1658 struct inode *inode; 1624 struct inode *inode;
1659 int error; 1625 struct dentry *error;
1660 struct task_struct *task = proc_task(dir); 1626 struct task_struct *task = get_proc_task(dir);
1661 struct pid_entry *p; 1627 struct pid_entry *p;
1662 struct proc_inode *ei; 1628 struct proc_inode *ei;
1663 1629
1664 error = -ENOENT; 1630 error = ERR_PTR(-ENOENT);
1665 inode = NULL; 1631 inode = NULL;
1666 1632
1667 if (!pid_alive(task)) 1633 if (!task)
1668 goto out; 1634 goto out_no_task;
1669 1635
1670 for (p = ents; p->name; p++) { 1636 for (p = ents; p->name; p++) {
1671 if (p->len != dentry->d_name.len) 1637 if (p->len != dentry->d_name.len)
@@ -1676,7 +1642,7 @@ static struct dentry *proc_pident_lookup(struct inode *dir,
1676 if (!p->name) 1642 if (!p->name)
1677 goto out; 1643 goto out;
1678 1644
1679 error = -EINVAL; 1645 error = ERR_PTR(-EINVAL);
1680 inode = proc_pid_make_inode(dir->i_sb, task, p->type); 1646 inode = proc_pid_make_inode(dir->i_sb, task, p->type);
1681 if (!inode) 1647 if (!inode)
1682 goto out; 1648 goto out;
@@ -1689,7 +1655,7 @@ static struct dentry *proc_pident_lookup(struct inode *dir,
1689 */ 1655 */
1690 switch(p->type) { 1656 switch(p->type) {
1691 case PROC_TGID_TASK: 1657 case PROC_TGID_TASK:
1692 inode->i_nlink = 2 + get_tid_list(2, NULL, dir); 1658 inode->i_nlink = 2;
1693 inode->i_op = &proc_task_inode_operations; 1659 inode->i_op = &proc_task_inode_operations;
1694 inode->i_fop = &proc_task_operations; 1660 inode->i_fop = &proc_task_operations;
1695 break; 1661 break;
@@ -1759,7 +1725,6 @@ static struct dentry *proc_pident_lookup(struct inode *dir,
1759#endif 1725#endif
1760 case PROC_TID_MEM: 1726 case PROC_TID_MEM:
1761 case PROC_TGID_MEM: 1727 case PROC_TGID_MEM:
1762 inode->i_op = &proc_mem_inode_operations;
1763 inode->i_fop = &proc_mem_operations; 1728 inode->i_fop = &proc_mem_operations;
1764 break; 1729 break;
1765#ifdef CONFIG_SECCOMP 1730#ifdef CONFIG_SECCOMP
@@ -1801,6 +1766,10 @@ static struct dentry *proc_pident_lookup(struct inode *dir,
1801 case PROC_TGID_ATTR_EXEC: 1766 case PROC_TGID_ATTR_EXEC:
1802 case PROC_TID_ATTR_FSCREATE: 1767 case PROC_TID_ATTR_FSCREATE:
1803 case PROC_TGID_ATTR_FSCREATE: 1768 case PROC_TGID_ATTR_FSCREATE:
1769 case PROC_TID_ATTR_KEYCREATE:
1770 case PROC_TGID_ATTR_KEYCREATE:
1771 case PROC_TID_ATTR_SOCKCREATE:
1772 case PROC_TGID_ATTR_SOCKCREATE:
1804 inode->i_fop = &proc_pid_attr_operations; 1773 inode->i_fop = &proc_pid_attr_operations;
1805 break; 1774 break;
1806#endif 1775#endif
@@ -1842,14 +1811,18 @@ static struct dentry *proc_pident_lookup(struct inode *dir,
1842 default: 1811 default:
1843 printk("procfs: impossible type (%d)",p->type); 1812 printk("procfs: impossible type (%d)",p->type);
1844 iput(inode); 1813 iput(inode);
1845 return ERR_PTR(-EINVAL); 1814 error = ERR_PTR(-EINVAL);
1815 goto out;
1846 } 1816 }
1847 dentry->d_op = &pid_dentry_operations; 1817 dentry->d_op = &pid_dentry_operations;
1848 d_add(dentry, inode); 1818 d_add(dentry, inode);
1849 return NULL; 1819 /* Close the race of the process dying before we return the dentry */
1850 1820 if (pid_revalidate(dentry, NULL))
1821 error = NULL;
1851out: 1822out:
1852 return ERR_PTR(error); 1823 put_task_struct(task);
1824out_no_task:
1825 return error;
1853} 1826}
1854 1827
1855static struct dentry *proc_tgid_base_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd){ 1828static struct dentry *proc_tgid_base_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd){
@@ -1872,10 +1845,12 @@ static struct file_operations proc_tid_base_operations = {
1872 1845
1873static struct inode_operations proc_tgid_base_inode_operations = { 1846static struct inode_operations proc_tgid_base_inode_operations = {
1874 .lookup = proc_tgid_base_lookup, 1847 .lookup = proc_tgid_base_lookup,
1848 .getattr = pid_getattr,
1875}; 1849};
1876 1850
1877static struct inode_operations proc_tid_base_inode_operations = { 1851static struct inode_operations proc_tid_base_inode_operations = {
1878 .lookup = proc_tid_base_lookup, 1852 .lookup = proc_tid_base_lookup,
1853 .getattr = pid_getattr,
1879}; 1854};
1880 1855
1881#ifdef CONFIG_SECURITY 1856#ifdef CONFIG_SECURITY
@@ -1917,10 +1892,12 @@ static struct dentry *proc_tid_attr_lookup(struct inode *dir,
1917 1892
1918static struct inode_operations proc_tgid_attr_inode_operations = { 1893static struct inode_operations proc_tgid_attr_inode_operations = {
1919 .lookup = proc_tgid_attr_lookup, 1894 .lookup = proc_tgid_attr_lookup,
1895 .getattr = pid_getattr,
1920}; 1896};
1921 1897
1922static struct inode_operations proc_tid_attr_inode_operations = { 1898static struct inode_operations proc_tid_attr_inode_operations = {
1923 .lookup = proc_tid_attr_lookup, 1899 .lookup = proc_tid_attr_lookup,
1900 .getattr = pid_getattr,
1924}; 1901};
1925#endif 1902#endif
1926 1903
@@ -1930,14 +1907,14 @@ static struct inode_operations proc_tid_attr_inode_operations = {
1930static int proc_self_readlink(struct dentry *dentry, char __user *buffer, 1907static int proc_self_readlink(struct dentry *dentry, char __user *buffer,
1931 int buflen) 1908 int buflen)
1932{ 1909{
1933 char tmp[30]; 1910 char tmp[PROC_NUMBUF];
1934 sprintf(tmp, "%d", current->tgid); 1911 sprintf(tmp, "%d", current->tgid);
1935 return vfs_readlink(dentry,buffer,buflen,tmp); 1912 return vfs_readlink(dentry,buffer,buflen,tmp);
1936} 1913}
1937 1914
1938static void *proc_self_follow_link(struct dentry *dentry, struct nameidata *nd) 1915static void *proc_self_follow_link(struct dentry *dentry, struct nameidata *nd)
1939{ 1916{
1940 char tmp[30]; 1917 char tmp[PROC_NUMBUF];
1941 sprintf(tmp, "%d", current->tgid); 1918 sprintf(tmp, "%d", current->tgid);
1942 return ERR_PTR(vfs_follow_link(nd,tmp)); 1919 return ERR_PTR(vfs_follow_link(nd,tmp));
1943} 1920}
@@ -1948,67 +1925,80 @@ static struct inode_operations proc_self_inode_operations = {
1948}; 1925};
1949 1926
1950/** 1927/**
1951 * proc_pid_unhash - Unhash /proc/@pid entry from the dcache. 1928 * proc_flush_task - Remove dcache entries for @task from the /proc dcache.
1952 * @p: task that should be flushed. 1929 *
1930 * @task: task that should be flushed.
1931 *
1932 * Looks in the dcache for
1933 * /proc/@pid
1934 * /proc/@tgid/task/@pid
1935 * if either directory is present flushes it and all of it'ts children
1936 * from the dcache.
1953 * 1937 *
1954 * Drops the /proc/@pid dcache entry from the hash chains. 1938 * It is safe and reasonable to cache /proc entries for a task until
1939 * that task exits. After that they just clog up the dcache with
1940 * useless entries, possibly causing useful dcache entries to be
1941 * flushed instead. This routine is proved to flush those useless
1942 * dcache entries at process exit time.
1955 * 1943 *
1956 * Dropping /proc/@pid entries and detach_pid must be synchroneous, 1944 * NOTE: This routine is just an optimization so it does not guarantee
1957 * otherwise e.g. /proc/@pid/exe might point to the wrong executable, 1945 * that no dcache entries will exist at process exit time it
1958 * if the pid value is immediately reused. This is enforced by 1946 * just makes it very unlikely that any will persist.
1959 * - caller must acquire spin_lock(p->proc_lock)
1960 * - must be called before detach_pid()
1961 * - proc_pid_lookup acquires proc_lock, and checks that
1962 * the target is not dead by looking at the attach count
1963 * of PIDTYPE_PID.
1964 */ 1947 */
1965 1948void proc_flush_task(struct task_struct *task)
1966struct dentry *proc_pid_unhash(struct task_struct *p)
1967{ 1949{
1968 struct dentry *proc_dentry; 1950 struct dentry *dentry, *leader, *dir;
1951 char buf[PROC_NUMBUF];
1952 struct qstr name;
1953
1954 name.name = buf;
1955 name.len = snprintf(buf, sizeof(buf), "%d", task->pid);
1956 dentry = d_hash_and_lookup(proc_mnt->mnt_root, &name);
1957 if (dentry) {
1958 shrink_dcache_parent(dentry);
1959 d_drop(dentry);
1960 dput(dentry);
1961 }
1969 1962
1970 proc_dentry = p->proc_dentry; 1963 if (thread_group_leader(task))
1971 if (proc_dentry != NULL) { 1964 goto out;
1972 1965
1973 spin_lock(&dcache_lock); 1966 name.name = buf;
1974 spin_lock(&proc_dentry->d_lock); 1967 name.len = snprintf(buf, sizeof(buf), "%d", task->tgid);
1975 if (!d_unhashed(proc_dentry)) { 1968 leader = d_hash_and_lookup(proc_mnt->mnt_root, &name);
1976 dget_locked(proc_dentry); 1969 if (!leader)
1977 __d_drop(proc_dentry); 1970 goto out;
1978 spin_unlock(&proc_dentry->d_lock);
1979 } else {
1980 spin_unlock(&proc_dentry->d_lock);
1981 proc_dentry = NULL;
1982 }
1983 spin_unlock(&dcache_lock);
1984 }
1985 return proc_dentry;
1986}
1987 1971
1988/** 1972 name.name = "task";
1989 * proc_pid_flush - recover memory used by stale /proc/@pid/x entries 1973 name.len = strlen(name.name);
1990 * @proc_dentry: directoy to prune. 1974 dir = d_hash_and_lookup(leader, &name);
1991 * 1975 if (!dir)
1992 * Shrink the /proc directory that was used by the just killed thread. 1976 goto out_put_leader;
1993 */ 1977
1994 1978 name.name = buf;
1995void proc_pid_flush(struct dentry *proc_dentry) 1979 name.len = snprintf(buf, sizeof(buf), "%d", task->pid);
1996{ 1980 dentry = d_hash_and_lookup(dir, &name);
1997 might_sleep(); 1981 if (dentry) {
1998 if(proc_dentry != NULL) { 1982 shrink_dcache_parent(dentry);
1999 shrink_dcache_parent(proc_dentry); 1983 d_drop(dentry);
2000 dput(proc_dentry); 1984 dput(dentry);
2001 } 1985 }
1986
1987 dput(dir);
1988out_put_leader:
1989 dput(leader);
1990out:
1991 return;
2002} 1992}
2003 1993
2004/* SMP-safe */ 1994/* SMP-safe */
2005struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd) 1995struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd)
2006{ 1996{
1997 struct dentry *result = ERR_PTR(-ENOENT);
2007 struct task_struct *task; 1998 struct task_struct *task;
2008 struct inode *inode; 1999 struct inode *inode;
2009 struct proc_inode *ei; 2000 struct proc_inode *ei;
2010 unsigned tgid; 2001 unsigned tgid;
2011 int died;
2012 2002
2013 if (dentry->d_name.len == 4 && !memcmp(dentry->d_name.name,"self",4)) { 2003 if (dentry->d_name.len == 4 && !memcmp(dentry->d_name.name,"self",4)) {
2014 inode = new_inode(dir->i_sb); 2004 inode = new_inode(dir->i_sb);
@@ -2029,21 +2019,18 @@ struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, struct
2029 if (tgid == ~0U) 2019 if (tgid == ~0U)
2030 goto out; 2020 goto out;
2031 2021
2032 read_lock(&tasklist_lock); 2022 rcu_read_lock();
2033 task = find_task_by_pid(tgid); 2023 task = find_task_by_pid(tgid);
2034 if (task) 2024 if (task)
2035 get_task_struct(task); 2025 get_task_struct(task);
2036 read_unlock(&tasklist_lock); 2026 rcu_read_unlock();
2037 if (!task) 2027 if (!task)
2038 goto out; 2028 goto out;
2039 2029
2040 inode = proc_pid_make_inode(dir->i_sb, task, PROC_TGID_INO); 2030 inode = proc_pid_make_inode(dir->i_sb, task, PROC_TGID_INO);
2031 if (!inode)
2032 goto out_put_task;
2041 2033
2042
2043 if (!inode) {
2044 put_task_struct(task);
2045 goto out;
2046 }
2047 inode->i_mode = S_IFDIR|S_IRUGO|S_IXUGO; 2034 inode->i_mode = S_IFDIR|S_IRUGO|S_IXUGO;
2048 inode->i_op = &proc_tgid_base_inode_operations; 2035 inode->i_op = &proc_tgid_base_inode_operations;
2049 inode->i_fop = &proc_tgid_base_operations; 2036 inode->i_fop = &proc_tgid_base_operations;
@@ -2054,45 +2041,40 @@ struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, struct
2054 inode->i_nlink = 4; 2041 inode->i_nlink = 4;
2055#endif 2042#endif
2056 2043
2057 dentry->d_op = &pid_base_dentry_operations; 2044 dentry->d_op = &pid_dentry_operations;
2058 2045
2059 died = 0;
2060 d_add(dentry, inode); 2046 d_add(dentry, inode);
2061 spin_lock(&task->proc_lock); 2047 /* Close the race of the process dying before we return the dentry */
2062 task->proc_dentry = dentry; 2048 if (pid_revalidate(dentry, NULL))
2063 if (!pid_alive(task)) { 2049 result = NULL;
2064 dentry = proc_pid_unhash(task);
2065 died = 1;
2066 }
2067 spin_unlock(&task->proc_lock);
2068 2050
2051out_put_task:
2069 put_task_struct(task); 2052 put_task_struct(task);
2070 if (died) {
2071 proc_pid_flush(dentry);
2072 goto out;
2073 }
2074 return NULL;
2075out: 2053out:
2076 return ERR_PTR(-ENOENT); 2054 return result;
2077} 2055}
2078 2056
2079/* SMP-safe */ 2057/* SMP-safe */
2080static struct dentry *proc_task_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd) 2058static struct dentry *proc_task_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd)
2081{ 2059{
2060 struct dentry *result = ERR_PTR(-ENOENT);
2082 struct task_struct *task; 2061 struct task_struct *task;
2083 struct task_struct *leader = proc_task(dir); 2062 struct task_struct *leader = get_proc_task(dir);
2084 struct inode *inode; 2063 struct inode *inode;
2085 unsigned tid; 2064 unsigned tid;
2086 2065
2066 if (!leader)
2067 goto out_no_task;
2068
2087 tid = name_to_int(dentry); 2069 tid = name_to_int(dentry);
2088 if (tid == ~0U) 2070 if (tid == ~0U)
2089 goto out; 2071 goto out;
2090 2072
2091 read_lock(&tasklist_lock); 2073 rcu_read_lock();
2092 task = find_task_by_pid(tid); 2074 task = find_task_by_pid(tid);
2093 if (task) 2075 if (task)
2094 get_task_struct(task); 2076 get_task_struct(task);
2095 read_unlock(&tasklist_lock); 2077 rcu_read_unlock();
2096 if (!task) 2078 if (!task)
2097 goto out; 2079 goto out;
2098 if (leader->tgid != task->tgid) 2080 if (leader->tgid != task->tgid)
@@ -2113,101 +2095,95 @@ static struct dentry *proc_task_lookup(struct inode *dir, struct dentry * dentry
2113 inode->i_nlink = 3; 2095 inode->i_nlink = 3;
2114#endif 2096#endif
2115 2097
2116 dentry->d_op = &pid_base_dentry_operations; 2098 dentry->d_op = &pid_dentry_operations;
2117 2099
2118 d_add(dentry, inode); 2100 d_add(dentry, inode);
2101 /* Close the race of the process dying before we return the dentry */
2102 if (pid_revalidate(dentry, NULL))
2103 result = NULL;
2119 2104
2120 put_task_struct(task);
2121 return NULL;
2122out_drop_task: 2105out_drop_task:
2123 put_task_struct(task); 2106 put_task_struct(task);
2124out: 2107out:
2125 return ERR_PTR(-ENOENT); 2108 put_task_struct(leader);
2109out_no_task:
2110 return result;
2126} 2111}
2127 2112
2128#define PROC_NUMBUF 10
2129#define PROC_MAXPIDS 20
2130
2131/* 2113/*
2132 * Get a few tgid's to return for filldir - we need to hold the 2114 * Find the first tgid to return to user space.
2133 * tasklist lock while doing this, and we must release it before 2115 *
2134 * we actually do the filldir itself, so we use a temp buffer.. 2116 * Usually this is just whatever follows &init_task, but if the users
2117 * buffer was too small to hold the full list or there was a seek into
2118 * the middle of the directory we have more work to do.
2119 *
2120 * In the case of a short read we start with find_task_by_pid.
2121 *
2122 * In the case of a seek we start with &init_task and walk nr
2123 * threads past it.
2135 */ 2124 */
2136static int get_tgid_list(int index, unsigned long version, unsigned int *tgids) 2125static struct task_struct *first_tgid(int tgid, unsigned int nr)
2137{ 2126{
2138 struct task_struct *p; 2127 struct task_struct *pos;
2139 int nr_tgids = 0; 2128 rcu_read_lock();
2140 2129 if (tgid && nr) {
2141 index--; 2130 pos = find_task_by_pid(tgid);
2142 read_lock(&tasklist_lock); 2131 if (pos && thread_group_leader(pos))
2143 p = NULL; 2132 goto found;
2144 if (version) {
2145 p = find_task_by_pid(version);
2146 if (p && !thread_group_leader(p))
2147 p = NULL;
2148 } 2133 }
2134 /* If nr exceeds the number of processes get out quickly */
2135 pos = NULL;
2136 if (nr && nr >= nr_processes())
2137 goto done;
2149 2138
2150 if (p) 2139 /* If we haven't found our starting place yet start with
2151 index = 0; 2140 * the init_task and walk nr tasks forward.
2152 else 2141 */
2153 p = next_task(&init_task); 2142 for (pos = next_task(&init_task); nr > 0; --nr) {
2154 2143 pos = next_task(pos);
2155 for ( ; p != &init_task; p = next_task(p)) { 2144 if (pos == &init_task) {
2156 int tgid = p->pid; 2145 pos = NULL;
2157 if (!pid_alive(p)) 2146 goto done;
2158 continue; 2147 }
2159 if (--index >= 0)
2160 continue;
2161 tgids[nr_tgids] = tgid;
2162 nr_tgids++;
2163 if (nr_tgids >= PROC_MAXPIDS)
2164 break;
2165 } 2148 }
2166 read_unlock(&tasklist_lock); 2149found:
2167 return nr_tgids; 2150 get_task_struct(pos);
2151done:
2152 rcu_read_unlock();
2153 return pos;
2168} 2154}
2169 2155
2170/* 2156/*
2171 * Get a few tid's to return for filldir - we need to hold the 2157 * Find the next task in the task list.
2172 * tasklist lock while doing this, and we must release it before 2158 * Return NULL if we loop or there is any error.
2173 * we actually do the filldir itself, so we use a temp buffer.. 2159 *
2160 * The reference to the input task_struct is released.
2174 */ 2161 */
2175static int get_tid_list(int index, unsigned int *tids, struct inode *dir) 2162static struct task_struct *next_tgid(struct task_struct *start)
2176{ 2163{
2177 struct task_struct *leader_task = proc_task(dir); 2164 struct task_struct *pos;
2178 struct task_struct *task = leader_task; 2165 rcu_read_lock();
2179 int nr_tids = 0; 2166 pos = start;
2180 2167 if (pid_alive(start))
2181 index -= 2; 2168 pos = next_task(start);
2182 read_lock(&tasklist_lock); 2169 if (pid_alive(pos) && (pos != &init_task)) {
2183 /* 2170 get_task_struct(pos);
2184 * The starting point task (leader_task) might be an already 2171 goto done;
2185 * unlinked task, which cannot be used to access the task-list 2172 }
2186 * via next_thread(). 2173 pos = NULL;
2187 */ 2174done:
2188 if (pid_alive(task)) do { 2175 rcu_read_unlock();
2189 int tid = task->pid; 2176 put_task_struct(start);
2190 2177 return pos;
2191 if (--index >= 0)
2192 continue;
2193 if (tids != NULL)
2194 tids[nr_tids] = tid;
2195 nr_tids++;
2196 if (nr_tids >= PROC_MAXPIDS)
2197 break;
2198 } while ((task = next_thread(task)) != leader_task);
2199 read_unlock(&tasklist_lock);
2200 return nr_tids;
2201} 2178}
2202 2179
2203/* for the /proc/ directory itself, after non-process stuff has been done */ 2180/* for the /proc/ directory itself, after non-process stuff has been done */
2204int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir) 2181int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir)
2205{ 2182{
2206 unsigned int tgid_array[PROC_MAXPIDS];
2207 char buf[PROC_NUMBUF]; 2183 char buf[PROC_NUMBUF];
2208 unsigned int nr = filp->f_pos - FIRST_PROCESS_ENTRY; 2184 unsigned int nr = filp->f_pos - FIRST_PROCESS_ENTRY;
2209 unsigned int nr_tgids, i; 2185 struct task_struct *task;
2210 int next_tgid; 2186 int tgid;
2211 2187
2212 if (!nr) { 2188 if (!nr) {
2213 ino_t ino = fake_ino(0,PROC_TGID_INO); 2189 ino_t ino = fake_ino(0,PROC_TGID_INO);
@@ -2216,63 +2192,116 @@ int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir)
2216 filp->f_pos++; 2192 filp->f_pos++;
2217 nr++; 2193 nr++;
2218 } 2194 }
2195 nr -= 1;
2219 2196
2220 /* f_version caches the tgid value that the last readdir call couldn't 2197 /* f_version caches the tgid value that the last readdir call couldn't
2221 * return. lseek aka telldir automagically resets f_version to 0. 2198 * return. lseek aka telldir automagically resets f_version to 0.
2222 */ 2199 */
2223 next_tgid = filp->f_version; 2200 tgid = filp->f_version;
2224 filp->f_version = 0; 2201 filp->f_version = 0;
2225 for (;;) { 2202 for (task = first_tgid(tgid, nr);
2226 nr_tgids = get_tgid_list(nr, next_tgid, tgid_array); 2203 task;
2227 if (!nr_tgids) { 2204 task = next_tgid(task), filp->f_pos++) {
2228 /* no more entries ! */ 2205 int len;
2206 ino_t ino;
2207 tgid = task->pid;
2208 len = snprintf(buf, sizeof(buf), "%d", tgid);
2209 ino = fake_ino(tgid, PROC_TGID_INO);
2210 if (filldir(dirent, buf, len, filp->f_pos, ino, DT_DIR) < 0) {
2211 /* returning this tgid failed, save it as the first
2212 * pid for the next readir call */
2213 filp->f_version = tgid;
2214 put_task_struct(task);
2229 break; 2215 break;
2230 } 2216 }
2231 next_tgid = 0; 2217 }
2218 return 0;
2219}
2232 2220
2233 /* do not use the last found pid, reserve it for next_tgid */ 2221/*
2234 if (nr_tgids == PROC_MAXPIDS) { 2222 * Find the first tid of a thread group to return to user space.
2235 nr_tgids--; 2223 *
2236 next_tgid = tgid_array[nr_tgids]; 2224 * Usually this is just the thread group leader, but if the users
2237 } 2225 * buffer was too small or there was a seek into the middle of the
2226 * directory we have more work todo.
2227 *
2228 * In the case of a short read we start with find_task_by_pid.
2229 *
2230 * In the case of a seek we start with the leader and walk nr
2231 * threads past it.
2232 */
2233static struct task_struct *first_tid(struct task_struct *leader,
2234 int tid, int nr)
2235{
2236 struct task_struct *pos;
2238 2237
2239 for (i=0;i<nr_tgids;i++) { 2238 rcu_read_lock();
2240 int tgid = tgid_array[i]; 2239 /* Attempt to start with the pid of a thread */
2241 ino_t ino = fake_ino(tgid,PROC_TGID_INO); 2240 if (tid && (nr > 0)) {
2242 unsigned long j = PROC_NUMBUF; 2241 pos = find_task_by_pid(tid);
2242 if (pos && (pos->group_leader == leader))
2243 goto found;
2244 }
2243 2245
2244 do 2246 /* If nr exceeds the number of threads there is nothing todo */
2245 buf[--j] = '0' + (tgid % 10); 2247 pos = NULL;
2246 while ((tgid /= 10) != 0); 2248 if (nr && nr >= get_nr_threads(leader))
2249 goto out;
2247 2250
2248 if (filldir(dirent, buf+j, PROC_NUMBUF-j, filp->f_pos, ino, DT_DIR) < 0) { 2251 /* If we haven't found our starting place yet start
2249 /* returning this tgid failed, save it as the first 2252 * with the leader and walk nr threads forward.
2250 * pid for the next readir call */ 2253 */
2251 filp->f_version = tgid_array[i]; 2254 for (pos = leader; nr > 0; --nr) {
2252 goto out; 2255 pos = next_thread(pos);
2253 } 2256 if (pos == leader) {
2254 filp->f_pos++; 2257 pos = NULL;
2255 nr++; 2258 goto out;
2256 } 2259 }
2257 } 2260 }
2261found:
2262 get_task_struct(pos);
2258out: 2263out:
2259 return 0; 2264 rcu_read_unlock();
2265 return pos;
2266}
2267
2268/*
2269 * Find the next thread in the thread list.
2270 * Return NULL if there is an error or no next thread.
2271 *
2272 * The reference to the input task_struct is released.
2273 */
2274static struct task_struct *next_tid(struct task_struct *start)
2275{
2276 struct task_struct *pos = NULL;
2277 rcu_read_lock();
2278 if (pid_alive(start)) {
2279 pos = next_thread(start);
2280 if (thread_group_leader(pos))
2281 pos = NULL;
2282 else
2283 get_task_struct(pos);
2284 }
2285 rcu_read_unlock();
2286 put_task_struct(start);
2287 return pos;
2260} 2288}
2261 2289
2262/* for the /proc/TGID/task/ directories */ 2290/* for the /proc/TGID/task/ directories */
2263static int proc_task_readdir(struct file * filp, void * dirent, filldir_t filldir) 2291static int proc_task_readdir(struct file * filp, void * dirent, filldir_t filldir)
2264{ 2292{
2265 unsigned int tid_array[PROC_MAXPIDS];
2266 char buf[PROC_NUMBUF]; 2293 char buf[PROC_NUMBUF];
2267 unsigned int nr_tids, i;
2268 struct dentry *dentry = filp->f_dentry; 2294 struct dentry *dentry = filp->f_dentry;
2269 struct inode *inode = dentry->d_inode; 2295 struct inode *inode = dentry->d_inode;
2296 struct task_struct *leader = get_proc_task(inode);
2297 struct task_struct *task;
2270 int retval = -ENOENT; 2298 int retval = -ENOENT;
2271 ino_t ino; 2299 ino_t ino;
2300 int tid;
2272 unsigned long pos = filp->f_pos; /* avoiding "long long" filp->f_pos */ 2301 unsigned long pos = filp->f_pos; /* avoiding "long long" filp->f_pos */
2273 2302
2274 if (!pid_alive(proc_task(inode))) 2303 if (!leader)
2275 goto out; 2304 goto out_no_task;
2276 retval = 0; 2305 retval = 0;
2277 2306
2278 switch (pos) { 2307 switch (pos) {
@@ -2290,24 +2319,45 @@ static int proc_task_readdir(struct file * filp, void * dirent, filldir_t filldi
2290 /* fall through */ 2319 /* fall through */
2291 } 2320 }
2292 2321
2293 nr_tids = get_tid_list(pos, tid_array, inode); 2322 /* f_version caches the tgid value that the last readdir call couldn't
2294 inode->i_nlink = pos + nr_tids; 2323 * return. lseek aka telldir automagically resets f_version to 0.
2295 2324 */
2296 for (i = 0; i < nr_tids; i++) { 2325 tid = filp->f_version;
2297 unsigned long j = PROC_NUMBUF; 2326 filp->f_version = 0;
2298 int tid = tid_array[i]; 2327 for (task = first_tid(leader, tid, pos - 2);
2299 2328 task;
2300 ino = fake_ino(tid,PROC_TID_INO); 2329 task = next_tid(task), pos++) {
2301 2330 int len;
2302 do 2331 tid = task->pid;
2303 buf[--j] = '0' + (tid % 10); 2332 len = snprintf(buf, sizeof(buf), "%d", tid);
2304 while ((tid /= 10) != 0); 2333 ino = fake_ino(tid, PROC_TID_INO);
2305 2334 if (filldir(dirent, buf, len, pos, ino, DT_DIR < 0)) {
2306 if (filldir(dirent, buf+j, PROC_NUMBUF-j, pos, ino, DT_DIR) < 0) 2335 /* returning this tgid failed, save it as the first
2336 * pid for the next readir call */
2337 filp->f_version = tid;
2338 put_task_struct(task);
2307 break; 2339 break;
2308 pos++; 2340 }
2309 } 2341 }
2310out: 2342out:
2311 filp->f_pos = pos; 2343 filp->f_pos = pos;
2344 put_task_struct(leader);
2345out_no_task:
2312 return retval; 2346 return retval;
2313} 2347}
2348
2349static int proc_task_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
2350{
2351 struct inode *inode = dentry->d_inode;
2352 struct task_struct *p = get_proc_task(inode);
2353 generic_fillattr(inode, stat);
2354
2355 if (p) {
2356 rcu_read_lock();
2357 stat->nlink += get_nr_threads(p);
2358 rcu_read_unlock();
2359 put_task_struct(p);
2360 }
2361
2362 return 0;
2363}
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index 722b9c463111..6dcef089e18e 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -58,14 +58,11 @@ static void de_put(struct proc_dir_entry *de)
58static void proc_delete_inode(struct inode *inode) 58static void proc_delete_inode(struct inode *inode)
59{ 59{
60 struct proc_dir_entry *de; 60 struct proc_dir_entry *de;
61 struct task_struct *tsk;
62 61
63 truncate_inode_pages(&inode->i_data, 0); 62 truncate_inode_pages(&inode->i_data, 0);
64 63
65 /* Let go of any associated process */ 64 /* Stop tracking associated processes */
66 tsk = PROC_I(inode)->task; 65 put_pid(PROC_I(inode)->pid);
67 if (tsk)
68 put_task_struct(tsk);
69 66
70 /* Let go of any associated proc directory entry */ 67 /* Let go of any associated proc directory entry */
71 de = PROC_I(inode)->pde; 68 de = PROC_I(inode)->pde;
@@ -94,8 +91,8 @@ static struct inode *proc_alloc_inode(struct super_block *sb)
94 ei = (struct proc_inode *)kmem_cache_alloc(proc_inode_cachep, SLAB_KERNEL); 91 ei = (struct proc_inode *)kmem_cache_alloc(proc_inode_cachep, SLAB_KERNEL);
95 if (!ei) 92 if (!ei)
96 return NULL; 93 return NULL;
97 ei->task = NULL; 94 ei->pid = NULL;
98 ei->type = 0; 95 ei->fd = 0;
99 ei->op.proc_get_link = NULL; 96 ei->op.proc_get_link = NULL;
100 ei->pde = NULL; 97 ei->pde = NULL;
101 inode = &ei->vfs_inode; 98 inode = &ei->vfs_inode;
diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index 0502f17b860d..146a434ba944 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -37,16 +37,30 @@ extern int proc_tgid_stat(struct task_struct *, char *);
37extern int proc_pid_status(struct task_struct *, char *); 37extern int proc_pid_status(struct task_struct *, char *);
38extern int proc_pid_statm(struct task_struct *, char *); 38extern int proc_pid_statm(struct task_struct *, char *);
39 39
40extern struct file_operations proc_maps_operations;
41extern struct file_operations proc_numa_maps_operations;
42extern struct file_operations proc_smaps_operations;
43
44extern struct file_operations proc_maps_operations;
45extern struct file_operations proc_numa_maps_operations;
46extern struct file_operations proc_smaps_operations;
47
48
40void free_proc_entry(struct proc_dir_entry *de); 49void free_proc_entry(struct proc_dir_entry *de);
41 50
42int proc_init_inodecache(void); 51int proc_init_inodecache(void);
43 52
44static inline struct task_struct *proc_task(struct inode *inode) 53static inline struct pid *proc_pid(struct inode *inode)
54{
55 return PROC_I(inode)->pid;
56}
57
58static inline struct task_struct *get_proc_task(struct inode *inode)
45{ 59{
46 return PROC_I(inode)->task; 60 return get_pid_task(proc_pid(inode), PIDTYPE_PID);
47} 61}
48 62
49static inline int proc_type(struct inode *inode) 63static inline int proc_fd(struct inode *inode)
50{ 64{
51 return PROC_I(inode)->type; 65 return PROC_I(inode)->fd;
52} 66}
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 91b7c15ab373..0a163a4f7764 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -75,9 +75,13 @@ int proc_exe_link(struct inode *inode, struct dentry **dentry, struct vfsmount *
75{ 75{
76 struct vm_area_struct * vma; 76 struct vm_area_struct * vma;
77 int result = -ENOENT; 77 int result = -ENOENT;
78 struct task_struct *task = proc_task(inode); 78 struct task_struct *task = get_proc_task(inode);
79 struct mm_struct * mm = get_task_mm(task); 79 struct mm_struct * mm = NULL;
80 80
81 if (task) {
82 mm = get_task_mm(task);
83 put_task_struct(task);
84 }
81 if (!mm) 85 if (!mm)
82 goto out; 86 goto out;
83 down_read(&mm->mmap_sem); 87 down_read(&mm->mmap_sem);
@@ -118,9 +122,15 @@ struct mem_size_stats
118 unsigned long private_dirty; 122 unsigned long private_dirty;
119}; 123};
120 124
125__attribute__((weak)) const char *arch_vma_name(struct vm_area_struct *vma)
126{
127 return NULL;
128}
129
121static int show_map_internal(struct seq_file *m, void *v, struct mem_size_stats *mss) 130static int show_map_internal(struct seq_file *m, void *v, struct mem_size_stats *mss)
122{ 131{
123 struct task_struct *task = m->private; 132 struct proc_maps_private *priv = m->private;
133 struct task_struct *task = priv->task;
124 struct vm_area_struct *vma = v; 134 struct vm_area_struct *vma = v;
125 struct mm_struct *mm = vma->vm_mm; 135 struct mm_struct *mm = vma->vm_mm;
126 struct file *file = vma->vm_file; 136 struct file *file = vma->vm_file;
@@ -153,22 +163,23 @@ static int show_map_internal(struct seq_file *m, void *v, struct mem_size_stats
153 pad_len_spaces(m, len); 163 pad_len_spaces(m, len);
154 seq_path(m, file->f_vfsmnt, file->f_dentry, "\n"); 164 seq_path(m, file->f_vfsmnt, file->f_dentry, "\n");
155 } else { 165 } else {
156 if (mm) { 166 const char *name = arch_vma_name(vma);
157 if (vma->vm_start <= mm->start_brk && 167 if (!name) {
168 if (mm) {
169 if (vma->vm_start <= mm->start_brk &&
158 vma->vm_end >= mm->brk) { 170 vma->vm_end >= mm->brk) {
159 pad_len_spaces(m, len); 171 name = "[heap]";
160 seq_puts(m, "[heap]"); 172 } else if (vma->vm_start <= mm->start_stack &&
161 } else { 173 vma->vm_end >= mm->start_stack) {
162 if (vma->vm_start <= mm->start_stack && 174 name = "[stack]";
163 vma->vm_end >= mm->start_stack) {
164
165 pad_len_spaces(m, len);
166 seq_puts(m, "[stack]");
167 } 175 }
176 } else {
177 name = "[vdso]";
168 } 178 }
169 } else { 179 }
180 if (name) {
170 pad_len_spaces(m, len); 181 pad_len_spaces(m, len);
171 seq_puts(m, "[vdso]"); 182 seq_puts(m, name);
172 } 183 }
173 } 184 }
174 seq_putc(m, '\n'); 185 seq_putc(m, '\n');
@@ -295,12 +306,16 @@ static int show_smap(struct seq_file *m, void *v)
295 306
296static void *m_start(struct seq_file *m, loff_t *pos) 307static void *m_start(struct seq_file *m, loff_t *pos)
297{ 308{
298 struct task_struct *task = m->private; 309 struct proc_maps_private *priv = m->private;
299 unsigned long last_addr = m->version; 310 unsigned long last_addr = m->version;
300 struct mm_struct *mm; 311 struct mm_struct *mm;
301 struct vm_area_struct *vma, *tail_vma; 312 struct vm_area_struct *vma, *tail_vma = NULL;
302 loff_t l = *pos; 313 loff_t l = *pos;
303 314
315 /* Clear the per syscall fields in priv */
316 priv->task = NULL;
317 priv->tail_vma = NULL;
318
304 /* 319 /*
305 * We remember last_addr rather than next_addr to hit with 320 * We remember last_addr rather than next_addr to hit with
306 * mmap_cache most of the time. We have zero last_addr at 321 * mmap_cache most of the time. We have zero last_addr at
@@ -311,11 +326,15 @@ static void *m_start(struct seq_file *m, loff_t *pos)
311 if (last_addr == -1UL) 326 if (last_addr == -1UL)
312 return NULL; 327 return NULL;
313 328
314 mm = get_task_mm(task); 329 priv->task = get_pid_task(priv->pid, PIDTYPE_PID);
330 if (!priv->task)
331 return NULL;
332
333 mm = get_task_mm(priv->task);
315 if (!mm) 334 if (!mm)
316 return NULL; 335 return NULL;
317 336
318 tail_vma = get_gate_vma(task); 337 priv->tail_vma = tail_vma = get_gate_vma(priv->task);
319 down_read(&mm->mmap_sem); 338 down_read(&mm->mmap_sem);
320 339
321 /* Start with last addr hint */ 340 /* Start with last addr hint */
@@ -350,11 +369,9 @@ out:
350 return tail_vma; 369 return tail_vma;
351} 370}
352 371
353static void m_stop(struct seq_file *m, void *v) 372static void vma_stop(struct proc_maps_private *priv, struct vm_area_struct *vma)
354{ 373{
355 struct task_struct *task = m->private; 374 if (vma && vma != priv->tail_vma) {
356 struct vm_area_struct *vma = v;
357 if (vma && vma != get_gate_vma(task)) {
358 struct mm_struct *mm = vma->vm_mm; 375 struct mm_struct *mm = vma->vm_mm;
359 up_read(&mm->mmap_sem); 376 up_read(&mm->mmap_sem);
360 mmput(mm); 377 mmput(mm);
@@ -363,38 +380,103 @@ static void m_stop(struct seq_file *m, void *v)
363 380
364static void *m_next(struct seq_file *m, void *v, loff_t *pos) 381static void *m_next(struct seq_file *m, void *v, loff_t *pos)
365{ 382{
366 struct task_struct *task = m->private; 383 struct proc_maps_private *priv = m->private;
367 struct vm_area_struct *vma = v; 384 struct vm_area_struct *vma = v;
368 struct vm_area_struct *tail_vma = get_gate_vma(task); 385 struct vm_area_struct *tail_vma = priv->tail_vma;
369 386
370 (*pos)++; 387 (*pos)++;
371 if (vma && (vma != tail_vma) && vma->vm_next) 388 if (vma && (vma != tail_vma) && vma->vm_next)
372 return vma->vm_next; 389 return vma->vm_next;
373 m_stop(m, v); 390 vma_stop(priv, vma);
374 return (vma != tail_vma)? tail_vma: NULL; 391 return (vma != tail_vma)? tail_vma: NULL;
375} 392}
376 393
377struct seq_operations proc_pid_maps_op = { 394static void m_stop(struct seq_file *m, void *v)
395{
396 struct proc_maps_private *priv = m->private;
397 struct vm_area_struct *vma = v;
398
399 vma_stop(priv, vma);
400 if (priv->task)
401 put_task_struct(priv->task);
402}
403
404static struct seq_operations proc_pid_maps_op = {
378 .start = m_start, 405 .start = m_start,
379 .next = m_next, 406 .next = m_next,
380 .stop = m_stop, 407 .stop = m_stop,
381 .show = show_map 408 .show = show_map
382}; 409};
383 410
384struct seq_operations proc_pid_smaps_op = { 411static struct seq_operations proc_pid_smaps_op = {
385 .start = m_start, 412 .start = m_start,
386 .next = m_next, 413 .next = m_next,
387 .stop = m_stop, 414 .stop = m_stop,
388 .show = show_smap 415 .show = show_smap
389}; 416};
390 417
418static int do_maps_open(struct inode *inode, struct file *file,
419 struct seq_operations *ops)
420{
421 struct proc_maps_private *priv;
422 int ret = -ENOMEM;
423 priv = kzalloc(sizeof(*priv), GFP_KERNEL);
424 if (priv) {
425 priv->pid = proc_pid(inode);
426 ret = seq_open(file, ops);
427 if (!ret) {
428 struct seq_file *m = file->private_data;
429 m->private = priv;
430 } else {
431 kfree(priv);
432 }
433 }
434 return ret;
435}
436
437static int maps_open(struct inode *inode, struct file *file)
438{
439 return do_maps_open(inode, file, &proc_pid_maps_op);
440}
441
442struct file_operations proc_maps_operations = {
443 .open = maps_open,
444 .read = seq_read,
445 .llseek = seq_lseek,
446 .release = seq_release_private,
447};
448
391#ifdef CONFIG_NUMA 449#ifdef CONFIG_NUMA
392extern int show_numa_map(struct seq_file *m, void *v); 450extern int show_numa_map(struct seq_file *m, void *v);
393 451
394struct seq_operations proc_pid_numa_maps_op = { 452static struct seq_operations proc_pid_numa_maps_op = {
395 .start = m_start, 453 .start = m_start,
396 .next = m_next, 454 .next = m_next,
397 .stop = m_stop, 455 .stop = m_stop,
398 .show = show_numa_map 456 .show = show_numa_map
399}; 457};
458
459static int numa_maps_open(struct inode *inode, struct file *file)
460{
461 return do_maps_open(inode, file, &proc_pid_numa_maps_op);
462}
463
464struct file_operations proc_numa_maps_operations = {
465 .open = numa_maps_open,
466 .read = seq_read,
467 .llseek = seq_lseek,
468 .release = seq_release_private,
469};
400#endif 470#endif
471
472static int smaps_open(struct inode *inode, struct file *file)
473{
474 return do_maps_open(inode, file, &proc_pid_smaps_op);
475}
476
477struct file_operations proc_smaps_operations = {
478 .open = smaps_open,
479 .read = seq_read,
480 .llseek = seq_lseek,
481 .release = seq_release_private,
482};
diff --git a/fs/proc/task_nommu.c b/fs/proc/task_nommu.c
index 8f68827ed10e..af69f28277b6 100644
--- a/fs/proc/task_nommu.c
+++ b/fs/proc/task_nommu.c
@@ -156,9 +156,28 @@ static void *m_next(struct seq_file *m, void *v, loff_t *pos)
156{ 156{
157 return NULL; 157 return NULL;
158} 158}
159struct seq_operations proc_pid_maps_op = { 159static struct seq_operations proc_pid_maps_op = {
160 .start = m_start, 160 .start = m_start,
161 .next = m_next, 161 .next = m_next,
162 .stop = m_stop, 162 .stop = m_stop,
163 .show = show_map 163 .show = show_map
164}; 164};
165
166static int maps_open(struct inode *inode, struct file *file)
167{
168 int ret;
169 ret = seq_open(file, &proc_pid_maps_op);
170 if (!ret) {
171 struct seq_file *m = file->private_data;
172 m->private = NULL;
173 }
174 return ret;
175}
176
177struct file_operations proc_maps_operations = {
178 .open = maps_open,
179 .read = seq_read,
180 .llseek = seq_lseek,
181 .release = seq_release,
182};
183
diff --git a/fs/reiserfs/file.c b/fs/reiserfs/file.c
index cf6e1cf40351..752cea12e30f 100644
--- a/fs/reiserfs/file.c
+++ b/fs/reiserfs/file.c
@@ -1560,12 +1560,6 @@ static ssize_t reiserfs_file_write(struct file *file, /* the file we are going t
1560 return res; 1560 return res;
1561} 1561}
1562 1562
1563static ssize_t reiserfs_aio_write(struct kiocb *iocb, const char __user * buf,
1564 size_t count, loff_t pos)
1565{
1566 return generic_file_aio_write(iocb, buf, count, pos);
1567}
1568
1569const struct file_operations reiserfs_file_operations = { 1563const struct file_operations reiserfs_file_operations = {
1570 .read = generic_file_read, 1564 .read = generic_file_read,
1571 .write = reiserfs_file_write, 1565 .write = reiserfs_file_write,
@@ -1575,7 +1569,7 @@ const struct file_operations reiserfs_file_operations = {
1575 .fsync = reiserfs_sync_file, 1569 .fsync = reiserfs_sync_file,
1576 .sendfile = generic_file_sendfile, 1570 .sendfile = generic_file_sendfile,
1577 .aio_read = generic_file_aio_read, 1571 .aio_read = generic_file_aio_read,
1578 .aio_write = reiserfs_aio_write, 1572 .aio_write = generic_file_aio_write,
1579 .splice_read = generic_file_splice_read, 1573 .splice_read = generic_file_splice_read,
1580 .splice_write = generic_file_splice_write, 1574 .splice_write = generic_file_splice_write,
1581}; 1575};
diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c
index 1b73529b8099..49d1a53dbef0 100644
--- a/fs/reiserfs/journal.c
+++ b/fs/reiserfs/journal.c
@@ -834,8 +834,7 @@ static int write_ordered_buffers(spinlock_t * lock,
834 get_bh(bh); 834 get_bh(bh);
835 if (test_set_buffer_locked(bh)) { 835 if (test_set_buffer_locked(bh)) {
836 if (!buffer_dirty(bh)) { 836 if (!buffer_dirty(bh)) {
837 list_del_init(&jh->list); 837 list_move(&jh->list, &tmp);
838 list_add(&jh->list, &tmp);
839 goto loop_next; 838 goto loop_next;
840 } 839 }
841 spin_unlock(lock); 840 spin_unlock(lock);
@@ -855,8 +854,7 @@ static int write_ordered_buffers(spinlock_t * lock,
855 ret = -EIO; 854 ret = -EIO;
856 } 855 }
857 if (buffer_dirty(bh)) { 856 if (buffer_dirty(bh)) {
858 list_del_init(&jh->list); 857 list_move(&jh->list, &tmp);
859 list_add(&jh->list, &tmp);
860 add_to_chunk(&chunk, bh, lock, write_ordered_chunk); 858 add_to_chunk(&chunk, bh, lock, write_ordered_chunk);
861 } else { 859 } else {
862 reiserfs_free_jh(bh); 860 reiserfs_free_jh(bh);
diff --git a/fs/smbfs/request.c b/fs/smbfs/request.c
index c71dd2760d32..c8e96195b96e 100644
--- a/fs/smbfs/request.c
+++ b/fs/smbfs/request.c
@@ -400,8 +400,7 @@ static int smb_request_send_req(struct smb_request *req)
400 if (!(req->rq_flags & SMB_REQ_TRANSMITTED)) 400 if (!(req->rq_flags & SMB_REQ_TRANSMITTED))
401 goto out; 401 goto out;
402 402
403 list_del_init(&req->rq_queue); 403 list_move_tail(&req->rq_queue, &server->recvq);
404 list_add_tail(&req->rq_queue, &server->recvq);
405 result = 1; 404 result = 1;
406out: 405out:
407 return result; 406 return result;
@@ -435,8 +434,7 @@ int smb_request_send_server(struct smb_sb_info *server)
435 result = smb_request_send_req(req); 434 result = smb_request_send_req(req);
436 if (result < 0) { 435 if (result < 0) {
437 server->conn_error = result; 436 server->conn_error = result;
438 list_del_init(&req->rq_queue); 437 list_move(&req->rq_queue, &server->xmitq);
439 list_add(&req->rq_queue, &server->xmitq);
440 result = -EIO; 438 result = -EIO;
441 goto out; 439 goto out;
442 } 440 }
diff --git a/fs/smbfs/smbiod.c b/fs/smbfs/smbiod.c
index 3f71384020cb..24577e2c489b 100644
--- a/fs/smbfs/smbiod.c
+++ b/fs/smbfs/smbiod.c
@@ -193,8 +193,7 @@ int smbiod_retry(struct smb_sb_info *server)
193 if (req->rq_flags & SMB_REQ_RETRY) { 193 if (req->rq_flags & SMB_REQ_RETRY) {
194 /* must move the request to the xmitq */ 194 /* must move the request to the xmitq */
195 VERBOSE("retrying request %p on recvq\n", req); 195 VERBOSE("retrying request %p on recvq\n", req);
196 list_del(&req->rq_queue); 196 list_move(&req->rq_queue, &server->xmitq);
197 list_add(&req->rq_queue, &server->xmitq);
198 continue; 197 continue;
199 } 198 }
200#endif 199#endif
diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c
index 610b5bdbe75b..61c42430cba3 100644
--- a/fs/sysfs/dir.c
+++ b/fs/sysfs/dir.c
@@ -430,10 +430,9 @@ static int sysfs_readdir(struct file * filp, void * dirent, filldir_t filldir)
430 i++; 430 i++;
431 /* fallthrough */ 431 /* fallthrough */
432 default: 432 default:
433 if (filp->f_pos == 2) { 433 if (filp->f_pos == 2)
434 list_del(q); 434 list_move(q, &parent_sd->s_children);
435 list_add(q, &parent_sd->s_children); 435
436 }
437 for (p=q->next; p!= &parent_sd->s_children; p=p->next) { 436 for (p=q->next; p!= &parent_sd->s_children; p=p->next) {
438 struct sysfs_dirent *next; 437 struct sysfs_dirent *next;
439 const char * name; 438 const char * name;
@@ -455,8 +454,7 @@ static int sysfs_readdir(struct file * filp, void * dirent, filldir_t filldir)
455 dt_type(next)) < 0) 454 dt_type(next)) < 0)
456 return 0; 455 return 0;
457 456
458 list_del(q); 457 list_move(q, p);
459 list_add(q, p);
460 p = q; 458 p = q;
461 filp->f_pos++; 459 filp->f_pos++;
462 } 460 }
diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c
index f2dbdf5a8769..259bd196099d 100644
--- a/fs/ufs/inode.c
+++ b/fs/ufs/inode.c
@@ -605,39 +605,12 @@ static void ufs_set_inode_ops(struct inode *inode)
605 ufs_get_inode_dev(inode->i_sb, UFS_I(inode))); 605 ufs_get_inode_dev(inode->i_sb, UFS_I(inode)));
606} 606}
607 607
608void ufs_read_inode (struct inode * inode) 608static void ufs1_read_inode(struct inode *inode, struct ufs_inode *ufs_inode)
609{ 609{
610 struct ufs_inode_info *ufsi = UFS_I(inode); 610 struct ufs_inode_info *ufsi = UFS_I(inode);
611 struct super_block * sb; 611 struct super_block *sb = inode->i_sb;
612 struct ufs_sb_private_info * uspi;
613 struct ufs_inode * ufs_inode;
614 struct ufs2_inode *ufs2_inode;
615 struct buffer_head * bh;
616 mode_t mode; 612 mode_t mode;
617 unsigned i; 613 unsigned i;
618 unsigned flags;
619
620 UFSD("ENTER, ino %lu\n", inode->i_ino);
621
622 sb = inode->i_sb;
623 uspi = UFS_SB(sb)->s_uspi;
624 flags = UFS_SB(sb)->s_flags;
625
626 if (inode->i_ino < UFS_ROOTINO ||
627 inode->i_ino > (uspi->s_ncg * uspi->s_ipg)) {
628 ufs_warning (sb, "ufs_read_inode", "bad inode number (%lu)\n", inode->i_ino);
629 goto bad_inode;
630 }
631
632 bh = sb_bread(sb, uspi->s_sbbase + ufs_inotofsba(inode->i_ino));
633 if (!bh) {
634 ufs_warning (sb, "ufs_read_inode", "unable to read inode %lu\n", inode->i_ino);
635 goto bad_inode;
636 }
637 if ((flags & UFS_TYPE_MASK) == UFS_TYPE_UFS2)
638 goto ufs2_inode;
639
640 ufs_inode = (struct ufs_inode *) (bh->b_data + sizeof(struct ufs_inode) * ufs_inotofsbo(inode->i_ino));
641 614
642 /* 615 /*
643 * Copy data to the in-core inode. 616 * Copy data to the in-core inode.
@@ -661,14 +634,11 @@ void ufs_read_inode (struct inode * inode)
661 inode->i_atime.tv_nsec = 0; 634 inode->i_atime.tv_nsec = 0;
662 inode->i_ctime.tv_nsec = 0; 635 inode->i_ctime.tv_nsec = 0;
663 inode->i_blocks = fs32_to_cpu(sb, ufs_inode->ui_blocks); 636 inode->i_blocks = fs32_to_cpu(sb, ufs_inode->ui_blocks);
664 inode->i_blksize = PAGE_SIZE; /* This is the optimal IO size (for stat) */
665 inode->i_version++;
666 ufsi->i_flags = fs32_to_cpu(sb, ufs_inode->ui_flags); 637 ufsi->i_flags = fs32_to_cpu(sb, ufs_inode->ui_flags);
667 ufsi->i_gen = fs32_to_cpu(sb, ufs_inode->ui_gen); 638 ufsi->i_gen = fs32_to_cpu(sb, ufs_inode->ui_gen);
668 ufsi->i_shadow = fs32_to_cpu(sb, ufs_inode->ui_u3.ui_sun.ui_shadow); 639 ufsi->i_shadow = fs32_to_cpu(sb, ufs_inode->ui_u3.ui_sun.ui_shadow);
669 ufsi->i_oeftflag = fs32_to_cpu(sb, ufs_inode->ui_u3.ui_sun.ui_oeftflag); 640 ufsi->i_oeftflag = fs32_to_cpu(sb, ufs_inode->ui_u3.ui_sun.ui_oeftflag);
670 ufsi->i_lastfrag = (inode->i_size + uspi->s_fsize - 1) >> uspi->s_fshift; 641
671 ufsi->i_dir_start_lookup = 0;
672 642
673 if (S_ISCHR(mode) || S_ISBLK(mode) || inode->i_blocks) { 643 if (S_ISCHR(mode) || S_ISBLK(mode) || inode->i_blocks) {
674 for (i = 0; i < (UFS_NDADDR + UFS_NINDIR); i++) 644 for (i = 0; i < (UFS_NDADDR + UFS_NINDIR); i++)
@@ -677,24 +647,16 @@ void ufs_read_inode (struct inode * inode)
677 for (i = 0; i < (UFS_NDADDR + UFS_NINDIR) * 4; i++) 647 for (i = 0; i < (UFS_NDADDR + UFS_NINDIR) * 4; i++)
678 ufsi->i_u1.i_symlink[i] = ufs_inode->ui_u2.ui_symlink[i]; 648 ufsi->i_u1.i_symlink[i] = ufs_inode->ui_u2.ui_symlink[i];
679 } 649 }
680 ufsi->i_osync = 0; 650}
681
682 ufs_set_inode_ops(inode);
683
684 brelse (bh);
685
686 UFSD("EXIT\n");
687 return;
688 651
689bad_inode: 652static void ufs2_read_inode(struct inode *inode, struct ufs2_inode *ufs2_inode)
690 make_bad_inode(inode); 653{
691 return; 654 struct ufs_inode_info *ufsi = UFS_I(inode);
655 struct super_block *sb = inode->i_sb;
656 mode_t mode;
657 unsigned i;
692 658
693ufs2_inode :
694 UFSD("Reading ufs2 inode, ino %lu\n", inode->i_ino); 659 UFSD("Reading ufs2 inode, ino %lu\n", inode->i_ino);
695
696 ufs2_inode = (struct ufs2_inode *)(bh->b_data + sizeof(struct ufs2_inode) * ufs_inotofsbo(inode->i_ino));
697
698 /* 660 /*
699 * Copy data to the in-core inode. 661 * Copy data to the in-core inode.
700 */ 662 */
@@ -717,26 +679,64 @@ ufs2_inode :
717 inode->i_atime.tv_nsec = 0; 679 inode->i_atime.tv_nsec = 0;
718 inode->i_ctime.tv_nsec = 0; 680 inode->i_ctime.tv_nsec = 0;
719 inode->i_blocks = fs64_to_cpu(sb, ufs2_inode->ui_blocks); 681 inode->i_blocks = fs64_to_cpu(sb, ufs2_inode->ui_blocks);
720 inode->i_blksize = PAGE_SIZE; /*This is the optimal IO size(for stat)*/
721
722 inode->i_version++;
723 ufsi->i_flags = fs32_to_cpu(sb, ufs2_inode->ui_flags); 682 ufsi->i_flags = fs32_to_cpu(sb, ufs2_inode->ui_flags);
724 ufsi->i_gen = fs32_to_cpu(sb, ufs2_inode->ui_gen); 683 ufsi->i_gen = fs32_to_cpu(sb, ufs2_inode->ui_gen);
725 /* 684 /*
726 ufsi->i_shadow = fs32_to_cpu(sb, ufs_inode->ui_u3.ui_sun.ui_shadow); 685 ufsi->i_shadow = fs32_to_cpu(sb, ufs_inode->ui_u3.ui_sun.ui_shadow);
727 ufsi->i_oeftflag = fs32_to_cpu(sb, ufs_inode->ui_u3.ui_sun.ui_oeftflag); 686 ufsi->i_oeftflag = fs32_to_cpu(sb, ufs_inode->ui_u3.ui_sun.ui_oeftflag);
728 */ 687 */
729 ufsi->i_lastfrag= (inode->i_size + uspi->s_fsize- 1) >> uspi->s_fshift;
730 688
731 if (S_ISCHR(mode) || S_ISBLK(mode) || inode->i_blocks) { 689 if (S_ISCHR(mode) || S_ISBLK(mode) || inode->i_blocks) {
732 for (i = 0; i < (UFS_NDADDR + UFS_NINDIR); i++) 690 for (i = 0; i < (UFS_NDADDR + UFS_NINDIR); i++)
733 ufsi->i_u1.u2_i_data[i] = 691 ufsi->i_u1.u2_i_data[i] =
734 ufs2_inode->ui_u2.ui_addr.ui_db[i]; 692 ufs2_inode->ui_u2.ui_addr.ui_db[i];
735 } 693 } else {
736 else {
737 for (i = 0; i < (UFS_NDADDR + UFS_NINDIR) * 4; i++) 694 for (i = 0; i < (UFS_NDADDR + UFS_NINDIR) * 4; i++)
738 ufsi->i_u1.i_symlink[i] = ufs2_inode->ui_u2.ui_symlink[i]; 695 ufsi->i_u1.i_symlink[i] = ufs2_inode->ui_u2.ui_symlink[i];
739 } 696 }
697}
698
699void ufs_read_inode(struct inode * inode)
700{
701 struct ufs_inode_info *ufsi = UFS_I(inode);
702 struct super_block * sb;
703 struct ufs_sb_private_info * uspi;
704 struct buffer_head * bh;
705
706 UFSD("ENTER, ino %lu\n", inode->i_ino);
707
708 sb = inode->i_sb;
709 uspi = UFS_SB(sb)->s_uspi;
710
711 if (inode->i_ino < UFS_ROOTINO ||
712 inode->i_ino > (uspi->s_ncg * uspi->s_ipg)) {
713 ufs_warning(sb, "ufs_read_inode", "bad inode number (%lu)\n",
714 inode->i_ino);
715 goto bad_inode;
716 }
717
718 bh = sb_bread(sb, uspi->s_sbbase + ufs_inotofsba(inode->i_ino));
719 if (!bh) {
720 ufs_warning(sb, "ufs_read_inode", "unable to read inode %lu\n",
721 inode->i_ino);
722 goto bad_inode;
723 }
724 if ((UFS_SB(sb)->s_flags & UFS_TYPE_MASK) == UFS_TYPE_UFS2) {
725 struct ufs2_inode *ufs2_inode = (struct ufs2_inode *)bh->b_data;
726
727 ufs2_read_inode(inode,
728 ufs2_inode + ufs_inotofsbo(inode->i_ino));
729 } else {
730 struct ufs_inode *ufs_inode = (struct ufs_inode *)bh->b_data;
731
732 ufs1_read_inode(inode, ufs_inode + ufs_inotofsbo(inode->i_ino));
733 }
734
735 inode->i_blksize = PAGE_SIZE;/*This is the optimal IO size (for stat)*/
736 inode->i_version++;
737 ufsi->i_lastfrag =
738 (inode->i_size + uspi->s_fsize - 1) >> uspi->s_fshift;
739 ufsi->i_dir_start_lookup = 0;
740 ufsi->i_osync = 0; 740 ufsi->i_osync = 0;
741 741
742 ufs_set_inode_ops(inode); 742 ufs_set_inode_ops(inode);
@@ -745,6 +745,9 @@ ufs2_inode :
745 745
746 UFSD("EXIT\n"); 746 UFSD("EXIT\n");
747 return; 747 return;
748
749bad_inode:
750 make_bad_inode(inode);
748} 751}
749 752
750static int ufs_update_inode(struct inode * inode, int do_sync) 753static int ufs_update_inode(struct inode * inode, int do_sync)
diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c
index 12810baeb5d4..d9180020de63 100644
--- a/fs/xfs/linux-2.6/xfs_iops.c
+++ b/fs/xfs/linux-2.6/xfs_iops.c
@@ -419,16 +419,15 @@ xfs_vn_link(
419 int error; 419 int error;
420 420
421 ip = old_dentry->d_inode; /* inode being linked to */ 421 ip = old_dentry->d_inode; /* inode being linked to */
422 if (S_ISDIR(ip->i_mode))
423 return -EPERM;
424
425 tdvp = vn_from_inode(dir); 422 tdvp = vn_from_inode(dir);
426 vp = vn_from_inode(ip); 423 vp = vn_from_inode(ip);
427 424
425 VN_HOLD(vp);
428 error = bhv_vop_link(tdvp, vp, dentry, NULL); 426 error = bhv_vop_link(tdvp, vp, dentry, NULL);
429 if (likely(!error)) { 427 if (unlikely(error)) {
428 VN_RELE(vp);
429 } else {
430 VMODIFY(tdvp); 430 VMODIFY(tdvp);
431 VN_HOLD(vp);
432 xfs_validate_fields(ip, &vattr); 431 xfs_validate_fields(ip, &vattr);
433 d_instantiate(dentry, ip); 432 d_instantiate(dentry, ip);
434 } 433 }
diff --git a/fs/xfs/linux-2.6/xfs_linux.h b/fs/xfs/linux-2.6/xfs_linux.h
index aa26ab906c88..028eb17ec2ed 100644
--- a/fs/xfs/linux-2.6/xfs_linux.h
+++ b/fs/xfs/linux-2.6/xfs_linux.h
@@ -140,9 +140,7 @@ BUFFER_FNS(PrivateStart, unwritten);
140#define current_pid() (current->pid) 140#define current_pid() (current->pid)
141#define current_fsuid(cred) (current->fsuid) 141#define current_fsuid(cred) (current->fsuid)
142#define current_fsgid(cred) (current->fsgid) 142#define current_fsgid(cred) (current->fsgid)
143#define current_set_flags(f) (current->flags |= (f))
144#define current_test_flags(f) (current->flags & (f)) 143#define current_test_flags(f) (current->flags & (f))
145#define current_clear_flags(f) (current->flags & ~(f))
146#define current_set_flags_nested(sp, f) \ 144#define current_set_flags_nested(sp, f) \
147 (*(sp) = current->flags, current->flags |= (f)) 145 (*(sp) = current->flags, current->flags |= (f))
148#define current_clear_flags_nested(sp, f) \ 146#define current_clear_flags_nested(sp, f) \
diff --git a/fs/xfs/linux-2.6/xfs_vnode.h b/fs/xfs/linux-2.6/xfs_vnode.h
index 35c6a01963a7..c42b3221b20c 100644
--- a/fs/xfs/linux-2.6/xfs_vnode.h
+++ b/fs/xfs/linux-2.6/xfs_vnode.h
@@ -93,7 +93,7 @@ typedef enum {
93 */ 93 */
94static inline struct bhv_vnode *vn_from_inode(struct inode *inode) 94static inline struct bhv_vnode *vn_from_inode(struct inode *inode)
95{ 95{
96 return (bhv_vnode_t *)list_entry(inode, bhv_vnode_t, v_inode); 96 return container_of(inode, bhv_vnode_t, v_inode);
97} 97}
98static inline struct inode *vn_to_inode(struct bhv_vnode *vnode) 98static inline struct inode *vn_to_inode(struct bhv_vnode *vnode)
99{ 99{
diff --git a/fs/xfs/xfs_behavior.h b/fs/xfs/xfs_behavior.h
index 1d8ff103201c..6e6e56fb352d 100644
--- a/fs/xfs/xfs_behavior.h
+++ b/fs/xfs/xfs_behavior.h
@@ -78,15 +78,12 @@
78 * 78 *
79 */ 79 */
80 80
81struct bhv_head_lock;
82
83/* 81/*
84 * Behavior head. Head of the chain of behaviors. 82 * Behavior head. Head of the chain of behaviors.
85 * Contained within each virtualized object data structure. 83 * Contained within each virtualized object data structure.
86 */ 84 */
87typedef struct bhv_head { 85typedef struct bhv_head {
88 struct bhv_desc *bh_first; /* first behavior in chain */ 86 struct bhv_desc *bh_first; /* first behavior in chain */
89 struct bhv_head_lock *bh_lockp; /* pointer to lock info struct */
90} bhv_head_t; 87} bhv_head_t;
91 88
92/* 89/*
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 5fa0adb7e173..86c1bf0bba9e 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -1961,9 +1961,9 @@ xfs_iunlink_remove(
1961 xfs_agino_t agino; 1961 xfs_agino_t agino;
1962 xfs_agino_t next_agino; 1962 xfs_agino_t next_agino;
1963 xfs_buf_t *last_ibp; 1963 xfs_buf_t *last_ibp;
1964 xfs_dinode_t *last_dip; 1964 xfs_dinode_t *last_dip = NULL;
1965 short bucket_index; 1965 short bucket_index;
1966 int offset, last_offset; 1966 int offset, last_offset = 0;
1967 int error; 1967 int error;
1968 int agi_ok; 1968 int agi_ok;
1969 1969
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index d8f5d4cbe8b7..e730328636c3 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -1740,10 +1740,10 @@ xlog_write(xfs_mount_t * mp,
1740 xlog_in_core_t **commit_iclog, 1740 xlog_in_core_t **commit_iclog,
1741 uint flags) 1741 uint flags)
1742{ 1742{
1743 xlog_t *log = mp->m_log; 1743 xlog_t *log = mp->m_log;
1744 xlog_ticket_t *ticket = (xlog_ticket_t *)tic; 1744 xlog_ticket_t *ticket = (xlog_ticket_t *)tic;
1745 xlog_in_core_t *iclog = NULL; /* ptr to current in-core log */
1745 xlog_op_header_t *logop_head; /* ptr to log operation header */ 1746 xlog_op_header_t *logop_head; /* ptr to log operation header */
1746 xlog_in_core_t *iclog; /* ptr to current in-core log */
1747 __psint_t ptr; /* copy address into data region */ 1747 __psint_t ptr; /* copy address into data region */
1748 int len; /* # xlog_write() bytes 2 still copy */ 1748 int len; /* # xlog_write() bytes 2 still copy */
1749 int index; /* region index currently copying */ 1749 int index; /* region index currently copying */
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 55b4237c2153..3cb678e3a132 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -990,6 +990,8 @@ xlog_find_zeroed(
990 xfs_daddr_t num_scan_bblks; 990 xfs_daddr_t num_scan_bblks;
991 int error, log_bbnum = log->l_logBBsize; 991 int error, log_bbnum = log->l_logBBsize;
992 992
993 *blk_no = 0;
994
993 /* check totally zeroed log */ 995 /* check totally zeroed log */
994 bp = xlog_get_bp(log, 1); 996 bp = xlog_get_bp(log, 1);
995 if (!bp) 997 if (!bp)
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index 10dbf203c62f..4be5c0b2d296 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -1721,15 +1721,14 @@ xfs_mount_log_sbunit(
1721 * is present to prevent thrashing). 1721 * is present to prevent thrashing).
1722 */ 1722 */
1723 1723
1724#ifdef CONFIG_HOTPLUG_CPU
1724/* 1725/*
1725 * hot-plug CPU notifier support. 1726 * hot-plug CPU notifier support.
1726 * 1727 *
1727 * We cannot use the hotcpu_register() function because it does 1728 * We need a notifier per filesystem as we need to be able to identify
1728 * not allow notifier instances. We need a notifier per filesystem 1729 * the filesystem to balance the counters out. This is achieved by
1729 * as we need to be able to identify the filesystem to balance 1730 * having a notifier block embedded in the xfs_mount_t and doing pointer
1730 * the counters out. This is achieved by having a notifier block 1731 * magic to get the mount pointer from the notifier block address.
1731 * embedded in the xfs_mount_t and doing pointer magic to get the
1732 * mount pointer from the notifier block address.
1733 */ 1732 */
1734STATIC int 1733STATIC int
1735xfs_icsb_cpu_notify( 1734xfs_icsb_cpu_notify(
@@ -1779,6 +1778,7 @@ xfs_icsb_cpu_notify(
1779 1778
1780 return NOTIFY_OK; 1779 return NOTIFY_OK;
1781} 1780}
1781#endif /* CONFIG_HOTPLUG_CPU */
1782 1782
1783int 1783int
1784xfs_icsb_init_counters( 1784xfs_icsb_init_counters(
@@ -1791,9 +1791,11 @@ xfs_icsb_init_counters(
1791 if (mp->m_sb_cnts == NULL) 1791 if (mp->m_sb_cnts == NULL)
1792 return -ENOMEM; 1792 return -ENOMEM;
1793 1793
1794#ifdef CONFIG_HOTPLUG_CPU
1794 mp->m_icsb_notifier.notifier_call = xfs_icsb_cpu_notify; 1795 mp->m_icsb_notifier.notifier_call = xfs_icsb_cpu_notify;
1795 mp->m_icsb_notifier.priority = 0; 1796 mp->m_icsb_notifier.priority = 0;
1796 register_cpu_notifier(&mp->m_icsb_notifier); 1797 register_hotcpu_notifier(&mp->m_icsb_notifier);
1798#endif /* CONFIG_HOTPLUG_CPU */
1797 1799
1798 for_each_online_cpu(i) { 1800 for_each_online_cpu(i) {
1799 cntp = (xfs_icsb_cnts_t *)per_cpu_ptr(mp->m_sb_cnts, i); 1801 cntp = (xfs_icsb_cnts_t *)per_cpu_ptr(mp->m_sb_cnts, i);
@@ -1812,7 +1814,7 @@ xfs_icsb_destroy_counters(
1812 xfs_mount_t *mp) 1814 xfs_mount_t *mp)
1813{ 1815{
1814 if (mp->m_sb_cnts) { 1816 if (mp->m_sb_cnts) {
1815 unregister_cpu_notifier(&mp->m_icsb_notifier); 1817 unregister_hotcpu_notifier(&mp->m_icsb_notifier);
1816 free_percpu(mp->m_sb_cnts); 1818 free_percpu(mp->m_sb_cnts);
1817 } 1819 }
1818} 1820}
@@ -2026,7 +2028,7 @@ xfs_icsb_balance_counter(
2026 xfs_sb_field_t field, 2028 xfs_sb_field_t field,
2027 int flags) 2029 int flags)
2028{ 2030{
2029 uint64_t count, resid = 0; 2031 uint64_t count, resid;
2030 int weight = num_online_cpus(); 2032 int weight = num_online_cpus();
2031 int s; 2033 int s;
2032 2034
@@ -2058,6 +2060,7 @@ xfs_icsb_balance_counter(
2058 break; 2060 break;
2059 default: 2061 default:
2060 BUG(); 2062 BUG();
2063 count = resid = 0; /* quiet, gcc */
2061 break; 2064 break;
2062 } 2065 }
2063 2066
diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c
index 0c1e42b037ef..5a0b678956e0 100644
--- a/fs/xfs/xfs_rtalloc.c
+++ b/fs/xfs/xfs_rtalloc.c
@@ -1929,7 +1929,7 @@ xfs_growfs_rt(
1929 /* 1929 /*
1930 * Initial error checking. 1930 * Initial error checking.
1931 */ 1931 */
1932 if (mp->m_rtdev_targp || mp->m_rbmip == NULL || 1932 if (mp->m_rtdev_targp == NULL || mp->m_rbmip == NULL ||
1933 (nrblocks = in->newblocks) <= sbp->sb_rblocks || 1933 (nrblocks = in->newblocks) <= sbp->sb_rblocks ||
1934 (sbp->sb_rblocks && (in->extsize != sbp->sb_rextsize))) 1934 (sbp->sb_rblocks && (in->extsize != sbp->sb_rextsize)))
1935 return XFS_ERROR(EINVAL); 1935 return XFS_ERROR(EINVAL);
diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h
index cb65c3a603f5..9dc88b380608 100644
--- a/fs/xfs/xfs_trans.h
+++ b/fs/xfs/xfs_trans.h
@@ -338,8 +338,6 @@ typedef void (*xfs_trans_callback_t)(struct xfs_trans *, void *);
338typedef struct xfs_trans { 338typedef struct xfs_trans {
339 unsigned int t_magic; /* magic number */ 339 unsigned int t_magic; /* magic number */
340 xfs_log_callback_t t_logcb; /* log callback struct */ 340 xfs_log_callback_t t_logcb; /* log callback struct */
341 struct xfs_trans *t_forw; /* async list pointers */
342 struct xfs_trans *t_back; /* async list pointers */
343 unsigned int t_type; /* transaction type */ 341 unsigned int t_type; /* transaction type */
344 unsigned int t_log_res; /* amt of log space resvd */ 342 unsigned int t_log_res; /* amt of log space resvd */
345 unsigned int t_log_count; /* count for perm log res */ 343 unsigned int t_log_count; /* count for perm log res */
@@ -364,9 +362,11 @@ typedef struct xfs_trans {
364 long t_res_fdblocks_delta; /* on-disk only chg */ 362 long t_res_fdblocks_delta; /* on-disk only chg */
365 long t_frextents_delta;/* superblock freextents chg*/ 363 long t_frextents_delta;/* superblock freextents chg*/
366 long t_res_frextents_delta; /* on-disk only chg */ 364 long t_res_frextents_delta; /* on-disk only chg */
365#ifdef DEBUG
367 long t_ag_freeblks_delta; /* debugging counter */ 366 long t_ag_freeblks_delta; /* debugging counter */
368 long t_ag_flist_delta; /* debugging counter */ 367 long t_ag_flist_delta; /* debugging counter */
369 long t_ag_btree_delta; /* debugging counter */ 368 long t_ag_btree_delta; /* debugging counter */
369#endif
370 long t_dblocks_delta;/* superblock dblocks change */ 370 long t_dblocks_delta;/* superblock dblocks change */
371 long t_agcount_delta;/* superblock agcount change */ 371 long t_agcount_delta;/* superblock agcount change */
372 long t_imaxpct_delta;/* superblock imaxpct change */ 372 long t_imaxpct_delta;/* superblock imaxpct change */
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index 00a6b7dc24a0..23cfa5837728 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -2603,8 +2603,7 @@ xfs_link(
2603 vn_trace_entry(src_vp, __FUNCTION__, (inst_t *)__return_address); 2603 vn_trace_entry(src_vp, __FUNCTION__, (inst_t *)__return_address);
2604 2604
2605 target_namelen = VNAMELEN(dentry); 2605 target_namelen = VNAMELEN(dentry);
2606 if (VN_ISDIR(src_vp)) 2606 ASSERT(!VN_ISDIR(src_vp));
2607 return XFS_ERROR(EPERM);
2608 2607
2609 sip = xfs_vtoi(src_vp); 2608 sip = xfs_vtoi(src_vp);
2610 tdp = XFS_BHVTOI(target_dir_bdp); 2609 tdp = XFS_BHVTOI(target_dir_bdp);
@@ -2699,9 +2698,8 @@ xfs_link(
2699 xfs_trans_log_inode(tp, tdp, XFS_ILOG_CORE); 2698 xfs_trans_log_inode(tp, tdp, XFS_ILOG_CORE);
2700 2699
2701 error = xfs_bumplink(tp, sip); 2700 error = xfs_bumplink(tp, sip);
2702 if (error) { 2701 if (error)
2703 goto abort_return; 2702 goto abort_return;
2704 }
2705 2703
2706 /* 2704 /*
2707 * If this is a synchronous mount, make sure that the 2705 * If this is a synchronous mount, make sure that the
@@ -2719,9 +2717,8 @@ xfs_link(
2719 } 2717 }
2720 2718
2721 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES, NULL); 2719 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES, NULL);
2722 if (error) { 2720 if (error)
2723 goto std_return; 2721 goto std_return;
2724 }
2725 2722
2726 /* Fall through to std_return with error = 0. */ 2723 /* Fall through to std_return with error = 0. */
2727std_return: 2724std_return:
@@ -2742,6 +2739,8 @@ std_return:
2742 xfs_trans_cancel(tp, cancel_flags); 2739 xfs_trans_cancel(tp, cancel_flags);
2743 goto std_return; 2740 goto std_return;
2744} 2741}
2742
2743
2745/* 2744/*
2746 * xfs_mkdir 2745 * xfs_mkdir
2747 * 2746 *