aboutsummaryrefslogtreecommitdiffstats
path: root/fs/ocfs2/journal.c
diff options
context:
space:
mode:
authorSrinivas Eeda <srinivas.eeda@oracle.com>2009-06-03 20:02:55 -0400
committerJoel Becker <joel.becker@oracle.com>2009-06-03 22:14:31 -0400
commit83273932fbefb6ceef9c0b82ac4d23900728f4d9 (patch)
tree7ece8471af3d2a22543542e990369aca47aa3d25 /fs/ocfs2/journal.c
parentedd45c08499a3e9d4c25431cd2b6a9ce5f692c92 (diff)
ocfs2: timer to queue scan of all orphan slots
When a dentry is unlinked, the unlinking node takes an EX on the dentry lock before moving the dentry to the orphan directory. Other nodes that have this dentry in cache have a PR on the same dentry lock. When the EX is requested, the other nodes flag the corresponding inode as MAYBE_ORPHANED during downconvert. The inode is finally deleted when the last node to iput the inode sees that i_nlink==0 and the MAYBE_ORPHANED flag is set. A problem arises if a node is forced to free dentry locks because of memory pressure. If this happens, the node will no longer get downconvert notifications for the dentries that have been unlinked on another node. If it also happens that node is actively using the corresponding inode and happens to be the one performing the last iput on that inode, it will fail to delete the inode as it will not have the MAYBE_ORPHANED flag set. This patch fixes this shortcoming by introducing a periodic scan of the orphan directories to delete such inodes. Care has been taken to distribute the workload across the cluster so that no one node has to perform the task all the time. Signed-off-by: Srinivas Eeda <srinivas.eeda@oracle.com> Signed-off-by: Joel Becker <joel.becker@oracle.com>
Diffstat (limited to 'fs/ocfs2/journal.c')
-rw-r--r--fs/ocfs2/journal.c107
1 file changed, 107 insertions, 0 deletions
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
index a20a0f1e37f..44ed768782e 100644
--- a/fs/ocfs2/journal.c
+++ b/fs/ocfs2/journal.c
@@ -28,6 +28,8 @@
28#include <linux/slab.h> 28#include <linux/slab.h>
29#include <linux/highmem.h> 29#include <linux/highmem.h>
30#include <linux/kthread.h> 30#include <linux/kthread.h>
31#include <linux/time.h>
32#include <linux/random.h>
31 33
32#define MLOG_MASK_PREFIX ML_JOURNAL 34#define MLOG_MASK_PREFIX ML_JOURNAL
33#include <cluster/masklog.h> 35#include <cluster/masklog.h>
@@ -52,6 +54,8 @@
52 54
53DEFINE_SPINLOCK(trans_inc_lock); 55DEFINE_SPINLOCK(trans_inc_lock);
54 56
57#define ORPHAN_SCAN_SCHEDULE_TIMEOUT 300000
58
55static int ocfs2_force_read_journal(struct inode *inode); 59static int ocfs2_force_read_journal(struct inode *inode);
56static int ocfs2_recover_node(struct ocfs2_super *osb, 60static int ocfs2_recover_node(struct ocfs2_super *osb,
57 int node_num, int slot_num); 61 int node_num, int slot_num);
@@ -1841,6 +1845,109 @@ bail:
1841 return status; 1845 return status;
1842} 1846}
1843 1847
1848/*
1849 * Scan timer should get fired every ORPHAN_SCAN_SCHEDULE_TIMEOUT. Add some
1850 * randomness to the timeout to minimize multple nodes firing the timer at the
1851 * same time.
1852 */
1853static inline unsigned long ocfs2_orphan_scan_timeout(void)
1854{
1855 unsigned long time;
1856
1857 get_random_bytes(&time, sizeof(time));
1858 time = ORPHAN_SCAN_SCHEDULE_TIMEOUT + (time % 5000);
1859 return msecs_to_jiffies(time);
1860}
1861
1862/*
1863 * ocfs2_queue_orphan_scan calls ocfs2_queue_recovery_completion for
1864 * every slot, queuing a recovery of the slot on the ocfs2_wq thread. This
1865 * is done to catch any orphans that are left over in orphan directories.
1866 *
1867 * ocfs2_queue_orphan_scan gets called every ORPHAN_SCAN_SCHEDULE_TIMEOUT
1868 * seconds. It gets an EX lock on os_lockres and checks sequence number
1869 * stored in LVB. If the sequence number has changed, it means some other
1870 * node has done the scan. This node skips the scan and tracks the
1871 * sequence number. If the sequence number didn't change, it means a scan
1872 * hasn't happened. The node queues a scan and increments the
1873 * sequence number in the LVB.
1874 */
1875void ocfs2_queue_orphan_scan(struct ocfs2_super *osb)
1876{
1877 struct ocfs2_orphan_scan *os;
1878 int status, i;
1879 u32 seqno = 0;
1880
1881 os = &osb->osb_orphan_scan;
1882
1883 status = ocfs2_orphan_scan_lock(osb, &seqno, DLM_LOCK_EX);
1884 if (status < 0) {
1885 if (status != -EAGAIN)
1886 mlog_errno(status);
1887 goto out;
1888 }
1889
1890 if (os->os_seqno != seqno) {
1891 os->os_seqno = seqno;
1892 goto unlock;
1893 }
1894
1895 for (i = 0; i < osb->max_slots; i++)
1896 ocfs2_queue_recovery_completion(osb->journal, i, NULL, NULL,
1897 NULL);
1898 /*
1899 * We queued a recovery on orphan slots, increment the sequence
1900 * number and update LVB so other node will skip the scan for a while
1901 */
1902 seqno++;
1903unlock:
1904 ocfs2_orphan_scan_unlock(osb, seqno, DLM_LOCK_EX);
1905out:
1906 return;
1907}
1908
1909/* Worker task that gets fired every ORPHAN_SCAN_SCHEDULE_TIMEOUT millsec */
1910void ocfs2_orphan_scan_work(struct work_struct *work)
1911{
1912 struct ocfs2_orphan_scan *os;
1913 struct ocfs2_super *osb;
1914
1915 os = container_of(work, struct ocfs2_orphan_scan,
1916 os_orphan_scan_work.work);
1917 osb = os->os_osb;
1918
1919 mutex_lock(&os->os_lock);
1920 ocfs2_queue_orphan_scan(osb);
1921 schedule_delayed_work(&os->os_orphan_scan_work,
1922 ocfs2_orphan_scan_timeout());
1923 mutex_unlock(&os->os_lock);
1924}
1925
1926void ocfs2_orphan_scan_stop(struct ocfs2_super *osb)
1927{
1928 struct ocfs2_orphan_scan *os;
1929
1930 os = &osb->osb_orphan_scan;
1931 mutex_lock(&os->os_lock);
1932 cancel_delayed_work(&os->os_orphan_scan_work);
1933 mutex_unlock(&os->os_lock);
1934}
1935
1936int ocfs2_orphan_scan_init(struct ocfs2_super *osb)
1937{
1938 struct ocfs2_orphan_scan *os;
1939
1940 os = &osb->osb_orphan_scan;
1941 os->os_osb = osb;
1942 mutex_init(&os->os_lock);
1943
1944 INIT_DELAYED_WORK(&os->os_orphan_scan_work,
1945 ocfs2_orphan_scan_work);
1946 schedule_delayed_work(&os->os_orphan_scan_work,
1947 ocfs2_orphan_scan_timeout());
1948 return 0;
1949}
1950
1844struct ocfs2_orphan_filldir_priv { 1951struct ocfs2_orphan_filldir_priv {
1845 struct inode *head; 1952 struct inode *head;
1846 struct ocfs2_super *osb; 1953 struct ocfs2_super *osb;