aboutsummaryrefslogtreecommitdiffstats
path: root/Documentation/filesystems/quota.txt
blob: 5e8de25bf0f1ec86aac76ad6a102d557b883603f (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65

Quota subsystem
===============

The quota subsystem allows a system administrator to set limits on used space
and on the number of used inodes (an inode is a filesystem structure which is
associated with each file or directory) for users and/or groups. For both used
space and number of used inodes there are actually two limits. The first one is
called the softlimit and the second one the hardlimit. A user can never exceed
a hardlimit for any resource (unless he has the CAP_SYS_RESOURCE capability).
A user is allowed to exceed a softlimit, but only for a limited period of time.
This period is called the "grace period" or "grace time". When the grace time
is over, the user is not able to allocate more space/inodes until he frees
enough of them to get below the softlimit.

Quota limits (and amount of grace time) are set independently for each
filesystem.

For more details about quota design, see the documentation in the quota-tools
package (http://sourceforge.net/projects/linuxquota).

Quota netlink interface
=======================
When a user exceeds a softlimit, runs out of grace time or reaches a hardlimit,
the quota subsystem traditionally printed a message to the controlling terminal
of the process which caused the excess. This method has the disadvantage that
when the user is using a graphical desktop he usually cannot see the message.
Thus the quota netlink interface has been designed to pass information about
the above events to userspace. There they can be captured by an application
and processed accordingly.

The interface uses the generic netlink framework (see
http://lwn.net/Articles/208755/ and http://people.suug.ch/~tgr/libnl/ for more
details about this layer). The name of the quota generic netlink interface
is "VFS_DQUOT". Definitions of constants below are in <linux/quota.h>.
  Currently, the interface supports only one message type, QUOTA_NL_C_WARNING.
This command is used to send a notification about any of the above mentioned
events. Each message has six attributes. These are (type of the argument is
in parentheses):
        QUOTA_NL_A_QTYPE (u32)
	  - type of quota being exceeded (one of USRQUOTA, GRPQUOTA)
        QUOTA_NL_A_EXCESS_ID (u64)
	  - UID/GID (depends on quota type) of user / group whose limit
	    is being exceeded.
        QUOTA_NL_A_CAUSED_ID (u64)
	  - UID of a user who caused the event
        QUOTA_NL_A_WARNING (u32)
	  - what kind of limit is exceeded:
		QUOTA_NL_IHARDWARN - inode hardlimit
		QUOTA_NL_ISOFTLONGWARN - inode softlimit is exceeded longer
		  than given grace period
		QUOTA_NL_ISOFTWARN - inode softlimit
		QUOTA_NL_BHARDWARN - space (block) hardlimit
		QUOTA_NL_BSOFTLONGWARN - space (block) softlimit is exceeded
		  longer than given grace period.
		QUOTA_NL_BSOFTWARN - space (block) softlimit
	  - four warnings are also defined for the event when user stops
	    exceeding some limit:
		QUOTA_NL_IHARDBELOW - inode hardlimit
		QUOTA_NL_ISOFTBELOW - inode softlimit
		QUOTA_NL_BHARDBELOW - space (block) hardlimit
		QUOTA_NL_BSOFTBELOW - space (block) softlimit
        QUOTA_NL_A_DEV_MAJOR (u32)
	  - major number of a device with the affected filesystem
        QUOTA_NL_A_DEV_MINOR (u32)
	  - minor number of a device with the affected filesystem
lt;matthew@wil.cx> 2008-03-07 21:55:58 -0500 committer Matthew Wilcox <willy@linux.intel.com> 2008-04-17 10:42:34 -0400 Generic semaphore implementation' href='/cgit/cgit.cgi/litmus-rt.git/commit/kernel/semaphore.c?h=wip-extra-debug&id=64ac24e738823161693bf791f87adc802cf529ff'>64ac24e73882
00b41ec2611d
bf726eab3711
00b41ec2611d

64ac24e73882





714493cd5468









f06d96865861





00b41ec2611d
bf726eab3711
00b41ec2611d

f06d96865861





64ac24e73882




714493cd5468
64ac24e73882





















714493cd5468









f1241c87a16c





00b41ec2611d
bf726eab3711
00b41ec2611d

f1241c87a16c





714493cd5468






64ac24e73882




00b41ec2611d


64ac24e73882









00b41ec2611d
64ac24e73882


f1241c87a16c


64ac24e73882
f1241c87a16c

64ac24e73882
64ac24e73882


bf726eab3711
00b41ec2611d

64ac24e73882

5b2becc8cffd
00b41ec2611d


64ac24e73882

f1241c87a16c
64ac24e73882
00b41ec2611d

64ac24e73882

00b41ec2611d




64ac24e73882
00b41ec2611d
64ac24e73882



f1241c87a16c
64ac24e73882



f1241c87a16c
64ac24e73882

f06d96865861

f1241c87a16c





f06d96865861

64ac24e73882

b17170b2fac9

00b41ec2611d

b17170b2fac9
64ac24e73882
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263




                                                        



















                                                                         







                            
                         
 

                                                                
                                                           
                                                                        

                                                 










                                                                         




                                             


                                   




                                                  








                                                                             





                                             
                                   
                             

                                                   





                                                  









                                                                            





                                             
                                   
                             

                                              





                                                  




                                                                       
                                                               





















                                                                          









                                                                             





                                                     
                                   
                             

                                                      





                                                  






                                                                      




                                             


                                                









                                                  
               


  


                                                                   
   

                                                                             
 


                                           
                                                     

                           

                  
                                                      


                                         

                                              
                                                    
                                          

                                 

         




                               
                               
                      



                                                          
                                                                       



                                                                       
                                                                            

 

                                                                  





                                                                               

 

                                                        

                                                                               

                                
                                      
 
/*
 * Copyright (c) 2008 Intel Corporation
 * Author: Matthew Wilcox <willy@linux.intel.com>
 *
 * Distributed under the terms of the GNU GPL, version 2
 *
 * This file implements counting semaphores.
 * A counting semaphore may be acquired 'n' times before sleeping.
 * See mutex.c for single-acquisition sleeping locks which enforce
 * rules which allow code to be debugged more easily.
 */

/*
 * Some notes on the implementation:
 *
 * The spinlock controls access to the other members of the semaphore.
 * down_trylock() and up() can be called from interrupt context, so we
 * have to disable interrupts when taking the lock.  It turns out various
 * parts of the kernel expect to be able to use down() on a semaphore in
 * interrupt context when they know it will succeed, so we have to use
 * irqsave variants for down(), down_interruptible() and down_killable()
 * too.
 *
 * The ->count variable represents how many more tasks can acquire this
 * semaphore.  If it's zero, there may be tasks waiting on the wait_list.
 */

#include <linux/compiler.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/sched.h>
#include <linux/semaphore.h>
#include <linux/spinlock.h>
#include <linux/ftrace.h>

static noinline void __down(struct semaphore *sem);
static noinline int __down_interruptible(struct semaphore *sem);
static noinline int __down_killable(struct semaphore *sem);
static noinline int __down_timeout(struct semaphore *sem, long jiffies);
static noinline void __up(struct semaphore *sem);

/**
 * down - acquire the semaphore
 * @sem: the semaphore to be acquired
 *
 * Takes the semaphore.  When the count shows that no further task may
 * acquire it, the caller is put to sleep until the semaphore is released.
 *
 * Use of this function is deprecated, please use down_interruptible() or
 * down_killable() instead.
 */
void down(struct semaphore *sem)
{
	unsigned long flags;

	spin_lock_irqsave(&sem->lock, flags);
	if (unlikely(sem->count <= 0)) {
		/* Contended: wait on the semaphore's wait list. */
		__down(sem);
	} else {
		/* Uncontended: just consume one count. */
		sem->count--;
	}
	spin_unlock_irqrestore(&sem->lock, flags);
}
EXPORT_SYMBOL(down);

/**
 * down_interruptible - acquire the semaphore unless interrupted
 * @sem: the semaphore to be acquired
 *
 * Tries to take the semaphore.  When no more tasks are allowed to acquire
 * it, the caller is put to sleep.  Returns 0 once the semaphore has been
 * acquired, or -EINTR if the sleep was interrupted by a signal.
 */
int down_interruptible(struct semaphore *sem)
{
	unsigned long flags;
	int ret;

	spin_lock_irqsave(&sem->lock, flags);
	if (unlikely(sem->count <= 0)) {
		/* Contended: the slow path reports success or -EINTR. */
		ret = __down_interruptible(sem);
	} else {
		sem->count--;
		ret = 0;
	}
	spin_unlock_irqrestore(&sem->lock, flags);

	return ret;
}
EXPORT_SYMBOL(down_interruptible);

/**
 * down_killable - acquire the semaphore unless killed
 * @sem: the semaphore to be acquired
 *
 * Tries to take the semaphore.  When no more tasks are allowed to acquire
 * it, the caller is put to sleep.  Returns 0 once the semaphore has been
 * acquired, or -EINTR if the sleep was interrupted by a fatal signal.
 */
int down_killable(struct semaphore *sem)
{
	unsigned long flags;
	int ret;

	spin_lock_irqsave(&sem->lock, flags);
	if (unlikely(sem->count <= 0)) {
		/* Contended: the slow path reports success or -EINTR. */
		ret = __down_killable(sem);
	} else {
		sem->count--;
		ret = 0;
	}
	spin_unlock_irqrestore(&sem->lock, flags);

	return ret;
}
EXPORT_SYMBOL(down_killable);

/**
 * down_trylock - try to acquire the semaphore, without waiting
 * @sem: the semaphore to be acquired
 *
 * Try to acquire the semaphore atomically.  Returns 0 if the semaphore has
 * been acquired successfully or 1 if it cannot be acquired.
 *
 * NOTE: This return value is inverted from both spin_trylock and
 * mutex_trylock!  Be careful about this when converting code.
 *
 * Unlike mutex_trylock, this function can be used from interrupt context,
 * and the semaphore can be released by any task or interrupt.
 */
int down_trylock(struct semaphore *sem)
{
	unsigned long flags;
	int remaining;
	int failed;

	spin_lock_irqsave(&sem->lock, flags);
	remaining = sem->count - 1;
	failed = (remaining < 0);
	/* Only commit the decrement when a count was actually available. */
	if (likely(!failed))
		sem->count = remaining;
	spin_unlock_irqrestore(&sem->lock, flags);

	return failed;
}
EXPORT_SYMBOL(down_trylock);

/**
 * down_timeout - acquire the semaphore within a specified time
 * @sem: the semaphore to be acquired
 * @jiffies: how long to wait before failing
 *
 * Tries to take the semaphore.  When no more tasks are allowed to acquire
 * it, the caller is put to sleep.  Returns 0 if the semaphore was acquired,
 * or -ETIME if it was not released within the given number of jiffies.
 */
int down_timeout(struct semaphore *sem, long jiffies)
{
	unsigned long flags;
	int ret;

	spin_lock_irqsave(&sem->lock, flags);
	if (unlikely(sem->count <= 0)) {
		/* Contended: the slow path reports success or -ETIME. */
		ret = __down_timeout(sem, jiffies);
	} else {
		sem->count--;
		ret = 0;
	}
	spin_unlock_irqrestore(&sem->lock, flags);

	return ret;
}
EXPORT_SYMBOL(down_timeout);

/**
 * up - release the semaphore
 * @sem: the semaphore to release
 *
 * Releases the semaphore.  Unlike mutexes, up() may be called from any
 * context and even by tasks which have never called down().
 */
void up(struct semaphore *sem)
{
	unsigned long flags;

	spin_lock_irqsave(&sem->lock, flags);
	if (unlikely(!list_empty(&sem->wait_list))) {
		/* Somebody is waiting: let __up() wake the first waiter. */
		__up(sem);
	} else {
		/* Nobody is waiting: give the count back. */
		sem->count++;
	}
	spin_unlock_irqrestore(&sem->lock, flags);
}
EXPORT_SYMBOL(up);

/* Functions for the contended case */

/*
 * One sleeping task queued on a semaphore's wait_list.  __down_common()
 * places an instance on its own stack for the duration of the wait.
 */
struct semaphore_waiter {
	struct list_head list;		/* link in sem->wait_list */
	struct task_struct *task;	/* task that __up() wakes */
	int up;				/* set to 1 by __up() on wake-up */
};

/*
 * Common slow path for all sleeping down*() variants.
 *
 * Entered with sem->lock held (the public entry points take it before
 * calling the __down*() wrappers) and returns with it held again.  The
 * caller is queued at the tail of sem->wait_list and sleeps in @state
 * until one of the following happens:
 *   - __up() marks this waiter as woken: returns 0;
 *   - signal_pending_state() reports a signal for @state: returns -EINTR;
 *   - @timeout drops to zero or below: returns -ETIME.
 *
 * Because this function is inlined, the 'state' parameter will be
 * constant, and thus optimised away by the compiler.  Likewise the
 * 'timeout' parameter for the cases without timeouts.
 */
static inline int __sched __down_common(struct semaphore *sem, long state,
								long timeout)
{
	struct task_struct *task = current;
	struct semaphore_waiter waiter;

	/* Queue at the tail; __up() always picks the first entry. */
	list_add_tail(&waiter.list, &sem->wait_list);
	waiter.task = task;
	waiter.up = 0;

	for (;;) {
		if (signal_pending_state(state, task))
			goto interrupted;
		if (timeout <= 0)
			goto timed_out;
		__set_task_state(task, state);
		/*
		 * Drop the lock while sleeping.  The plain _irq variants are
		 * used here, so interrupts are enabled for the sleep and
		 * disabled again once the lock has been retaken.
		 */
		spin_unlock_irq(&sem->lock);
		/* NOTE(review): presumably the time still remaining - confirm */
		timeout = schedule_timeout(timeout);
		spin_lock_irq(&sem->lock);
		/*
		 * __up() has already unlinked this waiter from the list
		 * before setting ->up, so no list_del() is needed here.
		 */
		if (waiter.up)
			return 0;
	}

	/* Failure paths: this waiter is still queued and must unlink itself. */
 timed_out:
	list_del(&waiter.list);
	return -ETIME;

 interrupted:
	list_del(&waiter.list);
	return -EINTR;
}

/*
 * Slow path of down(): sleep in TASK_UNINTERRUPTIBLE with
 * MAX_SCHEDULE_TIMEOUT.  The return value of __down_common() is discarded.
 */
static noinline void __sched __down(struct semaphore *sem)
{
	__down_common(sem, TASK_UNINTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT);
}

/*
 * Slow path of down_interruptible(): sleep in TASK_INTERRUPTIBLE with
 * MAX_SCHEDULE_TIMEOUT and pass the result of __down_common() back.
 */
static noinline int __sched __down_interruptible(struct semaphore *sem)
{
	return __down_common(sem, TASK_INTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT);
}

/*
 * Slow path of down_killable(): sleep in TASK_KILLABLE with
 * MAX_SCHEDULE_TIMEOUT and pass the result of __down_common() back.
 */
static noinline int __sched __down_killable(struct semaphore *sem)
{
	return __down_common(sem, TASK_KILLABLE, MAX_SCHEDULE_TIMEOUT);
}

/*
 * Slow path of down_timeout(): sleep in TASK_UNINTERRUPTIBLE for at most
 * @jiffies and pass the result of __down_common() back.
 */
static noinline int __sched __down_timeout(struct semaphore *sem, long jiffies)
{
	return __down_common(sem, TASK_UNINTERRUPTIBLE, jiffies);
}

/*
 * Slow path of up(): hand the semaphore to the first task on the wait
 * list.  Called with sem->lock held and a non-empty wait_list.  sem->count
 * is left untouched here.
 */
static noinline void __sched __up(struct semaphore *sem)
{
	struct semaphore_waiter *first;

	first = list_first_entry(&sem->wait_list,
				 struct semaphore_waiter, list);

	/* Unlink and flag the waiter before waking it up. */
	list_del(&first->list);
	first->up = 1;
	wake_up_process(first->task);
}