Diffstat (limited to 'fs')
-rw-r--r--  fs/Makefile           |    4
-rw-r--r--  fs/afs/afs.h          |   23
-rw-r--r--  fs/afs/afs_fs.h       |    3
-rw-r--r--  fs/afs/dir.c          |   18
-rw-r--r--  fs/afs/file.c         |    2
-rw-r--r--  fs/afs/fsclient.c     |  298
-rw-r--r--  fs/afs/inode.c        |   10
-rw-r--r--  fs/afs/internal.h     |    6
-rw-r--r--  fs/afs/super.c        |   44
-rw-r--r--  fs/afs/vnode.c        |   85
-rw-r--r--  fs/afs/write.c        |    5
-rw-r--r--  fs/aio.c              |   28
-rw-r--r--  fs/anon_inodes.c      |  200
-rw-r--r--  fs/autofs/autofs_i.h  |    4
-rw-r--r--  fs/autofs/inode.c     |   47
-rw-r--r--  fs/autofs/root.c      |   83
-rw-r--r--  fs/autofs4/inode.c    |   16
-rw-r--r--  fs/autofs4/root.c     |   18
-rw-r--r--  fs/compat.c           |   49
-rw-r--r--  fs/eventfd.c          |  228
-rw-r--r--  fs/eventpoll.c        | 1178
-rw-r--r--  fs/exec.c             |   13
-rw-r--r--  fs/mpage.c            |  174
-rw-r--r--  fs/partitions/Kconfig |    2
-rw-r--r--  fs/partitions/efi.c   |   12
-rw-r--r--  fs/signalfd.c         |  349
-rw-r--r--  fs/timerfd.c          |  227
27 files changed, 2164 insertions, 962 deletions
diff --git a/fs/Makefile b/fs/Makefile
index 9edf4112bee0..720c29d57a62 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -22,6 +22,10 @@ endif
 obj-$(CONFIG_INOTIFY)		+= inotify.o
 obj-$(CONFIG_INOTIFY_USER)	+= inotify_user.o
 obj-$(CONFIG_EPOLL)		+= eventpoll.o
+obj-$(CONFIG_ANON_INODES)	+= anon_inodes.o
+obj-$(CONFIG_SIGNALFD)		+= signalfd.o
+obj-$(CONFIG_TIMERFD)		+= timerfd.o
+obj-$(CONFIG_EVENTFD)		+= eventfd.o
 obj-$(CONFIG_COMPAT)		+= compat.o compat_ioctl.o
 
 nfsd-$(CONFIG_NFSD) := nfsctl.o
diff --git a/fs/afs/afs.h b/fs/afs/afs.h
index 52d0752265b8..245257948140 100644
--- a/fs/afs/afs.h
+++ b/fs/afs/afs.h
@@ -16,6 +16,9 @@
 
 #define AFS_MAXCELLNAME	64	/* maximum length of a cell name */
 #define AFS_MAXVOLNAME	64	/* maximum length of a volume name */
+#define AFSNAMEMAX	256	/* maximum length of a filename plus NUL */
+#define AFSPATHMAX	1024	/* maximum length of a pathname plus NUL */
+#define AFSOPAQUEMAX	1024	/* maximum length of an opaque field */
 
 typedef unsigned	afs_volid_t;
 typedef unsigned	afs_vnodeid_t;
@@ -143,4 +146,24 @@ struct afs_volsync {
 	time_t		creation;	/* volume creation time */
 };
 
+/*
+ * AFS volume status record
+ */
+struct afs_volume_status {
+	u32	vid;			/* volume ID */
+	u32	parent_id;		/* parent volume ID */
+	u8	online;			/* true if volume currently online and available */
+	u8	in_service;		/* true if volume currently in service */
+	u8	blessed;		/* same as in_service */
+	u8	needs_salvage;		/* true if consistency checking required */
+	u32	type;			/* volume type (afs_voltype_t) */
+	u32	min_quota;		/* minimum space set aside (blocks) */
+	u32	max_quota;		/* maximum space this volume may occupy (blocks) */
+	u32	blocks_in_use;		/* space this volume currently occupies (blocks) */
+	u32	part_blocks_avail;	/* space available in volume's partition */
+	u32	part_max_blocks;	/* size of volume's partition */
+};
+
+#define AFS_BLOCK_SIZE	1024
+
 #endif /* AFS_H */
diff --git a/fs/afs/afs_fs.h b/fs/afs/afs_fs.h
index d963ef4daee8..a18c374ebe08 100644
--- a/fs/afs/afs_fs.h
+++ b/fs/afs/afs_fs.h
@@ -28,7 +28,8 @@ enum AFS_FS_Operations {
 	FSMAKEDIR		= 141,	/* AFS Create a directory */
 	FSREMOVEDIR		= 142,	/* AFS Remove a directory */
 	FSGIVEUPCALLBACKS	= 147,	/* AFS Discard callback promises */
-	FSGETVOLUMEINFO		= 148,	/* AFS Get root volume information */
+	FSGETVOLUMEINFO		= 148,	/* AFS Get information about a volume */
+	FSGETVOLUMESTATUS	= 149,	/* AFS Get volume status information */
 	FSGETROOTVOLUME		= 151,	/* AFS Get root volume name */
 	FSLOOKUP		= 161,	/* AFS lookup file in directory */
 	FSFETCHDATA64		= 65537, /* AFS Fetch file data */
diff --git a/fs/afs/dir.c b/fs/afs/dir.c
index 2fb31276196b..719af4fb15dc 100644
--- a/fs/afs/dir.c
+++ b/fs/afs/dir.c
@@ -497,7 +497,7 @@ static struct dentry *afs_lookup(struct inode *dir, struct dentry *dentry,
 
 	ASSERTCMP(dentry->d_inode, ==, NULL);
 
-	if (dentry->d_name.len > 255) {
+	if (dentry->d_name.len >= AFSNAMEMAX) {
 		_leave(" = -ENAMETOOLONG");
 		return ERR_PTR(-ENAMETOOLONG);
 	}
@@ -736,7 +736,7 @@ static int afs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
 	       dvnode->fid.vid, dvnode->fid.vnode, dentry->d_name.name, mode);
 
 	ret = -ENAMETOOLONG;
-	if (dentry->d_name.len > 255)
+	if (dentry->d_name.len >= AFSNAMEMAX)
 		goto error;
 
 	key = afs_request_key(dvnode->volume->cell);
@@ -801,7 +801,7 @@ static int afs_rmdir(struct inode *dir, struct dentry *dentry)
 	       dvnode->fid.vid, dvnode->fid.vnode, dentry->d_name.name);
 
 	ret = -ENAMETOOLONG;
-	if (dentry->d_name.len > 255)
+	if (dentry->d_name.len >= AFSNAMEMAX)
 		goto error;
 
 	key = afs_request_key(dvnode->volume->cell);
@@ -847,7 +847,7 @@ static int afs_unlink(struct inode *dir, struct dentry *dentry)
 	       dvnode->fid.vid, dvnode->fid.vnode, dentry->d_name.name);
 
 	ret = -ENAMETOOLONG;
-	if (dentry->d_name.len > 255)
+	if (dentry->d_name.len >= AFSNAMEMAX)
 		goto error;
 
 	key = afs_request_key(dvnode->volume->cell);
@@ -921,7 +921,7 @@ static int afs_create(struct inode *dir, struct dentry *dentry, int mode,
 	       dvnode->fid.vid, dvnode->fid.vnode, dentry->d_name.name, mode);
 
 	ret = -ENAMETOOLONG;
-	if (dentry->d_name.len > 255)
+	if (dentry->d_name.len >= AFSNAMEMAX)
 		goto error;
 
 	key = afs_request_key(dvnode->volume->cell);
@@ -990,7 +990,7 @@ static int afs_link(struct dentry *from, struct inode *dir,
 	       dentry->d_name.name);
 
 	ret = -ENAMETOOLONG;
-	if (dentry->d_name.len > 255)
+	if (dentry->d_name.len >= AFSNAMEMAX)
 		goto error;
 
 	key = afs_request_key(dvnode->volume->cell);
@@ -1038,11 +1038,11 @@ static int afs_symlink(struct inode *dir, struct dentry *dentry,
 	       content);
 
 	ret = -ENAMETOOLONG;
-	if (dentry->d_name.len > 255)
+	if (dentry->d_name.len >= AFSNAMEMAX)
 		goto error;
 
 	ret = -EINVAL;
-	if (strlen(content) > 1023)
+	if (strlen(content) >= AFSPATHMAX)
 		goto error;
 
 	key = afs_request_key(dvnode->volume->cell);
@@ -1112,7 +1112,7 @@ static int afs_rename(struct inode *old_dir, struct dentry *old_dentry,
 	       new_dentry->d_name.name);
 
 	ret = -ENAMETOOLONG;
-	if (new_dentry->d_name.len > 255)
+	if (new_dentry->d_name.len >= AFSNAMEMAX)
 		goto error;
 
 	key = afs_request_key(orig_dvnode->volume->cell);
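The eight hunks above make the same one-line change: the open-coded limit 255 becomes a named constant. AFSNAMEMAX (256) counts the terminating NUL, so "len >= AFSNAMEMAX" rejects exactly the same names as the old "len > 255"; likewise "strlen(content) >= AFSPATHMAX" matches the old "> 1023". A minimal userspace sketch (not from the patch) of the equivalence:

/*
 * Illustrative sketch: AFSNAMEMAX counts the terminating NUL, so the
 * two predicates agree for every possible length.
 */
#include <assert.h>

#define AFSNAMEMAX 256	/* maximum length of a filename plus NUL */

int main(void)
{
	unsigned len;

	for (len = 0; len < 512; len++)
		assert((len >= AFSNAMEMAX) == (len > 255));
	return 0;
}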
diff --git a/fs/afs/file.c b/fs/afs/file.c
index 3e25795e5a42..9c0e721d9fc2 100644
--- a/fs/afs/file.c
+++ b/fs/afs/file.c
@@ -236,7 +236,7 @@ static void afs_invalidatepage(struct page *page, unsigned long offset)
 {
 	int ret = 1;
 
-	kenter("{%lu},%lu", page->index, offset);
+	_enter("{%lu},%lu", page->index, offset);
 
 	BUG_ON(!PageLocked(page));
 
diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c
index 56cc0efa2a0c..5dff1308b6f0 100644
--- a/fs/afs/fsclient.c
+++ b/fs/afs/fsclient.c
@@ -202,6 +202,29 @@ static void xdr_encode_AFS_StoreStatus(__be32 **_bp, struct iattr *attr)
 }
 
 /*
+ * decode an AFSFetchVolumeStatus block
+ */
+static void xdr_decode_AFSFetchVolumeStatus(const __be32 **_bp,
+					    struct afs_volume_status *vs)
+{
+	const __be32 *bp = *_bp;
+
+	vs->vid			= ntohl(*bp++);
+	vs->parent_id		= ntohl(*bp++);
+	vs->online		= ntohl(*bp++);
+	vs->in_service		= ntohl(*bp++);
+	vs->blessed		= ntohl(*bp++);
+	vs->needs_salvage	= ntohl(*bp++);
+	vs->type		= ntohl(*bp++);
+	vs->min_quota		= ntohl(*bp++);
+	vs->max_quota		= ntohl(*bp++);
+	vs->blocks_in_use	= ntohl(*bp++);
+	vs->part_blocks_avail	= ntohl(*bp++);
+	vs->part_max_blocks	= ntohl(*bp++);
+	*_bp = bp;
+}
+
+/*
  * deliver reply data to an FS.FetchStatus
  */
 static int afs_deliver_fs_fetch_status(struct afs_call *call,
@@ -1450,3 +1473,278 @@ int afs_fs_setattr(struct afs_server *server, struct key *key,
 
 	return afs_make_call(&server->addr, call, GFP_NOFS, wait_mode);
 }
+
+/*
+ * deliver reply data to an FS.GetVolumeStatus
+ */
+static int afs_deliver_fs_get_volume_status(struct afs_call *call,
+					    struct sk_buff *skb, bool last)
+{
+	const __be32 *bp;
+	char *p;
+	int ret;
+
+	_enter("{%u},{%u},%d", call->unmarshall, skb->len, last);
+
+	switch (call->unmarshall) {
+	case 0:
+		call->offset = 0;
+		call->unmarshall++;
+
+		/* extract the returned status record */
+	case 1:
+		_debug("extract status");
+		ret = afs_extract_data(call, skb, last, call->buffer,
+				       12 * 4);
+		switch (ret) {
+		case 0:		break;
+		case -EAGAIN:	return 0;
+		default:	return ret;
+		}
+
+		bp = call->buffer;
+		xdr_decode_AFSFetchVolumeStatus(&bp, call->reply2);
+		call->offset = 0;
+		call->unmarshall++;
+
+		/* extract the volume name length */
+	case 2:
+		ret = afs_extract_data(call, skb, last, &call->tmp, 4);
+		switch (ret) {
+		case 0:		break;
+		case -EAGAIN:	return 0;
+		default:	return ret;
+		}
+
+		call->count = ntohl(call->tmp);
+		_debug("volname length: %u", call->count);
+		if (call->count >= AFSNAMEMAX)
+			return -EBADMSG;
+		call->offset = 0;
+		call->unmarshall++;
+
+		/* extract the volume name */
+	case 3:
+		_debug("extract volname");
+		if (call->count > 0) {
+			ret = afs_extract_data(call, skb, last, call->reply3,
+					       call->count);
+			switch (ret) {
+			case 0:		break;
+			case -EAGAIN:	return 0;
+			default:	return ret;
+			}
+		}
+
+		p = call->reply3;
+		p[call->count] = 0;
+		_debug("volname '%s'", p);
+
+		call->offset = 0;
+		call->unmarshall++;
+
+		/* extract the volume name padding */
+		if ((call->count & 3) == 0) {
+			call->unmarshall++;
+			goto no_volname_padding;
+		}
+		call->count = 4 - (call->count & 3);
+
+	case 4:
+		ret = afs_extract_data(call, skb, last, call->buffer,
+				       call->count);
+		switch (ret) {
+		case 0:		break;
+		case -EAGAIN:	return 0;
+		default:	return ret;
+		}
+
+		call->offset = 0;
+		call->unmarshall++;
+	no_volname_padding:
+
+		/* extract the offline message length */
+	case 5:
+		ret = afs_extract_data(call, skb, last, &call->tmp, 4);
+		switch (ret) {
+		case 0:		break;
+		case -EAGAIN:	return 0;
+		default:	return ret;
+		}
+
+		call->count = ntohl(call->tmp);
+		_debug("offline msg length: %u", call->count);
+		if (call->count >= AFSNAMEMAX)
+			return -EBADMSG;
+		call->offset = 0;
+		call->unmarshall++;
+
+		/* extract the offline message */
+	case 6:
+		_debug("extract offline");
+		if (call->count > 0) {
+			ret = afs_extract_data(call, skb, last, call->reply3,
+					       call->count);
+			switch (ret) {
+			case 0:		break;
+			case -EAGAIN:	return 0;
+			default:	return ret;
+			}
+		}
+
+		p = call->reply3;
+		p[call->count] = 0;
+		_debug("offline '%s'", p);
+
+		call->offset = 0;
+		call->unmarshall++;
+
+		/* extract the offline message padding */
+		if ((call->count & 3) == 0) {
+			call->unmarshall++;
+			goto no_offline_padding;
+		}
+		call->count = 4 - (call->count & 3);
+
+	case 7:
+		ret = afs_extract_data(call, skb, last, call->buffer,
+				       call->count);
+		switch (ret) {
+		case 0:		break;
+		case -EAGAIN:	return 0;
+		default:	return ret;
+		}
+
+		call->offset = 0;
+		call->unmarshall++;
+	no_offline_padding:
+
+		/* extract the message of the day length */
+	case 8:
+		ret = afs_extract_data(call, skb, last, &call->tmp, 4);
+		switch (ret) {
+		case 0:		break;
+		case -EAGAIN:	return 0;
+		default:	return ret;
+		}
+
+		call->count = ntohl(call->tmp);
+		_debug("motd length: %u", call->count);
+		if (call->count >= AFSNAMEMAX)
+			return -EBADMSG;
+		call->offset = 0;
+		call->unmarshall++;
+
+		/* extract the message of the day */
+	case 9:
+		_debug("extract motd");
+		if (call->count > 0) {
+			ret = afs_extract_data(call, skb, last, call->reply3,
+					       call->count);
+			switch (ret) {
+			case 0:		break;
+			case -EAGAIN:	return 0;
+			default:	return ret;
+			}
+		}
+
+		p = call->reply3;
+		p[call->count] = 0;
+		_debug("motd '%s'", p);
+
+		call->offset = 0;
+		call->unmarshall++;
+
+		/* extract the message of the day padding */
+		if ((call->count & 3) == 0) {
+			call->unmarshall++;
+			goto no_motd_padding;
+		}
+		call->count = 4 - (call->count & 3);
+
+	case 10:
+		ret = afs_extract_data(call, skb, last, call->buffer,
+				       call->count);
+		switch (ret) {
+		case 0:		break;
+		case -EAGAIN:	return 0;
+		default:	return ret;
+		}
+
+		call->offset = 0;
+		call->unmarshall++;
+	no_motd_padding:
+
+	case 11:
+		_debug("trailer %d", skb->len);
+		if (skb->len != 0)
+			return -EBADMSG;
+		break;
+	}
+
+	if (!last)
+		return 0;
+
+	_leave(" = 0 [done]");
+	return 0;
+}
+
+/*
+ * destroy an FS.GetVolumeStatus call
+ */
+static void afs_get_volume_status_call_destructor(struct afs_call *call)
+{
+	kfree(call->reply3);
+	call->reply3 = NULL;
+	afs_flat_call_destructor(call);
+}
+
+/*
+ * FS.GetVolumeStatus operation type
+ */
+static const struct afs_call_type afs_RXFSGetVolumeStatus = {
+	.name		= "FS.GetVolumeStatus",
+	.deliver	= afs_deliver_fs_get_volume_status,
+	.abort_to_error	= afs_abort_to_error,
+	.destructor	= afs_get_volume_status_call_destructor,
+};
+
+/*
+ * fetch the status of a volume
+ */
+int afs_fs_get_volume_status(struct afs_server *server,
+			     struct key *key,
+			     struct afs_vnode *vnode,
+			     struct afs_volume_status *vs,
+			     const struct afs_wait_mode *wait_mode)
+{
+	struct afs_call *call;
+	__be32 *bp;
+	void *tmpbuf;
+
+	_enter("");
+
+	tmpbuf = kmalloc(AFSOPAQUEMAX, GFP_KERNEL);
+	if (!tmpbuf)
+		return -ENOMEM;
+
+	call = afs_alloc_flat_call(&afs_RXFSGetVolumeStatus, 2 * 4, 12 * 4);
+	if (!call) {
+		kfree(tmpbuf);
+		return -ENOMEM;
+	}
+
+	call->key = key;
+	call->reply = vnode;
+	call->reply2 = vs;
+	call->reply3 = tmpbuf;
+	call->service_id = FS_SERVICE;
+	call->port = htons(AFS_FS_PORT);
+
+	/* marshall the parameters */
+	bp = call->request;
+	bp[0] = htonl(FSGETVOLUMESTATUS);
+	bp[1] = htonl(vnode->fid.vid);
+
+	return afs_make_call(&server->addr, call, GFP_NOFS, wait_mode);
+}
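The deliver routine above reads each of the three reply strings as a 32-bit length word, the bytes themselves, then zero padding up to the next 4-byte boundary; "call->count = 4 - (call->count & 3)" is the number of pad bytes still to consume when the string is not already aligned. An illustrative sketch (not from the patch) of that arithmetic:

/*
 * XDR pads opaque data to a 4-byte boundary; compute the pad bytes
 * exactly as the unmarshalling states above do.
 */
#include <stdio.h>

static unsigned xdr_pad(unsigned count)
{
	return (count & 3) ? 4 - (count & 3) : 0;
}

int main(void)
{
	unsigned count;

	for (count = 0; count <= 6; count++)
		printf("%u byte(s) -> %u pad\n", count, xdr_pad(count));
	/* prints 0->0, 1->3, 2->2, 3->1, 4->0, 5->3, 6->2 */
	return 0;
}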
diff --git a/fs/afs/inode.c b/fs/afs/inode.c
index 515a5d12d8fb..47f5fed7195d 100644
--- a/fs/afs/inode.c
+++ b/fs/afs/inode.c
@@ -209,11 +209,15 @@ bad_inode:
  */
 void afs_zap_data(struct afs_vnode *vnode)
 {
-	_enter("zap data {%x:%u}", vnode->fid.vid, vnode->fid.vnode);
+	_enter("{%x:%u}", vnode->fid.vid, vnode->fid.vnode);
 
 	/* nuke all the non-dirty pages that aren't locked, mapped or being
-	 * written back */
-	invalidate_remote_inode(&vnode->vfs_inode);
+	 * written back in a regular file and completely discard the pages in a
+	 * directory or symlink */
+	if (S_ISREG(vnode->vfs_inode.i_mode))
+		invalidate_remote_inode(&vnode->vfs_inode);
+	else
+		invalidate_inode_pages2(vnode->vfs_inode.i_mapping);
 }
 
 /*
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index a30d4fa768e3..4953ba5a6f44 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -506,6 +506,10 @@ extern int afs_fs_store_data(struct afs_server *, struct afs_writeback *,
 extern int afs_fs_setattr(struct afs_server *, struct key *,
 			  struct afs_vnode *, struct iattr *,
 			  const struct afs_wait_mode *);
+extern int afs_fs_get_volume_status(struct afs_server *, struct key *,
+				    struct afs_vnode *,
+				    struct afs_volume_status *,
+				    const struct afs_wait_mode *);
 
 /*
  * inode.c
@@ -672,6 +676,8 @@ extern int afs_vnode_rename(struct afs_vnode *, struct afs_vnode *,
 extern int afs_vnode_store_data(struct afs_writeback *, pgoff_t, pgoff_t,
 				unsigned, unsigned);
 extern int afs_vnode_setattr(struct afs_vnode *, struct key *, struct iattr *);
+extern int afs_vnode_get_volume_status(struct afs_vnode *, struct key *,
+				       struct afs_volume_status *);
 
 /*
  * volume.c
diff --git a/fs/afs/super.c b/fs/afs/super.c
index d24be334b608..579af632c8e8 100644
--- a/fs/afs/super.c
+++ b/fs/afs/super.c
@@ -21,22 +21,20 @@
 #include <linux/fs.h>
 #include <linux/pagemap.h>
 #include <linux/parser.h>
+#include <linux/statfs.h>
 #include "internal.h"
 
 #define AFS_FS_MAGIC 0x6B414653 /* 'kAFS' */
 
 static void afs_i_init_once(void *foo, struct kmem_cache *cachep,
 			    unsigned long flags);
-
 static int afs_get_sb(struct file_system_type *fs_type,
 		      int flags, const char *dev_name,
 		      void *data, struct vfsmount *mnt);
-
 static struct inode *afs_alloc_inode(struct super_block *sb);
-
 static void afs_put_super(struct super_block *sb);
-
 static void afs_destroy_inode(struct inode *inode);
+static int afs_statfs(struct dentry *dentry, struct kstatfs *buf);
 
 struct file_system_type afs_fs_type = {
 	.owner		= THIS_MODULE,
@@ -47,7 +45,7 @@ struct file_system_type afs_fs_type = {
 };
 
 static const struct super_operations afs_super_ops = {
-	.statfs		= simple_statfs,
+	.statfs		= afs_statfs,
 	.alloc_inode	= afs_alloc_inode,
 	.drop_inode	= generic_delete_inode,
 	.write_inode	= afs_write_inode,
@@ -488,6 +486,7 @@ static struct inode *afs_alloc_inode(struct super_block *sb)
 	vnode->flags		= 1 << AFS_VNODE_UNSET;
 	vnode->cb_promised	= false;
 
+	_leave(" = %p", &vnode->vfs_inode);
 	return &vnode->vfs_inode;
 }
 
@@ -498,7 +497,7 @@ static void afs_destroy_inode(struct inode *inode)
 {
 	struct afs_vnode *vnode = AFS_FS_I(inode);
 
-	_enter("{%lu}", inode->i_ino);
+	_enter("%p{%x:%u}", inode, vnode->fid.vid, vnode->fid.vnode);
 
 	_debug("DESTROY INODE %p", inode);
 
@@ -507,3 +506,36 @@ static void afs_destroy_inode(struct inode *inode)
 	kmem_cache_free(afs_inode_cachep, vnode);
 	atomic_dec(&afs_count_active_inodes);
 }
+
+/*
+ * return information about an AFS volume
+ */
+static int afs_statfs(struct dentry *dentry, struct kstatfs *buf)
+{
+	struct afs_volume_status vs;
+	struct afs_vnode *vnode = AFS_FS_I(dentry->d_inode);
+	struct key *key;
+	int ret;
+
+	key = afs_request_key(vnode->volume->cell);
+	if (IS_ERR(key))
+		return PTR_ERR(key);
+
+	ret = afs_vnode_get_volume_status(vnode, key, &vs);
+	key_put(key);
+	if (ret < 0) {
+		_leave(" = %d", ret);
+		return ret;
+	}
+
+	buf->f_type	= dentry->d_sb->s_magic;
+	buf->f_bsize	= AFS_BLOCK_SIZE;
+	buf->f_namelen	= AFSNAMEMAX - 1;
+
+	if (vs.max_quota == 0)
+		buf->f_blocks = vs.part_max_blocks;
+	else
+		buf->f_blocks = vs.max_quota;
+	buf->f_bavail = buf->f_bfree = buf->f_blocks - vs.blocks_in_use;
+	return 0;
+}
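An illustrative sketch (not from the patch) of the arithmetic afs_statfs() performs above: a volume with a quota reports the quota as its size, while an unlimited volume falls back to the size of the server partition; all figures are counted in AFS_BLOCK_SIZE (1 KiB) blocks.

#include <stdio.h>

struct demo_status { unsigned max_quota, part_max_blocks, blocks_in_use; };

static void demo_statfs(struct demo_status vs)
{
	unsigned f_blocks = vs.max_quota ? vs.max_quota : vs.part_max_blocks;

	printf("f_blocks=%u f_bfree=%u\n", f_blocks,
	       f_blocks - vs.blocks_in_use);
}

int main(void)
{
	/* 2000-block quota, 500 in use -> 2000 total, 1500 free */
	demo_statfs((struct demo_status){ 2000, 900000, 500 });
	/* no quota -> partition size 900000, 899500 free */
	demo_statfs((struct demo_status){ 0, 900000, 500 });
	return 0;
}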
diff --git a/fs/afs/vnode.c b/fs/afs/vnode.c
index ec814660209f..c36c98ce2c3c 100644
--- a/fs/afs/vnode.c
+++ b/fs/afs/vnode.c
@@ -175,24 +175,33 @@ static void afs_vnode_deleted_remotely(struct afs_vnode *vnode)
 {
 	struct afs_server *server;
 
+	_enter("{%p}", vnode->server);
+
 	set_bit(AFS_VNODE_DELETED, &vnode->flags);
 
 	server = vnode->server;
-	if (vnode->cb_promised) {
-		spin_lock(&server->cb_lock);
+	if (server) {
 		if (vnode->cb_promised) {
-			rb_erase(&vnode->cb_promise, &server->cb_promises);
-			vnode->cb_promised = false;
+			spin_lock(&server->cb_lock);
+			if (vnode->cb_promised) {
+				rb_erase(&vnode->cb_promise,
+					 &server->cb_promises);
+				vnode->cb_promised = false;
+			}
+			spin_unlock(&server->cb_lock);
 		}
-		spin_unlock(&server->cb_lock);
-	}
 
-	spin_lock(&vnode->server->fs_lock);
-	rb_erase(&vnode->server_rb, &vnode->server->fs_vnodes);
-	spin_unlock(&vnode->server->fs_lock);
+		spin_lock(&server->fs_lock);
+		rb_erase(&vnode->server_rb, &server->fs_vnodes);
+		spin_unlock(&server->fs_lock);
 
-	vnode->server = NULL;
-	afs_put_server(server);
+		vnode->server = NULL;
+		afs_put_server(server);
+	} else {
+		ASSERT(!vnode->cb_promised);
+	}
+
+	_leave("");
 }
 
 /*
@@ -225,7 +234,7 @@ void afs_vnode_finalise_status_update(struct afs_vnode *vnode,
  */
 static void afs_vnode_status_update_failed(struct afs_vnode *vnode, int ret)
 {
-	_enter("%p,%d", vnode, ret);
+	_enter("{%x:%u},%d", vnode->fid.vid, vnode->fid.vnode, ret);
 
 	spin_lock(&vnode->lock);
 
@@ -860,3 +869,55 @@ no_server:
 	spin_unlock(&vnode->lock);
 	return PTR_ERR(server);
 }
+
+/*
+ * get the status of a volume
+ */
+int afs_vnode_get_volume_status(struct afs_vnode *vnode, struct key *key,
+				struct afs_volume_status *vs)
+{
+	struct afs_server *server;
+	int ret;
+
+	_enter("%s{%x:%u.%u},%x,",
+	       vnode->volume->vlocation->vldb.name,
+	       vnode->fid.vid,
+	       vnode->fid.vnode,
+	       vnode->fid.unique,
+	       key_serial(key));
+
+	/* this op will fetch the status */
+	spin_lock(&vnode->lock);
+	vnode->update_cnt++;
+	spin_unlock(&vnode->lock);
+
+	do {
+		/* pick a server to query */
+		server = afs_volume_pick_fileserver(vnode);
+		if (IS_ERR(server))
+			goto no_server;
+
+		_debug("USING SERVER: %08x\n", ntohl(server->addr.s_addr));
+
+		ret = afs_fs_get_volume_status(server, key, vnode, vs,
+					       &afs_sync_call);
+
+	} while (!afs_volume_release_fileserver(vnode, server, ret));
+
+	/* adjust the flags */
+	if (ret == 0) {
+		afs_vnode_finalise_status_update(vnode, server);
+		afs_put_server(server);
+	} else {
+		afs_vnode_status_update_failed(vnode, ret);
+	}
+
+	_leave(" = %d", ret);
+	return ret;
+
+no_server:
+	spin_lock(&vnode->lock);
+	vnode->update_cnt--;
+	ASSERTCMP(vnode->update_cnt, >=, 0);
+	spin_unlock(&vnode->lock);
+	return PTR_ERR(server);
+}
diff --git a/fs/afs/write.c b/fs/afs/write.c
index 67ae4dbf66b3..28f37516c126 100644
--- a/fs/afs/write.c
+++ b/fs/afs/write.c
@@ -395,8 +395,9 @@ static int afs_write_back_from_locked_page(struct afs_writeback *wb,
 		if (n == 0)
 			goto no_more;
 		if (pages[0]->index != start) {
-			for (n--; n >= 0; n--)
-				put_page(pages[n]);
+			do {
+				put_page(pages[--n]);
+			} while (n > 0);
 			goto no_more;
 		}
 
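An illustrative sketch (not from the patch) of one plausible reason to prefer the do/while form above: with an unsigned counter, "n >= 0" in the old loop is always true and it would never terminate, whereas the rewritten loop is correct for signed and unsigned counters alike (n > 0 is guaranteed on entry by the "if (n == 0) goto no_more;" test).

#include <stdio.h>

int main(void)
{
	unsigned int n = 3;	/* pages 0..2 hold references */

	/* for (n--; n >= 0; n--) ... would loop forever for unsigned n */
	do {
		printf("put_page(pages[%u])\n", --n);	/* 2, then 1, then 0 */
	} while (n > 0);
	return 0;
}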
diff --git a/fs/aio.c b/fs/aio.c
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -30,6 +30,7 @@
 #include <linux/highmem.h>
 #include <linux/workqueue.h>
 #include <linux/security.h>
+#include <linux/eventfd.h>
 
 #include <asm/kmap_types.h>
 #include <asm/uaccess.h>
@@ -417,6 +418,7 @@ static struct kiocb fastcall *__aio_get_req(struct kioctx *ctx)
 	req->private = NULL;
 	req->ki_iovec = NULL;
 	INIT_LIST_HEAD(&req->ki_run_list);
+	req->ki_eventfd = ERR_PTR(-EINVAL);
 
 	/* Check if the completion queue has enough free space to
 	 * accept an event from this io.
@@ -458,6 +460,8 @@ static inline void really_put_req(struct kioctx *ctx, struct kiocb *req)
 {
 	assert_spin_locked(&ctx->ctx_lock);
 
+	if (!IS_ERR(req->ki_eventfd))
+		fput(req->ki_eventfd);
 	if (req->ki_dtor)
 		req->ki_dtor(req);
 	if (req->ki_iovec != &req->ki_inline_vec)
@@ -942,6 +946,14 @@ int fastcall aio_complete(struct kiocb *iocb, long res, long res2)
 		return 1;
 	}
 
+	/*
+	 * Check if the user asked us to deliver the result through an
+	 * eventfd. The eventfd_signal() function is safe to be called
+	 * from IRQ context.
+	 */
+	if (!IS_ERR(iocb->ki_eventfd))
+		eventfd_signal(iocb->ki_eventfd, 1);
+
 	info = &ctx->ring_info;
 
 	/* add a completion event to the ring buffer.
@@ -1526,8 +1538,7 @@ int fastcall io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
 	ssize_t ret;
 
 	/* enforce forwards compatibility on users */
-	if (unlikely(iocb->aio_reserved1 || iocb->aio_reserved2 ||
-		     iocb->aio_reserved3)) {
+	if (unlikely(iocb->aio_reserved1 || iocb->aio_reserved2)) {
 		pr_debug("EINVAL: io_submit: reserve field set\n");
 		return -EINVAL;
 	}
@@ -1551,6 +1562,19 @@ int fastcall io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
 		fput(file);
 		return -EAGAIN;
 	}
+	if (iocb->aio_flags & IOCB_FLAG_RESFD) {
+		/*
+		 * If the IOCB_FLAG_RESFD flag of aio_flags is set, get an
+		 * instance of the file* now. The file descriptor must be
+		 * an eventfd() fd, and will be signaled for each completed
+		 * event using the eventfd_signal() function.
+		 */
+		req->ki_eventfd = eventfd_fget((int) iocb->aio_resfd);
+		if (unlikely(IS_ERR(req->ki_eventfd))) {
+			ret = PTR_ERR(req->ki_eventfd);
+			goto out_put_req;
+		}
+	}
 
 	req->ki_filp = file;
 	ret = put_user(req->ki_key, &user_iocb->aio_key);
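An illustrative userspace sketch (not part of the patch) of the interface the hunks above enable: submitting an AIO read whose completion is signalled through an eventfd. It assumes a kernel with this patchset and a libc that provides eventfd(2) and the raw io_* syscall numbers; error checking is elided for brevity.

#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/eventfd.h>
#include <sys/syscall.h>
#include <linux/aio_abi.h>

int main(void)
{
	aio_context_t ctx = 0;
	struct iocb cb, *cbs[1] = { &cb };
	char buf[4096];
	uint64_t count;
	int fd = open("/etc/hostname", O_RDONLY);
	int efd = eventfd(0, 0);

	syscall(SYS_io_setup, 128, &ctx);

	memset(&cb, 0, sizeof(cb));
	cb.aio_lio_opcode = IOCB_CMD_PREAD;
	cb.aio_fildes = fd;
	cb.aio_buf = (uint64_t)(unsigned long)buf;
	cb.aio_nbytes = sizeof(buf);
	cb.aio_flags = IOCB_FLAG_RESFD;	/* deliver completion via efd */
	cb.aio_resfd = efd;

	syscall(SYS_io_submit, ctx, 1, cbs);

	/* blocks until aio_complete() calls eventfd_signal(..., 1) */
	read(efd, &count, sizeof(count));
	printf("%llu completion(s) signalled\n", (unsigned long long)count);

	syscall(SYS_io_destroy, ctx);
	return 0;
}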
diff --git a/fs/anon_inodes.c b/fs/anon_inodes.c
new file mode 100644
index 000000000000..40fe3a3222e4
--- /dev/null
+++ b/fs/anon_inodes.c
@@ -0,0 +1,200 @@
+/*
+ *  fs/anon_inodes.c
+ *
+ *  Copyright (C) 2007  Davide Libenzi <davidel@xmailserver.org>
+ *
+ *  Thanks to Arnd Bergmann for code review and suggestions.
+ *  More changes for Thomas Gleixner suggestions.
+ *
+ */
+
+#include <linux/file.h>
+#include <linux/poll.h>
+#include <linux/slab.h>
+#include <linux/init.h>
+#include <linux/fs.h>
+#include <linux/mount.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/magic.h>
+#include <linux/anon_inodes.h>
+
+#include <asm/uaccess.h>
+
+static struct vfsmount *anon_inode_mnt __read_mostly;
+static struct inode *anon_inode_inode;
+static const struct file_operations anon_inode_fops;
+
+static int anon_inodefs_get_sb(struct file_system_type *fs_type, int flags,
+			       const char *dev_name, void *data,
+			       struct vfsmount *mnt)
+{
+	return get_sb_pseudo(fs_type, "anon_inode:", NULL, ANON_INODE_FS_MAGIC,
+			     mnt);
+}
+
+static int anon_inodefs_delete_dentry(struct dentry *dentry)
+{
+	/*
+	 * We faked vfs to believe the dentry was hashed when we created it.
+	 * Now we restore the flag so that dput() will work correctly.
+	 */
+	dentry->d_flags |= DCACHE_UNHASHED;
+	return 1;
+}
+
+static struct file_system_type anon_inode_fs_type = {
+	.name		= "anon_inodefs",
+	.get_sb		= anon_inodefs_get_sb,
+	.kill_sb	= kill_anon_super,
+};
+static struct dentry_operations anon_inodefs_dentry_operations = {
+	.d_delete	= anon_inodefs_delete_dentry,
+};
+
+/**
+ * anon_inode_getfd - creates a new file instance by hooking it up to an
+ *                    anonymous inode, and a dentry that describes the
+ *                    "class" of the file
+ *
+ * @pfd:    [out] pointer to the file descriptor
+ * @pinode: [out] pointer to the inode
+ * @pfile:  [out] pointer to the file struct
+ * @name:   [in]  name of the "class" of the new file
+ * @fops:   [in]  file operations for the new file
+ * @priv:   [in]  private data for the new file (will be file's private_data)
+ *
+ * Creates a new file by hooking it on a single inode. This is useful for files
+ * that do not need to have a full-fledged inode in order to operate correctly.
+ * All the files created with anon_inode_getfd() will share a single inode,
+ * hence saving memory and avoiding code duplication for the file/inode/dentry
+ * setup.
+ */
+int anon_inode_getfd(int *pfd, struct inode **pinode, struct file **pfile,
+		     const char *name, const struct file_operations *fops,
+		     void *priv)
+{
+	struct qstr this;
+	struct dentry *dentry;
+	struct inode *inode;
+	struct file *file;
+	int error, fd;
+
+	if (IS_ERR(anon_inode_inode))
+		return -ENODEV;
+	file = get_empty_filp();
+	if (!file)
+		return -ENFILE;
+
+	inode = igrab(anon_inode_inode);
+	if (IS_ERR(inode)) {
+		error = PTR_ERR(inode);
+		goto err_put_filp;
+	}
+
+	error = get_unused_fd();
+	if (error < 0)
+		goto err_iput;
+	fd = error;
+
+	/*
+	 * Link the inode to a directory entry by creating a unique name
+	 * using the inode sequence number.
+	 */
+	error = -ENOMEM;
+	this.name = name;
+	this.len = strlen(name);
+	this.hash = 0;
+	dentry = d_alloc(anon_inode_mnt->mnt_sb->s_root, &this);
+	if (!dentry)
+		goto err_put_unused_fd;
+	dentry->d_op = &anon_inodefs_dentry_operations;
+	/* Do not publish this dentry inside the global dentry hash table */
+	dentry->d_flags &= ~DCACHE_UNHASHED;
+	d_instantiate(dentry, inode);
+
+	file->f_path.mnt = mntget(anon_inode_mnt);
+	file->f_path.dentry = dentry;
+	file->f_mapping = inode->i_mapping;
+
+	file->f_pos = 0;
+	file->f_flags = O_RDWR;
+	file->f_op = fops;
+	file->f_mode = FMODE_READ | FMODE_WRITE;
+	file->f_version = 0;
+	file->private_data = priv;
+
+	fd_install(fd, file);
+
+	*pfd = fd;
+	*pinode = inode;
+	*pfile = file;
+	return 0;
+
+err_put_unused_fd:
+	put_unused_fd(fd);
+err_iput:
+	iput(inode);
+err_put_filp:
+	put_filp(file);
+	return error;
+}
+
+/*
+ * A single inode exists for all anon_inode files. Contrary to pipes,
+ * anon_inode inodes have no per-instance data associated, so we can avoid
+ * the allocation of multiple of them.
+ */
+static struct inode *anon_inode_mkinode(void)
+{
+	struct inode *inode = new_inode(anon_inode_mnt->mnt_sb);
+
+	if (!inode)
+		return ERR_PTR(-ENOMEM);
+
+	inode->i_fop = &anon_inode_fops;
+
+	/*
+	 * Mark the inode dirty from the very beginning,
+	 * that way it will never be moved to the dirty
+	 * list because mark_inode_dirty() will think
+	 * that it already _is_ on the dirty list.
+	 */
+	inode->i_state = I_DIRTY;
+	inode->i_mode = S_IRUSR | S_IWUSR;
+	inode->i_uid = current->fsuid;
+	inode->i_gid = current->fsgid;
+	inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
+	return inode;
+}
+
+static int __init anon_inode_init(void)
+{
+	int error;
+
+	error = register_filesystem(&anon_inode_fs_type);
+	if (error)
+		goto err_exit;
+	anon_inode_mnt = kern_mount(&anon_inode_fs_type);
+	if (IS_ERR(anon_inode_mnt)) {
+		error = PTR_ERR(anon_inode_mnt);
+		goto err_unregister_filesystem;
+	}
+	anon_inode_inode = anon_inode_mkinode();
+	if (IS_ERR(anon_inode_inode)) {
+		error = PTR_ERR(anon_inode_inode);
+		goto err_mntput;
+	}
+
+	return 0;
+
+err_mntput:
+	mntput(anon_inode_mnt);
+err_unregister_filesystem:
+	unregister_filesystem(&anon_inode_fs_type);
+err_exit:
+	panic(KERN_ERR "anon_inode_init() failed (%d)\n", error);
+}
+
+fs_initcall(anon_inode_init);
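An illustrative kernel-side sketch (not part of the patch; the demo_* names are invented) of how a subsystem such as eventfd or signalfd calls the anon_inode_getfd() interface defined above: pass a class name and an fops table, and every instance shares the one anonymous inode while keeping its own private_data.

#include <linux/anon_inodes.h>
#include <linux/fs.h>

static ssize_t demo_read(struct file *file, char __user *buf,
			 size_t count, loff_t *ppos)
{
	/* file->private_data is the priv pointer passed below */
	return 0;
}

static const struct file_operations demo_fops = {
	.read = demo_read,
};

int demo_create_fd(void *state)
{
	struct inode *inode;
	struct file *file;
	int fd, error;

	/* one shared anonymous inode backs every "[demo]" file */
	error = anon_inode_getfd(&fd, &inode, &file, "[demo]",
				 &demo_fops, state);
	if (error)
		return error;
	return fd;	/* already installed in the caller's fd table */
}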
diff --git a/fs/autofs/autofs_i.h b/fs/autofs/autofs_i.h
index 4ef544434b51..8b4cca3c4705 100644
--- a/fs/autofs/autofs_i.h
+++ b/fs/autofs/autofs_i.h
@@ -101,7 +101,7 @@ struct autofs_symlink {
 struct autofs_sb_info {
 	u32 magic;
 	struct file *pipe;
-	pid_t oz_pgrp;
+	struct pid *oz_pgrp;
 	int catatonic;
 	struct super_block *sb;
 	unsigned long exp_timeout;
@@ -122,7 +122,7 @@ static inline struct autofs_sb_info *autofs_sbi(struct super_block *sb)
 	   filesystem without "magic".) */
 
 static inline int autofs_oz_mode(struct autofs_sb_info *sbi) {
	return sbi->catatonic || task_pgrp(current) == sbi->oz_pgrp;
 }
 
 /* Hash operations */
diff --git a/fs/autofs/inode.c b/fs/autofs/inode.c
index aa0b61ff8270..e7204d71acc9 100644
--- a/fs/autofs/inode.c
+++ b/fs/autofs/inode.c
@@ -34,12 +34,14 @@ void autofs_kill_sb(struct super_block *sb)
 	if (!sbi)
 		goto out_kill_sb;
 
-	if ( !sbi->catatonic )
+	if (!sbi->catatonic)
 		autofs_catatonic_mode(sbi); /* Free wait queues, close pipe */
 
+	put_pid(sbi->oz_pgrp);
+
 	autofs_hash_nuke(sbi);
-	for ( n = 0 ; n < AUTOFS_MAX_SYMLINKS ; n++ ) {
-		if ( test_bit(n, sbi->symlink_bitmap) )
+	for (n = 0; n < AUTOFS_MAX_SYMLINKS; n++) {
+		if (test_bit(n, sbi->symlink_bitmap))
 			kfree(sbi->symlink[n].data);
 	}
 
@@ -69,7 +71,8 @@
 	{Opt_err, NULL}
 };
 
-static int parse_options(char *options, int *pipefd, uid_t *uid, gid_t *gid, pid_t *pgrp, int *minproto, int *maxproto)
+static int parse_options(char *options, int *pipefd, uid_t *uid, gid_t *gid,
+			 pid_t *pgrp, int *minproto, int *maxproto)
 {
 	char *p;
 	substring_t args[MAX_OPT_ARGS];
@@ -138,9 +141,10 @@ int autofs_fill_super(struct super_block *s, void *data, int silent)
 	int pipefd;
 	struct autofs_sb_info *sbi;
 	int minproto, maxproto;
+	pid_t pgid;
 
 	sbi = kzalloc(sizeof(*sbi), GFP_KERNEL);
-	if ( !sbi )
+	if (!sbi)
 		goto fail_unlock;
 	DPRINTK(("autofs: starting up, sbi = %p\n",sbi));
 
@@ -149,7 +153,6 @@ int autofs_fill_super(struct super_block *s, void *data, int silent)
 	sbi->pipe = NULL;
 	sbi->catatonic = 1;
 	sbi->exp_timeout = 0;
-	sbi->oz_pgrp = process_group(current);
 	autofs_initialize_hash(&sbi->dirhash);
 	sbi->queues = NULL;
 	memset(sbi->symlink_bitmap, 0, sizeof(long)*AUTOFS_SYMLINK_BITMAP_LEN);
@@ -169,26 +172,36 @@ int autofs_fill_super(struct super_block *s, void *data, int silent)
 		goto fail_iput;
 
 	/* Can this call block? - WTF cares? s is locked. */
-	if ( parse_options(data,&pipefd,&root_inode->i_uid,&root_inode->i_gid,&sbi->oz_pgrp,&minproto,&maxproto) ) {
+	if (parse_options(data, &pipefd, &root_inode->i_uid,
+			  &root_inode->i_gid, &pgid, &minproto,
+			  &maxproto)) {
 		printk("autofs: called with bogus options\n");
 		goto fail_dput;
 	}
 
 	/* Couldn't this be tested earlier? */
-	if ( minproto > AUTOFS_PROTO_VERSION ||
-	     maxproto < AUTOFS_PROTO_VERSION ) {
+	if (minproto > AUTOFS_PROTO_VERSION ||
+	    maxproto < AUTOFS_PROTO_VERSION) {
 		printk("autofs: kernel does not match daemon version\n");
 		goto fail_dput;
 	}
 
-	DPRINTK(("autofs: pipe fd = %d, pgrp = %u\n", pipefd, sbi->oz_pgrp));
+	DPRINTK(("autofs: pipe fd = %d, pgrp = %u\n", pipefd, pgid));
+	sbi->oz_pgrp = find_get_pid(pgid);
+
+	if (!sbi->oz_pgrp) {
+		printk("autofs: could not find process group %d\n", pgid);
+		goto fail_dput;
+	}
+
 	pipe = fget(pipefd);
 
-	if ( !pipe ) {
+	if (!pipe) {
 		printk("autofs: could not open pipe file descriptor\n");
-		goto fail_dput;
+		goto fail_put_pid;
 	}
-	if ( !pipe->f_op || !pipe->f_op->write )
+
+	if (!pipe->f_op || !pipe->f_op->write)
 		goto fail_fput;
 	sbi->pipe = pipe;
 	sbi->catatonic = 0;
@@ -202,6 +215,8 @@ int autofs_fill_super(struct super_block *s, void *data, int silent)
 fail_fput:
 	printk("autofs: pipe file descriptor does not contain proper ops\n");
 	fput(pipe);
+fail_put_pid:
+	put_pid(sbi->oz_pgrp);
 fail_dput:
 	dput(root);
 	goto fail_free;
@@ -230,7 +245,7 @@ static void autofs_read_inode(struct inode *inode)
 	inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
 	inode->i_blocks = 0;
 
-	if ( ino == AUTOFS_ROOT_INO ) {
+	if (ino == AUTOFS_ROOT_INO) {
 		inode->i_mode = S_IFDIR | S_IRUGO | S_IXUGO | S_IWUSR;
 		inode->i_op = &autofs_root_inode_operations;
 		inode->i_fop = &autofs_root_operations;
@@ -241,12 +256,12 @@ static void autofs_read_inode(struct inode *inode)
 	inode->i_uid = inode->i_sb->s_root->d_inode->i_uid;
 	inode->i_gid = inode->i_sb->s_root->d_inode->i_gid;
 
-	if ( ino >= AUTOFS_FIRST_SYMLINK && ino < AUTOFS_FIRST_DIR_INO ) {
+	if (ino >= AUTOFS_FIRST_SYMLINK && ino < AUTOFS_FIRST_DIR_INO) {
 		/* Symlink inode - should be in symlink list */
 		struct autofs_symlink *sl;
 
 		n = ino - AUTOFS_FIRST_SYMLINK;
-		if ( n >= AUTOFS_MAX_SYMLINKS || !test_bit(n,sbi->symlink_bitmap)) {
+		if (n >= AUTOFS_MAX_SYMLINKS || !test_bit(n,sbi->symlink_bitmap)) {
 			printk("autofs: Looking for bad symlink inode %u\n", (unsigned int) ino);
 			return;
 		}
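An illustrative sketch (not part of the patch; the demo_* names are invented) of the struct pid reference discipline the conversion above follows: find_get_pid() takes a counted reference that pins the process-group ID without pinning any task, the oz_pgrp ownership test becomes a pointer comparison against task_pgrp(current), and put_pid() drops the reference on unmount or on the error path.

#include <linux/errno.h>
#include <linux/pid.h>
#include <linux/sched.h>

struct demo_sb_info {
	struct pid *oz_pgrp;	/* owning process group, counted reference */
};

static int demo_attach(struct demo_sb_info *sbi, pid_t pgid)
{
	sbi->oz_pgrp = find_get_pid(pgid);	/* takes a reference */
	return sbi->oz_pgrp ? 0 : -ESRCH;
}

static int demo_is_owner(struct demo_sb_info *sbi)
{
	return task_pgrp(current) == sbi->oz_pgrp;	/* pointer equality */
}

static void demo_detach(struct demo_sb_info *sbi)
{
	put_pid(sbi->oz_pgrp);	/* drop the reference taken in demo_attach() */
}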
diff --git a/fs/autofs/root.c b/fs/autofs/root.c
index f2597205939d..c1489533277a 100644
--- a/fs/autofs/root.c
+++ b/fs/autofs/root.c
@@ -67,8 +67,8 @@ static int autofs_root_readdir(struct file *filp, void *dirent, filldir_t filldi
 		filp->f_pos = ++nr;
 		/* fall through */
 	default:
-		while ( onr = nr, ent = autofs_hash_enum(dirhash,&nr,ent) ) {
-			if ( !ent->dentry || d_mountpoint(ent->dentry) ) {
+		while (onr = nr, ent = autofs_hash_enum(dirhash,&nr,ent)) {
+			if (!ent->dentry || d_mountpoint(ent->dentry)) {
 				if (filldir(dirent,ent->name,ent->len,onr,ent->ino,DT_UNKNOWN) < 0)
 					goto out;
 				filp->f_pos = nr;
@@ -88,10 +88,10 @@ static int try_to_fill_dentry(struct dentry *dentry, struct super_block *sb, str
 	struct autofs_dir_ent *ent;
 	int status = 0;
 
-	if ( !(ent = autofs_hash_lookup(&sbi->dirhash, &dentry->d_name)) ) {
+	if (!(ent = autofs_hash_lookup(&sbi->dirhash, &dentry->d_name))) {
 		do {
-			if ( status && dentry->d_inode ) {
-				if ( status != -ENOENT )
+			if (status && dentry->d_inode) {
+				if (status != -ENOENT)
 					printk("autofs warning: lookup failure on positive dentry, status = %d, name = %s\n", status, dentry->d_name.name);
 				return 0; /* Try to get the kernel to invalidate this dentry */
 			}
@@ -106,7 +106,7 @@ static int try_to_fill_dentry(struct dentry *dentry, struct super_block *sb, str
 			return 1;
 		}
 		status = autofs_wait(sbi, &dentry->d_name);
-		} while (!(ent = autofs_hash_lookup(&sbi->dirhash, &dentry->d_name)) );
+		} while (!(ent = autofs_hash_lookup(&sbi->dirhash, &dentry->d_name)));
 	}
 
 	/* Abuse this field as a pointer to the directory entry, used to
@@ -124,13 +124,13 @@ static int try_to_fill_dentry(struct dentry *dentry, struct super_block *sb, str
 
 	/* If this is a directory that isn't a mount point, bitch at the
 	   daemon and fix it in user space */
-	if ( S_ISDIR(dentry->d_inode->i_mode) && !d_mountpoint(dentry) ) {
+	if (S_ISDIR(dentry->d_inode->i_mode) && !d_mountpoint(dentry)) {
 		return !autofs_wait(sbi, &dentry->d_name);
 	}
 
 	/* We don't update the usages for the autofs daemon itself, this
 	   is necessary for recursive autofs mounts */
-	if ( !autofs_oz_mode(sbi) ) {
+	if (!autofs_oz_mode(sbi)) {
 		autofs_update_usage(&sbi->dirhash,ent);
 	}
 
@@ -157,7 +157,7 @@ static int autofs_revalidate(struct dentry * dentry, struct nameidata *nd)
 	sbi = autofs_sbi(dir->i_sb);
 
 	/* Pending dentry */
-	if ( dentry->d_flags & DCACHE_AUTOFS_PENDING ) {
+	if (dentry->d_flags & DCACHE_AUTOFS_PENDING) {
 		if (autofs_oz_mode(sbi))
 			res = 1;
 		else
@@ -173,7 +173,7 @@ static int autofs_revalidate(struct dentry * dentry, struct nameidata *nd)
 	}
 
 	/* Check for a non-mountpoint directory */
-	if ( S_ISDIR(dentry->d_inode->i_mode) && !d_mountpoint(dentry) ) {
+	if (S_ISDIR(dentry->d_inode->i_mode) && !d_mountpoint(dentry)) {
 		if (autofs_oz_mode(sbi))
 			res = 1;
 		else
@@ -183,9 +183,9 @@ static int autofs_revalidate(struct dentry * dentry, struct nameidata *nd)
 	}
 
 	/* Update the usage list */
-	if ( !autofs_oz_mode(sbi) ) {
+	if (!autofs_oz_mode(sbi)) {
 		ent = (struct autofs_dir_ent *) dentry->d_time;
-		if ( ent )
+		if (ent)
 			autofs_update_usage(&sbi->dirhash,ent);
 	}
 	unlock_kernel();
@@ -213,8 +213,10 @@ static struct dentry *autofs_root_lookup(struct inode *dir, struct dentry *dentr
 	sbi = autofs_sbi(dir->i_sb);
 
 	oz_mode = autofs_oz_mode(sbi);
-	DPRINTK(("autofs_lookup: pid = %u, pgrp = %u, catatonic = %d, oz_mode = %d\n",
-		 current->pid, process_group(current), sbi->catatonic, oz_mode));
+	DPRINTK(("autofs_lookup: pid = %u, pgrp = %u, catatonic = %d, "
+		 "oz_mode = %d\n", pid_nr(task_pid(current)),
+		 process_group(current), sbi->catatonic,
+		 oz_mode));
 
 	/*
 	 * Mark the dentry incomplete, but add it. This is needed so
@@ -258,7 +260,7 @@
 	 * doesn't do the right thing for all system calls, but it should
 	 * be OK for the operations we permit from an autofs.
 	 */
-	if ( dentry->d_inode && d_unhashed(dentry) )
+	if (dentry->d_inode && d_unhashed(dentry))
 		return ERR_PTR(-ENOENT);
 
 	return NULL;
@@ -277,18 +279,18 @@ static int autofs_root_symlink(struct inode *dir, struct dentry *dentry, const c
 	autofs_say(dentry->d_name.name,dentry->d_name.len);
 
 	lock_kernel();
-	if ( !autofs_oz_mode(sbi) ) {
+	if (!autofs_oz_mode(sbi)) {
 		unlock_kernel();
 		return -EACCES;
 	}
 
-	if ( autofs_hash_lookup(dh, &dentry->d_name) ) {
+	if (autofs_hash_lookup(dh, &dentry->d_name)) {
 		unlock_kernel();
 		return -EEXIST;
 	}
 
 	n = find_first_zero_bit(sbi->symlink_bitmap,AUTOFS_MAX_SYMLINKS);
-	if ( n >= AUTOFS_MAX_SYMLINKS ) {
+	if (n >= AUTOFS_MAX_SYMLINKS) {
 		unlock_kernel();
 		return -ENOSPC;
 	}
@@ -297,14 +299,14 @@ static int autofs_root_symlink(struct inode *dir, struct dentry *dentry, const c
 	sl = &sbi->symlink[n];
 	sl->len = strlen(symname);
 	sl->data = kmalloc(slsize = sl->len+1, GFP_KERNEL);
-	if ( !sl->data ) {
+	if (!sl->data) {
 		clear_bit(n,sbi->symlink_bitmap);
 		unlock_kernel();
 		return -ENOSPC;
 	}
 
 	ent = kmalloc(sizeof(struct autofs_dir_ent), GFP_KERNEL);
-	if ( !ent ) {
+	if (!ent) {
 		kfree(sl->data);
 		clear_bit(n,sbi->symlink_bitmap);
 		unlock_kernel();
@@ -312,7 +314,7 @@ static int autofs_root_symlink(struct inode *dir, struct dentry *dentry, const c
 	}
 
 	ent->name = kmalloc(dentry->d_name.len+1, GFP_KERNEL);
-	if ( !ent->name ) {
+	if (!ent->name) {
 		kfree(sl->data);
 		kfree(ent);
 		clear_bit(n,sbi->symlink_bitmap);
@@ -354,23 +356,23 @@ static int autofs_root_unlink(struct inode *dir, struct dentry *dentry)
 
 	/* This allows root to remove symlinks */
 	lock_kernel();
-	if ( !autofs_oz_mode(sbi) && !capable(CAP_SYS_ADMIN) ) {
+	if (!autofs_oz_mode(sbi) && !capable(CAP_SYS_ADMIN)) {
358 | unlock_kernel(); | 360 | unlock_kernel(); |
359 | return -EACCES; | 361 | return -EACCES; |
360 | } | 362 | } |
361 | 363 | ||
362 | ent = autofs_hash_lookup(dh, &dentry->d_name); | 364 | ent = autofs_hash_lookup(dh, &dentry->d_name); |
363 | if ( !ent ) { | 365 | if (!ent) { |
364 | unlock_kernel(); | 366 | unlock_kernel(); |
365 | return -ENOENT; | 367 | return -ENOENT; |
366 | } | 368 | } |
367 | 369 | ||
368 | n = ent->ino - AUTOFS_FIRST_SYMLINK; | 370 | n = ent->ino - AUTOFS_FIRST_SYMLINK; |
369 | if ( n >= AUTOFS_MAX_SYMLINKS ) { | 371 | if (n >= AUTOFS_MAX_SYMLINKS) { |
370 | unlock_kernel(); | 372 | unlock_kernel(); |
371 | return -EISDIR; /* It's a directory, dummy */ | 373 | return -EISDIR; /* It's a directory, dummy */ |
372 | } | 374 | } |
373 | if ( !test_bit(n,sbi->symlink_bitmap) ) { | 375 | if (!test_bit(n,sbi->symlink_bitmap)) { |
374 | unlock_kernel(); | 376 | unlock_kernel(); |
375 | return -EINVAL; /* Nonexistent symlink? Shouldn't happen */ | 377 | return -EINVAL; /* Nonexistent symlink? Shouldn't happen */ |
376 | } | 378 | } |
@@ -392,23 +394,23 @@ static int autofs_root_rmdir(struct inode *dir, struct dentry *dentry) | |||
392 | struct autofs_dir_ent *ent; | 394 | struct autofs_dir_ent *ent; |
393 | 395 | ||
394 | lock_kernel(); | 396 | lock_kernel(); |
395 | if ( !autofs_oz_mode(sbi) ) { | 397 | if (!autofs_oz_mode(sbi)) { |
396 | unlock_kernel(); | 398 | unlock_kernel(); |
397 | return -EACCES; | 399 | return -EACCES; |
398 | } | 400 | } |
399 | 401 | ||
400 | ent = autofs_hash_lookup(dh, &dentry->d_name); | 402 | ent = autofs_hash_lookup(dh, &dentry->d_name); |
401 | if ( !ent ) { | 403 | if (!ent) { |
402 | unlock_kernel(); | 404 | unlock_kernel(); |
403 | return -ENOENT; | 405 | return -ENOENT; |
404 | } | 406 | } |
405 | 407 | ||
406 | if ( (unsigned int)ent->ino < AUTOFS_FIRST_DIR_INO ) { | 408 | if ((unsigned int)ent->ino < AUTOFS_FIRST_DIR_INO) { |
407 | unlock_kernel(); | 409 | unlock_kernel(); |
408 | return -ENOTDIR; /* Not a directory */ | 410 | return -ENOTDIR; /* Not a directory */ |
409 | } | 411 | } |
410 | 412 | ||
411 | if ( ent->dentry != dentry ) { | 413 | if (ent->dentry != dentry) { |
412 | printk("autofs_rmdir: odentry != dentry for entry %s\n", dentry->d_name.name); | 414 | printk("autofs_rmdir: odentry != dentry for entry %s\n", dentry->d_name.name); |
413 | } | 415 | } |
414 | 416 | ||
@@ -429,18 +431,18 @@ static int autofs_root_mkdir(struct inode *dir, struct dentry *dentry, int mode) | |||
429 | ino_t ino; | 431 | ino_t ino; |
430 | 432 | ||
431 | lock_kernel(); | 433 | lock_kernel(); |
432 | if ( !autofs_oz_mode(sbi) ) { | 434 | if (!autofs_oz_mode(sbi)) { |
433 | unlock_kernel(); | 435 | unlock_kernel(); |
434 | return -EACCES; | 436 | return -EACCES; |
435 | } | 437 | } |
436 | 438 | ||
437 | ent = autofs_hash_lookup(dh, &dentry->d_name); | 439 | ent = autofs_hash_lookup(dh, &dentry->d_name); |
438 | if ( ent ) { | 440 | if (ent) { |
439 | unlock_kernel(); | 441 | unlock_kernel(); |
440 | return -EEXIST; | 442 | return -EEXIST; |
441 | } | 443 | } |
442 | 444 | ||
443 | if ( sbi->next_dir_ino < AUTOFS_FIRST_DIR_INO ) { | 445 | if (sbi->next_dir_ino < AUTOFS_FIRST_DIR_INO) { |
444 | printk("autofs: Out of inode numbers -- what the heck did you do??\n"); | 446 | printk("autofs: Out of inode numbers -- what the heck did you do??\n"); |
445 | unlock_kernel(); | 447 | unlock_kernel(); |
446 | return -ENOSPC; | 448 | return -ENOSPC; |
@@ -448,13 +450,13 @@ static int autofs_root_mkdir(struct inode *dir, struct dentry *dentry, int mode) | |||
448 | ino = sbi->next_dir_ino++; | 450 | ino = sbi->next_dir_ino++; |
449 | 451 | ||
450 | ent = kmalloc(sizeof(struct autofs_dir_ent), GFP_KERNEL); | 452 | ent = kmalloc(sizeof(struct autofs_dir_ent), GFP_KERNEL); |
451 | if ( !ent ) { | 453 | if (!ent) { |
452 | unlock_kernel(); | 454 | unlock_kernel(); |
453 | return -ENOSPC; | 455 | return -ENOSPC; |
454 | } | 456 | } |
455 | 457 | ||
456 | ent->name = kmalloc(dentry->d_name.len+1, GFP_KERNEL); | 458 | ent->name = kmalloc(dentry->d_name.len+1, GFP_KERNEL); |
457 | if ( !ent->name ) { | 459 | if (!ent->name) { |
458 | kfree(ent); | 460 | kfree(ent); |
459 | unlock_kernel(); | 461 | unlock_kernel(); |
460 | return -ENOSPC; | 462 | return -ENOSPC; |
@@ -483,7 +485,7 @@ static inline int autofs_get_set_timeout(struct autofs_sb_info *sbi, | |||
483 | put_user(sbi->exp_timeout / HZ, p)) | 485 | put_user(sbi->exp_timeout / HZ, p)) |
484 | return -EFAULT; | 486 | return -EFAULT; |
485 | 487 | ||
486 | if ( ntimeout > ULONG_MAX/HZ ) | 488 | if (ntimeout > ULONG_MAX/HZ) |
487 | sbi->exp_timeout = 0; | 489 | sbi->exp_timeout = 0; |
488 | else | 490 | else |
489 | sbi->exp_timeout = ntimeout * HZ; | 491 | sbi->exp_timeout = ntimeout * HZ; |
@@ -511,15 +513,14 @@ static inline int autofs_expire_run(struct super_block *sb, | |||
511 | pkt.hdr.proto_version = AUTOFS_PROTO_VERSION; | 513 | pkt.hdr.proto_version = AUTOFS_PROTO_VERSION; |
512 | pkt.hdr.type = autofs_ptype_expire; | 514 | pkt.hdr.type = autofs_ptype_expire; |
513 | 515 | ||
514 | if ( !sbi->exp_timeout || | 516 | if (!sbi->exp_timeout || !(ent = autofs_expire(sb,sbi,mnt))) |
515 | !(ent = autofs_expire(sb,sbi,mnt)) ) | ||
516 | return -EAGAIN; | 517 | return -EAGAIN; |
517 | 518 | ||
518 | pkt.len = ent->len; | 519 | pkt.len = ent->len; |
519 | memcpy(pkt.name, ent->name, pkt.len); | 520 | memcpy(pkt.name, ent->name, pkt.len); |
520 | pkt.name[pkt.len] = '\0'; | 521 | pkt.name[pkt.len] = '\0'; |
521 | 522 | ||
522 | if ( copy_to_user(pkt_p, &pkt, sizeof(struct autofs_packet_expire)) ) | 523 | if (copy_to_user(pkt_p, &pkt, sizeof(struct autofs_packet_expire))) |
523 | return -EFAULT; | 524 | return -EFAULT; |
524 | 525 | ||
525 | return 0; | 526 | return 0; |
@@ -537,11 +538,11 @@ static int autofs_root_ioctl(struct inode *inode, struct file *filp, | |||
537 | 538 | ||
538 | DPRINTK(("autofs_ioctl: cmd = 0x%08x, arg = 0x%08lx, sbi = %p, pgrp = %u\n",cmd,arg,sbi,process_group(current))); | 539 | DPRINTK(("autofs_ioctl: cmd = 0x%08x, arg = 0x%08lx, sbi = %p, pgrp = %u\n",cmd,arg,sbi,process_group(current))); |
539 | 540 | ||
540 | if ( _IOC_TYPE(cmd) != _IOC_TYPE(AUTOFS_IOC_FIRST) || | 541 | if (_IOC_TYPE(cmd) != _IOC_TYPE(AUTOFS_IOC_FIRST) || |
541 | _IOC_NR(cmd) - _IOC_NR(AUTOFS_IOC_FIRST) >= AUTOFS_IOC_COUNT ) | 542 | _IOC_NR(cmd) - _IOC_NR(AUTOFS_IOC_FIRST) >= AUTOFS_IOC_COUNT) |
542 | return -ENOTTY; | 543 | return -ENOTTY; |
543 | 544 | ||
544 | if ( !autofs_oz_mode(sbi) && !capable(CAP_SYS_ADMIN) ) | 545 | if (!autofs_oz_mode(sbi) && !capable(CAP_SYS_ADMIN)) |
545 | return -EPERM; | 546 | return -EPERM; |
546 | 547 | ||
547 | switch(cmd) { | 548 | switch(cmd) { |
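The command validation just above decomposes the ioctl word with _IOC_TYPE()/_IOC_NR() and range-checks it against the AUTOFS_IOC_FIRST..AUTOFS_IOC_COUNT window; the unsigned subtraction also rejects numbers below the base. A minimal userspace sketch of the same check, using a hypothetical MYIOC_* command set rather than the real autofs numbers:

#include <stdio.h>
#include <linux/ioctl.h>

/* Hypothetical command set: one magic type byte, a contiguous NR range of
 * MYIOC_COUNT entries starting at MYIOC_FIRST. */
#define MYIOC_FIRST _IO('x', 0x10)
#define MYIOC_COUNT 4

static int cmd_in_range(unsigned int cmd)
{
	/* unsigned subtraction also rejects NRs below MYIOC_FIRST */
	return _IOC_TYPE(cmd) == _IOC_TYPE(MYIOC_FIRST) &&
	       _IOC_NR(cmd) - _IOC_NR(MYIOC_FIRST) < MYIOC_COUNT;
}

int main(void)
{
	printf("%d\n", cmd_in_range(_IO('x', 0x12)));	/* 1: inside the window */
	printf("%d\n", cmd_in_range(_IO('x', 0x40)));	/* 0: NR out of range */
	return 0;
}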
diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c index 5769a2f9ad60..692364e8ffc3 100644 --- a/fs/autofs4/inode.c +++ b/fs/autofs4/inode.c | |||
@@ -218,8 +218,7 @@ static match_table_t tokens = { | |||
218 | }; | 218 | }; |
219 | 219 | ||
220 | static int parse_options(char *options, int *pipefd, uid_t *uid, gid_t *gid, | 220 | static int parse_options(char *options, int *pipefd, uid_t *uid, gid_t *gid, |
221 | pid_t *pgrp, unsigned int *type, | 221 | pid_t *pgrp, unsigned int *type, int *minproto, int *maxproto) |
222 | int *minproto, int *maxproto) | ||
223 | { | 222 | { |
224 | char *p; | 223 | char *p; |
225 | substring_t args[MAX_OPT_ARGS]; | 224 | substring_t args[MAX_OPT_ARGS]; |
@@ -314,7 +313,7 @@ int autofs4_fill_super(struct super_block *s, void *data, int silent) | |||
314 | struct autofs_info *ino; | 313 | struct autofs_info *ino; |
315 | 314 | ||
316 | sbi = kmalloc(sizeof(*sbi), GFP_KERNEL); | 315 | sbi = kmalloc(sizeof(*sbi), GFP_KERNEL); |
317 | if ( !sbi ) | 316 | if (!sbi) |
318 | goto fail_unlock; | 317 | goto fail_unlock; |
319 | DPRINTK("starting up, sbi = %p",sbi); | 318 | DPRINTK("starting up, sbi = %p",sbi); |
320 | 319 | ||
@@ -363,10 +362,9 @@ int autofs4_fill_super(struct super_block *s, void *data, int silent) | |||
363 | root->d_fsdata = ino; | 362 | root->d_fsdata = ino; |
364 | 363 | ||
365 | /* Can this call block? */ | 364 | /* Can this call block? */ |
366 | if (parse_options(data, &pipefd, | 365 | if (parse_options(data, &pipefd, &root_inode->i_uid, &root_inode->i_gid, |
367 | &root_inode->i_uid, &root_inode->i_gid, | 366 | &sbi->oz_pgrp, &sbi->type, &sbi->min_proto, |
368 | &sbi->oz_pgrp, &sbi->type, | 367 | &sbi->max_proto)) { |
369 | &sbi->min_proto, &sbi->max_proto)) { | ||
370 | printk("autofs: called with bogus options\n"); | 368 | printk("autofs: called with bogus options\n"); |
371 | goto fail_dput; | 369 | goto fail_dput; |
372 | } | 370 | } |
@@ -396,11 +394,11 @@ int autofs4_fill_super(struct super_block *s, void *data, int silent) | |||
396 | DPRINTK("pipe fd = %d, pgrp = %u", pipefd, sbi->oz_pgrp); | 394 | DPRINTK("pipe fd = %d, pgrp = %u", pipefd, sbi->oz_pgrp); |
397 | pipe = fget(pipefd); | 395 | pipe = fget(pipefd); |
398 | 396 | ||
399 | if ( !pipe ) { | 397 | if (!pipe) { |
400 | printk("autofs: could not open pipe file descriptor\n"); | 398 | printk("autofs: could not open pipe file descriptor\n"); |
401 | goto fail_dput; | 399 | goto fail_dput; |
402 | } | 400 | } |
403 | if ( !pipe->f_op || !pipe->f_op->write ) | 401 | if (!pipe->f_op || !pipe->f_op->write) |
404 | goto fail_fput; | 402 | goto fail_fput; |
405 | sbi->pipe = pipe; | 403 | sbi->pipe = pipe; |
406 | sbi->pipefd = pipefd; | 404 | sbi->pipefd = pipefd; |
diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c index 15170f4e13a7..2d4c8a3e604e 100644 --- a/fs/autofs4/root.c +++ b/fs/autofs4/root.c | |||
@@ -759,7 +759,7 @@ static int autofs4_dir_unlink(struct inode *dir, struct dentry *dentry) | |||
759 | struct autofs_info *p_ino; | 759 | struct autofs_info *p_ino; |
760 | 760 | ||
761 | /* This allows root to remove symlinks */ | 761 | /* This allows root to remove symlinks */ |
762 | if ( !autofs4_oz_mode(sbi) && !capable(CAP_SYS_ADMIN) ) | 762 | if (!autofs4_oz_mode(sbi) && !capable(CAP_SYS_ADMIN)) |
763 | return -EACCES; | 763 | return -EACCES; |
764 | 764 | ||
765 | if (atomic_dec_and_test(&ino->count)) { | 765 | if (atomic_dec_and_test(&ino->count)) { |
@@ -833,7 +833,7 @@ static int autofs4_dir_mkdir(struct inode *dir, struct dentry *dentry, int mode) | |||
833 | struct autofs_info *p_ino; | 833 | struct autofs_info *p_ino; |
834 | struct inode *inode; | 834 | struct inode *inode; |
835 | 835 | ||
836 | if ( !autofs4_oz_mode(sbi) ) | 836 | if (!autofs4_oz_mode(sbi)) |
837 | return -EACCES; | 837 | return -EACCES; |
838 | 838 | ||
839 | DPRINTK("dentry %p, creating %.*s", | 839 | DPRINTK("dentry %p, creating %.*s", |
@@ -871,11 +871,11 @@ static inline int autofs4_get_set_timeout(struct autofs_sb_info *sbi, | |||
871 | int rv; | 871 | int rv; |
872 | unsigned long ntimeout; | 872 | unsigned long ntimeout; |
873 | 873 | ||
874 | if ( (rv = get_user(ntimeout, p)) || | 874 | if ((rv = get_user(ntimeout, p)) || |
875 | (rv = put_user(sbi->exp_timeout/HZ, p)) ) | 875 | (rv = put_user(sbi->exp_timeout/HZ, p))) |
876 | return rv; | 876 | return rv; |
877 | 877 | ||
878 | if ( ntimeout > ULONG_MAX/HZ ) | 878 | if (ntimeout > ULONG_MAX/HZ) |
879 | sbi->exp_timeout = 0; | 879 | sbi->exp_timeout = 0; |
880 | else | 880 | else |
881 | sbi->exp_timeout = ntimeout * HZ; | 881 | sbi->exp_timeout = ntimeout * HZ; |
@@ -906,7 +906,7 @@ static inline int autofs4_ask_reghost(struct autofs_sb_info *sbi, int __user *p) | |||
906 | DPRINTK("returning %d", sbi->needs_reghost); | 906 | DPRINTK("returning %d", sbi->needs_reghost); |
907 | 907 | ||
908 | status = put_user(sbi->needs_reghost, p); | 908 | status = put_user(sbi->needs_reghost, p); |
909 | if ( status ) | 909 | if (status) |
910 | return status; | 910 | return status; |
911 | 911 | ||
912 | sbi->needs_reghost = 0; | 912 | sbi->needs_reghost = 0; |
@@ -975,11 +975,11 @@ static int autofs4_root_ioctl(struct inode *inode, struct file *filp, | |||
975 | DPRINTK("cmd = 0x%08x, arg = 0x%08lx, sbi = %p, pgrp = %u", | 975 | DPRINTK("cmd = 0x%08x, arg = 0x%08lx, sbi = %p, pgrp = %u", |
976 | cmd,arg,sbi,process_group(current)); | 976 | cmd,arg,sbi,process_group(current)); |
977 | 977 | ||
978 | if ( _IOC_TYPE(cmd) != _IOC_TYPE(AUTOFS_IOC_FIRST) || | 978 | if (_IOC_TYPE(cmd) != _IOC_TYPE(AUTOFS_IOC_FIRST) || |
979 | _IOC_NR(cmd) - _IOC_NR(AUTOFS_IOC_FIRST) >= AUTOFS_IOC_COUNT ) | 979 | _IOC_NR(cmd) - _IOC_NR(AUTOFS_IOC_FIRST) >= AUTOFS_IOC_COUNT) |
980 | return -ENOTTY; | 980 | return -ENOTTY; |
981 | 981 | ||
982 | if ( !autofs4_oz_mode(sbi) && !capable(CAP_SYS_ADMIN) ) | 982 | if (!autofs4_oz_mode(sbi) && !capable(CAP_SYS_ADMIN)) |
983 | return -EPERM; | 983 | return -EPERM; |
984 | 984 | ||
985 | switch(cmd) { | 985 | switch(cmd) { |
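autofs4_get_set_timeout() above is an exchange: get_user() reads the new timeout through the pointer and put_user() writes the previous one back through the same pointer. A sketch of the daemon side under that assumption, where mnt_fd is an already-open descriptor on the autofs mount root:

#include <sys/ioctl.h>
#include <linux/auto_fs.h>

/* Returns 0 on success; *timeout carries the new value (seconds) in and,
 * via the kernel's put_user(), the previous value out. */
static int exchange_expire_timeout(int mnt_fd, unsigned long *timeout)
{
	return ioctl(mnt_fd, AUTOFS_IOC_SETTIMEOUT, timeout);
}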
diff --git a/fs/compat.c b/fs/compat.c index 9cf75df9b2bb..7b21b0a82596 100644 --- a/fs/compat.c +++ b/fs/compat.c | |||
@@ -46,6 +46,7 @@ | |||
46 | #include <linux/tsacct_kern.h> | 46 | #include <linux/tsacct_kern.h> |
47 | #include <linux/security.h> | 47 | #include <linux/security.h> |
48 | #include <linux/highmem.h> | 48 | #include <linux/highmem.h> |
49 | #include <linux/signal.h> | ||
49 | #include <linux/poll.h> | 50 | #include <linux/poll.h> |
50 | #include <linux/mm.h> | 51 | #include <linux/mm.h> |
51 | #include <linux/eventpoll.h> | 52 | #include <linux/eventpoll.h> |
@@ -2199,3 +2200,51 @@ asmlinkage long compat_sys_epoll_pwait(int epfd, | |||
2199 | #endif /* TIF_RESTORE_SIGMASK */ | 2200 | #endif /* TIF_RESTORE_SIGMASK */ |
2200 | 2201 | ||
2201 | #endif /* CONFIG_EPOLL */ | 2202 | #endif /* CONFIG_EPOLL */ |
2203 | |||
2204 | #ifdef CONFIG_SIGNALFD | ||
2205 | |||
2206 | asmlinkage long compat_sys_signalfd(int ufd, | ||
2207 | const compat_sigset_t __user *sigmask, | ||
2208 | compat_size_t sigsetsize) | ||
2209 | { | ||
2210 | compat_sigset_t ss32; | ||
2211 | sigset_t tmp; | ||
2212 | sigset_t __user *ksigmask; | ||
2213 | |||
2214 | if (sigsetsize != sizeof(compat_sigset_t)) | ||
2215 | return -EINVAL; | ||
2216 | if (copy_from_user(&ss32, sigmask, sizeof(ss32))) | ||
2217 | return -EFAULT; | ||
2218 | sigset_from_compat(&tmp, &ss32); | ||
2219 | ksigmask = compat_alloc_user_space(sizeof(sigset_t)); | ||
2220 | if (copy_to_user(ksigmask, &tmp, sizeof(sigset_t))) | ||
2221 | return -EFAULT; | ||
2222 | |||
2223 | return sys_signalfd(ufd, ksigmask, sizeof(sigset_t)); | ||
2224 | } | ||
2225 | |||
2226 | #endif /* CONFIG_SIGNALFD */ | ||
2227 | |||
2228 | #ifdef CONFIG_TIMERFD | ||
2229 | |||
2230 | asmlinkage long compat_sys_timerfd(int ufd, int clockid, int flags, | ||
2231 | const struct compat_itimerspec __user *utmr) | ||
2232 | { | ||
2233 | long res; | ||
2234 | struct itimerspec t; | ||
2235 | struct itimerspec __user *ut; | ||
2236 | |||
2237 | res = -EFAULT; | ||
2238 | if (get_compat_itimerspec(&t, utmr)) | ||
2239 | goto err_exit; | ||
2240 | ut = compat_alloc_user_space(sizeof(*ut)); | ||
2242 | if (copy_to_user(ut, &t, sizeof(t))) | ||
2242 | goto err_exit; | ||
2243 | |||
2244 | res = sys_timerfd(ufd, clockid, flags, ut); | ||
2245 | err_exit: | ||
2246 | return res; | ||
2247 | } | ||
2248 | |||
2249 | #endif /* CONFIG_TIMERFD */ | ||
2250 | |||
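Both new wrappers follow the same compat staging pattern: widen the 32-bit user structure into its native form, copy that into user-accessible memory obtained from compat_alloc_user_space(), and pass the staged pointer to the native syscall. A generic sketch of the shape, with sys_foo() and get_compat_foo() as hypothetical stand-ins rather than real interfaces:

asmlinkage long compat_sys_foo(const struct compat_foo __user *ufoo)
{
	struct foo f;
	struct foo __user *kfoo;

	if (get_compat_foo(&f, ufoo))		/* widen the 32-bit layout */
		return -EFAULT;
	kfoo = compat_alloc_user_space(sizeof(*kfoo));
	if (copy_to_user(kfoo, &f, sizeof(f)))	/* stage a native copy */
		return -EFAULT;
	return sys_foo(kfoo);			/* native path gets a __user pointer */
}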
diff --git a/fs/eventfd.c b/fs/eventfd.c new file mode 100644 index 000000000000..480e2b3c4166 --- /dev/null +++ b/fs/eventfd.c | |||
@@ -0,0 +1,228 @@ | |||
1 | /* | ||
2 | * fs/eventfd.c | ||
3 | * | ||
4 | * Copyright (C) 2007 Davide Libenzi <davidel@xmailserver.org> | ||
5 | * | ||
6 | */ | ||
7 | |||
8 | #include <linux/file.h> | ||
9 | #include <linux/poll.h> | ||
10 | #include <linux/init.h> | ||
11 | #include <linux/fs.h> | ||
12 | #include <linux/sched.h> | ||
13 | #include <linux/kernel.h> | ||
14 | #include <linux/list.h> | ||
15 | #include <linux/spinlock.h> | ||
16 | #include <linux/anon_inodes.h> | ||
17 | #include <linux/eventfd.h> | ||
18 | |||
19 | struct eventfd_ctx { | ||
20 | spinlock_t lock; | ||
21 | wait_queue_head_t wqh; | ||
22 | /* | ||
23 | * Every time that a write(2) is performed on an eventfd, the | ||
24 | * value of the __u64 being written is added to "count" and a | ||
25 | * wakeup is performed on "wqh". A read(2) will return the "count" | ||
26 | * value to userspace, and will reset "count" to zero. The kernel | ||
27 | * side eventfd_signal() also adds to the "count" counter and | ||
28 | * issues a wakeup. | ||
29 | */ | ||
30 | __u64 count; | ||
31 | }; | ||
32 | |||
33 | /* | ||
34 | * Adds "n" to the eventfd counter "count". Returns "n" in case of | ||
35 | * success, or a value lower than "n" in case of counter overflow. | ||
36 | * This function is supposed to be called by the kernel in paths | ||
37 | * that do not allow sleeping. In this function we allow the counter | ||
38 | * to reach the ULLONG_MAX value, and we signal this as an overflow | ||
39 | * condition by returning a POLLERR to poll(2). | ||
40 | */ | ||
41 | int eventfd_signal(struct file *file, int n) | ||
42 | { | ||
43 | struct eventfd_ctx *ctx = file->private_data; | ||
44 | unsigned long flags; | ||
45 | |||
46 | if (n < 0) | ||
47 | return -EINVAL; | ||
48 | spin_lock_irqsave(&ctx->lock, flags); | ||
49 | if (ULLONG_MAX - ctx->count < n) | ||
50 | n = (int) (ULLONG_MAX - ctx->count); | ||
51 | ctx->count += n; | ||
52 | if (waitqueue_active(&ctx->wqh)) | ||
53 | wake_up_locked(&ctx->wqh); | ||
54 | spin_unlock_irqrestore(&ctx->lock, flags); | ||
55 | |||
56 | return n; | ||
57 | } | ||
58 | |||
59 | static int eventfd_release(struct inode *inode, struct file *file) | ||
60 | { | ||
61 | kfree(file->private_data); | ||
62 | return 0; | ||
63 | } | ||
64 | |||
65 | static unsigned int eventfd_poll(struct file *file, poll_table *wait) | ||
66 | { | ||
67 | struct eventfd_ctx *ctx = file->private_data; | ||
68 | unsigned int events = 0; | ||
69 | unsigned long flags; | ||
70 | |||
71 | poll_wait(file, &ctx->wqh, wait); | ||
72 | |||
73 | spin_lock_irqsave(&ctx->lock, flags); | ||
74 | if (ctx->count > 0) | ||
75 | events |= POLLIN; | ||
76 | if (ctx->count == ULLONG_MAX) | ||
77 | events |= POLLERR; | ||
78 | if (ULLONG_MAX - 1 > ctx->count) | ||
79 | events |= POLLOUT; | ||
80 | spin_unlock_irqrestore(&ctx->lock, flags); | ||
81 | |||
82 | return events; | ||
83 | } | ||
84 | |||
85 | static ssize_t eventfd_read(struct file *file, char __user *buf, size_t count, | ||
86 | loff_t *ppos) | ||
87 | { | ||
88 | struct eventfd_ctx *ctx = file->private_data; | ||
89 | ssize_t res; | ||
90 | __u64 ucnt; | ||
91 | DECLARE_WAITQUEUE(wait, current); | ||
92 | |||
93 | if (count < sizeof(ucnt)) | ||
94 | return -EINVAL; | ||
95 | spin_lock_irq(&ctx->lock); | ||
96 | res = -EAGAIN; | ||
97 | ucnt = ctx->count; | ||
98 | if (ucnt > 0) | ||
99 | res = sizeof(ucnt); | ||
100 | else if (!(file->f_flags & O_NONBLOCK)) { | ||
101 | __add_wait_queue(&ctx->wqh, &wait); | ||
102 | for (res = 0;;) { | ||
103 | set_current_state(TASK_INTERRUPTIBLE); | ||
104 | if (ctx->count > 0) { | ||
105 | ucnt = ctx->count; | ||
106 | res = sizeof(ucnt); | ||
107 | break; | ||
108 | } | ||
109 | if (signal_pending(current)) { | ||
110 | res = -ERESTARTSYS; | ||
111 | break; | ||
112 | } | ||
113 | spin_unlock_irq(&ctx->lock); | ||
114 | schedule(); | ||
115 | spin_lock_irq(&ctx->lock); | ||
116 | } | ||
117 | __remove_wait_queue(&ctx->wqh, &wait); | ||
118 | __set_current_state(TASK_RUNNING); | ||
119 | } | ||
120 | if (res > 0) { | ||
121 | ctx->count = 0; | ||
122 | if (waitqueue_active(&ctx->wqh)) | ||
123 | wake_up_locked(&ctx->wqh); | ||
124 | } | ||
125 | spin_unlock_irq(&ctx->lock); | ||
126 | if (res > 0 && put_user(ucnt, (__u64 __user *) buf)) | ||
127 | return -EFAULT; | ||
128 | |||
129 | return res; | ||
130 | } | ||
131 | |||
132 | static ssize_t eventfd_write(struct file *file, const char __user *buf, size_t count, | ||
133 | loff_t *ppos) | ||
134 | { | ||
135 | struct eventfd_ctx *ctx = file->private_data; | ||
136 | ssize_t res; | ||
137 | __u64 ucnt; | ||
138 | DECLARE_WAITQUEUE(wait, current); | ||
139 | |||
140 | if (count < sizeof(ucnt)) | ||
141 | return -EINVAL; | ||
142 | if (copy_from_user(&ucnt, buf, sizeof(ucnt))) | ||
143 | return -EFAULT; | ||
144 | if (ucnt == ULLONG_MAX) | ||
145 | return -EINVAL; | ||
146 | spin_lock_irq(&ctx->lock); | ||
147 | res = -EAGAIN; | ||
148 | if (ULLONG_MAX - ctx->count > ucnt) | ||
149 | res = sizeof(ucnt); | ||
150 | else if (!(file->f_flags & O_NONBLOCK)) { | ||
151 | __add_wait_queue(&ctx->wqh, &wait); | ||
152 | for (res = 0;;) { | ||
153 | set_current_state(TASK_INTERRUPTIBLE); | ||
154 | if (ULLONG_MAX - ctx->count > ucnt) { | ||
155 | res = sizeof(ucnt); | ||
156 | break; | ||
157 | } | ||
158 | if (signal_pending(current)) { | ||
159 | res = -ERESTARTSYS; | ||
160 | break; | ||
161 | } | ||
162 | spin_unlock_irq(&ctx->lock); | ||
163 | schedule(); | ||
164 | spin_lock_irq(&ctx->lock); | ||
165 | } | ||
166 | __remove_wait_queue(&ctx->wqh, &wait); | ||
167 | __set_current_state(TASK_RUNNING); | ||
168 | } | ||
169 | if (res > 0) { | ||
170 | ctx->count += ucnt; | ||
171 | if (waitqueue_active(&ctx->wqh)) | ||
172 | wake_up_locked(&ctx->wqh); | ||
173 | } | ||
174 | spin_unlock_irq(&ctx->lock); | ||
175 | |||
176 | return res; | ||
177 | } | ||
178 | |||
179 | static const struct file_operations eventfd_fops = { | ||
180 | .release = eventfd_release, | ||
181 | .poll = eventfd_poll, | ||
182 | .read = eventfd_read, | ||
183 | .write = eventfd_write, | ||
184 | }; | ||
185 | |||
186 | struct file *eventfd_fget(int fd) | ||
187 | { | ||
188 | struct file *file; | ||
189 | |||
190 | file = fget(fd); | ||
191 | if (!file) | ||
192 | return ERR_PTR(-EBADF); | ||
193 | if (file->f_op != &eventfd_fops) { | ||
194 | fput(file); | ||
195 | return ERR_PTR(-EINVAL); | ||
196 | } | ||
197 | |||
198 | return file; | ||
199 | } | ||
200 | |||
201 | asmlinkage long sys_eventfd(unsigned int count) | ||
202 | { | ||
203 | int error, fd; | ||
204 | struct eventfd_ctx *ctx; | ||
205 | struct file *file; | ||
206 | struct inode *inode; | ||
207 | |||
208 | ctx = kmalloc(sizeof(*ctx), GFP_KERNEL); | ||
209 | if (!ctx) | ||
210 | return -ENOMEM; | ||
211 | |||
212 | init_waitqueue_head(&ctx->wqh); | ||
213 | spin_lock_init(&ctx->lock); | ||
214 | ctx->count = count; | ||
215 | |||
216 | /* | ||
217 | * When we call this, the initialization must be complete, since | ||
218 | * anon_inode_getfd() will install the fd. | ||
219 | */ | ||
220 | error = anon_inode_getfd(&fd, &inode, &file, "[eventfd]", | ||
221 | &eventfd_fops, ctx); | ||
222 | if (!error) | ||
223 | return fd; | ||
224 | |||
225 | kfree(ctx); | ||
226 | return error; | ||
227 | } | ||
228 | |||
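A quick userspace round trip through the counter semantics implemented above. There is no libc wrapper for the new syscall at this point, so the sketch assumes __NR_eventfd is wired up for the target architecture and calls it raw:

#include <stdint.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/syscall.h>

int main(void)
{
	uint64_t val, one = 1;
	int efd = syscall(__NR_eventfd, 0);	/* counter starts at 0 */

	if (efd < 0)
		return 1;
	write(efd, &one, sizeof(one));		/* count += 1, wakes any reader */
	write(efd, &one, sizeof(one));		/* count += 1 again */
	read(efd, &val, sizeof(val));		/* val == 2; counter resets to 0 */
	printf("%llu\n", (unsigned long long)val);
	return 0;
}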
diff --git a/fs/eventpoll.c b/fs/eventpoll.c index b5c7ca584939..1aad34ea61a4 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c | |||
@@ -11,7 +11,6 @@ | |||
11 | * | 11 | * |
12 | */ | 12 | */ |
13 | 13 | ||
14 | #include <linux/module.h> | ||
15 | #include <linux/init.h> | 14 | #include <linux/init.h> |
16 | #include <linux/kernel.h> | 15 | #include <linux/kernel.h> |
17 | #include <linux/sched.h> | 16 | #include <linux/sched.h> |
@@ -34,6 +33,7 @@ | |||
34 | #include <linux/mount.h> | 33 | #include <linux/mount.h> |
35 | #include <linux/bitops.h> | 34 | #include <linux/bitops.h> |
36 | #include <linux/mutex.h> | 35 | #include <linux/mutex.h> |
36 | #include <linux/anon_inodes.h> | ||
37 | #include <asm/uaccess.h> | 37 | #include <asm/uaccess.h> |
38 | #include <asm/system.h> | 38 | #include <asm/system.h> |
39 | #include <asm/io.h> | 39 | #include <asm/io.h> |
@@ -41,7 +41,6 @@ | |||
41 | #include <asm/atomic.h> | 41 | #include <asm/atomic.h> |
42 | #include <asm/semaphore.h> | 42 | #include <asm/semaphore.h> |
43 | 43 | ||
44 | |||
45 | /* | 44 | /* |
46 | * LOCKING: | 45 | * LOCKING: |
47 | * There are three level of locking required by epoll : | 46 | * There are three level of locking required by epoll : |
@@ -74,9 +73,6 @@ | |||
74 | * a greater scalability. | 73 | * a greater scalability. |
75 | */ | 74 | */ |
76 | 75 | ||
77 | |||
78 | #define EVENTPOLLFS_MAGIC 0x03111965 /* My birthday should work for this :) */ | ||
79 | |||
80 | #define DEBUG_EPOLL 0 | 76 | #define DEBUG_EPOLL 0 |
81 | 77 | ||
82 | #if DEBUG_EPOLL > 0 | 78 | #if DEBUG_EPOLL > 0 |
@@ -106,7 +102,6 @@ | |||
106 | 102 | ||
107 | #define EP_MAX_EVENTS (INT_MAX / sizeof(struct epoll_event)) | 103 | #define EP_MAX_EVENTS (INT_MAX / sizeof(struct epoll_event)) |
108 | 104 | ||
109 | |||
110 | struct epoll_filefd { | 105 | struct epoll_filefd { |
111 | struct file *file; | 106 | struct file *file; |
112 | int fd; | 107 | int fd; |
@@ -224,43 +219,6 @@ struct ep_pqueue { | |||
224 | struct epitem *epi; | 219 | struct epitem *epi; |
225 | }; | 220 | }; |
226 | 221 | ||
227 | |||
228 | |||
229 | static void ep_poll_safewake_init(struct poll_safewake *psw); | ||
230 | static void ep_poll_safewake(struct poll_safewake *psw, wait_queue_head_t *wq); | ||
231 | static int ep_getfd(int *efd, struct inode **einode, struct file **efile, | ||
232 | struct eventpoll *ep); | ||
233 | static int ep_alloc(struct eventpoll **pep); | ||
234 | static void ep_free(struct eventpoll *ep); | ||
235 | static struct epitem *ep_find(struct eventpoll *ep, struct file *file, int fd); | ||
236 | static void ep_use_epitem(struct epitem *epi); | ||
237 | static void ep_release_epitem(struct epitem *epi); | ||
238 | static void ep_ptable_queue_proc(struct file *file, wait_queue_head_t *whead, | ||
239 | poll_table *pt); | ||
240 | static void ep_rbtree_insert(struct eventpoll *ep, struct epitem *epi); | ||
241 | static int ep_insert(struct eventpoll *ep, struct epoll_event *event, | ||
242 | struct file *tfile, int fd); | ||
243 | static int ep_modify(struct eventpoll *ep, struct epitem *epi, | ||
244 | struct epoll_event *event); | ||
245 | static void ep_unregister_pollwait(struct eventpoll *ep, struct epitem *epi); | ||
246 | static int ep_unlink(struct eventpoll *ep, struct epitem *epi); | ||
247 | static int ep_remove(struct eventpoll *ep, struct epitem *epi); | ||
248 | static int ep_poll_callback(wait_queue_t *wait, unsigned mode, int sync, void *key); | ||
249 | static int ep_eventpoll_close(struct inode *inode, struct file *file); | ||
250 | static unsigned int ep_eventpoll_poll(struct file *file, poll_table *wait); | ||
251 | static int ep_send_events(struct eventpoll *ep, struct list_head *txlist, | ||
252 | struct epoll_event __user *events, int maxevents); | ||
253 | static int ep_events_transfer(struct eventpoll *ep, | ||
254 | struct epoll_event __user *events, | ||
255 | int maxevents); | ||
256 | static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events, | ||
257 | int maxevents, long timeout); | ||
258 | static int eventpollfs_delete_dentry(struct dentry *dentry); | ||
259 | static struct inode *ep_eventpoll_inode(void); | ||
260 | static int eventpollfs_get_sb(struct file_system_type *fs_type, | ||
261 | int flags, const char *dev_name, | ||
262 | void *data, struct vfsmount *mnt); | ||
263 | |||
264 | /* | 222 | /* |
265 | * This semaphore is used to serialize ep_free() and eventpoll_release_file(). | 223 | * This semaphore is used to serialize ep_free() and eventpoll_release_file(). |
266 | */ | 224 | */ |
@@ -275,37 +233,6 @@ static struct kmem_cache *epi_cache __read_mostly; | |||
275 | /* Slab cache used to allocate "struct eppoll_entry" */ | 233 | /* Slab cache used to allocate "struct eppoll_entry" */ |
276 | static struct kmem_cache *pwq_cache __read_mostly; | 234 | static struct kmem_cache *pwq_cache __read_mostly; |
277 | 235 | ||
278 | /* Virtual fs used to allocate inodes for eventpoll files */ | ||
279 | static struct vfsmount *eventpoll_mnt __read_mostly; | ||
280 | |||
281 | /* File callbacks that implement the eventpoll file behaviour */ | ||
282 | static const struct file_operations eventpoll_fops = { | ||
283 | .release = ep_eventpoll_close, | ||
284 | .poll = ep_eventpoll_poll | ||
285 | }; | ||
286 | |||
287 | /* | ||
288 | * This is used to register the virtual file system from where | ||
289 | * eventpoll inodes are allocated. | ||
290 | */ | ||
291 | static struct file_system_type eventpoll_fs_type = { | ||
292 | .name = "eventpollfs", | ||
293 | .get_sb = eventpollfs_get_sb, | ||
294 | .kill_sb = kill_anon_super, | ||
295 | }; | ||
296 | |||
297 | /* Very basic directory entry operations for the eventpoll virtual file system */ | ||
298 | static struct dentry_operations eventpollfs_dentry_operations = { | ||
299 | .d_delete = eventpollfs_delete_dentry, | ||
300 | }; | ||
301 | |||
302 | |||
303 | |||
304 | /* Fast test to see if the file is an evenpoll file */ | ||
305 | static inline int is_file_epoll(struct file *f) | ||
306 | { | ||
307 | return f->f_op == &eventpoll_fops; | ||
308 | } | ||
309 | 236 | ||
310 | /* Setup the structure that is used as key for the rb-tree */ | 237 | /* Setup the structure that is used as key for the rb-tree */ |
311 | static inline void ep_set_ffd(struct epoll_filefd *ffd, | 238 | static inline void ep_set_ffd(struct epoll_filefd *ffd, |
@@ -374,7 +301,6 @@ static void ep_poll_safewake_init(struct poll_safewake *psw) | |||
374 | spin_lock_init(&psw->lock); | 301 | spin_lock_init(&psw->lock); |
375 | } | 302 | } |
376 | 303 | ||
377 | |||
378 | /* | 304 | /* |
379 | * Perform a safe wake up of the poll wait list. The problem is that | 305 | * Perform a safe wake up of the poll wait list. The problem is that |
380 | * with the new callback'd wake up system, it is possible that the | 306 | * with the new callback'd wake up system, it is possible that the |
@@ -429,399 +355,144 @@ static void ep_poll_safewake(struct poll_safewake *psw, wait_queue_head_t *wq) | |||
429 | spin_unlock_irqrestore(&psw->lock, flags); | 355 | spin_unlock_irqrestore(&psw->lock, flags); |
430 | } | 356 | } |
431 | 357 | ||
432 | |||
433 | /* | 358 | /* |
434 | * This is called from eventpoll_release() to unlink files from the eventpoll | 359 | * This function unregisters poll callbacks from the associated file descriptor. |
435 | * interface. We need to have this facility to cleanup correctly files that are | 360 | * Since this must be called without holding "ep->lock", the atomic exchange trick |
436 | * closed without being removed from the eventpoll interface. | 361 | * will protect us from multiple unregisters. |
437 | */ | 362 | */ |
438 | void eventpoll_release_file(struct file *file) | 363 | static void ep_unregister_pollwait(struct eventpoll *ep, struct epitem *epi) |
439 | { | 364 | { |
440 | struct list_head *lsthead = &file->f_ep_links; | 365 | int nwait; |
441 | struct eventpoll *ep; | 366 | struct list_head *lsthead = &epi->pwqlist; |
442 | struct epitem *epi; | 367 | struct eppoll_entry *pwq; |
443 | 368 | ||
444 | /* | 369 | /* This is called without locks, so we need the atomic exchange */ |
445 | * We don't want to get "file->f_ep_lock" because it is not | 370 | nwait = xchg(&epi->nwait, 0); |
446 | * necessary. It is not necessary because we're in the "struct file" | ||
447 | * cleanup path, and this means that noone is using this file anymore. | ||
448 | * The only hit might come from ep_free() but by holding the semaphore | ||
449 | * will correctly serialize the operation. We do need to acquire | ||
450 | * "ep->sem" after "epmutex" because ep_remove() requires it when called | ||
451 | * from anywhere but ep_free(). | ||
452 | */ | ||
453 | mutex_lock(&epmutex); | ||
454 | 371 | ||
455 | while (!list_empty(lsthead)) { | 372 | if (nwait) { |
456 | epi = list_first_entry(lsthead, struct epitem, fllink); | 373 | while (!list_empty(lsthead)) { |
374 | pwq = list_first_entry(lsthead, struct eppoll_entry, llink); | ||
457 | 375 | ||
458 | ep = epi->ep; | 376 | list_del_init(&pwq->llink); |
459 | list_del_init(&epi->fllink); | 377 | remove_wait_queue(pwq->whead, &pwq->wait); |
460 | down_write(&ep->sem); | 378 | kmem_cache_free(pwq_cache, pwq); |
461 | ep_remove(ep, epi); | 379 | } |
462 | up_write(&ep->sem); | ||
463 | } | 380 | } |
464 | |||
465 | mutex_unlock(&epmutex); | ||
466 | } | 381 | } |
467 | 382 | ||
468 | |||
469 | /* | 383 | /* |
470 | * It opens an eventpoll file descriptor by suggesting a storage of "size" | 384 | * Unlink the "struct epitem" from all places it might have been hooked up. |
471 | * file descriptors. The size parameter is just an hint about how to size | 385 | * This function must be called with the write IRQ lock held on "ep->lock". |
472 | * data structures. It won't prevent the user to store more than "size" | ||
473 | * file descriptors inside the epoll interface. It is the kernel part of | ||
474 | * the userspace epoll_create(2). | ||
475 | */ | 386 | */ |
476 | asmlinkage long sys_epoll_create(int size) | 387 | static int ep_unlink(struct eventpoll *ep, struct epitem *epi) |
477 | { | 388 | { |
478 | int error, fd = -1; | 389 | int error; |
479 | struct eventpoll *ep; | ||
480 | struct inode *inode; | ||
481 | struct file *file; | ||
482 | |||
483 | DNPRINTK(3, (KERN_INFO "[%p] eventpoll: sys_epoll_create(%d)\n", | ||
484 | current, size)); | ||
485 | |||
486 | /* | ||
487 | * Sanity check on the size parameter, and create the internal data | ||
488 | * structure ( "struct eventpoll" ). | ||
489 | */ | ||
490 | error = -EINVAL; | ||
491 | if (size <= 0 || (error = ep_alloc(&ep)) != 0) | ||
492 | goto eexit_1; | ||
493 | 390 | ||
494 | /* | 391 | /* |
495 | * Creates all the items needed to setup an eventpoll file. That is, | 392 | * It can happen that this one is called for an item already unlinked. |
496 | * a file structure, and inode and a free file descriptor. | 393 | * The check protects us from doing a double unlink (crash). |
497 | */ | 394 | */ |
498 | error = ep_getfd(&fd, &inode, &file, ep); | 395 | error = -ENOENT; |
499 | if (error) | 396 | if (!ep_rb_linked(&epi->rbn)) |
500 | goto eexit_2; | 397 | goto error_return; |
501 | |||
502 | DNPRINTK(3, (KERN_INFO "[%p] eventpoll: sys_epoll_create(%d) = %d\n", | ||
503 | current, size, fd)); | ||
504 | |||
505 | return fd; | ||
506 | |||
507 | eexit_2: | ||
508 | ep_free(ep); | ||
509 | kfree(ep); | ||
510 | eexit_1: | ||
511 | DNPRINTK(3, (KERN_INFO "[%p] eventpoll: sys_epoll_create(%d) = %d\n", | ||
512 | current, size, error)); | ||
513 | return error; | ||
514 | } | ||
515 | |||
516 | |||
517 | /* | ||
518 | * The following function implements the controller interface for | ||
519 | * the eventpoll file that enables the insertion/removal/change of | ||
520 | * file descriptors inside the interest set. It represents | ||
521 | * the kernel part of the user space epoll_ctl(2). | ||
522 | */ | ||
523 | asmlinkage long | ||
524 | sys_epoll_ctl(int epfd, int op, int fd, struct epoll_event __user *event) | ||
525 | { | ||
526 | int error; | ||
527 | struct file *file, *tfile; | ||
528 | struct eventpoll *ep; | ||
529 | struct epitem *epi; | ||
530 | struct epoll_event epds; | ||
531 | |||
532 | DNPRINTK(3, (KERN_INFO "[%p] eventpoll: sys_epoll_ctl(%d, %d, %d, %p)\n", | ||
533 | current, epfd, op, fd, event)); | ||
534 | |||
535 | error = -EFAULT; | ||
536 | if (ep_op_has_event(op) && | ||
537 | copy_from_user(&epds, event, sizeof(struct epoll_event))) | ||
538 | goto eexit_1; | ||
539 | |||
540 | /* Get the "struct file *" for the eventpoll file */ | ||
541 | error = -EBADF; | ||
542 | file = fget(epfd); | ||
543 | if (!file) | ||
544 | goto eexit_1; | ||
545 | |||
546 | /* Get the "struct file *" for the target file */ | ||
547 | tfile = fget(fd); | ||
548 | if (!tfile) | ||
549 | goto eexit_2; | ||
550 | |||
551 | /* The target file descriptor must support poll */ | ||
552 | error = -EPERM; | ||
553 | if (!tfile->f_op || !tfile->f_op->poll) | ||
554 | goto eexit_3; | ||
555 | 398 | ||
556 | /* | 399 | /* |
557 | * We have to check that the file structure underneath the file descriptor | 400 | * Clear the event mask for the unlinked item. This will avoid item |
558 | * the user passed to us _is_ an eventpoll file. And also we do not permit | 401 | * notifications to be sent after the unlink operation from inside |
559 | * adding an epoll file descriptor inside itself. | 402 | * the kernel->userspace event transfer loop. |
560 | */ | 403 | */ |
561 | error = -EINVAL; | 404 | epi->event.events = 0; |
562 | if (file == tfile || !is_file_epoll(file)) | ||
563 | goto eexit_3; | ||
564 | 405 | ||
565 | /* | 406 | /* |
566 | * At this point it is safe to assume that the "private_data" contains | 407 | * At this point it is safe to do the job: unlink the item from our rb-tree. |
567 | * our own data structure. | 408 | * This operation together with the above check closes the door to |
409 | * double unlinks. | ||
568 | */ | 410 | */ |
569 | ep = file->private_data; | 411 | ep_rb_erase(&epi->rbn, &ep->rbr); |
570 | |||
571 | down_write(&ep->sem); | ||
572 | |||
573 | /* Try to lookup the file inside our RB tree */ | ||
574 | epi = ep_find(ep, tfile, fd); | ||
575 | |||
576 | error = -EINVAL; | ||
577 | switch (op) { | ||
578 | case EPOLL_CTL_ADD: | ||
579 | if (!epi) { | ||
580 | epds.events |= POLLERR | POLLHUP; | ||
581 | |||
582 | error = ep_insert(ep, &epds, tfile, fd); | ||
583 | } else | ||
584 | error = -EEXIST; | ||
585 | break; | ||
586 | case EPOLL_CTL_DEL: | ||
587 | if (epi) | ||
588 | error = ep_remove(ep, epi); | ||
589 | else | ||
590 | error = -ENOENT; | ||
591 | break; | ||
592 | case EPOLL_CTL_MOD: | ||
593 | if (epi) { | ||
594 | epds.events |= POLLERR | POLLHUP; | ||
595 | error = ep_modify(ep, epi, &epds); | ||
596 | } else | ||
597 | error = -ENOENT; | ||
598 | break; | ||
599 | } | ||
600 | 412 | ||
601 | /* | 413 | /* |
602 | * The function ep_find() increments the usage count of the structure | 414 | * If the item we are going to remove is inside the ready file descriptors |
603 | * so, if this is not NULL, we need to release it. | 415 | * list, we want to remove it from there to avoid stale events. |
604 | */ | 416 | */ |
605 | if (epi) | 417 | if (ep_is_linked(&epi->rdllink)) |
606 | ep_release_epitem(epi); | 418 | list_del_init(&epi->rdllink); |
607 | 419 | ||
608 | up_write(&ep->sem); | 420 | error = 0; |
421 | error_return: | ||
609 | 422 | ||
610 | eexit_3: | 423 | DNPRINTK(3, (KERN_INFO "[%p] eventpoll: ep_unlink(%p, %p) = %d\n", |
611 | fput(tfile); | 424 | current, ep, epi->ffd.file, error)); |
612 | eexit_2: | ||
613 | fput(file); | ||
614 | eexit_1: | ||
615 | DNPRINTK(3, (KERN_INFO "[%p] eventpoll: sys_epoll_ctl(%d, %d, %d, %p) = %d\n", | ||
616 | current, epfd, op, fd, event, error)); | ||
617 | 425 | ||
618 | return error; | 426 | return error; |
619 | } | 427 | } |
620 | 428 | ||
621 | |||
622 | /* | 429 | /* |
623 | * Implement the event wait interface for the eventpoll file. It is the kernel | 430 | * Increment the usage count of the "struct epitem", making sure |
624 | * part of the user space epoll_wait(2). | 431 | * that the user will have a valid pointer to reference. |
625 | */ | 432 | */ |
626 | asmlinkage long sys_epoll_wait(int epfd, struct epoll_event __user *events, | 433 | static void ep_use_epitem(struct epitem *epi) |
627 | int maxevents, int timeout) | ||
628 | { | 434 | { |
629 | int error; | 435 | atomic_inc(&epi->usecnt); |
630 | struct file *file; | ||
631 | struct eventpoll *ep; | ||
632 | |||
633 | DNPRINTK(3, (KERN_INFO "[%p] eventpoll: sys_epoll_wait(%d, %p, %d, %d)\n", | ||
634 | current, epfd, events, maxevents, timeout)); | ||
635 | |||
636 | /* The maximum number of event must be greater than zero */ | ||
637 | if (maxevents <= 0 || maxevents > EP_MAX_EVENTS) | ||
638 | return -EINVAL; | ||
639 | |||
640 | /* Verify that the area passed by the user is writeable */ | ||
641 | if (!access_ok(VERIFY_WRITE, events, maxevents * sizeof(struct epoll_event))) { | ||
642 | error = -EFAULT; | ||
643 | goto eexit_1; | ||
644 | } | ||
645 | |||
646 | /* Get the "struct file *" for the eventpoll file */ | ||
647 | error = -EBADF; | ||
648 | file = fget(epfd); | ||
649 | if (!file) | ||
650 | goto eexit_1; | ||
651 | |||
652 | /* | ||
653 | * We have to check that the file structure underneath the fd | ||
654 | * the user passed to us _is_ an eventpoll file. | ||
655 | */ | ||
656 | error = -EINVAL; | ||
657 | if (!is_file_epoll(file)) | ||
658 | goto eexit_2; | ||
659 | |||
660 | /* | ||
661 | * At this point it is safe to assume that the "private_data" contains | ||
662 | * our own data structure. | ||
663 | */ | ||
664 | ep = file->private_data; | ||
665 | |||
666 | /* Time to fish for events ... */ | ||
667 | error = ep_poll(ep, events, maxevents, timeout); | ||
668 | |||
669 | eexit_2: | ||
670 | fput(file); | ||
671 | eexit_1: | ||
672 | DNPRINTK(3, (KERN_INFO "[%p] eventpoll: sys_epoll_wait(%d, %p, %d, %d) = %d\n", | ||
673 | current, epfd, events, maxevents, timeout, error)); | ||
674 | |||
675 | return error; | ||
676 | } | 436 | } |
677 | 437 | ||
678 | |||
679 | #ifdef TIF_RESTORE_SIGMASK | ||
680 | |||
681 | /* | 438 | /* |
682 | * Implement the event wait interface for the eventpoll file. It is the kernel | 439 | * Decrement ( release ) the usage count by signaling that the user |
683 | * part of the user space epoll_pwait(2). | 440 | * has finished using the structure. It might lead to freeing the |
441 | * structure itself if the count goes to zero. | ||
684 | */ | 442 | */ |
685 | asmlinkage long sys_epoll_pwait(int epfd, struct epoll_event __user *events, | 443 | static void ep_release_epitem(struct epitem *epi) |
686 | int maxevents, int timeout, const sigset_t __user *sigmask, | ||
687 | size_t sigsetsize) | ||
688 | { | 444 | { |
689 | int error; | 445 | if (atomic_dec_and_test(&epi->usecnt)) |
690 | sigset_t ksigmask, sigsaved; | 446 | kmem_cache_free(epi_cache, epi); |
691 | |||
692 | /* | ||
693 | * If the caller wants a certain signal mask to be set during the wait, | ||
694 | * we apply it here. | ||
695 | */ | ||
696 | if (sigmask) { | ||
697 | if (sigsetsize != sizeof(sigset_t)) | ||
698 | return -EINVAL; | ||
699 | if (copy_from_user(&ksigmask, sigmask, sizeof(ksigmask))) | ||
700 | return -EFAULT; | ||
701 | sigdelsetmask(&ksigmask, sigmask(SIGKILL) | sigmask(SIGSTOP)); | ||
702 | sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved); | ||
703 | } | ||
704 | |||
705 | error = sys_epoll_wait(epfd, events, maxevents, timeout); | ||
706 | |||
707 | /* | ||
708 | * If we changed the signal mask, we need to restore the original one. | ||
709 | * In case we've got a signal while waiting, we do not restore the | ||
710 | * signal mask yet, and we allow do_signal() to deliver the signal on | ||
711 | * the way back to userspace, before the signal mask is restored. | ||
712 | */ | ||
713 | if (sigmask) { | ||
714 | if (error == -EINTR) { | ||
715 | memcpy(¤t->saved_sigmask, &sigsaved, | ||
716 | sizeof(sigsaved)); | ||
717 | set_thread_flag(TIF_RESTORE_SIGMASK); | ||
718 | } else | ||
719 | sigprocmask(SIG_SETMASK, &sigsaved, NULL); | ||
720 | } | ||
721 | |||
722 | return error; | ||
723 | } | 447 | } |
724 | 448 | ||
725 | #endif /* #ifdef TIF_RESTORE_SIGMASK */ | ||
726 | |||
727 | |||
728 | /* | 449 | /* |
729 | * Creates the file descriptor to be used by the epoll interface. | 450 | * Removes a "struct epitem" from the eventpoll RB tree and deallocates |
451 | * all the associated resources. | ||
730 | */ | 452 | */ |
731 | static int ep_getfd(int *efd, struct inode **einode, struct file **efile, | 453 | static int ep_remove(struct eventpoll *ep, struct epitem *epi) |
732 | struct eventpoll *ep) | ||
733 | { | 454 | { |
734 | struct qstr this; | 455 | int error; |
735 | char name[32]; | 456 | unsigned long flags; |
736 | struct dentry *dentry; | 457 | struct file *file = epi->ffd.file; |
737 | struct inode *inode; | ||
738 | struct file *file; | ||
739 | int error, fd; | ||
740 | |||
741 | /* Get an ready to use file */ | ||
742 | error = -ENFILE; | ||
743 | file = get_empty_filp(); | ||
744 | if (!file) | ||
745 | goto eexit_1; | ||
746 | |||
747 | /* Allocates an inode from the eventpoll file system */ | ||
748 | inode = ep_eventpoll_inode(); | ||
749 | if (IS_ERR(inode)) { | ||
750 | error = PTR_ERR(inode); | ||
751 | goto eexit_2; | ||
752 | } | ||
753 | |||
754 | /* Allocates a free descriptor to plug the file onto */ | ||
755 | error = get_unused_fd(); | ||
756 | if (error < 0) | ||
757 | goto eexit_3; | ||
758 | fd = error; | ||
759 | 458 | ||
760 | /* | 459 | /* |
761 | * Link the inode to a directory entry by creating a unique name | 460 | * Removes poll wait queue hooks. We _have_ to do this without holding |
762 | * using the inode number. | 461 | * the "ep->lock", otherwise a deadlock might occur. This is because of |
462 | * the lock acquisition sequence: here we take "ep->lock", then the wait | ||
463 | * queue head lock when unregistering the wait queue. The wakeup callback | ||
464 | * runs holding the wait queue head lock and will call our callback, | ||
465 | * which will try to get "ep->lock". | ||
763 | */ | 466 | */ |
764 | error = -ENOMEM; | 467 | ep_unregister_pollwait(ep, epi); |
765 | sprintf(name, "[%lu]", inode->i_ino); | ||
766 | this.name = name; | ||
767 | this.len = strlen(name); | ||
768 | this.hash = inode->i_ino; | ||
769 | dentry = d_alloc(eventpoll_mnt->mnt_sb->s_root, &this); | ||
770 | if (!dentry) | ||
771 | goto eexit_4; | ||
772 | dentry->d_op = &eventpollfs_dentry_operations; | ||
773 | d_add(dentry, inode); | ||
774 | file->f_path.mnt = mntget(eventpoll_mnt); | ||
775 | file->f_path.dentry = dentry; | ||
776 | file->f_mapping = inode->i_mapping; | ||
777 | |||
778 | file->f_pos = 0; | ||
779 | file->f_flags = O_RDONLY; | ||
780 | file->f_op = &eventpoll_fops; | ||
781 | file->f_mode = FMODE_READ; | ||
782 | file->f_version = 0; | ||
783 | file->private_data = ep; | ||
784 | |||
785 | /* Install the new setup file into the allocated fd. */ | ||
786 | fd_install(fd, file); | ||
787 | |||
788 | *efd = fd; | ||
789 | *einode = inode; | ||
790 | *efile = file; | ||
791 | return 0; | ||
792 | 468 | ||
793 | eexit_4: | 469 | /* Remove the current item from the list of epoll hooks */ |
794 | put_unused_fd(fd); | 470 | spin_lock(&file->f_ep_lock); |
795 | eexit_3: | 471 | if (ep_is_linked(&epi->fllink)) |
796 | iput(inode); | 472 | list_del_init(&epi->fllink); |
797 | eexit_2: | 473 | spin_unlock(&file->f_ep_lock); |
798 | put_filp(file); | ||
799 | eexit_1: | ||
800 | return error; | ||
801 | } | ||
802 | 474 | ||
475 | /* We need to acquire the write IRQ lock before calling ep_unlink() */ | ||
476 | write_lock_irqsave(&ep->lock, flags); | ||
803 | 477 | ||
804 | static int ep_alloc(struct eventpoll **pep) | 478 | /* Really unlink the item from the RB tree */ |
805 | { | 479 | error = ep_unlink(ep, epi); |
806 | struct eventpoll *ep = kzalloc(sizeof(*ep), GFP_KERNEL); | ||
807 | 480 | ||
808 | if (!ep) | 481 | write_unlock_irqrestore(&ep->lock, flags); |
809 | return -ENOMEM; | ||
810 | 482 | ||
811 | rwlock_init(&ep->lock); | 483 | if (error) |
812 | init_rwsem(&ep->sem); | 484 | goto error_return; |
813 | init_waitqueue_head(&ep->wq); | ||
814 | init_waitqueue_head(&ep->poll_wait); | ||
815 | INIT_LIST_HEAD(&ep->rdllist); | ||
816 | ep->rbr = RB_ROOT; | ||
817 | 485 | ||
818 | *pep = ep; | 486 | /* At this point it is safe to free the eventpoll item */ |
487 | ep_release_epitem(epi); | ||
819 | 488 | ||
820 | DNPRINTK(3, (KERN_INFO "[%p] eventpoll: ep_alloc() ep=%p\n", | 489 | error = 0; |
821 | current, ep)); | 490 | error_return: |
822 | return 0; | 491 | DNPRINTK(3, (KERN_INFO "[%p] eventpoll: ep_remove(%p, %p) = %d\n", |
823 | } | 492 | current, ep, file, error)); |
824 | 493 | ||
494 | return error; | ||
495 | } | ||
825 | 496 | ||
826 | static void ep_free(struct eventpoll *ep) | 497 | static void ep_free(struct eventpoll *ep) |
827 | { | 498 | { |
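The xchg() in ep_unregister_pollwait() is what makes it callable from several teardown paths without holding "ep->lock": only the caller that swaps out a non-zero nwait performs the cleanup. A userspace analog of that one-shot pattern, sketched with C11 atomics and hypothetical names:

#include <stdatomic.h>
#include <stdio.h>

struct hooks { atomic_int nwait; };

static void unregister_once(struct hooks *h)
{
	/* whoever swaps out a non-zero value wins and does the cleanup */
	if (atomic_exchange(&h->nwait, 0) > 0)
		printf("tearing down wait queue hooks\n");
}

int main(void)
{
	struct hooks h = { .nwait = 1 };

	unregister_once(&h);	/* performs the teardown */
	unregister_once(&h);	/* concurrent/second call is a no-op */
	return 0;
}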
@@ -865,6 +536,104 @@ static void ep_free(struct eventpoll *ep) | |||
865 | mutex_unlock(&epmutex); | 536 | mutex_unlock(&epmutex); |
866 | } | 537 | } |
867 | 538 | ||
539 | static int ep_eventpoll_release(struct inode *inode, struct file *file) | ||
540 | { | ||
541 | struct eventpoll *ep = file->private_data; | ||
542 | |||
543 | if (ep) { | ||
544 | ep_free(ep); | ||
545 | kfree(ep); | ||
546 | } | ||
547 | |||
548 | DNPRINTK(3, (KERN_INFO "[%p] eventpoll: close() ep=%p\n", current, ep)); | ||
549 | return 0; | ||
550 | } | ||
551 | |||
552 | static unsigned int ep_eventpoll_poll(struct file *file, poll_table *wait) | ||
553 | { | ||
554 | unsigned int pollflags = 0; | ||
555 | unsigned long flags; | ||
556 | struct eventpoll *ep = file->private_data; | ||
557 | |||
558 | /* Insert inside our poll wait queue */ | ||
559 | poll_wait(file, &ep->poll_wait, wait); | ||
560 | |||
561 | /* Check our condition */ | ||
562 | read_lock_irqsave(&ep->lock, flags); | ||
563 | if (!list_empty(&ep->rdllist)) | ||
564 | pollflags = POLLIN | POLLRDNORM; | ||
565 | read_unlock_irqrestore(&ep->lock, flags); | ||
566 | |||
567 | return pollflags; | ||
568 | } | ||
569 | |||
570 | /* File callbacks that implement the eventpoll file behaviour */ | ||
571 | static const struct file_operations eventpoll_fops = { | ||
572 | .release = ep_eventpoll_release, | ||
573 | .poll = ep_eventpoll_poll | ||
574 | }; | ||
575 | |||
576 | /* Fast test to see if the file is an eventpoll file */ | ||
577 | static inline int is_file_epoll(struct file *f) | ||
578 | { | ||
579 | return f->f_op == &eventpoll_fops; | ||
580 | } | ||
581 | |||
582 | /* | ||
583 | * This is called from eventpoll_release() to unlink files from the eventpoll | ||
584 | * interface. We need to have this facility to cleanup correctly files that are | ||
585 | * closed without being removed from the eventpoll interface. | ||
586 | */ | ||
587 | void eventpoll_release_file(struct file *file) | ||
588 | { | ||
589 | struct list_head *lsthead = &file->f_ep_links; | ||
590 | struct eventpoll *ep; | ||
591 | struct epitem *epi; | ||
592 | |||
593 | /* | ||
594 | * We don't want to get "file->f_ep_lock" because it is not | ||
595 | * necessary. It is not necessary because we're in the "struct file" | ||
596 | * cleanup path, and this means that no one is using this file anymore. | ||
597 | * The only hit might come from ep_free(), but holding the semaphore | ||
598 | * will correctly serialize the operation. We do need to acquire | ||
599 | * "ep->sem" after "epmutex" because ep_remove() requires it when called | ||
600 | * from anywhere but ep_free(). | ||
601 | */ | ||
602 | mutex_lock(&epmutex); | ||
603 | |||
604 | while (!list_empty(lsthead)) { | ||
605 | epi = list_first_entry(lsthead, struct epitem, fllink); | ||
606 | |||
607 | ep = epi->ep; | ||
608 | list_del_init(&epi->fllink); | ||
609 | down_write(&ep->sem); | ||
610 | ep_remove(ep, epi); | ||
611 | up_write(&ep->sem); | ||
612 | } | ||
613 | |||
614 | mutex_unlock(&epmutex); | ||
615 | } | ||
616 | |||
617 | static int ep_alloc(struct eventpoll **pep) | ||
618 | { | ||
619 | struct eventpoll *ep = kzalloc(sizeof(*ep), GFP_KERNEL); | ||
620 | |||
621 | if (!ep) | ||
622 | return -ENOMEM; | ||
623 | |||
624 | rwlock_init(&ep->lock); | ||
625 | init_rwsem(&ep->sem); | ||
626 | init_waitqueue_head(&ep->wq); | ||
627 | init_waitqueue_head(&ep->poll_wait); | ||
628 | INIT_LIST_HEAD(&ep->rdllist); | ||
629 | ep->rbr = RB_ROOT; | ||
630 | |||
631 | *pep = ep; | ||
632 | |||
633 | DNPRINTK(3, (KERN_INFO "[%p] eventpoll: ep_alloc() ep=%p\n", | ||
634 | current, ep)); | ||
635 | return 0; | ||
636 | } | ||
868 | 637 | ||
869 | /* | 638 | /* |
870 | * Search the file inside the eventpoll tree. It adds usage count to | 639 | * Search the file inside the eventpoll tree. It adds usage count to |
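Since ep_eventpoll_poll() above reports POLLIN whenever the ready list is non-empty, an epoll descriptor is itself pollable (which is what allows nesting epoll sets). A small sketch probing one from plain poll(2):

#include <poll.h>

/* Non-blocking probe: returns 1 if the epoll set has events ready. */
static int epoll_has_events(int epfd)
{
	struct pollfd pfd = { .fd = epfd, .events = POLLIN };

	return poll(&pfd, 1, 0) > 0;
}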
@@ -902,30 +671,58 @@ static struct epitem *ep_find(struct eventpoll *ep, struct file *file, int fd) | |||
902 | return epir; | 671 | return epir; |
903 | } | 672 | } |
904 | 673 | ||
905 | |||
906 | /* | 674 | /* |
907 | * Increment the usage count of the "struct epitem" making it sure | 675 | * This is the callback that is passed to the wait queue wakeup |
908 | * that the user will have a valid pointer to reference. | 676 | * mechanism. It is called by the stored file descriptors when they |
677 | * have events to report. | ||
909 | */ | 678 | */ |
910 | static void ep_use_epitem(struct epitem *epi) | 679 | static int ep_poll_callback(wait_queue_t *wait, unsigned mode, int sync, void *key) |
911 | { | 680 | { |
681 | int pwake = 0; | ||
682 | unsigned long flags; | ||
683 | struct epitem *epi = ep_item_from_wait(wait); | ||
684 | struct eventpoll *ep = epi->ep; | ||
912 | 685 | ||
913 | atomic_inc(&epi->usecnt); | 686 | DNPRINTK(3, (KERN_INFO "[%p] eventpoll: poll_callback(%p) epi=%p ep=%p\n", |
914 | } | 687 | current, epi->ffd.file, epi, ep)); |
915 | 688 | ||
689 | write_lock_irqsave(&ep->lock, flags); | ||
916 | 690 | ||
917 | /* | 691 | /* |
918 | * Decrement ( release ) the usage count by signaling that the user | 692 | * If the event mask does not contain any poll(2) event, we consider the |
919 | * has finished using the structure. It might lead to freeing the | 693 | * descriptor to be disabled. This condition is likely the effect of the |
920 | * structure itself if the count goes to zero. | 694 | * EPOLLONESHOT bit that disables the descriptor when an event is received, |
921 | */ | 695 | * until the next EPOLL_CTL_MOD is issued. |
922 | static void ep_release_epitem(struct epitem *epi) | 696 | */ |
923 | { | 697 | if (!(epi->event.events & ~EP_PRIVATE_BITS)) |
698 | goto is_disabled; | ||
924 | 699 | ||
925 | if (atomic_dec_and_test(&epi->usecnt)) | 700 | /* If this file is already in the ready list we exit soon */ |
926 | kmem_cache_free(epi_cache, epi); | 701 | if (ep_is_linked(&epi->rdllink)) |
927 | } | 702 | goto is_linked; |
928 | 703 | ||
704 | list_add_tail(&epi->rdllink, &ep->rdllist); | ||
705 | |||
706 | is_linked: | ||
707 | /* | ||
708 | * Wake up ( if active ) both the eventpoll wait list and the ->poll() | ||
709 | * wait list. | ||
710 | */ | ||
711 | if (waitqueue_active(&ep->wq)) | ||
712 | __wake_up_locked(&ep->wq, TASK_UNINTERRUPTIBLE | | ||
713 | TASK_INTERRUPTIBLE); | ||
714 | if (waitqueue_active(&ep->poll_wait)) | ||
715 | pwake++; | ||
716 | |||
717 | is_disabled: | ||
718 | write_unlock_irqrestore(&ep->lock, flags); | ||
719 | |||
720 | /* We have to call this outside the lock */ | ||
721 | if (pwake) | ||
722 | ep_poll_safewake(&psw, &ep->poll_wait); | ||
723 | |||
724 | return 1; | ||
725 | } | ||
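
For illustration, a minimal userspace sketch of the one-shot cycle described in the comment above (the helper name and fds are illustrative, not part of this patch):

	#include <sys/epoll.h>

	/* After a one-shot event fires, the item reports nothing more
	 * (see the EP_PRIVATE_BITS test above) until EPOLL_CTL_MOD
	 * re-enables it with a fresh event mask. */
	static int rearm_oneshot(int epfd, int fd)
	{
		struct epoll_event ev;

		ev.events = EPOLLIN | EPOLLONESHOT;
		ev.data.fd = fd;
		return epoll_ctl(epfd, EPOLL_CTL_MOD, fd, &ev);
	}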
929 | 726 | ||
930 | /* | 727 | /* |
931 | * This is the callback that is used to add our wait queue to the | 728 | * This is the callback that is used to add our wait queue to the |
@@ -950,7 +747,6 @@ static void ep_ptable_queue_proc(struct file *file, wait_queue_head_t *whead, | |||
950 | } | 747 | } |
951 | } | 748 | } |
952 | 749 | ||
953 | |||
954 | static void ep_rbtree_insert(struct eventpoll *ep, struct epitem *epi) | 750 | static void ep_rbtree_insert(struct eventpoll *ep, struct epitem *epi) |
955 | { | 751 | { |
956 | int kcmp; | 752 | int kcmp; |
@@ -970,7 +766,6 @@ static void ep_rbtree_insert(struct eventpoll *ep, struct epitem *epi) | |||
970 | rb_insert_color(&epi->rbn, &ep->rbr); | 766 | rb_insert_color(&epi->rbn, &ep->rbr); |
971 | } | 767 | } |
972 | 768 | ||
973 | |||
974 | static int ep_insert(struct eventpoll *ep, struct epoll_event *event, | 769 | static int ep_insert(struct eventpoll *ep, struct epoll_event *event, |
975 | struct file *tfile, int fd) | 770 | struct file *tfile, int fd) |
976 | { | 771 | { |
@@ -981,7 +776,7 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event, | |||
981 | 776 | ||
982 | error = -ENOMEM; | 777 | error = -ENOMEM; |
983 | if (!(epi = kmem_cache_alloc(epi_cache, GFP_KERNEL))) | 778 | if (!(epi = kmem_cache_alloc(epi_cache, GFP_KERNEL))) |
984 | goto eexit_1; | 779 | goto error_return; |
985 | 780 | ||
986 | /* Item initialization follows here ... */ | 781 | /* Item initialization follows here ... */ |
987 | ep_rb_initnode(&epi->rbn); | 782 | ep_rb_initnode(&epi->rbn); |
@@ -1011,7 +806,7 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event, | |||
1011 | * high memory pressure. | 806 | * high memory pressure. |
1012 | */ | 807 | */ |
1013 | if (epi->nwait < 0) | 808 | if (epi->nwait < 0) |
1014 | goto eexit_2; | 809 | goto error_unregister; |
1015 | 810 | ||
1016 | /* Add the current item to the list of active epoll hooks for this file */ | 811 | /* Add the current item to the list of active epoll hooks for this file */ |
1017 | spin_lock(&tfile->f_ep_lock); | 812 | spin_lock(&tfile->f_ep_lock); |
@@ -1046,7 +841,7 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event, | |||
1046 | 841 | ||
1047 | return 0; | 842 | return 0; |
1048 | 843 | ||
1049 | eexit_2: | 844 | error_unregister: |
1050 | ep_unregister_pollwait(ep, epi); | 845 | ep_unregister_pollwait(ep, epi); |
1051 | 846 | ||
1052 | /* | 847 | /* |
@@ -1059,11 +854,10 @@ eexit_2: | |||
1059 | write_unlock_irqrestore(&ep->lock, flags); | 854 | write_unlock_irqrestore(&ep->lock, flags); |
1060 | 855 | ||
1061 | kmem_cache_free(epi_cache, epi); | 856 | kmem_cache_free(epi_cache, epi); |
1062 | eexit_1: | 857 | error_return: |
1063 | return error; | 858 | return error; |
1064 | } | 859 | } |
1065 | 860 | ||
1066 | |||
1067 | /* | 861 | /* |
1068 | * Modify the interest event mask by dropping an event if the new mask | 862 | * Modify the interest event mask by dropping an event if the new mask |
1069 | * has a match in the current file status. | 863 | * has a match in the current file status. |
@@ -1126,216 +920,6 @@ static int ep_modify(struct eventpoll *ep, struct epitem *epi, struct epoll_even | |||
1126 | return 0; | 920 | return 0; |
1127 | } | 921 | } |
1128 | 922 | ||
1129 | |||
1130 | /* | ||
1131 | * This function unregister poll callbacks from the associated file descriptor. | ||
1132 | * Since this must be called without holding "ep->lock" the atomic exchange trick | ||
1133 | * will protect us from multiple unregister. | ||
1134 | */ | ||
1135 | static void ep_unregister_pollwait(struct eventpoll *ep, struct epitem *epi) | ||
1136 | { | ||
1137 | int nwait; | ||
1138 | struct list_head *lsthead = &epi->pwqlist; | ||
1139 | struct eppoll_entry *pwq; | ||
1140 | |||
1141 | /* This is called without locks, so we need the atomic exchange */ | ||
1142 | nwait = xchg(&epi->nwait, 0); | ||
1143 | |||
1144 | if (nwait) { | ||
1145 | while (!list_empty(lsthead)) { | ||
1146 | pwq = list_first_entry(lsthead, struct eppoll_entry, llink); | ||
1147 | |||
1148 | list_del_init(&pwq->llink); | ||
1149 | remove_wait_queue(pwq->whead, &pwq->wait); | ||
1150 | kmem_cache_free(pwq_cache, pwq); | ||
1151 | } | ||
1152 | } | ||
1153 | } | ||
1154 | |||
1155 | |||
1156 | /* | ||
1157 | * Unlink the "struct epitem" from all places it might have been hooked up. | ||
1158 | * This function must be called with write IRQ lock on "ep->lock". | ||
1159 | */ | ||
1160 | static int ep_unlink(struct eventpoll *ep, struct epitem *epi) | ||
1161 | { | ||
1162 | int error; | ||
1163 | |||
1164 | /* | ||
1165 | * It can happen that this one is called for an item already unlinked. | ||
1166 | * The check protect us from doing a double unlink ( crash ). | ||
1167 | */ | ||
1168 | error = -ENOENT; | ||
1169 | if (!ep_rb_linked(&epi->rbn)) | ||
1170 | goto eexit_1; | ||
1171 | |||
1172 | /* | ||
1173 | * Clear the event mask for the unlinked item. This will avoid item | ||
1174 | * notifications to be sent after the unlink operation from inside | ||
1175 | * the kernel->userspace event transfer loop. | ||
1176 | */ | ||
1177 | epi->event.events = 0; | ||
1178 | |||
1179 | /* | ||
1180 | * At this point is safe to do the job, unlink the item from our rb-tree. | ||
1181 | * This operation togheter with the above check closes the door to | ||
1182 | * double unlinks. | ||
1183 | */ | ||
1184 | ep_rb_erase(&epi->rbn, &ep->rbr); | ||
1185 | |||
1186 | /* | ||
1187 | * If the item we are going to remove is inside the ready file descriptors | ||
1188 | * we want to remove it from this list to avoid stale events. | ||
1189 | */ | ||
1190 | if (ep_is_linked(&epi->rdllink)) | ||
1191 | list_del_init(&epi->rdllink); | ||
1192 | |||
1193 | error = 0; | ||
1194 | eexit_1: | ||
1195 | |||
1196 | DNPRINTK(3, (KERN_INFO "[%p] eventpoll: ep_unlink(%p, %p) = %d\n", | ||
1197 | current, ep, epi->ffd.file, error)); | ||
1198 | |||
1199 | return error; | ||
1200 | } | ||
1201 | |||
1202 | |||
1203 | /* | ||
1204 | * Removes a "struct epitem" from the eventpoll RB tree and deallocates | ||
1205 | * all the associated resources. | ||
1206 | */ | ||
1207 | static int ep_remove(struct eventpoll *ep, struct epitem *epi) | ||
1208 | { | ||
1209 | int error; | ||
1210 | unsigned long flags; | ||
1211 | struct file *file = epi->ffd.file; | ||
1212 | |||
1213 | /* | ||
1214 | * Removes poll wait queue hooks. We _have_ to do this without holding | ||
1215 | * the "ep->lock" otherwise a deadlock might occur. This because of the | ||
1216 | * sequence of the lock acquisition. Here we do "ep->lock" then the wait | ||
1217 | * queue head lock when unregistering the wait queue. The wakeup callback | ||
1218 | * will run by holding the wait queue head lock and will call our callback | ||
1219 | * that will try to get "ep->lock". | ||
1220 | */ | ||
1221 | ep_unregister_pollwait(ep, epi); | ||
1222 | |||
1223 | /* Remove the current item from the list of epoll hooks */ | ||
1224 | spin_lock(&file->f_ep_lock); | ||
1225 | if (ep_is_linked(&epi->fllink)) | ||
1226 | list_del_init(&epi->fllink); | ||
1227 | spin_unlock(&file->f_ep_lock); | ||
1228 | |||
1229 | /* We need to acquire the write IRQ lock before calling ep_unlink() */ | ||
1230 | write_lock_irqsave(&ep->lock, flags); | ||
1231 | |||
1232 | /* Really unlink the item from the RB tree */ | ||
1233 | error = ep_unlink(ep, epi); | ||
1234 | |||
1235 | write_unlock_irqrestore(&ep->lock, flags); | ||
1236 | |||
1237 | if (error) | ||
1238 | goto eexit_1; | ||
1239 | |||
1240 | /* At this point it is safe to free the eventpoll item */ | ||
1241 | ep_release_epitem(epi); | ||
1242 | |||
1243 | error = 0; | ||
1244 | eexit_1: | ||
1245 | DNPRINTK(3, (KERN_INFO "[%p] eventpoll: ep_remove(%p, %p) = %d\n", | ||
1246 | current, ep, file, error)); | ||
1247 | |||
1248 | return error; | ||
1249 | } | ||
1250 | |||
1251 | |||
1252 | /* | ||
1253 | * This is the callback that is passed to the wait queue wakeup | ||
1254 | * machanism. It is called by the stored file descriptors when they | ||
1255 | * have events to report. | ||
1256 | */ | ||
1257 | static int ep_poll_callback(wait_queue_t *wait, unsigned mode, int sync, void *key) | ||
1258 | { | ||
1259 | int pwake = 0; | ||
1260 | unsigned long flags; | ||
1261 | struct epitem *epi = ep_item_from_wait(wait); | ||
1262 | struct eventpoll *ep = epi->ep; | ||
1263 | |||
1264 | DNPRINTK(3, (KERN_INFO "[%p] eventpoll: poll_callback(%p) epi=%p ep=%p\n", | ||
1265 | current, epi->ffd.file, epi, ep)); | ||
1266 | |||
1267 | write_lock_irqsave(&ep->lock, flags); | ||
1268 | |||
1269 | /* | ||
1270 | * If the event mask does not contain any poll(2) event, we consider the | ||
1271 | * descriptor to be disabled. This condition is likely the effect of the | ||
1272 | * EPOLLONESHOT bit that disables the descriptor when an event is received, | ||
1273 | * until the next EPOLL_CTL_MOD will be issued. | ||
1274 | */ | ||
1275 | if (!(epi->event.events & ~EP_PRIVATE_BITS)) | ||
1276 | goto is_disabled; | ||
1277 | |||
1278 | /* If this file is already in the ready list we exit soon */ | ||
1279 | if (ep_is_linked(&epi->rdllink)) | ||
1280 | goto is_linked; | ||
1281 | |||
1282 | list_add_tail(&epi->rdllink, &ep->rdllist); | ||
1283 | |||
1284 | is_linked: | ||
1285 | /* | ||
1286 | * Wake up ( if active ) both the eventpoll wait list and the ->poll() | ||
1287 | * wait list. | ||
1288 | */ | ||
1289 | if (waitqueue_active(&ep->wq)) | ||
1290 | __wake_up_locked(&ep->wq, TASK_UNINTERRUPTIBLE | | ||
1291 | TASK_INTERRUPTIBLE); | ||
1292 | if (waitqueue_active(&ep->poll_wait)) | ||
1293 | pwake++; | ||
1294 | |||
1295 | is_disabled: | ||
1296 | write_unlock_irqrestore(&ep->lock, flags); | ||
1297 | |||
1298 | /* We have to call this outside the lock */ | ||
1299 | if (pwake) | ||
1300 | ep_poll_safewake(&psw, &ep->poll_wait); | ||
1301 | |||
1302 | return 1; | ||
1303 | } | ||
1304 | |||
1305 | |||
1306 | static int ep_eventpoll_close(struct inode *inode, struct file *file) | ||
1307 | { | ||
1308 | struct eventpoll *ep = file->private_data; | ||
1309 | |||
1310 | if (ep) { | ||
1311 | ep_free(ep); | ||
1312 | kfree(ep); | ||
1313 | } | ||
1314 | |||
1315 | DNPRINTK(3, (KERN_INFO "[%p] eventpoll: close() ep=%p\n", current, ep)); | ||
1316 | return 0; | ||
1317 | } | ||
1318 | |||
1319 | |||
1320 | static unsigned int ep_eventpoll_poll(struct file *file, poll_table *wait) | ||
1321 | { | ||
1322 | unsigned int pollflags = 0; | ||
1323 | unsigned long flags; | ||
1324 | struct eventpoll *ep = file->private_data; | ||
1325 | |||
1326 | /* Insert inside our poll wait queue */ | ||
1327 | poll_wait(file, &ep->poll_wait, wait); | ||
1328 | |||
1329 | /* Check our condition */ | ||
1330 | read_lock_irqsave(&ep->lock, flags); | ||
1331 | if (!list_empty(&ep->rdllist)) | ||
1332 | pollflags = POLLIN | POLLRDNORM; | ||
1333 | read_unlock_irqrestore(&ep->lock, flags); | ||
1334 | |||
1335 | return pollflags; | ||
1336 | } | ||
1337 | |||
1338 | |||
1339 | /* | 923 | /* |
1340 | * This function is called without holding the "ep->lock" since the call to | 924 | * This function is called without holding the "ep->lock" since the call to |
1341 | * __copy_to_user() might sleep, and also f_op->poll() might reenable the IRQ | 925 | * __copy_to_user() might sleep, and also f_op->poll() might reenable the IRQ |
@@ -1447,7 +1031,6 @@ static int ep_send_events(struct eventpoll *ep, struct list_head *txlist, | |||
1447 | return eventcnt == 0 ? error: eventcnt; | 1031 | return eventcnt == 0 ? error: eventcnt; |
1448 | } | 1032 | } |
1449 | 1033 | ||
1450 | |||
1451 | /* | 1034 | /* |
1452 | * Perform the transfer of events to user space. | 1035 | * Perform the transfer of events to user space. |
1453 | */ | 1036 | */ |
@@ -1483,7 +1066,6 @@ static int ep_events_transfer(struct eventpoll *ep, | |||
1483 | return eventcnt; | 1066 | return eventcnt; |
1484 | } | 1067 | } |
1485 | 1068 | ||
1486 | |||
1487 | static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events, | 1069 | static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events, |
1488 | int maxevents, long timeout) | 1070 | int maxevents, long timeout) |
1489 | { | 1071 | { |
@@ -1553,52 +1135,262 @@ retry: | |||
1553 | return res; | 1135 | return res; |
1554 | } | 1136 | } |
1555 | 1137 | ||
1556 | static int eventpollfs_delete_dentry(struct dentry *dentry) | 1138 | /* |
1139 | * It opens an eventpoll file descriptor by suggesting a storage of "size" | ||
1140 | * file descriptors. The size parameter is just a hint about how to size | ||
1141 | * data structures. It won't prevent the user from storing more than "size" | ||
1142 | * file descriptors inside the epoll interface. It is the kernel part of | ||
1143 | * the userspace epoll_create(2). | ||
1144 | */ | ||
1145 | asmlinkage long sys_epoll_create(int size) | ||
1557 | { | 1146 | { |
1147 | int error, fd = -1; | ||
1148 | struct eventpoll *ep; | ||
1149 | struct inode *inode; | ||
1150 | struct file *file; | ||
1558 | 1151 | ||
1559 | return 1; | 1152 | DNPRINTK(3, (KERN_INFO "[%p] eventpoll: sys_epoll_create(%d)\n", |
1153 | current, size)); | ||
1154 | |||
1155 | /* | ||
1156 | * Sanity check on the size parameter, and create the internal data | ||
1157 | * structure ( "struct eventpoll" ). | ||
1158 | */ | ||
1159 | error = -EINVAL; | ||
1160 | if (size <= 0 || (error = ep_alloc(&ep)) != 0) | ||
1161 | goto error_return; | ||
1162 | |||
1163 | /* | ||
1164 | * Creates all the items needed to set up an eventpoll file. That is, | ||
1165 | * a file structure, an inode and a free file descriptor. | ||
1166 | */ | ||
1167 | error = anon_inode_getfd(&fd, &inode, &file, "[eventpoll]", | ||
1168 | &eventpoll_fops, ep); | ||
1169 | if (error) | ||
1170 | goto error_free; | ||
1171 | |||
1172 | DNPRINTK(3, (KERN_INFO "[%p] eventpoll: sys_epoll_create(%d) = %d\n", | ||
1173 | current, size, fd)); | ||
1174 | |||
1175 | return fd; | ||
1176 | |||
1177 | error_free: | ||
1178 | ep_free(ep); | ||
1179 | kfree(ep); | ||
1180 | error_return: | ||
1181 | DNPRINTK(3, (KERN_INFO "[%p] eventpoll: sys_epoll_create(%d) = %d\n", | ||
1182 | current, size, error)); | ||
1183 | return error; | ||
1560 | } | 1184 | } |
1561 | 1185 | ||
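As a usage sketch of the syscall above (userspace; assumes only that a libc wrapper for epoll_create(2) is available):

	#include <stdio.h>
	#include <sys/epoll.h>

	int main(void)
	{
		/* The size argument is only a sizing hint; it must still
		 * be greater than zero, per the -EINVAL check above. */
		int epfd = epoll_create(10);

		if (epfd < 0) {
			perror("epoll_create");
			return 1;
		}
		printf("epoll fd = %d\n", epfd);
		return 0;
	}
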
1562 | static struct inode *ep_eventpoll_inode(void) | 1186 | /* |
1187 | * The following function implements the controller interface for | ||
1188 | * the eventpoll file that enables the insertion/removal/change of | ||
1189 | * file descriptors inside the interest set. It represents | ||
1190 | * the kernel part of the user space epoll_ctl(2). | ||
1191 | */ | ||
1192 | asmlinkage long sys_epoll_ctl(int epfd, int op, int fd, | ||
1193 | struct epoll_event __user *event) | ||
1563 | { | 1194 | { |
1564 | int error = -ENOMEM; | 1195 | int error; |
1565 | struct inode *inode = new_inode(eventpoll_mnt->mnt_sb); | 1196 | struct file *file, *tfile; |
1197 | struct eventpoll *ep; | ||
1198 | struct epitem *epi; | ||
1199 | struct epoll_event epds; | ||
1200 | |||
1201 | DNPRINTK(3, (KERN_INFO "[%p] eventpoll: sys_epoll_ctl(%d, %d, %d, %p)\n", | ||
1202 | current, epfd, op, fd, event)); | ||
1203 | |||
1204 | error = -EFAULT; | ||
1205 | if (ep_op_has_event(op) && | ||
1206 | copy_from_user(&epds, event, sizeof(struct epoll_event))) | ||
1207 | goto error_return; | ||
1208 | |||
1209 | /* Get the "struct file *" for the eventpoll file */ | ||
1210 | error = -EBADF; | ||
1211 | file = fget(epfd); | ||
1212 | if (!file) | ||
1213 | goto error_return; | ||
1214 | |||
1215 | /* Get the "struct file *" for the target file */ | ||
1216 | tfile = fget(fd); | ||
1217 | if (!tfile) | ||
1218 | goto error_fput; | ||
1219 | |||
1220 | /* The target file descriptor must support poll */ | ||
1221 | error = -EPERM; | ||
1222 | if (!tfile->f_op || !tfile->f_op->poll) | ||
1223 | goto error_tgt_fput; | ||
1224 | |||
1225 | /* | ||
1226 | * We have to check that the file structure underneath the file descriptor | ||
1227 | * the user passed to us _is_ an eventpoll file. We also do not permit | ||
1228 | * adding an epoll file descriptor inside itself. | ||
1229 | */ | ||
1230 | error = -EINVAL; | ||
1231 | if (file == tfile || !is_file_epoll(file)) | ||
1232 | goto error_tgt_fput; | ||
1566 | 1233 | ||
1567 | if (!inode) | 1234 | /* |
1568 | goto eexit_1; | 1235 | * At this point it is safe to assume that the "private_data" contains |
1236 | * our own data structure. | ||
1237 | */ | ||
1238 | ep = file->private_data; | ||
1239 | |||
1240 | down_write(&ep->sem); | ||
1569 | 1241 | ||
1570 | inode->i_fop = &eventpoll_fops; | 1242 | /* Try to lookup the file inside our RB tree */ |
1243 | epi = ep_find(ep, tfile, fd); | ||
1244 | |||
1245 | error = -EINVAL; | ||
1246 | switch (op) { | ||
1247 | case EPOLL_CTL_ADD: | ||
1248 | if (!epi) { | ||
1249 | epds.events |= POLLERR | POLLHUP; | ||
1571 | 1250 | ||
1251 | error = ep_insert(ep, &epds, tfile, fd); | ||
1252 | } else | ||
1253 | error = -EEXIST; | ||
1254 | break; | ||
1255 | case EPOLL_CTL_DEL: | ||
1256 | if (epi) | ||
1257 | error = ep_remove(ep, epi); | ||
1258 | else | ||
1259 | error = -ENOENT; | ||
1260 | break; | ||
1261 | case EPOLL_CTL_MOD: | ||
1262 | if (epi) { | ||
1263 | epds.events |= POLLERR | POLLHUP; | ||
1264 | error = ep_modify(ep, epi, &epds); | ||
1265 | } else | ||
1266 | error = -ENOENT; | ||
1267 | break; | ||
1268 | } | ||
1572 | /* | 1269 | /* |
1573 | * Mark the inode dirty from the very beginning, | 1270 | * The function ep_find() increments the usage count of the structure |
1574 | * that way it will never be moved to the dirty | 1271 | * so, if this is not NULL, we need to release it. |
1575 | * list because mark_inode_dirty() will think | ||
1576 | * that it already _is_ on the dirty list. | ||
1577 | */ | 1272 | */ |
1578 | inode->i_state = I_DIRTY; | 1273 | if (epi) |
1579 | inode->i_mode = S_IRUSR | S_IWUSR; | 1274 | ep_release_epitem(epi); |
1580 | inode->i_uid = current->fsuid; | 1275 | up_write(&ep->sem); |
1581 | inode->i_gid = current->fsgid; | 1276 | |
1582 | inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; | 1277 | error_tgt_fput: |
1583 | return inode; | 1278 | fput(tfile); |
1584 | 1279 | error_fput: | |
1585 | eexit_1: | 1280 | fput(file); |
1586 | return ERR_PTR(error); | 1281 | error_return: |
1282 | DNPRINTK(3, (KERN_INFO "[%p] eventpoll: sys_epoll_ctl(%d, %d, %d, %p) = %d\n", | ||
1283 | current, epfd, op, fd, event, error)); | ||
1284 | |||
1285 | return error; | ||
1587 | } | 1286 | } |
1588 | 1287 | ||
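A hedged userspace counterpart to the ADD path above (watch_fd() is an illustrative helper, not an established API):

	#include <sys/epoll.h>

	/* Register "fd" for input readiness on "epfd". POLLERR and
	 * POLLHUP need not be requested: the kernel ORs them in
	 * unconditionally for EPOLL_CTL_ADD and EPOLL_CTL_MOD, as the
	 * switch statement above shows. */
	static int watch_fd(int epfd, int fd)
	{
		struct epoll_event ev;

		ev.events = EPOLLIN;
		ev.data.fd = fd;
		return epoll_ctl(epfd, EPOLL_CTL_ADD, fd, &ev);
	}
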
1589 | static int | 1288 | /* |
1590 | eventpollfs_get_sb(struct file_system_type *fs_type, int flags, | 1289 | * Implement the event wait interface for the eventpoll file. It is the kernel |
1591 | const char *dev_name, void *data, struct vfsmount *mnt) | 1290 | * part of the user space epoll_wait(2). |
1291 | */ | ||
1292 | asmlinkage long sys_epoll_wait(int epfd, struct epoll_event __user *events, | ||
1293 | int maxevents, int timeout) | ||
1592 | { | 1294 | { |
1593 | return get_sb_pseudo(fs_type, "eventpoll:", NULL, EVENTPOLLFS_MAGIC, | 1295 | int error; |
1594 | mnt); | 1296 | struct file *file; |
1297 | struct eventpoll *ep; | ||
1298 | |||
1299 | DNPRINTK(3, (KERN_INFO "[%p] eventpoll: sys_epoll_wait(%d, %p, %d, %d)\n", | ||
1300 | current, epfd, events, maxevents, timeout)); | ||
1301 | |||
1302 | /* The maximum number of events must be greater than zero */ | ||
1303 | if (maxevents <= 0 || maxevents > EP_MAX_EVENTS) | ||
1304 | return -EINVAL; | ||
1305 | |||
1306 | /* Verify that the area passed by the user is writeable */ | ||
1307 | if (!access_ok(VERIFY_WRITE, events, maxevents * sizeof(struct epoll_event))) { | ||
1308 | error = -EFAULT; | ||
1309 | goto error_return; | ||
1310 | } | ||
1311 | |||
1312 | /* Get the "struct file *" for the eventpoll file */ | ||
1313 | error = -EBADF; | ||
1314 | file = fget(epfd); | ||
1315 | if (!file) | ||
1316 | goto error_return; | ||
1317 | |||
1318 | /* | ||
1319 | * We have to check that the file structure underneath the fd | ||
1320 | * the user passed to us _is_ an eventpoll file. | ||
1321 | */ | ||
1322 | error = -EINVAL; | ||
1323 | if (!is_file_epoll(file)) | ||
1324 | goto error_fput; | ||
1325 | |||
1326 | /* | ||
1327 | * At this point it is safe to assume that the "private_data" contains | ||
1328 | * our own data structure. | ||
1329 | */ | ||
1330 | ep = file->private_data; | ||
1331 | |||
1332 | /* Time to fish for events ... */ | ||
1333 | error = ep_poll(ep, events, maxevents, timeout); | ||
1334 | |||
1335 | error_fput: | ||
1336 | fput(file); | ||
1337 | error_return: | ||
1338 | DNPRINTK(3, (KERN_INFO "[%p] eventpoll: sys_epoll_wait(%d, %p, %d, %d) = %d\n", | ||
1339 | current, epfd, events, maxevents, timeout, error)); | ||
1340 | |||
1341 | return error; | ||
1595 | } | 1342 | } |
1596 | 1343 | ||
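And the matching wait loop, sketched under the assumption of a caller-supplied handle_io() routine:

	#include <sys/epoll.h>

	#define MAX_EVENTS 64

	/* Hypothetical per-event work routine supplied by the caller. */
	extern void handle_io(int fd, unsigned int revents);

	static void event_loop(int epfd)
	{
		struct epoll_event events[MAX_EVENTS];
		int i, n;

		for (;;) {
			/* A timeout of -1 blocks until at least one event
			 * is ready; maxevents is capped by EP_MAX_EVENTS. */
			n = epoll_wait(epfd, events, MAX_EVENTS, -1);
			if (n < 0)
				break;
			for (i = 0; i < n; i++)
				handle_io(events[i].data.fd,
					  events[i].events);
		}
	}
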
1344 | #ifdef TIF_RESTORE_SIGMASK | ||
1597 | 1345 | ||
1598 | static int __init eventpoll_init(void) | 1346 | /* |
1347 | * Implement the event wait interface for the eventpoll file. It is the kernel | ||
1348 | * part of the user space epoll_pwait(2). | ||
1349 | */ | ||
1350 | asmlinkage long sys_epoll_pwait(int epfd, struct epoll_event __user *events, | ||
1351 | int maxevents, int timeout, const sigset_t __user *sigmask, | ||
1352 | size_t sigsetsize) | ||
1599 | { | 1353 | { |
1600 | int error; | 1354 | int error; |
1355 | sigset_t ksigmask, sigsaved; | ||
1356 | |||
1357 | /* | ||
1358 | * If the caller wants a certain signal mask to be set during the wait, | ||
1359 | * we apply it here. | ||
1360 | */ | ||
1361 | if (sigmask) { | ||
1362 | if (sigsetsize != sizeof(sigset_t)) | ||
1363 | return -EINVAL; | ||
1364 | if (copy_from_user(&ksigmask, sigmask, sizeof(ksigmask))) | ||
1365 | return -EFAULT; | ||
1366 | sigdelsetmask(&ksigmask, sigmask(SIGKILL) | sigmask(SIGSTOP)); | ||
1367 | sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved); | ||
1368 | } | ||
1369 | |||
1370 | error = sys_epoll_wait(epfd, events, maxevents, timeout); | ||
1371 | |||
1372 | /* | ||
1373 | * If we changed the signal mask, we need to restore the original one. | ||
1374 | * In case we've got a signal while waiting, we do not restore the | ||
1375 | * signal mask yet, and we allow do_signal() to deliver the signal on | ||
1376 | * the way back to userspace, before the signal mask is restored. | ||
1377 | */ | ||
1378 | if (sigmask) { | ||
1379 | if (error == -EINTR) { | ||
1380 | memcpy(¤t->saved_sigmask, &sigsaved, | ||
1381 | sizeof(sigsaved)); | ||
1382 | set_thread_flag(TIF_RESTORE_SIGMASK); | ||
1383 | } else | ||
1384 | sigprocmask(SIG_SETMASK, &sigsaved, NULL); | ||
1385 | } | ||
1386 | |||
1387 | return error; | ||
1388 | } | ||
1601 | 1389 | ||
1390 | #endif /* #ifdef TIF_RESTORE_SIGMASK */ | ||
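
The point of the mask dance above is atomicity: the signal mask is swapped and the wait entered without a window in between. A userspace sketch, assuming a libc epoll_pwait() wrapper is available (it forwards sigsetsize itself):

	#include <signal.h>
	#include <sys/epoll.h>

	/* Wait with every signal blocked except SIGINT, atomically for
	 * the duration of the wait; equivalent to a sigprocmask() +
	 * epoll_wait() pair, minus the race between the two calls. */
	static int wait_allowing_sigint(int epfd, struct epoll_event *evs,
					int maxevents)
	{
		sigset_t mask;

		sigfillset(&mask);
		sigdelset(&mask, SIGINT);
		return epoll_pwait(epfd, evs, maxevents, -1, &mask);
	}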
1391 | |||
1392 | static int __init eventpoll_init(void) | ||
1393 | { | ||
1602 | mutex_init(&epmutex); | 1394 | mutex_init(&epmutex); |
1603 | 1395 | ||
1604 | /* Initialize the structure used to perform safe poll wait head wake ups */ | 1396 | /* Initialize the structure used to perform safe poll wait head wake ups */ |
@@ -1614,39 +1406,7 @@ static int __init eventpoll_init(void) | |||
1614 | sizeof(struct eppoll_entry), 0, | 1406 | sizeof(struct eppoll_entry), 0, |
1615 | EPI_SLAB_DEBUG|SLAB_PANIC, NULL, NULL); | 1407 | EPI_SLAB_DEBUG|SLAB_PANIC, NULL, NULL); |
1616 | 1408 | ||
1617 | /* | ||
1618 | * Register the virtual file system that will be the source of inodes | ||
1619 | * for the eventpoll files | ||
1620 | */ | ||
1621 | error = register_filesystem(&eventpoll_fs_type); | ||
1622 | if (error) | ||
1623 | goto epanic; | ||
1624 | |||
1625 | /* Mount the above commented virtual file system */ | ||
1626 | eventpoll_mnt = kern_mount(&eventpoll_fs_type); | ||
1627 | error = PTR_ERR(eventpoll_mnt); | ||
1628 | if (IS_ERR(eventpoll_mnt)) | ||
1629 | goto epanic; | ||
1630 | |||
1631 | DNPRINTK(3, (KERN_INFO "[%p] eventpoll: successfully initialized.\n", | ||
1632 | current)); | ||
1633 | return 0; | 1409 | return 0; |
1634 | |||
1635 | epanic: | ||
1636 | panic("eventpoll_init() failed\n"); | ||
1637 | } | 1410 | } |
1411 | fs_initcall(eventpoll_init); | ||
1638 | 1412 | ||
1639 | |||
1640 | static void __exit eventpoll_exit(void) | ||
1641 | { | ||
1642 | /* Undo all operations done inside eventpoll_init() */ | ||
1643 | unregister_filesystem(&eventpoll_fs_type); | ||
1644 | mntput(eventpoll_mnt); | ||
1645 | kmem_cache_destroy(pwq_cache); | ||
1646 | kmem_cache_destroy(epi_cache); | ||
1647 | } | ||
1648 | |||
1649 | module_init(eventpoll_init); | ||
1650 | module_exit(eventpoll_exit); | ||
1651 | |||
1652 | MODULE_LICENSE("GPL"); | ||
@@ -50,6 +50,7 @@ | |||
50 | #include <linux/tsacct_kern.h> | 50 | #include <linux/tsacct_kern.h> |
51 | #include <linux/cn_proc.h> | 51 | #include <linux/cn_proc.h> |
52 | #include <linux/audit.h> | 52 | #include <linux/audit.h> |
53 | #include <linux/signalfd.h> | ||
53 | 54 | ||
54 | #include <asm/uaccess.h> | 55 | #include <asm/uaccess.h> |
55 | #include <asm/mmu_context.h> | 56 | #include <asm/mmu_context.h> |
@@ -582,6 +583,13 @@ static int de_thread(struct task_struct *tsk) | |||
582 | int count; | 583 | int count; |
583 | 584 | ||
584 | /* | 585 | /* |
586 | * Tell all the sighand listeners that this sighand has | ||
587 | * been detached. The signalfd_detach() function grabs the | ||
588 | * sighand lock, if signal listeners are present on the sighand. | ||
589 | */ | ||
590 | signalfd_detach(tsk); | ||
591 | |||
592 | /* | ||
585 | * If we don't share sighandlers, then we aren't sharing anything | 593 | * If we don't share sighandlers, then we aren't sharing anything |
586 | * and we can just re-use it all. | 594 | * and we can just re-use it all. |
587 | */ | 595 | */ |
@@ -702,7 +710,7 @@ static int de_thread(struct task_struct *tsk) | |||
702 | */ | 710 | */ |
703 | detach_pid(tsk, PIDTYPE_PID); | 711 | detach_pid(tsk, PIDTYPE_PID); |
704 | tsk->pid = leader->pid; | 712 | tsk->pid = leader->pid; |
705 | attach_pid(tsk, PIDTYPE_PID, tsk->pid); | 713 | attach_pid(tsk, PIDTYPE_PID, find_pid(tsk->pid)); |
706 | transfer_pid(leader, tsk, PIDTYPE_PGID); | 714 | transfer_pid(leader, tsk, PIDTYPE_PGID); |
707 | transfer_pid(leader, tsk, PIDTYPE_SID); | 715 | transfer_pid(leader, tsk, PIDTYPE_SID); |
708 | list_replace_rcu(&leader->tasks, &tsk->tasks); | 716 | list_replace_rcu(&leader->tasks, &tsk->tasks); |
@@ -757,8 +765,7 @@ no_thread_group: | |||
757 | spin_unlock(&oldsighand->siglock); | 765 | spin_unlock(&oldsighand->siglock); |
758 | write_unlock_irq(&tasklist_lock); | 766 | write_unlock_irq(&tasklist_lock); |
759 | 767 | ||
760 | if (atomic_dec_and_test(&oldsighand->count)) | 768 | __cleanup_sighand(oldsighand); |
761 | kmem_cache_free(sighand_cachep, oldsighand); | ||
762 | } | 769 | } |
763 | 770 | ||
764 | BUG_ON(!thread_group_leader(tsk)); | 771 | BUG_ON(!thread_group_leader(tsk)); |
diff --git a/fs/mpage.c b/fs/mpage.c index 0fb914fc2ee0..c1698f2291aa 100644 --- a/fs/mpage.c +++ b/fs/mpage.c | |||
@@ -454,11 +454,18 @@ EXPORT_SYMBOL(mpage_readpage); | |||
454 | * written, so it can intelligently allocate a suitably-sized BIO. For now, | 454 | * written, so it can intelligently allocate a suitably-sized BIO. For now, |
455 | * just allocate full-size (16-page) BIOs. | 455 | * just allocate full-size (16-page) BIOs. |
456 | */ | 456 | */ |
457 | static struct bio * | 457 | struct mpage_data { |
458 | __mpage_writepage(struct bio *bio, struct page *page, get_block_t get_block, | 458 | struct bio *bio; |
459 | sector_t *last_block_in_bio, int *ret, struct writeback_control *wbc, | 459 | sector_t last_block_in_bio; |
460 | writepage_t writepage_fn) | 460 | get_block_t *get_block; |
461 | unsigned use_writepage; | ||
462 | }; | ||
463 | |||
464 | static int __mpage_writepage(struct page *page, struct writeback_control *wbc, | ||
465 | void *data) | ||
461 | { | 466 | { |
467 | struct mpage_data *mpd = data; | ||
468 | struct bio *bio = mpd->bio; | ||
462 | struct address_space *mapping = page->mapping; | 469 | struct address_space *mapping = page->mapping; |
463 | struct inode *inode = page->mapping->host; | 470 | struct inode *inode = page->mapping->host; |
464 | const unsigned blkbits = inode->i_blkbits; | 471 | const unsigned blkbits = inode->i_blkbits; |
@@ -476,6 +483,7 @@ __mpage_writepage(struct bio *bio, struct page *page, get_block_t get_block, | |||
476 | int length; | 483 | int length; |
477 | struct buffer_head map_bh; | 484 | struct buffer_head map_bh; |
478 | loff_t i_size = i_size_read(inode); | 485 | loff_t i_size = i_size_read(inode); |
486 | int ret = 0; | ||
479 | 487 | ||
480 | if (page_has_buffers(page)) { | 488 | if (page_has_buffers(page)) { |
481 | struct buffer_head *head = page_buffers(page); | 489 | struct buffer_head *head = page_buffers(page); |
@@ -538,7 +546,7 @@ __mpage_writepage(struct bio *bio, struct page *page, get_block_t get_block, | |||
538 | 546 | ||
539 | map_bh.b_state = 0; | 547 | map_bh.b_state = 0; |
540 | map_bh.b_size = 1 << blkbits; | 548 | map_bh.b_size = 1 << blkbits; |
541 | if (get_block(inode, block_in_file, &map_bh, 1)) | 549 | if (mpd->get_block(inode, block_in_file, &map_bh, 1)) |
542 | goto confused; | 550 | goto confused; |
543 | if (buffer_new(&map_bh)) | 551 | if (buffer_new(&map_bh)) |
544 | unmap_underlying_metadata(map_bh.b_bdev, | 552 | unmap_underlying_metadata(map_bh.b_bdev, |
@@ -584,7 +592,7 @@ page_is_mapped: | |||
584 | /* | 592 | /* |
585 | * This page will go to BIO. Do we need to send this BIO off first? | 593 | * This page will go to BIO. Do we need to send this BIO off first? |
586 | */ | 594 | */ |
587 | if (bio && *last_block_in_bio != blocks[0] - 1) | 595 | if (bio && mpd->last_block_in_bio != blocks[0] - 1) |
588 | bio = mpage_bio_submit(WRITE, bio); | 596 | bio = mpage_bio_submit(WRITE, bio); |
589 | 597 | ||
590 | alloc_new: | 598 | alloc_new: |
@@ -641,7 +649,7 @@ alloc_new: | |||
641 | boundary_block, 1 << blkbits); | 649 | boundary_block, 1 << blkbits); |
642 | } | 650 | } |
643 | } else { | 651 | } else { |
644 | *last_block_in_bio = blocks[blocks_per_page - 1]; | 652 | mpd->last_block_in_bio = blocks[blocks_per_page - 1]; |
645 | } | 653 | } |
646 | goto out; | 654 | goto out; |
647 | 655 | ||
@@ -649,18 +657,19 @@ confused: | |||
649 | if (bio) | 657 | if (bio) |
650 | bio = mpage_bio_submit(WRITE, bio); | 658 | bio = mpage_bio_submit(WRITE, bio); |
651 | 659 | ||
652 | if (writepage_fn) { | 660 | if (mpd->use_writepage) { |
653 | *ret = (*writepage_fn)(page, wbc); | 661 | ret = mapping->a_ops->writepage(page, wbc); |
654 | } else { | 662 | } else { |
655 | *ret = -EAGAIN; | 663 | ret = -EAGAIN; |
656 | goto out; | 664 | goto out; |
657 | } | 665 | } |
658 | /* | 666 | /* |
659 | * The caller has a ref on the inode, so *mapping is stable | 667 | * The caller has a ref on the inode, so *mapping is stable |
660 | */ | 668 | */ |
661 | mapping_set_error(mapping, *ret); | 669 | mapping_set_error(mapping, ret); |
662 | out: | 670 | out: |
663 | return bio; | 671 | mpd->bio = bio; |
672 | return ret; | ||
664 | } | 673 | } |
665 | 674 | ||
666 | /** | 675 | /** |
@@ -683,120 +692,27 @@ out: | |||
683 | * the call was made get new I/O started against them. If wbc->sync_mode is | 692 | * the call was made get new I/O started against them. If wbc->sync_mode is |
684 | * WB_SYNC_ALL then we were called for data integrity and we must wait for | 693 | * WB_SYNC_ALL then we were called for data integrity and we must wait for |
685 | * existing IO to complete. | 694 | * existing IO to complete. |
686 | * | ||
687 | * If you fix this you should check generic_writepages() also! | ||
688 | */ | 695 | */ |
689 | int | 696 | int |
690 | mpage_writepages(struct address_space *mapping, | 697 | mpage_writepages(struct address_space *mapping, |
691 | struct writeback_control *wbc, get_block_t get_block) | 698 | struct writeback_control *wbc, get_block_t get_block) |
692 | { | 699 | { |
693 | struct backing_dev_info *bdi = mapping->backing_dev_info; | 700 | int ret; |
694 | struct bio *bio = NULL; | 701 | |
695 | sector_t last_block_in_bio = 0; | 702 | if (!get_block) |
696 | int ret = 0; | 703 | ret = generic_writepages(mapping, wbc); |
697 | int done = 0; | 704 | else { |
698 | int (*writepage)(struct page *page, struct writeback_control *wbc); | 705 | struct mpage_data mpd = { |
699 | struct pagevec pvec; | 706 | .bio = NULL, |
700 | int nr_pages; | 707 | .last_block_in_bio = 0, |
701 | pgoff_t index; | 708 | .get_block = get_block, |
702 | pgoff_t end; /* Inclusive */ | 709 | .use_writepage = 1, |
703 | int scanned = 0; | 710 | }; |
704 | int range_whole = 0; | 711 | |
705 | 712 | ret = write_cache_pages(mapping, wbc, __mpage_writepage, &mpd); | |
706 | if (wbc->nonblocking && bdi_write_congested(bdi)) { | 713 | if (mpd.bio) |
707 | wbc->encountered_congestion = 1; | 714 | mpage_bio_submit(WRITE, mpd.bio); |
708 | return 0; | ||
709 | } | ||
710 | |||
711 | writepage = NULL; | ||
712 | if (get_block == NULL) | ||
713 | writepage = mapping->a_ops->writepage; | ||
714 | |||
715 | pagevec_init(&pvec, 0); | ||
716 | if (wbc->range_cyclic) { | ||
717 | index = mapping->writeback_index; /* Start from prev offset */ | ||
718 | end = -1; | ||
719 | } else { | ||
720 | index = wbc->range_start >> PAGE_CACHE_SHIFT; | ||
721 | end = wbc->range_end >> PAGE_CACHE_SHIFT; | ||
722 | if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX) | ||
723 | range_whole = 1; | ||
724 | scanned = 1; | ||
725 | } | 715 | } |
726 | retry: | ||
727 | while (!done && (index <= end) && | ||
728 | (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, | ||
729 | PAGECACHE_TAG_DIRTY, | ||
730 | min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1))) { | ||
731 | unsigned i; | ||
732 | |||
733 | scanned = 1; | ||
734 | for (i = 0; i < nr_pages; i++) { | ||
735 | struct page *page = pvec.pages[i]; | ||
736 | |||
737 | /* | ||
738 | * At this point we hold neither mapping->tree_lock nor | ||
739 | * lock on the page itself: the page may be truncated or | ||
740 | * invalidated (changing page->mapping to NULL), or even | ||
741 | * swizzled back from swapper_space to tmpfs file | ||
742 | * mapping | ||
743 | */ | ||
744 | |||
745 | lock_page(page); | ||
746 | |||
747 | if (unlikely(page->mapping != mapping)) { | ||
748 | unlock_page(page); | ||
749 | continue; | ||
750 | } | ||
751 | |||
752 | if (!wbc->range_cyclic && page->index > end) { | ||
753 | done = 1; | ||
754 | unlock_page(page); | ||
755 | continue; | ||
756 | } | ||
757 | |||
758 | if (wbc->sync_mode != WB_SYNC_NONE) | ||
759 | wait_on_page_writeback(page); | ||
760 | |||
761 | if (PageWriteback(page) || | ||
762 | !clear_page_dirty_for_io(page)) { | ||
763 | unlock_page(page); | ||
764 | continue; | ||
765 | } | ||
766 | |||
767 | if (writepage) { | ||
768 | ret = (*writepage)(page, wbc); | ||
769 | mapping_set_error(mapping, ret); | ||
770 | } else { | ||
771 | bio = __mpage_writepage(bio, page, get_block, | ||
772 | &last_block_in_bio, &ret, wbc, | ||
773 | page->mapping->a_ops->writepage); | ||
774 | } | ||
775 | if (unlikely(ret == AOP_WRITEPAGE_ACTIVATE)) | ||
776 | unlock_page(page); | ||
777 | if (ret || (--(wbc->nr_to_write) <= 0)) | ||
778 | done = 1; | ||
779 | if (wbc->nonblocking && bdi_write_congested(bdi)) { | ||
780 | wbc->encountered_congestion = 1; | ||
781 | done = 1; | ||
782 | } | ||
783 | } | ||
784 | pagevec_release(&pvec); | ||
785 | cond_resched(); | ||
786 | } | ||
787 | if (!scanned && !done) { | ||
788 | /* | ||
789 | * We hit the last page and there is more work to be done: wrap | ||
790 | * back to the start of the file | ||
791 | */ | ||
792 | scanned = 1; | ||
793 | index = 0; | ||
794 | goto retry; | ||
795 | } | ||
796 | if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0)) | ||
797 | mapping->writeback_index = index; | ||
798 | if (bio) | ||
799 | mpage_bio_submit(WRITE, bio); | ||
800 | return ret; | 716 | return ret; |
801 | } | 717 | } |
802 | EXPORT_SYMBOL(mpage_writepages); | 718 | EXPORT_SYMBOL(mpage_writepages); |
@@ -804,15 +720,15 @@ EXPORT_SYMBOL(mpage_writepages); | |||
804 | int mpage_writepage(struct page *page, get_block_t get_block, | 720 | int mpage_writepage(struct page *page, get_block_t get_block, |
805 | struct writeback_control *wbc) | 721 | struct writeback_control *wbc) |
806 | { | 722 | { |
807 | int ret = 0; | 723 | struct mpage_data mpd = { |
808 | struct bio *bio; | 724 | .bio = NULL, |
809 | sector_t last_block_in_bio = 0; | 725 | .last_block_in_bio = 0, |
810 | 726 | .get_block = get_block, | |
811 | bio = __mpage_writepage(NULL, page, get_block, | 727 | .use_writepage = 0, |
812 | &last_block_in_bio, &ret, wbc, NULL); | 728 | }; |
813 | if (bio) | 729 | int ret = __mpage_writepage(page, wbc, &mpd); |
814 | mpage_bio_submit(WRITE, bio); | 730 | if (mpd.bio) |
815 | 731 | mpage_bio_submit(WRITE, mpd.bio); | |
816 | return ret; | 732 | return ret; |
817 | } | 733 | } |
818 | EXPORT_SYMBOL(mpage_writepage); | 734 | EXPORT_SYMBOL(mpage_writepage); |
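
For context, a hedged sketch of how a filesystem consumes the reworked helper; "myfs" and myfs_get_block() are hypothetical stand-ins for a real get_block_t implementation:

	#include <linux/fs.h>
	#include <linux/mpage.h>
	#include <linux/buffer_head.h>
	#include <linux/writeback.h>

	/* Hypothetical block-mapping routine provided by the filesystem. */
	extern int myfs_get_block(struct inode *inode, sector_t iblock,
				  struct buffer_head *bh_result, int create);

	static int myfs_writepages(struct address_space *mapping,
				   struct writeback_control *wbc)
	{
		/* A NULL get_block would fall back to generic_writepages(),
		 * per the rewritten mpage_writepages() above. */
		return mpage_writepages(mapping, wbc, myfs_get_block);
	}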
diff --git a/fs/partitions/Kconfig b/fs/partitions/Kconfig index 01207042048b..7638a1c42a7d 100644 --- a/fs/partitions/Kconfig +++ b/fs/partitions/Kconfig | |||
@@ -239,7 +239,7 @@ config EFI_PARTITION | |||
239 | 239 | ||
240 | config SYSV68_PARTITION | 240 | config SYSV68_PARTITION |
241 | bool "SYSV68 partition table support" if PARTITION_ADVANCED | 241 | bool "SYSV68 partition table support" if PARTITION_ADVANCED |
242 | default y if M68K | 242 | default y if VME |
243 | help | 243 | help |
244 | Say Y here if you would like to be able to read the hard disk | 244 | Say Y here if you would like to be able to read the hard disk |
245 | partition table format used by Motorola Delta machines (using | 245 | partition table format used by Motorola Delta machines (using |
diff --git a/fs/partitions/efi.c b/fs/partitions/efi.c index 1bea610078b3..e7b07006bc41 100644 --- a/fs/partitions/efi.c +++ b/fs/partitions/efi.c | |||
@@ -152,7 +152,7 @@ last_lba(struct block_device *bdev) | |||
152 | } | 152 | } |
153 | 153 | ||
154 | static inline int | 154 | static inline int |
155 | pmbr_part_valid(struct partition *part, u64 lastlba) | 155 | pmbr_part_valid(struct partition *part) |
156 | { | 156 | { |
157 | if (part->sys_ind == EFI_PMBR_OSTYPE_EFI_GPT && | 157 | if (part->sys_ind == EFI_PMBR_OSTYPE_EFI_GPT && |
158 | le32_to_cpu(part->start_sect) == 1UL) | 158 | le32_to_cpu(part->start_sect) == 1UL) |
@@ -163,7 +163,6 @@ pmbr_part_valid(struct partition *part, u64 lastlba) | |||
163 | /** | 163 | /** |
164 | * is_pmbr_valid(): test Protective MBR for validity | 164 | * is_pmbr_valid(): test Protective MBR for validity |
165 | * @mbr: pointer to a legacy mbr structure | 165 | * @mbr: pointer to a legacy mbr structure |
166 | * @lastlba: last_lba for the whole device | ||
167 | * | 166 | * |
168 | * Description: Returns 1 if PMBR is valid, 0 otherwise. | 167 | * Description: Returns 1 if PMBR is valid, 0 otherwise. |
169 | * Validity depends on two things: | 168 | * Validity depends on two things: |
@@ -171,13 +170,13 @@ pmbr_part_valid(struct partition *part, u64 lastlba) | |||
171 | * 2) One partition of type 0xEE is found | 170 | * 2) One partition of type 0xEE is found |
172 | */ | 171 | */ |
173 | static int | 172 | static int |
174 | is_pmbr_valid(legacy_mbr *mbr, u64 lastlba) | 173 | is_pmbr_valid(legacy_mbr *mbr) |
175 | { | 174 | { |
176 | int i; | 175 | int i; |
177 | if (!mbr || le16_to_cpu(mbr->signature) != MSDOS_MBR_SIGNATURE) | 176 | if (!mbr || le16_to_cpu(mbr->signature) != MSDOS_MBR_SIGNATURE) |
178 | return 0; | 177 | return 0; |
179 | for (i = 0; i < 4; i++) | 178 | for (i = 0; i < 4; i++) |
180 | if (pmbr_part_valid(&mbr->partition_record[i], lastlba)) | 179 | if (pmbr_part_valid(&mbr->partition_record[i])) |
181 | return 1; | 180 | return 1; |
182 | return 0; | 181 | return 0; |
183 | } | 182 | } |
@@ -516,7 +515,7 @@ find_valid_gpt(struct block_device *bdev, gpt_header **gpt, gpt_entry **ptes) | |||
516 | int good_pgpt = 0, good_agpt = 0, good_pmbr = 0; | 515 | int good_pgpt = 0, good_agpt = 0, good_pmbr = 0; |
517 | gpt_header *pgpt = NULL, *agpt = NULL; | 516 | gpt_header *pgpt = NULL, *agpt = NULL; |
518 | gpt_entry *pptes = NULL, *aptes = NULL; | 517 | gpt_entry *pptes = NULL, *aptes = NULL; |
519 | legacy_mbr *legacymbr = NULL; | 518 | legacy_mbr *legacymbr; |
520 | u64 lastlba; | 519 | u64 lastlba; |
521 | if (!bdev || !gpt || !ptes) | 520 | if (!bdev || !gpt || !ptes) |
522 | return 0; | 521 | return 0; |
@@ -528,9 +527,8 @@ find_valid_gpt(struct block_device *bdev, gpt_header **gpt, gpt_entry **ptes) | |||
528 | if (legacymbr) { | 527 | if (legacymbr) { |
529 | read_lba(bdev, 0, (u8 *) legacymbr, | 528 | read_lba(bdev, 0, (u8 *) legacymbr, |
530 | sizeof (*legacymbr)); | 529 | sizeof (*legacymbr)); |
531 | good_pmbr = is_pmbr_valid(legacymbr, lastlba); | 530 | good_pmbr = is_pmbr_valid(legacymbr); |
532 | kfree(legacymbr); | 531 | kfree(legacymbr); |
533 | legacymbr=NULL; | ||
534 | } | 532 | } |
535 | if (!good_pmbr) | 533 | if (!good_pmbr) |
536 | goto fail; | 534 | goto fail; |
diff --git a/fs/signalfd.c b/fs/signalfd.c new file mode 100644 index 000000000000..7cfeab412b45 --- /dev/null +++ b/fs/signalfd.c | |||
@@ -0,0 +1,349 @@ | |||
1 | /* | ||
2 | * fs/signalfd.c | ||
3 | * | ||
4 | * Copyright (C) 2003 Linus Torvalds | ||
5 | * | ||
6 | * Mon Mar 5, 2007: Davide Libenzi <davidel@xmailserver.org> | ||
7 | * Changed ->read() to return a siginfo structure instead of a signal number. | ||
8 | * Fixed locking in ->poll(). | ||
9 | * Added sighand-detach notification. | ||
10 | * Added fd re-use in sys_signalfd() syscall. | ||
11 | * Now using anonymous inode source. | ||
12 | * Thanks to Oleg Nesterov for useful code review and suggestions. | ||
13 | * More comments and suggestions from Arnd Bergmann. | ||
14 | */ | ||
15 | |||
16 | #include <linux/file.h> | ||
17 | #include <linux/poll.h> | ||
18 | #include <linux/init.h> | ||
19 | #include <linux/fs.h> | ||
20 | #include <linux/sched.h> | ||
21 | #include <linux/kernel.h> | ||
22 | #include <linux/signal.h> | ||
23 | #include <linux/list.h> | ||
24 | #include <linux/anon_inodes.h> | ||
25 | #include <linux/signalfd.h> | ||
26 | |||
27 | struct signalfd_ctx { | ||
28 | struct list_head lnk; | ||
29 | wait_queue_head_t wqh; | ||
30 | sigset_t sigmask; | ||
31 | struct task_struct *tsk; | ||
32 | }; | ||
33 | |||
34 | struct signalfd_lockctx { | ||
35 | struct task_struct *tsk; | ||
36 | unsigned long flags; | ||
37 | }; | ||
38 | |||
39 | /* | ||
40 | * Tries to acquire the sighand lock. We do not increment the sighand | ||
41 | * use count, and we do not even pin the task struct, so we need to | ||
42 | * do it inside an RCU read lock, and we must be prepared for | ||
43 | * ctx->tsk going to NULL (in signalfd_deliver()), and for the sighand | ||
44 | * being detached. We return 0 if the sighand has been detached, or | ||
45 | * 1 if we were able to pin the sighand lock. | ||
46 | */ | ||
47 | static int signalfd_lock(struct signalfd_ctx *ctx, struct signalfd_lockctx *lk) | ||
48 | { | ||
49 | struct sighand_struct *sighand = NULL; | ||
50 | |||
51 | rcu_read_lock(); | ||
52 | lk->tsk = rcu_dereference(ctx->tsk); | ||
53 | if (likely(lk->tsk != NULL)) | ||
54 | sighand = lock_task_sighand(lk->tsk, &lk->flags); | ||
55 | rcu_read_unlock(); | ||
56 | |||
57 | if (sighand && !ctx->tsk) { | ||
58 | unlock_task_sighand(lk->tsk, &lk->flags); | ||
59 | sighand = NULL; | ||
60 | } | ||
61 | |||
62 | return sighand != NULL; | ||
63 | } | ||
64 | |||
65 | static void signalfd_unlock(struct signalfd_lockctx *lk) | ||
66 | { | ||
67 | unlock_task_sighand(lk->tsk, &lk->flags); | ||
68 | } | ||
69 | |||
70 | /* | ||
71 | * This must be called with the sighand lock held. | ||
72 | */ | ||
73 | void signalfd_deliver(struct task_struct *tsk, int sig) | ||
74 | { | ||
75 | struct sighand_struct *sighand = tsk->sighand; | ||
76 | struct signalfd_ctx *ctx, *tmp; | ||
77 | |||
78 | BUG_ON(!sig); | ||
79 | list_for_each_entry_safe(ctx, tmp, &sighand->signalfd_list, lnk) { | ||
80 | /* | ||
81 | * We use a negative signal value as a way to broadcast that the | ||
82 | * sighand has been orphaned, so that we can notify all the | ||
83 | * listeners about this. Remember the ctx->sigmask is inverted, | ||
85 | * so if the user is interested in a signal, the corresponding | ||
85 | * bit will be zero. | ||
86 | */ | ||
87 | if (sig < 0) { | ||
88 | if (ctx->tsk == tsk) { | ||
89 | ctx->tsk = NULL; | ||
90 | list_del_init(&ctx->lnk); | ||
91 | wake_up(&ctx->wqh); | ||
92 | } | ||
93 | } else { | ||
94 | if (!sigismember(&ctx->sigmask, sig)) | ||
95 | wake_up(&ctx->wqh); | ||
96 | } | ||
97 | } | ||
98 | } | ||
99 | |||
100 | static void signalfd_cleanup(struct signalfd_ctx *ctx) | ||
101 | { | ||
102 | struct signalfd_lockctx lk; | ||
103 | |||
104 | /* | ||
105 | * This is tricky. If the sighand is gone, we do not need to remove the | ||
106 | * context from the list; the list itself won't be there anymore. | ||
107 | */ | ||
108 | if (signalfd_lock(ctx, &lk)) { | ||
109 | list_del(&ctx->lnk); | ||
110 | signalfd_unlock(&lk); | ||
111 | } | ||
112 | kfree(ctx); | ||
113 | } | ||
114 | |||
115 | static int signalfd_release(struct inode *inode, struct file *file) | ||
116 | { | ||
117 | signalfd_cleanup(file->private_data); | ||
118 | return 0; | ||
119 | } | ||
120 | |||
121 | static unsigned int signalfd_poll(struct file *file, poll_table *wait) | ||
122 | { | ||
123 | struct signalfd_ctx *ctx = file->private_data; | ||
124 | unsigned int events = 0; | ||
125 | struct signalfd_lockctx lk; | ||
126 | |||
127 | poll_wait(file, &ctx->wqh, wait); | ||
128 | |||
129 | /* | ||
130 | * Let the caller get a POLLIN in this case, a la socket recv() when | ||
131 | * the peer disconnects. | ||
132 | */ | ||
133 | if (signalfd_lock(ctx, &lk)) { | ||
134 | if (next_signal(&lk.tsk->pending, &ctx->sigmask) > 0 || | ||
135 | next_signal(&lk.tsk->signal->shared_pending, | ||
136 | &ctx->sigmask) > 0) | ||
137 | events |= POLLIN; | ||
138 | signalfd_unlock(&lk); | ||
139 | } else | ||
140 | events |= POLLIN; | ||
141 | |||
142 | return events; | ||
143 | } | ||
144 | |||
145 | /* | ||
146 | * Copied from copy_siginfo_to_user() in kernel/signal.c | ||
147 | */ | ||
148 | static int signalfd_copyinfo(struct signalfd_siginfo __user *uinfo, | ||
149 | siginfo_t const *kinfo) | ||
150 | { | ||
151 | long err; | ||
152 | |||
153 | BUILD_BUG_ON(sizeof(struct signalfd_siginfo) != 128); | ||
154 | |||
155 | /* | ||
156 | * Unused members should be zero ... | ||
157 | */ | ||
158 | err = __clear_user(uinfo, sizeof(*uinfo)); | ||
159 | |||
160 | /* | ||
161 | * If you change siginfo_t structure, please be sure | ||
162 | * this code is fixed accordingly. | ||
163 | */ | ||
164 | err |= __put_user(kinfo->si_signo, &uinfo->signo); | ||
165 | err |= __put_user(kinfo->si_errno, &uinfo->err); | ||
166 | err |= __put_user((short)kinfo->si_code, &uinfo->code); | ||
167 | switch (kinfo->si_code & __SI_MASK) { | ||
168 | case __SI_KILL: | ||
169 | err |= __put_user(kinfo->si_pid, &uinfo->pid); | ||
170 | err |= __put_user(kinfo->si_uid, &uinfo->uid); | ||
171 | break; | ||
172 | case __SI_TIMER: | ||
173 | err |= __put_user(kinfo->si_tid, &uinfo->tid); | ||
174 | err |= __put_user(kinfo->si_overrun, &uinfo->overrun); | ||
175 | err |= __put_user((long)kinfo->si_ptr, &uinfo->svptr); | ||
176 | break; | ||
177 | case __SI_POLL: | ||
178 | err |= __put_user(kinfo->si_band, &uinfo->band); | ||
179 | err |= __put_user(kinfo->si_fd, &uinfo->fd); | ||
180 | break; | ||
181 | case __SI_FAULT: | ||
182 | err |= __put_user((long)kinfo->si_addr, &uinfo->addr); | ||
183 | #ifdef __ARCH_SI_TRAPNO | ||
184 | err |= __put_user(kinfo->si_trapno, &uinfo->trapno); | ||
185 | #endif | ||
186 | break; | ||
187 | case __SI_CHLD: | ||
188 | err |= __put_user(kinfo->si_pid, &uinfo->pid); | ||
189 | err |= __put_user(kinfo->si_uid, &uinfo->uid); | ||
190 | err |= __put_user(kinfo->si_status, &uinfo->status); | ||
191 | err |= __put_user(kinfo->si_utime, &uinfo->utime); | ||
192 | err |= __put_user(kinfo->si_stime, &uinfo->stime); | ||
193 | break; | ||
194 | case __SI_RT: /* This is not generated by the kernel as of now. */ | ||
195 | case __SI_MESGQ: /* But this is */ | ||
196 | err |= __put_user(kinfo->si_pid, &uinfo->pid); | ||
197 | err |= __put_user(kinfo->si_uid, &uinfo->uid); | ||
198 | err |= __put_user((long)kinfo->si_ptr, &uinfo->svptr); | ||
199 | break; | ||
200 | default: /* this is just in case for now ... */ | ||
201 | err |= __put_user(kinfo->si_pid, &uinfo->pid); | ||
202 | err |= __put_user(kinfo->si_uid, &uinfo->uid); | ||
203 | break; | ||
204 | } | ||
205 | |||
206 | return err ? -EFAULT: sizeof(*uinfo); | ||
207 | } | ||
208 | |||
209 | /* | ||
210 | * Returns either the size of a "struct signalfd_siginfo", or zero if the | ||
211 | * sighand we are attached to has been orphaned. The "count" parameter | ||
212 | * must be at least the size of a "struct signalfd_siginfo". | ||
213 | */ | ||
214 | static ssize_t signalfd_read(struct file *file, char __user *buf, size_t count, | ||
215 | loff_t *ppos) | ||
216 | { | ||
217 | struct signalfd_ctx *ctx = file->private_data; | ||
218 | ssize_t res = 0; | ||
219 | int locked, signo; | ||
220 | siginfo_t info; | ||
221 | struct signalfd_lockctx lk; | ||
222 | DECLARE_WAITQUEUE(wait, current); | ||
223 | |||
224 | if (count < sizeof(struct signalfd_siginfo)) | ||
225 | return -EINVAL; | ||
226 | locked = signalfd_lock(ctx, &lk); | ||
227 | if (!locked) | ||
228 | return 0; | ||
229 | res = -EAGAIN; | ||
230 | signo = dequeue_signal(lk.tsk, &ctx->sigmask, &info); | ||
231 | if (signo == 0 && !(file->f_flags & O_NONBLOCK)) { | ||
232 | add_wait_queue(&ctx->wqh, &wait); | ||
233 | for (;;) { | ||
234 | set_current_state(TASK_INTERRUPTIBLE); | ||
235 | signo = dequeue_signal(lk.tsk, &ctx->sigmask, &info); | ||
236 | if (signo != 0) | ||
237 | break; | ||
238 | if (signal_pending(current)) { | ||
239 | res = -ERESTARTSYS; | ||
240 | break; | ||
241 | } | ||
242 | signalfd_unlock(&lk); | ||
243 | schedule(); | ||
244 | locked = signalfd_lock(ctx, &lk); | ||
245 | if (unlikely(!locked)) { | ||
246 | /* | ||
247 | * Let the caller read zero bytes, a la socket | ||
248 | * recv() when the peer disconnects. This test | ||
249 | * must be done before doing a dequeue_signal(), | ||
250 | * because if the sighand has been orphaned, | ||
251 | * the dequeue_signal() call is going to crash. | ||
252 | */ | ||
253 | res = 0; | ||
254 | break; | ||
255 | } | ||
256 | } | ||
257 | remove_wait_queue(&ctx->wqh, &wait); | ||
258 | __set_current_state(TASK_RUNNING); | ||
259 | } | ||
260 | if (likely(locked)) | ||
261 | signalfd_unlock(&lk); | ||
262 | if (likely(signo)) | ||
263 | res = signalfd_copyinfo((struct signalfd_siginfo __user *) buf, | ||
264 | &info); | ||
265 | |||
266 | return res; | ||
267 | } | ||
268 | |||
269 | static const struct file_operations signalfd_fops = { | ||
270 | .release = signalfd_release, | ||
271 | .poll = signalfd_poll, | ||
272 | .read = signalfd_read, | ||
273 | }; | ||
274 | |||
275 | /* | ||
276 | * Create a file descriptor that is associated with our signal | ||
277 | * state. We can pass it around to others if we want to, but | ||
278 | * it will always be _our_ signal state. | ||
279 | */ | ||
280 | asmlinkage long sys_signalfd(int ufd, sigset_t __user *user_mask, size_t sizemask) | ||
281 | { | ||
282 | int error; | ||
283 | sigset_t sigmask; | ||
284 | struct signalfd_ctx *ctx; | ||
285 | struct sighand_struct *sighand; | ||
286 | struct file *file; | ||
287 | struct inode *inode; | ||
288 | struct signalfd_lockctx lk; | ||
289 | |||
290 | if (sizemask != sizeof(sigset_t) || | ||
291 | copy_from_user(&sigmask, user_mask, sizeof(sigmask))) | ||
292 | return error = -EINVAL; | ||
293 | sigdelsetmask(&sigmask, sigmask(SIGKILL) | sigmask(SIGSTOP)); | ||
294 | signotset(&sigmask); | ||
295 | |||
296 | if (ufd == -1) { | ||
297 | ctx = kmalloc(sizeof(*ctx), GFP_KERNEL); | ||
298 | if (!ctx) | ||
299 | return -ENOMEM; | ||
300 | |||
301 | init_waitqueue_head(&ctx->wqh); | ||
302 | ctx->sigmask = sigmask; | ||
303 | ctx->tsk = current; | ||
304 | |||
305 | sighand = current->sighand; | ||
306 | /* | ||
307 | * Add this fd to the list of signal listeners. | ||
308 | */ | ||
309 | spin_lock_irq(&sighand->siglock); | ||
310 | list_add_tail(&ctx->lnk, &sighand->signalfd_list); | ||
311 | spin_unlock_irq(&sighand->siglock); | ||
312 | |||
313 | /* | ||
314 | * When we call this, the initialization must be complete, since | ||
315 | * anon_inode_getfd() will install the fd. | ||
316 | */ | ||
317 | error = anon_inode_getfd(&ufd, &inode, &file, "[signalfd]", | ||
318 | &signalfd_fops, ctx); | ||
319 | if (error) | ||
320 | goto err_fdalloc; | ||
321 | } else { | ||
322 | file = fget(ufd); | ||
323 | if (!file) | ||
324 | return -EBADF; | ||
325 | ctx = file->private_data; | ||
326 | if (file->f_op != &signalfd_fops) { | ||
327 | fput(file); | ||
328 | return -EINVAL; | ||
329 | } | ||
330 | /* | ||
331 | * We need to be prepared for the fact that the sighand this fd | ||
332 | * is attached to has been detached. In that case signalfd_lock() | ||
333 | * will return 0, and we'll just skip setting the new mask. | ||
334 | */ | ||
335 | if (signalfd_lock(ctx, &lk)) { | ||
336 | ctx->sigmask = sigmask; | ||
337 | signalfd_unlock(&lk); | ||
338 | } | ||
339 | wake_up(&ctx->wqh); | ||
340 | fput(file); | ||
341 | } | ||
342 | |||
343 | return ufd; | ||
344 | |||
345 | err_fdalloc: | ||
346 | signalfd_cleanup(ctx); | ||
347 | return error; | ||
348 | } | ||
349 | |||
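A hedged userspace sketch of the new interface (raw syscall, since a libc wrapper may not exist yet; the field names follow signalfd_copyinfo() above, and the 8-byte sizemask assumes the kernel sigset_t size of the common architectures, which is smaller than glibc's sigset_t):

	#include <signal.h>
	#include <unistd.h>
	#include <sys/syscall.h>
	#include <linux/signalfd.h>

	int main(void)
	{
		sigset_t mask;
		struct signalfd_siginfo info;
		int sfd;

		sigemptyset(&mask);
		sigaddset(&mask, SIGINT);
		/* Block normal delivery so the signal is left queued
		 * for dequeue_signal() to find. */
		sigprocmask(SIG_BLOCK, &mask, NULL);

		/* ufd == -1 creates a new signalfd; the kernel checks
		 * the size against its own sigset_t, hence 8 rather
		 * than glibc's sizeof(sigset_t). */
		sfd = syscall(__NR_signalfd, -1, &mask, 8);
		if (sfd < 0)
			return 1;

		/* Each read() dequeues one pending signal. */
		if (read(sfd, &info, sizeof(info)) == sizeof(info))
			return (int)info.signo;
		return 1;
	}

Passing an existing signalfd descriptor instead of -1 updates its mask in place, per the ufd != -1 branch above.
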
diff --git a/fs/timerfd.c b/fs/timerfd.c new file mode 100644 index 000000000000..e329e37f15a8 --- /dev/null +++ b/fs/timerfd.c | |||
@@ -0,0 +1,227 @@ | |||
1 | /* | ||
2 | * fs/timerfd.c | ||
3 | * | ||
4 | * Copyright (C) 2007 Davide Libenzi <davidel@xmailserver.org> | ||
5 | * | ||
6 | * | ||
7 | * Thanks to Thomas Gleixner for code reviews and useful comments. | ||
8 | * | ||
9 | */ | ||
10 | |||
11 | #include <linux/file.h> | ||
12 | #include <linux/poll.h> | ||
13 | #include <linux/init.h> | ||
14 | #include <linux/fs.h> | ||
15 | #include <linux/sched.h> | ||
16 | #include <linux/kernel.h> | ||
17 | #include <linux/list.h> | ||
18 | #include <linux/spinlock.h> | ||
19 | #include <linux/time.h> | ||
20 | #include <linux/hrtimer.h> | ||
21 | #include <linux/anon_inodes.h> | ||
22 | #include <linux/timerfd.h> | ||
23 | |||
24 | struct timerfd_ctx { | ||
25 | struct hrtimer tmr; | ||
26 | ktime_t tintv; | ||
27 | spinlock_t lock; | ||
28 | wait_queue_head_t wqh; | ||
29 | int expired; | ||
30 | }; | ||
31 | |||
32 | /* | ||
33 | * This gets called when the timer event triggers. We set the "expired" | ||
34 | * flag, but we do not re-arm the timer (if necessary, i.e. when | ||
35 | * tintv.tv64 != 0) until the timer is read. | ||
36 | */ | ||
37 | static enum hrtimer_restart timerfd_tmrproc(struct hrtimer *htmr) | ||
38 | { | ||
39 | struct timerfd_ctx *ctx = container_of(htmr, struct timerfd_ctx, tmr); | ||
40 | unsigned long flags; | ||
41 | |||
42 | spin_lock_irqsave(&ctx->lock, flags); | ||
43 | ctx->expired = 1; | ||
44 | wake_up_locked(&ctx->wqh); | ||
45 | spin_unlock_irqrestore(&ctx->lock, flags); | ||
46 | |||
47 | return HRTIMER_NORESTART; | ||
48 | } | ||
49 | |||
50 | static void timerfd_setup(struct timerfd_ctx *ctx, int clockid, int flags, | ||
51 | const struct itimerspec *ktmr) | ||
52 | { | ||
53 | enum hrtimer_mode htmode; | ||
54 | ktime_t texp; | ||
55 | |||
56 | htmode = (flags & TFD_TIMER_ABSTIME) ? | ||
57 | HRTIMER_MODE_ABS: HRTIMER_MODE_REL; | ||
58 | |||
59 | texp = timespec_to_ktime(ktmr->it_value); | ||
60 | ctx->expired = 0; | ||
61 | ctx->tintv = timespec_to_ktime(ktmr->it_interval); | ||
62 | hrtimer_init(&ctx->tmr, clockid, htmode); | ||
63 | ctx->tmr.expires = texp; | ||
64 | ctx->tmr.function = timerfd_tmrproc; | ||
65 | if (texp.tv64 != 0) | ||
66 | hrtimer_start(&ctx->tmr, texp, htmode); | ||
67 | } | ||
68 | |||
69 | static int timerfd_release(struct inode *inode, struct file *file) | ||
70 | { | ||
71 | struct timerfd_ctx *ctx = file->private_data; | ||
72 | |||
73 | hrtimer_cancel(&ctx->tmr); | ||
74 | kfree(ctx); | ||
75 | return 0; | ||
76 | } | ||
77 | |||
78 | static unsigned int timerfd_poll(struct file *file, poll_table *wait) | ||
79 | { | ||
80 | struct timerfd_ctx *ctx = file->private_data; | ||
81 | unsigned int events = 0; | ||
82 | unsigned long flags; | ||
83 | |||
84 | poll_wait(file, &ctx->wqh, wait); | ||
85 | |||
86 | spin_lock_irqsave(&ctx->lock, flags); | ||
87 | if (ctx->expired) | ||
88 | events |= POLLIN; | ||
89 | spin_unlock_irqrestore(&ctx->lock, flags); | ||
90 | |||
91 | return events; | ||
92 | } | ||
93 | |||
94 | static ssize_t timerfd_read(struct file *file, char __user *buf, size_t count, | ||
95 | loff_t *ppos) | ||
96 | { | ||
97 | struct timerfd_ctx *ctx = file->private_data; | ||
98 | ssize_t res; | ||
99 | u32 ticks = 0; | ||
100 | DECLARE_WAITQUEUE(wait, current); | ||
101 | |||
102 | if (count < sizeof(ticks)) | ||
103 | return -EINVAL; | ||
104 | spin_lock_irq(&ctx->lock); | ||
105 | res = -EAGAIN; | ||
106 | if (!ctx->expired && !(file->f_flags & O_NONBLOCK)) { | ||
107 | __add_wait_queue(&ctx->wqh, &wait); | ||
108 | for (res = 0;;) { | ||
109 | set_current_state(TASK_INTERRUPTIBLE); | ||
110 | if (ctx->expired) { | ||
111 | res = 0; | ||
112 | break; | ||
113 | } | ||
114 | if (signal_pending(current)) { | ||
115 | res = -ERESTARTSYS; | ||
116 | break; | ||
117 | } | ||
118 | spin_unlock_irq(&ctx->lock); | ||
119 | schedule(); | ||
120 | spin_lock_irq(&ctx->lock); | ||
121 | } | ||
122 | __remove_wait_queue(&ctx->wqh, &wait); | ||
123 | __set_current_state(TASK_RUNNING); | ||
124 | } | ||
125 | if (ctx->expired) { | ||
126 | ctx->expired = 0; | ||
127 | if (ctx->tintv.tv64 != 0) { | ||
128 | /* | ||
129 | * If tintv.tv64 != 0, this is a periodic timer that | ||
130 | * needs to be re-armed. We do not do that in the timer | ||
131 | * callback, to prevent DoS attacks that specify a very | ||
132 | * short timer period. | ||
133 | */ | ||
134 | ticks = (u32) | ||
135 | hrtimer_forward(&ctx->tmr, | ||
136 | hrtimer_cb_get_time(&ctx->tmr), | ||
137 | ctx->tintv); | ||
138 | hrtimer_restart(&ctx->tmr); | ||
139 | } else | ||
140 | ticks = 1; | ||
141 | } | ||
142 | spin_unlock_irq(&ctx->lock); | ||
143 | if (ticks) | ||
144 | res = put_user(ticks, (u32 __user *) buf) ? -EFAULT : sizeof(ticks); | ||
145 | return res; | ||
146 | } | ||
147 | |||
148 | static const struct file_operations timerfd_fops = { | ||
149 | .release = timerfd_release, | ||
150 | .poll = timerfd_poll, | ||
151 | .read = timerfd_read, | ||
152 | }; | ||
153 | |||
154 | asmlinkage long sys_timerfd(int ufd, int clockid, int flags, | ||
155 | const struct itimerspec __user *utmr) | ||
156 | { | ||
157 | int error; | ||
158 | struct timerfd_ctx *ctx; | ||
159 | struct file *file; | ||
160 | struct inode *inode; | ||
161 | struct itimerspec ktmr; | ||
162 | |||
163 | if (copy_from_user(&ktmr, utmr, sizeof(ktmr))) | ||
164 | return -EFAULT; | ||
165 | |||
166 | if (clockid != CLOCK_MONOTONIC && | ||
167 | clockid != CLOCK_REALTIME) | ||
168 | return -EINVAL; | ||
169 | if (!timespec_valid(&ktmr.it_value) || | ||
170 | !timespec_valid(&ktmr.it_interval)) | ||
171 | return -EINVAL; | ||
172 | |||
173 | if (ufd == -1) { | ||
174 | ctx = kmalloc(sizeof(*ctx), GFP_KERNEL); | ||
175 | if (!ctx) | ||
176 | return -ENOMEM; | ||
177 | |||
178 | init_waitqueue_head(&ctx->wqh); | ||
179 | spin_lock_init(&ctx->lock); | ||
180 | |||
181 | timerfd_setup(ctx, clockid, flags, &ktmr); | ||
182 | |||
183 | /* | ||
184 | * When we call this, the initialization must be complete, since | ||
185 | * anon_inode_getfd() will install the fd. | ||
186 | */ | ||
187 | error = anon_inode_getfd(&ufd, &inode, &file, "[timerfd]", | ||
188 | &timerfd_fops, ctx); | ||
189 | if (error) | ||
190 | goto err_tmrcancel; | ||
191 | } else { | ||
192 | file = fget(ufd); | ||
193 | if (!file) | ||
194 | return -EBADF; | ||
195 | ctx = file->private_data; | ||
196 | if (file->f_op != &timerfd_fops) { | ||
197 | fput(file); | ||
198 | return -EINVAL; | ||
199 | } | ||
200 | /* | ||
201 | * We need to stop the existing timer before reprogramming | ||
202 | * it to the new values. | ||
203 | */ | ||
204 | for (;;) { | ||
205 | spin_lock_irq(&ctx->lock); | ||
206 | if (hrtimer_try_to_cancel(&ctx->tmr) >= 0) | ||
207 | break; | ||
208 | spin_unlock_irq(&ctx->lock); | ||
209 | cpu_relax(); | ||
210 | } | ||
211 | /* | ||
213 | * Re-program the timer to the new values. | ||
213 | */ | ||
214 | timerfd_setup(ctx, clockid, flags, &ktmr); | ||
215 | |||
216 | spin_unlock_irq(&ctx->lock); | ||
217 | fput(file); | ||
218 | } | ||
219 | |||
220 | return ufd; | ||
221 | |||
222 | err_tmrcancel: | ||
223 | hrtimer_cancel(&ctx->tmr); | ||
224 | kfree(ctx); | ||
225 | return error; | ||
226 | } | ||
227 | |||
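For reference, a matching userspace sketch of the sys_timerfd() call defined above. Again a hypothetical example, not part of the patch: it assumes __NR_timerfd is wired up by the arch changes elsewhere in this series (not shown here). With flags == 0 the it_value is relative; passing TFD_TIMER_ABSTIME would make it absolute, per timerfd_setup().

	#define _GNU_SOURCE
	#include <stdio.h>
	#include <time.h>
	#include <unistd.h>
	#include <sys/syscall.h>

	int main(void)
	{
		struct itimerspec its = {
			.it_value    = { .tv_sec = 1, .tv_nsec = 0 },	/* first expiry */
			.it_interval = { .tv_sec = 1, .tv_nsec = 0 },	/* then periodic */
		};
		unsigned int ticks;
		int tfd, i;

		/* ufd == -1: create a new timerfd on the monotonic clock. */
		tfd = syscall(__NR_timerfd, -1, CLOCK_MONOTONIC, 0, &its);
		if (tfd < 0)
			return 1;

		for (i = 0; i < 3; i++) {
			/* Each read reports the expirations since the last read
			   as a u32, re-arming the periodic timer as a side effect. */
			if (read(tfd, &ticks, sizeof(ticks)) == sizeof(ticks))
				printf("expirations: %u\n", ticks);
		}
		close(tfd);
		return 0;
	}

Passing an existing timerfd descriptor instead of -1 takes the else branch above and reprograms the running timer in place. (Later kernels split this single call into timerfd_create(2) and timerfd_settime(2) and widened the read payload to a u64, so this invocation matches only the interface as posted here.)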