From c4a3e0a529ab3e65223e81681c7c6b1bc188fa58 Mon Sep 17 00:00:00 2001 From: "Bagalkote, Sreenivas" Date: Tue, 20 Sep 2005 17:46:58 -0400 Subject: [SCSI] MegaRAID SAS RAID: new driver Signed-off-by: Sreenivas Bagalkote Signed-off-by: James Bottomley --- include/linux/pci_ids.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index c49d28eca561..20fb79810c3c 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -185,6 +185,7 @@ #define PCI_DEVICE_ID_LSI_61C102 0x0901 #define PCI_DEVICE_ID_LSI_63C815 0x1000 #define PCI_DEVICE_ID_LSI_SAS1064 0x0050 +#define PCI_DEVICE_ID_LSI_SAS1064R 0x0411 #define PCI_DEVICE_ID_LSI_SAS1066 0x005E #define PCI_DEVICE_ID_LSI_SAS1068 0x0054 #define PCI_DEVICE_ID_LSI_SAS1064A 0x005C @@ -559,6 +560,7 @@ #define PCI_VENDOR_ID_DELL 0x1028 #define PCI_DEVICE_ID_DELL_RACIII 0x0008 #define PCI_DEVICE_ID_DELL_RAC4 0x0012 +#define PCI_DEVICE_ID_DELL_PERC5 0x0015 #define PCI_VENDOR_ID_MATROX 0x102B #define PCI_DEVICE_ID_MATROX_MGA_2 0x0518 -- cgit v1.2.2 From 9356b8fc07dc126cd91d2b12f314d760ab48996e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 27 Sep 2005 15:23:16 -0700 Subject: [NET]: Reorder some hot fields of struct net_device Place them on separate cache lines in SMP to lower memory bouncing between multiple CPU accessing the device. - One part is mostly used on receive path (including eth_type_trans()) (poll_list, poll, quota, weight, last_rx, dev_addr, broadcast) - One part is mostly used on queue transmit path (qdisc) (queue_lock, qdisc, qdisc_sleeping, qdisc_list, tx_queue_len) - One part is mostly used on xmit path (device) (xmit_lock, xmit_lock_owner, priv, hard_start_xmit, trans_start) 'features' is placed outside of these hot points, in a location that may be shared by all cpus (because mostly read) name_hlist is moved close to name[IFNAMSIZ] to speedup __dev_get_by_name() Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/netdevice.h | 89 +++++++++++++++++++++++++++++------------------ 1 file changed, 55 insertions(+), 34 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 7c717907896d..368e4c825ff1 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -265,6 +265,8 @@ struct net_device * the interface. */ char name[IFNAMSIZ]; + /* device name hash chain */ + struct hlist_node name_hlist; /* * I/O specific fields @@ -292,6 +294,21 @@ struct net_device /* ------- Fields preinitialized in Space.c finish here ------- */ + /* Net device features */ + unsigned long features; +#define NETIF_F_SG 1 /* Scatter/gather IO. */ +#define NETIF_F_IP_CSUM 2 /* Can checksum only TCP/UDP over IPv4. */ +#define NETIF_F_NO_CSUM 4 /* Does not require checksum. F.e. loopack. */ +#define NETIF_F_HW_CSUM 8 /* Can checksum all the packets. */ +#define NETIF_F_HIGHDMA 32 /* Can DMA to high memory. */ +#define NETIF_F_FRAGLIST 64 /* Scatter/gather IO. */ +#define NETIF_F_HW_VLAN_TX 128 /* Transmit VLAN hw acceleration */ +#define NETIF_F_HW_VLAN_RX 256 /* Receive VLAN hw acceleration */ +#define NETIF_F_HW_VLAN_FILTER 512 /* Receive filtering on VLAN */ +#define NETIF_F_VLAN_CHALLENGED 1024 /* Device cannot handle VLAN packets */ +#define NETIF_F_TSO 2048 /* Can offload TCP/IP segmentation */ +#define NETIF_F_LLTX 4096 /* LockLess TX */ + struct net_device *next_sched; /* Interface index. Unique device identifier */ @@ -316,9 +333,6 @@ struct net_device * will (read: may be cleaned up at will). */ - /* These may be needed for future network-power-down code. */ - unsigned long trans_start; /* Time (in jiffies) of last Tx */ - unsigned long last_rx; /* Time of last Rx */ unsigned short flags; /* interface flags (a la BSD) */ unsigned short gflags; @@ -328,15 +342,12 @@ struct net_device unsigned mtu; /* interface MTU value */ unsigned short type; /* interface hardware type */ unsigned short hard_header_len; /* hardware hdr length */ - void *priv; /* pointer to private data */ struct net_device *master; /* Pointer to master device of a group, * which this device is member of. */ /* Interface address info. */ - unsigned char broadcast[MAX_ADDR_LEN]; /* hw bcast add */ - unsigned char dev_addr[MAX_ADDR_LEN]; /* hw address */ unsigned char perm_addr[MAX_ADDR_LEN]; /* permanent hw address */ unsigned char addr_len; /* hardware address length */ unsigned short dev_id; /* for shared network cards */ @@ -346,8 +357,6 @@ struct net_device int promiscuity; int allmulti; - int watchdog_timeo; - struct timer_list watchdog_timer; /* Protocol specific pointers */ @@ -358,32 +367,62 @@ struct net_device void *ec_ptr; /* Econet specific data */ void *ax25_ptr; /* AX.25 specific data */ - struct list_head poll_list; /* Link to poll list */ +/* + * Cache line mostly used on receive path (including eth_type_trans()) + */ + struct list_head poll_list ____cacheline_aligned_in_smp; + /* Link to poll list */ + + int (*poll) (struct net_device *dev, int *quota); int quota; int weight; + unsigned long last_rx; /* Time of last Rx */ + /* Interface address info used in eth_type_trans() */ + unsigned char dev_addr[MAX_ADDR_LEN]; /* hw address, (before bcast + because most packets are unicast) */ + + unsigned char broadcast[MAX_ADDR_LEN]; /* hw bcast add */ +/* + * Cache line mostly used on queue transmit path (qdisc) + */ + /* device queue lock */ + spinlock_t queue_lock ____cacheline_aligned_in_smp; struct Qdisc *qdisc; struct Qdisc *qdisc_sleeping; - struct Qdisc *qdisc_ingress; struct list_head qdisc_list; unsigned long tx_queue_len; /* Max frames per queue allowed */ /* ingress path synchronizer */ spinlock_t ingress_lock; + struct Qdisc *qdisc_ingress; + +/* + * One part is mostly used on xmit path (device) + */ /* hard_start_xmit synchronizer */ - spinlock_t xmit_lock; + spinlock_t xmit_lock ____cacheline_aligned_in_smp; /* cpu id of processor entered to hard_start_xmit or -1, if nobody entered there. */ int xmit_lock_owner; - /* device queue lock */ - spinlock_t queue_lock; + void *priv; /* pointer to private data */ + int (*hard_start_xmit) (struct sk_buff *skb, + struct net_device *dev); + /* These may be needed for future network-power-down code. */ + unsigned long trans_start; /* Time (in jiffies) of last Tx */ + + int watchdog_timeo; /* used by dev_watchdog() */ + struct timer_list watchdog_timer; + +/* + * refcnt is a very hot point, so align it on SMP + */ /* Number of references to this device */ - atomic_t refcnt; + atomic_t refcnt ____cacheline_aligned_in_smp; + /* delayed register/unregister */ struct list_head todo_list; - /* device name hash chain */ - struct hlist_node name_hlist; /* device index hash chain */ struct hlist_node index_hlist; @@ -396,21 +435,6 @@ struct net_device NETREG_RELEASED, /* called free_netdev */ } reg_state; - /* Net device features */ - unsigned long features; -#define NETIF_F_SG 1 /* Scatter/gather IO. */ -#define NETIF_F_IP_CSUM 2 /* Can checksum only TCP/UDP over IPv4. */ -#define NETIF_F_NO_CSUM 4 /* Does not require checksum. F.e. loopack. */ -#define NETIF_F_HW_CSUM 8 /* Can checksum all the packets. */ -#define NETIF_F_HIGHDMA 32 /* Can DMA to high memory. */ -#define NETIF_F_FRAGLIST 64 /* Scatter/gather IO. */ -#define NETIF_F_HW_VLAN_TX 128 /* Transmit VLAN hw acceleration */ -#define NETIF_F_HW_VLAN_RX 256 /* Receive VLAN hw acceleration */ -#define NETIF_F_HW_VLAN_FILTER 512 /* Receive filtering on VLAN */ -#define NETIF_F_VLAN_CHALLENGED 1024 /* Device cannot handle VLAN packets */ -#define NETIF_F_TSO 2048 /* Can offload TCP/IP segmentation */ -#define NETIF_F_LLTX 4096 /* LockLess TX */ - /* Called after device is detached from network. */ void (*uninit)(struct net_device *dev); /* Called after last user reference disappears. */ @@ -419,10 +443,7 @@ struct net_device /* Pointers to interface service routines. */ int (*open)(struct net_device *dev); int (*stop)(struct net_device *dev); - int (*hard_start_xmit) (struct sk_buff *skb, - struct net_device *dev); #define HAVE_NETDEV_POLL - int (*poll) (struct net_device *dev, int *quota); int (*hard_header) (struct sk_buff *skb, struct net_device *dev, unsigned short type, -- cgit v1.2.2 From 1f26dac32057baaf67d10b45c6b5277db862911d Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 27 Sep 2005 15:24:13 -0700 Subject: [NET]: Add Sun Cassini driver. Written by Adrian Sun (asun@darksunrising.com). Ported to 2.6.x by Tom 'spot' Callaway . Further cleaned up and integrated by David S. Miller Signed-off-by: David S. Miller --- include/linux/pci_ids.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 68f11ac1a314..eb36fd293b41 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -392,6 +392,7 @@ #define PCI_DEVICE_ID_NS_87560_USB 0x0012 #define PCI_DEVICE_ID_NS_83815 0x0020 #define PCI_DEVICE_ID_NS_83820 0x0022 +#define PCI_DEVICE_ID_NS_SATURN 0x0035 #define PCI_DEVICE_ID_NS_SCx200_BRIDGE 0x0500 #define PCI_DEVICE_ID_NS_SCx200_SMI 0x0501 #define PCI_DEVICE_ID_NS_SCx200_IDE 0x0502 @@ -983,6 +984,7 @@ #define PCI_DEVICE_ID_SUN_SABRE 0xa000 #define PCI_DEVICE_ID_SUN_HUMMINGBIRD 0xa001 #define PCI_DEVICE_ID_SUN_TOMATILLO 0xa801 +#define PCI_DEVICE_ID_SUN_CASSINI 0xabba #define PCI_VENDOR_ID_CMD 0x1095 #define PCI_DEVICE_ID_CMD_640 0x0640 -- cgit v1.2.2 From 2fab35d78f32fc107e1af4b1ec23f557fa20d911 Mon Sep 17 00:00:00 2001 From: Ben Dooks Date: Tue, 27 Sep 2005 15:59:43 -0700 Subject: [NET]: Fix GCC4 compile error: sysctl in linux/if_ether.h The following is generated when compiling a recent (2.6.14-rc2-git5) kernel configured for ARM, with GCC4. CC init/main.o In file included from include/linux/netdevice.h:29, from include/net/sock.h:48, from init/main.c:50: include/linux/if_ether.h:114: error: array type has incomplete element type It seems that if CONFIG_SYSCTL is not set, then the compiler will throw an error due to the definition of the ether_table[] array Attached is a solution to the problem Signed-off-by: Ben Dooks Signed-off-by: David S. Miller --- include/linux/if_ether.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/if_ether.h b/include/linux/if_ether.h index fc2d4c8225aa..d21c305c6c64 100644 --- a/include/linux/if_ether.h +++ b/include/linux/if_ether.h @@ -111,7 +111,9 @@ static inline struct ethhdr *eth_hdr(const struct sk_buff *skb) return (struct ethhdr *)skb->mac.raw; } +#ifdef CONFIG_SYSCTL extern struct ctl_table ether_table[]; #endif +#endif #endif /* _LINUX_IF_ETHER_H */ -- cgit v1.2.2 From aa55a08687059aa169d10a313c41f238c2070488 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Thu, 29 Sep 2005 19:58:53 +0400 Subject: [PATCH] fix TASK_STOPPED vs TASK_NONINTERACTIVE interaction do_signal_stop: for_each_thread(t) { if (t->state < TASK_STOPPED) ++sig->group_stop_count; } However, TASK_NONINTERACTIVE > TASK_STOPPED, so this loop will not count TASK_INTERRUPTIBLE | TASK_NONINTERACTIVE threads. See also wait_task_stopped(), which checks ->state > TASK_STOPPED. Signed-off-by: Oleg Nesterov [ We really probably should always use the appropriate bitmasks to test task states, not do it like this. Using something like #define TASK_RUNNABLE (TASK_RUNNING | TASK_INTERRUPTIBLE | \ TASK_UNINTERRUPTIBLE | TASK_NONINTERACTIVE) and then doing "if (task->state & TASK_RUNNABLE)" or similar. But the ordering of the task states is historical, and keeping the ordering does make sense regardless. ] Signed-off-by: Linus Torvalds --- include/linux/sched.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 49e617fa0f66..afe6c61f13e5 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -110,11 +110,11 @@ extern unsigned long nr_iowait(void); #define TASK_RUNNING 0 #define TASK_INTERRUPTIBLE 1 #define TASK_UNINTERRUPTIBLE 2 -#define TASK_STOPPED 4 -#define TASK_TRACED 8 -#define EXIT_ZOMBIE 16 -#define EXIT_DEAD 32 -#define TASK_NONINTERACTIVE 64 +#define TASK_NONINTERACTIVE 4 +#define TASK_STOPPED 8 +#define TASK_TRACED 16 +#define EXIT_ZOMBIE 32 +#define EXIT_DEAD 64 #define __set_task_state(tsk, state_value) \ do { (tsk)->state = (state_value); } while (0) -- cgit v1.2.2 From 4a8342d233a39ee582e9f7260e12d2f5fd194a05 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 29 Sep 2005 15:18:21 -0700 Subject: Revert task flag re-ordering, add comments Roland points out that the flags end up having non-obvious dependencies elsewhere, so revert aa55a08687059aa169d10a313c41f238c2070488 and add some comments about why things are as they are. We'll just have to fix up the broken comparisons. Roland has a patch. Signed-off-by: Linus Torvalds --- include/linux/sched.h | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index afe6c61f13e5..c3ba31f210a9 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -107,14 +107,26 @@ extern unsigned long nr_iowait(void); #include +/* + * Task state bitmask. NOTE! These bits are also + * encoded in fs/proc/array.c: get_task_state(). + * + * We have two separate sets of flags: task->state + * is about runnability, while task->exit_state are + * about the task exiting. Confusing, but this way + * modifying one set can't modify the other one by + * mistake. + */ #define TASK_RUNNING 0 #define TASK_INTERRUPTIBLE 1 #define TASK_UNINTERRUPTIBLE 2 -#define TASK_NONINTERACTIVE 4 -#define TASK_STOPPED 8 -#define TASK_TRACED 16 -#define EXIT_ZOMBIE 32 -#define EXIT_DEAD 64 +#define TASK_STOPPED 4 +#define TASK_TRACED 8 +/* in tsk->exit_state */ +#define EXIT_ZOMBIE 16 +#define EXIT_DEAD 32 +/* in tsk->state again */ +#define TASK_NONINTERACTIVE 64 #define __set_task_state(tsk, state_value) \ do { (tsk)->state = (state_value); } while (0) -- cgit v1.2.2 From 897f15fb587fd2772b9e7ff6ec0265057f3c3975 Mon Sep 17 00:00:00 2001 From: Zach Brown Date: Fri, 30 Sep 2005 11:58:55 -0700 Subject: [PATCH] aio: remove unlocked task_list test and resulting race Only one of the run or kick path is supposed to put an iocb on the run list. If both of them do it than one of them can end up referencing a freed iocb. The kick path could delete the task_list item from the wait queue before getting the ctx_lock and putting the iocb on the run list. The run path was testing the task_list item outside the lock so that it could catch ki_retry methods that return -EIOCBRETRY *without* putting the iocb on a wait queue and promising to call kick_iocb. This unlocked check could then race with the kick path to cause both to try and put the iocb on the run list. The patch stops the run path from testing task_list by requring that any ki_retry that returns -EIOCBRETRY *must* guarantee that kick_iocb() will be called in the future. aio_p{read,write}, the only in-tree -EIOCBRETRY users, are updated. Signed-off-by: Zach Brown Signed-off-by: Benjamin LaHaise Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/aio.h | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) (limited to 'include/linux') diff --git a/include/linux/aio.h b/include/linux/aio.h index a4d5af907f90..60def658b246 100644 --- a/include/linux/aio.h +++ b/include/linux/aio.h @@ -43,6 +43,40 @@ struct kioctx; #define kiocbIsKicked(iocb) test_bit(KIF_KICKED, &(iocb)->ki_flags) #define kiocbIsCancelled(iocb) test_bit(KIF_CANCELLED, &(iocb)->ki_flags) +/* is there a better place to document function pointer methods? */ +/** + * ki_retry - iocb forward progress callback + * @kiocb: The kiocb struct to advance by performing an operation. + * + * This callback is called when the AIO core wants a given AIO operation + * to make forward progress. The kiocb argument describes the operation + * that is to be performed. As the operation proceeds, perhaps partially, + * ki_retry is expected to update the kiocb with progress made. Typically + * ki_retry is set in the AIO core and it itself calls file_operations + * helpers. + * + * ki_retry's return value determines when the AIO operation is completed + * and an event is generated in the AIO event ring. Except the special + * return values described below, the value that is returned from ki_retry + * is transferred directly into the completion ring as the operation's + * resulting status. Once this has happened ki_retry *MUST NOT* reference + * the kiocb pointer again. + * + * If ki_retry returns -EIOCBQUEUED it has made a promise that aio_complete() + * will be called on the kiocb pointer in the future. The AIO core will + * not ask the method again -- ki_retry must ensure forward progress. + * aio_complete() must be called once and only once in the future, multiple + * calls may result in undefined behaviour. + * + * If ki_retry returns -EIOCBRETRY it has made a promise that kick_iocb() + * will be called on the kiocb pointer in the future. This may happen + * through generic helpers that associate kiocb->ki_wait with a wait + * queue head that ki_retry uses via current->io_wait. It can also happen + * with custom tracking and manual calls to kick_iocb(), though that is + * discouraged. In either case, kick_iocb() must be called once and only + * once. ki_retry must ensure forward progress, the AIO core will wait + * indefinitely for kick_iocb() to be called. + */ struct kiocb { struct list_head ki_run_list; long ki_flags; -- cgit v1.2.2 From fd2e54b35bd70d11c160ded4834e2378e915356e Mon Sep 17 00:00:00 2001 From: Diego Calleja Date: Sat, 1 Oct 2005 17:00:48 +0200 Subject: [PATCH] trivial #if -> #ifdef Use '#ifdef' consistently on __KERNEL__. This was reported as bug #5340 (isn't easier to send a fix than report the bug?!) Signed-off-by: Diego Calleja Signed-off-by: Linus Torvalds --- include/linux/mod_devicetable.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h index 4ed2107bc020..2f0299a448f6 100644 --- a/include/linux/mod_devicetable.h +++ b/include/linux/mod_devicetable.h @@ -183,7 +183,7 @@ struct of_device_id char name[32]; char type[32]; char compatible[128]; -#if __KERNEL__ +#ifdef __KERNEL__ void *data; #else kernel_ulong_t data; -- cgit v1.2.2 From 325ed8239309cb29f10ea58c5a668058ead11479 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 3 Oct 2005 13:57:23 -0700 Subject: [NET]: Fix packet timestamping. I've found the problem in general. It affects any 64-bit architecture. The problem occurs when you change the system time. Suppose that when you boot your system clock is forward by a day. This gets recorded down in skb_tv_base. You then wind the clock back by a day. From that point onwards the offset will be negative which essentially overflows the 32-bit variables they're stored in. In fact, why don't we just store the real time stamp in those 32-bit variables? After all, we're not going to overflow for quite a while yet. When we do overflow, we'll need a better solution of course. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- include/linux/skbuff.h | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 2741c0c55e83..466c879f82b8 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -155,8 +155,6 @@ struct skb_shared_info { #define SKB_DATAREF_SHIFT 16 #define SKB_DATAREF_MASK ((1 << SKB_DATAREF_SHIFT) - 1) -extern struct timeval skb_tv_base; - struct skb_timeval { u32 off_sec; u32 off_usec; @@ -175,7 +173,7 @@ enum { * @prev: Previous buffer in list * @list: List we are on * @sk: Socket we are owned by - * @tstamp: Time we arrived stored as offset to skb_tv_base + * @tstamp: Time we arrived * @dev: Device we arrived on/are leaving by * @input_dev: Device we arrived on * @h: Transport layer header @@ -1255,10 +1253,6 @@ static inline void skb_get_timestamp(const struct sk_buff *skb, struct timeval * { stamp->tv_sec = skb->tstamp.off_sec; stamp->tv_usec = skb->tstamp.off_usec; - if (skb->tstamp.off_sec) { - stamp->tv_sec += skb_tv_base.tv_sec; - stamp->tv_usec += skb_tv_base.tv_usec; - } } /** @@ -1272,8 +1266,8 @@ static inline void skb_get_timestamp(const struct sk_buff *skb, struct timeval * */ static inline void skb_set_timestamp(struct sk_buff *skb, const struct timeval *stamp) { - skb->tstamp.off_sec = stamp->tv_sec - skb_tv_base.tv_sec; - skb->tstamp.off_usec = stamp->tv_usec - skb_tv_base.tv_usec; + skb->tstamp.off_sec = stamp->tv_sec; + skb->tstamp.off_usec = stamp->tv_usec; } extern void __net_timestamp(struct sk_buff *skb); -- cgit v1.2.2 From 81c3d5470ecc70564eb9209946730fe2be93ad06 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 3 Oct 2005 14:13:38 -0700 Subject: [INET]: speedup inet (tcp/dccp) lookups Arnaldo and I agreed it could be applied now, because I have other pending patches depending on this one (Thank you Arnaldo) (The other important patch moves skc_refcnt in a separate cache line, so that the SMP/NUMA performance doesnt suffer from cache line ping pongs) 1) First some performance data : -------------------------------- tcp_v4_rcv() wastes a *lot* of time in __inet_lookup_established() The most time critical code is : sk_for_each(sk, node, &head->chain) { if (INET_MATCH(sk, acookie, saddr, daddr, ports, dif)) goto hit; /* You sunk my battleship! */ } The sk_for_each() does use prefetch() hints but only the begining of "struct sock" is prefetched. As INET_MATCH first comparison uses inet_sk(__sk)->daddr, wich is far away from the begining of "struct sock", it has to bring into CPU cache cold cache line. Each iteration has to use at least 2 cache lines. This can be problematic if some chains are very long. 2) The goal ----------- The idea I had is to change things so that INET_MATCH() may return FALSE in 99% of cases only using the data already in the CPU cache, using one cache line per iteration. 3) Description of the patch --------------------------- Adds a new 'unsigned int skc_hash' field in 'struct sock_common', filling a 32 bits hole on 64 bits platform. struct sock_common { unsigned short skc_family; volatile unsigned char skc_state; unsigned char skc_reuse; int skc_bound_dev_if; struct hlist_node skc_node; struct hlist_node skc_bind_node; atomic_t skc_refcnt; + unsigned int skc_hash; struct proto *skc_prot; }; Store in this 32 bits field the full hash, not masked by (ehash_size - 1) Using this full hash as the first comparison done in INET_MATCH permits us immediatly skip the element without touching a second cache line in case of a miss. Suppress the sk_hashent/tw_hashent fields since skc_hash (aliased to sk_hash and tw_hash) already contains the slot number if we mask with (ehash_size - 1) File include/net/inet_hashtables.h 64 bits platforms : #define INET_MATCH(__sk, __hash, __cookie, __saddr, __daddr, __ports, __dif)\ (((__sk)->sk_hash == (__hash)) ((*((__u64 *)&(inet_sk(__sk)->daddr)))== (__cookie)) && \ ((*((__u32 *)&(inet_sk(__sk)->dport))) == (__ports)) && \ (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif)))) 32bits platforms: #define TCP_IPV4_MATCH(__sk, __hash, __cookie, __saddr, __daddr, __ports, __dif)\ (((__sk)->sk_hash == (__hash)) && \ (inet_sk(__sk)->daddr == (__saddr)) && \ (inet_sk(__sk)->rcv_saddr == (__daddr)) && \ (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif)))) - Adds a prefetch(head->chain.first) in __inet_lookup_established()/__tcp_v4_check_established() and __inet6_lookup_established()/__tcp_v6_check_established() and __dccp_v4_check_established() to bring into cache the first element of the list, before the {read|write}_lock(&head->lock); Signed-off-by: Eric Dumazet Acked-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- include/linux/ipv6.h | 5 +++-- include/linux/tc_ematch/tc_em_meta.h | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index bb6f88e14061..e0b922785d98 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -372,8 +372,9 @@ static inline struct raw6_sock *raw6_sk(const struct sock *sk) #define inet_v6_ipv6only(__sk) 0 #endif /* defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) */ -#define INET6_MATCH(__sk, __saddr, __daddr, __ports, __dif) \ - (((*((__u32 *)&(inet_sk(__sk)->dport))) == (__ports)) && \ +#define INET6_MATCH(__sk, __hash, __saddr, __daddr, __ports, __dif)\ + (((__sk)->sk_hash == (__hash)) && \ + ((*((__u32 *)&(inet_sk(__sk)->dport))) == (__ports)) && \ ((__sk)->sk_family == AF_INET6) && \ ipv6_addr_equal(&inet6_sk(__sk)->daddr, (__saddr)) && \ ipv6_addr_equal(&inet6_sk(__sk)->rcv_saddr, (__daddr)) && \ diff --git a/include/linux/tc_ematch/tc_em_meta.h b/include/linux/tc_ematch/tc_em_meta.h index 081b1ee8516e..e21937cf91d0 100644 --- a/include/linux/tc_ematch/tc_em_meta.h +++ b/include/linux/tc_ematch/tc_em_meta.h @@ -71,7 +71,7 @@ enum TCF_META_ID_SK_SNDBUF, TCF_META_ID_SK_ALLOCS, TCF_META_ID_SK_ROUTE_CAPS, - TCF_META_ID_SK_HASHENT, + TCF_META_ID_SK_HASH, TCF_META_ID_SK_LINGERTIME, TCF_META_ID_SK_ACK_BACKLOG, TCF_META_ID_SK_MAX_ACK_BACKLOG, -- cgit v1.2.2 From e5ed639913eea3e4783a550291775ab78dd84966 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 3 Oct 2005 14:35:55 -0700 Subject: [IPV4]: Replace __in_dev_get with __in_dev_get_rcu/rtnl The following patch renames __in_dev_get() to __in_dev_get_rtnl() and introduces __in_dev_get_rcu() to cover the second case. 1) RCU with refcnt should use in_dev_get(). 2) RCU without refcnt should use __in_dev_get_rcu(). 3) All others must hold RTNL and use __in_dev_get_rtnl(). There is one exception in net/ipv4/route.c which is in fact a pre-existing race condition. I've marked it as such so that we remember to fix it. This patch is based on suggestions and prior work by Suzanne Wood and Paul McKenney. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- include/linux/inetdevice.h | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/inetdevice.h b/include/linux/inetdevice.h index 7e1e15f934f3..fd7af86151b1 100644 --- a/include/linux/inetdevice.h +++ b/include/linux/inetdevice.h @@ -142,13 +142,21 @@ static __inline__ int bad_mask(u32 mask, u32 addr) #define endfor_ifa(in_dev) } +static inline struct in_device *__in_dev_get_rcu(const struct net_device *dev) +{ + struct in_device *in_dev = dev->ip_ptr; + if (in_dev) + in_dev = rcu_dereference(in_dev); + return in_dev; +} + static __inline__ struct in_device * in_dev_get(const struct net_device *dev) { struct in_device *in_dev; rcu_read_lock(); - in_dev = dev->ip_ptr; + in_dev = __in_dev_get_rcu(dev); if (in_dev) atomic_inc(&in_dev->refcnt); rcu_read_unlock(); @@ -156,7 +164,7 @@ in_dev_get(const struct net_device *dev) } static __inline__ struct in_device * -__in_dev_get(const struct net_device *dev) +__in_dev_get_rtnl(const struct net_device *dev) { return (struct in_device*)dev->ip_ptr; } -- cgit v1.2.2 From ce0fe7e70a0ad11097a3773e9f3f0de3d859edf0 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 4 Oct 2005 17:43:06 +0100 Subject: [PATCH] bfs endianness annotations Signed-off-by: Alexey Dobriyan Signed-off-by: Al Viro Signed-off-by: Linus Torvalds --- include/linux/bfs_fs.h | 42 +++++++++++++++++++++--------------------- 1 file changed, 21 insertions(+), 21 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bfs_fs.h b/include/linux/bfs_fs.h index c1237aa92e38..8ed6dfdcd783 100644 --- a/include/linux/bfs_fs.h +++ b/include/linux/bfs_fs.h @@ -20,19 +20,19 @@ /* BFS inode layout on disk */ struct bfs_inode { - __u16 i_ino; + __le16 i_ino; __u16 i_unused; - __u32 i_sblock; - __u32 i_eblock; - __u32 i_eoffset; - __u32 i_vtype; - __u32 i_mode; - __s32 i_uid; - __s32 i_gid; - __u32 i_nlink; - __u32 i_atime; - __u32 i_mtime; - __u32 i_ctime; + __le32 i_sblock; + __le32 i_eblock; + __le32 i_eoffset; + __le32 i_vtype; + __le32 i_mode; + __le32 i_uid; + __le32 i_gid; + __le32 i_nlink; + __le32 i_atime; + __le32 i_mtime; + __le32 i_ctime; __u32 i_padding[4]; }; @@ -41,17 +41,17 @@ struct bfs_inode { #define BFS_DIRS_PER_BLOCK 32 struct bfs_dirent { - __u16 ino; + __le16 ino; char name[BFS_NAMELEN]; }; /* BFS superblock layout on disk */ struct bfs_super_block { - __u32 s_magic; - __u32 s_start; - __u32 s_end; - __s32 s_from; - __s32 s_to; + __le32 s_magic; + __le32 s_start; + __le32 s_end; + __le32 s_from; + __le32 s_to; __s32 s_bfrom; __s32 s_bto; char s_fsname[6]; @@ -66,15 +66,15 @@ struct bfs_super_block { #define BFS_INO2OFF(ino) \ ((__u32)(((ino) - BFS_ROOT_INO) * sizeof(struct bfs_inode)) + BFS_BSIZE) #define BFS_NZFILESIZE(ip) \ - ((cpu_to_le32((ip)->i_eoffset) + 1) - cpu_to_le32((ip)->i_sblock) * BFS_BSIZE) + ((le32_to_cpu((ip)->i_eoffset) + 1) - le32_to_cpu((ip)->i_sblock) * BFS_BSIZE) #define BFS_FILESIZE(ip) \ ((ip)->i_sblock == 0 ? 0 : BFS_NZFILESIZE(ip)) #define BFS_FILEBLOCKS(ip) \ - ((ip)->i_sblock == 0 ? 0 : (cpu_to_le32((ip)->i_eblock) + 1) - cpu_to_le32((ip)->i_sblock)) + ((ip)->i_sblock == 0 ? 0 : (le32_to_cpu((ip)->i_eblock) + 1) - le32_to_cpu((ip)->i_sblock)) #define BFS_UNCLEAN(bfs_sb, sb) \ - ((cpu_to_le32(bfs_sb->s_from) != -1) && (cpu_to_le32(bfs_sb->s_to) != -1) && !(sb->s_flags & MS_RDONLY)) + ((le32_to_cpu(bfs_sb->s_from) != -1) && (le32_to_cpu(bfs_sb->s_to) != -1) && !(sb->s_flags & MS_RDONLY)) #endif /* _LINUX_BFS_FS_H */ -- cgit v1.2.2 From 7b5b3f3d826ea87c224c66de9c95c09e7f110ecd Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Tue, 4 Oct 2005 22:38:44 -0700 Subject: [ATM]: fix sparse gfp nocast warnings Fix implicit nocast warnings in atm code: net/atm/atm_misc.c:35:44: warning: implicit cast to nocast type drivers/atm/fore200e.c:183:33: warning: implicit cast to nocast type Also use kzalloc() instead of kmalloc(). Signed-off-by: Randy Dunlap Signed-off-by: David S. Miller --- include/linux/atmdev.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/atmdev.h b/include/linux/atmdev.h index 9f374cfa1b05..f1fd849e5535 100644 --- a/include/linux/atmdev.h +++ b/include/linux/atmdev.h @@ -457,7 +457,7 @@ static inline void atm_dev_put(struct atm_dev *dev) int atm_charge(struct atm_vcc *vcc,int truesize); struct sk_buff *atm_alloc_charge(struct atm_vcc *vcc,int pdu_size, - int gfp_flags); + unsigned int __nocast gfp_flags); int atm_pcr_goal(struct atm_trafprm *tp); void vcc_release_async(struct atm_vcc *vcc, int reply); -- cgit v1.2.2 From 17b698856328a42d5874ac87640e2cd84a824eef Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Tue, 4 Oct 2005 22:41:16 -0700 Subject: [CONNECTOR]: fix sparse gfp nocast warnings Fix implicit nocast warnings in connector code: drivers/connector/connector.c:102:24: warning: implicit cast to nocast type drivers/connector/connector.c:114:45: warning: implicit cast to nocast type Signed-off-by: Randy Dunlap Signed-off-by: David S. Miller --- include/linux/connector.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/connector.h b/include/linux/connector.h index 86d4b0a81713..96582c9911ac 100644 --- a/include/linux/connector.h +++ b/include/linux/connector.h @@ -149,7 +149,7 @@ struct cn_dev { int cn_add_callback(struct cb_id *, char *, void (*callback) (void *)); void cn_del_callback(struct cb_id *); -int cn_netlink_send(struct cn_msg *, u32, int); +int cn_netlink_send(struct cn_msg *, u32, unsigned int __nocast); int cn_queue_add_callback(struct cn_queue_dev *dev, char *name, struct cb_id *id, void (*callback)(void *)); void cn_queue_del_callback(struct cn_queue_dev *dev, struct cb_id *id); -- cgit v1.2.2 From 3d2aef668920e8d93b77f145f8f647f62abe75db Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Tue, 4 Oct 2005 22:45:14 -0700 Subject: [TEXTSEARCH]: fix sparse gfp nocast warnings Fix nocast sparse warnings: include/linux/textsearch.h:165:57: warning: implicit cast to nocast type Signed-off-by: Randy Dunlap Signed-off-by: David S. Miller --- include/linux/textsearch.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/textsearch.h b/include/linux/textsearch.h index 941f45ac117a..1a4990e448e9 100644 --- a/include/linux/textsearch.h +++ b/include/linux/textsearch.h @@ -158,7 +158,8 @@ extern unsigned int textsearch_find_continuous(struct ts_config *, #define TS_PRIV_ALIGNTO 8 #define TS_PRIV_ALIGN(len) (((len) + TS_PRIV_ALIGNTO-1) & ~(TS_PRIV_ALIGNTO-1)) -static inline struct ts_config *alloc_ts_config(size_t payload, int gfp_mask) +static inline struct ts_config *alloc_ts_config(size_t payload, + unsigned int __nocast gfp_mask) { struct ts_config *conf; -- cgit v1.2.2 From 0f21ba7cc3320d33459ecb3f538f1a42040c29cd Mon Sep 17 00:00:00 2001 From: Eric Kinzie Date: Thu, 6 Oct 2005 22:19:28 -0700 Subject: [ATM]: add support for LECS addresses learned from network From: Eric Kinzie Signed-off-by: Chas Williams Signed-off-by: David S. Miller --- include/linux/atmdev.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/atmdev.h b/include/linux/atmdev.h index f1fd849e5535..aca9b344bd35 100644 --- a/include/linux/atmdev.h +++ b/include/linux/atmdev.h @@ -76,6 +76,13 @@ struct atm_dev_stats { /* set interface ESI */ #define ATM_SETESIF _IOW('a',ATMIOC_ITF+13,struct atmif_sioc) /* force interface ESI */ +#define ATM_ADDLECSADDR _IOW('a', ATMIOC_ITF+14, struct atmif_sioc) + /* register a LECS address */ +#define ATM_DELLECSADDR _IOW('a', ATMIOC_ITF+15, struct atmif_sioc) + /* unregister a LECS address */ +#define ATM_GETLECSADDR _IOW('a', ATMIOC_ITF+16, struct atmif_sioc) + /* retrieve LECS address(es) */ + #define ATM_GETSTAT _IOW('a',ATMIOC_SARCOM+0,struct atmif_sioc) /* get AAL layer statistics */ #define ATM_GETSTATZ _IOW('a',ATMIOC_SARCOM+1,struct atmif_sioc) @@ -328,6 +335,8 @@ struct atm_dev_addr { struct list_head entry; /* next address */ }; +enum atm_addr_type_t { ATM_ADDR_LOCAL, ATM_ADDR_LECS }; + struct atm_dev { const struct atmdev_ops *ops; /* device operations; NULL if unused */ const struct atmphy_ops *phy; /* PHY operations, may be undefined */ @@ -338,6 +347,7 @@ struct atm_dev { void *phy_data; /* private PHY date */ unsigned long flags; /* device flags (ATM_DF_*) */ struct list_head local; /* local ATM addresses */ + struct list_head lecs; /* LECS ATM addresses learned via ILMI */ unsigned char esi[ESI_LEN]; /* ESI ("MAC" addr) */ struct atm_cirange ci_range; /* VPI/VCI range */ struct k_atm_dev_stats stats; /* statistics */ -- cgit v1.2.2 From 468ed2b0c85ec4310b429e60358213b6d077289e Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 7 Oct 2005 15:07:38 +0100 Subject: [PATCH] Keys: Split key permissions checking into a .c file The attached patch splits key permissions checking out of key-ui.h and moves it into a .c file. It's quite large and called quite a lot, and it's about to get bigger with the addition of LSM support for keys... key_any_permission() is also discarded as it's no longer used. Signed-Off-By: David Howells Signed-off-by: Linus Torvalds --- include/linux/key-ui.h | 91 +++----------------------------------------------- 1 file changed, 5 insertions(+), 86 deletions(-) (limited to 'include/linux') diff --git a/include/linux/key-ui.h b/include/linux/key-ui.h index 918c34a8347e..7a2e332067c3 100644 --- a/include/linux/key-ui.h +++ b/include/linux/key-ui.h @@ -38,97 +38,16 @@ struct keyring_list { struct key *keys[0]; }; - /* * check to see whether permission is granted to use a key in the desired way */ -static inline int key_permission(const key_ref_t key_ref, key_perm_t perm) -{ - struct key *key = key_ref_to_ptr(key_ref); - key_perm_t kperm; - - if (is_key_possessed(key_ref)) - kperm = key->perm >> 24; - else if (key->uid == current->fsuid) - kperm = key->perm >> 16; - else if (key->gid != -1 && - key->perm & KEY_GRP_ALL && - in_group_p(key->gid) - ) - kperm = key->perm >> 8; - else - kperm = key->perm; - - kperm = kperm & perm & KEY_ALL; - - return kperm == perm; -} - -/* - * check to see whether permission is granted to use a key in at least one of - * the desired ways - */ -static inline int key_any_permission(const key_ref_t key_ref, key_perm_t perm) -{ - struct key *key = key_ref_to_ptr(key_ref); - key_perm_t kperm; - - if (is_key_possessed(key_ref)) - kperm = key->perm >> 24; - else if (key->uid == current->fsuid) - kperm = key->perm >> 16; - else if (key->gid != -1 && - key->perm & KEY_GRP_ALL && - in_group_p(key->gid) - ) - kperm = key->perm >> 8; - else - kperm = key->perm; +extern int key_task_permission(const key_ref_t key_ref, + struct task_struct *context, + key_perm_t perm); - kperm = kperm & perm & KEY_ALL; - - return kperm != 0; -} - -static inline int key_task_groups_search(struct task_struct *tsk, gid_t gid) -{ - int ret; - - task_lock(tsk); - ret = groups_search(tsk->group_info, gid); - task_unlock(tsk); - return ret; -} - -static inline int key_task_permission(const key_ref_t key_ref, - struct task_struct *context, - key_perm_t perm) +static inline int key_permission(const key_ref_t key_ref, key_perm_t perm) { - struct key *key = key_ref_to_ptr(key_ref); - key_perm_t kperm; - - if (is_key_possessed(key_ref)) { - kperm = key->perm >> 24; - } - else if (key->uid == context->fsuid) { - kperm = key->perm >> 16; - } - else if (key->gid != -1 && - key->perm & KEY_GRP_ALL && ( - key->gid == context->fsgid || - key_task_groups_search(context, key->gid) - ) - ) { - kperm = key->perm >> 8; - } - else { - kperm = key->perm; - } - - kperm = kperm & perm & KEY_ALL; - - return kperm == perm; - + return key_task_permission(key_ref, current, perm); } extern key_ref_t lookup_user_key(struct task_struct *context, -- cgit v1.2.2 From dd0fc66fb33cd610bc1a5db8a5e232d34879b4d7 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 7 Oct 2005 07:46:04 +0100 Subject: [PATCH] gfp flags annotations - part 1 - added typedef unsigned int __nocast gfp_t; - replaced __nocast uses for gfp flags with gfp_t - it gives exactly the same warnings as far as sparse is concerned, doesn't change generated code (from gcc point of view we replaced unsigned int with typedef) and documents what's going on far better. Signed-off-by: Al Viro Signed-off-by: Linus Torvalds --- include/linux/atmdev.h | 2 +- include/linux/bio.h | 6 +++--- include/linux/buffer_head.h | 2 +- include/linux/connector.h | 2 +- include/linux/cpuset.h | 5 ++--- include/linux/dmapool.h | 2 +- include/linux/gfp.h | 14 +++++++------- include/linux/jbd.h | 2 +- include/linux/kfifo.h | 4 ++-- include/linux/mempool.h | 9 ++++----- include/linux/netlink.h | 2 +- include/linux/pagemap.h | 2 +- include/linux/posix_acl.h | 6 +++--- include/linux/radix-tree.h | 2 +- include/linux/security.h | 6 ++---- include/linux/skbuff.h | 28 ++++++++++++++-------------- include/linux/slab.h | 19 +++++++++---------- include/linux/string.h | 2 +- include/linux/swap.h | 2 +- include/linux/textsearch.h | 2 +- include/linux/types.h | 4 ++++ include/linux/vmalloc.h | 4 ++-- 22 files changed, 63 insertions(+), 64 deletions(-) (limited to 'include/linux') diff --git a/include/linux/atmdev.h b/include/linux/atmdev.h index aca9b344bd35..e7d0593bb576 100644 --- a/include/linux/atmdev.h +++ b/include/linux/atmdev.h @@ -467,7 +467,7 @@ static inline void atm_dev_put(struct atm_dev *dev) int atm_charge(struct atm_vcc *vcc,int truesize); struct sk_buff *atm_alloc_charge(struct atm_vcc *vcc,int pdu_size, - unsigned int __nocast gfp_flags); + gfp_t gfp_flags); int atm_pcr_goal(struct atm_trafprm *tp); void vcc_release_async(struct atm_vcc *vcc, int reply); diff --git a/include/linux/bio.h b/include/linux/bio.h index 6e1c79c8b6bf..3344b4e8e43a 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -276,8 +276,8 @@ extern void bio_pair_release(struct bio_pair *dbio); extern struct bio_set *bioset_create(int, int, int); extern void bioset_free(struct bio_set *); -extern struct bio *bio_alloc(unsigned int __nocast, int); -extern struct bio *bio_alloc_bioset(unsigned int __nocast, int, struct bio_set *); +extern struct bio *bio_alloc(gfp_t, int); +extern struct bio *bio_alloc_bioset(gfp_t, int, struct bio_set *); extern void bio_put(struct bio *); extern void bio_free(struct bio *, struct bio_set *); @@ -287,7 +287,7 @@ extern int bio_phys_segments(struct request_queue *, struct bio *); extern int bio_hw_segments(struct request_queue *, struct bio *); extern void __bio_clone(struct bio *, struct bio *); -extern struct bio *bio_clone(struct bio *, unsigned int __nocast); +extern struct bio *bio_clone(struct bio *, gfp_t); extern void bio_init(struct bio *); diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index 90828493791f..6a1d154c0825 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -172,7 +172,7 @@ void __brelse(struct buffer_head *); void __bforget(struct buffer_head *); void __breadahead(struct block_device *, sector_t block, int size); struct buffer_head *__bread(struct block_device *, sector_t block, int size); -struct buffer_head *alloc_buffer_head(unsigned int __nocast gfp_flags); +struct buffer_head *alloc_buffer_head(gfp_t gfp_flags); void free_buffer_head(struct buffer_head * bh); void FASTCALL(unlock_buffer(struct buffer_head *bh)); void FASTCALL(__lock_buffer(struct buffer_head *bh)); diff --git a/include/linux/connector.h b/include/linux/connector.h index 96582c9911ac..95952cc1f525 100644 --- a/include/linux/connector.h +++ b/include/linux/connector.h @@ -149,7 +149,7 @@ struct cn_dev { int cn_add_callback(struct cb_id *, char *, void (*callback) (void *)); void cn_del_callback(struct cb_id *); -int cn_netlink_send(struct cn_msg *, u32, unsigned int __nocast); +int cn_netlink_send(struct cn_msg *, u32, gfp_t); int cn_queue_add_callback(struct cn_queue_dev *dev, char *name, struct cb_id *id, void (*callback)(void *)); void cn_queue_del_callback(struct cn_queue_dev *dev, struct cb_id *id); diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h index 24062a1dbf61..6e2deef96b34 100644 --- a/include/linux/cpuset.h +++ b/include/linux/cpuset.h @@ -23,7 +23,7 @@ void cpuset_init_current_mems_allowed(void); void cpuset_update_current_mems_allowed(void); void cpuset_restrict_to_mems_allowed(unsigned long *nodes); int cpuset_zonelist_valid_mems_allowed(struct zonelist *zl); -extern int cpuset_zone_allowed(struct zone *z, unsigned int __nocast gfp_mask); +extern int cpuset_zone_allowed(struct zone *z, gfp_t gfp_mask); extern int cpuset_excl_nodes_overlap(const struct task_struct *p); extern struct file_operations proc_cpuset_operations; extern char *cpuset_task_status_allowed(struct task_struct *task, char *buffer); @@ -49,8 +49,7 @@ static inline int cpuset_zonelist_valid_mems_allowed(struct zonelist *zl) return 1; } -static inline int cpuset_zone_allowed(struct zone *z, - unsigned int __nocast gfp_mask) +static inline int cpuset_zone_allowed(struct zone *z, gfp_t gfp_mask) { return 1; } diff --git a/include/linux/dmapool.h b/include/linux/dmapool.h index 4932ee5c77f0..76f12f46db7f 100644 --- a/include/linux/dmapool.h +++ b/include/linux/dmapool.h @@ -19,7 +19,7 @@ struct dma_pool *dma_pool_create(const char *name, struct device *dev, void dma_pool_destroy(struct dma_pool *pool); -void *dma_pool_alloc(struct dma_pool *pool, unsigned int __nocast mem_flags, +void *dma_pool_alloc(struct dma_pool *pool, gfp_t mem_flags, dma_addr_t *handle); void dma_pool_free(struct dma_pool *pool, void *vaddr, dma_addr_t addr); diff --git a/include/linux/gfp.h b/include/linux/gfp.h index 4dc990f3b5cc..3010e172394d 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -85,9 +85,9 @@ static inline void arch_free_page(struct page *page, int order) { } #endif extern struct page * -FASTCALL(__alloc_pages(unsigned int, unsigned int, struct zonelist *)); +FASTCALL(__alloc_pages(gfp_t, unsigned int, struct zonelist *)); -static inline struct page *alloc_pages_node(int nid, unsigned int __nocast gfp_mask, +static inline struct page *alloc_pages_node(int nid, gfp_t gfp_mask, unsigned int order) { if (unlikely(order >= MAX_ORDER)) @@ -98,17 +98,17 @@ static inline struct page *alloc_pages_node(int nid, unsigned int __nocast gfp_m } #ifdef CONFIG_NUMA -extern struct page *alloc_pages_current(unsigned int __nocast gfp_mask, unsigned order); +extern struct page *alloc_pages_current(gfp_t gfp_mask, unsigned order); static inline struct page * -alloc_pages(unsigned int __nocast gfp_mask, unsigned int order) +alloc_pages(gfp_t gfp_mask, unsigned int order) { if (unlikely(order >= MAX_ORDER)) return NULL; return alloc_pages_current(gfp_mask, order); } -extern struct page *alloc_page_vma(unsigned __nocast gfp_mask, +extern struct page *alloc_page_vma(gfp_t gfp_mask, struct vm_area_struct *vma, unsigned long addr); #else #define alloc_pages(gfp_mask, order) \ @@ -117,8 +117,8 @@ extern struct page *alloc_page_vma(unsigned __nocast gfp_mask, #endif #define alloc_page(gfp_mask) alloc_pages(gfp_mask, 0) -extern unsigned long FASTCALL(__get_free_pages(unsigned int __nocast gfp_mask, unsigned int order)); -extern unsigned long FASTCALL(get_zeroed_page(unsigned int __nocast gfp_mask)); +extern unsigned long FASTCALL(__get_free_pages(gfp_t gfp_mask, unsigned int order)); +extern unsigned long FASTCALL(get_zeroed_page(gfp_t gfp_mask)); #define __get_free_page(gfp_mask) \ __get_free_pages((gfp_mask),0) diff --git a/include/linux/jbd.h b/include/linux/jbd.h index de097269bd7f..ff853b3173c6 100644 --- a/include/linux/jbd.h +++ b/include/linux/jbd.h @@ -935,7 +935,7 @@ void journal_put_journal_head(struct journal_head *jh); */ extern kmem_cache_t *jbd_handle_cache; -static inline handle_t *jbd_alloc_handle(unsigned int __nocast gfp_flags) +static inline handle_t *jbd_alloc_handle(gfp_t gfp_flags) { return kmem_cache_alloc(jbd_handle_cache, gfp_flags); } diff --git a/include/linux/kfifo.h b/include/linux/kfifo.h index c27cd428d269..48eccd865bd8 100644 --- a/include/linux/kfifo.h +++ b/include/linux/kfifo.h @@ -35,8 +35,8 @@ struct kfifo { }; extern struct kfifo *kfifo_init(unsigned char *buffer, unsigned int size, - unsigned int __nocast gfp_mask, spinlock_t *lock); -extern struct kfifo *kfifo_alloc(unsigned int size, unsigned int __nocast gfp_mask, + gfp_t gfp_mask, spinlock_t *lock); +extern struct kfifo *kfifo_alloc(unsigned int size, gfp_t gfp_mask, spinlock_t *lock); extern void kfifo_free(struct kfifo *fifo); extern unsigned int __kfifo_put(struct kfifo *fifo, diff --git a/include/linux/mempool.h b/include/linux/mempool.h index 796220ce47cc..f2427d7394b0 100644 --- a/include/linux/mempool.h +++ b/include/linux/mempool.h @@ -6,7 +6,7 @@ #include -typedef void * (mempool_alloc_t)(unsigned int __nocast gfp_mask, void *pool_data); +typedef void * (mempool_alloc_t)(gfp_t gfp_mask, void *pool_data); typedef void (mempool_free_t)(void *element, void *pool_data); typedef struct mempool_s { @@ -26,17 +26,16 @@ extern mempool_t *mempool_create(int min_nr, mempool_alloc_t *alloc_fn, extern mempool_t *mempool_create_node(int min_nr, mempool_alloc_t *alloc_fn, mempool_free_t *free_fn, void *pool_data, int nid); -extern int mempool_resize(mempool_t *pool, int new_min_nr, - unsigned int __nocast gfp_mask); +extern int mempool_resize(mempool_t *pool, int new_min_nr, gfp_t gfp_mask); extern void mempool_destroy(mempool_t *pool); -extern void * mempool_alloc(mempool_t *pool, unsigned int __nocast gfp_mask); +extern void * mempool_alloc(mempool_t *pool, gfp_t gfp_mask); extern void mempool_free(void *element, mempool_t *pool); /* * A mempool_alloc_t and mempool_free_t that get the memory from * a slab that is passed in through pool_data. */ -void *mempool_alloc_slab(unsigned int __nocast gfp_mask, void *pool_data); +void *mempool_alloc_slab(gfp_t gfp_mask, void *pool_data); void mempool_free_slab(void *element, void *pool_data); #endif /* _LINUX_MEMPOOL_H */ diff --git a/include/linux/netlink.h b/include/linux/netlink.h index bdebdc564506..ba25ca874c20 100644 --- a/include/linux/netlink.h +++ b/include/linux/netlink.h @@ -131,7 +131,7 @@ extern struct sock *netlink_kernel_create(int unit, unsigned int groups, void (* extern void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err); extern int netlink_unicast(struct sock *ssk, struct sk_buff *skb, __u32 pid, int nonblock); extern int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, __u32 pid, - __u32 group, unsigned int __nocast allocation); + __u32 group, gfp_t allocation); extern void netlink_set_err(struct sock *ssk, __u32 pid, __u32 group, int code); extern int netlink_register_notifier(struct notifier_block *nb); extern int netlink_unregister_notifier(struct notifier_block *nb); diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index d9a25647a295..acbf31c154f8 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -19,7 +19,7 @@ #define AS_EIO (__GFP_BITS_SHIFT + 0) /* IO error on async write */ #define AS_ENOSPC (__GFP_BITS_SHIFT + 1) /* ENOSPC on async write */ -static inline unsigned int __nocast mapping_gfp_mask(struct address_space * mapping) +static inline gfp_t mapping_gfp_mask(struct address_space * mapping) { return mapping->flags & __GFP_BITS_MASK; } diff --git a/include/linux/posix_acl.h b/include/linux/posix_acl.h index 4caedddaa033..4bc241290c24 100644 --- a/include/linux/posix_acl.h +++ b/include/linux/posix_acl.h @@ -71,11 +71,11 @@ posix_acl_release(struct posix_acl *acl) /* posix_acl.c */ -extern struct posix_acl *posix_acl_alloc(int, unsigned int __nocast); -extern struct posix_acl *posix_acl_clone(const struct posix_acl *, unsigned int __nocast); +extern struct posix_acl *posix_acl_alloc(int, gfp_t); +extern struct posix_acl *posix_acl_clone(const struct posix_acl *, gfp_t); extern int posix_acl_valid(const struct posix_acl *); extern int posix_acl_permission(struct inode *, const struct posix_acl *, int); -extern struct posix_acl *posix_acl_from_mode(mode_t, unsigned int __nocast); +extern struct posix_acl *posix_acl_from_mode(mode_t, gfp_t); extern int posix_acl_equiv_mode(const struct posix_acl *, mode_t *); extern int posix_acl_create_masq(struct posix_acl *, mode_t *); extern int posix_acl_chmod_masq(struct posix_acl *, mode_t); diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h index 9c51917b1cce..045d4761febc 100644 --- a/include/linux/radix-tree.h +++ b/include/linux/radix-tree.h @@ -50,7 +50,7 @@ void *radix_tree_delete(struct radix_tree_root *, unsigned long); unsigned int radix_tree_gang_lookup(struct radix_tree_root *root, void **results, unsigned long first_index, unsigned int max_items); -int radix_tree_preload(unsigned int __nocast gfp_mask); +int radix_tree_preload(gfp_t gfp_mask); void radix_tree_init(void); void *radix_tree_tag_set(struct radix_tree_root *root, unsigned long index, int tag); diff --git a/include/linux/security.h b/include/linux/security.h index 0e43460d374e..627382e74057 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -2634,8 +2634,7 @@ static inline int security_socket_getpeersec(struct socket *sock, char __user *o return security_ops->socket_getpeersec(sock, optval, optlen, len); } -static inline int security_sk_alloc(struct sock *sk, int family, - unsigned int __nocast priority) +static inline int security_sk_alloc(struct sock *sk, int family, gfp_t priority) { return security_ops->sk_alloc_security(sk, family, priority); } @@ -2752,8 +2751,7 @@ static inline int security_socket_getpeersec(struct socket *sock, char __user *o return -ENOPROTOOPT; } -static inline int security_sk_alloc(struct sock *sk, int family, - unsigned int __nocast priority) +static inline int security_sk_alloc(struct sock *sk, int family, gfp_t priority) { return 0; } diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 466c879f82b8..8f5d9e7f8734 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -302,37 +302,37 @@ struct sk_buff { extern void __kfree_skb(struct sk_buff *skb); extern struct sk_buff *__alloc_skb(unsigned int size, - unsigned int __nocast priority, int fclone); + gfp_t priority, int fclone); static inline struct sk_buff *alloc_skb(unsigned int size, - unsigned int __nocast priority) + gfp_t priority) { return __alloc_skb(size, priority, 0); } static inline struct sk_buff *alloc_skb_fclone(unsigned int size, - unsigned int __nocast priority) + gfp_t priority) { return __alloc_skb(size, priority, 1); } extern struct sk_buff *alloc_skb_from_cache(kmem_cache_t *cp, unsigned int size, - unsigned int __nocast priority); + gfp_t priority); extern void kfree_skbmem(struct sk_buff *skb); extern struct sk_buff *skb_clone(struct sk_buff *skb, - unsigned int __nocast priority); + gfp_t priority); extern struct sk_buff *skb_copy(const struct sk_buff *skb, - unsigned int __nocast priority); + gfp_t priority); extern struct sk_buff *pskb_copy(struct sk_buff *skb, - unsigned int __nocast gfp_mask); + gfp_t gfp_mask); extern int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, - unsigned int __nocast gfp_mask); + gfp_t gfp_mask); extern struct sk_buff *skb_realloc_headroom(struct sk_buff *skb, unsigned int headroom); extern struct sk_buff *skb_copy_expand(const struct sk_buff *skb, int newheadroom, int newtailroom, - unsigned int __nocast priority); + gfp_t priority); extern struct sk_buff * skb_pad(struct sk_buff *skb, int pad); #define dev_kfree_skb(a) kfree_skb(a) extern void skb_over_panic(struct sk_buff *skb, int len, @@ -484,7 +484,7 @@ static inline int skb_shared(const struct sk_buff *skb) * NULL is returned on a memory allocation failure. */ static inline struct sk_buff *skb_share_check(struct sk_buff *skb, - unsigned int __nocast pri) + gfp_t pri) { might_sleep_if(pri & __GFP_WAIT); if (skb_shared(skb)) { @@ -516,7 +516,7 @@ static inline struct sk_buff *skb_share_check(struct sk_buff *skb, * %NULL is returned on a memory allocation failure. */ static inline struct sk_buff *skb_unshare(struct sk_buff *skb, - unsigned int __nocast pri) + gfp_t pri) { might_sleep_if(pri & __GFP_WAIT); if (skb_cloned(skb)) { @@ -1017,7 +1017,7 @@ static inline void __skb_queue_purge(struct sk_buff_head *list) * %NULL is returned in there is no free memory. */ static inline struct sk_buff *__dev_alloc_skb(unsigned int length, - unsigned int __nocast gfp_mask) + gfp_t gfp_mask) { struct sk_buff *skb = alloc_skb(length + 16, gfp_mask); if (likely(skb)) @@ -1130,8 +1130,8 @@ static inline int skb_can_coalesce(struct sk_buff *skb, int i, * If there is no free memory -ENOMEM is returned, otherwise zero * is returned and the old skb data released. */ -extern int __skb_linearize(struct sk_buff *skb, unsigned int __nocast gfp); -static inline int skb_linearize(struct sk_buff *skb, unsigned int __nocast gfp) +extern int __skb_linearize(struct sk_buff *skb, gfp_t gfp); +static inline int skb_linearize(struct sk_buff *skb, gfp_t gfp) { return __skb_linearize(skb, gfp); } diff --git a/include/linux/slab.h b/include/linux/slab.h index 1f356f3bbc64..5fc04a16ecb0 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -61,11 +61,11 @@ extern kmem_cache_t *kmem_cache_create(const char *, size_t, size_t, unsigned lo void (*)(void *, kmem_cache_t *, unsigned long)); extern int kmem_cache_destroy(kmem_cache_t *); extern int kmem_cache_shrink(kmem_cache_t *); -extern void *kmem_cache_alloc(kmem_cache_t *, unsigned int __nocast); +extern void *kmem_cache_alloc(kmem_cache_t *, gfp_t); extern void kmem_cache_free(kmem_cache_t *, void *); extern unsigned int kmem_cache_size(kmem_cache_t *); extern const char *kmem_cache_name(kmem_cache_t *); -extern kmem_cache_t *kmem_find_general_cachep(size_t size, unsigned int __nocast gfpflags); +extern kmem_cache_t *kmem_find_general_cachep(size_t size, gfp_t gfpflags); /* Size description struct for general caches. */ struct cache_sizes { @@ -74,9 +74,9 @@ struct cache_sizes { kmem_cache_t *cs_dmacachep; }; extern struct cache_sizes malloc_sizes[]; -extern void *__kmalloc(size_t, unsigned int __nocast); +extern void *__kmalloc(size_t, gfp_t); -static inline void *kmalloc(size_t size, unsigned int __nocast flags) +static inline void *kmalloc(size_t size, gfp_t flags) { if (__builtin_constant_p(size)) { int i = 0; @@ -99,7 +99,7 @@ found: return __kmalloc(size, flags); } -extern void *kzalloc(size_t, unsigned int __nocast); +extern void *kzalloc(size_t, gfp_t); /** * kcalloc - allocate memory for an array. The memory is set to zero. @@ -107,7 +107,7 @@ extern void *kzalloc(size_t, unsigned int __nocast); * @size: element size. * @flags: the type of memory to allocate. */ -static inline void *kcalloc(size_t n, size_t size, unsigned int __nocast flags) +static inline void *kcalloc(size_t n, size_t size, gfp_t flags) { if (n != 0 && size > INT_MAX / n) return NULL; @@ -118,15 +118,14 @@ extern void kfree(const void *); extern unsigned int ksize(const void *); #ifdef CONFIG_NUMA -extern void *kmem_cache_alloc_node(kmem_cache_t *, - unsigned int __nocast flags, int node); -extern void *kmalloc_node(size_t size, unsigned int __nocast flags, int node); +extern void *kmem_cache_alloc_node(kmem_cache_t *, gfp_t flags, int node); +extern void *kmalloc_node(size_t size, gfp_t flags, int node); #else static inline void *kmem_cache_alloc_node(kmem_cache_t *cachep, int flags, int node) { return kmem_cache_alloc(cachep, flags); } -static inline void *kmalloc_node(size_t size, unsigned int __nocast flags, int node) +static inline void *kmalloc_node(size_t size, gfp_t flags, int node) { return kmalloc(size, flags); } diff --git a/include/linux/string.h b/include/linux/string.h index dab2652acbd8..369be3264a55 100644 --- a/include/linux/string.h +++ b/include/linux/string.h @@ -88,7 +88,7 @@ extern int memcmp(const void *,const void *,__kernel_size_t); extern void * memchr(const void *,int,__kernel_size_t); #endif -extern char *kstrdup(const char *s, unsigned int __nocast gfp); +extern char *kstrdup(const char *s, gfp_t gfp); #ifdef __cplusplus } diff --git a/include/linux/swap.h b/include/linux/swap.h index 3c9ff0048153..a7bf1a3b1496 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -147,7 +147,7 @@ struct swap_list_t { #define vm_swap_full() (nr_swap_pages*2 < total_swap_pages) /* linux/mm/oom_kill.c */ -extern void out_of_memory(unsigned int __nocast gfp_mask, int order); +extern void out_of_memory(gfp_t gfp_mask, int order); /* linux/mm/memory.c */ extern void swapin_readahead(swp_entry_t, unsigned long, struct vm_area_struct *); diff --git a/include/linux/textsearch.h b/include/linux/textsearch.h index 1a4990e448e9..515046d1b2f4 100644 --- a/include/linux/textsearch.h +++ b/include/linux/textsearch.h @@ -159,7 +159,7 @@ extern unsigned int textsearch_find_continuous(struct ts_config *, #define TS_PRIV_ALIGN(len) (((len) + TS_PRIV_ALIGNTO-1) & ~(TS_PRIV_ALIGNTO-1)) static inline struct ts_config *alloc_ts_config(size_t payload, - unsigned int __nocast gfp_mask) + gfp_t gfp_mask) { struct ts_config *conf; diff --git a/include/linux/types.h b/include/linux/types.h index 2b678c22ca4a..0aee34f9da9f 100644 --- a/include/linux/types.h +++ b/include/linux/types.h @@ -165,6 +165,10 @@ typedef __u64 __bitwise __le64; typedef __u64 __bitwise __be64; #endif +#ifdef __KERNEL__ +typedef unsigned __nocast gfp_t; +#endif + struct ustat { __kernel_daddr_t f_tfree; __kernel_ino_t f_tinode; diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index b244f69ef682..3701a0673d2c 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -34,8 +34,8 @@ struct vm_struct { extern void *vmalloc(unsigned long size); extern void *vmalloc_exec(unsigned long size); extern void *vmalloc_32(unsigned long size); -extern void *__vmalloc(unsigned long size, unsigned int __nocast gfp_mask, pgprot_t prot); -extern void *__vmalloc_area(struct vm_struct *area, unsigned int __nocast gfp_mask, pgprot_t prot); +extern void *__vmalloc(unsigned long size, gfp_t gfp_mask, pgprot_t prot); +extern void *__vmalloc_area(struct vm_struct *area, gfp_t gfp_mask, pgprot_t prot); extern void vfree(void *addr); extern void *vmap(struct page **pages, unsigned int count, -- cgit v1.2.2 From 3dd083255ddcfa87751fa8e32f61a9547a15a541 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sun, 9 Oct 2005 21:19:40 +0200 Subject: [PATCH] x86_64: Set up safe page tables during resume The following patch makes swsusp avoid the possible temporary corruption of page translation tables during resume on x86-64. This is achieved by creating a copy of the relevant page tables that will not be modified by swsusp and can be safely used by it on resume. The problem is that during resume on x86-64 swsusp may temporarily corrupt the page tables used for the direct mapping of RAM. If that happens, a page fault occurs and cannot be handled properly, which leads to the solid hang of the affected system. This leads to the loss of the system's state from before suspend and may result in the loss of data or the corruption of filesystems, so it is a serious issue. Also, it appears to happen quite often (for me, as often as 50% of the time). The problem is related to the fact that (at least) one of the PMD entries used in the direct memory mapping (starting at PAGE_OFFSET) points to a page table the physical address of which is much greater than the physical address of the PMD entry itself. Moreover, unfortunately, the physical address of the page table before suspend (i.e. the one stored in the suspend image) happens to be different to the physical address of the corresponding page table used during resume (i.e. the one that is valid right before swsusp_arch_resume() in arch/x86_64/kernel/suspend_asm.S is executed). Thus while the image is restored, the "offending" PMD entry gets overwritten, so it does not point to the right physical address any more (i.e. there's no page table at the address pointed to by it, because it points to the address the page table has been at during suspend). Consequently, if the PMD entry is used later on, and it _is_ used in the process of copying the image pages, a page fault occurs, but it cannot be handled in the normal way and the system hangs. In principle we can call create_resume_mapping() from swsusp_arch_resume() (ie. from suspend_asm.S), but then the memory allocations in create_resume_mapping(), resume_pud_mapping(), and resume_pmd_mapping() must be made carefully so that we use _only_ NosaveFree pages in them (the other pages are overwritten by the loop in swsusp_arch_resume()). Additionally, we are in atomic context at that time, so we cannot use GFP_KERNEL. Moreover, if one of the allocations fails, we should free all of the allocated pages, so we need to trace them somehow. All of this is done in the appended patch, except that the functions populating the page tables are located in arch/x86_64/kernel/suspend.c rather than in init.c. It may be done in a more elegan way in the future, with the help of some swsusp patches that are in the works now. [AK: move some externs into headers, renamed a function] Signed-off-by: Rafael J. Wysocki Signed-off-by: Andi Kleen Signed-off-by: Linus Torvalds --- include/linux/suspend.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/suspend.h b/include/linux/suspend.h index f2e96fdfaae0..ad15a54806d8 100644 --- a/include/linux/suspend.h +++ b/include/linux/suspend.h @@ -71,5 +71,7 @@ void restore_processor_state(void); struct saved_context; void __save_processor_state(struct saved_context *ctxt); void __restore_processor_state(struct saved_context *ctxt); +extern unsigned long get_usable_page(unsigned gfp_mask); +extern void free_eaten_memory(void); #endif /* _LINUX_SWSUSP_H */ -- cgit v1.2.2 From 46113830a18847cff8da73005e57bc49c2f95a56 Mon Sep 17 00:00:00 2001 From: Harald Welte Date: Mon, 10 Oct 2005 19:44:29 +0200 Subject: [PATCH] Fix signal sending in usbdevio on async URB completion If a process issues an URB from userspace and (starts to) terminate before the URB comes back, we run into the issue described above. This is because the urb saves a pointer to "current" when it is posted to the device, but there's no guarantee that this pointer is still valid afterwards. In fact, there are three separate issues: 1) the pointer to "current" can become invalid, since the task could be completely gone when the URB completion comes back from the device. 2) Even if the saved task pointer is still pointing to a valid task_struct, task_struct->sighand could have gone meanwhile. 3) Even if the process is perfectly fine, permissions may have changed, and we can no longer send it a signal. So what we do instead, is to save the PID and uid's of the process, and introduce a new kill_proc_info_as_uid() function. Signed-off-by: Harald Welte [ Fixed up types and added symbol exports ] Signed-off-by: Linus Torvalds --- include/linux/sched.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index c3ba31f210a9..27519df0f987 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1018,6 +1018,7 @@ extern int force_sig_info(int, struct siginfo *, struct task_struct *); extern int __kill_pg_info(int sig, struct siginfo *info, pid_t pgrp); extern int kill_pg_info(int, struct siginfo *, pid_t); extern int kill_proc_info(int, struct siginfo *, pid_t); +extern int kill_proc_info_as_uid(int, struct siginfo *, pid_t, uid_t, uid_t); extern void do_notify_parent(struct task_struct *, int); extern void force_sig(int, struct task_struct *); extern void force_sig_specific(int, struct task_struct *); -- cgit v1.2.2