aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Chris Wright <chrisw@sous-sol.org> 2010-05-16 04:05:45 -0400
committer: David S. Miller <davem@davemloft.net> 2010-05-16 04:05:45 -0400
commit: c02db8c6290bb992442fec1407643c94cc414375 (patch)
tree: 05ec7bdd7efc005cd9e3905e0d6f6469bf08668b
parent: 55fa0cfd7c3ac2ae34cac7dca2e3fbcfe661e6c3 (diff)
rtnetlink: make SR-IOV VF interface symmetric
Now we have a set of nested attributes:

  IFLA_VFINFO_LIST (NESTED)
    IFLA_VF_INFO (NESTED)
      IFLA_VF_MAC
      IFLA_VF_VLAN
      IFLA_VF_TX_RATE

This allows a single set to operate on multiple attributes if desired.
Among other things, it means a dump can be replayed to set state.

The current interface has yet to be released, so this seems like
something to consider for 2.6.34.

Signed-off-by: Chris Wright <chrisw@sous-sol.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--  include/linux/if_link.h   23
-rw-r--r--  net/core/rtnetlink.c     159
2 files changed, 129 insertions, 53 deletions
diff --git a/include/linux/if_link.h b/include/linux/if_link.h
index c9bf92cd7653..d94963b379d9 100644
--- a/include/linux/if_link.h
+++ b/include/linux/if_link.h
@@ -79,10 +79,7 @@ enum {
79 IFLA_NET_NS_PID, 79 IFLA_NET_NS_PID,
80 IFLA_IFALIAS, 80 IFLA_IFALIAS,
81 IFLA_NUM_VF, /* Number of VFs if device is SR-IOV PF */ 81 IFLA_NUM_VF, /* Number of VFs if device is SR-IOV PF */
82 IFLA_VF_MAC, /* Hardware queue specific attributes */ 82 IFLA_VFINFO_LIST,
83 IFLA_VF_VLAN,
84 IFLA_VF_TX_RATE, /* TX Bandwidth Allocation */
85 IFLA_VFINFO,
86 __IFLA_MAX 83 __IFLA_MAX
87}; 84};
88 85
@@ -203,6 +200,24 @@ enum macvlan_mode {
203 200
204/* SR-IOV virtual function managment section */ 201/* SR-IOV virtual function managment section */
205 202
203enum {
204 IFLA_VF_INFO_UNSPEC,
205 IFLA_VF_INFO,
206 __IFLA_VF_INFO_MAX,
207};
208
209#define IFLA_VF_INFO_MAX (__IFLA_VF_INFO_MAX - 1)
210
211enum {
212 IFLA_VF_UNSPEC,
213 IFLA_VF_MAC, /* Hardware queue specific attributes */
214 IFLA_VF_VLAN,
215 IFLA_VF_TX_RATE, /* TX Bandwidth Allocation */
216 __IFLA_VF_MAX,
217};
218
219#define IFLA_VF_MAX (__IFLA_VF_MAX - 1)
220
206struct ifla_vf_mac { 221struct ifla_vf_mac {
207 __u32 vf; 222 __u32 vf;
208 __u8 mac[32]; /* MAX_ADDR_LEN */ 223 __u8 mac[32]; /* MAX_ADDR_LEN */
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index fe776c9ddeca..31e85d327aa2 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -602,12 +602,19 @@ static void copy_rtnl_link_stats(struct rtnl_link_stats *a,
602 a->tx_compressed = b->tx_compressed; 602 a->tx_compressed = b->tx_compressed;
603}; 603};
604 604
605/* All VF info */
605static inline int rtnl_vfinfo_size(const struct net_device *dev) 606static inline int rtnl_vfinfo_size(const struct net_device *dev)
606{ 607{
607 if (dev->dev.parent && dev_is_pci(dev->dev.parent)) 608 if (dev->dev.parent && dev_is_pci(dev->dev.parent)) {
608 return dev_num_vf(dev->dev.parent) * 609
609 sizeof(struct ifla_vf_info); 610 int num_vfs = dev_num_vf(dev->dev.parent);
610 else 611 size_t size = nlmsg_total_size(sizeof(struct nlattr));
612 size += nlmsg_total_size(num_vfs * sizeof(struct nlattr));
613 size += num_vfs * (sizeof(struct ifla_vf_mac) +
614 sizeof(struct ifla_vf_vlan) +
615 sizeof(struct ifla_vf_tx_rate));
616 return size;
617 } else
611 return 0; 618 return 0;
612} 619}
613 620
@@ -629,7 +636,7 @@ static inline size_t if_nlmsg_size(const struct net_device *dev)
629 + nla_total_size(1) /* IFLA_OPERSTATE */ 636 + nla_total_size(1) /* IFLA_OPERSTATE */
630 + nla_total_size(1) /* IFLA_LINKMODE */ 637 + nla_total_size(1) /* IFLA_LINKMODE */
631 + nla_total_size(4) /* IFLA_NUM_VF */ 638 + nla_total_size(4) /* IFLA_NUM_VF */
632 + nla_total_size(rtnl_vfinfo_size(dev)) /* IFLA_VFINFO */ 639 + rtnl_vfinfo_size(dev) /* IFLA_VFINFO_LIST */
633 + rtnl_link_get_size(dev); /* IFLA_LINKINFO */ 640 + rtnl_link_get_size(dev); /* IFLA_LINKINFO */
634} 641}
635 642
@@ -700,14 +707,37 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
700 707
701 if (dev->netdev_ops->ndo_get_vf_config && dev->dev.parent) { 708 if (dev->netdev_ops->ndo_get_vf_config && dev->dev.parent) {
702 int i; 709 int i;
703 struct ifla_vf_info ivi;
704 710
705 NLA_PUT_U32(skb, IFLA_NUM_VF, dev_num_vf(dev->dev.parent)); 711 struct nlattr *vfinfo, *vf;
706 for (i = 0; i < dev_num_vf(dev->dev.parent); i++) { 712 int num_vfs = dev_num_vf(dev->dev.parent);
713
714 NLA_PUT_U32(skb, IFLA_NUM_VF, num_vfs);
715 vfinfo = nla_nest_start(skb, IFLA_VFINFO_LIST);
716 if (!vfinfo)
717 goto nla_put_failure;
718 for (i = 0; i < num_vfs; i++) {
719 struct ifla_vf_info ivi;
720 struct ifla_vf_mac vf_mac;
721 struct ifla_vf_vlan vf_vlan;
722 struct ifla_vf_tx_rate vf_tx_rate;
707 if (dev->netdev_ops->ndo_get_vf_config(dev, i, &ivi)) 723 if (dev->netdev_ops->ndo_get_vf_config(dev, i, &ivi))
708 break; 724 break;
709 NLA_PUT(skb, IFLA_VFINFO, sizeof(ivi), &ivi); 725 vf_mac.vf = vf_vlan.vf = vf_tx_rate.vf = ivi.vf;
726 memcpy(vf_mac.mac, ivi.mac, sizeof(ivi.mac));
727 vf_vlan.vlan = ivi.vlan;
728 vf_vlan.qos = ivi.qos;
729 vf_tx_rate.rate = ivi.tx_rate;
730 vf = nla_nest_start(skb, IFLA_VF_INFO);
731 if (!vf) {
732 nla_nest_cancel(skb, vfinfo);
733 goto nla_put_failure;
734 }
735 NLA_PUT(skb, IFLA_VF_MAC, sizeof(vf_mac), &vf_mac);
736 NLA_PUT(skb, IFLA_VF_VLAN, sizeof(vf_vlan), &vf_vlan);
737 NLA_PUT(skb, IFLA_VF_TX_RATE, sizeof(vf_tx_rate), &vf_tx_rate);
738 nla_nest_end(skb, vf);
710 } 739 }
740 nla_nest_end(skb, vfinfo);
711 } 741 }
712 if (dev->rtnl_link_ops) { 742 if (dev->rtnl_link_ops) {
713 if (rtnl_link_fill(skb, dev) < 0) 743 if (rtnl_link_fill(skb, dev) < 0)
@@ -769,12 +799,7 @@ const struct nla_policy ifla_policy[IFLA_MAX+1] = {
769 [IFLA_LINKINFO] = { .type = NLA_NESTED }, 799 [IFLA_LINKINFO] = { .type = NLA_NESTED },
770 [IFLA_NET_NS_PID] = { .type = NLA_U32 }, 800 [IFLA_NET_NS_PID] = { .type = NLA_U32 },
771 [IFLA_IFALIAS] = { .type = NLA_STRING, .len = IFALIASZ-1 }, 801 [IFLA_IFALIAS] = { .type = NLA_STRING, .len = IFALIASZ-1 },
772 [IFLA_VF_MAC] = { .type = NLA_BINARY, 802 [IFLA_VFINFO_LIST] = {. type = NLA_NESTED },
773 .len = sizeof(struct ifla_vf_mac) },
774 [IFLA_VF_VLAN] = { .type = NLA_BINARY,
775 .len = sizeof(struct ifla_vf_vlan) },
776 [IFLA_VF_TX_RATE] = { .type = NLA_BINARY,
777 .len = sizeof(struct ifla_vf_tx_rate) },
778}; 803};
779EXPORT_SYMBOL(ifla_policy); 804EXPORT_SYMBOL(ifla_policy);
780 805
@@ -783,6 +808,19 @@ static const struct nla_policy ifla_info_policy[IFLA_INFO_MAX+1] = {
783 [IFLA_INFO_DATA] = { .type = NLA_NESTED }, 808 [IFLA_INFO_DATA] = { .type = NLA_NESTED },
784}; 809};
785 810
811static const struct nla_policy ifla_vfinfo_policy[IFLA_VF_INFO_MAX+1] = {
812 [IFLA_VF_INFO] = { .type = NLA_NESTED },
813};
814
815static const struct nla_policy ifla_vf_policy[IFLA_VF_MAX+1] = {
816 [IFLA_VF_MAC] = { .type = NLA_BINARY,
817 .len = sizeof(struct ifla_vf_mac) },
818 [IFLA_VF_VLAN] = { .type = NLA_BINARY,
819 .len = sizeof(struct ifla_vf_vlan) },
820 [IFLA_VF_TX_RATE] = { .type = NLA_BINARY,
821 .len = sizeof(struct ifla_vf_tx_rate) },
822};
823
786struct net *rtnl_link_get_net(struct net *src_net, struct nlattr *tb[]) 824struct net *rtnl_link_get_net(struct net *src_net, struct nlattr *tb[])
787{ 825{
788 struct net *net; 826 struct net *net;
@@ -812,6 +850,52 @@ static int validate_linkmsg(struct net_device *dev, struct nlattr *tb[])
812 return 0; 850 return 0;
813} 851}
814 852
853static int do_setvfinfo(struct net_device *dev, struct nlattr *attr)
854{
855 int rem, err = -EINVAL;
856 struct nlattr *vf;
857 const struct net_device_ops *ops = dev->netdev_ops;
858
859 nla_for_each_nested(vf, attr, rem) {
860 switch (nla_type(vf)) {
861 case IFLA_VF_MAC: {
862 struct ifla_vf_mac *ivm;
863 ivm = nla_data(vf);
864 err = -EOPNOTSUPP;
865 if (ops->ndo_set_vf_mac)
866 err = ops->ndo_set_vf_mac(dev, ivm->vf,
867 ivm->mac);
868 break;
869 }
870 case IFLA_VF_VLAN: {
871 struct ifla_vf_vlan *ivv;
872 ivv = nla_data(vf);
873 err = -EOPNOTSUPP;
874 if (ops->ndo_set_vf_vlan)
875 err = ops->ndo_set_vf_vlan(dev, ivv->vf,
876 ivv->vlan,
877 ivv->qos);
878 break;
879 }
880 case IFLA_VF_TX_RATE: {
881 struct ifla_vf_tx_rate *ivt;
882 ivt = nla_data(vf);
883 err = -EOPNOTSUPP;
884 if (ops->ndo_set_vf_tx_rate)
885 err = ops->ndo_set_vf_tx_rate(dev, ivt->vf,
886 ivt->rate);
887 break;
888 }
889 default:
890 err = -EINVAL;
891 break;
892 }
893 if (err)
894 break;
895 }
896 return err;
897}
898
815static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm, 899static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm,
816 struct nlattr **tb, char *ifname, int modified) 900 struct nlattr **tb, char *ifname, int modified)
817{ 901{
@@ -942,40 +1026,17 @@ static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm,
942 write_unlock_bh(&dev_base_lock); 1026 write_unlock_bh(&dev_base_lock);
943 } 1027 }
944 1028
945 if (tb[IFLA_VF_MAC]) { 1029 if (tb[IFLA_VFINFO_LIST]) {
946 struct ifla_vf_mac *ivm; 1030 struct nlattr *attr;
947 ivm = nla_data(tb[IFLA_VF_MAC]); 1031 int rem;
948 err = -EOPNOTSUPP; 1032 nla_for_each_nested(attr, tb[IFLA_VFINFO_LIST], rem) {
949 if (ops->ndo_set_vf_mac) 1033 if (nla_type(attr) != IFLA_VF_INFO)
950 err = ops->ndo_set_vf_mac(dev, ivm->vf, ivm->mac); 1034 goto errout;
951 if (err < 0) 1035 err = do_setvfinfo(dev, attr);
952 goto errout; 1036 if (err < 0)
953 modified = 1; 1037 goto errout;
954 } 1038 modified = 1;
955 1039 }
956 if (tb[IFLA_VF_VLAN]) {
957 struct ifla_vf_vlan *ivv;
958 ivv = nla_data(tb[IFLA_VF_VLAN]);
959 err = -EOPNOTSUPP;
960 if (ops->ndo_set_vf_vlan)
961 err = ops->ndo_set_vf_vlan(dev, ivv->vf,
962 ivv->vlan,
963 ivv->qos);
964 if (err < 0)
965 goto errout;
966 modified = 1;
967 }
968 err = 0;
969
970 if (tb[IFLA_VF_TX_RATE]) {
971 struct ifla_vf_tx_rate *ivt;
972 ivt = nla_data(tb[IFLA_VF_TX_RATE]);
973 err = -EOPNOTSUPP;
974 if (ops->ndo_set_vf_tx_rate)
975 err = ops->ndo_set_vf_tx_rate(dev, ivt->vf, ivt->rate);
976 if (err < 0)
977 goto errout;
978 modified = 1;
979 } 1040 }
980 err = 0; 1041 err = 0;
981 1042
l kwb">struct sigpending shared_pending; /* thread group exit support */ int group_exit_code; /* overloaded: * - notify group_exit_task when ->count is equal to notify_count * - everyone except group_exit_task is stopped during signal delivery * of fatal signals, group_exit_task processes the signal. */ struct task_struct *group_exit_task; int notify_count; /* thread group stop support, overloads group_exit_code too */ int group_stop_count; unsigned int flags; /* see SIGNAL_* flags below */ /* POSIX.1b Interval Timers */ struct list_head posix_timers; /* ITIMER_REAL timer for the process */ struct timer_list real_timer; unsigned long it_real_value, it_real_incr; /* ITIMER_PROF and ITIMER_VIRTUAL timers for the process */ cputime_t it_prof_expires, it_virt_expires; cputime_t it_prof_incr, it_virt_incr; /* job control IDs */ pid_t pgrp; pid_t tty_old_pgrp; pid_t session; /* boolean value for session group leader */ int leader; struct tty_struct *tty; /* NULL if no tty */ /* * Cumulative resource counters for dead threads in the group, * and for reaped dead child processes forked by this group. * Live threads maintain their own counters and add to these * in __exit_signal, except for the group leader. */ cputime_t utime, stime, cutime, cstime; unsigned long nvcsw, nivcsw, cnvcsw, cnivcsw; unsigned long min_flt, maj_flt, cmin_flt, cmaj_flt; /* * Cumulative ns of scheduled CPU time for dead threads in the * group, not including a zombie group leader. (This only differs * from jiffies_to_ns(utime + stime) if sched_clock uses something * other than jiffies.) */ unsigned long long sched_time; /* * We don't bother to synchronize most readers of this at all, * because there is no reader checking a limit that actually needs * to get both rlim_cur and rlim_max atomically, and either one * alone is a single word that can safely be read normally. 
* getrlimit/setrlimit use task_lock(current->group_leader) to * protect this instead of the siglock, because they really * have no need to disable irqs. */ struct rlimit rlim[RLIM_NLIMITS]; struct list_head cpu_timers[3]; /* keep the process-shared keyrings here so that they do the right * thing in threads created with CLONE_THREAD */ #ifdef CONFIG_KEYS struct key *session_keyring; /* keyring inherited over fork */ struct key *process_keyring; /* keyring private to this process */ #endif }; /* Context switch must be unlocked if interrupts are to be enabled */ #ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW # define __ARCH_WANT_UNLOCKED_CTXSW #endif /* * Bits in flags field of signal_struct. */ #define SIGNAL_STOP_STOPPED 0x00000001 /* job control stop in effect */ #define SIGNAL_STOP_DEQUEUED 0x00000002 /* stop signal dequeued */ #define SIGNAL_STOP_CONTINUED 0x00000004 /* SIGCONT since WCONTINUED reap */ #define SIGNAL_GROUP_EXIT 0x00000008 /* group exit in progress */ /* * Priority of a process goes from 0..MAX_PRIO-1, valid RT * priority is 0..MAX_RT_PRIO-1, and SCHED_NORMAL tasks are * in the range MAX_RT_PRIO..MAX_PRIO-1. Priority values * are inverted: lower p->prio value means higher priority. * * The MAX_USER_RT_PRIO value allows the actual maximum * RT priority to be separate from the value exported to * user-space. This allows kernel threads to set their * priority to a value higher than any user task. Note: * MAX_RT_PRIO must not be smaller than MAX_USER_RT_PRIO. */ #define MAX_USER_RT_PRIO 100 #define MAX_RT_PRIO MAX_USER_RT_PRIO #define MAX_PRIO (MAX_RT_PRIO + 40) #define rt_task(p) (unlikely((p)->prio < MAX_RT_PRIO)) /* * Some day this will be a full-fledged user tracking system.. */ struct user_struct { atomic_t __count; /* reference count */ atomic_t processes; /* How many processes does this user have? */ atomic_t files; /* How many open files does this user have? */ atomic_t sigpending; /* How many pending signals does this user have? 
*/ #ifdef CONFIG_INOTIFY atomic_t inotify_watches; /* How many inotify watches does this user have? */ atomic_t inotify_devs; /* How many inotify devs does this user have opened? */ #endif /* protected by mq_lock */ unsigned long mq_bytes; /* How many bytes can be allocated to mqueue? */ unsigned long locked_shm; /* How many pages of mlocked shm ? */ #ifdef CONFIG_KEYS struct key *uid_keyring; /* UID specific keyring */ struct key *session_keyring; /* UID's default session keyring */ #endif /* Hash table maintenance information */ struct list_head uidhash_list; uid_t uid; }; extern struct user_struct *find_user(uid_t); extern struct user_struct root_user; #define INIT_USER (&root_user) typedef struct prio_array prio_array_t; struct backing_dev_info; struct reclaim_state; #ifdef CONFIG_SCHEDSTATS struct sched_info { /* cumulative counters */ unsigned long cpu_time, /* time spent on the cpu */ run_delay, /* time spent waiting on a runqueue */ pcnt; /* # of timeslices run on this cpu */ /* timestamps */ unsigned long last_arrival, /* when we last ran on a cpu */ last_queued; /* when we were last queued to run */ }; extern struct file_operations proc_schedstat_operations; #endif enum idle_type { SCHED_IDLE, NOT_IDLE, NEWLY_IDLE, MAX_IDLE_TYPES }; /* * sched-domains (multiprocessor balancing) declarations: */ #ifdef CONFIG_SMP #define SCHED_LOAD_SCALE 128UL /* increase resolution of load */ #define SD_LOAD_BALANCE 1 /* Do load balancing on this domain. 
*/ #define SD_BALANCE_NEWIDLE 2 /* Balance when about to become idle */ #define SD_BALANCE_EXEC 4 /* Balance on exec */ #define SD_BALANCE_FORK 8 /* Balance on fork, clone */ #define SD_WAKE_IDLE 16 /* Wake to idle CPU on task wakeup */ #define SD_WAKE_AFFINE 32 /* Wake task to waking CPU */ #define SD_WAKE_BALANCE 64 /* Perform balancing at task wakeup */ #define SD_SHARE_CPUPOWER 128 /* Domain members share cpu power */ struct sched_group { struct sched_group *next; /* Must be a circular list */ cpumask_t cpumask; /* * CPU power of this group, SCHED_LOAD_SCALE being max power for a * single CPU. This is read only (except for setup, hotplug CPU). */ unsigned long cpu_power; }; struct sched_domain { /* These fields must be setup */ struct sched_domain *parent; /* top domain must be null terminated */ struct sched_group *groups; /* the balancing groups of the domain */ cpumask_t span; /* span of all CPUs in this domain */ unsigned long min_interval; /* Minimum balance interval ms */ unsigned long max_interval; /* Maximum balance interval ms */ unsigned int busy_factor; /* less balancing by factor if busy */ unsigned int imbalance_pct; /* No balance until over watermark */ unsigned long long cache_hot_time; /* Task considered cache hot (ns) */ unsigned int cache_nice_tries; /* Leave cache hot tasks for # tries */ unsigned int per_cpu_gain; /* CPU % gained by adding domain cpus */ unsigned int busy_idx; unsigned int idle_idx; unsigned int newidle_idx; unsigned int wake_idx; unsigned int forkexec_idx; int flags; /* See SD_* */ /* Runtime fields. */ unsigned long last_balance; /* init to jiffies. units in jiffies */ unsigned int balance_interval; /* initialise to 1. units in ms. 
*/ unsigned int nr_balance_failed; /* initialise to 0 */ #ifdef CONFIG_SCHEDSTATS /* load_balance() stats */ unsigned long lb_cnt[MAX_IDLE_TYPES]; unsigned long lb_failed[MAX_IDLE_TYPES]; unsigned long lb_balanced[MAX_IDLE_TYPES]; unsigned long lb_imbalance[MAX_IDLE_TYPES]; unsigned long lb_gained[MAX_IDLE_TYPES]; unsigned long lb_hot_gained[MAX_IDLE_TYPES]; unsigned long lb_nobusyg[MAX_IDLE_TYPES]; unsigned long lb_nobusyq[MAX_IDLE_TYPES]; /* Active load balancing */ unsigned long alb_cnt; unsigned long alb_failed; unsigned long alb_pushed; /* SD_BALANCE_EXEC stats */ unsigned long sbe_cnt; unsigned long sbe_balanced; unsigned long sbe_pushed; /* SD_BALANCE_FORK stats */ unsigned long sbf_cnt; unsigned long sbf_balanced; unsigned long sbf_pushed; /* try_to_wake_up() stats */ unsigned long ttwu_wake_remote; unsigned long ttwu_move_affine; unsigned long ttwu_move_balance; #endif }; extern void partition_sched_domains(cpumask_t *partition1, cpumask_t *partition2); #endif /* CONFIG_SMP */ struct io_context; /* See blkdev.h */ void exit_io_context(void); struct cpuset; #define NGROUPS_SMALL 32 #define NGROUPS_PER_BLOCK ((int)(PAGE_SIZE / sizeof(gid_t))) struct group_info { int ngroups; atomic_t usage; gid_t small_block[NGROUPS_SMALL]; int nblocks; gid_t *blocks[0]; }; /* * get_group_info() must be called with the owning task locked (via task_lock()) * when task != current. The reason being that the vast majority of callers are * looking at current->group_info, which can not be changed except by the * current task. Changing current->group_info requires the task lock, too. 
*/ #define get_group_info(group_info) do { \ atomic_inc(&(group_info)->usage); \ } while (0) #define put_group_info(group_info) do { \ if (atomic_dec_and_test(&(group_info)->usage)) \ groups_free(group_info); \ } while (0) extern struct group_info *groups_alloc(int gidsetsize); extern void groups_free(struct group_info *group_info); extern int set_current_groups(struct group_info *group_info); extern int groups_search(struct group_info *group_info, gid_t grp); /* access the groups "array" with this macro */ #define GROUP_AT(gi, i) \ ((gi)->blocks[(i)/NGROUPS_PER_BLOCK][(i)%NGROUPS_PER_BLOCK]) #ifdef ARCH_HAS_PREFETCH_SWITCH_STACK extern void prefetch_stack(struct task_struct*); #else static inline void prefetch_stack(struct task_struct *t) { } #endif struct audit_context; /* See audit.c */ struct mempolicy; struct task_struct { volatile long state; /* -1 unrunnable, 0 runnable, >0 stopped */ struct thread_info *thread_info; atomic_t usage; unsigned long flags; /* per process flags, defined below */ unsigned long ptrace; int lock_depth; /* BKL lock depth */ #if defined(CONFIG_SMP) && defined(__ARCH_WANT_UNLOCKED_CTXSW) int oncpu; #endif int prio, static_prio; struct list_head run_list; prio_array_t *array; unsigned short ioprio; unsigned long sleep_avg; unsigned long long timestamp, last_ran; unsigned long long sched_time; /* sched_clock time spent running */ int activated; unsigned long policy; cpumask_t cpus_allowed; unsigned int time_slice, first_time_slice; #ifdef CONFIG_SCHEDSTATS struct sched_info sched_info; #endif struct list_head tasks; /* * ptrace_list/ptrace_children forms the list of my children * that were stolen by a ptracer. */ struct list_head ptrace_children; struct list_head ptrace_list; struct mm_struct *mm, *active_mm; /* task state */ struct linux_binfmt *binfmt; long exit_state; int exit_code, exit_signal; int pdeath_signal; /* The signal sent when the parent dies */ /* ??? 
*/ unsigned long personality; unsigned did_exec:1; pid_t pid; pid_t tgid; /* * pointers to (original) parent process, youngest child, younger sibling, * older sibling, respectively. (p->father can be replaced with * p->parent->pid) */ struct task_struct *real_parent; /* real parent process (when being debugged) */ struct task_struct *parent; /* parent process */ /* * children/sibling forms the list of my children plus the * tasks I'm ptracing. */ struct list_head children; /* list of my children */ struct list_head sibling; /* linkage in my parent's children list */ struct task_struct *group_leader; /* threadgroup leader */ /* PID/PID hash table linkage. */ struct pid pids[PIDTYPE_MAX]; struct completion *vfork_done; /* for vfork() */ int __user *set_child_tid; /* CLONE_CHILD_SETTID */ int __user *clear_child_tid; /* CLONE_CHILD_CLEARTID */ unsigned long rt_priority; cputime_t utime, stime; unsigned long nvcsw, nivcsw; /* context switch counts */ struct timespec start_time; /* mm fault and swap info: this can arguably be seen as either mm-specific or thread-specific */ unsigned long min_flt, maj_flt; cputime_t it_prof_expires, it_virt_expires; unsigned long long it_sched_expires; struct list_head cpu_timers[3]; /* process credentials */ uid_t uid,euid,suid,fsuid; gid_t gid,egid,sgid,fsgid; struct group_info *group_info; kernel_cap_t cap_effective, cap_inheritable, cap_permitted; unsigned keep_capabilities:1; struct user_struct *user; #ifdef CONFIG_KEYS struct key *thread_keyring; /* keyring private to this thread */ unsigned char jit_keyring; /* default keyring to attach requested keys to */ #endif int oomkilladj; /* OOM kill score adjustment (bit shift). 
*/ char comm[TASK_COMM_LEN]; /* executable name excluding path - access with [gs]et_task_comm (which lock it with task_lock()) - initialized normally by flush_old_exec */ /* file system info */ int link_count, total_link_count; /* ipc stuff */ struct sysv_sem sysvsem; /* CPU-specific state of this task */ struct thread_struct thread; /* filesystem information */ struct fs_struct *fs; /* open file information */ struct files_struct *files; /* namespace */ struct namespace *namespace; /* signal handlers */ struct signal_struct *signal; struct sighand_struct *sighand; sigset_t blocked, real_blocked; struct sigpending pending; unsigned long sas_ss_sp; size_t sas_ss_size; int (*notifier)(void *priv); void *notifier_data; sigset_t *notifier_mask; void *security; struct audit_context *audit_context; seccomp_t seccomp; /* Thread group tracking */ u32 parent_exec_id; u32 self_exec_id; /* Protection of (de-)allocation: mm, files, fs, tty, keyrings */ spinlock_t alloc_lock; /* Protection of proc_dentry: nesting proc_lock, dcache_lock, write_lock_irq(&tasklist_lock); */ spinlock_t proc_lock; /* journalling filesystem info */ void *journal_info; /* VM state */ struct reclaim_state *reclaim_state; struct dentry *proc_dentry; struct backing_dev_info *backing_dev_info; struct io_context *io_context; unsigned long ptrace_message; siginfo_t *last_siginfo; /* For ptrace use. */ /* * current io wait handle: wait queue entry to use for io waits * If this thread is processing aio, this points at the waitqueue * inside the currently handled kiocb. It may be NULL (i.e. default * to a stack based synchronous wait) if its doing sync IO. 
*/ wait_queue_t *io_wait; /* i/o counters(bytes read/written, #syscalls */ u64 rchar, wchar, syscr, syscw; #if defined(CONFIG_BSD_PROCESS_ACCT) u64 acct_rss_mem1; /* accumulated rss usage */ u64 acct_vm_mem1; /* accumulated virtual memory usage */ clock_t acct_stimexpd; /* clock_t-converted stime since last update */ #endif #ifdef CONFIG_NUMA struct mempolicy *mempolicy; short il_next; #endif #ifdef CONFIG_CPUSETS struct cpuset *cpuset; nodemask_t mems_allowed; int cpuset_mems_generation; #endif atomic_t fs_excl; /* holding fs exclusive resources */ }; static inline pid_t process_group(struct task_struct *tsk) { return tsk->signal->pgrp; } /** * pid_alive - check that a task structure is not stale * @p: Task structure to be checked. * * Test if a process is not yet dead (at most zombie state) * If pid_alive fails, then pointers within the task structure * can be stale and must not be dereferenced. */ static inline int pid_alive(struct task_struct *p) { return p->pids[PIDTYPE_PID].nr != 0; } extern void free_task(struct task_struct *tsk); extern void __put_task_struct(struct task_struct *tsk); #define get_task_struct(tsk) do { atomic_inc(&(tsk)->usage); } while(0) #define put_task_struct(tsk) \ do { if (atomic_dec_and_test(&(tsk)->usage)) __put_task_struct(tsk); } while(0) /* * Per process flags */ #define PF_ALIGNWARN 0x00000001 /* Print alignment warning msgs */ /* Not implemented yet, only for 486*/ #define PF_STARTING 0x00000002 /* being created */ #define PF_EXITING 0x00000004 /* getting shut down */ #define PF_DEAD 0x00000008 /* Dead */ #define PF_FORKNOEXEC 0x00000040 /* forked but didn't exec */ #define PF_SUPERPRIV 0x00000100 /* used super-user privileges */ #define PF_DUMPCORE 0x00000200 /* dumped core */ #define PF_SIGNALED 0x00000400 /* killed by a signal */ #define PF_MEMALLOC 0x00000800 /* Allocating memory */ #define PF_FLUSHER 0x00001000 /* responsible for disk writeback */ #define PF_USED_MATH 0x00002000 /* if unset the fpu must be initialized 
before use */ #define PF_FREEZE 0x00004000 /* this task is being frozen for suspend now */ #define PF_NOFREEZE 0x00008000 /* this thread should not be frozen */ #define PF_FROZEN 0x00010000 /* frozen for system suspend */ #define PF_FSTRANS 0x00020000 /* inside a filesystem transaction */ #define PF_KSWAPD 0x00040000 /* I am kswapd */ #define PF_SWAPOFF 0x00080000 /* I am in swapoff */ #define PF_LESS_THROTTLE 0x00100000 /* Throttle me less: I clean memory */ #define PF_SYNCWRITE 0x00200000 /* I am doing a sync write */ #define PF_BORROWED_MM 0x00400000 /* I am a kthread doing use_mm */ #define PF_RANDOMIZE 0x00800000 /* randomize virtual address space */ /* * Only the _current_ task can read/write to tsk->flags, but other * tasks can access tsk->flags in readonly mode for example * with tsk_used_math (like during threaded core dumping). * There is however an exception to this rule during ptrace * or during fork: the ptracer task is allowed to write to the * child->flags of its traced child (same goes for fork, the parent * can write to the child->flags), because we're guaranteed the * child is not running and in turn not changing child->flags * at the same time the parent does it. */ #define clear_stopped_child_used_math(child) do { (child)->flags &= ~PF_USED_MATH; } while (0) #define set_stopped_child_used_math(child) do { (child)->flags |= PF_USED_MATH; } while (0) #define clear_used_math() clear_stopped_child_used_math(current) #define set_used_math() set_stopped_child_used_math(current) #define conditional_stopped_child_used_math(condition, child) \ do { (child)->flags &= ~PF_USED_MATH, (child)->flags |= (condition) ? 
PF_USED_MATH : 0; } while (0) #define conditional_used_math(condition) \ conditional_stopped_child_used_math(condition, current) #define copy_to_stopped_child_used_math(child) \ do { (child)->flags &= ~PF_USED_MATH, (child)->flags |= current->flags & PF_USED_MATH; } while (0) /* NOTE: this will return 0 or PF_USED_MATH, it will never return 1 */ #define tsk_used_math(p) ((p)->flags & PF_USED_MATH) #define used_math() tsk_used_math(current) #ifdef CONFIG_SMP extern int set_cpus_allowed(task_t *p, cpumask_t new_mask); #else static inline int set_cpus_allowed(task_t *p, cpumask_t new_mask) { if (!cpus_intersects(new_mask, cpu_online_map)) return -EINVAL; return 0; } #endif extern unsigned long long sched_clock(void); extern unsigned long long current_sched_time(const task_t *current_task); /* sched_exec is called by processes performing an exec */ #ifdef CONFIG_SMP extern void sched_exec(void); #else #define sched_exec() {} #endif #ifdef CONFIG_HOTPLUG_CPU extern void idle_task_exit(void); #else static inline void idle_task_exit(void) {} #endif extern void sched_idle_next(void); extern void set_user_nice(task_t *p, long nice); extern int task_prio(const task_t *p); extern int task_nice(const task_t *p); extern int can_nice(const task_t *p, const int nice); extern int task_curr(const task_t *p); extern int idle_cpu(int cpu); extern int sched_setscheduler(struct task_struct *, int, struct sched_param *); extern task_t *idle_task(int cpu); extern task_t *curr_task(int cpu); extern void set_curr_task(int cpu, task_t *p); void yield(void); /* * The default (Linux) execution domain. 
*/ extern struct exec_domain default_exec_domain; union thread_union { struct thread_info thread_info; unsigned long stack[THREAD_SIZE/sizeof(long)]; }; #ifndef __HAVE_ARCH_KSTACK_END static inline int kstack_end(void *addr) { /* Reliable end of stack detection: * Some APM bios versions misalign the stack */ return !(((unsigned long)addr+sizeof(void*)-1) & (THREAD_SIZE-sizeof(void*))); } #endif extern union thread_union init_thread_union; extern struct task_struct init_task; extern struct mm_struct init_mm; #define find_task_by_pid(nr) find_task_by_pid_type(PIDTYPE_PID, nr) extern struct task_struct *find_task_by_pid_type(int type, int pid); extern void set_special_pids(pid_t session, pid_t pgrp); extern void __set_special_pids(pid_t session, pid_t pgrp); /* per-UID process charging. */ extern struct user_struct * alloc_uid(uid_t); static inline struct user_struct *get_uid(struct user_struct *u) { atomic_inc(&u->__count); return u; }