aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMichael Holzheu <holzheu@de.ibm.com>2006-06-23 05:05:06 -0400
committerLinus Torvalds <torvalds@g5.osdl.org>2006-06-23 10:43:02 -0400
commit24bbb1faf3f0420eb252dd0fdc1e477b1d4d73bd (patch)
treece7f9358fdeaa4f299cb996e8a1d4224d51ee4c4
parentb9e122c80cd2e10fe18678c63db4717871ed31cf (diff)
[PATCH] s390_hypfs filesystem
On zSeries machines there exists an interface which allows the operating system to retrieve LPAR hypervisor accounting data. For example, it is possible to get usage data for physical and virtual cpus. In order to provide this information to user space programs, I implemented a new virtual Linux file system named 's390_hypfs' using the Linux 2.6 libfs framework. The name 's390_hypfs' stands for 'S390 Hypervisor Filesystem'. All the accounting information is put into different virtual files which can be accessed from user space. All data is represented as ASCII strings. When the file system is mounted the accounting information is retrieved and a file system tree is created with the attribute files containing the cpu information. The content of the files remains unchanged until a new update is made. An update can be triggered from user space through writing 'something' into a special purpose update file. We create the following directory structure: <mount-point>/ update cpus/ <cpu-id> type mgmtime <cpu-id> ... hyp/ type systems/ <lpar-name> cpus/ <cpu-id> type mgmtime cputime onlinetime <cpu-id> ... <lpar-name> cpus/ ... - update: File to trigger update - cpus/: Directory for all physical cpus - cpus/<cpu-id>/: Directory for one physical cpu. - cpus/<cpu-id>/type: Type name of physical zSeries cpu. - cpus/<cpu-id>/mgmtime: Physical-LPAR-management time in microseconds. - hyp/: Directory for hypervisor information - hyp/type: Typ of hypervisor (currently only 'LPAR Hypervisor') - systems/: Directory for all LPARs - systems/<lpar-name>/: Directory for one LPAR. - systems/<lpar-name>/cpus/<cpu-id>/: Directory for the virtual cpus - systems/<lpar-name>/cpus/<cpu-id>/type: Typ of cpu. - systems/<lpar-name>/cpus/<cpu-id>/mgmtime: Accumulated number of microseconds during which a physical CPU was assigned to the logical cpu and the cpu time was consumed by the hypervisor and was not provided to the LPAR (LPAR overhead). 
- systems/<lpar-name>/cpus/<cpu-id>/cputime: Accumulated number of microseconds during which a physical CPU was assigned to the logical cpu and the cpu time was consumed by the LPAR. - systems/<lpar-name>/cpus/<cpu-id>/onlinetime: Accumulated number of microseconds during which the logical CPU has been online. As mount point for the filesystem /sys/hypervisor/s390 is created. The update process is triggered when writing 'something' into the 'update' file at the top level hypfs directory. You can do this e.g. with 'echo 1 > update'. During the update the whole directory structure is deleted and built up again. Cc: Pekka Enberg <penberg@cs.helsinki.fi> Cc: Ingo Oeser <ioe-lkml@rameria.de> Cc: Joern Engel <joern@wohnheim.fh-wedel.de> Acked-by: Martin Schwidefsky <schwidefsky@de.ibm.com> Signed-off-by: Michael Holzheu <holzheu@de.ibm.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
-rw-r--r--arch/s390/Kconfig8
-rw-r--r--arch/s390/Makefile2
-rw-r--r--arch/s390/hypfs/Makefile7
-rw-r--r--arch/s390/hypfs/hypfs.h30
-rw-r--r--arch/s390/hypfs/hypfs_diag.c696
-rw-r--r--arch/s390/hypfs/hypfs_diag.h16
-rw-r--r--arch/s390/hypfs/inode.c492
7 files changed, 1250 insertions, 1 deletions
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index 01c5c082f97..821a141889d 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -446,6 +446,14 @@ config NO_IDLE_HZ_INIT
446 The HZ timer is switched off in idle by default. That means the 446 The HZ timer is switched off in idle by default. That means the
447 HZ timer is already disabled at boot time. 447 HZ timer is already disabled at boot time.
448 448
449config S390_HYPFS_FS
450 bool "s390 hypervisor file system support"
451 select SYS_HYPERVISOR
452 default y
453 help
454 This is a virtual file system intended to provide accounting
455 information in an s390 hypervisor environment.
456
449config KEXEC 457config KEXEC
450 bool "kexec system call (EXPERIMENTAL)" 458 bool "kexec system call (EXPERIMENTAL)"
451 depends on EXPERIMENTAL 459 depends on EXPERIMENTAL
diff --git a/arch/s390/Makefile b/arch/s390/Makefile
index 7bb16fb97d4..b3791fb094a 100644
--- a/arch/s390/Makefile
+++ b/arch/s390/Makefile
@@ -76,7 +76,7 @@ LDFLAGS_vmlinux := -e start
76head-y := arch/$(ARCH)/kernel/head.o arch/$(ARCH)/kernel/init_task.o 76head-y := arch/$(ARCH)/kernel/head.o arch/$(ARCH)/kernel/init_task.o
77 77
78core-y += arch/$(ARCH)/mm/ arch/$(ARCH)/kernel/ arch/$(ARCH)/crypto/ \ 78core-y += arch/$(ARCH)/mm/ arch/$(ARCH)/kernel/ arch/$(ARCH)/crypto/ \
79 arch/$(ARCH)/appldata/ 79 arch/$(ARCH)/appldata/ arch/$(ARCH)/hypfs/
80libs-y += arch/$(ARCH)/lib/ 80libs-y += arch/$(ARCH)/lib/
81drivers-y += drivers/s390/ 81drivers-y += drivers/s390/
82drivers-$(CONFIG_MATHEMU) += arch/$(ARCH)/math-emu/ 82drivers-$(CONFIG_MATHEMU) += arch/$(ARCH)/math-emu/
diff --git a/arch/s390/hypfs/Makefile b/arch/s390/hypfs/Makefile
new file mode 100644
index 00000000000..f4b00cd81f7
--- /dev/null
+++ b/arch/s390/hypfs/Makefile
@@ -0,0 +1,7 @@
1#
2# Makefile for the linux hypfs filesystem routines.
3#
4
5obj-$(CONFIG_S390_HYPFS_FS) += s390_hypfs.o
6
7s390_hypfs-objs := inode.o hypfs_diag.o
diff --git a/arch/s390/hypfs/hypfs.h b/arch/s390/hypfs/hypfs.h
new file mode 100644
index 00000000000..ea5567be00f
--- /dev/null
+++ b/arch/s390/hypfs/hypfs.h
@@ -0,0 +1,30 @@
1/*
2 * fs/hypfs/hypfs.h
3 * Hypervisor filesystem for Linux on s390.
4 *
5 * Copyright (C) IBM Corp. 2006
6 * Author(s): Michael Holzheu <holzheu@de.ibm.com>
7 */
8
9#ifndef _HYPFS_H_
10#define _HYPFS_H_
11
12#include <linux/fs.h>
13#include <linux/types.h>
14
15#define REG_FILE_MODE 0440
16#define UPDATE_FILE_MODE 0220
17#define DIR_MODE 0550
18
19extern struct dentry *hypfs_mkdir(struct super_block *sb, struct dentry *parent,
20 const char *name);
21
22extern struct dentry *hypfs_create_u64(struct super_block *sb,
23 struct dentry *dir, const char *name,
24 __u64 value);
25
26extern struct dentry *hypfs_create_str(struct super_block *sb,
27 struct dentry *dir, const char *name,
28 char *string);
29
30#endif /* _HYPFS_H_ */
diff --git a/arch/s390/hypfs/hypfs_diag.c b/arch/s390/hypfs/hypfs_diag.c
new file mode 100644
index 00000000000..efa74af7f04
--- /dev/null
+++ b/arch/s390/hypfs/hypfs_diag.c
@@ -0,0 +1,696 @@
1/*
2 * fs/hypfs/hypfs_diag.c
3 * Hypervisor filesystem for Linux on s390. Diag 204 and 224
4 * implementation.
5 *
6 * Copyright (C) IBM Corp. 2006
7 * Author(s): Michael Holzheu <holzheu@de.ibm.com>
8 */
9
10#include <linux/types.h>
11#include <linux/errno.h>
12#include <linux/string.h>
13#include <linux/vmalloc.h>
14#include <asm/ebcdic.h>
15#include "hypfs.h"
16
17#define LPAR_NAME_LEN 8 /* lpar name len in diag 204 data */
18#define CPU_NAME_LEN 16 /* type name len of cpus in diag224 name table */
19#define TMP_SIZE 64 /* size of temporary buffers */
20
21/* diag 204 subcodes */
enum diag204_sc {
	SUBC_STIB4 = 4,		/* store; used with the simple data format */
	SUBC_RSI = 5,		/* query size of the data of subcodes 6 and 7 */
	SUBC_STIB6 = 6,		/* store; extended data format */
	SUBC_STIB7 = 7		/* like 6, plus info about secondary cpus */
};
28
29/* The two available diag 204 data formats */
enum diag204_format {
	INFO_SIMPLE = 0,		/* simple format (only one page) */
	INFO_EXT = 0x00010000		/* extended format; or'ed into the subcode */
};
34
35/* bit is set in flags, when physical cpu info is included in diag 204 data */
36#define LPAR_PHYS_FLG 0x80
37
38static char *diag224_cpu_names; /* diag 224 name table */
39static enum diag204_sc diag204_store_sc; /* used subcode for store */
40static enum diag204_format diag204_info_type; /* used diag 204 data format */
41
42static void *diag204_buf; /* 4K aligned buffer for diag204 data */
43static void *diag204_buf_vmalloc; /* vmalloc pointer for diag204 data */
44static int diag204_buf_pages; /* number of pages for diag204 data */
45
46/*
47 * DIAG 204 data structures and member access functions.
48 *
49 * Since we have two different diag 204 data formats for old and new s390
50 * machines, we do not access the structs directly, but use getter functions for
51 * each struct member instead. This should make the code more readable.
52 */
53
54/* Time information block */
55
56struct info_blk_hdr {
57 __u8 npar;
58 __u8 flags;
59 __u16 tslice;
60 __u16 phys_cpus;
61 __u16 this_part;
62 __u64 curtod;
63} __attribute__ ((packed));
64
65struct x_info_blk_hdr {
66 __u8 npar;
67 __u8 flags;
68 __u16 tslice;
69 __u16 phys_cpus;
70 __u16 this_part;
71 __u64 curtod1;
72 __u64 curtod2;
73 char reserved[40];
74} __attribute__ ((packed));
75
76static inline int info_blk_hdr__size(enum diag204_format type)
77{
78 if (type == INFO_SIMPLE)
79 return sizeof(struct info_blk_hdr);
80 else /* INFO_EXT */
81 return sizeof(struct x_info_blk_hdr);
82}
83
84static inline __u8 info_blk_hdr__npar(enum diag204_format type, void *hdr)
85{
86 if (type == INFO_SIMPLE)
87 return ((struct info_blk_hdr *)hdr)->npar;
88 else /* INFO_EXT */
89 return ((struct x_info_blk_hdr *)hdr)->npar;
90}
91
92static inline __u8 info_blk_hdr__flags(enum diag204_format type, void *hdr)
93{
94 if (type == INFO_SIMPLE)
95 return ((struct info_blk_hdr *)hdr)->flags;
96 else /* INFO_EXT */
97 return ((struct x_info_blk_hdr *)hdr)->flags;
98}
99
100static inline __u16 info_blk_hdr__pcpus(enum diag204_format type, void *hdr)
101{
102 if (type == INFO_SIMPLE)
103 return ((struct info_blk_hdr *)hdr)->phys_cpus;
104 else /* INFO_EXT */
105 return ((struct x_info_blk_hdr *)hdr)->phys_cpus;
106}
107
108/* Partition header */
109
110struct part_hdr {
111 __u8 pn;
112 __u8 cpus;
113 char reserved[6];
114 char part_name[LPAR_NAME_LEN];
115} __attribute__ ((packed));
116
117struct x_part_hdr {
118 __u8 pn;
119 __u8 cpus;
120 __u8 rcpus;
121 __u8 pflag;
122 __u32 mlu;
123 char part_name[LPAR_NAME_LEN];
124 char lpc_name[8];
125 char os_name[8];
126 __u64 online_cs;
127 __u64 online_es;
128 __u8 upid;
129 char reserved1[3];
130 __u32 group_mlu;
131 char group_name[8];
132 char reserved2[32];
133} __attribute__ ((packed));
134
135static inline int part_hdr__size(enum diag204_format type)
136{
137 if (type == INFO_SIMPLE)
138 return sizeof(struct part_hdr);
139 else /* INFO_EXT */
140 return sizeof(struct x_part_hdr);
141}
142
143static inline __u8 part_hdr__rcpus(enum diag204_format type, void *hdr)
144{
145 if (type == INFO_SIMPLE)
146 return ((struct part_hdr *)hdr)->cpus;
147 else /* INFO_EXT */
148 return ((struct x_part_hdr *)hdr)->rcpus;
149}
150
151static inline void part_hdr__part_name(enum diag204_format type, void *hdr,
152 char *name)
153{
154 if (type == INFO_SIMPLE)
155 memcpy(name, ((struct part_hdr *)hdr)->part_name,
156 LPAR_NAME_LEN);
157 else /* INFO_EXT */
158 memcpy(name, ((struct x_part_hdr *)hdr)->part_name,
159 LPAR_NAME_LEN);
160 EBCASC(name, LPAR_NAME_LEN);
161 name[LPAR_NAME_LEN] = 0;
162 strstrip(name);
163}
164
165struct cpu_info {
166 __u16 cpu_addr;
167 char reserved1[2];
168 __u8 ctidx;
169 __u8 cflag;
170 __u16 weight;
171 __u64 acc_time;
172 __u64 lp_time;
173} __attribute__ ((packed));
174
175struct x_cpu_info {
176 __u16 cpu_addr;
177 char reserved1[2];
178 __u8 ctidx;
179 __u8 cflag;
180 __u16 weight;
181 __u64 acc_time;
182 __u64 lp_time;
183 __u16 min_weight;
184 __u16 cur_weight;
185 __u16 max_weight;
186 char reseved2[2];
187 __u64 online_time;
188 __u64 wait_time;
189 __u32 pma_weight;
190 __u32 polar_weight;
191 char reserved3[40];
192} __attribute__ ((packed));
193
194/* CPU info block */
195
196static inline int cpu_info__size(enum diag204_format type)
197{
198 if (type == INFO_SIMPLE)
199 return sizeof(struct cpu_info);
200 else /* INFO_EXT */
201 return sizeof(struct x_cpu_info);
202}
203
204static inline __u8 cpu_info__ctidx(enum diag204_format type, void *hdr)
205{
206 if (type == INFO_SIMPLE)
207 return ((struct cpu_info *)hdr)->ctidx;
208 else /* INFO_EXT */
209 return ((struct x_cpu_info *)hdr)->ctidx;
210}
211
212static inline __u16 cpu_info__cpu_addr(enum diag204_format type, void *hdr)
213{
214 if (type == INFO_SIMPLE)
215 return ((struct cpu_info *)hdr)->cpu_addr;
216 else /* INFO_EXT */
217 return ((struct x_cpu_info *)hdr)->cpu_addr;
218}
219
220static inline __u64 cpu_info__acc_time(enum diag204_format type, void *hdr)
221{
222 if (type == INFO_SIMPLE)
223 return ((struct cpu_info *)hdr)->acc_time;
224 else /* INFO_EXT */
225 return ((struct x_cpu_info *)hdr)->acc_time;
226}
227
228static inline __u64 cpu_info__lp_time(enum diag204_format type, void *hdr)
229{
230 if (type == INFO_SIMPLE)
231 return ((struct cpu_info *)hdr)->lp_time;
232 else /* INFO_EXT */
233 return ((struct x_cpu_info *)hdr)->lp_time;
234}
235
236static inline __u64 cpu_info__online_time(enum diag204_format type, void *hdr)
237{
238 if (type == INFO_SIMPLE)
239 return 0; /* online_time not available in simple info */
240 else /* INFO_EXT */
241 return ((struct x_cpu_info *)hdr)->online_time;
242}
243
244/* Physical header */
245
246struct phys_hdr {
247 char reserved1[1];
248 __u8 cpus;
249 char reserved2[6];
250 char mgm_name[8];
251} __attribute__ ((packed));
252
253struct x_phys_hdr {
254 char reserved1[1];
255 __u8 cpus;
256 char reserved2[6];
257 char mgm_name[8];
258 char reserved3[80];
259} __attribute__ ((packed));
260
261static inline int phys_hdr__size(enum diag204_format type)
262{
263 if (type == INFO_SIMPLE)
264 return sizeof(struct phys_hdr);
265 else /* INFO_EXT */
266 return sizeof(struct x_phys_hdr);
267}
268
269static inline __u8 phys_hdr__cpus(enum diag204_format type, void *hdr)
270{
271 if (type == INFO_SIMPLE)
272 return ((struct phys_hdr *)hdr)->cpus;
273 else /* INFO_EXT */
274 return ((struct x_phys_hdr *)hdr)->cpus;
275}
276
277/* Physical CPU info block */
278
279struct phys_cpu {
280 __u16 cpu_addr;
281 char reserved1[2];
282 __u8 ctidx;
283 char reserved2[3];
284 __u64 mgm_time;
285 char reserved3[8];
286} __attribute__ ((packed));
287
288struct x_phys_cpu {
289 __u16 cpu_addr;
290 char reserved1[2];
291 __u8 ctidx;
292 char reserved2[3];
293 __u64 mgm_time;
294 char reserved3[80];
295} __attribute__ ((packed));
296
297static inline int phys_cpu__size(enum diag204_format type)
298{
299 if (type == INFO_SIMPLE)
300 return sizeof(struct phys_cpu);
301 else /* INFO_EXT */
302 return sizeof(struct x_phys_cpu);
303}
304
305static inline __u16 phys_cpu__cpu_addr(enum diag204_format type, void *hdr)
306{
307 if (type == INFO_SIMPLE)
308 return ((struct phys_cpu *)hdr)->cpu_addr;
309 else /* INFO_EXT */
310 return ((struct x_phys_cpu *)hdr)->cpu_addr;
311}
312
313static inline __u64 phys_cpu__mgm_time(enum diag204_format type, void *hdr)
314{
315 if (type == INFO_SIMPLE)
316 return ((struct phys_cpu *)hdr)->mgm_time;
317 else /* INFO_EXT */
318 return ((struct x_phys_cpu *)hdr)->mgm_time;
319}
320
321static inline __u64 phys_cpu__ctidx(enum diag204_format type, void *hdr)
322{
323 if (type == INFO_SIMPLE)
324 return ((struct phys_cpu *)hdr)->ctidx;
325 else /* INFO_EXT */
326 return ((struct x_phys_cpu *)hdr)->ctidx;
327}
328
329/* Diagnose 204 functions */
330
331static int diag204(unsigned long subcode, unsigned long size, void *addr)
332{
333 register unsigned long _subcode asm("0") = subcode;
334 register unsigned long _size asm("1") = size;
335
336 asm volatile (" diag %2,%0,0x204\n"
337 "0: \n" ".section __ex_table,\"a\"\n"
338#ifndef __s390x__
339 " .align 4\n"
340 " .long 0b,0b\n"
341#else
342 " .align 8\n"
343 " .quad 0b,0b\n"
344#endif
345 ".previous":"+d" (_subcode), "+d"(_size)
346 :"d"(addr)
347 :"memory");
348 if (_subcode)
349 return -1;
350 else
351 return _size;
352}
353
354/*
355 * For the old diag subcode 4 with simple data format we have to use real
356 * memory. If we use subcode 6 or 7 with extended data format, we can (and
357 * should) use vmalloc, since we need a lot of memory in that case. Currently
358 * up to 93 pages!
359 */
360
361static void diag204_free_buffer(void)
362{
363 if (!diag204_buf)
364 return;
365 if (diag204_buf_vmalloc) {
366 vfree(diag204_buf_vmalloc);
367 diag204_buf_vmalloc = NULL;
368 } else {
369 free_pages((unsigned long) diag204_buf, 0);
370 }
371 diag204_buf_pages = 0;
372 diag204_buf = NULL;
373}
374
375static void *diag204_alloc_vbuf(int pages)
376{
377 /* The buffer has to be page aligned! */
378 diag204_buf_vmalloc = vmalloc(PAGE_SIZE * (pages + 1));
379 if (!diag204_buf_vmalloc)
380 return ERR_PTR(-ENOMEM);
381 diag204_buf = (void*)((unsigned long)diag204_buf_vmalloc
382 & ~0xfffUL) + 0x1000;
383 diag204_buf_pages = pages;
384 return diag204_buf;
385}
386
387static void *diag204_alloc_rbuf(void)
388{
389 diag204_buf = (void*)__get_free_pages(GFP_KERNEL,0);
390 if (diag204_buf)
391 return ERR_PTR(-ENOMEM);
392 diag204_buf_pages = 1;
393 return diag204_buf;
394}
395
396static void *diag204_get_buffer(enum diag204_format fmt, int *pages)
397{
398 if (diag204_buf) {
399 *pages = diag204_buf_pages;
400 return diag204_buf;
401 }
402 if (fmt == INFO_SIMPLE) {
403 *pages = 1;
404 return diag204_alloc_rbuf();
405 } else {/* INFO_EXT */
406 *pages = diag204(SUBC_RSI | INFO_EXT, 0, 0);
407 if (*pages <= 0)
408 return ERR_PTR(-ENOSYS);
409 else
410 return diag204_alloc_vbuf(*pages);
411 }
412}
413
414/*
415 * diag204_probe() has to find out, which type of diagnose 204 implementation
416 * we have on our machine. Currently there are three possible scenarios:
417 * - subcode 4 + simple data format (only one page)
418 * - subcode 4-6 + extended data format
419 * - subcode 4-7 + extended data format
420 *
421 * Subcode 5 is used to retrieve the size of the data, provided by subcodes
422 * 6 and 7. Subcode 7 basically has the same function as subcode 6. In addition
423 * to subcode 6 it provides also information about secondary cpus.
424 * In order to get as much information as possible, we first try
425 * subcode 7, then 6 and if both fail, we use subcode 4.
426 */
427
428static int diag204_probe(void)
429{
430 void *buf;
431 int pages, rc;
432
433 buf = diag204_get_buffer(INFO_EXT, &pages);
434 if (!IS_ERR(buf)) {
435 if (diag204(SUBC_STIB7 | INFO_EXT, pages, buf) >= 0) {
436 diag204_store_sc = SUBC_STIB7;
437 diag204_info_type = INFO_EXT;
438 goto out;
439 }
440 if (diag204(SUBC_STIB6 | INFO_EXT, pages, buf) >= 0) {
441 diag204_store_sc = SUBC_STIB7;
442 diag204_info_type = INFO_EXT;
443 goto out;
444 }
445 diag204_free_buffer();
446 }
447
448 /* subcodes 6 and 7 failed, now try subcode 4 */
449
450 buf = diag204_get_buffer(INFO_SIMPLE, &pages);
451 if (IS_ERR(buf)) {
452 rc = PTR_ERR(buf);
453 goto fail_alloc;
454 }
455 if (diag204(SUBC_STIB4 | INFO_SIMPLE, pages, buf) >= 0) {
456 diag204_store_sc = SUBC_STIB4;
457 diag204_info_type = INFO_SIMPLE;
458 goto out;
459 } else {
460 rc = -ENOSYS;
461 goto fail_store;
462 }
463out:
464 rc = 0;
465fail_store:
466 diag204_free_buffer();
467fail_alloc:
468 return rc;
469}
470
471static void *diag204_store(void)
472{
473 void *buf;
474 int pages;
475
476 buf = diag204_get_buffer(diag204_info_type, &pages);
477 if (IS_ERR(buf))
478 goto out;
479 if (diag204(diag204_store_sc | diag204_info_type, pages, buf) < 0)
480 return ERR_PTR(-ENOSYS);
481out:
482 return buf;
483}
484
485/* Diagnose 224 functions */
486
487static void diag224(void *ptr)
488{
489 asm volatile(" diag %0,%1,0x224\n"
490 : :"d" (0), "d"(ptr) : "memory");
491}
492
493static int diag224_get_name_table(void)
494{
495 /* memory must be below 2GB */
496 diag224_cpu_names = kmalloc(PAGE_SIZE, GFP_KERNEL | GFP_DMA);
497 if (!diag224_cpu_names)
498 return -ENOMEM;
499 diag224(diag224_cpu_names);
500 EBCASC(diag224_cpu_names + 16, (*diag224_cpu_names + 1) * 16);
501 return 0;
502}
503
504static void diag224_delete_name_table(void)
505{
506 kfree(diag224_cpu_names);
507}
508
509static int diag224_idx2name(int index, char *name)
510{
511 memcpy(name, diag224_cpu_names + ((index + 1) * CPU_NAME_LEN),
512 CPU_NAME_LEN);
513 name[CPU_NAME_LEN] = 0;
514 strstrip(name);
515 return 0;
516}
517
518__init int hypfs_diag_init(void)
519{
520 int rc;
521
522 if (diag204_probe()) {
523 printk(KERN_ERR "hypfs: diag 204 not working.");
524 return -ENODATA;
525 }
526 rc = diag224_get_name_table();
527 if (rc) {
528 diag224_delete_name_table();
529 printk(KERN_ERR "hypfs: could not get name table.\n");
530 }
531 return rc;
532}
533
534__exit void hypfs_diag_exit(void)
535{
536 diag224_delete_name_table();
537 diag204_free_buffer();
538}
539
540/*
541 * Functions to create the directory structure
542 * *******************************************
543 */
544
545static int hypfs_create_cpu_files(struct super_block *sb,
546 struct dentry *cpus_dir, void *cpu_info)
547{
548 struct dentry *cpu_dir;
549 char buffer[TMP_SIZE];
550 void *rc;
551
552 snprintf(buffer, TMP_SIZE, "%d", cpu_info__cpu_addr(diag204_info_type,
553 cpu_info));
554 cpu_dir = hypfs_mkdir(sb, cpus_dir, buffer);
555 rc = hypfs_create_u64(sb, cpu_dir, "mgmtime",
556 cpu_info__acc_time(diag204_info_type, cpu_info) -
557 cpu_info__lp_time(diag204_info_type, cpu_info));
558 if (IS_ERR(rc))
559 return PTR_ERR(rc);
560 rc = hypfs_create_u64(sb, cpu_dir, "cputime",
561 cpu_info__lp_time(diag204_info_type, cpu_info));
562 if (IS_ERR(rc))
563 return PTR_ERR(rc);
564 if (diag204_info_type == INFO_EXT) {
565 rc = hypfs_create_u64(sb, cpu_dir, "onlinetime",
566 cpu_info__online_time(diag204_info_type,
567 cpu_info));
568 if (IS_ERR(rc))
569 return PTR_ERR(rc);
570 }
571 diag224_idx2name(cpu_info__ctidx(diag204_info_type, cpu_info), buffer);
572 rc = hypfs_create_str(sb, cpu_dir, "type", buffer);
573 if (IS_ERR(rc))
574 return PTR_ERR(rc);
575 return 0;
576}
577
578static void *hypfs_create_lpar_files(struct super_block *sb,
579 struct dentry *systems_dir, void *part_hdr)
580{
581 struct dentry *cpus_dir;
582 struct dentry *lpar_dir;
583 char lpar_name[LPAR_NAME_LEN + 1];
584 void *cpu_info;
585 int i;
586
587 part_hdr__part_name(diag204_info_type, part_hdr, lpar_name);
588 lpar_name[LPAR_NAME_LEN] = 0;
589 lpar_dir = hypfs_mkdir(sb, systems_dir, lpar_name);
590 if (IS_ERR(lpar_dir))
591 return lpar_dir;
592 cpus_dir = hypfs_mkdir(sb, lpar_dir, "cpus");
593 if (IS_ERR(cpus_dir))
594 return cpus_dir;
595 cpu_info = part_hdr + part_hdr__size(diag204_info_type);
596 for (i = 0; i < part_hdr__rcpus(diag204_info_type, part_hdr); i++) {
597 int rc;
598 rc = hypfs_create_cpu_files(sb, cpus_dir, cpu_info);
599 if (rc)
600 return ERR_PTR(rc);
601 cpu_info += cpu_info__size(diag204_info_type);
602 }
603 return cpu_info;
604}
605
606static int hypfs_create_phys_cpu_files(struct super_block *sb,
607 struct dentry *cpus_dir, void *cpu_info)
608{
609 struct dentry *cpu_dir;
610 char buffer[TMP_SIZE];
611 void *rc;
612
613 snprintf(buffer, TMP_SIZE, "%i", phys_cpu__cpu_addr(diag204_info_type,
614 cpu_info));
615 cpu_dir = hypfs_mkdir(sb, cpus_dir, buffer);
616 if (IS_ERR(cpu_dir))
617 return PTR_ERR(cpu_dir);
618 rc = hypfs_create_u64(sb, cpu_dir, "mgmtime",
619 phys_cpu__mgm_time(diag204_info_type, cpu_info));
620 if (IS_ERR(rc))
621 return PTR_ERR(rc);
622 diag224_idx2name(phys_cpu__ctidx(diag204_info_type, cpu_info), buffer);
623 rc = hypfs_create_str(sb, cpu_dir, "type", buffer);
624 if (IS_ERR(rc))
625 return PTR_ERR(rc);
626 return 0;
627}
628
629static void *hypfs_create_phys_files(struct super_block *sb,
630 struct dentry *parent_dir, void *phys_hdr)
631{
632 int i;
633 void *cpu_info;
634 struct dentry *cpus_dir;
635
636 cpus_dir = hypfs_mkdir(sb, parent_dir, "cpus");
637 if (IS_ERR(cpus_dir))
638 return cpus_dir;
639 cpu_info = phys_hdr + phys_hdr__size(diag204_info_type);
640 for (i = 0; i < phys_hdr__cpus(diag204_info_type, phys_hdr); i++) {
641 int rc;
642 rc = hypfs_create_phys_cpu_files(sb, cpus_dir, cpu_info);
643 if (rc)
644 return ERR_PTR(rc);
645 cpu_info += phys_cpu__size(diag204_info_type);
646 }
647 return cpu_info;
648}
649
650int hypfs_diag_create_files(struct super_block *sb, struct dentry *root)
651{
652 struct dentry *systems_dir, *hyp_dir;
653 void *time_hdr, *part_hdr;
654 int i, rc;
655 void *buffer, *ptr;
656
657 buffer = diag204_store();
658 if (IS_ERR(buffer))
659 return PTR_ERR(buffer);
660
661 systems_dir = hypfs_mkdir(sb, root, "systems");
662 if (IS_ERR(systems_dir)) {
663 rc = PTR_ERR(systems_dir);
664 goto err_out;
665 }
666 time_hdr = (struct x_info_blk_hdr *)buffer;
667 part_hdr = time_hdr + info_blk_hdr__size(diag204_info_type);
668 for (i = 0; i < info_blk_hdr__npar(diag204_info_type, time_hdr); i++) {
669 part_hdr = hypfs_create_lpar_files(sb, systems_dir, part_hdr);
670 if (IS_ERR(part_hdr)) {
671 rc = PTR_ERR(part_hdr);
672 goto err_out;
673 }
674 }
675 if (info_blk_hdr__flags(diag204_info_type, time_hdr) & LPAR_PHYS_FLG) {
676 ptr = hypfs_create_phys_files(sb, root, part_hdr);
677 if (IS_ERR(ptr)) {
678 rc = PTR_ERR(ptr);
679 goto err_out;
680 }
681 }
682 hyp_dir = hypfs_mkdir(sb, root, "hyp");
683 if (IS_ERR(hyp_dir)) {
684 rc = PTR_ERR(hyp_dir);
685 goto err_out;
686 }
687 ptr = hypfs_create_str(sb, hyp_dir, "type", "LPAR Hypervisor");
688 if (IS_ERR(ptr)) {
689 rc = PTR_ERR(ptr);
690 goto err_out;
691 }
692 rc = 0;
693
694err_out:
695 return rc;
696}
diff --git a/arch/s390/hypfs/hypfs_diag.h b/arch/s390/hypfs/hypfs_diag.h
new file mode 100644
index 00000000000..793dea6b9bb
--- /dev/null
+++ b/arch/s390/hypfs/hypfs_diag.h
@@ -0,0 +1,16 @@
1/*
2 * fs/hypfs/hypfs_diag.h
3 * Hypervisor filesystem for Linux on s390.
4 *
5 * Copyright (C) IBM Corp. 2006
6 * Author(s): Michael Holzheu <holzheu@de.ibm.com>
7 */
8
9#ifndef _HYPFS_DIAG_H_
10#define _HYPFS_DIAG_H_
11
12extern int hypfs_diag_init(void);
13extern void hypfs_diag_exit(void);
14extern int hypfs_diag_create_files(struct super_block *sb, struct dentry *root);
15
16#endif /* _HYPFS_DIAG_H_ */
diff --git a/arch/s390/hypfs/inode.c b/arch/s390/hypfs/inode.c
new file mode 100644
index 00000000000..bab560a1357
--- /dev/null
+++ b/arch/s390/hypfs/inode.c
@@ -0,0 +1,492 @@
1/*
2 * fs/hypfs/inode.c
3 * Hypervisor filesystem for Linux on s390.
4 *
5 * Copyright (C) IBM Corp. 2006
6 * Author(s): Michael Holzheu <holzheu@de.ibm.com>
7 */
8
9#include <linux/types.h>
10#include <linux/errno.h>
11#include <linux/fs.h>
12#include <linux/namei.h>
13#include <linux/vfs.h>
14#include <linux/pagemap.h>
15#include <linux/gfp.h>
16#include <linux/time.h>
17#include <linux/parser.h>
18#include <linux/sysfs.h>
19#include <linux/module.h>
20#include <asm/ebcdic.h>
21#include "hypfs.h"
22#include "hypfs_diag.h"
23
24#define HYPFS_MAGIC 0x687970 /* ASCII 'hyp' */
25#define TMP_SIZE 64 /* size of temporary buffers */
26
27static struct dentry *hypfs_create_update_file(struct super_block *sb,
28 struct dentry *dir);
29
30struct hypfs_sb_info {
31 uid_t uid; /* uid used for files and dirs */
32 gid_t gid; /* gid used for files and dirs */
33 struct dentry *update_file; /* file to trigger update */
34 time_t last_update; /* last update time in secs since 1970 */
35 struct mutex lock; /* lock to protect update process */
36};
37
38static struct file_operations hypfs_file_ops;
39static struct file_system_type hypfs_type;
40static struct super_operations hypfs_s_ops;
41
42/* start of list of all dentries, which have to be deleted on update */
43static struct dentry *hypfs_last_dentry;
44
45static void hypfs_update_update(struct super_block *sb)
46{
47 struct hypfs_sb_info *sb_info = sb->s_fs_info;
48 struct inode *inode = sb_info->update_file->d_inode;
49
50 sb_info->last_update = get_seconds();
51 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
52}
53
54/* directory tree removal functions */
55
56static void hypfs_add_dentry(struct dentry *dentry)
57{
58 dentry->d_fsdata = hypfs_last_dentry;
59 hypfs_last_dentry = dentry;
60}
61
62static void hypfs_remove(struct dentry *dentry)
63{
64 struct dentry *parent;
65
66 parent = dentry->d_parent;
67 if (S_ISDIR(dentry->d_inode->i_mode))
68 simple_rmdir(parent->d_inode, dentry);
69 else
70 simple_unlink(parent->d_inode, dentry);
71 d_delete(dentry);
72 dput(dentry);
73}
74
75static void hypfs_delete_tree(struct dentry *root)
76{
77 while (hypfs_last_dentry) {
78 struct dentry *next_dentry;
79 next_dentry = hypfs_last_dentry->d_fsdata;
80 hypfs_remove(hypfs_last_dentry);
81 hypfs_last_dentry = next_dentry;
82 }
83}
84
85static struct inode *hypfs_make_inode(struct super_block *sb, int mode)
86{
87 struct inode *ret = new_inode(sb);
88
89 if (ret) {
90 struct hypfs_sb_info *hypfs_info = sb->s_fs_info;
91 ret->i_mode = mode;
92 ret->i_uid = hypfs_info->uid;
93 ret->i_gid = hypfs_info->gid;
94 ret->i_blksize = PAGE_CACHE_SIZE;
95 ret->i_blocks = 0;
96 ret->i_atime = ret->i_mtime = ret->i_ctime = CURRENT_TIME;
97 if (mode & S_IFDIR)
98 ret->i_nlink = 2;
99 else
100 ret->i_nlink = 1;
101 }
102 return ret;
103}
104
105static void hypfs_drop_inode(struct inode *inode)
106{
107 kfree(inode->u.generic_ip);
108 generic_delete_inode(inode);
109}
110
111static int hypfs_open(struct inode *inode, struct file *filp)
112{
113 char *data = filp->f_dentry->d_inode->u.generic_ip;
114 struct hypfs_sb_info *fs_info;
115
116 if (filp->f_mode & FMODE_WRITE) {
117 if (!(inode->i_mode & S_IWUGO))
118 return -EACCES;
119 }
120 if (filp->f_mode & FMODE_READ) {
121 if (!(inode->i_mode & S_IRUGO))
122 return -EACCES;
123 }
124
125 fs_info = inode->i_sb->s_fs_info;
126 if(data) {
127 mutex_lock(&fs_info->lock);
128 filp->private_data = kstrdup(data, GFP_KERNEL);
129 if (!filp->private_data) {
130 mutex_unlock(&fs_info->lock);
131 return -ENOMEM;
132 }
133 mutex_unlock(&fs_info->lock);
134 }
135 return 0;
136}
137
138static ssize_t hypfs_aio_read(struct kiocb *iocb, __user char *buf,
139 size_t count, loff_t offset)
140{
141 char *data;
142 size_t len;
143 struct file *filp = iocb->ki_filp;
144
145 data = filp->private_data;
146 len = strlen(data);
147 if (offset > len) {
148 count = 0;
149 goto out;
150 }
151 if (count > len - offset)
152 count = len - offset;
153 if (copy_to_user(buf, data + offset, count)) {
154 count = -EFAULT;
155 goto out;
156 }
157 iocb->ki_pos += count;
158 file_accessed(filp);
159out:
160 return count;
161}
162static ssize_t hypfs_aio_write(struct kiocb *iocb, const char __user *buf,
163 size_t count, loff_t pos)
164{
165 int rc;
166 struct super_block *sb;
167 struct hypfs_sb_info *fs_info;
168
169 sb = iocb->ki_filp->f_dentry->d_inode->i_sb;
170 fs_info = sb->s_fs_info;
171 /*
172 * Currently we only allow one update per second for two reasons:
173 * 1. diag 204 is VERY expensive
174 * 2. If several processes do updates in parallel and then read the
175 * hypfs data, the likelihood of collisions is reduced, if we restrict
176 * the minimum update interval. A collision occurs, if during the
177 * data gathering of one process another process triggers an update
178 * If the first process wants to ensure consistent data, it has
179 * to restart data collection in this case.
180 */
181 mutex_lock(&fs_info->lock);
182 if (fs_info->last_update == get_seconds()) {
183 rc = -EBUSY;
184 goto out;
185 }
186 hypfs_delete_tree(sb->s_root);
187 rc = hypfs_diag_create_files(sb, sb->s_root);
188 if (rc) {
189 printk(KERN_ERR "hypfs: Update failed\n");
190 hypfs_delete_tree(sb->s_root);
191 goto out;
192 }
193 hypfs_update_update(sb);
194 rc = count;
195out:
196 mutex_unlock(&fs_info->lock);
197 return rc;
198}
199
200static int hypfs_release(struct inode *inode, struct file *filp)
201{
202 kfree(filp->private_data);
203 return 0;
204}
205
206enum { opt_uid, opt_gid, opt_err };
207
208static match_table_t hypfs_tokens = {
209 {opt_uid, "uid=%u"},
210 {opt_gid, "gid=%u"},
211 {opt_err, NULL}
212};
213
214static int hypfs_parse_options(char *options, struct super_block *sb)
215{
216 char *str;
217 substring_t args[MAX_OPT_ARGS];
218
219 if (!options)
220 return 0;
221 while ((str = strsep(&options, ",")) != NULL) {
222 int token, option;
223 struct hypfs_sb_info *hypfs_info = sb->s_fs_info;
224
225 if (!*str)
226 continue;
227 token = match_token(str, hypfs_tokens, args);
228 switch (token) {
229 case opt_uid:
230 if (match_int(&args[0], &option))
231 return -EINVAL;
232 hypfs_info->uid = option;
233 break;
234 case opt_gid:
235 if (match_int(&args[0], &option))
236 return -EINVAL;
237 hypfs_info->gid = option;
238 break;
239 case opt_err:
240 default:
241 printk(KERN_ERR "hypfs: Unrecognized mount option "
242 "\"%s\" or missing value\n", str);
243 return -EINVAL;
244 }
245 }
246 return 0;
247}
248
249static int hypfs_fill_super(struct super_block *sb, void *data, int silent)
250{
251 struct inode *root_inode;
252 struct dentry *root_dentry;
253 int rc = 0;
254 struct hypfs_sb_info *sbi;
255
256 sbi = kzalloc(sizeof(struct hypfs_sb_info), GFP_KERNEL);
257 if (!sbi)
258 return -ENOMEM;
259 mutex_init(&sbi->lock);
260 sbi->uid = current->uid;
261 sbi->gid = current->gid;
262 sb->s_fs_info = sbi;
263 sb->s_blocksize = PAGE_CACHE_SIZE;
264 sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
265 sb->s_magic = HYPFS_MAGIC;
266 sb->s_op = &hypfs_s_ops;
267 if (hypfs_parse_options(data, sb)) {
268 rc = -EINVAL;
269 goto err_alloc;
270 }
271 root_inode = hypfs_make_inode(sb, S_IFDIR | 0755);
272 if (!root_inode) {
273 rc = -ENOMEM;
274 goto err_alloc;
275 }
276 root_inode->i_op = &simple_dir_inode_operations;
277 root_inode->i_fop = &simple_dir_operations;
278 root_dentry = d_alloc_root(root_inode);
279 if (!root_dentry) {
280 iput(root_inode);
281 rc = -ENOMEM;
282 goto err_alloc;
283 }
284 rc = hypfs_diag_create_files(sb, root_dentry);
285 if (rc)
286 goto err_tree;
287 sbi->update_file = hypfs_create_update_file(sb, root_dentry);
288 if (IS_ERR(sbi->update_file)) {
289 rc = PTR_ERR(sbi->update_file);
290 goto err_tree;
291 }
292 hypfs_update_update(sb);
293 sb->s_root = root_dentry;
294 return 0;
295
296err_tree:
297 hypfs_delete_tree(root_dentry);
298 d_genocide(root_dentry);
299 dput(root_dentry);
300err_alloc:
301 kfree(sbi);
302 return rc;
303}
304
305static struct super_block *hypfs_get_super(struct file_system_type *fst,
306 int flags, const char *devname,
307 void *data)
308{
309 return get_sb_single(fst, flags, data, hypfs_fill_super);
310}
311
312static void hypfs_kill_super(struct super_block *sb)
313{
314 struct hypfs_sb_info *sb_info = sb->s_fs_info;
315
316 hypfs_delete_tree(sb->s_root);
317 hypfs_remove(sb_info->update_file);
318 kfree(sb->s_fs_info);
319 sb->s_fs_info = NULL;
320 kill_litter_super(sb);
321}
322
323static struct dentry *hypfs_create_file(struct super_block *sb,
324 struct dentry *parent, const char *name,
325 char *data, mode_t mode)
326{
327 struct dentry *dentry;
328 struct inode *inode;
329 struct qstr qname;
330
331 qname.name = name;
332 qname.len = strlen(name);
333 qname.hash = full_name_hash(name, qname.len);
334 dentry = lookup_one_len(name, parent, strlen(name));
335 if (IS_ERR(dentry))
336 return ERR_PTR(-ENOMEM);
337 inode = hypfs_make_inode(sb, mode);
338 if (!inode) {
339 dput(dentry);
340 return ERR_PTR(-ENOMEM);
341 }
342 if (mode & S_IFREG) {
343 inode->i_fop = &hypfs_file_ops;
344 if (data)
345 inode->i_size = strlen(data);
346 else
347 inode->i_size = 0;
348 } else if (mode & S_IFDIR) {
349 inode->i_op = &simple_dir_inode_operations;
350 inode->i_fop = &simple_dir_operations;
351 parent->d_inode->i_nlink++;
352 } else
353 BUG();
354 inode->u.generic_ip = data;
355 d_instantiate(dentry, inode);
356 dget(dentry);
357 return dentry;
358}
359
360struct dentry *hypfs_mkdir(struct super_block *sb, struct dentry *parent,
361 const char *name)
362{
363 struct dentry *dentry;
364
365 dentry = hypfs_create_file(sb, parent, name, NULL, S_IFDIR | DIR_MODE);
366 if (IS_ERR(dentry))
367 return dentry;
368 hypfs_add_dentry(dentry);
369 parent->d_inode->i_nlink++;
370 return dentry;
371}
372
373static struct dentry *hypfs_create_update_file(struct super_block *sb,
374 struct dentry *dir)
375{
376 struct dentry *dentry;
377
378 dentry = hypfs_create_file(sb, dir, "update", NULL,
379 S_IFREG | UPDATE_FILE_MODE);
380 /*
381 * We do not put the update file on the 'delete' list with
382 * hypfs_add_dentry(), since it should not be removed when the tree
383 * is updated.
384 */
385 return dentry;
386}
387
388struct dentry *hypfs_create_u64(struct super_block *sb, struct dentry *dir,
389 const char *name, __u64 value)
390{
391 char *buffer;
392 char tmp[TMP_SIZE];
393 struct dentry *dentry;
394
395 snprintf(tmp, TMP_SIZE, "%lld\n", (unsigned long long int)value);
396 buffer = kstrdup(tmp, GFP_KERNEL);
397 if (!buffer)
398 return ERR_PTR(-ENOMEM);
399 dentry =
400 hypfs_create_file(sb, dir, name, buffer, S_IFREG | REG_FILE_MODE);
401 if (IS_ERR(dentry)) {
402 kfree(buffer);
403 return ERR_PTR(-ENOMEM);
404 }
405 hypfs_add_dentry(dentry);
406 return dentry;
407}
408
409struct dentry *hypfs_create_str(struct super_block *sb, struct dentry *dir,
410 const char *name, char *string)
411{
412 char *buffer;
413 struct dentry *dentry;
414
415 buffer = kmalloc(strlen(string) + 2, GFP_KERNEL);
416 if (!buffer)
417 return ERR_PTR(-ENOMEM);
418 sprintf(buffer, "%s\n", string);
419 dentry =
420 hypfs_create_file(sb, dir, name, buffer, S_IFREG | REG_FILE_MODE);
421 if (IS_ERR(dentry)) {
422 kfree(buffer);
423 return ERR_PTR(-ENOMEM);
424 }
425 hypfs_add_dentry(dentry);
426 return dentry;
427}
428
429static struct file_operations hypfs_file_ops = {
430 .open = hypfs_open,
431 .release = hypfs_release,
432 .read = do_sync_read,
433 .write = do_sync_write,
434 .aio_read = hypfs_aio_read,
435 .aio_write = hypfs_aio_write,
436};
437
438static struct file_system_type hypfs_type = {
439 .owner = THIS_MODULE,
440 .name = "s390_hypfs",
441 .get_sb = hypfs_get_super,
442 .kill_sb = hypfs_kill_super
443};
444
445static struct super_operations hypfs_s_ops = {
446 .statfs = simple_statfs,
447 .drop_inode = hypfs_drop_inode,
448};
449
450static decl_subsys(s390, NULL, NULL);
451
452static int __init hypfs_init(void)
453{
454 int rc;
455
456 if (MACHINE_IS_VM)
457 return -ENODATA;
458 if (hypfs_diag_init()) {
459 rc = -ENODATA;
460 goto fail_diag;
461 }
462 kset_set_kset_s(&s390_subsys, hypervisor_subsys);
463 rc = subsystem_register(&s390_subsys);
464 if (rc)
465 goto fail_sysfs;
466 rc = register_filesystem(&hypfs_type);
467 if (rc)
468 goto fail_filesystem;
469 return 0;
470
471fail_filesystem:
472 subsystem_unregister(&s390_subsys);
473fail_sysfs:
474 hypfs_diag_exit();
475fail_diag:
476 printk(KERN_ERR "hypfs: Initialization failed with rc = %i.\n", rc);
477 return rc;
478}
479
480static void __exit hypfs_exit(void)
481{
482 hypfs_diag_exit();
483 unregister_filesystem(&hypfs_type);
484 subsystem_unregister(&s390_subsys);
485}
486
487module_init(hypfs_init)
488module_exit(hypfs_exit)
489
490MODULE_LICENSE("GPL");
491MODULE_AUTHOR("Michael Holzheu <holzheu@de.ibm.com>");
492MODULE_DESCRIPTION("s390 Hypervisor Filesystem");