]> bbs.cooldavid.org Git - net-next-2.6.git/blame - arch/powerpc/platforms/pseries/phyp_dump.c
include cleanup: Update gfp.h and slab.h includes to prepare for breaking implicit...
[net-next-2.6.git] / arch / powerpc / platforms / pseries / phyp_dump.c
CommitLineData
6ac26c8a
MA
1/*
2 * Hypervisor-assisted dump
3 *
4 * Linas Vepstas, Manish Ahuja 2008
5 * Copyright 2008 IBM Corp.
6 *
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation; either version
10 * 2 of the License, or (at your option) any later version.
11 *
12 */
13
5a0e3ad6 14#include <linux/gfp.h>
6ac26c8a 15#include <linux/init.h>
d5a29c7a 16#include <linux/kobject.h>
6ac26c8a 17#include <linux/mm.h>
d5a29c7a 18#include <linux/of.h>
6ac26c8a
MA
19#include <linux/pfn.h>
20#include <linux/swap.h>
d5a29c7a 21#include <linux/sysfs.h>
6ac26c8a
MA
22
23#include <asm/page.h>
24#include <asm/phyp_dump.h>
25#include <asm/machdep.h>
26#include <asm/prom.h>
d5a29c7a 27#include <asm/rtas.h>
6ac26c8a
MA
28
29/* Variables, used to communicate data between early boot and late boot */
30static struct phyp_dump phyp_dump_vars;
31struct phyp_dump *phyp_dump_info = &phyp_dump_vars;
32
2c4f4113
MA
33static int ibm_configure_kernel_dump;
34/* ------------------------------------------------- */
35/* RTAS interfaces to declare the dump regions */
36
37struct dump_section {
38 u32 dump_flags;
39 u16 source_type;
40 u16 error_flags;
41 u64 source_address;
42 u64 source_length;
43 u64 length_copied;
44 u64 destination_address;
45};
46
47struct phyp_dump_header {
48 u32 version;
49 u16 num_of_sections;
50 u16 status;
51
52 u32 first_offset_section;
53 u32 dump_disk_section;
54 u64 block_num_dd;
55 u64 num_of_blocks_dd;
56 u32 offset_dd;
57 u32 maxtime_to_auto;
58 /* No dump disk path string used */
59
60 struct dump_section cpu_data;
61 struct dump_section hpte_data;
62 struct dump_section kernel_data;
63};
64
65/* The dump header *must be* in low memory, so .bss it */
66static struct phyp_dump_header phdr;
67
68#define NUM_DUMP_SECTIONS 3
69#define DUMP_HEADER_VERSION 0x1
70#define DUMP_REQUEST_FLAG 0x1
71#define DUMP_SOURCE_CPU 0x0001
72#define DUMP_SOURCE_HPTE 0x0002
73#define DUMP_SOURCE_RMO 0x0011
a9c508da
MA
74#define DUMP_ERROR_FLAG 0x2000
75#define DUMP_TRIGGERED 0x4000
76#define DUMP_PERFORMED 0x8000
77
2c4f4113
MA
78
79/**
80 * init_dump_header() - initialize the header declaring a dump
81 * Returns: length of dump save area.
82 *
83 * When the hypervisor saves crashed state, it needs to put
84 * it somewhere. The dump header tells the hypervisor where
85 * the data can be saved.
86 */
87static unsigned long init_dump_header(struct phyp_dump_header *ph)
88{
89 unsigned long addr_offset = 0;
90
91 /* Set up the dump header */
92 ph->version = DUMP_HEADER_VERSION;
93 ph->num_of_sections = NUM_DUMP_SECTIONS;
94 ph->status = 0;
95
96 ph->first_offset_section =
97 (u32)offsetof(struct phyp_dump_header, cpu_data);
98 ph->dump_disk_section = 0;
99 ph->block_num_dd = 0;
100 ph->num_of_blocks_dd = 0;
101 ph->offset_dd = 0;
102
103 ph->maxtime_to_auto = 0; /* disabled */
104
105 /* The first two sections are mandatory */
106 ph->cpu_data.dump_flags = DUMP_REQUEST_FLAG;
107 ph->cpu_data.source_type = DUMP_SOURCE_CPU;
108 ph->cpu_data.source_address = 0;
109 ph->cpu_data.source_length = phyp_dump_info->cpu_state_size;
110 ph->cpu_data.destination_address = addr_offset;
111 addr_offset += phyp_dump_info->cpu_state_size;
112
113 ph->hpte_data.dump_flags = DUMP_REQUEST_FLAG;
114 ph->hpte_data.source_type = DUMP_SOURCE_HPTE;
115 ph->hpte_data.source_address = 0;
116 ph->hpte_data.source_length = phyp_dump_info->hpte_region_size;
117 ph->hpte_data.destination_address = addr_offset;
118 addr_offset += phyp_dump_info->hpte_region_size;
119
120 /* This section describes the low kernel region */
121 ph->kernel_data.dump_flags = DUMP_REQUEST_FLAG;
122 ph->kernel_data.source_type = DUMP_SOURCE_RMO;
123 ph->kernel_data.source_address = PHYP_DUMP_RMR_START;
124 ph->kernel_data.source_length = PHYP_DUMP_RMR_END;
125 ph->kernel_data.destination_address = addr_offset;
126 addr_offset += ph->kernel_data.source_length;
127
128 return addr_offset;
129}
130
599c1aa5
MA
/*
 * Debug aid: dump every field of a dump header to the kernel log.
 * Compiles to an empty function unless DEBUG is defined; a NULL header
 * is silently ignored.
 */
static void print_dump_header(const struct phyp_dump_header *ph)
{
#ifdef DEBUG
	const struct dump_section *cpu, *hpte, *srsd;

	if (!ph)
		return;

	cpu = &ph->cpu_data;
	hpte = &ph->hpte_data;
	srsd = &ph->kernel_data;

	/* General header fields */
	printk(KERN_INFO "dump header:\n");
	printk(KERN_INFO "version = %d\n", ph->version);
	printk(KERN_INFO "Sections = %d\n", ph->num_of_sections);
	printk(KERN_INFO "Status = 0x%x\n", ph->status);

	/* No dump disk is used, so these should all be zero */
	printk(KERN_INFO "Offset to first section 0x%x\n",
		ph->first_offset_section);
	printk(KERN_INFO "dump disk sections should be zero\n");
	printk(KERN_INFO "dump disk section = %d\n", ph->dump_disk_section);
	printk(KERN_INFO "block num = %lld\n", ph->block_num_dd);
	printk(KERN_INFO "number of blocks = %lld\n", ph->num_of_blocks_dd);
	printk(KERN_INFO "dump disk offset = %d\n", ph->offset_dd);
	printk(KERN_INFO "Max auto time= %d\n", ph->maxtime_to_auto);

	/* CPU state section */
	printk(KERN_INFO " CPU AREA\n");
	printk(KERN_INFO "cpu dump_flags =%d\n", cpu->dump_flags);
	printk(KERN_INFO "cpu source_type =%d\n", cpu->source_type);
	printk(KERN_INFO "cpu error_flags =%d\n", cpu->error_flags);
	printk(KERN_INFO "cpu source_address =%llx\n", cpu->source_address);
	printk(KERN_INFO "cpu source_length =%llx\n", cpu->source_length);
	printk(KERN_INFO "cpu length_copied =%llx\n", cpu->length_copied);

	/* HPTE section */
	printk(KERN_INFO " HPTE AREA\n");
	printk(KERN_INFO "HPTE dump_flags =%d\n", hpte->dump_flags);
	printk(KERN_INFO "HPTE source_type =%d\n", hpte->source_type);
	printk(KERN_INFO "HPTE error_flags =%d\n", hpte->error_flags);
	printk(KERN_INFO "HPTE source_address =%llx\n", hpte->source_address);
	printk(KERN_INFO "HPTE source_length =%llx\n", hpte->source_length);
	printk(KERN_INFO "HPTE length_copied =%llx\n", hpte->length_copied);

	/* Low kernel region section */
	printk(KERN_INFO " SRSD AREA\n");
	printk(KERN_INFO "SRSD dump_flags =%d\n", srsd->dump_flags);
	printk(KERN_INFO "SRSD source_type =%d\n", srsd->source_type);
	printk(KERN_INFO "SRSD error_flags =%d\n", srsd->error_flags);
	printk(KERN_INFO "SRSD source_address =%llx\n", srsd->source_address);
	printk(KERN_INFO "SRSD source_length =%llx\n", srsd->source_length);
	printk(KERN_INFO "SRSD length_copied =%llx\n", srsd->length_copied);
#endif
}
188
7415d5e0
MA
189static ssize_t show_phyp_dump_active(struct kobject *kobj,
190 struct kobj_attribute *attr, char *buf)
191{
192
193 /* create filesystem entry so kdump is phyp-dump aware */
194 return sprintf(buf, "%lx\n", phyp_dump_info->phyp_dump_at_boot);
195}
196
197static struct kobj_attribute pdl = __ATTR(phyp_dump_active, 0600,
198 show_phyp_dump_active,
199 NULL);
200
2c4f4113
MA
201static void register_dump_area(struct phyp_dump_header *ph, unsigned long addr)
202{
203 int rc;
a9c508da
MA
204
205 /* Add addr value if not initialized before */
206 if (ph->cpu_data.destination_address == 0) {
207 ph->cpu_data.destination_address += addr;
208 ph->hpte_data.destination_address += addr;
209 ph->kernel_data.destination_address += addr;
210 }
211
212 /* ToDo Invalidate kdump and free memory range. */
2c4f4113
MA
213
214 do {
215 rc = rtas_call(ibm_configure_kernel_dump, 3, 1, NULL,
216 1, ph, sizeof(struct phyp_dump_header));
217 } while (rtas_busy_delay(rc));
218
599c1aa5 219 if (rc) {
2c4f4113
MA
220 printk(KERN_ERR "phyp-dump: unexpected error (%d) on "
221 "register\n", rc);
599c1aa5 222 print_dump_header(ph);
7415d5e0 223 return;
599c1aa5 224 }
7415d5e0
MA
225
226 rc = sysfs_create_file(kernel_kobj, &pdl.attr);
227 if (rc)
228 printk(KERN_ERR "phyp-dump: unable to create sysfs"
229 " file (%d)\n", rc);
2c4f4113
MA
230}
231
a9c508da
MA
232static
233void invalidate_last_dump(struct phyp_dump_header *ph, unsigned long addr)
234{
235 int rc;
236
237 /* Add addr value if not initialized before */
238 if (ph->cpu_data.destination_address == 0) {
239 ph->cpu_data.destination_address += addr;
240 ph->hpte_data.destination_address += addr;
241 ph->kernel_data.destination_address += addr;
242 }
243
244 do {
245 rc = rtas_call(ibm_configure_kernel_dump, 3, 1, NULL,
246 2, ph, sizeof(struct phyp_dump_header));
247 } while (rtas_busy_delay(rc));
248
249 if (rc) {
250 printk(KERN_ERR "phyp-dump: unexpected error (%d) "
251 "on invalidate\n", rc);
252 print_dump_header(ph);
253 }
254}
255
d5a29c7a 256/* ------------------------------------------------- */
6ac26c8a
MA
257/**
258 * release_memory_range -- release memory previously lmb_reserved
259 * @start_pfn: starting physical frame number
260 * @nr_pages: number of pages to free.
261 *
262 * This routine will release memory that had been previously
263 * lmb_reserved in early boot. The released memory becomes
264 * available for genreal use.
265 */
a9c508da
MA
266static void release_memory_range(unsigned long start_pfn,
267 unsigned long nr_pages)
6ac26c8a
MA
268{
269 struct page *rpage;
270 unsigned long end_pfn;
271 long i;
272
273 end_pfn = start_pfn + nr_pages;
274
275 for (i = start_pfn; i <= end_pfn; i++) {
276 rpage = pfn_to_page(i);
277 if (PageReserved(rpage)) {
278 ClearPageReserved(rpage);
279 init_page_count(rpage);
280 __free_page(rpage);
281 totalram_pages++;
282 }
283 }
284}
285
fd35cff8
MA
286/**
287 * track_freed_range -- Counts the range being freed.
288 * Once the counter goes to zero, it re-registers dump for
289 * future use.
290 */
291static void
292track_freed_range(unsigned long addr, unsigned long length)
293{
294 static unsigned long scratch_area_size, reserved_area_size;
295
296 if (addr < phyp_dump_info->init_reserve_start)
297 return;
298
299 if ((addr >= phyp_dump_info->init_reserve_start) &&
300 (addr <= phyp_dump_info->init_reserve_start +
301 phyp_dump_info->init_reserve_size))
302 reserved_area_size += length;
303
304 if ((addr >= phyp_dump_info->reserved_scratch_addr) &&
305 (addr <= phyp_dump_info->reserved_scratch_addr +
306 phyp_dump_info->reserved_scratch_size))
307 scratch_area_size += length;
308
309 if ((reserved_area_size == phyp_dump_info->init_reserve_size) &&
310 (scratch_area_size == phyp_dump_info->reserved_scratch_size)) {
311
312 invalidate_last_dump(&phdr,
313 phyp_dump_info->reserved_scratch_addr);
314 register_dump_area(&phdr,
315 phyp_dump_info->reserved_scratch_addr);
316 }
317}
318
d5a29c7a
MA
319/* ------------------------------------------------- */
320/**
321 * sysfs_release_region -- sysfs interface to release memory range.
322 *
323 * Usage:
324 * "echo <start addr> <length> > /sys/kernel/release_region"
325 *
326 * Example:
327 * "echo 0x40000000 0x10000000 > /sys/kernel/release_region"
328 *
329 * will release 256MB starting at 1GB.
330 */
331static ssize_t store_release_region(struct kobject *kobj,
332 struct kobj_attribute *attr,
333 const char *buf, size_t count)
6ac26c8a 334{
d5a29c7a 335 unsigned long start_addr, length, end_addr;
6ac26c8a 336 unsigned long start_pfn, nr_pages;
d5a29c7a
MA
337 ssize_t ret;
338
339 ret = sscanf(buf, "%lx %lx", &start_addr, &length);
340 if (ret != 2)
341 return -EINVAL;
342
fd35cff8
MA
343 track_freed_range(start_addr, length);
344
d5a29c7a
MA
345 /* Range-check - don't free any reserved memory that
346 * wasn't reserved for phyp-dump */
347 if (start_addr < phyp_dump_info->init_reserve_start)
348 start_addr = phyp_dump_info->init_reserve_start;
349
350 end_addr = phyp_dump_info->init_reserve_start +
351 phyp_dump_info->init_reserve_size;
352 if (start_addr+length > end_addr)
353 length = end_addr - start_addr;
354
355 /* Release the region of memory assed in by user */
356 start_pfn = PFN_DOWN(start_addr);
357 nr_pages = PFN_DOWN(length);
358 release_memory_range(start_pfn, nr_pages);
359
360 return count;
361}
362
a9c508da
MA
363static ssize_t show_release_region(struct kobject *kobj,
364 struct kobj_attribute *attr, char *buf)
365{
366 u64 second_addr_range;
367
368 /* total reserved size - start of scratch area */
369 second_addr_range = phyp_dump_info->init_reserve_size -
370 phyp_dump_info->reserved_scratch_size;
802bdea8
SR
371 return sprintf(buf, "CPU:0x%llx-0x%llx: HPTE:0x%llx-0x%llx:"
372 " DUMP:0x%llx-0x%llx, 0x%lx-0x%llx:\n",
a9c508da
MA
373 phdr.cpu_data.destination_address,
374 phdr.cpu_data.length_copied,
375 phdr.hpte_data.destination_address,
376 phdr.hpte_data.length_copied,
377 phdr.kernel_data.destination_address,
378 phdr.kernel_data.length_copied,
379 phyp_dump_info->init_reserve_start,
380 second_addr_range);
381}
382
d5a29c7a 383static struct kobj_attribute rr = __ATTR(release_region, 0600,
a9c508da
MA
384 show_release_region,
385 store_release_region);
d5a29c7a
MA
386
387static int __init phyp_dump_setup(void)
388{
389 struct device_node *rtas;
2c4f4113
MA
390 const struct phyp_dump_header *dump_header = NULL;
391 unsigned long dump_area_start;
392 unsigned long dump_area_length;
d5a29c7a
MA
393 int header_len = 0;
394 int rc;
6ac26c8a
MA
395
396 /* If no memory was reserved in early boot, there is nothing to do */
397 if (phyp_dump_info->init_reserve_size == 0)
398 return 0;
399
d5a29c7a
MA
400 /* Return if phyp dump not supported */
401 if (!phyp_dump_info->phyp_dump_configured)
402 return -ENOSYS;
403
2c4f4113
MA
404 /* Is there dump data waiting for us? If there isn't,
405 * then register a new dump area, and release all of
406 * the rest of the reserved ram.
407 *
408 * The /rtas/ibm,kernel-dump rtas node is present only
409 * if there is dump data waiting for us.
410 */
d5a29c7a
MA
411 rtas = of_find_node_by_path("/rtas");
412 if (rtas) {
413 dump_header = of_get_property(rtas, "ibm,kernel-dump",
414 &header_len);
415 of_node_put(rtas);
416 }
417
532774ec
TB
418 ibm_configure_kernel_dump = rtas_token("ibm,configure-kernel-dump");
419
599c1aa5 420 print_dump_header(dump_header);
2c4f4113 421 dump_area_length = init_dump_header(&phdr);
2c4f4113
MA
422 /* align down */
423 dump_area_start = phyp_dump_info->init_reserve_start & PAGE_MASK;
424
425 if (dump_header == NULL) {
426 register_dump_area(&phdr, dump_area_start);
d5a29c7a 427 return 0;
2c4f4113 428 }
d5a29c7a 429
a9c508da
MA
430 /* re-register the dump area, if old dump was invalid */
431 if ((dump_header) && (dump_header->status & DUMP_ERROR_FLAG)) {
432 invalidate_last_dump(&phdr, dump_area_start);
433 register_dump_area(&phdr, dump_area_start);
434 return 0;
435 }
436
437 if (dump_header) {
438 phyp_dump_info->reserved_scratch_addr =
439 dump_header->cpu_data.destination_address;
440 phyp_dump_info->reserved_scratch_size =
441 dump_header->cpu_data.source_length +
442 dump_header->hpte_data.source_length +
443 dump_header->kernel_data.source_length;
444 }
445
d5a29c7a
MA
446 /* Should we create a dump_subsys, analogous to s390/ipl.c ? */
447 rc = sysfs_create_file(kernel_kobj, &rr.attr);
2c4f4113 448 if (rc)
d5a29c7a
MA
449 printk(KERN_ERR "phyp-dump: unable to create sysfs file (%d)\n",
450 rc);
6ac26c8a 451
2c4f4113 452 /* ToDo: re-register the dump area, for next time. */
6ac26c8a
MA
453 return 0;
454}
455machine_subsys_initcall(pseries, phyp_dump_setup);
456
457int __init early_init_dt_scan_phyp_dump(unsigned long node,
458 const char *uname, int depth, void *data)
459{
460 const unsigned int *sizes;
461
462 phyp_dump_info->phyp_dump_configured = 0;
463 phyp_dump_info->phyp_dump_is_active = 0;
464
465 if (depth != 1 || strcmp(uname, "rtas") != 0)
466 return 0;
467
468 if (of_get_flat_dt_prop(node, "ibm,configure-kernel-dump", NULL))
469 phyp_dump_info->phyp_dump_configured++;
470
471 if (of_get_flat_dt_prop(node, "ibm,dump-kernel", NULL))
472 phyp_dump_info->phyp_dump_is_active++;
473
474 sizes = of_get_flat_dt_prop(node, "ibm,configure-kernel-dump-sizes",
475 NULL);
476 if (!sizes)
477 return 0;
478
479 if (sizes[0] == 1)
480 phyp_dump_info->cpu_state_size = *((unsigned long *)&sizes[1]);
481
482 if (sizes[3] == 2)
483 phyp_dump_info->hpte_region_size =
484 *((unsigned long *)&sizes[4]);
485 return 1;
486}
654f596d
MA
487
488/* Look for phyp_dump= cmdline option */
489static int __init early_phyp_dump_enabled(char *p)
490{
491 phyp_dump_info->phyp_dump_at_boot = 1;
492
493 if (!p)
494 return 0;
495
496 if (strncmp(p, "1", 1) == 0)
497 phyp_dump_info->phyp_dump_at_boot = 1;
498 else if (strncmp(p, "0", 1) == 0)
499 phyp_dump_info->phyp_dump_at_boot = 0;
500
501 return 0;
502}
503early_param("phyp_dump", early_phyp_dump_enabled);
504
37ddd5d0
MA
505/* Look for phyp_dump_reserve_size= cmdline option */
506static int __init early_phyp_dump_reserve_size(char *p)
507{
508 if (p)
509 phyp_dump_info->reserve_bootvar = memparse(p, &p);
510
511 return 0;
512}
513early_param("phyp_dump_reserve_size", early_phyp_dump_reserve_size);