]>
Commit | Line | Data |
---|---|---|
6ac26c8a MA |
1 | /* |
2 | * Hypervisor-assisted dump | |
3 | * | |
4 | * Linas Vepstas, Manish Ahuja 2008 | |
5 | * Copyright 2008 IBM Corp. | |
6 | * | |
7 | * This program is free software; you can redistribute it and/or | |
8 | * modify it under the terms of the GNU General Public License | |
9 | * as published by the Free Software Foundation; either version | |
10 | * 2 of the License, or (at your option) any later version. | |
11 | * | |
12 | */ | |
13 | ||
14 | #include <linux/init.h> | |
d5a29c7a | 15 | #include <linux/kobject.h> |
6ac26c8a | 16 | #include <linux/mm.h> |
d5a29c7a | 17 | #include <linux/of.h> |
6ac26c8a MA |
18 | #include <linux/pfn.h> |
19 | #include <linux/swap.h> | |
d5a29c7a | 20 | #include <linux/sysfs.h> |
6ac26c8a MA |
21 | |
22 | #include <asm/page.h> | |
23 | #include <asm/phyp_dump.h> | |
24 | #include <asm/machdep.h> | |
25 | #include <asm/prom.h> | |
d5a29c7a | 26 | #include <asm/rtas.h> |
6ac26c8a MA |
27 | |
28 | /* Variables, used to communicate data between early boot and late boot */ | |
29 | static struct phyp_dump phyp_dump_vars; | |
30 | struct phyp_dump *phyp_dump_info = &phyp_dump_vars; | |
31 | ||
2c4f4113 MA |
32 | static int ibm_configure_kernel_dump; |
33 | /* ------------------------------------------------- */ | |
34 | /* RTAS interfaces to declare the dump regions */ | |
35 | ||
36 | struct dump_section { | |
37 | u32 dump_flags; | |
38 | u16 source_type; | |
39 | u16 error_flags; | |
40 | u64 source_address; | |
41 | u64 source_length; | |
42 | u64 length_copied; | |
43 | u64 destination_address; | |
44 | }; | |
45 | ||
46 | struct phyp_dump_header { | |
47 | u32 version; | |
48 | u16 num_of_sections; | |
49 | u16 status; | |
50 | ||
51 | u32 first_offset_section; | |
52 | u32 dump_disk_section; | |
53 | u64 block_num_dd; | |
54 | u64 num_of_blocks_dd; | |
55 | u32 offset_dd; | |
56 | u32 maxtime_to_auto; | |
57 | /* No dump disk path string used */ | |
58 | ||
59 | struct dump_section cpu_data; | |
60 | struct dump_section hpte_data; | |
61 | struct dump_section kernel_data; | |
62 | }; | |
63 | ||
64 | /* The dump header *must be* in low memory, so .bss it */ | |
65 | static struct phyp_dump_header phdr; | |
66 | ||
67 | #define NUM_DUMP_SECTIONS 3 | |
68 | #define DUMP_HEADER_VERSION 0x1 | |
69 | #define DUMP_REQUEST_FLAG 0x1 | |
70 | #define DUMP_SOURCE_CPU 0x0001 | |
71 | #define DUMP_SOURCE_HPTE 0x0002 | |
72 | #define DUMP_SOURCE_RMO 0x0011 | |
a9c508da MA |
73 | #define DUMP_ERROR_FLAG 0x2000 |
74 | #define DUMP_TRIGGERED 0x4000 | |
75 | #define DUMP_PERFORMED 0x8000 | |
76 | ||
2c4f4113 MA |
77 | |
78 | /** | |
79 | * init_dump_header() - initialize the header declaring a dump | |
80 | * Returns: length of dump save area. | |
81 | * | |
82 | * When the hypervisor saves crashed state, it needs to put | |
83 | * it somewhere. The dump header tells the hypervisor where | |
84 | * the data can be saved. | |
85 | */ | |
86 | static unsigned long init_dump_header(struct phyp_dump_header *ph) | |
87 | { | |
88 | unsigned long addr_offset = 0; | |
89 | ||
90 | /* Set up the dump header */ | |
91 | ph->version = DUMP_HEADER_VERSION; | |
92 | ph->num_of_sections = NUM_DUMP_SECTIONS; | |
93 | ph->status = 0; | |
94 | ||
95 | ph->first_offset_section = | |
96 | (u32)offsetof(struct phyp_dump_header, cpu_data); | |
97 | ph->dump_disk_section = 0; | |
98 | ph->block_num_dd = 0; | |
99 | ph->num_of_blocks_dd = 0; | |
100 | ph->offset_dd = 0; | |
101 | ||
102 | ph->maxtime_to_auto = 0; /* disabled */ | |
103 | ||
104 | /* The first two sections are mandatory */ | |
105 | ph->cpu_data.dump_flags = DUMP_REQUEST_FLAG; | |
106 | ph->cpu_data.source_type = DUMP_SOURCE_CPU; | |
107 | ph->cpu_data.source_address = 0; | |
108 | ph->cpu_data.source_length = phyp_dump_info->cpu_state_size; | |
109 | ph->cpu_data.destination_address = addr_offset; | |
110 | addr_offset += phyp_dump_info->cpu_state_size; | |
111 | ||
112 | ph->hpte_data.dump_flags = DUMP_REQUEST_FLAG; | |
113 | ph->hpte_data.source_type = DUMP_SOURCE_HPTE; | |
114 | ph->hpte_data.source_address = 0; | |
115 | ph->hpte_data.source_length = phyp_dump_info->hpte_region_size; | |
116 | ph->hpte_data.destination_address = addr_offset; | |
117 | addr_offset += phyp_dump_info->hpte_region_size; | |
118 | ||
119 | /* This section describes the low kernel region */ | |
120 | ph->kernel_data.dump_flags = DUMP_REQUEST_FLAG; | |
121 | ph->kernel_data.source_type = DUMP_SOURCE_RMO; | |
122 | ph->kernel_data.source_address = PHYP_DUMP_RMR_START; | |
123 | ph->kernel_data.source_length = PHYP_DUMP_RMR_END; | |
124 | ph->kernel_data.destination_address = addr_offset; | |
125 | addr_offset += ph->kernel_data.source_length; | |
126 | ||
127 | return addr_offset; | |
128 | } | |
129 | ||
599c1aa5 MA |
/*
 * print_dump_header() - debug dump of a phyp dump header
 * @ph: header to print; may be NULL, in which case nothing is printed
 *
 * Compiles to an empty function unless DEBUG is defined.  The NULL
 * check matters because phyp_dump_setup() passes the result of the
 * "ibm,kernel-dump" property lookup, which is NULL when no dump is
 * waiting.
 */
static void print_dump_header(const struct phyp_dump_header *ph)
{
#ifdef DEBUG
	if (ph == NULL)
		return;

	printk(KERN_INFO "dump header:\n");
	/* setup some ph->sections required */
	printk(KERN_INFO "version = %d\n", ph->version);
	printk(KERN_INFO "Sections = %d\n", ph->num_of_sections);
	printk(KERN_INFO "Status = 0x%x\n", ph->status);

	/* No ph->disk, so all should be set to 0 */
	printk(KERN_INFO "Offset to first section 0x%x\n",
		ph->first_offset_section);
	printk(KERN_INFO "dump disk sections should be zero\n");
	printk(KERN_INFO "dump disk section = %d\n", ph->dump_disk_section);
	printk(KERN_INFO "block num = %ld\n", ph->block_num_dd);
	printk(KERN_INFO "number of blocks = %ld\n", ph->num_of_blocks_dd);
	printk(KERN_INFO "dump disk offset = %d\n", ph->offset_dd);
	printk(KERN_INFO "Max auto time= %d\n", ph->maxtime_to_auto);

	/* CPU state, HPTE state and the low kernel (SRSD) sections */
	printk(KERN_INFO " CPU AREA \n");
	printk(KERN_INFO "cpu dump_flags =%d\n", ph->cpu_data.dump_flags);
	printk(KERN_INFO "cpu source_type =%d\n", ph->cpu_data.source_type);
	printk(KERN_INFO "cpu error_flags =%d\n", ph->cpu_data.error_flags);
	printk(KERN_INFO "cpu source_address =%lx\n",
		ph->cpu_data.source_address);
	printk(KERN_INFO "cpu source_length =%lx\n",
		ph->cpu_data.source_length);
	printk(KERN_INFO "cpu length_copied =%lx\n",
		ph->cpu_data.length_copied);

	printk(KERN_INFO " HPTE AREA \n");
	printk(KERN_INFO "HPTE dump_flags =%d\n", ph->hpte_data.dump_flags);
	printk(KERN_INFO "HPTE source_type =%d\n", ph->hpte_data.source_type);
	printk(KERN_INFO "HPTE error_flags =%d\n", ph->hpte_data.error_flags);
	printk(KERN_INFO "HPTE source_address =%lx\n",
		ph->hpte_data.source_address);
	printk(KERN_INFO "HPTE source_length =%lx\n",
		ph->hpte_data.source_length);
	printk(KERN_INFO "HPTE length_copied =%lx\n",
		ph->hpte_data.length_copied);

	printk(KERN_INFO " SRSD AREA \n");
	printk(KERN_INFO "SRSD dump_flags =%d\n", ph->kernel_data.dump_flags);
	printk(KERN_INFO "SRSD source_type =%d\n", ph->kernel_data.source_type);
	printk(KERN_INFO "SRSD error_flags =%d\n", ph->kernel_data.error_flags);
	printk(KERN_INFO "SRSD source_address =%lx\n",
		ph->kernel_data.source_address);
	printk(KERN_INFO "SRSD source_length =%lx\n",
		ph->kernel_data.source_length);
	printk(KERN_INFO "SRSD length_copied =%lx\n",
		ph->kernel_data.length_copied);
#endif
}
184 | ||
7415d5e0 MA |
185 | static ssize_t show_phyp_dump_active(struct kobject *kobj, |
186 | struct kobj_attribute *attr, char *buf) | |
187 | { | |
188 | ||
189 | /* create filesystem entry so kdump is phyp-dump aware */ | |
190 | return sprintf(buf, "%lx\n", phyp_dump_info->phyp_dump_at_boot); | |
191 | } | |
192 | ||
193 | static struct kobj_attribute pdl = __ATTR(phyp_dump_active, 0600, | |
194 | show_phyp_dump_active, | |
195 | NULL); | |
196 | ||
2c4f4113 MA |
197 | static void register_dump_area(struct phyp_dump_header *ph, unsigned long addr) |
198 | { | |
199 | int rc; | |
a9c508da MA |
200 | |
201 | /* Add addr value if not initialized before */ | |
202 | if (ph->cpu_data.destination_address == 0) { | |
203 | ph->cpu_data.destination_address += addr; | |
204 | ph->hpte_data.destination_address += addr; | |
205 | ph->kernel_data.destination_address += addr; | |
206 | } | |
207 | ||
208 | /* ToDo Invalidate kdump and free memory range. */ | |
2c4f4113 MA |
209 | |
210 | do { | |
211 | rc = rtas_call(ibm_configure_kernel_dump, 3, 1, NULL, | |
212 | 1, ph, sizeof(struct phyp_dump_header)); | |
213 | } while (rtas_busy_delay(rc)); | |
214 | ||
599c1aa5 | 215 | if (rc) { |
2c4f4113 MA |
216 | printk(KERN_ERR "phyp-dump: unexpected error (%d) on " |
217 | "register\n", rc); | |
599c1aa5 | 218 | print_dump_header(ph); |
7415d5e0 | 219 | return; |
599c1aa5 | 220 | } |
7415d5e0 MA |
221 | |
222 | rc = sysfs_create_file(kernel_kobj, &pdl.attr); | |
223 | if (rc) | |
224 | printk(KERN_ERR "phyp-dump: unable to create sysfs" | |
225 | " file (%d)\n", rc); | |
2c4f4113 MA |
226 | } |
227 | ||
a9c508da MA |
228 | static |
229 | void invalidate_last_dump(struct phyp_dump_header *ph, unsigned long addr) | |
230 | { | |
231 | int rc; | |
232 | ||
233 | /* Add addr value if not initialized before */ | |
234 | if (ph->cpu_data.destination_address == 0) { | |
235 | ph->cpu_data.destination_address += addr; | |
236 | ph->hpte_data.destination_address += addr; | |
237 | ph->kernel_data.destination_address += addr; | |
238 | } | |
239 | ||
240 | do { | |
241 | rc = rtas_call(ibm_configure_kernel_dump, 3, 1, NULL, | |
242 | 2, ph, sizeof(struct phyp_dump_header)); | |
243 | } while (rtas_busy_delay(rc)); | |
244 | ||
245 | if (rc) { | |
246 | printk(KERN_ERR "phyp-dump: unexpected error (%d) " | |
247 | "on invalidate\n", rc); | |
248 | print_dump_header(ph); | |
249 | } | |
250 | } | |
251 | ||
d5a29c7a | 252 | /* ------------------------------------------------- */ |
6ac26c8a MA |
253 | /** |
254 | * release_memory_range -- release memory previously lmb_reserved | |
255 | * @start_pfn: starting physical frame number | |
256 | * @nr_pages: number of pages to free. | |
257 | * | |
258 | * This routine will release memory that had been previously | |
259 | * lmb_reserved in early boot. The released memory becomes | |
260 | * available for genreal use. | |
261 | */ | |
a9c508da MA |
262 | static void release_memory_range(unsigned long start_pfn, |
263 | unsigned long nr_pages) | |
6ac26c8a MA |
264 | { |
265 | struct page *rpage; | |
266 | unsigned long end_pfn; | |
267 | long i; | |
268 | ||
269 | end_pfn = start_pfn + nr_pages; | |
270 | ||
271 | for (i = start_pfn; i <= end_pfn; i++) { | |
272 | rpage = pfn_to_page(i); | |
273 | if (PageReserved(rpage)) { | |
274 | ClearPageReserved(rpage); | |
275 | init_page_count(rpage); | |
276 | __free_page(rpage); | |
277 | totalram_pages++; | |
278 | } | |
279 | } | |
280 | } | |
281 | ||
fd35cff8 MA |
282 | /** |
283 | * track_freed_range -- Counts the range being freed. | |
284 | * Once the counter goes to zero, it re-registers dump for | |
285 | * future use. | |
286 | */ | |
287 | static void | |
288 | track_freed_range(unsigned long addr, unsigned long length) | |
289 | { | |
290 | static unsigned long scratch_area_size, reserved_area_size; | |
291 | ||
292 | if (addr < phyp_dump_info->init_reserve_start) | |
293 | return; | |
294 | ||
295 | if ((addr >= phyp_dump_info->init_reserve_start) && | |
296 | (addr <= phyp_dump_info->init_reserve_start + | |
297 | phyp_dump_info->init_reserve_size)) | |
298 | reserved_area_size += length; | |
299 | ||
300 | if ((addr >= phyp_dump_info->reserved_scratch_addr) && | |
301 | (addr <= phyp_dump_info->reserved_scratch_addr + | |
302 | phyp_dump_info->reserved_scratch_size)) | |
303 | scratch_area_size += length; | |
304 | ||
305 | if ((reserved_area_size == phyp_dump_info->init_reserve_size) && | |
306 | (scratch_area_size == phyp_dump_info->reserved_scratch_size)) { | |
307 | ||
308 | invalidate_last_dump(&phdr, | |
309 | phyp_dump_info->reserved_scratch_addr); | |
310 | register_dump_area(&phdr, | |
311 | phyp_dump_info->reserved_scratch_addr); | |
312 | } | |
313 | } | |
314 | ||
d5a29c7a MA |
315 | /* ------------------------------------------------- */ |
316 | /** | |
317 | * sysfs_release_region -- sysfs interface to release memory range. | |
318 | * | |
319 | * Usage: | |
320 | * "echo <start addr> <length> > /sys/kernel/release_region" | |
321 | * | |
322 | * Example: | |
323 | * "echo 0x40000000 0x10000000 > /sys/kernel/release_region" | |
324 | * | |
325 | * will release 256MB starting at 1GB. | |
326 | */ | |
327 | static ssize_t store_release_region(struct kobject *kobj, | |
328 | struct kobj_attribute *attr, | |
329 | const char *buf, size_t count) | |
6ac26c8a | 330 | { |
d5a29c7a | 331 | unsigned long start_addr, length, end_addr; |
6ac26c8a | 332 | unsigned long start_pfn, nr_pages; |
d5a29c7a MA |
333 | ssize_t ret; |
334 | ||
335 | ret = sscanf(buf, "%lx %lx", &start_addr, &length); | |
336 | if (ret != 2) | |
337 | return -EINVAL; | |
338 | ||
fd35cff8 MA |
339 | track_freed_range(start_addr, length); |
340 | ||
d5a29c7a MA |
341 | /* Range-check - don't free any reserved memory that |
342 | * wasn't reserved for phyp-dump */ | |
343 | if (start_addr < phyp_dump_info->init_reserve_start) | |
344 | start_addr = phyp_dump_info->init_reserve_start; | |
345 | ||
346 | end_addr = phyp_dump_info->init_reserve_start + | |
347 | phyp_dump_info->init_reserve_size; | |
348 | if (start_addr+length > end_addr) | |
349 | length = end_addr - start_addr; | |
350 | ||
351 | /* Release the region of memory assed in by user */ | |
352 | start_pfn = PFN_DOWN(start_addr); | |
353 | nr_pages = PFN_DOWN(length); | |
354 | release_memory_range(start_pfn, nr_pages); | |
355 | ||
356 | return count; | |
357 | } | |
358 | ||
a9c508da MA |
359 | static ssize_t show_release_region(struct kobject *kobj, |
360 | struct kobj_attribute *attr, char *buf) | |
361 | { | |
362 | u64 second_addr_range; | |
363 | ||
364 | /* total reserved size - start of scratch area */ | |
365 | second_addr_range = phyp_dump_info->init_reserve_size - | |
366 | phyp_dump_info->reserved_scratch_size; | |
367 | return sprintf(buf, "CPU:0x%lx-0x%lx: HPTE:0x%lx-0x%lx:" | |
368 | " DUMP:0x%lx-0x%lx, 0x%lx-0x%lx:\n", | |
369 | phdr.cpu_data.destination_address, | |
370 | phdr.cpu_data.length_copied, | |
371 | phdr.hpte_data.destination_address, | |
372 | phdr.hpte_data.length_copied, | |
373 | phdr.kernel_data.destination_address, | |
374 | phdr.kernel_data.length_copied, | |
375 | phyp_dump_info->init_reserve_start, | |
376 | second_addr_range); | |
377 | } | |
378 | ||
d5a29c7a | 379 | static struct kobj_attribute rr = __ATTR(release_region, 0600, |
a9c508da MA |
380 | show_release_region, |
381 | store_release_region); | |
d5a29c7a MA |
382 | |
383 | static int __init phyp_dump_setup(void) | |
384 | { | |
385 | struct device_node *rtas; | |
2c4f4113 MA |
386 | const struct phyp_dump_header *dump_header = NULL; |
387 | unsigned long dump_area_start; | |
388 | unsigned long dump_area_length; | |
d5a29c7a MA |
389 | int header_len = 0; |
390 | int rc; | |
6ac26c8a MA |
391 | |
392 | /* If no memory was reserved in early boot, there is nothing to do */ | |
393 | if (phyp_dump_info->init_reserve_size == 0) | |
394 | return 0; | |
395 | ||
d5a29c7a MA |
396 | /* Return if phyp dump not supported */ |
397 | if (!phyp_dump_info->phyp_dump_configured) | |
398 | return -ENOSYS; | |
399 | ||
2c4f4113 MA |
400 | /* Is there dump data waiting for us? If there isn't, |
401 | * then register a new dump area, and release all of | |
402 | * the rest of the reserved ram. | |
403 | * | |
404 | * The /rtas/ibm,kernel-dump rtas node is present only | |
405 | * if there is dump data waiting for us. | |
406 | */ | |
d5a29c7a MA |
407 | rtas = of_find_node_by_path("/rtas"); |
408 | if (rtas) { | |
409 | dump_header = of_get_property(rtas, "ibm,kernel-dump", | |
410 | &header_len); | |
411 | of_node_put(rtas); | |
412 | } | |
413 | ||
599c1aa5 | 414 | print_dump_header(dump_header); |
2c4f4113 | 415 | dump_area_length = init_dump_header(&phdr); |
2c4f4113 MA |
416 | /* align down */ |
417 | dump_area_start = phyp_dump_info->init_reserve_start & PAGE_MASK; | |
418 | ||
419 | if (dump_header == NULL) { | |
420 | register_dump_area(&phdr, dump_area_start); | |
d5a29c7a | 421 | return 0; |
2c4f4113 | 422 | } |
d5a29c7a | 423 | |
a9c508da MA |
424 | /* re-register the dump area, if old dump was invalid */ |
425 | if ((dump_header) && (dump_header->status & DUMP_ERROR_FLAG)) { | |
426 | invalidate_last_dump(&phdr, dump_area_start); | |
427 | register_dump_area(&phdr, dump_area_start); | |
428 | return 0; | |
429 | } | |
430 | ||
431 | if (dump_header) { | |
432 | phyp_dump_info->reserved_scratch_addr = | |
433 | dump_header->cpu_data.destination_address; | |
434 | phyp_dump_info->reserved_scratch_size = | |
435 | dump_header->cpu_data.source_length + | |
436 | dump_header->hpte_data.source_length + | |
437 | dump_header->kernel_data.source_length; | |
438 | } | |
439 | ||
d5a29c7a MA |
440 | /* Should we create a dump_subsys, analogous to s390/ipl.c ? */ |
441 | rc = sysfs_create_file(kernel_kobj, &rr.attr); | |
2c4f4113 | 442 | if (rc) |
d5a29c7a MA |
443 | printk(KERN_ERR "phyp-dump: unable to create sysfs file (%d)\n", |
444 | rc); | |
6ac26c8a | 445 | |
2c4f4113 | 446 | /* ToDo: re-register the dump area, for next time. */ |
6ac26c8a MA |
447 | return 0; |
448 | } | |
449 | machine_subsys_initcall(pseries, phyp_dump_setup); | |
450 | ||
451 | int __init early_init_dt_scan_phyp_dump(unsigned long node, | |
452 | const char *uname, int depth, void *data) | |
453 | { | |
454 | const unsigned int *sizes; | |
455 | ||
456 | phyp_dump_info->phyp_dump_configured = 0; | |
457 | phyp_dump_info->phyp_dump_is_active = 0; | |
458 | ||
459 | if (depth != 1 || strcmp(uname, "rtas") != 0) | |
460 | return 0; | |
461 | ||
462 | if (of_get_flat_dt_prop(node, "ibm,configure-kernel-dump", NULL)) | |
463 | phyp_dump_info->phyp_dump_configured++; | |
464 | ||
465 | if (of_get_flat_dt_prop(node, "ibm,dump-kernel", NULL)) | |
466 | phyp_dump_info->phyp_dump_is_active++; | |
467 | ||
468 | sizes = of_get_flat_dt_prop(node, "ibm,configure-kernel-dump-sizes", | |
469 | NULL); | |
470 | if (!sizes) | |
471 | return 0; | |
472 | ||
473 | if (sizes[0] == 1) | |
474 | phyp_dump_info->cpu_state_size = *((unsigned long *)&sizes[1]); | |
475 | ||
476 | if (sizes[3] == 2) | |
477 | phyp_dump_info->hpte_region_size = | |
478 | *((unsigned long *)&sizes[4]); | |
479 | return 1; | |
480 | } | |
654f596d MA |
481 | |
482 | /* Look for phyp_dump= cmdline option */ | |
483 | static int __init early_phyp_dump_enabled(char *p) | |
484 | { | |
485 | phyp_dump_info->phyp_dump_at_boot = 1; | |
486 | ||
487 | if (!p) | |
488 | return 0; | |
489 | ||
490 | if (strncmp(p, "1", 1) == 0) | |
491 | phyp_dump_info->phyp_dump_at_boot = 1; | |
492 | else if (strncmp(p, "0", 1) == 0) | |
493 | phyp_dump_info->phyp_dump_at_boot = 0; | |
494 | ||
495 | return 0; | |
496 | } | |
497 | early_param("phyp_dump", early_phyp_dump_enabled); | |
498 | ||
37ddd5d0 MA |
499 | /* Look for phyp_dump_reserve_size= cmdline option */ |
500 | static int __init early_phyp_dump_reserve_size(char *p) | |
501 | { | |
502 | if (p) | |
503 | phyp_dump_info->reserve_bootvar = memparse(p, &p); | |
504 | ||
505 | return 0; | |
506 | } | |
507 | early_param("phyp_dump_reserve_size", early_phyp_dump_reserve_size); |