aboutsummaryrefslogtreecommitdiffstats
path: root/arch/powerpc/platforms/pseries/phyp_dump.c
diff options
context:
space:
mode:
Diffstat (limited to 'arch/powerpc/platforms/pseries/phyp_dump.c')
-rw-r--r--arch/powerpc/platforms/pseries/phyp_dump.c513
1 files changed, 513 insertions, 0 deletions
diff --git a/arch/powerpc/platforms/pseries/phyp_dump.c b/arch/powerpc/platforms/pseries/phyp_dump.c
new file mode 100644
index 00000000..6e7742da
--- /dev/null
+++ b/arch/powerpc/platforms/pseries/phyp_dump.c
@@ -0,0 +1,513 @@
+/*
+ * Hypervisor-assisted dump
+ *
+ * Linas Vepstas, Manish Ahuja 2008
+ * Copyright 2008 IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <linux/gfp.h>
+#include <linux/init.h>
+#include <linux/kobject.h>
+#include <linux/mm.h>
+#include <linux/of.h>
+#include <linux/pfn.h>
+#include <linux/swap.h>
+#include <linux/sysfs.h>
+
+#include <asm/page.h>
+#include <asm/phyp_dump.h>
+#include <asm/machdep.h>
+#include <asm/prom.h>
+#include <asm/rtas.h>
+
+/* Variables, used to communicate data between early boot and late boot */
+static struct phyp_dump phyp_dump_vars;
+struct phyp_dump *phyp_dump_info = &phyp_dump_vars;
+
+static int ibm_configure_kernel_dump;
+/* ------------------------------------------------- */
+/* RTAS interfaces to declare the dump regions */
+
+struct dump_section {
+ u32 dump_flags;
+ u16 source_type;
+ u16 error_flags;
+ u64 source_address;
+ u64 source_length;
+ u64 length_copied;
+ u64 destination_address;
+};
+
+struct phyp_dump_header {
+ u32 version;
+ u16 num_of_sections;
+ u16 status;
+
+ u32 first_offset_section;
+ u32 dump_disk_section;
+ u64 block_num_dd;
+ u64 num_of_blocks_dd;
+ u32 offset_dd;
+ u32 maxtime_to_auto;
+ /* No dump disk path string used */
+
+ struct dump_section cpu_data;
+ struct dump_section hpte_data;
+ struct dump_section kernel_data;
+};
+
+/* The dump header *must be* in low memory, so .bss it */
+static struct phyp_dump_header phdr;
+
+#define NUM_DUMP_SECTIONS 3
+#define DUMP_HEADER_VERSION 0x1
+#define DUMP_REQUEST_FLAG 0x1
+#define DUMP_SOURCE_CPU 0x0001
+#define DUMP_SOURCE_HPTE 0x0002
+#define DUMP_SOURCE_RMO 0x0011
+#define DUMP_ERROR_FLAG 0x2000
+#define DUMP_TRIGGERED 0x4000
+#define DUMP_PERFORMED 0x8000
+
+
+/**
+ * init_dump_header() - initialize the header declaring a dump
+ * Returns: length of dump save area.
+ *
+ * When the hypervisor saves crashed state, it needs to put
+ * it somewhere. The dump header tells the hypervisor where
+ * the data can be saved.
+ */
+static unsigned long init_dump_header(struct phyp_dump_header *ph)
+{
+ unsigned long addr_offset = 0;
+
+ /* Set up the dump header */
+ ph->version = DUMP_HEADER_VERSION;
+ ph->num_of_sections = NUM_DUMP_SECTIONS;
+ ph->status = 0;
+
+ ph->first_offset_section =
+ (u32)offsetof(struct phyp_dump_header, cpu_data);
+ ph->dump_disk_section = 0;
+ ph->block_num_dd = 0;
+ ph->num_of_blocks_dd = 0;
+ ph->offset_dd = 0;
+
+ ph->maxtime_to_auto = 0; /* disabled */
+
+ /* The first two sections are mandatory */
+ ph->cpu_data.dump_flags = DUMP_REQUEST_FLAG;
+ ph->cpu_data.source_type = DUMP_SOURCE_CPU;
+ ph->cpu_data.source_address = 0;
+ ph->cpu_data.source_length = phyp_dump_info->cpu_state_size;
+ ph->cpu_data.destination_address = addr_offset;
+ addr_offset += phyp_dump_info->cpu_state_size;
+
+ ph->hpte_data.dump_flags = DUMP_REQUEST_FLAG;
+ ph->hpte_data.source_type = DUMP_SOURCE_HPTE;
+ ph->hpte_data.source_address = 0;
+ ph->hpte_data.source_length = phyp_dump_info->hpte_region_size;
+ ph->hpte_data.destination_address = addr_offset;
+ addr_offset += phyp_dump_info->hpte_region_size;
+
+ /* This section describes the low kernel region */
+ ph->kernel_data.dump_flags = DUMP_REQUEST_FLAG;
+ ph->kernel_data.source_type = DUMP_SOURCE_RMO;
+ ph->kernel_data.source_address = PHYP_DUMP_RMR_START;
+ ph->kernel_data.source_length = PHYP_DUMP_RMR_END;
+ ph->kernel_data.destination_address = addr_offset;
+ addr_offset += ph->kernel_data.source_length;
+
+ return addr_offset;
+}
+
+static void print_dump_header(const struct phyp_dump_header *ph)
+{
+#ifdef DEBUG
+ if (ph == NULL)
+ return;
+
+ printk(KERN_INFO "dump header:\n");
+ /* setup some ph->sections required */
+ printk(KERN_INFO "version = %d\n", ph->version);
+ printk(KERN_INFO "Sections = %d\n", ph->num_of_sections);
+ printk(KERN_INFO "Status = 0x%x\n", ph->status);
+
+ /* No ph->disk, so all should be set to 0 */
+ printk(KERN_INFO "Offset to first section 0x%x\n",
+ ph->first_offset_section);
+ printk(KERN_INFO "dump disk sections should be zero\n");
+ printk(KERN_INFO "dump disk section = %d\n", ph->dump_disk_section);
+ printk(KERN_INFO "block num = %lld\n", ph->block_num_dd);
+ printk(KERN_INFO "number of blocks = %lld\n", ph->num_of_blocks_dd);
+ printk(KERN_INFO "dump disk offset = %d\n", ph->offset_dd);
+ printk(KERN_INFO "Max auto time= %d\n", ph->maxtime_to_auto);
+
+ /*set cpu state and hpte states as well scratch pad area */
+ printk(KERN_INFO " CPU AREA\n");
+ printk(KERN_INFO "cpu dump_flags =%d\n", ph->cpu_data.dump_flags);
+ printk(KERN_INFO "cpu source_type =%d\n", ph->cpu_data.source_type);
+ printk(KERN_INFO "cpu error_flags =%d\n", ph->cpu_data.error_flags);
+ printk(KERN_INFO "cpu source_address =%llx\n",
+ ph->cpu_data.source_address);
+ printk(KERN_INFO "cpu source_length =%llx\n",
+ ph->cpu_data.source_length);
+ printk(KERN_INFO "cpu length_copied =%llx\n",
+ ph->cpu_data.length_copied);
+
+ printk(KERN_INFO " HPTE AREA\n");
+ printk(KERN_INFO "HPTE dump_flags =%d\n", ph->hpte_data.dump_flags);
+ printk(KERN_INFO "HPTE source_type =%d\n", ph->hpte_data.source_type);
+ printk(KERN_INFO "HPTE error_flags =%d\n", ph->hpte_data.error_flags);
+ printk(KERN_INFO "HPTE source_address =%llx\n",
+ ph->hpte_data.source_address);
+ printk(KERN_INFO "HPTE source_length =%llx\n",
+ ph->hpte_data.source_length);
+ printk(KERN_INFO "HPTE length_copied =%llx\n",
+ ph->hpte_data.length_copied);
+
+ printk(KERN_INFO " SRSD AREA\n");
+ printk(KERN_INFO "SRSD dump_flags =%d\n", ph->kernel_data.dump_flags);
+ printk(KERN_INFO "SRSD source_type =%d\n", ph->kernel_data.source_type);
+ printk(KERN_INFO "SRSD error_flags =%d\n", ph->kernel_data.error_flags);
+ printk(KERN_INFO "SRSD source_address =%llx\n",
+ ph->kernel_data.source_address);
+ printk(KERN_INFO "SRSD source_length =%llx\n",
+ ph->kernel_data.source_length);
+ printk(KERN_INFO "SRSD length_copied =%llx\n",
+ ph->kernel_data.length_copied);
+#endif
+}
+
+static ssize_t show_phyp_dump_active(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+
+ /* create filesystem entry so kdump is phyp-dump aware */
+ return sprintf(buf, "%lx\n", phyp_dump_info->phyp_dump_at_boot);
+}
+
+static struct kobj_attribute pdl = __ATTR(phyp_dump_active, 0600,
+ show_phyp_dump_active,
+ NULL);
+
+static void register_dump_area(struct phyp_dump_header *ph, unsigned long addr)
+{
+ int rc;
+
+ /* Add addr value if not initialized before */
+ if (ph->cpu_data.destination_address == 0) {
+ ph->cpu_data.destination_address += addr;
+ ph->hpte_data.destination_address += addr;
+ ph->kernel_data.destination_address += addr;
+ }
+
+ /* ToDo Invalidate kdump and free memory range. */
+
+ do {
+ rc = rtas_call(ibm_configure_kernel_dump, 3, 1, NULL,
+ 1, ph, sizeof(struct phyp_dump_header));
+ } while (rtas_busy_delay(rc));
+
+ if (rc) {
+ printk(KERN_ERR "phyp-dump: unexpected error (%d) on "
+ "register\n", rc);
+ print_dump_header(ph);
+ return;
+ }
+
+ rc = sysfs_create_file(kernel_kobj, &pdl.attr);
+ if (rc)
+ printk(KERN_ERR "phyp-dump: unable to create sysfs"
+ " file (%d)\n", rc);
+}
+
+static
+void invalidate_last_dump(struct phyp_dump_header *ph, unsigned long addr)
+{
+ int rc;
+
+ /* Add addr value if not initialized before */
+ if (ph->cpu_data.destination_address == 0) {
+ ph->cpu_data.destination_address += addr;
+ ph->hpte_data.destination_address += addr;
+ ph->kernel_data.destination_address += addr;
+ }
+
+ do {
+ rc = rtas_call(ibm_configure_kernel_dump, 3, 1, NULL,
+ 2, ph, sizeof(struct phyp_dump_header));
+ } while (rtas_busy_delay(rc));
+
+ if (rc) {
+ printk(KERN_ERR "phyp-dump: unexpected error (%d) "
+ "on invalidate\n", rc);
+ print_dump_header(ph);
+ }
+}
+
+/* ------------------------------------------------- */
+/**
+ * release_memory_range -- release memory previously memblock_reserved
+ * @start_pfn: starting physical frame number
+ * @nr_pages: number of pages to free.
+ *
+ * This routine will release memory that had been previously
+ * memblock_reserved in early boot. The released memory becomes
+ * available for genreal use.
+ */
+static void release_memory_range(unsigned long start_pfn,
+ unsigned long nr_pages)
+{
+ struct page *rpage;
+ unsigned long end_pfn;
+ long i;
+
+ end_pfn = start_pfn + nr_pages;
+
+ for (i = start_pfn; i <= end_pfn; i++) {
+ rpage = pfn_to_page(i);
+ if (PageReserved(rpage)) {
+ ClearPageReserved(rpage);
+ init_page_count(rpage);
+ __free_page(rpage);
+ totalram_pages++;
+ }
+ }
+}
+
+/**
+ * track_freed_range -- Counts the range being freed.
+ * Once the counter goes to zero, it re-registers dump for
+ * future use.
+ */
+static void
+track_freed_range(unsigned long addr, unsigned long length)
+{
+ static unsigned long scratch_area_size, reserved_area_size;
+
+ if (addr < phyp_dump_info->init_reserve_start)
+ return;
+
+ if ((addr >= phyp_dump_info->init_reserve_start) &&
+ (addr <= phyp_dump_info->init_reserve_start +
+ phyp_dump_info->init_reserve_size))
+ reserved_area_size += length;
+
+ if ((addr >= phyp_dump_info->reserved_scratch_addr) &&
+ (addr <= phyp_dump_info->reserved_scratch_addr +
+ phyp_dump_info->reserved_scratch_size))
+ scratch_area_size += length;
+
+ if ((reserved_area_size == phyp_dump_info->init_reserve_size) &&
+ (scratch_area_size == phyp_dump_info->reserved_scratch_size)) {
+
+ invalidate_last_dump(&phdr,
+ phyp_dump_info->reserved_scratch_addr);
+ register_dump_area(&phdr,
+ phyp_dump_info->reserved_scratch_addr);
+ }
+}
+
+/* ------------------------------------------------- */
+/**
+ * sysfs_release_region -- sysfs interface to release memory range.
+ *
+ * Usage:
+ * "echo <start addr> <length> > /sys/kernel/release_region"
+ *
+ * Example:
+ * "echo 0x40000000 0x10000000 > /sys/kernel/release_region"
+ *
+ * will release 256MB starting at 1GB.
+ */
+static ssize_t store_release_region(struct kobject *kobj,
+ struct kobj_attribute *attr,
+ const char *buf, size_t count)
+{
+ unsigned long start_addr, length, end_addr;
+ unsigned long start_pfn, nr_pages;
+ ssize_t ret;
+
+ ret = sscanf(buf, "%lx %lx", &start_addr, &length);
+ if (ret != 2)
+ return -EINVAL;
+
+ track_freed_range(start_addr, length);
+
+ /* Range-check - don't free any reserved memory that
+ * wasn't reserved for phyp-dump */
+ if (start_addr < phyp_dump_info->init_reserve_start)
+ start_addr = phyp_dump_info->init_reserve_start;
+
+ end_addr = phyp_dump_info->init_reserve_start +
+ phyp_dump_info->init_reserve_size;
+ if (start_addr+length > end_addr)
+ length = end_addr - start_addr;
+
+ /* Release the region of memory assed in by user */
+ start_pfn = PFN_DOWN(start_addr);
+ nr_pages = PFN_DOWN(length);
+ release_memory_range(start_pfn, nr_pages);
+
+ return count;
+}
+
+static ssize_t show_release_region(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ u64 second_addr_range;
+
+ /* total reserved size - start of scratch area */
+ second_addr_range = phyp_dump_info->init_reserve_size -
+ phyp_dump_info->reserved_scratch_size;
+ return sprintf(buf, "CPU:0x%llx-0x%llx: HPTE:0x%llx-0x%llx:"
+ " DUMP:0x%llx-0x%llx, 0x%lx-0x%llx:\n",
+ phdr.cpu_data.destination_address,
+ phdr.cpu_data.length_copied,
+ phdr.hpte_data.destination_address,
+ phdr.hpte_data.length_copied,
+ phdr.kernel_data.destination_address,
+ phdr.kernel_data.length_copied,
+ phyp_dump_info->init_reserve_start,
+ second_addr_range);
+}
+
+static struct kobj_attribute rr = __ATTR(release_region, 0600,
+ show_release_region,
+ store_release_region);
+
+static int __init phyp_dump_setup(void)
+{
+ struct device_node *rtas;
+ const struct phyp_dump_header *dump_header = NULL;
+ unsigned long dump_area_start;
+ unsigned long dump_area_length;
+ int header_len = 0;
+ int rc;
+
+ /* If no memory was reserved in early boot, there is nothing to do */
+ if (phyp_dump_info->init_reserve_size == 0)
+ return 0;
+
+ /* Return if phyp dump not supported */
+ if (!phyp_dump_info->phyp_dump_configured)
+ return -ENOSYS;
+
+ /* Is there dump data waiting for us? If there isn't,
+ * then register a new dump area, and release all of
+ * the rest of the reserved ram.
+ *
+ * The /rtas/ibm,kernel-dump rtas node is present only
+ * if there is dump data waiting for us.
+ */
+ rtas = of_find_node_by_path("/rtas");
+ if (rtas) {
+ dump_header = of_get_property(rtas, "ibm,kernel-dump",
+ &header_len);
+ of_node_put(rtas);
+ }
+
+ ibm_configure_kernel_dump = rtas_token("ibm,configure-kernel-dump");
+
+ print_dump_header(dump_header);
+ dump_area_length = init_dump_header(&phdr);
+ /* align down */
+ dump_area_start = phyp_dump_info->init_reserve_start & PAGE_MASK;
+
+ if (dump_header == NULL) {
+ register_dump_area(&phdr, dump_area_start);
+ return 0;
+ }
+
+ /* re-register the dump area, if old dump was invalid */
+ if ((dump_header) && (dump_header->status & DUMP_ERROR_FLAG)) {
+ invalidate_last_dump(&phdr, dump_area_start);
+ register_dump_area(&phdr, dump_area_start);
+ return 0;
+ }
+
+ if (dump_header) {
+ phyp_dump_info->reserved_scratch_addr =
+ dump_header->cpu_data.destination_address;
+ phyp_dump_info->reserved_scratch_size =
+ dump_header->cpu_data.source_length +
+ dump_header->hpte_data.source_length +
+ dump_header->kernel_data.source_length;
+ }
+
+ /* Should we create a dump_subsys, analogous to s390/ipl.c ? */
+ rc = sysfs_create_file(kernel_kobj, &rr.attr);
+ if (rc)
+ printk(KERN_ERR "phyp-dump: unable to create sysfs file (%d)\n",
+ rc);
+
+ /* ToDo: re-register the dump area, for next time. */
+ return 0;
+}
+machine_subsys_initcall(pseries, phyp_dump_setup);
+
+int __init early_init_dt_scan_phyp_dump(unsigned long node,
+ const char *uname, int depth, void *data)
+{
+ const unsigned int *sizes;
+
+ phyp_dump_info->phyp_dump_configured = 0;
+ phyp_dump_info->phyp_dump_is_active = 0;
+
+ if (depth != 1 || strcmp(uname, "rtas") != 0)
+ return 0;
+
+ if (of_get_flat_dt_prop(node, "ibm,configure-kernel-dump", NULL))
+ phyp_dump_info->phyp_dump_configured++;
+
+ if (of_get_flat_dt_prop(node, "ibm,dump-kernel", NULL))
+ phyp_dump_info->phyp_dump_is_active++;
+
+ sizes = of_get_flat_dt_prop(node, "ibm,configure-kernel-dump-sizes",
+ NULL);
+ if (!sizes)
+ return 0;
+
+ if (sizes[0] == 1)
+ phyp_dump_info->cpu_state_size = *((unsigned long *)&sizes[1]);
+
+ if (sizes[3] == 2)
+ phyp_dump_info->hpte_region_size =
+ *((unsigned long *)&sizes[4]);
+ return 1;
+}
+
+/* Look for phyp_dump= cmdline option */
+static int __init early_phyp_dump_enabled(char *p)
+{
+ phyp_dump_info->phyp_dump_at_boot = 1;
+
+ if (!p)
+ return 0;
+
+ if (strncmp(p, "1", 1) == 0)
+ phyp_dump_info->phyp_dump_at_boot = 1;
+ else if (strncmp(p, "0", 1) == 0)
+ phyp_dump_info->phyp_dump_at_boot = 0;
+
+ return 0;
+}
+early_param("phyp_dump", early_phyp_dump_enabled);
+
+/* Look for phyp_dump_reserve_size= cmdline option */
+static int __init early_phyp_dump_reserve_size(char *p)
+{
+ if (p)
+ phyp_dump_info->reserve_bootvar = memparse(p, &p);
+
+ return 0;
+}
+early_param("phyp_dump_reserve_size", early_phyp_dump_reserve_size);