]> bbs.cooldavid.org Git - net-next-2.6.git/commitdiff
Blackfin: implement nmi_watchdog for SMP on BF561
author Graf Yang <graf.yang@analog.com>
Wed, 20 Jan 2010 10:56:24 +0000 (10:56 +0000)
committer Mike Frysinger <vapier@gentoo.org>
Tue, 9 Mar 2010 05:30:49 +0000 (00:30 -0500)
Signed-off-by: Graf Yang <graf.yang@analog.com>
Signed-off-by: Mike Frysinger <vapier@gentoo.org>
arch/blackfin/Kconfig.debug
arch/blackfin/include/asm/irq.h
arch/blackfin/include/asm/nmi.h [new file with mode: 0644]
arch/blackfin/include/asm/smp.h
arch/blackfin/kernel/Makefile
arch/blackfin/kernel/nmi.c [new file with mode: 0644]
arch/blackfin/kernel/time-ts.c
arch/blackfin/mach-common/interrupt.S

index 87f195ee2e06234e50399566b25243e8ca821bda..1460d7b5edc15d2dfffd61c5ab4c07abcf3ce2c2 100644 (file)
@@ -238,6 +238,15 @@ config EARLY_PRINTK
          all of this lives in the init section and is thrown away after the
          kernel boots completely.
 
          all of this lives in the init section and is thrown away after the
          kernel boots completely.
 
+config NMI_WATCHDOG
+       bool "Enable NMI watchdog to help debugging lockup on SMP"
+       default n
+       depends on (SMP && !BFIN_SCRATCH_REG_RETN)
+       help
+         If any CPU in the system does not execute the periodic local timer
+         interrupt for more than 5 seconds, then the NMI handler dumps debug
+         information. This information can be used to debug the lockup.
+
 config CPLB_INFO
        bool "Display the CPLB information"
        help
 config CPLB_INFO
        bool "Display the CPLB information"
        help
index 89de539ed0100624ead2e5ea6d7f9790eed11cf2..12f4060a31b034e9cd7190b075da6f58d6d4abaa 100644 (file)
@@ -38,4 +38,8 @@
 
 #include <asm-generic/irq.h>
 
 
 #include <asm-generic/irq.h>
 
+#ifdef CONFIG_NMI_WATCHDOG
+# define ARCH_HAS_NMI_WATCHDOG
+#endif
+
 #endif                         /* _BFIN_IRQ_H_ */
 #endif                         /* _BFIN_IRQ_H_ */
diff --git a/arch/blackfin/include/asm/nmi.h b/arch/blackfin/include/asm/nmi.h
new file mode 100644 (file)
index 0000000..b9caac4
--- /dev/null
@@ -0,0 +1,12 @@
+/*
+ * Copyright 2010 Analog Devices Inc.
+ *
+ * Licensed under the GPL-2
+ */
+
+#ifndef _BFIN_NMI_H_
+#define _BFIN_NMI_H_
+
+#include <linux/nmi.h>
+
+#endif
index 29fb88219470e3946bf3bd69b6c611457020e216..7f26de09ca9cd53cc7e922bbfaa16022f718a141 100644 (file)
@@ -22,6 +22,7 @@ extern char coreb_trampoline_start, coreb_trampoline_end;
 struct corelock_slot {
        int lock;
 };
 struct corelock_slot {
        int lock;
 };
+extern struct corelock_slot corelock;
 
 void smp_icache_flush_range_others(unsigned long start,
                                   unsigned long end);
 
 void smp_icache_flush_range_others(unsigned long start,
                                   unsigned long end);
index a8ddbc8ed5af6d430ce5da0bf42ec2cf6ddfb531..346a421f1562fad53d66485d3d8d7ea2b51935ff 100644 (file)
@@ -25,6 +25,7 @@ obj-$(CONFIG_CPLB_INFO)              += cplbinfo.o
 obj-$(CONFIG_MODULES)                += module.o
 obj-$(CONFIG_KGDB)                   += kgdb.o
 obj-$(CONFIG_KGDB_TESTS)             += kgdb_test.o
 obj-$(CONFIG_MODULES)                += module.o
 obj-$(CONFIG_KGDB)                   += kgdb.o
 obj-$(CONFIG_KGDB_TESTS)             += kgdb_test.o
+obj-$(CONFIG_NMI_WATCHDOG)           += nmi.o
 obj-$(CONFIG_EARLY_PRINTK)           += early_printk.o
 obj-$(CONFIG_EARLY_PRINTK)           += shadow_console.o
 obj-$(CONFIG_STACKTRACE)             += stacktrace.o
 obj-$(CONFIG_EARLY_PRINTK)           += early_printk.o
 obj-$(CONFIG_EARLY_PRINTK)           += shadow_console.o
 obj-$(CONFIG_STACKTRACE)             += stacktrace.o
diff --git a/arch/blackfin/kernel/nmi.c b/arch/blackfin/kernel/nmi.c
new file mode 100644 (file)
index 0000000..19093c1
--- /dev/null
@@ -0,0 +1,313 @@
+/*
+ * Blackfin nmi_watchdog Driver
+ *
+ * Originally based on bfin_wdt.c
+ * Copyright 2010-2010 Analog Devices Inc.
+ *             Graf Yang <graf.yang@analog.com>
+ *
+ * Enter bugs at http://blackfin.uclinux.org/
+ *
+ * Licensed under the GPL-2 or later.
+ */
+
+#include <linux/bitops.h>
+#include <linux/hardirq.h>
+#include <linux/sysdev.h>
+#include <linux/pm.h>
+#include <linux/nmi.h>
+#include <linux/smp.h>
+#include <linux/timer.h>
+#include <asm/blackfin.h>
+#include <asm/atomic.h>
+#include <asm/cacheflush.h>
+
+/* Bit in WDOG_CTL that indicates watchdog has expired (WDR0) */
+#define WDOG_EXPIRED 0x8000
+
+/* Masks for WDEV field in WDOG_CTL register */
+#define ICTL_RESET   0x0
+#define ICTL_NMI     0x2
+#define ICTL_GPI     0x4
+#define ICTL_NONE    0x6
+#define ICTL_MASK    0x6
+
+/* Masks for WDEN field in WDOG_CTL register */
+#define WDEN_MASK    0x0FF0
+#define WDEN_ENABLE  0x0000
+#define WDEN_DISABLE 0x0AD0
+
+#define DRV_NAME "nmi-wdt"
+
+#define NMI_WDT_TIMEOUT 5          /* 5 seconds */
+#define NMI_CHECK_TIMEOUT (4 * HZ) /* 4 seconds in jiffies */
+static int nmi_wdt_cpu = 1;
+
+static unsigned int timeout = NMI_WDT_TIMEOUT;
+static int nmi_active;
+
+static unsigned short wdoga_ctl;
+static unsigned int wdoga_cnt;
+static struct corelock_slot saved_corelock;
+static atomic_t nmi_touched[NR_CPUS];
+static struct timer_list ntimer;
+
+enum {
+       COREA_ENTER_NMI = 0,
+       COREA_EXIT_NMI,
+       COREB_EXIT_NMI,
+
+       NMI_EVENT_NR,
+};
+static unsigned long nmi_event __attribute__ ((__section__(".l2.bss")));
+
+/* we are in nmi, non-atomic bit ops are safe */
+static inline void set_nmi_event(int event)
+{
+       __set_bit(event, &nmi_event);
+}
+
+static inline void wait_nmi_event(int event)
+{
+       while (!test_bit(event, &nmi_event))
+               barrier();
+       __clear_bit(event, &nmi_event);
+}
+
+static inline void send_corea_nmi(void)
+{
+       wdoga_ctl = bfin_read_WDOGA_CTL();
+       wdoga_cnt = bfin_read_WDOGA_CNT();
+
+       bfin_write_WDOGA_CTL(WDEN_DISABLE);
+       bfin_write_WDOGA_CNT(0);
+       bfin_write_WDOGA_CTL(WDEN_ENABLE | ICTL_NMI);
+}
+
+static inline void restore_corea_nmi(void)
+{
+       bfin_write_WDOGA_CTL(WDEN_DISABLE);
+       bfin_write_WDOGA_CTL(WDOG_EXPIRED | WDEN_DISABLE | ICTL_NONE);
+
+       bfin_write_WDOGA_CNT(wdoga_cnt);
+       bfin_write_WDOGA_CTL(wdoga_ctl);
+}
+
+static inline void save_corelock(void)
+{
+       saved_corelock = corelock;
+       corelock.lock = 0;
+}
+
+static inline void restore_corelock(void)
+{
+       corelock = saved_corelock;
+}
+
+
+static inline void nmi_wdt_keepalive(void)
+{
+       bfin_write_WDOGB_STAT(0);
+}
+
+static inline void nmi_wdt_stop(void)
+{
+       bfin_write_WDOGB_CTL(WDEN_DISABLE);
+}
+
+/* before calling this function, you must stop the WDT */
+static inline void nmi_wdt_clear(void)
+{
+       /* clear TRO bit, disable event generation */
+       bfin_write_WDOGB_CTL(WDOG_EXPIRED | WDEN_DISABLE | ICTL_NONE);
+}
+
+static inline void nmi_wdt_start(void)
+{
+       bfin_write_WDOGB_CTL(WDEN_ENABLE | ICTL_NMI);
+}
+
+static inline int nmi_wdt_running(void)
+{
+       return ((bfin_read_WDOGB_CTL() & WDEN_MASK) != WDEN_DISABLE);
+}
+
+static inline int nmi_wdt_set_timeout(unsigned long t)
+{
+       u32 cnt, max_t, sclk;
+       int run;
+
+       sclk = get_sclk();
+       max_t = -1 / sclk;
+       cnt = t * sclk;
+       if (t > max_t) {
+               pr_warning("NMI: timeout value is too large\n");
+               return -EINVAL;
+       }
+
+       run = nmi_wdt_running();
+       nmi_wdt_stop();
+       bfin_write_WDOGB_CNT(cnt);
+       if (run)
+               nmi_wdt_start();
+
+       timeout = t;
+
+       return 0;
+}
+
+int check_nmi_wdt_touched(void)
+{
+       unsigned int this_cpu = smp_processor_id();
+       unsigned int cpu;
+
+       cpumask_t mask = cpu_online_map;
+
+       if (!atomic_read(&nmi_touched[this_cpu]))
+               return 0;
+
+       atomic_set(&nmi_touched[this_cpu], 0);
+
+       cpu_clear(this_cpu, mask);
+       for_each_cpu_mask(cpu, mask) {
+               invalidate_dcache_range((unsigned long)(&nmi_touched[cpu]),
+                               (unsigned long)(&nmi_touched[cpu]));
+               if (!atomic_read(&nmi_touched[cpu]))
+                       return 0;
+               atomic_set(&nmi_touched[cpu], 0);
+       }
+
+       return 1;
+}
+
+static void nmi_wdt_timer(unsigned long data)
+{
+       if (check_nmi_wdt_touched())
+               nmi_wdt_keepalive();
+
+       mod_timer(&ntimer, jiffies + NMI_CHECK_TIMEOUT);
+}
+
+static int __init init_nmi_wdt(void)
+{
+       nmi_wdt_set_timeout(timeout);
+       nmi_wdt_start();
+       nmi_active = true;
+
+       init_timer(&ntimer);
+       ntimer.function = nmi_wdt_timer;
+       ntimer.expires = jiffies + NMI_CHECK_TIMEOUT;
+       add_timer(&ntimer);
+
+       pr_info("nmi_wdt: initialized: timeout=%d sec\n", timeout);
+       return 0;
+}
+device_initcall(init_nmi_wdt);
+
+void touch_nmi_watchdog(void)
+{
+       atomic_set(&nmi_touched[smp_processor_id()], 1);
+}
+
+/* Suspend/resume support */
+#ifdef CONFIG_PM
+static int nmi_wdt_suspend(struct sys_device *dev, pm_message_t state)
+{
+       nmi_wdt_stop();
+       return 0;
+}
+
+static int nmi_wdt_resume(struct sys_device *dev)
+{
+       if (nmi_active)
+               nmi_wdt_start();
+       return 0;
+}
+
+static struct sysdev_class nmi_sysclass = {
+       .name           = DRV_NAME,
+       .resume         = nmi_wdt_resume,
+       .suspend        = nmi_wdt_suspend,
+};
+
+static struct sys_device device_nmi_wdt = {
+       .id     = 0,
+       .cls    = &nmi_sysclass,
+};
+
+static int __init init_nmi_wdt_sysfs(void)
+{
+       int error;
+
+       if (!nmi_active)
+               return 0;
+
+       error = sysdev_class_register(&nmi_sysclass);
+       if (!error)
+               error = sysdev_register(&device_nmi_wdt);
+       return error;
+}
+late_initcall(init_nmi_wdt_sysfs);
+
+#endif /* CONFIG_PM */
+
+
+asmlinkage notrace void do_nmi(struct pt_regs *fp)
+{
+       unsigned int cpu = smp_processor_id();
+       nmi_enter();
+
+       cpu_pda[cpu].__nmi_count += 1;
+
+       if (cpu == nmi_wdt_cpu) {
+               /* CoreB goes here first */
+
+               /* reload the WDOG_STAT */
+               nmi_wdt_keepalive();
+
+               /* clear nmi interrupt for CoreB */
+               nmi_wdt_stop();
+               nmi_wdt_clear();
+
+               /* trigger NMI interrupt of CoreA */
+               send_corea_nmi();
+
+               /* waiting for CoreA to enter NMI */
+               wait_nmi_event(COREA_ENTER_NMI);
+
+               /* recover WDOGA's settings */
+               restore_corea_nmi();
+
+               save_corelock();
+
+               /* corelock is saved/cleared, CoreA is dumping messages */
+
+               wait_nmi_event(COREA_EXIT_NMI);
+       } else {
+               /* OK, CoreA entered NMI */
+               set_nmi_event(COREA_ENTER_NMI);
+       }
+
+       pr_emerg("\nNMI Watchdog detected LOCKUP, dump for CPU %d\n", cpu);
+       dump_bfin_process(fp);
+       dump_bfin_mem(fp);
+       show_regs(fp);
+       dump_bfin_trace_buffer();
+       show_stack(current, (unsigned long *)fp);
+
+       if (cpu == nmi_wdt_cpu) {
+               pr_emerg("This fault is not recoverable, sorry!\n");
+
+               /* CoreA dump finished, restore the corelock */
+               restore_corelock();
+
+               set_nmi_event(COREB_EXIT_NMI);
+       } else {
+               /* CoreA dump finished, notify CoreB we are done */
+               set_nmi_event(COREA_EXIT_NMI);
+
+               /* synchronize with CoreB */
+               wait_nmi_event(COREB_EXIT_NMI);
+       }
+
+       nmi_exit();
+}
index a351f97c87a3d24ef6553c4dd4af0d6784a60622..41a907596c70dbbd6999d193bd1aaba91ca17996 100644 (file)
@@ -21,6 +21,7 @@
 #include <asm/blackfin.h>
 #include <asm/time.h>
 #include <asm/gptimers.h>
 #include <asm/blackfin.h>
 #include <asm/time.h>
 #include <asm/gptimers.h>
+#include <asm/nmi.h>
 
 /* Accelerators for sched_clock()
  * convert from cycles(64bits) => nanoseconds (64bits)
 
 /* Accelerators for sched_clock()
  * convert from cycles(64bits) => nanoseconds (64bits)
@@ -309,6 +310,9 @@ irqreturn_t bfin_coretmr_interrupt(int irq, void *dev_id)
 
        smp_mb();
        evt->event_handler(evt);
 
        smp_mb();
        evt->event_handler(evt);
+
+       touch_nmi_watchdog();
+
        return IRQ_HANDLED;
 }
 
        return IRQ_HANDLED;
 }
 
index 0a0c088ead8ce1074c4aa6deeb5c1c78ddc6b552..cee62cf4acd429d07b4d84991c6e088f795a1eb4 100644 (file)
@@ -194,12 +194,28 @@ ENTRY(_evt_ivhw)
 ENDPROC(_evt_ivhw)
 
 /* Interrupt routine for evt2 (NMI).
 ENDPROC(_evt_ivhw)
 
 /* Interrupt routine for evt2 (NMI).
- * We don't actually use this, so just return.
  * For inner circle type details, please see:
  * http://docs.blackfin.uclinux.org/doku.php?id=linux-kernel:nmi
  */
 ENTRY(_evt_nmi)
  * For inner circle type details, please see:
  * http://docs.blackfin.uclinux.org/doku.php?id=linux-kernel:nmi
  */
 ENTRY(_evt_nmi)
+#ifndef CONFIG_NMI_WATCHDOG
 .weak _evt_nmi
 .weak _evt_nmi
+#else
+       /* CPLBs are not taken into account; this handler will not return */
+       SAVE_ALL_SYS
+       r0 = sp;
+       r1 = retn;
+       [sp + PT_PC] = r1;
+       trace_buffer_save(p4,r5);
+
+       ANOMALY_283_315_WORKAROUND(p4, r5)
+
+       SP += -12;
+       call _do_nmi;
+       SP += 12;
+1:
+       jump 1b;
+#endif
        rtn;
 ENDPROC(_evt_nmi)
 
        rtn;
 ENDPROC(_evt_nmi)