]> bbs.cooldavid.org Git - net-next-2.6.git/blobdiff - drivers/edac/amd64_edac.c
amd64_edac: cleanup/complete NB MCE decoding
[net-next-2.6.git] / drivers / edac / amd64_edac.c
index c9b88d829701d70271c9e42e028095953f0a7bc8..5af87d44c80cd5a1bb4751c20d5ebe56fc6146e1 100644 (file)
@@ -2355,62 +2355,47 @@ static void amd64_decode_bus_error(struct mem_ctl_info *mci,
                                          "Error Overflow set");
 }
 
-int amd64_process_error_info(struct mem_ctl_info *mci,
-                            struct err_regs *regs,
-                            int handle_errors)
+void amd64_decode_nb_mce(struct mem_ctl_info *mci, struct err_regs *regs,
+                        int handle_errors)
 {
-       struct amd64_pvt *pvt;
-       u32 err_code, ext_ec;
-       int gart_tlb_error = 0;
-
-       pvt = mci->pvt_info;
+       struct amd64_pvt *pvt = mci->pvt_info;
+       int ecc;
+       u32 ec  = ERROR_CODE(regs->nbsl);
+       u32 xec = EXT_ERROR_CODE(regs->nbsl);
 
        if (!handle_errors)
-               return 1;
+               return;
 
-       debugf1("NorthBridge ERROR: mci(0x%p)\n", mci);
-       debugf1("  MC node(%d) Error-Address(0x%.8x-%.8x)\n",
-               pvt->mc_node_id, regs->nbeah, regs->nbeal);
-       debugf1("  nbsh(0x%.8x) nbsl(0x%.8x)\n",
-               regs->nbsh, regs->nbsl);
-       debugf1("  Valid Error=%s Overflow=%s\n",
-               (regs->nbsh & K8_NBSH_VALID_BIT) ? "True" : "False",
-               (regs->nbsh & K8_NBSH_OVERFLOW) ? "True" : "False");
-       debugf1("  Err Uncorrected=%s MCA Error Reporting=%s\n",
-               (regs->nbsh & K8_NBSH_UNCORRECTED_ERR) ?
-                       "True" : "False",
-               (regs->nbsh & K8_NBSH_ERR_ENABLE) ?
-                       "True" : "False");
-       debugf1("  MiscErr Valid=%s ErrAddr Valid=%s PCC=%s\n",
-               (regs->nbsh & K8_NBSH_MISC_ERR_VALID) ?
-                       "True" : "False",
-               (regs->nbsh & K8_NBSH_VALID_ERROR_ADDR) ?
-                       "True" : "False",
-               (regs->nbsh & K8_NBSH_PCC) ?
-                       "True" : "False");
-       debugf1("  CECC=%s UECC=%s Found by Scruber=%s\n",
-               (regs->nbsh & K8_NBSH_CECC) ?
-                       "True" : "False",
-               (regs->nbsh & K8_NBSH_UECC) ?
-                       "True" : "False",
-               (regs->nbsh & K8_NBSH_ERR_SCRUBER) ?
-                       "True" : "False");
-       debugf1("  CORE0=%s CORE1=%s CORE2=%s CORE3=%s\n",
-               (regs->nbsh & K8_NBSH_CORE0) ? "True" : "False",
-               (regs->nbsh & K8_NBSH_CORE1) ? "True" : "False",
-               (regs->nbsh & K8_NBSH_CORE2) ? "True" : "False",
-               (regs->nbsh & K8_NBSH_CORE3) ? "True" : "False");
+       pr_emerg(" Northbridge ERROR, mc node %d", pvt->mc_node_id);
 
+       /*
+        * F10h, revD can disable ErrCpu[3:0] so check that first and also the
+        * value encoding has changed so interpret those differently
+        */
+       if ((boot_cpu_data.x86 == 0x10) &&
+           (boot_cpu_data.x86_model > 8)) {
+               if (regs->nbsh & K8_NBSH_ERR_CPU_VAL)
+                       pr_cont(", core: %u\n", (u8)(regs->nbsh & 0xf));
+       } else {
+               pr_cont(", core: %d\n", ilog2((regs->nbsh & 0xf)));
+       }
 
-       err_code = ERROR_CODE(regs->nbsl);
+       pr_emerg(" Error: %sorrected",
+                ((regs->nbsh & K8_NBSH_UC_ERR) ? "Unc" : "C"));
+       pr_cont(", Report Error: %s",
+                ((regs->nbsh & K8_NBSH_ERR_EN) ? "yes" : "no"));
+       pr_cont(", MiscV: %svalid, CPU context corrupt: %s",
+               ((regs->nbsh & K8_NBSH_MISCV) ? "" : "In"),
+               ((regs->nbsh & K8_NBSH_PCC)   ? "yes" : "no"));
 
-       /* Determine which error type:
-        *      1) GART errors - non-fatal, developmental events
-        *      2) MEMORY errors
-        *      3) BUS errors
-        *      4) Unknown error
-        */
-       if (TLB_ERROR(err_code)) {
+       /* do the two bits[14:13] together */
+       ecc = regs->nbsh & (0x3 << 13);
+       if (ecc)
+               pr_cont(", %sECC Error", ((ecc == 2) ? "C" : "U"));
+
+       pr_cont("\n");
+
+       if (TLB_ERROR(ec)) {
                /*
                 * GART errors are intended to help graphics driver developers
                 * to detect bad GART PTEs. It is recommended by AMD to disable
@@ -2423,52 +2408,34 @@ int amd64_process_error_info(struct mem_ctl_info *mci,
                 * [1] section 13.10.1 on BIOS and Kernel Developers Guide for
                 *     AMD NPT family 0Fh processors
                 */
-               if (report_gart_errors == 0)
-                       return 1;
-
-               /*
-                * Only if GART error reporting is requested should we generate
-                * any logs.
-                */
-               gart_tlb_error = 1;
+               if (!report_gart_errors)
+                       return;
 
-               debugf1("GART TLB error\n");
+               pr_emerg("GART TLB error\n");
                amd64_decode_gart_tlb_error(mci, regs);
-       } else if (MEM_ERROR(err_code)) {
-               debugf1("Memory/Cache error\n");
+       } else if (MEM_ERROR(ec)) {
+               pr_emerg("Memory/Cache error\n");
                amd64_decode_mem_cache_error(mci, regs);
-       } else if (BUS_ERROR(err_code)) {
-               debugf1("Bus (Link/DRAM) error\n");
+       } else if (BUS_ERROR(ec)) {
+               pr_emerg("Bus (Link/DRAM) error\n");
                amd64_decode_bus_error(mci, regs);
        } else {
                /* shouldn't reach here! */
                amd64_mc_printk(mci, KERN_WARNING,
                             "%s(): unknown MCE error 0x%x\n", __func__,
-                            err_code);
+                            ec);
        }
 
-       ext_ec = EXT_ERROR_CODE(regs->nbsl);
-       amd64_mc_printk(mci, KERN_ERR,
-               "ExtErr=(0x%x) %s\n", ext_ec, ext_msgs[ext_ec]);
+       pr_emerg("%s.\n", EXT_ERR_MSG(xec));
 
        /*
         * Check the UE bit of the NB status high register, if set generate some
         * logs. If NOT a GART error, then process the event as a NO-INFO event.
         * If it was a GART error, skip that process.
         */
-       if (regs->nbsh & K8_NBSH_UNCORRECTED_ERR) {
-               amd64_mc_printk(mci, KERN_CRIT, "uncorrected error\n");
-               if (!gart_tlb_error)
-                       edac_mc_handle_ue_no_info(mci, "UE bit is set\n");
-       }
-
-       if (regs->nbsh & K8_NBSH_PCC)
-               amd64_mc_printk(mci, KERN_CRIT,
-                       "PCC (processor context corrupt) set\n");
-
-       return 1;
+       if (regs->nbsh & K8_NBSH_UC_ERR && !report_gart_errors)
+               edac_mc_handle_ue_no_info(mci, "UE bit is set");
 }
-EXPORT_SYMBOL_GPL(amd64_process_error_info);
 
 /*
  * The main polling 'check' function, called FROM the edac core to perform the
@@ -2479,7 +2446,7 @@ static void amd64_check(struct mem_ctl_info *mci)
        struct err_regs regs;
 
        if (amd64_get_error_info(mci, &regs))
-               amd64_process_error_info(mci, &regs, 1);
+               amd64_decode_nb_mce(mci, &regs, 1);
 }
 
 /*