patch-2.4.25 linux-2.4.25/arch/ia64/kernel/mca.c

Next file: linux-2.4.25/arch/ia64/kernel/mca_asm.S
Previous file: linux-2.4.25/arch/ia64/kernel/ivt.S
Back to the patch index
Back to the overall index

diff -urN linux-2.4.24/arch/ia64/kernel/mca.c linux-2.4.25/arch/ia64/kernel/mca.c
@@ -36,6 +36,10 @@
  *                      SAL 3.0 spec.
  * 00/03/29 C. Fleckenstein  Fixed PAL/SAL update issues, began MCA bug fixes, logging issues,
  *                           added min save state dump, added INIT handler.
+ *
+ * 2003-12-08 Keith Owens <kaos@sgi.com>
+ *            smp_call_function() must not be called from interrupt context (can
+ *            deadlock on tasklist_lock).  Use keventd to call smp_call_function().
  */
 #include <linux/config.h>
 #include <linux/types.h>
@@ -50,6 +54,7 @@
 #include <linux/module.h>
 #include <linux/kernel.h>
 #include <linux/smp.h>
+#include <linux/tqueue.h>
 
 #include <asm/delay.h>
 #include <asm/machvec.h>
@@ -80,19 +85,19 @@
 u64				ia64_mca_stackframe[32];
 u64				ia64_mca_bspstore[1024];
 u64				ia64_init_stack[INIT_TASK_SIZE/8] __attribute__((aligned(16)));
-u64				ia64_mca_sal_data_area[1356];
-u64				ia64_tlb_functional;
 u64				ia64_os_mca_recovery_successful;
-/* TODO: need to assign min-state structure to UC memory */
-u64				ia64_mca_min_state_save_info[MIN_STATE_AREA_SIZE] __attribute__((aligned(512)));
+u64				ia64_mca_serialize;
 static void			ia64_mca_wakeup_ipi_wait(void);
 static void			ia64_mca_wakeup(int cpu);
 static void			ia64_mca_wakeup_all(void);
 static void			ia64_log_init(int);
 extern void			ia64_monarch_init_handler (void);
 extern void			ia64_slave_init_handler (void);
+static u64			ia64_log_get(int sal_info_type, u8 **buffer);
 extern struct hw_interrupt_type	irq_type_iosapic_level;
 
+struct ia64_mca_tlb_info ia64_mca_tlb_list[NR_CPUS];
+
 static struct irqaction cmci_irqaction = {
 	.handler =	ia64_mca_cmc_int_handler,
 	.flags =	SA_INTERRUPT,
@@ -151,7 +156,9 @@
  */
 static int cpe_poll_enabled = 1;
 
-extern void salinfo_log_wakeup(int);
+extern void salinfo_log_wakeup(int type, u8 *buffer, u64 size);
+
+static struct tq_struct	cmc_disable_tq, cmc_enable_tq;
 
 /*
  *  ia64_mca_log_sal_error_record
@@ -166,11 +173,13 @@
 int
 ia64_mca_log_sal_error_record(int sal_info_type, int called_from_init)
 {
-	int platform_err = 0;
+	u8 *buffer;
+	u64 size;
+	int platform_err;
 
-	/* Get the MCA error record */
-	if (!ia64_log_get(sal_info_type, (prfunc_t)printk))
-		return platform_err;		/* no record retrieved */
+	size = ia64_log_get(sal_info_type, &buffer);
+	if (!size)
+		return 0;
 
 	/* TODO:
 	 * 1. analyze error logs to determine recoverability
@@ -178,8 +187,11 @@
 	 * 3. set ia64_os_mca_recovery_successful flag, if applicable
 	 */
 
-	salinfo_log_wakeup(sal_info_type);
+	salinfo_log_wakeup(sal_info_type, buffer, size);
 	platform_err = ia64_log_print(sal_info_type, (prfunc_t)printk);
+	/* Clear logs from corrected errors in case there's no user-level logger */
+	if (sal_info_type == SAL_INFO_TYPE_CPE || sal_info_type == SAL_INFO_TYPE_CMC)
+		ia64_sal_clear_state_info(sal_info_type);
 
 	return platform_err;
 }
@@ -462,26 +474,6 @@
 #endif /* PLATFORM_MCA_HANDLERS */
 
 /*
- * routine to process and prepare to dump min_state_save
- * information for debugging purposes.
- */
-void
-ia64_process_min_state_save (pal_min_state_area_t *pmss)
-{
-	int i, max = MIN_STATE_AREA_SIZE;
-	u64 *tpmss_ptr = (u64 *)pmss;
-	u64 *return_min_state_ptr = ia64_mca_min_state_save_info;
-
-	for (i=0;i<max;i++) {
-
-		/* copy min-state register info for eventual return to PAL */
-		*return_min_state_ptr++ = *tpmss_ptr;
-
-		tpmss_ptr++;  /* skip to next entry */
-	}
-}
-
-/*
  * ia64_mca_cmc_vector_setup
  *
  *  Setup the corrected machine check vector register in the processor and
@@ -620,6 +612,36 @@
 }
 
 /*
+ * ia64_mca_cmc_vector_disable_keventd
+ *
+ * Called via keventd (smp_call_function() is not safe in interrupt context) to
+ * disable the cmc interrupt vector.
+ *
+ * Note: needs preempt_disable() if you apply the preempt patch to 2.4.
+ */
+static void
+ia64_mca_cmc_vector_disable_keventd(void *unused)
+{
+	ia64_mca_cmc_vector_disable(NULL);
+	smp_call_function(ia64_mca_cmc_vector_disable, NULL, 1, 0);
+}
+
+/*
+ * ia64_mca_cmc_vector_enable_keventd
+ *
+ * Called via keventd (smp_call_function() is not safe in interrupt context) to
+ * enable the cmc interrupt vector.
+ *
+ * Note: needs preempt_disable() if you apply the preempt patch to 2.4.
+ */
+static void
+ia64_mca_cmc_vector_enable_keventd(void *unused)
+{
+	smp_call_function(ia64_mca_cmc_vector_enable, NULL, 1, 0);
+	ia64_mca_cmc_vector_enable(NULL);
+}
+
+/*
  * ia64_mca_init
  *
  *  Do all the system level mca specific initialization.
@@ -652,6 +674,9 @@
 
 	IA64_MCA_DEBUG("ia64_mca_init: begin\n");
 
+	INIT_TQUEUE(&cmc_disable_tq, ia64_mca_cmc_vector_disable_keventd, NULL);
+	INIT_TQUEUE(&cmc_enable_tq, ia64_mca_cmc_vector_enable_keventd, NULL);
+
 	/* initialize recovery success indicator */
 	ia64_os_mca_recovery_successful = 0;
 
@@ -834,7 +859,7 @@
 			irr = ia64_get_irr3();
 			break;
 		}
-	} while (!(irr & (1 << irr_bit))) ;
+	} while (!(irr & (1UL << irr_bit))) ;
 }
 
 /*
@@ -950,6 +975,9 @@
 void
 ia64_return_to_sal_check(void)
 {
+	pal_processor_state_info_t *psp = (pal_processor_state_info_t *)
+		&ia64_sal_to_os_handoff_state.proc_state_param;
+
 	/* Copy over some relevant stuff from the sal_to_os_mca_handoff
 	 * so that it can be used at the time of os_mca_to_sal_handoff
 	 */
@@ -959,15 +987,22 @@
 	ia64_os_to_sal_handoff_state.imots_sal_check_ra =
 		ia64_sal_to_os_handoff_state.imsto_sal_check_ra;
 
-	/* Cold Boot for uncorrectable MCA */
-	ia64_os_to_sal_handoff_state.imots_os_status = IA64_MCA_COLD_BOOT;
+	/*
+	 * Did we correct the error? At the moment the only error that
+	 * we fix is a TLB error, if any other kind of error occurred
+	 * we must reboot.
+	 */
+	if (psp->cc == 1 && psp->bc == 1 && psp->rc == 1 && psp->uc == 1)
+		ia64_os_to_sal_handoff_state.imots_os_status = IA64_MCA_COLD_BOOT;
+	else
+		ia64_os_to_sal_handoff_state.imots_os_status = IA64_MCA_CORRECTED;
 
 	/* Default = tell SAL to return to same context */
 	ia64_os_to_sal_handoff_state.imots_context = IA64_MCA_SAME_CONTEXT;
 
-	/* Register pointer to new min state values */
 	ia64_os_to_sal_handoff_state.imots_new_min_state =
-		ia64_mca_min_state_save_info;
+		(u64 *)ia64_sal_to_os_handoff_state.pal_min_state;
+
 }
 
 /*
@@ -1056,14 +1091,7 @@
 
 			cmc_polling_enabled = 1;
 			spin_unlock(&cmc_history_lock);
-
-			/*
-			 * We rely on the local_irq_enable() above so
-			 * that this can't deadlock.
-			 */
-			ia64_mca_cmc_vector_disable(NULL);
-
-			smp_call_function(ia64_mca_cmc_vector_disable, NULL, 1, 0);
+			schedule_task(&cmc_disable_tq);
 
 			/*
 			 * Corrected errors will still be corrected, but
@@ -1157,19 +1185,7 @@
 		if (start_count == IA64_LOG_COUNT(SAL_INFO_TYPE_CMC)) {
 
 			printk(KERN_WARNING "%s: Returning to interrupt driven CMC handler\n", __FUNCTION__);
-
-			/*
-			 * The cmc interrupt handler enabled irqs, so
-			 * this can't deadlock.
-			 */
-			smp_call_function(ia64_mca_cmc_vector_enable, NULL, 1, 0);
-
-			/*
-			 * Turn off interrupts before re-enabling the
-			 * cmc vector locally.  Make sure we get out.
-			 */
-			local_irq_disable();
-			ia64_mca_cmc_vector_enable(NULL);
+			schedule_task(&cmc_enable_tq);
 			cmc_polling_enabled = 0;
 
 		} else {
@@ -1416,12 +1432,12 @@
  *	Get the current MCA log from SAL and copy it into the OS log buffer.
  *
  *  Inputs  :   info_type   (SAL_INFO_TYPE_{MCA,INIT,CMC,CPE})
- *              prfunc      (fn ptr of log output function)
  *  Outputs :   size        (total record length)
+ *              *buffer     (ptr to error record)
  *
  */
 u64
-ia64_log_get(int sal_info_type, prfunc_t prfunc)
+ia64_log_get(int sal_info_type, u8 **buffer)
 {
 	sal_log_record_header_t     *log_buffer;
 	u64                         total_len = 0;
@@ -1439,6 +1455,7 @@
 		IA64_LOG_UNLOCK(sal_info_type);
 		IA64_MCA_DEBUG("ia64_log_get: SAL error record type %d retrieved. "
 			       "Record length = %ld\n", sal_info_type, total_len);
+		*buffer = (u8 *) log_buffer;
 		return total_len;
 	} else {
 		IA64_LOG_UNLOCK(sal_info_type);
@@ -1483,7 +1500,7 @@
 void
 ia64_log_rec_header_print (sal_log_record_header_t *lh, prfunc_t prfunc)
 {
-	prfunc("+Err Record ID: %d    SAL Rev: %2x.%02x\n", lh->id,
+	prfunc("+Err Record ID: %ld    SAL Rev: %2x.%02x\n", lh->id,
 			lh->revision.major, lh->revision.minor);
 	prfunc("+Time: %02x/%02x/%02x%02x %02x:%02x:%02x    Severity %d\n",
 			lh->timestamp.slh_month, lh->timestamp.slh_day,
@@ -1606,13 +1623,13 @@
 	if (info->dl)
 		prfunc(" Line: Data,");
 	prfunc(" Operation: %s,", pal_cache_op[info->op]);
-	if (info->wv)
+	if (info->wiv)
 		prfunc(" Way: %d,", info->way);
 	if (cache_check_info->valid.target_identifier)
 		/* Hope target address is saved in target_identifier */
 		if (info->tv)
 			prfunc(" Target Addr: 0x%lx,", target_addr);
-	if (info->mc)
+	if (info->mcc)
 		prfunc(" MC: Corrected");
 	prfunc("\n");
 }
@@ -1648,13 +1665,13 @@
 		prfunc("  Failure: Data Translation Cache");
 	if (info->itr) {
 		prfunc("  Failure: Instruction Translation Register");
-		prfunc(" ,Slot: %d", info->tr_slot);
+		prfunc(" ,Slot: %ld", info->tr_slot);
 	}
 	if (info->dtr) {
 		prfunc("  Failure: Data Translation Register");
-		prfunc(" ,Slot: %d", info->tr_slot);
+		prfunc(" ,Slot: %ld", info->tr_slot);
 	}
-	if (info->mc)
+	if (info->mcc)
 		prfunc(" ,MC: Corrected");
 	prfunc("\n");
 }
@@ -1700,7 +1717,7 @@
 		prfunc(" ,Error: Internal");
 	if (info->eb)
 		prfunc(" ,Error: External");
-	if (info->mc)
+	if (info->mcc)
 		prfunc(" ,MC: Corrected");
 	if (info->tv)
 		prfunc(" ,Target Address: 0x%lx", targ_addr);
@@ -2148,9 +2165,6 @@
 	if (slpi->valid.psi_static_struct) {
 		spsi = (sal_processor_static_info_t *)p_data;
 
-		/* copy interrupted context PAL min-state info */
-		ia64_process_min_state_save(&spsi->min_state_area);
-
 		/* Print branch register contents if valid */
 		if (spsi->valid.br)
 			ia64_log_processor_regs_print(spsi->br, 8, "Branch", "br",
@@ -2376,7 +2390,8 @@
 		ia64_log_rec_header_print(IA64_LOG_CURR_BUFFER(sal_info_type), prfunc);
 		break;
 	      case SAL_INFO_TYPE_INIT:
-		prfunc("+MCA INIT ERROR LOG (UNIMPLEMENTED)\n");
+		prfunc("+CPU %d: SAL log contains INIT error record\n", smp_processor_id());
+		ia64_log_rec_header_print(IA64_LOG_CURR_BUFFER(sal_info_type), prfunc);
 		break;
 	      case SAL_INFO_TYPE_CMC:
 		prfunc("+BEGIN HARDWARE ERROR STATE AT CMC\n");

FUNET's LINUX-ADM group, linux-adm@nic.funet.fi
TCL-scripts by Sam Shen (who was at: slshen@lbl.gov)