Skip to content

Commit 0e66f05

Browse files
Ganesh Goudar authored and smb49 committed
powerpc/pseries: Avoid using addr_to_pfn in real mode
BugLink: https://bugs.launchpad.net/bugs/1902137 [ Upstream commit 4ff753f ] When an UE or memory error exception is encountered the MCE handler tries to find the pfn using addr_to_pfn() which takes effective address as an argument, later pfn is used to poison the page where memory error occurred, recent rework in this area made addr_to_pfn to run in real mode, which can be fatal as it may try to access memory outside RMO region. Have two helper functions to separate things to be done in real mode and virtual mode without changing any functionality. This also fixes the following error as the use of addr_to_pfn is now moved to virtual mode. Without this change following kernel crash is seen on hitting UE. [ 485.128036] Oops: Kernel access of bad area, sig: 11 [#1] [ 485.128040] LE SMP NR_CPUS=2048 NUMA pSeries [ 485.128047] Modules linked in: [ 485.128067] CPU: 15 PID: 6536 Comm: insmod Kdump: loaded Tainted: G OE 5.7.0 #22 [ 485.128074] NIP: c00000000009b24c LR: c0000000000398d8 CTR: c000000000cd57c0 [ 485.128078] REGS: c000000003f1f970 TRAP: 0300 Tainted: G OE (5.7.0) [ 485.128082] MSR: 8000000000001003 <SF,ME,RI,LE> CR: 28008284 XER: 00000001 [ 485.128088] CFAR: c00000000009b190 DAR: c0000001fab00000 DSISR: 40000000 IRQMASK: 1 [ 485.128088] GPR00: 0000000000000001 c000000003f1fbf0 c000000001634300 0000b0fa01000000 [ 485.128088] GPR04: d000000002220000 0000000000000000 00000000fab00000 0000000000000022 [ 485.128088] GPR08: c0000001fab00000 0000000000000000 c0000001fab00000 c000000003f1fc14 [ 485.128088] GPR12: 0000000000000008 c000000003ff5880 d000000002100008 0000000000000000 [ 485.128088] GPR16: 000000000000ff20 000000000000fff1 000000000000fff2 d0000000021a1100 [ 485.128088] GPR20: d000000002200000 c00000015c893c50 c000000000d49b28 c00000015c893c50 [ 485.128088] GPR24: d0000000021a0d08 c0000000014e5da8 d0000000021a0818 000000000000000a [ 485.128088] GPR28: 0000000000000008 000000000000000a c0000000017e2970 000000000000000a [ 485.128125] NIP [c00000000009b24c] 
__find_linux_pte+0x11c/0x310 [ 485.128130] LR [c0000000000398d8] addr_to_pfn+0x138/0x170 [ 485.128133] Call Trace: [ 485.128135] Instruction dump: [ 485.128138] 3929ffff 7d4a3378 7c883c36 7d2907b4 794a1564 7d294038 794af082 3900ffff [ 485.128144] 79291f24 790af00e 78e70020 7d095214 <7c69502a> 2fa30000 419e011c 70690040 [ 485.128152] ---[ end trace d34b27e29ae0e340 ]--- Fixes: 9ca766f ("powerpc/64s/pseries: machine check convert to use common event code") Signed-off-by: Ganesh Goudar <[email protected]> Signed-off-by: Michael Ellerman <[email protected]> Link: https://lore.kernel.org/r/[email protected] Signed-off-by: Sasha Levin <[email protected]> Signed-off-by: Kamal Mostafa <[email protected]> Signed-off-by: Ian May <[email protected]>
1 parent 0250019 commit 0e66f05

File tree

1 file changed

+69
-49
lines changed
  • arch/powerpc/platforms/pseries

1 file changed

+69
-49
lines changed

arch/powerpc/platforms/pseries/ras.c

Lines changed: 69 additions & 49 deletions
Original file line number | Diff line number | Diff line change
@@ -521,18 +521,55 @@ int pSeries_system_reset_exception(struct pt_regs *regs)
521521
return 0; /* need to perform reset */
522522
}
523523

524+
static int mce_handle_err_realmode(int disposition, u8 error_type)
525+
{
526+
#ifdef CONFIG_PPC_BOOK3S_64
527+
if (disposition == RTAS_DISP_NOT_RECOVERED) {
528+
switch (error_type) {
529+
case MC_ERROR_TYPE_SLB:
530+
case MC_ERROR_TYPE_ERAT:
531+
/*
532+
* Store the old slb content in paca before flushing.
533+
* Print this when we go to virtual mode.
534+
* There are chances that we may hit MCE again if there
535+
* is a parity error on the SLB entry we trying to read
536+
* for saving. Hence limit the slb saving to single
537+
* level of recursion.
538+
*/
539+
if (local_paca->in_mce == 1)
540+
slb_save_contents(local_paca->mce_faulty_slbs);
541+
flush_and_reload_slb();
542+
disposition = RTAS_DISP_FULLY_RECOVERED;
543+
break;
544+
default:
545+
break;
546+
}
547+
} else if (disposition == RTAS_DISP_LIMITED_RECOVERY) {
548+
/* Platform corrected itself but could be degraded */
549+
pr_err("MCE: limited recovery, system may be degraded\n");
550+
disposition = RTAS_DISP_FULLY_RECOVERED;
551+
}
552+
#endif
553+
return disposition;
554+
}
524555

525-
static int mce_handle_error(struct pt_regs *regs, struct rtas_error_log *errp)
556+
static int mce_handle_err_virtmode(struct pt_regs *regs,
557+
struct rtas_error_log *errp,
558+
struct pseries_mc_errorlog *mce_log,
559+
int disposition)
526560
{
527561
struct mce_error_info mce_err = { 0 };
528-
unsigned long eaddr = 0, paddr = 0;
529-
struct pseries_errorlog *pseries_log;
530-
struct pseries_mc_errorlog *mce_log;
531-
int disposition = rtas_error_disposition(errp);
532562
int initiator = rtas_error_initiator(errp);
533563
int severity = rtas_error_severity(errp);
564+
unsigned long eaddr = 0, paddr = 0;
534565
u8 error_type, err_sub_type;
535566

567+
if (!mce_log)
568+
goto out;
569+
570+
error_type = mce_log->error_type;
571+
err_sub_type = rtas_mc_error_sub_type(mce_log);
572+
536573
if (initiator == RTAS_INITIATOR_UNKNOWN)
537574
mce_err.initiator = MCE_INITIATOR_UNKNOWN;
538575
else if (initiator == RTAS_INITIATOR_CPU)
@@ -571,18 +608,7 @@ static int mce_handle_error(struct pt_regs *regs, struct rtas_error_log *errp)
571608
mce_err.error_type = MCE_ERROR_TYPE_UNKNOWN;
572609
mce_err.error_class = MCE_ECLASS_UNKNOWN;
573610

574-
if (!rtas_error_extended(errp))
575-
goto out;
576-
577-
pseries_log = get_pseries_errorlog(errp, PSERIES_ELOG_SECT_ID_MCE);
578-
if (pseries_log == NULL)
579-
goto out;
580-
581-
mce_log = (struct pseries_mc_errorlog *)pseries_log->data;
582-
error_type = mce_log->error_type;
583-
err_sub_type = rtas_mc_error_sub_type(mce_log);
584-
585-
switch (mce_log->error_type) {
611+
switch (error_type) {
586612
case MC_ERROR_TYPE_UE:
587613
mce_err.error_type = MCE_ERROR_TYPE_UE;
588614
mce_common_process_ue(regs, &mce_err);
@@ -682,37 +708,31 @@ static int mce_handle_error(struct pt_regs *regs, struct rtas_error_log *errp)
682708
mce_err.error_type = MCE_ERROR_TYPE_UNKNOWN;
683709
break;
684710
}
711+
out:
712+
save_mce_event(regs, disposition == RTAS_DISP_FULLY_RECOVERED,
713+
&mce_err, regs->nip, eaddr, paddr);
714+
return disposition;
715+
}
685716

686-
#ifdef CONFIG_PPC_BOOK3S_64
687-
if (disposition == RTAS_DISP_NOT_RECOVERED) {
688-
switch (error_type) {
689-
case MC_ERROR_TYPE_SLB:
690-
case MC_ERROR_TYPE_ERAT:
691-
/*
692-
* Store the old slb content in paca before flushing.
693-
* Print this when we go to virtual mode.
694-
* There are chances that we may hit MCE again if there
695-
* is a parity error on the SLB entry we trying to read
696-
* for saving. Hence limit the slb saving to single
697-
* level of recursion.
698-
*/
699-
if (local_paca->in_mce == 1)
700-
slb_save_contents(local_paca->mce_faulty_slbs);
701-
flush_and_reload_slb();
702-
disposition = RTAS_DISP_FULLY_RECOVERED;
703-
break;
704-
default:
705-
break;
706-
}
707-
} else if (disposition == RTAS_DISP_LIMITED_RECOVERY) {
708-
/* Platform corrected itself but could be degraded */
709-
printk(KERN_ERR "MCE: limited recovery, system may "
710-
"be degraded\n");
711-
disposition = RTAS_DISP_FULLY_RECOVERED;
712-
}
713-
#endif
717+
static int mce_handle_error(struct pt_regs *regs, struct rtas_error_log *errp)
718+
{
719+
struct pseries_errorlog *pseries_log;
720+
struct pseries_mc_errorlog *mce_log = NULL;
721+
int disposition = rtas_error_disposition(errp);
722+
u8 error_type;
723+
724+
if (!rtas_error_extended(errp))
725+
goto out;
726+
727+
pseries_log = get_pseries_errorlog(errp, PSERIES_ELOG_SECT_ID_MCE);
728+
if (!pseries_log)
729+
goto out;
730+
731+
mce_log = (struct pseries_mc_errorlog *)pseries_log->data;
732+
error_type = mce_log->error_type;
733+
734+
disposition = mce_handle_err_realmode(disposition, error_type);
714735

715-
out:
716736
/*
717737
* Enable translation as we will be accessing per-cpu variables
718738
* in save_mce_event() which may fall outside RMO region, also
@@ -723,10 +743,10 @@ static int mce_handle_error(struct pt_regs *regs, struct rtas_error_log *errp)
723743
* Note: All the realmode handling like flushing SLB entries for
724744
* SLB multihit is done by now.
725745
*/
746+
out:
726747
mtmsr(mfmsr() | MSR_IR | MSR_DR);
727-
save_mce_event(regs, disposition == RTAS_DISP_FULLY_RECOVERED,
728-
&mce_err, regs->nip, eaddr, paddr);
729-
748+
disposition = mce_handle_err_virtmode(regs, errp, mce_log,
749+
disposition);
730750
return disposition;
731751
}
732752

0 commit comments

Comments
 (0)