ia64/xen-unstable

changeset 19429:891af2c54155

MCA interfaces between XEN/DOM0, let DOM0 know the MCA recovery action

Signed-off-by: Jiang, yunhong <yunhong.jiang@intel.com>
Signed-off-by: Ke, liping <liping.ke@intel.com>
author Keir Fraser <keir.fraser@citrix.com>
date Fri Mar 20 17:25:29 2009 +0000 (2009-03-20)
parents cc60defe5b96
children 0477f9061c8a
files xen/arch/x86/cpu/mcheck/x86_mca.h xen/include/public/arch-x86/xen-mca.h
line diff
     1.1 --- a/xen/arch/x86/cpu/mcheck/x86_mca.h	Fri Mar 20 17:24:53 2009 +0000
     1.2 +++ b/xen/arch/x86/cpu/mcheck/x86_mca.h	Fri Mar 20 17:25:29 2009 +0000
     1.3 @@ -87,6 +87,53 @@
     1.4  typedef DECLARE_BITMAP(cpu_banks_t, MAX_NR_BANKS);
     1.5  DECLARE_PER_CPU(cpu_banks_t, mce_banks_owned);
     1.6  
     1.7 +/* Below interfaces are defined for MCA internal processing:
     1.8 + * a. pre_handler will be called early in MCA ISR context, mainly for early
     1.9 + *    need_reset detection to avoid losing logs. It is also used to determine
    1.10 + *    the impacted DOMAIN if possible.
    1.11 + * b. mca_error_handler is actually an (error_action_index,
    1.12 + *    recovery_handler pointer) pair. The defined recovery_handler
    1.13 + *    performs the actual recovery operations such as page_offline, cpu_offline
    1.14 + *    in softIRQ context when the per_bank MCA error matches the corresponding
    1.15 + *    mca_code index. If pre_handler can't determine the impacted domain,
    1.16 + *    recovery_handler must figure it out.
    1.17 +*/
    1.18 +
    1.19 +/* result flag bit: MCA error has been recovered successfully by the recovery action */
    1.20 +#define MCA_RECOVERED (0x1 << 0)
    1.21 +/* result flag bit: MCA error impacts the specified DOMAIN in owner field below */
    1.22 +#define MCA_OWNER (0x1 << 1)
    1.23 +/* result flag bit: MCA error can't be recovered and needs a reset */
    1.24 +#define MCA_NEED_RESET (0x1 << 2)
    1.25 +/* result flag bit: MCA error needs further actions in softIRQ context for recovery */
    1.26 +#define MCA_MORE_ACTION (0x1 << 3)
    1.27 +
    1.28 +struct mca_handle_result
    1.29 +{
    1.30 +    uint32_t result; /* tested against the MCA_* flags, e.g. result & MCA_OWNER */
    1.31 +    /* Used when result & MCA_OWNER */
    1.32 +    domid_t owner;
    1.33 +    /* Used by mca_error_handler, when result & MCA_RECOVERED */
    1.34 +    struct recovery_action *action;
    1.35 +};
    1.36 +
    1.37 +extern void (*mca_prehandler)( struct cpu_user_regs *regs,
    1.38 +                        struct mca_handle_result *result); /* called early in MCA ISR context */
    1.39 +
    1.40 +struct mca_error_handler
    1.41 +{
    1.42 +    /* Assume the corresponding recovery action can be uniquely
    1.43 +     * identified by mca_code. Otherwise, we might need to have
    1.44 +     * a separate function to decode the corresponding actions
    1.45 +     * for the particular mca error later.
    1.46 +    */
    1.47 +    uint16_t mca_code; /* index matched against the per-bank MCA error */
    1.48 +    void (*recovery_handler)( struct mcinfo_bank *bank,
    1.49 +                    struct mcinfo_global *global,
    1.50 +                    struct mcinfo_extended *extension,
    1.51 +                    struct mca_handle_result *result); /* recovery work, softIRQ context */
    1.52 +};
    1.53 +
    1.54  /* Global variables */
    1.55  extern int mce_disabled;
    1.56  extern unsigned int nr_mce_banks;
     2.1 --- a/xen/include/public/arch-x86/xen-mca.h	Fri Mar 20 17:24:53 2009 +0000
     2.2 +++ b/xen/include/public/arch-x86/xen-mca.h	Fri Mar 20 17:25:29 2009 +0000
     2.3 @@ -104,6 +104,7 @@
     2.4  #define MC_TYPE_GLOBAL          0
     2.5  #define MC_TYPE_BANK            1
     2.6  #define MC_TYPE_EXTENDED        2
     2.7 +#define MC_TYPE_RECOVERY        3 /* presumably tags struct mcinfo_recovery - confirm */
     2.8  
     2.9  struct mcinfo_common {
    2.10      uint16_t type;      /* structure type */
    2.11 @@ -172,6 +173,68 @@ struct mcinfo_extended {
    2.12      struct mcinfo_msr mc_msr[10];
    2.13  };
    2.14  
    2.15 +/* Recovery Action flags, giving recovery result information to DOM0
    2.16 + * (presumably the values of mc_recovery.action_flags - confirm). */
    2.17 +/* Xen took a successful recovery action; the error is recovered */
    2.18 +#define REC_ACTION_RECOVERED (0x1 << 0)
    2.19 +/* No action is performed by XEN */
    2.20 +#define REC_ACTION_NONE (0x1 << 1)
    2.21 +/* It's possible DOM0 might take action ownership in some cases */
    2.22 +#define REC_ACTION_NEED_RESET (0x1 << 2)
    2.23 +
    2.24 +/* Different Recovery Action types. If the action is performed successfully,
    2.25 + * the REC_ACTION_RECOVERED flag will be returned.
    2.26 + */
    2.27 +/* NOTE(review): presumably the values of mc_recovery.action_types - confirm */
    2.28 +/* Page Offline Action */
    2.29 +#define MC_ACTION_PAGE_OFFLINE (0x1 << 0)
    2.30 +/* CPU offline Action */
    2.31 +#define MC_ACTION_CPU_OFFLINE (0x1 << 1)
    2.32 +/* L3 cache disable Action */
    2.33 +#define MC_ACTION_CACHE_SHRINK (0x1 << 2)
    2.34 +
    2.35 +/* The interface below is used between XEN/DOM0 for passing XEN's recovery
    2.36 + * action information to DOM0.
    2.37 + * Usage scenario: after offlining a broken page, XEN might pass its page offline
    2.38 + * recovery action result to DOM0. DOM0 will save the information in
    2.39 + * non-volatile memory for further proactive actions, such as offlining the
    2.40 + * easily broken page earlier at the next reboot.
    2.41 +*/
    2.42 +struct page_offline_action
    2.43 +{
    2.44 +    /* Params for passing the offlined page number to DOM0 */
    2.45 +    uint64_t mfn;    /* page number of the offlined page */
    2.46 +    uint64_t status; /* NOTE(review): encoding not visible in this header excerpt */
    2.47 +};
    2.48 +
    2.49 +struct cpu_offline_action
    2.50 +{
    2.51 +    /* Params for passing the identity of the offlined CPU to DOM0 */
    2.52 +    uint32_t mc_socketid;      /* socket id of the offlined CPU */
    2.53 +    uint16_t mc_coreid;        /* core id of the offlined CPU */
    2.54 +    uint16_t mc_core_threadid; /* thread id within that core */
    2.55 +};
    2.56 +
    2.57 +#define MAX_UNION_SIZE 16 /* size in bytes of the pad member of action_info */
    2.58 +struct mc_recovery
    2.59 +{
    2.60 +    uint16_t mc_bank; /* bank nr */
    2.61 +    uint8_t action_flags; /* presumably REC_ACTION_* bits - confirm */
    2.62 +    uint8_t action_types; /* presumably MC_ACTION_* bits - confirm */
    2.63 +    union {
    2.64 +        struct page_offline_action page_retire;
    2.65 +        struct cpu_offline_action cpu_offline;
    2.66 +        uint8_t pad[MAX_UNION_SIZE]; /* keeps the union at least MAX_UNION_SIZE bytes */
    2.67 +    } action_info;
    2.68 +};
    2.69 +
    2.70 +struct mcinfo_recovery
    2.71 +{
    2.72 +    struct mcinfo_common common; /* presumably type == MC_TYPE_RECOVERY - confirm */
    2.73 +    struct mc_recovery mc_action; /* recovery action details for DOM0 */
    2.74 +};
    2.75 +
    2.76 +
    2.77  #define MCINFO_HYPERCALLSIZE	1024
    2.78  #define MCINFO_MAXSIZE		768
    2.79