--- /dev/null
+#!/usr/bin/env python3
+
+import argparse
+import copy
+import lark
+import os
+import re
+
+
+T = {
+ # translating qemu -> libvirt cpu vendor names
+ "CPUID_VENDOR_AMD": "AMD",
+ "CPUID_VENDOR_INTEL": "Intel",
+ "CPUID_VENDOR_HYGON": "Hygon",
+
+ # translating qemu -> libvirt cpu feature names
+ "CPUID_6_EAX_ARAT": "arat",
+ "CPUID_7_0_EBX_ADX": "adx",
+ "CPUID_7_0_EBX_AVX2": "avx2",
+ "CPUID_7_0_EBX_AVX512BW": "avx512bw",
+ "CPUID_7_0_EBX_AVX512CD": "avx512cd",
+ "CPUID_7_0_EBX_AVX512DQ": "avx512dq",
+ "CPUID_7_0_EBX_AVX512ER": "avx512er",
+ "CPUID_7_0_EBX_AVX512F": "avx512f",
+ "CPUID_7_0_EBX_AVX512PF": "avx512pf",
+ "CPUID_7_0_EBX_AVX512VL": "avx512vl",
+ "CPUID_7_0_EBX_BMI1": "bmi1",
+ "CPUID_7_0_EBX_BMI2": "bmi2",
+ "CPUID_7_0_EBX_CLFLUSHOPT": "clflushopt",
+ "CPUID_7_0_EBX_CLWB": "clwb",
+ "CPUID_7_0_EBX_ERMS": "erms",
+ "CPUID_7_0_EBX_FSGSBASE": "fsgsbase",
+ "CPUID_7_0_EBX_HLE": "hle",
+ "CPUID_7_0_EBX_INVPCID": "invpcid",
+ "CPUID_7_0_EBX_MPX": "mpx",
+ "CPUID_7_0_EBX_RDSEED": "rdseed",
+ "CPUID_7_0_EBX_RTM": "rtm",
+ "CPUID_7_0_EBX_SHA_NI": "sha-ni",
+ "CPUID_7_0_EBX_SMAP": "smap",
+ "CPUID_7_0_EBX_SMEP": "smep",
+ "CPUID_7_0_ECX_AVX512BITALG": "avx512bitalg",
+ "CPUID_7_0_ECX_AVX512_VBMI2": "avx512vbmi2",
+ "CPUID_7_0_ECX_AVX512_VBMI": "avx512vbmi",
+ "CPUID_7_0_ECX_AVX512VNNI": "avx512vnni",
+ "CPUID_7_0_ECX_AVX512_VPOPCNTDQ": "avx512-vpopcntdq",
+ "CPUID_7_0_ECX_CLDEMOTE": "cldemote",
+ "CPUID_7_0_ECX_GFNI": "gfni",
+ "CPUID_7_0_ECX_LA57": "la57",
+ "CPUID_7_0_ECX_MOVDIR64B": "movdir64b",
+ "CPUID_7_0_ECX_MOVDIRI": "movdiri",
+ "CPUID_7_0_ECX_PKU": "pku",
+ "CPUID_7_0_ECX_RDPID": "rdpid",
+ "CPUID_7_0_ECX_UMIP": "umip",
+ "CPUID_7_0_ECX_VAES": "vaes",
+ "CPUID_7_0_ECX_VPCLMULQDQ": "vpclmulqdq",
+ "CPUID_7_0_EDX_ARCH_CAPABILITIES": "arch-capabilities",
+ "CPUID_7_0_EDX_AVX512_4FMAPS": "avx512-4fmaps",
+ "CPUID_7_0_EDX_AVX512_4VNNIW": "avx512-4vnniw",
+ "CPUID_7_0_EDX_CORE_CAPABILITY": "core-capability",
+ "CPUID_7_0_EDX_SPEC_CTRL": "spec-ctrl",
+ "CPUID_7_0_EDX_SPEC_CTRL_SSBD": "ssbd",
+ "CPUID_7_0_EDX_STIBP": "stibp",
+ "CPUID_7_1_EAX_AVX512_BF16": "avx512-bf16",
+ "CPUID_8000_0008_EBX_CLZERO": "clzero",
+ "CPUID_8000_0008_EBX_IBPB": "ibpb",
+ "CPUID_8000_0008_EBX_STIBP": "amd-stibp",
+ "CPUID_8000_0008_EBX_WBNOINVD": "wbnoinvd",
+ "CPUID_8000_0008_EBX_XSAVEERPTR": "xsaveerptr",
+ "CPUID_ACPI": "acpi",
+ "CPUID_APIC": "apic",
+ "CPUID_CLFLUSH": "clflush",
+ "CPUID_CMOV": "cmov",
+ "CPUID_CX8": "cx8",
+ "CPUID_DE": "de",
+ "CPUID_EXT2_3DNOW": "3dnow",
+ "CPUID_EXT2_3DNOWEXT": "3dnowext",
+ "CPUID_EXT2_FFXSR": "fxsr_opt",
+ "CPUID_EXT2_LM": "lm",
+ "CPUID_EXT2_MMXEXT": "mmxext",
+ "CPUID_EXT2_NX": "nx",
+ "CPUID_EXT2_PDPE1GB": "pdpe1gb",
+ "CPUID_EXT2_RDTSCP": "rdtscp",
+ "CPUID_EXT2_SYSCALL": "syscall",
+ "CPUID_EXT3_3DNOWPREFETCH": "3dnowprefetch",
+ "CPUID_EXT3_ABM": "abm",
+ "CPUID_EXT3_CR8LEG": "cr8legacy",
+ "CPUID_EXT3_FMA4": "fma4",
+ "CPUID_EXT3_LAHF_LM": "lahf_lm",
+ "CPUID_EXT3_MISALIGNSSE": "misalignsse",
+ "CPUID_EXT3_OSVW": "osvw",
+ "CPUID_EXT3_PERFCORE": "perfctr_core",
+ "CPUID_EXT3_SSE4A": "sse4a",
+ "CPUID_EXT3_SVM": "svm",
+ "CPUID_EXT3_TBM": "tbm",
+ "CPUID_EXT3_XOP": "xop",
+ "CPUID_EXT_AES": "aes",
+ "CPUID_EXT_AVX": "avx",
+ "CPUID_EXT_CX16": "cx16",
+ "CPUID_EXT_F16C": "f16c",
+ "CPUID_EXT_FMA": "fma",
+ "CPUID_EXT_MOVBE": "movbe",
+ "CPUID_EXT_PCID": "pcid",
+ "CPUID_EXT_PCLMULQDQ": "pclmuldq",
+ "CPUID_EXT_POPCNT": "popcnt",
+ "CPUID_EXT_RDRAND": "rdrand",
+ "CPUID_EXT_SSE3": "pni",
+ "CPUID_EXT_SSE41": "sse4.1",
+ "CPUID_EXT_SSE42": "sse4.2",
+ "CPUID_EXT_SSSE3": "ssse3",
+ "CPUID_EXT_TSC_DEADLINE_TIMER": "tsc-deadline",
+ "CPUID_EXT_X2APIC": "x2apic",
+ "CPUID_EXT_XSAVE": "xsave",
+ "CPUID_FP87": "fpu",
+ "CPUID_FXSR": "fxsr",
+ "CPUID_MCA": "mca",
+ "CPUID_MCE": "mce",
+ "CPUID_MMX": "mmx",
+ "CPUID_MSR": "msr",
+ "CPUID_MTRR": "mtrr",
+ "CPUID_PAE": "pae",
+ "CPUID_PAT": "pat",
+ "CPUID_PGE": "pge",
+ "CPUID_PSE36": "pse36",
+ "CPUID_PSE": "pse",
+ "CPUID_SEP": "sep",
+ "CPUID_SSE2": "sse2",
+ "CPUID_SSE": "sse",
+ "CPUID_SS": "ss",
+ "CPUID_SVM_NPT": "npt",
+ "CPUID_SVM_NRIPSAVE": "nrip-save",
+ "CPUID_TSC": "tsc",
+ "CPUID_VME": "vme",
+ "CPUID_XSAVE_XGETBV1": "xgetbv1",
+ "CPUID_XSAVE_XSAVEC": "xsavec",
+ "CPUID_XSAVE_XSAVEOPT": "xsaveopt",
+ "CPUID_XSAVE_XSAVES": "xsaves",
+ "MSR_ARCH_CAP_IBRS_ALL": "ibrs-all",
+ "MSR_ARCH_CAP_MDS_NO": "mds-no",
+ "MSR_ARCH_CAP_PSCHANGE_MC_NO": "pschange-mc-no",
+ "MSR_ARCH_CAP_RDCL_NO": "rdctl-no",
+ "MSR_ARCH_CAP_SKIP_L1DFL_VMENTRY": "skip-l1dfl-vmentry",
+ "MSR_ARCH_CAP_TAA_NO": "taa-no",
+ "MSR_CORE_CAP_SPLIT_LOCK_DETECT": "split-lock-detect",
+
+ # always disabled features
+ "CPUID_EXT_MONITOR": None,
+ "0": None,
+
+ # set to "no auto enable" by qemu
+ "CPUID_EXT3_TOPOEXT": None,
+ "MSR_VMX_BASIC_DUAL_MONITOR": None,
+}
+
+
+def readline_cont(f):
+ """Read one logical line from a file `f` i.e. continues lines that end in
+ a backslash."""
+
+ line = f.readline()
+ while line.endswith("\\\n"):
+ line = line[:-2] + " " + f.readline()
+ return line
+
+
+def read_builtin_x86_defs(filename):
+ """Extract content between begin_mark and end_mark from file `filename` as
+ string, while expanding shorthand macros like "I486_FEATURES"."""
+
+ begin_mark = "static X86CPUDefinition builtin_x86_defs[] = {\n"
+ end_mark = "};\n"
+ shorthand = re.compile("^#define ([A-Z0-9_]+_FEATURES) (.*)$")
+ lines = list()
+ shorthands = dict()
+
+ with open(filename, "rt") as f:
+ while True:
+ line = readline_cont(f)
+ if line == begin_mark:
+ break
+ if not line:
+ raise RuntimeError("begin mark not found")
+ match = shorthand.match(line)
+ if match:
+ # TCG definitions are irrelevant for cpu models
+ newk = match.group(1)
+ if newk.startswith("TCG_"):
+ continue
+
+ # remove comments, whitespace and bit operators, effectively
+ # turning the bitfield into a list
+ newv = re.sub("([()|\t\n])|(/\\*.*?\\*/)", " ", match.group(2))
+
+ # resolve recursive shorthands
+ for k, v in shorthands.items():
+ newv = newv.replace(k, v)
+
+ shorthands[newk] = newv
+
+ while True:
+ line = readline_cont(f)
+ if line == end_mark:
+ break
+ if not line:
+ raise RuntimeError("end marker not found")
+
+ # apply shorthands
+ for k, v in shorthands.items():
+ line = line.replace(k, v)
+ lines.append(line)
+
+ return "".join(lines)
+
+
+def transform(item):
+ """Recursively transform a Lark syntax tree into python native objects."""
+
+ if isinstance(item, lark.lexer.Token):
+ return str(item)
+
+ if item.data == "list":
+ retval = list()
+ for child in item.children:
+ value = transform(child)
+ if value is None:
+ continue
+ retval.append(value)
+ return retval
+
+ if item.data == "map":
+ retval = dict()
+ for child in item.children:
+ if len(child.children) != 2:
+ raise RuntimeError("map entry with more than 2 elements")
+ key = transform(child.children[0])
+ value = transform(child.children[1])
+ if key is None:
+ raise RuntimeError("map entry with 'None' key")
+ if value is None:
+ continue
+ retval[key] = value
+ return retval
+
+ if item.data == "text":
+ retval = list()
+ for child in item.children:
+ value = transform(child)
+ if value is None:
+ continue
+ retval.append(value)
+ return " ".join(retval)
+
+ if item.data == "value":
+ if item.children:
+ raise RuntimeError("empty list is not empty")
+ return None
+
+ raise RuntimeError("unexpected item type")
+
+
+def expand_model(model):
+ """Expand a qemu cpu model description that has its feature split up into
+ different fields and may have differing versions into several libvirt-
+ friendly cpu models."""
+
+ result = {
+ "name": model.pop(".name"),
+ "vendor": T[model.pop(".vendor")],
+ "features": set(),
+ "extra": dict()}
+
+ if ".family" in model and ".model" in model:
+ result["family"] = model.pop(".family")
+ result["model"] = model.pop(".model")
+
+ for k in [k for k in model if k.startswith(".features")]:
+ v = model.pop(k)
+ for feature in v.split():
+ if feature.startswith("VMX_") or feature.startswith("MSR_VMX_"):
+ continue
+ translated = T.get(feature, feature)
+ if translated:
+ result["features"].add(translated)
+
+ versions = model.pop(".versions", [])
+ for k, v in model.items():
+ result["extra"]["model" + k] = v
+ yield result
+
+ for version in versions:
+ result = copy.deepcopy(result)
+ result["name"] = version.pop(".alias", result["name"])
+
+ props = version.pop(".props", dict())
+ for k, v in props:
+ if v == "on":
+ result["features"].add(k)
+ elif v == "off" and k in result["features"]:
+ result["features"].remove(k)
+ else:
+ result["extra"]["property." + k] = v
+
+ for k, v in version.items():
+ result["extra"]["version" + k] = v
+
+ yield result
+
+
+def output_model(f, model):
+ if model["extra"]:
+ f.write("<!-- extra info from qemu:\n")
+ for k, v in model["extra"].items():
+ f.write(" '{}': '{}'\n".format(k, v))
+ f.write("-->\n")
+
+ f.write("<cpus>\n")
+ f.write(" <model name='{}'>\n".format(model["name"]))
+ f.write(" <decode host='on' guest='on'/>\n")
+ f.write(" <signature family='{}' model='{}'/>\n".format(
+ model["family"], model["model"]))
+ f.write(" <vendor name='{}'/>\n".format(model["vendor"]))
+ for feature in sorted(model["features"]):
+ f.write(" <feature name='{}'/>\n".format(feature))
+ f.write(" </model>\n")
+ f.write("</cpus>\n")
+
+
+def main():
+ parser = argparse.ArgumentParser(
+ description="Synchronize x86 cpu models from QEMU i386 target.")
+ parser.add_argument(
+ "cpufile",
+ help="Path to 'target/i386/cpu.c' file in the QEMU repository",
+ type=os.path.realpath)
+ parser.add_argument(
+ "outdir",
+ help="Path to 'src/cpu_map' directory in the libvirt repository",
+ type=os.path.realpath)
+
+ args = parser.parse_args()
+
+ builtin_x86_defs = read_builtin_x86_defs(args.cpufile)
+
+ ast = lark.Lark(r"""
+ list: value ( "," value )* ","?
+ map: keyvalue ( "," keyvalue )* ","?
+ keyvalue: IDENTIFIER "=" value
+ ?value: text | "{" "}" | "{" list "}" | "{" map "}"
+ text: (IDENTIFIER | "\"" (/[^"]+/)? "\"")+
+ IDENTIFIER: /[\[\]\._&a-zA-Z0-9]/+
+ %ignore (" " | "\r" | "\n" | "\t" | "|" )+
+ %ignore "(" ( "X86CPUVersionDefinition" | "PropValue" ) "[])"
+ %ignore "//" /.*?/ "\n"
+ %ignore "/*" /(.|\n)*?/ "*/"
+ """, start="list").parse(builtin_x86_defs)
+
+ models_json = transform(ast)
+
+ models = list()
+ for model in models_json:
+ models.extend(expand_model(model))
+
+ for model in models:
+ name = os.path.join(args.outdir, "x86_{}.xml".format(model["name"]))
+ with open(name, "wt") as f:
+ output_model(f, model)
+
+
+if __name__ == "__main__":
+ main()