summaryrefslogtreecommitdiff
path: root/arch/x86
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/Kconfig2
-rw-r--r--arch/x86/boot/compressed/Makefile2
-rw-r--r--arch/x86/crypto/aesni-intel_glue.c705
-rw-r--r--arch/x86/crypto/fpu.c207
-rw-r--r--arch/x86/crypto/glue_helper.c74
-rw-r--r--arch/x86/crypto/sha1-mb/sha1_mb.c2
-rw-r--r--arch/x86/crypto/sha1-mb/sha1_mb_ctx.h2
-rw-r--r--arch/x86/crypto/sha256-mb/sha256_mb.c2
-rw-r--r--arch/x86/crypto/sha256-mb/sha256_mb_ctx.h2
-rw-r--r--arch/x86/crypto/sha512-mb/sha512_mb.c2
-rw-r--r--arch/x86/crypto/sha512-mb/sha512_mb_ctx.h2
-rw-r--r--arch/x86/entry/entry_32.S4
-rw-r--r--arch/x86/entry/vdso/vma.c4
-rw-r--r--arch/x86/include/asm/asm-prototypes.h16
-rw-r--r--arch/x86/include/asm/crypto/glue_helper.h39
-rw-r--r--arch/x86/include/asm/e820.h12
-rw-r--r--arch/x86/include/asm/kvm_host.h18
-rw-r--r--arch/x86/include/asm/kvm_page_track.h14
-rw-r--r--arch/x86/include/asm/trace/exceptions.h2
-rw-r--r--arch/x86/include/asm/trace/irq_vectors.h2
-rw-r--r--arch/x86/include/asm/vmx.h37
-rw-r--r--arch/x86/include/uapi/asm/vmx.h5
-rw-r--r--arch/x86/kernel/acpi/apei.c3
-rw-r--r--arch/x86/kernel/acpi/wakeup_64.S9
-rw-r--r--arch/x86/kernel/cpu/intel_cacheinfo.c2
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce.c2
-rw-r--r--arch/x86/kernel/crash.c37
-rw-r--r--arch/x86/kernel/kexec-bzimage64.c48
-rw-r--r--arch/x86/kernel/ldt.c2
-rw-r--r--arch/x86/kernel/machine_kexec_64.c6
-rw-r--r--arch/x86/kernel/setup.c24
-rw-r--r--arch/x86/kernel/tracepoint.c3
-rw-r--r--arch/x86/kvm/cpuid.c26
-rw-r--r--arch/x86/kvm/emulate.c200
-rw-r--r--arch/x86/kvm/hyperv.c2
-rw-r--r--arch/x86/kvm/i8254.c15
-rw-r--r--arch/x86/kvm/i8254.h3
-rw-r--r--arch/x86/kvm/lapic.c212
-rw-r--r--arch/x86/kvm/lapic.h2
-rw-r--r--arch/x86/kvm/mmu.c43
-rw-r--r--arch/x86/kvm/page_track.c31
-rw-r--r--arch/x86/kvm/svm.c21
-rw-r--r--arch/x86/kvm/vmx.c1092
-rw-r--r--arch/x86/kvm/x86.c94
-rw-r--r--arch/x86/pci/xen.c4
-rw-r--r--arch/x86/platform/ce4100/ce4100.c2
-rw-r--r--arch/x86/power/hibernate_64.c94
-rw-r--r--arch/x86/xen/pci-swiotlb-xen.c1
-rw-r--r--arch/x86/xen/setup.c6
49 files changed, 1923 insertions, 1216 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index a219cf2c0e58..dd47e60aabf5 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -1551,7 +1551,7 @@ config X86_CHECK_BIOS_CORRUPTION
line. By default it scans the low 64k of memory every 60
seconds; see the memory_corruption_check_size and
memory_corruption_check_period parameters in
- Documentation/kernel-parameters.txt to adjust this.
+ Documentation/admin-guide/kernel-parameters.rst to adjust this.
When enabled with the default parameters, this option has
almost no overhead, as it reserves a relatively small amount
diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile
index 34d9e15857c3..44163e8c3868 100644
--- a/arch/x86/boot/compressed/Makefile
+++ b/arch/x86/boot/compressed/Makefile
@@ -25,7 +25,7 @@ KCOV_INSTRUMENT := n
targets := vmlinux vmlinux.bin vmlinux.bin.gz vmlinux.bin.bz2 vmlinux.bin.lzma \
vmlinux.bin.xz vmlinux.bin.lzo vmlinux.bin.lz4
-KBUILD_CFLAGS := -m$(BITS) -D__KERNEL__ $(LINUX_INCLUDE) -O2
+KBUILD_CFLAGS := -m$(BITS) -D__KERNEL__ -O2
KBUILD_CFLAGS += -fno-strict-aliasing $(call cc-option, -fPIE, -fPIC)
KBUILD_CFLAGS += -DDISABLE_BRANCH_PROFILING
cflags-$(CONFIG_X86_32) := -march=i386
diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c
index aa8b0672f87a..31c34ee131f3 100644
--- a/arch/x86/crypto/aesni-intel_glue.c
+++ b/arch/x86/crypto/aesni-intel_glue.c
@@ -21,7 +21,6 @@
#include <linux/hardirq.h>
#include <linux/types.h>
-#include <linux/crypto.h>
#include <linux/module.h>
#include <linux/err.h>
#include <crypto/algapi.h>
@@ -29,14 +28,14 @@
#include <crypto/cryptd.h>
#include <crypto/ctr.h>
#include <crypto/b128ops.h>
-#include <crypto/lrw.h>
#include <crypto/xts.h>
#include <asm/cpu_device_id.h>
#include <asm/fpu/api.h>
#include <asm/crypto/aes.h>
-#include <crypto/ablk_helper.h>
#include <crypto/scatterwalk.h>
#include <crypto/internal/aead.h>
+#include <crypto/internal/simd.h>
+#include <crypto/internal/skcipher.h>
#include <linux/workqueue.h>
#include <linux/spinlock.h>
#ifdef CONFIG_X86_64
@@ -45,28 +44,26 @@
#define AESNI_ALIGN 16
+#define AESNI_ALIGN_ATTR __attribute__ ((__aligned__(AESNI_ALIGN)))
#define AES_BLOCK_MASK (~(AES_BLOCK_SIZE - 1))
#define RFC4106_HASH_SUBKEY_SIZE 16
+#define AESNI_ALIGN_EXTRA ((AESNI_ALIGN - 1) & ~(CRYPTO_MINALIGN - 1))
+#define CRYPTO_AES_CTX_SIZE (sizeof(struct crypto_aes_ctx) + AESNI_ALIGN_EXTRA)
+#define XTS_AES_CTX_SIZE (sizeof(struct aesni_xts_ctx) + AESNI_ALIGN_EXTRA)
/* This data is stored at the end of the crypto_tfm struct.
* It's a type of per "session" data storage location.
* This needs to be 16 byte aligned.
*/
struct aesni_rfc4106_gcm_ctx {
- u8 hash_subkey[16] __attribute__ ((__aligned__(AESNI_ALIGN)));
- struct crypto_aes_ctx aes_key_expanded
- __attribute__ ((__aligned__(AESNI_ALIGN)));
+ u8 hash_subkey[16] AESNI_ALIGN_ATTR;
+ struct crypto_aes_ctx aes_key_expanded AESNI_ALIGN_ATTR;
u8 nonce[4];
};
-struct aesni_lrw_ctx {
- struct lrw_table_ctx lrw_table;
- u8 raw_aes_ctx[sizeof(struct crypto_aes_ctx) + AESNI_ALIGN - 1];
-};
-
struct aesni_xts_ctx {
- u8 raw_tweak_ctx[sizeof(struct crypto_aes_ctx) + AESNI_ALIGN - 1];
- u8 raw_crypt_ctx[sizeof(struct crypto_aes_ctx) + AESNI_ALIGN - 1];
+ u8 raw_tweak_ctx[sizeof(struct crypto_aes_ctx)] AESNI_ALIGN_ATTR;
+ u8 raw_crypt_ctx[sizeof(struct crypto_aes_ctx)] AESNI_ALIGN_ATTR;
};
asmlinkage int aesni_set_key(struct crypto_aes_ctx *ctx, const u8 *in_key,
@@ -360,96 +357,95 @@ static void __aes_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
aesni_dec(ctx, dst, src);
}
-static int ecb_encrypt(struct blkcipher_desc *desc,
- struct scatterlist *dst, struct scatterlist *src,
- unsigned int nbytes)
+static int aesni_skcipher_setkey(struct crypto_skcipher *tfm, const u8 *key,
+ unsigned int len)
+{
+ return aes_set_key_common(crypto_skcipher_tfm(tfm),
+ crypto_skcipher_ctx(tfm), key, len);
+}
+
+static int ecb_encrypt(struct skcipher_request *req)
{
- struct crypto_aes_ctx *ctx = aes_ctx(crypto_blkcipher_ctx(desc->tfm));
- struct blkcipher_walk walk;
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ struct crypto_aes_ctx *ctx = aes_ctx(crypto_skcipher_ctx(tfm));
+ struct skcipher_walk walk;
+ unsigned int nbytes;
int err;
- blkcipher_walk_init(&walk, dst, src, nbytes);
- err = blkcipher_walk_virt(desc, &walk);
- desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+ err = skcipher_walk_virt(&walk, req, true);
kernel_fpu_begin();
while ((nbytes = walk.nbytes)) {
aesni_ecb_enc(ctx, walk.dst.virt.addr, walk.src.virt.addr,
nbytes & AES_BLOCK_MASK);
nbytes &= AES_BLOCK_SIZE - 1;
- err = blkcipher_walk_done(desc, &walk, nbytes);
+ err = skcipher_walk_done(&walk, nbytes);
}
kernel_fpu_end();
return err;
}
-static int ecb_decrypt(struct blkcipher_desc *desc,
- struct scatterlist *dst, struct scatterlist *src,
- unsigned int nbytes)
+static int ecb_decrypt(struct skcipher_request *req)
{
- struct crypto_aes_ctx *ctx = aes_ctx(crypto_blkcipher_ctx(desc->tfm));
- struct blkcipher_walk walk;
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ struct crypto_aes_ctx *ctx = aes_ctx(crypto_skcipher_ctx(tfm));
+ struct skcipher_walk walk;
+ unsigned int nbytes;
int err;
- blkcipher_walk_init(&walk, dst, src, nbytes);
- err = blkcipher_walk_virt(desc, &walk);
- desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+ err = skcipher_walk_virt(&walk, req, true);
kernel_fpu_begin();
while ((nbytes = walk.nbytes)) {
aesni_ecb_dec(ctx, walk.dst.virt.addr, walk.src.virt.addr,
nbytes & AES_BLOCK_MASK);
nbytes &= AES_BLOCK_SIZE - 1;
- err = blkcipher_walk_done(desc, &walk, nbytes);
+ err = skcipher_walk_done(&walk, nbytes);
}
kernel_fpu_end();
return err;
}
-static int cbc_encrypt(struct blkcipher_desc *desc,
- struct scatterlist *dst, struct scatterlist *src,
- unsigned int nbytes)
+static int cbc_encrypt(struct skcipher_request *req)
{
- struct crypto_aes_ctx *ctx = aes_ctx(crypto_blkcipher_ctx(desc->tfm));
- struct blkcipher_walk walk;
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ struct crypto_aes_ctx *ctx = aes_ctx(crypto_skcipher_ctx(tfm));
+ struct skcipher_walk walk;
+ unsigned int nbytes;
int err;
- blkcipher_walk_init(&walk, dst, src, nbytes);
- err = blkcipher_walk_virt(desc, &walk);
- desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+ err = skcipher_walk_virt(&walk, req, true);
kernel_fpu_begin();
while ((nbytes = walk.nbytes)) {
aesni_cbc_enc(ctx, walk.dst.virt.addr, walk.src.virt.addr,
nbytes & AES_BLOCK_MASK, walk.iv);
nbytes &= AES_BLOCK_SIZE - 1;
- err = blkcipher_walk_done(desc, &walk, nbytes);
+ err = skcipher_walk_done(&walk, nbytes);
}
kernel_fpu_end();
return err;
}
-static int cbc_decrypt(struct blkcipher_desc *desc,
- struct scatterlist *dst, struct scatterlist *src,
- unsigned int nbytes)
+static int cbc_decrypt(struct skcipher_request *req)
{
- struct crypto_aes_ctx *ctx = aes_ctx(crypto_blkcipher_ctx(desc->tfm));
- struct blkcipher_walk walk;
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ struct crypto_aes_ctx *ctx = aes_ctx(crypto_skcipher_ctx(tfm));
+ struct skcipher_walk walk;
+ unsigned int nbytes;
int err;
- blkcipher_walk_init(&walk, dst, src, nbytes);
- err = blkcipher_walk_virt(desc, &walk);
- desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+ err = skcipher_walk_virt(&walk, req, true);
kernel_fpu_begin();
while ((nbytes = walk.nbytes)) {
aesni_cbc_dec(ctx, walk.dst.virt.addr, walk.src.virt.addr,
nbytes & AES_BLOCK_MASK, walk.iv);
nbytes &= AES_BLOCK_SIZE - 1;
- err = blkcipher_walk_done(desc, &walk, nbytes);
+ err = skcipher_walk_done(&walk, nbytes);
}
kernel_fpu_end();
@@ -458,7 +454,7 @@ static int cbc_decrypt(struct blkcipher_desc *desc,
#ifdef CONFIG_X86_64
static void ctr_crypt_final(struct crypto_aes_ctx *ctx,
- struct blkcipher_walk *walk)
+ struct skcipher_walk *walk)
{
u8 *ctrblk = walk->iv;
u8 keystream[AES_BLOCK_SIZE];
@@ -491,157 +487,53 @@ static void aesni_ctr_enc_avx_tfm(struct crypto_aes_ctx *ctx, u8 *out,
}
#endif
-static int ctr_crypt(struct blkcipher_desc *desc,
- struct scatterlist *dst, struct scatterlist *src,
- unsigned int nbytes)
+static int ctr_crypt(struct skcipher_request *req)
{
- struct crypto_aes_ctx *ctx = aes_ctx(crypto_blkcipher_ctx(desc->tfm));
- struct blkcipher_walk walk;
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ struct crypto_aes_ctx *ctx = aes_ctx(crypto_skcipher_ctx(tfm));
+ struct skcipher_walk walk;
+ unsigned int nbytes;
int err;
- blkcipher_walk_init(&walk, dst, src, nbytes);
- err = blkcipher_walk_virt_block(desc, &walk, AES_BLOCK_SIZE);
- desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+ err = skcipher_walk_virt(&walk, req, true);
kernel_fpu_begin();
while ((nbytes = walk.nbytes) >= AES_BLOCK_SIZE) {
aesni_ctr_enc_tfm(ctx, walk.dst.virt.addr, walk.src.virt.addr,
nbytes & AES_BLOCK_MASK, walk.iv);
nbytes &= AES_BLOCK_SIZE - 1;
- err = blkcipher_walk_done(desc, &walk, nbytes);
+ err = skcipher_walk_done(&walk, nbytes);
}
if (walk.nbytes) {
ctr_crypt_final(ctx, &walk);
- err = blkcipher_walk_done(desc, &walk, 0);
+ err = skcipher_walk_done(&walk, 0);
}
kernel_fpu_end();
return err;
}
-#endif
-
-static int ablk_ecb_init(struct crypto_tfm *tfm)
-{
- return ablk_init_common(tfm, "__driver-ecb-aes-aesni");
-}
-
-static int ablk_cbc_init(struct crypto_tfm *tfm)
-{
- return ablk_init_common(tfm, "__driver-cbc-aes-aesni");
-}
-
-#ifdef CONFIG_X86_64
-static int ablk_ctr_init(struct crypto_tfm *tfm)
-{
- return ablk_init_common(tfm, "__driver-ctr-aes-aesni");
-}
-
-#endif
-
-#if IS_ENABLED(CONFIG_CRYPTO_PCBC)
-static int ablk_pcbc_init(struct crypto_tfm *tfm)
-{
- return ablk_init_common(tfm, "fpu(pcbc(__driver-aes-aesni))");
-}
-#endif
-
-static void lrw_xts_encrypt_callback(void *ctx, u8 *blks, unsigned int nbytes)
-{
- aesni_ecb_enc(ctx, blks, blks, nbytes);
-}
-static void lrw_xts_decrypt_callback(void *ctx, u8 *blks, unsigned int nbytes)
-{
- aesni_ecb_dec(ctx, blks, blks, nbytes);
-}
-
-static int lrw_aesni_setkey(struct crypto_tfm *tfm, const u8 *key,
+static int xts_aesni_setkey(struct crypto_skcipher *tfm, const u8 *key,
unsigned int keylen)
{
- struct aesni_lrw_ctx *ctx = crypto_tfm_ctx(tfm);
+ struct aesni_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
int err;
- err = aes_set_key_common(tfm, ctx->raw_aes_ctx, key,
- keylen - AES_BLOCK_SIZE);
+ err = xts_verify_key(tfm, key, keylen);
if (err)
return err;
- return lrw_init_table(&ctx->lrw_table, key + keylen - AES_BLOCK_SIZE);
-}
-
-static void lrw_aesni_exit_tfm(struct crypto_tfm *tfm)
-{
- struct aesni_lrw_ctx *ctx = crypto_tfm_ctx(tfm);
-
- lrw_free_table(&ctx->lrw_table);
-}
-
-static int lrw_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
- struct scatterlist *src, unsigned int nbytes)
-{
- struct aesni_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
- be128 buf[8];
- struct lrw_crypt_req req = {
- .tbuf = buf,
- .tbuflen = sizeof(buf),
-
- .table_ctx = &ctx->lrw_table,
- .crypt_ctx = aes_ctx(ctx->raw_aes_ctx),
- .crypt_fn = lrw_xts_encrypt_callback,
- };
- int ret;
-
- desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
-
- kernel_fpu_begin();
- ret = lrw_crypt(desc, dst, src, nbytes, &req);
- kernel_fpu_end();
-
- return ret;
-}
-
-static int lrw_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
- struct scatterlist *src, unsigned int nbytes)
-{
- struct aesni_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
- be128 buf[8];
- struct lrw_crypt_req req = {
- .tbuf = buf,
- .tbuflen = sizeof(buf),
-
- .table_ctx = &ctx->lrw_table,
- .crypt_ctx = aes_ctx(ctx->raw_aes_ctx),
- .crypt_fn = lrw_xts_decrypt_callback,
- };
- int ret;
-
- desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
-
- kernel_fpu_begin();
- ret = lrw_crypt(desc, dst, src, nbytes, &req);
- kernel_fpu_end();
-
- return ret;
-}
-
-static int xts_aesni_setkey(struct crypto_tfm *tfm, const u8 *key,
- unsigned int keylen)
-{
- struct aesni_xts_ctx *ctx = crypto_tfm_ctx(tfm);
- int err;
-
- err = xts_check_key(tfm, key, keylen);
- if (err)
- return err;
+ keylen /= 2;
/* first half of xts-key is for crypt */
- err = aes_set_key_common(tfm, ctx->raw_crypt_ctx, key, keylen / 2);
+ err = aes_set_key_common(crypto_skcipher_tfm(tfm), ctx->raw_crypt_ctx,
+ key, keylen);
if (err)
return err;
/* second half of xts-key is for tweak */
- return aes_set_key_common(tfm, ctx->raw_tweak_ctx, key + keylen / 2,
- keylen / 2);
+ return aes_set_key_common(crypto_skcipher_tfm(tfm), ctx->raw_tweak_ctx,
+ key + keylen, keylen);
}
@@ -650,8 +542,6 @@ static void aesni_xts_tweak(void *ctx, u8 *out, const u8 *in)
aesni_enc(ctx, out, in);
}
-#ifdef CONFIG_X86_64
-
static void aesni_xts_enc(void *ctx, u128 *dst, const u128 *src, le128 *iv)
{
glue_xts_crypt_128bit_one(ctx, dst, src, iv, GLUE_FUNC_CAST(aesni_enc));
@@ -698,83 +588,28 @@ static const struct common_glue_ctx aesni_dec_xts = {
} }
};
-static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
- struct scatterlist *src, unsigned int nbytes)
+static int xts_encrypt(struct skcipher_request *req)
{
- struct aesni_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ struct aesni_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
- return glue_xts_crypt_128bit(&aesni_enc_xts, desc, dst, src, nbytes,
- XTS_TWEAK_CAST(aesni_xts_tweak),
- aes_ctx(ctx->raw_tweak_ctx),
- aes_ctx(ctx->raw_crypt_ctx));
+ return glue_xts_req_128bit(&aesni_enc_xts, req,
+ XTS_TWEAK_CAST(aesni_xts_tweak),
+ aes_ctx(ctx->raw_tweak_ctx),
+ aes_ctx(ctx->raw_crypt_ctx));
}
-static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
- struct scatterlist *src, unsigned int nbytes)
+static int xts_decrypt(struct skcipher_request *req)
{
- struct aesni_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
-
- return glue_xts_crypt_128bit(&aesni_dec_xts, desc, dst, src, nbytes,
- XTS_TWEAK_CAST(aesni_xts_tweak),
- aes_ctx(ctx->raw_tweak_ctx),
- aes_ctx(ctx->raw_crypt_ctx));
-}
-
-#else
-
-static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
- struct scatterlist *src, unsigned int nbytes)
-{
- struct aesni_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
- be128 buf[8];
- struct xts_crypt_req req = {
- .tbuf = buf,
- .tbuflen = sizeof(buf),
-
- .tweak_ctx = aes_ctx(ctx->raw_tweak_ctx),
- .tweak_fn = aesni_xts_tweak,
- .crypt_ctx = aes_ctx(ctx->raw_crypt_ctx),
- .crypt_fn = lrw_xts_encrypt_callback,
- };
- int ret;
-
- desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
-
- kernel_fpu_begin();
- ret = xts_crypt(desc, dst, src, nbytes, &req);
- kernel_fpu_end();
-
- return ret;
-}
-
-static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
- struct scatterlist *src, unsigned int nbytes)
-{
- struct aesni_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
- be128 buf[8];
- struct xts_crypt_req req = {
- .tbuf = buf,
- .tbuflen = sizeof(buf),
-
- .tweak_ctx = aes_ctx(ctx->raw_tweak_ctx),
- .tweak_fn = aesni_xts_tweak,
- .crypt_ctx = aes_ctx(ctx->raw_crypt_ctx),
- .crypt_fn = lrw_xts_decrypt_callback,
- };
- int ret;
-
- desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
-
- kernel_fpu_begin();
- ret = xts_crypt(desc, dst, src, nbytes, &req);
- kernel_fpu_end();
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ struct aesni_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
- return ret;
+ return glue_xts_req_128bit(&aesni_dec_xts, req,
+ XTS_TWEAK_CAST(aesni_xts_tweak),
+ aes_ctx(ctx->raw_tweak_ctx),
+ aes_ctx(ctx->raw_crypt_ctx));
}
-#endif
-
-#ifdef CONFIG_X86_64
static int rfc4106_init(struct crypto_aead *aead)
{
struct cryptd_aead *cryptd_tfm;
@@ -1077,9 +912,7 @@ static struct crypto_alg aesni_algs[] = { {
.cra_priority = 300,
.cra_flags = CRYPTO_ALG_TYPE_CIPHER,
.cra_blocksize = AES_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct crypto_aes_ctx) +
- AESNI_ALIGN - 1,
- .cra_alignmask = 0,
+ .cra_ctxsize = CRYPTO_AES_CTX_SIZE,
.cra_module = THIS_MODULE,
.cra_u = {
.cipher = {
@@ -1091,14 +924,12 @@ static struct crypto_alg aesni_algs[] = { {
}
}
}, {
- .cra_name = "__aes-aesni",
- .cra_driver_name = "__driver-aes-aesni",
- .cra_priority = 0,
+ .cra_name = "__aes",
+ .cra_driver_name = "__aes-aesni",
+ .cra_priority = 300,
.cra_flags = CRYPTO_ALG_TYPE_CIPHER | CRYPTO_ALG_INTERNAL,
.cra_blocksize = AES_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct crypto_aes_ctx) +
- AESNI_ALIGN - 1,
- .cra_alignmask = 0,
+ .cra_ctxsize = CRYPTO_AES_CTX_SIZE,
.cra_module = THIS_MODULE,
.cra_u = {
.cipher = {
@@ -1109,250 +940,94 @@ static struct crypto_alg aesni_algs[] = { {
.cia_decrypt = __aes_decrypt
}
}
-}, {
- .cra_name = "__ecb-aes-aesni",
- .cra_driver_name = "__driver-ecb-aes-aesni",
- .cra_priority = 0,
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
- CRYPTO_ALG_INTERNAL,
- .cra_blocksize = AES_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct crypto_aes_ctx) +
- AESNI_ALIGN - 1,
- .cra_alignmask = 0,
- .cra_type = &crypto_blkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_u = {
- .blkcipher = {
- .min_keysize = AES_MIN_KEY_SIZE,
- .max_keysize = AES_MAX_KEY_SIZE,
- .setkey = aes_set_key,
- .encrypt = ecb_encrypt,
- .decrypt = ecb_decrypt,
- },
- },
-}, {
- .cra_name = "__cbc-aes-aesni",
- .cra_driver_name = "__driver-cbc-aes-aesni",
- .cra_priority = 0,
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
- CRYPTO_ALG_INTERNAL,
- .cra_blocksize = AES_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct crypto_aes_ctx) +
- AESNI_ALIGN - 1,
- .cra_alignmask = 0,
- .cra_type = &crypto_blkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_u = {
- .blkcipher = {
- .min_keysize = AES_MIN_KEY_SIZE,
- .max_keysize = AES_MAX_KEY_SIZE,
- .setkey = aes_set_key,
- .encrypt = cbc_encrypt,
- .decrypt = cbc_decrypt,
- },
- },
-}, {
- .cra_name = "ecb(aes)",
- .cra_driver_name = "ecb-aes-aesni",
- .cra_priority = 400,
- .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
- .cra_blocksize = AES_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct async_helper_ctx),
- .cra_alignmask = 0,
- .cra_type = &crypto_ablkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_init = ablk_ecb_init,
- .cra_exit = ablk_exit,
- .cra_u = {
- .ablkcipher = {
- .min_keysize = AES_MIN_KEY_SIZE,
- .max_keysize = AES_MAX_KEY_SIZE,
- .setkey = ablk_set_key,
- .encrypt = ablk_encrypt,
- .decrypt = ablk_decrypt,
+} };
+
+static struct skcipher_alg aesni_skciphers[] = {
+ {
+ .base = {
+ .cra_name = "__ecb(aes)",
+ .cra_driver_name = "__ecb-aes-aesni",
+ .cra_priority = 400,
+ .cra_flags = CRYPTO_ALG_INTERNAL,
+ .cra_blocksize = AES_BLOCK_SIZE,
+ .cra_ctxsize = CRYPTO_AES_CTX_SIZE,
+ .cra_module = THIS_MODULE,
},
- },
-}, {
- .cra_name = "cbc(aes)",
- .cra_driver_name = "cbc-aes-aesni",
- .cra_priority = 400,
- .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
- .cra_blocksize = AES_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct async_helper_ctx),
- .cra_alignmask = 0,
- .cra_type = &crypto_ablkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_init = ablk_cbc_init,
- .cra_exit = ablk_exit,
- .cra_u = {
- .ablkcipher = {
- .min_keysize = AES_MIN_KEY_SIZE,
- .max_keysize = AES_MAX_KEY_SIZE,
- .ivsize = AES_BLOCK_SIZE,
- .setkey = ablk_set_key,
- .encrypt = ablk_encrypt,
- .decrypt = ablk_decrypt,
+ .min_keysize = AES_MIN_KEY_SIZE,
+ .max_keysize = AES_MAX_KEY_SIZE,
+ .setkey = aesni_skcipher_setkey,
+ .encrypt = ecb_encrypt,
+ .decrypt = ecb_decrypt,
+ }, {
+ .base = {
+ .cra_name = "__cbc(aes)",
+ .cra_driver_name = "__cbc-aes-aesni",
+ .cra_priority = 400,
+ .cra_flags = CRYPTO_ALG_INTERNAL,
+ .cra_blocksize = AES_BLOCK_SIZE,
+ .cra_ctxsize = CRYPTO_AES_CTX_SIZE,
+ .cra_module = THIS_MODULE,
},
- },
+ .min_keysize = AES_MIN_KEY_SIZE,
+ .max_keysize = AES_MAX_KEY_SIZE,
+ .ivsize = AES_BLOCK_SIZE,
+ .setkey = aesni_skcipher_setkey,
+ .encrypt = cbc_encrypt,
+ .decrypt = cbc_decrypt,
#ifdef CONFIG_X86_64
-}, {
- .cra_name = "__ctr-aes-aesni",
- .cra_driver_name = "__driver-ctr-aes-aesni",
- .cra_priority = 0,
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
- CRYPTO_ALG_INTERNAL,
- .cra_blocksize = 1,
- .cra_ctxsize = sizeof(struct crypto_aes_ctx) +
- AESNI_ALIGN - 1,
- .cra_alignmask = 0,
- .cra_type = &crypto_blkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_u = {
- .blkcipher = {
- .min_keysize = AES_MIN_KEY_SIZE,
- .max_keysize = AES_MAX_KEY_SIZE,
- .ivsize = AES_BLOCK_SIZE,
- .setkey = aes_set_key,
- .encrypt = ctr_crypt,
- .decrypt = ctr_crypt,
+ }, {
+ .base = {
+ .cra_name = "__ctr(aes)",
+ .cra_driver_name = "__ctr-aes-aesni",
+ .cra_priority = 400,
+ .cra_flags = CRYPTO_ALG_INTERNAL,
+ .cra_blocksize = 1,
+ .cra_ctxsize = CRYPTO_AES_CTX_SIZE,
+ .cra_module = THIS_MODULE,
},
- },
-}, {
- .cra_name = "ctr(aes)",
- .cra_driver_name = "ctr-aes-aesni",
- .cra_priority = 400,
- .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
- .cra_blocksize = 1,
- .cra_ctxsize = sizeof(struct async_helper_ctx),
- .cra_alignmask = 0,
- .cra_type = &crypto_ablkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_init = ablk_ctr_init,
- .cra_exit = ablk_exit,
- .cra_u = {
- .ablkcipher = {
- .min_keysize = AES_MIN_KEY_SIZE,
- .max_keysize = AES_MAX_KEY_SIZE,
- .ivsize = AES_BLOCK_SIZE,
- .setkey = ablk_set_key,
- .encrypt = ablk_encrypt,
- .decrypt = ablk_encrypt,
- .geniv = "chainiv",
+ .min_keysize = AES_MIN_KEY_SIZE,
+ .max_keysize = AES_MAX_KEY_SIZE,
+ .ivsize = AES_BLOCK_SIZE,
+ .chunksize = AES_BLOCK_SIZE,
+ .setkey = aesni_skcipher_setkey,
+ .encrypt = ctr_crypt,
+ .decrypt = ctr_crypt,
+ }, {
+ .base = {
+ .cra_name = "__xts(aes)",
+ .cra_driver_name = "__xts-aes-aesni",
+ .cra_priority = 401,
+ .cra_flags = CRYPTO_ALG_INTERNAL,
+ .cra_blocksize = AES_BLOCK_SIZE,
+ .cra_ctxsize = XTS_AES_CTX_SIZE,
+ .cra_module = THIS_MODULE,
},
- },
+ .min_keysize = 2 * AES_MIN_KEY_SIZE,
+ .max_keysize = 2 * AES_MAX_KEY_SIZE,
+ .ivsize = AES_BLOCK_SIZE,
+ .setkey = xts_aesni_setkey,
+ .encrypt = xts_encrypt,
+ .decrypt = xts_decrypt,
#endif
+ }
+};
+
+struct simd_skcipher_alg *aesni_simd_skciphers[ARRAY_SIZE(aesni_skciphers)];
+
+struct {
+ const char *algname;
+ const char *drvname;
+ const char *basename;
+ struct simd_skcipher_alg *simd;
+} aesni_simd_skciphers2[] = {
#if IS_ENABLED(CONFIG_CRYPTO_PCBC)
-}, {
- .cra_name = "pcbc(aes)",
- .cra_driver_name = "pcbc-aes-aesni",
- .cra_priority = 400,
- .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
- .cra_blocksize = AES_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct async_helper_ctx),
- .cra_alignmask = 0,
- .cra_type = &crypto_ablkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_init = ablk_pcbc_init,
- .cra_exit = ablk_exit,
- .cra_u = {
- .ablkcipher = {
- .min_keysize = AES_MIN_KEY_SIZE,
- .max_keysize = AES_MAX_KEY_SIZE,
- .ivsize = AES_BLOCK_SIZE,
- .setkey = ablk_set_key,
- .encrypt = ablk_encrypt,
- .decrypt = ablk_decrypt,
- },
+ {
+ .algname = "pcbc(aes)",
+ .drvname = "pcbc-aes-aesni",
+ .basename = "fpu(pcbc(__aes-aesni))",
},
#endif
-}, {
- .cra_name = "__lrw-aes-aesni",
- .cra_driver_name = "__driver-lrw-aes-aesni",
- .cra_priority = 0,
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
- CRYPTO_ALG_INTERNAL,
- .cra_blocksize = AES_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct aesni_lrw_ctx),
- .cra_alignmask = 0,
- .cra_type = &crypto_blkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_exit = lrw_aesni_exit_tfm,
- .cra_u = {
- .blkcipher = {
- .min_keysize = AES_MIN_KEY_SIZE + AES_BLOCK_SIZE,
- .max_keysize = AES_MAX_KEY_SIZE + AES_BLOCK_SIZE,
- .ivsize = AES_BLOCK_SIZE,
- .setkey = lrw_aesni_setkey,
- .encrypt = lrw_encrypt,
- .decrypt = lrw_decrypt,
- },
- },
-}, {
- .cra_name = "__xts-aes-aesni",
- .cra_driver_name = "__driver-xts-aes-aesni",
- .cra_priority = 0,
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
- CRYPTO_ALG_INTERNAL,
- .cra_blocksize = AES_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct aesni_xts_ctx),
- .cra_alignmask = 0,
- .cra_type = &crypto_blkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_u = {
- .blkcipher = {
- .min_keysize = 2 * AES_MIN_KEY_SIZE,
- .max_keysize = 2 * AES_MAX_KEY_SIZE,
- .ivsize = AES_BLOCK_SIZE,
- .setkey = xts_aesni_setkey,
- .encrypt = xts_encrypt,
- .decrypt = xts_decrypt,
- },
- },
-}, {
- .cra_name = "lrw(aes)",
- .cra_driver_name = "lrw-aes-aesni",
- .cra_priority = 400,
- .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
- .cra_blocksize = AES_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct async_helper_ctx),
- .cra_alignmask = 0,
- .cra_type = &crypto_ablkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_init = ablk_init,
- .cra_exit = ablk_exit,
- .cra_u = {
- .ablkcipher = {
- .min_keysize = AES_MIN_KEY_SIZE + AES_BLOCK_SIZE,
- .max_keysize = AES_MAX_KEY_SIZE + AES_BLOCK_SIZE,
- .ivsize = AES_BLOCK_SIZE,
- .setkey = ablk_set_key,
- .encrypt = ablk_encrypt,
- .decrypt = ablk_decrypt,
- },
- },
-}, {
- .cra_name = "xts(aes)",
- .cra_driver_name = "xts-aes-aesni",
- .cra_priority = 400,
- .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
- .cra_blocksize = AES_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct async_helper_ctx),
- .cra_alignmask = 0,
- .cra_type = &crypto_ablkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_init = ablk_init,
- .cra_exit = ablk_exit,
- .cra_u = {
- .ablkcipher = {
- .min_keysize = 2 * AES_MIN_KEY_SIZE,
- .max_keysize = 2 * AES_MAX_KEY_SIZE,
- .ivsize = AES_BLOCK_SIZE,
- .setkey = ablk_set_key,
- .encrypt = ablk_encrypt,
- .decrypt = ablk_decrypt,
- },
- },
-} };
+};
#ifdef CONFIG_X86_64
static struct aead_alg aesni_aead_algs[] = { {
@@ -1401,9 +1076,27 @@ static const struct x86_cpu_id aesni_cpu_id[] = {
};
MODULE_DEVICE_TABLE(x86cpu, aesni_cpu_id);
+static void aesni_free_simds(void)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(aesni_simd_skciphers) &&
+ aesni_simd_skciphers[i]; i++)
+ simd_skcipher_free(aesni_simd_skciphers[i]);
+
+ for (i = 0; i < ARRAY_SIZE(aesni_simd_skciphers2) &&
+ aesni_simd_skciphers2[i].simd; i++)
+ simd_skcipher_free(aesni_simd_skciphers2[i].simd);
+}
+
static int __init aesni_init(void)
{
+ struct simd_skcipher_alg *simd;
+ const char *basename;
+ const char *algname;
+ const char *drvname;
int err;
+ int i;
if (!x86_match_cpu(aesni_cpu_id))
return -ENODEV;
@@ -1445,13 +1138,48 @@ static int __init aesni_init(void)
if (err)
goto fpu_exit;
+ err = crypto_register_skciphers(aesni_skciphers,
+ ARRAY_SIZE(aesni_skciphers));
+ if (err)
+ goto unregister_algs;
+
err = crypto_register_aeads(aesni_aead_algs,
ARRAY_SIZE(aesni_aead_algs));
if (err)
- goto unregister_algs;
+ goto unregister_skciphers;
+
+ for (i = 0; i < ARRAY_SIZE(aesni_skciphers); i++) {
+ algname = aesni_skciphers[i].base.cra_name + 2;
+ drvname = aesni_skciphers[i].base.cra_driver_name + 2;
+ basename = aesni_skciphers[i].base.cra_driver_name;
+ simd = simd_skcipher_create_compat(algname, drvname, basename);
+ err = PTR_ERR(simd);
+ if (IS_ERR(simd))
+ goto unregister_simds;
+
+ aesni_simd_skciphers[i] = simd;
+ }
- return err;
+ for (i = 0; i < ARRAY_SIZE(aesni_simd_skciphers2); i++) {
+ algname = aesni_simd_skciphers2[i].algname;
+ drvname = aesni_simd_skciphers2[i].drvname;
+ basename = aesni_simd_skciphers2[i].basename;
+ simd = simd_skcipher_create_compat(algname, drvname, basename);
+ err = PTR_ERR(simd);
+ if (IS_ERR(simd))
+ goto unregister_simds;
+ aesni_simd_skciphers2[i].simd = simd;
+ }
+
+ return 0;
+
+unregister_simds:
+ aesni_free_simds();
+ crypto_unregister_aeads(aesni_aead_algs, ARRAY_SIZE(aesni_aead_algs));
+unregister_skciphers:
+ crypto_unregister_skciphers(aesni_skciphers,
+ ARRAY_SIZE(aesni_skciphers));
unregister_algs:
crypto_unregister_algs(aesni_algs, ARRAY_SIZE(aesni_algs));
fpu_exit:
@@ -1461,7 +1189,10 @@ fpu_exit:
static void __exit aesni_exit(void)
{
+ aesni_free_simds();
crypto_unregister_aeads(aesni_aead_algs, ARRAY_SIZE(aesni_aead_algs));
+ crypto_unregister_skciphers(aesni_skciphers,
+ ARRAY_SIZE(aesni_skciphers));
crypto_unregister_algs(aesni_algs, ARRAY_SIZE(aesni_algs));
crypto_fpu_exit();
diff --git a/arch/x86/crypto/fpu.c b/arch/x86/crypto/fpu.c
index e7d679e2a018..406680476c52 100644
--- a/arch/x86/crypto/fpu.c
+++ b/arch/x86/crypto/fpu.c
@@ -11,143 +11,186 @@
*
*/
-#include <crypto/algapi.h>
+#include <crypto/internal/skcipher.h>
#include <linux/err.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/slab.h>
-#include <linux/crypto.h>
#include <asm/fpu/api.h>
struct crypto_fpu_ctx {
- struct crypto_blkcipher *child;
+ struct crypto_skcipher *child;
};
-static int crypto_fpu_setkey(struct crypto_tfm *parent, const u8 *key,
+static int crypto_fpu_setkey(struct crypto_skcipher *parent, const u8 *key,
unsigned int keylen)
{
- struct crypto_fpu_ctx *ctx = crypto_tfm_ctx(parent);
- struct crypto_blkcipher *child = ctx->child;
+ struct crypto_fpu_ctx *ctx = crypto_skcipher_ctx(parent);
+ struct crypto_skcipher *child = ctx->child;
int err;
- crypto_blkcipher_clear_flags(child, CRYPTO_TFM_REQ_MASK);
- crypto_blkcipher_set_flags(child, crypto_tfm_get_flags(parent) &
- CRYPTO_TFM_REQ_MASK);
- err = crypto_blkcipher_setkey(child, key, keylen);
- crypto_tfm_set_flags(parent, crypto_blkcipher_get_flags(child) &
- CRYPTO_TFM_RES_MASK);
+ crypto_skcipher_clear_flags(child, CRYPTO_TFM_REQ_MASK);
+ crypto_skcipher_set_flags(child, crypto_skcipher_get_flags(parent) &
+ CRYPTO_TFM_REQ_MASK);
+ err = crypto_skcipher_setkey(child, key, keylen);
+ crypto_skcipher_set_flags(parent, crypto_skcipher_get_flags(child) &
+ CRYPTO_TFM_RES_MASK);
return err;
}
-static int crypto_fpu_encrypt(struct blkcipher_desc *desc_in,
- struct scatterlist *dst, struct scatterlist *src,
- unsigned int nbytes)
+static int crypto_fpu_encrypt(struct skcipher_request *req)
{
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ struct crypto_fpu_ctx *ctx = crypto_skcipher_ctx(tfm);
+ struct crypto_skcipher *child = ctx->child;
+ SKCIPHER_REQUEST_ON_STACK(subreq, child);
int err;
- struct crypto_fpu_ctx *ctx = crypto_blkcipher_ctx(desc_in->tfm);
- struct crypto_blkcipher *child = ctx->child;
- struct blkcipher_desc desc = {
- .tfm = child,
- .info = desc_in->info,
- .flags = desc_in->flags & ~CRYPTO_TFM_REQ_MAY_SLEEP,
- };
+
+ skcipher_request_set_tfm(subreq, child);
+ skcipher_request_set_callback(subreq, 0, NULL, NULL);
+ skcipher_request_set_crypt(subreq, req->src, req->dst, req->cryptlen,
+ req->iv);
kernel_fpu_begin();
- err = crypto_blkcipher_crt(desc.tfm)->encrypt(&desc, dst, src, nbytes);
+ err = crypto_skcipher_encrypt(subreq);
kernel_fpu_end();
+
+ skcipher_request_zero(subreq);
return err;
}
-static int crypto_fpu_decrypt(struct blkcipher_desc *desc_in,
- struct scatterlist *dst, struct scatterlist *src,
- unsigned int nbytes)
+static int crypto_fpu_decrypt(struct skcipher_request *req)
{
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ struct crypto_fpu_ctx *ctx = crypto_skcipher_ctx(tfm);
+ struct crypto_skcipher *child = ctx->child;
+ SKCIPHER_REQUEST_ON_STACK(subreq, child);
int err;
- struct crypto_fpu_ctx *ctx = crypto_blkcipher_ctx(desc_in->tfm);
- struct crypto_blkcipher *child = ctx->child;
- struct blkcipher_desc desc = {
- .tfm = child,
- .info = desc_in->info,
- .flags = desc_in->flags & ~CRYPTO_TFM_REQ_MAY_SLEEP,
- };
+
+ skcipher_request_set_tfm(subreq, child);
+ skcipher_request_set_callback(subreq, 0, NULL, NULL);
+ skcipher_request_set_crypt(subreq, req->src, req->dst, req->cryptlen,
+ req->iv);
kernel_fpu_begin();
- err = crypto_blkcipher_crt(desc.tfm)->decrypt(&desc, dst, src, nbytes);
+ err = crypto_skcipher_decrypt(subreq);
kernel_fpu_end();
+
+ skcipher_request_zero(subreq);
return err;
}
-static int crypto_fpu_init_tfm(struct crypto_tfm *tfm)
+static int crypto_fpu_init_tfm(struct crypto_skcipher *tfm)
{
- struct crypto_instance *inst = crypto_tfm_alg_instance(tfm);
- struct crypto_spawn *spawn = crypto_instance_ctx(inst);
- struct crypto_fpu_ctx *ctx = crypto_tfm_ctx(tfm);
- struct crypto_blkcipher *cipher;
+ struct skcipher_instance *inst = skcipher_alg_instance(tfm);
+ struct crypto_fpu_ctx *ctx = crypto_skcipher_ctx(tfm);
+ struct crypto_skcipher_spawn *spawn;
+ struct crypto_skcipher *cipher;
- cipher = crypto_spawn_blkcipher(spawn);
+ spawn = skcipher_instance_ctx(inst);
+ cipher = crypto_spawn_skcipher(spawn);
if (IS_ERR(cipher))
return PTR_ERR(cipher);
ctx->child = cipher;
+
return 0;
}
-static void crypto_fpu_exit_tfm(struct crypto_tfm *tfm)
+static void crypto_fpu_exit_tfm(struct crypto_skcipher *tfm)
+{
+ struct crypto_fpu_ctx *ctx = crypto_skcipher_ctx(tfm);
+
+ crypto_free_skcipher(ctx->child);
+}
+
+static void crypto_fpu_free(struct skcipher_instance *inst)
{
- struct crypto_fpu_ctx *ctx = crypto_tfm_ctx(tfm);
- crypto_free_blkcipher(ctx->child);
+ crypto_drop_skcipher(skcipher_instance_ctx(inst));
+ kfree(inst);
}
-static struct crypto_instance *crypto_fpu_alloc(struct rtattr **tb)
+static int crypto_fpu_create(struct crypto_template *tmpl, struct rtattr **tb)
{
- struct crypto_instance *inst;
- struct crypto_alg *alg;
+ struct crypto_skcipher_spawn *spawn;
+ struct skcipher_instance *inst;
+ struct crypto_attr_type *algt;
+ struct skcipher_alg *alg;
+ const char *cipher_name;
int err;
- err = crypto_check_attr_type(tb, CRYPTO_ALG_TYPE_BLKCIPHER);
+ algt = crypto_get_attr_type(tb);
+ if (IS_ERR(algt))
+ return PTR_ERR(algt);
+
+ if ((algt->type ^ (CRYPTO_ALG_INTERNAL | CRYPTO_ALG_TYPE_SKCIPHER)) &
+ algt->mask)
+ return -EINVAL;
+
+ if (!(algt->mask & CRYPTO_ALG_INTERNAL))
+ return -EINVAL;
+
+ cipher_name = crypto_attr_alg_name(tb[1]);
+ if (IS_ERR(cipher_name))
+ return PTR_ERR(cipher_name);
+
+ inst = kzalloc(sizeof(*inst) + sizeof(*spawn), GFP_KERNEL);
+ if (!inst)
+ return -ENOMEM;
+
+ spawn = skcipher_instance_ctx(inst);
+
+ crypto_set_skcipher_spawn(spawn, skcipher_crypto_instance(inst));
+ err = crypto_grab_skcipher(spawn, cipher_name, CRYPTO_ALG_INTERNAL,
+ CRYPTO_ALG_INTERNAL | CRYPTO_ALG_ASYNC);
if (err)
- return ERR_PTR(err);
-
- alg = crypto_get_attr_alg(tb, CRYPTO_ALG_TYPE_BLKCIPHER,
- CRYPTO_ALG_TYPE_MASK);
- if (IS_ERR(alg))
- return ERR_CAST(alg);
-
- inst = crypto_alloc_instance("fpu", alg);
- if (IS_ERR(inst))
- goto out_put_alg;
-
- inst->alg.cra_flags = alg->cra_flags;
- inst->alg.cra_priority = alg->cra_priority;
- inst->alg.cra_blocksize = alg->cra_blocksize;
- inst->alg.cra_alignmask = alg->cra_alignmask;
- inst->alg.cra_type = alg->cra_type;
- inst->alg.cra_blkcipher.ivsize = alg->cra_blkcipher.ivsize;
- inst->alg.cra_blkcipher.min_keysize = alg->cra_blkcipher.min_keysize;
- inst->alg.cra_blkcipher.max_keysize = alg->cra_blkcipher.max_keysize;
- inst->alg.cra_ctxsize = sizeof(struct crypto_fpu_ctx);
- inst->alg.cra_init = crypto_fpu_init_tfm;
- inst->alg.cra_exit = crypto_fpu_exit_tfm;
- inst->alg.cra_blkcipher.setkey = crypto_fpu_setkey;
- inst->alg.cra_blkcipher.encrypt = crypto_fpu_encrypt;
- inst->alg.cra_blkcipher.decrypt = crypto_fpu_decrypt;
-
-out_put_alg:
- crypto_mod_put(alg);
- return inst;
-}
+ goto out_free_inst;
-static void crypto_fpu_free(struct crypto_instance *inst)
-{
- crypto_drop_spawn(crypto_instance_ctx(inst));
+ alg = crypto_skcipher_spawn_alg(spawn);
+
+ err = crypto_inst_setname(skcipher_crypto_instance(inst), "fpu",
+ &alg->base);
+ if (err)
+ goto out_drop_skcipher;
+
+ inst->alg.base.cra_flags = CRYPTO_ALG_INTERNAL;
+ inst->alg.base.cra_priority = alg->base.cra_priority;
+ inst->alg.base.cra_blocksize = alg->base.cra_blocksize;
+ inst->alg.base.cra_alignmask = alg->base.cra_alignmask;
+
+ inst->alg.ivsize = crypto_skcipher_alg_ivsize(alg);
+ inst->alg.min_keysize = crypto_skcipher_alg_min_keysize(alg);
+ inst->alg.max_keysize = crypto_skcipher_alg_max_keysize(alg);
+
+ inst->alg.base.cra_ctxsize = sizeof(struct crypto_fpu_ctx);
+
+ inst->alg.init = crypto_fpu_init_tfm;
+ inst->alg.exit = crypto_fpu_exit_tfm;
+
+ inst->alg.setkey = crypto_fpu_setkey;
+ inst->alg.encrypt = crypto_fpu_encrypt;
+ inst->alg.decrypt = crypto_fpu_decrypt;
+
+ inst->free = crypto_fpu_free;
+
+ err = skcipher_register_instance(tmpl, inst);
+ if (err)
+ goto out_drop_skcipher;
+
+out:
+ return err;
+
+out_drop_skcipher:
+ crypto_drop_skcipher(spawn);
+out_free_inst:
kfree(inst);
+ goto out;
}
static struct crypto_template crypto_fpu_tmpl = {
.name = "fpu",
- .alloc = crypto_fpu_alloc,
- .free = crypto_fpu_free,
+ .create = crypto_fpu_create,
.module = THIS_MODULE,
};
diff --git a/arch/x86/crypto/glue_helper.c b/arch/x86/crypto/glue_helper.c
index 6a85598931b5..260a060d7275 100644
--- a/arch/x86/crypto/glue_helper.c
+++ b/arch/x86/crypto/glue_helper.c
@@ -27,10 +27,10 @@
#include <linux/module.h>
#include <crypto/b128ops.h>
+#include <crypto/internal/skcipher.h>
#include <crypto/lrw.h>
#include <crypto/xts.h>
#include <asm/crypto/glue_helper.h>
-#include <crypto/scatterwalk.h>
static int __glue_ecb_crypt_128bit(const struct common_glue_ctx *gctx,
struct blkcipher_desc *desc,
@@ -339,6 +339,41 @@ done:
return nbytes;
}
+static unsigned int __glue_xts_req_128bit(const struct common_glue_ctx *gctx,
+ void *ctx,
+ struct skcipher_walk *walk)
+{
+ const unsigned int bsize = 128 / 8;
+ unsigned int nbytes = walk->nbytes;
+ u128 *src = walk->src.virt.addr;
+ u128 *dst = walk->dst.virt.addr;
+ unsigned int num_blocks, func_bytes;
+ unsigned int i;
+
+ /* Process multi-block batch */
+ for (i = 0; i < gctx->num_funcs; i++) {
+ num_blocks = gctx->funcs[i].num_blocks;
+ func_bytes = bsize * num_blocks;
+
+ if (nbytes >= func_bytes) {
+ do {
+ gctx->funcs[i].fn_u.xts(ctx, dst, src,
+ walk->iv);
+
+ src += num_blocks;
+ dst += num_blocks;
+ nbytes -= func_bytes;
+ } while (nbytes >= func_bytes);
+
+ if (nbytes < bsize)
+ goto done;
+ }
+ }
+
+done:
+ return nbytes;
+}
+
/* for implementations implementing faster XTS IV generator */
int glue_xts_crypt_128bit(const struct common_glue_ctx *gctx,
struct blkcipher_desc *desc, struct scatterlist *dst,
@@ -379,6 +414,43 @@ int glue_xts_crypt_128bit(const struct common_glue_ctx *gctx,
}
EXPORT_SYMBOL_GPL(glue_xts_crypt_128bit);
+int glue_xts_req_128bit(const struct common_glue_ctx *gctx,
+ struct skcipher_request *req,
+ common_glue_func_t tweak_fn, void *tweak_ctx,
+ void *crypt_ctx)
+{
+ const unsigned int bsize = 128 / 8;
+ struct skcipher_walk walk;
+ bool fpu_enabled = false;
+ unsigned int nbytes;
+ int err;
+
+ err = skcipher_walk_virt(&walk, req, false);
+ nbytes = walk.nbytes;
+ if (!nbytes)
+ return err;
+
+ /* set minimum length to bsize, for tweak_fn */
+ fpu_enabled = glue_skwalk_fpu_begin(bsize, gctx->fpu_blocks_limit,
+ &walk, fpu_enabled,
+ nbytes < bsize ? bsize : nbytes);
+
+ /* calculate first value of T */
+ tweak_fn(tweak_ctx, walk.iv, walk.iv);
+
+ while (nbytes) {
+ nbytes = __glue_xts_req_128bit(gctx, crypt_ctx, &walk);
+
+ err = skcipher_walk_done(&walk, nbytes);
+ nbytes = walk.nbytes;
+ }
+
+ glue_fpu_end(fpu_enabled);
+
+ return err;
+}
+EXPORT_SYMBOL_GPL(glue_xts_req_128bit);
+
void glue_xts_crypt_128bit_one(void *ctx, u128 *dst, const u128 *src, le128 *iv,
common_glue_func_t fn)
{
diff --git a/arch/x86/crypto/sha1-mb/sha1_mb.c b/arch/x86/crypto/sha1-mb/sha1_mb.c
index 9e5b67127a09..acf9fdf01671 100644
--- a/arch/x86/crypto/sha1-mb/sha1_mb.c
+++ b/arch/x86/crypto/sha1-mb/sha1_mb.c
@@ -114,7 +114,7 @@ static inline void sha1_init_digest(uint32_t *digest)
}
static inline uint32_t sha1_pad(uint8_t padblock[SHA1_BLOCK_SIZE * 2],
- uint32_t total_len)
+ uint64_t total_len)
{
uint32_t i = total_len & (SHA1_BLOCK_SIZE - 1);
diff --git a/arch/x86/crypto/sha1-mb/sha1_mb_ctx.h b/arch/x86/crypto/sha1-mb/sha1_mb_ctx.h
index 98a35bcc6f4a..13590ccf965c 100644
--- a/arch/x86/crypto/sha1-mb/sha1_mb_ctx.h
+++ b/arch/x86/crypto/sha1-mb/sha1_mb_ctx.h
@@ -125,7 +125,7 @@ struct sha1_hash_ctx {
/* error flag */
int error;
- uint32_t total_length;
+ uint64_t total_length;
const void *incoming_buffer;
uint32_t incoming_buffer_length;
uint8_t partial_block_buffer[SHA1_BLOCK_SIZE * 2];
diff --git a/arch/x86/crypto/sha256-mb/sha256_mb.c b/arch/x86/crypto/sha256-mb/sha256_mb.c
index 6f97fb33ae21..7926a226b120 100644
--- a/arch/x86/crypto/sha256-mb/sha256_mb.c
+++ b/arch/x86/crypto/sha256-mb/sha256_mb.c
@@ -115,7 +115,7 @@ inline void sha256_init_digest(uint32_t *digest)
}
inline uint32_t sha256_pad(uint8_t padblock[SHA256_BLOCK_SIZE * 2],
- uint32_t total_len)
+ uint64_t total_len)
{
uint32_t i = total_len & (SHA256_BLOCK_SIZE - 1);
diff --git a/arch/x86/crypto/sha256-mb/sha256_mb_ctx.h b/arch/x86/crypto/sha256-mb/sha256_mb_ctx.h
index edd252b73206..aabb30320af0 100644
--- a/arch/x86/crypto/sha256-mb/sha256_mb_ctx.h
+++ b/arch/x86/crypto/sha256-mb/sha256_mb_ctx.h
@@ -125,7 +125,7 @@ struct sha256_hash_ctx {
/* error flag */
int error;
- uint32_t total_length;
+ uint64_t total_length;
const void *incoming_buffer;
uint32_t incoming_buffer_length;
uint8_t partial_block_buffer[SHA256_BLOCK_SIZE * 2];
diff --git a/arch/x86/crypto/sha512-mb/sha512_mb.c b/arch/x86/crypto/sha512-mb/sha512_mb.c
index d210174a52b0..9c1bb6d58141 100644
--- a/arch/x86/crypto/sha512-mb/sha512_mb.c
+++ b/arch/x86/crypto/sha512-mb/sha512_mb.c
@@ -117,7 +117,7 @@ inline void sha512_init_digest(uint64_t *digest)
}
inline uint32_t sha512_pad(uint8_t padblock[SHA512_BLOCK_SIZE * 2],
- uint32_t total_len)
+ uint64_t total_len)
{
uint32_t i = total_len & (SHA512_BLOCK_SIZE - 1);
diff --git a/arch/x86/crypto/sha512-mb/sha512_mb_ctx.h b/arch/x86/crypto/sha512-mb/sha512_mb_ctx.h
index 9d4b2c8208d5..e4653f5eec3f 100644
--- a/arch/x86/crypto/sha512-mb/sha512_mb_ctx.h
+++ b/arch/x86/crypto/sha512-mb/sha512_mb_ctx.h
@@ -119,7 +119,7 @@ struct sha512_hash_ctx {
/* error flag */
int error;
- uint32_t total_length;
+ uint64_t total_length;
const void *incoming_buffer;
uint32_t incoming_buffer_length;
uint8_t partial_block_buffer[SHA512_BLOCK_SIZE * 2];
diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index acc0c6f36f3f..701d29f8e4d3 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -926,8 +926,8 @@ ftrace_graph_call:
jmp ftrace_stub
#endif
-.globl ftrace_stub
-ftrace_stub:
+/* This is weak to keep gas from relaxing the jumps */
+WEAK(ftrace_stub)
ret
END(ftrace_caller)
diff --git a/arch/x86/entry/vdso/vma.c b/arch/x86/entry/vdso/vma.c
index e739002427ed..40121d14d34d 100644
--- a/arch/x86/entry/vdso/vma.c
+++ b/arch/x86/entry/vdso/vma.c
@@ -109,7 +109,7 @@ static int vvar_fault(const struct vm_special_mapping *sm,
return VM_FAULT_SIGBUS;
if (sym_offset == image->sym_vvar_page) {
- ret = vm_insert_pfn(vma, (unsigned long)vmf->virtual_address,
+ ret = vm_insert_pfn(vma, vmf->address,
__pa_symbol(&__vvar_page) >> PAGE_SHIFT);
} else if (sym_offset == image->sym_pvclock_page) {
struct pvclock_vsyscall_time_info *pvti =
@@ -117,7 +117,7 @@ static int vvar_fault(const struct vm_special_mapping *sm,
if (pvti && vclock_was_used(VCLOCK_PVCLOCK)) {
ret = vm_insert_pfn(
vma,
- (unsigned long)vmf->virtual_address,
+ vmf->address,
__pa(pvti) >> PAGE_SHIFT);
}
}
diff --git a/arch/x86/include/asm/asm-prototypes.h b/arch/x86/include/asm/asm-prototypes.h
new file mode 100644
index 000000000000..44b8762fa0c7
--- /dev/null
+++ b/arch/x86/include/asm/asm-prototypes.h
@@ -0,0 +1,16 @@
+#include <asm/ftrace.h>
+#include <asm/uaccess.h>
+#include <asm/string.h>
+#include <asm/page.h>
+#include <asm/checksum.h>
+
+#include <asm-generic/asm-prototypes.h>
+
+#include <asm/page.h>
+#include <asm/pgtable.h>
+#include <asm/special_insns.h>
+#include <asm/preempt.h>
+
+#ifndef CONFIG_X86_CMPXCHG64
+extern void cmpxchg8b_emu(void);
+#endif
diff --git a/arch/x86/include/asm/crypto/glue_helper.h b/arch/x86/include/asm/crypto/glue_helper.h
index 03bb1065c335..29e53ea7d764 100644
--- a/arch/x86/include/asm/crypto/glue_helper.h
+++ b/arch/x86/include/asm/crypto/glue_helper.h
@@ -5,8 +5,8 @@
#ifndef _CRYPTO_GLUE_HELPER_H
#define _CRYPTO_GLUE_HELPER_H
+#include <crypto/internal/skcipher.h>
#include <linux/kernel.h>
-#include <linux/crypto.h>
#include <asm/fpu/api.h>
#include <crypto/b128ops.h>
@@ -69,6 +69,31 @@ static inline bool glue_fpu_begin(unsigned int bsize, int fpu_blocks_limit,
return true;
}
+static inline bool glue_skwalk_fpu_begin(unsigned int bsize,
+ int fpu_blocks_limit,
+ struct skcipher_walk *walk,
+ bool fpu_enabled, unsigned int nbytes)
+{
+ if (likely(fpu_blocks_limit < 0))
+ return false;
+
+ if (fpu_enabled)
+ return true;
+
+ /*
+ * Vector-registers are only used when chunk to be processed is large
+ * enough, so do not enable FPU until it is necessary.
+ */
+ if (nbytes < bsize * (unsigned int)fpu_blocks_limit)
+ return false;
+
+ /* prevent sleeping if FPU is in use */
+ skcipher_walk_atomise(walk);
+
+ kernel_fpu_begin();
+ return true;
+}
+
static inline void glue_fpu_end(bool fpu_enabled)
{
if (fpu_enabled)
@@ -139,6 +164,18 @@ extern int glue_xts_crypt_128bit(const struct common_glue_ctx *gctx,
common_glue_func_t tweak_fn, void *tweak_ctx,
void *crypt_ctx);
+extern int glue_xts_crypt_128bit(const struct common_glue_ctx *gctx,
+ struct blkcipher_desc *desc,
+ struct scatterlist *dst,
+ struct scatterlist *src, unsigned int nbytes,
+ common_glue_func_t tweak_fn, void *tweak_ctx,
+ void *crypt_ctx);
+
+extern int glue_xts_req_128bit(const struct common_glue_ctx *gctx,
+ struct skcipher_request *req,
+ common_glue_func_t tweak_fn, void *tweak_ctx,
+ void *crypt_ctx);
+
extern void glue_xts_crypt_128bit_one(void *ctx, u128 *dst, const u128 *src,
le128 *iv, common_glue_func_t fn);
diff --git a/arch/x86/include/asm/e820.h b/arch/x86/include/asm/e820.h
index 476b574de99e..ec23d8e1297c 100644
--- a/arch/x86/include/asm/e820.h
+++ b/arch/x86/include/asm/e820.h
@@ -1,13 +1,17 @@
#ifndef _ASM_X86_E820_H
#define _ASM_X86_E820_H
-#ifdef CONFIG_EFI
+/*
+ * E820_X_MAX is the maximum size of the extended E820 table. The extended
+ * table may contain up to 3 extra E820 entries per possible NUMA node, so we
+ * make room for 3 * MAX_NUMNODES possible entries, beyond the standard 128.
+ * Also note that E820_X_MAX *must* be defined before we include uapi/asm/e820.h.
+ */
#include <linux/numa.h>
#define E820_X_MAX (E820MAX + 3 * MAX_NUMNODES)
-#else /* ! CONFIG_EFI */
-#define E820_X_MAX E820MAX
-#endif
+
#include <uapi/asm/e820.h>
+
#ifndef __ASSEMBLY__
/* see comment in arch/x86/kernel/e820.c */
extern struct e820map *e820;
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index bdde80731f49..7892530cbacf 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -191,6 +191,8 @@ enum {
#define PFERR_RSVD_BIT 3
#define PFERR_FETCH_BIT 4
#define PFERR_PK_BIT 5
+#define PFERR_GUEST_FINAL_BIT 32
+#define PFERR_GUEST_PAGE_BIT 33
#define PFERR_PRESENT_MASK (1U << PFERR_PRESENT_BIT)
#define PFERR_WRITE_MASK (1U << PFERR_WRITE_BIT)
@@ -198,6 +200,13 @@ enum {
#define PFERR_RSVD_MASK (1U << PFERR_RSVD_BIT)
#define PFERR_FETCH_MASK (1U << PFERR_FETCH_BIT)
#define PFERR_PK_MASK (1U << PFERR_PK_BIT)
+#define PFERR_GUEST_FINAL_MASK (1ULL << PFERR_GUEST_FINAL_BIT)
+#define PFERR_GUEST_PAGE_MASK (1ULL << PFERR_GUEST_PAGE_BIT)
+
+#define PFERR_NESTED_GUEST_PAGE (PFERR_GUEST_PAGE_MASK | \
+ PFERR_USER_MASK | \
+ PFERR_WRITE_MASK | \
+ PFERR_PRESENT_MASK)
/* apic attention bits */
#define KVM_APIC_CHECK_VAPIC 0
@@ -1062,6 +1071,7 @@ unsigned int kvm_mmu_calculate_mmu_pages(struct kvm *kvm);
void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int kvm_nr_mmu_pages);
int load_pdptrs(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, unsigned long cr3);
+bool pdptrs_changed(struct kvm_vcpu *vcpu);
int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa,
const void *val, int bytes);
@@ -1124,7 +1134,8 @@ int kvm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr);
struct x86_emulate_ctxt;
int kvm_fast_pio_out(struct kvm_vcpu *vcpu, int size, unsigned short port);
-void kvm_emulate_cpuid(struct kvm_vcpu *vcpu);
+int kvm_fast_pio_in(struct kvm_vcpu *vcpu, int size, unsigned short port);
+int kvm_emulate_cpuid(struct kvm_vcpu *vcpu);
int kvm_emulate_halt(struct kvm_vcpu *vcpu);
int kvm_vcpu_halt(struct kvm_vcpu *vcpu);
int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu);
@@ -1203,7 +1214,7 @@ void kvm_vcpu_deactivate_apicv(struct kvm_vcpu *vcpu);
int kvm_emulate_hypercall(struct kvm_vcpu *vcpu);
-int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t gva, u32 error_code,
+int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t gva, u64 error_code,
void *insn, int insn_len);
void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva);
void kvm_mmu_new_cr3(struct kvm_vcpu *vcpu);
@@ -1358,7 +1369,8 @@ void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu);
extern bool kvm_find_async_pf_gfn(struct kvm_vcpu *vcpu, gfn_t gfn);
-void kvm_complete_insn_gp(struct kvm_vcpu *vcpu, int err);
+int kvm_skip_emulated_instruction(struct kvm_vcpu *vcpu);
+int kvm_complete_insn_gp(struct kvm_vcpu *vcpu, int err);
int kvm_is_in_guest(void);
diff --git a/arch/x86/include/asm/kvm_page_track.h b/arch/x86/include/asm/kvm_page_track.h
index c2b8d24a235c..d74747b031ec 100644
--- a/arch/x86/include/asm/kvm_page_track.h
+++ b/arch/x86/include/asm/kvm_page_track.h
@@ -29,9 +29,20 @@ struct kvm_page_track_notifier_node {
* @gpa: the physical address written by guest.
* @new: the data was written to the address.
* @bytes: the written length.
+ * @node: this node
*/
void (*track_write)(struct kvm_vcpu *vcpu, gpa_t gpa, const u8 *new,
- int bytes);
+ int bytes, struct kvm_page_track_notifier_node *node);
+ /*
+ * It is called when memory slot is being moved or removed
+ * users can drop write-protection for the pages in that memory slot
+ *
+ * @kvm: the kvm where memory slot being moved or removed
+ * @slot: the memory slot being moved or removed
+ * @node: this node
+ */
+ void (*track_flush_slot)(struct kvm *kvm, struct kvm_memory_slot *slot,
+ struct kvm_page_track_notifier_node *node);
};
void kvm_page_track_init(struct kvm *kvm);
@@ -58,4 +69,5 @@ kvm_page_track_unregister_notifier(struct kvm *kvm,
struct kvm_page_track_notifier_node *n);
void kvm_page_track_write(struct kvm_vcpu *vcpu, gpa_t gpa, const u8 *new,
int bytes);
+void kvm_page_track_flush_slot(struct kvm *kvm, struct kvm_memory_slot *slot);
#endif
diff --git a/arch/x86/include/asm/trace/exceptions.h b/arch/x86/include/asm/trace/exceptions.h
index 2fbc66c7885b..2422b14c50a7 100644
--- a/arch/x86/include/asm/trace/exceptions.h
+++ b/arch/x86/include/asm/trace/exceptions.h
@@ -6,7 +6,7 @@
#include <linux/tracepoint.h>
-extern void trace_irq_vector_regfunc(void);
+extern int trace_irq_vector_regfunc(void);
extern void trace_irq_vector_unregfunc(void);
DECLARE_EVENT_CLASS(x86_exceptions,
diff --git a/arch/x86/include/asm/trace/irq_vectors.h b/arch/x86/include/asm/trace/irq_vectors.h
index 38a09a13a9bc..32dd6a9e343c 100644
--- a/arch/x86/include/asm/trace/irq_vectors.h
+++ b/arch/x86/include/asm/trace/irq_vectors.h
@@ -6,7 +6,7 @@
#include <linux/tracepoint.h>
-extern void trace_irq_vector_regfunc(void);
+extern int trace_irq_vector_regfunc(void);
extern void trace_irq_vector_unregfunc(void);
DECLARE_EVENT_CLASS(x86_irq_vector,
diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
index a002b07a7099..2b5b2d4b924e 100644
--- a/arch/x86/include/asm/vmx.h
+++ b/arch/x86/include/asm/vmx.h
@@ -25,6 +25,7 @@
#define VMX_H
+#include <linux/bitops.h>
#include <linux/types.h>
#include <uapi/asm/vmx.h>
@@ -60,6 +61,7 @@
*/
#define SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES 0x00000001
#define SECONDARY_EXEC_ENABLE_EPT 0x00000002
+#define SECONDARY_EXEC_DESC 0x00000004
#define SECONDARY_EXEC_RDTSCP 0x00000008
#define SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE 0x00000010
#define SECONDARY_EXEC_ENABLE_VPID 0x00000020
@@ -110,6 +112,36 @@
#define VMX_MISC_SAVE_EFER_LMA 0x00000020
#define VMX_MISC_ACTIVITY_HLT 0x00000040
+static inline u32 vmx_basic_vmcs_revision_id(u64 vmx_basic)
+{
+ return vmx_basic & GENMASK_ULL(30, 0);
+}
+
+static inline u32 vmx_basic_vmcs_size(u64 vmx_basic)
+{
+ return (vmx_basic & GENMASK_ULL(44, 32)) >> 32;
+}
+
+static inline int vmx_misc_preemption_timer_rate(u64 vmx_misc)
+{
+ return vmx_misc & VMX_MISC_PREEMPTION_TIMER_RATE_MASK;
+}
+
+static inline int vmx_misc_cr3_count(u64 vmx_misc)
+{
+ return (vmx_misc & GENMASK_ULL(24, 16)) >> 16;
+}
+
+static inline int vmx_misc_max_msr(u64 vmx_misc)
+{
+ return (vmx_misc & GENMASK_ULL(27, 25)) >> 25;
+}
+
+static inline int vmx_misc_mseg_revid(u64 vmx_misc)
+{
+ return (vmx_misc & GENMASK_ULL(63, 32)) >> 32;
+}
+
/* VMCS Encodings */
enum vmcs_field {
VIRTUAL_PROCESSOR_ID = 0x00000000,
@@ -399,10 +431,11 @@ enum vmcs_field {
#define IDENTITY_PAGETABLE_PRIVATE_MEMSLOT (KVM_USER_MEM_SLOTS + 2)
#define VMX_NR_VPIDS (1 << 16)
+#define VMX_VPID_EXTENT_INDIVIDUAL_ADDR 0
#define VMX_VPID_EXTENT_SINGLE_CONTEXT 1
#define VMX_VPID_EXTENT_ALL_CONTEXT 2
+#define VMX_VPID_EXTENT_SINGLE_NON_GLOBAL 3
-#define VMX_EPT_EXTENT_INDIVIDUAL_ADDR 0
#define VMX_EPT_EXTENT_CONTEXT 1
#define VMX_EPT_EXTENT_GLOBAL 2
#define VMX_EPT_EXTENT_SHIFT 24
@@ -419,8 +452,10 @@ enum vmcs_field {
#define VMX_EPT_EXTENT_GLOBAL_BIT (1ull << 26)
#define VMX_VPID_INVVPID_BIT (1ull << 0) /* (32 - 32) */
+#define VMX_VPID_EXTENT_INDIVIDUAL_ADDR_BIT (1ull << 8) /* (40 - 32) */
#define VMX_VPID_EXTENT_SINGLE_CONTEXT_BIT (1ull << 9) /* (41 - 32) */
#define VMX_VPID_EXTENT_GLOBAL_CONTEXT_BIT (1ull << 10) /* (42 - 32) */
+#define VMX_VPID_EXTENT_SINGLE_NON_GLOBAL_BIT (1ull << 11) /* (43 - 32) */
#define VMX_EPT_DEFAULT_GAW 3
#define VMX_EPT_MAX_GAW 0x4
diff --git a/arch/x86/include/uapi/asm/vmx.h b/arch/x86/include/uapi/asm/vmx.h
index 37fee272618f..14458658e988 100644
--- a/arch/x86/include/uapi/asm/vmx.h
+++ b/arch/x86/include/uapi/asm/vmx.h
@@ -65,6 +65,8 @@
#define EXIT_REASON_TPR_BELOW_THRESHOLD 43
#define EXIT_REASON_APIC_ACCESS 44
#define EXIT_REASON_EOI_INDUCED 45
+#define EXIT_REASON_GDTR_IDTR 46
+#define EXIT_REASON_LDTR_TR 47
#define EXIT_REASON_EPT_VIOLATION 48
#define EXIT_REASON_EPT_MISCONFIG 49
#define EXIT_REASON_INVEPT 50
@@ -113,6 +115,8 @@
{ EXIT_REASON_MCE_DURING_VMENTRY, "MCE_DURING_VMENTRY" }, \
{ EXIT_REASON_TPR_BELOW_THRESHOLD, "TPR_BELOW_THRESHOLD" }, \
{ EXIT_REASON_APIC_ACCESS, "APIC_ACCESS" }, \
+ { EXIT_REASON_GDTR_IDTR, "GDTR_IDTR" }, \
+ { EXIT_REASON_LDTR_TR, "LDTR_TR" }, \
{ EXIT_REASON_EPT_VIOLATION, "EPT_VIOLATION" }, \
{ EXIT_REASON_EPT_MISCONFIG, "EPT_MISCONFIG" }, \
{ EXIT_REASON_INVEPT, "INVEPT" }, \
@@ -129,6 +133,7 @@
{ EXIT_REASON_XRSTORS, "XRSTORS" }
#define VMX_ABORT_SAVE_GUEST_MSR_FAIL 1
+#define VMX_ABORT_LOAD_HOST_PDPTE_FAIL 2
#define VMX_ABORT_LOAD_HOST_MSR_FAIL 4
#endif /* _UAPIVMX_H */
diff --git a/arch/x86/kernel/acpi/apei.c b/arch/x86/kernel/acpi/apei.c
index c280df6b2aa2..ea3046e0b0cf 100644
--- a/arch/x86/kernel/acpi/apei.c
+++ b/arch/x86/kernel/acpi/apei.c
@@ -24,9 +24,6 @@ int arch_apei_enable_cmcff(struct acpi_hest_header *hest_hdr, void *data)
struct acpi_hest_ia_corrected *cmc;
struct acpi_hest_ia_error_bank *mc_bank;
- if (hest_hdr->type != ACPI_HEST_TYPE_IA32_CORRECTED_CHECK)
- return 0;
-
cmc = (struct acpi_hest_ia_corrected *)hest_hdr;
if (!cmc->enabled)
return 0;
diff --git a/arch/x86/kernel/acpi/wakeup_64.S b/arch/x86/kernel/acpi/wakeup_64.S
index 169963f471bb..50b8ed0317a3 100644
--- a/arch/x86/kernel/acpi/wakeup_64.S
+++ b/arch/x86/kernel/acpi/wakeup_64.S
@@ -109,6 +109,15 @@ ENTRY(do_suspend_lowlevel)
movq pt_regs_r14(%rax), %r14
movq pt_regs_r15(%rax), %r15
+#ifdef CONFIG_KASAN
+ /*
+ * The suspend path may have poisoned some areas deeper in the stack,
+ * which we now need to unpoison.
+ */
+ movq %rsp, %rdi
+ call kasan_unpoison_task_stack_below
+#endif
+
xorl %eax, %eax
addq $8, %rsp
FRAME_END
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c
index de6626c18e42..be6337156502 100644
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
@@ -934,6 +934,8 @@ static int __populate_cache_leaves(unsigned int cpu)
ci_leaf_init(this_leaf++, &id4_regs);
__cache_cpumap_setup(cpu, idx, &id4_regs);
}
+ this_cpu_ci->cpu_map_populated = true;
+
return 0;
}
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 132e1ec67da0..00ef43233e03 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -516,7 +516,7 @@ int mce_available(struct cpuinfo_x86 *c)
static void mce_schedule_work(void)
{
- if (!mce_gen_pool_empty() && keventd_up())
+ if (!mce_gen_pool_empty())
schedule_work(&mce_work);
}
diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c
index 650830e39e3a..3741461c63a0 100644
--- a/arch/x86/kernel/crash.c
+++ b/arch/x86/kernel/crash.c
@@ -631,9 +631,9 @@ static int determine_backup_region(u64 start, u64 end, void *arg)
int crash_load_segments(struct kimage *image)
{
- unsigned long src_start, src_sz, elf_sz;
- void *elf_addr;
int ret;
+ struct kexec_buf kbuf = { .image = image, .buf_min = 0,
+ .buf_max = ULONG_MAX, .top_down = false };
/*
* Determine and load a segment for backup area. First 640K RAM
@@ -647,43 +647,44 @@ int crash_load_segments(struct kimage *image)
if (ret < 0)
return ret;
- src_start = image->arch.backup_src_start;
- src_sz = image->arch.backup_src_sz;
-
/* Add backup segment. */
- if (src_sz) {
+ if (image->arch.backup_src_sz) {
+ kbuf.buffer = &crash_zero_bytes;
+ kbuf.bufsz = sizeof(crash_zero_bytes);
+ kbuf.memsz = image->arch.backup_src_sz;
+ kbuf.buf_align = PAGE_SIZE;
/*
* Ideally there is no source for backup segment. This is
* copied in purgatory after crash. Just add a zero filled
* segment for now to make sure checksum logic works fine.
*/
- ret = kexec_add_buffer(image, (char *)&crash_zero_bytes,
- sizeof(crash_zero_bytes), src_sz,
- PAGE_SIZE, 0, -1, 0,
- &image->arch.backup_load_addr);
+ ret = kexec_add_buffer(&kbuf);
if (ret)
return ret;
+ image->arch.backup_load_addr = kbuf.mem;
pr_debug("Loaded backup region at 0x%lx backup_start=0x%lx memsz=0x%lx\n",
- image->arch.backup_load_addr, src_start, src_sz);
+ image->arch.backup_load_addr,
+ image->arch.backup_src_start, kbuf.memsz);
}
/* Prepare elf headers and add a segment */
- ret = prepare_elf_headers(image, &elf_addr, &elf_sz);
+ ret = prepare_elf_headers(image, &kbuf.buffer, &kbuf.bufsz);
if (ret)
return ret;
- image->arch.elf_headers = elf_addr;
- image->arch.elf_headers_sz = elf_sz;
+ image->arch.elf_headers = kbuf.buffer;
+ image->arch.elf_headers_sz = kbuf.bufsz;
- ret = kexec_add_buffer(image, (char *)elf_addr, elf_sz, elf_sz,
- ELF_CORE_HEADER_ALIGN, 0, -1, 0,
- &image->arch.elf_load_addr);
+ kbuf.memsz = kbuf.bufsz;
+ kbuf.buf_align = ELF_CORE_HEADER_ALIGN;
+ ret = kexec_add_buffer(&kbuf);
if (ret) {
vfree((void *)image->arch.elf_headers);
return ret;
}
+ image->arch.elf_load_addr = kbuf.mem;
pr_debug("Loaded ELF headers at 0x%lx bufsz=0x%lx memsz=0x%lx\n",
- image->arch.elf_load_addr, elf_sz, elf_sz);
+ image->arch.elf_load_addr, kbuf.bufsz, kbuf.bufsz);
return ret;
}
diff --git a/arch/x86/kernel/kexec-bzimage64.c b/arch/x86/kernel/kexec-bzimage64.c
index 3407b148c240..d0a814a9d96a 100644
--- a/arch/x86/kernel/kexec-bzimage64.c
+++ b/arch/x86/kernel/kexec-bzimage64.c
@@ -331,17 +331,17 @@ static void *bzImage64_load(struct kimage *image, char *kernel,
struct setup_header *header;
int setup_sects, kern16_size, ret = 0;
- unsigned long setup_header_size, params_cmdline_sz, params_misc_sz;
+ unsigned long setup_header_size, params_cmdline_sz;
struct boot_params *params;
unsigned long bootparam_load_addr, kernel_load_addr, initrd_load_addr;
unsigned long purgatory_load_addr;
- unsigned long kernel_bufsz, kernel_memsz, kernel_align;
- char *kernel_buf;
struct bzimage64_data *ldata;
struct kexec_entry64_regs regs64;
void *stack;
unsigned int setup_hdr_offset = offsetof(struct boot_params, hdr);
unsigned int efi_map_offset, efi_map_sz, efi_setup_data_offset;
+ struct kexec_buf kbuf = { .image = image, .buf_max = ULONG_MAX,
+ .top_down = true };
header = (struct setup_header *)(kernel + setup_hdr_offset);
setup_sects = header->setup_sects;
@@ -402,11 +402,11 @@ static void *bzImage64_load(struct kimage *image, char *kernel,
params_cmdline_sz = sizeof(struct boot_params) + cmdline_len +
MAX_ELFCOREHDR_STR_LEN;
params_cmdline_sz = ALIGN(params_cmdline_sz, 16);
- params_misc_sz = params_cmdline_sz + efi_map_sz +
+ kbuf.bufsz = params_cmdline_sz + efi_map_sz +
sizeof(struct setup_data) +
sizeof(struct efi_setup_data);
- params = kzalloc(params_misc_sz, GFP_KERNEL);
+ params = kzalloc(kbuf.bufsz, GFP_KERNEL);
if (!params)
return ERR_PTR(-ENOMEM);
efi_map_offset = params_cmdline_sz;
@@ -418,37 +418,41 @@ static void *bzImage64_load(struct kimage *image, char *kernel,
/* Is there a limit on setup header size? */
memcpy(&params->hdr, (kernel + setup_hdr_offset), setup_header_size);
- ret = kexec_add_buffer(image, (char *)params, params_misc_sz,
- params_misc_sz, 16, MIN_BOOTPARAM_ADDR,
- ULONG_MAX, 1, &bootparam_load_addr);
+ kbuf.buffer = params;
+ kbuf.memsz = kbuf.bufsz;
+ kbuf.buf_align = 16;
+ kbuf.buf_min = MIN_BOOTPARAM_ADDR;
+ ret = kexec_add_buffer(&kbuf);
if (ret)
goto out_free_params;
+ bootparam_load_addr = kbuf.mem;
pr_debug("Loaded boot_param, command line and misc at 0x%lx bufsz=0x%lx memsz=0x%lx\n",
- bootparam_load_addr, params_misc_sz, params_misc_sz);
+ bootparam_load_addr, kbuf.bufsz, kbuf.bufsz);
/* Load kernel */
- kernel_buf = kernel + kern16_size;
- kernel_bufsz = kernel_len - kern16_size;
- kernel_memsz = PAGE_ALIGN(header->init_size);
- kernel_align = header->kernel_alignment;
-
- ret = kexec_add_buffer(image, kernel_buf,
- kernel_bufsz, kernel_memsz, kernel_align,
- MIN_KERNEL_LOAD_ADDR, ULONG_MAX, 1,
- &kernel_load_addr);
+ kbuf.buffer = kernel + kern16_size;
+ kbuf.bufsz = kernel_len - kern16_size;
+ kbuf.memsz = PAGE_ALIGN(header->init_size);
+ kbuf.buf_align = header->kernel_alignment;
+ kbuf.buf_min = MIN_KERNEL_LOAD_ADDR;
+ ret = kexec_add_buffer(&kbuf);
if (ret)
goto out_free_params;
+ kernel_load_addr = kbuf.mem;
pr_debug("Loaded 64bit kernel at 0x%lx bufsz=0x%lx memsz=0x%lx\n",
- kernel_load_addr, kernel_memsz, kernel_memsz);
+ kernel_load_addr, kbuf.bufsz, kbuf.memsz);
/* Load initrd high */
if (initrd) {
- ret = kexec_add_buffer(image, initrd, initrd_len, initrd_len,
- PAGE_SIZE, MIN_INITRD_LOAD_ADDR,
- ULONG_MAX, 1, &initrd_load_addr);
+ kbuf.buffer = initrd;
+ kbuf.bufsz = kbuf.memsz = initrd_len;
+ kbuf.buf_align = PAGE_SIZE;
+ kbuf.buf_min = MIN_INITRD_LOAD_ADDR;
+ ret = kexec_add_buffer(&kbuf);
if (ret)
goto out_free_params;
+ initrd_load_addr = kbuf.mem;
pr_debug("Loaded initrd at 0x%lx bufsz=0x%lx memsz=0x%lx\n",
initrd_load_addr, initrd_len, initrd_len);
diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c
index f09df2ff1bcc..d4a15831ac58 100644
--- a/arch/x86/kernel/ldt.c
+++ b/arch/x86/kernel/ldt.c
@@ -93,7 +93,7 @@ static void free_ldt_struct(struct ldt_struct *ldt)
paravirt_free_ldt(ldt->entries, ldt->size);
if (ldt->size * LDT_ENTRY_SIZE > PAGE_SIZE)
- vfree(ldt->entries);
+ vfree_atomic(ldt->entries);
else
free_page((unsigned long)ldt->entries);
kfree(ldt);
diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c
index 8c1f218926d7..307b1f4543de 100644
--- a/arch/x86/kernel/machine_kexec_64.c
+++ b/arch/x86/kernel/machine_kexec_64.c
@@ -328,7 +328,7 @@ void machine_kexec(struct kimage *image)
void arch_crash_save_vmcoreinfo(void)
{
- VMCOREINFO_SYMBOL(phys_base);
+ VMCOREINFO_NUMBER(phys_base);
VMCOREINFO_SYMBOL(init_level4_pgt);
#ifdef CONFIG_NUMA
@@ -337,9 +337,7 @@ void arch_crash_save_vmcoreinfo(void)
#endif
vmcoreinfo_append_str("KERNELOFFSET=%lx\n",
kaslr_offset());
- VMCOREINFO_PAGE_OFFSET(PAGE_OFFSET);
- VMCOREINFO_VMALLOC_START(VMALLOC_START);
- VMCOREINFO_VMEMMAP_START(VMEMMAP_START);
+ VMCOREINFO_NUMBER(KERNEL_IMAGE_SIZE);
}
/* arch-dependent functionality related to kexec file-based syscall */
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 9c337b0e8ba7..4cfba947d774 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -985,6 +985,30 @@ void __init setup_arch(char **cmdline_p)
parse_early_param();
+#ifdef CONFIG_MEMORY_HOTPLUG
+ /*
+ * Memory used by the kernel cannot be hot-removed because Linux
+ * cannot migrate the kernel pages. When memory hotplug is
+ * enabled, we should prevent memblock from allocating memory
+ * for the kernel.
+ *
+ * ACPI SRAT records all hotpluggable memory ranges. But before
+ * SRAT is parsed, we don't know about it.
+ *
+ * The kernel image is loaded into memory at very early time. We
+ * cannot prevent this anyway. So on NUMA system, we set any
+ * node the kernel resides in as un-hotpluggable.
+ *
+ * Since on modern servers, one node could have double-digit
+ * gigabytes memory, we can assume the memory around the kernel
+ * image is also un-hotpluggable. So before SRAT is parsed, just
+ * allocate memory near the kernel image to try the best to keep
+ * the kernel away from hotpluggable memory.
+ */
+ if (movable_node_is_enabled())
+ memblock_set_bottom_up(true);
+#endif
+
x86_report_nx();
/* after early param, so could get panic from serial */
diff --git a/arch/x86/kernel/tracepoint.c b/arch/x86/kernel/tracepoint.c
index 1c113db9ed57..15515132bf0d 100644
--- a/arch/x86/kernel/tracepoint.c
+++ b/arch/x86/kernel/tracepoint.c
@@ -34,7 +34,7 @@ static void switch_idt(void *arg)
local_irq_restore(flags);
}
-void trace_irq_vector_regfunc(void)
+int trace_irq_vector_regfunc(void)
{
mutex_lock(&irq_vector_mutex);
if (!trace_irq_vector_refcount) {
@@ -44,6 +44,7 @@ void trace_irq_vector_regfunc(void)
}
trace_irq_vector_refcount++;
mutex_unlock(&irq_vector_mutex);
+ return 0;
}
void trace_irq_vector_unregfunc(void)
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 0aefb626fa8f..b2d3cf1ef54a 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -16,6 +16,7 @@
#include <linux/export.h>
#include <linux/vmalloc.h>
#include <linux/uaccess.h>
+#include <asm/processor.h>
#include <asm/user.h>
#include <asm/fpu/xstate.h>
#include "cpuid.h"
@@ -64,6 +65,11 @@ u64 kvm_supported_xcr0(void)
#define F(x) bit(X86_FEATURE_##x)
+/* These are scattered features in cpufeatures.h. */
+#define KVM_CPUID_BIT_AVX512_4VNNIW 2
+#define KVM_CPUID_BIT_AVX512_4FMAPS 3
+#define KF(x) bit(KVM_CPUID_BIT_##x)
+
int kvm_update_cpuid(struct kvm_vcpu *vcpu)
{
struct kvm_cpuid_entry2 *best;
@@ -80,6 +86,10 @@ int kvm_update_cpuid(struct kvm_vcpu *vcpu)
best->ecx |= F(OSXSAVE);
}
+ best->edx &= ~F(APIC);
+ if (vcpu->arch.apic_base & MSR_IA32_APICBASE_ENABLE)
+ best->edx |= F(APIC);
+
if (apic) {
if (best->ecx & F(TSC_DEADLINE_TIMER))
apic->lapic_timer.timer_mode_mask = 3 << 17;
@@ -374,6 +384,10 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
/* cpuid 7.0.ecx*/
const u32 kvm_cpuid_7_0_ecx_x86_features = F(PKU) | 0 /*OSPKE*/;
+ /* cpuid 7.0.edx*/
+ const u32 kvm_cpuid_7_0_edx_x86_features =
+ KF(AVX512_4VNNIW) | KF(AVX512_4FMAPS);
+
/* all calls to cpuid_count() should be made on the same cpu */
get_cpu();
@@ -456,12 +470,14 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
/* PKU is not yet implemented for shadow paging. */
if (!tdp_enabled)
entry->ecx &= ~F(PKU);
+ entry->edx &= kvm_cpuid_7_0_edx_x86_features;
+ entry->edx &= get_scattered_cpuid_leaf(7, 0, CPUID_EDX);
} else {
entry->ebx = 0;
entry->ecx = 0;
+ entry->edx = 0;
}
entry->eax = 0;
- entry->edx = 0;
break;
}
case 9:
@@ -861,17 +877,17 @@ void kvm_cpuid(struct kvm_vcpu *vcpu, u32 *eax, u32 *ebx, u32 *ecx, u32 *edx)
}
EXPORT_SYMBOL_GPL(kvm_cpuid);
-void kvm_emulate_cpuid(struct kvm_vcpu *vcpu)
+int kvm_emulate_cpuid(struct kvm_vcpu *vcpu)
{
- u32 function, eax, ebx, ecx, edx;
+ u32 eax, ebx, ecx, edx;
- function = eax = kvm_register_read(vcpu, VCPU_REGS_RAX);
+ eax = kvm_register_read(vcpu, VCPU_REGS_RAX);
ecx = kvm_register_read(vcpu, VCPU_REGS_RCX);
kvm_cpuid(vcpu, &eax, &ebx, &ecx, &edx);
kvm_register_write(vcpu, VCPU_REGS_RAX, eax);
kvm_register_write(vcpu, VCPU_REGS_RBX, ebx);
kvm_register_write(vcpu, VCPU_REGS_RCX, ecx);
kvm_register_write(vcpu, VCPU_REGS_RDX, edx);
- kvm_x86_ops->skip_emulated_instruction(vcpu);
+ return kvm_skip_emulated_instruction(vcpu);
}
EXPORT_SYMBOL_GPL(kvm_emulate_cpuid);
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index a3ce9d260d68..56628a44668b 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -158,9 +158,11 @@
#define Src2GS (OpGS << Src2Shift)
#define Src2Mask (OpMask << Src2Shift)
#define Mmx ((u64)1 << 40) /* MMX Vector instruction */
+#define AlignMask ((u64)7 << 41)
#define Aligned ((u64)1 << 41) /* Explicitly aligned (e.g. MOVDQA) */
-#define Unaligned ((u64)1 << 42) /* Explicitly unaligned (e.g. MOVDQU) */
-#define Avx ((u64)1 << 43) /* Advanced Vector Extensions */
+#define Unaligned ((u64)2 << 41) /* Explicitly unaligned (e.g. MOVDQU) */
+#define Avx ((u64)3 << 41) /* Advanced Vector Extensions */
+#define Aligned16 ((u64)4 << 41) /* Aligned to 16 byte boundary (e.g. FXSAVE) */
#define Fastop ((u64)1 << 44) /* Use opcode::u.fastop */
#define NoWrite ((u64)1 << 45) /* No writeback */
#define SrcWrite ((u64)1 << 46) /* Write back src operand */
@@ -446,6 +448,26 @@ FOP_END;
FOP_START(salc) "pushf; sbb %al, %al; popf \n\t" FOP_RET
FOP_END;
+/*
+ * XXX: inoutclob user must know where the argument is being expanded.
+ * Relying on CC_HAVE_ASM_GOTO would allow us to remove _fault.
+ */
+#define asm_safe(insn, inoutclob...) \
+({ \
+ int _fault = 0; \
+ \
+ asm volatile("1:" insn "\n" \
+ "2:\n" \
+ ".pushsection .fixup, \"ax\"\n" \
+ "3: movl $1, %[_fault]\n" \
+ " jmp 2b\n" \
+ ".popsection\n" \
+ _ASM_EXTABLE(1b, 3b) \
+ : [_fault] "+qm"(_fault) inoutclob ); \
+ \
+ _fault ? X86EMUL_UNHANDLEABLE : X86EMUL_CONTINUE; \
+})
+
static int emulator_check_intercept(struct x86_emulate_ctxt *ctxt,
enum x86_intercept intercept,
enum x86_intercept_stage stage)
@@ -632,21 +654,26 @@ static void set_segment_selector(struct x86_emulate_ctxt *ctxt, u16 selector,
* depending on whether they're AVX encoded or not.
*
* Also included is CMPXCHG16B which is not a vector instruction, yet it is
- * subject to the same check.
+ * subject to the same check. FXSAVE and FXRSTOR are checked here too as their
+ * 512 bytes of data must be aligned to a 16 byte boundary.
*/
-static bool insn_aligned(struct x86_emulate_ctxt *ctxt, unsigned size)
+static unsigned insn_alignment(struct x86_emulate_ctxt *ctxt, unsigned size)
{
- if (likely(size < 16))
- return false;
+ u64 alignment = ctxt->d & AlignMask;
- if (ctxt->d & Aligned)
- return true;
- else if (ctxt->d & Unaligned)
- return false;
- else if (ctxt->d & Avx)
- return false;
- else
- return true;
+ if (likely(size < 16))
+ return 1;
+
+ switch (alignment) {
+ case Unaligned:
+ case Avx:
+ return 1;
+ case Aligned16:
+ return 16;
+ case Aligned:
+ default:
+ return size;
+ }
}
static __always_inline int __linearize(struct x86_emulate_ctxt *ctxt,
@@ -704,7 +731,7 @@ static __always_inline int __linearize(struct x86_emulate_ctxt *ctxt,
}
break;
}
- if (insn_aligned(ctxt, size) && ((la & (size - 1)) != 0))
+ if (la & (insn_alignment(ctxt, size) - 1))
return emulate_gp(ctxt, 0);
return X86EMUL_CONTINUE;
bad:
@@ -3842,6 +3869,131 @@ static int em_movsxd(struct x86_emulate_ctxt *ctxt)
return X86EMUL_CONTINUE;
}
+static int check_fxsr(struct x86_emulate_ctxt *ctxt)
+{
+ u32 eax = 1, ebx, ecx = 0, edx;
+
+ ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx);
+ if (!(edx & FFL(FXSR)))
+ return emulate_ud(ctxt);
+
+ if (ctxt->ops->get_cr(ctxt, 0) & (X86_CR0_TS | X86_CR0_EM))
+ return emulate_nm(ctxt);
+
+ /*
+ * Don't emulate a case that should never be hit, instead of working
+ * around a lack of fxsave64/fxrstor64 on old compilers.
+ */
+ if (ctxt->mode >= X86EMUL_MODE_PROT64)
+ return X86EMUL_UNHANDLEABLE;
+
+ return X86EMUL_CONTINUE;
+}
+
+/*
+ * FXSAVE and FXRSTOR have 4 different formats depending on execution mode,
+ * 1) 16 bit mode
+ * 2) 32 bit mode
+ * - like (1), but FIP and FDP (foo) are only 16 bit. At least Intel CPUs
+ * preserve whole 32 bit values, though, so (1) and (2) are the same wrt.
+ * save and restore
+ * 3) 64-bit mode with REX.W prefix
+ * - like (2), but XMM 8-15 are being saved and restored
+ * 4) 64-bit mode without REX.W prefix
+ * - like (3), but FIP and FDP are 64 bit
+ *
+ * Emulation uses (3) for (1) and (2) and preserves XMM 8-15 to reach the
+ * desired result. (4) is not emulated.
+ *
+ * Note: Guest and host CPUID.(EAX=07H,ECX=0H):EBX[bit 13] (deprecate FPU CS
+ * and FPU DS) should match.
+ */
+static int em_fxsave(struct x86_emulate_ctxt *ctxt)
+{
+ struct fxregs_state fx_state;
+ size_t size;
+ int rc;
+
+ rc = check_fxsr(ctxt);
+ if (rc != X86EMUL_CONTINUE)
+ return rc;
+
+ ctxt->ops->get_fpu(ctxt);
+
+ rc = asm_safe("fxsave %[fx]", , [fx] "+m"(fx_state));
+
+ ctxt->ops->put_fpu(ctxt);
+
+ if (rc != X86EMUL_CONTINUE)
+ return rc;
+
+ if (ctxt->ops->get_cr(ctxt, 4) & X86_CR4_OSFXSR)
+ size = offsetof(struct fxregs_state, xmm_space[8 * 16/4]);
+ else
+ size = offsetof(struct fxregs_state, xmm_space[0]);
+
+ return segmented_write(ctxt, ctxt->memop.addr.mem, &fx_state, size);
+}
+
+static int fxrstor_fixup(struct x86_emulate_ctxt *ctxt,
+ struct fxregs_state *new)
+{
+ int rc = X86EMUL_CONTINUE;
+ struct fxregs_state old;
+
+ rc = asm_safe("fxsave %[fx]", , [fx] "+m"(old));
+ if (rc != X86EMUL_CONTINUE)
+ return rc;
+
+ /*
+ * 64 bit host will restore XMM 8-15, which is not correct on non-64
+ * bit guests. Load the current values in order to preserve 64 bit
+ * XMMs after fxrstor.
+ */
+#ifdef CONFIG_X86_64
+ /* XXX: accessing XMM 8-15 very awkwardly */
+ memcpy(&new->xmm_space[8 * 16/4], &old.xmm_space[8 * 16/4], 8 * 16);
+#endif
+
+ /*
+ * Hardware doesn't save and restore XMM 0-7 without CR4.OSFXSR, but
+ * does save and restore MXCSR.
+ */
+ if (!(ctxt->ops->get_cr(ctxt, 4) & X86_CR4_OSFXSR))
+ memcpy(new->xmm_space, old.xmm_space, 8 * 16);
+
+ return rc;
+}
+
+static int em_fxrstor(struct x86_emulate_ctxt *ctxt)
+{
+ struct fxregs_state fx_state;
+ int rc;
+
+ rc = check_fxsr(ctxt);
+ if (rc != X86EMUL_CONTINUE)
+ return rc;
+
+ rc = segmented_read(ctxt, ctxt->memop.addr.mem, &fx_state, 512);
+ if (rc != X86EMUL_CONTINUE)
+ return rc;
+
+ if (fx_state.mxcsr >> 16)
+ return emulate_gp(ctxt, 0);
+
+ ctxt->ops->get_fpu(ctxt);
+
+ if (ctxt->mode < X86EMUL_MODE_PROT64)
+ rc = fxrstor_fixup(ctxt, &fx_state);
+
+ if (rc == X86EMUL_CONTINUE)
+ rc = asm_safe("fxrstor %[fx]", : [fx] "m"(fx_state));
+
+ ctxt->ops->put_fpu(ctxt);
+
+ return rc;
+}
+
static bool valid_cr(int nr)
{
switch (nr) {
@@ -4194,7 +4346,9 @@ static const struct gprefix pfx_0f_ae_7 = {
};
static const struct group_dual group15 = { {
- N, N, N, N, N, N, N, GP(0, &pfx_0f_ae_7),
+ I(ModRM | Aligned16, em_fxsave),
+ I(ModRM | Aligned16, em_fxrstor),
+ N, N, N, N, N, GP(0, &pfx_0f_ae_7),
}, {
N, N, N, N, N, N, N, N,
} };
@@ -5066,21 +5220,13 @@ static bool string_insn_completed(struct x86_emulate_ctxt *ctxt)
static int flush_pending_x87_faults(struct x86_emulate_ctxt *ctxt)
{
- bool fault = false;
+ int rc;
ctxt->ops->get_fpu(ctxt);
- asm volatile("1: fwait \n\t"
- "2: \n\t"
- ".pushsection .fixup,\"ax\" \n\t"
- "3: \n\t"
- "movb $1, %[fault] \n\t"
- "jmp 2b \n\t"
- ".popsection \n\t"
- _ASM_EXTABLE(1b, 3b)
- : [fault]"+qm"(fault));
+ rc = asm_safe("fwait");
ctxt->ops->put_fpu(ctxt);
- if (unlikely(fault))
+ if (unlikely(rc != X86EMUL_CONTINUE))
return emulate_exception(ctxt, MF_VECTOR, 0, false);
return X86EMUL_CONTINUE;
diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
index 42b1c83741c8..99cde5220e07 100644
--- a/arch/x86/kvm/hyperv.c
+++ b/arch/x86/kvm/hyperv.c
@@ -291,7 +291,7 @@ static int synic_get_msr(struct kvm_vcpu_hv_synic *synic, u32 msr, u64 *pdata)
return ret;
}
-int synic_set_irq(struct kvm_vcpu_hv_synic *synic, u32 sint)
+static int synic_set_irq(struct kvm_vcpu_hv_synic *synic, u32 sint)
{
struct kvm_vcpu *vcpu = synic_to_vcpu(synic);
struct kvm_lapic_irq irq;
diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
index 16a7134eedac..a78b445ce411 100644
--- a/arch/x86/kvm/i8254.c
+++ b/arch/x86/kvm/i8254.c
@@ -212,7 +212,7 @@ static void kvm_pit_ack_irq(struct kvm_irq_ack_notifier *kian)
*/
smp_mb();
if (atomic_dec_if_positive(&ps->pending) > 0)
- kthread_queue_work(&pit->worker, &pit->expired);
+ kthread_queue_work(pit->worker, &pit->expired);
}
void __kvm_migrate_pit_timer(struct kvm_vcpu *vcpu)
@@ -272,7 +272,7 @@ static enum hrtimer_restart pit_timer_fn(struct hrtimer *data)
if (atomic_read(&ps->reinject))
atomic_inc(&ps->pending);
- kthread_queue_work(&pt->worker, &pt->expired);
+ kthread_queue_work(pt->worker, &pt->expired);
if (ps->is_periodic) {
hrtimer_add_expires_ns(&ps->timer, ps->period);
@@ -667,10 +667,8 @@ struct kvm_pit *kvm_create_pit(struct kvm *kvm, u32 flags)
pid_nr = pid_vnr(pid);
put_pid(pid);
- kthread_init_worker(&pit->worker);
- pit->worker_task = kthread_run(kthread_worker_fn, &pit->worker,
- "kvm-pit/%d", pid_nr);
- if (IS_ERR(pit->worker_task))
+ pit->worker = kthread_create_worker(0, "kvm-pit/%d", pid_nr);
+ if (IS_ERR(pit->worker))
goto fail_kthread;
kthread_init_work(&pit->expired, pit_do_work);
@@ -713,7 +711,7 @@ fail_register_speaker:
fail_register_pit:
mutex_unlock(&kvm->slots_lock);
kvm_pit_set_reinject(pit, false);
- kthread_stop(pit->worker_task);
+ kthread_destroy_worker(pit->worker);
fail_kthread:
kvm_free_irq_source_id(kvm, pit->irq_source_id);
fail_request:
@@ -730,8 +728,7 @@ void kvm_free_pit(struct kvm *kvm)
kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS, &pit->speaker_dev);
kvm_pit_set_reinject(pit, false);
hrtimer_cancel(&pit->pit_state.timer);
- kthread_flush_work(&pit->expired);
- kthread_stop(pit->worker_task);
+ kthread_destroy_worker(pit->worker);
kvm_free_irq_source_id(kvm, pit->irq_source_id);
kfree(pit);
}
diff --git a/arch/x86/kvm/i8254.h b/arch/x86/kvm/i8254.h
index 2f5af0798326..600bee9dcbbd 100644
--- a/arch/x86/kvm/i8254.h
+++ b/arch/x86/kvm/i8254.h
@@ -44,8 +44,7 @@ struct kvm_pit {
struct kvm_kpit_state pit_state;
int irq_source_id;
struct kvm_irq_mask_notifier mask_notifier;
- struct kthread_worker worker;
- struct task_struct *worker_task;
+ struct kthread_worker *worker;
struct kthread_work expired;
};
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 6f69340f9fa3..34a66b2d47e6 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -342,9 +342,11 @@ void __kvm_apic_update_irr(u32 *pir, void *regs)
u32 i, pir_val;
for (i = 0; i <= 7; i++) {
- pir_val = xchg(&pir[i], 0);
- if (pir_val)
+ pir_val = READ_ONCE(pir[i]);
+ if (pir_val) {
+ pir_val = xchg(&pir[i], 0);
*((u32 *)(regs + APIC_IRR + i * 0x10)) |= pir_val;
+ }
}
}
EXPORT_SYMBOL_GPL(__kvm_apic_update_irr);
@@ -1090,7 +1092,7 @@ static void apic_send_ipi(struct kvm_lapic *apic)
static u32 apic_get_tmcct(struct kvm_lapic *apic)
{
- ktime_t remaining;
+ ktime_t remaining, now;
s64 ns;
u32 tmcct;
@@ -1101,7 +1103,8 @@ static u32 apic_get_tmcct(struct kvm_lapic *apic)
apic->lapic_timer.period == 0)
return 0;
- remaining = hrtimer_get_remaining(&apic->lapic_timer.timer);
+ now = ktime_get();
+ remaining = ktime_sub(apic->lapic_timer.target_expiration, now);
if (ktime_to_ns(remaining) < 0)
remaining = ktime_set(0, 0);
@@ -1332,7 +1335,7 @@ static void start_sw_tscdeadline(struct kvm_lapic *apic)
local_irq_save(flags);
- now = apic->lapic_timer.timer.base->get_time();
+ now = ktime_get();
guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc());
if (likely(tscdeadline > guest_tsc)) {
ns = (tscdeadline - guest_tsc) * 1000000ULL;
@@ -1347,6 +1350,79 @@ static void start_sw_tscdeadline(struct kvm_lapic *apic)
local_irq_restore(flags);
}
+static void start_sw_period(struct kvm_lapic *apic)
+{
+ if (!apic->lapic_timer.period)
+ return;
+
+ if (apic_lvtt_oneshot(apic) &&
+ ktime_after(ktime_get(),
+ apic->lapic_timer.target_expiration)) {
+ apic_timer_expired(apic);
+ return;
+ }
+
+ hrtimer_start(&apic->lapic_timer.timer,
+ apic->lapic_timer.target_expiration,
+ HRTIMER_MODE_ABS_PINNED);
+}
+
+static bool set_target_expiration(struct kvm_lapic *apic)
+{
+ ktime_t now;
+ u64 tscl = rdtsc();
+
+ now = ktime_get();
+ apic->lapic_timer.period = (u64)kvm_lapic_get_reg(apic, APIC_TMICT)
+ * APIC_BUS_CYCLE_NS * apic->divide_count;
+
+ if (!apic->lapic_timer.period)
+ return false;
+
+ /*
+ * Do not allow the guest to program periodic timers with small
+ * interval, since the hrtimers are not throttled by the host
+ * scheduler.
+ */
+ if (apic_lvtt_period(apic)) {
+ s64 min_period = min_timer_period_us * 1000LL;
+
+ if (apic->lapic_timer.period < min_period) {
+ pr_info_ratelimited(
+ "kvm: vcpu %i: requested %lld ns "
+ "lapic timer period limited to %lld ns\n",
+ apic->vcpu->vcpu_id,
+ apic->lapic_timer.period, min_period);
+ apic->lapic_timer.period = min_period;
+ }
+ }
+
+ apic_debug("%s: bus cycle is %" PRId64 "ns, now 0x%016"
+ PRIx64 ", "
+ "timer initial count 0x%x, period %lldns, "
+ "expire @ 0x%016" PRIx64 ".\n", __func__,
+ APIC_BUS_CYCLE_NS, ktime_to_ns(now),
+ kvm_lapic_get_reg(apic, APIC_TMICT),
+ apic->lapic_timer.period,
+ ktime_to_ns(ktime_add_ns(now,
+ apic->lapic_timer.period)));
+
+ apic->lapic_timer.tscdeadline = kvm_read_l1_tsc(apic->vcpu, tscl) +
+ nsec_to_cycles(apic->vcpu, apic->lapic_timer.period);
+ apic->lapic_timer.target_expiration = ktime_add_ns(now, apic->lapic_timer.period);
+
+ return true;
+}
+
+static void advance_periodic_target_expiration(struct kvm_lapic *apic)
+{
+ apic->lapic_timer.tscdeadline +=
+ nsec_to_cycles(apic->vcpu, apic->lapic_timer.period);
+ apic->lapic_timer.target_expiration =
+ ktime_add_ns(apic->lapic_timer.target_expiration,
+ apic->lapic_timer.period);
+}
+
bool kvm_lapic_hv_timer_in_use(struct kvm_vcpu *vcpu)
{
if (!lapic_in_kernel(vcpu))
@@ -1356,52 +1432,59 @@ bool kvm_lapic_hv_timer_in_use(struct kvm_vcpu *vcpu)
}
EXPORT_SYMBOL_GPL(kvm_lapic_hv_timer_in_use);
-static void cancel_hv_tscdeadline(struct kvm_lapic *apic)
+static void cancel_hv_timer(struct kvm_lapic *apic)
{
kvm_x86_ops->cancel_hv_timer(apic->vcpu);
apic->lapic_timer.hv_timer_in_use = false;
}
-void kvm_lapic_expired_hv_timer(struct kvm_vcpu *vcpu)
-{
- struct kvm_lapic *apic = vcpu->arch.apic;
-
- WARN_ON(!apic->lapic_timer.hv_timer_in_use);
- WARN_ON(swait_active(&vcpu->wq));
- cancel_hv_tscdeadline(apic);
- apic_timer_expired(apic);
-}
-EXPORT_SYMBOL_GPL(kvm_lapic_expired_hv_timer);
-
-static bool start_hv_tscdeadline(struct kvm_lapic *apic)
+static bool start_hv_timer(struct kvm_lapic *apic)
{
u64 tscdeadline = apic->lapic_timer.tscdeadline;
- if (atomic_read(&apic->lapic_timer.pending) ||
+ if ((atomic_read(&apic->lapic_timer.pending) &&
+ !apic_lvtt_period(apic)) ||
kvm_x86_ops->set_hv_timer(apic->vcpu, tscdeadline)) {
if (apic->lapic_timer.hv_timer_in_use)
- cancel_hv_tscdeadline(apic);
+ cancel_hv_timer(apic);
} else {
apic->lapic_timer.hv_timer_in_use = true;
hrtimer_cancel(&apic->lapic_timer.timer);
/* In case the sw timer triggered in the window */
- if (atomic_read(&apic->lapic_timer.pending))
- cancel_hv_tscdeadline(apic);
+ if (atomic_read(&apic->lapic_timer.pending) &&
+ !apic_lvtt_period(apic))
+ cancel_hv_timer(apic);
}
trace_kvm_hv_timer_state(apic->vcpu->vcpu_id,
apic->lapic_timer.hv_timer_in_use);
return apic->lapic_timer.hv_timer_in_use;
}
+void kvm_lapic_expired_hv_timer(struct kvm_vcpu *vcpu)
+{
+ struct kvm_lapic *apic = vcpu->arch.apic;
+
+ WARN_ON(!apic->lapic_timer.hv_timer_in_use);
+ WARN_ON(swait_active(&vcpu->wq));
+ cancel_hv_timer(apic);
+ apic_timer_expired(apic);
+
+ if (apic_lvtt_period(apic) && apic->lapic_timer.period) {
+ advance_periodic_target_expiration(apic);
+ if (!start_hv_timer(apic))
+ start_sw_period(apic);
+ }
+}
+EXPORT_SYMBOL_GPL(kvm_lapic_expired_hv_timer);
+
void kvm_lapic_switch_to_hv_timer(struct kvm_vcpu *vcpu)
{
struct kvm_lapic *apic = vcpu->arch.apic;
WARN_ON(apic->lapic_timer.hv_timer_in_use);
- if (apic_lvtt_tscdeadline(apic))
- start_hv_tscdeadline(apic);
+ start_hv_timer(apic);
}
EXPORT_SYMBOL_GPL(kvm_lapic_switch_to_hv_timer);
@@ -1413,62 +1496,28 @@ void kvm_lapic_switch_to_sw_timer(struct kvm_vcpu *vcpu)
if (!apic->lapic_timer.hv_timer_in_use)
return;
- cancel_hv_tscdeadline(apic);
+ cancel_hv_timer(apic);
if (atomic_read(&apic->lapic_timer.pending))
return;
- start_sw_tscdeadline(apic);
+ if (apic_lvtt_period(apic) || apic_lvtt_oneshot(apic))
+ start_sw_period(apic);
+ else if (apic_lvtt_tscdeadline(apic))
+ start_sw_tscdeadline(apic);
}
EXPORT_SYMBOL_GPL(kvm_lapic_switch_to_sw_timer);
static void start_apic_timer(struct kvm_lapic *apic)
{
- ktime_t now;
-
atomic_set(&apic->lapic_timer.pending, 0);
if (apic_lvtt_period(apic) || apic_lvtt_oneshot(apic)) {
- /* lapic timer in oneshot or periodic mode */
- now = apic->lapic_timer.timer.base->get_time();
- apic->lapic_timer.period = (u64)kvm_lapic_get_reg(apic, APIC_TMICT)
- * APIC_BUS_CYCLE_NS * apic->divide_count;
-
- if (!apic->lapic_timer.period)
- return;
- /*
- * Do not allow the guest to program periodic timers with small
- * interval, since the hrtimers are not throttled by the host
- * scheduler.
- */
- if (apic_lvtt_period(apic)) {
- s64 min_period = min_timer_period_us * 1000LL;
-
- if (apic->lapic_timer.period < min_period) {
- pr_info_ratelimited(
- "kvm: vcpu %i: requested %lld ns "
- "lapic timer period limited to %lld ns\n",
- apic->vcpu->vcpu_id,
- apic->lapic_timer.period, min_period);
- apic->lapic_timer.period = min_period;
- }
- }
-
- hrtimer_start(&apic->lapic_timer.timer,
- ktime_add_ns(now, apic->lapic_timer.period),
- HRTIMER_MODE_ABS_PINNED);
-
- apic_debug("%s: bus cycle is %" PRId64 "ns, now 0x%016"
- PRIx64 ", "
- "timer initial count 0x%x, period %lldns, "
- "expire @ 0x%016" PRIx64 ".\n", __func__,
- APIC_BUS_CYCLE_NS, ktime_to_ns(now),
- kvm_lapic_get_reg(apic, APIC_TMICT),
- apic->lapic_timer.period,
- ktime_to_ns(ktime_add_ns(now,
- apic->lapic_timer.period)));
+ if (set_target_expiration(apic) &&
+ !(kvm_x86_ops->set_hv_timer && start_hv_timer(apic)))
+ start_sw_period(apic);
} else if (apic_lvtt_tscdeadline(apic)) {
- if (!(kvm_x86_ops->set_hv_timer && start_hv_tscdeadline(apic)))
+ if (!(kvm_x86_ops->set_hv_timer && start_hv_timer(apic)))
start_sw_tscdeadline(apic);
}
}
@@ -1701,13 +1750,22 @@ void kvm_free_lapic(struct kvm_vcpu *vcpu)
* LAPIC interface
*----------------------------------------------------------------------
*/
+u64 kvm_get_lapic_target_expiration_tsc(struct kvm_vcpu *vcpu)
+{
+ struct kvm_lapic *apic = vcpu->arch.apic;
+
+ if (!lapic_in_kernel(vcpu))
+ return 0;
+
+ return apic->lapic_timer.tscdeadline;
+}
u64 kvm_get_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu)
{
struct kvm_lapic *apic = vcpu->arch.apic;
- if (!lapic_in_kernel(vcpu) || apic_lvtt_oneshot(apic) ||
- apic_lvtt_period(apic))
+ if (!lapic_in_kernel(vcpu) ||
+ !apic_lvtt_tscdeadline(apic))
return 0;
return apic->lapic_timer.tscdeadline;
@@ -1748,14 +1806,17 @@ void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value)
u64 old_value = vcpu->arch.apic_base;
struct kvm_lapic *apic = vcpu->arch.apic;
- if (!apic) {
+ if (!apic)
value |= MSR_IA32_APICBASE_BSP;
- vcpu->arch.apic_base = value;
- return;
- }
vcpu->arch.apic_base = value;
+ if ((old_value ^ value) & MSR_IA32_APICBASE_ENABLE)
+ kvm_update_cpuid(vcpu);
+
+ if (!apic)
+ return;
+
/* update jump label if enable bit changes */
if ((old_value ^ value) & MSR_IA32_APICBASE_ENABLE) {
if (value & MSR_IA32_APICBASE_ENABLE) {
@@ -1909,6 +1970,7 @@ static enum hrtimer_restart apic_timer_fn(struct hrtimer *data)
apic_timer_expired(apic);
if (lapic_is_periodic(apic)) {
+ advance_periodic_target_expiration(apic);
hrtimer_add_expires_ns(&ktimer->timer, ktimer->period);
return HRTIMER_RESTART;
} else
@@ -1993,6 +2055,10 @@ void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu)
kvm_apic_local_deliver(apic, APIC_LVTT);
if (apic_lvtt_tscdeadline(apic))
apic->lapic_timer.tscdeadline = 0;
+ if (apic_lvtt_oneshot(apic)) {
+ apic->lapic_timer.tscdeadline = 0;
+ apic->lapic_timer.target_expiration = ktime_set(0, 0);
+ }
atomic_set(&apic->lapic_timer.pending, 0);
}
}
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
index f60d01c29d51..e0c80233b3e1 100644
--- a/arch/x86/kvm/lapic.h
+++ b/arch/x86/kvm/lapic.h
@@ -15,6 +15,7 @@
struct kvm_timer {
struct hrtimer timer;
s64 period; /* unit: ns */
+ ktime_t target_expiration;
u32 timer_mode;
u32 timer_mode_mask;
u64 tscdeadline;
@@ -85,6 +86,7 @@ int kvm_apic_get_state(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s);
int kvm_apic_set_state(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s);
int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu);
+u64 kvm_get_lapic_target_expiration_tsc(struct kvm_vcpu *vcpu);
u64 kvm_get_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu);
void kvm_set_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu, u64 data);
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index d9c7e986b4e4..7012de4a1fed 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -1660,17 +1660,9 @@ int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end)
* This has some overhead, but not as much as the cost of swapping
* out actively used pages or breaking up actively used hugepages.
*/
- if (!shadow_accessed_mask) {
- /*
- * We are holding the kvm->mmu_lock, and we are blowing up
- * shadow PTEs. MMU notifier consumers need to be kept at bay.
- * This is correct as long as we don't decouple the mmu_lock
- * protected regions (like invalidate_range_start|end does).
- */
- kvm->mmu_notifier_seq++;
+ if (!shadow_accessed_mask)
return kvm_handle_hva_range(kvm, start, end, 0,
kvm_unmap_rmapp);
- }
return kvm_handle_hva_range(kvm, start, end, 0, kvm_age_rmapp);
}
@@ -4405,7 +4397,8 @@ static u64 *get_written_sptes(struct kvm_mmu_page *sp, gpa_t gpa, int *nspte)
}
static void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
- const u8 *new, int bytes)
+ const u8 *new, int bytes,
+ struct kvm_page_track_notifier_node *node)
{
gfn_t gfn = gpa >> PAGE_SHIFT;
struct kvm_mmu_page *sp;
@@ -4508,7 +4501,7 @@ static void make_mmu_pages_available(struct kvm_vcpu *vcpu)
kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list);
}
-int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u32 error_code,
+int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u64 error_code,
void *insn, int insn_len)
{
int r, emulation_type = EMULTYPE_RETRY;
@@ -4527,12 +4520,28 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u32 error_code,
return r;
}
- r = vcpu->arch.mmu.page_fault(vcpu, cr2, error_code, false);
+ r = vcpu->arch.mmu.page_fault(vcpu, cr2, lower_32_bits(error_code),
+ false);
if (r < 0)
return r;
if (!r)
return 1;
+ /*
+ * Before emulating the instruction, check if the error code
+ * was due to a RO violation while translating the guest page.
+ * This can occur when using nested virtualization with nested
+ * paging in both guests. If true, we simply unprotect the page
+ * and resume the guest.
+ *
+ * Note: AMD only (since it supports the PFERR_GUEST_PAGE_MASK used
+ * in PFERR_NEXT_GUEST_PAGE)
+ */
+ if (error_code == PFERR_NESTED_GUEST_PAGE) {
+ kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(cr2));
+ return 1;
+ }
+
if (mmio_info_in_cache(vcpu, cr2, direct))
emulation_type = 0;
emulate:
@@ -4617,11 +4626,19 @@ void kvm_mmu_setup(struct kvm_vcpu *vcpu)
init_kvm_mmu(vcpu);
}
+static void kvm_mmu_invalidate_zap_pages_in_memslot(struct kvm *kvm,
+ struct kvm_memory_slot *slot,
+ struct kvm_page_track_notifier_node *node)
+{
+ kvm_mmu_invalidate_zap_all_pages(kvm);
+}
+
void kvm_mmu_init_vm(struct kvm *kvm)
{
struct kvm_page_track_notifier_node *node = &kvm->arch.mmu_sp_tracker;
node->track_write = kvm_mmu_pte_write;
+ node->track_flush_slot = kvm_mmu_invalidate_zap_pages_in_memslot;
kvm_page_track_register_notifier(kvm, node);
}
@@ -4958,7 +4975,7 @@ void kvm_mmu_invalidate_mmio_sptes(struct kvm *kvm, struct kvm_memslots *slots)
* zap all shadow pages.
*/
if (unlikely((slots->generation & MMIO_GEN_MASK) == 0)) {
- printk_ratelimited(KERN_DEBUG "kvm: zapping shadow pages for mmio generation wraparound\n");
+ kvm_debug_ratelimited("kvm: zapping shadow pages for mmio generation wraparound\n");
kvm_mmu_invalidate_zap_all_pages(kvm);
}
}
diff --git a/arch/x86/kvm/page_track.c b/arch/x86/kvm/page_track.c
index b431539c3714..4a1c13eaa518 100644
--- a/arch/x86/kvm/page_track.c
+++ b/arch/x86/kvm/page_track.c
@@ -106,6 +106,7 @@ void kvm_slot_page_track_add_page(struct kvm *kvm,
if (kvm_mmu_slot_gfn_write_protect(kvm, slot, gfn))
kvm_flush_remote_tlbs(kvm);
}
+EXPORT_SYMBOL_GPL(kvm_slot_page_track_add_page);
/*
* remove the guest page from the tracking pool which stops the interception
@@ -135,6 +136,7 @@ void kvm_slot_page_track_remove_page(struct kvm *kvm,
*/
kvm_mmu_gfn_allow_lpage(slot, gfn);
}
+EXPORT_SYMBOL_GPL(kvm_slot_page_track_remove_page);
/*
* check if the corresponding access on the specified guest page is tracked.
@@ -181,6 +183,7 @@ kvm_page_track_register_notifier(struct kvm *kvm,
hlist_add_head_rcu(&n->node, &head->track_notifier_list);
spin_unlock(&kvm->mmu_lock);
}
+EXPORT_SYMBOL_GPL(kvm_page_track_register_notifier);
/*
* stop receiving the event interception. It is the opposed operation of
@@ -199,6 +202,7 @@ kvm_page_track_unregister_notifier(struct kvm *kvm,
spin_unlock(&kvm->mmu_lock);
synchronize_srcu(&head->track_srcu);
}
+EXPORT_SYMBOL_GPL(kvm_page_track_unregister_notifier);
/*
* Notify the node that write access is intercepted and write emulation is
@@ -222,6 +226,31 @@ void kvm_page_track_write(struct kvm_vcpu *vcpu, gpa_t gpa, const u8 *new,
idx = srcu_read_lock(&head->track_srcu);
hlist_for_each_entry_rcu(n, &head->track_notifier_list, node)
if (n->track_write)
- n->track_write(vcpu, gpa, new, bytes);
+ n->track_write(vcpu, gpa, new, bytes, n);
+ srcu_read_unlock(&head->track_srcu, idx);
+}
+
+/*
+ * Notify the node that memory slot is being removed or moved so that it can
+ * drop write-protection for the pages in the memory slot.
+ *
+ * The node should figure out it has any write-protected pages in this slot
+ * by itself.
+ */
+void kvm_page_track_flush_slot(struct kvm *kvm, struct kvm_memory_slot *slot)
+{
+ struct kvm_page_track_notifier_head *head;
+ struct kvm_page_track_notifier_node *n;
+ int idx;
+
+ head = &kvm->arch.track_notifier_head;
+
+ if (hlist_empty(&head->track_notifier_list))
+ return;
+
+ idx = srcu_read_lock(&head->track_srcu);
+ hlist_for_each_entry_rcu(n, &head->track_notifier_list, node)
+ if (n->track_flush_slot)
+ n->track_flush_slot(kvm, slot, n);
srcu_read_unlock(&head->track_srcu, idx);
}
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 8ca1eca5038d..08a4d3ab3455 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -2074,7 +2074,7 @@ static void svm_set_dr7(struct kvm_vcpu *vcpu, unsigned long value)
static int pf_interception(struct vcpu_svm *svm)
{
u64 fault_address = svm->vmcb->control.exit_info_2;
- u32 error_code;
+ u64 error_code;
int r = 1;
switch (svm->apf_reason) {
@@ -2270,7 +2270,7 @@ static int io_interception(struct vcpu_svm *svm)
++svm->vcpu.stat.io_exits;
string = (io_info & SVM_IOIO_STR_MASK) != 0;
in = (io_info & SVM_IOIO_TYPE_MASK) != 0;
- if (string || in)
+ if (string)
return emulate_instruction(vcpu, 0) == EMULATE_DONE;
port = io_info >> 16;
@@ -2278,7 +2278,8 @@ static int io_interception(struct vcpu_svm *svm)
svm->next_rip = svm->vmcb->control.exit_info_2;
skip_emulated_instruction(&svm->vcpu);
- return kvm_fast_pio_out(vcpu, size, port);
+ return in ? kvm_fast_pio_in(vcpu, size, port)
+ : kvm_fast_pio_out(vcpu, size, port);
}
static int nmi_interception(struct vcpu_svm *svm)
@@ -3150,8 +3151,7 @@ static int skinit_interception(struct vcpu_svm *svm)
static int wbinvd_interception(struct vcpu_svm *svm)
{
- kvm_emulate_wbinvd(&svm->vcpu);
- return 1;
+ return kvm_emulate_wbinvd(&svm->vcpu);
}
static int xsetbv_interception(struct vcpu_svm *svm)
@@ -3238,8 +3238,7 @@ static int task_switch_interception(struct vcpu_svm *svm)
static int cpuid_interception(struct vcpu_svm *svm)
{
svm->next_rip = kvm_rip_read(&svm->vcpu) + 2;
- kvm_emulate_cpuid(&svm->vcpu);
- return 1;
+ return kvm_emulate_cpuid(&svm->vcpu);
}
static int iret_interception(struct vcpu_svm *svm)
@@ -3275,9 +3274,7 @@ static int rdpmc_interception(struct vcpu_svm *svm)
return emulate_on_interception(svm);
err = kvm_rdpmc(&svm->vcpu);
- kvm_complete_insn_gp(&svm->vcpu, err);
-
- return 1;
+ return kvm_complete_insn_gp(&svm->vcpu, err);
}
static bool check_selective_cr0_intercepted(struct vcpu_svm *svm,
@@ -3374,9 +3371,7 @@ static int cr_interception(struct vcpu_svm *svm)
}
kvm_register_write(&svm->vcpu, reg, val);
}
- kvm_complete_insn_gp(&svm->vcpu, err);
-
- return 1;
+ return kvm_complete_insn_gp(&svm->vcpu, err);
}
static int dr_interception(struct vcpu_svm *svm)
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 3980da515fd0..aae43c6f2472 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -132,6 +132,22 @@ module_param_named(preemption_timer, enable_preemption_timer, bool, S_IRUGO);
#define VMX_MISC_EMULATED_PREEMPTION_TIMER_RATE 5
+#define VMX_VPID_EXTENT_SUPPORTED_MASK \
+ (VMX_VPID_EXTENT_INDIVIDUAL_ADDR_BIT | \
+ VMX_VPID_EXTENT_SINGLE_CONTEXT_BIT | \
+ VMX_VPID_EXTENT_GLOBAL_CONTEXT_BIT | \
+ VMX_VPID_EXTENT_SINGLE_NON_GLOBAL_BIT)
+
+/*
+ * Hyper-V requires all of these, so mark them as supported even though
+ * they are just treated the same as all-context.
+ */
+#define VMX_VPID_EXTENT_SUPPORTED_MASK \
+ (VMX_VPID_EXTENT_INDIVIDUAL_ADDR_BIT | \
+ VMX_VPID_EXTENT_SINGLE_CONTEXT_BIT | \
+ VMX_VPID_EXTENT_GLOBAL_CONTEXT_BIT | \
+ VMX_VPID_EXTENT_SINGLE_NON_GLOBAL_BIT)
+
/*
* These 2 parameters are used to config the controls for Pause-Loop Exiting:
* ple_gap: upper bound on the amount of time between two successive
@@ -446,23 +462,31 @@ struct nested_vmx {
u16 vpid02;
u16 last_vpid;
+ /*
+ * We only store the "true" versions of the VMX capability MSRs. We
+ * generate the "non-true" versions by setting the must-be-1 bits
+ * according to the SDM.
+ */
u32 nested_vmx_procbased_ctls_low;
u32 nested_vmx_procbased_ctls_high;
- u32 nested_vmx_true_procbased_ctls_low;
u32 nested_vmx_secondary_ctls_low;
u32 nested_vmx_secondary_ctls_high;
u32 nested_vmx_pinbased_ctls_low;
u32 nested_vmx_pinbased_ctls_high;
u32 nested_vmx_exit_ctls_low;
u32 nested_vmx_exit_ctls_high;
- u32 nested_vmx_true_exit_ctls_low;
u32 nested_vmx_entry_ctls_low;
u32 nested_vmx_entry_ctls_high;
- u32 nested_vmx_true_entry_ctls_low;
u32 nested_vmx_misc_low;
u32 nested_vmx_misc_high;
u32 nested_vmx_ept_caps;
u32 nested_vmx_vpid_caps;
+ u64 nested_vmx_basic;
+ u64 nested_vmx_cr0_fixed0;
+ u64 nested_vmx_cr0_fixed1;
+ u64 nested_vmx_cr4_fixed0;
+ u64 nested_vmx_cr4_fixed1;
+ u64 nested_vmx_vmcs_enum;
};
#define POSTED_INTR_ON 0
@@ -520,6 +544,12 @@ static inline void pi_set_sn(struct pi_desc *pi_desc)
(unsigned long *)&pi_desc->control);
}
+static inline void pi_clear_on(struct pi_desc *pi_desc)
+{
+ clear_bit(POSTED_INTR_ON,
+ (unsigned long *)&pi_desc->control);
+}
+
static inline int pi_test_on(struct pi_desc *pi_desc)
{
return test_bit(POSTED_INTR_ON,
@@ -920,16 +950,32 @@ static DEFINE_PER_CPU(struct desc_ptr, host_gdt);
static DEFINE_PER_CPU(struct list_head, blocked_vcpu_on_cpu);
static DEFINE_PER_CPU(spinlock_t, blocked_vcpu_on_cpu_lock);
-static unsigned long *vmx_io_bitmap_a;
-static unsigned long *vmx_io_bitmap_b;
-static unsigned long *vmx_msr_bitmap_legacy;
-static unsigned long *vmx_msr_bitmap_longmode;
-static unsigned long *vmx_msr_bitmap_legacy_x2apic;
-static unsigned long *vmx_msr_bitmap_longmode_x2apic;
-static unsigned long *vmx_msr_bitmap_legacy_x2apic_apicv_inactive;
-static unsigned long *vmx_msr_bitmap_longmode_x2apic_apicv_inactive;
-static unsigned long *vmx_vmread_bitmap;
-static unsigned long *vmx_vmwrite_bitmap;
+enum {
+ VMX_IO_BITMAP_A,
+ VMX_IO_BITMAP_B,
+ VMX_MSR_BITMAP_LEGACY,
+ VMX_MSR_BITMAP_LONGMODE,
+ VMX_MSR_BITMAP_LEGACY_X2APIC_APICV,
+ VMX_MSR_BITMAP_LONGMODE_X2APIC_APICV,
+ VMX_MSR_BITMAP_LEGACY_X2APIC,
+ VMX_MSR_BITMAP_LONGMODE_X2APIC,
+ VMX_VMREAD_BITMAP,
+ VMX_VMWRITE_BITMAP,
+ VMX_BITMAP_NR
+};
+
+static unsigned long *vmx_bitmap[VMX_BITMAP_NR];
+
+#define vmx_io_bitmap_a (vmx_bitmap[VMX_IO_BITMAP_A])
+#define vmx_io_bitmap_b (vmx_bitmap[VMX_IO_BITMAP_B])
+#define vmx_msr_bitmap_legacy (vmx_bitmap[VMX_MSR_BITMAP_LEGACY])
+#define vmx_msr_bitmap_longmode (vmx_bitmap[VMX_MSR_BITMAP_LONGMODE])
+#define vmx_msr_bitmap_legacy_x2apic_apicv (vmx_bitmap[VMX_MSR_BITMAP_LEGACY_X2APIC_APICV])
+#define vmx_msr_bitmap_longmode_x2apic_apicv (vmx_bitmap[VMX_MSR_BITMAP_LONGMODE_X2APIC_APICV])
+#define vmx_msr_bitmap_legacy_x2apic (vmx_bitmap[VMX_MSR_BITMAP_LEGACY_X2APIC])
+#define vmx_msr_bitmap_longmode_x2apic (vmx_bitmap[VMX_MSR_BITMAP_LONGMODE_X2APIC])
+#define vmx_vmread_bitmap (vmx_bitmap[VMX_VMREAD_BITMAP])
+#define vmx_vmwrite_bitmap (vmx_bitmap[VMX_VMWRITE_BITMAP])
static bool cpu_has_load_ia32_efer;
static bool cpu_has_load_perf_global_ctrl;
@@ -2523,14 +2569,14 @@ static void vmx_set_msr_bitmap(struct kvm_vcpu *vcpu)
SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE)) {
if (enable_apicv && kvm_vcpu_apicv_active(vcpu)) {
if (is_long_mode(vcpu))
- msr_bitmap = vmx_msr_bitmap_longmode_x2apic;
+ msr_bitmap = vmx_msr_bitmap_longmode_x2apic_apicv;
else
- msr_bitmap = vmx_msr_bitmap_legacy_x2apic;
+ msr_bitmap = vmx_msr_bitmap_legacy_x2apic_apicv;
} else {
if (is_long_mode(vcpu))
- msr_bitmap = vmx_msr_bitmap_longmode_x2apic_apicv_inactive;
+ msr_bitmap = vmx_msr_bitmap_longmode_x2apic;
else
- msr_bitmap = vmx_msr_bitmap_legacy_x2apic_apicv_inactive;
+ msr_bitmap = vmx_msr_bitmap_legacy_x2apic;
}
} else {
if (is_long_mode(vcpu))
@@ -2706,9 +2752,7 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx)
vmx->nested.nested_vmx_exit_ctls_high |= VM_EXIT_CLEAR_BNDCFGS;
/* We support free control of debug control saving. */
- vmx->nested.nested_vmx_true_exit_ctls_low =
- vmx->nested.nested_vmx_exit_ctls_low &
- ~VM_EXIT_SAVE_DEBUG_CONTROLS;
+ vmx->nested.nested_vmx_exit_ctls_low &= ~VM_EXIT_SAVE_DEBUG_CONTROLS;
/* entry controls */
rdmsr(MSR_IA32_VMX_ENTRY_CTLS,
@@ -2727,9 +2771,7 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx)
vmx->nested.nested_vmx_entry_ctls_high |= VM_ENTRY_LOAD_BNDCFGS;
/* We support free control of debug control loading. */
- vmx->nested.nested_vmx_true_entry_ctls_low =
- vmx->nested.nested_vmx_entry_ctls_low &
- ~VM_ENTRY_LOAD_DEBUG_CONTROLS;
+ vmx->nested.nested_vmx_entry_ctls_low &= ~VM_ENTRY_LOAD_DEBUG_CONTROLS;
/* cpu-based controls */
rdmsr(MSR_IA32_VMX_PROCBASED_CTLS,
@@ -2762,8 +2804,7 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx)
CPU_BASED_USE_MSR_BITMAPS;
/* We support free control of CR3 access interception. */
- vmx->nested.nested_vmx_true_procbased_ctls_low =
- vmx->nested.nested_vmx_procbased_ctls_low &
+ vmx->nested.nested_vmx_procbased_ctls_low &=
~(CPU_BASED_CR3_LOAD_EXITING | CPU_BASED_CR3_STORE_EXITING);
/* secondary cpu-based controls */
@@ -2774,6 +2815,7 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx)
vmx->nested.nested_vmx_secondary_ctls_high &=
SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
SECONDARY_EXEC_RDTSCP |
+ SECONDARY_EXEC_DESC |
SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE |
SECONDARY_EXEC_ENABLE_VPID |
SECONDARY_EXEC_APIC_REGISTER_VIRT |
@@ -2805,8 +2847,7 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx)
*/
if (enable_vpid)
vmx->nested.nested_vmx_vpid_caps = VMX_VPID_INVVPID_BIT |
- VMX_VPID_EXTENT_SINGLE_CONTEXT_BIT |
- VMX_VPID_EXTENT_GLOBAL_CONTEXT_BIT;
+ VMX_VPID_EXTENT_SUPPORTED_MASK;
else
vmx->nested.nested_vmx_vpid_caps = 0;
@@ -2823,14 +2864,52 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx)
VMX_MISC_EMULATED_PREEMPTION_TIMER_RATE |
VMX_MISC_ACTIVITY_HLT;
vmx->nested.nested_vmx_misc_high = 0;
+
+ /*
+ * This MSR reports some information about VMX support. We
+ * should return information about the VMX we emulate for the
+ * guest, and the VMCS structure we give it - not about the
+ * VMX support of the underlying hardware.
+ */
+ vmx->nested.nested_vmx_basic =
+ VMCS12_REVISION |
+ VMX_BASIC_TRUE_CTLS |
+ ((u64)VMCS12_SIZE << VMX_BASIC_VMCS_SIZE_SHIFT) |
+ (VMX_BASIC_MEM_TYPE_WB << VMX_BASIC_MEM_TYPE_SHIFT);
+
+ if (cpu_has_vmx_basic_inout())
+ vmx->nested.nested_vmx_basic |= VMX_BASIC_INOUT;
+
+ /*
+ * These MSRs specify bits which the guest must keep fixed on
+ * while L1 is in VMXON mode (in L1's root mode, or running an L2).
+ * We picked the standard core2 setting.
+ */
+#define VMXON_CR0_ALWAYSON (X86_CR0_PE | X86_CR0_PG | X86_CR0_NE)
+#define VMXON_CR4_ALWAYSON X86_CR4_VMXE
+ vmx->nested.nested_vmx_cr0_fixed0 = VMXON_CR0_ALWAYSON;
+ vmx->nested.nested_vmx_cr4_fixed0 = VMXON_CR4_ALWAYSON;
+
+ /* These MSRs specify bits which the guest must keep fixed off. */
+ rdmsrl(MSR_IA32_VMX_CR0_FIXED1, vmx->nested.nested_vmx_cr0_fixed1);
+ rdmsrl(MSR_IA32_VMX_CR4_FIXED1, vmx->nested.nested_vmx_cr4_fixed1);
+
+ /* highest index: VMX_PREEMPTION_TIMER_VALUE */
+ vmx->nested.nested_vmx_vmcs_enum = 0x2e;
+}
+
+/*
+ * if fixed0[i] == 1: val[i] must be 1
+ * if fixed1[i] == 0: val[i] must be 0
+ */
+static inline bool fixed_bits_valid(u64 val, u64 fixed0, u64 fixed1)
+{
+ return ((val & fixed1) | fixed0) == val;
}
static inline bool vmx_control_verify(u32 control, u32 low, u32 high)
{
- /*
- * Bits 0 in high must be 0, and bits 1 in low must be 1.
- */
- return ((control & high) | low) == control;
+ return fixed_bits_valid(control, low, high);
}
static inline u64 vmx_control_msr(u32 low, u32 high)
@@ -2838,87 +2917,285 @@ static inline u64 vmx_control_msr(u32 low, u32 high)
return low | ((u64)high << 32);
}
-/* Returns 0 on success, non-0 otherwise. */
-static int vmx_get_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
+static bool is_bitwise_subset(u64 superset, u64 subset, u64 mask)
+{
+ superset &= mask;
+ subset &= mask;
+
+ return (superset | subset) == superset;
+}
+
+static int vmx_restore_vmx_basic(struct vcpu_vmx *vmx, u64 data)
+{
+ const u64 feature_and_reserved =
+ /* feature (except bit 48; see below) */
+ BIT_ULL(49) | BIT_ULL(54) | BIT_ULL(55) |
+ /* reserved */
+ BIT_ULL(31) | GENMASK_ULL(47, 45) | GENMASK_ULL(63, 56);
+ u64 vmx_basic = vmx->nested.nested_vmx_basic;
+
+ if (!is_bitwise_subset(vmx_basic, data, feature_and_reserved))
+ return -EINVAL;
+
+ /*
+ * KVM does not emulate a version of VMX that constrains physical
+ * addresses of VMX structures (e.g. VMCS) to 32-bits.
+ */
+ if (data & BIT_ULL(48))
+ return -EINVAL;
+
+ if (vmx_basic_vmcs_revision_id(vmx_basic) !=
+ vmx_basic_vmcs_revision_id(data))
+ return -EINVAL;
+
+ if (vmx_basic_vmcs_size(vmx_basic) > vmx_basic_vmcs_size(data))
+ return -EINVAL;
+
+ vmx->nested.nested_vmx_basic = data;
+ return 0;
+}
+
+static int
+vmx_restore_control_msr(struct vcpu_vmx *vmx, u32 msr_index, u64 data)
+{
+ u64 supported;
+ u32 *lowp, *highp;
+
+ switch (msr_index) {
+ case MSR_IA32_VMX_TRUE_PINBASED_CTLS:
+ lowp = &vmx->nested.nested_vmx_pinbased_ctls_low;
+ highp = &vmx->nested.nested_vmx_pinbased_ctls_high;
+ break;
+ case MSR_IA32_VMX_TRUE_PROCBASED_CTLS:
+ lowp = &vmx->nested.nested_vmx_procbased_ctls_low;
+ highp = &vmx->nested.nested_vmx_procbased_ctls_high;
+ break;
+ case MSR_IA32_VMX_TRUE_EXIT_CTLS:
+ lowp = &vmx->nested.nested_vmx_exit_ctls_low;
+ highp = &vmx->nested.nested_vmx_exit_ctls_high;
+ break;
+ case MSR_IA32_VMX_TRUE_ENTRY_CTLS:
+ lowp = &vmx->nested.nested_vmx_entry_ctls_low;
+ highp = &vmx->nested.nested_vmx_entry_ctls_high;
+ break;
+ case MSR_IA32_VMX_PROCBASED_CTLS2:
+ lowp = &vmx->nested.nested_vmx_secondary_ctls_low;
+ highp = &vmx->nested.nested_vmx_secondary_ctls_high;
+ break;
+ default:
+ BUG();
+ }
+
+ supported = vmx_control_msr(*lowp, *highp);
+
+ /* Check must-be-1 bits are still 1. */
+ if (!is_bitwise_subset(data, supported, GENMASK_ULL(31, 0)))
+ return -EINVAL;
+
+ /* Check must-be-0 bits are still 0. */
+ if (!is_bitwise_subset(supported, data, GENMASK_ULL(63, 32)))
+ return -EINVAL;
+
+ *lowp = data;
+ *highp = data >> 32;
+ return 0;
+}
+
+static int vmx_restore_vmx_misc(struct vcpu_vmx *vmx, u64 data)
+{
+ const u64 feature_and_reserved_bits =
+ /* feature */
+ BIT_ULL(5) | GENMASK_ULL(8, 6) | BIT_ULL(14) | BIT_ULL(15) |
+ BIT_ULL(28) | BIT_ULL(29) | BIT_ULL(30) |
+ /* reserved */
+ GENMASK_ULL(13, 9) | BIT_ULL(31);
+ u64 vmx_misc;
+
+ vmx_misc = vmx_control_msr(vmx->nested.nested_vmx_misc_low,
+ vmx->nested.nested_vmx_misc_high);
+
+ if (!is_bitwise_subset(vmx_misc, data, feature_and_reserved_bits))
+ return -EINVAL;
+
+ if ((vmx->nested.nested_vmx_pinbased_ctls_high &
+ PIN_BASED_VMX_PREEMPTION_TIMER) &&
+ vmx_misc_preemption_timer_rate(data) !=
+ vmx_misc_preemption_timer_rate(vmx_misc))
+ return -EINVAL;
+
+ if (vmx_misc_cr3_count(data) > vmx_misc_cr3_count(vmx_misc))
+ return -EINVAL;
+
+ if (vmx_misc_max_msr(data) > vmx_misc_max_msr(vmx_misc))
+ return -EINVAL;
+
+ if (vmx_misc_mseg_revid(data) != vmx_misc_mseg_revid(vmx_misc))
+ return -EINVAL;
+
+ vmx->nested.nested_vmx_misc_low = data;
+ vmx->nested.nested_vmx_misc_high = data >> 32;
+ return 0;
+}
+
+static int vmx_restore_vmx_ept_vpid_cap(struct vcpu_vmx *vmx, u64 data)
+{
+ u64 vmx_ept_vpid_cap;
+
+ vmx_ept_vpid_cap = vmx_control_msr(vmx->nested.nested_vmx_ept_caps,
+ vmx->nested.nested_vmx_vpid_caps);
+
+ /* Every bit is either reserved or a feature bit. */
+ if (!is_bitwise_subset(vmx_ept_vpid_cap, data, -1ULL))
+ return -EINVAL;
+
+ vmx->nested.nested_vmx_ept_caps = data;
+ vmx->nested.nested_vmx_vpid_caps = data >> 32;
+ return 0;
+}
+
+static int vmx_restore_fixed0_msr(struct vcpu_vmx *vmx, u32 msr_index, u64 data)
+{
+ u64 *msr;
+
+ switch (msr_index) {
+ case MSR_IA32_VMX_CR0_FIXED0:
+ msr = &vmx->nested.nested_vmx_cr0_fixed0;
+ break;
+ case MSR_IA32_VMX_CR4_FIXED0:
+ msr = &vmx->nested.nested_vmx_cr4_fixed0;
+ break;
+ default:
+ BUG();
+ }
+
+ /*
+ * 1 bits (which indicates bits which "must-be-1" during VMX operation)
+ * must be 1 in the restored value.
+ */
+ if (!is_bitwise_subset(data, *msr, -1ULL))
+ return -EINVAL;
+
+ *msr = data;
+ return 0;
+}
+
+/*
+ * Called when userspace is restoring VMX MSRs.
+ *
+ * Returns 0 on success, non-0 otherwise.
+ */
+static int vmx_set_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
switch (msr_index) {
case MSR_IA32_VMX_BASIC:
+ return vmx_restore_vmx_basic(vmx, data);
+ case MSR_IA32_VMX_PINBASED_CTLS:
+ case MSR_IA32_VMX_PROCBASED_CTLS:
+ case MSR_IA32_VMX_EXIT_CTLS:
+ case MSR_IA32_VMX_ENTRY_CTLS:
+ /*
+ * The "non-true" VMX capability MSRs are generated from the
+ * "true" MSRs, so we do not support restoring them directly.
+ *
+ * If userspace wants to emulate VMX_BASIC[55]=0, userspace
+ * should restore the "true" MSRs with the must-be-1 bits
+ * set according to the SDM Vol 3. A.2 "RESERVED CONTROLS AND
+ * DEFAULT SETTINGS".
+ */
+ return -EINVAL;
+ case MSR_IA32_VMX_TRUE_PINBASED_CTLS:
+ case MSR_IA32_VMX_TRUE_PROCBASED_CTLS:
+ case MSR_IA32_VMX_TRUE_EXIT_CTLS:
+ case MSR_IA32_VMX_TRUE_ENTRY_CTLS:
+ case MSR_IA32_VMX_PROCBASED_CTLS2:
+ return vmx_restore_control_msr(vmx, msr_index, data);
+ case MSR_IA32_VMX_MISC:
+ return vmx_restore_vmx_misc(vmx, data);
+ case MSR_IA32_VMX_CR0_FIXED0:
+ case MSR_IA32_VMX_CR4_FIXED0:
+ return vmx_restore_fixed0_msr(vmx, msr_index, data);
+ case MSR_IA32_VMX_CR0_FIXED1:
+ case MSR_IA32_VMX_CR4_FIXED1:
+ /*
+ * These MSRs are generated based on the vCPU's CPUID, so we
+ * do not support restoring them directly.
+ */
+ return -EINVAL;
+ case MSR_IA32_VMX_EPT_VPID_CAP:
+ return vmx_restore_vmx_ept_vpid_cap(vmx, data);
+ case MSR_IA32_VMX_VMCS_ENUM:
+ vmx->nested.nested_vmx_vmcs_enum = data;
+ return 0;
+ default:
/*
- * This MSR reports some information about VMX support. We
- * should return information about the VMX we emulate for the
- * guest, and the VMCS structure we give it - not about the
- * VMX support of the underlying hardware.
+ * The rest of the VMX capability MSRs do not support restore.
*/
- *pdata = VMCS12_REVISION | VMX_BASIC_TRUE_CTLS |
- ((u64)VMCS12_SIZE << VMX_BASIC_VMCS_SIZE_SHIFT) |
- (VMX_BASIC_MEM_TYPE_WB << VMX_BASIC_MEM_TYPE_SHIFT);
- if (cpu_has_vmx_basic_inout())
- *pdata |= VMX_BASIC_INOUT;
+ return -EINVAL;
+ }
+}
+
+/* Returns 0 on success, non-0 otherwise. */
+static int vmx_get_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
+{
+ struct vcpu_vmx *vmx = to_vmx(vcpu);
+
+ switch (msr_index) {
+ case MSR_IA32_VMX_BASIC:
+ *pdata = vmx->nested.nested_vmx_basic;
break;
case MSR_IA32_VMX_TRUE_PINBASED_CTLS:
case MSR_IA32_VMX_PINBASED_CTLS:
*pdata = vmx_control_msr(
vmx->nested.nested_vmx_pinbased_ctls_low,
vmx->nested.nested_vmx_pinbased_ctls_high);
+ if (msr_index == MSR_IA32_VMX_PINBASED_CTLS)
+ *pdata |= PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR;
break;
case MSR_IA32_VMX_TRUE_PROCBASED_CTLS:
- *pdata = vmx_control_msr(
- vmx->nested.nested_vmx_true_procbased_ctls_low,
- vmx->nested.nested_vmx_procbased_ctls_high);
- break;
case MSR_IA32_VMX_PROCBASED_CTLS:
*pdata = vmx_control_msr(
vmx->nested.nested_vmx_procbased_ctls_low,
vmx->nested.nested_vmx_procbased_ctls_high);
+ if (msr_index == MSR_IA32_VMX_PROCBASED_CTLS)
+ *pdata |= CPU_BASED_ALWAYSON_WITHOUT_TRUE_MSR;
break;
case MSR_IA32_VMX_TRUE_EXIT_CTLS:
- *pdata = vmx_control_msr(
- vmx->nested.nested_vmx_true_exit_ctls_low,
- vmx->nested.nested_vmx_exit_ctls_high);
- break;
case MSR_IA32_VMX_EXIT_CTLS:
*pdata = vmx_control_msr(
vmx->nested.nested_vmx_exit_ctls_low,
vmx->nested.nested_vmx_exit_ctls_high);
+ if (msr_index == MSR_IA32_VMX_EXIT_CTLS)
+ *pdata |= VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR;
break;
case MSR_IA32_VMX_TRUE_ENTRY_CTLS:
- *pdata = vmx_control_msr(
- vmx->nested.nested_vmx_true_entry_ctls_low,
- vmx->nested.nested_vmx_entry_ctls_high);
- break;
case MSR_IA32_VMX_ENTRY_CTLS:
*pdata = vmx_control_msr(
vmx->nested.nested_vmx_entry_ctls_low,
vmx->nested.nested_vmx_entry_ctls_high);
+ if (msr_index == MSR_IA32_VMX_ENTRY_CTLS)
+ *pdata |= VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR;
break;
case MSR_IA32_VMX_MISC:
*pdata = vmx_control_msr(
vmx->nested.nested_vmx_misc_low,
vmx->nested.nested_vmx_misc_high);
break;
- /*
- * These MSRs specify bits which the guest must keep fixed (on or off)
- * while L1 is in VMXON mode (in L1's root mode, or running an L2).
- * We picked the standard core2 setting.
- */
-#define VMXON_CR0_ALWAYSON (X86_CR0_PE | X86_CR0_PG | X86_CR0_NE)
-#define VMXON_CR4_ALWAYSON X86_CR4_VMXE
case MSR_IA32_VMX_CR0_FIXED0:
- *pdata = VMXON_CR0_ALWAYSON;
+ *pdata = vmx->nested.nested_vmx_cr0_fixed0;
break;
case MSR_IA32_VMX_CR0_FIXED1:
- *pdata = -1ULL;
+ *pdata = vmx->nested.nested_vmx_cr0_fixed1;
break;
case MSR_IA32_VMX_CR4_FIXED0:
- *pdata = VMXON_CR4_ALWAYSON;
+ *pdata = vmx->nested.nested_vmx_cr4_fixed0;
break;
case MSR_IA32_VMX_CR4_FIXED1:
- *pdata = -1ULL;
+ *pdata = vmx->nested.nested_vmx_cr4_fixed1;
break;
case MSR_IA32_VMX_VMCS_ENUM:
- *pdata = 0x2e; /* highest index: VMX_PREEMPTION_TIMER_VALUE */
+ *pdata = vmx->nested.nested_vmx_vmcs_enum;
break;
case MSR_IA32_VMX_PROCBASED_CTLS2:
*pdata = vmx_control_msr(
@@ -3101,7 +3378,11 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
vmx_leave_nested(vcpu);
break;
case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC:
- return 1; /* they are read-only */
+ if (!msr_info->host_initiated)
+ return 1; /* they are read-only */
+ if (!nested_vmx_allowed(vcpu))
+ return 1;
+ return vmx_set_vmx_msr(vcpu, msr_index, data);
case MSR_IA32_XSS:
if (!vmx_xsaves_supported())
return 1;
@@ -3863,6 +4144,40 @@ static void ept_save_pdptrs(struct kvm_vcpu *vcpu)
(unsigned long *)&vcpu->arch.regs_dirty);
}
+static bool nested_guest_cr0_valid(struct kvm_vcpu *vcpu, unsigned long val)
+{
+ u64 fixed0 = to_vmx(vcpu)->nested.nested_vmx_cr0_fixed0;
+ u64 fixed1 = to_vmx(vcpu)->nested.nested_vmx_cr0_fixed1;
+ struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
+
+ if (to_vmx(vcpu)->nested.nested_vmx_secondary_ctls_high &
+ SECONDARY_EXEC_UNRESTRICTED_GUEST &&
+ nested_cpu_has2(vmcs12, SECONDARY_EXEC_UNRESTRICTED_GUEST))
+ fixed0 &= ~(X86_CR0_PE | X86_CR0_PG);
+
+ return fixed_bits_valid(val, fixed0, fixed1);
+}
+
+static bool nested_host_cr0_valid(struct kvm_vcpu *vcpu, unsigned long val)
+{
+ u64 fixed0 = to_vmx(vcpu)->nested.nested_vmx_cr0_fixed0;
+ u64 fixed1 = to_vmx(vcpu)->nested.nested_vmx_cr0_fixed1;
+
+ return fixed_bits_valid(val, fixed0, fixed1);
+}
+
+static bool nested_cr4_valid(struct kvm_vcpu *vcpu, unsigned long val)
+{
+ u64 fixed0 = to_vmx(vcpu)->nested.nested_vmx_cr4_fixed0;
+ u64 fixed1 = to_vmx(vcpu)->nested.nested_vmx_cr4_fixed1;
+
+ return fixed_bits_valid(val, fixed0, fixed1);
+}
+
+/* No difference in the restrictions on guest and host CR4 in VMX operation. */
+#define nested_guest_cr4_valid nested_cr4_valid
+#define nested_host_cr4_valid nested_cr4_valid
+
static int vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4);
static void ept_update_paging_mode_cr0(unsigned long *hw_cr0,
@@ -3991,8 +4306,8 @@ static int vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
if (!nested_vmx_allowed(vcpu))
return 1;
}
- if (to_vmx(vcpu)->nested.vmxon &&
- ((cr4 & VMXON_CR4_ALWAYSON) != VMXON_CR4_ALWAYSON))
+
+ if (to_vmx(vcpu)->nested.vmxon && !nested_cr4_valid(vcpu, cr4))
return 1;
vcpu->arch.cr4 = cr4;
@@ -4569,41 +4884,6 @@ static void __vmx_disable_intercept_for_msr(unsigned long *msr_bitmap,
}
}
-static void __vmx_enable_intercept_for_msr(unsigned long *msr_bitmap,
- u32 msr, int type)
-{
- int f = sizeof(unsigned long);
-
- if (!cpu_has_vmx_msr_bitmap())
- return;
-
- /*
- * See Intel PRM Vol. 3, 20.6.9 (MSR-Bitmap Address). Early manuals
- * have the write-low and read-high bitmap offsets the wrong way round.
- * We can control MSRs 0x00000000-0x00001fff and 0xc0000000-0xc0001fff.
- */
- if (msr <= 0x1fff) {
- if (type & MSR_TYPE_R)
- /* read-low */
- __set_bit(msr, msr_bitmap + 0x000 / f);
-
- if (type & MSR_TYPE_W)
- /* write-low */
- __set_bit(msr, msr_bitmap + 0x800 / f);
-
- } else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) {
- msr &= 0x1fff;
- if (type & MSR_TYPE_R)
- /* read-high */
- __set_bit(msr, msr_bitmap + 0x400 / f);
-
- if (type & MSR_TYPE_W)
- /* write-high */
- __set_bit(msr, msr_bitmap + 0xc00 / f);
-
- }
-}
-
/*
* If a msr is allowed by L0, we should check whether it is allowed by L1.
* The corresponding bit will be cleared unless both of L0 and L1 allow it.
@@ -4659,48 +4939,18 @@ static void vmx_disable_intercept_for_msr(u32 msr, bool longmode_only)
msr, MSR_TYPE_R | MSR_TYPE_W);
}
-static void vmx_enable_intercept_msr_read_x2apic(u32 msr, bool apicv_active)
-{
- if (apicv_active) {
- __vmx_enable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic,
- msr, MSR_TYPE_R);
- __vmx_enable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic,
- msr, MSR_TYPE_R);
- } else {
- __vmx_enable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic_apicv_inactive,
- msr, MSR_TYPE_R);
- __vmx_enable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic_apicv_inactive,
- msr, MSR_TYPE_R);
- }
-}
-
-static void vmx_disable_intercept_msr_read_x2apic(u32 msr, bool apicv_active)
+static void vmx_disable_intercept_msr_x2apic(u32 msr, int type, bool apicv_active)
{
if (apicv_active) {
- __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic,
- msr, MSR_TYPE_R);
- __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic,
- msr, MSR_TYPE_R);
+ __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic_apicv,
+ msr, type);
+ __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic_apicv,
+ msr, type);
} else {
- __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic_apicv_inactive,
- msr, MSR_TYPE_R);
- __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic_apicv_inactive,
- msr, MSR_TYPE_R);
- }
-}
-
-static void vmx_disable_intercept_msr_write_x2apic(u32 msr, bool apicv_active)
-{
- if (apicv_active) {
__vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic,
- msr, MSR_TYPE_W);
+ msr, type);
__vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic,
- msr, MSR_TYPE_W);
- } else {
- __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic_apicv_inactive,
- msr, MSR_TYPE_W);
- __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic_apicv_inactive,
- msr, MSR_TYPE_W);
+ msr, type);
}
}
@@ -4822,9 +5072,15 @@ static void vmx_sync_pir_to_irr(struct kvm_vcpu *vcpu)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
- if (!pi_test_and_clear_on(&vmx->pi_desc))
+ if (!pi_test_on(&vmx->pi_desc))
return;
+ pi_clear_on(&vmx->pi_desc);
+ /*
+ * IOMMU can write to PIR.ON, so the barrier matters even on UP.
+ * But on x86 this is just a compiler barrier anyway.
+ */
+ smp_mb__after_atomic();
kvm_apic_update_irr(vcpu, vmx->pi_desc.pir);
}
@@ -5583,7 +5839,7 @@ static int handle_triple_fault(struct kvm_vcpu *vcpu)
static int handle_io(struct kvm_vcpu *vcpu)
{
unsigned long exit_qualification;
- int size, in, string;
+ int size, in, string, ret;
unsigned port;
exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
@@ -5597,9 +5853,14 @@ static int handle_io(struct kvm_vcpu *vcpu)
port = exit_qualification >> 16;
size = (exit_qualification & 7) + 1;
- skip_emulated_instruction(vcpu);
- return kvm_fast_pio_out(vcpu, size, port);
+ ret = kvm_skip_emulated_instruction(vcpu);
+
+ /*
+ * TODO: we might be squashing a KVM_GUESTDBG_SINGLESTEP-triggered
+ * KVM_EXIT_DEBUG here.
+ */
+ return kvm_fast_pio_out(vcpu, size, port) && ret;
}
static void
@@ -5613,18 +5874,6 @@ vmx_patch_hypercall(struct kvm_vcpu *vcpu, unsigned char *hypercall)
hypercall[2] = 0xc1;
}
-static bool nested_cr0_valid(struct kvm_vcpu *vcpu, unsigned long val)
-{
- unsigned long always_on = VMXON_CR0_ALWAYSON;
- struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
-
- if (to_vmx(vcpu)->nested.nested_vmx_secondary_ctls_high &
- SECONDARY_EXEC_UNRESTRICTED_GUEST &&
- nested_cpu_has2(vmcs12, SECONDARY_EXEC_UNRESTRICTED_GUEST))
- always_on &= ~(X86_CR0_PE | X86_CR0_PG);
- return (val & always_on) == always_on;
-}
-
/* called to set cr0 as appropriate for a mov-to-cr0 exit. */
static int handle_set_cr0(struct kvm_vcpu *vcpu, unsigned long val)
{
@@ -5643,7 +5892,7 @@ static int handle_set_cr0(struct kvm_vcpu *vcpu, unsigned long val)
val = (val & ~vmcs12->cr0_guest_host_mask) |
(vmcs12->guest_cr0 & vmcs12->cr0_guest_host_mask);
- if (!nested_cr0_valid(vcpu, val))
+ if (!nested_guest_cr0_valid(vcpu, val))
return 1;
if (kvm_set_cr0(vcpu, val))
@@ -5652,8 +5901,9 @@ static int handle_set_cr0(struct kvm_vcpu *vcpu, unsigned long val)
return 0;
} else {
if (to_vmx(vcpu)->nested.vmxon &&
- ((val & VMXON_CR0_ALWAYSON) != VMXON_CR0_ALWAYSON))
+ !nested_host_cr0_valid(vcpu, val))
return 1;
+
return kvm_set_cr0(vcpu, val);
}
}
@@ -5697,6 +5947,7 @@ static int handle_cr(struct kvm_vcpu *vcpu)
int cr;
int reg;
int err;
+ int ret;
exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
cr = exit_qualification & 15;
@@ -5708,25 +5959,27 @@ static int handle_cr(struct kvm_vcpu *vcpu)
switch (cr) {
case 0:
err = handle_set_cr0(vcpu, val);
- kvm_complete_insn_gp(vcpu, err);
- return 1;
+ return kvm_complete_insn_gp(vcpu, err);
case 3:
err = kvm_set_cr3(vcpu, val);
- kvm_complete_insn_gp(vcpu, err);
- return 1;
+ return kvm_complete_insn_gp(vcpu, err);
case 4:
err = handle_set_cr4(vcpu, val);
- kvm_complete_insn_gp(vcpu, err);
- return 1;
+ return kvm_complete_insn_gp(vcpu, err);
case 8: {
u8 cr8_prev = kvm_get_cr8(vcpu);
u8 cr8 = (u8)val;
err = kvm_set_cr8(vcpu, cr8);
- kvm_complete_insn_gp(vcpu, err);
+ ret = kvm_complete_insn_gp(vcpu, err);
if (lapic_in_kernel(vcpu))
- return 1;
+ return ret;
if (cr8_prev <= cr8)
- return 1;
+ return ret;
+ /*
+ * TODO: we might be squashing a
+ * KVM_GUESTDBG_SINGLESTEP-triggered
+ * KVM_EXIT_DEBUG here.
+ */
vcpu->run->exit_reason = KVM_EXIT_SET_TPR;
return 0;
}
@@ -5735,23 +5988,20 @@ static int handle_cr(struct kvm_vcpu *vcpu)
case 2: /* clts */
handle_clts(vcpu);
trace_kvm_cr_write(0, kvm_read_cr0(vcpu));
- skip_emulated_instruction(vcpu);
vmx_fpu_activate(vcpu);
- return 1;
+ return kvm_skip_emulated_instruction(vcpu);
case 1: /*mov from cr*/
switch (cr) {
case 3:
val = kvm_read_cr3(vcpu);
kvm_register_write(vcpu, reg, val);
trace_kvm_cr_read(cr, val);
- skip_emulated_instruction(vcpu);
- return 1;
+ return kvm_skip_emulated_instruction(vcpu);
case 8:
val = kvm_get_cr8(vcpu);
kvm_register_write(vcpu, reg, val);
trace_kvm_cr_read(cr, val);
- skip_emulated_instruction(vcpu);
- return 1;
+ return kvm_skip_emulated_instruction(vcpu);
}
break;
case 3: /* lmsw */
@@ -5759,8 +6009,7 @@ static int handle_cr(struct kvm_vcpu *vcpu)
trace_kvm_cr_write(0, (kvm_read_cr0(vcpu) & ~0xful) | val);
kvm_lmsw(vcpu, val);
- skip_emulated_instruction(vcpu);
- return 1;
+ return kvm_skip_emulated_instruction(vcpu);
default:
break;
}
@@ -5831,8 +6080,7 @@ static int handle_dr(struct kvm_vcpu *vcpu)
if (kvm_set_dr(vcpu, dr, kvm_register_readl(vcpu, reg)))
return 1;
- skip_emulated_instruction(vcpu);
- return 1;
+ return kvm_skip_emulated_instruction(vcpu);
}
static u64 vmx_get_dr6(struct kvm_vcpu *vcpu)
@@ -5864,8 +6112,7 @@ static void vmx_set_dr7(struct kvm_vcpu *vcpu, unsigned long val)
static int handle_cpuid(struct kvm_vcpu *vcpu)
{
- kvm_emulate_cpuid(vcpu);
- return 1;
+ return kvm_emulate_cpuid(vcpu);
}
static int handle_rdmsr(struct kvm_vcpu *vcpu)
@@ -5886,8 +6133,7 @@ static int handle_rdmsr(struct kvm_vcpu *vcpu)
/* FIXME: handling of bits 32:63 of rax, rdx */
vcpu->arch.regs[VCPU_REGS_RAX] = msr_info.data & -1u;
vcpu->arch.regs[VCPU_REGS_RDX] = (msr_info.data >> 32) & -1u;
- skip_emulated_instruction(vcpu);
- return 1;
+ return kvm_skip_emulated_instruction(vcpu);
}
static int handle_wrmsr(struct kvm_vcpu *vcpu)
@@ -5907,8 +6153,7 @@ static int handle_wrmsr(struct kvm_vcpu *vcpu)
}
trace_kvm_msr_write(ecx, data);
- skip_emulated_instruction(vcpu);
- return 1;
+ return kvm_skip_emulated_instruction(vcpu);
}
static int handle_tpr_below_threshold(struct kvm_vcpu *vcpu)
@@ -5952,8 +6197,7 @@ static int handle_invlpg(struct kvm_vcpu *vcpu)
unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
kvm_mmu_invlpg(vcpu, exit_qualification);
- skip_emulated_instruction(vcpu);
- return 1;
+ return kvm_skip_emulated_instruction(vcpu);
}
static int handle_rdpmc(struct kvm_vcpu *vcpu)
@@ -5961,15 +6205,12 @@ static int handle_rdpmc(struct kvm_vcpu *vcpu)
int err;
err = kvm_rdpmc(vcpu);
- kvm_complete_insn_gp(vcpu, err);
-
- return 1;
+ return kvm_complete_insn_gp(vcpu, err);
}
static int handle_wbinvd(struct kvm_vcpu *vcpu)
{
- kvm_emulate_wbinvd(vcpu);
- return 1;
+ return kvm_emulate_wbinvd(vcpu);
}
static int handle_xsetbv(struct kvm_vcpu *vcpu)
@@ -5978,20 +6219,20 @@ static int handle_xsetbv(struct kvm_vcpu *vcpu)
u32 index = kvm_register_read(vcpu, VCPU_REGS_RCX);
if (kvm_set_xcr(vcpu, index, new_bv) == 0)
- skip_emulated_instruction(vcpu);
+ return kvm_skip_emulated_instruction(vcpu);
return 1;
}
static int handle_xsaves(struct kvm_vcpu *vcpu)
{
- skip_emulated_instruction(vcpu);
+ kvm_skip_emulated_instruction(vcpu);
WARN(1, "this should never happen\n");
return 1;
}
static int handle_xrstors(struct kvm_vcpu *vcpu)
{
- skip_emulated_instruction(vcpu);
+ kvm_skip_emulated_instruction(vcpu);
WARN(1, "this should never happen\n");
return 1;
}
@@ -6012,8 +6253,7 @@ static int handle_apic_access(struct kvm_vcpu *vcpu)
if ((access_type == TYPE_LINEAR_APIC_INST_WRITE) &&
(offset == APIC_EOI)) {
kvm_lapic_set_eoi(vcpu);
- skip_emulated_instruction(vcpu);
- return 1;
+ return kvm_skip_emulated_instruction(vcpu);
}
}
return emulate_instruction(vcpu, 0) == EMULATE_DONE;
@@ -6161,9 +6401,8 @@ static int handle_ept_misconfig(struct kvm_vcpu *vcpu)
gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS);
if (!kvm_io_bus_write(vcpu, KVM_FAST_MMIO_BUS, gpa, 0, NULL)) {
- skip_emulated_instruction(vcpu);
trace_kvm_fast_mmio(gpa);
- return 1;
+ return kvm_skip_emulated_instruction(vcpu);
}
ret = handle_mmio_page_fault(vcpu, gpa, true);
@@ -6348,50 +6587,13 @@ static __init int hardware_setup(void)
for (i = 0; i < ARRAY_SIZE(vmx_msr_index); ++i)
kvm_define_shared_msr(i, vmx_msr_index[i]);
- vmx_io_bitmap_a = (unsigned long *)__get_free_page(GFP_KERNEL);
- if (!vmx_io_bitmap_a)
- return r;
+ for (i = 0; i < VMX_BITMAP_NR; i++) {
+ vmx_bitmap[i] = (unsigned long *)__get_free_page(GFP_KERNEL);
+ if (!vmx_bitmap[i])
+ goto out;
+ }
vmx_io_bitmap_b = (unsigned long *)__get_free_page(GFP_KERNEL);
- if (!vmx_io_bitmap_b)
- goto out;
-
- vmx_msr_bitmap_legacy = (unsigned long *)__get_free_page(GFP_KERNEL);
- if (!vmx_msr_bitmap_legacy)
- goto out1;
-
- vmx_msr_bitmap_legacy_x2apic =
- (unsigned long *)__get_free_page(GFP_KERNEL);
- if (!vmx_msr_bitmap_legacy_x2apic)
- goto out2;
-
- vmx_msr_bitmap_legacy_x2apic_apicv_inactive =
- (unsigned long *)__get_free_page(GFP_KERNEL);
- if (!vmx_msr_bitmap_legacy_x2apic_apicv_inactive)
- goto out3;
-
- vmx_msr_bitmap_longmode = (unsigned long *)__get_free_page(GFP_KERNEL);
- if (!vmx_msr_bitmap_longmode)
- goto out4;
-
- vmx_msr_bitmap_longmode_x2apic =
- (unsigned long *)__get_free_page(GFP_KERNEL);
- if (!vmx_msr_bitmap_longmode_x2apic)
- goto out5;
-
- vmx_msr_bitmap_longmode_x2apic_apicv_inactive =
- (unsigned long *)__get_free_page(GFP_KERNEL);
- if (!vmx_msr_bitmap_longmode_x2apic_apicv_inactive)
- goto out6;
-
- vmx_vmread_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL);
- if (!vmx_vmread_bitmap)
- goto out7;
-
- vmx_vmwrite_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL);
- if (!vmx_vmwrite_bitmap)
- goto out8;
-
memset(vmx_vmread_bitmap, 0xff, PAGE_SIZE);
memset(vmx_vmwrite_bitmap, 0xff, PAGE_SIZE);
@@ -6409,7 +6611,7 @@ static __init int hardware_setup(void)
if (setup_vmcs_config(&vmcs_config) < 0) {
r = -EIO;
- goto out9;
+ goto out;
}
if (boot_cpu_has(X86_FEATURE_NX))
@@ -6472,39 +6674,34 @@ static __init int hardware_setup(void)
vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_EIP, false);
vmx_disable_intercept_for_msr(MSR_IA32_BNDCFGS, true);
- memcpy(vmx_msr_bitmap_legacy_x2apic,
+ memcpy(vmx_msr_bitmap_legacy_x2apic_apicv,
vmx_msr_bitmap_legacy, PAGE_SIZE);
- memcpy(vmx_msr_bitmap_longmode_x2apic,
+ memcpy(vmx_msr_bitmap_longmode_x2apic_apicv,
vmx_msr_bitmap_longmode, PAGE_SIZE);
- memcpy(vmx_msr_bitmap_legacy_x2apic_apicv_inactive,
+ memcpy(vmx_msr_bitmap_legacy_x2apic,
vmx_msr_bitmap_legacy, PAGE_SIZE);
- memcpy(vmx_msr_bitmap_longmode_x2apic_apicv_inactive,
+ memcpy(vmx_msr_bitmap_longmode_x2apic,
vmx_msr_bitmap_longmode, PAGE_SIZE);
set_bit(0, vmx_vpid_bitmap); /* 0 is reserved for host */
+ for (msr = 0x800; msr <= 0x8ff; msr++) {
+ if (msr == 0x839 /* TMCCT */)
+ continue;
+ vmx_disable_intercept_msr_x2apic(msr, MSR_TYPE_R, true);
+ }
+
/*
- * enable_apicv && kvm_vcpu_apicv_active()
+ * TPR reads and writes can be virtualized even if virtual interrupt
+ * delivery is not in use.
*/
- for (msr = 0x800; msr <= 0x8ff; msr++)
- vmx_disable_intercept_msr_read_x2apic(msr, true);
+ vmx_disable_intercept_msr_x2apic(0x808, MSR_TYPE_W, true);
+ vmx_disable_intercept_msr_x2apic(0x808, MSR_TYPE_R | MSR_TYPE_W, false);
- /* TMCCT */
- vmx_enable_intercept_msr_read_x2apic(0x839, true);
- /* TPR */
- vmx_disable_intercept_msr_write_x2apic(0x808, true);
/* EOI */
- vmx_disable_intercept_msr_write_x2apic(0x80b, true);
+ vmx_disable_intercept_msr_x2apic(0x80b, MSR_TYPE_W, true);
/* SELF-IPI */
- vmx_disable_intercept_msr_write_x2apic(0x83f, true);
-
- /*
- * (enable_apicv && !kvm_vcpu_apicv_active()) ||
- * !enable_apicv
- */
- /* TPR */
- vmx_disable_intercept_msr_read_x2apic(0x808, false);
- vmx_disable_intercept_msr_write_x2apic(0x808, false);
+ vmx_disable_intercept_msr_x2apic(0x83f, MSR_TYPE_W, true);
if (enable_ept) {
kvm_mmu_set_mask_ptes(VMX_EPT_READABLE_MASK,
@@ -6551,42 +6748,19 @@ static __init int hardware_setup(void)
return alloc_kvm_area();
-out9:
- free_page((unsigned long)vmx_vmwrite_bitmap);
-out8:
- free_page((unsigned long)vmx_vmread_bitmap);
-out7:
- free_page((unsigned long)vmx_msr_bitmap_longmode_x2apic_apicv_inactive);
-out6:
- free_page((unsigned long)vmx_msr_bitmap_longmode_x2apic);
-out5:
- free_page((unsigned long)vmx_msr_bitmap_longmode);
-out4:
- free_page((unsigned long)vmx_msr_bitmap_legacy_x2apic_apicv_inactive);
-out3:
- free_page((unsigned long)vmx_msr_bitmap_legacy_x2apic);
-out2:
- free_page((unsigned long)vmx_msr_bitmap_legacy);
-out1:
- free_page((unsigned long)vmx_io_bitmap_b);
out:
- free_page((unsigned long)vmx_io_bitmap_a);
+ for (i = 0; i < VMX_BITMAP_NR; i++)
+ free_page((unsigned long)vmx_bitmap[i]);
return r;
}
static __exit void hardware_unsetup(void)
{
- free_page((unsigned long)vmx_msr_bitmap_legacy_x2apic);
- free_page((unsigned long)vmx_msr_bitmap_legacy_x2apic_apicv_inactive);
- free_page((unsigned long)vmx_msr_bitmap_longmode_x2apic);
- free_page((unsigned long)vmx_msr_bitmap_longmode_x2apic_apicv_inactive);
- free_page((unsigned long)vmx_msr_bitmap_legacy);
- free_page((unsigned long)vmx_msr_bitmap_longmode);
- free_page((unsigned long)vmx_io_bitmap_b);
- free_page((unsigned long)vmx_io_bitmap_a);
- free_page((unsigned long)vmx_vmwrite_bitmap);
- free_page((unsigned long)vmx_vmread_bitmap);
+ int i;
+
+ for (i = 0; i < VMX_BITMAP_NR; i++)
+ free_page((unsigned long)vmx_bitmap[i]);
free_kvm_area();
}
@@ -6600,16 +6774,13 @@ static int handle_pause(struct kvm_vcpu *vcpu)
if (ple_gap)
grow_ple_window(vcpu);
- skip_emulated_instruction(vcpu);
kvm_vcpu_on_spin(vcpu);
-
- return 1;
+ return kvm_skip_emulated_instruction(vcpu);
}
static int handle_nop(struct kvm_vcpu *vcpu)
{
- skip_emulated_instruction(vcpu);
- return 1;
+ return kvm_skip_emulated_instruction(vcpu);
}
static int handle_mwait(struct kvm_vcpu *vcpu)
@@ -6916,8 +7087,7 @@ static int nested_vmx_check_vmptr(struct kvm_vcpu *vcpu, int exit_reason,
*/
if (!PAGE_ALIGNED(vmptr) || (vmptr >> maxphyaddr)) {
nested_vmx_failInvalid(vcpu);
- skip_emulated_instruction(vcpu);
- return 1;
+ return kvm_skip_emulated_instruction(vcpu);
}
page = nested_get_page(vcpu, vmptr);
@@ -6925,8 +7095,7 @@ static int nested_vmx_check_vmptr(struct kvm_vcpu *vcpu, int exit_reason,
*(u32 *)kmap(page) != VMCS12_REVISION) {
nested_vmx_failInvalid(vcpu);
kunmap(page);
- skip_emulated_instruction(vcpu);
- return 1;
+ return kvm_skip_emulated_instruction(vcpu);
}
kunmap(page);
vmx->nested.vmxon_ptr = vmptr;
@@ -6935,30 +7104,26 @@ static int nested_vmx_check_vmptr(struct kvm_vcpu *vcpu, int exit_reason,
if (!PAGE_ALIGNED(vmptr) || (vmptr >> maxphyaddr)) {
nested_vmx_failValid(vcpu,
VMXERR_VMCLEAR_INVALID_ADDRESS);
- skip_emulated_instruction(vcpu);
- return 1;
+ return kvm_skip_emulated_instruction(vcpu);
}
if (vmptr == vmx->nested.vmxon_ptr) {
nested_vmx_failValid(vcpu,
VMXERR_VMCLEAR_VMXON_POINTER);
- skip_emulated_instruction(vcpu);
- return 1;
+ return kvm_skip_emulated_instruction(vcpu);
}
break;
case EXIT_REASON_VMPTRLD:
if (!PAGE_ALIGNED(vmptr) || (vmptr >> maxphyaddr)) {
nested_vmx_failValid(vcpu,
VMXERR_VMPTRLD_INVALID_ADDRESS);
- skip_emulated_instruction(vcpu);
- return 1;
+ return kvm_skip_emulated_instruction(vcpu);
}
if (vmptr == vmx->nested.vmxon_ptr) {
nested_vmx_failValid(vcpu,
VMXERR_VMCLEAR_VMXON_POINTER);
- skip_emulated_instruction(vcpu);
- return 1;
+ return kvm_skip_emulated_instruction(vcpu);
}
break;
default:
@@ -7014,8 +7179,7 @@ static int handle_vmon(struct kvm_vcpu *vcpu)
if (vmx->nested.vmxon) {
nested_vmx_failValid(vcpu, VMXERR_VMXON_IN_VMX_ROOT_OPERATION);
- skip_emulated_instruction(vcpu);
- return 1;
+ return kvm_skip_emulated_instruction(vcpu);
}
if ((vmx->msr_ia32_feature_control & VMXON_NEEDED_FEATURES)
@@ -7055,9 +7219,8 @@ static int handle_vmon(struct kvm_vcpu *vcpu)
vmx->nested.vmxon = true;
- skip_emulated_instruction(vcpu);
nested_vmx_succeed(vcpu);
- return 1;
+ return kvm_skip_emulated_instruction(vcpu);
out_shadow_vmcs:
kfree(vmx->nested.cached_vmcs12);
@@ -7176,9 +7339,8 @@ static int handle_vmoff(struct kvm_vcpu *vcpu)
if (!nested_vmx_check_permission(vcpu))
return 1;
free_nested(to_vmx(vcpu));
- skip_emulated_instruction(vcpu);
nested_vmx_succeed(vcpu);
- return 1;
+ return kvm_skip_emulated_instruction(vcpu);
}
/* Emulate the VMCLEAR instruction */
@@ -7217,9 +7379,8 @@ static int handle_vmclear(struct kvm_vcpu *vcpu)
nested_free_vmcs02(vmx, vmptr);
- skip_emulated_instruction(vcpu);
nested_vmx_succeed(vcpu);
- return 1;
+ return kvm_skip_emulated_instruction(vcpu);
}
static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch);
@@ -7417,7 +7578,6 @@ static int nested_vmx_check_vmcs12(struct kvm_vcpu *vcpu)
struct vcpu_vmx *vmx = to_vmx(vcpu);
if (vmx->nested.current_vmptr == -1ull) {
nested_vmx_failInvalid(vcpu);
- skip_emulated_instruction(vcpu);
return 0;
}
return 1;
@@ -7431,17 +7591,18 @@ static int handle_vmread(struct kvm_vcpu *vcpu)
u32 vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO);
gva_t gva = 0;
- if (!nested_vmx_check_permission(vcpu) ||
- !nested_vmx_check_vmcs12(vcpu))
+ if (!nested_vmx_check_permission(vcpu))
return 1;
+ if (!nested_vmx_check_vmcs12(vcpu))
+ return kvm_skip_emulated_instruction(vcpu);
+
/* Decode instruction info and find the field to read */
field = kvm_register_readl(vcpu, (((vmx_instruction_info) >> 28) & 0xf));
/* Read the field, zero-extended to a u64 field_value */
if (vmcs12_read_any(vcpu, field, &field_value) < 0) {
nested_vmx_failValid(vcpu, VMXERR_UNSUPPORTED_VMCS_COMPONENT);
- skip_emulated_instruction(vcpu);
- return 1;
+ return kvm_skip_emulated_instruction(vcpu);
}
/*
* Now copy part of this value to register or memory, as requested.
@@ -7461,8 +7622,7 @@ static int handle_vmread(struct kvm_vcpu *vcpu)
}
nested_vmx_succeed(vcpu);
- skip_emulated_instruction(vcpu);
- return 1;
+ return kvm_skip_emulated_instruction(vcpu);
}
@@ -7481,10 +7641,12 @@ static int handle_vmwrite(struct kvm_vcpu *vcpu)
u64 field_value = 0;
struct x86_exception e;
- if (!nested_vmx_check_permission(vcpu) ||
- !nested_vmx_check_vmcs12(vcpu))
+ if (!nested_vmx_check_permission(vcpu))
return 1;
+ if (!nested_vmx_check_vmcs12(vcpu))
+ return kvm_skip_emulated_instruction(vcpu);
+
if (vmx_instruction_info & (1u << 10))
field_value = kvm_register_readl(vcpu,
(((vmx_instruction_info) >> 3) & 0xf));
@@ -7504,19 +7666,16 @@ static int handle_vmwrite(struct kvm_vcpu *vcpu)
if (vmcs_field_readonly(field)) {
nested_vmx_failValid(vcpu,
VMXERR_VMWRITE_READ_ONLY_VMCS_COMPONENT);
- skip_emulated_instruction(vcpu);
- return 1;
+ return kvm_skip_emulated_instruction(vcpu);
}
if (vmcs12_write_any(vcpu, field, field_value) < 0) {
nested_vmx_failValid(vcpu, VMXERR_UNSUPPORTED_VMCS_COMPONENT);
- skip_emulated_instruction(vcpu);
- return 1;
+ return kvm_skip_emulated_instruction(vcpu);
}
nested_vmx_succeed(vcpu);
- skip_emulated_instruction(vcpu);
- return 1;
+ return kvm_skip_emulated_instruction(vcpu);
}
/* Emulate the VMPTRLD instruction */
@@ -7537,8 +7696,7 @@ static int handle_vmptrld(struct kvm_vcpu *vcpu)
page = nested_get_page(vcpu, vmptr);
if (page == NULL) {
nested_vmx_failInvalid(vcpu);
- skip_emulated_instruction(vcpu);
- return 1;
+ return kvm_skip_emulated_instruction(vcpu);
}
new_vmcs12 = kmap(page);
if (new_vmcs12->revision_id != VMCS12_REVISION) {
@@ -7546,8 +7704,7 @@ static int handle_vmptrld(struct kvm_vcpu *vcpu)
nested_release_page_clean(page);
nested_vmx_failValid(vcpu,
VMXERR_VMPTRLD_INCORRECT_VMCS_REVISION_ID);
- skip_emulated_instruction(vcpu);
- return 1;
+ return kvm_skip_emulated_instruction(vcpu);
}
nested_release_vmcs12(vmx);
@@ -7571,8 +7728,7 @@ static int handle_vmptrld(struct kvm_vcpu *vcpu)
}
nested_vmx_succeed(vcpu);
- skip_emulated_instruction(vcpu);
- return 1;
+ return kvm_skip_emulated_instruction(vcpu);
}
/* Emulate the VMPTRST instruction */
@@ -7597,8 +7753,7 @@ static int handle_vmptrst(struct kvm_vcpu *vcpu)
return 1;
}
nested_vmx_succeed(vcpu);
- skip_emulated_instruction(vcpu);
- return 1;
+ return kvm_skip_emulated_instruction(vcpu);
}
/* Emulate the INVEPT instruction */
@@ -7636,8 +7791,7 @@ static int handle_invept(struct kvm_vcpu *vcpu)
if (type >= 32 || !(types & (1 << type))) {
nested_vmx_failValid(vcpu,
VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID);
- skip_emulated_instruction(vcpu);
- return 1;
+ return kvm_skip_emulated_instruction(vcpu);
}
/* According to the Intel VMX instruction reference, the memory
@@ -7668,8 +7822,7 @@ static int handle_invept(struct kvm_vcpu *vcpu)
break;
}
- skip_emulated_instruction(vcpu);
- return 1;
+ return kvm_skip_emulated_instruction(vcpu);
}
static int handle_invvpid(struct kvm_vcpu *vcpu)
@@ -7694,13 +7847,13 @@ static int handle_invvpid(struct kvm_vcpu *vcpu)
vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO);
type = kvm_register_readl(vcpu, (vmx_instruction_info >> 28) & 0xf);
- types = (vmx->nested.nested_vmx_vpid_caps >> 8) & 0x7;
+ types = (vmx->nested.nested_vmx_vpid_caps &
+ VMX_VPID_EXTENT_SUPPORTED_MASK) >> 8;
if (type >= 32 || !(types & (1 << type))) {
nested_vmx_failValid(vcpu,
VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID);
- skip_emulated_instruction(vcpu);
- return 1;
+ return kvm_skip_emulated_instruction(vcpu);
}
/* according to the intel vmx instruction reference, the memory
@@ -7716,23 +7869,26 @@ static int handle_invvpid(struct kvm_vcpu *vcpu)
}
switch (type) {
+ case VMX_VPID_EXTENT_INDIVIDUAL_ADDR:
case VMX_VPID_EXTENT_SINGLE_CONTEXT:
- /*
- * Old versions of KVM use the single-context version so we
- * have to support it; just treat it the same as all-context.
- */
+ case VMX_VPID_EXTENT_SINGLE_NON_GLOBAL:
+ if (!vpid) {
+ nested_vmx_failValid(vcpu,
+ VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID);
+ return kvm_skip_emulated_instruction(vcpu);
+ }
+ break;
case VMX_VPID_EXTENT_ALL_CONTEXT:
- __vmx_flush_tlb(vcpu, to_vmx(vcpu)->nested.vpid02);
- nested_vmx_succeed(vcpu);
break;
default:
- /* Trap individual address invalidation invvpid calls */
- BUG_ON(1);
- break;
+ WARN_ON_ONCE(1);
+ return kvm_skip_emulated_instruction(vcpu);
}
- skip_emulated_instruction(vcpu);
- return 1;
+ __vmx_flush_tlb(vcpu, vmx->nested.vpid02);
+ nested_vmx_succeed(vcpu);
+
+ return kvm_skip_emulated_instruction(vcpu);
}
static int handle_pml_full(struct kvm_vcpu *vcpu)
@@ -8071,6 +8227,8 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu)
return nested_cpu_has(vmcs12, CPU_BASED_MOV_DR_EXITING);
case EXIT_REASON_IO_INSTRUCTION:
return nested_vmx_exit_handled_io(vcpu, vmcs12);
+ case EXIT_REASON_GDTR_IDTR: case EXIT_REASON_LDTR_TR:
+ return nested_cpu_has2(vmcs12, SECONDARY_EXEC_DESC);
case EXIT_REASON_MSR_READ:
case EXIT_REASON_MSR_WRITE:
return nested_vmx_exit_handled_msr(vcpu, vmcs12, exit_reason);
@@ -8620,11 +8778,6 @@ static void vmx_handle_external_intr(struct kvm_vcpu *vcpu)
u32 exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
register void *__sp asm(_ASM_SP);
- /*
- * If external interrupt exists, IF bit is set in rflags/eflags on the
- * interrupt stack frame, and interrupt will be enabled on a return
- * from interrupt handler.
- */
if ((exit_intr_info & (INTR_INFO_VALID_MASK | INTR_INFO_INTR_TYPE_MASK))
== (INTR_INFO_VALID_MASK | INTR_TYPE_EXT_INTR)) {
unsigned int vector;
@@ -8809,7 +8962,7 @@ static void atomic_switch_perf_msrs(struct vcpu_vmx *vmx)
msrs[i].host);
}
-void vmx_arm_hv_timer(struct kvm_vcpu *vcpu)
+static void vmx_arm_hv_timer(struct kvm_vcpu *vcpu)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
u64 tscl;
@@ -9279,6 +9432,50 @@ static void vmcs_set_secondary_exec_control(u32 new_ctl)
(new_ctl & ~mask) | (cur_ctl & mask));
}
+/*
+ * Generate MSR_IA32_VMX_CR{0,4}_FIXED1 according to CPUID. Only set bits
+ * (indicating "allowed-1") if they are supported in the guest's CPUID.
+ */
+static void nested_vmx_cr_fixed1_bits_update(struct kvm_vcpu *vcpu)
+{
+ struct vcpu_vmx *vmx = to_vmx(vcpu);
+ struct kvm_cpuid_entry2 *entry;
+
+ vmx->nested.nested_vmx_cr0_fixed1 = 0xffffffff;
+ vmx->nested.nested_vmx_cr4_fixed1 = X86_CR4_PCE;
+
+#define cr4_fixed1_update(_cr4_mask, _reg, _cpuid_mask) do { \
+ if (entry && (entry->_reg & (_cpuid_mask))) \
+ vmx->nested.nested_vmx_cr4_fixed1 |= (_cr4_mask); \
+} while (0)
+
+ entry = kvm_find_cpuid_entry(vcpu, 0x1, 0);
+ cr4_fixed1_update(X86_CR4_VME, edx, bit(X86_FEATURE_VME));
+ cr4_fixed1_update(X86_CR4_PVI, edx, bit(X86_FEATURE_VME));
+ cr4_fixed1_update(X86_CR4_TSD, edx, bit(X86_FEATURE_TSC));
+ cr4_fixed1_update(X86_CR4_DE, edx, bit(X86_FEATURE_DE));
+ cr4_fixed1_update(X86_CR4_PSE, edx, bit(X86_FEATURE_PSE));
+ cr4_fixed1_update(X86_CR4_PAE, edx, bit(X86_FEATURE_PAE));
+ cr4_fixed1_update(X86_CR4_MCE, edx, bit(X86_FEATURE_MCE));
+ cr4_fixed1_update(X86_CR4_PGE, edx, bit(X86_FEATURE_PGE));
+ cr4_fixed1_update(X86_CR4_OSFXSR, edx, bit(X86_FEATURE_FXSR));
+ cr4_fixed1_update(X86_CR4_OSXMMEXCPT, edx, bit(X86_FEATURE_XMM));
+ cr4_fixed1_update(X86_CR4_VMXE, ecx, bit(X86_FEATURE_VMX));
+ cr4_fixed1_update(X86_CR4_SMXE, ecx, bit(X86_FEATURE_SMX));
+ cr4_fixed1_update(X86_CR4_PCIDE, ecx, bit(X86_FEATURE_PCID));
+ cr4_fixed1_update(X86_CR4_OSXSAVE, ecx, bit(X86_FEATURE_XSAVE));
+
+ entry = kvm_find_cpuid_entry(vcpu, 0x7, 0);
+ cr4_fixed1_update(X86_CR4_FSGSBASE, ebx, bit(X86_FEATURE_FSGSBASE));
+ cr4_fixed1_update(X86_CR4_SMEP, ebx, bit(X86_FEATURE_SMEP));
+ cr4_fixed1_update(X86_CR4_SMAP, ebx, bit(X86_FEATURE_SMAP));
+ cr4_fixed1_update(X86_CR4_PKE, ecx, bit(X86_FEATURE_PKU));
+ /* TODO: Use X86_CR4_UMIP and X86_FEATURE_UMIP macros */
+ cr4_fixed1_update(bit(11), ecx, bit(2));
+
+#undef cr4_fixed1_update
+}
+
static void vmx_cpuid_update(struct kvm_vcpu *vcpu)
{
struct kvm_cpuid_entry2 *best;
@@ -9320,6 +9517,9 @@ static void vmx_cpuid_update(struct kvm_vcpu *vcpu)
else
to_vmx(vcpu)->msr_ia32_feature_control_valid_bits &=
~FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX;
+
+ if (nested_vmx_allowed(vcpu))
+ nested_vmx_cr_fixed1_bits_update(vcpu);
}
static void vmx_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry)
@@ -9774,6 +9974,49 @@ static int nested_vmx_store_msr(struct kvm_vcpu *vcpu, u64 gpa, u32 count)
return 0;
}
+static bool nested_cr3_valid(struct kvm_vcpu *vcpu, unsigned long val)
+{
+ unsigned long invalid_mask;
+
+ invalid_mask = (~0ULL) << cpuid_maxphyaddr(vcpu);
+ return (val & invalid_mask) == 0;
+}
+
+/*
+ * Load guest's/host's cr3 at nested entry/exit. nested_ept is true if we are
+ * emulating VM entry into a guest with EPT enabled.
+ * Returns 0 on success, 1 on failure. Invalid state exit qualification code
+ * is assigned to entry_failure_code on failure.
+ */
+static int nested_vmx_load_cr3(struct kvm_vcpu *vcpu, unsigned long cr3, bool nested_ept,
+ unsigned long *entry_failure_code)
+{
+ if (cr3 != kvm_read_cr3(vcpu) || (!nested_ept && pdptrs_changed(vcpu))) {
+ if (!nested_cr3_valid(vcpu, cr3)) {
+ *entry_failure_code = ENTRY_FAIL_DEFAULT;
+ return 1;
+ }
+
+ /*
+ * If PAE paging and EPT are both on, CR3 is not used by the CPU and
+ * must not be dereferenced.
+ */
+ if (!is_long_mode(vcpu) && is_pae(vcpu) && is_paging(vcpu) &&
+ !nested_ept) {
+ if (!load_pdptrs(vcpu, vcpu->arch.walk_mmu, cr3)) {
+ *entry_failure_code = ENTRY_FAIL_PDPTE;
+ return 1;
+ }
+ }
+
+ vcpu->arch.cr3 = cr3;
+ __set_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail);
+ }
+
+ kvm_mmu_reset_context(vcpu);
+ return 0;
+}
+
/*
* prepare_vmcs02 is called when the L1 guest hypervisor runs its nested
* L2 guest. L1 has a vmcs for L2 (vmcs12), and this function "merges" it
@@ -9782,11 +10025,15 @@ static int nested_vmx_store_msr(struct kvm_vcpu *vcpu, u64 gpa, u32 count)
* needs. In addition to modifying the active vmcs (which is vmcs02), this
* function also has additional necessary side-effects, like setting various
* vcpu->arch fields.
+ * Returns 0 on success, 1 on failure. Invalid state exit qualification code
+ * is assigned to entry_failure_code on failure.
*/
-static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
+static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
+ unsigned long *entry_failure_code)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
u32 exec_control;
+ bool nested_ept_enabled = false;
vmcs_write16(GUEST_ES_SELECTOR, vmcs12->guest_es_selector);
vmcs_write16(GUEST_CS_SELECTOR, vmcs12->guest_cs_selector);
@@ -9951,6 +10198,7 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
vmcs12->guest_intr_status);
}
+ nested_ept_enabled = (exec_control & SECONDARY_EXEC_ENABLE_EPT) != 0;
vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control);
}
@@ -9964,6 +10212,15 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
vmx_set_constant_host_state(vmx);
/*
+ * Set the MSR load/store lists to match L0's settings.
+ */
+ vmcs_write32(VM_EXIT_MSR_STORE_COUNT, 0);
+ vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, vmx->msr_autoload.nr);
+ vmcs_write64(VM_EXIT_MSR_LOAD_ADDR, __pa(vmx->msr_autoload.host));
+ vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, vmx->msr_autoload.nr);
+ vmcs_write64(VM_ENTRY_MSR_LOAD_ADDR, __pa(vmx->msr_autoload.guest));
+
+ /*
* HOST_RSP is normally set correctly in vmx_vcpu_run() just before
* entry, but only if the current (host) sp changed from the value
* we wrote last (vmx->host_rsp). This cache is no longer relevant
@@ -10069,15 +10326,6 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
nested_ept_init_mmu_context(vcpu);
}
- if (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_EFER)
- vcpu->arch.efer = vmcs12->guest_ia32_efer;
- else if (vmcs12->vm_entry_controls & VM_ENTRY_IA32E_MODE)
- vcpu->arch.efer |= (EFER_LMA | EFER_LME);
- else
- vcpu->arch.efer &= ~(EFER_LMA | EFER_LME);
- /* Note: modifies VM_ENTRY/EXIT_CONTROLS and GUEST/HOST_IA32_EFER */
- vmx_set_efer(vcpu, vcpu->arch.efer);
-
/*
* This sets GUEST_CR0 to vmcs12->guest_cr0, with possibly a modified
* TS bit (for lazy fpu) and bits which we consider mandatory enabled.
@@ -10092,8 +10340,20 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
vmx_set_cr4(vcpu, vmcs12->guest_cr4);
vmcs_writel(CR4_READ_SHADOW, nested_read_cr4(vmcs12));
- /* shadow page tables on either EPT or shadow page tables */
- kvm_set_cr3(vcpu, vmcs12->guest_cr3);
+ if (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_EFER)
+ vcpu->arch.efer = vmcs12->guest_ia32_efer;
+ else if (vmcs12->vm_entry_controls & VM_ENTRY_IA32E_MODE)
+ vcpu->arch.efer |= (EFER_LMA | EFER_LME);
+ else
+ vcpu->arch.efer &= ~(EFER_LMA | EFER_LME);
+ /* Note: modifies VM_ENTRY/EXIT_CONTROLS and GUEST/HOST_IA32_EFER */
+ vmx_set_efer(vcpu, vcpu->arch.efer);
+
+ /* Shadow page tables on either EPT or shadow page tables. */
+ if (nested_vmx_load_cr3(vcpu, vmcs12->guest_cr3, nested_ept_enabled,
+ entry_failure_code))
+ return 1;
+
kvm_mmu_reset_context(vcpu);
if (!enable_ept)
@@ -10111,6 +10371,7 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
kvm_register_write(vcpu, VCPU_REGS_RSP, vmcs12->guest_rsp);
kvm_register_write(vcpu, VCPU_REGS_RIP, vmcs12->guest_rip);
+ return 0;
}
/*
@@ -10125,12 +10386,14 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
struct loaded_vmcs *vmcs02;
bool ia32e;
u32 msr_entry_idx;
+ unsigned long exit_qualification;
- if (!nested_vmx_check_permission(vcpu) ||
- !nested_vmx_check_vmcs12(vcpu))
+ if (!nested_vmx_check_permission(vcpu))
return 1;
- skip_emulated_instruction(vcpu);
+ if (!nested_vmx_check_vmcs12(vcpu))
+ goto out;
+
vmcs12 = get_vmcs12(vcpu);
if (enable_shadow_vmcs)
@@ -10150,37 +10413,37 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
nested_vmx_failValid(vcpu,
launch ? VMXERR_VMLAUNCH_NONCLEAR_VMCS
: VMXERR_VMRESUME_NONLAUNCHED_VMCS);
- return 1;
+ goto out;
}
if (vmcs12->guest_activity_state != GUEST_ACTIVITY_ACTIVE &&
vmcs12->guest_activity_state != GUEST_ACTIVITY_HLT) {
nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD);
- return 1;
+ goto out;
}
if (!nested_get_vmcs12_pages(vcpu, vmcs12)) {
nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD);
- return 1;
+ goto out;
}
if (nested_vmx_check_msr_bitmap_controls(vcpu, vmcs12)) {
nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD);
- return 1;
+ goto out;
}
if (nested_vmx_check_apicv_controls(vcpu, vmcs12)) {
nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD);
- return 1;
+ goto out;
}
if (nested_vmx_check_msr_switch_controls(vcpu, vmcs12)) {
nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD);
- return 1;
+ goto out;
}
if (!vmx_control_verify(vmcs12->cpu_based_vm_exec_control,
- vmx->nested.nested_vmx_true_procbased_ctls_low,
+ vmx->nested.nested_vmx_procbased_ctls_low,
vmx->nested.nested_vmx_procbased_ctls_high) ||
!vmx_control_verify(vmcs12->secondary_vm_exec_control,
vmx->nested.nested_vmx_secondary_ctls_low,
@@ -10189,33 +10452,34 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
vmx->nested.nested_vmx_pinbased_ctls_low,
vmx->nested.nested_vmx_pinbased_ctls_high) ||
!vmx_control_verify(vmcs12->vm_exit_controls,
- vmx->nested.nested_vmx_true_exit_ctls_low,
+ vmx->nested.nested_vmx_exit_ctls_low,
vmx->nested.nested_vmx_exit_ctls_high) ||
!vmx_control_verify(vmcs12->vm_entry_controls,
- vmx->nested.nested_vmx_true_entry_ctls_low,
+ vmx->nested.nested_vmx_entry_ctls_low,
vmx->nested.nested_vmx_entry_ctls_high))
{
nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD);
- return 1;
+ goto out;
}
- if (((vmcs12->host_cr0 & VMXON_CR0_ALWAYSON) != VMXON_CR0_ALWAYSON) ||
- ((vmcs12->host_cr4 & VMXON_CR4_ALWAYSON) != VMXON_CR4_ALWAYSON)) {
+ if (!nested_host_cr0_valid(vcpu, vmcs12->host_cr0) ||
+ !nested_host_cr4_valid(vcpu, vmcs12->host_cr4) ||
+ !nested_cr3_valid(vcpu, vmcs12->host_cr3)) {
nested_vmx_failValid(vcpu,
VMXERR_ENTRY_INVALID_HOST_STATE_FIELD);
- return 1;
+ goto out;
}
- if (!nested_cr0_valid(vcpu, vmcs12->guest_cr0) ||
- ((vmcs12->guest_cr4 & VMXON_CR4_ALWAYSON) != VMXON_CR4_ALWAYSON)) {
+ if (!nested_guest_cr0_valid(vcpu, vmcs12->guest_cr0) ||
+ !nested_guest_cr4_valid(vcpu, vmcs12->guest_cr4)) {
nested_vmx_entry_failure(vcpu, vmcs12,
EXIT_REASON_INVALID_STATE, ENTRY_FAIL_DEFAULT);
- return 1;
+ goto out;
}
if (vmcs12->vmcs_link_pointer != -1ull) {
nested_vmx_entry_failure(vcpu, vmcs12,
EXIT_REASON_INVALID_STATE, ENTRY_FAIL_VMCS_LINK_PTR);
- return 1;
+ goto out;
}
/*
@@ -10235,7 +10499,7 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
ia32e != !!(vmcs12->guest_ia32_efer & EFER_LME))) {
nested_vmx_entry_failure(vcpu, vmcs12,
EXIT_REASON_INVALID_STATE, ENTRY_FAIL_DEFAULT);
- return 1;
+ goto out;
}
}
@@ -10253,7 +10517,7 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
ia32e != !!(vmcs12->host_ia32_efer & EFER_LME)) {
nested_vmx_entry_failure(vcpu, vmcs12,
EXIT_REASON_INVALID_STATE, ENTRY_FAIL_DEFAULT);
- return 1;
+ goto out;
}
}
@@ -10266,6 +10530,12 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
if (!vmcs02)
return -ENOMEM;
+ /*
+ * After this point, the trap flag no longer triggers a singlestep trap
+ * on the vm entry instructions. Don't call
+ * kvm_skip_emulated_instruction.
+ */
+ skip_emulated_instruction(vcpu);
enter_guest_mode(vcpu);
if (!(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS))
@@ -10280,7 +10550,13 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
vmx_segment_cache_clear(vmx);
- prepare_vmcs02(vcpu, vmcs12);
+ if (prepare_vmcs02(vcpu, vmcs12, &exit_qualification)) {
+ leave_guest_mode(vcpu);
+ vmx_load_vmcs01(vcpu);
+ nested_vmx_entry_failure(vcpu, vmcs12,
+ EXIT_REASON_INVALID_STATE, exit_qualification);
+ return 1;
+ }
msr_entry_idx = nested_vmx_load_msr(vcpu,
vmcs12->vm_entry_msr_load_addr,
@@ -10307,6 +10583,9 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
* the success flag) when L2 exits (see nested_vmx_vmexit()).
*/
return 1;
+
+out:
+ return kvm_skip_emulated_instruction(vcpu);
}
/*
@@ -10612,6 +10891,7 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu,
struct vmcs12 *vmcs12)
{
struct kvm_segment seg;
+ unsigned long entry_failure_code;
if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_EFER)
vcpu->arch.efer = vmcs12->host_ia32_efer;
@@ -10649,8 +10929,12 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu,
nested_ept_uninit_mmu_context(vcpu);
- kvm_set_cr3(vcpu, vmcs12->host_cr3);
- kvm_mmu_reset_context(vcpu);
+ /*
+ * Only PDPTE load can fail as the value of cr3 was checked on entry and
+ * couldn't have changed.
+ */
+ if (nested_vmx_load_cr3(vcpu, vmcs12->host_cr3, false, &entry_failure_code))
+ nested_vmx_abort(vcpu, VMX_ABORT_LOAD_HOST_PDPTE_FAIL);
if (!enable_ept)
vcpu->arch.walk_mmu->inject_page_fault = kvm_inject_page_fault;
@@ -10751,6 +11035,7 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
+ u32 vm_inst_error = 0;
/* trying to cancel vmlaunch/vmresume is a bug */
WARN_ON_ONCE(vmx->nested.nested_run_pending);
@@ -10763,6 +11048,9 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
vmcs12->vm_exit_msr_store_count))
nested_vmx_abort(vcpu, VMX_ABORT_SAVE_GUEST_MSR_FAIL);
+ if (unlikely(vmx->fail))
+ vm_inst_error = vmcs_read32(VM_INSTRUCTION_ERROR);
+
vmx_load_vmcs01(vcpu);
if ((exit_reason == EXIT_REASON_EXTERNAL_INTERRUPT)
@@ -10791,6 +11079,8 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
load_vmcs12_host_state(vcpu, vmcs12);
/* Update any VMCS fields that might have changed while L2 ran */
+ vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, vmx->msr_autoload.nr);
+ vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, vmx->msr_autoload.nr);
vmcs_write64(TSC_OFFSET, vcpu->arch.tsc_offset);
if (vmx->hv_deadline_tsc == -1)
vmcs_clear_bits(PIN_BASED_VM_EXEC_CONTROL,
@@ -10839,7 +11129,7 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
*/
if (unlikely(vmx->fail)) {
vmx->fail = 0;
- nested_vmx_failValid(vcpu, vmcs_read32(VM_INSTRUCTION_ERROR));
+ nested_vmx_failValid(vcpu, vm_inst_error);
} else
nested_vmx_succeed(vcpu);
if (enable_shadow_vmcs)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 6f5f465fdb6b..1f0d2383f5ee 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -434,12 +434,14 @@ void kvm_requeue_exception(struct kvm_vcpu *vcpu, unsigned nr)
}
EXPORT_SYMBOL_GPL(kvm_requeue_exception);
-void kvm_complete_insn_gp(struct kvm_vcpu *vcpu, int err)
+int kvm_complete_insn_gp(struct kvm_vcpu *vcpu, int err)
{
if (err)
kvm_inject_gp(vcpu, 0);
else
- kvm_x86_ops->skip_emulated_instruction(vcpu);
+ return kvm_skip_emulated_instruction(vcpu);
+
+ return 1;
}
EXPORT_SYMBOL_GPL(kvm_complete_insn_gp);
@@ -573,7 +575,7 @@ out:
}
EXPORT_SYMBOL_GPL(load_pdptrs);
-static bool pdptrs_changed(struct kvm_vcpu *vcpu)
+bool pdptrs_changed(struct kvm_vcpu *vcpu)
{
u64 pdpte[ARRAY_SIZE(vcpu->arch.walk_mmu->pdptrs)];
bool changed = true;
@@ -599,6 +601,7 @@ out:
return changed;
}
+EXPORT_SYMBOL_GPL(pdptrs_changed);
int kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
{
@@ -2178,7 +2181,6 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
break;
case MSR_KVM_SYSTEM_TIME_NEW:
case MSR_KVM_SYSTEM_TIME: {
- u64 gpa_offset;
struct kvm_arch *ka = &vcpu->kvm->arch;
kvmclock_reset(vcpu);
@@ -2200,8 +2202,6 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
if (!(data & 1))
break;
- gpa_offset = data & ~(PAGE_MASK | 1);
-
if (kvm_gfn_to_hva_cache_init(vcpu->kvm,
&vcpu->arch.pv_time, data & ~1ULL,
sizeof(struct pvclock_vcpu_time_info)))
@@ -2296,7 +2296,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
if (kvm_pmu_is_valid_msr(vcpu, msr))
return kvm_pmu_set_msr(vcpu, msr_info);
if (!ignore_msrs) {
- vcpu_unimpl(vcpu, "unhandled wrmsr: 0x%x data 0x%llx\n",
+ vcpu_debug_ratelimited(vcpu, "unhandled wrmsr: 0x%x data 0x%llx\n",
msr, data);
return 1;
} else {
@@ -2508,7 +2508,8 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
if (kvm_pmu_is_valid_msr(vcpu, msr_info->index))
return kvm_pmu_get_msr(vcpu, msr_info->index, &msr_info->data);
if (!ignore_msrs) {
- vcpu_unimpl(vcpu, "unhandled rdmsr: 0x%x\n", msr_info->index);
+ vcpu_debug_ratelimited(vcpu, "unhandled rdmsr: 0x%x\n",
+ msr_info->index);
return 1;
} else {
vcpu_unimpl(vcpu, "ignored rdmsr: 0x%x\n", msr_info->index);
@@ -2812,7 +2813,7 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
}
if (kvm_lapic_hv_timer_in_use(vcpu) &&
kvm_x86_ops->set_hv_timer(vcpu,
- kvm_get_lapic_tscdeadline_msr(vcpu)))
+ kvm_get_lapic_target_expiration_tsc(vcpu)))
kvm_lapic_switch_to_sw_timer(vcpu);
/*
* On a host with synchronized TSC, there is no need to update
@@ -4832,7 +4833,7 @@ static void emulator_invlpg(struct x86_emulate_ctxt *ctxt, ulong address)
kvm_mmu_invlpg(emul_to_vcpu(ctxt), address);
}
-int kvm_emulate_wbinvd_noskip(struct kvm_vcpu *vcpu)
+static int kvm_emulate_wbinvd_noskip(struct kvm_vcpu *vcpu)
{
if (!need_emulate_wbinvd(vcpu))
return X86EMUL_CONTINUE;
@@ -4852,8 +4853,8 @@ int kvm_emulate_wbinvd_noskip(struct kvm_vcpu *vcpu)
int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu)
{
- kvm_x86_ops->skip_emulated_instruction(vcpu);
- return kvm_emulate_wbinvd_noskip(vcpu);
+ kvm_emulate_wbinvd_noskip(vcpu);
+ return kvm_skip_emulated_instruction(vcpu);
}
EXPORT_SYMBOL_GPL(kvm_emulate_wbinvd);
@@ -5451,7 +5452,6 @@ static void kvm_vcpu_check_singlestep(struct kvm_vcpu *vcpu, unsigned long rflag
kvm_run->exit_reason = KVM_EXIT_DEBUG;
*r = EMULATE_USER_EXIT;
} else {
- vcpu->arch.emulate_ctxt.eflags &= ~X86_EFLAGS_TF;
/*
* "Certain debug exceptions may clear bit 0-3. The
* remaining contents of the DR6 register are never
@@ -5464,6 +5464,17 @@ static void kvm_vcpu_check_singlestep(struct kvm_vcpu *vcpu, unsigned long rflag
}
}
+int kvm_skip_emulated_instruction(struct kvm_vcpu *vcpu)
+{
+ unsigned long rflags = kvm_x86_ops->get_rflags(vcpu);
+ int r = EMULATE_DONE;
+
+ kvm_x86_ops->skip_emulated_instruction(vcpu);
+ kvm_vcpu_check_singlestep(vcpu, rflags, &r);
+ return r == EMULATE_DONE;
+}
+EXPORT_SYMBOL_GPL(kvm_skip_emulated_instruction);
+
static bool kvm_vcpu_check_breakpoint(struct kvm_vcpu *vcpu, int *r)
{
if (unlikely(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) &&
@@ -5649,6 +5660,49 @@ int kvm_fast_pio_out(struct kvm_vcpu *vcpu, int size, unsigned short port)
}
EXPORT_SYMBOL_GPL(kvm_fast_pio_out);
+static int complete_fast_pio_in(struct kvm_vcpu *vcpu)
+{
+ unsigned long val;
+
+ /* We should only ever be called with arch.pio.count equal to 1 */
+ BUG_ON(vcpu->arch.pio.count != 1);
+
+ /* For size less than 4 we merge, else we zero extend */
+ val = (vcpu->arch.pio.size < 4) ? kvm_register_read(vcpu, VCPU_REGS_RAX)
+ : 0;
+
+ /*
+ * Since vcpu->arch.pio.count == 1 let emulator_pio_in_emulated perform
+ * the copy and tracing
+ */
+ emulator_pio_in_emulated(&vcpu->arch.emulate_ctxt, vcpu->arch.pio.size,
+ vcpu->arch.pio.port, &val, 1);
+ kvm_register_write(vcpu, VCPU_REGS_RAX, val);
+
+ return 1;
+}
+
+int kvm_fast_pio_in(struct kvm_vcpu *vcpu, int size, unsigned short port)
+{
+ unsigned long val;
+ int ret;
+
+ /* For size less than 4 we merge, else we zero extend */
+ val = (size < 4) ? kvm_register_read(vcpu, VCPU_REGS_RAX) : 0;
+
+ ret = emulator_pio_in_emulated(&vcpu->arch.emulate_ctxt, size, port,
+ &val, 1);
+ if (ret) {
+ kvm_register_write(vcpu, VCPU_REGS_RAX, val);
+ return ret;
+ }
+
+ vcpu->arch.complete_userspace_io = complete_fast_pio_in;
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(kvm_fast_pio_in);
+
static int kvmclock_cpu_down_prep(unsigned int cpu)
{
__this_cpu_write(cpu_tsc_khz, 0);
@@ -5998,8 +6052,12 @@ EXPORT_SYMBOL_GPL(kvm_vcpu_halt);
int kvm_emulate_halt(struct kvm_vcpu *vcpu)
{
- kvm_x86_ops->skip_emulated_instruction(vcpu);
- return kvm_vcpu_halt(vcpu);
+ int ret = kvm_skip_emulated_instruction(vcpu);
+ /*
+ * TODO: we might be squashing a GUESTDBG_SINGLESTEP-triggered
+ * KVM_EXIT_DEBUG here.
+ */
+ return kvm_vcpu_halt(vcpu) && ret;
}
EXPORT_SYMBOL_GPL(kvm_emulate_halt);
@@ -6030,9 +6088,9 @@ void kvm_vcpu_deactivate_apicv(struct kvm_vcpu *vcpu)
int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
{
unsigned long nr, a0, a1, a2, a3, ret;
- int op_64_bit, r = 1;
+ int op_64_bit, r;
- kvm_x86_ops->skip_emulated_instruction(vcpu);
+ r = kvm_skip_emulated_instruction(vcpu);
if (kvm_hv_hypercall_enabled(vcpu->kvm))
return kvm_hv_hypercall(vcpu);
@@ -8175,7 +8233,7 @@ void kvm_arch_flush_shadow_all(struct kvm *kvm)
void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
struct kvm_memory_slot *slot)
{
- kvm_mmu_invalidate_zap_all_pages(kvm);
+ kvm_page_track_flush_slot(kvm, slot);
}
static inline bool kvm_vcpu_has_events(struct kvm_vcpu *vcpu)
diff --git a/arch/x86/pci/xen.c b/arch/x86/pci/xen.c
index bedfab98077a..e1fb269c87af 100644
--- a/arch/x86/pci/xen.c
+++ b/arch/x86/pci/xen.c
@@ -264,8 +264,8 @@ static int xen_hvm_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
return 0;
error:
- dev_err(&dev->dev,
- "Xen PCI frontend has not registered MSI/MSI-X support!\n");
+ dev_err(&dev->dev, "Failed to create MSI%s! ret=%d!\n",
+ type == PCI_CAP_ID_MSI ? "" : "-X", irq);
return irq;
}
diff --git a/arch/x86/platform/ce4100/ce4100.c b/arch/x86/platform/ce4100/ce4100.c
index b27bccd4390f..821cb41f00e6 100644
--- a/arch/x86/platform/ce4100/ce4100.c
+++ b/arch/x86/platform/ce4100/ce4100.c
@@ -89,7 +89,7 @@ static void ce4100_mem_serial_out(struct uart_port *p, int offset, int value)
}
static void ce4100_serial_fixup(int port, struct uart_port *up,
- unsigned short *capabilites)
+ u32 *capabilites)
{
#ifdef CONFIG_EARLY_PRINTK
/*
diff --git a/arch/x86/power/hibernate_64.c b/arch/x86/power/hibernate_64.c
index 9634557a5444..ded2e8272382 100644
--- a/arch/x86/power/hibernate_64.c
+++ b/arch/x86/power/hibernate_64.c
@@ -11,6 +11,10 @@
#include <linux/gfp.h>
#include <linux/smp.h>
#include <linux/suspend.h>
+#include <linux/scatterlist.h>
+#include <linux/kdebug.h>
+
+#include <crypto/hash.h>
#include <asm/init.h>
#include <asm/proto.h>
@@ -177,14 +181,86 @@ int pfn_is_nosave(unsigned long pfn)
return (pfn >= nosave_begin_pfn) && (pfn < nosave_end_pfn);
}
+#define MD5_DIGEST_SIZE 16
+
struct restore_data_record {
unsigned long jump_address;
unsigned long jump_address_phys;
unsigned long cr3;
unsigned long magic;
+ u8 e820_digest[MD5_DIGEST_SIZE];
};
-#define RESTORE_MAGIC 0x123456789ABCDEF0UL
+#define RESTORE_MAGIC 0x23456789ABCDEF01UL
+
+#if IS_BUILTIN(CONFIG_CRYPTO_MD5)
+/**
+ * get_e820_md5 - calculate md5 according to given e820 map
+ *
+ * @map: the e820 map to be calculated
+ * @buf: the md5 result to be stored to
+ */
+static int get_e820_md5(struct e820map *map, void *buf)
+{
+ struct scatterlist sg;
+ struct crypto_ahash *tfm;
+ int size;
+ int ret = 0;
+
+ tfm = crypto_alloc_ahash("md5", 0, CRYPTO_ALG_ASYNC);
+ if (IS_ERR(tfm))
+ return -ENOMEM;
+
+ {
+ AHASH_REQUEST_ON_STACK(req, tfm);
+ size = offsetof(struct e820map, map)
+ + sizeof(struct e820entry) * map->nr_map;
+ ahash_request_set_tfm(req, tfm);
+ sg_init_one(&sg, (u8 *)map, size);
+ ahash_request_set_callback(req, 0, NULL, NULL);
+ ahash_request_set_crypt(req, &sg, buf, size);
+
+ if (crypto_ahash_digest(req))
+ ret = -EINVAL;
+ ahash_request_zero(req);
+ }
+ crypto_free_ahash(tfm);
+
+ return ret;
+}
+
+static void hibernation_e820_save(void *buf)
+{
+ get_e820_md5(e820_saved, buf);
+}
+
+static bool hibernation_e820_mismatch(void *buf)
+{
+ int ret;
+ u8 result[MD5_DIGEST_SIZE];
+
+ memset(result, 0, MD5_DIGEST_SIZE);
+ /* If there is no digest in suspend kernel, let it go. */
+ if (!memcmp(result, buf, MD5_DIGEST_SIZE))
+ return false;
+
+ ret = get_e820_md5(e820_saved, result);
+ if (ret)
+ return true;
+
+ return memcmp(result, buf, MD5_DIGEST_SIZE) ? true : false;
+}
+#else
+static void hibernation_e820_save(void *buf)
+{
+}
+
+static bool hibernation_e820_mismatch(void *buf)
+{
+ /* If md5 is not builtin for restore kernel, let it go. */
+ return false;
+}
+#endif
/**
* arch_hibernation_header_save - populate the architecture specific part
@@ -201,6 +277,9 @@ int arch_hibernation_header_save(void *addr, unsigned int max_size)
rdr->jump_address_phys = __pa_symbol(&restore_registers);
rdr->cr3 = restore_cr3;
rdr->magic = RESTORE_MAGIC;
+
+ hibernation_e820_save(rdr->e820_digest);
+
return 0;
}
@@ -216,5 +295,16 @@ int arch_hibernation_header_restore(void *addr)
restore_jump_address = rdr->jump_address;
jump_address_phys = rdr->jump_address_phys;
restore_cr3 = rdr->cr3;
- return (rdr->magic == RESTORE_MAGIC) ? 0 : -EINVAL;
+
+ if (rdr->magic != RESTORE_MAGIC) {
+ pr_crit("Unrecognized hibernate image header format!\n");
+ return -EINVAL;
+ }
+
+ if (hibernation_e820_mismatch(rdr->e820_digest)) {
+ pr_crit("Hibernate inconsistent memory map detected!\n");
+ return -ENODEV;
+ }
+
+ return 0;
}
diff --git a/arch/x86/xen/pci-swiotlb-xen.c b/arch/x86/xen/pci-swiotlb-xen.c
index 0e98e5d241d0..a9fafb5c8738 100644
--- a/arch/x86/xen/pci-swiotlb-xen.c
+++ b/arch/x86/xen/pci-swiotlb-xen.c
@@ -19,7 +19,6 @@
int xen_swiotlb __read_mostly;
static struct dma_map_ops xen_swiotlb_dma_ops = {
- .mapping_error = xen_swiotlb_dma_mapping_error,
.alloc = xen_swiotlb_alloc_coherent,
.free = xen_swiotlb_free_coherent,
.sync_single_for_cpu = xen_swiotlb_sync_single_for_cpu,
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index f8960fca0827..8c394e30e5fe 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -41,7 +41,7 @@ struct xen_memory_region xen_extra_mem[XEN_EXTRA_MEM_MAX_REGIONS] __initdata;
unsigned long xen_released_pages;
/* E820 map used during setting up memory. */
-static struct e820entry xen_e820_map[E820MAX] __initdata;
+static struct e820entry xen_e820_map[E820_X_MAX] __initdata;
static u32 xen_e820_map_entries __initdata;
/*
@@ -750,7 +750,7 @@ char * __init xen_memory_setup(void)
max_pfn = min(max_pfn, xen_start_info->nr_pages);
mem_end = PFN_PHYS(max_pfn);
- memmap.nr_entries = E820MAX;
+ memmap.nr_entries = ARRAY_SIZE(xen_e820_map);
set_xen_guest_handle(memmap.buffer, xen_e820_map);
op = xen_initial_domain() ?
@@ -923,7 +923,7 @@ char * __init xen_auto_xlated_memory_setup(void)
int i;
int rc;
- memmap.nr_entries = E820MAX;
+ memmap.nr_entries = ARRAY_SIZE(xen_e820_map);
set_xen_guest_handle(memmap.buffer, xen_e820_map);
rc = HYPERVISOR_memory_op(XENMEM_memory_map, &memmap);