author    Jason A. Donenfeld <Jason@zx2c4.com>    2021-02-19 14:29:04 +0100
committer David Bauer <mail@david-bauer.net>     2021-02-26 20:41:01 +0100
commit    3888fa78802354ab7bbd19b7d061fd80a16ce06b (patch)
tree      2225a6313cb6482f0cb9c09df662a0d44197350e /target/linux/generic/backport-5.4/080-wireguard-0008-crypto-arm-chacha-remove-dependency-on-generic-ChaCh.patch
parent    7d4143234c4dfdd050ebc64ec8231f9d81ea65af (diff)
kernel: 5.4: import wireguard backport
Rather than using the clunky, old, slower wireguard-linux-compat out of tree module, this commit does a patch-by-patch backport of upstream's wireguard to 5.4. This specific backport is in widespread use, being part of SUSE's enterprise kernel, Oracle's enterprise kernel, Google's Android kernel, Gentoo's distro kernel, and probably more I've forgotten about. It's definitely the "more proper" way of adding wireguard to a kernel than the ugly compat.h hell of the wireguard-linux-compat repo. And most importantly for OpenWrt, it allows using the same module configuration code for 5.10 as for 5.4, with no need for bifurcation.

These patches are from the backport tree which is maintained in the open here: https://git.zx2c4.com/wireguard-linux/log/?h=backport-5.4.y

I'll be sending PRs to update this as needed.

Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
Diffstat (limited to 'target/linux/generic/backport-5.4/080-wireguard-0008-crypto-arm-chacha-remove-dependency-on-generic-ChaCh.patch')
-rw-r--r-- target/linux/generic/backport-5.4/080-wireguard-0008-crypto-arm-chacha-remove-dependency-on-generic-ChaCh.patch | 691
1 file changed, 691 insertions(+), 0 deletions(-)
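For orientation before the patch body: the buffer walk used by chacha_doneon() in both the removed chacha-neon-glue.c and the new chacha-glue.c consumes four ChaCha blocks per call while it can, then single blocks, then bounces a partial tail through a stack buffer so the block routine never touches memory past the caller's buffers. A minimal standalone userspace sketch of that walk follows; block_xor()/block_xor_x4() are trivial stand-ins for chacha_block_xor_neon()/chacha_4block_xor_neon(), not the real cipher.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define CHACHA_BLOCK_SIZE 64

/* Stand-in for chacha_block_xor_neon(): XOR one block with a byte
 * derived from the block counter, just so the walk can be exercised. */
static void block_xor(uint32_t *state, uint8_t *dst, const uint8_t *src)
{
	for (int i = 0; i < CHACHA_BLOCK_SIZE; i++)
		dst[i] = src[i] ^ (uint8_t)state[12];
}

/* Stand-in for chacha_4block_xor_neon(): four blocks per call, each
 * with its own counter; the caller advances state[12] by 4 afterwards. */
static void block_xor_x4(uint32_t *state, uint8_t *dst, const uint8_t *src)
{
	uint32_t ctr = state[12];

	for (int b = 0; b < 4; b++) {
		state[12] = ctr + b;
		block_xor(state, dst + b * CHACHA_BLOCK_SIZE,
			  src + b * CHACHA_BLOCK_SIZE);
	}
	state[12] = ctr;
}

/* Mirrors chacha_doneon(): wide path, single blocks, buffered tail. */
static void chacha_walk(uint32_t *state, uint8_t *dst, const uint8_t *src,
			unsigned int bytes)
{
	uint8_t buf[CHACHA_BLOCK_SIZE];

	while (bytes >= CHACHA_BLOCK_SIZE * 4) {
		block_xor_x4(state, dst, src);
		bytes -= CHACHA_BLOCK_SIZE * 4;
		src += CHACHA_BLOCK_SIZE * 4;
		dst += CHACHA_BLOCK_SIZE * 4;
		state[12] += 4;
	}
	while (bytes >= CHACHA_BLOCK_SIZE) {
		block_xor(state, dst, src);
		bytes -= CHACHA_BLOCK_SIZE;
		src += CHACHA_BLOCK_SIZE;
		dst += CHACHA_BLOCK_SIZE;
		state[12]++;
	}
	if (bytes) {
		/* Partial tail: pad to a full block on the stack so the
		 * block routine never reads or writes past the buffers. */
		memcpy(buf, src, bytes);
		block_xor(state, buf, buf);
		memcpy(dst, buf, bytes);
	}
}

int main(void)
{
	uint32_t state[16] = { 0 };	/* word 12 is the block counter */
	uint8_t src[300] = { 0 }, dst[300];

	chacha_walk(state, dst, src, sizeof(src));
	/* 256 bytes via the 4-block path, then a 44-byte tail at counter 4 */
	printf("dst[299] = %02x\n", dst[299]);
	return 0;
}

The same walk appears unchanged in the new file; what the patch adds in front of it is the scalar chacha_doarm() path, taken whenever NEON is unavailable or unusable.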
diff --git a/target/linux/generic/backport-5.4/080-wireguard-0008-crypto-arm-chacha-remove-dependency-on-generic-ChaCh.patch b/target/linux/generic/backport-5.4/080-wireguard-0008-crypto-arm-chacha-remove-dependency-on-generic-ChaCh.patch
new file mode 100644
index 0000000000..7f907f2364
--- /dev/null
+++ b/target/linux/generic/backport-5.4/080-wireguard-0008-crypto-arm-chacha-remove-dependency-on-generic-ChaCh.patch
@@ -0,0 +1,691 @@
+From a92bd97c758d32511f0deeef84f25c3a1d5e7879 Mon Sep 17 00:00:00 2001
+From: Ard Biesheuvel <ardb@kernel.org>
+Date: Fri, 8 Nov 2019 13:22:14 +0100
+Subject: [PATCH 008/124] crypto: arm/chacha - remove dependency on generic
+ ChaCha driver
+
+commit b36d8c09e710c71f6a9690b6586fea2d1c9e1e27 upstream.
+
+Instead of falling back to the generic ChaCha skcipher driver for
+non-SIMD cases, use a fast scalar implementation for ARM authored
+by Eric Biggers. This removes the module dependency on chacha-generic
+altogether, which also simplifies things when we expose the ChaCha
+library interface from this module.
+
+Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
+Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
+Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
+---
+ arch/arm/crypto/Kconfig | 4 +-
+ arch/arm/crypto/Makefile | 3 +-
+ arch/arm/crypto/chacha-glue.c | 304 +++++++++++++++++++++++++++
+ arch/arm/crypto/chacha-neon-glue.c | 202 ------------------
+ arch/arm/crypto/chacha-scalar-core.S | 65 +++---
+ arch/arm64/crypto/chacha-neon-glue.c | 2 +-
+ 6 files changed, 340 insertions(+), 240 deletions(-)
+ create mode 100644 arch/arm/crypto/chacha-glue.c
+ delete mode 100644 arch/arm/crypto/chacha-neon-glue.c
+
+--- a/arch/arm/crypto/Kconfig
++++ b/arch/arm/crypto/Kconfig
+@@ -127,10 +127,8 @@ config CRYPTO_CRC32_ARM_CE
+ select CRYPTO_HASH
+
+ config CRYPTO_CHACHA20_NEON
+- tristate "NEON accelerated ChaCha stream cipher algorithms"
+- depends on KERNEL_MODE_NEON
++ tristate "NEON and scalar accelerated ChaCha stream cipher algorithms"
+ select CRYPTO_BLKCIPHER
+- select CRYPTO_CHACHA20
+
+ config CRYPTO_NHPOLY1305_NEON
+ tristate "NEON accelerated NHPoly1305 hash function (for Adiantum)"
+--- a/arch/arm/crypto/Makefile
++++ b/arch/arm/crypto/Makefile
+@@ -53,7 +53,8 @@ aes-arm-ce-y := aes-ce-core.o aes-ce-glu
+ ghash-arm-ce-y := ghash-ce-core.o ghash-ce-glue.o
+ crct10dif-arm-ce-y := crct10dif-ce-core.o crct10dif-ce-glue.o
+ crc32-arm-ce-y:= crc32-ce-core.o crc32-ce-glue.o
+-chacha-neon-y := chacha-neon-core.o chacha-neon-glue.o
++chacha-neon-y := chacha-scalar-core.o chacha-glue.o
++chacha-neon-$(CONFIG_KERNEL_MODE_NEON) += chacha-neon-core.o
+ nhpoly1305-neon-y := nh-neon-core.o nhpoly1305-neon-glue.o
+
+ ifdef REGENERATE_ARM_CRYPTO
+--- /dev/null
++++ b/arch/arm/crypto/chacha-glue.c
+@@ -0,0 +1,304 @@
++// SPDX-License-Identifier: GPL-2.0
++/*
++ * ARM NEON accelerated ChaCha and XChaCha stream ciphers,
++ * including ChaCha20 (RFC7539)
++ *
++ * Copyright (C) 2016-2019 Linaro, Ltd. <ard.biesheuvel@linaro.org>
++ * Copyright (C) 2015 Martin Willi
++ */
++
++#include <crypto/algapi.h>
++#include <crypto/internal/chacha.h>
++#include <crypto/internal/simd.h>
++#include <crypto/internal/skcipher.h>
++#include <linux/kernel.h>
++#include <linux/module.h>
++
++#include <asm/cputype.h>
++#include <asm/hwcap.h>
++#include <asm/neon.h>
++#include <asm/simd.h>
++
++asmlinkage void chacha_block_xor_neon(const u32 *state, u8 *dst, const u8 *src,
++ int nrounds);
++asmlinkage void chacha_4block_xor_neon(const u32 *state, u8 *dst, const u8 *src,
++ int nrounds);
++asmlinkage void hchacha_block_arm(const u32 *state, u32 *out, int nrounds);
++asmlinkage void hchacha_block_neon(const u32 *state, u32 *out, int nrounds);
++
++asmlinkage void chacha_doarm(u8 *dst, const u8 *src, unsigned int bytes,
++ const u32 *state, int nrounds);
++
++static inline bool neon_usable(void)
++{
++ return crypto_simd_usable();
++}
++
++static void chacha_doneon(u32 *state, u8 *dst, const u8 *src,
++ unsigned int bytes, int nrounds)
++{
++ u8 buf[CHACHA_BLOCK_SIZE];
++
++ while (bytes >= CHACHA_BLOCK_SIZE * 4) {
++ chacha_4block_xor_neon(state, dst, src, nrounds);
++ bytes -= CHACHA_BLOCK_SIZE * 4;
++ src += CHACHA_BLOCK_SIZE * 4;
++ dst += CHACHA_BLOCK_SIZE * 4;
++ state[12] += 4;
++ }
++ while (bytes >= CHACHA_BLOCK_SIZE) {
++ chacha_block_xor_neon(state, dst, src, nrounds);
++ bytes -= CHACHA_BLOCK_SIZE;
++ src += CHACHA_BLOCK_SIZE;
++ dst += CHACHA_BLOCK_SIZE;
++ state[12]++;
++ }
++ if (bytes) {
++ memcpy(buf, src, bytes);
++ chacha_block_xor_neon(state, buf, buf, nrounds);
++ memcpy(dst, buf, bytes);
++ }
++}
++
++static int chacha_stream_xor(struct skcipher_request *req,
++ const struct chacha_ctx *ctx, const u8 *iv,
++ bool neon)
++{
++ struct skcipher_walk walk;
++ u32 state[16];
++ int err;
++
++ err = skcipher_walk_virt(&walk, req, false);
++
++ chacha_init_generic(state, ctx->key, iv);
++
++ while (walk.nbytes > 0) {
++ unsigned int nbytes = walk.nbytes;
++
++ if (nbytes < walk.total)
++ nbytes = round_down(nbytes, walk.stride);
++
++ if (!neon) {
++ chacha_doarm(walk.dst.virt.addr, walk.src.virt.addr,
++ nbytes, state, ctx->nrounds);
++ state[12] += DIV_ROUND_UP(nbytes, CHACHA_BLOCK_SIZE);
++ } else {
++ kernel_neon_begin();
++ chacha_doneon(state, walk.dst.virt.addr,
++ walk.src.virt.addr, nbytes, ctx->nrounds);
++ kernel_neon_end();
++ }
++ err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
++ }
++
++ return err;
++}
++
++static int do_chacha(struct skcipher_request *req, bool neon)
++{
++ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
++ struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);
++
++ return chacha_stream_xor(req, ctx, req->iv, neon);
++}
++
++static int chacha_arm(struct skcipher_request *req)
++{
++ return do_chacha(req, false);
++}
++
++static int chacha_neon(struct skcipher_request *req)
++{
++ return do_chacha(req, neon_usable());
++}
++
++static int do_xchacha(struct skcipher_request *req, bool neon)
++{
++ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
++ struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);
++ struct chacha_ctx subctx;
++ u32 state[16];
++ u8 real_iv[16];
++
++ chacha_init_generic(state, ctx->key, req->iv);
++
++ if (!neon) {
++ hchacha_block_arm(state, subctx.key, ctx->nrounds);
++ } else {
++ kernel_neon_begin();
++ hchacha_block_neon(state, subctx.key, ctx->nrounds);
++ kernel_neon_end();
++ }
++ subctx.nrounds = ctx->nrounds;
++
++ memcpy(&real_iv[0], req->iv + 24, 8);
++ memcpy(&real_iv[8], req->iv + 16, 8);
++ return chacha_stream_xor(req, &subctx, real_iv, neon);
++}
++
++static int xchacha_arm(struct skcipher_request *req)
++{
++ return do_xchacha(req, false);
++}
++
++static int xchacha_neon(struct skcipher_request *req)
++{
++ return do_xchacha(req, neon_usable());
++}
++
++static struct skcipher_alg arm_algs[] = {
++ {
++ .base.cra_name = "chacha20",
++ .base.cra_driver_name = "chacha20-arm",
++ .base.cra_priority = 200,
++ .base.cra_blocksize = 1,
++ .base.cra_ctxsize = sizeof(struct chacha_ctx),
++ .base.cra_module = THIS_MODULE,
++
++ .min_keysize = CHACHA_KEY_SIZE,
++ .max_keysize = CHACHA_KEY_SIZE,
++ .ivsize = CHACHA_IV_SIZE,
++ .chunksize = CHACHA_BLOCK_SIZE,
++ .setkey = chacha20_setkey,
++ .encrypt = chacha_arm,
++ .decrypt = chacha_arm,
++ }, {
++ .base.cra_name = "xchacha20",
++ .base.cra_driver_name = "xchacha20-arm",
++ .base.cra_priority = 200,
++ .base.cra_blocksize = 1,
++ .base.cra_ctxsize = sizeof(struct chacha_ctx),
++ .base.cra_module = THIS_MODULE,
++
++ .min_keysize = CHACHA_KEY_SIZE,
++ .max_keysize = CHACHA_KEY_SIZE,
++ .ivsize = XCHACHA_IV_SIZE,
++ .chunksize = CHACHA_BLOCK_SIZE,
++ .setkey = chacha20_setkey,
++ .encrypt = xchacha_arm,
++ .decrypt = xchacha_arm,
++ }, {
++ .base.cra_name = "xchacha12",
++ .base.cra_driver_name = "xchacha12-arm",
++ .base.cra_priority = 200,
++ .base.cra_blocksize = 1,
++ .base.cra_ctxsize = sizeof(struct chacha_ctx),
++ .base.cra_module = THIS_MODULE,
++
++ .min_keysize = CHACHA_KEY_SIZE,
++ .max_keysize = CHACHA_KEY_SIZE,
++ .ivsize = XCHACHA_IV_SIZE,
++ .chunksize = CHACHA_BLOCK_SIZE,
++ .setkey = chacha12_setkey,
++ .encrypt = xchacha_arm,
++ .decrypt = xchacha_arm,
++ },
++};
++
++static struct skcipher_alg neon_algs[] = {
++ {
++ .base.cra_name = "chacha20",
++ .base.cra_driver_name = "chacha20-neon",
++ .base.cra_priority = 300,
++ .base.cra_blocksize = 1,
++ .base.cra_ctxsize = sizeof(struct chacha_ctx),
++ .base.cra_module = THIS_MODULE,
++
++ .min_keysize = CHACHA_KEY_SIZE,
++ .max_keysize = CHACHA_KEY_SIZE,
++ .ivsize = CHACHA_IV_SIZE,
++ .chunksize = CHACHA_BLOCK_SIZE,
++ .walksize = 4 * CHACHA_BLOCK_SIZE,
++ .setkey = chacha20_setkey,
++ .encrypt = chacha_neon,
++ .decrypt = chacha_neon,
++ }, {
++ .base.cra_name = "xchacha20",
++ .base.cra_driver_name = "xchacha20-neon",
++ .base.cra_priority = 300,
++ .base.cra_blocksize = 1,
++ .base.cra_ctxsize = sizeof(struct chacha_ctx),
++ .base.cra_module = THIS_MODULE,
++
++ .min_keysize = CHACHA_KEY_SIZE,
++ .max_keysize = CHACHA_KEY_SIZE,
++ .ivsize = XCHACHA_IV_SIZE,
++ .chunksize = CHACHA_BLOCK_SIZE,
++ .walksize = 4 * CHACHA_BLOCK_SIZE,
++ .setkey = chacha20_setkey,
++ .encrypt = xchacha_neon,
++ .decrypt = xchacha_neon,
++ }, {
++ .base.cra_name = "xchacha12",
++ .base.cra_driver_name = "xchacha12-neon",
++ .base.cra_priority = 300,
++ .base.cra_blocksize = 1,
++ .base.cra_ctxsize = sizeof(struct chacha_ctx),
++ .base.cra_module = THIS_MODULE,
++
++ .min_keysize = CHACHA_KEY_SIZE,
++ .max_keysize = CHACHA_KEY_SIZE,
++ .ivsize = XCHACHA_IV_SIZE,
++ .chunksize = CHACHA_BLOCK_SIZE,
++ .walksize = 4 * CHACHA_BLOCK_SIZE,
++ .setkey = chacha12_setkey,
++ .encrypt = xchacha_neon,
++ .decrypt = xchacha_neon,
++ }
++};
++
++static int __init chacha_simd_mod_init(void)
++{
++ int err;
++
++ err = crypto_register_skciphers(arm_algs, ARRAY_SIZE(arm_algs));
++ if (err)
++ return err;
++
++ if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && (elf_hwcap & HWCAP_NEON)) {
++ int i;
++
++ switch (read_cpuid_part()) {
++ case ARM_CPU_PART_CORTEX_A7:
++ case ARM_CPU_PART_CORTEX_A5:
++ /*
++ * The Cortex-A7 and Cortex-A5 do not perform well with
++ * the NEON implementation but do incredibly with the
++ * scalar one and use less power.
++ */
++ for (i = 0; i < ARRAY_SIZE(neon_algs); i++)
++ neon_algs[i].base.cra_priority = 0;
++ break;
++ }
++
++ err = crypto_register_skciphers(neon_algs, ARRAY_SIZE(neon_algs));
++ if (err)
++ crypto_unregister_skciphers(arm_algs, ARRAY_SIZE(arm_algs));
++ }
++ return err;
++}
++
++static void __exit chacha_simd_mod_fini(void)
++{
++ crypto_unregister_skciphers(arm_algs, ARRAY_SIZE(arm_algs));
++ if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && (elf_hwcap & HWCAP_NEON))
++ crypto_unregister_skciphers(neon_algs, ARRAY_SIZE(neon_algs));
++}
++
++module_init(chacha_simd_mod_init);
++module_exit(chacha_simd_mod_fini);
++
++MODULE_DESCRIPTION("ChaCha and XChaCha stream ciphers (scalar and NEON accelerated)");
++MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
++MODULE_LICENSE("GPL v2");
++MODULE_ALIAS_CRYPTO("chacha20");
++MODULE_ALIAS_CRYPTO("chacha20-arm");
++MODULE_ALIAS_CRYPTO("xchacha20");
++MODULE_ALIAS_CRYPTO("xchacha20-arm");
++MODULE_ALIAS_CRYPTO("xchacha12");
++MODULE_ALIAS_CRYPTO("xchacha12-arm");
++#ifdef CONFIG_KERNEL_MODE_NEON
++MODULE_ALIAS_CRYPTO("chacha20-neon");
++MODULE_ALIAS_CRYPTO("xchacha20-neon");
++MODULE_ALIAS_CRYPTO("xchacha12-neon");
++#endif
+--- a/arch/arm/crypto/chacha-neon-glue.c
++++ /dev/null
+@@ -1,202 +0,0 @@
+-/*
+- * ARM NEON accelerated ChaCha and XChaCha stream ciphers,
+- * including ChaCha20 (RFC7539)
+- *
+- * Copyright (C) 2016 Linaro, Ltd. <ard.biesheuvel@linaro.org>
+- *
+- * This program is free software; you can redistribute it and/or modify
+- * it under the terms of the GNU General Public License version 2 as
+- * published by the Free Software Foundation.
+- *
+- * Based on:
+- * ChaCha20 256-bit cipher algorithm, RFC7539, SIMD glue code
+- *
+- * Copyright (C) 2015 Martin Willi
+- *
+- * This program is free software; you can redistribute it and/or modify
+- * it under the terms of the GNU General Public License as published by
+- * the Free Software Foundation; either version 2 of the License, or
+- * (at your option) any later version.
+- */
+-
+-#include <crypto/algapi.h>
+-#include <crypto/internal/chacha.h>
+-#include <crypto/internal/simd.h>
+-#include <crypto/internal/skcipher.h>
+-#include <linux/kernel.h>
+-#include <linux/module.h>
+-
+-#include <asm/hwcap.h>
+-#include <asm/neon.h>
+-#include <asm/simd.h>
+-
+-asmlinkage void chacha_block_xor_neon(const u32 *state, u8 *dst, const u8 *src,
+- int nrounds);
+-asmlinkage void chacha_4block_xor_neon(const u32 *state, u8 *dst, const u8 *src,
+- int nrounds);
+-asmlinkage void hchacha_block_neon(const u32 *state, u32 *out, int nrounds);
+-
+-static void chacha_doneon(u32 *state, u8 *dst, const u8 *src,
+- unsigned int bytes, int nrounds)
+-{
+- u8 buf[CHACHA_BLOCK_SIZE];
+-
+- while (bytes >= CHACHA_BLOCK_SIZE * 4) {
+- chacha_4block_xor_neon(state, dst, src, nrounds);
+- bytes -= CHACHA_BLOCK_SIZE * 4;
+- src += CHACHA_BLOCK_SIZE * 4;
+- dst += CHACHA_BLOCK_SIZE * 4;
+- state[12] += 4;
+- }
+- while (bytes >= CHACHA_BLOCK_SIZE) {
+- chacha_block_xor_neon(state, dst, src, nrounds);
+- bytes -= CHACHA_BLOCK_SIZE;
+- src += CHACHA_BLOCK_SIZE;
+- dst += CHACHA_BLOCK_SIZE;
+- state[12]++;
+- }
+- if (bytes) {
+- memcpy(buf, src, bytes);
+- chacha_block_xor_neon(state, buf, buf, nrounds);
+- memcpy(dst, buf, bytes);
+- }
+-}
+-
+-static int chacha_neon_stream_xor(struct skcipher_request *req,
+- const struct chacha_ctx *ctx, const u8 *iv)
+-{
+- struct skcipher_walk walk;
+- u32 state[16];
+- int err;
+-
+- err = skcipher_walk_virt(&walk, req, false);
+-
+- crypto_chacha_init(state, ctx, iv);
+-
+- while (walk.nbytes > 0) {
+- unsigned int nbytes = walk.nbytes;
+-
+- if (nbytes < walk.total)
+- nbytes = round_down(nbytes, walk.stride);
+-
+- kernel_neon_begin();
+- chacha_doneon(state, walk.dst.virt.addr, walk.src.virt.addr,
+- nbytes, ctx->nrounds);
+- kernel_neon_end();
+- err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
+- }
+-
+- return err;
+-}
+-
+-static int chacha_neon(struct skcipher_request *req)
+-{
+- struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+- struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);
+-
+- if (req->cryptlen <= CHACHA_BLOCK_SIZE || !crypto_simd_usable())
+- return crypto_chacha_crypt(req);
+-
+- return chacha_neon_stream_xor(req, ctx, req->iv);
+-}
+-
+-static int xchacha_neon(struct skcipher_request *req)
+-{
+- struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+- struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);
+- struct chacha_ctx subctx;
+- u32 state[16];
+- u8 real_iv[16];
+-
+- if (req->cryptlen <= CHACHA_BLOCK_SIZE || !crypto_simd_usable())
+- return crypto_xchacha_crypt(req);
+-
+- crypto_chacha_init(state, ctx, req->iv);
+-
+- kernel_neon_begin();
+- hchacha_block_neon(state, subctx.key, ctx->nrounds);
+- kernel_neon_end();
+- subctx.nrounds = ctx->nrounds;
+-
+- memcpy(&real_iv[0], req->iv + 24, 8);
+- memcpy(&real_iv[8], req->iv + 16, 8);
+- return chacha_neon_stream_xor(req, &subctx, real_iv);
+-}
+-
+-static struct skcipher_alg algs[] = {
+- {
+- .base.cra_name = "chacha20",
+- .base.cra_driver_name = "chacha20-neon",
+- .base.cra_priority = 300,
+- .base.cra_blocksize = 1,
+- .base.cra_ctxsize = sizeof(struct chacha_ctx),
+- .base.cra_module = THIS_MODULE,
+-
+- .min_keysize = CHACHA_KEY_SIZE,
+- .max_keysize = CHACHA_KEY_SIZE,
+- .ivsize = CHACHA_IV_SIZE,
+- .chunksize = CHACHA_BLOCK_SIZE,
+- .walksize = 4 * CHACHA_BLOCK_SIZE,
+- .setkey = crypto_chacha20_setkey,
+- .encrypt = chacha_neon,
+- .decrypt = chacha_neon,
+- }, {
+- .base.cra_name = "xchacha20",
+- .base.cra_driver_name = "xchacha20-neon",
+- .base.cra_priority = 300,
+- .base.cra_blocksize = 1,
+- .base.cra_ctxsize = sizeof(struct chacha_ctx),
+- .base.cra_module = THIS_MODULE,
+-
+- .min_keysize = CHACHA_KEY_SIZE,
+- .max_keysize = CHACHA_KEY_SIZE,
+- .ivsize = XCHACHA_IV_SIZE,
+- .chunksize = CHACHA_BLOCK_SIZE,
+- .walksize = 4 * CHACHA_BLOCK_SIZE,
+- .setkey = crypto_chacha20_setkey,
+- .encrypt = xchacha_neon,
+- .decrypt = xchacha_neon,
+- }, {
+- .base.cra_name = "xchacha12",
+- .base.cra_driver_name = "xchacha12-neon",
+- .base.cra_priority = 300,
+- .base.cra_blocksize = 1,
+- .base.cra_ctxsize = sizeof(struct chacha_ctx),
+- .base.cra_module = THIS_MODULE,
+-
+- .min_keysize = CHACHA_KEY_SIZE,
+- .max_keysize = CHACHA_KEY_SIZE,
+- .ivsize = XCHACHA_IV_SIZE,
+- .chunksize = CHACHA_BLOCK_SIZE,
+- .walksize = 4 * CHACHA_BLOCK_SIZE,
+- .setkey = crypto_chacha12_setkey,
+- .encrypt = xchacha_neon,
+- .decrypt = xchacha_neon,
+- }
+-};
+-
+-static int __init chacha_simd_mod_init(void)
+-{
+- if (!(elf_hwcap & HWCAP_NEON))
+- return -ENODEV;
+-
+- return crypto_register_skciphers(algs, ARRAY_SIZE(algs));
+-}
+-
+-static void __exit chacha_simd_mod_fini(void)
+-{
+- crypto_unregister_skciphers(algs, ARRAY_SIZE(algs));
+-}
+-
+-module_init(chacha_simd_mod_init);
+-module_exit(chacha_simd_mod_fini);
+-
+-MODULE_DESCRIPTION("ChaCha and XChaCha stream ciphers (NEON accelerated)");
+-MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
+-MODULE_LICENSE("GPL v2");
+-MODULE_ALIAS_CRYPTO("chacha20");
+-MODULE_ALIAS_CRYPTO("chacha20-neon");
+-MODULE_ALIAS_CRYPTO("xchacha20");
+-MODULE_ALIAS_CRYPTO("xchacha20-neon");
+-MODULE_ALIAS_CRYPTO("xchacha12");
+-MODULE_ALIAS_CRYPTO("xchacha12-neon");
+--- a/arch/arm/crypto/chacha-scalar-core.S
++++ b/arch/arm/crypto/chacha-scalar-core.S
+@@ -41,14 +41,6 @@
+ X14 .req r12
+ X15 .req r14
+
+-.Lexpand_32byte_k:
+- // "expand 32-byte k"
+- .word 0x61707865, 0x3320646e, 0x79622d32, 0x6b206574
+-
+-#ifdef __thumb2__
+-# define adrl adr
+-#endif
+-
+ .macro __rev out, in, t0, t1, t2
+ .if __LINUX_ARM_ARCH__ >= 6
+ rev \out, \in
+@@ -391,61 +383,65 @@
+ .endm // _chacha
+
+ /*
+- * void chacha20_arm(u8 *out, const u8 *in, size_t len, const u32 key[8],
+- * const u32 iv[4]);
++ * void chacha_doarm(u8 *dst, const u8 *src, unsigned int bytes,
++ * const u32 *state, int nrounds);
+ */
+-ENTRY(chacha20_arm)
++ENTRY(chacha_doarm)
+ cmp r2, #0 // len == 0?
+ reteq lr
+
++ ldr ip, [sp]
++ cmp ip, #12
++
+ push {r0-r2,r4-r11,lr}
+
+ // Push state x0-x15 onto stack.
+ // Also store an extra copy of x10-x11 just before the state.
+
+- ldr r4, [sp, #48] // iv
+- mov r0, sp
+- sub sp, #80
+-
+- // iv: x12-x15
+- ldm r4, {X12,X13,X14,X15}
+- stmdb r0!, {X12,X13,X14,X15}
++ add X12, r3, #48
++ ldm X12, {X12,X13,X14,X15}
++ push {X12,X13,X14,X15}
++ sub sp, sp, #64
+
+- // key: x4-x11
+- __ldrd X8_X10, X9_X11, r3, 24
++ __ldrd X8_X10, X9_X11, r3, 40
+ __strd X8_X10, X9_X11, sp, 8
+- stmdb r0!, {X8_X10, X9_X11}
+- ldm r3, {X4-X9_X11}
+- stmdb r0!, {X4-X9_X11}
+-
+- // constants: x0-x3
+- adrl X3, .Lexpand_32byte_k
+- ldm X3, {X0-X3}
++ __strd X8_X10, X9_X11, sp, 56
++ ldm r3, {X0-X9_X11}
+ __strd X0, X1, sp, 16
+ __strd X2, X3, sp, 24
++ __strd X4, X5, sp, 32
++ __strd X6, X7, sp, 40
++ __strd X8_X10, X9_X11, sp, 48
+
++ beq 1f
+ _chacha 20
+
+- add sp, #76
++0: add sp, #76
+ pop {r4-r11, pc}
+-ENDPROC(chacha20_arm)
++
++1: _chacha 12
++ b 0b
++ENDPROC(chacha_doarm)
+
+ /*
+- * void hchacha20_arm(const u32 state[16], u32 out[8]);
++ * void hchacha_block_arm(const u32 state[16], u32 out[8], int nrounds);
+ */
+-ENTRY(hchacha20_arm)
++ENTRY(hchacha_block_arm)
+ push {r1,r4-r11,lr}
+
++ cmp r2, #12 // ChaCha12 ?
++
+ mov r14, r0
+ ldmia r14!, {r0-r11} // load x0-x11
+ push {r10-r11} // store x10-x11 to stack
+ ldm r14, {r10-r12,r14} // load x12-x15
+ sub sp, #8
+
++ beq 1f
+ _chacha_permute 20
+
+ // Skip over (unused0-unused1, x10-x11)
+- add sp, #16
++0: add sp, #16
+
+ // Fix up rotations of x12-x15
+ ror X12, X12, #drot
+@@ -458,4 +454,7 @@ ENTRY(hchacha20_arm)
+ stm r4, {X0,X1,X2,X3,X12,X13,X14,X15}
+
+ pop {r4-r11,pc}
+-ENDPROC(hchacha20_arm)
++
++1: _chacha_permute 12
++ b 0b
++ENDPROC(hchacha_block_arm)
+--- a/arch/arm64/crypto/chacha-neon-glue.c
++++ b/arch/arm64/crypto/chacha-neon-glue.c
+@@ -1,5 +1,5 @@
+ /*
+- * ARM NEON accelerated ChaCha and XChaCha stream ciphers,
++ * ARM NEON and scalar accelerated ChaCha and XChaCha stream ciphers,
+ * including ChaCha20 (RFC7539)
+ *
+ * Copyright (C) 2016 - 2017 Linaro, Ltd. <ard.biesheuvel@linaro.org>
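The XChaCha handling in do_xchacha() above is worth unpacking: HChaCha over the key and the first 16 IV bytes yields the inner 256-bit key, and the remaining 16 IV bytes are shuffled (last eight first) into the 16-byte IV for plain ChaCha, where the leading four bytes land in state[12], the block counter. Below is a self-contained sketch of that data flow; hchacha_stub() is a placeholder for the real hchacha_block_arm()/hchacha_block_neon() permutation, and only the surrounding key/IV plumbing mirrors the patch.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Placeholder, NOT the real HChaCha permutation. */
static void hchacha_stub(const uint32_t state[16], uint32_t out[8])
{
	for (int i = 0; i < 8; i++)
		out[i] = state[i] ^ state[i + 8];
}

/* Mirrors do_xchacha(): derive the inner key with HChaCha, then build
 * the 16-byte IV for plain ChaCha from the tail of the 32-byte IV. */
static void xchacha_derive(const uint32_t key[8], const uint8_t iv[32],
			   uint32_t subkey[8], uint8_t real_iv[16])
{
	uint32_t state[16] = { 0 };	/* chacha_init_generic() would also
					 * place the "expand 32-byte k"
					 * constants in state[0..3]; omitted
					 * in this stub */

	memcpy(&state[4], key, 32);	/* key -> state[4..11] */
	memcpy(&state[12], iv, 16);	/* first 16 IV bytes -> state[12..15] */

	hchacha_stub(state, subkey);	/* inner 256-bit key */

	/* Same shuffle as the patch: the last 8 IV bytes come first,
	 * then the middle 8; real_iv[0..3] becomes the block counter
	 * when the state is re-initialized for the inner ChaCha pass. */
	memcpy(&real_iv[0], iv + 24, 8);
	memcpy(&real_iv[8], iv + 16, 8);
}

int main(void)
{
	uint32_t key[8] = { 0 }, subkey[8];
	uint8_t iv[32], real_iv[16];

	for (int i = 0; i < 32; i++)
		iv[i] = (uint8_t)i;

	xchacha_derive(key, iv, subkey, real_iv);
	printf("subkey[0] = %u, real_iv[0] = %u\n", subkey[0], real_iv[0]);
	return 0;	/* prints: subkey[0] = 0, real_iv[0] = 24 */
}

Because both the scalar and NEON paths feed the same chacha_stream_xor() after this derivation, the patch can route an XChaCha request through either backend with a single boolean, which is what lets it drop the generic driver as a fallback.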