diff -Nru zfs-linux-0.8.3/debian/changelog zfs-linux-0.8.3/debian/changelog --- zfs-linux-0.8.3/debian/changelog 2020-04-14 09:14:33.000000000 +0000 +++ zfs-linux-0.8.3/debian/changelog 2023-11-02 14:49:36.000000000 +0000 @@ -1,3 +1,152 @@ +zfs-linux (0.8.3-1ubuntu12.16) focal-security; urgency=medium + + * SECURITY UPDATE: silent failure when parsing IPv6 restrictions + - debian/patches/CVE-2013-20001.patch: pass through ipv6 addresses in + bracket notation in lib/libshare/os/linux/nfs.c, man/man8/zfs.8, + tests/runfiles/linux.run, + tests/zfs-tests/tests/functional/cli_root/zfs_share/Makefile.am, + tests/zfs-tests/tests/functional/cli_root/zfs_share/zfs_share_007_neg.ksh, + tests/zfs-tests/tests/functional/cli_root/zfs_share/zfs_share_013_pos.ksh. + - CVE-2013-20001 + + -- Marc Deslauriers Thu, 02 Nov 2023 10:49:36 -0400 + +zfs-linux (0.8.3-1ubuntu12.15) focal; urgency=medium + + * Fix zfs_arc_max getting ignored when value below allmem/32 (LP: #1964992) + - d/p/4930-Dont-ignore-zfs_arc_max-below-allmem-32.patch + - d/p/4931-Restore-processing-for-arc-min-and-arc-max.patch + + -- Ghadi Elie Rahme Wed, 30 Nov 2022 15:46:58 +0000 + +zfs-linux (0.8.3-1ubuntu12.14) focal; urgency=medium + + * Fix zfs_get_data access to files causing panics (LP: #1946686) + - debian/patches/4920-Fix-zfs_get_data-access-to-files-with-wrong-generati.patch + backport from upstream ZFS commit 296a4a369bc1078a694f88570972330985b3b1b8 + ("Fix zfs_get_data access to files with wrong generation") + + -- Colin Ian King Tue, 12 Oct 2021 14:02:12 +0100 + +zfs-linux (0.8.3-1ubuntu12.13) focal; urgency=medium + + * Fix zfs receive -s when using ZFS 2.x kernel drivers (LP: #1939177) + - 4910-Fix-EIO-after-resuming-receive-of-new-dataset-over-a.patch + - 4911-compat-nullify-action-handle.patch + + -- Colin Ian King Mon, 16 Aug 2021 15:55:52 +0100 + +zfs-linux (0.8.3-1ubuntu12.12) focal; urgency=medium + + * Fix dependency loop preventing swap partitions from being mounted + correctly (LP: #1875577) + - d/p/4900-Fix-a-dependency-loop.patch + - d/p/4901-Fix-another-dependency-loop.patch + + -- Heitor Alves de Siqueira Mon, 12 Jul 2021 14:36:13 +0000 + +zfs-linux (0.8.3-1ubuntu12.11) focal; urgency=medium + + * Improve scrub testing in kernel-smoke-test-scrub (LP: #1934910) + On some slower small memory systems we need to check until resilvering + has completed. The most reliably method is to poll on a scrub until + it finishes successfully. Give up after ~15 minutes and flag a failure + if the scrub can't start or complete successfully. + + -- Colin Ian King Thu, 8 Jul 2021 08:51:21 +0100 + +zfs-linux (0.8.3-1ubuntu12.10) focal; urgency=medium + + * fix uio partial copies (LP: #1904589) + + -- Andrea Righi Thu, 10 Jun 2021 14:49:23 +0100 + +zfs-linux (0.8.3-1ubuntu12.9) focal; urgency=medium + + * No change rebuild in security pocket. LP: #1914279. + + -- Dimitri John Ledkov Thu, 29 Apr 2021 16:09:45 +0100 + +zfs-linux (0.8.3-1ubuntu12.8) focal; urgency=medium + + * Prevent build of the zfs-dkms binary package for kernels later than 5.4. 
+ This is a re-working of the fix for bug #1902701 with the \ escaped + so that 5.10+ kernels get detected correctly (LP: #1919252) + + -- Colin Ian King Wed, 07 Apr 2021 13:44:14 +0100 + +zfs-linux (0.8.3-1ubuntu12.7) focal; urgency=medium + + * Fix race condition in zfs_iput_async (LP: #1916486) + - Upstream ZFS fix 43eaef6de817 ("Fix zrele race in zrele_async that can + cause hang") + + -- Heitor Alves de Siqueira Thu, 25 Feb 2021 19:48:51 +0000 + +zfs-linux (0.8.3-1ubuntu12.6) focal; urgency=medium + + [ Didier Roche ] + [ Jean-Baptiste Lallement ] + * Generate clone uuid without dd which is flagged as having an executable + stack. Thanks Usarin Heininga for the patch (LP: #1894329) + + [ Andrea Righi ] + * fix potential user-space double free when running "zfs mount -a" + (LP: #1902588) + - 4702-Revert-Let-zfs-mount-all-tolerate-in-progress-mounts.patch + + -- Colin Ian King Mon, 30 Nov 2020 19:00:00 +0000 + +zfs-linux (0.8.3-1ubuntu12.5) focal; urgency=medium + + * Prevent build of the zfs-dkms binary package for kernels later than 5.4. + If that is required, one should use the zfs-dkms package of a later series + (like it is done for built-in modules of Ubuntu kernels). (LP: #1902701) + + -- Stefan Bader Tue, 03 Nov 2020 18:05:38 +0100 + +zfs-linux (0.8.3-1ubuntu12.4) focal; urgency=medium + + * Fix zfs-dkms build on arm64 with PREEMPTION and BLK_CGROUP (LP: #1892001) + - 4700-Fix-DKMS-build-on-arm64-with-PREEMPTION-and-BLK_CGRO.patch + preempt_schedule_notrace is GPL-only so redfine it to preempt_schedule + on arm64 with PREEMPTION and BLK_CGROUP enabled to 'fix' the DKMS + build failure. + + -- Juerg Haefliger Tue, 18 Aug 2020 11:10:41 +0200 + +zfs-linux (0.8.3-1ubuntu12.3) focal; urgency=medium + + * Fix volume wait on locked encrypted zvols (LP: #1888405) + [ James Dingwall ] + - 4620-zfs-vol-wait-fix-locked-encrypted-vols.patch + zfs-volume-wait.service systemd unit does not start if the encrypted + zvol is locked. The /sbin/zvol_wait should not wait for links when the + volume has property keystatus=unavailable. Add a check for this. + + -- Colin Ian King Wed, 22 Jul 2020 09:58:22 +0100 + +zfs-linux (0.8.3-1ubuntu12.2) focal; urgency=medium + + * Don't report errors if modprobe fails (LP: #1880421) + - 4510-silently-ignore-modprobe-failure.patch + loading ZFS modules on zfs-utils installation is a nice + to have feature, but don't throw an error if modules are + not available to load + + -- Colin Ian King Mon, 6 Jul 2020 12:13:15 +0100 + +zfs-linux (0.8.3-1ubuntu12.1) focal; urgency=medium + + * Backport AES-GCM performance accelleration (LP: #1881107) + - backport of upstream zfs commit 31b160f0a6c673c8f926233af2ed6d5354808393 + ("ICP: Improve AES-GCM performance"). + tests on a memory backed pool show performance improvements of ~15-22% + for AES-CCM writes, ~17-20% AES-CCM reads, 34-36% AES-GCM writes and + ~79-80% AES-GCM reads. 
+ + -- Colin Ian King Tue, 28 May 2020 11:54:33 +0100 + zfs-linux (0.8.3-1ubuntu12) focal; urgency=medium [ Jean-Baptiste Lallement ] diff -Nru zfs-linux-0.8.3/debian/patches/4000-zsys-support.patch zfs-linux-0.8.3/debian/patches/4000-zsys-support.patch --- zfs-linux-0.8.3/debian/patches/4000-zsys-support.patch 2020-04-02 10:35:17.000000000 +0000 +++ zfs-linux-0.8.3/debian/patches/4000-zsys-support.patch 2020-11-30 19:00:00.000000000 +0000 @@ -165,7 +165,7 @@ + +uid() +{ -+ dd if=/dev/urandom of=/dev/stdout bs=1 count=100 2>/dev/null | tr -dc 'a-z0-9' | cut -c-6 ++ grep -a -m10 -E "\*" /dev/urandom 2>/dev/null | tr -dc 'a-z0-9' | cut -c-6 +} Index: zfs-linux-0.8.3/etc/systemd/system-generators/zfs-mount-generator.in =================================================================== diff -Nru zfs-linux-0.8.3/debian/patches/4510-silently-ignore-modprobe-failure.patch zfs-linux-0.8.3/debian/patches/4510-silently-ignore-modprobe-failure.patch --- zfs-linux-0.8.3/debian/patches/4510-silently-ignore-modprobe-failure.patch 1970-01-01 00:00:00.000000000 +0000 +++ zfs-linux-0.8.3/debian/patches/4510-silently-ignore-modprobe-failure.patch 2020-07-06 11:10:38.000000000 +0000 @@ -0,0 +1,33 @@ +Description: Don't fail if zfs modules can't load on package installation + Ideally, modprobe should be attempted but not fatal (LP: #1880421) +Author: Colin Ian King +Origin: ubuntu +Forwarded: no +Last-Update: 2020-06-04 + +Index: zfs-linux-0.8.4/etc/systemd/system/zfs-load-module.service.in +=================================================================== +--- zfs-linux-0.8.4.orig/etc/systemd/system/zfs-load-module.service.in ++++ zfs-linux-0.8.4/etc/systemd/system/zfs-load-module.service.in +@@ -10,7 +10,7 @@ After=systemd-remount-fs.service + [Service] + Type=oneshot + RemainAfterExit=yes +-ExecStart=/sbin/modprobe zfs ++ExecStart=-/sbin/modprobe zfs + + [Install] + WantedBy=zfs-mount.service +Index: zfs-linux-0.8.4/etc/systemd/system/zfs-share.service.in +=================================================================== +--- zfs-linux-0.8.4.orig/etc/systemd/system/zfs-share.service.in ++++ zfs-linux-0.8.4/etc/systemd/system/zfs-share.service.in +@@ -13,7 +13,7 @@ PartOf=smb.service + Type=oneshot + RemainAfterExit=yes + ExecStartPre=-/bin/rm -f /etc/dfs/sharetab +-ExecStart=@sbindir@/zfs share -a ++ExecStart=-@sbindir@/zfs share -a + + [Install] + WantedBy=zfs.target diff -Nru zfs-linux-0.8.3/debian/patches/4610-ICP-Improve-AES-GCM-performance.patch zfs-linux-0.8.3/debian/patches/4610-ICP-Improve-AES-GCM-performance.patch --- zfs-linux-0.8.3/debian/patches/4610-ICP-Improve-AES-GCM-performance.patch 1970-01-01 00:00:00.000000000 +0000 +++ zfs-linux-0.8.3/debian/patches/4610-ICP-Improve-AES-GCM-performance.patch 2020-05-28 10:53:02.000000000 +0000 @@ -0,0 +1,3115 @@ +From 31b160f0a6c673c8f926233af2ed6d5354808393 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Attila=20F=C3=BCl=C3=B6p?= +Date: Mon, 10 Feb 2020 21:59:50 +0100 +Subject: [PATCH] ICP: Improve AES-GCM performance +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit +Content-Type: text/plain; charset="utf-8" +Content-Transfer-Encoding: 8bit + +Currently SIMD accelerated AES-GCM performance is limited by two +factors: + +a. The need to disable preemption and interrupts and save the FPU +state before using it and to do the reverse when done. Due to the +way the code is organized (see (b) below) we have to pay this price +twice for each 16 byte GCM block processed. + +b. 
Most processing is done in C, operating on single GCM blocks. +The use of SIMD instructions is limited to the AES encryption of the +counter block (AES-NI) and the Galois multiplication (PCLMULQDQ). +This leads to the FPU not being fully utilized for crypto +operations. + +To solve (a) we do crypto processing in larger chunks while owning +the FPU. An `icp_gcm_avx_chunk_size` module parameter was introduced +to make this chunk size tweakable. It defaults to 32 KiB. This step +alone roughly doubles performance. (b) is tackled by porting and +using the highly optimized openssl AES-GCM assembler routines, which +do all the processing (CTR, AES, GMULT) in a single routine. Both +steps together result in up to 32x reduction of the time spend in +the en/decryption routines, leading up to approximately 12x +throughput increase for large (128 KiB) blocks. + +Lastly, this commit changes the default encryption algorithm from +AES-CCM to AES-GCM when setting the `encryption=on` property. + +Reviewed-By: Brian Behlendorf +Reviewed-By: Jason King +Reviewed-By: Tom Caputi +Reviewed-By: Richard Laager +Signed-off-by: Attila Fülöp +Closes #9749 +Signed-off-by: Colin Ian King +--- + COPYRIGHT | 4 + + config/toolchain-simd.m4 | 21 + + include/linux/simd_x86.h | 13 + + include/sys/zio.h | 2 +- + lib/libicp/Makefile.am | 2 + + include/linux/simd.h | 15 +- + man/man8/zfsprops.8 | 2 +- + module/icp/Makefile.in | 9 + + module/icp/algs/modes/gcm.c | 746 ++++++++++++++- + .../modes/THIRDPARTYLICENSE.cryptogams | 36 + + .../THIRDPARTYLICENSE.cryptogams.descrip | 1 + + .../modes/THIRDPARTYLICENSE.openssl | 177 ++++ + .../modes/THIRDPARTYLICENSE.openssl.descrip | 1 + + .../icp/asm-x86_64/modes/aesni-gcm-x86_64.S | 892 ++++++++++++++++++ + module/icp/asm-x86_64/modes/ghash-x86_64.S | 714 ++++++++++++++ + module/icp/include/aes/aes_impl.h | 5 + + module/icp/include/modes/modes.h | 29 +- + .../zfs_create/zfs_create_crypt_combos.ksh | 2 +- + .../zpool_create_crypt_combos.ksh | 2 +- + .../functional/rsend/send_encrypted_props.ksh | 12 +- + 20 files changed, 2654 insertions(+), 31 deletions(-) + create mode 100644 module/icp/asm-x86_64/modes/THIRDPARTYLICENSE.cryptogams + create mode 100644 module/icp/asm-x86_64/modes/THIRDPARTYLICENSE.cryptogams.descrip + create mode 100644 module/icp/asm-x86_64/modes/THIRDPARTYLICENSE.openssl + create mode 100644 module/icp/asm-x86_64/modes/THIRDPARTYLICENSE.openssl.descrip + create mode 100644 module/icp/asm-x86_64/modes/aesni-gcm-x86_64.S + create mode 100644 module/icp/asm-x86_64/modes/ghash-x86_64.S + +Index: zfs-linux-0.8.3/COPYRIGHT +=================================================================== +--- zfs-linux-0.8.3.orig/COPYRIGHT ++++ zfs-linux-0.8.3/COPYRIGHT +@@ -20,6 +20,10 @@ notable exceptions and their respective + * AES Implementation: module/icp/asm-x86_64/aes/THIRDPARTYLICENSE.openssl + * PBKDF2 Implementation: lib/libzfs/THIRDPARTYLICENSE.openssl + * SPL Implementation: module/spl/THIRDPARTYLICENSE.gplv2 ++ * GCM Implementaion: module/icp/asm-x86_64/modes/THIRDPARTYLICENSE.cryptogams ++ * GCM Implementaion: module/icp/asm-x86_64/modes/THIRDPARTYLICENSE.openssl ++ * GHASH Implementaion: module/icp/asm-x86_64/modes/THIRDPARTYLICENSE.cryptogams ++ * GHASH Implementaion: module/icp/asm-x86_64/modes/THIRDPARTYLICENSE.openssl + + This product includes software developed by the OpenSSL Project for use + in the OpenSSL Toolkit (http://www.openssl.org/) +Index: zfs-linux-0.8.3/config/toolchain-simd.m4 +=================================================================== +--- 
zfs-linux-0.8.3.orig/config/toolchain-simd.m4 ++++ zfs-linux-0.8.3/config/toolchain-simd.m4 +@@ -23,6 +23,7 @@ AC_DEFUN([ZFS_AC_CONFIG_ALWAYS_TOOLCHAIN + ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_AVX512VL + ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_AES + ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_PCLMULQDQ ++ ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_MOVBE + ;; + esac + ]) +@@ -400,4 +401,24 @@ AC_DEFUN([ZFS_AC_CONFIG_TOOLCHAIN_CAN_BU + ], [ + AC_MSG_RESULT([no]) + ]) ++]) ++ ++dnl # ++dnl # ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_MOVBE ++dnl # ++AC_DEFUN([ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_MOVBE], [ ++ AC_MSG_CHECKING([whether host toolchain supports MOVBE]) ++ ++ AC_LINK_IFELSE([AC_LANG_SOURCE([ ++ [ ++ void main() ++ { ++ __asm__ __volatile__("movbe 0(%eax), %eax"); ++ } ++ ]])], [ ++ AC_MSG_RESULT([yes]) ++ AC_DEFINE([HAVE_MOVBE], 1, [Define if host toolchain supports MOVBE]) ++ ], [ ++ AC_MSG_RESULT([no]) ++ ]) + ]) +Index: zfs-linux-0.8.3/include/linux/simd_x86.h +=================================================================== +--- zfs-linux-0.8.3.orig/include/linux/simd_x86.h ++++ zfs-linux-0.8.3/include/linux/simd_x86.h +@@ -382,7 +382,8 @@ typedef enum cpuid_inst_sets { + AVX512ER, + AVX512VL, + AES, +- PCLMULQDQ ++ PCLMULQDQ, ++ MOVBE + } cpuid_inst_sets_t; + + /* +@@ -406,6 +407,7 @@ typedef struct cpuid_feature_desc { + #define _AVX512VL_BIT (1U << 31) /* if used also check other levels */ + #define _AES_BIT (1U << 25) + #define _PCLMULQDQ_BIT (1U << 1) ++#define _MOVBE_BIT (1U << 22) + + /* + * Descriptions of supported instruction sets +@@ -433,6 +435,7 @@ static const cpuid_feature_desc_t cpuid_ + [AVX512VL] = {7U, 0U, _AVX512ER_BIT, EBX }, + [AES] = {1U, 0U, _AES_BIT, ECX }, + [PCLMULQDQ] = {1U, 0U, _PCLMULQDQ_BIT, ECX }, ++ [MOVBE] = {1U, 0U, _MOVBE_BIT, ECX }, + }; + + /* +@@ -505,6 +508,7 @@ CPUID_FEATURE_CHECK(avx512er, AVX512ER); + CPUID_FEATURE_CHECK(avx512vl, AVX512VL); + CPUID_FEATURE_CHECK(aes, AES); + CPUID_FEATURE_CHECK(pclmulqdq, PCLMULQDQ); ++CPUID_FEATURE_CHECK(movbe, MOVBE); + + #endif /* !defined(_KERNEL) */ + +@@ -719,6 +723,19 @@ zfs_pclmulqdq_available(void) + #endif + } + ++/* ++ * Check if MOVBE instruction is available ++ */ ++static inline boolean_t ++zfs_movbe_available(void) ++{ ++#if defined(X86_FEATURE_MOVBE) ++ return (!!boot_cpu_has(X86_FEATURE_MOVBE)); ++#else ++ return (B_FALSE); ++#endif ++} ++ + /* + * AVX-512 family of instruction sets: + * +Index: zfs-linux-0.8.3/include/sys/zio.h +=================================================================== +--- zfs-linux-0.8.3.orig/include/sys/zio.h ++++ zfs-linux-0.8.3/include/sys/zio.h +@@ -118,7 +118,7 @@ enum zio_encrypt { + ZIO_CRYPT_FUNCTIONS + }; + +-#define ZIO_CRYPT_ON_VALUE ZIO_CRYPT_AES_256_CCM ++#define ZIO_CRYPT_ON_VALUE ZIO_CRYPT_AES_256_GCM + #define ZIO_CRYPT_DEFAULT ZIO_CRYPT_OFF + + /* macros defining encryption lengths */ +Index: zfs-linux-0.8.3/lib/libicp/Makefile.am +=================================================================== +--- zfs-linux-0.8.3.orig/lib/libicp/Makefile.am ++++ zfs-linux-0.8.3/lib/libicp/Makefile.am +@@ -20,6 +20,8 @@ ASM_SOURCES_AS = \ + asm-x86_64/aes/aes_amd64.S \ + asm-x86_64/aes/aes_aesni.S \ + asm-x86_64/modes/gcm_pclmulqdq.S \ ++ asm-x86_64/modes/aesni-gcm-x86_64.S \ ++ asm-x86_64/modes/ghash-x86_64.S \ + asm-x86_64/sha1/sha1-x86_64.S \ + asm-x86_64/sha2/sha256_impl.S \ + asm-x86_64/sha2/sha512_impl.S +Index: zfs-linux-0.8.3/module/icp/Makefile.in +=================================================================== +--- zfs-linux-0.8.3.orig/module/icp/Makefile.in ++++ 
zfs-linux-0.8.3/module/icp/Makefile.in +@@ -69,9 +69,18 @@ $(MODULE)-objs += algs/skein/skein_iv.o + $(MODULE)-objs += $(ASM_SOURCES) + + $(MODULE)-$(CONFIG_X86) += algs/modes/gcm_pclmulqdq.o ++$(MODULE)-$(CONFIG_X86_64) += asm-x86_64/modes/aesni-gcm-x86_64.o ++$(MODULE)-$(CONFIG_X86_64) += asm-x86_64/modes/ghash-x86_64.o + $(MODULE)-$(CONFIG_X86) += algs/aes/aes_impl_aesni.o + $(MODULE)-$(CONFIG_X86) += algs/aes/aes_impl_x86-64.o + ++# Suppress objtool "can't find jump dest instruction at" warnings. They ++# are caused by the constants which are defined in the text section of the ++# assembly file using .byte instructions (e.g. bswap_mask). The objtool ++# utility tries to interpret them as opcodes and obviously fails doing so. ++OBJECT_FILES_NON_STANDARD_aesni-gcm-x86_64.o := y ++OBJECT_FILES_NON_STANDARD_ghash-x86_64.o := y ++ + ICP_DIRS = \ + api \ + core \ +Index: zfs-linux-0.8.3/module/icp/algs/modes/gcm.c +=================================================================== +--- zfs-linux-0.8.3.orig/module/icp/algs/modes/gcm.c ++++ zfs-linux-0.8.3/module/icp/algs/modes/gcm.c +@@ -30,12 +30,46 @@ + #include + #include + #include ++#ifdef CAN_USE_GCM_ASM ++#include ++#endif + + #define GHASH(c, d, t, o) \ + xor_block((uint8_t *)(d), (uint8_t *)(c)->gcm_ghash); \ + (o)->mul((uint64_t *)(void *)(c)->gcm_ghash, (c)->gcm_H, \ + (uint64_t *)(void *)(t)); + ++/* Select GCM implementation */ ++#define IMPL_FASTEST (UINT32_MAX) ++#define IMPL_CYCLE (UINT32_MAX-1) ++#ifdef CAN_USE_GCM_ASM ++#define IMPL_AVX (UINT32_MAX-2) ++#endif ++#define GCM_IMPL_READ(i) (*(volatile uint32_t *) &(i)) ++static uint32_t icp_gcm_impl = IMPL_FASTEST; ++static uint32_t user_sel_impl = IMPL_FASTEST; ++ ++#ifdef CAN_USE_GCM_ASM ++/* ++ * Whether to use the optimized openssl gcm and ghash implementations. ++ * Set to true if module parameter icp_gcm_impl == "avx". ++ */ ++static boolean_t gcm_use_avx = B_FALSE; ++#define GCM_IMPL_USE_AVX (*(volatile boolean_t *)&gcm_use_avx) ++ ++static inline boolean_t gcm_avx_will_work(void); ++static inline void gcm_set_avx(boolean_t); ++static inline boolean_t gcm_toggle_avx(void); ++ ++static int gcm_mode_encrypt_contiguous_blocks_avx(gcm_ctx_t *, char *, size_t, ++ crypto_data_t *, size_t); ++ ++static int gcm_encrypt_final_avx(gcm_ctx_t *, crypto_data_t *, size_t); ++static int gcm_decrypt_final_avx(gcm_ctx_t *, crypto_data_t *, size_t); ++static int gcm_init_avx(gcm_ctx_t *, unsigned char *, size_t, unsigned char *, ++ size_t, size_t); ++#endif /* ifdef CAN_USE_GCM_ASM */ ++ + /* + * Encrypt multiple blocks of data in GCM mode. Decrypt for GCM mode + * is done in another function. +@@ -47,6 +81,12 @@ gcm_mode_encrypt_contiguous_blocks(gcm_c + void (*copy_block)(uint8_t *, uint8_t *), + void (*xor_block)(uint8_t *, uint8_t *)) + { ++#ifdef CAN_USE_GCM_ASM ++ if (ctx->gcm_use_avx == B_TRUE) ++ return (gcm_mode_encrypt_contiguous_blocks_avx( ++ ctx, data, length, out, block_size)); ++#endif ++ + const gcm_impl_ops_t *gops; + size_t remainder = length; + size_t need = 0; +@@ -109,6 +149,14 @@ gcm_mode_encrypt_contiguous_blocks(gcm_c + + ctx->gcm_processed_data_len += block_size; + ++ /* ++ * The following copies a complete GCM block back to where it ++ * came from if there was a remainder in the last call and out ++ * is NULL. That doesn't seem to make sense. So we assert this ++ * can't happen and leave the code in for reference. 
++ * See https://github.com/zfsonlinux/zfs/issues/9661 ++ */ ++ ASSERT(out != NULL); + if (out == NULL) { + if (ctx->gcm_remainder_len > 0) { + bcopy(blockp, ctx->gcm_copy_to, +@@ -169,6 +217,11 @@ gcm_encrypt_final(gcm_ctx_t *ctx, crypto + void (*copy_block)(uint8_t *, uint8_t *), + void (*xor_block)(uint8_t *, uint8_t *)) + { ++#ifdef CAN_USE_GCM_ASM ++ if (ctx->gcm_use_avx == B_TRUE) ++ return (gcm_encrypt_final_avx(ctx, out, block_size)); ++#endif ++ + const gcm_impl_ops_t *gops; + uint64_t counter_mask = ntohll(0x00000000ffffffffULL); + uint8_t *ghash, *macp = NULL; +@@ -321,6 +374,11 @@ gcm_decrypt_final(gcm_ctx_t *ctx, crypto + int (*encrypt_block)(const void *, const uint8_t *, uint8_t *), + void (*xor_block)(uint8_t *, uint8_t *)) + { ++#ifdef CAN_USE_GCM_ASM ++ if (ctx->gcm_use_avx == B_TRUE) ++ return (gcm_decrypt_final_avx(ctx, out, block_size)); ++#endif ++ + const gcm_impl_ops_t *gops; + size_t pt_len; + size_t remainder; +@@ -526,6 +584,9 @@ gcm_init(gcm_ctx_t *ctx, unsigned char * + return (CRYPTO_SUCCESS); + } + ++/* ++ * Init the GCM context struct. Handle the cycle and avx implementations here. ++ */ + int + gcm_init_ctx(gcm_ctx_t *gcm_ctx, char *param, size_t block_size, + int (*encrypt_block)(const void *, const uint8_t *, uint8_t *), +@@ -556,11 +617,37 @@ gcm_init_ctx(gcm_ctx_t *gcm_ctx, char *p + return (CRYPTO_MECHANISM_PARAM_INVALID); + } + +- if (gcm_init(gcm_ctx, gcm_param->pIv, gcm_param->ulIvLen, +- gcm_param->pAAD, gcm_param->ulAADLen, block_size, +- encrypt_block, copy_block, xor_block) != 0) { +- rv = CRYPTO_MECHANISM_PARAM_INVALID; ++#ifdef CAN_USE_GCM_ASM ++ /* ++ * Handle the "cycle" implementation by creating avx and non avx ++ * contexts alternately. ++ */ ++ if (GCM_IMPL_READ(icp_gcm_impl) != IMPL_CYCLE) { ++ gcm_ctx->gcm_use_avx = GCM_IMPL_USE_AVX; ++ } else { ++ gcm_ctx->gcm_use_avx = gcm_toggle_avx(); + } ++ /* We don't handle byte swapped key schedules in the avx code path. */ ++ aes_key_t *ks = (aes_key_t *)gcm_ctx->gcm_keysched; ++ if (ks->ops->needs_byteswap == B_TRUE) { ++ gcm_ctx->gcm_use_avx = B_FALSE; ++ } ++ /* Avx and non avx context initialization differs from here on. */ ++ if (gcm_ctx->gcm_use_avx == B_FALSE) { ++#endif /* ifdef CAN_USE_GCM_ASM */ ++ if (gcm_init(gcm_ctx, gcm_param->pIv, gcm_param->ulIvLen, ++ gcm_param->pAAD, gcm_param->ulAADLen, block_size, ++ encrypt_block, copy_block, xor_block) != 0) { ++ rv = CRYPTO_MECHANISM_PARAM_INVALID; ++ } ++#ifdef CAN_USE_GCM_ASM ++ } else { ++ if (gcm_init_avx(gcm_ctx, gcm_param->pIv, gcm_param->ulIvLen, ++ gcm_param->pAAD, gcm_param->ulAADLen, block_size) != 0) { ++ rv = CRYPTO_MECHANISM_PARAM_INVALID; ++ } ++ } ++#endif /* ifdef CAN_USE_GCM_ASM */ + + return (rv); + } +@@ -590,11 +677,37 @@ gmac_init_ctx(gcm_ctx_t *gcm_ctx, char * + return (CRYPTO_MECHANISM_PARAM_INVALID); + } + +- if (gcm_init(gcm_ctx, gmac_param->pIv, AES_GMAC_IV_LEN, +- gmac_param->pAAD, gmac_param->ulAADLen, block_size, +- encrypt_block, copy_block, xor_block) != 0) { +- rv = CRYPTO_MECHANISM_PARAM_INVALID; ++#ifdef CAN_USE_GCM_ASM ++ /* ++ * Handle the "cycle" implementation by creating avx and non avx ++ * contexts alternately. ++ */ ++ if (GCM_IMPL_READ(icp_gcm_impl) != IMPL_CYCLE) { ++ gcm_ctx->gcm_use_avx = GCM_IMPL_USE_AVX; ++ } else { ++ gcm_ctx->gcm_use_avx = gcm_toggle_avx(); ++ } ++ /* We don't handle byte swapped key schedules in the avx code path. 
*/ ++ aes_key_t *ks = (aes_key_t *)gcm_ctx->gcm_keysched; ++ if (ks->ops->needs_byteswap == B_TRUE) { ++ gcm_ctx->gcm_use_avx = B_FALSE; ++ } ++ /* Avx and non avx context initialization differs from here on. */ ++ if (gcm_ctx->gcm_use_avx == B_FALSE) { ++#endif /* ifdef CAN_USE_GCM_ASM */ ++ if (gcm_init(gcm_ctx, gmac_param->pIv, AES_GMAC_IV_LEN, ++ gmac_param->pAAD, gmac_param->ulAADLen, block_size, ++ encrypt_block, copy_block, xor_block) != 0) { ++ rv = CRYPTO_MECHANISM_PARAM_INVALID; ++ } ++#ifdef CAN_USE_GCM_ASM ++ } else { ++ if (gcm_init_avx(gcm_ctx, gmac_param->pIv, AES_GMAC_IV_LEN, ++ gmac_param->pAAD, gmac_param->ulAADLen, block_size) != 0) { ++ rv = CRYPTO_MECHANISM_PARAM_INVALID; ++ } + } ++#endif /* ifdef CAN_USE_GCM_ASM */ + + return (rv); + } +@@ -645,15 +758,6 @@ const gcm_impl_ops_t *gcm_all_impl[] = { + /* Indicate that benchmark has been completed */ + static boolean_t gcm_impl_initialized = B_FALSE; + +-/* Select GCM implementation */ +-#define IMPL_FASTEST (UINT32_MAX) +-#define IMPL_CYCLE (UINT32_MAX-1) +- +-#define GCM_IMPL_READ(i) (*(volatile uint32_t *) &(i)) +- +-static uint32_t icp_gcm_impl = IMPL_FASTEST; +-static uint32_t user_sel_impl = IMPL_FASTEST; +- + /* Hold all supported implementations */ + static size_t gcm_supp_impl_cnt = 0; + static gcm_impl_ops_t *gcm_supp_impl[ARRAY_SIZE(gcm_all_impl)]; +@@ -685,6 +789,16 @@ gcm_impl_get_ops() + size_t idx = (++cycle_impl_idx) % gcm_supp_impl_cnt; + ops = gcm_supp_impl[idx]; + break; ++#ifdef CAN_USE_GCM_ASM ++ case IMPL_AVX: ++ /* ++ * Make sure that we return a valid implementation while ++ * switching to the avx implementation since there still ++ * may be unfinished non-avx contexts around. ++ */ ++ ops = &gcm_generic_impl; ++ break; ++#endif + default: + ASSERT3U(impl, <, gcm_supp_impl_cnt); + ASSERT3U(gcm_supp_impl_cnt, >, 0); +@@ -733,6 +847,16 @@ gcm_impl_init(void) + + strcpy(gcm_fastest_impl.name, "fastest"); + ++#ifdef CAN_USE_GCM_ASM ++ /* ++ * Use the avx implementation if it's available and the implementation ++ * hasn't changed from its default value of fastest on module load. ++ */ ++ if (gcm_avx_will_work() && ++ GCM_IMPL_READ(user_sel_impl) == IMPL_FASTEST) { ++ gcm_set_avx(B_TRUE); ++ } ++#endif + /* Finish initialization */ + atomic_swap_32(&icp_gcm_impl, user_sel_impl); + gcm_impl_initialized = B_TRUE; +@@ -744,6 +868,9 @@ static const struct { + } gcm_impl_opts[] = { + { "cycle", IMPL_CYCLE }, + { "fastest", IMPL_FASTEST }, ++#ifdef CAN_USE_GCM_ASM ++ { "avx", IMPL_AVX }, ++#endif + }; + + /* +@@ -777,6 +904,12 @@ gcm_impl_set(const char *val) + + /* Check mandatory options */ + for (i = 0; i < ARRAY_SIZE(gcm_impl_opts); i++) { ++#ifdef CAN_USE_GCM_ASM ++ /* Ignore avx implementation if it won't work. */ ++ if (gcm_impl_opts[i].sel == IMPL_AVX && !gcm_avx_will_work()) { ++ continue; ++ } ++#endif + if (strcmp(req_name, gcm_impl_opts[i].name) == 0) { + impl = gcm_impl_opts[i].sel; + err = 0; +@@ -795,6 +928,18 @@ gcm_impl_set(const char *val) + } + } + } ++#ifdef CAN_USE_GCM_ASM ++ /* ++ * Use the avx implementation if available and the requested one is ++ * avx or fastest. 
++ */ ++ if (gcm_avx_will_work() == B_TRUE && ++ (impl == IMPL_AVX || impl == IMPL_FASTEST)) { ++ gcm_set_avx(B_TRUE); ++ } else { ++ gcm_set_avx(B_FALSE); ++ } ++#endif + + if (err == 0) { + if (gcm_impl_initialized) +@@ -826,6 +971,12 @@ icp_gcm_impl_get(char *buffer, zfs_kerne + + /* list mandatory options */ + for (i = 0; i < ARRAY_SIZE(gcm_impl_opts); i++) { ++#ifdef CAN_USE_GCM_ASM ++ /* Ignore avx implementation if it won't work. */ ++ if (gcm_impl_opts[i].sel == IMPL_AVX && !gcm_avx_will_work()) { ++ continue; ++ } ++#endif + fmt = (impl == gcm_impl_opts[i].sel) ? "[%s] " : "%s "; + cnt += sprintf(buffer + cnt, fmt, gcm_impl_opts[i].name); + } +@@ -842,4 +993,563 @@ icp_gcm_impl_get(char *buffer, zfs_kerne + module_param_call(icp_gcm_impl, icp_gcm_impl_set, icp_gcm_impl_get, + NULL, 0644); + MODULE_PARM_DESC(icp_gcm_impl, "Select gcm implementation."); +-#endif ++#endif /* defined(__KERNEL) */ ++ ++#ifdef CAN_USE_GCM_ASM ++#define GCM_BLOCK_LEN 16 ++/* ++ * The openssl asm routines are 6x aggregated and need that many bytes ++ * at minimum. ++ */ ++#define GCM_AVX_MIN_DECRYPT_BYTES (GCM_BLOCK_LEN * 6) ++#define GCM_AVX_MIN_ENCRYPT_BYTES (GCM_BLOCK_LEN * 6 * 3) ++/* ++ * Ensure the chunk size is reasonable since we are allocating a ++ * GCM_AVX_MAX_CHUNK_SIZEd buffer and disabling preemption and interrupts. ++ */ ++#define GCM_AVX_MAX_CHUNK_SIZE \ ++ (((128*1024)/GCM_AVX_MIN_DECRYPT_BYTES) * GCM_AVX_MIN_DECRYPT_BYTES) ++ ++/* Get the chunk size module parameter. */ ++#define GCM_CHUNK_SIZE_READ *(volatile uint32_t *) &gcm_avx_chunk_size ++ ++/* Clear the FPU registers since they hold sensitive internal state. */ ++#define clear_fpu_regs() clear_fpu_regs_avx() ++#define GHASH_AVX(ctx, in, len) \ ++ gcm_ghash_avx((ctx)->gcm_ghash, (const uint64_t (*)[2])(ctx)->gcm_Htable, \ ++ in, len) ++ ++#define gcm_incr_counter_block(ctx) gcm_incr_counter_block_by(ctx, 1) ++ ++/* ++ * Module parameter: number of bytes to process at once while owning the FPU. ++ * Rounded down to the next GCM_AVX_MIN_DECRYPT_BYTES byte boundary and is ++ * ensured to be greater or equal than GCM_AVX_MIN_DECRYPT_BYTES. ++ */ ++static uint32_t gcm_avx_chunk_size = ++ ((32 * 1024) / GCM_AVX_MIN_DECRYPT_BYTES) * GCM_AVX_MIN_DECRYPT_BYTES; ++ ++extern boolean_t atomic_toggle_boolean_nv(volatile boolean_t *); ++extern void clear_fpu_regs_avx(void); ++extern void gcm_xor_avx(const uint8_t *src, uint8_t *dst); ++extern void aes_encrypt_intel(const uint32_t rk[], int nr, ++ const uint32_t pt[4], uint32_t ct[4]); ++ ++extern void gcm_init_htab_avx(uint64_t Htable[16][2], const uint64_t H[2]); ++extern void gcm_ghash_avx(uint64_t ghash[2], const uint64_t Htable[16][2], ++ const uint8_t *in, size_t len); ++ ++extern size_t aesni_gcm_encrypt(const uint8_t *, uint8_t *, size_t, ++ const void *, uint64_t *, uint64_t *); ++ ++extern size_t aesni_gcm_decrypt(const uint8_t *, uint8_t *, size_t, ++ const void *, uint64_t *, uint64_t *); ++ ++static inline boolean_t ++gcm_avx_will_work(void) ++{ ++ /* Avx should imply aes-ni and pclmulqdq, but make sure anyhow. 
*/ ++ return (kfpu_allowed() && ++ zfs_avx_available() && zfs_movbe_available() && ++ zfs_aes_available() && zfs_pclmulqdq_available()); ++} ++ ++static inline void ++gcm_set_avx(boolean_t val) ++{ ++ if (gcm_avx_will_work() == B_TRUE) { ++ atomic_swap_32(&gcm_use_avx, val); ++ } ++} ++ ++static inline boolean_t ++gcm_toggle_avx(void) ++{ ++ if (gcm_avx_will_work() == B_TRUE) { ++ return (atomic_toggle_boolean_nv(&GCM_IMPL_USE_AVX)); ++ } else { ++ return (B_FALSE); ++ } ++} ++ ++/* ++ * Clear senssitve data in the context. ++ * ++ * ctx->gcm_remainder may contain a plaintext remainder. ctx->gcm_H and ++ * ctx->gcm_Htable contain the hash sub key which protects authentication. ++ * ++ * Although extremely unlikely, ctx->gcm_J0 and ctx->gcm_tmp could be used for ++ * a known plaintext attack, they consists of the IV and the first and last ++ * counter respectively. If they should be cleared is debatable. ++ */ ++static inline void ++gcm_clear_ctx(gcm_ctx_t *ctx) ++{ ++ bzero(ctx->gcm_remainder, sizeof (ctx->gcm_remainder)); ++ bzero(ctx->gcm_H, sizeof (ctx->gcm_H)); ++ bzero(ctx->gcm_Htable, sizeof (ctx->gcm_Htable)); ++ bzero(ctx->gcm_J0, sizeof (ctx->gcm_J0)); ++ bzero(ctx->gcm_tmp, sizeof (ctx->gcm_tmp)); ++} ++ ++/* Increment the GCM counter block by n. */ ++static inline void ++gcm_incr_counter_block_by(gcm_ctx_t *ctx, int n) ++{ ++ uint64_t counter_mask = ntohll(0x00000000ffffffffULL); ++ uint64_t counter = ntohll(ctx->gcm_cb[1] & counter_mask); ++ ++ counter = htonll(counter + n); ++ counter &= counter_mask; ++ ctx->gcm_cb[1] = (ctx->gcm_cb[1] & ~counter_mask) | counter; ++} ++ ++/* ++ * Encrypt multiple blocks of data in GCM mode. ++ * This is done in gcm_avx_chunk_size chunks, utilizing AVX assembler routines ++ * if possible. While processing a chunk the FPU is "locked". ++ */ ++static int ++gcm_mode_encrypt_contiguous_blocks_avx(gcm_ctx_t *ctx, char *data, ++ size_t length, crypto_data_t *out, size_t block_size) ++{ ++ size_t bleft = length; ++ size_t need = 0; ++ size_t done = 0; ++ uint8_t *datap = (uint8_t *)data; ++ size_t chunk_size = (size_t)GCM_CHUNK_SIZE_READ; ++ const aes_key_t *key = ((aes_key_t *)ctx->gcm_keysched); ++ uint64_t *ghash = ctx->gcm_ghash; ++ uint64_t *cb = ctx->gcm_cb; ++ uint8_t *ct_buf = NULL; ++ uint8_t *tmp = (uint8_t *)ctx->gcm_tmp; ++ int rv = CRYPTO_SUCCESS; ++ ++ ASSERT(block_size == GCM_BLOCK_LEN); ++ /* ++ * If the last call left an incomplete block, try to fill ++ * it first. ++ */ ++ if (ctx->gcm_remainder_len > 0) { ++ need = block_size - ctx->gcm_remainder_len; ++ if (length < need) { ++ /* Accumulate bytes here and return. */ ++ bcopy(datap, (uint8_t *)ctx->gcm_remainder + ++ ctx->gcm_remainder_len, length); ++ ++ ctx->gcm_remainder_len += length; ++ if (ctx->gcm_copy_to == NULL) { ++ ctx->gcm_copy_to = datap; ++ } ++ return (CRYPTO_SUCCESS); ++ } else { ++ /* Complete incomplete block. */ ++ bcopy(datap, (uint8_t *)ctx->gcm_remainder + ++ ctx->gcm_remainder_len, need); ++ ++ ctx->gcm_copy_to = NULL; ++ } ++ } ++ ++ /* Allocate a buffer to encrypt to if there is enough input. */ ++ if (bleft >= GCM_AVX_MIN_ENCRYPT_BYTES) { ++ ct_buf = vmem_alloc(chunk_size, ctx->gcm_kmflag); ++ if (ct_buf == NULL) { ++ return (CRYPTO_HOST_MEMORY); ++ } ++ } ++ ++ /* If we completed an incomplete block, encrypt and write it out. 
*/ ++ if (ctx->gcm_remainder_len > 0) { ++ kfpu_begin(); ++ aes_encrypt_intel(key->encr_ks.ks32, key->nr, ++ (const uint32_t *)cb, (uint32_t *)tmp); ++ ++ gcm_xor_avx((const uint8_t *) ctx->gcm_remainder, tmp); ++ GHASH_AVX(ctx, tmp, block_size); ++ clear_fpu_regs(); ++ kfpu_end(); ++ /* ++ * We don't follow gcm_mode_encrypt_contiguous_blocks() here ++ * but assert that out is not null. ++ * See gcm_mode_encrypt_contiguous_blocks() above and ++ * https://github.com/zfsonlinux/zfs/issues/9661 ++ */ ++ ASSERT(out != NULL); ++ rv = crypto_put_output_data(tmp, out, block_size); ++ out->cd_offset += block_size; ++ gcm_incr_counter_block(ctx); ++ ctx->gcm_processed_data_len += block_size; ++ bleft -= need; ++ datap += need; ++ ctx->gcm_remainder_len = 0; ++ } ++ ++ /* Do the bulk encryption in chunk_size blocks. */ ++ for (; bleft >= chunk_size; bleft -= chunk_size) { ++ kfpu_begin(); ++ done = aesni_gcm_encrypt( ++ datap, ct_buf, chunk_size, key, cb, ghash); ++ ++ clear_fpu_regs(); ++ kfpu_end(); ++ if (done != chunk_size) { ++ rv = CRYPTO_FAILED; ++ goto out_nofpu; ++ } ++ if (out != NULL) { ++ rv = crypto_put_output_data(ct_buf, out, chunk_size); ++ if (rv != CRYPTO_SUCCESS) { ++ goto out_nofpu; ++ } ++ out->cd_offset += chunk_size; ++ } ++ datap += chunk_size; ++ ctx->gcm_processed_data_len += chunk_size; ++ } ++ /* Check if we are already done. */ ++ if (bleft == 0) { ++ goto out_nofpu; ++ } ++ /* Bulk encrypt the remaining data. */ ++ kfpu_begin(); ++ if (bleft >= GCM_AVX_MIN_ENCRYPT_BYTES) { ++ done = aesni_gcm_encrypt(datap, ct_buf, bleft, key, cb, ghash); ++ if (done == 0) { ++ rv = CRYPTO_FAILED; ++ goto out; ++ } ++ if (out != NULL) { ++ rv = crypto_put_output_data(ct_buf, out, done); ++ if (rv != CRYPTO_SUCCESS) { ++ goto out; ++ } ++ out->cd_offset += done; ++ } ++ ctx->gcm_processed_data_len += done; ++ datap += done; ++ bleft -= done; ++ ++ } ++ /* Less than GCM_AVX_MIN_ENCRYPT_BYTES remain, operate on blocks. */ ++ while (bleft > 0) { ++ if (bleft < block_size) { ++ bcopy(datap, ctx->gcm_remainder, bleft); ++ ctx->gcm_remainder_len = bleft; ++ ctx->gcm_copy_to = datap; ++ goto out; ++ } ++ /* Encrypt, hash and write out. */ ++ aes_encrypt_intel(key->encr_ks.ks32, key->nr, ++ (const uint32_t *)cb, (uint32_t *)tmp); ++ ++ gcm_xor_avx(datap, tmp); ++ GHASH_AVX(ctx, tmp, block_size); ++ if (out != NULL) { ++ rv = crypto_put_output_data(tmp, out, block_size); ++ if (rv != CRYPTO_SUCCESS) { ++ goto out; ++ } ++ out->cd_offset += block_size; ++ } ++ gcm_incr_counter_block(ctx); ++ ctx->gcm_processed_data_len += block_size; ++ datap += block_size; ++ bleft -= block_size; ++ } ++out: ++ clear_fpu_regs(); ++ kfpu_end(); ++out_nofpu: ++ if (ct_buf != NULL) { ++ vmem_free(ct_buf, chunk_size); ++ } ++ return (rv); ++} ++ ++/* ++ * Finalize the encryption: Zero fill, encrypt, hash and write out an eventual ++ * incomplete last block. Encrypt the ICB. Calculate the tag and write it out. 
++ */ ++static int ++gcm_encrypt_final_avx(gcm_ctx_t *ctx, crypto_data_t *out, size_t block_size) ++{ ++ uint8_t *ghash = (uint8_t *)ctx->gcm_ghash; ++ uint32_t *J0 = (uint32_t *)ctx->gcm_J0; ++ uint8_t *remainder = (uint8_t *)ctx->gcm_remainder; ++ size_t rem_len = ctx->gcm_remainder_len; ++ const void *keysched = ((aes_key_t *)ctx->gcm_keysched)->encr_ks.ks32; ++ int aes_rounds = ((aes_key_t *)keysched)->nr; ++ int rv; ++ ++ ASSERT(block_size == GCM_BLOCK_LEN); ++ ++ if (out->cd_length < (rem_len + ctx->gcm_tag_len)) { ++ return (CRYPTO_DATA_LEN_RANGE); ++ } ++ ++ kfpu_begin(); ++ /* Pad last incomplete block with zeros, encrypt and hash. */ ++ if (rem_len > 0) { ++ uint8_t *tmp = (uint8_t *)ctx->gcm_tmp; ++ const uint32_t *cb = (uint32_t *)ctx->gcm_cb; ++ ++ aes_encrypt_intel(keysched, aes_rounds, cb, (uint32_t *)tmp); ++ bzero(remainder + rem_len, block_size - rem_len); ++ for (int i = 0; i < rem_len; i++) { ++ remainder[i] ^= tmp[i]; ++ } ++ GHASH_AVX(ctx, remainder, block_size); ++ ctx->gcm_processed_data_len += rem_len; ++ /* No need to increment counter_block, it's the last block. */ ++ } ++ /* Finish tag. */ ++ ctx->gcm_len_a_len_c[1] = ++ htonll(CRYPTO_BYTES2BITS(ctx->gcm_processed_data_len)); ++ GHASH_AVX(ctx, (const uint8_t *)ctx->gcm_len_a_len_c, block_size); ++ aes_encrypt_intel(keysched, aes_rounds, J0, J0); ++ ++ gcm_xor_avx((uint8_t *)J0, ghash); ++ clear_fpu_regs(); ++ kfpu_end(); ++ ++ /* Output remainder. */ ++ if (rem_len > 0) { ++ rv = crypto_put_output_data(remainder, out, rem_len); ++ if (rv != CRYPTO_SUCCESS) ++ return (rv); ++ } ++ out->cd_offset += rem_len; ++ ctx->gcm_remainder_len = 0; ++ rv = crypto_put_output_data(ghash, out, ctx->gcm_tag_len); ++ if (rv != CRYPTO_SUCCESS) ++ return (rv); ++ ++ out->cd_offset += ctx->gcm_tag_len; ++ /* Clear sensitive data in the context before returning. */ ++ gcm_clear_ctx(ctx); ++ return (CRYPTO_SUCCESS); ++} ++ ++/* ++ * Finalize decryption: We just have accumulated crypto text, so now we ++ * decrypt it here inplace. ++ */ ++static int ++gcm_decrypt_final_avx(gcm_ctx_t *ctx, crypto_data_t *out, size_t block_size) ++{ ++ ASSERT3U(ctx->gcm_processed_data_len, ==, ctx->gcm_pt_buf_len); ++ ASSERT3U(block_size, ==, 16); ++ ++ size_t chunk_size = (size_t)GCM_CHUNK_SIZE_READ; ++ size_t pt_len = ctx->gcm_processed_data_len - ctx->gcm_tag_len; ++ uint8_t *datap = ctx->gcm_pt_buf; ++ const aes_key_t *key = ((aes_key_t *)ctx->gcm_keysched); ++ uint32_t *cb = (uint32_t *)ctx->gcm_cb; ++ uint64_t *ghash = ctx->gcm_ghash; ++ uint32_t *tmp = (uint32_t *)ctx->gcm_tmp; ++ int rv = CRYPTO_SUCCESS; ++ size_t bleft, done; ++ ++ /* ++ * Decrypt in chunks of gcm_avx_chunk_size, which is asserted to be ++ * greater or equal than GCM_AVX_MIN_ENCRYPT_BYTES, and a multiple of ++ * GCM_AVX_MIN_DECRYPT_BYTES. ++ */ ++ for (bleft = pt_len; bleft >= chunk_size; bleft -= chunk_size) { ++ kfpu_begin(); ++ done = aesni_gcm_decrypt(datap, datap, chunk_size, ++ (const void *)key, ctx->gcm_cb, ghash); ++ clear_fpu_regs(); ++ kfpu_end(); ++ if (done != chunk_size) { ++ return (CRYPTO_FAILED); ++ } ++ datap += done; ++ } ++ /* Decrypt remainder, which is less then chunk size, in one go. 
*/ ++ kfpu_begin(); ++ if (bleft >= GCM_AVX_MIN_DECRYPT_BYTES) { ++ done = aesni_gcm_decrypt(datap, datap, bleft, ++ (const void *)key, ctx->gcm_cb, ghash); ++ if (done == 0) { ++ clear_fpu_regs(); ++ kfpu_end(); ++ return (CRYPTO_FAILED); ++ } ++ datap += done; ++ bleft -= done; ++ } ++ ASSERT(bleft < GCM_AVX_MIN_DECRYPT_BYTES); ++ ++ /* ++ * Now less then GCM_AVX_MIN_DECRYPT_BYTES bytes remain, ++ * decrypt them block by block. ++ */ ++ while (bleft > 0) { ++ /* Incomplete last block. */ ++ if (bleft < block_size) { ++ uint8_t *lastb = (uint8_t *)ctx->gcm_remainder; ++ ++ bzero(lastb, block_size); ++ bcopy(datap, lastb, bleft); ++ /* The GCM processing. */ ++ GHASH_AVX(ctx, lastb, block_size); ++ aes_encrypt_intel(key->encr_ks.ks32, key->nr, cb, tmp); ++ for (size_t i = 0; i < bleft; i++) { ++ datap[i] = lastb[i] ^ ((uint8_t *)tmp)[i]; ++ } ++ break; ++ } ++ /* The GCM processing. */ ++ GHASH_AVX(ctx, datap, block_size); ++ aes_encrypt_intel(key->encr_ks.ks32, key->nr, cb, tmp); ++ gcm_xor_avx((uint8_t *)tmp, datap); ++ gcm_incr_counter_block(ctx); ++ ++ datap += block_size; ++ bleft -= block_size; ++ } ++ if (rv != CRYPTO_SUCCESS) { ++ clear_fpu_regs(); ++ kfpu_end(); ++ return (rv); ++ } ++ /* Decryption done, finish the tag. */ ++ ctx->gcm_len_a_len_c[1] = htonll(CRYPTO_BYTES2BITS(pt_len)); ++ GHASH_AVX(ctx, (uint8_t *)ctx->gcm_len_a_len_c, block_size); ++ aes_encrypt_intel(key->encr_ks.ks32, key->nr, (uint32_t *)ctx->gcm_J0, ++ (uint32_t *)ctx->gcm_J0); ++ ++ gcm_xor_avx((uint8_t *)ctx->gcm_J0, (uint8_t *)ghash); ++ ++ /* We are done with the FPU, restore its state. */ ++ clear_fpu_regs(); ++ kfpu_end(); ++ ++ /* Compare the input authentication tag with what we calculated. */ ++ if (bcmp(&ctx->gcm_pt_buf[pt_len], ghash, ctx->gcm_tag_len)) { ++ /* They don't match. */ ++ return (CRYPTO_INVALID_MAC); ++ } ++ rv = crypto_put_output_data(ctx->gcm_pt_buf, out, pt_len); ++ if (rv != CRYPTO_SUCCESS) { ++ return (rv); ++ } ++ out->cd_offset += pt_len; ++ gcm_clear_ctx(ctx); ++ return (CRYPTO_SUCCESS); ++} ++ ++/* ++ * Initialize the GCM params H, Htabtle and the counter block. Save the ++ * initial counter block. ++ */ ++static int ++gcm_init_avx(gcm_ctx_t *ctx, unsigned char *iv, size_t iv_len, ++ unsigned char *auth_data, size_t auth_data_len, size_t block_size) ++{ ++ uint8_t *cb = (uint8_t *)ctx->gcm_cb; ++ uint64_t *H = ctx->gcm_H; ++ const void *keysched = ((aes_key_t *)ctx->gcm_keysched)->encr_ks.ks32; ++ int aes_rounds = ((aes_key_t *)ctx->gcm_keysched)->nr; ++ uint8_t *datap = auth_data; ++ size_t chunk_size = (size_t)GCM_CHUNK_SIZE_READ; ++ size_t bleft; ++ ++ ASSERT(block_size == GCM_BLOCK_LEN); ++ ++ /* Init H (encrypt zero block) and create the initial counter block. */ ++ bzero(ctx->gcm_ghash, sizeof (ctx->gcm_ghash)); ++ bzero(H, sizeof (ctx->gcm_H)); ++ kfpu_begin(); ++ aes_encrypt_intel(keysched, aes_rounds, ++ (const uint32_t *)H, (uint32_t *)H); ++ ++ gcm_init_htab_avx(ctx->gcm_Htable, H); ++ ++ if (iv_len == 12) { ++ bcopy(iv, cb, 12); ++ cb[12] = 0; ++ cb[13] = 0; ++ cb[14] = 0; ++ cb[15] = 1; ++ /* We need the ICB later. */ ++ bcopy(cb, ctx->gcm_J0, sizeof (ctx->gcm_J0)); ++ } else { ++ /* ++ * Most consumers use 12 byte IVs, so it's OK to use the ++ * original routines for other IV sizes, just avoid nesting ++ * kfpu_begin calls. ++ */ ++ clear_fpu_regs(); ++ kfpu_end(); ++ gcm_format_initial_blocks(iv, iv_len, ctx, block_size, ++ aes_copy_block, aes_xor_block); ++ kfpu_begin(); ++ } ++ ++ /* Openssl post increments the counter, adjust for that. 
*/ ++ gcm_incr_counter_block(ctx); ++ ++ /* Ghash AAD in chunk_size blocks. */ ++ for (bleft = auth_data_len; bleft >= chunk_size; bleft -= chunk_size) { ++ GHASH_AVX(ctx, datap, chunk_size); ++ datap += chunk_size; ++ clear_fpu_regs(); ++ kfpu_end(); ++ kfpu_begin(); ++ } ++ /* Ghash the remainder and handle possible incomplete GCM block. */ ++ if (bleft > 0) { ++ size_t incomp = bleft % block_size; ++ ++ bleft -= incomp; ++ if (bleft > 0) { ++ GHASH_AVX(ctx, datap, bleft); ++ datap += bleft; ++ } ++ if (incomp > 0) { ++ /* Zero pad and hash incomplete last block. */ ++ uint8_t *authp = (uint8_t *)ctx->gcm_tmp; ++ ++ bzero(authp, block_size); ++ bcopy(datap, authp, incomp); ++ GHASH_AVX(ctx, authp, block_size); ++ } ++ } ++ clear_fpu_regs(); ++ kfpu_end(); ++ return (CRYPTO_SUCCESS); ++} ++ ++#if defined(_KERNEL) ++static int ++icp_gcm_avx_set_chunk_size(const char *buf, zfs_kernel_param_t *kp) ++{ ++ unsigned long val; ++ char val_rounded[16]; ++ int error = 0; ++ ++ error = kstrtoul(buf, 0, &val); ++ if (error) ++ return (error); ++ ++ val = (val / GCM_AVX_MIN_DECRYPT_BYTES) * GCM_AVX_MIN_DECRYPT_BYTES; ++ ++ if (val < GCM_AVX_MIN_ENCRYPT_BYTES || val > GCM_AVX_MAX_CHUNK_SIZE) ++ return (-EINVAL); ++ ++ snprintf(val_rounded, 16, "%u", (uint32_t)val); ++ error = param_set_uint(val_rounded, kp); ++ return (error); ++} ++ ++module_param_call(icp_gcm_avx_chunk_size, icp_gcm_avx_set_chunk_size, ++ param_get_uint, &gcm_avx_chunk_size, 0644); ++ ++MODULE_PARM_DESC(icp_gcm_avx_chunk_size, ++ "How many bytes to process while owning the FPU"); ++ ++#endif /* defined(__KERNEL) */ ++#endif /* ifdef CAN_USE_GCM_ASM */ +Index: zfs-linux-0.8.3/module/icp/asm-x86_64/modes/THIRDPARTYLICENSE.cryptogams +=================================================================== +--- /dev/null ++++ zfs-linux-0.8.3/module/icp/asm-x86_64/modes/THIRDPARTYLICENSE.cryptogams +@@ -0,0 +1,36 @@ ++Copyright (c) 2006-2017, CRYPTOGAMS by ++All rights reserved. ++ ++Redistribution and use in source and binary forms, with or without ++modification, are permitted provided that the following conditions ++are met: ++ ++ * Redistributions of source code must retain copyright notices, ++ this list of conditions and the following disclaimer. ++ ++ * Redistributions in binary form must reproduce the above ++ copyright notice, this list of conditions and the following ++ disclaimer in the documentation and/or other materials ++ provided with the distribution. ++ ++ * Neither the name of the CRYPTOGAMS nor the names of its ++ copyright holder and contributors may be used to endorse or ++ promote products derived from this software without specific ++ prior written permission. ++ ++ALTERNATIVELY, provided that this notice is retained in full, this ++product may be distributed under the terms of the GNU General Public ++License (GPL), in which case the provisions of the GPL apply INSTEAD OF ++those given above. ++ ++THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS ++"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT ++LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR ++A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT ++OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, ++SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT ++LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, ++DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY ++THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT ++(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE ++OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +Index: zfs-linux-0.8.3/module/icp/asm-x86_64/modes/THIRDPARTYLICENSE.cryptogams.descrip +=================================================================== +--- /dev/null ++++ zfs-linux-0.8.3/module/icp/asm-x86_64/modes/THIRDPARTYLICENSE.cryptogams.descrip +@@ -0,0 +1 @@ ++PORTIONS OF GCM and GHASH FUNCTIONALITY +Index: zfs-linux-0.8.3/module/icp/asm-x86_64/modes/THIRDPARTYLICENSE.openssl +=================================================================== +--- /dev/null ++++ zfs-linux-0.8.3/module/icp/asm-x86_64/modes/THIRDPARTYLICENSE.openssl +@@ -0,0 +1,177 @@ ++ ++ Apache License ++ Version 2.0, January 2004 ++ https://www.apache.org/licenses/ ++ ++ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION ++ ++ 1. Definitions. ++ ++ "License" shall mean the terms and conditions for use, reproduction, ++ and distribution as defined by Sections 1 through 9 of this document. ++ ++ "Licensor" shall mean the copyright owner or entity authorized by ++ the copyright owner that is granting the License. ++ ++ "Legal Entity" shall mean the union of the acting entity and all ++ other entities that control, are controlled by, or are under common ++ control with that entity. For the purposes of this definition, ++ "control" means (i) the power, direct or indirect, to cause the ++ direction or management of such entity, whether by contract or ++ otherwise, or (ii) ownership of fifty percent (50%) or more of the ++ outstanding shares, or (iii) beneficial ownership of such entity. ++ ++ "You" (or "Your") shall mean an individual or Legal Entity ++ exercising permissions granted by this License. ++ ++ "Source" form shall mean the preferred form for making modifications, ++ including but not limited to software source code, documentation ++ source, and configuration files. ++ ++ "Object" form shall mean any form resulting from mechanical ++ transformation or translation of a Source form, including but ++ not limited to compiled object code, generated documentation, ++ and conversions to other media types. ++ ++ "Work" shall mean the work of authorship, whether in Source or ++ Object form, made available under the License, as indicated by a ++ copyright notice that is included in or attached to the work ++ (an example is provided in the Appendix below). ++ ++ "Derivative Works" shall mean any work, whether in Source or Object ++ form, that is based on (or derived from) the Work and for which the ++ editorial revisions, annotations, elaborations, or other modifications ++ represent, as a whole, an original work of authorship. For the purposes ++ of this License, Derivative Works shall not include works that remain ++ separable from, or merely link (or bind by name) to the interfaces of, ++ the Work and Derivative Works thereof. 
++ ++ "Contribution" shall mean any work of authorship, including ++ the original version of the Work and any modifications or additions ++ to that Work or Derivative Works thereof, that is intentionally ++ submitted to Licensor for inclusion in the Work by the copyright owner ++ or by an individual or Legal Entity authorized to submit on behalf of ++ the copyright owner. For the purposes of this definition, "submitted" ++ means any form of electronic, verbal, or written communication sent ++ to the Licensor or its representatives, including but not limited to ++ communication on electronic mailing lists, source code control systems, ++ and issue tracking systems that are managed by, or on behalf of, the ++ Licensor for the purpose of discussing and improving the Work, but ++ excluding communication that is conspicuously marked or otherwise ++ designated in writing by the copyright owner as "Not a Contribution." ++ ++ "Contributor" shall mean Licensor and any individual or Legal Entity ++ on behalf of whom a Contribution has been received by Licensor and ++ subsequently incorporated within the Work. ++ ++ 2. Grant of Copyright License. Subject to the terms and conditions of ++ this License, each Contributor hereby grants to You a perpetual, ++ worldwide, non-exclusive, no-charge, royalty-free, irrevocable ++ copyright license to reproduce, prepare Derivative Works of, ++ publicly display, publicly perform, sublicense, and distribute the ++ Work and such Derivative Works in Source or Object form. ++ ++ 3. Grant of Patent License. Subject to the terms and conditions of ++ this License, each Contributor hereby grants to You a perpetual, ++ worldwide, non-exclusive, no-charge, royalty-free, irrevocable ++ (except as stated in this section) patent license to make, have made, ++ use, offer to sell, sell, import, and otherwise transfer the Work, ++ where such license applies only to those patent claims licensable ++ by such Contributor that are necessarily infringed by their ++ Contribution(s) alone or by combination of their Contribution(s) ++ with the Work to which such Contribution(s) was submitted. If You ++ institute patent litigation against any entity (including a ++ cross-claim or counterclaim in a lawsuit) alleging that the Work ++ or a Contribution incorporated within the Work constitutes direct ++ or contributory patent infringement, then any patent licenses ++ granted to You under this License for that Work shall terminate ++ as of the date such litigation is filed. ++ ++ 4. Redistribution. 
You may reproduce and distribute copies of the ++ Work or Derivative Works thereof in any medium, with or without ++ modifications, and in Source or Object form, provided that You ++ meet the following conditions: ++ ++ (a) You must give any other recipients of the Work or ++ Derivative Works a copy of this License; and ++ ++ (b) You must cause any modified files to carry prominent notices ++ stating that You changed the files; and ++ ++ (c) You must retain, in the Source form of any Derivative Works ++ that You distribute, all copyright, patent, trademark, and ++ attribution notices from the Source form of the Work, ++ excluding those notices that do not pertain to any part of ++ the Derivative Works; and ++ ++ (d) If the Work includes a "NOTICE" text file as part of its ++ distribution, then any Derivative Works that You distribute must ++ include a readable copy of the attribution notices contained ++ within such NOTICE file, excluding those notices that do not ++ pertain to any part of the Derivative Works, in at least one ++ of the following places: within a NOTICE text file distributed ++ as part of the Derivative Works; within the Source form or ++ documentation, if provided along with the Derivative Works; or, ++ within a display generated by the Derivative Works, if and ++ wherever such third-party notices normally appear. The contents ++ of the NOTICE file are for informational purposes only and ++ do not modify the License. You may add Your own attribution ++ notices within Derivative Works that You distribute, alongside ++ or as an addendum to the NOTICE text from the Work, provided ++ that such additional attribution notices cannot be construed ++ as modifying the License. ++ ++ You may add Your own copyright statement to Your modifications and ++ may provide additional or different license terms and conditions ++ for use, reproduction, or distribution of Your modifications, or ++ for any such Derivative Works as a whole, provided Your use, ++ reproduction, and distribution of the Work otherwise complies with ++ the conditions stated in this License. ++ ++ 5. Submission of Contributions. Unless You explicitly state otherwise, ++ any Contribution intentionally submitted for inclusion in the Work ++ by You to the Licensor shall be under the terms and conditions of ++ this License, without any additional terms or conditions. ++ Notwithstanding the above, nothing herein shall supersede or modify ++ the terms of any separate license agreement you may have executed ++ with Licensor regarding such Contributions. ++ ++ 6. Trademarks. This License does not grant permission to use the trade ++ names, trademarks, service marks, or product names of the Licensor, ++ except as required for reasonable and customary use in describing the ++ origin of the Work and reproducing the content of the NOTICE file. ++ ++ 7. Disclaimer of Warranty. Unless required by applicable law or ++ agreed to in writing, Licensor provides the Work (and each ++ Contributor provides its Contributions) on an "AS IS" BASIS, ++ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or ++ implied, including, without limitation, any warranties or conditions ++ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A ++ PARTICULAR PURPOSE. You are solely responsible for determining the ++ appropriateness of using or redistributing the Work and assume any ++ risks associated with Your exercise of permissions under this License. ++ ++ 8. Limitation of Liability. 
In no event and under no legal theory, ++ whether in tort (including negligence), contract, or otherwise, ++ unless required by applicable law (such as deliberate and grossly ++ negligent acts) or agreed to in writing, shall any Contributor be ++ liable to You for damages, including any direct, indirect, special, ++ incidental, or consequential damages of any character arising as a ++ result of this License or out of the use or inability to use the ++ Work (including but not limited to damages for loss of goodwill, ++ work stoppage, computer failure or malfunction, or any and all ++ other commercial damages or losses), even if such Contributor ++ has been advised of the possibility of such damages. ++ ++ 9. Accepting Warranty or Additional Liability. While redistributing ++ the Work or Derivative Works thereof, You may choose to offer, ++ and charge a fee for, acceptance of support, warranty, indemnity, ++ or other liability obligations and/or rights consistent with this ++ License. However, in accepting such obligations, You may act only ++ on Your own behalf and on Your sole responsibility, not on behalf ++ of any other Contributor, and only if You agree to indemnify, ++ defend, and hold each Contributor harmless for any liability ++ incurred by, or claims asserted against, such Contributor by reason ++ of your accepting any such warranty or additional liability. ++ ++ END OF TERMS AND CONDITIONS +Index: zfs-linux-0.8.3/module/icp/asm-x86_64/modes/THIRDPARTYLICENSE.openssl.descrip +=================================================================== +--- /dev/null ++++ zfs-linux-0.8.3/module/icp/asm-x86_64/modes/THIRDPARTYLICENSE.openssl.descrip +@@ -0,0 +1 @@ ++PORTIONS OF GCM and GHASH FUNCTIONALITY +Index: zfs-linux-0.8.3/module/icp/asm-x86_64/modes/aesni-gcm-x86_64.S +=================================================================== +--- /dev/null ++++ zfs-linux-0.8.3/module/icp/asm-x86_64/modes/aesni-gcm-x86_64.S +@@ -0,0 +1,892 @@ ++# Copyright 2013-2016 The OpenSSL Project Authors. All Rights Reserved. ++# ++# Licensed under the Apache License 2.0 (the "License"). You may not use ++# this file except in compliance with the License. You can obtain a copy ++# in the file LICENSE in the source distribution or at ++# https://www.openssl.org/source/license.html ++ ++# ++# ==================================================================== ++# Written by Andy Polyakov for the OpenSSL ++# project. The module is, however, dual licensed under OpenSSL and ++# CRYPTOGAMS licenses depending on where you obtain it. For further ++# details see http://www.openssl.org/~appro/cryptogams/. ++# ==================================================================== ++# ++# ++# AES-NI-CTR+GHASH stitch. ++# ++# February 2013 ++# ++# OpenSSL GCM implementation is organized in such way that its ++# performance is rather close to the sum of its streamed components, ++# in the context parallelized AES-NI CTR and modulo-scheduled ++# PCLMULQDQ-enabled GHASH. Unfortunately, as no stitch implementation ++# was observed to perform significantly better than the sum of the ++# components on contemporary CPUs, the effort was deemed impossible to ++# justify. This module is based on combination of Intel submissions, ++# [1] and [2], with MOVBE twist suggested by Ilya Albrekht and Max ++# Locktyukhin of Intel Corp. 
who verified that it reduces shuffles ++# pressure with notable relative improvement, achieving 1.0 cycle per ++# byte processed with 128-bit key on Haswell processor, 0.74 - on ++# Broadwell, 0.63 - on Skylake... [Mentioned results are raw profiled ++# measurements for favourable packet size, one divisible by 96. ++# Applications using the EVP interface will observe a few percent ++# worse performance.] ++# ++# Knights Landing processes 1 byte in 1.25 cycles (measured with EVP). ++# ++# [1] http://rt.openssl.org/Ticket/Display.html?id=2900&user=guest&pass=guest ++# [2] http://www.intel.com/content/dam/www/public/us/en/documents/software-support/enabling-high-performance-gcm.pdf ++ ++# Generated once from ++# https://github.com/openssl/openssl/blob/5ffc3324/crypto/modes/asm/aesni-gcm-x86_64.pl ++# and modified for ICP. Modification are kept at a bare minimum to ease later ++# upstream merges. ++ ++#if defined(__x86_64__) && defined(HAVE_AVX) && \ ++ defined(HAVE_AES) && defined(HAVE_PCLMULQDQ) && defined(HAVE_MOVBE) ++ ++.text ++ ++.type _aesni_ctr32_ghash_6x,@function ++.align 32 ++_aesni_ctr32_ghash_6x: ++ vmovdqu 32(%r11),%xmm2 ++ subq $6,%rdx ++ vpxor %xmm4,%xmm4,%xmm4 ++ vmovdqu 0-128(%rcx),%xmm15 ++ vpaddb %xmm2,%xmm1,%xmm10 ++ vpaddb %xmm2,%xmm10,%xmm11 ++ vpaddb %xmm2,%xmm11,%xmm12 ++ vpaddb %xmm2,%xmm12,%xmm13 ++ vpaddb %xmm2,%xmm13,%xmm14 ++ vpxor %xmm15,%xmm1,%xmm9 ++ vmovdqu %xmm4,16+8(%rsp) ++ jmp .Loop6x ++ ++.align 32 ++.Loop6x: ++ addl $100663296,%ebx ++ jc .Lhandle_ctr32 ++ vmovdqu 0-32(%r9),%xmm3 ++ vpaddb %xmm2,%xmm14,%xmm1 ++ vpxor %xmm15,%xmm10,%xmm10 ++ vpxor %xmm15,%xmm11,%xmm11 ++ ++.Lresume_ctr32: ++ vmovdqu %xmm1,(%r8) ++ vpclmulqdq $0x10,%xmm3,%xmm7,%xmm5 ++ vpxor %xmm15,%xmm12,%xmm12 ++ vmovups 16-128(%rcx),%xmm2 ++ vpclmulqdq $0x01,%xmm3,%xmm7,%xmm6 ++ xorq %r12,%r12 ++ cmpq %r14,%r15 ++ ++ vaesenc %xmm2,%xmm9,%xmm9 ++ vmovdqu 48+8(%rsp),%xmm0 ++ vpxor %xmm15,%xmm13,%xmm13 ++ vpclmulqdq $0x00,%xmm3,%xmm7,%xmm1 ++ vaesenc %xmm2,%xmm10,%xmm10 ++ vpxor %xmm15,%xmm14,%xmm14 ++ setnc %r12b ++ vpclmulqdq $0x11,%xmm3,%xmm7,%xmm7 ++ vaesenc %xmm2,%xmm11,%xmm11 ++ vmovdqu 16-32(%r9),%xmm3 ++ negq %r12 ++ vaesenc %xmm2,%xmm12,%xmm12 ++ vpxor %xmm5,%xmm6,%xmm6 ++ vpclmulqdq $0x00,%xmm3,%xmm0,%xmm5 ++ vpxor %xmm4,%xmm8,%xmm8 ++ vaesenc %xmm2,%xmm13,%xmm13 ++ vpxor %xmm5,%xmm1,%xmm4 ++ andq $0x60,%r12 ++ vmovups 32-128(%rcx),%xmm15 ++ vpclmulqdq $0x10,%xmm3,%xmm0,%xmm1 ++ vaesenc %xmm2,%xmm14,%xmm14 ++ ++ vpclmulqdq $0x01,%xmm3,%xmm0,%xmm2 ++ leaq (%r14,%r12,1),%r14 ++ vaesenc %xmm15,%xmm9,%xmm9 ++ vpxor 16+8(%rsp),%xmm8,%xmm8 ++ vpclmulqdq $0x11,%xmm3,%xmm0,%xmm3 ++ vmovdqu 64+8(%rsp),%xmm0 ++ vaesenc %xmm15,%xmm10,%xmm10 ++ movbeq 88(%r14),%r13 ++ vaesenc %xmm15,%xmm11,%xmm11 ++ movbeq 80(%r14),%r12 ++ vaesenc %xmm15,%xmm12,%xmm12 ++ movq %r13,32+8(%rsp) ++ vaesenc %xmm15,%xmm13,%xmm13 ++ movq %r12,40+8(%rsp) ++ vmovdqu 48-32(%r9),%xmm5 ++ vaesenc %xmm15,%xmm14,%xmm14 ++ ++ vmovups 48-128(%rcx),%xmm15 ++ vpxor %xmm1,%xmm6,%xmm6 ++ vpclmulqdq $0x00,%xmm5,%xmm0,%xmm1 ++ vaesenc %xmm15,%xmm9,%xmm9 ++ vpxor %xmm2,%xmm6,%xmm6 ++ vpclmulqdq $0x10,%xmm5,%xmm0,%xmm2 ++ vaesenc %xmm15,%xmm10,%xmm10 ++ vpxor %xmm3,%xmm7,%xmm7 ++ vpclmulqdq $0x01,%xmm5,%xmm0,%xmm3 ++ vaesenc %xmm15,%xmm11,%xmm11 ++ vpclmulqdq $0x11,%xmm5,%xmm0,%xmm5 ++ vmovdqu 80+8(%rsp),%xmm0 ++ vaesenc %xmm15,%xmm12,%xmm12 ++ vaesenc %xmm15,%xmm13,%xmm13 ++ vpxor %xmm1,%xmm4,%xmm4 ++ vmovdqu 64-32(%r9),%xmm1 ++ vaesenc %xmm15,%xmm14,%xmm14 ++ ++ vmovups 64-128(%rcx),%xmm15 ++ vpxor %xmm2,%xmm6,%xmm6 ++ vpclmulqdq 
$0x00,%xmm1,%xmm0,%xmm2 ++ vaesenc %xmm15,%xmm9,%xmm9 ++ vpxor %xmm3,%xmm6,%xmm6 ++ vpclmulqdq $0x10,%xmm1,%xmm0,%xmm3 ++ vaesenc %xmm15,%xmm10,%xmm10 ++ movbeq 72(%r14),%r13 ++ vpxor %xmm5,%xmm7,%xmm7 ++ vpclmulqdq $0x01,%xmm1,%xmm0,%xmm5 ++ vaesenc %xmm15,%xmm11,%xmm11 ++ movbeq 64(%r14),%r12 ++ vpclmulqdq $0x11,%xmm1,%xmm0,%xmm1 ++ vmovdqu 96+8(%rsp),%xmm0 ++ vaesenc %xmm15,%xmm12,%xmm12 ++ movq %r13,48+8(%rsp) ++ vaesenc %xmm15,%xmm13,%xmm13 ++ movq %r12,56+8(%rsp) ++ vpxor %xmm2,%xmm4,%xmm4 ++ vmovdqu 96-32(%r9),%xmm2 ++ vaesenc %xmm15,%xmm14,%xmm14 ++ ++ vmovups 80-128(%rcx),%xmm15 ++ vpxor %xmm3,%xmm6,%xmm6 ++ vpclmulqdq $0x00,%xmm2,%xmm0,%xmm3 ++ vaesenc %xmm15,%xmm9,%xmm9 ++ vpxor %xmm5,%xmm6,%xmm6 ++ vpclmulqdq $0x10,%xmm2,%xmm0,%xmm5 ++ vaesenc %xmm15,%xmm10,%xmm10 ++ movbeq 56(%r14),%r13 ++ vpxor %xmm1,%xmm7,%xmm7 ++ vpclmulqdq $0x01,%xmm2,%xmm0,%xmm1 ++ vpxor 112+8(%rsp),%xmm8,%xmm8 ++ vaesenc %xmm15,%xmm11,%xmm11 ++ movbeq 48(%r14),%r12 ++ vpclmulqdq $0x11,%xmm2,%xmm0,%xmm2 ++ vaesenc %xmm15,%xmm12,%xmm12 ++ movq %r13,64+8(%rsp) ++ vaesenc %xmm15,%xmm13,%xmm13 ++ movq %r12,72+8(%rsp) ++ vpxor %xmm3,%xmm4,%xmm4 ++ vmovdqu 112-32(%r9),%xmm3 ++ vaesenc %xmm15,%xmm14,%xmm14 ++ ++ vmovups 96-128(%rcx),%xmm15 ++ vpxor %xmm5,%xmm6,%xmm6 ++ vpclmulqdq $0x10,%xmm3,%xmm8,%xmm5 ++ vaesenc %xmm15,%xmm9,%xmm9 ++ vpxor %xmm1,%xmm6,%xmm6 ++ vpclmulqdq $0x01,%xmm3,%xmm8,%xmm1 ++ vaesenc %xmm15,%xmm10,%xmm10 ++ movbeq 40(%r14),%r13 ++ vpxor %xmm2,%xmm7,%xmm7 ++ vpclmulqdq $0x00,%xmm3,%xmm8,%xmm2 ++ vaesenc %xmm15,%xmm11,%xmm11 ++ movbeq 32(%r14),%r12 ++ vpclmulqdq $0x11,%xmm3,%xmm8,%xmm8 ++ vaesenc %xmm15,%xmm12,%xmm12 ++ movq %r13,80+8(%rsp) ++ vaesenc %xmm15,%xmm13,%xmm13 ++ movq %r12,88+8(%rsp) ++ vpxor %xmm5,%xmm6,%xmm6 ++ vaesenc %xmm15,%xmm14,%xmm14 ++ vpxor %xmm1,%xmm6,%xmm6 ++ ++ vmovups 112-128(%rcx),%xmm15 ++ vpslldq $8,%xmm6,%xmm5 ++ vpxor %xmm2,%xmm4,%xmm4 ++ vmovdqu 16(%r11),%xmm3 ++ ++ vaesenc %xmm15,%xmm9,%xmm9 ++ vpxor %xmm8,%xmm7,%xmm7 ++ vaesenc %xmm15,%xmm10,%xmm10 ++ vpxor %xmm5,%xmm4,%xmm4 ++ movbeq 24(%r14),%r13 ++ vaesenc %xmm15,%xmm11,%xmm11 ++ movbeq 16(%r14),%r12 ++ vpalignr $8,%xmm4,%xmm4,%xmm0 ++ vpclmulqdq $0x10,%xmm3,%xmm4,%xmm4 ++ movq %r13,96+8(%rsp) ++ vaesenc %xmm15,%xmm12,%xmm12 ++ movq %r12,104+8(%rsp) ++ vaesenc %xmm15,%xmm13,%xmm13 ++ vmovups 128-128(%rcx),%xmm1 ++ vaesenc %xmm15,%xmm14,%xmm14 ++ ++ vaesenc %xmm1,%xmm9,%xmm9 ++ vmovups 144-128(%rcx),%xmm15 ++ vaesenc %xmm1,%xmm10,%xmm10 ++ vpsrldq $8,%xmm6,%xmm6 ++ vaesenc %xmm1,%xmm11,%xmm11 ++ vpxor %xmm6,%xmm7,%xmm7 ++ vaesenc %xmm1,%xmm12,%xmm12 ++ vpxor %xmm0,%xmm4,%xmm4 ++ movbeq 8(%r14),%r13 ++ vaesenc %xmm1,%xmm13,%xmm13 ++ movbeq 0(%r14),%r12 ++ vaesenc %xmm1,%xmm14,%xmm14 ++ vmovups 160-128(%rcx),%xmm1 ++ cmpl $12,%ebp // ICP uses 10,12,14 not 9,11,13 for rounds. ++ jb .Lenc_tail ++ ++ vaesenc %xmm15,%xmm9,%xmm9 ++ vaesenc %xmm15,%xmm10,%xmm10 ++ vaesenc %xmm15,%xmm11,%xmm11 ++ vaesenc %xmm15,%xmm12,%xmm12 ++ vaesenc %xmm15,%xmm13,%xmm13 ++ vaesenc %xmm15,%xmm14,%xmm14 ++ ++ vaesenc %xmm1,%xmm9,%xmm9 ++ vaesenc %xmm1,%xmm10,%xmm10 ++ vaesenc %xmm1,%xmm11,%xmm11 ++ vaesenc %xmm1,%xmm12,%xmm12 ++ vaesenc %xmm1,%xmm13,%xmm13 ++ vmovups 176-128(%rcx),%xmm15 ++ vaesenc %xmm1,%xmm14,%xmm14 ++ vmovups 192-128(%rcx),%xmm1 ++ cmpl $14,%ebp // ICP does not zero key schedule. 
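The two cmpl/jb pairs above encode the round-count convention called out in the inline comments: ICP keeps the real AES round count in its key schedule (10, 12 or 14 for 128-, 192- and 256-bit keys), whereas the comments note the stock OpenSSL code used 9/11/13 and relied on a zeroed tail of the schedule. A minimal C sketch of that mapping, for orientation only (it is not part of the patch):

/*
 * Illustrative only: the value the assembler expects to find in %ebp.
 * 10 -> the first jb .Lenc_tail is taken (AES-128), 12 -> the second jb
 * is taken (AES-192), 14 -> falls through to the final key rounds (AES-256).
 */
static int
icp_aes_rounds(int key_bits)
{
	switch (key_bits) {
	case 128:
		return (10);
	case 192:
		return (12);
	case 256:
		return (14);
	default:
		return (-1);	/* unsupported key length */
	}
}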
++ jb .Lenc_tail ++ ++ vaesenc %xmm15,%xmm9,%xmm9 ++ vaesenc %xmm15,%xmm10,%xmm10 ++ vaesenc %xmm15,%xmm11,%xmm11 ++ vaesenc %xmm15,%xmm12,%xmm12 ++ vaesenc %xmm15,%xmm13,%xmm13 ++ vaesenc %xmm15,%xmm14,%xmm14 ++ ++ vaesenc %xmm1,%xmm9,%xmm9 ++ vaesenc %xmm1,%xmm10,%xmm10 ++ vaesenc %xmm1,%xmm11,%xmm11 ++ vaesenc %xmm1,%xmm12,%xmm12 ++ vaesenc %xmm1,%xmm13,%xmm13 ++ vmovups 208-128(%rcx),%xmm15 ++ vaesenc %xmm1,%xmm14,%xmm14 ++ vmovups 224-128(%rcx),%xmm1 ++ jmp .Lenc_tail ++ ++.align 32 ++.Lhandle_ctr32: ++ vmovdqu (%r11),%xmm0 ++ vpshufb %xmm0,%xmm1,%xmm6 ++ vmovdqu 48(%r11),%xmm5 ++ vpaddd 64(%r11),%xmm6,%xmm10 ++ vpaddd %xmm5,%xmm6,%xmm11 ++ vmovdqu 0-32(%r9),%xmm3 ++ vpaddd %xmm5,%xmm10,%xmm12 ++ vpshufb %xmm0,%xmm10,%xmm10 ++ vpaddd %xmm5,%xmm11,%xmm13 ++ vpshufb %xmm0,%xmm11,%xmm11 ++ vpxor %xmm15,%xmm10,%xmm10 ++ vpaddd %xmm5,%xmm12,%xmm14 ++ vpshufb %xmm0,%xmm12,%xmm12 ++ vpxor %xmm15,%xmm11,%xmm11 ++ vpaddd %xmm5,%xmm13,%xmm1 ++ vpshufb %xmm0,%xmm13,%xmm13 ++ vpshufb %xmm0,%xmm14,%xmm14 ++ vpshufb %xmm0,%xmm1,%xmm1 ++ jmp .Lresume_ctr32 ++ ++.align 32 ++.Lenc_tail: ++ vaesenc %xmm15,%xmm9,%xmm9 ++ vmovdqu %xmm7,16+8(%rsp) ++ vpalignr $8,%xmm4,%xmm4,%xmm8 ++ vaesenc %xmm15,%xmm10,%xmm10 ++ vpclmulqdq $0x10,%xmm3,%xmm4,%xmm4 ++ vpxor 0(%rdi),%xmm1,%xmm2 ++ vaesenc %xmm15,%xmm11,%xmm11 ++ vpxor 16(%rdi),%xmm1,%xmm0 ++ vaesenc %xmm15,%xmm12,%xmm12 ++ vpxor 32(%rdi),%xmm1,%xmm5 ++ vaesenc %xmm15,%xmm13,%xmm13 ++ vpxor 48(%rdi),%xmm1,%xmm6 ++ vaesenc %xmm15,%xmm14,%xmm14 ++ vpxor 64(%rdi),%xmm1,%xmm7 ++ vpxor 80(%rdi),%xmm1,%xmm3 ++ vmovdqu (%r8),%xmm1 ++ ++ vaesenclast %xmm2,%xmm9,%xmm9 ++ vmovdqu 32(%r11),%xmm2 ++ vaesenclast %xmm0,%xmm10,%xmm10 ++ vpaddb %xmm2,%xmm1,%xmm0 ++ movq %r13,112+8(%rsp) ++ leaq 96(%rdi),%rdi ++ vaesenclast %xmm5,%xmm11,%xmm11 ++ vpaddb %xmm2,%xmm0,%xmm5 ++ movq %r12,120+8(%rsp) ++ leaq 96(%rsi),%rsi ++ vmovdqu 0-128(%rcx),%xmm15 ++ vaesenclast %xmm6,%xmm12,%xmm12 ++ vpaddb %xmm2,%xmm5,%xmm6 ++ vaesenclast %xmm7,%xmm13,%xmm13 ++ vpaddb %xmm2,%xmm6,%xmm7 ++ vaesenclast %xmm3,%xmm14,%xmm14 ++ vpaddb %xmm2,%xmm7,%xmm3 ++ ++ addq $0x60,%r10 ++ subq $0x6,%rdx ++ jc .L6x_done ++ ++ vmovups %xmm9,-96(%rsi) ++ vpxor %xmm15,%xmm1,%xmm9 ++ vmovups %xmm10,-80(%rsi) ++ vmovdqa %xmm0,%xmm10 ++ vmovups %xmm11,-64(%rsi) ++ vmovdqa %xmm5,%xmm11 ++ vmovups %xmm12,-48(%rsi) ++ vmovdqa %xmm6,%xmm12 ++ vmovups %xmm13,-32(%rsi) ++ vmovdqa %xmm7,%xmm13 ++ vmovups %xmm14,-16(%rsi) ++ vmovdqa %xmm3,%xmm14 ++ vmovdqu 32+8(%rsp),%xmm7 ++ jmp .Loop6x ++ ++.L6x_done: ++ vpxor 16+8(%rsp),%xmm8,%xmm8 ++ vpxor %xmm4,%xmm8,%xmm8 ++ ++ .byte 0xf3,0xc3 ++.size _aesni_ctr32_ghash_6x,.-_aesni_ctr32_ghash_6x ++.globl aesni_gcm_decrypt ++.type aesni_gcm_decrypt,@function ++.align 32 ++aesni_gcm_decrypt: ++.cfi_startproc ++ xorq %r10,%r10 ++ cmpq $0x60,%rdx ++ jb .Lgcm_dec_abort ++ ++ leaq (%rsp),%rax ++.cfi_def_cfa_register %rax ++ pushq %rbx ++.cfi_offset %rbx,-16 ++ pushq %rbp ++.cfi_offset %rbp,-24 ++ pushq %r12 ++.cfi_offset %r12,-32 ++ pushq %r13 ++.cfi_offset %r13,-40 ++ pushq %r14 ++.cfi_offset %r14,-48 ++ pushq %r15 ++.cfi_offset %r15,-56 ++ vzeroupper ++ ++ vmovdqu (%r8),%xmm1 ++ addq $-128,%rsp ++ movl 12(%r8),%ebx ++ leaq .Lbswap_mask(%rip),%r11 ++ leaq -128(%rcx),%r14 ++ movq $0xf80,%r15 ++ vmovdqu (%r9),%xmm8 ++ andq $-128,%rsp ++ vmovdqu (%r11),%xmm0 ++ leaq 128(%rcx),%rcx ++ leaq 32+32(%r9),%r9 ++ movl 504-128(%rcx),%ebp // ICP has a larger offset for rounds. 
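The movl 504-128(%rcx),%ebp above is where that round count is fetched: %rcx has already been advanced 128 bytes into the key schedule, so the effective address is byte offset 504 of the aes_key_t — the same hard-coded encr_ks (0) / nr (504) layout that the aes_impl.h hunk later in this diff warns about. A hedged sketch of a compile-time guard expressing that assumption (it presumes aes_impl.h is in scope and is not part of the patch):

#include <stddef.h>	/* offsetof() */

/*
 * Sketch only: aesni-gcm-x86_64.S hard codes encr_ks at offset 0 and the
 * round count (nr) at offset 504 of struct aes_key.
 */
_Static_assert(offsetof(struct aes_key, encr_ks) == 0,
	"encr_ks must stay at offset 0 for aesni-gcm-x86_64.S");
_Static_assert(offsetof(struct aes_key, nr) == 504,
	"nr must stay at offset 504, it is loaded as 504-128(%rcx)");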
++ vpshufb %xmm0,%xmm8,%xmm8 ++ ++ andq %r15,%r14 ++ andq %rsp,%r15 ++ subq %r14,%r15 ++ jc .Ldec_no_key_aliasing ++ cmpq $768,%r15 ++ jnc .Ldec_no_key_aliasing ++ subq %r15,%rsp ++.Ldec_no_key_aliasing: ++ ++ vmovdqu 80(%rdi),%xmm7 ++ leaq (%rdi),%r14 ++ vmovdqu 64(%rdi),%xmm4 ++ leaq -192(%rdi,%rdx,1),%r15 ++ vmovdqu 48(%rdi),%xmm5 ++ shrq $4,%rdx ++ xorq %r10,%r10 ++ vmovdqu 32(%rdi),%xmm6 ++ vpshufb %xmm0,%xmm7,%xmm7 ++ vmovdqu 16(%rdi),%xmm2 ++ vpshufb %xmm0,%xmm4,%xmm4 ++ vmovdqu (%rdi),%xmm3 ++ vpshufb %xmm0,%xmm5,%xmm5 ++ vmovdqu %xmm4,48(%rsp) ++ vpshufb %xmm0,%xmm6,%xmm6 ++ vmovdqu %xmm5,64(%rsp) ++ vpshufb %xmm0,%xmm2,%xmm2 ++ vmovdqu %xmm6,80(%rsp) ++ vpshufb %xmm0,%xmm3,%xmm3 ++ vmovdqu %xmm2,96(%rsp) ++ vmovdqu %xmm3,112(%rsp) ++ ++ call _aesni_ctr32_ghash_6x ++ ++ vmovups %xmm9,-96(%rsi) ++ vmovups %xmm10,-80(%rsi) ++ vmovups %xmm11,-64(%rsi) ++ vmovups %xmm12,-48(%rsi) ++ vmovups %xmm13,-32(%rsi) ++ vmovups %xmm14,-16(%rsi) ++ ++ vpshufb (%r11),%xmm8,%xmm8 ++ vmovdqu %xmm8,-64(%r9) ++ ++ vzeroupper ++ movq -48(%rax),%r15 ++.cfi_restore %r15 ++ movq -40(%rax),%r14 ++.cfi_restore %r14 ++ movq -32(%rax),%r13 ++.cfi_restore %r13 ++ movq -24(%rax),%r12 ++.cfi_restore %r12 ++ movq -16(%rax),%rbp ++.cfi_restore %rbp ++ movq -8(%rax),%rbx ++.cfi_restore %rbx ++ leaq (%rax),%rsp ++.cfi_def_cfa_register %rsp ++.Lgcm_dec_abort: ++ movq %r10,%rax ++ .byte 0xf3,0xc3 ++.cfi_endproc ++.size aesni_gcm_decrypt,.-aesni_gcm_decrypt ++.type _aesni_ctr32_6x,@function ++.align 32 ++_aesni_ctr32_6x: ++ vmovdqu 0-128(%rcx),%xmm4 ++ vmovdqu 32(%r11),%xmm2 ++ leaq -2(%rbp),%r13 // ICP uses 10,12,14 not 9,11,13 for rounds. ++ vmovups 16-128(%rcx),%xmm15 ++ leaq 32-128(%rcx),%r12 ++ vpxor %xmm4,%xmm1,%xmm9 ++ addl $100663296,%ebx ++ jc .Lhandle_ctr32_2 ++ vpaddb %xmm2,%xmm1,%xmm10 ++ vpaddb %xmm2,%xmm10,%xmm11 ++ vpxor %xmm4,%xmm10,%xmm10 ++ vpaddb %xmm2,%xmm11,%xmm12 ++ vpxor %xmm4,%xmm11,%xmm11 ++ vpaddb %xmm2,%xmm12,%xmm13 ++ vpxor %xmm4,%xmm12,%xmm12 ++ vpaddb %xmm2,%xmm13,%xmm14 ++ vpxor %xmm4,%xmm13,%xmm13 ++ vpaddb %xmm2,%xmm14,%xmm1 ++ vpxor %xmm4,%xmm14,%xmm14 ++ jmp .Loop_ctr32 ++ ++.align 16 ++.Loop_ctr32: ++ vaesenc %xmm15,%xmm9,%xmm9 ++ vaesenc %xmm15,%xmm10,%xmm10 ++ vaesenc %xmm15,%xmm11,%xmm11 ++ vaesenc %xmm15,%xmm12,%xmm12 ++ vaesenc %xmm15,%xmm13,%xmm13 ++ vaesenc %xmm15,%xmm14,%xmm14 ++ vmovups (%r12),%xmm15 ++ leaq 16(%r12),%r12 ++ decl %r13d ++ jnz .Loop_ctr32 ++ ++ vmovdqu (%r12),%xmm3 ++ vaesenc %xmm15,%xmm9,%xmm9 ++ vpxor 0(%rdi),%xmm3,%xmm4 ++ vaesenc %xmm15,%xmm10,%xmm10 ++ vpxor 16(%rdi),%xmm3,%xmm5 ++ vaesenc %xmm15,%xmm11,%xmm11 ++ vpxor 32(%rdi),%xmm3,%xmm6 ++ vaesenc %xmm15,%xmm12,%xmm12 ++ vpxor 48(%rdi),%xmm3,%xmm8 ++ vaesenc %xmm15,%xmm13,%xmm13 ++ vpxor 64(%rdi),%xmm3,%xmm2 ++ vaesenc %xmm15,%xmm14,%xmm14 ++ vpxor 80(%rdi),%xmm3,%xmm3 ++ leaq 96(%rdi),%rdi ++ ++ vaesenclast %xmm4,%xmm9,%xmm9 ++ vaesenclast %xmm5,%xmm10,%xmm10 ++ vaesenclast %xmm6,%xmm11,%xmm11 ++ vaesenclast %xmm8,%xmm12,%xmm12 ++ vaesenclast %xmm2,%xmm13,%xmm13 ++ vaesenclast %xmm3,%xmm14,%xmm14 ++ vmovups %xmm9,0(%rsi) ++ vmovups %xmm10,16(%rsi) ++ vmovups %xmm11,32(%rsi) ++ vmovups %xmm12,48(%rsi) ++ vmovups %xmm13,64(%rsi) ++ vmovups %xmm14,80(%rsi) ++ leaq 96(%rsi),%rsi ++ ++ .byte 0xf3,0xc3 ++.align 32 ++.Lhandle_ctr32_2: ++ vpshufb %xmm0,%xmm1,%xmm6 ++ vmovdqu 48(%r11),%xmm5 ++ vpaddd 64(%r11),%xmm6,%xmm10 ++ vpaddd %xmm5,%xmm6,%xmm11 ++ vpaddd %xmm5,%xmm10,%xmm12 ++ vpshufb %xmm0,%xmm10,%xmm10 ++ vpaddd %xmm5,%xmm11,%xmm13 ++ vpshufb %xmm0,%xmm11,%xmm11 ++ vpxor %xmm4,%xmm10,%xmm10 ++ vpaddd 
%xmm5,%xmm12,%xmm14 ++ vpshufb %xmm0,%xmm12,%xmm12 ++ vpxor %xmm4,%xmm11,%xmm11 ++ vpaddd %xmm5,%xmm13,%xmm1 ++ vpshufb %xmm0,%xmm13,%xmm13 ++ vpxor %xmm4,%xmm12,%xmm12 ++ vpshufb %xmm0,%xmm14,%xmm14 ++ vpxor %xmm4,%xmm13,%xmm13 ++ vpshufb %xmm0,%xmm1,%xmm1 ++ vpxor %xmm4,%xmm14,%xmm14 ++ jmp .Loop_ctr32 ++.size _aesni_ctr32_6x,.-_aesni_ctr32_6x ++ ++.globl aesni_gcm_encrypt ++.type aesni_gcm_encrypt,@function ++.align 32 ++aesni_gcm_encrypt: ++.cfi_startproc ++ xorq %r10,%r10 ++ cmpq $288,%rdx ++ jb .Lgcm_enc_abort ++ ++ leaq (%rsp),%rax ++.cfi_def_cfa_register %rax ++ pushq %rbx ++.cfi_offset %rbx,-16 ++ pushq %rbp ++.cfi_offset %rbp,-24 ++ pushq %r12 ++.cfi_offset %r12,-32 ++ pushq %r13 ++.cfi_offset %r13,-40 ++ pushq %r14 ++.cfi_offset %r14,-48 ++ pushq %r15 ++.cfi_offset %r15,-56 ++ vzeroupper ++ ++ vmovdqu (%r8),%xmm1 ++ addq $-128,%rsp ++ movl 12(%r8),%ebx ++ leaq .Lbswap_mask(%rip),%r11 ++ leaq -128(%rcx),%r14 ++ movq $0xf80,%r15 ++ leaq 128(%rcx),%rcx ++ vmovdqu (%r11),%xmm0 ++ andq $-128,%rsp ++ movl 504-128(%rcx),%ebp // ICP has an larger offset for rounds. ++ ++ andq %r15,%r14 ++ andq %rsp,%r15 ++ subq %r14,%r15 ++ jc .Lenc_no_key_aliasing ++ cmpq $768,%r15 ++ jnc .Lenc_no_key_aliasing ++ subq %r15,%rsp ++.Lenc_no_key_aliasing: ++ ++ leaq (%rsi),%r14 ++ leaq -192(%rsi,%rdx,1),%r15 ++ shrq $4,%rdx ++ ++ call _aesni_ctr32_6x ++ vpshufb %xmm0,%xmm9,%xmm8 ++ vpshufb %xmm0,%xmm10,%xmm2 ++ vmovdqu %xmm8,112(%rsp) ++ vpshufb %xmm0,%xmm11,%xmm4 ++ vmovdqu %xmm2,96(%rsp) ++ vpshufb %xmm0,%xmm12,%xmm5 ++ vmovdqu %xmm4,80(%rsp) ++ vpshufb %xmm0,%xmm13,%xmm6 ++ vmovdqu %xmm5,64(%rsp) ++ vpshufb %xmm0,%xmm14,%xmm7 ++ vmovdqu %xmm6,48(%rsp) ++ ++ call _aesni_ctr32_6x ++ ++ vmovdqu (%r9),%xmm8 ++ leaq 32+32(%r9),%r9 ++ subq $12,%rdx ++ movq $192,%r10 ++ vpshufb %xmm0,%xmm8,%xmm8 ++ ++ call _aesni_ctr32_ghash_6x ++ vmovdqu 32(%rsp),%xmm7 ++ vmovdqu (%r11),%xmm0 ++ vmovdqu 0-32(%r9),%xmm3 ++ vpunpckhqdq %xmm7,%xmm7,%xmm1 ++ vmovdqu 32-32(%r9),%xmm15 ++ vmovups %xmm9,-96(%rsi) ++ vpshufb %xmm0,%xmm9,%xmm9 ++ vpxor %xmm7,%xmm1,%xmm1 ++ vmovups %xmm10,-80(%rsi) ++ vpshufb %xmm0,%xmm10,%xmm10 ++ vmovups %xmm11,-64(%rsi) ++ vpshufb %xmm0,%xmm11,%xmm11 ++ vmovups %xmm12,-48(%rsi) ++ vpshufb %xmm0,%xmm12,%xmm12 ++ vmovups %xmm13,-32(%rsi) ++ vpshufb %xmm0,%xmm13,%xmm13 ++ vmovups %xmm14,-16(%rsi) ++ vpshufb %xmm0,%xmm14,%xmm14 ++ vmovdqu %xmm9,16(%rsp) ++ vmovdqu 48(%rsp),%xmm6 ++ vmovdqu 16-32(%r9),%xmm0 ++ vpunpckhqdq %xmm6,%xmm6,%xmm2 ++ vpclmulqdq $0x00,%xmm3,%xmm7,%xmm5 ++ vpxor %xmm6,%xmm2,%xmm2 ++ vpclmulqdq $0x11,%xmm3,%xmm7,%xmm7 ++ vpclmulqdq $0x00,%xmm15,%xmm1,%xmm1 ++ ++ vmovdqu 64(%rsp),%xmm9 ++ vpclmulqdq $0x00,%xmm0,%xmm6,%xmm4 ++ vmovdqu 48-32(%r9),%xmm3 ++ vpxor %xmm5,%xmm4,%xmm4 ++ vpunpckhqdq %xmm9,%xmm9,%xmm5 ++ vpclmulqdq $0x11,%xmm0,%xmm6,%xmm6 ++ vpxor %xmm9,%xmm5,%xmm5 ++ vpxor %xmm7,%xmm6,%xmm6 ++ vpclmulqdq $0x10,%xmm15,%xmm2,%xmm2 ++ vmovdqu 80-32(%r9),%xmm15 ++ vpxor %xmm1,%xmm2,%xmm2 ++ ++ vmovdqu 80(%rsp),%xmm1 ++ vpclmulqdq $0x00,%xmm3,%xmm9,%xmm7 ++ vmovdqu 64-32(%r9),%xmm0 ++ vpxor %xmm4,%xmm7,%xmm7 ++ vpunpckhqdq %xmm1,%xmm1,%xmm4 ++ vpclmulqdq $0x11,%xmm3,%xmm9,%xmm9 ++ vpxor %xmm1,%xmm4,%xmm4 ++ vpxor %xmm6,%xmm9,%xmm9 ++ vpclmulqdq $0x00,%xmm15,%xmm5,%xmm5 ++ vpxor %xmm2,%xmm5,%xmm5 ++ ++ vmovdqu 96(%rsp),%xmm2 ++ vpclmulqdq $0x00,%xmm0,%xmm1,%xmm6 ++ vmovdqu 96-32(%r9),%xmm3 ++ vpxor %xmm7,%xmm6,%xmm6 ++ vpunpckhqdq %xmm2,%xmm2,%xmm7 ++ vpclmulqdq $0x11,%xmm0,%xmm1,%xmm1 ++ vpxor %xmm2,%xmm7,%xmm7 ++ vpxor %xmm9,%xmm1,%xmm1 ++ vpclmulqdq $0x10,%xmm15,%xmm4,%xmm4 
++ vmovdqu 128-32(%r9),%xmm15 ++ vpxor %xmm5,%xmm4,%xmm4 ++ ++ vpxor 112(%rsp),%xmm8,%xmm8 ++ vpclmulqdq $0x00,%xmm3,%xmm2,%xmm5 ++ vmovdqu 112-32(%r9),%xmm0 ++ vpunpckhqdq %xmm8,%xmm8,%xmm9 ++ vpxor %xmm6,%xmm5,%xmm5 ++ vpclmulqdq $0x11,%xmm3,%xmm2,%xmm2 ++ vpxor %xmm8,%xmm9,%xmm9 ++ vpxor %xmm1,%xmm2,%xmm2 ++ vpclmulqdq $0x00,%xmm15,%xmm7,%xmm7 ++ vpxor %xmm4,%xmm7,%xmm4 ++ ++ vpclmulqdq $0x00,%xmm0,%xmm8,%xmm6 ++ vmovdqu 0-32(%r9),%xmm3 ++ vpunpckhqdq %xmm14,%xmm14,%xmm1 ++ vpclmulqdq $0x11,%xmm0,%xmm8,%xmm8 ++ vpxor %xmm14,%xmm1,%xmm1 ++ vpxor %xmm5,%xmm6,%xmm5 ++ vpclmulqdq $0x10,%xmm15,%xmm9,%xmm9 ++ vmovdqu 32-32(%r9),%xmm15 ++ vpxor %xmm2,%xmm8,%xmm7 ++ vpxor %xmm4,%xmm9,%xmm6 ++ ++ vmovdqu 16-32(%r9),%xmm0 ++ vpxor %xmm5,%xmm7,%xmm9 ++ vpclmulqdq $0x00,%xmm3,%xmm14,%xmm4 ++ vpxor %xmm9,%xmm6,%xmm6 ++ vpunpckhqdq %xmm13,%xmm13,%xmm2 ++ vpclmulqdq $0x11,%xmm3,%xmm14,%xmm14 ++ vpxor %xmm13,%xmm2,%xmm2 ++ vpslldq $8,%xmm6,%xmm9 ++ vpclmulqdq $0x00,%xmm15,%xmm1,%xmm1 ++ vpxor %xmm9,%xmm5,%xmm8 ++ vpsrldq $8,%xmm6,%xmm6 ++ vpxor %xmm6,%xmm7,%xmm7 ++ ++ vpclmulqdq $0x00,%xmm0,%xmm13,%xmm5 ++ vmovdqu 48-32(%r9),%xmm3 ++ vpxor %xmm4,%xmm5,%xmm5 ++ vpunpckhqdq %xmm12,%xmm12,%xmm9 ++ vpclmulqdq $0x11,%xmm0,%xmm13,%xmm13 ++ vpxor %xmm12,%xmm9,%xmm9 ++ vpxor %xmm14,%xmm13,%xmm13 ++ vpalignr $8,%xmm8,%xmm8,%xmm14 ++ vpclmulqdq $0x10,%xmm15,%xmm2,%xmm2 ++ vmovdqu 80-32(%r9),%xmm15 ++ vpxor %xmm1,%xmm2,%xmm2 ++ ++ vpclmulqdq $0x00,%xmm3,%xmm12,%xmm4 ++ vmovdqu 64-32(%r9),%xmm0 ++ vpxor %xmm5,%xmm4,%xmm4 ++ vpunpckhqdq %xmm11,%xmm11,%xmm1 ++ vpclmulqdq $0x11,%xmm3,%xmm12,%xmm12 ++ vpxor %xmm11,%xmm1,%xmm1 ++ vpxor %xmm13,%xmm12,%xmm12 ++ vxorps 16(%rsp),%xmm7,%xmm7 ++ vpclmulqdq $0x00,%xmm15,%xmm9,%xmm9 ++ vpxor %xmm2,%xmm9,%xmm9 ++ ++ vpclmulqdq $0x10,16(%r11),%xmm8,%xmm8 ++ vxorps %xmm14,%xmm8,%xmm8 ++ ++ vpclmulqdq $0x00,%xmm0,%xmm11,%xmm5 ++ vmovdqu 96-32(%r9),%xmm3 ++ vpxor %xmm4,%xmm5,%xmm5 ++ vpunpckhqdq %xmm10,%xmm10,%xmm2 ++ vpclmulqdq $0x11,%xmm0,%xmm11,%xmm11 ++ vpxor %xmm10,%xmm2,%xmm2 ++ vpalignr $8,%xmm8,%xmm8,%xmm14 ++ vpxor %xmm12,%xmm11,%xmm11 ++ vpclmulqdq $0x10,%xmm15,%xmm1,%xmm1 ++ vmovdqu 128-32(%r9),%xmm15 ++ vpxor %xmm9,%xmm1,%xmm1 ++ ++ vxorps %xmm7,%xmm14,%xmm14 ++ vpclmulqdq $0x10,16(%r11),%xmm8,%xmm8 ++ vxorps %xmm14,%xmm8,%xmm8 ++ ++ vpclmulqdq $0x00,%xmm3,%xmm10,%xmm4 ++ vmovdqu 112-32(%r9),%xmm0 ++ vpxor %xmm5,%xmm4,%xmm4 ++ vpunpckhqdq %xmm8,%xmm8,%xmm9 ++ vpclmulqdq $0x11,%xmm3,%xmm10,%xmm10 ++ vpxor %xmm8,%xmm9,%xmm9 ++ vpxor %xmm11,%xmm10,%xmm10 ++ vpclmulqdq $0x00,%xmm15,%xmm2,%xmm2 ++ vpxor %xmm1,%xmm2,%xmm2 ++ ++ vpclmulqdq $0x00,%xmm0,%xmm8,%xmm5 ++ vpclmulqdq $0x11,%xmm0,%xmm8,%xmm7 ++ vpxor %xmm4,%xmm5,%xmm5 ++ vpclmulqdq $0x10,%xmm15,%xmm9,%xmm6 ++ vpxor %xmm10,%xmm7,%xmm7 ++ vpxor %xmm2,%xmm6,%xmm6 ++ ++ vpxor %xmm5,%xmm7,%xmm4 ++ vpxor %xmm4,%xmm6,%xmm6 ++ vpslldq $8,%xmm6,%xmm1 ++ vmovdqu 16(%r11),%xmm3 ++ vpsrldq $8,%xmm6,%xmm6 ++ vpxor %xmm1,%xmm5,%xmm8 ++ vpxor %xmm6,%xmm7,%xmm7 ++ ++ vpalignr $8,%xmm8,%xmm8,%xmm2 ++ vpclmulqdq $0x10,%xmm3,%xmm8,%xmm8 ++ vpxor %xmm2,%xmm8,%xmm8 ++ ++ vpalignr $8,%xmm8,%xmm8,%xmm2 ++ vpclmulqdq $0x10,%xmm3,%xmm8,%xmm8 ++ vpxor %xmm7,%xmm2,%xmm2 ++ vpxor %xmm2,%xmm8,%xmm8 ++ vpshufb (%r11),%xmm8,%xmm8 ++ vmovdqu %xmm8,-64(%r9) ++ ++ vzeroupper ++ movq -48(%rax),%r15 ++.cfi_restore %r15 ++ movq -40(%rax),%r14 ++.cfi_restore %r14 ++ movq -32(%rax),%r13 ++.cfi_restore %r13 ++ movq -24(%rax),%r12 ++.cfi_restore %r12 ++ movq -16(%rax),%rbp ++.cfi_restore %rbp ++ movq -8(%rax),%rbx ++.cfi_restore %rbx ++ leaq (%rax),%rsp 
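For context around the two abort checks (cmpq $0x60,%rdx in aesni_gcm_decrypt and cmpq $288,%rdx in aesni_gcm_encrypt): the stitched routines only consume whole 6-block batches of 96 bytes, and the encrypt path additionally wants three such batches up front, so a caller has to split each request into a bulk part for these functions and a remainder for the generic per-block GCM path. A rough sketch of that split, with illustrative names that are not ICP's:

#include <stddef.h>

/*
 * Sketch only: bytes that could be handed to the stitched AVX routines;
 * the remainder would be processed by the ordinary block-at-a-time path.
 */
#define	GCM_AVX_CHUNK_BYTES	96	/* 6 AES blocks per .Loop6x pass */
#define	GCM_AVX_MIN_ENCRYPT	288	/* encrypt primes three batches first */

static size_t
gcm_avx_bulk_bytes(size_t nbytes, int encrypt)
{
	size_t min = encrypt ? GCM_AVX_MIN_ENCRYPT : GCM_AVX_CHUNK_BYTES;

	if (nbytes < min)
		return (0);
	return (nbytes - (nbytes % GCM_AVX_CHUNK_BYTES));
}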
++.cfi_def_cfa_register %rsp ++.Lgcm_enc_abort: ++ movq %r10,%rax ++ .byte 0xf3,0xc3 ++.cfi_endproc ++.size aesni_gcm_encrypt,.-aesni_gcm_encrypt ++ ++/* Some utility routines */ ++ ++/* ++ * clear all fpu registers ++ * void clear_fpu_regs_avx(void); ++ */ ++.globl clear_fpu_regs_avx ++.type clear_fpu_regs_avx,@function ++.align 32 ++clear_fpu_regs_avx: ++ vzeroall ++ ret ++.size clear_fpu_regs_avx,.-clear_fpu_regs_avx ++ ++/* ++ * void gcm_xor_avx(const uint8_t *src, uint8_t *dst); ++ * ++ * XORs one pair of unaligned 128-bit blocks from `src' and `dst' and ++ * stores the result at `dst'. The XOR is performed using FPU registers, ++ * so make sure FPU state is saved when running this in the kernel. ++ */ ++.globl gcm_xor_avx ++.type gcm_xor_avx,@function ++.align 32 ++gcm_xor_avx: ++ movdqu (%rdi), %xmm0 ++ movdqu (%rsi), %xmm1 ++ pxor %xmm1, %xmm0 ++ movdqu %xmm0, (%rsi) ++ ret ++.size gcm_xor_avx,.-gcm_xor_avx ++ ++/* ++ * Toggle a boolean_t value atomically and return the new value. ++ * boolean_t atomic_toggle_boolean_nv(volatile boolean_t *); ++ */ ++.globl atomic_toggle_boolean_nv ++.type atomic_toggle_boolean_nv,@function ++.align 32 ++atomic_toggle_boolean_nv: ++ xorl %eax, %eax ++ lock ++ xorl $1, (%rdi) ++ jz 1f ++ movl $1, %eax ++1: ++ ret ++.size atomic_toggle_boolean_nv,.-atomic_toggle_boolean_nv ++ ++.align 64 ++.Lbswap_mask: ++.byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0 ++.Lpoly: ++.byte 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xc2 ++.Lone_msb: ++.byte 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1 ++.Ltwo_lsb: ++.byte 2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 ++.Lone_lsb: ++.byte 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 ++.byte 65,69,83,45,78,73,32,71,67,77,32,109,111,100,117,108,101,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 ++.align 64 ++ ++/* Mark the stack non-executable. */ ++#if defined(__linux__) && defined(__ELF__) ++.section .note.GNU-stack,"",%progbits ++#endif ++ ++#endif /* defined(__x86_64__) && defined(HAVE_AVX) && defined(HAVE_AES) ... */ +Index: zfs-linux-0.8.3/module/icp/asm-x86_64/modes/ghash-x86_64.S +=================================================================== +--- /dev/null ++++ zfs-linux-0.8.3/module/icp/asm-x86_64/modes/ghash-x86_64.S +@@ -0,0 +1,714 @@ ++# Copyright 2010-2016 The OpenSSL Project Authors. All Rights Reserved. ++# ++# Licensed under the Apache License 2.0 (the "License"). You may not use ++# this file except in compliance with the License. You can obtain a copy ++# in the file LICENSE in the source distribution or at ++# https://www.openssl.org/source/license.html ++ ++# ++# ==================================================================== ++# Written by Andy Polyakov for the OpenSSL ++# project. The module is, however, dual licensed under OpenSSL and ++# CRYPTOGAMS licenses depending on where you obtain it. For further ++# details see http://www.openssl.org/~appro/cryptogams/. ++# ==================================================================== ++# ++# March, June 2010 ++# ++# The module implements "4-bit" GCM GHASH function and underlying ++# single multiplication operation in GF(2^128). "4-bit" means that ++# it uses 256 bytes per-key table [+128 bytes shared table]. GHASH ++# function features so called "528B" variant utilizing additional ++# 256+16 bytes of per-key storage [+512 bytes shared table]. 
++# Performance results are for this streamed GHASH subroutine and are ++# expressed in cycles per processed byte, less is better: ++# ++# gcc 3.4.x(*) assembler ++# ++# P4 28.6 14.0 +100% ++# Opteron 19.3 7.7 +150% ++# Core2 17.8 8.1(**) +120% ++# Atom 31.6 16.8 +88% ++# VIA Nano 21.8 10.1 +115% ++# ++# (*) comparison is not completely fair, because C results are ++# for vanilla "256B" implementation, while assembler results ++# are for "528B";-) ++# (**) it's mystery [to me] why Core2 result is not same as for ++# Opteron; ++ ++# May 2010 ++# ++# Add PCLMULQDQ version performing at 2.02 cycles per processed byte. ++# See ghash-x86.pl for background information and details about coding ++# techniques. ++# ++# Special thanks to David Woodhouse for providing access to a ++# Westmere-based system on behalf of Intel Open Source Technology Centre. ++ ++# December 2012 ++# ++# Overhaul: aggregate Karatsuba post-processing, improve ILP in ++# reduction_alg9, increase reduction aggregate factor to 4x. As for ++# the latter. ghash-x86.pl discusses that it makes lesser sense to ++# increase aggregate factor. Then why increase here? Critical path ++# consists of 3 independent pclmulqdq instructions, Karatsuba post- ++# processing and reduction. "On top" of this we lay down aggregated ++# multiplication operations, triplets of independent pclmulqdq's. As ++# issue rate for pclmulqdq is limited, it makes lesser sense to ++# aggregate more multiplications than it takes to perform remaining ++# non-multiplication operations. 2x is near-optimal coefficient for ++# contemporary Intel CPUs (therefore modest improvement coefficient), ++# but not for Bulldozer. Latter is because logical SIMD operations ++# are twice as slow in comparison to Intel, so that critical path is ++# longer. A CPU with higher pclmulqdq issue rate would also benefit ++# from higher aggregate factor... ++# ++# Westmere 1.78(+13%) ++# Sandy Bridge 1.80(+8%) ++# Ivy Bridge 1.80(+7%) ++# Haswell 0.55(+93%) (if system doesn't support AVX) ++# Broadwell 0.45(+110%)(if system doesn't support AVX) ++# Skylake 0.44(+110%)(if system doesn't support AVX) ++# Bulldozer 1.49(+27%) ++# Silvermont 2.88(+13%) ++# Knights L 2.12(-) (if system doesn't support AVX) ++# Goldmont 1.08(+24%) ++ ++# March 2013 ++# ++# ... 8x aggregate factor AVX code path is using reduction algorithm ++# suggested by Shay Gueron[1]. Even though contemporary AVX-capable ++# CPUs such as Sandy and Ivy Bridge can execute it, the code performs ++# sub-optimally in comparison to above mentioned version. But thanks ++# to Ilya Albrekht and Max Locktyukhin of Intel Corp. we knew that ++# it performs in 0.41 cycles per byte on Haswell processor, in ++# 0.29 on Broadwell, and in 0.36 on Skylake. ++# ++# Knights Landing achieves 1.09 cpb. ++# ++# [1] http://rt.openssl.org/Ticket/Display.html?id=2900&user=guest&pass=guest ++ ++# Generated once from ++# https://github.com/openssl/openssl/blob/5ffc3324/crypto/modes/asm/ghash-x86_64.pl ++# and modified for ICP. Modification are kept at a bare minimum to ease later ++# upstream merges. 
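Before the hand-tuned variants that follow, it may help to keep in mind the single operation every one of them accelerates: a carry-less multiply in GF(2^128) with GHASH's reflected bit order, reduced by x^128 + x^7 + x^2 + x + 1 (the 0xe1/0xc2 constants in these files are encodings of that polynomial). A plain-C reference for orientation only — the be128 type and gf128_mul name are illustrative stand-ins, not ICP symbols:

#include <stdint.h>

typedef struct {
	uint64_t hi;	/* first 8 bytes of the 16-byte block */
	uint64_t lo;	/* last 8 bytes */
} be128;

/*
 * Reference bit-reflected GF(2^128) multiply as specified for GHASH; the
 * table-driven and PCLMULQDQ paths below compute the same product faster.
 */
static be128
gf128_mul(be128 x, be128 y)
{
	be128 z = { 0, 0 };
	be128 v = y;
	int i;

	for (i = 0; i < 128; i++) {
		/* Bit i of x, leftmost (most significant) bit first. */
		uint64_t bit = (i < 64) ?
		    (x.hi >> (63 - i)) & 1 : (x.lo >> (127 - i)) & 1;
		uint64_t lsb = v.lo & 1;

		if (bit) {
			z.hi ^= v.hi;
			z.lo ^= v.lo;
		}
		/* v >>= 1 in GHASH bit order, folding in the polynomial. */
		v.lo = (v.lo >> 1) | (v.hi << 63);
		v.hi >>= 1;
		if (lsb)
			v.hi ^= 0xe100000000000000ULL;
	}
	return (z);
}

A GHASH over a message is then just z = gf128_mul(z XOR next_block, H) repeated block by block; the 4-bit tables and the AVX paths below trade memory and instruction-level parallelism for exactly that loop.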
++ ++#if defined(__x86_64__) && defined(HAVE_AVX) && \ ++ defined(HAVE_AES) && defined(HAVE_PCLMULQDQ) ++ ++.text ++ ++.globl gcm_gmult_clmul ++.type gcm_gmult_clmul,@function ++.align 16 ++gcm_gmult_clmul: ++.cfi_startproc ++.L_gmult_clmul: ++ movdqu (%rdi),%xmm0 ++ movdqa .Lbswap_mask(%rip),%xmm5 ++ movdqu (%rsi),%xmm2 ++ movdqu 32(%rsi),%xmm4 ++.byte 102,15,56,0,197 ++ movdqa %xmm0,%xmm1 ++ pshufd $78,%xmm0,%xmm3 ++ pxor %xmm0,%xmm3 ++.byte 102,15,58,68,194,0 ++.byte 102,15,58,68,202,17 ++.byte 102,15,58,68,220,0 ++ pxor %xmm0,%xmm3 ++ pxor %xmm1,%xmm3 ++ ++ movdqa %xmm3,%xmm4 ++ psrldq $8,%xmm3 ++ pslldq $8,%xmm4 ++ pxor %xmm3,%xmm1 ++ pxor %xmm4,%xmm0 ++ ++ movdqa %xmm0,%xmm4 ++ movdqa %xmm0,%xmm3 ++ psllq $5,%xmm0 ++ pxor %xmm0,%xmm3 ++ psllq $1,%xmm0 ++ pxor %xmm3,%xmm0 ++ psllq $57,%xmm0 ++ movdqa %xmm0,%xmm3 ++ pslldq $8,%xmm0 ++ psrldq $8,%xmm3 ++ pxor %xmm4,%xmm0 ++ pxor %xmm3,%xmm1 ++ ++ ++ movdqa %xmm0,%xmm4 ++ psrlq $1,%xmm0 ++ pxor %xmm4,%xmm1 ++ pxor %xmm0,%xmm4 ++ psrlq $5,%xmm0 ++ pxor %xmm4,%xmm0 ++ psrlq $1,%xmm0 ++ pxor %xmm1,%xmm0 ++.byte 102,15,56,0,197 ++ movdqu %xmm0,(%rdi) ++ .byte 0xf3,0xc3 ++.cfi_endproc ++.size gcm_gmult_clmul,.-gcm_gmult_clmul ++ ++.globl gcm_init_htab_avx ++.type gcm_init_htab_avx,@function ++.align 32 ++gcm_init_htab_avx: ++.cfi_startproc ++ vzeroupper ++ ++ vmovdqu (%rsi),%xmm2 ++ // KCF/ICP stores H in network byte order with the hi qword first ++ // so we need to swap all bytes, not the 2 qwords. ++ vmovdqu .Lbswap_mask(%rip),%xmm4 ++ vpshufb %xmm4,%xmm2,%xmm2 ++ ++ ++ vpshufd $255,%xmm2,%xmm4 ++ vpsrlq $63,%xmm2,%xmm3 ++ vpsllq $1,%xmm2,%xmm2 ++ vpxor %xmm5,%xmm5,%xmm5 ++ vpcmpgtd %xmm4,%xmm5,%xmm5 ++ vpslldq $8,%xmm3,%xmm3 ++ vpor %xmm3,%xmm2,%xmm2 ++ ++ ++ vpand .L0x1c2_polynomial(%rip),%xmm5,%xmm5 ++ vpxor %xmm5,%xmm2,%xmm2 ++ ++ vpunpckhqdq %xmm2,%xmm2,%xmm6 ++ vmovdqa %xmm2,%xmm0 ++ vpxor %xmm2,%xmm6,%xmm6 ++ movq $4,%r10 ++ jmp .Linit_start_avx ++.align 32 ++.Linit_loop_avx: ++ vpalignr $8,%xmm3,%xmm4,%xmm5 ++ vmovdqu %xmm5,-16(%rdi) ++ vpunpckhqdq %xmm0,%xmm0,%xmm3 ++ vpxor %xmm0,%xmm3,%xmm3 ++ vpclmulqdq $0x11,%xmm2,%xmm0,%xmm1 ++ vpclmulqdq $0x00,%xmm2,%xmm0,%xmm0 ++ vpclmulqdq $0x00,%xmm6,%xmm3,%xmm3 ++ vpxor %xmm0,%xmm1,%xmm4 ++ vpxor %xmm4,%xmm3,%xmm3 ++ ++ vpslldq $8,%xmm3,%xmm4 ++ vpsrldq $8,%xmm3,%xmm3 ++ vpxor %xmm4,%xmm0,%xmm0 ++ vpxor %xmm3,%xmm1,%xmm1 ++ vpsllq $57,%xmm0,%xmm3 ++ vpsllq $62,%xmm0,%xmm4 ++ vpxor %xmm3,%xmm4,%xmm4 ++ vpsllq $63,%xmm0,%xmm3 ++ vpxor %xmm3,%xmm4,%xmm4 ++ vpslldq $8,%xmm4,%xmm3 ++ vpsrldq $8,%xmm4,%xmm4 ++ vpxor %xmm3,%xmm0,%xmm0 ++ vpxor %xmm4,%xmm1,%xmm1 ++ ++ vpsrlq $1,%xmm0,%xmm4 ++ vpxor %xmm0,%xmm1,%xmm1 ++ vpxor %xmm4,%xmm0,%xmm0 ++ vpsrlq $5,%xmm4,%xmm4 ++ vpxor %xmm4,%xmm0,%xmm0 ++ vpsrlq $1,%xmm0,%xmm0 ++ vpxor %xmm1,%xmm0,%xmm0 ++.Linit_start_avx: ++ vmovdqa %xmm0,%xmm5 ++ vpunpckhqdq %xmm0,%xmm0,%xmm3 ++ vpxor %xmm0,%xmm3,%xmm3 ++ vpclmulqdq $0x11,%xmm2,%xmm0,%xmm1 ++ vpclmulqdq $0x00,%xmm2,%xmm0,%xmm0 ++ vpclmulqdq $0x00,%xmm6,%xmm3,%xmm3 ++ vpxor %xmm0,%xmm1,%xmm4 ++ vpxor %xmm4,%xmm3,%xmm3 ++ ++ vpslldq $8,%xmm3,%xmm4 ++ vpsrldq $8,%xmm3,%xmm3 ++ vpxor %xmm4,%xmm0,%xmm0 ++ vpxor %xmm3,%xmm1,%xmm1 ++ vpsllq $57,%xmm0,%xmm3 ++ vpsllq $62,%xmm0,%xmm4 ++ vpxor %xmm3,%xmm4,%xmm4 ++ vpsllq $63,%xmm0,%xmm3 ++ vpxor %xmm3,%xmm4,%xmm4 ++ vpslldq $8,%xmm4,%xmm3 ++ vpsrldq $8,%xmm4,%xmm4 ++ vpxor %xmm3,%xmm0,%xmm0 ++ vpxor %xmm4,%xmm1,%xmm1 ++ ++ vpsrlq $1,%xmm0,%xmm4 ++ vpxor %xmm0,%xmm1,%xmm1 ++ vpxor %xmm4,%xmm0,%xmm0 ++ vpsrlq $5,%xmm4,%xmm4 ++ vpxor %xmm4,%xmm0,%xmm0 ++ vpsrlq 
$1,%xmm0,%xmm0 ++ vpxor %xmm1,%xmm0,%xmm0 ++ vpshufd $78,%xmm5,%xmm3 ++ vpshufd $78,%xmm0,%xmm4 ++ vpxor %xmm5,%xmm3,%xmm3 ++ vmovdqu %xmm5,0(%rdi) ++ vpxor %xmm0,%xmm4,%xmm4 ++ vmovdqu %xmm0,16(%rdi) ++ leaq 48(%rdi),%rdi ++ subq $1,%r10 ++ jnz .Linit_loop_avx ++ ++ vpalignr $8,%xmm4,%xmm3,%xmm5 ++ vmovdqu %xmm5,-16(%rdi) ++ ++ vzeroupper ++ .byte 0xf3,0xc3 ++.cfi_endproc ++.size gcm_init_htab_avx,.-gcm_init_htab_avx ++ ++.globl gcm_gmult_avx ++.type gcm_gmult_avx,@function ++.align 32 ++gcm_gmult_avx: ++.cfi_startproc ++ jmp .L_gmult_clmul ++.cfi_endproc ++.size gcm_gmult_avx,.-gcm_gmult_avx ++.globl gcm_ghash_avx ++.type gcm_ghash_avx,@function ++.align 32 ++gcm_ghash_avx: ++.cfi_startproc ++ vzeroupper ++ ++ vmovdqu (%rdi),%xmm10 ++ leaq .L0x1c2_polynomial(%rip),%r10 ++ leaq 64(%rsi),%rsi ++ vmovdqu .Lbswap_mask(%rip),%xmm13 ++ vpshufb %xmm13,%xmm10,%xmm10 ++ cmpq $0x80,%rcx ++ jb .Lshort_avx ++ subq $0x80,%rcx ++ ++ vmovdqu 112(%rdx),%xmm14 ++ vmovdqu 0-64(%rsi),%xmm6 ++ vpshufb %xmm13,%xmm14,%xmm14 ++ vmovdqu 32-64(%rsi),%xmm7 ++ ++ vpunpckhqdq %xmm14,%xmm14,%xmm9 ++ vmovdqu 96(%rdx),%xmm15 ++ vpclmulqdq $0x00,%xmm6,%xmm14,%xmm0 ++ vpxor %xmm14,%xmm9,%xmm9 ++ vpshufb %xmm13,%xmm15,%xmm15 ++ vpclmulqdq $0x11,%xmm6,%xmm14,%xmm1 ++ vmovdqu 16-64(%rsi),%xmm6 ++ vpunpckhqdq %xmm15,%xmm15,%xmm8 ++ vmovdqu 80(%rdx),%xmm14 ++ vpclmulqdq $0x00,%xmm7,%xmm9,%xmm2 ++ vpxor %xmm15,%xmm8,%xmm8 ++ ++ vpshufb %xmm13,%xmm14,%xmm14 ++ vpclmulqdq $0x00,%xmm6,%xmm15,%xmm3 ++ vpunpckhqdq %xmm14,%xmm14,%xmm9 ++ vpclmulqdq $0x11,%xmm6,%xmm15,%xmm4 ++ vmovdqu 48-64(%rsi),%xmm6 ++ vpxor %xmm14,%xmm9,%xmm9 ++ vmovdqu 64(%rdx),%xmm15 ++ vpclmulqdq $0x10,%xmm7,%xmm8,%xmm5 ++ vmovdqu 80-64(%rsi),%xmm7 ++ ++ vpshufb %xmm13,%xmm15,%xmm15 ++ vpxor %xmm0,%xmm3,%xmm3 ++ vpclmulqdq $0x00,%xmm6,%xmm14,%xmm0 ++ vpxor %xmm1,%xmm4,%xmm4 ++ vpunpckhqdq %xmm15,%xmm15,%xmm8 ++ vpclmulqdq $0x11,%xmm6,%xmm14,%xmm1 ++ vmovdqu 64-64(%rsi),%xmm6 ++ vpxor %xmm2,%xmm5,%xmm5 ++ vpclmulqdq $0x00,%xmm7,%xmm9,%xmm2 ++ vpxor %xmm15,%xmm8,%xmm8 ++ ++ vmovdqu 48(%rdx),%xmm14 ++ vpxor %xmm3,%xmm0,%xmm0 ++ vpclmulqdq $0x00,%xmm6,%xmm15,%xmm3 ++ vpxor %xmm4,%xmm1,%xmm1 ++ vpshufb %xmm13,%xmm14,%xmm14 ++ vpclmulqdq $0x11,%xmm6,%xmm15,%xmm4 ++ vmovdqu 96-64(%rsi),%xmm6 ++ vpxor %xmm5,%xmm2,%xmm2 ++ vpunpckhqdq %xmm14,%xmm14,%xmm9 ++ vpclmulqdq $0x10,%xmm7,%xmm8,%xmm5 ++ vmovdqu 128-64(%rsi),%xmm7 ++ vpxor %xmm14,%xmm9,%xmm9 ++ ++ vmovdqu 32(%rdx),%xmm15 ++ vpxor %xmm0,%xmm3,%xmm3 ++ vpclmulqdq $0x00,%xmm6,%xmm14,%xmm0 ++ vpxor %xmm1,%xmm4,%xmm4 ++ vpshufb %xmm13,%xmm15,%xmm15 ++ vpclmulqdq $0x11,%xmm6,%xmm14,%xmm1 ++ vmovdqu 112-64(%rsi),%xmm6 ++ vpxor %xmm2,%xmm5,%xmm5 ++ vpunpckhqdq %xmm15,%xmm15,%xmm8 ++ vpclmulqdq $0x00,%xmm7,%xmm9,%xmm2 ++ vpxor %xmm15,%xmm8,%xmm8 ++ ++ vmovdqu 16(%rdx),%xmm14 ++ vpxor %xmm3,%xmm0,%xmm0 ++ vpclmulqdq $0x00,%xmm6,%xmm15,%xmm3 ++ vpxor %xmm4,%xmm1,%xmm1 ++ vpshufb %xmm13,%xmm14,%xmm14 ++ vpclmulqdq $0x11,%xmm6,%xmm15,%xmm4 ++ vmovdqu 144-64(%rsi),%xmm6 ++ vpxor %xmm5,%xmm2,%xmm2 ++ vpunpckhqdq %xmm14,%xmm14,%xmm9 ++ vpclmulqdq $0x10,%xmm7,%xmm8,%xmm5 ++ vmovdqu 176-64(%rsi),%xmm7 ++ vpxor %xmm14,%xmm9,%xmm9 ++ ++ vmovdqu (%rdx),%xmm15 ++ vpxor %xmm0,%xmm3,%xmm3 ++ vpclmulqdq $0x00,%xmm6,%xmm14,%xmm0 ++ vpxor %xmm1,%xmm4,%xmm4 ++ vpshufb %xmm13,%xmm15,%xmm15 ++ vpclmulqdq $0x11,%xmm6,%xmm14,%xmm1 ++ vmovdqu 160-64(%rsi),%xmm6 ++ vpxor %xmm2,%xmm5,%xmm5 ++ vpclmulqdq $0x10,%xmm7,%xmm9,%xmm2 ++ ++ leaq 128(%rdx),%rdx ++ cmpq $0x80,%rcx ++ jb .Ltail_avx ++ ++ vpxor %xmm10,%xmm15,%xmm15 ++ subq $0x80,%rcx ++ jmp 
.Loop8x_avx ++ ++.align 32 ++.Loop8x_avx: ++ vpunpckhqdq %xmm15,%xmm15,%xmm8 ++ vmovdqu 112(%rdx),%xmm14 ++ vpxor %xmm0,%xmm3,%xmm3 ++ vpxor %xmm15,%xmm8,%xmm8 ++ vpclmulqdq $0x00,%xmm6,%xmm15,%xmm10 ++ vpshufb %xmm13,%xmm14,%xmm14 ++ vpxor %xmm1,%xmm4,%xmm4 ++ vpclmulqdq $0x11,%xmm6,%xmm15,%xmm11 ++ vmovdqu 0-64(%rsi),%xmm6 ++ vpunpckhqdq %xmm14,%xmm14,%xmm9 ++ vpxor %xmm2,%xmm5,%xmm5 ++ vpclmulqdq $0x00,%xmm7,%xmm8,%xmm12 ++ vmovdqu 32-64(%rsi),%xmm7 ++ vpxor %xmm14,%xmm9,%xmm9 ++ ++ vmovdqu 96(%rdx),%xmm15 ++ vpclmulqdq $0x00,%xmm6,%xmm14,%xmm0 ++ vpxor %xmm3,%xmm10,%xmm10 ++ vpshufb %xmm13,%xmm15,%xmm15 ++ vpclmulqdq $0x11,%xmm6,%xmm14,%xmm1 ++ vxorps %xmm4,%xmm11,%xmm11 ++ vmovdqu 16-64(%rsi),%xmm6 ++ vpunpckhqdq %xmm15,%xmm15,%xmm8 ++ vpclmulqdq $0x00,%xmm7,%xmm9,%xmm2 ++ vpxor %xmm5,%xmm12,%xmm12 ++ vxorps %xmm15,%xmm8,%xmm8 ++ ++ vmovdqu 80(%rdx),%xmm14 ++ vpxor %xmm10,%xmm12,%xmm12 ++ vpclmulqdq $0x00,%xmm6,%xmm15,%xmm3 ++ vpxor %xmm11,%xmm12,%xmm12 ++ vpslldq $8,%xmm12,%xmm9 ++ vpxor %xmm0,%xmm3,%xmm3 ++ vpclmulqdq $0x11,%xmm6,%xmm15,%xmm4 ++ vpsrldq $8,%xmm12,%xmm12 ++ vpxor %xmm9,%xmm10,%xmm10 ++ vmovdqu 48-64(%rsi),%xmm6 ++ vpshufb %xmm13,%xmm14,%xmm14 ++ vxorps %xmm12,%xmm11,%xmm11 ++ vpxor %xmm1,%xmm4,%xmm4 ++ vpunpckhqdq %xmm14,%xmm14,%xmm9 ++ vpclmulqdq $0x10,%xmm7,%xmm8,%xmm5 ++ vmovdqu 80-64(%rsi),%xmm7 ++ vpxor %xmm14,%xmm9,%xmm9 ++ vpxor %xmm2,%xmm5,%xmm5 ++ ++ vmovdqu 64(%rdx),%xmm15 ++ vpalignr $8,%xmm10,%xmm10,%xmm12 ++ vpclmulqdq $0x00,%xmm6,%xmm14,%xmm0 ++ vpshufb %xmm13,%xmm15,%xmm15 ++ vpxor %xmm3,%xmm0,%xmm0 ++ vpclmulqdq $0x11,%xmm6,%xmm14,%xmm1 ++ vmovdqu 64-64(%rsi),%xmm6 ++ vpunpckhqdq %xmm15,%xmm15,%xmm8 ++ vpxor %xmm4,%xmm1,%xmm1 ++ vpclmulqdq $0x00,%xmm7,%xmm9,%xmm2 ++ vxorps %xmm15,%xmm8,%xmm8 ++ vpxor %xmm5,%xmm2,%xmm2 ++ ++ vmovdqu 48(%rdx),%xmm14 ++ vpclmulqdq $0x10,(%r10),%xmm10,%xmm10 ++ vpclmulqdq $0x00,%xmm6,%xmm15,%xmm3 ++ vpshufb %xmm13,%xmm14,%xmm14 ++ vpxor %xmm0,%xmm3,%xmm3 ++ vpclmulqdq $0x11,%xmm6,%xmm15,%xmm4 ++ vmovdqu 96-64(%rsi),%xmm6 ++ vpunpckhqdq %xmm14,%xmm14,%xmm9 ++ vpxor %xmm1,%xmm4,%xmm4 ++ vpclmulqdq $0x10,%xmm7,%xmm8,%xmm5 ++ vmovdqu 128-64(%rsi),%xmm7 ++ vpxor %xmm14,%xmm9,%xmm9 ++ vpxor %xmm2,%xmm5,%xmm5 ++ ++ vmovdqu 32(%rdx),%xmm15 ++ vpclmulqdq $0x00,%xmm6,%xmm14,%xmm0 ++ vpshufb %xmm13,%xmm15,%xmm15 ++ vpxor %xmm3,%xmm0,%xmm0 ++ vpclmulqdq $0x11,%xmm6,%xmm14,%xmm1 ++ vmovdqu 112-64(%rsi),%xmm6 ++ vpunpckhqdq %xmm15,%xmm15,%xmm8 ++ vpxor %xmm4,%xmm1,%xmm1 ++ vpclmulqdq $0x00,%xmm7,%xmm9,%xmm2 ++ vpxor %xmm15,%xmm8,%xmm8 ++ vpxor %xmm5,%xmm2,%xmm2 ++ vxorps %xmm12,%xmm10,%xmm10 ++ ++ vmovdqu 16(%rdx),%xmm14 ++ vpalignr $8,%xmm10,%xmm10,%xmm12 ++ vpclmulqdq $0x00,%xmm6,%xmm15,%xmm3 ++ vpshufb %xmm13,%xmm14,%xmm14 ++ vpxor %xmm0,%xmm3,%xmm3 ++ vpclmulqdq $0x11,%xmm6,%xmm15,%xmm4 ++ vmovdqu 144-64(%rsi),%xmm6 ++ vpclmulqdq $0x10,(%r10),%xmm10,%xmm10 ++ vxorps %xmm11,%xmm12,%xmm12 ++ vpunpckhqdq %xmm14,%xmm14,%xmm9 ++ vpxor %xmm1,%xmm4,%xmm4 ++ vpclmulqdq $0x10,%xmm7,%xmm8,%xmm5 ++ vmovdqu 176-64(%rsi),%xmm7 ++ vpxor %xmm14,%xmm9,%xmm9 ++ vpxor %xmm2,%xmm5,%xmm5 ++ ++ vmovdqu (%rdx),%xmm15 ++ vpclmulqdq $0x00,%xmm6,%xmm14,%xmm0 ++ vpshufb %xmm13,%xmm15,%xmm15 ++ vpclmulqdq $0x11,%xmm6,%xmm14,%xmm1 ++ vmovdqu 160-64(%rsi),%xmm6 ++ vpxor %xmm12,%xmm15,%xmm15 ++ vpclmulqdq $0x10,%xmm7,%xmm9,%xmm2 ++ vpxor %xmm10,%xmm15,%xmm15 ++ ++ leaq 128(%rdx),%rdx ++ subq $0x80,%rcx ++ jnc .Loop8x_avx ++ ++ addq $0x80,%rcx ++ jmp .Ltail_no_xor_avx ++ ++.align 32 ++.Lshort_avx: ++ vmovdqu -16(%rdx,%rcx,1),%xmm14 ++ leaq (%rdx,%rcx,1),%rdx ++ 
vmovdqu 0-64(%rsi),%xmm6 ++ vmovdqu 32-64(%rsi),%xmm7 ++ vpshufb %xmm13,%xmm14,%xmm15 ++ ++ vmovdqa %xmm0,%xmm3 ++ vmovdqa %xmm1,%xmm4 ++ vmovdqa %xmm2,%xmm5 ++ subq $0x10,%rcx ++ jz .Ltail_avx ++ ++ vpunpckhqdq %xmm15,%xmm15,%xmm8 ++ vpxor %xmm0,%xmm3,%xmm3 ++ vpclmulqdq $0x00,%xmm6,%xmm15,%xmm0 ++ vpxor %xmm15,%xmm8,%xmm8 ++ vmovdqu -32(%rdx),%xmm14 ++ vpxor %xmm1,%xmm4,%xmm4 ++ vpclmulqdq $0x11,%xmm6,%xmm15,%xmm1 ++ vmovdqu 16-64(%rsi),%xmm6 ++ vpshufb %xmm13,%xmm14,%xmm15 ++ vpxor %xmm2,%xmm5,%xmm5 ++ vpclmulqdq $0x00,%xmm7,%xmm8,%xmm2 ++ vpsrldq $8,%xmm7,%xmm7 ++ subq $0x10,%rcx ++ jz .Ltail_avx ++ ++ vpunpckhqdq %xmm15,%xmm15,%xmm8 ++ vpxor %xmm0,%xmm3,%xmm3 ++ vpclmulqdq $0x00,%xmm6,%xmm15,%xmm0 ++ vpxor %xmm15,%xmm8,%xmm8 ++ vmovdqu -48(%rdx),%xmm14 ++ vpxor %xmm1,%xmm4,%xmm4 ++ vpclmulqdq $0x11,%xmm6,%xmm15,%xmm1 ++ vmovdqu 48-64(%rsi),%xmm6 ++ vpshufb %xmm13,%xmm14,%xmm15 ++ vpxor %xmm2,%xmm5,%xmm5 ++ vpclmulqdq $0x00,%xmm7,%xmm8,%xmm2 ++ vmovdqu 80-64(%rsi),%xmm7 ++ subq $0x10,%rcx ++ jz .Ltail_avx ++ ++ vpunpckhqdq %xmm15,%xmm15,%xmm8 ++ vpxor %xmm0,%xmm3,%xmm3 ++ vpclmulqdq $0x00,%xmm6,%xmm15,%xmm0 ++ vpxor %xmm15,%xmm8,%xmm8 ++ vmovdqu -64(%rdx),%xmm14 ++ vpxor %xmm1,%xmm4,%xmm4 ++ vpclmulqdq $0x11,%xmm6,%xmm15,%xmm1 ++ vmovdqu 64-64(%rsi),%xmm6 ++ vpshufb %xmm13,%xmm14,%xmm15 ++ vpxor %xmm2,%xmm5,%xmm5 ++ vpclmulqdq $0x00,%xmm7,%xmm8,%xmm2 ++ vpsrldq $8,%xmm7,%xmm7 ++ subq $0x10,%rcx ++ jz .Ltail_avx ++ ++ vpunpckhqdq %xmm15,%xmm15,%xmm8 ++ vpxor %xmm0,%xmm3,%xmm3 ++ vpclmulqdq $0x00,%xmm6,%xmm15,%xmm0 ++ vpxor %xmm15,%xmm8,%xmm8 ++ vmovdqu -80(%rdx),%xmm14 ++ vpxor %xmm1,%xmm4,%xmm4 ++ vpclmulqdq $0x11,%xmm6,%xmm15,%xmm1 ++ vmovdqu 96-64(%rsi),%xmm6 ++ vpshufb %xmm13,%xmm14,%xmm15 ++ vpxor %xmm2,%xmm5,%xmm5 ++ vpclmulqdq $0x00,%xmm7,%xmm8,%xmm2 ++ vmovdqu 128-64(%rsi),%xmm7 ++ subq $0x10,%rcx ++ jz .Ltail_avx ++ ++ vpunpckhqdq %xmm15,%xmm15,%xmm8 ++ vpxor %xmm0,%xmm3,%xmm3 ++ vpclmulqdq $0x00,%xmm6,%xmm15,%xmm0 ++ vpxor %xmm15,%xmm8,%xmm8 ++ vmovdqu -96(%rdx),%xmm14 ++ vpxor %xmm1,%xmm4,%xmm4 ++ vpclmulqdq $0x11,%xmm6,%xmm15,%xmm1 ++ vmovdqu 112-64(%rsi),%xmm6 ++ vpshufb %xmm13,%xmm14,%xmm15 ++ vpxor %xmm2,%xmm5,%xmm5 ++ vpclmulqdq $0x00,%xmm7,%xmm8,%xmm2 ++ vpsrldq $8,%xmm7,%xmm7 ++ subq $0x10,%rcx ++ jz .Ltail_avx ++ ++ vpunpckhqdq %xmm15,%xmm15,%xmm8 ++ vpxor %xmm0,%xmm3,%xmm3 ++ vpclmulqdq $0x00,%xmm6,%xmm15,%xmm0 ++ vpxor %xmm15,%xmm8,%xmm8 ++ vmovdqu -112(%rdx),%xmm14 ++ vpxor %xmm1,%xmm4,%xmm4 ++ vpclmulqdq $0x11,%xmm6,%xmm15,%xmm1 ++ vmovdqu 144-64(%rsi),%xmm6 ++ vpshufb %xmm13,%xmm14,%xmm15 ++ vpxor %xmm2,%xmm5,%xmm5 ++ vpclmulqdq $0x00,%xmm7,%xmm8,%xmm2 ++ vmovq 184-64(%rsi),%xmm7 ++ subq $0x10,%rcx ++ jmp .Ltail_avx ++ ++.align 32 ++.Ltail_avx: ++ vpxor %xmm10,%xmm15,%xmm15 ++.Ltail_no_xor_avx: ++ vpunpckhqdq %xmm15,%xmm15,%xmm8 ++ vpxor %xmm0,%xmm3,%xmm3 ++ vpclmulqdq $0x00,%xmm6,%xmm15,%xmm0 ++ vpxor %xmm15,%xmm8,%xmm8 ++ vpxor %xmm1,%xmm4,%xmm4 ++ vpclmulqdq $0x11,%xmm6,%xmm15,%xmm1 ++ vpxor %xmm2,%xmm5,%xmm5 ++ vpclmulqdq $0x00,%xmm7,%xmm8,%xmm2 ++ ++ vmovdqu (%r10),%xmm12 ++ ++ vpxor %xmm0,%xmm3,%xmm10 ++ vpxor %xmm1,%xmm4,%xmm11 ++ vpxor %xmm2,%xmm5,%xmm5 ++ ++ vpxor %xmm10,%xmm5,%xmm5 ++ vpxor %xmm11,%xmm5,%xmm5 ++ vpslldq $8,%xmm5,%xmm9 ++ vpsrldq $8,%xmm5,%xmm5 ++ vpxor %xmm9,%xmm10,%xmm10 ++ vpxor %xmm5,%xmm11,%xmm11 ++ ++ vpclmulqdq $0x10,%xmm12,%xmm10,%xmm9 ++ vpalignr $8,%xmm10,%xmm10,%xmm10 ++ vpxor %xmm9,%xmm10,%xmm10 ++ ++ vpclmulqdq $0x10,%xmm12,%xmm10,%xmm9 ++ vpalignr $8,%xmm10,%xmm10,%xmm10 ++ vpxor %xmm11,%xmm10,%xmm10 ++ vpxor 
%xmm9,%xmm10,%xmm10 ++ ++ cmpq $0,%rcx ++ jne .Lshort_avx ++ ++ vpshufb %xmm13,%xmm10,%xmm10 ++ vmovdqu %xmm10,(%rdi) ++ vzeroupper ++ .byte 0xf3,0xc3 ++.cfi_endproc ++.size gcm_ghash_avx,.-gcm_ghash_avx ++.align 64 ++.Lbswap_mask: ++.byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0 ++.L0x1c2_polynomial: ++.byte 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xc2 ++.L7_mask: ++.long 7,0,7,0 ++.L7_mask_poly: ++.long 7,0,450,0 ++.align 64 ++.type .Lrem_4bit,@object ++.Lrem_4bit: ++.long 0,0,0,471859200,0,943718400,0,610271232 ++.long 0,1887436800,0,1822425088,0,1220542464,0,1423966208 ++.long 0,3774873600,0,4246732800,0,3644850176,0,3311403008 ++.long 0,2441084928,0,2376073216,0,2847932416,0,3051356160 ++.type .Lrem_8bit,@object ++.Lrem_8bit: ++.value 0x0000,0x01C2,0x0384,0x0246,0x0708,0x06CA,0x048C,0x054E ++.value 0x0E10,0x0FD2,0x0D94,0x0C56,0x0918,0x08DA,0x0A9C,0x0B5E ++.value 0x1C20,0x1DE2,0x1FA4,0x1E66,0x1B28,0x1AEA,0x18AC,0x196E ++.value 0x1230,0x13F2,0x11B4,0x1076,0x1538,0x14FA,0x16BC,0x177E ++.value 0x3840,0x3982,0x3BC4,0x3A06,0x3F48,0x3E8A,0x3CCC,0x3D0E ++.value 0x3650,0x3792,0x35D4,0x3416,0x3158,0x309A,0x32DC,0x331E ++.value 0x2460,0x25A2,0x27E4,0x2626,0x2368,0x22AA,0x20EC,0x212E ++.value 0x2A70,0x2BB2,0x29F4,0x2836,0x2D78,0x2CBA,0x2EFC,0x2F3E ++.value 0x7080,0x7142,0x7304,0x72C6,0x7788,0x764A,0x740C,0x75CE ++.value 0x7E90,0x7F52,0x7D14,0x7CD6,0x7998,0x785A,0x7A1C,0x7BDE ++.value 0x6CA0,0x6D62,0x6F24,0x6EE6,0x6BA8,0x6A6A,0x682C,0x69EE ++.value 0x62B0,0x6372,0x6134,0x60F6,0x65B8,0x647A,0x663C,0x67FE ++.value 0x48C0,0x4902,0x4B44,0x4A86,0x4FC8,0x4E0A,0x4C4C,0x4D8E ++.value 0x46D0,0x4712,0x4554,0x4496,0x41D8,0x401A,0x425C,0x439E ++.value 0x54E0,0x5522,0x5764,0x56A6,0x53E8,0x522A,0x506C,0x51AE ++.value 0x5AF0,0x5B32,0x5974,0x58B6,0x5DF8,0x5C3A,0x5E7C,0x5FBE ++.value 0xE100,0xE0C2,0xE284,0xE346,0xE608,0xE7CA,0xE58C,0xE44E ++.value 0xEF10,0xEED2,0xEC94,0xED56,0xE818,0xE9DA,0xEB9C,0xEA5E ++.value 0xFD20,0xFCE2,0xFEA4,0xFF66,0xFA28,0xFBEA,0xF9AC,0xF86E ++.value 0xF330,0xF2F2,0xF0B4,0xF176,0xF438,0xF5FA,0xF7BC,0xF67E ++.value 0xD940,0xD882,0xDAC4,0xDB06,0xDE48,0xDF8A,0xDDCC,0xDC0E ++.value 0xD750,0xD692,0xD4D4,0xD516,0xD058,0xD19A,0xD3DC,0xD21E ++.value 0xC560,0xC4A2,0xC6E4,0xC726,0xC268,0xC3AA,0xC1EC,0xC02E ++.value 0xCB70,0xCAB2,0xC8F4,0xC936,0xCC78,0xCDBA,0xCFFC,0xCE3E ++.value 0x9180,0x9042,0x9204,0x93C6,0x9688,0x974A,0x950C,0x94CE ++.value 0x9F90,0x9E52,0x9C14,0x9DD6,0x9898,0x995A,0x9B1C,0x9ADE ++.value 0x8DA0,0x8C62,0x8E24,0x8FE6,0x8AA8,0x8B6A,0x892C,0x88EE ++.value 0x83B0,0x8272,0x8034,0x81F6,0x84B8,0x857A,0x873C,0x86FE ++.value 0xA9C0,0xA802,0xAA44,0xAB86,0xAEC8,0xAF0A,0xAD4C,0xAC8E ++.value 0xA7D0,0xA612,0xA454,0xA596,0xA0D8,0xA11A,0xA35C,0xA29E ++.value 0xB5E0,0xB422,0xB664,0xB7A6,0xB2E8,0xB32A,0xB16C,0xB0AE ++.value 0xBBF0,0xBA32,0xB874,0xB9B6,0xBCF8,0xBD3A,0xBF7C,0xBEBE ++ ++.byte 71,72,65,83,72,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 ++.align 64 ++ ++/* Mark the stack non-executable. */ ++#if defined(__linux__) && defined(__ELF__) ++.section .note.GNU-stack,"",%progbits ++#endif ++ ++#endif /* defined(__x86_64__) && defined(HAVE_AVX) && defined(HAVE_AES) ... 
*/ +Index: zfs-linux-0.8.3/module/icp/include/aes/aes_impl.h +=================================================================== +--- zfs-linux-0.8.3.orig/module/icp/include/aes/aes_impl.h ++++ zfs-linux-0.8.3/module/icp/include/aes/aes_impl.h +@@ -107,6 +107,11 @@ typedef union { + } aes_ks_t; + + typedef struct aes_impl_ops aes_impl_ops_t; ++ ++/* ++ * The absolute offset of the encr_ks (0) and the nr (504) fields are hard ++ * coded in aesni-gcm-x86_64, so please don't change (or adjust accordingly). ++ */ + typedef struct aes_key aes_key_t; + struct aes_key { + aes_ks_t encr_ks; /* encryption key schedule */ +Index: zfs-linux-0.8.3/module/icp/include/modes/modes.h +=================================================================== +--- zfs-linux-0.8.3.orig/module/icp/include/modes/modes.h ++++ zfs-linux-0.8.3/module/icp/include/modes/modes.h +@@ -34,6 +34,16 @@ extern "C" { + #include + #include + ++/* ++ * Does the build chain support all instructions needed for the GCM assembler ++ * routines. AVX support should imply AES-NI and PCLMULQDQ, but make sure ++ * anyhow. ++ */ ++#if defined(__x86_64__) && defined(HAVE_AVX) && \ ++ defined(HAVE_AES) && defined(HAVE_PCLMULQDQ) && defined(HAVE_MOVBE) ++#define CAN_USE_GCM_ASM ++#endif ++ + #define ECB_MODE 0x00000002 + #define CBC_MODE 0x00000004 + #define CTR_MODE 0x00000008 +@@ -189,13 +199,17 @@ typedef struct ccm_ctx { + * + * gcm_H: Subkey. + * ++ * gcm_Htable: Pre-computed and pre-shifted H, H^2, ... H^6 for the ++ * Karatsuba Algorithm in host byte order. ++ * + * gcm_J0: Pre-counter block generated from the IV. + * + * gcm_len_a_len_c: 64-bit representations of the bit lengths of + * AAD and ciphertext. + * +- * gcm_kmflag: Current value of kmflag. Used only for allocating +- * the plaintext buffer during decryption. ++ * gcm_kmflag: Current value of kmflag. Used for allocating ++ * the plaintext buffer during decryption and a ++ * gcm_avx_chunk_size'd buffer for avx enabled encryption. + */ + typedef struct gcm_ctx { + struct common_ctx gcm_common; +@@ -203,12 +217,23 @@ typedef struct gcm_ctx { + size_t gcm_processed_data_len; + size_t gcm_pt_buf_len; + uint32_t gcm_tmp[4]; ++ /* ++ * The relative positions of gcm_ghash, gcm_H and pre-computed ++ * gcm_Htable are hard coded in aesni-gcm-x86_64.S and ghash-x86_64.S, ++ * so please don't change (or adjust accordingly). 
++ */ + uint64_t gcm_ghash[2]; + uint64_t gcm_H[2]; ++#ifdef CAN_USE_GCM_ASM ++ uint64_t gcm_Htable[12][2]; ++#endif + uint64_t gcm_J0[2]; + uint64_t gcm_len_a_len_c[2]; + uint8_t *gcm_pt_buf; + int gcm_kmflag; ++#ifdef CAN_USE_GCM_ASM ++ boolean_t gcm_use_avx; ++#endif + } gcm_ctx_t; + + #define gcm_keysched gcm_common.cc_keysched +Index: zfs-linux-0.8.3/tests/zfs-tests/tests/functional/cli_root/zfs_create/zfs_create_crypt_combos.ksh +=================================================================== +--- zfs-linux-0.8.3.orig/tests/zfs-tests/tests/functional/cli_root/zfs_create/zfs_create_crypt_combos.ksh ++++ zfs-linux-0.8.3/tests/zfs-tests/tests/functional/cli_root/zfs_create/zfs_create_crypt_combos.ksh +@@ -53,7 +53,7 @@ set -A ENCRYPTION_ALGS \ + "encryption=aes-256-gcm" + + set -A ENCRYPTION_PROPS \ +- "encryption=aes-256-ccm" \ ++ "encryption=aes-256-gcm" \ + "encryption=aes-128-ccm" \ + "encryption=aes-192-ccm" \ + "encryption=aes-256-ccm" \ +Index: zfs-linux-0.8.3/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_crypt_combos.ksh +=================================================================== +--- zfs-linux-0.8.3.orig/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_crypt_combos.ksh ++++ zfs-linux-0.8.3/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_crypt_combos.ksh +@@ -48,7 +48,7 @@ set -A ENCRYPTION_ALGS "encryption=on" \ + "encryption=aes-192-gcm" \ + "encryption=aes-256-gcm" + +-set -A ENCRYPTION_PROPS "encryption=aes-256-ccm" \ ++set -A ENCRYPTION_PROPS "encryption=aes-256-gcm" \ + "encryption=aes-128-ccm" \ + "encryption=aes-192-ccm" \ + "encryption=aes-256-ccm" \ +Index: zfs-linux-0.8.3/tests/zfs-tests/tests/functional/rsend/send_encrypted_props.ksh +=================================================================== +--- zfs-linux-0.8.3.orig/tests/zfs-tests/tests/functional/rsend/send_encrypted_props.ksh ++++ zfs-linux-0.8.3/tests/zfs-tests/tests/functional/rsend/send_encrypted_props.ksh +@@ -124,7 +124,7 @@ ds=$TESTPOOL/recv + log_must eval "zfs send $snap > $sendfile" + log_must eval "zfs recv -o encryption=on -o keyformat=passphrase" \ + "-o keylocation=file://$keyfile $ds < $sendfile" +-log_must test "$(get_prop 'encryption' $ds)" == "aes-256-ccm" ++log_must test "$(get_prop 'encryption' $ds)" == "aes-256-gcm" + log_must test "$(get_prop 'encryptionroot' $ds)" == "$ds" + log_must test "$(get_prop 'keyformat' $ds)" == "passphrase" + log_must test "$(get_prop 'keylocation' $ds)" == "file://$keyfile" +@@ -140,7 +140,7 @@ ds=$TESTPOOL/recv + log_must eval "zfs send -p $snap > $sendfile" + log_must eval "zfs recv -o encryption=on -o keyformat=passphrase" \ + "-o keylocation=file://$keyfile $ds < $sendfile" +-log_must test "$(get_prop 'encryption' $ds)" == "aes-256-ccm" ++log_must test "$(get_prop 'encryption' $ds)" == "aes-256-gcm" + log_must test "$(get_prop 'encryptionroot' $ds)" == "$ds" + log_must test "$(get_prop 'keyformat' $ds)" == "passphrase" + log_must test "$(get_prop 'keylocation' $ds)" == "file://$keyfile" +@@ -158,7 +158,7 @@ ds=$TESTPOOL/recv + log_must eval "zfs send -R $snap > $sendfile" + log_must eval "zfs recv -o encryption=on -o keyformat=passphrase" \ + "-o keylocation=file://$keyfile $ds < $sendfile" +-log_must test "$(get_prop 'encryption' $ds)" == "aes-256-ccm" ++log_must test "$(get_prop 'encryption' $ds)" == "aes-256-gcm" + log_must test "$(get_prop 'encryptionroot' $ds)" == "$ds" + log_must test "$(get_prop 'keyformat' $ds)" == "passphrase" + log_must test "$(get_prop 'keylocation' 
$ds)" == "file://$keyfile" +@@ -174,7 +174,7 @@ ds=$TESTPOOL/crypt/recv + log_must eval "zfs send -p $snap > $sendfile" + log_must eval "zfs recv -x encryption $ds < $sendfile" + log_must test "$(get_prop 'encryptionroot' $ds)" == "$TESTPOOL/crypt" +-log_must test "$(get_prop 'encryption' $ds)" == "aes-256-ccm" ++log_must test "$(get_prop 'encryption' $ds)" == "aes-256-gcm" + log_must test "$(get_prop 'keyformat' $ds)" == "passphrase" + log_must test "$(get_prop 'mounted' $ds)" == "yes" + recv_cksum=$(md5digest /$ds/$TESTFILE0) +@@ -188,7 +188,7 @@ ds=$TESTPOOL/crypt/recv + log_must eval "zfs send -R $snap > $sendfile" + log_must eval "zfs recv -x encryption $ds < $sendfile" + log_must test "$(get_prop 'encryptionroot' $ds)" == "$TESTPOOL/crypt" +-log_must test "$(get_prop 'encryption' $ds)" == "aes-256-ccm" ++log_must test "$(get_prop 'encryption' $ds)" == "aes-256-gcm" + log_must test "$(get_prop 'keyformat' $ds)" == "passphrase" + log_must test "$(get_prop 'mounted' $ds)" == "yes" + recv_cksum=$(md5digest /$ds/$TESTFILE0) +@@ -202,7 +202,7 @@ ds=$TESTPOOL/crypt/recv + log_must eval "zfs send -R $snap2 > $sendfile" + log_must eval "zfs recv -x encryption $ds < $sendfile" + log_must test "$(get_prop 'encryptionroot' $ds)" == "$TESTPOOL/crypt" +-log_must test "$(get_prop 'encryption' $ds)" == "aes-256-ccm" ++log_must test "$(get_prop 'encryption' $ds)" == "aes-256-gcm" + log_must test "$(get_prop 'keyformat' $ds)" == "passphrase" + log_must test "$(get_prop 'mounted' $ds)" == "yes" + recv_cksum=$(md5digest /$ds/$TESTFILE0) diff -Nru zfs-linux-0.8.3/debian/patches/4620-zfs-vol-wait-fix-locked-encrypted-vols.patch zfs-linux-0.8.3/debian/patches/4620-zfs-vol-wait-fix-locked-encrypted-vols.patch --- zfs-linux-0.8.3/debian/patches/4620-zfs-vol-wait-fix-locked-encrypted-vols.patch 1970-01-01 00:00:00.000000000 +0000 +++ zfs-linux-0.8.3/debian/patches/4620-zfs-vol-wait-fix-locked-encrypted-vols.patch 2020-07-22 08:56:05.000000000 +0000 @@ -0,0 +1,39 @@ +Description: don't wait for links when volume has property keystatus=unavailable + zfs-volume-wait.service systemd unit does not start if the encrypted + zvol is locked. The /sbin/zvol_wait should not wait for links when the + volume has property keystatus=unavailable. This patch fixes this issue +Bug: https://bugs.launchpad.net/ubuntu/+source/zfs-linux/+bug/1888405 +Author: James Dingwall +Origin: ubuntu +Forwarded: no +Reviewed-By: Colin Ian King +Last-Update: 2020-07-22 + +Index: zfs-linux-0.8.3/cmd/zvol_wait/zvol_wait +=================================================================== +--- zfs-linux-0.8.3.orig/cmd/zvol_wait/zvol_wait ++++ zfs-linux-0.8.3/cmd/zvol_wait/zvol_wait +@@ -24,6 +24,14 @@ filter_out_deleted_zvols() { + done + } + ++filter_out_locked_zvols() { ++ while read -r zvol; do ++ if ! 
[ "$(zfs list -H -o keystatus rpool/export/vault/block "$zvol")" = "unavailable" ]; then ++ echo "$zvol" ++ fi ++ done ++} ++ + list_zvols() { + zfs list -t volume -H -o name,volmode,receive_resume_token | + while read -r zvol_line; do +@@ -71,7 +79,7 @@ while [ "$outer_loop" -lt 20 ]; do + while [ "$inner_loop" -lt 30 ]; do + inner_loop=$((inner_loop + 1)) + +- zvols="$(echo "$zvols" | filter_out_zvols_with_links)" ++ zvols="$(echo "$zvols" | filter_out_zvols_with_links | filter_out_locked_zvols)" + + zvols_count=$(count_zvols) + if [ "$zvols_count" -eq 0 ]; then diff -Nru zfs-linux-0.8.3/debian/patches/4700-Fix-DKMS-build-on-arm64-with-PREEMPTION-and-BLK_CGRO.patch zfs-linux-0.8.3/debian/patches/4700-Fix-DKMS-build-on-arm64-with-PREEMPTION-and-BLK_CGRO.patch --- zfs-linux-0.8.3/debian/patches/4700-Fix-DKMS-build-on-arm64-with-PREEMPTION-and-BLK_CGRO.patch 1970-01-01 00:00:00.000000000 +0000 +++ zfs-linux-0.8.3/debian/patches/4700-Fix-DKMS-build-on-arm64-with-PREEMPTION-and-BLK_CGRO.patch 2020-08-18 09:10:41.000000000 +0000 @@ -0,0 +1,56 @@ +From 46cd180400093965271820d34fa1071f9769a0fb Mon Sep 17 00:00:00 2001 +From: Juerg Haefliger +Date: Tue, 18 Aug 2020 10:52:25 +0200 +Subject: [PATCH] Fix DKMS build on arm64 with PREEMPTION and BLK_CGROUP + enabled + +With PREEMPTION=y and BLK_CGROUP=y preempt_schedule_notrace() is being +used on arm64 which is a GPL-only function and hence the build of the +DKMS kernel module fails. + +'Fix' that by redefining preempt_schedule_notrace() to preempt_schedule() +which should be safe as long as tracing is not used. + +Signed-off-by: Juerg Haefliger +--- + module/zfs/vdev_disk.c | 2 ++ + module/zfs/zfs_compat.h | 14 ++++++++++++++ + 2 files changed, 16 insertions(+) + create mode 100644 module/zfs/zfs_compat.h + +diff --git a/module/zfs/vdev_disk.c b/module/zfs/vdev_disk.c +index 8544bb8ffb6f..2a7096a6436d 100644 +--- a/module/zfs/vdev_disk.c ++++ b/module/zfs/vdev_disk.c +@@ -26,6 +26,8 @@ + * Copyright (c) 2012, 2019 by Delphix. All rights reserved. + */ + ++#include "zfs_compat.h" ++ + #include + #include + #include +diff --git a/module/zfs/zfs_compat.h b/module/zfs/zfs_compat.h +new file mode 100644 +index 000000000000..6ef26f436f3c +--- /dev/null ++++ b/module/zfs/zfs_compat.h +@@ -0,0 +1,14 @@ ++#ifndef _ZFS_COMPAT_H_ ++#define _ZFS_COMPAT_H_ ++ ++/* ++ * preempt_schedule_notrace is GPL-only which breaks the ZFS build, so ++ * replace it with preempt_schedule under the following condition: ++*/ ++#if defined(CONFIG_ARM64) && \ ++ defined(CONFIG_PREEMPTION) && \ ++ defined(CONFIG_BLK_CGROUP) ++#define preempt_schedule_notrace(x) preempt_schedule(x) ++#endif ++ ++#endif /* _ZFS_COMPAT_H_ */ +-- +2.25.1 + diff -Nru zfs-linux-0.8.3/debian/patches/4701-Bugfix-fix-uio-partial-copies.patch zfs-linux-0.8.3/debian/patches/4701-Bugfix-fix-uio-partial-copies.patch --- zfs-linux-0.8.3/debian/patches/4701-Bugfix-fix-uio-partial-copies.patch 1970-01-01 00:00:00.000000000 +0000 +++ zfs-linux-0.8.3/debian/patches/4701-Bugfix-fix-uio-partial-copies.patch 2021-06-10 13:35:40.000000000 +0000 @@ -0,0 +1,90 @@ +From 590ababea2ed4c41ea3c769f35a5d3ae2eb13e8d Mon Sep 17 00:00:00 2001 +From: Fabio Scaccabarozzi +Date: Wed, 1 Apr 2020 18:48:54 +0200 +Subject: [PATCH] Bugfix/fix uio partial copies +Content-Type: text/plain; charset="utf-8" +Content-Transfer-Encoding: 8bit + +In zfs_write(), the loop continues to the next iteration without +accounting for partial copies occurring in uiomove_iov when +copy_from_user/__copy_from_user_inatomic return a non-zero status. 
+This results in "zfs: accessing past end of object..." in the +kernel log, and the write failing. + +Account for partial copies and update uio struct before returning +EFAULT, leave a comment explaining the reason why this is done. + +Reviewed-by: Brian Behlendorf +Reviewed-by: ilbsmart +Signed-off-by: Fabio Scaccabarozzi +Closes #8673 +Closes #10148 +--- + module/zcommon/zfs_uio.c | 25 +++++++++++++++++-------- + module/zfs/zfs_vnops.c | 9 +++++++++ + 2 files changed, 26 insertions(+), 8 deletions(-) + +Index: zfs-linux-0.8.3/module/zcommon/zfs_uio.c +=================================================================== +--- zfs-linux-0.8.3.orig/module/zcommon/zfs_uio.c ++++ zfs-linux-0.8.3/module/zcommon/zfs_uio.c +@@ -80,22 +80,31 @@ uiomove_iov(void *p, size_t n, enum uio_ + if (copy_to_user(iov->iov_base+skip, p, cnt)) + return (EFAULT); + } else { ++ unsigned long b_left = 0; + if (uio->uio_fault_disable) { + if (!zfs_access_ok(VERIFY_READ, + (iov->iov_base + skip), cnt)) { + return (EFAULT); + } + pagefault_disable(); +- if (__copy_from_user_inatomic(p, +- (iov->iov_base + skip), cnt)) { +- pagefault_enable(); +- return (EFAULT); +- } ++ b_left = ++ __copy_from_user_inatomic(p, ++ (iov->iov_base + skip), cnt); + pagefault_enable(); + } else { +- if (copy_from_user(p, +- (iov->iov_base + skip), cnt)) +- return (EFAULT); ++ b_left = ++ copy_from_user(p, ++ (iov->iov_base + skip), cnt); ++ } ++ if (b_left > 0) { ++ unsigned long c_bytes = ++ cnt - b_left; ++ uio->uio_skip += c_bytes; ++ ASSERT3U(uio->uio_skip, <, ++ iov->iov_len); ++ uio->uio_resid -= c_bytes; ++ uio->uio_loffset += c_bytes; ++ return (EFAULT); + } + } + break; +Index: zfs-linux-0.8.3/module/zfs/zfs_vnops.c +=================================================================== +--- zfs-linux-0.8.3.orig/module/zfs/zfs_vnops.c ++++ zfs-linux-0.8.3/module/zfs/zfs_vnops.c +@@ -829,6 +829,15 @@ zfs_write(struct inode *ip, uio_t *uio, + uio->uio_fault_disable = B_FALSE; + if (error == EFAULT) { + dmu_tx_commit(tx); ++ /* ++ * Account for partial writes before ++ * continuing the loop. ++ * Update needs to occur before the next ++ * uio_prefaultpages, or prefaultpages may ++ * error, and we may break the loop early. ++ */ ++ if (tx_bytes != uio->uio_resid) ++ n -= tx_bytes - uio->uio_resid; + if (uio_prefaultpages(MIN(n, max_blksz), uio)) { + break; + } diff -Nru zfs-linux-0.8.3/debian/patches/4702-Revert-Let-zfs-mount-all-tolerate-in-progress-mounts.patch zfs-linux-0.8.3/debian/patches/4702-Revert-Let-zfs-mount-all-tolerate-in-progress-mounts.patch --- zfs-linux-0.8.3/debian/patches/4702-Revert-Let-zfs-mount-all-tolerate-in-progress-mounts.patch 1970-01-01 00:00:00.000000000 +0000 +++ zfs-linux-0.8.3/debian/patches/4702-Revert-Let-zfs-mount-all-tolerate-in-progress-mounts.patch 2020-11-30 19:00:00.000000000 +0000 @@ -0,0 +1,53 @@ +From d1b84da8c1a69c084f04b504beefe804591bca07 Mon Sep 17 00:00:00 2001 +From: Brian Behlendorf +Date: Tue, 26 May 2020 16:07:50 -0700 +Subject: [PATCH] Revert "Let zfs mount all tolerate in-progress mounts" + +This reverts commit a9cd8bf which introduced a segfault when running +`zfs mount -a` multiple times when there are mountpoints which are +not empty. This segfault is now seen frequently by the CI after +the mount code was updated to directly call mount(2). + +The original reason this logic was added is described in #8881. +Since then the systemd `zfs-share.target` has been updated to run +"After" the `zfs-mount.server` which should avoid this issue. 
+ +Reviewed-by: Don Brady +Signed-off-by: Brian Behlendorf +Closes #9560 +Closes #10364 +--- + cmd/zfs/zfs_main.c | 19 +------------------ + 1 file changed, 1 insertion(+), 18 deletions(-) + +Index: zfs-linux-0.8.3/cmd/zfs/zfs_main.c +=================================================================== +--- zfs-linux-0.8.3.orig/cmd/zfs/zfs_main.c ++++ zfs-linux-0.8.3/cmd/zfs/zfs_main.c +@@ -6447,25 +6447,8 @@ share_mount_one(zfs_handle_t *zhp, int o + return (1); + } + +- if (zfs_mount(zhp, options, flags) != 0) { +- /* +- * Check if a mount sneaked in after we checked +- */ +- if (!explicit && +- libzfs_errno(g_zfs) == EZFS_MOUNTFAILED) { +- usleep(10 * MILLISEC); +- libzfs_mnttab_cache(g_zfs, B_FALSE); +- +- if (zfs_is_mounted(zhp, NULL)) { +- (void) fprintf(stderr, gettext( +- "Ignoring previous 'already " +- "mounted' error for '%s'\n"), +- zfs_get_name(zhp)); +- return (0); +- } +- } ++ if (zfs_mount(zhp, options, flags) != 0) + return (1); +- } + break; + } + diff -Nru zfs-linux-0.8.3/debian/patches/4800-fix-iput-race-in-zfs_iput_async.patch zfs-linux-0.8.3/debian/patches/4800-fix-iput-race-in-zfs_iput_async.patch --- zfs-linux-0.8.3/debian/patches/4800-fix-iput-race-in-zfs_iput_async.patch 1970-01-01 00:00:00.000000000 +0000 +++ zfs-linux-0.8.3/debian/patches/4800-fix-iput-race-in-zfs_iput_async.patch 2021-06-10 13:37:17.000000000 +0000 @@ -0,0 +1,52 @@ +From 43eaef6de817dab3e098488f8e02a11fe57944d0 Mon Sep 17 00:00:00 2001 +From: Paul Dagnelie +Date: Wed, 27 Jan 2021 21:29:58 -0800 +Subject: [PATCH] Fix zrele race in zrele_async that can cause hang + +There is a race condition in zfs_zrele_async when we are checking if +we would be the one to evict an inode. This can lead to a txg sync +deadlock. + +Instead of calling into iput directly, we attempt to perform the atomic +decrement ourselves, unless that would set the i_count value to zero. +In that case, we dispatch a call to iput to run later, to prevent a +deadlock from occurring. + +Reviewed-by: Brian Behlendorf +Reviewed-by: Matthew Ahrens +Signed-off-by: Paul Dagnelie +Closes #11527 +Closes #11530 + +Origin: backport, https://github.com/openzfs/zfs/commit/43eaef6de817 +Bug-Ubuntu: https://bugs.launchpad.net/bugs/1916486 +--- + module/zfs/zfs_vnops.c | 13 +++++++++++++++++++--- + 1 file changed, 10 insertions(+), 3 deletions(-) + +Index: zfs-linux-0.8.3/module/zfs/zfs_vnops.c +=================================================================== +--- zfs-linux-0.8.3.orig/module/zfs/zfs_vnops.c ++++ zfs-linux-0.8.3/module/zfs/zfs_vnops.c +@@ -996,11 +996,18 @@ zfs_iput_async(struct inode *ip) + ASSERT(atomic_read(&ip->i_count) > 0); + ASSERT(os != NULL); + +- if (atomic_read(&ip->i_count) == 1) ++ /* ++ * If decrementing the count would put us at 0, we can't do it inline ++ * here, because that would be synchronous. Instead, dispatch an iput ++ * to run later. ++ * ++ * For more information on the dangers of a synchronous iput, see the ++ * header comment of this file. 
++ */ ++ if (!atomic_add_unless(&ip->i_count, -1, 1)) { + VERIFY(taskq_dispatch(dsl_pool_iput_taskq(dmu_objset_pool(os)), + (task_func_t *)iput, ip, TQ_SLEEP) != TASKQID_INVALID); +- else +- iput(ip); ++ } + } + + /* ARGSUSED */ diff -Nru zfs-linux-0.8.3/debian/patches/4900-Fix-a-dependency-loop.patch zfs-linux-0.8.3/debian/patches/4900-Fix-a-dependency-loop.patch --- zfs-linux-0.8.3/debian/patches/4900-Fix-a-dependency-loop.patch 1970-01-01 00:00:00.000000000 +0000 +++ zfs-linux-0.8.3/debian/patches/4900-Fix-a-dependency-loop.patch 2021-07-12 14:36:13.000000000 +0000 @@ -0,0 +1,103 @@ +From aa1f71b103697cb8ae3e27fac7e3696237837f0d Mon Sep 17 00:00:00 2001 +From: Richard Laager +Date: Sat, 30 May 2020 18:40:45 -0500 +Subject: [PATCH 1/2] Fix a dependency loop + +When generating units with zfs-mount-generator, if the pool is already +imported, zfs-import.target is not needed. This avoids a dependency +loop on root-on-ZFS systems: + systemd-random-seed.service After (via RequiresMountsFor) + var-lib.mount After + zfs-import.target After + zfs-import-{cache,scan}.service After + cryptsetup.service After + systemd-random-seed.service + +Reviewed-by: Antonio Russo +Reviewed-by: InsanePrawn +Signed-off-by: Richard Laager +Closes #10388 + +Bug-Ubuntu: https://bugs.launchpad.net/bugs/1875577 +Origin: backport, https://github.com/openzfs/zfs/commit/ec41cafee1da +--- + .../system-generators/zfs-mount-generator.in | 27 +++++++++++++++++-- + etc/systemd/system/zfs-mount.service.in | 1 - + 2 files changed, 25 insertions(+), 3 deletions(-) + +diff --git a/etc/systemd/system-generators/zfs-mount-generator.in b/etc/systemd/system-generators/zfs-mount-generator.in +index 1f88b02179d2..298b301e1983 100755 +--- a/etc/systemd/system-generators/zfs-mount-generator.in ++++ b/etc/systemd/system-generators/zfs-mount-generator.in +@@ -42,6 +42,8 @@ else + do_fail "zero or three arguments required" + fi + ++pools=$(zpool list -H -o name || true) ++ + # For ZFSs marked "auto", a dependency is created for local-fs.target. To + # avoid regressions, this dependency is reduced to "wants" rather than + # "requires". **THIS MAY CHANGE** +@@ -58,10 +60,10 @@ process_line() { + # zfs list -H -o name,... + # fields are tab separated + IFS="$(printf '\t')" +- # protect against special characters in, e.g., mountpoints +- set -f + set -- $1 ++ + dataset="${1}" ++ pool="${dataset%%/*}" + p_mountpoint="${2}" + p_canmount="${3}" + p_atime="${4}" +@@ -77,6 +79,25 @@ process_line() { + # Minimal pre-requisites to mount a ZFS dataset + wants="zfs-import.target" + ++ # If the pool is already imported, zfs-import.target is not needed. This ++ # avoids a dependency loop on root-on-ZFS systems: ++ # systemd-random-seed.service After (via RequiresMountsFor) var-lib.mount ++ # After zfs-import.target After zfs-import-{cache,scan}.service After ++ # cryptsetup.service After systemd-random-seed.service. ++ # ++ # Pools are newline-separated and may contain spaces in their names. ++ # There is no better portable way to set IFS to just a newline. Using ++ # $(printf '\n') doesn't work because $(...) strips trailing newlines. ++ IFS=" ++" ++ for p in $pools ; do ++ if [ "$p" = "$pool" ] ; then ++ after="" ++ wants="" ++ break ++ fi ++ done ++ + # Handle encryption + if [ -n "${p_encroot}" ] && + [ "${p_encroot}" != "-" ] ; then +@@ -335,6 +356,8 @@ initzsys + + # Feed each line into process_line + for cachefile in "${FSLIST}/"* ; do ++ # Disable glob expansion to protect against special characters when parsing. 
++ set -f + while read -r fs ; do + process_line "${fs}" + done < "${cachefile}" +diff --git a/etc/systemd/system/zfs-mount.service.in b/etc/systemd/system/zfs-mount.service.in +index c2f2fa567541..a99e8343eb1f 100644 +--- a/etc/systemd/system/zfs-mount.service.in ++++ b/etc/systemd/system/zfs-mount.service.in +@@ -6,7 +6,6 @@ After=systemd-udev-settle.service + After=zfs-import.target + After=systemd-remount-fs.service + Before=local-fs.target +-Before=systemd-random-seed.service + After=zfs-load-module.service + ConditionPathExists=/sys/module/zfs + +-- +2.32.0 + diff -Nru zfs-linux-0.8.3/debian/patches/4901-Fix-another-dependency-loop.patch zfs-linux-0.8.3/debian/patches/4901-Fix-another-dependency-loop.patch --- zfs-linux-0.8.3/debian/patches/4901-Fix-another-dependency-loop.patch 1970-01-01 00:00:00.000000000 +0000 +++ zfs-linux-0.8.3/debian/patches/4901-Fix-another-dependency-loop.patch 2021-07-12 14:36:13.000000000 +0000 @@ -0,0 +1,45 @@ +From 25c312ec22065123f0d9cb1b55be8dd9b3e62589 Mon Sep 17 00:00:00 2001 +From: Richard Laager +Date: Sat, 30 May 2020 20:39:31 -0500 +Subject: [PATCH 2/2] Fix another dependency loop + +zfs-load-key-DATASET.service was gaining an +After=systemd-journald.socket due to its stdout/stderr going to the +journal (which is the default). systemd-journald.socket has an After +(via RequiresMountsFor=/run/systemd/journal) on -.mount. If the root +filesystem is encrypted, -.mount gets an After +zfs-load-key-DATASET.service. + +By setting stdout and stderr to null on the key load services, we avoid +this loop. + +Reviewed-by: Antonio Russo +Reviewed-by: InsanePrawn +Signed-off-by: Richard Laager +Closes #10356 +Closes #10388 + +Bug-Ubuntu: https://bugs.launchpad.net/bugs/1875577 +Origin: upstream, https://github.com/openzfs/zfs/commit/62663fb7ec19 +--- + etc/systemd/system-generators/zfs-mount-generator.in | 4 ++++ + 1 file changed, 4 insertions(+) + +diff --git a/etc/systemd/system-generators/zfs-mount-generator.in b/etc/systemd/system-generators/zfs-mount-generator.in +index 298b301e1983..31352b05659f 100755 +--- a/etc/systemd/system-generators/zfs-mount-generator.in ++++ b/etc/systemd/system-generators/zfs-mount-generator.in +@@ -140,6 +140,10 @@ ${pathdep} + [Service] + Type=oneshot + RemainAfterExit=yes ++# This avoids a dependency loop involving systemd-journald.socket if this ++# dataset is a parent of the root filesystem. 
++StandardOutput=null ++StandardError=null + ExecStart=${keyloadcmd} + ExecStop=@sbindir@/zfs unload-key '${dataset}' + EOF +-- +2.32.0 + diff -Nru zfs-linux-0.8.3/debian/patches/4910-Fix-EIO-after-resuming-receive-of-new-dataset-over-a.patch zfs-linux-0.8.3/debian/patches/4910-Fix-EIO-after-resuming-receive-of-new-dataset-over-a.patch --- zfs-linux-0.8.3/debian/patches/4910-Fix-EIO-after-resuming-receive-of-new-dataset-over-a.patch 1970-01-01 00:00:00.000000000 +0000 +++ zfs-linux-0.8.3/debian/patches/4910-Fix-EIO-after-resuming-receive-of-new-dataset-over-a.patch 2021-08-16 14:54:19.000000000 +0000 @@ -0,0 +1,60 @@ +From d6bee967ed264fd85a0a386a819caf27f09a3242 Mon Sep 17 00:00:00 2001 +From: Alan Somers +Date: Fri, 2 Oct 2020 18:47:09 -0600 +Subject: [PATCH] Fix EIO after resuming receive of new dataset over an + existing one +Content-Type: text/plain; charset="utf-8" +Content-Transfer-Encoding: 8bit + +When resuming an interrupted ZFS send stream that creates a new dataset +with the same name as an existing dataset, if the existing dataset is +accessed after the failed receive, then after the subsequent successful +receive it will return EIO. This happens because nothing mounts the new +dataset, leaving the old, no longer valid dataset still mounted. + +This commit fixes zfs receive to always unmount and remount the +destination, regardless of whether the stream is a new stream or a +resumed stream. + +Sponsored by: Axcient +Reviewed-by: Brian Behlendorf +Reviewed-by: Ryan Moeller +Signed-off-by: Alan Somers +External-issue: https://bugs.freebsd.org/bugzilla/show_bug.cgi?id=249579 +Closes #10995 +Closes #10999 +--- + lib/libzfs/libzfs_sendrecv.c | 6 ++++-- + 1 file changed, 4 insertions(+), 2 deletions(-) + +Index: zfs-linux-0.8.3/lib/libzfs/libzfs_sendrecv.c +=================================================================== +--- zfs-linux-0.8.3.orig/lib/libzfs/libzfs_sendrecv.c ++++ zfs-linux-0.8.3/lib/libzfs/libzfs_sendrecv.c +@@ -3784,7 +3784,7 @@ zfs_receive_one(libzfs_handle_t *hdl, in + char errbuf[1024]; + const char *chopprefix; + boolean_t newfs = B_FALSE; +- boolean_t stream_wantsnewfs; ++ boolean_t stream_wantsnewfs, stream_resumingnewfs; + boolean_t newprops = B_FALSE; + uint64_t read_bytes = 0; + uint64_t errflags = 0; +@@ -3992,6 +3992,8 @@ zfs_receive_one(libzfs_handle_t *hdl, in + DMU_BACKUP_FEATURE_EMBED_DATA; + stream_wantsnewfs = (drrb->drr_fromguid == 0 || + (drrb->drr_flags & DRR_FLAG_CLONE) || originsnap) && !resuming; ++ stream_resumingnewfs = (drrb->drr_fromguid == 0 || ++ (drrb->drr_flags & DRR_FLAG_CLONE) || originsnap) && resuming; + + if (stream_wantsnewfs) { + /* +@@ -4156,7 +4158,7 @@ zfs_receive_one(libzfs_handle_t *hdl, in + } + + if (!flags->dryrun && zhp->zfs_type == ZFS_TYPE_FILESYSTEM && +- stream_wantsnewfs) { ++ (stream_wantsnewfs || stream_resumingnewfs)) { + /* We can't do online recv in this case */ + clp = changelist_gather(zhp, ZFS_PROP_NAME, 0, 0); + if (clp == NULL) { diff -Nru zfs-linux-0.8.3/debian/patches/4911-compat-nullify-action-handle.patch zfs-linux-0.8.3/debian/patches/4911-compat-nullify-action-handle.patch --- zfs-linux-0.8.3/debian/patches/4911-compat-nullify-action-handle.patch 1970-01-01 00:00:00.000000000 +0000 +++ zfs-linux-0.8.3/debian/patches/4911-compat-nullify-action-handle.patch 2021-10-12 13:02:12.000000000 +0000 @@ -0,0 +1,30 @@ +Description: Fix recv -s compat issue with 2.x kernel driver + The action handle should be set to null so that the user space recv + is forwardly compatible with 2.x kernel ZFS driver to 
allow the + recv -s option to work. +Author: Colin Ian King +Origin: ubuntu +Forwarded: no +Last-Update: 2021-08-16 + +Index: zfs-linux-0.8.3/lib/libzfs/libzfs_sendrecv.c +=================================================================== +--- zfs-linux-0.8.3.orig/lib/libzfs/libzfs_sendrecv.c ++++ zfs-linux-0.8.3/lib/libzfs/libzfs_sendrecv.c +@@ -4801,7 +4801,6 @@ zfs_receive(libzfs_handle_t *hdl, const + char *top_zfs = NULL; + int err; + int cleanup_fd; +- uint64_t action_handle = 0; + struct stat sb; + char *originsnap = NULL; + +@@ -4856,7 +4855,7 @@ zfs_receive(libzfs_handle_t *hdl, const + VERIFY(cleanup_fd >= 0); + + err = zfs_receive_impl(hdl, tosnap, originsnap, flags, infd, NULL, NULL, +- stream_avl, &top_zfs, cleanup_fd, &action_handle, NULL, props); ++ stream_avl, &top_zfs, cleanup_fd, NULL, NULL, props); + + VERIFY(0 == close(cleanup_fd)); + diff -Nru zfs-linux-0.8.3/debian/patches/4920-Fix-zfs_get_data-access-to-files-with-wrong-generati.patch zfs-linux-0.8.3/debian/patches/4920-Fix-zfs_get_data-access-to-files-with-wrong-generati.patch --- zfs-linux-0.8.3/debian/patches/4920-Fix-zfs_get_data-access-to-files-with-wrong-generati.patch 1970-01-01 00:00:00.000000000 +0000 +++ zfs-linux-0.8.3/debian/patches/4920-Fix-zfs_get_data-access-to-files-with-wrong-generati.patch 2021-10-12 13:02:12.000000000 +0000 @@ -0,0 +1,134 @@ +Index: zfs-linux-0.8.3/cmd/ztest/ztest.c +=================================================================== +--- zfs-linux-0.8.3.orig/cmd/ztest/ztest.c ++++ zfs-linux-0.8.3/cmd/ztest/ztest.c +@@ -2182,8 +2182,8 @@ ztest_get_done(zgd_t *zgd, int error) + } + + static int +-ztest_get_data(void *arg, lr_write_t *lr, char *buf, struct lwb *lwb, +- zio_t *zio) ++ztest_get_data(void *arg, uint64_t arg2, lr_write_t *lr, char *buf, ++ struct lwb *lwb, zio_t *zio) + { + ztest_ds_t *zd = arg; + objset_t *os = zd->zd_os; +Index: zfs-linux-0.8.3/include/sys/zil.h +=================================================================== +--- zfs-linux-0.8.3.orig/include/sys/zil.h ++++ zfs-linux-0.8.3/include/sys/zil.h +@@ -399,6 +399,7 @@ typedef struct itx { + void *itx_callback_data; /* User data for the callback */ + size_t itx_size; /* allocated itx structure size */ + uint64_t itx_oid; /* object id */ ++ uint64_t itx_gen; /* gen number for zfs_get_data */ + lr_t itx_lr; /* common part of log record */ + /* followed by type-specific part of lr_xx_t and its immediate data */ + } itx_t; +@@ -467,7 +468,7 @@ typedef int zil_parse_blk_func_t(zilog_t + typedef int zil_parse_lr_func_t(zilog_t *zilog, lr_t *lr, void *arg, + uint64_t txg); + typedef int zil_replay_func_t(void *arg1, void *arg2, boolean_t byteswap); +-typedef int zil_get_data_t(void *arg, lr_write_t *lr, char *dbuf, ++typedef int zil_get_data_t(void *arg, uint64_t arg2, lr_write_t *lr, char *dbuf, + struct lwb *lwb, zio_t *zio); + + extern int zil_parse(zilog_t *zilog, zil_parse_blk_func_t *parse_blk_func, +Index: zfs-linux-0.8.3/module/zfs/zfs_log.c +=================================================================== +--- zfs-linux-0.8.3.orig/module/zfs/zfs_log.c ++++ zfs-linux-0.8.3/module/zfs/zfs_log.c +@@ -515,6 +515,7 @@ zfs_log_write(zilog_t *zilog, dmu_tx_t * + uint32_t blocksize = zp->z_blksz; + itx_wr_state_t write_state; + uintptr_t fsync_cnt; ++ uint64_t gen = 0; + + if (zil_replaying(zilog, tx) || zp->z_unlinked || + zfs_xattr_owner_unlinked(zp)) { +@@ -537,6 +538,9 @@ zfs_log_write(zilog_t *zilog, dmu_tx_t * + (void) tsd_set(zfs_fsyncer_key, (void *)(fsync_cnt - 1)); + } + ++ (void) 
sa_lookup(zp->z_sa_hdl, SA_ZPL_GEN(ZTOZSB(zp)), &gen, ++ sizeof (gen)); ++ + while (resid) { + itx_t *itx; + lr_write_t *lr; +@@ -577,6 +581,7 @@ zfs_log_write(zilog_t *zilog, dmu_tx_t * + BP_ZERO(&lr->lr_blkptr); + + itx->itx_private = ZTOZSB(zp); ++ itx->itx_gen = gen; + + if (!(ioflag & (FSYNC | FDSYNC)) && (zp->z_sync_cnt == 0) && + (fsync_cnt == 0)) +Index: zfs-linux-0.8.3/module/zfs/zil.c +=================================================================== +--- zfs-linux-0.8.3.orig/module/zfs/zil.c ++++ zfs-linux-0.8.3/module/zfs/zil.c +@@ -1740,7 +1740,8 @@ cont: + * completed after "lwb_write_zio" completed. + */ + error = zilog->zl_get_data(itx->itx_private, +- lrwb, dbuf, lwb, lwb->lwb_write_zio); ++ itx->itx_gen, lrwb, dbuf, lwb, ++ lwb->lwb_write_zio); + + if (error == EIO) { + txg_wait_synced(zilog->zl_dmu_pool, txg); +Index: zfs-linux-0.8.3/module/zfs/zvol.c +=================================================================== +--- zfs-linux-0.8.3.orig/module/zfs/zvol.c ++++ zfs-linux-0.8.3/module/zfs/zvol.c +@@ -953,7 +953,8 @@ zvol_get_done(zgd_t *zgd, int error) + * Get data to generate a TX_WRITE intent log record. + */ + static int +-zvol_get_data(void *arg, lr_write_t *lr, char *buf, struct lwb *lwb, zio_t *zio) ++zvol_get_data(void *arg, uint64_t arg2, lr_write_t *lr, char *buf, ++ struct lwb *lwb, zio_t *zio) + { + zvol_state_t *zv = arg; + uint64_t offset = lr->lr_offset; +Index: zfs-linux-0.8.3/module/zfs/zfs_vnops.c +=================================================================== +--- zfs-linux-0.8.3.orig/module/zfs/zfs_vnops.c ++++ zfs-linux-0.8.3/module/zfs/zfs_vnops.c +@@ -1038,7 +1038,8 @@ static int zil_fault_io = 0; + * Get data to generate a TX_WRITE intent log record. + */ + int +-zfs_get_data(void *arg, lr_write_t *lr, char *buf, struct lwb *lwb, zio_t *zio) ++zfs_get_data(void *arg, uint64_t gen, lr_write_t *lr, char *buf, ++ struct lwb *lwb, zio_t *zio) + { + zfsvfs_t *zfsvfs = arg; + objset_t *os = zfsvfs->z_os; +@@ -1049,6 +1050,7 @@ zfs_get_data(void *arg, lr_write_t *lr, + dmu_buf_t *db; + zgd_t *zgd; + int error = 0; ++ uint64_t zp_gen; + + ASSERT3P(lwb, !=, NULL); + ASSERT3P(zio, !=, NULL); +@@ -1067,6 +1069,17 @@ zfs_get_data(void *arg, lr_write_t *lr, + zfs_iput_async(ZTOI(zp)); + return (SET_ERROR(ENOENT)); + } ++ ++ /* check if generation number matches */ ++ if (sa_lookup(zp->z_sa_hdl, SA_ZPL_GEN(zfsvfs), &zp_gen, ++ sizeof (zp_gen)) != 0) { ++ zfs_iput_async(ZTOI(zp)); ++ return (SET_ERROR(EIO)); ++ } ++ if (zp_gen != gen) { ++ zfs_iput_async(ZTOI(zp)); ++ return (SET_ERROR(ENOENT)); ++ } + + zgd = kmem_zalloc(sizeof (zgd_t), KM_SLEEP); + zgd->zgd_lwb = lwb; diff -Nru zfs-linux-0.8.3/debian/patches/4930-Dont-ignore-zfs_arc_max-below-allmem-32.patch zfs-linux-0.8.3/debian/patches/4930-Dont-ignore-zfs_arc_max-below-allmem-32.patch --- zfs-linux-0.8.3/debian/patches/4930-Dont-ignore-zfs_arc_max-below-allmem-32.patch 1970-01-01 00:00:00.000000000 +0000 +++ zfs-linux-0.8.3/debian/patches/4930-Dont-ignore-zfs_arc_max-below-allmem-32.patch 2022-11-30 15:46:58.000000000 +0000 @@ -0,0 +1,161 @@ +From 5e8b50777c7d3db8794c2cacbdea58eebd1532e0 Mon Sep 17 00:00:00 2001 +From: Ryan Moeller +Date: Thu, 9 Apr 2020 18:39:48 -0400 +Subject: [PATCH] Don't ignore zfs_arc_max below allmem/32 + +Set arc_c_min before arc_c_max so that when zfs_arc_min is set lower +than the default allmem/32 zfs_arc_max can also be set lower. + +Add warning messages when tunables are being ignored. 
+ +Reviewed-by: Brian Behlendorf +Signed-off-by: Ryan Moeller +Closes #10157 +Closes #10158 + +Origin: backport, https://github.com/openzfs/zfs/commit/36a6e2335c45 +Bug-Ubuntu: https://bugs.launchpad.net/bugs/1964992 +--- + module/zfs/arc.c | 46 +++++++++++++++++++++++++++++++--------------- + 1 file changed, 31 insertions(+), 15 deletions(-) + +--- a/module/zfs/arc.c ++++ b/module/zfs/arc.c +@@ -1109,7 +1109,7 @@ + static void arc_access(arc_buf_hdr_t *, kmutex_t *); + static boolean_t arc_is_overflowing(void); + static void arc_buf_watch(arc_buf_t *); +-static void arc_tuning_update(void); ++static void arc_tuning_update(boolean_t); + static void arc_prune_async(int64_t); + static uint64_t arc_all_memory(void); + +@@ -5096,7 +5096,7 @@ + * their actual internal variable counterparts. Without this, + * changing those module params at runtime would have no effect. + */ +- arc_tuning_update(); ++ arc_tuning_update(B_FALSE); + + /* + * This is necessary in order to keep the kstat information +@@ -7553,6 +7553,14 @@ + multilist_get_num_sublists(ml)); + } + ++#define WARN_IF_TUNING_IGNORED(tuning, value, do_warn) do { \ ++ if ((do_warn) && (tuning) && ((tuning) != (value))) { \ ++ cmn_err(CE_WARN, \ ++ "ignoring tunable %s (using %llu instead)", \ ++ (#tuning), (value)); \ ++ } \ ++} while (0) ++ + /* + * Called during module initialization and periodically thereafter to + * apply reasonable changes to the exposed performance tunings. Can also be +@@ -7561,11 +7569,20 @@ + * values will be applied. + */ + static void +-arc_tuning_update(void) ++arc_tuning_update(boolean_t verbose) + { + uint64_t allmem = arc_all_memory(); + unsigned long limit; + ++ /* Valid range: 32M - */ ++ if ((zfs_arc_min) && (zfs_arc_min != arc_c_min) && ++ (zfs_arc_min >= 2ULL << SPA_MAXBLOCKSHIFT) && ++ (zfs_arc_min <= arc_c_max)) { ++ arc_c_min = zfs_arc_min; ++ arc_c = MAX(arc_c, arc_c_min); ++ } ++ WARN_IF_TUNING_IGNORED(zfs_arc_min, arc_c_min, verbose); ++ + /* Valid range: 64M - */ + if ((zfs_arc_max) && (zfs_arc_max != arc_c_max) && + (zfs_arc_max >= 64 << 20) && (zfs_arc_max < allmem) && +@@ -7578,14 +7595,7 @@ + if (arc_dnode_limit > arc_meta_limit) + arc_dnode_limit = arc_meta_limit; + } +- +- /* Valid range: 32M - */ +- if ((zfs_arc_min) && (zfs_arc_min != arc_c_min) && +- (zfs_arc_min >= 2ULL << SPA_MAXBLOCKSHIFT) && +- (zfs_arc_min <= arc_c_max)) { +- arc_c_min = zfs_arc_min; +- arc_c = MAX(arc_c, arc_c_min); +- } ++ WARN_IF_TUNING_IGNORED(zfs_arc_max, arc_c_max, verbose); + + /* Valid range: 16M - */ + if ((zfs_arc_meta_min) && (zfs_arc_meta_min != arc_meta_min) && +@@ -7597,6 +7607,7 @@ + if (arc_dnode_limit < arc_meta_min) + arc_dnode_limit = arc_meta_min; + } ++ WARN_IF_TUNING_IGNORED(zfs_arc_meta_min, arc_meta_min, verbose); + + /* Valid range: - */ + limit = zfs_arc_meta_limit ? zfs_arc_meta_limit : +@@ -7605,6 +7616,7 @@ + (limit >= arc_meta_min) && + (limit <= arc_c_max)) + arc_meta_limit = limit; ++ WARN_IF_TUNING_IGNORED(zfs_arc_meta_limit, arc_meta_limit, verbose); + + /* Valid range: - */ + limit = zfs_arc_dnode_limit ? 
zfs_arc_dnode_limit : +@@ -7613,6 +7625,8 @@ + (limit >= arc_meta_min) && + (limit <= arc_meta_limit)) + arc_dnode_limit = limit; ++ WARN_IF_TUNING_IGNORED(zfs_arc_dnode_limit, arc_dnode_limit, ++ verbose); + + /* Valid range: 1 - N */ + if (zfs_arc_grow_retry) +@@ -7642,11 +7656,13 @@ + if ((zfs_arc_lotsfree_percent >= 0) && + (zfs_arc_lotsfree_percent <= 100)) + arc_lotsfree_percent = zfs_arc_lotsfree_percent; ++ WARN_IF_TUNING_IGNORED(zfs_arc_lotsfree_percent, arc_lotsfree_percent, ++ verbose); + + /* Valid range: 0 - */ + if ((zfs_arc_sys_free) && (zfs_arc_sys_free != arc_sys_free)) + arc_sys_free = MIN(MAX(zfs_arc_sys_free, 0), allmem); +- ++ WARN_IF_TUNING_IGNORED(zfs_arc_sys_free, arc_sys_free, verbose); + } + + static void +@@ -7845,7 +7861,7 @@ + arc_dnode_limit = (percent * arc_meta_limit) / 100; + + /* Apply user specified tunings */ +- arc_tuning_update(); ++ arc_tuning_update(B_TRUE); + + /* if kmem_flags are set, lets try to use less memory */ + if (kmem_debugging()) +@@ -9401,7 +9417,7 @@ + if (error < 0) + return (SET_ERROR(error)); + +- arc_tuning_update(); ++ arc_tuning_update(B_TRUE); + + return (0); + } +@@ -9415,7 +9431,7 @@ + if (error < 0) + return (SET_ERROR(error)); + +- arc_tuning_update(); ++ arc_tuning_update(B_TRUE); + + return (0); + } diff -Nru zfs-linux-0.8.3/debian/patches/4931-Restore-processing-for-arc-min-and-arc-max.patch zfs-linux-0.8.3/debian/patches/4931-Restore-processing-for-arc-min-and-arc-max.patch --- zfs-linux-0.8.3/debian/patches/4931-Restore-processing-for-arc-min-and-arc-max.patch 1970-01-01 00:00:00.000000000 +0000 +++ zfs-linux-0.8.3/debian/patches/4931-Restore-processing-for-arc-min-and-arc-max.patch 2022-11-30 15:46:58.000000000 +0000 @@ -0,0 +1,120 @@ +From e945e8d7f4fcafd4f1c01abd90810fc09ab6a811 Mon Sep 17 00:00:00 2001 +From: Allan Jude +Date: Mon, 16 Aug 2021 11:35:19 -0400 +Subject: [PATCH] Restore FreeBSD sysctl processing for arc.min and arc.max + +Before OpenZFS 2.0, trying to set the FreeBSD sysctl vfs.zfs.arc_max +to a disallowed value would return an error. +Since the switch, it instead only generates WARN_IF_TUNING_IGNORED + +Keep the ability to set the sysctl's specifically to 0, even though +that is less than the minimum, because some tests depend on this. + +Also lost, was the ability to set vfs.zfs.arc_max to a value less +than the default vfs.zfs.arc_min at boot time. Restore this as well. + +Reviewed-by: Tony Nguyen +Reviewed-by: Ryan Moeller +Signed-off-by: Allan Jude +Closes #12161 + +Origin: backport, https://github.com/openzfs/zfs/commit/e945e8d7f4fc +Bug-Ubuntu: https://bugs.launchpad.net/bugs/1964992 +Backport-Notes: dropped FreeBSD-specific sections from upstream +--- +module/zfs/arc.c | 37 +++++++++++++++++++++++++------------ +1 file changed, 25 insertions(+), 12 deletions(-) + +diff --git a/module/zfs/arc.c b/arc.c +index 82a4d43..a0571c3 100644 +--- a/module/zfs/arc.c ++++ b/module/zfs/arc.c +@@ -5096,7 +5096,7 @@ arc_adjust_cb_check(void *arg, zthr_t *zthr) + * their actual internal variable counterparts. Without this, + * changing those module params at runtime would have no effect. 
+ */ +- arc_tuning_update(B_FALSE); ++ arc_tuning_update(B_TRUE); + + /* + * This is necessary in order to keep the kstat information +@@ -7557,10 +7557,9 @@ arc_state_multilist_index_func(multilist_t *ml, void *obj) + if ((do_warn) && (tuning) && ((tuning) != (value))) { \ + cmn_err(CE_WARN, \ + "ignoring tunable %s (using %llu instead)", \ +- (#tuning), (value)); \ ++ (#tuning), (u_longlong_t)(value)); \ + } \ + } while (0) +- + /* + * Called during module initialization and periodically thereafter to + * apply reasonable changes to the exposed performance tunings. Can also be +@@ -7588,7 +7587,7 @@ arc_tuning_update(boolean_t verbose) + (zfs_arc_max >= 64 << 20) && (zfs_arc_max < allmem) && + (zfs_arc_max > arc_c_min)) { + arc_c_max = zfs_arc_max; +- arc_c = arc_c_max; ++ arc_c = MIN(arc_c, arc_c_max); + arc_p = (arc_c >> 1); + if (arc_meta_limit > arc_c_max) + arc_meta_limit = arc_c_max; +@@ -7625,8 +7624,7 @@ arc_tuning_update(boolean_t verbose) + (limit >= arc_meta_min) && + (limit <= arc_meta_limit)) + arc_dnode_limit = limit; +- WARN_IF_TUNING_IGNORED(zfs_arc_dnode_limit, arc_dnode_limit, +- verbose); ++ WARN_IF_TUNING_IGNORED(zfs_arc_dnode_limit, arc_dnode_limit,verbose); + + /* Valid range: 1 - N */ + if (zfs_arc_grow_retry) +@@ -7656,13 +7654,13 @@ arc_tuning_update(boolean_t verbose) + if ((zfs_arc_lotsfree_percent >= 0) && + (zfs_arc_lotsfree_percent <= 100)) + arc_lotsfree_percent = zfs_arc_lotsfree_percent; +- WARN_IF_TUNING_IGNORED(zfs_arc_lotsfree_percent, arc_lotsfree_percent, +- verbose); ++ WARN_IF_TUNING_IGNORED(zfs_arc_lotsfree_percent, arc_lotsfree_percent,verbose); + + /* Valid range: 0 - */ + if ((zfs_arc_sys_free) && (zfs_arc_sys_free != arc_sys_free)) + arc_sys_free = MIN(MAX(zfs_arc_sys_free, 0), allmem); + WARN_IF_TUNING_IGNORED(zfs_arc_sys_free, arc_sys_free, verbose); ++ + } + + static void +@@ -7827,13 +7825,28 @@ arc_init(void) + arc_sys_free = MAX(allmem / 64, (512 * 1024)); + arc_need_free = 0; + #endif ++ /* Set min cache to 1/32 of all memory, or 32MB, whichever is more. */ ++ arc_c_min = MAX(allmem / 32, 2ULL << SPA_MAXBLOCKSHIFT); + +- /* Set max to 1/2 of all memory */ +- arc_c_max = allmem / 2; ++ /* How to set default max varies by platform. */ ++ arc_c_max = MAX(allmem / 2, arc_c_min); + + #ifdef _KERNEL +- /* Set min cache to 1/32 of all memory, or 32MB, whichever is more */ +- arc_c_min = MAX(allmem / 32, 2ULL << SPA_MAXBLOCKSHIFT); ++ /* ++ * If zfs_arc_max is non-zero at init, meaning it was set in the kernel ++ * environment before the module was loaded, don't block setting the ++ * maximum because it is less than arc_c_min, instead, reset arc_c_min ++ * to a lower value. ++ * zfs_arc_min will be handled by arc_tuning_update(). 
++ */ ++ if (zfs_arc_max != 0 && zfs_arc_max >= 64 << 20 && ++ zfs_arc_max < allmem) { ++ arc_c_max = zfs_arc_max; ++ if (arc_c_min >= arc_c_max) { ++ arc_c_min = MAX(zfs_arc_max / 2, ++ 2ULL << SPA_MAXBLOCKSHIFT); ++ } ++ } + #else + /* + * In userland, there's only the memory pressure that we artificially diff -Nru zfs-linux-0.8.3/debian/patches/CVE-2013-20001.patch zfs-linux-0.8.3/debian/patches/CVE-2013-20001.patch --- zfs-linux-0.8.3/debian/patches/CVE-2013-20001.patch 1970-01-01 00:00:00.000000000 +0000 +++ zfs-linux-0.8.3/debian/patches/CVE-2013-20001.patch 2023-11-02 14:49:23.000000000 +0000 @@ -0,0 +1,221 @@ +Backport of: + +From 6cb5e1e7591da20af3a15793e022345a73e40fb7 Mon Sep 17 00:00:00 2001 +From: felixdoerre +Date: Wed, 20 Oct 2021 19:40:00 +0200 +Subject: [PATCH] libshare: nfs: pass through ipv6 addresses in bracket + notation +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Recognize when the host part of a sharenfs attribute is an ipv6 +Literal and pass that through without modification. + +Reviewed-by: Brian Behlendorf +Signed-off-by: Felix Dörre +Closes: #11171 +Closes #11939 +Closes: #1894 +--- + lib/libshare/os/linux/nfs.c | 47 +++++++++-- + man/man8/zfs.8 | 2 +- + tests/runfiles/linux.run | 2 +- + .../functional/cli_root/zfs_share/Makefile.am | 1 + + .../cli_root/zfs_share/zfs_share_007_neg.ksh | 2 +- + .../cli_root/zfs_share/zfs_share_013_pos.ksh | 80 +++++++++++++++++++ + 6 files changed, 126 insertions(+), 8 deletions(-) + create mode 100755 tests/zfs-tests/tests/functional/cli_root/zfs_share/zfs_share_013_pos.ksh + +--- a/lib/libshare/nfs.c ++++ b/lib/libshare/nfs.c +@@ -129,8 +129,9 @@ foreach_nfs_host_cb(const char *opt, con + { + int rc; + const char *access; +- char *host_dup, *host, *next; ++ char *host_dup, *host, *next, *v6Literal; + nfs_host_cookie_t *udata = (nfs_host_cookie_t *)pcookie; ++ int cidr_len; + + #ifdef DEBUG + fprintf(stderr, "foreach_nfs_host_cb: key=%s, value=%s\n", opt, value); +@@ -153,10 +154,46 @@ foreach_nfs_host_cb(const char *opt, con + host = host_dup; + + do { +- next = strchr(host, ':'); +- if (next != NULL) { +- *next = '\0'; +- next++; ++ if (*host == '[') { ++ host++; ++ v6Literal = strchr(host, ']'); ++ if (v6Literal == NULL) { ++ free(host_dup); ++ return (SA_SYNTAX_ERR); ++ } ++ if (v6Literal[1] == '\0') { ++ *v6Literal = '\0'; ++ next = NULL; ++ } else if (v6Literal[1] == '/') { ++ next = strchr(v6Literal + 2, ':'); ++ if (next == NULL) { ++ cidr_len = ++ strlen(v6Literal + 1); ++ memmove(v6Literal, ++ v6Literal + 1, ++ cidr_len); ++ v6Literal[cidr_len] = '\0'; ++ } else { ++ cidr_len = next - v6Literal - 1; ++ memmove(v6Literal, ++ v6Literal + 1, ++ cidr_len); ++ v6Literal[cidr_len] = '\0'; ++ next++; ++ } ++ } else if (v6Literal[1] == ':') { ++ *v6Literal = '\0'; ++ next = v6Literal + 2; ++ } else { ++ free(host_dup); ++ return (SA_SYNTAX_ERR); ++ } ++ } else { ++ next = strchr(host, ':'); ++ if (next != NULL) { ++ *next = '\0'; ++ next++; ++ } + } + + rc = udata->callback(udata->sharepath, host, +--- a/man/man8/zfs.8 ++++ b/man/man8/zfs.8 +@@ -4912,7 +4912,7 @@ on the + .Em tank/home + file system. 
+ .Bd -literal +-# zfs set sharenfs='rw=@123.123.0.0/16,root=neo' tank/home ++# zfs set sharenfs='rw=@123.123.0.0/16:[::1],root=neo' tank/home + .Ed + .Pp + If you are using +--- a/tests/runfiles/linux.run ++++ b/tests/runfiles/linux.run +@@ -257,7 +257,7 @@ tags = ['functional', 'cli_root', 'zfs_s + tests = ['zfs_share_001_pos', 'zfs_share_002_pos', 'zfs_share_003_pos', + 'zfs_share_004_pos', 'zfs_share_005_pos', 'zfs_share_006_pos', + 'zfs_share_007_neg', 'zfs_share_008_neg', 'zfs_share_009_neg', +- 'zfs_share_010_neg', 'zfs_share_011_pos'] ++ 'zfs_share_010_neg', 'zfs_share_011_pos', 'zfs_share_013_pos'] + tags = ['functional', 'cli_root', 'zfs_share'] + + [tests/functional/cli_root/zfs_snapshot] +--- a/tests/zfs-tests/tests/functional/cli_root/zfs_share/Makefile.am ++++ b/tests/zfs-tests/tests/functional/cli_root/zfs_share/Makefile.am +@@ -12,7 +12,8 @@ dist_pkgdata_SCRIPTS = \ + zfs_share_008_neg.ksh \ + zfs_share_009_neg.ksh \ + zfs_share_010_neg.ksh \ +- zfs_share_011_pos.ksh ++ zfs_share_011_pos.ksh \ ++ zfs_share_013_pos.ksh + + dist_pkgdata_DATA = \ + zfs_share.cfg +--- a/tests/zfs-tests/tests/functional/cli_root/zfs_share/zfs_share_007_neg.ksh ++++ b/tests/zfs-tests/tests/functional/cli_root/zfs_share/zfs_share_007_neg.ksh +@@ -51,7 +51,7 @@ function cleanup { + + set -A badopts \ + "r0" "r0=machine1" "r0=machine1:machine2" \ +- "-g" "-b" "-c" "-d" "--invalid" \ ++ "-g" "-b" "-c" "-d" "--invalid" "rw=[::1]a:[::2]" "rw=[::1" \ + "$TESTPOOL" "$TESTPOOL/$TESTFS" "$TESTPOOL\$TESTCTR\$TESTFS1" + + log_assert "Verify that invalid share parameters and options are caught." +--- /dev/null ++++ b/tests/zfs-tests/tests/functional/cli_root/zfs_share/zfs_share_013_pos.ksh +@@ -0,0 +1,80 @@ ++#!/bin/ksh -p ++# ++# CDDL HEADER START ++# ++# The contents of this file are subject to the terms of the ++# Common Development and Distribution License (the "License"). ++# You may not use this file except in compliance with the License. ++# ++# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE ++# or http://www.opensolaris.org/os/licensing. ++# See the License for the specific language governing permissions ++# and limitations under the License. ++# ++# When distributing Covered Code, include this CDDL HEADER in each ++# file and include the License file at usr/src/OPENSOLARIS.LICENSE. ++# If applicable, add the following below this CDDL HEADER, with the ++# fields enclosed by brackets "[]" replaced with your own identifying ++# information: Portions Copyright [yyyy] [name of copyright owner] ++# ++# CDDL HEADER END ++# ++ ++# ++# Copyright (c) 2020, Felix Dörre ++# ++ ++. $STF_SUITE/include/libtest.shlib ++ ++# ++# DESCRIPTION: ++# Verify that NFS share options including ipv6 literals are parsed and propagated correctly. 
++# ++ ++verify_runnable "global" ++ ++function cleanup ++{ ++ log_must zfs set sharenfs=off $TESTPOOL/$TESTFS ++ is_shared $TESTPOOL/$TESTFS && \ ++ log_must unshare_fs $TESTPOOL/$TESTFS ++} ++ ++log_onexit cleanup ++ ++cleanup ++ ++log_must zfs set sharenfs="rw=[::1]" $TESTPOOL/$TESTFS ++output=$(showshares_nfs 2>&1) ++log_must grep "::1(" <<< "$output" > /dev/null ++ ++log_must zfs set sharenfs="rw=[2::3]" $TESTPOOL/$TESTFS ++output=$(showshares_nfs 2>&1) ++log_must grep "2::3(" <<< "$output" > /dev/null ++ ++log_must zfs set sharenfs="rw=[::1]:[2::3]" $TESTPOOL/$TESTFS ++output=$(showshares_nfs 2>&1) ++log_must grep "::1(" <<< "$output" > /dev/null ++log_must grep "2::3(" <<< "$output" > /dev/null ++ ++log_must zfs set sharenfs="rw=[::1]/64" $TESTPOOL/$TESTFS ++output=$(showshares_nfs 2>&1) ++log_must grep "::1/64(" <<< "$output" > /dev/null ++ ++log_must zfs set sharenfs="rw=[2::3]/128" $TESTPOOL/$TESTFS ++output=$(showshares_nfs 2>&1) ++log_must grep "2::3/128(" <<< "$output" > /dev/null ++ ++log_must zfs set sharenfs="rw=[::1]/32:[2::3]/128" $TESTPOOL/$TESTFS ++output=$(showshares_nfs 2>&1) ++log_must grep "::1/32(" <<< "$output" > /dev/null ++log_must grep "2::3/128(" <<< "$output" > /dev/null ++ ++log_must zfs set sharenfs="rw=[::1]:[2::3]/64:[2a01:1234:1234:1234:aa34:234:1234:1234]:1.2.3.4/24" $TESTPOOL/$TESTFS ++output=$(showshares_nfs 2>&1) ++log_must grep "::1(" <<< "$output" > /dev/null ++log_must grep "2::3/64(" <<< "$output" > /dev/null ++log_must grep "2a01:1234:1234:1234:aa34:234:1234:1234(" <<< "$output" > /dev/null ++log_must grep "1\\.2\\.3\\.4/24(" <<< "$output" > /dev/null ++ ++log_pass "NFS share ip address propagated correctly." diff -Nru zfs-linux-0.8.3/debian/patches/series zfs-linux-0.8.3/debian/patches/series --- zfs-linux-0.8.3/debian/patches/series 2020-04-14 09:14:33.000000000 +0000 +++ zfs-linux-0.8.3/debian/patches/series 2023-11-02 14:45:19.000000000 +0000 @@ -12,9 +12,24 @@ force-verbose-rules.patch #unapplied/init-debian-openrc-workaround.patch # OpenRC users can apply this locally +4510-silently-ignore-modprobe-failure.patch 4550-Linux-5.5-compat-blkg_tryget.patch 4600-Linux-5.6-compat-struct-proc_ops.patch 4601-Linux-5.6-compat-timestamp_truncate.patch 4602-Linux-5.6-compat-ktime_get_raw_ts64.patch 4603-Linux-5.6-compat-time_t.patch zfs-mount-container-start.patch +4610-ICP-Improve-AES-GCM-performance.patch +4620-zfs-vol-wait-fix-locked-encrypted-vols.patch +4700-Fix-DKMS-build-on-arm64-with-PREEMPTION-and-BLK_CGRO.patch +4701-Bugfix-fix-uio-partial-copies.patch +4702-Revert-Let-zfs-mount-all-tolerate-in-progress-mounts.patch +4800-fix-iput-race-in-zfs_iput_async.patch +4900-Fix-a-dependency-loop.patch +4901-Fix-another-dependency-loop.patch +4910-Fix-EIO-after-resuming-receive-of-new-dataset-over-a.patch +4911-compat-nullify-action-handle.patch +4920-Fix-zfs_get_data-access-to-files-with-wrong-generati.patch +4930-Dont-ignore-zfs_arc_max-below-allmem-32.patch +4931-Restore-processing-for-arc-min-and-arc-max.patch +CVE-2013-20001.patch diff -Nru zfs-linux-0.8.3/debian/rules zfs-linux-0.8.3/debian/rules --- zfs-linux-0.8.3/debian/rules 2020-01-21 12:40:40.000000000 +0000 +++ zfs-linux-0.8.3/debian/rules 2021-04-07 12:42:29.000000000 +0000 @@ -130,6 +130,8 @@ override_dh_dkms: '$(CURDIR)/scripts/dkms.mkconf' -n $(NAME) -v $(DEB_VERSION_UPSTREAM) -f '$(CURDIR)/scripts/zfs-dkms.dkms' + sed -ie '/^PACKAGE_VERSION/a BUILD_EXCLUSIVE_KERNEL="^(4\\.[0-9]+|5\\.[01234])\\."' \ + '$(CURDIR)/scripts/zfs-dkms.dkms' dh_dkms rm -f '$(CURDIR)/scripts/zfs-dkms.dkms' 
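
The sed line added to override_dh_dkms above injects a BUILD_EXCLUSIVE_KERNEL pattern into the generated zfs-dkms.dkms file so that dkms only builds the module for 4.x and 5.0 through 5.4 kernels. As a rough stand-alone illustration, not part of the packaging itself, and assuming the escaped value ends up as the extended regex ^(4\.[0-9]+|5\.[01234])\. by the time dkms matches it against the kernel version, the following loop (with arbitrary sample version strings, and grep -E standing in for the matching dkms performs) shows which kernels the pattern accepts:

for v in 4.15.0-112 5.4.0-80 5.8.0-63 5.10.0-8; do
    # grep -E here stands in for the dkms BUILD_EXCLUSIVE_KERNEL check
    if echo "$v" | grep -Eq '^(4\.[0-9]+|5\.[01234])\.'; then
        echo "$v: zfs-dkms module would be built"
    else
        echo "$v: build skipped (use the zfs-dkms package from a later series)"
    fi
done

Only 4.15 and 5.4 match; 5.8 and 5.10 fall outside the [01234] character class, which is the behaviour the re-worked escaping is intended to guarantee.
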
diff -Nru zfs-linux-0.8.3/debian/tests/kernel-smoke-test-scrub zfs-linux-0.8.3/debian/tests/kernel-smoke-test-scrub --- zfs-linux-0.8.3/debian/tests/kernel-smoke-test-scrub 2019-09-27 04:40:44.000000000 +0000 +++ zfs-linux-0.8.3/debian/tests/kernel-smoke-test-scrub 2021-07-08 07:51:21.000000000 +0000 @@ -36,7 +36,7 @@ ret=$? if [ $ret -ne 0 ]; then echo "FAILED: zpool create failed, exit code=$ret" - rm ${VDEV0} ${VDEV1} ${VDEV2} ${VDEV3} + rm -f ${VDEV0} ${VDEV1} ${VDEV2} ${VDEV3} exit 1 fi @@ -60,7 +60,7 @@ if [ $ret -ne 0 ]; then echo "FAILED: zpool detach failed, exit code=$ret" zpool destroy ${POOL} - rm ${VDEV0} ${VDEV1} ${VDEV2} ${VDEV3} + rm -f ${VDEV0} ${VDEV1} ${VDEV2} ${VDEV3} exit 1 fi @@ -68,22 +68,48 @@ # Zero corrupted VDEV and re-attach # dd if=/dev/zero of=${VDEV0} bs=1M count=${VDEV_SZ} > /dev/null 2>&1 +sync zpool attach ${POOL} ${VDEV1} ${VDEV0} -f ret=$? if [ $ret -ne 0 ]; then echo "FAILED: zpool attach failed, exit code=$ret" zpool destroy ${POOL} - rm ${VDEV0} ${VDEV1} ${VDEV2} ${VDEV3} + rm -f ${VDEV0} ${VDEV1} ${VDEV2} ${VDEV3} exit 1 fi -sleep 10 # sometimes this may fail because pool is busy resilvering -zpool scrub ${POOL} -ret=$? +resilvering=0 +# +# Initial first check +# +n=$(zpool status | grep "resilvering" | wc -l) +if [ $n -gt 0 ]; then + resilvering=1 +fi + +# +# ..and do the scrub +# +i=0 +while true +do + (zpool scrub ${POOL}) > /dev/null 2>&1 + ret=$? + if [ $ret -ne 0 ]; then + sleep 1 + i=$((i + 1)) + sleep 1 + if [ $i -gt 900 ]; then + break + fi + else + break + fi +done if [ $ret -ne 0 ]; then - echo "FAILED: zpool scrub failed, exit code=$ret" + echo "FAILED: zpool scrub failed after $i attempts, exit code=$ret" zpool destroy ${POOL} - rm ${VDEV0} ${VDEV1} ${VDEV2} ${VDEV3} + rm -f ${VDEV0} ${VDEV1} ${VDEV2} ${VDEV3} exit 1 fi @@ -94,7 +120,7 @@ if [ x"$sum1" != x"$sum2" ]; then echo "FAILED: corrupted data on scrubbed pool" zpool destroy ${POOL} - rm ${VDEV0} ${VDEV1} ${VDEV2} ${VDEV3} + rm -f ${VDEV0} ${VDEV1} ${VDEV2} ${VDEV3} exit 1 fi @@ -106,10 +132,16 @@ # destroy failed, try to clean up, but this # wil probably fail # - rm ${VDEV0} ${VDEV1} ${VDEV2} ${VDEV3} + rm -f ${VDEV0} ${VDEV1} ${VDEV2} ${VDEV3} exit 1 fi -rm ${VDEV0} ${VDEV1} ${VDEV2} ${VDEV3} +rm -f ${VDEV0} ${VDEV1} ${VDEV2} ${VDEV3} echo "PASSED" +echo "NOTE: zpool scrub completed after about $i second(s)" +if [ $resilvering -eq 1 ]; then + echo "NOTE: Resilvering detected + completed and srub completed" +else + echo "NOTE: Resilvering not detected (too fast to detect) and scrub completed" +fi exit 0
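
The reworked smoke test above retries zpool scrub until the scrub can be started, but it does not itself wait for the scrub to finish. A minimal sketch of such a completion check, using a hypothetical wait_for_scrub helper and assuming zpool status reports a "scrub repaired" scan line once a scrub has completed, could look like this:

wait_for_scrub() {
    pool="$1"
    timeout="${2:-900}"
    elapsed=0
    while [ "$elapsed" -lt "$timeout" ]; do
        # e.g. "scan: scrub repaired 0B in ... with 0 errors" once finished
        if zpool status "$pool" | grep -q "scrub repaired"; then
            return 0
        fi
        sleep 1
        elapsed=$((elapsed + 1))
    done
    return 1
}

A call such as wait_for_scrub ${POOL} 900 || echo "scrub did not complete in time" would mirror the ~15 minute budget the test already allows for starting the scrub.
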