Files
wolfssl/wolfcrypt/src/aes_gcm_asm.asm
Sean Parkinson 648a057147 ML-DSA/Dilithium: Intel x64 ASM
Optimize code knowing it is for Intel x64.
Change signing to calculate one polynomial at a time so that if it isn't
valid then we fail early.
Other minor improvements.
Move the SHA-3 4 blocks at a time assembly into SHA-3 asm file.
Make constants in assembly the same length (front pad with zeros).
2025-08-07 14:01:50 +10:00

16481 lines
523 KiB
NASM

; /* aes_gcm_asm.asm */
; /*
; * Copyright (C) 2006-2025 wolfSSL Inc.
; *
; * This file is part of wolfSSL.
; *
; * wolfSSL is free software; you can redistribute it and/or modify
; * it under the terms of the GNU General Public License as published by
; * the Free Software Foundation; either version 3 of the License, or
; * (at your option) any later version.
; *
; * wolfSSL is distributed in the hope that it will be useful,
; * but WITHOUT ANY WARRANTY; without even the implied warranty of
; * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
; * GNU General Public License for more details.
; *
; * You should have received a copy of the GNU General Public License
; * along with this program; if not, write to the Free Software
; * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
; */
; Feature detection for the assembler itself: old MASM versions do not
; know some instruction mnemonics, so disable the code paths that use them.
IF @Version LT 1200
; AVX2 instructions not recognized by old versions of MASM
IFNDEF NO_AVX2_SUPPORT
NO_AVX2_SUPPORT = 1
ENDIF
; MOVBE instruction not recognized by old versions of MASM
IFNDEF NO_MOVBE_SUPPORT
NO_MOVBE_SUPPORT = 1
ENDIF
ENDIF
; AVX1 code is always assembled (runtime dispatch selects the implementation)
IFNDEF HAVE_INTEL_AVX1
HAVE_INTEL_AVX1 = 1
ENDIF
; Assemble the AVX2 code unless disabled above for old MASM
IFNDEF NO_AVX2_SUPPORT
HAVE_INTEL_AVX2 = 1
ENDIF
; This file targets the Microsoft x64 ABI
IFNDEF _WIN64
_WIN64 = 1
ENDIF
_DATA SEGMENT
ALIGN 16
; pshufb mask { 0x08090A0B0C0D0E0F, 0x0001020304050607 }:
; in-memory bytes are 0F 0E 0D ... 01 00, i.e. reverse all 16 bytes
; (big-endian <-> little-endian conversion of a 128-bit value)
L_GCM_generate_m0_aesni_rev8 QWORD 579005069656919567, 283686952306183
ptr_L_GCM_generate_m0_aesni_rev8 QWORD L_GCM_generate_m0_aesni_rev8
_DATA ENDS
_DATA SEGMENT
ALIGN 16
; { 0, 0xE100000000000000 }: GHASH reduction constant, i.e. the
; GF(2^128) polynomial x^128 + x^7 + x^2 + x + 1 in the bit order
; used by the shift-and-reduce steps below
L_GCM_generate_m0_aesni_mod2_128 QWORD 0, 16212958658533785600
ptr_L_GCM_generate_m0_aesni_mod2_128 QWORD L_GCM_generate_m0_aesni_mod2_128
_DATA ENDS
_text SEGMENT READONLY PARA
;-----------------------------------------------------------------------
; void GCM_generate_m0_aesni(const byte* h, byte* m0)
; Build the 512-byte GHASH multiplication table M0 from the hash key H.
; ABI:   Microsoft x64
; In:    rcx = h  (16-byte hash key)
;        rdx = m0 (512-byte table output: 32 entries of 16 bytes)
; Saves/restores non-volatile xmm6-xmm10 per the Win64 ABI; other xmm
; registers used (xmm0-xmm5) are volatile.
; Fix (review): the four shift-by-4 passes below previously used the
; VEX-encoded vpshufb, which raises #UD on CPUs that have AES-NI but
; not AVX (e.g. Westmere).  For xmm operands, legacy-SSE
; "pshufb xmmA, xmmB" is bit-for-bit equivalent to
; "vpshufb xmmA, xmmA, xmmB", so the legacy form is used, keeping this
; routine pure SSE as its _aesni name implies.
;-----------------------------------------------------------------------
GCM_generate_m0_aesni PROC
sub rsp, 80
; Save non-volatile xmm6-xmm10 (Win64 ABI).
movdqu OWORD PTR [rsp], xmm6
movdqu OWORD PTR [rsp+16], xmm7
movdqu OWORD PTR [rsp+32], xmm8
movdqu OWORD PTR [rsp+48], xmm9
movdqu OWORD PTR [rsp+64], xmm10
; xmm9 = 16-byte reverse mask, xmm10 = reduction constant.
movdqu xmm9, OWORD PTR L_GCM_generate_m0_aesni_rev8
movdqu xmm10, OWORD PTR L_GCM_generate_m0_aesni_mod2_128
; m0[0] = 0.
pxor xmm8, xmm8
movdqu xmm0, OWORD PTR [rcx]
movdqu OWORD PTR [rdx], xmm8
movdqu xmm8, xmm0
; xmm0 = byte-reversed H.  Apply the 128-bit shift-and-reduce step
; three times: xmm1 = shift-reduce(xmm0), xmm2 = shift-reduce(xmm1),
; xmm3 = shift-reduce(xmm2).  Each step shifts the 128-bit value by
; one bit (psrlq/psllq with cross-qword carry) and conditionally XORs
; in the reduction constant based on the bit shifted out
; (pshufd/psrad builds an all-ones or all-zeros mask).
pshufb xmm0, xmm9
movdqu xmm5, xmm0
movdqu xmm4, xmm0
psllq xmm5, 63
psrlq xmm4, 1
movdqu xmm1, xmm5
pslldq xmm1, 8
psrldq xmm5, 8
pshufd xmm1, xmm1, 255
por xmm4, xmm5
psrad xmm1, 31
pand xmm1, xmm10
pxor xmm1, xmm4
movdqu xmm5, xmm1
movdqu xmm4, xmm1
psllq xmm5, 63
psrlq xmm4, 1
movdqu xmm2, xmm5
pslldq xmm2, 8
psrldq xmm5, 8
pshufd xmm2, xmm2, 255
por xmm4, xmm5
psrad xmm2, 31
pand xmm2, xmm10
pxor xmm2, xmm4
movdqu xmm5, xmm2
movdqu xmm4, xmm2
psllq xmm5, 63
psrlq xmm4, 1
movdqu xmm3, xmm5
pslldq xmm3, 8
psrldq xmm5, 8
pshufd xmm3, xmm3, 255
por xmm4, xmm5
psrad xmm3, 31
pand xmm3, xmm10
pxor xmm3, xmm4
; Shuffle the four values back to memory byte order and store
; m0[1..15] = every XOR combination of xmm3, xmm2, xmm1, xmm0
; (table index bits select which of the four are XORed together).
pshufb xmm3, xmm9
pshufb xmm2, xmm9
movdqu xmm8, xmm3
pshufb xmm1, xmm9
pshufb xmm0, xmm9
pxor xmm8, xmm2
movdqu OWORD PTR [rdx+16], xmm3
movdqu OWORD PTR [rdx+32], xmm2
movdqu OWORD PTR [rdx+48], xmm8
movdqu OWORD PTR [rdx+64], xmm1
movdqu xmm4, xmm3
movdqu xmm5, xmm2
movdqu xmm6, xmm8
pxor xmm4, xmm1
pxor xmm5, xmm1
pxor xmm6, xmm1
movdqu OWORD PTR [rdx+80], xmm4
movdqu OWORD PTR [rdx+96], xmm5
movdqu OWORD PTR [rdx+112], xmm6
movdqu OWORD PTR [rdx+128], xmm0
pxor xmm1, xmm0
movdqu xmm4, xmm3
movdqu xmm6, xmm2
pxor xmm4, xmm0
pxor xmm6, xmm0
movdqu OWORD PTR [rdx+144], xmm4
movdqu OWORD PTR [rdx+160], xmm6
pxor xmm6, xmm3
movdqu OWORD PTR [rdx+176], xmm6
movdqu OWORD PTR [rdx+192], xmm1
movdqu xmm4, xmm3
movdqu xmm5, xmm2
movdqu xmm6, xmm8
pxor xmm4, xmm1
pxor xmm5, xmm1
pxor xmm6, xmm1
movdqu OWORD PTR [rdx+208], xmm4
movdqu OWORD PTR [rdx+224], xmm5
movdqu OWORD PTR [rdx+240], xmm6
; Entries 16..31: each of the first 16 entries shifted by 4 bits.
; Pattern per group of 4: reverse bytes, 128-bit shift by 4
; (psrlq 4 + psllq 60 with psrldq 8 carrying bits across the qword
; boundary), reverse bytes back, store at offset +256.
movdqu xmm0, OWORD PTR [rdx]
movdqu xmm1, OWORD PTR [rdx+16]
movdqu xmm2, OWORD PTR [rdx+32]
movdqu xmm3, OWORD PTR [rdx+48]
pshufb xmm0, xmm9
pshufb xmm1, xmm9
pshufb xmm2, xmm9
pshufb xmm3, xmm9
movdqu xmm4, xmm0
movdqu xmm5, xmm1
movdqu xmm6, xmm2
movdqu xmm7, xmm3
psllq xmm4, 60
psllq xmm5, 60
psllq xmm6, 60
psllq xmm7, 60
psrlq xmm0, 4
psrlq xmm1, 4
psrlq xmm2, 4
psrlq xmm3, 4
psrldq xmm4, 8
psrldq xmm5, 8
psrldq xmm6, 8
psrldq xmm7, 8
por xmm0, xmm4
por xmm1, xmm5
por xmm2, xmm6
por xmm3, xmm7
pshufb xmm0, xmm9
pshufb xmm1, xmm9
pshufb xmm2, xmm9
pshufb xmm3, xmm9
movdqu OWORD PTR [rdx+256], xmm0
movdqu OWORD PTR [rdx+272], xmm1
movdqu OWORD PTR [rdx+288], xmm2
movdqu OWORD PTR [rdx+304], xmm3
movdqu xmm0, OWORD PTR [rdx+64]
movdqu xmm1, OWORD PTR [rdx+80]
movdqu xmm2, OWORD PTR [rdx+96]
movdqu xmm3, OWORD PTR [rdx+112]
pshufb xmm0, xmm9
pshufb xmm1, xmm9
pshufb xmm2, xmm9
pshufb xmm3, xmm9
movdqu xmm4, xmm0
movdqu xmm5, xmm1
movdqu xmm6, xmm2
movdqu xmm7, xmm3
psllq xmm4, 60
psllq xmm5, 60
psllq xmm6, 60
psllq xmm7, 60
psrlq xmm0, 4
psrlq xmm1, 4
psrlq xmm2, 4
psrlq xmm3, 4
psrldq xmm4, 8
psrldq xmm5, 8
psrldq xmm6, 8
psrldq xmm7, 8
por xmm0, xmm4
por xmm1, xmm5
por xmm2, xmm6
por xmm3, xmm7
pshufb xmm0, xmm9
pshufb xmm1, xmm9
pshufb xmm2, xmm9
pshufb xmm3, xmm9
movdqu OWORD PTR [rdx+320], xmm0
movdqu OWORD PTR [rdx+336], xmm1
movdqu OWORD PTR [rdx+352], xmm2
movdqu OWORD PTR [rdx+368], xmm3
movdqu xmm0, OWORD PTR [rdx+128]
movdqu xmm1, OWORD PTR [rdx+144]
movdqu xmm2, OWORD PTR [rdx+160]
movdqu xmm3, OWORD PTR [rdx+176]
pshufb xmm0, xmm9
pshufb xmm1, xmm9
pshufb xmm2, xmm9
pshufb xmm3, xmm9
movdqu xmm4, xmm0
movdqu xmm5, xmm1
movdqu xmm6, xmm2
movdqu xmm7, xmm3
psllq xmm4, 60
psllq xmm5, 60
psllq xmm6, 60
psllq xmm7, 60
psrlq xmm0, 4
psrlq xmm1, 4
psrlq xmm2, 4
psrlq xmm3, 4
psrldq xmm4, 8
psrldq xmm5, 8
psrldq xmm6, 8
psrldq xmm7, 8
por xmm0, xmm4
por xmm1, xmm5
por xmm2, xmm6
por xmm3, xmm7
pshufb xmm0, xmm9
pshufb xmm1, xmm9
pshufb xmm2, xmm9
pshufb xmm3, xmm9
movdqu OWORD PTR [rdx+384], xmm0
movdqu OWORD PTR [rdx+400], xmm1
movdqu OWORD PTR [rdx+416], xmm2
movdqu OWORD PTR [rdx+432], xmm3
movdqu xmm0, OWORD PTR [rdx+192]
movdqu xmm1, OWORD PTR [rdx+208]
movdqu xmm2, OWORD PTR [rdx+224]
movdqu xmm3, OWORD PTR [rdx+240]
pshufb xmm0, xmm9
pshufb xmm1, xmm9
pshufb xmm2, xmm9
pshufb xmm3, xmm9
movdqu xmm4, xmm0
movdqu xmm5, xmm1
movdqu xmm6, xmm2
movdqu xmm7, xmm3
psllq xmm4, 60
psllq xmm5, 60
psllq xmm6, 60
psllq xmm7, 60
psrlq xmm0, 4
psrlq xmm1, 4
psrlq xmm2, 4
psrlq xmm3, 4
psrldq xmm4, 8
psrldq xmm5, 8
psrldq xmm6, 8
psrldq xmm7, 8
por xmm0, xmm4
por xmm1, xmm5
por xmm2, xmm6
por xmm3, xmm7
pshufb xmm0, xmm9
pshufb xmm1, xmm9
pshufb xmm2, xmm9
pshufb xmm3, xmm9
movdqu OWORD PTR [rdx+448], xmm0
movdqu OWORD PTR [rdx+464], xmm1
movdqu OWORD PTR [rdx+480], xmm2
movdqu OWORD PTR [rdx+496], xmm3
; Restore non-volatile xmm6-xmm10 and return.
movdqu xmm6, OWORD PTR [rsp]
movdqu xmm7, OWORD PTR [rsp+16]
movdqu xmm8, OWORD PTR [rsp+32]
movdqu xmm9, OWORD PTR [rsp+48]
movdqu xmm10, OWORD PTR [rsp+64]
add rsp, 80
ret
GCM_generate_m0_aesni ENDP
_text ENDS
; Counter-increment constants 1..8.  The counter block is kept with its
; 64-bit halves byte-swapped (see L_aes_gcm_bswap_epi64 below), so the
; 32-bit big-endian GCM counter lands in dword index 2; paddd with
; { 0, N } (qwords) adds N to exactly that dword.
_DATA SEGMENT
ALIGN 16
L_aes_gcm_one QWORD 0, 1
ptr_L_aes_gcm_one QWORD L_aes_gcm_one
_DATA ENDS
_DATA SEGMENT
ALIGN 16
L_aes_gcm_two QWORD 0, 2
ptr_L_aes_gcm_two QWORD L_aes_gcm_two
_DATA ENDS
_DATA SEGMENT
ALIGN 16
L_aes_gcm_three QWORD 0, 3
ptr_L_aes_gcm_three QWORD L_aes_gcm_three
_DATA ENDS
_DATA SEGMENT
ALIGN 16
L_aes_gcm_four QWORD 0, 4
ptr_L_aes_gcm_four QWORD L_aes_gcm_four
_DATA ENDS
_DATA SEGMENT
ALIGN 16
L_aes_gcm_five QWORD 0, 5
ptr_L_aes_gcm_five QWORD L_aes_gcm_five
_DATA ENDS
_DATA SEGMENT
ALIGN 16
L_aes_gcm_six QWORD 0, 6
ptr_L_aes_gcm_six QWORD L_aes_gcm_six
_DATA ENDS
_DATA SEGMENT
ALIGN 16
L_aes_gcm_seven QWORD 0, 7
ptr_L_aes_gcm_seven QWORD L_aes_gcm_seven
_DATA ENDS
_DATA SEGMENT
ALIGN 16
L_aes_gcm_eight QWORD 0, 8
ptr_L_aes_gcm_eight QWORD L_aes_gcm_eight
_DATA ENDS
_DATA SEGMENT
ALIGN 16
; pshufb mask { 0x0001020304050607, 0x08090A0B0C0D0E0F }:
; reverses the bytes within each 64-bit half independently
L_aes_gcm_bswap_epi64 QWORD 283686952306183, 579005069656919567
ptr_L_aes_gcm_bswap_epi64 QWORD L_aes_gcm_bswap_epi64
_DATA ENDS
_DATA SEGMENT
ALIGN 16
; pshufb mask { 0x08090A0B0C0D0E0F, 0x0001020304050607 }:
; reverses all 16 bytes (big-endian <-> little-endian 128-bit value)
L_aes_gcm_bswap_mask QWORD 579005069656919567, 283686952306183
ptr_L_aes_gcm_bswap_mask QWORD L_aes_gcm_bswap_mask
_DATA ENDS
_DATA SEGMENT
ALIGN 16
; { 1, 0xC200000000000000 }: GHASH reduction constant for the
; pre-shifted hash key representation used by the encrypt/decrypt loops
L_aes_gcm_mod2_128 QWORD 1, 13979173243358019584
ptr_L_aes_gcm_mod2_128 QWORD L_aes_gcm_mod2_128
_DATA ENDS
_text SEGMENT READONLY PARA
AES_GCM_encrypt_aesni PROC
push r13
push rdi
push rsi
push r12
push rbx
push r14
push r15
mov rdi, rcx
mov rsi, rdx
mov r12, r8
mov rax, r9
mov r8, QWORD PTR [rsp+96]
mov r9d, DWORD PTR [rsp+104]
mov r11d, DWORD PTR [rsp+112]
mov ebx, DWORD PTR [rsp+120]
mov r14d, DWORD PTR [rsp+128]
mov r15, QWORD PTR [rsp+136]
mov r10d, DWORD PTR [rsp+144]
sub rsp, 320
movdqu OWORD PTR [rsp+160], xmm6
movdqu OWORD PTR [rsp+176], xmm7
movdqu OWORD PTR [rsp+192], xmm8
movdqu OWORD PTR [rsp+208], xmm9
movdqu OWORD PTR [rsp+224], xmm10
movdqu OWORD PTR [rsp+240], xmm11
movdqu OWORD PTR [rsp+256], xmm12
movdqu OWORD PTR [rsp+272], xmm13
movdqu OWORD PTR [rsp+288], xmm14
movdqu OWORD PTR [rsp+304], xmm15
pxor xmm4, xmm4
pxor xmm6, xmm6
cmp ebx, 12
mov edx, ebx
jne L_AES_GCM_encrypt_aesni_iv_not_12
; # Calculate values when IV is 12 bytes
; Set counter based on IV
mov ecx, 16777216
pinsrq xmm4, QWORD PTR [rax], 0
pinsrd xmm4, DWORD PTR [rax+8], 2
pinsrd xmm4, ecx, 3
; H = Encrypt X(=0) and T = Encrypt counter
movdqa xmm1, xmm4
movdqa xmm5, OWORD PTR [r15]
pxor xmm1, xmm5
movdqa xmm7, OWORD PTR [r15+16]
aesenc xmm5, xmm7
aesenc xmm1, xmm7
movdqa xmm7, OWORD PTR [r15+32]
aesenc xmm5, xmm7
aesenc xmm1, xmm7
movdqa xmm7, OWORD PTR [r15+48]
aesenc xmm5, xmm7
aesenc xmm1, xmm7
movdqa xmm7, OWORD PTR [r15+64]
aesenc xmm5, xmm7
aesenc xmm1, xmm7
movdqa xmm7, OWORD PTR [r15+80]
aesenc xmm5, xmm7
aesenc xmm1, xmm7
movdqa xmm7, OWORD PTR [r15+96]
aesenc xmm5, xmm7
aesenc xmm1, xmm7
movdqa xmm7, OWORD PTR [r15+112]
aesenc xmm5, xmm7
aesenc xmm1, xmm7
movdqa xmm7, OWORD PTR [r15+128]
aesenc xmm5, xmm7
aesenc xmm1, xmm7
movdqa xmm7, OWORD PTR [r15+144]
aesenc xmm5, xmm7
aesenc xmm1, xmm7
cmp r10d, 11
movdqa xmm7, OWORD PTR [r15+160]
jl L_AES_GCM_encrypt_aesni_calc_iv_12_last
aesenc xmm5, xmm7
aesenc xmm1, xmm7
movdqa xmm7, OWORD PTR [r15+176]
aesenc xmm5, xmm7
aesenc xmm1, xmm7
cmp r10d, 13
movdqa xmm7, OWORD PTR [r15+192]
jl L_AES_GCM_encrypt_aesni_calc_iv_12_last
aesenc xmm5, xmm7
aesenc xmm1, xmm7
movdqa xmm7, OWORD PTR [r15+208]
aesenc xmm5, xmm7
aesenc xmm1, xmm7
movdqa xmm7, OWORD PTR [r15+224]
L_AES_GCM_encrypt_aesni_calc_iv_12_last:
aesenclast xmm5, xmm7
aesenclast xmm1, xmm7
pshufb xmm5, OWORD PTR L_aes_gcm_bswap_mask
movdqu OWORD PTR [rsp+144], xmm1
jmp L_AES_GCM_encrypt_aesni_iv_done
L_AES_GCM_encrypt_aesni_iv_not_12:
; Calculate values when IV is not 12 bytes
; H = Encrypt X(=0)
movdqa xmm5, OWORD PTR [r15]
aesenc xmm5, [r15+16]
aesenc xmm5, [r15+32]
aesenc xmm5, [r15+48]
aesenc xmm5, [r15+64]
aesenc xmm5, [r15+80]
aesenc xmm5, [r15+96]
aesenc xmm5, [r15+112]
aesenc xmm5, [r15+128]
aesenc xmm5, [r15+144]
cmp r10d, 11
movdqa xmm9, OWORD PTR [r15+160]
jl L_AES_GCM_encrypt_aesni_calc_iv_1_aesenc_avx_last
aesenc xmm5, xmm9
aesenc xmm5, [r15+176]
cmp r10d, 13
movdqa xmm9, OWORD PTR [r15+192]
jl L_AES_GCM_encrypt_aesni_calc_iv_1_aesenc_avx_last
aesenc xmm5, xmm9
aesenc xmm5, [r15+208]
movdqa xmm9, OWORD PTR [r15+224]
L_AES_GCM_encrypt_aesni_calc_iv_1_aesenc_avx_last:
aesenclast xmm5, xmm9
pshufb xmm5, OWORD PTR L_aes_gcm_bswap_mask
; Calc counter
; Initialization vector
cmp edx, 0
mov rcx, 0
je L_AES_GCM_encrypt_aesni_calc_iv_done
cmp edx, 16
jl L_AES_GCM_encrypt_aesni_calc_iv_lt16
and edx, 4294967280
L_AES_GCM_encrypt_aesni_calc_iv_16_loop:
movdqu xmm8, OWORD PTR [rax+rcx]
pshufb xmm8, OWORD PTR L_aes_gcm_bswap_mask
pxor xmm4, xmm8
pshufd xmm1, xmm4, 78
pshufd xmm2, xmm5, 78
movdqa xmm3, xmm5
movdqa xmm0, xmm5
pclmulqdq xmm3, xmm4, 17
pclmulqdq xmm0, xmm4, 0
pxor xmm1, xmm4
pxor xmm2, xmm5
pclmulqdq xmm1, xmm2, 0
pxor xmm1, xmm0
pxor xmm1, xmm3
movdqa xmm2, xmm1
movdqa xmm7, xmm0
movdqa xmm4, xmm3
pslldq xmm2, 8
psrldq xmm1, 8
pxor xmm7, xmm2
pxor xmm4, xmm1
movdqa xmm0, xmm7
movdqa xmm1, xmm4
psrld xmm0, 31
psrld xmm1, 31
pslld xmm7, 1
pslld xmm4, 1
movdqa xmm2, xmm0
pslldq xmm0, 4
psrldq xmm2, 12
pslldq xmm1, 4
por xmm4, xmm2
por xmm7, xmm0
por xmm4, xmm1
movdqa xmm0, xmm7
movdqa xmm1, xmm7
movdqa xmm2, xmm7
pslld xmm0, 31
pslld xmm1, 30
pslld xmm2, 25
pxor xmm0, xmm1
pxor xmm0, xmm2
movdqa xmm1, xmm0
psrldq xmm1, 4
pslldq xmm0, 12
pxor xmm7, xmm0
movdqa xmm2, xmm7
movdqa xmm3, xmm7
movdqa xmm0, xmm7
psrld xmm2, 1
psrld xmm3, 2
psrld xmm0, 7
pxor xmm2, xmm3
pxor xmm2, xmm0
pxor xmm2, xmm1
pxor xmm2, xmm7
pxor xmm4, xmm2
add ecx, 16
cmp ecx, edx
jl L_AES_GCM_encrypt_aesni_calc_iv_16_loop
mov edx, ebx
cmp ecx, edx
je L_AES_GCM_encrypt_aesni_calc_iv_done
L_AES_GCM_encrypt_aesni_calc_iv_lt16:
sub rsp, 16
pxor xmm8, xmm8
xor ebx, ebx
movdqu OWORD PTR [rsp], xmm8
L_AES_GCM_encrypt_aesni_calc_iv_loop:
movzx r13d, BYTE PTR [rax+rcx]
mov BYTE PTR [rsp+rbx], r13b
inc ecx
inc ebx
cmp ecx, edx
jl L_AES_GCM_encrypt_aesni_calc_iv_loop
movdqu xmm8, OWORD PTR [rsp]
add rsp, 16
pshufb xmm8, OWORD PTR L_aes_gcm_bswap_mask
pxor xmm4, xmm8
pshufd xmm1, xmm4, 78
pshufd xmm2, xmm5, 78
movdqa xmm3, xmm5
movdqa xmm0, xmm5
pclmulqdq xmm3, xmm4, 17
pclmulqdq xmm0, xmm4, 0
pxor xmm1, xmm4
pxor xmm2, xmm5
pclmulqdq xmm1, xmm2, 0
pxor xmm1, xmm0
pxor xmm1, xmm3
movdqa xmm2, xmm1
movdqa xmm7, xmm0
movdqa xmm4, xmm3
pslldq xmm2, 8
psrldq xmm1, 8
pxor xmm7, xmm2
pxor xmm4, xmm1
movdqa xmm0, xmm7
movdqa xmm1, xmm4
psrld xmm0, 31
psrld xmm1, 31
pslld xmm7, 1
pslld xmm4, 1
movdqa xmm2, xmm0
pslldq xmm0, 4
psrldq xmm2, 12
pslldq xmm1, 4
por xmm4, xmm2
por xmm7, xmm0
por xmm4, xmm1
movdqa xmm0, xmm7
movdqa xmm1, xmm7
movdqa xmm2, xmm7
pslld xmm0, 31
pslld xmm1, 30
pslld xmm2, 25
pxor xmm0, xmm1
pxor xmm0, xmm2
movdqa xmm1, xmm0
psrldq xmm1, 4
pslldq xmm0, 12
pxor xmm7, xmm0
movdqa xmm2, xmm7
movdqa xmm3, xmm7
movdqa xmm0, xmm7
psrld xmm2, 1
psrld xmm3, 2
psrld xmm0, 7
pxor xmm2, xmm3
pxor xmm2, xmm0
pxor xmm2, xmm1
pxor xmm2, xmm7
pxor xmm4, xmm2
L_AES_GCM_encrypt_aesni_calc_iv_done:
; T = Encrypt counter
pxor xmm0, xmm0
shl edx, 3
pinsrq xmm0, rdx, 0
pxor xmm4, xmm0
pshufd xmm1, xmm4, 78
pshufd xmm2, xmm5, 78
movdqa xmm3, xmm5
movdqa xmm0, xmm5
pclmulqdq xmm3, xmm4, 17
pclmulqdq xmm0, xmm4, 0
pxor xmm1, xmm4
pxor xmm2, xmm5
pclmulqdq xmm1, xmm2, 0
pxor xmm1, xmm0
pxor xmm1, xmm3
movdqa xmm2, xmm1
movdqa xmm7, xmm0
movdqa xmm4, xmm3
pslldq xmm2, 8
psrldq xmm1, 8
pxor xmm7, xmm2
pxor xmm4, xmm1
movdqa xmm0, xmm7
movdqa xmm1, xmm4
psrld xmm0, 31
psrld xmm1, 31
pslld xmm7, 1
pslld xmm4, 1
movdqa xmm2, xmm0
pslldq xmm0, 4
psrldq xmm2, 12
pslldq xmm1, 4
por xmm4, xmm2
por xmm7, xmm0
por xmm4, xmm1
movdqa xmm0, xmm7
movdqa xmm1, xmm7
movdqa xmm2, xmm7
pslld xmm0, 31
pslld xmm1, 30
pslld xmm2, 25
pxor xmm0, xmm1
pxor xmm0, xmm2
movdqa xmm1, xmm0
psrldq xmm1, 4
pslldq xmm0, 12
pxor xmm7, xmm0
movdqa xmm2, xmm7
movdqa xmm3, xmm7
movdqa xmm0, xmm7
psrld xmm2, 1
psrld xmm3, 2
psrld xmm0, 7
pxor xmm2, xmm3
pxor xmm2, xmm0
pxor xmm2, xmm1
pxor xmm2, xmm7
pxor xmm4, xmm2
pshufb xmm4, OWORD PTR L_aes_gcm_bswap_mask
; Encrypt counter
movdqa xmm8, OWORD PTR [r15]
pxor xmm8, xmm4
aesenc xmm8, [r15+16]
aesenc xmm8, [r15+32]
aesenc xmm8, [r15+48]
aesenc xmm8, [r15+64]
aesenc xmm8, [r15+80]
aesenc xmm8, [r15+96]
aesenc xmm8, [r15+112]
aesenc xmm8, [r15+128]
aesenc xmm8, [r15+144]
cmp r10d, 11
movdqa xmm9, OWORD PTR [r15+160]
jl L_AES_GCM_encrypt_aesni_calc_iv_2_aesenc_avx_last
aesenc xmm8, xmm9
aesenc xmm8, [r15+176]
cmp r10d, 13
movdqa xmm9, OWORD PTR [r15+192]
jl L_AES_GCM_encrypt_aesni_calc_iv_2_aesenc_avx_last
aesenc xmm8, xmm9
aesenc xmm8, [r15+208]
movdqa xmm9, OWORD PTR [r15+224]
L_AES_GCM_encrypt_aesni_calc_iv_2_aesenc_avx_last:
aesenclast xmm8, xmm9
movdqu OWORD PTR [rsp+144], xmm8
L_AES_GCM_encrypt_aesni_iv_done:
; Additional authentication data
mov edx, r11d
cmp edx, 0
je L_AES_GCM_encrypt_aesni_calc_aad_done
xor ecx, ecx
cmp edx, 16
jl L_AES_GCM_encrypt_aesni_calc_aad_lt16
and edx, 4294967280
L_AES_GCM_encrypt_aesni_calc_aad_16_loop:
movdqu xmm8, OWORD PTR [r12+rcx]
pshufb xmm8, OWORD PTR L_aes_gcm_bswap_mask
pxor xmm6, xmm8
pshufd xmm1, xmm6, 78
pshufd xmm2, xmm5, 78
movdqa xmm3, xmm5
movdqa xmm0, xmm5
pclmulqdq xmm3, xmm6, 17
pclmulqdq xmm0, xmm6, 0
pxor xmm1, xmm6
pxor xmm2, xmm5
pclmulqdq xmm1, xmm2, 0
pxor xmm1, xmm0
pxor xmm1, xmm3
movdqa xmm2, xmm1
movdqa xmm7, xmm0
movdqa xmm6, xmm3
pslldq xmm2, 8
psrldq xmm1, 8
pxor xmm7, xmm2
pxor xmm6, xmm1
movdqa xmm0, xmm7
movdqa xmm1, xmm6
psrld xmm0, 31
psrld xmm1, 31
pslld xmm7, 1
pslld xmm6, 1
movdqa xmm2, xmm0
pslldq xmm0, 4
psrldq xmm2, 12
pslldq xmm1, 4
por xmm6, xmm2
por xmm7, xmm0
por xmm6, xmm1
movdqa xmm0, xmm7
movdqa xmm1, xmm7
movdqa xmm2, xmm7
pslld xmm0, 31
pslld xmm1, 30
pslld xmm2, 25
pxor xmm0, xmm1
pxor xmm0, xmm2
movdqa xmm1, xmm0
psrldq xmm1, 4
pslldq xmm0, 12
pxor xmm7, xmm0
movdqa xmm2, xmm7
movdqa xmm3, xmm7
movdqa xmm0, xmm7
psrld xmm2, 1
psrld xmm3, 2
psrld xmm0, 7
pxor xmm2, xmm3
pxor xmm2, xmm0
pxor xmm2, xmm1
pxor xmm2, xmm7
pxor xmm6, xmm2
add ecx, 16
cmp ecx, edx
jl L_AES_GCM_encrypt_aesni_calc_aad_16_loop
mov edx, r11d
cmp ecx, edx
je L_AES_GCM_encrypt_aesni_calc_aad_done
L_AES_GCM_encrypt_aesni_calc_aad_lt16:
sub rsp, 16
pxor xmm8, xmm8
xor ebx, ebx
movdqu OWORD PTR [rsp], xmm8
L_AES_GCM_encrypt_aesni_calc_aad_loop:
movzx r13d, BYTE PTR [r12+rcx]
mov BYTE PTR [rsp+rbx], r13b
inc ecx
inc ebx
cmp ecx, edx
jl L_AES_GCM_encrypt_aesni_calc_aad_loop
movdqu xmm8, OWORD PTR [rsp]
add rsp, 16
pshufb xmm8, OWORD PTR L_aes_gcm_bswap_mask
pxor xmm6, xmm8
pshufd xmm1, xmm6, 78
pshufd xmm2, xmm5, 78
movdqa xmm3, xmm5
movdqa xmm0, xmm5
pclmulqdq xmm3, xmm6, 17
pclmulqdq xmm0, xmm6, 0
pxor xmm1, xmm6
pxor xmm2, xmm5
pclmulqdq xmm1, xmm2, 0
pxor xmm1, xmm0
pxor xmm1, xmm3
movdqa xmm2, xmm1
movdqa xmm7, xmm0
movdqa xmm6, xmm3
pslldq xmm2, 8
psrldq xmm1, 8
pxor xmm7, xmm2
pxor xmm6, xmm1
movdqa xmm0, xmm7
movdqa xmm1, xmm6
psrld xmm0, 31
psrld xmm1, 31
pslld xmm7, 1
pslld xmm6, 1
movdqa xmm2, xmm0
pslldq xmm0, 4
psrldq xmm2, 12
pslldq xmm1, 4
por xmm6, xmm2
por xmm7, xmm0
por xmm6, xmm1
movdqa xmm0, xmm7
movdqa xmm1, xmm7
movdqa xmm2, xmm7
pslld xmm0, 31
pslld xmm1, 30
pslld xmm2, 25
pxor xmm0, xmm1
pxor xmm0, xmm2
movdqa xmm1, xmm0
psrldq xmm1, 4
pslldq xmm0, 12
pxor xmm7, xmm0
movdqa xmm2, xmm7
movdqa xmm3, xmm7
movdqa xmm0, xmm7
psrld xmm2, 1
psrld xmm3, 2
psrld xmm0, 7
pxor xmm2, xmm3
pxor xmm2, xmm0
pxor xmm2, xmm1
pxor xmm2, xmm7
pxor xmm6, xmm2
L_AES_GCM_encrypt_aesni_calc_aad_done:
; Calculate counter and H
pshufb xmm4, OWORD PTR L_aes_gcm_bswap_epi64
movdqa xmm9, xmm5
paddd xmm4, OWORD PTR L_aes_gcm_one
movdqa xmm8, xmm5
movdqu OWORD PTR [rsp+128], xmm4
psrlq xmm9, 63
psllq xmm8, 1
pslldq xmm9, 8
por xmm8, xmm9
pshufd xmm5, xmm5, 255
psrad xmm5, 31
pand xmm5, OWORD PTR L_aes_gcm_mod2_128
pxor xmm5, xmm8
xor rbx, rbx
cmp r9d, 128
mov r13d, r9d
jl L_AES_GCM_encrypt_aesni_done_128
and r13d, 4294967168
movdqa xmm2, xmm6
; H ^ 1
movdqu OWORD PTR [rsp], xmm5
; H ^ 2
pshufd xmm9, xmm5, 78
pshufd xmm10, xmm5, 78
movdqa xmm11, xmm5
movdqa xmm8, xmm5
pclmulqdq xmm11, xmm5, 17
pclmulqdq xmm8, xmm5, 0
pxor xmm9, xmm5
pxor xmm10, xmm5
pclmulqdq xmm9, xmm10, 0
pxor xmm9, xmm8
pxor xmm9, xmm11
movdqa xmm10, xmm9
movdqa xmm0, xmm11
pslldq xmm10, 8
psrldq xmm9, 8
pxor xmm8, xmm10
pxor xmm0, xmm9
movdqa xmm12, xmm8
movdqa xmm13, xmm8
movdqa xmm14, xmm8
pslld xmm12, 31
pslld xmm13, 30
pslld xmm14, 25
pxor xmm12, xmm13
pxor xmm12, xmm14
movdqa xmm13, xmm12
psrldq xmm13, 4
pslldq xmm12, 12
pxor xmm8, xmm12
movdqa xmm14, xmm8
movdqa xmm10, xmm8
movdqa xmm9, xmm8
psrld xmm14, 1
psrld xmm10, 2
psrld xmm9, 7
pxor xmm14, xmm10
pxor xmm14, xmm9
pxor xmm14, xmm13
pxor xmm14, xmm8
pxor xmm0, xmm14
movdqu OWORD PTR [rsp+16], xmm0
; H ^ 3
pshufd xmm9, xmm5, 78
pshufd xmm10, xmm0, 78
movdqa xmm11, xmm0
movdqa xmm8, xmm0
pclmulqdq xmm11, xmm5, 17
pclmulqdq xmm8, xmm5, 0
pxor xmm9, xmm5
pxor xmm10, xmm0
pclmulqdq xmm9, xmm10, 0
pxor xmm9, xmm8
pxor xmm9, xmm11
movdqa xmm10, xmm9
movdqa xmm1, xmm11
pslldq xmm10, 8
psrldq xmm9, 8
pxor xmm8, xmm10
pxor xmm1, xmm9
movdqa xmm12, xmm8
movdqa xmm13, xmm8
movdqa xmm14, xmm8
pslld xmm12, 31
pslld xmm13, 30
pslld xmm14, 25
pxor xmm12, xmm13
pxor xmm12, xmm14
movdqa xmm13, xmm12
psrldq xmm13, 4
pslldq xmm12, 12
pxor xmm8, xmm12
movdqa xmm14, xmm8
movdqa xmm10, xmm8
movdqa xmm9, xmm8
psrld xmm14, 1
psrld xmm10, 2
psrld xmm9, 7
pxor xmm14, xmm10
pxor xmm14, xmm9
pxor xmm14, xmm13
pxor xmm14, xmm8
pxor xmm1, xmm14
movdqu OWORD PTR [rsp+32], xmm1
; H ^ 4
pshufd xmm9, xmm0, 78
pshufd xmm10, xmm0, 78
movdqa xmm11, xmm0
movdqa xmm8, xmm0
pclmulqdq xmm11, xmm0, 17
pclmulqdq xmm8, xmm0, 0
pxor xmm9, xmm0
pxor xmm10, xmm0
pclmulqdq xmm9, xmm10, 0
pxor xmm9, xmm8
pxor xmm9, xmm11
movdqa xmm10, xmm9
movdqa xmm3, xmm11
pslldq xmm10, 8
psrldq xmm9, 8
pxor xmm8, xmm10
pxor xmm3, xmm9
movdqa xmm12, xmm8
movdqa xmm13, xmm8
movdqa xmm14, xmm8
pslld xmm12, 31
pslld xmm13, 30
pslld xmm14, 25
pxor xmm12, xmm13
pxor xmm12, xmm14
movdqa xmm13, xmm12
psrldq xmm13, 4
pslldq xmm12, 12
pxor xmm8, xmm12
movdqa xmm14, xmm8
movdqa xmm10, xmm8
movdqa xmm9, xmm8
psrld xmm14, 1
psrld xmm10, 2
psrld xmm9, 7
pxor xmm14, xmm10
pxor xmm14, xmm9
pxor xmm14, xmm13
pxor xmm14, xmm8
pxor xmm3, xmm14
movdqu OWORD PTR [rsp+48], xmm3
; H ^ 5
pshufd xmm9, xmm0, 78
pshufd xmm10, xmm1, 78
movdqa xmm11, xmm1
movdqa xmm8, xmm1
pclmulqdq xmm11, xmm0, 17
pclmulqdq xmm8, xmm0, 0
pxor xmm9, xmm0
pxor xmm10, xmm1
pclmulqdq xmm9, xmm10, 0
pxor xmm9, xmm8
pxor xmm9, xmm11
movdqa xmm10, xmm9
movdqa xmm7, xmm11
pslldq xmm10, 8
psrldq xmm9, 8
pxor xmm8, xmm10
pxor xmm7, xmm9
movdqa xmm12, xmm8
movdqa xmm13, xmm8
movdqa xmm14, xmm8
pslld xmm12, 31
pslld xmm13, 30
pslld xmm14, 25
pxor xmm12, xmm13
pxor xmm12, xmm14
movdqa xmm13, xmm12
psrldq xmm13, 4
pslldq xmm12, 12
pxor xmm8, xmm12
movdqa xmm14, xmm8
movdqa xmm10, xmm8
movdqa xmm9, xmm8
psrld xmm14, 1
psrld xmm10, 2
psrld xmm9, 7
pxor xmm14, xmm10
pxor xmm14, xmm9
pxor xmm14, xmm13
pxor xmm14, xmm8
pxor xmm7, xmm14
movdqu OWORD PTR [rsp+64], xmm7
; H ^ 6
pshufd xmm9, xmm1, 78
pshufd xmm10, xmm1, 78
movdqa xmm11, xmm1
movdqa xmm8, xmm1
pclmulqdq xmm11, xmm1, 17
pclmulqdq xmm8, xmm1, 0
pxor xmm9, xmm1
pxor xmm10, xmm1
pclmulqdq xmm9, xmm10, 0
pxor xmm9, xmm8
pxor xmm9, xmm11
movdqa xmm10, xmm9
movdqa xmm7, xmm11
pslldq xmm10, 8
psrldq xmm9, 8
pxor xmm8, xmm10
pxor xmm7, xmm9
movdqa xmm12, xmm8
movdqa xmm13, xmm8
movdqa xmm14, xmm8
pslld xmm12, 31
pslld xmm13, 30
pslld xmm14, 25
pxor xmm12, xmm13
pxor xmm12, xmm14
movdqa xmm13, xmm12
psrldq xmm13, 4
pslldq xmm12, 12
pxor xmm8, xmm12
movdqa xmm14, xmm8
movdqa xmm10, xmm8
movdqa xmm9, xmm8
psrld xmm14, 1
psrld xmm10, 2
psrld xmm9, 7
pxor xmm14, xmm10
pxor xmm14, xmm9
pxor xmm14, xmm13
pxor xmm14, xmm8
pxor xmm7, xmm14
movdqu OWORD PTR [rsp+80], xmm7
; H ^ 7
pshufd xmm9, xmm1, 78
pshufd xmm10, xmm3, 78
movdqa xmm11, xmm3
movdqa xmm8, xmm3
pclmulqdq xmm11, xmm1, 17
pclmulqdq xmm8, xmm1, 0
pxor xmm9, xmm1
pxor xmm10, xmm3
pclmulqdq xmm9, xmm10, 0
pxor xmm9, xmm8
pxor xmm9, xmm11
movdqa xmm10, xmm9
movdqa xmm7, xmm11
pslldq xmm10, 8
psrldq xmm9, 8
pxor xmm8, xmm10
pxor xmm7, xmm9
movdqa xmm12, xmm8
movdqa xmm13, xmm8
movdqa xmm14, xmm8
pslld xmm12, 31
pslld xmm13, 30
pslld xmm14, 25
pxor xmm12, xmm13
pxor xmm12, xmm14
movdqa xmm13, xmm12
psrldq xmm13, 4
pslldq xmm12, 12
pxor xmm8, xmm12
movdqa xmm14, xmm8
movdqa xmm10, xmm8
movdqa xmm9, xmm8
psrld xmm14, 1
psrld xmm10, 2
psrld xmm9, 7
pxor xmm14, xmm10
pxor xmm14, xmm9
pxor xmm14, xmm13
pxor xmm14, xmm8
pxor xmm7, xmm14
movdqu OWORD PTR [rsp+96], xmm7
; H ^ 8
pshufd xmm9, xmm3, 78
pshufd xmm10, xmm3, 78
movdqa xmm11, xmm3
movdqa xmm8, xmm3
pclmulqdq xmm11, xmm3, 17
pclmulqdq xmm8, xmm3, 0
pxor xmm9, xmm3
pxor xmm10, xmm3
pclmulqdq xmm9, xmm10, 0
pxor xmm9, xmm8
pxor xmm9, xmm11
movdqa xmm10, xmm9
movdqa xmm7, xmm11
pslldq xmm10, 8
psrldq xmm9, 8
pxor xmm8, xmm10
pxor xmm7, xmm9
movdqa xmm12, xmm8
movdqa xmm13, xmm8
movdqa xmm14, xmm8
pslld xmm12, 31
pslld xmm13, 30
pslld xmm14, 25
pxor xmm12, xmm13
pxor xmm12, xmm14
movdqa xmm13, xmm12
psrldq xmm13, 4
pslldq xmm12, 12
pxor xmm8, xmm12
movdqa xmm14, xmm8
movdqa xmm10, xmm8
movdqa xmm9, xmm8
psrld xmm14, 1
psrld xmm10, 2
psrld xmm9, 7
pxor xmm14, xmm10
pxor xmm14, xmm9
pxor xmm14, xmm13
pxor xmm14, xmm8
pxor xmm7, xmm14
movdqu OWORD PTR [rsp+112], xmm7
; First 128 bytes of input
movdqu xmm8, OWORD PTR [rsp+128]
movdqa xmm1, OWORD PTR L_aes_gcm_bswap_epi64
movdqa xmm0, xmm8
pshufb xmm8, xmm1
movdqa xmm9, xmm0
paddd xmm9, OWORD PTR L_aes_gcm_one
pshufb xmm9, xmm1
movdqa xmm10, xmm0
paddd xmm10, OWORD PTR L_aes_gcm_two
pshufb xmm10, xmm1
movdqa xmm11, xmm0
paddd xmm11, OWORD PTR L_aes_gcm_three
pshufb xmm11, xmm1
movdqa xmm12, xmm0
paddd xmm12, OWORD PTR L_aes_gcm_four
pshufb xmm12, xmm1
movdqa xmm13, xmm0
paddd xmm13, OWORD PTR L_aes_gcm_five
pshufb xmm13, xmm1
movdqa xmm14, xmm0
paddd xmm14, OWORD PTR L_aes_gcm_six
pshufb xmm14, xmm1
movdqa xmm15, xmm0
paddd xmm15, OWORD PTR L_aes_gcm_seven
pshufb xmm15, xmm1
paddd xmm0, OWORD PTR L_aes_gcm_eight
movdqa xmm7, OWORD PTR [r15]
movdqu OWORD PTR [rsp+128], xmm0
pxor xmm8, xmm7
pxor xmm9, xmm7
pxor xmm10, xmm7
pxor xmm11, xmm7
pxor xmm12, xmm7
pxor xmm13, xmm7
pxor xmm14, xmm7
pxor xmm15, xmm7
movdqa xmm7, OWORD PTR [r15+16]
aesenc xmm8, xmm7
aesenc xmm9, xmm7
aesenc xmm10, xmm7
aesenc xmm11, xmm7
aesenc xmm12, xmm7
aesenc xmm13, xmm7
aesenc xmm14, xmm7
aesenc xmm15, xmm7
movdqa xmm7, OWORD PTR [r15+32]
aesenc xmm8, xmm7
aesenc xmm9, xmm7
aesenc xmm10, xmm7
aesenc xmm11, xmm7
aesenc xmm12, xmm7
aesenc xmm13, xmm7
aesenc xmm14, xmm7
aesenc xmm15, xmm7
movdqa xmm7, OWORD PTR [r15+48]
aesenc xmm8, xmm7
aesenc xmm9, xmm7
aesenc xmm10, xmm7
aesenc xmm11, xmm7
aesenc xmm12, xmm7
aesenc xmm13, xmm7
aesenc xmm14, xmm7
aesenc xmm15, xmm7
movdqa xmm7, OWORD PTR [r15+64]
aesenc xmm8, xmm7
aesenc xmm9, xmm7
aesenc xmm10, xmm7
aesenc xmm11, xmm7
aesenc xmm12, xmm7
aesenc xmm13, xmm7
aesenc xmm14, xmm7
aesenc xmm15, xmm7
movdqa xmm7, OWORD PTR [r15+80]
aesenc xmm8, xmm7
aesenc xmm9, xmm7
aesenc xmm10, xmm7
aesenc xmm11, xmm7
aesenc xmm12, xmm7
aesenc xmm13, xmm7
aesenc xmm14, xmm7
aesenc xmm15, xmm7
movdqa xmm7, OWORD PTR [r15+96]
aesenc xmm8, xmm7
aesenc xmm9, xmm7
aesenc xmm10, xmm7
aesenc xmm11, xmm7
aesenc xmm12, xmm7
aesenc xmm13, xmm7
aesenc xmm14, xmm7
aesenc xmm15, xmm7
movdqa xmm7, OWORD PTR [r15+112]
aesenc xmm8, xmm7
aesenc xmm9, xmm7
aesenc xmm10, xmm7
aesenc xmm11, xmm7
aesenc xmm12, xmm7
aesenc xmm13, xmm7
aesenc xmm14, xmm7
aesenc xmm15, xmm7
movdqa xmm7, OWORD PTR [r15+128]
aesenc xmm8, xmm7
aesenc xmm9, xmm7
aesenc xmm10, xmm7
aesenc xmm11, xmm7
aesenc xmm12, xmm7
aesenc xmm13, xmm7
aesenc xmm14, xmm7
aesenc xmm15, xmm7
movdqa xmm7, OWORD PTR [r15+144]
aesenc xmm8, xmm7
aesenc xmm9, xmm7
aesenc xmm10, xmm7
aesenc xmm11, xmm7
aesenc xmm12, xmm7
aesenc xmm13, xmm7
aesenc xmm14, xmm7
aesenc xmm15, xmm7
cmp r10d, 11
movdqa xmm7, OWORD PTR [r15+160]
jl L_AES_GCM_encrypt_aesni_enc_done
aesenc xmm8, xmm7
aesenc xmm9, xmm7
aesenc xmm10, xmm7
aesenc xmm11, xmm7
aesenc xmm12, xmm7
aesenc xmm13, xmm7
aesenc xmm14, xmm7
aesenc xmm15, xmm7
movdqa xmm7, OWORD PTR [r15+176]
aesenc xmm8, xmm7
aesenc xmm9, xmm7
aesenc xmm10, xmm7
aesenc xmm11, xmm7
aesenc xmm12, xmm7
aesenc xmm13, xmm7
aesenc xmm14, xmm7
aesenc xmm15, xmm7
cmp r10d, 13
movdqa xmm7, OWORD PTR [r15+192]
jl L_AES_GCM_encrypt_aesni_enc_done
aesenc xmm8, xmm7
aesenc xmm9, xmm7
aesenc xmm10, xmm7
aesenc xmm11, xmm7
aesenc xmm12, xmm7
aesenc xmm13, xmm7
aesenc xmm14, xmm7
aesenc xmm15, xmm7
movdqa xmm7, OWORD PTR [r15+208]
aesenc xmm8, xmm7
aesenc xmm9, xmm7
aesenc xmm10, xmm7
aesenc xmm11, xmm7
aesenc xmm12, xmm7
aesenc xmm13, xmm7
aesenc xmm14, xmm7
aesenc xmm15, xmm7
movdqa xmm7, OWORD PTR [r15+224]
L_AES_GCM_encrypt_aesni_enc_done:
aesenclast xmm8, xmm7
aesenclast xmm9, xmm7
movdqu xmm0, OWORD PTR [rdi]
movdqu xmm1, OWORD PTR [rdi+16]
pxor xmm8, xmm0
pxor xmm9, xmm1
movdqu OWORD PTR [rsi], xmm8
movdqu OWORD PTR [rsi+16], xmm9
aesenclast xmm10, xmm7
aesenclast xmm11, xmm7
movdqu xmm0, OWORD PTR [rdi+32]
movdqu xmm1, OWORD PTR [rdi+48]
pxor xmm10, xmm0
pxor xmm11, xmm1
movdqu OWORD PTR [rsi+32], xmm10
movdqu OWORD PTR [rsi+48], xmm11
aesenclast xmm12, xmm7
aesenclast xmm13, xmm7
movdqu xmm0, OWORD PTR [rdi+64]
movdqu xmm1, OWORD PTR [rdi+80]
pxor xmm12, xmm0
pxor xmm13, xmm1
movdqu OWORD PTR [rsi+64], xmm12
movdqu OWORD PTR [rsi+80], xmm13
aesenclast xmm14, xmm7
aesenclast xmm15, xmm7
movdqu xmm0, OWORD PTR [rdi+96]
movdqu xmm1, OWORD PTR [rdi+112]
pxor xmm14, xmm0
pxor xmm15, xmm1
movdqu OWORD PTR [rsi+96], xmm14
movdqu OWORD PTR [rsi+112], xmm15
cmp r13d, 128
mov ebx, 128
jle L_AES_GCM_encrypt_aesni_end_128
; More 128 bytes of input
L_AES_GCM_encrypt_aesni_ghash_128:
lea rcx, QWORD PTR [rdi+rbx]
lea rdx, QWORD PTR [rsi+rbx]
movdqu xmm8, OWORD PTR [rsp+128]
movdqa xmm1, OWORD PTR L_aes_gcm_bswap_epi64
movdqa xmm0, xmm8
pshufb xmm8, xmm1
movdqa xmm9, xmm0
paddd xmm9, OWORD PTR L_aes_gcm_one
pshufb xmm9, xmm1
movdqa xmm10, xmm0
paddd xmm10, OWORD PTR L_aes_gcm_two
pshufb xmm10, xmm1
movdqa xmm11, xmm0
paddd xmm11, OWORD PTR L_aes_gcm_three
pshufb xmm11, xmm1
movdqa xmm12, xmm0
paddd xmm12, OWORD PTR L_aes_gcm_four
pshufb xmm12, xmm1
movdqa xmm13, xmm0
paddd xmm13, OWORD PTR L_aes_gcm_five
pshufb xmm13, xmm1
movdqa xmm14, xmm0
paddd xmm14, OWORD PTR L_aes_gcm_six
pshufb xmm14, xmm1
movdqa xmm15, xmm0
paddd xmm15, OWORD PTR L_aes_gcm_seven
pshufb xmm15, xmm1
paddd xmm0, OWORD PTR L_aes_gcm_eight
movdqa xmm7, OWORD PTR [r15]
movdqu OWORD PTR [rsp+128], xmm0
pxor xmm8, xmm7
pxor xmm9, xmm7
pxor xmm10, xmm7
pxor xmm11, xmm7
pxor xmm12, xmm7
pxor xmm13, xmm7
pxor xmm14, xmm7
pxor xmm15, xmm7
movdqu xmm7, OWORD PTR [rsp+112]
movdqu xmm0, OWORD PTR [rdx+-128]
aesenc xmm8, [r15+16]
pshufb xmm0, OWORD PTR L_aes_gcm_bswap_mask
pxor xmm0, xmm2
pshufd xmm1, xmm7, 78
pshufd xmm5, xmm0, 78
pxor xmm1, xmm7
pxor xmm5, xmm0
movdqa xmm3, xmm0
pclmulqdq xmm3, xmm7, 17
aesenc xmm9, [r15+16]
; Continuation of the 8-blocks-at-a-time encrypt loop: GHASH of the previous
; 8 ciphertext blocks (Karatsuba multiply against precomputed H powers held
; at [rsp+0..112]) interleaved with AES rounds 1..9 on the 8 counter blocks
; in xmm8-xmm15.  xmm1 = running middle Karatsuba term, xmm2 = running low
; term, xmm3 = running high term.
aesenc xmm10, [r15+16]
movdqa xmm2, xmm0
pclmulqdq xmm2, xmm7, 0
aesenc xmm11, [r15+16]
aesenc xmm12, [r15+16]
pclmulqdq xmm1, xmm5, 0
aesenc xmm13, [r15+16]
aesenc xmm14, [r15+16]
aesenc xmm15, [r15+16]
pxor xmm1, xmm2
pxor xmm1, xmm3
; GHASH block 2: data at [rdx-112] (previous ciphertext) times the H power
; at [rsp+96]; AES round 2 interleaved.
movdqu xmm7, OWORD PTR [rsp+96]
movdqu xmm0, OWORD PTR [rdx+-112]
pshufd xmm4, xmm7, 78
pshufb xmm0, OWORD PTR L_aes_gcm_bswap_mask
aesenc xmm8, [r15+32]
pxor xmm4, xmm7
pshufd xmm5, xmm0, 78
pxor xmm5, xmm0
movdqa xmm6, xmm0
pclmulqdq xmm6, xmm7, 17
aesenc xmm9, [r15+32]
aesenc xmm10, [r15+32]
pclmulqdq xmm7, xmm0, 0
aesenc xmm11, [r15+32]
aesenc xmm12, [r15+32]
pclmulqdq xmm4, xmm5, 0
aesenc xmm13, [r15+32]
aesenc xmm14, [r15+32]
aesenc xmm15, [r15+32]
pxor xmm1, xmm7
pxor xmm2, xmm7
pxor xmm1, xmm6
pxor xmm3, xmm6
pxor xmm1, xmm4
; GHASH block 3: data at [rdx-96] times the H power at [rsp+80]; AES round 3.
movdqu xmm7, OWORD PTR [rsp+80]
movdqu xmm0, OWORD PTR [rdx+-96]
pshufd xmm4, xmm7, 78
pshufb xmm0, OWORD PTR L_aes_gcm_bswap_mask
aesenc xmm8, [r15+48]
pxor xmm4, xmm7
pshufd xmm5, xmm0, 78
pxor xmm5, xmm0
movdqa xmm6, xmm0
pclmulqdq xmm6, xmm7, 17
aesenc xmm9, [r15+48]
aesenc xmm10, [r15+48]
pclmulqdq xmm7, xmm0, 0
aesenc xmm11, [r15+48]
aesenc xmm12, [r15+48]
pclmulqdq xmm4, xmm5, 0
aesenc xmm13, [r15+48]
aesenc xmm14, [r15+48]
aesenc xmm15, [r15+48]
pxor xmm1, xmm7
pxor xmm2, xmm7
pxor xmm1, xmm6
pxor xmm3, xmm6
pxor xmm1, xmm4
; GHASH block 4: data at [rdx-80] times the H power at [rsp+64]; AES round 4.
movdqu xmm7, OWORD PTR [rsp+64]
movdqu xmm0, OWORD PTR [rdx+-80]
pshufd xmm4, xmm7, 78
pshufb xmm0, OWORD PTR L_aes_gcm_bswap_mask
aesenc xmm8, [r15+64]
pxor xmm4, xmm7
pshufd xmm5, xmm0, 78
pxor xmm5, xmm0
movdqa xmm6, xmm0
pclmulqdq xmm6, xmm7, 17
aesenc xmm9, [r15+64]
aesenc xmm10, [r15+64]
pclmulqdq xmm7, xmm0, 0
aesenc xmm11, [r15+64]
aesenc xmm12, [r15+64]
pclmulqdq xmm4, xmm5, 0
aesenc xmm13, [r15+64]
aesenc xmm14, [r15+64]
aesenc xmm15, [r15+64]
pxor xmm1, xmm7
pxor xmm2, xmm7
pxor xmm1, xmm6
pxor xmm3, xmm6
pxor xmm1, xmm4
; GHASH block 5: data at [rdx-64] times the H power at [rsp+48]; AES round 5.
movdqu xmm7, OWORD PTR [rsp+48]
movdqu xmm0, OWORD PTR [rdx+-64]
pshufd xmm4, xmm7, 78
pshufb xmm0, OWORD PTR L_aes_gcm_bswap_mask
aesenc xmm8, [r15+80]
pxor xmm4, xmm7
pshufd xmm5, xmm0, 78
pxor xmm5, xmm0
movdqa xmm6, xmm0
pclmulqdq xmm6, xmm7, 17
aesenc xmm9, [r15+80]
aesenc xmm10, [r15+80]
pclmulqdq xmm7, xmm0, 0
aesenc xmm11, [r15+80]
aesenc xmm12, [r15+80]
pclmulqdq xmm4, xmm5, 0
aesenc xmm13, [r15+80]
aesenc xmm14, [r15+80]
aesenc xmm15, [r15+80]
pxor xmm1, xmm7
pxor xmm2, xmm7
pxor xmm1, xmm6
pxor xmm3, xmm6
pxor xmm1, xmm4
; GHASH block 6: data at [rdx-48] times the H power at [rsp+32]; AES round 6.
movdqu xmm7, OWORD PTR [rsp+32]
movdqu xmm0, OWORD PTR [rdx+-48]
pshufd xmm4, xmm7, 78
pshufb xmm0, OWORD PTR L_aes_gcm_bswap_mask
aesenc xmm8, [r15+96]
pxor xmm4, xmm7
pshufd xmm5, xmm0, 78
pxor xmm5, xmm0
movdqa xmm6, xmm0
pclmulqdq xmm6, xmm7, 17
aesenc xmm9, [r15+96]
aesenc xmm10, [r15+96]
pclmulqdq xmm7, xmm0, 0
aesenc xmm11, [r15+96]
aesenc xmm12, [r15+96]
pclmulqdq xmm4, xmm5, 0
aesenc xmm13, [r15+96]
aesenc xmm14, [r15+96]
aesenc xmm15, [r15+96]
pxor xmm1, xmm7
pxor xmm2, xmm7
pxor xmm1, xmm6
pxor xmm3, xmm6
pxor xmm1, xmm4
; GHASH block 7: data at [rdx-32] times the H power at [rsp+16]; AES round 7.
movdqu xmm7, OWORD PTR [rsp+16]
movdqu xmm0, OWORD PTR [rdx+-32]
pshufd xmm4, xmm7, 78
pshufb xmm0, OWORD PTR L_aes_gcm_bswap_mask
aesenc xmm8, [r15+112]
pxor xmm4, xmm7
pshufd xmm5, xmm0, 78
pxor xmm5, xmm0
movdqa xmm6, xmm0
pclmulqdq xmm6, xmm7, 17
aesenc xmm9, [r15+112]
aesenc xmm10, [r15+112]
pclmulqdq xmm7, xmm0, 0
aesenc xmm11, [r15+112]
aesenc xmm12, [r15+112]
pclmulqdq xmm4, xmm5, 0
aesenc xmm13, [r15+112]
aesenc xmm14, [r15+112]
aesenc xmm15, [r15+112]
pxor xmm1, xmm7
pxor xmm2, xmm7
pxor xmm1, xmm6
pxor xmm3, xmm6
pxor xmm1, xmm4
; GHASH block 8: data at [rdx-16] times the H power at [rsp]; AES round 8.
movdqu xmm7, OWORD PTR [rsp]
movdqu xmm0, OWORD PTR [rdx+-16]
pshufd xmm4, xmm7, 78
pshufb xmm0, OWORD PTR L_aes_gcm_bswap_mask
aesenc xmm8, [r15+128]
pxor xmm4, xmm7
pshufd xmm5, xmm0, 78
pxor xmm5, xmm0
movdqa xmm6, xmm0
pclmulqdq xmm6, xmm7, 17
aesenc xmm9, [r15+128]
aesenc xmm10, [r15+128]
pclmulqdq xmm7, xmm0, 0
aesenc xmm11, [r15+128]
aesenc xmm12, [r15+128]
pclmulqdq xmm4, xmm5, 0
aesenc xmm13, [r15+128]
aesenc xmm14, [r15+128]
aesenc xmm15, [r15+128]
pxor xmm1, xmm7
pxor xmm2, xmm7
pxor xmm1, xmm6
pxor xmm3, xmm6
pxor xmm1, xmm4
; Fold the middle Karatsuba term into the 256-bit product (xmm2:xmm3),
; then reduce modulo the GHASH polynomial x^128 + x^7 + x^2 + x + 1
; (shift-by-31/30/25 then 1/2/7 sequence); AES round 9 interleaved.
; Result: xmm2 = updated GHASH accumulator X.
movdqa xmm5, xmm1
psrldq xmm1, 8
pslldq xmm5, 8
aesenc xmm8, [r15+144]
pxor xmm2, xmm5
pxor xmm3, xmm1
movdqa xmm7, xmm2
movdqa xmm4, xmm2
movdqa xmm5, xmm2
aesenc xmm9, [r15+144]
pslld xmm7, 31
pslld xmm4, 30
pslld xmm5, 25
aesenc xmm10, [r15+144]
pxor xmm7, xmm4
pxor xmm7, xmm5
aesenc xmm11, [r15+144]
movdqa xmm4, xmm7
pslldq xmm7, 12
psrldq xmm4, 4
aesenc xmm12, [r15+144]
pxor xmm2, xmm7
movdqa xmm5, xmm2
movdqa xmm1, xmm2
movdqa xmm0, xmm2
aesenc xmm13, [r15+144]
psrld xmm5, 1
psrld xmm1, 2
psrld xmm0, 7
aesenc xmm14, [r15+144]
pxor xmm5, xmm1
pxor xmm5, xmm0
aesenc xmm15, [r15+144]
pxor xmm5, xmm4
pxor xmm2, xmm5
pxor xmm2, xmm3
; Extra AES rounds for 192-bit (r10d >= 11 rounds... i.e. 12-round) and
; 256-bit (14-round) keys; r10d holds the round count.  10-round (128-bit)
; keys fall straight through to the final round.
cmp r10d, 11
movdqa xmm7, OWORD PTR [r15+160]
jl L_AES_GCM_encrypt_aesni_aesenc_128_ghash_avx_done
aesenc xmm8, xmm7
aesenc xmm9, xmm7
aesenc xmm10, xmm7
aesenc xmm11, xmm7
aesenc xmm12, xmm7
aesenc xmm13, xmm7
aesenc xmm14, xmm7
aesenc xmm15, xmm7
movdqa xmm7, OWORD PTR [r15+176]
aesenc xmm8, xmm7
aesenc xmm9, xmm7
aesenc xmm10, xmm7
aesenc xmm11, xmm7
aesenc xmm12, xmm7
aesenc xmm13, xmm7
aesenc xmm14, xmm7
aesenc xmm15, xmm7
cmp r10d, 13
movdqa xmm7, OWORD PTR [r15+192]
jl L_AES_GCM_encrypt_aesni_aesenc_128_ghash_avx_done
aesenc xmm8, xmm7
aesenc xmm9, xmm7
aesenc xmm10, xmm7
aesenc xmm11, xmm7
aesenc xmm12, xmm7
aesenc xmm13, xmm7
aesenc xmm14, xmm7
aesenc xmm15, xmm7
movdqa xmm7, OWORD PTR [r15+208]
aesenc xmm8, xmm7
aesenc xmm9, xmm7
aesenc xmm10, xmm7
aesenc xmm11, xmm7
aesenc xmm12, xmm7
aesenc xmm13, xmm7
aesenc xmm14, xmm7
aesenc xmm15, xmm7
movdqa xmm7, OWORD PTR [r15+224]
L_AES_GCM_encrypt_aesni_aesenc_128_ghash_avx_done:
; Final AES round for all key sizes (xmm7 = last round key), then XOR the
; keystream with 128 bytes of plaintext at [rcx] and store the ciphertext
; at [rdx], two blocks at a time.
aesenclast xmm8, xmm7
aesenclast xmm9, xmm7
movdqu xmm0, OWORD PTR [rcx]
movdqu xmm1, OWORD PTR [rcx+16]
pxor xmm8, xmm0
pxor xmm9, xmm1
movdqu OWORD PTR [rdx], xmm8
movdqu OWORD PTR [rdx+16], xmm9
aesenclast xmm10, xmm7
aesenclast xmm11, xmm7
movdqu xmm0, OWORD PTR [rcx+32]
movdqu xmm1, OWORD PTR [rcx+48]
pxor xmm10, xmm0
pxor xmm11, xmm1
movdqu OWORD PTR [rdx+32], xmm10
movdqu OWORD PTR [rdx+48], xmm11
aesenclast xmm12, xmm7
aesenclast xmm13, xmm7
movdqu xmm0, OWORD PTR [rcx+64]
movdqu xmm1, OWORD PTR [rcx+80]
pxor xmm12, xmm0
pxor xmm13, xmm1
movdqu OWORD PTR [rdx+64], xmm12
movdqu OWORD PTR [rdx+80], xmm13
aesenclast xmm14, xmm7
aesenclast xmm15, xmm7
movdqu xmm0, OWORD PTR [rcx+96]
movdqu xmm1, OWORD PTR [rcx+112]
pxor xmm14, xmm0
pxor xmm15, xmm1
movdqu OWORD PTR [rdx+96], xmm14
movdqu OWORD PTR [rdx+112], xmm15
; Advance the byte counter (ebx) and loop while below the 128-byte-aligned
; length limit in r13d.
add ebx, 128
cmp ebx, r13d
jl L_AES_GCM_encrypt_aesni_ghash_128
L_AES_GCM_encrypt_aesni_end_128:
; GHASH the final 8 ciphertext blocks still held in xmm8-xmm15: byte-swap
; each, fold the pending accumulator (xmm2) into the first, then do 8
; Karatsuba multiplies against H^8..H^1 from [rsp+112] down to [rsp],
; summing the 256-bit partial products into xmm4 (low) / xmm6 (high)
; and reducing once at the end.
movdqa xmm4, OWORD PTR L_aes_gcm_bswap_mask
pshufb xmm8, xmm4
pshufb xmm9, xmm4
pshufb xmm10, xmm4
pshufb xmm11, xmm4
pxor xmm8, xmm2
pshufb xmm12, xmm4
pshufb xmm13, xmm4
pshufb xmm14, xmm4
pshufb xmm15, xmm4
; block 1 * H power at [rsp+112]
movdqu xmm7, OWORD PTR [rsp+112]
pshufd xmm1, xmm8, 78
pshufd xmm2, xmm7, 78
movdqa xmm3, xmm7
movdqa xmm0, xmm7
pclmulqdq xmm3, xmm8, 17
pclmulqdq xmm0, xmm8, 0
pxor xmm1, xmm8
pxor xmm2, xmm7
pclmulqdq xmm1, xmm2, 0
pxor xmm1, xmm0
pxor xmm1, xmm3
movdqa xmm2, xmm1
movdqa xmm4, xmm0
movdqa xmm6, xmm3
pslldq xmm2, 8
psrldq xmm1, 8
pxor xmm4, xmm2
pxor xmm6, xmm1
; block 2 * H power at [rsp+96]
movdqu xmm7, OWORD PTR [rsp+96]
pshufd xmm1, xmm9, 78
pshufd xmm2, xmm7, 78
movdqa xmm3, xmm7
movdqa xmm0, xmm7
pclmulqdq xmm3, xmm9, 17
pclmulqdq xmm0, xmm9, 0
pxor xmm1, xmm9
pxor xmm2, xmm7
pclmulqdq xmm1, xmm2, 0
pxor xmm1, xmm0
pxor xmm1, xmm3
movdqa xmm2, xmm1
pxor xmm4, xmm0
pxor xmm6, xmm3
pslldq xmm2, 8
psrldq xmm1, 8
pxor xmm4, xmm2
pxor xmm6, xmm1
; block 3 * H power at [rsp+80]
movdqu xmm7, OWORD PTR [rsp+80]
pshufd xmm1, xmm10, 78
pshufd xmm2, xmm7, 78
movdqa xmm3, xmm7
movdqa xmm0, xmm7
pclmulqdq xmm3, xmm10, 17
pclmulqdq xmm0, xmm10, 0
pxor xmm1, xmm10
pxor xmm2, xmm7
pclmulqdq xmm1, xmm2, 0
pxor xmm1, xmm0
pxor xmm1, xmm3
movdqa xmm2, xmm1
pxor xmm4, xmm0
pxor xmm6, xmm3
pslldq xmm2, 8
psrldq xmm1, 8
pxor xmm4, xmm2
pxor xmm6, xmm1
; block 4 * H power at [rsp+64]
movdqu xmm7, OWORD PTR [rsp+64]
pshufd xmm1, xmm11, 78
pshufd xmm2, xmm7, 78
movdqa xmm3, xmm7
movdqa xmm0, xmm7
pclmulqdq xmm3, xmm11, 17
pclmulqdq xmm0, xmm11, 0
pxor xmm1, xmm11
pxor xmm2, xmm7
pclmulqdq xmm1, xmm2, 0
pxor xmm1, xmm0
pxor xmm1, xmm3
movdqa xmm2, xmm1
pxor xmm4, xmm0
pxor xmm6, xmm3
pslldq xmm2, 8
psrldq xmm1, 8
pxor xmm4, xmm2
pxor xmm6, xmm1
; block 5 * H power at [rsp+48]
movdqu xmm7, OWORD PTR [rsp+48]
pshufd xmm1, xmm12, 78
pshufd xmm2, xmm7, 78
movdqa xmm3, xmm7
movdqa xmm0, xmm7
pclmulqdq xmm3, xmm12, 17
pclmulqdq xmm0, xmm12, 0
pxor xmm1, xmm12
pxor xmm2, xmm7
pclmulqdq xmm1, xmm2, 0
pxor xmm1, xmm0
pxor xmm1, xmm3
movdqa xmm2, xmm1
pxor xmm4, xmm0
pxor xmm6, xmm3
pslldq xmm2, 8
psrldq xmm1, 8
pxor xmm4, xmm2
pxor xmm6, xmm1
; block 6 * H power at [rsp+32]
movdqu xmm7, OWORD PTR [rsp+32]
pshufd xmm1, xmm13, 78
pshufd xmm2, xmm7, 78
movdqa xmm3, xmm7
movdqa xmm0, xmm7
pclmulqdq xmm3, xmm13, 17
pclmulqdq xmm0, xmm13, 0
pxor xmm1, xmm13
pxor xmm2, xmm7
pclmulqdq xmm1, xmm2, 0
pxor xmm1, xmm0
pxor xmm1, xmm3
movdqa xmm2, xmm1
pxor xmm4, xmm0
pxor xmm6, xmm3
pslldq xmm2, 8
psrldq xmm1, 8
pxor xmm4, xmm2
pxor xmm6, xmm1
; block 7 * H power at [rsp+16]
movdqu xmm7, OWORD PTR [rsp+16]
pshufd xmm1, xmm14, 78
pshufd xmm2, xmm7, 78
movdqa xmm3, xmm7
movdqa xmm0, xmm7
pclmulqdq xmm3, xmm14, 17
pclmulqdq xmm0, xmm14, 0
pxor xmm1, xmm14
pxor xmm2, xmm7
pclmulqdq xmm1, xmm2, 0
pxor xmm1, xmm0
pxor xmm1, xmm3
movdqa xmm2, xmm1
pxor xmm4, xmm0
pxor xmm6, xmm3
pslldq xmm2, 8
psrldq xmm1, 8
pxor xmm4, xmm2
pxor xmm6, xmm1
; block 8 * H power at [rsp]
movdqu xmm7, OWORD PTR [rsp]
pshufd xmm1, xmm15, 78
pshufd xmm2, xmm7, 78
movdqa xmm3, xmm7
movdqa xmm0, xmm7
pclmulqdq xmm3, xmm15, 17
pclmulqdq xmm0, xmm15, 0
pxor xmm1, xmm15
pxor xmm2, xmm7
pclmulqdq xmm1, xmm2, 0
pxor xmm1, xmm0
pxor xmm1, xmm3
movdqa xmm2, xmm1
pxor xmm4, xmm0
pxor xmm6, xmm3
pslldq xmm2, 8
psrldq xmm1, 8
pxor xmm4, xmm2
pxor xmm6, xmm1
; Single reduction of the summed 256-bit product (xmm4:xmm6) modulo
; x^128 + x^7 + x^2 + x + 1; result accumulates into xmm6.
movdqa xmm0, xmm4
movdqa xmm1, xmm4
movdqa xmm2, xmm4
pslld xmm0, 31
pslld xmm1, 30
pslld xmm2, 25
pxor xmm0, xmm1
pxor xmm0, xmm2
movdqa xmm1, xmm0
psrldq xmm1, 4
pslldq xmm0, 12
pxor xmm4, xmm0
movdqa xmm2, xmm4
movdqa xmm3, xmm4
movdqa xmm0, xmm4
psrld xmm2, 1
psrld xmm3, 2
psrld xmm0, 7
pxor xmm2, xmm3
pxor xmm2, xmm0
pxor xmm2, xmm1
pxor xmm2, xmm4
pxor xmm6, xmm2
; Reload H (first power) into xmm5 for the tail-block processing below.
movdqu xmm5, OWORD PTR [rsp]
L_AES_GCM_encrypt_aesni_done_128:
; Process remaining whole 16-byte blocks one at a time.  ebx = bytes done,
; r9d = total plaintext length, r13d = length rounded down to 16.
mov edx, r9d
cmp ebx, edx
jge L_AES_GCM_encrypt_aesni_done_enc
mov r13d, r9d
and r13d, 4294967280
cmp ebx, r13d
jge L_AES_GCM_encrypt_aesni_last_block_done
; First single block: load counter from [rsp+128], byte-swap, increment the
; stored counter, and run a full AES encryption of it.
lea rcx, QWORD PTR [rdi+rbx]
lea rdx, QWORD PTR [rsi+rbx]
movdqu xmm8, OWORD PTR [rsp+128]
movdqa xmm9, xmm8
pshufb xmm8, OWORD PTR L_aes_gcm_bswap_epi64
paddd xmm9, OWORD PTR L_aes_gcm_one
pxor xmm8, [r15]
movdqu OWORD PTR [rsp+128], xmm9
aesenc xmm8, [r15+16]
aesenc xmm8, [r15+32]
aesenc xmm8, [r15+48]
aesenc xmm8, [r15+64]
aesenc xmm8, [r15+80]
aesenc xmm8, [r15+96]
aesenc xmm8, [r15+112]
aesenc xmm8, [r15+128]
aesenc xmm8, [r15+144]
; Extra rounds for 192/256-bit keys (round count in r10d).
cmp r10d, 11
movdqa xmm9, OWORD PTR [r15+160]
jl L_AES_GCM_encrypt_aesni_aesenc_block_aesenc_avx_last
aesenc xmm8, xmm9
aesenc xmm8, [r15+176]
cmp r10d, 13
movdqa xmm9, OWORD PTR [r15+192]
jl L_AES_GCM_encrypt_aesni_aesenc_block_aesenc_avx_last
aesenc xmm8, xmm9
aesenc xmm8, [r15+208]
movdqa xmm9, OWORD PTR [r15+224]
L_AES_GCM_encrypt_aesni_aesenc_block_aesenc_avx_last:
aesenclast xmm8, xmm9
; XOR keystream with plaintext, store ciphertext, and fold the byte-swapped
; ciphertext into the GHASH accumulator (xmm6).
movdqu xmm9, OWORD PTR [rcx]
pxor xmm8, xmm9
movdqu OWORD PTR [rdx], xmm8
pshufb xmm8, OWORD PTR L_aes_gcm_bswap_mask
pxor xmm6, xmm8
add ebx, 16
cmp ebx, r13d
jge L_AES_GCM_encrypt_aesni_last_block_ghash
L_AES_GCM_encrypt_aesni_last_block_start:
; Per-block loop: AES-encrypt the next counter block while, interleaved,
; performing the GF(2^128) multiply of the accumulator (xmm6) by H (xmm5)
; for the previous block, including the reduction via L_aes_gcm_mod2_128.
lea rcx, QWORD PTR [rdi+rbx]
lea rdx, QWORD PTR [rsi+rbx]
movdqu xmm8, OWORD PTR [rsp+128]
movdqa xmm9, xmm8
pshufb xmm8, OWORD PTR L_aes_gcm_bswap_epi64
paddd xmm9, OWORD PTR L_aes_gcm_one
pxor xmm8, [r15]
movdqu OWORD PTR [rsp+128], xmm9
; GFMUL: four partial products of xmm6 * xmm5
movdqa xmm10, xmm6
pclmulqdq xmm10, xmm5, 16
aesenc xmm8, [r15+16]
aesenc xmm8, [r15+32]
movdqa xmm11, xmm6
pclmulqdq xmm11, xmm5, 1
aesenc xmm8, [r15+48]
aesenc xmm8, [r15+64]
movdqa xmm12, xmm6
pclmulqdq xmm12, xmm5, 0
aesenc xmm8, [r15+80]
movdqa xmm1, xmm6
pclmulqdq xmm1, xmm5, 17
aesenc xmm8, [r15+96]
pxor xmm10, xmm11
movdqa xmm2, xmm10
psrldq xmm10, 8
pslldq xmm2, 8
aesenc xmm8, [r15+112]
movdqa xmm3, xmm1
pxor xmm2, xmm12
pxor xmm3, xmm10
; Montgomery-style reduction using the mod2_128 constant
movdqa xmm0, OWORD PTR L_aes_gcm_mod2_128
movdqa xmm11, xmm2
pclmulqdq xmm11, xmm0, 16
aesenc xmm8, [r15+128]
pshufd xmm10, xmm2, 78
pxor xmm10, xmm11
movdqa xmm11, xmm10
pclmulqdq xmm11, xmm0, 16
aesenc xmm8, [r15+144]
pshufd xmm6, xmm10, 78
pxor xmm6, xmm11
pxor xmm6, xmm3
; Extra rounds for 192/256-bit keys (round count in r10d).
cmp r10d, 11
movdqa xmm9, OWORD PTR [r15+160]
jl L_AES_GCM_encrypt_aesni_aesenc_gfmul_last
aesenc xmm8, xmm9
aesenc xmm8, [r15+176]
cmp r10d, 13
movdqa xmm9, OWORD PTR [r15+192]
jl L_AES_GCM_encrypt_aesni_aesenc_gfmul_last
aesenc xmm8, xmm9
aesenc xmm8, [r15+208]
movdqa xmm9, OWORD PTR [r15+224]
L_AES_GCM_encrypt_aesni_aesenc_gfmul_last:
aesenclast xmm8, xmm9
; Encrypt the block, store, and fold the ciphertext into the accumulator.
movdqu xmm9, OWORD PTR [rcx]
pxor xmm8, xmm9
movdqu OWORD PTR [rdx], xmm8
pshufb xmm8, OWORD PTR L_aes_gcm_bswap_mask
pxor xmm6, xmm8
add ebx, 16
cmp ebx, r13d
jl L_AES_GCM_encrypt_aesni_last_block_start
L_AES_GCM_encrypt_aesni_last_block_ghash:
; Final GHASH multiply for the last full block: xmm6 = (xmm6 * H) reduced
; modulo x^128 + x^7 + x^2 + x + 1 (Karatsuba multiply then shift-based
; reduction).
pshufd xmm9, xmm5, 78
pshufd xmm10, xmm6, 78
movdqa xmm11, xmm6
movdqa xmm8, xmm6
pclmulqdq xmm11, xmm5, 17
pclmulqdq xmm8, xmm5, 0
pxor xmm9, xmm5
pxor xmm10, xmm6
pclmulqdq xmm9, xmm10, 0
pxor xmm9, xmm8
pxor xmm9, xmm11
movdqa xmm10, xmm9
movdqa xmm6, xmm11
pslldq xmm10, 8
psrldq xmm9, 8
pxor xmm8, xmm10
pxor xmm6, xmm9
movdqa xmm12, xmm8
movdqa xmm13, xmm8
movdqa xmm14, xmm8
pslld xmm12, 31
pslld xmm13, 30
pslld xmm14, 25
pxor xmm12, xmm13
pxor xmm12, xmm14
movdqa xmm13, xmm12
psrldq xmm13, 4
pslldq xmm12, 12
pxor xmm8, xmm12
movdqa xmm14, xmm8
movdqa xmm10, xmm8
movdqa xmm9, xmm8
psrld xmm14, 1
psrld xmm10, 2
psrld xmm9, 7
pxor xmm14, xmm10
pxor xmm14, xmm9
pxor xmm14, xmm13
pxor xmm14, xmm8
pxor xmm6, xmm14
L_AES_GCM_encrypt_aesni_last_block_done:
; Handle a final partial block of 1..15 bytes (r9d & 15).  Encrypt one more
; counter block, XOR byte-by-byte via a 16-byte stack scratch buffer,
; zero-pad the remainder, then GHASH the padded block.
mov ecx, r9d
mov edx, ecx
and ecx, 15
jz L_AES_GCM_encrypt_aesni_aesenc_last15_enc_avx_done
movdqu xmm4, OWORD PTR [rsp+128]
pshufb xmm4, OWORD PTR L_aes_gcm_bswap_epi64
pxor xmm4, [r15]
aesenc xmm4, [r15+16]
aesenc xmm4, [r15+32]
aesenc xmm4, [r15+48]
aesenc xmm4, [r15+64]
aesenc xmm4, [r15+80]
aesenc xmm4, [r15+96]
aesenc xmm4, [r15+112]
aesenc xmm4, [r15+128]
aesenc xmm4, [r15+144]
; Extra rounds for 192/256-bit keys (round count in r10d).
cmp r10d, 11
movdqa xmm9, OWORD PTR [r15+160]
jl L_AES_GCM_encrypt_aesni_aesenc_last15_enc_avx_aesenc_avx_last
aesenc xmm4, xmm9
aesenc xmm4, [r15+176]
cmp r10d, 13
movdqa xmm9, OWORD PTR [r15+192]
jl L_AES_GCM_encrypt_aesni_aesenc_last15_enc_avx_aesenc_avx_last
aesenc xmm4, xmm9
aesenc xmm4, [r15+208]
movdqa xmm9, OWORD PTR [r15+224]
L_AES_GCM_encrypt_aesni_aesenc_last15_enc_avx_aesenc_avx_last:
aesenclast xmm4, xmm9
; Spill the keystream block to a temporary 16-byte stack buffer and XOR the
; remaining plaintext bytes into it, writing ciphertext bytes out as we go.
sub rsp, 16
xor ecx, ecx
movdqu OWORD PTR [rsp], xmm4
L_AES_GCM_encrypt_aesni_aesenc_last15_enc_avx_loop:
movzx r13d, BYTE PTR [rdi+rbx]
xor r13b, BYTE PTR [rsp+rcx]
mov BYTE PTR [rsi+rbx], r13b
mov BYTE PTR [rsp+rcx], r13b
inc ebx
inc ecx
cmp ebx, edx
jl L_AES_GCM_encrypt_aesni_aesenc_last15_enc_avx_loop
; Zero-pad the scratch buffer to a full 16 bytes for GHASH.
xor r13, r13
cmp ecx, 16
je L_AES_GCM_encrypt_aesni_aesenc_last15_enc_avx_finish_enc
L_AES_GCM_encrypt_aesni_aesenc_last15_enc_avx_byte_loop:
mov BYTE PTR [rsp+rcx], r13b
inc ecx
cmp ecx, 16
jl L_AES_GCM_encrypt_aesni_aesenc_last15_enc_avx_byte_loop
L_AES_GCM_encrypt_aesni_aesenc_last15_enc_avx_finish_enc:
; GHASH the padded ciphertext block: fold into xmm6 then multiply by H
; (xmm5) with Karatsuba + shift reduction.
movdqu xmm4, OWORD PTR [rsp]
add rsp, 16
pshufb xmm4, OWORD PTR L_aes_gcm_bswap_mask
pxor xmm6, xmm4
pshufd xmm9, xmm5, 78
pshufd xmm10, xmm6, 78
movdqa xmm11, xmm6
movdqa xmm8, xmm6
pclmulqdq xmm11, xmm5, 17
pclmulqdq xmm8, xmm5, 0
pxor xmm9, xmm5
pxor xmm10, xmm6
pclmulqdq xmm9, xmm10, 0
pxor xmm9, xmm8
pxor xmm9, xmm11
movdqa xmm10, xmm9
movdqa xmm6, xmm11
pslldq xmm10, 8
psrldq xmm9, 8
pxor xmm8, xmm10
pxor xmm6, xmm9
movdqa xmm12, xmm8
movdqa xmm13, xmm8
movdqa xmm14, xmm8
pslld xmm12, 31
pslld xmm13, 30
pslld xmm14, 25
pxor xmm12, xmm13
pxor xmm12, xmm14
movdqa xmm13, xmm12
psrldq xmm13, 4
pslldq xmm12, 12
pxor xmm8, xmm12
movdqa xmm14, xmm8
movdqa xmm10, xmm8
movdqa xmm9, xmm8
psrld xmm14, 1
psrld xmm10, 2
psrld xmm9, 7
pxor xmm14, xmm10
pxor xmm14, xmm9
pxor xmm14, xmm13
pxor xmm14, xmm8
pxor xmm6, xmm14
L_AES_GCM_encrypt_aesni_done_enc:
mov edx, r9d
mov ecx, r11d
shl rdx, 3
shl rcx, 3
pinsrq xmm0, rdx, 0
pinsrq xmm0, rcx, 1
pxor xmm6, xmm0
pshufd xmm9, xmm5, 78
pshufd xmm10, xmm6, 78
movdqa xmm11, xmm6
movdqa xmm8, xmm6
pclmulqdq xmm11, xmm5, 17
pclmulqdq xmm8, xmm5, 0
pxor xmm9, xmm5
pxor xmm10, xmm6
pclmulqdq xmm9, xmm10, 0
pxor xmm9, xmm8
pxor xmm9, xmm11
movdqa xmm10, xmm9
movdqa xmm6, xmm11
pslldq xmm10, 8
psrldq xmm9, 8
pxor xmm8, xmm10
pxor xmm6, xmm9
movdqa xmm12, xmm8
movdqa xmm13, xmm8
movdqa xmm14, xmm8
pslld xmm12, 31
pslld xmm13, 30
pslld xmm14, 25
pxor xmm12, xmm13
pxor xmm12, xmm14
movdqa xmm13, xmm12
psrldq xmm13, 4
pslldq xmm12, 12
pxor xmm8, xmm12
movdqa xmm14, xmm8
movdqa xmm10, xmm8
movdqa xmm9, xmm8
psrld xmm14, 1
psrld xmm10, 2
psrld xmm9, 7
pxor xmm14, xmm10
pxor xmm14, xmm9
pxor xmm14, xmm13
pxor xmm14, xmm8
pxor xmm6, xmm14
pshufb xmm6, OWORD PTR L_aes_gcm_bswap_mask
movdqu xmm0, OWORD PTR [rsp+144]
pxor xmm0, xmm6
cmp r14d, 16
je L_AES_GCM_encrypt_aesni_store_tag_16
xor rcx, rcx
movdqu OWORD PTR [rsp], xmm0
L_AES_GCM_encrypt_aesni_store_tag_loop:
movzx r13d, BYTE PTR [rsp+rcx]
mov BYTE PTR [r8+rcx], r13b
inc ecx
cmp ecx, r14d
jne L_AES_GCM_encrypt_aesni_store_tag_loop
jmp L_AES_GCM_encrypt_aesni_store_tag_done
L_AES_GCM_encrypt_aesni_store_tag_16:
movdqu OWORD PTR [r8], xmm0
L_AES_GCM_encrypt_aesni_store_tag_done:
movdqu xmm6, OWORD PTR [rsp+160]
movdqu xmm7, OWORD PTR [rsp+176]
movdqu xmm8, OWORD PTR [rsp+192]
movdqu xmm9, OWORD PTR [rsp+208]
movdqu xmm10, OWORD PTR [rsp+224]
movdqu xmm11, OWORD PTR [rsp+240]
movdqu xmm12, OWORD PTR [rsp+256]
movdqu xmm13, OWORD PTR [rsp+272]
movdqu xmm14, OWORD PTR [rsp+288]
movdqu xmm15, OWORD PTR [rsp+304]
add rsp, 320
pop r15
pop r14
pop rbx
pop r12
pop rsi
pop rdi
pop r13
ret
AES_GCM_encrypt_aesni ENDP
_text ENDS
_text SEGMENT READONLY PARA
AES_GCM_decrypt_aesni PROC
; AES-GCM decrypt (AES-NI + PCLMULQDQ), Windows x64 ABI.
; Register args: rcx=in, rdx=out, r8=aad, r9=iv; remaining args are read
; from the caller's stack below.  Saves all non-volatile GPRs and
; xmm6-xmm15 (required by the Microsoft x64 calling convention).
push r13
push rdi
push rsi
push r12
push rbx
push r14
push r15
push rbp
mov rdi, rcx
mov rsi, rdx
mov r12, r8
mov rax, r9
; Stack args: r8=authTag, r9d=plaintext len, r11d=aad len, ebx=iv len,
; r14d=tag len, r15=expanded key schedule, r10d=round count, rbp=result ptr
; -- offsets include the 8 pushes above; TODO confirm against the C
; prototype in the caller.
mov r8, QWORD PTR [rsp+104]
mov r9d, DWORD PTR [rsp+112]
mov r11d, DWORD PTR [rsp+120]
mov ebx, DWORD PTR [rsp+128]
mov r14d, DWORD PTR [rsp+136]
mov r15, QWORD PTR [rsp+144]
mov r10d, DWORD PTR [rsp+152]
mov rbp, QWORD PTR [rsp+160]
sub rsp, 328
movdqu OWORD PTR [rsp+168], xmm6
movdqu OWORD PTR [rsp+184], xmm7
movdqu OWORD PTR [rsp+200], xmm8
movdqu OWORD PTR [rsp+216], xmm9
movdqu OWORD PTR [rsp+232], xmm10
movdqu OWORD PTR [rsp+248], xmm11
movdqu OWORD PTR [rsp+264], xmm12
movdqu OWORD PTR [rsp+280], xmm13
movdqu OWORD PTR [rsp+296], xmm14
movdqu OWORD PTR [rsp+312], xmm15
pxor xmm4, xmm4
pxor xmm6, xmm6
cmp ebx, 12
mov edx, ebx
jne L_AES_GCM_decrypt_aesni_iv_not_12
; # Calculate values when IV is 12 bytes
; Set counter based on IV
; counter = IV || 0x00000001 (big-endian 32-bit block counter)
mov ecx, 16777216
pinsrq xmm4, QWORD PTR [rax], 0
pinsrd xmm4, DWORD PTR [rax+8], 2
pinsrd xmm4, ecx, 3
; H = Encrypt X(=0) and T = Encrypt counter
; Encrypt the zero block (-> H) and the initial counter block (-> T)
; in parallel through the same round keys.
movdqa xmm1, xmm4
movdqa xmm5, OWORD PTR [r15]
pxor xmm1, xmm5
movdqa xmm7, OWORD PTR [r15+16]
aesenc xmm5, xmm7
aesenc xmm1, xmm7
movdqa xmm7, OWORD PTR [r15+32]
aesenc xmm5, xmm7
aesenc xmm1, xmm7
movdqa xmm7, OWORD PTR [r15+48]
aesenc xmm5, xmm7
aesenc xmm1, xmm7
movdqa xmm7, OWORD PTR [r15+64]
aesenc xmm5, xmm7
aesenc xmm1, xmm7
movdqa xmm7, OWORD PTR [r15+80]
aesenc xmm5, xmm7
aesenc xmm1, xmm7
movdqa xmm7, OWORD PTR [r15+96]
aesenc xmm5, xmm7
aesenc xmm1, xmm7
movdqa xmm7, OWORD PTR [r15+112]
aesenc xmm5, xmm7
aesenc xmm1, xmm7
movdqa xmm7, OWORD PTR [r15+128]
aesenc xmm5, xmm7
aesenc xmm1, xmm7
movdqa xmm7, OWORD PTR [r15+144]
aesenc xmm5, xmm7
aesenc xmm1, xmm7
; Extra rounds for 192/256-bit keys (round count in r10d).
cmp r10d, 11
movdqa xmm7, OWORD PTR [r15+160]
jl L_AES_GCM_decrypt_aesni_calc_iv_12_last
aesenc xmm5, xmm7
aesenc xmm1, xmm7
movdqa xmm7, OWORD PTR [r15+176]
aesenc xmm5, xmm7
aesenc xmm1, xmm7
cmp r10d, 13
movdqa xmm7, OWORD PTR [r15+192]
jl L_AES_GCM_decrypt_aesni_calc_iv_12_last
aesenc xmm5, xmm7
aesenc xmm1, xmm7
movdqa xmm7, OWORD PTR [r15+208]
aesenc xmm5, xmm7
aesenc xmm1, xmm7
movdqa xmm7, OWORD PTR [r15+224]
L_AES_GCM_decrypt_aesni_calc_iv_12_last:
aesenclast xmm5, xmm7
aesenclast xmm1, xmm7
; xmm5 = H (byte-swapped for GHASH); save T at [rsp+144] for the tag check.
pshufb xmm5, OWORD PTR L_aes_gcm_bswap_mask
movdqu OWORD PTR [rsp+144], xmm1
jmp L_AES_GCM_decrypt_aesni_iv_done
L_AES_GCM_decrypt_aesni_iv_not_12:
; Calculate values when IV is not 12 bytes
; H = Encrypt X(=0)
movdqa xmm5, OWORD PTR [r15]
aesenc xmm5, [r15+16]
aesenc xmm5, [r15+32]
aesenc xmm5, [r15+48]
aesenc xmm5, [r15+64]
aesenc xmm5, [r15+80]
aesenc xmm5, [r15+96]
aesenc xmm5, [r15+112]
aesenc xmm5, [r15+128]
aesenc xmm5, [r15+144]
; Extra rounds for 192/256-bit keys (round count in r10d).
cmp r10d, 11
movdqa xmm9, OWORD PTR [r15+160]
jl L_AES_GCM_decrypt_aesni_calc_iv_1_aesenc_avx_last
aesenc xmm5, xmm9
aesenc xmm5, [r15+176]
cmp r10d, 13
movdqa xmm9, OWORD PTR [r15+192]
jl L_AES_GCM_decrypt_aesni_calc_iv_1_aesenc_avx_last
aesenc xmm5, xmm9
aesenc xmm5, [r15+208]
movdqa xmm9, OWORD PTR [r15+224]
L_AES_GCM_decrypt_aesni_calc_iv_1_aesenc_avx_last:
aesenclast xmm5, xmm9
pshufb xmm5, OWORD PTR L_aes_gcm_bswap_mask
; Calc counter
; Initialization vector
; Counter J0 = GHASH(IV): process whole 16-byte IV chunks first (edx = IV
; length, rcx = offset).
cmp edx, 0
mov rcx, 0
je L_AES_GCM_decrypt_aesni_calc_iv_done
cmp edx, 16
jl L_AES_GCM_decrypt_aesni_calc_iv_lt16
and edx, 4294967280
L_AES_GCM_decrypt_aesni_calc_iv_16_loop:
; GHASH one IV block: xmm4 = (xmm4 ^ block) * H, Karatsuba multiply,
; shift left by 1 (bit-reflected convention), then reduce modulo
; x^128 + x^7 + x^2 + x + 1.
movdqu xmm8, OWORD PTR [rax+rcx]
pshufb xmm8, OWORD PTR L_aes_gcm_bswap_mask
pxor xmm4, xmm8
pshufd xmm1, xmm4, 78
pshufd xmm2, xmm5, 78
movdqa xmm3, xmm5
movdqa xmm0, xmm5
pclmulqdq xmm3, xmm4, 17
pclmulqdq xmm0, xmm4, 0
pxor xmm1, xmm4
pxor xmm2, xmm5
pclmulqdq xmm1, xmm2, 0
pxor xmm1, xmm0
pxor xmm1, xmm3
movdqa xmm2, xmm1
movdqa xmm7, xmm0
movdqa xmm4, xmm3
pslldq xmm2, 8
psrldq xmm1, 8
pxor xmm7, xmm2
pxor xmm4, xmm1
; shift 256-bit product left by one bit
movdqa xmm0, xmm7
movdqa xmm1, xmm4
psrld xmm0, 31
psrld xmm1, 31
pslld xmm7, 1
pslld xmm4, 1
movdqa xmm2, xmm0
pslldq xmm0, 4
psrldq xmm2, 12
pslldq xmm1, 4
por xmm4, xmm2
por xmm7, xmm0
por xmm4, xmm1
; reduction
movdqa xmm0, xmm7
movdqa xmm1, xmm7
movdqa xmm2, xmm7
pslld xmm0, 31
pslld xmm1, 30
pslld xmm2, 25
pxor xmm0, xmm1
pxor xmm0, xmm2
movdqa xmm1, xmm0
psrldq xmm1, 4
pslldq xmm0, 12
pxor xmm7, xmm0
movdqa xmm2, xmm7
movdqa xmm3, xmm7
movdqa xmm0, xmm7
psrld xmm2, 1
psrld xmm3, 2
psrld xmm0, 7
pxor xmm2, xmm3
pxor xmm2, xmm0
pxor xmm2, xmm1
pxor xmm2, xmm7
pxor xmm4, xmm2
add ecx, 16
cmp ecx, edx
jl L_AES_GCM_decrypt_aesni_calc_iv_16_loop
mov edx, ebx
cmp ecx, edx
je L_AES_GCM_decrypt_aesni_calc_iv_done
L_AES_GCM_decrypt_aesni_calc_iv_lt16:
; Copy the final 1..15 IV bytes into a zeroed 16-byte stack buffer and
; GHASH the padded block the same way.
sub rsp, 16
pxor xmm8, xmm8
xor ebx, ebx
movdqu OWORD PTR [rsp], xmm8
L_AES_GCM_decrypt_aesni_calc_iv_loop:
movzx r13d, BYTE PTR [rax+rcx]
mov BYTE PTR [rsp+rbx], r13b
inc ecx
inc ebx
cmp ecx, edx
jl L_AES_GCM_decrypt_aesni_calc_iv_loop
movdqu xmm8, OWORD PTR [rsp]
add rsp, 16
pshufb xmm8, OWORD PTR L_aes_gcm_bswap_mask
pxor xmm4, xmm8
pshufd xmm1, xmm4, 78
pshufd xmm2, xmm5, 78
movdqa xmm3, xmm5
movdqa xmm0, xmm5
pclmulqdq xmm3, xmm4, 17
pclmulqdq xmm0, xmm4, 0
pxor xmm1, xmm4
pxor xmm2, xmm5
pclmulqdq xmm1, xmm2, 0
pxor xmm1, xmm0
pxor xmm1, xmm3
movdqa xmm2, xmm1
movdqa xmm7, xmm0
movdqa xmm4, xmm3
pslldq xmm2, 8
psrldq xmm1, 8
pxor xmm7, xmm2
pxor xmm4, xmm1
movdqa xmm0, xmm7
movdqa xmm1, xmm4
psrld xmm0, 31
psrld xmm1, 31
pslld xmm7, 1
pslld xmm4, 1
movdqa xmm2, xmm0
pslldq xmm0, 4
psrldq xmm2, 12
pslldq xmm1, 4
por xmm4, xmm2
por xmm7, xmm0
por xmm4, xmm1
movdqa xmm0, xmm7
movdqa xmm1, xmm7
movdqa xmm2, xmm7
pslld xmm0, 31
pslld xmm1, 30
pslld xmm2, 25
pxor xmm0, xmm1
pxor xmm0, xmm2
movdqa xmm1, xmm0
psrldq xmm1, 4
pslldq xmm0, 12
pxor xmm7, xmm0
movdqa xmm2, xmm7
movdqa xmm3, xmm7
movdqa xmm0, xmm7
psrld xmm2, 1
psrld xmm3, 2
psrld xmm0, 7
pxor xmm2, xmm3
pxor xmm2, xmm0
pxor xmm2, xmm1
pxor xmm2, xmm7
pxor xmm4, xmm2
L_AES_GCM_decrypt_aesni_calc_iv_done:
; T = Encrypt counter
; Finish GHASH(IV): XOR in the 64-bit IV bit-length, multiply by H once
; more (same Karatsuba + shift-reduce sequence), byte-swap to get J0.
pxor xmm0, xmm0
shl edx, 3
pinsrq xmm0, rdx, 0
pxor xmm4, xmm0
pshufd xmm1, xmm4, 78
pshufd xmm2, xmm5, 78
movdqa xmm3, xmm5
movdqa xmm0, xmm5
pclmulqdq xmm3, xmm4, 17
pclmulqdq xmm0, xmm4, 0
pxor xmm1, xmm4
pxor xmm2, xmm5
pclmulqdq xmm1, xmm2, 0
pxor xmm1, xmm0
pxor xmm1, xmm3
movdqa xmm2, xmm1
movdqa xmm7, xmm0
movdqa xmm4, xmm3
pslldq xmm2, 8
psrldq xmm1, 8
pxor xmm7, xmm2
pxor xmm4, xmm1
movdqa xmm0, xmm7
movdqa xmm1, xmm4
psrld xmm0, 31
psrld xmm1, 31
pslld xmm7, 1
pslld xmm4, 1
movdqa xmm2, xmm0
pslldq xmm0, 4
psrldq xmm2, 12
pslldq xmm1, 4
por xmm4, xmm2
por xmm7, xmm0
por xmm4, xmm1
movdqa xmm0, xmm7
movdqa xmm1, xmm7
movdqa xmm2, xmm7
pslld xmm0, 31
pslld xmm1, 30
pslld xmm2, 25
pxor xmm0, xmm1
pxor xmm0, xmm2
movdqa xmm1, xmm0
psrldq xmm1, 4
pslldq xmm0, 12
pxor xmm7, xmm0
movdqa xmm2, xmm7
movdqa xmm3, xmm7
movdqa xmm0, xmm7
psrld xmm2, 1
psrld xmm3, 2
psrld xmm0, 7
pxor xmm2, xmm3
pxor xmm2, xmm0
pxor xmm2, xmm1
pxor xmm2, xmm7
pxor xmm4, xmm2
pshufb xmm4, OWORD PTR L_aes_gcm_bswap_mask
; Encrypt counter
; T = AES(J0); saved at [rsp+144] for the final tag comparison.
movdqa xmm8, OWORD PTR [r15]
pxor xmm8, xmm4
aesenc xmm8, [r15+16]
aesenc xmm8, [r15+32]
aesenc xmm8, [r15+48]
aesenc xmm8, [r15+64]
aesenc xmm8, [r15+80]
aesenc xmm8, [r15+96]
aesenc xmm8, [r15+112]
aesenc xmm8, [r15+128]
aesenc xmm8, [r15+144]
cmp r10d, 11
movdqa xmm9, OWORD PTR [r15+160]
jl L_AES_GCM_decrypt_aesni_calc_iv_2_aesenc_avx_last
aesenc xmm8, xmm9
aesenc xmm8, [r15+176]
cmp r10d, 13
movdqa xmm9, OWORD PTR [r15+192]
jl L_AES_GCM_decrypt_aesni_calc_iv_2_aesenc_avx_last
aesenc xmm8, xmm9
aesenc xmm8, [r15+208]
movdqa xmm9, OWORD PTR [r15+224]
L_AES_GCM_decrypt_aesni_calc_iv_2_aesenc_avx_last:
aesenclast xmm8, xmm9
movdqu OWORD PTR [rsp+144], xmm8
L_AES_GCM_decrypt_aesni_iv_done:
; Additional authentication data
; GHASH the AAD (r12 = aad pointer, r11d = aad length) into xmm6:
; whole 16-byte blocks first, then a zero-padded partial block.
mov edx, r11d
cmp edx, 0
je L_AES_GCM_decrypt_aesni_calc_aad_done
xor ecx, ecx
cmp edx, 16
jl L_AES_GCM_decrypt_aesni_calc_aad_lt16
and edx, 4294967280
L_AES_GCM_decrypt_aesni_calc_aad_16_loop:
; xmm6 = (xmm6 ^ aad_block) * H: Karatsuba multiply, shift left by one
; bit, reduce modulo x^128 + x^7 + x^2 + x + 1.
movdqu xmm8, OWORD PTR [r12+rcx]
pshufb xmm8, OWORD PTR L_aes_gcm_bswap_mask
pxor xmm6, xmm8
pshufd xmm1, xmm6, 78
pshufd xmm2, xmm5, 78
movdqa xmm3, xmm5
movdqa xmm0, xmm5
pclmulqdq xmm3, xmm6, 17
pclmulqdq xmm0, xmm6, 0
pxor xmm1, xmm6
pxor xmm2, xmm5
pclmulqdq xmm1, xmm2, 0
pxor xmm1, xmm0
pxor xmm1, xmm3
movdqa xmm2, xmm1
movdqa xmm7, xmm0
movdqa xmm6, xmm3
pslldq xmm2, 8
psrldq xmm1, 8
pxor xmm7, xmm2
pxor xmm6, xmm1
movdqa xmm0, xmm7
movdqa xmm1, xmm6
psrld xmm0, 31
psrld xmm1, 31
pslld xmm7, 1
pslld xmm6, 1
movdqa xmm2, xmm0
pslldq xmm0, 4
psrldq xmm2, 12
pslldq xmm1, 4
por xmm6, xmm2
por xmm7, xmm0
por xmm6, xmm1
movdqa xmm0, xmm7
movdqa xmm1, xmm7
movdqa xmm2, xmm7
pslld xmm0, 31
pslld xmm1, 30
pslld xmm2, 25
pxor xmm0, xmm1
pxor xmm0, xmm2
movdqa xmm1, xmm0
psrldq xmm1, 4
pslldq xmm0, 12
pxor xmm7, xmm0
movdqa xmm2, xmm7
movdqa xmm3, xmm7
movdqa xmm0, xmm7
psrld xmm2, 1
psrld xmm3, 2
psrld xmm0, 7
pxor xmm2, xmm3
pxor xmm2, xmm0
pxor xmm2, xmm1
pxor xmm2, xmm7
pxor xmm6, xmm2
add ecx, 16
cmp ecx, edx
jl L_AES_GCM_decrypt_aesni_calc_aad_16_loop
mov edx, r11d
cmp ecx, edx
je L_AES_GCM_decrypt_aesni_calc_aad_done
L_AES_GCM_decrypt_aesni_calc_aad_lt16:
; Copy the final 1..15 AAD bytes into a zeroed 16-byte stack buffer and
; GHASH the padded block.
sub rsp, 16
pxor xmm8, xmm8
xor ebx, ebx
movdqu OWORD PTR [rsp], xmm8
L_AES_GCM_decrypt_aesni_calc_aad_loop:
movzx r13d, BYTE PTR [r12+rcx]
mov BYTE PTR [rsp+rbx], r13b
inc ecx
inc ebx
cmp ecx, edx
jl L_AES_GCM_decrypt_aesni_calc_aad_loop
movdqu xmm8, OWORD PTR [rsp]
add rsp, 16
pshufb xmm8, OWORD PTR L_aes_gcm_bswap_mask
pxor xmm6, xmm8
pshufd xmm1, xmm6, 78
pshufd xmm2, xmm5, 78
movdqa xmm3, xmm5
movdqa xmm0, xmm5
pclmulqdq xmm3, xmm6, 17
pclmulqdq xmm0, xmm6, 0
pxor xmm1, xmm6
pxor xmm2, xmm5
pclmulqdq xmm1, xmm2, 0
pxor xmm1, xmm0
pxor xmm1, xmm3
movdqa xmm2, xmm1
movdqa xmm7, xmm0
movdqa xmm6, xmm3
pslldq xmm2, 8
psrldq xmm1, 8
pxor xmm7, xmm2
pxor xmm6, xmm1
movdqa xmm0, xmm7
movdqa xmm1, xmm6
psrld xmm0, 31
psrld xmm1, 31
pslld xmm7, 1
pslld xmm6, 1
movdqa xmm2, xmm0
pslldq xmm0, 4
psrldq xmm2, 12
pslldq xmm1, 4
por xmm6, xmm2
por xmm7, xmm0
por xmm6, xmm1
movdqa xmm0, xmm7
movdqa xmm1, xmm7
movdqa xmm2, xmm7
pslld xmm0, 31
pslld xmm1, 30
pslld xmm2, 25
pxor xmm0, xmm1
pxor xmm0, xmm2
movdqa xmm1, xmm0
psrldq xmm1, 4
pslldq xmm0, 12
pxor xmm7, xmm0
movdqa xmm2, xmm7
movdqa xmm3, xmm7
movdqa xmm0, xmm7
psrld xmm2, 1
psrld xmm3, 2
psrld xmm0, 7
pxor xmm2, xmm3
pxor xmm2, xmm0
pxor xmm2, xmm1
pxor xmm2, xmm7
pxor xmm6, xmm2
L_AES_GCM_decrypt_aesni_calc_aad_done:
; Calculate counter and H
; Increment the counter (stored at [rsp+128]) and convert H into the
; shifted-by-one-bit form used by the 8-block GHASH below:
; H = H*x with conditional reduction via L_aes_gcm_mod2_128.
pshufb xmm4, OWORD PTR L_aes_gcm_bswap_epi64
movdqa xmm9, xmm5
paddd xmm4, OWORD PTR L_aes_gcm_one
movdqa xmm8, xmm5
movdqu OWORD PTR [rsp+128], xmm4
psrlq xmm9, 63
psllq xmm8, 1
pslldq xmm9, 8
por xmm8, xmm9
pshufd xmm5, xmm5, 255
psrad xmm5, 31
pand xmm5, OWORD PTR L_aes_gcm_mod2_128
pxor xmm5, xmm8
; If at least 128 bytes remain, precompute H^1..H^8 at [rsp+0..112] for
; the 8-blocks-at-a-time loop.  r13d = length rounded down to 128;
; xmm2 carries the GHASH accumulator into the big loop.
xor ebx, ebx
cmp r9d, 128
mov r13d, r9d
jl L_AES_GCM_decrypt_aesni_done_128
and r13d, 4294967168
movdqa xmm2, xmm6
; H ^ 1
movdqu OWORD PTR [rsp], xmm5
; H ^ 2
; Each power: Karatsuba multiply of the two previous powers followed by
; the shift-based reduction modulo x^128 + x^7 + x^2 + x + 1.
pshufd xmm9, xmm5, 78
pshufd xmm10, xmm5, 78
movdqa xmm11, xmm5
movdqa xmm8, xmm5
pclmulqdq xmm11, xmm5, 17
pclmulqdq xmm8, xmm5, 0
pxor xmm9, xmm5
pxor xmm10, xmm5
pclmulqdq xmm9, xmm10, 0
pxor xmm9, xmm8
pxor xmm9, xmm11
movdqa xmm10, xmm9
movdqa xmm0, xmm11
pslldq xmm10, 8
psrldq xmm9, 8
pxor xmm8, xmm10
pxor xmm0, xmm9
movdqa xmm12, xmm8
movdqa xmm13, xmm8
movdqa xmm14, xmm8
pslld xmm12, 31
pslld xmm13, 30
pslld xmm14, 25
pxor xmm12, xmm13
pxor xmm12, xmm14
movdqa xmm13, xmm12
psrldq xmm13, 4
pslldq xmm12, 12
pxor xmm8, xmm12
movdqa xmm14, xmm8
movdqa xmm10, xmm8
movdqa xmm9, xmm8
psrld xmm14, 1
psrld xmm10, 2
psrld xmm9, 7
pxor xmm14, xmm10
pxor xmm14, xmm9
pxor xmm14, xmm13
pxor xmm14, xmm8
pxor xmm0, xmm14
movdqu OWORD PTR [rsp+16], xmm0
; H ^ 3
pshufd xmm9, xmm5, 78
pshufd xmm10, xmm0, 78
movdqa xmm11, xmm0
movdqa xmm8, xmm0
pclmulqdq xmm11, xmm5, 17
pclmulqdq xmm8, xmm5, 0
pxor xmm9, xmm5
pxor xmm10, xmm0
pclmulqdq xmm9, xmm10, 0
pxor xmm9, xmm8
pxor xmm9, xmm11
movdqa xmm10, xmm9
movdqa xmm1, xmm11
pslldq xmm10, 8
psrldq xmm9, 8
pxor xmm8, xmm10
pxor xmm1, xmm9
movdqa xmm12, xmm8
movdqa xmm13, xmm8
movdqa xmm14, xmm8
pslld xmm12, 31
pslld xmm13, 30
pslld xmm14, 25
pxor xmm12, xmm13
pxor xmm12, xmm14
movdqa xmm13, xmm12
psrldq xmm13, 4
pslldq xmm12, 12
pxor xmm8, xmm12
movdqa xmm14, xmm8
movdqa xmm10, xmm8
movdqa xmm9, xmm8
psrld xmm14, 1
psrld xmm10, 2
psrld xmm9, 7
pxor xmm14, xmm10
pxor xmm14, xmm9
pxor xmm14, xmm13
pxor xmm14, xmm8
pxor xmm1, xmm14
movdqu OWORD PTR [rsp+32], xmm1
; H ^ 4
pshufd xmm9, xmm0, 78
pshufd xmm10, xmm0, 78
movdqa xmm11, xmm0
movdqa xmm8, xmm0
pclmulqdq xmm11, xmm0, 17
pclmulqdq xmm8, xmm0, 0
pxor xmm9, xmm0
pxor xmm10, xmm0
pclmulqdq xmm9, xmm10, 0
pxor xmm9, xmm8
pxor xmm9, xmm11
movdqa xmm10, xmm9
movdqa xmm3, xmm11
pslldq xmm10, 8
psrldq xmm9, 8
pxor xmm8, xmm10
pxor xmm3, xmm9
movdqa xmm12, xmm8
movdqa xmm13, xmm8
movdqa xmm14, xmm8
pslld xmm12, 31
pslld xmm13, 30
pslld xmm14, 25
pxor xmm12, xmm13
pxor xmm12, xmm14
movdqa xmm13, xmm12
psrldq xmm13, 4
pslldq xmm12, 12
pxor xmm8, xmm12
movdqa xmm14, xmm8
movdqa xmm10, xmm8
movdqa xmm9, xmm8
psrld xmm14, 1
psrld xmm10, 2
psrld xmm9, 7
pxor xmm14, xmm10
pxor xmm14, xmm9
pxor xmm14, xmm13
pxor xmm14, xmm8
pxor xmm3, xmm14
movdqu OWORD PTR [rsp+48], xmm3
; H ^ 5
pshufd xmm9, xmm0, 78
pshufd xmm10, xmm1, 78
movdqa xmm11, xmm1
movdqa xmm8, xmm1
pclmulqdq xmm11, xmm0, 17
pclmulqdq xmm8, xmm0, 0
pxor xmm9, xmm0
pxor xmm10, xmm1
pclmulqdq xmm9, xmm10, 0
pxor xmm9, xmm8
pxor xmm9, xmm11
movdqa xmm10, xmm9
movdqa xmm7, xmm11
pslldq xmm10, 8
psrldq xmm9, 8
pxor xmm8, xmm10
pxor xmm7, xmm9
movdqa xmm12, xmm8
movdqa xmm13, xmm8
movdqa xmm14, xmm8
pslld xmm12, 31
pslld xmm13, 30
pslld xmm14, 25
pxor xmm12, xmm13
pxor xmm12, xmm14
movdqa xmm13, xmm12
psrldq xmm13, 4
pslldq xmm12, 12
pxor xmm8, xmm12
movdqa xmm14, xmm8
movdqa xmm10, xmm8
movdqa xmm9, xmm8
psrld xmm14, 1
psrld xmm10, 2
psrld xmm9, 7
pxor xmm14, xmm10
pxor xmm14, xmm9
pxor xmm14, xmm13
pxor xmm14, xmm8
pxor xmm7, xmm14
movdqu OWORD PTR [rsp+64], xmm7
; H ^ 6
pshufd xmm9, xmm1, 78
pshufd xmm10, xmm1, 78
movdqa xmm11, xmm1
movdqa xmm8, xmm1
pclmulqdq xmm11, xmm1, 17
pclmulqdq xmm8, xmm1, 0
pxor xmm9, xmm1
pxor xmm10, xmm1
pclmulqdq xmm9, xmm10, 0
pxor xmm9, xmm8
pxor xmm9, xmm11
movdqa xmm10, xmm9
movdqa xmm7, xmm11
pslldq xmm10, 8
psrldq xmm9, 8
pxor xmm8, xmm10
pxor xmm7, xmm9
movdqa xmm12, xmm8
movdqa xmm13, xmm8
movdqa xmm14, xmm8
pslld xmm12, 31
pslld xmm13, 30
pslld xmm14, 25
pxor xmm12, xmm13
pxor xmm12, xmm14
movdqa xmm13, xmm12
psrldq xmm13, 4
pslldq xmm12, 12
pxor xmm8, xmm12
movdqa xmm14, xmm8
movdqa xmm10, xmm8
movdqa xmm9, xmm8
psrld xmm14, 1
psrld xmm10, 2
psrld xmm9, 7
pxor xmm14, xmm10
pxor xmm14, xmm9
pxor xmm14, xmm13
pxor xmm14, xmm8
pxor xmm7, xmm14
movdqu OWORD PTR [rsp+80], xmm7
; H ^ 7
pshufd xmm9, xmm1, 78
pshufd xmm10, xmm3, 78
movdqa xmm11, xmm3
movdqa xmm8, xmm3
pclmulqdq xmm11, xmm1, 17
pclmulqdq xmm8, xmm1, 0
pxor xmm9, xmm1
pxor xmm10, xmm3
pclmulqdq xmm9, xmm10, 0
pxor xmm9, xmm8
pxor xmm9, xmm11
movdqa xmm10, xmm9
movdqa xmm7, xmm11
pslldq xmm10, 8
psrldq xmm9, 8
pxor xmm8, xmm10
pxor xmm7, xmm9
movdqa xmm12, xmm8
movdqa xmm13, xmm8
movdqa xmm14, xmm8
pslld xmm12, 31
pslld xmm13, 30
pslld xmm14, 25
pxor xmm12, xmm13
pxor xmm12, xmm14
movdqa xmm13, xmm12
psrldq xmm13, 4
pslldq xmm12, 12
pxor xmm8, xmm12
movdqa xmm14, xmm8
movdqa xmm10, xmm8
movdqa xmm9, xmm8
psrld xmm14, 1
psrld xmm10, 2
psrld xmm9, 7
pxor xmm14, xmm10
pxor xmm14, xmm9
pxor xmm14, xmm13
pxor xmm14, xmm8
pxor xmm7, xmm14
movdqu OWORD PTR [rsp+96], xmm7
; H ^ 8
pshufd xmm9, xmm3, 78
pshufd xmm10, xmm3, 78
movdqa xmm11, xmm3
movdqa xmm8, xmm3
pclmulqdq xmm11, xmm3, 17
pclmulqdq xmm8, xmm3, 0
pxor xmm9, xmm3
pxor xmm10, xmm3
pclmulqdq xmm9, xmm10, 0
pxor xmm9, xmm8
pxor xmm9, xmm11
movdqa xmm10, xmm9
movdqa xmm7, xmm11
pslldq xmm10, 8
psrldq xmm9, 8
pxor xmm8, xmm10
pxor xmm7, xmm9
movdqa xmm12, xmm8
movdqa xmm13, xmm8
movdqa xmm14, xmm8
pslld xmm12, 31
pslld xmm13, 30
pslld xmm14, 25
pxor xmm12, xmm13
pxor xmm12, xmm14
movdqa xmm13, xmm12
psrldq xmm13, 4
pslldq xmm12, 12
pxor xmm8, xmm12
movdqa xmm14, xmm8
movdqa xmm10, xmm8
movdqa xmm9, xmm8
psrld xmm14, 1
psrld xmm10, 2
psrld xmm9, 7
pxor xmm14, xmm10
pxor xmm14, xmm9
pxor xmm14, xmm13
pxor xmm14, xmm8
pxor xmm7, xmm14
movdqu OWORD PTR [rsp+112], xmm7
L_AES_GCM_decrypt_aesni_ghash_128:
; 8-blocks-at-a-time decrypt loop (continues past this view): build 8
; consecutive counter blocks in xmm8-xmm15, XOR with the first round key,
; then GHASH the 8 CIPHERTEXT blocks (read from [rcx], the input) against
; H^8..H^1 interleaved with the AES rounds.
lea rcx, QWORD PTR [rdi+rbx]
lea rdx, QWORD PTR [rsi+rbx]
movdqu xmm8, OWORD PTR [rsp+128]
movdqa xmm1, OWORD PTR L_aes_gcm_bswap_epi64
movdqa xmm0, xmm8
pshufb xmm8, xmm1
movdqa xmm9, xmm0
paddd xmm9, OWORD PTR L_aes_gcm_one
pshufb xmm9, xmm1
movdqa xmm10, xmm0
paddd xmm10, OWORD PTR L_aes_gcm_two
pshufb xmm10, xmm1
movdqa xmm11, xmm0
paddd xmm11, OWORD PTR L_aes_gcm_three
pshufb xmm11, xmm1
movdqa xmm12, xmm0
paddd xmm12, OWORD PTR L_aes_gcm_four
pshufb xmm12, xmm1
movdqa xmm13, xmm0
paddd xmm13, OWORD PTR L_aes_gcm_five
pshufb xmm13, xmm1
movdqa xmm14, xmm0
paddd xmm14, OWORD PTR L_aes_gcm_six
pshufb xmm14, xmm1
movdqa xmm15, xmm0
paddd xmm15, OWORD PTR L_aes_gcm_seven
pshufb xmm15, xmm1
paddd xmm0, OWORD PTR L_aes_gcm_eight
; XOR all 8 counter blocks with round key 0 and store the advanced counter.
movdqa xmm7, OWORD PTR [r15]
movdqu OWORD PTR [rsp+128], xmm0
pxor xmm8, xmm7
pxor xmm9, xmm7
pxor xmm10, xmm7
pxor xmm11, xmm7
pxor xmm12, xmm7
pxor xmm13, xmm7
pxor xmm14, xmm7
pxor xmm15, xmm7
; GHASH block 1: ciphertext at [rcx] (with pending accumulator xmm2 folded
; in) times the H power at [rsp+112]; AES round 1 interleaved.
movdqu xmm7, OWORD PTR [rsp+112]
movdqu xmm0, OWORD PTR [rcx]
aesenc xmm8, [r15+16]
pshufb xmm0, OWORD PTR L_aes_gcm_bswap_mask
pxor xmm0, xmm2
pshufd xmm1, xmm7, 78
pshufd xmm5, xmm0, 78
pxor xmm1, xmm7
pxor xmm5, xmm0
movdqa xmm3, xmm0
pclmulqdq xmm3, xmm7, 17
aesenc xmm9, [r15+16]
aesenc xmm10, [r15+16]
movdqa xmm2, xmm0
pclmulqdq xmm2, xmm7, 0
aesenc xmm11, [r15+16]
aesenc xmm12, [r15+16]
pclmulqdq xmm1, xmm5, 0
aesenc xmm13, [r15+16]
aesenc xmm14, [r15+16]
aesenc xmm15, [r15+16]
pxor xmm1, xmm2
pxor xmm1, xmm3
; GHASH block 2: ciphertext at [rcx+16] times the H power at [rsp+96];
; AES round 2 (sequence continues beyond this view).
movdqu xmm7, OWORD PTR [rsp+96]
movdqu xmm0, OWORD PTR [rcx+16]
pshufd xmm4, xmm7, 78
pshufb xmm0, OWORD PTR L_aes_gcm_bswap_mask
aesenc xmm8, [r15+32]
pxor xmm4, xmm7
pshufd xmm5, xmm0, 78
pxor xmm5, xmm0
movdqa xmm6, xmm0
pclmulqdq xmm6, xmm7, 17
aesenc xmm9, [r15+32]
aesenc xmm10, [r15+32]
pclmulqdq xmm7, xmm0, 0
aesenc xmm11, [r15+32]
aesenc xmm12, [r15+32]
pclmulqdq xmm4, xmm5, 0
aesenc xmm13, [r15+32]
aesenc xmm14, [r15+32]
aesenc xmm15, [r15+32]
pxor xmm1, xmm7
pxor xmm2, xmm7
pxor xmm1, xmm6
pxor xmm3, xmm6
pxor xmm1, xmm4
movdqu xmm7, OWORD PTR [rsp+80]
movdqu xmm0, OWORD PTR [rcx+32]
pshufd xmm4, xmm7, 78
pshufb xmm0, OWORD PTR L_aes_gcm_bswap_mask
aesenc xmm8, [r15+48]
pxor xmm4, xmm7
pshufd xmm5, xmm0, 78
pxor xmm5, xmm0
movdqa xmm6, xmm0
pclmulqdq xmm6, xmm7, 17
aesenc xmm9, [r15+48]
aesenc xmm10, [r15+48]
pclmulqdq xmm7, xmm0, 0
aesenc xmm11, [r15+48]
aesenc xmm12, [r15+48]
pclmulqdq xmm4, xmm5, 0
aesenc xmm13, [r15+48]
aesenc xmm14, [r15+48]
aesenc xmm15, [r15+48]
pxor xmm1, xmm7
pxor xmm2, xmm7
pxor xmm1, xmm6
pxor xmm3, xmm6
pxor xmm1, xmm4
movdqu xmm7, OWORD PTR [rsp+64]
movdqu xmm0, OWORD PTR [rcx+48]
pshufd xmm4, xmm7, 78
pshufb xmm0, OWORD PTR L_aes_gcm_bswap_mask
aesenc xmm8, [r15+64]
pxor xmm4, xmm7
pshufd xmm5, xmm0, 78
pxor xmm5, xmm0
movdqa xmm6, xmm0
pclmulqdq xmm6, xmm7, 17
aesenc xmm9, [r15+64]
aesenc xmm10, [r15+64]
pclmulqdq xmm7, xmm0, 0
aesenc xmm11, [r15+64]
aesenc xmm12, [r15+64]
pclmulqdq xmm4, xmm5, 0
aesenc xmm13, [r15+64]
aesenc xmm14, [r15+64]
aesenc xmm15, [r15+64]
pxor xmm1, xmm7
pxor xmm2, xmm7
pxor xmm1, xmm6
pxor xmm3, xmm6
pxor xmm1, xmm4
movdqu xmm7, OWORD PTR [rsp+48]
movdqu xmm0, OWORD PTR [rcx+64]
pshufd xmm4, xmm7, 78
pshufb xmm0, OWORD PTR L_aes_gcm_bswap_mask
aesenc xmm8, [r15+80]
pxor xmm4, xmm7
pshufd xmm5, xmm0, 78
pxor xmm5, xmm0
movdqa xmm6, xmm0
pclmulqdq xmm6, xmm7, 17
aesenc xmm9, [r15+80]
aesenc xmm10, [r15+80]
pclmulqdq xmm7, xmm0, 0
aesenc xmm11, [r15+80]
aesenc xmm12, [r15+80]
pclmulqdq xmm4, xmm5, 0
aesenc xmm13, [r15+80]
aesenc xmm14, [r15+80]
aesenc xmm15, [r15+80]
pxor xmm1, xmm7
pxor xmm2, xmm7
pxor xmm1, xmm6
pxor xmm3, xmm6
pxor xmm1, xmm4
movdqu xmm7, OWORD PTR [rsp+32]
movdqu xmm0, OWORD PTR [rcx+80]
pshufd xmm4, xmm7, 78
pshufb xmm0, OWORD PTR L_aes_gcm_bswap_mask
aesenc xmm8, [r15+96]
pxor xmm4, xmm7
pshufd xmm5, xmm0, 78
pxor xmm5, xmm0
movdqa xmm6, xmm0
pclmulqdq xmm6, xmm7, 17
aesenc xmm9, [r15+96]
aesenc xmm10, [r15+96]
pclmulqdq xmm7, xmm0, 0
aesenc xmm11, [r15+96]
aesenc xmm12, [r15+96]
pclmulqdq xmm4, xmm5, 0
aesenc xmm13, [r15+96]
aesenc xmm14, [r15+96]
aesenc xmm15, [r15+96]
pxor xmm1, xmm7
pxor xmm2, xmm7
pxor xmm1, xmm6
pxor xmm3, xmm6
pxor xmm1, xmm4
movdqu xmm7, OWORD PTR [rsp+16]
movdqu xmm0, OWORD PTR [rcx+96]
pshufd xmm4, xmm7, 78
pshufb xmm0, OWORD PTR L_aes_gcm_bswap_mask
aesenc xmm8, [r15+112]
pxor xmm4, xmm7
pshufd xmm5, xmm0, 78
pxor xmm5, xmm0
movdqa xmm6, xmm0
pclmulqdq xmm6, xmm7, 17
aesenc xmm9, [r15+112]
aesenc xmm10, [r15+112]
pclmulqdq xmm7, xmm0, 0
aesenc xmm11, [r15+112]
aesenc xmm12, [r15+112]
pclmulqdq xmm4, xmm5, 0
aesenc xmm13, [r15+112]
aesenc xmm14, [r15+112]
aesenc xmm15, [r15+112]
pxor xmm1, xmm7
pxor xmm2, xmm7
pxor xmm1, xmm6
pxor xmm3, xmm6
pxor xmm1, xmm4
movdqu xmm7, OWORD PTR [rsp]
movdqu xmm0, OWORD PTR [rcx+112]
pshufd xmm4, xmm7, 78
pshufb xmm0, OWORD PTR L_aes_gcm_bswap_mask
aesenc xmm8, [r15+128]
pxor xmm4, xmm7
pshufd xmm5, xmm0, 78
pxor xmm5, xmm0
movdqa xmm6, xmm0
pclmulqdq xmm6, xmm7, 17
aesenc xmm9, [r15+128]
aesenc xmm10, [r15+128]
pclmulqdq xmm7, xmm0, 0
aesenc xmm11, [r15+128]
aesenc xmm12, [r15+128]
pclmulqdq xmm4, xmm5, 0
aesenc xmm13, [r15+128]
aesenc xmm14, [r15+128]
aesenc xmm15, [r15+128]
pxor xmm1, xmm7
pxor xmm2, xmm7
pxor xmm1, xmm6
pxor xmm3, xmm6
pxor xmm1, xmm4
movdqa xmm5, xmm1
psrldq xmm1, 8
pslldq xmm5, 8
aesenc xmm8, [r15+144]
pxor xmm2, xmm5
pxor xmm3, xmm1
movdqa xmm7, xmm2
movdqa xmm4, xmm2
movdqa xmm5, xmm2
aesenc xmm9, [r15+144]
pslld xmm7, 31
pslld xmm4, 30
pslld xmm5, 25
aesenc xmm10, [r15+144]
pxor xmm7, xmm4
pxor xmm7, xmm5
aesenc xmm11, [r15+144]
movdqa xmm4, xmm7
pslldq xmm7, 12
psrldq xmm4, 4
aesenc xmm12, [r15+144]
pxor xmm2, xmm7
movdqa xmm5, xmm2
movdqa xmm1, xmm2
movdqa xmm0, xmm2
aesenc xmm13, [r15+144]
psrld xmm5, 1
psrld xmm1, 2
psrld xmm0, 7
aesenc xmm14, [r15+144]
pxor xmm5, xmm1
pxor xmm5, xmm0
aesenc xmm15, [r15+144]
pxor xmm5, xmm4
pxor xmm2, xmm5
pxor xmm2, xmm3
cmp r10d, 11
movdqa xmm7, OWORD PTR [r15+160]
jl L_AES_GCM_decrypt_aesni_aesenc_128_ghash_avx_done
aesenc xmm8, xmm7
aesenc xmm9, xmm7
aesenc xmm10, xmm7
aesenc xmm11, xmm7
aesenc xmm12, xmm7
aesenc xmm13, xmm7
aesenc xmm14, xmm7
aesenc xmm15, xmm7
movdqa xmm7, OWORD PTR [r15+176]
aesenc xmm8, xmm7
aesenc xmm9, xmm7
aesenc xmm10, xmm7
aesenc xmm11, xmm7
aesenc xmm12, xmm7
aesenc xmm13, xmm7
aesenc xmm14, xmm7
aesenc xmm15, xmm7
cmp r10d, 13
movdqa xmm7, OWORD PTR [r15+192]
jl L_AES_GCM_decrypt_aesni_aesenc_128_ghash_avx_done
aesenc xmm8, xmm7
aesenc xmm9, xmm7
aesenc xmm10, xmm7
aesenc xmm11, xmm7
aesenc xmm12, xmm7
aesenc xmm13, xmm7
aesenc xmm14, xmm7
aesenc xmm15, xmm7
movdqa xmm7, OWORD PTR [r15+208]
aesenc xmm8, xmm7
aesenc xmm9, xmm7
aesenc xmm10, xmm7
aesenc xmm11, xmm7
aesenc xmm12, xmm7
aesenc xmm13, xmm7
aesenc xmm14, xmm7
aesenc xmm15, xmm7
movdqa xmm7, OWORD PTR [r15+224]
L_AES_GCM_decrypt_aesni_aesenc_128_ghash_avx_done:
aesenclast xmm8, xmm7
aesenclast xmm9, xmm7
movdqu xmm0, OWORD PTR [rcx]
movdqu xmm1, OWORD PTR [rcx+16]
pxor xmm8, xmm0
pxor xmm9, xmm1
movdqu OWORD PTR [rdx], xmm8
movdqu OWORD PTR [rdx+16], xmm9
aesenclast xmm10, xmm7
aesenclast xmm11, xmm7
movdqu xmm0, OWORD PTR [rcx+32]
movdqu xmm1, OWORD PTR [rcx+48]
pxor xmm10, xmm0
pxor xmm11, xmm1
movdqu OWORD PTR [rdx+32], xmm10
movdqu OWORD PTR [rdx+48], xmm11
aesenclast xmm12, xmm7
aesenclast xmm13, xmm7
movdqu xmm0, OWORD PTR [rcx+64]
movdqu xmm1, OWORD PTR [rcx+80]
pxor xmm12, xmm0
pxor xmm13, xmm1
movdqu OWORD PTR [rdx+64], xmm12
movdqu OWORD PTR [rdx+80], xmm13
aesenclast xmm14, xmm7
aesenclast xmm15, xmm7
movdqu xmm0, OWORD PTR [rcx+96]
movdqu xmm1, OWORD PTR [rcx+112]
pxor xmm14, xmm0
pxor xmm15, xmm1
movdqu OWORD PTR [rdx+96], xmm14
movdqu OWORD PTR [rdx+112], xmm15
add ebx, 128
cmp ebx, r13d
jl L_AES_GCM_decrypt_aesni_ghash_128
movdqa xmm6, xmm2
movdqu xmm5, OWORD PTR [rsp]
L_AES_GCM_decrypt_aesni_done_128:
mov edx, r9d
cmp ebx, edx
jge L_AES_GCM_decrypt_aesni_done_dec
mov r13d, r9d
and r13d, 4294967280
cmp ebx, r13d
jge L_AES_GCM_decrypt_aesni_last_block_done
L_AES_GCM_decrypt_aesni_last_block_start:
lea rcx, QWORD PTR [rdi+rbx]
lea rdx, QWORD PTR [rsi+rbx]
movdqu xmm1, OWORD PTR [rcx]
movdqa xmm0, xmm5
pshufb xmm1, OWORD PTR L_aes_gcm_bswap_mask
pxor xmm1, xmm6
movdqu xmm8, OWORD PTR [rsp+128]
movdqa xmm9, xmm8
pshufb xmm8, OWORD PTR L_aes_gcm_bswap_epi64
paddd xmm9, OWORD PTR L_aes_gcm_one
pxor xmm8, [r15]
movdqu OWORD PTR [rsp+128], xmm9
movdqa xmm10, xmm1
pclmulqdq xmm10, xmm0, 16
aesenc xmm8, [r15+16]
aesenc xmm8, [r15+32]
movdqa xmm11, xmm1
pclmulqdq xmm11, xmm0, 1
aesenc xmm8, [r15+48]
aesenc xmm8, [r15+64]
movdqa xmm12, xmm1
pclmulqdq xmm12, xmm0, 0
aesenc xmm8, [r15+80]
movdqa xmm1, xmm1
pclmulqdq xmm1, xmm0, 17
aesenc xmm8, [r15+96]
pxor xmm10, xmm11
movdqa xmm2, xmm10
psrldq xmm10, 8
pslldq xmm2, 8
aesenc xmm8, [r15+112]
movdqa xmm3, xmm1
pxor xmm2, xmm12
pxor xmm3, xmm10
movdqa xmm0, OWORD PTR L_aes_gcm_mod2_128
movdqa xmm11, xmm2
pclmulqdq xmm11, xmm0, 16
aesenc xmm8, [r15+128]
pshufd xmm10, xmm2, 78
pxor xmm10, xmm11
movdqa xmm11, xmm10
pclmulqdq xmm11, xmm0, 16
aesenc xmm8, [r15+144]
pshufd xmm6, xmm10, 78
pxor xmm6, xmm11
pxor xmm6, xmm3
cmp r10d, 11
movdqa xmm9, OWORD PTR [r15+160]
jl L_AES_GCM_decrypt_aesni_aesenc_gfmul_last
aesenc xmm8, xmm9
aesenc xmm8, [r15+176]
cmp r10d, 13
movdqa xmm9, OWORD PTR [r15+192]
jl L_AES_GCM_decrypt_aesni_aesenc_gfmul_last
aesenc xmm8, xmm9
aesenc xmm8, [r15+208]
movdqa xmm9, OWORD PTR [r15+224]
L_AES_GCM_decrypt_aesni_aesenc_gfmul_last:
aesenclast xmm8, xmm9
movdqu xmm9, OWORD PTR [rcx]
pxor xmm8, xmm9
movdqu OWORD PTR [rdx], xmm8
add ebx, 16
cmp ebx, r13d
jl L_AES_GCM_decrypt_aesni_last_block_start
L_AES_GCM_decrypt_aesni_last_block_done:
mov ecx, r9d
mov edx, ecx
and ecx, 15
jz L_AES_GCM_decrypt_aesni_aesenc_last15_dec_avx_done
movdqu xmm4, OWORD PTR [rsp+128]
pshufb xmm4, OWORD PTR L_aes_gcm_bswap_epi64
pxor xmm4, [r15]
aesenc xmm4, [r15+16]
aesenc xmm4, [r15+32]
aesenc xmm4, [r15+48]
aesenc xmm4, [r15+64]
aesenc xmm4, [r15+80]
aesenc xmm4, [r15+96]
aesenc xmm4, [r15+112]
aesenc xmm4, [r15+128]
aesenc xmm4, [r15+144]
cmp r10d, 11
movdqa xmm9, OWORD PTR [r15+160]
jl L_AES_GCM_decrypt_aesni_aesenc_last15_dec_avx_aesenc_avx_last
aesenc xmm4, xmm9
aesenc xmm4, [r15+176]
cmp r10d, 13
movdqa xmm9, OWORD PTR [r15+192]
jl L_AES_GCM_decrypt_aesni_aesenc_last15_dec_avx_aesenc_avx_last
aesenc xmm4, xmm9
aesenc xmm4, [r15+208]
movdqa xmm9, OWORD PTR [r15+224]
L_AES_GCM_decrypt_aesni_aesenc_last15_dec_avx_aesenc_avx_last:
aesenclast xmm4, xmm9
sub rsp, 32
xor ecx, ecx
movdqu OWORD PTR [rsp], xmm4
pxor xmm0, xmm0
movdqu OWORD PTR [rsp+16], xmm0
L_AES_GCM_decrypt_aesni_aesenc_last15_dec_avx_loop:
movzx r13d, BYTE PTR [rdi+rbx]
mov BYTE PTR [rsp+rcx+16], r13b
xor r13b, BYTE PTR [rsp+rcx]
mov BYTE PTR [rsi+rbx], r13b
inc ebx
inc ecx
cmp ebx, edx
jl L_AES_GCM_decrypt_aesni_aesenc_last15_dec_avx_loop
movdqu xmm4, OWORD PTR [rsp+16]
add rsp, 32
pshufb xmm4, OWORD PTR L_aes_gcm_bswap_mask
pxor xmm6, xmm4
pshufd xmm9, xmm5, 78
pshufd xmm10, xmm6, 78
movdqa xmm11, xmm6
movdqa xmm8, xmm6
pclmulqdq xmm11, xmm5, 17
pclmulqdq xmm8, xmm5, 0
pxor xmm9, xmm5
pxor xmm10, xmm6
pclmulqdq xmm9, xmm10, 0
pxor xmm9, xmm8
pxor xmm9, xmm11
movdqa xmm10, xmm9
movdqa xmm6, xmm11
pslldq xmm10, 8
psrldq xmm9, 8
pxor xmm8, xmm10
pxor xmm6, xmm9
movdqa xmm12, xmm8
movdqa xmm13, xmm8
movdqa xmm14, xmm8
pslld xmm12, 31
pslld xmm13, 30
pslld xmm14, 25
pxor xmm12, xmm13
pxor xmm12, xmm14
movdqa xmm13, xmm12
psrldq xmm13, 4
pslldq xmm12, 12
pxor xmm8, xmm12
movdqa xmm14, xmm8
movdqa xmm10, xmm8
movdqa xmm9, xmm8
psrld xmm14, 1
psrld xmm10, 2
psrld xmm9, 7
pxor xmm14, xmm10
pxor xmm14, xmm9
pxor xmm14, xmm13
pxor xmm14, xmm8
pxor xmm6, xmm14
L_AES_GCM_decrypt_aesni_aesenc_last15_dec_avx_done:
L_AES_GCM_decrypt_aesni_done_dec:
mov edx, r9d
mov ecx, r11d
shl rdx, 3
shl rcx, 3
pinsrq xmm0, rdx, 0
pinsrq xmm0, rcx, 1
pxor xmm6, xmm0
pshufd xmm9, xmm5, 78
pshufd xmm10, xmm6, 78
movdqa xmm11, xmm6
movdqa xmm8, xmm6
pclmulqdq xmm11, xmm5, 17
pclmulqdq xmm8, xmm5, 0
pxor xmm9, xmm5
pxor xmm10, xmm6
pclmulqdq xmm9, xmm10, 0
pxor xmm9, xmm8
pxor xmm9, xmm11
movdqa xmm10, xmm9
movdqa xmm6, xmm11
pslldq xmm10, 8
psrldq xmm9, 8
pxor xmm8, xmm10
pxor xmm6, xmm9
movdqa xmm12, xmm8
movdqa xmm13, xmm8
movdqa xmm14, xmm8
pslld xmm12, 31
pslld xmm13, 30
pslld xmm14, 25
pxor xmm12, xmm13
pxor xmm12, xmm14
movdqa xmm13, xmm12
psrldq xmm13, 4
pslldq xmm12, 12
pxor xmm8, xmm12
movdqa xmm14, xmm8
movdqa xmm10, xmm8
movdqa xmm9, xmm8
psrld xmm14, 1
psrld xmm10, 2
psrld xmm9, 7
pxor xmm14, xmm10
pxor xmm14, xmm9
pxor xmm14, xmm13
pxor xmm14, xmm8
pxor xmm6, xmm14
pshufb xmm6, OWORD PTR L_aes_gcm_bswap_mask
movdqu xmm0, OWORD PTR [rsp+144]
pxor xmm0, xmm6
cmp r14d, 16
je L_AES_GCM_decrypt_aesni_cmp_tag_16
sub rsp, 16
xor rcx, rcx
xor rbx, rbx
movdqu OWORD PTR [rsp], xmm0
L_AES_GCM_decrypt_aesni_cmp_tag_loop:
movzx r13d, BYTE PTR [rsp+rcx]
xor r13b, BYTE PTR [r8+rcx]
or bl, r13b
inc ecx
cmp ecx, r14d
jne L_AES_GCM_decrypt_aesni_cmp_tag_loop
cmp bl, 0
sete bl
add rsp, 16
xor rcx, rcx
jmp L_AES_GCM_decrypt_aesni_cmp_tag_done
L_AES_GCM_decrypt_aesni_cmp_tag_16:
movdqu xmm1, OWORD PTR [r8]
pcmpeqb xmm0, xmm1
pmovmskb rdx, xmm0
; %%edx == 0xFFFF then return 1 else => return 0
xor ebx, ebx
cmp edx, 65535
sete bl
L_AES_GCM_decrypt_aesni_cmp_tag_done:
mov DWORD PTR [rbp], ebx
movdqu xmm6, OWORD PTR [rsp+168]
movdqu xmm7, OWORD PTR [rsp+184]
movdqu xmm8, OWORD PTR [rsp+200]
movdqu xmm9, OWORD PTR [rsp+216]
movdqu xmm10, OWORD PTR [rsp+232]
movdqu xmm11, OWORD PTR [rsp+248]
movdqu xmm12, OWORD PTR [rsp+264]
movdqu xmm13, OWORD PTR [rsp+280]
movdqu xmm14, OWORD PTR [rsp+296]
movdqu xmm15, OWORD PTR [rsp+312]
add rsp, 328
pop rbp
pop r15
pop r14
pop rbx
pop r12
pop rsi
pop rdi
pop r13
ret
AES_GCM_decrypt_aesni ENDP
_text ENDS
_text SEGMENT READONLY PARA
AES_GCM_init_aesni PROC
; -----------------------------------------------------------------------
; AES-GCM initialisation (AES-NI / SSE, Microsoft x64 ABI).
; Computes the GHASH key H = E_K(0^128), the initial counter block J0
; derived from the IV, and E_K(J0).
;
; In:  rcx -> AES round-key schedule           (copied to rdi)
;      edx  = number of AES rounds (10/12/14)  (copied to esi)
;      r8  -> IV bytes                          (copied to r10)
;      r9d  = IV length in bytes                (copied to r11d)
;      [rsp+80], [rsp+88], [rsp+96] = three output pointers (loaded into
;      rax, r8, r9).  From the stores at the end: [rax] receives H,
;      [r8] receives the first per-data counter block (J0 + 1, big-endian
;      low word swapped to epi64 layout), [r9] receives E_K(J0).
;      NOTE(review): output-pointer roles inferred from the final stores;
;      confirm against the C prototype.
;
; Win64 callee-saved xmm6-xmm8/xmm15 are spilled to the local stack area.
; -----------------------------------------------------------------------
push rdi
push rsi
push r12
push r13
push r14
mov rdi, rcx
mov rsi, rdx
mov r10, r8
mov r11d, r9d
; Load the three stack-passed output pointers (above return addr + pushes).
mov rax, QWORD PTR [rsp+80]
mov r8, QWORD PTR [rsp+88]
mov r9, QWORD PTR [rsp+96]
sub rsp, 80
; Save callee-saved XMM registers used below (Win64 ABI requirement).
movdqu OWORD PTR [rsp+16], xmm6
movdqu OWORD PTR [rsp+32], xmm7
movdqu OWORD PTR [rsp+48], xmm8
movdqu OWORD PTR [rsp+64], xmm15
pxor xmm4, xmm4
mov edx, r11d
cmp edx, 12
jne L_AES_GCM_init_aesni_iv_not_12
; # Calculate values when IV is 12 bytes
; Set counter based on IV
; J0 = IV || 0x00000001 (16777216 is 0x01000000, i.e. 1 stored big-endian).
mov ecx, 16777216
pinsrq xmm4, QWORD PTR [r10], 0
pinsrd xmm4, DWORD PTR [r10+8], 2
pinsrd xmm4, ecx, 3
; H = Encrypt X(=0) and T = Encrypt counter
; xmm5 starts as round key 0 (= E_K state for the all-zero block after
; the initial AddRoundKey); xmm1 = J0 ^ round key 0.  Both blocks are
; run through the rounds in parallel.
movdqa xmm1, xmm4
movdqa xmm5, OWORD PTR [rdi]
pxor xmm1, xmm5
movdqa xmm6, OWORD PTR [rdi+16]
aesenc xmm5, xmm6
aesenc xmm1, xmm6
movdqa xmm6, OWORD PTR [rdi+32]
aesenc xmm5, xmm6
aesenc xmm1, xmm6
movdqa xmm6, OWORD PTR [rdi+48]
aesenc xmm5, xmm6
aesenc xmm1, xmm6
movdqa xmm6, OWORD PTR [rdi+64]
aesenc xmm5, xmm6
aesenc xmm1, xmm6
movdqa xmm6, OWORD PTR [rdi+80]
aesenc xmm5, xmm6
aesenc xmm1, xmm6
movdqa xmm6, OWORD PTR [rdi+96]
aesenc xmm5, xmm6
aesenc xmm1, xmm6
movdqa xmm6, OWORD PTR [rdi+112]
aesenc xmm5, xmm6
aesenc xmm1, xmm6
movdqa xmm6, OWORD PTR [rdi+128]
aesenc xmm5, xmm6
aesenc xmm1, xmm6
movdqa xmm6, OWORD PTR [rdi+144]
aesenc xmm5, xmm6
aesenc xmm1, xmm6
; Extra rounds for AES-192 (nr >= 11) and AES-256 (nr >= 13).
cmp esi, 11
movdqa xmm6, OWORD PTR [rdi+160]
jl L_AES_GCM_init_aesni_calc_iv_12_last
aesenc xmm5, xmm6
aesenc xmm1, xmm6
movdqa xmm6, OWORD PTR [rdi+176]
aesenc xmm5, xmm6
aesenc xmm1, xmm6
cmp esi, 13
movdqa xmm6, OWORD PTR [rdi+192]
jl L_AES_GCM_init_aesni_calc_iv_12_last
aesenc xmm5, xmm6
aesenc xmm1, xmm6
movdqa xmm6, OWORD PTR [rdi+208]
aesenc xmm5, xmm6
aesenc xmm1, xmm6
movdqa xmm6, OWORD PTR [rdi+224]
L_AES_GCM_init_aesni_calc_iv_12_last:
aesenclast xmm5, xmm6
aesenclast xmm1, xmm6
; xmm5 = H (byte-reversed into GHASH bit order), xmm15 = E_K(J0).
pshufb xmm5, OWORD PTR L_aes_gcm_bswap_mask
movdqu xmm15, xmm1
jmp L_AES_GCM_init_aesni_iv_done
L_AES_GCM_init_aesni_iv_not_12:
; Calculate values when IV is not 12 bytes
; H = Encrypt X(=0)
movdqa xmm5, OWORD PTR [rdi]
aesenc xmm5, [rdi+16]
aesenc xmm5, [rdi+32]
aesenc xmm5, [rdi+48]
aesenc xmm5, [rdi+64]
aesenc xmm5, [rdi+80]
aesenc xmm5, [rdi+96]
aesenc xmm5, [rdi+112]
aesenc xmm5, [rdi+128]
aesenc xmm5, [rdi+144]
cmp esi, 11
movdqa xmm8, OWORD PTR [rdi+160]
jl L_AES_GCM_init_aesni_calc_iv_1_aesenc_avx_last
aesenc xmm5, xmm8
aesenc xmm5, [rdi+176]
cmp esi, 13
movdqa xmm8, OWORD PTR [rdi+192]
jl L_AES_GCM_init_aesni_calc_iv_1_aesenc_avx_last
aesenc xmm5, xmm8
aesenc xmm5, [rdi+208]
movdqa xmm8, OWORD PTR [rdi+224]
L_AES_GCM_init_aesni_calc_iv_1_aesenc_avx_last:
aesenclast xmm5, xmm8
pshufb xmm5, OWORD PTR L_aes_gcm_bswap_mask
; Calc counter
; Initialization vector
; J0 = GHASH(IV padded to a 128-bit multiple || len(IV)*8), accumulated
; in xmm4; rcx indexes the IV bytes processed so far.
cmp edx, 0
mov rcx, 0
je L_AES_GCM_init_aesni_calc_iv_done
cmp edx, 16
jl L_AES_GCM_init_aesni_calc_iv_lt16
and edx, 4294967280
L_AES_GCM_init_aesni_calc_iv_16_loop:
; GHASH one full 16-byte IV block: xmm4 = (xmm4 ^ block) * H mod poly,
; via Karatsuba pclmulqdq then the bit-reflected reduction.
movdqu xmm7, OWORD PTR [r10+rcx]
pshufb xmm7, OWORD PTR L_aes_gcm_bswap_mask
pxor xmm4, xmm7
pshufd xmm1, xmm4, 78
pshufd xmm2, xmm5, 78
movdqa xmm3, xmm5
movdqa xmm0, xmm5
pclmulqdq xmm3, xmm4, 17
pclmulqdq xmm0, xmm4, 0
pxor xmm1, xmm4
pxor xmm2, xmm5
pclmulqdq xmm1, xmm2, 0
pxor xmm1, xmm0
pxor xmm1, xmm3
movdqa xmm2, xmm1
movdqa xmm6, xmm0
movdqa xmm4, xmm3
pslldq xmm2, 8
psrldq xmm1, 8
pxor xmm6, xmm2
pxor xmm4, xmm1
; Shift the 256-bit product left by one bit (carry across lanes).
movdqa xmm0, xmm6
movdqa xmm1, xmm4
psrld xmm0, 31
psrld xmm1, 31
pslld xmm6, 1
pslld xmm4, 1
movdqa xmm2, xmm0
pslldq xmm0, 4
psrldq xmm2, 12
pslldq xmm1, 4
por xmm4, xmm2
por xmm6, xmm0
por xmm4, xmm1
; Reduce modulo x^128 + x^7 + x^2 + x + 1.
movdqa xmm0, xmm6
movdqa xmm1, xmm6
movdqa xmm2, xmm6
pslld xmm0, 31
pslld xmm1, 30
pslld xmm2, 25
pxor xmm0, xmm1
pxor xmm0, xmm2
movdqa xmm1, xmm0
psrldq xmm1, 4
pslldq xmm0, 12
pxor xmm6, xmm0
movdqa xmm2, xmm6
movdqa xmm3, xmm6
movdqa xmm0, xmm6
psrld xmm2, 1
psrld xmm3, 2
psrld xmm0, 7
pxor xmm2, xmm3
pxor xmm2, xmm0
pxor xmm2, xmm1
pxor xmm2, xmm6
pxor xmm4, xmm2
add ecx, 16
cmp ecx, edx
jl L_AES_GCM_init_aesni_calc_iv_16_loop
mov edx, r11d
cmp ecx, edx
je L_AES_GCM_init_aesni_calc_iv_done
L_AES_GCM_init_aesni_calc_iv_lt16:
; Handle the final partial IV block: copy remaining bytes into a
; zero-padded 16-byte stack buffer, then GHASH it as above.
sub rsp, 16
pxor xmm7, xmm7
xor r13d, r13d
movdqu OWORD PTR [rsp], xmm7
L_AES_GCM_init_aesni_calc_iv_loop:
movzx r12d, BYTE PTR [r10+rcx]
mov BYTE PTR [rsp+r13], r12b
inc ecx
inc r13d
cmp ecx, edx
jl L_AES_GCM_init_aesni_calc_iv_loop
movdqu xmm7, OWORD PTR [rsp]
add rsp, 16
pshufb xmm7, OWORD PTR L_aes_gcm_bswap_mask
pxor xmm4, xmm7
pshufd xmm1, xmm4, 78
pshufd xmm2, xmm5, 78
movdqa xmm3, xmm5
movdqa xmm0, xmm5
pclmulqdq xmm3, xmm4, 17
pclmulqdq xmm0, xmm4, 0
pxor xmm1, xmm4
pxor xmm2, xmm5
pclmulqdq xmm1, xmm2, 0
pxor xmm1, xmm0
pxor xmm1, xmm3
movdqa xmm2, xmm1
movdqa xmm6, xmm0
movdqa xmm4, xmm3
pslldq xmm2, 8
psrldq xmm1, 8
pxor xmm6, xmm2
pxor xmm4, xmm1
movdqa xmm0, xmm6
movdqa xmm1, xmm4
psrld xmm0, 31
psrld xmm1, 31
pslld xmm6, 1
pslld xmm4, 1
movdqa xmm2, xmm0
pslldq xmm0, 4
psrldq xmm2, 12
pslldq xmm1, 4
por xmm4, xmm2
por xmm6, xmm0
por xmm4, xmm1
movdqa xmm0, xmm6
movdqa xmm1, xmm6
movdqa xmm2, xmm6
pslld xmm0, 31
pslld xmm1, 30
pslld xmm2, 25
pxor xmm0, xmm1
pxor xmm0, xmm2
movdqa xmm1, xmm0
psrldq xmm1, 4
pslldq xmm0, 12
pxor xmm6, xmm0
movdqa xmm2, xmm6
movdqa xmm3, xmm6
movdqa xmm0, xmm6
psrld xmm2, 1
psrld xmm3, 2
psrld xmm0, 7
pxor xmm2, xmm3
pxor xmm2, xmm0
pxor xmm2, xmm1
pxor xmm2, xmm6
pxor xmm4, xmm2
L_AES_GCM_init_aesni_calc_iv_done:
; T = Encrypt counter
; Fold in the IV bit length (edx*8) and do one final GHASH multiply,
; completing J0 for a non-12-byte IV.
pxor xmm0, xmm0
shl edx, 3
pinsrq xmm0, rdx, 0
pxor xmm4, xmm0
pshufd xmm1, xmm4, 78
pshufd xmm2, xmm5, 78
movdqa xmm3, xmm5
movdqa xmm0, xmm5
pclmulqdq xmm3, xmm4, 17
pclmulqdq xmm0, xmm4, 0
pxor xmm1, xmm4
pxor xmm2, xmm5
pclmulqdq xmm1, xmm2, 0
pxor xmm1, xmm0
pxor xmm1, xmm3
movdqa xmm2, xmm1
movdqa xmm6, xmm0
movdqa xmm4, xmm3
pslldq xmm2, 8
psrldq xmm1, 8
pxor xmm6, xmm2
pxor xmm4, xmm1
movdqa xmm0, xmm6
movdqa xmm1, xmm4
psrld xmm0, 31
psrld xmm1, 31
pslld xmm6, 1
pslld xmm4, 1
movdqa xmm2, xmm0
pslldq xmm0, 4
psrldq xmm2, 12
pslldq xmm1, 4
por xmm4, xmm2
por xmm6, xmm0
por xmm4, xmm1
movdqa xmm0, xmm6
movdqa xmm1, xmm6
movdqa xmm2, xmm6
pslld xmm0, 31
pslld xmm1, 30
pslld xmm2, 25
pxor xmm0, xmm1
pxor xmm0, xmm2
movdqa xmm1, xmm0
psrldq xmm1, 4
pslldq xmm0, 12
pxor xmm6, xmm0
movdqa xmm2, xmm6
movdqa xmm3, xmm6
movdqa xmm0, xmm6
psrld xmm2, 1
psrld xmm3, 2
psrld xmm0, 7
pxor xmm2, xmm3
pxor xmm2, xmm0
pxor xmm2, xmm1
pxor xmm2, xmm6
pxor xmm4, xmm2
pshufb xmm4, OWORD PTR L_aes_gcm_bswap_mask
; Encrypt counter
; xmm15 = E_K(J0), used later for the authentication tag.
movdqa xmm7, OWORD PTR [rdi]
pxor xmm7, xmm4
aesenc xmm7, [rdi+16]
aesenc xmm7, [rdi+32]
aesenc xmm7, [rdi+48]
aesenc xmm7, [rdi+64]
aesenc xmm7, [rdi+80]
aesenc xmm7, [rdi+96]
aesenc xmm7, [rdi+112]
aesenc xmm7, [rdi+128]
aesenc xmm7, [rdi+144]
cmp esi, 11
movdqa xmm8, OWORD PTR [rdi+160]
jl L_AES_GCM_init_aesni_calc_iv_2_aesenc_avx_last
aesenc xmm7, xmm8
aesenc xmm7, [rdi+176]
cmp esi, 13
movdqa xmm8, OWORD PTR [rdi+192]
jl L_AES_GCM_init_aesni_calc_iv_2_aesenc_avx_last
aesenc xmm7, xmm8
aesenc xmm7, [rdi+208]
movdqa xmm8, OWORD PTR [rdi+224]
L_AES_GCM_init_aesni_calc_iv_2_aesenc_avx_last:
aesenclast xmm7, xmm8
movdqu xmm15, xmm7
L_AES_GCM_init_aesni_iv_done:
; Store outputs: [r9] = E_K(J0); [rax] = H; [r8] = J0 + 1 in the
; byte order used by the update routines (epi64 swap + 32-bit add).
movdqa OWORD PTR [r9], xmm15
pshufb xmm4, OWORD PTR L_aes_gcm_bswap_epi64
paddd xmm4, OWORD PTR L_aes_gcm_one
movdqa OWORD PTR [rax], xmm5
movdqa OWORD PTR [r8], xmm4
; Restore callee-saved XMM registers and non-volatile GPRs.
movdqu xmm6, OWORD PTR [rsp+16]
movdqu xmm7, OWORD PTR [rsp+32]
movdqu xmm8, OWORD PTR [rsp+48]
movdqu xmm15, OWORD PTR [rsp+64]
add rsp, 80
pop r14
pop r13
pop r12
pop rsi
pop rdi
ret
AES_GCM_init_aesni ENDP
_text ENDS
_text SEGMENT READONLY PARA
AES_GCM_aad_update_aesni PROC
; -----------------------------------------------------------------------
; Fold additional authenticated data (AAD) into the GHASH state.
; Microsoft x64 ABI:
;   rcx -> AAD bytes (copied to rax)
;   edx  = AAD length in bytes; the loop consumes 16 bytes per iteration,
;          so the caller is expected to pass a multiple of 16
;          (NOTE(review): no partial-block path here — confirm callers
;          pad/handle the tail).
;   r8  -> GHASH accumulator X (read, updated in place)
;   r9  -> hash key H
; Callee-saved xmm6/xmm7 are spilled per the Win64 ABI.
; -----------------------------------------------------------------------
mov rax, rcx
sub rsp, 32
movdqu OWORD PTR [rsp], xmm6
movdqu OWORD PTR [rsp+16], xmm7
movdqa xmm5, OWORD PTR [r8]
movdqa xmm6, OWORD PTR [r9]
xor ecx, ecx
L_AES_GCM_aad_update_aesni_16_loop:
; X = (X ^ aad_block) * H mod the GHASH polynomial:
; Karatsuba carry-less multiply, then shift-left-1 and reduction
; modulo x^128 + x^7 + x^2 + x + 1.
movdqu xmm7, OWORD PTR [rax+rcx]
pshufb xmm7, OWORD PTR L_aes_gcm_bswap_mask
pxor xmm5, xmm7
pshufd xmm1, xmm5, 78
pshufd xmm2, xmm6, 78
movdqa xmm3, xmm6
movdqa xmm0, xmm6
pclmulqdq xmm3, xmm5, 17
pclmulqdq xmm0, xmm5, 0
pxor xmm1, xmm5
pxor xmm2, xmm6
pclmulqdq xmm1, xmm2, 0
pxor xmm1, xmm0
pxor xmm1, xmm3
movdqa xmm2, xmm1
movdqa xmm4, xmm0
movdqa xmm5, xmm3
pslldq xmm2, 8
psrldq xmm1, 8
pxor xmm4, xmm2
pxor xmm5, xmm1
; 256-bit product now in xmm5:xmm4; shift left one bit across lanes.
movdqa xmm0, xmm4
movdqa xmm1, xmm5
psrld xmm0, 31
psrld xmm1, 31
pslld xmm4, 1
pslld xmm5, 1
movdqa xmm2, xmm0
pslldq xmm0, 4
psrldq xmm2, 12
pslldq xmm1, 4
por xmm5, xmm2
por xmm4, xmm0
por xmm5, xmm1
; Modular reduction of the low half into the high half.
movdqa xmm0, xmm4
movdqa xmm1, xmm4
movdqa xmm2, xmm4
pslld xmm0, 31
pslld xmm1, 30
pslld xmm2, 25
pxor xmm0, xmm1
pxor xmm0, xmm2
movdqa xmm1, xmm0
psrldq xmm1, 4
pslldq xmm0, 12
pxor xmm4, xmm0
movdqa xmm2, xmm4
movdqa xmm3, xmm4
movdqa xmm0, xmm4
psrld xmm2, 1
psrld xmm3, 2
psrld xmm0, 7
pxor xmm2, xmm3
pxor xmm2, xmm0
pxor xmm2, xmm1
pxor xmm2, xmm4
pxor xmm5, xmm2
add ecx, 16
cmp ecx, edx
jl L_AES_GCM_aad_update_aesni_16_loop
; Write back the updated accumulator and restore xmm6/xmm7.
movdqa OWORD PTR [r8], xmm5
movdqu xmm6, OWORD PTR [rsp]
movdqu xmm7, OWORD PTR [rsp+16]
add rsp, 32
ret
AES_GCM_aad_update_aesni ENDP
_text ENDS
_text SEGMENT READONLY PARA
AES_GCM_encrypt_block_aesni PROC
; -----------------------------------------------------------------------
; Encrypt a single 16-byte block in CTR mode and advance the counter.
; Microsoft x64 ABI:
;   rcx -> AES round-key schedule
;   edx  = number of AES rounds (11/13 checks select AES-192/AES-256)
;   r8  -> output ciphertext block (copied to r10)
;   r9  -> input plaintext block   (copied to r11)
;   [rsp+40] -> counter block (loaded into rax; read, incremented by one
;               via 32-bit add, and stored back)
; All registers touched are volatile under Win64; no spills needed.
; -----------------------------------------------------------------------
mov r10, r8
mov r11, r9
mov rax, QWORD PTR [rsp+40]
; Byte-swap the counter for encryption and store counter+1 back.
movdqu xmm0, OWORD PTR [rax]
movdqa xmm1, xmm0
pshufb xmm0, OWORD PTR L_aes_gcm_bswap_epi64
paddd xmm1, OWORD PTR L_aes_gcm_one
pxor xmm0, [rcx]
movdqu OWORD PTR [rax], xmm1
; AES rounds on the counter block (keystream generation).
aesenc xmm0, [rcx+16]
aesenc xmm0, [rcx+32]
aesenc xmm0, [rcx+48]
aesenc xmm0, [rcx+64]
aesenc xmm0, [rcx+80]
aesenc xmm0, [rcx+96]
aesenc xmm0, [rcx+112]
aesenc xmm0, [rcx+128]
aesenc xmm0, [rcx+144]
cmp edx, 11
movdqa xmm1, OWORD PTR [rcx+160]
jl L_AES_GCM_encrypt_block_aesni_aesenc_block_aesenc_avx_last
aesenc xmm0, xmm1
aesenc xmm0, [rcx+176]
cmp edx, 13
movdqa xmm1, OWORD PTR [rcx+192]
jl L_AES_GCM_encrypt_block_aesni_aesenc_block_aesenc_avx_last
aesenc xmm0, xmm1
aesenc xmm0, [rcx+208]
movdqa xmm1, OWORD PTR [rcx+224]
L_AES_GCM_encrypt_block_aesni_aesenc_block_aesenc_avx_last:
aesenclast xmm0, xmm1
; ciphertext = keystream ^ plaintext
movdqu xmm1, OWORD PTR [r11]
pxor xmm0, xmm1
movdqu OWORD PTR [r10], xmm0
; Byte-reverse the ciphertext in xmm0 (GHASH bit order) before return;
; NOTE(review): xmm0 is volatile under Win64, so this appears to be for
; a non-standard caller contract — confirm against the C-side usage.
pshufb xmm0, OWORD PTR L_aes_gcm_bswap_mask
ret
AES_GCM_encrypt_block_aesni ENDP
_text ENDS
_text SEGMENT READONLY PARA
AES_GCM_ghash_block_aesni PROC
; -----------------------------------------------------------------------
; GHASH a single 16-byte block: X = (X ^ block) * H mod the GHASH
; polynomial x^128 + x^7 + x^2 + x + 1.
; Microsoft x64 ABI:
;   rcx -> 16-byte input block
;   rdx -> GHASH accumulator X (read, updated in place)
;   r8  -> hash key H
; Callee-saved xmm6/xmm7 are spilled per the Win64 ABI.
; -----------------------------------------------------------------------
sub rsp, 32
movdqu OWORD PTR [rsp], xmm6
movdqu OWORD PTR [rsp+16], xmm7
movdqa xmm4, OWORD PTR [rdx]
movdqa xmm5, OWORD PTR [r8]
; Byte-reverse the input into GHASH bit order and fold into X.
movdqu xmm7, OWORD PTR [rcx]
pshufb xmm7, OWORD PTR L_aes_gcm_bswap_mask
pxor xmm4, xmm7
; 128x128 carry-less multiply (Karatsuba: hi, lo, and middle products).
pshufd xmm1, xmm4, 78
pshufd xmm2, xmm5, 78
movdqa xmm3, xmm5
movdqa xmm0, xmm5
pclmulqdq xmm3, xmm4, 17
pclmulqdq xmm0, xmm4, 0
pxor xmm1, xmm4
pxor xmm2, xmm5
pclmulqdq xmm1, xmm2, 0
pxor xmm1, xmm0
pxor xmm1, xmm3
movdqa xmm2, xmm1
movdqa xmm6, xmm0
movdqa xmm4, xmm3
pslldq xmm2, 8
psrldq xmm1, 8
pxor xmm6, xmm2
pxor xmm4, xmm1
; 256-bit product now in xmm4:xmm6; shift left one bit across lanes.
movdqa xmm0, xmm6
movdqa xmm1, xmm4
psrld xmm0, 31
psrld xmm1, 31
pslld xmm6, 1
pslld xmm4, 1
movdqa xmm2, xmm0
pslldq xmm0, 4
psrldq xmm2, 12
pslldq xmm1, 4
por xmm4, xmm2
por xmm6, xmm0
por xmm4, xmm1
; Modular reduction of the low half into the high half.
movdqa xmm0, xmm6
movdqa xmm1, xmm6
movdqa xmm2, xmm6
pslld xmm0, 31
pslld xmm1, 30
pslld xmm2, 25
pxor xmm0, xmm1
pxor xmm0, xmm2
movdqa xmm1, xmm0
psrldq xmm1, 4
pslldq xmm0, 12
pxor xmm6, xmm0
movdqa xmm2, xmm6
movdqa xmm3, xmm6
movdqa xmm0, xmm6
psrld xmm2, 1
psrld xmm3, 2
psrld xmm0, 7
pxor xmm2, xmm3
pxor xmm2, xmm0
pxor xmm2, xmm1
pxor xmm2, xmm6
pxor xmm4, xmm2
; Write back the updated accumulator and restore xmm6/xmm7.
movdqa OWORD PTR [rdx], xmm4
movdqu xmm6, OWORD PTR [rsp]
movdqu xmm7, OWORD PTR [rsp+16]
add rsp, 32
ret
AES_GCM_ghash_block_aesni ENDP
_text ENDS
_text SEGMENT READONLY PARA
AES_GCM_encrypt_update_aesni PROC
push r13
push r12
push r14
push r15
push rdi
mov rax, rcx
mov r10, r8
mov r8d, edx
mov r11, r9
mov r9d, DWORD PTR [rsp+80]
mov r12, QWORD PTR [rsp+88]
mov r14, QWORD PTR [rsp+96]
mov r15, QWORD PTR [rsp+104]
sub rsp, 320
movdqu OWORD PTR [rsp+160], xmm6
movdqu OWORD PTR [rsp+176], xmm7
movdqu OWORD PTR [rsp+192], xmm8
movdqu OWORD PTR [rsp+208], xmm9
movdqu OWORD PTR [rsp+224], xmm10
movdqu OWORD PTR [rsp+240], xmm11
movdqu OWORD PTR [rsp+256], xmm12
movdqu OWORD PTR [rsp+272], xmm13
movdqu OWORD PTR [rsp+288], xmm14
movdqu OWORD PTR [rsp+304], xmm15
movdqa xmm6, OWORD PTR [r12]
movdqa xmm5, OWORD PTR [r14]
movdqa xmm9, xmm5
movdqa xmm8, xmm5
psrlq xmm9, 63
psllq xmm8, 1
pslldq xmm9, 8
por xmm8, xmm9
pshufd xmm5, xmm5, 255
psrad xmm5, 31
pand xmm5, OWORD PTR L_aes_gcm_mod2_128
pxor xmm5, xmm8
xor rdi, rdi
cmp r9d, 128
mov r13d, r9d
jl L_AES_GCM_encrypt_update_aesni_done_128
and r13d, 4294967168
movdqa xmm2, xmm6
; H ^ 1
movdqu OWORD PTR [rsp], xmm5
; H ^ 2
pshufd xmm9, xmm5, 78
pshufd xmm10, xmm5, 78
movdqa xmm11, xmm5
movdqa xmm8, xmm5
pclmulqdq xmm11, xmm5, 17
pclmulqdq xmm8, xmm5, 0
pxor xmm9, xmm5
pxor xmm10, xmm5
pclmulqdq xmm9, xmm10, 0
pxor xmm9, xmm8
pxor xmm9, xmm11
movdqa xmm10, xmm9
movdqa xmm0, xmm11
pslldq xmm10, 8
psrldq xmm9, 8
pxor xmm8, xmm10
pxor xmm0, xmm9
movdqa xmm12, xmm8
movdqa xmm13, xmm8
movdqa xmm14, xmm8
pslld xmm12, 31
pslld xmm13, 30
pslld xmm14, 25
pxor xmm12, xmm13
pxor xmm12, xmm14
movdqa xmm13, xmm12
psrldq xmm13, 4
pslldq xmm12, 12
pxor xmm8, xmm12
movdqa xmm14, xmm8
movdqa xmm10, xmm8
movdqa xmm9, xmm8
psrld xmm14, 1
psrld xmm10, 2
psrld xmm9, 7
pxor xmm14, xmm10
pxor xmm14, xmm9
pxor xmm14, xmm13
pxor xmm14, xmm8
pxor xmm0, xmm14
movdqu OWORD PTR [rsp+16], xmm0
; H ^ 3
pshufd xmm9, xmm5, 78
pshufd xmm10, xmm0, 78
movdqa xmm11, xmm0
movdqa xmm8, xmm0
pclmulqdq xmm11, xmm5, 17
pclmulqdq xmm8, xmm5, 0
pxor xmm9, xmm5
pxor xmm10, xmm0
pclmulqdq xmm9, xmm10, 0
pxor xmm9, xmm8
pxor xmm9, xmm11
movdqa xmm10, xmm9
movdqa xmm1, xmm11
pslldq xmm10, 8
psrldq xmm9, 8
pxor xmm8, xmm10
pxor xmm1, xmm9
movdqa xmm12, xmm8
movdqa xmm13, xmm8
movdqa xmm14, xmm8
pslld xmm12, 31
pslld xmm13, 30
pslld xmm14, 25
pxor xmm12, xmm13
pxor xmm12, xmm14
movdqa xmm13, xmm12
psrldq xmm13, 4
pslldq xmm12, 12
pxor xmm8, xmm12
movdqa xmm14, xmm8
movdqa xmm10, xmm8
movdqa xmm9, xmm8
psrld xmm14, 1
psrld xmm10, 2
psrld xmm9, 7
pxor xmm14, xmm10
pxor xmm14, xmm9
pxor xmm14, xmm13
pxor xmm14, xmm8
pxor xmm1, xmm14
movdqu OWORD PTR [rsp+32], xmm1
; H ^ 4
pshufd xmm9, xmm0, 78
pshufd xmm10, xmm0, 78
movdqa xmm11, xmm0
movdqa xmm8, xmm0
pclmulqdq xmm11, xmm0, 17
pclmulqdq xmm8, xmm0, 0
pxor xmm9, xmm0
pxor xmm10, xmm0
pclmulqdq xmm9, xmm10, 0
pxor xmm9, xmm8
pxor xmm9, xmm11
movdqa xmm10, xmm9
movdqa xmm3, xmm11
pslldq xmm10, 8
psrldq xmm9, 8
pxor xmm8, xmm10
pxor xmm3, xmm9
movdqa xmm12, xmm8
movdqa xmm13, xmm8
movdqa xmm14, xmm8
pslld xmm12, 31
pslld xmm13, 30
pslld xmm14, 25
pxor xmm12, xmm13
pxor xmm12, xmm14
movdqa xmm13, xmm12
psrldq xmm13, 4
pslldq xmm12, 12
pxor xmm8, xmm12
movdqa xmm14, xmm8
movdqa xmm10, xmm8
movdqa xmm9, xmm8
psrld xmm14, 1
psrld xmm10, 2
psrld xmm9, 7
pxor xmm14, xmm10
pxor xmm14, xmm9
pxor xmm14, xmm13
pxor xmm14, xmm8
pxor xmm3, xmm14
movdqu OWORD PTR [rsp+48], xmm3
; H ^ 5
pshufd xmm9, xmm0, 78
pshufd xmm10, xmm1, 78
movdqa xmm11, xmm1
movdqa xmm8, xmm1
pclmulqdq xmm11, xmm0, 17
pclmulqdq xmm8, xmm0, 0
pxor xmm9, xmm0
pxor xmm10, xmm1
pclmulqdq xmm9, xmm10, 0
pxor xmm9, xmm8
pxor xmm9, xmm11
movdqa xmm10, xmm9
movdqa xmm7, xmm11
pslldq xmm10, 8
psrldq xmm9, 8
pxor xmm8, xmm10
pxor xmm7, xmm9
movdqa xmm12, xmm8
movdqa xmm13, xmm8
movdqa xmm14, xmm8
pslld xmm12, 31
pslld xmm13, 30
pslld xmm14, 25
pxor xmm12, xmm13
pxor xmm12, xmm14
movdqa xmm13, xmm12
psrldq xmm13, 4
pslldq xmm12, 12
pxor xmm8, xmm12
movdqa xmm14, xmm8
movdqa xmm10, xmm8
movdqa xmm9, xmm8
psrld xmm14, 1
psrld xmm10, 2
psrld xmm9, 7
pxor xmm14, xmm10
pxor xmm14, xmm9
pxor xmm14, xmm13
pxor xmm14, xmm8
pxor xmm7, xmm14
movdqu OWORD PTR [rsp+64], xmm7
; H ^ 6
pshufd xmm9, xmm1, 78
pshufd xmm10, xmm1, 78
movdqa xmm11, xmm1
movdqa xmm8, xmm1
pclmulqdq xmm11, xmm1, 17
pclmulqdq xmm8, xmm1, 0
pxor xmm9, xmm1
pxor xmm10, xmm1
pclmulqdq xmm9, xmm10, 0
pxor xmm9, xmm8
pxor xmm9, xmm11
movdqa xmm10, xmm9
movdqa xmm7, xmm11
pslldq xmm10, 8
psrldq xmm9, 8
pxor xmm8, xmm10
pxor xmm7, xmm9
movdqa xmm12, xmm8
movdqa xmm13, xmm8
movdqa xmm14, xmm8
pslld xmm12, 31
pslld xmm13, 30
pslld xmm14, 25
pxor xmm12, xmm13
pxor xmm12, xmm14
movdqa xmm13, xmm12
psrldq xmm13, 4
pslldq xmm12, 12
pxor xmm8, xmm12
movdqa xmm14, xmm8
movdqa xmm10, xmm8
movdqa xmm9, xmm8
psrld xmm14, 1
psrld xmm10, 2
psrld xmm9, 7
pxor xmm14, xmm10
pxor xmm14, xmm9
pxor xmm14, xmm13
pxor xmm14, xmm8
pxor xmm7, xmm14
movdqu OWORD PTR [rsp+80], xmm7
; H ^ 7
pshufd xmm9, xmm1, 78
pshufd xmm10, xmm3, 78
movdqa xmm11, xmm3
movdqa xmm8, xmm3
pclmulqdq xmm11, xmm1, 17
pclmulqdq xmm8, xmm1, 0
pxor xmm9, xmm1
pxor xmm10, xmm3
pclmulqdq xmm9, xmm10, 0
pxor xmm9, xmm8
pxor xmm9, xmm11
movdqa xmm10, xmm9
movdqa xmm7, xmm11
pslldq xmm10, 8
psrldq xmm9, 8
pxor xmm8, xmm10
pxor xmm7, xmm9
movdqa xmm12, xmm8
movdqa xmm13, xmm8
movdqa xmm14, xmm8
pslld xmm12, 31
pslld xmm13, 30
pslld xmm14, 25
pxor xmm12, xmm13
pxor xmm12, xmm14
movdqa xmm13, xmm12
psrldq xmm13, 4
pslldq xmm12, 12
pxor xmm8, xmm12
movdqa xmm14, xmm8
movdqa xmm10, xmm8
movdqa xmm9, xmm8
psrld xmm14, 1
psrld xmm10, 2
psrld xmm9, 7
pxor xmm14, xmm10
pxor xmm14, xmm9
pxor xmm14, xmm13
pxor xmm14, xmm8
pxor xmm7, xmm14
movdqu OWORD PTR [rsp+96], xmm7
; H ^ 8
pshufd xmm9, xmm3, 78
pshufd xmm10, xmm3, 78
movdqa xmm11, xmm3
movdqa xmm8, xmm3
pclmulqdq xmm11, xmm3, 17
pclmulqdq xmm8, xmm3, 0
pxor xmm9, xmm3
pxor xmm10, xmm3
pclmulqdq xmm9, xmm10, 0
pxor xmm9, xmm8
pxor xmm9, xmm11
movdqa xmm10, xmm9
movdqa xmm7, xmm11
pslldq xmm10, 8
psrldq xmm9, 8
pxor xmm8, xmm10
pxor xmm7, xmm9
movdqa xmm12, xmm8
movdqa xmm13, xmm8
movdqa xmm14, xmm8
pslld xmm12, 31
pslld xmm13, 30
pslld xmm14, 25
pxor xmm12, xmm13
pxor xmm12, xmm14
movdqa xmm13, xmm12
psrldq xmm13, 4
pslldq xmm12, 12
pxor xmm8, xmm12
movdqa xmm14, xmm8
movdqa xmm10, xmm8
movdqa xmm9, xmm8
psrld xmm14, 1
psrld xmm10, 2
psrld xmm9, 7
pxor xmm14, xmm10
pxor xmm14, xmm9
pxor xmm14, xmm13
pxor xmm14, xmm8
pxor xmm7, xmm14
movdqu OWORD PTR [rsp+112], xmm7
; First 128 bytes of input
movdqu xmm8, OWORD PTR [r15]
movdqa xmm1, OWORD PTR L_aes_gcm_bswap_epi64
movdqa xmm0, xmm8
pshufb xmm8, xmm1
movdqa xmm9, xmm0
paddd xmm9, OWORD PTR L_aes_gcm_one
pshufb xmm9, xmm1
movdqa xmm10, xmm0
paddd xmm10, OWORD PTR L_aes_gcm_two
pshufb xmm10, xmm1
movdqa xmm11, xmm0
paddd xmm11, OWORD PTR L_aes_gcm_three
pshufb xmm11, xmm1
movdqa xmm12, xmm0
paddd xmm12, OWORD PTR L_aes_gcm_four
pshufb xmm12, xmm1
movdqa xmm13, xmm0
paddd xmm13, OWORD PTR L_aes_gcm_five
pshufb xmm13, xmm1
movdqa xmm14, xmm0
paddd xmm14, OWORD PTR L_aes_gcm_six
pshufb xmm14, xmm1
movdqa xmm15, xmm0
paddd xmm15, OWORD PTR L_aes_gcm_seven
pshufb xmm15, xmm1
paddd xmm0, OWORD PTR L_aes_gcm_eight
movdqa xmm7, OWORD PTR [rax]
movdqu OWORD PTR [r15], xmm0
pxor xmm8, xmm7
pxor xmm9, xmm7
pxor xmm10, xmm7
pxor xmm11, xmm7
pxor xmm12, xmm7
pxor xmm13, xmm7
pxor xmm14, xmm7
pxor xmm15, xmm7
movdqa xmm7, OWORD PTR [rax+16]
aesenc xmm8, xmm7
aesenc xmm9, xmm7
aesenc xmm10, xmm7
aesenc xmm11, xmm7
aesenc xmm12, xmm7
aesenc xmm13, xmm7
aesenc xmm14, xmm7
aesenc xmm15, xmm7
movdqa xmm7, OWORD PTR [rax+32]
aesenc xmm8, xmm7
aesenc xmm9, xmm7
aesenc xmm10, xmm7
aesenc xmm11, xmm7
aesenc xmm12, xmm7
aesenc xmm13, xmm7
aesenc xmm14, xmm7
aesenc xmm15, xmm7
movdqa xmm7, OWORD PTR [rax+48]
aesenc xmm8, xmm7
aesenc xmm9, xmm7
aesenc xmm10, xmm7
aesenc xmm11, xmm7
aesenc xmm12, xmm7
aesenc xmm13, xmm7
aesenc xmm14, xmm7
aesenc xmm15, xmm7
movdqa xmm7, OWORD PTR [rax+64]
aesenc xmm8, xmm7
aesenc xmm9, xmm7
aesenc xmm10, xmm7
aesenc xmm11, xmm7
aesenc xmm12, xmm7
aesenc xmm13, xmm7
aesenc xmm14, xmm7
aesenc xmm15, xmm7
movdqa xmm7, OWORD PTR [rax+80]
aesenc xmm8, xmm7
aesenc xmm9, xmm7
aesenc xmm10, xmm7
aesenc xmm11, xmm7
aesenc xmm12, xmm7
aesenc xmm13, xmm7
aesenc xmm14, xmm7
aesenc xmm15, xmm7
movdqa xmm7, OWORD PTR [rax+96]
aesenc xmm8, xmm7
aesenc xmm9, xmm7
aesenc xmm10, xmm7
aesenc xmm11, xmm7
aesenc xmm12, xmm7
aesenc xmm13, xmm7
aesenc xmm14, xmm7
aesenc xmm15, xmm7
movdqa xmm7, OWORD PTR [rax+112]
aesenc xmm8, xmm7
aesenc xmm9, xmm7
aesenc xmm10, xmm7
aesenc xmm11, xmm7
aesenc xmm12, xmm7
aesenc xmm13, xmm7
aesenc xmm14, xmm7
aesenc xmm15, xmm7
movdqa xmm7, OWORD PTR [rax+128]
aesenc xmm8, xmm7
aesenc xmm9, xmm7
aesenc xmm10, xmm7
aesenc xmm11, xmm7
aesenc xmm12, xmm7
aesenc xmm13, xmm7
aesenc xmm14, xmm7
aesenc xmm15, xmm7
movdqa xmm7, OWORD PTR [rax+144]
aesenc xmm8, xmm7
aesenc xmm9, xmm7
aesenc xmm10, xmm7
aesenc xmm11, xmm7
aesenc xmm12, xmm7
aesenc xmm13, xmm7
aesenc xmm14, xmm7
aesenc xmm15, xmm7
cmp r8d, 11
movdqa xmm7, OWORD PTR [rax+160]
jl L_AES_GCM_encrypt_update_aesni_enc_done
aesenc xmm8, xmm7
aesenc xmm9, xmm7
aesenc xmm10, xmm7
aesenc xmm11, xmm7
aesenc xmm12, xmm7
aesenc xmm13, xmm7
aesenc xmm14, xmm7
aesenc xmm15, xmm7
movdqa xmm7, OWORD PTR [rax+176]
aesenc xmm8, xmm7
aesenc xmm9, xmm7
aesenc xmm10, xmm7
aesenc xmm11, xmm7
aesenc xmm12, xmm7
aesenc xmm13, xmm7
aesenc xmm14, xmm7
aesenc xmm15, xmm7
cmp r8d, 13
movdqa xmm7, OWORD PTR [rax+192]
jl L_AES_GCM_encrypt_update_aesni_enc_done
aesenc xmm8, xmm7
aesenc xmm9, xmm7
aesenc xmm10, xmm7
aesenc xmm11, xmm7
aesenc xmm12, xmm7
aesenc xmm13, xmm7
aesenc xmm14, xmm7
aesenc xmm15, xmm7
movdqa xmm7, OWORD PTR [rax+208]
aesenc xmm8, xmm7
aesenc xmm9, xmm7
aesenc xmm10, xmm7
aesenc xmm11, xmm7
aesenc xmm12, xmm7
aesenc xmm13, xmm7
aesenc xmm14, xmm7
aesenc xmm15, xmm7
movdqa xmm7, OWORD PTR [rax+224]
L_AES_GCM_encrypt_update_aesni_enc_done:
aesenclast xmm8, xmm7
aesenclast xmm9, xmm7
movdqu xmm0, OWORD PTR [r11]
movdqu xmm1, OWORD PTR [r11+16]
pxor xmm8, xmm0
pxor xmm9, xmm1
movdqu OWORD PTR [r10], xmm8
movdqu OWORD PTR [r10+16], xmm9
aesenclast xmm10, xmm7
aesenclast xmm11, xmm7
movdqu xmm0, OWORD PTR [r11+32]
movdqu xmm1, OWORD PTR [r11+48]
pxor xmm10, xmm0
pxor xmm11, xmm1
movdqu OWORD PTR [r10+32], xmm10
movdqu OWORD PTR [r10+48], xmm11
aesenclast xmm12, xmm7
aesenclast xmm13, xmm7
movdqu xmm0, OWORD PTR [r11+64]
movdqu xmm1, OWORD PTR [r11+80]
pxor xmm12, xmm0
pxor xmm13, xmm1
movdqu OWORD PTR [r10+64], xmm12
movdqu OWORD PTR [r10+80], xmm13
aesenclast xmm14, xmm7
aesenclast xmm15, xmm7
movdqu xmm0, OWORD PTR [r11+96]
movdqu xmm1, OWORD PTR [r11+112]
pxor xmm14, xmm0
pxor xmm15, xmm1
movdqu OWORD PTR [r10+96], xmm14
movdqu OWORD PTR [r10+112], xmm15
cmp r13d, 128
mov edi, 128
jle L_AES_GCM_encrypt_update_aesni_end_128
; More 128 bytes of input
L_AES_GCM_encrypt_update_aesni_ghash_128:
lea rcx, QWORD PTR [r11+rdi]
lea rdx, QWORD PTR [r10+rdi]
movdqu xmm8, OWORD PTR [r15]
movdqa xmm1, OWORD PTR L_aes_gcm_bswap_epi64
movdqa xmm0, xmm8
pshufb xmm8, xmm1
movdqa xmm9, xmm0
paddd xmm9, OWORD PTR L_aes_gcm_one
pshufb xmm9, xmm1
movdqa xmm10, xmm0
paddd xmm10, OWORD PTR L_aes_gcm_two
pshufb xmm10, xmm1
movdqa xmm11, xmm0
paddd xmm11, OWORD PTR L_aes_gcm_three
pshufb xmm11, xmm1
movdqa xmm12, xmm0
paddd xmm12, OWORD PTR L_aes_gcm_four
pshufb xmm12, xmm1
movdqa xmm13, xmm0
paddd xmm13, OWORD PTR L_aes_gcm_five
pshufb xmm13, xmm1
movdqa xmm14, xmm0
paddd xmm14, OWORD PTR L_aes_gcm_six
pshufb xmm14, xmm1
movdqa xmm15, xmm0
paddd xmm15, OWORD PTR L_aes_gcm_seven
pshufb xmm15, xmm1
paddd xmm0, OWORD PTR L_aes_gcm_eight
movdqa xmm7, OWORD PTR [rax]
movdqu OWORD PTR [r15], xmm0
pxor xmm8, xmm7
pxor xmm9, xmm7
pxor xmm10, xmm7
pxor xmm11, xmm7
pxor xmm12, xmm7
pxor xmm13, xmm7
pxor xmm14, xmm7
pxor xmm15, xmm7
movdqu xmm7, OWORD PTR [rsp+112]
movdqu xmm0, OWORD PTR [rdx+-128]
aesenc xmm8, [rax+16]
pshufb xmm0, OWORD PTR L_aes_gcm_bswap_mask
pxor xmm0, xmm2
pshufd xmm1, xmm7, 78
pshufd xmm5, xmm0, 78
pxor xmm1, xmm7
pxor xmm5, xmm0
movdqa xmm3, xmm0
pclmulqdq xmm3, xmm7, 17
aesenc xmm9, [rax+16]
aesenc xmm10, [rax+16]
movdqa xmm2, xmm0
pclmulqdq xmm2, xmm7, 0
aesenc xmm11, [rax+16]
aesenc xmm12, [rax+16]
pclmulqdq xmm1, xmm5, 0
aesenc xmm13, [rax+16]
aesenc xmm14, [rax+16]
aesenc xmm15, [rax+16]
pxor xmm1, xmm2
pxor xmm1, xmm3
movdqu xmm7, OWORD PTR [rsp+96]
movdqu xmm0, OWORD PTR [rdx+-112]
pshufd xmm4, xmm7, 78
pshufb xmm0, OWORD PTR L_aes_gcm_bswap_mask
aesenc xmm8, [rax+32]
pxor xmm4, xmm7
pshufd xmm5, xmm0, 78
pxor xmm5, xmm0
movdqa xmm6, xmm0
pclmulqdq xmm6, xmm7, 17
aesenc xmm9, [rax+32]
aesenc xmm10, [rax+32]
pclmulqdq xmm7, xmm0, 0
aesenc xmm11, [rax+32]
aesenc xmm12, [rax+32]
pclmulqdq xmm4, xmm5, 0
aesenc xmm13, [rax+32]
aesenc xmm14, [rax+32]
aesenc xmm15, [rax+32]
pxor xmm1, xmm7
pxor xmm2, xmm7
pxor xmm1, xmm6
pxor xmm3, xmm6
pxor xmm1, xmm4
movdqu xmm7, OWORD PTR [rsp+80]
movdqu xmm0, OWORD PTR [rdx+-96]
pshufd xmm4, xmm7, 78
pshufb xmm0, OWORD PTR L_aes_gcm_bswap_mask
aesenc xmm8, [rax+48]
pxor xmm4, xmm7
pshufd xmm5, xmm0, 78
pxor xmm5, xmm0
movdqa xmm6, xmm0
pclmulqdq xmm6, xmm7, 17
aesenc xmm9, [rax+48]
aesenc xmm10, [rax+48]
pclmulqdq xmm7, xmm0, 0
aesenc xmm11, [rax+48]
aesenc xmm12, [rax+48]
pclmulqdq xmm4, xmm5, 0
aesenc xmm13, [rax+48]
aesenc xmm14, [rax+48]
aesenc xmm15, [rax+48]
pxor xmm1, xmm7
pxor xmm2, xmm7
pxor xmm1, xmm6
pxor xmm3, xmm6
pxor xmm1, xmm4
movdqu xmm7, OWORD PTR [rsp+64]
movdqu xmm0, OWORD PTR [rdx+-80]
pshufd xmm4, xmm7, 78
pshufb xmm0, OWORD PTR L_aes_gcm_bswap_mask
aesenc xmm8, [rax+64]
pxor xmm4, xmm7
pshufd xmm5, xmm0, 78
pxor xmm5, xmm0
movdqa xmm6, xmm0
pclmulqdq xmm6, xmm7, 17
aesenc xmm9, [rax+64]
aesenc xmm10, [rax+64]
pclmulqdq xmm7, xmm0, 0
aesenc xmm11, [rax+64]
aesenc xmm12, [rax+64]
pclmulqdq xmm4, xmm5, 0
aesenc xmm13, [rax+64]
aesenc xmm14, [rax+64]
aesenc xmm15, [rax+64]
pxor xmm1, xmm7
pxor xmm2, xmm7
pxor xmm1, xmm6
pxor xmm3, xmm6
pxor xmm1, xmm4
movdqu xmm7, OWORD PTR [rsp+48]
movdqu xmm0, OWORD PTR [rdx+-64]
pshufd xmm4, xmm7, 78
pshufb xmm0, OWORD PTR L_aes_gcm_bswap_mask
aesenc xmm8, [rax+80]
pxor xmm4, xmm7
pshufd xmm5, xmm0, 78
pxor xmm5, xmm0
movdqa xmm6, xmm0
pclmulqdq xmm6, xmm7, 17
aesenc xmm9, [rax+80]
aesenc xmm10, [rax+80]
pclmulqdq xmm7, xmm0, 0
aesenc xmm11, [rax+80]
aesenc xmm12, [rax+80]
pclmulqdq xmm4, xmm5, 0
aesenc xmm13, [rax+80]
aesenc xmm14, [rax+80]
aesenc xmm15, [rax+80]
pxor xmm1, xmm7
pxor xmm2, xmm7
pxor xmm1, xmm6
pxor xmm3, xmm6
pxor xmm1, xmm4
movdqu xmm7, OWORD PTR [rsp+32]
movdqu xmm0, OWORD PTR [rdx+-48]
pshufd xmm4, xmm7, 78
pshufb xmm0, OWORD PTR L_aes_gcm_bswap_mask
aesenc xmm8, [rax+96]
pxor xmm4, xmm7
pshufd xmm5, xmm0, 78
pxor xmm5, xmm0
movdqa xmm6, xmm0
pclmulqdq xmm6, xmm7, 17
aesenc xmm9, [rax+96]
aesenc xmm10, [rax+96]
pclmulqdq xmm7, xmm0, 0
aesenc xmm11, [rax+96]
aesenc xmm12, [rax+96]
pclmulqdq xmm4, xmm5, 0
aesenc xmm13, [rax+96]
aesenc xmm14, [rax+96]
aesenc xmm15, [rax+96]
pxor xmm1, xmm7
pxor xmm2, xmm7
pxor xmm1, xmm6
pxor xmm3, xmm6
pxor xmm1, xmm4
movdqu xmm7, OWORD PTR [rsp+16]
movdqu xmm0, OWORD PTR [rdx+-32]
pshufd xmm4, xmm7, 78
pshufb xmm0, OWORD PTR L_aes_gcm_bswap_mask
aesenc xmm8, [rax+112]
pxor xmm4, xmm7
pshufd xmm5, xmm0, 78
pxor xmm5, xmm0
movdqa xmm6, xmm0
pclmulqdq xmm6, xmm7, 17
aesenc xmm9, [rax+112]
aesenc xmm10, [rax+112]
pclmulqdq xmm7, xmm0, 0
aesenc xmm11, [rax+112]
aesenc xmm12, [rax+112]
pclmulqdq xmm4, xmm5, 0
aesenc xmm13, [rax+112]
aesenc xmm14, [rax+112]
aesenc xmm15, [rax+112]
pxor xmm1, xmm7
pxor xmm2, xmm7
pxor xmm1, xmm6
pxor xmm3, xmm6
pxor xmm1, xmm4
movdqu xmm7, OWORD PTR [rsp]
movdqu xmm0, OWORD PTR [rdx+-16]
pshufd xmm4, xmm7, 78
pshufb xmm0, OWORD PTR L_aes_gcm_bswap_mask
aesenc xmm8, [rax+128]
pxor xmm4, xmm7
pshufd xmm5, xmm0, 78
pxor xmm5, xmm0
movdqa xmm6, xmm0
pclmulqdq xmm6, xmm7, 17
aesenc xmm9, [rax+128]
aesenc xmm10, [rax+128]
pclmulqdq xmm7, xmm0, 0
aesenc xmm11, [rax+128]
aesenc xmm12, [rax+128]
pclmulqdq xmm4, xmm5, 0
aesenc xmm13, [rax+128]
aesenc xmm14, [rax+128]
aesenc xmm15, [rax+128]
pxor xmm1, xmm7
pxor xmm2, xmm7
pxor xmm1, xmm6
pxor xmm3, xmm6
pxor xmm1, xmm4
movdqa xmm5, xmm1
psrldq xmm1, 8
pslldq xmm5, 8
aesenc xmm8, [rax+144]
pxor xmm2, xmm5
pxor xmm3, xmm1
movdqa xmm7, xmm2
movdqa xmm4, xmm2
movdqa xmm5, xmm2
aesenc xmm9, [rax+144]
pslld xmm7, 31
pslld xmm4, 30
pslld xmm5, 25
aesenc xmm10, [rax+144]
pxor xmm7, xmm4
pxor xmm7, xmm5
aesenc xmm11, [rax+144]
movdqa xmm4, xmm7
pslldq xmm7, 12
psrldq xmm4, 4
aesenc xmm12, [rax+144]
pxor xmm2, xmm7
movdqa xmm5, xmm2
movdqa xmm1, xmm2
movdqa xmm0, xmm2
aesenc xmm13, [rax+144]
psrld xmm5, 1
psrld xmm1, 2
psrld xmm0, 7
aesenc xmm14, [rax+144]
pxor xmm5, xmm1
pxor xmm5, xmm0
aesenc xmm15, [rax+144]
pxor xmm5, xmm4
pxor xmm2, xmm5
pxor xmm2, xmm3
cmp r8d, 11
movdqa xmm7, OWORD PTR [rax+160]
jl L_AES_GCM_encrypt_update_aesni_aesenc_128_ghash_avx_done
aesenc xmm8, xmm7
aesenc xmm9, xmm7
aesenc xmm10, xmm7
aesenc xmm11, xmm7
aesenc xmm12, xmm7
aesenc xmm13, xmm7
aesenc xmm14, xmm7
aesenc xmm15, xmm7
movdqa xmm7, OWORD PTR [rax+176]
aesenc xmm8, xmm7
aesenc xmm9, xmm7
aesenc xmm10, xmm7
aesenc xmm11, xmm7
aesenc xmm12, xmm7
aesenc xmm13, xmm7
aesenc xmm14, xmm7
aesenc xmm15, xmm7
cmp r8d, 13
movdqa xmm7, OWORD PTR [rax+192]
jl L_AES_GCM_encrypt_update_aesni_aesenc_128_ghash_avx_done
aesenc xmm8, xmm7
aesenc xmm9, xmm7
aesenc xmm10, xmm7
aesenc xmm11, xmm7
aesenc xmm12, xmm7
aesenc xmm13, xmm7
aesenc xmm14, xmm7
aesenc xmm15, xmm7
movdqa xmm7, OWORD PTR [rax+208]
aesenc xmm8, xmm7
aesenc xmm9, xmm7
aesenc xmm10, xmm7
aesenc xmm11, xmm7
aesenc xmm12, xmm7
aesenc xmm13, xmm7
aesenc xmm14, xmm7
aesenc xmm15, xmm7
movdqa xmm7, OWORD PTR [rax+224]
L_AES_GCM_encrypt_update_aesni_aesenc_128_ghash_avx_done:
aesenclast xmm8, xmm7
aesenclast xmm9, xmm7
movdqu xmm0, OWORD PTR [rcx]
movdqu xmm1, OWORD PTR [rcx+16]
pxor xmm8, xmm0
pxor xmm9, xmm1
movdqu OWORD PTR [rdx], xmm8
movdqu OWORD PTR [rdx+16], xmm9
aesenclast xmm10, xmm7
aesenclast xmm11, xmm7
movdqu xmm0, OWORD PTR [rcx+32]
movdqu xmm1, OWORD PTR [rcx+48]
pxor xmm10, xmm0
pxor xmm11, xmm1
movdqu OWORD PTR [rdx+32], xmm10
movdqu OWORD PTR [rdx+48], xmm11
aesenclast xmm12, xmm7
aesenclast xmm13, xmm7
movdqu xmm0, OWORD PTR [rcx+64]
movdqu xmm1, OWORD PTR [rcx+80]
pxor xmm12, xmm0
pxor xmm13, xmm1
movdqu OWORD PTR [rdx+64], xmm12
movdqu OWORD PTR [rdx+80], xmm13
aesenclast xmm14, xmm7
aesenclast xmm15, xmm7
movdqu xmm0, OWORD PTR [rcx+96]
movdqu xmm1, OWORD PTR [rcx+112]
pxor xmm14, xmm0
pxor xmm15, xmm1
movdqu OWORD PTR [rdx+96], xmm14
movdqu OWORD PTR [rdx+112], xmm15
add edi, 128
cmp edi, r13d
jl L_AES_GCM_encrypt_update_aesni_ghash_128
L_AES_GCM_encrypt_update_aesni_end_128:
movdqa xmm4, OWORD PTR L_aes_gcm_bswap_mask
pshufb xmm8, xmm4
pshufb xmm9, xmm4
pshufb xmm10, xmm4
pshufb xmm11, xmm4
pxor xmm8, xmm2
pshufb xmm12, xmm4
pshufb xmm13, xmm4
pshufb xmm14, xmm4
pshufb xmm15, xmm4
movdqu xmm7, OWORD PTR [rsp+112]
pshufd xmm1, xmm8, 78
pshufd xmm2, xmm7, 78
movdqa xmm3, xmm7
movdqa xmm0, xmm7
pclmulqdq xmm3, xmm8, 17
pclmulqdq xmm0, xmm8, 0
pxor xmm1, xmm8
pxor xmm2, xmm7
pclmulqdq xmm1, xmm2, 0
pxor xmm1, xmm0
pxor xmm1, xmm3
movdqa xmm2, xmm1
movdqa xmm4, xmm0
movdqa xmm6, xmm3
pslldq xmm2, 8
psrldq xmm1, 8
pxor xmm4, xmm2
pxor xmm6, xmm1
movdqu xmm7, OWORD PTR [rsp+96]
pshufd xmm1, xmm9, 78
pshufd xmm2, xmm7, 78
movdqa xmm3, xmm7
movdqa xmm0, xmm7
pclmulqdq xmm3, xmm9, 17
pclmulqdq xmm0, xmm9, 0
pxor xmm1, xmm9
pxor xmm2, xmm7
pclmulqdq xmm1, xmm2, 0
pxor xmm1, xmm0
pxor xmm1, xmm3
movdqa xmm2, xmm1
pxor xmm4, xmm0
pxor xmm6, xmm3
pslldq xmm2, 8
psrldq xmm1, 8
pxor xmm4, xmm2
pxor xmm6, xmm1
movdqu xmm7, OWORD PTR [rsp+80]
pshufd xmm1, xmm10, 78
pshufd xmm2, xmm7, 78
movdqa xmm3, xmm7
movdqa xmm0, xmm7
pclmulqdq xmm3, xmm10, 17
pclmulqdq xmm0, xmm10, 0
pxor xmm1, xmm10
pxor xmm2, xmm7
pclmulqdq xmm1, xmm2, 0
pxor xmm1, xmm0
pxor xmm1, xmm3
movdqa xmm2, xmm1
pxor xmm4, xmm0
pxor xmm6, xmm3
pslldq xmm2, 8
psrldq xmm1, 8
pxor xmm4, xmm2
pxor xmm6, xmm1
movdqu xmm7, OWORD PTR [rsp+64]
pshufd xmm1, xmm11, 78
pshufd xmm2, xmm7, 78
movdqa xmm3, xmm7
movdqa xmm0, xmm7
pclmulqdq xmm3, xmm11, 17
pclmulqdq xmm0, xmm11, 0
pxor xmm1, xmm11
pxor xmm2, xmm7
pclmulqdq xmm1, xmm2, 0
pxor xmm1, xmm0
pxor xmm1, xmm3
movdqa xmm2, xmm1
pxor xmm4, xmm0
pxor xmm6, xmm3
pslldq xmm2, 8
psrldq xmm1, 8
pxor xmm4, xmm2
pxor xmm6, xmm1
movdqu xmm7, OWORD PTR [rsp+48]
pshufd xmm1, xmm12, 78
pshufd xmm2, xmm7, 78
movdqa xmm3, xmm7
movdqa xmm0, xmm7
pclmulqdq xmm3, xmm12, 17
pclmulqdq xmm0, xmm12, 0
pxor xmm1, xmm12
pxor xmm2, xmm7
pclmulqdq xmm1, xmm2, 0
pxor xmm1, xmm0
pxor xmm1, xmm3
movdqa xmm2, xmm1
pxor xmm4, xmm0
pxor xmm6, xmm3
pslldq xmm2, 8
psrldq xmm1, 8
pxor xmm4, xmm2
pxor xmm6, xmm1
movdqu xmm7, OWORD PTR [rsp+32]
pshufd xmm1, xmm13, 78
pshufd xmm2, xmm7, 78
movdqa xmm3, xmm7
movdqa xmm0, xmm7
pclmulqdq xmm3, xmm13, 17
pclmulqdq xmm0, xmm13, 0
pxor xmm1, xmm13
pxor xmm2, xmm7
pclmulqdq xmm1, xmm2, 0
pxor xmm1, xmm0
pxor xmm1, xmm3
movdqa xmm2, xmm1
pxor xmm4, xmm0
pxor xmm6, xmm3
pslldq xmm2, 8
psrldq xmm1, 8
pxor xmm4, xmm2
pxor xmm6, xmm1
movdqu xmm7, OWORD PTR [rsp+16]
pshufd xmm1, xmm14, 78
pshufd xmm2, xmm7, 78
movdqa xmm3, xmm7
movdqa xmm0, xmm7
pclmulqdq xmm3, xmm14, 17
pclmulqdq xmm0, xmm14, 0
pxor xmm1, xmm14
pxor xmm2, xmm7
pclmulqdq xmm1, xmm2, 0
pxor xmm1, xmm0
pxor xmm1, xmm3
movdqa xmm2, xmm1
pxor xmm4, xmm0
pxor xmm6, xmm3
pslldq xmm2, 8
psrldq xmm1, 8
pxor xmm4, xmm2
pxor xmm6, xmm1
movdqu xmm7, OWORD PTR [rsp]
pshufd xmm1, xmm15, 78
pshufd xmm2, xmm7, 78
movdqa xmm3, xmm7
movdqa xmm0, xmm7
pclmulqdq xmm3, xmm15, 17
pclmulqdq xmm0, xmm15, 0
pxor xmm1, xmm15
pxor xmm2, xmm7
pclmulqdq xmm1, xmm2, 0
pxor xmm1, xmm0
pxor xmm1, xmm3
movdqa xmm2, xmm1
pxor xmm4, xmm0
pxor xmm6, xmm3
pslldq xmm2, 8
psrldq xmm1, 8
pxor xmm4, xmm2
pxor xmm6, xmm1
movdqa xmm0, xmm4
movdqa xmm1, xmm4
movdqa xmm2, xmm4
pslld xmm0, 31
pslld xmm1, 30
pslld xmm2, 25
pxor xmm0, xmm1
pxor xmm0, xmm2
movdqa xmm1, xmm0
psrldq xmm1, 4
pslldq xmm0, 12
pxor xmm4, xmm0
movdqa xmm2, xmm4
movdqa xmm3, xmm4
movdqa xmm0, xmm4
psrld xmm2, 1
psrld xmm3, 2
psrld xmm0, 7
pxor xmm2, xmm3
pxor xmm2, xmm0
pxor xmm2, xmm1
pxor xmm2, xmm4
pxor xmm6, xmm2
movdqu xmm5, OWORD PTR [rsp]
L_AES_GCM_encrypt_update_aesni_done_128:
mov edx, r9d
cmp edi, edx
jge L_AES_GCM_encrypt_update_aesni_done_enc
mov r13d, r9d
and r13d, 4294967280
cmp edi, r13d
jge L_AES_GCM_encrypt_update_aesni_last_block_done
lea rcx, QWORD PTR [r11+rdi]
lea rdx, QWORD PTR [r10+rdi]
movdqu xmm8, OWORD PTR [r15]
movdqa xmm9, xmm8
pshufb xmm8, OWORD PTR L_aes_gcm_bswap_epi64
paddd xmm9, OWORD PTR L_aes_gcm_one
pxor xmm8, [rax]
movdqu OWORD PTR [r15], xmm9
aesenc xmm8, [rax+16]
aesenc xmm8, [rax+32]
aesenc xmm8, [rax+48]
aesenc xmm8, [rax+64]
aesenc xmm8, [rax+80]
aesenc xmm8, [rax+96]
aesenc xmm8, [rax+112]
aesenc xmm8, [rax+128]
aesenc xmm8, [rax+144]
cmp r8d, 11
movdqa xmm9, OWORD PTR [rax+160]
jl L_AES_GCM_encrypt_update_aesni_aesenc_block_aesenc_avx_last
aesenc xmm8, xmm9
aesenc xmm8, [rax+176]
cmp r8d, 13
movdqa xmm9, OWORD PTR [rax+192]
jl L_AES_GCM_encrypt_update_aesni_aesenc_block_aesenc_avx_last
aesenc xmm8, xmm9
aesenc xmm8, [rax+208]
movdqa xmm9, OWORD PTR [rax+224]
L_AES_GCM_encrypt_update_aesni_aesenc_block_aesenc_avx_last:
aesenclast xmm8, xmm9
movdqu xmm9, OWORD PTR [rcx]
pxor xmm8, xmm9
movdqu OWORD PTR [rdx], xmm8
pshufb xmm8, OWORD PTR L_aes_gcm_bswap_mask
pxor xmm6, xmm8
add edi, 16
cmp edi, r13d
jge L_AES_GCM_encrypt_update_aesni_last_block_ghash
L_AES_GCM_encrypt_update_aesni_last_block_start:
lea rcx, QWORD PTR [r11+rdi]
lea rdx, QWORD PTR [r10+rdi]
movdqu xmm8, OWORD PTR [r15]
movdqa xmm9, xmm8
pshufb xmm8, OWORD PTR L_aes_gcm_bswap_epi64
paddd xmm9, OWORD PTR L_aes_gcm_one
pxor xmm8, [rax]
movdqu OWORD PTR [r15], xmm9
movdqa xmm10, xmm6
pclmulqdq xmm10, xmm5, 16
aesenc xmm8, [rax+16]
aesenc xmm8, [rax+32]
movdqa xmm11, xmm6
pclmulqdq xmm11, xmm5, 1
aesenc xmm8, [rax+48]
aesenc xmm8, [rax+64]
movdqa xmm12, xmm6
pclmulqdq xmm12, xmm5, 0
aesenc xmm8, [rax+80]
movdqa xmm1, xmm6
pclmulqdq xmm1, xmm5, 17
aesenc xmm8, [rax+96]
pxor xmm10, xmm11
movdqa xmm2, xmm10
psrldq xmm10, 8
pslldq xmm2, 8
aesenc xmm8, [rax+112]
movdqa xmm3, xmm1
pxor xmm2, xmm12
pxor xmm3, xmm10
movdqa xmm0, OWORD PTR L_aes_gcm_mod2_128
movdqa xmm11, xmm2
pclmulqdq xmm11, xmm0, 16
aesenc xmm8, [rax+128]
pshufd xmm10, xmm2, 78
pxor xmm10, xmm11
movdqa xmm11, xmm10
pclmulqdq xmm11, xmm0, 16
aesenc xmm8, [rax+144]
pshufd xmm6, xmm10, 78
pxor xmm6, xmm11
pxor xmm6, xmm3
cmp r8d, 11
movdqa xmm9, OWORD PTR [rax+160]
jl L_AES_GCM_encrypt_update_aesni_aesenc_gfmul_last
aesenc xmm8, xmm9
aesenc xmm8, [rax+176]
cmp r8d, 13
movdqa xmm9, OWORD PTR [rax+192]
jl L_AES_GCM_encrypt_update_aesni_aesenc_gfmul_last
aesenc xmm8, xmm9
aesenc xmm8, [rax+208]
movdqa xmm9, OWORD PTR [rax+224]
L_AES_GCM_encrypt_update_aesni_aesenc_gfmul_last:
aesenclast xmm8, xmm9
movdqu xmm9, OWORD PTR [rcx]
pxor xmm8, xmm9
movdqu OWORD PTR [rdx], xmm8
pshufb xmm8, OWORD PTR L_aes_gcm_bswap_mask
pxor xmm6, xmm8
add edi, 16
cmp edi, r13d
jl L_AES_GCM_encrypt_update_aesni_last_block_start
L_AES_GCM_encrypt_update_aesni_last_block_ghash:
pshufd xmm9, xmm5, 78
pshufd xmm10, xmm6, 78
movdqa xmm11, xmm6
movdqa xmm8, xmm6
pclmulqdq xmm11, xmm5, 17
pclmulqdq xmm8, xmm5, 0
pxor xmm9, xmm5
pxor xmm10, xmm6
pclmulqdq xmm9, xmm10, 0
pxor xmm9, xmm8
pxor xmm9, xmm11
movdqa xmm10, xmm9
movdqa xmm6, xmm11
pslldq xmm10, 8
psrldq xmm9, 8
pxor xmm8, xmm10
pxor xmm6, xmm9
movdqa xmm12, xmm8
movdqa xmm13, xmm8
movdqa xmm14, xmm8
pslld xmm12, 31
pslld xmm13, 30
pslld xmm14, 25
pxor xmm12, xmm13
pxor xmm12, xmm14
movdqa xmm13, xmm12
psrldq xmm13, 4
pslldq xmm12, 12
pxor xmm8, xmm12
movdqa xmm14, xmm8
movdqa xmm10, xmm8
movdqa xmm9, xmm8
psrld xmm14, 1
psrld xmm10, 2
psrld xmm9, 7
pxor xmm14, xmm10
pxor xmm14, xmm9
pxor xmm14, xmm13
pxor xmm14, xmm8
pxor xmm6, xmm14
L_AES_GCM_encrypt_update_aesni_last_block_done:
L_AES_GCM_encrypt_update_aesni_done_enc:
movdqa OWORD PTR [r12], xmm6
movdqu xmm6, OWORD PTR [rsp+160]
movdqu xmm7, OWORD PTR [rsp+176]
movdqu xmm8, OWORD PTR [rsp+192]
movdqu xmm9, OWORD PTR [rsp+208]
movdqu xmm10, OWORD PTR [rsp+224]
movdqu xmm11, OWORD PTR [rsp+240]
movdqu xmm12, OWORD PTR [rsp+256]
movdqu xmm13, OWORD PTR [rsp+272]
movdqu xmm14, OWORD PTR [rsp+288]
movdqu xmm15, OWORD PTR [rsp+304]
add rsp, 320
pop rdi
pop r15
pop r14
pop r12
pop r13
ret
AES_GCM_encrypt_update_aesni ENDP
_text ENDS
_text SEGMENT READONLY PARA
;-----------------------------------------------------------------------
; AES_GCM_encrypt_final_aesni
; Finalize AES-GCM encryption: fold the (AAD length || ciphertext length)
; block into the GHASH state, perform the final GHASH multiply by the hash
; key, and XOR with the encrypted initial counter block to produce the
; authentication tag.
; ABI:   Microsoft x64 (Windows); MASM/Intel syntax.
; In:    rcx       = pointer to current 16-byte GHASH state
;        rdx       = pointer to output authentication tag buffer
;        r8d       = requested tag length in bytes (16 takes the fast path)
;        r9d       = total ciphertext length in bytes
;        [rsp+40]  = total AAD length in bytes            (5th arg)
;        [rsp+48]  = pointer to 16-byte hash key H        (6th arg)
;        [rsp+56]  = pointer to E_K(Y0), the encrypted
;                    initial counter block                (7th arg)
; NOTE(review): parameter roles inferred from the register usage below —
; confirm against the C prototype in the wolfSSL sources.
; Out:   tag bytes written to [rdx]; no register return value.
; Clobb: rax, rcx, rdx, r9, r10, r11, xmm0, xmm4, xmm5, flags.
;        r12, r13, r14 and xmm6-xmm13 are saved/restored (Win64
;        callee-saved).
;-----------------------------------------------------------------------
AES_GCM_encrypt_final_aesni PROC
push r13
push r12
push r14
mov rax, rcx                    ; rax = GHASH state pointer
mov r10d, r9d                   ; r10d = ciphertext length (bytes)
mov r9, rdx                     ; r9 = tag output pointer
mov r11d, DWORD PTR [rsp+64]    ; r11d = AAD length (bytes); [rsp+40] before the 3 pushes
mov r12, QWORD PTR [rsp+72]     ; r12 = pointer to hash key H
mov r14, QWORD PTR [rsp+80]     ; r14 = pointer to E_K(Y0)
sub rsp, 144
; Save Win64 callee-saved XMM registers used below.
movdqu OWORD PTR [rsp+16], xmm6
movdqu OWORD PTR [rsp+32], xmm7
movdqu OWORD PTR [rsp+48], xmm8
movdqu OWORD PTR [rsp+64], xmm9
movdqu OWORD PTR [rsp+80], xmm10
movdqu OWORD PTR [rsp+96], xmm11
movdqu OWORD PTR [rsp+112], xmm12
movdqu OWORD PTR [rsp+128], xmm13
movdqa xmm4, OWORD PTR [rax]    ; xmm4 = current GHASH state X
movdqa xmm5, OWORD PTR [r12]    ; xmm5 = hash key H
movdqa xmm6, OWORD PTR [r14]    ; xmm6 = E_K(Y0); XORed into the tag at the end
; Multiply H by x in GF(2^128): a 128-bit left shift by one (psllq/psrlq
; pair carries the bit across the 64-bit lane boundary via pslldq)...
movdqa xmm8, xmm5
movdqa xmm7, xmm5
psrlq xmm8, 63                  ; per-lane carry-out bits
psllq xmm7, 1
pslldq xmm8, 8                  ; move low-lane carry into the high lane
por xmm7, xmm8                  ; xmm7 = H << 1 (full 128-bit shift)
; ...then conditionally XOR the reduction constant if H's top bit was set.
pshufd xmm5, xmm5, 255          ; broadcast top dword of H
psrad xmm5, 31                  ; all-ones mask iff top bit set
pand xmm5, OWORD PTR L_aes_gcm_mod2_128
pxor xmm5, xmm7                 ; xmm5 = H * x reduced mod the GCM polynomial
; Build the GHASH length block from the bit lengths and fold it into X.
mov edx, r10d
mov ecx, r11d
shl rdx, 3                      ; ciphertext bytes -> bits
shl rcx, 3                      ; AAD bytes -> bits
pinsrq xmm0, rdx, 0
pinsrq xmm0, rcx, 1
pxor xmm4, xmm0                 ; X ^= length block
; Final GHASH multiply X * H using Karatsuba (3 x PCLMULQDQ).
pshufd xmm8, xmm5, 78           ; H with qwords swapped
pshufd xmm9, xmm4, 78           ; X with qwords swapped
movdqa xmm10, xmm4
movdqa xmm7, xmm4
pclmulqdq xmm10, xmm5, 17       ; hi(X) * hi(H)
pclmulqdq xmm7, xmm5, 0         ; lo(X) * lo(H)
pxor xmm8, xmm5                 ; hi(H) ^ lo(H)
pxor xmm9, xmm4                 ; hi(X) ^ lo(X)
pclmulqdq xmm8, xmm9, 0         ; middle product
pxor xmm8, xmm7
pxor xmm8, xmm10                ; xmm8 = Karatsuba middle term
; Combine into a 256-bit product: xmm7 = low 128 bits, xmm4 = high 128 bits.
movdqa xmm9, xmm8
movdqa xmm4, xmm10
pslldq xmm9, 8                  ; middle term's contribution to the low half
psrldq xmm8, 8                  ; middle term's contribution to the high half
pxor xmm7, xmm9
pxor xmm4, xmm8
; Reduce the 256-bit product modulo the GCM polynomial using the
; shift-and-XOR method (left shifts by 31/30/25, right shifts by 1/2/7).
movdqa xmm11, xmm7
movdqa xmm12, xmm7
movdqa xmm13, xmm7
pslld xmm11, 31
pslld xmm12, 30
pslld xmm13, 25
pxor xmm11, xmm12
pxor xmm11, xmm13
movdqa xmm12, xmm11
psrldq xmm12, 4                 ; spill of the shifted term kept for later
pslldq xmm11, 12
pxor xmm7, xmm11
movdqa xmm13, xmm7
movdqa xmm9, xmm7
movdqa xmm8, xmm7
psrld xmm13, 1
psrld xmm9, 2
psrld xmm8, 7
pxor xmm13, xmm9
pxor xmm13, xmm8
pxor xmm13, xmm12
pxor xmm13, xmm7
pxor xmm4, xmm13                ; xmm4 = reduced GHASH result
pshufb xmm4, OWORD PTR L_aes_gcm_bswap_mask ; restore big-endian byte order
movdqu xmm0, xmm6
pxor xmm0, xmm4                 ; tag = E_K(Y0) ^ GHASH
cmp r8d, 16
je L_AES_GCM_encrypt_final_aesni_store_tag_16 ; fast path: full 16-byte tag
; Partial tag: spill to the stack and copy r8d bytes one at a time.
xor rcx, rcx
movdqu OWORD PTR [rsp], xmm0
L_AES_GCM_encrypt_final_aesni_store_tag_loop:
movzx r13d, BYTE PTR [rsp+rcx]
mov BYTE PTR [r9+rcx], r13b
inc ecx
cmp ecx, r8d
jne L_AES_GCM_encrypt_final_aesni_store_tag_loop
jmp L_AES_GCM_encrypt_final_aesni_store_tag_done
L_AES_GCM_encrypt_final_aesni_store_tag_16:
movdqu OWORD PTR [r9], xmm0     ; store the full 16-byte tag
L_AES_GCM_encrypt_final_aesni_store_tag_done:
; Restore callee-saved XMM registers and the stack frame.
movdqu xmm6, OWORD PTR [rsp+16]
movdqu xmm7, OWORD PTR [rsp+32]
movdqu xmm8, OWORD PTR [rsp+48]
movdqu xmm9, OWORD PTR [rsp+64]
movdqu xmm10, OWORD PTR [rsp+80]
movdqu xmm11, OWORD PTR [rsp+96]
movdqu xmm12, OWORD PTR [rsp+112]
movdqu xmm13, OWORD PTR [rsp+128]
add rsp, 144
pop r14
pop r12
pop r13
ret
AES_GCM_encrypt_final_aesni ENDP
_text ENDS
_text SEGMENT READONLY PARA
AES_GCM_decrypt_update_aesni PROC
push r13
push r12
push r14
push r15
push rdi
push rsi
mov rax, rcx
mov r10, r8
mov r8d, edx
mov r11, r9
mov r9d, DWORD PTR [rsp+88]
mov r12, QWORD PTR [rsp+96]
mov r14, QWORD PTR [rsp+104]
mov r15, QWORD PTR [rsp+112]
sub rsp, 328
movdqu OWORD PTR [rsp+168], xmm6
movdqu OWORD PTR [rsp+184], xmm7
movdqu OWORD PTR [rsp+200], xmm8
movdqu OWORD PTR [rsp+216], xmm9
movdqu OWORD PTR [rsp+232], xmm10
movdqu OWORD PTR [rsp+248], xmm11
movdqu OWORD PTR [rsp+264], xmm12
movdqu OWORD PTR [rsp+280], xmm13
movdqu OWORD PTR [rsp+296], xmm14
movdqu OWORD PTR [rsp+312], xmm15
movdqa xmm6, OWORD PTR [r12]
movdqa xmm5, OWORD PTR [r14]
movdqa xmm9, xmm5
movdqa xmm8, xmm5
psrlq xmm9, 63
psllq xmm8, 1
pslldq xmm9, 8
por xmm8, xmm9
pshufd xmm5, xmm5, 255
psrad xmm5, 31
pand xmm5, OWORD PTR L_aes_gcm_mod2_128
pxor xmm5, xmm8
xor edi, edi
cmp r9d, 128
mov r13d, r9d
jl L_AES_GCM_decrypt_update_aesni_done_128
and r13d, 4294967168
movdqa xmm2, xmm6
; H ^ 1
movdqu OWORD PTR [rsp], xmm5
; H ^ 2
pshufd xmm9, xmm5, 78
pshufd xmm10, xmm5, 78
movdqa xmm11, xmm5
movdqa xmm8, xmm5
pclmulqdq xmm11, xmm5, 17
pclmulqdq xmm8, xmm5, 0
pxor xmm9, xmm5
pxor xmm10, xmm5
pclmulqdq xmm9, xmm10, 0
pxor xmm9, xmm8
pxor xmm9, xmm11
movdqa xmm10, xmm9
movdqa xmm0, xmm11
pslldq xmm10, 8
psrldq xmm9, 8
pxor xmm8, xmm10
pxor xmm0, xmm9
movdqa xmm12, xmm8
movdqa xmm13, xmm8
movdqa xmm14, xmm8
pslld xmm12, 31
pslld xmm13, 30
pslld xmm14, 25
pxor xmm12, xmm13
pxor xmm12, xmm14
movdqa xmm13, xmm12
psrldq xmm13, 4
pslldq xmm12, 12
pxor xmm8, xmm12
movdqa xmm14, xmm8
movdqa xmm10, xmm8
movdqa xmm9, xmm8
psrld xmm14, 1
psrld xmm10, 2
psrld xmm9, 7
pxor xmm14, xmm10
pxor xmm14, xmm9
pxor xmm14, xmm13
pxor xmm14, xmm8
pxor xmm0, xmm14
movdqu OWORD PTR [rsp+16], xmm0
; H ^ 3
pshufd xmm9, xmm5, 78
pshufd xmm10, xmm0, 78
movdqa xmm11, xmm0
movdqa xmm8, xmm0
pclmulqdq xmm11, xmm5, 17
pclmulqdq xmm8, xmm5, 0
pxor xmm9, xmm5
pxor xmm10, xmm0
pclmulqdq xmm9, xmm10, 0
pxor xmm9, xmm8
pxor xmm9, xmm11
movdqa xmm10, xmm9
movdqa xmm1, xmm11
pslldq xmm10, 8
psrldq xmm9, 8
pxor xmm8, xmm10
pxor xmm1, xmm9
movdqa xmm12, xmm8
movdqa xmm13, xmm8
movdqa xmm14, xmm8
pslld xmm12, 31
pslld xmm13, 30
pslld xmm14, 25
pxor xmm12, xmm13
pxor xmm12, xmm14
movdqa xmm13, xmm12
psrldq xmm13, 4
pslldq xmm12, 12
pxor xmm8, xmm12
movdqa xmm14, xmm8
movdqa xmm10, xmm8
movdqa xmm9, xmm8
psrld xmm14, 1
psrld xmm10, 2
psrld xmm9, 7
pxor xmm14, xmm10
pxor xmm14, xmm9
pxor xmm14, xmm13
pxor xmm14, xmm8
pxor xmm1, xmm14
movdqu OWORD PTR [rsp+32], xmm1
; H ^ 4
pshufd xmm9, xmm0, 78
pshufd xmm10, xmm0, 78
movdqa xmm11, xmm0
movdqa xmm8, xmm0
pclmulqdq xmm11, xmm0, 17
pclmulqdq xmm8, xmm0, 0
pxor xmm9, xmm0
pxor xmm10, xmm0
pclmulqdq xmm9, xmm10, 0
pxor xmm9, xmm8
pxor xmm9, xmm11
movdqa xmm10, xmm9
movdqa xmm3, xmm11
pslldq xmm10, 8
psrldq xmm9, 8
pxor xmm8, xmm10
pxor xmm3, xmm9
movdqa xmm12, xmm8
movdqa xmm13, xmm8
movdqa xmm14, xmm8
pslld xmm12, 31
pslld xmm13, 30
pslld xmm14, 25
pxor xmm12, xmm13
pxor xmm12, xmm14
movdqa xmm13, xmm12
psrldq xmm13, 4
pslldq xmm12, 12
pxor xmm8, xmm12
movdqa xmm14, xmm8
movdqa xmm10, xmm8
movdqa xmm9, xmm8
psrld xmm14, 1
psrld xmm10, 2
psrld xmm9, 7
pxor xmm14, xmm10
pxor xmm14, xmm9
pxor xmm14, xmm13
pxor xmm14, xmm8
pxor xmm3, xmm14
movdqu OWORD PTR [rsp+48], xmm3
; H ^ 5
pshufd xmm9, xmm0, 78
pshufd xmm10, xmm1, 78
movdqa xmm11, xmm1
movdqa xmm8, xmm1
pclmulqdq xmm11, xmm0, 17
pclmulqdq xmm8, xmm0, 0
pxor xmm9, xmm0
pxor xmm10, xmm1
pclmulqdq xmm9, xmm10, 0
pxor xmm9, xmm8
pxor xmm9, xmm11
movdqa xmm10, xmm9
movdqa xmm7, xmm11
pslldq xmm10, 8
psrldq xmm9, 8
pxor xmm8, xmm10
pxor xmm7, xmm9
movdqa xmm12, xmm8
movdqa xmm13, xmm8
movdqa xmm14, xmm8
pslld xmm12, 31
pslld xmm13, 30
pslld xmm14, 25
pxor xmm12, xmm13
pxor xmm12, xmm14
movdqa xmm13, xmm12
psrldq xmm13, 4
pslldq xmm12, 12
pxor xmm8, xmm12
movdqa xmm14, xmm8
movdqa xmm10, xmm8
movdqa xmm9, xmm8
psrld xmm14, 1
psrld xmm10, 2
psrld xmm9, 7
pxor xmm14, xmm10
pxor xmm14, xmm9
pxor xmm14, xmm13
pxor xmm14, xmm8
pxor xmm7, xmm14
movdqu OWORD PTR [rsp+64], xmm7
; H ^ 6
pshufd xmm9, xmm1, 78
pshufd xmm10, xmm1, 78
movdqa xmm11, xmm1
movdqa xmm8, xmm1
pclmulqdq xmm11, xmm1, 17
pclmulqdq xmm8, xmm1, 0
pxor xmm9, xmm1
pxor xmm10, xmm1
pclmulqdq xmm9, xmm10, 0
pxor xmm9, xmm8
pxor xmm9, xmm11
movdqa xmm10, xmm9
movdqa xmm7, xmm11
pslldq xmm10, 8
psrldq xmm9, 8
pxor xmm8, xmm10
pxor xmm7, xmm9
movdqa xmm12, xmm8
movdqa xmm13, xmm8
movdqa xmm14, xmm8
pslld xmm12, 31
pslld xmm13, 30
pslld xmm14, 25
pxor xmm12, xmm13
pxor xmm12, xmm14
movdqa xmm13, xmm12
psrldq xmm13, 4
pslldq xmm12, 12
pxor xmm8, xmm12
movdqa xmm14, xmm8
movdqa xmm10, xmm8
movdqa xmm9, xmm8
psrld xmm14, 1
psrld xmm10, 2
psrld xmm9, 7
pxor xmm14, xmm10
pxor xmm14, xmm9
pxor xmm14, xmm13
pxor xmm14, xmm8
pxor xmm7, xmm14
movdqu OWORD PTR [rsp+80], xmm7
; H ^ 7
pshufd xmm9, xmm1, 78
pshufd xmm10, xmm3, 78
movdqa xmm11, xmm3
movdqa xmm8, xmm3
pclmulqdq xmm11, xmm1, 17
pclmulqdq xmm8, xmm1, 0
pxor xmm9, xmm1
pxor xmm10, xmm3
pclmulqdq xmm9, xmm10, 0
pxor xmm9, xmm8
pxor xmm9, xmm11
movdqa xmm10, xmm9
movdqa xmm7, xmm11
pslldq xmm10, 8
psrldq xmm9, 8
pxor xmm8, xmm10
pxor xmm7, xmm9
movdqa xmm12, xmm8
movdqa xmm13, xmm8
movdqa xmm14, xmm8
pslld xmm12, 31
pslld xmm13, 30
pslld xmm14, 25
pxor xmm12, xmm13
pxor xmm12, xmm14
movdqa xmm13, xmm12
psrldq xmm13, 4
pslldq xmm12, 12
pxor xmm8, xmm12
movdqa xmm14, xmm8
movdqa xmm10, xmm8
movdqa xmm9, xmm8
psrld xmm14, 1
psrld xmm10, 2
psrld xmm9, 7
pxor xmm14, xmm10
pxor xmm14, xmm9
pxor xmm14, xmm13
pxor xmm14, xmm8
pxor xmm7, xmm14
movdqu OWORD PTR [rsp+96], xmm7
; H ^ 8
pshufd xmm9, xmm3, 78
pshufd xmm10, xmm3, 78
movdqa xmm11, xmm3
movdqa xmm8, xmm3
pclmulqdq xmm11, xmm3, 17
pclmulqdq xmm8, xmm3, 0
pxor xmm9, xmm3
pxor xmm10, xmm3
pclmulqdq xmm9, xmm10, 0
pxor xmm9, xmm8
pxor xmm9, xmm11
movdqa xmm10, xmm9
movdqa xmm7, xmm11
pslldq xmm10, 8
psrldq xmm9, 8
pxor xmm8, xmm10
pxor xmm7, xmm9
movdqa xmm12, xmm8
movdqa xmm13, xmm8
movdqa xmm14, xmm8
pslld xmm12, 31
pslld xmm13, 30
pslld xmm14, 25
pxor xmm12, xmm13
pxor xmm12, xmm14
movdqa xmm13, xmm12
psrldq xmm13, 4
pslldq xmm12, 12
pxor xmm8, xmm12
movdqa xmm14, xmm8
movdqa xmm10, xmm8
movdqa xmm9, xmm8
psrld xmm14, 1
psrld xmm10, 2
psrld xmm9, 7
pxor xmm14, xmm10
pxor xmm14, xmm9
pxor xmm14, xmm13
pxor xmm14, xmm8
pxor xmm7, xmm14
movdqu OWORD PTR [rsp+112], xmm7
L_AES_GCM_decrypt_update_aesni_ghash_128:
lea rcx, QWORD PTR [r11+rdi]
lea rdx, QWORD PTR [r10+rdi]
movdqu xmm8, OWORD PTR [r15]
movdqa xmm1, OWORD PTR L_aes_gcm_bswap_epi64
movdqa xmm0, xmm8
pshufb xmm8, xmm1
movdqa xmm9, xmm0
paddd xmm9, OWORD PTR L_aes_gcm_one
pshufb xmm9, xmm1
movdqa xmm10, xmm0
paddd xmm10, OWORD PTR L_aes_gcm_two
pshufb xmm10, xmm1
movdqa xmm11, xmm0
paddd xmm11, OWORD PTR L_aes_gcm_three
pshufb xmm11, xmm1
movdqa xmm12, xmm0
paddd xmm12, OWORD PTR L_aes_gcm_four
pshufb xmm12, xmm1
movdqa xmm13, xmm0
paddd xmm13, OWORD PTR L_aes_gcm_five
pshufb xmm13, xmm1
movdqa xmm14, xmm0
paddd xmm14, OWORD PTR L_aes_gcm_six
pshufb xmm14, xmm1
movdqa xmm15, xmm0
paddd xmm15, OWORD PTR L_aes_gcm_seven
pshufb xmm15, xmm1
paddd xmm0, OWORD PTR L_aes_gcm_eight
movdqa xmm7, OWORD PTR [rax]
movdqu OWORD PTR [r15], xmm0
pxor xmm8, xmm7
pxor xmm9, xmm7
pxor xmm10, xmm7
pxor xmm11, xmm7
pxor xmm12, xmm7
pxor xmm13, xmm7
pxor xmm14, xmm7
pxor xmm15, xmm7
movdqu xmm7, OWORD PTR [rsp+112]
movdqu xmm0, OWORD PTR [rcx]
aesenc xmm8, [rax+16]
pshufb xmm0, OWORD PTR L_aes_gcm_bswap_mask
pxor xmm0, xmm2
pshufd xmm1, xmm7, 78
pshufd xmm5, xmm0, 78
pxor xmm1, xmm7
pxor xmm5, xmm0
movdqa xmm3, xmm0
pclmulqdq xmm3, xmm7, 17
aesenc xmm9, [rax+16]
aesenc xmm10, [rax+16]
movdqa xmm2, xmm0
pclmulqdq xmm2, xmm7, 0
aesenc xmm11, [rax+16]
aesenc xmm12, [rax+16]
pclmulqdq xmm1, xmm5, 0
aesenc xmm13, [rax+16]
aesenc xmm14, [rax+16]
aesenc xmm15, [rax+16]
pxor xmm1, xmm2
pxor xmm1, xmm3
movdqu xmm7, OWORD PTR [rsp+96]
movdqu xmm0, OWORD PTR [rcx+16]
pshufd xmm4, xmm7, 78
pshufb xmm0, OWORD PTR L_aes_gcm_bswap_mask
aesenc xmm8, [rax+32]
pxor xmm4, xmm7
pshufd xmm5, xmm0, 78
pxor xmm5, xmm0
movdqa xmm6, xmm0
pclmulqdq xmm6, xmm7, 17
aesenc xmm9, [rax+32]
aesenc xmm10, [rax+32]
pclmulqdq xmm7, xmm0, 0
aesenc xmm11, [rax+32]
aesenc xmm12, [rax+32]
pclmulqdq xmm4, xmm5, 0
aesenc xmm13, [rax+32]
aesenc xmm14, [rax+32]
aesenc xmm15, [rax+32]
pxor xmm1, xmm7
pxor xmm2, xmm7
pxor xmm1, xmm6
pxor xmm3, xmm6
pxor xmm1, xmm4
movdqu xmm7, OWORD PTR [rsp+80]
movdqu xmm0, OWORD PTR [rcx+32]
pshufd xmm4, xmm7, 78
pshufb xmm0, OWORD PTR L_aes_gcm_bswap_mask
aesenc xmm8, [rax+48]
pxor xmm4, xmm7
pshufd xmm5, xmm0, 78
pxor xmm5, xmm0
movdqa xmm6, xmm0
pclmulqdq xmm6, xmm7, 17
aesenc xmm9, [rax+48]
aesenc xmm10, [rax+48]
pclmulqdq xmm7, xmm0, 0
aesenc xmm11, [rax+48]
aesenc xmm12, [rax+48]
pclmulqdq xmm4, xmm5, 0
aesenc xmm13, [rax+48]
aesenc xmm14, [rax+48]
aesenc xmm15, [rax+48]
pxor xmm1, xmm7
pxor xmm2, xmm7
pxor xmm1, xmm6
pxor xmm3, xmm6
pxor xmm1, xmm4
movdqu xmm7, OWORD PTR [rsp+64]
movdqu xmm0, OWORD PTR [rcx+48]
pshufd xmm4, xmm7, 78
pshufb xmm0, OWORD PTR L_aes_gcm_bswap_mask
aesenc xmm8, [rax+64]
pxor xmm4, xmm7
pshufd xmm5, xmm0, 78
pxor xmm5, xmm0
movdqa xmm6, xmm0
pclmulqdq xmm6, xmm7, 17
aesenc xmm9, [rax+64]
aesenc xmm10, [rax+64]
pclmulqdq xmm7, xmm0, 0
aesenc xmm11, [rax+64]
aesenc xmm12, [rax+64]
pclmulqdq xmm4, xmm5, 0
aesenc xmm13, [rax+64]
aesenc xmm14, [rax+64]
aesenc xmm15, [rax+64]
pxor xmm1, xmm7
pxor xmm2, xmm7
pxor xmm1, xmm6
pxor xmm3, xmm6
pxor xmm1, xmm4
movdqu xmm7, OWORD PTR [rsp+48]
movdqu xmm0, OWORD PTR [rcx+64]
pshufd xmm4, xmm7, 78
pshufb xmm0, OWORD PTR L_aes_gcm_bswap_mask
aesenc xmm8, [rax+80]
pxor xmm4, xmm7
pshufd xmm5, xmm0, 78
pxor xmm5, xmm0
movdqa xmm6, xmm0
pclmulqdq xmm6, xmm7, 17
aesenc xmm9, [rax+80]
aesenc xmm10, [rax+80]
pclmulqdq xmm7, xmm0, 0
aesenc xmm11, [rax+80]
aesenc xmm12, [rax+80]
pclmulqdq xmm4, xmm5, 0
aesenc xmm13, [rax+80]
aesenc xmm14, [rax+80]
aesenc xmm15, [rax+80]
pxor xmm1, xmm7
pxor xmm2, xmm7
pxor xmm1, xmm6
pxor xmm3, xmm6
pxor xmm1, xmm4
movdqu xmm7, OWORD PTR [rsp+32]
movdqu xmm0, OWORD PTR [rcx+80]
pshufd xmm4, xmm7, 78
pshufb xmm0, OWORD PTR L_aes_gcm_bswap_mask
aesenc xmm8, [rax+96]
pxor xmm4, xmm7
pshufd xmm5, xmm0, 78
pxor xmm5, xmm0
movdqa xmm6, xmm0
pclmulqdq xmm6, xmm7, 17
aesenc xmm9, [rax+96]
aesenc xmm10, [rax+96]
pclmulqdq xmm7, xmm0, 0
aesenc xmm11, [rax+96]
aesenc xmm12, [rax+96]
pclmulqdq xmm4, xmm5, 0
aesenc xmm13, [rax+96]
aesenc xmm14, [rax+96]
aesenc xmm15, [rax+96]
pxor xmm1, xmm7
pxor xmm2, xmm7
pxor xmm1, xmm6
pxor xmm3, xmm6
pxor xmm1, xmm4
movdqu xmm7, OWORD PTR [rsp+16]
movdqu xmm0, OWORD PTR [rcx+96]
pshufd xmm4, xmm7, 78
pshufb xmm0, OWORD PTR L_aes_gcm_bswap_mask
aesenc xmm8, [rax+112]
pxor xmm4, xmm7
pshufd xmm5, xmm0, 78
pxor xmm5, xmm0
movdqa xmm6, xmm0
pclmulqdq xmm6, xmm7, 17
aesenc xmm9, [rax+112]
aesenc xmm10, [rax+112]
pclmulqdq xmm7, xmm0, 0
aesenc xmm11, [rax+112]
aesenc xmm12, [rax+112]
pclmulqdq xmm4, xmm5, 0
aesenc xmm13, [rax+112]
aesenc xmm14, [rax+112]
aesenc xmm15, [rax+112]
pxor xmm1, xmm7
pxor xmm2, xmm7
pxor xmm1, xmm6
pxor xmm3, xmm6
pxor xmm1, xmm4
movdqu xmm7, OWORD PTR [rsp]
movdqu xmm0, OWORD PTR [rcx+112]
pshufd xmm4, xmm7, 78
pshufb xmm0, OWORD PTR L_aes_gcm_bswap_mask
aesenc xmm8, [rax+128]
pxor xmm4, xmm7
pshufd xmm5, xmm0, 78
pxor xmm5, xmm0
movdqa xmm6, xmm0
pclmulqdq xmm6, xmm7, 17
aesenc xmm9, [rax+128]
aesenc xmm10, [rax+128]
pclmulqdq xmm7, xmm0, 0
aesenc xmm11, [rax+128]
aesenc xmm12, [rax+128]
pclmulqdq xmm4, xmm5, 0
aesenc xmm13, [rax+128]
aesenc xmm14, [rax+128]
aesenc xmm15, [rax+128]
pxor xmm1, xmm7
pxor xmm2, xmm7
pxor xmm1, xmm6
pxor xmm3, xmm6
pxor xmm1, xmm4
movdqa xmm5, xmm1
psrldq xmm1, 8
pslldq xmm5, 8
aesenc xmm8, [rax+144]
pxor xmm2, xmm5
pxor xmm3, xmm1
movdqa xmm7, xmm2
movdqa xmm4, xmm2
movdqa xmm5, xmm2
aesenc xmm9, [rax+144]
pslld xmm7, 31
pslld xmm4, 30
pslld xmm5, 25
aesenc xmm10, [rax+144]
pxor xmm7, xmm4
pxor xmm7, xmm5
aesenc xmm11, [rax+144]
movdqa xmm4, xmm7
pslldq xmm7, 12
psrldq xmm4, 4
aesenc xmm12, [rax+144]
pxor xmm2, xmm7
movdqa xmm5, xmm2
movdqa xmm1, xmm2
movdqa xmm0, xmm2
aesenc xmm13, [rax+144]
psrld xmm5, 1
psrld xmm1, 2
psrld xmm0, 7
aesenc xmm14, [rax+144]
pxor xmm5, xmm1
pxor xmm5, xmm0
aesenc xmm15, [rax+144]
pxor xmm5, xmm4
pxor xmm2, xmm5
pxor xmm2, xmm3
cmp r8d, 11
movdqa xmm7, OWORD PTR [rax+160]
jl L_AES_GCM_decrypt_update_aesni_aesenc_128_ghash_avx_done
aesenc xmm8, xmm7
aesenc xmm9, xmm7
aesenc xmm10, xmm7
aesenc xmm11, xmm7
aesenc xmm12, xmm7
aesenc xmm13, xmm7
aesenc xmm14, xmm7
aesenc xmm15, xmm7
movdqa xmm7, OWORD PTR [rax+176]
aesenc xmm8, xmm7
aesenc xmm9, xmm7
aesenc xmm10, xmm7
aesenc xmm11, xmm7
aesenc xmm12, xmm7
aesenc xmm13, xmm7
aesenc xmm14, xmm7
aesenc xmm15, xmm7
cmp r8d, 13
movdqa xmm7, OWORD PTR [rax+192]
jl L_AES_GCM_decrypt_update_aesni_aesenc_128_ghash_avx_done
aesenc xmm8, xmm7
aesenc xmm9, xmm7
aesenc xmm10, xmm7
aesenc xmm11, xmm7
aesenc xmm12, xmm7
aesenc xmm13, xmm7
aesenc xmm14, xmm7
aesenc xmm15, xmm7
movdqa xmm7, OWORD PTR [rax+208]
aesenc xmm8, xmm7
aesenc xmm9, xmm7
aesenc xmm10, xmm7
aesenc xmm11, xmm7
aesenc xmm12, xmm7
aesenc xmm13, xmm7
aesenc xmm14, xmm7
aesenc xmm15, xmm7
movdqa xmm7, OWORD PTR [rax+224]
L_AES_GCM_decrypt_update_aesni_aesenc_128_ghash_avx_done:
aesenclast xmm8, xmm7
aesenclast xmm9, xmm7
movdqu xmm0, OWORD PTR [rcx]
movdqu xmm1, OWORD PTR [rcx+16]
pxor xmm8, xmm0
pxor xmm9, xmm1
movdqu OWORD PTR [rdx], xmm8
movdqu OWORD PTR [rdx+16], xmm9
aesenclast xmm10, xmm7
aesenclast xmm11, xmm7
movdqu xmm0, OWORD PTR [rcx+32]
movdqu xmm1, OWORD PTR [rcx+48]
pxor xmm10, xmm0
pxor xmm11, xmm1
movdqu OWORD PTR [rdx+32], xmm10
movdqu OWORD PTR [rdx+48], xmm11
aesenclast xmm12, xmm7
aesenclast xmm13, xmm7
movdqu xmm0, OWORD PTR [rcx+64]
movdqu xmm1, OWORD PTR [rcx+80]
pxor xmm12, xmm0
pxor xmm13, xmm1
movdqu OWORD PTR [rdx+64], xmm12
movdqu OWORD PTR [rdx+80], xmm13
aesenclast xmm14, xmm7
aesenclast xmm15, xmm7
movdqu xmm0, OWORD PTR [rcx+96]
movdqu xmm1, OWORD PTR [rcx+112]
pxor xmm14, xmm0
pxor xmm15, xmm1
movdqu OWORD PTR [rdx+96], xmm14
movdqu OWORD PTR [rdx+112], xmm15
add edi, 128
cmp edi, r13d
jl L_AES_GCM_decrypt_update_aesni_ghash_128
movdqa xmm6, xmm2
movdqu xmm5, OWORD PTR [rsp]
L_AES_GCM_decrypt_update_aesni_done_128:
mov edx, r9d
cmp edi, edx
jge L_AES_GCM_decrypt_update_aesni_done_dec
mov r13d, r9d
and r13d, 4294967280
cmp edi, r13d
jge L_AES_GCM_decrypt_update_aesni_last_block_done
L_AES_GCM_decrypt_update_aesni_last_block_start:
lea rcx, QWORD PTR [r11+rdi]
lea rdx, QWORD PTR [r10+rdi]
movdqu xmm1, OWORD PTR [rcx]
movdqa xmm0, xmm5
pshufb xmm1, OWORD PTR L_aes_gcm_bswap_mask
pxor xmm1, xmm6
movdqu xmm8, OWORD PTR [r15]
movdqa xmm9, xmm8
pshufb xmm8, OWORD PTR L_aes_gcm_bswap_epi64
paddd xmm9, OWORD PTR L_aes_gcm_one
pxor xmm8, [rax]
movdqu OWORD PTR [r15], xmm9
movdqa xmm10, xmm1
pclmulqdq xmm10, xmm0, 16
aesenc xmm8, [rax+16]
aesenc xmm8, [rax+32]
movdqa xmm11, xmm1
pclmulqdq xmm11, xmm0, 1
aesenc xmm8, [rax+48]
aesenc xmm8, [rax+64]
movdqa xmm12, xmm1
pclmulqdq xmm12, xmm0, 0
aesenc xmm8, [rax+80]
movdqa xmm1, xmm1
pclmulqdq xmm1, xmm0, 17
aesenc xmm8, [rax+96]
pxor xmm10, xmm11
movdqa xmm2, xmm10
psrldq xmm10, 8
pslldq xmm2, 8
aesenc xmm8, [rax+112]
movdqa xmm3, xmm1
pxor xmm2, xmm12
pxor xmm3, xmm10
movdqa xmm0, OWORD PTR L_aes_gcm_mod2_128
movdqa xmm11, xmm2
pclmulqdq xmm11, xmm0, 16
aesenc xmm8, [rax+128]
pshufd xmm10, xmm2, 78
pxor xmm10, xmm11
movdqa xmm11, xmm10
pclmulqdq xmm11, xmm0, 16
aesenc xmm8, [rax+144]
pshufd xmm6, xmm10, 78
pxor xmm6, xmm11
pxor xmm6, xmm3
cmp r8d, 11
movdqa xmm9, OWORD PTR [rax+160]
jl L_AES_GCM_decrypt_update_aesni_aesenc_gfmul_last
aesenc xmm8, xmm9
aesenc xmm8, [rax+176]
cmp r8d, 13
movdqa xmm9, OWORD PTR [rax+192]
jl L_AES_GCM_decrypt_update_aesni_aesenc_gfmul_last
aesenc xmm8, xmm9
aesenc xmm8, [rax+208]
movdqa xmm9, OWORD PTR [rax+224]
L_AES_GCM_decrypt_update_aesni_aesenc_gfmul_last:
aesenclast xmm8, xmm9
movdqu xmm9, OWORD PTR [rcx]
pxor xmm8, xmm9
movdqu OWORD PTR [rdx], xmm8
add edi, 16
cmp edi, r13d
jl L_AES_GCM_decrypt_update_aesni_last_block_start
L_AES_GCM_decrypt_update_aesni_last_block_done:
L_AES_GCM_decrypt_update_aesni_done_dec:
movdqa OWORD PTR [r12], xmm6
movdqu xmm6, OWORD PTR [rsp+168]
movdqu xmm7, OWORD PTR [rsp+184]
movdqu xmm8, OWORD PTR [rsp+200]
movdqu xmm9, OWORD PTR [rsp+216]
movdqu xmm10, OWORD PTR [rsp+232]
movdqu xmm11, OWORD PTR [rsp+248]
movdqu xmm12, OWORD PTR [rsp+264]
movdqu xmm13, OWORD PTR [rsp+280]
movdqu xmm14, OWORD PTR [rsp+296]
movdqu xmm15, OWORD PTR [rsp+312]
add rsp, 328
pop rsi
pop rdi
pop r15
pop r14
pop r12
pop r13
ret
AES_GCM_decrypt_update_aesni ENDP
_text ENDS
_text SEGMENT READONLY PARA
;-----------------------------------------------------------------------
; AES_GCM_decrypt_final_aesni
; Finalize GCM decryption: fold the bit-length block into the GHASH
; state, perform the final GHASH multiply by H, mask the result with
; the encrypted initial counter block, and compare against the
; caller-supplied authentication tag.
; ABI:  Microsoft x64 (args in rcx, rdx, r8, r9; remaining args on the
;       stack after the 32-byte shadow space; xmm6-xmm15 callee-saved).
; In:   rcx = pointer to 16-byte GHASH state X
;       rdx = pointer to expected authentication tag
;       r8d = tag length in bytes (1..16)
;       r9d = message length in bytes (presumably ciphertext length;
;             TODO confirm against the C caller)
;       stack arg 5 = AAD length in bytes
;       stack arg 6 = pointer to hash key H
;       stack arg 7 = pointer to E(K, Y0) (encrypted initial counter)
;       stack arg 8 = pointer to int result: 1 = tag match, 0 = mismatch
; Out:  DWORD at result pointer set; no other caller memory modified.
; Note: partial (<16 byte) tags are compared byte-wise with an
;       OR-accumulator so the comparison is constant time; the 16-byte
;       path uses a single PCMPEQB/PMOVMSKB compare.
;-----------------------------------------------------------------------
AES_GCM_decrypt_final_aesni PROC
; Save callee-saved integer registers.
push r13
push r12
push r14
push rbp
push r15
mov rax, rcx
mov r10d, r9d
mov r9, rdx
; Stack arguments 5..8 (offsets include the five pushes above).
mov r11d, DWORD PTR [rsp+80]
mov r12, QWORD PTR [rsp+88]
mov r14, QWORD PTR [rsp+96]
mov rbp, QWORD PTR [rsp+104]
; Spill the callee-saved XMM registers used here (xmm14 is untouched).
sub rsp, 160
movdqu OWORD PTR [rsp+16], xmm6
movdqu OWORD PTR [rsp+32], xmm7
movdqu OWORD PTR [rsp+48], xmm8
movdqu OWORD PTR [rsp+64], xmm9
movdqu OWORD PTR [rsp+80], xmm10
movdqu OWORD PTR [rsp+96], xmm11
movdqu OWORD PTR [rsp+112], xmm12
movdqu OWORD PTR [rsp+128], xmm13
movdqu OWORD PTR [rsp+144], xmm15
; xmm6 = GHASH state X, xmm5 = hash key H, xmm15 = E(K, Y0).
movdqa xmm6, OWORD PTR [rax]
movdqa xmm5, OWORD PTR [r12]
movdqa xmm15, OWORD PTR [r14]
; H = H << 1 in GF(2^128): shift left one bit across the 128-bit value,
; then XOR the reduction constant when the shifted-out top bit was set
; (sign-replicate the top dword to build the conditional mask).
movdqa xmm8, xmm5
movdqa xmm7, xmm5
psrlq xmm8, 63
psllq xmm7, 1
pslldq xmm8, 8
por xmm7, xmm8
pshufd xmm5, xmm5, 255
psrad xmm5, 31
pand xmm5, OWORD PTR L_aes_gcm_mod2_128
pxor xmm5, xmm7
; Build the GHASH length block: message bits in qword 0, AAD bits in
; qword 1 (qword order follows the state's representation; the final
; byte swap below restores wire order).
mov edx, r10d
mov ecx, r11d
shl rdx, 3
shl rcx, 3
pinsrq xmm0, rdx, 0
pinsrq xmm0, rcx, 1
pxor xmm6, xmm0
; ghash_gfmul: X = (X ^ lengths) * H via 3-multiply Karatsuba PCLMULQDQ.
pshufd xmm8, xmm5, 78
pshufd xmm9, xmm6, 78
movdqa xmm10, xmm6
movdqa xmm7, xmm6
pclmulqdq xmm10, xmm5, 17
pclmulqdq xmm7, xmm5, 0
pxor xmm8, xmm5
pxor xmm9, xmm6
pclmulqdq xmm8, xmm9, 0
pxor xmm8, xmm7
pxor xmm8, xmm10
; Assemble the 256-bit product: low half in xmm7, high half in xmm6.
movdqa xmm9, xmm8
movdqa xmm6, xmm10
pslldq xmm9, 8
psrldq xmm8, 8
pxor xmm7, xmm9
pxor xmm6, xmm8
; Reduce modulo the GHASH polynomial using the shift-and-XOR method.
movdqa xmm11, xmm7
movdqa xmm12, xmm7
movdqa xmm13, xmm7
pslld xmm11, 31
pslld xmm12, 30
pslld xmm13, 25
pxor xmm11, xmm12
pxor xmm11, xmm13
movdqa xmm12, xmm11
psrldq xmm12, 4
pslldq xmm11, 12
pxor xmm7, xmm11
movdqa xmm13, xmm7
movdqa xmm9, xmm7
movdqa xmm8, xmm7
psrld xmm13, 1
psrld xmm9, 2
psrld xmm8, 7
pxor xmm13, xmm9
pxor xmm13, xmm8
pxor xmm13, xmm12
pxor xmm13, xmm7
pxor xmm6, xmm13
; Byte-swap the hash back to wire order and mask with E(K, Y0);
; xmm0 now holds the computed authentication tag.
pshufb xmm6, OWORD PTR L_aes_gcm_bswap_mask
movdqu xmm0, xmm15
pxor xmm0, xmm6
cmp r8d, 16
je L_AES_GCM_decrypt_final_aesni_cmp_tag_16
; Partial tag: spill the computed tag and compare byte-by-byte,
; OR-accumulating differences so timing does not leak the mismatch
; position.
sub rsp, 16
xor rcx, rcx
xor r15, r15
movdqu OWORD PTR [rsp], xmm0
L_AES_GCM_decrypt_final_aesni_cmp_tag_loop:
movzx r13d, BYTE PTR [rsp+rcx]
xor r13b, BYTE PTR [r9+rcx]
or r15b, r13b
inc ecx
cmp ecx, r8d
jne L_AES_GCM_decrypt_final_aesni_cmp_tag_loop
cmp r15b, 0
sete r15b
add rsp, 16
xor rcx, rcx
jmp L_AES_GCM_decrypt_final_aesni_cmp_tag_done
L_AES_GCM_decrypt_final_aesni_cmp_tag_16:
; Full 16-byte tag: single SIMD equality test.
movdqu xmm1, OWORD PTR [r9]
pcmpeqb xmm0, xmm1
pmovmskb rdx, xmm0
; %%edx == 0xFFFF then return 1 else => return 0
xor r15d, r15d
cmp edx, 65535
sete r15b
L_AES_GCM_decrypt_final_aesni_cmp_tag_done:
; Publish the comparison result and restore saved registers.
mov DWORD PTR [rbp], r15d
movdqu xmm6, OWORD PTR [rsp+16]
movdqu xmm7, OWORD PTR [rsp+32]
movdqu xmm8, OWORD PTR [rsp+48]
movdqu xmm9, OWORD PTR [rsp+64]
movdqu xmm10, OWORD PTR [rsp+80]
movdqu xmm11, OWORD PTR [rsp+96]
movdqu xmm12, OWORD PTR [rsp+112]
movdqu xmm13, OWORD PTR [rsp+128]
movdqu xmm15, OWORD PTR [rsp+144]
add rsp, 160
pop r15
pop rbp
pop r14
pop r12
pop r13
ret
AES_GCM_decrypt_final_aesni ENDP
_text ENDS
IFDEF HAVE_INTEL_AVX1
_DATA SEGMENT
ALIGN 16
; PSHUFB mask that byte-reverses a full 128-bit value
; (mask bytes 15,14,...,0: qwords 0x08090a0b0c0d0e0f, 0x0001020304050607).
L_GCM_generate_m0_avx1_rev8 QWORD 579005069656919567, 283686952306183
; Address of the constant above, for indirect (pointer-based) access.
ptr_L_GCM_generate_m0_avx1_rev8 QWORD L_GCM_generate_m0_avx1_rev8
_DATA ENDS
_DATA SEGMENT
ALIGN 16
; GHASH reduction constant 0xE1 << 120 (high qword = 0xE100000000000000),
; XORed in when a shifted-out bit requires polynomial reduction.
L_GCM_generate_m0_avx1_mod2_128 QWORD 0, 16212958658533785600
; Address of the constant above, for indirect (pointer-based) access.
ptr_L_GCM_generate_m0_avx1_mod2_128 QWORD L_GCM_generate_m0_avx1_mod2_128
_DATA ENDS
_text SEGMENT READONLY PARA
;-----------------------------------------------------------------------
; GCM_generate_m0_avx1
; Precompute the 4-bit (nibble) GHASH multiplication tables from the
; hash key H for table-driven GHASH.
; ABI:  Microsoft x64; leaf function (no calls, no stack arguments).
; In:   rcx = pointer to 16-byte hash key H
;       rdx = pointer to 512-byte output: a 16-entry x 16-byte table at
;             [rdx..rdx+240] followed by the same entries shifted right
;             4 bits at [rdx+256..rdx+496]
; Clobbers: xmm0-xmm5, flags; xmm6-xmm10 are saved and restored.
;-----------------------------------------------------------------------
GCM_generate_m0_avx1 PROC
; Spill the callee-saved XMM registers used below (Win64 ABI).
sub rsp, 80
vmovdqu OWORD PTR [rsp], xmm6
vmovdqu OWORD PTR [rsp+16], xmm7
vmovdqu OWORD PTR [rsp+32], xmm8
vmovdqu OWORD PTR [rsp+48], xmm9
vmovdqu OWORD PTR [rsp+64], xmm10
; xmm9 = 128-bit byte-reverse shuffle mask, xmm10 = reduction constant.
vmovdqu xmm9, OWORD PTR L_GCM_generate_m0_avx1_rev8
vmovdqu xmm10, OWORD PTR L_GCM_generate_m0_avx1_mod2_128
vpxor xmm8, xmm8, xmm8
vmovdqu xmm0, OWORD PTR [rcx]
; Table entry 0 is always zero (0 * H = 0).
vmovdqu OWORD PTR [rdx], xmm8
vmovdqu xmm8, xmm0
vpshufb xmm0, xmm0, xmm9
; Derive three successive one-bit right shifts of H in GF(2^128):
; each step shifts the previous value right one bit across the 128-bit
; lane and XORs the reduction constant when the dropped bit was set
; (the sign-replicated top dword forms the conditional mask).
; xmm1 = H shifted once.
vpsllq xmm5, xmm0, 63
vpsrlq xmm4, xmm0, 1
vpslldq xmm1, xmm5, 8
vpsrldq xmm5, xmm5, 8
vpshufd xmm1, xmm1, 255
vpor xmm4, xmm4, xmm5
vpsrad xmm1, xmm1, 31
vpand xmm1, xmm1, xmm10
vpxor xmm1, xmm1, xmm4
; xmm2 = H shifted twice.
vpsllq xmm5, xmm1, 63
vpsrlq xmm4, xmm1, 1
vpslldq xmm2, xmm5, 8
vpsrldq xmm5, xmm5, 8
vpshufd xmm2, xmm2, 255
vpor xmm4, xmm4, xmm5
vpsrad xmm2, xmm2, 31
vpand xmm2, xmm2, xmm10
vpxor xmm2, xmm2, xmm4
; xmm3 = H shifted three times.
vpsllq xmm5, xmm2, 63
vpsrlq xmm4, xmm2, 1
vpslldq xmm3, xmm5, 8
vpsrldq xmm5, xmm5, 8
vpshufd xmm3, xmm3, 255
vpor xmm4, xmm4, xmm5
vpsrad xmm3, xmm3, 31
vpand xmm3, xmm3, xmm10
vpxor xmm3, xmm3, xmm4
; Restore byte order of all four values.
vpshufb xmm3, xmm3, xmm9
vpshufb xmm2, xmm2, xmm9
vpshufb xmm1, xmm1, xmm9
vpshufb xmm0, xmm0, xmm9
; Fill entries 1..15: entry i is the XOR of the shifted values selected
; by the set bits of i (xmm3 = bit 0, xmm2 = bit 1, xmm1 = bit 2,
; xmm0 = bit 3).
vpxor xmm8, xmm3, xmm2
vmovdqu OWORD PTR [rdx+16], xmm3
vmovdqu OWORD PTR [rdx+32], xmm2
vmovdqu OWORD PTR [rdx+48], xmm8
vmovdqu OWORD PTR [rdx+64], xmm1
vpxor xmm4, xmm3, xmm1
vpxor xmm5, xmm2, xmm1
vpxor xmm6, xmm8, xmm1
vmovdqu OWORD PTR [rdx+80], xmm4
vmovdqu OWORD PTR [rdx+96], xmm5
vmovdqu OWORD PTR [rdx+112], xmm6
vmovdqu OWORD PTR [rdx+128], xmm0
vpxor xmm1, xmm1, xmm0
vpxor xmm4, xmm3, xmm0
vpxor xmm6, xmm2, xmm0
vmovdqu OWORD PTR [rdx+144], xmm4
vmovdqu OWORD PTR [rdx+160], xmm6
vpxor xmm6, xmm3, xmm6
vmovdqu OWORD PTR [rdx+176], xmm6
vmovdqu OWORD PTR [rdx+192], xmm1
vpxor xmm4, xmm3, xmm1
vpxor xmm5, xmm2, xmm1
vpxor xmm6, xmm8, xmm1
vmovdqu OWORD PTR [rdx+208], xmm4
vmovdqu OWORD PTR [rdx+224], xmm5
vmovdqu OWORD PTR [rdx+240], xmm6
; Second table: reload each entry, byte-reverse, logically shift the
; whole 128-bit value right by 4 bits (carry across the qword boundary
; via the PSLLQ 60 / PSRLDQ 8 pair), byte-reverse back and store at the
; +256 offset. Entries 0..3:
vmovdqu xmm0, OWORD PTR [rdx]
vmovdqu xmm1, OWORD PTR [rdx+16]
vmovdqu xmm2, OWORD PTR [rdx+32]
vmovdqu xmm3, OWORD PTR [rdx+48]
vpshufb xmm0, xmm0, xmm9
vpshufb xmm1, xmm1, xmm9
vpshufb xmm2, xmm2, xmm9
vpshufb xmm3, xmm3, xmm9
vpsllq xmm4, xmm0, 60
vpsllq xmm5, xmm1, 60
vpsllq xmm6, xmm2, 60
vpsllq xmm7, xmm3, 60
vpsrlq xmm0, xmm0, 4
vpsrlq xmm1, xmm1, 4
vpsrlq xmm2, xmm2, 4
vpsrlq xmm3, xmm3, 4
vpsrldq xmm4, xmm4, 8
vpsrldq xmm5, xmm5, 8
vpsrldq xmm6, xmm6, 8
vpsrldq xmm7, xmm7, 8
vpor xmm0, xmm0, xmm4
vpor xmm1, xmm1, xmm5
vpor xmm2, xmm2, xmm6
vpor xmm3, xmm3, xmm7
vpshufb xmm0, xmm0, xmm9
vpshufb xmm1, xmm1, xmm9
vpshufb xmm2, xmm2, xmm9
vpshufb xmm3, xmm3, xmm9
vmovdqu OWORD PTR [rdx+256], xmm0
vmovdqu OWORD PTR [rdx+272], xmm1
vmovdqu OWORD PTR [rdx+288], xmm2
vmovdqu OWORD PTR [rdx+304], xmm3
; Entries 4..7.
vmovdqu xmm0, OWORD PTR [rdx+64]
vmovdqu xmm1, OWORD PTR [rdx+80]
vmovdqu xmm2, OWORD PTR [rdx+96]
vmovdqu xmm3, OWORD PTR [rdx+112]
vpshufb xmm0, xmm0, xmm9
vpshufb xmm1, xmm1, xmm9
vpshufb xmm2, xmm2, xmm9
vpshufb xmm3, xmm3, xmm9
vpsllq xmm4, xmm0, 60
vpsllq xmm5, xmm1, 60
vpsllq xmm6, xmm2, 60
vpsllq xmm7, xmm3, 60
vpsrlq xmm0, xmm0, 4
vpsrlq xmm1, xmm1, 4
vpsrlq xmm2, xmm2, 4
vpsrlq xmm3, xmm3, 4
vpsrldq xmm4, xmm4, 8
vpsrldq xmm5, xmm5, 8
vpsrldq xmm6, xmm6, 8
vpsrldq xmm7, xmm7, 8
vpor xmm0, xmm0, xmm4
vpor xmm1, xmm1, xmm5
vpor xmm2, xmm2, xmm6
vpor xmm3, xmm3, xmm7
vpshufb xmm0, xmm0, xmm9
vpshufb xmm1, xmm1, xmm9
vpshufb xmm2, xmm2, xmm9
vpshufb xmm3, xmm3, xmm9
vmovdqu OWORD PTR [rdx+320], xmm0
vmovdqu OWORD PTR [rdx+336], xmm1
vmovdqu OWORD PTR [rdx+352], xmm2
vmovdqu OWORD PTR [rdx+368], xmm3
; Entries 8..11.
vmovdqu xmm0, OWORD PTR [rdx+128]
vmovdqu xmm1, OWORD PTR [rdx+144]
vmovdqu xmm2, OWORD PTR [rdx+160]
vmovdqu xmm3, OWORD PTR [rdx+176]
vpshufb xmm0, xmm0, xmm9
vpshufb xmm1, xmm1, xmm9
vpshufb xmm2, xmm2, xmm9
vpshufb xmm3, xmm3, xmm9
vpsllq xmm4, xmm0, 60
vpsllq xmm5, xmm1, 60
vpsllq xmm6, xmm2, 60
vpsllq xmm7, xmm3, 60
vpsrlq xmm0, xmm0, 4
vpsrlq xmm1, xmm1, 4
vpsrlq xmm2, xmm2, 4
vpsrlq xmm3, xmm3, 4
vpsrldq xmm4, xmm4, 8
vpsrldq xmm5, xmm5, 8
vpsrldq xmm6, xmm6, 8
vpsrldq xmm7, xmm7, 8
vpor xmm0, xmm0, xmm4
vpor xmm1, xmm1, xmm5
vpor xmm2, xmm2, xmm6
vpor xmm3, xmm3, xmm7
vpshufb xmm0, xmm0, xmm9
vpshufb xmm1, xmm1, xmm9
vpshufb xmm2, xmm2, xmm9
vpshufb xmm3, xmm3, xmm9
vmovdqu OWORD PTR [rdx+384], xmm0
vmovdqu OWORD PTR [rdx+400], xmm1
vmovdqu OWORD PTR [rdx+416], xmm2
vmovdqu OWORD PTR [rdx+432], xmm3
; Entries 12..15.
vmovdqu xmm0, OWORD PTR [rdx+192]
vmovdqu xmm1, OWORD PTR [rdx+208]
vmovdqu xmm2, OWORD PTR [rdx+224]
vmovdqu xmm3, OWORD PTR [rdx+240]
vpshufb xmm0, xmm0, xmm9
vpshufb xmm1, xmm1, xmm9
vpshufb xmm2, xmm2, xmm9
vpshufb xmm3, xmm3, xmm9
vpsllq xmm4, xmm0, 60
vpsllq xmm5, xmm1, 60
vpsllq xmm6, xmm2, 60
vpsllq xmm7, xmm3, 60
vpsrlq xmm0, xmm0, 4
vpsrlq xmm1, xmm1, 4
vpsrlq xmm2, xmm2, 4
vpsrlq xmm3, xmm3, 4
vpsrldq xmm4, xmm4, 8
vpsrldq xmm5, xmm5, 8
vpsrldq xmm6, xmm6, 8
vpsrldq xmm7, xmm7, 8
vpor xmm0, xmm0, xmm4
vpor xmm1, xmm1, xmm5
vpor xmm2, xmm2, xmm6
vpor xmm3, xmm3, xmm7
vpshufb xmm0, xmm0, xmm9
vpshufb xmm1, xmm1, xmm9
vpshufb xmm2, xmm2, xmm9
vpshufb xmm3, xmm3, xmm9
vmovdqu OWORD PTR [rdx+448], xmm0
vmovdqu OWORD PTR [rdx+464], xmm1
vmovdqu OWORD PTR [rdx+480], xmm2
vmovdqu OWORD PTR [rdx+496], xmm3
; Restore callee-saved XMM registers and return.
vmovdqu xmm6, OWORD PTR [rsp]
vmovdqu xmm7, OWORD PTR [rsp+16]
vmovdqu xmm8, OWORD PTR [rsp+32]
vmovdqu xmm9, OWORD PTR [rsp+48]
vmovdqu xmm10, OWORD PTR [rsp+64]
add rsp, 80
ret
GCM_generate_m0_avx1 ENDP
_text ENDS
_DATA SEGMENT
ALIGN 16
; Counter-increment constants 1..8: the value sits in the high 64 bits
; (QWORD low, high) and is added with VPADDD while the counter block is
; held in its per-64-bit-lane byte-swapped form.
L_avx1_aes_gcm_one QWORD 0, 1
; ptr_* variables hold each constant's address for indirect access.
ptr_L_avx1_aes_gcm_one QWORD L_avx1_aes_gcm_one
_DATA ENDS
_DATA SEGMENT
ALIGN 16
L_avx1_aes_gcm_two QWORD 0, 2
ptr_L_avx1_aes_gcm_two QWORD L_avx1_aes_gcm_two
_DATA ENDS
_DATA SEGMENT
ALIGN 16
L_avx1_aes_gcm_three QWORD 0, 3
ptr_L_avx1_aes_gcm_three QWORD L_avx1_aes_gcm_three
_DATA ENDS
_DATA SEGMENT
ALIGN 16
L_avx1_aes_gcm_four QWORD 0, 4
ptr_L_avx1_aes_gcm_four QWORD L_avx1_aes_gcm_four
_DATA ENDS
_DATA SEGMENT
ALIGN 16
L_avx1_aes_gcm_five QWORD 0, 5
ptr_L_avx1_aes_gcm_five QWORD L_avx1_aes_gcm_five
_DATA ENDS
_DATA SEGMENT
ALIGN 16
L_avx1_aes_gcm_six QWORD 0, 6
ptr_L_avx1_aes_gcm_six QWORD L_avx1_aes_gcm_six
_DATA ENDS
_DATA SEGMENT
ALIGN 16
L_avx1_aes_gcm_seven QWORD 0, 7
ptr_L_avx1_aes_gcm_seven QWORD L_avx1_aes_gcm_seven
_DATA ENDS
_DATA SEGMENT
ALIGN 16
L_avx1_aes_gcm_eight QWORD 0, 8
ptr_L_avx1_aes_gcm_eight QWORD L_avx1_aes_gcm_eight
_DATA ENDS
_DATA SEGMENT
ALIGN 16
; PSHUFB mask that byte-reverses each 64-bit half independently.
L_avx1_aes_gcm_bswap_epi64 QWORD 283686952306183, 579005069656919567
ptr_L_avx1_aes_gcm_bswap_epi64 QWORD L_avx1_aes_gcm_bswap_epi64
_DATA ENDS
_DATA SEGMENT
ALIGN 16
; PSHUFB mask that byte-reverses the full 128-bit value.
L_avx1_aes_gcm_bswap_mask QWORD 579005069656919567, 283686952306183
ptr_L_avx1_aes_gcm_bswap_mask QWORD L_avx1_aes_gcm_bswap_mask
_DATA ENDS
_DATA SEGMENT
ALIGN 16
; GHASH reduction constant: low qword 1, high qword 0xC200000000000000,
; used by the PCLMULQDQ-based reduction steps.
L_avx1_aes_gcm_mod2_128 QWORD 1, 13979173243358019584
ptr_L_avx1_aes_gcm_mod2_128 QWORD L_avx1_aes_gcm_mod2_128
_DATA ENDS
_text SEGMENT READONLY PARA
AES_GCM_encrypt_avx1 PROC
push r13
push rdi
push rsi
push r12
push rbx
push r14
push r15
mov rdi, rcx
mov rsi, rdx
mov r12, r8
mov rax, r9
mov r8, QWORD PTR [rsp+96]
mov r9d, DWORD PTR [rsp+104]
mov r11d, DWORD PTR [rsp+112]
mov ebx, DWORD PTR [rsp+120]
mov r14d, DWORD PTR [rsp+128]
mov r15, QWORD PTR [rsp+136]
mov r10d, DWORD PTR [rsp+144]
sub rsp, 320
vmovdqu OWORD PTR [rsp+160], xmm6
vmovdqu OWORD PTR [rsp+176], xmm7
vmovdqu OWORD PTR [rsp+192], xmm8
vmovdqu OWORD PTR [rsp+208], xmm9
vmovdqu OWORD PTR [rsp+224], xmm10
vmovdqu OWORD PTR [rsp+240], xmm11
vmovdqu OWORD PTR [rsp+256], xmm12
vmovdqu OWORD PTR [rsp+272], xmm13
vmovdqu OWORD PTR [rsp+288], xmm14
vmovdqu OWORD PTR [rsp+304], xmm15
vpxor xmm4, xmm4, xmm4
vpxor xmm6, xmm6, xmm6
mov edx, ebx
cmp edx, 12
jne L_AES_GCM_encrypt_avx1_iv_not_12
; # Calculate values when IV is 12 bytes
; Set counter based on IV
mov ecx, 16777216
vmovq xmm4, QWORD PTR [rax]
vpinsrd xmm4, xmm4, DWORD PTR [rax+8], 2
vpinsrd xmm4, xmm4, ecx, 3
; H = Encrypt X(=0) and T = Encrypt counter
vmovdqa xmm5, OWORD PTR [r15]
vpxor xmm1, xmm4, xmm5
vmovdqa xmm7, OWORD PTR [r15+16]
vaesenc xmm5, xmm5, xmm7
vaesenc xmm1, xmm1, xmm7
vmovdqa xmm7, OWORD PTR [r15+32]
vaesenc xmm5, xmm5, xmm7
vaesenc xmm1, xmm1, xmm7
vmovdqa xmm7, OWORD PTR [r15+48]
vaesenc xmm5, xmm5, xmm7
vaesenc xmm1, xmm1, xmm7
vmovdqa xmm7, OWORD PTR [r15+64]
vaesenc xmm5, xmm5, xmm7
vaesenc xmm1, xmm1, xmm7
vmovdqa xmm7, OWORD PTR [r15+80]
vaesenc xmm5, xmm5, xmm7
vaesenc xmm1, xmm1, xmm7
vmovdqa xmm7, OWORD PTR [r15+96]
vaesenc xmm5, xmm5, xmm7
vaesenc xmm1, xmm1, xmm7
vmovdqa xmm7, OWORD PTR [r15+112]
vaesenc xmm5, xmm5, xmm7
vaesenc xmm1, xmm1, xmm7
vmovdqa xmm7, OWORD PTR [r15+128]
vaesenc xmm5, xmm5, xmm7
vaesenc xmm1, xmm1, xmm7
vmovdqa xmm7, OWORD PTR [r15+144]
vaesenc xmm5, xmm5, xmm7
vaesenc xmm1, xmm1, xmm7
cmp r10d, 11
vmovdqa xmm7, OWORD PTR [r15+160]
jl L_AES_GCM_encrypt_avx1_calc_iv_12_last
vaesenc xmm5, xmm5, xmm7
vaesenc xmm1, xmm1, xmm7
vmovdqa xmm7, OWORD PTR [r15+176]
vaesenc xmm5, xmm5, xmm7
vaesenc xmm1, xmm1, xmm7
cmp r10d, 13
vmovdqa xmm7, OWORD PTR [r15+192]
jl L_AES_GCM_encrypt_avx1_calc_iv_12_last
vaesenc xmm5, xmm5, xmm7
vaesenc xmm1, xmm1, xmm7
vmovdqa xmm7, OWORD PTR [r15+208]
vaesenc xmm5, xmm5, xmm7
vaesenc xmm1, xmm1, xmm7
vmovdqa xmm7, OWORD PTR [r15+224]
L_AES_GCM_encrypt_avx1_calc_iv_12_last:
vaesenclast xmm5, xmm5, xmm7
vaesenclast xmm1, xmm1, xmm7
vpshufb xmm5, xmm5, OWORD PTR L_avx1_aes_gcm_bswap_mask
vmovdqu OWORD PTR [rsp+144], xmm1
jmp L_AES_GCM_encrypt_avx1_iv_done
L_AES_GCM_encrypt_avx1_iv_not_12:
; Calculate values when IV is not 12 bytes
; H = Encrypt X(=0)
vmovdqa xmm5, OWORD PTR [r15]
vaesenc xmm5, xmm5, [r15+16]
vaesenc xmm5, xmm5, [r15+32]
vaesenc xmm5, xmm5, [r15+48]
vaesenc xmm5, xmm5, [r15+64]
vaesenc xmm5, xmm5, [r15+80]
vaesenc xmm5, xmm5, [r15+96]
vaesenc xmm5, xmm5, [r15+112]
vaesenc xmm5, xmm5, [r15+128]
vaesenc xmm5, xmm5, [r15+144]
cmp r10d, 11
vmovdqa xmm9, OWORD PTR [r15+160]
jl L_AES_GCM_encrypt_avx1_calc_iv_1_aesenc_avx_last
vaesenc xmm5, xmm5, xmm9
vaesenc xmm5, xmm5, [r15+176]
cmp r10d, 13
vmovdqa xmm9, OWORD PTR [r15+192]
jl L_AES_GCM_encrypt_avx1_calc_iv_1_aesenc_avx_last
vaesenc xmm5, xmm5, xmm9
vaesenc xmm5, xmm5, [r15+208]
vmovdqa xmm9, OWORD PTR [r15+224]
L_AES_GCM_encrypt_avx1_calc_iv_1_aesenc_avx_last:
vaesenclast xmm5, xmm5, xmm9
vpshufb xmm5, xmm5, OWORD PTR L_avx1_aes_gcm_bswap_mask
; Calc counter
; Initialization vector
cmp edx, 0
mov rcx, 0
je L_AES_GCM_encrypt_avx1_calc_iv_done
cmp edx, 16
jl L_AES_GCM_encrypt_avx1_calc_iv_lt16
and edx, 4294967280
L_AES_GCM_encrypt_avx1_calc_iv_16_loop:
vmovdqu xmm8, OWORD PTR [rax+rcx]
vpshufb xmm8, xmm8, OWORD PTR L_avx1_aes_gcm_bswap_mask
vpxor xmm4, xmm4, xmm8
; ghash_gfmul_avx
vpshufd xmm1, xmm4, 78
vpshufd xmm2, xmm5, 78
vpclmulqdq xmm3, xmm5, xmm4, 17
vpclmulqdq xmm0, xmm5, xmm4, 0
vpxor xmm1, xmm1, xmm4
vpxor xmm2, xmm2, xmm5
vpclmulqdq xmm1, xmm1, xmm2, 0
vpxor xmm1, xmm1, xmm0
vpxor xmm1, xmm1, xmm3
vmovdqa xmm7, xmm0
vmovdqa xmm4, xmm3
vpslldq xmm2, xmm1, 8
vpsrldq xmm1, xmm1, 8
vpxor xmm7, xmm7, xmm2
vpxor xmm4, xmm4, xmm1
vpsrld xmm0, xmm7, 31
vpsrld xmm1, xmm4, 31
vpslld xmm7, xmm7, 1
vpslld xmm4, xmm4, 1
vpsrldq xmm2, xmm0, 12
vpslldq xmm0, xmm0, 4
vpslldq xmm1, xmm1, 4
vpor xmm4, xmm4, xmm2
vpor xmm7, xmm7, xmm0
vpor xmm4, xmm4, xmm1
vpslld xmm0, xmm7, 31
vpslld xmm1, xmm7, 30
vpslld xmm2, xmm7, 25
vpxor xmm0, xmm0, xmm1
vpxor xmm0, xmm0, xmm2
vmovdqa xmm1, xmm0
vpsrldq xmm1, xmm1, 4
vpslldq xmm0, xmm0, 12
vpxor xmm7, xmm7, xmm0
vpsrld xmm2, xmm7, 1
vpsrld xmm3, xmm7, 2
vpsrld xmm0, xmm7, 7
vpxor xmm2, xmm2, xmm3
vpxor xmm2, xmm2, xmm0
vpxor xmm2, xmm2, xmm1
vpxor xmm2, xmm2, xmm7
vpxor xmm4, xmm4, xmm2
add ecx, 16
cmp ecx, edx
jl L_AES_GCM_encrypt_avx1_calc_iv_16_loop
mov edx, ebx
cmp ecx, edx
je L_AES_GCM_encrypt_avx1_calc_iv_done
L_AES_GCM_encrypt_avx1_calc_iv_lt16:
sub rsp, 16
vpxor xmm8, xmm8, xmm8
xor ebx, ebx
vmovdqu OWORD PTR [rsp], xmm8
L_AES_GCM_encrypt_avx1_calc_iv_loop:
movzx r13d, BYTE PTR [rax+rcx]
mov BYTE PTR [rsp+rbx], r13b
inc ecx
inc ebx
cmp ecx, edx
jl L_AES_GCM_encrypt_avx1_calc_iv_loop
vmovdqu xmm8, OWORD PTR [rsp]
add rsp, 16
vpshufb xmm8, xmm8, OWORD PTR L_avx1_aes_gcm_bswap_mask
vpxor xmm4, xmm4, xmm8
; ghash_gfmul_avx
vpshufd xmm1, xmm4, 78
vpshufd xmm2, xmm5, 78
vpclmulqdq xmm3, xmm5, xmm4, 17
vpclmulqdq xmm0, xmm5, xmm4, 0
vpxor xmm1, xmm1, xmm4
vpxor xmm2, xmm2, xmm5
vpclmulqdq xmm1, xmm1, xmm2, 0
vpxor xmm1, xmm1, xmm0
vpxor xmm1, xmm1, xmm3
vmovdqa xmm7, xmm0
vmovdqa xmm4, xmm3
vpslldq xmm2, xmm1, 8
vpsrldq xmm1, xmm1, 8
vpxor xmm7, xmm7, xmm2
vpxor xmm4, xmm4, xmm1
vpsrld xmm0, xmm7, 31
vpsrld xmm1, xmm4, 31
vpslld xmm7, xmm7, 1
vpslld xmm4, xmm4, 1
vpsrldq xmm2, xmm0, 12
vpslldq xmm0, xmm0, 4
vpslldq xmm1, xmm1, 4
vpor xmm4, xmm4, xmm2
vpor xmm7, xmm7, xmm0
vpor xmm4, xmm4, xmm1
vpslld xmm0, xmm7, 31
vpslld xmm1, xmm7, 30
vpslld xmm2, xmm7, 25
vpxor xmm0, xmm0, xmm1
vpxor xmm0, xmm0, xmm2
vmovdqa xmm1, xmm0
vpsrldq xmm1, xmm1, 4
vpslldq xmm0, xmm0, 12
vpxor xmm7, xmm7, xmm0
vpsrld xmm2, xmm7, 1
vpsrld xmm3, xmm7, 2
vpsrld xmm0, xmm7, 7
vpxor xmm2, xmm2, xmm3
vpxor xmm2, xmm2, xmm0
vpxor xmm2, xmm2, xmm1
vpxor xmm2, xmm2, xmm7
vpxor xmm4, xmm4, xmm2
L_AES_GCM_encrypt_avx1_calc_iv_done:
; T = Encrypt counter
vpxor xmm0, xmm0, xmm0
shl edx, 3
vmovq xmm0, rdx
vpxor xmm4, xmm4, xmm0
; ghash_gfmul_avx
vpshufd xmm1, xmm4, 78
vpshufd xmm2, xmm5, 78
vpclmulqdq xmm3, xmm5, xmm4, 17
vpclmulqdq xmm0, xmm5, xmm4, 0
vpxor xmm1, xmm1, xmm4
vpxor xmm2, xmm2, xmm5
vpclmulqdq xmm1, xmm1, xmm2, 0
vpxor xmm1, xmm1, xmm0
vpxor xmm1, xmm1, xmm3
vmovdqa xmm7, xmm0
vmovdqa xmm4, xmm3
vpslldq xmm2, xmm1, 8
vpsrldq xmm1, xmm1, 8
vpxor xmm7, xmm7, xmm2
vpxor xmm4, xmm4, xmm1
vpsrld xmm0, xmm7, 31
vpsrld xmm1, xmm4, 31
vpslld xmm7, xmm7, 1
vpslld xmm4, xmm4, 1
vpsrldq xmm2, xmm0, 12
vpslldq xmm0, xmm0, 4
vpslldq xmm1, xmm1, 4
vpor xmm4, xmm4, xmm2
vpor xmm7, xmm7, xmm0
vpor xmm4, xmm4, xmm1
vpslld xmm0, xmm7, 31
vpslld xmm1, xmm7, 30
vpslld xmm2, xmm7, 25
vpxor xmm0, xmm0, xmm1
vpxor xmm0, xmm0, xmm2
vmovdqa xmm1, xmm0
vpsrldq xmm1, xmm1, 4
vpslldq xmm0, xmm0, 12
vpxor xmm7, xmm7, xmm0
vpsrld xmm2, xmm7, 1
vpsrld xmm3, xmm7, 2
vpsrld xmm0, xmm7, 7
vpxor xmm2, xmm2, xmm3
vpxor xmm2, xmm2, xmm0
vpxor xmm2, xmm2, xmm1
vpxor xmm2, xmm2, xmm7
vpxor xmm4, xmm4, xmm2
vpshufb xmm4, xmm4, OWORD PTR L_avx1_aes_gcm_bswap_mask
; Encrypt counter
vmovdqa xmm8, OWORD PTR [r15]
vpxor xmm8, xmm8, xmm4
vaesenc xmm8, xmm8, [r15+16]
vaesenc xmm8, xmm8, [r15+32]
vaesenc xmm8, xmm8, [r15+48]
vaesenc xmm8, xmm8, [r15+64]
vaesenc xmm8, xmm8, [r15+80]
vaesenc xmm8, xmm8, [r15+96]
vaesenc xmm8, xmm8, [r15+112]
vaesenc xmm8, xmm8, [r15+128]
vaesenc xmm8, xmm8, [r15+144]
cmp r10d, 11
vmovdqa xmm9, OWORD PTR [r15+160]
jl L_AES_GCM_encrypt_avx1_calc_iv_2_aesenc_avx_last
vaesenc xmm8, xmm8, xmm9
vaesenc xmm8, xmm8, [r15+176]
cmp r10d, 13
vmovdqa xmm9, OWORD PTR [r15+192]
jl L_AES_GCM_encrypt_avx1_calc_iv_2_aesenc_avx_last
vaesenc xmm8, xmm8, xmm9
vaesenc xmm8, xmm8, [r15+208]
vmovdqa xmm9, OWORD PTR [r15+224]
L_AES_GCM_encrypt_avx1_calc_iv_2_aesenc_avx_last:
vaesenclast xmm8, xmm8, xmm9
vmovdqu OWORD PTR [rsp+144], xmm8
L_AES_GCM_encrypt_avx1_iv_done:
; Additional authentication data
mov edx, r11d
cmp edx, 0
je L_AES_GCM_encrypt_avx1_calc_aad_done
xor ecx, ecx
cmp edx, 16
jl L_AES_GCM_encrypt_avx1_calc_aad_lt16
and edx, 4294967280
L_AES_GCM_encrypt_avx1_calc_aad_16_loop:
vmovdqu xmm8, OWORD PTR [r12+rcx]
vpshufb xmm8, xmm8, OWORD PTR L_avx1_aes_gcm_bswap_mask
vpxor xmm6, xmm6, xmm8
; ghash_gfmul_avx
vpshufd xmm1, xmm6, 78
vpshufd xmm2, xmm5, 78
vpclmulqdq xmm3, xmm5, xmm6, 17
vpclmulqdq xmm0, xmm5, xmm6, 0
vpxor xmm1, xmm1, xmm6
vpxor xmm2, xmm2, xmm5
vpclmulqdq xmm1, xmm1, xmm2, 0
vpxor xmm1, xmm1, xmm0
vpxor xmm1, xmm1, xmm3
vmovdqa xmm7, xmm0
vmovdqa xmm6, xmm3
vpslldq xmm2, xmm1, 8
vpsrldq xmm1, xmm1, 8
vpxor xmm7, xmm7, xmm2
vpxor xmm6, xmm6, xmm1
vpsrld xmm0, xmm7, 31
vpsrld xmm1, xmm6, 31
vpslld xmm7, xmm7, 1
vpslld xmm6, xmm6, 1
vpsrldq xmm2, xmm0, 12
vpslldq xmm0, xmm0, 4
vpslldq xmm1, xmm1, 4
vpor xmm6, xmm6, xmm2
vpor xmm7, xmm7, xmm0
vpor xmm6, xmm6, xmm1
vpslld xmm0, xmm7, 31
vpslld xmm1, xmm7, 30
vpslld xmm2, xmm7, 25
vpxor xmm0, xmm0, xmm1
vpxor xmm0, xmm0, xmm2
vmovdqa xmm1, xmm0
vpsrldq xmm1, xmm1, 4
vpslldq xmm0, xmm0, 12
vpxor xmm7, xmm7, xmm0
vpsrld xmm2, xmm7, 1
vpsrld xmm3, xmm7, 2
vpsrld xmm0, xmm7, 7
vpxor xmm2, xmm2, xmm3
vpxor xmm2, xmm2, xmm0
vpxor xmm2, xmm2, xmm1
vpxor xmm2, xmm2, xmm7
vpxor xmm6, xmm6, xmm2
add ecx, 16
cmp ecx, edx
jl L_AES_GCM_encrypt_avx1_calc_aad_16_loop
mov edx, r11d
cmp ecx, edx
je L_AES_GCM_encrypt_avx1_calc_aad_done
L_AES_GCM_encrypt_avx1_calc_aad_lt16:
sub rsp, 16
vpxor xmm8, xmm8, xmm8
xor ebx, ebx
vmovdqu OWORD PTR [rsp], xmm8
L_AES_GCM_encrypt_avx1_calc_aad_loop:
movzx r13d, BYTE PTR [r12+rcx]
mov BYTE PTR [rsp+rbx], r13b
inc ecx
inc ebx
cmp ecx, edx
jl L_AES_GCM_encrypt_avx1_calc_aad_loop
vmovdqu xmm8, OWORD PTR [rsp]
add rsp, 16
vpshufb xmm8, xmm8, OWORD PTR L_avx1_aes_gcm_bswap_mask
vpxor xmm6, xmm6, xmm8
; ghash_gfmul_avx
vpshufd xmm1, xmm6, 78
vpshufd xmm2, xmm5, 78
vpclmulqdq xmm3, xmm5, xmm6, 17
vpclmulqdq xmm0, xmm5, xmm6, 0
vpxor xmm1, xmm1, xmm6
vpxor xmm2, xmm2, xmm5
vpclmulqdq xmm1, xmm1, xmm2, 0
vpxor xmm1, xmm1, xmm0
vpxor xmm1, xmm1, xmm3
vmovdqa xmm7, xmm0
vmovdqa xmm6, xmm3
vpslldq xmm2, xmm1, 8
vpsrldq xmm1, xmm1, 8
vpxor xmm7, xmm7, xmm2
vpxor xmm6, xmm6, xmm1
vpsrld xmm0, xmm7, 31
vpsrld xmm1, xmm6, 31
vpslld xmm7, xmm7, 1
vpslld xmm6, xmm6, 1
vpsrldq xmm2, xmm0, 12
vpslldq xmm0, xmm0, 4
vpslldq xmm1, xmm1, 4
vpor xmm6, xmm6, xmm2
vpor xmm7, xmm7, xmm0
vpor xmm6, xmm6, xmm1
vpslld xmm0, xmm7, 31
vpslld xmm1, xmm7, 30
vpslld xmm2, xmm7, 25
vpxor xmm0, xmm0, xmm1
vpxor xmm0, xmm0, xmm2
vmovdqa xmm1, xmm0
vpsrldq xmm1, xmm1, 4
vpslldq xmm0, xmm0, 12
vpxor xmm7, xmm7, xmm0
vpsrld xmm2, xmm7, 1
vpsrld xmm3, xmm7, 2
vpsrld xmm0, xmm7, 7
vpxor xmm2, xmm2, xmm3
vpxor xmm2, xmm2, xmm0
vpxor xmm2, xmm2, xmm1
vpxor xmm2, xmm2, xmm7
vpxor xmm6, xmm6, xmm2
L_AES_GCM_encrypt_avx1_calc_aad_done:
; Calculate counter and H
vpsrlq xmm9, xmm5, 63
vpsllq xmm8, xmm5, 1
vpslldq xmm9, xmm9, 8
vpor xmm8, xmm8, xmm9
vpshufd xmm5, xmm5, 255
vpsrad xmm5, xmm5, 31
vpshufb xmm4, xmm4, OWORD PTR L_avx1_aes_gcm_bswap_epi64
vpand xmm5, xmm5, OWORD PTR L_avx1_aes_gcm_mod2_128
vpaddd xmm4, xmm4, OWORD PTR L_avx1_aes_gcm_one
vpxor xmm5, xmm5, xmm8
vmovdqu OWORD PTR [rsp+128], xmm4
xor ebx, ebx
cmp r9d, 128
mov r13d, r9d
jl L_AES_GCM_encrypt_avx1_done_128
and r13d, 4294967168
vmovdqa xmm2, xmm6
; H ^ 1
vmovdqu OWORD PTR [rsp], xmm5
; H ^ 2
vpclmulqdq xmm8, xmm5, xmm5, 0
vpclmulqdq xmm0, xmm5, xmm5, 17
vpslld xmm12, xmm8, 31
vpslld xmm13, xmm8, 30
vpslld xmm14, xmm8, 25
vpxor xmm12, xmm12, xmm13
vpxor xmm12, xmm12, xmm14
vpsrldq xmm13, xmm12, 4
vpslldq xmm12, xmm12, 12
vpxor xmm8, xmm8, xmm12
vpsrld xmm14, xmm8, 1
vpsrld xmm10, xmm8, 2
vpsrld xmm9, xmm8, 7
vpxor xmm14, xmm14, xmm10
vpxor xmm14, xmm14, xmm9
vpxor xmm14, xmm14, xmm13
vpxor xmm14, xmm14, xmm8
vpxor xmm0, xmm0, xmm14
vmovdqu OWORD PTR [rsp+16], xmm0
; H ^ 3
; ghash_gfmul_red_avx
vpshufd xmm9, xmm5, 78
vpshufd xmm10, xmm0, 78
vpclmulqdq xmm11, xmm0, xmm5, 17
vpclmulqdq xmm8, xmm0, xmm5, 0
vpxor xmm9, xmm9, xmm5
vpxor xmm10, xmm10, xmm0
vpclmulqdq xmm9, xmm9, xmm10, 0
vpxor xmm9, xmm9, xmm8
vpxor xmm9, xmm9, xmm11
vpslldq xmm10, xmm9, 8
vpsrldq xmm9, xmm9, 8
vpxor xmm8, xmm8, xmm10
vpxor xmm1, xmm11, xmm9
vpslld xmm12, xmm8, 31
vpslld xmm13, xmm8, 30
vpslld xmm14, xmm8, 25
vpxor xmm12, xmm12, xmm13
vpxor xmm12, xmm12, xmm14
vpsrldq xmm13, xmm12, 4
vpslldq xmm12, xmm12, 12
vpxor xmm8, xmm8, xmm12
vpsrld xmm14, xmm8, 1
vpsrld xmm10, xmm8, 2
vpsrld xmm9, xmm8, 7
vpxor xmm14, xmm14, xmm10
vpxor xmm14, xmm14, xmm9
vpxor xmm14, xmm14, xmm13
vpxor xmm14, xmm14, xmm8
vpxor xmm1, xmm1, xmm14
vmovdqu OWORD PTR [rsp+32], xmm1
; H ^ 4
vpclmulqdq xmm8, xmm0, xmm0, 0
vpclmulqdq xmm3, xmm0, xmm0, 17
vpslld xmm12, xmm8, 31
vpslld xmm13, xmm8, 30
vpslld xmm14, xmm8, 25
vpxor xmm12, xmm12, xmm13
vpxor xmm12, xmm12, xmm14
vpsrldq xmm13, xmm12, 4
vpslldq xmm12, xmm12, 12
vpxor xmm8, xmm8, xmm12
vpsrld xmm14, xmm8, 1
vpsrld xmm10, xmm8, 2
vpsrld xmm9, xmm8, 7
vpxor xmm14, xmm14, xmm10
vpxor xmm14, xmm14, xmm9
vpxor xmm14, xmm14, xmm13
vpxor xmm14, xmm14, xmm8
vpxor xmm3, xmm3, xmm14
vmovdqu OWORD PTR [rsp+48], xmm3
; H ^ 5
; ghash_gfmul_red_avx
vpshufd xmm9, xmm0, 78
vpshufd xmm10, xmm1, 78
vpclmulqdq xmm11, xmm1, xmm0, 17
vpclmulqdq xmm8, xmm1, xmm0, 0
vpxor xmm9, xmm9, xmm0
vpxor xmm10, xmm10, xmm1
vpclmulqdq xmm9, xmm9, xmm10, 0
vpxor xmm9, xmm9, xmm8
vpxor xmm9, xmm9, xmm11
vpslldq xmm10, xmm9, 8
vpsrldq xmm9, xmm9, 8
vpxor xmm8, xmm8, xmm10
vpxor xmm7, xmm11, xmm9
vpslld xmm12, xmm8, 31
vpslld xmm13, xmm8, 30
vpslld xmm14, xmm8, 25
vpxor xmm12, xmm12, xmm13
vpxor xmm12, xmm12, xmm14
vpsrldq xmm13, xmm12, 4
vpslldq xmm12, xmm12, 12
vpxor xmm8, xmm8, xmm12
vpsrld xmm14, xmm8, 1
vpsrld xmm10, xmm8, 2
vpsrld xmm9, xmm8, 7
vpxor xmm14, xmm14, xmm10
vpxor xmm14, xmm14, xmm9
vpxor xmm14, xmm14, xmm13
vpxor xmm14, xmm14, xmm8
vpxor xmm7, xmm7, xmm14
vmovdqu OWORD PTR [rsp+64], xmm7
; H ^ 6
vpclmulqdq xmm8, xmm1, xmm1, 0
vpclmulqdq xmm7, xmm1, xmm1, 17
vpslld xmm12, xmm8, 31
vpslld xmm13, xmm8, 30
vpslld xmm14, xmm8, 25
vpxor xmm12, xmm12, xmm13
vpxor xmm12, xmm12, xmm14
vpsrldq xmm13, xmm12, 4
vpslldq xmm12, xmm12, 12
vpxor xmm8, xmm8, xmm12
vpsrld xmm14, xmm8, 1
vpsrld xmm10, xmm8, 2
vpsrld xmm9, xmm8, 7
vpxor xmm14, xmm14, xmm10
vpxor xmm14, xmm14, xmm9
vpxor xmm14, xmm14, xmm13
vpxor xmm14, xmm14, xmm8
vpxor xmm7, xmm7, xmm14
vmovdqu OWORD PTR [rsp+80], xmm7
; H ^ 7
; ghash_gfmul_red_avx
vpshufd xmm9, xmm1, 78
vpshufd xmm10, xmm3, 78
vpclmulqdq xmm11, xmm3, xmm1, 17
vpclmulqdq xmm8, xmm3, xmm1, 0
vpxor xmm9, xmm9, xmm1
vpxor xmm10, xmm10, xmm3
vpclmulqdq xmm9, xmm9, xmm10, 0
vpxor xmm9, xmm9, xmm8
vpxor xmm9, xmm9, xmm11
vpslldq xmm10, xmm9, 8
vpsrldq xmm9, xmm9, 8
vpxor xmm8, xmm8, xmm10
vpxor xmm7, xmm11, xmm9
vpslld xmm12, xmm8, 31
vpslld xmm13, xmm8, 30
vpslld xmm14, xmm8, 25
vpxor xmm12, xmm12, xmm13
vpxor xmm12, xmm12, xmm14
vpsrldq xmm13, xmm12, 4
vpslldq xmm12, xmm12, 12
vpxor xmm8, xmm8, xmm12
vpsrld xmm14, xmm8, 1
vpsrld xmm10, xmm8, 2
vpsrld xmm9, xmm8, 7
vpxor xmm14, xmm14, xmm10
vpxor xmm14, xmm14, xmm9
vpxor xmm14, xmm14, xmm13
vpxor xmm14, xmm14, xmm8
vpxor xmm7, xmm7, xmm14
vmovdqu OWORD PTR [rsp+96], xmm7
; H ^ 8
vpclmulqdq xmm8, xmm3, xmm3, 0
vpclmulqdq xmm7, xmm3, xmm3, 17
vpslld xmm12, xmm8, 31
vpslld xmm13, xmm8, 30
vpslld xmm14, xmm8, 25
vpxor xmm12, xmm12, xmm13
vpxor xmm12, xmm12, xmm14
vpsrldq xmm13, xmm12, 4
vpslldq xmm12, xmm12, 12
vpxor xmm8, xmm8, xmm12
vpsrld xmm14, xmm8, 1
vpsrld xmm10, xmm8, 2
vpsrld xmm9, xmm8, 7
vpxor xmm14, xmm14, xmm10
vpxor xmm14, xmm14, xmm9
vpxor xmm14, xmm14, xmm13
vpxor xmm14, xmm14, xmm8
vpxor xmm7, xmm7, xmm14
vmovdqu OWORD PTR [rsp+112], xmm7
; First 128 bytes of input
vmovdqu xmm0, OWORD PTR [rsp+128]
vmovdqa xmm1, OWORD PTR L_avx1_aes_gcm_bswap_epi64
vpshufb xmm8, xmm0, xmm1
vpaddd xmm9, xmm0, OWORD PTR L_avx1_aes_gcm_one
vpshufb xmm9, xmm9, xmm1
vpaddd xmm10, xmm0, OWORD PTR L_avx1_aes_gcm_two
vpshufb xmm10, xmm10, xmm1
vpaddd xmm11, xmm0, OWORD PTR L_avx1_aes_gcm_three
vpshufb xmm11, xmm11, xmm1
vpaddd xmm12, xmm0, OWORD PTR L_avx1_aes_gcm_four
vpshufb xmm12, xmm12, xmm1
vpaddd xmm13, xmm0, OWORD PTR L_avx1_aes_gcm_five
vpshufb xmm13, xmm13, xmm1
vpaddd xmm14, xmm0, OWORD PTR L_avx1_aes_gcm_six
vpshufb xmm14, xmm14, xmm1
vpaddd xmm15, xmm0, OWORD PTR L_avx1_aes_gcm_seven
vpshufb xmm15, xmm15, xmm1
vpaddd xmm0, xmm0, OWORD PTR L_avx1_aes_gcm_eight
vmovdqa xmm7, OWORD PTR [r15]
vmovdqu OWORD PTR [rsp+128], xmm0
vpxor xmm8, xmm8, xmm7
vpxor xmm9, xmm9, xmm7
vpxor xmm10, xmm10, xmm7
vpxor xmm11, xmm11, xmm7
vpxor xmm12, xmm12, xmm7
vpxor xmm13, xmm13, xmm7
vpxor xmm14, xmm14, xmm7
vpxor xmm15, xmm15, xmm7
vmovdqa xmm7, OWORD PTR [r15+16]
vaesenc xmm8, xmm8, xmm7
vaesenc xmm9, xmm9, xmm7
vaesenc xmm10, xmm10, xmm7
vaesenc xmm11, xmm11, xmm7
vaesenc xmm12, xmm12, xmm7
vaesenc xmm13, xmm13, xmm7
vaesenc xmm14, xmm14, xmm7
vaesenc xmm15, xmm15, xmm7
vmovdqa xmm7, OWORD PTR [r15+32]
vaesenc xmm8, xmm8, xmm7
vaesenc xmm9, xmm9, xmm7
vaesenc xmm10, xmm10, xmm7
vaesenc xmm11, xmm11, xmm7
vaesenc xmm12, xmm12, xmm7
vaesenc xmm13, xmm13, xmm7
vaesenc xmm14, xmm14, xmm7
vaesenc xmm15, xmm15, xmm7
vmovdqa xmm7, OWORD PTR [r15+48]
vaesenc xmm8, xmm8, xmm7
vaesenc xmm9, xmm9, xmm7
vaesenc xmm10, xmm10, xmm7
vaesenc xmm11, xmm11, xmm7
vaesenc xmm12, xmm12, xmm7
vaesenc xmm13, xmm13, xmm7
vaesenc xmm14, xmm14, xmm7
vaesenc xmm15, xmm15, xmm7
vmovdqa xmm7, OWORD PTR [r15+64]
vaesenc xmm8, xmm8, xmm7
vaesenc xmm9, xmm9, xmm7
vaesenc xmm10, xmm10, xmm7
vaesenc xmm11, xmm11, xmm7
vaesenc xmm12, xmm12, xmm7
vaesenc xmm13, xmm13, xmm7
vaesenc xmm14, xmm14, xmm7
vaesenc xmm15, xmm15, xmm7
vmovdqa xmm7, OWORD PTR [r15+80]
vaesenc xmm8, xmm8, xmm7
vaesenc xmm9, xmm9, xmm7
vaesenc xmm10, xmm10, xmm7
vaesenc xmm11, xmm11, xmm7
vaesenc xmm12, xmm12, xmm7
vaesenc xmm13, xmm13, xmm7
vaesenc xmm14, xmm14, xmm7
vaesenc xmm15, xmm15, xmm7
vmovdqa xmm7, OWORD PTR [r15+96]
vaesenc xmm8, xmm8, xmm7
vaesenc xmm9, xmm9, xmm7
vaesenc xmm10, xmm10, xmm7
vaesenc xmm11, xmm11, xmm7
vaesenc xmm12, xmm12, xmm7
vaesenc xmm13, xmm13, xmm7
vaesenc xmm14, xmm14, xmm7
vaesenc xmm15, xmm15, xmm7
vmovdqa xmm7, OWORD PTR [r15+112]
vaesenc xmm8, xmm8, xmm7
vaesenc xmm9, xmm9, xmm7
vaesenc xmm10, xmm10, xmm7
vaesenc xmm11, xmm11, xmm7
vaesenc xmm12, xmm12, xmm7
vaesenc xmm13, xmm13, xmm7
vaesenc xmm14, xmm14, xmm7
vaesenc xmm15, xmm15, xmm7
vmovdqa xmm7, OWORD PTR [r15+128]
vaesenc xmm8, xmm8, xmm7
vaesenc xmm9, xmm9, xmm7
vaesenc xmm10, xmm10, xmm7
vaesenc xmm11, xmm11, xmm7
vaesenc xmm12, xmm12, xmm7
vaesenc xmm13, xmm13, xmm7
vaesenc xmm14, xmm14, xmm7
vaesenc xmm15, xmm15, xmm7
vmovdqa xmm7, OWORD PTR [r15+144]
vaesenc xmm8, xmm8, xmm7
vaesenc xmm9, xmm9, xmm7
vaesenc xmm10, xmm10, xmm7
vaesenc xmm11, xmm11, xmm7
vaesenc xmm12, xmm12, xmm7
vaesenc xmm13, xmm13, xmm7
vaesenc xmm14, xmm14, xmm7
vaesenc xmm15, xmm15, xmm7
cmp r10d, 11
vmovdqa xmm7, OWORD PTR [r15+160]
jl L_AES_GCM_encrypt_avx1_aesenc_128_enc_done
vaesenc xmm8, xmm8, xmm7
vaesenc xmm9, xmm9, xmm7
vaesenc xmm10, xmm10, xmm7
vaesenc xmm11, xmm11, xmm7
vaesenc xmm12, xmm12, xmm7
vaesenc xmm13, xmm13, xmm7
vaesenc xmm14, xmm14, xmm7
vaesenc xmm15, xmm15, xmm7
vmovdqa xmm7, OWORD PTR [r15+176]
vaesenc xmm8, xmm8, xmm7
vaesenc xmm9, xmm9, xmm7
vaesenc xmm10, xmm10, xmm7
vaesenc xmm11, xmm11, xmm7
vaesenc xmm12, xmm12, xmm7
vaesenc xmm13, xmm13, xmm7
vaesenc xmm14, xmm14, xmm7
vaesenc xmm15, xmm15, xmm7
cmp r10d, 13
vmovdqa xmm7, OWORD PTR [r15+192]
jl L_AES_GCM_encrypt_avx1_aesenc_128_enc_done
vaesenc xmm8, xmm8, xmm7
vaesenc xmm9, xmm9, xmm7
vaesenc xmm10, xmm10, xmm7
vaesenc xmm11, xmm11, xmm7
vaesenc xmm12, xmm12, xmm7
vaesenc xmm13, xmm13, xmm7
vaesenc xmm14, xmm14, xmm7
vaesenc xmm15, xmm15, xmm7
vmovdqa xmm7, OWORD PTR [r15+208]
vaesenc xmm8, xmm8, xmm7
vaesenc xmm9, xmm9, xmm7
vaesenc xmm10, xmm10, xmm7
vaesenc xmm11, xmm11, xmm7
vaesenc xmm12, xmm12, xmm7
vaesenc xmm13, xmm13, xmm7
vaesenc xmm14, xmm14, xmm7
vaesenc xmm15, xmm15, xmm7
vmovdqa xmm7, OWORD PTR [r15+224]
L_AES_GCM_encrypt_avx1_aesenc_128_enc_done:
vaesenclast xmm8, xmm8, xmm7
vaesenclast xmm9, xmm9, xmm7
vmovdqu xmm0, OWORD PTR [rdi]
vmovdqu xmm1, OWORD PTR [rdi+16]
vpxor xmm8, xmm8, xmm0
vpxor xmm9, xmm9, xmm1
vmovdqu OWORD PTR [rsi], xmm8
vmovdqu OWORD PTR [rsi+16], xmm9
vaesenclast xmm10, xmm10, xmm7
vaesenclast xmm11, xmm11, xmm7
vmovdqu xmm0, OWORD PTR [rdi+32]
vmovdqu xmm1, OWORD PTR [rdi+48]
vpxor xmm10, xmm10, xmm0
vpxor xmm11, xmm11, xmm1
vmovdqu OWORD PTR [rsi+32], xmm10
vmovdqu OWORD PTR [rsi+48], xmm11
vaesenclast xmm12, xmm12, xmm7
vaesenclast xmm13, xmm13, xmm7
vmovdqu xmm0, OWORD PTR [rdi+64]
vmovdqu xmm1, OWORD PTR [rdi+80]
vpxor xmm12, xmm12, xmm0
vpxor xmm13, xmm13, xmm1
vmovdqu OWORD PTR [rsi+64], xmm12
vmovdqu OWORD PTR [rsi+80], xmm13
vaesenclast xmm14, xmm14, xmm7
vaesenclast xmm15, xmm15, xmm7
vmovdqu xmm0, OWORD PTR [rdi+96]
vmovdqu xmm1, OWORD PTR [rdi+112]
vpxor xmm14, xmm14, xmm0
vpxor xmm15, xmm15, xmm1
vmovdqu OWORD PTR [rsi+96], xmm14
vmovdqu OWORD PTR [rsi+112], xmm15
cmp r13d, 128
mov ebx, 128
jle L_AES_GCM_encrypt_avx1_end_128
; More 128 bytes of input
L_AES_GCM_encrypt_avx1_ghash_128:
lea rcx, QWORD PTR [rdi+rbx]
lea rdx, QWORD PTR [rsi+rbx]
vmovdqu xmm0, OWORD PTR [rsp+128]
vmovdqa xmm1, OWORD PTR L_avx1_aes_gcm_bswap_epi64
vpshufb xmm8, xmm0, xmm1
vpaddd xmm9, xmm0, OWORD PTR L_avx1_aes_gcm_one
vpshufb xmm9, xmm9, xmm1
vpaddd xmm10, xmm0, OWORD PTR L_avx1_aes_gcm_two
vpshufb xmm10, xmm10, xmm1
vpaddd xmm11, xmm0, OWORD PTR L_avx1_aes_gcm_three
vpshufb xmm11, xmm11, xmm1
vpaddd xmm12, xmm0, OWORD PTR L_avx1_aes_gcm_four
vpshufb xmm12, xmm12, xmm1
vpaddd xmm13, xmm0, OWORD PTR L_avx1_aes_gcm_five
vpshufb xmm13, xmm13, xmm1
vpaddd xmm14, xmm0, OWORD PTR L_avx1_aes_gcm_six
vpshufb xmm14, xmm14, xmm1
vpaddd xmm15, xmm0, OWORD PTR L_avx1_aes_gcm_seven
vpshufb xmm15, xmm15, xmm1
vpaddd xmm0, xmm0, OWORD PTR L_avx1_aes_gcm_eight
vmovdqa xmm7, OWORD PTR [r15]
vmovdqu OWORD PTR [rsp+128], xmm0
vpxor xmm8, xmm8, xmm7
vpxor xmm9, xmm9, xmm7
vpxor xmm10, xmm10, xmm7
vpxor xmm11, xmm11, xmm7
vpxor xmm12, xmm12, xmm7
vpxor xmm13, xmm13, xmm7
vpxor xmm14, xmm14, xmm7
vpxor xmm15, xmm15, xmm7
vmovdqu xmm7, OWORD PTR [rsp+112]
vmovdqu xmm0, OWORD PTR [rdx+-128]
vaesenc xmm8, xmm8, [r15+16]
vpshufb xmm0, xmm0, OWORD PTR L_avx1_aes_gcm_bswap_mask
vpxor xmm0, xmm0, xmm2
vpshufd xmm1, xmm7, 78
vpshufd xmm5, xmm0, 78
vpxor xmm1, xmm1, xmm7
vpxor xmm5, xmm5, xmm0
vpclmulqdq xmm3, xmm0, xmm7, 17
vaesenc xmm9, xmm9, [r15+16]
vaesenc xmm10, xmm10, [r15+16]
vpclmulqdq xmm2, xmm0, xmm7, 0
vaesenc xmm11, xmm11, [r15+16]
vaesenc xmm12, xmm12, [r15+16]
vpclmulqdq xmm1, xmm1, xmm5, 0
vaesenc xmm13, xmm13, [r15+16]
vaesenc xmm14, xmm14, [r15+16]
vaesenc xmm15, xmm15, [r15+16]
vpxor xmm1, xmm1, xmm2
vpxor xmm1, xmm1, xmm3
vmovdqu xmm7, OWORD PTR [rsp+96]
vmovdqu xmm0, OWORD PTR [rdx+-112]
vpshufd xmm4, xmm7, 78
vpshufb xmm0, xmm0, OWORD PTR L_avx1_aes_gcm_bswap_mask
vaesenc xmm8, xmm8, [r15+32]
vpxor xmm4, xmm4, xmm7
vpshufd xmm5, xmm0, 78
vpxor xmm5, xmm5, xmm0
vpclmulqdq xmm6, xmm0, xmm7, 17
vaesenc xmm9, xmm9, [r15+32]
vaesenc xmm10, xmm10, [r15+32]
vpclmulqdq xmm7, xmm0, xmm7, 0
vaesenc xmm11, xmm11, [r15+32]
vaesenc xmm12, xmm12, [r15+32]
vpclmulqdq xmm4, xmm4, xmm5, 0
vaesenc xmm13, xmm13, [r15+32]
vaesenc xmm14, xmm14, [r15+32]
vaesenc xmm15, xmm15, [r15+32]
vpxor xmm1, xmm1, xmm7
vpxor xmm2, xmm2, xmm7
vpxor xmm1, xmm1, xmm6
vpxor xmm3, xmm3, xmm6
vpxor xmm1, xmm1, xmm4
vmovdqu xmm7, OWORD PTR [rsp+80]
vmovdqu xmm0, OWORD PTR [rdx+-96]
vpshufd xmm4, xmm7, 78
vpshufb xmm0, xmm0, OWORD PTR L_avx1_aes_gcm_bswap_mask
vaesenc xmm8, xmm8, [r15+48]
vpxor xmm4, xmm4, xmm7
vpshufd xmm5, xmm0, 78
vpxor xmm5, xmm5, xmm0
vpclmulqdq xmm6, xmm0, xmm7, 17
vaesenc xmm9, xmm9, [r15+48]
vaesenc xmm10, xmm10, [r15+48]
vpclmulqdq xmm7, xmm0, xmm7, 0
vaesenc xmm11, xmm11, [r15+48]
vaesenc xmm12, xmm12, [r15+48]
vpclmulqdq xmm4, xmm4, xmm5, 0
vaesenc xmm13, xmm13, [r15+48]
vaesenc xmm14, xmm14, [r15+48]
vaesenc xmm15, xmm15, [r15+48]
vpxor xmm1, xmm1, xmm7
vpxor xmm2, xmm2, xmm7
vpxor xmm1, xmm1, xmm6
vpxor xmm3, xmm3, xmm6
vpxor xmm1, xmm1, xmm4
vmovdqu xmm7, OWORD PTR [rsp+64]
vmovdqu xmm0, OWORD PTR [rdx+-80]
vpshufd xmm4, xmm7, 78
vpshufb xmm0, xmm0, OWORD PTR L_avx1_aes_gcm_bswap_mask
vaesenc xmm8, xmm8, [r15+64]
vpxor xmm4, xmm4, xmm7
vpshufd xmm5, xmm0, 78
vpxor xmm5, xmm5, xmm0
vpclmulqdq xmm6, xmm0, xmm7, 17
vaesenc xmm9, xmm9, [r15+64]
vaesenc xmm10, xmm10, [r15+64]
vpclmulqdq xmm7, xmm0, xmm7, 0
vaesenc xmm11, xmm11, [r15+64]
vaesenc xmm12, xmm12, [r15+64]
vpclmulqdq xmm4, xmm4, xmm5, 0
vaesenc xmm13, xmm13, [r15+64]
vaesenc xmm14, xmm14, [r15+64]
vaesenc xmm15, xmm15, [r15+64]
vpxor xmm1, xmm1, xmm7
vpxor xmm2, xmm2, xmm7
vpxor xmm1, xmm1, xmm6
vpxor xmm3, xmm3, xmm6
vpxor xmm1, xmm1, xmm4
vmovdqu xmm7, OWORD PTR [rsp+48]
vmovdqu xmm0, OWORD PTR [rdx+-64]
vpshufd xmm4, xmm7, 78
vpshufb xmm0, xmm0, OWORD PTR L_avx1_aes_gcm_bswap_mask
vaesenc xmm8, xmm8, [r15+80]
vpxor xmm4, xmm4, xmm7
vpshufd xmm5, xmm0, 78
vpxor xmm5, xmm5, xmm0
vpclmulqdq xmm6, xmm0, xmm7, 17
vaesenc xmm9, xmm9, [r15+80]
vaesenc xmm10, xmm10, [r15+80]
vpclmulqdq xmm7, xmm0, xmm7, 0
vaesenc xmm11, xmm11, [r15+80]
vaesenc xmm12, xmm12, [r15+80]
vpclmulqdq xmm4, xmm4, xmm5, 0
vaesenc xmm13, xmm13, [r15+80]
vaesenc xmm14, xmm14, [r15+80]
vaesenc xmm15, xmm15, [r15+80]
vpxor xmm1, xmm1, xmm7
vpxor xmm2, xmm2, xmm7
vpxor xmm1, xmm1, xmm6
vpxor xmm3, xmm3, xmm6
vpxor xmm1, xmm1, xmm4
vmovdqu xmm7, OWORD PTR [rsp+32]
vmovdqu xmm0, OWORD PTR [rdx+-48]
vpshufd xmm4, xmm7, 78
vpshufb xmm0, xmm0, OWORD PTR L_avx1_aes_gcm_bswap_mask
vaesenc xmm8, xmm8, [r15+96]
vpxor xmm4, xmm4, xmm7
vpshufd xmm5, xmm0, 78
vpxor xmm5, xmm5, xmm0
vpclmulqdq xmm6, xmm0, xmm7, 17
vaesenc xmm9, xmm9, [r15+96]
vaesenc xmm10, xmm10, [r15+96]
vpclmulqdq xmm7, xmm0, xmm7, 0
vaesenc xmm11, xmm11, [r15+96]
vaesenc xmm12, xmm12, [r15+96]
vpclmulqdq xmm4, xmm4, xmm5, 0
vaesenc xmm13, xmm13, [r15+96]
vaesenc xmm14, xmm14, [r15+96]
vaesenc xmm15, xmm15, [r15+96]
vpxor xmm1, xmm1, xmm7
vpxor xmm2, xmm2, xmm7
vpxor xmm1, xmm1, xmm6
vpxor xmm3, xmm3, xmm6
vpxor xmm1, xmm1, xmm4
vmovdqu xmm7, OWORD PTR [rsp+16]
vmovdqu xmm0, OWORD PTR [rdx+-32]
vpshufd xmm4, xmm7, 78
vpshufb xmm0, xmm0, OWORD PTR L_avx1_aes_gcm_bswap_mask
vaesenc xmm8, xmm8, [r15+112]
vpxor xmm4, xmm4, xmm7
vpshufd xmm5, xmm0, 78
vpxor xmm5, xmm5, xmm0
vpclmulqdq xmm6, xmm0, xmm7, 17
vaesenc xmm9, xmm9, [r15+112]
vaesenc xmm10, xmm10, [r15+112]
vpclmulqdq xmm7, xmm0, xmm7, 0
vaesenc xmm11, xmm11, [r15+112]
vaesenc xmm12, xmm12, [r15+112]
vpclmulqdq xmm4, xmm4, xmm5, 0
vaesenc xmm13, xmm13, [r15+112]
vaesenc xmm14, xmm14, [r15+112]
vaesenc xmm15, xmm15, [r15+112]
vpxor xmm1, xmm1, xmm7
vpxor xmm2, xmm2, xmm7
vpxor xmm1, xmm1, xmm6
vpxor xmm3, xmm3, xmm6
vpxor xmm1, xmm1, xmm4
vmovdqu xmm7, OWORD PTR [rsp]
vmovdqu xmm0, OWORD PTR [rdx+-16]
vpshufd xmm4, xmm7, 78
vpshufb xmm0, xmm0, OWORD PTR L_avx1_aes_gcm_bswap_mask
vaesenc xmm8, xmm8, [r15+128]
vpxor xmm4, xmm4, xmm7
vpshufd xmm5, xmm0, 78
vpxor xmm5, xmm5, xmm0
vpclmulqdq xmm6, xmm0, xmm7, 17
vaesenc xmm9, xmm9, [r15+128]
vaesenc xmm10, xmm10, [r15+128]
vpclmulqdq xmm7, xmm0, xmm7, 0
vaesenc xmm11, xmm11, [r15+128]
vaesenc xmm12, xmm12, [r15+128]
vpclmulqdq xmm4, xmm4, xmm5, 0
vaesenc xmm13, xmm13, [r15+128]
vaesenc xmm14, xmm14, [r15+128]
vaesenc xmm15, xmm15, [r15+128]
vpxor xmm1, xmm1, xmm7
vpxor xmm2, xmm2, xmm7
vpxor xmm1, xmm1, xmm6
vpxor xmm3, xmm3, xmm6
vpxor xmm1, xmm1, xmm4
vpslldq xmm5, xmm1, 8
vpsrldq xmm1, xmm1, 8
vaesenc xmm8, xmm8, [r15+144]
vpxor xmm2, xmm2, xmm5
vpxor xmm3, xmm3, xmm1
vaesenc xmm9, xmm9, [r15+144]
vpslld xmm7, xmm2, 31
vpslld xmm4, xmm2, 30
vpslld xmm5, xmm2, 25
vaesenc xmm10, xmm10, [r15+144]
vpxor xmm7, xmm7, xmm4
vpxor xmm7, xmm7, xmm5
vaesenc xmm11, xmm11, [r15+144]
vpsrldq xmm4, xmm7, 4
vpslldq xmm7, xmm7, 12
vaesenc xmm12, xmm12, [r15+144]
vpxor xmm2, xmm2, xmm7
vpsrld xmm5, xmm2, 1
vaesenc xmm13, xmm13, [r15+144]
vpsrld xmm1, xmm2, 2
vpsrld xmm0, xmm2, 7
vaesenc xmm14, xmm14, [r15+144]
vpxor xmm5, xmm5, xmm1
vpxor xmm5, xmm5, xmm0
vaesenc xmm15, xmm15, [r15+144]
vpxor xmm5, xmm5, xmm4
vpxor xmm2, xmm2, xmm5
vpxor xmm2, xmm2, xmm3
cmp r10d, 11
vmovdqa xmm7, OWORD PTR [r15+160]
jl L_AES_GCM_encrypt_avx1_aesenc_128_ghash_avx_done
vaesenc xmm8, xmm8, xmm7
vaesenc xmm9, xmm9, xmm7
vaesenc xmm10, xmm10, xmm7
vaesenc xmm11, xmm11, xmm7
vaesenc xmm12, xmm12, xmm7
vaesenc xmm13, xmm13, xmm7
vaesenc xmm14, xmm14, xmm7
vaesenc xmm15, xmm15, xmm7
vmovdqa xmm7, OWORD PTR [r15+176]
vaesenc xmm8, xmm8, xmm7
vaesenc xmm9, xmm9, xmm7
vaesenc xmm10, xmm10, xmm7
vaesenc xmm11, xmm11, xmm7
vaesenc xmm12, xmm12, xmm7
vaesenc xmm13, xmm13, xmm7
vaesenc xmm14, xmm14, xmm7
vaesenc xmm15, xmm15, xmm7
cmp r10d, 13
vmovdqa xmm7, OWORD PTR [r15+192]
jl L_AES_GCM_encrypt_avx1_aesenc_128_ghash_avx_done
vaesenc xmm8, xmm8, xmm7
vaesenc xmm9, xmm9, xmm7
vaesenc xmm10, xmm10, xmm7
vaesenc xmm11, xmm11, xmm7
vaesenc xmm12, xmm12, xmm7
vaesenc xmm13, xmm13, xmm7
vaesenc xmm14, xmm14, xmm7
vaesenc xmm15, xmm15, xmm7
vmovdqa xmm7, OWORD PTR [r15+208]
vaesenc xmm8, xmm8, xmm7
vaesenc xmm9, xmm9, xmm7
vaesenc xmm10, xmm10, xmm7
vaesenc xmm11, xmm11, xmm7
vaesenc xmm12, xmm12, xmm7
vaesenc xmm13, xmm13, xmm7
vaesenc xmm14, xmm14, xmm7
vaesenc xmm15, xmm15, xmm7
vmovdqa xmm7, OWORD PTR [r15+224]
L_AES_GCM_encrypt_avx1_aesenc_128_ghash_avx_done:
vaesenclast xmm8, xmm8, xmm7
vaesenclast xmm9, xmm9, xmm7
vmovdqu xmm0, OWORD PTR [rcx]
vmovdqu xmm1, OWORD PTR [rcx+16]
vpxor xmm8, xmm8, xmm0
vpxor xmm9, xmm9, xmm1
vmovdqu OWORD PTR [rdx], xmm8
vmovdqu OWORD PTR [rdx+16], xmm9
vaesenclast xmm10, xmm10, xmm7
vaesenclast xmm11, xmm11, xmm7
vmovdqu xmm0, OWORD PTR [rcx+32]
vmovdqu xmm1, OWORD PTR [rcx+48]
vpxor xmm10, xmm10, xmm0
vpxor xmm11, xmm11, xmm1
vmovdqu OWORD PTR [rdx+32], xmm10
vmovdqu OWORD PTR [rdx+48], xmm11
vaesenclast xmm12, xmm12, xmm7
vaesenclast xmm13, xmm13, xmm7
vmovdqu xmm0, OWORD PTR [rcx+64]
vmovdqu xmm1, OWORD PTR [rcx+80]
vpxor xmm12, xmm12, xmm0
vpxor xmm13, xmm13, xmm1
vmovdqu OWORD PTR [rdx+64], xmm12
vmovdqu OWORD PTR [rdx+80], xmm13
vaesenclast xmm14, xmm14, xmm7
vaesenclast xmm15, xmm15, xmm7
vmovdqu xmm0, OWORD PTR [rcx+96]
vmovdqu xmm1, OWORD PTR [rcx+112]
vpxor xmm14, xmm14, xmm0
vpxor xmm15, xmm15, xmm1
vmovdqu OWORD PTR [rdx+96], xmm14
vmovdqu OWORD PTR [rdx+112], xmm15
add ebx, 128
cmp ebx, r13d
jl L_AES_GCM_encrypt_avx1_ghash_128
L_AES_GCM_encrypt_avx1_end_128:
vmovdqa xmm4, OWORD PTR L_avx1_aes_gcm_bswap_mask
vpshufb xmm8, xmm8, xmm4
vpshufb xmm9, xmm9, xmm4
vpshufb xmm10, xmm10, xmm4
vpshufb xmm11, xmm11, xmm4
vpxor xmm8, xmm8, xmm2
vpshufb xmm12, xmm12, xmm4
vpshufb xmm13, xmm13, xmm4
vpshufb xmm14, xmm14, xmm4
vpshufb xmm15, xmm15, xmm4
vmovdqu xmm7, OWORD PTR [rsp]
vmovdqu xmm5, OWORD PTR [rsp+16]
; ghash_gfmul_avx
vpshufd xmm1, xmm15, 78
vpshufd xmm2, xmm7, 78
vpclmulqdq xmm3, xmm7, xmm15, 17
vpclmulqdq xmm0, xmm7, xmm15, 0
vpxor xmm1, xmm1, xmm15
vpxor xmm2, xmm2, xmm7
vpclmulqdq xmm1, xmm1, xmm2, 0
vpxor xmm1, xmm1, xmm0
vpxor xmm1, xmm1, xmm3
vmovdqa xmm4, xmm0
vmovdqa xmm6, xmm3
vpslldq xmm2, xmm1, 8
vpsrldq xmm1, xmm1, 8
vpxor xmm4, xmm4, xmm2
vpxor xmm6, xmm6, xmm1
; ghash_gfmul_xor_avx
vpshufd xmm1, xmm14, 78
vpshufd xmm2, xmm5, 78
vpclmulqdq xmm3, xmm5, xmm14, 17
vpclmulqdq xmm0, xmm5, xmm14, 0
vpxor xmm1, xmm1, xmm14
vpxor xmm2, xmm2, xmm5
vpclmulqdq xmm1, xmm1, xmm2, 0
vpxor xmm1, xmm1, xmm0
vpxor xmm1, xmm1, xmm3
vpxor xmm4, xmm4, xmm0
vpxor xmm6, xmm6, xmm3
vpslldq xmm2, xmm1, 8
vpsrldq xmm1, xmm1, 8
vpxor xmm4, xmm4, xmm2
vpxor xmm6, xmm6, xmm1
vmovdqu xmm7, OWORD PTR [rsp+32]
vmovdqu xmm5, OWORD PTR [rsp+48]
; ghash_gfmul_xor_avx
vpshufd xmm1, xmm13, 78
vpshufd xmm2, xmm7, 78
vpclmulqdq xmm3, xmm7, xmm13, 17
vpclmulqdq xmm0, xmm7, xmm13, 0
vpxor xmm1, xmm1, xmm13
vpxor xmm2, xmm2, xmm7
vpclmulqdq xmm1, xmm1, xmm2, 0
vpxor xmm1, xmm1, xmm0
vpxor xmm1, xmm1, xmm3
vpxor xmm4, xmm4, xmm0
vpxor xmm6, xmm6, xmm3
vpslldq xmm2, xmm1, 8
vpsrldq xmm1, xmm1, 8
vpxor xmm4, xmm4, xmm2
vpxor xmm6, xmm6, xmm1
; ghash_gfmul_xor_avx
vpshufd xmm1, xmm12, 78
vpshufd xmm2, xmm5, 78
vpclmulqdq xmm3, xmm5, xmm12, 17
vpclmulqdq xmm0, xmm5, xmm12, 0
vpxor xmm1, xmm1, xmm12
vpxor xmm2, xmm2, xmm5
vpclmulqdq xmm1, xmm1, xmm2, 0
vpxor xmm1, xmm1, xmm0
vpxor xmm1, xmm1, xmm3
vpxor xmm4, xmm4, xmm0
vpxor xmm6, xmm6, xmm3
vpslldq xmm2, xmm1, 8
vpsrldq xmm1, xmm1, 8
vpxor xmm4, xmm4, xmm2
vpxor xmm6, xmm6, xmm1
vmovdqu xmm7, OWORD PTR [rsp+64]
vmovdqu xmm5, OWORD PTR [rsp+80]
; ghash_gfmul_xor_avx
vpshufd xmm1, xmm11, 78
vpshufd xmm2, xmm7, 78
vpclmulqdq xmm3, xmm7, xmm11, 17
vpclmulqdq xmm0, xmm7, xmm11, 0
vpxor xmm1, xmm1, xmm11
vpxor xmm2, xmm2, xmm7
vpclmulqdq xmm1, xmm1, xmm2, 0
vpxor xmm1, xmm1, xmm0
vpxor xmm1, xmm1, xmm3
vpxor xmm4, xmm4, xmm0
vpxor xmm6, xmm6, xmm3
vpslldq xmm2, xmm1, 8
vpsrldq xmm1, xmm1, 8
vpxor xmm4, xmm4, xmm2
vpxor xmm6, xmm6, xmm1
; ghash_gfmul_xor_avx
vpshufd xmm1, xmm10, 78
vpshufd xmm2, xmm5, 78
vpclmulqdq xmm3, xmm5, xmm10, 17
vpclmulqdq xmm0, xmm5, xmm10, 0
vpxor xmm1, xmm1, xmm10
vpxor xmm2, xmm2, xmm5
vpclmulqdq xmm1, xmm1, xmm2, 0
vpxor xmm1, xmm1, xmm0
vpxor xmm1, xmm1, xmm3
vpxor xmm4, xmm4, xmm0
vpxor xmm6, xmm6, xmm3
vpslldq xmm2, xmm1, 8
vpsrldq xmm1, xmm1, 8
vpxor xmm4, xmm4, xmm2
vpxor xmm6, xmm6, xmm1
vmovdqu xmm7, OWORD PTR [rsp+96]
vmovdqu xmm5, OWORD PTR [rsp+112]
; ghash_gfmul_xor_avx
vpshufd xmm1, xmm9, 78
vpshufd xmm2, xmm7, 78
vpclmulqdq xmm3, xmm7, xmm9, 17
vpclmulqdq xmm0, xmm7, xmm9, 0
vpxor xmm1, xmm1, xmm9
vpxor xmm2, xmm2, xmm7
vpclmulqdq xmm1, xmm1, xmm2, 0
vpxor xmm1, xmm1, xmm0
vpxor xmm1, xmm1, xmm3
vpxor xmm4, xmm4, xmm0
vpxor xmm6, xmm6, xmm3
vpslldq xmm2, xmm1, 8
vpsrldq xmm1, xmm1, 8
vpxor xmm4, xmm4, xmm2
vpxor xmm6, xmm6, xmm1
; ghash_gfmul_xor_avx
vpshufd xmm1, xmm8, 78
vpshufd xmm2, xmm5, 78
vpclmulqdq xmm3, xmm5, xmm8, 17
vpclmulqdq xmm0, xmm5, xmm8, 0
vpxor xmm1, xmm1, xmm8
vpxor xmm2, xmm2, xmm5
vpclmulqdq xmm1, xmm1, xmm2, 0
vpxor xmm1, xmm1, xmm0
vpxor xmm1, xmm1, xmm3
vpxor xmm4, xmm4, xmm0
vpxor xmm6, xmm6, xmm3
vpslldq xmm2, xmm1, 8
vpsrldq xmm1, xmm1, 8
vpxor xmm4, xmm4, xmm2
vpxor xmm6, xmm6, xmm1
vpslld xmm0, xmm4, 31
vpslld xmm1, xmm4, 30
vpslld xmm2, xmm4, 25
vpxor xmm0, xmm0, xmm1
vpxor xmm0, xmm0, xmm2
vmovdqa xmm1, xmm0
vpsrldq xmm1, xmm1, 4
vpslldq xmm0, xmm0, 12
vpxor xmm4, xmm4, xmm0
vpsrld xmm2, xmm4, 1
vpsrld xmm3, xmm4, 2
vpsrld xmm0, xmm4, 7
vpxor xmm2, xmm2, xmm3
vpxor xmm2, xmm2, xmm0
vpxor xmm2, xmm2, xmm1
vpxor xmm2, xmm2, xmm4
vpxor xmm6, xmm6, xmm2
vmovdqu xmm5, OWORD PTR [rsp]
L_AES_GCM_encrypt_avx1_done_128:
mov edx, r9d
cmp ebx, edx
jge L_AES_GCM_encrypt_avx1_done_enc
mov r13d, r9d
and r13d, 4294967280
cmp ebx, r13d
jge L_AES_GCM_encrypt_avx1_last_block_done
vmovdqu xmm9, OWORD PTR [rsp+128]
vpshufb xmm8, xmm9, OWORD PTR L_avx1_aes_gcm_bswap_epi64
vpaddd xmm9, xmm9, OWORD PTR L_avx1_aes_gcm_one
vmovdqu OWORD PTR [rsp+128], xmm9
vpxor xmm8, xmm8, [r15]
vaesenc xmm8, xmm8, [r15+16]
vaesenc xmm8, xmm8, [r15+32]
vaesenc xmm8, xmm8, [r15+48]
vaesenc xmm8, xmm8, [r15+64]
vaesenc xmm8, xmm8, [r15+80]
vaesenc xmm8, xmm8, [r15+96]
vaesenc xmm8, xmm8, [r15+112]
vaesenc xmm8, xmm8, [r15+128]
vaesenc xmm8, xmm8, [r15+144]
cmp r10d, 11
vmovdqa xmm9, OWORD PTR [r15+160]
jl L_AES_GCM_encrypt_avx1_aesenc_block_last
vaesenc xmm8, xmm8, xmm9
vaesenc xmm8, xmm8, [r15+176]
cmp r10d, 13
vmovdqa xmm9, OWORD PTR [r15+192]
jl L_AES_GCM_encrypt_avx1_aesenc_block_last
vaesenc xmm8, xmm8, xmm9
vaesenc xmm8, xmm8, [r15+208]
vmovdqa xmm9, OWORD PTR [r15+224]
L_AES_GCM_encrypt_avx1_aesenc_block_last:
vaesenclast xmm8, xmm8, xmm9
vmovdqu xmm9, OWORD PTR [rdi+rbx]
vpxor xmm8, xmm8, xmm9
vmovdqu OWORD PTR [rsi+rbx], xmm8
vpshufb xmm8, xmm8, OWORD PTR L_avx1_aes_gcm_bswap_mask
vpxor xmm6, xmm6, xmm8
add ebx, 16
cmp ebx, r13d
jge L_AES_GCM_encrypt_avx1_last_block_ghash
L_AES_GCM_encrypt_avx1_last_block_start:
vmovdqu xmm13, OWORD PTR [rdi+rbx]
vmovdqu xmm9, OWORD PTR [rsp+128]
vpshufb xmm8, xmm9, OWORD PTR L_avx1_aes_gcm_bswap_epi64
vpaddd xmm9, xmm9, OWORD PTR L_avx1_aes_gcm_one
vmovdqu OWORD PTR [rsp+128], xmm9
vpxor xmm8, xmm8, [r15]
vpclmulqdq xmm10, xmm6, xmm5, 16
vaesenc xmm8, xmm8, [r15+16]
vaesenc xmm8, xmm8, [r15+32]
vpclmulqdq xmm11, xmm6, xmm5, 1
vaesenc xmm8, xmm8, [r15+48]
vaesenc xmm8, xmm8, [r15+64]
vpclmulqdq xmm12, xmm6, xmm5, 0
vaesenc xmm8, xmm8, [r15+80]
vpclmulqdq xmm1, xmm6, xmm5, 17
vaesenc xmm8, xmm8, [r15+96]
vpxor xmm10, xmm10, xmm11
vpslldq xmm2, xmm10, 8
vpsrldq xmm10, xmm10, 8
vaesenc xmm8, xmm8, [r15+112]
vpxor xmm2, xmm2, xmm12
vpxor xmm3, xmm1, xmm10
vmovdqa xmm0, OWORD PTR L_avx1_aes_gcm_mod2_128
vpclmulqdq xmm11, xmm2, xmm0, 16
vaesenc xmm8, xmm8, [r15+128]
vpshufd xmm10, xmm2, 78
vpxor xmm10, xmm10, xmm11
vpclmulqdq xmm11, xmm10, xmm0, 16
vaesenc xmm8, xmm8, [r15+144]
vpshufd xmm10, xmm10, 78
vpxor xmm10, xmm10, xmm11
vpxor xmm6, xmm10, xmm3
cmp r10d, 11
vmovdqa xmm9, OWORD PTR [r15+160]
jl L_AES_GCM_encrypt_avx1_aesenc_gfmul_last
vaesenc xmm8, xmm8, xmm9
vaesenc xmm8, xmm8, [r15+176]
cmp r10d, 13
vmovdqa xmm9, OWORD PTR [r15+192]
jl L_AES_GCM_encrypt_avx1_aesenc_gfmul_last
vaesenc xmm8, xmm8, xmm9
vaesenc xmm8, xmm8, [r15+208]
vmovdqa xmm9, OWORD PTR [r15+224]
L_AES_GCM_encrypt_avx1_aesenc_gfmul_last:
vaesenclast xmm8, xmm8, xmm9
vmovdqa xmm0, xmm13
vpxor xmm8, xmm8, xmm0
vmovdqu OWORD PTR [rsi+rbx], xmm8
vpshufb xmm8, xmm8, OWORD PTR L_avx1_aes_gcm_bswap_mask
add ebx, 16
vpxor xmm6, xmm6, xmm8
cmp ebx, r13d
jl L_AES_GCM_encrypt_avx1_last_block_start
L_AES_GCM_encrypt_avx1_last_block_ghash:
; ghash_gfmul_red_avx
vpshufd xmm9, xmm5, 78
vpshufd xmm10, xmm6, 78
vpclmulqdq xmm11, xmm6, xmm5, 17
vpclmulqdq xmm8, xmm6, xmm5, 0
vpxor xmm9, xmm9, xmm5
vpxor xmm10, xmm10, xmm6
vpclmulqdq xmm9, xmm9, xmm10, 0
vpxor xmm9, xmm9, xmm8
vpxor xmm9, xmm9, xmm11
vpslldq xmm10, xmm9, 8
vpsrldq xmm9, xmm9, 8
vpxor xmm8, xmm8, xmm10
vpxor xmm6, xmm11, xmm9
vpslld xmm12, xmm8, 31
vpslld xmm13, xmm8, 30
vpslld xmm14, xmm8, 25
vpxor xmm12, xmm12, xmm13
vpxor xmm12, xmm12, xmm14
vpsrldq xmm13, xmm12, 4
vpslldq xmm12, xmm12, 12
vpxor xmm8, xmm8, xmm12
vpsrld xmm14, xmm8, 1
vpsrld xmm10, xmm8, 2
vpsrld xmm9, xmm8, 7
vpxor xmm14, xmm14, xmm10
vpxor xmm14, xmm14, xmm9
vpxor xmm14, xmm14, xmm13
vpxor xmm14, xmm14, xmm8
vpxor xmm6, xmm6, xmm14
L_AES_GCM_encrypt_avx1_last_block_done:
mov ecx, r9d
mov edx, ecx
and ecx, 15
jz L_AES_GCM_encrypt_avx1_aesenc_last15_enc_avx_done
vmovdqu xmm4, OWORD PTR [rsp+128]
vpshufb xmm4, xmm4, OWORD PTR L_avx1_aes_gcm_bswap_epi64
vpxor xmm4, xmm4, [r15]
vaesenc xmm4, xmm4, [r15+16]
vaesenc xmm4, xmm4, [r15+32]
vaesenc xmm4, xmm4, [r15+48]
vaesenc xmm4, xmm4, [r15+64]
vaesenc xmm4, xmm4, [r15+80]
vaesenc xmm4, xmm4, [r15+96]
vaesenc xmm4, xmm4, [r15+112]
vaesenc xmm4, xmm4, [r15+128]
vaesenc xmm4, xmm4, [r15+144]
cmp r10d, 11
vmovdqa xmm9, OWORD PTR [r15+160]
jl L_AES_GCM_encrypt_avx1_aesenc_last15_enc_avx_aesenc_avx_last
vaesenc xmm4, xmm4, xmm9
vaesenc xmm4, xmm4, [r15+176]
cmp r10d, 13
vmovdqa xmm9, OWORD PTR [r15+192]
jl L_AES_GCM_encrypt_avx1_aesenc_last15_enc_avx_aesenc_avx_last
vaesenc xmm4, xmm4, xmm9
vaesenc xmm4, xmm4, [r15+208]
vmovdqa xmm9, OWORD PTR [r15+224]
L_AES_GCM_encrypt_avx1_aesenc_last15_enc_avx_aesenc_avx_last:
vaesenclast xmm4, xmm4, xmm9
sub rsp, 16
xor ecx, ecx
vmovdqu OWORD PTR [rsp], xmm4
L_AES_GCM_encrypt_avx1_aesenc_last15_enc_avx_loop:
movzx r13d, BYTE PTR [rdi+rbx]
xor r13b, BYTE PTR [rsp+rcx]
mov BYTE PTR [rsi+rbx], r13b
mov BYTE PTR [rsp+rcx], r13b
inc ebx
inc ecx
cmp ebx, edx
jl L_AES_GCM_encrypt_avx1_aesenc_last15_enc_avx_loop
xor r13, r13
cmp ecx, 16
je L_AES_GCM_encrypt_avx1_aesenc_last15_enc_avx_finish_enc
L_AES_GCM_encrypt_avx1_aesenc_last15_enc_avx_byte_loop:
mov BYTE PTR [rsp+rcx], r13b
inc ecx
cmp ecx, 16
jl L_AES_GCM_encrypt_avx1_aesenc_last15_enc_avx_byte_loop
L_AES_GCM_encrypt_avx1_aesenc_last15_enc_avx_finish_enc:
vmovdqu xmm4, OWORD PTR [rsp]
add rsp, 16
vpshufb xmm4, xmm4, OWORD PTR L_avx1_aes_gcm_bswap_mask
vpxor xmm6, xmm6, xmm4
; ghash_gfmul_red_avx
vpshufd xmm9, xmm5, 78
vpshufd xmm10, xmm6, 78
vpclmulqdq xmm11, xmm6, xmm5, 17
vpclmulqdq xmm8, xmm6, xmm5, 0
vpxor xmm9, xmm9, xmm5
vpxor xmm10, xmm10, xmm6
vpclmulqdq xmm9, xmm9, xmm10, 0
vpxor xmm9, xmm9, xmm8
vpxor xmm9, xmm9, xmm11
vpslldq xmm10, xmm9, 8
vpsrldq xmm9, xmm9, 8
vpxor xmm8, xmm8, xmm10
vpxor xmm6, xmm11, xmm9
vpslld xmm12, xmm8, 31
vpslld xmm13, xmm8, 30
vpslld xmm14, xmm8, 25
vpxor xmm12, xmm12, xmm13
vpxor xmm12, xmm12, xmm14
vpsrldq xmm13, xmm12, 4
vpslldq xmm12, xmm12, 12
vpxor xmm8, xmm8, xmm12
vpsrld xmm14, xmm8, 1
vpsrld xmm10, xmm8, 2
vpsrld xmm9, xmm8, 7
vpxor xmm14, xmm14, xmm10
vpxor xmm14, xmm14, xmm9
vpxor xmm14, xmm14, xmm13
vpxor xmm14, xmm14, xmm8
vpxor xmm6, xmm6, xmm14
L_AES_GCM_encrypt_avx1_aesenc_last15_enc_avx_done:
L_AES_GCM_encrypt_avx1_done_enc:
mov edx, r9d
mov ecx, r11d
shl rdx, 3
shl rcx, 3
vmovq xmm0, rdx
vmovq xmm1, rcx
vpunpcklqdq xmm0, xmm0, xmm1
vpxor xmm6, xmm6, xmm0
; ghash_gfmul_red_avx
vpshufd xmm9, xmm5, 78
vpshufd xmm10, xmm6, 78
vpclmulqdq xmm11, xmm6, xmm5, 17
vpclmulqdq xmm8, xmm6, xmm5, 0
vpxor xmm9, xmm9, xmm5
vpxor xmm10, xmm10, xmm6
vpclmulqdq xmm9, xmm9, xmm10, 0
vpxor xmm9, xmm9, xmm8
vpxor xmm9, xmm9, xmm11
vpslldq xmm10, xmm9, 8
vpsrldq xmm9, xmm9, 8
vpxor xmm8, xmm8, xmm10
vpxor xmm6, xmm11, xmm9
vpslld xmm12, xmm8, 31
vpslld xmm13, xmm8, 30
vpslld xmm14, xmm8, 25
vpxor xmm12, xmm12, xmm13
vpxor xmm12, xmm12, xmm14
vpsrldq xmm13, xmm12, 4
vpslldq xmm12, xmm12, 12
vpxor xmm8, xmm8, xmm12
vpsrld xmm14, xmm8, 1
vpsrld xmm10, xmm8, 2
vpsrld xmm9, xmm8, 7
vpxor xmm14, xmm14, xmm10
vpxor xmm14, xmm14, xmm9
vpxor xmm14, xmm14, xmm13
vpxor xmm14, xmm14, xmm8
vpxor xmm6, xmm6, xmm14
vpshufb xmm6, xmm6, OWORD PTR L_avx1_aes_gcm_bswap_mask
vmovdqu xmm0, OWORD PTR [rsp+144]
vpxor xmm0, xmm0, xmm6
cmp r14d, 16
je L_AES_GCM_encrypt_avx1_store_tag_16
xor rcx, rcx
vmovdqu OWORD PTR [rsp], xmm0
L_AES_GCM_encrypt_avx1_store_tag_loop:
movzx r13d, BYTE PTR [rsp+rcx]
mov BYTE PTR [r8+rcx], r13b
inc ecx
cmp ecx, r14d
jne L_AES_GCM_encrypt_avx1_store_tag_loop
jmp L_AES_GCM_encrypt_avx1_store_tag_done
L_AES_GCM_encrypt_avx1_store_tag_16:
vmovdqu OWORD PTR [r8], xmm0
L_AES_GCM_encrypt_avx1_store_tag_done:
vzeroupper
vmovdqu xmm6, OWORD PTR [rsp+160]
vmovdqu xmm7, OWORD PTR [rsp+176]
vmovdqu xmm8, OWORD PTR [rsp+192]
vmovdqu xmm9, OWORD PTR [rsp+208]
vmovdqu xmm10, OWORD PTR [rsp+224]
vmovdqu xmm11, OWORD PTR [rsp+240]
vmovdqu xmm12, OWORD PTR [rsp+256]
vmovdqu xmm13, OWORD PTR [rsp+272]
vmovdqu xmm14, OWORD PTR [rsp+288]
vmovdqu xmm15, OWORD PTR [rsp+304]
add rsp, 320
pop r15
pop r14
pop rbx
pop r12
pop rsi
pop rdi
pop r13
ret
AES_GCM_encrypt_avx1 ENDP
_text ENDS
_text SEGMENT READONLY PARA
AES_GCM_decrypt_avx1 PROC
push r13
push rdi
push rsi
push r12
push rbx
push r14
push r15
push rbp
mov rdi, rcx
mov rsi, rdx
mov r12, r8
mov rax, r9
mov r8, QWORD PTR [rsp+104]
mov r9d, DWORD PTR [rsp+112]
mov r11d, DWORD PTR [rsp+120]
mov ebx, DWORD PTR [rsp+128]
mov r14d, DWORD PTR [rsp+136]
mov r15, QWORD PTR [rsp+144]
mov r10d, DWORD PTR [rsp+152]
mov rbp, QWORD PTR [rsp+160]
sub rsp, 328
vmovdqu OWORD PTR [rsp+168], xmm6
vmovdqu OWORD PTR [rsp+184], xmm7
vmovdqu OWORD PTR [rsp+200], xmm8
vmovdqu OWORD PTR [rsp+216], xmm9
vmovdqu OWORD PTR [rsp+232], xmm10
vmovdqu OWORD PTR [rsp+248], xmm11
vmovdqu OWORD PTR [rsp+264], xmm12
vmovdqu OWORD PTR [rsp+280], xmm13
vmovdqu OWORD PTR [rsp+296], xmm14
vmovdqu OWORD PTR [rsp+312], xmm15
vpxor xmm4, xmm4, xmm4
vpxor xmm6, xmm6, xmm6
cmp ebx, 12
mov edx, ebx
jne L_AES_GCM_decrypt_avx1_iv_not_12
    ; Calculate values when IV is 12 bytes
; Set counter based on IV
mov ecx, 16777216
vmovq xmm4, QWORD PTR [rax]
vpinsrd xmm4, xmm4, DWORD PTR [rax+8], 2
vpinsrd xmm4, xmm4, ecx, 3
; H = Encrypt X(=0) and T = Encrypt counter
vmovdqa xmm5, OWORD PTR [r15]
vpxor xmm1, xmm4, xmm5
vmovdqa xmm7, OWORD PTR [r15+16]
vaesenc xmm5, xmm5, xmm7
vaesenc xmm1, xmm1, xmm7
vmovdqa xmm7, OWORD PTR [r15+32]
vaesenc xmm5, xmm5, xmm7
vaesenc xmm1, xmm1, xmm7
vmovdqa xmm7, OWORD PTR [r15+48]
vaesenc xmm5, xmm5, xmm7
vaesenc xmm1, xmm1, xmm7
vmovdqa xmm7, OWORD PTR [r15+64]
vaesenc xmm5, xmm5, xmm7
vaesenc xmm1, xmm1, xmm7
vmovdqa xmm7, OWORD PTR [r15+80]
vaesenc xmm5, xmm5, xmm7
vaesenc xmm1, xmm1, xmm7
vmovdqa xmm7, OWORD PTR [r15+96]
vaesenc xmm5, xmm5, xmm7
vaesenc xmm1, xmm1, xmm7
vmovdqa xmm7, OWORD PTR [r15+112]
vaesenc xmm5, xmm5, xmm7
vaesenc xmm1, xmm1, xmm7
vmovdqa xmm7, OWORD PTR [r15+128]
vaesenc xmm5, xmm5, xmm7
vaesenc xmm1, xmm1, xmm7
vmovdqa xmm7, OWORD PTR [r15+144]
vaesenc xmm5, xmm5, xmm7
vaesenc xmm1, xmm1, xmm7
cmp r10d, 11
vmovdqa xmm7, OWORD PTR [r15+160]
jl L_AES_GCM_decrypt_avx1_calc_iv_12_last
vaesenc xmm5, xmm5, xmm7
vaesenc xmm1, xmm1, xmm7
vmovdqa xmm7, OWORD PTR [r15+176]
vaesenc xmm5, xmm5, xmm7
vaesenc xmm1, xmm1, xmm7
cmp r10d, 13
vmovdqa xmm7, OWORD PTR [r15+192]
jl L_AES_GCM_decrypt_avx1_calc_iv_12_last
vaesenc xmm5, xmm5, xmm7
vaesenc xmm1, xmm1, xmm7
vmovdqa xmm7, OWORD PTR [r15+208]
vaesenc xmm5, xmm5, xmm7
vaesenc xmm1, xmm1, xmm7
vmovdqa xmm7, OWORD PTR [r15+224]
L_AES_GCM_decrypt_avx1_calc_iv_12_last:
vaesenclast xmm5, xmm5, xmm7
vaesenclast xmm1, xmm1, xmm7
vpshufb xmm5, xmm5, OWORD PTR L_avx1_aes_gcm_bswap_mask
vmovdqu OWORD PTR [rsp+144], xmm1
jmp L_AES_GCM_decrypt_avx1_iv_done
L_AES_GCM_decrypt_avx1_iv_not_12:
; Calculate values when IV is not 12 bytes
; H = Encrypt X(=0)
vmovdqa xmm5, OWORD PTR [r15]
vaesenc xmm5, xmm5, [r15+16]
vaesenc xmm5, xmm5, [r15+32]
vaesenc xmm5, xmm5, [r15+48]
vaesenc xmm5, xmm5, [r15+64]
vaesenc xmm5, xmm5, [r15+80]
vaesenc xmm5, xmm5, [r15+96]
vaesenc xmm5, xmm5, [r15+112]
vaesenc xmm5, xmm5, [r15+128]
vaesenc xmm5, xmm5, [r15+144]
cmp r10d, 11
vmovdqa xmm9, OWORD PTR [r15+160]
jl L_AES_GCM_decrypt_avx1_calc_iv_1_aesenc_avx_last
vaesenc xmm5, xmm5, xmm9
vaesenc xmm5, xmm5, [r15+176]
cmp r10d, 13
vmovdqa xmm9, OWORD PTR [r15+192]
jl L_AES_GCM_decrypt_avx1_calc_iv_1_aesenc_avx_last
vaesenc xmm5, xmm5, xmm9
vaesenc xmm5, xmm5, [r15+208]
vmovdqa xmm9, OWORD PTR [r15+224]
L_AES_GCM_decrypt_avx1_calc_iv_1_aesenc_avx_last:
vaesenclast xmm5, xmm5, xmm9
vpshufb xmm5, xmm5, OWORD PTR L_avx1_aes_gcm_bswap_mask
; Calc counter
; Initialization vector
cmp edx, 0
mov rcx, 0
je L_AES_GCM_decrypt_avx1_calc_iv_done
cmp edx, 16
jl L_AES_GCM_decrypt_avx1_calc_iv_lt16
and edx, 4294967280
L_AES_GCM_decrypt_avx1_calc_iv_16_loop:
vmovdqu xmm8, OWORD PTR [rax+rcx]
vpshufb xmm8, xmm8, OWORD PTR L_avx1_aes_gcm_bswap_mask
vpxor xmm4, xmm4, xmm8
; ghash_gfmul_avx
vpshufd xmm1, xmm4, 78
vpshufd xmm2, xmm5, 78
vpclmulqdq xmm3, xmm5, xmm4, 17
vpclmulqdq xmm0, xmm5, xmm4, 0
vpxor xmm1, xmm1, xmm4
vpxor xmm2, xmm2, xmm5
vpclmulqdq xmm1, xmm1, xmm2, 0
vpxor xmm1, xmm1, xmm0
vpxor xmm1, xmm1, xmm3
vmovdqa xmm7, xmm0
vmovdqa xmm4, xmm3
vpslldq xmm2, xmm1, 8
vpsrldq xmm1, xmm1, 8
vpxor xmm7, xmm7, xmm2
vpxor xmm4, xmm4, xmm1
vpsrld xmm0, xmm7, 31
vpsrld xmm1, xmm4, 31
vpslld xmm7, xmm7, 1
vpslld xmm4, xmm4, 1
vpsrldq xmm2, xmm0, 12
vpslldq xmm0, xmm0, 4
vpslldq xmm1, xmm1, 4
vpor xmm4, xmm4, xmm2
vpor xmm7, xmm7, xmm0
vpor xmm4, xmm4, xmm1
vpslld xmm0, xmm7, 31
vpslld xmm1, xmm7, 30
vpslld xmm2, xmm7, 25
vpxor xmm0, xmm0, xmm1
vpxor xmm0, xmm0, xmm2
vmovdqa xmm1, xmm0
vpsrldq xmm1, xmm1, 4
vpslldq xmm0, xmm0, 12
vpxor xmm7, xmm7, xmm0
vpsrld xmm2, xmm7, 1
vpsrld xmm3, xmm7, 2
vpsrld xmm0, xmm7, 7
vpxor xmm2, xmm2, xmm3
vpxor xmm2, xmm2, xmm0
vpxor xmm2, xmm2, xmm1
vpxor xmm2, xmm2, xmm7
vpxor xmm4, xmm4, xmm2
add ecx, 16
cmp ecx, edx
jl L_AES_GCM_decrypt_avx1_calc_iv_16_loop
mov edx, ebx
cmp ecx, edx
je L_AES_GCM_decrypt_avx1_calc_iv_done
L_AES_GCM_decrypt_avx1_calc_iv_lt16:
sub rsp, 16
vpxor xmm8, xmm8, xmm8
xor ebx, ebx
vmovdqu OWORD PTR [rsp], xmm8
L_AES_GCM_decrypt_avx1_calc_iv_loop:
movzx r13d, BYTE PTR [rax+rcx]
mov BYTE PTR [rsp+rbx], r13b
inc ecx
inc ebx
cmp ecx, edx
jl L_AES_GCM_decrypt_avx1_calc_iv_loop
vmovdqu xmm8, OWORD PTR [rsp]
add rsp, 16
vpshufb xmm8, xmm8, OWORD PTR L_avx1_aes_gcm_bswap_mask
vpxor xmm4, xmm4, xmm8
; ghash_gfmul_avx
vpshufd xmm1, xmm4, 78
vpshufd xmm2, xmm5, 78
vpclmulqdq xmm3, xmm5, xmm4, 17
vpclmulqdq xmm0, xmm5, xmm4, 0
vpxor xmm1, xmm1, xmm4
vpxor xmm2, xmm2, xmm5
vpclmulqdq xmm1, xmm1, xmm2, 0
vpxor xmm1, xmm1, xmm0
vpxor xmm1, xmm1, xmm3
vmovdqa xmm7, xmm0
vmovdqa xmm4, xmm3
vpslldq xmm2, xmm1, 8
vpsrldq xmm1, xmm1, 8
vpxor xmm7, xmm7, xmm2
vpxor xmm4, xmm4, xmm1
vpsrld xmm0, xmm7, 31
vpsrld xmm1, xmm4, 31
vpslld xmm7, xmm7, 1
vpslld xmm4, xmm4, 1
vpsrldq xmm2, xmm0, 12
vpslldq xmm0, xmm0, 4
vpslldq xmm1, xmm1, 4
vpor xmm4, xmm4, xmm2
vpor xmm7, xmm7, xmm0
vpor xmm4, xmm4, xmm1
vpslld xmm0, xmm7, 31
vpslld xmm1, xmm7, 30
vpslld xmm2, xmm7, 25
vpxor xmm0, xmm0, xmm1
vpxor xmm0, xmm0, xmm2
vmovdqa xmm1, xmm0
vpsrldq xmm1, xmm1, 4
vpslldq xmm0, xmm0, 12
vpxor xmm7, xmm7, xmm0
vpsrld xmm2, xmm7, 1
vpsrld xmm3, xmm7, 2
vpsrld xmm0, xmm7, 7
vpxor xmm2, xmm2, xmm3
vpxor xmm2, xmm2, xmm0
vpxor xmm2, xmm2, xmm1
vpxor xmm2, xmm2, xmm7
vpxor xmm4, xmm4, xmm2
L_AES_GCM_decrypt_avx1_calc_iv_done:
; T = Encrypt counter
vpxor xmm0, xmm0, xmm0
shl edx, 3
vmovq xmm0, rdx
vpxor xmm4, xmm4, xmm0
; ghash_gfmul_avx
vpshufd xmm1, xmm4, 78
vpshufd xmm2, xmm5, 78
vpclmulqdq xmm3, xmm5, xmm4, 17
vpclmulqdq xmm0, xmm5, xmm4, 0
vpxor xmm1, xmm1, xmm4
vpxor xmm2, xmm2, xmm5
vpclmulqdq xmm1, xmm1, xmm2, 0
vpxor xmm1, xmm1, xmm0
vpxor xmm1, xmm1, xmm3
vmovdqa xmm7, xmm0
vmovdqa xmm4, xmm3
vpslldq xmm2, xmm1, 8
vpsrldq xmm1, xmm1, 8
vpxor xmm7, xmm7, xmm2
vpxor xmm4, xmm4, xmm1
vpsrld xmm0, xmm7, 31
vpsrld xmm1, xmm4, 31
vpslld xmm7, xmm7, 1
vpslld xmm4, xmm4, 1
vpsrldq xmm2, xmm0, 12
vpslldq xmm0, xmm0, 4
vpslldq xmm1, xmm1, 4
vpor xmm4, xmm4, xmm2
vpor xmm7, xmm7, xmm0
vpor xmm4, xmm4, xmm1
vpslld xmm0, xmm7, 31
vpslld xmm1, xmm7, 30
vpslld xmm2, xmm7, 25
vpxor xmm0, xmm0, xmm1
vpxor xmm0, xmm0, xmm2
vmovdqa xmm1, xmm0
vpsrldq xmm1, xmm1, 4
vpslldq xmm0, xmm0, 12
vpxor xmm7, xmm7, xmm0
vpsrld xmm2, xmm7, 1
vpsrld xmm3, xmm7, 2
vpsrld xmm0, xmm7, 7
vpxor xmm2, xmm2, xmm3
vpxor xmm2, xmm2, xmm0
vpxor xmm2, xmm2, xmm1
vpxor xmm2, xmm2, xmm7
vpxor xmm4, xmm4, xmm2
vpshufb xmm4, xmm4, OWORD PTR L_avx1_aes_gcm_bswap_mask
; Encrypt counter
vmovdqa xmm8, OWORD PTR [r15]
vpxor xmm8, xmm8, xmm4
vaesenc xmm8, xmm8, [r15+16]
vaesenc xmm8, xmm8, [r15+32]
vaesenc xmm8, xmm8, [r15+48]
vaesenc xmm8, xmm8, [r15+64]
vaesenc xmm8, xmm8, [r15+80]
vaesenc xmm8, xmm8, [r15+96]
vaesenc xmm8, xmm8, [r15+112]
vaesenc xmm8, xmm8, [r15+128]
vaesenc xmm8, xmm8, [r15+144]
cmp r10d, 11
vmovdqa xmm9, OWORD PTR [r15+160]
jl L_AES_GCM_decrypt_avx1_calc_iv_2_aesenc_avx_last
vaesenc xmm8, xmm8, xmm9
vaesenc xmm8, xmm8, [r15+176]
cmp r10d, 13
vmovdqa xmm9, OWORD PTR [r15+192]
jl L_AES_GCM_decrypt_avx1_calc_iv_2_aesenc_avx_last
vaesenc xmm8, xmm8, xmm9
vaesenc xmm8, xmm8, [r15+208]
vmovdqa xmm9, OWORD PTR [r15+224]
L_AES_GCM_decrypt_avx1_calc_iv_2_aesenc_avx_last:
vaesenclast xmm8, xmm8, xmm9
vmovdqu OWORD PTR [rsp+144], xmm8
L_AES_GCM_decrypt_avx1_iv_done:
; Additional authentication data
mov edx, r11d
cmp edx, 0
je L_AES_GCM_decrypt_avx1_calc_aad_done
xor ecx, ecx
cmp edx, 16
jl L_AES_GCM_decrypt_avx1_calc_aad_lt16
and edx, 4294967280
L_AES_GCM_decrypt_avx1_calc_aad_16_loop:
vmovdqu xmm8, OWORD PTR [r12+rcx]
vpshufb xmm8, xmm8, OWORD PTR L_avx1_aes_gcm_bswap_mask
vpxor xmm6, xmm6, xmm8
; ghash_gfmul_avx
vpshufd xmm1, xmm6, 78
vpshufd xmm2, xmm5, 78
vpclmulqdq xmm3, xmm5, xmm6, 17
vpclmulqdq xmm0, xmm5, xmm6, 0
vpxor xmm1, xmm1, xmm6
vpxor xmm2, xmm2, xmm5
vpclmulqdq xmm1, xmm1, xmm2, 0
vpxor xmm1, xmm1, xmm0
vpxor xmm1, xmm1, xmm3
vmovdqa xmm7, xmm0
vmovdqa xmm6, xmm3
vpslldq xmm2, xmm1, 8
vpsrldq xmm1, xmm1, 8
vpxor xmm7, xmm7, xmm2
vpxor xmm6, xmm6, xmm1
vpsrld xmm0, xmm7, 31
vpsrld xmm1, xmm6, 31
vpslld xmm7, xmm7, 1
vpslld xmm6, xmm6, 1
vpsrldq xmm2, xmm0, 12
vpslldq xmm0, xmm0, 4
vpslldq xmm1, xmm1, 4
vpor xmm6, xmm6, xmm2
vpor xmm7, xmm7, xmm0
vpor xmm6, xmm6, xmm1
vpslld xmm0, xmm7, 31
vpslld xmm1, xmm7, 30
vpslld xmm2, xmm7, 25
vpxor xmm0, xmm0, xmm1
vpxor xmm0, xmm0, xmm2
vmovdqa xmm1, xmm0
vpsrldq xmm1, xmm1, 4
vpslldq xmm0, xmm0, 12
vpxor xmm7, xmm7, xmm0
vpsrld xmm2, xmm7, 1
vpsrld xmm3, xmm7, 2
vpsrld xmm0, xmm7, 7
vpxor xmm2, xmm2, xmm3
vpxor xmm2, xmm2, xmm0
vpxor xmm2, xmm2, xmm1
vpxor xmm2, xmm2, xmm7
vpxor xmm6, xmm6, xmm2
add ecx, 16
cmp ecx, edx
jl L_AES_GCM_decrypt_avx1_calc_aad_16_loop
mov edx, r11d
cmp ecx, edx
je L_AES_GCM_decrypt_avx1_calc_aad_done
L_AES_GCM_decrypt_avx1_calc_aad_lt16:
sub rsp, 16
vpxor xmm8, xmm8, xmm8
xor ebx, ebx
vmovdqu OWORD PTR [rsp], xmm8
L_AES_GCM_decrypt_avx1_calc_aad_loop:
movzx r13d, BYTE PTR [r12+rcx]
mov BYTE PTR [rsp+rbx], r13b
inc ecx
inc ebx
cmp ecx, edx
jl L_AES_GCM_decrypt_avx1_calc_aad_loop
vmovdqu xmm8, OWORD PTR [rsp]
add rsp, 16
vpshufb xmm8, xmm8, OWORD PTR L_avx1_aes_gcm_bswap_mask
vpxor xmm6, xmm6, xmm8
; ghash_gfmul_avx
vpshufd xmm1, xmm6, 78
vpshufd xmm2, xmm5, 78
vpclmulqdq xmm3, xmm5, xmm6, 17
vpclmulqdq xmm0, xmm5, xmm6, 0
vpxor xmm1, xmm1, xmm6
vpxor xmm2, xmm2, xmm5
vpclmulqdq xmm1, xmm1, xmm2, 0
vpxor xmm1, xmm1, xmm0
vpxor xmm1, xmm1, xmm3
vmovdqa xmm7, xmm0
vmovdqa xmm6, xmm3
vpslldq xmm2, xmm1, 8
vpsrldq xmm1, xmm1, 8
vpxor xmm7, xmm7, xmm2
vpxor xmm6, xmm6, xmm1
vpsrld xmm0, xmm7, 31
vpsrld xmm1, xmm6, 31
vpslld xmm7, xmm7, 1
vpslld xmm6, xmm6, 1
vpsrldq xmm2, xmm0, 12
vpslldq xmm0, xmm0, 4
vpslldq xmm1, xmm1, 4
vpor xmm6, xmm6, xmm2
vpor xmm7, xmm7, xmm0
vpor xmm6, xmm6, xmm1
vpslld xmm0, xmm7, 31
vpslld xmm1, xmm7, 30
vpslld xmm2, xmm7, 25
vpxor xmm0, xmm0, xmm1
vpxor xmm0, xmm0, xmm2
vmovdqa xmm1, xmm0
vpsrldq xmm1, xmm1, 4
vpslldq xmm0, xmm0, 12
vpxor xmm7, xmm7, xmm0
vpsrld xmm2, xmm7, 1
vpsrld xmm3, xmm7, 2
vpsrld xmm0, xmm7, 7
vpxor xmm2, xmm2, xmm3
vpxor xmm2, xmm2, xmm0
vpxor xmm2, xmm2, xmm1
vpxor xmm2, xmm2, xmm7
vpxor xmm6, xmm6, xmm2
L_AES_GCM_decrypt_avx1_calc_aad_done:
; Calculate counter and H
vpsrlq xmm9, xmm5, 63
vpsllq xmm8, xmm5, 1
vpslldq xmm9, xmm9, 8
vpor xmm8, xmm8, xmm9
vpshufd xmm5, xmm5, 255
vpsrad xmm5, xmm5, 31
vpshufb xmm4, xmm4, OWORD PTR L_avx1_aes_gcm_bswap_epi64
vpand xmm5, xmm5, OWORD PTR L_avx1_aes_gcm_mod2_128
vpaddd xmm4, xmm4, OWORD PTR L_avx1_aes_gcm_one
vpxor xmm5, xmm5, xmm8
vmovdqu OWORD PTR [rsp+128], xmm4
xor ebx, ebx
cmp r9d, 128
mov r13d, r9d
jl L_AES_GCM_decrypt_avx1_done_128
and r13d, 4294967168
vmovdqa xmm2, xmm6
; H ^ 1
vmovdqu OWORD PTR [rsp], xmm5
; H ^ 2
vpclmulqdq xmm8, xmm5, xmm5, 0
vpclmulqdq xmm0, xmm5, xmm5, 17
vpslld xmm12, xmm8, 31
vpslld xmm13, xmm8, 30
vpslld xmm14, xmm8, 25
vpxor xmm12, xmm12, xmm13
vpxor xmm12, xmm12, xmm14
vpsrldq xmm13, xmm12, 4
vpslldq xmm12, xmm12, 12
vpxor xmm8, xmm8, xmm12
vpsrld xmm14, xmm8, 1
vpsrld xmm10, xmm8, 2
vpsrld xmm9, xmm8, 7
vpxor xmm14, xmm14, xmm10
vpxor xmm14, xmm14, xmm9
vpxor xmm14, xmm14, xmm13
vpxor xmm14, xmm14, xmm8
vpxor xmm0, xmm0, xmm14
vmovdqu OWORD PTR [rsp+16], xmm0
; H ^ 3
; ghash_gfmul_red_avx
vpshufd xmm9, xmm5, 78
vpshufd xmm10, xmm0, 78
vpclmulqdq xmm11, xmm0, xmm5, 17
vpclmulqdq xmm8, xmm0, xmm5, 0
vpxor xmm9, xmm9, xmm5
vpxor xmm10, xmm10, xmm0
vpclmulqdq xmm9, xmm9, xmm10, 0
vpxor xmm9, xmm9, xmm8
vpxor xmm9, xmm9, xmm11
vpslldq xmm10, xmm9, 8
vpsrldq xmm9, xmm9, 8
vpxor xmm8, xmm8, xmm10
vpxor xmm1, xmm11, xmm9
vpslld xmm12, xmm8, 31
vpslld xmm13, xmm8, 30
vpslld xmm14, xmm8, 25
vpxor xmm12, xmm12, xmm13
vpxor xmm12, xmm12, xmm14
vpsrldq xmm13, xmm12, 4
vpslldq xmm12, xmm12, 12
vpxor xmm8, xmm8, xmm12
vpsrld xmm14, xmm8, 1
vpsrld xmm10, xmm8, 2
vpsrld xmm9, xmm8, 7
vpxor xmm14, xmm14, xmm10
vpxor xmm14, xmm14, xmm9
vpxor xmm14, xmm14, xmm13
vpxor xmm14, xmm14, xmm8
vpxor xmm1, xmm1, xmm14
vmovdqu OWORD PTR [rsp+32], xmm1
; H ^ 4
vpclmulqdq xmm8, xmm0, xmm0, 0
vpclmulqdq xmm3, xmm0, xmm0, 17
vpslld xmm12, xmm8, 31
vpslld xmm13, xmm8, 30
vpslld xmm14, xmm8, 25
vpxor xmm12, xmm12, xmm13
vpxor xmm12, xmm12, xmm14
vpsrldq xmm13, xmm12, 4
vpslldq xmm12, xmm12, 12
vpxor xmm8, xmm8, xmm12
vpsrld xmm14, xmm8, 1
vpsrld xmm10, xmm8, 2
vpsrld xmm9, xmm8, 7
vpxor xmm14, xmm14, xmm10
vpxor xmm14, xmm14, xmm9
vpxor xmm14, xmm14, xmm13
vpxor xmm14, xmm14, xmm8
vpxor xmm3, xmm3, xmm14
vmovdqu OWORD PTR [rsp+48], xmm3
; H ^ 5
; ghash_gfmul_red_avx
vpshufd xmm9, xmm0, 78
vpshufd xmm10, xmm1, 78
vpclmulqdq xmm11, xmm1, xmm0, 17
vpclmulqdq xmm8, xmm1, xmm0, 0
vpxor xmm9, xmm9, xmm0
vpxor xmm10, xmm10, xmm1
vpclmulqdq xmm9, xmm9, xmm10, 0
vpxor xmm9, xmm9, xmm8
vpxor xmm9, xmm9, xmm11
vpslldq xmm10, xmm9, 8
vpsrldq xmm9, xmm9, 8
vpxor xmm8, xmm8, xmm10
vpxor xmm7, xmm11, xmm9
vpslld xmm12, xmm8, 31
vpslld xmm13, xmm8, 30
vpslld xmm14, xmm8, 25
vpxor xmm12, xmm12, xmm13
vpxor xmm12, xmm12, xmm14
vpsrldq xmm13, xmm12, 4
vpslldq xmm12, xmm12, 12
vpxor xmm8, xmm8, xmm12
vpsrld xmm14, xmm8, 1
vpsrld xmm10, xmm8, 2
vpsrld xmm9, xmm8, 7
vpxor xmm14, xmm14, xmm10
vpxor xmm14, xmm14, xmm9
vpxor xmm14, xmm14, xmm13
vpxor xmm14, xmm14, xmm8
vpxor xmm7, xmm7, xmm14
vmovdqu OWORD PTR [rsp+64], xmm7
; H ^ 6
vpclmulqdq xmm8, xmm1, xmm1, 0
vpclmulqdq xmm7, xmm1, xmm1, 17
vpslld xmm12, xmm8, 31
vpslld xmm13, xmm8, 30
vpslld xmm14, xmm8, 25
vpxor xmm12, xmm12, xmm13
vpxor xmm12, xmm12, xmm14
vpsrldq xmm13, xmm12, 4
vpslldq xmm12, xmm12, 12
vpxor xmm8, xmm8, xmm12
vpsrld xmm14, xmm8, 1
vpsrld xmm10, xmm8, 2
vpsrld xmm9, xmm8, 7
vpxor xmm14, xmm14, xmm10
vpxor xmm14, xmm14, xmm9
vpxor xmm14, xmm14, xmm13
vpxor xmm14, xmm14, xmm8
vpxor xmm7, xmm7, xmm14
vmovdqu OWORD PTR [rsp+80], xmm7
; H ^ 7
; ghash_gfmul_red_avx
vpshufd xmm9, xmm1, 78
vpshufd xmm10, xmm3, 78
vpclmulqdq xmm11, xmm3, xmm1, 17
vpclmulqdq xmm8, xmm3, xmm1, 0
vpxor xmm9, xmm9, xmm1
vpxor xmm10, xmm10, xmm3
vpclmulqdq xmm9, xmm9, xmm10, 0
vpxor xmm9, xmm9, xmm8
vpxor xmm9, xmm9, xmm11
vpslldq xmm10, xmm9, 8
vpsrldq xmm9, xmm9, 8
vpxor xmm8, xmm8, xmm10
vpxor xmm7, xmm11, xmm9
vpslld xmm12, xmm8, 31
vpslld xmm13, xmm8, 30
vpslld xmm14, xmm8, 25
vpxor xmm12, xmm12, xmm13
vpxor xmm12, xmm12, xmm14
vpsrldq xmm13, xmm12, 4
vpslldq xmm12, xmm12, 12
vpxor xmm8, xmm8, xmm12
vpsrld xmm14, xmm8, 1
vpsrld xmm10, xmm8, 2
vpsrld xmm9, xmm8, 7
vpxor xmm14, xmm14, xmm10
vpxor xmm14, xmm14, xmm9
vpxor xmm14, xmm14, xmm13
vpxor xmm14, xmm14, xmm8
vpxor xmm7, xmm7, xmm14
vmovdqu OWORD PTR [rsp+96], xmm7
; H ^ 8
vpclmulqdq xmm8, xmm3, xmm3, 0
vpclmulqdq xmm7, xmm3, xmm3, 17
vpslld xmm12, xmm8, 31
vpslld xmm13, xmm8, 30
vpslld xmm14, xmm8, 25
vpxor xmm12, xmm12, xmm13
vpxor xmm12, xmm12, xmm14
vpsrldq xmm13, xmm12, 4
vpslldq xmm12, xmm12, 12
vpxor xmm8, xmm8, xmm12
vpsrld xmm14, xmm8, 1
vpsrld xmm10, xmm8, 2
vpsrld xmm9, xmm8, 7
vpxor xmm14, xmm14, xmm10
vpxor xmm14, xmm14, xmm9
vpxor xmm14, xmm14, xmm13
vpxor xmm14, xmm14, xmm8
vpxor xmm7, xmm7, xmm14
vmovdqu OWORD PTR [rsp+112], xmm7
L_AES_GCM_decrypt_avx1_ghash_128:
lea rcx, QWORD PTR [rdi+rbx]
lea rdx, QWORD PTR [rsi+rbx]
vmovdqu xmm0, OWORD PTR [rsp+128]
vmovdqa xmm1, OWORD PTR L_avx1_aes_gcm_bswap_epi64
vpshufb xmm8, xmm0, xmm1
vpaddd xmm9, xmm0, OWORD PTR L_avx1_aes_gcm_one
vpshufb xmm9, xmm9, xmm1
vpaddd xmm10, xmm0, OWORD PTR L_avx1_aes_gcm_two
vpshufb xmm10, xmm10, xmm1
vpaddd xmm11, xmm0, OWORD PTR L_avx1_aes_gcm_three
vpshufb xmm11, xmm11, xmm1
vpaddd xmm12, xmm0, OWORD PTR L_avx1_aes_gcm_four
vpshufb xmm12, xmm12, xmm1
vpaddd xmm13, xmm0, OWORD PTR L_avx1_aes_gcm_five
vpshufb xmm13, xmm13, xmm1
vpaddd xmm14, xmm0, OWORD PTR L_avx1_aes_gcm_six
vpshufb xmm14, xmm14, xmm1
vpaddd xmm15, xmm0, OWORD PTR L_avx1_aes_gcm_seven
vpshufb xmm15, xmm15, xmm1
vpaddd xmm0, xmm0, OWORD PTR L_avx1_aes_gcm_eight
vmovdqa xmm7, OWORD PTR [r15]
vmovdqu OWORD PTR [rsp+128], xmm0
vpxor xmm8, xmm8, xmm7
vpxor xmm9, xmm9, xmm7
vpxor xmm10, xmm10, xmm7
vpxor xmm11, xmm11, xmm7
vpxor xmm12, xmm12, xmm7
vpxor xmm13, xmm13, xmm7
vpxor xmm14, xmm14, xmm7
vpxor xmm15, xmm15, xmm7
vmovdqu xmm7, OWORD PTR [rsp+112]
vmovdqu xmm0, OWORD PTR [rcx]
vaesenc xmm8, xmm8, [r15+16]
vpshufb xmm0, xmm0, OWORD PTR L_avx1_aes_gcm_bswap_mask
vpxor xmm0, xmm0, xmm2
vpshufd xmm1, xmm7, 78
vpshufd xmm5, xmm0, 78
vpxor xmm1, xmm1, xmm7
vpxor xmm5, xmm5, xmm0
vpclmulqdq xmm3, xmm0, xmm7, 17
vaesenc xmm9, xmm9, [r15+16]
vaesenc xmm10, xmm10, [r15+16]
vpclmulqdq xmm2, xmm0, xmm7, 0
vaesenc xmm11, xmm11, [r15+16]
vaesenc xmm12, xmm12, [r15+16]
vpclmulqdq xmm1, xmm1, xmm5, 0
vaesenc xmm13, xmm13, [r15+16]
vaesenc xmm14, xmm14, [r15+16]
vaesenc xmm15, xmm15, [r15+16]
vpxor xmm1, xmm1, xmm2
vpxor xmm1, xmm1, xmm3
vmovdqu xmm7, OWORD PTR [rsp+96]
vmovdqu xmm0, OWORD PTR [rcx+16]
vpshufd xmm4, xmm7, 78
vpshufb xmm0, xmm0, OWORD PTR L_avx1_aes_gcm_bswap_mask
vaesenc xmm8, xmm8, [r15+32]
vpxor xmm4, xmm4, xmm7
vpshufd xmm5, xmm0, 78
vpxor xmm5, xmm5, xmm0
vpclmulqdq xmm6, xmm0, xmm7, 17
vaesenc xmm9, xmm9, [r15+32]
vaesenc xmm10, xmm10, [r15+32]
vpclmulqdq xmm7, xmm0, xmm7, 0
vaesenc xmm11, xmm11, [r15+32]
vaesenc xmm12, xmm12, [r15+32]
vpclmulqdq xmm4, xmm4, xmm5, 0
vaesenc xmm13, xmm13, [r15+32]
vaesenc xmm14, xmm14, [r15+32]
vaesenc xmm15, xmm15, [r15+32]
vpxor xmm1, xmm1, xmm7
vpxor xmm2, xmm2, xmm7
vpxor xmm1, xmm1, xmm6
vpxor xmm3, xmm3, xmm6
vpxor xmm1, xmm1, xmm4
vmovdqu xmm7, OWORD PTR [rsp+80]
vmovdqu xmm0, OWORD PTR [rcx+32]
vpshufd xmm4, xmm7, 78
vpshufb xmm0, xmm0, OWORD PTR L_avx1_aes_gcm_bswap_mask
vaesenc xmm8, xmm8, [r15+48]
vpxor xmm4, xmm4, xmm7
vpshufd xmm5, xmm0, 78
vpxor xmm5, xmm5, xmm0
vpclmulqdq xmm6, xmm0, xmm7, 17
vaesenc xmm9, xmm9, [r15+48]
vaesenc xmm10, xmm10, [r15+48]
vpclmulqdq xmm7, xmm0, xmm7, 0
vaesenc xmm11, xmm11, [r15+48]
vaesenc xmm12, xmm12, [r15+48]
vpclmulqdq xmm4, xmm4, xmm5, 0
vaesenc xmm13, xmm13, [r15+48]
vaesenc xmm14, xmm14, [r15+48]
vaesenc xmm15, xmm15, [r15+48]
vpxor xmm1, xmm1, xmm7
vpxor xmm2, xmm2, xmm7
vpxor xmm1, xmm1, xmm6
vpxor xmm3, xmm3, xmm6
vpxor xmm1, xmm1, xmm4
vmovdqu xmm7, OWORD PTR [rsp+64]
vmovdqu xmm0, OWORD PTR [rcx+48]
vpshufd xmm4, xmm7, 78
vpshufb xmm0, xmm0, OWORD PTR L_avx1_aes_gcm_bswap_mask
vaesenc xmm8, xmm8, [r15+64]
vpxor xmm4, xmm4, xmm7
vpshufd xmm5, xmm0, 78
vpxor xmm5, xmm5, xmm0
vpclmulqdq xmm6, xmm0, xmm7, 17
vaesenc xmm9, xmm9, [r15+64]
vaesenc xmm10, xmm10, [r15+64]
vpclmulqdq xmm7, xmm0, xmm7, 0
vaesenc xmm11, xmm11, [r15+64]
vaesenc xmm12, xmm12, [r15+64]
vpclmulqdq xmm4, xmm4, xmm5, 0
vaesenc xmm13, xmm13, [r15+64]
vaesenc xmm14, xmm14, [r15+64]
vaesenc xmm15, xmm15, [r15+64]
vpxor xmm1, xmm1, xmm7
vpxor xmm2, xmm2, xmm7
vpxor xmm1, xmm1, xmm6
vpxor xmm3, xmm3, xmm6
vpxor xmm1, xmm1, xmm4
vmovdqu xmm7, OWORD PTR [rsp+48]
vmovdqu xmm0, OWORD PTR [rcx+64]
vpshufd xmm4, xmm7, 78
vpshufb xmm0, xmm0, OWORD PTR L_avx1_aes_gcm_bswap_mask
vaesenc xmm8, xmm8, [r15+80]
vpxor xmm4, xmm4, xmm7
vpshufd xmm5, xmm0, 78
vpxor xmm5, xmm5, xmm0
vpclmulqdq xmm6, xmm0, xmm7, 17
vaesenc xmm9, xmm9, [r15+80]
vaesenc xmm10, xmm10, [r15+80]
vpclmulqdq xmm7, xmm0, xmm7, 0
vaesenc xmm11, xmm11, [r15+80]
vaesenc xmm12, xmm12, [r15+80]
vpclmulqdq xmm4, xmm4, xmm5, 0
vaesenc xmm13, xmm13, [r15+80]
vaesenc xmm14, xmm14, [r15+80]
vaesenc xmm15, xmm15, [r15+80]
vpxor xmm1, xmm1, xmm7
vpxor xmm2, xmm2, xmm7
vpxor xmm1, xmm1, xmm6
vpxor xmm3, xmm3, xmm6
vpxor xmm1, xmm1, xmm4
vmovdqu xmm7, OWORD PTR [rsp+32]
vmovdqu xmm0, OWORD PTR [rcx+80]
vpshufd xmm4, xmm7, 78
vpshufb xmm0, xmm0, OWORD PTR L_avx1_aes_gcm_bswap_mask
vaesenc xmm8, xmm8, [r15+96]
vpxor xmm4, xmm4, xmm7
vpshufd xmm5, xmm0, 78
vpxor xmm5, xmm5, xmm0
vpclmulqdq xmm6, xmm0, xmm7, 17
vaesenc xmm9, xmm9, [r15+96]
vaesenc xmm10, xmm10, [r15+96]
vpclmulqdq xmm7, xmm0, xmm7, 0
vaesenc xmm11, xmm11, [r15+96]
vaesenc xmm12, xmm12, [r15+96]
vpclmulqdq xmm4, xmm4, xmm5, 0
vaesenc xmm13, xmm13, [r15+96]
vaesenc xmm14, xmm14, [r15+96]
vaesenc xmm15, xmm15, [r15+96]
vpxor xmm1, xmm1, xmm7
vpxor xmm2, xmm2, xmm7
vpxor xmm1, xmm1, xmm6
vpxor xmm3, xmm3, xmm6
vpxor xmm1, xmm1, xmm4
vmovdqu xmm7, OWORD PTR [rsp+16]
vmovdqu xmm0, OWORD PTR [rcx+96]
vpshufd xmm4, xmm7, 78
vpshufb xmm0, xmm0, OWORD PTR L_avx1_aes_gcm_bswap_mask
vaesenc xmm8, xmm8, [r15+112]
vpxor xmm4, xmm4, xmm7
vpshufd xmm5, xmm0, 78
vpxor xmm5, xmm5, xmm0
vpclmulqdq xmm6, xmm0, xmm7, 17
vaesenc xmm9, xmm9, [r15+112]
vaesenc xmm10, xmm10, [r15+112]
vpclmulqdq xmm7, xmm0, xmm7, 0
vaesenc xmm11, xmm11, [r15+112]
vaesenc xmm12, xmm12, [r15+112]
vpclmulqdq xmm4, xmm4, xmm5, 0
vaesenc xmm13, xmm13, [r15+112]
vaesenc xmm14, xmm14, [r15+112]
vaesenc xmm15, xmm15, [r15+112]
vpxor xmm1, xmm1, xmm7
vpxor xmm2, xmm2, xmm7
vpxor xmm1, xmm1, xmm6
vpxor xmm3, xmm3, xmm6
vpxor xmm1, xmm1, xmm4
vmovdqu xmm7, OWORD PTR [rsp]
vmovdqu xmm0, OWORD PTR [rcx+112]
vpshufd xmm4, xmm7, 78
vpshufb xmm0, xmm0, OWORD PTR L_avx1_aes_gcm_bswap_mask
vaesenc xmm8, xmm8, [r15+128]
vpxor xmm4, xmm4, xmm7
vpshufd xmm5, xmm0, 78
vpxor xmm5, xmm5, xmm0
vpclmulqdq xmm6, xmm0, xmm7, 17
vaesenc xmm9, xmm9, [r15+128]
vaesenc xmm10, xmm10, [r15+128]
vpclmulqdq xmm7, xmm0, xmm7, 0
vaesenc xmm11, xmm11, [r15+128]
vaesenc xmm12, xmm12, [r15+128]
vpclmulqdq xmm4, xmm4, xmm5, 0
vaesenc xmm13, xmm13, [r15+128]
vaesenc xmm14, xmm14, [r15+128]
vaesenc xmm15, xmm15, [r15+128]
vpxor xmm1, xmm1, xmm7
vpxor xmm2, xmm2, xmm7
vpxor xmm1, xmm1, xmm6
vpxor xmm3, xmm3, xmm6
vpxor xmm1, xmm1, xmm4
vpslldq xmm5, xmm1, 8
vpsrldq xmm1, xmm1, 8
vaesenc xmm8, xmm8, [r15+144]
vpxor xmm2, xmm2, xmm5
vpxor xmm3, xmm3, xmm1
vaesenc xmm9, xmm9, [r15+144]
vpslld xmm7, xmm2, 31
vpslld xmm4, xmm2, 30
vpslld xmm5, xmm2, 25
vaesenc xmm10, xmm10, [r15+144]
vpxor xmm7, xmm7, xmm4
vpxor xmm7, xmm7, xmm5
vaesenc xmm11, xmm11, [r15+144]
vpsrldq xmm4, xmm7, 4
vpslldq xmm7, xmm7, 12
vaesenc xmm12, xmm12, [r15+144]
vpxor xmm2, xmm2, xmm7
vpsrld xmm5, xmm2, 1
vaesenc xmm13, xmm13, [r15+144]
vpsrld xmm1, xmm2, 2
vpsrld xmm0, xmm2, 7
vaesenc xmm14, xmm14, [r15+144]
vpxor xmm5, xmm5, xmm1
vpxor xmm5, xmm5, xmm0
vaesenc xmm15, xmm15, [r15+144]
vpxor xmm5, xmm5, xmm4
vpxor xmm2, xmm2, xmm5
vpxor xmm2, xmm2, xmm3
cmp r10d, 11
vmovdqa xmm7, OWORD PTR [r15+160]
jl L_AES_GCM_decrypt_avx1_aesenc_128_ghash_avx_done
vaesenc xmm8, xmm8, xmm7
vaesenc xmm9, xmm9, xmm7
vaesenc xmm10, xmm10, xmm7
vaesenc xmm11, xmm11, xmm7
vaesenc xmm12, xmm12, xmm7
vaesenc xmm13, xmm13, xmm7
vaesenc xmm14, xmm14, xmm7
vaesenc xmm15, xmm15, xmm7
vmovdqa xmm7, OWORD PTR [r15+176]
vaesenc xmm8, xmm8, xmm7
vaesenc xmm9, xmm9, xmm7
vaesenc xmm10, xmm10, xmm7
vaesenc xmm11, xmm11, xmm7
vaesenc xmm12, xmm12, xmm7
vaesenc xmm13, xmm13, xmm7
vaesenc xmm14, xmm14, xmm7
vaesenc xmm15, xmm15, xmm7
cmp r10d, 13
vmovdqa xmm7, OWORD PTR [r15+192]
jl L_AES_GCM_decrypt_avx1_aesenc_128_ghash_avx_done
vaesenc xmm8, xmm8, xmm7
vaesenc xmm9, xmm9, xmm7
vaesenc xmm10, xmm10, xmm7
vaesenc xmm11, xmm11, xmm7
vaesenc xmm12, xmm12, xmm7
vaesenc xmm13, xmm13, xmm7
vaesenc xmm14, xmm14, xmm7
vaesenc xmm15, xmm15, xmm7
vmovdqa xmm7, OWORD PTR [r15+208]
vaesenc xmm8, xmm8, xmm7
vaesenc xmm9, xmm9, xmm7
vaesenc xmm10, xmm10, xmm7
vaesenc xmm11, xmm11, xmm7
vaesenc xmm12, xmm12, xmm7
vaesenc xmm13, xmm13, xmm7
vaesenc xmm14, xmm14, xmm7
vaesenc xmm15, xmm15, xmm7
vmovdqa xmm7, OWORD PTR [r15+224]
L_AES_GCM_decrypt_avx1_aesenc_128_ghash_avx_done:
vaesenclast xmm8, xmm8, xmm7
vaesenclast xmm9, xmm9, xmm7
vmovdqu xmm0, OWORD PTR [rcx]
vmovdqu xmm1, OWORD PTR [rcx+16]
vpxor xmm8, xmm8, xmm0
vpxor xmm9, xmm9, xmm1
vmovdqu OWORD PTR [rdx], xmm8
vmovdqu OWORD PTR [rdx+16], xmm9
vaesenclast xmm10, xmm10, xmm7
vaesenclast xmm11, xmm11, xmm7
vmovdqu xmm0, OWORD PTR [rcx+32]
vmovdqu xmm1, OWORD PTR [rcx+48]
vpxor xmm10, xmm10, xmm0
vpxor xmm11, xmm11, xmm1
vmovdqu OWORD PTR [rdx+32], xmm10
vmovdqu OWORD PTR [rdx+48], xmm11
vaesenclast xmm12, xmm12, xmm7
vaesenclast xmm13, xmm13, xmm7
vmovdqu xmm0, OWORD PTR [rcx+64]
vmovdqu xmm1, OWORD PTR [rcx+80]
vpxor xmm12, xmm12, xmm0
vpxor xmm13, xmm13, xmm1
vmovdqu OWORD PTR [rdx+64], xmm12
vmovdqu OWORD PTR [rdx+80], xmm13
vaesenclast xmm14, xmm14, xmm7
vaesenclast xmm15, xmm15, xmm7
vmovdqu xmm0, OWORD PTR [rcx+96]
vmovdqu xmm1, OWORD PTR [rcx+112]
vpxor xmm14, xmm14, xmm0
vpxor xmm15, xmm15, xmm1
vmovdqu OWORD PTR [rdx+96], xmm14
vmovdqu OWORD PTR [rdx+112], xmm15
add ebx, 128
cmp ebx, r13d
jl L_AES_GCM_decrypt_avx1_ghash_128
vmovdqa xmm6, xmm2
vmovdqu xmm5, OWORD PTR [rsp]
L_AES_GCM_decrypt_avx1_done_128:
mov edx, r9d
cmp ebx, edx
jge L_AES_GCM_decrypt_avx1_done_dec
mov r13d, r9d
and r13d, 4294967280
cmp ebx, r13d
jge L_AES_GCM_decrypt_avx1_last_block_done
L_AES_GCM_decrypt_avx1_last_block_start:
vmovdqu xmm13, OWORD PTR [rdi+rbx]
vmovdqa xmm0, xmm5
vpshufb xmm1, xmm13, OWORD PTR L_avx1_aes_gcm_bswap_mask
vpxor xmm1, xmm1, xmm6
vmovdqu xmm9, OWORD PTR [rsp+128]
vpshufb xmm8, xmm9, OWORD PTR L_avx1_aes_gcm_bswap_epi64
vpaddd xmm9, xmm9, OWORD PTR L_avx1_aes_gcm_one
vmovdqu OWORD PTR [rsp+128], xmm9
vpxor xmm8, xmm8, [r15]
vpclmulqdq xmm10, xmm1, xmm0, 16
vaesenc xmm8, xmm8, [r15+16]
vaesenc xmm8, xmm8, [r15+32]
vpclmulqdq xmm11, xmm1, xmm0, 1
vaesenc xmm8, xmm8, [r15+48]
vaesenc xmm8, xmm8, [r15+64]
vpclmulqdq xmm12, xmm1, xmm0, 0
vaesenc xmm8, xmm8, [r15+80]
vpclmulqdq xmm1, xmm1, xmm0, 17
vaesenc xmm8, xmm8, [r15+96]
vpxor xmm10, xmm10, xmm11
vpslldq xmm2, xmm10, 8
vpsrldq xmm10, xmm10, 8
vaesenc xmm8, xmm8, [r15+112]
vpxor xmm2, xmm2, xmm12
vpxor xmm3, xmm1, xmm10
vmovdqa xmm0, OWORD PTR L_avx1_aes_gcm_mod2_128
vpclmulqdq xmm11, xmm2, xmm0, 16
vaesenc xmm8, xmm8, [r15+128]
vpshufd xmm10, xmm2, 78
vpxor xmm10, xmm10, xmm11
vpclmulqdq xmm11, xmm10, xmm0, 16
vaesenc xmm8, xmm8, [r15+144]
vpshufd xmm10, xmm10, 78
vpxor xmm10, xmm10, xmm11
vpxor xmm6, xmm10, xmm3
cmp r10d, 11
vmovdqa xmm9, OWORD PTR [r15+160]
jl L_AES_GCM_decrypt_avx1_aesenc_gfmul_last
vaesenc xmm8, xmm8, xmm9
vaesenc xmm8, xmm8, [r15+176]
cmp r10d, 13
vmovdqa xmm9, OWORD PTR [r15+192]
jl L_AES_GCM_decrypt_avx1_aesenc_gfmul_last
vaesenc xmm8, xmm8, xmm9
vaesenc xmm8, xmm8, [r15+208]
vmovdqa xmm9, OWORD PTR [r15+224]
L_AES_GCM_decrypt_avx1_aesenc_gfmul_last:
vaesenclast xmm8, xmm8, xmm9
vmovdqa xmm0, xmm13
vpxor xmm8, xmm8, xmm0
vmovdqu OWORD PTR [rsi+rbx], xmm8
add ebx, 16
cmp ebx, r13d
jl L_AES_GCM_decrypt_avx1_last_block_start
L_AES_GCM_decrypt_avx1_last_block_done:
mov ecx, r9d
mov edx, ecx
and ecx, 15
jz L_AES_GCM_decrypt_avx1_aesenc_last15_dec_avx_done
vmovdqu xmm4, OWORD PTR [rsp+128]
vpshufb xmm4, xmm4, OWORD PTR L_avx1_aes_gcm_bswap_epi64
vpxor xmm4, xmm4, [r15]
vaesenc xmm4, xmm4, [r15+16]
vaesenc xmm4, xmm4, [r15+32]
vaesenc xmm4, xmm4, [r15+48]
vaesenc xmm4, xmm4, [r15+64]
vaesenc xmm4, xmm4, [r15+80]
vaesenc xmm4, xmm4, [r15+96]
vaesenc xmm4, xmm4, [r15+112]
vaesenc xmm4, xmm4, [r15+128]
vaesenc xmm4, xmm4, [r15+144]
cmp r10d, 11
vmovdqa xmm9, OWORD PTR [r15+160]
jl L_AES_GCM_decrypt_avx1_aesenc_last15_dec_avx_aesenc_avx_last
vaesenc xmm4, xmm4, xmm9
vaesenc xmm4, xmm4, [r15+176]
cmp r10d, 13
vmovdqa xmm9, OWORD PTR [r15+192]
jl L_AES_GCM_decrypt_avx1_aesenc_last15_dec_avx_aesenc_avx_last
vaesenc xmm4, xmm4, xmm9
vaesenc xmm4, xmm4, [r15+208]
vmovdqa xmm9, OWORD PTR [r15+224]
L_AES_GCM_decrypt_avx1_aesenc_last15_dec_avx_aesenc_avx_last:
vaesenclast xmm4, xmm4, xmm9
sub rsp, 32
xor ecx, ecx
vmovdqu OWORD PTR [rsp], xmm4
vpxor xmm0, xmm0, xmm0
vmovdqu OWORD PTR [rsp+16], xmm0
L_AES_GCM_decrypt_avx1_aesenc_last15_dec_avx_loop:
movzx r13d, BYTE PTR [rdi+rbx]
mov BYTE PTR [rsp+rcx+16], r13b
xor r13b, BYTE PTR [rsp+rcx]
mov BYTE PTR [rsi+rbx], r13b
inc ebx
inc ecx
cmp ebx, edx
jl L_AES_GCM_decrypt_avx1_aesenc_last15_dec_avx_loop
vmovdqu xmm4, OWORD PTR [rsp+16]
add rsp, 32
vpshufb xmm4, xmm4, OWORD PTR L_avx1_aes_gcm_bswap_mask
vpxor xmm6, xmm6, xmm4
; ghash_gfmul_red_avx
vpshufd xmm9, xmm5, 78
vpshufd xmm10, xmm6, 78
vpclmulqdq xmm11, xmm6, xmm5, 17
vpclmulqdq xmm8, xmm6, xmm5, 0
vpxor xmm9, xmm9, xmm5
vpxor xmm10, xmm10, xmm6
vpclmulqdq xmm9, xmm9, xmm10, 0
vpxor xmm9, xmm9, xmm8
vpxor xmm9, xmm9, xmm11
vpslldq xmm10, xmm9, 8
vpsrldq xmm9, xmm9, 8
vpxor xmm8, xmm8, xmm10
vpxor xmm6, xmm11, xmm9
vpslld xmm12, xmm8, 31
vpslld xmm13, xmm8, 30
vpslld xmm14, xmm8, 25
vpxor xmm12, xmm12, xmm13
vpxor xmm12, xmm12, xmm14
vpsrldq xmm13, xmm12, 4
vpslldq xmm12, xmm12, 12
vpxor xmm8, xmm8, xmm12
vpsrld xmm14, xmm8, 1
vpsrld xmm10, xmm8, 2
vpsrld xmm9, xmm8, 7
vpxor xmm14, xmm14, xmm10
vpxor xmm14, xmm14, xmm9
vpxor xmm14, xmm14, xmm13
vpxor xmm14, xmm14, xmm8
vpxor xmm6, xmm6, xmm14
L_AES_GCM_decrypt_avx1_aesenc_last15_dec_avx_done:
L_AES_GCM_decrypt_avx1_done_dec:
mov edx, r9d
mov ecx, r11d
shl rdx, 3
shl rcx, 3
vmovq xmm0, rdx
vmovq xmm1, rcx
vpunpcklqdq xmm0, xmm0, xmm1
vpxor xmm6, xmm6, xmm0
; ghash_gfmul_red_avx
vpshufd xmm9, xmm5, 78
vpshufd xmm10, xmm6, 78
vpclmulqdq xmm11, xmm6, xmm5, 17
vpclmulqdq xmm8, xmm6, xmm5, 0
vpxor xmm9, xmm9, xmm5
vpxor xmm10, xmm10, xmm6
vpclmulqdq xmm9, xmm9, xmm10, 0
vpxor xmm9, xmm9, xmm8
vpxor xmm9, xmm9, xmm11
vpslldq xmm10, xmm9, 8
vpsrldq xmm9, xmm9, 8
vpxor xmm8, xmm8, xmm10
vpxor xmm6, xmm11, xmm9
vpslld xmm12, xmm8, 31
vpslld xmm13, xmm8, 30
vpslld xmm14, xmm8, 25
vpxor xmm12, xmm12, xmm13
vpxor xmm12, xmm12, xmm14
vpsrldq xmm13, xmm12, 4
vpslldq xmm12, xmm12, 12
vpxor xmm8, xmm8, xmm12
vpsrld xmm14, xmm8, 1
vpsrld xmm10, xmm8, 2
vpsrld xmm9, xmm8, 7
vpxor xmm14, xmm14, xmm10
vpxor xmm14, xmm14, xmm9
vpxor xmm14, xmm14, xmm13
vpxor xmm14, xmm14, xmm8
vpxor xmm6, xmm6, xmm14
vpshufb xmm6, xmm6, OWORD PTR L_avx1_aes_gcm_bswap_mask
vmovdqu xmm0, OWORD PTR [rsp+144]
vpxor xmm0, xmm0, xmm6
cmp r14d, 16
je L_AES_GCM_decrypt_avx1_cmp_tag_16
sub rsp, 16
xor rcx, rcx
xor rbx, rbx
vmovdqu OWORD PTR [rsp], xmm0
L_AES_GCM_decrypt_avx1_cmp_tag_loop:
movzx r13d, BYTE PTR [rsp+rcx]
xor r13b, BYTE PTR [r8+rcx]
or bl, r13b
inc ecx
cmp ecx, r14d
jne L_AES_GCM_decrypt_avx1_cmp_tag_loop
cmp bl, 0
sete bl
add rsp, 16
xor rcx, rcx
jmp L_AES_GCM_decrypt_avx1_cmp_tag_done
L_AES_GCM_decrypt_avx1_cmp_tag_16:
vmovdqu xmm1, OWORD PTR [r8]
vpcmpeqb xmm0, xmm0, xmm1
vpmovmskb rdx, xmm0
; if edx == 0xFFFF (all 16 tag bytes equal) then return 1, else return 0
xor ebx, ebx
cmp edx, 65535
sete bl
L_AES_GCM_decrypt_avx1_cmp_tag_done:
mov DWORD PTR [rbp], ebx
vzeroupper
vmovdqu xmm6, OWORD PTR [rsp+168]
vmovdqu xmm7, OWORD PTR [rsp+184]
vmovdqu xmm8, OWORD PTR [rsp+200]
vmovdqu xmm9, OWORD PTR [rsp+216]
vmovdqu xmm10, OWORD PTR [rsp+232]
vmovdqu xmm11, OWORD PTR [rsp+248]
vmovdqu xmm12, OWORD PTR [rsp+264]
vmovdqu xmm13, OWORD PTR [rsp+280]
vmovdqu xmm14, OWORD PTR [rsp+296]
vmovdqu xmm15, OWORD PTR [rsp+312]
add rsp, 328
pop rbp
pop r15
pop r14
pop rbx
pop r12
pop rsi
pop rdi
pop r13
ret
AES_GCM_decrypt_avx1 ENDP
_text ENDS
_text SEGMENT READONLY PARA
AES_GCM_init_avx1 PROC
push rdi
push rsi
push r12
push r13
mov rdi, rcx
mov rsi, rdx
mov r10, r8
mov r11d, r9d
mov rax, QWORD PTR [rsp+72]
mov r8, QWORD PTR [rsp+80]
mov r9, QWORD PTR [rsp+88]
sub rsp, 80
vmovdqu OWORD PTR [rsp+16], xmm6
vmovdqu OWORD PTR [rsp+32], xmm7
vmovdqu OWORD PTR [rsp+48], xmm8
vmovdqu OWORD PTR [rsp+64], xmm15
vpxor xmm4, xmm4, xmm4
mov edx, r11d
cmp edx, 12
jne L_AES_GCM_init_avx1_iv_not_12
; Calculate values when IV is 12 bytes
; Set counter based on IV
mov ecx, 16777216
vmovq xmm4, QWORD PTR [r10]
vpinsrd xmm4, xmm4, DWORD PTR [r10+8], 2
vpinsrd xmm4, xmm4, ecx, 3
; H = Encrypt X(=0) and T = Encrypt counter
vmovdqa xmm5, OWORD PTR [rdi]
vpxor xmm1, xmm4, xmm5
vmovdqa xmm6, OWORD PTR [rdi+16]
vaesenc xmm5, xmm5, xmm6
vaesenc xmm1, xmm1, xmm6
vmovdqa xmm6, OWORD PTR [rdi+32]
vaesenc xmm5, xmm5, xmm6
vaesenc xmm1, xmm1, xmm6
vmovdqa xmm6, OWORD PTR [rdi+48]
vaesenc xmm5, xmm5, xmm6
vaesenc xmm1, xmm1, xmm6
vmovdqa xmm6, OWORD PTR [rdi+64]
vaesenc xmm5, xmm5, xmm6
vaesenc xmm1, xmm1, xmm6
vmovdqa xmm6, OWORD PTR [rdi+80]
vaesenc xmm5, xmm5, xmm6
vaesenc xmm1, xmm1, xmm6
vmovdqa xmm6, OWORD PTR [rdi+96]
vaesenc xmm5, xmm5, xmm6
vaesenc xmm1, xmm1, xmm6
vmovdqa xmm6, OWORD PTR [rdi+112]
vaesenc xmm5, xmm5, xmm6
vaesenc xmm1, xmm1, xmm6
vmovdqa xmm6, OWORD PTR [rdi+128]
vaesenc xmm5, xmm5, xmm6
vaesenc xmm1, xmm1, xmm6
vmovdqa xmm6, OWORD PTR [rdi+144]
vaesenc xmm5, xmm5, xmm6
vaesenc xmm1, xmm1, xmm6
cmp esi, 11
vmovdqa xmm6, OWORD PTR [rdi+160]
jl L_AES_GCM_init_avx1_calc_iv_12_last
vaesenc xmm5, xmm5, xmm6
vaesenc xmm1, xmm1, xmm6
vmovdqa xmm6, OWORD PTR [rdi+176]
vaesenc xmm5, xmm5, xmm6
vaesenc xmm1, xmm1, xmm6
cmp esi, 13
vmovdqa xmm6, OWORD PTR [rdi+192]
jl L_AES_GCM_init_avx1_calc_iv_12_last
vaesenc xmm5, xmm5, xmm6
vaesenc xmm1, xmm1, xmm6
vmovdqa xmm6, OWORD PTR [rdi+208]
vaesenc xmm5, xmm5, xmm6
vaesenc xmm1, xmm1, xmm6
vmovdqa xmm6, OWORD PTR [rdi+224]
L_AES_GCM_init_avx1_calc_iv_12_last:
vaesenclast xmm5, xmm5, xmm6
vaesenclast xmm1, xmm1, xmm6
vpshufb xmm5, xmm5, OWORD PTR L_avx1_aes_gcm_bswap_mask
vmovdqu xmm15, xmm1
jmp L_AES_GCM_init_avx1_iv_done
L_AES_GCM_init_avx1_iv_not_12:
; Calculate values when IV is not 12 bytes
; H = Encrypt X(=0)
vmovdqa xmm5, OWORD PTR [rdi]
vaesenc xmm5, xmm5, [rdi+16]
vaesenc xmm5, xmm5, [rdi+32]
vaesenc xmm5, xmm5, [rdi+48]
vaesenc xmm5, xmm5, [rdi+64]
vaesenc xmm5, xmm5, [rdi+80]
vaesenc xmm5, xmm5, [rdi+96]
vaesenc xmm5, xmm5, [rdi+112]
vaesenc xmm5, xmm5, [rdi+128]
vaesenc xmm5, xmm5, [rdi+144]
cmp esi, 11
vmovdqa xmm8, OWORD PTR [rdi+160]
jl L_AES_GCM_init_avx1_calc_iv_1_aesenc_avx_last
vaesenc xmm5, xmm5, xmm8
vaesenc xmm5, xmm5, [rdi+176]
cmp esi, 13
vmovdqa xmm8, OWORD PTR [rdi+192]
jl L_AES_GCM_init_avx1_calc_iv_1_aesenc_avx_last
vaesenc xmm5, xmm5, xmm8
vaesenc xmm5, xmm5, [rdi+208]
vmovdqa xmm8, OWORD PTR [rdi+224]
L_AES_GCM_init_avx1_calc_iv_1_aesenc_avx_last:
vaesenclast xmm5, xmm5, xmm8
vpshufb xmm5, xmm5, OWORD PTR L_avx1_aes_gcm_bswap_mask
; Calc counter
; Initialization vector
cmp edx, 0
mov rcx, 0
je L_AES_GCM_init_avx1_calc_iv_done
cmp edx, 16
jl L_AES_GCM_init_avx1_calc_iv_lt16
and edx, 4294967280
L_AES_GCM_init_avx1_calc_iv_16_loop:
vmovdqu xmm7, OWORD PTR [r10+rcx]
vpshufb xmm7, xmm7, OWORD PTR L_avx1_aes_gcm_bswap_mask
vpxor xmm4, xmm4, xmm7
; ghash_gfmul_avx
vpshufd xmm1, xmm4, 78
vpshufd xmm2, xmm5, 78
vpclmulqdq xmm3, xmm5, xmm4, 17
vpclmulqdq xmm0, xmm5, xmm4, 0
vpxor xmm1, xmm1, xmm4
vpxor xmm2, xmm2, xmm5
vpclmulqdq xmm1, xmm1, xmm2, 0
vpxor xmm1, xmm1, xmm0
vpxor xmm1, xmm1, xmm3
vmovdqa xmm6, xmm0
vmovdqa xmm4, xmm3
vpslldq xmm2, xmm1, 8
vpsrldq xmm1, xmm1, 8
vpxor xmm6, xmm6, xmm2
vpxor xmm4, xmm4, xmm1
vpsrld xmm0, xmm6, 31
vpsrld xmm1, xmm4, 31
vpslld xmm6, xmm6, 1
vpslld xmm4, xmm4, 1
vpsrldq xmm2, xmm0, 12
vpslldq xmm0, xmm0, 4
vpslldq xmm1, xmm1, 4
vpor xmm4, xmm4, xmm2
vpor xmm6, xmm6, xmm0
vpor xmm4, xmm4, xmm1
vpslld xmm0, xmm6, 31
vpslld xmm1, xmm6, 30
vpslld xmm2, xmm6, 25
vpxor xmm0, xmm0, xmm1
vpxor xmm0, xmm0, xmm2
vmovdqa xmm1, xmm0
vpsrldq xmm1, xmm1, 4
vpslldq xmm0, xmm0, 12
vpxor xmm6, xmm6, xmm0
vpsrld xmm2, xmm6, 1
vpsrld xmm3, xmm6, 2
vpsrld xmm0, xmm6, 7
vpxor xmm2, xmm2, xmm3
vpxor xmm2, xmm2, xmm0
vpxor xmm2, xmm2, xmm1
vpxor xmm2, xmm2, xmm6
vpxor xmm4, xmm4, xmm2
add ecx, 16
cmp ecx, edx
jl L_AES_GCM_init_avx1_calc_iv_16_loop
mov edx, r11d
cmp ecx, edx
je L_AES_GCM_init_avx1_calc_iv_done
L_AES_GCM_init_avx1_calc_iv_lt16:
sub rsp, 16
vpxor xmm7, xmm7, xmm7
xor r13d, r13d
vmovdqu OWORD PTR [rsp], xmm7
L_AES_GCM_init_avx1_calc_iv_loop:
movzx r12d, BYTE PTR [r10+rcx]
mov BYTE PTR [rsp+r13], r12b
inc ecx
inc r13d
cmp ecx, edx
jl L_AES_GCM_init_avx1_calc_iv_loop
vmovdqu xmm7, OWORD PTR [rsp]
add rsp, 16
vpshufb xmm7, xmm7, OWORD PTR L_avx1_aes_gcm_bswap_mask
vpxor xmm4, xmm4, xmm7
; ghash_gfmul_avx
vpshufd xmm1, xmm4, 78
vpshufd xmm2, xmm5, 78
vpclmulqdq xmm3, xmm5, xmm4, 17
vpclmulqdq xmm0, xmm5, xmm4, 0
vpxor xmm1, xmm1, xmm4
vpxor xmm2, xmm2, xmm5
vpclmulqdq xmm1, xmm1, xmm2, 0
vpxor xmm1, xmm1, xmm0
vpxor xmm1, xmm1, xmm3
vmovdqa xmm6, xmm0
vmovdqa xmm4, xmm3
vpslldq xmm2, xmm1, 8
vpsrldq xmm1, xmm1, 8
vpxor xmm6, xmm6, xmm2
vpxor xmm4, xmm4, xmm1
vpsrld xmm0, xmm6, 31
vpsrld xmm1, xmm4, 31
vpslld xmm6, xmm6, 1
vpslld xmm4, xmm4, 1
vpsrldq xmm2, xmm0, 12
vpslldq xmm0, xmm0, 4
vpslldq xmm1, xmm1, 4
vpor xmm4, xmm4, xmm2
vpor xmm6, xmm6, xmm0
vpor xmm4, xmm4, xmm1
vpslld xmm0, xmm6, 31
vpslld xmm1, xmm6, 30
vpslld xmm2, xmm6, 25
vpxor xmm0, xmm0, xmm1
vpxor xmm0, xmm0, xmm2
vmovdqa xmm1, xmm0
vpsrldq xmm1, xmm1, 4
vpslldq xmm0, xmm0, 12
vpxor xmm6, xmm6, xmm0
vpsrld xmm2, xmm6, 1
vpsrld xmm3, xmm6, 2
vpsrld xmm0, xmm6, 7
vpxor xmm2, xmm2, xmm3
vpxor xmm2, xmm2, xmm0
vpxor xmm2, xmm2, xmm1
vpxor xmm2, xmm2, xmm6
vpxor xmm4, xmm4, xmm2
L_AES_GCM_init_avx1_calc_iv_done:
; T = Encrypt counter
vpxor xmm0, xmm0, xmm0
shl edx, 3
vmovq xmm0, rdx
vpxor xmm4, xmm4, xmm0
; ghash_gfmul_avx
vpshufd xmm1, xmm4, 78
vpshufd xmm2, xmm5, 78
vpclmulqdq xmm3, xmm5, xmm4, 17
vpclmulqdq xmm0, xmm5, xmm4, 0
vpxor xmm1, xmm1, xmm4
vpxor xmm2, xmm2, xmm5
vpclmulqdq xmm1, xmm1, xmm2, 0
vpxor xmm1, xmm1, xmm0
vpxor xmm1, xmm1, xmm3
vmovdqa xmm6, xmm0
vmovdqa xmm4, xmm3
vpslldq xmm2, xmm1, 8
vpsrldq xmm1, xmm1, 8
vpxor xmm6, xmm6, xmm2
vpxor xmm4, xmm4, xmm1
vpsrld xmm0, xmm6, 31
vpsrld xmm1, xmm4, 31
vpslld xmm6, xmm6, 1
vpslld xmm4, xmm4, 1
vpsrldq xmm2, xmm0, 12
vpslldq xmm0, xmm0, 4
vpslldq xmm1, xmm1, 4
vpor xmm4, xmm4, xmm2
vpor xmm6, xmm6, xmm0
vpor xmm4, xmm4, xmm1
vpslld xmm0, xmm6, 31
vpslld xmm1, xmm6, 30
vpslld xmm2, xmm6, 25
vpxor xmm0, xmm0, xmm1
vpxor xmm0, xmm0, xmm2
vmovdqa xmm1, xmm0
vpsrldq xmm1, xmm1, 4
vpslldq xmm0, xmm0, 12
vpxor xmm6, xmm6, xmm0
vpsrld xmm2, xmm6, 1
vpsrld xmm3, xmm6, 2
vpsrld xmm0, xmm6, 7
vpxor xmm2, xmm2, xmm3
vpxor xmm2, xmm2, xmm0
vpxor xmm2, xmm2, xmm1
vpxor xmm2, xmm2, xmm6
vpxor xmm4, xmm4, xmm2
vpshufb xmm4, xmm4, OWORD PTR L_avx1_aes_gcm_bswap_mask
; Encrypt counter
vmovdqa xmm7, OWORD PTR [rdi]
vpxor xmm7, xmm7, xmm4
vaesenc xmm7, xmm7, [rdi+16]
vaesenc xmm7, xmm7, [rdi+32]
vaesenc xmm7, xmm7, [rdi+48]
vaesenc xmm7, xmm7, [rdi+64]
vaesenc xmm7, xmm7, [rdi+80]
vaesenc xmm7, xmm7, [rdi+96]
vaesenc xmm7, xmm7, [rdi+112]
vaesenc xmm7, xmm7, [rdi+128]
vaesenc xmm7, xmm7, [rdi+144]
cmp esi, 11
vmovdqa xmm8, OWORD PTR [rdi+160]
jl L_AES_GCM_init_avx1_calc_iv_2_aesenc_avx_last
vaesenc xmm7, xmm7, xmm8
vaesenc xmm7, xmm7, [rdi+176]
cmp esi, 13
vmovdqa xmm8, OWORD PTR [rdi+192]
jl L_AES_GCM_init_avx1_calc_iv_2_aesenc_avx_last
vaesenc xmm7, xmm7, xmm8
vaesenc xmm7, xmm7, [rdi+208]
vmovdqa xmm8, OWORD PTR [rdi+224]
L_AES_GCM_init_avx1_calc_iv_2_aesenc_avx_last:
vaesenclast xmm7, xmm7, xmm8
vmovdqu xmm15, xmm7
L_AES_GCM_init_avx1_iv_done:
vmovdqa OWORD PTR [r9], xmm15
vpshufb xmm4, xmm4, OWORD PTR L_avx1_aes_gcm_bswap_epi64
vpaddd xmm4, xmm4, OWORD PTR L_avx1_aes_gcm_one
vmovdqa OWORD PTR [rax], xmm5
vmovdqa OWORD PTR [r8], xmm4
vmovdqu xmm6, OWORD PTR [rsp+16]
vmovdqu xmm7, OWORD PTR [rsp+32]
vmovdqu xmm8, OWORD PTR [rsp+48]
vmovdqu xmm15, OWORD PTR [rsp+64]
add rsp, 80
pop r13
pop r12
pop rsi
pop rdi
ret
AES_GCM_init_avx1 ENDP
_text ENDS
_text SEGMENT READONLY PARA
;-----------------------------------------------------------------------
; AES_GCM_aad_update_avx1 -- fold additional-authenticated-data (AAD)
; blocks into the GHASH accumulator, one 16-byte block per iteration.
;
; C equivalent (Microsoft x64 ABI):
;   void AES_GCM_aad_update_avx1(const unsigned char* aad, unsigned int len,
;                                unsigned char* X, unsigned char* H);
; In:    rcx = aad  (input data, read 16 bytes at a time)
;        edx = len  (byte count; assumed a positive multiple of 16 -- the
;                    loop body runs once before the count is checked.
;                    TODO confirm caller guarantees len >= 16)
;        r8  = X    (16-byte GHASH accumulator, 16-byte aligned, updated
;                    in place)
;        r9  = H    (16-byte hash key, 16-byte aligned)
; Out:   [r8] = X' where X' = (...((X ^ blk0)*H ^ blk1)*H ...)*H
; Clobb: rax, ecx, xmm0-xmm5, flags (xmm6/xmm7 are callee-saved on Win64
;        and are saved/restored on the stack below)
;-----------------------------------------------------------------------
AES_GCM_aad_update_avx1 PROC
mov rax, rcx                            ; rax = aad base (free rcx for the byte index)
sub rsp, 32                             ; spill area for callee-saved xmm6/xmm7
vmovdqu OWORD PTR [rsp], xmm6
vmovdqu OWORD PTR [rsp+16], xmm7
vmovdqa xmm5, OWORD PTR [r8]            ; xmm5 = X (accumulator)
vmovdqa xmm6, OWORD PTR [r9]            ; xmm6 = H (hash key)
xor ecx, ecx                            ; ecx = byte offset into aad
L_AES_GCM_aad_update_avx1_16_loop:
vmovdqu xmm7, OWORD PTR [rax+rcx]       ; load next 16-byte AAD block
vpshufb xmm7, xmm7, OWORD PTR L_avx1_aes_gcm_bswap_mask ; byte-reverse into GHASH order
vpxor xmm5, xmm5, xmm7                  ; X ^= block
; ghash_gfmul_avx
; Karatsuba carry-less multiply: X * H over GF(2)[x].
; vpshufd ..,78 swaps the two 64-bit halves so (lo^hi) can be formed.
vpshufd xmm1, xmm5, 78
vpshufd xmm2, xmm6, 78
vpclmulqdq xmm3, xmm6, xmm5, 17         ; xmm3 = Xhi * Hhi
vpclmulqdq xmm0, xmm6, xmm5, 0          ; xmm0 = Xlo * Hlo
vpxor xmm1, xmm1, xmm5                  ; xmm1 = Xlo ^ Xhi (in low qword)
vpxor xmm2, xmm2, xmm6                  ; xmm2 = Hlo ^ Hhi
vpclmulqdq xmm1, xmm1, xmm2, 0          ; middle product
vpxor xmm1, xmm1, xmm0                  ; middle ^= lo
vpxor xmm1, xmm1, xmm3                  ; middle ^= hi
; Recombine the three partial products into a 256-bit value in xmm5:xmm4.
vmovdqa xmm4, xmm0
vmovdqa xmm5, xmm3
vpslldq xmm2, xmm1, 8
vpsrldq xmm1, xmm1, 8
vpxor xmm4, xmm4, xmm2                  ; xmm4 = low 128 bits of product
vpxor xmm5, xmm5, xmm1                  ; xmm5 = high 128 bits of product
; Shift the 256-bit product left by one bit (GHASH bit-reflection fixup);
; the carries cross 32-bit lanes via the 31-bit right shifts and the
; 4-byte lane rotate below.
vpsrld xmm0, xmm4, 31
vpsrld xmm1, xmm5, 31
vpslld xmm4, xmm4, 1
vpslld xmm5, xmm5, 1
vpsrldq xmm2, xmm0, 12                  ; carry out of the low 128 bits
vpslldq xmm0, xmm0, 4
vpslldq xmm1, xmm1, 4
vpor xmm5, xmm5, xmm2                   ; carry into the high 128 bits
vpor xmm4, xmm4, xmm0
vpor xmm5, xmm5, xmm1
; Reduce modulo the GHASH polynomial x^128 + x^7 + x^2 + x + 1
; (shift-based reduction: shifts by 31/30/25 then 1/2/7).
vpslld xmm0, xmm4, 31
vpslld xmm1, xmm4, 30
vpslld xmm2, xmm4, 25
vpxor xmm0, xmm0, xmm1
vpxor xmm0, xmm0, xmm2
vmovdqa xmm1, xmm0
vpsrldq xmm1, xmm1, 4
vpslldq xmm0, xmm0, 12
vpxor xmm4, xmm4, xmm0
vpsrld xmm2, xmm4, 1
vpsrld xmm3, xmm4, 2
vpsrld xmm0, xmm4, 7
vpxor xmm2, xmm2, xmm3
vpxor xmm2, xmm2, xmm0
vpxor xmm2, xmm2, xmm1
vpxor xmm2, xmm2, xmm4
vpxor xmm5, xmm5, xmm2                  ; xmm5 = reduced result = new X
add ecx, 16
cmp ecx, edx
jl L_AES_GCM_aad_update_avx1_16_loop    ; more AAD blocks remain
vmovdqa OWORD PTR [r8], xmm5            ; store updated accumulator
vmovdqu xmm6, OWORD PTR [rsp]           ; restore callee-saved xmm regs
vmovdqu xmm7, OWORD PTR [rsp+16]
add rsp, 32
ret
AES_GCM_aad_update_avx1 ENDP
_text ENDS
_text SEGMENT READONLY PARA
;-----------------------------------------------------------------------
; AES_GCM_encrypt_block_avx1 -- encrypt one 16-byte block in CTR mode
; and advance the counter.
;
; C equivalent (Microsoft x64 ABI):
;   void AES_GCM_encrypt_block_avx1(const unsigned char* key, int nr,
;                                   unsigned char* out, const unsigned char* in,
;                                   unsigned char* counter);
; In:    rcx = key      (expanded AES key schedule, 16-byte aligned)
;        edx = nr       (number of AES rounds: 10, 12 or 14)
;        r8  = out      (16-byte output/ciphertext block)
;        r9  = in       (16-byte input/plaintext block)
;        [rsp+40] = counter (16-byte counter block, incremented in place)
; Out:   [r8] = AES-CTR(in); [counter] advanced by one.
;        xmm0 is left holding the byte-reversed output block --
;        presumably for a GHASH convention at the call site; the value
;        is not stored here (TODO confirm against the C caller).
; Clobb: rax, r10, r11, xmm0, xmm1, flags (no callee-saved regs touched)
;-----------------------------------------------------------------------
AES_GCM_encrypt_block_avx1 PROC
mov r10, r8                             ; r10 = out
mov r11, r9                             ; r11 = in
mov rax, QWORD PTR [rsp+40]             ; rax = counter ptr (5th arg on stack)
vmovdqu xmm1, OWORD PTR [rax]           ; load counter block
vpshufb xmm0, xmm1, OWORD PTR L_avx1_aes_gcm_bswap_epi64 ; byte-reverse each 64-bit half for the cipher input
vpaddd xmm1, xmm1, OWORD PTR L_avx1_aes_gcm_one ; counter += 1
vmovdqu OWORD PTR [rax], xmm1           ; store advanced counter
; AES rounds: whitening xor, then 9 common rounds.
vpxor xmm0, xmm0, [rcx]
vaesenc xmm0, xmm0, [rcx+16]
vaesenc xmm0, xmm0, [rcx+32]
vaesenc xmm0, xmm0, [rcx+48]
vaesenc xmm0, xmm0, [rcx+64]
vaesenc xmm0, xmm0, [rcx+80]
vaesenc xmm0, xmm0, [rcx+96]
vaesenc xmm0, xmm0, [rcx+112]
vaesenc xmm0, xmm0, [rcx+128]
vaesenc xmm0, xmm0, [rcx+144]
cmp edx, 11                             ; AES-128 (nr=10)? then round key 10 is the last
vmovdqa xmm1, OWORD PTR [rcx+160]
jl L_AES_GCM_encrypt_block_avx1_aesenc_block_last
vaesenc xmm0, xmm0, xmm1                ; rounds 10-11 for AES-192/256
vaesenc xmm0, xmm0, [rcx+176]
cmp edx, 13                             ; AES-192 (nr=12)? then round key 12 is the last
vmovdqa xmm1, OWORD PTR [rcx+192]
jl L_AES_GCM_encrypt_block_avx1_aesenc_block_last
vaesenc xmm0, xmm0, xmm1                ; rounds 12-13 for AES-256
vaesenc xmm0, xmm0, [rcx+208]
vmovdqa xmm1, OWORD PTR [rcx+224]
L_AES_GCM_encrypt_block_avx1_aesenc_block_last:
vaesenclast xmm0, xmm0, xmm1            ; final round -> keystream block
vmovdqu xmm1, OWORD PTR [r11]
vpxor xmm0, xmm0, xmm1                  ; ciphertext = keystream ^ plaintext
vmovdqu OWORD PTR [r10], xmm0
vpshufb xmm0, xmm0, OWORD PTR L_avx1_aes_gcm_bswap_mask ; leave byte-reversed ciphertext in xmm0 (not stored)
vzeroupper
ret
AES_GCM_encrypt_block_avx1 ENDP
_text ENDS
_text SEGMENT READONLY PARA
;-----------------------------------------------------------------------
; AES_GCM_ghash_block_avx1 -- fold a single 16-byte block into the
; GHASH accumulator:  X = (X ^ byteswap(data)) * H  in GF(2^128),
; reduced modulo x^128 + x^7 + x^2 + x + 1.
;
; C equivalent (Microsoft x64 ABI):
;   void AES_GCM_ghash_block_avx1(const unsigned char* data,
;                                 unsigned char* X, unsigned char* H);
; In:    rcx = data (16-byte input block)
;        rdx = X    (16-byte GHASH accumulator, 16-byte aligned, updated
;                    in place)
;        r8  = H    (16-byte hash key, 16-byte aligned)
; Out:   [rdx] = updated accumulator
; Clobb: xmm0-xmm5, flags (xmm6/xmm7 are callee-saved on Win64 and are
;        saved/restored on the stack below)
;-----------------------------------------------------------------------
AES_GCM_ghash_block_avx1 PROC
sub rsp, 32                             ; spill area for callee-saved xmm6/xmm7
vmovdqu OWORD PTR [rsp], xmm6
vmovdqu OWORD PTR [rsp+16], xmm7
vmovdqa xmm4, OWORD PTR [rdx]           ; xmm4 = X (accumulator)
vmovdqa xmm5, OWORD PTR [r8]            ; xmm5 = H (hash key)
vmovdqu xmm7, OWORD PTR [rcx]           ; load input block
vpshufb xmm7, xmm7, OWORD PTR L_avx1_aes_gcm_bswap_mask ; byte-reverse into GHASH order
vpxor xmm4, xmm4, xmm7                  ; X ^= block
; ghash_gfmul_avx
; Karatsuba carry-less multiply: X * H over GF(2)[x].
; vpshufd ..,78 swaps the two 64-bit halves so (lo^hi) can be formed.
vpshufd xmm1, xmm4, 78
vpshufd xmm2, xmm5, 78
vpclmulqdq xmm3, xmm5, xmm4, 17         ; xmm3 = Xhi * Hhi
vpclmulqdq xmm0, xmm5, xmm4, 0          ; xmm0 = Xlo * Hlo
vpxor xmm1, xmm1, xmm4                  ; xmm1 = Xlo ^ Xhi (in low qword)
vpxor xmm2, xmm2, xmm5                  ; xmm2 = Hlo ^ Hhi
vpclmulqdq xmm1, xmm1, xmm2, 0          ; middle product
vpxor xmm1, xmm1, xmm0                  ; middle ^= lo
vpxor xmm1, xmm1, xmm3                  ; middle ^= hi
; Recombine the three partial products into a 256-bit value in xmm4:xmm6.
vmovdqa xmm6, xmm0
vmovdqa xmm4, xmm3
vpslldq xmm2, xmm1, 8
vpsrldq xmm1, xmm1, 8
vpxor xmm6, xmm6, xmm2                  ; xmm6 = low 128 bits of product
vpxor xmm4, xmm4, xmm1                  ; xmm4 = high 128 bits of product
; Shift the 256-bit product left by one bit (GHASH bit-reflection fixup);
; carries cross 32-bit lanes via the 31-bit right shifts and the 4-byte
; lane rotate below.
vpsrld xmm0, xmm6, 31
vpsrld xmm1, xmm4, 31
vpslld xmm6, xmm6, 1
vpslld xmm4, xmm4, 1
vpsrldq xmm2, xmm0, 12                  ; carry out of the low 128 bits
vpslldq xmm0, xmm0, 4
vpslldq xmm1, xmm1, 4
vpor xmm4, xmm4, xmm2                   ; carry into the high 128 bits
vpor xmm6, xmm6, xmm0
vpor xmm4, xmm4, xmm1
; Reduce modulo the GHASH polynomial x^128 + x^7 + x^2 + x + 1
; (shift-based reduction: shifts by 31/30/25 then 1/2/7).
vpslld xmm0, xmm6, 31
vpslld xmm1, xmm6, 30
vpslld xmm2, xmm6, 25
vpxor xmm0, xmm0, xmm1
vpxor xmm0, xmm0, xmm2
vmovdqa xmm1, xmm0
vpsrldq xmm1, xmm1, 4
vpslldq xmm0, xmm0, 12
vpxor xmm6, xmm6, xmm0
vpsrld xmm2, xmm6, 1
vpsrld xmm3, xmm6, 2
vpsrld xmm0, xmm6, 7
vpxor xmm2, xmm2, xmm3
vpxor xmm2, xmm2, xmm0
vpxor xmm2, xmm2, xmm1
vpxor xmm2, xmm2, xmm6
vpxor xmm4, xmm4, xmm2                  ; xmm4 = reduced result = new X
vmovdqa OWORD PTR [rdx], xmm4           ; store updated accumulator
vzeroupper
vmovdqu xmm6, OWORD PTR [rsp]           ; restore callee-saved xmm regs
vmovdqu xmm7, OWORD PTR [rsp+16]
add rsp, 32
ret
AES_GCM_ghash_block_avx1 ENDP
_text ENDS
_text SEGMENT READONLY PARA
AES_GCM_encrypt_update_avx1 PROC
push r13
push r12
push r14
push r15
push rdi
mov rax, rcx
mov r10, r8
mov r8d, edx
mov r11, r9
mov r9d, DWORD PTR [rsp+80]
mov r12, QWORD PTR [rsp+88]
mov r14, QWORD PTR [rsp+96]
mov r15, QWORD PTR [rsp+104]
sub rsp, 320
vmovdqu OWORD PTR [rsp+160], xmm6
vmovdqu OWORD PTR [rsp+176], xmm7
vmovdqu OWORD PTR [rsp+192], xmm8
vmovdqu OWORD PTR [rsp+208], xmm9
vmovdqu OWORD PTR [rsp+224], xmm10
vmovdqu OWORD PTR [rsp+240], xmm11
vmovdqu OWORD PTR [rsp+256], xmm12
vmovdqu OWORD PTR [rsp+272], xmm13
vmovdqu OWORD PTR [rsp+288], xmm14
vmovdqu OWORD PTR [rsp+304], xmm15
vmovdqa xmm6, OWORD PTR [r12]
vmovdqa xmm5, OWORD PTR [r14]
vpsrlq xmm9, xmm5, 63
vpsllq xmm8, xmm5, 1
vpslldq xmm9, xmm9, 8
vpor xmm8, xmm8, xmm9
vpshufd xmm5, xmm5, 255
vpsrad xmm5, xmm5, 31
vpand xmm5, xmm5, OWORD PTR L_avx1_aes_gcm_mod2_128
vpxor xmm5, xmm5, xmm8
xor edi, edi
cmp r9d, 128
mov r13d, r9d
jl L_AES_GCM_encrypt_update_avx1_done_128
and r13d, 4294967168
vmovdqa xmm2, xmm6
; H ^ 1
vmovdqu OWORD PTR [rsp], xmm5
; H ^ 2
vpclmulqdq xmm8, xmm5, xmm5, 0
vpclmulqdq xmm0, xmm5, xmm5, 17
vpslld xmm12, xmm8, 31
vpslld xmm13, xmm8, 30
vpslld xmm14, xmm8, 25
vpxor xmm12, xmm12, xmm13
vpxor xmm12, xmm12, xmm14
vpsrldq xmm13, xmm12, 4
vpslldq xmm12, xmm12, 12
vpxor xmm8, xmm8, xmm12
vpsrld xmm14, xmm8, 1
vpsrld xmm10, xmm8, 2
vpsrld xmm9, xmm8, 7
vpxor xmm14, xmm14, xmm10
vpxor xmm14, xmm14, xmm9
vpxor xmm14, xmm14, xmm13
vpxor xmm14, xmm14, xmm8
vpxor xmm0, xmm0, xmm14
vmovdqu OWORD PTR [rsp+16], xmm0
; H ^ 3
; ghash_gfmul_red_avx
vpshufd xmm9, xmm5, 78
vpshufd xmm10, xmm0, 78
vpclmulqdq xmm11, xmm0, xmm5, 17
vpclmulqdq xmm8, xmm0, xmm5, 0
vpxor xmm9, xmm9, xmm5
vpxor xmm10, xmm10, xmm0
vpclmulqdq xmm9, xmm9, xmm10, 0
vpxor xmm9, xmm9, xmm8
vpxor xmm9, xmm9, xmm11
vpslldq xmm10, xmm9, 8
vpsrldq xmm9, xmm9, 8
vpxor xmm8, xmm8, xmm10
vpxor xmm1, xmm11, xmm9
vpslld xmm12, xmm8, 31
vpslld xmm13, xmm8, 30
vpslld xmm14, xmm8, 25
vpxor xmm12, xmm12, xmm13
vpxor xmm12, xmm12, xmm14
vpsrldq xmm13, xmm12, 4
vpslldq xmm12, xmm12, 12
vpxor xmm8, xmm8, xmm12
vpsrld xmm14, xmm8, 1
vpsrld xmm10, xmm8, 2
vpsrld xmm9, xmm8, 7
vpxor xmm14, xmm14, xmm10
vpxor xmm14, xmm14, xmm9
vpxor xmm14, xmm14, xmm13
vpxor xmm14, xmm14, xmm8
vpxor xmm1, xmm1, xmm14
vmovdqu OWORD PTR [rsp+32], xmm1
; H ^ 4
vpclmulqdq xmm8, xmm0, xmm0, 0
vpclmulqdq xmm3, xmm0, xmm0, 17
vpslld xmm12, xmm8, 31
vpslld xmm13, xmm8, 30
vpslld xmm14, xmm8, 25
vpxor xmm12, xmm12, xmm13
vpxor xmm12, xmm12, xmm14
vpsrldq xmm13, xmm12, 4
vpslldq xmm12, xmm12, 12
vpxor xmm8, xmm8, xmm12
vpsrld xmm14, xmm8, 1
vpsrld xmm10, xmm8, 2
vpsrld xmm9, xmm8, 7
vpxor xmm14, xmm14, xmm10
vpxor xmm14, xmm14, xmm9
vpxor xmm14, xmm14, xmm13
vpxor xmm14, xmm14, xmm8
vpxor xmm3, xmm3, xmm14
vmovdqu OWORD PTR [rsp+48], xmm3
; H ^ 5
; ghash_gfmul_red_avx
vpshufd xmm9, xmm0, 78
vpshufd xmm10, xmm1, 78
vpclmulqdq xmm11, xmm1, xmm0, 17
vpclmulqdq xmm8, xmm1, xmm0, 0
vpxor xmm9, xmm9, xmm0
vpxor xmm10, xmm10, xmm1
vpclmulqdq xmm9, xmm9, xmm10, 0
vpxor xmm9, xmm9, xmm8
vpxor xmm9, xmm9, xmm11
vpslldq xmm10, xmm9, 8
vpsrldq xmm9, xmm9, 8
vpxor xmm8, xmm8, xmm10
vpxor xmm7, xmm11, xmm9
vpslld xmm12, xmm8, 31
vpslld xmm13, xmm8, 30
vpslld xmm14, xmm8, 25
vpxor xmm12, xmm12, xmm13
vpxor xmm12, xmm12, xmm14
vpsrldq xmm13, xmm12, 4
vpslldq xmm12, xmm12, 12
vpxor xmm8, xmm8, xmm12
vpsrld xmm14, xmm8, 1
vpsrld xmm10, xmm8, 2
vpsrld xmm9, xmm8, 7
vpxor xmm14, xmm14, xmm10
vpxor xmm14, xmm14, xmm9
vpxor xmm14, xmm14, xmm13
vpxor xmm14, xmm14, xmm8
vpxor xmm7, xmm7, xmm14
vmovdqu OWORD PTR [rsp+64], xmm7
; H ^ 6
vpclmulqdq xmm8, xmm1, xmm1, 0
vpclmulqdq xmm7, xmm1, xmm1, 17
vpslld xmm12, xmm8, 31
vpslld xmm13, xmm8, 30
vpslld xmm14, xmm8, 25
vpxor xmm12, xmm12, xmm13
vpxor xmm12, xmm12, xmm14
vpsrldq xmm13, xmm12, 4
vpslldq xmm12, xmm12, 12
vpxor xmm8, xmm8, xmm12
vpsrld xmm14, xmm8, 1
vpsrld xmm10, xmm8, 2
vpsrld xmm9, xmm8, 7
vpxor xmm14, xmm14, xmm10
vpxor xmm14, xmm14, xmm9
vpxor xmm14, xmm14, xmm13
vpxor xmm14, xmm14, xmm8
vpxor xmm7, xmm7, xmm14
vmovdqu OWORD PTR [rsp+80], xmm7
; H ^ 7
; ghash_gfmul_red_avx
vpshufd xmm9, xmm1, 78
vpshufd xmm10, xmm3, 78
vpclmulqdq xmm11, xmm3, xmm1, 17
vpclmulqdq xmm8, xmm3, xmm1, 0
vpxor xmm9, xmm9, xmm1
vpxor xmm10, xmm10, xmm3
vpclmulqdq xmm9, xmm9, xmm10, 0
vpxor xmm9, xmm9, xmm8
vpxor xmm9, xmm9, xmm11
vpslldq xmm10, xmm9, 8
vpsrldq xmm9, xmm9, 8
vpxor xmm8, xmm8, xmm10
vpxor xmm7, xmm11, xmm9
vpslld xmm12, xmm8, 31
vpslld xmm13, xmm8, 30
vpslld xmm14, xmm8, 25
vpxor xmm12, xmm12, xmm13
vpxor xmm12, xmm12, xmm14
vpsrldq xmm13, xmm12, 4
vpslldq xmm12, xmm12, 12
vpxor xmm8, xmm8, xmm12
vpsrld xmm14, xmm8, 1
vpsrld xmm10, xmm8, 2
vpsrld xmm9, xmm8, 7
vpxor xmm14, xmm14, xmm10
vpxor xmm14, xmm14, xmm9
vpxor xmm14, xmm14, xmm13
vpxor xmm14, xmm14, xmm8
vpxor xmm7, xmm7, xmm14
vmovdqu OWORD PTR [rsp+96], xmm7
; H ^ 8
vpclmulqdq xmm8, xmm3, xmm3, 0
vpclmulqdq xmm7, xmm3, xmm3, 17
vpslld xmm12, xmm8, 31
vpslld xmm13, xmm8, 30
vpslld xmm14, xmm8, 25
vpxor xmm12, xmm12, xmm13
vpxor xmm12, xmm12, xmm14
vpsrldq xmm13, xmm12, 4
vpslldq xmm12, xmm12, 12
vpxor xmm8, xmm8, xmm12
vpsrld xmm14, xmm8, 1
vpsrld xmm10, xmm8, 2
vpsrld xmm9, xmm8, 7
vpxor xmm14, xmm14, xmm10
vpxor xmm14, xmm14, xmm9
vpxor xmm14, xmm14, xmm13
vpxor xmm14, xmm14, xmm8
vpxor xmm7, xmm7, xmm14
vmovdqu OWORD PTR [rsp+112], xmm7
; First 128 bytes of input
vmovdqu xmm0, OWORD PTR [r15]
vmovdqa xmm1, OWORD PTR L_avx1_aes_gcm_bswap_epi64
vpshufb xmm8, xmm0, xmm1
vpaddd xmm9, xmm0, OWORD PTR L_avx1_aes_gcm_one
vpshufb xmm9, xmm9, xmm1
vpaddd xmm10, xmm0, OWORD PTR L_avx1_aes_gcm_two
vpshufb xmm10, xmm10, xmm1
vpaddd xmm11, xmm0, OWORD PTR L_avx1_aes_gcm_three
vpshufb xmm11, xmm11, xmm1
vpaddd xmm12, xmm0, OWORD PTR L_avx1_aes_gcm_four
vpshufb xmm12, xmm12, xmm1
vpaddd xmm13, xmm0, OWORD PTR L_avx1_aes_gcm_five
vpshufb xmm13, xmm13, xmm1
vpaddd xmm14, xmm0, OWORD PTR L_avx1_aes_gcm_six
vpshufb xmm14, xmm14, xmm1
vpaddd xmm15, xmm0, OWORD PTR L_avx1_aes_gcm_seven
vpshufb xmm15, xmm15, xmm1
vpaddd xmm0, xmm0, OWORD PTR L_avx1_aes_gcm_eight
vmovdqa xmm7, OWORD PTR [rax]
vmovdqu OWORD PTR [r15], xmm0
vpxor xmm8, xmm8, xmm7
vpxor xmm9, xmm9, xmm7
vpxor xmm10, xmm10, xmm7
vpxor xmm11, xmm11, xmm7
vpxor xmm12, xmm12, xmm7
vpxor xmm13, xmm13, xmm7
vpxor xmm14, xmm14, xmm7
vpxor xmm15, xmm15, xmm7
vmovdqa xmm7, OWORD PTR [rax+16]
vaesenc xmm8, xmm8, xmm7
vaesenc xmm9, xmm9, xmm7
vaesenc xmm10, xmm10, xmm7
vaesenc xmm11, xmm11, xmm7
vaesenc xmm12, xmm12, xmm7
vaesenc xmm13, xmm13, xmm7
vaesenc xmm14, xmm14, xmm7
vaesenc xmm15, xmm15, xmm7
vmovdqa xmm7, OWORD PTR [rax+32]
vaesenc xmm8, xmm8, xmm7
vaesenc xmm9, xmm9, xmm7
vaesenc xmm10, xmm10, xmm7
vaesenc xmm11, xmm11, xmm7
vaesenc xmm12, xmm12, xmm7
vaesenc xmm13, xmm13, xmm7
vaesenc xmm14, xmm14, xmm7
vaesenc xmm15, xmm15, xmm7
vmovdqa xmm7, OWORD PTR [rax+48]
vaesenc xmm8, xmm8, xmm7
vaesenc xmm9, xmm9, xmm7
vaesenc xmm10, xmm10, xmm7
vaesenc xmm11, xmm11, xmm7
vaesenc xmm12, xmm12, xmm7
vaesenc xmm13, xmm13, xmm7
vaesenc xmm14, xmm14, xmm7
vaesenc xmm15, xmm15, xmm7
vmovdqa xmm7, OWORD PTR [rax+64]
vaesenc xmm8, xmm8, xmm7
vaesenc xmm9, xmm9, xmm7
vaesenc xmm10, xmm10, xmm7
vaesenc xmm11, xmm11, xmm7
vaesenc xmm12, xmm12, xmm7
vaesenc xmm13, xmm13, xmm7
vaesenc xmm14, xmm14, xmm7
vaesenc xmm15, xmm15, xmm7
vmovdqa xmm7, OWORD PTR [rax+80]
vaesenc xmm8, xmm8, xmm7
vaesenc xmm9, xmm9, xmm7
vaesenc xmm10, xmm10, xmm7
vaesenc xmm11, xmm11, xmm7
vaesenc xmm12, xmm12, xmm7
vaesenc xmm13, xmm13, xmm7
vaesenc xmm14, xmm14, xmm7
vaesenc xmm15, xmm15, xmm7
vmovdqa xmm7, OWORD PTR [rax+96]
vaesenc xmm8, xmm8, xmm7
vaesenc xmm9, xmm9, xmm7
vaesenc xmm10, xmm10, xmm7
vaesenc xmm11, xmm11, xmm7
vaesenc xmm12, xmm12, xmm7
vaesenc xmm13, xmm13, xmm7
vaesenc xmm14, xmm14, xmm7
vaesenc xmm15, xmm15, xmm7
vmovdqa xmm7, OWORD PTR [rax+112]
vaesenc xmm8, xmm8, xmm7
vaesenc xmm9, xmm9, xmm7
vaesenc xmm10, xmm10, xmm7
vaesenc xmm11, xmm11, xmm7
vaesenc xmm12, xmm12, xmm7
vaesenc xmm13, xmm13, xmm7
vaesenc xmm14, xmm14, xmm7
vaesenc xmm15, xmm15, xmm7
vmovdqa xmm7, OWORD PTR [rax+128]
vaesenc xmm8, xmm8, xmm7
vaesenc xmm9, xmm9, xmm7
vaesenc xmm10, xmm10, xmm7
vaesenc xmm11, xmm11, xmm7
vaesenc xmm12, xmm12, xmm7
vaesenc xmm13, xmm13, xmm7
vaesenc xmm14, xmm14, xmm7
vaesenc xmm15, xmm15, xmm7
vmovdqa xmm7, OWORD PTR [rax+144]
vaesenc xmm8, xmm8, xmm7
vaesenc xmm9, xmm9, xmm7
vaesenc xmm10, xmm10, xmm7
vaesenc xmm11, xmm11, xmm7
vaesenc xmm12, xmm12, xmm7
vaesenc xmm13, xmm13, xmm7
vaesenc xmm14, xmm14, xmm7
vaesenc xmm15, xmm15, xmm7
cmp r8d, 11
vmovdqa xmm7, OWORD PTR [rax+160]
jl L_AES_GCM_encrypt_update_avx1_aesenc_128_enc_done
vaesenc xmm8, xmm8, xmm7
vaesenc xmm9, xmm9, xmm7
vaesenc xmm10, xmm10, xmm7
vaesenc xmm11, xmm11, xmm7
vaesenc xmm12, xmm12, xmm7
vaesenc xmm13, xmm13, xmm7
vaesenc xmm14, xmm14, xmm7
vaesenc xmm15, xmm15, xmm7
vmovdqa xmm7, OWORD PTR [rax+176]
vaesenc xmm8, xmm8, xmm7
vaesenc xmm9, xmm9, xmm7
vaesenc xmm10, xmm10, xmm7
vaesenc xmm11, xmm11, xmm7
vaesenc xmm12, xmm12, xmm7
vaesenc xmm13, xmm13, xmm7
vaesenc xmm14, xmm14, xmm7
vaesenc xmm15, xmm15, xmm7
cmp r8d, 13
vmovdqa xmm7, OWORD PTR [rax+192]
jl L_AES_GCM_encrypt_update_avx1_aesenc_128_enc_done
vaesenc xmm8, xmm8, xmm7
vaesenc xmm9, xmm9, xmm7
vaesenc xmm10, xmm10, xmm7
vaesenc xmm11, xmm11, xmm7
vaesenc xmm12, xmm12, xmm7
vaesenc xmm13, xmm13, xmm7
vaesenc xmm14, xmm14, xmm7
vaesenc xmm15, xmm15, xmm7
vmovdqa xmm7, OWORD PTR [rax+208]
vaesenc xmm8, xmm8, xmm7
vaesenc xmm9, xmm9, xmm7
vaesenc xmm10, xmm10, xmm7
vaesenc xmm11, xmm11, xmm7
vaesenc xmm12, xmm12, xmm7
vaesenc xmm13, xmm13, xmm7
vaesenc xmm14, xmm14, xmm7
vaesenc xmm15, xmm15, xmm7
vmovdqa xmm7, OWORD PTR [rax+224]
L_AES_GCM_encrypt_update_avx1_aesenc_128_enc_done:
vaesenclast xmm8, xmm8, xmm7
vaesenclast xmm9, xmm9, xmm7
vmovdqu xmm0, OWORD PTR [r11]
vmovdqu xmm1, OWORD PTR [r11+16]
vpxor xmm8, xmm8, xmm0
vpxor xmm9, xmm9, xmm1
vmovdqu OWORD PTR [r10], xmm8
vmovdqu OWORD PTR [r10+16], xmm9
vaesenclast xmm10, xmm10, xmm7
vaesenclast xmm11, xmm11, xmm7
vmovdqu xmm0, OWORD PTR [r11+32]
vmovdqu xmm1, OWORD PTR [r11+48]
vpxor xmm10, xmm10, xmm0
vpxor xmm11, xmm11, xmm1
vmovdqu OWORD PTR [r10+32], xmm10
vmovdqu OWORD PTR [r10+48], xmm11
vaesenclast xmm12, xmm12, xmm7
vaesenclast xmm13, xmm13, xmm7
vmovdqu xmm0, OWORD PTR [r11+64]
vmovdqu xmm1, OWORD PTR [r11+80]
vpxor xmm12, xmm12, xmm0
vpxor xmm13, xmm13, xmm1
vmovdqu OWORD PTR [r10+64], xmm12
vmovdqu OWORD PTR [r10+80], xmm13
vaesenclast xmm14, xmm14, xmm7
vaesenclast xmm15, xmm15, xmm7
vmovdqu xmm0, OWORD PTR [r11+96]
vmovdqu xmm1, OWORD PTR [r11+112]
vpxor xmm14, xmm14, xmm0
vpxor xmm15, xmm15, xmm1
vmovdqu OWORD PTR [r10+96], xmm14
vmovdqu OWORD PTR [r10+112], xmm15
cmp r13d, 128
mov edi, 128
jle L_AES_GCM_encrypt_update_avx1_end_128
; More 128 bytes of input
L_AES_GCM_encrypt_update_avx1_ghash_128:
lea rcx, QWORD PTR [r11+rdi]
lea rdx, QWORD PTR [r10+rdi]
vmovdqu xmm0, OWORD PTR [r15]
vmovdqa xmm1, OWORD PTR L_avx1_aes_gcm_bswap_epi64
vpshufb xmm8, xmm0, xmm1
vpaddd xmm9, xmm0, OWORD PTR L_avx1_aes_gcm_one
vpshufb xmm9, xmm9, xmm1
vpaddd xmm10, xmm0, OWORD PTR L_avx1_aes_gcm_two
vpshufb xmm10, xmm10, xmm1
vpaddd xmm11, xmm0, OWORD PTR L_avx1_aes_gcm_three
vpshufb xmm11, xmm11, xmm1
vpaddd xmm12, xmm0, OWORD PTR L_avx1_aes_gcm_four
vpshufb xmm12, xmm12, xmm1
vpaddd xmm13, xmm0, OWORD PTR L_avx1_aes_gcm_five
vpshufb xmm13, xmm13, xmm1
vpaddd xmm14, xmm0, OWORD PTR L_avx1_aes_gcm_six
vpshufb xmm14, xmm14, xmm1
vpaddd xmm15, xmm0, OWORD PTR L_avx1_aes_gcm_seven
vpshufb xmm15, xmm15, xmm1
vpaddd xmm0, xmm0, OWORD PTR L_avx1_aes_gcm_eight
vmovdqa xmm7, OWORD PTR [rax]
vmovdqu OWORD PTR [r15], xmm0
vpxor xmm8, xmm8, xmm7
vpxor xmm9, xmm9, xmm7
vpxor xmm10, xmm10, xmm7
vpxor xmm11, xmm11, xmm7
vpxor xmm12, xmm12, xmm7
vpxor xmm13, xmm13, xmm7
vpxor xmm14, xmm14, xmm7
vpxor xmm15, xmm15, xmm7
vmovdqu xmm7, OWORD PTR [rsp+112]
vmovdqu xmm0, OWORD PTR [rdx+-128]
vaesenc xmm8, xmm8, [rax+16]
vpshufb xmm0, xmm0, OWORD PTR L_avx1_aes_gcm_bswap_mask
vpxor xmm0, xmm0, xmm2
vpshufd xmm1, xmm7, 78
vpshufd xmm5, xmm0, 78
vpxor xmm1, xmm1, xmm7
vpxor xmm5, xmm5, xmm0
vpclmulqdq xmm3, xmm0, xmm7, 17
vaesenc xmm9, xmm9, [rax+16]
vaesenc xmm10, xmm10, [rax+16]
vpclmulqdq xmm2, xmm0, xmm7, 0
vaesenc xmm11, xmm11, [rax+16]
vaesenc xmm12, xmm12, [rax+16]
vpclmulqdq xmm1, xmm1, xmm5, 0
vaesenc xmm13, xmm13, [rax+16]
vaesenc xmm14, xmm14, [rax+16]
vaesenc xmm15, xmm15, [rax+16]
vpxor xmm1, xmm1, xmm2
vpxor xmm1, xmm1, xmm3
vmovdqu xmm7, OWORD PTR [rsp+96]
vmovdqu xmm0, OWORD PTR [rdx+-112]
vpshufd xmm4, xmm7, 78
vpshufb xmm0, xmm0, OWORD PTR L_avx1_aes_gcm_bswap_mask
vaesenc xmm8, xmm8, [rax+32]
vpxor xmm4, xmm4, xmm7
vpshufd xmm5, xmm0, 78
vpxor xmm5, xmm5, xmm0
vpclmulqdq xmm6, xmm0, xmm7, 17
vaesenc xmm9, xmm9, [rax+32]
vaesenc xmm10, xmm10, [rax+32]
vpclmulqdq xmm7, xmm0, xmm7, 0
vaesenc xmm11, xmm11, [rax+32]
vaesenc xmm12, xmm12, [rax+32]
vpclmulqdq xmm4, xmm4, xmm5, 0
vaesenc xmm13, xmm13, [rax+32]
vaesenc xmm14, xmm14, [rax+32]
vaesenc xmm15, xmm15, [rax+32]
vpxor xmm1, xmm1, xmm7
vpxor xmm2, xmm2, xmm7
vpxor xmm1, xmm1, xmm6
vpxor xmm3, xmm3, xmm6
vpxor xmm1, xmm1, xmm4
vmovdqu xmm7, OWORD PTR [rsp+80]
vmovdqu xmm0, OWORD PTR [rdx+-96]
vpshufd xmm4, xmm7, 78
vpshufb xmm0, xmm0, OWORD PTR L_avx1_aes_gcm_bswap_mask
vaesenc xmm8, xmm8, [rax+48]
vpxor xmm4, xmm4, xmm7
vpshufd xmm5, xmm0, 78
vpxor xmm5, xmm5, xmm0
vpclmulqdq xmm6, xmm0, xmm7, 17
vaesenc xmm9, xmm9, [rax+48]
vaesenc xmm10, xmm10, [rax+48]
vpclmulqdq xmm7, xmm0, xmm7, 0
vaesenc xmm11, xmm11, [rax+48]
vaesenc xmm12, xmm12, [rax+48]
vpclmulqdq xmm4, xmm4, xmm5, 0
vaesenc xmm13, xmm13, [rax+48]
vaesenc xmm14, xmm14, [rax+48]
vaesenc xmm15, xmm15, [rax+48]
vpxor xmm1, xmm1, xmm7
vpxor xmm2, xmm2, xmm7
vpxor xmm1, xmm1, xmm6
vpxor xmm3, xmm3, xmm6
vpxor xmm1, xmm1, xmm4
vmovdqu xmm7, OWORD PTR [rsp+64]
vmovdqu xmm0, OWORD PTR [rdx+-80]
vpshufd xmm4, xmm7, 78
vpshufb xmm0, xmm0, OWORD PTR L_avx1_aes_gcm_bswap_mask
vaesenc xmm8, xmm8, [rax+64]
vpxor xmm4, xmm4, xmm7
vpshufd xmm5, xmm0, 78
vpxor xmm5, xmm5, xmm0
vpclmulqdq xmm6, xmm0, xmm7, 17
vaesenc xmm9, xmm9, [rax+64]
vaesenc xmm10, xmm10, [rax+64]
vpclmulqdq xmm7, xmm0, xmm7, 0
vaesenc xmm11, xmm11, [rax+64]
vaesenc xmm12, xmm12, [rax+64]
vpclmulqdq xmm4, xmm4, xmm5, 0
vaesenc xmm13, xmm13, [rax+64]
vaesenc xmm14, xmm14, [rax+64]
vaesenc xmm15, xmm15, [rax+64]
vpxor xmm1, xmm1, xmm7
vpxor xmm2, xmm2, xmm7
vpxor xmm1, xmm1, xmm6
vpxor xmm3, xmm3, xmm6
vpxor xmm1, xmm1, xmm4
vmovdqu xmm7, OWORD PTR [rsp+48]
vmovdqu xmm0, OWORD PTR [rdx+-64]
vpshufd xmm4, xmm7, 78
vpshufb xmm0, xmm0, OWORD PTR L_avx1_aes_gcm_bswap_mask
vaesenc xmm8, xmm8, [rax+80]
vpxor xmm4, xmm4, xmm7
vpshufd xmm5, xmm0, 78
vpxor xmm5, xmm5, xmm0
vpclmulqdq xmm6, xmm0, xmm7, 17
vaesenc xmm9, xmm9, [rax+80]
vaesenc xmm10, xmm10, [rax+80]
vpclmulqdq xmm7, xmm0, xmm7, 0
vaesenc xmm11, xmm11, [rax+80]
vaesenc xmm12, xmm12, [rax+80]
vpclmulqdq xmm4, xmm4, xmm5, 0
vaesenc xmm13, xmm13, [rax+80]
vaesenc xmm14, xmm14, [rax+80]
vaesenc xmm15, xmm15, [rax+80]
vpxor xmm1, xmm1, xmm7
vpxor xmm2, xmm2, xmm7
vpxor xmm1, xmm1, xmm6
vpxor xmm3, xmm3, xmm6
vpxor xmm1, xmm1, xmm4
vmovdqu xmm7, OWORD PTR [rsp+32]
vmovdqu xmm0, OWORD PTR [rdx+-48]
vpshufd xmm4, xmm7, 78
vpshufb xmm0, xmm0, OWORD PTR L_avx1_aes_gcm_bswap_mask
vaesenc xmm8, xmm8, [rax+96]
vpxor xmm4, xmm4, xmm7
vpshufd xmm5, xmm0, 78
vpxor xmm5, xmm5, xmm0
vpclmulqdq xmm6, xmm0, xmm7, 17
vaesenc xmm9, xmm9, [rax+96]
vaesenc xmm10, xmm10, [rax+96]
vpclmulqdq xmm7, xmm0, xmm7, 0
vaesenc xmm11, xmm11, [rax+96]
vaesenc xmm12, xmm12, [rax+96]
vpclmulqdq xmm4, xmm4, xmm5, 0
vaesenc xmm13, xmm13, [rax+96]
vaesenc xmm14, xmm14, [rax+96]
vaesenc xmm15, xmm15, [rax+96]
vpxor xmm1, xmm1, xmm7
vpxor xmm2, xmm2, xmm7
vpxor xmm1, xmm1, xmm6
vpxor xmm3, xmm3, xmm6
vpxor xmm1, xmm1, xmm4
vmovdqu xmm7, OWORD PTR [rsp+16]
vmovdqu xmm0, OWORD PTR [rdx+-32]
vpshufd xmm4, xmm7, 78
vpshufb xmm0, xmm0, OWORD PTR L_avx1_aes_gcm_bswap_mask
vaesenc xmm8, xmm8, [rax+112]
vpxor xmm4, xmm4, xmm7
vpshufd xmm5, xmm0, 78
vpxor xmm5, xmm5, xmm0
vpclmulqdq xmm6, xmm0, xmm7, 17
vaesenc xmm9, xmm9, [rax+112]
vaesenc xmm10, xmm10, [rax+112]
vpclmulqdq xmm7, xmm0, xmm7, 0
vaesenc xmm11, xmm11, [rax+112]
vaesenc xmm12, xmm12, [rax+112]
vpclmulqdq xmm4, xmm4, xmm5, 0
vaesenc xmm13, xmm13, [rax+112]
vaesenc xmm14, xmm14, [rax+112]
vaesenc xmm15, xmm15, [rax+112]
vpxor xmm1, xmm1, xmm7
vpxor xmm2, xmm2, xmm7
vpxor xmm1, xmm1, xmm6
vpxor xmm3, xmm3, xmm6
vpxor xmm1, xmm1, xmm4
vmovdqu xmm7, OWORD PTR [rsp]
vmovdqu xmm0, OWORD PTR [rdx+-16]
vpshufd xmm4, xmm7, 78
vpshufb xmm0, xmm0, OWORD PTR L_avx1_aes_gcm_bswap_mask
vaesenc xmm8, xmm8, [rax+128]
vpxor xmm4, xmm4, xmm7
vpshufd xmm5, xmm0, 78
vpxor xmm5, xmm5, xmm0
vpclmulqdq xmm6, xmm0, xmm7, 17
vaesenc xmm9, xmm9, [rax+128]
vaesenc xmm10, xmm10, [rax+128]
vpclmulqdq xmm7, xmm0, xmm7, 0
vaesenc xmm11, xmm11, [rax+128]
vaesenc xmm12, xmm12, [rax+128]
vpclmulqdq xmm4, xmm4, xmm5, 0
vaesenc xmm13, xmm13, [rax+128]
vaesenc xmm14, xmm14, [rax+128]
vaesenc xmm15, xmm15, [rax+128]
vpxor xmm1, xmm1, xmm7
vpxor xmm2, xmm2, xmm7
vpxor xmm1, xmm1, xmm6
vpxor xmm3, xmm3, xmm6
vpxor xmm1, xmm1, xmm4
vpslldq xmm5, xmm1, 8
vpsrldq xmm1, xmm1, 8
vaesenc xmm8, xmm8, [rax+144]
vpxor xmm2, xmm2, xmm5
vpxor xmm3, xmm3, xmm1
vaesenc xmm9, xmm9, [rax+144]
vpslld xmm7, xmm2, 31
vpslld xmm4, xmm2, 30
vpslld xmm5, xmm2, 25
vaesenc xmm10, xmm10, [rax+144]
vpxor xmm7, xmm7, xmm4
vpxor xmm7, xmm7, xmm5
vaesenc xmm11, xmm11, [rax+144]
vpsrldq xmm4, xmm7, 4
vpslldq xmm7, xmm7, 12
vaesenc xmm12, xmm12, [rax+144]
vpxor xmm2, xmm2, xmm7
vpsrld xmm5, xmm2, 1
vaesenc xmm13, xmm13, [rax+144]
vpsrld xmm1, xmm2, 2
vpsrld xmm0, xmm2, 7
vaesenc xmm14, xmm14, [rax+144]
vpxor xmm5, xmm5, xmm1
vpxor xmm5, xmm5, xmm0
vaesenc xmm15, xmm15, [rax+144]
vpxor xmm5, xmm5, xmm4
vpxor xmm2, xmm2, xmm5
vpxor xmm2, xmm2, xmm3
cmp r8d, 11
vmovdqa xmm7, OWORD PTR [rax+160]
jl L_AES_GCM_encrypt_update_avx1_aesenc_128_ghash_avx_done
vaesenc xmm8, xmm8, xmm7
vaesenc xmm9, xmm9, xmm7
vaesenc xmm10, xmm10, xmm7
vaesenc xmm11, xmm11, xmm7
vaesenc xmm12, xmm12, xmm7
vaesenc xmm13, xmm13, xmm7
vaesenc xmm14, xmm14, xmm7
vaesenc xmm15, xmm15, xmm7
vmovdqa xmm7, OWORD PTR [rax+176]
vaesenc xmm8, xmm8, xmm7
vaesenc xmm9, xmm9, xmm7
vaesenc xmm10, xmm10, xmm7
vaesenc xmm11, xmm11, xmm7
vaesenc xmm12, xmm12, xmm7
vaesenc xmm13, xmm13, xmm7
vaesenc xmm14, xmm14, xmm7
vaesenc xmm15, xmm15, xmm7
cmp r8d, 13
vmovdqa xmm7, OWORD PTR [rax+192]
jl L_AES_GCM_encrypt_update_avx1_aesenc_128_ghash_avx_done
vaesenc xmm8, xmm8, xmm7
vaesenc xmm9, xmm9, xmm7
vaesenc xmm10, xmm10, xmm7
vaesenc xmm11, xmm11, xmm7
vaesenc xmm12, xmm12, xmm7
vaesenc xmm13, xmm13, xmm7
vaesenc xmm14, xmm14, xmm7
vaesenc xmm15, xmm15, xmm7
vmovdqa xmm7, OWORD PTR [rax+208]
vaesenc xmm8, xmm8, xmm7
vaesenc xmm9, xmm9, xmm7
vaesenc xmm10, xmm10, xmm7
vaesenc xmm11, xmm11, xmm7
vaesenc xmm12, xmm12, xmm7
vaesenc xmm13, xmm13, xmm7
vaesenc xmm14, xmm14, xmm7
vaesenc xmm15, xmm15, xmm7
vmovdqa xmm7, OWORD PTR [rax+224]
L_AES_GCM_encrypt_update_avx1_aesenc_128_ghash_avx_done:
vaesenclast xmm8, xmm8, xmm7
vaesenclast xmm9, xmm9, xmm7
vmovdqu xmm0, OWORD PTR [rcx]
vmovdqu xmm1, OWORD PTR [rcx+16]
vpxor xmm8, xmm8, xmm0
vpxor xmm9, xmm9, xmm1
vmovdqu OWORD PTR [rdx], xmm8
vmovdqu OWORD PTR [rdx+16], xmm9
vaesenclast xmm10, xmm10, xmm7
vaesenclast xmm11, xmm11, xmm7
vmovdqu xmm0, OWORD PTR [rcx+32]
vmovdqu xmm1, OWORD PTR [rcx+48]
vpxor xmm10, xmm10, xmm0
vpxor xmm11, xmm11, xmm1
vmovdqu OWORD PTR [rdx+32], xmm10
vmovdqu OWORD PTR [rdx+48], xmm11
vaesenclast xmm12, xmm12, xmm7
vaesenclast xmm13, xmm13, xmm7
vmovdqu xmm0, OWORD PTR [rcx+64]
vmovdqu xmm1, OWORD PTR [rcx+80]
vpxor xmm12, xmm12, xmm0
vpxor xmm13, xmm13, xmm1
vmovdqu OWORD PTR [rdx+64], xmm12
vmovdqu OWORD PTR [rdx+80], xmm13
vaesenclast xmm14, xmm14, xmm7
vaesenclast xmm15, xmm15, xmm7
vmovdqu xmm0, OWORD PTR [rcx+96]
vmovdqu xmm1, OWORD PTR [rcx+112]
vpxor xmm14, xmm14, xmm0
vpxor xmm15, xmm15, xmm1
vmovdqu OWORD PTR [rdx+96], xmm14
vmovdqu OWORD PTR [rdx+112], xmm15
add edi, 128
cmp edi, r13d
jl L_AES_GCM_encrypt_update_avx1_ghash_128
L_AES_GCM_encrypt_update_avx1_end_128:
vmovdqa xmm4, OWORD PTR L_avx1_aes_gcm_bswap_mask
vpshufb xmm8, xmm8, xmm4
vpshufb xmm9, xmm9, xmm4
vpshufb xmm10, xmm10, xmm4
vpshufb xmm11, xmm11, xmm4
vpxor xmm8, xmm8, xmm2
vpshufb xmm12, xmm12, xmm4
vpshufb xmm13, xmm13, xmm4
vpshufb xmm14, xmm14, xmm4
vpshufb xmm15, xmm15, xmm4
vmovdqu xmm7, OWORD PTR [rsp]
vmovdqu xmm5, OWORD PTR [rsp+16]
; ghash_gfmul_avx
vpshufd xmm1, xmm15, 78
vpshufd xmm2, xmm7, 78
vpclmulqdq xmm3, xmm7, xmm15, 17
vpclmulqdq xmm0, xmm7, xmm15, 0
vpxor xmm1, xmm1, xmm15
vpxor xmm2, xmm2, xmm7
vpclmulqdq xmm1, xmm1, xmm2, 0
vpxor xmm1, xmm1, xmm0
vpxor xmm1, xmm1, xmm3
vmovdqa xmm4, xmm0
vmovdqa xmm6, xmm3
vpslldq xmm2, xmm1, 8
vpsrldq xmm1, xmm1, 8
vpxor xmm4, xmm4, xmm2
vpxor xmm6, xmm6, xmm1
; ghash_gfmul_xor_avx
vpshufd xmm1, xmm14, 78
vpshufd xmm2, xmm5, 78
vpclmulqdq xmm3, xmm5, xmm14, 17
vpclmulqdq xmm0, xmm5, xmm14, 0
vpxor xmm1, xmm1, xmm14
vpxor xmm2, xmm2, xmm5
vpclmulqdq xmm1, xmm1, xmm2, 0
vpxor xmm1, xmm1, xmm0
vpxor xmm1, xmm1, xmm3
vpxor xmm4, xmm4, xmm0
vpxor xmm6, xmm6, xmm3
vpslldq xmm2, xmm1, 8
vpsrldq xmm1, xmm1, 8
vpxor xmm4, xmm4, xmm2
vpxor xmm6, xmm6, xmm1
vmovdqu xmm7, OWORD PTR [rsp+32]
vmovdqu xmm5, OWORD PTR [rsp+48]
; ghash_gfmul_xor_avx
vpshufd xmm1, xmm13, 78
vpshufd xmm2, xmm7, 78
vpclmulqdq xmm3, xmm7, xmm13, 17
vpclmulqdq xmm0, xmm7, xmm13, 0
vpxor xmm1, xmm1, xmm13
vpxor xmm2, xmm2, xmm7
vpclmulqdq xmm1, xmm1, xmm2, 0
vpxor xmm1, xmm1, xmm0
vpxor xmm1, xmm1, xmm3
vpxor xmm4, xmm4, xmm0
vpxor xmm6, xmm6, xmm3
vpslldq xmm2, xmm1, 8
vpsrldq xmm1, xmm1, 8
vpxor xmm4, xmm4, xmm2
vpxor xmm6, xmm6, xmm1
; ghash_gfmul_xor_avx
vpshufd xmm1, xmm12, 78
vpshufd xmm2, xmm5, 78
vpclmulqdq xmm3, xmm5, xmm12, 17
vpclmulqdq xmm0, xmm5, xmm12, 0
vpxor xmm1, xmm1, xmm12
vpxor xmm2, xmm2, xmm5
vpclmulqdq xmm1, xmm1, xmm2, 0
vpxor xmm1, xmm1, xmm0
vpxor xmm1, xmm1, xmm3
vpxor xmm4, xmm4, xmm0
vpxor xmm6, xmm6, xmm3
vpslldq xmm2, xmm1, 8
vpsrldq xmm1, xmm1, 8
vpxor xmm4, xmm4, xmm2
vpxor xmm6, xmm6, xmm1
vmovdqu xmm7, OWORD PTR [rsp+64]
vmovdqu xmm5, OWORD PTR [rsp+80]
; ghash_gfmul_xor_avx
vpshufd xmm1, xmm11, 78
vpshufd xmm2, xmm7, 78
vpclmulqdq xmm3, xmm7, xmm11, 17
vpclmulqdq xmm0, xmm7, xmm11, 0
vpxor xmm1, xmm1, xmm11
vpxor xmm2, xmm2, xmm7
vpclmulqdq xmm1, xmm1, xmm2, 0
vpxor xmm1, xmm1, xmm0
vpxor xmm1, xmm1, xmm3
vpxor xmm4, xmm4, xmm0
vpxor xmm6, xmm6, xmm3
vpslldq xmm2, xmm1, 8
vpsrldq xmm1, xmm1, 8
vpxor xmm4, xmm4, xmm2
vpxor xmm6, xmm6, xmm1
; ghash_gfmul_xor_avx
vpshufd xmm1, xmm10, 78
vpshufd xmm2, xmm5, 78
vpclmulqdq xmm3, xmm5, xmm10, 17
vpclmulqdq xmm0, xmm5, xmm10, 0
vpxor xmm1, xmm1, xmm10
vpxor xmm2, xmm2, xmm5
vpclmulqdq xmm1, xmm1, xmm2, 0
vpxor xmm1, xmm1, xmm0
vpxor xmm1, xmm1, xmm3
vpxor xmm4, xmm4, xmm0
vpxor xmm6, xmm6, xmm3
vpslldq xmm2, xmm1, 8
vpsrldq xmm1, xmm1, 8
vpxor xmm4, xmm4, xmm2
vpxor xmm6, xmm6, xmm1
vmovdqu xmm7, OWORD PTR [rsp+96]
vmovdqu xmm5, OWORD PTR [rsp+112]
; ghash_gfmul_xor_avx
vpshufd xmm1, xmm9, 78
vpshufd xmm2, xmm7, 78
vpclmulqdq xmm3, xmm7, xmm9, 17
vpclmulqdq xmm0, xmm7, xmm9, 0
vpxor xmm1, xmm1, xmm9
vpxor xmm2, xmm2, xmm7
vpclmulqdq xmm1, xmm1, xmm2, 0
vpxor xmm1, xmm1, xmm0
vpxor xmm1, xmm1, xmm3
vpxor xmm4, xmm4, xmm0
vpxor xmm6, xmm6, xmm3
vpslldq xmm2, xmm1, 8
vpsrldq xmm1, xmm1, 8
vpxor xmm4, xmm4, xmm2
vpxor xmm6, xmm6, xmm1
; ghash_gfmul_xor_avx
vpshufd xmm1, xmm8, 78
vpshufd xmm2, xmm5, 78
vpclmulqdq xmm3, xmm5, xmm8, 17
vpclmulqdq xmm0, xmm5, xmm8, 0
vpxor xmm1, xmm1, xmm8
vpxor xmm2, xmm2, xmm5
vpclmulqdq xmm1, xmm1, xmm2, 0
vpxor xmm1, xmm1, xmm0
vpxor xmm1, xmm1, xmm3
vpxor xmm4, xmm4, xmm0
vpxor xmm6, xmm6, xmm3
vpslldq xmm2, xmm1, 8
vpsrldq xmm1, xmm1, 8
vpxor xmm4, xmm4, xmm2
vpxor xmm6, xmm6, xmm1
vpslld xmm0, xmm4, 31
vpslld xmm1, xmm4, 30
vpslld xmm2, xmm4, 25
vpxor xmm0, xmm0, xmm1
vpxor xmm0, xmm0, xmm2
vmovdqa xmm1, xmm0
vpsrldq xmm1, xmm1, 4
vpslldq xmm0, xmm0, 12
vpxor xmm4, xmm4, xmm0
vpsrld xmm2, xmm4, 1
vpsrld xmm3, xmm4, 2
vpsrld xmm0, xmm4, 7
vpxor xmm2, xmm2, xmm3
vpxor xmm2, xmm2, xmm0
vpxor xmm2, xmm2, xmm1
vpxor xmm2, xmm2, xmm4
vpxor xmm6, xmm6, xmm2
vmovdqu xmm5, OWORD PTR [rsp]
L_AES_GCM_encrypt_update_avx1_done_128:
mov edx, r9d
cmp edi, edx
jge L_AES_GCM_encrypt_update_avx1_done_enc
mov r13d, r9d
and r13d, 4294967280
cmp edi, r13d
jge L_AES_GCM_encrypt_update_avx1_last_block_done
vmovdqu xmm9, OWORD PTR [r15]
vpshufb xmm8, xmm9, OWORD PTR L_avx1_aes_gcm_bswap_epi64
vpaddd xmm9, xmm9, OWORD PTR L_avx1_aes_gcm_one
vmovdqu OWORD PTR [r15], xmm9
vpxor xmm8, xmm8, [rax]
vaesenc xmm8, xmm8, [rax+16]
vaesenc xmm8, xmm8, [rax+32]
vaesenc xmm8, xmm8, [rax+48]
vaesenc xmm8, xmm8, [rax+64]
vaesenc xmm8, xmm8, [rax+80]
vaesenc xmm8, xmm8, [rax+96]
vaesenc xmm8, xmm8, [rax+112]
vaesenc xmm8, xmm8, [rax+128]
vaesenc xmm8, xmm8, [rax+144]
cmp r8d, 11
vmovdqa xmm9, OWORD PTR [rax+160]
jl L_AES_GCM_encrypt_update_avx1_aesenc_block_last
vaesenc xmm8, xmm8, xmm9
vaesenc xmm8, xmm8, [rax+176]
cmp r8d, 13
vmovdqa xmm9, OWORD PTR [rax+192]
jl L_AES_GCM_encrypt_update_avx1_aesenc_block_last
vaesenc xmm8, xmm8, xmm9
vaesenc xmm8, xmm8, [rax+208]
vmovdqa xmm9, OWORD PTR [rax+224]
L_AES_GCM_encrypt_update_avx1_aesenc_block_last:
vaesenclast xmm8, xmm8, xmm9
vmovdqu xmm9, OWORD PTR [r11+rdi]
vpxor xmm8, xmm8, xmm9
vmovdqu OWORD PTR [r10+rdi], xmm8
vpshufb xmm8, xmm8, OWORD PTR L_avx1_aes_gcm_bswap_mask
vpxor xmm6, xmm6, xmm8
add edi, 16
cmp edi, r13d
jge L_AES_GCM_encrypt_update_avx1_last_block_ghash
L_AES_GCM_encrypt_update_avx1_last_block_start:
vmovdqu xmm13, OWORD PTR [r11+rdi]
vmovdqu xmm9, OWORD PTR [r15]
vpshufb xmm8, xmm9, OWORD PTR L_avx1_aes_gcm_bswap_epi64
vpaddd xmm9, xmm9, OWORD PTR L_avx1_aes_gcm_one
vmovdqu OWORD PTR [r15], xmm9
vpxor xmm8, xmm8, [rax]
vpclmulqdq xmm10, xmm6, xmm5, 16
vaesenc xmm8, xmm8, [rax+16]
vaesenc xmm8, xmm8, [rax+32]
vpclmulqdq xmm11, xmm6, xmm5, 1
vaesenc xmm8, xmm8, [rax+48]
vaesenc xmm8, xmm8, [rax+64]
vpclmulqdq xmm12, xmm6, xmm5, 0
vaesenc xmm8, xmm8, [rax+80]
vpclmulqdq xmm1, xmm6, xmm5, 17
vaesenc xmm8, xmm8, [rax+96]
vpxor xmm10, xmm10, xmm11
vpslldq xmm2, xmm10, 8
vpsrldq xmm10, xmm10, 8
vaesenc xmm8, xmm8, [rax+112]
vpxor xmm2, xmm2, xmm12
vpxor xmm3, xmm1, xmm10
vmovdqa xmm0, OWORD PTR L_avx1_aes_gcm_mod2_128
vpclmulqdq xmm11, xmm2, xmm0, 16
vaesenc xmm8, xmm8, [rax+128]
vpshufd xmm10, xmm2, 78
vpxor xmm10, xmm10, xmm11
vpclmulqdq xmm11, xmm10, xmm0, 16
vaesenc xmm8, xmm8, [rax+144]
vpshufd xmm10, xmm10, 78
vpxor xmm10, xmm10, xmm11
vpxor xmm6, xmm10, xmm3
cmp r8d, 11
vmovdqa xmm9, OWORD PTR [rax+160]
jl L_AES_GCM_encrypt_update_avx1_aesenc_gfmul_last
vaesenc xmm8, xmm8, xmm9
vaesenc xmm8, xmm8, [rax+176]
cmp r8d, 13
vmovdqa xmm9, OWORD PTR [rax+192]
jl L_AES_GCM_encrypt_update_avx1_aesenc_gfmul_last
vaesenc xmm8, xmm8, xmm9
vaesenc xmm8, xmm8, [rax+208]
vmovdqa xmm9, OWORD PTR [rax+224]
L_AES_GCM_encrypt_update_avx1_aesenc_gfmul_last:
vaesenclast xmm8, xmm8, xmm9
vmovdqa xmm0, xmm13
vpxor xmm8, xmm8, xmm0
vmovdqu OWORD PTR [r10+rdi], xmm8
vpshufb xmm8, xmm8, OWORD PTR L_avx1_aes_gcm_bswap_mask
add edi, 16
vpxor xmm6, xmm6, xmm8
cmp edi, r13d
jl L_AES_GCM_encrypt_update_avx1_last_block_start
L_AES_GCM_encrypt_update_avx1_last_block_ghash:
; ghash_gfmul_red_avx
vpshufd xmm9, xmm5, 78
vpshufd xmm10, xmm6, 78
vpclmulqdq xmm11, xmm6, xmm5, 17
vpclmulqdq xmm8, xmm6, xmm5, 0
vpxor xmm9, xmm9, xmm5
vpxor xmm10, xmm10, xmm6
vpclmulqdq xmm9, xmm9, xmm10, 0
vpxor xmm9, xmm9, xmm8
vpxor xmm9, xmm9, xmm11
vpslldq xmm10, xmm9, 8
vpsrldq xmm9, xmm9, 8
vpxor xmm8, xmm8, xmm10
vpxor xmm6, xmm11, xmm9
vpslld xmm12, xmm8, 31
vpslld xmm13, xmm8, 30
vpslld xmm14, xmm8, 25
vpxor xmm12, xmm12, xmm13
vpxor xmm12, xmm12, xmm14
vpsrldq xmm13, xmm12, 4
vpslldq xmm12, xmm12, 12
vpxor xmm8, xmm8, xmm12
vpsrld xmm14, xmm8, 1
vpsrld xmm10, xmm8, 2
vpsrld xmm9, xmm8, 7
vpxor xmm14, xmm14, xmm10
vpxor xmm14, xmm14, xmm9
vpxor xmm14, xmm14, xmm13
vpxor xmm14, xmm14, xmm8
vpxor xmm6, xmm6, xmm14
L_AES_GCM_encrypt_update_avx1_last_block_done:
L_AES_GCM_encrypt_update_avx1_done_enc:
vmovdqa OWORD PTR [r12], xmm6
vzeroupper
vmovdqu xmm6, OWORD PTR [rsp+160]
vmovdqu xmm7, OWORD PTR [rsp+176]
vmovdqu xmm8, OWORD PTR [rsp+192]
vmovdqu xmm9, OWORD PTR [rsp+208]
vmovdqu xmm10, OWORD PTR [rsp+224]
vmovdqu xmm11, OWORD PTR [rsp+240]
vmovdqu xmm12, OWORD PTR [rsp+256]
vmovdqu xmm13, OWORD PTR [rsp+272]
vmovdqu xmm14, OWORD PTR [rsp+288]
vmovdqu xmm15, OWORD PTR [rsp+304]
add rsp, 320
pop rdi
pop r15
pop r14
pop r12
pop r13
ret
AES_GCM_encrypt_update_avx1 ENDP
_text ENDS
_text SEGMENT READONLY PARA
;-----------------------------------------------------------------------
; AES_GCM_encrypt_final_avx1 (Microsoft x64 ABI)
; Finishes AES-GCM encryption: folds the two data lengths into the
; GHASH state, multiplies by the hash key, byte-swaps and XORs with the
; pre-computed E(K,Y0) block to produce the authentication tag.
; In:  rcx      = GHASH state X (one 16-byte block)
;      rdx      = output buffer for the authentication tag
;      r8d      = tag length in bytes (16 takes the full-block path)
;      r9d      = first length value in bytes (presumably the AAD
;                 length -- confirm against caller)
;      [rsp+64] = second length value in bytes (presumably the cipher
;                 text length -- confirm against caller)
;      [rsp+72] = pointer to the hash key H
;      [rsp+80] = pointer to E(K,Y0) (first encrypted counter block)
; Out: r8d tag bytes written to [rdx]
; Non-volatile xmm6-xmm13 are saved/restored per the Win64 ABI.
;-----------------------------------------------------------------------
AES_GCM_encrypt_final_avx1 PROC
push r13
push r12
push r14
mov rax, rcx
mov r10d, r9d
mov r9, rdx
; stack arguments: after 3 pushes the 5th argument is at [rsp+64]
mov r11d, DWORD PTR [rsp+64]
mov r12, QWORD PTR [rsp+72]
mov r14, QWORD PTR [rsp+80]
sub rsp, 144
; save non-volatile xmm registers; [rsp+0..15] is scratch for the
; partial-tag copy below
vmovdqu OWORD PTR [rsp+16], xmm6
vmovdqu OWORD PTR [rsp+32], xmm7
vmovdqu OWORD PTR [rsp+48], xmm8
vmovdqu OWORD PTR [rsp+64], xmm9
vmovdqu OWORD PTR [rsp+80], xmm10
vmovdqu OWORD PTR [rsp+96], xmm11
vmovdqu OWORD PTR [rsp+112], xmm12
vmovdqu OWORD PTR [rsp+128], xmm13
; xmm4 = GHASH state X, xmm5 = H, xmm6 = E(K,Y0)
vmovdqa xmm4, OWORD PTR [rax]
vmovdqa xmm5, OWORD PTR [r12]
vmovdqa xmm6, OWORD PTR [r14]
; H = H*x in GF(2^128): 128-bit shift left by 1, then XOR the
; reduction constant if the (replicated, sign-extended) top bit was set
vpsrlq xmm8, xmm5, 63
vpsllq xmm7, xmm5, 1
vpslldq xmm8, xmm8, 8
vpor xmm7, xmm7, xmm8
vpshufd xmm5, xmm5, 255
vpsrad xmm5, xmm5, 31
vpand xmm5, xmm5, OWORD PTR L_avx1_aes_gcm_mod2_128
vpxor xmm5, xmm5, xmm7
; build the GCM length block: both lengths converted to bits (<< 3)
; and packed into one 128-bit value, then XORed into X
mov edx, r10d
mov ecx, r11d
shl rdx, 3
shl rcx, 3
vmovq xmm0, rdx
vmovq xmm1, rcx
vpunpcklqdq xmm0, xmm0, xmm1
vpxor xmm4, xmm4, xmm0
; ghash_gfmul_red_avx
; X = (X * H) reduced: Karatsuba carry-less multiply (three
; vpclmulqdq) followed by the shift-based GHASH reduction
vpshufd xmm8, xmm5, 78
vpshufd xmm9, xmm4, 78
vpclmulqdq xmm10, xmm4, xmm5, 17
vpclmulqdq xmm7, xmm4, xmm5, 0
vpxor xmm8, xmm8, xmm5
vpxor xmm9, xmm9, xmm4
vpclmulqdq xmm8, xmm8, xmm9, 0
vpxor xmm8, xmm8, xmm7
vpxor xmm8, xmm8, xmm10
vpslldq xmm9, xmm8, 8
vpsrldq xmm8, xmm8, 8
vpxor xmm7, xmm7, xmm9
vpxor xmm4, xmm10, xmm8
; reduce the 256-bit product modulo the GHASH polynomial
vpslld xmm11, xmm7, 31
vpslld xmm12, xmm7, 30
vpslld xmm13, xmm7, 25
vpxor xmm11, xmm11, xmm12
vpxor xmm11, xmm11, xmm13
vpsrldq xmm12, xmm11, 4
vpslldq xmm11, xmm11, 12
vpxor xmm7, xmm7, xmm11
vpsrld xmm13, xmm7, 1
vpsrld xmm9, xmm7, 2
vpsrld xmm8, xmm7, 7
vpxor xmm13, xmm13, xmm9
vpxor xmm13, xmm13, xmm8
vpxor xmm13, xmm13, xmm12
vpxor xmm13, xmm13, xmm7
vpxor xmm4, xmm4, xmm13
; tag = byteswap(X) ^ E(K,Y0)
vpshufb xmm4, xmm4, OWORD PTR L_avx1_aes_gcm_bswap_mask
vpxor xmm0, xmm4, xmm6
cmp r8d, 16
je L_AES_GCM_encrypt_final_avx1_store_tag_16
; partial tag: spill the block to the stack and copy r8d bytes out
xor rcx, rcx
vmovdqu OWORD PTR [rsp], xmm0
L_AES_GCM_encrypt_final_avx1_store_tag_loop:
movzx r13d, BYTE PTR [rsp+rcx]
mov BYTE PTR [r9+rcx], r13b
inc ecx
cmp ecx, r8d
jne L_AES_GCM_encrypt_final_avx1_store_tag_loop
jmp L_AES_GCM_encrypt_final_avx1_store_tag_done
L_AES_GCM_encrypt_final_avx1_store_tag_16:
; full 16-byte tag: single store
vmovdqu OWORD PTR [r9], xmm0
L_AES_GCM_encrypt_final_avx1_store_tag_done:
vzeroupper
; restore non-volatile xmm registers and the stack, then return
vmovdqu xmm6, OWORD PTR [rsp+16]
vmovdqu xmm7, OWORD PTR [rsp+32]
vmovdqu xmm8, OWORD PTR [rsp+48]
vmovdqu xmm9, OWORD PTR [rsp+64]
vmovdqu xmm10, OWORD PTR [rsp+80]
vmovdqu xmm11, OWORD PTR [rsp+96]
vmovdqu xmm12, OWORD PTR [rsp+112]
vmovdqu xmm13, OWORD PTR [rsp+128]
add rsp, 144
pop r14
pop r12
pop r13
ret
AES_GCM_encrypt_final_avx1 ENDP
_text ENDS
_text SEGMENT READONLY PARA
;-----------------------------------------------------------------------
; AES_GCM_decrypt_update_avx1 (Microsoft x64 ABI)
; Decrypts a chunk of AES-GCM cipher text while folding the cipher text
; into the GHASH state.  The main loop handles 128 bytes (8 AES blocks)
; per iteration with the AES-CTR rounds interleaved with 8 GHASH
; multiplies against a pre-computed H^1..H^8 table; remaining whole
; blocks are handled one at a time.
; In:  rcx       = AES key schedule (round keys at [rax+0..224])
;      edx       = number of AES rounds (10/12/14; tested against 11/13)
;      r8        = output buffer (plain text)
;      r9        = input buffer (cipher text)
;      [rsp+80]  = number of bytes to process
;      [rsp+88]  = pointer to GHASH state X (read and written back)
;      [rsp+96]  = pointer to the hash key H
;      [rsp+104] = pointer to the counter block (read and written back)
; Register roles after the moves below:
;      rax = key schedule, r8d = rounds, r10 = out, r11 = in,
;      r9d = byte count, r12 = X ptr, r14 = H ptr, r15 = counter ptr,
;      edi = bytes processed so far, r13d = loop bound
; Non-volatile xmm6-xmm15 are saved/restored per the Win64 ABI.
;-----------------------------------------------------------------------
AES_GCM_decrypt_update_avx1 PROC
push r13
push r12
push r14
push r15
push rdi
mov rax, rcx
mov r10, r8
mov r8d, edx
mov r11, r9
; stack arguments: after 5 pushes the 5th argument is at [rsp+80]
mov r9d, DWORD PTR [rsp+80]
mov r12, QWORD PTR [rsp+88]
mov r14, QWORD PTR [rsp+96]
mov r15, QWORD PTR [rsp+104]
sub rsp, 328
; save non-volatile xmm registers; [rsp+0..127] is reused below as
; the H^1..H^8 table for the 8-block loop
vmovdqu OWORD PTR [rsp+168], xmm6
vmovdqu OWORD PTR [rsp+184], xmm7
vmovdqu OWORD PTR [rsp+200], xmm8
vmovdqu OWORD PTR [rsp+216], xmm9
vmovdqu OWORD PTR [rsp+232], xmm10
vmovdqu OWORD PTR [rsp+248], xmm11
vmovdqu OWORD PTR [rsp+264], xmm12
vmovdqu OWORD PTR [rsp+280], xmm13
vmovdqu OWORD PTR [rsp+296], xmm14
vmovdqu OWORD PTR [rsp+312], xmm15
; xmm6 = GHASH state X, xmm5 = H
vmovdqa xmm6, OWORD PTR [r12]
vmovdqa xmm5, OWORD PTR [r14]
; H = H*x in GF(2^128): 128-bit shift left by 1, then XOR the
; reduction constant if the (replicated, sign-extended) top bit was set
vpsrlq xmm9, xmm5, 63
vpsllq xmm8, xmm5, 1
vpslldq xmm9, xmm9, 8
vpor xmm8, xmm8, xmm9
vpshufd xmm5, xmm5, 255
vpsrad xmm5, xmm5, 31
vpand xmm5, xmm5, OWORD PTR L_avx1_aes_gcm_mod2_128
vpxor xmm5, xmm5, xmm8
xor edi, edi
cmp r9d, 128
mov r13d, r9d
jl L_AES_GCM_decrypt_update_avx1_done_128
; r13d = byte count rounded down to a multiple of 128 (0xffffff80)
and r13d, 4294967168
; xmm2 carries the running GHASH value through the 128-byte loop
vmovdqa xmm2, xmm6
; pre-compute the table H^1..H^8 at [rsp+0..112]; even powers are
; squarings, odd powers use the full gfmul with reduction
; H ^ 1
vmovdqu OWORD PTR [rsp], xmm5
; H ^ 2
vpclmulqdq xmm8, xmm5, xmm5, 0
vpclmulqdq xmm0, xmm5, xmm5, 17
vpslld xmm12, xmm8, 31
vpslld xmm13, xmm8, 30
vpslld xmm14, xmm8, 25
vpxor xmm12, xmm12, xmm13
vpxor xmm12, xmm12, xmm14
vpsrldq xmm13, xmm12, 4
vpslldq xmm12, xmm12, 12
vpxor xmm8, xmm8, xmm12
vpsrld xmm14, xmm8, 1
vpsrld xmm10, xmm8, 2
vpsrld xmm9, xmm8, 7
vpxor xmm14, xmm14, xmm10
vpxor xmm14, xmm14, xmm9
vpxor xmm14, xmm14, xmm13
vpxor xmm14, xmm14, xmm8
vpxor xmm0, xmm0, xmm14
vmovdqu OWORD PTR [rsp+16], xmm0
; H ^ 3
; ghash_gfmul_red_avx
vpshufd xmm9, xmm5, 78
vpshufd xmm10, xmm0, 78
vpclmulqdq xmm11, xmm0, xmm5, 17
vpclmulqdq xmm8, xmm0, xmm5, 0
vpxor xmm9, xmm9, xmm5
vpxor xmm10, xmm10, xmm0
vpclmulqdq xmm9, xmm9, xmm10, 0
vpxor xmm9, xmm9, xmm8
vpxor xmm9, xmm9, xmm11
vpslldq xmm10, xmm9, 8
vpsrldq xmm9, xmm9, 8
vpxor xmm8, xmm8, xmm10
vpxor xmm1, xmm11, xmm9
vpslld xmm12, xmm8, 31
vpslld xmm13, xmm8, 30
vpslld xmm14, xmm8, 25
vpxor xmm12, xmm12, xmm13
vpxor xmm12, xmm12, xmm14
vpsrldq xmm13, xmm12, 4
vpslldq xmm12, xmm12, 12
vpxor xmm8, xmm8, xmm12
vpsrld xmm14, xmm8, 1
vpsrld xmm10, xmm8, 2
vpsrld xmm9, xmm8, 7
vpxor xmm14, xmm14, xmm10
vpxor xmm14, xmm14, xmm9
vpxor xmm14, xmm14, xmm13
vpxor xmm14, xmm14, xmm8
vpxor xmm1, xmm1, xmm14
vmovdqu OWORD PTR [rsp+32], xmm1
; H ^ 4
vpclmulqdq xmm8, xmm0, xmm0, 0
vpclmulqdq xmm3, xmm0, xmm0, 17
vpslld xmm12, xmm8, 31
vpslld xmm13, xmm8, 30
vpslld xmm14, xmm8, 25
vpxor xmm12, xmm12, xmm13
vpxor xmm12, xmm12, xmm14
vpsrldq xmm13, xmm12, 4
vpslldq xmm12, xmm12, 12
vpxor xmm8, xmm8, xmm12
vpsrld xmm14, xmm8, 1
vpsrld xmm10, xmm8, 2
vpsrld xmm9, xmm8, 7
vpxor xmm14, xmm14, xmm10
vpxor xmm14, xmm14, xmm9
vpxor xmm14, xmm14, xmm13
vpxor xmm14, xmm14, xmm8
vpxor xmm3, xmm3, xmm14
vmovdqu OWORD PTR [rsp+48], xmm3
; H ^ 5
; ghash_gfmul_red_avx
vpshufd xmm9, xmm0, 78
vpshufd xmm10, xmm1, 78
vpclmulqdq xmm11, xmm1, xmm0, 17
vpclmulqdq xmm8, xmm1, xmm0, 0
vpxor xmm9, xmm9, xmm0
vpxor xmm10, xmm10, xmm1
vpclmulqdq xmm9, xmm9, xmm10, 0
vpxor xmm9, xmm9, xmm8
vpxor xmm9, xmm9, xmm11
vpslldq xmm10, xmm9, 8
vpsrldq xmm9, xmm9, 8
vpxor xmm8, xmm8, xmm10
vpxor xmm7, xmm11, xmm9
vpslld xmm12, xmm8, 31
vpslld xmm13, xmm8, 30
vpslld xmm14, xmm8, 25
vpxor xmm12, xmm12, xmm13
vpxor xmm12, xmm12, xmm14
vpsrldq xmm13, xmm12, 4
vpslldq xmm12, xmm12, 12
vpxor xmm8, xmm8, xmm12
vpsrld xmm14, xmm8, 1
vpsrld xmm10, xmm8, 2
vpsrld xmm9, xmm8, 7
vpxor xmm14, xmm14, xmm10
vpxor xmm14, xmm14, xmm9
vpxor xmm14, xmm14, xmm13
vpxor xmm14, xmm14, xmm8
vpxor xmm7, xmm7, xmm14
vmovdqu OWORD PTR [rsp+64], xmm7
; H ^ 6
vpclmulqdq xmm8, xmm1, xmm1, 0
vpclmulqdq xmm7, xmm1, xmm1, 17
vpslld xmm12, xmm8, 31
vpslld xmm13, xmm8, 30
vpslld xmm14, xmm8, 25
vpxor xmm12, xmm12, xmm13
vpxor xmm12, xmm12, xmm14
vpsrldq xmm13, xmm12, 4
vpslldq xmm12, xmm12, 12
vpxor xmm8, xmm8, xmm12
vpsrld xmm14, xmm8, 1
vpsrld xmm10, xmm8, 2
vpsrld xmm9, xmm8, 7
vpxor xmm14, xmm14, xmm10
vpxor xmm14, xmm14, xmm9
vpxor xmm14, xmm14, xmm13
vpxor xmm14, xmm14, xmm8
vpxor xmm7, xmm7, xmm14
vmovdqu OWORD PTR [rsp+80], xmm7
; H ^ 7
; ghash_gfmul_red_avx
vpshufd xmm9, xmm1, 78
vpshufd xmm10, xmm3, 78
vpclmulqdq xmm11, xmm3, xmm1, 17
vpclmulqdq xmm8, xmm3, xmm1, 0
vpxor xmm9, xmm9, xmm1
vpxor xmm10, xmm10, xmm3
vpclmulqdq xmm9, xmm9, xmm10, 0
vpxor xmm9, xmm9, xmm8
vpxor xmm9, xmm9, xmm11
vpslldq xmm10, xmm9, 8
vpsrldq xmm9, xmm9, 8
vpxor xmm8, xmm8, xmm10
vpxor xmm7, xmm11, xmm9
vpslld xmm12, xmm8, 31
vpslld xmm13, xmm8, 30
vpslld xmm14, xmm8, 25
vpxor xmm12, xmm12, xmm13
vpxor xmm12, xmm12, xmm14
vpsrldq xmm13, xmm12, 4
vpslldq xmm12, xmm12, 12
vpxor xmm8, xmm8, xmm12
vpsrld xmm14, xmm8, 1
vpsrld xmm10, xmm8, 2
vpsrld xmm9, xmm8, 7
vpxor xmm14, xmm14, xmm10
vpxor xmm14, xmm14, xmm9
vpxor xmm14, xmm14, xmm13
vpxor xmm14, xmm14, xmm8
vpxor xmm7, xmm7, xmm14
vmovdqu OWORD PTR [rsp+96], xmm7
; H ^ 8
vpclmulqdq xmm8, xmm3, xmm3, 0
vpclmulqdq xmm7, xmm3, xmm3, 17
vpslld xmm12, xmm8, 31
vpslld xmm13, xmm8, 30
vpslld xmm14, xmm8, 25
vpxor xmm12, xmm12, xmm13
vpxor xmm12, xmm12, xmm14
vpsrldq xmm13, xmm12, 4
vpslldq xmm12, xmm12, 12
vpxor xmm8, xmm8, xmm12
vpsrld xmm14, xmm8, 1
vpsrld xmm10, xmm8, 2
vpsrld xmm9, xmm8, 7
vpxor xmm14, xmm14, xmm10
vpxor xmm14, xmm14, xmm9
vpxor xmm14, xmm14, xmm13
vpxor xmm14, xmm14, xmm8
vpxor xmm7, xmm7, xmm14
vmovdqu OWORD PTR [rsp+112], xmm7
; main loop: 128 bytes per iteration.  For decrypt the GHASH input is
; the cipher text, so the 8 multiplies can run in the same iteration
; as the CTR encryption of the corresponding counters.
L_AES_GCM_decrypt_update_avx1_ghash_128:
; rcx = current input position, rdx = current output position
lea rcx, QWORD PTR [r11+rdi]
lea rdx, QWORD PTR [r10+rdi]
; build 8 byte-swapped counter values Y..Y+7 and advance Y by 8
vmovdqu xmm0, OWORD PTR [r15]
vmovdqa xmm1, OWORD PTR L_avx1_aes_gcm_bswap_epi64
vpshufb xmm8, xmm0, xmm1
vpaddd xmm9, xmm0, OWORD PTR L_avx1_aes_gcm_one
vpshufb xmm9, xmm9, xmm1
vpaddd xmm10, xmm0, OWORD PTR L_avx1_aes_gcm_two
vpshufb xmm10, xmm10, xmm1
vpaddd xmm11, xmm0, OWORD PTR L_avx1_aes_gcm_three
vpshufb xmm11, xmm11, xmm1
vpaddd xmm12, xmm0, OWORD PTR L_avx1_aes_gcm_four
vpshufb xmm12, xmm12, xmm1
vpaddd xmm13, xmm0, OWORD PTR L_avx1_aes_gcm_five
vpshufb xmm13, xmm13, xmm1
vpaddd xmm14, xmm0, OWORD PTR L_avx1_aes_gcm_six
vpshufb xmm14, xmm14, xmm1
vpaddd xmm15, xmm0, OWORD PTR L_avx1_aes_gcm_seven
vpshufb xmm15, xmm15, xmm1
vpaddd xmm0, xmm0, OWORD PTR L_avx1_aes_gcm_eight
vmovdqa xmm7, OWORD PTR [rax]
vmovdqu OWORD PTR [r15], xmm0
; AES round 0 (whitening) on all 8 blocks
vpxor xmm8, xmm8, xmm7
vpxor xmm9, xmm9, xmm7
vpxor xmm10, xmm10, xmm7
vpxor xmm11, xmm11, xmm7
vpxor xmm12, xmm12, xmm7
vpxor xmm13, xmm13, xmm7
vpxor xmm14, xmm14, xmm7
vpxor xmm15, xmm15, xmm7
; AES rounds 1..8 interleaved with the 8 GHASH multiplies; cipher
; text block i is multiplied by H^(8-i) from the table, products are
; accumulated Karatsuba-style in xmm2 (lo), xmm3 (hi), xmm1 (mid)
vmovdqu xmm7, OWORD PTR [rsp+112]
vmovdqu xmm0, OWORD PTR [rcx]
vaesenc xmm8, xmm8, [rax+16]
vpshufb xmm0, xmm0, OWORD PTR L_avx1_aes_gcm_bswap_mask
; fold the previous GHASH value into the first block
vpxor xmm0, xmm0, xmm2
vpshufd xmm1, xmm7, 78
vpshufd xmm5, xmm0, 78
vpxor xmm1, xmm1, xmm7
vpxor xmm5, xmm5, xmm0
vpclmulqdq xmm3, xmm0, xmm7, 17
vaesenc xmm9, xmm9, [rax+16]
vaesenc xmm10, xmm10, [rax+16]
vpclmulqdq xmm2, xmm0, xmm7, 0
vaesenc xmm11, xmm11, [rax+16]
vaesenc xmm12, xmm12, [rax+16]
vpclmulqdq xmm1, xmm1, xmm5, 0
vaesenc xmm13, xmm13, [rax+16]
vaesenc xmm14, xmm14, [rax+16]
vaesenc xmm15, xmm15, [rax+16]
vpxor xmm1, xmm1, xmm2
vpxor xmm1, xmm1, xmm3
; block 2 * H^7
vmovdqu xmm7, OWORD PTR [rsp+96]
vmovdqu xmm0, OWORD PTR [rcx+16]
vpshufd xmm4, xmm7, 78
vpshufb xmm0, xmm0, OWORD PTR L_avx1_aes_gcm_bswap_mask
vaesenc xmm8, xmm8, [rax+32]
vpxor xmm4, xmm4, xmm7
vpshufd xmm5, xmm0, 78
vpxor xmm5, xmm5, xmm0
vpclmulqdq xmm6, xmm0, xmm7, 17
vaesenc xmm9, xmm9, [rax+32]
vaesenc xmm10, xmm10, [rax+32]
vpclmulqdq xmm7, xmm0, xmm7, 0
vaesenc xmm11, xmm11, [rax+32]
vaesenc xmm12, xmm12, [rax+32]
vpclmulqdq xmm4, xmm4, xmm5, 0
vaesenc xmm13, xmm13, [rax+32]
vaesenc xmm14, xmm14, [rax+32]
vaesenc xmm15, xmm15, [rax+32]
vpxor xmm1, xmm1, xmm7
vpxor xmm2, xmm2, xmm7
vpxor xmm1, xmm1, xmm6
vpxor xmm3, xmm3, xmm6
vpxor xmm1, xmm1, xmm4
; block 3 * H^6
vmovdqu xmm7, OWORD PTR [rsp+80]
vmovdqu xmm0, OWORD PTR [rcx+32]
vpshufd xmm4, xmm7, 78
vpshufb xmm0, xmm0, OWORD PTR L_avx1_aes_gcm_bswap_mask
vaesenc xmm8, xmm8, [rax+48]
vpxor xmm4, xmm4, xmm7
vpshufd xmm5, xmm0, 78
vpxor xmm5, xmm5, xmm0
vpclmulqdq xmm6, xmm0, xmm7, 17
vaesenc xmm9, xmm9, [rax+48]
vaesenc xmm10, xmm10, [rax+48]
vpclmulqdq xmm7, xmm0, xmm7, 0
vaesenc xmm11, xmm11, [rax+48]
vaesenc xmm12, xmm12, [rax+48]
vpclmulqdq xmm4, xmm4, xmm5, 0
vaesenc xmm13, xmm13, [rax+48]
vaesenc xmm14, xmm14, [rax+48]
vaesenc xmm15, xmm15, [rax+48]
vpxor xmm1, xmm1, xmm7
vpxor xmm2, xmm2, xmm7
vpxor xmm1, xmm1, xmm6
vpxor xmm3, xmm3, xmm6
vpxor xmm1, xmm1, xmm4
; block 4 * H^5
vmovdqu xmm7, OWORD PTR [rsp+64]
vmovdqu xmm0, OWORD PTR [rcx+48]
vpshufd xmm4, xmm7, 78
vpshufb xmm0, xmm0, OWORD PTR L_avx1_aes_gcm_bswap_mask
vaesenc xmm8, xmm8, [rax+64]
vpxor xmm4, xmm4, xmm7
vpshufd xmm5, xmm0, 78
vpxor xmm5, xmm5, xmm0
vpclmulqdq xmm6, xmm0, xmm7, 17
vaesenc xmm9, xmm9, [rax+64]
vaesenc xmm10, xmm10, [rax+64]
vpclmulqdq xmm7, xmm0, xmm7, 0
vaesenc xmm11, xmm11, [rax+64]
vaesenc xmm12, xmm12, [rax+64]
vpclmulqdq xmm4, xmm4, xmm5, 0
vaesenc xmm13, xmm13, [rax+64]
vaesenc xmm14, xmm14, [rax+64]
vaesenc xmm15, xmm15, [rax+64]
vpxor xmm1, xmm1, xmm7
vpxor xmm2, xmm2, xmm7
vpxor xmm1, xmm1, xmm6
vpxor xmm3, xmm3, xmm6
vpxor xmm1, xmm1, xmm4
; block 5 * H^4
vmovdqu xmm7, OWORD PTR [rsp+48]
vmovdqu xmm0, OWORD PTR [rcx+64]
vpshufd xmm4, xmm7, 78
vpshufb xmm0, xmm0, OWORD PTR L_avx1_aes_gcm_bswap_mask
vaesenc xmm8, xmm8, [rax+80]
vpxor xmm4, xmm4, xmm7
vpshufd xmm5, xmm0, 78
vpxor xmm5, xmm5, xmm0
vpclmulqdq xmm6, xmm0, xmm7, 17
vaesenc xmm9, xmm9, [rax+80]
vaesenc xmm10, xmm10, [rax+80]
vpclmulqdq xmm7, xmm0, xmm7, 0
vaesenc xmm11, xmm11, [rax+80]
vaesenc xmm12, xmm12, [rax+80]
vpclmulqdq xmm4, xmm4, xmm5, 0
vaesenc xmm13, xmm13, [rax+80]
vaesenc xmm14, xmm14, [rax+80]
vaesenc xmm15, xmm15, [rax+80]
vpxor xmm1, xmm1, xmm7
vpxor xmm2, xmm2, xmm7
vpxor xmm1, xmm1, xmm6
vpxor xmm3, xmm3, xmm6
vpxor xmm1, xmm1, xmm4
; block 6 * H^3
vmovdqu xmm7, OWORD PTR [rsp+32]
vmovdqu xmm0, OWORD PTR [rcx+80]
vpshufd xmm4, xmm7, 78
vpshufb xmm0, xmm0, OWORD PTR L_avx1_aes_gcm_bswap_mask
vaesenc xmm8, xmm8, [rax+96]
vpxor xmm4, xmm4, xmm7
vpshufd xmm5, xmm0, 78
vpxor xmm5, xmm5, xmm0
vpclmulqdq xmm6, xmm0, xmm7, 17
vaesenc xmm9, xmm9, [rax+96]
vaesenc xmm10, xmm10, [rax+96]
vpclmulqdq xmm7, xmm0, xmm7, 0
vaesenc xmm11, xmm11, [rax+96]
vaesenc xmm12, xmm12, [rax+96]
vpclmulqdq xmm4, xmm4, xmm5, 0
vaesenc xmm13, xmm13, [rax+96]
vaesenc xmm14, xmm14, [rax+96]
vaesenc xmm15, xmm15, [rax+96]
vpxor xmm1, xmm1, xmm7
vpxor xmm2, xmm2, xmm7
vpxor xmm1, xmm1, xmm6
vpxor xmm3, xmm3, xmm6
vpxor xmm1, xmm1, xmm4
; block 7 * H^2
vmovdqu xmm7, OWORD PTR [rsp+16]
vmovdqu xmm0, OWORD PTR [rcx+96]
vpshufd xmm4, xmm7, 78
vpshufb xmm0, xmm0, OWORD PTR L_avx1_aes_gcm_bswap_mask
vaesenc xmm8, xmm8, [rax+112]
vpxor xmm4, xmm4, xmm7
vpshufd xmm5, xmm0, 78
vpxor xmm5, xmm5, xmm0
vpclmulqdq xmm6, xmm0, xmm7, 17
vaesenc xmm9, xmm9, [rax+112]
vaesenc xmm10, xmm10, [rax+112]
vpclmulqdq xmm7, xmm0, xmm7, 0
vaesenc xmm11, xmm11, [rax+112]
vaesenc xmm12, xmm12, [rax+112]
vpclmulqdq xmm4, xmm4, xmm5, 0
vaesenc xmm13, xmm13, [rax+112]
vaesenc xmm14, xmm14, [rax+112]
vaesenc xmm15, xmm15, [rax+112]
vpxor xmm1, xmm1, xmm7
vpxor xmm2, xmm2, xmm7
vpxor xmm1, xmm1, xmm6
vpxor xmm3, xmm3, xmm6
vpxor xmm1, xmm1, xmm4
; block 8 * H^1
vmovdqu xmm7, OWORD PTR [rsp]
vmovdqu xmm0, OWORD PTR [rcx+112]
vpshufd xmm4, xmm7, 78
vpshufb xmm0, xmm0, OWORD PTR L_avx1_aes_gcm_bswap_mask
vaesenc xmm8, xmm8, [rax+128]
vpxor xmm4, xmm4, xmm7
vpshufd xmm5, xmm0, 78
vpxor xmm5, xmm5, xmm0
vpclmulqdq xmm6, xmm0, xmm7, 17
vaesenc xmm9, xmm9, [rax+128]
vaesenc xmm10, xmm10, [rax+128]
vpclmulqdq xmm7, xmm0, xmm7, 0
vaesenc xmm11, xmm11, [rax+128]
vaesenc xmm12, xmm12, [rax+128]
vpclmulqdq xmm4, xmm4, xmm5, 0
vaesenc xmm13, xmm13, [rax+128]
vaesenc xmm14, xmm14, [rax+128]
vaesenc xmm15, xmm15, [rax+128]
vpxor xmm1, xmm1, xmm7
vpxor xmm2, xmm2, xmm7
vpxor xmm1, xmm1, xmm6
vpxor xmm3, xmm3, xmm6
vpxor xmm1, xmm1, xmm4
; combine the accumulated halves and reduce modulo the GHASH
; polynomial (interleaved with AES round 9); result ends in xmm2
vpslldq xmm5, xmm1, 8
vpsrldq xmm1, xmm1, 8
vaesenc xmm8, xmm8, [rax+144]
vpxor xmm2, xmm2, xmm5
vpxor xmm3, xmm3, xmm1
vaesenc xmm9, xmm9, [rax+144]
vpslld xmm7, xmm2, 31
vpslld xmm4, xmm2, 30
vpslld xmm5, xmm2, 25
vaesenc xmm10, xmm10, [rax+144]
vpxor xmm7, xmm7, xmm4
vpxor xmm7, xmm7, xmm5
vaesenc xmm11, xmm11, [rax+144]
vpsrldq xmm4, xmm7, 4
vpslldq xmm7, xmm7, 12
vaesenc xmm12, xmm12, [rax+144]
vpxor xmm2, xmm2, xmm7
vpsrld xmm5, xmm2, 1
vaesenc xmm13, xmm13, [rax+144]
vpsrld xmm1, xmm2, 2
vpsrld xmm0, xmm2, 7
vaesenc xmm14, xmm14, [rax+144]
vpxor xmm5, xmm5, xmm1
vpxor xmm5, xmm5, xmm0
vaesenc xmm15, xmm15, [rax+144]
vpxor xmm5, xmm5, xmm4
vpxor xmm2, xmm2, xmm5
vpxor xmm2, xmm2, xmm3
; extra AES rounds for 192-bit (r8d >= 11) and 256-bit (r8d >= 13)
; keys; xmm7 ends up holding the last round key in every case
cmp r8d, 11
vmovdqa xmm7, OWORD PTR [rax+160]
jl L_AES_GCM_decrypt_update_avx1_aesenc_128_ghash_avx_done
vaesenc xmm8, xmm8, xmm7
vaesenc xmm9, xmm9, xmm7
vaesenc xmm10, xmm10, xmm7
vaesenc xmm11, xmm11, xmm7
vaesenc xmm12, xmm12, xmm7
vaesenc xmm13, xmm13, xmm7
vaesenc xmm14, xmm14, xmm7
vaesenc xmm15, xmm15, xmm7
vmovdqa xmm7, OWORD PTR [rax+176]
vaesenc xmm8, xmm8, xmm7
vaesenc xmm9, xmm9, xmm7
vaesenc xmm10, xmm10, xmm7
vaesenc xmm11, xmm11, xmm7
vaesenc xmm12, xmm12, xmm7
vaesenc xmm13, xmm13, xmm7
vaesenc xmm14, xmm14, xmm7
vaesenc xmm15, xmm15, xmm7
cmp r8d, 13
vmovdqa xmm7, OWORD PTR [rax+192]
jl L_AES_GCM_decrypt_update_avx1_aesenc_128_ghash_avx_done
vaesenc xmm8, xmm8, xmm7
vaesenc xmm9, xmm9, xmm7
vaesenc xmm10, xmm10, xmm7
vaesenc xmm11, xmm11, xmm7
vaesenc xmm12, xmm12, xmm7
vaesenc xmm13, xmm13, xmm7
vaesenc xmm14, xmm14, xmm7
vaesenc xmm15, xmm15, xmm7
vmovdqa xmm7, OWORD PTR [rax+208]
vaesenc xmm8, xmm8, xmm7
vaesenc xmm9, xmm9, xmm7
vaesenc xmm10, xmm10, xmm7
vaesenc xmm11, xmm11, xmm7
vaesenc xmm12, xmm12, xmm7
vaesenc xmm13, xmm13, xmm7
vaesenc xmm14, xmm14, xmm7
vaesenc xmm15, xmm15, xmm7
vmovdqa xmm7, OWORD PTR [rax+224]
L_AES_GCM_decrypt_update_avx1_aesenc_128_ghash_avx_done:
; final AES round, then XOR the key stream with the cipher text and
; store the resulting plain text
vaesenclast xmm8, xmm8, xmm7
vaesenclast xmm9, xmm9, xmm7
vmovdqu xmm0, OWORD PTR [rcx]
vmovdqu xmm1, OWORD PTR [rcx+16]
vpxor xmm8, xmm8, xmm0
vpxor xmm9, xmm9, xmm1
vmovdqu OWORD PTR [rdx], xmm8
vmovdqu OWORD PTR [rdx+16], xmm9
vaesenclast xmm10, xmm10, xmm7
vaesenclast xmm11, xmm11, xmm7
vmovdqu xmm0, OWORD PTR [rcx+32]
vmovdqu xmm1, OWORD PTR [rcx+48]
vpxor xmm10, xmm10, xmm0
vpxor xmm11, xmm11, xmm1
vmovdqu OWORD PTR [rdx+32], xmm10
vmovdqu OWORD PTR [rdx+48], xmm11
vaesenclast xmm12, xmm12, xmm7
vaesenclast xmm13, xmm13, xmm7
vmovdqu xmm0, OWORD PTR [rcx+64]
vmovdqu xmm1, OWORD PTR [rcx+80]
vpxor xmm12, xmm12, xmm0
vpxor xmm13, xmm13, xmm1
vmovdqu OWORD PTR [rdx+64], xmm12
vmovdqu OWORD PTR [rdx+80], xmm13
vaesenclast xmm14, xmm14, xmm7
vaesenclast xmm15, xmm15, xmm7
vmovdqu xmm0, OWORD PTR [rcx+96]
vmovdqu xmm1, OWORD PTR [rcx+112]
vpxor xmm14, xmm14, xmm0
vpxor xmm15, xmm15, xmm1
vmovdqu OWORD PTR [rdx+96], xmm14
vmovdqu OWORD PTR [rdx+112], xmm15
add edi, 128
cmp edi, r13d
jl L_AES_GCM_decrypt_update_avx1_ghash_128
; move the running GHASH value back to xmm6 and reload H
vmovdqa xmm6, xmm2
vmovdqu xmm5, OWORD PTR [rsp]
L_AES_GCM_decrypt_update_avx1_done_128:
mov edx, r9d
cmp edi, edx
jge L_AES_GCM_decrypt_update_avx1_done_dec
; r13d = byte count rounded down to a whole block (0xfffffff0)
mov r13d, r9d
and r13d, 4294967280
cmp edi, r13d
jge L_AES_GCM_decrypt_update_avx1_last_block_done
; single-block loop: GHASH the cipher text block while encrypting the
; counter, then XOR to produce the plain text
L_AES_GCM_decrypt_update_avx1_last_block_start:
vmovdqu xmm13, OWORD PTR [r11+rdi]
vmovdqa xmm0, xmm5
vpshufb xmm1, xmm13, OWORD PTR L_avx1_aes_gcm_bswap_mask
vpxor xmm1, xmm1, xmm6
; load counter, byte-swap for AES input, increment and store back
vmovdqu xmm9, OWORD PTR [r15]
vpshufb xmm8, xmm9, OWORD PTR L_avx1_aes_gcm_bswap_epi64
vpaddd xmm9, xmm9, OWORD PTR L_avx1_aes_gcm_one
vmovdqu OWORD PTR [r15], xmm9
vpxor xmm8, xmm8, [rax]
; gfmul (X ^ C) * H interleaved with the AES rounds
vpclmulqdq xmm10, xmm1, xmm0, 16
vaesenc xmm8, xmm8, [rax+16]
vaesenc xmm8, xmm8, [rax+32]
vpclmulqdq xmm11, xmm1, xmm0, 1
vaesenc xmm8, xmm8, [rax+48]
vaesenc xmm8, xmm8, [rax+64]
vpclmulqdq xmm12, xmm1, xmm0, 0
vaesenc xmm8, xmm8, [rax+80]
vpclmulqdq xmm1, xmm1, xmm0, 17
vaesenc xmm8, xmm8, [rax+96]
vpxor xmm10, xmm10, xmm11
vpslldq xmm2, xmm10, 8
vpsrldq xmm10, xmm10, 8
vaesenc xmm8, xmm8, [rax+112]
vpxor xmm2, xmm2, xmm12
vpxor xmm3, xmm1, xmm10
; Montgomery-style reduction using the mod2_128 constant
vmovdqa xmm0, OWORD PTR L_avx1_aes_gcm_mod2_128
vpclmulqdq xmm11, xmm2, xmm0, 16
vaesenc xmm8, xmm8, [rax+128]
vpshufd xmm10, xmm2, 78
vpxor xmm10, xmm10, xmm11
vpclmulqdq xmm11, xmm10, xmm0, 16
vaesenc xmm8, xmm8, [rax+144]
vpshufd xmm10, xmm10, 78
vpxor xmm10, xmm10, xmm11
vpxor xmm6, xmm10, xmm3
; extra rounds for 192/256-bit keys
cmp r8d, 11
vmovdqa xmm9, OWORD PTR [rax+160]
jl L_AES_GCM_decrypt_update_avx1_aesenc_gfmul_last
vaesenc xmm8, xmm8, xmm9
vaesenc xmm8, xmm8, [rax+176]
cmp r8d, 13
vmovdqa xmm9, OWORD PTR [rax+192]
jl L_AES_GCM_decrypt_update_avx1_aesenc_gfmul_last
vaesenc xmm8, xmm8, xmm9
vaesenc xmm8, xmm8, [rax+208]
vmovdqa xmm9, OWORD PTR [rax+224]
L_AES_GCM_decrypt_update_avx1_aesenc_gfmul_last:
vaesenclast xmm8, xmm8, xmm9
vmovdqa xmm0, xmm13
vpxor xmm8, xmm8, xmm0
vmovdqu OWORD PTR [r10+rdi], xmm8
add edi, 16
cmp edi, r13d
jl L_AES_GCM_decrypt_update_avx1_last_block_start
L_AES_GCM_decrypt_update_avx1_last_block_done:
L_AES_GCM_decrypt_update_avx1_done_dec:
; write the updated GHASH state back to the caller
vmovdqa OWORD PTR [r12], xmm6
vzeroupper
; restore non-volatile xmm registers and the stack, then return
vmovdqu xmm6, OWORD PTR [rsp+168]
vmovdqu xmm7, OWORD PTR [rsp+184]
vmovdqu xmm8, OWORD PTR [rsp+200]
vmovdqu xmm9, OWORD PTR [rsp+216]
vmovdqu xmm10, OWORD PTR [rsp+232]
vmovdqu xmm11, OWORD PTR [rsp+248]
vmovdqu xmm12, OWORD PTR [rsp+264]
vmovdqu xmm13, OWORD PTR [rsp+280]
vmovdqu xmm14, OWORD PTR [rsp+296]
vmovdqu xmm15, OWORD PTR [rsp+312]
add rsp, 328
pop rdi
pop r15
pop r14
pop r12
pop r13
ret
AES_GCM_decrypt_update_avx1 ENDP
_text ENDS
_text SEGMENT READONLY PARA
;-----------------------------------------------------------------------
; AES_GCM_decrypt_final_avx1 (Microsoft x64 ABI)
; Finishes AES-GCM decryption: computes the expected authentication tag
; (fold lengths into GHASH, multiply by H, XOR with E(K,Y0)) and
; compares it against the received tag.
; In:  rcx       = GHASH state X (one 16-byte block)
;      rdx       = received authentication tag to verify
;      r8d       = tag length in bytes (16 takes the vector path)
;      r9d       = first length value in bytes (presumably the AAD
;                  length -- confirm against caller)
;      [rsp+80]  = second length value in bytes (presumably the cipher
;                  text length -- confirm against caller)
;      [rsp+88]  = pointer to the hash key H
;      [rsp+96]  = pointer to E(K,Y0) (first encrypted counter block)
;      [rsp+104] = pointer to int result: set to 1 on tag match, else 0
; The partial-tag path compares all r8d bytes and ORs the differences
; together before testing, so the comparison is not data-dependent in
; its control flow (constant-time style).
; Non-volatile xmm6-xmm13 and xmm15 are saved/restored per Win64 ABI.
;-----------------------------------------------------------------------
AES_GCM_decrypt_final_avx1 PROC
push r13
push r12
push r14
push rbp
push r15
mov rax, rcx
mov r10d, r9d
mov r9, rdx
; stack arguments: after 5 pushes the 5th argument is at [rsp+80]
mov r11d, DWORD PTR [rsp+80]
mov r12, QWORD PTR [rsp+88]
mov r14, QWORD PTR [rsp+96]
mov rbp, QWORD PTR [rsp+104]
sub rsp, 160
; save non-volatile xmm registers (Win64 ABI)
vmovdqu OWORD PTR [rsp+16], xmm6
vmovdqu OWORD PTR [rsp+32], xmm7
vmovdqu OWORD PTR [rsp+48], xmm8
vmovdqu OWORD PTR [rsp+64], xmm9
vmovdqu OWORD PTR [rsp+80], xmm10
vmovdqu OWORD PTR [rsp+96], xmm11
vmovdqu OWORD PTR [rsp+112], xmm12
vmovdqu OWORD PTR [rsp+128], xmm13
vmovdqu OWORD PTR [rsp+144], xmm15
; xmm6 = GHASH state X, xmm5 = H, xmm15 = E(K,Y0)
vmovdqa xmm6, OWORD PTR [rax]
vmovdqa xmm5, OWORD PTR [r12]
vmovdqa xmm15, OWORD PTR [r14]
; H = H*x in GF(2^128): shift left 1, conditionally XOR the
; reduction constant when the top bit was set
vpsrlq xmm8, xmm5, 63
vpsllq xmm7, xmm5, 1
vpslldq xmm8, xmm8, 8
vpor xmm7, xmm7, xmm8
vpshufd xmm5, xmm5, 255
vpsrad xmm5, xmm5, 31
vpand xmm5, xmm5, OWORD PTR L_avx1_aes_gcm_mod2_128
vpxor xmm5, xmm5, xmm7
; build the GCM length block (lengths in bits) and XOR into X
mov edx, r10d
mov ecx, r11d
shl rdx, 3
shl rcx, 3
vmovq xmm0, rdx
vmovq xmm1, rcx
vpunpcklqdq xmm0, xmm0, xmm1
vpxor xmm6, xmm6, xmm0
; ghash_gfmul_red_avx
; X = (X * H) reduced: Karatsuba carry-less multiply followed by the
; shift-based GHASH reduction
vpshufd xmm8, xmm5, 78
vpshufd xmm9, xmm6, 78
vpclmulqdq xmm10, xmm6, xmm5, 17
vpclmulqdq xmm7, xmm6, xmm5, 0
vpxor xmm8, xmm8, xmm5
vpxor xmm9, xmm9, xmm6
vpclmulqdq xmm8, xmm8, xmm9, 0
vpxor xmm8, xmm8, xmm7
vpxor xmm8, xmm8, xmm10
vpslldq xmm9, xmm8, 8
vpsrldq xmm8, xmm8, 8
vpxor xmm7, xmm7, xmm9
vpxor xmm6, xmm10, xmm8
vpslld xmm11, xmm7, 31
vpslld xmm12, xmm7, 30
vpslld xmm13, xmm7, 25
vpxor xmm11, xmm11, xmm12
vpxor xmm11, xmm11, xmm13
vpsrldq xmm12, xmm11, 4
vpslldq xmm11, xmm11, 12
vpxor xmm7, xmm7, xmm11
vpsrld xmm13, xmm7, 1
vpsrld xmm9, xmm7, 2
vpsrld xmm8, xmm7, 7
vpxor xmm13, xmm13, xmm9
vpxor xmm13, xmm13, xmm8
vpxor xmm13, xmm13, xmm12
vpxor xmm13, xmm13, xmm7
vpxor xmm6, xmm6, xmm13
; expected tag = byteswap(X) ^ E(K,Y0)
vpshufb xmm6, xmm6, OWORD PTR L_avx1_aes_gcm_bswap_mask
vpxor xmm0, xmm6, xmm15
cmp r8d, 16
je L_AES_GCM_decrypt_final_avx1_cmp_tag_16
; partial tag: spill the computed tag, XOR-accumulate the byte
; differences into r15b, then test once at the end
sub rsp, 16
xor rcx, rcx
xor r15, r15
vmovdqu OWORD PTR [rsp], xmm0
L_AES_GCM_decrypt_final_avx1_cmp_tag_loop:
movzx r13d, BYTE PTR [rsp+rcx]
xor r13b, BYTE PTR [r9+rcx]
or r15b, r13b
inc ecx
cmp ecx, r8d
jne L_AES_GCM_decrypt_final_avx1_cmp_tag_loop
cmp r15b, 0
sete r15b
add rsp, 16
xor rcx, rcx
jmp L_AES_GCM_decrypt_final_avx1_cmp_tag_done
L_AES_GCM_decrypt_final_avx1_cmp_tag_16:
; full 16-byte tag: vector compare, all bytes must match
vmovdqu xmm1, OWORD PTR [r9]
vpcmpeqb xmm0, xmm0, xmm1
vpmovmskb rdx, xmm0
; %%edx == 0xFFFF then return 1 else => return 0
xor r15d, r15d
cmp edx, 65535
sete r15b
L_AES_GCM_decrypt_final_avx1_cmp_tag_done:
; store the match result (1 = tag valid, 0 = mismatch)
mov DWORD PTR [rbp], r15d
vzeroupper
; restore non-volatile xmm registers and the stack, then return
vmovdqu xmm6, OWORD PTR [rsp+16]
vmovdqu xmm7, OWORD PTR [rsp+32]
vmovdqu xmm8, OWORD PTR [rsp+48]
vmovdqu xmm9, OWORD PTR [rsp+64]
vmovdqu xmm10, OWORD PTR [rsp+80]
vmovdqu xmm11, OWORD PTR [rsp+96]
vmovdqu xmm12, OWORD PTR [rsp+112]
vmovdqu xmm13, OWORD PTR [rsp+128]
vmovdqu xmm15, OWORD PTR [rsp+144]
add rsp, 160
pop r15
pop rbp
pop r14
pop r12
pop r13
ret
AES_GCM_decrypt_final_avx1 ENDP
_text ENDS
ENDIF
IFDEF HAVE_INTEL_AVX2
_DATA SEGMENT
ALIGN 16
; 16-byte byte-reversal shuffle mask for vpshufb: the two qwords are
; 0x08090a0b0c0d0e0f and 0x0001020304050607, i.e. bytes 15..0, which
; reverses the whole 128-bit lane
L_GCM_generate_m0_avx2_rev8 QWORD 579005069656919567, 283686952306183
ptr_L_GCM_generate_m0_avx2_rev8 QWORD L_GCM_generate_m0_avx2_rev8
_DATA ENDS
_DATA SEGMENT
ALIGN 16
; GHASH reduction constant: high qword 0xe100000000000000 (the GCM
; polynomial term XORed in when a bit shifts out during right-shift
; reduction in the bit-reflected domain)
L_GCM_generate_m0_avx2_mod2_128 QWORD 0, 16212958658533785600
ptr_L_GCM_generate_m0_avx2_mod2_128 QWORD L_GCM_generate_m0_avx2_mod2_128
_DATA ENDS
_text SEGMENT READONLY PARA
;-----------------------------------------------------------------------
; GCM_generate_m0_avx2 (Microsoft x64 ABI)
; Builds a 512-byte GHASH multiplication table from the hash key.
; The first 256 bytes hold the 16 XOR combinations of the (bit-order
; reversed) key value and its three successive 1-bit right shifts with
; reduction; the second 256 bytes hold the same 16 entries each shifted
; right by a further 4 bits -- presumably the low-/high-nibble table
; pair for a 4-bit table-driven GHASH (confirm against the C caller).
; In:  rcx = pointer to the 16-byte hash key value
;      rdx = pointer to the 512-byte output table
; Non-volatile xmm6-xmm10 are saved/restored per the Win64 ABI.
;-----------------------------------------------------------------------
GCM_generate_m0_avx2 PROC
sub rsp, 80
; save non-volatile xmm registers (Win64 ABI)
vmovdqu OWORD PTR [rsp], xmm6
vmovdqu OWORD PTR [rsp+16], xmm7
vmovdqu OWORD PTR [rsp+32], xmm8
vmovdqu OWORD PTR [rsp+48], xmm9
vmovdqu OWORD PTR [rsp+64], xmm10
; xmm9 = byte-reverse mask, xmm10 = reduction constant
vmovdqu xmm9, OWORD PTR L_GCM_generate_m0_avx2_rev8
vmovdqu xmm10, OWORD PTR L_GCM_generate_m0_avx2_mod2_128
; table entry 0 is always zero
vpxor xmm8, xmm8, xmm8
vmovdqu xmm0, OWORD PTR [rcx]
vmovdqu OWORD PTR [rdx], xmm8
vmovdqu xmm8, xmm0
; xmm0 = byte-reversed key; then derive three successive 1-bit right
; shifts with reduction (XOR the mod2_128 constant when the shifted-out
; bit, replicated by vpshufd/vpsrad, was set): xmm1, xmm2, xmm3
vpshufb xmm0, xmm0, xmm9
vpsllq xmm5, xmm0, 63
vpsrlq xmm4, xmm0, 1
vpslldq xmm1, xmm5, 8
vpsrldq xmm5, xmm5, 8
vpshufd xmm1, xmm1, 255
vpor xmm4, xmm4, xmm5
vpsrad xmm1, xmm1, 31
vpand xmm1, xmm1, xmm10
vpxor xmm1, xmm1, xmm4
vpsllq xmm5, xmm1, 63
vpsrlq xmm4, xmm1, 1
vpslldq xmm2, xmm5, 8
vpsrldq xmm5, xmm5, 8
vpshufd xmm2, xmm2, 255
vpor xmm4, xmm4, xmm5
vpsrad xmm2, xmm2, 31
vpand xmm2, xmm2, xmm10
vpxor xmm2, xmm2, xmm4
vpsllq xmm5, xmm2, 63
vpsrlq xmm4, xmm2, 1
vpslldq xmm3, xmm5, 8
vpsrldq xmm5, xmm5, 8
vpshufd xmm3, xmm3, 255
vpor xmm4, xmm4, xmm5
vpsrad xmm3, xmm3, 31
vpand xmm3, xmm3, xmm10
vpxor xmm3, xmm3, xmm4
; restore byte order of all four base values
vpshufb xmm2, xmm2, xmm9
vpshufb xmm3, xmm3, xmm9
vpshufb xmm0, xmm0, xmm9
vpshufb xmm1, xmm1, xmm9
; fill entries 1..15 ([rdx+16..240]) with all XOR combinations of the
; four base values xmm3, xmm2, xmm1, xmm0
vpxor xmm8, xmm3, xmm2
vmovdqu OWORD PTR [rdx+16], xmm3
vmovdqu OWORD PTR [rdx+32], xmm2
vmovdqu OWORD PTR [rdx+48], xmm8
vmovdqu OWORD PTR [rdx+64], xmm1
vpxor xmm4, xmm3, xmm1
vpxor xmm5, xmm2, xmm1
vpxor xmm6, xmm8, xmm1
vmovdqu OWORD PTR [rdx+80], xmm4
vmovdqu OWORD PTR [rdx+96], xmm5
vmovdqu OWORD PTR [rdx+112], xmm6
vmovdqu OWORD PTR [rdx+128], xmm0
vpxor xmm1, xmm1, xmm0
vpxor xmm4, xmm3, xmm0
vpxor xmm6, xmm2, xmm0
vmovdqu OWORD PTR [rdx+144], xmm4
vmovdqu OWORD PTR [rdx+160], xmm6
vpxor xmm6, xmm3, xmm6
vmovdqu OWORD PTR [rdx+176], xmm6
vmovdqu OWORD PTR [rdx+192], xmm1
vpxor xmm4, xmm3, xmm1
vpxor xmm5, xmm2, xmm1
vpxor xmm6, xmm8, xmm1
vmovdqu OWORD PTR [rdx+208], xmm4
vmovdqu OWORD PTR [rdx+224], xmm5
vmovdqu OWORD PTR [rdx+240], xmm6
; second half of the table: reload entries 0..15 four at a time,
; shift each right by 4 bits across the 128-bit lane (byte-reverse,
; per-qword shift with carry via vpsllq 60/vpsrldq, byte-reverse
; back) and store at [rdx+256..496]
vmovdqu xmm0, OWORD PTR [rdx]
vmovdqu xmm1, OWORD PTR [rdx+16]
vmovdqu xmm2, OWORD PTR [rdx+32]
vmovdqu xmm3, OWORD PTR [rdx+48]
vpshufb xmm0, xmm0, xmm9
vpshufb xmm1, xmm1, xmm9
vpshufb xmm2, xmm2, xmm9
vpshufb xmm3, xmm3, xmm9
vpsllq xmm4, xmm0, 60
vpsllq xmm5, xmm1, 60
vpsllq xmm6, xmm2, 60
vpsllq xmm7, xmm3, 60
vpsrlq xmm0, xmm0, 4
vpsrlq xmm1, xmm1, 4
vpsrlq xmm2, xmm2, 4
vpsrlq xmm3, xmm3, 4
vpsrldq xmm4, xmm4, 8
vpsrldq xmm5, xmm5, 8
vpsrldq xmm6, xmm6, 8
vpsrldq xmm7, xmm7, 8
vpor xmm0, xmm0, xmm4
vpor xmm1, xmm1, xmm5
vpor xmm2, xmm2, xmm6
vpor xmm3, xmm3, xmm7
vpshufb xmm0, xmm0, xmm9
vpshufb xmm1, xmm1, xmm9
vpshufb xmm2, xmm2, xmm9
vpshufb xmm3, xmm3, xmm9
vmovdqu OWORD PTR [rdx+256], xmm0
vmovdqu OWORD PTR [rdx+272], xmm1
vmovdqu OWORD PTR [rdx+288], xmm2
vmovdqu OWORD PTR [rdx+304], xmm3
; entries 4..7 shifted right by 4 bits
vmovdqu xmm0, OWORD PTR [rdx+64]
vmovdqu xmm1, OWORD PTR [rdx+80]
vmovdqu xmm2, OWORD PTR [rdx+96]
vmovdqu xmm3, OWORD PTR [rdx+112]
vpshufb xmm0, xmm0, xmm9
vpshufb xmm1, xmm1, xmm9
vpshufb xmm2, xmm2, xmm9
vpshufb xmm3, xmm3, xmm9
vpsllq xmm4, xmm0, 60
vpsllq xmm5, xmm1, 60
vpsllq xmm6, xmm2, 60
vpsllq xmm7, xmm3, 60
vpsrlq xmm0, xmm0, 4
vpsrlq xmm1, xmm1, 4
vpsrlq xmm2, xmm2, 4
vpsrlq xmm3, xmm3, 4
vpsrldq xmm4, xmm4, 8
vpsrldq xmm5, xmm5, 8
vpsrldq xmm6, xmm6, 8
vpsrldq xmm7, xmm7, 8
vpor xmm0, xmm0, xmm4
vpor xmm1, xmm1, xmm5
vpor xmm2, xmm2, xmm6
vpor xmm3, xmm3, xmm7
vpshufb xmm0, xmm0, xmm9
vpshufb xmm1, xmm1, xmm9
vpshufb xmm2, xmm2, xmm9
vpshufb xmm3, xmm3, xmm9
vmovdqu OWORD PTR [rdx+320], xmm0
vmovdqu OWORD PTR [rdx+336], xmm1
vmovdqu OWORD PTR [rdx+352], xmm2
vmovdqu OWORD PTR [rdx+368], xmm3
; entries 8..11 shifted right by 4 bits
vmovdqu xmm0, OWORD PTR [rdx+128]
vmovdqu xmm1, OWORD PTR [rdx+144]
vmovdqu xmm2, OWORD PTR [rdx+160]
vmovdqu xmm3, OWORD PTR [rdx+176]
vpshufb xmm0, xmm0, xmm9
vpshufb xmm1, xmm1, xmm9
vpshufb xmm2, xmm2, xmm9
vpshufb xmm3, xmm3, xmm9
vpsllq xmm4, xmm0, 60
vpsllq xmm5, xmm1, 60
vpsllq xmm6, xmm2, 60
vpsllq xmm7, xmm3, 60
vpsrlq xmm0, xmm0, 4
vpsrlq xmm1, xmm1, 4
vpsrlq xmm2, xmm2, 4
vpsrlq xmm3, xmm3, 4
vpsrldq xmm4, xmm4, 8
vpsrldq xmm5, xmm5, 8
vpsrldq xmm6, xmm6, 8
vpsrldq xmm7, xmm7, 8
vpor xmm0, xmm0, xmm4
vpor xmm1, xmm1, xmm5
vpor xmm2, xmm2, xmm6
vpor xmm3, xmm3, xmm7
vpshufb xmm0, xmm0, xmm9
vpshufb xmm1, xmm1, xmm9
vpshufb xmm2, xmm2, xmm9
vpshufb xmm3, xmm3, xmm9
vmovdqu OWORD PTR [rdx+384], xmm0
vmovdqu OWORD PTR [rdx+400], xmm1
vmovdqu OWORD PTR [rdx+416], xmm2
vmovdqu OWORD PTR [rdx+432], xmm3
; entries 12..15 shifted right by 4 bits
vmovdqu xmm0, OWORD PTR [rdx+192]
vmovdqu xmm1, OWORD PTR [rdx+208]
vmovdqu xmm2, OWORD PTR [rdx+224]
vmovdqu xmm3, OWORD PTR [rdx+240]
vpshufb xmm0, xmm0, xmm9
vpshufb xmm1, xmm1, xmm9
vpshufb xmm2, xmm2, xmm9
vpshufb xmm3, xmm3, xmm9
vpsllq xmm4, xmm0, 60
vpsllq xmm5, xmm1, 60
vpsllq xmm6, xmm2, 60
vpsllq xmm7, xmm3, 60
vpsrlq xmm0, xmm0, 4
vpsrlq xmm1, xmm1, 4
vpsrlq xmm2, xmm2, 4
vpsrlq xmm3, xmm3, 4
vpsrldq xmm4, xmm4, 8
vpsrldq xmm5, xmm5, 8
vpsrldq xmm6, xmm6, 8
vpsrldq xmm7, xmm7, 8
vpor xmm0, xmm0, xmm4
vpor xmm1, xmm1, xmm5
vpor xmm2, xmm2, xmm6
vpor xmm3, xmm3, xmm7
vpshufb xmm0, xmm0, xmm9
vpshufb xmm1, xmm1, xmm9
vpshufb xmm2, xmm2, xmm9
vpshufb xmm3, xmm3, xmm9
vmovdqu OWORD PTR [rdx+448], xmm0
vmovdqu OWORD PTR [rdx+464], xmm1
vmovdqu OWORD PTR [rdx+480], xmm2
vmovdqu OWORD PTR [rdx+496], xmm3
; restore non-volatile xmm registers and the stack, then return
vmovdqu xmm6, OWORD PTR [rsp]
vmovdqu xmm7, OWORD PTR [rsp+16]
vmovdqu xmm8, OWORD PTR [rsp+32]
vmovdqu xmm9, OWORD PTR [rsp+48]
vmovdqu xmm10, OWORD PTR [rsp+64]
add rsp, 80
ret
GCM_generate_m0_avx2 ENDP
_text ENDS
_DATA SEGMENT
ALIGN 16
; Counter-increment constants: 128-bit values {lo qword = 0, hi qword = N}.
; The working counter block is held byte-swapped per 64-bit half (see
; L_avx2_aes_gcm_bswap_epi64), which places the 32-bit counter in dword
; lane 2, so a vpaddd with one of these adds N to the counter.
; The ptr_* qword holds the absolute address of the constant.
L_avx2_aes_gcm_one QWORD 0, 1
ptr_L_avx2_aes_gcm_one QWORD L_avx2_aes_gcm_one
_DATA ENDS
_DATA SEGMENT
ALIGN 16
; Counter + 2 (layout as for L_avx2_aes_gcm_one).
L_avx2_aes_gcm_two QWORD 0, 2
ptr_L_avx2_aes_gcm_two QWORD L_avx2_aes_gcm_two
_DATA ENDS
_DATA SEGMENT
ALIGN 16
; Counter + 3.
L_avx2_aes_gcm_three QWORD 0, 3
ptr_L_avx2_aes_gcm_three QWORD L_avx2_aes_gcm_three
_DATA ENDS
_DATA SEGMENT
ALIGN 16
; Counter + 4.
L_avx2_aes_gcm_four QWORD 0, 4
ptr_L_avx2_aes_gcm_four QWORD L_avx2_aes_gcm_four
_DATA ENDS
_DATA SEGMENT
ALIGN 16
; Counter + 5.
L_avx2_aes_gcm_five QWORD 0, 5
ptr_L_avx2_aes_gcm_five QWORD L_avx2_aes_gcm_five
_DATA ENDS
_DATA SEGMENT
ALIGN 16
; Counter + 6.
L_avx2_aes_gcm_six QWORD 0, 6
ptr_L_avx2_aes_gcm_six QWORD L_avx2_aes_gcm_six
_DATA ENDS
_DATA SEGMENT
ALIGN 16
; Counter + 7.
L_avx2_aes_gcm_seven QWORD 0, 7
ptr_L_avx2_aes_gcm_seven QWORD L_avx2_aes_gcm_seven
_DATA ENDS
_DATA SEGMENT
ALIGN 16
; Counter + 8 (advances the base counter past one 8-block batch).
L_avx2_aes_gcm_eight QWORD 0, 8
ptr_L_avx2_aes_gcm_eight QWORD L_avx2_aes_gcm_eight
_DATA ENDS
_DATA SEGMENT
ALIGN 16
; Big-endian 128-bit value 1: hi qword 72057594037927936 = 0x0100000000000000,
; i.e. only the last byte (byte 15) of the 16-byte block is 0x01.  Blended
; over a 12-byte IV (vpblendd imm8 = 7 keeps dwords 0-2 from the IV, dword 3
; from here) to form the initial counter block IV || 0x00000001.
L_avx2_aes_gcm_bswap_one QWORD 0, 72057594037927936
ptr_L_avx2_aes_gcm_bswap_one QWORD L_avx2_aes_gcm_bswap_one
_DATA ENDS
_DATA SEGMENT
ALIGN 16
; vpshufb mask that reverses byte order within each 64-bit half:
; lo qword = 0x0001020304050607 (bytes 07..00), hi = 0x08090A0B0C0D0E0F
; (bytes 0F..08).  Converts the byte-swapped working counter into the
; big-endian counter block fed to AES.
L_avx2_aes_gcm_bswap_epi64 QWORD 283686952306183, 579005069656919567
ptr_L_avx2_aes_gcm_bswap_epi64 QWORD L_avx2_aes_gcm_bswap_epi64
_DATA ENDS
_DATA SEGMENT
ALIGN 16
; vpshufb mask that reverses all 16 bytes of an xmm register:
; lo qword = 0x08090A0B0C0D0E0F, hi qword = 0x0001020304050607, so dest
; byte i takes source byte 15-i.  Applied to every block before it enters
; a GHASH multiply (vpclmulqdq works on the reversed representation).
L_avx2_aes_gcm_bswap_mask QWORD 579005069656919567, 283686952306183
ptr_L_avx2_aes_gcm_bswap_mask QWORD L_avx2_aes_gcm_bswap_mask
_DATA ENDS
_DATA SEGMENT
ALIGN 16
; GHASH reduction constant: {lo qword = 1,
; hi qword = 13979173243358019584 = 0xC200000000000000}, the bit-reflected
; form of the GF(2^128) polynomial x^128 + x^7 + x^2 + x + 1.  Used via
; vpclmulqdq imm8 = 16 in the ghash_red sequences, and masked in with
; vpand when doubling H during key setup.
L_avx2_aes_gcm_mod2_128 QWORD 1, 13979173243358019584
ptr_L_avx2_aes_gcm_mod2_128 QWORD L_avx2_aes_gcm_mod2_128
_DATA ENDS
_text SEGMENT READONLY PARA
AES_GCM_encrypt_avx2 PROC
push r13
push rdi
push r12
push r15
push rbx
push r14
push rsi
mov rdi, rcx
mov r12, r8
mov rax, r9
mov r15, QWORD PTR [rsp+96]
mov r8, rdx
mov r10d, DWORD PTR [rsp+104]
mov r11d, DWORD PTR [rsp+112]
mov ebx, DWORD PTR [rsp+120]
mov r14d, DWORD PTR [rsp+128]
mov rsi, QWORD PTR [rsp+136]
mov r9d, DWORD PTR [rsp+144]
sub rsp, 320
vmovdqu OWORD PTR [rsp+160], xmm6
vmovdqu OWORD PTR [rsp+176], xmm7
vmovdqu OWORD PTR [rsp+192], xmm8
vmovdqu OWORD PTR [rsp+208], xmm9
vmovdqu OWORD PTR [rsp+224], xmm10
vmovdqu OWORD PTR [rsp+240], xmm11
vmovdqu OWORD PTR [rsp+256], xmm12
vmovdqu OWORD PTR [rsp+272], xmm13
vmovdqu OWORD PTR [rsp+288], xmm14
vmovdqu OWORD PTR [rsp+304], xmm15
vpxor xmm4, xmm4, xmm4
vpxor xmm6, xmm6, xmm6
mov edx, ebx
cmp edx, 12
je L_AES_GCM_encrypt_avx2_iv_12
; Calculate values when IV is not 12 bytes
; H = Encrypt X(=0)
vmovdqu xmm5, OWORD PTR [rsi]
vaesenc xmm5, xmm5, [rsi+16]
vaesenc xmm5, xmm5, [rsi+32]
vaesenc xmm5, xmm5, [rsi+48]
vaesenc xmm5, xmm5, [rsi+64]
vaesenc xmm5, xmm5, [rsi+80]
vaesenc xmm5, xmm5, [rsi+96]
vaesenc xmm5, xmm5, [rsi+112]
vaesenc xmm5, xmm5, [rsi+128]
vaesenc xmm5, xmm5, [rsi+144]
cmp r9d, 11
vmovdqu xmm0, OWORD PTR [rsi+160]
jl L_AES_GCM_encrypt_avx2_calc_iv_1_aesenc_avx_last
vaesenc xmm5, xmm5, xmm0
vaesenc xmm5, xmm5, [rsi+176]
cmp r9d, 13
vmovdqu xmm0, OWORD PTR [rsi+192]
jl L_AES_GCM_encrypt_avx2_calc_iv_1_aesenc_avx_last
vaesenc xmm5, xmm5, xmm0
vaesenc xmm5, xmm5, [rsi+208]
vmovdqu xmm0, OWORD PTR [rsi+224]
L_AES_GCM_encrypt_avx2_calc_iv_1_aesenc_avx_last:
vaesenclast xmm5, xmm5, xmm0
vpshufb xmm5, xmm5, OWORD PTR L_avx2_aes_gcm_bswap_mask
; Calc counter
; Initialization vector
cmp edx, 0
mov rcx, 0
je L_AES_GCM_encrypt_avx2_calc_iv_done
cmp edx, 16
jl L_AES_GCM_encrypt_avx2_calc_iv_lt16
and edx, 4294967280
L_AES_GCM_encrypt_avx2_calc_iv_16_loop:
vmovdqu xmm0, OWORD PTR [rax+rcx]
vpshufb xmm0, xmm0, OWORD PTR L_avx2_aes_gcm_bswap_mask
vpxor xmm4, xmm4, xmm0
; ghash_gfmul_avx
vpclmulqdq xmm2, xmm5, xmm4, 16
vpclmulqdq xmm1, xmm5, xmm4, 1
vpclmulqdq xmm0, xmm5, xmm4, 0
vpclmulqdq xmm3, xmm5, xmm4, 17
vpxor xmm2, xmm2, xmm1
vpslldq xmm1, xmm2, 8
vpsrldq xmm2, xmm2, 8
vpxor xmm7, xmm0, xmm1
vpxor xmm4, xmm3, xmm2
; ghash_mid
vpsrld xmm0, xmm7, 31
vpsrld xmm1, xmm4, 31
vpslld xmm7, xmm7, 1
vpslld xmm4, xmm4, 1
vpsrldq xmm2, xmm0, 12
vpslldq xmm0, xmm0, 4
vpslldq xmm1, xmm1, 4
vpor xmm4, xmm4, xmm2
vpor xmm7, xmm7, xmm0
vpor xmm4, xmm4, xmm1
; ghash_red
vmovdqu xmm2, OWORD PTR L_avx2_aes_gcm_mod2_128
vpclmulqdq xmm0, xmm7, xmm2, 16
vpshufd xmm1, xmm7, 78
vpxor xmm1, xmm1, xmm0
vpclmulqdq xmm0, xmm1, xmm2, 16
vpshufd xmm1, xmm1, 78
vpxor xmm1, xmm1, xmm0
vpxor xmm4, xmm4, xmm1
add ecx, 16
cmp ecx, edx
jl L_AES_GCM_encrypt_avx2_calc_iv_16_loop
mov edx, ebx
cmp ecx, edx
je L_AES_GCM_encrypt_avx2_calc_iv_done
L_AES_GCM_encrypt_avx2_calc_iv_lt16:
vpxor xmm0, xmm0, xmm0
xor ebx, ebx
vmovdqu OWORD PTR [rsp], xmm0
L_AES_GCM_encrypt_avx2_calc_iv_loop:
movzx r13d, BYTE PTR [rax+rcx]
mov BYTE PTR [rsp+rbx], r13b
inc ecx
inc ebx
cmp ecx, edx
jl L_AES_GCM_encrypt_avx2_calc_iv_loop
vmovdqu xmm0, OWORD PTR [rsp]
vpshufb xmm0, xmm0, OWORD PTR L_avx2_aes_gcm_bswap_mask
vpxor xmm4, xmm4, xmm0
; ghash_gfmul_avx
vpclmulqdq xmm2, xmm5, xmm4, 16
vpclmulqdq xmm1, xmm5, xmm4, 1
vpclmulqdq xmm0, xmm5, xmm4, 0
vpclmulqdq xmm3, xmm5, xmm4, 17
vpxor xmm2, xmm2, xmm1
vpslldq xmm1, xmm2, 8
vpsrldq xmm2, xmm2, 8
vpxor xmm7, xmm0, xmm1
vpxor xmm4, xmm3, xmm2
; ghash_mid
vpsrld xmm0, xmm7, 31
vpsrld xmm1, xmm4, 31
vpslld xmm7, xmm7, 1
vpslld xmm4, xmm4, 1
vpsrldq xmm2, xmm0, 12
vpslldq xmm0, xmm0, 4
vpslldq xmm1, xmm1, 4
vpor xmm4, xmm4, xmm2
vpor xmm7, xmm7, xmm0
vpor xmm4, xmm4, xmm1
; ghash_red
vmovdqu xmm2, OWORD PTR L_avx2_aes_gcm_mod2_128
vpclmulqdq xmm0, xmm7, xmm2, 16
vpshufd xmm1, xmm7, 78
vpxor xmm1, xmm1, xmm0
vpclmulqdq xmm0, xmm1, xmm2, 16
vpshufd xmm1, xmm1, 78
vpxor xmm1, xmm1, xmm0
vpxor xmm4, xmm4, xmm1
L_AES_GCM_encrypt_avx2_calc_iv_done:
; T = Encrypt counter
vpxor xmm0, xmm0, xmm0
shl edx, 3
vmovq xmm0, rdx
vpxor xmm4, xmm4, xmm0
; ghash_gfmul_avx
vpclmulqdq xmm2, xmm5, xmm4, 16
vpclmulqdq xmm1, xmm5, xmm4, 1
vpclmulqdq xmm0, xmm5, xmm4, 0
vpclmulqdq xmm3, xmm5, xmm4, 17
vpxor xmm2, xmm2, xmm1
vpslldq xmm1, xmm2, 8
vpsrldq xmm2, xmm2, 8
vpxor xmm7, xmm0, xmm1
vpxor xmm4, xmm3, xmm2
; ghash_mid
vpsrld xmm0, xmm7, 31
vpsrld xmm1, xmm4, 31
vpslld xmm7, xmm7, 1
vpslld xmm4, xmm4, 1
vpsrldq xmm2, xmm0, 12
vpslldq xmm0, xmm0, 4
vpslldq xmm1, xmm1, 4
vpor xmm4, xmm4, xmm2
vpor xmm7, xmm7, xmm0
vpor xmm4, xmm4, xmm1
; ghash_red
vmovdqu xmm2, OWORD PTR L_avx2_aes_gcm_mod2_128
vpclmulqdq xmm0, xmm7, xmm2, 16
vpshufd xmm1, xmm7, 78
vpxor xmm1, xmm1, xmm0
vpclmulqdq xmm0, xmm1, xmm2, 16
vpshufd xmm1, xmm1, 78
vpxor xmm1, xmm1, xmm0
vpxor xmm4, xmm4, xmm1
vpshufb xmm4, xmm4, OWORD PTR L_avx2_aes_gcm_bswap_mask
; Encrypt counter
vmovdqu xmm15, OWORD PTR [rsi]
vpxor xmm15, xmm15, xmm4
vaesenc xmm15, xmm15, [rsi+16]
vaesenc xmm15, xmm15, [rsi+32]
vaesenc xmm15, xmm15, [rsi+48]
vaesenc xmm15, xmm15, [rsi+64]
vaesenc xmm15, xmm15, [rsi+80]
vaesenc xmm15, xmm15, [rsi+96]
vaesenc xmm15, xmm15, [rsi+112]
vaesenc xmm15, xmm15, [rsi+128]
vaesenc xmm15, xmm15, [rsi+144]
cmp r9d, 11
vmovdqu xmm0, OWORD PTR [rsi+160]
jl L_AES_GCM_encrypt_avx2_calc_iv_2_aesenc_avx_last
vaesenc xmm15, xmm15, xmm0
vaesenc xmm15, xmm15, [rsi+176]
cmp r9d, 13
vmovdqu xmm0, OWORD PTR [rsi+192]
jl L_AES_GCM_encrypt_avx2_calc_iv_2_aesenc_avx_last
vaesenc xmm15, xmm15, xmm0
vaesenc xmm15, xmm15, [rsi+208]
vmovdqu xmm0, OWORD PTR [rsi+224]
L_AES_GCM_encrypt_avx2_calc_iv_2_aesenc_avx_last:
vaesenclast xmm15, xmm15, xmm0
jmp L_AES_GCM_encrypt_avx2_iv_done
L_AES_GCM_encrypt_avx2_iv_12:
; # Calculate values when IV is 12 bytes
; Set counter based on IV
vmovdqu xmm4, OWORD PTR L_avx2_aes_gcm_bswap_one
vmovdqu xmm5, OWORD PTR [rsi]
vpblendd xmm4, xmm4, [rax], 7
; H = Encrypt X(=0) and T = Encrypt counter
vmovdqu xmm7, OWORD PTR [rsi+16]
vpxor xmm15, xmm4, xmm5
vaesenc xmm5, xmm5, xmm7
vaesenc xmm15, xmm15, xmm7
vmovdqu xmm0, OWORD PTR [rsi+32]
vaesenc xmm5, xmm5, xmm0
vaesenc xmm15, xmm15, xmm0
vmovdqu xmm0, OWORD PTR [rsi+48]
vaesenc xmm5, xmm5, xmm0
vaesenc xmm15, xmm15, xmm0
vmovdqu xmm0, OWORD PTR [rsi+64]
vaesenc xmm5, xmm5, xmm0
vaesenc xmm15, xmm15, xmm0
vmovdqu xmm0, OWORD PTR [rsi+80]
vaesenc xmm5, xmm5, xmm0
vaesenc xmm15, xmm15, xmm0
vmovdqu xmm0, OWORD PTR [rsi+96]
vaesenc xmm5, xmm5, xmm0
vaesenc xmm15, xmm15, xmm0
vmovdqu xmm0, OWORD PTR [rsi+112]
vaesenc xmm5, xmm5, xmm0
vaesenc xmm15, xmm15, xmm0
vmovdqu xmm0, OWORD PTR [rsi+128]
vaesenc xmm5, xmm5, xmm0
vaesenc xmm15, xmm15, xmm0
vmovdqu xmm0, OWORD PTR [rsi+144]
vaesenc xmm5, xmm5, xmm0
vaesenc xmm15, xmm15, xmm0
cmp r9d, 11
vmovdqu xmm0, OWORD PTR [rsi+160]
jl L_AES_GCM_encrypt_avx2_calc_iv_12_last
vaesenc xmm5, xmm5, xmm0
vaesenc xmm15, xmm15, xmm0
vmovdqu xmm0, OWORD PTR [rsi+176]
vaesenc xmm5, xmm5, xmm0
vaesenc xmm15, xmm15, xmm0
cmp r9d, 13
vmovdqu xmm0, OWORD PTR [rsi+192]
jl L_AES_GCM_encrypt_avx2_calc_iv_12_last
vaesenc xmm5, xmm5, xmm0
vaesenc xmm15, xmm15, xmm0
vmovdqu xmm0, OWORD PTR [rsi+208]
vaesenc xmm5, xmm5, xmm0
vaesenc xmm15, xmm15, xmm0
vmovdqu xmm0, OWORD PTR [rsi+224]
L_AES_GCM_encrypt_avx2_calc_iv_12_last:
vaesenclast xmm5, xmm5, xmm0
vaesenclast xmm15, xmm15, xmm0
vpshufb xmm5, xmm5, OWORD PTR L_avx2_aes_gcm_bswap_mask
L_AES_GCM_encrypt_avx2_iv_done:
; Additional authentication data
mov edx, r11d
cmp edx, 0
je L_AES_GCM_encrypt_avx2_calc_aad_done
xor ecx, ecx
cmp edx, 16
jl L_AES_GCM_encrypt_avx2_calc_aad_lt16
and edx, 4294967280
L_AES_GCM_encrypt_avx2_calc_aad_16_loop:
vmovdqu xmm0, OWORD PTR [r12+rcx]
vpshufb xmm0, xmm0, OWORD PTR L_avx2_aes_gcm_bswap_mask
vpxor xmm6, xmm6, xmm0
; ghash_gfmul_avx
vpclmulqdq xmm2, xmm5, xmm6, 16
vpclmulqdq xmm1, xmm5, xmm6, 1
vpclmulqdq xmm0, xmm5, xmm6, 0
vpclmulqdq xmm3, xmm5, xmm6, 17
vpxor xmm2, xmm2, xmm1
vpslldq xmm1, xmm2, 8
vpsrldq xmm2, xmm2, 8
vpxor xmm7, xmm0, xmm1
vpxor xmm6, xmm3, xmm2
; ghash_mid
vpsrld xmm0, xmm7, 31
vpsrld xmm1, xmm6, 31
vpslld xmm7, xmm7, 1
vpslld xmm6, xmm6, 1
vpsrldq xmm2, xmm0, 12
vpslldq xmm0, xmm0, 4
vpslldq xmm1, xmm1, 4
vpor xmm6, xmm6, xmm2
vpor xmm7, xmm7, xmm0
vpor xmm6, xmm6, xmm1
; ghash_red
vmovdqu xmm2, OWORD PTR L_avx2_aes_gcm_mod2_128
vpclmulqdq xmm0, xmm7, xmm2, 16
vpshufd xmm1, xmm7, 78
vpxor xmm1, xmm1, xmm0
vpclmulqdq xmm0, xmm1, xmm2, 16
vpshufd xmm1, xmm1, 78
vpxor xmm1, xmm1, xmm0
vpxor xmm6, xmm6, xmm1
add ecx, 16
cmp ecx, edx
jl L_AES_GCM_encrypt_avx2_calc_aad_16_loop
mov edx, r11d
cmp ecx, edx
je L_AES_GCM_encrypt_avx2_calc_aad_done
L_AES_GCM_encrypt_avx2_calc_aad_lt16:
vpxor xmm0, xmm0, xmm0
xor ebx, ebx
vmovdqu OWORD PTR [rsp], xmm0
L_AES_GCM_encrypt_avx2_calc_aad_loop:
movzx r13d, BYTE PTR [r12+rcx]
mov BYTE PTR [rsp+rbx], r13b
inc ecx
inc ebx
cmp ecx, edx
jl L_AES_GCM_encrypt_avx2_calc_aad_loop
vmovdqu xmm0, OWORD PTR [rsp]
vpshufb xmm0, xmm0, OWORD PTR L_avx2_aes_gcm_bswap_mask
vpxor xmm6, xmm6, xmm0
; ghash_gfmul_avx
vpclmulqdq xmm2, xmm5, xmm6, 16
vpclmulqdq xmm1, xmm5, xmm6, 1
vpclmulqdq xmm0, xmm5, xmm6, 0
vpclmulqdq xmm3, xmm5, xmm6, 17
vpxor xmm2, xmm2, xmm1
vpslldq xmm1, xmm2, 8
vpsrldq xmm2, xmm2, 8
vpxor xmm7, xmm0, xmm1
vpxor xmm6, xmm3, xmm2
; ghash_mid
vpsrld xmm0, xmm7, 31
vpsrld xmm1, xmm6, 31
vpslld xmm7, xmm7, 1
vpslld xmm6, xmm6, 1
vpsrldq xmm2, xmm0, 12
vpslldq xmm0, xmm0, 4
vpslldq xmm1, xmm1, 4
vpor xmm6, xmm6, xmm2
vpor xmm7, xmm7, xmm0
vpor xmm6, xmm6, xmm1
; ghash_red
vmovdqu xmm2, OWORD PTR L_avx2_aes_gcm_mod2_128
vpclmulqdq xmm0, xmm7, xmm2, 16
vpshufd xmm1, xmm7, 78
vpxor xmm1, xmm1, xmm0
vpclmulqdq xmm0, xmm1, xmm2, 16
vpshufd xmm1, xmm1, 78
vpxor xmm1, xmm1, xmm0
vpxor xmm6, xmm6, xmm1
L_AES_GCM_encrypt_avx2_calc_aad_done:
; Calculate counter and H
vpsrlq xmm1, xmm5, 63
vpsllq xmm0, xmm5, 1
vpslldq xmm1, xmm1, 8
vpor xmm0, xmm0, xmm1
vpshufd xmm5, xmm5, 255
vpsrad xmm5, xmm5, 31
vpshufb xmm4, xmm4, OWORD PTR L_avx2_aes_gcm_bswap_epi64
vpand xmm5, xmm5, OWORD PTR L_avx2_aes_gcm_mod2_128
vpaddd xmm4, xmm4, OWORD PTR L_avx2_aes_gcm_one
vpxor xmm5, xmm5, xmm0
xor ebx, ebx
cmp r10d, 128
mov r13d, r10d
jl L_AES_GCM_encrypt_avx2_done_128
and r13d, 4294967168
vmovdqu OWORD PTR [rsp+128], xmm4
vmovdqu OWORD PTR [rsp+144], xmm15
vmovdqu xmm3, OWORD PTR L_avx2_aes_gcm_mod2_128
; H ^ 1 and H ^ 2
vpclmulqdq xmm9, xmm5, xmm5, 0
vpclmulqdq xmm10, xmm5, xmm5, 17
vpclmulqdq xmm8, xmm9, xmm3, 16
vpshufd xmm9, xmm9, 78
vpxor xmm9, xmm9, xmm8
vpclmulqdq xmm8, xmm9, xmm3, 16
vpshufd xmm9, xmm9, 78
vpxor xmm9, xmm9, xmm8
vpxor xmm0, xmm10, xmm9
vmovdqu OWORD PTR [rsp], xmm5
vmovdqu OWORD PTR [rsp+16], xmm0
; H ^ 3 and H ^ 4
vpclmulqdq xmm11, xmm0, xmm5, 16
vpclmulqdq xmm10, xmm0, xmm5, 1
vpclmulqdq xmm9, xmm0, xmm5, 0
vpclmulqdq xmm12, xmm0, xmm5, 17
vpclmulqdq xmm13, xmm0, xmm0, 0
vpclmulqdq xmm14, xmm0, xmm0, 17
vpxor xmm11, xmm11, xmm10
vpslldq xmm10, xmm11, 8
vpsrldq xmm11, xmm11, 8
vpxor xmm10, xmm10, xmm9
vpclmulqdq xmm8, xmm13, xmm3, 16
vpclmulqdq xmm9, xmm10, xmm3, 16
vpshufd xmm10, xmm10, 78
vpshufd xmm13, xmm13, 78
vpxor xmm10, xmm10, xmm9
vpxor xmm13, xmm13, xmm8
vpclmulqdq xmm9, xmm10, xmm3, 16
vpclmulqdq xmm8, xmm13, xmm3, 16
vpshufd xmm10, xmm10, 78
vpshufd xmm13, xmm13, 78
vpxor xmm12, xmm12, xmm11
vpxor xmm13, xmm13, xmm8
vpxor xmm10, xmm10, xmm12
vpxor xmm2, xmm13, xmm14
vpxor xmm1, xmm10, xmm9
vmovdqu OWORD PTR [rsp+32], xmm1
vmovdqu OWORD PTR [rsp+48], xmm2
; H ^ 5 and H ^ 6
vpclmulqdq xmm11, xmm1, xmm0, 16
vpclmulqdq xmm10, xmm1, xmm0, 1
vpclmulqdq xmm9, xmm1, xmm0, 0
vpclmulqdq xmm12, xmm1, xmm0, 17
vpclmulqdq xmm13, xmm1, xmm1, 0
vpclmulqdq xmm14, xmm1, xmm1, 17
vpxor xmm11, xmm11, xmm10
vpslldq xmm10, xmm11, 8
vpsrldq xmm11, xmm11, 8
vpxor xmm10, xmm10, xmm9
vpclmulqdq xmm8, xmm13, xmm3, 16
vpclmulqdq xmm9, xmm10, xmm3, 16
vpshufd xmm10, xmm10, 78
vpshufd xmm13, xmm13, 78
vpxor xmm10, xmm10, xmm9
vpxor xmm13, xmm13, xmm8
vpclmulqdq xmm9, xmm10, xmm3, 16
vpclmulqdq xmm8, xmm13, xmm3, 16
vpshufd xmm10, xmm10, 78
vpshufd xmm13, xmm13, 78
vpxor xmm12, xmm12, xmm11
vpxor xmm13, xmm13, xmm8
vpxor xmm10, xmm10, xmm12
vpxor xmm0, xmm13, xmm14
vpxor xmm7, xmm10, xmm9
vmovdqu OWORD PTR [rsp+64], xmm7
vmovdqu OWORD PTR [rsp+80], xmm0
; H ^ 7 and H ^ 8
vpclmulqdq xmm11, xmm2, xmm1, 16
vpclmulqdq xmm10, xmm2, xmm1, 1
vpclmulqdq xmm9, xmm2, xmm1, 0
vpclmulqdq xmm12, xmm2, xmm1, 17
vpclmulqdq xmm13, xmm2, xmm2, 0
vpclmulqdq xmm14, xmm2, xmm2, 17
vpxor xmm11, xmm11, xmm10
vpslldq xmm10, xmm11, 8
vpsrldq xmm11, xmm11, 8
vpxor xmm10, xmm10, xmm9
vpclmulqdq xmm8, xmm13, xmm3, 16
vpclmulqdq xmm9, xmm10, xmm3, 16
vpshufd xmm10, xmm10, 78
vpshufd xmm13, xmm13, 78
vpxor xmm10, xmm10, xmm9
vpxor xmm13, xmm13, xmm8
vpclmulqdq xmm9, xmm10, xmm3, 16
vpclmulqdq xmm8, xmm13, xmm3, 16
vpshufd xmm10, xmm10, 78
vpshufd xmm13, xmm13, 78
vpxor xmm12, xmm12, xmm11
vpxor xmm13, xmm13, xmm8
vpxor xmm10, xmm10, xmm12
vpxor xmm0, xmm13, xmm14
vpxor xmm7, xmm10, xmm9
vmovdqu OWORD PTR [rsp+96], xmm7
vmovdqu OWORD PTR [rsp+112], xmm0
; First 128 bytes of input
; aesenc_128
; aesenc_ctr
vmovdqu xmm0, OWORD PTR [rsp+128]
vmovdqu xmm1, OWORD PTR L_avx2_aes_gcm_bswap_epi64
vpaddd xmm9, xmm0, OWORD PTR L_avx2_aes_gcm_one
vpshufb xmm8, xmm0, xmm1
vpaddd xmm10, xmm0, OWORD PTR L_avx2_aes_gcm_two
vpshufb xmm9, xmm9, xmm1
vpaddd xmm11, xmm0, OWORD PTR L_avx2_aes_gcm_three
vpshufb xmm10, xmm10, xmm1
vpaddd xmm12, xmm0, OWORD PTR L_avx2_aes_gcm_four
vpshufb xmm11, xmm11, xmm1
vpaddd xmm13, xmm0, OWORD PTR L_avx2_aes_gcm_five
vpshufb xmm12, xmm12, xmm1
vpaddd xmm14, xmm0, OWORD PTR L_avx2_aes_gcm_six
vpshufb xmm13, xmm13, xmm1
vpaddd xmm15, xmm0, OWORD PTR L_avx2_aes_gcm_seven
vpshufb xmm14, xmm14, xmm1
vpaddd xmm0, xmm0, OWORD PTR L_avx2_aes_gcm_eight
vpshufb xmm15, xmm15, xmm1
; aesenc_xor
vmovdqu xmm7, OWORD PTR [rsi]
vmovdqu OWORD PTR [rsp+128], xmm0
vpxor xmm8, xmm8, xmm7
vpxor xmm9, xmm9, xmm7
vpxor xmm10, xmm10, xmm7
vpxor xmm11, xmm11, xmm7
vpxor xmm12, xmm12, xmm7
vpxor xmm13, xmm13, xmm7
vpxor xmm14, xmm14, xmm7
vpxor xmm15, xmm15, xmm7
vmovdqu xmm7, OWORD PTR [rsi+16]
vaesenc xmm8, xmm8, xmm7
vaesenc xmm9, xmm9, xmm7
vaesenc xmm10, xmm10, xmm7
vaesenc xmm11, xmm11, xmm7
vaesenc xmm12, xmm12, xmm7
vaesenc xmm13, xmm13, xmm7
vaesenc xmm14, xmm14, xmm7
vaesenc xmm15, xmm15, xmm7
vmovdqu xmm7, OWORD PTR [rsi+32]
vaesenc xmm8, xmm8, xmm7
vaesenc xmm9, xmm9, xmm7
vaesenc xmm10, xmm10, xmm7
vaesenc xmm11, xmm11, xmm7
vaesenc xmm12, xmm12, xmm7
vaesenc xmm13, xmm13, xmm7
vaesenc xmm14, xmm14, xmm7
vaesenc xmm15, xmm15, xmm7
vmovdqu xmm7, OWORD PTR [rsi+48]
vaesenc xmm8, xmm8, xmm7
vaesenc xmm9, xmm9, xmm7
vaesenc xmm10, xmm10, xmm7
vaesenc xmm11, xmm11, xmm7
vaesenc xmm12, xmm12, xmm7
vaesenc xmm13, xmm13, xmm7
vaesenc xmm14, xmm14, xmm7
vaesenc xmm15, xmm15, xmm7
vmovdqu xmm7, OWORD PTR [rsi+64]
vaesenc xmm8, xmm8, xmm7
vaesenc xmm9, xmm9, xmm7
vaesenc xmm10, xmm10, xmm7
vaesenc xmm11, xmm11, xmm7
vaesenc xmm12, xmm12, xmm7
vaesenc xmm13, xmm13, xmm7
vaesenc xmm14, xmm14, xmm7
vaesenc xmm15, xmm15, xmm7
vmovdqu xmm7, OWORD PTR [rsi+80]
vaesenc xmm8, xmm8, xmm7
vaesenc xmm9, xmm9, xmm7
vaesenc xmm10, xmm10, xmm7
vaesenc xmm11, xmm11, xmm7
vaesenc xmm12, xmm12, xmm7
vaesenc xmm13, xmm13, xmm7
vaesenc xmm14, xmm14, xmm7
vaesenc xmm15, xmm15, xmm7
vmovdqu xmm7, OWORD PTR [rsi+96]
vaesenc xmm8, xmm8, xmm7
vaesenc xmm9, xmm9, xmm7
vaesenc xmm10, xmm10, xmm7
vaesenc xmm11, xmm11, xmm7
vaesenc xmm12, xmm12, xmm7
vaesenc xmm13, xmm13, xmm7
vaesenc xmm14, xmm14, xmm7
vaesenc xmm15, xmm15, xmm7
vmovdqu xmm7, OWORD PTR [rsi+112]
vaesenc xmm8, xmm8, xmm7
vaesenc xmm9, xmm9, xmm7
vaesenc xmm10, xmm10, xmm7
vaesenc xmm11, xmm11, xmm7
vaesenc xmm12, xmm12, xmm7
vaesenc xmm13, xmm13, xmm7
vaesenc xmm14, xmm14, xmm7
vaesenc xmm15, xmm15, xmm7
vmovdqu xmm7, OWORD PTR [rsi+128]
vaesenc xmm8, xmm8, xmm7
vaesenc xmm9, xmm9, xmm7
vaesenc xmm10, xmm10, xmm7
vaesenc xmm11, xmm11, xmm7
vaesenc xmm12, xmm12, xmm7
vaesenc xmm13, xmm13, xmm7
vaesenc xmm14, xmm14, xmm7
vaesenc xmm15, xmm15, xmm7
vmovdqu xmm7, OWORD PTR [rsi+144]
vaesenc xmm8, xmm8, xmm7
vaesenc xmm9, xmm9, xmm7
vaesenc xmm10, xmm10, xmm7
vaesenc xmm11, xmm11, xmm7
vaesenc xmm12, xmm12, xmm7
vaesenc xmm13, xmm13, xmm7
vaesenc xmm14, xmm14, xmm7
vaesenc xmm15, xmm15, xmm7
cmp r9d, 11
vmovdqu xmm7, OWORD PTR [rsi+160]
jl L_AES_GCM_encrypt_avx2_aesenc_128_enc_done
vaesenc xmm8, xmm8, xmm7
vaesenc xmm9, xmm9, xmm7
vaesenc xmm10, xmm10, xmm7
vaesenc xmm11, xmm11, xmm7
vaesenc xmm12, xmm12, xmm7
vaesenc xmm13, xmm13, xmm7
vaesenc xmm14, xmm14, xmm7
vaesenc xmm15, xmm15, xmm7
vmovdqu xmm7, OWORD PTR [rsi+176]
vaesenc xmm8, xmm8, xmm7
vaesenc xmm9, xmm9, xmm7
vaesenc xmm10, xmm10, xmm7
vaesenc xmm11, xmm11, xmm7
vaesenc xmm12, xmm12, xmm7
vaesenc xmm13, xmm13, xmm7
vaesenc xmm14, xmm14, xmm7
vaesenc xmm15, xmm15, xmm7
cmp r9d, 13
vmovdqu xmm7, OWORD PTR [rsi+192]
jl L_AES_GCM_encrypt_avx2_aesenc_128_enc_done
vaesenc xmm8, xmm8, xmm7
vaesenc xmm9, xmm9, xmm7
vaesenc xmm10, xmm10, xmm7
vaesenc xmm11, xmm11, xmm7
vaesenc xmm12, xmm12, xmm7
vaesenc xmm13, xmm13, xmm7
vaesenc xmm14, xmm14, xmm7
vaesenc xmm15, xmm15, xmm7
vmovdqu xmm7, OWORD PTR [rsi+208]
vaesenc xmm8, xmm8, xmm7
vaesenc xmm9, xmm9, xmm7
vaesenc xmm10, xmm10, xmm7
vaesenc xmm11, xmm11, xmm7
vaesenc xmm12, xmm12, xmm7
vaesenc xmm13, xmm13, xmm7
vaesenc xmm14, xmm14, xmm7
vaesenc xmm15, xmm15, xmm7
vmovdqu xmm7, OWORD PTR [rsi+224]
L_AES_GCM_encrypt_avx2_aesenc_128_enc_done:
; aesenc_last
vaesenclast xmm8, xmm8, xmm7
vaesenclast xmm9, xmm9, xmm7
vaesenclast xmm10, xmm10, xmm7
vaesenclast xmm11, xmm11, xmm7
vmovdqu xmm0, OWORD PTR [rdi]
vmovdqu xmm1, OWORD PTR [rdi+16]
vmovdqu xmm2, OWORD PTR [rdi+32]
vmovdqu xmm3, OWORD PTR [rdi+48]
vpxor xmm8, xmm8, xmm0
vpxor xmm9, xmm9, xmm1
vpxor xmm10, xmm10, xmm2
vpxor xmm11, xmm11, xmm3
vmovdqu OWORD PTR [r8], xmm8
vmovdqu OWORD PTR [r8+16], xmm9
vmovdqu OWORD PTR [r8+32], xmm10
vmovdqu OWORD PTR [r8+48], xmm11
vaesenclast xmm12, xmm12, xmm7
vaesenclast xmm13, xmm13, xmm7
vaesenclast xmm14, xmm14, xmm7
vaesenclast xmm15, xmm15, xmm7
vmovdqu xmm0, OWORD PTR [rdi+64]
vmovdqu xmm1, OWORD PTR [rdi+80]
vmovdqu xmm2, OWORD PTR [rdi+96]
vmovdqu xmm3, OWORD PTR [rdi+112]
vpxor xmm12, xmm12, xmm0
vpxor xmm13, xmm13, xmm1
vpxor xmm14, xmm14, xmm2
vpxor xmm15, xmm15, xmm3
vmovdqu OWORD PTR [r8+64], xmm12
vmovdqu OWORD PTR [r8+80], xmm13
vmovdqu OWORD PTR [r8+96], xmm14
vmovdqu OWORD PTR [r8+112], xmm15
cmp r13d, 128
mov ebx, 128
jle L_AES_GCM_encrypt_avx2_end_128
; More 128 bytes of input
L_AES_GCM_encrypt_avx2_ghash_128:
; aesenc_128_ghash
lea rcx, QWORD PTR [rdi+rbx]
lea rdx, QWORD PTR [r8+rbx]
; aesenc_ctr
vmovdqu xmm0, OWORD PTR [rsp+128]
vmovdqu xmm1, OWORD PTR L_avx2_aes_gcm_bswap_epi64
vpaddd xmm9, xmm0, OWORD PTR L_avx2_aes_gcm_one
vpshufb xmm8, xmm0, xmm1
vpaddd xmm10, xmm0, OWORD PTR L_avx2_aes_gcm_two
vpshufb xmm9, xmm9, xmm1
vpaddd xmm11, xmm0, OWORD PTR L_avx2_aes_gcm_three
vpshufb xmm10, xmm10, xmm1
vpaddd xmm12, xmm0, OWORD PTR L_avx2_aes_gcm_four
vpshufb xmm11, xmm11, xmm1
vpaddd xmm13, xmm0, OWORD PTR L_avx2_aes_gcm_five
vpshufb xmm12, xmm12, xmm1
vpaddd xmm14, xmm0, OWORD PTR L_avx2_aes_gcm_six
vpshufb xmm13, xmm13, xmm1
vpaddd xmm15, xmm0, OWORD PTR L_avx2_aes_gcm_seven
vpshufb xmm14, xmm14, xmm1
vpaddd xmm0, xmm0, OWORD PTR L_avx2_aes_gcm_eight
vpshufb xmm15, xmm15, xmm1
; aesenc_xor
vmovdqu xmm7, OWORD PTR [rsi]
vmovdqu OWORD PTR [rsp+128], xmm0
vpxor xmm8, xmm8, xmm7
vpxor xmm9, xmm9, xmm7
vpxor xmm10, xmm10, xmm7
vpxor xmm11, xmm11, xmm7
vpxor xmm12, xmm12, xmm7
vpxor xmm13, xmm13, xmm7
vpxor xmm14, xmm14, xmm7
vpxor xmm15, xmm15, xmm7
; aesenc_pclmul_1
vmovdqu xmm1, OWORD PTR [rdx+-128]
vmovdqu xmm0, OWORD PTR [rsi+16]
vpshufb xmm1, xmm1, OWORD PTR L_avx2_aes_gcm_bswap_mask
vmovdqu xmm2, OWORD PTR [rsp+112]
vpxor xmm1, xmm1, xmm6
vpclmulqdq xmm5, xmm1, xmm2, 16
vpclmulqdq xmm3, xmm1, xmm2, 1
vpclmulqdq xmm6, xmm1, xmm2, 0
vpclmulqdq xmm7, xmm1, xmm2, 17
vaesenc xmm8, xmm8, xmm0
vaesenc xmm9, xmm9, xmm0
vaesenc xmm10, xmm10, xmm0
vaesenc xmm11, xmm11, xmm0
vaesenc xmm12, xmm12, xmm0
vaesenc xmm13, xmm13, xmm0
vaesenc xmm14, xmm14, xmm0
vaesenc xmm15, xmm15, xmm0
; aesenc_pclmul_2
vmovdqu xmm1, OWORD PTR [rdx+-112]
vmovdqu xmm0, OWORD PTR [rsp+96]
vpshufb xmm1, xmm1, OWORD PTR L_avx2_aes_gcm_bswap_mask
vpxor xmm5, xmm5, xmm3
vpclmulqdq xmm2, xmm1, xmm0, 16
vpclmulqdq xmm3, xmm1, xmm0, 1
vpclmulqdq xmm4, xmm1, xmm0, 0
vpclmulqdq xmm1, xmm1, xmm0, 17
vmovdqu xmm0, OWORD PTR [rsi+32]
vpxor xmm7, xmm7, xmm1
vaesenc xmm8, xmm8, xmm0
vaesenc xmm9, xmm9, xmm0
vaesenc xmm10, xmm10, xmm0
vaesenc xmm11, xmm11, xmm0
vaesenc xmm12, xmm12, xmm0
vaesenc xmm13, xmm13, xmm0
vaesenc xmm14, xmm14, xmm0
vaesenc xmm15, xmm15, xmm0
; aesenc_pclmul_n
vmovdqu xmm1, OWORD PTR [rdx+-96]
vmovdqu xmm0, OWORD PTR [rsp+80]
vpshufb xmm1, xmm1, OWORD PTR L_avx2_aes_gcm_bswap_mask
vpxor xmm5, xmm5, xmm2
vpclmulqdq xmm2, xmm1, xmm0, 16
vpxor xmm5, xmm5, xmm3
vpclmulqdq xmm3, xmm1, xmm0, 1
vpxor xmm6, xmm6, xmm4
vpclmulqdq xmm4, xmm1, xmm0, 0
vpclmulqdq xmm1, xmm1, xmm0, 17
vmovdqu xmm0, OWORD PTR [rsi+48]
vpxor xmm7, xmm7, xmm1
vaesenc xmm8, xmm8, xmm0
vaesenc xmm9, xmm9, xmm0
vaesenc xmm10, xmm10, xmm0
vaesenc xmm11, xmm11, xmm0
vaesenc xmm12, xmm12, xmm0
vaesenc xmm13, xmm13, xmm0
vaesenc xmm14, xmm14, xmm0
vaesenc xmm15, xmm15, xmm0
; aesenc_pclmul_n
vmovdqu xmm1, OWORD PTR [rdx+-80]
vmovdqu xmm0, OWORD PTR [rsp+64]
vpshufb xmm1, xmm1, OWORD PTR L_avx2_aes_gcm_bswap_mask
vpxor xmm5, xmm5, xmm2
vpclmulqdq xmm2, xmm1, xmm0, 16
vpxor xmm5, xmm5, xmm3
vpclmulqdq xmm3, xmm1, xmm0, 1
vpxor xmm6, xmm6, xmm4
vpclmulqdq xmm4, xmm1, xmm0, 0
vpclmulqdq xmm1, xmm1, xmm0, 17
vmovdqu xmm0, OWORD PTR [rsi+64]
vpxor xmm7, xmm7, xmm1
vaesenc xmm8, xmm8, xmm0
vaesenc xmm9, xmm9, xmm0
vaesenc xmm10, xmm10, xmm0
vaesenc xmm11, xmm11, xmm0
vaesenc xmm12, xmm12, xmm0
vaesenc xmm13, xmm13, xmm0
vaesenc xmm14, xmm14, xmm0
vaesenc xmm15, xmm15, xmm0
; aesenc_pclmul_n
vmovdqu xmm1, OWORD PTR [rdx+-64]
vmovdqu xmm0, OWORD PTR [rsp+48]
vpshufb xmm1, xmm1, OWORD PTR L_avx2_aes_gcm_bswap_mask
vpxor xmm5, xmm5, xmm2
vpclmulqdq xmm2, xmm1, xmm0, 16
vpxor xmm5, xmm5, xmm3
vpclmulqdq xmm3, xmm1, xmm0, 1
vpxor xmm6, xmm6, xmm4
vpclmulqdq xmm4, xmm1, xmm0, 0
vpclmulqdq xmm1, xmm1, xmm0, 17
vmovdqu xmm0, OWORD PTR [rsi+80]
vpxor xmm7, xmm7, xmm1
vaesenc xmm8, xmm8, xmm0
vaesenc xmm9, xmm9, xmm0
vaesenc xmm10, xmm10, xmm0
vaesenc xmm11, xmm11, xmm0
vaesenc xmm12, xmm12, xmm0
vaesenc xmm13, xmm13, xmm0
vaesenc xmm14, xmm14, xmm0
vaesenc xmm15, xmm15, xmm0
; aesenc_pclmul_n
vmovdqu xmm1, OWORD PTR [rdx+-48]
vmovdqu xmm0, OWORD PTR [rsp+32]
vpshufb xmm1, xmm1, OWORD PTR L_avx2_aes_gcm_bswap_mask
vpxor xmm5, xmm5, xmm2
vpclmulqdq xmm2, xmm1, xmm0, 16
vpxor xmm5, xmm5, xmm3
vpclmulqdq xmm3, xmm1, xmm0, 1
vpxor xmm6, xmm6, xmm4
vpclmulqdq xmm4, xmm1, xmm0, 0
vpclmulqdq xmm1, xmm1, xmm0, 17
vmovdqu xmm0, OWORD PTR [rsi+96]
vpxor xmm7, xmm7, xmm1
vaesenc xmm8, xmm8, xmm0
vaesenc xmm9, xmm9, xmm0
vaesenc xmm10, xmm10, xmm0
vaesenc xmm11, xmm11, xmm0
vaesenc xmm12, xmm12, xmm0
vaesenc xmm13, xmm13, xmm0
vaesenc xmm14, xmm14, xmm0
vaesenc xmm15, xmm15, xmm0
; aesenc_pclmul_n
vmovdqu xmm1, OWORD PTR [rdx+-32]
vmovdqu xmm0, OWORD PTR [rsp+16]
vpshufb xmm1, xmm1, OWORD PTR L_avx2_aes_gcm_bswap_mask
vpxor xmm5, xmm5, xmm2
vpclmulqdq xmm2, xmm1, xmm0, 16
vpxor xmm5, xmm5, xmm3
vpclmulqdq xmm3, xmm1, xmm0, 1
vpxor xmm6, xmm6, xmm4
vpclmulqdq xmm4, xmm1, xmm0, 0
vpclmulqdq xmm1, xmm1, xmm0, 17
vmovdqu xmm0, OWORD PTR [rsi+112]
vpxor xmm7, xmm7, xmm1
vaesenc xmm8, xmm8, xmm0
vaesenc xmm9, xmm9, xmm0
vaesenc xmm10, xmm10, xmm0
vaesenc xmm11, xmm11, xmm0
vaesenc xmm12, xmm12, xmm0
vaesenc xmm13, xmm13, xmm0
vaesenc xmm14, xmm14, xmm0
vaesenc xmm15, xmm15, xmm0
; aesenc_pclmul_n
vmovdqu xmm1, OWORD PTR [rdx+-16]
vmovdqu xmm0, OWORD PTR [rsp]
vpshufb xmm1, xmm1, OWORD PTR L_avx2_aes_gcm_bswap_mask
vpxor xmm5, xmm5, xmm2
vpclmulqdq xmm2, xmm1, xmm0, 16
vpxor xmm5, xmm5, xmm3
vpclmulqdq xmm3, xmm1, xmm0, 1
vpxor xmm6, xmm6, xmm4
vpclmulqdq xmm4, xmm1, xmm0, 0
vpclmulqdq xmm1, xmm1, xmm0, 17
vmovdqu xmm0, OWORD PTR [rsi+128]
vpxor xmm7, xmm7, xmm1
vaesenc xmm8, xmm8, xmm0
vaesenc xmm9, xmm9, xmm0
vaesenc xmm10, xmm10, xmm0
vaesenc xmm11, xmm11, xmm0
vaesenc xmm12, xmm12, xmm0
vaesenc xmm13, xmm13, xmm0
vaesenc xmm14, xmm14, xmm0
vaesenc xmm15, xmm15, xmm0
; aesenc_pclmul_l
vpxor xmm5, xmm5, xmm2
vpxor xmm6, xmm6, xmm4
vpxor xmm5, xmm5, xmm3
vpslldq xmm1, xmm5, 8
vpsrldq xmm5, xmm5, 8
vmovdqu xmm4, OWORD PTR [rsi+144]
vmovdqu xmm0, OWORD PTR L_avx2_aes_gcm_mod2_128
vaesenc xmm8, xmm8, xmm4
vpxor xmm6, xmm6, xmm1
vpxor xmm7, xmm7, xmm5
vpclmulqdq xmm3, xmm6, xmm0, 16
vaesenc xmm9, xmm9, xmm4
vaesenc xmm10, xmm10, xmm4
vaesenc xmm11, xmm11, xmm4
vpshufd xmm6, xmm6, 78
vpxor xmm6, xmm6, xmm3
vpclmulqdq xmm3, xmm6, xmm0, 16
vaesenc xmm12, xmm12, xmm4
vaesenc xmm13, xmm13, xmm4
vaesenc xmm14, xmm14, xmm4
vpshufd xmm6, xmm6, 78
vpxor xmm6, xmm6, xmm3
vpxor xmm6, xmm6, xmm7
vaesenc xmm15, xmm15, xmm4
cmp r9d, 11
vmovdqu xmm7, OWORD PTR [rsi+160]
jl L_AES_GCM_encrypt_avx2_aesenc_128_ghash_avx_done
vaesenc xmm8, xmm8, xmm7
vaesenc xmm9, xmm9, xmm7
vaesenc xmm10, xmm10, xmm7
vaesenc xmm11, xmm11, xmm7
vaesenc xmm12, xmm12, xmm7
vaesenc xmm13, xmm13, xmm7
vaesenc xmm14, xmm14, xmm7
vaesenc xmm15, xmm15, xmm7
vmovdqu xmm7, OWORD PTR [rsi+176]
vaesenc xmm8, xmm8, xmm7
vaesenc xmm9, xmm9, xmm7
vaesenc xmm10, xmm10, xmm7
vaesenc xmm11, xmm11, xmm7
vaesenc xmm12, xmm12, xmm7
vaesenc xmm13, xmm13, xmm7
vaesenc xmm14, xmm14, xmm7
vaesenc xmm15, xmm15, xmm7
cmp r9d, 13
vmovdqu xmm7, OWORD PTR [rsi+192]
jl L_AES_GCM_encrypt_avx2_aesenc_128_ghash_avx_done
vaesenc xmm8, xmm8, xmm7
vaesenc xmm9, xmm9, xmm7
vaesenc xmm10, xmm10, xmm7
vaesenc xmm11, xmm11, xmm7
vaesenc xmm12, xmm12, xmm7
vaesenc xmm13, xmm13, xmm7
vaesenc xmm14, xmm14, xmm7
vaesenc xmm15, xmm15, xmm7
vmovdqu xmm7, OWORD PTR [rsi+208]
vaesenc xmm8, xmm8, xmm7
vaesenc xmm9, xmm9, xmm7
vaesenc xmm10, xmm10, xmm7
vaesenc xmm11, xmm11, xmm7
vaesenc xmm12, xmm12, xmm7
vaesenc xmm13, xmm13, xmm7
vaesenc xmm14, xmm14, xmm7
vaesenc xmm15, xmm15, xmm7
vmovdqu xmm7, OWORD PTR [rsi+224]
L_AES_GCM_encrypt_avx2_aesenc_128_ghash_avx_done:
; aesenc_last
vaesenclast xmm8, xmm8, xmm7
vaesenclast xmm9, xmm9, xmm7
vaesenclast xmm10, xmm10, xmm7
vaesenclast xmm11, xmm11, xmm7
vmovdqu xmm0, OWORD PTR [rcx]
vmovdqu xmm1, OWORD PTR [rcx+16]
vmovdqu xmm2, OWORD PTR [rcx+32]
vmovdqu xmm3, OWORD PTR [rcx+48]
vpxor xmm8, xmm8, xmm0
vpxor xmm9, xmm9, xmm1
vpxor xmm10, xmm10, xmm2
vpxor xmm11, xmm11, xmm3
vmovdqu OWORD PTR [rdx], xmm8
vmovdqu OWORD PTR [rdx+16], xmm9
vmovdqu OWORD PTR [rdx+32], xmm10
vmovdqu OWORD PTR [rdx+48], xmm11
vaesenclast xmm12, xmm12, xmm7
vaesenclast xmm13, xmm13, xmm7
vaesenclast xmm14, xmm14, xmm7
vaesenclast xmm15, xmm15, xmm7
vmovdqu xmm0, OWORD PTR [rcx+64]
vmovdqu xmm1, OWORD PTR [rcx+80]
vmovdqu xmm2, OWORD PTR [rcx+96]
vmovdqu xmm3, OWORD PTR [rcx+112]
vpxor xmm12, xmm12, xmm0
vpxor xmm13, xmm13, xmm1
vpxor xmm14, xmm14, xmm2
vpxor xmm15, xmm15, xmm3
vmovdqu OWORD PTR [rdx+64], xmm12
vmovdqu OWORD PTR [rdx+80], xmm13
vmovdqu OWORD PTR [rdx+96], xmm14
vmovdqu OWORD PTR [rdx+112], xmm15
; aesenc_128_ghash - end
add ebx, 128
cmp ebx, r13d
jl L_AES_GCM_encrypt_avx2_ghash_128
L_AES_GCM_encrypt_avx2_end_128:
vmovdqu xmm4, OWORD PTR L_avx2_aes_gcm_bswap_mask
vpshufb xmm8, xmm8, xmm4
vpshufb xmm9, xmm9, xmm4
vpshufb xmm10, xmm10, xmm4
vpshufb xmm11, xmm11, xmm4
vpshufb xmm12, xmm12, xmm4
vpshufb xmm13, xmm13, xmm4
vpshufb xmm14, xmm14, xmm4
vpshufb xmm15, xmm15, xmm4
vpxor xmm8, xmm8, xmm6
vmovdqu xmm7, OWORD PTR [rsp]
vpclmulqdq xmm5, xmm7, xmm15, 16
vpclmulqdq xmm1, xmm7, xmm15, 1
vpclmulqdq xmm4, xmm7, xmm15, 0
vpclmulqdq xmm6, xmm7, xmm15, 17
vpxor xmm5, xmm5, xmm1
vmovdqu xmm7, OWORD PTR [rsp+16]
vpclmulqdq xmm2, xmm7, xmm14, 16
vpclmulqdq xmm1, xmm7, xmm14, 1
vpclmulqdq xmm0, xmm7, xmm14, 0
vpclmulqdq xmm3, xmm7, xmm14, 17
vpxor xmm2, xmm2, xmm1
vpxor xmm6, xmm6, xmm3
vpxor xmm5, xmm5, xmm2
vpxor xmm4, xmm4, xmm0
vmovdqu xmm15, OWORD PTR [rsp+32]
vmovdqu xmm7, OWORD PTR [rsp+48]
vpclmulqdq xmm2, xmm15, xmm13, 16
vpclmulqdq xmm1, xmm15, xmm13, 1
vpclmulqdq xmm0, xmm15, xmm13, 0
vpclmulqdq xmm3, xmm15, xmm13, 17
vpxor xmm2, xmm2, xmm1
vpxor xmm6, xmm6, xmm3
vpxor xmm5, xmm5, xmm2
vpxor xmm4, xmm4, xmm0
vpclmulqdq xmm2, xmm7, xmm12, 16
vpclmulqdq xmm1, xmm7, xmm12, 1
vpclmulqdq xmm0, xmm7, xmm12, 0
vpclmulqdq xmm3, xmm7, xmm12, 17
vpxor xmm2, xmm2, xmm1
vpxor xmm6, xmm6, xmm3
vpxor xmm5, xmm5, xmm2
vpxor xmm4, xmm4, xmm0
vmovdqu xmm15, OWORD PTR [rsp+64]
vmovdqu xmm7, OWORD PTR [rsp+80]
vpclmulqdq xmm2, xmm15, xmm11, 16
vpclmulqdq xmm1, xmm15, xmm11, 1
vpclmulqdq xmm0, xmm15, xmm11, 0
vpclmulqdq xmm3, xmm15, xmm11, 17
vpxor xmm2, xmm2, xmm1
vpxor xmm6, xmm6, xmm3
vpxor xmm5, xmm5, xmm2
vpxor xmm4, xmm4, xmm0
vpclmulqdq xmm2, xmm7, xmm10, 16
vpclmulqdq xmm1, xmm7, xmm10, 1
vpclmulqdq xmm0, xmm7, xmm10, 0
vpclmulqdq xmm3, xmm7, xmm10, 17
vpxor xmm2, xmm2, xmm1
vpxor xmm6, xmm6, xmm3
vpxor xmm5, xmm5, xmm2
vpxor xmm4, xmm4, xmm0
vmovdqu xmm15, OWORD PTR [rsp+96]
vmovdqu xmm7, OWORD PTR [rsp+112]
vpclmulqdq xmm2, xmm15, xmm9, 16
vpclmulqdq xmm1, xmm15, xmm9, 1
vpclmulqdq xmm0, xmm15, xmm9, 0
vpclmulqdq xmm3, xmm15, xmm9, 17
vpxor xmm2, xmm2, xmm1
vpxor xmm6, xmm6, xmm3
vpxor xmm5, xmm5, xmm2
vpxor xmm4, xmm4, xmm0
vpclmulqdq xmm2, xmm7, xmm8, 16
vpclmulqdq xmm1, xmm7, xmm8, 1
vpclmulqdq xmm0, xmm7, xmm8, 0
vpclmulqdq xmm3, xmm7, xmm8, 17
vpxor xmm2, xmm2, xmm1
vpxor xmm6, xmm6, xmm3
vpxor xmm5, xmm5, xmm2
vpxor xmm4, xmm4, xmm0
vpslldq xmm7, xmm5, 8
vpsrldq xmm5, xmm5, 8
vpxor xmm4, xmm4, xmm7
vpxor xmm6, xmm6, xmm5
; ghash_red
vmovdqu xmm2, OWORD PTR L_avx2_aes_gcm_mod2_128
vpclmulqdq xmm0, xmm4, xmm2, 16
vpshufd xmm1, xmm4, 78
vpxor xmm1, xmm1, xmm0
vpclmulqdq xmm0, xmm1, xmm2, 16
vpshufd xmm1, xmm1, 78
vpxor xmm1, xmm1, xmm0
vpxor xmm6, xmm6, xmm1
vmovdqu xmm5, OWORD PTR [rsp]
vmovdqu xmm4, OWORD PTR [rsp+128]
vmovdqu xmm15, OWORD PTR [rsp+144]
L_AES_GCM_encrypt_avx2_done_128:
cmp ebx, r10d
je L_AES_GCM_encrypt_avx2_done_enc
mov r13d, r10d
and r13d, 4294967280
cmp ebx, r13d
jge L_AES_GCM_encrypt_avx2_last_block_done
; aesenc_block
vmovdqu xmm1, xmm4
vpshufb xmm0, xmm1, OWORD PTR L_avx2_aes_gcm_bswap_epi64
vpaddd xmm1, xmm1, OWORD PTR L_avx2_aes_gcm_one
vpxor xmm0, xmm0, [rsi]
vmovdqu xmm2, OWORD PTR [rsi+16]
vaesenc xmm0, xmm0, xmm2
vmovdqu xmm2, OWORD PTR [rsi+32]
vaesenc xmm0, xmm0, xmm2
vmovdqu xmm2, OWORD PTR [rsi+48]
vaesenc xmm0, xmm0, xmm2
vmovdqu xmm2, OWORD PTR [rsi+64]
vaesenc xmm0, xmm0, xmm2
vmovdqu xmm2, OWORD PTR [rsi+80]
vaesenc xmm0, xmm0, xmm2
vmovdqu xmm2, OWORD PTR [rsi+96]
vaesenc xmm0, xmm0, xmm2
vmovdqu xmm2, OWORD PTR [rsi+112]
vaesenc xmm0, xmm0, xmm2
vmovdqu xmm2, OWORD PTR [rsi+128]
vaesenc xmm0, xmm0, xmm2
vmovdqu xmm2, OWORD PTR [rsi+144]
vaesenc xmm0, xmm0, xmm2
vmovdqu xmm4, xmm1
cmp r9d, 11
vmovdqu xmm1, OWORD PTR [rsi+160]
jl L_AES_GCM_encrypt_avx2_aesenc_block_last
vaesenc xmm0, xmm0, xmm1
vmovdqu xmm2, OWORD PTR [rsi+176]
vaesenc xmm0, xmm0, xmm2
cmp r9d, 13
vmovdqu xmm1, OWORD PTR [rsi+192]
jl L_AES_GCM_encrypt_avx2_aesenc_block_last
vaesenc xmm0, xmm0, xmm1
vmovdqu xmm2, OWORD PTR [rsi+208]
vaesenc xmm0, xmm0, xmm2
vmovdqu xmm1, OWORD PTR [rsi+224]
L_AES_GCM_encrypt_avx2_aesenc_block_last:
vaesenclast xmm0, xmm0, xmm1
vmovdqu xmm1, OWORD PTR [rdi+rbx]
vpxor xmm0, xmm0, xmm1
vmovdqu OWORD PTR [r8+rbx], xmm0
vpshufb xmm0, xmm0, OWORD PTR L_avx2_aes_gcm_bswap_mask
vpxor xmm6, xmm6, xmm0
add ebx, 16
cmp ebx, r13d
jge L_AES_GCM_encrypt_avx2_last_block_ghash
L_AES_GCM_encrypt_avx2_last_block_start:
vmovdqu xmm12, OWORD PTR [rdi+rbx]
vpshufb xmm11, xmm4, OWORD PTR L_avx2_aes_gcm_bswap_epi64
vpaddd xmm4, xmm4, OWORD PTR L_avx2_aes_gcm_one
; aesenc_gfmul_sb
vpclmulqdq xmm2, xmm6, xmm5, 1
vpclmulqdq xmm3, xmm6, xmm5, 16
vpclmulqdq xmm1, xmm6, xmm5, 0
vpclmulqdq xmm8, xmm6, xmm5, 17
vpxor xmm11, xmm11, [rsi]
vaesenc xmm11, xmm11, [rsi+16]
vpxor xmm3, xmm3, xmm2
vpslldq xmm2, xmm3, 8
vpsrldq xmm3, xmm3, 8
vaesenc xmm11, xmm11, [rsi+32]
vpxor xmm2, xmm2, xmm1
vpclmulqdq xmm1, xmm2, OWORD PTR L_avx2_aes_gcm_mod2_128, 16
vaesenc xmm11, xmm11, [rsi+48]
vaesenc xmm11, xmm11, [rsi+64]
vaesenc xmm11, xmm11, [rsi+80]
vpshufd xmm2, xmm2, 78
vpxor xmm2, xmm2, xmm1
vpclmulqdq xmm1, xmm2, OWORD PTR L_avx2_aes_gcm_mod2_128, 16
vaesenc xmm11, xmm11, [rsi+96]
vaesenc xmm11, xmm11, [rsi+112]
vaesenc xmm11, xmm11, [rsi+128]
vpshufd xmm2, xmm2, 78
vaesenc xmm11, xmm11, [rsi+144]
vpxor xmm8, xmm8, xmm3
vpxor xmm2, xmm2, xmm8
vmovdqu xmm0, OWORD PTR [rsi+160]
cmp r9d, 11
jl L_AES_GCM_encrypt_avx2_aesenc_gfmul_sb_last
vaesenc xmm11, xmm11, xmm0
vaesenc xmm11, xmm11, [rsi+176]
vmovdqu xmm0, OWORD PTR [rsi+192]
cmp r9d, 13
jl L_AES_GCM_encrypt_avx2_aesenc_gfmul_sb_last
vaesenc xmm11, xmm11, xmm0
vaesenc xmm11, xmm11, [rsi+208]
vmovdqu xmm0, OWORD PTR [rsi+224]
L_AES_GCM_encrypt_avx2_aesenc_gfmul_sb_last:
vaesenclast xmm11, xmm11, xmm0
vpxor xmm6, xmm2, xmm1
vpxor xmm11, xmm11, xmm12
vmovdqu OWORD PTR [r8+rbx], xmm11
vpshufb xmm11, xmm11, OWORD PTR L_avx2_aes_gcm_bswap_mask
vpxor xmm6, xmm6, xmm11
add ebx, 16
cmp ebx, r13d
jl L_AES_GCM_encrypt_avx2_last_block_start
L_AES_GCM_encrypt_avx2_last_block_ghash:
; ghash_gfmul_red
vpclmulqdq xmm10, xmm6, xmm5, 16
vpclmulqdq xmm9, xmm6, xmm5, 1
vpclmulqdq xmm8, xmm6, xmm5, 0
vpxor xmm10, xmm10, xmm9
vpslldq xmm9, xmm10, 8
vpsrldq xmm10, xmm10, 8
vpxor xmm9, xmm9, xmm8
vpclmulqdq xmm6, xmm6, xmm5, 17
vpclmulqdq xmm8, xmm9, OWORD PTR L_avx2_aes_gcm_mod2_128, 16
vpshufd xmm9, xmm9, 78
vpxor xmm9, xmm9, xmm8
vpclmulqdq xmm8, xmm9, OWORD PTR L_avx2_aes_gcm_mod2_128, 16
vpshufd xmm9, xmm9, 78
vpxor xmm6, xmm6, xmm10
vpxor xmm6, xmm6, xmm9
vpxor xmm6, xmm6, xmm8
L_AES_GCM_encrypt_avx2_last_block_done:
mov ecx, r10d
mov edx, r10d
and ecx, 15
jz L_AES_GCM_encrypt_avx2_done_enc
; aesenc_last15_enc
vpshufb xmm4, xmm4, OWORD PTR L_avx2_aes_gcm_bswap_epi64
vpxor xmm4, xmm4, [rsi]
vaesenc xmm4, xmm4, [rsi+16]
vaesenc xmm4, xmm4, [rsi+32]
vaesenc xmm4, xmm4, [rsi+48]
vaesenc xmm4, xmm4, [rsi+64]
vaesenc xmm4, xmm4, [rsi+80]
vaesenc xmm4, xmm4, [rsi+96]
vaesenc xmm4, xmm4, [rsi+112]
vaesenc xmm4, xmm4, [rsi+128]
vaesenc xmm4, xmm4, [rsi+144]
cmp r9d, 11
vmovdqu xmm0, OWORD PTR [rsi+160]
jl L_AES_GCM_encrypt_avx2_aesenc_last15_enc_avx_aesenc_avx_last
vaesenc xmm4, xmm4, xmm0
vaesenc xmm4, xmm4, [rsi+176]
cmp r9d, 13
vmovdqu xmm0, OWORD PTR [rsi+192]
jl L_AES_GCM_encrypt_avx2_aesenc_last15_enc_avx_aesenc_avx_last
vaesenc xmm4, xmm4, xmm0
vaesenc xmm4, xmm4, [rsi+208]
vmovdqu xmm0, OWORD PTR [rsi+224]
L_AES_GCM_encrypt_avx2_aesenc_last15_enc_avx_aesenc_avx_last:
vaesenclast xmm4, xmm4, xmm0
xor ecx, ecx
vpxor xmm0, xmm0, xmm0
vmovdqu OWORD PTR [rsp], xmm4
vmovdqu OWORD PTR [rsp+16], xmm0
L_AES_GCM_encrypt_avx2_aesenc_last15_enc_avx_loop:
movzx r13d, BYTE PTR [rdi+rbx]
xor r13b, BYTE PTR [rsp+rcx]
mov BYTE PTR [rsp+rcx+16], r13b
mov BYTE PTR [r8+rbx], r13b
inc ebx
inc ecx
cmp ebx, edx
jl L_AES_GCM_encrypt_avx2_aesenc_last15_enc_avx_loop
L_AES_GCM_encrypt_avx2_aesenc_last15_enc_avx_finish_enc:
vmovdqu xmm4, OWORD PTR [rsp+16]
vpshufb xmm4, xmm4, OWORD PTR L_avx2_aes_gcm_bswap_mask
vpxor xmm6, xmm6, xmm4
; ghash_gfmul_red
vpclmulqdq xmm2, xmm6, xmm5, 16
vpclmulqdq xmm1, xmm6, xmm5, 1
vpclmulqdq xmm0, xmm6, xmm5, 0
vpxor xmm2, xmm2, xmm1
vpslldq xmm1, xmm2, 8
vpsrldq xmm2, xmm2, 8
vpxor xmm1, xmm1, xmm0
vpclmulqdq xmm6, xmm6, xmm5, 17
vpclmulqdq xmm0, xmm1, OWORD PTR L_avx2_aes_gcm_mod2_128, 16
vpshufd xmm1, xmm1, 78
vpxor xmm1, xmm1, xmm0
vpclmulqdq xmm0, xmm1, OWORD PTR L_avx2_aes_gcm_mod2_128, 16
vpshufd xmm1, xmm1, 78
vpxor xmm6, xmm6, xmm2
vpxor xmm6, xmm6, xmm1
vpxor xmm6, xmm6, xmm0
L_AES_GCM_encrypt_avx2_done_enc:
; calc_tag
shl r10, 3
shl r11, 3
vmovq xmm0, r10
vmovq xmm1, r11
vpunpcklqdq xmm0, xmm0, xmm1
vpxor xmm0, xmm0, xmm6
; ghash_gfmul_red
vpclmulqdq xmm4, xmm0, xmm5, 16
vpclmulqdq xmm3, xmm0, xmm5, 1
vpclmulqdq xmm2, xmm0, xmm5, 0
vpxor xmm4, xmm4, xmm3
vpslldq xmm3, xmm4, 8
vpsrldq xmm4, xmm4, 8
vpxor xmm3, xmm3, xmm2
vpclmulqdq xmm0, xmm0, xmm5, 17
vpclmulqdq xmm2, xmm3, OWORD PTR L_avx2_aes_gcm_mod2_128, 16
vpshufd xmm3, xmm3, 78
vpxor xmm3, xmm3, xmm2
vpclmulqdq xmm2, xmm3, OWORD PTR L_avx2_aes_gcm_mod2_128, 16
vpshufd xmm3, xmm3, 78
vpxor xmm0, xmm0, xmm4
vpxor xmm0, xmm0, xmm3
vpxor xmm0, xmm0, xmm2
vpshufb xmm0, xmm0, OWORD PTR L_avx2_aes_gcm_bswap_mask
vpxor xmm0, xmm0, xmm15
; store_tag
cmp r14d, 16
je L_AES_GCM_encrypt_avx2_store_tag_16
xor rcx, rcx
vmovdqu OWORD PTR [rsp], xmm0
L_AES_GCM_encrypt_avx2_store_tag_loop:
movzx r13d, BYTE PTR [rsp+rcx]
mov BYTE PTR [r15+rcx], r13b
inc ecx
cmp ecx, r14d
jne L_AES_GCM_encrypt_avx2_store_tag_loop
jmp L_AES_GCM_encrypt_avx2_store_tag_done
L_AES_GCM_encrypt_avx2_store_tag_16:
vmovdqu OWORD PTR [r15], xmm0
L_AES_GCM_encrypt_avx2_store_tag_done:
vzeroupper
vmovdqu xmm6, OWORD PTR [rsp+160]
vmovdqu xmm7, OWORD PTR [rsp+176]
vmovdqu xmm8, OWORD PTR [rsp+192]
vmovdqu xmm9, OWORD PTR [rsp+208]
vmovdqu xmm10, OWORD PTR [rsp+224]
vmovdqu xmm11, OWORD PTR [rsp+240]
vmovdqu xmm12, OWORD PTR [rsp+256]
vmovdqu xmm13, OWORD PTR [rsp+272]
vmovdqu xmm14, OWORD PTR [rsp+288]
vmovdqu xmm15, OWORD PTR [rsp+304]
add rsp, 320
pop rsi
pop r14
pop rbx
pop r15
pop r12
pop rdi
pop r13
ret
AES_GCM_encrypt_avx2 ENDP
_text ENDS
_text SEGMENT READONLY PARA
;-----------------------------------------------------------------------
; AES_GCM_decrypt_avx2 - AES-GCM decryption with tag verification
;                        (AVX + AES-NI + PCLMULQDQ implementation).
; ABI: Microsoft x64. First four args in rcx, rdx, r8, r9; the rest on
; the stack (offsets below are after the eight pushes: 8*8 + 8 = +72,
; so the fifth stack argument sits at [rsp+104], etc.).
; Register roles after the prologue (grounded in the loads/uses below):
;   rdi = ciphertext in pointer  (arg1; read at [rdi+rbx])
;   r8  = plaintext out pointer  (arg2; written at [r8+rbx])
;   r12 = AAD pointer            (arg3; read at [r12+rcx])
;   rax = IV pointer             (arg4; read at [rax+rcx])
;   r14 = expected tag pointer   (arg5 @ [rsp+104]; compared below)
;   r10 = message length, bytes  (arg6 @ [rsp+112])
;   r11 = AAD length, bytes      (arg7 @ [rsp+120])
;   rbx = IV length, then reused as the running byte offset
;   r15 = tag length, bytes      (arg9 @ [rsp+136])
;   rsi = AES key schedule       (arg10 @ [rsp+144]; round keys at [rsi+16*i])
;   r9  = number of AES rounds   (arg11 @ [rsp+152]; 11/13 tested for 192/256)
;   rbp = out pointer for the authentication result (arg12 @ [rsp+160];
;         DWORD 1 written on tag match, 0 on mismatch)
; XMM roles in the steady state: xmm5 = H, xmm6 = GHASH accumulator X,
; xmm4 = counter block, xmm15 = E(K, Y0) (masks the final tag).
; Win64 requires xmm6-xmm15 be preserved; they are saved on the stack.
;-----------------------------------------------------------------------
AES_GCM_decrypt_avx2 PROC
push r13
push rdi
push r12
push r14
push rbx
push r15
push rsi
push rbp
mov rdi, rcx
mov r12, r8
mov rax, r9
mov r14, QWORD PTR [rsp+104]
mov r8, rdx
mov r10d, DWORD PTR [rsp+112]
mov r11d, DWORD PTR [rsp+120]
mov ebx, DWORD PTR [rsp+128]
mov r15d, DWORD PTR [rsp+136]
mov rsi, QWORD PTR [rsp+144]
mov r9d, DWORD PTR [rsp+152]
mov rbp, QWORD PTR [rsp+160]
; 0..159 scratch (H powers / byte staging), 168+ saves xmm6-xmm15.
sub rsp, 328
vmovdqu OWORD PTR [rsp+168], xmm6
vmovdqu OWORD PTR [rsp+184], xmm7
vmovdqu OWORD PTR [rsp+200], xmm8
vmovdqu OWORD PTR [rsp+216], xmm9
vmovdqu OWORD PTR [rsp+232], xmm10
vmovdqu OWORD PTR [rsp+248], xmm11
vmovdqu OWORD PTR [rsp+264], xmm12
vmovdqu OWORD PTR [rsp+280], xmm13
vmovdqu OWORD PTR [rsp+296], xmm14
vmovdqu OWORD PTR [rsp+312], xmm15
; Zero the counter (xmm4) and GHASH accumulator (xmm6).
vpxor xmm4, xmm4, xmm4
vpxor xmm6, xmm6, xmm6
mov edx, ebx
cmp edx, 12
je L_AES_GCM_decrypt_avx2_iv_12
; Calculate values when IV is not 12 bytes
; H = Encrypt X(=0)
vmovdqu xmm5, OWORD PTR [rsi]
vaesenc xmm5, xmm5, [rsi+16]
vaesenc xmm5, xmm5, [rsi+32]
vaesenc xmm5, xmm5, [rsi+48]
vaesenc xmm5, xmm5, [rsi+64]
vaesenc xmm5, xmm5, [rsi+80]
vaesenc xmm5, xmm5, [rsi+96]
vaesenc xmm5, xmm5, [rsi+112]
vaesenc xmm5, xmm5, [rsi+128]
vaesenc xmm5, xmm5, [rsi+144]
; Extra rounds for AES-192 (r9d >= 11) and AES-256 (r9d >= 13).
cmp r9d, 11
vmovdqu xmm0, OWORD PTR [rsi+160]
jl L_AES_GCM_decrypt_avx2_calc_iv_1_aesenc_avx_last
vaesenc xmm5, xmm5, xmm0
vaesenc xmm5, xmm5, [rsi+176]
cmp r9d, 13
vmovdqu xmm0, OWORD PTR [rsi+192]
jl L_AES_GCM_decrypt_avx2_calc_iv_1_aesenc_avx_last
vaesenc xmm5, xmm5, xmm0
vaesenc xmm5, xmm5, [rsi+208]
vmovdqu xmm0, OWORD PTR [rsi+224]
L_AES_GCM_decrypt_avx2_calc_iv_1_aesenc_avx_last:
vaesenclast xmm5, xmm5, xmm0
vpshufb xmm5, xmm5, OWORD PTR L_avx2_aes_gcm_bswap_mask
; Calc counter
; Initialization vector
cmp edx, 0
mov rcx, 0
je L_AES_GCM_decrypt_avx2_calc_iv_done
cmp edx, 16
jl L_AES_GCM_decrypt_avx2_calc_iv_lt16
and edx, 4294967280
; GHASH whole 16-byte chunks of the IV into xmm4.
L_AES_GCM_decrypt_avx2_calc_iv_16_loop:
vmovdqu xmm0, OWORD PTR [rax+rcx]
vpshufb xmm0, xmm0, OWORD PTR L_avx2_aes_gcm_bswap_mask
vpxor xmm4, xmm4, xmm0
; ghash_gfmul_avx
vpclmulqdq xmm2, xmm5, xmm4, 16
vpclmulqdq xmm1, xmm5, xmm4, 1
vpclmulqdq xmm0, xmm5, xmm4, 0
vpclmulqdq xmm3, xmm5, xmm4, 17
vpxor xmm2, xmm2, xmm1
vpslldq xmm1, xmm2, 8
vpsrldq xmm2, xmm2, 8
vpxor xmm7, xmm0, xmm1
vpxor xmm4, xmm3, xmm2
; ghash_mid
vpsrld xmm0, xmm7, 31
vpsrld xmm1, xmm4, 31
vpslld xmm7, xmm7, 1
vpslld xmm4, xmm4, 1
vpsrldq xmm2, xmm0, 12
vpslldq xmm0, xmm0, 4
vpslldq xmm1, xmm1, 4
vpor xmm4, xmm4, xmm2
vpor xmm7, xmm7, xmm0
vpor xmm4, xmm4, xmm1
; ghash_red
vmovdqu xmm2, OWORD PTR L_avx2_aes_gcm_mod2_128
vpclmulqdq xmm0, xmm7, xmm2, 16
vpshufd xmm1, xmm7, 78
vpxor xmm1, xmm1, xmm0
vpclmulqdq xmm0, xmm1, xmm2, 16
vpshufd xmm1, xmm1, 78
vpxor xmm1, xmm1, xmm0
vpxor xmm4, xmm4, xmm1
add ecx, 16
cmp ecx, edx
jl L_AES_GCM_decrypt_avx2_calc_iv_16_loop
mov edx, ebx
cmp ecx, edx
je L_AES_GCM_decrypt_avx2_calc_iv_done
L_AES_GCM_decrypt_avx2_calc_iv_lt16:
; Zero-pad the final partial IV chunk in the stack buffer, then GHASH it.
vpxor xmm0, xmm0, xmm0
xor ebx, ebx
vmovdqu OWORD PTR [rsp], xmm0
L_AES_GCM_decrypt_avx2_calc_iv_loop:
movzx r13d, BYTE PTR [rax+rcx]
mov BYTE PTR [rsp+rbx], r13b
inc ecx
inc ebx
cmp ecx, edx
jl L_AES_GCM_decrypt_avx2_calc_iv_loop
vmovdqu xmm0, OWORD PTR [rsp]
vpshufb xmm0, xmm0, OWORD PTR L_avx2_aes_gcm_bswap_mask
vpxor xmm4, xmm4, xmm0
; ghash_gfmul_avx
vpclmulqdq xmm2, xmm5, xmm4, 16
vpclmulqdq xmm1, xmm5, xmm4, 1
vpclmulqdq xmm0, xmm5, xmm4, 0
vpclmulqdq xmm3, xmm5, xmm4, 17
vpxor xmm2, xmm2, xmm1
vpslldq xmm1, xmm2, 8
vpsrldq xmm2, xmm2, 8
vpxor xmm7, xmm0, xmm1
vpxor xmm4, xmm3, xmm2
; ghash_mid
vpsrld xmm0, xmm7, 31
vpsrld xmm1, xmm4, 31
vpslld xmm7, xmm7, 1
vpslld xmm4, xmm4, 1
vpsrldq xmm2, xmm0, 12
vpslldq xmm0, xmm0, 4
vpslldq xmm1, xmm1, 4
vpor xmm4, xmm4, xmm2
vpor xmm7, xmm7, xmm0
vpor xmm4, xmm4, xmm1
; ghash_red
vmovdqu xmm2, OWORD PTR L_avx2_aes_gcm_mod2_128
vpclmulqdq xmm0, xmm7, xmm2, 16
vpshufd xmm1, xmm7, 78
vpxor xmm1, xmm1, xmm0
vpclmulqdq xmm0, xmm1, xmm2, 16
vpshufd xmm1, xmm1, 78
vpxor xmm1, xmm1, xmm0
vpxor xmm4, xmm4, xmm1
L_AES_GCM_decrypt_avx2_calc_iv_done:
; T = Encrypt counter
; Fold the IV bit-length into the GHASH to finish Y0 = GHASH(IV).
vpxor xmm0, xmm0, xmm0
shl edx, 3
vmovq xmm0, rdx
vpxor xmm4, xmm4, xmm0
; ghash_gfmul_avx
vpclmulqdq xmm2, xmm5, xmm4, 16
vpclmulqdq xmm1, xmm5, xmm4, 1
vpclmulqdq xmm0, xmm5, xmm4, 0
vpclmulqdq xmm3, xmm5, xmm4, 17
vpxor xmm2, xmm2, xmm1
vpslldq xmm1, xmm2, 8
vpsrldq xmm2, xmm2, 8
vpxor xmm7, xmm0, xmm1
vpxor xmm4, xmm3, xmm2
; ghash_mid
vpsrld xmm0, xmm7, 31
vpsrld xmm1, xmm4, 31
vpslld xmm7, xmm7, 1
vpslld xmm4, xmm4, 1
vpsrldq xmm2, xmm0, 12
vpslldq xmm0, xmm0, 4
vpslldq xmm1, xmm1, 4
vpor xmm4, xmm4, xmm2
vpor xmm7, xmm7, xmm0
vpor xmm4, xmm4, xmm1
; ghash_red
vmovdqu xmm2, OWORD PTR L_avx2_aes_gcm_mod2_128
vpclmulqdq xmm0, xmm7, xmm2, 16
vpshufd xmm1, xmm7, 78
vpxor xmm1, xmm1, xmm0
vpclmulqdq xmm0, xmm1, xmm2, 16
vpshufd xmm1, xmm1, 78
vpxor xmm1, xmm1, xmm0
vpxor xmm4, xmm4, xmm1
vpshufb xmm4, xmm4, OWORD PTR L_avx2_aes_gcm_bswap_mask
; Encrypt counter
; xmm15 = E(K, Y0): masks the final authentication tag.
vmovdqu xmm15, OWORD PTR [rsi]
vpxor xmm15, xmm15, xmm4
vaesenc xmm15, xmm15, [rsi+16]
vaesenc xmm15, xmm15, [rsi+32]
vaesenc xmm15, xmm15, [rsi+48]
vaesenc xmm15, xmm15, [rsi+64]
vaesenc xmm15, xmm15, [rsi+80]
vaesenc xmm15, xmm15, [rsi+96]
vaesenc xmm15, xmm15, [rsi+112]
vaesenc xmm15, xmm15, [rsi+128]
vaesenc xmm15, xmm15, [rsi+144]
cmp r9d, 11
vmovdqu xmm0, OWORD PTR [rsi+160]
jl L_AES_GCM_decrypt_avx2_calc_iv_2_aesenc_avx_last
vaesenc xmm15, xmm15, xmm0
vaesenc xmm15, xmm15, [rsi+176]
cmp r9d, 13
vmovdqu xmm0, OWORD PTR [rsi+192]
jl L_AES_GCM_decrypt_avx2_calc_iv_2_aesenc_avx_last
vaesenc xmm15, xmm15, xmm0
vaesenc xmm15, xmm15, [rsi+208]
vmovdqu xmm0, OWORD PTR [rsi+224]
L_AES_GCM_decrypt_avx2_calc_iv_2_aesenc_avx_last:
vaesenclast xmm15, xmm15, xmm0
jmp L_AES_GCM_decrypt_avx2_iv_done
L_AES_GCM_decrypt_avx2_iv_12:
; # Calculate values when IV is 12 bytes
; Set counter based on IV
; Y0 = IV || 0x00000001; H and E(K, Y0) computed in one interleaved pass.
vmovdqu xmm4, OWORD PTR L_avx2_aes_gcm_bswap_one
vmovdqu xmm5, OWORD PTR [rsi]
vpblendd xmm4, xmm4, [rax], 7
; H = Encrypt X(=0) and T = Encrypt counter
vmovdqu xmm7, OWORD PTR [rsi+16]
vpxor xmm15, xmm4, xmm5
vaesenc xmm5, xmm5, xmm7
vaesenc xmm15, xmm15, xmm7
vmovdqu xmm0, OWORD PTR [rsi+32]
vaesenc xmm5, xmm5, xmm0
vaesenc xmm15, xmm15, xmm0
vmovdqu xmm0, OWORD PTR [rsi+48]
vaesenc xmm5, xmm5, xmm0
vaesenc xmm15, xmm15, xmm0
vmovdqu xmm0, OWORD PTR [rsi+64]
vaesenc xmm5, xmm5, xmm0
vaesenc xmm15, xmm15, xmm0
vmovdqu xmm0, OWORD PTR [rsi+80]
vaesenc xmm5, xmm5, xmm0
vaesenc xmm15, xmm15, xmm0
vmovdqu xmm0, OWORD PTR [rsi+96]
vaesenc xmm5, xmm5, xmm0
vaesenc xmm15, xmm15, xmm0
vmovdqu xmm0, OWORD PTR [rsi+112]
vaesenc xmm5, xmm5, xmm0
vaesenc xmm15, xmm15, xmm0
vmovdqu xmm0, OWORD PTR [rsi+128]
vaesenc xmm5, xmm5, xmm0
vaesenc xmm15, xmm15, xmm0
vmovdqu xmm0, OWORD PTR [rsi+144]
vaesenc xmm5, xmm5, xmm0
vaesenc xmm15, xmm15, xmm0
cmp r9d, 11
vmovdqu xmm0, OWORD PTR [rsi+160]
jl L_AES_GCM_decrypt_avx2_calc_iv_12_last
vaesenc xmm5, xmm5, xmm0
vaesenc xmm15, xmm15, xmm0
vmovdqu xmm0, OWORD PTR [rsi+176]
vaesenc xmm5, xmm5, xmm0
vaesenc xmm15, xmm15, xmm0
cmp r9d, 13
vmovdqu xmm0, OWORD PTR [rsi+192]
jl L_AES_GCM_decrypt_avx2_calc_iv_12_last
vaesenc xmm5, xmm5, xmm0
vaesenc xmm15, xmm15, xmm0
vmovdqu xmm0, OWORD PTR [rsi+208]
vaesenc xmm5, xmm5, xmm0
vaesenc xmm15, xmm15, xmm0
vmovdqu xmm0, OWORD PTR [rsi+224]
L_AES_GCM_decrypt_avx2_calc_iv_12_last:
vaesenclast xmm5, xmm5, xmm0
vaesenclast xmm15, xmm15, xmm0
vpshufb xmm5, xmm5, OWORD PTR L_avx2_aes_gcm_bswap_mask
L_AES_GCM_decrypt_avx2_iv_done:
; Additional authentication data
; GHASH the AAD (r12, r11d bytes) into xmm6.
mov edx, r11d
cmp edx, 0
je L_AES_GCM_decrypt_avx2_calc_aad_done
xor ecx, ecx
cmp edx, 16
jl L_AES_GCM_decrypt_avx2_calc_aad_lt16
and edx, 4294967280
L_AES_GCM_decrypt_avx2_calc_aad_16_loop:
vmovdqu xmm0, OWORD PTR [r12+rcx]
vpshufb xmm0, xmm0, OWORD PTR L_avx2_aes_gcm_bswap_mask
vpxor xmm6, xmm6, xmm0
; ghash_gfmul_avx
vpclmulqdq xmm2, xmm5, xmm6, 16
vpclmulqdq xmm1, xmm5, xmm6, 1
vpclmulqdq xmm0, xmm5, xmm6, 0
vpclmulqdq xmm3, xmm5, xmm6, 17
vpxor xmm2, xmm2, xmm1
vpslldq xmm1, xmm2, 8
vpsrldq xmm2, xmm2, 8
vpxor xmm7, xmm0, xmm1
vpxor xmm6, xmm3, xmm2
; ghash_mid
vpsrld xmm0, xmm7, 31
vpsrld xmm1, xmm6, 31
vpslld xmm7, xmm7, 1
vpslld xmm6, xmm6, 1
vpsrldq xmm2, xmm0, 12
vpslldq xmm0, xmm0, 4
vpslldq xmm1, xmm1, 4
vpor xmm6, xmm6, xmm2
vpor xmm7, xmm7, xmm0
vpor xmm6, xmm6, xmm1
; ghash_red
vmovdqu xmm2, OWORD PTR L_avx2_aes_gcm_mod2_128
vpclmulqdq xmm0, xmm7, xmm2, 16
vpshufd xmm1, xmm7, 78
vpxor xmm1, xmm1, xmm0
vpclmulqdq xmm0, xmm1, xmm2, 16
vpshufd xmm1, xmm1, 78
vpxor xmm1, xmm1, xmm0
vpxor xmm6, xmm6, xmm1
add ecx, 16
cmp ecx, edx
jl L_AES_GCM_decrypt_avx2_calc_aad_16_loop
mov edx, r11d
cmp ecx, edx
je L_AES_GCM_decrypt_avx2_calc_aad_done
L_AES_GCM_decrypt_avx2_calc_aad_lt16:
; Zero-pad the final partial AAD chunk in the stack buffer, then GHASH it.
vpxor xmm0, xmm0, xmm0
xor ebx, ebx
vmovdqu OWORD PTR [rsp], xmm0
L_AES_GCM_decrypt_avx2_calc_aad_loop:
movzx r13d, BYTE PTR [r12+rcx]
mov BYTE PTR [rsp+rbx], r13b
inc ecx
inc ebx
cmp ecx, edx
jl L_AES_GCM_decrypt_avx2_calc_aad_loop
vmovdqu xmm0, OWORD PTR [rsp]
vpshufb xmm0, xmm0, OWORD PTR L_avx2_aes_gcm_bswap_mask
vpxor xmm6, xmm6, xmm0
; ghash_gfmul_avx
vpclmulqdq xmm2, xmm5, xmm6, 16
vpclmulqdq xmm1, xmm5, xmm6, 1
vpclmulqdq xmm0, xmm5, xmm6, 0
vpclmulqdq xmm3, xmm5, xmm6, 17
vpxor xmm2, xmm2, xmm1
vpslldq xmm1, xmm2, 8
vpsrldq xmm2, xmm2, 8
vpxor xmm7, xmm0, xmm1
vpxor xmm6, xmm3, xmm2
; ghash_mid
vpsrld xmm0, xmm7, 31
vpsrld xmm1, xmm6, 31
vpslld xmm7, xmm7, 1
vpslld xmm6, xmm6, 1
vpsrldq xmm2, xmm0, 12
vpslldq xmm0, xmm0, 4
vpslldq xmm1, xmm1, 4
vpor xmm6, xmm6, xmm2
vpor xmm7, xmm7, xmm0
vpor xmm6, xmm6, xmm1
; ghash_red
vmovdqu xmm2, OWORD PTR L_avx2_aes_gcm_mod2_128
vpclmulqdq xmm0, xmm7, xmm2, 16
vpshufd xmm1, xmm7, 78
vpxor xmm1, xmm1, xmm0
vpclmulqdq xmm0, xmm1, xmm2, 16
vpshufd xmm1, xmm1, 78
vpxor xmm1, xmm1, xmm0
vpxor xmm6, xmm6, xmm1
L_AES_GCM_decrypt_avx2_calc_aad_done:
; Calculate counter and H
; H = H * x (carry-less doubling with conditional reduction);
; counter is byte-swapped to LE and incremented for the first block.
vpsrlq xmm1, xmm5, 63
vpsllq xmm0, xmm5, 1
vpslldq xmm1, xmm1, 8
vpor xmm0, xmm0, xmm1
vpshufd xmm5, xmm5, 255
vpsrad xmm5, xmm5, 31
vpshufb xmm4, xmm4, OWORD PTR L_avx2_aes_gcm_bswap_epi64
vpand xmm5, xmm5, OWORD PTR L_avx2_aes_gcm_mod2_128
vpaddd xmm4, xmm4, OWORD PTR L_avx2_aes_gcm_one
vpxor xmm5, xmm5, xmm0
xor ebx, ebx
; If at least 128 bytes remain, precompute H^1..H^8 at [rsp..rsp+112]
; and run the 8-blocks-at-a-time loop; counter and E(K,Y0) are spilled
; to [rsp+128]/[rsp+144] while all 16 xmm regs are in use.
cmp r10d, 128
mov r13d, r10d
jl L_AES_GCM_decrypt_avx2_done_128
and r13d, 4294967168
vmovdqu OWORD PTR [rsp+128], xmm4
vmovdqu OWORD PTR [rsp+144], xmm15
vmovdqu xmm3, OWORD PTR L_avx2_aes_gcm_mod2_128
; H ^ 1 and H ^ 2
vpclmulqdq xmm9, xmm5, xmm5, 0
vpclmulqdq xmm10, xmm5, xmm5, 17
vpclmulqdq xmm8, xmm9, xmm3, 16
vpshufd xmm9, xmm9, 78
vpxor xmm9, xmm9, xmm8
vpclmulqdq xmm8, xmm9, xmm3, 16
vpshufd xmm9, xmm9, 78
vpxor xmm9, xmm9, xmm8
vpxor xmm0, xmm10, xmm9
vmovdqu OWORD PTR [rsp], xmm5
vmovdqu OWORD PTR [rsp+16], xmm0
; H ^ 3 and H ^ 4
vpclmulqdq xmm11, xmm0, xmm5, 16
vpclmulqdq xmm10, xmm0, xmm5, 1
vpclmulqdq xmm9, xmm0, xmm5, 0
vpclmulqdq xmm12, xmm0, xmm5, 17
vpclmulqdq xmm13, xmm0, xmm0, 0
vpclmulqdq xmm14, xmm0, xmm0, 17
vpxor xmm11, xmm11, xmm10
vpslldq xmm10, xmm11, 8
vpsrldq xmm11, xmm11, 8
vpxor xmm10, xmm10, xmm9
vpclmulqdq xmm8, xmm13, xmm3, 16
vpclmulqdq xmm9, xmm10, xmm3, 16
vpshufd xmm10, xmm10, 78
vpshufd xmm13, xmm13, 78
vpxor xmm10, xmm10, xmm9
vpxor xmm13, xmm13, xmm8
vpclmulqdq xmm9, xmm10, xmm3, 16
vpclmulqdq xmm8, xmm13, xmm3, 16
vpshufd xmm10, xmm10, 78
vpshufd xmm13, xmm13, 78
vpxor xmm12, xmm12, xmm11
vpxor xmm13, xmm13, xmm8
vpxor xmm10, xmm10, xmm12
vpxor xmm2, xmm13, xmm14
vpxor xmm1, xmm10, xmm9
vmovdqu OWORD PTR [rsp+32], xmm1
vmovdqu OWORD PTR [rsp+48], xmm2
; H ^ 5 and H ^ 6
vpclmulqdq xmm11, xmm1, xmm0, 16
vpclmulqdq xmm10, xmm1, xmm0, 1
vpclmulqdq xmm9, xmm1, xmm0, 0
vpclmulqdq xmm12, xmm1, xmm0, 17
vpclmulqdq xmm13, xmm1, xmm1, 0
vpclmulqdq xmm14, xmm1, xmm1, 17
vpxor xmm11, xmm11, xmm10
vpslldq xmm10, xmm11, 8
vpsrldq xmm11, xmm11, 8
vpxor xmm10, xmm10, xmm9
vpclmulqdq xmm8, xmm13, xmm3, 16
vpclmulqdq xmm9, xmm10, xmm3, 16
vpshufd xmm10, xmm10, 78
vpshufd xmm13, xmm13, 78
vpxor xmm10, xmm10, xmm9
vpxor xmm13, xmm13, xmm8
vpclmulqdq xmm9, xmm10, xmm3, 16
vpclmulqdq xmm8, xmm13, xmm3, 16
vpshufd xmm10, xmm10, 78
vpshufd xmm13, xmm13, 78
vpxor xmm12, xmm12, xmm11
vpxor xmm13, xmm13, xmm8
vpxor xmm10, xmm10, xmm12
vpxor xmm0, xmm13, xmm14
vpxor xmm7, xmm10, xmm9
vmovdqu OWORD PTR [rsp+64], xmm7
vmovdqu OWORD PTR [rsp+80], xmm0
; H ^ 7 and H ^ 8
vpclmulqdq xmm11, xmm2, xmm1, 16
vpclmulqdq xmm10, xmm2, xmm1, 1
vpclmulqdq xmm9, xmm2, xmm1, 0
vpclmulqdq xmm12, xmm2, xmm1, 17
vpclmulqdq xmm13, xmm2, xmm2, 0
vpclmulqdq xmm14, xmm2, xmm2, 17
vpxor xmm11, xmm11, xmm10
vpslldq xmm10, xmm11, 8
vpsrldq xmm11, xmm11, 8
vpxor xmm10, xmm10, xmm9
vpclmulqdq xmm8, xmm13, xmm3, 16
vpclmulqdq xmm9, xmm10, xmm3, 16
vpshufd xmm10, xmm10, 78
vpshufd xmm13, xmm13, 78
vpxor xmm10, xmm10, xmm9
vpxor xmm13, xmm13, xmm8
vpclmulqdq xmm9, xmm10, xmm3, 16
vpclmulqdq xmm8, xmm13, xmm3, 16
vpshufd xmm10, xmm10, 78
vpshufd xmm13, xmm13, 78
vpxor xmm12, xmm12, xmm11
vpxor xmm13, xmm13, xmm8
vpxor xmm10, xmm10, xmm12
vpxor xmm0, xmm13, xmm14
vpxor xmm7, xmm10, xmm9
vmovdqu OWORD PTR [rsp+96], xmm7
vmovdqu OWORD PTR [rsp+112], xmm0
; Main loop: per iteration, encrypt 8 counter blocks while GHASHing the
; 8 ciphertext blocks (decrypt GHASHes input, so CTR and GHASH overlap).
L_AES_GCM_decrypt_avx2_ghash_128:
; aesenc_128_ghash
lea rcx, QWORD PTR [rdi+rbx]
lea rdx, QWORD PTR [r8+rbx]
; aesenc_ctr
vmovdqu xmm0, OWORD PTR [rsp+128]
vmovdqu xmm1, OWORD PTR L_avx2_aes_gcm_bswap_epi64
vpaddd xmm9, xmm0, OWORD PTR L_avx2_aes_gcm_one
vpshufb xmm8, xmm0, xmm1
vpaddd xmm10, xmm0, OWORD PTR L_avx2_aes_gcm_two
vpshufb xmm9, xmm9, xmm1
vpaddd xmm11, xmm0, OWORD PTR L_avx2_aes_gcm_three
vpshufb xmm10, xmm10, xmm1
vpaddd xmm12, xmm0, OWORD PTR L_avx2_aes_gcm_four
vpshufb xmm11, xmm11, xmm1
vpaddd xmm13, xmm0, OWORD PTR L_avx2_aes_gcm_five
vpshufb xmm12, xmm12, xmm1
vpaddd xmm14, xmm0, OWORD PTR L_avx2_aes_gcm_six
vpshufb xmm13, xmm13, xmm1
vpaddd xmm15, xmm0, OWORD PTR L_avx2_aes_gcm_seven
vpshufb xmm14, xmm14, xmm1
vpaddd xmm0, xmm0, OWORD PTR L_avx2_aes_gcm_eight
vpshufb xmm15, xmm15, xmm1
; aesenc_xor
vmovdqu xmm7, OWORD PTR [rsi]
vmovdqu OWORD PTR [rsp+128], xmm0
vpxor xmm8, xmm8, xmm7
vpxor xmm9, xmm9, xmm7
vpxor xmm10, xmm10, xmm7
vpxor xmm11, xmm11, xmm7
vpxor xmm12, xmm12, xmm7
vpxor xmm13, xmm13, xmm7
vpxor xmm14, xmm14, xmm7
vpxor xmm15, xmm15, xmm7
; aesenc_pclmul_1
; GHASH block 0 with H^8 (accumulator folded in), interleaved with round 1.
vmovdqu xmm1, OWORD PTR [rcx]
vmovdqu xmm0, OWORD PTR [rsi+16]
vpshufb xmm1, xmm1, OWORD PTR L_avx2_aes_gcm_bswap_mask
vmovdqu xmm2, OWORD PTR [rsp+112]
vpxor xmm1, xmm1, xmm6
vpclmulqdq xmm5, xmm1, xmm2, 16
vpclmulqdq xmm3, xmm1, xmm2, 1
vpclmulqdq xmm6, xmm1, xmm2, 0
vpclmulqdq xmm7, xmm1, xmm2, 17
vaesenc xmm8, xmm8, xmm0
vaesenc xmm9, xmm9, xmm0
vaesenc xmm10, xmm10, xmm0
vaesenc xmm11, xmm11, xmm0
vaesenc xmm12, xmm12, xmm0
vaesenc xmm13, xmm13, xmm0
vaesenc xmm14, xmm14, xmm0
vaesenc xmm15, xmm15, xmm0
; aesenc_pclmul_2
vmovdqu xmm1, OWORD PTR [rcx+16]
vmovdqu xmm0, OWORD PTR [rsp+96]
vpshufb xmm1, xmm1, OWORD PTR L_avx2_aes_gcm_bswap_mask
vpxor xmm5, xmm5, xmm3
vpclmulqdq xmm2, xmm1, xmm0, 16
vpclmulqdq xmm3, xmm1, xmm0, 1
vpclmulqdq xmm4, xmm1, xmm0, 0
vpclmulqdq xmm1, xmm1, xmm0, 17
vmovdqu xmm0, OWORD PTR [rsi+32]
vpxor xmm7, xmm7, xmm1
vaesenc xmm8, xmm8, xmm0
vaesenc xmm9, xmm9, xmm0
vaesenc xmm10, xmm10, xmm0
vaesenc xmm11, xmm11, xmm0
vaesenc xmm12, xmm12, xmm0
vaesenc xmm13, xmm13, xmm0
vaesenc xmm14, xmm14, xmm0
vaesenc xmm15, xmm15, xmm0
; aesenc_pclmul_n
vmovdqu xmm1, OWORD PTR [rcx+32]
vmovdqu xmm0, OWORD PTR [rsp+80]
vpshufb xmm1, xmm1, OWORD PTR L_avx2_aes_gcm_bswap_mask
vpxor xmm5, xmm5, xmm2
vpclmulqdq xmm2, xmm1, xmm0, 16
vpxor xmm5, xmm5, xmm3
vpclmulqdq xmm3, xmm1, xmm0, 1
vpxor xmm6, xmm6, xmm4
vpclmulqdq xmm4, xmm1, xmm0, 0
vpclmulqdq xmm1, xmm1, xmm0, 17
vmovdqu xmm0, OWORD PTR [rsi+48]
vpxor xmm7, xmm7, xmm1
vaesenc xmm8, xmm8, xmm0
vaesenc xmm9, xmm9, xmm0
vaesenc xmm10, xmm10, xmm0
vaesenc xmm11, xmm11, xmm0
vaesenc xmm12, xmm12, xmm0
vaesenc xmm13, xmm13, xmm0
vaesenc xmm14, xmm14, xmm0
vaesenc xmm15, xmm15, xmm0
; aesenc_pclmul_n
vmovdqu xmm1, OWORD PTR [rcx+48]
vmovdqu xmm0, OWORD PTR [rsp+64]
vpshufb xmm1, xmm1, OWORD PTR L_avx2_aes_gcm_bswap_mask
vpxor xmm5, xmm5, xmm2
vpclmulqdq xmm2, xmm1, xmm0, 16
vpxor xmm5, xmm5, xmm3
vpclmulqdq xmm3, xmm1, xmm0, 1
vpxor xmm6, xmm6, xmm4
vpclmulqdq xmm4, xmm1, xmm0, 0
vpclmulqdq xmm1, xmm1, xmm0, 17
vmovdqu xmm0, OWORD PTR [rsi+64]
vpxor xmm7, xmm7, xmm1
vaesenc xmm8, xmm8, xmm0
vaesenc xmm9, xmm9, xmm0
vaesenc xmm10, xmm10, xmm0
vaesenc xmm11, xmm11, xmm0
vaesenc xmm12, xmm12, xmm0
vaesenc xmm13, xmm13, xmm0
vaesenc xmm14, xmm14, xmm0
vaesenc xmm15, xmm15, xmm0
; aesenc_pclmul_n
vmovdqu xmm1, OWORD PTR [rcx+64]
vmovdqu xmm0, OWORD PTR [rsp+48]
vpshufb xmm1, xmm1, OWORD PTR L_avx2_aes_gcm_bswap_mask
vpxor xmm5, xmm5, xmm2
vpclmulqdq xmm2, xmm1, xmm0, 16
vpxor xmm5, xmm5, xmm3
vpclmulqdq xmm3, xmm1, xmm0, 1
vpxor xmm6, xmm6, xmm4
vpclmulqdq xmm4, xmm1, xmm0, 0
vpclmulqdq xmm1, xmm1, xmm0, 17
vmovdqu xmm0, OWORD PTR [rsi+80]
vpxor xmm7, xmm7, xmm1
vaesenc xmm8, xmm8, xmm0
vaesenc xmm9, xmm9, xmm0
vaesenc xmm10, xmm10, xmm0
vaesenc xmm11, xmm11, xmm0
vaesenc xmm12, xmm12, xmm0
vaesenc xmm13, xmm13, xmm0
vaesenc xmm14, xmm14, xmm0
vaesenc xmm15, xmm15, xmm0
; aesenc_pclmul_n
vmovdqu xmm1, OWORD PTR [rcx+80]
vmovdqu xmm0, OWORD PTR [rsp+32]
vpshufb xmm1, xmm1, OWORD PTR L_avx2_aes_gcm_bswap_mask
vpxor xmm5, xmm5, xmm2
vpclmulqdq xmm2, xmm1, xmm0, 16
vpxor xmm5, xmm5, xmm3
vpclmulqdq xmm3, xmm1, xmm0, 1
vpxor xmm6, xmm6, xmm4
vpclmulqdq xmm4, xmm1, xmm0, 0
vpclmulqdq xmm1, xmm1, xmm0, 17
vmovdqu xmm0, OWORD PTR [rsi+96]
vpxor xmm7, xmm7, xmm1
vaesenc xmm8, xmm8, xmm0
vaesenc xmm9, xmm9, xmm0
vaesenc xmm10, xmm10, xmm0
vaesenc xmm11, xmm11, xmm0
vaesenc xmm12, xmm12, xmm0
vaesenc xmm13, xmm13, xmm0
vaesenc xmm14, xmm14, xmm0
vaesenc xmm15, xmm15, xmm0
; aesenc_pclmul_n
vmovdqu xmm1, OWORD PTR [rcx+96]
vmovdqu xmm0, OWORD PTR [rsp+16]
vpshufb xmm1, xmm1, OWORD PTR L_avx2_aes_gcm_bswap_mask
vpxor xmm5, xmm5, xmm2
vpclmulqdq xmm2, xmm1, xmm0, 16
vpxor xmm5, xmm5, xmm3
vpclmulqdq xmm3, xmm1, xmm0, 1
vpxor xmm6, xmm6, xmm4
vpclmulqdq xmm4, xmm1, xmm0, 0
vpclmulqdq xmm1, xmm1, xmm0, 17
vmovdqu xmm0, OWORD PTR [rsi+112]
vpxor xmm7, xmm7, xmm1
vaesenc xmm8, xmm8, xmm0
vaesenc xmm9, xmm9, xmm0
vaesenc xmm10, xmm10, xmm0
vaesenc xmm11, xmm11, xmm0
vaesenc xmm12, xmm12, xmm0
vaesenc xmm13, xmm13, xmm0
vaesenc xmm14, xmm14, xmm0
vaesenc xmm15, xmm15, xmm0
; aesenc_pclmul_n
vmovdqu xmm1, OWORD PTR [rcx+112]
vmovdqu xmm0, OWORD PTR [rsp]
vpshufb xmm1, xmm1, OWORD PTR L_avx2_aes_gcm_bswap_mask
vpxor xmm5, xmm5, xmm2
vpclmulqdq xmm2, xmm1, xmm0, 16
vpxor xmm5, xmm5, xmm3
vpclmulqdq xmm3, xmm1, xmm0, 1
vpxor xmm6, xmm6, xmm4
vpclmulqdq xmm4, xmm1, xmm0, 0
vpclmulqdq xmm1, xmm1, xmm0, 17
vmovdqu xmm0, OWORD PTR [rsi+128]
vpxor xmm7, xmm7, xmm1
vaesenc xmm8, xmm8, xmm0
vaesenc xmm9, xmm9, xmm0
vaesenc xmm10, xmm10, xmm0
vaesenc xmm11, xmm11, xmm0
vaesenc xmm12, xmm12, xmm0
vaesenc xmm13, xmm13, xmm0
vaesenc xmm14, xmm14, xmm0
vaesenc xmm15, xmm15, xmm0
; aesenc_pclmul_l
; Combine the 8 partial products and reduce mod the GCM polynomial,
; interleaved with AES round 9; new accumulator lands in xmm6.
vpxor xmm5, xmm5, xmm2
vpxor xmm6, xmm6, xmm4
vpxor xmm5, xmm5, xmm3
vpslldq xmm1, xmm5, 8
vpsrldq xmm5, xmm5, 8
vmovdqu xmm4, OWORD PTR [rsi+144]
vmovdqu xmm0, OWORD PTR L_avx2_aes_gcm_mod2_128
vaesenc xmm8, xmm8, xmm4
vpxor xmm6, xmm6, xmm1
vpxor xmm7, xmm7, xmm5
vpclmulqdq xmm3, xmm6, xmm0, 16
vaesenc xmm9, xmm9, xmm4
vaesenc xmm10, xmm10, xmm4
vaesenc xmm11, xmm11, xmm4
vpshufd xmm6, xmm6, 78
vpxor xmm6, xmm6, xmm3
vpclmulqdq xmm3, xmm6, xmm0, 16
vaesenc xmm12, xmm12, xmm4
vaesenc xmm13, xmm13, xmm4
vaesenc xmm14, xmm14, xmm4
vpshufd xmm6, xmm6, 78
vpxor xmm6, xmm6, xmm3
vpxor xmm6, xmm6, xmm7
vaesenc xmm15, xmm15, xmm4
cmp r9d, 11
vmovdqu xmm7, OWORD PTR [rsi+160]
jl L_AES_GCM_decrypt_avx2_aesenc_128_ghash_avx_done
vaesenc xmm8, xmm8, xmm7
vaesenc xmm9, xmm9, xmm7
vaesenc xmm10, xmm10, xmm7
vaesenc xmm11, xmm11, xmm7
vaesenc xmm12, xmm12, xmm7
vaesenc xmm13, xmm13, xmm7
vaesenc xmm14, xmm14, xmm7
vaesenc xmm15, xmm15, xmm7
vmovdqu xmm7, OWORD PTR [rsi+176]
vaesenc xmm8, xmm8, xmm7
vaesenc xmm9, xmm9, xmm7
vaesenc xmm10, xmm10, xmm7
vaesenc xmm11, xmm11, xmm7
vaesenc xmm12, xmm12, xmm7
vaesenc xmm13, xmm13, xmm7
vaesenc xmm14, xmm14, xmm7
vaesenc xmm15, xmm15, xmm7
cmp r9d, 13
vmovdqu xmm7, OWORD PTR [rsi+192]
jl L_AES_GCM_decrypt_avx2_aesenc_128_ghash_avx_done
vaesenc xmm8, xmm8, xmm7
vaesenc xmm9, xmm9, xmm7
vaesenc xmm10, xmm10, xmm7
vaesenc xmm11, xmm11, xmm7
vaesenc xmm12, xmm12, xmm7
vaesenc xmm13, xmm13, xmm7
vaesenc xmm14, xmm14, xmm7
vaesenc xmm15, xmm15, xmm7
vmovdqu xmm7, OWORD PTR [rsi+208]
vaesenc xmm8, xmm8, xmm7
vaesenc xmm9, xmm9, xmm7
vaesenc xmm10, xmm10, xmm7
vaesenc xmm11, xmm11, xmm7
vaesenc xmm12, xmm12, xmm7
vaesenc xmm13, xmm13, xmm7
vaesenc xmm14, xmm14, xmm7
vaesenc xmm15, xmm15, xmm7
vmovdqu xmm7, OWORD PTR [rsi+224]
L_AES_GCM_decrypt_avx2_aesenc_128_ghash_avx_done:
; aesenc_last
; Final round, then XOR keystream with ciphertext and store plaintext.
vaesenclast xmm8, xmm8, xmm7
vaesenclast xmm9, xmm9, xmm7
vaesenclast xmm10, xmm10, xmm7
vaesenclast xmm11, xmm11, xmm7
vmovdqu xmm0, OWORD PTR [rcx]
vmovdqu xmm1, OWORD PTR [rcx+16]
vmovdqu xmm2, OWORD PTR [rcx+32]
vmovdqu xmm3, OWORD PTR [rcx+48]
vpxor xmm8, xmm8, xmm0
vpxor xmm9, xmm9, xmm1
vpxor xmm10, xmm10, xmm2
vpxor xmm11, xmm11, xmm3
vmovdqu OWORD PTR [rdx], xmm8
vmovdqu OWORD PTR [rdx+16], xmm9
vmovdqu OWORD PTR [rdx+32], xmm10
vmovdqu OWORD PTR [rdx+48], xmm11
vaesenclast xmm12, xmm12, xmm7
vaesenclast xmm13, xmm13, xmm7
vaesenclast xmm14, xmm14, xmm7
vaesenclast xmm15, xmm15, xmm7
vmovdqu xmm0, OWORD PTR [rcx+64]
vmovdqu xmm1, OWORD PTR [rcx+80]
vmovdqu xmm2, OWORD PTR [rcx+96]
vmovdqu xmm3, OWORD PTR [rcx+112]
vpxor xmm12, xmm12, xmm0
vpxor xmm13, xmm13, xmm1
vpxor xmm14, xmm14, xmm2
vpxor xmm15, xmm15, xmm3
vmovdqu OWORD PTR [rdx+64], xmm12
vmovdqu OWORD PTR [rdx+80], xmm13
vmovdqu OWORD PTR [rdx+96], xmm14
vmovdqu OWORD PTR [rdx+112], xmm15
; aesenc_128_ghash - end
add ebx, 128
cmp ebx, r13d
jl L_AES_GCM_decrypt_avx2_ghash_128
; Reload H, counter and E(K,Y0) spilled before the 128-byte loop.
vmovdqu xmm5, OWORD PTR [rsp]
vmovdqu xmm4, OWORD PTR [rsp+128]
vmovdqu xmm15, OWORD PTR [rsp+144]
L_AES_GCM_decrypt_avx2_done_128:
cmp ebx, r10d
jge L_AES_GCM_decrypt_avx2_done_dec
mov r13d, r10d
and r13d, 4294967280
cmp ebx, r13d
jge L_AES_GCM_decrypt_avx2_last_block_done
; Process remaining whole 16-byte blocks one at a time: GHASH the
; ciphertext block while encrypting the counter (aesenc_gfmul_sb).
L_AES_GCM_decrypt_avx2_last_block_start:
vmovdqu xmm11, OWORD PTR [rdi+rbx]
vpshufb xmm10, xmm4, OWORD PTR L_avx2_aes_gcm_bswap_epi64
vpshufb xmm12, xmm11, OWORD PTR L_avx2_aes_gcm_bswap_mask
vpaddd xmm4, xmm4, OWORD PTR L_avx2_aes_gcm_one
vpxor xmm12, xmm12, xmm6
; aesenc_gfmul_sb
vpclmulqdq xmm2, xmm12, xmm5, 1
vpclmulqdq xmm3, xmm12, xmm5, 16
vpclmulqdq xmm1, xmm12, xmm5, 0
vpclmulqdq xmm8, xmm12, xmm5, 17
vpxor xmm10, xmm10, [rsi]
vaesenc xmm10, xmm10, [rsi+16]
vpxor xmm3, xmm3, xmm2
vpslldq xmm2, xmm3, 8
vpsrldq xmm3, xmm3, 8
vaesenc xmm10, xmm10, [rsi+32]
vpxor xmm2, xmm2, xmm1
vpclmulqdq xmm1, xmm2, OWORD PTR L_avx2_aes_gcm_mod2_128, 16
vaesenc xmm10, xmm10, [rsi+48]
vaesenc xmm10, xmm10, [rsi+64]
vaesenc xmm10, xmm10, [rsi+80]
vpshufd xmm2, xmm2, 78
vpxor xmm2, xmm2, xmm1
vpclmulqdq xmm1, xmm2, OWORD PTR L_avx2_aes_gcm_mod2_128, 16
vaesenc xmm10, xmm10, [rsi+96]
vaesenc xmm10, xmm10, [rsi+112]
vaesenc xmm10, xmm10, [rsi+128]
vpshufd xmm2, xmm2, 78
vaesenc xmm10, xmm10, [rsi+144]
vpxor xmm8, xmm8, xmm3
vpxor xmm2, xmm2, xmm8
vmovdqu xmm0, OWORD PTR [rsi+160]
cmp r9d, 11
jl L_AES_GCM_decrypt_avx2_aesenc_gfmul_sb_last
vaesenc xmm10, xmm10, xmm0
vaesenc xmm10, xmm10, [rsi+176]
vmovdqu xmm0, OWORD PTR [rsi+192]
cmp r9d, 13
jl L_AES_GCM_decrypt_avx2_aesenc_gfmul_sb_last
vaesenc xmm10, xmm10, xmm0
vaesenc xmm10, xmm10, [rsi+208]
vmovdqu xmm0, OWORD PTR [rsi+224]
L_AES_GCM_decrypt_avx2_aesenc_gfmul_sb_last:
vaesenclast xmm10, xmm10, xmm0
vpxor xmm6, xmm2, xmm1
vpxor xmm10, xmm10, xmm11
vmovdqu OWORD PTR [r8+rbx], xmm10
add ebx, 16
cmp ebx, r13d
jl L_AES_GCM_decrypt_avx2_last_block_start
L_AES_GCM_decrypt_avx2_last_block_done:
; Handle the final partial block (1..15 bytes), if any.
mov ecx, r10d
mov edx, r10d
and ecx, 15
jz L_AES_GCM_decrypt_avx2_done_dec
; aesenc_last15_dec
vpshufb xmm4, xmm4, OWORD PTR L_avx2_aes_gcm_bswap_epi64
vpxor xmm4, xmm4, [rsi]
vaesenc xmm4, xmm4, [rsi+16]
vaesenc xmm4, xmm4, [rsi+32]
vaesenc xmm4, xmm4, [rsi+48]
vaesenc xmm4, xmm4, [rsi+64]
vaesenc xmm4, xmm4, [rsi+80]
vaesenc xmm4, xmm4, [rsi+96]
vaesenc xmm4, xmm4, [rsi+112]
vaesenc xmm4, xmm4, [rsi+128]
vaesenc xmm4, xmm4, [rsi+144]
cmp r9d, 11
vmovdqu xmm1, OWORD PTR [rsi+160]
jl L_AES_GCM_decrypt_avx2_aesenc_last15_dec_avx_aesenc_avx_last
vaesenc xmm4, xmm4, xmm1
vaesenc xmm4, xmm4, [rsi+176]
cmp r9d, 13
vmovdqu xmm1, OWORD PTR [rsi+192]
jl L_AES_GCM_decrypt_avx2_aesenc_last15_dec_avx_aesenc_avx_last
vaesenc xmm4, xmm4, xmm1
vaesenc xmm4, xmm4, [rsi+208]
vmovdqu xmm1, OWORD PTR [rsi+224]
L_AES_GCM_decrypt_avx2_aesenc_last15_dec_avx_aesenc_avx_last:
vaesenclast xmm4, xmm4, xmm1
xor ecx, ecx
vpxor xmm0, xmm0, xmm0
vmovdqu OWORD PTR [rsp], xmm4
vmovdqu OWORD PTR [rsp+16], xmm0
; Byte loop: stash the ciphertext byte at [rsp+16+i] (for GHASH, padded
; with zeros), XOR with the keystream byte at [rsp+i], store plaintext.
L_AES_GCM_decrypt_avx2_aesenc_last15_dec_avx_loop:
movzx r13d, BYTE PTR [rdi+rbx]
mov BYTE PTR [rsp+rcx+16], r13b
xor r13b, BYTE PTR [rsp+rcx]
mov BYTE PTR [r8+rbx], r13b
inc ebx
inc ecx
cmp ebx, edx
jl L_AES_GCM_decrypt_avx2_aesenc_last15_dec_avx_loop
vmovdqu xmm4, OWORD PTR [rsp+16]
vpshufb xmm4, xmm4, OWORD PTR L_avx2_aes_gcm_bswap_mask
vpxor xmm6, xmm6, xmm4
; ghash_gfmul_red
vpclmulqdq xmm2, xmm6, xmm5, 16
vpclmulqdq xmm1, xmm6, xmm5, 1
vpclmulqdq xmm0, xmm6, xmm5, 0
vpxor xmm2, xmm2, xmm1
vpslldq xmm1, xmm2, 8
vpsrldq xmm2, xmm2, 8
vpxor xmm1, xmm1, xmm0
vpclmulqdq xmm6, xmm6, xmm5, 17
vpclmulqdq xmm0, xmm1, OWORD PTR L_avx2_aes_gcm_mod2_128, 16
vpshufd xmm1, xmm1, 78
vpxor xmm1, xmm1, xmm0
vpclmulqdq xmm0, xmm1, OWORD PTR L_avx2_aes_gcm_mod2_128, 16
vpshufd xmm1, xmm1, 78
vpxor xmm6, xmm6, xmm2
vpxor xmm6, xmm6, xmm1
vpxor xmm6, xmm6, xmm0
L_AES_GCM_decrypt_avx2_done_dec:
; calc_tag
; Fold len(A) || len(C) (in bits) into the GHASH, multiply by H once
; more, byte-swap, and mask with E(K, Y0) to get the computed tag.
shl r10, 3
shl r11, 3
vmovq xmm0, r10
vmovq xmm1, r11
vpunpcklqdq xmm0, xmm0, xmm1
vpxor xmm0, xmm0, xmm6
; ghash_gfmul_red
vpclmulqdq xmm4, xmm0, xmm5, 16
vpclmulqdq xmm3, xmm0, xmm5, 1
vpclmulqdq xmm2, xmm0, xmm5, 0
vpxor xmm4, xmm4, xmm3
vpslldq xmm3, xmm4, 8
vpsrldq xmm4, xmm4, 8
vpxor xmm3, xmm3, xmm2
vpclmulqdq xmm0, xmm0, xmm5, 17
vpclmulqdq xmm2, xmm3, OWORD PTR L_avx2_aes_gcm_mod2_128, 16
vpshufd xmm3, xmm3, 78
vpxor xmm3, xmm3, xmm2
vpclmulqdq xmm2, xmm3, OWORD PTR L_avx2_aes_gcm_mod2_128, 16
vpshufd xmm3, xmm3, 78
vpxor xmm0, xmm0, xmm4
vpxor xmm0, xmm0, xmm3
vpxor xmm0, xmm0, xmm2
vpshufb xmm0, xmm0, OWORD PTR L_avx2_aes_gcm_bswap_mask
vpxor xmm0, xmm0, xmm15
; cmp_tag
; Compare computed tag against the expected tag at [r14]; eax = 1 on
; match, 0 otherwise. The byte loop accumulates differences in al so
; the time taken does not depend on where the tags differ.
cmp r15d, 16
je L_AES_GCM_decrypt_avx2_cmp_tag_16
xor rdx, rdx
xor rax, rax
vmovdqu OWORD PTR [rsp], xmm0
L_AES_GCM_decrypt_avx2_cmp_tag_loop:
movzx r13d, BYTE PTR [rsp+rdx]
xor r13b, BYTE PTR [r14+rdx]
or al, r13b
inc edx
cmp edx, r15d
jne L_AES_GCM_decrypt_avx2_cmp_tag_loop
cmp al, 0
sete al
jmp L_AES_GCM_decrypt_avx2_cmp_tag_done
L_AES_GCM_decrypt_avx2_cmp_tag_16:
vmovdqu xmm1, OWORD PTR [r14]
vpcmpeqb xmm0, xmm0, xmm1
vpmovmskb rdx, xmm0
; %%edx == 0xFFFF then return 1 else => return 0
xor eax, eax
cmp edx, 65535
sete al
L_AES_GCM_decrypt_avx2_cmp_tag_done:
; Write the authentication result and restore callee-saved state.
mov DWORD PTR [rbp], eax
vzeroupper
vmovdqu xmm6, OWORD PTR [rsp+168]
vmovdqu xmm7, OWORD PTR [rsp+184]
vmovdqu xmm8, OWORD PTR [rsp+200]
vmovdqu xmm9, OWORD PTR [rsp+216]
vmovdqu xmm10, OWORD PTR [rsp+232]
vmovdqu xmm11, OWORD PTR [rsp+248]
vmovdqu xmm12, OWORD PTR [rsp+264]
vmovdqu xmm13, OWORD PTR [rsp+280]
vmovdqu xmm14, OWORD PTR [rsp+296]
vmovdqu xmm15, OWORD PTR [rsp+312]
add rsp, 328
pop rbp
pop rsi
pop r15
pop rbx
pop r14
pop r12
pop rdi
pop r13
ret
AES_GCM_decrypt_avx2 ENDP
_text ENDS
_text SEGMENT READONLY PARA
;-----------------------------------------------------------------------
; AES_GCM_init_avx2 — GCM initialisation (MASM, Microsoft x64 ABI,
; AES-NI / PCLMULQDQ / AVX).
;
; Computes, from the expanded key and the IV:
;   H  = byteswap(AES-Encrypt(key, 0^128))   -- GHASH hash subkey
;   Y0 = initial counter block (IV || 00000001 for a 12-byte IV,
;        otherwise GHASH over the IV as in the GCM specification)
;   T  = AES-Encrypt(key, Y0)                -- later XORed into the tag
;
; In (Win64):
;   rcx      -> expanded AES round keys
;   edx      =  AES round count nr (10/12/14; branches test 11 and 13)
;   r8       -> IV bytes
;   r9d      =  IV length in bytes
;   [rsp+40] -> out: H (16 bytes)
;   [rsp+48] -> out: counter block Y0 + 1 (16 bytes)
;   [rsp+56] -> out: T = E(K, Y0) (16 bytes)
; Saves:      rbx, rdi, rsi, r12 and xmm6/xmm7 (callee-saved on Win64)
;-----------------------------------------------------------------------
AES_GCM_init_avx2 PROC
push rbx
push rdi
push rsi
push r12
mov rdi, rcx                 ; rdi = round keys
mov rsi, rdx                 ; esi = nr
mov r10, r8                  ; r10 = IV pointer
mov r11d, r9d                ; r11d = IV length
mov rax, QWORD PTR [rsp+72]  ; 5th arg (offset +32 after 4 pushes): H out
mov r8, QWORD PTR [rsp+80]   ; 6th arg: counter out
mov r9, QWORD PTR [rsp+88]   ; 7th arg: E(K, Y0) out
sub rsp, 48                  ; 16B scratch at [rsp] + saves for xmm6/xmm7
vmovdqu OWORD PTR [rsp+16], xmm6
vmovdqu OWORD PTR [rsp+32], xmm7
vpxor xmm4, xmm4, xmm4       ; xmm4 = GHASH accumulator = 0
mov edx, r11d
cmp edx, 12
je L_AES_GCM_init_avx2_iv_12 ; 96-bit IV takes the fast path below
; Calculate values when IV is not 12 bytes
; H = Encrypt X(=0)
; AddRoundKey of the all-zero block is just round key 0, so start there.
vmovdqu xmm5, OWORD PTR [rdi]
vaesenc xmm5, xmm5, [rdi+16]
vaesenc xmm5, xmm5, [rdi+32]
vaesenc xmm5, xmm5, [rdi+48]
vaesenc xmm5, xmm5, [rdi+64]
vaesenc xmm5, xmm5, [rdi+80]
vaesenc xmm5, xmm5, [rdi+96]
vaesenc xmm5, xmm5, [rdi+112]
vaesenc xmm5, xmm5, [rdi+128]
vaesenc xmm5, xmm5, [rdi+144]
cmp esi, 11                  ; nr < 11 => AES-128, finish now
vmovdqu xmm0, OWORD PTR [rdi+160]
jl L_AES_GCM_init_avx2_calc_iv_1_aesenc_avx_last
vaesenc xmm5, xmm5, xmm0
vaesenc xmm5, xmm5, [rdi+176]
cmp esi, 13                  ; nr < 13 => AES-192, finish now
vmovdqu xmm0, OWORD PTR [rdi+192]
jl L_AES_GCM_init_avx2_calc_iv_1_aesenc_avx_last
vaesenc xmm5, xmm5, xmm0
vaesenc xmm5, xmm5, [rdi+208]
vmovdqu xmm0, OWORD PTR [rdi+224]
L_AES_GCM_init_avx2_calc_iv_1_aesenc_avx_last:
vaesenclast xmm5, xmm5, xmm0
vpshufb xmm5, xmm5, OWORD PTR L_avx2_aes_gcm_bswap_mask ; xmm5 = H (byte-reversed)
; Calc counter
; Initialization vector
cmp edx, 0
mov rcx, 0                   ; rcx = IV bytes consumed (mov preserves flags)
je L_AES_GCM_init_avx2_calc_iv_done
cmp edx, 16
jl L_AES_GCM_init_avx2_calc_iv_lt16
and edx, 4294967280          ; edx = IV length rounded down to multiple of 16
; GHASH each full 16-byte IV block into the accumulator xmm4
L_AES_GCM_init_avx2_calc_iv_16_loop:
vmovdqu xmm0, OWORD PTR [r10+rcx]
vpshufb xmm0, xmm0, OWORD PTR L_avx2_aes_gcm_bswap_mask
vpxor xmm4, xmm4, xmm0
; ghash_gfmul_avx
; 128x128 carry-less multiply H * xmm4 -> 256-bit product in xmm4:xmm6
vpclmulqdq xmm2, xmm5, xmm4, 16
vpclmulqdq xmm1, xmm5, xmm4, 1
vpclmulqdq xmm0, xmm5, xmm4, 0
vpclmulqdq xmm3, xmm5, xmm4, 17
vpxor xmm2, xmm2, xmm1
vpslldq xmm1, xmm2, 8
vpsrldq xmm2, xmm2, 8
vpxor xmm6, xmm0, xmm1
vpxor xmm4, xmm3, xmm2
; ghash_mid
; shift the 256-bit product left by one bit across both halves
vpsrld xmm0, xmm6, 31
vpsrld xmm1, xmm4, 31
vpslld xmm6, xmm6, 1
vpslld xmm4, xmm4, 1
vpsrldq xmm2, xmm0, 12
vpslldq xmm0, xmm0, 4
vpslldq xmm1, xmm1, 4
vpor xmm4, xmm4, xmm2
vpor xmm6, xmm6, xmm0
vpor xmm4, xmm4, xmm1
; ghash_red
; reduce modulo the GHASH polynomial (two folds with the mod2_128 constant)
vmovdqu xmm2, OWORD PTR L_avx2_aes_gcm_mod2_128
vpclmulqdq xmm0, xmm6, xmm2, 16
vpshufd xmm1, xmm6, 78
vpxor xmm1, xmm1, xmm0
vpclmulqdq xmm0, xmm1, xmm2, 16
vpshufd xmm1, xmm1, 78
vpxor xmm1, xmm1, xmm0
vpxor xmm4, xmm4, xmm1
add ecx, 16
cmp ecx, edx
jl L_AES_GCM_init_avx2_calc_iv_16_loop
mov edx, r11d                ; restore full IV length
cmp ecx, edx
je L_AES_GCM_init_avx2_calc_iv_done
; Trailing partial IV block: zero-pad it to 16 bytes in stack scratch
L_AES_GCM_init_avx2_calc_iv_lt16:
vpxor xmm0, xmm0, xmm0
xor ebx, ebx
vmovdqu OWORD PTR [rsp], xmm0
L_AES_GCM_init_avx2_calc_iv_loop:
movzx r12d, BYTE PTR [r10+rcx]
mov BYTE PTR [rsp+rbx], r12b
inc ecx
inc ebx
cmp ecx, edx
jl L_AES_GCM_init_avx2_calc_iv_loop
vmovdqu xmm0, OWORD PTR [rsp]
vpshufb xmm0, xmm0, OWORD PTR L_avx2_aes_gcm_bswap_mask
vpxor xmm4, xmm4, xmm0
; ghash_gfmul_avx
; 128x128 carry-less multiply H * xmm4 -> 256-bit product in xmm4:xmm6
vpclmulqdq xmm2, xmm5, xmm4, 16
vpclmulqdq xmm1, xmm5, xmm4, 1
vpclmulqdq xmm0, xmm5, xmm4, 0
vpclmulqdq xmm3, xmm5, xmm4, 17
vpxor xmm2, xmm2, xmm1
vpslldq xmm1, xmm2, 8
vpsrldq xmm2, xmm2, 8
vpxor xmm6, xmm0, xmm1
vpxor xmm4, xmm3, xmm2
; ghash_mid
; shift the 256-bit product left by one bit across both halves
vpsrld xmm0, xmm6, 31
vpsrld xmm1, xmm4, 31
vpslld xmm6, xmm6, 1
vpslld xmm4, xmm4, 1
vpsrldq xmm2, xmm0, 12
vpslldq xmm0, xmm0, 4
vpslldq xmm1, xmm1, 4
vpor xmm4, xmm4, xmm2
vpor xmm6, xmm6, xmm0
vpor xmm4, xmm4, xmm1
; ghash_red
; reduce modulo the GHASH polynomial (two folds with the mod2_128 constant)
vmovdqu xmm2, OWORD PTR L_avx2_aes_gcm_mod2_128
vpclmulqdq xmm0, xmm6, xmm2, 16
vpshufd xmm1, xmm6, 78
vpxor xmm1, xmm1, xmm0
vpclmulqdq xmm0, xmm1, xmm2, 16
vpshufd xmm1, xmm1, 78
vpxor xmm1, xmm1, xmm0
vpxor xmm4, xmm4, xmm1
L_AES_GCM_init_avx2_calc_iv_done:
; T = Encrypt counter
; Fold in the length block (IV length in bits) and GHASH once more -> Y0
vpxor xmm0, xmm0, xmm0
shl edx, 3                   ; IV length in bits
vmovq xmm0, rdx
vpxor xmm4, xmm4, xmm0
; ghash_gfmul_avx
; 128x128 carry-less multiply H * xmm4 -> 256-bit product in xmm4:xmm6
vpclmulqdq xmm2, xmm5, xmm4, 16
vpclmulqdq xmm1, xmm5, xmm4, 1
vpclmulqdq xmm0, xmm5, xmm4, 0
vpclmulqdq xmm3, xmm5, xmm4, 17
vpxor xmm2, xmm2, xmm1
vpslldq xmm1, xmm2, 8
vpsrldq xmm2, xmm2, 8
vpxor xmm6, xmm0, xmm1
vpxor xmm4, xmm3, xmm2
; ghash_mid
; shift the 256-bit product left by one bit across both halves
vpsrld xmm0, xmm6, 31
vpsrld xmm1, xmm4, 31
vpslld xmm6, xmm6, 1
vpslld xmm4, xmm4, 1
vpsrldq xmm2, xmm0, 12
vpslldq xmm0, xmm0, 4
vpslldq xmm1, xmm1, 4
vpor xmm4, xmm4, xmm2
vpor xmm6, xmm6, xmm0
vpor xmm4, xmm4, xmm1
; ghash_red
; reduce modulo the GHASH polynomial (two folds with the mod2_128 constant)
vmovdqu xmm2, OWORD PTR L_avx2_aes_gcm_mod2_128
vpclmulqdq xmm0, xmm6, xmm2, 16
vpshufd xmm1, xmm6, 78
vpxor xmm1, xmm1, xmm0
vpclmulqdq xmm0, xmm1, xmm2, 16
vpshufd xmm1, xmm1, 78
vpxor xmm1, xmm1, xmm0
vpxor xmm4, xmm4, xmm1
vpshufb xmm4, xmm4, OWORD PTR L_avx2_aes_gcm_bswap_mask ; xmm4 = Y0
; Encrypt counter
; xmm7 = E(K, Y0) = T
vmovdqu xmm7, OWORD PTR [rdi]
vpxor xmm7, xmm7, xmm4
vaesenc xmm7, xmm7, [rdi+16]
vaesenc xmm7, xmm7, [rdi+32]
vaesenc xmm7, xmm7, [rdi+48]
vaesenc xmm7, xmm7, [rdi+64]
vaesenc xmm7, xmm7, [rdi+80]
vaesenc xmm7, xmm7, [rdi+96]
vaesenc xmm7, xmm7, [rdi+112]
vaesenc xmm7, xmm7, [rdi+128]
vaesenc xmm7, xmm7, [rdi+144]
cmp esi, 11                  ; nr < 11 => AES-128, finish now
vmovdqu xmm0, OWORD PTR [rdi+160]
jl L_AES_GCM_init_avx2_calc_iv_2_aesenc_avx_last
vaesenc xmm7, xmm7, xmm0
vaesenc xmm7, xmm7, [rdi+176]
cmp esi, 13                  ; nr < 13 => AES-192, finish now
vmovdqu xmm0, OWORD PTR [rdi+192]
jl L_AES_GCM_init_avx2_calc_iv_2_aesenc_avx_last
vaesenc xmm7, xmm7, xmm0
vaesenc xmm7, xmm7, [rdi+208]
vmovdqu xmm0, OWORD PTR [rdi+224]
L_AES_GCM_init_avx2_calc_iv_2_aesenc_avx_last:
vaesenclast xmm7, xmm7, xmm0
jmp L_AES_GCM_init_avx2_iv_done
L_AES_GCM_init_avx2_iv_12:
; # Calculate values when IV is 12 bytes
; Set counter based on IV
vmovdqu xmm4, OWORD PTR L_avx2_aes_gcm_bswap_one
vmovdqu xmm5, OWORD PTR [rdi]
vpblendd xmm4, xmm4, [r10], 7 ; Y0 = IV (low 12 bytes) || 00000001
; H = Encrypt X(=0) and T = Encrypt counter
; Two AES streams interleaved, sharing each round key load:
;   xmm5 = E(K, 0) (becomes H), xmm7 = E(K, Y0) (becomes T)
vmovdqu xmm6, OWORD PTR [rdi+16]
vpxor xmm7, xmm4, xmm5
vaesenc xmm5, xmm5, xmm6
vaesenc xmm7, xmm7, xmm6
vmovdqu xmm0, OWORD PTR [rdi+32]
vaesenc xmm5, xmm5, xmm0
vaesenc xmm7, xmm7, xmm0
vmovdqu xmm0, OWORD PTR [rdi+48]
vaesenc xmm5, xmm5, xmm0
vaesenc xmm7, xmm7, xmm0
vmovdqu xmm0, OWORD PTR [rdi+64]
vaesenc xmm5, xmm5, xmm0
vaesenc xmm7, xmm7, xmm0
vmovdqu xmm0, OWORD PTR [rdi+80]
vaesenc xmm5, xmm5, xmm0
vaesenc xmm7, xmm7, xmm0
vmovdqu xmm0, OWORD PTR [rdi+96]
vaesenc xmm5, xmm5, xmm0
vaesenc xmm7, xmm7, xmm0
vmovdqu xmm0, OWORD PTR [rdi+112]
vaesenc xmm5, xmm5, xmm0
vaesenc xmm7, xmm7, xmm0
vmovdqu xmm0, OWORD PTR [rdi+128]
vaesenc xmm5, xmm5, xmm0
vaesenc xmm7, xmm7, xmm0
vmovdqu xmm0, OWORD PTR [rdi+144]
vaesenc xmm5, xmm5, xmm0
vaesenc xmm7, xmm7, xmm0
cmp esi, 11                  ; nr < 11 => AES-128, finish now
vmovdqu xmm0, OWORD PTR [rdi+160]
jl L_AES_GCM_init_avx2_calc_iv_12_last
vaesenc xmm5, xmm5, xmm0
vaesenc xmm7, xmm7, xmm0
vmovdqu xmm0, OWORD PTR [rdi+176]
vaesenc xmm5, xmm5, xmm0
vaesenc xmm7, xmm7, xmm0
cmp esi, 13                  ; nr < 13 => AES-192, finish now
vmovdqu xmm0, OWORD PTR [rdi+192]
jl L_AES_GCM_init_avx2_calc_iv_12_last
vaesenc xmm5, xmm5, xmm0
vaesenc xmm7, xmm7, xmm0
vmovdqu xmm0, OWORD PTR [rdi+208]
vaesenc xmm5, xmm5, xmm0
vaesenc xmm7, xmm7, xmm0
vmovdqu xmm0, OWORD PTR [rdi+224]
L_AES_GCM_init_avx2_calc_iv_12_last:
vaesenclast xmm5, xmm5, xmm0
vaesenclast xmm7, xmm7, xmm0
vpshufb xmm5, xmm5, OWORD PTR L_avx2_aes_gcm_bswap_mask ; xmm5 = H (byte-reversed)
L_AES_GCM_init_avx2_iv_done:
vmovdqu OWORD PTR [r9], xmm7 ; store T = E(K, Y0)
vpshufb xmm4, xmm4, OWORD PTR L_avx2_aes_gcm_bswap_epi64 ; reorder counter bytes for increment
vpaddd xmm4, xmm4, OWORD PTR L_avx2_aes_gcm_one          ; counter = Y0 + 1 for first data block
vmovdqu OWORD PTR [rax], xmm5 ; store H
vmovdqu OWORD PTR [r8], xmm4  ; store counter
vzeroupper
vmovdqu xmm6, OWORD PTR [rsp+16]
vmovdqu xmm7, OWORD PTR [rsp+32]
add rsp, 48
pop r12
pop rsi
pop rdi
pop rbx
ret
AES_GCM_init_avx2 ENDP
_text ENDS
_text SEGMENT READONLY PARA
;-----------------------------------------------------------------------
; AES_GCM_aad_update_avx2 — fold additional authenticated data (AAD)
; into the GHASH state (MASM, Microsoft x64 ABI, PCLMULQDQ/AVX).
;
; For each 16-byte AAD block:  X = (X ^ byteswap(block)) * H  mod poly.
;
; In (Win64):
;   rcx -> AAD bytes
;   edx =  AAD length in bytes; the loop consumes 16 at a time and runs
;          at least once, so the caller is expected to pass a non-zero
;          multiple of 16 -- TODO confirm against callers
;   r8  -> 16-byte GHASH state X (read and written back)
;   r9  -> 16-byte hash subkey H
; Saves xmm6 (callee-saved on Win64).
;-----------------------------------------------------------------------
AES_GCM_aad_update_avx2 PROC
mov rax, rcx                 ; rax = AAD pointer
sub rsp, 16
vmovdqu OWORD PTR [rsp], xmm6
vmovdqu xmm4, OWORD PTR [r8] ; xmm4 = X
vmovdqu xmm5, OWORD PTR [r9] ; xmm5 = H
xor ecx, ecx                 ; ecx = bytes processed
L_AES_GCM_aad_update_avx2_16_loop:
vmovdqu xmm0, OWORD PTR [rax+rcx]
vpshufb xmm0, xmm0, OWORD PTR L_avx2_aes_gcm_bswap_mask
vpxor xmm4, xmm4, xmm0       ; X ^= AAD block
; ghash_gfmul_avx
; 128x128 carry-less multiply H * X -> 256-bit product in xmm4:xmm6
vpclmulqdq xmm2, xmm5, xmm4, 16
vpclmulqdq xmm1, xmm5, xmm4, 1
vpclmulqdq xmm0, xmm5, xmm4, 0
vpclmulqdq xmm3, xmm5, xmm4, 17
vpxor xmm2, xmm2, xmm1
vpslldq xmm1, xmm2, 8
vpsrldq xmm2, xmm2, 8
vpxor xmm6, xmm0, xmm1
vpxor xmm4, xmm3, xmm2
; ghash_mid
; shift the 256-bit product left by one bit across both halves
vpsrld xmm0, xmm6, 31
vpsrld xmm1, xmm4, 31
vpslld xmm6, xmm6, 1
vpslld xmm4, xmm4, 1
vpsrldq xmm2, xmm0, 12
vpslldq xmm0, xmm0, 4
vpslldq xmm1, xmm1, 4
vpor xmm4, xmm4, xmm2
vpor xmm6, xmm6, xmm0
vpor xmm4, xmm4, xmm1
; ghash_red
; reduce modulo the GHASH polynomial (two folds with the mod2_128 constant)
vmovdqu xmm2, OWORD PTR L_avx2_aes_gcm_mod2_128
vpclmulqdq xmm0, xmm6, xmm2, 16
vpshufd xmm1, xmm6, 78
vpxor xmm1, xmm1, xmm0
vpclmulqdq xmm0, xmm1, xmm2, 16
vpshufd xmm1, xmm1, 78
vpxor xmm1, xmm1, xmm0
vpxor xmm4, xmm4, xmm1
add ecx, 16
cmp ecx, edx
jl L_AES_GCM_aad_update_avx2_16_loop
vmovdqu OWORD PTR [r8], xmm4 ; write updated X back
vzeroupper
vmovdqu xmm6, OWORD PTR [rsp]
add rsp, 16
ret
AES_GCM_aad_update_avx2 ENDP
_text ENDS
_text SEGMENT READONLY PARA
;-----------------------------------------------------------------------
; AES_GCM_encrypt_block_avx2 — encrypt one 16-byte block in CTR mode
; (MASM, Microsoft x64 ABI, AES-NI/AVX).
;
; Encrypts the current counter block, XORs the keystream with the
; 16-byte input and stores the result; the counter is incremented and
; written back.  The GHASH state is NOT updated here.
;
; In (Win64):
;   rcx      -> expanded AES round keys
;   edx      =  AES round count nr (10/12/14; branches test 11 and 13)
;   r8       -> out: 16-byte ciphertext block
;   r9       -> in:  16-byte plaintext block
;   [rsp+40] -> 16-byte counter block (read; incremented value stored back)
; Uses only volatile xmm0-xmm3, so no xmm registers need saving.
;-----------------------------------------------------------------------
AES_GCM_encrypt_block_avx2 PROC
mov r10, r8                  ; r10 = output pointer
mov r11, r9                  ; r11 = input pointer
mov rax, QWORD PTR [rsp+40]  ; rax = counter pointer (5th arg)
sub rsp, 152                 ; stack allocation (never stored to in this function)
vmovdqu xmm3, OWORD PTR [rax]
; aesenc_block
vmovdqu xmm1, xmm3
vpshufb xmm0, xmm1, OWORD PTR L_avx2_aes_gcm_bswap_epi64 ; byte-reorder counter for AES input
vpaddd xmm1, xmm1, OWORD PTR L_avx2_aes_gcm_one          ; counter += 1
vpxor xmm0, xmm0, [rcx]      ; AddRoundKey (round 0)
vmovdqu xmm2, OWORD PTR [rcx+16]
vaesenc xmm0, xmm0, xmm2
vmovdqu xmm2, OWORD PTR [rcx+32]
vaesenc xmm0, xmm0, xmm2
vmovdqu xmm2, OWORD PTR [rcx+48]
vaesenc xmm0, xmm0, xmm2
vmovdqu xmm2, OWORD PTR [rcx+64]
vaesenc xmm0, xmm0, xmm2
vmovdqu xmm2, OWORD PTR [rcx+80]
vaesenc xmm0, xmm0, xmm2
vmovdqu xmm2, OWORD PTR [rcx+96]
vaesenc xmm0, xmm0, xmm2
vmovdqu xmm2, OWORD PTR [rcx+112]
vaesenc xmm0, xmm0, xmm2
vmovdqu xmm2, OWORD PTR [rcx+128]
vaesenc xmm0, xmm0, xmm2
vmovdqu xmm2, OWORD PTR [rcx+144]
vaesenc xmm0, xmm0, xmm2
vmovdqu xmm3, xmm1           ; keep incremented counter for write-back
cmp edx, 11                  ; nr < 11 => AES-128, finish now
vmovdqu xmm1, OWORD PTR [rcx+160]
jl L_AES_GCM_encrypt_block_avx2_aesenc_block_last
vaesenc xmm0, xmm0, xmm1
vmovdqu xmm2, OWORD PTR [rcx+176]
vaesenc xmm0, xmm0, xmm2
cmp edx, 13                  ; nr < 13 => AES-192, finish now
vmovdqu xmm1, OWORD PTR [rcx+192]
jl L_AES_GCM_encrypt_block_avx2_aesenc_block_last
vaesenc xmm0, xmm0, xmm1
vmovdqu xmm2, OWORD PTR [rcx+208]
vaesenc xmm0, xmm0, xmm2
vmovdqu xmm1, OWORD PTR [rcx+224]
L_AES_GCM_encrypt_block_avx2_aesenc_block_last:
vaesenclast xmm0, xmm0, xmm1
vmovdqu xmm1, OWORD PTR [r11]
vpxor xmm0, xmm0, xmm1       ; ciphertext = keystream ^ plaintext
vmovdqu OWORD PTR [r10], xmm0
vmovdqu OWORD PTR [rax], xmm3 ; store incremented counter
vzeroupper
add rsp, 152
ret
AES_GCM_encrypt_block_avx2 ENDP
_text ENDS
_text SEGMENT READONLY PARA
;-----------------------------------------------------------------------
; AES_GCM_ghash_block_avx2 — fold one 16-byte block into the GHASH
; state (MASM, Microsoft x64 ABI, PCLMULQDQ/AVX).
;
;   X = (X ^ byteswap(block)) * H  mod the GHASH polynomial
;
; In (Win64):
;   rcx -> 16-byte input block
;   rdx -> 16-byte GHASH state X (read and written back)
;   r8  -> 16-byte hash subkey H
; Saves xmm6 (callee-saved on Win64).
;-----------------------------------------------------------------------
AES_GCM_ghash_block_avx2 PROC
sub rsp, 16
vmovdqu OWORD PTR [rsp], xmm6
vmovdqu xmm4, OWORD PTR [rdx] ; xmm4 = X
vmovdqu xmm5, OWORD PTR [r8]  ; xmm5 = H
vmovdqu xmm0, OWORD PTR [rcx]
vpshufb xmm0, xmm0, OWORD PTR L_avx2_aes_gcm_bswap_mask
vpxor xmm4, xmm4, xmm0        ; X ^= block
; ghash_gfmul_avx
; 128x128 carry-less multiply H * X -> 256-bit product in xmm4:xmm6
vpclmulqdq xmm2, xmm5, xmm4, 16
vpclmulqdq xmm1, xmm5, xmm4, 1
vpclmulqdq xmm0, xmm5, xmm4, 0
vpclmulqdq xmm3, xmm5, xmm4, 17
vpxor xmm2, xmm2, xmm1
vpslldq xmm1, xmm2, 8
vpsrldq xmm2, xmm2, 8
vpxor xmm6, xmm0, xmm1
vpxor xmm4, xmm3, xmm2
; ghash_mid
; shift the 256-bit product left by one bit across both halves
vpsrld xmm0, xmm6, 31
vpsrld xmm1, xmm4, 31
vpslld xmm6, xmm6, 1
vpslld xmm4, xmm4, 1
vpsrldq xmm2, xmm0, 12
vpslldq xmm0, xmm0, 4
vpslldq xmm1, xmm1, 4
vpor xmm4, xmm4, xmm2
vpor xmm6, xmm6, xmm0
vpor xmm4, xmm4, xmm1
; ghash_red
; reduce modulo the GHASH polynomial (two folds with the mod2_128 constant)
vmovdqu xmm2, OWORD PTR L_avx2_aes_gcm_mod2_128
vpclmulqdq xmm0, xmm6, xmm2, 16
vpshufd xmm1, xmm6, 78
vpxor xmm1, xmm1, xmm0
vpclmulqdq xmm0, xmm1, xmm2, 16
vpshufd xmm1, xmm1, 78
vpxor xmm1, xmm1, xmm0
vpxor xmm4, xmm4, xmm1
vmovdqu OWORD PTR [rdx], xmm4 ; write updated X back
vzeroupper
vmovdqu xmm6, OWORD PTR [rsp]
add rsp, 16
ret
AES_GCM_ghash_block_avx2 ENDP
_text ENDS
_text SEGMENT READONLY PARA
AES_GCM_encrypt_update_avx2 PROC
push r12
push r13
push r14
push r15
push rdi
mov rax, rcx
mov r10, r8
mov r8d, edx
mov r11, r9
mov r9d, DWORD PTR [rsp+80]
mov r12, QWORD PTR [rsp+88]
mov r13, QWORD PTR [rsp+96]
mov r14, QWORD PTR [rsp+104]
sub rsp, 312
vmovdqu OWORD PTR [rsp+152], xmm6
vmovdqu OWORD PTR [rsp+168], xmm7
vmovdqu OWORD PTR [rsp+184], xmm8
vmovdqu OWORD PTR [rsp+200], xmm9
vmovdqu OWORD PTR [rsp+216], xmm10
vmovdqu OWORD PTR [rsp+232], xmm11
vmovdqu OWORD PTR [rsp+248], xmm12
vmovdqu OWORD PTR [rsp+264], xmm13
vmovdqu OWORD PTR [rsp+280], xmm14
vmovdqu OWORD PTR [rsp+296], xmm15
vmovdqu xmm6, OWORD PTR [r12]
vmovdqu xmm5, OWORD PTR [r13]
vmovdqu xmm4, OWORD PTR [r14]
vpsrlq xmm1, xmm5, 63
vpsllq xmm0, xmm5, 1
vpslldq xmm1, xmm1, 8
vpor xmm0, xmm0, xmm1
vpshufd xmm5, xmm5, 255
vpsrad xmm5, xmm5, 31
vpand xmm5, xmm5, OWORD PTR L_avx2_aes_gcm_mod2_128
vpxor xmm5, xmm5, xmm0
xor edi, edi
cmp r9d, 128
mov r15d, r9d
jl L_AES_GCM_encrypt_update_avx2_done_128
and r15d, 4294967168
vmovdqu OWORD PTR [rsp+128], xmm4
vmovdqu xmm3, OWORD PTR L_avx2_aes_gcm_mod2_128
; H ^ 1 and H ^ 2
vpclmulqdq xmm9, xmm5, xmm5, 0
vpclmulqdq xmm10, xmm5, xmm5, 17
vpclmulqdq xmm8, xmm9, xmm3, 16
vpshufd xmm9, xmm9, 78
vpxor xmm9, xmm9, xmm8
vpclmulqdq xmm8, xmm9, xmm3, 16
vpshufd xmm9, xmm9, 78
vpxor xmm9, xmm9, xmm8
vpxor xmm0, xmm10, xmm9
vmovdqu OWORD PTR [rsp], xmm5
vmovdqu OWORD PTR [rsp+16], xmm0
; H ^ 3 and H ^ 4
vpclmulqdq xmm11, xmm0, xmm5, 16
vpclmulqdq xmm10, xmm0, xmm5, 1
vpclmulqdq xmm9, xmm0, xmm5, 0
vpclmulqdq xmm12, xmm0, xmm5, 17
vpclmulqdq xmm13, xmm0, xmm0, 0
vpclmulqdq xmm14, xmm0, xmm0, 17
vpxor xmm11, xmm11, xmm10
vpslldq xmm10, xmm11, 8
vpsrldq xmm11, xmm11, 8
vpxor xmm10, xmm10, xmm9
vpclmulqdq xmm8, xmm13, xmm3, 16
vpclmulqdq xmm9, xmm10, xmm3, 16
vpshufd xmm10, xmm10, 78
vpshufd xmm13, xmm13, 78
vpxor xmm10, xmm10, xmm9
vpxor xmm13, xmm13, xmm8
vpclmulqdq xmm9, xmm10, xmm3, 16
vpclmulqdq xmm8, xmm13, xmm3, 16
vpshufd xmm10, xmm10, 78
vpshufd xmm13, xmm13, 78
vpxor xmm12, xmm12, xmm11
vpxor xmm13, xmm13, xmm8
vpxor xmm10, xmm10, xmm12
vpxor xmm2, xmm13, xmm14
vpxor xmm1, xmm10, xmm9
vmovdqu OWORD PTR [rsp+32], xmm1
vmovdqu OWORD PTR [rsp+48], xmm2
; H ^ 5 and H ^ 6
vpclmulqdq xmm11, xmm1, xmm0, 16
vpclmulqdq xmm10, xmm1, xmm0, 1
vpclmulqdq xmm9, xmm1, xmm0, 0
vpclmulqdq xmm12, xmm1, xmm0, 17
vpclmulqdq xmm13, xmm1, xmm1, 0
vpclmulqdq xmm14, xmm1, xmm1, 17
vpxor xmm11, xmm11, xmm10
vpslldq xmm10, xmm11, 8
vpsrldq xmm11, xmm11, 8
vpxor xmm10, xmm10, xmm9
vpclmulqdq xmm8, xmm13, xmm3, 16
vpclmulqdq xmm9, xmm10, xmm3, 16
vpshufd xmm10, xmm10, 78
vpshufd xmm13, xmm13, 78
vpxor xmm10, xmm10, xmm9
vpxor xmm13, xmm13, xmm8
vpclmulqdq xmm9, xmm10, xmm3, 16
vpclmulqdq xmm8, xmm13, xmm3, 16
vpshufd xmm10, xmm10, 78
vpshufd xmm13, xmm13, 78
vpxor xmm12, xmm12, xmm11
vpxor xmm13, xmm13, xmm8
vpxor xmm10, xmm10, xmm12
vpxor xmm0, xmm13, xmm14
vpxor xmm7, xmm10, xmm9
vmovdqu OWORD PTR [rsp+64], xmm7
vmovdqu OWORD PTR [rsp+80], xmm0
; H ^ 7 and H ^ 8
vpclmulqdq xmm11, xmm2, xmm1, 16
vpclmulqdq xmm10, xmm2, xmm1, 1
vpclmulqdq xmm9, xmm2, xmm1, 0
vpclmulqdq xmm12, xmm2, xmm1, 17
vpclmulqdq xmm13, xmm2, xmm2, 0
vpclmulqdq xmm14, xmm2, xmm2, 17
vpxor xmm11, xmm11, xmm10
vpslldq xmm10, xmm11, 8
vpsrldq xmm11, xmm11, 8
vpxor xmm10, xmm10, xmm9
vpclmulqdq xmm8, xmm13, xmm3, 16
vpclmulqdq xmm9, xmm10, xmm3, 16
vpshufd xmm10, xmm10, 78
vpshufd xmm13, xmm13, 78
vpxor xmm10, xmm10, xmm9
vpxor xmm13, xmm13, xmm8
vpclmulqdq xmm9, xmm10, xmm3, 16
vpclmulqdq xmm8, xmm13, xmm3, 16
vpshufd xmm10, xmm10, 78
vpshufd xmm13, xmm13, 78
vpxor xmm12, xmm12, xmm11
vpxor xmm13, xmm13, xmm8
vpxor xmm10, xmm10, xmm12
vpxor xmm0, xmm13, xmm14
vpxor xmm7, xmm10, xmm9
vmovdqu OWORD PTR [rsp+96], xmm7
vmovdqu OWORD PTR [rsp+112], xmm0
; First 128 bytes of input
; aesenc_128
; aesenc_ctr
vmovdqu xmm0, OWORD PTR [rsp+128]
vmovdqu xmm1, OWORD PTR L_avx2_aes_gcm_bswap_epi64
vpaddd xmm9, xmm0, OWORD PTR L_avx2_aes_gcm_one
vpshufb xmm8, xmm0, xmm1
vpaddd xmm10, xmm0, OWORD PTR L_avx2_aes_gcm_two
vpshufb xmm9, xmm9, xmm1
vpaddd xmm11, xmm0, OWORD PTR L_avx2_aes_gcm_three
vpshufb xmm10, xmm10, xmm1
vpaddd xmm12, xmm0, OWORD PTR L_avx2_aes_gcm_four
vpshufb xmm11, xmm11, xmm1
vpaddd xmm13, xmm0, OWORD PTR L_avx2_aes_gcm_five
vpshufb xmm12, xmm12, xmm1
vpaddd xmm14, xmm0, OWORD PTR L_avx2_aes_gcm_six
vpshufb xmm13, xmm13, xmm1
vpaddd xmm15, xmm0, OWORD PTR L_avx2_aes_gcm_seven
vpshufb xmm14, xmm14, xmm1
vpaddd xmm0, xmm0, OWORD PTR L_avx2_aes_gcm_eight
vpshufb xmm15, xmm15, xmm1
; aesenc_xor
vmovdqu xmm7, OWORD PTR [rax]
vmovdqu OWORD PTR [rsp+128], xmm0
vpxor xmm8, xmm8, xmm7
vpxor xmm9, xmm9, xmm7
vpxor xmm10, xmm10, xmm7
vpxor xmm11, xmm11, xmm7
vpxor xmm12, xmm12, xmm7
vpxor xmm13, xmm13, xmm7
vpxor xmm14, xmm14, xmm7
vpxor xmm15, xmm15, xmm7
vmovdqu xmm7, OWORD PTR [rax+16]
vaesenc xmm8, xmm8, xmm7
vaesenc xmm9, xmm9, xmm7
vaesenc xmm10, xmm10, xmm7
vaesenc xmm11, xmm11, xmm7
vaesenc xmm12, xmm12, xmm7
vaesenc xmm13, xmm13, xmm7
vaesenc xmm14, xmm14, xmm7
vaesenc xmm15, xmm15, xmm7
vmovdqu xmm7, OWORD PTR [rax+32]
vaesenc xmm8, xmm8, xmm7
vaesenc xmm9, xmm9, xmm7
vaesenc xmm10, xmm10, xmm7
vaesenc xmm11, xmm11, xmm7
vaesenc xmm12, xmm12, xmm7
vaesenc xmm13, xmm13, xmm7
vaesenc xmm14, xmm14, xmm7
vaesenc xmm15, xmm15, xmm7
vmovdqu xmm7, OWORD PTR [rax+48]
vaesenc xmm8, xmm8, xmm7
vaesenc xmm9, xmm9, xmm7
vaesenc xmm10, xmm10, xmm7
vaesenc xmm11, xmm11, xmm7
vaesenc xmm12, xmm12, xmm7
vaesenc xmm13, xmm13, xmm7
vaesenc xmm14, xmm14, xmm7
vaesenc xmm15, xmm15, xmm7
vmovdqu xmm7, OWORD PTR [rax+64]
vaesenc xmm8, xmm8, xmm7
vaesenc xmm9, xmm9, xmm7
vaesenc xmm10, xmm10, xmm7
vaesenc xmm11, xmm11, xmm7
vaesenc xmm12, xmm12, xmm7
vaesenc xmm13, xmm13, xmm7
vaesenc xmm14, xmm14, xmm7
vaesenc xmm15, xmm15, xmm7
vmovdqu xmm7, OWORD PTR [rax+80]
vaesenc xmm8, xmm8, xmm7
vaesenc xmm9, xmm9, xmm7
vaesenc xmm10, xmm10, xmm7
vaesenc xmm11, xmm11, xmm7
vaesenc xmm12, xmm12, xmm7
vaesenc xmm13, xmm13, xmm7
vaesenc xmm14, xmm14, xmm7
vaesenc xmm15, xmm15, xmm7
vmovdqu xmm7, OWORD PTR [rax+96]
vaesenc xmm8, xmm8, xmm7
vaesenc xmm9, xmm9, xmm7
vaesenc xmm10, xmm10, xmm7
vaesenc xmm11, xmm11, xmm7
vaesenc xmm12, xmm12, xmm7
vaesenc xmm13, xmm13, xmm7
vaesenc xmm14, xmm14, xmm7
vaesenc xmm15, xmm15, xmm7
vmovdqu xmm7, OWORD PTR [rax+112]
vaesenc xmm8, xmm8, xmm7
vaesenc xmm9, xmm9, xmm7
vaesenc xmm10, xmm10, xmm7
vaesenc xmm11, xmm11, xmm7
vaesenc xmm12, xmm12, xmm7
vaesenc xmm13, xmm13, xmm7
vaesenc xmm14, xmm14, xmm7
vaesenc xmm15, xmm15, xmm7
vmovdqu xmm7, OWORD PTR [rax+128]
vaesenc xmm8, xmm8, xmm7
vaesenc xmm9, xmm9, xmm7
vaesenc xmm10, xmm10, xmm7
vaesenc xmm11, xmm11, xmm7
vaesenc xmm12, xmm12, xmm7
vaesenc xmm13, xmm13, xmm7
vaesenc xmm14, xmm14, xmm7
vaesenc xmm15, xmm15, xmm7
vmovdqu xmm7, OWORD PTR [rax+144]
vaesenc xmm8, xmm8, xmm7
vaesenc xmm9, xmm9, xmm7
vaesenc xmm10, xmm10, xmm7
vaesenc xmm11, xmm11, xmm7
vaesenc xmm12, xmm12, xmm7
vaesenc xmm13, xmm13, xmm7
vaesenc xmm14, xmm14, xmm7
vaesenc xmm15, xmm15, xmm7
cmp r8d, 11
vmovdqu xmm7, OWORD PTR [rax+160]
jl L_AES_GCM_encrypt_update_avx2_aesenc_128_enc_done
vaesenc xmm8, xmm8, xmm7
vaesenc xmm9, xmm9, xmm7
vaesenc xmm10, xmm10, xmm7
vaesenc xmm11, xmm11, xmm7
vaesenc xmm12, xmm12, xmm7
vaesenc xmm13, xmm13, xmm7
vaesenc xmm14, xmm14, xmm7
vaesenc xmm15, xmm15, xmm7
vmovdqu xmm7, OWORD PTR [rax+176]
vaesenc xmm8, xmm8, xmm7
vaesenc xmm9, xmm9, xmm7
vaesenc xmm10, xmm10, xmm7
vaesenc xmm11, xmm11, xmm7
vaesenc xmm12, xmm12, xmm7
vaesenc xmm13, xmm13, xmm7
vaesenc xmm14, xmm14, xmm7
vaesenc xmm15, xmm15, xmm7
cmp r8d, 13
vmovdqu xmm7, OWORD PTR [rax+192]
jl L_AES_GCM_encrypt_update_avx2_aesenc_128_enc_done
vaesenc xmm8, xmm8, xmm7
vaesenc xmm9, xmm9, xmm7
vaesenc xmm10, xmm10, xmm7
vaesenc xmm11, xmm11, xmm7
vaesenc xmm12, xmm12, xmm7
vaesenc xmm13, xmm13, xmm7
vaesenc xmm14, xmm14, xmm7
vaesenc xmm15, xmm15, xmm7
vmovdqu xmm7, OWORD PTR [rax+208]
vaesenc xmm8, xmm8, xmm7
vaesenc xmm9, xmm9, xmm7
vaesenc xmm10, xmm10, xmm7
vaesenc xmm11, xmm11, xmm7
vaesenc xmm12, xmm12, xmm7
vaesenc xmm13, xmm13, xmm7
vaesenc xmm14, xmm14, xmm7
vaesenc xmm15, xmm15, xmm7
vmovdqu xmm7, OWORD PTR [rax+224]
L_AES_GCM_encrypt_update_avx2_aesenc_128_enc_done:
; aesenc_last
vaesenclast xmm8, xmm8, xmm7
vaesenclast xmm9, xmm9, xmm7
vaesenclast xmm10, xmm10, xmm7
vaesenclast xmm11, xmm11, xmm7
vmovdqu xmm0, OWORD PTR [r11]
vmovdqu xmm1, OWORD PTR [r11+16]
vmovdqu xmm2, OWORD PTR [r11+32]
vmovdqu xmm3, OWORD PTR [r11+48]
vpxor xmm8, xmm8, xmm0
vpxor xmm9, xmm9, xmm1
vpxor xmm10, xmm10, xmm2
vpxor xmm11, xmm11, xmm3
vmovdqu OWORD PTR [r10], xmm8
vmovdqu OWORD PTR [r10+16], xmm9
vmovdqu OWORD PTR [r10+32], xmm10
vmovdqu OWORD PTR [r10+48], xmm11
vaesenclast xmm12, xmm12, xmm7
vaesenclast xmm13, xmm13, xmm7
vaesenclast xmm14, xmm14, xmm7
vaesenclast xmm15, xmm15, xmm7
vmovdqu xmm0, OWORD PTR [r11+64]
vmovdqu xmm1, OWORD PTR [r11+80]
vmovdqu xmm2, OWORD PTR [r11+96]
vmovdqu xmm3, OWORD PTR [r11+112]
vpxor xmm12, xmm12, xmm0
vpxor xmm13, xmm13, xmm1
vpxor xmm14, xmm14, xmm2
vpxor xmm15, xmm15, xmm3
vmovdqu OWORD PTR [r10+64], xmm12
vmovdqu OWORD PTR [r10+80], xmm13
vmovdqu OWORD PTR [r10+96], xmm14
vmovdqu OWORD PTR [r10+112], xmm15
cmp r15d, 128
mov edi, 128
jle L_AES_GCM_encrypt_update_avx2_end_128
; More 128 bytes of input
L_AES_GCM_encrypt_update_avx2_ghash_128:
; aesenc_128_ghash
lea rcx, QWORD PTR [r11+rdi]
lea rdx, QWORD PTR [r10+rdi]
; aesenc_ctr
vmovdqu xmm0, OWORD PTR [rsp+128]
vmovdqu xmm1, OWORD PTR L_avx2_aes_gcm_bswap_epi64
vpaddd xmm9, xmm0, OWORD PTR L_avx2_aes_gcm_one
vpshufb xmm8, xmm0, xmm1
vpaddd xmm10, xmm0, OWORD PTR L_avx2_aes_gcm_two
vpshufb xmm9, xmm9, xmm1
vpaddd xmm11, xmm0, OWORD PTR L_avx2_aes_gcm_three
vpshufb xmm10, xmm10, xmm1
vpaddd xmm12, xmm0, OWORD PTR L_avx2_aes_gcm_four
vpshufb xmm11, xmm11, xmm1
vpaddd xmm13, xmm0, OWORD PTR L_avx2_aes_gcm_five
vpshufb xmm12, xmm12, xmm1
vpaddd xmm14, xmm0, OWORD PTR L_avx2_aes_gcm_six
vpshufb xmm13, xmm13, xmm1
vpaddd xmm15, xmm0, OWORD PTR L_avx2_aes_gcm_seven
vpshufb xmm14, xmm14, xmm1
vpaddd xmm0, xmm0, OWORD PTR L_avx2_aes_gcm_eight
vpshufb xmm15, xmm15, xmm1
; aesenc_xor
vmovdqu xmm7, OWORD PTR [rax]
vmovdqu OWORD PTR [rsp+128], xmm0
vpxor xmm8, xmm8, xmm7
vpxor xmm9, xmm9, xmm7
vpxor xmm10, xmm10, xmm7
vpxor xmm11, xmm11, xmm7
vpxor xmm12, xmm12, xmm7
vpxor xmm13, xmm13, xmm7
vpxor xmm14, xmm14, xmm7
vpxor xmm15, xmm15, xmm7
; aesenc_pclmul_1
vmovdqu xmm1, OWORD PTR [rdx+-128]
vmovdqu xmm0, OWORD PTR [rax+16]
vpshufb xmm1, xmm1, OWORD PTR L_avx2_aes_gcm_bswap_mask
vmovdqu xmm2, OWORD PTR [rsp+112]
vpxor xmm1, xmm1, xmm6
vpclmulqdq xmm5, xmm1, xmm2, 16
vpclmulqdq xmm3, xmm1, xmm2, 1
vpclmulqdq xmm6, xmm1, xmm2, 0
vpclmulqdq xmm7, xmm1, xmm2, 17
vaesenc xmm8, xmm8, xmm0
vaesenc xmm9, xmm9, xmm0
vaesenc xmm10, xmm10, xmm0
vaesenc xmm11, xmm11, xmm0
vaesenc xmm12, xmm12, xmm0
vaesenc xmm13, xmm13, xmm0
vaesenc xmm14, xmm14, xmm0
vaesenc xmm15, xmm15, xmm0
; aesenc_pclmul_2
vmovdqu xmm1, OWORD PTR [rdx+-112]
vmovdqu xmm0, OWORD PTR [rsp+96]
vpshufb xmm1, xmm1, OWORD PTR L_avx2_aes_gcm_bswap_mask
vpxor xmm5, xmm5, xmm3
vpclmulqdq xmm2, xmm1, xmm0, 16
vpclmulqdq xmm3, xmm1, xmm0, 1
vpclmulqdq xmm4, xmm1, xmm0, 0
vpclmulqdq xmm1, xmm1, xmm0, 17
vmovdqu xmm0, OWORD PTR [rax+32]
vpxor xmm7, xmm7, xmm1
vaesenc xmm8, xmm8, xmm0
vaesenc xmm9, xmm9, xmm0
vaesenc xmm10, xmm10, xmm0
vaesenc xmm11, xmm11, xmm0
vaesenc xmm12, xmm12, xmm0
vaesenc xmm13, xmm13, xmm0
vaesenc xmm14, xmm14, xmm0
vaesenc xmm15, xmm15, xmm0
; aesenc_pclmul_n
vmovdqu xmm1, OWORD PTR [rdx+-96]
vmovdqu xmm0, OWORD PTR [rsp+80]
vpshufb xmm1, xmm1, OWORD PTR L_avx2_aes_gcm_bswap_mask
vpxor xmm5, xmm5, xmm2
vpclmulqdq xmm2, xmm1, xmm0, 16
vpxor xmm5, xmm5, xmm3
vpclmulqdq xmm3, xmm1, xmm0, 1
vpxor xmm6, xmm6, xmm4
vpclmulqdq xmm4, xmm1, xmm0, 0
vpclmulqdq xmm1, xmm1, xmm0, 17
vmovdqu xmm0, OWORD PTR [rax+48]
vpxor xmm7, xmm7, xmm1
vaesenc xmm8, xmm8, xmm0
vaesenc xmm9, xmm9, xmm0
vaesenc xmm10, xmm10, xmm0
vaesenc xmm11, xmm11, xmm0
vaesenc xmm12, xmm12, xmm0
vaesenc xmm13, xmm13, xmm0
vaesenc xmm14, xmm14, xmm0
vaesenc xmm15, xmm15, xmm0
; aesenc_pclmul_n
vmovdqu xmm1, OWORD PTR [rdx+-80]
vmovdqu xmm0, OWORD PTR [rsp+64]
vpshufb xmm1, xmm1, OWORD PTR L_avx2_aes_gcm_bswap_mask
vpxor xmm5, xmm5, xmm2
vpclmulqdq xmm2, xmm1, xmm0, 16
vpxor xmm5, xmm5, xmm3
vpclmulqdq xmm3, xmm1, xmm0, 1
vpxor xmm6, xmm6, xmm4
vpclmulqdq xmm4, xmm1, xmm0, 0
vpclmulqdq xmm1, xmm1, xmm0, 17
vmovdqu xmm0, OWORD PTR [rax+64]
vpxor xmm7, xmm7, xmm1
vaesenc xmm8, xmm8, xmm0
vaesenc xmm9, xmm9, xmm0
vaesenc xmm10, xmm10, xmm0
vaesenc xmm11, xmm11, xmm0
vaesenc xmm12, xmm12, xmm0
vaesenc xmm13, xmm13, xmm0
vaesenc xmm14, xmm14, xmm0
vaesenc xmm15, xmm15, xmm0
; aesenc_pclmul_n
vmovdqu xmm1, OWORD PTR [rdx+-64]
vmovdqu xmm0, OWORD PTR [rsp+48]
vpshufb xmm1, xmm1, OWORD PTR L_avx2_aes_gcm_bswap_mask
vpxor xmm5, xmm5, xmm2
vpclmulqdq xmm2, xmm1, xmm0, 16
vpxor xmm5, xmm5, xmm3
vpclmulqdq xmm3, xmm1, xmm0, 1
vpxor xmm6, xmm6, xmm4
vpclmulqdq xmm4, xmm1, xmm0, 0
vpclmulqdq xmm1, xmm1, xmm0, 17
vmovdqu xmm0, OWORD PTR [rax+80]
vpxor xmm7, xmm7, xmm1
vaesenc xmm8, xmm8, xmm0
vaesenc xmm9, xmm9, xmm0
vaesenc xmm10, xmm10, xmm0
vaesenc xmm11, xmm11, xmm0
vaesenc xmm12, xmm12, xmm0
vaesenc xmm13, xmm13, xmm0
vaesenc xmm14, xmm14, xmm0
vaesenc xmm15, xmm15, xmm0
; aesenc_pclmul_n
vmovdqu xmm1, OWORD PTR [rdx+-48]
vmovdqu xmm0, OWORD PTR [rsp+32]
vpshufb xmm1, xmm1, OWORD PTR L_avx2_aes_gcm_bswap_mask
vpxor xmm5, xmm5, xmm2
vpclmulqdq xmm2, xmm1, xmm0, 16
vpxor xmm5, xmm5, xmm3
vpclmulqdq xmm3, xmm1, xmm0, 1
vpxor xmm6, xmm6, xmm4
vpclmulqdq xmm4, xmm1, xmm0, 0
vpclmulqdq xmm1, xmm1, xmm0, 17
vmovdqu xmm0, OWORD PTR [rax+96]
vpxor xmm7, xmm7, xmm1
vaesenc xmm8, xmm8, xmm0
vaesenc xmm9, xmm9, xmm0
vaesenc xmm10, xmm10, xmm0
vaesenc xmm11, xmm11, xmm0
vaesenc xmm12, xmm12, xmm0
vaesenc xmm13, xmm13, xmm0
vaesenc xmm14, xmm14, xmm0
vaesenc xmm15, xmm15, xmm0
; aesenc_pclmul_n
vmovdqu xmm1, OWORD PTR [rdx+-32]
vmovdqu xmm0, OWORD PTR [rsp+16]
vpshufb xmm1, xmm1, OWORD PTR L_avx2_aes_gcm_bswap_mask
vpxor xmm5, xmm5, xmm2
vpclmulqdq xmm2, xmm1, xmm0, 16
vpxor xmm5, xmm5, xmm3
vpclmulqdq xmm3, xmm1, xmm0, 1
vpxor xmm6, xmm6, xmm4
vpclmulqdq xmm4, xmm1, xmm0, 0
vpclmulqdq xmm1, xmm1, xmm0, 17
vmovdqu xmm0, OWORD PTR [rax+112]
vpxor xmm7, xmm7, xmm1
vaesenc xmm8, xmm8, xmm0
vaesenc xmm9, xmm9, xmm0
vaesenc xmm10, xmm10, xmm0
vaesenc xmm11, xmm11, xmm0
vaesenc xmm12, xmm12, xmm0
vaesenc xmm13, xmm13, xmm0
vaesenc xmm14, xmm14, xmm0
vaesenc xmm15, xmm15, xmm0
; aesenc_pclmul_n
vmovdqu xmm1, OWORD PTR [rdx+-16]
vmovdqu xmm0, OWORD PTR [rsp]
vpshufb xmm1, xmm1, OWORD PTR L_avx2_aes_gcm_bswap_mask
vpxor xmm5, xmm5, xmm2
vpclmulqdq xmm2, xmm1, xmm0, 16
vpxor xmm5, xmm5, xmm3
vpclmulqdq xmm3, xmm1, xmm0, 1
vpxor xmm6, xmm6, xmm4
vpclmulqdq xmm4, xmm1, xmm0, 0
vpclmulqdq xmm1, xmm1, xmm0, 17
vmovdqu xmm0, OWORD PTR [rax+128]
vpxor xmm7, xmm7, xmm1
vaesenc xmm8, xmm8, xmm0
vaesenc xmm9, xmm9, xmm0
vaesenc xmm10, xmm10, xmm0
vaesenc xmm11, xmm11, xmm0
vaesenc xmm12, xmm12, xmm0
vaesenc xmm13, xmm13, xmm0
vaesenc xmm14, xmm14, xmm0
vaesenc xmm15, xmm15, xmm0
; aesenc_pclmul_l
vpxor xmm5, xmm5, xmm2
vpxor xmm6, xmm6, xmm4
vpxor xmm5, xmm5, xmm3
vpslldq xmm1, xmm5, 8
vpsrldq xmm5, xmm5, 8
vmovdqu xmm4, OWORD PTR [rax+144]
vmovdqu xmm0, OWORD PTR L_avx2_aes_gcm_mod2_128
vaesenc xmm8, xmm8, xmm4
vpxor xmm6, xmm6, xmm1
vpxor xmm7, xmm7, xmm5
vpclmulqdq xmm3, xmm6, xmm0, 16
vaesenc xmm9, xmm9, xmm4
vaesenc xmm10, xmm10, xmm4
vaesenc xmm11, xmm11, xmm4
vpshufd xmm6, xmm6, 78
vpxor xmm6, xmm6, xmm3
vpclmulqdq xmm3, xmm6, xmm0, 16
vaesenc xmm12, xmm12, xmm4
vaesenc xmm13, xmm13, xmm4
vaesenc xmm14, xmm14, xmm4
vpshufd xmm6, xmm6, 78
vpxor xmm6, xmm6, xmm3
vpxor xmm6, xmm6, xmm7
vaesenc xmm15, xmm15, xmm4
cmp r8d, 11
vmovdqu xmm7, OWORD PTR [rax+160]
jl L_AES_GCM_encrypt_update_avx2_aesenc_128_ghash_avx_done
vaesenc xmm8, xmm8, xmm7
vaesenc xmm9, xmm9, xmm7
vaesenc xmm10, xmm10, xmm7
vaesenc xmm11, xmm11, xmm7
vaesenc xmm12, xmm12, xmm7
vaesenc xmm13, xmm13, xmm7
vaesenc xmm14, xmm14, xmm7
vaesenc xmm15, xmm15, xmm7
vmovdqu xmm7, OWORD PTR [rax+176]
vaesenc xmm8, xmm8, xmm7
vaesenc xmm9, xmm9, xmm7
vaesenc xmm10, xmm10, xmm7
vaesenc xmm11, xmm11, xmm7
vaesenc xmm12, xmm12, xmm7
vaesenc xmm13, xmm13, xmm7
vaesenc xmm14, xmm14, xmm7
vaesenc xmm15, xmm15, xmm7
cmp r8d, 13
vmovdqu xmm7, OWORD PTR [rax+192]
jl L_AES_GCM_encrypt_update_avx2_aesenc_128_ghash_avx_done
vaesenc xmm8, xmm8, xmm7
vaesenc xmm9, xmm9, xmm7
vaesenc xmm10, xmm10, xmm7
vaesenc xmm11, xmm11, xmm7
vaesenc xmm12, xmm12, xmm7
vaesenc xmm13, xmm13, xmm7
vaesenc xmm14, xmm14, xmm7
vaesenc xmm15, xmm15, xmm7
vmovdqu xmm7, OWORD PTR [rax+208]
vaesenc xmm8, xmm8, xmm7
vaesenc xmm9, xmm9, xmm7
vaesenc xmm10, xmm10, xmm7
vaesenc xmm11, xmm11, xmm7
vaesenc xmm12, xmm12, xmm7
vaesenc xmm13, xmm13, xmm7
vaesenc xmm14, xmm14, xmm7
vaesenc xmm15, xmm15, xmm7
vmovdqu xmm7, OWORD PTR [rax+224]
L_AES_GCM_encrypt_update_avx2_aesenc_128_ghash_avx_done:
; aesenc_last
vaesenclast xmm8, xmm8, xmm7
vaesenclast xmm9, xmm9, xmm7
vaesenclast xmm10, xmm10, xmm7
vaesenclast xmm11, xmm11, xmm7
vmovdqu xmm0, OWORD PTR [rcx]
vmovdqu xmm1, OWORD PTR [rcx+16]
vmovdqu xmm2, OWORD PTR [rcx+32]
vmovdqu xmm3, OWORD PTR [rcx+48]
vpxor xmm8, xmm8, xmm0
vpxor xmm9, xmm9, xmm1
vpxor xmm10, xmm10, xmm2
vpxor xmm11, xmm11, xmm3
vmovdqu OWORD PTR [rdx], xmm8
vmovdqu OWORD PTR [rdx+16], xmm9
vmovdqu OWORD PTR [rdx+32], xmm10
vmovdqu OWORD PTR [rdx+48], xmm11
vaesenclast xmm12, xmm12, xmm7
vaesenclast xmm13, xmm13, xmm7
vaesenclast xmm14, xmm14, xmm7
vaesenclast xmm15, xmm15, xmm7
vmovdqu xmm0, OWORD PTR [rcx+64]
vmovdqu xmm1, OWORD PTR [rcx+80]
vmovdqu xmm2, OWORD PTR [rcx+96]
vmovdqu xmm3, OWORD PTR [rcx+112]
vpxor xmm12, xmm12, xmm0
vpxor xmm13, xmm13, xmm1
vpxor xmm14, xmm14, xmm2
vpxor xmm15, xmm15, xmm3
vmovdqu OWORD PTR [rdx+64], xmm12
vmovdqu OWORD PTR [rdx+80], xmm13
vmovdqu OWORD PTR [rdx+96], xmm14
vmovdqu OWORD PTR [rdx+112], xmm15
; aesenc_128_ghash - end
add edi, 128
cmp edi, r15d
jl L_AES_GCM_encrypt_update_avx2_ghash_128
L_AES_GCM_encrypt_update_avx2_end_128:
vmovdqu xmm4, OWORD PTR L_avx2_aes_gcm_bswap_mask
vpshufb xmm8, xmm8, xmm4
vpshufb xmm9, xmm9, xmm4
vpshufb xmm10, xmm10, xmm4
vpshufb xmm11, xmm11, xmm4
vpshufb xmm12, xmm12, xmm4
vpshufb xmm13, xmm13, xmm4
vpshufb xmm14, xmm14, xmm4
vpshufb xmm15, xmm15, xmm4
vpxor xmm8, xmm8, xmm6
vmovdqu xmm7, OWORD PTR [rsp]
vpclmulqdq xmm5, xmm7, xmm15, 16
vpclmulqdq xmm1, xmm7, xmm15, 1
vpclmulqdq xmm4, xmm7, xmm15, 0
vpclmulqdq xmm6, xmm7, xmm15, 17
vpxor xmm5, xmm5, xmm1
vmovdqu xmm7, OWORD PTR [rsp+16]
vpclmulqdq xmm2, xmm7, xmm14, 16
vpclmulqdq xmm1, xmm7, xmm14, 1
vpclmulqdq xmm0, xmm7, xmm14, 0
vpclmulqdq xmm3, xmm7, xmm14, 17
vpxor xmm2, xmm2, xmm1
vpxor xmm6, xmm6, xmm3
vpxor xmm5, xmm5, xmm2
vpxor xmm4, xmm4, xmm0
vmovdqu xmm15, OWORD PTR [rsp+32]
vmovdqu xmm7, OWORD PTR [rsp+48]
vpclmulqdq xmm2, xmm15, xmm13, 16
vpclmulqdq xmm1, xmm15, xmm13, 1
vpclmulqdq xmm0, xmm15, xmm13, 0
vpclmulqdq xmm3, xmm15, xmm13, 17
vpxor xmm2, xmm2, xmm1
vpxor xmm6, xmm6, xmm3
vpxor xmm5, xmm5, xmm2
vpxor xmm4, xmm4, xmm0
vpclmulqdq xmm2, xmm7, xmm12, 16
vpclmulqdq xmm1, xmm7, xmm12, 1
vpclmulqdq xmm0, xmm7, xmm12, 0
vpclmulqdq xmm3, xmm7, xmm12, 17
vpxor xmm2, xmm2, xmm1
vpxor xmm6, xmm6, xmm3
vpxor xmm5, xmm5, xmm2
vpxor xmm4, xmm4, xmm0
vmovdqu xmm15, OWORD PTR [rsp+64]
vmovdqu xmm7, OWORD PTR [rsp+80]
vpclmulqdq xmm2, xmm15, xmm11, 16
vpclmulqdq xmm1, xmm15, xmm11, 1
vpclmulqdq xmm0, xmm15, xmm11, 0
vpclmulqdq xmm3, xmm15, xmm11, 17
vpxor xmm2, xmm2, xmm1
vpxor xmm6, xmm6, xmm3
vpxor xmm5, xmm5, xmm2
vpxor xmm4, xmm4, xmm0
vpclmulqdq xmm2, xmm7, xmm10, 16
vpclmulqdq xmm1, xmm7, xmm10, 1
vpclmulqdq xmm0, xmm7, xmm10, 0
vpclmulqdq xmm3, xmm7, xmm10, 17
vpxor xmm2, xmm2, xmm1
vpxor xmm6, xmm6, xmm3
vpxor xmm5, xmm5, xmm2
vpxor xmm4, xmm4, xmm0
vmovdqu xmm15, OWORD PTR [rsp+96]
vmovdqu xmm7, OWORD PTR [rsp+112]
vpclmulqdq xmm2, xmm15, xmm9, 16
vpclmulqdq xmm1, xmm15, xmm9, 1
vpclmulqdq xmm0, xmm15, xmm9, 0
vpclmulqdq xmm3, xmm15, xmm9, 17
vpxor xmm2, xmm2, xmm1
vpxor xmm6, xmm6, xmm3
vpxor xmm5, xmm5, xmm2
vpxor xmm4, xmm4, xmm0
vpclmulqdq xmm2, xmm7, xmm8, 16
vpclmulqdq xmm1, xmm7, xmm8, 1
vpclmulqdq xmm0, xmm7, xmm8, 0
vpclmulqdq xmm3, xmm7, xmm8, 17
vpxor xmm2, xmm2, xmm1
vpxor xmm6, xmm6, xmm3
vpxor xmm5, xmm5, xmm2
vpxor xmm4, xmm4, xmm0
vpslldq xmm7, xmm5, 8
vpsrldq xmm5, xmm5, 8
vpxor xmm4, xmm4, xmm7
vpxor xmm6, xmm6, xmm5
; ghash_red
vmovdqu xmm2, OWORD PTR L_avx2_aes_gcm_mod2_128
vpclmulqdq xmm0, xmm4, xmm2, 16
vpshufd xmm1, xmm4, 78
vpxor xmm1, xmm1, xmm0
vpclmulqdq xmm0, xmm1, xmm2, 16
vpshufd xmm1, xmm1, 78
vpxor xmm1, xmm1, xmm0
vpxor xmm6, xmm6, xmm1
vmovdqu xmm5, OWORD PTR [rsp]
vmovdqu xmm4, OWORD PTR [rsp+128]
L_AES_GCM_encrypt_update_avx2_done_128:
cmp edi, r9d
je L_AES_GCM_encrypt_update_avx2_done_enc
mov r15d, r9d
and r15d, 4294967280
cmp edi, r15d
jge L_AES_GCM_encrypt_update_avx2_last_block_done
; aesenc_block
vmovdqu xmm1, xmm4
vpshufb xmm0, xmm1, OWORD PTR L_avx2_aes_gcm_bswap_epi64
vpaddd xmm1, xmm1, OWORD PTR L_avx2_aes_gcm_one
vpxor xmm0, xmm0, [rax]
vmovdqu xmm2, OWORD PTR [rax+16]
vaesenc xmm0, xmm0, xmm2
vmovdqu xmm2, OWORD PTR [rax+32]
vaesenc xmm0, xmm0, xmm2
vmovdqu xmm2, OWORD PTR [rax+48]
vaesenc xmm0, xmm0, xmm2
vmovdqu xmm2, OWORD PTR [rax+64]
vaesenc xmm0, xmm0, xmm2
vmovdqu xmm2, OWORD PTR [rax+80]
vaesenc xmm0, xmm0, xmm2
vmovdqu xmm2, OWORD PTR [rax+96]
vaesenc xmm0, xmm0, xmm2
vmovdqu xmm2, OWORD PTR [rax+112]
vaesenc xmm0, xmm0, xmm2
vmovdqu xmm2, OWORD PTR [rax+128]
vaesenc xmm0, xmm0, xmm2
vmovdqu xmm2, OWORD PTR [rax+144]
vaesenc xmm0, xmm0, xmm2
vmovdqu xmm4, xmm1
cmp r8d, 11
vmovdqu xmm1, OWORD PTR [rax+160]
jl L_AES_GCM_encrypt_update_avx2_aesenc_block_last
vaesenc xmm0, xmm0, xmm1
vmovdqu xmm2, OWORD PTR [rax+176]
vaesenc xmm0, xmm0, xmm2
cmp r8d, 13
vmovdqu xmm1, OWORD PTR [rax+192]
jl L_AES_GCM_encrypt_update_avx2_aesenc_block_last
vaesenc xmm0, xmm0, xmm1
vmovdqu xmm2, OWORD PTR [rax+208]
vaesenc xmm0, xmm0, xmm2
vmovdqu xmm1, OWORD PTR [rax+224]
L_AES_GCM_encrypt_update_avx2_aesenc_block_last:
vaesenclast xmm0, xmm0, xmm1
vmovdqu xmm1, OWORD PTR [r11+rdi]
vpxor xmm0, xmm0, xmm1
vmovdqu OWORD PTR [r10+rdi], xmm0
vpshufb xmm0, xmm0, OWORD PTR L_avx2_aes_gcm_bswap_mask
vpxor xmm6, xmm6, xmm0
add edi, 16
cmp edi, r15d
jge L_AES_GCM_encrypt_update_avx2_last_block_ghash
L_AES_GCM_encrypt_update_avx2_last_block_start:
vmovdqu xmm12, OWORD PTR [r11+rdi]
vpshufb xmm11, xmm4, OWORD PTR L_avx2_aes_gcm_bswap_epi64
vpaddd xmm4, xmm4, OWORD PTR L_avx2_aes_gcm_one
; aesenc_gfmul_sb
vpclmulqdq xmm2, xmm6, xmm5, 1
vpclmulqdq xmm3, xmm6, xmm5, 16
vpclmulqdq xmm1, xmm6, xmm5, 0
vpclmulqdq xmm8, xmm6, xmm5, 17
vpxor xmm11, xmm11, [rax]
vaesenc xmm11, xmm11, [rax+16]
vpxor xmm3, xmm3, xmm2
vpslldq xmm2, xmm3, 8
vpsrldq xmm3, xmm3, 8
vaesenc xmm11, xmm11, [rax+32]
vpxor xmm2, xmm2, xmm1
vpclmulqdq xmm1, xmm2, OWORD PTR L_avx2_aes_gcm_mod2_128, 16
vaesenc xmm11, xmm11, [rax+48]
vaesenc xmm11, xmm11, [rax+64]
vaesenc xmm11, xmm11, [rax+80]
vpshufd xmm2, xmm2, 78
vpxor xmm2, xmm2, xmm1
vpclmulqdq xmm1, xmm2, OWORD PTR L_avx2_aes_gcm_mod2_128, 16
vaesenc xmm11, xmm11, [rax+96]
vaesenc xmm11, xmm11, [rax+112]
vaesenc xmm11, xmm11, [rax+128]
vpshufd xmm2, xmm2, 78
vaesenc xmm11, xmm11, [rax+144]
vpxor xmm8, xmm8, xmm3
vpxor xmm2, xmm2, xmm8
vmovdqu xmm0, OWORD PTR [rax+160]
cmp r8d, 11
jl L_AES_GCM_encrypt_update_avx2_aesenc_gfmul_sb_last
vaesenc xmm11, xmm11, xmm0
vaesenc xmm11, xmm11, [rax+176]
vmovdqu xmm0, OWORD PTR [rax+192]
cmp r8d, 13
jl L_AES_GCM_encrypt_update_avx2_aesenc_gfmul_sb_last
vaesenc xmm11, xmm11, xmm0
vaesenc xmm11, xmm11, [rax+208]
vmovdqu xmm0, OWORD PTR [rax+224]
L_AES_GCM_encrypt_update_avx2_aesenc_gfmul_sb_last:
vaesenclast xmm11, xmm11, xmm0
vpxor xmm6, xmm2, xmm1
vpxor xmm11, xmm11, xmm12
vmovdqu OWORD PTR [r10+rdi], xmm11
vpshufb xmm11, xmm11, OWORD PTR L_avx2_aes_gcm_bswap_mask
vpxor xmm6, xmm6, xmm11
add edi, 16
cmp edi, r15d
jl L_AES_GCM_encrypt_update_avx2_last_block_start
L_AES_GCM_encrypt_update_avx2_last_block_ghash:
; ghash_gfmul_red
vpclmulqdq xmm10, xmm6, xmm5, 16
vpclmulqdq xmm9, xmm6, xmm5, 1
vpclmulqdq xmm8, xmm6, xmm5, 0
vpxor xmm10, xmm10, xmm9
vpslldq xmm9, xmm10, 8
vpsrldq xmm10, xmm10, 8
vpxor xmm9, xmm9, xmm8
vpclmulqdq xmm6, xmm6, xmm5, 17
vpclmulqdq xmm8, xmm9, OWORD PTR L_avx2_aes_gcm_mod2_128, 16
vpshufd xmm9, xmm9, 78
vpxor xmm9, xmm9, xmm8
vpclmulqdq xmm8, xmm9, OWORD PTR L_avx2_aes_gcm_mod2_128, 16
vpshufd xmm9, xmm9, 78
vpxor xmm6, xmm6, xmm10
vpxor xmm6, xmm6, xmm9
vpxor xmm6, xmm6, xmm8
L_AES_GCM_encrypt_update_avx2_last_block_done:
L_AES_GCM_encrypt_update_avx2_done_enc:
vmovdqu OWORD PTR [r12], xmm6
vmovdqu OWORD PTR [r14], xmm4
vzeroupper
vmovdqu xmm6, OWORD PTR [rsp+152]
vmovdqu xmm7, OWORD PTR [rsp+168]
vmovdqu xmm8, OWORD PTR [rsp+184]
vmovdqu xmm9, OWORD PTR [rsp+200]
vmovdqu xmm10, OWORD PTR [rsp+216]
vmovdqu xmm11, OWORD PTR [rsp+232]
vmovdqu xmm12, OWORD PTR [rsp+248]
vmovdqu xmm13, OWORD PTR [rsp+264]
vmovdqu xmm14, OWORD PTR [rsp+280]
vmovdqu xmm15, OWORD PTR [rsp+296]
add rsp, 312
pop rdi
pop r15
pop r14
pop r13
pop r12
ret
AES_GCM_encrypt_update_avx2 ENDP
_text ENDS
_text SEGMENT READONLY PARA
;-----------------------------------------------------------------------
; AES_GCM_encrypt_final_avx2
; Produce the final GCM authentication tag after all AAD and ciphertext
; have been folded into the GHASH state.
; ABI: Microsoft x64 (callee-saved r12/r13 and xmm6/xmm7 preserved).
;   rcx      = pointer to current GHASH state X (16 bytes)
;   rdx      = output buffer for the authentication tag
;   r8d      = requested tag length in bytes (16 takes the fast path)
;   r9       = ciphertext length in bytes
;   arg5     = AAD length in bytes                      (stack)
;   arg6     = pointer to the stored hash subkey h      (stack)
;              -- H used below is derived from it by a GF(2^128)
;                 doubling; assumes h is kept in the pre-doubled
;                 form -- TODO confirm against key setup
;   arg7     = pointer to E(K, ICB) (encrypted initial counter block)
;-----------------------------------------------------------------------
AES_GCM_encrypt_final_avx2 PROC
push r12
push r13
; Stack args are at +16 from their call-time offsets after the two
; pushes above: [rsp+56]=arg5, [rsp+64]=arg6, [rsp+72]=arg7.
mov eax, DWORD PTR [rsp+56]
mov r10, QWORD PTR [rsp+64]
mov r11, QWORD PTR [rsp+72]
sub rsp, 48
; Save Win64 callee-saved XMM registers used below.
vmovdqu OWORD PTR [rsp+16], xmm6
vmovdqu OWORD PTR [rsp+32], xmm7
vmovdqu xmm4, OWORD PTR [rcx]
vmovdqu xmm5, OWORD PTR [r10]
vmovdqu xmm6, OWORD PTR [r11]
; H = (h * x) mod (x^128 + x^7 + x^2 + x + 1):
; 128-bit left shift by one (vpsrlq/vpsllq/vpslldq/vpor), then
; conditionally XOR the reduction polynomial when the shifted-out top
; bit was set (broadcast sign of the top dword, mask the poly).
vpsrlq xmm1, xmm5, 63
vpsllq xmm0, xmm5, 1
vpslldq xmm1, xmm1, 8
vpor xmm0, xmm0, xmm1
vpshufd xmm5, xmm5, 255
vpsrad xmm5, xmm5, 31
vpand xmm5, xmm5, OWORD PTR L_avx2_aes_gcm_mod2_128
vpxor xmm5, xmm5, xmm0
; calc_tag
; Build the length block: (len(C) in bits) || (len(A) in bits),
; XOR into the GHASH state X.
shl r9, 3
shl rax, 3
vmovq xmm0, r9
vmovq xmm1, rax
vpunpcklqdq xmm0, xmm0, xmm1
vpxor xmm0, xmm0, xmm4
; ghash_gfmul_red
; One 128x128 carry-less multiply by H (Karatsuba-style: low, high
; and middle partial products) followed by the two-step Montgomery-
; style reduction modulo the GCM polynomial.
vpclmulqdq xmm7, xmm0, xmm5, 16
vpclmulqdq xmm3, xmm0, xmm5, 1
vpclmulqdq xmm2, xmm0, xmm5, 0
vpxor xmm7, xmm7, xmm3
vpslldq xmm3, xmm7, 8
vpsrldq xmm7, xmm7, 8
vpxor xmm3, xmm3, xmm2
vpclmulqdq xmm0, xmm0, xmm5, 17
vpclmulqdq xmm2, xmm3, OWORD PTR L_avx2_aes_gcm_mod2_128, 16
vpshufd xmm3, xmm3, 78
vpxor xmm3, xmm3, xmm2
vpclmulqdq xmm2, xmm3, OWORD PTR L_avx2_aes_gcm_mod2_128, 16
vpshufd xmm3, xmm3, 78
vpxor xmm0, xmm0, xmm7
vpxor xmm0, xmm0, xmm3
vpxor xmm0, xmm0, xmm2
; Back to byte order, then mask with E(K, ICB) to form the tag.
vpshufb xmm0, xmm0, OWORD PTR L_avx2_aes_gcm_bswap_mask
vpxor xmm0, xmm0, xmm6
; store_tag
cmp r8d, 16
je L_AES_GCM_encrypt_final_avx2_store_tag_16
; Short tag: spill to the stack and copy r8d bytes one at a time.
xor r12, r12
vmovdqu OWORD PTR [rsp], xmm0
L_AES_GCM_encrypt_final_avx2_store_tag_loop:
movzx r13d, BYTE PTR [rsp+r12]
mov BYTE PTR [rdx+r12], r13b
inc r12d
cmp r12d, r8d
jne L_AES_GCM_encrypt_final_avx2_store_tag_loop
jmp L_AES_GCM_encrypt_final_avx2_store_tag_done
L_AES_GCM_encrypt_final_avx2_store_tag_16:
vmovdqu OWORD PTR [rdx], xmm0
L_AES_GCM_encrypt_final_avx2_store_tag_done:
vzeroupper
vmovdqu xmm6, OWORD PTR [rsp+16]
vmovdqu xmm7, OWORD PTR [rsp+32]
add rsp, 48
pop r13
pop r12
ret
AES_GCM_encrypt_final_avx2 ENDP
_text ENDS
_text SEGMENT READONLY PARA
;-----------------------------------------------------------------------
; AES_GCM_decrypt_update_avx2
; Streaming AES-GCM decrypt update: GHASHes the ciphertext and applies
; AES-CTR to produce plaintext, processing 8 blocks (128 bytes) per
; main-loop iteration and then one block at a time for the remainder.
; Note decrypt can fold ciphertext into GHASH in the same pass that
; generates the keystream, so AES rounds and VPCLMULQDQ work are
; interleaved to hide latency.
; ABI: Microsoft x64.
;   rcx      = AES round key schedule               -> rax
;   edx      = number of AES rounds (10/12/14)      -> r8d
;   r8       = output (plaintext) pointer           -> r10
;   r9       = input (ciphertext) pointer           -> r11
;   arg5     = number of bytes to process           -> r9d
;   arg6     = pointer to GHASH state X (in/out)    -> r12
;   arg7     = pointer to GHASH subkey h            -> r14
;   arg8     = pointer to counter block (in/out)    -> r15
; Stack locals after "sub rsp, 328":
;   [rsp+0..127]   H^1..H^8 table for the 8-block GHASH
;   [rsp+128]      saved counter block during the 128-byte loop
;   [rsp+144]      xmm15 spill
;   [rsp+168..327] saved callee-saved xmm6-xmm15 (Win64)
; Only full 16-byte blocks are handled here; any sub-block tail is the
; caller's responsibility -- presumably handled elsewhere, verify.
;-----------------------------------------------------------------------
AES_GCM_decrypt_update_avx2 PROC
push r13
push r12
push r14
push r15
push rdi
mov rax, rcx
mov r10, r8
mov r8d, edx
mov r11, r9
; Stack args are at +40 from their call-time offsets after the five
; pushes above: [rsp+80]=arg5 ... [rsp+104]=arg8.
mov r9d, DWORD PTR [rsp+80]
mov r12, QWORD PTR [rsp+88]
mov r14, QWORD PTR [rsp+96]
mov r15, QWORD PTR [rsp+104]
sub rsp, 328
; Save Win64 callee-saved XMM registers used below.
vmovdqu OWORD PTR [rsp+168], xmm6
vmovdqu OWORD PTR [rsp+184], xmm7
vmovdqu OWORD PTR [rsp+200], xmm8
vmovdqu OWORD PTR [rsp+216], xmm9
vmovdqu OWORD PTR [rsp+232], xmm10
vmovdqu OWORD PTR [rsp+248], xmm11
vmovdqu OWORD PTR [rsp+264], xmm12
vmovdqu OWORD PTR [rsp+280], xmm13
vmovdqu OWORD PTR [rsp+296], xmm14
vmovdqu OWORD PTR [rsp+312], xmm15
; xmm6 = GHASH state X, xmm5 = stored subkey h, xmm4 = counter block.
vmovdqu xmm6, OWORD PTR [r12]
vmovdqu xmm5, OWORD PTR [r14]
vmovdqu xmm4, OWORD PTR [r15]
; Calculate H
; H = (h * x) mod (x^128 + x^7 + x^2 + x + 1): 128-bit left shift by
; one, then conditionally XOR the reduction polynomial when the
; shifted-out top bit was set.
vpsrlq xmm1, xmm5, 63
vpsllq xmm0, xmm5, 1
vpslldq xmm1, xmm1, 8
vpor xmm0, xmm0, xmm1
vpshufd xmm5, xmm5, 255
vpsrad xmm5, xmm5, 31
vpand xmm5, xmm5, OWORD PTR L_avx2_aes_gcm_mod2_128
vpxor xmm5, xmm5, xmm0
; edi = number of input bytes consumed so far.
xor edi, edi
cmp r9d, 128
mov r13d, r9d
jl L_AES_GCM_decrypt_update_avx2_done_128
; r13d = length rounded down to a multiple of 128 bytes.
and r13d, 4294967168
vmovdqu OWORD PTR [rsp+128], xmm4
vmovdqu OWORD PTR [rsp+144], xmm15
vmovdqu xmm3, OWORD PTR L_avx2_aes_gcm_mod2_128
; Precompute the table of hash-key powers H^1..H^8 at [rsp..rsp+127]
; so eight GHASH multiplies per loop iteration can be summed before a
; single reduction.
; H ^ 1 and H ^ 2
vpclmulqdq xmm9, xmm5, xmm5, 0
vpclmulqdq xmm10, xmm5, xmm5, 17
vpclmulqdq xmm8, xmm9, xmm3, 16
vpshufd xmm9, xmm9, 78
vpxor xmm9, xmm9, xmm8
vpclmulqdq xmm8, xmm9, xmm3, 16
vpshufd xmm9, xmm9, 78
vpxor xmm9, xmm9, xmm8
vpxor xmm0, xmm10, xmm9
vmovdqu OWORD PTR [rsp], xmm5
vmovdqu OWORD PTR [rsp+16], xmm0
; H ^ 3 and H ^ 4
vpclmulqdq xmm11, xmm0, xmm5, 16
vpclmulqdq xmm10, xmm0, xmm5, 1
vpclmulqdq xmm9, xmm0, xmm5, 0
vpclmulqdq xmm12, xmm0, xmm5, 17
vpclmulqdq xmm13, xmm0, xmm0, 0
vpclmulqdq xmm14, xmm0, xmm0, 17
vpxor xmm11, xmm11, xmm10
vpslldq xmm10, xmm11, 8
vpsrldq xmm11, xmm11, 8
vpxor xmm10, xmm10, xmm9
vpclmulqdq xmm8, xmm13, xmm3, 16
vpclmulqdq xmm9, xmm10, xmm3, 16
vpshufd xmm10, xmm10, 78
vpshufd xmm13, xmm13, 78
vpxor xmm10, xmm10, xmm9
vpxor xmm13, xmm13, xmm8
vpclmulqdq xmm9, xmm10, xmm3, 16
vpclmulqdq xmm8, xmm13, xmm3, 16
vpshufd xmm10, xmm10, 78
vpshufd xmm13, xmm13, 78
vpxor xmm12, xmm12, xmm11
vpxor xmm13, xmm13, xmm8
vpxor xmm10, xmm10, xmm12
vpxor xmm2, xmm13, xmm14
vpxor xmm1, xmm10, xmm9
vmovdqu OWORD PTR [rsp+32], xmm1
vmovdqu OWORD PTR [rsp+48], xmm2
; H ^ 5 and H ^ 6
vpclmulqdq xmm11, xmm1, xmm0, 16
vpclmulqdq xmm10, xmm1, xmm0, 1
vpclmulqdq xmm9, xmm1, xmm0, 0
vpclmulqdq xmm12, xmm1, xmm0, 17
vpclmulqdq xmm13, xmm1, xmm1, 0
vpclmulqdq xmm14, xmm1, xmm1, 17
vpxor xmm11, xmm11, xmm10
vpslldq xmm10, xmm11, 8
vpsrldq xmm11, xmm11, 8
vpxor xmm10, xmm10, xmm9
vpclmulqdq xmm8, xmm13, xmm3, 16
vpclmulqdq xmm9, xmm10, xmm3, 16
vpshufd xmm10, xmm10, 78
vpshufd xmm13, xmm13, 78
vpxor xmm10, xmm10, xmm9
vpxor xmm13, xmm13, xmm8
vpclmulqdq xmm9, xmm10, xmm3, 16
vpclmulqdq xmm8, xmm13, xmm3, 16
vpshufd xmm10, xmm10, 78
vpshufd xmm13, xmm13, 78
vpxor xmm12, xmm12, xmm11
vpxor xmm13, xmm13, xmm8
vpxor xmm10, xmm10, xmm12
vpxor xmm0, xmm13, xmm14
vpxor xmm7, xmm10, xmm9
vmovdqu OWORD PTR [rsp+64], xmm7
vmovdqu OWORD PTR [rsp+80], xmm0
; H ^ 7 and H ^ 8
vpclmulqdq xmm11, xmm2, xmm1, 16
vpclmulqdq xmm10, xmm2, xmm1, 1
vpclmulqdq xmm9, xmm2, xmm1, 0
vpclmulqdq xmm12, xmm2, xmm1, 17
vpclmulqdq xmm13, xmm2, xmm2, 0
vpclmulqdq xmm14, xmm2, xmm2, 17
vpxor xmm11, xmm11, xmm10
vpslldq xmm10, xmm11, 8
vpsrldq xmm11, xmm11, 8
vpxor xmm10, xmm10, xmm9
vpclmulqdq xmm8, xmm13, xmm3, 16
vpclmulqdq xmm9, xmm10, xmm3, 16
vpshufd xmm10, xmm10, 78
vpshufd xmm13, xmm13, 78
vpxor xmm10, xmm10, xmm9
vpxor xmm13, xmm13, xmm8
vpclmulqdq xmm9, xmm10, xmm3, 16
vpclmulqdq xmm8, xmm13, xmm3, 16
vpshufd xmm10, xmm10, 78
vpshufd xmm13, xmm13, 78
vpxor xmm12, xmm12, xmm11
vpxor xmm13, xmm13, xmm8
vpxor xmm10, xmm10, xmm12
vpxor xmm0, xmm13, xmm14
vpxor xmm7, xmm10, xmm9
vmovdqu OWORD PTR [rsp+96], xmm7
vmovdqu OWORD PTR [rsp+112], xmm0
; Main loop: decrypt 8 blocks per iteration while GHASHing the 8
; ciphertext blocks against H^8..H^1 (oldest block pairs with the
; highest power).
L_AES_GCM_decrypt_update_avx2_ghash_128:
; aesenc_128_ghash
; rcx = current input position, rdx = current output position.
lea rcx, QWORD PTR [r11+rdi]
lea rdx, QWORD PTR [r10+rdi]
; aesenc_ctr
; Build 8 consecutive counter blocks (byte-swapped to big-endian) and
; advance the saved counter by 8.
vmovdqu xmm0, OWORD PTR [rsp+128]
vmovdqu xmm1, OWORD PTR L_avx2_aes_gcm_bswap_epi64
vpaddd xmm9, xmm0, OWORD PTR L_avx2_aes_gcm_one
vpshufb xmm8, xmm0, xmm1
vpaddd xmm10, xmm0, OWORD PTR L_avx2_aes_gcm_two
vpshufb xmm9, xmm9, xmm1
vpaddd xmm11, xmm0, OWORD PTR L_avx2_aes_gcm_three
vpshufb xmm10, xmm10, xmm1
vpaddd xmm12, xmm0, OWORD PTR L_avx2_aes_gcm_four
vpshufb xmm11, xmm11, xmm1
vpaddd xmm13, xmm0, OWORD PTR L_avx2_aes_gcm_five
vpshufb xmm12, xmm12, xmm1
vpaddd xmm14, xmm0, OWORD PTR L_avx2_aes_gcm_six
vpshufb xmm13, xmm13, xmm1
vpaddd xmm15, xmm0, OWORD PTR L_avx2_aes_gcm_seven
vpshufb xmm14, xmm14, xmm1
vpaddd xmm0, xmm0, OWORD PTR L_avx2_aes_gcm_eight
vpshufb xmm15, xmm15, xmm1
; aesenc_xor
; AES round 0: XOR all 8 counter blocks with the first round key.
vmovdqu xmm7, OWORD PTR [rax]
vmovdqu OWORD PTR [rsp+128], xmm0
vpxor xmm8, xmm8, xmm7
vpxor xmm9, xmm9, xmm7
vpxor xmm10, xmm10, xmm7
vpxor xmm11, xmm11, xmm7
vpxor xmm12, xmm12, xmm7
vpxor xmm13, xmm13, xmm7
vpxor xmm14, xmm14, xmm7
vpxor xmm15, xmm15, xmm7
; aesenc_pclmul_1
; GHASH block 0: (bswap(C[0]) XOR X) * H^8, interleaved with AES
; round 1 on all 8 keystream blocks. Partial products:
; xmm6=low, xmm7=high, xmm5/xmm3=middle.
vmovdqu xmm1, OWORD PTR [rcx]
vmovdqu xmm0, OWORD PTR [rax+16]
vpshufb xmm1, xmm1, OWORD PTR L_avx2_aes_gcm_bswap_mask
vmovdqu xmm2, OWORD PTR [rsp+112]
vpxor xmm1, xmm1, xmm6
vpclmulqdq xmm5, xmm1, xmm2, 16
vpclmulqdq xmm3, xmm1, xmm2, 1
vpclmulqdq xmm6, xmm1, xmm2, 0
vpclmulqdq xmm7, xmm1, xmm2, 17
vaesenc xmm8, xmm8, xmm0
vaesenc xmm9, xmm9, xmm0
vaesenc xmm10, xmm10, xmm0
vaesenc xmm11, xmm11, xmm0
vaesenc xmm12, xmm12, xmm0
vaesenc xmm13, xmm13, xmm0
vaesenc xmm14, xmm14, xmm0
vaesenc xmm15, xmm15, xmm0
; aesenc_pclmul_2
; GHASH block 1 against H^7 + AES round 2.
vmovdqu xmm1, OWORD PTR [rcx+16]
vmovdqu xmm0, OWORD PTR [rsp+96]
vpshufb xmm1, xmm1, OWORD PTR L_avx2_aes_gcm_bswap_mask
vpxor xmm5, xmm5, xmm3
vpclmulqdq xmm2, xmm1, xmm0, 16
vpclmulqdq xmm3, xmm1, xmm0, 1
vpclmulqdq xmm4, xmm1, xmm0, 0
vpclmulqdq xmm1, xmm1, xmm0, 17
vmovdqu xmm0, OWORD PTR [rax+32]
vpxor xmm7, xmm7, xmm1
vaesenc xmm8, xmm8, xmm0
vaesenc xmm9, xmm9, xmm0
vaesenc xmm10, xmm10, xmm0
vaesenc xmm11, xmm11, xmm0
vaesenc xmm12, xmm12, xmm0
vaesenc xmm13, xmm13, xmm0
vaesenc xmm14, xmm14, xmm0
vaesenc xmm15, xmm15, xmm0
; aesenc_pclmul_n
; GHASH block 2 against H^6 + AES round 3.
vmovdqu xmm1, OWORD PTR [rcx+32]
vmovdqu xmm0, OWORD PTR [rsp+80]
vpshufb xmm1, xmm1, OWORD PTR L_avx2_aes_gcm_bswap_mask
vpxor xmm5, xmm5, xmm2
vpclmulqdq xmm2, xmm1, xmm0, 16
vpxor xmm5, xmm5, xmm3
vpclmulqdq xmm3, xmm1, xmm0, 1
vpxor xmm6, xmm6, xmm4
vpclmulqdq xmm4, xmm1, xmm0, 0
vpclmulqdq xmm1, xmm1, xmm0, 17
vmovdqu xmm0, OWORD PTR [rax+48]
vpxor xmm7, xmm7, xmm1
vaesenc xmm8, xmm8, xmm0
vaesenc xmm9, xmm9, xmm0
vaesenc xmm10, xmm10, xmm0
vaesenc xmm11, xmm11, xmm0
vaesenc xmm12, xmm12, xmm0
vaesenc xmm13, xmm13, xmm0
vaesenc xmm14, xmm14, xmm0
vaesenc xmm15, xmm15, xmm0
; aesenc_pclmul_n
; GHASH block 3 against H^5 + AES round 4.
vmovdqu xmm1, OWORD PTR [rcx+48]
vmovdqu xmm0, OWORD PTR [rsp+64]
vpshufb xmm1, xmm1, OWORD PTR L_avx2_aes_gcm_bswap_mask
vpxor xmm5, xmm5, xmm2
vpclmulqdq xmm2, xmm1, xmm0, 16
vpxor xmm5, xmm5, xmm3
vpclmulqdq xmm3, xmm1, xmm0, 1
vpxor xmm6, xmm6, xmm4
vpclmulqdq xmm4, xmm1, xmm0, 0
vpclmulqdq xmm1, xmm1, xmm0, 17
vmovdqu xmm0, OWORD PTR [rax+64]
vpxor xmm7, xmm7, xmm1
vaesenc xmm8, xmm8, xmm0
vaesenc xmm9, xmm9, xmm0
vaesenc xmm10, xmm10, xmm0
vaesenc xmm11, xmm11, xmm0
vaesenc xmm12, xmm12, xmm0
vaesenc xmm13, xmm13, xmm0
vaesenc xmm14, xmm14, xmm0
vaesenc xmm15, xmm15, xmm0
; aesenc_pclmul_n
; GHASH block 4 against H^4 + AES round 5.
vmovdqu xmm1, OWORD PTR [rcx+64]
vmovdqu xmm0, OWORD PTR [rsp+48]
vpshufb xmm1, xmm1, OWORD PTR L_avx2_aes_gcm_bswap_mask
vpxor xmm5, xmm5, xmm2
vpclmulqdq xmm2, xmm1, xmm0, 16
vpxor xmm5, xmm5, xmm3
vpclmulqdq xmm3, xmm1, xmm0, 1
vpxor xmm6, xmm6, xmm4
vpclmulqdq xmm4, xmm1, xmm0, 0
vpclmulqdq xmm1, xmm1, xmm0, 17
vmovdqu xmm0, OWORD PTR [rax+80]
vpxor xmm7, xmm7, xmm1
vaesenc xmm8, xmm8, xmm0
vaesenc xmm9, xmm9, xmm0
vaesenc xmm10, xmm10, xmm0
vaesenc xmm11, xmm11, xmm0
vaesenc xmm12, xmm12, xmm0
vaesenc xmm13, xmm13, xmm0
vaesenc xmm14, xmm14, xmm0
vaesenc xmm15, xmm15, xmm0
; aesenc_pclmul_n
; GHASH block 5 against H^3 + AES round 6.
vmovdqu xmm1, OWORD PTR [rcx+80]
vmovdqu xmm0, OWORD PTR [rsp+32]
vpshufb xmm1, xmm1, OWORD PTR L_avx2_aes_gcm_bswap_mask
vpxor xmm5, xmm5, xmm2
vpclmulqdq xmm2, xmm1, xmm0, 16
vpxor xmm5, xmm5, xmm3
vpclmulqdq xmm3, xmm1, xmm0, 1
vpxor xmm6, xmm6, xmm4
vpclmulqdq xmm4, xmm1, xmm0, 0
vpclmulqdq xmm1, xmm1, xmm0, 17
vmovdqu xmm0, OWORD PTR [rax+96]
vpxor xmm7, xmm7, xmm1
vaesenc xmm8, xmm8, xmm0
vaesenc xmm9, xmm9, xmm0
vaesenc xmm10, xmm10, xmm0
vaesenc xmm11, xmm11, xmm0
vaesenc xmm12, xmm12, xmm0
vaesenc xmm13, xmm13, xmm0
vaesenc xmm14, xmm14, xmm0
vaesenc xmm15, xmm15, xmm0
; aesenc_pclmul_n
; GHASH block 6 against H^2 + AES round 7.
vmovdqu xmm1, OWORD PTR [rcx+96]
vmovdqu xmm0, OWORD PTR [rsp+16]
vpshufb xmm1, xmm1, OWORD PTR L_avx2_aes_gcm_bswap_mask
vpxor xmm5, xmm5, xmm2
vpclmulqdq xmm2, xmm1, xmm0, 16
vpxor xmm5, xmm5, xmm3
vpclmulqdq xmm3, xmm1, xmm0, 1
vpxor xmm6, xmm6, xmm4
vpclmulqdq xmm4, xmm1, xmm0, 0
vpclmulqdq xmm1, xmm1, xmm0, 17
vmovdqu xmm0, OWORD PTR [rax+112]
vpxor xmm7, xmm7, xmm1
vaesenc xmm8, xmm8, xmm0
vaesenc xmm9, xmm9, xmm0
vaesenc xmm10, xmm10, xmm0
vaesenc xmm11, xmm11, xmm0
vaesenc xmm12, xmm12, xmm0
vaesenc xmm13, xmm13, xmm0
vaesenc xmm14, xmm14, xmm0
vaesenc xmm15, xmm15, xmm0
; aesenc_pclmul_n
; GHASH block 7 against H^1 + AES round 8.
vmovdqu xmm1, OWORD PTR [rcx+112]
vmovdqu xmm0, OWORD PTR [rsp]
vpshufb xmm1, xmm1, OWORD PTR L_avx2_aes_gcm_bswap_mask
vpxor xmm5, xmm5, xmm2
vpclmulqdq xmm2, xmm1, xmm0, 16
vpxor xmm5, xmm5, xmm3
vpclmulqdq xmm3, xmm1, xmm0, 1
vpxor xmm6, xmm6, xmm4
vpclmulqdq xmm4, xmm1, xmm0, 0
vpclmulqdq xmm1, xmm1, xmm0, 17
vmovdqu xmm0, OWORD PTR [rax+128]
vpxor xmm7, xmm7, xmm1
vaesenc xmm8, xmm8, xmm0
vaesenc xmm9, xmm9, xmm0
vaesenc xmm10, xmm10, xmm0
vaesenc xmm11, xmm11, xmm0
vaesenc xmm12, xmm12, xmm0
vaesenc xmm13, xmm13, xmm0
vaesenc xmm14, xmm14, xmm0
vaesenc xmm15, xmm15, xmm0
; aesenc_pclmul_l
; Combine the accumulated partial products (fold the middle term into
; low/high halves) and reduce modulo the GCM polynomial, interleaved
; with AES round 9. New GHASH state ends up in xmm6.
vpxor xmm5, xmm5, xmm2
vpxor xmm6, xmm6, xmm4
vpxor xmm5, xmm5, xmm3
vpslldq xmm1, xmm5, 8
vpsrldq xmm5, xmm5, 8
vmovdqu xmm4, OWORD PTR [rax+144]
vmovdqu xmm0, OWORD PTR L_avx2_aes_gcm_mod2_128
vaesenc xmm8, xmm8, xmm4
vpxor xmm6, xmm6, xmm1
vpxor xmm7, xmm7, xmm5
vpclmulqdq xmm3, xmm6, xmm0, 16
vaesenc xmm9, xmm9, xmm4
vaesenc xmm10, xmm10, xmm4
vaesenc xmm11, xmm11, xmm4
vpshufd xmm6, xmm6, 78
vpxor xmm6, xmm6, xmm3
vpclmulqdq xmm3, xmm6, xmm0, 16
vaesenc xmm12, xmm12, xmm4
vaesenc xmm13, xmm13, xmm4
vaesenc xmm14, xmm14, xmm4
vpshufd xmm6, xmm6, 78
vpxor xmm6, xmm6, xmm3
vpxor xmm6, xmm6, xmm7
vaesenc xmm15, xmm15, xmm4
; Extra rounds for AES-192 (r8d >= 11) and AES-256 (r8d >= 13); the
; last round key is left in xmm7 for vaesenclast.
cmp r8d, 11
vmovdqu xmm7, OWORD PTR [rax+160]
jl L_AES_GCM_decrypt_update_avx2_aesenc_128_ghash_avx_done
vaesenc xmm8, xmm8, xmm7
vaesenc xmm9, xmm9, xmm7
vaesenc xmm10, xmm10, xmm7
vaesenc xmm11, xmm11, xmm7
vaesenc xmm12, xmm12, xmm7
vaesenc xmm13, xmm13, xmm7
vaesenc xmm14, xmm14, xmm7
vaesenc xmm15, xmm15, xmm7
vmovdqu xmm7, OWORD PTR [rax+176]
vaesenc xmm8, xmm8, xmm7
vaesenc xmm9, xmm9, xmm7
vaesenc xmm10, xmm10, xmm7
vaesenc xmm11, xmm11, xmm7
vaesenc xmm12, xmm12, xmm7
vaesenc xmm13, xmm13, xmm7
vaesenc xmm14, xmm14, xmm7
vaesenc xmm15, xmm15, xmm7
cmp r8d, 13
vmovdqu xmm7, OWORD PTR [rax+192]
jl L_AES_GCM_decrypt_update_avx2_aesenc_128_ghash_avx_done
vaesenc xmm8, xmm8, xmm7
vaesenc xmm9, xmm9, xmm7
vaesenc xmm10, xmm10, xmm7
vaesenc xmm11, xmm11, xmm7
vaesenc xmm12, xmm12, xmm7
vaesenc xmm13, xmm13, xmm7
vaesenc xmm14, xmm14, xmm7
vaesenc xmm15, xmm15, xmm7
vmovdqu xmm7, OWORD PTR [rax+208]
vaesenc xmm8, xmm8, xmm7
vaesenc xmm9, xmm9, xmm7
vaesenc xmm10, xmm10, xmm7
vaesenc xmm11, xmm11, xmm7
vaesenc xmm12, xmm12, xmm7
vaesenc xmm13, xmm13, xmm7
vaesenc xmm14, xmm14, xmm7
vaesenc xmm15, xmm15, xmm7
vmovdqu xmm7, OWORD PTR [rax+224]
L_AES_GCM_decrypt_update_avx2_aesenc_128_ghash_avx_done:
; aesenc_last
; Final AES round, then XOR keystream with ciphertext to produce the
; plaintext; done in two 4-block halves to limit register pressure.
vaesenclast xmm8, xmm8, xmm7
vaesenclast xmm9, xmm9, xmm7
vaesenclast xmm10, xmm10, xmm7
vaesenclast xmm11, xmm11, xmm7
vmovdqu xmm0, OWORD PTR [rcx]
vmovdqu xmm1, OWORD PTR [rcx+16]
vmovdqu xmm2, OWORD PTR [rcx+32]
vmovdqu xmm3, OWORD PTR [rcx+48]
vpxor xmm8, xmm8, xmm0
vpxor xmm9, xmm9, xmm1
vpxor xmm10, xmm10, xmm2
vpxor xmm11, xmm11, xmm3
vmovdqu OWORD PTR [rdx], xmm8
vmovdqu OWORD PTR [rdx+16], xmm9
vmovdqu OWORD PTR [rdx+32], xmm10
vmovdqu OWORD PTR [rdx+48], xmm11
vaesenclast xmm12, xmm12, xmm7
vaesenclast xmm13, xmm13, xmm7
vaesenclast xmm14, xmm14, xmm7
vaesenclast xmm15, xmm15, xmm7
vmovdqu xmm0, OWORD PTR [rcx+64]
vmovdqu xmm1, OWORD PTR [rcx+80]
vmovdqu xmm2, OWORD PTR [rcx+96]
vmovdqu xmm3, OWORD PTR [rcx+112]
vpxor xmm12, xmm12, xmm0
vpxor xmm13, xmm13, xmm1
vpxor xmm14, xmm14, xmm2
vpxor xmm15, xmm15, xmm3
vmovdqu OWORD PTR [rdx+64], xmm12
vmovdqu OWORD PTR [rdx+80], xmm13
vmovdqu OWORD PTR [rdx+96], xmm14
vmovdqu OWORD PTR [rdx+112], xmm15
; aesenc_128_ghash - end
add edi, 128
cmp edi, r13d
jl L_AES_GCM_decrypt_update_avx2_ghash_128
; Restore H (xmm5), the counter (xmm4) and spilled xmm15 after the
; 128-byte loop.
vmovdqu xmm5, OWORD PTR [rsp]
vmovdqu xmm4, OWORD PTR [rsp+128]
vmovdqu xmm15, OWORD PTR [rsp+144]
L_AES_GCM_decrypt_update_avx2_done_128:
cmp edi, r9d
jge L_AES_GCM_decrypt_update_avx2_done_dec
; Tail: process remaining full 16-byte blocks one at a time.
; r13d = length rounded down to a multiple of 16 bytes.
mov r13d, r9d
and r13d, 4294967280
cmp edi, r13d
jge L_AES_GCM_decrypt_update_avx2_last_block_done
L_AES_GCM_decrypt_update_avx2_last_block_start:
; Fold bswap(C[i]) into the GHASH state up front (xmm12), then run
; the GHASH multiply by H interleaved with encrypting the counter.
vmovdqu xmm11, OWORD PTR [r11+rdi]
vpshufb xmm10, xmm4, OWORD PTR L_avx2_aes_gcm_bswap_epi64
vpshufb xmm12, xmm11, OWORD PTR L_avx2_aes_gcm_bswap_mask
vpaddd xmm4, xmm4, OWORD PTR L_avx2_aes_gcm_one
vpxor xmm12, xmm12, xmm6
; aesenc_gfmul_sb
vpclmulqdq xmm2, xmm12, xmm5, 1
vpclmulqdq xmm3, xmm12, xmm5, 16
vpclmulqdq xmm1, xmm12, xmm5, 0
vpclmulqdq xmm8, xmm12, xmm5, 17
vpxor xmm10, xmm10, [rax]
vaesenc xmm10, xmm10, [rax+16]
vpxor xmm3, xmm3, xmm2
vpslldq xmm2, xmm3, 8
vpsrldq xmm3, xmm3, 8
vaesenc xmm10, xmm10, [rax+32]
vpxor xmm2, xmm2, xmm1
vpclmulqdq xmm1, xmm2, OWORD PTR L_avx2_aes_gcm_mod2_128, 16
vaesenc xmm10, xmm10, [rax+48]
vaesenc xmm10, xmm10, [rax+64]
vaesenc xmm10, xmm10, [rax+80]
vpshufd xmm2, xmm2, 78
vpxor xmm2, xmm2, xmm1
vpclmulqdq xmm1, xmm2, OWORD PTR L_avx2_aes_gcm_mod2_128, 16
vaesenc xmm10, xmm10, [rax+96]
vaesenc xmm10, xmm10, [rax+112]
vaesenc xmm10, xmm10, [rax+128]
vpshufd xmm2, xmm2, 78
vaesenc xmm10, xmm10, [rax+144]
vpxor xmm8, xmm8, xmm3
vpxor xmm2, xmm2, xmm8
; Extra rounds for AES-192/AES-256; xmm0 carries the last round key.
vmovdqu xmm0, OWORD PTR [rax+160]
cmp r8d, 11
jl L_AES_GCM_decrypt_update_avx2_aesenc_gfmul_sb_last
vaesenc xmm10, xmm10, xmm0
vaesenc xmm10, xmm10, [rax+176]
vmovdqu xmm0, OWORD PTR [rax+192]
cmp r8d, 13
jl L_AES_GCM_decrypt_update_avx2_aesenc_gfmul_sb_last
vaesenc xmm10, xmm10, xmm0
vaesenc xmm10, xmm10, [rax+208]
vmovdqu xmm0, OWORD PTR [rax+224]
L_AES_GCM_decrypt_update_avx2_aesenc_gfmul_sb_last:
vaesenclast xmm10, xmm10, xmm0
; xmm6 = updated GHASH state; plaintext = keystream XOR ciphertext.
vpxor xmm6, xmm2, xmm1
vpxor xmm10, xmm10, xmm11
vmovdqu OWORD PTR [r10+rdi], xmm10
add edi, 16
cmp edi, r13d
jl L_AES_GCM_decrypt_update_avx2_last_block_start
L_AES_GCM_decrypt_update_avx2_last_block_done:
L_AES_GCM_decrypt_update_avx2_done_dec:
; Write back the GHASH state and counter for the next update call.
vmovdqu OWORD PTR [r12], xmm6
vmovdqu OWORD PTR [r15], xmm4
vzeroupper
vmovdqu xmm6, OWORD PTR [rsp+168]
vmovdqu xmm7, OWORD PTR [rsp+184]
vmovdqu xmm8, OWORD PTR [rsp+200]
vmovdqu xmm9, OWORD PTR [rsp+216]
vmovdqu xmm10, OWORD PTR [rsp+232]
vmovdqu xmm11, OWORD PTR [rsp+248]
vmovdqu xmm12, OWORD PTR [rsp+264]
vmovdqu xmm13, OWORD PTR [rsp+280]
vmovdqu xmm14, OWORD PTR [rsp+296]
vmovdqu xmm15, OWORD PTR [rsp+312]
add rsp, 328
pop rdi
pop r15
pop r14
pop r12
pop r13
ret
AES_GCM_decrypt_update_avx2 ENDP
_text ENDS
_text SEGMENT READONLY PARA
;-----------------------------------------------------------------------
; AES_GCM_decrypt_final_avx2
; Compute the expected GCM authentication tag from the final GHASH
; state and compare it with the received tag; writes 1 (match) or
; 0 (mismatch) to the result pointer.
; ABI: Microsoft x64 (callee-saved r12-r14 and xmm6/xmm7 preserved).
;   rcx      = pointer to current GHASH state X (16 bytes)
;   rdx      = pointer to the received tag to check
;   r8d      = tag length in bytes (16 takes the fast path)
;   r9       = ciphertext length in bytes
;   arg5     = AAD length in bytes                      (stack)
;   arg6     = pointer to the stored hash subkey h      (stack)
;   arg7     = pointer to E(K, ICB) block               (stack)
;   arg8     = pointer to int result (out: 1 ok, 0 bad) (stack)
;-----------------------------------------------------------------------
AES_GCM_decrypt_final_avx2 PROC
push r12
push r13
push r14
; Stack args are at +24 from their call-time offsets after the three
; pushes above: [rsp+64]=arg5 ... [rsp+88]=arg8.
mov eax, DWORD PTR [rsp+64]
mov r10, QWORD PTR [rsp+72]
mov r11, QWORD PTR [rsp+80]
mov r12, QWORD PTR [rsp+88]
sub rsp, 48
; Save Win64 callee-saved XMM registers used below.
vmovdqu OWORD PTR [rsp+16], xmm6
vmovdqu OWORD PTR [rsp+32], xmm7
vmovdqu xmm4, OWORD PTR [rcx]
vmovdqu xmm5, OWORD PTR [r10]
vmovdqu xmm6, OWORD PTR [r11]
; H = (h * x) mod (x^128 + x^7 + x^2 + x + 1): 128-bit left shift by
; one with a conditional XOR of the reduction polynomial.
vpsrlq xmm1, xmm5, 63
vpsllq xmm0, xmm5, 1
vpslldq xmm1, xmm1, 8
vpor xmm0, xmm0, xmm1
vpshufd xmm5, xmm5, 255
vpsrad xmm5, xmm5, 31
vpand xmm5, xmm5, OWORD PTR L_avx2_aes_gcm_mod2_128
vpxor xmm5, xmm5, xmm0
; calc_tag
; Length block (bit counts of C and A) XORed into the GHASH state.
shl r9, 3
shl rax, 3
vmovq xmm0, r9
vmovq xmm1, rax
vpunpcklqdq xmm0, xmm0, xmm1
vpxor xmm0, xmm0, xmm4
; ghash_gfmul_red
; Final 128x128 carry-less multiply by H with reduction modulo the
; GCM polynomial.
vpclmulqdq xmm7, xmm0, xmm5, 16
vpclmulqdq xmm3, xmm0, xmm5, 1
vpclmulqdq xmm2, xmm0, xmm5, 0
vpxor xmm7, xmm7, xmm3
vpslldq xmm3, xmm7, 8
vpsrldq xmm7, xmm7, 8
vpxor xmm3, xmm3, xmm2
vpclmulqdq xmm0, xmm0, xmm5, 17
vpclmulqdq xmm2, xmm3, OWORD PTR L_avx2_aes_gcm_mod2_128, 16
vpshufd xmm3, xmm3, 78
vpxor xmm3, xmm3, xmm2
vpclmulqdq xmm2, xmm3, OWORD PTR L_avx2_aes_gcm_mod2_128, 16
vpshufd xmm3, xmm3, 78
vpxor xmm0, xmm0, xmm7
vpxor xmm0, xmm0, xmm3
vpxor xmm0, xmm0, xmm2
; Back to byte order, then mask with E(K, ICB): xmm0 = computed tag.
vpshufb xmm0, xmm0, OWORD PTR L_avx2_aes_gcm_bswap_mask
vpxor xmm0, xmm0, xmm6
; cmp_tag
cmp r8d, 16
je L_AES_GCM_decrypt_final_avx2_cmp_tag_16
; Short tag: byte-wise XOR-accumulate the differences into r10b so the
; comparison does not early-exit on the first mismatching byte.
xor r13, r13
xor r10, r10
vmovdqu OWORD PTR [rsp], xmm0
L_AES_GCM_decrypt_final_avx2_cmp_tag_loop:
movzx r14d, BYTE PTR [rsp+r13]
xor r14b, BYTE PTR [rdx+r13]
or r10b, r14b
inc r13d
cmp r13d, r8d
jne L_AES_GCM_decrypt_final_avx2_cmp_tag_loop
cmp r10b, 0
sete r10b
jmp L_AES_GCM_decrypt_final_avx2_cmp_tag_done
L_AES_GCM_decrypt_final_avx2_cmp_tag_16:
; Full 16-byte tag: compare all lanes at once.
vmovdqu xmm1, OWORD PTR [rdx]
vpcmpeqb xmm0, xmm0, xmm1
vpmovmskb r13, xmm0
; r13d == 0xFFFF (all 16 bytes equal) => r10d = 1, else r10d = 0.
xor r10d, r10d
cmp r13d, 65535
sete r10b
L_AES_GCM_decrypt_final_avx2_cmp_tag_done:
mov DWORD PTR [r12], r10d
vzeroupper
vmovdqu xmm6, OWORD PTR [rsp+16]
vmovdqu xmm7, OWORD PTR [rsp+32]
add rsp, 48
pop r14
pop r13
pop r12
ret
AES_GCM_decrypt_final_avx2 ENDP
_text ENDS
ENDIF
END