diff --git a/crypto/fipsmodule/sha/asm/sha256-armv4.pl b/crypto/fipsmodule/sha/asm/sha256-armv4.pl
index c449a7c6c1..720221f844 100644
--- a/crypto/fipsmodule/sha/asm/sha256-armv4.pl
+++ b/crypto/fipsmodule/sha/asm/sha256-armv4.pl
@@ -482,6 +482,14 @@ ()
 .arch	armv7-a
 .fpu	neon
 
+.LK256_shortcut_neon:
+@ PC is 8 bytes ahead in Arm mode and 4 bytes ahead in Thumb mode.
+#if defined(__thumb2__)
+.word K256-(.LK256_add_neon+4)
+#else
+.word K256-(.LK256_add_neon+8)
+#endif
+
 .global	sha256_block_data_order_neon
 .type	sha256_block_data_order_neon,%function
 .align	5
@@ -491,7 +499,21 @@ ()
 	stmdb	sp!,{r4-r12,lr}
 
 	sub	$H,sp,#16*4+16
-	adr	$Ktbl,K256
+
+	@ K256 is just at the boundary of being easily referenced by an ADR from
+	@ this function. In Arm mode, when building with __ARM_ARCH=6, it does
+	@ not fit. By moving code around, we could make it fit, but this is too
+	@ fragile. For simplicity, just load the offset from
+	@ .LK256_shortcut_neon.
+	@
+	@ TODO(davidben): adrl would avoid a load, but clang-assembler does not
+	@ support it. We might be able to emulate it with a macro, but Android's
+	@ did not work when I tried it.
+	@ https://android.googlesource.com/platform/ndk/+/refs/heads/master/docs/ClangMigration.md#arm
+	ldr	$Ktbl,.LK256_shortcut_neon
+.LK256_add_neon:
+	add	$Ktbl,pc,$Ktbl
+
 	bic	$H,$H,#15		@ align for 128-bit stores
 	mov	$t2,sp
 	mov	sp,$H		@ alloca
@@ -617,12 +639,26 @@ ()
 # define INST(a,b,c,d)	.byte	a,b,c,d
 # endif
 
+.LK256_shortcut_armv8:
+@ PC is 8 bytes ahead in Arm mode and 4 bytes ahead in Thumb mode.
+#if defined(__thumb2__)
+.word K256-(.LK256_add_armv8+4)
+#else
+.word K256-(.LK256_add_armv8+8)
+#endif
+
 .type	sha256_block_data_order_armv8,%function
 .align	5
 sha256_block_data_order_armv8:
 .LARMv8:
+	@ K256 is too far to reference from one ADR command in Thumb mode. In
+	@ Arm mode, we could make it fit by aligning the ADR offset to a 64-byte
+	@ boundary. For simplicity, just load the offset from .LK256_shortcut_armv8.
+	ldr	$Ktbl,.LK256_shortcut_armv8
+.LK256_add_armv8:
+	add	$Ktbl,pc,$Ktbl
+
 	vld1.32	{$ABCD,$EFGH},[$ctx]
-	sub	$Ktbl,$Ktbl,#256+32
 	add	$len,$inp,$len,lsl#6	@ len to point at the end of inp
 	b	.Loop_v8
diff --git a/generated-src/ios-arm/crypto/fipsmodule/sha256-armv4.S b/generated-src/ios-arm/crypto/fipsmodule/sha256-armv4.S
index 76e2d8b981..4c72aaff8e 100644
--- a/generated-src/ios-arm/crypto/fipsmodule/sha256-armv4.S
+++ b/generated-src/ios-arm/crypto/fipsmodule/sha256-armv4.S
@@ -1895,6 +1895,14 @@ Lrounds_16_xx:
+LK256_shortcut_neon:
+@ PC is 8 bytes ahead in Arm mode and 4 bytes ahead in Thumb mode.
+#if defined(__thumb2__)
+.word K256-(LK256_add_neon+4)
+#else
+.word K256-(LK256_add_neon+8)
+#endif
+
 .globl	_sha256_block_data_order_neon
 .private_extern	_sha256_block_data_order_neon
 #ifdef __thumb2__
 .thumb_func	_sha256_block_data_order_neon
 #endif
 .align	5
@@ -1907,7 +1915,21 @@ LNEON:
 	stmdb	sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,lr}
 
 	sub	r11,sp,#16*4+16
-	adr	r14,K256
+
+	@ K256 is just at the boundary of being easily referenced by an ADR from
+	@ this function. In Arm mode, when building with __ARM_ARCH=6, it does
+	@ not fit. By moving code around, we could make it fit, but this is too
+	@ fragile. For simplicity, just load the offset from
+	@ .LK256_shortcut_neon.
+	@
+	@ TODO(davidben): adrl would avoid a load, but clang-assembler does not
+	@ support it. We might be able to emulate it with a macro, but Android's
+	@ did not work when I tried it.
+	@ https://android.googlesource.com/platform/ndk/+/refs/heads/master/docs/ClangMigration.md#arm
+	ldr	r14,LK256_shortcut_neon
+LK256_add_neon:
+	add	r14,pc,r14
+
 	bic	r11,r11,#15		@ align for 128-bit stores
 	mov	r12,sp
 	mov	sp,r11		@ alloca
@@ -2689,14 +2711,28 @@ L_00_48:
 # define INST(a,b,c,d)	.byte	a,b,c,d
 # endif
 
+LK256_shortcut_armv8:
+@ PC is 8 bytes ahead in Arm mode and 4 bytes ahead in Thumb mode.
+#if defined(__thumb2__)
+.word K256-(LK256_add_armv8+4)
+#else
+.word K256-(LK256_add_armv8+8)
+#endif
+
 #ifdef __thumb2__
 .thumb_func	sha256_block_data_order_armv8
 #endif
 .align	5
 sha256_block_data_order_armv8:
 LARMv8:
+	@ K256 is too far to reference from one ADR command in Thumb mode. In
+	@ Arm mode, we could make it fit by aligning the ADR offset to a 64-byte
+	@ boundary. For simplicity, just load the offset from .LK256_shortcut_armv8.
+	ldr	r3,LK256_shortcut_armv8
+LK256_add_armv8:
+	add	r3,pc,r3
+
 	vld1.32	{q0,q1},[r0]
-	sub	r3,r3,#256+32
 	add	r2,r1,r2,lsl#6	@ len to point at the end of inp
 	b	Loop_v8
diff --git a/generated-src/linux-arm/crypto/fipsmodule/sha256-armv4.S b/generated-src/linux-arm/crypto/fipsmodule/sha256-armv4.S
index 3c63a8ca6e..4fdcdc876c 100644
--- a/generated-src/linux-arm/crypto/fipsmodule/sha256-armv4.S
+++ b/generated-src/linux-arm/crypto/fipsmodule/sha256-armv4.S
@@ -1893,6 +1893,14 @@ sha256_block_data_order:
 .arch	armv7-a
 .fpu	neon
 
+.LK256_shortcut_neon:
+@ PC is 8 bytes ahead in Arm mode and 4 bytes ahead in Thumb mode.
+#if defined(__thumb2__)
+.word K256-(.LK256_add_neon+4)
+#else
+.word K256-(.LK256_add_neon+8)
+#endif
+
 .globl	sha256_block_data_order_neon
 .hidden	sha256_block_data_order_neon
 .type	sha256_block_data_order_neon,%function
 .align	5
@@ -1903,7 +1911,21 @@ sha256_block_data_order_neon:
 	stmdb	sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,lr}
 
 	sub	r11,sp,#16*4+16
-	adr	r14,K256
+
+	@ K256 is just at the boundary of being easily referenced by an ADR from
+	@ this function. In Arm mode, when building with __ARM_ARCH=6, it does
+	@ not fit. By moving code around, we could make it fit, but this is too
+	@ fragile. For simplicity, just load the offset from
+	@ .LK256_shortcut_neon.
+	@
+	@ TODO(davidben): adrl would avoid a load, but clang-assembler does not
+	@ support it. We might be able to emulate it with a macro, but Android's
+	@ did not work when I tried it.
+	@ https://android.googlesource.com/platform/ndk/+/refs/heads/master/docs/ClangMigration.md#arm
+	ldr	r14,.LK256_shortcut_neon
+.LK256_add_neon:
+	add	r14,pc,r14
+
 	bic	r11,r11,#15		@ align for 128-bit stores
 	mov	r12,sp
 	mov	sp,r11		@ alloca
@@ -2685,12 +2707,26 @@ sha256_block_data_order_neon:
 # define INST(a,b,c,d)	.byte	a,b,c,d
 # endif
 
+.LK256_shortcut_armv8:
+@ PC is 8 bytes ahead in Arm mode and 4 bytes ahead in Thumb mode.
+#if defined(__thumb2__)
+.word K256-(.LK256_add_armv8+4)
+#else
+.word K256-(.LK256_add_armv8+8)
+#endif
+
 .type	sha256_block_data_order_armv8,%function
 .align	5
 sha256_block_data_order_armv8:
 .LARMv8:
+	@ K256 is too far to reference from one ADR command in Thumb mode. In
+	@ Arm mode, we could make it fit by aligning the ADR offset to a 64-byte
+	@ boundary. For simplicity, just load the offset from .LK256_shortcut_armv8.
+	ldr	r3,.LK256_shortcut_armv8
+.LK256_add_armv8:
+	add	r3,pc,r3
+
 	vld1.32	{q0,q1},[r0]
-	sub	r3,r3,#256+32
 	add	r2,r1,r2,lsl#6	@ len to point at the end of inp
 	b	.Loop_v8
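
For readers unfamiliar with the pattern the patch keeps repeating, here is a minimal standalone sketch of the same PC-relative constant trick. It is not part of the patch: it assumes GNU-assembler syntax (the linux-arm flavour above), and the names example_data, .Lexample_shortcut, .Lexample_add, and load_example_data_addr are invented for illustration only.

@ Standalone sketch (illustrative, not BoringSSL code): materialize the address
@ of a far-away symbol without ADR/ADRL by storing a PC-relative delta in a
@ nearby literal and adding the runtime PC to it, exactly as the patch does.
	.syntax	unified
	.text

.Lexample_shortcut:
@ Delta from the ADD below to example_data. The PC read by that ADD is the
@ instruction address plus 8 in Arm mode and plus 4 in Thumb mode, so the
@ bias is subtracted here at assembly time.
#if defined(__thumb2__)
.word	example_data-(.Lexample_add+4)
#else
.word	example_data-(.Lexample_add+8)
#endif

.globl	load_example_data_addr
.type	load_example_data_addr,%function
load_example_data_addr:
	ldr	r0,.Lexample_shortcut	@ r0 = example_data - (.Lexample_add + bias)
.Lexample_add:
	add	r0,pc,r0		@ pc reads as .Lexample_add + bias, so r0 = &example_data
	bx	lr
.size	load_example_data_addr,.-load_example_data_addr

@ example_data may sit far beyond what a single ADR can encode; only the .word
@ literal above has to stay within the LDR's literal-load range.
.align	2
example_data:
.word	0x5be0cd19

Storing a delta rather than an absolute address is what keeps the code position-independent: the .word needs no dynamic relocation, which is why the patch computes the address of K256 at run time instead of embedding a pointer to it.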