[v2,05/13] aarch64: Add BTI landing pads to assembly code

Message ID 14aaaaef4986ecda5edc2659d87934dcd8cc7933.1589277641.git.szabolcs.nagy@arm.com
State New
Headers show
Series
  • aarch64: branch protection support
Related show

Commit Message

Szabolcs Nagy May 12, 2020, 10:14 a.m.
From: Sudakshina Das <sudi.das@arm.com>


To enable building glibc with branch protection, assembly code
needs BTI landing pads and ELF object file markings in the form
of a GNU property note.

The landing pads are unconditionally added to all functions that
may be indirectly called. When the code segment is not mapped
with PROT_BTI these instructions are nops. They are kept in the
code when BTI is not supported so that the layout of performance
critical code is unchanged across configurations.

The GNU property notes are only added when the toolchain supports
BTI, because old binutils does not handle the notes correctly: it
does not know how to merge them, nor how to put them in a
PT_GNU_PROPERTY segment instead of PT_NOTE, and some versions of
binutils emit warnings about the unknown GNU property. In such
cases the produced libc binaries would not have the right ELF
marking, so BTI would not be enabled.

Note: functions using ENTRY or ENTRY_ALIGN now start with an
additional BTI c, so the alignment of the code that follows changes.
ENTRY_ALIGN_AND_PAD was adjusted (one nop less of padding) so that
the layout of code using it is unchanged. Some string functions may
need to be tuned for optimal performance after this commit.

Co-authored-by: Szabolcs Nagy <szabolcs.nagy@arm.com>

TODO: decide how to add the property notes to asm files:
	use the END_FILE macro,
	or another macro (with a more descriptive name),
	or add them in sysdep.h
---
 sysdeps/aarch64/__longjmp.S                   |  1 +
 sysdeps/aarch64/crti.S                        |  4 +++
 sysdeps/aarch64/crtn.S                        |  4 +++
 sysdeps/aarch64/dl-tlsdesc.S                  |  5 +++
 sysdeps/aarch64/dl-trampoline.S               |  4 +++
 sysdeps/aarch64/memchr.S                      |  1 +
 sysdeps/aarch64/memcmp.S                      |  1 +
 sysdeps/aarch64/memcpy.S                      |  1 +
 sysdeps/aarch64/memrchr.S                     |  1 +
 sysdeps/aarch64/memset.S                      |  1 +
 sysdeps/aarch64/multiarch/memchr_nosimd.S     |  1 +
 sysdeps/aarch64/multiarch/memcpy_falkor.S     |  1 +
 sysdeps/aarch64/multiarch/memcpy_thunderx.S   |  1 +
 sysdeps/aarch64/multiarch/memcpy_thunderx2.S  |  1 +
 sysdeps/aarch64/multiarch/memmove_falkor.S    |  1 +
 sysdeps/aarch64/multiarch/memset_base64.S     |  1 +
 sysdeps/aarch64/multiarch/memset_kunpeng.S    |  1 +
 sysdeps/aarch64/multiarch/strlen_asimd.S      |  1 +
 sysdeps/aarch64/rawmemchr.S                   |  1 +
 sysdeps/aarch64/setjmp.S                      |  1 +
 sysdeps/aarch64/start.S                       |  2 ++
 sysdeps/aarch64/strchr.S                      |  1 +
 sysdeps/aarch64/strchrnul.S                   |  1 +
 sysdeps/aarch64/strcmp.S                      |  1 +
 sysdeps/aarch64/strcpy.S                      |  1 +
 sysdeps/aarch64/strlen.S                      |  1 +
 sysdeps/aarch64/strncmp.S                     |  1 +
 sysdeps/aarch64/strnlen.S                     |  1 +
 sysdeps/aarch64/strrchr.S                     |  1 +
 sysdeps/aarch64/sysdep.h                      | 32 ++++++++++++++++++-
 sysdeps/unix/sysv/linux/aarch64/__read_tp.S   |  1 +
 sysdeps/unix/sysv/linux/aarch64/clone.S       |  1 +
 sysdeps/unix/sysv/linux/aarch64/getcontext.S  |  1 +
 sysdeps/unix/sysv/linux/aarch64/ioctl.S       |  1 +
 .../unix/sysv/linux/aarch64/libc-__read_tp.S  |  1 +
 sysdeps/unix/sysv/linux/aarch64/setcontext.S  |  1 +
 sysdeps/unix/sysv/linux/aarch64/syscall.S     |  1 +
 sysdeps/unix/sysv/linux/aarch64/vfork.S       |  1 +
 38 files changed, 82 insertions(+), 1 deletion(-)

-- 
2.17.1

Comments

Andreas Schwab May 12, 2020, 10:27 a.m. | #1
On Mai 12 2020, Szabolcs Nagy wrote:

> diff --git a/sysdeps/aarch64/sysdep.h b/sysdeps/aarch64/sysdep.h

> index 604c489170..161d63c782 100644

> --- a/sysdeps/aarch64/sysdep.h

> +++ b/sysdeps/aarch64/sysdep.h

> @@ -41,12 +41,40 @@

>  

>  #define ASM_SIZE_DIRECTIVE(name) .size name,.-name

>  

> +/* Branch Target Identification support.  */

> +#define BTI_C		hint	34

> +#define BTI_J		hint	36

> +

> +#define FEATURE_1_BTI 1

> +#define FEATURE_1_PAC 2

> +

> +/* Add a GNU_PROPERTY_AARCH64_FEATURE_1_AND note.  */

> +#define GNU_PROPERTY(features)		\

> +  .section .note.gnu.property, "a";	\

> +  .p2align 3;				\

> +  .word 4;				\

> +  .word 16;				\

> +  .word 5;				\

> +  .asciz "GNU";				\

> +  .word 0xc0000000;			\


Is that GNU_PROPERTY_AARCH64_FEATURE_1_AND?

Andreas.

-- 
Andreas Schwab, schwab@linux-m68k.org
GPG Key fingerprint = 7578 EB47 D4E5 4D69 2510  2552 DF73 E780 A9DA AEC1
"And now for something completely different."
Szabolcs Nagy May 12, 2020, 11:35 a.m. | #2
The 05/12/2020 12:27, Andreas Schwab wrote:
> On Mai 12 2020, Szabolcs Nagy wrote:

> 

> > diff --git a/sysdeps/aarch64/sysdep.h b/sysdeps/aarch64/sysdep.h

> > index 604c489170..161d63c782 100644

> > --- a/sysdeps/aarch64/sysdep.h

> > +++ b/sysdeps/aarch64/sysdep.h

> > @@ -41,12 +41,40 @@

> >  

> >  #define ASM_SIZE_DIRECTIVE(name) .size name,.-name

> >  

> > +/* Branch Target Identification support.  */

> > +#define BTI_C		hint	34

> > +#define BTI_J		hint	36

> > +

> > +#define FEATURE_1_BTI 1

> > +#define FEATURE_1_PAC 2

> > +

> > +/* Add a GNU_PROPERTY_AARCH64_FEATURE_1_AND note.  */

> > +#define GNU_PROPERTY(features)		\

> > +  .section .note.gnu.property, "a";	\

> > +  .p2align 3;				\

> > +  .word 4;				\

> > +  .word 16;				\

> > +  .word 5;				\

> > +  .asciz "GNU";				\

> > +  .word 0xc0000000;			\

> 

> Is that GNU_PROPERTY_AARCH64_FEATURE_1_AND?


Yes, but I cannot include elf.h in asm code.
(And I didn't like the long name, but since
Adhemerval asked the same, maybe I should
add a comment.)

Patch

diff --git a/sysdeps/aarch64/__longjmp.S b/sysdeps/aarch64/__longjmp.S
index f9060776b4..362171cdb8 100644
--- a/sysdeps/aarch64/__longjmp.S
+++ b/sysdeps/aarch64/__longjmp.S
@@ -116,3 +116,4 @@  ENTRY (__longjmp)
 	/* Use br instead of ret because ret is guaranteed to mispredict */
 	br	x30
 END (__longjmp)
+END_FILE
diff --git a/sysdeps/aarch64/crti.S b/sysdeps/aarch64/crti.S
index 1728eac37a..89a9e25f5b 100644
--- a/sysdeps/aarch64/crti.S
+++ b/sysdeps/aarch64/crti.S
@@ -75,6 +75,7 @@  call_weak_fn:
 	.hidden	_init
 	.type	_init, %function
 _init:
+	BTI_C
 	stp	x29, x30, [sp, -16]!
 	mov	x29, sp
 #if PREINIT_FUNCTION_WEAK
@@ -89,5 +90,8 @@  _init:
 	.hidden	_fini
 	.type	_fini, %function
 _fini:
+	BTI_C
 	stp	x29, x30, [sp, -16]!
 	mov	x29, sp
+
+END_FILE
diff --git a/sysdeps/aarch64/crtn.S b/sysdeps/aarch64/crtn.S
index c3e97cc449..94a6f970ef 100644
--- a/sysdeps/aarch64/crtn.S
+++ b/sysdeps/aarch64/crtn.S
@@ -37,6 +37,8 @@ 
 /* crtn.S puts function epilogues in the .init and .fini sections
    corresponding to the prologues in crti.S. */
 
+#include <sysdep.h>
+
 	.section .init,"ax",%progbits
 	ldp	x29, x30, [sp], 16
 	RET
@@ -44,3 +46,5 @@ 
 	.section .fini,"ax",%progbits
 	ldp	x29, x30, [sp], 16
 	RET
+
+END_FILE
diff --git a/sysdeps/aarch64/dl-tlsdesc.S b/sysdeps/aarch64/dl-tlsdesc.S
index 557ad1d505..d55e0443aa 100644
--- a/sysdeps/aarch64/dl-tlsdesc.S
+++ b/sysdeps/aarch64/dl-tlsdesc.S
@@ -74,6 +74,7 @@ 
 	cfi_startproc
 	.align 2
 _dl_tlsdesc_return:
+	BTI_C
 	DELOUSE (0)
 	ldr	PTR_REG (0), [x0, #PTR_SIZE]
 	RET
@@ -95,6 +96,7 @@  _dl_tlsdesc_return:
 	cfi_startproc
 	.align  2
 _dl_tlsdesc_undefweak:
+	BTI_C
 	str	x1, [sp, #-16]!
 	cfi_adjust_cfa_offset (16)
 	DELOUSE (0)
@@ -142,6 +144,7 @@  _dl_tlsdesc_undefweak:
 	cfi_startproc
 	.align 2
 _dl_tlsdesc_dynamic:
+	BTI_C
 	DELOUSE (0)
 
 	/* Save just enough registers to support fast path, if we fall
@@ -235,3 +238,5 @@  _dl_tlsdesc_dynamic:
 	.size	_dl_tlsdesc_dynamic, .-_dl_tlsdesc_dynamic
 # undef NSAVEXREGPAIRS
 #endif
+
+END_FILE
diff --git a/sysdeps/aarch64/dl-trampoline.S b/sysdeps/aarch64/dl-trampoline.S
index 94e965c096..fba5689d09 100644
--- a/sysdeps/aarch64/dl-trampoline.S
+++ b/sysdeps/aarch64/dl-trampoline.S
@@ -35,6 +35,7 @@ 
 	cfi_startproc
 	.align 2
 _dl_runtime_resolve:
+	BTI_C
 	/* AArch64 we get called with:
 	   ip0		&PLTGOT[2]
 	   ip1		temp(dl resolver entry point)
@@ -126,6 +127,7 @@  _dl_runtime_resolve:
 	cfi_startproc
 	.align 2
 _dl_runtime_profile:
+	BTI_C
 	/* AArch64 we get called with:
 	   ip0		&PLTGOT[2]
 	   ip1		temp(dl resolver entry point)
@@ -298,3 +300,5 @@  _dl_runtime_profile:
 	.size _dl_runtime_profile, .-_dl_runtime_profile
 #endif
 	.previous
+
+END_FILE
diff --git a/sysdeps/aarch64/memchr.S b/sysdeps/aarch64/memchr.S
index 85c65cbfca..c67a31223f 100644
--- a/sysdeps/aarch64/memchr.S
+++ b/sysdeps/aarch64/memchr.S
@@ -159,3 +159,4 @@  L(zero_length):
 END (MEMCHR)
 weak_alias (MEMCHR, memchr)
 libc_hidden_builtin_def (memchr)
+END_FILE
diff --git a/sysdeps/aarch64/memcmp.S b/sysdeps/aarch64/memcmp.S
index 827f54f99e..c6e07f9287 100644
--- a/sysdeps/aarch64/memcmp.S
+++ b/sysdeps/aarch64/memcmp.S
@@ -178,3 +178,4 @@  END (memcmp)
 #undef bcmp
 weak_alias (memcmp, bcmp)
 libc_hidden_builtin_def (memcmp)
+END_FILE
diff --git a/sysdeps/aarch64/memcpy.S b/sysdeps/aarch64/memcpy.S
index e0b4c4502f..543d9417f3 100644
--- a/sysdeps/aarch64/memcpy.S
+++ b/sysdeps/aarch64/memcpy.S
@@ -282,3 +282,4 @@  L(copy64_from_start):
 
 END (MEMMOVE)
 libc_hidden_builtin_def (MEMMOVE)
+END_FILE
diff --git a/sysdeps/aarch64/memrchr.S b/sysdeps/aarch64/memrchr.S
index ace5a94e8f..f35a68d14d 100644
--- a/sysdeps/aarch64/memrchr.S
+++ b/sysdeps/aarch64/memrchr.S
@@ -163,3 +163,4 @@  L(zero_length):
 END (__memrchr)
 weak_alias (__memrchr, memrchr)
 libc_hidden_builtin_def (memrchr)
+END_FILE
diff --git a/sysdeps/aarch64/memset.S b/sysdeps/aarch64/memset.S
index ac577f1660..7cdae20563 100644
--- a/sysdeps/aarch64/memset.S
+++ b/sysdeps/aarch64/memset.S
@@ -189,3 +189,4 @@  L(zva_other):
 
 END (MEMSET)
 libc_hidden_builtin_def (MEMSET)
+END_FILE
diff --git a/sysdeps/aarch64/multiarch/memchr_nosimd.S b/sysdeps/aarch64/multiarch/memchr_nosimd.S
index 41ce10eb32..6d7d38d5bb 100644
--- a/sysdeps/aarch64/multiarch/memchr_nosimd.S
+++ b/sysdeps/aarch64/multiarch/memchr_nosimd.S
@@ -221,3 +221,4 @@  L(none_chr):
 
 END (MEMCHR)
 libc_hidden_builtin_def (MEMCHR)
+END_FILE
diff --git a/sysdeps/aarch64/multiarch/memcpy_falkor.S b/sysdeps/aarch64/multiarch/memcpy_falkor.S
index 35a1fae1b9..999aa48b16 100644
--- a/sysdeps/aarch64/multiarch/memcpy_falkor.S
+++ b/sysdeps/aarch64/multiarch/memcpy_falkor.S
@@ -188,4 +188,5 @@  L(last64):
 
 END (__memcpy_falkor)
 libc_hidden_builtin_def (__memcpy_falkor)
+END_FILE
 #endif
diff --git a/sysdeps/aarch64/multiarch/memcpy_thunderx.S b/sysdeps/aarch64/multiarch/memcpy_thunderx.S
index e9407571b5..e6e36a6633 100644
--- a/sysdeps/aarch64/multiarch/memcpy_thunderx.S
+++ b/sysdeps/aarch64/multiarch/memcpy_thunderx.S
@@ -318,5 +318,6 @@  L(move_long):
 
 END (MEMCPY)
 libc_hidden_builtin_def (MEMCPY)
+END_FILE
 
 #endif
diff --git a/sysdeps/aarch64/multiarch/memcpy_thunderx2.S b/sysdeps/aarch64/multiarch/memcpy_thunderx2.S
index 68e99455c8..fde4c7198c 100644
--- a/sysdeps/aarch64/multiarch/memcpy_thunderx2.S
+++ b/sysdeps/aarch64/multiarch/memcpy_thunderx2.S
@@ -474,4 +474,5 @@  L(ext_table):
 	.word	L(ext_size_15) -.
 
 libc_hidden_builtin_def (MEMCPY)
+END_FILE
 #endif
diff --git a/sysdeps/aarch64/multiarch/memmove_falkor.S b/sysdeps/aarch64/multiarch/memmove_falkor.S
index 35fc1fdd41..d8cc992d27 100644
--- a/sysdeps/aarch64/multiarch/memmove_falkor.S
+++ b/sysdeps/aarch64/multiarch/memmove_falkor.S
@@ -223,3 +223,4 @@  L(move_long):
 
 END (__memmove_falkor)
 libc_hidden_builtin_def (__memmove_falkor)
+END_FILE
diff --git a/sysdeps/aarch64/multiarch/memset_base64.S b/sysdeps/aarch64/multiarch/memset_base64.S
index 8f85cd1caf..ee0b832ef9 100644
--- a/sysdeps/aarch64/multiarch/memset_base64.S
+++ b/sysdeps/aarch64/multiarch/memset_base64.S
@@ -184,3 +184,4 @@  L(zva_64):
 
 END (MEMSET)
 libc_hidden_builtin_def (MEMSET)
+END_FILE
diff --git a/sysdeps/aarch64/multiarch/memset_kunpeng.S b/sysdeps/aarch64/multiarch/memset_kunpeng.S
index 8e051d4fd1..aa7ab62fdd 100644
--- a/sysdeps/aarch64/multiarch/memset_kunpeng.S
+++ b/sysdeps/aarch64/multiarch/memset_kunpeng.S
@@ -111,3 +111,4 @@  L(set_long):
 END (MEMSET)
 libc_hidden_builtin_def (MEMSET)
 #endif
+END_FILE
diff --git a/sysdeps/aarch64/multiarch/strlen_asimd.S b/sysdeps/aarch64/multiarch/strlen_asimd.S
index 236a2c96a6..c28aa0ca8d 100644
--- a/sysdeps/aarch64/multiarch/strlen_asimd.S
+++ b/sysdeps/aarch64/multiarch/strlen_asimd.S
@@ -176,3 +176,4 @@  L(page_cross):
 END (__strlen_asimd)
 weak_alias (__strlen_asimd, strlen_asimd)
 libc_hidden_builtin_def (strlen_asimd)
+END_FILE
diff --git a/sysdeps/aarch64/rawmemchr.S b/sysdeps/aarch64/rawmemchr.S
index 5c7a664fb4..4ad614a169 100644
--- a/sysdeps/aarch64/rawmemchr.S
+++ b/sysdeps/aarch64/rawmemchr.S
@@ -40,3 +40,4 @@  L(do_strlen):
 END (__rawmemchr)
 weak_alias (__rawmemchr, rawmemchr)
 libc_hidden_builtin_def (__rawmemchr)
+END_FILE
diff --git a/sysdeps/aarch64/setjmp.S b/sysdeps/aarch64/setjmp.S
index 28fdd3f46a..9a0201ef70 100644
--- a/sysdeps/aarch64/setjmp.S
+++ b/sysdeps/aarch64/setjmp.S
@@ -73,3 +73,4 @@  ENTRY (__sigsetjmp)
 #endif
 END (__sigsetjmp)
 hidden_def (__sigsetjmp)
+END_FILE
diff --git a/sysdeps/aarch64/start.S b/sysdeps/aarch64/start.S
index d96cf57e2d..e6c0393c20 100644
--- a/sysdeps/aarch64/start.S
+++ b/sysdeps/aarch64/start.S
@@ -46,6 +46,7 @@ 
 	.globl _start
 	.type _start,#function
 _start:
+	BTI_C
 	/* Create an initial frame with 0 LR and FP */
 	mov	x29, #0
 	mov	x30, #0
@@ -110,3 +111,4 @@  __data_start:
 	.long 0
 	.weak data_start
 	data_start = __data_start
+END_FILE
diff --git a/sysdeps/aarch64/strchr.S b/sysdeps/aarch64/strchr.S
index 4a75e73945..e1f98aa42c 100644
--- a/sysdeps/aarch64/strchr.S
+++ b/sysdeps/aarch64/strchr.S
@@ -137,3 +137,4 @@  L(tail):
 END (strchr)
 libc_hidden_builtin_def (strchr)
 weak_alias (strchr, index)
+END_FILE
diff --git a/sysdeps/aarch64/strchrnul.S b/sysdeps/aarch64/strchrnul.S
index a65be6cba8..a9ccc54205 100644
--- a/sysdeps/aarch64/strchrnul.S
+++ b/sysdeps/aarch64/strchrnul.S
@@ -129,3 +129,4 @@  L(tail):
 
 END(__strchrnul)
 weak_alias (__strchrnul, strchrnul)
+END_FILE
diff --git a/sysdeps/aarch64/strcmp.S b/sysdeps/aarch64/strcmp.S
index d044c29e9b..a7bee697c0 100644
--- a/sysdeps/aarch64/strcmp.S
+++ b/sysdeps/aarch64/strcmp.S
@@ -182,3 +182,4 @@  L(done):
 	RET
 END(strcmp)
 libc_hidden_builtin_def (strcmp)
+END_FILE
diff --git a/sysdeps/aarch64/strcpy.S b/sysdeps/aarch64/strcpy.S
index 548130e413..631da4a358 100644
--- a/sysdeps/aarch64/strcpy.S
+++ b/sysdeps/aarch64/strcpy.S
@@ -321,3 +321,4 @@  libc_hidden_builtin_def (stpcpy)
 #else
 libc_hidden_builtin_def (strcpy)
 #endif
+END_FILE
diff --git a/sysdeps/aarch64/strlen.S b/sysdeps/aarch64/strlen.S
index e01fab7c2a..3af25de4b8 100644
--- a/sysdeps/aarch64/strlen.S
+++ b/sysdeps/aarch64/strlen.S
@@ -222,3 +222,4 @@  L(page_cross):
 END (STRLEN)
 weak_alias (STRLEN, strlen)
 libc_hidden_builtin_def (strlen)
+END_FILE
diff --git a/sysdeps/aarch64/strncmp.S b/sysdeps/aarch64/strncmp.S
index c5141fab8a..d289d4a3ba 100644
--- a/sysdeps/aarch64/strncmp.S
+++ b/sysdeps/aarch64/strncmp.S
@@ -270,3 +270,4 @@  L(ret0):
 
 END (strncmp)
 libc_hidden_builtin_def (strncmp)
+END_FILE
diff --git a/sysdeps/aarch64/strnlen.S b/sysdeps/aarch64/strnlen.S
index 5981247dd9..964536dba9 100644
--- a/sysdeps/aarch64/strnlen.S
+++ b/sysdeps/aarch64/strnlen.S
@@ -213,3 +213,4 @@  END (__strnlen)
 libc_hidden_def (__strnlen)
 weak_alias (__strnlen, strnlen)
 libc_hidden_def (strnlen)
+END_FILE
diff --git a/sysdeps/aarch64/strrchr.S b/sysdeps/aarch64/strrchr.S
index 94da08d351..9f6d956f21 100644
--- a/sysdeps/aarch64/strrchr.S
+++ b/sysdeps/aarch64/strrchr.S
@@ -164,3 +164,4 @@  L(null_search):
 END(strrchr)
 weak_alias (strrchr, rindex)
 libc_hidden_builtin_def (strrchr)
+END_FILE
diff --git a/sysdeps/aarch64/sysdep.h b/sysdeps/aarch64/sysdep.h
index 604c489170..161d63c782 100644
--- a/sysdeps/aarch64/sysdep.h
+++ b/sysdeps/aarch64/sysdep.h
@@ -41,12 +41,40 @@ 
 
 #define ASM_SIZE_DIRECTIVE(name) .size name,.-name
 
+/* Branch Target Identification support.  */
+#define BTI_C		hint	34
+#define BTI_J		hint	36
+
+#define FEATURE_1_BTI 1
+#define FEATURE_1_PAC 2
+
+/* Add a GNU_PROPERTY_AARCH64_FEATURE_1_AND note.  */
+#define GNU_PROPERTY(features)		\
+  .section .note.gnu.property, "a";	\
+  .p2align 3;				\
+  .word 4;				\
+  .word 16;				\
+  .word 5;				\
+  .asciz "GNU";				\
+  .word 0xc0000000;			\
+  .word 4;				\
+  .word features;			\
+  .word 0;
+
+/* GNU property note with the supported features.  */
+#ifdef HAVE_AARCH64_BTI
+# define END_FILE GNU_PROPERTY (FEATURE_1_BTI)
+#else
+# define END_FILE
+#endif
+
 /* Define an entry point visible from C.  */
 #define ENTRY(name)						\
   .globl C_SYMBOL_NAME(name);					\
   .type C_SYMBOL_NAME(name),%function;				\
   .align 4;							\
   C_LABEL(name)							\
+  BTI_C;							\
   cfi_startproc;						\
   CALL_MCOUNT
 
@@ -56,6 +84,7 @@ 
   .type C_SYMBOL_NAME(name),%function;				\
   .p2align align;						\
   C_LABEL(name)							\
+  BTI_C;							\
   cfi_startproc;						\
   CALL_MCOUNT
 
@@ -68,10 +97,11 @@ 
   .globl C_SYMBOL_NAME(name);					\
   .type C_SYMBOL_NAME(name),%function;				\
   .p2align align;						\
-  .rep padding;							\
+  .rep padding - 1; /* -1 for bti c.  */			\
   nop;								\
   .endr;							\
   C_LABEL(name)							\
+  BTI_C;							\
   cfi_startproc;						\
   CALL_MCOUNT
 
diff --git a/sysdeps/unix/sysv/linux/aarch64/__read_tp.S b/sysdeps/unix/sysv/linux/aarch64/__read_tp.S
index 12e1131fe7..7825aa5e1b 100644
--- a/sysdeps/unix/sysv/linux/aarch64/__read_tp.S
+++ b/sysdeps/unix/sysv/linux/aarch64/__read_tp.S
@@ -23,3 +23,4 @@  ENTRY (__read_tp)
 	mrs	x0, tpidr_el0
 	RET
 END   (__read_tp)
+END_FILE
diff --git a/sysdeps/unix/sysv/linux/aarch64/clone.S b/sysdeps/unix/sysv/linux/aarch64/clone.S
index 2b14106fd1..15b5a7b217 100644
--- a/sysdeps/unix/sysv/linux/aarch64/clone.S
+++ b/sysdeps/unix/sysv/linux/aarch64/clone.S
@@ -85,3 +85,4 @@  thread_start:
 
 libc_hidden_def (__clone)
 weak_alias (__clone, clone)
+END_FILE
diff --git a/sysdeps/unix/sysv/linux/aarch64/getcontext.S b/sysdeps/unix/sysv/linux/aarch64/getcontext.S
index 8571556189..95f61d5f28 100644
--- a/sysdeps/unix/sysv/linux/aarch64/getcontext.S
+++ b/sysdeps/unix/sysv/linux/aarch64/getcontext.S
@@ -107,3 +107,4 @@  ENTRY(__getcontext)
 
 	PSEUDO_END (__getcontext)
 weak_alias (__getcontext, getcontext)
+END_FILE
diff --git a/sysdeps/unix/sysv/linux/aarch64/ioctl.S b/sysdeps/unix/sysv/linux/aarch64/ioctl.S
index ed36e309c4..19abf43726 100644
--- a/sysdeps/unix/sysv/linux/aarch64/ioctl.S
+++ b/sysdeps/unix/sysv/linux/aarch64/ioctl.S
@@ -30,3 +30,4 @@  PSEUDO_END (__ioctl)
 
 libc_hidden_def (__ioctl)
 weak_alias (__ioctl, ioctl)
+END_FILE
diff --git a/sysdeps/unix/sysv/linux/aarch64/libc-__read_tp.S b/sysdeps/unix/sysv/linux/aarch64/libc-__read_tp.S
index 6de259ed83..df5eec8599 100644
--- a/sysdeps/unix/sysv/linux/aarch64/libc-__read_tp.S
+++ b/sysdeps/unix/sysv/linux/aarch64/libc-__read_tp.S
@@ -17,3 +17,4 @@ 
    <https://www.gnu.org/licenses/>.  */
 
 #include <__read_tp.S>
+END_FILE
diff --git a/sysdeps/unix/sysv/linux/aarch64/setcontext.S b/sysdeps/unix/sysv/linux/aarch64/setcontext.S
index 61fb813db3..d9d6c0f364 100644
--- a/sysdeps/unix/sysv/linux/aarch64/setcontext.S
+++ b/sysdeps/unix/sysv/linux/aarch64/setcontext.S
@@ -128,3 +128,4 @@  ENTRY (__startcontext)
 	cbnz	x0, __setcontext
 1:	b       HIDDEN_JUMPTARGET (exit)
 END (__startcontext)
+END_FILE
diff --git a/sysdeps/unix/sysv/linux/aarch64/syscall.S b/sysdeps/unix/sysv/linux/aarch64/syscall.S
index 993e307fcf..10e59ece58 100644
--- a/sysdeps/unix/sysv/linux/aarch64/syscall.S
+++ b/sysdeps/unix/sysv/linux/aarch64/syscall.S
@@ -42,3 +42,4 @@  ENTRY (syscall)
 1:
 	b	SYSCALL_ERROR
 PSEUDO_END (syscall)
+END_FILE
diff --git a/sysdeps/unix/sysv/linux/aarch64/vfork.S b/sysdeps/unix/sysv/linux/aarch64/vfork.S
index 9c8ea48c5a..0306003a22 100644
--- a/sysdeps/unix/sysv/linux/aarch64/vfork.S
+++ b/sysdeps/unix/sysv/linux/aarch64/vfork.S
@@ -40,3 +40,4 @@  libc_hidden_def (__vfork)
 
 weak_alias (__vfork, vfork)
 strong_alias (__vfork, __libc_vfork)
+END_FILE