Andrew Geissler | 5f35090 | 2021-07-23 13:09:54 -0400 | [diff] [blame^] | 1 | Backport a number of patches from master to improve Arm MTE support. |
| 2 | |
| 3 | Upstream-Status: Backport [will be in 2.34] |
| 4 | Signed-off-by: Ross Burton <ross.burton@arm.com> |
| 5 | |
| 6 | From 2643466c2928a93de7b80a61f6a8f61a653862e1 Mon Sep 17 00:00:00 2001 |
| 7 | From: Szabolcs Nagy <szabolcs.nagy@arm.com> |
| 8 | Date: Thu, 11 Mar 2021 14:09:56 +0000 |
| 9 | Subject: [PATCH 01/11] malloc: Fix a potential realloc issue with memory |
| 10 | tagging |
| 11 | |
| 12 | At an _int_free call site in realloc the wrong size was used for tag |
| 13 | clearing: the chunk header of the next chunk was also cleared which |
| 14 | in practice may work, but is logically wrong. |
| 15 | |
| 16 | The tag clearing is moved before the memcpy to save a tag computation, |
| 17 | this avoids a chunk2mem. Another chunk2mem is removed because newmem |
| 18 | does not have to be recomputed. Whitespaces got fixed too. |
| 19 | |
| 20 | Reviewed-by: DJ Delorie <dj@redhat.com> |
| 21 | --- |
| 22 | malloc/malloc.c | 14 +++++++------- |
| 23 | 1 file changed, 7 insertions(+), 7 deletions(-) |
| 24 | |
| 25 | diff --git a/malloc/malloc.c b/malloc/malloc.c |
| 26 | index 8f8f12c276..51cec67e55 100644 |
| 27 | --- a/malloc/malloc.c |
| 28 | +++ b/malloc/malloc.c |
| 29 | @@ -4851,14 +4851,14 @@ _int_realloc(mstate av, mchunkptr oldp, INTERNAL_SIZE_T oldsize, |
| 30 | } |
| 31 | else |
| 32 | { |
| 33 | - void *oldmem = chunk2mem (oldp); |
| 34 | + void *oldmem = chunk2rawmem (oldp); |
| 35 | + size_t sz = CHUNK_AVAILABLE_SIZE (oldp) - CHUNK_HDR_SZ; |
| 36 | + (void) TAG_REGION (oldmem, sz); |
| 37 | newmem = TAG_NEW_USABLE (newmem); |
| 38 | - memcpy (newmem, oldmem, |
| 39 | - CHUNK_AVAILABLE_SIZE (oldp) - CHUNK_HDR_SZ); |
| 40 | - (void) TAG_REGION (chunk2rawmem (oldp), oldsize); |
| 41 | - _int_free (av, oldp, 1); |
| 42 | - check_inuse_chunk (av, newp); |
| 43 | - return chunk2mem (newp); |
| 44 | + memcpy (newmem, oldmem, sz); |
| 45 | + _int_free (av, oldp, 1); |
| 46 | + check_inuse_chunk (av, newp); |
| 47 | + return newmem; |
| 48 | } |
| 49 | } |
| 50 | } |
| 51 | -- |
| 52 | 2.25.1 |
| 53 | |
| 54 | |
| 55 | From 32f3132be063e4b16a5cdb058980af354126e2f4 Mon Sep 17 00:00:00 2001 |
| 56 | From: Szabolcs Nagy <szabolcs.nagy@arm.com> |
| 57 | Date: Thu, 28 Jan 2021 17:34:36 +0000 |
| 58 | Subject: [PATCH 02/11] malloc: Move MTAG_MMAP_FLAGS definition |
| 59 | |
| 60 | This is only used internally in malloc.c, the extern declaration |
| 61 | was wrong, __mtag_mmap_flags has internal linkage. |
| 62 | |
| 63 | Reviewed-by: DJ Delorie <dj@redhat.com> |
| 64 | --- |
| 65 | include/malloc.h | 7 ------- |
| 66 | malloc/malloc.c | 2 ++ |
| 67 | 2 files changed, 2 insertions(+), 7 deletions(-) |
| 68 | |
| 69 | diff --git a/include/malloc.h b/include/malloc.h |
| 70 | index 7ae08d53d3..b77761f74d 100644 |
| 71 | --- a/include/malloc.h |
| 72 | +++ b/include/malloc.h |
| 73 | @@ -16,11 +16,4 @@ typedef struct malloc_state *mstate; |
| 74 | |
| 75 | # endif /* !_ISOMAC */ |
| 76 | |
| 77 | -#ifdef USE_MTAG |
| 78 | -extern int __mtag_mmap_flags; |
| 79 | -#define MTAG_MMAP_FLAGS __mtag_mmap_flags |
| 80 | -#else |
| 81 | -#define MTAG_MMAP_FLAGS 0 |
| 82 | -#endif |
| 83 | - |
| 84 | #endif |
| 85 | diff --git a/malloc/malloc.c b/malloc/malloc.c |
| 86 | index 51cec67e55..61c25d0f93 100644 |
| 87 | --- a/malloc/malloc.c |
| 88 | +++ b/malloc/malloc.c |
| 89 | @@ -463,11 +463,13 @@ static void *(*__tag_region)(void *, size_t) = __default_tag_region; |
| 90 | static void *(*__tag_new_usable)(void *) = __default_tag_nop; |
| 91 | static void *(*__tag_at)(void *) = __default_tag_nop; |
| 92 | |
| 93 | +# define MTAG_MMAP_FLAGS __mtag_mmap_flags |
| 94 | # define TAG_NEW_MEMSET(ptr, val, size) __tag_new_memset (ptr, val, size) |
| 95 | # define TAG_REGION(ptr, size) __tag_region (ptr, size) |
| 96 | # define TAG_NEW_USABLE(ptr) __tag_new_usable (ptr) |
| 97 | # define TAG_AT(ptr) __tag_at (ptr) |
| 98 | #else |
| 99 | +# define MTAG_MMAP_FLAGS 0 |
| 100 | # define TAG_NEW_MEMSET(ptr, val, size) memset (ptr, val, size) |
| 101 | # define TAG_REGION(ptr, size) (ptr) |
| 102 | # define TAG_NEW_USABLE(ptr) (ptr) |
| 103 | -- |
| 104 | 2.25.1 |
| 105 | |
| 106 | |
| 107 | From 4b13f77fb97f9618a7868ab767d05e0c2d7c6f6f Mon Sep 17 00:00:00 2001 |
| 108 | From: Szabolcs Nagy <szabolcs.nagy@arm.com> |
| 109 | Date: Thu, 4 Feb 2021 11:38:23 +0000 |
| 110 | Subject: [PATCH 03/11] malloc: Simplify __mtag_tag_new_usable |
| 111 | |
| 112 | The chunk cannot be a dumped one here. The only non-obvious cases |
| 113 | are free and realloc which may be called on a dumped area chunk, |
| 114 | but in both cases it can be verified that tagging is already |
| 115 | avoided for dumped area chunks. |
| 116 | |
| 117 | Reviewed-by: DJ Delorie <dj@redhat.com> |
| 118 | --- |
| 119 | malloc/arena.c | 5 ----- |
| 120 | 1 file changed, 5 deletions(-) |
| 121 | |
| 122 | diff --git a/malloc/arena.c b/malloc/arena.c |
| 123 | index bf17be27d4..0777dc70c6 100644 |
| 124 | --- a/malloc/arena.c |
| 125 | +++ b/malloc/arena.c |
| 126 | @@ -298,11 +298,6 @@ __mtag_tag_new_usable (void *ptr) |
| 127 | if (ptr) |
| 128 | { |
| 129 | mchunkptr cp = mem2chunk(ptr); |
| 130 | - /* This likely will never happen, but we can't handle retagging |
| 131 | - chunks from the dumped main arena. So just return the |
| 132 | - existing pointer. */ |
| 133 | - if (DUMPED_MAIN_ARENA_CHUNK (cp)) |
| 134 | - return ptr; |
| 135 | ptr = __libc_mtag_tag_region (__libc_mtag_new_tag (ptr), |
| 136 | CHUNK_AVAILABLE_SIZE (cp) - CHUNK_HDR_SZ); |
| 137 | } |
| 138 | -- |
| 139 | 2.25.1 |
| 140 | |
| 141 | |
| 142 | From 4f05837ba6934c5b8bbc6738f8883890493f50b6 Mon Sep 17 00:00:00 2001 |
| 143 | From: Szabolcs Nagy <szabolcs.nagy@arm.com> |
| 144 | Date: Thu, 4 Feb 2021 11:52:14 +0000 |
| 145 | Subject: [PATCH 04/11] malloc: Avoid tagging mmapped memory on free |
| 146 | |
| 147 | Either the memory belongs to the dumped area, in which case we don't |
| 148 | want to tag (the dumped area has the same tag as malloc internal data |
| 149 | so tagging is unnecessary, but chunks there may not have the right |
| 150 | alignment for the tag granule), or the memory will be unmapped |
| 151 | immediately (and thus tagging is not useful). |
| 152 | |
| 153 | Reviewed-by: DJ Delorie <dj@redhat.com> |
| 154 | --- |
| 155 | malloc/malloc.c | 7 ++++--- |
| 156 | 1 file changed, 4 insertions(+), 3 deletions(-) |
| 157 | |
| 158 | diff --git a/malloc/malloc.c b/malloc/malloc.c |
| 159 | index 61c25d0f93..ecb87350b0 100644 |
| 160 | --- a/malloc/malloc.c |
| 161 | +++ b/malloc/malloc.c |
| 162 | @@ -3284,9 +3284,6 @@ __libc_free (void *mem) |
| 163 | |
| 164 | p = mem2chunk (mem); |
| 165 | |
| 166 | - /* Mark the chunk as belonging to the library again. */ |
| 167 | - (void)TAG_REGION (chunk2rawmem (p), CHUNK_AVAILABLE_SIZE (p) - CHUNK_HDR_SZ); |
| 168 | - |
| 169 | if (chunk_is_mmapped (p)) /* release mmapped memory. */ |
| 170 | { |
| 171 | /* See if the dynamic brk/mmap threshold needs adjusting. |
| 172 | @@ -3307,6 +3304,10 @@ __libc_free (void *mem) |
| 173 | { |
| 174 | MAYBE_INIT_TCACHE (); |
| 175 | |
| 176 | + /* Mark the chunk as belonging to the library again. */ |
| 177 | + (void)TAG_REGION (chunk2rawmem (p), |
| 178 | + CHUNK_AVAILABLE_SIZE (p) - CHUNK_HDR_SZ); |
| 179 | + |
| 180 | ar_ptr = arena_for_chunk (p); |
| 181 | _int_free (ar_ptr, p, 0); |
| 182 | } |
| 183 | -- |
| 184 | 2.25.1 |
| 185 | |
| 186 | |
| 187 | From 673fad3798846101b77a89595cfa17f334a1c898 Mon Sep 17 00:00:00 2001 |
| 188 | From: Szabolcs Nagy <szabolcs.nagy@arm.com> |
| 189 | Date: Tue, 16 Feb 2021 14:12:25 +0000 |
| 190 | Subject: [PATCH 05/11] malloc: Refactor TAG_ macros to avoid indirection |
| 191 | |
| 192 | This does not change behaviour, just removes one layer of indirection |
| 193 | in the internal memory tagging logic. |
| 194 | |
| 195 | Use tag_ and mtag_ prefixes instead of __tag_ and __mtag_ since these |
| 196 | are all symbols with internal linkage, private to malloc.c, so there |
| 197 | is no user namespace pollution issue. |
| 198 | |
| 199 | Reviewed-by: DJ Delorie <dj@redhat.com> |
| 200 | --- |
| 201 | malloc/arena.c | 16 +++++----- |
| 202 | malloc/hooks.c | 10 +++--- |
| 203 | malloc/malloc.c | 81 +++++++++++++++++++++++-------------------------- |
| 204 | 3 files changed, 51 insertions(+), 56 deletions(-) |
| 205 | |
| 206 | diff --git a/malloc/arena.c b/malloc/arena.c |
| 207 | index 0777dc70c6..d0778fea92 100644 |
| 208 | --- a/malloc/arena.c |
| 209 | +++ b/malloc/arena.c |
| 210 | @@ -332,12 +332,12 @@ ptmalloc_init (void) |
| 211 | if (__MTAG_SBRK_UNTAGGED) |
| 212 | __morecore = __failing_morecore; |
| 213 | |
| 214 | - __mtag_mmap_flags = __MTAG_MMAP_FLAGS; |
| 215 | - __tag_new_memset = __mtag_tag_new_memset; |
| 216 | - __tag_region = __libc_mtag_tag_region; |
| 217 | - __tag_new_usable = __mtag_tag_new_usable; |
| 218 | - __tag_at = __libc_mtag_address_get_tag; |
| 219 | - __mtag_granule_mask = ~(size_t)(__MTAG_GRANULE_SIZE - 1); |
| 220 | + mtag_mmap_flags = __MTAG_MMAP_FLAGS; |
| 221 | + tag_new_memset = __mtag_tag_new_memset; |
| 222 | + tag_region = __libc_mtag_tag_region; |
| 223 | + tag_new_usable = __mtag_tag_new_usable; |
| 224 | + tag_at = __libc_mtag_address_get_tag; |
| 225 | + mtag_granule_mask = ~(size_t)(__MTAG_GRANULE_SIZE - 1); |
| 226 | } |
| 227 | #endif |
| 228 | |
| 229 | @@ -557,7 +557,7 @@ new_heap (size_t size, size_t top_pad) |
| 230 | } |
| 231 | } |
| 232 | } |
| 233 | - if (__mprotect (p2, size, MTAG_MMAP_FLAGS | PROT_READ | PROT_WRITE) != 0) |
| 234 | + if (__mprotect (p2, size, mtag_mmap_flags | PROT_READ | PROT_WRITE) != 0) |
| 235 | { |
| 236 | __munmap (p2, HEAP_MAX_SIZE); |
| 237 | return 0; |
| 238 | @@ -587,7 +587,7 @@ grow_heap (heap_info *h, long diff) |
| 239 | { |
| 240 | if (__mprotect ((char *) h + h->mprotect_size, |
| 241 | (unsigned long) new_size - h->mprotect_size, |
| 242 | - MTAG_MMAP_FLAGS | PROT_READ | PROT_WRITE) != 0) |
| 243 | + mtag_mmap_flags | PROT_READ | PROT_WRITE) != 0) |
| 244 | return -2; |
| 245 | |
| 246 | h->mprotect_size = new_size; |
| 247 | diff --git a/malloc/hooks.c b/malloc/hooks.c |
| 248 | index efec05f0a8..d8e304c31c 100644 |
| 249 | --- a/malloc/hooks.c |
| 250 | +++ b/malloc/hooks.c |
| 251 | @@ -68,7 +68,7 @@ __malloc_check_init (void) |
| 252 | tags, so fetch the tag at each location before dereferencing |
| 253 | it. */ |
| 254 | #define SAFE_CHAR_OFFSET(p,offset) \ |
| 255 | - ((unsigned char *) TAG_AT (((unsigned char *) p) + offset)) |
| 256 | + ((unsigned char *) tag_at (((unsigned char *) p) + offset)) |
| 257 | |
| 258 | /* A simple, standard set of debugging hooks. Overhead is `only' one |
| 259 | byte per chunk; still this will catch most cases of double frees or |
| 260 | @@ -249,7 +249,7 @@ malloc_check (size_t sz, const void *caller) |
| 261 | top_check (); |
| 262 | victim = _int_malloc (&main_arena, nb); |
| 263 | __libc_lock_unlock (main_arena.mutex); |
| 264 | - return mem2mem_check (TAG_NEW_USABLE (victim), sz); |
| 265 | + return mem2mem_check (tag_new_usable (victim), sz); |
| 266 | } |
| 267 | |
| 268 | static void |
| 269 | @@ -280,7 +280,7 @@ free_check (void *mem, const void *caller) |
| 270 | else |
| 271 | { |
| 272 | /* Mark the chunk as belonging to the library again. */ |
| 273 | - (void)TAG_REGION (chunk2rawmem (p), CHUNK_AVAILABLE_SIZE (p) |
| 274 | + (void)tag_region (chunk2rawmem (p), CHUNK_AVAILABLE_SIZE (p) |
| 275 | - CHUNK_HDR_SZ); |
| 276 | _int_free (&main_arena, p, 1); |
| 277 | __libc_lock_unlock (main_arena.mutex); |
| 278 | @@ -375,7 +375,7 @@ invert: |
| 279 | |
| 280 | __libc_lock_unlock (main_arena.mutex); |
| 281 | |
| 282 | - return mem2mem_check (TAG_NEW_USABLE (newmem), bytes); |
| 283 | + return mem2mem_check (tag_new_usable (newmem), bytes); |
| 284 | } |
| 285 | |
| 286 | static void * |
| 287 | @@ -417,7 +417,7 @@ memalign_check (size_t alignment, size_t bytes, const void *caller) |
| 288 | top_check (); |
| 289 | mem = _int_memalign (&main_arena, alignment, bytes + 1); |
| 290 | __libc_lock_unlock (main_arena.mutex); |
| 291 | - return mem2mem_check (TAG_NEW_USABLE (mem), bytes); |
| 292 | + return mem2mem_check (tag_new_usable (mem), bytes); |
| 293 | } |
| 294 | |
| 295 | #if SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_25) |
| 296 | diff --git a/malloc/malloc.c b/malloc/malloc.c |
| 297 | index ecb87350b0..62d00f54cc 100644 |
| 298 | --- a/malloc/malloc.c |
| 299 | +++ b/malloc/malloc.c |
| 300 | @@ -413,26 +413,26 @@ void *(*__morecore)(ptrdiff_t) = __default_morecore; |
| 301 | operations can continue to be used. Support macros are used to do |
| 302 | this: |
| 303 | |
| 304 | - void *TAG_NEW_MEMSET (void *ptr, int, val, size_t size) |
| 305 | + void *tag_new_memset (void *ptr, int, val, size_t size) |
| 306 | |
| 307 | Has the same interface as memset(), but additionally allocates a |
| 308 | new tag, colors the memory with that tag and returns a pointer that |
| 309 | is correctly colored for that location. The non-tagging version |
| 310 | will simply call memset. |
| 311 | |
| 312 | - void *TAG_REGION (void *ptr, size_t size) |
| 313 | + void *tag_region (void *ptr, size_t size) |
| 314 | |
| 315 | Color the region of memory pointed to by PTR and size SIZE with |
| 316 | the color of PTR. Returns the original pointer. |
| 317 | |
| 318 | - void *TAG_NEW_USABLE (void *ptr) |
| 319 | + void *tag_new_usable (void *ptr) |
| 320 | |
| 321 | Allocate a new random color and use it to color the user region of |
| 322 | a chunk; this may include data from the subsequent chunk's header |
| 323 | if tagging is sufficiently fine grained. Returns PTR suitably |
| 324 | recolored for accessing the memory there. |
| 325 | |
| 326 | - void *TAG_AT (void *ptr) |
| 327 | + void *tag_at (void *ptr) |
| 328 | |
| 329 | Read the current color of the memory at the address pointed to by |
| 330 | PTR (ignoring it's current color) and return PTR recolored to that |
| 331 | @@ -455,25 +455,20 @@ __default_tag_nop (void *ptr) |
| 332 | return ptr; |
| 333 | } |
| 334 | |
| 335 | -static int __mtag_mmap_flags = 0; |
| 336 | -static size_t __mtag_granule_mask = ~(size_t)0; |
| 337 | +static int mtag_mmap_flags = 0; |
| 338 | +static size_t mtag_granule_mask = ~(size_t)0; |
| 339 | |
| 340 | -static void *(*__tag_new_memset)(void *, int, size_t) = memset; |
| 341 | -static void *(*__tag_region)(void *, size_t) = __default_tag_region; |
| 342 | -static void *(*__tag_new_usable)(void *) = __default_tag_nop; |
| 343 | -static void *(*__tag_at)(void *) = __default_tag_nop; |
| 344 | +static void *(*tag_new_memset)(void *, int, size_t) = memset; |
| 345 | +static void *(*tag_region)(void *, size_t) = __default_tag_region; |
| 346 | +static void *(*tag_new_usable)(void *) = __default_tag_nop; |
| 347 | +static void *(*tag_at)(void *) = __default_tag_nop; |
| 348 | |
| 349 | -# define MTAG_MMAP_FLAGS __mtag_mmap_flags |
| 350 | -# define TAG_NEW_MEMSET(ptr, val, size) __tag_new_memset (ptr, val, size) |
| 351 | -# define TAG_REGION(ptr, size) __tag_region (ptr, size) |
| 352 | -# define TAG_NEW_USABLE(ptr) __tag_new_usable (ptr) |
| 353 | -# define TAG_AT(ptr) __tag_at (ptr) |
| 354 | #else |
| 355 | -# define MTAG_MMAP_FLAGS 0 |
| 356 | -# define TAG_NEW_MEMSET(ptr, val, size) memset (ptr, val, size) |
| 357 | -# define TAG_REGION(ptr, size) (ptr) |
| 358 | -# define TAG_NEW_USABLE(ptr) (ptr) |
| 359 | -# define TAG_AT(ptr) (ptr) |
| 360 | +# define mtag_mmap_flags 0 |
| 361 | +# define tag_new_memset(ptr, val, size) memset (ptr, val, size) |
| 362 | +# define tag_region(ptr, size) (ptr) |
| 363 | +# define tag_new_usable(ptr) (ptr) |
| 364 | +# define tag_at(ptr) (ptr) |
| 365 | #endif |
| 366 | |
| 367 | #include <string.h> |
| 368 | @@ -1305,8 +1300,8 @@ nextchunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ |
| 369 | /* Convert between user mem pointers and chunk pointers, updating any |
| 370 | memory tags on the pointer to respect the tag value at that |
| 371 | location. */ |
| 372 | -#define chunk2mem(p) ((void*)TAG_AT (((char*)(p) + CHUNK_HDR_SZ))) |
| 373 | -#define mem2chunk(mem) ((mchunkptr)TAG_AT (((char*)(mem) - CHUNK_HDR_SZ))) |
| 374 | +#define chunk2mem(p) ((void *)tag_at (((char*)(p) + CHUNK_HDR_SZ))) |
| 375 | +#define mem2chunk(mem) ((mchunkptr)tag_at (((char*)(mem) - CHUNK_HDR_SZ))) |
| 376 | |
| 377 | /* The smallest possible chunk */ |
| 378 | #define MIN_CHUNK_SIZE (offsetof(struct malloc_chunk, fd_nextsize)) |
| 379 | @@ -1337,7 +1332,7 @@ nextchunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ |
| 380 | #ifdef USE_MTAG |
| 381 | #define CHUNK_AVAILABLE_SIZE(p) \ |
| 382 | ((chunksize (p) + (chunk_is_mmapped (p) ? 0 : SIZE_SZ)) \ |
| 383 | - & __mtag_granule_mask) |
| 384 | + & mtag_granule_mask) |
| 385 | #else |
| 386 | #define CHUNK_AVAILABLE_SIZE(p) \ |
| 387 | (chunksize (p) + (chunk_is_mmapped (p) ? 0 : SIZE_SZ)) |
| 388 | @@ -1361,7 +1356,7 @@ checked_request2size (size_t req, size_t *sz) __nonnull (1) |
| 389 | number. Ideally, this would be part of request2size(), but that |
| 390 | must be a macro that produces a compile time constant if passed |
| 391 | a constant literal. */ |
| 392 | - req = (req + ~__mtag_granule_mask) & __mtag_granule_mask; |
| 393 | + req = (req + ~mtag_granule_mask) & mtag_granule_mask; |
| 394 | #endif |
| 395 | |
| 396 | *sz = request2size (req); |
| 397 | @@ -2467,7 +2462,7 @@ sysmalloc (INTERNAL_SIZE_T nb, mstate av) |
| 398 | if ((unsigned long) (size) > (unsigned long) (nb)) |
| 399 | { |
| 400 | mm = (char *) (MMAP (0, size, |
| 401 | - MTAG_MMAP_FLAGS | PROT_READ | PROT_WRITE, 0)); |
| 402 | + mtag_mmap_flags | PROT_READ | PROT_WRITE, 0)); |
| 403 | |
| 404 | if (mm != MAP_FAILED) |
| 405 | { |
| 406 | @@ -2665,7 +2660,7 @@ sysmalloc (INTERNAL_SIZE_T nb, mstate av) |
| 407 | if ((unsigned long) (size) > (unsigned long) (nb)) |
| 408 | { |
| 409 | char *mbrk = (char *) (MMAP (0, size, |
| 410 | - MTAG_MMAP_FLAGS | PROT_READ | PROT_WRITE, |
| 411 | + mtag_mmap_flags | PROT_READ | PROT_WRITE, |
| 412 | 0)); |
| 413 | |
| 414 | if (mbrk != MAP_FAILED) |
| 415 | @@ -3221,14 +3216,14 @@ __libc_malloc (size_t bytes) |
| 416 | && tcache->counts[tc_idx] > 0) |
| 417 | { |
| 418 | victim = tcache_get (tc_idx); |
| 419 | - return TAG_NEW_USABLE (victim); |
| 420 | + return tag_new_usable (victim); |
| 421 | } |
| 422 | DIAG_POP_NEEDS_COMMENT; |
| 423 | #endif |
| 424 | |
| 425 | if (SINGLE_THREAD_P) |
| 426 | { |
| 427 | - victim = TAG_NEW_USABLE (_int_malloc (&main_arena, bytes)); |
| 428 | + victim = tag_new_usable (_int_malloc (&main_arena, bytes)); |
| 429 | assert (!victim || chunk_is_mmapped (mem2chunk (victim)) || |
| 430 | &main_arena == arena_for_chunk (mem2chunk (victim))); |
| 431 | return victim; |
| 432 | @@ -3249,7 +3244,7 @@ __libc_malloc (size_t bytes) |
| 433 | if (ar_ptr != NULL) |
| 434 | __libc_lock_unlock (ar_ptr->mutex); |
| 435 | |
| 436 | - victim = TAG_NEW_USABLE (victim); |
| 437 | + victim = tag_new_usable (victim); |
| 438 | |
| 439 | assert (!victim || chunk_is_mmapped (mem2chunk (victim)) || |
| 440 | ar_ptr == arena_for_chunk (mem2chunk (victim))); |
| 441 | @@ -3305,7 +3300,7 @@ __libc_free (void *mem) |
| 442 | MAYBE_INIT_TCACHE (); |
| 443 | |
| 444 | /* Mark the chunk as belonging to the library again. */ |
| 445 | - (void)TAG_REGION (chunk2rawmem (p), |
| 446 | + (void)tag_region (chunk2rawmem (p), |
| 447 | CHUNK_AVAILABLE_SIZE (p) - CHUNK_HDR_SZ); |
| 448 | |
| 449 | ar_ptr = arena_for_chunk (p); |
| 450 | @@ -3408,7 +3403,7 @@ __libc_realloc (void *oldmem, size_t bytes) |
| 451 | reused. There's a performance hit for both us and the |
| 452 | caller for doing this, so we might want to |
| 453 | reconsider. */ |
| 454 | - return TAG_NEW_USABLE (newmem); |
| 455 | + return tag_new_usable (newmem); |
| 456 | } |
| 457 | #endif |
| 458 | /* Note the extra SIZE_SZ overhead. */ |
| 459 | @@ -3451,7 +3446,7 @@ __libc_realloc (void *oldmem, size_t bytes) |
| 460 | { |
| 461 | size_t sz = CHUNK_AVAILABLE_SIZE (oldp) - CHUNK_HDR_SZ; |
| 462 | memcpy (newp, oldmem, sz); |
| 463 | - (void) TAG_REGION (chunk2rawmem (oldp), sz); |
| 464 | + (void) tag_region (chunk2rawmem (oldp), sz); |
| 465 | _int_free (ar_ptr, oldp, 0); |
| 466 | } |
| 467 | } |
| 468 | @@ -3509,7 +3504,7 @@ _mid_memalign (size_t alignment, size_t bytes, void *address) |
| 469 | p = _int_memalign (&main_arena, alignment, bytes); |
| 470 | assert (!p || chunk_is_mmapped (mem2chunk (p)) || |
| 471 | &main_arena == arena_for_chunk (mem2chunk (p))); |
| 472 | - return TAG_NEW_USABLE (p); |
| 473 | + return tag_new_usable (p); |
| 474 | } |
| 475 | |
| 476 | arena_get (ar_ptr, bytes + alignment + MINSIZE); |
| 477 | @@ -3527,7 +3522,7 @@ _mid_memalign (size_t alignment, size_t bytes, void *address) |
| 478 | |
| 479 | assert (!p || chunk_is_mmapped (mem2chunk (p)) || |
| 480 | ar_ptr == arena_for_chunk (mem2chunk (p))); |
| 481 | - return TAG_NEW_USABLE (p); |
| 482 | + return tag_new_usable (p); |
| 483 | } |
| 484 | /* For ISO C11. */ |
| 485 | weak_alias (__libc_memalign, aligned_alloc) |
| 486 | @@ -3544,7 +3539,7 @@ __libc_valloc (size_t bytes) |
| 487 | void *address = RETURN_ADDRESS (0); |
| 488 | size_t pagesize = GLRO (dl_pagesize); |
| 489 | p = _mid_memalign (pagesize, bytes, address); |
| 490 | - return TAG_NEW_USABLE (p); |
| 491 | + return tag_new_usable (p); |
| 492 | } |
| 493 | |
| 494 | void * |
| 495 | @@ -3569,7 +3564,7 @@ __libc_pvalloc (size_t bytes) |
| 496 | rounded_bytes = rounded_bytes & -(pagesize - 1); |
| 497 | |
| 498 | p = _mid_memalign (pagesize, rounded_bytes, address); |
| 499 | - return TAG_NEW_USABLE (p); |
| 500 | + return tag_new_usable (p); |
| 501 | } |
| 502 | |
| 503 | void * |
| 504 | @@ -3666,7 +3661,7 @@ __libc_calloc (size_t n, size_t elem_size) |
| 505 | regardless of MORECORE_CLEARS, so we zero the whole block while |
| 506 | doing so. */ |
| 507 | #ifdef USE_MTAG |
| 508 | - return TAG_NEW_MEMSET (mem, 0, CHUNK_AVAILABLE_SIZE (p) - CHUNK_HDR_SZ); |
| 509 | + return tag_new_memset (mem, 0, CHUNK_AVAILABLE_SIZE (p) - CHUNK_HDR_SZ); |
| 510 | #else |
| 511 | INTERNAL_SIZE_T csz = chunksize (p); |
| 512 | |
| 513 | @@ -4821,7 +4816,7 @@ _int_realloc(mstate av, mchunkptr oldp, INTERNAL_SIZE_T oldsize, |
| 514 | av->top = chunk_at_offset (oldp, nb); |
| 515 | set_head (av->top, (newsize - nb) | PREV_INUSE); |
| 516 | check_inuse_chunk (av, oldp); |
| 517 | - return TAG_NEW_USABLE (chunk2rawmem (oldp)); |
| 518 | + return tag_new_usable (chunk2rawmem (oldp)); |
| 519 | } |
| 520 | |
| 521 | /* Try to expand forward into next chunk; split off remainder below */ |
| 522 | @@ -4856,8 +4851,8 @@ _int_realloc(mstate av, mchunkptr oldp, INTERNAL_SIZE_T oldsize, |
| 523 | { |
| 524 | void *oldmem = chunk2rawmem (oldp); |
| 525 | size_t sz = CHUNK_AVAILABLE_SIZE (oldp) - CHUNK_HDR_SZ; |
| 526 | - (void) TAG_REGION (oldmem, sz); |
| 527 | - newmem = TAG_NEW_USABLE (newmem); |
| 528 | + (void) tag_region (oldmem, sz); |
| 529 | + newmem = tag_new_usable (newmem); |
| 530 | memcpy (newmem, oldmem, sz); |
| 531 | _int_free (av, oldp, 1); |
| 532 | check_inuse_chunk (av, newp); |
| 533 | @@ -4881,7 +4876,7 @@ _int_realloc(mstate av, mchunkptr oldp, INTERNAL_SIZE_T oldsize, |
| 534 | { |
| 535 | remainder = chunk_at_offset (newp, nb); |
| 536 | /* Clear any user-space tags before writing the header. */ |
| 537 | - remainder = TAG_REGION (remainder, remainder_size); |
| 538 | + remainder = tag_region (remainder, remainder_size); |
| 539 | set_head_size (newp, nb | (av != &main_arena ? NON_MAIN_ARENA : 0)); |
| 540 | set_head (remainder, remainder_size | PREV_INUSE | |
| 541 | (av != &main_arena ? NON_MAIN_ARENA : 0)); |
| 542 | @@ -4891,7 +4886,7 @@ _int_realloc(mstate av, mchunkptr oldp, INTERNAL_SIZE_T oldsize, |
| 543 | } |
| 544 | |
| 545 | check_inuse_chunk (av, newp); |
| 546 | - return TAG_NEW_USABLE (chunk2rawmem (newp)); |
| 547 | + return tag_new_usable (chunk2rawmem (newp)); |
| 548 | } |
| 549 | |
| 550 | /* |
| 551 | @@ -5108,7 +5103,7 @@ musable (void *mem) |
| 552 | /* The usable space may be reduced if memory tagging is needed, |
| 553 | since we cannot share the user-space data with malloc's internal |
| 554 | data structure. */ |
| 555 | - result &= __mtag_granule_mask; |
| 556 | + result &= mtag_granule_mask; |
| 557 | #endif |
| 558 | return result; |
| 559 | } |
| 560 | -- |
| 561 | 2.25.1 |
| 562 | |
| 563 | |
| 564 | From f0ea41e819f40aacedf25431bedd95da9c5db534 Mon Sep 17 00:00:00 2001 |
| 565 | From: Szabolcs Nagy <szabolcs.nagy@arm.com> |
| 566 | Date: Wed, 27 Jan 2021 15:45:43 +0000 |
| 567 | Subject: [PATCH 06/11] malloc: Use global flag instead of function pointer |
| 568 | dispatch for mtag |
| 569 | |
| 570 | A flag check can be faster than function pointers because of how |
| 571 | branch prediction and speculation works and it can also remove a layer |
| 572 | of indirection when there is a mismatch between the malloc internal |
| 573 | tag_* api and __libc_mtag_* target hooks. |
| 574 | |
| 575 | Memory tagging wrapper functions are moved to malloc.c from arena.c and |
| 576 | the logic now checks mmap_enabled. The definition of tag_new_usable is |
| 577 | moved after chunk related definitions. |
| 578 | |
| 579 | This refactoring also allows using mtag_enabled checks instead of |
| 580 | USE_MTAG ifdefs when memory tagging support only changes code logic |
| 581 | when memory tagging is enabled at runtime. Note: an "if (false)" code |
| 582 | block is optimized away even at -O0 by gcc. |
| 583 | |
| 584 | Reviewed-by: DJ Delorie <dj@redhat.com> |
| 585 | --- |
| 586 | malloc/arena.c | 33 +--------------------------- |
| 587 | malloc/malloc.c | 58 ++++++++++++++++++++++++++++++++----------------- |
| 588 | 2 files changed, 39 insertions(+), 52 deletions(-) |
| 589 | |
| 590 | diff --git a/malloc/arena.c b/malloc/arena.c |
| 591 | index d0778fea92..1e83bb66bd 100644 |
| 592 | --- a/malloc/arena.c |
| 593 | +++ b/malloc/arena.c |
| 594 | @@ -287,34 +287,6 @@ extern struct dl_open_hook *_dl_open_hook; |
| 595 | libc_hidden_proto (_dl_open_hook); |
| 596 | #endif |
| 597 | |
| 598 | -#ifdef USE_MTAG |
| 599 | - |
| 600 | -/* Generate a new (random) tag value for PTR and tag the memory it |
| 601 | - points to upto the end of the usable size for the chunk containing |
| 602 | - it. Return the newly tagged pointer. */ |
| 603 | -static void * |
| 604 | -__mtag_tag_new_usable (void *ptr) |
| 605 | -{ |
| 606 | - if (ptr) |
| 607 | - { |
| 608 | - mchunkptr cp = mem2chunk(ptr); |
| 609 | - ptr = __libc_mtag_tag_region (__libc_mtag_new_tag (ptr), |
| 610 | - CHUNK_AVAILABLE_SIZE (cp) - CHUNK_HDR_SZ); |
| 611 | - } |
| 612 | - return ptr; |
| 613 | -} |
| 614 | - |
| 615 | -/* Generate a new (random) tag value for PTR, set the tags for the |
| 616 | - memory to the new tag and initialize the memory contents to VAL. |
| 617 | - In practice this function will only be called with VAL=0, but we |
| 618 | - keep this parameter to maintain the same prototype as memset. */ |
| 619 | -static void * |
| 620 | -__mtag_tag_new_memset (void *ptr, int val, size_t size) |
| 621 | -{ |
| 622 | - return __libc_mtag_memset_with_tag (__libc_mtag_new_tag (ptr), val, size); |
| 623 | -} |
| 624 | -#endif |
| 625 | - |
| 626 | static void |
| 627 | ptmalloc_init (void) |
| 628 | { |
| 629 | @@ -332,11 +304,8 @@ ptmalloc_init (void) |
| 630 | if (__MTAG_SBRK_UNTAGGED) |
| 631 | __morecore = __failing_morecore; |
| 632 | |
| 633 | + mtag_enabled = true; |
| 634 | mtag_mmap_flags = __MTAG_MMAP_FLAGS; |
| 635 | - tag_new_memset = __mtag_tag_new_memset; |
| 636 | - tag_region = __libc_mtag_tag_region; |
| 637 | - tag_new_usable = __mtag_tag_new_usable; |
| 638 | - tag_at = __libc_mtag_address_get_tag; |
| 639 | mtag_granule_mask = ~(size_t)(__MTAG_GRANULE_SIZE - 1); |
| 640 | } |
| 641 | #endif |
| 642 | diff --git a/malloc/malloc.c b/malloc/malloc.c |
| 643 | index 62d00f54cc..253a919ec5 100644 |
| 644 | --- a/malloc/malloc.c |
| 645 | +++ b/malloc/malloc.c |
| 646 | @@ -441,35 +441,41 @@ void *(*__morecore)(ptrdiff_t) = __default_morecore; |
| 647 | */ |
| 648 | |
| 649 | #ifdef USE_MTAG |
| 650 | +static bool mtag_enabled = false; |
| 651 | +static int mtag_mmap_flags = 0; |
| 652 | +static size_t mtag_granule_mask = ~(size_t)0; |
| 653 | +#else |
| 654 | +# define mtag_enabled false |
| 655 | +# define mtag_mmap_flags 0 |
| 656 | +#endif |
| 657 | |
| 658 | -/* Default implementaions when memory tagging is supported, but disabled. */ |
| 659 | -static void * |
| 660 | -__default_tag_region (void *ptr, size_t size) |
| 661 | +static __always_inline void * |
| 662 | +tag_region (void *ptr, size_t size) |
| 663 | { |
| 664 | + if (__glibc_unlikely (mtag_enabled)) |
| 665 | + return __libc_mtag_tag_region (ptr, size); |
| 666 | return ptr; |
| 667 | } |
| 668 | |
| 669 | -static void * |
| 670 | -__default_tag_nop (void *ptr) |
| 671 | +static __always_inline void * |
| 672 | +tag_new_memset (void *ptr, int val, size_t size) |
| 673 | { |
| 674 | - return ptr; |
| 675 | + if (__glibc_unlikely (mtag_enabled)) |
| 676 | + return __libc_mtag_memset_with_tag (__libc_mtag_new_tag (ptr), val, size); |
| 677 | + return memset (ptr, val, size); |
| 678 | } |
| 679 | |
| 680 | -static int mtag_mmap_flags = 0; |
| 681 | -static size_t mtag_granule_mask = ~(size_t)0; |
| 682 | - |
| 683 | -static void *(*tag_new_memset)(void *, int, size_t) = memset; |
| 684 | -static void *(*tag_region)(void *, size_t) = __default_tag_region; |
| 685 | -static void *(*tag_new_usable)(void *) = __default_tag_nop; |
| 686 | -static void *(*tag_at)(void *) = __default_tag_nop; |
| 687 | +/* Defined later. */ |
| 688 | +static void * |
| 689 | +tag_new_usable (void *ptr); |
| 690 | |
| 691 | -#else |
| 692 | -# define mtag_mmap_flags 0 |
| 693 | -# define tag_new_memset(ptr, val, size) memset (ptr, val, size) |
| 694 | -# define tag_region(ptr, size) (ptr) |
| 695 | -# define tag_new_usable(ptr) (ptr) |
| 696 | -# define tag_at(ptr) (ptr) |
| 697 | -#endif |
| 698 | +static __always_inline void * |
| 699 | +tag_at (void *ptr) |
| 700 | +{ |
| 701 | + if (__glibc_unlikely (mtag_enabled)) |
| 702 | + return __libc_mtag_address_get_tag (ptr); |
| 703 | + return ptr; |
| 704 | +} |
| 705 | |
| 706 | #include <string.h> |
| 707 | |
| 708 | @@ -1460,6 +1466,18 @@ checked_request2size (size_t req, size_t *sz) __nonnull (1) |
| 709 | #pragma GCC poison mchunk_size |
| 710 | #pragma GCC poison mchunk_prev_size |
| 711 | |
| 712 | +static __always_inline void * |
| 713 | +tag_new_usable (void *ptr) |
| 714 | +{ |
| 715 | + if (__glibc_unlikely (mtag_enabled) && ptr) |
| 716 | + { |
| 717 | + mchunkptr cp = mem2chunk(ptr); |
| 718 | + ptr = __libc_mtag_tag_region (__libc_mtag_new_tag (ptr), |
| 719 | + CHUNK_AVAILABLE_SIZE (cp) - CHUNK_HDR_SZ); |
| 720 | + } |
| 721 | + return ptr; |
| 722 | +} |
| 723 | + |
| 724 | /* |
| 725 | -------------------- Internal data structures -------------------- |
| 726 | |
| 727 | -- |
| 728 | 2.25.1 |
| 729 | |
| 730 | |
| 731 | From 8597244d5c3edbd672b285eea5f6dea833256f9d Mon Sep 17 00:00:00 2001 |
| 732 | From: Szabolcs Nagy <szabolcs.nagy@arm.com> |
| 733 | Date: Wed, 17 Feb 2021 10:39:37 +0000 |
| 734 | Subject: [PATCH 07/11] malloc: Ensure the generic mtag hooks are not used |
| 735 | |
| 736 | Use inline functions instead of macros, because macros can cause unused |
| 737 | variable warnings and type conversion issues. We assume these functions |
| 738 | may appear in the code but only in dead code paths (hidden by a runtime |
| 739 | check), so it's important that they can compile with correct types, but |
| 740 | if they are actually used that should be an error. |
| 741 | |
| 742 | Currently the hooks are only used when USE_MTAG is true which only |
| 743 | happens on aarch64 and then the aarch64 specific code is used not this |
| 744 | generic header. However followup refactoring will allow the hooks to |
| 745 | be used with !USE_MTAG. |
| 746 | |
| 747 | Note: the const qualifier in the comment was wrong: changing tags is a |
| 748 | write operation. |
| 749 | |
| 750 | Reviewed-by: DJ Delorie <dj@redhat.com> |
| 751 | --- |
| 752 | sysdeps/generic/libc-mtag.h | 41 ++++++++++++++++++++++++++++--------- |
| 753 | 1 file changed, 31 insertions(+), 10 deletions(-) |
| 754 | |
| 755 | diff --git a/sysdeps/generic/libc-mtag.h b/sysdeps/generic/libc-mtag.h |
| 756 | index 1a866cdc0c..e8fc236b6c 100644 |
| 757 | --- a/sysdeps/generic/libc-mtag.h |
| 758 | +++ b/sysdeps/generic/libc-mtag.h |
| 759 | @@ -31,22 +31,43 @@ |
| 760 | /* Extra flags to pass to mmap() to request a tagged region of memory. */ |
| 761 | #define __MTAG_MMAP_FLAGS 0 |
| 762 | |
| 763 | +/* Memory tagging target hooks are only called when memory tagging is |
| 764 | + enabled at runtime. The generic definitions here must not be used. */ |
| 765 | +void __libc_mtag_link_error (void); |
| 766 | + |
| 767 | /* Set the tags for a region of memory, which must have size and alignment |
| 768 | - that are multiples of __MTAG_GRANULE_SIZE. Size cannot be zero. |
| 769 | - void *__libc_mtag_tag_region (const void *, size_t) */ |
| 770 | -#define __libc_mtag_tag_region(p, s) (p) |
| 771 | + that are multiples of __MTAG_GRANULE_SIZE. Size cannot be zero. */ |
| 772 | +static inline void * |
| 773 | +__libc_mtag_tag_region (void *p, size_t n) |
| 774 | +{ |
| 775 | + __libc_mtag_link_error (); |
| 776 | + return p; |
| 777 | +} |
| 778 | |
| 779 | /* Optimized equivalent to __libc_mtag_tag_region followed by memset. */ |
| 780 | -#define __libc_mtag_memset_with_tag memset |
| 781 | +static inline void * |
| 782 | +__libc_mtag_memset_with_tag (void *p, int c, size_t n) |
| 783 | +{ |
| 784 | + __libc_mtag_link_error (); |
| 785 | + return memset (p, c, n); |
| 786 | +} |
| 787 | |
| 788 | /* Convert address P to a pointer that is tagged correctly for that |
| 789 | - location. |
| 790 | - void *__libc_mtag_address_get_tag (void*) */ |
| 791 | -#define __libc_mtag_address_get_tag(p) (p) |
| 792 | + location. */ |
| 793 | +static inline void * |
| 794 | +__libc_mtag_address_get_tag (void *p) |
| 795 | +{ |
| 796 | + __libc_mtag_link_error (); |
| 797 | + return p; |
| 798 | +} |
| 799 | |
| 800 | /* Assign a new (random) tag to a pointer P (does not adjust the tag on |
| 801 | - the memory addressed). |
| 802 | - void *__libc_mtag_new_tag (void*) */ |
| 803 | -#define __libc_mtag_new_tag(p) (p) |
| 804 | + the memory addressed). */ |
| 805 | +static inline void * |
| 806 | +__libc_mtag_new_tag (void *p) |
| 807 | +{ |
| 808 | + __libc_mtag_link_error (); |
| 809 | + return p; |
| 810 | +} |
| 811 | |
| 812 | #endif /* _GENERIC_LIBC_MTAG_H */ |
| 813 | -- |
| 814 | 2.25.1 |
| 815 | |
| 816 | |
| 817 | From 3d9e16280ad881d038aedba0b6fcbd9e78b29072 Mon Sep 17 00:00:00 2001 |
| 818 | From: Szabolcs Nagy <szabolcs.nagy@arm.com> |
| 819 | Date: Fri, 29 Jan 2021 17:07:28 +0000 |
| 820 | Subject: [PATCH 08/11] malloc: Only support zeroing and not arbitrary memset |
| 821 | with mtag |
| 822 | |
| 823 | The memset api is suboptimal and does not provide much benefit. Memory |
| 824 | tagging only needs a zeroing memset (and only for memory that's sized |
| 825 | and aligned to multiples of the tag granule), so change the internal |
| 826 | api and the target hooks accordingly. This is to simplify the |
| 827 | implementation of the target hook. |
| 828 | |
| 829 | Reviewed-by: DJ Delorie <dj@redhat.com> |
| 830 | --- |
| 831 | malloc/malloc.c | 17 ++++++++--------- |
| 832 | sysdeps/aarch64/Makefile | 2 +- |
| 833 | ...g_memset_tag.S => __mtag_tag_zero_region.S} | 18 +++++++----------- |
| 834 | sysdeps/aarch64/libc-mtag.h | 4 ++-- |
| 835 | sysdeps/generic/libc-mtag.h | 6 +++--- |
| 836 | 5 files changed, 21 insertions(+), 26 deletions(-) |
| 837 | rename sysdeps/aarch64/{__mtag_memset_tag.S => __mtag_tag_zero_region.S} (82%) |
| 838 | |
| 839 | diff --git a/malloc/malloc.c b/malloc/malloc.c |
| 840 | index 253a919ec5..01cf6e9325 100644 |
| 841 | --- a/malloc/malloc.c |
| 842 | +++ b/malloc/malloc.c |
| 843 | @@ -413,12 +413,11 @@ void *(*__morecore)(ptrdiff_t) = __default_morecore; |
| 844 | operations can continue to be used. Support macros are used to do |
| 845 | this: |
| 846 | |
| 847 | - void *tag_new_memset (void *ptr, int, val, size_t size) |
| 848 | + void *tag_new_zero_region (void *ptr, size_t size) |
| 849 | |
| 850 | - Has the same interface as memset(), but additionally allocates a |
| 851 | - new tag, colors the memory with that tag and returns a pointer that |
| 852 | - is correctly colored for that location. The non-tagging version |
| 853 | - will simply call memset. |
| 854 | + Allocates a new tag, colors the memory with that tag, zeros the |
| 855 | + memory and returns a pointer that is correctly colored for that |
| 856 | + location. The non-tagging version will simply call memset with 0. |
| 857 | |
| 858 | void *tag_region (void *ptr, size_t size) |
| 859 | |
| 860 | @@ -458,11 +457,11 @@ tag_region (void *ptr, size_t size) |
| 861 | } |
| 862 | |
| 863 | static __always_inline void * |
| 864 | -tag_new_memset (void *ptr, int val, size_t size) |
| 865 | +tag_new_zero_region (void *ptr, size_t size) |
| 866 | { |
| 867 | if (__glibc_unlikely (mtag_enabled)) |
| 868 | - return __libc_mtag_memset_with_tag (__libc_mtag_new_tag (ptr), val, size); |
| 869 | - return memset (ptr, val, size); |
| 870 | + return __libc_mtag_tag_zero_region (__libc_mtag_new_tag (ptr), size); |
| 871 | + return memset (ptr, 0, size); |
| 872 | } |
| 873 | |
| 874 | /* Defined later. */ |
| 875 | @@ -3679,7 +3678,7 @@ __libc_calloc (size_t n, size_t elem_size) |
| 876 | regardless of MORECORE_CLEARS, so we zero the whole block while |
| 877 | doing so. */ |
| 878 | #ifdef USE_MTAG |
| 879 | - return tag_new_memset (mem, 0, CHUNK_AVAILABLE_SIZE (p) - CHUNK_HDR_SZ); |
| 880 | + return tag_new_zero_region (mem, CHUNK_AVAILABLE_SIZE (p) - CHUNK_HDR_SZ); |
| 881 | #else |
| 882 | INTERNAL_SIZE_T csz = chunksize (p); |
| 883 | |
| 884 | diff --git a/sysdeps/aarch64/Makefile b/sysdeps/aarch64/Makefile |
| 885 | index d3ab37a40a..259070cfad 100644 |
| 886 | --- a/sysdeps/aarch64/Makefile |
| 887 | +++ b/sysdeps/aarch64/Makefile |
| 888 | @@ -41,7 +41,7 @@ endif |
| 889 | ifeq ($(subdir),misc) |
| 890 | sysdep_headers += sys/ifunc.h |
| 891 | sysdep_routines += __mtag_address_get_tag \ |
| 892 | - __mtag_memset_tag \ |
| 893 | + __mtag_tag_zero_region \ |
| 894 | __mtag_new_tag \ |
| 895 | __mtag_tag_region |
| 896 | |
| 897 | diff --git a/sysdeps/aarch64/__mtag_memset_tag.S b/sysdeps/aarch64/__mtag_tag_zero_region.S |
| 898 | similarity index 82% |
| 899 | rename from sysdeps/aarch64/__mtag_memset_tag.S |
| 900 | rename to sysdeps/aarch64/__mtag_tag_zero_region.S |
| 901 | index 3c202888a4..74d398bba5 100644 |
| 902 | --- a/sysdeps/aarch64/__mtag_memset_tag.S |
| 903 | +++ b/sysdeps/aarch64/__mtag_tag_zero_region.S |
| 904 | @@ -20,9 +20,6 @@ |
| 905 | |
| 906 | #ifdef USE_MTAG |
| 907 | |
| 908 | -/* Use the same register names and assignments as memset. */ |
| 909 | -#include "memset-reg.h" |
| 910 | - |
| 911 | .arch armv8.5-a |
| 912 | .arch_extension memtag |
| 913 | |
| 914 | @@ -31,16 +28,15 @@ |
| 915 | /* FIXME: This is a minimal implementation. We could do much better than |
| 916 | this for large values of COUNT. */ |
| 917 | |
| 918 | -ENTRY(__libc_mtag_memset_with_tag) |
| 919 | +#define dstin x0 |
| 920 | +#define count x1 |
| 921 | +#define dst x2 |
| 922 | |
| 923 | - and valw, valw, 255 |
| 924 | - orr valw, valw, valw, lsl 8 |
| 925 | - orr valw, valw, valw, lsl 16 |
| 926 | - orr val, val, val, lsl 32 |
| 927 | - mov dst, dstin |
| 928 | +ENTRY(__libc_mtag_tag_zero_region) |
| 929 | |
| 930 | + mov dst, dstin |
| 931 | L(loop): |
| 932 | - stgp val, val, [dst], #16 |
| 933 | + stzg dst, [dst], #16 |
| 934 | subs count, count, 16 |
| 935 | bne L(loop) |
| 936 | #if 0 |
| 937 | @@ -49,5 +45,5 @@ L(loop): |
| 938 | ldg dstin, [dstin] // Recover the tag created (might be untagged). |
| 939 | #endif |
| 940 | ret |
| 941 | -END (__libc_mtag_memset_with_tag) |
| 942 | +END (__libc_mtag_tag_zero_region) |
| 943 | #endif /* USE_MTAG */ |
| 944 | diff --git a/sysdeps/aarch64/libc-mtag.h b/sysdeps/aarch64/libc-mtag.h |
| 945 | index 979cbb743e..f58402ccf9 100644 |
| 946 | --- a/sysdeps/aarch64/libc-mtag.h |
| 947 | +++ b/sysdeps/aarch64/libc-mtag.h |
| 948 | @@ -39,8 +39,8 @@ |
| 949 | void *__libc_mtag_tag_region (const void *, size_t) */ |
| 950 | void *__libc_mtag_tag_region (void *, size_t); |
| 951 | |
| 952 | -/* Optimized equivalent to __libc_mtag_tag_region followed by memset. */ |
| 953 | -void *__libc_mtag_memset_with_tag (void *, int, size_t); |
| 954 | +/* Optimized equivalent to __libc_mtag_tag_region followed by memset to 0. */ |
| 955 | +void *__libc_mtag_tag_zero_region (void *, size_t); |
| 956 | |
| 957 | /* Convert address P to a pointer that is tagged correctly for that |
| 958 | location. |
| 959 | diff --git a/sysdeps/generic/libc-mtag.h b/sysdeps/generic/libc-mtag.h |
| 960 | index e8fc236b6c..4743e873f1 100644 |
| 961 | --- a/sysdeps/generic/libc-mtag.h |
| 962 | +++ b/sysdeps/generic/libc-mtag.h |
| 963 | @@ -44,12 +44,12 @@ __libc_mtag_tag_region (void *p, size_t n) |
| 964 | return p; |
| 965 | } |
| 966 | |
| 967 | -/* Optimized equivalent to __libc_mtag_tag_region followed by memset. */ |
| 968 | +/* Optimized equivalent to __libc_mtag_tag_region followed by memset to 0. */ |
| 969 | static inline void * |
| 970 | -__libc_mtag_memset_with_tag (void *p, int c, size_t n) |
| 971 | +__libc_mtag_tag_zero_region (void *p, size_t n) |
| 972 | { |
| 973 | __libc_mtag_link_error (); |
| 974 | - return memset (p, c, n); |
| 975 | + return memset (p, 0, n); |
| 976 | } |
| 977 | |
| 978 | /* Convert address P to a pointer that is tagged correctly for that |
| 979 | -- |
| 980 | 2.25.1 |
| 981 | |
| 982 | |
| 983 | From 4d596cb72342ba0734dc847653431e078a70edfc Mon Sep 17 00:00:00 2001 |
| 984 | From: Szabolcs Nagy <szabolcs.nagy@arm.com> |
| 985 | Date: Tue, 16 Feb 2021 17:02:44 +0000 |
| 986 | Subject: [PATCH 09/11] malloc: Change calloc when tagging is disabled |
| 987 | |
| 988 | When glibc is built with memory tagging support (USE_MTAG) but it is not |
| 989 | enabled at runtime (mtag_enabled) then unconditional memset was used |
| 990 | even though that can be often avoided. |
| 991 | |
| 992 | This is for performance when tagging is supported but not enabled. |
| 993 | The extra check should have no overhead: tag_new_zero_region already |
| 994 | had a runtime check which the compiler can now optimize away. |
| 995 | |
| 996 | Reviewed-by: DJ Delorie <dj@redhat.com> |
| 997 | --- |
| 998 | malloc/malloc.c | 10 ++++------ |
| 999 | 1 file changed, 4 insertions(+), 6 deletions(-) |
| 1000 | |
| 1001 | diff --git a/malloc/malloc.c b/malloc/malloc.c |
| 1002 | index 01cf6e9325..0b2aff3768 100644 |
| 1003 | --- a/malloc/malloc.c |
| 1004 | +++ b/malloc/malloc.c |
| 1005 | @@ -3591,11 +3591,9 @@ __libc_calloc (size_t n, size_t elem_size) |
| 1006 | mchunkptr oldtop; |
| 1007 | INTERNAL_SIZE_T sz, oldtopsize; |
| 1008 | void *mem; |
| 1009 | -#ifndef USE_MTAG |
| 1010 | unsigned long clearsize; |
| 1011 | unsigned long nclears; |
| 1012 | INTERNAL_SIZE_T *d; |
| 1013 | -#endif |
| 1014 | ptrdiff_t bytes; |
| 1015 | |
| 1016 | if (__glibc_unlikely (__builtin_mul_overflow (n, elem_size, &bytes))) |
| 1017 | @@ -3674,12 +3672,13 @@ __libc_calloc (size_t n, size_t elem_size) |
| 1018 | return 0; |
| 1019 | |
| 1020 | mchunkptr p = mem2chunk (mem); |
| 1021 | + |
| 1022 | /* If we are using memory tagging, then we need to set the tags |
| 1023 | regardless of MORECORE_CLEARS, so we zero the whole block while |
| 1024 | doing so. */ |
| 1025 | -#ifdef USE_MTAG |
| 1026 | - return tag_new_zero_region (mem, CHUNK_AVAILABLE_SIZE (p) - CHUNK_HDR_SZ); |
| 1027 | -#else |
| 1028 | + if (__glibc_unlikely (mtag_enabled)) |
| 1029 | + return tag_new_zero_region (mem, CHUNK_AVAILABLE_SIZE (p) - CHUNK_HDR_SZ); |
| 1030 | + |
| 1031 | INTERNAL_SIZE_T csz = chunksize (p); |
| 1032 | |
| 1033 | /* Two optional cases in which clearing not necessary */ |
| 1034 | @@ -3733,7 +3732,6 @@ __libc_calloc (size_t n, size_t elem_size) |
| 1035 | } |
| 1036 | |
| 1037 | return mem; |
| 1038 | -#endif |
| 1039 | } |
| 1040 | |
| 1041 | /* |
| 1042 | -- |
| 1043 | 2.25.1 |
| 1044 | |
| 1045 | |
| 1046 | From 287a35fba55a0a817db7af71ee966a37b7642bf0 Mon Sep 17 00:00:00 2001 |
| 1047 | From: Szabolcs Nagy <szabolcs.nagy@arm.com> |
| 1048 | Date: Mon, 8 Feb 2021 12:39:01 +0000 |
| 1049 | Subject: [PATCH 10/11] malloc: Use branches instead of mtag_granule_mask |
| 1050 | |
| 1051 | The branches may be better optimized since mtag_enabled is widely used. |
| 1052 | |
| 1053 | Granule size larger than a chunk header is not supported since then we |
| 1054 | cannot have both the chunk header and user area granule aligned. To |
| 1055 | fix that for targets with large granule, the chunk layout has to change. |
| 1056 | |
| 1057 | So code that attempted to handle the granule mask generally was changed. |
| 1058 | This simplified CHUNK_AVAILABLE_SIZE and the logic in malloc_usable_size. |
| 1059 | |
| 1060 | Reviewed-by: DJ Delorie <dj@redhat.com> |
| 1061 | --- |
| 1062 | malloc/arena.c | 1 - |
| 1063 | malloc/malloc.c | 34 ++++++++++++++-------------------- |
| 1064 | 2 files changed, 14 insertions(+), 21 deletions(-) |
| 1065 | |
| 1066 | diff --git a/malloc/arena.c b/malloc/arena.c |
| 1067 | index 1e83bb66bd..9fbbb38a15 100644 |
| 1068 | --- a/malloc/arena.c |
| 1069 | +++ b/malloc/arena.c |
| 1070 | @@ -306,7 +306,6 @@ ptmalloc_init (void) |
| 1071 | |
| 1072 | mtag_enabled = true; |
| 1073 | mtag_mmap_flags = __MTAG_MMAP_FLAGS; |
| 1074 | - mtag_granule_mask = ~(size_t)(__MTAG_GRANULE_SIZE - 1); |
| 1075 | } |
| 1076 | #endif |
| 1077 | |
| 1078 | diff --git a/malloc/malloc.c b/malloc/malloc.c |
| 1079 | index 0b2aff3768..849bd8e2c9 100644 |
| 1080 | --- a/malloc/malloc.c |
| 1081 | +++ b/malloc/malloc.c |
| 1082 | @@ -442,7 +442,6 @@ void *(*__morecore)(ptrdiff_t) = __default_morecore; |
| 1083 | #ifdef USE_MTAG |
| 1084 | static bool mtag_enabled = false; |
| 1085 | static int mtag_mmap_flags = 0; |
| 1086 | -static size_t mtag_granule_mask = ~(size_t)0; |
| 1087 | #else |
| 1088 | # define mtag_enabled false |
| 1089 | # define mtag_mmap_flags 0 |
| 1090 | @@ -1333,15 +1332,16 @@ nextchunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ |
| 1091 | ((req) + SIZE_SZ + MALLOC_ALIGN_MASK) & ~MALLOC_ALIGN_MASK) |
| 1092 | |
| 1093 | /* Available size of chunk. This is the size of the real usable data |
| 1094 | - in the chunk, plus the chunk header. */ |
| 1095 | -#ifdef USE_MTAG |
| 1096 | -#define CHUNK_AVAILABLE_SIZE(p) \ |
| 1097 | - ((chunksize (p) + (chunk_is_mmapped (p) ? 0 : SIZE_SZ)) \ |
| 1098 | - & mtag_granule_mask) |
| 1099 | -#else |
| 1100 | -#define CHUNK_AVAILABLE_SIZE(p) \ |
| 1101 | - (chunksize (p) + (chunk_is_mmapped (p) ? 0 : SIZE_SZ)) |
| 1102 | -#endif |
| 1103 | + in the chunk, plus the chunk header. Note: If memory tagging is |
| 1104 | + enabled the layout changes to accommodate the granule size, this is |
| 1105 | + wasteful for small allocations so not done by default. The logic |
| 1106 | + does not work if chunk headers are not granule aligned. */ |
| 1107 | +_Static_assert (__MTAG_GRANULE_SIZE <= CHUNK_HDR_SZ, |
| 1108 | + "memory tagging is not supported with large granule."); |
| 1109 | +#define CHUNK_AVAILABLE_SIZE(p) \ |
| 1110 | + (__MTAG_GRANULE_SIZE > SIZE_SZ && __glibc_unlikely (mtag_enabled) ? \ |
| 1111 | + chunksize (p) : \ |
| 1112 | + chunksize (p) + (chunk_is_mmapped (p) ? 0 : SIZE_SZ)) |
| 1113 | |
| 1114 | /* Check if REQ overflows when padded and aligned and if the resulting value |
| 1115 | is less than PTRDIFF_T. Returns TRUE and the requested size or MINSIZE in |
| 1116 | @@ -1353,7 +1353,6 @@ checked_request2size (size_t req, size_t *sz) __nonnull (1) |
| 1117 | if (__glibc_unlikely (req > PTRDIFF_MAX)) |
| 1118 | return false; |
| 1119 | |
| 1120 | -#ifdef USE_MTAG |
| 1121 | /* When using tagged memory, we cannot share the end of the user |
| 1122 | block with the header for the next chunk, so ensure that we |
| 1123 | allocate blocks that are rounded up to the granule size. Take |
| 1124 | @@ -1361,8 +1360,9 @@ checked_request2size (size_t req, size_t *sz) __nonnull (1) |
| 1125 | number. Ideally, this would be part of request2size(), but that |
| 1126 | must be a macro that produces a compile time constant if passed |
| 1127 | a constant literal. */ |
| 1128 | - req = (req + ~mtag_granule_mask) & mtag_granule_mask; |
| 1129 | -#endif |
| 1130 | + if (__glibc_unlikely (mtag_enabled)) |
| 1131 | + req = (req + (__MTAG_GRANULE_SIZE - 1)) & |
| 1132 | + ~(size_t)(__MTAG_GRANULE_SIZE - 1); |
| 1133 | |
| 1134 | *sz = request2size (req); |
| 1135 | return true; |
| 1136 | @@ -5112,14 +5112,8 @@ musable (void *mem) |
| 1137 | result = chunksize (p) - CHUNK_HDR_SZ; |
| 1138 | } |
| 1139 | else if (inuse (p)) |
| 1140 | - result = chunksize (p) - SIZE_SZ; |
| 1141 | + result = CHUNK_AVAILABLE_SIZE (p) - CHUNK_HDR_SZ; |
| 1142 | |
| 1143 | -#ifdef USE_MTAG |
| 1144 | - /* The usable space may be reduced if memory tagging is needed, |
| 1145 | - since we cannot share the user-space data with malloc's internal |
| 1146 | - data structure. */ |
| 1147 | - result &= mtag_granule_mask; |
| 1148 | -#endif |
| 1149 | return result; |
| 1150 | } |
| 1151 | return 0; |
| 1152 | -- |
| 1153 | 2.25.1 |
| 1154 | |
| 1155 | |
| 1156 | From 66de173bf919e601e408dc78772c6841ad6388ab Mon Sep 17 00:00:00 2001 |
| 1157 | From: Szabolcs Nagy <szabolcs.nagy@arm.com> |
| 1158 | Date: Wed, 17 Feb 2021 10:15:18 +0000 |
| 1159 | Subject: [PATCH 11/11] malloc: Use mtag_enabled instead of USE_MTAG |
| 1160 | |
| 1161 | Use the runtime check where possible: it should not cause slow down in |
| 1162 | the !USE_MTAG case since then mtag_enabled is constant false, but it |
| 1163 | allows compiling the tagging logic so it's less likely to break or |
| 1164 | diverge when developers only test the !USE_MTAG case. |
| 1165 | |
| 1166 | Reviewed-by: DJ Delorie <dj@redhat.com> |
| 1167 | --- |
| 1168 | malloc/hooks.c | 10 ++++------ |
| 1169 | malloc/malloc.c | 10 ++++------ |
| 1170 | 2 files changed, 8 insertions(+), 12 deletions(-) |
| 1171 | |
| 1172 | diff --git a/malloc/hooks.c b/malloc/hooks.c |
| 1173 | index d8e304c31c..9474e199c3 100644 |
| 1174 | --- a/malloc/hooks.c |
| 1175 | +++ b/malloc/hooks.c |
| 1176 | @@ -262,11 +262,10 @@ free_check (void *mem, const void *caller) |
| 1177 | |
| 1178 | int err = errno; |
| 1179 | |
| 1180 | -#ifdef USE_MTAG |
| 1181 | /* Quickly check that the freed pointer matches the tag for the memory. |
| 1182 | This gives a useful double-free detection. */ |
| 1183 | - *(volatile char *)mem; |
| 1184 | -#endif |
| 1185 | + if (__glibc_unlikely (mtag_enabled)) |
| 1186 | + *(volatile char *)mem; |
| 1187 | |
| 1188 | __libc_lock_lock (main_arena.mutex); |
| 1189 | p = mem2chunk_check (mem, NULL); |
| 1190 | @@ -310,11 +309,10 @@ realloc_check (void *oldmem, size_t bytes, const void *caller) |
| 1191 | return NULL; |
| 1192 | } |
| 1193 | |
| 1194 | -#ifdef USE_MTAG |
| 1195 | /* Quickly check that the freed pointer matches the tag for the memory. |
| 1196 | This gives a useful double-free detection. */ |
| 1197 | - *(volatile char *)oldmem; |
| 1198 | -#endif |
| 1199 | + if (__glibc_unlikely (mtag_enabled)) |
| 1200 | + *(volatile char *)oldmem; |
| 1201 | |
| 1202 | __libc_lock_lock (main_arena.mutex); |
| 1203 | const mchunkptr oldp = mem2chunk_check (oldmem, &magic_p); |
| 1204 | diff --git a/malloc/malloc.c b/malloc/malloc.c |
| 1205 | index 849bd8e2c9..36583120ce 100644 |
| 1206 | --- a/malloc/malloc.c |
| 1207 | +++ b/malloc/malloc.c |
| 1208 | @@ -3286,11 +3286,10 @@ __libc_free (void *mem) |
| 1209 | if (mem == 0) /* free(0) has no effect */ |
| 1210 | return; |
| 1211 | |
| 1212 | -#ifdef USE_MTAG |
| 1213 | /* Quickly check that the freed pointer matches the tag for the memory. |
| 1214 | This gives a useful double-free detection. */ |
| 1215 | - *(volatile char *)mem; |
| 1216 | -#endif |
| 1217 | + if (__glibc_unlikely (mtag_enabled)) |
| 1218 | + *(volatile char *)mem; |
| 1219 | |
| 1220 | int err = errno; |
| 1221 | |
| 1222 | @@ -3352,11 +3351,10 @@ __libc_realloc (void *oldmem, size_t bytes) |
| 1223 | if (oldmem == 0) |
| 1224 | return __libc_malloc (bytes); |
| 1225 | |
| 1226 | -#ifdef USE_MTAG |
| 1227 | /* Perform a quick check to ensure that the pointer's tag matches the |
| 1228 | memory's tag. */ |
| 1229 | - *(volatile char*) oldmem; |
| 1230 | -#endif |
| 1231 | + if (__glibc_unlikely (mtag_enabled)) |
| 1232 | + *(volatile char*) oldmem; |
| 1233 | |
| 1234 | /* chunk corresponding to oldmem */ |
| 1235 | const mchunkptr oldp = mem2chunk (oldmem); |
| 1236 | -- |
| 1237 | 2.25.1 |
| 1238 | |